diff --git a/.appveyor.yml b/.appveyor.yml
deleted file mode 100644
index 2e504f175fc0de..00000000000000
--- a/.appveyor.yml
+++ /dev/null
@@ -1,65 +0,0 @@
-environment:
-  global:
-    CCACHE_DIR: C:\ccache
-    CYG_MIRROR: http://cygwin.mirror.constant.com
-    CYG_CACHE: '%CYG_ROOT%\var\cache\setup'
-    CYG_BASH: '%CYG_ROOT%\bin\bash'
-
-  matrix:
-  - MINGW_ARCH: "i686"
-    CYG_ROOT: C:\cygwin
-    CYG_SETUP: setup-x86.exe
-    JULIA_TEST_MAXRSS_MB: 500
-
-  - MINGW_ARCH: "x86_64"
-    CYG_ROOT: C:\cygwin64
-    CYG_SETUP: setup-x86_64.exe
-    JULIA_TEST_MAXRSS_MB: 450
-
-# Only build on master and PR's for now, not personal branches
-# Whether or not PR's get built is determined in the webhook settings
-branches:
-  only:
-    - master
-    - /^release-.*/
-
-# Note: use `[ci skip]` or `[skip ci]` anywhere in the commit message and AppVeyor won't be
-# built for that commit. You can use `[skip appveyor]` to explicitly skip AppVeyor and
-# allow other CI to still run.
-skip_commits:
-# Add [av skip] to commit messages for docfixes, etc to reduce load on queue
-  message: /\[av skip\]/
-# Skip running CI for changes only to the documentation
-# https://github.com/JuliaLang/julia/pull/27356#discussion_r192536676
-# files:
-#   - doc/
-
-notifications:
-  - provider: Email
-    on_build_success: false
-    on_build_failure: false
-    on_build_status_changed: false
-
-cache:
-  - '%CYG_CACHE%'
-  - '%CCACHE_DIR%'
-
-init:
-  - git config --global core.autocrlf input
-
-install:
-    - '%CYG_ROOT%\%CYG_SETUP% -gnq  -R "%CYG_ROOT%" -s "%CYG_MIRROR%" -l "%CYG_CACHE%" -P make,python2,libiconv,curl,time,p7zip,ccache,mingw64-%MINGW_ARCH%-gcc-g++,mingw64-%MINGW_ARCH%-gcc-fortran > NULL 2>&1'
-    - '%CYG_ROOT%\bin\cygcheck -dc cygwin'
-
-build_script:
-  - 'echo Building Julia'
-  - '%CYG_BASH% -lc "cd $APPVEYOR_BUILD_FOLDER && ./contrib/windows/appveyor_build.sh"'
-
-test_script:
-  - 'echo Testing Julia'
-  - usr\bin\julia -e "Base.require(Main, :InteractiveUtils).versioninfo()"
-  - usr\bin\julia --sysimage-native-code=no -e "true"
-  - cd julia-* && .\bin\julia.exe --check-bounds=yes share\julia\test\runtests.jl all &&
-      .\bin\julia.exe --check-bounds=yes share\julia\test\runtests.jl LibGit2/online download
-  - cd ..
-  - usr\bin\julia usr\share\julia\test\embedding\embedding-test.jl test\embedding\embedding.exe
diff --git a/.buildkite-external-version b/.buildkite-external-version
new file mode 100644
index 00000000000000..ba2906d0666cf7
--- /dev/null
+++ b/.buildkite-external-version
@@ -0,0 +1 @@
+main
diff --git a/.clang-format b/.clang-format
index 0322d0f6749a9d..39b5767a502918 100644
--- a/.clang-format
+++ b/.clang-format
@@ -109,7 +109,6 @@ StatementMacros:
   - checked_intrinsic_ctype
   - cvt_iintrinsic
   - fpiseq_n
-  - fpislt_n
   - ter_fintrinsic
   - ter_intrinsic_ctype
   - un_fintrinsic
diff --git a/.codecov.yml b/.codecov.yml
new file mode 100644
index 00000000000000..35cde5cd5e8543
--- /dev/null
+++ b/.codecov.yml
@@ -0,0 +1,4 @@
+coverage:
+  status:
+    project: off
+    patch: off
diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 00000000000000..d2da8839ddb39c
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1,6 @@
+CODEOWNERS @JuliaLang/github-actions
+/.github/ @JuliaLang/github-actions
+/.buildkite/ @JuliaLang/github-actions
+
+/.github/workflows/retry.yml @DilumAluthge
+/.github/workflows/statuses.yml @DilumAluthge
diff --git a/.github/workflows/rerun_failed.yml b/.github/workflows/rerun_failed.yml
new file mode 100644
index 00000000000000..7d022920658a90
--- /dev/null
+++ b/.github/workflows/rerun_failed.yml
@@ -0,0 +1,92 @@
+# Please ping @DilumAluthge when making any changes to this file.
+
+# Here are some steps that we take in this workflow file for security reasons:
+# 1. We do not checkout any code.
+# 2. We only run actions that are defined in a repository in the `JuliaLang` GitHub organization.
+# 3. We do not give the `GITHUB_TOKEN` any permissions.
+# 4. We only give the Buildkite API token (`BUILDKITE_API_TOKEN_RETRY`) the minimum necessary
+#    set of permissions.
+
+# Important note to Buildkite maintainers:
+# In order to make this work, you need to tell Buildkite that it should NOT create a brand-new
+# build when someone closes and reopens a pull request. To do so:
+# 1. Go to the relevant pipeline (e.g. https://buildkite.com/julialang/julia-master).
+# 2. Click on the "Pipeline Settings" button.
+# 3. In the left sidebar, under "Pipeline Settings", click on "GitHub".
+# 4. In the "GitHub Settings", under "Build Pull Requests", make sure that the "Skip pull
+#    request builds for existing commits" checkbox is checked. This is the setting that tells
+#    Buildkite that it should NOT create a brand-new build when someone closes and reopens a
+#    pull request.
+# 5. At the bottom of the page, click the "Save GitHub Settings" button.
+
+name: Rerun Failed Buildkite Jobs
+
+# There are two ways that a user can rerun the failed Buildkite jobs:
+# 1. Close and reopen the pull request.
+#    In order to use this approach, the user must be in one of the following three categories:
+#        (i)   Author of the pull request
+#        (ii)  Commit permissions
+#        (iii) Triage permissions
+# 2. Post a comment on the pull request with exactly the following contents: /buildkite rerun failed
+#    In order to use this approach, the user must be in the following category:
+#        - A member of the JuliaLang GitHub organization (the membership must be publicized)
+
+on:
+  # When using the `pull_request_target` event, all PRs will get access to secret environment
+  # variables (such as the `BUILDKITE_API_TOKEN_RETRY` secret environment variable), even if
+  # the PR is from a fork. Therefore, for security reasons, we do not checkout any code in
+  # this workflow.
+  pull_request_target:
+    types: [ reopened ]
+  issue_comment:
+    types: [ created ]
+
+# We do not give the `GITHUB_TOKEN` any permissions.
+# Therefore, the `GITHUB_TOKEN` only has the same access as any member of the public.
+permissions:
+  contents: none
+
+jobs:
+  rerun-failed-buildkite-jobs:
+    name: Rerun Failed Buildkite Jobs
+    runs-on: ubuntu-latest
+    if: (github.repository == 'JuliaLang/julia') && ((github.event_name == 'pull_request_target' && github.event.action == 'reopened') || (github.event_name == 'issue_comment' && github.event.issue.pull_request && github.event.comment.body == '/buildkite rerun failed'))
+    steps:
+      # For security reasons, we do not checkout any code in this workflow.
+      - name: Check organization membership
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          if [[ "${{ github.event_name }}" == "pull_request_target" ]]; then
+            if [[ "${{ github.event.action }}" == "reopened" ]]; then
+              echo "This is a \"reopened\" event, so we do not need to check the user's organization membership."
+              echo "GOOD_TO_PROCEED=yes" >> ${GITHUB_ENV:?}
+              echo "PULL_REQUEST_NUMBER=${{ github.event.number }}" >> ${GITHUB_ENV:?}
+            else
+              echo "ERROR: The github.event_name is \"pull_request_target\", but the github.event.action is not \"reopened\"."
+              exit 1
+            fi
+          else
+            curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}"
+            curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}/orgs"
+            export USER_IS_ORGANIZATION_MEMBER=`curl -H "Authorization: token ${GITHUB_TOKEN:?}" "https://api.github.com/users/${{ github.event.sender.login }}/orgs" | jq '[.[] | .login] | index("JuliaLang") != null' | tr -s ' '`
+            if [[ "${USER_IS_ORGANIZATION_MEMBER:?}"   == "true" ]]; then
+              echo "The \"${{ github.event.sender.login }}\" user is a public member of the JuliaLang organization."
+              echo "GOOD_TO_PROCEED=yes" >> ${GITHUB_ENV:?}
+              echo "PULL_REQUEST_NUMBER=${{ github.event.issue.number }}" >> ${GITHUB_ENV:?}
+            else
+              echo "ERROR: the \"${{ github.event.sender.login }}\" user is NOT a public member of the JuliaLang organization."
+              echo "If you are a member, please make sure that you have publicized your membership."
+              exit 1
+            fi
+          fi
+      - run: |
+          echo "GOOD_TO_PROCEED: ${{ env.GOOD_TO_PROCEED }}"
+          echo "PULL_REQUEST_NUMBER: ${{ env.PULL_REQUEST_NUMBER }}"
+      - uses: JuliaLang/buildkite-rerun-failed@057f6f2d37aa29a57b7679fd2af0df1d9f9188b4
+        if: env.GOOD_TO_PROCEED == 'yes'
+        with:
+          buildkite_api_token: ${{ secrets.BUILDKITE_API_TOKEN_RETRY }}
+          buildkite_organization_slug: 'julialang'
+          buildkite_pipeline_slug: 'julia-master'
+          pr_number: ${{ env.PULL_REQUEST_NUMBER }}
diff --git a/.github/workflows/statuses.yml b/.github/workflows/statuses.yml
new file mode 100644
index 00000000000000..36a694a7c6d20f
--- /dev/null
+++ b/.github/workflows/statuses.yml
@@ -0,0 +1,65 @@
+# Please ping @DilumAluthge when making any changes to this file.
+
+# This is just a short-term solution until we have migrated all of CI to Buildkite.
+#
+# 1. TODO: delete this file once we have migrated all of CI to Buildkite.
+
+# Here are some steps that we take in this workflow file for security reasons:
+# 1. We do not checkout any code.
+# 2. We do not run any external actions.
+# 3. We only give the `GITHUB_TOKEN` the minimum necessary set of permissions.
+
+name: Create Buildbot Statuses
+
+on:
+  push:
+    branches:
+      - 'master'
+      - 'release-*'
+  # When using the `pull_request_target` event, all PRs will get a `GITHUB_TOKEN` that has
+  # write permissions, even if the PR is from a fork.
+  # Therefore, for security reasons, we do not checkout any code in this workflow.
+  pull_request_target:
+    types: [opened, synchronize]
+    branches:
+      - 'master'
+      - 'release-*'
+
+# These are the permissions for the `GITHUB_TOKEN`.
+# We should only give the token the minimum necessary set of permissions.
+permissions:
+  statuses: write
+
+jobs:
+  create-buildbot-statuses:
+    name: Create Buildbot Statuses
+    runs-on: ubuntu-latest
+    if: github.repository == 'JuliaLang/julia'
+    steps:
+      # For security reasons, we do not checkout any code in this workflow.
+      - run: echo "SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
+        if: github.event_name == 'pull_request_target'
+      - run: echo "SHA=${{ github.sha }}" >> $GITHUB_ENV
+        if: github.event_name != 'pull_request_target'
+      - run: echo "The SHA is ${{ env.SHA }}"
+
+      # As we incrementally migrate individual jobs from Buildbot to Buildkite, we should
+      # remove them from the `context_list`.
+      - run: |
+          declare -a CONTEXT_LIST=(
+                "buildbot/tester_freebsd64"
+                "buildbot/tester_win32"
+                "buildbot/tester_win64"
+                )
+          for CONTEXT in "${CONTEXT_LIST[@]}"
+          do
+            curl \
+              -X POST \
+              -H "Authorization: token $GITHUB_TOKEN" \
+              -H "Accept: application/vnd.github.v3+json" \
+              -d "{\"context\": \"$CONTEXT\", \"state\": \"$STATE\"}" \
+            https://api.github.com/repos/JuliaLang/julia/statuses/${{ env.SHA }}
+          done
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          STATE: "pending"
diff --git a/.gitignore b/.gitignore
index 2da56ff47739e3..2780210c41a9bd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -22,6 +22,7 @@
 *.so
 *.dylib
 *.dSYM
+*.h.gen
 *.jl.cov
 *.jl.*.cov
 *.jl.mem
@@ -31,3 +32,13 @@
 /perf*
 .DS_Store
 .idea/*
+.vscode/*
+
+# Buildkite: Ignore the entire .buildkite directory
+/.buildkite
+
+# Buildkite: Ignore the unencrypted repo_key
+repo_key
+
+# Buildkite: Ignore any agent keys (public or private) we have stored
+agent_key*
diff --git a/.mailmap b/.mailmap
index bcb3c842a76052..5335c88a63d7d9 100644
--- a/.mailmap
+++ b/.mailmap
@@ -24,6 +24,8 @@ Viral B. Shah <viral@mayin.org> <viral@mayin.org>
 Viral B. Shah <viral@mayin.org> <viral@beowulf1.csail.mit.edu>
 Viral B. Shah <viral@mayin.org> <viral@neumann.cs.ucsb.edu>
 Viral B. Shah <viral@mayin.org> <viral@ubuntu-VirtualBox.(none)>
+Viral B. Shah <viral@mayin.org> <viral@juliacomputing.com>
+Viral B. Shah <viral@mayin.org> <ViralBShah@users.noreply.github.com>
 
 George Xing <gxing@mit.edu> <gxing@mit.edu>
 George Xing <gxing@mit.edu> <noobiecubie@gmail.com>
@@ -257,3 +259,27 @@ Curtis Vogt <curtis.vogt@gmail.com> <curtis.vogt@invenia.ca>
 
 Rafael Fourquet <fourquet.rafael@gmail.com> <fourquet.rafael@gmail.com>
 Rafael Fourquet <fourquet.rafael@gmail.com> <fourquet.rafael+github@gmail.com>
+
+Nathan Daly <NHDaly@gmail.com> <NHDaly@gmail.com>
+Nathan Daly <NHDaly@gmail.com> <nhDaly@gmail.com>
+
+Mosè Giordano <mose@gnu.org> <mose@gnu.org>
+Mosè Giordano <mose@gnu.org> <giordano@users.noreply.github.com>
+
+Andy Ferris <ferris.andy@gmail.com> <ferris.andy@gmail.com>
+Andy Ferris <ferris.andy@gmail.com> <andy.ferris@roames.com.au>
+
+David Varela <varela.david.dv@gmail.com> <00.varela.david@gmail.com>
+David Varela <varela.david.dv@gmail.com> <varela.david.dv@gmail.com>
+
+Arch D. Robison <arch.robison@intel.com> <arch.robison@intel.com>
+Arch D. Robison <arch.robison@intel.com> <ArchRobison@users.noreply.github.com>
+
+Matt Bauman <mbauman@gmail.com> <mbauman@gmail.com>
+Matt Bauman <mbauman@gmail.com> <mbauman@juliacomputing.com>
+
+Daniel Karrasch <Daniel.Karrasch@gmx.de> <Daniel.Karrasch@gmx.de>
+Daniel Karrasch <Daniel.Karrasch@gmx.de> <daniel.karrasch@posteo.de>
+
+Roger Luo <rogerluo.rl18@gmail.com> <rogerluo.rl18@gmail.com>
+Roger Luo <rogerluo.rl18@gmail.com> <hiroger@qq.com>
diff --git a/.travis.yml b/.travis.yml
deleted file mode 100644
index 665cebadce6b1a..00000000000000
--- a/.travis.yml
+++ /dev/null
@@ -1,153 +0,0 @@
-language: cpp
-sudo: required
-dist: trusty
-matrix:
-  include:
-    - os: linux
-      env: ARCH="i686"
-      compiler: "g++-5 -m32"
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - ccache
-            - libssl1.0.0
-            - bar
-            - time
-            - binutils
-            - gcc-5
-            - g++-5
-            - gcc-5-multilib
-            - g++-5-multilib
-            - make:i386
-            - libssl-dev:i386
-            - gfortran-5
-            - gfortran-5-multilib
-    - os: linux
-      env: ARCH="x86_64"
-      compiler: "g++-5 -m64"
-      addons:
-        apt:
-          sources:
-            - ubuntu-toolchain-r-test
-          packages:
-            - ccache
-            - libssl1.0.0
-            - bar
-            - time
-            - g++-5
-            - gfortran-5
-    - os: osx
-      env: ARCH="x86_64"
-      osx_image: xcode8.3
-cache: ccache
-branches:
-  only:
-    - master
-    - /^release-.*/
-    - /^v\d+\.\d+\.\d+$/
-notifications:
-    email: false
-before_install:
-    - make check-whitespace
-    - if [ `uname` = "Linux" ]; then
-        contrib/travis_fastfail.sh || exit 1;
-        mkdir -p $HOME/bin;
-        ln -s /usr/bin/gcc-5 $HOME/bin/gcc;
-        ln -s /usr/bin/g++-5 $HOME/bin/g++;
-        ln -s /usr/bin/gfortran-5 $HOME/bin/gfortran;
-        ln -s /usr/bin/gcc-5 $HOME/bin/x86_64-linux-gnu-gcc;
-        ln -s /usr/bin/g++-5 $HOME/bin/x86_64-linux-gnu-g++;
-        gcc --version;
-        BAR="bar -i 30";
-        BUILDOPTS="-j5 VERBOSE=1 FORCE_ASSERTIONS=1 LLVM_ASSERTIONS=1 USECCACHE=1 USE_BINARYBUILDER_LIBUV=0 USE_BINARYBUILDER_LIBUNWIND=0 USE_BINARYBUILDER_LLVM=0";
-        echo "override ARCH=$ARCH" >> Make.user;
-        sudo sh -c "echo 0 > /proc/sys/net/ipv6/conf/lo/disable_ipv6";
-        export JULIA_CPU_THREADS=4;
-        export JULIA_TEST_MAXRSS_MB=1200;
-        TESTSTORUN="all";
-      elif [ `uname` = "Darwin" ]; then
-        brew update;
-        brew install -v jq pv ccache;
-        export PATH="$(brew --prefix ccache)/libexec:$PATH";
-        BAR="pv -i 30";
-        contrib/travis_fastfail.sh || exit 1;
-        brew rm --force gcc gmp mpfr pcre2;
-        brew install -v gcc gmp mpfr pcre2;
-        BUILDOPTS="-j3 USECLANG=1 USECCACHE=1 VERBOSE=1 FORCE_ASSERTIONS=1";
-        for proj in LLVM LLVM_ASSERTS OPENBLAS SUITESPARSE OPENLIBM; do
-            BUILDOPTS="$BUILDOPTS USE_BINARYBUILDER_${proj}=1";
-        done;
-        for lib in GMP MPFR LIBUNWIND; do
-            BUILDOPTS="$BUILDOPTS USE_SYSTEM_$lib=1";
-        done;
-        spawn_DYLD_FALLBACK_LIBRARY_PATH="/usr/local/lib:/lib:/usr/lib";
-        export JULIA_MACOS_SPAWN="DYLD_FALLBACK_LIBRARY_PATH=\"$spawn_DYLD_FALLBACK_LIBRARY_PATH\" \$1";
-        export BUILDOPTS="$BUILDOPTS spawn=\$(JULIA_MACOS_SPAWN)";
-        export JULIA_CPU_THREADS=2;
-        export JULIA_TEST_MAXRSS_MB=600;
-        TESTSTORUN="all --skip linalg/triangular subarray"; fi # TODO: re enable these if possible without timing out
-    - echo "override JULIA_CPU_TARGET=generic;native" >> Make.user
-    - wget http://http.debian.net/debian/pool/main/m/moreutils/moreutils_0.62.orig.tar.xz
-    - tar -xJvf moreutils_0.62.orig.tar.xz && mv moreutils-0.62 moreutils
-script:
-    - echo BUILDOPTS=$BUILDOPTS
-    - export BUILDOPTS
-    # compile / install dependencies
-    - contrib/download_cmake.sh
-    - make -C moreutils mispipe
-    - make $BUILDOPTS -C base version_git.jl.phony
-    # capture the log, but only print it if `make deps` fails
-    # try to show the end of the log first, because this log might be very long (> 4MB)
-    # and thus be truncated by travis
-    - moreutils/mispipe "make \$BUILDOPTS NO_GIT=1 -C deps 2> deps-err.log" "$BAR" > deps.log ||
-        { echo "-- deps build log stderr tail 100 --------------------------------------";
-          tail -n 100 deps-err.log;
-          echo "-- deps build log stdout tail 100 --------------------------------------";
-          tail -n 100 deps.log;
-          echo "-- deps build log stderr all -------------------------------------------";
-          cat deps-err.log;
-          echo "-- deps build log stdout all -------------------------------------------";
-          cat deps.log;
-          echo "-- end of deps build log -----------------------------------------------";
-          false; }
-    # compile / install Julia
-    - make $BUILDOPTS NO_GIT=1 prefix=/tmp/julia release | moreutils/ts -s "%.s"
-    - make $BUILDOPTS NO_GIT=1 prefix=/tmp/julia install | moreutils/ts -s "%.s"
-    - make $BUILDOPTS NO_GIT=1 build-stats
-    - du -sk /tmp/julia/*
-    - ls -l /tmp/julia/lib
-    - ls -l /tmp/julia/lib/julia
-    - FILES_CHANGED=$(git diff --name-only $TRAVIS_COMMIT_RANGE -- || git ls-files)
-    - cd .. && mv julia julia2
-    # run tests
-    - /tmp/julia/bin/julia --sysimage-native-code=no -e 'true'
-    # - /tmp/julia/bin/julia-debug --sysimage-native-code=no -e 'true'
-    - /tmp/julia/bin/julia -e 'Base.require(Main, :InteractiveUtils).versioninfo()'
-    - pushd /tmp/julia/share/julia/test
-    # skip tests if only files within the "doc" dir have changed
-    - if [ $(echo "$FILES_CHANGED" | grep -cv '^doc/') -gt 0 ]; then
-        /tmp/julia/bin/julia --check-bounds=yes runtests.jl $TESTSTORUN &&
-        /tmp/julia/bin/julia --check-bounds=yes runtests.jl LibGit2/online download; fi
-    - popd
-    # test that the embedding code works on our installation
-    - mkdir /tmp/embedding-test &&
-        make check -C /tmp/julia/share/julia/test/embedding
-             JULIA="/tmp/julia/bin/julia"
-             BIN=/tmp/embedding-test
-             "$(cd julia2 && make print-CC)"
-    # restore initial state and prepare for travis caching
-    - mv julia2 julia &&
-        rm -f julia/deps/scratch/libgit2-*/CMakeFiles/CMakeOutput.log
-    # run the LLVM tests on Linux
-    - if [ `uname` = "Linux" ]; then
-        pushd julia && make -C test/llvmpasses && popd; fi
-    # run the doctests on Linux 64-bit
-    - if [ `uname` = "Linux" ] && [ $ARCH = "x86_64" ]; then
-        pushd julia && make -C doc doctest=true && popd; fi
-# uncomment the following if failures are suspected to be due to the out-of-memory killer
-#    - dmesg
-after_success:
-    - if [ `uname` = "Linux" ] && [ $ARCH = "x86_64" ]; then
-        cd julia && make -C doc deploy; fi
diff --git a/CITATION.bib b/CITATION.bib
index af8cffe7aa5248..f1361a1eea0b89 100644
--- a/CITATION.bib
+++ b/CITATION.bib
@@ -8,68 +8,8 @@ @article{Julia-2017
     pages={65--98},
     year={2017},
     publisher={SIAM},
-    doi={10.1137/141000671}
+    doi={10.1137/141000671},
+    url={https://epubs.siam.org/doi/10.1137/141000671}
 }
 
-% The following citations are about specific aspects of Julia.
-
-@article{Julia-2019-a,
-   author = {Bezanson, Jeff and Chen, Jiahao and Chung, Benjamin and Karpinski, Stefan and Shah, Viral B. and Vitek, Jan and Zoubritzky, Lionel},
-   title = {Julia: Dynamism and Performance Reconciled by Design},
-   journal = {Proc. ACM Program. Lang.},
-   issue_date = {November 2018},
-   volume = {2},
-   number = {OOPSLA},
-   month = oct,
-   year = {2018},
-   issn = {2475-1421},
-   pages = {120:1--120:23},
-   articleno = {120},
-   numpages = {23},
-   url = {https://doi.acm.org/10.1145/3276490},
-   doi = {10.1145/3276490},
-   acmid = {3276490},
-   publisher = {ACM},
-   address = {New York, NY, USA},
-   keywords = {dynamic languages, just-in-time compilation, multiple dispatch},
-} 
-
-@article{Julia-2019-b,
-   author = {Zappa Nardelli, Francesco and Belyakova, Julia and Pelenitsyn, Artem and Chung, Benjamin and Bezanson, Jeff and Vitek, Jan},
-   title = {Julia Subtyping: A Rational Reconstruction},
-   journal = {Proc. ACM Program. Lang.},
-   issue_date = {November 2018},
-   volume = {2},
-   number = {OOPSLA},
-   month = oct,
-   year = {2018},
-   issn = {2475-1421},
-   pages = {113:1--113:27},
-   articleno = {113},
-   numpages = {27},
-   url = {https://doi.acm.org/10.1145/3276483},
-   doi = {10.1145/3276483},
-   acmid = {3276483},
-   publisher = {ACM},
-   address = {New York, NY, USA},
-   keywords = {Multiple Dispatch, Subtyping},
-} 
-
-@inproceedings{Julia-2014,
-   author = {Bezanson, Jeff and Chen, Jiahao and Karpinski, Stefan and Shah, Viral and Edelman, Alan},
-   title = {Array Operators Using Multiple Dispatch: A Design Methodology for Array Implementations in Dynamic Languages},
-   booktitle = {Proceedings of ACM SIGPLAN International Workshop on Libraries, Languages, and Compilers for Array Programming},
-   series = {ARRAY'14},
-   year = {2014},
-   isbn = {978-1-4503-2937-8},
-   location = {Edinburgh, United Kingdom},
-   pages = {56:56--56:61},
-   articleno = {56},
-   numpages = {6},
-   url = {https://doi.acm.org/10.1145/2627373.2627383},
-   doi = {10.1145/2627373.2627383},
-   acmid = {2627383},
-   publisher = {ACM},
-   address = {New York, NY, USA},
-   keywords = {Julia, array indexing, dynamic dispatch, multiple dispatch, static analysis, type inference},
-} 
+% For more details on research related to Julia, see https://julialang.org/research
diff --git a/CITATION.cff b/CITATION.cff
new file mode 100644
index 00000000000000..c88727bcfa3114
--- /dev/null
+++ b/CITATION.cff
@@ -0,0 +1,40 @@
+cff-version: 1.2.0
+message: "Cite this paper whenever you use Julia"
+authors:
+- family-names: "Bezanson"
+  given-names: "Jeff"
+- family-names: "Edelman"
+  given-names: "Alan"
+- family-names: "Karpinski"
+  given-names: "Stefan"
+- family-names: "Shah"
+  given-names: "Viral B."
+title: "Julia: A fresh approach to numerical computing"
+version: "v1"
+license: "MIT"
+doi: "10.1137/141000671"
+date-released: 2017-02-07
+url: "https://julialang.org"
+preferred-citation:
+  authors:
+    - family-names: "Bezanson"
+      given-names: "Jeff"
+    - family-names: "Edelman"
+      given-names: "Alan"
+    - family-names: "Karpinski"
+      given-names: "Stefan"
+    - family-names: "Shah"
+      given-names: "Viral B."
+  doi: "10.1137/141000671"
+  journal: "SIAM Review"
+  month: 9
+  start: 65
+  end: 98
+  pages: 33
+  title: "Julia: A fresh approach to numerical computing"
+  type: article
+  volume: 59
+  issue: 1
+  year: 2017
+  publisher:
+    name: "SIAM"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 63ea55230ddd82..2c924b2cdabb9b 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,7 +6,7 @@ If you are already familiar with Julia itself, this blog post by Katharine Hyatt
 
 ## Learning Julia
 
-[The learning page](https://julialang.org/learning) has a great list of resources for new and experienced users alike. [This tutorial video](https://www.youtube.com/watch?v=vWkgEddb4-A) is one recommended starting point, as is the "[Invitation to Julia](https://www.youtube.com/watch?v=gQ1y5NUD_RI)" workshop video from JuliaCon 2015  ([slide materials here](https://github.com/dpsanders/invitation_to_julia)). The [Julia documentation](https://docs.julialang.org) covers the language and core library features, and is searchable.
+[The learning page](https://julialang.org/learning) has a great list of resources for new and experienced users alike.
 
 ## Before filing an issue
 
@@ -90,7 +90,7 @@ from Julia's root directory. This will rebuild the Julia system image, then inst
 
 > **Note**
 >
-> When making changes to any of Julia's documentation it is recommended that you run `make docs` to check the your changes are valid and do not produce any errors before opening a pull request.
+> When making changes to any of Julia's documentation it is recommended that you run `make docs` to check that your changes are valid and do not produce any errors before opening a pull request.
 
 Below are outlined the three most common types of documentation changes and the steps required to perform them. Please note that the following instructions do not cover the full range of features provided by Documenter.jl. Refer to [Documenter's documentation](https://juliadocs.github.io/Documenter.jl/stable) if you encounter anything that is not covered by the sections below.
 
@@ -158,7 +158,9 @@ Examples written within docstrings can be used as testcases known as "doctests"
     "DOCSTRING TEST"
     ```
 
-A doctest needs to match an interactive REPL including the `julia>` prompt. To run doctests you need to run `make -C doc doctest=true` from the root directory. It is recommended to add the header `# Examples` above the doctests.
+A doctest needs to match an interactive REPL including the `julia>` prompt. It is recommended to add the header `# Examples` above the doctests.
+
+To run doctests you need to run `make -C doc doctest=true` from the root directory. You can use `make -C doc doctest=true revise=true` if you are modifying the doctests and don't want to rebuild Julia after each change (see details below about the Revise.jl workflow).
 
 #### News-worthy changes
 
@@ -189,7 +191,7 @@ Note: These instructions are for adding to or improving functionality in the bas
 
 Add new code to Julia's base libraries as follows (this is the "basic" approach; see a more efficient approach in the next section):
 
- 1. Edit the appropriate file in the `base/` directory, or add new files if necessary. Create tests for your functionality and add them to files in the `test/` directory. If you're editing C or Scheme code, most likely it lives in `src/` or one of its subdirectories, although some aspects of Julia's REPL initialization live in `ui/`.
+ 1. Edit the appropriate file in the `base/` directory, or add new files if necessary. Create tests for your functionality and add them to files in the `test/` directory. If you're editing C or Scheme code, most likely it lives in `src/` or one of its subdirectories, although some aspects of Julia's REPL initialization live in `cli/`.
 
  2. Add any new files to `sysimg.jl` in order to build them into the Julia system image.
 
@@ -207,8 +209,6 @@ or with the `runtests.jl` script, e.g. to run `test/bitarray.jl` and `test/math.
 
     ./usr/bin/julia test/runtests.jl bitarray math
 
-Make sure that [Travis](https://www.travis-ci.org) greenlights the pull request with a [`Good to merge` message](https://blog.travis-ci.com/2012-09-04-pull-requests-just-got-even-more-awesome).
-
 #### Modifying base more efficiently with Revise.jl
 
 [Revise](https://github.com/timholy/Revise.jl) is a package that
@@ -248,6 +248,51 @@ process before running the corresponding test. This can be useful as a shortcut
 on the command line (since tests aren't always designed to be run outside the
 runtest harness).
 
+### Contributing to patch releases
+
+The process of creating a patch release is roughly as follows:
+
+1. Create a new branch (e.g. `backports-release-1.6`) against the relevant minor release
+   branch (e.g. `release-1.6`). Usually a corresponding pull request is created as well.
+
+2. Add commits, nominally from `master` (hence "backports"), to that branch.
+   See below for more information on this process.
+
+3. Run the [BaseBenchmarks.jl](https://github.com/JuliaCI/BaseBenchmarks.jl) benchmark
+   suite and [PkgEval.jl](https://github.com/JuliaCI/PkgEval.jl) package ecosystem
+   exerciser against that branch. Nominally BaseBenchmarks.jl and PkgEval.jl are
+   invoked via [Nanosoldier.jl](https://github.com/JuliaCI/Nanosoldier.jl) from
+   the pull request associated with the backports branch. Fix any issues.
+
+4. Once all test and benchmark reports look good, merge the backports branch into
+   the corresponding release branch (e.g. merge `backports-release-1.6` into
+   `release-1.6`).
+
+5. Open a pull request that bumps the version of the relevant minor release to the
+   next patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37718).
+
+6. Ping `@JuliaLang/releases` to tag the patch release and update the website.
+
+7. Open a pull request that bumps the version of the relevant minor release to the
+   next prerelase patch version, e.g. as in [this pull request](https://github.com/JuliaLang/julia/pull/37724).
+
+Step 2 above, i.e. backporting commits to the `backports-release-X.Y` branch, has largely
+been automated via [`Backporter`](https://github.com/KristofferC/Backporter): Backporter
+searches for merged pull requests with the relevant `backport-X.Y` tag, and attempts to
+cherry-pick the commits from those pull requests onto the `backports-release-X.Y` branch.
+Some commits apply successfully without intervention, others not so much. The latter
+commits require "manual" backporting, with which help is generally much appreciated.
+Backporter generates a report identifying those commits it managed to backport automatically
+and those that require manual backporting; this report is usually copied into the first
+post of the pull request associated with `backports-release-X.Y` and maintained as
+additional commits are automatically and/or manually backported.
+
+When contributing a manual backport, if you have the necessary permissions, please push the
+backport directly to the `backports-release-X.Y` branch. If you lack the relevant
+permissions, please open a pull request against the `backports-release-X.Y` branch with the
+manual backport. Once the manual backport is live on the `backports-release-X.Y` branch,
+please remove the `backport-X.Y` tag from the originating pull request for the commits.
+
 ### Code Formatting Guidelines
 
 #### General Formatting Guidelines for Julia code contributions
@@ -289,6 +334,11 @@ runtest harness).
    - To remove whitespace relative to the `master` branch, run
      `git rebase --whitespace=fix master`.
 
+#### Git Recommendations For Pull Request Reviewers
+
+- When merging, we generally like `squash+merge`. Unless it is the rare case of a PR with carefully staged individual commits that you want in the history separately, in which case `merge` is acceptable, but usually prefer `squash+merge`.
+
+
 ## Resources
 
 * Julia
@@ -299,7 +349,7 @@ runtest harness).
   - **Code coverage:** <https://coveralls.io/r/JuliaLang/julia>
 
 * Design of Julia
-  - [Julia: A Fresh Approach to Numerical Computing](https://julialang.org/research/julia-fresh-approach-BEKS.pdf)
+  - [Julia: A Fresh Approach to Numerical Computing](https://julialang.org/assets/research/julia-fresh-approach-BEKS.pdf)
   - [Julia: Dynamism and Performance Reconciled by Design](http://janvitek.org/pubs/oopsla18b.pdf)
   - [All Julia Publications](https://julialang.org/research)
 
diff --git a/HISTORY.md b/HISTORY.md
index 3e9142291dc91a..1fcb416d4d47f3 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,3 +1,1022 @@
+Julia v1.8 Release Notes
+========================
+
+New language features
+---------------------
+
+* Mutable struct fields may now be annotated as `const` to prevent changing them after construction,
+  providing for greater clarity and optimization ability of these objects ([#43305]).
+* Type annotations can now be added to global variables to make accessing them type stable ([#43671]).
+* Empty n-dimensional arrays can now be created using multiple semicolons inside square brackets,
+  e.g. `[;;;]` creates a 0×0×0 `Array` ([#41618]).
+* `try`-blocks can now optionally have an `else`-block which is executed right after the main body only if
+  no errors were thrown ([#42211]).
+* `@inline` and `@noinline` annotations can now be placed within a function body ([#41312]).
+* `@inline` and `@noinline` annotations can now be applied to a function call site or block
+  to enforce the involved function calls to be (or not to be) inlined ([#41328]).
+* `∀`, `∃`, and `∄` are now allowed as identifier characters ([#42314]).
+* Support for Unicode 14.0.0 ([#43443]).
+* `Module(:name, false, false)` can be used to create a `module` that contains no names
+  (it does not import `Base` or `Core` and does not contain a reference to itself) ([#40110], [#42154]).
+
+Language changes
+----------------
+
+* Newly-created Task objects (`@spawn`, `@async`, etc.) now adopt the world age for methods from their parent
+  Task upon creation, instead of using the global latest world at start. This is done to enable inference to
+  eventually optimize these calls. Places that wish for the old behavior may use `Base.invokelatest` ([#41449]).
+* Unbalanced Unicode bidirectional formatting directives are now disallowed within strings and comments,
+  to mitigate the ["trojan source"](https://www.trojansource.codes) vulnerability ([#42918]).
+* `Base.ifelse` is now defined as a generic function rather than a builtin one, allowing packages to
+  extend its definition ([#37343]).
+* Every assignment to a global variable now first goes through a call to `convert(Any, x)` (or `convert(T, x)`
+  respectively if a type `T` has been declared for the global). This means great care should be taken
+  to ensure the invariant `convert(Any, x) === x` always holds, as this change could otherwise lead to
+  unexpected behavior ([#43671]).
+* Builtin functions are now a bit more like generic functions, and can be enumerated with `methods` ([#43865]).
+
+Compiler/Runtime improvements
+-----------------------------
+
+* Bootstrapping time has been improved by about 25% ([#41794]).
+* The LLVM-based compiler has been separated from the run-time library into a new library,
+  `libjulia-codegen`. It is loaded by default, so normal usage should see no changes.
+  In deployments that do not need the compiler (e.g. system images where all needed code
+  is precompiled), this library (and its LLVM dependency) can simply be excluded ([#41936]).
+* Conditional type constraints can now be forwarded interprocedurally (i.e. propagated from caller to callee) ([#42529]).
+* Julia-level SROA (Scalar Replacement of Aggregates) has been improved: allowing elimination of
+  `getfield` calls with constant global fields ([#42355]), enabling elimination of mutable structs with
+  uninitialized fields ([#43208]), improving performance ([#43232]), and handling more nested `getfield`
+  calls ([#43239]).
+* Abstract call sites can now be inlined or statically resolved as long as the call site has a single
+  matching method ([#43113]).
+* Inference now tracks various effects such as side-effectful-ness and nothrow-ness on a per-specialization basis.
+  Code heavily dependent on constant propagation should see significant compile-time performance improvements and
+  certain cases (e.g. calls to uninlinable functions that are nevertheless effect free) should see runtime performance
+  improvements. Effects may be overwritten manually with the `@Base.assume_effects` macro ([#43852]).
+
+Command-line option changes
+---------------------------
+
+* The default behavior of observing `@inbounds` declarations is now an option via `auto` in `--check-bounds=yes|no|auto` ([#41551]).
+* New option `--strip-metadata` to remove docstrings, source location information, and local
+  variable names when building a system image ([#42513]).
+* New option `--strip-ir` to remove the compiler's IR (intermediate representation) of source
+  code when building a system image. The resulting image will only work if `--compile=all` is
+  used, or if all needed code is precompiled ([#42925]).
+* When the program file is `-` the code to be executed is read from standard in ([#43191]).
+
+Multi-threading changes
+-----------------------
+
+* `Threads.@threads` now defaults to a new `:dynamic` schedule option which is similar to the previous behavior except
+  that iterations will be scheduled dynamically to available worker threads rather than pinned to each thread. This
+  behavior is more composable with (possibly nested) `@spawn` and `@threads` loops ([#43919], [#44136]).
+
+Build system changes
+--------------------
+
+
+New library functions
+---------------------
+
+* New function `eachsplit(str)` for iteratively performing `split(str)` ([#39245]).
+* New function `allequal(itr)` for testing if all elements in an iterator are equal ([#43354]).
+* `hardlink(src, dst)` can be used to create hard links ([#41639]).
+* `setcpuaffinity(cmd, cpus)` can be used to set CPU affinity of sub-processes ([#42469]).
+* `diskstat(path=pwd())` can be used to return statistics about the disk ([#42248]).
+* New `@showtime` macro to show both the line being evaluated and the `@time` report ([#42431]).
+* The `LazyString` and the `lazy"str"` macro were added to support delayed construction of error messages in error paths ([#33711]).
+
+New library features
+--------------------
+
+* `@time` and `@timev` now take an optional description to allow annotating the source of time reports,
+  e.g. `@time "Evaluating foo" foo()` ([#42431]).
+* `range` accepts either `stop` or `length` as a sole keyword argument ([#39241]).
+* `precision` and `setprecision` now accept a `base` keyword argument ([#42428]).
+* TCP socket objects now expose `closewrite` functionality and support half-open mode usage ([#40783]).
+* `extrema` now accepts an `init` keyword argument ([#36265], [#43604]).
+* `Iterators.countfrom` now accepts any type that defines `+` ([#37747]).
+
+Standard library changes
+------------------------
+
+* Keys with value `nothing` are now removed from the environment in `addenv` ([#43271]).
+* `Iterators.reverse` (and hence `last`) now supports `eachline` iterators ([#42225]).
+* The `length` function on certain ranges of certain element types no longer checks for integer
+  overflow in most cases. The new function `checked_length` is now available, which will try to use checked
+  arithmetic to error if the result may be wrapping. Or use a package such as SaferIntegers.jl when
+  constructing the range ([#40382]).
+* Intersect returns a result with the eltype of the type-promoted eltypes of the two inputs ([#41769]).
+* Iterating an `Iterators.Reverse` now falls back on reversing the eachindex iterator, if possible ([#43110]).
+
+#### InteractiveUtils
+
+* New macro `@time_imports` for reporting any time spent importing packages and their dependencies ([#41612]).
+
+#### LinearAlgebra
+
+* The BLAS submodule now supports the level-2 BLAS subroutine `spr!` ([#42830]).
+* `cholesky[!]` now supports `LinearAlgebra.PivotingStrategy` (singleton type) values
+  as its optional `pivot` argument: the default is `cholesky(A, NoPivot())` (vs.
+  `cholesky(A, RowMaximum())`); the former `Val{true/false}`-based calls are deprecated ([#41640]).
+* The standard library `LinearAlgebra.jl` is now completely independent of `SparseArrays.jl`,
+  both in terms of the source code as well as unit testing ([#43127]). As a consequence,
+  sparse arrays are no longer (silently) returned by methods from `LinearAlgebra` applied
+  to `Base` or `LinearAlgebra` objects. Specifically, this results in the following breaking
+  changes:
+  * Concatenations involving special "sparse" matrices (`*diagonal`) now return dense matrices;
+    As a consequence, the `D1` and `D2` fields of `SVD` objects, constructed upon `getproperty`
+    calls are now dense matrices.
+  * 3-arg `similar(::SpecialSparseMatrix, ::Type, ::Dims)` returns a dense zero matrix.
+    As a consequence, products of bi-, tri- and symmetric tridiagonal matrices with each
+    other result in dense output. Moreover, constructing 3-arg similar matrices of special
+    "sparse" matrices of (nonstatic) matrices now fails for the lack of `zero(::Type{Matrix{T}})`.
+
+#### Printf
+
+* Now uses `textwidth` for formatting `%s` and `%c` widths ([#41085]).
+
+#### Profile
+
+* CPU profiling now records sample metadata including thread and task. `Profile.print()` has a new `groupby` kwarg that allows
+  grouping by thread, task, or nested thread/task, task/thread, and `threads` and `tasks` kwargs to allow filtering.
+  Further, percent utilization is now reported as a total or per-thread, based on whether the thread is idle or not at
+  each sample. `Profile.fetch()` includes the new metadata by default. For backwards compatibility with external
+  profiling data consumers, it can be excluded by passing `include_meta=false` ([#41742]).
+* The new `Profile.Allocs` module allows memory allocations to be profiled. The stack trace, type, and size of each
+  allocation is recorded, and a `sample_rate` argument allows a tunable amount of allocations to be skipped,
+  reducing performance overhead ([#42768]).
+* A fixed duration cpu profile can now be triggered by the user during running tasks without `Profile` being loaded
+  first and the report will show during execution. On MacOS & FreeBSD press `ctrl-t` or raise a `SIGINFO`.
+  For other platforms raise a `SIGUSR1` i.e. `% kill -USR1 $julia_pid`. Not currently available on windows ([#43179]).
+
+#### REPL
+
+* `RadioMenu` now supports optional `keybindings` to directly select options ([#41576]).
+* ` ?(x, y` followed by TAB displays all methods that can be called
+  with arguments `x, y, ...`. (The space at the beginning prevents entering help-mode.)
+  `MyModule.?(x, y` limits the search to `MyModule`. TAB requires that at least one
+  argument have a type more specific than `Any`; use SHIFT-TAB instead of TAB
+  to allow any compatible methods ([#38791]).
+* New `err` global variable in `Main` set when an expression throws an exception, akin to `ans`. Typing `err` reprints
+  the exception information ([#40642]).
+
+#### SparseArrays
+
+* The code for SparseArrays has been moved from the Julia repo to the external
+  repo at https://github.com/JuliaSparse/SparseArrays.jl. This is only a code
+  movement and does not impact any usage ([#43813]).
+* New sparse concatenation functions `sparse_hcat`, `sparse_vcat`, and `sparse_hvcat` return
+  `SparseMatrixCSC` output independent from the types of the input arguments. They make
+  concatenation behavior available, in which the presence of some special "sparse" matrix
+  argument resulted in sparse output by multiple dispatch. This is no longer possible after
+  making `LinearAlgebra.jl` independent from `SparseArrays.jl` ([#43127]).
+
+#### Logging
+
+* The standard log levels `BelowMinLevel`, `Debug`, `Info`, `Warn`, `Error`,
+  and `AboveMaxLevel` are now exported from the Logging stdlib ([#40980]).
+
+#### Unicode
+
+* Added function `isequal_normalized` to check for Unicode equivalence without
+  explicitly constructing normalized strings ([#42493]).
+* The `Unicode.normalize` function now accepts a `chartransform` keyword that can
+  be used to supply custom character mappings, and a `Unicode.julia_chartransform`
+  function is provided to reproduce the mapping used in identifier normalization
+  by the Julia parser ([#42561]).
+
+#### Test
+
+* `@test_throws "some message" triggers_error()` can now be used to check whether the displayed error text
+  contains "some message" regardless of the specific exception type.
+  Regular expressions, lists of strings, and matching functions are also supported ([#41888]).
+* `@testset foo()` can now be used to create a test set from a given function. The name of the test set
+  is the name of the called function. The called function can contain `@test` and other `@testset`
+  definitions, including to other function calls, while recording all intermediate test results ([#42518]).
+* `TestLogger` and `LogRecord` are now exported from the Test stdlib ([#44080]).
+
+Deprecated or removed
+---------------------
+
+
+External dependencies
+---------------------
+
+
+Tooling Improvements
+---------------------
+
+* `GC.enable_logging(true)` can be used to log each garbage collection, with the
+  time it took and the amount of memory that was collected ([#43511]).
+
+<!--- generated by NEWS-update.jl: -->
+[#33711]: https://github.com/JuliaLang/julia/issues/33711
+[#36265]: https://github.com/JuliaLang/julia/issues/36265
+[#37343]: https://github.com/JuliaLang/julia/issues/37343
+[#37747]: https://github.com/JuliaLang/julia/issues/37747
+[#38791]: https://github.com/JuliaLang/julia/issues/38791
+[#39241]: https://github.com/JuliaLang/julia/issues/39241
+[#39245]: https://github.com/JuliaLang/julia/issues/39245
+[#40110]: https://github.com/JuliaLang/julia/issues/40110
+[#40382]: https://github.com/JuliaLang/julia/issues/40382
+[#40642]: https://github.com/JuliaLang/julia/issues/40642
+[#40783]: https://github.com/JuliaLang/julia/issues/40783
+[#40980]: https://github.com/JuliaLang/julia/issues/40980
+[#41085]: https://github.com/JuliaLang/julia/issues/41085
+[#41312]: https://github.com/JuliaLang/julia/issues/41312
+[#41328]: https://github.com/JuliaLang/julia/issues/41328
+[#41449]: https://github.com/JuliaLang/julia/issues/41449
+[#41551]: https://github.com/JuliaLang/julia/issues/41551
+[#41576]: https://github.com/JuliaLang/julia/issues/41576
+[#41612]: https://github.com/JuliaLang/julia/issues/41612
+[#41618]: https://github.com/JuliaLang/julia/issues/41618
+[#41639]: https://github.com/JuliaLang/julia/issues/41639
+[#41640]: https://github.com/JuliaLang/julia/issues/41640
+[#41742]: https://github.com/JuliaLang/julia/issues/41742
+[#41769]: https://github.com/JuliaLang/julia/issues/41769
+[#41794]: https://github.com/JuliaLang/julia/issues/41794
+[#41888]: https://github.com/JuliaLang/julia/issues/41888
+[#41936]: https://github.com/JuliaLang/julia/issues/41936
+[#42154]: https://github.com/JuliaLang/julia/issues/42154
+[#42211]: https://github.com/JuliaLang/julia/issues/42211
+[#42225]: https://github.com/JuliaLang/julia/issues/42225
+[#42248]: https://github.com/JuliaLang/julia/issues/42248
+[#42314]: https://github.com/JuliaLang/julia/issues/42314
+[#42355]: https://github.com/JuliaLang/julia/issues/42355
+[#42428]: https://github.com/JuliaLang/julia/issues/42428
+[#42431]: https://github.com/JuliaLang/julia/issues/42431
+[#42469]: https://github.com/JuliaLang/julia/issues/42469
+[#42493]: https://github.com/JuliaLang/julia/issues/42493
+[#42513]: https://github.com/JuliaLang/julia/issues/42513
+[#42518]: https://github.com/JuliaLang/julia/issues/42518
+[#42529]: https://github.com/JuliaLang/julia/issues/42529
+[#42561]: https://github.com/JuliaLang/julia/issues/42561
+[#42768]: https://github.com/JuliaLang/julia/issues/42768
+[#42830]: https://github.com/JuliaLang/julia/issues/42830
+[#42918]: https://github.com/JuliaLang/julia/issues/42918
+[#42925]: https://github.com/JuliaLang/julia/issues/42925
+[#43110]: https://github.com/JuliaLang/julia/issues/43110
+[#43113]: https://github.com/JuliaLang/julia/issues/43113
+[#43127]: https://github.com/JuliaLang/julia/issues/43127
+[#43179]: https://github.com/JuliaLang/julia/issues/43179
+[#43191]: https://github.com/JuliaLang/julia/issues/43191
+[#43208]: https://github.com/JuliaLang/julia/issues/43208
+[#43232]: https://github.com/JuliaLang/julia/issues/43232
+[#43239]: https://github.com/JuliaLang/julia/issues/43239
+[#43271]: https://github.com/JuliaLang/julia/issues/43271
+[#43305]: https://github.com/JuliaLang/julia/issues/43305
+[#43354]: https://github.com/JuliaLang/julia/issues/43354
+[#43443]: https://github.com/JuliaLang/julia/issues/43443
+[#43511]: https://github.com/JuliaLang/julia/issues/43511
+[#43604]: https://github.com/JuliaLang/julia/issues/43604
+[#43671]: https://github.com/JuliaLang/julia/issues/43671
+[#43813]: https://github.com/JuliaLang/julia/issues/43813
+[#43852]: https://github.com/JuliaLang/julia/issues/43852
+[#43865]: https://github.com/JuliaLang/julia/issues/43865
+[#43919]: https://github.com/JuliaLang/julia/issues/43919
+[#44080]: https://github.com/JuliaLang/julia/issues/44080
+[#44136]: https://github.com/JuliaLang/julia/issues/44136
+
+Julia v1.7 Release Notes
+========================
+
+New language features
+---------------------
+
+* `(; a, b) = x` can now be used to destructure properties `a` and `b` of `x`.
+  This syntax is equivalent to `a = getproperty(x, :a); b = getproperty(x, :b)` ([#39285]).
+* Implicit multiplication by juxtaposition is now allowed for radical symbols (e.g. `x√y` and `x∛y`) ([#40173]).
+* The short-circuiting operators `&&` and `||` can now be dotted to participate in broadcast fusion
+  as `.&&` and `.||` ([#39594]).
+* `⫪` (U+2AEA, `\Top`, `\downvDash`) and `⫫` (U+2AEB, `\Bot`, `\upvDash`, `\indep`)
+  may now be used as binary operators with comparison precedence ([#39403]).
+* Repeated semicolons can now be used inside array concatenation expressions to separate dimensions
+  of an array, with the number of semicolons specifying the dimension. Just as a single semicolon
+  in `[A; B]` has always described concatenating in the first dimension (vertically), now two
+  semicolons `[A;; B]` do so in the second dimension (horizontally), three semicolons `;;;` in the
+  third, and so on ([#33697]).
+* A backslash (`\`) before a newline inside a string literal now removes the newline while also
+  respecting indentation. This can be used to split up long strings without newlines into multiple
+  lines of code ([#40753]).
+* A backslash before a newline in command literals now always removes the newline, similar to standard string
+  literals, whereas the result was not well-defined before ([#40753]).
+
+Language changes
+----------------
+
+* `macroexpand`, `@macroexpand`, and `@macroexpand1` no longer wrap errors in a `LoadError`.
+  To reduce breakage, `@test_throws` has been modified so that many affected tests will still pass ([#38379]).
+* The middle dot `·` (`\cdotp` U+00b7) and the Greek interpunct `·` (U+0387) are now treated as equivalent to
+  the dot operator `⋅` (`\cdot` U+22c5) (#25157).
+* The minus sign `−` (`\minus` U+2212) is now treated as equivalent to the hyphen-minus sign `-` (U+002d) ([#40948]).
+* Destructuring will no longer mutate values on the left-hand side while iterating through values on
+  the right-hand side. In the example of an array `x`, `x[2], x[1] = x` will now swap the first and
+  second elements of `x`, whereas it used to fill both entries with `x[1]` because `x[2]` was mutated during
+  the iteration of `x` ([#40737]).
+* The default random number generator has changed, so all random numbers will be different (even with the
+  same seed) unless an explicit RNG object is used.
+  See the section on the `Random` standard library below ([#40546]).
+* `Iterators.peel(itr)` now returns `nothing` when `itr` is empty instead of throwing a `BoundsError` ([#39607]).
+* Multiple successive semicolons in an array expresion were previously ignored (e.g., `[1 ;; 2] == [1 ; 2]`).
+  This syntax is now used to separate dimensions (see **New language features**).
+
+Compiler/Runtime improvements
+-----------------------------
+
+
+Command-line option changes
+---------------------------
+
+* The Julia `--project` option and the `JULIA_PROJECT` environment variable now support selecting shared
+  environments like `.julia/environments/myenv` the same way the package management console does:
+  use `julia --project=@myenv` resp. `export JULIA_PROJECT="@myenv"` ([#40025]).
+
+Multi-threading changes
+-----------------------
+
+* Intrinsics for atomic pointer operations are now defined for certain byte sizes ([#37847]).
+* Support for declaring and using individual fields of a mutable struct as atomic has been
+  added; see the new `@atomic` macro ([#37847]).
+* If the `JULIA_NUM_THREADS` environment variable is set to `auto`, then the
+  number of threads will be set to the number of CPU threads ([#38952]).
+* Every `Task` object has a local random number generator state, providing
+  reproducible (schedule-independent) execution of parallel simulation code by
+  default. The default generator is also significantly faster in parallel than
+  in previous versions ([#40546]).
+* Tasks can now migrate among threads when they are re-scheduled. Previously, a Task
+  would always run on whichever thread executed it first ([#40715]).
+
+Build system changes
+--------------------
+
+
+New library functions
+---------------------
+
+* Two argument methods `findmax(f, domain)`, `argmax(f, domain)` and the corresponding
+  `min` versions ([#35316]).
+* `isunordered(x)` returns true if `x` is a value that is normally unordered, such as
+  `NaN` or `missing` ([#35316]).
+* New `keepat!(vector, inds)` function which is the inplace equivalent of `vector[inds]`
+  for a list `inds` of integers ([#36229]).
+* Two arguments method `lock(f, lck)` now accepts a `Channel` as the second argument ([#39312]).
+* New functor `Returns(value)`, which returns `value` for any arguments ([#39794]).
+* New macros `@something` and `@coalesce` which are short-circuiting versions of `something` and
+  `coalesce`, respectively ([#40729]).
+* New function `redirect_stdio` for redirecting `stdin`, `stdout` and `stderr` ([#37978]).
+* New macro `Base.@invoke f(arg1::T1, arg2::T2; kwargs...)` provides an easier syntax to call
+  `invoke(f, Tuple{T1,T2}, arg1, arg2; kwargs...)` ([#38438]).
+* New macro `Base.@invokelatest f(args...; kwargs...)` providing a convenient way to call
+  `Base.invokelatest(f, args...; kwargs...)` ([#37971]).
+
+New library features
+--------------------
+
+* The optional keyword argument `context` of `sprint` can now be set to a tuple of `:key => value`
+  pairs to specify multiple attributes ([#39381]).
+* `bytes2hex` and `hex2bytes` are no longer limited to arguments of type `Union{String,AbstractVector{UInt8}}`
+  and now only require that they're iterable and have a length ([#39710]).
+* `stat(file)` now has a more detailed and user-friendly `show` method ([#39463]).
+
+Standard library changes
+------------------------
+
+* `count` and `findall` now accept an `AbstractChar` argument to search for a character in
+  a string ([#38675]).
+* New methods `range(start, stop)` and `range(start, stop, length)` ([#39228]).
+* `range` now supports `start` as an optional keyword argument ([#38041]).
+* Some operations on ranges will return a `StepRangeLen` instead of a `StepRange`, to allow
+  the resulting step to be zero. Previously, `λ .* (1:9)` gave an error when `λ = 0` ([#40320]).
+* `islowercase` and `isuppercase` are now compliant with the Unicode lower/uppercase categories ([#38574]).
+* `iseven` and `isodd` functions now support non-`Integer` numeric types ([#38976]).
+* `escape_string` now accepts a collection of characters via the keyword
+  `keep` that are to be kept as they are ([#38597]).
+* `getindex` for `NamedTuple`s now accepts a tuple of symbols in order to index multiple values ([#38878]).
+* Subtypes of `AbstractRange` now correctly follow the general array indexing behavior when indexed by
+  `Bool`s, erroring for scalar `Bool`s and treating arrays (including ranges) of `Bool` as
+  logical indices ([#31829]).
+* `keys(::RegexMatch)` is now defined to return the capture's keys, by name if named, or by index if not ([#37299]).
+* `keys(::Generator)` is now defined to return the iterator's keys ([#34678]).
+* `RegexMatch` is now iterable, giving the captured substrings ([#34355]).
+* `lpad/rpad` are now defined in terms of `textwidth` ([#39044]).
+* `Test.@test` now accepts `broken` and `skip` boolean keyword arguments, which
+  mimic `Test.@test_broken` and `Test.@test_skip` behavior, but allows skipping
+  tests failing only under certain conditions.  For example
+  ```julia
+  if T == Float64
+      @test_broken isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
+  else
+      @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
+  end
+  ```
+  can be replaced by
+  ```julia
+  @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T))) broken=(T == Float64)
+  ```
+  ([#39322]).
+* `@lock` is now exported from Base ([#39588]).
+* The experimental function `Base.catch_stack()` has been renamed to `current_exceptions()`, exported
+  from Base and given a more specific return type ([#29901]).
+* Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`,
+  `acotd`, `atand` now accept a square matrix ([#39758]).
+* `replace(::String)` now accepts multiple patterns, which will be applied left-to-right simultaneously,
+  so only one pattern will be applied to any character, and the patterns will only be applied to the input
+  text, not the replacements ([#40484]).
+* New `replace` methods to replace elements of a `Tuple` ([#38216]).
+
+
+#### Package Manager
+
+* If a package is `using` or `import`ed from the `julia>` prompt that isn't found but is available
+  from a registry, a `pkg> add` prompt now offers to install the package into the current environment,
+  precompile it, and continue to load it ([#39026]).
+* A new `Manifest.toml` format is now used that captures extensible metadata fields, including the
+  julia version that generated the manifest. Old format manifests are still supported and will be
+  maintained in their original format, unless the user runs `Pkg.upgrade_manifest()` to upgrade the
+  format of the current environment's manifest without re-resolving ([#40765]).
+* `pkg> precompile` will now precompile new versions of packages that are already loaded, rather than
+  postponing to the next session (the `?`-marked dependencies) ([#40345]).
+* `pkg> rm`, `pin`, and `free` now accept the `--all` argument to call the action on all packages.
+* Registries downloaded from the Pkg Server (not git) are no longer uncompressed into files but instead
+  read directly from the compressed tarball into memory. This improves performance on
+  filesystems which do not handle a large number of files well. To turn this feature off, set the
+  environment variable `JULIA_PKG_UNPACK_REGISTRY=true`.
+* It is now possible to use an external `git` executable instead of the default libgit2 library
+  for the downloads that happen via the Git protocol by setting the environment variable
+  `JULIA_PKG_USE_CLI_GIT=true`.
+* Registries downloaded from the Pkg Server (not git) is now assumed to be immutable. Manual changes
+  to their files might not be picked up by a running Pkg session.
+* Adding packages by directory name in the REPL mode now requires prepending `./` to the name if the
+  package is in the current directory; e.g. `add ./Package` is required instead of `add Package`.
+  This is to avoid confusion between the package name `Package` and the local directory `Package`.
+* The `mode` keyword for `PackageSpec` has been removed.
+
+#### LinearAlgebra
+
+* Use [Libblastrampoline](https://github.com/staticfloat/libblastrampoline/) to pick a BLAS
+  and LAPACK at runtime. By default it forwards to OpenBLAS in the Julia distribution.
+  The forwarding mechanism can be used by packages to replace the BLAS and LAPACK with
+  user preferences ([#39455]).
+* On aarch64, OpenBLAS now uses an ILP64 BLAS like all other 64-bit platforms ([#39436]).
+* OpenBLAS is updated to 0.3.13 ([#39216]).
+* SuiteSparse is updated to 5.8.1 ([#39455]).
+* The shape of an `UpperHessenberg` matrix is preserved under certain arithmetic operations,
+  e.g. when multiplying or dividing by an `UpperTriangular` matrix ([#40039]).
+* Real quasitriangular Schur factorizations `S` can now be efficiently converted to complex
+  upper-triangular form with `Schur{Complex}(S)` ([#40573]).
+* `cis(A)` now supports matrix arguments ([#40194]).
+* `dot` now supports `UniformScaling` with `AbstractMatrix` ([#40250]).
+* `qr[!]` and `lu[!]` now support `LinearAlgebra.PivotingStrategy` (singleton type) values
+  as their optional `pivot` argument: defaults are `qr(A, NoPivot())` (vs. `qr(A, ColumnNorm())`
+  for pivoting) and `lu(A, RowMaximum())` (vs. `lu(A, NoPivot())` without pivoting); the former
+  `Val{true/false}`-based calls are deprecated ([#40623]).
+* `det(M::AbstractMatrix{BigInt})` now calls `det_bareiss(M)`, which uses the
+  [Bareiss](https://en.wikipedia.org/wiki/Bareiss_algorithm) algorithm to calculate precise
+  values ([#40868]).
+
+#### Markdown
+
+
+#### Printf
+
+
+#### Random
+
+* The default random number generator has been changed from Mersenne Twister to
+  [Xoshiro256++](https://prng.di.unimi.it/).
+  The new generator has smaller state, better performance, and superior statistical properties.
+  This generator is the one used for reproducible Task-local randomness ([#40546]).
+
+#### REPL
+
+* Long strings are now elided using the syntax `"head" ⋯ 12345 bytes ⋯ "tail"` when displayed
+  in the REPL ([#40736]).
+* Pasting repl examples into the repl (prompt pasting) now supports all repl modes (`julia`, `pkg`,
+  `shell`, `help?`) and switches mode automatically ([#40604]).
+* `help?>` for modules without docstrings now returns a list of exported names and prints
+  the contents of an associated `README.md` if found ([#39093]).
+
+#### SparseArrays
+
+* new `sizehint!(::SparseMatrixCSC, ::Integer)` method ([#30676]).
+* `cholesky()` now fully preserves the user-specified permutation ([#40560]).
+* `issparse` now applies consistently to all wrapper arrays, including nested, by checking
+  `issparse` on the wrapped parent array ([#37644]).
+
+#### Dates
+
+* The `Dates.periods` function can be used to get the `Vector` of `Period`s that comprise a
+  `CompoundPeriod` ([#39169]).
+
+#### Downloads
+
+* If a cookie header is set in a redirected request, the cookie will now be sent in following
+  requests (<https://github.com/JuliaLang/Downloads.jl/pull/98>).
+* If a `~/.netrc` file exists, it is used to get passwords for authenticated websites
+  (<https://github.com/JuliaLang/Downloads.jl/pull/98>).
+* [Server Name Indication](https://en.wikipedia.org/wiki/Server_Name_Indication) is now sent with
+  all TLS connections, even when the server's identity is not verified (see [NetworkOptions](https://github.com/JuliaLang/NetworkOptions.jl); <https://github.com/JuliaLang/Downloads.jl/pull/114>).
+* When verifying TLS connections on Windows, if the certificate revocation server cannot be
+  reached, the connection is allowed; this matches what other applications do and how revocation
+  is performed on macOS (<https://github.com/JuliaLang/Downloads.jl/pull/115>).
+* There is now a 30-second connection timeout and a 20-second timeout if no data is sent; in
+  combination, this guarantees that connections must make some progress or they will timeout in
+  under a minute (<https://github.com/JuliaLang/Downloads.jl/pull/126>).
+
+#### Statistics
+
+
+#### Sockets
+
+
+#### Tar
+
+* `Tar.extract` now ignores the exact permission mode in a tarball and normalizes modes in the
+  same way that `Tar.create` does, which is, in turn the same way that `git` normalizes them
+  (<https://github.com/JuliaIO/Tar.jl/pull/99>).
+* Functions that consume tarballs now handle hard links: the link target must be a previously seen
+  file; `Tar.list` lists the entry with `:hardlink` type and `.link` field giving the path to the
+  target; other functions — `Tar.extract`, `Tar.rewrite`, `Tar.tree_hash` — treat a hard link as a
+  copy of the target file (<https://github.com/JuliaIO/Tar.jl/pull/102>).
+* The standard format generated by `Tar.create` and `Tar.rewrite` now includes entries for non-empty
+  directories; this shouldn't be neccessary, but some tools that consume tarballs (including docker)
+  are confused by the absence of these directory entries (<https://github.com/JuliaIO/Tar.jl/pull/106>).
+* `Tar` now accepts tarballs with leading spaces in octal integer header fields: this is technically
+  not a valid format according to the POSIX spec, but old Solaris `tar` commands produced tarballs like
+  this so this format does occur in the wild, and it seems harmless to accept it
+  (<https://github.com/JuliaIO/Tar.jl/pull/116>).
+* `Tar.extract` now takes a `set_permissions` keyword argument, which defaults to `true`; if `false` is
+  passed instead, the permissions of extracted files are not modified on extraction
+  (<https://github.com/JuliaIO/Tar.jl/pull/113>).
+
+#### Distributed
+
+
+#### UUIDs
+
+
+#### Mmap
+
+* `mmap` is now exported ([#39816]).
+
+#### DelimitedFiles
+
+* `readdlm` now defaults to `use_mmap=false` on all OSes for consistent reliability in abnormal
+  filesystem situations ([#40415]).
+
+Deprecated or removed
+---------------------
+
+
+External dependencies
+---------------------
+
+
+Tooling Improvements
+---------------------
+
+
+<!--- generated by NEWS-update.jl: -->
+[#29901]: https://github.com/JuliaLang/julia/issues/29901
+[#30676]: https://github.com/JuliaLang/julia/issues/30676
+[#31829]: https://github.com/JuliaLang/julia/issues/31829
+[#33697]: https://github.com/JuliaLang/julia/issues/33697
+[#34355]: https://github.com/JuliaLang/julia/issues/34355
+[#34678]: https://github.com/JuliaLang/julia/issues/34678
+[#35316]: https://github.com/JuliaLang/julia/issues/35316
+[#36229]: https://github.com/JuliaLang/julia/issues/36229
+[#37299]: https://github.com/JuliaLang/julia/issues/37299
+[#37644]: https://github.com/JuliaLang/julia/issues/37644
+[#37847]: https://github.com/JuliaLang/julia/issues/37847
+[#37971]: https://github.com/JuliaLang/julia/issues/37971
+[#37978]: https://github.com/JuliaLang/julia/issues/37978
+[#38041]: https://github.com/JuliaLang/julia/issues/38041
+[#38216]: https://github.com/JuliaLang/julia/issues/38216
+[#38379]: https://github.com/JuliaLang/julia/issues/38379
+[#38438]: https://github.com/JuliaLang/julia/issues/38438
+[#38574]: https://github.com/JuliaLang/julia/issues/38574
+[#38597]: https://github.com/JuliaLang/julia/issues/38597
+[#38675]: https://github.com/JuliaLang/julia/issues/38675
+[#38878]: https://github.com/JuliaLang/julia/issues/38878
+[#38952]: https://github.com/JuliaLang/julia/issues/38952
+[#38976]: https://github.com/JuliaLang/julia/issues/38976
+[#39026]: https://github.com/JuliaLang/julia/issues/39026
+[#39044]: https://github.com/JuliaLang/julia/issues/39044
+[#39093]: https://github.com/JuliaLang/julia/issues/39093
+[#39169]: https://github.com/JuliaLang/julia/issues/39169
+[#39216]: https://github.com/JuliaLang/julia/issues/39216
+[#39228]: https://github.com/JuliaLang/julia/issues/39228
+[#39285]: https://github.com/JuliaLang/julia/issues/39285
+[#39312]: https://github.com/JuliaLang/julia/issues/39312
+[#39322]: https://github.com/JuliaLang/julia/issues/39322
+[#39381]: https://github.com/JuliaLang/julia/issues/39381
+[#39403]: https://github.com/JuliaLang/julia/issues/39403
+[#39436]: https://github.com/JuliaLang/julia/issues/39436
+[#39455]: https://github.com/JuliaLang/julia/issues/39455
+[#39463]: https://github.com/JuliaLang/julia/issues/39463
+[#39588]: https://github.com/JuliaLang/julia/issues/39588
+[#39594]: https://github.com/JuliaLang/julia/issues/39594
+[#39607]: https://github.com/JuliaLang/julia/issues/39607
+[#39710]: https://github.com/JuliaLang/julia/issues/39710
+[#39758]: https://github.com/JuliaLang/julia/issues/39758
+[#39794]: https://github.com/JuliaLang/julia/issues/39794
+[#39816]: https://github.com/JuliaLang/julia/issues/39816
+[#40025]: https://github.com/JuliaLang/julia/issues/40025
+[#40039]: https://github.com/JuliaLang/julia/issues/40039
+[#40173]: https://github.com/JuliaLang/julia/issues/40173
+[#40194]: https://github.com/JuliaLang/julia/issues/40194
+[#40250]: https://github.com/JuliaLang/julia/issues/40250
+[#40320]: https://github.com/JuliaLang/julia/issues/40320
+[#40345]: https://github.com/JuliaLang/julia/issues/40345
+[#40415]: https://github.com/JuliaLang/julia/issues/40415
+[#40484]: https://github.com/JuliaLang/julia/issues/40484
+[#40546]: https://github.com/JuliaLang/julia/issues/40546
+[#40560]: https://github.com/JuliaLang/julia/issues/40560
+[#40573]: https://github.com/JuliaLang/julia/issues/40573
+[#40604]: https://github.com/JuliaLang/julia/issues/40604
+[#40623]: https://github.com/JuliaLang/julia/issues/40623
+[#40715]: https://github.com/JuliaLang/julia/issues/40715
+[#40729]: https://github.com/JuliaLang/julia/issues/40729
+[#40736]: https://github.com/JuliaLang/julia/issues/40736
+[#40737]: https://github.com/JuliaLang/julia/issues/40737
+[#40753]: https://github.com/JuliaLang/julia/issues/40753
+[#40765]: https://github.com/JuliaLang/julia/issues/40765
+[#40868]: https://github.com/JuliaLang/julia/issues/40868
+[#40948]: https://github.com/JuliaLang/julia/issues/40948
+
+
+Julia v1.6 Release Notes
+========================
+
+New language features
+---------------------
+
+* Types written with `where` syntax can now be used to define constructors, e.g.
+  `(Foo{T} where T)(x) = ...`.
+* `<--` and `<-->` are now available as infix operators, with the same precedence
+  and associativity as other arrow-like operators ([#36666]).
+* Compilation and type inference can now be enabled or disabled at the module level
+  using the experimental macro `Base.Experimental.@compiler_options` ([#37041]).
+* The library name passed to `ccall` or `@ccall` can now be an expression involving
+  global variables and function calls. The expression will be evaluated the first
+  time the `ccall` executes ([#36458]).
+* `ꜛ` (U+A71B), `ꜜ` (U+A71C) and `ꜝ` (U+A71D) can now also be used as operator
+  suffixes. They can be tab-completed from `\^uparrow`, `\^downarrow` and `\^!` in the REPL
+  ([#37542]).
+* Standalone "dotted" operators now get parsed as `Expr(:., :op)`, which gets lowered to
+  `Base.BroadcastFunction(op)`. This means `.op` is functionally equivalent to
+  `(x...) -> (op).(x...)`, which can be useful for passing the broadcasted version of an
+  operator to higher-order functions, for example `map(.*, A, B)` for an elementwise
+  product of two arrays of arrays ([#37583]).
+* The syntax `import A as B` (plus `import A: x as y`, `import A.x as y`, and `using A: x as y`)
+  can now be used to rename imported modules and identifiers ([#1255]).
+* Unsigned literals (starting with `0x`) which are too big to fit in a `UInt128` object
+  are now interpreted as `BigInt` ([#23546]).
+* It is now possible to use `...` on the left-hand side of assignments for taking any
+  number of items from the front of an iterable collection, while also collecting the rest,
+  for example `a, b... = [1, 2, 3]`. This syntax is implemented using `Base.rest`,
+  which can be overloaded to customize its behavior for different collection types
+  ([#37410]).
+
+Language changes
+----------------
+
+* The postfix conjugate transpose operator `'` now accepts Unicode modifiers as
+  suffixes, so e.g. `a'ᵀ` is parsed as `var"'ᵀ"(a)`, which can be defined by the
+  user. `a'ᵀ` parsed as `a' * ᵀ` before, so this is a minor breaking change ([#37247]).
+* Macros that return `:quote` expressions (e.g. via `Expr(:quote, ...)`) were previously
+  able to work without escaping (`esc(...)`) their output when needed. This has been
+  corrected, and now `esc` must be used in these macros as it is in other macros ([#37540]).
+* The `-->` operator now lowers to a `:call` expression, so it can be defined as
+  a function like other operators. The dotted version `.-->` is now parsed as well.
+  For backwards compatibility, `-->` still parses using its own expression head
+  instead of `:call`.
+* The `a[begin, k]` syntax now calls `firstindex(a, 1)` rather than `first(axes(a, 1))` ([#35779]),
+  but the former now defaults to the latter for any `a` ([#38742]).
+* `⌿` (U+233F) and `¦` (U+00A6) are now infix operators with times-like and plus-like precedence,
+  respectively. Previously they were parsed as identifier characters ([#37973]).
+
+Compiler/Runtime improvements
+-----------------------------
+
+* All platforms can now use `@executable_path` within `jl_load_dynamic_library()`.
+  This allows executable-relative paths to be embedded within executables on all
+  platforms, not just MacOS, which the syntax is borrowed from ([#35627]).
+* Constant propagation now occurs through keyword arguments ([#35976]).
+* The precompilation cache is now created atomically ([#36416]). Invoking _n_
+  Julia processes simultaneously may create _n_ temporary caches.
+
+Command-line option changes
+---------------------------
+
+* There is no longer a concept of "home project": starting `julia --project=dir`
+  is now exactly equivalent to starting `julia` and then doing `pkg> activate
+  $dir` and `julia --project` is exactly equivalent to doing that where
+  `dir = Base.current_project()`. In particular, this means that if you do
+  `pkg> activate` after starting `julia` with the `--project` option (or with
+  `JULIA_PROJECT` set) it will take you to the default active project, which is
+  `@v1.6` unless you have modified `LOAD_PATH` ([#36434]).
+
+Multi-threading changes
+-----------------------
+
+* Locks now automatically inhibit finalizers from running, to avoid deadlock ([#38487]).
+* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption ([#34543]).
+
+Build system changes
+--------------------
+
+* Windows Installer now has the option to 'Add Julia to Path'. To unselect this option
+  from the commandline simply remove the tasks you do not want to be installed: e.g.
+  `./julia-installer.exe /TASKS="desktopicon,startmenu,addtopath"`, adds a desktop
+  icon, a startmenu group icon, and adds Julia to system PATH.
+
+New library functions
+---------------------
+
+* New function `Base.kron!` and corresponding overloads for various matrix types for performing Kronecker
+  product in-place ([#31069]).
+* New function `Base.readeach(io, T)` for iteratively performing `read(io, T)` ([#36150]).
+* `Iterators.map` is added. It provides another syntax `Iterators.map(f, iterators...)`
+  for writing `(f(args...) for args in zip(iterators...))`, i.e. a lazy `map` ([#34352]).
+* New function `sincospi` for simultaneously computing `sinpi(x)` and `cospi(x)` more
+  efficiently ([#35816]).
+* New function `cispi(x)` for more accurately computing `cis(pi * x)` ([#38449]).
+* New function `addenv` for adding environment mappings into a `Cmd` object, returning the new `Cmd` object.
+* New function `insorted` for determining whether an element is in a sorted collection or not ([#37490]).
+* New function `Base.rest` for taking the rest of a collection, starting from a specific
+  iteration state, in a generic way ([#37410]).
+
+New library features
+--------------------
+
+* The `redirect_*` functions now accept `devnull` to discard all output redirected to it, and as an empty
+  input ([#36146]).
+* The `redirect_*` functions can now be called on `IOContext` objects ([#36688]).
+* `findfirst`, `findnext`, `findlast`, and `findall` now support `AbstractVector{<:Union{Int8,UInt8}}`
+  (pattern, array) arguments ([#37283]).
+* New constructor `NamedTuple(iterator)` that constructs a named tuple from a key-value pair iterator.
+* A new `reinterpret(reshape, T, a::AbstractArray{S})` reinterprets `a` to have eltype `T` while potentially
+  inserting or consuming the first dimension depending on the ratio of `sizeof(T)` and `sizeof(S)`.
+* New `append!(vector, collections...)` and `prepend!(vector, collections...)` methods accept multiple
+  collections to be appended or prepended ([#36227]).
+* `keys(io::IO)` has been added, which returns all keys of `io` if `io` is an `IOContext` and an empty
+  `Base.KeySet` otherwise ([#37753]).
+* `count` now accepts an optional `init` argument to control the accumulation type ([#37461]).
+* New method `occursin(haystack)` that returns a function that checks whether its argument occurs in
+  `haystack` ([#38475]).
+* New methods `∉(collection)`, `∋(item)`, and `∌(item)` returning corresponding containment-testing
+  functions ([#38475]).
+* The `nextprod` function now accepts tuples and other array types for its first argument ([#35791]).
+* The `reverse(A; dims)` function for multidimensional `A` can now reverse multiple dimensions at once
+  by passing a tuple for `dims`, and defaults to reversing all dimensions; there is also a multidimensional
+  in-place `reverse!(A; dims)` ([#37367]).
+* The function `isapprox(x,y)` now accepts the `norm` keyword argument also for numeric (i.e., non-array)
+  arguments `x` and `y` ([#35883]).
+* `ispow2(x)` now supports non-`Integer` arguments `x` ([#37635]).
+* `view`, `@view`, and `@views` now work on `AbstractString`s, returning a `SubString` when appropriate ([#35879]).
+* All `AbstractUnitRange{<:Integer}`s now work with `SubString`, `view`, `@view` and `@views` on strings ([#35879]).
+* `sum`, `prod`, `maximum`, and `minimum` now support `init` keyword argument ([#36188], [#35839]).
+* `unique(f, itr; seen=Set{T}())` now allows you to declare the container type used for
+  keeping track of values returned by `f` on elements of `itr` ([#36280]).
+* `first` and `last` functions now accept an integer as second argument to get that many
+  leading or trailing elements of any iterable ([#34868]).
+* `CartesianIndices` now supports step different from `1`. It can also be constructed from three
+  `CartesianIndex`es `I`, `S`, `J` using `I:S:J`. `step` for `CartesianIndices` now returns a
+  `CartesianIndex` ([#37829]).
+* `RegexMatch` objects can now be probed for whether a named capture group exists within it through `haskey()` ([#36717]).
+* For consistency `haskey(r::RegexMatch, i::Integer)` has also been added and returns if the capture group
+  for `i` exists ([#37300]).
+
+Standard library changes
+------------------------
+
+* A new standard library `TOML` has been added for parsing and printing [TOML files](https://toml.io) ([#37034]).
+* A new standard library `Downloads` has been added, which replaces the old `Base.download` function with
+  `Downloads.download`, providing cross-platform, multi-protocol, in-process download functionality implemented
+  with [libcurl](https://curl.haxx.se/libcurl/) ([#37340]).
+* `Libdl` has been moved to `Base.Libc.Libdl`, however it is still accessible as an stdlib ([#35628]).
+* To download artifacts lazily, `LazyArtifacts` now must be explicitly listed as a dependency, to avoid needing the
+  support machinery to be available when it is not commonly needed ([#37844]).
+* It is no longer possible to create a `LinRange`, `StepRange`, or `StepRangeLen` with a `<: Integer` eltype but
+  non-integer step ([#32439]).
+* `intersect` on `CartesianIndices` now returns `CartesianIndices` instead of `Vector{<:CartesianIndex}` ([#36643]).
+* `push!(c::Channel, v)` now returns channel `c`. Previously, it returned the pushed value `v` ([#34202]).
+* The composition operator `∘` now returns a `Base.ComposedFunction` instead of an anonymous function ([#37517]).
+* Logging (such as `@warn`) no longer catches exceptions in the logger itself ([#36600]).
+* `@time` now reports if the time presented included any compilation time, which is shown as a percentage ([#37678]).
+* `@varinfo` can now report non-exported objects within modules, look recursively into submodules, and return a sorted
+  results table ([#38042]).
+* `@testset` now supports the option `verbose` to show the test result summary
+  of the children even if they all pass ([#33755]).
+* In `LinearIndices(::Tuple)` and `CartesianIndices(::Tuple)`, integers (as opposed to ranges of integers) in the
+  argument tuple now consistently describe 1-based ranges, e.g, `CartesianIndices((3, 1:3))` is equivalent to
+  `CartesianIndices((1:3, 1:3))`. This is how tuples of integers have always been documented to work, but a
+  bug had caused erroneous behaviors with heterogeneous tuples containing both integers and ranges ([#37829], [#37928]).
+
+#### Package Manager
+
+* `pkg> precompile` is now parallelized through depth-first precompilation of dependencies. Errors will only throw for
+  direct dependencies listed in the `Project.toml`.
+* `pkg> precompile` is now automatically triggered whenever Pkg changes the active manifest. Auto-precompilation will
+  remember if a package has errored within the given environment and will not retry until it changes.
+  Auto-precompilation can be gracefully interrupted with a `ctrl-c` and disabled by setting the environment variable
+  `JULIA_PKG_PRECOMPILE_AUTO=0`.
+* The `Pkg.BinaryPlatforms` module has been moved into `Base` as `Base.BinaryPlatforms` and heavily reworked.
+  Applications that want to be compatible with the old API should continue to import `Pkg.BinaryPlatforms`,
+  however new users should use `Base.BinaryPlatforms` directly ([#37320]).
+* The `Pkg.Artifacts` module has been imported as a separate standard library.  It is still available as
+  `Pkg.Artifacts`, however starting from Julia v1.6+, packages may import simply `Artifacts` without importing
+  all of `Pkg` alongside ([#37320]).
+
+#### LinearAlgebra
+
+* New method `LinearAlgebra.issuccess(::CholeskyPivoted)` for checking whether pivoted Cholesky factorization was
+  successful ([#36002]).
+* `UniformScaling` can now be indexed into using ranges to return dense matrices and vectors ([#24359]).
+* New function `LinearAlgebra.BLAS.get_num_threads()` for getting the number of BLAS threads ([#36360]).
+* `(+)(::UniformScaling)` is now defined, making `+I` a valid unary operation ([#36784]).
+* Instances of `UniformScaling` are no longer `isequal` to matrices. Previous
+  behaviour violated the rule that `isequal(x, y)` implies `hash(x) == hash(y)`.
+* Transposing `*Triangular` matrices now returns matrices of the opposite triangular type, consistently
+  with `adjoint!(::*Triangular)` and `transpose!(::*Triangular)`. Packages containing methods with, e.g.,
+  `Adjoint{<:Any,<:LowerTriangular{<:Any,<:OwnMatrixType}}` should replace that by
+  `UpperTriangular{<:Any,<:Adjoint{<:Any,<:OwnMatrixType}}` in the method signature ([#38168]).
+
+#### Markdown
+
+
+#### Printf
+
+* Complete overhaul of internal code to use the ryu float printing algorithms (from Julia 1.4); leads to
+  consistent 2-5x performance improvements.
+* New `Printf.tofloat` function allowing custom float types to more easily integrate with Printf formatting
+  by converting their type to `Float16`, `Float32`, `Float64`, or `BigFloat`.
+* New `Printf.format"..."` and `Printf.Format(...)` functions that allow creating `Printf.Format` objects
+  that can be passed to `Printf.format` for easier dynamic printf formatting.
+* `Printf.format(f::Printf.Format, args...)` as a non-macro function that applies a printf format `f` to
+  provided `args`.
+
+#### Random
+
+
+#### REPL
+
+* The `AbstractMenu` extension interface of `REPL.TerminalMenus` has been extensively
+  overhauled. The new interface does not rely on global configuration variables, is more
+  consistent in delegating printing of the navigation/selection markers, and provides
+  improved support for dynamic menus.  These changes are compatible with the previous
+  (deprecated) interface, so are non-breaking.
+
+  The new API offers several enhancements:
+
+  + Menus are configured in their constructors via keyword arguments.
+  + For custom menu types, the new `Config` and `MultiSelectConfig` replace the global `CONFIG` `Dict`.
+  + `request(menu; cursor=1)` allows you to control the initial cursor position in the menu (defaults to first item).
+  + `MultiSelectMenu` allows you to pass a list of initially-selected items with the `selected` keyword argument.
+  + `writeLine` was deprecated to `writeline`, and `writeline` methods are not expected to print the cursor indicator.
+    The old `writeLine` continues to work, and any of its method extensions should print the cursor indicator as before.
+  + `printMenu` has been deprecated to `printmenu`, and it both accepts a state input and returns a state output
+    that controls the number of terminal lines erased when the menu is next refreshed. This plus related changes
+    makes `printmenu` work properly when the number of menu items might change depending on user choices.
+  + `numoptions`, returning the number of items in the menu, has been added as an alternative to implementing `options`.
+  + `suppress_output` (primarily a testing option) has been added as a keyword argument to `request`,
+    rather than a configuration option.
+* Tab completion now supports runs of consecutive sub/superscript characters,
+  e.g. `\^(3)` tab-completes to `⁽³⁾` ([#38649]).
+* Windows REPL now supports 24-bit colors, by correctly interpreting virtual terminal escapes.
+
+#### SparseArrays
+
+* Display large sparse matrices with a Unicode "spy" plot of their nonzero patterns,
+  and display small sparse matrices by an `Matrix`-like 2d layout of their contents ([#33821]).
+* New convenient `spdiagm([m, n,] v::AbstractVector)` methods which call
+  `spdiagm([m, n,] 0 => v)`, consistently with their dense `diagm` counterparts ([#37684]).
+
+#### Dates
+
+* `Quarter` period is defined ([#35519]).
+* `canonicalize` can now take `Period` as an input ([#37391]).
+* Zero-valued `FixedPeriod`s and `OtherPeriod`s now compare equal, e.g.,
+  `Year(0) == Day(0)`. The behavior of non-zero `Period`s is not changed ([#37486]).
+
+#### Statistics
+
+
+#### Sockets
+
+
+#### Distributed
+
+* Now supports invoking Windows workers via ssh (via new keyword argument `shell=:wincmd` in `addprocs`) ([#30614]).
+* Other new keyword arguments in `addprocs`: `ssh` to specify the ssh client path, `env` to pass environment
+  variables to workers, and `cmdline_cookie` to work around an ssh problem with Windows workers that run older
+  (pre-ConPTY) versions of Windows, Julia or OpenSSH ([#30614]).
+
+#### UUIDs
+
+* Change `uuid1` and `uuid4` to use `Random.RandomDevice()` as default random number generator ([#35872]).
+* Added `parse(::Type{UUID}, ::AbstractString)` method.
+
+#### Mmap
+
+* On Unix systems, the `Mmap.madvise!` function (along with OS-specific `Mmap.MADV_*`
+  constants) has been added to give advice on handling of memory-mapped arrays ([#37369]).
+
+Deprecated or removed
+---------------------
+
+* The `Base.download` function has been deprecated (silently, by default) in favor of the new `Downloads.download`
+  standard library function ([#37340]).
+* The `Base.Grisu` code has been officially removed (float printing was switched to the ryu algorithm code in 1.4).
+  The code is available from [JuliaAttic](https://github.com/JuliaAttic/Grisu.jl) if needed.
+
+External dependencies
+---------------------
+
+
+Tooling Improvements
+---------------------
+
+
+<!--- generated by NEWS-update.jl: -->
+[#1255]: https://github.com/JuliaLang/julia/issues/1255
+[#23546]: https://github.com/JuliaLang/julia/issues/23546
+[#24359]: https://github.com/JuliaLang/julia/issues/24359
+[#30614]: https://github.com/JuliaLang/julia/issues/30614
+[#31069]: https://github.com/JuliaLang/julia/issues/31069
+[#32439]: https://github.com/JuliaLang/julia/issues/32439
+[#33755]: https://github.com/JuliaLang/julia/issues/33755
+[#33821]: https://github.com/JuliaLang/julia/issues/33821
+[#34202]: https://github.com/JuliaLang/julia/issues/34202
+[#34352]: https://github.com/JuliaLang/julia/issues/34352
+[#34543]: https://github.com/JuliaLang/julia/issues/34543
+[#34868]: https://github.com/JuliaLang/julia/issues/34868
+[#35519]: https://github.com/JuliaLang/julia/issues/35519
+[#35627]: https://github.com/JuliaLang/julia/issues/35627
+[#35628]: https://github.com/JuliaLang/julia/issues/35628
+[#35779]: https://github.com/JuliaLang/julia/issues/35779
+[#35791]: https://github.com/JuliaLang/julia/issues/35791
+[#35816]: https://github.com/JuliaLang/julia/issues/35816
+[#35839]: https://github.com/JuliaLang/julia/issues/35839
+[#35872]: https://github.com/JuliaLang/julia/issues/35872
+[#35879]: https://github.com/JuliaLang/julia/issues/35879
+[#35883]: https://github.com/JuliaLang/julia/issues/35883
+[#35976]: https://github.com/JuliaLang/julia/issues/35976
+[#36002]: https://github.com/JuliaLang/julia/issues/36002
+[#36146]: https://github.com/JuliaLang/julia/issues/36146
+[#36150]: https://github.com/JuliaLang/julia/issues/36150
+[#36188]: https://github.com/JuliaLang/julia/issues/36188
+[#36227]: https://github.com/JuliaLang/julia/issues/36227
+[#36280]: https://github.com/JuliaLang/julia/issues/36280
+[#36360]: https://github.com/JuliaLang/julia/issues/36360
+[#36416]: https://github.com/JuliaLang/julia/issues/36416
+[#36434]: https://github.com/JuliaLang/julia/issues/36434
+[#36458]: https://github.com/JuliaLang/julia/issues/36458
+[#36600]: https://github.com/JuliaLang/julia/issues/36600
+[#36643]: https://github.com/JuliaLang/julia/issues/36643
+[#36666]: https://github.com/JuliaLang/julia/issues/36666
+[#36688]: https://github.com/JuliaLang/julia/issues/36688
+[#36717]: https://github.com/JuliaLang/julia/issues/36717
+[#36784]: https://github.com/JuliaLang/julia/issues/36784
+[#37034]: https://github.com/JuliaLang/julia/issues/37034
+[#37041]: https://github.com/JuliaLang/julia/issues/37041
+[#37247]: https://github.com/JuliaLang/julia/issues/37247
+[#37283]: https://github.com/JuliaLang/julia/issues/37283
+[#37300]: https://github.com/JuliaLang/julia/issues/37300
+[#37320]: https://github.com/JuliaLang/julia/issues/37320
+[#37340]: https://github.com/JuliaLang/julia/issues/37340
+[#37367]: https://github.com/JuliaLang/julia/issues/37367
+[#37369]: https://github.com/JuliaLang/julia/issues/37369
+[#37391]: https://github.com/JuliaLang/julia/issues/37391
+[#37410]: https://github.com/JuliaLang/julia/issues/37410
+[#37461]: https://github.com/JuliaLang/julia/issues/37461
+[#37486]: https://github.com/JuliaLang/julia/issues/37486
+[#37490]: https://github.com/JuliaLang/julia/issues/37490
+[#37517]: https://github.com/JuliaLang/julia/issues/37517
+[#37540]: https://github.com/JuliaLang/julia/issues/37540
+[#37542]: https://github.com/JuliaLang/julia/issues/37542
+[#37583]: https://github.com/JuliaLang/julia/issues/37583
+[#37635]: https://github.com/JuliaLang/julia/issues/37635
+[#37678]: https://github.com/JuliaLang/julia/issues/37678
+[#37684]: https://github.com/JuliaLang/julia/issues/37684
+[#37753]: https://github.com/JuliaLang/julia/issues/37753
+[#37829]: https://github.com/JuliaLang/julia/issues/37829
+[#37844]: https://github.com/JuliaLang/julia/issues/37844
+[#37973]: https://github.com/JuliaLang/julia/issues/37973
+[#38042]: https://github.com/JuliaLang/julia/issues/38042
+[#38062]: https://github.com/JuliaLang/julia/issues/38062
+[#38168]: https://github.com/JuliaLang/julia/issues/38168
+[#38449]: https://github.com/JuliaLang/julia/issues/38449
+[#38475]: https://github.com/JuliaLang/julia/issues/38475
+[#38487]: https://github.com/JuliaLang/julia/issues/38487
+[#38649]: https://github.com/JuliaLang/julia/issues/38649
+[#38742]: https://github.com/JuliaLang/julia/issues/38742
+
+
 Julia v1.5 Release Notes
 ========================
 
@@ -3083,7 +4102,7 @@ Library improvements
 
     + Using colons (`:`) to represent a collection of indices is deprecated. They now must be
       explicitly converted to a specialized array of integers with the `to_indices` function.
-      As a result, the type of `SubArray`s that represent views over colon indices has changed.
+      As a result, the type of `SubArray`s that represent views over colon indices has changed.
 
     + Logical indexing is now more efficient. Logical arrays are converted by `to_indices` to
       a lazy, iterable collection of indices that doesn't support indexing. A deprecation
@@ -4860,7 +5879,7 @@ New language features
     shell. For example:
 
         julia> ;ls
-        CONTRIBUTING.md  Makefile           VERSION      deps/      julia@  ui/
+        CONTRIBUTING.md  Makefile           VERSION      cli/       deps/   julia@
         DISTRIBUTING.md  NEWS.md            Windows.inc  doc/       src/    usr/
         LICENSE.md       README.md          base/        etc/       test/
         Make.inc         README.windows.md  contrib/     examples/  tmp/
diff --git a/LICENSE.md b/LICENSE.md
index d1438a5f68bfc0..fdf24e7603d730 100644
--- a/LICENSE.md
+++ b/LICENSE.md
@@ -1,82 +1,26 @@
-The Julia language is licensed under the MIT License. The "language" consists
-of the compiler (the contents of src/), most of the standard library (base/),
-and some utilities (most of the rest of the files in this repository). See below
-for exceptions.
+MIT License
 
-> Copyright (c) 2009-2019: Jeff Bezanson, Stefan Karpinski, Viral B. Shah,
-> and other contributors:
->
-> https://github.com/JuliaLang/julia/contributors
->
-> Permission is hereby granted, free of charge, to any person obtaining
-> a copy of this software and associated documentation files (the
-> "Software"), to deal in the Software without restriction, including
-> without limitation the rights to use, copy, modify, merge, publish,
-> distribute, sublicense, and/or sell copies of the Software, and to
-> permit persons to whom the Software is furnished to do so, subject to
-> the following conditions:
->
-> The above copyright notice and this permission notice shall be
-> included in all copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-> NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
-> LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
-> OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-> WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+Copyright (c) 2009-2022: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors
 
-Julia includes code from the following projects, which have their own licenses:
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
 
-- [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
-- [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
-- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/jitlayers.cpp and src/disasm.cpp) [BSD-3, effectively]
-- [MUSL](https://git.musl-libc.org/cgit/musl/tree/COPYRIGHT) (for getopt implementation on Windows) [MIT]
-- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
-- [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
-- [Python](https://docs.python.org/3/license.html) (for strtod and joinpath implementation on Windows) [BSD-3, effectively]
-- [Google Benchmark](https://github.com/google/benchmark) (for cyclecount implementation) [Apache 2.0]
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
 
-The following components included in Julia `Base` have their own separate licenses:
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 
-- base/ryu/* [Boost] (see [ryu](https://github.com/ulfjack/ryu/blob/master/LICENSE-Boost))
-- base/grisu/* [BSD-3] (see [double-conversion](https://github.com/google/double-conversion/blob/master/LICENSE))
-- base/special/{exp,rem_pio2,hyperbolic}.jl [Freely distributable with preserved copyright notice] (see [FDLIBM](https://www.netlib.org/fdlibm))
+end of terms and conditions
 
-The Julia language links to the following external libraries, which have their
-own licenses:
-
-- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
-- [LIBUNWIND](https://git.savannah.gnu.org/gitweb/?p=libunwind.git;a=blob_plain;f=LICENSE;hb=master) [MIT]
-- [LIBUV](https://github.com/joyent/libuv/blob/master/LICENSE) [MIT]
-- [LLVM](https://releases.llvm.org/6.0.0/LICENSE.TXT) [BSD-3, effectively]
-- [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT]
-
-Julia's `stdlib` uses the following external libraries, which have their own licenses:
-
-- [DSFMT](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/LICENSE.txt) [BSD-3]
-- [OPENLIBM](https://github.com/JuliaMath/openlibm/blob/master/LICENSE.md) [MIT, BSD-2, ISC]
-- [GMP](https://gmplib.org/manual/Copying.html#Copying) [LGPL3+ or GPL2+]
-- [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception]
-- [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative]
-- [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3]
-- [MBEDTLS](https://tls.mbed.org/how-to-get) [either GPLv2 or Apache 2.0]
-- [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+]
-- [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
-- [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
-- [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
-- [SUITESPARSE](http://suitesparse.com) [mix of LGPL2+ and GPL2+; see individual module licenses]
-
-Julia's build process uses the following external tools:
-
-- [PATCHELF](https://nixos.org/patchelf.html)
-- [OBJCONV](https://www.agner.org/optimize/#objconv)
-
-Julia bundles the following external programs and libraries:
-
-- [7-Zip](https://www.7-zip.org/license.txt)
-- [ZLIB](https://zlib.net/zlib_license.html)
-
-On some platforms, distributions of Julia contain SSL certificate authority certificates,
-released under the [Mozilla Public License](https://en.wikipedia.org/wiki/Mozilla_Public_License).
+Please see [THIRDPARTY.md](./THIRDPARTY.md) for license information for other software used in this project.
diff --git a/Make.inc b/Make.inc
index df2e522cad09f2..b70b56a8097360 100644
--- a/Make.inc
+++ b/Make.inc
@@ -1,4 +1,4 @@
-# -*- mode: makefile-gmake -*-
+# -*- mode: makefile -*-
 # vi:syntax=make
 
 ## Note:
@@ -14,8 +14,16 @@
 # Set to zero to turn off extra precompile (e.g. for the REPL)
 JULIA_PRECOMPILE ?= 1
 
+# Set FORCE_ASSERTIONS to 1 to enable assertions in the C and C++ portions
+# of the Julia code base. You may also want to set LLVM_ASSERTIONS to 1,
+# which will enable assertions in LLVM.
+# An "assert build" of Julia is a build that has both FORCE_ASSERTIONS=1
+# and LLVM_ASSERTIONS=1.
 FORCE_ASSERTIONS ?= 0
 
+# Set BOOTSTRAP_DEBUG_LEVEL to 1 to enable Julia-level stacktrace during bootstrapping.
+BOOTSTRAP_DEBUG_LEVEL ?= 0
+
 # OPENBLAS build options
 OPENBLAS_TARGET_ARCH:=
 OPENBLAS_SYMBOLSUFFIX:=
@@ -32,6 +40,7 @@ OPENBLAS_USE_THREAD:=1
 # Flags for using libraries available on the system instead of building them.
 # Please read the notes around usage of SYSTEM flags in README.md
 # Issues resulting from use of SYSTEM versions will generally not be accepted.
+USE_SYSTEM_CSL:=0
 USE_SYSTEM_LLVM:=0
 USE_SYSTEM_LIBUNWIND:=0
 DISABLE_LIBUNWIND:=0
@@ -40,11 +49,12 @@ USE_SYSTEM_LIBM:=0
 USE_SYSTEM_OPENLIBM:=0
 UNTRUSTED_SYSTEM_LIBM:=0
 USE_SYSTEM_DSFMT:=0
+USE_SYSTEM_LIBBLASTRAMPOLINE:=0
 USE_SYSTEM_BLAS:=0
 USE_SYSTEM_LAPACK:=0
 USE_SYSTEM_GMP:=0
 USE_SYSTEM_MPFR:=0
-USE_SYSTEM_SUITESPARSE:=0
+USE_SYSTEM_LIBSUITESPARSE:=0
 USE_SYSTEM_LIBUV:=0
 USE_SYSTEM_UTF8PROC:=0
 USE_SYSTEM_MBEDTLS:=0
@@ -53,36 +63,20 @@ USE_SYSTEM_NGHTTP2:=0
 USE_SYSTEM_CURL:=0
 USE_SYSTEM_LIBGIT2:=0
 USE_SYSTEM_PATCHELF:=0
+USE_SYSTEM_LIBWHICH:=0
 USE_SYSTEM_ZLIB:=0
 USE_SYSTEM_P7ZIP:=0
 
 # Link to the LLVM shared library
 USE_LLVM_SHLIB := 1
 
-## Settings for various Intel tools
-# Set to 1 to use MKL
-USE_INTEL_MKL ?= 0
-# Set to 1 to use Intel LIBM
-USE_INTEL_LIBM ?= 0
-# Set to 1 to enable profiling with Intel VTune Amplifier
-USE_INTEL_JITEVENTS ?= 0
-# Set to 1 to use Intel C, C++, and FORTRAN compilers
-USEICC  ?= 0
-USEIFC  ?= 0
-
 # Enable threading with one thread
 JULIA_THREADS := 1
 
-ifeq ($(USE_MKL), 1)
-$(warning "The julia make variable USE_MKL has been renamed to USE_INTEL_MKL")
-USE_INTEL_MKL := 1
-endif
-
 # Set to 1 to enable profiling with OProfile
 USE_OPROFILE_JITEVENTS ?= 0
 
-# Set to 1 to enable profiling with perf
-USE_PERF_JITEVENTS ?= 0
+# USE_PERF_JITEVENTS defined below since default is OS specific
 
 # assume we don't have LIBSSP support in our compiler, will enable later if likely true
 HAVE_SSP := 0
@@ -91,12 +85,28 @@ HAVE_SSP := 0
 WITH_GC_VERIFY := 0
 WITH_GC_DEBUG_ENV := 0
 
+# Enable DTrace support
+WITH_DTRACE := 0
+
 # Prevent picking up $ARCH from the environment variables
 ARCH:=
 
+
+# Literal values that are hard to use in Makefiles otherwise:
+define newline # a literal \n
+
+
+endef
+COMMA:=,
+SPACE:=$(eval) $(eval)
+
+# force a sane / stable configuration
+export LC_ALL=C
+export LANG=C
+
 # We need python for things like BB triplet recognition and relative path computation.
 # We don't really care about version, generally, so just find something that works:
-PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo not found)"
+PYTHON := "$(shell which python 2>/dev/null || which python3 2>/dev/null || which python2 2>/dev/null || echo "{python|python3|python2} not found")"
 PYTHON_SYSTEM := $(shell $(PYTHON) -c 'from __future__ import print_function; import platform; print(platform.system())')
 
 # If we're running on Cygwin, but using a native-windows Python, we need to use cygpath -w
@@ -319,11 +329,16 @@ endef
 $(foreach D,libdir private_libdir datarootdir libexecdir docdir sysconfdir includedir,$(eval $(call cache_rel_path,$(D),$(bindir))))
 $(foreach D,build_libdir build_private_libdir,$(eval $(call cache_rel_path,$(D),$(build_bindir))))
 
+# Save a special one: reverse_private_libdir_rel: usually just `../`, but good to be general:
+reverse_private_libdir_rel_eval = $(call rel_path,$(private_libdir),$(libdir))
+reverse_private_libdir_rel = $(call hit_cache,reverse_private_libdir_rel_eval)
+
 INSTALL_F := $(JULIAHOME)/contrib/install.sh 644
 INSTALL_M := $(JULIAHOME)/contrib/install.sh 755
 
 # LLVM Options
 LLVMROOT := $(build_prefix)
+# Set LLVM_ASSERTIONS to 1 to enable assertions in LLVM.
 LLVM_ASSERTIONS := 0
 LLVM_DEBUG := 0
 # set to 1 to get clang and compiler-rt
@@ -331,6 +346,7 @@ BUILD_LLVM_CLANG := 0
 # set to 1 to get lldb (often does not work, no chance with llvm3.2 and earlier)
 # see http://lldb.llvm.org/build.html for dependencies
 BUILD_LLDB := 0
+BUILD_LIBCXX := 0
 
 # Options to enable Polly and its code-generation options
 USE_POLLY := 0
@@ -340,6 +356,9 @@ USE_POLLY_ACC := 0     # Enable GPU code-generation
 # Options to use MLIR
 USE_MLIR := 0
 
+# Options to use RegionVectorizer
+USE_RV := 0
+
 # Cross-compile
 #XC_HOST := i686-w64-mingw32
 #XC_HOST := x86_64-w64-mingw32
@@ -366,8 +385,10 @@ ifeq ($(XC_HOST),)
 CROSS_COMPILE:=
 # delayed expansion of $(CC), since it won't be computed until later
 HOSTCC = $(CC)
+HOSTCXX = $(CXX)
 else
 HOSTCC ?= gcc
+HOSTCXX ?= g++
 OPENBLAS_DYNAMIC_ARCH := 1
 override CROSS_COMPILE:=$(XC_HOST)-
 ifneq (,$(findstring mingw,$(XC_HOST)))
@@ -416,6 +437,13 @@ fPIC := -fPIC
 EXE :=
 endif
 
+# Set to 1 to enable profiling with perf
+ifeq ("$(OS)", "Linux")
+USE_PERF_JITEVENTS ?= 1
+else
+USE_PERF_JITEVENTS ?= 0
+endif
+
 JULIACODEGEN := LLVM
 
 # flag for disabling assertions
@@ -429,97 +457,54 @@ CXX_DISABLE_ASSERTION := -DJL_NDEBUG
 DISABLE_ASSERTIONS := -DNDEBUG -DJL_NDEBUG
 endif
 
-ifeq ($(LLVM_ASSERTIONS),0)
-CXX_DISABLE_ASSERTION += -DNDEBUG
-endif
-
 # Compiler specific stuff
 
-ifeq ($(USEMSVC), 1)
-USEGCC := 0
-USECLANG := 0
-USEICC := 0
-else
-ifeq ($(USECLANG), 1)
+CC_VERSION_STRING = $(shell $(CC) --version)
+ifneq (,$(findstring clang,$(CC_VERSION_STRING)))
+USECLANG := 1
 USEGCC := 0
-USEICC := 0
 else
-ifeq ($(USEICC), 1)
-USEGCC := 0
 USECLANG := 0
-else  # default to gcc
 USEGCC := 1
-USECLANG := 0
-USEICC := 0
-endif
-endif
 endif
 
-ifeq ($(USEIFC), 1)
-FC := ifort
-else
 FC := $(CROSS_COMPILE)gfortran
-endif
-
-ifeq ($(OS), FreeBSD)
-USEGCC := 0
-USECLANG := 1
-endif
 
+# Note: Supporting only macOS Yosemite and above
 ifeq ($(OS), Darwin)
-DARWINVER := $(shell uname -r | cut -b 1-2)
-DARWINVER_GTE11 := $(shell expr $(DARWINVER) \>= 11)
-DARWINVER_GTE13 := $(shell expr $(DARWINVER) \>= 13)
-ifeq ($(DARWINVER_GTE11),0) # Snow Leopard specific configuration
-USEGCC := 1
-USECLANG := 0
-MACOSX_VERSION_MIN := 10.6
-OPENBLAS_TARGET_ARCH:=NEHALEM
-OPENBLAS_DYNAMIC_ARCH:=0
-USE_SYSTEM_LIBUNWIND:=1
+APPLE_ARCH := $(shell uname -m)
+ifneq ($(APPLE_ARCH),arm64)
+MACOSX_VERSION_MIN := 10.10
 else
-ifeq ($(DARWINVER_GTE13),0) # Lion / Mountain Lion specific configuration
-USEGCC := 0
-USECLANG := 1
-MACOSX_VERSION_MIN := 10.6
-else # Newer versions
-USEGCC := 0
-USECLANG := 1
-endif
+MACOSX_VERSION_MIN := 11.0
 endif
 endif
 
 ifeq ($(USEGCC),1)
-ifeq ($(SANITIZE),1)
-$(error Sanitizers are only supported with clang. Try setting SANITIZE=0)
-endif
 CC := $(CROSS_COMPILE)gcc
 CXX := $(CROSS_COMPILE)g++
-JCFLAGS := -std=gnu99 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64
+JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64
 # AArch64 needs this flag to generate the .eh_frame used by libunwind
 JCPPFLAGS := -fasynchronous-unwind-tables
-JCXXFLAGS := -pipe $(fPIC) -fno-rtti
+JCXXFLAGS := -pipe $(fPIC) -fno-rtti -std=c++14
 ifneq ($(OS), WINNT)
 # Do not enable on windows to avoid warnings from libuv.
 JCXXFLAGS += -pedantic
 endif
-DEBUGFLAGS := -O0 -ggdb2 -DJL_DEBUG_BUILD -fstack-protector-all
+DEBUGFLAGS := -O0 -ggdb2 -DJL_DEBUG_BUILD -fstack-protector
 SHIPFLAGS := -O3 -ggdb2 -falign-functions
 endif
 
 ifeq ($(USECLANG),1)
 CC := $(CROSS_COMPILE)clang
 CXX := $(CROSS_COMPILE)clang++
-JCFLAGS := -std=gnu99 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64
+JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64
 # AArch64 needs this flag to generate the .eh_frame used by libunwind
 JCPPFLAGS := -fasynchronous-unwind-tables
-JCXXFLAGS := -pipe $(fPIC) -fno-rtti -pedantic
-DEBUGFLAGS := -O0 -g -DJL_DEBUG_BUILD -fstack-protector-all
+JCXXFLAGS := -pipe $(fPIC) -fno-rtti -pedantic -std=c++14
+DEBUGFLAGS := -O0 -g -DJL_DEBUG_BUILD -fstack-protector
 SHIPFLAGS := -O3 -g
 ifeq ($(OS), Darwin)
-ifeq ($(MACOSX_VERSION_MIN),)
-MACOSX_VERSION_MIN := 10.9
-endif
 CC += -mmacosx-version-min=$(MACOSX_VERSION_MIN)
 CXX += -mmacosx-version-min=$(MACOSX_VERSION_MIN)
 FC += -mmacosx-version-min=$(MACOSX_VERSION_MIN)
@@ -529,18 +514,7 @@ JCPPFLAGS += -D_LARGEFILE_SOURCE -D_DARWIN_USE_64_BIT_INODE=1
 endif
 endif
 
-ifeq ($(USEICC),1)
-ifeq ($(SANITIZE),1)
-$(error Sanitizers only supported with clang. Try setting SANITIZE=0)
-endif
-CC  := icc
-CXX := icpc
-JCFLAGS := -std=gnu11 -pipe $(fPIC) -fno-strict-aliasing -D_FILE_OFFSET_BITS=64 -fp-model precise -fp-model except -no-ftz
-JCPPFLAGS :=
-JCXXFLAGS := -pipe $(fPIC) -fno-rtti
-DEBUGFLAGS := -O0 -g -DJL_DEBUG_BUILD -fstack-protector-all
-SHIPFLAGS := -O3 -g -falign-functions
-endif
+JLDFLAGS :=
 
 ifeq ($(USECCACHE), 1)
 # Expand CC, CXX and FC here already because we want the original definition and not the ccache version.
@@ -577,24 +551,15 @@ FC_ARG := $(shell echo $(FC) | cut -s -d' ' -f2-)
 endif
 
 JFFLAGS := -O2 $(fPIC)
-ifneq ($(USEMSVC),1)
 CPP := $(CC) -E
 AR := $(CROSS_COMPILE)ar
 AS := $(CROSS_COMPILE)as
 LD := $(CROSS_COMPILE)ld
-else #USEMSVC
-CPP := $(CC) -EP
-AR := lib
-ifeq ($(ARCH),x86_64)
-AS := ml64
-else
-AS := ml
-endif #ARCH
-LD := link
-endif #USEMSVC
 RANLIB := $(CROSS_COMPILE)ranlib
 OBJCOPY := $(CROSS_COMPILE)objcopy
 
+CPP_STDOUT := $(CPP) -P
+
 # file extensions
 ifeq ($(OS), WINNT)
   SHLIB_EXT := dll
@@ -604,11 +569,66 @@ else
   SHLIB_EXT := so
 endif
 
+ifeq ($(OS),WINNT)
+define versioned_libname
+$$(if $(2),$(1)-$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT))
+endef
+else ifeq ($(OS),Darwin)
+define versioned_libname
+$$(if $(2),$(1).$(2).$(SHLIB_EXT),$(1).$(SHLIB_EXT))
+endef
+else
+define versioned_libname
+$$(if $(2),$(1).$(SHLIB_EXT).$(2),$(1).$(SHLIB_EXT))
+endef
+endif
+
+
+ifeq ($(SHLIB_EXT), so)
+define SONAME_FLAGS
+  -Wl,-soname=$1
+endef
+else
+define SONAME_FLAGS
+endef
+endif
+
+ifeq ($(OS),WINNT)
+define IMPLIB_FLAGS
+  -Wl,--out-implib,$(build_libdir)/$(notdir $1).a
+endef
+else
+define IMPLIB_FLAGS
+endef
+endif
+
 # On Windows, we want shared library files to end up in $(build_bindir), instead of $(build_libdir)
+# We also don't really have a private bindir on windows right now, due to lack of RPATH.
 ifeq ($(OS),WINNT)
+shlibdir := $(bindir)
+private_shlibdir := $(bindir)
 build_shlibdir := $(build_bindir)
+build_private_shlibdir := $(build_bindir)
 else
+shlibdir := $(libdir)
+private_shlibdir := $(private_libdir)
 build_shlibdir := $(build_libdir)
+build_private_shlibdir := $(build_private_libdir)
+endif
+
+# If we're on windows, don't do versioned shared libraries.  If we're on OSX,
+# put the version number before the .dylib.  Otherwise, put it after.
+ifeq ($(OS), WINNT)
+JL_MAJOR_MINOR_SHLIB_EXT := $(SHLIB_EXT)
+JL_MAJOR_SHLIB_EXT := $(SHLIB_EXT)
+else
+ifeq ($(OS), Darwin)
+JL_MAJOR_MINOR_SHLIB_EXT := $(SOMAJOR).$(SOMINOR).$(SHLIB_EXT)
+JL_MAJOR_SHLIB_EXT := $(SOMAJOR).$(SHLIB_EXT)
+else
+JL_MAJOR_MINOR_SHLIB_EXT := $(SHLIB_EXT).$(SOMAJOR).$(SOMINOR)
+JL_MAJOR_SHLIB_EXT := $(SHLIB_EXT).$(SOMAJOR)
+endif
 endif
 
 ifeq ($(OS), FreeBSD)
@@ -630,8 +650,8 @@ ifeq ($(OS),FreeBSD)
 ifneq (,$(findstring gfortran,$(FC)))
 
 # First let's figure out what version of GCC we're dealing with
-_GCCMAJOR := $(shell $(FC) -dumpversion | cut -d'.' -f1)
-_GCCMINOR := $(shell $(FC) -dumpversion | cut -d'.' -f2)
+_GCCMAJOR := $(shell $(FC) -dumpversion 2>/dev/null | cut -d'.' -f1)
+_GCCMINOR := $(shell $(FC) -dumpversion 2>/dev/null | cut -d'.' -f2)
 
 # The ports system uses major and minor for GCC < 5 (e.g. gcc49 for GCC 4.9), otherwise major only
 ifeq ($(_GCCMAJOR),4)
@@ -702,6 +722,13 @@ JCXXFLAGS += -DGC_DEBUG_ENV
 JCFLAGS += -DGC_DEBUG_ENV
 endif
 
+ifeq ($(WITH_DTRACE), 1)
+JCXXFLAGS += -DUSE_DTRACE
+JCFLAGS += -DUSE_DTRACE
+DTRACE := dtrace
+else
+endif
+
 # ===========================================================================
 
 # Select the cpu architecture to target, or automatically detects the user's compiler
@@ -734,6 +761,11 @@ XC_HOST := $(ARCH)$(shell echo $(BUILD_MACHINE) | sed "s/[^-]*\(.*\)$$/\1/")
 endif
 endif
 
+# Normalize ppc64le to powerpc64le
+ifeq ($(ARCH), ppc64le)
+override ARCH := powerpc64le
+endif
+
 ifeq ($(ARCH),mingw32)
 $(error "the mingw32 compiler you are using fails the openblas testsuite. please see the README.windows document for a replacement")
 else ifeq (cygwin, $(shell $(CC) -dumpmachine | cut -d\- -f3))
@@ -839,6 +871,10 @@ ifneq (,$(filter $(ARCH), powerpc64le ppc64le))
 DIST_ARCH:=ppc64le
 endif
 ifeq (1,$(ISX86))
+# on x86 make sure not to use 80 bit math when we want 64 bit math.
+ifeq (32,$(BINARY))
+JCFLAGS += -mfpmath=sse
+endif
 DIST_ARCH:=$(BINARY)
 endif
 ifneq (,$(findstring arm,$(ARCH)))
@@ -860,9 +896,11 @@ endif
 ifneq (,$(findstring aarch64,$(ARCH)))
 OPENBLAS_DYNAMIC_ARCH:=0
 OPENBLAS_TARGET_ARCH:=ARMV8
+USE_BLAS64:=1
+BINARY:=64
 ifeq ($(OS),Darwin)
 # Apple Chips are all at least A12Z
-MCPU:=apple-a12
+MCPU:=apple-m1
 endif
 endif
 
@@ -909,7 +947,6 @@ JCXXFLAGS += -D_FILE_OFFSET_BITS=64
 endif
 
 # Set some ARCH-specific flags
-ifneq ($(USEICC),1)
 ifeq ($(ISX86),1)
 CC += -m$(BINARY)
 CXX += -m$(BINARY)
@@ -918,7 +955,6 @@ CC_ARG += -m$(BINARY)
 CXX_ARG += -m$(BINARY)
 FC_ARG += -m$(BINARY)
 endif
-endif
 
 ifeq ($(OS),WINNT)
 ifneq ($(ARCH),x86_64)
@@ -949,8 +985,8 @@ JCPPFLAGS+=-DSYSTEM_LIBUNWIND
 endif
 else
 ifeq ($(OS),Darwin)
-LIBUNWIND:=-losxunwind
-JCPPFLAGS+=-DLIBOSXUNWIND
+LIBUNWIND:=-lunwind
+JCPPFLAGS+=-DLLVMLIBUNWIND
 else
 LIBUNWIND:=-lunwind
 endif
@@ -969,19 +1005,21 @@ ifeq ($(USE_SYSTEM_LLVM), 1)
 JCPPFLAGS+=-DSYSTEM_LLVM
 endif # SYSTEM_LLVM
 
-ifeq ($(BUILD_OS),$(OS))
-LLVM_CONFIG_HOST := $(LLVM_CONFIG)
-else
-LLVM_CONFIG_HOST := $(basename $(LLVM_CONFIG))-host$(BUILD_EXE)
-ifeq (exists, $(shell [ -f '$(LLVM_CONFIG_HOST)' ] && echo exists ))
-ifeq ($(shell $(LLVM_CONFIG_HOST) --version),3.3)
-# llvm-config-host <= 3.3 is broken, use llvm-config instead (in an emulator)
+# Windows builds need a little help finding the LLVM libraries for llvm-config
 # use delayed expansion (= not :=) because spawn isn't defined until later
-LLVM_CONFIG_HOST = $(call spawn,$(LLVM_CONFIG))
+# WINEPATH is only needed for a wine-based cross compile
+LLVM_CONFIG_PATH_FIX =
+ifeq ($(OS),WINNT)
+LLVM_CONFIG_PATH_FIX = PATH="$(build_bindir):$(PATH)" WINEPATH="$(call cygpath_w,$(build_bindir));$(WINEPATH)"
 endif
+
+ifeq ($(BUILD_OS),$(OS))
+LLVM_CONFIG_HOST = $(LLVM_CONFIG_PATH_FIX) $(LLVM_CONFIG)
 else
+LLVM_CONFIG_HOST := $(basename $(LLVM_CONFIG))-host$(BUILD_EXE)
+ifneq (exists, $(shell [ -f '$(LLVM_CONFIG_HOST)' ] && echo exists ))
 # llvm-config-host does not exist (cmake build)
-LLVM_CONFIG_HOST = $(call spawn,$(LLVM_CONFIG))
+LLVM_CONFIG_HOST = $(LLVM_CONFIG_PATH_FIX) $(call spawn,$(LLVM_CONFIG))
 endif
 endif
 
@@ -997,6 +1035,12 @@ else
 PATCHELF := $(build_depsbindir)/patchelf
 endif
 
+ifeq ($(USE_SYSTEM_LIBWHICH), 1)
+LIBWHICH := libwhich
+else
+LIBWHICH := $(build_depsbindir)/libwhich
+endif
+
 # On aarch64 and powerpc64le, we assume the page size is 64K.  Our binutils linkers
 # and such already assume this, but `patchelf` seems to be behind the times.  We
 # explicitly tell it to use this large page size so that when we rewrite rpaths and
@@ -1008,11 +1052,7 @@ endif
 # Use ILP64 BLAS interface when building openblas from source on 64-bit architectures
 ifeq ($(BINARY), 64)
 ifeq ($(USE_SYSTEM_BLAS), 1)
-ifeq ($(USE_INTEL_MKL), 1)
-USE_BLAS64 ?= 1
-else # non MKL system blas is most likely LP64
 USE_BLAS64 ?= 0
-endif
 else
 USE_BLAS64 ?= 1
 endif
@@ -1099,8 +1139,21 @@ endif
 USE_BINARYBUILDER ?= 0
 endif
 
+# Auto-detect triplet once, create different versions that we use as defaults below for each BB install target
+FC_VERSION := $(shell $(FC) --version 2>/dev/null | head -1)
+FC_OR_CC_VERSION := $(or $(FC_VERSION),$(shell $(CC) --version 2>/dev/null | head -1))
+BB_TRIPLET_LIBGFORTRAN_CXXABI := $(shell $(call invoke_python,$(JULIAHOME)/contrib/normalize_triplet.py) $(or $(XC_HOST),$(XC_HOST),$(BUILD_MACHINE)) "$(FC_OR_CC_VERSION)" "$(or $(shell echo '\#include <string>' | $(CXX) $(CXXFLAGS) -x c++ -dM -E - | grep _GLIBCXX_USE_CXX11_ABI | awk '{ print $$3 }' ),1)")
+BB_TRIPLET_LIBGFORTRAN := $(subst $(SPACE),-,$(filter-out cxx%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI))))
+BB_TRIPLET_CXXABI := $(subst $(SPACE),-,$(filter-out libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI))))
+BB_TRIPLET := $(subst $(SPACE),-,$(filter-out cxx%,$(filter-out libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI)))))
+
+LIBGFORTRAN_VERSION := $(subst libgfortran,,$(filter libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN))))
+
 # This is the set of projects that BinaryBuilder dependencies are hooked up for.
-BB_PROJECTS := OPENBLAS LLVM SUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP
+# Note: we explicitly _do not_ define `CSL` here, since it requires some more
+# advanced techniques to decide whether it should be installed from a BB source
+# or not.  See `deps/csl.mk` for more detail.
+BB_PROJECTS := BLASTRAMPOLINE OPENBLAS LLVM LIBSUITESPARSE OPENLIBM GMP MBEDTLS LIBSSH2 NGHTTP2 MPFR CURL LIBGIT2 PCRE LIBUV LIBUNWIND DSFMT OBJCONV ZLIB P7ZIP
 define SET_BB_DEFAULT
 # First, check to see if BB is disabled on a global setting
 ifeq ($$(USE_BINARYBUILDER),0)
@@ -1117,10 +1170,12 @@ endef
 $(foreach proj,$(BB_PROJECTS),$(eval $(call SET_BB_DEFAULT,$(proj))))
 
 
-
-# Use the Assertions build
-BINARYBUILDER_LLVM_ASSERTS ?= 0
-
+# Warn if the user tries to build something that requires `gfortran` but they don't have it installed.
+ifeq ($(FC_VERSION),)
+ifneq ($(USE_BINARYBUILDER_OPENBLAS)$(USE_BINARYBUILDER_LIBSUITESPARSE),11)
+$(error "Attempting to build OpenBLAS or SuiteSparse without a functioning fortran compiler!")
+endif
+endif
 
 
 # OS specific stuff
@@ -1155,51 +1210,49 @@ else ifeq ($(OS), Darwin)
   RPATH := -Wl,-rpath,'@executable_path/$(build_libdir_rel)'
   RPATH_ORIGIN := -Wl,-rpath,'@loader_path/'
   RPATH_ESCAPED_ORIGIN := $(RPATH_ORIGIN)
-  RPATH_LIB := -Wl,-rpath,'@loader_path/julia/' -Wl,-rpath,'@loader_path/'
+  RPATH_LIB := -Wl,-rpath,'@loader_path/'
 else
   RPATH := -Wl,-rpath,'$$ORIGIN/$(build_libdir_rel)' -Wl,-rpath,'$$ORIGIN/$(build_private_libdir_rel)' -Wl,-rpath-link,$(build_shlibdir) -Wl,-z,origin
   RPATH_ORIGIN := -Wl,-rpath,'$$ORIGIN' -Wl,-z,origin
   RPATH_ESCAPED_ORIGIN := -Wl,-rpath,'\$$\$$ORIGIN' -Wl,-z,origin -Wl,-rpath-link,$(build_shlibdir)
-  RPATH_LIB := -Wl,-rpath,'$$ORIGIN/julia' -Wl,-rpath,'$$ORIGIN' -Wl,-z,origin
+  RPATH_LIB := -Wl,-rpath,'$$ORIGIN/' -Wl,-z,origin
 endif
 
 # --whole-archive
 ifeq ($(OS), Darwin)
   WHOLE_ARCHIVE := -Xlinker -all_load
   NO_WHOLE_ARCHIVE :=
-else ifneq ($(USEMSVC), 1)
+else
   WHOLE_ARCHIVE := -Wl,--whole-archive
   NO_WHOLE_ARCHIVE := -Wl,--no-whole-archive
 endif
 
 ifeq ($(OS), Linux)
-OSLIBS += -Wl,--no-as-needed -ldl -lrt -lpthread -Wl,--export-dynamic,--as-needed,--no-whole-archive
+OSLIBS += -Wl,--no-as-needed -ldl -lrt -lpthread -latomic -Wl,--export-dynamic,--as-needed,--no-whole-archive
 # Detect if ifunc is supported
 IFUNC_DETECT_SRC := 'void (*f0(void))(void) { return (void(*)(void))0L; }; void f(void) __attribute__((ifunc("f0")));'
 ifeq (supported, $(shell echo $(IFUNC_DETECT_SRC) | $(CC) -Werror -x c - -S -o /dev/null > /dev/null 2>&1 && echo supported))
 JCPPFLAGS += -DJULIA_HAS_IFUNC_SUPPORT=1
 endif
-JLDFLAGS := -Wl,-Bdynamic
-ifneq ($(SANITIZE),1)
-ifneq ($(SANITIZE_MEMORY),1)
-ifneq ($(LLVM_SANITIZE),1)
+JLDFLAGS += -Wl,-Bdynamic
 OSLIBS += -Wl,--version-script=$(JULIAHOME)/src/julia.expmap
+ifneq ($(SANITIZE),1)
 JLDFLAGS += -Wl,-no-undefined
 endif
-endif
-endif
 ifeq (-Bsymbolic-functions, $(shell $(LD) --help | grep -o -e "-Bsymbolic-functions"))
 JLIBLDFLAGS := -Wl,-Bsymbolic-functions
 else
 JLIBLDFLAGS :=
 endif
+# Linker doesn't detect automatically that Julia doesn't need executable stack
+JLIBLDFLAGS += -Wl,-z,noexecstack
 else ifneq ($(OS), Darwin)
 JLIBLDFLAGS :=
 endif
 
 ifeq ($(OS), FreeBSD)
-JLDFLAGS := -Wl,-Bdynamic
-OSLIBS += -lelf -lkvm -lrt -lpthread
+JLDFLAGS += -Wl,-Bdynamic
+OSLIBS += -lelf -lkvm -lrt -lpthread -latomic
 
 # Tweak order of libgcc_s in DT_NEEDED,
 # make it loaded first to
@@ -1216,24 +1269,18 @@ SHLIB_EXT := dylib
 OSLIBS += -framework CoreFoundation
 WHOLE_ARCHIVE := -Xlinker -all_load
 NO_WHOLE_ARCHIVE :=
-JLDFLAGS :=
 HAVE_SSP := 1
 JLIBLDFLAGS := -Wl,-compatibility_version,$(SOMAJOR) -Wl,-current_version,$(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION).$(JULIA_PATCH_VERSION)
 endif
 
 ifeq ($(OS), WINNT)
-ifneq ($(USEMSVC), 1)
 HAVE_SSP := 1
 OSLIBS += -Wl,--export-all-symbols -Wl,--version-script=$(JULIAHOME)/src/julia.expmap \
-	$(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32
-JLDFLAGS := -Wl,--stack,8388608
+	$(NO_WHOLE_ARCHIVE) -lpsapi -lkernel32 -lws2_32 -liphlpapi -lwinmm -ldbghelp -luserenv -lsecur32 -latomic
+JLDFLAGS += -Wl,--stack,8388608
 ifeq ($(ARCH),i686)
 JLDFLAGS += -Wl,--large-address-aware
 endif
-else #USEMSVC
-OSLIBS += kernel32.lib ws2_32.lib psapi.lib advapi32.lib iphlpapi.lib shell32.lib winmm.lib userenv.lib secur32.lib
-JLDFLAGS := -stack:8388608
-endif
 JCPPFLAGS += -D_WIN32_WINNT=0x0502
 UNTRUSTED_SYSTEM_LIBM := 1
 endif
@@ -1262,38 +1309,6 @@ ifeq ($(USE_PERF_JITEVENTS), 1)
 JCPPFLAGS += -DJL_USE_PERF_JITEVENTS
 endif
 
-
-# Intel libraries
-
-ifeq ($(USE_INTEL_LIBM), 1)
-USE_SYSTEM_LIBM := 1
-LIBM := -L$(MKLROOT)/../compiler/lib/intel64 -limf
-LIBMNAME := libimf
-endif
-
-ifeq ($(USE_INTEL_MKL), 1)
-ifeq ($(USE_BLAS64), 1)
-export MKL_INTERFACE_LAYER := ILP64
-MKLLIB := $(MKLROOT)/lib/intel64
-else
-MKLLIB := $(MKLROOT)/lib/ia32
-endif
-USE_SYSTEM_BLAS:=1
-USE_SYSTEM_LAPACK:=1
-LIBBLASNAME := libmkl_rt
-LIBLAPACKNAME := libmkl_rt
-MKL_LDFLAGS := -L$(MKLLIB) -lmkl_rt
-ifneq ($(strip $(MKLLIB)),)
-  ifeq ($(OS), Linux)
-    RPATH_MKL := -Wl,-rpath,$(MKLLIB)
-    RPATH     += $(RPATH_MKL)
-    MKL_LDFLAGS += $(RPATH_MKL)
-  endif
-endif
-LIBBLAS   := $(MKL_LDFLAGS)
-LIBLAPACK := $(MKL_LDFLAGS)
-endif
-
 ifeq ($(HAVE_SSP),1)
 JCPPFLAGS += -DHAVE_SSP=1
 ifeq ($(USEGCC),1)
@@ -1301,24 +1316,6 @@ OSLIBS += -lssp
 endif
 endif
 
-# ATLAS
-
-# ATLAS must have been previously  installed to usr/lib/libatlas
-# (built as a shared library, for your platform, single threaded)
-USE_ATLAS := 0
-ATLAS_LIBDIR := $(build_libdir)
-#or ATLAS_LIBDIR := /path/to/system/atlas/lib
-
-ifeq ($(USE_ATLAS), 1)
-USE_BLAS64 := 0
-USE_SYSTEM_BLAS := 1
-USE_SYSTEM_LAPACK := 1
-LIBBLAS := -L$(ATLAS_LIBDIR) -lsatlas
-LIBLAPACK := $(LIBBLAS)
-LIBBLASNAME := libsatlas
-LIBLAPACKNAME := $(LIBBLASNAME)
-endif
-
 # Renaming OpenBLAS symbols, see #4923 and #8734
 ifeq ($(USE_SYSTEM_BLAS), 0)
 ifeq ($(USE_BLAS64), 1)
@@ -1333,6 +1330,7 @@ endif
 
 # Custom libcxx
 ifeq ($(BUILD_CUSTOM_LIBCXX),1)
+$(error BUILD_CUSTOM_LIBCXX is currently not supported, BUILD_LIBCXX will provide LIBCXX but not link it)
 LDFLAGS += -L$(build_libdir)
 CXXLDFLAGS += -L$(build_libdir) -lc++abi -lc++
 ifeq ($(USECLANG),1)
@@ -1343,19 +1341,14 @@ $(error BUILD_CUSTOM_LIBCXX is currently only supported with Clang. Try setting
 endif
 endif # Clang
 CUSTOM_LD_LIBRARY_PATH := LD_LIBRARY_PATH="$(build_libdir)"
-ifeq ($(USEICC),1)
-CXXFLAGS += -cxxlib-nostd -static-intel
-CLDFLAGS += -static-intel
-LDFLAGS += -cxxlib-nostd -static-intel
-endif
 endif
 
 # Some special restrictions on BB usage:
 ifeq ($(USE_SYSTEM_BLAS),1)
 # Since the names don't line up (`BLAS` vs. `OPENBLAS`), manually gate:
 USE_BINARYBUILDER_OPENBLAS := 0
-# Disable BB SuiteSparse if we're using system BLAS
-USE_BINARYBUILDER_SUITESPARSE := 0
+# Disable BB LIBSUITESPARSE if we're using system BLAS
+USE_BINARYBUILDER_LIBSUITESPARSE := 0
 endif
 
 ifeq ($(USE_SYSTEM_LIBM),1)
@@ -1401,7 +1394,7 @@ clean-$$(abspath $(2)/$(3)):
 ifeq ($(BUILD_OS), WINNT)
 	-cmd //C rmdir $$(call mingw_to_dos,$(2)/$(3),cd $(2) &&)
 else
-	-rm -r $$(abspath $(2)/$(3))
+	rm -rf $$(abspath $(2)/$(3))
 endif
 $$(abspath $(2)/$(3)): | $$(abspath $(2))
 ifeq ($$(BUILD_OS), WINNT)
@@ -1409,7 +1402,7 @@ ifeq ($$(BUILD_OS), WINNT)
 else ifneq (,$$(findstring CYGWIN,$$(BUILD_OS)))
 	@cmd /C mklink /J $$(call cygpath_w,$(2)/$(3)) $$(call cygpath_w,$(1))
 else ifdef JULIA_VAGRANT_BUILD
-	@rm -r $$@
+	@rm -rf $$@
 	@cp -R $$(abspath $(1)) $$@.tmp
 	@mv $$@.tmp $$@
 else
@@ -1417,6 +1410,9 @@ else
 endif
 endef
 
+# Overridable in Make.user
+WINE ?= wine
+
 # many of the following targets must be = not := because the expansion of the makefile functions (and $1) shouldn't happen until later
 ifeq ($(BUILD_OS), WINNT) # MSYS
 spawn = $(1)
@@ -1426,8 +1422,8 @@ spawn = $(1)
 cygpath_w = `cygpath -w $(1)`
 else
 ifeq ($(OS), WINNT) # unix-to-Windows cross-compile
-spawn = wine $(1)
-cygpath_w = `winepath -w $(1)`
+spawn = $(WINE) $(1)
+cygpath_w = `$(WINE) winepath.exe -w $(1)`
 else # not Windows
 spawn = $(1)
 cygpath_w = $(1)
@@ -1451,6 +1447,75 @@ JULIA_SYSIMG_debug := $(build_private_libdir)/sys-debug.$(SHLIB_EXT)
 JULIA_SYSIMG_release := $(build_private_libdir)/sys.$(SHLIB_EXT)
 JULIA_SYSIMG := $(JULIA_SYSIMG_$(JULIA_BUILD_MODE))
 
+define dep_lib_path
+$$($(PYTHON) $(call python_cygpath,$(JULIAHOME)/contrib/relative_path.py) $(1) $(2))
+endef
+
+LIBJULIAINTERNAL_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIAINTERNAL_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT))
+
+LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT))
+
+LIBJULIACODEGEN_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIACODEGEN_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT))
+
+LIBJULIACODEGEN_DEBUG_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))
+LIBJULIACODEGEN_DEBUG_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT))
+
+ifeq ($(OS),WINNT)
+ifeq ($(BINARY),32)
+LIBGCC_NAME := libgcc_s_sjlj-1.$(SHLIB_EXT)
+else
+LIBGCC_NAME := libgcc_s_seh-1.$(SHLIB_EXT)
+endif
+endif
+ifeq ($(OS),Darwin)
+ifeq ($(ARCH),aarch64)
+LIBGCC_NAME := libgcc_s.1.1.$(SHLIB_EXT)
+else
+LIBGCC_NAME := libgcc_s.1.$(SHLIB_EXT)
+endif
+endif
+ifneq ($(findstring $(OS),Linux FreeBSD),)
+LIBGCC_NAME := libgcc_s.$(SHLIB_EXT).1
+endif
+
+# USE_SYSTEM_CSL causes it to get symlinked into build_private_shlibdir
+ifeq ($(USE_SYSTEM_CSL),1)
+LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBGCC_NAME))
+else
+LIBGCC_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/$(LIBGCC_NAME))
+endif
+LIBGCC_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBGCC_NAME))
+
+# USE_SYSTEM_LIBM and USE_SYSTEM_OPENLIBM causes it to get symlinked into build_private_shlibdir
+ifeq ($(USE_SYSTEM_LIBM),1)
+LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
+else ifeq ($(USE_SYSTEM_OPENLIBM),1)
+LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
+else
+LIBM_BUILD_DEPLIB := $(call dep_lib_path,$(build_libdir),$(build_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
+endif
+LIBM_INSTALL_DEPLIB := $(call dep_lib_path,$(libdir),$(private_shlibdir)/$(LIBMNAME).$(SHLIB_EXT))
+
+# We list:
+#  * libgcc_s, because FreeBSD needs to load ours, not the system one.
+#  * libopenlibm, because Windows has an untrustworthy libm, and we want to use ours more than theirs
+#  * libjulia-internal, which must always come second-to-last.
+#  * libjulia-codegen, which must always come last
+#
+# We need these four separate variables because:
+#  * debug builds must link against libjuliadebug, not libjulia
+#  * install time relative paths are not equal to build time relative paths (../lib vs. ../lib/julia)
+# That second point will no longer be true for most deps once they are placed within Artifacts directories.
+# Note that we prefix `libjulia-codegen` and `libjulia-internal` with `@` to signify to the loader that it
+# should not automatically dlopen() it in its loading loop.
+LOADER_BUILD_DEP_LIBS = $(LIBGCC_BUILD_DEPLIB):$(LIBM_BUILD_DEPLIB):@$(LIBJULIAINTERNAL_BUILD_DEPLIB):@$(LIBJULIACODEGEN_BUILD_DEPLIB):
+LOADER_DEBUG_BUILD_DEP_LIBS = $(LIBGCC_BUILD_DEPLIB):$(LIBM_BUILD_DEPLIB):@$(LIBJULIAINTERNAL_DEBUG_BUILD_DEPLIB):@$(LIBJULIACODEGEN_DEBUG_BUILD_DEPLIB):
+LOADER_INSTALL_DEP_LIBS = $(LIBGCC_INSTALL_DEPLIB):$(LIBM_INSTALL_DEPLIB):@$(LIBJULIAINTERNAL_INSTALL_DEPLIB):@$(LIBJULIACODEGEN_INSTALL_DEPLIB):
+LOADER_DEBUG_INSTALL_DEP_LIBS = $(LIBGCC_INSTALL_DEPLIB):$(LIBM_INSTALL_DEPLIB):@$(LIBJULIAINTERNAL_DEBUG_INSTALL_DEPLIB):@$(LIBJULIACODEGEN_DEBUG_INSTALL_DEPLIB):
+
 # Colors for make
 ifndef VERBOSE
 VERBOSE := 0
@@ -1468,6 +1533,7 @@ LINKCOLOR:="\033[34;1m"
 PERLCOLOR:="\033[35m"
 FLISPCOLOR:="\033[32m"
 JULIACOLOR:="\033[32;1m"
+DTRACECOLOR:="\033[32;1m"
 
 SRCCOLOR:="\033[33m"
 BINCOLOR:="\033[37;1m"
@@ -1481,6 +1547,7 @@ PRINT_LINK = printf '    %b %b\n' $(LINKCOLOR)LINK$(ENDCOLOR) $(BINCOLOR)$(GOAL)
 PRINT_PERL = printf '    %b %b\n' $(PERLCOLOR)PERL$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 PRINT_FLISP = printf '    %b %b\n' $(FLISPCOLOR)FLISP$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 PRINT_JULIA = printf '    %b %b\n' $(JULIACOLOR)JULIA$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
+PRINT_DTRACE = printf '    %b %b\n' $(DTRACECOLOR)DTRACE$(ENDCOLOR) $(BINCOLOR)$(GOAL)$(ENDCOLOR); $(1)
 
 else
 QUIET_MAKE =
@@ -1490,20 +1557,12 @@ PRINT_LINK = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_PERL = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_FLISP = echo '$(subst ','\'',$(1))'; $(1)
 PRINT_JULIA = echo '$(subst ','\'',$(1))'; $(1)
+PRINT_DTRACE = echo '$(subst ','\'',$(1))'; $(1)
 
 endif
 
-define newline # a literal \n
-
-
-endef
-
 # Makefile debugging trick:
 # call print-VARIABLE to see the runtime value of any variable
 # (hardened against any special characters appearing in the output)
 print-%:
 	@echo '$*=$(subst ','\'',$(subst $(newline),\n,$($*)))'
-
-# Literal values that are hard to use in Makefiles otherwise:
-COMMA:=,
-SPACE:=$(eval) $(eval)
diff --git a/Makefile b/Makefile
index 30dfac383bed80..1bd8b66a009be7 100644
--- a/Makefile
+++ b/Makefile
@@ -9,7 +9,7 @@ all: debug release
 # sort is used to remove potential duplicates
 DIRS := $(sort $(build_bindir) $(build_depsbindir) $(build_libdir) $(build_private_libdir) $(build_libexecdir) $(build_includedir) $(build_includedir)/julia $(build_sysconfdir)/julia $(build_datarootdir)/julia $(build_datarootdir)/julia/stdlib $(build_man1dir))
 ifneq ($(BUILDROOT),$(JULIAHOME))
-BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa ui doc deps stdlib test test/embedding test/llvmpasses)
+BUILDDIRS := $(BUILDROOT) $(addprefix $(BUILDROOT)/,base src src/flisp src/support src/clangsa cli doc deps stdlib test test/clangsa test/embedding test/llvmpasses)
 BUILDDIRMAKE := $(addsuffix /Makefile,$(BUILDDIRS)) $(BUILDROOT)/sysimage.mk
 DIRS := $(DIRS) $(BUILDDIRS)
 $(BUILDDIRMAKE): | $(BUILDDIRS)
@@ -46,9 +46,9 @@ julia_flisp.boot.inc.phony: julia-deps
 $(BUILDROOT)/doc/_build/html/en/index.html: $(shell find $(BUILDROOT)/base $(BUILDROOT)/doc \( -path $(BUILDROOT)/doc/_build -o -path $(BUILDROOT)/doc/deps -o -name *_constants.jl -o -name *_h.jl -o -name version_git.jl \) -prune -o -type f -print)
 	@$(MAKE) docs
 
-julia-symlink: julia-ui-$(JULIA_BUILD_MODE)
+julia-symlink: julia-cli-$(JULIA_BUILD_MODE)
 ifeq ($(OS),WINNT)
-	@echo '@"%~dp0"\'"$$(echo $(call rel_path,$(BUILDROOT),$(JULIA_EXECUTABLE)) | tr / '\\')" '%*' > $(BUILDROOT)/julia.bat
+	echo '@"%~dp0/'"$$(echo '$(call rel_path,$(BUILDROOT),$(JULIA_EXECUTABLE))')"'" %*' | tr / '\\' > $(BUILDROOT)/julia.bat
 	chmod a+x $(BUILDROOT)/julia.bat
 else
 ifndef JULIA_VAGRANT_BUILD
@@ -59,7 +59,8 @@ endif
 julia-deps: | $(DIRS) $(build_datarootdir)/julia/base $(build_datarootdir)/julia/test
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/deps
 
-julia-stdlib: | $(DIRS)
+# `julia-stdlib` depends on `julia-deps` so that the fake JLL stdlibs can copy in their Artifacts.toml files.
+julia-stdlib: | $(DIRS) julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/stdlib
 
 julia-base: julia-deps $(build_sysconfdir)/julia/startup.jl $(build_man1dir)/julia.1 $(build_datarootdir)/julia/julia-config.jl
@@ -71,31 +72,36 @@ julia-libccalltest: julia-deps
 julia-libllvmcalltest: julia-deps
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libllvmcalltest
 
-julia-src-release julia-src-debug : julia-src-% : julia-deps julia_flisp.boot.inc.phony
-	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src libjulia-$*
+julia-src-release julia-src-debug : julia-src-% : julia-deps julia_flisp.boot.inc.phony julia-cli-%
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/src $*
 
-julia-ui-release julia-ui-debug : julia-ui-% : julia-src-%
-	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/ui julia-$*
+julia-cli-release julia-cli-debug: julia-cli-% : julia-deps
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/cli $*
 
-julia-sysimg-ji : julia-stdlib julia-base julia-ui-$(JULIA_BUILD_MODE) | $(build_private_libdir)
+julia-sysimg-ji : julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir)
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-ji JULIA_EXECUTABLE='$(JULIA_EXECUTABLE)'
 
-julia-sysimg-bc : julia-stdlib julia-base julia-ui-$(JULIA_BUILD_MODE) | $(build_private_libdir)
+julia-sysimg-bc : julia-stdlib julia-base julia-cli-$(JULIA_BUILD_MODE) julia-src-$(JULIA_BUILD_MODE) | $(build_private_libdir)
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-bc JULIA_EXECUTABLE='$(JULIA_EXECUTABLE)'
 
-julia-sysimg-release julia-sysimg-debug : julia-sysimg-% : julia-sysimg-ji julia-ui-%
+julia-sysimg-release julia-sysimg-debug : julia-sysimg-% : julia-sysimg-ji julia-src-%
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT) -f sysimage.mk sysimg-$*
 
-julia-debug julia-release : julia-% : julia-sysimg-% julia-symlink julia-libccalltest julia-libllvmcalltest julia-base-cache
+julia-debug julia-release : julia-% : julia-sysimg-% julia-src-% julia-symlink julia-libccalltest julia-libllvmcalltest julia-base-cache
 
 debug release : % : julia-%
 
 docs: julia-sysimg-$(JULIA_BUILD_MODE)
 	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/doc JULIA_EXECUTABLE='$(call spawn,$(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE))) --startup-file=no'
 
+docs-revise:
+	@$(MAKE) $(QUIET_MAKE) -C $(BUILDROOT)/doc JULIA_EXECUTABLE='$(call spawn,$(JULIA_EXECUTABLE_$(JULIA_BUILD_MODE))) --startup-file=no' revise=true
+
 check-whitespace:
 ifneq ($(NO_GIT), 1)
-	@$(JULIAHOME)/contrib/check-whitespace.sh
+	@# Append the directory containing the julia we just built to the end of `PATH`,
+	@# to give us the best chance of being able to run this check.
+	@PATH=$(PATH):$(dirname $(JULIA_EXECUTABLE)) $(JULIAHOME)/contrib/check-whitespace.jl
 else
 	$(warn "Skipping whitespace check because git is unavailable")
 endif
@@ -123,15 +129,16 @@ release-candidate: release testall
 	@echo 2. Update references to the julia version in the source directories, such as in README.md
 	@echo 3. Bump VERSION
 	@echo 4. Increase SOMAJOR and SOMINOR if needed.
-	@echo 5. Create tag, push to github "\(git tag v\`cat VERSION\` && git push --tags\)"		#"` # These comments deal with incompetent syntax highlighting rules
-	@echo 6. Clean out old .tar.gz files living in deps/, "\`git clean -fdx\`" seems to work	#"`
-	@echo 7. Replace github release tarball with tarballs created from make light-source-dist and make full-source-dist
-	@echo 8. Check that 'make && make install && make test' succeed with unpacked tarballs even without Internet access.
-	@echo 9. Follow packaging instructions in doc/build/distributing.md to create binary packages for all platforms
-	@echo 10. Upload to AWS, update https://julialang.org/downloads and http://status.julialang.org/stable links
-	@echo 11. Update checksums on AWS for tarball and packaged binaries
-	@echo 12. Announce on mailing lists
-	@echo 13. Change master to release-0.X in base/version.jl and base/version_git.sh as in 4cb1e20
+	@echo 5. Update SPDX document by running the script contrib/updateSPDX.jl
+	@echo 6. Create tag, push to github "\(git tag v\`cat VERSION\` && git push --tags\)"		#"` # These comments deal with incompetent syntax highlighting rules
+	@echo 7. Clean out old .tar.gz files living in deps/, "\`git clean -fdx\`" seems to work	#"`
+	@echo 8. Replace github release tarball with tarballs created from make light-source-dist and make full-source-dist with USE_BINARYBUILDER=0
+	@echo 9. Check that 'make && make install && make test' succeed with unpacked tarballs even without Internet access.
+	@echo 10. Follow packaging instructions in doc/build/distributing.md to create binary packages for all platforms
+	@echo 11. Upload to AWS, update https://julialang.org/downloads and http://status.julialang.org/stable links
+	@echo 12. Update checksums on AWS for tarball and packaged binaries
+	@echo 13. Announce on mailing lists
+	@echo 14. Change master to release-0.X in base/version.jl and base/version_git.sh as in 4cb1e20
 	@echo
 
 $(build_man1dir)/julia.1: $(JULIAHOME)/doc/man/julia.1 | $(build_man1dir)
@@ -150,7 +157,8 @@ $(build_depsbindir)/stringreplace: $(JULIAHOME)/contrib/stringreplace.c | $(buil
 	@$(call PRINT_CC, $(HOSTCC) -o $(build_depsbindir)/stringreplace $(JULIAHOME)/contrib/stringreplace.c)
 
 julia-base-cache: julia-sysimg-$(JULIA_BUILD_MODE) | $(DIRS) $(build_datarootdir)/julia
-	@JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) $(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/etc/write_base_cache.jl) \
+	@JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \
+		$(call spawn, $(JULIA_EXECUTABLE) --startup-file=no $(call cygpath_w,$(JULIAHOME)/etc/write_base_cache.jl) \
 		$(call cygpath_w,$(build_datarootdir)/julia/base.cache))
 
 # public libraries, that are installed in $(prefix)/lib
@@ -160,33 +168,33 @@ JL_TARGETS += julia-debug
 endif
 
 # private libraries, that are installed in $(prefix)/lib/julia
-JL_PRIVATE_LIBS-0 := libccalltest libllvmcalltest
+JL_PRIVATE_LIBS-0 := libccalltest libllvmcalltest libjulia-internal libjulia-codegen
+ifeq ($(BUNDLE_DEBUG_LIBS),1)
+JL_PRIVATE_LIBS-0 += libjulia-internal-debug libjulia-codegen-debug
+endif
 ifeq ($(USE_GPL_LIBS), 1)
-JL_PRIVATE_LIBS-0 += libsuitesparse_wrapper
-JL_PRIVATE_LIBS-$(USE_SYSTEM_SUITESPARSE) += libamd libcamd libccolamd libcholmod libcolamd libumfpack libspqr libsuitesparseconfig
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSUITESPARSE) += libamd libbtf libcamd libccolamd libcholmod libcolamd libklu libldl librbio libspqr libsuitesparseconfig libumfpack
 endif
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBBLASTRAMPOLINE) += libblastrampoline
 JL_PRIVATE_LIBS-$(USE_SYSTEM_PCRE) += libpcre2-8
 JL_PRIVATE_LIBS-$(USE_SYSTEM_DSFMT) += libdSFMT
-JL_PRIVATE_LIBS-$(USE_SYSTEM_GMP) += libgmp
+JL_PRIVATE_LIBS-$(USE_SYSTEM_GMP) += libgmp libgmpxx
 JL_PRIVATE_LIBS-$(USE_SYSTEM_MPFR) += libmpfr
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBSSH2) += libssh2
 JL_PRIVATE_LIBS-$(USE_SYSTEM_NGHTTP2) += libnghttp2
 JL_PRIVATE_LIBS-$(USE_SYSTEM_MBEDTLS) += libmbedtls libmbedcrypto libmbedx509
 JL_PRIVATE_LIBS-$(USE_SYSTEM_CURL) += libcurl
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBGIT2) += libgit2
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBUV) += libuv
 ifeq ($(OS),WINNT)
 JL_PRIVATE_LIBS-$(USE_SYSTEM_ZLIB) += zlib
 else
 JL_PRIVATE_LIBS-$(USE_SYSTEM_ZLIB) += libz
 endif
 ifeq ($(USE_LLVM_SHLIB),1)
-JL_PRIVATE_LIBS-$(USE_SYSTEM_LLVM) += libLLVM libLLVM-9jl
+JL_PRIVATE_LIBS-$(USE_SYSTEM_LLVM) += libLLVM libLLVM-13jl
 endif
-ifeq ($(OS),Darwin)
-JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBUNWIND) += libosxunwind
-else
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LIBUNWIND) += libunwind
-endif
 
 ifeq ($(USE_SYSTEM_LIBM),0)
 JL_PRIVATE_LIBS-$(USE_SYSTEM_OPENLIBM) += libopenlibm
@@ -197,91 +205,29 @@ ifneq ($(LIBLAPACKNAME),$(LIBBLASNAME))
 JL_PRIVATE_LIBS-$(USE_SYSTEM_LAPACK) += $(LIBLAPACKNAME)
 endif
 
+JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libgfortran libquadmath libstdc++ libgcc_s libgomp libssp libatomic
 ifeq ($(OS),Darwin)
-ifeq ($(USE_SYSTEM_BLAS),1)
-ifeq ($(USE_SYSTEM_LAPACK),0)
-JL_PRIVATE_LIBS-0 += libgfortblas
-endif
+JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libc++
 endif
-endif
-
-# On FreeBSD, /lib/libgcc_s.so.1 is incompatible with Fortran; to use Fortran on FreeBSD,
-# we need to link to the libgcc_s that ships with the same GCC version used by libgfortran.
-# To work around this, we copy the GCC libraries we need, namely libgfortran, libgcc_s,
-# and libquadmath, into our build library directory, $(build_libdir). We also add them to
-# JL_PRIVATE_LIBS-0 so that they know where they need to live at install time.
-ifeq ($(OS),FreeBSD)
-define std_so
-julia-deps: | $$(build_libdir)/$(1).so
-$$(build_libdir)/$(1).so: | $$(build_libdir)
-	$$(INSTALL_M) $$(GCCPATH)/$(1).so* $$(build_libdir)
-JL_PRIVATE_LIBS-0 += $(1)
-endef
-
-$(eval $(call std_so,libgfortran))
-$(eval $(call std_so,libgcc_s))
-$(eval $(call std_so,libquadmath))
-endif # FreeBSD
-
 ifeq ($(OS),WINNT)
-# find the standard .dll folders
-ifeq ($(XC_HOST),)
-STD_LIB_PATH ?= $(PATH)
+JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libwinpthread
 else
-STD_LIB_PATH := $(shell LANG=C $(CC) -print-search-dirs | grep '^programs: =' | sed -e "s/^programs: =//")
-STD_LIB_PATH += :$(shell LANG=C $(CC) -print-search-dirs | grep '^libraries: =' | sed -e "s/^libraries: =//")
-ifneq (,$(findstring CYGWIN,$(BUILD_OS))) # the cygwin-mingw32 compiler lies about it search directory paths
-STD_LIB_PATH := $(shell echo '$(STD_LIB_PATH)' | sed -e "s!/lib/!/bin/!g")
-endif
+JL_PRIVATE_LIBS-$(USE_SYSTEM_CSL) += libpthread
 endif
 
-pathsearch = $(firstword $(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))))
-
-define std_dll
-julia-deps-libs: | $$(build_bindir)/lib$(1).dll $$(build_depsbindir)/lib$(1).dll
-$$(build_bindir)/lib$(1).dll: | $$(build_bindir)
-	cp $$(or $$(call pathsearch,lib$(1).dll,$$(STD_LIB_PATH)),$$(error can't find lib$1.dll)) $$(build_bindir)
-$$(build_depsbindir)/lib$(1).dll: | $$(build_depsbindir)
-	cp $$(or $$(call pathsearch,lib$(1).dll,$$(STD_LIB_PATH)),$$(error can't find lib$1.dll)) $$(build_depsbindir)
-JL_TARGETS += $(1)
-endef
-julia-deps: julia-deps-libs
 
-# Given a list of space-separated libraries, return the first library name that is
-# correctly found through `pathsearch`.
-define select_std_dll
-$(firstword $(foreach name,$(1),$(if $(call pathsearch,lib$(name).dll,$(STD_LIB_PATH)),$(name),)))
-endef
-
-$(eval $(call std_dll,$(call select_std_dll,gfortran-3 gfortran-4 gfortran-5)))
-$(eval $(call std_dll,quadmath-0))
-$(eval $(call std_dll,stdc++-6))
-ifeq ($(ARCH),i686)
-$(eval $(call std_dll,gcc_s_sjlj-1))
-else
-$(eval $(call std_dll,gcc_s_seh-1))
+ifeq ($(OS),Darwin)
+ifeq ($(USE_SYSTEM_BLAS),1)
+ifeq ($(USE_SYSTEM_LAPACK),0)
+JL_PRIVATE_LIBS-0 += libgfortblas
+endif
 endif
-$(eval $(call std_dll,ssp-0))
-$(eval $(call std_dll,winpthread-1))
-$(eval $(call std_dll,atomic-1))
 endif
-
 
 define stringreplace
-	$(build_depsbindir)/stringreplace $$(strings -t x - $1 | grep '$2' | awk '{print $$1;}') '$3' 255 "$(call cygpath_w,$1)"
+	$(build_depsbindir)/stringreplace $$(strings -t x - $1 | grep $2 | awk '{print $$1;}') $3 255 "$(call cygpath_w,$1)"
 endef
 
-# Run fixup-libgfortran on all platforms but Windows and FreeBSD. On FreeBSD we
-# pull in the GCC libraries earlier and use them for the build to make sure we
-# don't inadvertently link to /lib/libgcc_s.so.1, which is incompatible with
-# libgfortran, and on Windows we copy them in earlier as well.
-ifeq (,$(findstring $(OS),FreeBSD WINNT))
-julia-base: $(build_libdir)/libgfortran*.$(SHLIB_EXT)*
-$(build_libdir)/libgfortran*.$(SHLIB_EXT)*: | $(build_libdir) julia-deps
-	-$(CUSTOM_LD_LIBRARY_PATH) PATH="$(PATH):$(build_depsbindir)" PATCHELF="$(PATCHELF)" FC="$(FC)" $(JULIAHOME)/contrib/fixup-libgfortran.sh --verbose $(build_libdir)
-JL_PRIVATE_LIBS-0 += libgfortran libgcc_s libquadmath
-endif
-
 
 install: $(build_depsbindir)/stringreplace $(BUILDROOT)/doc/_build/html/en/index.html
 ifeq ($(BUNDLE_DEBUG_LIBS),1)
@@ -348,8 +294,11 @@ endif
 		done \
 	done
 	for suffix in $(JL_PRIVATE_LIBS-1) ; do \
-		lib=$(build_private_libdir)/$${suffix}.$(SHLIB_EXT); \
-		$(INSTALL_M) $$lib $(DESTDIR)$(private_libdir) ; \
+		for lib in $(build_private_libdir)/$${suffix}.$(SHLIB_EXT)*; do \
+			if [ "$${lib##*.}" != "dSYM" ]; then \
+				$(INSTALL_M) $$lib $(DESTDIR)$(private_libdir) ; \
+			fi \
+		done \
 	done
 endif
 	# Install `7z` into libexec/
@@ -401,22 +350,48 @@ else ifneq (,$(findstring $(OS),Linux FreeBSD))
 	done
 endif
 
-	# Overwrite JL_SYSTEM_IMAGE_PATH in julia library
-	if [ $(DARWIN_FRAMEWORK) = 0 ]; then \
-		RELEASE_TARGET=$(DESTDIR)$(libdir)/libjulia.$(SHLIB_EXT); \
-		DEBUG_TARGET=$(DESTDIR)$(libdir)/libjulia-debug.$(SHLIB_EXT); \
+	# Overwrite JL_SYSTEM_IMAGE_PATH in libjulia-internal
+	if [ "$(DARWIN_FRAMEWORK)" = "0" ]; then \
+		RELEASE_TARGET=$(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT); \
+		DEBUG_TARGET=$(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT); \
 	else \
 		RELEASE_TARGET=$(DESTDIR)$(prefix)/$(framework_dylib); \
 		DEBUG_TARGET=$(DESTDIR)$(prefix)/$(framework_dylib)_debug; \
 	fi; \
 	$(call stringreplace,$${RELEASE_TARGET},sys.$(SHLIB_EXT)$$,$(private_libdir_rel)/sys.$(SHLIB_EXT)); \
-	if [ $(BUNDLE_DEBUG_LIBS) = 1 ]; then \
+	if [ "$(BUNDLE_DEBUG_LIBS)" = "1" ]; then \
 		$(call stringreplace,$${DEBUG_TARGET},sys-debug.$(SHLIB_EXT)$$,$(private_libdir_rel)/sys-debug.$(SHLIB_EXT)); \
 	fi;
+endif
+
+	# Set rpath for libjulia-internal, which is moving from `../lib` to `../lib/julia`.  We only need to do this for Linux/FreeBSD
+ifneq (,$(findstring $(OS),Linux FreeBSD))
+	$(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal.$(SHLIB_EXT)
+ifeq ($(BUNDLE_DEBUG_LIBS),1)
+	$(PATCHELF) --set-rpath '$$ORIGIN:$$ORIGIN/$(reverse_private_libdir_rel)' $(DESTDIR)$(private_libdir)/libjulia-internal-debug.$(SHLIB_EXT)
+endif
+endif
+
 
+ifneq ($(LOADER_BUILD_DEP_LIBS),$(LOADER_INSTALL_DEP_LIBS))
+	# Next, overwrite relative path to libjulia-internal in our loader if $$(LOADER_BUILD_DEP_LIBS) != $$(LOADER_INSTALL_DEP_LIBS)
+	$(call stringreplace,$(DESTDIR)$(shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT),$(LOADER_BUILD_DEP_LIBS)$$,$(LOADER_INSTALL_DEP_LIBS))
+ifeq ($(OS),Darwin)
+	# Codesign the libjulia we just modified
+	$(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(DESTDIR)$(shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT)"
 endif
-	# On FreeBSD, remove the build's libdir from each library's RPATH
+
+ifeq ($(BUNDLE_DEBUG_LIBS),1)
+	$(call stringreplace,$(DESTDIR)$(shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT),$(LOADER_DEBUG_BUILD_DEP_LIBS)$$,$(LOADER_DEBUG_INSTALL_DEP_LIBS))
+ifeq ($(OS),Darwin)
+	# Codesign the libjulia we just modified
+	$(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(DESTDIR)$(shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)"
+endif
+endif
+endif
+
 ifeq ($(OS),FreeBSD)
+	# On FreeBSD, remove the build's libdir from each library's RPATH
 	$(JULIAHOME)/contrib/fixup-rpath.sh "$(PATCHELF)" $(DESTDIR)$(libdir) $(build_libdir)
 	$(JULIAHOME)/contrib/fixup-rpath.sh "$(PATCHELF)" $(DESTDIR)$(private_libdir) $(build_libdir)
 	$(JULIAHOME)/contrib/fixup-rpath.sh "$(PATCHELF)" $(DESTDIR)$(bindir) $(build_libdir)
@@ -457,16 +432,19 @@ endif
 	@$(MAKE) -C $(BUILDROOT) -f $(JULIAHOME)/Makefile install
 	cp $(JULIAHOME)/LICENSE.md $(BUILDROOT)/julia-$(JULIA_COMMIT)
 ifeq ($(OS), Linux)
-	-$(JULIAHOME)/contrib/fixup-libstdc++.sh $(DESTDIR)$(libdir) $(DESTDIR)$(private_libdir)
-
-	# Copy over any bundled ca certs we picked up from the system during buildi
+	# Copy over any bundled ca certs we picked up from the system during build
 	-cp $(build_datarootdir)/julia/cert.pem $(DESTDIR)$(datarootdir)/julia/
 endif
 ifeq ($(OS), WINNT)
 	cd $(BUILDROOT)/julia-$(JULIA_COMMIT)/bin && rm -f llvm* llc.exe lli.exe opt.exe LTO.dll bugpoint.exe macho-dump.exe
+endif
+ifeq ($(OS),Darwin)
+	# If we're on macOS, and we have a codesigning identity, then codesign the binary-dist tarball!
+	$(JULIAHOME)/contrib/codesign.sh "$(MACOS_CODESIGN_IDENTITY)" "$(BUILDROOT)/julia-$(JULIA_COMMIT)"
 endif
 	cd $(BUILDROOT) && $(TAR) zcvf $(JULIA_BINARYDIST_FILENAME).tar.gz julia-$(JULIA_COMMIT)
 
+
 exe:
 	# run Inno Setup to compile installer
 	$(call spawn,$(JULIAHOME)/dist-extras/inno/iscc.exe /DAppVersion=$(JULIA_VERSION) /DSourceDir="$(call cygpath_w,$(BUILDROOT)/julia-$(JULIA_COMMIT))" /DRepoDir="$(call cygpath_w,$(JULIAHOME))" /F"$(JULIA_BINARYDIST_FILENAME)" /O"$(call cygpath_w,$(BUILDROOT))" $(INNO_ARGS) $(call cygpath_w,$(JULIAHOME)/contrib/windows/build-installer.iss))
@@ -490,11 +468,14 @@ endif
 	echo "base/version_git.jl" > light-source-dist.tmp
 
 	# Download all stdlibs and include the tarball filenames in light-source-dist.tmp
-	@$(MAKE) -C stdlib getall NO_GIT=1
+	@$(MAKE) -C stdlib getall DEPS_GIT=0 USE_BINARYBUILDER=0
 	-ls stdlib/srccache/*.tar.gz >> light-source-dist.tmp
+	-ls stdlib/*/StdlibArtifacts.toml >> light-source-dist.tmp
 
-	# Exclude git, github and CI config files
-	git ls-files | sed -E -e '/^\..+/d' -e '/\/\..+/d' -e '/appveyor.yml/d' >> light-source-dist.tmp
+	# Include all git-tracked filenames
+	git ls-files >> light-source-dist.tmp
+
+	# Include documentation filenames
 	find doc/_build/html >> light-source-dist.tmp
 
 # Make tarball with only Julia code + stdlib tarballs
@@ -513,7 +494,7 @@ source-dist:
 # Make tarball with Julia code plus all dependencies
 full-source-dist: light-source-dist.tmp
 	# Get all the dependencies downloaded
-	@$(MAKE) -C deps getall NO_GIT=1
+	@$(MAKE) -C deps getall DEPS_GIT=0 USE_BINARYBUILDER=0
 
 	# Create file full-source-dist.tmp to hold all the filenames that go into the tarball
 	cp light-source-dist.tmp full-source-dist.tmp
@@ -531,7 +512,7 @@ clean: | $(CLEAN_TARGETS)
 	@-$(MAKE) -C $(BUILDROOT)/base clean
 	@-$(MAKE) -C $(BUILDROOT)/doc clean
 	@-$(MAKE) -C $(BUILDROOT)/src clean
-	@-$(MAKE) -C $(BUILDROOT)/ui clean
+	@-$(MAKE) -C $(BUILDROOT)/cli clean
 	@-$(MAKE) -C $(BUILDROOT)/test clean
 	@-$(MAKE) -C $(BUILDROOT)/stdlib clean
 	-rm -f $(BUILDROOT)/julia
@@ -555,7 +536,7 @@ distcleanall: cleanall
 
 .PHONY: default debug release check-whitespace release-candidate \
 	julia-debug julia-release julia-stdlib julia-deps julia-deps-libs \
-	julia-ui-release julia-ui-debug julia-src-release julia-src-debug \
+	julia-cli-release julia-cli-debug julia-src-release julia-src-debug \
 	julia-symlink julia-base julia-sysimg julia-sysimg-ji julia-sysimg-release julia-sysimg-debug \
 	test testall testall1 test test-* test-revise-* \
 	clean distcleanall cleanall clean-* \
@@ -600,6 +581,9 @@ else
 LLVM_SIZE := $(build_depsbindir)/llvm-size$(EXE)
 endif
 build-stats:
+ifeq ($(USE_BINARYBUILDER_LLVM),1)
+	@$(MAKE) -C deps install-llvm-tools
+endif
 	@printf $(JULCOLOR)' ==> ./julia binary sizes\n'$(ENDCOLOR)
 	$(call spawn,$(LLVM_SIZE) -A $(call cygpath_w,$(build_private_libdir)/sys.$(SHLIB_EXT)) \
 		$(call cygpath_w,$(build_shlibdir)/libjulia.$(SHLIB_EXT)) \
@@ -608,3 +592,6 @@ build-stats:
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
 	@time $(call spawn,$(build_bindir)/julia$(EXE) -e '')
+
+print-locale:
+	@locale
diff --git a/NEWS.md b/NEWS.md
index 15229f5c28f58e..c2e60b4bc07450 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,188 +1,126 @@
-Julia v1.6 Release Notes
+Julia v1.9 Release Notes
 ========================
 
 New language features
 ---------------------
 
-* Types written with `where` syntax can now be used to define constructors, e.g.
-  `(Foo{T} where T)(x) = ...`.
-* `<--` and `<-->` are now available as infix operators, with the same precedence
-  and associativity as other arrow-like operators ([#36666]).
-* Compilation and type inference can now be enabled or disabled at the module level
-  using the experimental macro `Base.Experimental.@compiler_options` ([#37041]).
-* The library name passed to `ccall` or `@ccall` can now be an expression involving
-  global variables and function calls. The expression will be evaluated the first
-  time the `ccall` executes ([#36458]).
-* `ꜛ` (U+A71B), `ꜜ` (U+A71C) and `ꜝ` (U+A71D) can now also be used as operator
-  suffixes. They can be tab-completed from `\^uparrow`, `\^downarrow` and `\^!` in the REPL
-  ([#37542]).
+* It is now possible to assign to bindings in another module using `setproperty!(::Module, ::Symbol, x)`. ([#44137])
+* Slurping in assignments is now also allowed in non-final position. This is
+  handled via `Base.split_rest`. ([#42902])
 
 Language changes
 ----------------
 
-* The `-->` operator now lowers to a `:call` expression, so it can be defined as
-  a function like other operators. The dotted version `.-->` is now parsed as well.
-  For backwards compatibility, `-->` still parses using its own expression head
-  instead of `:call`.
+* New builtins `getglobal(::Module, ::Symbol[, order])` and `setglobal!(::Module, ::Symbol, x[, order])`
+  for reading from and writing to globals. `getglobal` should now be preferred for accessing globals over
+  `getfield`. ([#44137])
 
 Compiler/Runtime improvements
 -----------------------------
 
-* All platforms can now use `@executable_path` within `jl_load_dynamic_library()`.
-  This allows executable-relative paths to be embedded within executables on all
-  platforms, not just MacOS, which the syntax is borrowed from. ([#35627])
-* Constant propogation now occurs through keyword arguments ([#35976])
-* The precompilation cache is now created atomically ([#36416]). Invoking _n_
-  Julia processes simultaneously may create _n_ temporary caches.
 
 Command-line option changes
 ---------------------------
 
-* There is no longer a concept of "home project": starting `julia --project=dir`
-  is now exactly equivalent to starting `julia` and then doing `pkg> activate
-  $dir` and `julia --project` is exactly equivalent to doing that where
-  `dir = Base.current_project()`. In particular, this means that if you do
-  `pkg> activate` after starting `julia` with the `--project` option (or with
-  `JULIA_PROJECT` set) it will take you to the default active project, which is
-  `@v1.5` unless you have modified `LOAD_PATH`. ([#36434])
+* In Linux and Windows, `--threads=auto` now tries to infer usable number of CPUs from the
+  process affinity which is set typically in HPC and cloud environments ([#42340]).
+* `--math-mode=fast` is now a no-op ([#41638]). Users are encouraged to use the @fastmath macro instead, which has more well-defined semantics.
+* The `--threads` command-line option now accepts `auto|N[,auto|M]` where `M` specifies the
+  number of interactive threads to create (`auto` currently means 1) ([#42302]).
 
 Multi-threading changes
 -----------------------
 
+* `Threads.@spawn` now accepts an optional first argument: `:default` or `:interactive`.
+  An interactive task desires low latency and implicitly agrees to be short duration or to
+  yield frequently. Interactive tasks will run on interactive threads, if any are specified
+  when Julia is started ([#42302]).
 
 Build system changes
 --------------------
 
-* Windows Installer now has the option to 'Add Julia to Path'. To unselect this option
-  from the commandline simply remove the tasks you do not want to be installed: e.g.
-  `./julia-installer.exe /TASKS="desktopicon,startmenu,addtopath"`, adds a desktop
-  icon, a startmenu group icon, and adds Julia to system PATH.
-
-
-Library functions
------------------
-
-* The `Base.download` function has been deprecated (silently, by default) in favor of the new `Downloads.download` standard library function ([#37340]).
-* The `Base.Grisu` code has been officially removed (float printing was switched to the ryu algorithm code in 1.4)
 
 New library functions
 ---------------------
 
-* New function `Base.kron!` and corresponding overloads for various matrix types for performing Kronecker product in-place. ([#31069]).
-* New function `Base.Threads.foreach(f, channel::Channel)` for multithreaded `Channel` consumption. ([#34543]).
-* New function `Base.readeach(io, T)` for iteratively performing `read(io, T)`. ([#36150])
-* `Iterators.map` is added. It provides another syntax `Iterators.map(f, iterators...)`
-  for writing `(f(args...) for args in zip(iterators...))`, i.e. a lazy `map` ([#34352]).
-* New function `sincospi` for simultaneously computing `sinpi(x)` and `cospi(x)` more
-  efficiently ([#35816]).
-* New function `addenv` for adding environment mappings into a `Cmd` object, returning the new `Cmd` object.
+* `Iterators.flatmap` was added ([#44792]).
 
-New library features
---------------------
+Library changes
+---------------
 
-* The `redirect_*` functions can now be called on `IOContext` objects.
-* New constructor `NamedTuple(iterator)` that constructs a named tuple from a key-value pair iterator.
+* A known concurrency issue of `iterate` methods on `Dict` and other derived objects such
+  as `keys(::Dict)`, `values(::Dict)`, and `Set` is fixed.  These methods of `iterate` can
+  now be called on a dictionary or set shared by arbitrary tasks provided that there are no
+  tasks mutating the dictionary or set ([#44534]).
+* Predicate function negation `!f` now returns a composed function `(!) ∘ f` instead of an anonymous function ([#44752]).
+* `RoundFromZero` now works for non-`BigFloat` types ([#41246]).
+* `Dict` can be now shrunk manually by `sizehint!` ([#45004]).
+* `@time` now separates out % time spent recompiling invalidated methods ([#45015]).
+* `@time_imports` now shows any compilation and recompilation time percentages per import ([#45064]).
 
 Standard library changes
 ------------------------
 
-* The `nextprod` function now accepts tuples and other array types for its first argument ([#35791]).
-* The `reverse(A; dims)` function for multidimensional `A` can now reverse multiple dimensions at once
-  by passing a tuple for `dims`, and defaults to reversing all dimensions; there is also a multidimensional
-  in-place `reverse!(A; dims)` ([#37367]).
-* The function `isapprox(x,y)` now accepts the `norm` keyword argument also for numeric (i.e., non-array) arguments `x` and `y` ([#35883]).
-* `view`, `@view`, and `@views` now work on `AbstractString`s, returning a `SubString` when appropriate ([#35879]).
-* All `AbstractUnitRange{<:Integer}`s now work with `SubString`, `view`, `@view` and `@views` on strings ([#35879]).
-* `sum`, `prod`, `maximum`, and `minimum` now support `init` keyword argument ([#36188], [#35839]).
-* `unique(f, itr; seen=Set{T}())` now allows you to declare the container type used for
-  keeping track of values returned by `f` on elements of `itr` ([#36280]).
-* `Libdl` has been moved to `Base.Libc.Libdl`, however it is still accessible as an stdlib ([#35628]).
-* `first` and `last` functions now accept an integer as second argument to get that many
-  leading or trailing elements of any iterable ([#34868]).
-* `intersect` on `CartesianIndices` now returns `CartesianIndices` instead of `Vector{<:CartesianIndex}` ([#36643]).
-* `push!(c::Channel, v)` now returns channel `c`. Previously, it returned the pushed value `v` ([#34202]).
-* `RegexMatch` objects can now be probed for whether a named capture group exists within it through `haskey()` ([#36717]).
-* For consistency `haskey(r::RegexMatch, i::Integer)` has also been added and returns if the capture group for `i` exists ([#37300]).
-* A new standard library `TOML` has been added for parsing and printing [TOML files](https://toml.io) ([#37034]).
-* A new standard library `Downloads` has been added, which replaces the old `Base.download` function with `Downloads.download`, providing cross-platform, multi-protocol, in-process download functionality implemented with [libcurl](https://curl.haxx.se/libcurl/) ([#37340]).
-* The `Pkg.BinaryPlatforms` module has been moved into `Base` as `Base.BinaryPlatforms` and heavily reworked.
-  Applications that want to be compatible with the old API should continue to import `Pkg.BinaryPlatforms`,
-  however new users should use `Base.BinaryPlatforms` directly. ([#37320])
-* The `Pkg.Artifacts` module has been imported as a separate standard library.  It is still available as
-  `Pkg.Artifacts`, however starting from Julia v1.6+, packages may import simply `Artifacts` without importing
-  all of `Pkg` alongside. ([#37320])
+#### Package Manager
 
 #### LinearAlgebra
 
-* New method `LinearAlgebra.issuccess(::CholeskyPivoted)` for checking whether pivoted Cholesky factorization was successful ([#36002]).
-* `UniformScaling` can now be indexed into using ranges to return dense matrices and vectors ([#24359]).
-* New function `LinearAlgebra.BLAS.get_num_threads()` for getting the number of BLAS threads. ([#36360])
-* `(+)(::UniformScaling)` is now defined, making `+I` a valid unary operation. ([#36784])
+* The methods `a / b` and `b \ a` with `a` a scalar and `b` a vector,
+  which were equivalent to `a * pinv(b)`, have been removed due to the
+  risk of confusion with elementwise division ([#44358]).
+* We are now wholly reliant on libblastrampoline (LBT) for calling
+  BLAS and LAPACK. OpenBLAS is shipped by default, but building the
+  system image with other BLAS/LAPACK libraries is not
+  supported. Instead, it is recommended that the LBT mechanism be used
+  for swapping BLAS/LAPACK with vendor provided ones. ([#44360])
+* `normalize(x, p=2)` now supports any normed vector space `x`, including scalars ([#44925]).
 
 #### Markdown
 
 #### Printf
 
-* Complete overhaul of internal code to use the ryu float printing algorithms (from Julia 1.4); leads to consistent 2-5x performance improvements
-* New `Printf.tofloat` function allowing custom float types to more easily integrate with Printf formatting by converting their type to `Float16`, `Float32`, `Float64`, or `BigFloat`
-* New `Printf.format"..."` and `Printf.Format(...)` functions that allow creating `Printf.Format` objects that can be passed to `Printf.format` for easier dynamic printf formatting
-* `Printf.format(f::Printf.Format, args...)` as a non-macro function that applies a printf format `f` to provided `args`
-
-
 #### Random
 
+* `randn` and `randexp` now work for any `AbstractFloat` type defining `rand` ([#44714]).
 
 #### REPL
 
-* The `AbstractMenu` extension interface of `REPL.TerminalMenus` has been extensively
-  overhauled. The new interface does not rely on global configuration variables, is more
-  consistent in delegating printing of the navigation/selection markers, and provides
-  improved support for dynamic menus.  These changes are compatible with the previous
-  (deprecated) interface, so are non-breaking.
-
-  The new API offers several enhancements:
-
-  + Menus are configured in their constructors via keyword arguments
-  + For custom menu types, the new `Config` and `MultiSelectConfig` replace the global `CONFIG` Dict
-  + `request(menu; cursor=1)` allows you to control the initial cursor position in the menu (defaults to first item)
-  + `MultiSelectMenu` allows you to pass a list of initially-selected items with the `selected` keyword argument
-  + `writeLine` was deprecated to `writeline`, and `writeline` methods are not expected to print the cursor indicator.
-    The old `writeLine` continues to work, and any of its method extensions should print the cursor indicator as before.
-  + `printMenu` has been deprecated to `printmenu`, and it both accepts a state input and returns a state output
-    that controls the number of terminal lines erased when the menu is next refreshed. This plus related changes
-    makes `printmenu` work properly when the number of menu items might change depending on user choices.
-  + `numoptions`, returning the number of items in the menu, has been added as an alternative to implementing `options`
-  + `suppress_output` (primarily a testing option) has been added as a keyword argument to `request`,
-    rather than a configuration option
-
-* Windows REPL now supports 24-bit colors, by correctly interpreting virtual terminal escapes.
-
-
 #### SparseArrays
 
-* Display large sparse matrices with a Unicode "spy" plot of their nonzero patterns, and display small sparse matrices by an `Matrix`-like 2d layout of their contents.
-
 #### Dates
-* `Quarter` period is defined ([#35519]).
-* Zero-valued `FixedPeriod`s and `OtherPeriod`s now compare equal, e.g.,
-  `Year(0) == Day(0)`. The behavior of non-zero `Period`s is not changed. ([#37486])
 
-#### Statistics
+#### Downloads
 
+#### Statistics
 
 #### Sockets
 
+#### Tar
 
 #### Distributed
 
+* The package environment (active project, `LOAD_PATH`, `DEPOT_PATH`) are now propagated
+  when adding *local* workers (e.g. with `addprocs(N::Int)` or through the `--procs=N`
+  command line flag) ([#43270]).
+* `addprocs` for local workers now accept the `env` keyword argument for passing
+  environment variables to the workers processes. This was already supported for
+  remote workers ([#43270]).
 
 #### UUIDs
-* Change `uuid1` and `uuid4` to use `Random.RandomDevice()` as default random number generator ([#35872]).
-* Added `parse(::Type{UUID}, ::AbstractString)` method
+
+#### Unicode
+
+* `graphemes(s, m:n)` returns a substring of the `m`-th to `n`-th graphemes in `s` ([#44266]).
+
+#### Mmap
+
+#### DelimitedFiles
+
 
 Deprecated or removed
 ---------------------
 
+
 External dependencies
 ---------------------
 
@@ -190,5 +128,4 @@ External dependencies
 Tooling Improvements
 ---------------------
 
-
 <!--- generated by NEWS-update.jl: -->
diff --git a/README.md b/README.md
index a600f0f43fd838..abc6e9730f1e6c 100644
--- a/README.md
+++ b/README.md
@@ -5,18 +5,31 @@
 </a>
 </div>
 
-Code coverage:
-[![coveralls][coveralls-img]](https://coveralls.io/r/JuliaLang/julia?branch=master)
-[![codecov][codecov-img]](https://codecov.io/github/JuliaLang/julia?branch=master)
-
-Documentation:
-[![version 1][docs-img]](https://docs.julialang.org)
-
-[travis-img]: https://img.shields.io/travis/JuliaLang/julia/master.svg?label=Linux+/+macOS
-[appveyor-img]: https://img.shields.io/appveyor/ci/JuliaLang/julia/master.svg?label=Windows
-[coveralls-img]: https://img.shields.io/coveralls/github/JuliaLang/julia/master.svg?label=coveralls
-[codecov-img]: https://img.shields.io/codecov/c/github/JuliaLang/julia/master.svg?label=codecov
-[docs-img]: https://img.shields.io/badge/docs-v1-blue.svg
+<table>
+    <!-- Docs -->
+    <tr>
+        <td>Documentation</td>
+        <td>
+            <a href="https://docs.julialang.org"><img src='https://img.shields.io/badge/docs-v1-blue.svg'/></a>
+        </td>
+    </tr>
+    <!-- Continuous integration
+    To change the badge to point to a different pipeline, it is not sufficient to simply change the `?branch=` part.
+    You need to go to the Buildkite website and get the SVG URL for the correct pipeline. -->
+    <tr>
+        <td>Continuous integration</td>
+        <td>
+            <a href="https://buildkite.com/julialang/julia-master"><img src='https://badge.buildkite.com/f28e0d28b345f9fad5856ce6a8d64fffc7c70df8f4f2685cd8.svg?branch=master'/></a>
+        </td>
+    </tr>
+    <!-- Coverage -->
+    <tr>
+        <td>Code coverage</td>
+        <td>
+            <a href="https://coveralls.io/r/JuliaLang/julia?branch=master"><img src='https://img.shields.io/coveralls/github/JuliaLang/julia/master.svg?label=coveralls'/></a> <a href="https://codecov.io/github/JuliaLang/julia?branch=master"><img src='https://img.shields.io/codecov/c/github/JuliaLang/julia/master.svg?label=codecov'/></a>
+        </td>
+    </tr>
+</table>
 
 ## The Julia Language
 
@@ -34,7 +47,7 @@ and installing Julia, below.
 - **Documentation:** <https://docs.julialang.org>
 - **Packages:** <https://julialang.org/packages/>
 - **Discussion forum:** <https://discourse.julialang.org>
-- **Slack:** <https://julialang.slack.com> (get an invite from <https://slackinvite.julialang.org>)
+- **Slack:** <https://julialang.slack.com> (get an invite from <https://julialang.org/slack/>)
 - **YouTube:** <https://www.youtube.com/user/JuliaLanguage>
 - **Code coverage:** <https://coveralls.io/r/JuliaLang/julia>
 
@@ -46,7 +59,6 @@ helpful to start contributing to the Julia codebase.
 
 - [**StackOverflow**](https://stackoverflow.com/questions/tagged/julia-lang)
 - [**Twitter**](https://twitter.com/JuliaLanguage)
-- [**Meetup**](https://julia.meetup.com/)
 - [**Learning resources**](https://julialang.org/learning/)
 
 ## Binary Installation
@@ -71,17 +83,17 @@ recommend you use the official Julia binaries instead.
 ## Building Julia
 
 First, make sure you have all the [required
-dependencies](https://github.com/JuliaLang/julia/blob/master/doc/build/build.md#required-build-tools-and-external-libraries) installed.
+dependencies](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/build.md#required-build-tools-and-external-libraries) installed.
 Then, acquire the source code by cloning the git repository:
 
-    git clone git://github.com/JuliaLang/julia.git
+    git clone https://github.com/JuliaLang/julia.git
 
 By default you will be building the latest unstable version of
-Julia. However, most users should use the most recent stable version
+Julia. However, most users should use the [most recent stable version](https://github.com/JuliaLang/julia/releases)
 of Julia. You can get this version by changing to the Julia directory
 and running:
 
-    git checkout v1.5.0
+    git checkout v1.7.2
 
 Now run `make` to build the `julia` executable.
 
@@ -103,8 +115,8 @@ You can read about [getting
 started](https://docs.julialang.org/en/v1/manual/getting-started/)
 in the manual.
 
-In case this default build path did not work, detailed build instructions
-are included in the [build documentation](https://github.com/JuliaLang/julia/blob/master/doc/build).
+Detailed build instructions, should they be necessary,
+are included in the [build documentation](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/).
 
 ### Uninstalling Julia
 
@@ -121,13 +133,12 @@ The Julia source code is organized as follows:
 | -                 | -                                                                  |
 | `base/`           | source code for the Base module (part of Julia's standard library) |
 | `stdlib/`         | source code for other standard library packages                    |
-| `contrib/`        | editor support for Julia source, miscellaneous scripts             |
+| `cli/`            | source for the command line interface/REPL                         |
+| `contrib/`        | miscellaneous scripts                                              |
 | `deps/`           | external dependencies                                              |
-| `doc/src/manual/` | source for the user manual                                         |
-| `doc/build/`      | detailed notes for building Julia                                  |
+| `doc/src/`        | source for the user manual                                         |
 | `src/`            | source for Julia language core                                     |
 | `test/`           | test suites                                                        |
-| `ui/`             | source for various front ends                                      |
 | `usr/`            | binaries and shared libraries loaded by Julia's standard libraries |
 
 ## Terminal, Editors and IDEs
@@ -146,10 +157,8 @@ Support for editing Julia is available for many
 [Sublime Text](https://github.com/JuliaEditorSupport/Julia-sublime), and many
 others.
 
-Supported IDEs include: [Juno](http://junolab.org/) (Atom plugin),
-[julia-vscode](https://github.com/JuliaEditorSupport/julia-vscode) (VS
-Code plugin), and
-[julia-intellij](https://github.com/JuliaEditorSupport/julia-intellij)
-(IntelliJ IDEA plugin). The popular [Jupyter](https://jupyter.org/)
-notebook interface is available through
-[IJulia](https://github.com/JuliaLang/IJulia.jl).
+For users who prefer IDEs, we recommend using VS Code with the
+[julia-vscode](https://www.julia-vscode.org/) plugin.
+For notebook users, [Jupyter](https://jupyter.org/) notebook support is available through the
+[IJulia](https://github.com/JuliaLang/IJulia.jl) package, and
+the [Pluto.jl](https://github.com/fonsp/Pluto.jl) package provides Pluto notebooks.
diff --git a/THIRDPARTY.md b/THIRDPARTY.md
new file mode 100644
index 00000000000000..4a35bbdb1b7cee
--- /dev/null
+++ b/THIRDPARTY.md
@@ -0,0 +1,56 @@
+The Julia language is licensed under the MIT License (see [LICENSE.md](./LICENSE.md) ). The "language" consists
+of the compiler (the contents of src/), most of the standard library (base/),
+and some utilities (most of the rest of the files in this repository). See below
+for exceptions.
+
+- [crc32c.c](https://stackoverflow.com/questions/17645167/implementing-sse-4-2s-crc32c-in-software) (CRC-32c checksum code by Mark Adler) [[ZLib](https://opensource.org/licenses/Zlib)].
+- [LDC](https://github.com/ldc-developers/ldc/blob/master/LICENSE) (for ccall/cfunction ABI definitions) [BSD-3]. The portion of code that Julia uses from LDC is [BSD-3] licensed.
+- [LLVM](https://releases.llvm.org/3.9.0/LICENSE.TXT) (for parts of src/disasm.cpp) [UIUC]
+- [MINGW](https://sourceforge.net/p/mingw/mingw-org-wsl/ci/legacy/tree/mingwrt/mingwex/dirname.c) (for dirname implementation on Windows) [MIT]
+- [NetBSD](https://www.netbsd.org/about/redistribution.html) (for setjmp, longjmp, and strptime implementations on Windows) [BSD-3]
+- [Python](https://docs.python.org/3/license.html) (for strtod implementation on Windows) [PSF]
+- [FEMTOLISP](https://github.com/JeffBezanson/femtolisp) [BSD-3]
+
+The following components included in Julia `Base` have their own separate licenses:
+
+- base/ryu/* [Boost] (see [ryu](https://github.com/ulfjack/ryu/blob/master/LICENSE-Boost))
+- base/special/{rem_pio2,hyperbolic}.jl [Freely distributable with preserved copyright notice] (see [FDLIBM](https://www.netlib.org/fdlibm))
+
+The Julia language links to the following external libraries, which have their
+own licenses:
+
+- [LIBUNWIND](https://github.com/libunwind/libunwind/blob/master/LICENSE) [MIT]
+- [LIBUV](https://github.com/JuliaLang/libuv/blob/julia-uv2-1.39.0/LICENSE) [MIT]
+- [LLVM](https://releases.llvm.org/12.0.1/LICENSE.TXT) [APACHE 2.0 with LLVM Exception]
+- [UTF8PROC](https://github.com/JuliaStrings/utf8proc) [MIT]
+
+Julia's `stdlib` uses the following external libraries, which have their own licenses:
+
+- [DSFMT](https://github.com/MersenneTwister-Lab/dSFMT/blob/master/LICENSE.txt) [BSD-3]
+- [OPENLIBM](https://github.com/JuliaMath/openlibm/blob/master/LICENSE.md) [MIT, BSD-2, ISC]
+- [GMP](https://gmplib.org/manual/Copying.html#Copying) [LGPL3+ or GPL2+]
+- [LIBGIT2](https://github.com/libgit2/libgit2/blob/development/COPYING) [GPL2+ with unlimited linking exception]
+- [CURL](https://curl.haxx.se/docs/copyright.html) [MIT/X derivative]
+- [LIBSSH2](https://github.com/libssh2/libssh2/blob/master/COPYING) [BSD-3]
+- [MBEDTLS](https://github.com/ARMmbed/mbedtls/blob/development/LICENSE) [Apache 2.0]
+- [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Copying) [LGPL3+]
+- [OPENBLAS](https://raw.github.com/xianyi/OpenBLAS/master/LICENSE) [BSD-3]
+- [LAPACK](https://netlib.org/lapack/LICENSE.txt) [BSD-3]
+- [PCRE](https://www.pcre.org/licence.txt) [BSD-3]
+- [SUITESPARSE](https://github.com/DrTimothyAldenDavis/SuiteSparse/blob/master/LICENSE.txt) [mix of LGPL2+ and GPL2+; see individual module licenses]
+- [LIBBLASTRAMPOLINE](https://github.com/staticfloat/libblastrampoline/blob/main/LICENSE) [MIT]
+- [NGHTTP2](https://github.com/nghttp2/nghttp2/blob/master/COPYING) [MIT]
+
+Julia's build process uses the following external tools:
+
+- [PATCHELF](https://nixos.org/patchelf.html)
+- [OBJCONV](https://www.agner.org/optimize/#objconv)
+- [LIBWHICH](https://github.com/vtjnash/libwhich/blob/master/LICENSE) [MIT]
+
+Julia bundles the following external programs and libraries:
+
+- [7-Zip](https://www.7-zip.org/license.txt)
+- [ZLIB](https://zlib.net/zlib_license.html)
+
+On some platforms, distributions of Julia contain SSL certificate authority certificates,
+released under the [Mozilla Public License](https://en.wikipedia.org/wiki/Mozilla_Public_License).
diff --git a/VERSION b/VERSION
index 17b2cde89054fd..e889581dd8a308 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-1.6.0-DEV
+1.9.0-DEV
diff --git a/base/Base.jl b/base/Base.jl
index 057c512887c6c2..e3fec462215ef6 100644
--- a/base/Base.jl
+++ b/base/Base.jl
@@ -20,19 +20,48 @@ include(path::String) = include(Base, path)
 const is_primary_base_module = ccall(:jl_module_parent, Ref{Module}, (Any,), Base) === Core.Main
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Base, is_primary_base_module)
 
+# The @inline/@noinline macros that can be applied to a function declaration are not available
+# until after array.jl, and so we will mark them within a function body instead.
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
+
 # Try to help prevent users from shooting them-selves in the foot
 # with ambiguities by defining a few common and critical operations
 # (and these don't need the extra convert code)
-getproperty(x::Module, f::Symbol) = getfield(x, f)
-setproperty!(x::Module, f::Symbol, v) = setfield!(x, f, v)
-getproperty(x::Type, f::Symbol) = getfield(x, f)
-setproperty!(x::Type, f::Symbol, v) = setfield!(x, f, v)
-getproperty(x::Tuple, f::Int) = getfield(x, f)
+getproperty(x::Module, f::Symbol) = (@inline; getglobal(x, f))
+getproperty(x::Type, f::Symbol) = (@inline; getfield(x, f))
+setproperty!(x::Type, f::Symbol, v) = error("setfield! fields of Types should not be changed")
+getproperty(x::Tuple, f::Int) = (@inline; getfield(x, f))
 setproperty!(x::Tuple, f::Int, v) = setfield!(x, f, v) # to get a decent error
 
-getproperty(x, f::Symbol) = getfield(x, f)
+getproperty(x, f::Symbol) = (@inline; getfield(x, f))
 setproperty!(x, f::Symbol, v) = setfield!(x, f, convert(fieldtype(typeof(x), f), v))
 
+dotgetproperty(x, f) = getproperty(x, f)
+
+getproperty(x::Module, f::Symbol, order::Symbol) = (@inline; getglobal(x, f, order))
+function setproperty!(x::Module, f::Symbol, v, order::Symbol=:monotonic)
+    @inline
+    val::Core.get_binding_type(x, f) = v
+    return setglobal!(x, f, val, order)
+end
+getproperty(x::Type, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x::Type, f::Symbol, v, order::Symbol) = error("setfield! fields of Types should not be changed")
+getproperty(x::Tuple, f::Int, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x::Tuple, f::Int, v, order::Symbol) = setfield!(x, f, v, order) # to get a decent error
+
+getproperty(x, f::Symbol, order::Symbol) = (@inline; getfield(x, f, order))
+setproperty!(x, f::Symbol, v, order::Symbol) = (@inline; setfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
+
+swapproperty!(x, f::Symbol, v, order::Symbol=:notatomic) =
+    (@inline; Core.swapfield!(x, f, convert(fieldtype(typeof(x), f), v), order))
+modifyproperty!(x, f::Symbol, op, v, order::Symbol=:notatomic) =
+    (@inline; Core.modifyfield!(x, f, op, v, order))
+replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:notatomic, fail_order::Symbol=success_order) =
+    (@inline; Core.replacefield!(x, f, expected, convert(fieldtype(typeof(x), f), desired), success_order, fail_order))
+
+convert(::Type{Any}, Core.@nospecialize x) = x
+convert(::Type{T}, x::T) where {T} = x
 include("coreio.jl")
 
 eval(x) = Core.eval(Base, x)
@@ -78,6 +107,9 @@ include("options.jl")
 include("promotion.jl")
 include("tuple.jl")
 include("expr.jl")
+Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B} = (@inline; Pair{A, B}(convert(A, a)::A, convert(B, b)::B))
+#Pair{Any, B}(@nospecialize(a::Any), b) where {B} = (@inline; Pair{Any, B}(a, Base.convert(B, b)::B))
+#Pair{A, Any}(a, @nospecialize(b::Any)) where {A} = (@inline; Pair{A, Any}(Base.convert(A, a)::A, b))
 include("pair.jl")
 include("traits.jl")
 include("range.jl")
@@ -95,6 +127,9 @@ include("refpointer.jl")
 include("checked.jl")
 using .Checked
 
+# Lazy strings
+include("strings/lazy.jl")
+
 # array structures
 include("indices.jl")
 include("array.jl")
@@ -115,6 +150,24 @@ using .Iterators: Flatten, Filter, product  # for generators
 
 include("namedtuple.jl")
 
+# For OS specific stuff
+# We need to strcat things here, before strings are really defined
+function strcat(x::String, y::String)
+    out = ccall(:jl_alloc_string, Ref{String}, (Csize_t,), Core.sizeof(x) + Core.sizeof(y))
+    GC.@preserve x y out begin
+        out_ptr = unsafe_convert(Ptr{UInt8}, out)
+        unsafe_copyto!(out_ptr, unsafe_convert(Ptr{UInt8}, x), Core.sizeof(x))
+        unsafe_copyto!(out_ptr + Core.sizeof(x), unsafe_convert(Ptr{UInt8}, y), Core.sizeof(y))
+    end
+    return out
+end
+include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl"))     # include($BUILDROOT/base/build_h.jl)
+include(strcat((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
+
+# These used to be in build_h.jl and are retained for backwards compatibility
+const libblas_name = "libblastrampoline"
+const liblapack_name = "libblastrampoline"
+
 # numeric operations
 include("hashing.jl")
 include("rounding.jl")
@@ -130,7 +183,7 @@ include("abstractarraymath.jl")
 include("arraymath.jl")
 
 # SIMD loops
-@pure sizeof(s::String) = Core.sizeof(s)  # needed by gensym as called from simdloop
+sizeof(s::String) = Core.sizeof(s)  # needed by gensym as called from simdloop
 include("simdloop.jl")
 using .SimdLoop
 
@@ -158,15 +211,12 @@ include("dict.jl")
 include("abstractset.jl")
 include("set.jl")
 
+# Strings
 include("char.jl")
 include("strings/basic.jl")
 include("strings/string.jl")
 include("strings/substring.jl")
 
-# For OS specific stuff
-include(string((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "build_h.jl"))     # include($BUILDROOT/base/build_h.jl)
-include(string((length(Core.ARGS)>=2 ? Core.ARGS[2] : ""), "version_git.jl")) # include($BUILDROOT/base/version_git.jl)
-
 # Initialize DL_LOAD_PATH as early as possible.  We are defining things here in
 # a slightly more verbose fashion than usual, because we're running so early.
 const DL_LOAD_PATH = String[]
@@ -174,8 +224,6 @@ let os = ccall(:jl_get_UNAME, Any, ())
     if os === :Darwin || os === :Apple
         if Base.DARWIN_FRAMEWORK
             push!(DL_LOAD_PATH, "@loader_path/Frameworks")
-        else
-            push!(DL_LOAD_PATH, "@loader_path/julia")
         end
         push!(DL_LOAD_PATH, "@loader_path")
     end
@@ -191,9 +239,9 @@ include("iobuffer.jl")
 # strings & printing
 include("intfuncs.jl")
 include("strings/strings.jl")
+include("regex.jl")
 include("parse.jl")
 include("shell.jl")
-include("regex.jl")
 include("show.jl")
 include("arrayshow.jl")
 include("methodshow.jl")
@@ -202,12 +250,11 @@ include("methodshow.jl")
 include("cartesian.jl")
 using .Cartesian
 include("multidimensional.jl")
-include("permuteddimsarray.jl")
-using .PermutedDimsArrays
 
 include("broadcast.jl")
 using .Broadcast
-using .Broadcast: broadcasted, broadcasted_kwsyntax, materialize, materialize!
+using .Broadcast: broadcasted, broadcasted_kwsyntax, materialize, materialize!,
+                  broadcast_preserving_zero_d, andand, oror
 
 # missing values
 include("missing.jl")
@@ -220,7 +267,9 @@ include("sysinfo.jl")
 include("libc.jl")
 using .Libc: getpid, gethostname, time
 
-include("env.jl")
+# Logging
+include("logging.jl")
+using .CoreLogging
 
 # Concurrency
 include("linked_list.jl")
@@ -228,16 +277,12 @@ include("condition.jl")
 include("threads.jl")
 include("lock.jl")
 include("channels.jl")
+include("partr.jl")
 include("task.jl")
 include("threads_overloads.jl")
 include("weakkeydict.jl")
 
-# Logging
-include("logging.jl")
-using .CoreLogging
-
-# BinaryPlatforms, used by Artifacts
-include("binaryplatforms.jl")
+include("env.jl")
 
 # functions defined in Random
 function rand end
@@ -266,7 +311,7 @@ const (∛)=cbrt
 delete_method(which(include, (Module, String)))
 let SOURCE_PATH = ""
     global function include(mod::Module, path::String)
-        prev = SOURCE_PATH
+        prev = SOURCE_PATH::String
         path = normpath(joinpath(dirname(prev), path))
         Core.println(path)
         ccall(:jl_uv_flush, Nothing, (Ptr{Nothing},), Core.io_pointer(Core.stdout))
@@ -279,9 +324,12 @@ let SOURCE_PATH = ""
 end
 
 # reduction along dims
-include("reducedim.jl")  # macros in this file relies on string.jl
+include("reducedim.jl")  # macros in this file rely on string.jl
 include("accumulate.jl")
 
+include("permuteddimsarray.jl")
+using .PermutedDimsArrays
+
 # basic data structures
 include("ordering.jl")
 using .Order
@@ -290,6 +338,9 @@ using .Order
 include("sort.jl")
 using .Sort
 
+# BinaryPlatforms, used by Artifacts.  Needs `Sort`.
+include("binaryplatforms.jl")
+
 # Fast math
 include("fastmath.jl")
 using .FastMath
@@ -314,9 +365,6 @@ using .MPFR
 
 include("combinatorics.jl")
 
-# more hashing definitions
-include("hashing2.jl")
-
 # irrational mathematical constants
 include("irrationals.jl")
 include("mathconstants.jl")
@@ -325,16 +373,19 @@ using .MathConstants: ℯ, π, pi
 # metaprogramming
 include("meta.jl")
 
+# Stack frames and traces
+include("stacktraces.jl")
+using .StackTraces
+
+# experimental API's
+include("experimental.jl")
+
 # utilities
 include("deepcopy.jl")
 include("download.jl")
 include("summarysize.jl")
 include("errorshow.jl")
 
-# Stack frames and traces
-include("stacktraces.jl")
-using .StackTraces
-
 include("initdefs.jl")
 
 # worker threads
@@ -352,9 +403,6 @@ include("util.jl")
 
 include("asyncmap.jl")
 
-# experimental API's
-include("experimental.jl")
-
 # deprecated functions
 include("deprecated.jl")
 
@@ -381,23 +429,66 @@ include(mapexpr::Function, mod::Module, _path::AbstractString) = _include(mapexp
 
 end_base_include = time_ns()
 
+const _sysimage_modules = PkgId[]
+in_sysimage(pkgid::PkgId) = pkgid in _sysimage_modules
+
+# Precompiles for Revise and other packages
+# TODO: move these to contrib/generate_precompile.jl
+# The problem is they don't work there
+for match = _methods(+, (Int, Int), -1, get_world_counter())
+    m = match.method
+    delete!(push!(Set{Method}(), m), m)
+    copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(match)))
+
+    empty!(Set())
+    push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
+    (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
+    (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two]
+    (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
+    (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two]
+    (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int]
+    Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one]
+    Dict(Base => [:(1+1)])[Base]
+    Dict(:one => [1])[:one]
+    Dict("abc" => Set())["abc"]
+    pushfirst!([], sum)
+    get(Base.pkgorigins, Base.PkgId(Base), nothing)
+    sort!([1,2,3])
+    unique!([1,2,3])
+    cumsum([1,2,3])
+    append!(Int[], BitSet())
+    isempty(BitSet())
+    delete!(BitSet([1,2]), 3)
+    deleteat!(Int32[1,2,3], [1,3])
+    deleteat!(Any[1,2,3], [1,3])
+    Core.svec(1, 2) == Core.svec(3, 4)
+    any(t->t[1].line > 1, [(LineNumberNode(2,:none), :(1+1))])
+
+    # Code loading uses this
+    sortperm(mtime.(readdir(".")), rev=true)
+    # JLLWrappers uses these
+    Dict{UUID,Set{String}}()[UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210")] = Set{String}()
+    get!(Set{String}, Dict{UUID,Set{String}}(), UUID("692b3bcd-3c85-4b1f-b108-f13ce0eb3210"))
+    eachindex(IndexLinear(), Expr[])
+    push!(Expr[], Expr(:return, false))
+    vcat(String[], String[])
+    k, v = (:hello => nothing)
+    precompile(indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int))
+    precompile(indexed_iterate, (Pair{Symbol, Union{Nothing, String}}, Int, Int))
+    # Preferences uses these
+    precompile(get_preferences, (UUID,))
+    precompile(record_compiletime_preference, (UUID, String))
+    get(Dict{String,Any}(), "missing", nothing)
+    delete!(Dict{String,Any}(), "missing")
+    for (k, v) in Dict{String,Any}()
+        println(k)
+    end
+
+    break   # only actually need to do this once
+end
+
 if is_primary_base_module
 function __init__()
-    # try to ensuremake sure OpenBLAS does not set CPU affinity (#1070, #9639)
-    if !haskey(ENV, "OPENBLAS_MAIN_FREE") && !haskey(ENV, "GOTOBLAS_MAIN_FREE")
-        ENV["OPENBLAS_MAIN_FREE"] = "1"
-    end
-    # And try to prevent openblas from starting too many threads, unless/until specifically requested
-    if !haskey(ENV, "OPENBLAS_NUM_THREADS") && !haskey(ENV, "OMP_NUM_THREADS")
-        cpu_threads = Sys.CPU_THREADS::Int
-        if cpu_threads > 8 # always at most 8
-            ENV["OPENBLAS_NUM_THREADS"] = "8"
-        elseif haskey(ENV, "JULIA_CPU_THREADS") # or exactly as specified
-            ENV["OPENBLAS_NUM_THREADS"] = cpu_threads
-        end # otherwise, trust that openblas will pick CPU_THREADS anyways, without any intervention
-    end
-    # for the few uses of Libc.rand in Base:
-    Libc.srand()
     # Base library init
     reinit_stdio()
     Multimedia.reinit_displays() # since Multimedia.displays uses stdout as fallback
@@ -405,9 +496,17 @@ function __init__()
     init_depot_path()
     init_load_path()
     init_active_project()
+    append!(empty!(_sysimage_modules), keys(loaded_modules))
+    if haskey(ENV, "JULIA_MAX_NUM_PRECOMPILE_FILES")
+        MAX_NUM_PRECOMPILE_FILES[] = parse(Int, ENV["JULIA_MAX_NUM_PRECOMPILE_FILES"])
+    end
     nothing
 end
 
+# enable threads support
+@eval PCRE PCRE_COMPILE_LOCK = Threads.SpinLock()
+
 end
 
+
 end # baremodule Base
diff --git a/base/Enums.jl b/base/Enums.jl
index 06860402fbcb1c..f0a3c4c9f3a308 100644
--- a/base/Enums.jl
+++ b/base/Enums.jl
@@ -25,10 +25,16 @@ Base.isless(x::T, y::T) where {T<:Enum} = isless(basetype(T)(x), basetype(T)(y))
 
 Base.Symbol(x::Enum) = namemap(typeof(x))[Integer(x)]::Symbol
 
-Base.print(io::IO, x::Enum) = print(io, Symbol(x))
+function _symbol(x::Enum)
+    names = namemap(typeof(x))
+    x = Integer(x)
+    get(() -> Symbol("<invalid #$x>"), names, x)::Symbol
+end
+
+Base.print(io::IO, x::Enum) = print(io, _symbol(x))
 
 function Base.show(io::IO, x::Enum)
-    sym = Symbol(x)
+    sym = _symbol(x)
     if !(get(io, :compact, false)::Bool)
         from = get(io, :module, Main)
         def = typeof(x).name.module
@@ -119,6 +125,13 @@ To list all the instances of an enum use `instances`, e.g.
 julia> instances(Fruit)
 (apple, orange, kiwi)
 ```
+
+It is possible to construct a symbol from an enum instance:
+
+```jldoctest fruitenum
+julia> Symbol(apple)
+:apple
+```
 """
 macro enum(T::Union{Symbol,Expr}, syms...)
     if isempty(syms)
@@ -138,8 +151,7 @@ macro enum(T::Union{Symbol,Expr}, syms...)
     values = Vector{basetype}()
     seen = Set{Symbol}()
     namemap = Dict{basetype,Symbol}()
-    lo = hi = 0
-    i = zero(basetype)
+    lo = hi = i = zero(basetype)
     hasexpr = false
 
     if length(syms) == 1 && syms[1] isa Expr && syms[1].head === :block
@@ -180,7 +192,6 @@ macro enum(T::Union{Symbol,Expr}, syms...)
         if length(values) == 1
             lo = hi = i
         else
-            lo = min(lo, i)
             hi = max(hi, i)
         end
         i += oneunit(i)
diff --git a/base/Makefile b/base/Makefile
index 9ba9ee4be2a78a..5c12ab1c149d83 100644
--- a/base/Makefile
+++ b/base/Makefile
@@ -6,12 +6,6 @@ include $(JULIAHOME)/Make.inc
 
 TAGGED_RELEASE_BANNER := ""
 
-ifneq ($(USEMSVC), 1)
-CPP_STDOUT := $(CPP) -P
-else
-CPP_STDOUT := $(CPP) -E
-endif
-
 all: $(addprefix $(BUILDDIR)/,pcre_h.jl errno_h.jl build_h.jl.phony features_h.jl file_constants.jl uv_constants.jl version_git.jl.phony)
 
 PCRE_CONST := 0x[0-9a-fA-F]+|[0-9]+|\([\-0-9]+\)
@@ -23,11 +17,11 @@ endif
 
 define parse_features
 @echo "# $(2) features" >> $@
-@$(call PRINT_PERL, cat ../src/features_$(1).h | perl -lne 'print "const JL_$(2)_$$1 = UInt32($$2)" if /^\s*JL_FEATURE_DEF(?:_NAME)?\(\s*(\w+)\s*,\s*([^,]+)\s*,.*\)\s*(?:\/\/.*)?$$/' >> $@)
+@$(call PRINT_PERL, cat $(SRCDIR)/../src/features_$(1).h | perl -lne 'print "const JL_$(2)_$$1 = UInt32($$2)" if /^\s*JL_FEATURE_DEF(?:_NAME)?\(\s*(\w+)\s*,\s*([^,]+)\s*,.*\)\s*(?:\/\/.*)?$$/' >> $@)
 @echo >> $@
 endef
 
-$(BUILDDIR)/features_h.jl: ../src/features_x86.h ../src/features_aarch32.h ../src/features_aarch64.h
+$(BUILDDIR)/features_h.jl: $(SRCDIR)/../src/features_x86.h $(SRCDIR)/../src/features_aarch32.h $(SRCDIR)/../src/features_aarch64.h
 	@-rm -f $@
 	@$(call parse_features,x86,X86)
 	@$(call parse_features,aarch32,AArch32)
@@ -42,7 +36,7 @@ $(BUILDDIR)/errno_h.jl:
 $(BUILDDIR)/file_constants.jl: $(SRCDIR)/../src/file_constants.h
 	@$(call PRINT_PERL, $(CPP_STDOUT) -DJULIA $< | perl -nle 'print "$$1 0o$$2" if /^(\s*const\s+[A-z_]+\s+=)\s+(0[0-9]*)\s*$$/; print "$$1" if /^\s*(const\s+[A-z_]+\s+=\s+([1-9]|0x)[0-9A-z]*)\s*$$/' > $@)
 
-$(BUILDDIR)/uv_constants.jl: $(SRCDIR)/../src/uv_constants.h $(build_includedir)/uv/errno.h
+$(BUILDDIR)/uv_constants.jl: $(SRCDIR)/../src/uv_constants.h $(LIBUV_INC)/uv/errno.h
 	@$(call PRINT_PERL, $(CPP_STDOUT) "-I$(LIBUV_INC)" -DJULIA $< | tail -n 16 > $@)
 
 $(BUILDDIR)/build_h.jl.phony:
@@ -53,8 +47,6 @@ else
 	@echo "const MACHINE = \"$(XC_HOST)\"" >> $@
 endif
 	@echo "const libm_name = \"$(LIBMNAME)\"" >> $@
-	@echo "const libblas_name = \"$(LIBBLASNAME)\"" >> $@
-	@echo "const liblapack_name = \"$(LIBLAPACKNAME)\"" >> $@
 ifeq ($(USE_BLAS64), 1)
 	@echo "const USE_BLAS64 = true" >> $@
 else
@@ -91,6 +83,7 @@ ifeq ($(DARWIN_FRAMEWORK), 1)
 else
 	@echo "const DARWIN_FRAMEWORK = false" >> $@
 endif
+	@echo "const BUILD_TRIPLET = \"$(BB_TRIPLET_LIBGFORTRAN_CXXABI)\"" >> $@
 
 	@# This to ensure that we always rebuild this file, but only when it is modified do we touch build_h.jl,
 	@# ensuring we rebuild the system image as infrequently as possible
@@ -104,7 +97,7 @@ endif
 $(BUILDDIR)/version_git.jl.phony: $(SRCDIR)/version_git.sh
 ifneq ($(NO_GIT), 1)
 	sh $< $(SRCDIR) > $@
-	@# This to avoid touching git_version.jl when it is not modified,
+	@# This to avoid touching version_git.jl when it is not modified,
 	@# so that the system image does not need to be rebuilt.
 	@if ! cmp -s $@ version_git.jl; then \
 	    $(call PRINT_PERL,) \
@@ -168,52 +161,87 @@ endif
 #	echo "$$P"
 
 define symlink_system_library
-symlink_$1: $$(build_private_libdir)/$1.$$(SHLIB_EXT)
-$$(build_private_libdir)/$1.$$(SHLIB_EXT):
-	REALPATH=`$$(call spawn,$$(build_depsbindir)/libwhich) -p $$(notdir $$@)` && \
-	$$(call resolve_path,REALPATH) && \
-	[ -e "$$$$REALPATH" ] && \
-	([ ! -e "$$@" ] || rm "$$@") && \
-	echo ln -sf "$$$$REALPATH" "$$@" && \
-	ln -sf "$$$$REALPATH" "$$@"
-ifneq ($2,)
-ifneq ($$(USE_SYSTEM_$2),0)
-SYMLINK_SYSTEM_LIBRARIES += symlink_$1
-endif
+libname_$2 := $$(notdir $(call versioned_libname,$2,$3))
+libpath_$2 := $$(shell $$(call spawn,$$(LIBWHICH)) -p $$(libname_$2) 2>/dev/null)
+symlink_$2: $$(build_private_libdir)/$$(libname_$2)
+$$(build_private_libdir)/$$(libname_$2):
+	@if [ -e "$$(libpath_$2)" ]; then \
+		REALPATH=$$(libpath_$2); \
+		$$(call resolve_path,REALPATH) && \
+		[ -e "$$$$REALPATH" ] && \
+		rm -f "$$@" && \
+		echo ln -sf "$$$$REALPATH" "$$@" && \
+		ln -sf "$$$$REALPATH" "$$@"; \
+	else \
+		if [ "$4" != "ALLOW_FAILURE" ]; then \
+			echo "System library symlink failure: Unable to locate $$(libname_$2) on your system!" >&2; \
+			false; \
+		fi; \
+	fi
+ifneq ($$(USE_SYSTEM_$1),0)
+SYMLINK_SYSTEM_LIBRARIES += symlink_$2
 endif
 endef
 
+# libexec executables
+symlink_p7zip: $(build_bindir)/7z$(EXE)
+
+ifneq ($(USE_SYSTEM_P7ZIP),0)
+SYMLINK_SYSTEM_LIBRARIES += symlink_p7zip
+7Z_PATH := $(shell which 7z$(EXE))
+endif
+
+$(build_bindir)/7z$(EXE):
+	[ -e "$(7Z_PATH)" ] && \
+	rm -f "$@" && \
+	ln -svf "$(7Z_PATH)" "$@"
+
 # the following excludes: libuv.a, libutf8proc.a
 
-$(eval $(call symlink_system_library,$(LIBMNAME)))
 ifneq ($(USE_SYSTEM_LIBM),0)
-SYMLINK_SYSTEM_LIBRARIES += symlink_$(LIBMNAME)
+$(eval $(call symlink_system_library,LIBM,$(LIBMNAME)))
 else ifneq ($(USE_SYSTEM_OPENLIBM),0)
-SYMLINK_SYSTEM_LIBRARIES += symlink_$(LIBMNAME)
+$(eval $(call symlink_system_library,OPENLIBM,$(LIBMNAME)))
 endif
 
-$(eval $(call symlink_system_library,libpcre2-8,PCRE))
-$(eval $(call symlink_system_library,libdSFMT,DSFMT))
-$(eval $(call symlink_system_library,$(LIBBLASNAME),BLAS))
+ifeq ($(APPLE_ARCH),arm64)
+$(eval $(call symlink_system_library,CSL,libgcc_s,1.1))
+else
+$(eval $(call symlink_system_library,CSL,libgcc_s,1))
+endif
+ifneq (,$(LIBGFORTRAN_VERSION))
+$(eval $(call symlink_system_library,CSL,libgfortran,$(LIBGFORTRAN_VERSION)))
+endif
+$(eval $(call symlink_system_library,CSL,libquadmath,0))
+$(eval $(call symlink_system_library,CSL,libstdc++,6))
+# We allow libssp, libatomic and libgomp to fail as they are not available on all systems
+$(eval $(call symlink_system_library,CSL,libssp,0,ALLOW_FAILURE))
+$(eval $(call symlink_system_library,CSL,libatomic,1,ALLOW_FAILURE))
+$(eval $(call symlink_system_library,CSL,libgomp,1,ALLOW_FAILURE))
+$(eval $(call symlink_system_library,PCRE,libpcre2-8))
+$(eval $(call symlink_system_library,DSFMT,libdSFMT))
+$(eval $(call symlink_system_library,LIBBLASTRAMPOLINE,libblastrampoline))
+$(eval $(call symlink_system_library,BLAS,$(LIBBLASNAME)))
 ifneq ($(LIBLAPACKNAME),$(LIBBLASNAME))
-$(eval $(call symlink_system_library,$(LIBLAPACKNAME),LAPACK))
+$(eval $(call symlink_system_library,LAPACK,$(LIBLAPACKNAME)))
 endif
-$(eval $(call symlink_system_library,libgmp,GMP))
-$(eval $(call symlink_system_library,libmpfr,MPFR))
-$(eval $(call symlink_system_library,libmbedtls,MBEDTLS))
-$(eval $(call symlink_system_library,libmbedcrypto,MBEDTLS))
-$(eval $(call symlink_system_library,libmbedx509,MBEDTLS))
-$(eval $(call symlink_system_library,libssh2,LIBSSH2))
-$(eval $(call symlink_system_library,libcurl,CURL))
-$(eval $(call symlink_system_library,libgit2,LIBGIT2))
-$(eval $(call symlink_system_library,libamd,SUITESPARSE))
-$(eval $(call symlink_system_library,libcamd,SUITESPARSE))
-$(eval $(call symlink_system_library,libccolamd,SUITESPARSE))
-$(eval $(call symlink_system_library,libcholmod,SUITESPARSE))
-$(eval $(call symlink_system_library,libcolamd,SUITESPARSE))
-$(eval $(call symlink_system_library,libumfpack,SUITESPARSE))
-$(eval $(call symlink_system_library,libspqr,SUITESPARSE))
-$(eval $(call symlink_system_library,libsuitesparseconfig,SUITESPARSE))
+$(eval $(call symlink_system_library,GMP,libgmp))
+$(eval $(call symlink_system_library,MPFR,libmpfr))
+$(eval $(call symlink_system_library,MBEDTLS,libmbedtls))
+$(eval $(call symlink_system_library,MBEDTLS,libmbedcrypto))
+$(eval $(call symlink_system_library,MBEDTLS,libmbedx509))
+$(eval $(call symlink_system_library,LIBSSH2,libssh2))
+$(eval $(call symlink_system_library,NGHTTP2,libnghttp2))
+$(eval $(call symlink_system_library,CURL,libcurl))
+$(eval $(call symlink_system_library,LIBGIT2,libgit2))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libamd))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libcamd))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libccolamd))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libcholmod))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libcolamd))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libumfpack))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libspqr))
+$(eval $(call symlink_system_library,LIBSUITESPARSE,libsuitesparseconfig))
 # EXCLUDED LIBRARIES (installed/used, but not vendored for use with dlopen):
 # libunwind
 endif # WINNT
@@ -230,7 +258,7 @@ $(build_private_libdir)/libLLVM.$(SHLIB_EXT):
 	REALPATH=$(LLVM_CONFIG_HOST_LIBS) && \
 	$(call resolve_path,REALPATH) && \
 	[ -e "$$REALPATH" ] && \
-	([ ! -e "$@" ] || rm "$@") && \
+	rm -f "$@" && \
 	echo ln -sf "$$REALPATH" "$@" && \
 	ln -sf "$$REALPATH" "$@"
 ifneq ($(USE_SYSTEM_LLVM),0)
diff --git a/base/abstractarray.jl b/base/abstractarray.jl
index 478130f4834da1..239e75df525101 100644
--- a/base/abstractarray.jl
+++ b/base/abstractarray.jl
@@ -8,6 +8,8 @@
 Supertype for `N`-dimensional arrays (or array-like types) with elements of type `T`.
 [`Array`](@ref) and other types are subtypes of this. See the manual section on the
 [`AbstractArray` interface](@ref man-interface-array).
+
+See also: [`AbstractVector`](@ref), [`AbstractMatrix`](@ref), [`eltype`](@ref), [`ndims`](@ref).
 """
 AbstractArray
 
@@ -24,6 +26,8 @@ dimension to just get the length of that dimension.
 Note that `size` may not be defined for arrays with non-standard indices, in which case [`axes`](@ref)
 may be useful. See the manual chapter on [arrays with custom indices](@ref man-custom-indices).
 
+See also: [`length`](@ref), [`ndims`](@ref), [`eachindex`](@ref), [`sizeof`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = fill(1, (2,3,4));
@@ -45,15 +49,28 @@ Return the valid range of indices for array `A` along dimension `d`.
 See also [`size`](@ref), and the manual chapter on [arrays with custom indices](@ref man-custom-indices).
 
 # Examples
+
 ```jldoctest
 julia> A = fill(1, (5,6,7));
 
 julia> axes(A, 2)
 Base.OneTo(6)
 ```
+
+# Usage note
+
+Each of the indices has to be an `AbstractUnitRange{<:Integer}`, but at the same time can be
+a type that uses custom indices. So, for example, if you need a subset, use generalized
+indexing constructs like `begin`/`end` or [`firstindex`](@ref)/[`lastindex`](@ref):
+
+```julia
+ix = axes(v, 1)
+ix[2:end]          # will work for eg Vector, but may fail in general
+ix[(begin+1):end]  # works for generalized indexes
+```
 """
 function axes(A::AbstractArray{T,N}, d) where {T,N}
-    @_inline_meta
+    @inline
     d::Integer <= N ? axes(A)[d] : OneTo(1)
 end
 
@@ -62,7 +79,10 @@ end
 
 Return the tuple of valid indices for array `A`.
 
+See also: [`size`](@ref), [`keys`](@ref), [`eachindex`](@ref).
+
 # Examples
+
 ```jldoctest
 julia> A = fill(1, (5,6,7));
 
@@ -71,8 +91,8 @@ julia> axes(A)
 ```
 """
 function axes(A)
-    @_inline_meta
-    map(OneTo, size(A))
+    @inline
+    map(oneto, size(A))
 end
 
 """
@@ -82,7 +102,8 @@ end
 Return `true` if the indices of `A` start with something other than 1 along any axis.
 If multiple arguments are passed, equivalent to `has_offset_axes(A) | has_offset_axes(B) | ...`.
 """
-has_offset_axes(A)    = _tuple_any(x->Int(first(x))::Int != 1, axes(A))
+has_offset_axes(A) = _tuple_any(x->Int(first(x))::Int != 1, axes(A))
+has_offset_axes(A::AbstractVector) = Int(firstindex(A))::Int != 1 # improve performance of a common case (ranges)
 has_offset_axes(A...) = _tuple_any(has_offset_axes, A)
 has_offset_axes(::Colon) = false
 
@@ -92,12 +113,22 @@ require_one_based_indexing(A...) = !has_offset_axes(A...) || throw(ArgumentError
 # for d=1. 1d arrays are heavily used, and the first dimension comes up
 # in other applications.
 axes1(A::AbstractArray{<:Any,0}) = OneTo(1)
-axes1(A::AbstractArray) = (@_inline_meta; axes(A)[1])
-axes1(iter) = OneTo(length(iter))
+axes1(A::AbstractArray) = (@inline; axes(A)[1])
+axes1(iter) = oneto(length(iter))
 
-unsafe_indices(A) = axes(A)
-unsafe_indices(r::AbstractRange) = (OneTo(unsafe_length(r)),) # Ranges use checked_sub for size
+"""
+    keys(a::AbstractArray)
+
+Return an efficient array describing all valid indices for `a` arranged in the shape of `a` itself.
+
+They keys of 1-dimensional arrays (vectors) are integers, whereas all other N-dimensional
+arrays use [`CartesianIndex`](@ref) to describe their locations.  Often the special array
+types [`LinearIndices`](@ref) and [`CartesianIndices`](@ref) are used to efficiently
+represent these arrays of integers and `CartesianIndex`es, respectively.
 
+Note that the `keys` of an array might not be the most efficient index type; for maximum
+performance use  [`eachindex`](@ref) instead.
+"""
 keys(a::AbstractArray) = CartesianIndices(axes(a))
 keys(a::AbstractVector) = LinearIndices(a)
 
@@ -149,7 +180,45 @@ valtype(A::Type{<:AbstractArray}) = eltype(A)
 prevind(::AbstractArray, i::Integer) = Int(i)-1
 nextind(::AbstractArray, i::Integer) = Int(i)+1
 
+
+"""
+    eltype(type)
+
+Determine the type of the elements generated by iterating a collection of the given `type`.
+For dictionary types, this will be a `Pair{KeyType,ValType}`. The definition
+`eltype(x) = eltype(typeof(x))` is provided for convenience so that instances can be passed
+instead of types. However the form that accepts a type argument should be defined for new
+types.
+
+See also: [`keytype`](@ref), [`typeof`](@ref).
+
+# Examples
+```jldoctest
+julia> eltype(fill(1f0, (2,2)))
+Float32
+
+julia> eltype(fill(0x1, (2,2)))
+UInt8
+```
+"""
+eltype(::Type) = Any
+eltype(::Type{Bottom}) = throw(ArgumentError("Union{} does not have elements"))
+eltype(x) = eltype(typeof(x))
 eltype(::Type{<:AbstractArray{E}}) where {E} = @isdefined(E) ? E : Any
+
+"""
+    elsize(type)
+
+Compute the memory stride in bytes between consecutive elements of `eltype`
+stored inside the given `type`, if the array elements are stored densely with a
+uniform linear stride.
+
+# Examples
+```jldoctest
+julia> Base.elsize(rand(Float32, 10))
+4
+```
+"""
 elsize(A::AbstractArray) = elsize(typeof(A))
 
 """
@@ -157,6 +226,8 @@ elsize(A::AbstractArray) = elsize(typeof(A))
 
 Return the number of dimensions of `A`.
 
+See also: [`size`](@ref), [`axes`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = fill(1, (3,4,5));
@@ -166,7 +237,7 @@ julia> ndims(A)
 ```
 """
 ndims(::AbstractArray{T,N}) where {T,N} = N
-ndims(::Type{<:AbstractArray{T,N}}) where {T,N} = N
+ndims(::Type{<:AbstractArray{<:Any,N}}) where {N} = N
 
 """
     length(collection) -> Integer
@@ -175,6 +246,8 @@ Return the number of elements in the collection.
 
 Use [`lastindex`](@ref) to get the last valid index of an indexable collection.
 
+See also: [`size`](@ref), [`ndims`](@ref), [`eachindex`](@ref).
+
 # Examples
 ```jldoctest
 julia> length(1:5)
@@ -203,13 +276,13 @@ julia> length([1 2; 3 4])
 4
 ```
 """
-length(t::AbstractArray) = (@_inline_meta; prod(size(t)))
+length(t::AbstractArray) = (@inline; prod(size(t)))
 
 # `eachindex` is mostly an optimization of `keys`
 eachindex(itrs...) = keys(itrs...)
 
 # eachindex iterates over all indices. IndexCartesian definitions are later.
-eachindex(A::AbstractVector) = (@_inline_meta(); axes1(A))
+eachindex(A::AbstractVector) = (@inline(); axes1(A))
 
 
 @noinline function throw_eachindex_mismatch_indices(::IndexLinear, inds...)
@@ -233,7 +306,7 @@ If you supply more than one `AbstractArray` argument, `eachindex` will create an
 iterable object that is fast for all arguments (a [`UnitRange`](@ref)
 if all inputs have fast linear indexing, a [`CartesianIndices`](@ref)
 otherwise).
-If the arrays have different sizes and/or dimensionalities, a DimensionMismatch exception
+If the arrays have different sizes and/or dimensionalities, a `DimensionMismatch` exception
 will be thrown.
 # Examples
 ```jldoctest
@@ -254,27 +327,27 @@ CartesianIndex(1, 1)
 CartesianIndex(2, 1)
 ```
 """
-eachindex(A::AbstractArray) = (@_inline_meta(); eachindex(IndexStyle(A), A))
+eachindex(A::AbstractArray) = (@inline(); eachindex(IndexStyle(A), A))
 
 function eachindex(A::AbstractArray, B::AbstractArray)
-    @_inline_meta
+    @inline
     eachindex(IndexStyle(A,B), A, B)
 end
 function eachindex(A::AbstractArray, B::AbstractArray...)
-    @_inline_meta
+    @inline
     eachindex(IndexStyle(A,B...), A, B...)
 end
-eachindex(::IndexLinear, A::AbstractArray) = (@_inline_meta; OneTo(length(A)))
-eachindex(::IndexLinear, A::AbstractVector) = (@_inline_meta; axes1(A))
+eachindex(::IndexLinear, A::AbstractArray) = (@inline; oneto(length(A)))
+eachindex(::IndexLinear, A::AbstractVector) = (@inline; axes1(A))
 function eachindex(::IndexLinear, A::AbstractArray, B::AbstractArray...)
-    @_inline_meta
+    @inline
     indsA = eachindex(IndexLinear(), A)
     _all_match_first(X->eachindex(IndexLinear(), X), indsA, B...) ||
         throw_eachindex_mismatch_indices(IndexLinear(), eachindex(A), eachindex.(B)...)
     indsA
 end
 function _all_match_first(f::F, inds, A, B...) where F<:Function
-    @_inline_meta
+    @inline
     (inds == f(A)) & _all_match_first(f, inds, B...)
 end
 _all_match_first(f::F, inds) where F<:Function = true
@@ -291,6 +364,8 @@ Return the last index of `collection`. If `d` is given, return the last index of
 The syntaxes `A[end]` and `A[end, end]` lower to `A[lastindex(A)]` and
 `A[lastindex(A, 1), lastindex(A, 2)]`, respectively.
 
+See also: [`axes`](@ref), [`firstindex`](@ref), [`eachindex`](@ref), [`prevind`](@ref).
+
 # Examples
 ```jldoctest
 julia> lastindex([1,2,4])
@@ -300,8 +375,8 @@ julia> lastindex(rand(3,4,5), 2)
 4
 ```
 """
-lastindex(a::AbstractArray) = (@_inline_meta; last(eachindex(IndexLinear(), a)))
-lastindex(a::AbstractArray, d) = (@_inline_meta; last(axes(a, d)))
+lastindex(a::AbstractArray) = (@inline; last(eachindex(IndexLinear(), a)))
+lastindex(a, d) = (@inline; last(axes(a, d)))
 
 """
     firstindex(collection) -> Integer
@@ -309,6 +384,11 @@ lastindex(a::AbstractArray, d) = (@_inline_meta; last(axes(a, d)))
 
 Return the first index of `collection`. If `d` is given, return the first index of `collection` along dimension `d`.
 
+The syntaxes `A[begin]` and `A[1, begin]` lower to `A[firstindex(A)]` and
+`A[1, firstindex(A, 2)]`, respectively.
+
+See also: [`first`](@ref), [`axes`](@ref), [`lastindex`](@ref), [`nextind`](@ref).
+
 # Examples
 ```jldoctest
 julia> firstindex([1,2,4])
@@ -318,8 +398,8 @@ julia> firstindex(rand(3,4,5), 2)
 1
 ```
 """
-firstindex(a::AbstractArray) = (@_inline_meta; first(eachindex(IndexLinear(), a)))
-firstindex(a::AbstractArray, d) = (@_inline_meta; first(axes(a, d)))
+firstindex(a::AbstractArray) = (@inline; first(eachindex(IndexLinear(), a)))
+firstindex(a, d) = (@inline; first(axes(a, d)))
 
 first(a::AbstractArray) = a[first(eachindex(a))]
 
@@ -329,6 +409,8 @@ first(a::AbstractArray) = a[first(eachindex(a))]
 Get the first element of an iterable collection. Return the start point of an
 [`AbstractRange`](@ref) even if it is empty.
 
+See also: [`only`](@ref), [`firstindex`](@ref), [`last`](@ref).
+
 # Examples
 ```jldoctest
 julia> first(2:2:10)
@@ -347,9 +429,14 @@ end
 """
     first(itr, n::Integer)
 
-Get the first `n` elements of the iterable collection `itr`, or fewer elements if `v` is not
+Get the first `n` elements of the iterable collection `itr`, or fewer elements if `itr` is not
 long enough.
 
+See also: [`startswith`](@ref), [`Iterators.take`](@ref).
+
+!!! compat "Julia 1.6"
+    This method requires at least Julia 1.6.
+
 # Examples
 ```jldoctest
 julia> first(["foo", "bar", "qux"], 2)
@@ -378,6 +465,8 @@ Get the last element of an ordered collection, if it can be computed in O(1) tim
 accomplished by calling [`lastindex`](@ref) to get the last index. Return the end
 point of an [`AbstractRange`](@ref) even if it is empty.
 
+See also [`first`](@ref), [`endswith`](@ref).
+
 # Examples
 ```jldoctest
 julia> last(1:2:10)
@@ -392,9 +481,12 @@ last(a) = a[end]
 """
     last(itr, n::Integer)
 
-Get the last `n` elements of the iterable collection `itr`, or fewer elements if `v` is not
+Get the last `n` elements of the iterable collection `itr`, or fewer elements if `itr` is not
 long enough.
 
+!!! compat "Julia 1.6"
+    This method requires at least Julia 1.6.
+
 # Examples
 ```jldoctest
 julia> last(["foo", "bar", "qux"], 2)
@@ -421,6 +513,8 @@ end
 
 Return a tuple of the memory strides in each dimension.
 
+See also: [`stride`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = fill(1, (3,4,5));
@@ -436,6 +530,8 @@ function strides end
 
 Return the distance in memory (in number of elements) between adjacent elements in dimension `k`.
 
+See also: [`strides`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = fill(1, (3,4,5));
@@ -450,7 +546,13 @@ julia> stride(A,3)
 function stride(A::AbstractArray, k::Integer)
     st = strides(A)
     k ≤ ndims(A) && return st[k]
-    return sum(st .* size(A))
+    ndims(A) == 0 && return 1
+    sz = size(A)
+    s = st[1] * sz[1]
+    for i in 2:ndims(A)
+        s += st[i] * sz[i]
+    end
+    return s
 end
 
 @inline size_to_strides(s, d, sz...) = (s, size_to_strides(s * d, sz...)...)
@@ -487,14 +589,14 @@ end
 function trailingsize(inds::Indices, n)
     s = 1
     for i=n:length(inds)
-        s *= unsafe_length(inds[i])
+        s *= length(inds[i])
     end
     return s
 end
 # This version is type-stable even if inds is heterogeneous
 function trailingsize(inds::Indices)
-    @_inline_meta
-    prod(map(unsafe_length, inds))
+    @inline
+    prod(map(length, inds))
 end
 
 ## Bounds checking ##
@@ -541,18 +643,18 @@ false
 ```
 """
 function checkbounds(::Type{Bool}, A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     checkbounds_indices(Bool, axes(A), I)
 end
 
 # Linear indexing is explicitly allowed when there is only one (non-cartesian) index
 function checkbounds(::Type{Bool}, A::AbstractArray, i)
-    @_inline_meta
+    @inline
     checkindex(Bool, eachindex(IndexLinear(), A), i)
 end
 # As a special extension, allow using logical arrays that match the source array exactly
 function checkbounds(::Type{Bool}, A::AbstractArray{<:Any,N}, I::AbstractArray{Bool,N}) where N
-    @_inline_meta
+    @inline
     axes(A) == axes(I)
 end
 
@@ -562,7 +664,7 @@ end
 Throw an error if the specified indices `I` are not in bounds for the given array `A`.
 """
 function checkbounds(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     checkbounds(Bool, A, I...) || throw_boundserror(A, I)
     nothing
 end
@@ -588,17 +690,17 @@ of `IA`.
 See also [`checkbounds`](@ref).
 """
 function checkbounds_indices(::Type{Bool}, IA::Tuple, I::Tuple)
-    @_inline_meta
+    @inline
     checkindex(Bool, IA[1], I[1])::Bool & checkbounds_indices(Bool, tail(IA), tail(I))
 end
 function checkbounds_indices(::Type{Bool}, ::Tuple{}, I::Tuple)
-    @_inline_meta
+    @inline
     checkindex(Bool, OneTo(1), I[1])::Bool & checkbounds_indices(Bool, (), tail(I))
 end
-checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@_inline_meta; all(x->unsafe_length(x)==1, IA))
+checkbounds_indices(::Type{Bool}, IA::Tuple, ::Tuple{}) = (@inline; all(x->length(x)==1, IA))
 checkbounds_indices(::Type{Bool}, ::Tuple{}, ::Tuple{}) = true
 
-throw_boundserror(A, I) = (@_noinline_meta; throw(BoundsError(A, I)))
+throw_boundserror(A, I) = (@noinline; throw(BoundsError(A, I)))
 
 # check along a single dimension
 """
@@ -609,6 +711,8 @@ Return `true` if the given `index` is within the bounds of
 arrays can extend this method in order to provide a specialized bounds
 checking implementation.
 
+See also [`checkbounds`](@ref).
+
 # Examples
 ```jldoctest
 julia> checkindex(Bool, 1:20, 8)
@@ -630,7 +734,7 @@ end
 checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractVector{Bool}) = indx == axes1(I)
 checkindex(::Type{Bool}, indx::AbstractUnitRange, I::AbstractArray{Bool}) = false
 function checkindex(::Type{Bool}, inds::AbstractUnitRange, I::AbstractArray)
-    @_inline_meta
+    @inline
     b = true
     for i in I
         b &= checkindex(Bool, inds, i)
@@ -660,7 +764,7 @@ neither mutable nor support 2 dimensions:
 
 ```julia-repl
 julia> similar(1:10, 1, 4)
-1×4 Array{Int64,2}:
+1×4 Matrix{Int64}:
  4419743872  4374413872  4419743888  0
 ```
 
@@ -679,11 +783,12 @@ different element type it will create a regular `Array` instead:
 
 ```julia-repl
 julia> similar(falses(10), Float64, 2, 4)
-2×4 Array{Float64,2}:
+2×4 Matrix{Float64}:
  2.18425e-314  2.18425e-314  2.18425e-314  2.18425e-314
  2.18425e-314  2.18425e-314  2.18425e-314  2.18425e-314
 ```
 
+See also: [`undef`](@ref), [`isassigned`](@ref).
 """
 similar(a::AbstractArray{T}) where {T}                             = similar(a, T)
 similar(a::AbstractArray, ::Type{T}) where {T}                     = similar(a, T, to_shape(axes(a)))
@@ -696,6 +801,7 @@ similar(a::AbstractArray, ::Type{T}, dims::DimOrInd...) where {T}  = similar(a,
 # define this method to convert supported axes to Ints, with the expectation that an offset array
 # package will define a method with dims::Tuple{Union{Integer, UnitRange}, Vararg{Union{Integer, UnitRange}}}
 similar(a::AbstractArray, ::Type{T}, dims::Tuple{Union{Integer, OneTo}, Vararg{Union{Integer, OneTo}}}) where {T} = similar(a, T, to_shape(dims))
+similar(a::AbstractArray, ::Type{T}, dims::Tuple{Integer, Vararg{Integer}}) where {T} = similar(a, T, to_shape(dims))
 # similar creates an Array by default
 similar(a::AbstractArray, ::Type{T}, dims::Dims{N}) where {T,N}    = Array{T,N}(undef, dims)
 
@@ -739,6 +845,8 @@ similar(::Type{T}, dims::Dims) where {T<:AbstractArray} = T(undef, dims)
 
 Create an empty vector similar to `v`, optionally changing the `eltype`.
 
+See also: [`empty!`](@ref), [`isempty`](@ref), [`isassigned`](@ref).
+
 # Examples
 
 ```jldoctest
@@ -763,6 +871,7 @@ elements in `dst`.
 If `dst` and `src` are of the same type, `dst == src` should hold after
 the call. If `dst` and `src` are multidimensional arrays, they must have
 equal [`axes`](@ref).
+
 See also [`copyto!`](@ref).
 
 !!! compat "Julia 1.1"
@@ -787,6 +896,10 @@ end
 
 ## from general iterable to any array
 
+# This is `@Experimental.max_methods 1 function copyto! end`, which is not
+# defined at this point in bootstrap.
+typeof(function copyto! end).name.max_methods = UInt8(1)
+
 function copyto!(dest::AbstractArray, src)
     destiter = eachindex(dest)
     y = iterate(destiter)
@@ -811,19 +924,21 @@ end
 # copy from an some iterable object into an AbstractArray
 function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer)
     if (sstart < 1)
-        throw(ArgumentError(string("source start offset (",sstart,") is < 1")))
+        throw(ArgumentError(LazyString("source start offset (",sstart,") is < 1")))
     end
     y = iterate(src)
     for j = 1:(sstart-1)
         if y === nothing
-            throw(ArgumentError(string("source has fewer elements than required, ",
-                                       "expected at least ",sstart,", got ",j-1)))
+            throw(ArgumentError(LazyString(
+                "source has fewer elements than required, ",
+                "expected at least ", sstart,", got ", j-1)))
         end
         y = iterate(src, y[2])
     end
     if y === nothing
-        throw(ArgumentError(string("source has fewer elements than required, ",
-                                   "expected at least ",sstart,", got ",sstart-1)))
+        throw(ArgumentError(LazyString(
+            "source has fewer elements than required, ",
+            "expected at least ",sstart," got ", sstart-1)))
     end
     i = Int(dstart)
     while y !== nothing
@@ -837,19 +952,22 @@ end
 
 # this method must be separate from the above since src might not have a length
 function copyto!(dest::AbstractArray, dstart::Integer, src, sstart::Integer, n::Integer)
-    n < 0 && throw(ArgumentError(string("tried to copy n=", n, " elements, but n should be nonnegative")))
+    n < 0 && throw(ArgumentError(LazyString("tried to copy n=",n,
+        ", elements, but n should be nonnegative")))
     n == 0 && return dest
     dmax = dstart + n - 1
     inds = LinearIndices(dest)
     if (dstart ∉ inds || dmax ∉ inds) | (sstart < 1)
-        sstart < 1 && throw(ArgumentError(string("source start offset (",sstart,") is < 1")))
+        sstart < 1 && throw(ArgumentError(LazyString("source start offset (",
+            sstart,") is < 1")))
         throw(BoundsError(dest, dstart:dmax))
     end
     y = iterate(src)
     for j = 1:(sstart-1)
         if y === nothing
-            throw(ArgumentError(string("source has fewer elements than required, ",
-                                       "expected at least ",sstart,", got ",j-1)))
+            throw(ArgumentError(LazyString(
+                "source has fewer elements than required, ",
+                "expected at least ",sstart,", got ",j-1)))
         end
         y = iterate(src, y[2])
     end
@@ -870,11 +988,12 @@ end
 """
     copyto!(dest::AbstractArray, src) -> dest
 
-
 Copy all elements from collection `src` to array `dest`, whose length must be greater than
 or equal to the length `n` of `src`. The first `n` elements of `dest` are overwritten,
 the other elements are left untouched.
 
+See also [`copy!`](@ref Base.copy!), [`copy`](@ref).
+
 # Examples
 ```jldoctest
 julia> x = [1., 0., 3., 0., 5.];
@@ -960,7 +1079,8 @@ function copyto!(dest::AbstractArray, dstart::Integer,
                src::AbstractArray, sstart::Integer,
                n::Integer)
     n == 0 && return dest
-    n < 0 && throw(ArgumentError(string("tried to copy n=", n, " elements, but n should be nonnegative")))
+    n < 0 && throw(ArgumentError(LazyString("tried to copy n=",
+        n," elements, but n should be nonnegative")))
     destinds, srcinds = LinearIndices(dest), LinearIndices(src)
     (checkbounds(Bool, destinds, dstart) && checkbounds(Bool, destinds, dstart+n-1)) || throw(BoundsError(dest, dstart:dstart+n-1))
     (checkbounds(Bool, srcinds, sstart)  && checkbounds(Bool, srcinds, sstart+n-1))  || throw(BoundsError(src,  sstart:sstart+n-1))
@@ -978,12 +1098,12 @@ end
 function copyto!(B::AbstractVecOrMat{R}, ir_dest::AbstractRange{Int}, jr_dest::AbstractRange{Int},
                A::AbstractVecOrMat{S}, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int}) where {R,S}
     if length(ir_dest) != length(ir_src)
-        throw(ArgumentError(string("source and destination must have same size (got ",
-                                   length(ir_src)," and ",length(ir_dest),")")))
+        throw(ArgumentError(LazyString("source and destination must have same size (got ",
+            length(ir_src)," and ",length(ir_dest),")")))
     end
     if length(jr_dest) != length(jr_src)
-        throw(ArgumentError(string("source and destination must have same size (got ",
-                                   length(jr_src)," and ",length(jr_dest),")")))
+        throw(ArgumentError(LazyString("source and destination must have same size (got ",
+            length(jr_src)," and ",length(jr_dest),")")))
     end
     @boundscheck checkbounds(B, ir_dest, jr_dest)
     @boundscheck checkbounds(A, ir_src, jr_src)
@@ -1032,7 +1152,7 @@ function copymutable(a::AbstractArray)
 end
 copymutable(itr) = collect(itr)
 
-zero(x::AbstractArray{T}) where {T} = fill!(similar(x), zero(T))
+zero(x::AbstractArray{T}) where {T} = fill!(similar(x, typeof(zero(T))), zero(T))
 
 ## iteration support for arrays by iterating over `eachindex` in the array ##
 # Allows fast iteration by default for both IndexLinear and IndexCartesian arrays
@@ -1065,7 +1185,7 @@ end
 # convenience in cases that work.
 pointer(x::AbstractArray{T}) where {T} = unsafe_convert(Ptr{T}, x)
 function pointer(x::AbstractArray{T}, i::Integer) where T
-    @_inline_meta
+    @inline
     unsafe_convert(Ptr{T}, x) + Int(_memory_offset(x, i))::Int
 end
 
@@ -1088,9 +1208,9 @@ end
 """
     getindex(A, inds...)
 
-Return a subset of array `A` as specified by `inds`, where each `ind` may be an
-`Int`, an [`AbstractRange`](@ref), or a [`Vector`](@ref). See the manual section on
-[array indexing](@ref man-array-indexing) for details.
+Return a subset of array `A` as specified by `inds`, where each `ind` may be,
+for example, an `Int`, an [`AbstractRange`](@ref), or a [`Vector`](@ref).
+See the manual section on [array indexing](@ref man-array-indexing) for details.
 
 # Examples
 ```jldoctest
@@ -1120,18 +1240,24 @@ function getindex(A::AbstractArray, I...)
     _getindex(IndexStyle(A), A, to_indices(A, I)...)
 end
 # To avoid invalidations from multidimensional.jl: getindex(A::Array, i1::Union{Integer, CartesianIndex}, I::Union{Integer, CartesianIndex}...)
-getindex(A::Array, i1::Integer, I::Integer...) = A[to_indices(A, (i1, I...))...]
+@propagate_inbounds getindex(A::Array, i1::Integer, I::Integer...) = A[to_indices(A, (i1, I...))...]
 
 function unsafe_getindex(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     @inbounds r = getindex(A, I...)
     r
 end
 
+struct CanonicalIndexError
+    func::String
+    type::Any
+    CanonicalIndexError(func::String, @nospecialize(type)) = new(func, type)
+end
+
 error_if_canonical_getindex(::IndexLinear, A::AbstractArray, ::Int) =
-    error("getindex not defined for ", typeof(A))
+    throw(CanonicalIndexError("getindex", typeof(A)))
 error_if_canonical_getindex(::IndexCartesian, A::AbstractArray{T,N}, ::Vararg{Int,N}) where {T,N} =
-    error("getindex not defined for ", typeof(A))
+    throw(CanonicalIndexError("getindex", typeof(A)))
 error_if_canonical_getindex(::IndexStyle, ::AbstractArray, ::Any...) = nothing
 
 ## Internal definitions
@@ -1142,19 +1268,19 @@ _getindex(::IndexStyle, A::AbstractArray, I...) =
 _getindex(::IndexLinear, A::AbstractVector, i::Int) = (@_propagate_inbounds_meta; getindex(A, i))  # ambiguity resolution in case packages specialize this (to be avoided if at all possible, but see Interpolations.jl)
 _getindex(::IndexLinear, A::AbstractArray, i::Int) = (@_propagate_inbounds_meta; getindex(A, i))
 function _getindex(::IndexLinear, A::AbstractArray, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...) # generally _to_linear_index requires bounds checking
     @inbounds r = getindex(A, _to_linear_index(A, I...))
     r
 end
 _to_linear_index(A::AbstractArray, i::Integer) = i
 _to_linear_index(A::AbstractVector, i::Integer, I::Integer...) = i
-_to_linear_index(A::AbstractArray) = 1
-_to_linear_index(A::AbstractArray, I::Integer...) = (@_inline_meta; _sub2ind(A, I...))
+_to_linear_index(A::AbstractArray) = first(LinearIndices(A))
+_to_linear_index(A::AbstractArray, I::Integer...) = (@inline; _sub2ind(A, I...))
 
 ## IndexCartesian Scalar indexing: Canonical method is full dimensionality of Ints
 function _getindex(::IndexCartesian, A::AbstractArray, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...) # generally _to_subscript_indices requires bounds checking
     @inbounds r = getindex(A, _to_subscript_indices(A, I...)...)
     r
@@ -1163,13 +1289,13 @@ function _getindex(::IndexCartesian, A::AbstractArray{T,N}, I::Vararg{Int, N}) w
     @_propagate_inbounds_meta
     getindex(A, I...)
 end
-_to_subscript_indices(A::AbstractArray, i::Integer) = (@_inline_meta; _unsafe_ind2sub(A, i))
-_to_subscript_indices(A::AbstractArray{T,N}) where {T,N} = (@_inline_meta; fill_to_length((), 1, Val(N)))
+_to_subscript_indices(A::AbstractArray, i::Integer) = (@inline; _unsafe_ind2sub(A, i))
+_to_subscript_indices(A::AbstractArray{T,N}) where {T,N} = (@inline; fill_to_length((), 1, Val(N)))
 _to_subscript_indices(A::AbstractArray{T,0}) where {T} = ()
 _to_subscript_indices(A::AbstractArray{T,0}, i::Integer) where {T} = ()
 _to_subscript_indices(A::AbstractArray{T,0}, I::Integer...) where {T} = ()
 function _to_subscript_indices(A::AbstractArray{T,N}, I::Integer...) where {T,N}
-    @_inline_meta
+    @inline
     J, Jrem = IteratorsMD.split(I, Val(N))
     _to_subscript_indices(A, J, Jrem)
 end
@@ -1177,15 +1303,15 @@ _to_subscript_indices(A::AbstractArray, J::Tuple, Jrem::Tuple{}) =
     __to_subscript_indices(A, axes(A), J, Jrem)
 function __to_subscript_indices(A::AbstractArray,
         ::Tuple{AbstractUnitRange,Vararg{AbstractUnitRange}}, J::Tuple, Jrem::Tuple{})
-    @_inline_meta
+    @inline
     (J..., map(first, tail(_remaining_size(J, axes(A))))...)
 end
 _to_subscript_indices(A, J::Tuple, Jrem::Tuple) = J # already bounds-checked, safe to drop
 _to_subscript_indices(A::AbstractArray{T,N}, I::Vararg{Int,N}) where {T,N} = I
 _remaining_size(::Tuple{Any}, t::Tuple) = t
-_remaining_size(h::Tuple, t::Tuple) = (@_inline_meta; _remaining_size(tail(h), tail(t)))
+_remaining_size(h::Tuple, t::Tuple) = (@inline; _remaining_size(tail(h), tail(t)))
 _unsafe_ind2sub(::Tuple{}, i) = () # _ind2sub may throw(BoundsError()) in this case
-_unsafe_ind2sub(sz, i) = (@_inline_meta; _ind2sub(sz, i))
+_unsafe_ind2sub(sz, i) = (@inline; _ind2sub(sz, i))
 
 ## Setindex! is defined similarly. We first dispatch to an internal _setindex!
 # function that allows dispatch on array storage
@@ -1195,7 +1321,7 @@ _unsafe_ind2sub(sz, i) = (@_inline_meta; _ind2sub(sz, i))
     A[inds...] = X
 
 Store values from array `X` within some subset of `A` as specified by `inds`.
-The syntax `A[inds...] = X` is equivalent to `setindex!(A, X, inds...)`.
+The syntax `A[inds...] = X` is equivalent to `(setindex!(A, X, inds...); X)`.
 
 # Examples
 ```jldoctest
@@ -1217,15 +1343,15 @@ function setindex!(A::AbstractArray, v, I...)
     _setindex!(IndexStyle(A), A, v, to_indices(A, I)...)
 end
 function unsafe_setindex!(A::AbstractArray, v, I...)
-    @_inline_meta
+    @inline
     @inbounds r = setindex!(A, v, I...)
     r
 end
 
 error_if_canonical_setindex(::IndexLinear, A::AbstractArray, ::Int) =
-    error("setindex! not defined for ", typeof(A))
+    throw(CanonicalIndexError("setindex!", typeof(A)))
 error_if_canonical_setindex(::IndexCartesian, A::AbstractArray{T,N}, ::Vararg{Int,N}) where {T,N} =
-    error("setindex! not defined for ", typeof(A))
+    throw(CanonicalIndexError("setindex!", typeof(A)))
 error_if_canonical_setindex(::IndexStyle, ::AbstractArray, ::Any...) = nothing
 
 ## Internal definitions
@@ -1235,7 +1361,7 @@ _setindex!(::IndexStyle, A::AbstractArray, v, I...) =
 ## IndexLinear Scalar indexing
 _setindex!(::IndexLinear, A::AbstractArray, v, i::Int) = (@_propagate_inbounds_meta; setindex!(A, v, i))
 function _setindex!(::IndexLinear, A::AbstractArray, v, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     @inbounds r = setindex!(A, v, _to_linear_index(A, I...))
     r
@@ -1247,7 +1373,7 @@ function _setindex!(::IndexCartesian, A::AbstractArray{T,N}, v, I::Vararg{Int, N
     setindex!(A, v, I...)
 end
 function _setindex!(::IndexCartesian, A::AbstractArray, v, I::Vararg{Int,M}) where M
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     @inbounds r = setindex!(A, v, _to_subscript_indices(A, I...)...)
     r
@@ -1258,7 +1384,8 @@ end
 
 Return the underlying "parent array”. This parent array of objects of types `SubArray`, `ReshapedArray`
 or `LinearAlgebra.Transpose` is what was passed as an argument to `view`, `reshape`, `transpose`, etc.
-during object creation. If the input is not a wrapped object, return the input itself.
+during object creation. If the input is not a wrapped object, return the input itself. If the input is
+wrapped multiple times, only the outermost wrapper will be removed.
 
 # Examples
 ```jldoctest
@@ -1313,7 +1440,7 @@ much more common case where aliasing does not occur. By default,
 `Base.unaliascopy(A)`.
 """
 unaliascopy(A::Array) = copy(A)
-unaliascopy(A::AbstractArray)::typeof(A) = (@_noinline_meta; _unaliascopy(A, copy(A)))
+unaliascopy(A::AbstractArray)::typeof(A) = (@noinline; _unaliascopy(A, copy(A)))
 _unaliascopy(A::T, C::T) where {T} = C
 _unaliascopy(A, C) = throw(ArgumentError("""
     an array of type `$(typename(typeof(A)).wrapper)` shares memory with another argument
@@ -1367,6 +1494,9 @@ RangeVecIntList{A<:AbstractVector{Int}} = Union{Tuple{Vararg{Union{AbstractRange
 get(A::AbstractArray, i::Integer, default) = checkbounds(Bool, A, i) ? A[i] : default
 get(A::AbstractArray, I::Tuple{}, default) = checkbounds(Bool, A) ? A[] : default
 get(A::AbstractArray, I::Dims, default) = checkbounds(Bool, A, I...) ? A[I...] : default
+get(f::Callable, A::AbstractArray, i::Integer) = checkbounds(Bool, A, i) ? A[i] : f()
+get(f::Callable, A::AbstractArray, I::Tuple{}) = checkbounds(Bool, A) ? A[] : f()
+get(f::Callable, A::AbstractArray, I::Dims) = checkbounds(Bool, A, I...) ? A[I...] : f()
 
 function get!(X::AbstractVector{T}, A::AbstractVector, I::Union{AbstractRange,AbstractVector{Int}}, default::T) where T
     # 1d is not linear indexing
@@ -1424,10 +1554,10 @@ vcat(X::T...) where {T<:Number} = T[ X[i] for i=1:length(X) ]
 hcat(X::T...) where {T}         = T[ X[j] for i=1:1, j=1:length(X) ]
 hcat(X::T...) where {T<:Number} = T[ X[j] for i=1:1, j=1:length(X) ]
 
-vcat(X::Number...) = hvcat_fill(Vector{promote_typeof(X...)}(undef, length(X)), X)
-hcat(X::Number...) = hvcat_fill(Matrix{promote_typeof(X...)}(undef, 1,length(X)), X)
-typed_vcat(::Type{T}, X::Number...) where {T} = hvcat_fill(Vector{T}(undef, length(X)), X)
-typed_hcat(::Type{T}, X::Number...) where {T} = hvcat_fill(Matrix{T}(undef, 1,length(X)), X)
+vcat(X::Number...) = hvcat_fill!(Vector{promote_typeof(X...)}(undef, length(X)), X)
+hcat(X::Number...) = hvcat_fill!(Matrix{promote_typeof(X...)}(undef, 1,length(X)), X)
+typed_vcat(::Type{T}, X::Number...) where {T} = hvcat_fill!(Vector{T}(undef, length(X)), X)
+typed_hcat(::Type{T}, X::Number...) where {T} = hvcat_fill!(Matrix{T}(undef, 1,length(X)), X)
 
 vcat(V::AbstractVector...) = typed_vcat(promote_eltype(V...), V...)
 vcat(V::AbstractVector{T}...) where {T} = typed_vcat(T, V...)
@@ -1437,12 +1567,11 @@ vcat(V::AbstractVector{T}...) where {T} = typed_vcat(T, V...)
 # but that solution currently fails (see #27188 and #27224)
 AbstractVecOrTuple{T} = Union{AbstractVector{<:T}, Tuple{Vararg{T}}}
 
-function _typed_vcat(::Type{T}, V::AbstractVecOrTuple{AbstractVector}) where T
-    n = 0
-    for Vk in V
-        n += Int(length(Vk))::Int
-    end
-    a = similar(V[1], T, n)
+_typed_vcat_similar(V, ::Type{T}, n) where T = similar(V[1], T, n)
+_typed_vcat(::Type{T}, V::AbstractVecOrTuple{AbstractVector}) where T =
+    _typed_vcat!(_typed_vcat_similar(V, T, sum(map(length, V))), V)
+
+function _typed_vcat!(a::AbstractVector{T}, V::AbstractVecOrTuple{AbstractVector}) where T
     pos = 1
     for k=1:Int(length(V))::Int
         Vk = V[k]
@@ -1531,12 +1660,23 @@ cat_size(A::AbstractArray) = size(A)
 cat_size(A, d) = 1
 cat_size(A::AbstractArray, d) = size(A, d)
 
+cat_length(::Any) = 1
+cat_length(a::AbstractArray) = length(a)
+
+cat_ndims(a) = 0
+cat_ndims(a::AbstractArray) = ndims(a)
+
 cat_indices(A, d) = OneTo(1)
 cat_indices(A::AbstractArray, d) = axes(A, d)
 
-cat_similar(A, T, shape) = Array{T}(undef, shape)
-cat_similar(A::AbstractArray, T, shape) = similar(A, T, shape)
+cat_similar(A, ::Type{T}, shape::Tuple) where T = Array{T}(undef, shape)
+cat_similar(A, ::Type{T}, shape::Vector) where T = Array{T}(undef, shape...)
+cat_similar(A::Array, ::Type{T}, shape::Tuple) where T = Array{T}(undef, shape)
+cat_similar(A::Array, ::Type{T}, shape::Vector) where T = Array{T}(undef, shape...)
+cat_similar(A::AbstractArray, T::Type, shape::Tuple) = similar(A, T, shape)
+cat_similar(A::AbstractArray, T::Type, shape::Vector) = similar(A, T, shape...)
 
+# These are for backwards compatibility (even though internal)
 cat_shape(dims, shape::Tuple{Vararg{Int}}) = shape
 function cat_shape(dims, shapes::Tuple)
     out_shape = ()
@@ -1545,10 +1685,15 @@ function cat_shape(dims, shapes::Tuple)
     end
     return out_shape
 end
+# The new way to compute the shape (more inferrable than combining cat_size & cat_shape, due to Varargs + issue#36454)
+cat_size_shape(dims) = ntuple(zero, Val(length(dims)))
+@inline cat_size_shape(dims, X, tail...) = _cat_size_shape(dims, _cshp(1, dims, (), cat_size(X)), tail...)
+_cat_size_shape(dims, shape) = shape
+@inline _cat_size_shape(dims, shape, X, tail...) = _cat_size_shape(dims, _cshp(1, dims, shape, cat_size(X)), tail...)
 
 _cshp(ndim::Int, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
 _cshp(ndim::Int, ::Tuple{}, ::Tuple{}, nshape) = nshape
-_cshp(ndim::Int, dims, ::Tuple{}, ::Tuple{}) = ntuple(b -> 1, Val(length(dims)))
+_cshp(ndim::Int, dims, ::Tuple{}, ::Tuple{}) = ntuple(Returns(1), Val(length(dims)))
 @inline _cshp(ndim::Int, dims, shape, ::Tuple{}) =
     (shape[1] + dims[1], _cshp(ndim + 1, tail(dims), tail(shape), ())...)
 @inline _cshp(ndim::Int, dims, ::Tuple{}, nshape) =
@@ -1571,59 +1716,59 @@ end
 _cs(d, a, b) = (a == b ? a : throw(DimensionMismatch(
     "mismatch in dimension $d (expected $a got $b)")))
 
-function dims2cat(::Val{n}) where {n}
-    n <= 0 && throw(ArgumentError("cat dimension must be a positive integer, but got $n"))
-    ntuple(i -> (i == n), Val(n))
-end
-
+dims2cat(::Val{dims}) where dims = dims2cat(dims)
 function dims2cat(dims)
-    if any(dims .<= 0)
+    if any(≤(0), dims)
         throw(ArgumentError("All cat dimensions must be positive integers, but got $dims"))
     end
     ntuple(in(dims), maximum(dims))
 end
 
-_cat(dims, X...) = cat_t(promote_eltypeof(X...), X...; dims=dims)
+_cat(dims, X...) = _cat_t(dims, promote_eltypeof(X...), X...)
 
-@inline cat_t(::Type{T}, X...; dims) where {T} = _cat_t(dims, T, X...)
 @inline function _cat_t(dims, ::Type{T}, X...) where {T}
     catdims = dims2cat(dims)
-    shape = cat_shape(catdims, map(cat_size, X)::Tuple{Vararg{Union{Int,Dims}}})::Dims
+    shape = cat_size_shape(catdims, X...)
     A = cat_similar(X[1], T, shape)
     if count(!iszero, catdims)::Int > 1
         fill!(A, zero(T))
     end
     return __cat(A, shape, catdims, X...)
 end
+# this version of `cat_t` is not very kind for inference and so its usage should be avoided,
+# nevertheless it is here just for compat after https://github.com/JuliaLang/julia/pull/45028
+@inline cat_t(::Type{T}, X...; dims) where {T} = _cat_t(dims, T, X...)
 
-function __cat(A, shape::NTuple{M,Int}, catdims, X...) where M
-    N = M::Int
-    offsets = zeros(Int, N)
-    inds = Vector{UnitRange{Int}}(undef, N)
-    concat = copyto!(zeros(Bool, N), catdims)
-    for x in X
-        for i = 1:N
-            if concat[i]
-                inds[i] = offsets[i] .+ cat_indices(x, i)
-                offsets[i] += cat_size(x, i)
-            else
-                inds[i] = 1:shape[i]
-            end
-        end
-        I::NTuple{N, UnitRange{Int}} = (inds...,)
-        if x isa AbstractArray
-            A[I...] = x
-        else
-            fill!(view(A, I...), x)
-        end
+# Why isn't this called `__cat!`?
+__cat(A, shape, catdims, X...) = __cat_offset!(A, shape, catdims, ntuple(zero, length(shape)), X...)
+
+function __cat_offset!(A, shape, catdims, offsets, x, X...)
+    # splitting the "work" on x from X... may reduce latency (fewer costly specializations)
+    newoffsets = __cat_offset1!(A, shape, catdims, offsets, x)
+    return __cat_offset!(A, shape, catdims, newoffsets, X...)
+end
+__cat_offset!(A, shape, catdims, offsets) = A
+
+function __cat_offset1!(A, shape, catdims, offsets, x)
+    inds = ntuple(length(offsets)) do i
+        (i <= length(catdims) && catdims[i]) ? offsets[i] .+ cat_indices(x, i) : 1:shape[i]
     end
-    return A
+    if x isa AbstractArray
+        A[inds...] = x
+    else
+        fill!(view(A, inds...), x)
+    end
+    newoffsets = ntuple(length(offsets)) do i
+        (i <= length(catdims) && catdims[i]) ? offsets[i] + cat_size(x, i) : offsets[i]
+    end
+    return newoffsets
 end
 
 """
     vcat(A...)
 
-Concatenate along dimension 1.
+Concatenate along dimension 1. To efficiently concatenate a large vector of arrays,
+use `reduce(vcat, x)`.
 
 # Examples
 ```jldoctest
@@ -1649,13 +1794,29 @@ julia> vcat(c...)
 2×3 Matrix{Int64}:
  1  2  3
  4  5  6
+
+julia> vs = [[1, 2], [3, 4], [5, 6]]
+3-element Vector{Vector{Int64}}:
+ [1, 2]
+ [3, 4]
+ [5, 6]
+
+julia> reduce(vcat, vs)
+6-element Vector{Int64}:
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
 ```
 """
 vcat(X...) = cat(X...; dims=Val(1))
 """
     hcat(A...)
 
-Concatenate along dimension 2.
+Concatenate along dimension 2. To efficiently concatenate a large vector of arrays,
+use `reduce(hcat, x)`.
 
 # Examples
 ```jldoctest
@@ -1700,15 +1861,26 @@ julia> hcat(x, [1; 2; 3])
  1
  2
  3
+
+julia> vs = [[1, 2], [3, 4], [5, 6]]
+3-element Vector{Vector{Int64}}:
+ [1, 2]
+ [3, 4]
+ [5, 6]
+
+julia> reduce(hcat, vs)
+2×3 Matrix{Int64}:
+ 1  3  5
+ 2  4  6
 ```
 """
 hcat(X...) = cat(X...; dims=Val(2))
 
-typed_vcat(::Type{T}, X...) where T = cat_t(T, X...; dims=Val(1))
-typed_hcat(::Type{T}, X...) where T = cat_t(T, X...; dims=Val(2))
+typed_vcat(::Type{T}, X...) where T = _cat_t(Val(1), T, X...)
+typed_hcat(::Type{T}, X...) where T = _cat_t(Val(2), T, X...)
 
 """
-    cat(A...; dims=dims)
+    cat(A...; dims)
 
 Concatenate the input arrays along the specified dimensions in the iterable `dims`. For
 dimensions not in `dims`, all input arrays should have the same size, which will also be the
@@ -1721,9 +1893,28 @@ dimensions for every new input array and putting zero blocks elsewhere. For exam
 `cat(matrices...; dims=(1,2))` builds a block diagonal matrix, i.e. a block matrix with
 `matrices[1]`, `matrices[2]`, ... as diagonal blocks and matching zero blocks away from the
 diagonal.
+
+See also [`hcat`](@ref), [`vcat`](@ref), [`hvcat`](@ref), [`repeat`](@ref).
+
+# Examples
+```jldoctest
+julia> cat([1 2; 3 4], [pi, pi], fill(10, 2,3,1); dims=2)
+2×6×1 Array{Float64, 3}:
+[:, :, 1] =
+ 1.0  2.0  3.14159  10.0  10.0  10.0
+ 3.0  4.0  3.14159  10.0  10.0  10.0
+
+julia> cat(true, trues(2,2), trues(4)', dims=(1,2))
+4×7 Matrix{Bool}:
+ 1  0  0  0  0  0  0
+ 0  1  1  0  0  0  0
+ 0  1  1  0  0  0  0
+ 0  0  0  1  1  1  1
+```
 """
 @inline cat(A...; dims) = _cat(dims, A...)
-_cat(catdims, A::AbstractArray{T}...) where {T} = cat_t(T, A...; dims=catdims)
+# `@constprop :aggressive` allows `catdims` to be propagated as constant improving return type inference
+@constprop :aggressive _cat(catdims, A::AbstractArray{T}...) where {T} = _cat_t(catdims, T, A...)
 
 # The specializations for 1 and 2 inputs are important
 # especially when running with --inline=no, see #11158
@@ -1734,22 +1925,26 @@ hcat(A::AbstractArray) = cat(A; dims=Val(2))
 hcat(A::AbstractArray, B::AbstractArray) = cat(A, B; dims=Val(2))
 hcat(A::AbstractArray...) = cat(A...; dims=Val(2))
 
-typed_vcat(T::Type, A::AbstractArray) = cat_t(T, A; dims=Val(1))
-typed_vcat(T::Type, A::AbstractArray, B::AbstractArray) = cat_t(T, A, B; dims=Val(1))
-typed_vcat(T::Type, A::AbstractArray...) = cat_t(T, A...; dims=Val(1))
-typed_hcat(T::Type, A::AbstractArray) = cat_t(T, A; dims=Val(2))
-typed_hcat(T::Type, A::AbstractArray, B::AbstractArray) = cat_t(T, A, B; dims=Val(2))
-typed_hcat(T::Type, A::AbstractArray...) = cat_t(T, A...; dims=Val(2))
+typed_vcat(T::Type, A::AbstractArray) = _cat_t(Val(1), T, A)
+typed_vcat(T::Type, A::AbstractArray, B::AbstractArray) = _cat_t(Val(1), T, A, B)
+typed_vcat(T::Type, A::AbstractArray...) = _cat_t(Val(1), T, A...)
+typed_hcat(T::Type, A::AbstractArray) = _cat_t(Val(2), T, A)
+typed_hcat(T::Type, A::AbstractArray, B::AbstractArray) = _cat_t(Val(2), T, A, B)
+typed_hcat(T::Type, A::AbstractArray...) = _cat_t(Val(2), T, A...)
 
 # 2d horizontal and vertical concatenation
 
+# these are produced in lowering if splatting occurs inside hvcat
+hvcat_rows(rows::Tuple...) = hvcat(map(length, rows), (rows...)...)
+typed_hvcat_rows(T::Type, rows::Tuple...) = typed_hvcat(T, map(length, rows), (rows...)...)
+
 function hvcat(nbc::Integer, as...)
     # nbc = # of block columns
     n = length(as)
     mod(n,nbc) != 0 &&
         throw(ArgumentError("number of arrays $n is not a multiple of the requested number of block columns $nbc"))
     nbr = div(n,nbc)
-    hvcat(ntuple(i->nbc, nbr), as...)
+    hvcat(ntuple(Returns(nbc), nbr), as...)
 end
 
 """
@@ -1774,7 +1969,7 @@ julia> hvcat((3,3), a,b,c,d,e,f)
  1  2  3
  4  5  6
 
-julia> [a b;c d; e f]
+julia> [a b; c d; e f]
 3×2 Matrix{Int64}:
  1  2
  3  4
@@ -1860,9 +2055,13 @@ function hvcat(rows::Tuple{Vararg{Int}}, xs::T...) where T<:Number
     a
 end
 
-function hvcat_fill(a::Array, xs::Tuple)
-    k = 1
+function hvcat_fill!(a::Array, xs::Tuple)
     nr, nc = size(a,1), size(a,2)
+    len = length(xs)
+    if nr*nc != len
+        throw(ArgumentError("argument count $(len) does not match specified shape $((nr,nc))"))
+    end
+    k = 1
     for i=1:nr
         @inbounds for j=1:nc
             a[i,j] = xs[k]
@@ -1883,11 +2082,7 @@ function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, xs::Number...) where T
             throw(ArgumentError("row $(i) has mismatched number of columns (expected $nc, got $(rows[i]))"))
         end
     end
-    len = length(xs)
-    if nr*nc != len
-        throw(ArgumentError("argument count $(len) does not match specified shape $((nr,nc))"))
-    end
-    hvcat_fill(Matrix{T}(undef, nr, nc), xs)
+    hvcat_fill!(Matrix{T}(undef, nr, nc), xs)
 end
 
 function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as...) where T
@@ -1901,6 +2096,486 @@ function typed_hvcat(::Type{T}, rows::Tuple{Vararg{Int}}, as...) where T
     T[rs...;]
 end
 
+## N-dimensional concatenation ##
+
+"""
+    hvncat(dim::Int, row_first, values...)
+    hvncat(dims::Tuple{Vararg{Int}}, row_first, values...)
+    hvncat(shape::Tuple{Vararg{Tuple}}, row_first, values...)
+
+Horizontal, vertical, and n-dimensional concatenation of many `values` in one call.
+
+This function is called for block matrix syntax. The first argument either specifies the
+shape of the concatenation, similar to `hvcat`, as a tuple of tuples, or the dimensions that
+specify the key number of elements along each axis, and is used to determine the output
+dimensions. The `dims` form is more performant, and is used by default when the concatenation
+operation has the same number of elements along each axis (e.g., [a b; c d;;; e f ; g h]).
+The `shape` form is used when the number of elements along each axis is unbalanced
+(e.g., [a b ; c]). Unbalanced syntax needs additional validation overhead. The `dim` form
+is an optimization for concatenation along just one dimension. `row_first` indicates how
+`values` are ordered. The meaning of the first and second elements of `shape` are also
+swapped based on `row_first`.
+
+# Examples
+```jldoctest
+julia> a, b, c, d, e, f = 1, 2, 3, 4, 5, 6
+(1, 2, 3, 4, 5, 6)
+
+julia> [a b c;;; d e f]
+1×3×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2  3
+
+[:, :, 2] =
+ 4  5  6
+
+julia> hvncat((2,1,3), false, a,b,c,d,e,f)
+2×1×3 Array{Int64, 3}:
+[:, :, 1] =
+ 1
+ 2
+
+[:, :, 2] =
+ 3
+ 4
+
+[:, :, 3] =
+ 5
+ 6
+
+julia> [a b;;; c d;;; e f]
+1×2×3 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2
+
+[:, :, 2] =
+ 3  4
+
+[:, :, 3] =
+ 5  6
+
+julia> hvncat(((3, 3), (3, 3), (6,)), true, a, b, c, d, e, f)
+1×3×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  2  3
+
+[:, :, 2] =
+ 4  5  6
+```
+
+
+# Examples for construction of the arguments:
+```julia
+[a b c ; d e f ;;;
+ g h i ; j k l ;;;
+ m n o ; p q r ;;;
+ s t u ; v w x]
+=> dims = (2, 3, 4)
+
+[a b ; c ;;; d ;;;;]
+ ___   _     _
+ 2     1     1 = elements in each row (2, 1, 1)
+ _______     _
+ 3           1 = elements in each column (3, 1)
+ _____________
+ 4             = elements in each 3d slice (4,)
+ _____________
+ 4             = elements in each 4d slice (4,)
+ => shape = ((2, 1, 1), (3, 1), (4,), (4,)) with `rowfirst` = true
+```
+"""
+hvncat(dimsshape::Tuple, row_first::Bool, xs...) = _hvncat(dimsshape, row_first, xs...)
+hvncat(dim::Int, xs...) = _hvncat(dim, true, xs...)
+
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool) = _typed_hvncat(Any, dimsshape, row_first)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs...) = _typed_hvncat(promote_eltypeof(xs...), dimsshape, row_first, xs...)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs::T...) where T<:Number = _typed_hvncat(T, dimsshape, row_first, xs...)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs::Number...) = _typed_hvncat(promote_typeof(xs...), dimsshape, row_first, xs...)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs::AbstractArray...) = _typed_hvncat(promote_eltype(xs...), dimsshape, row_first, xs...)
+_hvncat(dimsshape::Union{Tuple, Int}, row_first::Bool, xs::AbstractArray{T}...) where T = _typed_hvncat(T, dimsshape, row_first, xs...)
+
+
+typed_hvncat(T::Type, dimsshape::Tuple, row_first::Bool, xs...) = _typed_hvncat(T, dimsshape, row_first, xs...)
+typed_hvncat(T::Type, dim::Int, xs...) = _typed_hvncat(T, Val(dim), xs...)
+
+# 1-dimensional hvncat methods
+
+_typed_hvncat(::Type, ::Val{0}) = _typed_hvncat_0d_only_one()
+_typed_hvncat(T::Type, ::Val{0}, x) = fill(convert(T, x))
+_typed_hvncat(T::Type, ::Val{0}, x::Number) = fill(convert(T, x))
+_typed_hvncat(T::Type, ::Val{0}, x::AbstractArray) = convert.(T, x)
+_typed_hvncat(::Type, ::Val{0}, ::Any...) = _typed_hvncat_0d_only_one()
+_typed_hvncat(::Type, ::Val{0}, ::Number...) = _typed_hvncat_0d_only_one()
+_typed_hvncat(::Type, ::Val{0}, ::AbstractArray...) = _typed_hvncat_0d_only_one()
+
+_typed_hvncat_0d_only_one() =
+    throw(ArgumentError("a 0-dimensional array may only contain exactly one element"))
+
+_typed_hvncat(T::Type, dim::Int, ::Bool, xs...) = _typed_hvncat(T, Val(dim), xs...) # catches from _hvncat type promoters
+
+function _typed_hvncat(::Type{T}, ::Val{N}) where {T, N}
+    N < 0 &&
+        throw(ArgumentError("concatenation dimension must be nonnegative"))
+    return Array{T, N}(undef, ntuple(x -> 0, Val(N)))
+end
+
+function _typed_hvncat(T::Type, ::Val{N}, xs::Number...) where N
+    N < 0 &&
+        throw(ArgumentError("concatenation dimension must be nonnegative"))
+    A = cat_similar(xs[1], T, (ntuple(x -> 1, Val(N - 1))..., length(xs)))
+    hvncat_fill!(A, false, xs)
+    return A
+end
+
+function _typed_hvncat(::Type{T}, ::Val{N}, as::AbstractArray...) where {T, N}
+    # optimization for arrays that can be concatenated by copying them linearly into the destination
+    # conditions: the elements must all have 1-length dimensions above N
+    length(as) > 0 ||
+        throw(ArgumentError("must have at least one element"))
+    N < 0 &&
+        throw(ArgumentError("concatenation dimension must be nonnegative"))
+    for a ∈ as
+        ndims(a) <= N || all(x -> size(a, x) == 1, (N + 1):ndims(a)) ||
+            return _typed_hvncat(T, (ntuple(x -> 1, Val(N - 1))..., length(as), 1), false, as...)
+            # the extra 1 is to avoid an infinite cycle
+    end
+
+    nd = N
+
+    Ndim = 0
+    for i ∈ eachindex(as)
+        Ndim += cat_size(as[i], N)
+        nd = max(nd, cat_ndims(as[i]))
+        for d ∈ 1:N - 1
+            cat_size(as[1], d) == cat_size(as[i], d) || throw(ArgumentError("mismatched size along axis $d in element $i"))
+        end
+    end
+
+    A = cat_similar(as[1], T, (ntuple(d -> size(as[1], d), N - 1)..., Ndim, ntuple(x -> 1, nd - N)...))
+    k = 1
+    for a ∈ as
+        for i ∈ eachindex(a)
+            A[k] = a[i]
+            k += 1
+        end
+    end
+    return A
+end
+
+function _typed_hvncat(::Type{T}, ::Val{N}, as...) where {T, N}
+    length(as) > 0 ||
+        throw(ArgumentError("must have at least one element"))
+    N < 0 &&
+        throw(ArgumentError("concatenation dimension must be nonnegative"))
+    nd = N
+    Ndim = 0
+    for i ∈ eachindex(as)
+        Ndim += cat_size(as[i], N)
+        nd = max(nd, cat_ndims(as[i]))
+        for d ∈ 1:N-1
+            cat_size(as[i], d) == 1 ||
+                throw(ArgumentError("all dimensions of element $i other than $N must be of length 1"))
+        end
+    end
+
+    A = Array{T, nd}(undef, ntuple(x -> 1, Val(N - 1))..., Ndim, ntuple(x -> 1, nd - N)...)
+
+    k = 1
+    for a ∈ as
+        if a isa AbstractArray
+            lena = length(a)
+            copyto!(A, k, a, 1, lena)
+            k += lena
+        else
+            A[k] = a
+            k += 1
+        end
+    end
+    return A
+end
+
+# 0-dimensional cases for balanced and unbalanced hvncat method
+
+_typed_hvncat(T::Type, ::Tuple{}, ::Bool, x...) = _typed_hvncat(T, Val(0), x...)
+_typed_hvncat(T::Type, ::Tuple{}, ::Bool, x::Number...) = _typed_hvncat(T, Val(0), x...)
+
+
+# balanced dimensions hvncat methods
+
+_typed_hvncat(T::Type, dims::Tuple{Int}, ::Bool, as...) = _typed_hvncat_1d(T, dims[1], Val(false), as...)
+_typed_hvncat(T::Type, dims::Tuple{Int}, ::Bool, as::Number...) = _typed_hvncat_1d(T, dims[1], Val(false), as...)
+
+function _typed_hvncat_1d(::Type{T}, ds::Int, ::Val{row_first}, as...) where {T, row_first}
+    lengthas = length(as)
+    ds > 0 ||
+        throw(ArgumentError("`dimsshape` argument must consist of positive integers"))
+    lengthas == ds ||
+        throw(ArgumentError("number of elements does not match `dimshape` argument; expected $ds, got $lengthas"))
+    if row_first
+        return _typed_hvncat(T, Val(2), as...)
+    else
+        return _typed_hvncat(T, Val(1), as...)
+    end
+end
+
+function _typed_hvncat(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, xs::Number...) where {T, N}
+    all(>(0), dims) ||
+        throw(ArgumentError("`dims` argument must contain positive integers"))
+    A = Array{T, N}(undef, dims...)
+    lengtha = length(A)  # Necessary to store result because throw blocks are being deoptimized right now, which leads to excessive allocations
+    lengthx = length(xs) # Cuts from 3 allocations to 1.
+    if lengtha != lengthx
+       throw(ArgumentError("argument count does not match specified shape (expected $lengtha, got $lengthx)"))
+    end
+    hvncat_fill!(A, row_first, xs)
+    return A
+end
+
+function hvncat_fill!(A::Array, row_first::Bool, xs::Tuple)
+    # putting these in separate functions leads to unnecessary allocations
+    if row_first
+        nr, nc = size(A, 1), size(A, 2)
+        nrc = nr * nc
+        na = prod(size(A)[3:end])
+        k = 1
+        for d ∈ 1:na
+            dd = nrc * (d - 1)
+            for i ∈ 1:nr
+                Ai = dd + i
+                for j ∈ 1:nc
+                    A[Ai] = xs[k]
+                    k += 1
+                    Ai += nr
+                end
+            end
+        end
+    else
+        for k ∈ eachindex(xs)
+            A[k] = xs[k]
+        end
+    end
+end
+
+function _typed_hvncat(T::Type, dims::NTuple{N, Int}, row_first::Bool, as...) where {N}
+    # function barrier after calculating the max is necessary for high performance
+    nd = max(maximum(cat_ndims(a) for a ∈ as), N)
+    return _typed_hvncat_dims(T, (dims..., ntuple(x -> 1, nd - N)...), row_first, as)
+end
+
+function _typed_hvncat_dims(::Type{T}, dims::NTuple{N, Int}, row_first::Bool, as::Tuple) where {T, N}
+    length(as) > 0 ||
+        throw(ArgumentError("must have at least one element"))
+    all(>(0), dims) ||
+        throw(ArgumentError("`dims` argument must contain positive integers"))
+
+    d1 = row_first ? 2 : 1
+    d2 = row_first ? 1 : 2
+
+    outdims = zeros(Int, N)
+
+    # validate shapes for lowest level of concatenation
+    d = findfirst(>(1), dims)
+    if d !== nothing # all dims are 1
+        nblocks = length(as) ÷ dims[d]
+        for b ∈ 1:nblocks
+            offset = ((b - 1) * dims[d])
+            startelementi = offset + 1
+            for i ∈ offset .+ (2:dims[d])
+                for dd ∈ 1:N
+                    dd == d && continue
+                    if size(as[startelementi], dd) != size(as[i], dd)
+                        throw(ArgumentError("incompatible shape in element $i"))
+                    end
+                end
+            end
+        end
+    end
+
+    # discover number of rows or columns
+    for i ∈ 1:dims[d1]
+        outdims[d1] += cat_size(as[i], d1)
+    end
+
+    currentdims = zeros(Int, N)
+    blockcount = 0
+    elementcount = 0
+    for i ∈ eachindex(as)
+        elementcount += cat_length(as[i])
+        currentdims[d1] += cat_size(as[i], d1)
+        if currentdims[d1] == outdims[d1]
+            currentdims[d1] = 0
+            for d ∈ (d2, 3:N...)
+                currentdims[d] += cat_size(as[i], d)
+                if outdims[d] == 0 # unfixed dimension
+                    blockcount += 1
+                    if blockcount == dims[d]
+                        outdims[d] = currentdims[d]
+                        currentdims[d] = 0
+                        blockcount = 0
+                    else
+                        break
+                    end
+                else # fixed dimension
+                    if currentdims[d] == outdims[d] # end of dimension
+                        currentdims[d] = 0
+                    elseif currentdims[d] < outdims[d] # dimension in progress
+                        break
+                    else # exceeded dimension
+                        throw(ArgumentError("argument $i has too many elements along axis $d"))
+                    end
+                end
+            end
+        elseif currentdims[d1] > outdims[d1] # exceeded dimension
+            throw(ArgumentError("argument $i has too many elements along axis $d1"))
+        end
+    end
+
+    outlen = prod(outdims)
+    elementcount == outlen ||
+        throw(ArgumentError("mismatched number of elements; expected $(outlen), got $(elementcount)"))
+
+    # copy into final array
+    A = cat_similar(as[1], T, outdims)
+    # @assert all(==(0), currentdims)
+    outdims .= 0
+    hvncat_fill!(A, currentdims, outdims, d1, d2, as)
+    return A
+end
+
+
+# unbalanced dimensions hvncat methods
+
+function _typed_hvncat(T::Type, shape::Tuple{Tuple}, row_first::Bool, xs...)
+    length(shape[1]) > 0 ||
+        throw(ArgumentError("each level of `shape` argument must have at least one value"))
+    return _typed_hvncat_1d(T, shape[1][1], Val(row_first), xs...)
+end
+
+function _typed_hvncat(T::Type, shape::NTuple{N, Tuple}, row_first::Bool, as...) where {N}
+    # function barrier after calculating the max is necessary for high performance
+    nd = max(maximum(cat_ndims(a) for a ∈ as), N)
+    return _typed_hvncat_shape(T, (shape..., ntuple(x -> shape[end], nd - N)...), row_first, as)
+end
+
+function _typed_hvncat_shape(::Type{T}, shape::NTuple{N, Tuple}, row_first, as::Tuple) where {T, N}
+    length(as) > 0 ||
+        throw(ArgumentError("must have at least one element"))
+    all(>(0), tuple((shape...)...)) ||
+        throw(ArgumentError("`shape` argument must consist of positive integers"))
+
+    d1 = row_first ? 2 : 1
+    d2 = row_first ? 1 : 2
+
+    shapev = collect(shape) # saves allocations later
+    all(!isempty, shapev) ||
+        throw(ArgumentError("each level of `shape` argument must have at least one value"))
+    length(shapev[end]) == 1 ||
+        throw(ArgumentError("last level of shape must contain only one integer"))
+    shapelength = shapev[end][1]
+    lengthas = length(as)
+    shapelength == lengthas || throw(ArgumentError("number of elements does not match shape; expected $(shapelength), got $lengthas)"))
+    # discover dimensions
+    nd = max(N, cat_ndims(as[1]))
+    outdims = zeros(Int, nd)
+    currentdims = zeros(Int, nd)
+    blockcounts = zeros(Int, nd)
+    shapepos = ones(Int, nd)
+
+    elementcount = 0
+    for i ∈ eachindex(as)
+        elementcount += cat_length(as[i])
+        wasstartblock = false
+        for d ∈ 1:N
+            ad = (d < 3 && row_first) ? (d == 1 ? 2 : 1) : d
+            dsize = cat_size(as[i], ad)
+            blockcounts[d] += 1
+
+            if d == 1 || i == 1 || wasstartblock
+                currentdims[d] += dsize
+            elseif dsize != cat_size(as[i - 1], ad)
+                throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \
+                                    expected $(cat_size(as[i - 1], ad)), got $dsize"))
+            end
+
+            wasstartblock = blockcounts[d] == 1 # remember for next dimension
+
+            isendblock = blockcounts[d] == shapev[d][shapepos[d]]
+            if isendblock
+                if outdims[d] == 0
+                    outdims[d] = currentdims[d]
+                elseif outdims[d] != currentdims[d]
+                    throw(ArgumentError("argument $i has a mismatched number of elements along axis $ad; \
+                                        expected $(abs(outdims[d] - (currentdims[d] - dsize))), got $dsize"))
+                end
+                currentdims[d] = 0
+                blockcounts[d] = 0
+                shapepos[d] += 1
+                d > 1 && (blockcounts[d - 1] == 0 ||
+                    throw(ArgumentError("shape in level $d is inconsistent; level counts must nest \
+                                        evenly into each other")))
+            end
+        end
+    end
+
+    outlen = prod(outdims)
+    elementcount == outlen ||
+        throw(ArgumentError("mismatched number of elements; expected $(outlen), got $(elementcount)"))
+
+    if row_first
+        outdims[1], outdims[2] = outdims[2], outdims[1]
+    end
+
+    # @assert all(==(0), currentdims)
+    # @assert all(==(0), blockcounts)
+
+    # copy into final array
+    A = cat_similar(as[1], T, outdims)
+    hvncat_fill!(A, currentdims, blockcounts, d1, d2, as)
+    return A
+end
+
+function hvncat_fill!(A::AbstractArray{T, N}, scratch1::Vector{Int}, scratch2::Vector{Int}, d1::Int, d2::Int, as::Tuple{Vararg}) where {T, N}
+    outdims = size(A)
+    offsets = scratch1
+    inneroffsets = scratch2
+    for a ∈ as
+        if isa(a, AbstractArray)
+            for ai ∈ a
+                Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
+                A[Ai] = ai
+
+                for j ∈ 1:N
+                    inneroffsets[j] += 1
+                    inneroffsets[j] < cat_size(a, j) && break
+                    inneroffsets[j] = 0
+                end
+            end
+        else
+            Ai = hvncat_calcindex(offsets, inneroffsets, outdims, N)
+            A[Ai] = a
+        end
+
+        for j ∈ (d1, d2, 3:N...)
+            offsets[j] += cat_size(a, j)
+            offsets[j] < outdims[j] && break
+            offsets[j] = 0
+        end
+    end
+end
+
+@propagate_inbounds function hvncat_calcindex(offsets::Vector{Int}, inneroffsets::Vector{Int},
+                                              outdims::Tuple{Vararg{Int}}, nd::Int)
+    Ai = inneroffsets[1] + offsets[1] + 1
+    for j ∈ 2:nd
+        increment = inneroffsets[j] + offsets[j]
+        for k ∈ 1:j-1
+            increment *= outdims[k]
+        end
+        Ai += increment
+    end
+    Ai
+end
+
 ## Reductions and accumulates ##
 
 function isequal(A::AbstractArray, B::AbstractArray)
@@ -1925,6 +2600,11 @@ function cmp(A::AbstractVector, B::AbstractVector)
     return cmp(length(A), length(B))
 end
 
+"""
+    isless(A::AbstractVector, B::AbstractVector)
+
+Returns true when `A` is less than `B` in lexicographic order.
+"""
 isless(A::AbstractVector, B::AbstractVector) = cmp(A, B) < 0
 
 function (==)(A::AbstractArray, B::AbstractArray)
@@ -1946,12 +2626,12 @@ end
 # _sub2ind and _ind2sub
 # fallbacks
 function _sub2ind(A::AbstractArray, I...)
-    @_inline_meta
+    @inline
     _sub2ind(axes(A), I...)
 end
 
 function _ind2sub(A::AbstractArray, ind)
-    @_inline_meta
+    @inline
     _ind2sub(axes(A), ind)
 end
 
@@ -1959,49 +2639,49 @@ end
 _sub2ind(::Tuple{}) = 1
 _sub2ind(::DimsInteger) = 1
 _sub2ind(::Indices) = 1
-_sub2ind(::Tuple{}, I::Integer...) = (@_inline_meta; _sub2ind_recurse((), 1, 1, I...))
+_sub2ind(::Tuple{}, I::Integer...) = (@inline; _sub2ind_recurse((), 1, 1, I...))
 
 # Generic cases
-_sub2ind(dims::DimsInteger, I::Integer...) = (@_inline_meta; _sub2ind_recurse(dims, 1, 1, I...))
-_sub2ind(inds::Indices, I::Integer...) = (@_inline_meta; _sub2ind_recurse(inds, 1, 1, I...))
+_sub2ind(dims::DimsInteger, I::Integer...) = (@inline; _sub2ind_recurse(dims, 1, 1, I...))
+_sub2ind(inds::Indices, I::Integer...) = (@inline; _sub2ind_recurse(inds, 1, 1, I...))
 # In 1d, there's a question of whether we're doing cartesian indexing
 # or linear indexing. Support only the former.
 _sub2ind(inds::Indices{1}, I::Integer...) =
     throw(ArgumentError("Linear indexing is not defined for one-dimensional arrays"))
-_sub2ind(inds::Tuple{OneTo}, I::Integer...) = (@_inline_meta; _sub2ind_recurse(inds, 1, 1, I...)) # only OneTo is safe
+_sub2ind(inds::Tuple{OneTo}, I::Integer...) = (@inline; _sub2ind_recurse(inds, 1, 1, I...)) # only OneTo is safe
 _sub2ind(inds::Tuple{OneTo}, i::Integer)    = i
 
 _sub2ind_recurse(::Any, L, ind) = ind
 function _sub2ind_recurse(::Tuple{}, L, ind, i::Integer, I::Integer...)
-    @_inline_meta
+    @inline
     _sub2ind_recurse((), L, ind+(i-1)*L, I...)
 end
 function _sub2ind_recurse(inds, L, ind, i::Integer, I::Integer...)
-    @_inline_meta
+    @inline
     r1 = inds[1]
     _sub2ind_recurse(tail(inds), nextL(L, r1), ind+offsetin(i, r1)*L, I...)
 end
 
 nextL(L, l::Integer) = L*l
-nextL(L, r::AbstractUnitRange) = L*unsafe_length(r)
-nextL(L, r::Slice) = L*unsafe_length(r.indices)
+nextL(L, r::AbstractUnitRange) = L*length(r)
+nextL(L, r::Slice) = L*length(r.indices)
 offsetin(i, l::Integer) = i-1
 offsetin(i, r::AbstractUnitRange) = i-first(r)
 
-_ind2sub(::Tuple{}, ind::Integer) = (@_inline_meta; ind == 1 ? () : throw(BoundsError()))
-_ind2sub(dims::DimsInteger, ind::Integer) = (@_inline_meta; _ind2sub_recurse(dims, ind-1))
-_ind2sub(inds::Indices, ind::Integer)     = (@_inline_meta; _ind2sub_recurse(inds, ind-1))
+_ind2sub(::Tuple{}, ind::Integer) = (@inline; ind == 1 ? () : throw(BoundsError()))
+_ind2sub(dims::DimsInteger, ind::Integer) = (@inline; _ind2sub_recurse(dims, ind-1))
+_ind2sub(inds::Indices, ind::Integer)     = (@inline; _ind2sub_recurse(inds, ind-1))
 _ind2sub(inds::Indices{1}, ind::Integer) =
     throw(ArgumentError("Linear indexing is not defined for one-dimensional arrays"))
 _ind2sub(inds::Tuple{OneTo}, ind::Integer) = (ind,)
 
 _ind2sub_recurse(::Tuple{}, ind) = (ind+1,)
 function _ind2sub_recurse(indslast::NTuple{1}, ind)
-    @_inline_meta
+    @inline
     (_lookup(ind, indslast[1]),)
 end
 function _ind2sub_recurse(inds, ind)
-    @_inline_meta
+    @inline
     r1 = inds[1]
     indnext, f, l = _div(ind, r1)
     (ind-l*indnext+f, _ind2sub_recurse(tail(inds), indnext)...)
@@ -2010,7 +2690,7 @@ end
 _lookup(ind, d::Integer) = ind+1
 _lookup(ind, r::AbstractUnitRange) = ind+first(r)
 _div(ind, d::Integer) = div(ind, d), 1, d
-_div(ind, r::AbstractUnitRange) = (d = unsafe_length(r); (div(ind, d), first(r), d))
+_div(ind, r::AbstractUnitRange) = (d = length(r); (div(ind, d), first(r), d))
 
 # Vectorized forms
 function _sub2ind(inds::Indices{1}, I1::AbstractVector{T}, I::AbstractVector{T}...) where T<:Integer
@@ -2032,7 +2712,7 @@ function _sub2ind_vecs(inds, I::AbstractVector...)
 end
 
 function _sub2ind!(Iout, inds, Iinds, I)
-    @_noinline_meta
+    @noinline
     for i in Iinds
         # Iout[i] = _sub2ind(inds, map(Ij -> Ij[i], I)...)
         Iout[i] = sub2ind_vec(inds, i, I)
@@ -2040,8 +2720,8 @@ function _sub2ind!(Iout, inds, Iinds, I)
     Iout
 end
 
-sub2ind_vec(inds, i, I) = (@_inline_meta; _sub2ind(inds, _sub2ind_vec(i, I...)...))
-_sub2ind_vec(i, I1, I...) = (@_inline_meta; (I1[i], _sub2ind_vec(i, I...)...))
+sub2ind_vec(inds, i, I) = (@inline; _sub2ind(inds, _sub2ind_vec(i, I...)...))
+_sub2ind_vec(i, I1, I...) = (@inline; (I1[i], _sub2ind_vec(i, I...)...))
 _sub2ind_vec(i) = ()
 
 function _ind2sub(inds::Union{DimsInteger{N},Indices{N}}, ind::AbstractVector{<:Integer}) where N
@@ -2062,18 +2742,28 @@ end
     foreach(f, c...) -> Nothing
 
 Call function `f` on each element of iterable `c`.
-For multiple iterable arguments, `f` is called elementwise.
-`foreach` should be used instead of `map` when the results of `f` are not
+For multiple iterable arguments, `f` is called elementwise, and iteration stops when
+any iterator is finished.
+
+`foreach` should be used instead of [`map`](@ref) when the results of `f` are not
 needed, for example in `foreach(println, array)`.
 
 # Examples
 ```jldoctest
-julia> a = 1:3:7;
+julia> tri = 1:3:7; res = Int[];
 
-julia> foreach(x -> println(x^2), a)
-1
-16
-49
+julia> foreach(x -> push!(res, x^2), tri)
+
+julia> res
+3-element Vector{$(Int)}:
+  1
+ 16
+ 49
+
+julia> foreach((x, y) -> println(x, " with ", y), tri, 'a':'z')
+1 with a
+4 with b
+7 with c
 ```
 """
 foreach(f) = (f(); nothing)
@@ -2094,6 +2784,8 @@ colons go in this expression. The results are concatenated along the remaining d
 For example, if `dims` is `[1,2]` and `A` is 4-dimensional, `f` is called on `A[:,:,i,j]`
 for all `i` and `j`.
 
+See also [`eachcol`](@ref), [`eachslice`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = reshape(Vector(1:16),(2,2,2,2))
@@ -2172,9 +2864,9 @@ function mapslices(f, A::AbstractArray; dims)
     end
     nextra = max(0, length(dims)-ndims(r1))
     if eltype(Rsize) == Int
-        Rsize[dims] = [size(r1)..., ntuple(d->1, nextra)...]
+        Rsize[dims] = [size(r1)..., ntuple(Returns(1), nextra)...]
     else
-        Rsize[dims] = [axes(r1)..., ntuple(d->OneTo(1), nextra)...]
+        Rsize[dims] = [axes(r1)..., ntuple(Returns(OneTo(1)), nextra)...]
     end
     R = similar(r1, tuple(Rsize...,))
 
@@ -2219,6 +2911,10 @@ end
 concatenate_setindex!(R, v, I...) = (R[I...] .= (v,); R)
 concatenate_setindex!(R, X::AbstractArray, I...) = (R[I...] = X)
 
+## 0 arguments
+
+map(f) = f()
+
 ## 1 argument
 
 function map!(f::F, dest::AbstractArray, A::AbstractArray) where F
@@ -2235,14 +2931,13 @@ map(f, A::AbstractArray) = collect_similar(A, Generator(f,A))
 mapany(f, A::AbstractArray) = map!(f, Vector{Any}(undef, length(A)), A)
 mapany(f, itr) = Any[f(x) for x in itr]
 
-# default to returning an Array for `map` on general iterators
 """
     map(f, c...) -> collection
 
 Transform collection `c` by applying `f` to each element. For multiple collection arguments,
-apply `f` elementwise.
+apply `f` elementwise, and stop when when any of them is exhausted.
 
-See also: [`mapslices`](@ref)
+See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref).
 
 # Examples
 ```jldoctest
@@ -2252,14 +2947,14 @@ julia> map(x -> x * 2, [1, 2, 3])
  4
  6
 
-julia> map(+, [1, 2, 3], [10, 20, 30])
+julia> map(+, [1, 2, 3], [10, 20, 30, 400, 5000])
 3-element Vector{Int64}:
  11
  22
  33
 ```
 """
-map(f, A) = collect(Generator(f,A))
+map(f, A) = collect(Generator(f,A)) # default to returning an Array for `map` on general iterators
 
 map(f, ::AbstractDict) = error("map is not defined on dictionaries")
 map(f, ::AbstractSet) = error("map is not defined on sets")
@@ -2297,7 +2992,9 @@ end
     map!(function, destination, collection...)
 
 Like [`map`](@ref), but stores the result in `destination` rather than a new
-collection. `destination` must be at least as large as the first collection.
+collection. `destination` must be at least as large as the smallest collection.
+
+See also: [`map`](@ref), [`foreach`](@ref), [`zip`](@ref), [`copyto!`](@ref).
 
 # Examples
 ```jldoctest
@@ -2310,6 +3007,14 @@ julia> a
  2.0
  4.0
  6.0
+
+julia> map!(+, zeros(Int, 5), 100:999, 1:3)
+5-element Vector{$(Int)}:
+ 101
+ 103
+ 105
+   0
+   0
 ```
 """
 function map!(f::F, dest::AbstractArray, As::AbstractArray...) where {F}
@@ -2318,7 +3023,31 @@ function map!(f::F, dest::AbstractArray, As::AbstractArray...) where {F}
     map_n!(f, dest, As)
 end
 
-map(f) = f()
+"""
+    map(f, A::AbstractArray...) -> N-array
+
+When acting on multi-dimensional arrays of the same [`ndims`](@ref),
+they must all have the same [`axes`](@ref), and the answer will too.
+
+See also [`broadcast`](@ref), which allows mismatched sizes.
+
+# Examples
+```
+julia> map(//, [1 2; 3 4], [4 3; 2 1])
+2×2 Matrix{Rational{$Int}}:
+ 1//4  2//3
+ 3//2  4//1
+
+julia> map(+, [1 2; 3 4], zeros(2,1))
+ERROR: DimensionMismatch
+
+julia> map(+, [1 2; 3 4], [1,10,100,1000], zeros(3,1))  # iterates until 3rd is exhausted
+3-element Vector{Float64}:
+   2.0
+  13.0
+ 102.0
+```
+"""
 map(f, iters...) = collect(Generator(f, iters...))
 
 # multi-item push!, pushfirst! (built on top of type-specific 1-item version)
@@ -2330,13 +3059,21 @@ pushfirst!(A, a, b, c...) = pushfirst!(pushfirst!(A, c...), a, b)
 
 ## hashing AbstractArray ##
 
+const hash_abstractarray_seed = UInt === UInt64 ? 0x7e2d6fb6448beb77 : 0xd4514ce5
 function hash(A::AbstractArray, h::UInt)
-    h = hash(AbstractArray, h)
+    h += hash_abstractarray_seed
     # Axes are themselves AbstractArrays, so hashing them directly would stack overflow
     # Instead hash the tuple of firsts and lasts along each dimension
     h = hash(map(first, axes(A)), h)
     h = hash(map(last, axes(A)), h)
-    isempty(A) && return h
+
+    # For short arrays, it's not worth doing anything complicated
+    if length(A) < 8192
+        for x in A
+            h = hash(x, h)
+        end
+        return h
+    end
 
     # Goal: Hash approximately log(N) entries with a higher density of hashed elements
     # weighted towards the end and special consideration for repeated values. Colliding
@@ -2367,7 +3104,7 @@ function hash(A::AbstractArray, h::UInt)
     n = 0
     while true
         n += 1
-        # Hash the current key-index and its element
+        # Hash the element
         elt = A[keyidx]
         h = hash(keyidx=>elt, h)
 
@@ -2394,3 +3131,60 @@ function hash(A::AbstractArray, h::UInt)
 
     return h
 end
+
+# The semantics of `collect` are weird. Better to write our own
+function rest(a::AbstractArray{T}, state...) where {T}
+    v = Vector{T}(undef, 0)
+    # assume only very few items are taken from the front
+    sizehint!(v, length(a))
+    return foldl(push!, Iterators.rest(a, state...), init=v)
+end
+
+
+## keepat! ##
+
+# NOTE: since these use `@inbounds`, they are actually only intended for Vector and BitVector
+
+function _keepat!(a::AbstractVector, inds)
+    local prev
+    i = firstindex(a)
+    for k in inds
+        if @isdefined(prev)
+            prev < k || throw(ArgumentError("indices must be unique and sorted"))
+        end
+        ak = a[k] # must happen even when i==k for bounds checking
+        if i != k
+            @inbounds a[i] = ak # k > i, so a[i] is inbounds
+        end
+        prev = k
+        i = nextind(a, i)
+    end
+    deleteat!(a, i:lastindex(a))
+    return a
+end
+
+function _keepat!(a::AbstractVector, m::AbstractVector{Bool})
+    length(m) == length(a) || throw(BoundsError(a, m))
+    j = firstindex(a)
+    for i in eachindex(a, m)
+        @inbounds begin
+            if m[i]
+                i == j || (a[j] = a[i])
+                j = nextind(a, j)
+            end
+        end
+    end
+    deleteat!(a, j:lastindex(a))
+end
+
+## 1-d circshift ##
+function circshift!(a::AbstractVector, shift::Integer)
+    n = length(a)
+    n == 0 && return
+    shift = mod(shift, n)
+    shift == 0 && return
+    reverse!(a, 1, shift)
+    reverse!(a, shift+1, length(a))
+    reverse!(a)
+    return a
+end
diff --git a/base/abstractarraymath.jl b/base/abstractarraymath.jl
index 953c190ab12efd..9690fc0f2e4c4b 100644
--- a/base/abstractarraymath.jl
+++ b/base/abstractarraymath.jl
@@ -36,7 +36,7 @@ julia> vec(1:3)
 1:3
 ```
 
-See also [`reshape`](@ref).
+See also [`reshape`](@ref), [`dropdims`](@ref).
 """
 vec(a::AbstractArray) = reshape(a,length(a))
 vec(a::AbstractVector) = a
@@ -48,9 +48,15 @@ _sub(t::Tuple, s::Tuple) = _sub(tail(t), tail(s))
 """
     dropdims(A; dims)
 
-Remove the dimensions specified by `dims` from array `A`.
-Elements of `dims` must be unique and within the range `1:ndims(A)`.
-`size(A,i)` must equal 1 for all `i` in `dims`.
+Return an array with the same data as `A`, but with the dimensions specified by
+`dims` removed. `size(A,d)` must equal 1 for every `d` in `dims`,
+and repeated dimensions or numbers outside `1:ndims(A)` are forbidden.
+
+The result shares the same underlying data as `A`, such that the
+result is mutable if and only if `A` is mutable, and setting elements of one
+alters the values of the other.
+
+See also: [`reshape`](@ref), [`vec`](@ref).
 
 # Examples
 ```jldoctest
@@ -60,11 +66,17 @@ julia> a = reshape(Vector(1:4),(2,2,1,1))
  1  3
  2  4
 
-julia> dropdims(a; dims=3)
+julia> b = dropdims(a; dims=3)
 2×2×1 Array{Int64, 3}:
 [:, :, 1] =
  1  3
  2  4
+
+julia> b[1,1,1] = 5; a
+2×2×1×1 Array{Int64, 4}:
+[:, :, 1, 1] =
+ 5  3
+ 2  4
 ```
 """
 dropdims(A; dims) = _dropdims(A, dims)
@@ -76,23 +88,134 @@ function _dropdims(A::AbstractArray, dims::Dims)
             dims[j] == dims[i] && throw(ArgumentError("dropped dims must be unique"))
         end
     end
-    d = ()
-    for i = 1:ndims(A)
-        if !in(i, dims)
-            d = tuple(d..., axes(A, i))
-        end
-    end
-    reshape(A, d::typeof(_sub(axes(A), dims)))
+    ax = _foldoneto((ds, d) -> d in dims ? ds : (ds..., axes(A,d)), (), Val(ndims(A)))
+    reshape(A, ax::typeof(_sub(axes(A), dims)))
 end
 _dropdims(A::AbstractArray, dim::Integer) = _dropdims(A, (Int(dim),))
 
 ## Unary operators ##
 
-conj(x::AbstractArray{<:Real}) = x
+"""
+    conj!(A)
+
+Transform an array to its complex conjugate in-place.
+
+See also [`conj`](@ref).
+
+# Examples
+```jldoctest
+julia> A = [1+im 2-im; 2+2im 3+im]
+2×2 Matrix{Complex{Int64}}:
+ 1+1im  2-1im
+ 2+2im  3+1im
+
+julia> conj!(A);
+
+julia> A
+2×2 Matrix{Complex{Int64}}:
+ 1-1im  2+1im
+ 2-2im  3-1im
+```
+"""
+conj!(A::AbstractArray{<:Number}) = (@inbounds broadcast!(conj, A, A); A)
 conj!(x::AbstractArray{<:Real}) = x
 
-real(x::AbstractArray{<:Real}) = x
-imag(x::AbstractArray{<:Real}) = zero(x)
+"""
+    conj(A::AbstractArray)
+
+Return an array containing the complex conjugate of each entry in array `A`.
+
+Equivalent to `conj.(A)`, except that when `eltype(A) <: Real`
+`A` is returned without copying, and that when `A` has zero dimensions,
+a 0-dimensional array is returned (rather than a scalar).
+
+# Examples
+```jldoctest
+julia> conj([1, 2im, 3 + 4im])
+3-element Vector{Complex{Int64}}:
+ 1 + 0im
+ 0 - 2im
+ 3 - 4im
+
+julia> conj(fill(2 - im))
+0-dimensional Array{Complex{Int64}, 0}:
+2 + 1im
+```
+"""
+conj(A::AbstractArray) = broadcast_preserving_zero_d(conj, A)
+conj(A::AbstractArray{<:Real}) = A
+
+"""
+    real(A::AbstractArray)
+
+Return an array containing the real part of each entry in array `A`.
+
+Equivalent to `real.(A)`, except that when `eltype(A) <: Real`
+`A` is returned without copying, and that when `A` has zero dimensions,
+a 0-dimensional array is returned (rather than a scalar).
+
+# Examples
+```jldoctest
+julia> real([1, 2im, 3 + 4im])
+3-element Vector{Int64}:
+ 1
+ 0
+ 3
+
+julia> real(fill(2 - im))
+0-dimensional Array{Int64, 0}:
+2
+```
+"""
+real(A::AbstractArray) = broadcast_preserving_zero_d(real, A)
+real(A::AbstractArray{<:Real}) = A
+
+"""
+    imag(A::AbstractArray)
+
+Return an array containing the imaginary part of each entry in array `A`.
+
+Equivalent to `imag.(A)`, except that when `A` has zero dimensions,
+a 0-dimensional array is returned (rather than a scalar).
+
+# Examples
+```jldoctest
+julia> imag([1, 2im, 3 + 4im])
+3-element Vector{Int64}:
+ 0
+ 2
+ 4
+
+julia> imag(fill(2 - im))
+0-dimensional Array{Int64, 0}:
+-1
+```
+"""
+imag(A::AbstractArray) = broadcast_preserving_zero_d(imag, A)
+imag(A::AbstractArray{<:Real}) = zero(A)
+
+"""
+    reim(A::AbstractArray)
+
+Return a tuple of two arrays containing respectively the real and the imaginary
+part of each entry in `A`.
+
+Equivalent to `(real.(A), imag.(A))`, except that when `eltype(A) <: Real`
+`A` is returned without copying to represent the real part, and that when `A` has
+zero dimensions, a 0-dimensional array is returned (rather than a scalar).
+
+# Examples
+```jldoctest
+julia> reim([1, 2im, 3 + 4im])
+([1, 0, 3], [0, 2, 4])
+
+julia> reim(fill(2 - im))
+(fill(2), fill(-1))
+```
+"""
+reim(A::AbstractArray)
+
+-(A::AbstractArray) = broadcast_preserving_zero_d(-, A)
 
 +(x::AbstractArray{<:Number}) = x
 *(x::AbstractArray{<:Number,2}) = x
@@ -106,6 +229,8 @@ Return a view of all the data of `A` where the index for dimension `d` equals `i
 
 Equivalent to `view(A,:,:,...,i,:,:,...)` where `i` is in position `d`.
 
+See also: [`eachslice`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 2 3 4; 5 6 7 8]
@@ -117,13 +242,18 @@ julia> selectdim(A, 2, 3)
 2-element view(::Matrix{Int64}, :, 3) with eltype Int64:
  3
  7
+
+julia> selectdim(A, 2, 3:4)
+2×2 view(::Matrix{Int64}, :, 3:4) with eltype Int64:
+ 3  4
+ 7  8
 ```
 """
 @inline selectdim(A::AbstractArray, d::Integer, i) = _selectdim(A, d, i, _setindex(i, d, map(Slice, axes(A))...))
 @noinline function _selectdim(A, d, i, idxs)
     d >= 1 || throw(ArgumentError("dimension must be ≥ 1, got $d"))
     nd = ndims(A)
-    d > nd && (i == 1 || throw(BoundsError(A, (ntuple(k->Colon(),d-1)..., i))))
+    d > nd && (i == 1 || throw(BoundsError(A, (ntuple(Returns(Colon()),d-1)..., i))))
     return view(A, idxs...)
 end
 
@@ -138,6 +268,8 @@ Circularly shift, i.e. rotate, the data in an array. The second argument is a tu
 vector giving the amount to shift in each dimension, or an integer to shift only in the
 first dimension.
 
+See also: [`circshift!`](@ref), [`circcopy!`](@ref), [`bitrotate`](@ref), [`<<`](@ref).
+
 # Examples
 ```jldoctest
 julia> b = reshape(Vector(1:16), (4,4))
@@ -185,8 +317,6 @@ julia> circshift(a, -1)
  1
  1
 ```
-
-See also [`circshift!`](@ref).
 """
 function circshift(a::AbstractArray, shiftamt)
     circshift!(similar(a), a, map(Integer, (shiftamt...,)))
@@ -199,6 +329,8 @@ end
 
 Construct an array by repeating array `A` a given number of times in each dimension, specified by `counts`.
 
+See also: [`fill`](@ref), [`Iterators.repeated`](@ref), [`Iterators.cycle`](@ref).
+
 # Examples
 ```jldoctest
 julia> repeat([1, 2, 3], 2)
@@ -225,7 +357,7 @@ function repeat(A::AbstractArray, counts...)
 end
 
 """
-    repeat(A::AbstractArray; inner=ntuple(x->1, ndims(A)), outer=ntuple(x->1, ndims(A)))
+    repeat(A::AbstractArray; inner=ntuple(Returns(1), ndims(A)), outer=ntuple(Returns(1), ndims(A)))
 
 Construct an array by repeating the entries of `A`. The i-th element of `inner` specifies
 the number of times that the individual entries of the i-th dimension of `A` should be
@@ -369,7 +501,6 @@ function repeat_outer(arr::AbstractArray{<:Any,N}, dims::NTuple{N,Any}) where {N
 end
 
 function repeat_inner(arr, inner)
-    basedims = size(arr)
     outsize = map(*, size(arr), inner)
     out = similar(arr, outsize)
     for I in CartesianIndices(arr)
@@ -392,7 +523,7 @@ end#module
 Create a generator that iterates over the first dimension of vector or matrix `A`,
 returning the rows as `AbstractVector` views.
 
-See also [`eachcol`](@ref) and [`eachslice`](@ref).
+See also [`eachcol`](@ref), [`eachslice`](@ref), [`mapslices`](@ref).
 
 !!! compat "Julia 1.1"
      This function requires at least Julia 1.1.
@@ -460,7 +591,7 @@ the data from the other dimensions in `A`.
 Only a single dimension in `dims` is currently supported. Equivalent to `(view(A,:,:,...,i,:,:
 ...)) for i in axes(A, dims))`, where `i` is in position `dims`.
 
-See also [`eachrow`](@ref), [`eachcol`](@ref), and [`selectdim`](@ref).
+See also [`eachrow`](@ref), [`eachcol`](@ref), [`mapslices`](@ref), and [`selectdim`](@ref).
 
 !!! compat "Julia 1.1"
      This function requires at least Julia 1.1.
@@ -491,7 +622,7 @@ julia> collect(eachslice(M, dims=2))
     length(dims) == 1 || throw(ArgumentError("only single dimensions are supported"))
     dim = first(dims)
     dim <= ndims(A) || throw(DimensionMismatch("A doesn't have $dim dimensions"))
-    inds_before = ntuple(d->(:), dim-1)
-    inds_after = ntuple(d->(:), ndims(A)-dim)
+    inds_before = ntuple(Returns(:), dim-1)
+    inds_after = ntuple(Returns(:), ndims(A)-dim)
     return (view(A, inds_before..., i, inds_after...) for i in axes(A, dim))
 end
diff --git a/base/abstractdict.jl b/base/abstractdict.jl
index 62bb3b8cf5a2e8..527b422fb56848 100644
--- a/base/abstractdict.jl
+++ b/base/abstractdict.jl
@@ -31,9 +31,13 @@ function in(p, a::AbstractDict)
 end
 
 function summary(io::IO, t::AbstractDict)
-    n = length(t)
     showarg(io, t, true)
-    print(io, " with ", n, (n==1 ? " entry" : " entries"))
+    if Base.IteratorSize(t) isa HasLength
+        n = length(t)
+        print(io, " with ", n, (n==1 ? " entry" : " entries"))
+    else
+        print(io, "(...)")
+    end
 end
 
 struct KeySet{K, T <: AbstractDict{K}} <: AbstractSet{K}
@@ -62,6 +66,8 @@ function iterate(v::Union{KeySet,ValueIterator}, state...)
     return (y[1][isa(v, KeySet) ? 1 : 2], y[2])
 end
 
+copy(v::KeySet) = copymutable(v)
+
 in(k, v::KeySet) = get(v.dict, k, secret_table_token) !== secret_table_token
 
 """
@@ -130,6 +136,38 @@ values(a::AbstractDict) = ValueIterator(a)
 Return an iterator over `key => value` pairs for any
 collection that maps a set of keys to a set of values.
 This includes arrays, where the keys are the array indices.
+
+# Examples
+```jldoctest
+julia> a = Dict(zip(["a", "b", "c"], [1, 2, 3]))
+Dict{String, Int64} with 3 entries:
+  "c" => 3
+  "b" => 2
+  "a" => 1
+
+julia> pairs(a)
+Dict{String, Int64} with 3 entries:
+  "c" => 3
+  "b" => 2
+  "a" => 1
+
+julia> foreach(println, pairs(["a", "b", "c"]))
+1 => "a"
+2 => "b"
+3 => "c"
+
+julia> (;a=1, b=2, c=3) |> pairs |> collect
+3-element Vector{Pair{Symbol, Int64}}:
+ :a => 1
+ :b => 2
+ :c => 3
+
+julia> (;a=1, b=2, c=3) |> collect
+3-element Vector{Int64}:
+ 1
+ 2
+ 3
+```
 """
 pairs(collection) = Generator(=>, keys(collection), values(collection))
 
@@ -151,7 +189,10 @@ empty(a::AbstractDict) = empty(a, keytype(a), valtype(a))
 empty(a::AbstractDict, ::Type{V}) where {V} = empty(a, keytype(a), V) # Note: this is the form which makes sense for `Vector`.
 
 copy(a::AbstractDict) = merge!(empty(a), a)
-copy!(dst::AbstractDict, src::AbstractDict) = merge!(empty!(dst), src)
+function copy!(dst::AbstractDict, src::AbstractDict)
+    dst === src && return dst
+    merge!(empty!(dst), src)
+end
 
 """
     merge!(d::AbstractDict, others::AbstractDict...)
@@ -176,6 +217,9 @@ Dict{Int64, Int64} with 3 entries:
 """
 function merge!(d::AbstractDict, others::AbstractDict...)
     for other in others
+        if haslength(d) && haslength(other)
+            sizehint!(d, length(d) + length(other))
+        end
         for (k,v) in other
             d[k] = v
         end
@@ -230,12 +274,14 @@ Dict{Int64, Int64} with 3 entries:
 ```
 """
 function mergewith!(combine, d::AbstractDict, others::AbstractDict...)
-    for other in others
-        for (k,v) in other
-            d[k] = haskey(d, k) ? combine(d[k], v) : v
-        end
+    foldl(mergewith!(combine), others; init = d)
+end
+
+function mergewith!(combine, d1::AbstractDict, d2::AbstractDict)
+    for (k, v) in d2
+        d1[k] = haskey(d1, k) ? combine(d1[k], v) : v
     end
-    return d
+    return d1
 end
 
 mergewith!(combine) = (args...) -> mergewith!(combine, args...)
@@ -245,7 +291,7 @@ merge!(combine::Callable, args...) = mergewith!(combine, args...)
 """
     keytype(type)
 
-Get the key type of an dictionary type. Behaves similarly to [`eltype`](@ref).
+Get the key type of a dictionary type. Behaves similarly to [`eltype`](@ref).
 
 # Examples
 ```jldoctest
@@ -259,7 +305,7 @@ keytype(a::AbstractDict) = keytype(typeof(a))
 """
     valtype(type)
 
-Get the value type of an dictionary type. Behaves similarly to [`eltype`](@ref).
+Get the value type of a dictionary type. Behaves similarly to [`eltype`](@ref).
 
 # Examples
 ```jldoctest
@@ -277,6 +323,7 @@ Construct a merged collection from the given collections. If necessary, the
 types of the resulting collection will be promoted to accommodate the types of
 the merged collections. If the same key is present in another collection, the
 value for that key will be the value it has in the last collection listed.
+See also [`mergewith`](@ref) for custom handling of values with the same key.
 
 # Examples
 ```jldoctest
@@ -477,6 +524,9 @@ function ==(l::AbstractDict, r::AbstractDict)
     return anymissing ? missing : true
 end
 
+# Fallback implementation
+sizehint!(d::AbstractDict, n) = d
+
 const hasha_seed = UInt === UInt64 ? 0x6d35bb51952d5539 : 0x952d5539
 function hash(a::AbstractDict, h::UInt)
     hv = hasha_seed
diff --git a/base/abstractset.jl b/base/abstractset.jl
index 05b53009528226..85d81480ab9902 100644
--- a/base/abstractset.jl
+++ b/base/abstractset.jl
@@ -1,9 +1,12 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 eltype(::Type{<:AbstractSet{T}}) where {T} = @isdefined(T) ? T : Any
-sizehint!(s::AbstractSet, n) = nothing
+sizehint!(s::AbstractSet, n) = s
 
-copy!(dst::AbstractSet, src::AbstractSet) = union!(empty!(dst), src)
+function copy!(dst::AbstractSet, src::AbstractSet)
+    dst === src && return dst
+    union!(empty!(dst), src)
+end
 
 ## set operations (union, intersection, symmetric difference)
 
@@ -11,28 +14,36 @@ copy!(dst::AbstractSet, src::AbstractSet) = union!(empty!(dst), src)
     union(s, itrs...)
     ∪(s, itrs...)
 
-Construct the union of sets. Maintain order with arrays.
+Construct an object containing all distinct elements from all of the arguments.
+
+The first argument controls what kind of container is returned.
+If this is an array, it maintains the order in which elements first appear.
+
+Unicode `∪` can be typed by writing `\\cup` then pressing tab in the Julia REPL, and in many editors.
+This is an infix operator, allowing `s ∪ itr`.
+
+See also [`unique`](@ref), [`intersect`](@ref), [`isdisjoint`](@ref), [`vcat`](@ref), [`Iterators.flatten`](@ref).
 
 # Examples
 ```jldoctest
-julia> union([1, 2], [3, 4])
-4-element Vector{Int64}:
+julia> union([1, 2], [3])
+3-element Vector{Int64}:
  1
  2
  3
- 4
 
-julia> union([1, 2], [2, 4])
-3-element Vector{Int64}:
- 1
- 2
- 4
+julia> union([4 2 3 4 4], 1:3, 3.0)
+4-element Vector{Float64}:
+ 4.0
+ 2.0
+ 3.0
+ 1.0
 
-julia> union([4, 2], 1:2)
-3-element Vector{Int64}:
- 4
- 2
- 1
+julia> (0, 0.0) ∪ (-0.0, NaN)
+3-element Vector{Real}:
+   0
+  -0.0
+ NaN
 
 julia> union(Set([1, 2]), 2:3)
 Set{Int64} with 3 elements:
@@ -43,8 +54,6 @@ Set{Int64} with 3 elements:
 """
 function union end
 
-_in(itr) = x -> x in itr
-
 union(s, sets...) = union!(emptymutable(s, promote_eltype(s, sets...)), s, sets...)
 union(s::AbstractSet) = copy(s)
 
@@ -53,21 +62,21 @@ const ∪ = union
 """
     union!(s::Union{AbstractSet,AbstractVector}, itrs...)
 
-Construct the union of passed in sets and overwrite `s` with the result.
+Construct the [`union`](@ref) of passed in sets and overwrite `s` with the result.
 Maintain order with arrays.
 
 # Examples
 ```jldoctest
-julia> a = Set([1, 3, 4, 5]);
+julia> a = Set([3, 4, 5]);
 
-julia> union!(a, 1:2:8);
+julia> union!(a, 1:2:7);
 
 julia> a
 Set{Int64} with 5 elements:
-  7
+  5
   4
+  7
   3
-  5
   1
 ```
 """
@@ -81,7 +90,11 @@ end
 max_values(::Type) = typemax(Int)
 max_values(T::Union{map(X -> Type{X}, BitIntegerSmall_types)...}) = 1 << (8*sizeof(T))
 # saturated addition to prevent overflow with typemax(Int)
-max_values(T::Union) = max(max_values(T.a), max_values(T.b), max_values(T.a) + max_values(T.b))
+function max_values(T::Union)
+    a = max_values(T.a)::Int
+    b = max_values(T.b)::Int
+    return max(a, b, a + b)
+end
 max_values(::Type{Bool}) = 2
 max_values(::Type{Nothing}) = 1
 
@@ -98,8 +111,19 @@ end
     intersect(s, itrs...)
     ∩(s, itrs...)
 
-Construct the intersection of sets.
-Maintain order with arrays.
+Construct the set containing those elements which appear in all of the arguments.
+
+The first argument controls what kind of container is returned.
+If this is an array, it maintains the order in which elements first appear.
+
+Unicode `∩` can be typed by writing `\\cap` then pressing tab in the Julia REPL, and in many editors.
+This is an infix operator, allowing `s ∩ itr`.
+
+See also [`setdiff`](@ref), [`isdisjoint`](@ref), [`issubset`](@ref Base.issubset), [`issetequal`](@ref).
+
+!!! compat "Julia 1.8"
+    As of Julia 1.8 intersect returns a result with the eltype of the
+    type-promoted eltypes of the two inputs
 
 # Examples
 ```jldoctest
@@ -107,19 +131,49 @@ julia> intersect([1, 2, 3], [3, 4, 5])
 1-element Vector{Int64}:
  3
 
-julia> intersect([1, 4, 4, 5, 6], [4, 6, 6, 7, 8])
+julia> intersect([1, 4, 4, 5, 6], [6, 4, 6, 7, 8])
 2-element Vector{Int64}:
  4
  6
 
-julia> intersect(Set([1, 2]), BitSet([2, 3]))
-Set{Int64} with 1 element:
-  2
+julia> intersect(1:16, 7:99)
+7:16
+
+julia> (0, 0.0) ∩ (-0.0, 0)
+1-element Vector{Real}:
+ 0
+
+julia> intersect(Set([1, 2]), BitSet([2, 3]), 1.0:10.0)
+Set{Float64} with 1 element:
+  2.0
 ```
 """
-intersect(s::AbstractSet, itr, itrs...) = intersect!(intersect(s, itr), itrs...)
+function intersect(s::AbstractSet, itr, itrs...)
+    # heuristics to try to `intersect` with the shortest Set on the left
+    if length(s)>50 && haslength(itr) && all(haslength, itrs)
+        min_length, min_idx = findmin(length, itrs)
+        if length(itr) > min_length
+            new_itrs = setindex(itrs, itr, min_idx)
+            return intersect(s, itrs[min_idx], new_itrs...)
+        end
+    end
+    T = promote_eltype(s, itr, itrs...)
+    if T == promote_eltype(s, itr)
+        out = intersect(s, itr)
+    else
+        out = union!(emptymutable(s, T), s)
+        intersect!(out, itr)
+    end
+    return intersect!(out, itrs...)
+end
 intersect(s) = union(s)
-intersect(s::AbstractSet, itr) = mapfilter(_in(s), push!, itr, emptymutable(s))
+function intersect(s::AbstractSet, itr)
+    if haslength(itr) && hasfastin(itr) && length(s) < length(itr)
+        return mapfilter(in(itr), push!, s, emptymutable(s, promote_eltype(s, itr)))
+    else
+        return mapfilter(in(s), push!, itr, emptymutable(s, promote_eltype(s, itr)))
+    end
+end
 
 const ∩ = intersect
 
@@ -135,7 +189,7 @@ function intersect!(s::AbstractSet, itrs...)
     end
     return s
 end
-intersect!(s::AbstractSet, s2::AbstractSet) = filter!(_in(s2), s)
+intersect!(s::AbstractSet, s2::AbstractSet) = filter!(in(s2), s)
 intersect!(s::AbstractSet, itr) =
     intersect!(s, union!(emptymutable(s, eltype(itr)), itr))
 
@@ -145,6 +199,8 @@ intersect!(s::AbstractSet, itr) =
 Construct the set of elements in `s` but not in any of the iterables in `itrs`.
 Maintain order with arrays.
 
+See also [`setdiff!`](@ref), [`union`](@ref) and [`intersect`](@ref).
+
 # Examples
 ```jldoctest
 julia> setdiff([1,2,3], [3,4,5])
@@ -194,6 +250,8 @@ Construct the symmetric difference of elements in the passed in sets.
 When `s` is not an `AbstractSet`, the order is maintained.
 Note that in this case the multiplicity of elements matters.
 
+See also [`symdiff!`](@ref), [`setdiff`](@ref), [`union`](@ref) and [`intersect`](@ref).
+
 # Examples
 ```jldoctest
 julia> symdiff([1,2,3], [3,4,5], [4,5,6])
@@ -246,6 +304,8 @@ function ⊇ end
 
 Determine whether every element of `a` is also in `b`, using [`in`](@ref).
 
+See also [`⊊`](@ref), [`⊈`](@ref), [`∩`](@ref intersect), [`∪`](@ref union), [`contains`](@ref).
+
 # Examples
 ```jldoctest
 julia> issubset([1, 2], [1, 2, 3])
@@ -262,21 +322,21 @@ issubset, ⊆, ⊇
 
 const FASTIN_SET_THRESHOLD = 70
 
-function issubset(l, r)
-    if haslength(r) && (isa(l, AbstractSet) || !hasfastin(r))
-        rlen = length(r) # conditions above make this length computed only when needed
-        # check l for too many unique elements
-        if isa(l, AbstractSet) && length(l) > rlen
+function issubset(a, b)
+    if haslength(b) && (isa(a, AbstractSet) || !hasfastin(b))
+        blen = length(b) # conditions above make this length computed only when needed
+        # check a for too many unique elements
+        if isa(a, AbstractSet) && length(a) > blen
             return false
         end
-        # when `in` would be too slow and r is big enough, convert it to a Set
+        # when `in` would be too slow and b is big enough, convert it to a Set
         # this threshold was empirically determined (cf. #26198)
-        if !hasfastin(r) && rlen > FASTIN_SET_THRESHOLD
-            return issubset(l, Set(r))
+        if !hasfastin(b) && blen > FASTIN_SET_THRESHOLD
+            return issubset(a, Set(b))
         end
     end
-    for elt in l
-        elt in r || return false
+    for elt in a
+        elt in b || return false
     end
     return true
 end
@@ -294,7 +354,7 @@ hasfastin(::Type) = false
 hasfastin(::Union{Type{<:AbstractSet},Type{<:AbstractDict},Type{<:AbstractRange}}) = true
 hasfastin(x) = hasfastin(typeof(x))
 
-⊇(l, r) = r ⊆ l
+⊇(a, b) = b ⊆ a
 
 ## strict subset comparison
 
@@ -306,6 +366,8 @@ function ⊋ end
 
 Determines if `a` is a subset of, but not equal to, `b`.
 
+See also [`issubset`](@ref) (`⊆`), [`⊈`](@ref).
+
 # Examples
 ```jldoctest
 julia> (1, 2) ⊊ (1, 2, 3)
@@ -317,9 +379,11 @@ false
 """
 ⊊, ⊋
 
-⊊(l::AbstractSet, r) = length(l) < length(r) && l ⊆ r
-⊊(l, r) = Set(l) ⊊ r
-⊋(l, r) = r ⊊ l
+⊊(a::AbstractSet, b::AbstractSet) = length(a) < length(b) && a ⊆ b
+⊊(a::AbstractSet, b) = a ⊊ Set(b)
+⊊(a, b::AbstractSet) = Set(a) ⊊ b
+⊊(a, b) = Set(a) ⊊ Set(b)
+⊋(a, b) = b ⊊ a
 
 function ⊈ end
 function ⊉ end
@@ -329,6 +393,8 @@ function ⊉ end
 
 Negation of `⊆` and `⊇`, i.e. checks that `a` is not a subset of `b`.
 
+See also [`issubset`](@ref) (`⊆`), [`⊊`](@ref).
+
 # Examples
 ```jldoctest
 julia> (1, 2) ⊈ (2, 3)
@@ -340,8 +406,8 @@ false
 """
 ⊈, ⊉
 
-⊈(l, r) = !⊆(l, r)
-⊉(l, r) = r ⊈ l
+⊈(a, b) = !⊆(a, b)
+⊉(a, b) = b ⊈ a
 
 ## set equality comparison
 
@@ -351,6 +417,8 @@ false
 Determine whether `a` and `b` have the same elements. Equivalent
 to `a ⊆ b && b ⊆ a` but more efficient when possible.
 
+See also: [`isdisjoint`](@ref), [`union`](@ref).
+
 # Examples
 ```jldoctest
 julia> issetequal([1, 2], [1, 2, 3])
@@ -360,54 +428,65 @@ julia> issetequal([1, 2], [2, 1])
 true
 ```
 """
-issetequal(l::AbstractSet, r::AbstractSet) = l == r
-issetequal(l::AbstractSet, r) = issetequal(l, Set(r))
+issetequal(a::AbstractSet, b::AbstractSet) = a == b
+issetequal(a::AbstractSet, b) = issetequal(a, Set(b))
 
-function issetequal(l, r::AbstractSet)
-    if haslength(l)
-        # check r for too many unique elements
-        length(l) < length(r) && return false
+function issetequal(a, b::AbstractSet)
+    if haslength(a)
+        # check b for too many unique elements
+        length(a) < length(b) && return false
     end
-    return issetequal(Set(l), r)
+    return issetequal(Set(a), b)
 end
 
-function issetequal(l, r)
-    haslength(l) && return issetequal(l, Set(r))
-    haslength(r) && return issetequal(r, Set(l))
-    return issetequal(Set(l), Set(r))
+function issetequal(a, b)
+    haslength(a) && return issetequal(a, Set(b))
+    haslength(b) && return issetequal(b, Set(a))
+    return issetequal(Set(a), Set(b))
 end
 
 ## set disjoint comparison
 """
-    isdisjoint(v1, v2) -> Bool
+    isdisjoint(a, b) -> Bool
+
+Determine whether the collections `a` and `b` are disjoint.
+Equivalent to `isempty(a ∩ b)` but more efficient when possible.
 
-Return whether the collections `v1` and `v2` are disjoint, i.e. whether
-their intersection is empty.
+See also: [`intersect`](@ref), [`isempty`](@ref), [`issetequal`](@ref).
 
 !!! compat "Julia 1.5"
     This function requires at least Julia 1.5.
+
+# Examples
+```jldoctest
+julia> isdisjoint([1, 2], [2, 3, 4])
+false
+
+julia> isdisjoint([3, 1], [2, 4])
+true
+```
 """
-function isdisjoint(l, r)
-    function _isdisjoint(l, r)
-        hasfastin(r) && return !any(in(r), l)
-        hasfastin(l) && return !any(in(l), r)
-        haslength(r) && length(r) < FASTIN_SET_THRESHOLD &&
-            return !any(in(r), l)
-        return !any(in(Set(r)), l)
+function isdisjoint(a, b)
+    function _isdisjoint(a, b)
+        hasfastin(b) && return !any(in(b), a)
+        hasfastin(a) && return !any(in(a), b)
+        haslength(b) && length(b) < FASTIN_SET_THRESHOLD &&
+            return !any(in(b), a)
+        return !any(in(Set(b)), a)
     end
-    if haslength(l) && haslength(r) && length(r) < length(l)
-        return _isdisjoint(r, l)
+    if haslength(a) && haslength(b) && length(b) < length(a)
+        return _isdisjoint(b, a)
     end
-    _isdisjoint(l, r)
+    _isdisjoint(a, b)
 end
 
 ## partial ordering of sets by containment
 
-==(l::AbstractSet, r::AbstractSet) = length(l) == length(r) && l ⊆ r
+==(a::AbstractSet, b::AbstractSet) = length(a) == length(b) && a ⊆ b
 # convenience functions for AbstractSet
 # (if needed, only their synonyms ⊊ and ⊆ must be specialized)
-<( l::AbstractSet, r::AbstractSet) = l ⊊ r
-<=(l::AbstractSet, r::AbstractSet) = l ⊆ r
+<( a::AbstractSet, b::AbstractSet) = a ⊊ b
+<=(a::AbstractSet, b::AbstractSet) = a ⊆ b
 
 ## filtering sets
 
diff --git a/base/accumulate.jl b/base/accumulate.jl
index fe06dbc1c2c70d..663bd850695a89 100644
--- a/base/accumulate.jl
+++ b/base/accumulate.jl
@@ -116,35 +116,29 @@ end
 """
     cumsum(itr)
 
-Cumulative sum an iterator. See also [`cumsum!`](@ref)
-to use a preallocated output array, both for performance and to control the precision of the
-output (e.g. to avoid overflow).
+Cumulative sum of an iterator.
+
+See also [`accumulate`](@ref) to apply functions other than `+`.
 
 !!! compat "Julia 1.5"
     `cumsum` on a non-array iterator requires at least Julia 1.5.
 
 # Examples
 ```jldoctest
-julia> cumsum([1, 1, 1])
+julia> cumsum(1:3)
 3-element Vector{Int64}:
  1
- 2
  3
+ 6
+
+julia> cumsum((true, false, true, false, true))
+(1, 1, 2, 2, 3)
 
-julia> cumsum([fill(1, 2) for i in 1:3])
+julia> cumsum(fill(1, 2) for i in 1:3)
 3-element Vector{Vector{Int64}}:
  [1, 1]
  [2, 2]
  [3, 3]
-
-julia> cumsum((1, 1, 1))
-(1, 2, 3)
-
-julia> cumsum(x^2 for x in 1:3)
-3-element Vector{Int64}:
-  1
-  5
- 14
 ```
 """
 cumsum(x::AbstractVector) = cumsum(x, dims=1)
@@ -177,10 +171,7 @@ to control the precision of the output (e.g. to avoid overflow).
 
 # Examples
 ```jldoctest
-julia> a = [1 2 3; 4 5 6]
-2×3 Matrix{Int64}:
- 1  2  3
- 4  5  6
+julia> a = Int8[1 2 3; 4 5 6];
 
 julia> cumprod(a, dims=1)
 2×3 Matrix{Int64}:
@@ -200,9 +191,9 @@ end
 """
     cumprod(itr)
 
-Cumulative product of an iterator. See also
-[`cumprod!`](@ref) to use a preallocated output array, both for performance and
-to control the precision of the output (e.g. to avoid overflow).
+Cumulative product of an iterator.
+
+See also [`cumprod!`](@ref), [`accumulate`](@ref), [`cumsum`](@ref).
 
 !!! compat "Julia 1.5"
     `cumprod` on a non-array iterator requires at least Julia 1.5.
@@ -215,20 +206,16 @@ julia> cumprod(fill(1//2, 3))
  1//4
  1//8
 
-julia> cumprod([fill(1//3, 2, 2) for i in 1:3])
-3-element Vector{Matrix{Rational{Int64}}}:
- [1//3 1//3; 1//3 1//3]
- [2//9 2//9; 2//9 2//9]
- [4//27 4//27; 4//27 4//27]
+julia> cumprod((1, 2, 1, 3, 1))
+(1, 2, 2, 6, 6)
 
-julia> cumprod((1, 2, 1))
-(1, 2, 2)
-
-julia> cumprod(x^2 for x in 1:3)
-3-element Vector{Int64}:
-  1
-  4
- 36
+julia> cumprod("julia")
+5-element Vector{String}:
+ "j"
+ "ju"
+ "jul"
+ "juli"
+ "julia"
 ```
 """
 cumprod(x::AbstractVector) = cumprod(x, dims=1)
@@ -241,8 +228,11 @@ cumprod(itr) = accumulate(mul_prod, itr)
 Cumulative operation `op` along the dimension `dims` of `A` (providing `dims` is optional
 for vectors). An initial value `init` may optionally be provided by a keyword argument. See
 also [`accumulate!`](@ref) to use a preallocated output array, both for performance and
-to control the precision of the output (e.g. to avoid overflow). For common operations
-there are specialized variants of `accumulate`, see: [`cumsum`](@ref), [`cumprod`](@ref)
+to control the precision of the output (e.g. to avoid overflow).
+
+For common operations there are specialized variants of `accumulate`,
+see [`cumsum`](@ref), [`cumprod`](@ref). For a lazy version, see
+[`Iterators.accumulate`](@ref).
 
 !!! compat "Julia 1.5"
     `accumulate` on a non-array iterator requires at least Julia 1.5.
@@ -255,35 +245,28 @@ julia> accumulate(+, [1,2,3])
  3
  6
 
-julia> accumulate(*, [1,2,3])
-3-element Vector{Int64}:
- 1
- 2
- 6
+julia> accumulate(min, (1, -2, 3, -4, 5), init=0)
+(0, -2, -2, -4, -4)
 
-julia> accumulate(+, [1,2,3]; init=100)
-3-element Vector{Int64}:
- 101
- 103
- 106
+julia> accumulate(/, (2, 4, Inf), init=100)
+(50.0, 12.5, 0.0)
 
-julia> accumulate(min, [1,2,-1]; init=0)
-3-element Vector{Int64}:
-  0
-  0
- -1
-
-julia> accumulate(+, fill(1, 3, 3), dims=1)
-3×3 Matrix{Int64}:
- 1  1  1
- 2  2  2
- 3  3  3
-
-julia> accumulate(+, fill(1, 3, 3), dims=2)
-3×3 Matrix{Int64}:
- 1  2  3
- 1  2  3
- 1  2  3
+julia> accumulate(=>, i^2 for i in 1:3)
+3-element Vector{Any}:
+          1
+        1 => 4
+ (1 => 4) => 9
+
+julia> accumulate(+, fill(1, 3, 4))
+3×4 Matrix{Int64}:
+ 1  4  7  10
+ 2  5  8  11
+ 3  6  9  12
+
+julia> accumulate(+, fill(1, 2, 5), dims=2, init=100.0)
+2×5 Matrix{Float64}:
+ 101.0  102.0  103.0  104.0  105.0
+ 101.0  102.0  103.0  104.0  105.0
 ```
 """
 function accumulate(op, A; dims::Union{Nothing,Integer}=nothing, kw...)
@@ -291,10 +274,10 @@ function accumulate(op, A; dims::Union{Nothing,Integer}=nothing, kw...)
         # This branch takes care of the cases not handled by `_accumulate!`.
         return collect(Iterators.accumulate(op, A; kw...))
     end
-    nt = kw.data
-    if nt isa NamedTuple{()}
+    nt = values(kw)
+    if isempty(kw)
         out = similar(A, promote_op(op, eltype(A), eltype(A)))
-    elseif nt isa NamedTuple{(:init,)}
+    elseif keys(nt) === (:init,)
         out = similar(A, promote_op(op, typeof(nt.init), eltype(A)))
     else
         throw(ArgumentError("acccumulate does not support the keyword arguments $(setdiff(keys(nt), (:init,)))"))
@@ -316,48 +299,46 @@ end
 
 Cumulative operation `op` on `A` along the dimension `dims`, storing the result in `B`.
 Providing `dims` is optional for vectors.  If the keyword argument `init` is given, its
-value is used to instantiate the accumulation. See also [`accumulate`](@ref).
+value is used to instantiate the accumulation.
+
+See also [`accumulate`](@ref), [`cumsum!`](@ref), [`cumprod!`](@ref).
 
 # Examples
 ```jldoctest
 julia> x = [1, 0, 2, 0, 3];
 
-julia> y = [0, 0, 0, 0, 0];
+julia> y = rand(5);
 
 julia> accumulate!(+, y, x);
 
 julia> y
-5-element Vector{Int64}:
- 1
- 1
- 3
- 3
- 6
+5-element Vector{Float64}:
+ 1.0
+ 1.0
+ 3.0
+ 3.0
+ 6.0
 
-julia> A = [1 2; 3 4];
+julia> A = [1 2 3; 4 5 6];
 
-julia> B = [0 0; 0 0];
+julia> B = similar(A);
 
-julia> accumulate!(-, B, A, dims=1);
-
-julia> B
-2×2 Matrix{Int64}:
-  1   2
- -2  -2
-
-julia> accumulate!(-, B, A, dims=2);
+julia> accumulate!(-, B, A, dims=1)
+2×3 Matrix{Int64}:
+  1   2   3
+ -3  -3  -3
 
-julia> B
-2×2 Matrix{Int64}:
- 1  -1
- 3  -1
+julia> accumulate!(*, B, A, dims=2, init=10)
+2×3 Matrix{Int64}:
+ 10   20    60
+ 40  200  1200
 ```
 """
 function accumulate!(op, B, A; dims::Union{Integer, Nothing} = nothing, kw...)
-    nt = kw.data
-    if nt isa NamedTuple{()}
+    nt = values(kw)
+    if isempty(kw)
         _accumulate!(op, B, A, dims, nothing)
-    elseif nt isa NamedTuple{(:init,)}
+    elseif keys(kw) === (:init,)
         _accumulate!(op, B, A, dims, Some(nt.init))
     else
         throw(ArgumentError("acccumulate! does not support the keyword arguments $(setdiff(keys(nt), (:init,)))"))
@@ -441,7 +422,7 @@ function _accumulate1!(op, B, v1, A::AbstractVector, dim::Integer)
     inds = LinearIndices(A)
     inds == LinearIndices(B) || throw(DimensionMismatch("LinearIndices of A and B don't match"))
     dim > 1 && return copyto!(B, A)
-    (i1, state) = iterate(inds) # We checked earlier that A isn't empty
+    (i1, state) = iterate(inds)::NTuple{2,Any} # We checked earlier that A isn't empty
     cur_val = v1
     B[i1] = cur_val
     next = iterate(inds, state)
diff --git a/base/array.jl b/base/array.jl
index c88dd8cba5878f..b5a1ba31f0acf7 100644
--- a/base/array.jl
+++ b/base/array.jl
@@ -9,7 +9,7 @@ The objects called do not have matching dimensionality. Optional argument `msg`
 descriptive error string.
 """
 struct DimensionMismatch <: Exception
-    msg::AbstractString
+    msg::String
 end
 DimensionMismatch() = DimensionMismatch("")
 
@@ -54,6 +54,8 @@ Array
 
 One-dimensional dense array with elements of type `T`, often used to represent
 a mathematical vector. Alias for [`Array{T,1}`](@ref).
+
+See also [`empty`](@ref), [`similar`](@ref) and [`zero`](@ref) for creating vectors.
 """
 const Vector{T} = Array{T,1}
 
@@ -62,12 +64,28 @@ const Vector{T} = Array{T,1}
 
 Two-dimensional dense array with elements of type `T`, often used to represent
 a mathematical matrix. Alias for [`Array{T,2}`](@ref).
+
+See also [`fill`](@ref), [`zeros`](@ref), [`undef`](@ref) and [`similar`](@ref)
+for creating matrices.
 """
 const Matrix{T} = Array{T,2}
+
 """
     VecOrMat{T}
 
-Union type of [`Vector{T}`](@ref) and [`Matrix{T}`](@ref).
+Union type of [`Vector{T}`](@ref) and [`Matrix{T}`](@ref) which allows functions to accept either a Matrix or a Vector.
+
+# Examples
+```jldoctest
+julia> Vector{Float64} <: VecOrMat{Float64}
+true
+
+julia> Matrix{Float64} <: VecOrMat{Float64}
+true
+
+julia> Array{Float64, 3} <: VecOrMat{Float64}
+false
+```
 """
 const VecOrMat{T} = Union{Vector{T}, Matrix{T}}
 
@@ -102,29 +120,7 @@ const DenseVecOrMat{T} = Union{DenseVector{T}, DenseMatrix{T}}
 
 ## Basic functions ##
 
-"""
-    eltype(type)
-
-Determine the type of the elements generated by iterating a collection of the given `type`.
-For dictionary types, this will be a `Pair{KeyType,ValType}`. The definition
-`eltype(x) = eltype(typeof(x))` is provided for convenience so that instances can be passed
-instead of types. However the form that accepts a type argument should be defined for new
-types.
-
-# Examples
-```jldoctest
-julia> eltype(fill(1f0, (2,2)))
-Float32
-
-julia> eltype(fill(0x1, (2,2)))
-UInt8
-```
-"""
-eltype(::Type) = Any
-eltype(::Type{Bottom}) = throw(ArgumentError("Union{} does not have elements"))
-eltype(x) = eltype(typeof(x))
-
-import Core: arraysize, arrayset, arrayref, const_arrayref
+using Core: arraysize, arrayset, const_arrayref
 
 vect() = Vector{Any}()
 vect(X::T...) where {T} = T[ X[i] for i = 1:length(X) ]
@@ -154,11 +150,11 @@ end
 size(a::Array, d::Integer) = arraysize(a, convert(Int, d))
 size(a::Vector) = (arraysize(a,1),)
 size(a::Matrix) = (arraysize(a,1), arraysize(a,2))
-size(a::Array{<:Any,N}) where {N} = (@_inline_meta; ntuple(M -> size(a, M), Val(N))::Dims)
+size(a::Array{<:Any,N}) where {N} = (@inline; ntuple(M -> size(a, M), Val(N))::Dims)
 
 asize_from(a::Array, n) = n > ndims(a) ? () : (arraysize(a,n), asize_from(a, n+1)...)
 
-allocatedinline(T::Type) = (@_pure_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0))
+allocatedinline(T::Type) = (@_total_meta; ccall(:jl_stored_inline, Cint, (Any,), T) != Cint(0))
 
 """
     Base.isbitsunion(::Type{T})
@@ -178,7 +174,7 @@ isbitsunion(u::Union) = allocatedinline(u)
 isbitsunion(x) = false
 
 function _unsetindex!(A::Array{T}, i::Int) where {T}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, i)
     t = @_gc_preserve_begin A
     p = Ptr{Ptr{Cvoid}}(pointer(A, i))
@@ -197,17 +193,17 @@ end
 
 
 """
-    Base.bitsunionsize(U::Union)
+    Base.bitsunionsize(U::Union) -> Int
 
 For a `Union` of [`isbitstype`](@ref) types, return the size of the largest type; assumes `Base.isbitsunion(U) == true`.
 
 # Examples
 ```jldoctest
 julia> Base.bitsunionsize(Union{Float64, UInt8})
-0x0000000000000008
+8
 
 julia> Base.bitsunionsize(Union{Float64, UInt8, Int128})
-0x0000000000000010
+16
 ```
 """
 function bitsunionsize(u::Union)
@@ -216,12 +212,11 @@ function bitsunionsize(u::Union)
     return sz
 end
 
-length(a::Array) = arraylen(a)
-elsize(::Type{<:Array{T}}) where {T} = aligned_sizeof(T)
+elsize(@nospecialize _::Type{A}) where {T,A<:Array{T}} = aligned_sizeof(T)
 sizeof(a::Array) = Core.sizeof(a)
 
 function isassigned(a::Array, i::Int...)
-    @_inline_meta
+    @inline
     ii = (_sub2ind(size(a), i...) % UInt) - 1
     @boundscheck ii < length(a) % UInt || return false
     ccall(:jl_array_isassigned, Cint, (Any, UInt), a, ii) == 1
@@ -340,7 +335,7 @@ end
 # occurs, see discussion in #27874.
 # It is also mitigated by using a constant string.
 function _throw_argerror()
-    @_noinline_meta
+    @noinline
     throw(ArgumentError("Number of elements to copy must be nonnegative."))
 end
 
@@ -365,6 +360,8 @@ end
 Create a shallow copy of `x`: the outer structure is copied, but not all internal values.
 For example, copying an array produces a new array with identically-same elements as the
 original.
+
+See also [`copy!`](@ref Base.copy!), [`copyto!`](@ref).
 """
 copy
 
@@ -404,17 +401,20 @@ julia> getindex(Int8, 1, 2, 3)
 """
 function getindex(::Type{T}, vals...) where T
     a = Vector{T}(undef, length(vals))
-    @inbounds for i = 1:length(vals)
-        a[i] = vals[i]
+    if vals isa NTuple
+        @inbounds for i in 1:length(vals)
+            a[i] = vals[i]
+        end
+    else
+        # use afoldl to avoid type instability inside loop
+        afoldl(1, vals...) do i, v
+            @inbounds a[i] = v
+            return i + 1
+        end
     end
     return a
 end
 
-getindex(::Type{T}) where {T} = (@_inline_meta; Vector{T}())
-getindex(::Type{T}, x) where {T} = (@_inline_meta; a = Vector{T}(undef, 1); @inbounds a[1] = x; a)
-getindex(::Type{T}, x, y) where {T} = (@_inline_meta; a = Vector{T}(undef, 2); @inbounds (a[1] = x; a[2] = y); a)
-getindex(::Type{T}, x, y, z) where {T} = (@_inline_meta; a = Vector{T}(undef, 3); @inbounds (a[1] = x; a[2] = y; a[3] = z); a)
-
 function getindex(::Type{Any}, @nospecialize vals...)
     a = Vector{Any}(undef, length(vals))
     @inbounds for i = 1:length(vals)
@@ -433,14 +433,76 @@ to_dim(d::Integer) = d
 to_dim(d::OneTo) = last(d)
 
 """
-    fill(x, dims::Tuple)
-    fill(x, dims...)
+    fill(value, dims::Tuple)
+    fill(value, dims...)
+
+Create an array of size `dims` with every location set to `value`.
 
-Create an array filled with the value `x`. For example, `fill(1.0, (5,5))` returns a 5×5
-array of floats, with each element initialized to `1.0`.
+For example, `fill(1.0, (5,5))` returns a 5×5 array of floats,
+with `1.0` in every location of the array.
 
-`dims` may be specified as either a tuple or a sequence of arguments. For example,
-the common idiom `fill(x)` creates a zero-dimensional array containing the single value `x`.
+The dimension lengths `dims` may be specified as either a tuple or a sequence of arguments.
+An `N`-length tuple or `N` arguments following the `value` specify an `N`-dimensional
+array. Thus, a common idiom for creating a zero-dimensional array with its only location
+set to `x` is `fill(x)`.
+
+Every location of the returned array is set to (and is thus [`===`](@ref) to)
+the `value` that was passed; this means that if the `value` is itself modified,
+all elements of the `fill`ed array will reflect that modification because they're
+_still_ that very `value`. This is of no concern with `fill(1.0, (5,5))` as the
+`value` `1.0` is immutable and cannot itself be modified, but can be unexpected
+with mutable values like — most commonly — arrays.  For example, `fill([], 3)`
+places _the very same_ empty array in all three locations of the returned vector:
+
+```jldoctest
+julia> v = fill([], 3)
+3-element Vector{Vector{Any}}:
+ []
+ []
+ []
+
+julia> v[1] === v[2] === v[3]
+true
+
+julia> value = v[1]
+Any[]
+
+julia> push!(value, 867_5309)
+1-element Vector{Any}:
+ 8675309
+
+julia> v
+3-element Vector{Vector{Any}}:
+ [8675309]
+ [8675309]
+ [8675309]
+```
+
+To create an array of many independent inner arrays, use a [comprehension](@ref man-comprehensions) instead.
+This creates a new and distinct array on each iteration of the loop:
+
+```jldoctest
+julia> v2 = [[] for _ in 1:3]
+3-element Vector{Vector{Any}}:
+ []
+ []
+ []
+
+julia> v2[1] === v2[2] === v2[3]
+false
+
+julia> push!(v2[1], 8675309)
+1-element Vector{Any}:
+ 8675309
+
+julia> v2
+3-element Vector{Vector{Any}}:
+ [8675309]
+ []
+ []
+```
+
+See also: [`fill!`](@ref), [`zeros`](@ref), [`ones`](@ref), [`similar`](@ref).
 
 # Examples
 ```jldoctest
@@ -452,15 +514,15 @@ julia> fill(1.0, (2,3))
 julia> fill(42)
 0-dimensional Array{Int64, 0}:
 42
-```
 
-If `x` is an object reference, all elements will refer to the same object:
-```jldoctest
-julia> A = fill(zeros(2), 2);
+julia> A = fill(zeros(2), 2) # sets both elements to the same [0.0, 0.0] vector
+2-element Vector{Vector{Float64}}:
+ [0.0, 0.0]
+ [0.0, 0.0]
 
-julia> A[1][1] = 42; # modifies both A[1][1] and A[2][1]
+julia> A[1][1] = 42; # modifies the filled value to be [42.0, 0.0]
 
-julia> A
+julia> A # both A[1] and A[2] are the very same vector
 2-element Vector{Vector{Float64}}:
  [42.0, 0.0]
  [42.0, 0.0]
@@ -478,7 +540,7 @@ fill(v, dims::Tuple{}) = (a=Array{typeof(v),0}(undef, dims); fill!(a, v); a)
     zeros([T=Float64,] dims...)
 
 Create an `Array`, with element type `T`, of all zeros with size specified by `dims`.
-See also [`fill`](@ref), [`ones`](@ref).
+See also [`fill`](@ref), [`ones`](@ref), [`zero`](@ref).
 
 # Examples
 ```jldoctest
@@ -499,7 +561,7 @@ function zeros end
     ones([T=Float64,] dims...)
 
 Create an `Array`, with element type `T`, of all ones with size specified by `dims`.
-See also: [`fill`](@ref), [`zeros`](@ref).
+See also [`fill`](@ref), [`zeros`](@ref).
 
 # Examples
 ```jldoctest
@@ -552,6 +614,7 @@ oneunit(x::AbstractMatrix{T}) where {T} = _one(oneunit(T), x)
 ## Conversions ##
 
 convert(::Type{T}, a::AbstractArray) where {T<:Array} = a isa T ? a : T(a)
+convert(::Type{Union{}}, a::AbstractArray) = throw(MethodError(convert, (Union{}, a)))
 
 promote_rule(a::Type{Array{T,n}}, b::Type{Array{S,n}}) where {T,n,S} = el_same(promote_type(T,S), a, b)
 
@@ -582,23 +645,38 @@ julia> collect(Float64, 1:2:5)
 """
 collect(::Type{T}, itr) where {T} = _collect(T, itr, IteratorSize(itr))
 
-_collect(::Type{T}, itr, isz::HasLength) where {T} = copyto!(Vector{T}(undef, Int(length(itr)::Integer)), itr)
-_collect(::Type{T}, itr, isz::HasShape) where {T}  = copyto!(similar(Array{T}, axes(itr)), itr)
+_collect(::Type{T}, itr, isz::Union{HasLength,HasShape}) where {T} =
+    copyto!(_array_for(T, isz, _similar_shape(itr, isz)), itr)
 function _collect(::Type{T}, itr, isz::SizeUnknown) where T
     a = Vector{T}()
     for x in itr
-        push!(a,x)
+        push!(a, x)
     end
     return a
 end
 
 # make a collection similar to `c` and appropriate for collecting `itr`
-_similar_for(c::AbstractArray, ::Type{T}, itr, ::SizeUnknown) where {T} = similar(c, T, 0)
-_similar_for(c::AbstractArray, ::Type{T}, itr, ::HasLength) where {T} =
-    similar(c, T, Int(length(itr)::Integer))
-_similar_for(c::AbstractArray, ::Type{T}, itr, ::HasShape) where {T} =
-    similar(c, T, axes(itr))
-_similar_for(c, ::Type{T}, itr, isz) where {T} = similar(c, T)
+_similar_for(c, ::Type{T}, itr, isz, shp) where {T} = similar(c, T)
+
+_similar_shape(itr, ::SizeUnknown) = nothing
+_similar_shape(itr, ::HasLength) = length(itr)::Integer
+_similar_shape(itr, ::HasShape) = axes(itr)
+
+_similar_for(c::AbstractArray, ::Type{T}, itr, ::SizeUnknown, ::Nothing) where {T} =
+    similar(c, T, 0)
+_similar_for(c::AbstractArray, ::Type{T}, itr, ::HasLength, len::Integer) where {T} =
+    similar(c, T, len)
+_similar_for(c::AbstractArray, ::Type{T}, itr, ::HasShape, axs) where {T} =
+    similar(c, T, axs)
+
+# make a collection appropriate for collecting `itr::Generator`
+_array_for(::Type{T}, ::SizeUnknown, ::Nothing) where {T} = Vector{T}(undef, 0)
+_array_for(::Type{T}, ::HasLength, len::Integer) where {T} = Vector{T}(undef, Int(len))
+_array_for(::Type{T}, ::HasShape{N}, axs) where {T,N} = similar(Array{T,N}, axs)
+
+# used by syntax lowering for simple typed comprehensions
+_array_for(::Type{T}, itr, isz) where {T} = _array_for(T, isz, _similar_shape(itr, isz))
+
 
 """
     collect(collection)
@@ -608,6 +686,8 @@ Return an `Array` of all items in a collection or iterator. For dictionaries, re
 [`HasShape`](@ref IteratorSize) trait, the result will have the same shape
 and number of dimensions as the argument.
 
+Used by comprehensions to turn a generator into an `Array`.
+
 # Examples
 ```jldoctest
 julia> collect(1:2:13)
@@ -619,6 +699,13 @@ julia> collect(1:2:13)
   9
  11
  13
+
+julia> [x^2 for x in 1:8 if isodd(x)]
+4-element Vector{Int64}:
+  1
+  9
+ 25
+ 49
 ```
 """
 collect(itr) = _collect(1:1 #= Array =#, itr, IteratorEltype(itr), IteratorSize(itr))
@@ -628,10 +715,10 @@ collect(A::AbstractArray) = _collect_indices(axes(A), A)
 collect_similar(cont, itr) = _collect(cont, itr, IteratorEltype(itr), IteratorSize(itr))
 
 _collect(cont, itr, ::HasEltype, isz::Union{HasLength,HasShape}) =
-    copyto!(_similar_for(cont, eltype(itr), itr, isz), itr)
+    copyto!(_similar_for(cont, eltype(itr), itr, isz, _similar_shape(itr, isz)), itr)
 
 function _collect(cont, itr, ::HasEltype, isz::SizeUnknown)
-    a = _similar_for(cont, eltype(itr), itr, isz)
+    a = _similar_for(cont, eltype(itr), itr, isz, nothing)
     for x in itr
         push!(a,x)
     end
@@ -646,6 +733,20 @@ function _collect_indices(indsA, A)
     copyto!(B, CartesianIndices(axes(B)), A, CartesianIndices(indsA))
 end
 
+# NOTE: this function is not meant to be called, only inferred, for the
+# purpose of bounding the types of values generated by an iterator.
+function _iterator_upper_bound(itr)
+    x = iterate(itr)
+    while x !== nothing
+        val = getfield(x, 1)
+        if inferencebarrier(nothing)
+            return val
+        end
+        x = iterate(itr, getfield(x, 2))
+    end
+    throw(nothing)
+end
+
 # define this as a macro so that the call to Core.Compiler
 # gets inlined into the caller before recursion detection
 # gets a chance to see it, so that recursive calls to the caller
@@ -655,10 +756,11 @@ if isdefined(Core, :Compiler)
         I = esc(itr)
         return quote
             if $I isa Generator && ($I).f isa Type
-                ($I).f
+                T = ($I).f
             else
-                Core.Compiler.return_type(first, Tuple{typeof($I)})
+                T = Core.Compiler.return_type(_iterator_upper_bound, Tuple{typeof($I)})
             end
+            promote_typejoin_union(T)
         end
     end
 else
@@ -666,7 +768,7 @@ else
         I = esc(itr)
         return quote
             if $I isa Generator && ($I).f isa Type
-                ($I).f
+                promote_typejoin_union($I.f)
             else
                 Any
             end
@@ -674,34 +776,44 @@ else
     end
 end
 
-_array_for(::Type{T}, itr, ::HasLength) where {T} = Vector{T}(undef, Int(length(itr)::Integer))
-_array_for(::Type{T}, itr, ::HasShape{N}) where {T,N} = similar(Array{T,N}, axes(itr))
-
 function collect(itr::Generator)
     isz = IteratorSize(itr.iter)
     et = @default_eltype(itr)
     if isa(isz, SizeUnknown)
         return grow_to!(Vector{et}(), itr)
     else
+        shp = _similar_shape(itr, isz)
         y = iterate(itr)
         if y === nothing
-            return _array_for(et, itr.iter, isz)
+            return _array_for(et, isz, shp)
         end
         v1, st = y
-        collect_to_with_first!(_array_for(typeof(v1), itr.iter, isz), v1, itr, st)
+        dest = _array_for(typeof(v1), isz, shp)
+        # The typeassert gives inference a helping hand on the element type and dimensionality
+        # (work-around for #28382)
+        et′ = et <: Type ? Type : et
+        RT = dest isa AbstractArray ? AbstractArray{<:et′, ndims(dest)} : Any
+        collect_to_with_first!(dest, v1, itr, st)::RT
     end
 end
 
 _collect(c, itr, ::EltypeUnknown, isz::SizeUnknown) =
-    grow_to!(_similar_for(c, @default_eltype(itr), itr, isz), itr)
+    grow_to!(_similar_for(c, @default_eltype(itr), itr, isz, nothing), itr)
 
 function _collect(c, itr, ::EltypeUnknown, isz::Union{HasLength,HasShape})
+    et = @default_eltype(itr)
+    shp = _similar_shape(itr, isz)
     y = iterate(itr)
     if y === nothing
-        return _similar_for(c, @default_eltype(itr), itr, isz)
+        return _similar_for(c, et, itr, isz, shp)
     end
     v1, st = y
-    collect_to_with_first!(_similar_for(c, typeof(v1), itr, isz), v1, itr, st)
+    dest = _similar_for(c, typeof(v1), itr, isz, shp)
+    # The typeassert gives inference a helping hand on the element type and dimensionality
+    # (work-around for #28382)
+    et′ = et <: Type ? Type : et
+    RT = dest isa AbstractArray ? AbstractArray{<:et′, ndims(dest)} : Any
+    collect_to_with_first!(dest, v1, itr, st)::RT
 end
 
 function collect_to_with_first!(dest::AbstractArray, v1, itr, st)
@@ -716,7 +828,7 @@ function collect_to_with_first!(dest, v1, itr, st)
 end
 
 function setindex_widen_up_to(dest::AbstractArray{T}, el, i) where T
-    @_inline_meta
+    @inline
     new = similar(dest, promote_typejoin(T, typeof(el)))
     f = first(LinearIndices(dest))
     copyto!(new, first(LinearIndices(new)), dest, f, i-f)
@@ -732,8 +844,8 @@ function collect_to!(dest::AbstractArray{T}, itr, offs, st) where T
         y = iterate(itr, st)
         y === nothing && break
         el, st = y
-        if el isa T || typeof(el) === T
-            @inbounds dest[i] = el::T
+        if el isa T
+            @inbounds dest[i] = el
             i += 1
         else
             new = setindex_widen_up_to(dest, el, i)
@@ -752,7 +864,7 @@ function grow_to!(dest, itr)
 end
 
 function push_widen(dest, el)
-    @_inline_meta
+    @inline
     new = sizehint!(empty(dest, promote_typejoin(eltype(dest), typeof(el))), length(dest))
     if new isa AbstractSet
         # TODO: merge back these two branches when copy! is re-enabled for sets/vectors
@@ -769,8 +881,8 @@ function grow_to!(dest, itr, st)
     y = iterate(itr, st)
     while y !== nothing
         el, st = y
-        if el isa T || typeof(el) === T
-            push!(dest, el::T)
+        if el isa T
+            push!(dest, el)
         else
             new = push_widen(dest, el)
             return grow_to!(new, itr, st)
@@ -782,7 +894,7 @@ end
 
 ## Iteration ##
 
-iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
+iterate(A::Array, i=1) = (@inline; (i % UInt) - 1 < length(A) ? (@inbounds A[i], i + 1) : nothing)
 
 ## Indexing: getindex ##
 
@@ -792,6 +904,8 @@ iterate(A::Array, i=1) = (@_inline_meta; (i % UInt) - 1 < length(A) ? (@inbounds
 Retrieve the value(s) stored at the given key or index within a collection. The syntax
 `a[i,j,...]` is converted by the compiler to `getindex(a, i, j, ...)`.
 
+See also [`get`](@ref), [`keys`](@ref), [`eachindex`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = Dict("a" => 1, "b" => 2)
@@ -805,21 +919,21 @@ julia> getindex(A, "a")
 """
 function getindex end
 
-# This is more complicated than it needs to be in order to get Win64 through bootstrap
-@eval getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1)
-@eval getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@_inline_meta; arrayref($(Expr(:boundscheck)), A, i1, i2, I...))
-
-# Faster contiguous indexing using copyto! for UnitRange and Colon
-function getindex(A::Array, I::UnitRange{Int})
-    @_inline_meta
+# Faster contiguous indexing using copyto! for AbstractUnitRange and Colon
+function getindex(A::Array, I::AbstractUnitRange{<:Integer})
+    @inline
     @boundscheck checkbounds(A, I)
     lI = length(I)
-    X = similar(A, lI)
+    X = similar(A, axes(I))
     if lI > 0
-        unsafe_copyto!(X, 1, A, first(I), lI)
+        copyto!(X, firstindex(X), A, first(I), lI)
     end
     return X
 end
+
+# getindex for carrying out logical indexing for AbstractUnitRange{Bool} as Bool <: Integer
+getindex(a::Array, r::AbstractUnitRange{Bool}) = getindex(a, to_index(r))
+
 function getindex(A::Array, c::Colon)
     lI = length(A)
     X = similar(A, lI)
@@ -846,7 +960,7 @@ function setindex! end
 
 @eval setindex!(A::Array{T}, x, i1::Int) where {T} = arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1)
 @eval setindex!(A::Array{T}, x, i1::Int, i2::Int, I::Int...) where {T} =
-    (@_inline_meta; arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1, i2, I...))
+    (@inline; arrayset($(Expr(:boundscheck)), A, convert(T,x)::T, i1, i2, I...))
 
 # This is redundant with the abstract fallbacks but needed and helpful for bootstrap
 function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int})
@@ -865,8 +979,8 @@ function setindex!(A::Array, X::AbstractArray, I::AbstractVector{Int})
 end
 
 # Faster contiguous setindex! with copyto!
-function setindex!(A::Array{T}, X::Array{T}, I::UnitRange{Int}) where T
-    @_inline_meta
+function setindex!(A::Array{T}, X::Array{T}, I::AbstractUnitRange{Int}) where T
+    @inline
     @boundscheck checkbounds(A, I)
     lI = length(I)
     @boundscheck setindex_shape_check(X, lI)
@@ -876,7 +990,7 @@ function setindex!(A::Array{T}, X::Array{T}, I::UnitRange{Int}) where T
     return A
 end
 function setindex!(A::Array{T}, X::Array{T}, c::Colon) where T
-    @_inline_meta
+    @inline
     lI = length(A)
     @boundscheck setindex_shape_check(X, lI)
     if lI > 0
@@ -926,6 +1040,10 @@ julia> push!([1, 2, 3], 4, 5, 6)
 If `collection` is ordered, use [`append!`](@ref) to add all the elements of another
 collection to it. The result of the preceding example is equivalent to `append!([1, 2, 3], [4,
 5, 6])`. For `AbstractSet` objects, [`union!`](@ref) can be used instead.
+
+See [`sizehint!`](@ref) for notes about the performance model.
+
+See also [`pushfirst!`](@ref).
 """
 function push! end
 
@@ -933,7 +1051,7 @@ function push!(a::Array{T,1}, item) where T
     # convert first so we don't grow the array if the assignment won't work
     itemT = convert(T, item)
     _growend!(a, 1)
-    a[end] = itemT
+    @inbounds a[end] = itemT
     return a
 end
 
@@ -944,19 +1062,23 @@ function push!(a::Array{Any,1}, @nospecialize item)
 end
 
 """
-    append!(collection, collection2) -> collection.
+    append!(collection, collections...) -> collection.
 
-For an ordered container `collection`, add the elements of `collection2` to the end of it.
+For an ordered container `collection`, add the elements of each `collections`
+to the end of it.
+
+!!! compat "Julia 1.6"
+    Specifying multiple collections to be appended requires at least Julia 1.6.
 
 # Examples
 ```jldoctest
-julia> append!([1],[2,3])
+julia> append!([1], [2, 3])
 3-element Vector{Int64}:
  1
  2
  3
 
-julia> append!([1, 2, 3], [4, 5, 6])
+julia> append!([1, 2, 3], [4, 5], [6])
 6-element Vector{Int64}:
  1
  2
@@ -969,6 +1091,11 @@ julia> append!([1, 2, 3], [4, 5, 6])
 Use [`push!`](@ref) to add individual items to `collection` which are not already
 themselves in another collection. The result of the preceding example is equivalent to
 `push!([1, 2, 3], 4, 5, 6)`.
+
+See [`sizehint!`](@ref) for notes about the performance model.
+
+See also [`vcat`](@ref) for vectors, [`union!`](@ref) for sets,
+and [`prepend!`](@ref) and [`pushfirst!`](@ref) for the opposite order.
 """
 function append!(a::Vector, items::AbstractVector)
     itemindices = eachindex(items)
@@ -981,6 +1108,8 @@ end
 append!(a::AbstractVector, iter) = _append!(a, IteratorSize(iter), iter)
 push!(a::AbstractVector, iter...) = append!(a, iter)
 
+append!(a::AbstractVector, iter...) = foldl(append!, iter, init=a)
+
 function _append!(a, ::Union{HasLength,HasShape}, iter)
     n = length(a)
     i = lastindex(a)
@@ -999,17 +1128,32 @@ function _append!(a, ::IteratorSize, iter)
 end
 
 """
-    prepend!(a::Vector, items) -> collection
+    prepend!(a::Vector, collections...) -> collection
 
-Insert the elements of `items` to the beginning of `a`.
+Insert the elements of each `collections` to the beginning of `a`.
+
+When `collections` specifies multiple collections, order is maintained:
+elements of `collections[1]` will appear leftmost in `a`, and so on.
+
+!!! compat "Julia 1.6"
+    Specifying multiple collections to be prepended requires at least Julia 1.6.
 
 # Examples
 ```jldoctest
-julia> prepend!([3],[1,2])
+julia> prepend!([3], [1, 2])
 3-element Vector{Int64}:
  1
  2
  3
+
+julia> prepend!([6], [1, 2], [3, 4, 5])
+6-element Vector{Int64}:
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
 ```
 """
 function prepend! end
@@ -1029,6 +1173,8 @@ end
 prepend!(a::Vector, iter) = _prepend!(a, IteratorSize(iter), iter)
 pushfirst!(a::Vector, iter...) = prepend!(a, iter)
 
+prepend!(a::AbstractVector, iter...) = foldr((v, a) -> prepend!(a, v), iter, init=a)
+
 function _prepend!(a, ::Union{HasLength,HasShape}, iter)
     require_one_based_indexing(a)
     n = length(iter)
@@ -1093,9 +1239,22 @@ function resize!(a::Vector, nl::Integer)
 end
 
 """
-    sizehint!(s, n)
+    sizehint!(s, n) -> s
 
 Suggest that collection `s` reserve capacity for at least `n` elements. This can improve performance.
+
+# Notes on the performance model
+
+For types that support `sizehint!`,
+
+1. `push!` and `append!` methods generally may (but are not required to) preallocate extra
+   storage. For types implemented in `Base`, they typically do, using a heuristic optimized for
+   a general use case.
+
+2. `sizehint!` may control this preallocation. Again, it typically does this for types in
+   `Base`.
+
+3. `empty!` is nearly costless (and O(1)) for types that support this kind of preallocation.
 """
 function sizehint! end
 
@@ -1108,7 +1267,10 @@ end
     pop!(collection) -> item
 
 Remove an item in `collection` and return it. If `collection` is an
-ordered container, the last item is returned.
+ordered container, the last item is returned; for unordered containers,
+an arbitrary element is returned.
+
+See also: [`popfirst!`](@ref), [`popat!`](@ref), [`delete!`](@ref), [`deleteat!`](@ref), [`splice!`](@ref), and [`push!`](@ref).
 
 # Examples
 ```jldoctest
@@ -1158,7 +1320,8 @@ Remove the item at the given `i` and return it. Subsequent items
 are shifted to fill the resulting gap.
 When `i` is not a valid index for `a`, return `default`, or throw an error if
 `default` is not specified.
-See also [`deleteat!`](@ref) and [`splice!`](@ref).
+
+See also: [`pop!`](@ref), [`popfirst!`](@ref), [`deleteat!`](@ref), [`splice!`](@ref).
 
 !!! compat "Julia 1.5"
     This function is available as of Julia 1.5.
@@ -1203,6 +1366,8 @@ end
 
 Insert one or more `items` at the beginning of `collection`.
 
+This function is called `unshift` in many other programming languages.
+
 # Examples
 ```jldoctest
 julia> pushfirst!([1, 2, 3, 4], 5, 6)
@@ -1227,6 +1392,10 @@ end
 
 Remove the first `item` from `collection`.
 
+This function is called `shift` in many other programming languages.
+
+See also: [`pop!`](@ref), [`popat!`](@ref), [`delete!`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1, 2, 3, 4, 5, 6]
@@ -1265,16 +1434,19 @@ end
 Insert an `item` into `a` at the given `index`. `index` is the index of `item` in
 the resulting `a`.
 
+See also: [`push!`](@ref), [`replace`](@ref), [`popat!`](@ref), [`splice!`](@ref).
+
 # Examples
 ```jldoctest
-julia> insert!([6, 5, 4, 2, 1], 4, 3)
-6-element Vector{Int64}:
- 6
- 5
- 4
- 3
- 2
+julia> insert!(Any[1:6;], 3, "here")
+7-element Vector{Any}:
  1
+ 2
+  "here"
+ 3
+ 4
+ 5
+ 6
 ```
 """
 function insert!(a::Array{T,1}, i::Integer, item) where T
@@ -1292,6 +1464,8 @@ end
 Remove the item at the given `i` and return the modified `a`. Subsequent items
 are shifted to fill the resulting gap.
 
+See also: [`delete!`](@ref), [`popat!`](@ref), [`splice!`](@ref).
+
 # Examples
 ```jldoctest
 julia> deleteat!([6, 5, 4, 3, 2, 1], 2)
@@ -1303,14 +1477,24 @@ julia> deleteat!([6, 5, 4, 3, 2, 1], 2)
  1
 ```
 """
-deleteat!(a::Vector, i::Integer) = (_deleteat!(a, i, 1); a)
-
-function deleteat!(a::Vector, r::UnitRange{<:Integer})
-    n = length(a)
-    isempty(r) || _deleteat!(a, first(r), length(r))
+function deleteat!(a::Vector, i::Integer)
+    i isa Bool && depwarn("passing Bool as an index is deprecated", :deleteat!)
+    _deleteat!(a, i, 1)
     return a
 end
 
+function deleteat!(a::Vector, r::AbstractUnitRange{<:Integer})
+    if eltype(r) === Bool
+        return invoke(deleteat!, Tuple{Vector, AbstractVector{Bool}}, a, r)
+    else
+        n = length(a)
+        f = first(r)
+        f isa Bool && depwarn("passing Bool as an index is deprecated", :deleteat!)
+        isempty(r) || _deleteat!(a, f, length(r))
+        return a
+    end
+end
+
 """
     deleteat!(a::Vector, inds)
 
@@ -1345,6 +1529,23 @@ deleteat!(a::Vector, inds::AbstractVector) = _deleteat!(a, to_indices(a, (inds,)
 
 struct Nowhere; end
 push!(::Nowhere, _) = nothing
+_growend!(::Nowhere, _) = nothing
+
+@inline function _push_deleted!(dltd, a::Vector, ind)
+    if @inbounds isassigned(a, ind)
+        push!(dltd, @inbounds a[ind])
+    else
+        _growend!(dltd, 1)
+    end
+end
+
+@inline function _copy_item!(a::Vector, p, q)
+    if @inbounds isassigned(a, q)
+        @inbounds a[p] = a[q]
+    else
+        _unsetindex!(a, p)
+    end
+end
 
 function _deleteat!(a::Vector, inds, dltd=Nowhere())
     n = length(a)
@@ -1352,7 +1553,7 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere())
     y === nothing && return a
     (p, s) = y
     checkbounds(a, p)
-    push!(dltd, @inbounds a[p])
+    _push_deleted!(dltd, a, p)
     q = p+1
     while true
         y = iterate(inds, s)
@@ -1366,14 +1567,14 @@ function _deleteat!(a::Vector, inds, dltd=Nowhere())
             end
         end
         while q < i
-            @inbounds a[p] = a[q]
+            _copy_item!(a, p, q)
             p += 1; q += 1
         end
-        push!(dltd, @inbounds a[i])
+        _push_deleted!(dltd, a, i)
         q = i+1
     end
     while q <= n
-        @inbounds a[p] = a[q]
+        _copy_item!(a, p, q)
         p += 1; q += 1
     end
     _deleteend!(a, n-p+1)
@@ -1386,7 +1587,7 @@ function deleteat!(a::Vector, inds::AbstractVector{Bool})
     length(inds) == n || throw(BoundsError(a, inds))
     p = 1
     for (q, i) in enumerate(inds)
-        @inbounds a[p] = a[q]
+        _copy_item!(a, p, q)
         p += !i
     end
     _deleteend!(a, n-p+1)
@@ -1403,6 +1604,8 @@ Subsequent items are shifted left to fill the resulting gap.
 If specified, replacement values from an ordered
 collection will be spliced in place of the removed item.
 
+See also: [`replace`](@ref), [`delete!`](@ref), [`deleteat!`](@ref), [`pop!`](@ref), [`popat!`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [6, 5, 4, 3, 2, 1]; splice!(A, 5)
@@ -1469,7 +1672,7 @@ Remove items at specified indices, and return a collection containing
 the removed items.
 Subsequent items are shifted left to fill the resulting gaps.
 If specified, replacement values from an ordered collection will be spliced in
-place of the removed items; in this case, `indices` must be a `UnitRange`.
+place of the removed items; in this case, `indices` must be a `AbstractUnitRange`.
 
 To insert `replacement` before an index `n` without removing any items, use
 `splice!(collection, n:n-1, replacement)`.
@@ -1477,6 +1680,9 @@ To insert `replacement` before an index `n` without removing any items, use
 !!! compat "Julia 1.5"
     Prior to Julia 1.5, `indices` must always be a `UnitRange`.
 
+!!! compat "Julia 1.8"
+    Prior to Julia 1.8, `indices` must be a `UnitRange` if splicing in replacement values.
+
 # Examples
 ```jldoctest
 julia> A = [-1, -2, -3, 5, 4, 3, -1]; splice!(A, 4:3, 2)
@@ -1494,7 +1700,7 @@ julia> A
  -1
 ```
 """
-function splice!(a::Vector, r::UnitRange{<:Integer}, ins=_default_splice)
+function splice!(a::Vector, r::AbstractUnitRange{<:Integer}, ins=_default_splice)
     v = a[r]
     m = length(ins)
     if m == 0
@@ -1552,7 +1758,7 @@ end
     reverse(v [, start=1 [, stop=length(v) ]] )
 
 Return a copy of `v` reversed from start to stop.  See also [`Iterators.reverse`](@ref)
-for reverse-order iteration without making a copy.
+for reverse-order iteration without making a copy, and in-place [`reverse!`](@ref).
 
 # Examples
 ```jldoctest
@@ -1696,7 +1902,7 @@ function vcat(arrays::Vector{T}...) where T
     return arr
 end
 
-_cat(n::Integer, x::Integer...) = reshape([x...], (ntuple(x->1, n-1)..., length(x)))
+_cat(n::Integer, x::Integer...) = reshape([x...], (ntuple(Returns(1), n-1)..., length(x)))
 
 ## find ##
 
@@ -1732,18 +1938,7 @@ julia> findnext(A, CartesianIndex(1, 1))
 CartesianIndex(2, 1)
 ```
 """
-function findnext(A, start)
-    l = last(keys(A))
-    i = oftype(l, start)
-    i > l && return nothing
-    while true
-        A[i] && return i
-        i == l && break
-        # nextind(A, l) can throw/overflow
-        i = nextind(A, i)
-    end
-    return nothing
-end
+findnext(A, start) = findnext(identity, A, start)
 
 """
     findfirst(A)
@@ -1755,6 +1950,8 @@ To search for other kinds of values, pass a predicate as the first argument.
 Indices or keys are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
 
+See also: [`findall`](@ref), [`findnext`](@ref), [`findlast`](@ref), [`searchsortedfirst`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [false, false, true, false]
@@ -1778,14 +1975,7 @@ julia> findfirst(A)
 CartesianIndex(2, 1)
 ```
 """
-function findfirst(A)
-    for (i, a) in pairs(A)
-        if a
-            return i
-        end
-    end
-    return nothing
-end
+findfirst(A) = findfirst(identity, A)
 
 # Needed for bootstrap, and allows defining only an optimized findnext method
 findfirst(A::AbstractArray) = findnext(A, first(keys(A)))
@@ -1877,7 +2067,7 @@ findfirst(p::Union{Fix2{typeof(isequal),Int},Fix2{typeof(==),Int}}, r::OneTo{Int
     1 <= p.x <= r.stop ? p.x : nothing
 
 findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::AbstractUnitRange) where {T<:Integer} =
-    first(r) <= p.x <= last(r) ? 1+Int(p.x - first(r)) : nothing
+    first(r) <= p.x <= last(r) ? firstindex(r) + Int(p.x - first(r)) : nothing
 
 function findfirst(p::Union{Fix2{typeof(isequal),T},Fix2{typeof(==),T}}, r::StepRange{T,S}) where {T,S}
     isempty(r) && return nothing
@@ -1896,6 +2086,8 @@ or `nothing` if not found.
 Indices are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
 
+See also: [`findnext`](@ref), [`findfirst`](@ref), [`findall`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [false, false, true, true]
@@ -1919,18 +2111,7 @@ julia> findprev(A, CartesianIndex(2, 1))
 CartesianIndex(2, 1)
 ```
 """
-function findprev(A, start)
-    f = first(keys(A))
-    i = oftype(f, start)
-    i < f && return nothing
-    while true
-        A[i] && return i
-        i == f && break
-        # prevind(A, f) can throw/underflow
-        i = prevind(A, i)
-    end
-    return nothing
-end
+findprev(A, start) = findprev(identity, A, start)
 
 """
     findlast(A)
@@ -1941,6 +2122,8 @@ Return `nothing` if there is no `true` value in `A`.
 Indices or keys are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
 
+See also: [`findfirst`](@ref), [`findprev`](@ref), [`findall`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [true, false, true, false]
@@ -1966,14 +2149,7 @@ julia> findlast(A)
 CartesianIndex(2, 1)
 ```
 """
-function findlast(A)
-    for (i, a) in Iterators.reverse(pairs(A))
-        if a
-            return i
-        end
-    end
-    return nothing
-end
+findlast(A) = findlast(identity, A)
 
 # Needed for bootstrap, and allows defining only an optimized findprev method
 findlast(A::AbstractArray) = findprev(A, last(keys(A)))
@@ -2119,6 +2295,10 @@ julia> findall(x -> x >= 0, d)
 """
 findall(testf::Function, A) = collect(first(p) for p in pairs(A) if testf(last(p)))
 
+# Broadcasting is much faster for small testf, and computing
+# integer indices from logical index using findall has a negligible cost
+findall(testf::Function, A::AbstractArray) = findall(testf.(A))
+
 """
     findall(A)
 
@@ -2129,6 +2309,8 @@ To search for other kinds of values, pass a predicate as the first argument.
 Indices or keys are of the same type as those returned by [`keys(A)`](@ref)
 and [`pairs(A)`](@ref).
 
+See also: [`findfirst`](@ref), [`searchsorted`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [true, false, false, true]
@@ -2160,6 +2342,7 @@ Int64[]
 function findall(A)
     collect(first(p) for p in pairs(A) if last(p))
 end
+
 # Allocating result upfront is faster (possible only when collection can be iterated twice)
 function findall(A::AbstractArray{Bool})
     n = count(A)
@@ -2178,140 +2361,6 @@ findall(x::Bool) = x ? [1] : Vector{Int}()
 findall(testf::Function, x::Number) = testf(x) ? [1] : Vector{Int}()
 findall(p::Fix2{typeof(in)}, x::Number) = x in p.x ? [1] : Vector{Int}()
 
-"""
-    findmax(itr) -> (x, index)
-
-Return the maximum element of the collection `itr` and its index. If there are multiple
-maximal elements, then the first one will be returned.
-If any data element is `NaN`, this element is returned.
-The result is in line with `max`.
-
-The collection must not be empty.
-
-# Examples
-```jldoctest
-julia> findmax([8,0.1,-9,pi])
-(8.0, 1)
-
-julia> findmax([1,7,7,6])
-(7, 2)
-
-julia> findmax([1,7,7,NaN])
-(NaN, 4)
-```
-"""
-findmax(a) = _findmax(a, :)
-
-function _findmax(a, ::Colon)
-    p = pairs(a)
-    y = iterate(p)
-    if y === nothing
-        throw(ArgumentError("collection must be non-empty"))
-    end
-    (mi, m), s = y
-    i = mi
-    while true
-        y = iterate(p, s)
-        y === nothing && break
-        m != m && break
-        (i, ai), s = y
-        if ai != ai || isless(m, ai)
-            m = ai
-            mi = i
-        end
-    end
-    return (m, mi)
-end
-
-"""
-    findmin(itr) -> (x, index)
-
-Return the minimum element of the collection `itr` and its index. If there are multiple
-minimal elements, then the first one will be returned.
-If any data element is `NaN`, this element is returned.
-The result is in line with `min`.
-
-The collection must not be empty.
-
-# Examples
-```jldoctest
-julia> findmin([8,0.1,-9,pi])
-(-9.0, 3)
-
-julia> findmin([7,1,1,6])
-(1, 2)
-
-julia> findmin([7,1,1,NaN])
-(NaN, 4)
-```
-"""
-findmin(a) = _findmin(a, :)
-
-function _findmin(a, ::Colon)
-    p = pairs(a)
-    y = iterate(p)
-    if y === nothing
-        throw(ArgumentError("collection must be non-empty"))
-    end
-    (mi, m), s = y
-    i = mi
-    while true
-        y = iterate(p, s)
-        y === nothing && break
-        m != m && break
-        (i, ai), s = y
-        if ai != ai || isless(ai, m)
-            m = ai
-            mi = i
-        end
-    end
-    return (m, mi)
-end
-
-"""
-    argmax(itr) -> Integer
-
-Return the index of the maximum element in a collection. If there are multiple maximal
-elements, then the first one will be returned.
-
-The collection must not be empty.
-
-# Examples
-```jldoctest
-julia> argmax([8,0.1,-9,pi])
-1
-
-julia> argmax([1,7,7,6])
-2
-
-julia> argmax([1,7,7,NaN])
-4
-```
-"""
-argmax(a) = findmax(a)[2]
-
-"""
-    argmin(itr) -> Integer
-
-Return the index of the minimum element in a collection. If there are multiple minimal
-elements, then the first one will be returned.
-
-The collection must not be empty.
-
-# Examples
-```jldoctest
-julia> argmin([8,0.1,-9,pi])
-3
-
-julia> argmin([7,1,1,6])
-2
-
-julia> argmin([7,1,1,NaN])
-4
-```
-"""
-argmin(a) = findmin(a)[2]
-
 # similar to Matlab's ismember
 """
     indexin(a, b)
@@ -2320,6 +2369,8 @@ Return an array containing the first index in `b` for
 each value in `a` that is a member of `b`. The output
 array contains `nothing` wherever `a` is not a member of `b`.
 
+See also: [`sortperm`](@ref), [`findfirst`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = ['a', 'b', 'c', 'b', 'd', 'a'];
@@ -2416,7 +2467,8 @@ function findall(pred::Fix2{typeof(in),<:Union{Array{<:Real},Real}}, x::Array{<:
 end
 # issorted fails for some element types so the method above has to be restricted
 # to element with isless/< defined.
-findall(pred::Fix2{typeof(in)}, x::Union{AbstractArray, Tuple}) = _findin(x, pred.x)
+findall(pred::Fix2{typeof(in)}, x::AbstractArray) = _findin(x, pred.x)
+findall(pred::Fix2{typeof(in)}, x::Tuple) = _findin(x, pred.x)
 
 # Copying subregions
 function indcopy(sz::Dims, I::Vector)
@@ -2452,6 +2504,8 @@ The function `f` is passed one argument.
 !!! compat "Julia 1.4"
     Support for `a` as a tuple requires at least Julia 1.4.
 
+See also: [`filter!`](@ref), [`Iterators.filter`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = 1:10
@@ -2528,6 +2582,56 @@ function filter!(f, a::AbstractVector)
     return a
 end
 
+"""
+    keepat!(a::Vector, inds)
+    keepat!(a::BitVector, inds)
+
+Remove the items at all the indices which are not given by `inds`,
+and return the modified `a`.
+Items which are kept are shifted to fill the resulting gaps.
+
+`inds` must be an iterator of sorted and unique integer indices.
+See also [`deleteat!`](@ref).
+
+!!! compat "Julia 1.7"
+    This function is available as of Julia 1.7.
+
+# Examples
+```jldoctest
+julia> keepat!([6, 5, 4, 3, 2, 1], 1:2:5)
+3-element Vector{Int64}:
+ 6
+ 4
+ 2
+```
+"""
+keepat!(a::Vector, inds) = _keepat!(a, inds)
+
+"""
+    keepat!(a::Vector, m::AbstractVector{Bool})
+    keepat!(a::BitVector, m::AbstractVector{Bool})
+
+The in-place version of logical indexing `a = a[m]`. That is, `keepat!(a, m)` on
+vectors of equal length `a` and `m` will remove all elements from `a` for which
+`m` at the corresponding index is `false`.
+
+# Examples
+```jldoctest
+julia> a = [:a, :b, :c];
+
+julia> keepat!(a, [true, false, true])
+2-element Vector{Symbol}:
+ :a
+ :c
+
+julia> a
+2-element Vector{Symbol}:
+ :a
+ :c
+```
+"""
+keepat!(a::Vector, m::AbstractVector{Bool}) = _keepat!(a, m)
+
 # set-like operators for vectors
 # These are moderately efficient, preserve order, and remove dupes.
 
@@ -2561,19 +2665,27 @@ function _shrink!(shrinker!, v::AbstractVector, itrs)
     seen = Set{eltype(v)}()
     filter!(_grow_filter!(seen), v)
     shrinker!(seen, itrs...)
-    filter!(_in(seen), v)
+    filter!(in(seen), v)
 end
 
 intersect!(v::AbstractVector, itrs...) = _shrink!(intersect!, v, itrs)
 setdiff!(  v::AbstractVector, itrs...) = _shrink!(setdiff!, v, itrs)
 
-vectorfilter(f, v::AbstractVector) = filter(f, v) # TODO: do we want this special case?
-vectorfilter(f, v) = [x for x in v if f(x)]
+vectorfilter(T::Type, f, v) = T[x for x in v if f(x)]
 
 function _shrink(shrinker!, itr, itrs)
-    keep = shrinker!(Set(itr), itrs...)
-    vectorfilter(_shrink_filter!(keep), itr)
+    T = promote_eltype(itr, itrs...)
+    keep = shrinker!(Set{T}(itr), itrs...)
+    vectorfilter(T, _shrink_filter!(keep), itr)
 end
 
 intersect(itr, itrs...) = _shrink(intersect!, itr, itrs)
 setdiff(  itr, itrs...) = _shrink(setdiff!, itr, itrs)
+
+function intersect(v::AbstractVector, r::AbstractRange)
+    T = promote_eltype(v, r)
+    common = Iterators.filter(in(r), v)
+    seen = Set{T}(common)
+    return vectorfilter(T, _shrink_filter!(seen), common)
+end
+intersect(r::AbstractRange, v::AbstractVector) = intersect(v, r)
diff --git a/base/arraymath.jl b/base/arraymath.jl
index e75e98bf9dd62c..62dc3772e49381 100644
--- a/base/arraymath.jl
+++ b/base/arraymath.jl
@@ -1,36 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-## Unary operators ##
-
-"""
-    conj!(A)
-
-Transform an array to its complex conjugate in-place.
-
-See also [`conj`](@ref).
-
-# Examples
-```jldoctest
-julia> A = [1+im 2-im; 2+2im 3+im]
-2×2 Matrix{Complex{Int64}}:
- 1+1im  2-1im
- 2+2im  3+1im
-
-julia> conj!(A);
-
-julia> A
-2×2 Matrix{Complex{Int64}}:
- 1-1im  2+1im
- 2-2im  3-1im
-```
-"""
-conj!(A::AbstractArray{<:Number}) = (@inbounds broadcast!(conj, A, A); A)
-
-for f in (:-, :conj, :real, :imag)
-    @eval ($f)(A::AbstractArray) = broadcast_preserving_zero_d($f, A)
-end
-
-
 ## Binary arithmetic operators ##
 
 for f in (:+, :-)
diff --git a/base/arrayshow.jl b/base/arrayshow.jl
index 1cb002db24f8f1..0d480b64bb32d4 100644
--- a/base/arrayshow.jl
+++ b/base/arrayshow.jl
@@ -57,15 +57,16 @@ Parameter `sep::Integer` is number of spaces to put between elements.
 Alignment is reported as a vector of (left,right) tuples, one for each
 column going across the screen.
 """
-function alignment(io::IO, X::AbstractVecOrMat,
-        rows::AbstractVector, cols::AbstractVector,
-        cols_if_complete::Integer, cols_otherwise::Integer, sep::Integer)
-    a = Tuple{Int, Int}[]
+function alignment(io::IO, @nospecialize(X::AbstractVecOrMat),
+        rows::AbstractVector{T}, cols::AbstractVector{V},
+        cols_if_complete::Integer, cols_otherwise::Integer, sep::Integer,
+        #= `size(X) may not infer, set this in caller =# ncols::Integer=size(X, 2)) where {T,V}
+    a = Tuple{T, V}[]
     for j in cols # need to go down each column one at a time
         l = r = 0
         for i in rows # plumb down and see what largest element sizes are
             if isassigned(X,i,j)
-                aij = alignment(io, X[i,j])
+                aij = alignment(io, X[i,j])::Tuple{Int,Int}
             else
                 aij = undef_ref_alignment
             end
@@ -78,7 +79,7 @@ function alignment(io::IO, X::AbstractVecOrMat,
             break
         end
     end
-    if 1 < length(a) < length(axes(X,2))
+    if 1 < length(a) < ncols
         while sum(map(sum,a)) + sep*length(a) >= cols_otherwise
             pop!(a)
         end
@@ -94,8 +95,9 @@ is specified as string sep.
 `print_matrix_row` will also respect compact output for elements.
 """
 function print_matrix_row(io::IO,
-        X::AbstractVecOrMat, A::Vector,
-        i::Integer, cols::AbstractVector, sep::AbstractString)
+        @nospecialize(X::AbstractVecOrMat), A::Vector,
+        i::Integer, cols::AbstractVector, sep::AbstractString,
+        #= `axes(X)` may not infer, set this in caller =# idxlast::Integer=last(axes(X, 2)))
     for (k, j) = enumerate(cols)
         k > length(A) && break
         if isassigned(X,Int(i),Int(j)) # isassigned accepts only `Int` indices
@@ -114,7 +116,7 @@ function print_matrix_row(io::IO,
             sx = undef_ref_str
         end
         l = repeat(" ", A[k][1]-a[1]) # pad on left and right as needed
-        r = j == axes(X, 2)[end] ? "" : repeat(" ", A[k][2]-a[2])
+        r = j == idxlast ? "" : repeat(" ", A[k][2]-a[2])
         prettysx = replace_in_print_matrix(X,i,j,sx)
         print(io, l, prettysx, r)
         if k < length(A); print(io, sep); end
@@ -166,7 +168,12 @@ function print_matrix(io::IO, X::AbstractVecOrMat,
                       vdots::AbstractString = "\u22ee",
                       ddots::AbstractString = "  \u22f1  ",
                       hmod::Integer = 5, vmod::Integer = 5)
+    _print_matrix(io, inferencebarrier(X), pre, sep, post, hdots, vdots, ddots, hmod, vmod, unitrange(axes(X,1)), unitrange(axes(X,2)))
+end
+
+function _print_matrix(io, @nospecialize(X::AbstractVecOrMat), pre, sep, post, hdots, vdots, ddots, hmod, vmod, rowsA, colsA)
     hmod, vmod = Int(hmod)::Int, Int(vmod)::Int
+    ncols, idxlast = length(colsA), last(colsA)
     if !(get(io, :limit, false)::Bool)
         screenheight = screenwidth = typemax(Int)
     else
@@ -178,7 +185,6 @@ function print_matrix(io::IO, X::AbstractVecOrMat,
     postsp = ""
     @assert textwidth(hdots) == textwidth(ddots)
     sepsize = length(sep)::Int
-    rowsA, colsA = UnitRange{Int}(axes(X,1)), UnitRange{Int}(axes(X,2))
     m, n = length(rowsA), length(colsA)
     # To figure out alignments, only need to look at as many rows as could
     # fit down screen. If screen has at least as many rows as A, look at A.
@@ -187,33 +193,37 @@ function print_matrix(io::IO, X::AbstractVecOrMat,
     halfheight = div(screenheight,2)
     if m > screenheight
         rowsA = [rowsA[(0:halfheight-1) .+ firstindex(rowsA)]; rowsA[(end-div(screenheight-1,2)+1):end]]
+    else
+        rowsA = [rowsA;]
     end
     # Similarly for columns, only necessary to get alignments for as many
     # columns as could conceivably fit across the screen
     maxpossiblecols = div(screenwidth, 1+sepsize)
     if n > maxpossiblecols
         colsA = [colsA[(0:maxpossiblecols-1) .+ firstindex(colsA)]; colsA[(end-maxpossiblecols+1):end]]
+    else
+	    colsA = [colsA;]
     end
-    A = alignment(io, X, rowsA, colsA, screenwidth, screenwidth, sepsize)
+    A = alignment(io, X, rowsA, colsA, screenwidth, screenwidth, sepsize, ncols)
     # Nine-slicing is accomplished using print_matrix_row repeatedly
     if m <= screenheight # rows fit vertically on screen
         if n <= length(A) # rows and cols fit so just print whole matrix in one piece
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,A,i,colsA,sep)
+                print_matrix_row(io, X,A,i,colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != last(rowsA); println(io); end
             end
         else # rows fit down screen but cols don't, so need horizontal ellipsis
             c = div(screenwidth-length(hdots)::Int+1,2)+1  # what goes to right of ellipsis
-            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize)) # alignments for right
+            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize, ncols)) # alignments for right
             c = screenwidth - sum(map(sum,Ralign)) - (length(Ralign)-1)*sepsize - length(hdots)::Int
-            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize) # alignments for left of ellipsis
+            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize, ncols) # alignments for left of ellipsis
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep)
+                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep,idxlast)
                 print(io, (i - first(rowsA)) % hmod == 0 ? hdots : repeat(" ", length(hdots)::Int))
-                print_matrix_row(io, X, Ralign, i, (n - length(Ralign)) .+ colsA, sep)
+                print_matrix_row(io, X, Ralign, i, (n - length(Ralign)) .+ colsA, sep, idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != last(rowsA); println(io); end
             end
@@ -222,7 +232,7 @@ function print_matrix(io::IO, X::AbstractVecOrMat,
         if n <= length(A) # rows don't fit, cols do, so only vertical ellipsis
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,A,i,colsA,sep)
+                print_matrix_row(io, X,A,i,colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != rowsA[end] || i == rowsA[halfheight]; println(io); end
                 if i == rowsA[halfheight]
@@ -233,15 +243,15 @@ function print_matrix(io::IO, X::AbstractVecOrMat,
             end
         else # neither rows nor cols fit, so use all 3 kinds of dots
             c = div(screenwidth-length(hdots)::Int+1,2)+1
-            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize))
+            Ralign = reverse(alignment(io, X, rowsA, reverse(colsA), c, c, sepsize, ncols))
             c = screenwidth - sum(map(sum,Ralign)) - (length(Ralign)-1)*sepsize - length(hdots)::Int
-            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize)
+            Lalign = alignment(io, X, rowsA, colsA, c, c, sepsize, ncols)
             r = mod((length(Ralign)-n+1),vmod) # where to put dots on right half
             for i in rowsA
                 print(io, i == first(rowsA) ? pre : presp)
-                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep)
+                print_matrix_row(io, X,Lalign,i,colsA[1:length(Lalign)],sep,idxlast)
                 print(io, (i - first(rowsA)) % hmod == 0 ? hdots : repeat(" ", length(hdots)::Int))
-                print_matrix_row(io, X,Ralign,i,(n-length(Ralign)).+colsA,sep)
+                print_matrix_row(io, X,Ralign,i,(n-length(Ralign)).+colsA,sep,idxlast)
                 print(io, i == last(rowsA) ? post : postsp)
                 if i != rowsA[end] || i == rowsA[halfheight]; println(io); end
                 if i == rowsA[halfheight]
@@ -264,14 +274,21 @@ end
 
 # typeinfo agnostic
 # n-dimensional arrays
-function show_nd(io::IO, a::AbstractArray, print_matrix::Function, label_slices::Bool)
+show_nd(io::IO, a::AbstractArray, print_matrix::Function, show_full::Bool) =
+    _show_nd(io, inferencebarrier(a), print_matrix, show_full, map(unitrange, axes(a)))
+
+function _show_nd(io::IO, @nospecialize(a::AbstractArray), print_matrix::Function, show_full::Bool, axs::Tuple{Vararg{AbstractUnitRange}})
     limit::Bool = get(io, :limit, false)
     if isempty(a)
         return
     end
-    tailinds = tail(tail(axes(a)))
+    tailinds = tail(tail(axs))
     nd = ndims(a)-2
-    for I in CartesianIndices(tailinds)
+    show_full || print(io, "[")
+    Is = CartesianIndices(tailinds)
+    lastidxs = first(Is).I
+    reached_last_d = false
+    for I in Is
         idxs = I.I
         if limit
             for i = 1:nd
@@ -280,14 +297,15 @@ function show_nd(io::IO, a::AbstractArray, print_matrix::Function, label_slices:
                 if length(ind) > 10
                     if ii == ind[firstindex(ind)+3] && all(d->idxs[d]==first(tailinds[d]),1:i-1)
                         for j=i+1:nd
-                            szj = length(axes(a, j+2))
+                            szj = length(axs[j+2])
                             indj = tailinds[j]
                             if szj>10 && first(indj)+2 < idxs[j] <= last(indj)-3
                                 @goto skip
                             end
                         end
-                        #println(io, idxs)
-                        print(io, "...\n\n")
+                        print(io, ";"^(i+2))
+                        print(io, " \u2026 ")
+                        show_full && print(io, "\n\n")
                         @goto skip
                     end
                     if ind[firstindex(ind)+2] < ii <= ind[end-3]
@@ -296,18 +314,40 @@ function show_nd(io::IO, a::AbstractArray, print_matrix::Function, label_slices:
                 end
             end
         end
-        if label_slices
-            print(io, "[:, :, ")
-            for i = 1:(nd-1); print(io, "$(idxs[i]), "); end
-            println(io, idxs[end], "] =")
+        if show_full
+            _show_nd_label(io, a, idxs)
+        end
+        slice = view(a, axs[1], axs[2], idxs...)
+        if show_full
+            print_matrix(io, slice)
+            print(io, idxs == map(last,tailinds) ? "" : "\n\n")
+        else
+            idxdiff = lastidxs .- idxs .< 0
+            if any(idxdiff)
+                lastchangeindex = 2 + findlast(idxdiff)
+                print(io, ";"^lastchangeindex)
+                lastchangeindex == ndims(a) && (reached_last_d = true)
+                print(io, " ")
+            end
+            print_matrix(io, slice)
         end
-        slice = view(a, axes(a,1), axes(a,2), idxs...)
-        print_matrix(io, slice)
-        print(io, idxs == map(last,tailinds) ? "" : "\n\n")
         @label skip
+        lastidxs = idxs
+    end
+    if !show_full
+        reached_last_d || print(io, ";"^(nd+2))
+        print(io, "]")
     end
 end
 
+function _show_nd_label(io::IO, a::AbstractArray, idxs)
+    print(io, "[:, :, ")
+    for i = 1:length(idxs)-1
+        print(io, idxs[i], ", ")
+    end
+    println(io, idxs[end], "] =")
+end
+
 # print_array: main helper functions for show(io, text/plain, array)
 # typeinfo agnostic
 # Note that this is for showing the content inside the array, and for `MIME"text/plain".
@@ -370,14 +410,18 @@ end
 `_show_nonempty(io, X::AbstractMatrix, prefix)` prints matrix X with opening and closing square brackets,
 preceded by `prefix`, supposed to encode the type of the elements.
 """
-function _show_nonempty(io::IO, X::AbstractMatrix, prefix::String)
+_show_nonempty(io::IO, X::AbstractMatrix, prefix::String) =
+    _show_nonempty(io, inferencebarrier(X), prefix, false, axes(X))
+
+function _show_nonempty(io::IO, @nospecialize(X::AbstractMatrix), prefix::String, drop_brackets::Bool, axs::Tuple{AbstractUnitRange,AbstractUnitRange})
     @assert !isempty(X)
     limit = get(io, :limit, false)::Bool
-    indr, indc = axes(X,1), axes(X,2)
+    indr, indc = axs
     nr, nc = length(indr), length(indc)
     rdots, cdots = false, false
-    rr1, rr2 = UnitRange{Int}(indr), 1:0
-    cr1, cr2 = UnitRange{Int}(indc), 1:0
+    rr1, rr2 = unitrange(indr), 1:0
+    cr1 = unitrange(indc)
+    cr2 = first(cr1) .+ (0:-1)
     if limit
         if nr > 4
             rr1, rr2 = rr1[1:2], rr1[nr-1:nr]
@@ -388,7 +432,7 @@ function _show_nonempty(io::IO, X::AbstractMatrix, prefix::String)
             cdots = true
         end
     end
-    print(io, prefix, "[")
+    drop_brackets || print(io, prefix, "[")
     for rr in (rr1, rr2)
         for i in rr
             for cr in (cr1, cr2)
@@ -408,14 +452,18 @@ function _show_nonempty(io::IO, X::AbstractMatrix, prefix::String)
                 end
             end
         end
-        last(rr) != nr && rdots && print(io, "\u2026 ; ")
+        last(rr) != last(indr) && rdots && print(io, "\u2026 ; ")
+    end
+    if !drop_brackets
+        nc > 1 || print(io, ";;")
+        print(io, "]")
     end
-    print(io, "]")
+    return nothing
 end
 
 
 _show_nonempty(io::IO, X::AbstractArray, prefix::String) =
-    show_nd(io, X, (io, slice) -> _show_nonempty(io, slice, prefix), false)
+    show_nd(io, X, (io, slice) -> _show_nonempty(io, inferencebarrier(slice), prefix, true, axes(slice)), false)
 
 # a specific call path is used to show vectors (show_vector)
 _show_nonempty(::IO, ::AbstractVector, ::String) =
diff --git a/base/asyncevent.jl b/base/asyncevent.jl
index 234552e635e2c4..d3938bd66c8425 100644
--- a/base/asyncevent.jl
+++ b/base/asyncevent.jl
@@ -10,12 +10,14 @@ Create a async condition that wakes up tasks waiting for it
 when notified from C by a call to `uv_async_send`.
 Waiting tasks are woken with an error when the object is closed (by [`close`](@ref)).
 Use [`isopen`](@ref) to check whether it is still active.
+
+This provides an implicit acquire & release memory ordering between the sending and waiting threads.
 """
 mutable struct AsyncCondition
-    handle::Ptr{Cvoid}
+    @atomic handle::Ptr{Cvoid}
     cond::ThreadSynchronizer
-    isopen::Bool
-    set::Bool
+    @atomic isopen::Bool
+    @atomic set::Bool
 
     function AsyncCondition()
         this = new(Libc.malloc(_sizeof_uv_async), ThreadSynchronizer(), true, false)
@@ -43,10 +45,22 @@ the async condition object itself.
 """
 function AsyncCondition(cb::Function)
     async = AsyncCondition()
-    @async while _trywait(async)
+    t = @task begin
+        unpreserve_handle(async)
+        while _trywait(async)
             cb(async)
             isopen(async) || return
         end
+    end
+    # here we are mimicking parts of _trywait, in coordination with task `t`
+    preserve_handle(async)
+    @lock async.cond begin
+        if async.set
+            schedule(t)
+        else
+            _wait2(async.cond, t)
+        end
+    end
     return async
 end
 
@@ -57,22 +71,32 @@ end
 
 Create a timer that wakes up tasks waiting for it (by calling [`wait`](@ref) on the timer object).
 
-Waiting tasks are woken after an initial delay of `delay` seconds, and then repeating with the given
-`interval` in seconds. If `interval` is equal to `0`, the timer is only triggered once. When
-the timer is closed (by [`close`](@ref)) waiting tasks are woken with an error. Use [`isopen`](@ref)
-to check whether a timer is still active.
+Waiting tasks are woken after an initial delay of at least `delay` seconds, and then repeating after
+at least `interval` seconds again elapse. If `interval` is equal to `0`, the timer is only triggered
+once. When the timer is closed (by [`close`](@ref)) waiting tasks are woken with an error. Use
+[`isopen`](@ref) to check whether a timer is still active.
+
+!!! note
+    `interval` is subject to accumulating time skew. If you need precise events at a particular
+    absolute time, create a new timer at each expiration with the difference to the next time computed.
+
+!!! note
+    A `Timer` requires yield points to update its state. For instance, `isopen(t::Timer)` cannot be
+    used to timeout a non-yielding while loop.
+
 """
 mutable struct Timer
-    handle::Ptr{Cvoid}
+    @atomic handle::Ptr{Cvoid}
     cond::ThreadSynchronizer
-    isopen::Bool
-    set::Bool
+    @atomic isopen::Bool
+    @atomic set::Bool
 
     function Timer(timeout::Real; interval::Real = 0.0)
         timeout ≥ 0 || throw(ArgumentError("timer cannot have negative timeout of $timeout seconds"))
         interval ≥ 0 || throw(ArgumentError("timer cannot have negative repeat interval of $interval seconds"))
-        timeout = UInt64(round(timeout * 1000)) + 1
-        interval = UInt64(round(interval * 1000))
+        # libuv has a tendency to timeout 1 ms early, so we need +1 on the timeout (in milliseconds), unless it is zero
+        timeoutms = ceil(UInt64, timeout * 1000) + !iszero(timeout)
+        intervalms = ceil(UInt64, interval * 1000)
         loop = eventloop()
 
         this = new(Libc.malloc(_sizeof_uv_timer), ThreadSynchronizer(), true, false)
@@ -84,7 +108,7 @@ mutable struct Timer
         ccall(:uv_update_time, Cvoid, (Ptr{Cvoid},), loop)
         err = ccall(:uv_timer_start, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, UInt64, UInt64),
             this, @cfunction(uv_timercb, Cvoid, (Ptr{Cvoid},)),
-            timeout, interval)
+            timeoutms, intervalms)
         @assert err == 0
         iolock_end()
         return this
@@ -96,7 +120,11 @@ unsafe_convert(::Type{Ptr{Cvoid}}, async::AsyncCondition) = async.handle
 
 function _trywait(t::Union{Timer, AsyncCondition})
     set = t.set
-    if !set
+    if set
+        # full barrier now for AsyncCondition
+        t isa Timer || Core.Intrinsics.atomic_fence(:acquire_release)
+    else
+        t.isopen || return false
         t.handle == C_NULL && return false
         iolock_begin()
         set = t.set
@@ -105,14 +133,12 @@ function _trywait(t::Union{Timer, AsyncCondition})
             lock(t.cond)
             try
                 set = t.set
-                if !set
-                    if t.handle != C_NULL
-                        iolock_end()
-                        set = wait(t.cond)
-                        unlock(t.cond)
-                        iolock_begin()
-                        lock(t.cond)
-                    end
+                if !set && t.isopen && t.handle != C_NULL
+                    iolock_end()
+                    set = wait(t.cond)
+                    unlock(t.cond)
+                    iolock_begin()
+                    lock(t.cond)
                 end
             finally
                 unlock(t.cond)
@@ -121,7 +147,7 @@ function _trywait(t::Union{Timer, AsyncCondition})
         end
         iolock_end()
     end
-    t.set = false
+    @atomic :monotonic t.set = false
     return set
 end
 
@@ -131,12 +157,12 @@ function wait(t::Union{Timer, AsyncCondition})
 end
 
 
-isopen(t::Union{Timer, AsyncCondition}) = t.isopen
+isopen(t::Union{Timer, AsyncCondition}) = t.isopen && t.handle != C_NULL
 
 function close(t::Union{Timer, AsyncCondition})
     iolock_begin()
-    if t.handle != C_NULL && isopen(t)
-        t.isopen = false
+    if isopen(t)
+        @atomic :monotonic t.isopen = false
         ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
     end
     iolock_end()
@@ -148,12 +174,12 @@ function uvfinalize(t::Union{Timer, AsyncCondition})
     lock(t.cond)
     try
         if t.handle != C_NULL
-            disassociate_julia_struct(t.handle) # not going to call the usual close hooks
+            disassociate_julia_struct(t.handle) # not going to call the usual close hooks anymore
             if t.isopen
-                t.isopen = false
-                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t)
+                @atomic :monotonic t.isopen = false
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
             end
-            t.handle = C_NULL
+            @atomic :monotonic t.handle = C_NULL
             notify(t.cond, false)
         end
     finally
@@ -166,9 +192,9 @@ end
 function _uv_hook_close(t::Union{Timer, AsyncCondition})
     lock(t.cond)
     try
-        t.isopen = false
-        t.handle = C_NULL
-        notify(t.cond, t.set)
+        @atomic :monotonic t.isopen = false
+        Libc.free(@atomicswap :monotonic t.handle = C_NULL)
+        notify(t.cond, false)
     finally
         unlock(t.cond)
     end
@@ -177,9 +203,9 @@ end
 
 function uv_asynccb(handle::Ptr{Cvoid})
     async = @handle_as handle AsyncCondition
-    lock(async.cond)
+    lock(async.cond) # acquire barrier
     try
-        async.set = true
+        @atomic :release async.set = true
         notify(async.cond, true)
     finally
         unlock(async.cond)
@@ -191,7 +217,7 @@ function uv_timercb(handle::Ptr{Cvoid})
     t = @handle_as handle Timer
     lock(t.cond)
     try
-        t.set = true
+        @atomic :monotonic t.set = true
         if ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 0
             # timer is stopped now
             close(t)
@@ -219,18 +245,18 @@ end
 """
     Timer(callback::Function, delay; interval = 0)
 
-Create a timer that wakes up tasks waiting for it (by calling [`wait`](@ref) on the timer object) and
-calls the function `callback`.
+Create a timer that runs the function `callback` at each timer expiration.
 
-Waiting tasks are woken and the function `callback` is called after an initial delay of `delay` seconds,
-and then repeating with the given `interval` in seconds. If `interval` is equal to `0`, the timer
-is only triggered once. The function `callback` is called with a single argument, the timer itself.
-When the timer is closed (by [`close`](@ref)) waiting tasks are woken with an error. Use [`isopen`](@ref)
-to check whether a timer is still active.
+Waiting tasks are woken and the function `callback` is called after an initial delay of `delay`
+seconds, and then repeating with the given `interval` in seconds. If `interval` is equal to `0`, the
+callback is only run once. The function `callback` is called with a single argument, the timer
+itself. Stop a timer by calling `close`. The `cb` may still be run one final time, if the timer has
+already expired.
 
 # Examples
 
-Here the first number is printed after a delay of two seconds, then the following numbers are printed quickly.
+Here the first number is printed after a delay of two seconds, then the following numbers are
+printed quickly.
 
 ```julia-repl
 julia> begin
@@ -248,53 +274,56 @@ julia> begin
 """
 function Timer(cb::Function, timeout::Real; interval::Real=0.0)
     timer = Timer(timeout, interval=interval)
-    @async while _trywait(timer)
-            cb(timer)
+    t = @task begin
+        unpreserve_handle(timer)
+        while _trywait(timer)
+            try
+                cb(timer)
+            catch err
+                write(stderr, "Error in Timer:\n")
+                showerror(stderr, err, catch_backtrace())
+                return
+            end
             isopen(timer) || return
         end
+    end
+    # here we are mimicking parts of _trywait, in coordination with task `t`
+    preserve_handle(timer)
+    @lock timer.cond begin
+        if timer.set
+            schedule(t)
+        else
+            _wait2(timer.cond, t)
+        end
+    end
     return timer
 end
 
 """
-    timedwait(callback::Function, timeout::Real; pollint::Real=0.1)
+    timedwait(testcb, timeout::Real; pollint::Real=0.1)
 
-Waits until `callback` returns `true` or `timeout` seconds have passed, whichever is earlier.
-`callback` is polled every `pollint` seconds. The minimum value for `timeout` and `pollint`
-is `0.001`, that is, 1 millisecond.
+Waits until `testcb()` returns `true` or `timeout` seconds have passed, whichever is earlier.
+The test function is polled every `pollint` seconds. The minimum value for `pollint` is 0.001 seconds,
+that is, 1 millisecond.
 
 Returns :ok or :timed_out
 """
-function timedwait(testcb::Function, timeout::Real; pollint::Real=0.1)
+function timedwait(testcb, timeout::Real; pollint::Real=0.1)
     pollint >= 1e-3 || throw(ArgumentError("pollint must be ≥ 1 millisecond"))
     start = time_ns()
     ns_timeout = 1e9 * timeout
-    done = Channel(1)
-    function timercb(aw)
-        try
-            if testcb()
-                put!(done, (:ok, nothing))
-            elseif (time_ns() - start) > ns_timeout
-                put!(done, (:timed_out, nothing))
-            end
-        catch e
-            put!(done, (:error, CapturedException(e, catch_backtrace())))
-        finally
-            isready(done) && close(aw)
-        end
-        nothing
-    end
-
-    try
-        testcb() && return :ok
-    catch e
-        throw(CapturedException(e, catch_backtrace()))
-    end
 
-    t = Timer(timercb, pollint, interval = pollint)
-    ret, e = fetch(done)
-    close(t)
+    testcb() && return :ok
 
-    ret === :error && throw(e)
-
-    return ret
+    t = Timer(pollint, interval=pollint)
+    while _trywait(t) # stop if we ever get closed
+        if testcb()
+            close(t)
+            return :ok
+        elseif (time_ns() - start) > ns_timeout
+            close(t)
+            break
+        end
+    end
+    return :timed_out
 end
diff --git a/base/asyncmap.jl b/base/asyncmap.jl
index 976ce6c7b85ca2..0b3678f6b4b9ba 100644
--- a/base/asyncmap.jl
+++ b/base/asyncmap.jl
@@ -15,7 +15,7 @@ up to 100 tasks will be used for concurrent mapping.
 
 `ntasks` can also be specified as a zero-arg function. In this case, the
 number of tasks to run in parallel is checked before processing every element and a new
-task started if the value of `ntasks_func` is less than the current number
+task started if the value of `ntasks_func` is greater than the current number
 of tasks.
 
 If `batch_size` is specified, the collection is processed in batch mode. `f` must
@@ -236,7 +236,7 @@ function start_worker_task!(worker_tasks, exec_func, chnl, batch_size=nothing)
             end
         catch e
             close(chnl)
-            retval = e
+            retval = capture_exception(e, catch_backtrace())
         end
         retval
     end
@@ -305,20 +305,7 @@ end
 function iterate(itr::AsyncCollector)
     itr.ntasks = verify_ntasks(itr.enumerator, itr.ntasks)
     itr.batch_size = verify_batch_size(itr.batch_size)
-    if itr.batch_size !== nothing
-        exec_func = batch -> begin
-            # extract indices from the input tuple
-            batch_idxs = map(x->x[1], batch)
-
-            # and the args tuple....
-            batched_args = map(x->x[2], batch)
 
-            results = f(batched_args)
-            foreach(x -> (itr.results[batch_idxs[x[1]]] = x[2]), enumerate(results))
-        end
-    else
-        exec_func = (i,args) -> (itr.results[i]=itr.f(args...))
-    end
     chnl, worker_tasks = setup_chnl_and_tasks((i,args) -> (itr.results[i]=itr.f(args...)), itr.ntasks, itr.batch_size)
     return iterate(itr, AsyncCollectorState(chnl, worker_tasks))
 end
diff --git a/base/atomics.jl b/base/atomics.jl
index 1a980eb6561ec1..e6d62c3fc807b2 100644
--- a/base/atomics.jl
+++ b/base/atomics.jl
@@ -335,7 +335,7 @@ const llvmtypes = IdDict{Any,String}(
     Int32 => "i32", UInt32 => "i32",
     Int64 => "i64", UInt64 => "i64",
     Int128 => "i128", UInt128 => "i128",
-    Float16 => "i16", # half
+    Float16 => "half",
     Float32 => "float",
     Float64 => "double",
 )
@@ -356,13 +356,13 @@ for typ in atomictypes
     rt = "$lt, $lt*"
     irt = "$ilt, $ilt*"
     @eval getindex(x::Atomic{$typ}) =
-        llvmcall($"""
+        GC.@preserve x llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                  %rv = load atomic $rt %ptr acquire, align $(gc_alignment(typ))
                  ret $lt %rv
                  """, $typ, Tuple{Ptr{$typ}}, unsafe_convert(Ptr{$typ}, x))
     @eval setindex!(x::Atomic{$typ}, v::$typ) =
-        llvmcall($"""
+        GC.@preserve x llvmcall($"""
                  %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                  store atomic $lt %1, $lt* %ptr release, align $(gc_alignment(typ))
                  ret void
@@ -371,7 +371,7 @@ for typ in atomictypes
     # Note: atomic_cas! succeeded (i.e. it stored "new") if and only if the result is "cmp"
     if typ <: Integer
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            llvmcall($"""
+            GC.@preserve x llvmcall($"""
                      %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                      %rs = cmpxchg $lt* %ptr, $lt %1, $lt %2 acq_rel acquire
                      %rv = extractvalue { $lt, i1 } %rs, 0
@@ -380,7 +380,7 @@ for typ in atomictypes
                      unsafe_convert(Ptr{$typ}, x), cmp, new)
     else
         @eval atomic_cas!(x::Atomic{$typ}, cmp::$typ, new::$typ) =
-            llvmcall($"""
+            GC.@preserve x llvmcall($"""
                      %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
                      %icmp = bitcast $lt %1 to $ilt
                      %inew = bitcast $lt %2 to $ilt
@@ -403,7 +403,7 @@ for typ in atomictypes
         if rmwop in arithmetic_ops && !(typ <: ArithmeticTypes) continue end
         if typ <: Integer
             @eval $fn(x::Atomic{$typ}, v::$typ) =
-                llvmcall($"""
+                GC.@preserve x llvmcall($"""
                          %ptr = inttoptr i$WORD_SIZE %0 to $lt*
                          %rv = atomicrmw $rmw $lt* %ptr, $lt %1 acq_rel
                          ret $lt %rv
@@ -411,7 +411,7 @@ for typ in atomictypes
         else
             rmwop === :xchg || continue
             @eval $fn(x::Atomic{$typ}, v::$typ) =
-                llvmcall($"""
+                GC.@preserve x llvmcall($"""
                          %iptr = inttoptr i$WORD_SIZE %0 to $ilt*
                          %ival = bitcast $lt %1 to $ilt
                          %irv = atomicrmw $rmw $ilt* %iptr, $ilt %ival acq_rel
diff --git a/base/baseext.jl b/base/baseext.jl
index 75ef96caa94be3..8ebd599312453e 100644
--- a/base/baseext.jl
+++ b/base/baseext.jl
@@ -2,6 +2,17 @@
 
 # extensions to Core types to add features in Base
 
+"""
+    VecElement{T}
+
+A wrapper type that holds a single value of type `T`. When used in the context of an
+`NTuple{N, VecElement{T}} where {T, N}` object, it provides a hint to the runtime
+system to align that struct to be more amenable to vectorization optimization
+opportunities. In `ccall`, such an NTuple in the type signature will also use the
+vector register ABI, rather than the usual struct ABI.
+"""
+VecElement
+
 # hook up VecElement constructor to Base.convert
 VecElement{T}(arg) where {T} = VecElement{T}(convert(T, arg))
 convert(::Type{T}, arg::T) where {T<:VecElement} = arg
diff --git a/base/binaryplatforms.jl b/base/binaryplatforms.jl
index a14b147108fce2..e2dda00bf58e7b 100644
--- a/base/binaryplatforms.jl
+++ b/base/binaryplatforms.jl
@@ -1,78 +1,16 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 module BinaryPlatforms
 
 export AbstractPlatform, Platform, HostPlatform, platform_dlext, tags, arch, os,
-       os_version, libc, compiler_abi, libgfortran_version, libstdcxx_version,
+       os_version, libc, libgfortran_version, libstdcxx_version,
        cxxstring_abi, parse_dl_name_version, detect_libgfortran_version,
        detect_libstdcxx_version, detect_cxxstring_abi, call_abi, wordsize, triplet,
        select_platform, platforms_match, platform_name
 import .Libc.Libdl
 
 ### Submodule with information about CPU features
-module CPUID
-
-export cpu_isa
-
-"""
-    ISA(features::Set{UInt32})
-
-A structure which represents the Instruction Set Architecture (ISA) of a
-computer.  It holds the `Set` of features of the CPU.
-
-The numerical values of the features are automatically generated from the C
-source code of Julia and stored in the `features_h.jl` Julia file.
-"""
-struct ISA
-    features::Set{UInt32}
-end
-
-Base.:<=(a::ISA, b::ISA) = a.features <= b.features
-Base.:<(a::ISA,  b::ISA) = a.features <  b.features
-Base.isless(a::ISA,  b::ISA) = a < b
-
-include("features_h.jl")
-
-# Keep in sync with `arch_march_isa_mapping`.
-const ISAs_by_family = Dict(
-    "x86_64" => (
-        # Source: https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html.
-        # Implicit in all sets, because always required: mmx, sse, sse2
-        "x86_64" => ISA(Set{UInt32}()),
-        "core2" => ISA(Set((JL_X86_sse3, JL_X86_ssse3))),
-        "nehalem" => ISA(Set((JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt))),
-        "sandybridge" => ISA(Set((JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_aes, JL_X86_pclmul))),
-        "haswell" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c))),
-        "skylake" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c, JL_X86_rdseed, JL_X86_adx, JL_X86_prfchw, JL_X86_clflushopt, JL_X86_xsavec, JL_X86_xsaves))),
-        "skylake_avx512" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_pku, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c, JL_X86_rdseed, JL_X86_adx, JL_X86_prfchw, JL_X86_clflushopt, JL_X86_xsavec, JL_X86_xsaves, JL_X86_avx512f, JL_X86_clwb, JL_X86_avx512vl, JL_X86_avx512bw, JL_X86_avx512dq, JL_X86_avx512cd))),
-    ),
-    "arm" => (
-        "armv7l" => ISA(Set{UInt32}()),
-        "armv7l_neon" => ISA(Set((JL_AArch32_neon,))),
-        "armv7l_neon_vfp4" => ISA(Set((JL_AArch32_neon, JL_AArch32_vfp4))),
-    ),
-    "aarch64" => (
-        # Implicit in all sets, because always required: fp, asimd
-        "armv8.0_a" => ISA(Set{UInt32}()),
-        "armv8.1_a" => ISA(Set((JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm))),
-        "armv8.2_a_crypto" => ISA(Set((JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_aes, JL_AArch64_sha2))),
-        "armv8.4_a_crypto_sve" => ISA(Set((JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_fp16fml, JL_AArch64_dotprod, JL_AArch64_aes, JL_AArch64_sha2, JL_AArch64_dotprod, JL_AArch64_sve))),
-    ),
-)
-
-test_cpu_feature(feature::UInt32) = ccall(:jl_test_cpu_feature, Bool, (UInt32,), feature)
-cpu_family() = String(Sys.ARCH)
-
-"""
-    cpu_isa()
-
-Return the [`ISA`](@ref) (instruction set architecture) of the current CPU.
-"""
-function cpu_isa()
-    all_features = last(last(get(ISAs_by_family, cpu_family(), "" => [ISA(Set{UInt32}())]))).features
-    return ISA(Set{UInt32}(feat for feat in all_features if test_cpu_feature(feat)))
-end
-
-end # module CPUID
-
+include("cpuid.jl")
 using .CPUID
 
 # This exists to ease compatibility with old-style Platform objects
@@ -102,29 +40,21 @@ struct Platform <: AbstractPlatform
     # The "compare strategy" allows selective overriding on how a tag is compared
     compare_strategies::Dict{String,Function}
 
-    function Platform(arch::String, os::String;
+    # Passing `tags` as a `Dict` avoids the need to infer different NamedTuple specializations
+    function Platform(arch::String, os::String, _tags::Dict{String};
                       validate_strict::Bool = false,
-                      compare_strategies::Dict{String,<:Function} = Dict{String,Function}(),
-                      kwargs...)
+                      compare_strategies::Dict{String,<:Function} = Dict{String,Function}())
         # A wee bit of normalization
         os = lowercase(os)
-        arch = lowercase(arch)
-        if arch ∈ ("amd64",)
-            arch = "x86_64"
-        elseif arch ∈ ("i386", "i586")
-            arch = "i686"
-        elseif arch ∈ ("arm",)
-            arch = "armv7l"
-        elseif arch ∈ ("ppc64le",)
-            arch = "powerpc64le"
-        end
+        arch = CPUID.normalize_arch(arch)
 
         tags = Dict{String,String}(
             "arch" => arch,
             "os" => os,
         )
-        for (tag, value) in kwargs
-            tag = lowercase(string(tag))
+        for (tag, value) in _tags
+            value = value::Union{String,VersionNumber,Nothing}
+            tag = lowercase(tag)
             if tag ∈ ("arch", "os")
                 throw(ArgumentError("Cannot double-pass key $(tag)"))
             end
@@ -139,17 +69,14 @@ struct Platform <: AbstractPlatform
             # doesn't parse nicely into a VersionNumber to persist, but if `validate_strict` is
             # set to `true`, it will cause an error later on.
             if tag ∈ ("libgfortran_version", "libstdcxx_version", "os_version")
-                normver(x::VersionNumber) = string(x)
-                function normver(str::AbstractString)
-                    v = tryparse(VersionNumber, str)
-                    if v === nothing
-                        # If this couldn't be parsed as a VersionNumber, return the original.
-                        return str
+                if isa(value, VersionNumber)
+                    value = string(value)
+                elseif isa(value, String)
+                    v = tryparse(VersionNumber, value)
+                    if isa(v, VersionNumber)
+                        value = string(v)
                     end
-                    # Otherwise, return the `string(VersionNumber(str))` version.
-                    return normver(v)
                 end
-                value = normver(value)
             end
 
             # Use `add_tag!()` to add the tag to our collection of tags
@@ -184,6 +111,19 @@ struct Platform <: AbstractPlatform
     end
 end
 
+# Keyword interface (to avoid inference of specialized NamedTuple methods, use the Dict interface for `tags`)
+function Platform(arch::String, os::String;
+                  validate_strict::Bool = false,
+                  compare_strategies::Dict{String,<:Function} = Dict{String,Function}(),
+                  kwargs...)
+    tags = Dict{String,Any}(String(tag)::String=>tagvalue(value) for (tag, value) in kwargs)
+    return Platform(arch, os, tags; validate_strict, compare_strategies)
+end
+
+tagvalue(v::Union{String,VersionNumber,Nothing}) = v
+tagvalue(v::Symbol) = String(v)
+tagvalue(v::AbstractString) = convert(String, v)::String
+
 # Simple tag insertion that performs a little bit of validation
 function add_tag!(tags::Dict{String,String}, tag::String, value::String)
     # I know we said only alphanumeric and dots, but let's be generous so that we can expand
@@ -215,6 +155,20 @@ function Base.setindex!(p::AbstractPlatform, v::String, k::String)
     return p
 end
 
+# Hash definition to ensure that it's stable
+function Base.hash(p::Platform, h::UInt)
+    h += 0x506c6174666f726d % UInt
+    h = hash(p.tags, h)
+    h = hash(p.compare_strategies, h)
+    return h
+end
+
+# Simple equality definition; for compatibility testing, use `platforms_match()`
+function Base.:(==)(a::Platform, b::Platform)
+    return a.tags == b.tags && a.compare_strategies == b.compare_strategies
+end
+
+
 # Allow us to easily serialize Platform objects
 function Base.repr(p::Platform; context=nothing)
     str = string(
@@ -228,21 +182,21 @@ function Base.repr(p::Platform; context=nothing)
     )
 end
 
-# Simple equality definition; for compatibility testing, use `platforms_match()`
-Base.:(==)(a::AbstractPlatform, b::AbstractPlatform) = tags(a) == tags(b)
-
-const ARCHITECTURE_FLAGS = Dict(
-    "x86_64" => ["x86_64", "avx", "avx2", "avx512"],
-    "i686" => ["prescott"],
-    "armv7l" => ["armv7l", "neon", "vfp4"],
-    "armv6l" => ["generic"],
-    "aarch64" => ["armv8", "thunderx2", "carmel"],
-    "powerpc64le" => ["generic"],
-)
+# Make showing the platform a bit more palatable
+function Base.show(io::IO, p::Platform)
+    str = string(platform_name(p), " ", arch(p))
+    # Add on all the other tags not covered by os/arch:
+    other_tags = sort(collect(filter(kv -> kv[1] ∉ ("os", "arch"), tags(p))))
+    if !isempty(other_tags)
+        str = string(str, " {", join([string(k, "=", v) for (k, v) in other_tags], ", "), "}")
+    end
+    print(io, str)
+end
+
 function validate_tags(tags::Dict)
     throw_invalid_key(k) = throw(ArgumentError("Key \"$(k)\" cannot have value \"$(tags[k])\""))
     # Validate `arch`
-    if tags["arch"] ∉ keys(ARCHITECTURE_FLAGS)
+    if tags["arch"] ∉ ("x86_64", "i686", "armv7l", "armv6l", "aarch64", "powerpc64le")
         throw_invalid_key("arch")
     end
     # Validate `os`
@@ -292,24 +246,14 @@ function validate_tags(tags::Dict)
         throw_version_number("libgfortran_version")
     end
 
-    # Validate `libstdcxx_version` is a parsable `VersionNumber`
-    if "libstdcxx_version" in keys(tags) && tryparse(VersionNumber, tags["libstdcxx_version"]) === nothing
-        throw_version_number("libstdcxx_version")
-    end
-
     # Validate `cxxstring_abi` is one of the two valid options:
     if "cxxstring_abi" in keys(tags) && tags["cxxstring_abi"] ∉ ("cxx03", "cxx11")
         throw_invalid_key("cxxstring_abi")
     end
 
-    # Validate `march` is one of our recognized microarchitectures for the architecture we're advertising
-    if "march" in keys(tags) && tags["march"] ∉ ARCHITECTURE_FLAGS[tags["arch"]]
-        throw(ArgumentError("\"march\" cannot have value \"$(tags["march"])\" for arch $(tags["arch"])"))
-    end
-
-    # Validate `cuda` is a parsable `VersionNumber`
-    if "cuda" in keys(tags) && tryparse(VersionNumber, tags["cuda"]) === nothing
-        throw_version_number("cuda")
+    # Validate `libstdcxx_version` is a parsable `VersionNumber`
+    if "libstdcxx_version" in keys(tags) && tryparse(VersionNumber, tags["libstdcxx_version"]) === nothing
+        throw_version_number("libstdcxx_version")
     end
 end
 
@@ -497,7 +441,7 @@ function VNorNothing(d::Dict, key)
     if v === nothing
         return nothing
     end
-    return VersionNumber(v)
+    return VersionNumber(v)::VersionNumber
 end
 
 """
@@ -568,7 +512,7 @@ julia> triplet(Platform("armv7l", "Linux"; libgfortran_version="3"))
 """
 function triplet(p::AbstractPlatform)
     str = string(
-        arch(p),
+        arch(p)::Union{Symbol,String},
         os_str(p),
         libc_str(p),
         call_abi_str(p),
@@ -578,16 +522,16 @@ function triplet(p::AbstractPlatform)
     if libgfortran_version(p) !== nothing
         str = string(str, "-libgfortran", libgfortran_version(p).major)
     end
-    if libstdcxx_version(p) !== nothing
-        str = string(str, "-libstdcxx", libstdcxx_version(p).patch)
-    end
     if cxxstring_abi(p) !== nothing
         str = string(str, "-", cxxstring_abi(p))
     end
+    if libstdcxx_version(p) !== nothing
+        str = string(str, "-libstdcxx", libstdcxx_version(p).patch)
+    end
 
     # Tack on all extra tags
     for (tag, val) in tags(p)
-        if tag ∈ ("os", "arch", "libc", "call_abi", "libgfortran_version", "libstdcxx_version", "cxxstring_abi")
+        if tag ∈ ("os", "arch", "libc", "call_abi", "libgfortran_version", "libstdcxx_version", "cxxstring_abi", "os_version")
             continue
         end
         str = string(str, "-", tag, "+", val)
@@ -621,15 +565,19 @@ end
 
 # Helper functions for Linux and FreeBSD libc/abi mishmashes
 function libc_str(p::AbstractPlatform)
-    if libc(p) === nothing
+    lc = libc(p)
+    if lc === nothing
         return ""
-    elseif libc(p) === "glibc"
+    elseif lc === "glibc"
         return "-gnu"
     else
-        return string("-", libc(p))
+        return string("-", lc)
     end
 end
-call_abi_str(p::AbstractPlatform) = (call_abi(p) === nothing) ? "" : call_abi(p)
+function call_abi_str(p::AbstractPlatform)
+    cabi = call_abi(p)
+    cabi === nothing ? "" : string(cabi::Union{Symbol,String})
+end
 
 Sys.isapple(p::AbstractPlatform) = os(p) == "macos"
 Sys.islinux(p::AbstractPlatform) = os(p) == "linux"
@@ -646,28 +594,40 @@ const arch_mapping = Dict(
     "powerpc64le" => "p(ower)?pc64le",
 )
 # Keep this in sync with `CPUID.ISAs_by_family`
+# These are the CPUID side of the microarchitectures targeted by GCC flags in BinaryBuilder.jl
 const arch_march_isa_mapping = let
     function get_set(arch, name)
         all = CPUID.ISAs_by_family[arch]
         return all[findfirst(x -> x.first == name, all)].second
     end
     Dict(
-        "x86_64" => Dict{String,CPUID.ISA}(
+        "i686" => [
+            "pentium4" => get_set("i686", "pentium4"),
+            "prescott" => get_set("i686", "prescott"),
+        ],
+        "x86_64" => [
             "x86_64" => get_set("x86_64", "x86_64"),
             "avx" => get_set("x86_64", "sandybridge"),
             "avx2" => get_set("x86_64", "haswell"),
             "avx512" => get_set("x86_64", "skylake_avx512"),
-        ),
-        "armv7l" => Dict{String,CPUID.ISA}(
-            "armv7l" => get_set("arm", "armv7l"),
-            "neon" => get_set("arm", "armv7l_neon"),
-            "vfp4" => get_set("arm", "armv7l_neon_vfp4"),
-        ),
-        "aarch64" => Dict{String,CPUID.ISA}(
-            "armv8" => get_set("aarch64", "armv8.0_a"),
-            "thunderx2" => get_set("aarch64", "armv8.1_a"),
-            "carmel" => get_set("aarch64", "armv8.2_a_crypto"),
-        ),
+        ],
+        "armv6l" => [
+            "arm1176jzfs" => get_set("armv6l", "arm1176jzfs"),
+        ],
+        "armv7l" => [
+            "armv7l" => get_set("armv7l", "armv7l"),
+            "neonvfpv4" => get_set("armv7l", "armv7l+neon+vfpv4"),
+        ],
+        "aarch64" => [
+            "armv8_0" => get_set("aarch64", "armv8.0-a"),
+            "armv8_1" => get_set("aarch64", "armv8.1-a"),
+            "armv8_2_crypto" => get_set("aarch64", "armv8.2-a+crypto"),
+            "a64fx" => get_set("aarch64", "a64fx"),
+            "apple_m1" => get_set("aarch64", "apple_m1"),
+        ],
+        "powerpc64le" => [
+            "power8" => get_set("powerpc64le", "power8"),
+        ]
     )
 end
 const os_mapping = Dict(
@@ -692,23 +652,22 @@ const libgfortran_version_mapping = Dict(
     "libgfortran4" => "(-libgfortran4)|(-gcc7)",
     "libgfortran5" => "(-libgfortran5)|(-gcc8)",
 )
-const libstdcxx_version_mapping = Dict{String,String}(
-    "libstdcxx_nothing" => "",
-    # This is sadly easier than parsing out the digit directly
-    ("libstdcxx$(idx)" => "-libstdcxx$(idx)" for idx in 18:26)...,
-)
 const cxxstring_abi_mapping = Dict(
     "cxxstring_nothing" => "",
     "cxx03" => "-cxx03",
     "cxx11" => "-cxx11",
 )
+const libstdcxx_version_mapping = Dict{String,String}(
+    "libstdcxx_nothing" => "",
+    "libstdcxx" => "-libstdcxx\\d+",
+)
 
 """
     parse(::Type{Platform}, triplet::AbstractString)
 
 Parses a string platform triplet back into a `Platform` object.
 """
-function Base.parse(::Type{Platform}, triplet::AbstractString; validate_strict::Bool = false)
+function Base.parse(::Type{Platform}, triplet::String; validate_strict::Bool = false)
     # Helper function to collapse dictionary of mappings down into a regex of
     # named capture groups joined by "|" operators
     c(mapping) = string("(",join(["(?<$k>$v)" for (k, v) in mapping], "|"), ")")
@@ -723,8 +682,8 @@ function Base.parse(::Type{Platform}, triplet::AbstractString; validate_strict::
         c(call_abi_mapping),
         # Next, optional things, like libgfortran/libstdcxx/cxxstring abi
         c(libgfortran_version_mapping),
-        c(libstdcxx_version_mapping),
         c(cxxstring_abi_mapping),
+        c(libstdcxx_version_mapping),
         # Finally, the catch-all for extended tags
         "(?<tags>(?:-[^-]+\\+[^-]+)*)?",
         "\$",
@@ -745,7 +704,7 @@ function Base.parse(::Type{Platform}, triplet::AbstractString; validate_strict::
                     if startswith(k, "libgfortran")
                         return VersionNumber(parse(Int,k[12:end]))
                     elseif startswith(k, "libstdcxx")
-                        return VersionNumber(3, 4, parse(Int,k[10:end]))
+                        return VersionNumber(3, 4, parse(Int,m[k][11:end]))
                     else
                         return k
                     end
@@ -754,21 +713,22 @@ function Base.parse(::Type{Platform}, triplet::AbstractString; validate_strict::
         end
 
         # Extract the information we're interested in:
+        tags = Dict{String,Any}()
         arch = get_field(m, arch_mapping)
         os = get_field(m, os_mapping)
-        libc = get_field(m, libc_mapping)
-        call_abi = get_field(m, call_abi_mapping)
-        libgfortran_version = get_field(m, libgfortran_version_mapping)
-        libstdcxx_version = get_field(m, libstdcxx_version_mapping)
-        cxxstring_abi = get_field(m, cxxstring_abi_mapping)
+        tags["libc"] = get_field(m, libc_mapping)
+        tags["call_abi"] = get_field(m, call_abi_mapping)
+        tags["libgfortran_version"] = get_field(m, libgfortran_version_mapping)
+        tags["libstdcxx_version"] = get_field(m, libstdcxx_version_mapping)
+        tags["cxxstring_abi"] = get_field(m, cxxstring_abi_mapping)
         function split_tags(tagstr)
-            tag_fields = filter(!isempty, split(tagstr, "-"))
+            tag_fields = split(tagstr, "-"; keepempty=false)
             if isempty(tag_fields)
                 return Pair{String,String}[]
             end
-            return map(v -> Symbol(v[1]) => v[2], split.(tag_fields, "+"))
+            return map(v -> String(v[1]) => String(v[2]), split.(tag_fields, "+"))
         end
-        tags = split_tags(m["tags"])
+        merge!(tags, Dict(split_tags(m["tags"])))
 
         # Special parsing of os version number, if any exists
         function extract_os_version(os_name, pattern)
@@ -785,21 +745,14 @@ function Base.parse(::Type{Platform}, triplet::AbstractString; validate_strict::
         if os == "freebsd"
             os_version = extract_os_version("freebsd", r".*freebsd([\d.]+)")
         end
+        tags["os_version"] = os_version
 
-        return Platform(
-            arch, os;
-            validate_strict,
-            libc,
-            call_abi,
-            libgfortran_version,
-            libstdcxx_version,
-            cxxstring_abi,
-            os_version,
-            tags...,
-        )
+        return Platform(arch, os, tags; validate_strict)
     end
     throw(ArgumentError("Platform `$(triplet)` is not an officially supported platform"))
 end
+Base.parse(::Type{Platform}, triplet::AbstractString; kwargs...) =
+    parse(Platform, convert(String, triplet)::String; kwargs...)
 
 function Base.tryparse(::Type{Platform}, triplet::AbstractString)
     try
@@ -850,7 +803,7 @@ function parse_dl_name_version(path::String, os::String)
         dlregex = r"^(.*?)((?:\.[\d]+)*)\.dylib$"
     else
         # On Linux and FreeBSD, libraries look like `libnettle.so.6.3.0`
-        dlregex = r"^(.*?).so((?:\.[\d]+)*)$"
+        dlregex = r"^(.*?)\.so((?:\.[\d]+)*)$"
     end
 
     m = match(dlregex, basename(path))
@@ -904,12 +857,13 @@ function detect_libgfortran_version()
 end
 
 """
-    detect_libstdcxx_version()
+    detect_libstdcxx_version(max_minor_version::Int=30)
 
 Inspects the currently running Julia process to find out what version of libstdc++
-it is linked against (if any).
+it is linked against (if any).  `max_minor_version` is the latest version in the
+3.4 series of GLIBCXX where the search is performed.
 """
-function detect_libstdcxx_version()
+function detect_libstdcxx_version(max_minor_version::Int=30)
     libstdcxx_paths = filter(x -> occursin("libstdc++", x), Libdl.dllist())
     if isempty(libstdcxx_paths)
         # This can happen if we were built by clang, so we don't link against
@@ -919,7 +873,9 @@ function detect_libstdcxx_version()
 
     # Brute-force our way through GLIBCXX_* symbols to discover which version we're linked against
     hdl = Libdl.dlopen(first(libstdcxx_paths))
-    for minor_version in 26:-1:18
+    # Try all GLIBCXX versions down to GCC v4.8:
+    # https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html
+    for minor_version in max_minor_version:-1:18
         if Libdl.dlsym(hdl, "GLIBCXX_3.4.$(minor_version)"; throw_error=false) !== nothing
             Libdl.dlclose(hdl)
             return VersionNumber("3.4.$(minor_version)")
@@ -947,7 +903,7 @@ function detect_cxxstring_abi()
     end
 
     function open_libllvm(f::Function)
-        for lib_name in ("libLLVM", "LLVM", "libLLVMSupport")
+        for lib_name in ("libLLVM-13jl", "libLLVM", "LLVM", "libLLVMSupport")
             hdl = Libdl.dlopen_e(lib_name)
             if hdl != C_NULL
                 try
@@ -982,30 +938,36 @@ detect compiler ABI values such as `libgfortran_version`, `libstdcxx_version` an
 we have much of that built.
 """
 function host_triplet()
-    str = Sys.MACHINE
-    libgfortran_version = detect_libgfortran_version()
-    if libgfortran_version !== nothing
-        str = string(str, "-libgfortran", libgfortran_version.major)
+    str = Base.BUILD_TRIPLET
+
+    if !occursin("-libgfortran", str)
+        libgfortran_version = detect_libgfortran_version()
+        if libgfortran_version !== nothing
+            str = string(str, "-libgfortran", libgfortran_version.major)
+        end
     end
 
-    libstdcxx_version = detect_libstdcxx_version()
-    if libstdcxx_version !== nothing
-        str = string(str, "-libstdcxx", libstdcxx_version.patch)
+    if !occursin("-cxx", str)
+        cxxstring_abi = detect_cxxstring_abi()
+        if cxxstring_abi !== nothing
+            str = string(str, "-", cxxstring_abi)
+        end
     end
 
-    cxxstring_abi = detect_cxxstring_abi()
-    if cxxstring_abi !== nothing
-        str = string(str, "-", cxxstring_abi)
+    if !occursin("-libstdcxx", str)
+        libstdcxx_version = detect_libstdcxx_version()
+        if libstdcxx_version !== nothing
+            str = string(str, "-libstdcxx", libstdcxx_version.patch)
+        end
     end
 
     # Add on julia_version extended tag
-    str = string(str, "-julia_version+", VersionNumber(VERSION.major, VERSION.minor, VERSION.patch))
-
+    if !occursin("-julia_version+", str)
+        str = string(str, "-julia_version+", VersionNumber(VERSION.major, VERSION.minor, VERSION.patch))
+    end
     return str
 end
 
-# Cache the host platform value, and return it if someone asks for just `HostPlatform()`.
-default_host_platform = HostPlatform(parse(Platform, host_triplet()))
 """
     HostPlatform()
 
@@ -1015,7 +977,7 @@ relevant comparison strategies set to host platform mode.  This is equivalent to
     HostPlatform(parse(Platform, Base.BinaryPlatforms.host_triplet()))
 """
 function HostPlatform()
-    return default_host_platform::Platform
+    return HostPlatform(parse(Platform, host_triplet()))::Platform
 end
 
 """
@@ -1039,7 +1001,7 @@ only available in macOS `v"10.11"` and later, or an artifact can state that it r
 a libstdc++ that is at least `v"3.4.22"`, etc...
 """
 function platforms_match(a::AbstractPlatform, b::AbstractPlatform)
-    for k in union(keys(tags(a)), keys(tags(b)))
+    for k in union(keys(tags(a)::Dict{String,String}), keys(tags(b)::Dict{String,String}))
         ak = get(tags(a), k, nothing)
         bk = get(tags(b), k, nothing)
 
@@ -1114,4 +1076,9 @@ function select_platform(download_info::Dict, platform::AbstractPlatform = HostP
     return download_info[p]
 end
 
+# precompiles to reduce latency (see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1025692379)
+Dict{Platform,String}()[HostPlatform()] = ""
+Platform("x86_64", "linux", Dict{String,Any}(); validate_strict=true)
+Platform("x86_64", "linux", Dict{String,String}(); validate_strict=false)  # called this way from Artifacts.unpack_platform
+
 end # module
diff --git a/base/bitarray.jl b/base/bitarray.jl
index 51eff74a3f50f1..4494218172bf19 100644
--- a/base/bitarray.jl
+++ b/base/bitarray.jl
@@ -605,7 +605,7 @@ gen_bitarrayN(::Type{BitVector}, itsz, itr)                        = gen_bitarra
 gen_bitarrayN(::Type{BitVector}, itsz::HasShape{1}, itr)           = gen_bitarray(itsz, itr)
 gen_bitarrayN(::Type{BitArray{N}}, itsz::HasShape{N}, itr) where N = gen_bitarray(itsz, itr)
 # The first of these is just for ambiguity resolution
-gen_bitarrayN(::Type{BitVector}, itsz::HasShape{N}, itr) where N      = throw(DimensionMismatch("cannot create a $T from a $N-dimensional iterator"))
+gen_bitarrayN(::Type{BitVector}, itsz::HasShape{N}, itr) where N      = throw(DimensionMismatch("cannot create a BitVector from a $N-dimensional iterator"))
 gen_bitarrayN(@nospecialize(T::Type), itsz::HasShape{N}, itr) where N = throw(DimensionMismatch("cannot create a $T from a $N-dimensional iterator"))
 gen_bitarrayN(@nospecialize(T::Type), itsz, itr) = throw(DimensionMismatch("cannot create a $T from a generic iterator"))
 
@@ -703,7 +703,7 @@ end
 indexoffset(i) = first(i)-1
 indexoffset(::Colon) = 0
 
-@propagate_inbounds function setindex!(B::BitArray, X::AbstractArray, J0::Union{Colon,UnitRange{Int}})
+@propagate_inbounds function setindex!(B::BitArray, X::AbstractArray, J0::Union{Colon,AbstractUnitRange{Int}})
     _setindex!(IndexStyle(B), B, X, to_indices(B, (J0,))[1])
 end
 
@@ -947,6 +947,7 @@ function _deleteat!(B::BitVector, i::Int)
 end
 
 function deleteat!(B::BitVector, i::Integer)
+    i isa Bool && depwarn("passing Bool as an index is deprecated", :deleteat!)
     i = Int(i)
     n = length(B)
     1 <= i <= n || throw(BoundsError(B, i))
@@ -954,7 +955,7 @@ function deleteat!(B::BitVector, i::Integer)
     return _deleteat!(B, i)
 end
 
-function deleteat!(B::BitVector, r::UnitRange{Int})
+function deleteat!(B::BitVector, r::AbstractUnitRange{Int})
     n = length(B)
     i_f = first(r)
     i_l = last(r)
@@ -987,25 +988,27 @@ function deleteat!(B::BitVector, inds)
 
     (p, s) = y
     checkbounds(B, p)
+    p isa Bool && throw(ArgumentError("invalid index $p of type Bool"))
     q = p+1
     new_l -= 1
     y = iterate(inds, s)
     while y !== nothing
         (i, s) = y
         if !(q <= i <= n)
+            i isa Bool && throw(ArgumentError("invalid index $i of type Bool"))
             i < q && throw(ArgumentError("indices must be unique and sorted"))
             throw(BoundsError(B, i))
         end
         new_l -= 1
         if i > q
-            copy_chunks!(Bc, p, Bc, Int(q), Int(i-q))
+            copy_chunks!(Bc, Int(p), Bc, Int(q), Int(i-q))
             p += i-q
         end
         q = i+1
         y = iterate(inds, s)
     end
 
-    q <= n && copy_chunks!(Bc, p, Bc, Int(q), Int(n-q+1))
+    q <= n && copy_chunks!(Bc, Int(p), Bc, Int(q), Int(n-q+1))
 
     delta_k = num_bit_chunks(new_l) - length(Bc)
     delta_k < 0 && _deleteend!(Bc, -delta_k)
@@ -1019,7 +1022,55 @@ function deleteat!(B::BitVector, inds)
     return B
 end
 
+function deleteat!(B::BitVector, inds::AbstractVector{Bool})
+    length(inds) == length(B) || throw(BoundsError(B, inds))
+
+    n = new_l = length(B)
+    y = findfirst(inds)
+    y === nothing && return B
+
+    Bc = B.chunks
+
+    p = y
+    s = y + 1
+    checkbounds(B, p)
+    q = p + 1
+    new_l -= 1
+    y = findnext(inds, s)
+    while y !== nothing
+        i = y
+        s = y + 1
+        new_l -= 1
+        if i > q
+            copy_chunks!(Bc, Int(p), Bc, Int(q), Int(i-q))
+            p += i - q
+        end
+        q = i + 1
+        y = findnext(inds, s)
+    end
+
+    q <= n && copy_chunks!(Bc, Int(p), Bc, Int(q), Int(n - q + 1))
+
+    delta_k = num_bit_chunks(new_l) - length(Bc)
+    delta_k < 0 && _deleteend!(Bc, -delta_k)
+
+    B.len = new_l
+
+    if new_l > 0
+        Bc[end] &= _msk_end(new_l)
+    end
+
+    return B
+end
+
+keepat!(B::BitVector, inds) = _keepat!(B, inds)
+keepat!(B::BitVector, inds::AbstractVector{Bool}) = _keepat!(B, inds)
+
 function splice!(B::BitVector, i::Integer)
+    # TODO: after deprecation remove the four lines below
+    #       as v = B[i] is enough to do both bounds checking
+    #       and Bool check then just pass Int(i) to _deleteat!
+    i isa Bool && depwarn("passing Bool as an index is deprecated", :splice!)
     i = Int(i)
     n = length(B)
     1 <= i <= n || throw(BoundsError(B, i))
@@ -1031,9 +1082,11 @@ end
 
 const _default_bit_splice = BitVector()
 
-function splice!(B::BitVector, r::Union{UnitRange{Int}, Integer}, ins::AbstractArray = _default_bit_splice)
-    _splice_int!(B, isa(r, UnitRange{Int}) ? r : Int(r), ins)
+function splice!(B::BitVector, r::Union{AbstractUnitRange{Int}, Integer}, ins::AbstractArray = _default_bit_splice)
+    r isa Bool && depwarn("passing Bool as an index is deprecated", :splice!)
+    _splice_int!(B, isa(r, AbstractUnitRange{Int}) ? r : Int(r), ins)
 end
+
 function _splice_int!(B::BitVector, r, ins)
     n = length(B)
     i_f, i_l = first(r), last(r)
@@ -1073,7 +1126,7 @@ function _splice_int!(B::BitVector, r, ins)
     return v
 end
 
-function splice!(B::BitVector, r::Union{UnitRange{Int}, Integer}, ins)
+function splice!(B::BitVector, r::Union{AbstractUnitRange{Int}, Integer}, ins)
     Bins = BitVector(undef, length(ins))
     i = 1
     for x in ins
@@ -1386,15 +1439,15 @@ circshift!(B::BitVector, i::Integer) = circshift!(B, B, i)
 
 ## count & find ##
 
-function bitcount(Bc::Vector{UInt64})
-    n = 0
+function bitcount(Bc::Vector{UInt64}; init::T=0) where {T}
+    n::T = init
     @inbounds for i = 1:length(Bc)
-        n += count_ones(Bc[i])
+        n = (n + count_ones(Bc[i])) % T
     end
     return n
 end
 
-count(B::BitArray) = bitcount(B.chunks)
+_count(::typeof(identity), B::BitArray, ::Colon, init) = bitcount(B.chunks; init)
 
 function unsafe_bitfindnext(Bc::Vector{UInt64}, start::Int)
     chunk_start = _div64(start-1)+1
@@ -1707,6 +1760,8 @@ map!(::typeof(identity), dest::BitArray, A::BitArray) = copyto!(dest, A)
 for (T, f) in ((:(Union{typeof(&), typeof(*), typeof(min)}), :(&)),
                (:(Union{typeof(|), typeof(max)}),            :(|)),
                (:(Union{typeof(xor), typeof(!=)}),           :xor),
+               (:(typeof(nand)),                             :nand),
+               (:(typeof(nor)),                              :nor),
                (:(Union{typeof(>=), typeof(^)}),             :((p, q) -> p | ~q)),
                (:(typeof(<=)),                               :((p, q) -> ~p | q)),
                (:(typeof(==)),                               :((p, q) -> ~xor(p, q))),
@@ -1858,3 +1913,10 @@ function read!(s::IO, B::BitArray)
 end
 
 sizeof(B::BitArray) = sizeof(B.chunks)
+
+function _split_rest(a::Union{Vector, BitVector}, n::Int)
+    _check_length_split_rest(length(a), n)
+    last_n = a[end-n+1:end]
+    resize!(a, length(a) - n)
+    return a, last_n
+end
diff --git a/base/bitset.jl b/base/bitset.jl
index 7b94e2e4745f44..0abd9d4b782d29 100644
--- a/base/bitset.jl
+++ b/base/bitset.jl
@@ -11,7 +11,7 @@ const NO_OFFSET = Int === Int64 ? -one(Int) << 60 : -one(Int) << 29
 #   a small optimization in the in(x, ::BitSet) method
 
 mutable struct BitSet <: AbstractSet{Int}
-    bits::Vector{UInt64}
+    const bits::Vector{UInt64}
     # 1st stored Int equals 64*offset
     offset::Int
 
diff --git a/base/bool.jl b/base/bool.jl
index 92a27543d2fbc1..7648df3e0250ea 100644
--- a/base/bool.jl
+++ b/base/bool.jl
@@ -14,6 +14,8 @@ typemax(::Type{Bool}) = true
 Boolean not. Implements [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
 returning [`missing`](@ref) if `x` is `missing`.
 
+See also [`~`](@ref) for bitwise not.
+
 # Examples
 ```jldoctest
 julia> !true
@@ -70,6 +72,74 @@ julia> [true; true; false] .⊻ [true; false; false]
 """
 xor(x::Bool, y::Bool) = (x != y)
 
+"""
+    nand(x, y)
+    ⊼(x, y)
+
+Bitwise nand (not and) of `x` and `y`. Implements
+[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
+returning [`missing`](@ref) if one of the arguments is `missing`.
+
+The infix operation `a ⊼ b` is a synonym for `nand(a,b)`, and
+`⊼` can be typed by tab-completing `\\nand` or `\\barwedge` in the Julia REPL.
+
+# Examples
+```jldoctest
+julia> nand(true, false)
+true
+
+julia> nand(true, true)
+false
+
+julia> nand(true, missing)
+missing
+
+julia> false ⊼ false
+true
+
+julia> [true; true; false] .⊼ [true; false; false]
+3-element BitVector:
+ 0
+ 1
+ 1
+```
+"""
+nand(x...) = ~(&)(x...)
+
+"""
+    nor(x, y)
+    ⊽(x, y)
+
+Bitwise nor (not or) of `x` and `y`. Implements
+[three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
+returning [`missing`](@ref) if one of the arguments is `missing`.
+
+The infix operation `a ⊽ b` is a synonym for `nor(a,b)`, and
+`⊽` can be typed by tab-completing `\\nor` or `\\barvee` in the Julia REPL.
+
+# Examples
+```jldoctest
+julia> nor(true, false)
+false
+
+julia> nor(true, true)
+false
+
+julia> nor(true, missing)
+false
+
+julia> false ⊽ false
+true
+
+julia> [true; true; false] .⊽ [true; false; false]
+3-element BitVector:
+ 0
+ 0
+ 1
+```
+"""
+nor(x...) = ~(|)(x...)
+
 >>(x::Bool, c::UInt) = Int(x) >> c
 <<(x::Bool, c::UInt) = Int(x) << c
 >>>(x::Bool, c::UInt) = Int(x) >>> c
diff --git a/base/boot.jl b/base/boot.jl
index e653a82399ba52..bb7fcfd0719edc 100644
--- a/base/boot.jl
+++ b/base/boot.jl
@@ -8,7 +8,7 @@
 #abstract type Vararg{T} end
 
 #mutable struct Symbol
-#    #opaque
+## opaque
 #end
 
 #mutable struct TypeName
@@ -53,28 +53,43 @@
 #abstract type DenseArray{T,N} <: AbstractArray{T,N} end
 
 #mutable struct Array{T,N} <: DenseArray{T,N}
+## opaque
 #end
 
 #mutable struct Module
-#    name::Symbol
+## opaque
+#end
+
+#mutable struct SimpleVector
+## opaque
+#end
+
+#mutable struct String
+## opaque
 #end
 
 #mutable struct Method
+#...
 #end
 
 #mutable struct MethodInstance
+#...
 #end
 
 #mutable struct CodeInstance
+#...
 #end
 
 #mutable struct CodeInfo
+#...
 #end
 
 #mutable struct TypeMapLevel
+#...
 #end
 
 #mutable struct TypeMapEntry
+#...
 #end
 
 #abstract type Ref{T} end
@@ -96,8 +111,8 @@
 #    module::Module
 #    method::Symbol
 #    file::Symbol
-#    line::Int
-#    inlined_at::Int
+#    line::Int32
+#    inlined_at::Int32
 #end
 
 #struct GotoNode
@@ -156,7 +171,7 @@ export
     # key types
     Any, DataType, Vararg, NTuple,
     Tuple, Type, UnionAll, TypeVar, Union, Nothing, Cvoid,
-    AbstractArray, DenseArray, NamedTuple,
+    AbstractArray, DenseArray, NamedTuple, Pair,
     # special objects
     Function, Method,
     Module, Symbol, Task, Array, UndefInitializer, undef, WeakRef, VecElement,
@@ -172,12 +187,15 @@ export
     InterruptException, InexactError, OutOfMemoryError, ReadOnlyMemoryError,
     OverflowError, StackOverflowError, SegmentationFault, UndefRefError, UndefVarError,
     TypeError, ArgumentError, MethodError, AssertionError, LoadError, InitError,
-    UndefKeywordError,
+    UndefKeywordError, ConcurrencyViolationError,
     # AST representation
     Expr, QuoteNode, LineNumberNode, GlobalRef,
     # object model functions
-    fieldtype, getfield, setfield!, nfields, throw, tuple, ===, isdefined, eval, ifelse,
-    # sizeof    # not exported, to avoid conflicting with Base.sizeof
+    fieldtype, getfield, setfield!, swapfield!, modifyfield!, replacefield!,
+    nfields, throw, tuple, ===, isdefined, eval,
+    # access to globals
+    getglobal, setglobal!,
+    # ifelse, sizeof    # not exported, to avoid conflicting with Base
     # type reflection
     <:, typeof, isa, typeassert,
     # method reflection
@@ -185,7 +203,7 @@ export
     # constants
     nothing, Main
 
-const getproperty = getfield
+const getproperty = getfield # TODO: use `getglobal` for modules instead
 const setproperty! = setfield!
 
 abstract type Number end
@@ -206,7 +224,7 @@ primitive type Char <: AbstractChar 32 end
 primitive type Int8    <: Signed   8 end
 #primitive type UInt8   <: Unsigned 8 end
 primitive type Int16   <: Signed   16 end
-primitive type UInt16  <: Unsigned 16 end
+#primitive type UInt16  <: Unsigned 16 end
 #primitive type Int32   <: Signed   32 end
 #primitive type UInt32  <: Unsigned 32 end
 #primitive type Int64   <: Signed   64 end
@@ -227,14 +245,23 @@ ccall(:jl_toplevel_eval_in, Any, (Any, Any),
       (f::typeof(Typeof))(x) = ($(_expr(:meta,:nospecialize,:x)); isa(x,Type) ? Type{x} : typeof(x))
       end)
 
-# let the compiler assume that calling Union{} as a constructor does not need
-# to be considered ever (which comes up often as Type{<:T})
-Union{}(a...) = throw(MethodError(Union{}, a))
 
 macro nospecialize(x)
     _expr(:meta, :nospecialize, x)
 end
 
+TypeVar(n::Symbol) = _typevar(n, Union{}, Any)
+TypeVar(n::Symbol, @nospecialize(ub)) = _typevar(n, Union{}, ub)
+TypeVar(n::Symbol, @nospecialize(lb), @nospecialize(ub)) = _typevar(n, lb, ub)
+
+UnionAll(v::TypeVar, @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v, t)
+
+const Vararg = ccall(:jl_toplevel_eval_in, Any, (Any, Any), Core, _expr(:new, TypeofVararg))
+
+# let the compiler assume that calling Union{} as a constructor does not need
+# to be considered ever (which comes up often as Type{<:T})
+Union{}(a...) = throw(MethodError(Union{}, a))
+
 Expr(@nospecialize args...) = _expr(args...)
 
 abstract type Exception end
@@ -242,20 +269,15 @@ struct ErrorException <: Exception
     msg::AbstractString
 end
 
-macro _inline_meta()
-    Expr(:meta, :inline)
-end
-
-macro _noinline_meta()
-    Expr(:meta, :noinline)
-end
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
 
 struct BoundsError <: Exception
     a::Any
     i::Any
     BoundsError() = new()
-    BoundsError(@nospecialize(a)) = (@_noinline_meta; new(a))
-    BoundsError(@nospecialize(a), i) = (@_noinline_meta; new(a,i))
+    BoundsError(@nospecialize(a)) = (@noinline; new(a))
+    BoundsError(@nospecialize(a), i) = (@noinline; new(a,i))
 end
 struct DivideError         <: Exception end
 struct OutOfMemoryError    <: Exception end
@@ -266,12 +288,15 @@ struct UndefRefError       <: Exception end
 struct UndefVarError <: Exception
     var::Symbol
 end
+struct ConcurrencyViolationError <: Exception
+    msg::AbstractString
+end
 struct InterruptException <: Exception end
 struct DomainError <: Exception
     val
     msg::AbstractString
-    DomainError(@nospecialize(val)) = (@_noinline_meta; new(val, ""))
-    DomainError(@nospecialize(val), @nospecialize(msg)) = (@_noinline_meta; new(val, msg))
+    DomainError(@nospecialize(val)) = (@noinline; new(val, ""))
+    DomainError(@nospecialize(val), @nospecialize(msg)) = (@noinline; new(val, msg))
 end
 struct TypeError <: Exception
     # `func` is the name of the builtin function that encountered a type error,
@@ -292,7 +317,7 @@ struct InexactError <: Exception
     func::Symbol
     T  # Type
     val
-    InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@_noinline_meta; new(f, T, val))
+    InexactError(f::Symbol, @nospecialize(T), @nospecialize(val)) = (@noinline; new(f, T, val))
 end
 struct OverflowError <: Exception
     msg::AbstractString
@@ -363,12 +388,6 @@ mutable struct WeakRef
                                       (Ptr{Cvoid}, Any), getptls(), v)
 end
 
-TypeVar(n::Symbol) = _typevar(n, Union{}, Any)
-TypeVar(n::Symbol, @nospecialize(ub)) = _typevar(n, Union{}, ub)
-TypeVar(n::Symbol, @nospecialize(lb), @nospecialize(ub)) = _typevar(n, lb, ub)
-
-UnionAll(v::TypeVar, @nospecialize(t)) = ccall(:jl_type_unionall, Any, (Any, Any), v, t)
-
 Tuple{}() = ()
 
 struct VecElement{T}
@@ -377,39 +396,49 @@ struct VecElement{T}
 end
 VecElement(arg::T) where {T} = VecElement{T}(arg)
 
-_new(typ::Symbol, argty::Symbol) = eval(Core, :($typ(@nospecialize n::$argty) = $(Expr(:new, typ, :n))))
-_new(:GotoNode, :Int)
-_new(:NewvarNode, :SlotNumber)
-_new(:QuoteNode, :Any)
-_new(:SSAValue, :Int)
-_new(:Argument, :Int)
-_new(:ReturnNode, :Any)
-eval(Core, :(ReturnNode() = $(Expr(:new, :ReturnNode)))) # unassigned val indicates unreachable
-eval(Core, :(GotoIfNot(@nospecialize(cond), dest::Int) = $(Expr(:new, :GotoIfNot, :cond, :dest))))
-eval(Core, :(LineNumberNode(l::Int) = $(Expr(:new, :LineNumberNode, :l, nothing))))
-eval(Core, :(LineNumberNode(l::Int, @nospecialize(f)) = $(Expr(:new, :LineNumberNode, :l, :f))))
-LineNumberNode(l::Int, f::String) = LineNumberNode(l, Symbol(f))
-eval(Core, :(GlobalRef(m::Module, s::Symbol) = $(Expr(:new, :GlobalRef, :m, :s))))
-eval(Core, :(SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))))
-eval(Core, :(TypedSlot(n::Int, @nospecialize(t)) = $(Expr(:new, :TypedSlot, :n, :t))))
-eval(Core, :(PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))))
-eval(Core, :(PiNode(val, typ) = $(Expr(:new, :PiNode, :val, :typ))))
-eval(Core, :(PhiCNode(values::Array{Any, 1}) = $(Expr(:new, :PhiCNode, :values))))
-eval(Core, :(UpsilonNode(val) = $(Expr(:new, :UpsilonNode, :val))))
-eval(Core, :(UpsilonNode() = $(Expr(:new, :UpsilonNode))))
-eval(Core, :(LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int, inlined_at::Int) =
-             $(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))))
-eval(Core, :(CodeInstance(mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const),
-                          @nospecialize(inferred), const_flags::Int32,
-                          min_world::UInt, max_world::UInt) =
-                ccall(:jl_new_codeinst, Ref{CodeInstance}, (Any, Any, Any, Any, Int32, UInt, UInt),
-                    mi, rettype, inferred_const, inferred, const_flags, min_world, max_world)))
-eval(Core, :(Const(@nospecialize(v)) = $(Expr(:new, :Const, :v))))
-eval(Core, :(PartialStruct(@nospecialize(typ), fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields))))
-eval(Core, :(MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) =
-    $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))))
-
-Module(name::Symbol=:anonymous, std_imports::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool), name, std_imports)
+eval(Core, quote
+    GotoNode(label::Int) = $(Expr(:new, :GotoNode, :label))
+    NewvarNode(slot::SlotNumber) = $(Expr(:new, :NewvarNode, :slot))
+    QuoteNode(@nospecialize value) = $(Expr(:new, :QuoteNode, :value))
+    SSAValue(id::Int) = $(Expr(:new, :SSAValue, :id))
+    Argument(n::Int) = $(Expr(:new, :Argument, :n))
+    ReturnNode(@nospecialize val) = $(Expr(:new, :ReturnNode, :val))
+    ReturnNode() = $(Expr(:new, :ReturnNode)) # unassigned val indicates unreachable
+    GotoIfNot(@nospecialize(cond), dest::Int) = $(Expr(:new, :GotoIfNot, :cond, :dest))
+    LineNumberNode(l::Int) = $(Expr(:new, :LineNumberNode, :l, nothing))
+    function LineNumberNode(l::Int, @nospecialize(f))
+        isa(f, String) && (f = Symbol(f))
+        return $(Expr(:new, :LineNumberNode, :l, :f))
+    end
+    LineInfoNode(mod::Module, @nospecialize(method), file::Symbol, line::Int32, inlined_at::Int32) =
+        $(Expr(:new, :LineInfoNode, :mod, :method, :file, :line, :inlined_at))
+    GlobalRef(m::Module, s::Symbol) = $(Expr(:new, :GlobalRef, :m, :s))
+    SlotNumber(n::Int) = $(Expr(:new, :SlotNumber, :n))
+    TypedSlot(n::Int, @nospecialize(t)) = $(Expr(:new, :TypedSlot, :n, :t))
+    PhiNode(edges::Array{Int32, 1}, values::Array{Any, 1}) = $(Expr(:new, :PhiNode, :edges, :values))
+    PiNode(@nospecialize(val), @nospecialize(typ)) = $(Expr(:new, :PiNode, :val, :typ))
+    PhiCNode(values::Array{Any, 1}) = $(Expr(:new, :PhiCNode, :values))
+    UpsilonNode(@nospecialize(val)) = $(Expr(:new, :UpsilonNode, :val))
+    UpsilonNode() = $(Expr(:new, :UpsilonNode))
+    function CodeInstance(
+        mi::MethodInstance, @nospecialize(rettype), @nospecialize(inferred_const),
+        @nospecialize(inferred), const_flags::Int32, min_world::UInt, max_world::UInt,
+        ipo_effects::UInt32, effects::UInt32, @nospecialize(argescapes#=::Union{Nothing,Vector{ArgEscapeInfo}}=#),
+        relocatability::UInt8)
+        return ccall(:jl_new_codeinst, Ref{CodeInstance},
+            (Any, Any, Any, Any, Int32, UInt, UInt, UInt32, UInt32, Any, UInt8),
+            mi, rettype, inferred_const, inferred, const_flags, min_world, max_world,
+            ipo_effects, effects, argescapes,
+            relocatability)
+    end
+    Const(@nospecialize(v)) = $(Expr(:new, :Const, :v))
+    PartialStruct(@nospecialize(typ), fields::Array{Any, 1}) = $(Expr(:new, :PartialStruct, :typ, :fields))
+    PartialOpaque(@nospecialize(typ), @nospecialize(env), parent::MethodInstance, source::Method) = $(Expr(:new, :PartialOpaque, :typ, :env, :parent, :source))
+    InterConditional(slot::Int, @nospecialize(vtype), @nospecialize(elsetype)) = $(Expr(:new, :InterConditional, :slot, :vtype, :elsetype))
+    MethodMatch(@nospecialize(spec_types), sparams::SimpleVector, method::Method, fully_covers::Bool) = $(Expr(:new, :MethodMatch, :spec_types, :sparams, :method, :fully_covers))
+end)
+
+Module(name::Symbol=:anonymous, std_imports::Bool=true, default_names::Bool=true) = ccall(:jl_f_new_module, Ref{Module}, (Any, Bool, Bool), name, std_imports, default_names)
 
 function _Task(@nospecialize(f), reserved_stack::Int, completion_future)
     return ccall(:jl_new_task, Ref{Task}, (Any, Any, Int), f, completion_future, reserved_stack)
@@ -425,7 +454,6 @@ unsafe_convert(::Type{T}, x::T) where {T} = x
 
 const NTuple{N,T} = Tuple{Vararg{T,N}}
 
-
 ## primitive Array constructors
 struct UndefInitializer end
 const undef = UndefInitializer()
@@ -564,10 +592,11 @@ function (g::GeneratedFunctionStub)(@nospecialize args...)
                          Expr(:meta, :push_loc, g.file, Symbol("@generated body")),
                          Expr(:return, body),
                          Expr(:meta, :pop_loc))))
-    if g.spnames === nothing
+    spnames = g.spnames
+    if spnames === nothing
         return lam
     else
-        return Expr(Symbol("with-static-parameters"), lam, g.spnames...)
+        return Expr(Symbol("with-static-parameters"), lam, spnames...)
     end
 end
 
@@ -584,26 +613,26 @@ eval(Core, :(NamedTuple{names,T}(args::T) where {names, T <: Tuple} =
 
 import .Intrinsics: eq_int, trunc_int, lshr_int, sub_int, shl_int, bitcast, sext_int, zext_int, and_int
 
-throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@_noinline_meta; throw(InexactError(f, T, val)))
+throw_inexacterror(f::Symbol, ::Type{T}, val) where {T} = (@noinline; throw(InexactError(f, T, val)))
 
 function is_top_bit_set(x)
-    @_inline_meta
+    @inline
     eq_int(trunc_int(UInt8, lshr_int(x, sub_int(shl_int(sizeof(x), 3), 1))), trunc_int(UInt8, 1))
 end
 
 function is_top_bit_set(x::Union{Int8,UInt8})
-    @_inline_meta
+    @inline
     eq_int(lshr_int(x, 7), trunc_int(typeof(x), 1))
 end
 
 function check_top_bit(::Type{To}, x) where {To}
-    @_inline_meta
+    @inline
     is_top_bit_set(x) && throw_inexacterror(:check_top_bit, To, x)
     x
 end
 
 function checked_trunc_sint(::Type{To}, x::From) where {To,From}
-    @_inline_meta
+    @inline
     y = trunc_int(To, x)
     back = sext_int(From, y)
     eq_int(x, back) || throw_inexacterror(:trunc, To, x)
@@ -611,7 +640,7 @@ function checked_trunc_sint(::Type{To}, x::From) where {To,From}
 end
 
 function checked_trunc_uint(::Type{To}, x::From) where {To,From}
-    @_inline_meta
+    @inline
     y = trunc_int(To, x)
     back = zext_int(From, y)
     eq_int(x, back) || throw_inexacterror(:trunc, To, x)
@@ -764,15 +793,15 @@ Unsigned(x::Int64)  = UInt64(x)
 Signed(x::UInt128)  = Int128(x)
 Unsigned(x::Int128) = UInt128(x)
 
-Signed(x::Union{Float32, Float64, Bool})   = Int(x)
-Unsigned(x::Union{Float32, Float64, Bool}) = UInt(x)
+Signed(x::Union{Float16, Float32, Float64, Bool})   = Int(x)
+Unsigned(x::Union{Float16, Float32, Float64, Bool}) = UInt(x)
 
 Integer(x::Integer) = x
-Integer(x::Union{Float32, Float64}) = Int(x)
+Integer(x::Union{Float16, Float32, Float64}) = Int(x)
 
 # Binding for the julia parser, called as
 #
-#    Core._parse(text, filename, offset, options)
+#    Core._parse(text, filename, lineno, offset, options)
 #
 # Parse Julia code from the buffer `text`, starting at `offset` and attributing
 # it to `filename`. `text` may be a `String` or `svec(ptr::Ptr{UInt8},
@@ -785,4 +814,19 @@ Integer(x::Union{Float32, Float64}) = Int(x)
 # The internal jl_parse which will call into Core._parse if not `nothing`.
 _parse = nothing
 
+# support for deprecated uses of internal _apply function
+_apply(x...) = Core._apply_iterate(Main.Base.iterate, x...)
+
+struct Pair{A, B}
+    first::A
+    second::B
+    # if we didn't inline this, it's probably because the callsite was actually dynamic
+    # to avoid potentially compiling many copies of this, we mark the arguments with `@nospecialize`
+    # but also mark the whole function with `@inline` to ensure we will inline it whenever possible
+    # (even if `convert(::Type{A}, a::A)` for some reason was expensive)
+    Pair(a, b) = new{typeof(a), typeof(b)}(a, b)
+    Pair{A, B}(a::A, b::B) where {A, B} = new(a, b)
+    Pair{Any, Any}(@nospecialize(a::Any), @nospecialize(b::Any)) = new(a, b)
+end
+
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Core, true)
diff --git a/base/broadcast.jl b/base/broadcast.jl
index 12bddaf531e274..7c32e6893268f0 100644
--- a/base/broadcast.jl
+++ b/base/broadcast.jl
@@ -8,10 +8,10 @@ Module containing the broadcasting implementation.
 module Broadcast
 
 using .Base.Cartesian
-using .Base: Indices, OneTo, tail, to_shape, isoperator, promote_typejoin,
-             _msk_end, unsafe_bitgetindex, bitcache_chunks, bitcache_size, dumpbitcache, unalias
+using .Base: Indices, OneTo, tail, to_shape, isoperator, promote_typejoin, promote_typejoin_union,
+             _msk_end, unsafe_bitgetindex, bitcache_chunks, bitcache_size, dumpbitcache, unalias, negate
 import .Base: copy, copyto!, axes
-export broadcast, broadcast!, BroadcastStyle, broadcast_axes, broadcastable, dotview, @__dot__, broadcast_preserving_zero_d
+export broadcast, broadcast!, BroadcastStyle, broadcast_axes, broadcastable, dotview, @__dot__, BroadcastFunction
 
 ## Computing the result's axes: deprecated name
 const broadcast_axes = axes
@@ -137,7 +137,7 @@ BroadcastStyle(a::AbstractArrayStyle, ::Style{Tuple})    = a
 BroadcastStyle(::A, ::A) where A<:ArrayStyle             = A()
 BroadcastStyle(::ArrayStyle, ::ArrayStyle)               = Unknown()
 BroadcastStyle(::A, ::A) where A<:AbstractArrayStyle     = A()
-Base.@pure function BroadcastStyle(a::A, b::B) where {A<:AbstractArrayStyle{M},B<:AbstractArrayStyle{N}} where {M,N}
+function BroadcastStyle(a::A, b::B) where {A<:AbstractArrayStyle{M},B<:AbstractArrayStyle{N}} where {M,N}
     if Base.typename(A) === Base.typename(B)
         return A(Val(max(M, N)))
     end
@@ -179,6 +179,21 @@ function Broadcasted{Style}(f::F, args::Args, axes=nothing) where {Style, F, Arg
     Broadcasted{Style, typeof(axes), Core.Typeof(f), Args}(f, args, axes)
 end
 
+struct AndAnd end
+const andand = AndAnd()
+broadcasted(::AndAnd, a, b) = broadcasted((a, b) -> a && b, a, b)
+function broadcasted(::AndAnd, a, bc::Broadcasted)
+    bcf = flatten(bc)
+    broadcasted((a, args...) -> a && bcf.f(args...), a, bcf.args...)
+end
+struct OrOr end
+const oror = OrOr()
+broadcasted(::OrOr, a, b) = broadcasted((a, b) -> a || b, a, b)
+function broadcasted(::OrOr, a, bc::Broadcasted)
+    bcf = flatten(bc)
+    broadcasted((a, args...) -> a || bcf.f(args...), a, bcf.args...)
+end
+
 Base.convert(::Type{Broadcasted{NewStyle}}, bc::Broadcasted{Style,Axes,F,Args}) where {NewStyle,Style,Axes,F,Args} =
     Broadcasted{NewStyle,Axes,F,Args}(bc.f, bc.args, bc.axes)
 
@@ -515,7 +530,12 @@ axistype(a, b) = UnitRange{Int}(a)
 check_broadcast_shape(shp) = nothing
 check_broadcast_shape(shp, ::Tuple{}) = nothing
 check_broadcast_shape(::Tuple{}, ::Tuple{}) = nothing
-check_broadcast_shape(::Tuple{}, Ashp::Tuple) = throw(DimensionMismatch("cannot broadcast array to have fewer dimensions"))
+function check_broadcast_shape(::Tuple{}, Ashp::Tuple)
+    if any(ax -> length(ax) != 1, Ashp)
+        throw(DimensionMismatch("cannot broadcast array to have fewer non-singleton dimensions"))
+    end
+    nothing
+end
 function check_broadcast_shape(shp, Ashp::Tuple)
     _bcsm(shp[1], Ashp[1]) || throw(DimensionMismatch("array could not be broadcast to match destination"))
     check_broadcast_shape(tail(shp), tail(Ashp))
@@ -546,18 +566,20 @@ an `Int`.
 """
 Base.@propagate_inbounds newindex(arg, I::CartesianIndex) = CartesianIndex(_newindex(axes(arg), I.I))
 Base.@propagate_inbounds newindex(arg, I::Integer) = CartesianIndex(_newindex(axes(arg), (I,)))
-Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(Base.unsafe_length(ax[1])==1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...)
+Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple) = (ifelse(length(ax[1]) == 1, ax[1][1], I[1]), _newindex(tail(ax), tail(I))...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple) = ()
 Base.@propagate_inbounds _newindex(ax::Tuple, I::Tuple{}) = (ax[1][1], _newindex(tail(ax), ())...)
 Base.@propagate_inbounds _newindex(ax::Tuple{}, I::Tuple{}) = ()
 
 # If dot-broadcasting were already defined, this would be `ifelse.(keep, I, Idefault)`.
 @inline newindex(I::CartesianIndex, keep, Idefault) = CartesianIndex(_newindex(I.I, keep, Idefault))
-@inline newindex(i::Integer, keep::Tuple{Bool}, idefault) = ifelse(keep[1], i, idefault[1])
+@inline newindex(i::Integer, keep::Tuple, idefault) = ifelse(keep[1], i, idefault[1])
 @inline newindex(i::Integer, keep::Tuple{}, idefault) = CartesianIndex(())
 @inline _newindex(I, keep, Idefault) =
     (ifelse(keep[1], I[1], Idefault[1]), _newindex(tail(I), tail(keep), tail(Idefault))...)
 @inline _newindex(I, keep::Tuple{}, Idefault) = ()  # truncate if keep is shorter than I
+@inline _newindex(I::Tuple{}, keep, Idefault) = ()  # or I is shorter
+@inline _newindex(I::Tuple{}, keep::Tuple{}, Idefault) = () # or both
 
 # newindexer(A) generates `keep` and `Idefault` (for use by `newindex` above)
 # for a particular array `A`; `shapeindexer` does so for its axes.
@@ -675,9 +697,9 @@ julia> Broadcast.broadcastable("hello") # Strings break convention of matching i
 Base.RefValue{String}("hello")
 ```
 """
-broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,Regex,Pair}) = Ref(x)
+broadcastable(x::Union{Symbol,AbstractString,Function,UndefInitializer,Nothing,RoundingMode,Missing,Val,Ptr,AbstractPattern,Pair,IO}) = Ref(x)
 broadcastable(::Type{T}) where {T} = Ref{Type{T}}(T)
-broadcastable(x::Union{AbstractArray,Number,Ref,Tuple,Broadcasted}) = x
+broadcastable(x::Union{AbstractArray,Number,AbstractChar,Ref,Tuple,Broadcasted}) = x
 # Default to collecting iterables — which will error for non-iterables
 broadcastable(x) = collect(x)
 broadcastable(::Union{AbstractDict, NamedTuple}) = throw(ArgumentError("broadcasting over dictionaries and `NamedTuple`s is reserved"))
@@ -692,7 +714,8 @@ eltypes(t::Tuple{Any,Any}) = Tuple{_broadcast_getindex_eltype(t[1]), _broadcast_
 eltypes(t::Tuple) = Tuple{_broadcast_getindex_eltype(t[1]), eltypes(tail(t)).types...}
 
 # Inferred eltype of result of broadcast(f, args...)
-combine_eltypes(f, args::Tuple) = Base._return_type(f, eltypes(args))
+combine_eltypes(f, args::Tuple) =
+    promote_typejoin_union(Base._return_type(f, eltypes(args)))
 
 ## Broadcasting core
 
@@ -877,7 +900,11 @@ const NonleafHandlingStyles = Union{DefaultArrayStyle,ArrayConflict}
     dest = similar(bc′, typeof(val))
     @inbounds dest[I] = val
     # Now handle the remaining values
-    return copyto_nonleaf!(dest, bc′, iter, state, 1)
+    # The typeassert gives inference a helping hand on the element type and dimensionality
+    # (work-around for #28382)
+    ElType′ = ElType === Union{} ? Any : ElType <: Type ? Type : ElType
+    RT = dest isa AbstractArray ? AbstractArray{<:ElType′, ndims(dest)} : Any
+    return copyto_nonleaf!(dest, bc′, iter, state, 1)::RT
 end
 
 ## general `copyto!` methods
@@ -914,8 +941,8 @@ broadcast_unalias(::Nothing, src) = src
 preprocess(dest, x) = extrude(broadcast_unalias(dest, x))
 
 @inline preprocess_args(dest, args::Tuple) = (preprocess(dest, args[1]), preprocess_args(dest, tail(args))...)
-preprocess_args(dest, args::Tuple{Any}) = (preprocess(dest, args[1]),)
-preprocess_args(dest, args::Tuple{}) = ()
+@inline preprocess_args(dest, args::Tuple{Any}) = (preprocess(dest, args[1]),)
+@inline preprocess_args(dest, args::Tuple{}) = ()
 
 # Specialize this method if all you want to do is specialize on typeof(dest)
 @inline function copyto!(dest::AbstractArray, bc::Broadcasted{Nothing})
@@ -928,8 +955,10 @@ preprocess_args(dest, args::Tuple{}) = ()
         end
     end
     bc′ = preprocess(dest, bc)
-    @simd for I in eachindex(bc′)
-        @inbounds dest[I] = bc′[I]
+    # Performance may vary depending on whether `@inbounds` is placed outside the
+    # for loop or not. (cf. https://github.com/JuliaLang/julia/issues/38086)
+    @inbounds @simd for I in eachindex(bc′)
+        dest[I] = bc′[I]
     end
     return dest
 end
@@ -944,14 +973,14 @@ end
     destc = dest.chunks
     cind = 1
     bc′ = preprocess(dest, bc)
-    for P in Iterators.partition(eachindex(bc′), bitcache_size)
+    @inbounds for P in Iterators.partition(eachindex(bc′), bitcache_size)
         ind = 1
         @simd for I in P
-            @inbounds tmp[ind] = bc′[I]
+            tmp[ind] = bc′[I]
             ind += 1
         end
         @simd for i in ind:bitcache_size
-            @inbounds tmp[i] = false
+            tmp[i] = false
         end
         dumpbitcache(destc, cind, tmp)
         cind += bitcache_chunks
@@ -1024,7 +1053,7 @@ function copyto_nonleaf!(dest, bc::Broadcasted, iter, state, count)
         y === nothing && break
         I, state = y
         @inbounds val = bc[I]
-        if val isa T || typeof(val) === T
+        if val isa T
             @inbounds dest[I] = val
         else
             # This element type doesn't fit in dest. Allocate a new dest with wider eltype,
@@ -1048,19 +1077,22 @@ end
 
 ## scalar-range broadcast operations ##
 # DefaultArrayStyle and \ are not available at the time of range.jl
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::OrdinalRange) = r
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::StepRangeLen) = r
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::LinRange) = r
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractRange) = r
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange) = range(-first(r), step=-step(r), length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::StepRangeLen) = StepRangeLen(-r.ref, -r.step, length(r), r.offset)
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange) = range(-first(r), step=negate(step(r)), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange) = range(-first(r), -last(r), step=negate(step(r)))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::StepRangeLen) = StepRangeLen(-r.ref, negate(r.step), length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::LinRange) = LinRange(-r.start, -r.stop, length(r))
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::AbstractUnitRange) = range(x + first(r), length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Real) = range(first(r) + x, length=length(r))
 # For #18336 we need to prevent promotion of the step type:
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractRange, x::Number) = range(first(r) + x, step=step(r), length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::AbstractRange) = range(x + first(r), step=step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::OrdinalRange, x::Integer) = range(first(r) + x, last(r) + x, step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::OrdinalRange) = range(x + first(r), x + last(r), step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Integer) = range(first(r) + x, last(r) + x)
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::AbstractUnitRange) = range(x + first(r), x + last(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::AbstractUnitRange, x::Real) = range(first(r) + x, length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Real, r::AbstractUnitRange) = range(x + first(r), length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::StepRangeLen{T}, x::Number) where T =
     StepRangeLen{typeof(T(r.ref)+x)}(r.ref + x, r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::StepRangeLen{T}) where T =
@@ -1069,27 +1101,36 @@ broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r::LinRange, x::Number) = LinRa
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), x::Number, r::LinRange) = LinRange(x + r.start, x + r.stop, length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(+), r1::AbstractRange, r2::AbstractRange) = r1 + r2
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Number) = range(first(r)-x, length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange, x::Number) = range(first(r)-x, step=step(r), length=length(r))
-broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::AbstractRange) = range(x-first(r), step=-step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractRange, x::Number) = range(first(r) - x, step=step(r), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::AbstractRange) = range(x - first(r), step=negate(step(r)), length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::OrdinalRange, x::Integer) = range(first(r) - x, last(r) - x, step=step(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Integer, r::OrdinalRange) = range(x - first(r), x - last(r), step=negate(step(r)))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Integer) = range(first(r) - x, last(r) - x)
+broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::AbstractUnitRange, x::Real) = range(first(r) - x, length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::StepRangeLen{T}, x::Number) where T =
     StepRangeLen{typeof(T(r.ref)-x)}(r.ref - x, r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::StepRangeLen{T}) where T =
-    StepRangeLen{typeof(x-T(r.ref))}(x - r.ref, -r.step, length(r), r.offset)
+    StepRangeLen{typeof(x-T(r.ref))}(x - r.ref, negate(r.step), length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r::LinRange, x::Number) = LinRange(r.start - x, r.stop - x, length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), x::Number, r::LinRange) = LinRange(x - r.start, x - r.stop, length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(-), r1::AbstractRange, r2::AbstractRange) = r1 - r2
 
-broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::Number, r::AbstractRange) = range(x*first(r), step=x*step(r), length=length(r))
+# at present Base.range_start_step_length(1,0,5) is an error, so for 0 .* (-2:2) we explicitly construct StepRangeLen:
+broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::Number, r::AbstractRange) = StepRangeLen(x*first(r), x*step(r), length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::Number, r::StepRangeLen{T}) where {T} =
     StepRangeLen{typeof(x*T(r.ref))}(x*r.ref, x*r.step, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::Number, r::LinRange) = LinRange(x * r.start, x * r.stop, r.len)
-# separate in case of noncommutative multiplication
-broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::AbstractRange, x::Number) = range(first(r)*x, step=step(r)*x, length=length(r))
+broadcasted(::DefaultArrayStyle{1}, ::typeof(*), x::AbstractFloat, r::OrdinalRange) =
+    Base.range_start_step_length(x*first(r), x*step(r), length(r))  # 0.2 .* (-2:2) needs TwicePrecision
+# separate in case of noncommutative multiplication:
+broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::AbstractRange, x::Number) = StepRangeLen(first(r)*x, step(r)*x, length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::StepRangeLen{T}, x::Number) where {T} =
     StepRangeLen{typeof(T(r.ref)*x)}(r.ref*x, r.step*x, length(r), r.offset)
 broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::LinRange, x::Number) = LinRange(r.start * x, r.stop * x, r.len)
+broadcasted(::DefaultArrayStyle{1}, ::typeof(*), r::OrdinalRange, x::AbstractFloat) =
+    Base.range_start_step_length(first(r)*x, step(r)*x, length(r))
 
+#broadcasted(::DefaultArrayStyle{1}, ::typeof(/), r::AbstractRange, x::Number) = range(first(r)/x, last(r)/x, length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(/), r::AbstractRange, x::Number) = range(first(r)/x, step=step(r)/x, length=length(r))
 broadcasted(::DefaultArrayStyle{1}, ::typeof(/), r::StepRangeLen{T}, x::Number) where {T} =
     StepRangeLen{typeof(T(r.ref)/x)}(r.ref/x, r.step/x, length(r), r.offset)
@@ -1112,7 +1153,7 @@ broadcasted(::typeof(+), j::CartesianIndex{N}, I::CartesianIndices{N}) where N =
 broadcasted(::typeof(-), I::CartesianIndices{N}, j::CartesianIndex{N}) where N =
     CartesianIndices(map((rng, offset)->rng .- offset, I.indices, Tuple(j)))
 function broadcasted(::typeof(-), j::CartesianIndex{N}, I::CartesianIndices{N}) where N
-    diffrange(offset, rng) = range(offset-last(rng), length=length(rng))
+    diffrange(offset, rng) = range(offset-last(rng), length=length(rng), step=step(rng))
     Iterators.reverse(CartesianIndices(map(diffrange, Tuple(j), I.indices)))
 end
 
@@ -1196,18 +1237,12 @@ function __dot__(x::Expr)
            Meta.isexpr(x.args[1], :call) # function or macro definition
         Expr(x.head, x.args[1], dotargs[2])
     elseif x.head === :(<:) || x.head === :(>:)
-        tmp = x.head === :(<:) ? :(.<:) : :(.>:)
+        tmp = x.head === :(<:) ? :.<: : :.>:
         Expr(:call, tmp, dotargs...)
     else
-        if x.head === :&& || x.head === :||
-            error("""
-                Using `&&` and `||` is disallowed in `@.` expressions.
-                Use `&` or `|` for elementwise logical operations.
-                """)
-        end
-        head = string(x.head)
-        if last(head) == '=' && first(head) != '.'
-            Expr(Symbol('.',head), dotargs...)
+        head = String(x.head)::String
+        if last(head) == '=' && first(head) != '.' || head == "&&" || head == "||"
+            Expr(Symbol('.', head), dotargs...)
         else
             Expr(x.head, dotargs...)
         end
@@ -1242,7 +1277,13 @@ macro __dot__(x)
     esc(__dot__(x))
 end
 
-@inline broadcasted_kwsyntax(f, args...; kwargs...) = broadcasted((args...)->f(args...; kwargs...), args...)
+@inline function broadcasted_kwsyntax(f, args...; kwargs...)
+    if isempty(kwargs) # some BroadcastStyles dispatch on `f`, so try to preserve its type
+        return broadcasted(f, args...)
+    else
+        return broadcasted((args...) -> f(args...; kwargs...), args...)
+    end
+end
 @inline function broadcasted(f, args...)
     args′ = map(broadcastable, args)
     broadcasted(combine_styles(args′...), f, args′...)
@@ -1264,4 +1305,44 @@ end
 end
 @inline broadcasted(::S, f, args...) where S<:BroadcastStyle = Broadcasted{S}(f, args)
 
+"""
+    BroadcastFunction{F} <: Function
+
+Represents the "dotted" version of an operator, which broadcasts the operator over its
+arguments, so `BroadcastFunction(op)` is functionally equivalent to `(x...) -> (op).(x...)`.
+
+Can be created by just passing an operator preceded by a dot to a higher-order function.
+
+# Examples
+```jldoctest
+julia> a = [[1 3; 2 4], [5 7; 6 8]];
+
+julia> b = [[9 11; 10 12], [13 15; 14 16]];
+
+julia> map(.*, a, b)
+2-element Vector{Matrix{Int64}}:
+ [9 33; 20 48]
+ [65 105; 84 128]
+
+julia> Base.BroadcastFunction(+)(a, b) == a .+ b
+true
+```
+
+!!! compat "Julia 1.6"
+    `BroadcastFunction` and the standalone `.op` syntax are available as of Julia 1.6.
+"""
+struct BroadcastFunction{F} <: Function
+    f::F
+end
+
+@inline (op::BroadcastFunction)(x...; kwargs...) = op.f.(x...; kwargs...)
+
+function Base.show(io::IO, op::BroadcastFunction)
+    print(io, BroadcastFunction, '(')
+    show(io, op.f)
+    print(io, ')')
+    nothing
+end
+Base.show(io::IO, ::MIME"text/plain", op::BroadcastFunction) = show(io, op)
+
 end # module
diff --git a/base/c.jl b/base/c.jl
index a26f41856dc8ff..7d168f2293c9c5 100644
--- a/base/c.jl
+++ b/base/c.jl
@@ -270,6 +270,21 @@ reasonably represented in the target encoding; it always succeeds for
 conversions between UTF-XX encodings, even for invalid Unicode data.
 
 Only conversion to/from UTF-8 is currently supported.
+
+# Examples
+```jldoctest
+julia> str = "αβγ"
+"αβγ"
+
+julia> transcode(UInt16, str)
+3-element Vector{UInt16}:
+ 0x03b1
+ 0x03b2
+ 0x03b3
+
+julia> transcode(String, transcode(UInt16, str))
+"αβγ"
+```
 """
 function transcode end
 
@@ -533,6 +548,12 @@ function expand_ccallable(rt, def)
     error("expected method definition in @ccallable")
 end
 
+"""
+    @ccallable(def)
+
+Make the annotated function be callable from C using its name. This can, for example,
+be used to expose functionality as a C-API when creating a custom Julia sysimage.
+"""
 macro ccallable(def)
     expand_ccallable(nothing, def)
 end
@@ -712,3 +733,7 @@ name, if desired `"libglib-2.0".g_uri_escape_string(...`
 macro ccall(expr)
     return ccall_macro_lower(:ccall, ccall_macro_parse(expr)...)
 end
+
+macro ccall_effects(effects::UInt8, expr)
+    return ccall_macro_lower((:ccall, effects), ccall_macro_parse(expr)...)
+end
diff --git a/base/channels.jl b/base/channels.jl
index e4c0d003866aa9..da7b1d24583ca2 100644
--- a/base/channels.jl
+++ b/base/channels.jl
@@ -33,10 +33,11 @@ mutable struct Channel{T} <: AbstractChannel{T}
     cond_take::Threads.Condition                 # waiting for data to become available
     cond_wait::Threads.Condition                 # waiting for data to become maybe available
     cond_put::Threads.Condition                  # waiting for a writeable slot
-    state::Symbol
+    @atomic state::Symbol
     excp::Union{Exception, Nothing}      # exception to be thrown when state !== :open
 
     data::Vector{T}
+    @atomic n_avail_items::Int           # Available items for taking, can be read without lock
     sz_max::Int                          # maximum size of channel
 
     function Channel{T}(sz::Integer = 0) where T
@@ -46,7 +47,7 @@ mutable struct Channel{T} <: AbstractChannel{T}
         lock = ReentrantLock()
         cond_put, cond_take = Threads.Condition(lock), Threads.Condition(lock)
         cond_wait = (sz == 0 ? Threads.Condition(lock) : cond_take) # wait is distinct from take iff unbuffered
-        return new(cond_take, cond_wait, cond_put, :open, nothing, Vector{T}(), sz)
+        return new(cond_take, cond_wait, cond_put, :open, nothing, Vector{T}(), 0, sz)
     end
 end
 
@@ -62,6 +63,7 @@ Channel(sz=0) = Channel{Any}(sz)
 
 Create a new task from `func`, bind it to a new channel of type
 `T` and size `size`, and schedule the task, all in a single call.
+The channel is automatically closed when the task terminates.
 
 `func` must accept the bound channel as its only argument.
 
@@ -121,7 +123,7 @@ julia> chnl = Channel{Char}(1, spawn=true) do ch
                put!(ch, c)
            end
        end
-Channel{Char}(1) (1 item available)
+Channel{Char}(1) (2 items available)
 
 julia> String(collect(chnl))
 "hello world"
@@ -166,6 +168,8 @@ isbuffered(c::Channel) = c.sz_max==0 ? false : true
 
 function check_channel_state(c::Channel)
     if !isopen(c)
+        # if the monotonic load succeed, now do an acquire fence
+        (@atomic :acquire c.state) === :open && concurrency_violation()
         excp = c.excp
         excp !== nothing && throw(excp)
         throw(closed_exception())
@@ -182,8 +186,8 @@ Close a channel. An exception (optionally given by `excp`), is thrown by:
 function close(c::Channel, excp::Exception=closed_exception())
     lock(c)
     try
-        c.state = :closed
         c.excp = excp
+        @atomic :release c.state = :closed
         notify_error(c.cond_take, excp)
         notify_error(c.cond_wait, excp)
         notify_error(c.cond_put, excp)
@@ -192,7 +196,7 @@ function close(c::Channel, excp::Exception=closed_exception())
     end
     nothing
 end
-isopen(c::Channel) = (c.state === :open)
+isopen(c::Channel) = ((@atomic :monotonic c.state) === :open)
 
 """
     bind(chnl::Channel, task::Task)
@@ -283,7 +287,7 @@ function close_chnl_on_taskdone(t::Task, c::Channel)
     lock(c)
     try
         isopen(c) || return
-        if istaskfailed(t) && task_result(t) isa Exception
+        if istaskfailed(t)
             close(c, TaskFailedException(t))
             return
         end
@@ -295,9 +299,10 @@ function close_chnl_on_taskdone(t::Task, c::Channel)
 end
 
 struct InvalidStateException <: Exception
-    msg::AbstractString
+    msg::String
     state::Symbol
 end
+showerror(io::IO, ex::InvalidStateException) = print(io, "InvalidStateException: ", ex.msg)
 
 """
     put!(c::Channel, v)
@@ -316,17 +321,36 @@ function put!(c::Channel{T}, v) where T
     return isbuffered(c) ? put_buffered(c, v) : put_unbuffered(c, v)
 end
 
+# Atomically update channel n_avail, *assuming* we hold the channel lock.
+function _increment_n_avail(c, inc)
+    # We hold the channel lock so it's safe to non-atomically read and
+    # increment c.n_avail_items
+    newlen = c.n_avail_items + inc
+    # Atomically store c.n_avail_items to prevent data races with other threads
+    # reading this outside the lock.
+    @atomic :monotonic c.n_avail_items = newlen
+end
+
 function put_buffered(c::Channel, v)
     lock(c)
+    did_buffer = false
     try
+        # Increment channel n_avail eagerly (before push!) to count data in the
+        # buffer as well as offers from tasks which are blocked in wait().
+        _increment_n_avail(c, 1)
         while length(c.data) == c.sz_max
             check_channel_state(c)
             wait(c.cond_put)
         end
+        check_channel_state(c)
         push!(c.data, v)
+        did_buffer = true
         # notify all, since some of the waiters may be on a "fetch" call.
         notify(c.cond_take, nothing, true, false)
     finally
+        # Decrement the available items if this task had an exception before pushing the
+        # item to the buffer (e.g., during `wait(c.cond_put)`):
+        did_buffer || _increment_n_avail(c, -1)
         unlock(c)
     end
     return v
@@ -335,14 +359,17 @@ end
 function put_unbuffered(c::Channel, v)
     lock(c)
     taker = try
+        _increment_n_avail(c, 1)
         while isempty(c.cond_take.waitq)
             check_channel_state(c)
             notify(c.cond_wait)
             wait(c.cond_put)
         end
+        check_channel_state(c)
         # unfair scheduled version of: notify(c.cond_take, v, false, false); yield()
         popfirst!(c.cond_take.waitq)
     finally
+        _increment_n_avail(c, -1)
         unlock(c)
     end
     schedule(taker, v)
@@ -389,6 +416,7 @@ function take_buffered(c::Channel)
             wait(c.cond_take)
         end
         v = popfirst!(c.data)
+        _increment_n_avail(c, -1)
         notify(c.cond_put, nothing, false, false) # notify only one, since only one slot has become available for a put!.
         return v
     finally
@@ -418,10 +446,14 @@ For unbuffered channels returns `true` if there are tasks waiting
 on a [`put!`](@ref).
 """
 isready(c::Channel) = n_avail(c) > 0
-n_avail(c::Channel) = isbuffered(c) ? length(c.data) : length(c.cond_put.waitq)
-isempty(c::Channel) = isbuffered(c) ? isempty(c.data) : isempty(c.cond_put.waitq)
+isempty(c::Channel) = n_avail(c) == 0
+function n_avail(c::Channel)
+    # Lock-free equivalent to `length(c.data) + length(c.cond_put.waitq)`
+    @atomic :monotonic c.n_avail_items
+end
 
 lock(c::Channel) = lock(c.cond_take)
+lock(f, c::Channel) = lock(f, c.cond_take)
 unlock(c::Channel) = unlock(c.cond_take)
 trylock(c::Channel) = trylock(c.cond_take)
 
@@ -454,7 +486,7 @@ function show(io::IO, ::MIME"text/plain", c::Channel)
                 print(io, " (empty)")
             else
                 s = n == 1 ? "" : "s"
-                print(io, " (", n_avail(c), " item$s available)")
+                print(io, " (", n, " item$s available)")
             end
         end
     end
diff --git a/base/char.jl b/base/char.jl
index 173c84711e551a..c8b1c28166bbf9 100644
--- a/base/char.jl
+++ b/base/char.jl
@@ -45,9 +45,10 @@ represents a valid Unicode character.
 """
 Char
 
-(::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
-AbstractChar(x::Number) = Char(x)
-(::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(codepoint(x))
+@constprop :aggressive (::Type{T})(x::Number) where {T<:AbstractChar} = T(UInt32(x))
+@constprop :aggressive AbstractChar(x::Number) = Char(x)
+@constprop :aggressive (::Type{T})(x::AbstractChar) where {T<:Union{Number,AbstractChar}} = T(codepoint(x))
+@constprop :aggressive (::Type{T})(x::AbstractChar) where {T<:Union{Int32,Int64}} = codepoint(x) % T
 (::Type{T})(x::T) where {T<:AbstractChar} = x
 
 """
@@ -74,7 +75,7 @@ return a different-sized integer (e.g. `UInt8`).
 """
 function codepoint end
 
-codepoint(c::Char) = UInt32(c)
+@constprop :aggressive codepoint(c::Char) = UInt32(c)
 
 struct InvalidCharError{T<:AbstractChar} <: Exception
     char::T
@@ -82,11 +83,11 @@ end
 struct CodePointError{T<:Integer} <: Exception
     code::T
 end
-@noinline invalid_char(c::AbstractChar) = throw(InvalidCharError(c))
-@noinline code_point_err(u::Integer) = throw(CodePointError(u))
+@noinline throw_invalid_char(c::AbstractChar) = throw(InvalidCharError(c))
+@noinline throw_code_point_err(u::Integer) = throw(CodePointError(u))
 
 function ismalformed(c::Char)
-    u = reinterpret(UInt32, c)
+    u = bitcast(UInt32, c)
     l1 = leading_ones(u) << 3
     t0 = trailing_zeros(u) & 56
     (l1 == 8) | (l1 + t0 > 32) |
@@ -96,7 +97,7 @@ end
 @inline is_overlong_enc(u::UInt32) = (u >> 24 == 0xc0) | (u >> 24 == 0xc1) | (u >> 21 == 0x0704) | (u >> 20 == 0x0f08)
 
 function isoverlong(c::Char)
-    u = reinterpret(UInt32, c)
+    u = bitcast(UInt32, c)
     is_overlong_enc(u)
 end
 
@@ -107,8 +108,9 @@ end
     ismalformed(c::AbstractChar) -> Bool
 
 Return `true` if `c` represents malformed (non-Unicode) data according to the
-encoding used by `c`.  Defaults to `false` for non-`Char` types.  See also
-[`show_invalid`](@ref).
+encoding used by `c`. Defaults to `false` for non-`Char` types.
+
+See also [`show_invalid`](@ref).
 """
 ismalformed(c::AbstractChar) = false
 
@@ -116,20 +118,21 @@ ismalformed(c::AbstractChar) = false
     isoverlong(c::AbstractChar) -> Bool
 
 Return `true` if `c` represents an overlong UTF-8 sequence. Defaults
-to `false` for non-`Char` types.  See also [`decode_overlong`](@ref)
-and [`show_invalid`](@ref).
+to `false` for non-`Char` types.
+
+See also [`decode_overlong`](@ref) and [`show_invalid`](@ref).
 """
 isoverlong(c::AbstractChar) = false
 
-function UInt32(c::Char)
+@constprop :aggressive function UInt32(c::Char)
     # TODO: use optimized inline LLVM
-    u = reinterpret(UInt32, c)
+    u = bitcast(UInt32, c)
     u < 0x80000000 && return u >> 24
     l1 = leading_ones(u)
     t0 = trailing_zeros(u) & 56
     (l1 == 1) | (8l1 + t0 > 32) |
     ((((u & 0x00c0c0c0) ⊻ 0x00808080) >> t0 != 0) | is_overlong_enc(u)) &&
-        invalid_char(c)::Union{}
+        throw_invalid_char(c)
     u &= 0xffffffff >> l1
     u >>= t0
     ((u & 0x0000007f) >> 0) | ((u & 0x00007f00) >> 2) |
@@ -145,8 +148,8 @@ that support overlong encodings should implement `Base.decode_overlong`.
 """
 function decode_overlong end
 
-function decode_overlong(c::Char)
-    u = reinterpret(UInt32, c)
+@constprop :aggressive function decode_overlong(c::Char)
+    u = bitcast(UInt32, c)
     l1 = leading_ones(u)
     t0 = trailing_zeros(u) & 56
     u &= 0xffffffff >> l1
@@ -155,24 +158,26 @@ function decode_overlong(c::Char)
     ((u & 0x007f0000) >> 4) | ((u & 0x7f000000) >> 6)
 end
 
-function Char(u::UInt32)
-    u < 0x80 && return reinterpret(Char, u << 24)
-    u < 0x00200000 || code_point_err(u)::Union{}
+@constprop :aggressive function Char(u::UInt32)
+    u < 0x80 && return bitcast(Char, u << 24)
+    u < 0x00200000 || throw_code_point_err(u)
     c = ((u << 0) & 0x0000003f) | ((u << 2) & 0x00003f00) |
         ((u << 4) & 0x003f0000) | ((u << 6) & 0x3f000000)
     c = u < 0x00000800 ? (c << 16) | 0xc0800000 :
         u < 0x00010000 ? (c << 08) | 0xe0808000 :
                          (c << 00) | 0xf0808080
-    reinterpret(Char, c)
+    bitcast(Char, c)
 end
 
-function (T::Union{Type{Int8},Type{UInt8}})(c::Char)
-    i = reinterpret(Int32, c)
-    i ≥ 0 ? ((i >>> 24) % T) : T(UInt32(c))
+@constprop :aggressive @noinline UInt32_cold(c::Char) = UInt32(c)
+@constprop :aggressive function (T::Union{Type{Int8},Type{UInt8}})(c::Char)
+    i = bitcast(Int32, c)
+    i ≥ 0 ? ((i >>> 24) % T) : T(UInt32_cold(c))
 end
 
-function Char(b::Union{Int8,UInt8})
-    0 ≤ b ≤ 0x7f ? reinterpret(Char, (b % UInt32) << 24) : Char(UInt32(b))
+@constprop :aggressive @noinline Char_cold(b::UInt32) = Char(b)
+@constprop :aggressive function Char(b::Union{Int8,UInt8})
+    0 ≤ b ≤ 0x7f ? bitcast(Char, (b % UInt32) << 24) : Char_cold(UInt32(b))
 end
 
 convert(::Type{AbstractChar}, x::Number) = Char(x) # default to Char
@@ -183,8 +188,8 @@ convert(::Type{T}, c::T) where {T<:AbstractChar} = c
 
 rem(x::AbstractChar, ::Type{T}) where {T<:Number} = rem(codepoint(x), T)
 
-typemax(::Type{Char}) = reinterpret(Char, typemax(UInt32))
-typemin(::Type{Char}) = reinterpret(Char, typemin(UInt32))
+typemax(::Type{Char}) = bitcast(Char, typemax(UInt32))
+typemin(::Type{Char}) = bitcast(Char, typemin(UInt32))
 
 size(c::AbstractChar) = ()
 size(c::AbstractChar, d::Integer) = d < 1 ? throw(BoundsError()) : 1
@@ -205,12 +210,12 @@ iterate(c::AbstractChar, done=false) = done ? nothing : (c, true)
 isempty(c::AbstractChar) = false
 in(x::AbstractChar, y::AbstractChar) = x == y
 
-==(x::Char, y::Char) = reinterpret(UInt32, x) == reinterpret(UInt32, y)
-isless(x::Char, y::Char) = reinterpret(UInt32, x) < reinterpret(UInt32, y)
+==(x::Char, y::Char) = bitcast(UInt32, x) == bitcast(UInt32, y)
+isless(x::Char, y::Char) = bitcast(UInt32, x) < bitcast(UInt32, y)
 hash(x::Char, h::UInt) =
-    hash_uint64(((reinterpret(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
+    hash_uint64(((bitcast(UInt32, x) + UInt64(0xd4d64234)) << 32) ⊻ UInt64(h))
 
-first_utf8_byte(c::Char) = (reinterpret(UInt32, c) >> 24) % UInt8
+first_utf8_byte(c::Char) = (bitcast(UInt32, c) >> 24) % UInt8
 
 # fallbacks:
 isless(x::AbstractChar, y::AbstractChar) = isless(Char(x), Char(y))
@@ -219,8 +224,26 @@ hash(x::AbstractChar, h::UInt) = hash(Char(x), h)
 widen(::Type{T}) where {T<:AbstractChar} = T
 
 @inline -(x::AbstractChar, y::AbstractChar) = Int(x) - Int(y)
-@inline -(x::T, y::Integer) where {T<:AbstractChar} = T(Int32(x) - Int32(y))
-@inline +(x::T, y::Integer) where {T<:AbstractChar} = T(Int32(x) + Int32(y))
+@inline function -(x::T, y::Integer) where {T<:AbstractChar}
+    if x isa Char
+        u = Int32((bitcast(UInt32, x) >> 24) % Int8)
+        if u >= 0 # inline the runtime fast path
+            z = u - y
+            return 0 <= z < 0x80 ? bitcast(Char, (z % UInt32) << 24) : Char(UInt32(z))
+        end
+    end
+    return T(Int32(x) - Int32(y))
+end
+@inline function +(x::T, y::Integer) where {T<:AbstractChar}
+    if x isa Char
+        u = Int32((bitcast(UInt32, x) >> 24) % Int8)
+        if u >= 0 # inline the runtime fast path
+            z = u + y
+            return 0 <= z < 0x80 ? bitcast(Char, (z % UInt32) << 24) : Char(UInt32(z))
+        end
+    end
+    return T(Int32(x) + Int32(y))
+end
 @inline +(x::Integer, y::AbstractChar) = y + x
 
 # `print` should output UTF-8 by default for all AbstractChar types.
@@ -236,7 +259,7 @@ const hex_chars = UInt8['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
 
 function show_invalid(io::IO, c::Char)
     write(io, 0x27)
-    u = reinterpret(UInt32, c)
+    u = bitcast(UInt32, c)
     while true
         a = hex_chars[((u >> 28) & 0xf) + 1]
         b = hex_chars[((u >> 24) & 0xf) + 1]
diff --git a/base/checked.jl b/base/checked.jl
index 840015861923fc..1f9e319f50fbd1 100644
--- a/base/checked.jl
+++ b/base/checked.jl
@@ -6,14 +6,14 @@ module Checked
 
 export checked_neg, checked_abs, checked_add, checked_sub, checked_mul,
        checked_div, checked_rem, checked_fld, checked_mod, checked_cld,
-       add_with_overflow, sub_with_overflow, mul_with_overflow
+       checked_length, add_with_overflow, sub_with_overflow, mul_with_overflow
 
 import Core.Intrinsics:
        checked_sadd_int, checked_ssub_int, checked_smul_int, checked_sdiv_int,
        checked_srem_int,
        checked_uadd_int, checked_usub_int, checked_umul_int, checked_udiv_int,
        checked_urem_int
-import ..no_op_err, ..@_inline_meta, ..@_noinline_meta
+import ..no_op_err, ..@inline, ..@noinline, ..checked_length
 
 # define promotion behavior for checked operations
 checked_add(x::Integer, y::Integer) = checked_add(promote(x,y)...)
@@ -86,7 +86,7 @@ The overflow protection may impose a perceptible performance penalty.
 function checked_neg(x::T) where T<:Integer
     checked_sub(T(0), x)
 end
-throw_overflowerr_negation(x) = (@_noinline_meta;
+throw_overflowerr_negation(x) = (@noinline;
     throw(OverflowError(Base.invokelatest(string, "checked arithmetic: cannot compute -x for x = ", x, "::", typeof(x)))))
 if BrokenSignedInt != Union{}
 function checked_neg(x::BrokenSignedInt)
@@ -115,9 +115,10 @@ function checked_abs end
 
 function checked_abs(x::SignedInt)
     r = ifelse(x<0, -x, x)
-    r<0 && throw(OverflowError(string("checked arithmetic: cannot compute |x| for x = ", x, "::", typeof(x))))
-    r
- end
+    r<0 || return r
+    msg = LazyString("checked arithmetic: cannot compute |x| for x = ", x, "::", typeof(x))
+    throw(OverflowError(msg))
+end
 checked_abs(x::UnsignedInt) = x
 checked_abs(x::Bool) = x
 
@@ -150,8 +151,8 @@ end
 end
 
 
-throw_overflowerr_binaryop(op, x, y) = (@_noinline_meta;
-    throw(OverflowError(Base.invokelatest(string, x, " ", op, " ", y, " overflowed for type ", typeof(x)))))
+throw_overflowerr_binaryop(op, x, y) = (@noinline;
+    throw(OverflowError(LazyString(x, " ", op, " ", y, " overflowed for type ", typeof(x)))))
 
 """
     Base.checked_add(x, y)
@@ -161,7 +162,7 @@ Calculates `x+y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_add(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = add_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:+, x, y)
     z
@@ -218,7 +219,7 @@ Calculates `x-y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_sub(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = sub_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:-, x, y)
     z
@@ -283,7 +284,7 @@ Calculates `x*y`, checking for overflow errors where applicable.
 The overflow protection may impose a perceptible performance penalty.
 """
 function checked_mul(x::T, y::T) where T<:Integer
-    @_inline_meta
+    @inline
     z, b = mul_with_overflow(x, y)
     b && throw_overflowerr_binaryop(:*, x, y)
     z
@@ -349,4 +350,12 @@ The overflow protection may impose a perceptible performance penalty.
 """
 checked_cld(x::T, y::T) where {T<:Integer} = cld(x, y) # Base.cld already checks
 
+"""
+    Base.checked_length(r)
+
+Calculates `length(r)`, but may check for overflow errors where applicable when
+the result doesn't fit into `Union{Integer(eltype(r)),Int}`.
+"""
+checked_length(r) = length(r) # for most things, length doesn't error
+
 end
diff --git a/base/client.jl b/base/client.jl
index 25fb605d729e3e..d335f378f24c1a 100644
--- a/base/client.jl
+++ b/base/client.jl
@@ -84,27 +84,33 @@ end
 
 function scrub_repl_backtrace(bt)
     if bt !== nothing && !(bt isa Vector{Any}) # ignore our sentinel value types
-        bt = stacktrace(bt)
+        bt = bt isa Vector{StackFrame} ? copy(bt) : stacktrace(bt)
         # remove REPL-related frames from interactive printing
         eval_ind = findlast(frame -> !frame.from_c && frame.func === :eval, bt)
         eval_ind === nothing || deleteat!(bt, eval_ind:length(bt))
     end
     return bt
 end
+scrub_repl_backtrace(stack::ExceptionStack) =
+    ExceptionStack(Any[(;x.exception, backtrace = scrub_repl_backtrace(x.backtrace)) for x in stack])
 
-function display_error(io::IO, er, bt)
+istrivialerror(stack::ExceptionStack) =
+    length(stack) == 1 && length(stack[1].backtrace) ≤ 1
+    # frame 1 = top level; assumes already went through scrub_repl_backtrace
+
+function display_error(io::IO, stack::ExceptionStack)
     printstyled(io, "ERROR: "; bold=true, color=Base.error_color())
-    bt = scrub_repl_backtrace(bt)
-    showerror(IOContext(io, :limit => true), er, bt, backtrace = bt!==nothing)
+    show_exception_stack(IOContext(io, :limit => true), stack)
     println(io)
 end
-function display_error(io::IO, stack::Vector)
+display_error(stack::ExceptionStack) = display_error(stderr, stack)
+
+# these forms are depended on by packages outside Julia
+function display_error(io::IO, er, bt)
     printstyled(io, "ERROR: "; bold=true, color=Base.error_color())
-    bt = Any[ (x[1], scrub_repl_backtrace(x[2])) for x in stack ]
-    show_exception_stack(IOContext(io, :limit => true), bt)
+    showerror(IOContext(io, :limit => true), er, bt, backtrace = bt!==nothing)
     println(io)
 end
-display_error(stack::Vector) = display_error(stderr, stack)
 display_error(er, bt=nothing) = display_error(stderr, er, bt)
 
 function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
@@ -117,13 +123,15 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
                 print(color_normal)
             end
             if lasterr !== nothing
+                lasterr = scrub_repl_backtrace(lasterr)
+                istrivialerror(lasterr) || setglobal!(Main, :err, lasterr)
                 invokelatest(display_error, errio, lasterr)
                 errcount = 0
                 lasterr = nothing
             else
                 ast = Meta.lower(Main, ast)
                 value = Core.eval(Main, ast)
-                ccall(:jl_set_global, Cvoid, (Any, Any, Any), Main, :ans, value)
+                setglobal!(Main, :ans, value)
                 if !(value === nothing) && show_value
                     if have_color
                         print(answer_color())
@@ -134,7 +142,6 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
                         @error "Evaluation succeeded, but an error occurred while displaying the value" typeof(value)
                         rethrow()
                     end
-                    println()
                 end
             end
             break
@@ -143,7 +150,8 @@ function eval_user_input(errio, @nospecialize(ast), show_value::Bool)
                 @error "SYSTEM: display_error(errio, lasterr) caused an error"
             end
             errcount += 1
-            lasterr = catch_stack()
+            lasterr = scrub_repl_backtrace(current_exceptions())
+            setglobal!(Main, :err, lasterr)
             if errcount > 2
                 @error "It is likely that something important is broken, and Julia will not be able to continue normally" errcount
                 break
@@ -198,9 +206,6 @@ function incomplete_tag(ex::Expr)
     return :other
 end
 
-# call include() on a file, ignoring if not found
-include_ifexists(mod::Module, path::AbstractString) = isfile(path) && include(mod, path)
-
 function exec_options(opts)
     if !isempty(ARGS)
         idxs = findall(x -> x == "--", ARGS)
@@ -252,8 +257,18 @@ function exec_options(opts)
         invokelatest(Main.Distributed.process_opts, opts)
     end
 
+    interactiveinput = (repl || is_interactive::Bool) && isa(stdin, TTY)
+    is_interactive::Bool |= interactiveinput
+
     # load ~/.julia/config/startup.jl file
-    startup && load_julia_startup()
+    if startup
+        try
+            load_julia_startup()
+        catch
+            invokelatest(display_error, scrub_repl_backtrace(current_exceptions()))
+            !(repl || is_interactive::Bool) && exit(1)
+        end
+    end
 
     # process cmds list
     for (cmd, arg) in cmds
@@ -282,19 +297,20 @@ function exec_options(opts)
             exit_on_sigint(true)
         end
         try
-            include(Main, PROGRAM_FILE)
+            if PROGRAM_FILE == "-"
+                include_string(Main, read(stdin, String), "stdin")
+            else
+                include(Main, PROGRAM_FILE)
+            end
         catch
-            invokelatest(display_error, catch_stack())
+            invokelatest(display_error, scrub_repl_backtrace(current_exceptions()))
             if !is_interactive::Bool
                 exit(1)
             end
         end
     end
-    repl |= is_interactive::Bool
-    if repl
-        interactiveinput = isa(stdin, TTY)
+    if repl || is_interactive::Bool
         if interactiveinput
-            global is_interactive = true
             banner = (opts.banner != 0) # --banner!=no
         else
             banner = (opts.banner == 1) # --banner=yes
@@ -304,17 +320,33 @@ function exec_options(opts)
     nothing
 end
 
-function load_julia_startup()
+function _global_julia_startup_file()
     # If the user built us with a specific Base.SYSCONFDIR, check that location first for a startup.jl file
-    #   If it is not found, then continue on to the relative path based on Sys.BINDIR
-    BINDIR = Sys.BINDIR::String
-    SYSCONFDIR = Base.SYSCONFDIR::String
-    if !isempty(SYSCONFDIR) && isfile(joinpath(BINDIR, SYSCONFDIR, "julia", "startup.jl"))
-        include(Main, abspath(BINDIR, SYSCONFDIR, "julia", "startup.jl"))
-    else
-        include_ifexists(Main, abspath(BINDIR, "..", "etc", "julia", "startup.jl"))
+    # If it is not found, then continue on to the relative path based on Sys.BINDIR
+    BINDIR = Sys.BINDIR
+    SYSCONFDIR = Base.SYSCONFDIR
+    if !isempty(SYSCONFDIR)
+        p1 = abspath(BINDIR, SYSCONFDIR, "julia", "startup.jl")
+        isfile(p1) && return p1
+    end
+    p2 = abspath(BINDIR, "..", "etc", "julia", "startup.jl")
+    isfile(p2) && return p2
+    return nothing
+end
+
+function _local_julia_startup_file()
+    if !isempty(DEPOT_PATH)
+        path = abspath(DEPOT_PATH[1], "config", "startup.jl")
+        isfile(path) && return path
     end
-    !isempty(DEPOT_PATH) && include_ifexists(Main, abspath(DEPOT_PATH[1], "config", "startup.jl"))
+    return nothing
+end
+
+function load_julia_startup()
+    global_file = _global_julia_startup_file()
+    (global_file !== nothing) && include(Main, global_file)
+    local_file = _local_julia_startup_file()
+    (local_file !== nothing) && include(Main, local_file)
     return nothing
 end
 
@@ -465,26 +497,27 @@ Returns the result of the last evaluated expression of the input file. During in
 a task-local include path is set to the directory containing the file. Nested calls to
 `include` will search relative to that path. This function is typically used to load source
 interactively, or to combine files in packages that are broken into multiple source files.
+The argument `path` is normalized using [`normpath`](@ref) which will resolve
+relative path tokens such as `..` and convert `/` to the appropriate path separator.
 
 The optional first argument `mapexpr` can be used to transform the included code before
 it is evaluated: for each parsed expression `expr` in `path`, the `include` function
 actually evaluates `mapexpr(expr)`.  If it is omitted, `mapexpr` defaults to [`identity`](@ref).
 
 Use [`Base.include`](@ref) to evaluate a file into another module.
+
+!!! compat "Julia 1.5"
+    Julia 1.5 is required for passing the `mapexpr` argument.
 """
 MainInclude.include
 
 function _start()
     empty!(ARGS)
     append!(ARGS, Core.ARGS)
-    if ccall(:jl_generating_output, Cint, ()) != 0 && JLOptions().incremental == 0
-        # clear old invalid pointers
-        PCRE.__init__()
-    end
     try
         exec_options(JLOptions())
     catch
-        invokelatest(display_error, catch_stack())
+        invokelatest(display_error, scrub_repl_backtrace(current_exceptions()))
         exit(1)
     end
     if is_interactive && get(stdout, :color, false)
diff --git a/base/cmd.jl b/base/cmd.jl
index 210b77b11b1ca9..ecabb5c32b1d05 100644
--- a/base/cmd.jl
+++ b/base/cmd.jl
@@ -13,12 +13,14 @@ struct Cmd <: AbstractCmd
     flags::UInt32 # libuv process flags
     env::Union{Vector{String},Nothing}
     dir::String
+    cpus::Union{Nothing,Vector{UInt16}}
     Cmd(exec::Vector{String}) =
-        new(exec, false, 0x00, nothing, "")
-    Cmd(cmd::Cmd, ignorestatus, flags, env, dir) =
+        new(exec, false, 0x00, nothing, "", nothing)
+    Cmd(cmd::Cmd, ignorestatus, flags, env, dir, cpus = nothing) =
         new(cmd.exec, ignorestatus, flags, env,
-            dir === cmd.dir ? dir : cstr(dir))
+            dir === cmd.dir ? dir : cstr(dir), cpus)
     function Cmd(cmd::Cmd; ignorestatus::Bool=cmd.ignorestatus, env=cmd.env, dir::AbstractString=cmd.dir,
+                 cpus::Union{Nothing,Vector{UInt16}} = cmd.cpus,
                  detach::Bool = 0 != cmd.flags & UV_PROCESS_DETACHED,
                  windows_verbatim::Bool = 0 != cmd.flags & UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS,
                  windows_hide::Bool = 0 != cmd.flags & UV_PROCESS_WINDOWS_HIDE)
@@ -26,7 +28,7 @@ struct Cmd <: AbstractCmd
                 windows_verbatim * UV_PROCESS_WINDOWS_VERBATIM_ARGUMENTS |
                 windows_hide * UV_PROCESS_WINDOWS_HIDE
         new(cmd.exec, ignorestatus, flags, byteenv(env),
-            dir === cmd.dir ? dir : cstr(dir))
+            dir === cmd.dir ? dir : cstr(dir), cpus)
     end
 end
 
@@ -34,7 +36,8 @@ has_nondefault_cmd_flags(c::Cmd) =
     c.ignorestatus ||
     c.flags != 0x00 ||
     c.env !== nothing ||
-    c.dir !== ""
+    c.dir !== "" ||
+    c.cpus !== nothing
 
 """
     Cmd(cmd::Cmd; ignorestatus, detach, windows_verbatim, windows_hide, env, dir)
@@ -63,7 +66,7 @@ while changing the settings of the optional keyword arguments:
   array or tuple of `"var"=>val` pairs. In order to modify (rather than replace) the
   existing environment, initialize `env` with `copy(ENV)` and then set `env["var"]=val` as
   desired.  To add to an environment block within a `Cmd` object without replacing all
-  elements, use `addenv()` which will return a `Cmd` object with the updated environment.
+  elements, use [`addenv()`](@ref) which will return a `Cmd` object with the updated environment.
 * `dir::AbstractString`: Specify a working directory for the command (instead
   of the current directory).
 
@@ -103,13 +106,19 @@ shell_escape(cmd::Cmd; special::AbstractString="") =
     shell_escape(cmd.exec..., special=special)
 shell_escape_posixly(cmd::Cmd) =
     shell_escape_posixly(cmd.exec...)
-shell_escape_winsomely(cmd::Cmd) =
-    shell_escape_winsomely(cmd.exec...)
+shell_escape_csh(cmd::Cmd) =
+    shell_escape_csh(cmd.exec...)
+escape_microsoft_c_args(cmd::Cmd) =
+    escape_microsoft_c_args(cmd.exec...)
+escape_microsoft_c_args(io::IO, cmd::Cmd) =
+    escape_microsoft_c_args(io::IO, cmd.exec...)
 
 function show(io::IO, cmd::Cmd)
     print_env = cmd.env !== nothing
     print_dir = !isempty(cmd.dir)
     (print_env || print_dir) && print(io, "setenv(")
+    print_cpus = cmd.cpus !== nothing
+    print_cpus && print(io, "setcpuaffinity(")
     print(io, '`')
     join(io, map(cmd.exec) do arg
         replace(sprint(context=io) do io
@@ -119,6 +128,11 @@ function show(io::IO, cmd::Cmd)
         end, '`' => "\\`")
     end, ' ')
     print(io, '`')
+    if print_cpus
+        print(io, ", ")
+        show(io, collect(Int, something(cmd.cpus)))
+        print(io, ")")
+    end
     print_env && (print(io, ","); show(io, cmd.env))
     print_dir && (print(io, "; dir="); show(io, cmd.dir))
     (print_dir || print_env) && print(io, ")")
@@ -163,6 +177,7 @@ rawhandle(x::OS_HANDLE) = x
 if OS_HANDLE !== RawFD
     rawhandle(x::RawFD) = Libc._get_osfhandle(x)
 end
+setup_stdio(stdio::Union{DevNull,OS_HANDLE,RawFD}, ::Bool) = (stdio, false)
 
 const Redirectable = Union{IO, FileRedirect, RawFD, OS_HANDLE}
 const StdIOSet = NTuple{3, Redirectable}
@@ -228,48 +243,109 @@ byteenv(env::Union{AbstractVector{Pair{T,V}}, Tuple{Vararg{Pair{T,V}}}}) where {
     String[cstr(k*"="*string(v)) for (k,v) in env]
 
 """
-    setenv(command::Cmd, env; dir="")
+    setenv(command::Cmd, env; dir)
 
 Set environment variables to use when running the given `command`. `env` is either a
 dictionary mapping strings to strings, an array of strings of the form `"var=val"`, or
 zero or more `"var"=>val` pair arguments. In order to modify (rather than replace) the
 existing environment, create `env` through `copy(ENV)` and then setting `env["var"]=val`
-as desired, or use `addenv`.
+as desired, or use [`addenv`](@ref).
 
 The `dir` keyword argument can be used to specify a working directory for the command.
+`dir` defaults to the currently set `dir` for `command` (which is the current working
+directory if not specified already).
+
+See also [`Cmd`](@ref), [`addenv`](@ref), [`ENV`](@ref), [`pwd`](@ref).
 """
-setenv(cmd::Cmd, env; dir="") = Cmd(cmd; env=byteenv(env), dir=dir)
-setenv(cmd::Cmd, env::Pair{<:AbstractString}...; dir="") =
+setenv(cmd::Cmd, env; dir=cmd.dir) = Cmd(cmd; env=byteenv(env), dir=dir)
+setenv(cmd::Cmd, env::Pair{<:AbstractString}...; dir=cmd.dir) =
     setenv(cmd, env; dir=dir)
-setenv(cmd::Cmd; dir="") = Cmd(cmd; dir=dir)
+setenv(cmd::Cmd; dir=cmd.dir) = Cmd(cmd; dir=dir)
+
+# split environment entry string into before and after first `=` (key and value)
+function splitenv(e::String)
+    i = findnext('=', e, 2)
+    if i === nothing
+        throw(ArgumentError("malformed environment entry"))
+    end
+    e[1:prevind(e, i)], e[nextind(e, i):end]
+end
 
 """
-    addenv(command::Cmd, env...)
+    addenv(command::Cmd, env...; inherit::Bool = true)
+
+Merge new environment mappings into the given [`Cmd`](@ref) object, returning a new `Cmd` object.
+Duplicate keys are replaced.  If `command` does not contain any environment values set already,
+it inherits the current environment at time of `addenv()` call if `inherit` is `true`.
+Keys with value `nothing` are deleted from the env.
+
+See also [`Cmd`](@ref), [`setenv`](@ref), [`ENV`](@ref).
 
-Merge new environment mappings into the given `Cmd` object, returning a new `Cmd` object.
-Duplicate keys are replaced.
+!!! compat "Julia 1.6"
+    This function requires Julia 1.6 or later.
 """
-function addenv(cmd::Cmd, env::Dict)
+function addenv(cmd::Cmd, env::Dict; inherit::Bool = true)
     new_env = Dict{String,String}()
-    if cmd.env !== nothing
-        for (k, v) in split.(cmd.env, "=")
+    if cmd.env === nothing
+        if inherit
+            merge!(new_env, ENV)
+        end
+    else
+        for (k, v) in splitenv.(cmd.env)
             new_env[string(k)::String] = string(v)::String
         end
     end
     for (k, v) in env
-        new_env[string(k)::String] = string(v)::String
+        if v === nothing
+            delete!(new_env, string(k)::String)
+        else
+            new_env[string(k)::String] = string(v)::String
+        end
     end
     return setenv(cmd, new_env)
 end
 
-function addenv(cmd::Cmd, pairs::Pair{<:AbstractString}...)
-    return addenv(cmd, Dict(k => v for (k, v) in pairs))
+function addenv(cmd::Cmd, pairs::Pair{<:AbstractString}...; inherit::Bool = true)
+    return addenv(cmd, Dict(k => v for (k, v) in pairs); inherit)
 end
 
-function addenv(cmd::Cmd, env::Vector{<:AbstractString})
-    return addenv(cmd, Dict(k => v for (k, v) in split.(env, "=")))
+function addenv(cmd::Cmd, env::Vector{<:AbstractString}; inherit::Bool = true)
+    return addenv(cmd, Dict(k => v for (k, v) in splitenv.(env)); inherit)
 end
 
+"""
+    setcpuaffinity(original_command::Cmd, cpus) -> command::Cmd
+
+Set the CPU affinity of the `command` by a list of CPU IDs (1-based) `cpus`.  Passing
+`cpus = nothing` means to unset the CPU affinity if the `original_command` has any.
+
+This function is supported only in Linux and Windows.  It is not supported in macOS because
+libuv does not support affinity setting.
+
+!!! compat "Julia 1.8"
+    This function requires at least Julia 1.8.
+
+# Examples
+
+In Linux, the `taskset` command line program can be used to see how `setcpuaffinity` works.
+
+```julia
+julia> run(setcpuaffinity(`sh -c 'taskset -p \$\$'`, [1, 2, 5]));
+pid 2273's current affinity mask: 13
+```
+
+Note that the mask value `13` reflects that the first, second, and the fifth bits (counting
+from the least significant position) are turned on:
+
+```julia
+julia> 0b010011
+0x13
+```
+"""
+function setcpuaffinity end
+setcpuaffinity(cmd::Cmd, ::Nothing) = Cmd(cmd; cpus = nothing)
+setcpuaffinity(cmd::Cmd, cpus) = Cmd(cmd; cpus = collect(UInt16, cpus))
+
 (&)(left::AbstractCmd, right::AbstractCmd) = AndCmds(left, right)
 redir_out(src::AbstractCmd, dest::AbstractCmd) = OrCmds(src, dest)
 redir_err(src::AbstractCmd, dest::AbstractCmd) = ErrOrCmds(src, dest)
diff --git a/base/combinatorics.jl b/base/combinatorics.jl
index 9469452735da23..2dd69fbce4c42b 100644
--- a/base/combinatorics.jl
+++ b/base/combinatorics.jl
@@ -91,7 +91,7 @@ function isperm(P::Tuple)
     end
 end
 
-isperm(P::Any16) = _isperm(P)
+isperm(P::Any32) = _isperm(P)
 
 # swap columns i and j of a, in-place
 function swapcols!(a::AbstractMatrix, i, j)
@@ -103,6 +103,18 @@ function swapcols!(a::AbstractMatrix, i, j)
         @inbounds a[k,i],a[k,j] = a[k,j],a[k,i]
     end
 end
+
+# swap rows i and j of a, in-place
+function swaprows!(a::AbstractMatrix, i, j)
+    i == j && return
+    rows = axes(a,1)
+    @boundscheck i in rows || throw(BoundsError(a, (:,i)))
+    @boundscheck j in rows || throw(BoundsError(a, (:,j)))
+    for k in axes(a,2)
+        @inbounds a[i,k],a[j,k] = a[j,k],a[i,k]
+    end
+end
+
 # like permute!! applied to each row of a, in-place in a (overwriting p).
 function permutecols!!(a::AbstractMatrix, p::AbstractVector{<:Integer})
     require_one_based_indexing(a, p)
@@ -228,8 +240,15 @@ invpermute!(a, p::AbstractVector) = invpermute!!(a, copymutable(p))
 Return the inverse permutation of `v`.
 If `B = A[v]`, then `A == B[invperm(v)]`.
 
+See also [`sortperm`](@ref), [`invpermute!`](@ref), [`isperm`](@ref), [`permutedims`](@ref).
+
 # Examples
 ```jldoctest
+julia> p = (2, 3, 1);
+
+julia> invperm(p)
+(3, 1, 2)
+
 julia> v = [2; 4; 3; 1];
 
 julia> invperm(v)
@@ -286,7 +305,7 @@ function invperm(P::Tuple)
     end
 end
 
-invperm(P::Any16) = Tuple(invperm(collect(P)))
+invperm(P::Any32) = Tuple(invperm(collect(P)))
 
 #XXX This function should be moved to Combinatorics.jl but is currently used by Base.DSP.
 """
@@ -307,7 +326,7 @@ julia> 2^2 * 3^3
 !!! compat "Julia 1.6"
     The method that accepts a tuple requires Julia 1.6 or later.
 """
-function nextprod(a::Union{Tuple{Vararg{<:Integer}},AbstractVector{<:Integer}}, x::Real)
+function nextprod(a::Union{Tuple{Vararg{Integer}},AbstractVector{<:Integer}}, x::Real)
     if x > typemax(Int)
         throw(ArgumentError("unsafe for x > typemax(Int), got $x"))
     end
diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl
index d4f92a3b8176f3..36ab6b81f47a06 100644
--- a/base/compiler/abstractinterpretation.jl
+++ b/base/compiler/abstractinterpretation.jl
@@ -4,160 +4,226 @@
 # constants #
 #############
 
-const CoreNumType = Union{Int32, Int64, Float32, Float64}
-
 const _REF_NAME = Ref.body.name
 
 #########
 # logic #
 #########
 
-# see if the inference result might affect the final answer
-call_result_unused(frame::InferenceState, pc::LineNum=frame.currpc) =
-    isexpr(frame.src.code[frame.currpc], :call) && isempty(frame.ssavalue_uses[pc])
+# See if the inference result of the current statement's result value might affect
+# the final answer for the method (aside from optimization potential and exceptions).
+# To do that, we need to check both for slot assignment and SSA usage.
+call_result_unused(frame::InferenceState) =
+    isexpr(frame.src.code[frame.currpc], :call) && isempty(frame.ssavalue_uses[frame.currpc])
 
+function get_max_methods(mod::Module, interp::AbstractInterpreter)
+    max_methods = ccall(:jl_get_module_max_methods, Cint, (Any,), mod) % Int
+    max_methods < 0 ? InferenceParams(interp).MAX_METHODS : max_methods
+end
 
-function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f), argtypes::Vector{Any}, @nospecialize(atype), sv::InferenceState,
-                                  max_methods::Int = InferenceParams(interp).MAX_METHODS)
-    if sv.params.unoptimize_throw_blocks && sv.currpc in sv.throw_blocks
-        return CallMeta(Any, false)
+function get_max_methods(@nospecialize(f), mod::Module, interp::AbstractInterpreter)
+    if f !== nothing
+        fmm = typeof(f).name.max_methods
+        fmm !== UInt8(0) && return Int(fmm)
     end
-    valid_worlds = WorldRange()
-    atype_params = unwrap_unionall(atype).parameters
-    splitunions = 1 < countunionsplit(atype_params) <= InferenceParams(interp).MAX_UNION_SPLITTING
-    mts = Core.MethodTable[]
-    fullmatch = Bool[]
-    if splitunions
-        splitsigs = switchtupleunion(atype)
-        applicable = Any[]
-        infos = MethodMatchInfo[]
-        for sig_n in splitsigs
-            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
-            if mt === nothing
-                add_remark!(interp, sv, "Could not identify method table for call")
-                return CallMeta(Any, false)
-            end
-            mt = mt::Core.MethodTable
-            matches = findall(sig_n, method_table(interp); limit=max_methods)
-            if matches === missing
-                add_remark!(interp, sv, "For one of the union split cases, too many methods matched")
-                return CallMeta(Any, false)
-            end
-            push!(infos, MethodMatchInfo(matches))
-            append!(applicable, matches)
-            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
-            thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
-            found = false
-            for (i, mt′) in enumerate(mts)
-                if mt′ === mt
-                    fullmatch[i] &= thisfullmatch
-                    found = true
-                    break
-                end
-            end
-            if !found
-                push!(mts, mt)
-                push!(fullmatch, thisfullmatch)
+    return get_max_methods(mod, interp)
+end
+
+const empty_bitset = BitSet()
+
+function should_infer_for_effects(sv::InferenceState)
+    sv.ipo_effects.terminates === ALWAYS_TRUE &&
+    sv.ipo_effects.effect_free === ALWAYS_TRUE
+end
+
+function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
+                                  arginfo::ArgInfo, @nospecialize(atype),
+                                  sv::InferenceState, max_methods::Int)
+    if !should_infer_for_effects(sv) &&
+            sv.params.unoptimize_throw_blocks &&
+            is_stmt_throw_block(get_curr_ssaflag(sv))
+        # Disable inference of calls in throw blocks, since we're unlikely to
+        # need their types. There is one exception however: If up until now, the
+        # function has not seen any side effects, we would like to make sure there
+        # aren't any in the throw block either to enable other optimizations.
+        add_remark!(interp, sv, "Skipped call in throw block")
+        nonoverlayed = false
+        if isoverlayed(method_table(interp)) && is_nonoverlayed(sv.ipo_effects)
+            # as we may want to concrete-evaluate this frame in cases when there are
+            # no overlayed calls, try an additional effort now to check if this call
+            # isn't overlayed rather than just handling it conservatively
+            matches = find_matching_methods(arginfo.argtypes, atype, method_table(interp),
+            InferenceParams(interp).MAX_UNION_SPLITTING, max_methods)
+            if !isa(matches, FailedMethodMatch)
+                nonoverlayed = matches.nonoverlayed
             end
+        else
+            nonoverlayed = true
         end
-        info = UnionSplitInfo(infos)
-    else
-        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
-        if mt === nothing
-            add_remark!(interp, sv, "Could not identify method table for call")
-            return CallMeta(Any, false)
-        end
-        mt = mt::Core.MethodTable
-        matches = findall(atype, method_table(interp, sv); limit=max_methods)
-        if matches === missing
-            # this means too many methods matched
-            # (assume this will always be true, so we don't compute / update valid age in this case)
-            add_remark!(interp, sv, "Too many methods matched")
-            return CallMeta(Any, false)
-        end
-        push!(mts, mt)
-        push!(fullmatch, _any(match->(match::MethodMatch).fully_covers, matches))
-        info = MethodMatchInfo(matches)
-        applicable = matches.matches
-        valid_worlds = matches.valid_worlds
+        # At this point we are guaranteed to end up throwing on this path,
+        # which is all that's required for :consistent-cy. Of course, we don't
+        # know anything else about this statement.
+        tristate_merge!(sv, Effects(; consistent=ALWAYS_TRUE, nonoverlayed))
+        return CallMeta(Any, false)
     end
+
+    argtypes = arginfo.argtypes
+    matches = find_matching_methods(argtypes, atype, method_table(interp),
+        InferenceParams(interp).MAX_UNION_SPLITTING, max_methods)
+    if isa(matches, FailedMethodMatch)
+        add_remark!(interp, sv, matches.reason)
+        tristate_merge!(sv, Effects())
+        return CallMeta(Any, false)
+    end
+
+    (; valid_worlds, applicable, info) = matches
     update_valid_age!(sv, valid_worlds)
-    applicable = applicable::Array{Any,1}
     napplicable = length(applicable)
     rettype = Bottom
-    edgecycle = false
-    edges = Any[]
-    nonbot = 0  # the index of the only non-Bottom inference result if > 0
-    seen = 0    # number of signatures actually inferred
-    istoplevel = sv.linfo.def isa Module
+    edges = MethodInstance[]
+    conditionals = nothing # keeps refinement information of call argument types when the return type is boolean
+    seen = 0               # number of signatures actually inferred
+    any_const_result = false
+    const_results = Union{Nothing,ConstResult}[]
     multiple_matches = napplicable > 1
-
-    if f !== nothing && napplicable == 1 && is_method_pure(applicable[1]::MethodMatch)
-        val = pure_eval_call(f, argtypes)
-        if val !== false
-            # TODO: add some sort of edge(s)
-            return CallMeta(val, MethodResultPure())
-        end
+    fargs = arginfo.fargs
+    all_effects = EFFECTS_TOTAL
+    if !matches.nonoverlayed
+        # currently we don't have a good way to execute the overlayed method definition,
+        # so we should give up pure/concrete eval when any of the matched methods is overlayed
+        f = nothing
+        all_effects = Effects(all_effects; nonoverlayed=false)
     end
 
+    # try pure-evaluation
+    val = pure_eval_call(interp, f, applicable, arginfo, sv)
+    val !== nothing && return CallMeta(val, MethodResultPure(info)) # TODO: add some sort of edge(s)
+
     for i in 1:napplicable
         match = applicable[i]::MethodMatch
         method = match.method
         sig = match.spec_types
-        if istoplevel && !isdispatchtuple(sig)
+        if bail_out_toplevel_call(interp, sig, sv)
             # only infer concrete call sites in top-level expressions
             add_remark!(interp, sv, "Refusing to infer non-concrete call site in top-level expression")
             rettype = Any
             break
         end
-        sigtuple = unwrap_unionall(sig)::DataType
-        splitunions = false
         this_rt = Bottom
-        # TODO: splitunions = 1 < countunionsplit(sigtuple.parameters) * napplicable <= InferenceParams(interp).MAX_UNION_SPLITTING
-        # currently this triggers a bug in inference recursion detection
+        splitunions = false
+        # TODO: this used to trigger a bug in inference recursion detection, and is unmaintained now
+        # sigtuple = unwrap_unionall(sig)::DataType
+        # splitunions = 1 < unionsplitcost(sigtuple.parameters) * napplicable <= InferenceParams(interp).MAX_UNION_SPLITTING
         if splitunions
             splitsigs = switchtupleunion(sig)
             for sig_n in splitsigs
-                rt, edgecycle1, edge = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, sv)
-                if edge !== nothing
-                    push!(edges, edge)
+                result = abstract_call_method(interp, method, sig_n, svec(), multiple_matches, sv)
+                rt = result.rt
+                edge = result.edge
+                edge !== nothing && push!(edges, edge)
+                this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
+                this_arginfo = ArgInfo(fargs, this_argtypes)
+                const_call_result = abstract_call_method_with_const_args(interp, result,
+                    f, this_arginfo, match, sv)
+                effects = result.edge_effects
+                const_result = nothing
+                if const_call_result !== nothing
+                    const_rt = const_call_result.rt
+                    if const_rt ⊑ rt
+                        rt = const_rt
+                        (; effects, const_result) = const_call_result
+                    end
                 end
-                edgecycle |= edgecycle1::Bool
+                all_effects = tristate_merge(all_effects, effects)
+                push!(const_results, const_result)
+                any_const_result |= const_result !== nothing
                 this_rt = tmerge(this_rt, rt)
-                this_rt === Any && break
+                if bail_out_call(interp, this_rt, sv)
+                    break
+                end
             end
+            this_conditional = ignorelimited(this_rt)
+            this_rt = widenwrappedconditional(this_rt)
         else
-            this_rt, edgecycle1, edge = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, sv)
-            edgecycle |= edgecycle1::Bool
-            if edge !== nothing
-                push!(edges, edge)
+            if infer_compilation_signature(interp)
+                # Also infer the compilation signature for this method, so it's available
+                # to the compiler in case it ends up needing it (which is likely).
+                csig = get_compileable_sig(method, sig, match.sparams)
+                if csig !== nothing && csig !== sig
+                    # The result of this inference is not directly used, so temporarily empty
+                    # the use set for the current SSA value.
+                    saved_uses = sv.ssavalue_uses[sv.currpc]
+                    sv.ssavalue_uses[sv.currpc] = empty_bitset
+                    abstract_call_method(interp, method, csig, match.sparams, multiple_matches, sv)
+                    sv.ssavalue_uses[sv.currpc] = saved_uses
+                end
             end
-        end
-        if this_rt !== Bottom
-            if nonbot === 0
-                nonbot = i
-            else
-                nonbot = -1
+
+            result = abstract_call_method(interp, method, sig, match.sparams, multiple_matches, sv)
+            this_conditional = ignorelimited(result.rt)
+            this_rt = widenwrappedconditional(result.rt)
+            edge = result.edge
+            edge !== nothing && push!(edges, edge)
+            # try constant propagation with argtypes for this match
+            # this is in preparation for inlining, or improving the return result
+            this_argtypes = isa(matches, MethodMatches) ? argtypes : matches.applicable_argtypes[i]
+            this_arginfo = ArgInfo(fargs, this_argtypes)
+            const_call_result = abstract_call_method_with_const_args(interp, result,
+                f, this_arginfo, match, sv)
+            effects = result.edge_effects
+            const_result = nothing
+            if const_call_result !== nothing
+                this_const_conditional = ignorelimited(const_call_result.rt)
+                this_const_rt = widenwrappedconditional(const_call_result.rt)
+                # return type of const-prop' inference can be wider than that of non const-prop' inference
+                # e.g. in cases when there are cycles but cached result is still accurate
+                if this_const_rt ⊑ this_rt
+                    this_conditional = this_const_conditional
+                    this_rt = this_const_rt
+                    (; effects, const_result) = const_call_result
+                end
             end
+            all_effects = tristate_merge(all_effects, effects)
+            push!(const_results, const_result)
+            any_const_result |= const_result !== nothing
         end
+        @assert !(this_conditional isa Conditional) "invalid lattice element returned from inter-procedural context"
         seen += 1
         rettype = tmerge(rettype, this_rt)
-        rettype === Any && break
-    end
-    # try constant propagation if only 1 method is inferred to non-Bottom
-    # this is in preparation for inlining, or improving the return result
-    is_unused = call_result_unused(sv)
-    if nonbot > 0 && seen == napplicable && (!edgecycle || !is_unused) && isa(rettype, Type) && InferenceParams(interp).ipo_constant_propagation
-        # if there's a possibility we could constant-propagate a better result
-        # (hopefully without doing too much work), try to do that now
-        # TODO: it feels like this could be better integrated into abstract_call_method / typeinf_edge
-        const_rettype = abstract_call_method_with_const_args(interp, rettype, f, argtypes, applicable[nonbot]::MethodMatch, sv, edgecycle)
-        if const_rettype ⊑ rettype
-            # use the better result, if it's a refinement of rettype
-            rettype = const_rettype
-        end
-    end
-    if is_unused && !(rettype === Bottom)
+        if this_conditional !== Bottom && is_lattice_bool(rettype) && fargs !== nothing
+            if conditionals === nothing
+                conditionals = Any[Bottom for _ in 1:length(argtypes)],
+                               Any[Bottom for _ in 1:length(argtypes)]
+            end
+            for i = 1:length(argtypes)
+                cnd = conditional_argtype(this_conditional, sig, argtypes, i)
+                conditionals[1][i] = tmerge(conditionals[1][i], cnd.vtype)
+                conditionals[2][i] = tmerge(conditionals[2][i], cnd.elsetype)
+            end
+        end
+        if bail_out_call(interp, rettype, sv)
+            break
+        end
+    end
+
+    if any_const_result && seen == napplicable
+        @assert napplicable == nmatches(info) == length(const_results)
+        info = ConstCallInfo(info, const_results)
+    end
+
+    if seen != napplicable
+        # there may be unanalyzed effects within unseen dispatch candidate,
+        # but we can still ignore nonoverlayed effect here since we already accounted for it
+        all_effects = tristate_merge(all_effects, EFFECTS_UNKNOWN)
+    elseif isa(matches, MethodMatches) ? (!matches.fullmatch || any_ambig(matches)) :
+            (!_all(b->b, matches.fullmatches) || any_ambig(matches))
+        # Account for the fact that we may encounter a MethodError with a non-covered or ambiguous signature.
+        all_effects = Effects(all_effects; nothrow=TRISTATE_UNKNOWN)
+    end
+
+    rettype = from_interprocedural!(rettype, sv, arginfo, conditionals)
+
+    if call_result_unused(sv) && !(rettype === Bottom)
         add_remark!(interp, sv, "Call result type was widened because the return value is unused")
         # We're mainly only here because the optimizer might want this code,
         # but we ourselves locally don't typically care about it locally
@@ -167,178 +233,278 @@ function abstract_call_gf_by_type(interp::AbstractInterpreter, @nospecialize(f),
         # and avoid keeping track of a more complex result type.
         rettype = Any
     end
-    if !(rettype === Any) # adding a new method couldn't refine (widen) this type
-        for edge in edges
-            add_backedge!(edge::MethodInstance, sv)
-        end
-        for (thisfullmatch, mt) in zip(fullmatch, mts)
-            if !thisfullmatch
-                # also need an edge to the method table in case something gets
-                # added that did not intersect with any existing method
-                add_mt_backedge!(mt, atype, sv)
-            end
+    add_call_backedges!(interp, rettype, all_effects, edges, matches, atype, sv)
+    if !isempty(sv.pclimitations) # remove self, if present
+        delete!(sv.pclimitations, sv)
+        for caller in sv.callers_in_cycle
+            delete!(sv.pclimitations, caller)
         end
     end
-    #print("=> ", rettype, "\n")
+    tristate_merge!(sv, all_effects)
     return CallMeta(rettype, info)
 end
 
+struct FailedMethodMatch
+    reason::String
+end
 
-function const_prop_profitable(@nospecialize(arg))
-    # have new information from argtypes that wasn't available from the signature
-    if isa(arg, PartialStruct)
-        for b in arg.fields
-            isconstType(b) && return true
-            const_prop_profitable(b) && return true
-        end
-    elseif !isa(arg, Const) || (isa(arg.val, Symbol) || isa(arg.val, Type) || (!isa(arg.val, String) && !ismutable(arg.val)))
-        # don't consider mutable values or Strings useful constants
-        return true
-    end
-    return false
+struct MethodMatches
+    applicable::Vector{Any}
+    info::MethodMatchInfo
+    valid_worlds::WorldRange
+    mt::Core.MethodTable
+    fullmatch::Bool
+    nonoverlayed::Bool
 end
+any_ambig(info::MethodMatchInfo) = info.results.ambig
+any_ambig(m::MethodMatches) = any_ambig(m.info)
 
-# This is a heuristic to avoid trying to const prop through complicated functions
-# where we would spend a lot of time, but are probably unliekly to get an improved
-# result anyway.
-function const_prop_heuristic(interp::AbstractInterpreter, method::Method, mi::MethodInstance)
-    # Peek at the inferred result for the function to determine if the optimizer
-    # was able to cut it down to something simple (inlineable in particular).
-    # If so, there's a good chance we might be able to const prop all the way
-    # through and learn something new.
-    code = get(code_cache(interp), mi, nothing)
-    declared_inline = isdefined(method, :source) && ccall(:jl_ir_flag_inlineable, Bool, (Any,), method.source)
-    cache_inlineable = declared_inline
-    if isdefined(code, :inferred) && !cache_inlineable
-        cache_inf = code.inferred
-        if !(cache_inf === nothing)
-            cache_src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), cache_inf)
-            cache_src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), cache_inf)
-            cache_inlineable = cache_src_inferred && cache_src_inlineable
-        end
-    end
-    if !cache_inlineable
-        return false
-    end
-    return true
+struct UnionSplitMethodMatches
+    applicable::Vector{Any}
+    applicable_argtypes::Vector{Vector{Any}}
+    info::UnionSplitInfo
+    valid_worlds::WorldRange
+    mts::Vector{Core.MethodTable}
+    fullmatches::Vector{Bool}
+    nonoverlayed::Bool
 end
+any_ambig(m::UnionSplitMethodMatches) = _any(any_ambig, m.info.matches)
 
-function abstract_call_method_with_const_args(interp::AbstractInterpreter, @nospecialize(rettype), @nospecialize(f), argtypes::Vector{Any}, match::MethodMatch, sv::InferenceState, edgecycle::Bool)
-    method = match.method
-    nargs::Int = method.nargs
-    method.isva && (nargs -= 1)
-    length(argtypes) >= nargs || return Any
-    haveconst = false
-    allconst = true
-    # see if any or all of the arguments are constant and propagating constants may be worthwhile
-    for a in argtypes
-        a = widenconditional(a)
-        if allconst && !isa(a, Const) && !isconstType(a) && !isa(a, PartialStruct)
-            allconst = false
+function find_matching_methods(argtypes::Vector{Any}, @nospecialize(atype), method_table::MethodTableView,
+                               union_split::Int, max_methods::Int)
+    # NOTE this is valid as far as any "constant" lattice element doesn't represent `Union` type
+    if 1 < unionsplitcost(argtypes) <= union_split
+        split_argtypes = switchtupleunion(argtypes)
+        infos = MethodMatchInfo[]
+        applicable = Any[]
+        applicable_argtypes = Vector{Any}[] # arrays like `argtypes`, including constants, for each match
+        valid_worlds = WorldRange()
+        mts = Core.MethodTable[]
+        fullmatches = Bool[]
+        nonoverlayed = true
+        for i in 1:length(split_argtypes)
+            arg_n = split_argtypes[i]::Vector{Any}
+            sig_n = argtypes_to_type(arg_n)
+            mt = ccall(:jl_method_table_for, Any, (Any,), sig_n)
+            mt === nothing && return FailedMethodMatch("Could not identify method table for call")
+            mt = mt::Core.MethodTable
+            result = findall(sig_n, method_table; limit = max_methods)
+            if result === missing
+                return FailedMethodMatch("For one of the union split cases, too many methods matched")
+            end
+            matches, overlayed = result
+            nonoverlayed &= !overlayed
+            push!(infos, MethodMatchInfo(matches))
+            for m in matches
+                push!(applicable, m)
+                push!(applicable_argtypes, arg_n)
+            end
+            valid_worlds = intersect(valid_worlds, matches.valid_worlds)
+            thisfullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
+            found = false
+            for (i, mt′) in enumerate(mts)
+                if mt′ === mt
+                    fullmatches[i] &= thisfullmatch
+                    found = true
+                    break
+                end
+            end
+            if !found
+                push!(mts, mt)
+                push!(fullmatches, thisfullmatch)
+            end
         end
-        if !haveconst && has_nontrivial_const_info(a) && const_prop_profitable(a)
-            haveconst = true
+        return UnionSplitMethodMatches(applicable,
+                                       applicable_argtypes,
+                                       UnionSplitInfo(infos),
+                                       valid_worlds,
+                                       mts,
+                                       fullmatches,
+                                       nonoverlayed)
+    else
+        mt = ccall(:jl_method_table_for, Any, (Any,), atype)
+        if mt === nothing
+            return FailedMethodMatch("Could not identify method table for call")
         end
-        if haveconst && !allconst
-            break
+        mt = mt::Core.MethodTable
+        result = findall(atype, method_table; limit = max_methods)
+        if result === missing
+            # this means too many methods matched
+            # (assume this will always be true, so we don't compute / update valid age in this case)
+            return FailedMethodMatch("Too many methods matched")
+        end
+        matches, overlayed = result
+        fullmatch = _any(match->(match::MethodMatch).fully_covers, matches)
+        return MethodMatches(matches.matches,
+                             MethodMatchInfo(matches),
+                             matches.valid_worlds,
+                             mt,
+                             fullmatch,
+                             !overlayed)
+    end
+end
+
+"""
+    from_interprocedural!(rt, sv::InferenceState, arginfo::ArgInfo, maybecondinfo) -> newrt
+
+Converts inter-procedural return type `rt` into a local lattice element `newrt`,
+that is appropriate in the context of current local analysis frame `sv`, especially:
+- unwraps `rt::LimitedAccuracy` and collects its limitations into the current frame `sv`
+- converts boolean `rt` to new boolean `newrt` in a way `newrt` can propagate extra conditional
+  refinement information, e.g. translating `rt::InterConditional` into `newrt::Conditional`
+  that holds a type constraint information about a variable in `sv`
+
+This function _should_ be used wherever we propagate results returned from
+`abstract_call_method` or `abstract_call_method_with_const_args`.
+
+When `maybecondinfo !== nothing`, this function also tries extra conditional argument type refinement.
+In such cases `maybecondinfo` should be either of:
+- `maybecondinfo::Tuple{Vector{Any},Vector{Any}}`: precomputed argument type refinement information
+- method call signature tuple type
+When we deal with multiple `MethodMatch`es, it's better to precompute `maybecondinfo` by
+`tmerge`ing argument signature type of each method call.
+"""
+function from_interprocedural!(@nospecialize(rt), sv::InferenceState, arginfo::ArgInfo, @nospecialize(maybecondinfo))
+    rt = collect_limitations!(rt, sv)
+    if is_lattice_bool(rt)
+        if maybecondinfo === nothing
+            rt = widenconditional(rt)
+        else
+            rt = from_interconditional(rt, sv, arginfo, maybecondinfo)
         end
     end
-    haveconst || improvable_via_constant_propagation(rettype) || return Any
-    if nargs > 1
-        if istopfunction(f, :getindex) || istopfunction(f, :setindex!)
-            arrty = argtypes[2]
-            # don't propagate constant index into indexing of non-constant array
-            if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty)
-                return Any
-            elseif arrty ⊑ Array
-                return Any
+    @assert !(rt isa InterConditional) "invalid lattice element returned from inter-procedural context"
+    return rt
+end
+
+function collect_limitations!(@nospecialize(typ), sv::InferenceState)
+    if isa(typ, LimitedAccuracy)
+        union!(sv.pclimitations, typ.causes)
+        return typ.typ
+    end
+    return typ
+end
+
+function from_interconditional(@nospecialize(typ), sv::InferenceState, (; fargs, argtypes)::ArgInfo, @nospecialize(maybecondinfo))
+    fargs === nothing && return widenconditional(typ)
+    slot = 0
+    vtype = elsetype = Any
+    condval = maybe_extract_const_bool(typ)
+    for i in 1:length(fargs)
+        # find the first argument which supports refinement,
+        # and intersect all equivalent arguments with it
+        arg = ssa_def_slot(fargs[i], sv)
+        arg isa SlotNumber || continue # can't refine
+        old = argtypes[i]
+        old isa Type || continue # unlikely to refine
+        id = slot_id(arg)
+        if slot == 0 || id == slot
+            if isa(maybecondinfo, Tuple{Vector{Any},Vector{Any}})
+                # if we have already computed argument refinement information, apply that now to get the result
+                new_vtype = maybecondinfo[1][i]
+                new_elsetype = maybecondinfo[2][i]
+            else
+                # otherwise compute it on the fly
+                cnd = conditional_argtype(typ, maybecondinfo, argtypes, i)
+                new_vtype = cnd.vtype
+                new_elsetype = cnd.elsetype
             end
-        elseif istopfunction(f, :iterate)
-            itrty = argtypes[2]
-            if itrty ⊑ Array
-                return Any
+            if condval === false
+                vtype = Bottom
+            elseif new_vtype ⊑ vtype
+                vtype = new_vtype
+            else
+                vtype = tmeet(vtype, widenconst(new_vtype))
+            end
+            if condval === true
+                elsetype = Bottom
+            elseif new_elsetype ⊑ elsetype
+                elsetype = new_elsetype
+            else
+                elsetype = tmeet(elsetype, widenconst(new_elsetype))
+            end
+            if (slot > 0 || condval !== false) && vtype ⋤ old
+                slot = id
+            elseif (slot > 0 || condval !== true) && elsetype ⋤ old
+                slot = id
+            else # reset: no new useful information for this slot
+                vtype = elsetype = Any
+                if slot > 0
+                    slot = 0
+                end
             end
         end
     end
-    if !allconst && (istopfunction(f, :+) || istopfunction(f, :-) || istopfunction(f, :*) ||
-                     istopfunction(f, :(==)) || istopfunction(f, :!=) ||
-                     istopfunction(f, :<=) || istopfunction(f, :>=) || istopfunction(f, :<) || istopfunction(f, :>) ||
-                     istopfunction(f, :<<) || istopfunction(f, :>>))
-        return Any
+    if vtype === Bottom && elsetype === Bottom
+        return Bottom # accidentally proved this call to be dead / throw !
+    elseif slot > 0
+        return Conditional(SlotNumber(slot), vtype, elsetype) # record a Conditional improvement to this slot
     end
-    force_inference = allconst || InferenceParams(interp).aggressive_constant_propagation
-    if istopfunction(f, :getproperty) || istopfunction(f, :setproperty!)
-        force_inference = true
+    return widenconditional(typ)
+end
+
+function conditional_argtype(@nospecialize(rt), @nospecialize(sig), argtypes::Vector{Any}, i::Int)
+    if isa(rt, InterConditional) && rt.slot == i
+        return rt
+    else
+        vtype = elsetype = tmeet(argtypes[i], fieldtype(sig, i))
+        condval = maybe_extract_const_bool(rt)
+        condval === true && (elsetype = Bottom)
+        condval === false && (vtype = Bottom)
+        return InterConditional(i, vtype, elsetype)
     end
-    mi = specialize_method(match, !force_inference)
-    mi === nothing && return Any
-    mi = mi::MethodInstance
-    # decide if it's likely to be worthwhile
-    if !force_inference && !const_prop_heuristic(interp, method, mi)
-        return Any
+end
+
+function add_call_backedges!(interp::AbstractInterpreter,
+    @nospecialize(rettype), all_effects::Effects,
+    edges::Vector{MethodInstance}, matches::Union{MethodMatches,UnionSplitMethodMatches}, @nospecialize(atype),
+    sv::InferenceState)
+    # we don't need to add backedges when:
+    # - a new method couldn't refine (widen) this type and
+    # - the effects are known to not provide any useful IPO information
+    if rettype === Any
+        if !isoverlayed(method_table(interp))
+            # we can ignore the `nonoverlayed` property if `interp` doesn't use
+            # overlayed method table at all since it will never be tainted anyway
+            all_effects = Effects(all_effects; nonoverlayed=false)
+        end
+        if all_effects === Effects()
+            return
+        end
     end
-    inf_cache = get_inference_cache(interp)
-    inf_result = cache_lookup(mi, argtypes, inf_cache)
-    if inf_result === nothing
-        if edgecycle
-            # if there might be a cycle, check to make sure we don't end up
-            # calling ourselves here.
-            infstate = sv
-            cyclei = 0
-            while !(infstate === nothing)
-                if method === infstate.linfo.def && any(infstate.result.overridden_by_const)
-                    return Any
-                end
-                if cyclei < length(infstate.callers_in_cycle)
-                    cyclei += 1
-                    infstate = infstate.callers_in_cycle[cyclei]
-                else
-                    cyclei = 0
-                    infstate = infstate.parent
-                end
-            end
+    for edge in edges
+        add_backedge!(edge, sv)
+    end
+    # also need an edge to the method table in case something gets
+    # added that did not intersect with any existing method
+    if isa(matches, MethodMatches)
+        matches.fullmatch || add_mt_backedge!(matches.mt, atype, sv)
+    else
+        for (thisfullmatch, mt) in zip(matches.fullmatches, matches.mts)
+            thisfullmatch || add_mt_backedge!(mt, atype, sv)
         end
-        inf_result = InferenceResult(mi, argtypes)
-        frame = InferenceState(inf_result, #=cache=#false, interp)
-        frame === nothing && return Any # this is probably a bad generated function (unsound), but just ignore it
-        frame.limited = true
-        frame.parent = sv
-        push!(inf_cache, inf_result)
-        typeinf(interp, frame) || return Any
     end
-    result = inf_result.result
-    # if constant inference hits a cycle, just bail out
-    isa(result, InferenceState) && return Any
-    add_backedge!(inf_result.linfo, sv)
-    return result
 end
 
 const RECURSION_UNUSED_MSG = "Bounded recursion detected with unused result. Annotated return type may be wider than true result."
+const RECURSION_MSG = "Bounded recursion detected. Call was widened to force convergence."
 
 function abstract_call_method(interp::AbstractInterpreter, method::Method, @nospecialize(sig), sparams::SimpleVector, hardlimit::Bool, sv::InferenceState)
     if method.name === :depwarn && isdefined(Main, :Base) && method.module === Main.Base
         add_remark!(interp, sv, "Refusing to infer into `depwarn`")
-        return Any, false, nothing
+        return MethodCallResult(Any, false, false, nothing, Effects())
     end
     topmost = nothing
     # Limit argument type tuple growth of functions:
     # look through the parents list to see if there's a call to the same method
     # and from the same method.
     # Returns the topmost occurrence of that repeated edge.
-    cyclei = 0
-    infstate = sv
     edgecycle = false
-    # The `method_for_inference_heuristics` will expand the given method's generator if
-    # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists.
-    # The other `CodeInfo`s we inspect will already have this field inflated, so we just
-    # access it directly instead (to avoid regeneration).
-    method2 = method_for_inference_heuristics(method, sig, sparams) # Union{Method, Nothing}
-    sv_method2 = sv.src.method_for_inference_limit_heuristics # limit only if user token match
-    sv_method2 isa Method || (sv_method2 = nothing) # Union{Method, Nothing}
-    while !(infstate === nothing)
-        infstate = infstate::InferenceState
+    edgelimited = false
+
+    for infstate in InfStackUnwind(sv)
         if method === infstate.linfo.def
-            if infstate.linfo.specTypes == sig
+            if infstate.linfo.specTypes::Type == sig::Type
                 # avoid widening when detecting self-recursion
                 # TODO: merge call cycle and return right away
                 if call_result_unused(sv)
@@ -347,127 +513,578 @@ function abstract_call_method(interp::AbstractInterpreter, method::Method, @nosp
                     # we have a self-cycle in the call-graph, but not in the inference graph (typically):
                     # break this edge now (before we record it) by returning early
                     # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                    return Any, true, nothing
+                    return MethodCallResult(Any, true, true, nothing, Effects())
                 end
                 topmost = nothing
                 edgecycle = true
                 break
             end
-            inf_method2 = infstate.src.method_for_inference_limit_heuristics # limit only if user token match
-            inf_method2 isa Method || (inf_method2 = nothing) # Union{Method, Nothing}
-            if topmost === nothing && method2 === inf_method2
-                if hardlimit
-                    topmost = infstate
-                    edgecycle = true
-                else
-                    # if this is a soft limit,
-                    # also inspect the parent of this edge,
-                    # to see if they are the same Method as sv
-                    # in which case we'll need to ensure it is convergent
-                    # otherwise, we don't
-                    for parent in infstate.callers_in_cycle
-                        # check in the cycle list first
-                        # all items in here are mutual parents of all others
-                        parent_method2 = parent.src.method_for_inference_limit_heuristics # limit only if user token match
-                        parent_method2 isa Method || (parent_method2 = nothing) # Union{Method, Nothing}
-                        if parent.linfo.def === sv.linfo.def && sv_method2 === parent_method2
-                            topmost = infstate
-                            edgecycle = true
-                            break
-                        end
-                    end
-                    let parent = infstate.parent
-                        # then check the parent link
-                        if topmost === nothing && parent !== nothing
-                            parent = parent::InferenceState
-                            parent_method2 = parent.src.method_for_inference_limit_heuristics # limit only if user token match
-                            parent_method2 isa Method || (parent_method2 = nothing) # Union{Method, Nothing}
-                            if (parent.cached || parent.limited) && parent.linfo.def === sv.linfo.def && sv_method2 === parent_method2
-                                topmost = infstate
-                                edgecycle = true
-                            end
-                        end
-                    end
+            topmost === nothing || continue
+            if edge_matches_sv(infstate, method, sig, sparams, hardlimit, sv)
+                topmost = infstate
+                edgecycle = true
+            end
+        end
+    end
+
+    if topmost !== nothing
+        sigtuple = unwrap_unionall(sig)::DataType
+        msig = unwrap_unionall(method.sig)::DataType
+        spec_len = length(msig.parameters) + 1
+        ls = length(sigtuple.parameters)
+
+        if method === sv.linfo.def
+            # Under direct self-recursion, permit much greater use of reducers.
+            # here we assume that complexity(specTypes) :>= complexity(sig)
+            comparison = sv.linfo.specTypes
+            l_comparison = length((unwrap_unionall(comparison)::DataType).parameters)
+            spec_len = max(spec_len, l_comparison)
+        else
+            comparison = method.sig
+        end
+
+        if isdefined(method, :recursion_relation)
+            # We don't recquire the recursion_relation to be transitive, so
+            # apply a hard limit
+            hardlimit = true
+        end
+
+        # see if the type is actually too big (relative to the caller), and limit it if required
+        newsig = limit_type_size(sig, comparison, hardlimit ? comparison : sv.linfo.specTypes, InferenceParams(interp).TUPLE_COMPLEXITY_LIMIT_DEPTH, spec_len)
+
+        if newsig !== sig
+            # continue inference, but note that we've limited parameter complexity
+            # on this call (to ensure convergence), so that we don't cache this result
+            if call_result_unused(sv)
+                add_remark!(interp, sv, RECURSION_UNUSED_MSG)
+                # if we don't (typically) actually care about this result,
+                # don't bother trying to examine some complex abstract signature
+                # since it's very unlikely that we'll try to inline this,
+                # or want make an invoke edge to its calling convention return type.
+                # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
+                return MethodCallResult(Any, true, true, nothing, Effects())
+            end
+            add_remark!(interp, sv, RECURSION_MSG)
+            topmost = topmost::InferenceState
+            parentframe = topmost.parent
+            poison_callstack(sv, parentframe === nothing ? topmost : parentframe)
+            sig = newsig
+            sparams = svec()
+            edgelimited = true
+        end
+    end
+
+    # if sig changed, may need to recompute the sparams environment
+    if isa(method.sig, UnionAll) && isempty(sparams)
+        recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), sig, method.sig)::SimpleVector
+        #@assert recomputed[1] !== Bottom
+        # We must not use `sig` here, since that may re-introduce structural complexity that
+        # our limiting heuristic sought to eliminate. The alternative would be to not increment depth over covariant contexts,
+        # but we prefer to permit inference of tuple-destructuring, so we don't do that right now
+        # For example, with a signature such as `Tuple{T, Ref{T}} where {T <: S}`
+        # we might want to limit this to `Tuple{S, Ref}`, while type-intersection can instead give us back the original type
+        # (which moves `S` back up to a lower comparison depth)
+        # Optionally, we could try to drive this to a fixed point, but I think this is getting too complex,
+        # and this would only cause more questions and more problems
+        # (the following is only an example, most of the statements are probable in the wrong order):
+        #     newsig = sig
+        #     seen = IdSet()
+        #     while !(newsig in seen)
+        #         push!(seen, newsig)
+        #         lsig = length((unwrap_unionall(sig)::DataType).parameters)
+        #         newsig = limit_type_size(newsig, sig, sv.linfo.specTypes, InferenceParams(interp).TUPLE_COMPLEXITY_LIMIT_DEPTH, lsig)
+        #         recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), newsig, method.sig)::SimpleVector
+        #         newsig = recomputed[2]
+        #     end
+        #     sig = ?
+        sparams = recomputed[2]::SimpleVector
+    end
+
+    (; rt, edge, edge_effects) = typeinf_edge(interp, method, sig, sparams, sv)
+    if edge === nothing
+        edgecycle = edgelimited = true
+    end
+
+    # we look for the termination effect override here as well, since the :terminates effect
+    # may have been tainted due to recursion at this point even if it's overridden
+    if is_effect_overridden(sv, :terminates_globally)
+        # this frame is known to terminate
+        edge_effects = Effects(edge_effects, terminates=ALWAYS_TRUE)
+    elseif is_effect_overridden(method, :terminates_globally)
+        # this edge is known to terminate
+        edge_effects = Effects(edge_effects; terminates=ALWAYS_TRUE)
+    elseif edgecycle
+        # Some sort of recursion was detected. Even if we did not limit types,
+        # we cannot guarantee that the call will terminate
+        edge_effects = Effects(edge_effects; terminates=TRISTATE_UNKNOWN)
+    end
+    return MethodCallResult(rt, edgecycle, edgelimited, edge, edge_effects)
+end
+
+function edge_matches_sv(frame::InferenceState, method::Method, @nospecialize(sig), sparams::SimpleVector, hardlimit::Bool, sv::InferenceState)
+    # The `method_for_inference_heuristics` will expand the given method's generator if
+    # necessary in order to retrieve this field from the generated `CodeInfo`, if it exists.
+    # The other `CodeInfo`s we inspect will already have this field inflated, so we just
+    # access it directly instead (to avoid regeneration).
+    callee_method2 = method_for_inference_heuristics(method, sig, sparams) # Union{Method, Nothing}
+
+    inf_method2 = frame.src.method_for_inference_limit_heuristics # limit only if user token match
+    inf_method2 isa Method || (inf_method2 = nothing)
+    if callee_method2 !== inf_method2
+        return false
+    end
+    if !hardlimit
+        # if this is a soft limit,
+        # also inspect the parent of this edge,
+        # to see if they are the same Method as sv
+        # in which case we'll need to ensure it is convergent
+        # otherwise, we don't
+
+        # check in the cycle list first
+        # all items in here are mutual parents of all others
+        if !_any(p::InferenceState->matches_sv(p, sv), frame.callers_in_cycle)
+            let parent = frame.parent
+                parent !== nothing || return false
+                parent = parent::InferenceState
+                (parent.cached || parent.parent !== nothing) || return false
+                matches_sv(parent, sv) || return false
+            end
+        end
+
+        # If the method defines a recursion relation, give it a chance
+        # to tell us that this recursion is actually ok.
+        if isdefined(method, :recursion_relation)
+            if Core._apply_pure(method.recursion_relation, Any[method, callee_method2, sig, frame.linfo.specTypes])
+                return false
+            end
+        end
+    end
+    return true
+end
+
+# This function is used for computing alternate limit heuristics
+function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector)
+    if isdefined(method, :generator) && method.generator.expand_early && may_invoke_generator(method, sig, sparams)
+        method_instance = specialize_method(method, sig, sparams)
+        if isa(method_instance, MethodInstance)
+            cinfo = get_staged(method_instance)
+            if isa(cinfo, CodeInfo)
+                method2 = cinfo.method_for_inference_limit_heuristics
+                if method2 isa Method
+                    return method2
+                end
+            end
+        end
+    end
+    return nothing
+end
+
+function matches_sv(parent::InferenceState, sv::InferenceState)
+    sv_method2 = sv.src.method_for_inference_limit_heuristics # limit only if user token match
+    sv_method2 isa Method || (sv_method2 = nothing)
+    parent_method2 = parent.src.method_for_inference_limit_heuristics # limit only if user token match
+    parent_method2 isa Method || (parent_method2 = nothing)
+    return parent.linfo.def === sv.linfo.def && sv_method2 === parent_method2
+end
+
+# keeps result and context information of abstract_method_call, which will later be used for
+# backedge computation, and concrete evaluation or constant-propagation
+struct MethodCallResult
+    rt
+    edgecycle::Bool
+    edgelimited::Bool
+    edge::Union{Nothing,MethodInstance}
+    edge_effects::Effects
+    function MethodCallResult(@nospecialize(rt),
+                              edgecycle::Bool,
+                              edgelimited::Bool,
+                              edge::Union{Nothing,MethodInstance},
+                              edge_effects::Effects)
+        return new(rt, edgecycle, edgelimited, edge, edge_effects)
+    end
+end
+
+function pure_eval_eligible(interp::AbstractInterpreter,
+    @nospecialize(f), applicable::Vector{Any}, arginfo::ArgInfo, sv::InferenceState)
+    # XXX we need to check that this pure function doesn't call any overlayed method
+    return f !== nothing &&
+           length(applicable) == 1 &&
+           is_method_pure(applicable[1]::MethodMatch) &&
+           is_all_const_arg(arginfo)
+end
+
+function is_method_pure(method::Method, @nospecialize(sig), sparams::SimpleVector)
+    if isdefined(method, :generator)
+        method.generator.expand_early || return false
+        mi = specialize_method(method, sig, sparams)
+        isa(mi, MethodInstance) || return false
+        staged = get_staged(mi)
+        (staged isa CodeInfo && (staged::CodeInfo).pure) || return false
+        return true
+    end
+    return method.pure
+end
+is_method_pure(match::MethodMatch) = is_method_pure(match.method, match.spec_types, match.sparams)
+
+function pure_eval_call(interp::AbstractInterpreter,
+    @nospecialize(f), applicable::Vector{Any}, arginfo::ArgInfo, sv::InferenceState)
+    pure_eval_eligible(interp, f, applicable, arginfo, sv) || return nothing
+    return _pure_eval_call(f, arginfo)
+end
+function _pure_eval_call(@nospecialize(f), arginfo::ArgInfo)
+    args = collect_const_args(arginfo)
+    value = try
+        Core._apply_pure(f, args)
+    catch
+        return nothing
+    end
+    return Const(value)
+end
+
+function concrete_eval_eligible(interp::AbstractInterpreter,
+    @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::InferenceState)
+    # disable concrete-evaluation since this function call is tainted by some overlayed
+    # method and currently there is no direct way to execute overlayed methods
+    isoverlayed(method_table(interp)) && !is_nonoverlayed(result.edge_effects) && return false
+    return f !== nothing &&
+           result.edge !== nothing &&
+           is_concrete_eval_eligible(result.edge_effects) &&
+           is_all_const_arg(arginfo)
+end
+
+function is_all_const_arg((; argtypes)::ArgInfo)
+    for i = 2:length(argtypes)
+        a = widenconditional(argtypes[i])
+        isa(a, Const) || isconstType(a) || issingletontype(a) || return false
+    end
+    return true
+end
+
+function collect_const_args((; argtypes)::ArgInfo)
+    return Any[ let a = widenconditional(argtypes[i])
+                    isa(a, Const) ? a.val :
+                    isconstType(a) ? (a::DataType).parameters[1] :
+                    (a::DataType).instance
+                end for i in 2:length(argtypes) ]
+end
+
+function concrete_eval_call(interp::AbstractInterpreter,
+    @nospecialize(f), result::MethodCallResult, arginfo::ArgInfo, sv::InferenceState)
+    concrete_eval_eligible(interp, f, result, arginfo, sv) || return nothing
+    args = collect_const_args(arginfo)
+    world = get_world_counter(interp)
+    value = try
+        Core._call_in_world_total(world, f, args...)
+    catch
+        # The evaulation threw. By :consistent-cy, we're guaranteed this would have happened at runtime
+        return ConstCallResults(Union{}, ConcreteResult(result.edge::MethodInstance, result.edge_effects), result.edge_effects)
+    end
+    if is_inlineable_constant(value) || call_result_unused(sv)
+        # If the constant is not inlineable, still do the const-prop, since the
+        # code that led to the creation of the Const may be inlineable in the same
+        # circumstance and may be optimizable.
+        return ConstCallResults(Const(value), ConcreteResult(result.edge::MethodInstance, EFFECTS_TOTAL, value), EFFECTS_TOTAL)
+    end
+    return nothing
+end
+
+function const_prop_enabled(interp::AbstractInterpreter, sv::InferenceState, match::MethodMatch)
+    if !InferenceParams(interp).ipo_constant_propagation
+        add_remark!(interp, sv, "[constprop] Disabled by parameter")
+        return false
+    end
+    method = match.method
+    if method.constprop == 0x02
+        add_remark!(interp, sv, "[constprop] Disabled by method parameter")
+        return false
+    end
+    return true
+end
+
+struct ConstCallResults
+    rt::Any
+    const_result::ConstResult
+    effects::Effects
+    ConstCallResults(@nospecialize(rt),
+                     const_result::ConstResult,
+                     effects::Effects) =
+        new(rt, const_result, effects)
+end
+
+function abstract_call_method_with_const_args(interp::AbstractInterpreter, result::MethodCallResult,
+                                              @nospecialize(f), arginfo::ArgInfo, match::MethodMatch,
+                                              sv::InferenceState)
+    if !const_prop_enabled(interp, sv, match)
+        return nothing
+    end
+    val = concrete_eval_call(interp, f, result, arginfo, sv)
+    if val !== nothing
+        add_backedge!(val.const_result.mi, sv)
+        return val
+    end
+    mi = maybe_get_const_prop_profitable(interp, result, f, arginfo, match, sv)
+    mi === nothing && return nothing
+    # try constant prop'
+    inf_cache = get_inference_cache(interp)
+    inf_result = cache_lookup(mi, arginfo.argtypes, inf_cache)
+    if inf_result === nothing
+        # if there might be a cycle, check to make sure we don't end up
+        # calling ourselves here.
+        let result = result # prevent capturing
+            if result.edgecycle && _any(InfStackUnwind(sv)) do infstate
+                    # if the type complexity limiting didn't decide to limit the call signature (`result.edgelimited = false`)
+                    # we can relax the cycle detection by comparing `MethodInstance`s and allow inference to
+                    # propagate different constant elements if the recursion is finite over the lattice
+                    return (result.edgelimited ? match.method === infstate.linfo.def : mi === infstate.linfo) &&
+                            any(infstate.result.overridden_by_const)
                 end
+                add_remark!(interp, sv, "[constprop] Edge cycle encountered")
+                return nothing
             end
         end
-        # iterate through the cycle before walking to the parent
-        if cyclei < length(infstate.callers_in_cycle)
-            cyclei += 1
-            infstate = infstate.callers_in_cycle[cyclei]
+        inf_result = InferenceResult(mi, (arginfo, sv))
+        if !any(inf_result.overridden_by_const)
+            add_remark!(interp, sv, "[constprop] Could not handle constant info in matching_cache_argtypes")
+            return nothing
+        end
+        frame = InferenceState(inf_result, #=cache=#:local, interp)
+        frame === nothing && return nothing # this is probably a bad generated function (unsound), but just ignore it
+        frame.parent = sv
+        typeinf(interp, frame) || return nothing
+    end
+    result = inf_result.result
+    # if constant inference hits a cycle, just bail out
+    isa(result, InferenceState) && return nothing
+    add_backedge!(mi, sv)
+    return ConstCallResults(result, ConstPropResult(inf_result), inf_result.ipo_effects)
+end
+
+# if there's a possibility we could get a better result with these constant arguments
+# (hopefully without doing too much work), returns `MethodInstance`, or nothing otherwise
+function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::MethodCallResult,
+                                         @nospecialize(f), arginfo::ArgInfo, match::MethodMatch,
+                                         sv::InferenceState)
+    method = match.method
+    force = force_const_prop(interp, f, method)
+    force || const_prop_entry_heuristic(interp, result, sv) || return nothing
+    nargs::Int = method.nargs
+    method.isva && (nargs -= 1)
+    length(arginfo.argtypes) < nargs && return nothing
+    if !const_prop_argument_heuristic(interp, arginfo, sv)
+        add_remark!(interp, sv, "[constprop] Disabled by argument and rettype heuristics")
+        return nothing
+    end
+    all_overridden = is_all_overridden(arginfo, sv)
+    if !force && !const_prop_function_heuristic(interp, f, arginfo, nargs, all_overridden,
+            sv.ipo_effects.nothrow === ALWAYS_TRUE, sv)
+        add_remark!(interp, sv, "[constprop] Disabled by function heuristic")
+        return nothing
+    end
+    force |= all_overridden
+    mi = specialize_method(match; preexisting=!force)
+    if mi === nothing
+        add_remark!(interp, sv, "[constprop] Failed to specialize")
+        return nothing
+    end
+    mi = mi::MethodInstance
+    if !force && !const_prop_methodinstance_heuristic(interp, match, mi, arginfo, sv)
+        add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
+        return nothing
+    end
+    return mi
+end
+
+function const_prop_entry_heuristic(interp::AbstractInterpreter, result::MethodCallResult, sv::InferenceState)
+    if call_result_unused(sv) && result.edgecycle
+        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (edgecycle with unused result)")
+        return false
+    end
+    # check if this return type is improvable (i.e. whether it's possible that with more
+    # information, we might get a more precise type)
+    rt = result.rt
+    if isa(rt, Type)
+        # could always be improved to `Const`, `PartialStruct` or just a more precise type,
+        # unless we're already at `Bottom`
+        if rt === Bottom
+            add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (erroneous result)")
+            return false
         else
-            cyclei = 0
-            infstate = infstate.parent
+            return true
+        end
+    elseif isa(rt, PartialStruct) || isa(rt, InterConditional)
+        # could be improved to `Const` or a more precise wrapper
+        return true
+    elseif isa(rt, LimitedAccuracy)
+        # optimizations like inlining are disabled for limited frames,
+        # thus there won't be much benefit in constant-prop' here
+        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (limited accuracy)")
+        return false
+    else
+        if isa(rt, Const)
+            if result.edge_effects.nothrow !== ALWAYS_TRUE
+                # Could still be improved to Bottom (or at least could see the effects improved)
+                return true
+            end
+        end
+        add_remark!(interp, sv, "[constprop] Disabled by entry heuristic (unimprovable result)")
+        return false
+    end
+end
+
+# determines heuristically whether if constant propagation can be worthwhile
+# by checking if any of given `argtypes` is "interesting" enough to be propagated
+function const_prop_argument_heuristic(_::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::InferenceState)
+    for i in 1:length(argtypes)
+        a = argtypes[i]
+        if isa(a, Conditional) && fargs !== nothing
+            is_const_prop_profitable_conditional(a, fargs, sv) && return true
+        else
+            a = widenconditional(a)
+            has_nontrivial_const_info(a) && is_const_prop_profitable_arg(a) && return true
+        end
+    end
+    return false
+end
+
+function is_const_prop_profitable_arg(@nospecialize(arg))
+    # have new information from argtypes that wasn't available from the signature
+    if isa(arg, PartialStruct)
+        for b in arg.fields
+            isconstType(b) && return true
+            is_const_prop_profitable_arg(b) && return true
+        end
+    end
+    isa(arg, PartialOpaque) && return true
+    isa(arg, Const) || return true
+    val = arg.val
+    # don't consider mutable values or Strings useful constants
+    return isa(val, Symbol) || isa(val, Type) || (!isa(val, String) && !ismutable(val))
+end
+
+function is_const_prop_profitable_conditional(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState)
+    slotid = find_constrained_arg(cnd, fargs, sv)
+    if slotid !== nothing
+        return true
+    end
+    # as a minor optimization, we just check the result is a constant or not,
+    # since both `has_nontrivial_const_info`/`is_const_prop_profitable_arg` return `true`
+    # for `Const(::Bool)`
+    return isa(widenconditional(cnd), Const)
+end
+
+function find_constrained_arg(cnd::Conditional, fargs::Vector{Any}, sv::InferenceState)
+    slot = slot_id(cnd.var)
+    for i in 1:length(fargs)
+        arg = ssa_def_slot(fargs[i], sv)
+        if isa(arg, SlotNumber) && slot_id(arg) == slot
+            return i
+        end
+    end
+    return nothing
+end
+
+# checks if all argtypes has additional information other than what `Type` can provide
+function is_all_overridden((; fargs, argtypes)::ArgInfo, sv::InferenceState)
+    for a in argtypes
+        if isa(a, Conditional) && fargs !== nothing
+            is_const_prop_profitable_conditional(a, fargs, sv) || return false
+        else
+            a = widenconditional(a)
+            is_forwardable_argtype(a) || return false
+        end
+    end
+    return true
+end
+
+function force_const_prop(interp::AbstractInterpreter, @nospecialize(f), method::Method)
+    return method.constprop == 0x01 ||
+           InferenceParams(interp).aggressive_constant_propagation ||
+           istopfunction(f, :getproperty) ||
+           istopfunction(f, :setproperty!)
+end
+
+function const_prop_function_heuristic(
+    _::AbstractInterpreter, @nospecialize(f), (; argtypes)::ArgInfo,
+    nargs::Int, all_overridden::Bool, still_nothrow::Bool, _::InferenceState)
+    if nargs > 1
+        if istopfunction(f, :getindex) || istopfunction(f, :setindex!)
+            arrty = argtypes[2]
+            # don't propagate constant index into indexing of non-constant array
+            if arrty isa Type && arrty <: AbstractArray && !issingletontype(arrty)
+                # For static arrays, allow the constprop if we could possibly
+                # deduce nothrow as a result.
+                if !still_nothrow || ismutabletype(arrty)
+                    return false
+                end
+            elseif arrty ⊑ Array
+                return false
+            end
+        elseif istopfunction(f, :iterate)
+            itrty = argtypes[2]
+            if itrty ⊑ Array
+                return false
+            end
         end
     end
-
-    if !(topmost === nothing)
-        topmost = topmost::InferenceState
-        sigtuple = unwrap_unionall(sig)::DataType
-        msig = unwrap_unionall(method.sig)::DataType
-        spec_len = length(msig.parameters) + 1
-        ls = length(sigtuple.parameters)
-        if method === sv.linfo.def
-            # Under direct self-recursion, permit much greater use of reducers.
-            # here we assume that complexity(specTypes) :>= complexity(sig)
-            comparison = sv.linfo.specTypes
-            l_comparison = length(unwrap_unionall(comparison).parameters)
-            spec_len = max(spec_len, l_comparison)
-        else
-            comparison = method.sig
-        end
-        # see if the type is actually too big (relative to the caller), and limit it if required
-        newsig = limit_type_size(sig, comparison, hardlimit ? comparison : sv.linfo.specTypes, InferenceParams(interp).TUPLE_COMPLEXITY_LIMIT_DEPTH, spec_len)
-
-        if newsig !== sig
-            # continue inference, but note that we've limited parameter complexity
-            # on this call (to ensure convergence), so that we don't cache this result
-            if call_result_unused(sv)
-                add_remark!(interp, sv, RECURSION_UNUSED_MSG)
-                # if we don't (typically) actually care about this result,
-                # don't bother trying to examine some complex abstract signature
-                # since it's very unlikely that we'll try to inline this,
-                # or want make an invoke edge to its calling convention return type.
-                # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-                return Any, true, nothing
+    if !all_overridden && (istopfunction(f, :+) || istopfunction(f, :-) || istopfunction(f, :*) ||
+                           istopfunction(f, :(==)) || istopfunction(f, :!=) ||
+                           istopfunction(f, :<=) || istopfunction(f, :>=) || istopfunction(f, :<) || istopfunction(f, :>) ||
+                           istopfunction(f, :<<) || istopfunction(f, :>>))
+        # it is almost useless to inline the op when all the same type,
+        # but highly worthwhile to inline promote of a constant
+        length(argtypes) > 2 || return false
+        t1 = widenconst(argtypes[2])
+        for i in 3:length(argtypes)
+            at = argtypes[i]
+            ty = isvarargtype(at) ? unwraptv(at) : widenconst(at)
+            if ty !== t1
+                return true
             end
-            poison_callstack(sv, topmost::InferenceState, true)
-            sig = newsig
-            sparams = svec()
         end
+        return false
     end
+    return true
+end
 
-    # if sig changed, may need to recompute the sparams environment
-    if isa(method.sig, UnionAll) && isempty(sparams)
-        recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), sig, method.sig)::SimpleVector
-        #@assert recomputed[1] !== Bottom
-        # We must not use `sig` here, since that may re-introduce structural complexity that
-        # our limiting heuristic sought to eliminate. The alternative would be to not increment depth over covariant contexts,
-        # but we prefer to permit inference of tuple-destructuring, so we don't do that right now
-        # For example, with a signature such as `Tuple{T, Ref{T}} where {T <: S}`
-        # we might want to limit this to `Tuple{S, Ref}`, while type-intersection can instead give us back the original type
-        # (which moves `S` back up to a lower comparison depth)
-        # Optionally, we could try to drive this to a fixed point, but I think this is getting too complex,
-        # and this would only cause more questions and more problems
-        # (the following is only an example, most of the statements are probable in the wrong order):
-        #     newsig = sig
-        #     seen = IdSet()
-        #     while !(newsig in seen)
-        #         push!(seen, newsig)
-        #         lsig = length((unwrap_unionall(sig)::DataType).parameters)
-        #         newsig = limit_type_size(newsig, sig, sv.linfo.specTypes, InferenceParams(interp).TUPLE_COMPLEXITY_LIMIT_DEPTH, lsig)
-        #         recomputed = ccall(:jl_type_intersection_with_env, Any, (Any, Any), newsig, method.sig)::SimpleVector
-        #         newsig = recomputed[2]
-        #     end
-        #     sig = ?
-        sparams = recomputed[2]::SimpleVector
+# This is a heuristic to avoid trying to const prop through complicated functions
+# where we would spend a lot of time, but are probably unlikely to get an improved
+# result anyway.
+function const_prop_methodinstance_heuristic(
+    interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance,
+    (; argtypes)::ArgInfo, sv::InferenceState)
+    method = match.method
+    if method.is_for_opaque_closure
+        # Not inlining an opaque closure can be very expensive, so be generous
+        # with the const-prop-ability. It is quite possible that we can't infer
+        # anything at all without const-propping, so the inlining check below
+        # isn't particularly helpful here.
+        return true
     end
-
-    rt, edge = typeinf_edge(interp, method, sig, sparams, sv)
-    if edge === nothing
-        edgecycle = true
+    # Peek at the inferred result for the function to determine if the optimizer
+    # was able to cut it down to something simple (inlineable in particular).
+    # If so, there's a good chance we might be able to const prop all the way
+    # through and learn something new.
+    if isdefined(method, :source) && ccall(:jl_ir_flag_inlineable, Bool, (Any,), method.source)
+        return true
+    else
+        flag = get_curr_ssaflag(sv)
+        if is_stmt_inline(flag)
+            # force constant propagation for a call that is going to be inlined
+            # since the inliner will try to find this constant result
+            # if these constant arguments arrive there
+            return true
+        elseif is_stmt_noinline(flag)
+            # this call won't be inlined, thus this constant-prop' will most likely be unfruitful
+            return false
+        else
+            code = get(code_cache(interp), mi, nothing)
+            if isdefined(code, :inferred) && inlining_policy(
+                    interp, code.inferred, IR_FLAG_NULL, mi, argtypes) !== nothing
+                return true
+            end
+        end
     end
-    return rt, edgecycle, edge
+    return false # the cache isn't inlineable, so this constant-prop' will most likely be unfruitful
 end
 
 # This is only for use with `Conditional`.
@@ -514,34 +1131,32 @@ function precise_container_type(interp::AbstractInterpreter, @nospecialize(itft)
     if isa(tti, DataType) && tti.name === NamedTuple_typename
         # A NamedTuple iteration is the same as the iteration of its Tuple parameter:
         # compute a new `tti == unwrap_unionall(tti0)` based on that Tuple type
-        tti = tti.parameters[2]
-        while isa(tti, TypeVar)
-            tti = tti.ub
-        end
+        tti = unwraptv(tti.parameters[2])
         tti0 = rewrap_unionall(tti, tti0)
     end
     if isa(tti, Union)
         utis = uniontypes(tti)
-        if _any(t -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
+        if _any(@nospecialize(t) -> !isa(t, DataType) || !(t <: Tuple) || !isknownlength(t), utis)
             return Any[Vararg{Any}], nothing
         end
-        result = Any[rewrap_unionall(p, tti0) for p in utis[1].parameters]
-        for t in utis[2:end]
-            if length(t.parameters) != length(result)
+        ltp = length((utis[1]::DataType).parameters)
+        for t in utis
+            if length((t::DataType).parameters) != ltp
                 return Any[Vararg{Any}], nothing
             end
-            for j in 1:length(t.parameters)
-                result[j] = tmerge(result[j], rewrap_unionall(t.parameters[j], tti0))
+        end
+        result = Any[ Union{} for _ in 1:ltp ]
+        for t in utis
+            tps = (t::DataType).parameters
+            _all(valid_as_lattice, tps) || continue
+            for j in 1:ltp
+                result[j] = tmerge(result[j], rewrap_unionall(tps[j], tti0))
             end
         end
         return result, nothing
     elseif tti0 <: Tuple
         if isa(tti0, DataType)
-            if isvatuple(tti0) && length(tti0.parameters) == 1
-                return Any[Vararg{unwrapva(tti0.parameters[1])}], nothing
-            else
-                return Any[ p for p in tti0.parameters ], nothing
-            end
+            return Any[ p for p in tti0.parameters ], nothing
         elseif !isa(tti, DataType)
             return Any[Vararg{Any}], nothing
         else
@@ -565,33 +1180,28 @@ end
 
 # simulate iteration protocol on container type up to fixpoint
 function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(itertype), sv::InferenceState)
-    if !isdefined(Main, :Base) || !isdefined(Main.Base, :iterate) || !isconst(Main.Base, :iterate)
-        return Any[Vararg{Any}], nothing
-    end
-    if itft === nothing
-        iteratef = getfield(Main.Base, :iterate)
-        itft = Const(iteratef)
-    elseif isa(itft, Const)
+    if isa(itft, Const)
         iteratef = itft.val
     else
         return Any[Vararg{Any}], nothing
     end
     @assert !isvarargtype(itertype)
-    call = abstract_call_known(interp, iteratef, nothing, Any[itft, itertype], sv)
+    call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[itft, itertype]), sv)
     stateordonet = call.rt
     info = call.info
     # Return Bottom if this is not an iterator.
     # WARNING: Changes to the iteration protocol must be reflected here,
     # this is not just an optimization.
+    # TODO: this doesn't realize that Array, SimpleVector, Tuple, and NamedTuple do not use the iterate protocol
     stateordonet === Bottom && return Any[Bottom], AbstractIterationInfo(CallMeta[CallMeta(Bottom, info)])
     valtype = statetype = Bottom
     ret = Any[]
     calls = CallMeta[call]
+    stateordonet_widened = widenconst(stateordonet)
 
     # Try to unroll the iteration up to MAX_TUPLE_SPLAT, which covers any finite
     # length iterators, or interesting prefix
     while true
-        stateordonet_widened = widenconst(stateordonet)
         if stateordonet_widened === Nothing
             return ret, AbstractIterationInfo(calls)
         end
@@ -610,43 +1220,62 @@ function abstract_iteration(interp::AbstractInterpreter, @nospecialize(itft), @n
         valtype = getfield_tfunc(stateordonet, Const(1))
         push!(ret, valtype)
         statetype = nstatetype
-        call = abstract_call_known(interp, iteratef, nothing, Any[Const(iteratef), itertype, statetype], sv)
+        call = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), sv)
         stateordonet = call.rt
+        stateordonet_widened = widenconst(stateordonet)
         push!(calls, call)
     end
     # From here on, we start asking for results on the widened types, rather than
     # the precise (potentially const) state type
-    statetype = widenconst(statetype)
-    valtype = widenconst(valtype)
+    # statetype and valtype are reinitialized in the first iteration below from the
+    # (widened) stateordonet, which has not yet been fully analyzed in the loop above
+    statetype = Bottom
+    valtype = Bottom
+    may_have_terminated = Nothing <: stateordonet_widened
     while valtype !== Any
-        stateordonet = abstract_call_known(interp, iteratef, nothing, Any[Const(iteratef), itertype, statetype], sv).rt
-        stateordonet = widenconst(stateordonet)
-        nounion = typesubtract(stateordonet, Nothing)
-        if !isa(nounion, DataType) || !(nounion <: Tuple) || isvatuple(nounion) || length(nounion.parameters) != 2
+        nounion = typeintersect(stateordonet_widened, Tuple{Any,Any})
+        if nounion !== Union{} && !isa(nounion, DataType)
+            # nounion is of a type we cannot handle
             valtype = Any
             break
         end
-        if nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype
-            if typeintersect(stateordonet, Nothing) === Union{}
-                # Reached a fixpoint, but Nothing is not possible => iterator is infinite or failing
-                return Any[Bottom], nothing
+        if nounion === Union{} || (nounion.parameters[1] <: valtype && nounion.parameters[2] <: statetype)
+            # reached a fixpoint or iterator failed/gave invalid answer
+            if !hasintersect(stateordonet_widened, Nothing)
+                # ... but cannot terminate
+                if !may_have_terminated
+                    #  ... and cannot have terminated prior to this loop
+                    return Any[Bottom], nothing
+                else
+                    # iterator may have terminated prior to this loop, but not during it
+                    valtype = Bottom
+                end
             end
             break
         end
         valtype = tmerge(valtype, nounion.parameters[1])
         statetype = tmerge(statetype, nounion.parameters[2])
+        stateordonet = abstract_call_known(interp, iteratef, ArgInfo(nothing, Any[Const(iteratef), itertype, statetype]), sv).rt
+        stateordonet_widened = widenconst(stateordonet)
+    end
+    if valtype !== Union{}
+        push!(ret, Vararg{valtype})
     end
-    push!(ret, Vararg{valtype})
     return ret, nothing
 end
 
 # do apply(af, fargs...), where af is a function value
-function abstract_apply(interp::AbstractInterpreter, @nospecialize(itft), @nospecialize(aft), aargtypes::Vector{Any}, sv::InferenceState,
-                        max_methods::Int = InferenceParams(interp).MAX_METHODS)
+function abstract_apply(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState,
+                        max_methods::Int = get_max_methods(sv.mod, interp))
+    itft = argtype_by_index(argtypes, 2)
+    aft = argtype_by_index(argtypes, 3)
+    (itft === Bottom || aft === Bottom) && return CallMeta(Bottom, false)
+    aargtypes = argtype_tail(argtypes, 4)
     aftw = widenconst(aft)
-    if !isa(aft, Const) && (!isType(aftw) || has_free_typevars(aftw))
+    if !isa(aft, Const) && !isa(aft, PartialOpaque) && (!isType(aftw) || has_free_typevars(aftw))
         if !isconcretetype(aftw) || (aftw <: Builtin)
-            add_remark!(interp, sv, "Core._apply called on a function of a non-concrete type")
+            add_remark!(interp, sv, "Core._apply_iterate called on a function of a non-concrete type")
+            tristate_merge!(sv, Effects())
             # bail now, since it seems unlikely that abstract_call will be able to do any better after splitting
             # this also ensures we don't call abstract_call_gf_by_type below on an IntrinsicFunction or Builtin
             return CallMeta(Any, false)
@@ -654,17 +1283,21 @@ function abstract_apply(interp::AbstractInterpreter, @nospecialize(itft), @nospe
     end
     res = Union{}
     nargs = length(aargtypes)
-    splitunions = 1 < countunionsplit(aargtypes) <= InferenceParams(interp).MAX_APPLY_UNION_ENUM
-    ctypes = Any[Any[aft]]
-    infos = [Union{Nothing, AbstractIterationInfo}[]]
+    splitunions = 1 < unionsplitcost(aargtypes) <= InferenceParams(interp).MAX_APPLY_UNION_ENUM
+    ctypes = [Any[aft]]
+    infos = Vector{MaybeAbstractIterationInfo}[MaybeAbstractIterationInfo[]]
     for i = 1:nargs
-        ctypes´ = []
-        infos′ = []
+        ctypes´ = Vector{Any}[]
+        infos′ = Vector{MaybeAbstractIterationInfo}[]
         for ti in (splitunions ? uniontypes(aargtypes[i]) : Any[aargtypes[i]])
             if !isvarargtype(ti)
-                cti, info = precise_container_type(interp, itft, ti, sv)
+                cti_info = precise_container_type(interp, itft, ti, sv)
+                cti = cti_info[1]::Vector{Any}
+                info = cti_info[2]::MaybeAbstractIterationInfo
             else
-                cti, info = precise_container_type(interp, itft, unwrapva(ti), sv)
+                cti_info = precise_container_type(interp, itft, unwrapva(ti), sv)
+                cti = cti_info[1]::Vector{Any}
+                info = cti_info[2]::MaybeAbstractIterationInfo
                 # We can't represent a repeating sequence of the same types,
                 # so tmerge everything together to get one type that represents
                 # everything.
@@ -681,12 +1314,11 @@ function abstract_apply(interp::AbstractInterpreter, @nospecialize(itft), @nospe
                 continue
             end
             for j = 1:length(ctypes)
-                ct = ctypes[j]
+                ct = ctypes[j]::Vector{Any}
                 if isvarargtype(ct[end])
                     # This is vararg, we're not gonna be able to do any inling,
                     # drop the info
                     info = nothing
-
                     tail = tuple_tail_elem(unwrapva(ct[end]), cti)
                     push!(ctypes´, push!(ct[1:(end - 1)], tail))
                 else
@@ -706,18 +1338,21 @@ function abstract_apply(interp::AbstractInterpreter, @nospecialize(itft), @nospe
         lct = length(ct)
         # truncate argument list at the first Vararg
         for i = 1:lct-1
-            if isvarargtype(ct[i])
-                ct[i] = tuple_tail_elem(ct[i], ct[(i+1):lct])
+            cti = ct[i]
+            if isvarargtype(cti)
+                ct[i] = tuple_tail_elem(unwrapva(cti), ct[(i+1):lct])
                 resize!(ct, i)
                 break
             end
         end
-        call = abstract_call(interp, nothing, ct, sv, max_methods)
+        call = abstract_call(interp, ArgInfo(nothing, ct), sv, max_methods)
         push!(retinfos, ApplyCallInfo(call.info, arginfo))
         res = tmerge(res, call.rt)
-        if res === Any
-            # No point carrying forward the info, we're not gonna inline it anyway
-            retinfo = nothing
+        if bail_out_apply(interp, res, sv)
+            if i != length(ctypes)
+                # No point carrying forward the info, we're not gonna inline it anyway
+                retinfo = false
+            end
             break
         end
     end
@@ -726,40 +1361,11 @@ function abstract_apply(interp::AbstractInterpreter, @nospecialize(itft), @nospe
     return CallMeta(res, retinfo)
 end
 
-function is_method_pure(method::Method, @nospecialize(sig), sparams::SimpleVector)
-    if isdefined(method, :generator)
-        method.generator.expand_early || return false
-        mi = specialize_method(method, sig, sparams, false)
-        isa(mi, MethodInstance) || return false
-        staged = get_staged(mi)
-        (staged isa CodeInfo && (staged::CodeInfo).pure) || return false
-        return true
-    end
-    return method.pure
-end
-is_method_pure(match::MethodMatch) = is_method_pure(match.method, match.spec_types, match.sparams)
-
-function pure_eval_call(@nospecialize(f), argtypes::Vector{Any})
-    for i = 2:length(argtypes)
-        a = widenconditional(argtypes[i])
-        if !(isa(a, Const) || isconstType(a))
-            return false
-        end
-    end
-
-    args = Any[ (a = widenconditional(argtypes[i]); isa(a, Const) ? a.val : a.parameters[1]) for i in 2:length(argtypes) ]
-    try
-        value = Core._apply_pure(f, args)
-        return Const(value)
-    catch
-        return false
-    end
-end
-
 function argtype_by_index(argtypes::Vector{Any}, i::Int)
     n = length(argtypes)
-    if isvarargtype(argtypes[n])
-        return i >= n ? unwrapva(argtypes[n]) : argtypes[i]
+    na = argtypes[n]
+    if isvarargtype(na)
+        return i >= n ? unwrapva(na) : argtypes[i]
     else
         return i > n ? Bottom : argtypes[i]
     end
@@ -773,36 +1379,39 @@ function argtype_tail(argtypes::Vector{Any}, i::Int)
     return argtypes[i:n]
 end
 
-function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::Union{Nothing,Vector{Any}},
-        argtypes::Vector{Any}, sv::InferenceState, max_methods::Int)
+function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, (; fargs, argtypes)::ArgInfo,
+                               sv::InferenceState, max_methods::Int)
+    @nospecialize f
     la = length(argtypes)
-    if f === ifelse && fargs isa Vector{Any} && la == 4 && argtypes[2] isa Conditional
-        # try to simulate this as a real conditional (`cnd ? x : y`), so that the penalty for using `ifelse` instead isn't too high
-        cnd = argtypes[2]::Conditional
-        tx = argtypes[3]
-        ty = argtypes[4]
-        a = ssa_def_slot(fargs[3], sv)
-        b = ssa_def_slot(fargs[4], sv)
-        if isa(a, Slot) && slot_id(cnd.var) == slot_id(a)
-            tx = typeintersect(tx, cnd.vtype)
-        end
-        if isa(b, Slot) && slot_id(cnd.var) == slot_id(b)
-            ty = typeintersect(ty, cnd.elsetype)
+    if f === Core.ifelse && fargs isa Vector{Any} && la == 4
+        cnd = argtypes[2]
+        if isa(cnd, Conditional)
+            newcnd = widenconditional(cnd)
+            tx = argtypes[3]
+            ty = argtypes[4]
+            if isa(newcnd, Const)
+                # if `cnd` is constant, we should just respect its constantness to keep inference accuracy
+                return newcnd.val::Bool ? tx : ty
+            else
+                # try to simulate this as a real conditional (`cnd ? x : y`), so that the penalty for using `ifelse` instead isn't too high
+                a = ssa_def_slot(fargs[3], sv)
+                b = ssa_def_slot(fargs[4], sv)
+                if isa(a, SlotNumber) && slot_id(cnd.var) == slot_id(a)
+                    tx = (cnd.vtype ⊑ tx ? cnd.vtype : tmeet(tx, widenconst(cnd.vtype)))
+                end
+                if isa(b, SlotNumber) && slot_id(cnd.var) == slot_id(b)
+                    ty = (cnd.elsetype ⊑ ty ? cnd.elsetype : tmeet(ty, widenconst(cnd.elsetype)))
+                end
+                return tmerge(tx, ty)
+            end
         end
-        return tmerge(tx, ty)
     end
     rt = builtin_tfunction(interp, f, argtypes[2:end], sv)
-    if f === getfield && isa(fargs, Vector{Any}) && la == 3 && isa(argtypes[3], Const) && isa(argtypes[3].val, Int) && argtypes[2] ⊑ Tuple
-        cti, _ = precise_container_type(interp, nothing, argtypes[2], sv)
-        idx = argtypes[3].val
-        if 1 <= idx <= length(cti)
-            rt = unwrapva(cti[idx])
-        end
-    elseif (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any})
+    if (rt === Bool || (isa(rt, Const) && isa(rt.val, Bool))) && isa(fargs, Vector{Any})
         # perform very limited back-propagation of type information for `is` and `isa`
         if f === isa
             a = ssa_def_slot(fargs[2], sv)
-            if isa(a, Slot)
+            if isa(a, SlotNumber)
                 aty = widenconst(argtypes[2])
                 if rt === Const(false)
                     return Conditional(a, Union{}, aty)
@@ -814,7 +1423,8 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
                     tty_lb = tty_ub # TODO: this would be wrong if !isexact_tty, but instanceof_tfunc doesn't preserve this info
                     if !has_free_typevars(tty_lb) && !has_free_typevars(tty_ub)
                         ifty = typeintersect(aty, tty_ub)
-                        elty = typesubtract(aty, tty_lb)
+                        valid_as_lattice(ifty) || (ifty = Union{})
+                        elty = typesubtract(aty, tty_lb, InferenceParams(interp).MAX_UNION_SPLITTING)
                         return Conditional(a, ifty, elty)
                     end
                 end
@@ -825,31 +1435,31 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
             aty = argtypes[2]
             bty = argtypes[3]
             # if doing a comparison to a singleton, consider returning a `Conditional` instead
-            if isa(aty, Const) && isa(b, Slot)
+            if isa(aty, Const) && isa(b, SlotNumber)
                 if rt === Const(false)
                     aty = Union{}
                 elseif rt === Const(true)
                     bty = Union{}
                 elseif bty isa Type && isdefined(typeof(aty.val), :instance) # can only widen a if it is a singleton
-                    bty = typesubtract(bty, typeof(aty.val))
+                    bty = typesubtract(bty, typeof(aty.val), InferenceParams(interp).MAX_UNION_SPLITTING)
                 end
                 return Conditional(b, aty, bty)
             end
-            if isa(bty, Const) && isa(a, Slot)
+            if isa(bty, Const) && isa(a, SlotNumber)
                 if rt === Const(false)
                     bty = Union{}
                 elseif rt === Const(true)
                     aty = Union{}
                 elseif aty isa Type && isdefined(typeof(bty.val), :instance) # same for b
-                    aty = typesubtract(aty, typeof(bty.val))
+                    aty = typesubtract(aty, typeof(bty.val), InferenceParams(interp).MAX_UNION_SPLITTING)
                 end
                 return Conditional(a, bty, aty)
             end
             # narrow the lattice slightly (noting the dependency on one of the slots), to promote more effective smerge
-            if isa(b, Slot)
+            if isa(b, SlotNumber)
                 return Conditional(b, rt === Const(false) ? Union{} : bty, rt === Const(true) ? Union{} : bty)
             end
-            if isa(a, Slot)
+            if isa(a, SlotNumber)
                 return Conditional(a, rt === Const(false) ? Union{} : aty, rt === Const(true) ? Union{} : aty)
             end
         elseif f === Core.Compiler.not_int
@@ -864,18 +1474,42 @@ function abstract_call_builtin(interp::AbstractInterpreter, f::Builtin, fargs::U
                 end
                 return Conditional(aty.var, ifty, elty)
             end
+        elseif f === isdefined
+            uty = argtypes[2]
+            a = ssa_def_slot(fargs[2], sv)
+            if isa(uty, Union) && isa(a, SlotNumber)
+                fld = argtypes[3]
+                vtype = Union{}
+                elsetype = Union{}
+                for ty in uniontypes(uty)
+                    cnd = isdefined_tfunc(ty, fld)
+                    if isa(cnd, Const)
+                        if cnd.val::Bool
+                            vtype = tmerge(vtype, ty)
+                        else
+                            elsetype = tmerge(elsetype, ty)
+                        end
+                    else
+                        vtype = tmerge(vtype, ty)
+                        elsetype = tmerge(elsetype, ty)
+                    end
+                end
+                return Conditional(a, vtype, elsetype)
+            end
         end
     end
-    return isa(rt, TypeVar) ? rt.ub : rt
+    @assert !isa(rt, TypeVar) "unhandled TypeVar"
+    return rt
 end
 
 function abstract_call_unionall(argtypes::Vector{Any})
     if length(argtypes) == 3
         canconst = true
-        if isa(argtypes[3], Const)
-            body = argtypes[3].val
-        elseif isType(argtypes[3])
-            body = argtypes[3].parameters[1]
+        a3 = argtypes[3]
+        if isa(a3, Const)
+            body = a3.val
+        elseif isType(a3)
+            body = a3.parameters[1]
             canconst = false
         else
             return Any
@@ -884,11 +1518,11 @@ function abstract_call_unionall(argtypes::Vector{Any})
             return Any
         end
         if has_free_typevars(body)
-            if isa(argtypes[2], Const)
-                tv = argtypes[2].val
-            elseif isa(argtypes[2], PartialTypeVar)
-                ptv = argtypes[2]
-                tv = ptv.tv
+            a2 = argtypes[2]
+            if isa(a2, Const)
+                tv = a2.val
+            elseif isa(a2, PartialTypeVar)
+                tv = a2.tv
                 canconst = false
             else
                 return Any
@@ -902,37 +1536,101 @@ function abstract_call_unionall(argtypes::Vector{Any})
     return Any
 end
 
+function abstract_invoke(interp::AbstractInterpreter, (; fargs, argtypes)::ArgInfo, sv::InferenceState)
+    ft′ = argtype_by_index(argtypes, 2)
+    ft = widenconst(ft′)
+    ft === Bottom && return CallMeta(Bottom, false), EFFECTS_THROWS
+    (types, isexact, isconcrete, istype) = instanceof_tfunc(argtype_by_index(argtypes, 3))
+    types === Bottom && return CallMeta(Bottom, false), EFFECTS_THROWS
+    isexact || return CallMeta(Any, false), Effects()
+    argtype = argtypes_to_type(argtype_tail(argtypes, 4))
+    nargtype = typeintersect(types, argtype)
+    nargtype === Bottom && return CallMeta(Bottom, false), EFFECTS_THROWS
+    nargtype isa DataType || return CallMeta(Any, false), Effects() # other cases are not implemented below
+    isdispatchelem(ft) || return CallMeta(Any, false), Effects() # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
+    ft = ft::DataType
+    types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)::Type
+    nargtype = Tuple{ft, nargtype.parameters...}
+    argtype = Tuple{ft, argtype.parameters...}
+    match, valid_worlds, overlayed = findsup(types, method_table(interp))
+    match === nothing && return CallMeta(Any, false), Effects()
+    update_valid_age!(sv, valid_worlds)
+    method = match.method
+    (ti, env::SimpleVector) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), nargtype, method.sig)::SimpleVector
+    (; rt, edge) = result = abstract_call_method(interp, method, ti, env, false, sv)
+    effects = result.edge_effects
+    edge !== nothing && add_backedge!(edge::MethodInstance, sv)
+    match = MethodMatch(ti, env, method, argtype <: method.sig)
+    res = nothing
+    sig = match.spec_types
+    argtypes′ = invoke_rewrite(argtypes)
+    fargs′ = fargs === nothing ? nothing : invoke_rewrite(fargs)
+    arginfo = ArgInfo(fargs′, argtypes′)
+    # # typeintersect might have narrowed signature, but the accuracy gain doesn't seem worth the cost involved with the lattice comparisons
+    # for i in 1:length(argtypes′)
+    #     t, a = ti.parameters[i], argtypes′[i]
+    #     argtypes′[i] = t ⊑ a ? t : a
+    # end
+    const_call_result = abstract_call_method_with_const_args(interp, result,
+        overlayed ? nothing : singleton_type(ft′), arginfo, match, sv)
+    const_result = nothing
+    if const_call_result !== nothing
+        if const_call_result.rt ⊑ rt
+            (; rt, effects, const_result) = const_call_result
+        end
+    end
+    effects = Effects(effects; nonoverlayed=!overlayed)
+    return CallMeta(from_interprocedural!(rt, sv, arginfo, sig), InvokeCallInfo(match, const_result)), effects
+end
+
+function invoke_rewrite(xs::Vector{Any})
+    x0 = xs[2]
+    newxs = xs[3:end]
+    newxs[1] = x0
+    return newxs
+end
+
 # call where the function is known exactly
 function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
-        fargs::Union{Nothing,Vector{Any}}, argtypes::Vector{Any},
-        sv::InferenceState,
-        max_methods::Int = InferenceParams(interp).MAX_METHODS)
-
+        arginfo::ArgInfo, sv::InferenceState,
+        max_methods::Int = get_max_methods(f, sv.mod, interp))
+    (; fargs, argtypes) = arginfo
     la = length(argtypes)
 
     if isa(f, Builtin)
-        if f === _apply
-            ft = argtype_by_index(argtypes, 2)
-            ft === Bottom && return CallMeta(Bottom, false)
-            return abstract_apply(interp, nothing, ft, argtype_tail(argtypes, 3), sv, max_methods)
-        elseif f === _apply_iterate
-            itft = argtype_by_index(argtypes, 2)
-            ft = argtype_by_index(argtypes, 3)
-            (itft === Bottom || ft === Bottom) && return CallMeta(Bottom, false)
-            return abstract_apply(interp, itft, ft, argtype_tail(argtypes, 4), sv, max_methods)
-        end
-        return CallMeta(abstract_call_builtin(interp, f, fargs, argtypes, sv, max_methods), nothing)
+        if f === _apply_iterate
+            return abstract_apply(interp, argtypes, sv, max_methods)
+        elseif f === invoke
+            call, effects = abstract_invoke(interp, arginfo, sv)
+            tristate_merge!(sv, effects)
+            return call
+        elseif f === modifyfield!
+            tristate_merge!(sv, Effects()) # TODO
+            return abstract_modifyfield!(interp, argtypes, sv)
+        end
+        rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods)
+        tristate_merge!(sv, builtin_effects(f, argtypes, rt))
+        return CallMeta(rt, false)
+    elseif isa(f, Core.OpaqueClosure)
+        # calling an OpaqueClosure about which we have no information returns no information
+        tristate_merge!(sv, Effects())
+        return CallMeta(Any, false)
     elseif f === Core.kwfunc
         if la == 2
-            ft = widenconst(argtypes[2])
-            if isa(ft, DataType) && isdefined(ft.name, :mt) && isdefined(ft.name.mt, :kwsorter)
-                return CallMeta(Const(ft.name.mt.kwsorter), false)
+            aty = argtypes[2]
+            if !isvarargtype(aty)
+                ft = widenconst(aty)
+                if isa(ft, DataType) && isdefined(ft.name, :mt) && isdefined(ft.name.mt, :kwsorter)
+                    return CallMeta(Const(ft.name.mt.kwsorter), MethodResultPure())
+                end
             end
         end
+        tristate_merge!(sv, EFFECTS_UNKNOWN) # TODO
         return CallMeta(Any, false)
     elseif f === TypeVar
         # Manually look through the definition of TypeVar to
         # make sure to be able to get `PartialTypeVar`s out.
+        tristate_merge!(sv, EFFECTS_UNKNOWN) # TODO
         (la < 2 || la > 4) && return CallMeta(Union{}, false)
         n = argtypes[2]
         ub_var = Const(Any)
@@ -943,33 +1641,36 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
         elseif la == 3
             ub_var = argtypes[3]
         end
-        return CallMeta(typevar_tfunc(n, lb_var, ub_var), nothing)
+        return CallMeta(typevar_tfunc(n, lb_var, ub_var), false)
     elseif f === UnionAll
+        tristate_merge!(sv, EFFECTS_UNKNOWN) # TODO
         return CallMeta(abstract_call_unionall(argtypes), false)
-    elseif f === Tuple && la == 2 && !isconcretetype(widenconst(argtypes[2]))
-        return CallMeta(Tuple, false)
-    elseif is_return_type(f)
-        rt_rt = return_type_tfunc(interp, argtypes, sv)
-        if rt_rt !== nothing
-            return CallMeta(rt_rt, nothing)
+    elseif f === Tuple && la == 2
+        tristate_merge!(sv, EFFECTS_UNKNOWN) # TODO
+        aty = argtypes[2]
+        ty = isvarargtype(aty) ? unwrapva(aty) : widenconst(aty)
+        if !isconcretetype(ty)
+            return CallMeta(Tuple, false)
         end
-        return CallMeta(Type, nothing)
+    elseif is_return_type(f)
+        tristate_merge!(sv, EFFECTS_UNKNOWN) # TODO
+        return return_type_tfunc(interp, argtypes, sv)
     elseif la == 2 && istopfunction(f, :!)
         # handle Conditional propagation through !Bool
         aty = argtypes[2]
         if isa(aty, Conditional)
-            call = abstract_call_gf_by_type(interp, f, Any[Const(f), Bool], Tuple{typeof(f), Bool}, sv) # make sure we've inferred `!(::Bool)`
+            call = abstract_call_gf_by_type(interp, f, ArgInfo(fargs, Any[Const(f), Bool]), Tuple{typeof(f), Bool}, sv, max_methods) # make sure we've inferred `!(::Bool)`
             return CallMeta(Conditional(aty.var, aty.elsetype, aty.vtype), call.info)
         end
     elseif la == 3 && istopfunction(f, :!==)
         # mark !== as exactly a negated call to ===
-        rty = abstract_call_known(interp, (===), fargs, argtypes, sv).rt
+        rty = abstract_call_known(interp, (===), arginfo, sv, max_methods).rt
         if isa(rty, Conditional)
-            return CallMeta(Conditional(rty.var, rty.elsetype, rty.vtype), nothing) # swap if-else
+            return CallMeta(Conditional(rty.var, rty.elsetype, rty.vtype), false) # swap if-else
         elseif isa(rty, Const)
-            return CallMeta(Const(rty.val === false), nothing)
+            return CallMeta(Const(rty.val === false), MethodResultPure())
         end
-        return CallMeta(rty, nothing)
+        return CallMeta(rty, false)
     elseif la == 3 && istopfunction(f, :(>:))
         # mark issupertype as a exact alias for issubtype
         # swap T1 and T2 arguments and call <:
@@ -979,51 +1680,90 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f),
             fargs = nothing
         end
         argtypes = Any[typeof(<:), argtypes[3], argtypes[2]]
-        return CallMeta(abstract_call_known(interp, <:, fargs, argtypes, sv).rt, false)
-    elseif la == 2 && isa(argtypes[2], Const) && isa(argtypes[2].val, SimpleVector) && istopfunction(f, :length)
+        return CallMeta(abstract_call_known(interp, <:, ArgInfo(fargs, argtypes), sv, max_methods).rt, false)
+    elseif la == 2 &&
+           (a2 = argtypes[2]; isa(a2, Const)) && (svecval = a2.val; isa(svecval, SimpleVector)) &&
+           istopfunction(f, :length)
         # mark length(::SimpleVector) as @pure
-        return CallMeta(Const(length(argtypes[2].val)), false)
-    elseif la == 3 && isa(argtypes[2], Const) && isa(argtypes[3], Const) &&
-            isa(argtypes[2].val, SimpleVector) && isa(argtypes[3].val, Int) && istopfunction(f, :getindex)
+        return CallMeta(Const(length(svecval)), MethodResultPure())
+    elseif la == 3 &&
+           (a2 = argtypes[2]; isa(a2, Const)) && (svecval = a2.val; isa(svecval, SimpleVector)) &&
+           (a3 = argtypes[3]; isa(a3, Const)) && (idx = a3.val; isa(idx, Int)) &&
+           istopfunction(f, :getindex)
         # mark getindex(::SimpleVector, i::Int) as @pure
-        svecval = argtypes[2].val::SimpleVector
-        idx = argtypes[3].val::Int
         if 1 <= idx <= length(svecval) && isassigned(svecval, idx)
-            return CallMeta(Const(getindex(svecval, idx)), false)
+            return CallMeta(Const(getindex(svecval, idx)), MethodResultPure())
         end
     elseif la == 2 && istopfunction(f, :typename)
-        return CallMeta(typename_static(argtypes[2]), false)
-    elseif max_methods > 1 && istopfunction(f, :copyto!)
-        max_methods = 1
+        return CallMeta(typename_static(argtypes[2]), MethodResultPure())
     elseif la == 3 && istopfunction(f, :typejoin)
-        val = pure_eval_call(f, argtypes)
-        return CallMeta(val === false ? Type : val, MethodResultPure())
+        if is_all_const_arg(arginfo)
+            val = _pure_eval_call(f, arginfo)
+            return CallMeta(val === nothing ? Type : val, MethodResultPure())
+        end
     end
     atype = argtypes_to_type(argtypes)
-    return abstract_call_gf_by_type(interp, f, argtypes, atype, sv, max_methods)
+    return abstract_call_gf_by_type(interp, f, arginfo, atype, sv, max_methods)
+end
+
+function abstract_call_opaque_closure(interp::AbstractInterpreter, closure::PartialOpaque, arginfo::ArgInfo, sv::InferenceState)
+    sig = argtypes_to_type(arginfo.argtypes)
+    (; rt, edge) = result = abstract_call_method(interp, closure.source, sig, Core.svec(), false, sv)
+    edge !== nothing && add_backedge!(edge, sv)
+    tt = closure.typ
+    sigT = (unwrap_unionall(tt)::DataType).parameters[1]
+    match = MethodMatch(sig, Core.svec(), closure.source, sig <: rewrap_unionall(sigT, tt))
+    const_result = nothing
+    if !result.edgecycle
+        const_call_result = abstract_call_method_with_const_args(interp, result,
+            nothing, arginfo, match, sv)
+        if const_call_result !== nothing
+            if const_call_result.rt ⊑ rt
+                (; rt, const_result) = const_call_result
+            end
+        end
+    end
+    info = OpaqueClosureCallInfo(match, const_result)
+    return CallMeta(from_interprocedural!(rt, sv, arginfo, match.spec_types), info)
+end
+
+function most_general_argtypes(closure::PartialOpaque)
+    ret = Any[]
+    cc = widenconst(closure)
+    argt = (unwrap_unionall(cc)::DataType).parameters[1]
+    if !isa(argt, DataType) || argt.name !== typename(Tuple)
+        argt = Tuple
+    end
+    return most_general_argtypes(closure.source, argt, false)
 end
 
 # call where the function is any lattice element
-function abstract_call(interp::AbstractInterpreter, fargs::Union{Nothing,Vector{Any}}, argtypes::Vector{Any},
-                       sv::InferenceState, max_methods::Int = InferenceParams(interp).MAX_METHODS)
-    #print("call ", e.args[1], argtypes, "\n\n")
+function abstract_call(interp::AbstractInterpreter, arginfo::ArgInfo,
+                       sv::InferenceState, max_methods::Union{Int, Nothing} = nothing)
+    argtypes = arginfo.argtypes
     ft = argtypes[1]
-    if isa(ft, Const)
-        f = ft.val
-    elseif isconstType(ft)
-        f = ft.parameters[1]
-    elseif isa(ft, DataType) && isdefined(ft, :instance)
-        f = ft.instance
-    else
+    f = singleton_type(ft)
+    if isa(ft, PartialOpaque)
+        newargtypes = copy(argtypes)
+        newargtypes[1] = ft.env
+        tristate_merge!(sv, Effects()) # TODO
+        return abstract_call_opaque_closure(interp, ft, ArgInfo(arginfo.fargs, newargtypes), sv)
+    elseif (uft = unwrap_unionall(widenconst(ft)); isa(uft, DataType) && uft.name === typename(Core.OpaqueClosure))
+        tristate_merge!(sv, Effects()) # TODO
+        return CallMeta(rewrap_unionall((uft::DataType).parameters[2], widenconst(ft)), false)
+    elseif f === nothing
         # non-constant function, but the number of arguments is known
         # and the ft is not a Builtin or IntrinsicFunction
-        if typeintersect(widenconst(ft), Builtin) != Union{}
+        if hasintersect(widenconst(ft), Union{Builtin, Core.OpaqueClosure})
+            tristate_merge!(sv, Effects())
             add_remark!(interp, sv, "Could not identify method table for call")
             return CallMeta(Any, false)
         end
-        return abstract_call_gf_by_type(interp, nothing, argtypes, argtypes_to_type(argtypes), sv, max_methods)
+        max_methods = max_methods === nothing ? get_max_methods(sv.mod, interp) : max_methods
+        return abstract_call_gf_by_type(interp, nothing, arginfo, argtypes_to_type(argtypes), sv, max_methods)
     end
-    return abstract_call_known(interp, f, fargs, argtypes, sv, max_methods)
+    max_methods = max_methods === nothing ? get_max_methods(f, sv.mod, interp) : max_methods
+    return abstract_call_known(interp, f, arginfo, sv, max_methods)
 end
 
 function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
@@ -1045,10 +1785,11 @@ function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
         spsig = linfo.def.sig
         if isa(spsig, UnionAll)
             if !isempty(linfo.sparam_vals)
-                env = pointer_from_objref(linfo.sparam_vals) + sizeof(Ptr{Cvoid})
-                T = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), T, spsig, env)
+                sparam_vals = Any[isvarargtype(v) ? TypeVar(:N, Union{}, Any) :
+                                  v for v in  linfo.sparam_vals]
+                T = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), T, spsig, sparam_vals)
                 isref && isreturn && T === Any && return Bottom # catch invalid return Ref{T} where T = Any
-                for v in linfo.sparam_vals
+                for v in sparam_vals
                     if isa(v, TypeVar)
                         T = UnionAll(v, T)
                     end
@@ -1058,10 +1799,7 @@ function sp_type_rewrap(@nospecialize(T), linfo::MethodInstance, isreturn::Bool)
             end
         end
     end
-    while isa(T, TypeVar)
-        T = T.ub
-    end
-    return T
+    return unwraptv(T)
 end
 
 function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::VarTable, sv::InferenceState)
@@ -1072,13 +1810,13 @@ function abstract_eval_cfunction(interp::AbstractInterpreter, e::Expr, vtypes::V
     # this may be the wrong world for the call,
     # but some of the result is likely to be valid anyways
     # and that may help generate better codegen
-    abstract_call(interp, nothing, at, sv)
+    abstract_call(interp, ArgInfo(nothing, at), sv)
     nothing
 end
 
 function abstract_eval_value_expr(interp::AbstractInterpreter, e::Expr, vtypes::VarTable, sv::InferenceState)
     if e.head === :static_parameter
-        n = e.args[1]
+        n = e.args[1]::Int
         t = Any
         if 1 <= n <= length(sv.sptypes)
             t = sv.sptypes[n]
@@ -1093,13 +1831,13 @@ end
 
 function abstract_eval_special_value(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
     if isa(e, QuoteNode)
-        return Const((e::QuoteNode).value)
+        return Const(e.value)
     elseif isa(e, SSAValue)
-        return abstract_eval_ssavalue(e::SSAValue, sv.src)
-    elseif isa(e, Slot)
+        return abstract_eval_ssavalue(e, sv)
+    elseif isa(e, SlotNumber) || isa(e, Argument)
         return vtypes[slot_id(e)].typ
     elseif isa(e, GlobalRef)
-        return abstract_eval_global(e.mod, e.name)
+        return abstract_eval_global(e.mod, e.name, sv)
     end
 
     return Const(e)
@@ -1109,80 +1847,139 @@ function abstract_eval_value(interp::AbstractInterpreter, @nospecialize(e), vtyp
     if isa(e, Expr)
         return abstract_eval_value_expr(interp, e, vtypes, sv)
     else
-        return abstract_eval_special_value(interp, e, vtypes, sv)
+        typ = abstract_eval_special_value(interp, e, vtypes, sv)
+        return collect_limitations!(typ, sv)
+    end
+end
+
+function collect_argtypes(interp::AbstractInterpreter, ea::Vector{Any}, vtypes::VarTable, sv::InferenceState)
+    n = length(ea)
+    argtypes = Vector{Any}(undef, n)
+    @inbounds for i = 1:n
+        ai = abstract_eval_value(interp, ea[i], vtypes, sv)
+        if ai === Bottom
+            return nothing
+        end
+        argtypes[i] = ai
     end
+    return argtypes
 end
 
 function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), vtypes::VarTable, sv::InferenceState)
     if !isa(e, Expr)
+        if isa(e, PhiNode)
+            rt = Union{}
+            for val in e.values
+                rt = tmerge(rt, abstract_eval_special_value(interp, val, vtypes, sv))
+            end
+            return rt
+        end
         return abstract_eval_special_value(interp, e, vtypes, sv)
     end
     e = e::Expr
-    if e.head === :call
+    ehead = e.head
+    if ehead === :call
         ea = e.args
-        n = length(ea)
-        argtypes = Vector{Any}(undef, n)
-        @inbounds for i = 1:n
-            ai = abstract_eval_value(interp, ea[i], vtypes, sv)
-            if ai === Bottom
-                return Bottom
-            end
-            argtypes[i] = ai
-        end
-        callinfo = abstract_call(interp, ea, argtypes, sv)
-        sv.stmt_info[sv.currpc] = callinfo.info
-        t = callinfo.rt
-    elseif e.head === :new
-        t = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))[1]
-        if isconcretetype(t) && !t.mutable
-            args = Vector{Any}(undef, length(e.args)-1)
-            ats = Vector{Any}(undef, length(e.args)-1)
-            anyconst = false
-            allconst = true
+        argtypes = collect_argtypes(interp, ea, vtypes, sv)
+        if argtypes === nothing
+            t = Bottom
+        else
+            callinfo = abstract_call(interp, ArgInfo(ea, argtypes), sv)
+            sv.stmt_info[sv.currpc] = callinfo.info
+            t = callinfo.rt
+        end
+    elseif ehead === :new
+        t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))
+        is_nothrow = true
+        if isconcretedispatch(t)
+            fcount = fieldcount(t)
+            nargs = length(e.args) - 1
+            is_nothrow && (is_nothrow = fcount ≥ nargs)
+            ats = Vector{Any}(undef, nargs)
+            local anyrefine = false
+            local allconst = true
             for i = 2:length(e.args)
-                at = abstract_eval_value(interp, e.args[i], vtypes, sv)
-                if !anyconst
-                    anyconst = has_nontrivial_const_info(at)
-                end
-                ats[i-1] = at
+                at = widenconditional(abstract_eval_value(interp, e.args[i], vtypes, sv))
+                ft = fieldtype(t, i-1)
+                is_nothrow && (is_nothrow = at ⊑ ft)
+                at = tmeet(at, ft)
                 if at === Bottom
                     t = Bottom
-                    allconst = anyconst = false
-                    break
-                elseif at isa Const
-                    if !(at.val isa fieldtype(t, i - 1))
-                        t = Bottom
-                        allconst = anyconst = false
-                        break
-                    end
-                    args[i-1] = at.val
-                else
+                    tristate_merge!(sv, Effects(EFFECTS_TOTAL;
+                        # consistent = ALWAYS_TRUE, # N.B depends on !ismutabletype(t) above
+                        nothrow = TRISTATE_UNKNOWN))
+                    @goto t_computed
+                elseif !isa(at, Const)
                     allconst = false
                 end
+                if !anyrefine
+                    anyrefine = has_nontrivial_const_info(at) || # constant information
+                                at ⋤ ft                          # just a type-level information, but more precise than the declared type
+                end
+                ats[i-1] = at
             end
-            # For now, don't allow partially initialized Const/PartialStruct
-            if t !== Bottom && fieldcount(t) == length(ats)
+            # For now, don't allow:
+            # - Const/PartialStruct of mutables
+            # - partially initialized Const/PartialStruct
+            if !ismutabletype(t) && fcount == nargs
                 if allconst
-                    t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, args, length(args)))
-                elseif anyconst
+                    argvals = Vector{Any}(undef, nargs)
+                    for j in 1:nargs
+                        argvals[j] = (ats[j]::Const).val
+                    end
+                    t = Const(ccall(:jl_new_structv, Any, (Any, Ptr{Cvoid}, UInt32), t, argvals, nargs))
+                elseif anyrefine
                     t = PartialStruct(t, ats)
                 end
             end
-        end
-    elseif e.head === :splatnew
-        t = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))[1]
-        if length(e.args) == 2 && isconcretetype(t) && !t.mutable
+        else
+            is_nothrow = false
+        end
+        tristate_merge!(sv, Effects(EFFECTS_TOTAL;
+            consistent = !ismutabletype(t) ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+            nothrow = is_nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN))
+    elseif ehead === :splatnew
+        t, isexact = instanceof_tfunc(abstract_eval_value(interp, e.args[1], vtypes, sv))
+        is_nothrow = false # TODO: More precision
+        if length(e.args) == 2 && isconcretetype(t) && !ismutabletype(t)
             at = abstract_eval_value(interp, e.args[2], vtypes, sv)
             n = fieldcount(t)
-            if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val) &&
-                let t = t, at = at; _all(i->at.val[i] isa fieldtype(t, i), 1:n); end
+            if isa(at, Const) && isa(at.val, Tuple) && n == length(at.val::Tuple) &&
+                let t = t, at = at; _all(i->getfield(at.val::Tuple, i) isa fieldtype(t, i), 1:n); end
+                is_nothrow = isexact && isconcretedispatch(t)
                 t = Const(ccall(:jl_new_structt, Any, (Any, Any), t, at.val))
-            elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields) &&
-                let t = t, at = at; _all(i->at.fields[i] ⊑ fieldtype(t, i), 1:n); end
-                t = PartialStruct(t, at.fields)
+            elseif isa(at, PartialStruct) && at ⊑ Tuple && n == length(at.fields::Vector{Any}) &&
+                let t = t, at = at; _all(i->(at.fields::Vector{Any})[i] ⊑ fieldtype(t, i), 1:n); end
+                is_nothrow = isexact && isconcretedispatch(t)
+                t = PartialStruct(t, at.fields::Vector{Any})
+            end
+        end
+        tristate_merge!(sv, Effects(EFFECTS_TOTAL;
+            consistent = ismutabletype(t) ? TRISTATE_UNKNOWN : ALWAYS_TRUE,
+            nothrow = is_nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN))
+    elseif ehead === :new_opaque_closure
+        tristate_merge!(sv, Effects()) # TODO
+        t = Union{}
+        if length(e.args) >= 4
+            ea = e.args
+            argtypes = collect_argtypes(interp, ea, vtypes, sv)
+            if argtypes === nothing
+                t = Bottom
+            else
+                t = _opaque_closure_tfunc(argtypes[1], argtypes[2], argtypes[3],
+                    argtypes[4], argtypes[5:end], sv.linfo)
+                if isa(t, PartialOpaque)
+                    # Infer this now so that the specialization is available to
+                    # optimization.
+                    argtypes = most_general_argtypes(t)
+                    pushfirst!(argtypes, t.env)
+                    callinfo = abstract_call_opaque_closure(interp, t,
+                        ArgInfo(nothing, argtypes), sv)
+                    sv.stmt_info[sv.currpc] = OpaqueClosureCreateInfo(callinfo)
+                end
             end
         end
-    elseif e.head === :foreigncall
+    elseif ehead === :foreigncall
         abstract_eval_value(interp, e.args[1], vtypes, sv)
         t = sp_type_rewrap(e.args[2], sv.linfo, true)
         for i = 3:length(e.args)
@@ -1190,24 +1987,41 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = Bottom
             end
         end
-    elseif e.head === :cfunction
+        cconv = e.args[5]
+        if isa(cconv, QuoteNode) && (v = cconv.value; isa(v, Tuple{Symbol, UInt8}))
+            effects = v[2]
+            effects = decode_effects_override(effects)
+            tristate_merge!(sv, Effects(
+                effects.consistent ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+                effects.effect_free ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+                effects.nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+                effects.terminates_globally ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+                #=nonoverlayed=#true
+            ))
+        else
+            tristate_merge!(sv, EFFECTS_UNKNOWN)
+        end
+    elseif ehead === :cfunction
+        tristate_merge!(sv, EFFECTS_UNKNOWN)
         t = e.args[1]
         isa(t, Type) || (t = Any)
         abstract_eval_cfunction(interp, e, vtypes, sv)
-    elseif e.head === :method
+    elseif ehead === :method
+        tristate_merge!(sv, EFFECTS_UNKNOWN)
         t = (length(e.args) == 1) ? Any : Nothing
-    elseif e.head === :copyast
+    elseif ehead === :copyast
+        tristate_merge!(sv, EFFECTS_UNKNOWN)
         t = abstract_eval_value(interp, e.args[1], vtypes, sv)
         if t isa Const && t.val isa Expr
             # `copyast` makes copies of Exprs
             t = Expr
         end
-    elseif e.head === :invoke
+    elseif ehead === :invoke || ehead === :invoke_modify
         error("type inference data-flow error: tried to double infer a function")
-    elseif e.head === :isdefined
+    elseif ehead === :isdefined
         sym = e.args[1]
         t = Bool
-        if isa(sym, Slot)
+        if isa(sym, SlotNumber)
             vtyp = vtypes[slot_id(sym)]
             if vtyp.typ === Bottom
                 t = Const(false) # never assigned previously
@@ -1223,7 +2037,7 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
                 t = Const(true)
             end
         elseif isa(sym, Expr) && sym.head === :static_parameter
-            n = sym.args[1]
+            n = sym.args[1]::Int
             if 1 <= n <= length(sv.sptypes)
                 spty = sv.sptypes[n]
                 if isa(spty, Const)
@@ -1232,211 +2046,371 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e),
             end
         end
     else
-        return abstract_eval_value_expr(interp, e, vtypes, sv)
+        t = abstract_eval_value_expr(interp, e, vtypes, sv)
     end
-    @assert !isa(t, TypeVar)
+    @label t_computed
+    @assert !isa(t, TypeVar) "unhandled TypeVar"
     if isa(t, DataType) && isdefined(t, :instance)
         # replace singleton types with their equivalent Const object
         t = Const(t.instance)
     end
+    if !isempty(sv.pclimitations)
+        if t isa Const || t === Union{}
+            empty!(sv.pclimitations)
+        else
+            t = LimitedAccuracy(t, sv.pclimitations)
+            sv.pclimitations = IdSet{InferenceState}()
+        end
+    end
     return t
 end
 
 function abstract_eval_global(M::Module, s::Symbol)
-    if isdefined(M,s) && isconst(M,s)
-        return Const(getfield(M,s))
+    if isdefined(M, s) && isconst(M, s)
+        return Const(getglobal(M, s))
     end
-    return Any
+    ty = ccall(:jl_binding_type, Any, (Any, Any), M, s)
+    ty === nothing && return Any
+    return ty
+end
+
+function abstract_eval_global(M::Module, s::Symbol, frame::InferenceState)
+    ty = abstract_eval_global(M, s)
+    isa(ty, Const) && return ty
+    if isdefined(M,s)
+        tristate_merge!(frame, Effects(EFFECTS_TOTAL; consistent=TRISTATE_UNKNOWN))
+    else
+        tristate_merge!(frame, Effects(EFFECTS_TOTAL;
+            consistent=TRISTATE_UNKNOWN,
+            nothrow=TRISTATE_UNKNOWN))
+    end
+    return ty
 end
 
+abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.src)
 function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo)
-    typ = src.ssavaluetypes[s.id]
+    typ = (src.ssavaluetypes::Vector{Any})[s.id]
     if typ === NOT_FOUND
         return Bottom
     end
     return typ
 end
 
+function widenreturn(@nospecialize(rt), @nospecialize(bestguess), nargs::Int, slottypes::Vector{Any}, changes::VarTable)
+    if !(bestguess ⊑ Bool) || bestguess === Bool
+        # give up inter-procedural constraint back-propagation
+        # when tmerge would widen the result anyways (as an optimization)
+        rt = widenconditional(rt)
+    else
+        if isa(rt, Conditional)
+            id = slot_id(rt.var)
+            if 1 ≤ id ≤ nargs
+                old_id_type = widenconditional(slottypes[id]) # same as `(states[1]::VarTable)[id].typ`
+                if (!(rt.vtype ⊑ old_id_type) || old_id_type ⊑ rt.vtype) &&
+                   (!(rt.elsetype ⊑ old_id_type) || old_id_type ⊑ rt.elsetype)
+                   # discard this `Conditional` since it imposes
+                   # no new constraint on the argument type
+                   # (the caller will recreate it if needed)
+                   rt = widenconditional(rt)
+               end
+            else
+                # discard this `Conditional` imposed on non-call arguments,
+                # since it's not interesting in inter-procedural context;
+                # we may give constraints on other call argument
+                rt = widenconditional(rt)
+            end
+        end
+        if isa(rt, Conditional)
+            rt = InterConditional(slot_id(rt.var), rt.vtype, rt.elsetype)
+        elseif is_lattice_bool(rt)
+            if isa(bestguess, InterConditional)
+                # if the bestguess so far is already `Conditional`, try to convert
+                # this `rt` into `Conditional` on the slot to avoid overapproximation
+                # due to conflict of different slots
+                rt = bool_rt_to_conditional(rt, slottypes, changes, bestguess.slot)
+            else
+                # pick up the first "interesting" slot, convert `rt` to its `Conditional`
+                # TODO: ideally we want `Conditional` and `InterConditional` to convey
+                # constraints on multiple slots
+                for slot_id in 1:nargs
+                    rt = bool_rt_to_conditional(rt, slottypes, changes, slot_id)
+                    rt isa InterConditional && break
+                end
+            end
+        end
+    end
+
+    # only propagate information we know we can store
+    # and is valid and good inter-procedurally
+    isa(rt, Conditional) && return InterConditional(slot_id(rt.var), rt.vtype, rt.elsetype)
+    isa(rt, InterConditional) && return rt
+    return widenreturn_noconditional(rt)
+end
+
+function widenreturn_noconditional(@nospecialize(rt))
+    isa(rt, Const) && return rt
+    isa(rt, Type) && return rt
+    if isa(rt, PartialStruct)
+        fields = copy(rt.fields)
+        local anyrefine = false
+        for i in 1:length(fields)
+            a = fields[i]
+            a = isvarargtype(a) ? a : widenreturn_noconditional(widenconditional(a))
+            if !anyrefine
+                # TODO: consider adding && const_prop_profitable(a) here?
+                anyrefine = has_const_info(a) ||
+                            a ⊏ fieldtype(rt.typ, i)
+            end
+            fields[i] = a
+        end
+        anyrefine && return PartialStruct(rt.typ, fields)
+    end
+    if isa(rt, PartialOpaque)
+        return rt # XXX: this case was missed in #39512
+    end
+    return widenconst(rt)
+end
+
+
+function handle_control_backedge!(frame::InferenceState, from::Int, to::Int)
+    if from > to
+        if is_effect_overridden(frame, :terminates_globally)
+            # this frame is known to terminate
+        elseif is_effect_overridden(frame, :terminates_locally)
+            # this backedge is known to terminate
+        else
+            tristate_merge!(frame, Effects(EFFECTS_TOTAL; terminates=TRISTATE_UNKNOWN))
+        end
+    end
+    return nothing
+end
+
 # make as much progress on `frame` as possible (without handling cycles)
 function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
     @assert !frame.inferred
     frame.dont_work_on_me = true # mark that this function is currently on the stack
     W = frame.ip
-    s = frame.stmt_types
-    n = frame.nstmts
-    while frame.pc´´ <= n
+    states = frame.stmt_types
+    def = frame.linfo.def
+    isva = isa(def, Method) && def.isva
+    nargs = length(frame.result.argtypes) - isva
+    slottypes = frame.slottypes
+    ssavaluetypes = frame.src.ssavaluetypes::Vector{Any}
+    while !isempty(W)
         # make progress on the active ip set
-        local pc::Int = frame.pc´´ # current program-counter
-        while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
-            #print(pc,": ",s[pc],"\n")
-            local pc´::Int = pc + 1 # next program-counter (after executing instruction)
-            if pc == frame.pc´´
-                # need to update pc´´ to point at the new lowest instruction in W
-                min_pc = _bits_findnext(W.bits, pc + 1)
-                frame.pc´´ = min_pc == -1 ? n + 1 : min_pc
-            end
-            delete!(W, pc)
-            frame.currpc = pc
-            frame.cur_hand = frame.handler_at[pc]
-            frame.stmt_edges[pc] === nothing || empty!(frame.stmt_edges[pc])
-            stmt = frame.src.code[pc]
-            changes = s[pc]::VarTable
-            t = nothing
-
-            hd = isa(stmt, Expr) ? stmt.head : nothing
-
-            if isa(stmt, NewvarNode)
-                sn = slot_id(stmt.slot)
-                changes[sn] = VarState(Bottom, true)
-            elseif isa(stmt, GotoNode)
-                pc´ = (stmt::GotoNode).label
-            elseif isa(stmt, GotoIfNot)
-                condt = abstract_eval_value(interp, stmt.cond, s[pc], frame)
-                if condt === Bottom
-                    break
-                end
-                condval = maybe_extract_const_bool(condt)
-                l = stmt.dest::Int
-                # constant conditions
-                if condval === true
-                elseif condval === false
-                    pc´ = l
-                else
-                    # general case
-                    frame.handler_at[l] = frame.cur_hand
-                    changes_else = changes
-                    if isa(condt, Conditional)
-                        if condt.elsetype !== Any && condt.elsetype !== changes[slot_id(condt.var)]
-                            changes_else = StateUpdate(condt.var, VarState(condt.elsetype, false), changes_else)
-                        end
-                        if condt.vtype !== Any && condt.vtype !== changes[slot_id(condt.var)]
-                            changes = StateUpdate(condt.var, VarState(condt.vtype, false), changes)
-                        end
-                    end
-                    newstate_else = stupdate!(s[l], changes_else)
-                    if newstate_else !== false
-                        # add else branch to active IP list
-                        if l < frame.pc´´
-                            frame.pc´´ = l
-                        end
-                        push!(W, l)
-                        s[l] = newstate_else
-                    end
+        local pc::Int = popfirst!(W)
+        local pc´::Int = pc + 1 # next program-counter (after executing instruction)
+        frame.currpc = pc
+        edges = frame.stmt_edges[pc]
+        edges === nothing || empty!(edges)
+        frame.stmt_info[pc] = nothing
+        stmt = frame.src.code[pc]
+        changes = states[pc]::VarTable
+        t = nothing
+
+        hd = isa(stmt, Expr) ? stmt.head : nothing
+
+        if isa(stmt, NewvarNode)
+            sn = slot_id(stmt.slot)
+            changes[sn] = VarState(Bottom, true)
+        elseif isa(stmt, GotoNode)
+            l = (stmt::GotoNode).label
+            handle_control_backedge!(frame, pc, l)
+            pc´ = l
+        elseif isa(stmt, GotoIfNot)
+            condx = stmt.cond
+            condt = abstract_eval_value(interp, condx, changes, frame)
+            if condt === Bottom
+                empty!(frame.pclimitations)
+                continue
+            end
+            if !(isa(condt, Const) || isa(condt, Conditional)) && isa(condx, SlotNumber)
+                # if this non-`Conditional` object is a slot, we form and propagate
+                # the conditional constraint on it
+                condt = Conditional(condx, Const(true), Const(false))
+            end
+            condval = maybe_extract_const_bool(condt)
+            l = stmt.dest::Int
+            if !isempty(frame.pclimitations)
+                # we can't model the possible effect of control
+                # dependencies on the return value, so we propagate it
+                # directly to all the return values (unless we error first)
+                condval isa Bool || union!(frame.limitations, frame.pclimitations)
+                empty!(frame.pclimitations)
+            end
+            # constant conditions
+            if condval === true
+            elseif condval === false
+                handle_control_backedge!(frame, pc, l)
+                pc´ = l
+            else
+                # general case
+                changes_else = changes
+                if isa(condt, Conditional)
+                    changes_else = conditional_changes(changes_else, condt.elsetype, condt.var)
+                    changes      = conditional_changes(changes,      condt.vtype,    condt.var)
                 end
-            elseif isa(stmt, ReturnNode)
-                pc´ = n + 1
-                rt = widenconditional(abstract_eval_value(interp, stmt.val, s[pc], frame))
-                if !isa(rt, Const) && !isa(rt, Type) && !isa(rt, PartialStruct)
-                    # only propagate information we know we can store
-                    # and is valid inter-procedurally
-                    rt = widenconst(rt)
+                newstate_else = stupdate!(states[l], changes_else)
+                if newstate_else !== nothing
+                    handle_control_backedge!(frame, pc, l)
+                    # add else branch to active IP list
+                    push!(W, l)
+                    states[l] = newstate_else
                 end
-                if tchanged(rt, frame.bestguess)
-                    # new (wider) return type for frame
-                    frame.bestguess = tmerge(frame.bestguess, rt)
-                    for (caller, caller_pc) in frame.cycle_backedges
-                        # notify backedges of updated type information
-                        typeassert(caller.stmt_types[caller_pc], VarTable) # we must have visited this statement before
-                        if !(caller.src.ssavaluetypes[caller_pc] === Any)
-                            # no reason to revisit if that call-site doesn't affect the final result
-                            if caller_pc < caller.pc´´
-                                caller.pc´´ = caller_pc
-                            end
-                            push!(caller.ip, caller_pc)
-                        end
+            end
+        elseif isa(stmt, ReturnNode)
+            bestguess = frame.bestguess
+            rt = abstract_eval_value(interp, stmt.val, changes, frame)
+            rt = widenreturn(rt, bestguess, nargs, slottypes, changes)
+            # narrow representation of bestguess slightly to prepare for tmerge with rt
+            if rt isa InterConditional && bestguess isa Const
+                let slot_id = rt.slot
+                    old_id_type = slottypes[slot_id]
+                    if bestguess.val === true && rt.elsetype !== Bottom
+                        bestguess = InterConditional(slot_id, old_id_type, Bottom)
+                    elseif bestguess.val === false && rt.vtype !== Bottom
+                        bestguess = InterConditional(slot_id, Bottom, old_id_type)
                     end
                 end
-            elseif hd === :enter
-                l = stmt.args[1]::Int
-                frame.cur_hand = Pair{Any,Any}(l, frame.cur_hand)
-                # propagate type info to exception handler
-                old = s[l]
-                new = s[pc]::Array{Any,1}
-                newstate_catch = stupdate!(old, new)
-                if newstate_catch !== false
-                    if l < frame.pc´´
-                        frame.pc´´ = l
+            end
+            # copy limitations to return value
+            if !isempty(frame.pclimitations)
+                union!(frame.limitations, frame.pclimitations)
+                empty!(frame.pclimitations)
+            end
+            if !isempty(frame.limitations)
+                rt = LimitedAccuracy(rt, copy(frame.limitations))
+            end
+            if tchanged(rt, bestguess)
+                # new (wider) return type for frame
+                bestguess = tmerge(bestguess, rt)
+                # TODO: if bestguess isa InterConditional && !interesting(bestguess); bestguess = widenconditional(bestguess); end
+                frame.bestguess = bestguess
+                for (caller, caller_pc) in frame.cycle_backedges
+                    # notify backedges of updated type information
+                    typeassert(caller.stmt_types[caller_pc], VarTable) # we must have visited this statement before
+                    if !((caller.src.ssavaluetypes::Vector{Any})[caller_pc] === Any)
+                        # no reason to revisit if that call-site doesn't affect the final result
+                        push!(caller.ip, caller_pc)
                     end
-                    push!(W, l)
-                    s[l] = newstate_catch
                 end
-                typeassert(s[l], VarTable)
-                frame.handler_at[l] = frame.cur_hand
-            elseif hd === :leave
-                for i = 1:((stmt.args[1])::Int)
-                    frame.cur_hand = (frame.cur_hand::Pair{Any,Any}).second
+            end
+            continue
+        elseif hd === :enter
+            stmt = stmt::Expr
+            l = stmt.args[1]::Int
+            # propagate type info to exception handler
+            old = states[l]
+            newstate_catch = stupdate!(old, changes)
+            if newstate_catch !== nothing
+                push!(W, l)
+                states[l] = newstate_catch
+            end
+            typeassert(states[l], VarTable)
+        elseif hd === :leave
+        else
+            if hd === :(=)
+                stmt = stmt::Expr
+                t = abstract_eval_statement(interp, stmt.args[2], changes, frame)
+                if t === Bottom
+                    continue
+                end
+                ssavaluetypes[pc] = t
+                lhs = stmt.args[1]
+                if isa(lhs, SlotNumber)
+                    changes = StateUpdate(lhs, VarState(t, false), changes, false)
+                elseif isa(lhs, GlobalRef)
+                    tristate_merge!(frame, Effects(EFFECTS_TOTAL,
+                        effect_free=TRISTATE_UNKNOWN,
+                        nothrow=TRISTATE_UNKNOWN))
+                elseif !isa(lhs, SSAValue)
+                    tristate_merge!(frame, EFFECTS_UNKNOWN)
+                end
+            elseif hd === :method
+                stmt = stmt::Expr
+                fname = stmt.args[1]
+                if isa(fname, SlotNumber)
+                    changes = StateUpdate(fname, VarState(Any, false), changes, false)
                 end
+            elseif hd === :code_coverage_effect ||
+                    (hd !== :boundscheck && # :boundscheck can be narrowed to Bool
+                    hd !== nothing && is_meta_expr_head(hd))
+                # these do not generate code
             else
-                if hd === :(=)
-                    t = abstract_eval_statement(interp, stmt.args[2], changes, frame)
-                    t === Bottom && break
-                    frame.src.ssavaluetypes[pc] = t
-                    lhs = stmt.args[1]
-                    if isa(lhs, Slot)
-                        changes = StateUpdate(lhs, VarState(t, false), changes)
-                    end
-                elseif hd === :method
-                    fname = stmt.args[1]
-                    if isa(fname, Slot)
-                        changes = StateUpdate(fname, VarState(Any, false), changes)
-                    end
-                elseif hd === :inbounds || hd === :meta || hd === :loopinfo || hd === :code_coverage_effect
-                    # these do not generate code
+                t = abstract_eval_statement(interp, stmt, changes, frame)
+                if t === Bottom
+                    continue
+                end
+                if !isempty(frame.ssavalue_uses[pc])
+                    record_ssa_assign(pc, t, frame)
                 else
-                    t = abstract_eval_statement(interp, stmt, changes, frame)
-                    t === Bottom && break
-                    if !isempty(frame.ssavalue_uses[pc])
-                        record_ssa_assign(pc, t, frame)
-                    else
-                        frame.src.ssavaluetypes[pc] = t
-                    end
+                    ssavaluetypes[pc] = t
                 end
-                if frame.cur_hand !== nothing && isa(changes, StateUpdate)
-                    # propagate new type info to exception handler
-                    # the handling for Expr(:enter) propagates all changes from before the try/catch
-                    # so this only needs to propagate any changes
-                    l = frame.cur_hand.first::Int
-                    if stupdate1!(s[l]::VarTable, changes::StateUpdate) !== false
-                        if l < frame.pc´´
-                            frame.pc´´ = l
+            end
+            if isa(changes, StateUpdate)
+                let cur_hand = frame.handler_at[pc], l, enter
+                    while cur_hand != 0
+                        enter = frame.src.code[cur_hand]
+                        l = (enter::Expr).args[1]::Int
+                        # propagate new type info to exception handler
+                        # the handling for Expr(:enter) propagates all changes from before the try/catch
+                        # so this only needs to propagate any changes
+                        if stupdate1!(states[l]::VarTable, changes::StateUpdate) !== false
+                            push!(W, l)
                         end
-                        push!(W, l)
+                        cur_hand = frame.handler_at[cur_hand]
                     end
                 end
             end
+        end
 
-            if t === nothing
-                # mark other reached expressions as `Any` to indicate they don't throw
-                frame.src.ssavaluetypes[pc] = Any
-            end
+        @assert isempty(frame.pclimitations) "unhandled LimitedAccuracy"
 
-            pc´ > n && break # can't proceed with the fast-path fall-through
-            frame.handler_at[pc´] = frame.cur_hand
-            newstate = stupdate!(s[pc´], changes)
-            if isa(stmt, GotoNode) && frame.pc´´ < pc´
-                # if we are processing a goto node anyways,
-                # (such as a terminator for a loop, if-else, or try block),
-                # consider whether we should jump to an older backedge first,
-                # to try to traverse the statements in approximate dominator order
-                if newstate !== false
-                    s[pc´] = newstate
-                end
-                push!(W, pc´)
-                pc = frame.pc´´
-            elseif newstate !== false
-                s[pc´] = newstate
-                pc = pc´
-            elseif pc´ in W
-                pc = pc´
-            else
-                break
-            end
+        if t === nothing
+            # mark other reached expressions as `Any` to indicate they don't throw
+            ssavaluetypes[pc] = Any
+        end
+
+        newstate = stupdate!(states[pc´], changes)
+        if newstate !== nothing
+            states[pc´] = newstate
+            push!(W, pc´)
         end
     end
     frame.dont_work_on_me = false
     nothing
 end
 
+function conditional_changes(changes::VarTable, @nospecialize(typ), var::SlotNumber)
+    oldtyp = changes[slot_id(var)].typ
+    # approximate test for `typ ∩ oldtyp` being better than `oldtyp`
+    # since we probably formed these types with `typesubstract`, the comparison is likely simple
+    if ignorelimited(typ) ⊑ ignorelimited(oldtyp)
+        # typ is better unlimited, but we may still need to compute the tmeet with the limit "causes" since we ignored those in the comparison
+        oldtyp isa LimitedAccuracy && (typ = tmerge(typ, LimitedAccuracy(Bottom, oldtyp.causes)))
+        return StateUpdate(var, VarState(typ, false), changes, true)
+    end
+    return changes
+end
+
+function bool_rt_to_conditional(@nospecialize(rt), slottypes::Vector{Any}, state::VarTable, slot_id::Int)
+    old = slottypes[slot_id]
+    new = widenconditional(state[slot_id].typ) # avoid nested conditional
+    if new ⊑ old && !(old ⊑ new)
+        if isa(rt, Const)
+            val = rt.val
+            if val === true
+                return InterConditional(slot_id, new, Bottom)
+            elseif val === false
+                return InterConditional(slot_id, Bottom, new)
+            end
+        elseif rt === Bool
+            return InterConditional(slot_id, new, new)
+        end
+    end
+    return rt
+end
+
 # make as much progress on `frame` as possible (by handling cycles)
 function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState)
     typeinf_local(interp, frame)
@@ -1447,7 +2421,7 @@ function typeinf_nocycle(interp::AbstractInterpreter, frame::InferenceState)
         no_active_ips_in_callers = true
         for caller in frame.callers_in_cycle
             caller.dont_work_on_me && return false # cycle is above us on the stack
-            if caller.pc´´ <= caller.nstmts # equivalent to `isempty(caller.ip)`
+            if !isempty(caller.ip)
                 # Note that `typeinf_local(interp, caller)` can potentially modify the other frames
                 # `frame.callers_in_cycle`, which is why making incremental progress requires the
                 # outer while loop.
diff --git a/base/compiler/bootstrap.jl b/base/compiler/bootstrap.jl
index f351429aff7eb7..f335cf31a8467b 100644
--- a/base/compiler/bootstrap.jl
+++ b/base/compiler/bootstrap.jl
@@ -5,10 +5,21 @@
 # especially try to make sure any recursive and leaf functions have concrete signatures,
 # since we won't be able to specialize & infer them at runtime
 
-let fs = Any[typeinf_ext, typeinf, typeinf_edge, pure_eval_call, run_passes],
-    world = get_world_counter(),
+time() = ccall(:jl_clock_now, Float64, ())
+
+let
+    world = get_world_counter()
     interp = NativeInterpreter(world)
 
+    analyze_escapes_tt = Tuple{typeof(analyze_escapes), IRCode, Int, Bool, typeof(null_escape_cache)}
+    fs = Any[
+        # we first create caches for the optimizer, because they contain many loop constructions
+        # and they're better to not run in interpreter even during bootstrapping
+        #=analyze_escapes_tt,=# run_passes,
+        # then we create caches for inference entries
+        typeinf_ext, typeinf, typeinf_edge,
+    ]
+    # tfuncs can't be inferred from the inference entries above, so here we infer them manually
     for x in T_FFUNC_VAL
         push!(fs, x[3])
     end
@@ -20,16 +31,22 @@ let fs = Any[typeinf_ext, typeinf, typeinf_edge, pure_eval_call, run_passes],
             println(stderr, "WARNING: tfunc missing for ", reinterpret(IntrinsicFunction, Int32(i)))
         end
     end
+    starttime = time()
     for f in fs
-        for m in _methods_by_ftype(Tuple{typeof(f), Vararg{Any}}, 10, typemax(UInt))
+        if isa(f, DataType) && f.name === typename(Tuple)
+            tt = f
+        else
+            tt = Tuple{typeof(f), Vararg{Any}}
+        end
+        for m in _methods_by_ftype(tt, 10, typemax(UInt))
             # remove any TypeVars from the intersection
             typ = Any[m.spec_types.parameters...]
             for i = 1:length(typ)
-                if isa(typ[i], TypeVar)
-                    typ[i] = typ[i].ub
-                end
+                typ[i] = unwraptv(typ[i])
             end
             typeinf_type(interp, m.method, Tuple{typ...}, m.sparams)
         end
     end
+    endtime = time()
+    println("Core.Compiler ──── ", sub_float(endtime,starttime), " seconds")
 end
diff --git a/base/compiler/cicache.jl b/base/compiler/cicache.jl
index 9adaf6ded0b0fd..294b1f0055f790 100644
--- a/base/compiler/cicache.jl
+++ b/base/compiler/cicache.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 """
     struct InternalCodeCache
 
diff --git a/base/compiler/compiler.jl b/base/compiler/compiler.jl
index 986b8f6497fa37..1132b8976e53c6 100644
--- a/base/compiler/compiler.jl
+++ b/base/compiler/compiler.jl
@@ -1,15 +1,19 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-getfield(getfield(Main, :Core), :eval)(getfield(Main, :Core), :(baremodule Compiler
+getfield(Core, :eval)(Core, :(baremodule Compiler
 
 using Core.Intrinsics, Core.IR
 
 import Core: print, println, show, write, unsafe_write, stdout, stderr,
-             _apply, _apply_iterate, svec, apply_type, Builtin, IntrinsicFunction,
-             MethodInstance, CodeInstance, MethodMatch
+             _apply_iterate, svec, apply_type, Builtin, IntrinsicFunction,
+             MethodInstance, CodeInstance, MethodMatch, PartialOpaque,
+             TypeofVararg
 
 const getproperty = Core.getfield
 const setproperty! = Core.setfield!
+const swapproperty! = Core.swapfield!
+const modifyproperty! = Core.modifyfield!
+const replaceproperty! = Core.replacefield!
 
 ccall(:jl_set_istopmod, Cvoid, (Any, Bool), Compiler, false)
 
@@ -19,9 +23,13 @@ eval(m, x) = Core.eval(m, x)
 include(x) = Core.include(Compiler, x)
 include(mod, x) = Core.include(mod, x)
 
-#############
-# from Base #
-#############
+# The @inline/@noinline macros that can be applied to a function declaration are not available
+# until after array.jl, and so we will mark them within a function body instead.
+macro inline()   Expr(:meta, :inline)   end
+macro noinline() Expr(:meta, :noinline) end
+
+convert(::Type{Any}, Core.@nospecialize x) = x
+convert(::Type{T}, x::T) where {T} = x
 
 # essential files and libraries
 include("essentials.jl")
@@ -50,6 +58,9 @@ include("operators.jl")
 include("pointer.jl")
 include("refvalue.jl")
 
+# the same constructor as defined in float.jl, but with a different name to avoid redefinition
+_Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x))
+
 # checked arithmetic
 const checked_add = +
 const checked_sub = -
@@ -62,6 +73,8 @@ add_with_overflow(x::T, y::T) where {T<:SignedInt}   = checked_sadd_int(x, y)
 add_with_overflow(x::T, y::T) where {T<:UnsignedInt} = checked_uadd_int(x, y)
 add_with_overflow(x::Bool, y::Bool) = (x+y, false)
 
+include("strings/lazy.jl")
+
 # core array operations
 include("indices.jl")
 include("array.jl")
@@ -80,14 +93,16 @@ using .Iterators: Flatten, Filter, product  # for generators
 include("namedtuple.jl")
 
 ntuple(f, ::Val{0}) = ()
-ntuple(f, ::Val{1}) = (@_inline_meta; (f(1),))
-ntuple(f, ::Val{2}) = (@_inline_meta; (f(1), f(2)))
-ntuple(f, ::Val{3}) = (@_inline_meta; (f(1), f(2), f(3)))
+ntuple(f, ::Val{1}) = (@inline; (f(1),))
+ntuple(f, ::Val{2}) = (@inline; (f(1), f(2)))
+ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3)))
 ntuple(f, ::Val{n}) where {n} = ntuple(f, n::Int)
 ntuple(f, n) = (Any[f(i) for i = 1:n]...,)
 
 # core docsystem
 include("docs/core.jl")
+import Core.Compiler.CoreDocs
+Core.atdoc!(CoreDocs.docm)
 
 # sorting
 function sort end
@@ -99,15 +114,18 @@ using .Order
 include("sort.jl")
 using .Sort
 
+# We don't include some.jl, but this definition is still useful.
+something(x::Nothing, y...) = something(y...)
+something(x::Any, y...) = x
+
 ############
 # compiler #
 ############
 
+include("compiler/cicache.jl")
 include("compiler/types.jl")
 include("compiler/utilities.jl")
 include("compiler/validation.jl")
-
-include("compiler/cicache.jl")
 include("compiler/methodtable.jl")
 
 include("compiler/inferenceresult.jl")
@@ -123,6 +141,21 @@ include("compiler/abstractinterpretation.jl")
 include("compiler/typeinfer.jl")
 include("compiler/optimize.jl") # TODO: break this up further + extract utilities
 
+# required for bootstrap because sort.jl uses extrema
+# to decide whether to dispatch to counting sort.
+#
+# TODO: remove it.
+function extrema(x::Array)
+    isempty(x) && throw(ArgumentError("collection must be non-empty"))
+    vmin = vmax = x[1]
+    for i in 2:length(x)
+        xi = x[i]
+        vmax = max(vmax, xi)
+        vmin = min(vmin, xi)
+    end
+    return vmin, vmax
+end
+
 include("compiler/bootstrap.jl")
 ccall(:jl_set_typeinf_func, Cvoid, (Any,), typeinf_ext_toplevel)
 
@@ -131,4 +164,3 @@ Core.eval(Core, :(_parse = Compiler.fl_parse))
 
 end # baremodule Compiler
 ))
-
diff --git a/base/compiler/inferenceresult.jl b/base/compiler/inferenceresult.jl
index 89176b54ec93b5..36702382ef6f5b 100644
--- a/base/compiler/inferenceresult.jl
+++ b/base/compiler/inferenceresult.jl
@@ -1,63 +1,116 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-const EMPTY_VECTOR = Vector{Any}()
-
 function is_argtype_match(@nospecialize(given_argtype),
                           @nospecialize(cache_argtype),
                           overridden_by_const::Bool)
-    if isa(given_argtype, Const) || isa(given_argtype, PartialStruct)
+    if is_forwardable_argtype(given_argtype)
         return is_lattice_equal(given_argtype, cache_argtype)
     end
     return !overridden_by_const
 end
 
+function is_forwardable_argtype(@nospecialize x)
+    return isa(x, Const) ||
+           isa(x, Conditional) ||
+           isa(x, PartialStruct) ||
+           isa(x, PartialOpaque)
+end
+
 # In theory, there could be a `cache` containing a matching `InferenceResult`
 # for the provided `linfo` and `given_argtypes`. The purpose of this function is
 # to return a valid value for `cache_lookup(linfo, argtypes, cache).argtypes`,
 # so that we can construct cache-correct `InferenceResult`s in the first place.
-function matching_cache_argtypes(linfo::MethodInstance, given_argtypes::Vector)
-    @assert isa(linfo.def, Method) # ensure the next line works
-    nargs::Int = linfo.def.nargs
-    @assert length(given_argtypes) >= (nargs - 1)
-    given_argtypes = anymap(widenconditional, given_argtypes)
-    if linfo.def.isva
+function matching_cache_argtypes(
+    linfo::MethodInstance, (arginfo, sv)#=::Tuple{ArgInfo,InferenceState}=#)
+    (; fargs, argtypes) = arginfo
+    def = linfo.def
+    @assert isa(def, Method) # ensure the next line works
+    nargs::Int = def.nargs
+    cache_argtypes, overridden_by_const = matching_cache_argtypes(linfo, nothing)
+    given_argtypes = Vector{Any}(undef, length(argtypes))
+    local condargs = nothing
+    for i in 1:length(argtypes)
+        argtype = argtypes[i]
+        # forward `Conditional` if it conveys a constraint on any other argument
+        if isa(argtype, Conditional) && fargs !== nothing
+            cnd = argtype
+            slotid = find_constrained_arg(cnd, fargs, sv)
+            if slotid !== nothing
+                # using union-split signature, we may be able to narrow down `Conditional`
+                sigt = widenconst(slotid > nargs ? argtypes[slotid] : cache_argtypes[slotid])
+                vtype = tmeet(cnd.vtype, sigt)
+                elsetype = tmeet(cnd.elsetype, sigt)
+                if vtype === Bottom && elsetype === Bottom
+                    # we accidentally proved this method match is impossible
+                    # TODO bail out here immediately rather than just propagating Bottom ?
+                    given_argtypes[i] = Bottom
+                else
+                    if condargs === nothing
+                        condargs = Tuple{Int,Int}[]
+                    end
+                    push!(condargs, (slotid, i))
+                    given_argtypes[i] = Conditional(SlotNumber(slotid), vtype, elsetype)
+                end
+                continue
+            end
+        end
+        given_argtypes[i] = widenconditional(argtype)
+    end
+    isva = def.isva
+    if isva || isvarargtype(given_argtypes[end])
         isva_given_argtypes = Vector{Any}(undef, nargs)
-        for i = 1:(nargs - 1)
+        for i = 1:(nargs - isva)
             isva_given_argtypes[i] = argtype_by_index(given_argtypes, i)
         end
-        if length(given_argtypes) >= nargs || !isvarargtype(given_argtypes[end])
-            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[nargs:end])
-        else
-            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[end:end])
+        if isva
+            if length(given_argtypes) < nargs && isvarargtype(given_argtypes[end])
+                last = length(given_argtypes)
+            else
+                last = nargs
+            end
+            isva_given_argtypes[nargs] = tuple_tfunc(given_argtypes[last:end])
+            # invalidate `Conditional` imposed on varargs
+            if condargs !== nothing
+                for (slotid, i) in condargs
+                    if slotid ≥ last
+                        isva_given_argtypes[i] = widenconditional(isva_given_argtypes[i])
+                    end
+                end
+            end
         end
         given_argtypes = isva_given_argtypes
     end
-    cache_argtypes, overridden_by_const = matching_cache_argtypes(linfo, nothing)
-    if nargs === length(given_argtypes)
-        for i in 1:nargs
-            given_argtype = given_argtypes[i]
-            cache_argtype = cache_argtypes[i]
-            if !is_argtype_match(given_argtype, cache_argtype, overridden_by_const[i])
-                # prefer the argtype we were given over the one computed from `linfo`
-                cache_argtypes[i] = given_argtype
-                overridden_by_const[i] = true
-            end
+    @assert length(given_argtypes) == nargs
+    for i in 1:nargs
+        given_argtype = given_argtypes[i]
+        cache_argtype = cache_argtypes[i]
+        if !is_argtype_match(given_argtype, cache_argtype, false)
+            # prefer the argtype we were given over the one computed from `linfo`
+            cache_argtypes[i] = given_argtype
+            overridden_by_const[i] = true
         end
     end
     return cache_argtypes, overridden_by_const
 end
 
-function matching_cache_argtypes(linfo::MethodInstance, ::Nothing)
-    toplevel = !isa(linfo.def, Method)
-    linfo_argtypes = Any[unwrap_unionall(linfo.specTypes).parameters...]
-    nargs::Int = toplevel ? 0 : linfo.def.nargs
+function most_general_argtypes(method::Union{Method, Nothing}, @nospecialize(specTypes),
+    withfirst::Bool = true)
+    toplevel = method === nothing
+    isva = !toplevel && method.isva
+    linfo_argtypes = Any[(unwrap_unionall(specTypes)::DataType).parameters...]
+    nargs::Int = toplevel ? 0 : method.nargs
+    if !withfirst
+        # For opaque closure, the closure environment is processed elsewhere
+        nargs -= 1
+    end
     cache_argtypes = Vector{Any}(undef, nargs)
     # First, if we're dealing with a varargs method, then we set the last element of `args`
     # to the appropriate `Tuple` type or `PartialStruct` instance.
-    if !toplevel && linfo.def.isva
-        if linfo.specTypes == Tuple
+    if !toplevel && isva
+        if specTypes::Type == Tuple
             if nargs > 1
-                linfo_argtypes = svec(Any[Any for i = 1:(nargs - 1)]..., Tuple.parameters[1])
+                linfo_argtypes = Any[Any for i = 1:nargs]
+                linfo_argtypes[end] = Vararg{Any}
             end
             vargtype = Tuple
         else
@@ -65,18 +118,17 @@ function matching_cache_argtypes(linfo::MethodInstance, ::Nothing)
             if nargs > linfo_argtypes_length
                 va = linfo_argtypes[linfo_argtypes_length]
                 if isvarargtype(va)
-                    new_va = rewrap_unionall(unconstrain_vararg_length(va), linfo.specTypes)
-                    vargtype_elements = Any[new_va]
+                    new_va = rewrap_unionall(unconstrain_vararg_length(va), specTypes)
                     vargtype = Tuple{new_va}
                 else
-                    vargtype_elements = Any[]
                     vargtype = Tuple{}
                 end
             else
                 vargtype_elements = Any[]
-                for p in linfo_argtypes[nargs:linfo_argtypes_length]
-                    p = isvarargtype(p) ? unconstrain_vararg_length(p) : p
-                    push!(vargtype_elements, rewrap(p, linfo.specTypes))
+                for i in nargs:linfo_argtypes_length
+                    p = linfo_argtypes[i]
+                    p = unwraptv(isvarargtype(p) ? unconstrain_vararg_length(p) : p)
+                    push!(vargtype_elements, elim_free_typevars(rewrap_unionall(p, specTypes)))
                 end
                 for i in 1:length(vargtype_elements)
                     atyp = vargtype_elements[i]
@@ -108,16 +160,14 @@ function matching_cache_argtypes(linfo::MethodInstance, ::Nothing)
                 atyp = unwrapva(atyp)
                 tail_index -= 1
             end
-            while isa(atyp, TypeVar)
-                atyp = atyp.ub
-            end
+            atyp = unwraptv(atyp)
             if isa(atyp, DataType) && isdefined(atyp, :instance)
                 # replace singleton types with their equivalent Const object
                 atyp = Const(atyp.instance)
             elseif isconstType(atyp)
                 atyp = Const(atyp.parameters[1])
             else
-                atyp = rewrap(atyp, linfo.specTypes)
+                atyp = elim_free_typevars(rewrap_unionall(atyp, specTypes))
             end
             i == n && (lastatype = atyp)
             cache_argtypes[i] = atyp
@@ -128,6 +178,25 @@ function matching_cache_argtypes(linfo::MethodInstance, ::Nothing)
     else
         @assert nargs == 0 "invalid specialization of method" # wrong number of arguments
     end
+    cache_argtypes
+end
+
+# eliminate free `TypeVar`s in order to make the life much easier down the road:
+# at runtime only `Type{...}::DataType` can contain invalid type parameters, and other
+# malformed types here are user-constructed type arguments given at an inference entry
+# so this function will replace only the malformed `Type{...}::DataType` with `Type`
+# and simply replace other possibilities with `Any`
+function elim_free_typevars(@nospecialize t)
+    if has_free_typevars(t)
+        return isType(t) ? Type : Any
+    else
+        return t
+    end
+end
+
+function matching_cache_argtypes(linfo::MethodInstance, ::Nothing)
+    mthd = isa(linfo.def, Method) ? linfo.def::Method : nothing
+    cache_argtypes = most_general_argtypes(mthd, linfo.specTypes)
     return cache_argtypes, falses(length(cache_argtypes))
 end
 
diff --git a/base/compiler/inferencestate.jl b/base/compiler/inferencestate.jl
index 56b766592787ea..24423deef86239 100644
--- a/base/compiler/inferencestate.jl
+++ b/base/compiler/inferencestate.jl
@@ -1,135 +1,344 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-const LineNum = Int
+# The type of a variable load is either a value or an UndefVarError
+# (only used in abstractinterpret, doesn't appear in optimize)
+struct VarState
+    typ
+    undef::Bool
+    VarState(@nospecialize(typ), undef::Bool) = new(typ, undef)
+end
+
+"""
+    const VarTable = Vector{VarState}
+
+The extended lattice that maps local variables to inferred type represented as `AbstractLattice`.
+Each index corresponds to the `id` of `SlotNumber` which identifies each local variable.
+Note that `InferenceState` will maintain multiple `VarTable`s at each SSA statement
+to enable flow-sensitive analysis.
+"""
+const VarTable = Vector{VarState}
+
+mutable struct BitSetBoundedMinPrioritySet <: AbstractSet{Int}
+    elems::BitSet
+    min::Int
+    # Stores whether min is exact or a lower bound
+    # If exact, it is not set in elems
+    min_exact::Bool
+    max::Int
+end
+
+function BitSetBoundedMinPrioritySet(max::Int)
+    bs = BitSet()
+    bs.offset = 0
+    BitSetBoundedMinPrioritySet(bs, max+1, true, max)
+end
+
+@noinline function _advance_bsbmp!(bsbmp::BitSetBoundedMinPrioritySet)
+    @assert !bsbmp.min_exact
+    bsbmp.min = _bits_findnext(bsbmp.elems.bits, bsbmp.min)::Int
+    bsbmp.min < 0 && (bsbmp.min = bsbmp.max + 1)
+    bsbmp.min_exact = true
+    delete!(bsbmp.elems, bsbmp.min)
+    return nothing
+end
+
+function isempty(bsbmp::BitSetBoundedMinPrioritySet)
+    if bsbmp.min > bsbmp.max
+        return true
+    end
+    bsbmp.min_exact && return false
+    _advance_bsbmp!(bsbmp)
+    return bsbmp.min > bsbmp.max
+end
+
+function popfirst!(bsbmp::BitSetBoundedMinPrioritySet)
+    bsbmp.min_exact || _advance_bsbmp!(bsbmp)
+    m = bsbmp.min
+    m > bsbmp.max && throw(ArgumentError("BitSetBoundedMinPrioritySet must be non-empty"))
+    bsbmp.min = m+1
+    bsbmp.min_exact = false
+    return m
+end
+
+function push!(bsbmp::BitSetBoundedMinPrioritySet, idx::Int)
+    if idx <= bsbmp.min
+        if bsbmp.min_exact && bsbmp.min < bsbmp.max && idx != bsbmp.min
+            push!(bsbmp.elems, bsbmp.min)
+        end
+        bsbmp.min = idx
+        bsbmp.min_exact = true
+        return nothing
+    end
+    push!(bsbmp.elems, idx)
+    return nothing
+end
+
+function in(idx::Int, bsbmp::BitSetBoundedMinPrioritySet)
+    if bsbmp.min_exact && idx == bsbmp.min
+        return true
+    end
+    return idx in bsbmp.elems
+end
 
 mutable struct InferenceState
-    params::InferenceParams
-    result::InferenceResult # remember where to put the result
+    #= information about this method instance =#
     linfo::MethodInstance
-    sptypes::Vector{Any}    # types of static parameter
-    slottypes::Vector{Any}
+    world::UInt
     mod::Module
-    currpc::LineNum
-
-    # info on the state of inference and the linfo
+    sptypes::Vector{Any}
+    slottypes::Vector{Any}
     src::CodeInfo
-    world::UInt
-    valid_worlds::WorldRange
-    nargs::Int
-    stmt_types::Vector{Any}
-    stmt_edges::Vector{Any}
+
+    #= intermediate states for local abstract interpretation =#
+    currpc::Int
+    ip::BitSetBoundedMinPrioritySet # current active instruction pointers
+    handler_at::Vector{Int} # current exception handler info
+    ssavalue_uses::Vector{BitSet} # ssavalue sparsity and restart info
+    stmt_types::Vector{Union{Nothing, VarTable}}
+    stmt_edges::Vector{Union{Nothing, Vector{Any}}}
     stmt_info::Vector{Any}
-    # return type
-    bestguess #::Type
-    # current active instruction pointers
-    ip::BitSet
-    pc´´::LineNum
-    nstmts::Int
-    # current exception handler info
-    cur_hand #::Union{Nothing, Pair{LineNum, prev_handler}}
-    handler_at::Vector{Any}
-    n_handlers::Int
-    # ssavalue sparsity and restart info
-    ssavalue_uses::Vector{BitSet}
-    throw_blocks::BitSet
-
-    cycle_backedges::Vector{Tuple{InferenceState, LineNum}} # call-graph backedges connecting from callee to caller
+
+    #= interprocedural intermediate states for abstract interpretation =#
+    pclimitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on currpc ssavalue
+    limitations::IdSet{InferenceState} # causes of precision restrictions (LimitedAccuracy) on return
+    cycle_backedges::Vector{Tuple{InferenceState, Int}} # call-graph backedges connecting from callee to caller
     callers_in_cycle::Vector{InferenceState}
+    dont_work_on_me::Bool
     parent::Union{Nothing, InferenceState}
+    inferred::Bool # TODO move this to InferenceResult?
 
-    # TODO: move these to InferenceResult / Params?
-    cached::Bool
-    limited::Bool
-    inferred::Bool
-    dont_work_on_me::Bool
+    #= results =#
+    result::InferenceResult # remember where to put the result
+    valid_worlds::WorldRange
+    bestguess #::Type
+    ipo_effects::Effects
 
-    # The place to look up methods while working on this function.
-    # In particular, we cache method lookup results for the same function to
-    # fast path repeated queries.
-    method_table::CachedMethodTable{InternalMethodTable}
+    #= flags =#
+    params::InferenceParams
+    # Whether to restrict inference of abstract call sites to avoid excessive work
+    # Set by default for toplevel frame.
+    restrict_abstract_call_sites::Bool
+    cached::Bool # TODO move this to InferenceResult?
 
     # The interpreter that created this inference state. Not looked at by
     # NativeInterpreter. But other interpreters may use this to detect cycles
     interp::AbstractInterpreter
 
     # src is assumed to be a newly-allocated CodeInfo, that can be modified in-place to contain intermediate results
-    function InferenceState(result::InferenceResult, src::CodeInfo,
-                            cached::Bool, interp::AbstractInterpreter)
+    function InferenceState(result::InferenceResult,
+        src::CodeInfo, cache::Symbol, interp::AbstractInterpreter)
         linfo = result.linfo
-        code = src.code::Array{Any,1}
-        toplevel = !isa(linfo.def, Method)
-
-        sp = sptypes_from_meth_instance(linfo::MethodInstance)
+        world = get_world_counter(interp)
+        def = linfo.def
+        mod = isa(def, Method) ? def.module : def
+        sptypes = sptypes_from_meth_instance(linfo)
 
+        code = src.code::Vector{Any}
+        nstmts = length(code)
+        currpc = 1
+        ip = BitSetBoundedMinPrioritySet(nstmts)
+        handler_at = compute_trycatch(code, ip.elems)
+        push!(ip, 1)
         nssavalues = src.ssavaluetypes::Int
-        src.ssavaluetypes = Any[ NOT_FOUND for i = 1:nssavalues ]
-        stmt_info = Any[ nothing for i = 1:length(code) ]
-
-        n = length(code)
-        s_edges = Any[ nothing for i = 1:n ]
-        s_types = Any[ nothing for i = 1:n ]
+        ssavalue_uses = find_ssavalue_uses(code, nssavalues)
+        stmt_types = Union{Nothing, VarTable}[ nothing for i = 1:nstmts ]
+        stmt_edges = Union{Nothing, Vector{Any}}[ nothing for i = 1:nstmts ]
+        stmt_info = Any[ nothing for i = 1:nstmts ]
 
-        # initial types
         nslots = length(src.slotflags)
+        slottypes = Vector{Any}(undef, nslots)
         argtypes = result.argtypes
         nargs = length(argtypes)
-        s_argtypes = VarTable(undef, nslots)
-        slottypes = Vector{Any}(undef, nslots)
+        stmt_types[1] = stmt_type1 = VarTable(undef, nslots)
         for i in 1:nslots
-            at = (i > nargs) ? Bottom : argtypes[i]
-            s_argtypes[i] = VarState(at, i > nargs)
-            slottypes[i] = at
+            argtyp = (i > nargs) ? Bottom : argtypes[i]
+            stmt_type1[i] = VarState(argtyp, i > nargs)
+            slottypes[i] = argtyp
         end
-        s_types[1] = s_argtypes
-
-        ssavalue_uses = find_ssavalue_uses(code, nssavalues)
-        throw_blocks = find_throw_blocks(code)
 
-        # exception handlers
-        cur_hand = nothing
-        handler_at = Any[ nothing for i=1:n ]
-        n_handlers = 0
+        pclimitations = IdSet{InferenceState}()
+        limitations = IdSet{InferenceState}()
+        cycle_backedges = Vector{Tuple{InferenceState,Int}}()
+        callers_in_cycle = Vector{InferenceState}()
+        dont_work_on_me = false
+        parent = nothing
+        inferred = false
 
-        W = BitSet()
-        push!(W, 1) #initial pc to visit
+        valid_worlds = WorldRange(src.min_world, src.max_world == typemax(UInt) ? get_world_counter() : src.max_world)
+        bestguess = Bottom
+        # TODO: Currently, any :inbounds declaration taints consistency,
+        #       because we cannot be guaranteed whether or not boundschecks
+        #       will be eliminated and if they are, we cannot be guaranteed
+        #       that no undefined behavior will occur (the effects assumptions
+        #       are stronger than the inbounds assumptions, since the latter
+        #       requires dynamic reachability, while the former is global).
+        inbounds = inbounds_option()
+        inbounds_taints_consistency = !(inbounds === :on || (inbounds === :default && !any_inbounds(code)))
+        consistent = inbounds_taints_consistency ? TRISTATE_UNKNOWN : ALWAYS_TRUE
+        ipo_effects = Effects(EFFECTS_TOTAL; consistent, inbounds_taints_consistency)
 
-        if !toplevel
-            meth = linfo.def
-            inmodule = meth.module
-        else
-            inmodule = linfo.def::Module
-        end
+        params = InferenceParams(interp)
+        restrict_abstract_call_sites = isa(linfo.def, Module)
+        @assert cache === :no || cache === :local || cache === :global
+        cached = cache === :global
 
-        valid_worlds = WorldRange(src.min_world,
-            src.max_world == typemax(UInt) ? get_world_counter() : src.max_world)
         frame = new(
-            InferenceParams(interp), result, linfo,
-            sp, slottypes, inmodule, 0,
-            src, get_world_counter(interp), valid_worlds,
-            nargs, s_types, s_edges, stmt_info,
-            Union{}, W, 1, n,
-            cur_hand, handler_at, n_handlers,
-            ssavalue_uses, throw_blocks,
-            Vector{Tuple{InferenceState,LineNum}}(), # cycle_backedges
-            Vector{InferenceState}(), # callers_in_cycle
-            #=parent=#nothing,
-            cached, false, false, false,
-            CachedMethodTable(method_table(interp)),
+            linfo, world, mod, sptypes, slottypes, src,
+            currpc, ip, handler_at, ssavalue_uses, stmt_types, stmt_edges, stmt_info,
+            pclimitations, limitations, cycle_backedges, callers_in_cycle, dont_work_on_me, parent, inferred,
+            result, valid_worlds, bestguess, ipo_effects,
+            params, restrict_abstract_call_sites, cached,
             interp)
+
+        # some more setups
+        src.ssavaluetypes = Any[ NOT_FOUND for i = 1:nssavalues ]
+        params.unoptimize_throw_blocks && mark_throw_blocks!(src, handler_at)
         result.result = frame
-        cached && push!(get_inference_cache(interp), result)
+        cache !== :no && push!(get_inference_cache(interp), result)
+
         return frame
     end
 end
 
-method_table(interp::AbstractInterpreter, sv::InferenceState) = sv.method_table
+Effects(state::InferenceState) = state.ipo_effects
+
+function tristate_merge!(caller::InferenceState, effects::Effects)
+    caller.ipo_effects = tristate_merge(caller.ipo_effects, effects)
+end
+tristate_merge!(caller::InferenceState, callee::InferenceState) =
+    tristate_merge!(caller, Effects(callee))
+
+is_effect_overridden(sv::InferenceState, effect::Symbol) = is_effect_overridden(sv.linfo, effect)
+function is_effect_overridden(linfo::MethodInstance, effect::Symbol)
+    def = linfo.def
+    return isa(def, Method) && is_effect_overridden(def, effect)
+end
+is_effect_overridden(method::Method, effect::Symbol) = is_effect_overridden(decode_effects_override(method.purity), effect)
+is_effect_overridden(override::EffectsOverride, effect::Symbol) = getfield(override, effect)
+
+function any_inbounds(code::Vector{Any})
+    for i=1:length(code)
+        stmt = code[i]
+        if isa(stmt, Expr) && stmt.head === :inbounds
+            return true
+        end
+    end
+    return false
+end
+
+function compute_trycatch(code::Vector{Any}, ip::BitSet)
+    # The goal initially is to record the frame like this for the state at exit:
+    # 1: (enter 3) # == 0
+    # 3: (expr)    # == 1
+    # 3: (leave 1) # == 1
+    # 4: (expr)    # == 0
+    # then we can find all trys by walking backwards from :enter statements,
+    # and all catches by looking at the statement after the :enter
+    n = length(code)
+    empty!(ip)
+    ip.offset = 0 # for _bits_findnext
+    push!(ip, n + 1)
+    handler_at = fill(0, n)
+
+    # start from all :enter statements and record the location of the try
+    for pc = 1:n
+        stmt = code[pc]
+        if isexpr(stmt, :enter)
+            l = stmt.args[1]::Int
+            handler_at[pc + 1] = pc
+            push!(ip, pc + 1)
+            handler_at[l] = pc
+            push!(ip, l)
+        end
+    end
+
+    # now forward those marks to all :leave statements
+    pc´´ = 0
+    while true
+        # make progress on the active ip set
+        pc = _bits_findnext(ip.bits, pc´´)::Int
+        pc > n && break
+        while true # inner loop optimizes the common case where it can run straight from pc to pc + 1
+            pc´ = pc + 1 # next program-counter (after executing instruction)
+            if pc == pc´´
+                pc´´ = pc´
+            end
+            delete!(ip, pc)
+            cur_hand = handler_at[pc]
+            @assert cur_hand != 0 "unbalanced try/catch"
+            stmt = code[pc]
+            if isa(stmt, GotoNode)
+                pc´ = stmt.label
+            elseif isa(stmt, GotoIfNot)
+                l = stmt.dest::Int
+                if handler_at[l] != cur_hand
+                    @assert handler_at[l] == 0 "unbalanced try/catch"
+                    handler_at[l] = cur_hand
+                    if l < pc´´
+                        pc´´ = l
+                    end
+                    push!(ip, l)
+                end
+            elseif isa(stmt, ReturnNode)
+                @assert !isdefined(stmt, :val) "unbalanced try/catch"
+                break
+            elseif isa(stmt, Expr)
+                head = stmt.head
+                if head === :enter
+                    cur_hand = pc
+                elseif head === :leave
+                    l = stmt.args[1]::Int
+                    for i = 1:l
+                        cur_hand = handler_at[cur_hand]
+                    end
+                    cur_hand == 0 && break
+                end
+            end
 
-function InferenceState(result::InferenceResult, cached::Bool, interp::AbstractInterpreter)
+            pc´ > n && break # can't proceed with the fast-path fall-through
+            if handler_at[pc´] != cur_hand
+                @assert handler_at[pc´] == 0 "unbalanced try/catch"
+                handler_at[pc´] = cur_hand
+            elseif !in(pc´, ip)
+                break  # already visited
+            end
+            pc = pc´
+        end
+    end
+
+    @assert first(ip) == n + 1
+    return handler_at
+end
+
+"""
+    Iterate through all callers of the given InferenceState in the abstract
+    interpretation stack (including the given InferenceState itself), vising
+    children before their parents (i.e. ascending the tree from the given
+    InferenceState). Note that cycles may be visited in any order.
+"""
+struct InfStackUnwind
+    inf::InferenceState
+end
+iterate(unw::InfStackUnwind) = (unw.inf, (unw.inf, 0))
+function iterate(unw::InfStackUnwind, (infstate, cyclei)::Tuple{InferenceState, Int})
+    # iterate through the cycle before walking to the parent
+    if cyclei < length(infstate.callers_in_cycle)
+        cyclei += 1
+        infstate = infstate.callers_in_cycle[cyclei]
+    else
+        cyclei = 0
+        infstate = infstate.parent
+    end
+    infstate === nothing && return nothing
+    (infstate::InferenceState, (infstate, cyclei))
+end
+
+function InferenceState(result::InferenceResult, cache::Symbol, interp::AbstractInterpreter)
     # prepare an InferenceState object for inferring lambda
     src = retrieve_code_info(result.linfo)
     src === nothing && return nothing
     validate_code_in_debug_mode(result.linfo, src, "lowered")
-    return InferenceState(result, src, cached, interp)
+    return InferenceState(result, src, cache, interp)
 end
 
 function sptypes_from_meth_instance(linfo::MethodInstance)
@@ -160,7 +369,7 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
             while temp isa UnionAll
                 temp = temp.body
             end
-            sigtypes = temp.parameters
+            sigtypes = (temp::DataType).parameters
             for j = 1:length(sigtypes)
                 tj = sigtypes[j]
                 if isType(tj) && tj.parameters[1] === Pi
@@ -192,6 +401,8 @@ function sptypes_from_meth_instance(linfo::MethodInstance)
                     ty = UnionAll(tv, Type{tv})
                 end
             end
+        elseif isvarargtype(v)
+            ty = Int
         else
             ty = Const(v)
         end
@@ -212,19 +423,17 @@ end
 update_valid_age!(edge::InferenceState, sv::InferenceState) = update_valid_age!(sv, edge.valid_worlds)
 
 function record_ssa_assign(ssa_id::Int, @nospecialize(new), frame::InferenceState)
-    old = frame.src.ssavaluetypes[ssa_id]
+    ssavaluetypes = frame.src.ssavaluetypes::Vector{Any}
+    old = ssavaluetypes[ssa_id]
     if old === NOT_FOUND || !(new ⊑ old)
         # typically, we expect that old ⊑ new (that output information only
         # gets less precise with worse input information), but to actually
         # guarantee convergence we need to use tmerge here to ensure that is true
-        frame.src.ssavaluetypes[ssa_id] = old === NOT_FOUND ? new : tmerge(old, new)
+        ssavaluetypes[ssa_id] = old === NOT_FOUND ? new : tmerge(old, new)
         W = frame.ip
         s = frame.stmt_types
         for r in frame.ssavalue_uses[ssa_id]
             if s[r] !== nothing # s[r] === nothing => unreached statement
-                if r < frame.pc´´
-                    frame.pc´´ = r
-                end
                 push!(W, r)
             end
         end
@@ -243,61 +452,37 @@ end
 # temporarily accumulate our edges to later add as backedges in the callee
 function add_backedge!(li::MethodInstance, caller::InferenceState)
     isa(caller.linfo.def, Method) || return # don't add backedges to toplevel exprs
-    if caller.stmt_edges[caller.currpc] === nothing
-        caller.stmt_edges[caller.currpc] = []
+    edges = caller.stmt_edges[caller.currpc]
+    if edges === nothing
+        edges = caller.stmt_edges[caller.currpc] = []
     end
-    push!(caller.stmt_edges[caller.currpc], li)
+    push!(edges, li)
     nothing
 end
 
 # used to temporarily accumulate our no method errors to later add as backedges in the callee method table
 function add_mt_backedge!(mt::Core.MethodTable, @nospecialize(typ), caller::InferenceState)
     isa(caller.linfo.def, Method) || return # don't add backedges to toplevel exprs
-    if caller.stmt_edges[caller.currpc] === nothing
-        caller.stmt_edges[caller.currpc] = []
+    edges = caller.stmt_edges[caller.currpc]
+    if edges === nothing
+        edges = caller.stmt_edges[caller.currpc] = []
     end
-    push!(caller.stmt_edges[caller.currpc], mt)
-    push!(caller.stmt_edges[caller.currpc], typ)
+    push!(edges, mt)
+    push!(edges, typ)
     nothing
 end
 
-function poison_callstack(infstate::InferenceState, topmost::InferenceState, poison_topmost::Bool)
-    poison_topmost && (topmost = topmost.parent)
-    while !(infstate === topmost)
-        if call_result_unused(infstate)
-            # If we won't propagate the result any further (since it's typically unused),
-            # it's OK that we keep and cache the "limited" result in the parents
-            # (non-typically, this means that we lose the ability to detect a guaranteed StackOverflow in some cases)
-            # TODO: we might be able to halt progress much more strongly here,
-            # since now we know we won't be able to keep anything much that we learned.
-            # We were mainly only here to compute the calling convention return type,
-            # but in most situations now, we are unlikely to be able to use that information.
-            break
-        end
-        infstate.limited = true
-        for infstate_cycle in infstate.callers_in_cycle
-            infstate_cycle.limited = true
-        end
-        infstate = infstate.parent
-        infstate === nothing && return
-    end
-end
-
-function is_specializable_vararg_slot(@nospecialize(arg), nargs::Int, vargs::Vector{Any})
-    return (isa(arg, Slot) && slot_id(arg) == nargs && !isempty(vargs))
-end
-
 function print_callstack(sv::InferenceState)
     while sv !== nothing
         print(sv.linfo)
-        sv.limited && print("  [limited]")
         !sv.cached && print("  [uncached]")
         println()
         for cycle in sv.callers_in_cycle
             print(' ', cycle.linfo)
-            cycle.limited && print("  [limited]")
             println()
         end
         sv = sv.parent
     end
 end
+
+get_curr_ssaflag(sv::InferenceState) = sv.src.ssaflags[sv.currpc]
diff --git a/base/compiler/methodtable.jl b/base/compiler/methodtable.jl
index 76c5ca4189dff2..7aa686009c1af5 100644
--- a/base/compiler/methodtable.jl
+++ b/base/compiler/methodtable.jl
@@ -1,5 +1,28 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 abstract type MethodTableView; end
 
+"""
+    struct InternalMethodTable <: MethodTableView
+
+A struct representing the state of the internal method table at a
+particular world age.
+"""
+struct InternalMethodTable <: MethodTableView
+    world::UInt
+end
+
+"""
+    struct OverlayMethodTable <: MethodTableView
+
+Overlays the internal method table such that specific queries can be redirected to an
+external table, e.g., to override existing method.
+"""
+struct OverlayMethodTable <: MethodTableView
+    world::UInt
+    mt::Core.MethodTable
+end
+
 struct MethodLookupResult
     # Really Vector{Core.MethodMatch}, but it's easier to represent this as
     # and work with Vector{Any} on the C side.
@@ -17,77 +40,95 @@ end
 getindex(result::MethodLookupResult, idx::Int) = getindex(result.matches, idx)::MethodMatch
 
 """
-    struct InternalMethodTable <: MethodTableView
+    findall(sig::Type, view::MethodTableView; limit::Int=typemax(Int)) ->
+        (matches::MethodLookupResult, overlayed::Bool) or missing
 
-A struct representing the state of the internal method table at a
-particular world age.
+Find all methods in the given method table `view` that are applicable to the given signature `sig`.
+If no applicable methods are found, an empty result is returned.
+If the number of applicable methods exceeded the specified limit, `missing` is returned.
+`overlayed` indicates if any of the matching methods comes from an overlayed method table.
 """
-struct InternalMethodTable <: MethodTableView
-    world::UInt
+function findall(@nospecialize(sig::Type), table::InternalMethodTable; limit::Int=Int(typemax(Int32)))
+    result = _findall(sig, nothing, table.world, limit)
+    result === missing && return missing
+    return result, false
 end
 
-"""
-    struct CachedMethodTable <: MethodTableView
-
-Overlays another method table view with an additional local fast path cache that
-can respond to repeated, identical queries faster than the original method table.
-"""
-struct CachedMethodTable{T} <: MethodTableView
-    cache::IdDict{Any, Union{Missing, MethodLookupResult}}
-    table::T
+function findall(@nospecialize(sig::Type), table::OverlayMethodTable; limit::Int=Int(typemax(Int32)))
+    result = _findall(sig, table.mt, table.world, limit)
+    result === missing && return missing
+    nr = length(result)
+    if nr ≥ 1 && result[nr].fully_covers
+        # no need to fall back to the internal method table
+        return result, true
+    end
+    # fall back to the internal method table
+    fallback_result = _findall(sig, nothing, table.world, limit)
+    fallback_result === missing && return missing
+    # merge the fallback match results with the internal method table
+    return MethodLookupResult(
+        vcat(result.matches, fallback_result.matches),
+        WorldRange(
+            max(result.valid_worlds.min_world, fallback_result.valid_worlds.min_world),
+            min(result.valid_worlds.max_world, fallback_result.valid_worlds.max_world)),
+        result.ambig | fallback_result.ambig), !isempty(result)
 end
-CachedMethodTable(table::T) where T =
-    CachedMethodTable{T}(IdDict{Any, Union{Missing, MethodLookupResult}}(),
-        table)
-
-"""
-    findall(sig::Type{<:Tuple}, view::MethodTableView; limit=typemax(Int))
 
-Find all methods in the given method table `view` that are applicable to the
-given signature `sig`. If no applicable methods are found, an empty result is
-returned. If the number of applicable methods exeeded the specified limit,
-`missing` is returned.
-"""
-function findall(@nospecialize(sig::Type{<:Tuple}), table::InternalMethodTable; limit::Int=typemax(Int))
+function _findall(@nospecialize(sig::Type), mt::Union{Nothing,Core.MethodTable}, world::UInt, limit::Int)
     _min_val = RefValue{UInt}(typemin(UInt))
     _max_val = RefValue{UInt}(typemax(UInt))
     _ambig = RefValue{Int32}(0)
-    ms = _methods_by_ftype(sig, limit, table.world, false, _min_val, _max_val, _ambig)
+    ms = _methods_by_ftype(sig, mt, limit, world, false, _min_val, _max_val, _ambig)
     if ms === false
         return missing
     end
     return MethodLookupResult(ms::Vector{Any}, WorldRange(_min_val[], _max_val[]), _ambig[] != 0)
 end
 
-function findall(@nospecialize(sig::Type{<:Tuple}), table::CachedMethodTable; limit::Int=typemax(Int))
-    box = Core.Box(sig)
-    return get!(table.cache, sig) do
-        findall(box.contents, table.table; limit=limit)
-    end
-end
-
 """
-    findsup(sig::Type{<:Tuple}, view::MethodTableView)::Union{Tuple{MethodMatch, WorldRange}, Nothing}
-
-Find the (unique) method `m` such that `sig <: m.sig`, while being more
-specific than any other method with the same property. In other words, find
-the method which is the least upper bound (supremum) under the specificity/subtype
-relation of the queried `signature`. If `sig` is concrete, this is equivalent to
-asking for the method that will be called given arguments whose types match the
-given signature. This query is also used to implement `invoke`.
-
-Such a method `m` need not exist. It is possible that no method is an
-upper bound of `sig`, or it is possible that among the upper bounds, there
-is no least element. In both cases `nothing` is returned.
+    findsup(sig::Type, view::MethodTableView) ->
+        (match::MethodMatch, valid_worlds::WorldRange, overlayed::Bool) or nothing
+
+Find the (unique) method such that `sig <: match.method.sig`, while being more
+specific than any other method with the same property. In other words, find the method
+which is the least upper bound (supremum) under the specificity/subtype relation of
+the queried `sig`nature. If `sig` is concrete, this is equivalent to asking for the method
+that will be called given arguments whose types match the given signature.
+Note that this query is also used to implement `invoke`.
+
+Such a matching method `match` doesn't necessarily exist.
+It is possible that no method is an upper bound of `sig`, or
+it is possible that among the upper bounds, there is no least element.
+In both cases `nothing` is returned.
+
+`overlayed` indicates if any of the matching methods comes from an overlayed method table.
 """
-function findsup(@nospecialize(sig::Type{<:Tuple}), table::InternalMethodTable)
+function findsup(@nospecialize(sig::Type), table::InternalMethodTable)
+    return (_findsup(sig, nothing, table.world)..., false)
+end
+
+function findsup(@nospecialize(sig::Type), table::OverlayMethodTable)
+    match, valid_worlds = _findsup(sig, table.mt, table.world)
+    match !== nothing && return match, valid_worlds, true
+    # fall back to the internal method table
+    fallback_match, fallback_valid_worlds = _findsup(sig, nothing, table.world)
+    return (
+        fallback_match,
+        WorldRange(
+            max(valid_worlds.min_world, fallback_valid_worlds.min_world),
+            min(valid_worlds.max_world, fallback_valid_worlds.max_world)),
+        false)
+end
+
+function _findsup(@nospecialize(sig::Type), mt::Union{Nothing,Core.MethodTable}, world::UInt)
     min_valid = RefValue{UInt}(typemin(UInt))
     max_valid = RefValue{UInt}(typemax(UInt))
-    result = ccall(:jl_gf_invoke_lookup_worlds, Any, (Any, UInt, Ptr{Csize_t}, Ptr{Csize_t}),
-                   sig, table.world, min_valid, max_valid)::Union{Method, Nothing}
-    result === nothing && return nothing
-    (result, WorldRange(min_valid[], max_valid[]))
+    match = ccall(:jl_gf_invoke_lookup_worlds, Any, (Any, Any, UInt, Ptr{Csize_t}, Ptr{Csize_t}),
+                   sig, mt, world, min_valid, max_valid)::Union{MethodMatch, Nothing}
+    valid_worlds = WorldRange(min_valid[], max_valid[])
+    return match, valid_worlds
 end
 
-# This query is not cached
-findsup(sig::Type{<:Tuple}, table::CachedMethodTable) = findsup(sig, table.table)
+isoverlayed(::MethodTableView)     = error("unsatisfied MethodTableView interface")
+isoverlayed(::InternalMethodTable) = false
+isoverlayed(::OverlayMethodTable)  = true
diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index d53b8193e639ad..b00e24aec97348 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -1,5 +1,35 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#############
+# constants #
+#############
+
+# The slot has uses that are not statically dominated by any assignment
+# This is implied by `SLOT_USEDUNDEF`.
+# If this is not set, all the uses are (statically) dominated by the defs.
+# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
+const SLOT_STATICUNDEF  = 1 # slot might be used before it is defined (structurally)
+const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
+const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
+# const SLOT_CALLED      = 64
+
+# NOTE make sure to sync the flag definitions below with julia.h and `jl_code_info_set_ir` in method.c
+
+const IR_FLAG_NULL        = 0x00
+# This statement is marked as @inbounds by user.
+# Ff replaced by inlining, any contained boundschecks may be removed.
+const IR_FLAG_INBOUNDS    = 0x01 << 0
+# This statement is marked as @inline by user
+const IR_FLAG_INLINE      = 0x01 << 1
+# This statement is marked as @noinline by user
+const IR_FLAG_NOINLINE    = 0x01 << 2
+const IR_FLAG_THROW_BLOCK = 0x01 << 3
+# This statement may be removed if its result is unused. In particular it must
+# thus be both pure and effect free.
+const IR_FLAG_EFFECT_FREE = 0x01 << 4
+
+const TOP_TUPLE = GlobalRef(Core, :tuple)
+
 #####################
 # OptimizationState #
 #####################
@@ -21,45 +51,60 @@ function push!(et::EdgeTracker, ci::CodeInstance)
     push!(et, ci.def)
 end
 
-struct InferenceCaches{T, S}
-    inf_cache::T
-    mi_cache::S
-end
-
-struct InliningState{S <: Union{EdgeTracker, Nothing}, T <: Union{InferenceCaches, Nothing}, V <: Union{Nothing, MethodTableView}}
+struct InliningState{S <: Union{EdgeTracker, Nothing}, MICache, I<:AbstractInterpreter}
     params::OptimizationParams
     et::S
-    caches::T
-    method_table::V
+    mi_cache::MICache # TODO move this to `OptimizationState` (as used by EscapeAnalysis as well)
+    interp::I
 end
 
+is_source_inferred(@nospecialize(src::Union{CodeInfo, Vector{UInt8}})) =
+    ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
+
+function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8,
+                         mi::MethodInstance, argtypes::Vector{Any})
+    if isa(src, CodeInfo) || isa(src, Vector{UInt8})
+        src_inferred = is_source_inferred(src)
+        src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
+        return src_inferred && src_inlineable ? src : nothing
+    elseif src === nothing && is_stmt_inline(stmt_flag)
+        # if this statement is forced to be inlined, make an additional effort to find the
+        # inferred source in the local cache
+        # we still won't find a source for recursive call because the "single-level" inlining
+        # seems to be more trouble and complex than it's worth
+        inf_result = cache_lookup(mi, argtypes, get_inference_cache(interp))
+        inf_result === nothing && return nothing
+        src = inf_result.src
+        if isa(src, CodeInfo)
+            src_inferred = is_source_inferred(src)
+            return src_inferred ? src : nothing
+        else
+            return nothing
+        end
+    end
+    return nothing
+end
+
+include("compiler/ssair/driver.jl")
+
 mutable struct OptimizationState
     linfo::MethodInstance
     src::CodeInfo
+    ir::Union{Nothing, IRCode}
     stmt_info::Vector{Any}
     mod::Module
-    nargs::Int
     sptypes::Vector{Any} # static parameters
     slottypes::Vector{Any}
-    const_api::Bool
     inlining::InliningState
     function OptimizationState(frame::InferenceState, params::OptimizationParams, interp::AbstractInterpreter)
-        s_edges = frame.stmt_edges[1]
-        if s_edges === nothing
-            s_edges = []
-            frame.stmt_edges[1] = s_edges
-        end
-        src = frame.src
+        s_edges = frame.stmt_edges[1]::Vector{Any}
         inlining = InliningState(params,
-            EdgeTracker(s_edges::Vector{Any}, frame.valid_worlds),
-            InferenceCaches(
-                get_inference_cache(interp),
-                WorldView(code_cache(interp), frame.world)),
-            method_table(interp))
+            EdgeTracker(s_edges, frame.valid_worlds),
+            WorldView(code_cache(interp), frame.world),
+            interp)
         return new(frame.linfo,
-                   src, frame.stmt_info, frame.mod, frame.nargs,
-                   frame.sptypes, frame.slottypes, false,
-                   inlining)
+                   frame.src, nothing, frame.stmt_info, frame.mod,
+                   frame.sptypes, frame.slottypes, inlining)
     end
     function OptimizationState(linfo::MethodInstance, src::CodeInfo, params::OptimizationParams, interp::AbstractInterpreter)
         # prepare src for running optimization passes
@@ -67,6 +112,8 @@ mutable struct OptimizationState
         nssavalues = src.ssavaluetypes
         if nssavalues isa Int
             src.ssavaluetypes = Any[ Any for i = 1:nssavalues ]
+        else
+            nssavalues = length(src.ssavaluetypes::Vector{Any})
         end
         nslots = length(src.slotflags)
         slottypes = src.slottypes
@@ -75,28 +122,18 @@ mutable struct OptimizationState
         end
         stmt_info = Any[nothing for i = 1:nssavalues]
         # cache some useful state computations
-        toplevel = !isa(linfo.def, Method)
-        if !toplevel
-            meth = linfo.def
-            inmodule = meth.module
-            nargs = meth.nargs
-        else
-            inmodule = linfo.def::Module
-            nargs = 0
-        end
+        def = linfo.def
+        mod = isa(def, Method) ? def.module : def
         # Allow using the global MI cache, but don't track edges.
         # This method is mostly used for unit testing the optimizer
         inlining = InliningState(params,
             nothing,
-            InferenceCaches(
-                get_inference_cache(interp),
-                WorldView(code_cache(interp), get_world_counter())),
-            method_table(interp))
+            WorldView(code_cache(interp), get_world_counter()),
+            interp)
         return new(linfo,
-                   src, stmt_info, inmodule, nargs,
-                   sptypes_from_meth_instance(linfo), slottypes, false,
-                   inlining)
-        end
+                   src, nothing, stmt_info, mod,
+                   sptypes_from_meth_instance(linfo), slottypes, inlining)
+    end
 end
 
 function OptimizationState(linfo::MethodInstance, params::OptimizationParams, interp::AbstractInterpreter)
@@ -105,33 +142,17 @@ function OptimizationState(linfo::MethodInstance, params::OptimizationParams, in
     return OptimizationState(linfo, src, params, interp)
 end
 
-
-#############
-# constants #
-#############
-
-# The slot has uses that are not statically dominated by any assignment
-# This is implied by `SLOT_USEDUNDEF`.
-# If this is not set, all the uses are (statically) dominated by the defs.
-# In particular, if a slot has `AssignedOnce && !StaticUndef`, it is an SSA.
-const SLOT_STATICUNDEF  = 1 # slot might be used before it is defined (structurally)
-const SLOT_ASSIGNEDONCE = 16 # slot is assigned to only once
-const SLOT_USEDUNDEF    = 32 # slot has uses that might raise UndefVarError
-# const SLOT_CALLED      = 64
-
-const IR_FLAG_INBOUNDS = 0x01
-
-# known to be always effect-free (in particular nothrow)
-const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields]
-
-# known to be effect-free if the are nothrow
-const _PURE_OR_ERROR_BUILTINS = [
-    fieldtype, apply_type, isa, UnionAll,
-    getfield, arrayref, const_arrayref, isdefined, Core.sizeof,
-    Core.kwfunc, ifelse, Core._typevar, (<:)
-]
-
-const TOP_TUPLE = GlobalRef(Core, :tuple)
+function ir_to_codeinf!(opt::OptimizationState)
+    (; linfo, src) = opt
+    optdef = linfo.def
+    replace_code_newstyle!(src, opt.ir::IRCode, isa(optdef, Method) ? Int(optdef.nargs) : 0)
+    opt.ir = nothing
+    widen_all_consts!(src)
+    src.inferred = true
+    # finish updating the result struct
+    validate_code_in_debug_mode(linfo, src, "optimized")
+    return src
+end
 
 #########
 # logic #
@@ -139,28 +160,9 @@ const TOP_TUPLE = GlobalRef(Core, :tuple)
 
 _topmod(sv::OptimizationState) = _topmod(sv.mod)
 
-function isinlineable(m::Method, me::OptimizationState, params::OptimizationParams, union_penalties::Bool, bonus::Int=0)
-    # compute the cost (size) of inlining this code
-    inlineable = false
-    cost_threshold = params.inline_cost_threshold
-    if m.module === _topmod(m.module)
-        # a few functions get special treatment
-        name = m.name
-        sig = m.sig
-        if ((name === :+ || name === :* || name === :min || name === :max) &&
-            isa(sig,DataType) &&
-            sig == Tuple{sig.parameters[1],Any,Any,Any,Vararg{Any}})
-            inlineable = true
-        elseif (name === :iterate || name === :unsafe_convert ||
-                name === :cconvert)
-            cost_threshold *= 4
-        end
-    end
-    if !inlineable
-        inlineable = inline_worthy(me.src.code, me.src, me.sptypes, me.slottypes, params, union_penalties, cost_threshold + bonus)
-    end
-    return inlineable
-end
+is_stmt_inline(stmt_flag::UInt8)      = stmt_flag & IR_FLAG_INLINE      ≠ 0
+is_stmt_noinline(stmt_flag::UInt8)    = stmt_flag & IR_FLAG_NOINLINE    ≠ 0
+is_stmt_throw_block(stmt_flag::UInt8) = stmt_flag & IR_FLAG_THROW_BLOCK ≠ 0
 
 # These affect control flow within the function (so may not be removed
 # if there is no usage within the function), but don't affect the purity
@@ -170,7 +172,7 @@ function stmt_affects_purity(@nospecialize(stmt), ir)
         return false
     end
     if isa(stmt, GotoIfNot)
-        t = argextype(stmt.cond, ir, ir.sptypes)
+        t = argextype(stmt.cond, ir)
         return !(t ⊑ Bool)
     end
     if isa(stmt, Expr)
@@ -179,32 +181,248 @@ function stmt_affects_purity(@nospecialize(stmt), ir)
     return true
 end
 
-# run the optimization work
-function optimize(opt::OptimizationState, params::OptimizationParams, @nospecialize(result))
-    def = opt.linfo.def
-    nargs = Int(opt.nargs) - 1
-    @timeit "optimizer" ir = run_passes(opt.src, nargs, opt)
-    force_noinline = _any(@nospecialize(x) -> isexpr(x, :meta) && x.args[1] === :noinline, ir.meta)
+"""
+    stmt_effect_free(stmt, rt, src::Union{IRCode,IncrementalCompact})
+
+Determine whether a `stmt` is "side-effect-free", i.e. may be removed if it has no uses.
+"""
+function stmt_effect_free(@nospecialize(stmt), @nospecialize(rt), src::Union{IRCode,IncrementalCompact})
+    isa(stmt, PiNode) && return true
+    isa(stmt, PhiNode) && return true
+    isa(stmt, ReturnNode) && return false
+    isa(stmt, GotoNode) && return false
+    isa(stmt, GotoIfNot) && return false
+    isa(stmt, Slot) && return false # Slots shouldn't occur in the IR at this point, but let's be defensive here
+    isa(stmt, GlobalRef) && return isdefined(stmt.mod, stmt.name)
+    if isa(stmt, Expr)
+        (; head, args) = stmt
+        if head === :static_parameter
+            etyp = (isa(src, IRCode) ? src.sptypes : src.ir.sptypes)[args[1]::Int]
+            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
+            return isa(etyp, Const)
+        end
+        if head === :call
+            f = argextype(args[1], src)
+            f = singleton_type(f)
+            f === nothing && return false
+            is_return_type(f) && return true
+            if isa(f, IntrinsicFunction)
+                intrinsic_effect_free_if_nothrow(f) || return false
+                return intrinsic_nothrow(f,
+                        Any[argextype(args[i], src) for i = 2:length(args)])
+            end
+            contains_is(_PURE_BUILTINS, f) && return true
+            # `get_binding_type` sets the type to Any if the binding doesn't exist yet
+            if f === Core.get_binding_type
+                length(args) == 3 || return false
+                M, s = argextype(args[2], src), argextype(args[3], src)
+                return get_binding_type_effect_free(M, s)
+            end
+            contains_is(_EFFECT_FREE_BUILTINS, f) || return false
+            rt === Bottom && return false
+            return _builtin_nothrow(f, Any[argextype(args[i], src) for i = 2:length(args)], rt)
+        elseif head === :new
+            typ = argextype(args[1], src)
+            # `Expr(:new)` of unknown type could raise arbitrary TypeError.
+            typ, isexact = instanceof_tfunc(typ)
+            isexact || return false
+            isconcretedispatch(typ) || return false
+            typ = typ::DataType
+            fieldcount(typ) >= length(args) - 1 || return false
+            for fld_idx in 1:(length(args) - 1)
+                eT = argextype(args[fld_idx + 1], src)
+                fT = fieldtype(typ, fld_idx)
+                eT ⊑ fT || return false
+            end
+            return true
+        elseif head === :foreigncall
+            return foreigncall_effect_free(stmt, src)
+        elseif head === :new_opaque_closure
+            length(args) < 4 && return false
+            typ = argextype(args[1], src)
+            typ, isexact = instanceof_tfunc(typ)
+            isexact || return false
+            typ ⊑ Tuple || return false
+            rt_lb = argextype(args[2], src)
+            rt_ub = argextype(args[3], src)
+            src = argextype(args[4], src)
+            if !(rt_lb ⊑ Type && rt_ub ⊑ Type && src ⊑ Method)
+                return false
+            end
+            return true
+        elseif head === :isdefined || head === :the_exception || head === :copyast || head === :inbounds || head === :boundscheck
+            return true
+        else
+            # e.g. :loopinfo
+            return false
+        end
+    end
+    return true
+end
+
+function foreigncall_effect_free(stmt::Expr, src::Union{IRCode,IncrementalCompact})
+    args = stmt.args
+    name = args[1]
+    isa(name, QuoteNode) && (name = name.value)
+    isa(name, Symbol) || return false
+    ndims = alloc_array_ndims(name)
+    if ndims !== nothing
+        if ndims == 0
+            return new_array_no_throw(args, src)
+        else
+            return alloc_array_no_throw(args, ndims, src)
+        end
+    end
+    return false
+end
+
+function alloc_array_ndims(name::Symbol)
+    if name === :jl_alloc_array_1d
+        return 1
+    elseif name === :jl_alloc_array_2d
+        return 2
+    elseif name === :jl_alloc_array_3d
+        return 3
+    elseif name === :jl_new_array
+        return 0
+    end
+    return nothing
+end
+
+const FOREIGNCALL_ARG_START = 6
+
+function alloc_array_no_throw(args::Vector{Any}, ndims::Int, src::Union{IRCode,IncrementalCompact})
+    length(args) ≥ ndims+FOREIGNCALL_ARG_START || return false
+    atype = instanceof_tfunc(argextype(args[FOREIGNCALL_ARG_START], src))[1]
+    dims = Csize_t[]
+    for i in 1:ndims
+        dim = argextype(args[i+FOREIGNCALL_ARG_START], src)
+        isa(dim, Const) || return false
+        dimval = dim.val
+        isa(dimval, Int) || return false
+        push!(dims, reinterpret(Csize_t, dimval))
+    end
+    return _new_array_no_throw(atype, ndims, dims)
+end
+
+function new_array_no_throw(args::Vector{Any}, src::Union{IRCode,IncrementalCompact})
+    length(args) ≥ FOREIGNCALL_ARG_START+1 || return false
+    atype = instanceof_tfunc(argextype(args[FOREIGNCALL_ARG_START], src))[1]
+    dims = argextype(args[FOREIGNCALL_ARG_START+1], src)
+    isa(dims, Const) || return dims === Tuple{}
+    dimsval = dims.val
+    isa(dimsval, Tuple{Vararg{Int}}) || return false
+    ndims = nfields(dimsval)
+    isa(ndims, Int) || return false
+    dims = Csize_t[reinterpret(Csize_t, dimval) for dimval in dimsval]
+    return _new_array_no_throw(atype, ndims, dims)
+end
+
+function _new_array_no_throw(@nospecialize(atype), ndims::Int, dims::Vector{Csize_t})
+    isa(atype, DataType) || return false
+    eltype = atype.parameters[1]
+    iskindtype(typeof(eltype)) || return false
+    elsz = aligned_sizeof(eltype)
+    return ccall(:jl_array_validate_dims, Cint,
+        (Ptr{Csize_t}, Ptr{Csize_t}, UInt32, Ptr{Csize_t}, Csize_t),
+        #=nel=#RefValue{Csize_t}(), #=tot=#RefValue{Csize_t}(), ndims, dims, elsz) == 0
+end
+
+"""
+    argextype(x, src::Union{IRCode,IncrementalCompact}) -> t
+    argextype(x, src::CodeInfo, sptypes::Vector{Any}) -> t
+
+Return the type of value `x` in the context of inferred source `src`.
+Note that `t` might be an extended lattice element.
+Use `widenconst(t)` to get the native Julia type of `x`.
+"""
+argextype(@nospecialize(x), ir::IRCode, sptypes::Vector{Any} = ir.sptypes) =
+    argextype(x, ir, sptypes, ir.argtypes)
+function argextype(@nospecialize(x), compact::IncrementalCompact, sptypes::Vector{Any} = compact.ir.sptypes)
+    isa(x, AnySSAValue) && return types(compact)[x]
+    return argextype(x, compact, sptypes, compact.ir.argtypes)
+end
+argextype(@nospecialize(x), src::CodeInfo, sptypes::Vector{Any}) = argextype(x, src, sptypes, src.slottypes::Vector{Any})
+function argextype(
+    @nospecialize(x), src::Union{IRCode,IncrementalCompact,CodeInfo},
+    sptypes::Vector{Any}, slottypes::Vector{Any})
+    if isa(x, Expr)
+        if x.head === :static_parameter
+            return sptypes[x.args[1]::Int]
+        elseif x.head === :boundscheck
+            return Bool
+        elseif x.head === :copyast
+            return argextype(x.args[1], src, sptypes, slottypes)
+        end
+        @assert false "argextype only works on argument-position values"
+    elseif isa(x, SlotNumber)
+        return slottypes[x.id]
+    elseif isa(x, TypedSlot)
+        return x.typ
+    elseif isa(x, SSAValue)
+        return abstract_eval_ssavalue(x, src)
+    elseif isa(x, Argument)
+        return slottypes[x.n]
+    elseif isa(x, QuoteNode)
+        return Const(x.value)
+    elseif isa(x, GlobalRef)
+        return abstract_eval_global(x.mod, x.name)
+    elseif isa(x, PhiNode)
+        return Any
+    elseif isa(x, PiNode)
+        return x.typ
+    else
+        return Const(x)
+    end
+end
+abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s]
+
+struct ConstAPI
+    val
+    ConstAPI(@nospecialize val) = new(val)
+end
+
+"""
+    finish(interp::AbstractInterpreter, opt::OptimizationState,
+           params::OptimizationParams, ir::IRCode, caller::InferenceResult) -> analyzed::Union{Nothing,ConstAPI}
+
+Post process information derived by Julia-level optimizations for later uses:
+- computes "purity", i.e. side-effect-freeness
+- computes inlining cost
+
+In a case when the purity is proven, `finish` can return `ConstAPI` object wrapping the constant
+value so that the runtime system will use the constant calling convention for the method calls.
+"""
+function finish(interp::AbstractInterpreter, opt::OptimizationState,
+                params::OptimizationParams, ir::IRCode, caller::InferenceResult)
+    (; src, linfo) = opt
+    (; def, specTypes) = linfo
+
+    analyzed = nothing # `ConstAPI` if this call can use constant calling convention
+    force_noinline = _any(x::Expr -> x.head === :meta && x.args[1] === :noinline, ir.meta)
 
     # compute inlining and other related optimizations
+    result = caller.result
+    @assert !(result isa LimitedAccuracy)
+    result = isa(result, InterConditional) ? widenconditional(result) : result
     if (isa(result, Const) || isconstType(result))
         proven_pure = false
-        # must be proven pure to use const_api; otherwise we might skip throwing errors
-        # (issue #20704)
+        # must be proven pure to use constant calling convention;
+        # otherwise we might skip throwing errors (issue #20704)
         # TODO: Improve this analysis; if a function is marked @pure we should really
         # only care about certain errors (e.g. method errors and type errors).
-        if length(ir.stmts) < 10
+        if length(ir.stmts) < 15
             proven_pure = true
             for i in 1:length(ir.stmts)
                 node = ir.stmts[i]
                 stmt = node[:inst]
-                if stmt_affects_purity(stmt, ir) && !stmt_effect_free(stmt, node[:type], ir, ir.sptypes)
+                if stmt_affects_purity(stmt, ir) && !stmt_effect_free(stmt, node[:type], ir)
                     proven_pure = false
                     break
                 end
             end
             if proven_pure
-                for fl in opt.src.slotflags
+                for fl in src.slotflags
                     if (fl & SLOT_USEDUNDEF) != 0
                         proven_pure = false
                         break
@@ -212,9 +430,6 @@ function optimize(opt::OptimizationState, params::OptimizationParams, @nospecial
                 end
             end
         end
-        if proven_pure
-            opt.src.pure = true
-        end
 
         if proven_pure
             # use constant calling convention
@@ -223,19 +438,26 @@ function optimize(opt::OptimizationState, params::OptimizationParams, @nospecial
             # to the `jl_call_method_internal` fast path
             # Still set pure flag to make sure `inference` tests pass
             # and to possibly enable more optimization in the future
-            if !(isa(result, Const) && !is_inlineable_constant(result.val))
-                opt.const_api = true
+            src.pure = true
+            if isa(result, Const)
+                val = result.val
+                if is_inlineable_constant(val)
+                    analyzed = ConstAPI(val)
+                end
+            else
+                @assert isconstType(result)
+                analyzed = ConstAPI(result.parameters[1])
             end
-            force_noinline || (opt.src.inlineable = true)
+            force_noinline || (src.inlineable = true)
         end
     end
 
-    replace_code_newstyle!(opt.src, ir, nargs)
+    opt.ir = ir
 
     # determine and cache inlineability
     union_penalties = false
     if !force_noinline
-        sig = unwrap_unionall(opt.linfo.specTypes)
+        sig = unwrap_unionall(specTypes)
         if isa(sig, DataType) && sig.name === Tuple.name
             for P in sig.parameters
                 P = unwrap_unionall(P)
@@ -247,44 +469,185 @@ function optimize(opt::OptimizationState, params::OptimizationParams, @nospecial
         else
             force_noinline = true
         end
-        if !opt.src.inlineable && result === Union{}
+        if !src.inlineable && result === Bottom
             force_noinline = true
         end
     end
     if force_noinline
-        opt.src.inlineable = false
+        src.inlineable = false
     elseif isa(def, Method)
-        if opt.src.inlineable && isdispatchtuple(opt.linfo.specTypes)
+        if src.inlineable && isdispatchtuple(specTypes)
             # obey @inline declaration if a dispatch barrier would not help
         else
-            bonus = 0
-            if result ⊑ Tuple && !isbitstype(widenconst(result))
-                bonus = params.inline_tupleret_bonus
+            # compute the cost (size) of inlining this code
+            cost_threshold = default = params.inline_cost_threshold
+            if result ⊑ Tuple && !isconcretetype(widenconst(result))
+                cost_threshold += params.inline_tupleret_bonus
             end
-            if opt.src.inlineable
-                # For functions declared @inline, increase the cost threshold 20x
-                bonus += params.inline_cost_threshold*19
+            # if the method is declared as `@inline`, increase the cost threshold 20x
+            if src.inlineable
+                cost_threshold += 19*default
+            end
+            # a few functions get special treatment
+            if def.module === _topmod(def.module)
+                name = def.name
+                if name === :iterate || name === :unsafe_convert || name === :cconvert
+                    cost_threshold += 4*default
+                end
             end
-            opt.src.inlineable = isinlineable(def, opt, params, union_penalties, bonus)
+            src.inlineable = inline_worthy(ir, params, union_penalties, cost_threshold)
+        end
+    end
+
+    return analyzed
+end
+
+# run the optimization work
+function optimize(interp::AbstractInterpreter, opt::OptimizationState,
+                  params::OptimizationParams, caller::InferenceResult)
+    @timeit "optimizer" ir = run_passes(opt.src, opt, caller)
+    return finish(interp, opt, params, ir, caller)
+end
+
+using .EscapeAnalysis
+import .EscapeAnalysis: EscapeState, ArgEscapeCache, is_ipo_profitable
+
+"""
+    cache_escapes!(caller::InferenceResult, estate::EscapeState)
+
+Transforms escape information of call arguments of `caller`,
+and then caches it into a global cache for later interprocedural propagation.
+"""
+cache_escapes!(caller::InferenceResult, estate::EscapeState) =
+    caller.argescapes = ArgEscapeCache(estate)
+
+function ipo_escape_cache(mi_cache::MICache) where MICache
+    return function (linfo::Union{InferenceResult,MethodInstance})
+        if isa(linfo, InferenceResult)
+            argescapes = linfo.argescapes
+        else
+            codeinst = get(mi_cache, linfo, nothing)
+            isa(codeinst, CodeInstance) || return nothing
+            argescapes = codeinst.argescapes
         end
+        return argescapes !== nothing ? argescapes::ArgEscapeCache : nothing
+    end
+end
+null_escape_cache(linfo::Union{InferenceResult,MethodInstance}) = nothing
+
+function run_passes(ci::CodeInfo, sv::OptimizationState, caller::InferenceResult)
+    @timeit "convert"   ir = convert_to_ircode(ci, sv)
+    @timeit "slot2reg"  ir = slot2reg(ir, ci, sv)
+    # TODO: Domsorting can produce an updated domtree - no need to recompute here
+    @timeit "compact 1" ir = compact!(ir)
+    @timeit "Inlining"  ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
+    # @timeit "verify 2" verify_ir(ir)
+    @timeit "compact 2" ir = compact!(ir)
+    @timeit "SROA"      ir = sroa_pass!(ir)
+    @timeit "ADCE"      ir = adce_pass!(ir)
+    @timeit "type lift" ir = type_lift_pass!(ir)
+    @timeit "compact 3" ir = compact!(ir)
+    if JLOptions().debug_level == 2
+        @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
     end
-    nothing
+    return ir
 end
 
+function convert_to_ircode(ci::CodeInfo, sv::OptimizationState)
+    linetable = ci.linetable
+    if !isa(linetable, Vector{LineInfoNode})
+        linetable = collect(LineInfoNode, linetable::Vector{Any})::Vector{LineInfoNode}
+    end
+
+    # check if coverage mode is enabled
+    coverage = coverage_enabled(sv.mod)
+    if !coverage && JLOptions().code_coverage == 3 # path-specific coverage mode
+        for line in linetable
+            if is_file_tracked(line.file)
+                # if any line falls in a tracked file enable coverage for all
+                coverage = true
+                break
+            end
+        end
+    end
 
-# whether `f` is pure for inference
-function is_pure_intrinsic_infer(f::IntrinsicFunction)
-    return !(f === Intrinsics.pointerref || # this one is volatile
-             f === Intrinsics.pointerset || # this one is never effect-free
-             f === Intrinsics.llvmcall ||   # this one is never effect-free
-             f === Intrinsics.arraylen ||   # this one is volatile
-             f === Intrinsics.sqrt_llvm ||  # this one may differ at runtime (by a few ulps)
-             f === Intrinsics.sqrt_llvm_fast ||  # this one may differ at runtime (by a few ulps)
-             f === Intrinsics.cglobal)  # cglobal lookup answer changes at runtime
+    # Go through and add an unreachable node after every
+    # Union{} call. Then reindex labels.
+    code = copy_exprargs(ci.code)
+    stmtinfo = sv.stmt_info
+    codelocs = ci.codelocs
+    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
+    ssaflags = ci.ssaflags
+    meta = Expr[]
+    idx = 1
+    oldidx = 1
+    ssachangemap = fill(0, length(code))
+    labelchangemap = coverage ? fill(0, length(code)) : ssachangemap
+    prevloc = zero(eltype(ci.codelocs))
+    while idx <= length(code)
+        codeloc = codelocs[idx]
+        if coverage && codeloc != prevloc && codeloc != 0
+            # insert a side-effect instruction before the current instruction in the same basic block
+            insert!(code, idx, Expr(:code_coverage_effect))
+            insert!(codelocs, idx, codeloc)
+            insert!(ssavaluetypes, idx, Nothing)
+            insert!(stmtinfo, idx, nothing)
+            insert!(ssaflags, idx, IR_FLAG_NULL)
+            ssachangemap[oldidx] += 1
+            if oldidx < length(labelchangemap)
+                labelchangemap[oldidx + 1] += 1
+            end
+            idx += 1
+            prevloc = codeloc
+        end
+        if code[idx] isa Expr && ssavaluetypes[idx] === Union{}
+            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
+                # insert unreachable in the same basic block after the current instruction (splitting it)
+                insert!(code, idx + 1, ReturnNode())
+                insert!(codelocs, idx + 1, codelocs[idx])
+                insert!(ssavaluetypes, idx + 1, Union{})
+                insert!(stmtinfo, idx + 1, nothing)
+                insert!(ssaflags, idx + 1, ssaflags[idx])
+                if oldidx < length(ssachangemap)
+                    ssachangemap[oldidx + 1] += 1
+                    coverage && (labelchangemap[oldidx + 1] += 1)
+                end
+                idx += 1
+            end
+        end
+        idx += 1
+        oldidx += 1
+    end
+
+    renumber_ir_elements!(code, ssachangemap, labelchangemap)
+
+    for i = 1:length(code)
+        code[i] = process_meta!(meta, code[i])
+    end
+    strip_trailing_junk!(ci, code, stmtinfo)
+    types = Any[]
+    stmts = InstructionStream(code, types, stmtinfo, codelocs, ssaflags)
+    cfg = compute_basic_blocks(code)
+    return IRCode(stmts, cfg, linetable, sv.slottypes, meta, sv.sptypes)
 end
 
-# whether `f` is effect free if nothrow
-intrinsic_effect_free_if_nothrow(f) = f === Intrinsics.pointerref || is_pure_intrinsic_infer(f)
+function process_meta!(meta::Vector{Expr}, @nospecialize stmt)
+    if isexpr(stmt, :meta) && length(stmt.args) ≥ 1
+        push!(meta, stmt)
+        return nothing
+    end
+    return stmt
+end
+
+function slot2reg(ir::IRCode, ci::CodeInfo, sv::OptimizationState)
+    # need `ci` for the slot metadata, IR for the code
+    svdef = sv.linfo.def
+    nargs = isa(svdef, Method) ? Int(svdef.nargs) : 0
+    @timeit "domtree 1" domtree = construct_domtree(ir.cfg.blocks)
+    defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
+    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, sv.slottypes) # consumes `ir`
+    return ir
+end
 
 ## Computing the cost of a function body
 
@@ -294,21 +657,20 @@ plus_saturate(x::Int, y::Int) = max(x, y, x+y)
 # known return type
 isknowntype(@nospecialize T) = (T === Union{}) || isa(T, Const) || isconcretetype(widenconst(T))
 
-function statement_cost(ex::Expr, line::Int, src::CodeInfo, sptypes::Vector{Any},
-                        slottypes::Vector{Any}, union_penalties::Bool,
-                        params::OptimizationParams, error_path::Bool = false)
+function statement_cost(ex::Expr, line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any},
+                        union_penalties::Bool, params::OptimizationParams, error_path::Bool = false)
     head = ex.head
     if is_meta_expr_head(head)
         return 0
     elseif head === :call
         farg = ex.args[1]
-        ftyp = argextype(farg, src, sptypes, slottypes)
+        ftyp = argextype(farg, src, sptypes)
         if ftyp === IntrinsicFunction && farg isa SSAValue
             # if this comes from code that was already inlined into another function,
             # Consts have been widened. try to recover in simple cases.
-            farg = src.code[farg.id]
+            farg = isa(src, CodeInfo) ? src.code[farg.id] : src.stmts[farg.id][:inst]
             if isa(farg, GlobalRef) || isa(farg, QuoteNode) || isa(farg, IntrinsicFunction) || isexpr(farg, :static_parameter)
-                ftyp = argextype(farg, src, sptypes, slottypes)
+                ftyp = argextype(farg, src, sptypes)
             end
         end
         f = singleton_type(ftyp)
@@ -324,42 +686,44 @@ function statement_cost(ex::Expr, line::Int, src::CodeInfo, sptypes::Vector{Any}
             # The efficiency of operations like a[i] and s.b
             # depend strongly on whether the result can be
             # inferred, so check the type of ex
-            if f === Main.Core.getfield || f === Main.Core.tuple
+            if f === Core.getfield || f === Core.tuple || f === Core.getglobal
                 # we might like to penalize non-inferrability, but
                 # tuple iteration/destructuring makes that impossible
                 # return plus_saturate(argcost, isknowntype(extyp) ? 1 : params.inline_nonleaf_penalty)
                 return 0
-            elseif f === Main.Core.isa
+            elseif (f === Core.arrayref || f === Core.const_arrayref || f === Core.arrayset) && length(ex.args) >= 3
+                atyp = argextype(ex.args[3], src, sptypes)
+                return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
+            elseif f === typeassert && isconstType(widenconst(argextype(ex.args[3], src, sptypes)))
+                return 1
+            elseif f === Core.isa
                 # If we're in a union context, we penalize type computations
                 # on union types. In such cases, it is usually better to perform
                 # union splitting on the outside.
-                if union_penalties && isa(argextype(ex.args[2],  src, sptypes, slottypes), Union)
+                if union_penalties && isa(argextype(ex.args[2],  src, sptypes), Union)
                     return params.inline_nonleaf_penalty
                 end
-            elseif (f === Main.Core.arrayref || f === Main.Core.const_arrayref) && length(ex.args) >= 3
-                atyp = argextype(ex.args[3], src, sptypes, slottypes)
-                return isknowntype(atyp) ? 4 : error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
             end
             fidx = find_tfunc(f)
             if fidx === nothing
-                # unknown/unhandled builtin or anonymous function
+                # unknown/unhandled builtin
                 # Use the generic cost of a direct function call
                 return 20
             end
             return T_FFUNC_COST[fidx]
         end
-        extyp = line == -1 ? Any : src.ssavaluetypes[line]
+        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
         if extyp === Union{}
             return 0
         end
         return error_path ? params.inline_error_path_cost : params.inline_nonleaf_penalty
-    elseif head === :foreigncall || head === :invoke
+    elseif head === :foreigncall || head === :invoke || head == :invoke_modify
         # Calls whose "return type" is Union{} do not actually return:
         # they are errors. Since these are not part of the typical
         # run-time of the function, we omit them from
         # consideration. This way, non-inlined error branches do not
         # prevent inlining.
-        extyp = line == -1 ? Any : src.ssavaluetypes[line]
+        extyp = line == -1 ? Any : argextype(SSAValue(line), src, sptypes)
         return extyp === Union{} ? 0 : 20
     elseif head === :(=)
         if ex.args[1] isa GlobalRef
@@ -369,7 +733,7 @@ function statement_cost(ex::Expr, line::Int, src::CodeInfo, sptypes::Vector{Any}
         end
         a = ex.args[2]
         if a isa Expr
-            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, slottypes, params, error_path))
+            cost = plus_saturate(cost, statement_cost(a, -1, src, sptypes, union_penalties, params, error_path))
         end
         return cost
     elseif head === :copyast
@@ -384,43 +748,42 @@ function statement_cost(ex::Expr, line::Int, src::CodeInfo, sptypes::Vector{Any}
     return 0
 end
 
-function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::CodeInfo, sptypes::Vector{Any},
-                                  slottypes::Vector{Any}, union_penalties::Bool, params::OptimizationParams,
-                                  throw_blocks::Union{Nothing,BitSet})
+function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any},
+                                  union_penalties::Bool, params::OptimizationParams)
     thiscost = 0
+    dst(tgt) = isa(src, IRCode) ? first(src.cfg.blocks[tgt].stmts) : tgt
     if stmt isa Expr
-        thiscost = statement_cost(stmt, line, src, sptypes, slottypes, union_penalties, params,
-                                  params.unoptimize_throw_blocks && line in throw_blocks)::Int
+        thiscost = statement_cost(stmt, line, src, sptypes, union_penalties, params,
+                                  is_stmt_throw_block(isa(src, IRCode) ? src.stmts.flag[line] : src.ssaflags[line]))::Int
     elseif stmt isa GotoNode
         # loops are generally always expensive
         # but assume that forward jumps are already counted for from
         # summing the cost of the not-taken branch
-        thiscost = stmt.label < line ? 40 : 0
+        thiscost = dst(stmt.label) < line ? 40 : 0
     elseif stmt isa GotoIfNot
-        thiscost = stmt.dest < line ? 40 : 0
+        thiscost = dst(stmt.dest) < line ? 40 : 0
     end
     return thiscost
 end
 
-function inline_worthy(body::Array{Any,1}, src::CodeInfo, sptypes::Vector{Any}, slottypes::Vector{Any},
+function inline_worthy(ir::IRCode,
                        params::OptimizationParams, union_penalties::Bool=false, cost_threshold::Integer=params.inline_cost_threshold)
     bodycost::Int = 0
-    throw_blocks = params.unoptimize_throw_blocks ? find_throw_blocks(body) : nothing
-    for line = 1:length(body)
-        stmt = body[line]
-        thiscost = statement_or_branch_cost(stmt, line, src, sptypes, slottypes, union_penalties, params, throw_blocks)
+    for line = 1:length(ir.stmts)
+        stmt = ir.stmts[line][:inst]
+        thiscost = statement_or_branch_cost(stmt, line, ir, ir.sptypes, union_penalties, params)
         bodycost = plus_saturate(bodycost, thiscost)
         bodycost > cost_threshold && return false
     end
     return true
 end
 
-function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::CodeInfo, sptypes::Vector{Any}, unionpenalties::Bool, params::OptimizationParams)
-    throw_blocks = params.unoptimize_throw_blocks ? find_throw_blocks(body) : nothing
+function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::Union{CodeInfo, IRCode}, sptypes::Vector{Any}, unionpenalties::Bool, params::OptimizationParams)
     maxcost = 0
     for line = 1:length(body)
         stmt = body[line]
-        thiscost = statement_or_branch_cost(stmt, line, src, sptypes, src.slottypes, unionpenalties, params, throw_blocks)
+        thiscost = statement_or_branch_cost(stmt, line, src, sptypes,
+                                            unionpenalties, params)
         cost[line] = thiscost
         if thiscost > maxcost
             maxcost = thiscost
@@ -429,30 +792,33 @@ function statement_costs!(cost::Vector{Int}, body::Vector{Any}, src::CodeInfo, s
     return maxcost
 end
 
-function is_known_call(e::Expr, @nospecialize(func), src, sptypes::Vector{Any}, slottypes::Vector{Any} = empty_slottypes)
-    if e.head !== :call
-        return false
-    end
-    f = argextype(e.args[1], src, sptypes, slottypes)
-    return isa(f, Const) && f.val === func
+function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int})
+    return renumber_ir_elements!(body, ssachangemap, ssachangemap)
 end
 
-function renumber_ir_elements!(body::Vector{Any}, changemap::Vector{Int})
-    return renumber_ir_elements!(body, changemap, changemap)
+function cumsum_ssamap!(ssachangemap::Vector{Int})
+    any_change = false
+    rel_change = 0
+    for i = 1:length(ssachangemap)
+        val = ssachangemap[i]
+        any_change |= val ≠ 0
+        rel_change += val
+        if val == -1
+            # Keep a marker that this statement was deleted
+            ssachangemap[i] = typemin(Int)
+        else
+            ssachangemap[i] = rel_change
+        end
+    end
+    return any_change
 end
 
 function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, labelchangemap::Vector{Int})
-    for i = 2:length(labelchangemap)
-        labelchangemap[i] += labelchangemap[i - 1]
-    end
+    any_change = cumsum_ssamap!(labelchangemap)
     if ssachangemap !== labelchangemap
-        for i = 2:length(ssachangemap)
-            ssachangemap[i] += ssachangemap[i - 1]
-        end
-    end
-    if labelchangemap[end] == 0 && ssachangemap[end] == 0
-        return
+        any_change |= cumsum_ssamap!(ssachangemap)
     end
+    any_change || return
     for i = 1:length(body)
         el = body[i]
         if isa(el, GotoNode)
@@ -462,13 +828,35 @@ function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, lab
             if isa(cond, SSAValue)
                 cond = SSAValue(cond.id + ssachangemap[cond.id])
             end
-            body[i] = GotoIfNot(cond, el.dest + labelchangemap[el.dest])
+            was_deleted = labelchangemap[el.dest] == typemin(Int)
+            body[i] = was_deleted ? cond : GotoIfNot(cond, el.dest + labelchangemap[el.dest])
         elseif isa(el, ReturnNode)
-            if isdefined(el, :val) && isa(el.val, SSAValue)
-                body[i] = ReturnNode(SSAValue(el.val.id + ssachangemap[el.val.id]))
+            if isdefined(el, :val)
+                val = el.val
+                if isa(val, SSAValue)
+                    body[i] = ReturnNode(SSAValue(val.id + ssachangemap[val.id]))
+                end
             end
         elseif isa(el, SSAValue)
             body[i] = SSAValue(el.id + ssachangemap[el.id])
+        elseif isa(el, PhiNode)
+            i = 1
+            edges = el.edges
+            values = el.values
+            while i <= length(edges)
+                was_deleted = ssachangemap[edges[i]] == typemin(Int)
+                if was_deleted
+                    deleteat!(edges, i)
+                    deleteat!(values, i)
+                else
+                    edges[i] += ssachangemap[edges[i]]
+                    val = values[i]
+                    if isa(val, SSAValue)
+                        values[i] = SSAValue(val.id + ssachangemap[val.id])
+                    end
+                    i += 1
+                end
+            end
         elseif isa(el, Expr)
             if el.head === :(=) && el.args[2] isa Expr
                 el = el.args[2]::Expr
@@ -488,5 +876,3 @@ function renumber_ir_elements!(body::Vector{Any}, ssachangemap::Vector{Int}, lab
         end
     end
 end
-
-include("compiler/ssair/driver.jl")
diff --git a/base/compiler/parsing.jl b/base/compiler/parsing.jl
index 19aaf08cdde53b..8b474cf148fb29 100644
--- a/base/compiler/parsing.jl
+++ b/base/compiler/parsing.jl
@@ -3,17 +3,17 @@
 # Call Julia's builtin flisp-based parser. `offset` is 0-based offset into the
 # byte buffer or string.
 function fl_parse(text::Union{Core.SimpleVector,String},
-                  filename::String, offset, options)
+                  filename::String, lineno, offset, options)
     if text isa Core.SimpleVector
         # Will be generated by C entry points jl_parse_string etc
         text, text_len = text
     else
         text_len = sizeof(text)
     end
-    ccall(:jl_fl_parse, Any, (Ptr{UInt8}, Csize_t, Any, Csize_t, Any),
-          text, text_len, filename, offset, options)
+    ccall(:jl_fl_parse, Any, (Ptr{UInt8}, Csize_t, Any, Csize_t, Csize_t, Any),
+          text, text_len, filename, lineno, offset, options)
 end
 
-function fl_parse(text::AbstractString, filename::AbstractString, offset, options)
-    fl_parse(String(text), String(filename), offset, options)
+function fl_parse(text::AbstractString, filename::AbstractString, lineno, offset, options)
+    fl_parse(String(text), String(filename), lineno, offset, options)
 end
diff --git a/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
new file mode 100644
index 00000000000000..407b447a228a39
--- /dev/null
+++ b/base/compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl
@@ -0,0 +1,1913 @@
+baremodule EscapeAnalysis
+
+export
+    analyze_escapes,
+    getaliases,
+    isaliased,
+    has_no_escape,
+    has_arg_escape,
+    has_return_escape,
+    has_thrown_escape,
+    has_all_escape
+
+const _TOP_MOD = ccall(:jl_base_relative_to, Any, (Any,), EscapeAnalysis)::Module
+
+# imports
+import ._TOP_MOD: ==, getindex, setindex!
+# usings
+import Core:
+    MethodInstance, Const, Argument, SSAValue, PiNode, PhiNode, UpsilonNode, PhiCNode,
+    ReturnNode, GotoNode, GotoIfNot, SimpleVector, MethodMatch, CodeInstance,
+    sizeof, ifelse, arrayset, arrayref, arraysize
+import ._TOP_MOD:     # Base definitions
+    @__MODULE__, @eval, @assert, @specialize, @nospecialize, @inbounds, @inline, @noinline,
+    @label, @goto, !, !==, !=, ≠, +, -, *, ≤, <, ≥, >, &, |, <<, error, missing, copy,
+    Vector, BitSet, IdDict, IdSet, UnitRange, Csize_t, Callable, ∪, ⊆, ∩, :, ∈, ∉, =>,
+    in, length, get, first, last, haskey, keys, get!, isempty, isassigned,
+    pop!, push!, pushfirst!, empty!, delete!, max, min, enumerate, unwrap_unionall,
+    ismutabletype
+import Core.Compiler: # Core.Compiler specific definitions
+    Bottom, InferenceResult, IRCode, IR_FLAG_EFFECT_FREE,
+    isbitstype, isexpr, is_meta_expr_head, println, widenconst, argextype, singleton_type,
+    fieldcount_noerror, try_compute_field, try_compute_fieldidx, hasintersect, ⊑,
+    intrinsic_nothrow, array_builtin_common_typecheck, arrayset_typecheck,
+    setfield!_nothrow, alloc_array_ndims, stmt_effect_free, check_effect_free!
+
+include(x) = _TOP_MOD.include(@__MODULE__, x)
+if _TOP_MOD === Core.Compiler
+    include("compiler/ssair/EscapeAnalysis/disjoint_set.jl")
+else
+    include("disjoint_set.jl")
+end
+
+const AInfo = IdSet{Any}
+const LivenessSet = BitSet
+
+"""
+    x::EscapeInfo
+
+A lattice for escape information, which holds the following properties:
+- `x.Analyzed::Bool`: not formally part of the lattice, only indicates `x` has not been analyzed or not
+- `x.ReturnEscape::Bool`: indicates `x` can escape to the caller via return
+- `x.ThrownEscape::BitSet`: records SSA statement numbers where `x` can be thrown as exception:
+  * `isempty(x.ThrownEscape)`: `x` will never be thrown in this call frame (the bottom)
+  * `pc ∈ x.ThrownEscape`: `x` may be thrown at the SSA statement at `pc`
+  * `-1 ∈ x.ThrownEscape`: `x` may be thrown at arbitrary points of this call frame (the top)
+  This information will be used by `escape_exception!` to propagate potential escapes via exception.
+- `x.AliasInfo::Union{Bool,IndexableFields,IndexableElements,Unindexable}`: maintains all possible values
+  that can be aliased to fields or array elements of `x`:
+  * `x.AliasInfo === false` indicates the fields/elements of `x` aren't analyzed yet
+  * `x.AliasInfo === true` indicates the fields/elements of `x` can't be analyzed,
+    e.g. the type of `x` is not known or is not concrete and thus its fields/elements
+    can't be known precisely
+  * `x.AliasInfo::IndexableFields` records all the possible values that can be aliased to fields of object `x` with precise index information
+  * `x.AliasInfo::IndexableElements` records all the possible values that can be aliased to elements of array `x` with precise index information
+  * `x.AliasInfo::Unindexable` records all the possible values that can be aliased to fields/elements of `x` without precise index information
+- `x.Liveness::BitSet`: records SSA statement numbers where `x` should be live, e.g.
+  to be used as a call argument, to be returned to a caller, or preserved for `:foreigncall`:
+  * `isempty(x.Liveness)`: `x` is never be used in this call frame (the bottom)
+  * `0 ∈ x.Liveness` also has the special meaning that it's a call argument of the currently
+    analyzed call frame (and thus it's visible from the caller immediately).
+  * `pc ∈ x.Liveness`: `x` may be used at the SSA statement at `pc`
+  * `-1 ∈ x.Liveness`: `x` may be used at arbitrary points of this call frame (the top)
+
+There are utility constructors to create common `EscapeInfo`s, e.g.,
+- `NoEscape()`: the bottom(-like) element of this lattice, meaning it won't escape to anywhere
+- `AllEscape()`: the topmost element of this lattice, meaning it will escape to everywhere
+
+`analyze_escapes` will transition these elements from the bottom to the top,
+in the same direction as Julia's native type inference routine.
+An abstract state will be initialized with the bottom(-like) elements:
+- the call arguments are initialized as `ArgEscape()`, whose `Liveness` property includes `0`
+  to indicate that it is passed as a call argument and visible from a caller immediately
+- the other states are initialized as `NotAnalyzed()`, which is a special lattice element that
+  is slightly lower than `NoEscape`, but at the same time doesn't represent any meaning
+  other than it's not analyzed yet (thus it's not formally part of the lattice)
+"""
+struct EscapeInfo
+    Analyzed::Bool
+    ReturnEscape::Bool
+    ThrownEscape::LivenessSet
+    AliasInfo #::Union{IndexableFields,IndexableElements,Unindexable,Bool}
+    Liveness::LivenessSet
+
+    function EscapeInfo(
+        Analyzed::Bool,
+        ReturnEscape::Bool,
+        ThrownEscape::LivenessSet,
+        AliasInfo#=::Union{IndexableFields,IndexableElements,Unindexable,Bool}=#,
+        Liveness::LivenessSet,
+        )
+        @nospecialize AliasInfo
+        return new(
+            Analyzed,
+            ReturnEscape,
+            ThrownEscape,
+            AliasInfo,
+            Liveness,
+            )
+    end
+    function EscapeInfo(
+        x::EscapeInfo,
+        # non-concrete fields should be passed as default arguments
+        # in order to avoid allocating non-concrete `NamedTuple`s
+        AliasInfo#=::Union{IndexableFields,IndexableElements,Unindexable,Bool}=# = x.AliasInfo;
+        Analyzed::Bool = x.Analyzed,
+        ReturnEscape::Bool = x.ReturnEscape,
+        ThrownEscape::LivenessSet = x.ThrownEscape,
+        Liveness::LivenessSet = x.Liveness,
+        )
+        @nospecialize AliasInfo
+        return new(
+            Analyzed,
+            ReturnEscape,
+            ThrownEscape,
+            AliasInfo,
+            Liveness,
+            )
+    end
+end
+
+# precomputed default values in order to eliminate computations at each callsite
+
+const BOT_THROWN_ESCAPE = LivenessSet()
+# NOTE the lattice operations should try to avoid actual set computations on this top value,
+# and e.g. LivenessSet(0:1000000) should also work without incurring excessive computations
+const TOP_THROWN_ESCAPE = LivenessSet(-1)
+
+const BOT_LIVENESS = LivenessSet()
+# NOTE the lattice operations should try to avoid actual set computations on this top value,
+# and e.g. LivenessSet(0:1000000) should also work without incurring excessive computations
+const TOP_LIVENESS = LivenessSet(-1:0)
+const ARG_LIVENESS = LivenessSet(0)
+
+# the constructors
+NotAnalyzed() = EscapeInfo(false, false, BOT_THROWN_ESCAPE, false, BOT_LIVENESS) # not formally part of the lattice
+NoEscape() = EscapeInfo(true, false, BOT_THROWN_ESCAPE, false, BOT_LIVENESS)
+ArgEscape() = EscapeInfo(true, false, BOT_THROWN_ESCAPE, true, ARG_LIVENESS)
+ReturnEscape(pc::Int) = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, LivenessSet(pc))
+AllReturnEscape() = EscapeInfo(true, true, BOT_THROWN_ESCAPE, false, TOP_LIVENESS)
+ThrownEscape(pc::Int) = EscapeInfo(true, false, LivenessSet(pc), false, BOT_LIVENESS)
+AllEscape() = EscapeInfo(true, true, TOP_THROWN_ESCAPE, true, TOP_LIVENESS)
+
+const ⊥, ⊤ = NotAnalyzed(), AllEscape()
+
+# Convenience names for some ⊑ₑ queries
+has_no_escape(x::EscapeInfo) = !x.ReturnEscape && isempty(x.ThrownEscape) && 0 ∉ x.Liveness
+has_arg_escape(x::EscapeInfo) = 0 in x.Liveness
+has_return_escape(x::EscapeInfo) = x.ReturnEscape
+has_return_escape(x::EscapeInfo, pc::Int) = x.ReturnEscape && (-1 ∈ x.Liveness || pc in x.Liveness)
+has_thrown_escape(x::EscapeInfo) = !isempty(x.ThrownEscape)
+has_thrown_escape(x::EscapeInfo, pc::Int) = -1 ∈ x.ThrownEscape  || pc in x.ThrownEscape
+has_all_escape(x::EscapeInfo) = ⊤ ⊑ₑ x
+
+# utility lattice constructors
+ignore_argescape(x::EscapeInfo) = EscapeInfo(x; Liveness=delete!(copy(x.Liveness), 0))
+ignore_thrownescapes(x::EscapeInfo) = EscapeInfo(x; ThrownEscape=BOT_THROWN_ESCAPE)
+ignore_aliasinfo(x::EscapeInfo) = EscapeInfo(x, false)
+ignore_liveness(x::EscapeInfo) = EscapeInfo(x; Liveness=BOT_LIVENESS)
+
+# AliasInfo
+struct IndexableFields
+    infos::Vector{AInfo}
+end
+struct IndexableElements
+    infos::IdDict{Int,AInfo}
+end
+struct Unindexable
+    info::AInfo
+end
+IndexableFields(nflds::Int) = IndexableFields(AInfo[AInfo() for _ in 1:nflds])
+Unindexable() = Unindexable(AInfo())
+
+merge_to_unindexable(AliasInfo::IndexableFields) = Unindexable(merge_to_unindexable(AliasInfo.infos))
+merge_to_unindexable(AliasInfo::Unindexable, AliasInfos::IndexableFields) = Unindexable(merge_to_unindexable(AliasInfo.info, AliasInfos.infos))
+merge_to_unindexable(infos::Vector{AInfo}) = merge_to_unindexable(AInfo(), infos)
+function merge_to_unindexable(info::AInfo, infos::Vector{AInfo})
+    for i = 1:length(infos)
+        info = info ∪ infos[i]
+    end
+    return info
+end
+merge_to_unindexable(AliasInfo::IndexableElements) = Unindexable(merge_to_unindexable(AliasInfo.infos))
+merge_to_unindexable(AliasInfo::Unindexable, AliasInfos::IndexableElements) = Unindexable(merge_to_unindexable(AliasInfo.info, AliasInfos.infos))
+merge_to_unindexable(infos::IdDict{Int,AInfo}) = merge_to_unindexable(AInfo(), infos)
+function merge_to_unindexable(info::AInfo, infos::IdDict{Int,AInfo})
+    for idx in keys(infos)
+        info = info ∪ infos[idx]
+    end
+    return info
+end
+
+# we need to make sure this `==` operator corresponds to lattice equality rather than object equality,
+# otherwise `propagate_changes` can't detect the convergence
+x::EscapeInfo == y::EscapeInfo = begin
+    # fast pass: better to avoid top comparison
+    x === y && return true
+    x.Analyzed === y.Analyzed || return false
+    x.ReturnEscape === y.ReturnEscape || return false
+    xt, yt = x.ThrownEscape, y.ThrownEscape
+    if xt === TOP_THROWN_ESCAPE
+        yt === TOP_THROWN_ESCAPE || return false
+    elseif yt === TOP_THROWN_ESCAPE
+        return false # x.ThrownEscape === TOP_THROWN_ESCAPE
+    else
+        xt == yt || return false
+    end
+    xa, ya = x.AliasInfo, y.AliasInfo
+    if isa(xa, Bool)
+        xa === ya || return false
+    elseif isa(xa, IndexableFields)
+        isa(ya, IndexableFields) || return false
+        xa.infos == ya.infos || return false
+    elseif isa(xa, IndexableElements)
+        isa(ya, IndexableElements) || return false
+        xa.infos == ya.infos || return false
+    else
+        xa = xa::Unindexable
+        isa(ya, Unindexable) || return false
+        xa.info == ya.info || return false
+    end
+    xl, yl = x.Liveness, y.Liveness
+    if xl === TOP_LIVENESS
+        yl === TOP_LIVENESS || return false
+    elseif yl === TOP_LIVENESS
+        return false # x.Liveness === TOP_LIVENESS
+    else
+        xl == yl || return false
+    end
+    return true
+end
+
+"""
+    x::EscapeInfo ⊑ₑ y::EscapeInfo -> Bool
+
+The non-strict partial order over [`EscapeInfo`](@ref).
+"""
+x::EscapeInfo ⊑ₑ y::EscapeInfo = begin
+    # fast pass: better to avoid top comparison
+    if y === ⊤
+        return true
+    elseif x === ⊤
+        return false # return y === ⊤
+    elseif x === ⊥
+        return true
+    elseif y === ⊥
+        return false # return x === ⊥
+    end
+    x.Analyzed ≤ y.Analyzed || return false
+    x.ReturnEscape ≤ y.ReturnEscape || return false
+    xt, yt = x.ThrownEscape, y.ThrownEscape
+    if xt === TOP_THROWN_ESCAPE
+        yt !== TOP_THROWN_ESCAPE && return false
+    elseif yt !== TOP_THROWN_ESCAPE
+        xt ⊆ yt || return false
+    end
+    xa, ya = x.AliasInfo, y.AliasInfo
+    if isa(xa, Bool)
+        xa && ya !== true && return false
+    elseif isa(xa, IndexableFields)
+        if isa(ya, IndexableFields)
+            xinfos, yinfos = xa.infos, ya.infos
+            xn, yn = length(xinfos), length(yinfos)
+            xn > yn && return false
+            for i in 1:xn
+                xinfos[i] ⊆ yinfos[i] || return false
+            end
+        elseif isa(ya, IndexableElements)
+            return false
+        elseif isa(ya, Unindexable)
+            xinfos, yinfo = xa.infos, ya.info
+            for i = length(xinfos)
+                xinfos[i] ⊆ yinfo || return false
+            end
+        else
+            ya === true || return false
+        end
+    elseif isa(xa, IndexableElements)
+        if isa(ya, IndexableElements)
+            xinfos, yinfos = xa.infos, ya.infos
+            keys(xinfos) ⊆ keys(yinfos) || return false
+            for idx in keys(xinfos)
+                xinfos[idx] ⊆ yinfos[idx] || return false
+            end
+        elseif isa(ya, IndexableFields)
+            return false
+        elseif isa(ya, Unindexable)
+            xinfos, yinfo = xa.infos, ya.info
+            for idx in keys(xinfos)
+                xinfos[idx] ⊆ yinfo || return false
+            end
+        else
+            ya === true || return false
+        end
+    else
+        xa = xa::Unindexable
+        if isa(ya, Unindexable)
+            xinfo, yinfo = xa.info, ya.info
+            xinfo ⊆ yinfo || return false
+        else
+            ya === true || return false
+        end
+    end
+    xl, yl = x.Liveness, y.Liveness
+    if xl === TOP_LIVENESS
+        yl !== TOP_LIVENESS && return false
+    elseif yl !== TOP_LIVENESS
+        xl ⊆ yl || return false
+    end
+    return true
+end
+
+"""
+    x::EscapeInfo ⊏ₑ y::EscapeInfo -> Bool
+
+The strict partial order over [`EscapeInfo`](@ref).
+This is defined as the irreflexive kernel of `⊏ₑ`.
+"""
+x::EscapeInfo ⊏ₑ y::EscapeInfo = x ⊑ₑ y && !(y ⊑ₑ x)
+
+"""
+    x::EscapeInfo ⋤ₑ y::EscapeInfo -> Bool
+
+This order could be used as a slightly more efficient version of the strict order `⊏ₑ`,
+where we can safely assume `x ⊑ₑ y` holds.
+"""
+x::EscapeInfo ⋤ₑ y::EscapeInfo = !(y ⊑ₑ x)
+
+"""
+    x::EscapeInfo ⊔ₑ y::EscapeInfo -> EscapeInfo
+
+Computes the join of `x` and `y` in the partial order defined by [`EscapeInfo`](@ref).
+"""
+x::EscapeInfo ⊔ₑ y::EscapeInfo = begin
+    # fast pass: better to avoid top join
+    if x === ⊤ || y === ⊤
+        return ⊤
+    elseif x === ⊥
+        return y
+    elseif y === ⊥
+        return x
+    end
+    xt, yt = x.ThrownEscape, y.ThrownEscape
+    if xt === TOP_THROWN_ESCAPE || yt === TOP_THROWN_ESCAPE
+        ThrownEscape = TOP_THROWN_ESCAPE
+    elseif xt === BOT_THROWN_ESCAPE
+        ThrownEscape = yt
+    elseif yt === BOT_THROWN_ESCAPE
+        ThrownEscape = xt
+    else
+        ThrownEscape = xt ∪ yt
+    end
+    AliasInfo = merge_alias_info(x.AliasInfo, y.AliasInfo)
+    xl, yl = x.Liveness, y.Liveness
+    if xl === TOP_LIVENESS || yl === TOP_LIVENESS
+        Liveness = TOP_LIVENESS
+    elseif xl === BOT_LIVENESS
+        Liveness = yl
+    elseif yl === BOT_LIVENESS
+        Liveness = xl
+    else
+        Liveness = xl ∪ yl
+    end
+    return EscapeInfo(
+        x.Analyzed | y.Analyzed,
+        x.ReturnEscape | y.ReturnEscape,
+        ThrownEscape,
+        AliasInfo,
+        Liveness,
+        )
+end
+
+function merge_alias_info(@nospecialize(xa), @nospecialize(ya))
+    if xa === true || ya === true
+        return true
+    elseif xa === false
+        return ya
+    elseif ya === false
+        return xa
+    elseif isa(xa, IndexableFields)
+        if isa(ya, IndexableFields)
+            xinfos, yinfos = xa.infos, ya.infos
+            xn, yn = length(xinfos), length(yinfos)
+            nmax, nmin = max(xn, yn), min(xn, yn)
+            infos = Vector{AInfo}(undef, nmax)
+            for i in 1:nmax
+                if i > nmin
+                    infos[i] = (xn > yn ? xinfos : yinfos)[i]
+                else
+                    infos[i] = xinfos[i] ∪ yinfos[i]
+                end
+            end
+            return IndexableFields(infos)
+        elseif isa(ya, Unindexable)
+            xinfos, yinfo = xa.infos, ya.info
+            return merge_to_unindexable(ya, xa)
+        else
+            return true # handle conflicting case conservatively
+        end
+    elseif isa(xa, IndexableElements)
+        if isa(ya, IndexableElements)
+            xinfos, yinfos = xa.infos, ya.infos
+            infos = IdDict{Int,AInfo}()
+            for idx in keys(xinfos)
+                if !haskey(yinfos, idx)
+                    infos[idx] = xinfos[idx]
+                else
+                    infos[idx] = xinfos[idx] ∪ yinfos[idx]
+                end
+            end
+            for idx in keys(yinfos)
+                haskey(xinfos, idx) && continue # unioned already
+                infos[idx] = yinfos[idx]
+            end
+            return IndexableElements(infos)
+        elseif isa(ya, Unindexable)
+            return merge_to_unindexable(ya, xa)
+        else
+            return true # handle conflicting case conservatively
+        end
+    else
+        xa = xa::Unindexable
+        if isa(ya, IndexableFields)
+            return merge_to_unindexable(xa, ya)
+        elseif isa(ya, IndexableElements)
+            return merge_to_unindexable(xa, ya)
+        else
+            ya = ya::Unindexable
+            xinfo, yinfo = xa.info, ya.info
+            info = xinfo ∪ yinfo
+            return Unindexable(info)
+        end
+    end
+end
+
+const AliasSet = IntDisjointSet{Int}
+
+const ArrayInfo = IdDict{Int,Vector{Int}}
+
+"""
+    estate::EscapeState
+
+Extended lattice that maps arguments and SSA values to escape information represented as [`EscapeInfo`](@ref).
+Escape information imposed on SSA IR element `x` can be retrieved by `estate[x]`.
+"""
+struct EscapeState
+    escapes::Vector{EscapeInfo}
+    aliasset::AliasSet
+    nargs::Int
+    arrayinfo::Union{Nothing,ArrayInfo}
+end
+function EscapeState(nargs::Int, nstmts::Int, arrayinfo::Union{Nothing,ArrayInfo})
+    escapes = EscapeInfo[
+        1 ≤ i ≤ nargs ? ArgEscape() : ⊥ for i in 1:(nargs+nstmts)]
+    aliasset = AliasSet(nargs+nstmts)
+    return EscapeState(escapes, aliasset, nargs, arrayinfo)
+end
+function getindex(estate::EscapeState, @nospecialize(x))
+    xidx = iridx(x, estate)
+    return xidx === nothing ? nothing : estate.escapes[xidx]
+end
+function setindex!(estate::EscapeState, v::EscapeInfo, @nospecialize(x))
+    xidx = iridx(x, estate)
+    if xidx !== nothing
+        estate.escapes[xidx] = v
+    end
+    return estate
+end
+
+"""
+    iridx(x, estate::EscapeState) -> xidx::Union{Int,Nothing}
+
+Tries to convert analyzable IR element `x::Union{Argument,SSAValue}` to
+its unique identifier number `xidx` that is valid in the analysis context of `estate`.
+Returns `nothing` if `x` isn't maintained by `estate` and thus unanalyzable (e.g. `x::GlobalRef`).
+
+`irval` is the inverse function of `iridx` (not formally), i.e.
+`irval(iridx(x::Union{Argument,SSAValue}, state), state) === x`.
+"""
+function iridx(@nospecialize(x), estate::EscapeState)
+    if isa(x, Argument)
+        xidx = x.n
+        @assert 1 ≤ xidx ≤ estate.nargs "invalid Argument"
+    elseif isa(x, SSAValue)
+        xidx = x.id + estate.nargs
+    else
+        return nothing
+    end
+    return xidx
+end
+
+"""
+    irval(xidx::Int, estate::EscapeState) -> x::Union{Argument,SSAValue}
+
+Converts its unique identifier number `xidx` to the original IR element `x::Union{Argument,SSAValue}`
+that is analyzable in the context of `estate`.
+
+`iridx` is the inverse function of `irval` (not formally), i.e.
+`iridx(irval(xidx, state), state) === xidx`.
+"""
+function irval(xidx::Int, estate::EscapeState)
+    x = xidx > estate.nargs ? SSAValue(xidx-estate.nargs) : Argument(xidx)
+    return x
+end
+
+function getaliases(x::Union{Argument,SSAValue}, estate::EscapeState)
+    xidx = iridx(x, estate)
+    aliases = getaliases(xidx, estate)
+    aliases === nothing && return nothing
+    return Union{Argument,SSAValue}[irval(aidx, estate) for aidx in aliases]
+end
+function getaliases(xidx::Int, estate::EscapeState)
+    aliasset = estate.aliasset
+    root = find_root!(aliasset, xidx)
+    if xidx ≠ root || aliasset.ranks[xidx] > 0
+        # the size of this alias set containing `key` is larger than 1,
+        # collect the entire alias set
+        aliases = Int[]
+        for aidx in 1:length(aliasset.parents)
+            if aliasset.parents[aidx] == root
+                push!(aliases, aidx)
+            end
+        end
+        return aliases
+    else
+        return nothing
+    end
+end
+
+isaliased(x::Union{Argument,SSAValue}, y::Union{Argument,SSAValue}, estate::EscapeState) =
+    isaliased(iridx(x, estate), iridx(y, estate), estate)
+isaliased(xidx::Int, yidx::Int, estate::EscapeState) =
+    in_same_set(estate.aliasset, xidx, yidx)
+
+struct ArgEscapeInfo
+    EscapeBits::UInt8
+end
+function ArgEscapeInfo(x::EscapeInfo)
+    x === ⊤ && return ArgEscapeInfo(ARG_ALL_ESCAPE)
+    EscapeBits = 0x00
+    has_return_escape(x) && (EscapeBits |= ARG_RETURN_ESCAPE)
+    has_thrown_escape(x) && (EscapeBits |= ARG_THROWN_ESCAPE)
+    return ArgEscapeInfo(EscapeBits)
+end
+
+const ARG_ALL_ESCAPE    = 0x01 << 0
+const ARG_RETURN_ESCAPE = 0x01 << 1
+const ARG_THROWN_ESCAPE = 0x01 << 2
+
+has_no_escape(x::ArgEscapeInfo)     = !has_all_escape(x) && !has_return_escape(x) && !has_thrown_escape(x)
+has_all_escape(x::ArgEscapeInfo)    = x.EscapeBits & ARG_ALL_ESCAPE    ≠ 0
+has_return_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_RETURN_ESCAPE ≠ 0
+has_thrown_escape(x::ArgEscapeInfo) = x.EscapeBits & ARG_THROWN_ESCAPE ≠ 0
+
+struct ArgAliasing
+    aidx::Int
+    bidx::Int
+end
+
+struct ArgEscapeCache
+    argescapes::Vector{ArgEscapeInfo}
+    argaliases::Vector{ArgAliasing}
+end
+
+function ArgEscapeCache(estate::EscapeState)
+    nargs = estate.nargs
+    argescapes = Vector{ArgEscapeInfo}(undef, nargs)
+    argaliases = ArgAliasing[]
+    for i = 1:nargs
+        info = estate.escapes[i]
+        @assert info.AliasInfo === true
+        argescapes[i] = ArgEscapeInfo(info)
+        for j = (i+1):nargs
+            if isaliased(i, j, estate)
+                push!(argaliases, ArgAliasing(i, j))
+            end
+        end
+    end
+    return ArgEscapeCache(argescapes, argaliases)
+end
+
+"""
+    is_ipo_profitable(ir::IRCode, nargs::Int) -> Bool
+
+Heuristically checks if there is any profitability to run the escape analysis on `ir`
+and generate IPO escape information cache. Specifically, this function examines
+if any call argument is "interesting" in terms of their escapability.
+"""
+function is_ipo_profitable(ir::IRCode, nargs::Int)
+    for i = 1:nargs
+        t = unwrap_unionall(widenconst(ir.argtypes[i]))
+        t <: IO && return false # bail out IO-related functions
+        is_ipo_profitable_type(t) && return true
+    end
+    return false
+end
+function is_ipo_profitable_type(@nospecialize t)
+    if isa(t, Union)
+        return is_ipo_profitable_type(t.a) && is_ipo_profitable_type(t.b)
+    end
+    (t === String || t === Symbol || t === Module || t === SimpleVector) && return false
+    return ismutabletype(t)
+end
+
+abstract type Change end
+struct EscapeChange <: Change
+    xidx::Int
+    xinfo::EscapeInfo
+end
+struct AliasChange <: Change
+    xidx::Int
+    yidx::Int
+end
+struct ArgAliasChange <: Change
+    xidx::Int
+    yidx::Int
+end
+struct LivenessChange <: Change
+    xidx::Int
+    livepc::Int
+end
+const Changes = Vector{Change}
+
+struct AnalysisState{T<:Callable}
+    ir::IRCode
+    estate::EscapeState
+    changes::Changes
+    get_escape_cache::T
+end
+
+function getinst(ir::IRCode, idx::Int)
+    nstmts = length(ir.stmts)
+    if idx ≤ nstmts
+        return ir.stmts[idx]
+    else
+        return ir.new_nodes.stmts[idx - nstmts]
+    end
+end
+
+"""
+    analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape_cache::Callable)
+        -> estate::EscapeState
+
+Analyzes escape information in `ir`:
+- `nargs`: the number of actual arguments of the analyzed call
+- `call_resolved`: if interprocedural calls are already resolved by `ssa_inlining_pass!`
+- `get_escape_cache(::Union{InferenceResult,MethodInstance}) -> Union{Nothing,ArgEscapeCache}`:
+  retrieves cached argument escape information
+"""
+function analyze_escapes(ir::IRCode, nargs::Int, call_resolved::Bool, get_escape_cache::T) where T<:Callable
+    stmts = ir.stmts
+    nstmts = length(stmts) + length(ir.new_nodes.stmts)
+
+    tryregions, arrayinfo, callinfo = compute_frameinfo(ir, call_resolved)
+    estate = EscapeState(nargs, nstmts, arrayinfo)
+    changes = Changes() # keeps changes that happen at current statement
+    astate = AnalysisState(ir, estate, changes, get_escape_cache)
+
+    local debug_itr_counter = 0
+    while true
+        local anyupdate = false
+
+        for pc in nstmts:-1:1
+            stmt = getinst(ir, pc)[:inst]
+
+            # collect escape information
+            if isa(stmt, Expr)
+                head = stmt.head
+                if head === :call
+                    if callinfo !== nothing
+                        escape_call!(astate, pc, stmt.args, callinfo)
+                    else
+                        escape_call!(astate, pc, stmt.args)
+                    end
+                elseif head === :invoke
+                    escape_invoke!(astate, pc, stmt.args)
+                elseif head === :new || head === :splatnew
+                    escape_new!(astate, pc, stmt.args)
+                elseif head === :(=)
+                    lhs, rhs = stmt.args
+                    if isa(lhs, GlobalRef) # global store
+                        add_escape_change!(astate, rhs, ⊤)
+                    else
+                        unexpected_assignment!(ir, pc)
+                    end
+                elseif head === :foreigncall
+                    escape_foreigncall!(astate, pc, stmt.args)
+                elseif head === :throw_undef_if_not # XXX when is this expression inserted ?
+                    add_escape_change!(astate, stmt.args[1], ThrownEscape(pc))
+                elseif is_meta_expr_head(head)
+                    # meta expressions doesn't account for any usages
+                    continue
+                elseif head === :enter || head === :leave || head === :the_exception || head === :pop_exception
+                    # ignore these expressions since escapes via exceptions are handled by `escape_exception!`
+                    # `escape_exception!` conservatively propagates `AllEscape` anyway,
+                    # and so escape information imposed on `:the_exception` isn't computed
+                    continue
+                elseif head === :static_parameter ||  # this exists statically, not interested in its escape
+                       head === :copyast ||           # XXX can this account for some escapes?
+                       head === :undefcheck ||        # XXX can this account for some escapes?
+                       head === :isdefined ||         # just returns `Bool`, nothing accounts for any escapes
+                       head === :gc_preserve_begin || # `GC.@preserve` expressions themselves won't be used anywhere
+                       head === :gc_preserve_end      # `GC.@preserve` expressions themselves won't be used anywhere
+                    continue
+                else
+                    add_conservative_changes!(astate, pc, stmt.args)
+                end
+            elseif isa(stmt, ReturnNode)
+                if isdefined(stmt, :val)
+                    add_escape_change!(astate, stmt.val, ReturnEscape(pc))
+                end
+            elseif isa(stmt, PhiNode)
+                escape_edges!(astate, pc, stmt.values)
+            elseif isa(stmt, PiNode)
+                escape_val_ifdefined!(astate, pc, stmt)
+            elseif isa(stmt, PhiCNode)
+                escape_edges!(astate, pc, stmt.values)
+            elseif isa(stmt, UpsilonNode)
+                escape_val_ifdefined!(astate, pc, stmt)
+            elseif isa(stmt, GlobalRef) # global load
+                add_escape_change!(astate, SSAValue(pc), ⊤)
+            elseif isa(stmt, SSAValue)
+                escape_val!(astate, pc, stmt)
+            elseif isa(stmt, Argument)
+                escape_val!(astate, pc, stmt)
+            else # otherwise `stmt` can be GotoNode, GotoIfNot, and inlined values etc.
+                continue
+            end
+
+            isempty(changes) && continue
+
+            anyupdate |= propagate_changes!(estate, changes)
+
+            empty!(changes)
+        end
+
+        tryregions !== nothing && escape_exception!(astate, tryregions)
+
+        debug_itr_counter += 1
+
+        anyupdate || break
+    end
+
+    # if debug_itr_counter > 2
+    #     println("[EA] excessive iteration count found ", debug_itr_counter, " (", singleton_type(ir.argtypes[1]), ")")
+    # end
+
+    return estate
+end
+
+"""
+    compute_frameinfo(ir::IRCode, call_resolved::Bool) -> (tryregions, arrayinfo, callinfo)
+
+A preparatory linear scan before the escape analysis on `ir` to find:
+- `tryregions::Union{Nothing,Vector{UnitRange{Int}}}`: regions in which potential `throw`s can be caught (used by `escape_exception!`)
+- `arrayinfo::Union{Nothing,IdDict{Int,Vector{Int}}}`: array allocations whose dimensions are known precisely (with some very simple local analysis)
+- `callinfo::`: when `!call_resolved`, `compute_frameinfo` additionally returns `callinfo::Vector{Union{MethodInstance,InferenceResult}}`,
+  which contains information about statically resolved callsites.
+  The inliner will use essentially equivalent interprocedural information to inline callees as well as resolve static callsites,
+  this additional information won't be required when analyzing post-inlining IR.
+
+!!! note
+    This array dimension analysis to compute `arrayinfo` is very local and doesn't account
+    for flow-sensitivity nor complex aliasing.
+    Ideally this dimension analysis should be done as a part of type inference that
+    propagates array dimenstions in a flow sensitive way.
+"""
+function compute_frameinfo(ir::IRCode, call_resolved::Bool)
+    nstmts, nnewnodes = length(ir.stmts), length(ir.new_nodes.stmts)
+    tryregions, arrayinfo = nothing, nothing
+    if !call_resolved
+        callinfo = Vector{Any}(undef, nstmts+nnewnodes)
+    else
+        callinfo = nothing
+    end
+    for idx in 1:nstmts+nnewnodes
+        inst = getinst(ir, idx)
+        stmt = inst[:inst]
+        if !call_resolved
+            # TODO don't call `check_effect_free!` in the inlinear
+            check_effect_free!(ir, idx, stmt, inst[:type])
+        end
+        if callinfo !== nothing && isexpr(stmt, :call)
+            callinfo[idx] = resolve_call(ir, stmt, inst[:info])
+        elseif isexpr(stmt, :enter)
+            @assert idx ≤ nstmts "try/catch inside new_nodes unsupported"
+            tryregions === nothing && (tryregions = UnitRange{Int}[])
+            leave_block = stmt.args[1]::Int
+            leave_pc = first(ir.cfg.blocks[leave_block].stmts)
+            push!(tryregions, idx:leave_pc)
+        elseif isexpr(stmt, :foreigncall)
+            args = stmt.args
+            name = args[1]
+            nn = normalize(name)
+            isa(nn, Symbol) || @goto next_stmt
+            ndims = alloc_array_ndims(nn)
+            ndims === nothing && @goto next_stmt
+            if ndims ≠ 0
+                length(args) ≥ ndims+6 || @goto next_stmt
+                dims = Int[]
+                for i in 1:ndims
+                    dim = argextype(args[i+6], ir)
+                    isa(dim, Const) || @goto next_stmt
+                    dim = dim.val
+                    isa(dim, Int) || @goto next_stmt
+                    push!(dims, dim)
+                end
+            else
+                length(args) ≥ 7 || @goto next_stmt
+                dims = argextype(args[7], ir)
+                if isa(dims, Const)
+                    dims = dims.val
+                    isa(dims, Tuple{Vararg{Int}}) || @goto next_stmt
+                    dims = collect(Int, dims)
+                else
+                    dims === Tuple{} || @goto next_stmt
+                    dims = Int[]
+                end
+            end
+            if arrayinfo === nothing
+                arrayinfo = ArrayInfo()
+            end
+            arrayinfo[idx] = dims
+        elseif arrayinfo !== nothing
+            # TODO this super limited alias analysis is able to handle only very simple cases
+            # this should be replaced with a proper forward dimension analysis
+            if isa(stmt, PhiNode)
+                values = stmt.values
+                local dims = nothing
+                for i = 1:length(values)
+                    if isassigned(values, i)
+                        val = values[i]
+                        if isa(val, SSAValue) && haskey(arrayinfo, val.id)
+                            if dims === nothing
+                                dims = arrayinfo[val.id]
+                                continue
+                            elseif dims == arrayinfo[val.id]
+                                continue
+                            end
+                        end
+                    end
+                    @goto next_stmt
+                end
+                if dims !== nothing
+                    arrayinfo[idx] = dims
+                end
+            elseif isa(stmt, PiNode)
+                if isdefined(stmt, :val)
+                    val = stmt.val
+                    if isa(val, SSAValue) && haskey(arrayinfo, val.id)
+                        arrayinfo[idx] = arrayinfo[val.id]
+                    end
+                end
+            end
+        end
+        @label next_stmt
+    end
+    return tryregions, arrayinfo, callinfo
+end
+
+# define resolve_call
+if _TOP_MOD === Core.Compiler
+    include("compiler/ssair/EscapeAnalysis/interprocedural.jl")
+else
+    include("interprocedural.jl")
+end
+
+# propagate changes, and check convergence
+function propagate_changes!(estate::EscapeState, changes::Changes)
+    local anychanged = false
+    for change in changes
+        if isa(change, EscapeChange)
+            anychanged |= propagate_escape_change!(estate, change)
+        elseif isa(change, LivenessChange)
+            anychanged |= propagate_liveness_change!(estate, change)
+        else
+            change = change::AliasChange
+            anychanged |= propagate_alias_change!(estate, change)
+        end
+    end
+    return anychanged
+end
+
+@inline propagate_escape_change!(estate::EscapeState, change::EscapeChange) =
+    propagate_escape_change!(⊔ₑ, estate, change)
+
+# allows this to work as lattice join as well as lattice meet
+@inline function propagate_escape_change!(@specialize(op),
+    estate::EscapeState, change::EscapeChange)
+    (; xidx, xinfo) = change
+    anychanged = _propagate_escape_change!(op, estate, xidx, xinfo)
+    # COMBAK is there a more efficient method of escape information equalization on aliasset?
+    aliases = getaliases(xidx, estate)
+    if aliases !== nothing
+        for aidx in aliases
+            anychanged |= _propagate_escape_change!(op, estate, aidx, xinfo)
+        end
+    end
+    return anychanged
+end
+
+@inline function _propagate_escape_change!(@specialize(op),
+    estate::EscapeState, xidx::Int, info::EscapeInfo)
+    old = estate.escapes[xidx]
+    new = op(old, info)
+    if old ≠ new
+        estate.escapes[xidx] = new
+        return true
+    end
+    return false
+end
+
+# propagate Liveness changes separately in order to avoid constructing too many LivenessSet
+@inline function propagate_liveness_change!(estate::EscapeState, change::LivenessChange)
+    (; xidx, livepc) = change
+    info = estate.escapes[xidx]
+    Liveness = info.Liveness
+    Liveness === TOP_LIVENESS && return false
+    livepc in Liveness && return false
+    if Liveness === BOT_LIVENESS || Liveness === ARG_LIVENESS
+        # if this Liveness is a constant, we shouldn't modify it and propagate this change as a new EscapeInfo
+        Liveness = copy(Liveness)
+        push!(Liveness, livepc)
+        estate.escapes[xidx] = EscapeInfo(info; Liveness)
+        return true
+    else
+        # directly modify Liveness property in order to avoid excessive copies
+        push!(Liveness, livepc)
+        return true
+    end
+end
+
+@inline function propagate_alias_change!(estate::EscapeState, change::AliasChange)
+    anychange = false
+    (; xidx, yidx) = change
+    aliasset = estate.aliasset
+    xroot = find_root!(aliasset, xidx)
+    yroot = find_root!(aliasset, yidx)
+    if xroot ≠ yroot
+        union!(aliasset, xroot, yroot)
+        return true
+    end
+    return false
+end
+
+function add_escape_change!(astate::AnalysisState, @nospecialize(x), xinfo::EscapeInfo,
+    force::Bool = false)
+    xinfo === ⊥ && return nothing # performance optimization
+    xidx = iridx(x, astate.estate)
+    if xidx !== nothing
+        if force || !isbitstype(widenconst(argextype(x, astate.ir)))
+            push!(astate.changes, EscapeChange(xidx, xinfo))
+        end
+    end
+    return nothing
+end
+
+function add_liveness_change!(astate::AnalysisState, @nospecialize(x), livepc::Int)
+    xidx = iridx(x, astate.estate)
+    if xidx !== nothing
+        if !isbitstype(widenconst(argextype(x, astate.ir)))
+            push!(astate.changes, LivenessChange(xidx, livepc))
+        end
+    end
+    return nothing
+end
+
+function add_alias_change!(astate::AnalysisState, @nospecialize(x), @nospecialize(y))
+    if isa(x, GlobalRef)
+        return add_escape_change!(astate, y, ⊤)
+    elseif isa(y, GlobalRef)
+        return add_escape_change!(astate, x, ⊤)
+    end
+    estate = astate.estate
+    xidx = iridx(x, estate)
+    yidx = iridx(y, estate)
+    if xidx !== nothing && yidx !== nothing
+        if !isaliased(xidx, yidx, astate.estate)
+            pushfirst!(astate.changes, AliasChange(xidx, yidx))
+        end
+        # add new escape change here so that it's shared among the expanded `aliasset` in `propagate_escape_change!`
+        xinfo = estate.escapes[xidx]
+        yinfo = estate.escapes[yidx]
+        add_escape_change!(astate, x, xinfo ⊔ₑ yinfo, #=force=#true)
+    end
+    return nothing
+end
+
+struct LocalDef
+    idx::Int
+end
+struct LocalUse
+    idx::Int
+end
+
+function add_alias_escapes!(astate::AnalysisState, @nospecialize(v), ainfo::AInfo)
+    estate = astate.estate
+    for x in ainfo
+        isa(x, LocalUse) || continue # ignore def
+        x = SSAValue(x.idx) # obviously this won't be true once we implement interprocedural AliasInfo
+        add_alias_change!(astate, v, x)
+    end
+end
+
+function add_thrown_escapes!(astate::AnalysisState, pc::Int, args::Vector{Any},
+    first_idx::Int = 1, last_idx::Int = length(args))
+    info = ThrownEscape(pc)
+    for i in first_idx:last_idx
+        add_escape_change!(astate, args[i], info)
+    end
+end
+
+function add_liveness_changes!(astate::AnalysisState, pc::Int, args::Vector{Any},
+    first_idx::Int = 1, last_idx::Int = length(args))
+    for i in first_idx:last_idx
+        arg = args[i]
+        add_liveness_change!(astate, arg, pc)
+    end
+end
+
+function add_fallback_changes!(astate::AnalysisState, pc::Int, args::Vector{Any},
+    first_idx::Int = 1, last_idx::Int = length(args))
+    info = ThrownEscape(pc)
+    for i in first_idx:last_idx
+        arg = args[i]
+        add_escape_change!(astate, arg, info)
+        add_liveness_change!(astate, arg, pc)
+    end
+end
+
+function add_conservative_changes!(astate::AnalysisState, pc::Int, args::Vector{Any},
+    first_idx::Int = 1, last_idx::Int = length(args))
+    for i in first_idx:last_idx
+        add_escape_change!(astate, args[i], ⊤)
+    end
+    add_escape_change!(astate, SSAValue(pc), ⊤) # it may return GlobalRef etc.
+    return nothing
+end
+
+function escape_edges!(astate::AnalysisState, pc::Int, edges::Vector{Any})
+    ret = SSAValue(pc)
+    for i in 1:length(edges)
+        if isassigned(edges, i)
+            v = edges[i]
+            add_alias_change!(astate, ret, v)
+        end
+    end
+end
+
+function escape_val_ifdefined!(astate::AnalysisState, pc::Int, x)
+    if isdefined(x, :val)
+        escape_val!(astate, pc, x.val)
+    end
+end
+
+function escape_val!(astate::AnalysisState, pc::Int, @nospecialize(val))
+    ret = SSAValue(pc)
+    add_alias_change!(astate, ret, val)
+end
+
+function escape_unanalyzable_obj!(astate::AnalysisState, @nospecialize(obj), objinfo::EscapeInfo)
+    objinfo = EscapeInfo(objinfo, true)
+    add_escape_change!(astate, obj, objinfo)
+    return objinfo
+end
+
+@noinline function unexpected_assignment!(ir::IRCode, pc::Int)
+    @eval Main (ir = $ir; pc = $pc)
+    error("unexpected assignment found: inspect `Main.pc` and `Main.pc`")
+end
+
+is_effect_free(ir::IRCode, pc::Int) = getinst(ir, pc)[:flag] & IR_FLAG_EFFECT_FREE ≠ 0
+
+# NOTE if we don't maintain the alias set that is separated from the lattice state, we can do
+# something like below: it essentially incorporates forward escape propagation in our default
+# backward propagation, and leads to inefficient convergence that requires more iterations
+# # lhs = rhs: propagate escape information of `rhs` to `lhs`
+# function escape_alias!(astate::AnalysisState, @nospecialize(lhs), @nospecialize(rhs))
+#     if isa(rhs, SSAValue) || isa(rhs, Argument)
+#         vinfo = astate.estate[rhs]
+#     else
+#         return
+#     end
+#     add_escape_change!(astate, lhs, vinfo)
+# end
+
+"""
+    escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{Int}})
+
+Propagates escapes via exceptions that can happen in `tryregions`.
+
+Naively it seems enough to propagate escape information imposed on `:the_exception` object,
+but actually there are several other ways to access to the exception object such as
+`Base.current_exceptions` and manual catch of `rethrow`n object.
+For example, escape analysis needs to account for potential escape of the allocated object
+via `rethrow_escape!()` call in the example below:
+```julia
+const Gx = Ref{Any}()
+@noinline function rethrow_escape!()
+    try
+        rethrow()
+    catch err
+        Gx[] = err
+    end
+end
+unsafeget(x) = isassigned(x) ? x[] : throw(x)
+
+code_escapes() do
+    r = Ref{String}()
+    try
+        t = unsafeget(r)
+    catch err
+        t = typeof(err)  # `err` (which `r` may alias to) doesn't escape here
+        rethrow_escape!() # `r` can escape here
+    end
+    return t
+end
+```
+
+As indicated by the above example, it requires a global analysis in addition to a base escape
+analysis to reason about all possible escapes via existing exception interfaces correctly.
+For now we conservatively always propagate `AllEscape` to all potentially thrown objects,
+since such an additional analysis might not be worthwhile to do given that exception handlings
+and error paths usually don't need to be very performance sensitive, and optimizations of
+error paths might be very ineffective anyway since they are sometimes "unoptimized"
+intentionally for latency reasons.
+"""
+function escape_exception!(astate::AnalysisState, tryregions::Vector{UnitRange{Int}})
+    estate = astate.estate
+    # NOTE if `:the_exception` is the only way to access the exception, we can do:
+    # exc = SSAValue(pc)
+    # excinfo = estate[exc]
+    excinfo = ⊤
+    escapes = estate.escapes
+    for i in 1:length(escapes)
+        x = escapes[i]
+        xt = x.ThrownEscape
+        xt === TOP_THROWN_ESCAPE && @goto propagate_exception_escape # fast pass
+        for pc in xt
+            for region in tryregions
+                pc in region && @goto propagate_exception_escape # early break because of AllEscape
+            end
+        end
+        continue
+        @label propagate_exception_escape
+        xval = irval(i, estate)
+        add_escape_change!(astate, xval, excinfo)
+    end
+end
+
+# escape statically-resolved call, i.e. `Expr(:invoke, ::MethodInstance, ...)`
+escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any}) =
+    escape_invoke!(astate, pc, args, first(args)::MethodInstance, 2)
+
+function escape_invoke!(astate::AnalysisState, pc::Int, args::Vector{Any},
+    linfo::Linfo, first_idx::Int, last_idx::Int = length(args))
+    if isa(linfo, InferenceResult)
+        cache = astate.get_escape_cache(linfo)
+        linfo = linfo.linfo
+    else
+        cache = astate.get_escape_cache(linfo)
+    end
+    if cache === nothing
+        return add_conservative_changes!(astate, pc, args, 2)
+    else
+        cache = cache::ArgEscapeCache
+    end
+    ret = SSAValue(pc)
+    retinfo = astate.estate[ret] # escape information imposed on the call statement
+    method = linfo.def::Method
+    nargs = Int(method.nargs)
+    for (i, argidx) in enumerate(first_idx:last_idx)
+        arg = args[argidx]
+        if i > nargs
+            # handle isva signature
+            # COMBAK will this be invalid once we take alias information into account?
+            i = nargs
+        end
+        arginfo = cache.argescapes[i]
+        info = from_interprocedural(arginfo, pc)
+        if has_return_escape(arginfo)
+            # if this argument can be "returned", in addition to propagating
+            # the escape information imposed on this call argument within the callee,
+            # we should also account for possible aliasing of this argument and the returned value
+            add_escape_change!(astate, arg, info)
+            add_alias_change!(astate, ret, arg)
+        else
+            # if this is simply passed as the call argument, we can just propagate
+            # the escape information imposed on this call argument within the callee
+            add_escape_change!(astate, arg, info)
+        end
+    end
+    for (; aidx, bidx) in cache.argaliases
+        add_alias_change!(astate, args[aidx-(first_idx-1)], args[bidx-(first_idx-1)])
+    end
+    # we should disable the alias analysis on this newly introduced object
+    add_escape_change!(astate, ret, EscapeInfo(retinfo, true))
+end
+
+"""
+    from_interprocedural(arginfo::ArgEscapeInfo, pc::Int) -> x::EscapeInfo
+
+Reinterprets the escape information imposed on the call argument which is cached as `arginfo`
+in the context of the caller frame, where `pc` is the SSA statement number of the return value.
+"""
+function from_interprocedural(arginfo::ArgEscapeInfo, pc::Int)
+    has_all_escape(arginfo) && return ⊤
+
+    ThrownEscape = has_thrown_escape(arginfo) ? LivenessSet(pc) : BOT_THROWN_ESCAPE
+
+    return EscapeInfo(
+        #=Analyzed=#true, #=ReturnEscape=#false, ThrownEscape,
+        # FIXME implement interprocedural memory effect-analysis
+        # currently, this essentially disables the entire field analysis
+        # it might be okay from the SROA point of view, since we can't remove the allocation
+        # as far as it's passed to a callee anyway, but still we may want some field analysis
+        # for e.g. stack allocation or some other IPO optimizations
+        #=AliasInfo=#true, #=Liveness=#LivenessSet(pc))
+end
+
+# escape every argument `(args[6:length(args[3])])` and the name `args[1]`
+# TODO: we can apply a similar strategy like builtin calls to specialize some foreigncalls
+function escape_foreigncall!(astate::AnalysisState, pc::Int, args::Vector{Any})
+    nargs = length(args)
+    if nargs < 6
+        # invalid foreigncall, just escape everything
+        add_conservative_changes!(astate, pc, args)
+        return
+    end
+    argtypes = args[3]::SimpleVector
+    nargs = length(argtypes)
+    name = args[1]
+    nn = normalize(name)
+    if isa(nn, Symbol)
+        boundserror_ninds = array_resize_info(nn)
+        if boundserror_ninds !== nothing
+            boundserror, ninds = boundserror_ninds
+            escape_array_resize!(boundserror, ninds, astate, pc, args)
+            return
+        end
+        if is_array_copy(nn)
+            escape_array_copy!(astate, pc, args)
+            return
+        elseif is_array_isassigned(nn)
+            escape_array_isassigned!(astate, pc, args)
+            return
+        end
+        # if nn === :jl_gc_add_finalizer_th
+        #     # TODO add `FinalizerEscape` ?
+        # end
+    end
+    # NOTE array allocations might have been proven as nothrow (https://github.com/JuliaLang/julia/pull/43565)
+    nothrow = is_effect_free(astate.ir, pc)
+    name_info = nothrow ? ⊥ : ThrownEscape(pc)
+    add_escape_change!(astate, name, name_info)
+    add_liveness_change!(astate, name, pc)
+    for i = 1:nargs
+        # we should escape this argument if it is directly called,
+        # otherwise just impose ThrownEscape if not nothrow
+        if argtypes[i] === Any
+            arg_info = ⊤
+        else
+            arg_info = nothrow ? ⊥ : ThrownEscape(pc)
+        end
+        add_escape_change!(astate, args[5+i], arg_info)
+        add_liveness_change!(astate, args[5+i], pc)
+    end
+    for i = (5+nargs):length(args)
+        arg = args[i]
+        add_escape_change!(astate, arg, ⊥)
+        add_liveness_change!(astate, arg, pc)
+    end
+end
+
+normalize(@nospecialize x) = isa(x, QuoteNode) ? x.value : x
+
+function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any}, callinfo::Vector{Any})
+    info = callinfo[pc]
+    if isa(info, Bool)
+        info && return # known to be no escape
+        # now cascade to the builtin handling
+        escape_call!(astate, pc, args)
+        return
+    elseif isa(info, CallInfo)
+        for linfo in info.linfos
+            escape_invoke!(astate, pc, args, linfo, 1)
+        end
+        # accounts for a potential escape via MethodError
+        info.nothrow || add_thrown_escapes!(astate, pc, args)
+        return
+    else
+        @assert info === missing
+        # if this call couldn't be analyzed, escape it conservatively
+        add_conservative_changes!(astate, pc, args)
+    end
+end
+
+function escape_call!(astate::AnalysisState, pc::Int, args::Vector{Any})
+    ir = astate.ir
+    ft = argextype(first(args), ir, ir.sptypes, ir.argtypes)
+    f = singleton_type(ft)
+    if isa(f, Core.IntrinsicFunction)
+        # XXX somehow `:call` expression can creep in here, ideally we should be able to do:
+        # argtypes = Any[argextype(args[i], astate.ir) for i = 2:length(args)]
+        argtypes = Any[]
+        for i = 2:length(args)
+            arg = args[i]
+            push!(argtypes, isexpr(arg, :call) ? Any : argextype(arg, ir))
+        end
+        if intrinsic_nothrow(f, argtypes)
+            add_liveness_changes!(astate, pc, args, 2)
+        else
+            add_fallback_changes!(astate, pc, args, 2)
+        end
+        return # TODO accounts for pointer operations?
+    end
+    result = escape_builtin!(f, astate, pc, args)
+    if result === missing
+        # if this call hasn't been handled by any of pre-defined handlers, escape it conservatively
+        add_conservative_changes!(astate, pc, args)
+        return
+    elseif result === true
+        add_liveness_changes!(astate, pc, args, 2)
+        return # ThrownEscape is already checked
+    else
+        # we escape statements with the `ThrownEscape` property using the effect-freeness
+        # computed by `stmt_effect_free` invoked within inlining
+        # TODO throwness ≠ "effect-free-ness"
+        if is_effect_free(astate.ir, pc)
+            add_liveness_changes!(astate, pc, args, 2)
+        else
+            add_fallback_changes!(astate, pc, args, 2)
+        end
+        return
+    end
+end
+
+escape_builtin!(@nospecialize(f), _...) = return missing
+
+# safe builtins
+escape_builtin!(::typeof(isa), _...) = return false
+escape_builtin!(::typeof(typeof), _...) = return false
+escape_builtin!(::typeof(sizeof), _...) = return false
+escape_builtin!(::typeof(===), _...) = return false
+# not really safe, but `ThrownEscape` will be imposed later
+escape_builtin!(::typeof(isdefined), _...) = return false
+escape_builtin!(::typeof(throw), _...) = return false
+
+function escape_builtin!(::typeof(ifelse), astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) == 4 || return false
+    f, cond, th, el = args
+    ret = SSAValue(pc)
+    condt = argextype(cond, astate.ir)
+    if isa(condt, Const) && (cond = condt.val; isa(cond, Bool))
+        if cond
+            add_alias_change!(astate, th, ret)
+        else
+            add_alias_change!(astate, el, ret)
+        end
+    else
+        add_alias_change!(astate, th, ret)
+        add_alias_change!(astate, el, ret)
+    end
+    return false
+end
+
+function escape_builtin!(::typeof(typeassert), astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) == 3 || return false
+    f, obj, typ = args
+    ret = SSAValue(pc)
+    add_alias_change!(astate, ret, obj)
+    return false
+end
+
+function escape_new!(astate::AnalysisState, pc::Int, args::Vector{Any})
+    obj = SSAValue(pc)
+    objinfo = astate.estate[obj]
+    AliasInfo = objinfo.AliasInfo
+    nargs = length(args)
+    if isa(AliasInfo, Bool)
+        AliasInfo && @goto conservative_propagation
+        # AliasInfo of this object hasn't been analyzed yet: set AliasInfo now
+        typ = widenconst(argextype(obj, astate.ir))
+        nflds = fieldcount_noerror(typ)
+        if nflds === nothing
+            AliasInfo = Unindexable()
+            @goto escape_unindexable_def
+        else
+            AliasInfo = IndexableFields(nflds)
+            @goto escape_indexable_def
+        end
+    elseif isa(AliasInfo, IndexableFields)
+        @label escape_indexable_def
+        # fields are known precisely: propagate escape information imposed on recorded possibilities to the exact field values
+        infos = AliasInfo.infos
+        nf = length(infos)
+        objinfo′ = ignore_aliasinfo(objinfo)
+        for i in 2:nargs
+            i-1 > nf && break # may happen when e.g. ϕ-node merges values with different types
+            arg = args[i]
+            add_alias_escapes!(astate, arg, infos[i-1])
+            push!(infos[i-1], LocalDef(pc))
+            # propagate the escape information of this object ignoring field information
+            add_escape_change!(astate, arg, objinfo′)
+            add_liveness_change!(astate, arg, pc)
+        end
+        add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo
+    elseif isa(AliasInfo, Unindexable)
+        @label escape_unindexable_def
+        # fields are known partially: propagate escape information imposed on recorded possibilities to all fields values
+        info = AliasInfo.info
+        objinfo′ = ignore_aliasinfo(objinfo)
+        for i in 2:nargs
+            arg = args[i]
+            add_alias_escapes!(astate, arg, info)
+            push!(info, LocalDef(pc))
+            # propagate the escape information of this object ignoring field information
+            add_escape_change!(astate, arg, objinfo′)
+            add_liveness_change!(astate, arg, pc)
+        end
+        add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo
+    else
+        # this object has been used as array, but it is allocated as struct here (i.e. should throw)
+        # update obj's field information and just handle this case conservatively
+        objinfo = escape_unanalyzable_obj!(astate, obj, objinfo)
+        @label conservative_propagation
+        # the fields couldn't be analyzed precisely: propagate the entire escape information
+        # of this object to all its fields as the most conservative propagation
+        for i in 2:nargs
+            arg = args[i]
+            add_escape_change!(astate, arg, objinfo)
+            add_liveness_change!(astate, arg, pc)
+        end
+    end
+    if !is_effect_free(astate.ir, pc)
+        add_thrown_escapes!(astate, pc, args)
+    end
+end
+
+function escape_builtin!(::typeof(tuple), astate::AnalysisState, pc::Int, args::Vector{Any})
+    escape_new!(astate, pc, args)
+    return false
+end
+
+function analyze_fields(ir::IRCode, @nospecialize(typ), @nospecialize(fld))
+    nflds = fieldcount_noerror(typ)
+    if nflds === nothing
+        return Unindexable(), 0
+    end
+    if isa(typ, DataType)
+        fldval = try_compute_field(ir, fld)
+        fidx = try_compute_fieldidx(typ, fldval)
+    else
+        fidx = nothing
+    end
+    if fidx === nothing
+        return Unindexable(), 0
+    end
+    return IndexableFields(nflds), fidx
+end
+
+function reanalyze_fields(ir::IRCode, AliasInfo::IndexableFields, @nospecialize(typ), @nospecialize(fld))
+    nflds = fieldcount_noerror(typ)
+    if nflds === nothing
+        return merge_to_unindexable(AliasInfo), 0
+    end
+    if isa(typ, DataType)
+        fldval = try_compute_field(ir, fld)
+        fidx = try_compute_fieldidx(typ, fldval)
+    else
+        fidx = nothing
+    end
+    if fidx === nothing
+        return merge_to_unindexable(AliasInfo), 0
+    end
+    infos = AliasInfo.infos
+    ninfos = length(infos)
+    if nflds > ninfos
+        for _ in 1:(nflds-ninfos)
+            push!(infos, AInfo())
+        end
+    end
+    return AliasInfo, fidx
+end
+
+function escape_builtin!(::typeof(getfield), astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) ≥ 3 || return false
+    ir, estate = astate.ir, astate.estate
+    obj = args[2]
+    typ = widenconst(argextype(obj, ir))
+    if hasintersect(typ, Module) # global load
+        add_escape_change!(astate, SSAValue(pc), ⊤)
+    end
+    if isa(obj, SSAValue) || isa(obj, Argument)
+        objinfo = estate[obj]
+    else
+        return false
+    end
+    AliasInfo = objinfo.AliasInfo
+    if isa(AliasInfo, Bool)
+        AliasInfo && @goto conservative_propagation
+        # AliasInfo of this object hasn't been analyzed yet: set AliasInfo now
+        AliasInfo, fidx = analyze_fields(ir, typ, args[3])
+        if isa(AliasInfo, IndexableFields)
+            @goto record_indexable_use
+        else
+            @goto record_unindexable_use
+        end
+    elseif isa(AliasInfo, IndexableFields)
+        AliasInfo, fidx = reanalyze_fields(ir, AliasInfo, typ, args[3])
+        isa(AliasInfo, Unindexable) && @goto record_unindexable_use
+        @label record_indexable_use
+        push!(AliasInfo.infos[fidx], LocalUse(pc))
+        add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo
+    elseif isa(AliasInfo, Unindexable)
+        @label record_unindexable_use
+        push!(AliasInfo.info, LocalUse(pc))
+        add_escape_change!(astate, obj, EscapeInfo(objinfo, AliasInfo)) # update with new AliasInfo
+    else
+        # this object has been used as array, but it is used as struct here (i.e. should throw)
+        # update obj's field information and just handle this case conservatively
+        objinfo = escape_unanalyzable_obj!(astate, obj, objinfo)
+        @label conservative_propagation
+        # at the extreme case, a field of `obj` may point to `obj` itself
+        # so add the alias change here as the most conservative propagation
+        add_alias_change!(astate, obj, SSAValue(pc))
+    end
+    return false
+end
+
+function escape_builtin!(::typeof(setfield!), astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) ≥ 4 || return false
+    ir, estate = astate.ir, astate.estate
+    obj = args[2]
+    val = args[4]
+    if isa(obj, SSAValue) || isa(obj, Argument)
+        objinfo = estate[obj]
+    else
+        # unanalyzable object (e.g. obj::GlobalRef): escape field value conservatively
+        add_escape_change!(astate, val, ⊤)
+        @goto add_thrown_escapes
+    end
+    AliasInfo = objinfo.AliasInfo
+    if isa(AliasInfo, Bool)
+        AliasInfo && @goto conservative_propagation
+        # AliasInfo of this object hasn't been analyzed yet: set AliasInfo now
+        typ = widenconst(argextype(obj, ir))
+        AliasInfo, fidx = analyze_fields(ir, typ, args[3])
+        if isa(AliasInfo, IndexableFields)
+            @goto escape_indexable_def
+        else
+            @goto escape_unindexable_def
+        end
+    elseif isa(AliasInfo, IndexableFields)
+        typ = widenconst(argextype(obj, ir))
+        AliasInfo, fidx = reanalyze_fields(ir, AliasInfo, typ, args[3])
+        isa(AliasInfo, Unindexable) && @goto escape_unindexable_def
+        @label escape_indexable_def
+        add_alias_escapes!(astate, val, AliasInfo.infos[fidx])
+        push!(AliasInfo.infos[fidx], LocalDef(pc))
+        objinfo = EscapeInfo(objinfo, AliasInfo)
+        add_escape_change!(astate, obj, objinfo) # update with new AliasInfo
+        # propagate the escape information of this object ignoring field information
+        add_escape_change!(astate, val, ignore_aliasinfo(objinfo))
+    elseif isa(AliasInfo, Unindexable)
+        info = AliasInfo.info
+        @label escape_unindexable_def
+        add_alias_escapes!(astate, val, AliasInfo.info)
+        push!(AliasInfo.info, LocalDef(pc))
+        objinfo = EscapeInfo(objinfo, AliasInfo)
+        add_escape_change!(astate, obj, objinfo) # update with new AliasInfo
+        # propagate the escape information of this object ignoring field information
+        add_escape_change!(astate, val, ignore_aliasinfo(objinfo))
+    else
+        # this object has been used as array, but it is used as struct here (i.e. should throw)
+        # update obj's field information and just handle this case conservatively
+        objinfo = escape_unanalyzable_obj!(astate, obj, objinfo)
+        @label conservative_propagation
+        # the field couldn't be analyzed: alias this object to the value being assigned
+        # as the most conservative propagation (as required for ArgAliasing)
+        add_alias_change!(astate, val, obj)
+    end
+    # also propagate escape information imposed on the return value of this `setfield!`
+    ssainfo = estate[SSAValue(pc)]
+    add_escape_change!(astate, val, ssainfo)
+    # compute the throwness of this setfield! call here since builtin_nothrow doesn't account for that
+    @label add_thrown_escapes
+    argtypes = Any[]
+    for i = 2:length(args)
+        push!(argtypes, argextype(args[i], ir))
+    end
+    setfield!_nothrow(argtypes) || add_thrown_escapes!(astate, pc, args, 2)
+    return true
+end
+
+function escape_builtin!(::typeof(arrayref), astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) ≥ 4 || return false
+    # check potential thrown escapes from this arrayref call
+    argtypes = Any[argextype(args[i], astate.ir) for i in 2:length(args)]
+    boundcheckt = argtypes[1]
+    aryt = argtypes[2]
+    if !array_builtin_common_typecheck(boundcheckt, aryt, argtypes, 3)
+        add_thrown_escapes!(astate, pc, args, 2)
+    end
+    ary = args[3]
+    inbounds = isa(boundcheckt, Const) && !boundcheckt.val::Bool
+    inbounds || add_escape_change!(astate, ary, ThrownEscape(pc))
+    # we don't track precise index information about this array and thus don't know what values
+    # can be referenced here: directly propagate the escape information imposed on the return
+    # value of this `arrayref` call to the array itself as the most conservative propagation
+    # but also with updated index information
+    estate = astate.estate
+    if isa(ary, SSAValue) || isa(ary, Argument)
+        aryinfo = estate[ary]
+    else
+        return true
+    end
+    AliasInfo = aryinfo.AliasInfo
+    if isa(AliasInfo, Bool)
+        AliasInfo && @goto conservative_propagation
+        # AliasInfo of this array hasn't been analyzed yet: set AliasInfo now
+        idx = array_nd_index(astate, ary, args[4:end])
+        if isa(idx, Int)
+            AliasInfo = IndexableElements(IdDict{Int,AInfo}())
+            @goto record_indexable_use
+        end
+        AliasInfo = Unindexable()
+        @goto record_unindexable_use
+    elseif isa(AliasInfo, IndexableElements)
+        idx = array_nd_index(astate, ary, args[4:end])
+        if !isa(idx, Int)
+            AliasInfo = merge_to_unindexable(AliasInfo)
+            @goto record_unindexable_use
+        end
+        @label record_indexable_use
+        info = get!(()->AInfo(), AliasInfo.infos, idx)
+        push!(info, LocalUse(pc))
+        add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo
+    elseif isa(AliasInfo, Unindexable)
+        @label record_unindexable_use
+        push!(AliasInfo.info, LocalUse(pc))
+        add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo
+    else
+        # this object has been used as struct, but it is used as array here (thus should throw)
+        # update ary's element information and just handle this case conservatively
+        aryinfo = escape_unanalyzable_obj!(astate, ary, aryinfo)
+        @label conservative_propagation
+        # at the extreme case, an element of `ary` may point to `ary` itself
+        # so add the alias change here as the most conservative propagation
+        add_alias_change!(astate, ary, SSAValue(pc))
+    end
+    return true
+end
+
+function escape_builtin!(::typeof(arrayset), astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) ≥ 5 || return false
+    # check potential escapes from this arrayset call
+    # NOTE here we essentially only need to account for TypeError, assuming that
+    # UndefRefError or BoundsError don't capture any of the arguments here
+    argtypes = Any[argextype(args[i], astate.ir) for i in 2:length(args)]
+    boundcheckt = argtypes[1]
+    aryt = argtypes[2]
+    valt = argtypes[3]
+    if !(array_builtin_common_typecheck(boundcheckt, aryt, argtypes, 4) &&
+         arrayset_typecheck(aryt, valt))
+        add_thrown_escapes!(astate, pc, args, 2)
+    end
+    ary = args[3]
+    val = args[4]
+    inbounds = isa(boundcheckt, Const) && !boundcheckt.val::Bool
+    inbounds || add_escape_change!(astate, ary, ThrownEscape(pc))
+    # we don't track precise index information about this array and won't record what value
+    # is being assigned here: directly propagate the escape information of this array to
+    # the value being assigned as the most conservative propagation
+    estate = astate.estate
+    if isa(ary, SSAValue) || isa(ary, Argument)
+        aryinfo = estate[ary]
+    else
+        # unanalyzable object (e.g. obj::GlobalRef): escape field value conservatively
+        add_escape_change!(astate, val, ⊤)
+        return true
+    end
+    AliasInfo = aryinfo.AliasInfo
+    if isa(AliasInfo, Bool)
+        AliasInfo && @goto conservative_propagation
+        # AliasInfo of this array hasn't been analyzed yet: set AliasInfo now
+        idx = array_nd_index(astate, ary, args[5:end])
+        if isa(idx, Int)
+            AliasInfo = IndexableElements(IdDict{Int,AInfo}())
+            @goto escape_indexable_def
+        end
+        AliasInfo = Unindexable()
+        @goto escape_unindexable_def
+    elseif isa(AliasInfo, IndexableElements)
+        idx = array_nd_index(astate, ary, args[5:end])
+        if !isa(idx, Int)
+            AliasInfo = merge_to_unindexable(AliasInfo)
+            @goto escape_unindexable_def
+        end
+        @label escape_indexable_def
+        info = get!(()->AInfo(), AliasInfo.infos, idx)
+        add_alias_escapes!(astate, val, info)
+        push!(info, LocalDef(pc))
+        add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo
+        # propagate the escape information of this array ignoring elements information
+        add_escape_change!(astate, val, ignore_aliasinfo(aryinfo))
+    elseif isa(AliasInfo, Unindexable)
+        @label escape_unindexable_def
+        add_alias_escapes!(astate, val, AliasInfo.info)
+        push!(AliasInfo.info, LocalDef(pc))
+        add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo)) # update with new AliasInfo
+        # propagate the escape information of this array ignoring elements information
+        add_escape_change!(astate, val, ignore_aliasinfo(aryinfo))
+    else
+        # this object has been used as struct, but it is used as array here (thus should throw)
+        # update ary's element information and just handle this case conservatively
+        aryinfo = escape_unanalyzable_obj!(astate, ary, aryinfo)
+        @label conservative_propagation
+        add_alias_change!(astate, val, ary)
+    end
+    # also propagate escape information imposed on the return value of this `arrayset`
+    ssainfo = estate[SSAValue(pc)]
+    add_escape_change!(astate, ary, ssainfo)
+    return true
+end
+
+# NOTE this function models and thus should be synced with the implementation of:
+# size_t array_nd_index(jl_array_t *a, jl_value_t **args, size_t nidxs, ...)
+function array_nd_index(astate::AnalysisState, @nospecialize(ary), args::Vector{Any}, nidxs::Int = length(args))
+    isa(ary, SSAValue) || return nothing
+    aryid = ary.id
+    arrayinfo = astate.estate.arrayinfo
+    isa(arrayinfo, ArrayInfo) || return nothing
+    haskey(arrayinfo, aryid) || return nothing
+    dims = arrayinfo[aryid]
+    local i = 0
+    local k, stride = 0, 1
+    local nd = length(dims)
+    while k < nidxs
+        arg = args[k+1]
+        argval = argextype(arg, astate.ir)
+        isa(argval, Const) || return nothing
+        argval = argval.val
+        isa(argval, Int) || return nothing
+        ii = argval - 1
+        i += ii * stride
+        d = k ≥ nd ? 1 : dims[k+1]
+        k < nidxs - 1 && ii ≥ d && return nothing # BoundsError
+        stride *= d
+        k += 1
+    end
+    while k < nd
+        stride *= dims[k+1]
+        k += 1
+    end
+    i ≥ stride && return nothing # BoundsError
+    return i
+end
+
+function escape_builtin!(::typeof(arraysize), astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) == 3 || return false
+    ary = args[2]
+    dim = args[3]
+    if !arraysize_typecheck(ary, dim, astate.ir)
+        add_escape_change!(astate, ary, ThrownEscape(pc))
+        add_escape_change!(astate, dim, ThrownEscape(pc))
+    end
+    # NOTE we may still see "arraysize: dimension out of range", but it doesn't capture anything
+    return true
+end
+
+function arraysize_typecheck(@nospecialize(ary), @nospecialize(dim), ir::IRCode)
+    aryt = argextype(ary, ir)
+    aryt ⊑ Array || return false
+    dimt = argextype(dim, ir)
+    dimt ⊑ Int || return false
+    return true
+end
+
+# returns nothing if this isn't array resizing operation,
+# otherwise returns true if it can throw BoundsError and false if not
+function array_resize_info(name::Symbol)
+    if name === :jl_array_grow_beg || name === :jl_array_grow_end
+        return false, 1
+    elseif name === :jl_array_del_beg || name === :jl_array_del_end
+        return true, 1
+    elseif name === :jl_array_grow_at || name === :jl_array_del_at
+        return true, 2
+    else
+        return nothing
+    end
+end
+
+# NOTE may potentially throw "cannot resize array with shared data" error,
+# but just ignore it since it doesn't capture anything
+function escape_array_resize!(boundserror::Bool, ninds::Int,
+    astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) ≥ 6+ninds || return add_fallback_changes!(astate, pc, args)
+    ary = args[6]
+    aryt = argextype(ary, astate.ir)
+    aryt ⊑ Array || return add_fallback_changes!(astate, pc, args)
+    for i in 1:ninds
+        ind = args[i+6]
+        indt = argextype(ind, astate.ir)
+        indt ⊑ Integer || return add_fallback_changes!(astate, pc, args)
+    end
+    if boundserror
+        # this array resizing can potentially throw `BoundsError`, impose it now
+        add_escape_change!(astate, ary, ThrownEscape(pc))
+    end
+    # give up indexing analysis whenever we see array resizing
+    # (since we track array dimensions only globally)
+    mark_unindexable!(astate, ary)
+    add_liveness_changes!(astate, pc, args, 6)
+end
+
+function mark_unindexable!(astate::AnalysisState, @nospecialize(ary))
+    isa(ary, SSAValue) || return
+    aryinfo = astate.estate[ary]
+    AliasInfo = aryinfo.AliasInfo
+    isa(AliasInfo, IndexableElements) || return
+    AliasInfo = merge_to_unindexable(AliasInfo)
+    add_escape_change!(astate, ary, EscapeInfo(aryinfo, AliasInfo))
+end
+
+is_array_copy(name::Symbol) = name === :jl_array_copy
+
+# FIXME this implementation is very conservative, improve the accuracy and solve broken test cases
+function escape_array_copy!(astate::AnalysisState, pc::Int, args::Vector{Any})
+    length(args) ≥ 6 || return add_fallback_changes!(astate, pc, args)
+    ary = args[6]
+    aryt = argextype(ary, astate.ir)
+    aryt ⊑ Array || return add_fallback_changes!(astate, pc, args)
+    if isa(ary, SSAValue) || isa(ary, Argument)
+        newary = SSAValue(pc)
+        aryinfo = astate.estate[ary]
+        newaryinfo = astate.estate[newary]
+        add_escape_change!(astate, newary, aryinfo)
+        add_escape_change!(astate, ary, newaryinfo)
+    end
+    add_liveness_changes!(astate, pc, args, 6)
+end
+
+is_array_isassigned(name::Symbol) = name === :jl_array_isassigned
+
+function escape_array_isassigned!(astate::AnalysisState, pc::Int, args::Vector{Any})
+    if !array_isassigned_nothrow(args, astate.ir)
+        add_thrown_escapes!(astate, pc, args)
+    end
+    add_liveness_changes!(astate, pc, args, 6)
+end
+
+function array_isassigned_nothrow(args::Vector{Any}, src::IRCode)
+    # if !validate_foreigncall_args(args,
+    #     :jl_array_isassigned, Cint, svec(Any,Csize_t), 0, :ccall)
+    #     return false
+    # end
+    length(args) ≥ 7 || return false
+    arytype = argextype(args[6], src)
+    arytype ⊑ Array || return false
+    idxtype = argextype(args[7], src)
+    idxtype ⊑ Csize_t || return false
+    return true
+end
+
+# # COMBAK do we want to enable this (and also backport this to Base for array allocations?)
+# import Core.Compiler: Cint, svec
+# function validate_foreigncall_args(args::Vector{Any},
+#     name::Symbol, @nospecialize(rt), argtypes::SimpleVector, nreq::Int, convension::Symbol)
+#     length(args) ≥ 5 || return false
+#     normalize(args[1]) === name || return false
+#     args[2] === rt || return false
+#     args[3] === argtypes || return false
+#     args[4] === vararg || return false
+#     normalize(args[5]) === convension || return false
+#     return true
+# end
+
+if isdefined(Core, :ImmutableArray)
+
+import Core: ImmutableArray, arrayfreeze, mutating_arrayfreeze, arraythaw
+
+escape_builtin!(::typeof(arrayfreeze), astate::AnalysisState, pc::Int, args::Vector{Any}) =
+    is_safe_immutable_array_op(Array, astate, args)
+escape_builtin!(::typeof(mutating_arrayfreeze), astate::AnalysisState, pc::Int, args::Vector{Any}) =
+    is_safe_immutable_array_op(Array, astate, args)
+escape_builtin!(::typeof(arraythaw), astate::AnalysisState, pc::Int, args::Vector{Any}) =
+    is_safe_immutable_array_op(ImmutableArray, astate, args)
+function is_safe_immutable_array_op(@nospecialize(arytype), astate::AnalysisState, args::Vector{Any})
+    length(args) == 2 || return false
+    argextype(args[2], astate.ir) ⊑ arytype || return false
+    return true
+end
+
+end # if isdefined(Core, :ImmutableArray)
+
+if _TOP_MOD !== Core.Compiler
+    # NOTE define fancy package utilities when developing EA as an external package
+    include("EAUtils.jl")
+    using .EAUtils
+    export code_escapes, @code_escapes, __clear_cache!
+end
+
+end # baremodule EscapeAnalysis
diff --git a/base/compiler/ssair/EscapeAnalysis/disjoint_set.jl b/base/compiler/ssair/EscapeAnalysis/disjoint_set.jl
new file mode 100644
index 00000000000000..915bc214d7c3ce
--- /dev/null
+++ b/base/compiler/ssair/EscapeAnalysis/disjoint_set.jl
@@ -0,0 +1,143 @@
+# A disjoint set implementation adapted from
+# https://github.com/JuliaCollections/DataStructures.jl/blob/f57330a3b46f779b261e6c07f199c88936f28839/src/disjoint_set.jl
+# under the MIT license: https://github.com/JuliaCollections/DataStructures.jl/blob/master/License.md
+
+# imports
+import ._TOP_MOD:
+    length,
+    eltype,
+    union!,
+    push!
+# usings
+import ._TOP_MOD:
+    OneTo, collect, zero, zeros, one, typemax
+
+# Disjoint-Set
+
+############################################################
+#
+#   A forest of disjoint sets of integers
+#
+#   Since each element is an integer, we can use arrays
+#   instead of dictionary (for efficiency)
+#
+#   Disjoint sets over other key types can be implemented
+#   based on an IntDisjointSet through a map from the key
+#   to an integer index
+#
+############################################################
+
+_intdisjointset_bounds_err_msg(T) = "the maximum number of elements in IntDisjointSet{$T} is $(typemax(T))"
+
+"""
+    IntDisjointSet{T<:Integer}(n::Integer)
+
+A forest of disjoint sets of integers, which is a data structure
+(also called a union–find data structure or merge–find set)
+that tracks a set of elements partitioned
+into a number of disjoint (non-overlapping) subsets.
+"""
+mutable struct IntDisjointSet{T<:Integer}
+    parents::Vector{T}
+    ranks::Vector{T}
+    ngroups::T
+end
+
+IntDisjointSet(n::T) where {T<:Integer} = IntDisjointSet{T}(collect(OneTo(n)), zeros(T, n), n)
+IntDisjointSet{T}(n::Integer) where {T<:Integer} = IntDisjointSet{T}(collect(OneTo(T(n))), zeros(T, T(n)), T(n))
+length(s::IntDisjointSet) = length(s.parents)
+
+"""
+    num_groups(s::IntDisjointSet)
+
+Get a number of groups.
+"""
+num_groups(s::IntDisjointSet) = s.ngroups
+eltype(::Type{IntDisjointSet{T}}) where {T<:Integer} = T
+
+# find the root element of the subset that contains x
+# path compression is implemented here
+function find_root_impl!(parents::Vector{T}, x::Integer) where {T<:Integer}
+    p = parents[x]
+    @inbounds if parents[p] != p
+        parents[x] = p = _find_root_impl!(parents, p)
+    end
+    return p
+end
+
+# unsafe version of the above
+function _find_root_impl!(parents::Vector{T}, x::Integer) where {T<:Integer}
+    @inbounds p = parents[x]
+    @inbounds if parents[p] != p
+        parents[x] = p = _find_root_impl!(parents, p)
+    end
+    return p
+end
+
+"""
+    find_root!(s::IntDisjointSet{T}, x::T)
+
+Find the root element of the subset that contains an member `x`.
+Path compression happens here.
+"""
+find_root!(s::IntDisjointSet{T}, x::T) where {T<:Integer} = find_root_impl!(s.parents, x)
+
+"""
+    in_same_set(s::IntDisjointSet{T}, x::T, y::T)
+
+Returns `true` if `x` and `y` belong to the same subset in `s`, and `false` otherwise.
+"""
+in_same_set(s::IntDisjointSet{T}, x::T, y::T) where {T<:Integer} = find_root!(s, x) == find_root!(s, y)
+
+"""
+    union!(s::IntDisjointSet{T}, x::T, y::T)
+
+Merge the subset containing `x` and that containing `y` into one
+and return the root of the new set.
+"""
+function union!(s::IntDisjointSet{T}, x::T, y::T) where {T<:Integer}
+    parents = s.parents
+    xroot = find_root_impl!(parents, x)
+    yroot = find_root_impl!(parents, y)
+    return xroot != yroot ? root_union!(s, xroot, yroot) : xroot
+end
+
+"""
+    root_union!(s::IntDisjointSet{T}, x::T, y::T)
+
+Form a new set that is the union of the two sets whose root elements are
+`x` and `y` and return the root of the new set.
+Assume `x ≠ y` (unsafe).
+"""
+function root_union!(s::IntDisjointSet{T}, x::T, y::T) where {T<:Integer}
+    parents = s.parents
+    rks = s.ranks
+    @inbounds xrank = rks[x]
+    @inbounds yrank = rks[y]
+
+    if xrank < yrank
+        x, y = y, x
+    elseif xrank == yrank
+        rks[x] += one(T)
+    end
+    @inbounds parents[y] = x
+    s.ngroups -= one(T)
+    return x
+end
+
+"""
+    push!(s::IntDisjointSet{T})
+
+Make a new subset with an automatically chosen new element `x`.
+Returns the new element. Throw an `ArgumentError` if the
+capacity of the set would be exceeded.
+"""
+function push!(s::IntDisjointSet{T}) where {T<:Integer}
+    l = length(s)
+    l < typemax(T) || throw(ArgumentError(_intdisjointset_bounds_err_msg(T)))
+    x = l + one(T)
+    push!(s.parents, x)
+    push!(s.ranks, zero(T))
+    s.ngroups += one(T)
+    return x
+end
diff --git a/base/compiler/ssair/EscapeAnalysis/interprocedural.jl b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl
new file mode 100644
index 00000000000000..5d75db990e6f4b
--- /dev/null
+++ b/base/compiler/ssair/EscapeAnalysis/interprocedural.jl
@@ -0,0 +1,151 @@
+# TODO this file contains many duplications with the inlining analysis code, factor them out
+
+import Core.Compiler:
+    MethodInstance, InferenceResult, Signature, ConstPropResult, ConcreteResult,
+    MethodResultPure, MethodMatchInfo, UnionSplitInfo, ConstCallInfo, InvokeCallInfo,
+    call_sig, argtypes_to_type, is_builtin, is_return_type, istopfunction, validate_sparams,
+    specialize_method, invoke_rewrite
+
+const Linfo = Union{MethodInstance,InferenceResult}
+struct CallInfo
+    linfos::Vector{Linfo}
+    nothrow::Bool
+end
+
+function resolve_call(ir::IRCode, stmt::Expr, @nospecialize(info))
+    sig = call_sig(ir, stmt)
+    if sig === nothing
+        return missing
+    end
+    # TODO handle _apply_iterate
+    if is_builtin(sig) && sig.f !== invoke
+        return false
+    end
+    # handling corresponding to late_inline_special_case!
+    (; f, argtypes) = sig
+    if length(argtypes) == 3 && istopfunction(f, :!==)
+        return true
+    elseif length(argtypes) == 3 && istopfunction(f, :(>:))
+        return true
+    elseif f === TypeVar && 2 ≤ length(argtypes) ≤ 4 && (argtypes[2] ⊑ Symbol)
+        return true
+    elseif f === UnionAll && length(argtypes) == 3 && (argtypes[2] ⊑ TypeVar)
+        return true
+    elseif is_return_type(f)
+        return true
+    end
+    if info isa MethodResultPure
+        return true
+    elseif info === false
+        return missing
+    end
+    # TODO handle OpaqueClosureCallInfo
+    if sig.f === invoke
+        isa(info, InvokeCallInfo) || return missing
+        return analyze_invoke_call(sig, info)
+    elseif isa(info, ConstCallInfo)
+        return analyze_const_call(sig, info)
+    elseif isa(info, MethodMatchInfo)
+        infos = MethodMatchInfo[info]
+    elseif isa(info, UnionSplitInfo)
+        infos = info.matches
+    else # isa(info, ReturnTypeCallInfo), etc.
+        return missing
+    end
+    return analyze_call(sig, infos)
+end
+
+function analyze_invoke_call(sig::Signature, info::InvokeCallInfo)
+    match = info.match
+    if !match.fully_covers
+        # TODO: We could union split out the signature check and continue on
+        return missing
+    end
+    result = info.result
+    if isa(result, ConstPropResult)
+        return CallInfo(Linfo[result.result], true)
+    else
+        argtypes = invoke_rewrite(sig.argtypes)
+        mi = analyze_match(match, length(argtypes))
+        mi === nothing && return missing
+        return CallInfo(Linfo[mi], true)
+    end
+end
+
+function analyze_const_call(sig::Signature, cinfo::ConstCallInfo)
+    linfos = Linfo[]
+    (; call, results) = cinfo
+    infos = isa(call, MethodMatchInfo) ? MethodMatchInfo[call] : call.matches
+    local nothrow = true # required to account for potential escape via MethodError
+    local j = 0
+    for i in 1:length(infos)
+        meth = infos[i].results
+        nothrow &= !meth.ambig
+        nmatch = Core.Compiler.length(meth)
+        if nmatch == 0 # No applicable methods
+            # mark this call may potentially throw, and the try next union split
+            nothrow = false
+            continue
+        end
+        for i = 1:nmatch
+            j += 1
+            result = results[j]
+            match = Core.Compiler.getindex(meth, i)
+            if result === nothing
+                mi = analyze_match(match, length(sig.argtypes))
+                mi === nothing && return missing
+                push!(linfos, mi)
+            elseif isa(result, ConcreteResult)
+                # TODO we may want to feedback information that this call always throws if !isdefined(result, :result)
+                push!(linfos, result.mi)
+            elseif isa(result, ConstPropResult)
+                push!(linfos, result.result)
+            end
+            nothrow &= match.fully_covers
+        end
+    end
+    return CallInfo(linfos, nothrow)
+end
+
+function analyze_call(sig::Signature, infos::Vector{MethodMatchInfo})
+    linfos = Linfo[]
+    local nothrow = true # required to account for potential escape via MethodError
+    for i in 1:length(infos)
+        meth = infos[i].results
+        nothrow &= !meth.ambig
+        nmatch = Core.Compiler.length(meth)
+        if nmatch == 0 # No applicable methods
+            # mark this call may potentially throw, and the try next union split
+            nothrow = false
+            continue
+        end
+        for i = 1:nmatch
+            match = Core.Compiler.getindex(meth, i)
+            mi = analyze_match(match, length(sig.argtypes))
+            mi === nothing && return missing
+            push!(linfos, mi)
+            nothrow &= match.fully_covers
+        end
+    end
+    return CallInfo(linfos, nothrow)
+end
+
+function analyze_match(match::MethodMatch, npassedargs::Int)
+    method = match.method
+    na = Int(method.nargs)
+    if na != npassedargs && !(na > 0 && method.isva)
+        # we have a method match only because an earlier
+        # inference step shortened our call args list, even
+        # though we have too many arguments to actually
+        # call this function
+        return nothing
+    end
+
+    # Bail out if any static parameters are left as TypeVar
+    # COMBAK is this needed for escape analysis?
+    validate_sparams(match.sparams) || return nothing
+
+    # See if there exists a specialization for this method signature
+    mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance}
+    return mi
+end
diff --git a/base/compiler/ssair/basicblock.jl b/base/compiler/ssair/basicblock.jl
new file mode 100644
index 00000000000000..427aae707e6645
--- /dev/null
+++ b/base/compiler/ssair/basicblock.jl
@@ -0,0 +1,32 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+Like UnitRange{Int}, but can handle the `last` field, being temporarily
+< first (this can happen during compacting)
+"""
+struct StmtRange <: AbstractUnitRange{Int}
+    start::Int
+    stop::Int
+end
+
+first(r::StmtRange) = r.start
+last(r::StmtRange) = r.stop
+iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1)
+
+StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range))
+
+struct BasicBlock
+    stmts::StmtRange
+    preds::Vector{Int}
+    succs::Vector{Int}
+end
+
+function BasicBlock(stmts::StmtRange)
+    return BasicBlock(stmts, Int[], Int[])
+end
+
+function BasicBlock(old_bb, stmts)
+    return BasicBlock(stmts, old_bb.preds, old_bb.succs)
+end
+
+copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs))
diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl
index f9b407f9ddb3e5..fd49a7e118eb76 100644
--- a/base/compiler/ssair/domtree.jl
+++ b/base/compiler/ssair/domtree.jl
@@ -1,63 +1,573 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# This file implements the Semi-NCA (SNCA) dominator tree construction
+# described in Georgiadis' PhD thesis [LG05], which itself is a simplification
+# of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. This algorithm matches
+# the algorithm choice in LLVM and seems to be a sweet spot in implementation
+# simplicity and efficiency.
+#
+# This file also implements an extension of SNCA that supports updating the
+# dominator tree with insertion and deletion of edges in the control flow
+# graph, described in [GI16] as Dynamic SNCA. DSNCA was chosen over DBS, a
+# different algorithm which achieves the best overall performance in [GI16],
+# because it is simpler to understand and implement, performs well with edge
+# deletions, and is of similar performance overall.
+#
+# SNCA works by first computing semidominators, then computing immediate
+# dominators from them. The semidominator of a node is the node with minimum
+# preorder number such that there is a semidominator path from it to the node.
+# A semidominator path is a path in which the preorder numbers of all nodes not
+# at the endpoints are greater than the preorder number of the last node.
+# Intuitively, the semidominator approximates the immediate dominator of a node
+# by taking the path (in the CFG) that gets as close to the root as possible
+# while avoiding ancestors of the node in the DFS tree.
+#
+# In computing the semidominators, SNCA performs "path compression" whenever a
+# node has a nontrivial semidominator (i.e. a semidominator that is not just
+# its parent in the DFS tree). Path compression propagates the "label" of a
+# node, which represents a possible semidominator with associated semidominator
+# path passing through that node.
+#
+# For example, path compression will be performed for the following CFG, where
+# the edge not in the DFS tree is marked with asterisks. Note that nodes are
+# labeled with their preorder numbers, and all edges point downward.
+#
+#     1
+#     |\
+#     | \
+#     |  4
+#     |  |
+#     2  5
+#     |  |
+#     |  6
+#     | *
+#     |*
+#     3
+#
+# There is a nontrivial semidominator path from 1 to 3, passing through 4, 5,
+# and 6. Stepping through the whole algorithm on paper with an example like
+# this is very helpful for understanding how it works.
+#
+# DSNCA runs the whole algorithm from scratch if the DFS tree is invalidated by
+# the insertion or deletion, but otherwise recomputes a subset of the
+# semidominators (all immediate dominators then need to be recomputed).
+#
+# [LG05]  Linear-Time Algorithms for Dominators and Related Problems
+#         Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
+#         ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
+#
+# [LT79]  A fast algorithm for finding dominators in a flowgraph
+#         Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1
+#         http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf
+#
+# [GI16]  An Experimental Study of Dynamic Dominators
+#         Loukas Georgiadis, Giuseppe F. Italiano, Luigi Laura, Federico
+#         Santaroni, April 2016
+#         https://arxiv.org/abs/1604.02711
+
+# We could make these real structs, but probably not worth the extra
+# overhead. Still, give them names for documentary purposes.
+const BBNumber = Int
+const PreNumber = Int
+const PostNumber = Int
+
+struct DFSTree
+    # These map between BB number and pre- or postorder numbers
+    to_pre::Vector{PreNumber}
+    from_pre::Vector{BBNumber}
+    to_post::Vector{PostNumber}
+    from_post::Vector{BBNumber}
+
+    # Records parent relationships in the DFS tree
+    # (preorder number -> preorder number)
+    # Storing it this way saves a few lookups in the snca_compress! algorithm
+    to_parent_pre::Vector{PreNumber}
+end
+
+function DFSTree(n_blocks::Int)
+    return DFSTree(zeros(PreNumber, n_blocks),
+                   Vector{BBNumber}(undef, n_blocks),
+                   zeros(PostNumber, n_blocks),
+                   Vector{BBNumber}(undef, n_blocks),
+                   zeros(PreNumber, n_blocks))
+end
+
+copy(D::DFSTree) = DFSTree(copy(D.to_pre),
+                           copy(D.from_pre),
+                           copy(D.to_post),
+                           copy(D.from_post),
+                           copy(D.to_parent_pre))
+
+function copy!(dst::DFSTree, src::DFSTree)
+    copy!(dst.to_pre, src.to_pre)
+    copy!(dst.from_pre, src.from_pre)
+    copy!(dst.to_post, src.to_post)
+    copy!(dst.from_post, src.from_post)
+    copy!(dst.to_parent_pre, src.to_parent_pre)
+    return dst
+end
+
+length(D::DFSTree) = length(D.from_pre)
+
+function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
+    copy!(D, DFSTree(length(blocks)))
+    to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)]
+    pre_num = 1
+    post_num = 1
+    while !isempty(to_visit)
+        # Because we want the postorder number as well as the preorder number,
+        # we don't pop the current node from the stack until we're moving up
+        # the tree
+        (current_node_bb, parent_pre, pushed_children) = to_visit[end]
+
+        if pushed_children
+            # Going up the DFS tree, so all we need to do is record the
+            # postorder number, then move on
+            D.to_post[current_node_bb] = post_num
+            D.from_post[post_num] = current_node_bb
+            post_num += 1
+            pop!(to_visit)
+
+        elseif D.to_pre[current_node_bb] != 0
+            # Node has already been visited, move on
+            pop!(to_visit)
+            continue
+        else
+            # Going down the DFS tree
+
+            # Record preorder number
+            D.to_pre[current_node_bb] = pre_num
+            D.from_pre[pre_num] = current_node_bb
+            D.to_parent_pre[pre_num] = parent_pre
+
+            # Record that children (will) have been pushed
+            to_visit[end] = (current_node_bb, parent_pre, true)
+
+            # Push children to the stack
+            for succ_bb in blocks[current_node_bb].succs
+                push!(to_visit, (succ_bb, pre_num, false))
+            end
+
+            pre_num += 1
+        end
+    end
+
+    # If all blocks are reachable, this is a no-op, otherwise, we shrink these
+    # arrays.
+    resize!(D.from_pre, pre_num - 1)
+    resize!(D.from_post, post_num - 1) # should be same size as pre_num - 1
+    resize!(D.to_parent_pre, pre_num - 1)
+
+    return D
+end
+
+DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks)
+
+"""
+Keeps the per-BB state of the Semi NCA algorithm. In the original formulation,
+there are three separate length `n` arrays, `label`, `semi` and `ancestor`.
+Instead, for efficiency, we use one array in a array-of-structs style setup.
+"""
+struct SNCAData
+    semi::PreNumber
+    label::PreNumber
+end
+
 "Represents a Basic Block, in the DomTree"
 struct DomTreeNode
     # How deep we are in the DomTree
     level::Int
     # The BB indices in the CFG for all Basic Blocks we immediately dominate
-    children::Vector{Int}
+    children::Vector{BBNumber}
 end
-DomTreeNode() = DomTreeNode(1, Vector{Int}())
+
+DomTreeNode() = DomTreeNode(1, Vector{BBNumber}())
 
 "Data structure that encodes which basic block dominates which."
 struct DomTree
-    # Which basic block immediately dominates each basic block (ordered by BB indices)
-    # Note: this is the inverse of the nodes, children field
-    idoms::Vector{Int}
+    # These can be reused when updating domtree dynamically
+    dfs_tree::DFSTree
+    snca_state::Vector{SNCAData}
+
+    # Which basic block immediately dominates each basic block, using BB indices
+    idoms_bb::Vector{BBNumber}
 
     # The nodes in the tree (ordered by BB indices)
     nodes::Vector{DomTreeNode}
 end
 
+function DomTree()
+    return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
+end
+
+function construct_domtree(blocks::Vector{BasicBlock})
+    return update_domtree!(blocks, DomTree(), true, 0)
+end
+
+function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree,
+                         recompute_dfs::Bool, max_pre::PreNumber)
+    if recompute_dfs
+        DFS!(domtree.dfs_tree, blocks)
+    end
+
+    if max_pre == 0
+        max_pre = length(domtree.dfs_tree)
+    end
+
+    SNCA!(domtree, blocks, max_pre)
+    compute_domtree_nodes!(domtree)
+    return domtree
+end
+
+function compute_domtree_nodes!(domtree::DomTree)
+    # Compute children
+    copy!(domtree.nodes,
+          DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)])
+    for (idx, idom) in Iterators.enumerate(domtree.idoms_bb)
+        (idx == 1 || idom == 0) && continue
+        push!(domtree.nodes[idom].children, idx)
+    end
+    # Recursively set level
+    update_level!(domtree.nodes, 1, 1)
+    return domtree.nodes
+end
+
+function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int)
+    worklist = Tuple{BBNumber, Int}[(node, level)]
+    while !isempty(worklist)
+        (node, level) = pop!(worklist)
+        nodes[node] = DomTreeNode(level, nodes[node].children)
+        foreach(nodes[node].children) do child
+            push!(worklist, (child, level+1))
+        end
+    end
+end
+
+"""
+The main Semi-NCA algorithm. Matches Figure 2.8 in [LG05]. Note that the
+pseudocode in [LG05] is not entirely accurate. The best way to understand
+what's happening is to read [LT79], then the description of SLT in [LG05]
+(warning: inconsistent notation), then the description of Semi-NCA.
+"""
+function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
+    D = domtree.dfs_tree
+    state = domtree.snca_state
+    # There may be more blocks than are reachable in the DFS / dominator tree
+    n_blocks = length(blocks)
+    n_nodes = length(D)
+
+    # `label` is initialized to the identity mapping (though the paper doesn't
+    # make that clear). The rationale for this is Lemma 2.4 in [LG05] (i.e.
+    # Theorem 4 in [LT79]). Note however, that we don't ever look at `semi`
+    # until it is fully initialized, so we could leave it uninitialized here if
+    # we wanted to.
+    resize!(state, n_nodes)
+    for w in 1:max_pre
+        # Only reset semidominators for nodes we want to recompute
+        state[w] = SNCAData(typemax(PreNumber), w)
+    end
+
+    # If we are only recomputing some of the semidominators, the remaining
+    # labels should be reset, because they may have become inapplicable to the
+    # node/semidominator we are currently processing/recomputing. They can
+    # become inapplicable because of path compressions that were triggered by
+    # nodes that should only be processed after the current one (but were
+    # processed the last time `SNCA!` was run).
+    #
+    # So, for every node that is not being reprocessed, we reset its label to
+    # its semidominator, which is the value that its label assumes once its
+    # semidominator is computed. If this was too conservative, i.e. if the
+    # label would have been updated before we process the current node in a
+    # situation where all semidominators were recomputed, then path compression
+    # will produce the correct label.
+    for w in max_pre+1:n_nodes
+        semi = state[w].semi
+        state[w] = SNCAData(semi, semi)
+    end
+
+    # Calculate semidominators, but only for blocks with preorder number up to
+    # max_pre
+    ancestors = copy(D.to_parent_pre)
+    for w::PreNumber in reverse(2:max_pre)
+        # LLVM initializes this to the parent, the paper initializes this to
+        # `w`, but it doesn't really matter (the parent is a predecessor, so at
+        # worst we'll discover it below). Save a memory reference here.
+        semi_w = typemax(PreNumber)
+        last_linked = PreNumber(w + 1)
+        for v ∈ blocks[D.from_pre[w]].preds
+            # For the purpose of the domtree, ignore virtual predecessors into
+            # catch blocks.
+            v == 0 && continue
+
+            v_pre = D.to_pre[v]
+
+            # Ignore unreachable predecessors
+            v_pre == 0 && continue
+
+            # N.B.: This conditional is missing from the pseudocode in figure
+            # 2.8 of [LG05]. It corresponds to the `ancestor[v] != 0` check in
+            # the `eval` implementation in figure 2.6
+            if v_pre >= last_linked
+                # `v` has already been processed, so perform path compression
+
+                # For performance, if the number of ancestors is small avoid
+                # the extra allocation of the worklist.
+                if length(ancestors) <= 32
+                    snca_compress!(state, ancestors, v_pre, last_linked)
+                else
+                    snca_compress_worklist!(state, ancestors, v_pre, last_linked)
+                end
+            end
+
+            # The (preorder number of the) semidominator of a block is the
+            # minimum over the labels of its predecessors
+            semi_w = min(semi_w, state[v_pre].label)
+        end
+        state[w] = SNCAData(semi_w, semi_w)
+    end
+
+    # Compute immediate dominators, which for a node must be the nearest common
+    # ancestor in the (immediate) dominator tree between its semidominator and
+    # its parent (see Lemma 2.6 in [LG05]).
+    idoms_pre = copy(D.to_parent_pre)
+    for v in 2:n_nodes
+        idom = idoms_pre[v]
+        vsemi = state[v].semi
+        while idom > vsemi
+            idom = idoms_pre[idom]
+        end
+        idoms_pre[v] = idom
+    end
+
+    # Express idoms in BB indexing
+    resize!(domtree.idoms_bb, n_blocks)
+    for i::BBNumber in 1:n_blocks
+        if i == 1 || D.to_pre[i] == 0
+            domtree.idoms_bb[i] = 0
+        else
+            domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]]
+        end
+    end
+end
+
 """
-    Checks if bb1 dominates bb2.
-    bb1 and bb2 are indexes into the CFG blocks.
-    bb1 dominates bb2 if the only way to enter bb2 is via bb1.
-    (Other blocks may be in between, e.g bb1->bbX->bb2).
+Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the
+modification suggested in the paper to use `last_linked` to determine whether
+an ancestor has been processed rather than storing `0` in the ancestor array.
 """
-function dominates(domtree::DomTree, bb1::Int, bb2::Int)
+function snca_compress!(state::Vector{SNCAData}, ancestors::Vector{PreNumber},
+                        v::PreNumber, last_linked::PreNumber)
+    u = ancestors[v]
+    @assert u < v
+    if u >= last_linked
+        snca_compress!(state, ancestors, u, last_linked)
+        if state[u].label < state[v].label
+            state[v] = SNCAData(state[v].semi, state[u].label)
+        end
+        ancestors[v] = ancestors[u]
+    end
+    nothing
+end
+
+function snca_compress_worklist!(
+        state::Vector{SNCAData}, ancestors::Vector{PreNumber},
+        v::PreNumber, last_linked::PreNumber)
+    # TODO: There is a smarter way to do this
+    u = ancestors[v]
+    worklist = Tuple{PreNumber, PreNumber}[(u,v)]
+    @assert u < v
+    while !isempty(worklist)
+        u, v = last(worklist)
+        if u >= last_linked
+            if ancestors[u] >= last_linked
+                push!(worklist, (ancestors[u], u))
+                continue
+            end
+            if state[u].label < state[v].label
+                state[v] = SNCAData(state[v].semi, state[u].label)
+            end
+            ancestors[v] = ancestors[u]
+        end
+        pop!(worklist)
+    end
+end
+
+"Given updated blocks, update the given dominator tree with an inserted edge."
+function domtree_insert_edge!(domtree::DomTree, blocks::Vector{BasicBlock},
+                              from::BBNumber, to::BBNumber)
+    # `from` is unreachable, so `from` and `to` aren't in domtree
+    if bb_unreachable(domtree, from)
+        return domtree
+    end
+
+    # Implements Section 3.1 of [GI16]
+    dt        = domtree.dfs_tree
+    from_pre  = dt.to_pre[from]
+    to_pre    = dt.to_pre[to]
+    from_post = dt.to_post[from]
+    to_post   = dt.to_post[to]
+    if to_pre == 0 || (from_pre < to_pre && from_post < to_post)
+        # The DFS tree is invalidated by the edge insertion, so run from
+        # scratch
+        update_domtree!(blocks, domtree, true, 0)
+    else
+        # DFS tree is still valid, so update only affected nodes
+        update_domtree!(blocks, domtree, false, to_pre)
+    end
+
+    return domtree
+end
+
+"Given updated blocks, update the given dominator tree with a deleted edge."
+function domtree_delete_edge!(domtree::DomTree, blocks::Vector{BasicBlock},
+                              from::BBNumber, to::BBNumber)
+    # `from` is unreachable, so `from` and `to` aren't in domtree
+    if bb_unreachable(domtree, from)
+        return domtree
+    end
+
+    # Implements Section 3.1 of [GI16]
+    if is_parent(domtree.dfs_tree, from, to)
+        # The `from` block is the parent of the `to` block in the DFS tree, so
+        # deleting the edge invalidates the DFS tree, so start from scratch
+        update_domtree!(blocks, domtree, true, 0)
+    elseif on_semidominator_path(domtree, from, to)
+        # Recompute semidominators for blocks with preorder number up to that
+        # of `to` block. Semidominators for blocks with preorder number greater
+        # than that of `to` aren't affected because no semidominator path to
+        # the block can pass through the `to` block (the preorder number of
+        # `to` would be lower than those of these blocks, and `to` is not their
+        # parent in the DFS tree).
+        to_pre = domtree.dfs_tree.to_pre[to]
+        update_domtree!(blocks, domtree, false, to_pre)
+    end
+    # Otherwise, dominator tree is not affected
+
+    return domtree
+end
+
+"Check if x is the parent of y in the given DFS tree."
+function is_parent(dfs_tree::DFSTree, x::BBNumber, y::BBNumber)
+    x_pre = dfs_tree.to_pre[x]
+    y_pre = dfs_tree.to_pre[y]
+    return x_pre == dfs_tree.to_parent_pre[y_pre]
+end
+
+"""
+Check if x is on some semidominator path from the semidominator of y to y,
+assuming there is an edge from x to y.
+"""
+function on_semidominator_path(domtree::DomTree, x::BBNumber, y::BBNumber)
+    x_pre = domtree.dfs_tree.to_pre[x]
+    y_pre = domtree.dfs_tree.to_pre[y]
+
+    semi_y = domtree.snca_state[y_pre].semi
+    current_block = x_pre
+
+    # Follow the semidominators of `x` up the DFS tree to see if we ever reach
+    # the semidominator of `y`. If so, `x` is on a semidominator path between
+    # `y` and its semidominator. We can stop if the preorder number of the
+    # semidominators becomes less than that of the semidominator of `y`,
+    # because it can only decrease further.
+    while current_block >= semi_y
+        if semi_y == current_block
+            return true
+        end
+        current_block = domtree.snca_state[current_block].semi
+    end
+    return false
+end
+
+"""
+Rename basic block numbers in a dominator tree, removing the block if it is
+renamed to -1.
+"""
+function rename_nodes!(domtree::DomTree, rename_bb::Vector{BBNumber})
+    # Rename DFS tree
+    rename_nodes!(domtree.dfs_tree, rename_bb)
+
+    # `snca_state` is indexed by preorder number, so should be unchanged
+
+    # Rename `idoms_bb` and `nodes`
+    old_idoms_bb = copy(domtree.idoms_bb)
+    old_nodes = copy(domtree.nodes)
+    for (old_bb, new_bb) in enumerate(rename_bb)
+        if new_bb != -1
+            domtree.idoms_bb[new_bb] = (new_bb == 1) ?
+                0 : rename_bb[old_idoms_bb[old_bb]]
+            domtree.nodes[new_bb] = old_nodes[old_bb]
+            map!(i -> rename_bb[i],
+                 domtree.nodes[new_bb].children,
+                 domtree.nodes[new_bb].children)
+        end
+    end
+
+    # length of `to_pre` after renaming DFS tree is new number of basic blocks
+    resize!(domtree.idoms_bb, length(domtree.dfs_tree.to_pre))
+    resize!(domtree.nodes, length(domtree.dfs_tree.to_pre))
+    return domtree
+end
+
+"""
+Rename basic block numbers in a DFS tree, removing the block if it is renamed
+to -1.
+"""
+function rename_nodes!(D::DFSTree, rename_bb::Vector{BBNumber})
+    n_blocks = length(D.to_pre)
+    n_reachable_blocks = length(D.from_pre)
+
+    old_to_pre = copy(D.to_pre)
+    old_from_pre = copy(D.from_pre)
+    old_to_post = copy(D.to_post)
+    old_from_post = copy(D.from_post)
+    max_new_bb = 0
+    for (old_bb, new_bb) in enumerate(rename_bb)
+        if new_bb != -1
+            D.to_pre[new_bb] = old_to_pre[old_bb]
+            D.from_pre[old_to_pre[old_bb]] = new_bb
+            D.to_post[new_bb] = old_to_post[old_bb]
+            D.from_post[old_to_post[old_bb]] = new_bb
+
+            # Keep track of highest BB number to resize arrays with
+            if new_bb > max_new_bb
+                max_new_bb = new_bb
+            end
+        end
+    end
+    resize!(D.to_pre, max_new_bb)
+    resize!(D.to_post, max_new_bb)
+    # `to_parent_pre` should be unchanged
+    return D
+end
+
+"""
+Checks if bb1 dominates bb2.
+bb1 and bb2 are indexes into the CFG blocks.
+bb1 dominates bb2 if the only way to enter bb2 is via bb1.
+(Other blocks may be in between, e.g bb1->bbX->bb2).
+"""
+function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber)
     bb1 == bb2 && return true
     target_level = domtree.nodes[bb1].level
     source_level = domtree.nodes[bb2].level
     source_level < target_level && return false
     for _ in (source_level - 1):-1:target_level
-        bb2 = domtree.idoms[bb2]
+        bb2 = domtree.idoms_bb[bb2]
     end
     return bb1 == bb2
 end
 
-bb_unreachable(domtree::DomTree, bb::Int) = bb != 1 && domtree.nodes[bb].level == 1
-
-function update_level!(domtree::Vector{DomTreeNode}, node::Int, level::Int)
-    worklist = Tuple{Int, Int}[(node, level)]
-    while !isempty(worklist)
-        (node, level) = pop!(worklist)
-        domtree[node] = DomTreeNode(level, domtree[node].children)
-        foreach(domtree[node].children) do child
-            push!(worklist, (child, level+1))
-        end
-    end
-end
+bb_unreachable(domtree::DomTree, bb::BBNumber) = bb != 1 && domtree.dfs_tree.to_pre[bb] == 0
 
 "Iterable data structure that walks though all dominated blocks"
 struct DominatedBlocks
     domtree::DomTree
-    worklist::Vector{Int}
+    worklist::Vector{BBNumber}
 end
 
 "Returns an iterator that walks through all blocks dominated by the basic block at index `root`"
-function dominated(domtree::DomTree, root::Int)
-    doms = DominatedBlocks(domtree, Vector{Int}())
+function dominated(domtree::DomTree, root::BBNumber)
+    doms = DominatedBlocks(domtree, Vector{BBNumber}())
     push!(doms.worklist, root)
     doms
 end
@@ -71,8 +581,8 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing)
     return (bb, nothing)
 end
 
-function naive_idoms(cfg::CFG)
-    nblocks = length(cfg.blocks)
+function naive_idoms(blocks::Vector{BasicBlock})
+    nblocks = length(blocks)
     # The extra +1 helps us detect unreachable blocks below
     dom_all = BitSet(1:nblocks+1)
     dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks]
@@ -80,10 +590,10 @@ function naive_idoms(cfg::CFG)
     while changed
         changed = false
         for n = 2:nblocks
-            if isempty(cfg.blocks[n].preds)
+            if isempty(blocks[n].preds)
                 continue
             end
-            firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, cfg.blocks[n].preds))
+            firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds))::NTuple{2,Any}
             new_doms = copy(dominators[firstp])
             for p in rest
                 intersect!(new_doms, dominators[p])
@@ -115,213 +625,3 @@ function naive_idoms(cfg::CFG)
     end
     idoms
 end
-
-# Construct Dom Tree
-function construct_domtree(cfg::CFG)
-    idoms = SNCA(cfg)
-    # Compute children
-    nblocks = length(cfg.blocks)
-    domtree = DomTreeNode[DomTreeNode() for _ = 1:nblocks]
-    for (idx, idom) in Iterators.enumerate(idoms)
-        (idx == 1 || idom == 0) && continue
-        push!(domtree[idom].children, idx)
-    end
-    # Recursively set level
-    update_level!(domtree, 1, 1)
-    DomTree(idoms, domtree)
-end
-
-#================================ [SNCA] ======================================#
-#
-#   This section implements the Semi-NCA (SNCA) dominator tree construction from
-#   described in Georgiadis' PhD thesis [LG05], which itself is a simplification
-#   of the Simple Lenguare-Tarjan (SLT) algorithm [LG79]. This algorithm matches
-#   the algorithm choice in LLVM and seems to be a sweet spot in implementation
-#   simplicity and efficiency.
-#
-#   [LG05]  Linear-Time Algorithms for Dominators and Related Problems
-#           Loukas Georgiadis, Princeton University, November 2005, pp. 21-23:
-#           ftp://ftp.cs.princeton.edu/reports/2005/737.pdf
-#
-#   [LT79]  A fast algorithm for finding dominators in a flowgraph
-#           Thomas Lengauer, Robert Endre Tarjan, July 1979, ACM TOPLAS 1-1
-#           http://www.dtic.mil/dtic/tr/fulltext/u2/a054144.pdf
-#
-begin
-    # We could make these real structs, but probably not worth the extra
-    # overhead. Still, give them names for documentary purposes.
-    const BBNumber = UInt
-    const DFSNumber = UInt
-
-    """
-    Keeps the per-BB state of the Semi NCA algorithm. In the original
-    formulation, there are three separate length `n` arrays, `label`, `semi` and
-    `ancestor`. Instead, for efficiency, we use one array in a array-of-structs
-    style setup.
-    """
-    struct Node
-        semi::DFSNumber
-        label::DFSNumber
-    end
-
-    struct DFSTree
-        # Maps DFS number to BB number
-        numbering::Vector{BBNumber}
-        # Maps BB number to DFS number
-        reverse::Vector{DFSNumber}
-        # Records parent relationships in the DFS tree (DFS number -> DFS number)
-        # Storing it this way saves a few lookups in the snca_compress! algorithm
-        parents::Vector{DFSNumber}
-    end
-    length(D::DFSTree) = length(D.numbering)
-    preorder(D::DFSTree) = OneTo(length(D))
-    _drop(xs::AbstractUnitRange, n::Integer) = (first(xs)+n):last(xs)
-
-    function DFSTree(nblocks::Int)
-        DFSTree(
-            Vector{BBNumber}(undef, nblocks),
-            zeros(DFSNumber, nblocks),
-            Vector{DFSNumber}(undef, nblocks))
-    end
-
-    function DFS(cfg::CFG, current_node::BBNumber)::DFSTree
-        dfs = DFSTree(length(cfg.blocks))
-        # TODO: We could reuse the storage in DFSTree for our worklist. We're
-        # guaranteed for the worklist to be smaller than the remaining space in
-        # DFSTree
-        worklist = Tuple{DFSNumber, BBNumber}[(0, current_node)]
-        dfs_num = 1
-        parent = 0
-        while !isempty(worklist)
-            (parent, current_node) = pop!(worklist)
-            dfs.reverse[current_node] != 0 && continue
-            dfs.reverse[current_node] = dfs_num
-            dfs.numbering[dfs_num] = current_node
-            dfs.parents[dfs_num] = parent
-            for succ in cfg.blocks[current_node].succs
-                push!(worklist, (dfs_num, succ))
-            end
-            dfs_num += 1
-        end
-        # If all blocks are reachable, this is a no-op, otherwise,
-        # we shrink these arrays.
-        resize!(dfs.numbering, dfs_num - 1)
-        resize!(dfs.parents, dfs_num - 1)
-        dfs
-    end
-
-    """
-    Matches the snca_compress algorithm in Figure 2.8 of [LG05], with the
-    modification suggested in the paper to use `last_linked` to determine
-    whether an ancestor has been processed rather than storing `0` in the
-    ancestor array.
-    """
-    function snca_compress!(state::Vector{Node}, ancestors::Vector{DFSNumber},
-                            v::DFSNumber, last_linked::DFSNumber)
-        u = ancestors[v]
-        @assert u < v
-        if u >= last_linked
-            snca_compress!(state, ancestors, u, last_linked)
-            if state[u].label < state[v].label
-                state[v] = Node(state[v].semi, state[u].label)
-            end
-            ancestors[v] = ancestors[u]
-        end
-        nothing
-    end
-
-    function snca_compress_worklist!(
-            state::Vector{Node}, ancestors::Vector{DFSNumber},
-            v::DFSNumber, last_linked::DFSNumber)
-        # TODO: There is a smarter way to do this
-        u = ancestors[v]
-        worklist = Tuple{DFSNumber, DFSNumber}[(u,v)]
-        @assert u < v
-        while !isempty(worklist)
-            u, v = last(worklist)
-            if u >= last_linked
-                if ancestors[u] >= last_linked
-                    push!(worklist, (ancestors[u], u))
-                    continue
-                end
-                if state[u].label < state[v].label
-                    state[v] = Node(state[v].semi, state[u].label)
-                end
-                ancestors[v] = ancestors[u]
-            end
-            pop!(worklist)
-        end
-    end
-
-    """
-        SNCA(cfg::CFG)
-
-    Determines a map from basic blocks to the block which immediately dominate them.
-    Expressed as indexes into `cfg.blocks`.
-
-    The main Semi-NCA algrithm. Matches Figure 2.8 in [LG05].
-    Note that the pseudocode in [LG05] is not entirely accurate.
-    The best way to understand what's happening is to read [LT79], then the
-    description of SLT in [LG05] (warning: inconsistent notation), then
-    the description of Semi-NCA.
-    """
-    function SNCA(cfg::CFG)
-        D = DFS(cfg, BBNumber(1))
-        # `label` is initialized to the identity mapping (though
-        # the paper doesn't make that clear). The rational for this is Lemma
-        # 2.4 in [LG05] (i.e. Theorem 4 in ). Note however, that we don't
-        # ever look at `semi` until it is fully initialized, so we could leave
-        # it uninitialized here if we wanted to.
-        state = Node[ Node(typemax(DFSNumber), w) for w in preorder(D) ]
-        # Initialize idoms to parents. Note that while idoms are eventually
-        # BB indexed, we keep it DFS indexed until a final post-processing
-        # pass to avoid extra memory references during the O(N^2) phase below.
-        idoms_dfs = copy(D.parents)
-        # We abuse the parents array as the ancestors array.
-        # Semi-NCA does not look at the parents array at all.
-        # SLT would, but never simultaneously, so we could still
-        # do this.
-        ancestors = D.parents
-        for w::DFSNumber ∈ reverse(_drop(preorder(D), 1))
-            # LLVM initializes this to the parent, the paper initializes this to
-            # `w`, but it doesn't really matter (the parent is a predecessor,
-            # so at worst we'll discover it below). Save a memory reference here.
-            semi_w = typemax(DFSNumber)
-            for v ∈ cfg.blocks[D.numbering[w]].preds
-                # For the purpose of the domtree, ignore virtual predecessors
-                # into catch blocks.
-                v == 0 && continue
-                vdfs = D.reverse[v]
-                # Ignore unreachable predecessors
-                vdfs == 0 && continue
-                last_linked = DFSNumber(w + 1)
-                # N.B.: This conditional is missing from the psuedocode
-                # in figure 2.8 of [LG05]. It corresponds to the
-                # `ancestor[v] != 0` check in the `eval` implementation in
-                # figure 2.6
-                if vdfs >= last_linked
-                    # For performance, if the number of ancestors is small
-                    # avoid the extra allocation of the worklist.
-                    if length(ancestors) <= 32
-                        snca_compress!(state, ancestors, vdfs, last_linked)
-                    else
-                        snca_compress_worklist!(state, ancestors, vdfs, last_linked)
-                    end
-                end
-                semi_w = min(semi_w, state[vdfs].label)
-            end
-            state[w] = Node(semi_w, semi_w)
-        end
-        for v ∈ _drop(preorder(D), 1)
-            idom = idoms_dfs[v]
-            vsemi = state[v].semi
-            while idom > vsemi
-                idom = idoms_dfs[idom]
-            end
-            idoms_dfs[v] = idom
-        end
-        # Reexpress the idom relationship in BB indexing
-        idoms_bb = Int[ (i == 1 || D.reverse[i] == 0) ? 0 : D.numbering[idoms_dfs[D.reverse[i]]] for i = 1:length(cfg.blocks) ]
-        idoms_bb
-    end
-end
diff --git a/base/compiler/ssair/driver.jl b/base/compiler/ssair/driver.jl
index 465102e82e1556..7759d8d80b9cc8 100644
--- a/base/compiler/ssair/driver.jl
+++ b/base/compiler/ssair/driver.jl
@@ -1,7 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Core: LineInfoNode
-
 if false
     import Base: Base, @show
 else
@@ -10,134 +8,17 @@ else
     end
 end
 
-include("compiler/ssair/ir.jl")
+function argextype end # imported by EscapeAnalysis
+function stmt_effect_free end # imported by EscapeAnalysis
+function alloc_array_ndims end # imported by EscapeAnalysis
+function try_compute_field end # imported by EscapeAnalysis
+
+include("compiler/ssair/basicblock.jl")
 include("compiler/ssair/domtree.jl")
+include("compiler/ssair/ir.jl")
 include("compiler/ssair/slot2ssa.jl")
-include("compiler/ssair/queries.jl")
-include("compiler/ssair/passes.jl")
 include("compiler/ssair/inlining.jl")
 include("compiler/ssair/verify.jl")
 include("compiler/ssair/legacy.jl")
-#@isdefined(Base) && include("compiler/ssair/show.jl")
-
-function normalize(@nospecialize(stmt), meta::Vector{Any})
-    if isa(stmt, Expr)
-        if stmt.head === :meta
-            args = stmt.args
-            if length(args) > 0
-                push!(meta, stmt)
-            end
-            return nothing
-        end
-    end
-    return stmt
-end
-
-function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, nargs::Int, sv::OptimizationState)
-    # Go through and add an unreachable node after every
-    # Union{} call. Then reindex labels.
-    idx = 1
-    oldidx = 1
-    changemap = fill(0, length(code))
-    labelmap = coverage ? fill(0, length(code)) : changemap
-    prevloc = zero(eltype(ci.codelocs))
-    stmtinfo = sv.stmt_info
-    while idx <= length(code)
-        codeloc = ci.codelocs[idx]
-        if coverage && codeloc != prevloc && codeloc != 0
-            # insert a side-effect instruction before the current instruction in the same basic block
-            insert!(code, idx, Expr(:code_coverage_effect))
-            insert!(ci.codelocs, idx, codeloc)
-            insert!(ci.ssavaluetypes, idx, Nothing)
-            insert!(stmtinfo, idx, nothing)
-            changemap[oldidx] += 1
-            if oldidx < length(labelmap)
-                labelmap[oldidx + 1] += 1
-            end
-            idx += 1
-            prevloc = codeloc
-        end
-        if code[idx] isa Expr && ci.ssavaluetypes[idx] === Union{}
-            if !(idx < length(code) && isa(code[idx + 1], ReturnNode) && !isdefined((code[idx + 1]::ReturnNode), :val))
-                # insert unreachable in the same basic block after the current instruction (splitting it)
-                insert!(code, idx + 1, ReturnNode())
-                insert!(ci.codelocs, idx + 1, ci.codelocs[idx])
-                insert!(ci.ssavaluetypes, idx + 1, Union{})
-                insert!(stmtinfo, idx + 1, nothing)
-                if oldidx < length(changemap)
-                    changemap[oldidx + 1] += 1
-                    coverage && (labelmap[oldidx + 1] += 1)
-                end
-                idx += 1
-            end
-        end
-        idx += 1
-        oldidx += 1
-    end
-    renumber_ir_elements!(code, changemap, labelmap)
-
-    inbounds_depth = 0 # Number of stacked inbounds
-    meta = Any[]
-    flags = fill(0x00, length(code))
-    for i = 1:length(code)
-        stmt = code[i]
-        if isexpr(stmt, :inbounds)
-            arg1 = stmt.args[1]
-            if arg1 === true # push
-                inbounds_depth += 1
-            elseif arg1 === false # clear
-                inbounds_depth = 0
-            elseif inbounds_depth > 0 # pop
-                inbounds_depth -= 1
-            end
-            stmt = nothing
-        else
-            stmt = normalize(stmt, meta)
-        end
-        code[i] = stmt
-        if !(stmt === nothing)
-            if inbounds_depth > 0
-                flags[i] |= IR_FLAG_INBOUNDS
-            end
-        end
-    end
-    strip_trailing_junk!(ci, code, stmtinfo, flags)
-    cfg = compute_basic_blocks(code)
-    types = Any[]
-    stmts = InstructionStream(code, types, stmtinfo, ci.codelocs, flags)
-    ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable), sv.slottypes, meta, sv.sptypes)
-    return ir
-end
-
-function slot2reg(ir::IRCode, ci::CodeInfo, nargs::Int, sv::OptimizationState)
-    # need `ci` for the slot metadata, IR for the code
-    @timeit "domtree 1" domtree = construct_domtree(ir.cfg)
-    defuse_insts = scan_slot_def_use(nargs, ci, ir.stmts.inst)
-    @timeit "construct_ssa" ir = construct_ssa!(ci, ir, domtree, defuse_insts, nargs, sv.sptypes, sv.slottypes) # consumes `ir`
-    return ir
-end
-
-function run_passes(ci::CodeInfo, nargs::Int, sv::OptimizationState)
-    preserve_coverage = coverage_enabled(sv.mod)
-    ir = convert_to_ircode(ci, copy_exprargs(ci.code), preserve_coverage, nargs, sv)
-    ir = slot2reg(ir, ci, nargs, sv)
-    #@Base.show ("after_construct", ir)
-    # TODO: Domsorting can produce an updated domtree - no need to recompute here
-    @timeit "compact 1" ir = compact!(ir)
-    @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
-    #@timeit "verify 2" verify_ir(ir)
-    ir = compact!(ir)
-    #@Base.show ("before_sroa", ir)
-    @timeit "SROA" ir = getfield_elim_pass!(ir)
-    #@Base.show ir.new_nodes
-    #@Base.show ("after_sroa", ir)
-    ir = adce_pass!(ir)
-    #@Base.show ("after_adce", ir)
-    @timeit "type lift" ir = type_lift_pass!(ir)
-    @timeit "compact 3" ir = compact!(ir)
-    #@Base.show ir
-    if JLOptions().debug_level == 2
-        @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
-    end
-    return ir
-end
+include("compiler/ssair/EscapeAnalysis/EscapeAnalysis.jl")
+include("compiler/ssair/passes.jl")
diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl
index 4a8e5f5e0a622f..1738c05678211b 100644
--- a/base/compiler/ssair/inlining.jl
+++ b/base/compiler/ssair/inlining.jl
@@ -2,22 +2,12 @@
 
 @nospecialize
 
-struct InvokeData
-    entry::Method
-    types0
-    min_valid::UInt
-    max_valid::UInt
-end
-
 struct Signature
     f::Any
     ft::Any
-    atypes::Vector{Any}
-    atype::Type
-    Signature(f, ft, atypes) = new(f, ft, atypes)
-    Signature(f, ft, atypes, atype) = new(f, ft, atypes, atype)
+    argtypes::Vector{Any}
+    Signature(@nospecialize(f), @nospecialize(ft), argtypes::Vector{Any}) = new(f, ft, argtypes)
 end
-with_atype(sig::Signature) = Signature(sig.f, sig.ft, sig.atypes, argtypes_to_type(sig.atypes))
 
 struct ResolvedInliningSpec
     # The LineTable and IR of the inlinee
@@ -25,17 +15,18 @@ struct ResolvedInliningSpec
     # If the function being inlined is a single basic block we can use a
     # simpler inlining algorithm. This flag determines whether that's allowed
     linear_inline_eligible::Bool
+    # Effects of the call statement
+    effects::Effects
 end
 
 """
-    Represents a callsite that our analysis has determined is legal to inline,
-    but did not resolve during the analysis step to allow the outer inlining
-    pass to apply its own inlining policy decisions.
+Represents a callsite that our analysis has determined is legal to inline,
+but did not resolve during the analysis step to allow the outer inlining
+pass to apply its own inlining policy decisions.
 """
 struct DelayedInliningSpec
-    match::MethodMatch
-    atypes::Vector{Any}
-    stmttype::Any
+    match::Union{MethodMatch, InferenceResult}
+    argtypes::Vector{Any}
 end
 
 struct InliningTodo
@@ -44,19 +35,42 @@ struct InliningTodo
     spec::Union{ResolvedInliningSpec, DelayedInliningSpec}
 end
 
-InliningTodo(mi::MethodInstance, match::MethodMatch, atypes::Vector{Any}, @nospecialize(stmttype)) = InliningTodo(mi, DelayedInliningSpec(match, atypes, stmttype))
+InliningTodo(mi::MethodInstance, match::MethodMatch, argtypes::Vector{Any}) =
+    InliningTodo(mi, DelayedInliningSpec(match, argtypes))
+
+InliningTodo(result::InferenceResult, argtypes::Vector{Any}) =
+    InliningTodo(result.linfo, DelayedInliningSpec(result, argtypes))
 
 struct ConstantCase
     val::Any
     ConstantCase(val) = new(val)
 end
 
+struct SomeCase
+    val::Any
+    SomeCase(val) = new(val)
+end
+
+struct InvokeCase
+    invoke::MethodInstance
+    effects::Effects
+end
+
+struct InliningCase
+    sig  # Type
+    item # Union{InliningTodo, MethodInstance, ConstantCase}
+    function InliningCase(@nospecialize(sig), @nospecialize(item))
+        @assert isa(item, Union{InliningTodo, InvokeCase, ConstantCase}) "invalid inlining item"
+        return new(sig, item)
+    end
+end
+
 struct UnionSplit
     fully_covered::Bool
-    atype # ::Type
-    cases::Vector{Pair{Any, Any}}
+    atype::DataType
+    cases::Vector{InliningCase}
     bbs::Vector{Int}
-    UnionSplit(fully_covered::Bool, atype, cases::Vector{Pair{Any, Any}}) =
+    UnionSplit(fully_covered::Bool, atype::DataType, cases::Vector{InliningCase}) =
         new(fully_covered, atype, cases, Int[])
 end
 
@@ -68,13 +82,12 @@ function ssa_inlining_pass!(ir::IRCode, linetable::Vector{LineInfoNode}, state::
     @timeit "analysis" todo = assemble_inline_todo!(ir, state)
     isempty(todo) && return ir
     # Do the actual inlining for every call we identified
-    @timeit "execution" ir = batch_inline!(todo, ir, linetable, propagate_inbounds)
+    @timeit "execution" ir = batch_inline!(todo, ir, linetable, propagate_inbounds, state.params)
     return ir
 end
 
 mutable struct CFGInliningState
     new_cfg_blocks::Vector{BasicBlock}
-    inserted_block_ranges::Vector{UnitRange{Int}}
     todo_bbs::Vector{Tuple{Int, Int}}
     first_bb::Int
     bb_rename::Vector{Int}
@@ -87,7 +100,6 @@ end
 function CFGInliningState(ir::IRCode)
     CFGInliningState(
         BasicBlock[],
-        UnitRange{Int}[],
         Tuple{Int, Int}[],
         0,
         zeros(Int, length(ir.cfg.blocks)),
@@ -99,25 +111,25 @@ function CFGInliningState(ir::IRCode)
 end
 
 # Tells the inliner that we're now inlining into block `block`, meaning
-# all previous blocks have been proceesed and can be added to the new cfg
+# all previous blocks have been processed and can be added to the new cfg
 function inline_into_block!(state::CFGInliningState, block::Int)
     if state.first_bb != block
         new_range = state.first_bb+1:block
         l = length(state.new_cfg_blocks)
         state.bb_rename[new_range] = (l+1:l+length(new_range))
-        append!(state.new_cfg_blocks, map(copy, state.cfg.blocks[new_range]))
+        append!(state.new_cfg_blocks, (copy(block) for block in state.cfg.blocks[new_range]))
         push!(state.merged_orig_blocks, last(new_range))
     end
     state.first_bb = block
     return
 end
 
-function cfg_inline_item!(idx::Int, spec::ResolvedInliningSpec, state::CFGInliningState, from_unionsplit::Bool=false)
+function cfg_inline_item!(ir::IRCode, idx::Int, spec::ResolvedInliningSpec, state::CFGInliningState, from_unionsplit::Bool=false)
     inlinee_cfg = spec.ir.cfg
     # Figure out if we need to split the BB
     need_split_before = false
     need_split = true
-    block = block_for_inst(state.cfg, idx)
+    block = block_for_inst(ir, idx)
     inline_into_block!(state, block)
 
     if !isempty(inlinee_cfg.blocks[1].preds)
@@ -127,20 +139,19 @@ function cfg_inline_item!(idx::Int, spec::ResolvedInliningSpec, state::CFGInlini
     last_block_idx = last(state.cfg.blocks[block].stmts)
     if false # TODO: ((idx+1) == last_block_idx && isa(ir[SSAValue(last_block_idx)], GotoNode))
         need_split = false
-        post_bb_id = -ir[SSAValue(last_block_idx)].label
+        post_bb_id = -ir[SSAValue(last_block_idx)][:inst].label
     else
         post_bb_id = length(state.new_cfg_blocks) + length(inlinee_cfg.blocks) + (need_split_before ? 1 : 0)
         need_split = true #!(idx == last_block_idx)
     end
 
-    if !need_split
-        delete!(state.merged_orig_blocks, last(new_range))
-    end
+    need_split || delete!(state.merged_orig_blocks, last(new_range))
 
     push!(state.todo_bbs, (length(state.new_cfg_blocks) - 1 + (need_split_before ? 1 : 0), post_bb_id))
 
     from_unionsplit || delete!(state.split_targets, length(state.new_cfg_blocks))
-    orig_succs = copy(state.new_cfg_blocks[end].succs)
+    local orig_succs
+    need_split && (orig_succs = copy(state.new_cfg_blocks[end].succs))
     empty!(state.new_cfg_blocks[end].succs)
     if need_split_before
         l = length(state.new_cfg_blocks)
@@ -160,7 +171,6 @@ function cfg_inline_item!(idx::Int, spec::ResolvedInliningSpec, state::CFGInlini
         from_unionsplit || push!(state.split_targets, length(state.new_cfg_blocks))
     end
     new_block_range = (length(state.new_cfg_blocks)-length(inlinee_cfg.blocks)+1):length(state.new_cfg_blocks)
-    push!(state.inserted_block_ranges, new_block_range)
 
     # Fixup the edges of the newely added blocks
     for (old_block, new_block) in enumerate(bb_rename_range)
@@ -190,7 +200,7 @@ function cfg_inline_item!(idx::Int, spec::ResolvedInliningSpec, state::CFGInlini
     for (old_block, new_block) in enumerate(bb_rename_range)
         if (length(state.new_cfg_blocks[new_block].succs) == 0)
             terminator_idx = last(inlinee_cfg.blocks[old_block].stmts)
-            terminator = spec.ir[SSAValue(terminator_idx)]
+            terminator = spec.ir[SSAValue(terminator_idx)][:inst]
             if isa(terminator, ReturnNode) && isdefined(terminator, :val)
                 any_edges = true
                 push!(state.new_cfg_blocks[new_block].succs, post_bb_id)
@@ -200,53 +210,52 @@ function cfg_inline_item!(idx::Int, spec::ResolvedInliningSpec, state::CFGInlini
             end
         end
     end
+    any_edges || push!(state.dead_blocks, post_bb_id)
 
-    if !any_edges
-        push!(state.dead_blocks, post_bb_id)
-    end
+    return nothing
 end
 
-function cfg_inline_unionsplit!(idx::Int, item::UnionSplit, state::CFGInliningState)
-    block = block_for_inst(state.cfg, idx)
-    inline_into_block!(state, block)
+function cfg_inline_unionsplit!(ir::IRCode, idx::Int,
+                                (; fully_covered, #=atype,=# cases, bbs)::UnionSplit,
+                                state::CFGInliningState,
+                                params::OptimizationParams)
+    inline_into_block!(state, block_for_inst(ir, idx))
     from_bbs = Int[]
     delete!(state.split_targets, length(state.new_cfg_blocks))
     orig_succs = copy(state.new_cfg_blocks[end].succs)
     empty!(state.new_cfg_blocks[end].succs)
-    for (i, (_, case)) in enumerate(item.cases)
+    for i in 1:length(cases)
         # The condition gets sunk into the previous block
         # Add a block for the union-split body
         push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx)))
         cond_bb = length(state.new_cfg_blocks)-1
         push!(state.new_cfg_blocks[end].preds, cond_bb)
         push!(state.new_cfg_blocks[cond_bb].succs, cond_bb+1)
+        case = cases[i].item
         if isa(case, InliningTodo)
             spec = case.spec::ResolvedInliningSpec
             if !spec.linear_inline_eligible
-                cfg_inline_item!(idx, spec, state, true)
+                cfg_inline_item!(ir, idx, spec, state, true)
             end
         end
-        bb = length(state.new_cfg_blocks)
-        push!(from_bbs, bb)
+        push!(from_bbs, length(state.new_cfg_blocks))
         # TODO: Right now we unconditionally generate a fallback block
         # in case of subtyping errors - This is probably unnecessary.
-        if true # i != length(item.cases) || !item.fully_covered
+        if i != length(cases) || (!fully_covered || (!params.trust_inference))
             # This block will have the next condition or the final else case
             push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx)))
             push!(state.new_cfg_blocks[cond_bb].succs, length(state.new_cfg_blocks))
             push!(state.new_cfg_blocks[end].preds, cond_bb)
-            push!(item.bbs, length(state.new_cfg_blocks))
+            push!(bbs, length(state.new_cfg_blocks))
         end
     end
     # The edge from the fallback block.
-    if !item.fully_covered
-        push!(from_bbs, length(state.new_cfg_blocks))
-    end
+    fully_covered || push!(from_bbs, length(state.new_cfg_blocks))
     # This block will be the block everyone returns to
     push!(state.new_cfg_blocks, BasicBlock(StmtRange(idx, idx), from_bbs, orig_succs))
     join_bb = length(state.new_cfg_blocks)
     push!(state.split_targets, join_bb)
-    push!(item.bbs, join_bb)
+    push!(bbs, join_bb)
     for bb in from_bbs
         push!(state.new_cfg_blocks[bb].succs, join_bb)
     end
@@ -254,8 +263,10 @@ end
 
 function finish_cfg_inline!(state::CFGInliningState)
     new_range = (state.first_bb + 1):length(state.cfg.blocks)
-    l = length(state.new_cfg_blocks)
-    state.bb_rename[new_range] = (l+1:l+length(new_range))
+    state.bb_rename[new_range] = let
+        l = length(state.new_cfg_blocks)
+        l+1:l+length(new_range)
+    end
     append!(state.new_cfg_blocks, state.cfg.blocks[new_range])
 
     # Rename edges original bbs
@@ -300,34 +311,60 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                          boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
     # Ok, do the inlining here
     spec = item.spec::ResolvedInliningSpec
-    inline_cfg = spec.ir.cfg
-    stmt = compact.result[idx][:inst]
+    sparam_vals = item.mi.sparam_vals
+    def = item.mi.def::Method
     linetable_offset::Int32 = length(linetable)
     # Append the linetable of the inlined function to our line table
-    inlined_at = Int(compact.result[idx][:line])
-    for entry in spec.ir.linetable
-        push!(linetable, LineInfoNode(entry.module, entry.method, entry.file, entry.line,
-            (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset : inlined_at)))
-    end
-    nargs_def = item.mi.def.nargs
-    isva = nargs_def > 0 && item.mi.def.isva
-    if isva
-        vararg = mk_tuplecall!(compact, argexprs[nargs_def:end], compact.result[idx][:line])
-        argexprs = Any[argexprs[1:(nargs_def - 1)]..., vararg]
-    end
-    flag = compact.result[idx][:flag]
-    boundscheck_idx = boundscheck
-    if boundscheck_idx === :default || boundscheck_idx === :propagate
-        if (flag & IR_FLAG_INBOUNDS) != 0
-            boundscheck_idx = :off
+    inlined_at = compact.result[idx][:line]
+    topline::Int32 = linetable_offset + Int32(1)
+    coverage = coverage_enabled(def.module)
+    coverage_by_path = JLOptions().code_coverage == 3
+    push!(linetable, LineInfoNode(def.module, def.name, def.file, def.line, inlined_at))
+    oldlinetable = spec.ir.linetable
+    for oldline in 1:length(oldlinetable)
+        entry = oldlinetable[oldline]
+        if !coverage && coverage_by_path && is_file_tracked(entry.file)
+            # include topline coverage entry if in path-specific coverage mode, and any file falls under path
+            coverage = true
+        end
+        newentry = LineInfoNode(entry.module, entry.method, entry.file, entry.line,
+            (entry.inlined_at > 0 ? entry.inlined_at + linetable_offset + (oldline == 1) : inlined_at))
+        if oldline == 1
+            # check for a duplicate on the first iteration (likely true)
+            if newentry === linetable[topline]
+                continue
+            else
+                linetable_offset += 1
+            end
+        end
+        push!(linetable, newentry)
+    end
+    if coverage && spec.ir.stmts[1][:line] + linetable_offset != topline
+        insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, topline))
+    end
+    if def.isva
+        nargs_def = Int(def.nargs::Int32)
+        if nargs_def > 0
+            argexprs = fix_va_argexprs!(compact, argexprs, nargs_def, topline)
+        end
+    end
+    if def.is_for_opaque_closure
+        # Replace the first argument by a load of the capture environment
+        argexprs[1] = insert_node_here!(compact,
+            NewInstruction(Expr(:call, GlobalRef(Core, :getfield), argexprs[1], QuoteNode(:captures)),
+            spec.ir.argtypes[1], topline))
+    end
+    if boundscheck === :default || boundscheck === :propagate
+        if (compact.result[idx][:flag] & IR_FLAG_INBOUNDS) != 0
+            boundscheck = :off
         end
     end
     # If the iterator already moved on to the next basic block,
     # temporarily re-open in again.
     local return_value
+    sig = def.sig
     # Special case inlining that maintains the current basic block if there's only one BB in the target
     if spec.linear_inline_eligible
-        terminator = spec.ir[SSAValue(last(inline_cfg.blocks[1].stmts))]
         #compact[idx] = nothing
         inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx)
         for ((_, idx′), stmt′) in inline_compact
@@ -335,15 +372,13 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
             # face of rename_arguments! mutating in place - should figure out
             # something better eventually.
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.mi.def.sig, item.mi.sparam_vals, linetable_offset, boundscheck_idx, compact)
+            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck, compact)
             if isa(stmt′, ReturnNode)
-                isa(stmt′.val, SSAValue) && (compact.used_ssas[stmt′.val.id] += 1)
-                return_value = SSAValue(idx′)
-                inline_compact[idx′] = stmt′.val
                 val = stmt′.val
-                inline_compact.result[idx′][:type] = (isa(val, Argument) || isa(val, Expr)) ?
-                    compact_exprtype(compact, stmt′.val) :
-                    compact_exprtype(inline_compact, stmt′.val)
+                return_value = SSAValue(idx′)
+                inline_compact[idx′] = val
+                inline_compact.result[idx′][:type] =
+                    argextype(val, isa(val, Argument) || isa(val, Expr) ? compact : inline_compact)
                 break
             end
             inline_compact[idx′] = stmt′
@@ -362,7 +397,7 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         inline_compact = IncrementalCompact(compact, spec.ir, compact.result_idx)
         for ((_, idx′), stmt′) in inline_compact
             inline_compact[idx′] = nothing
-            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, item.mi.def.sig, item.mi.sparam_vals, linetable_offset, boundscheck_idx, compact)
+            stmt′ = ssa_substitute!(idx′, stmt′, argexprs, sig, sparam_vals, linetable_offset, boundscheck, compact)
             if isa(stmt′, ReturnNode)
                 if isdefined(stmt′, :val)
                     val = stmt′.val
@@ -371,23 +406,21 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
                     push!(pn.edges, inline_compact.active_result_bb-1)
                     if isa(val, GlobalRef) || isa(val, Expr)
                         stmt′ = val
-                        inline_compact.result[idx′][:type] = (isa(val, Argument) || isa(val, Expr)) ?
-                            compact_exprtype(compact, val) :
-                            compact_exprtype(inline_compact, val)
-                        insert_node_here!(inline_compact, GotoNode(post_bb_id),
-                                          Any, compact.result[idx′][:line],
+                        inline_compact.result[idx′][:type] =
+                            argextype(val, isa(val, Expr) ? compact : inline_compact)
+                        insert_node_here!(inline_compact, NewInstruction(GotoNode(post_bb_id),
+                                          Any, compact.result[idx′][:line]),
                                           true)
                         push!(pn.values, SSAValue(idx′))
                     else
                         push!(pn.values, val)
                         stmt′ = GotoNode(post_bb_id)
                     end
-
                 end
             elseif isa(stmt′, GotoNode)
                 stmt′ = GotoNode(stmt′.label + bb_offset)
             elseif isa(stmt′, Expr) && stmt′.head === :enter
-                stmt′ = Expr(:enter, stmt′.args[1] + bb_offset)
+                stmt′ = Expr(:enter, stmt′.args[1]::Int + bb_offset)
             elseif isa(stmt′, GotoIfNot)
                 stmt′ = GotoIfNot(stmt′.cond, stmt′.dest + bb_offset)
             elseif isa(stmt′, PhiNode)
@@ -398,71 +431,151 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector
         just_fixup!(inline_compact)
         compact.result_idx = inline_compact.result_idx
         compact.active_result_bb = inline_compact.active_result_bb
-        for i = 1:length(pn.values)
-            isassigned(pn.values, i) || continue
-            if isa(pn.values[i], SSAValue)
-                compact.used_ssas[pn.values[i].id] += 1
-            end
-        end
         if length(pn.edges) == 1
             return_value = pn.values[1]
         else
-            return_value = insert_node_here!(compact, pn, compact_exprtype(compact, SSAValue(idx)), compact.result[idx][:line])
+            return_value = insert_node_here!(compact,
+                NewInstruction(pn, argextype(SSAValue(idx), compact), compact.result[idx][:line]))
         end
     end
     return_value
 end
 
-const fatal_type_bound_error = ErrorException("fatal error in type inference (type bound)")
+function fix_va_argexprs!(compact::IncrementalCompact,
+    argexprs::Vector{Any}, nargs_def::Int, line_idx::Int32)
+    newargexprs = argexprs[1:(nargs_def-1)]
+    tuple_call = Expr(:call, TOP_TUPLE)
+    tuple_typs = Any[]
+    for i in nargs_def:length(argexprs)
+        arg = argexprs[i]
+        push!(tuple_call.args, arg)
+        push!(tuple_typs, argextype(arg, compact))
+    end
+    tuple_typ = tuple_tfunc(tuple_typs)
+    push!(newargexprs, insert_node_here!(compact, NewInstruction(tuple_call, tuple_typ, line_idx)))
+    return newargexprs
+end
+
+const FATAL_TYPE_BOUND_ERROR = ErrorException("fatal error in type inference (type bound)")
+
+"""
+    ir_inline_unionsplit!
+
+The core idea of this function is to simulate the dispatch semantics by generating
+(flat) `isa`-checks corresponding to the signatures of union-split dispatch candidates,
+and then inline their bodies into each `isa`-conditional block.
+This `isa`-based virtual dispatch requires few pre-conditions to hold in order to simulate
+the actual semantics correctly.
+
+The first one is that these dispatch candidates need to be processed in order of their specificity,
+and the corresponding `isa`-checks should reflect the method specificities, since now their
+signatures are not necessarily concrete.
+For example, given the following definitions:
+
+    f(x::Int)    = ...
+    f(x::Number) = ...
+    f(x::Any)    = ...
+
+and a callsite:
+
+    f(x::Any)
+
+then a correct `isa`-based virtual dispatch would be:
+
+    if isa(x, Int)
+        [inlined/resolved f(x::Int)]
+    elseif isa(x, Number)
+        [inlined/resolved f(x::Number)]
+    else # implies `isa(x, Any)`, which fully covers this call signature,
+         # otherwise we need to insert a fallback dynamic dispatch case also
+        [inlined/resolved f(x::Any)]
+    end
 
+Fortunately, `ml_matches` should already sorted them in that way, except cases when there is
+any ambiguity, from which we already bail out at this point.
+
+Another consideration is type equality constraint from type variables: the `isa`-checks are
+not enough to simulate the dispatch semantics in cases like:
+Given a definition:
+
+    g(x::T, y::T) where T<:Integer = ...
+
+transform a callsite:
+
+    g(x::Any, y::Any)
+
+into the optimized form:
+
+    if isa(x, Integer) && isa(y, Integer)
+        [inlined/resolved g(x::Integer, y::Integer)]
+    else
+        g(x, y) # fallback dynamic dispatch
+    end
+
+But again, we should already bail out from such cases at this point, essentially by
+excluding cases where `case.sig::UnionAll`.
+
+In short, here we can process the dispatch candidates in order, assuming we haven't changed
+their order somehow somewhere up to this point.
+"""
 function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
                                argexprs::Vector{Any}, linetable::Vector{LineInfoNode},
-                               item::UnionSplit, boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}})
+                               (; fully_covered, atype, cases, bbs)::UnionSplit,
+                               boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}},
+                               params::OptimizationParams)
     stmt, typ, line = compact.result[idx][:inst], compact.result[idx][:type], compact.result[idx][:line]
-    atype = item.atype
-    generic_bb = item.bbs[end-1]
-    join_bb = item.bbs[end]
-    bb = compact.active_result_bb
+    join_bb = bbs[end]
     pn = PhiNode()
-    has_generic = false
-    @assert length(item.bbs) > length(item.cases)
-    for ((metharg, case), next_cond_bb) in zip(item.cases, item.bbs)
-        @assert !isa(metharg, UnionAll)
+    local bb = compact.active_result_bb
+    ncases = length(cases)
+    @assert length(bbs) >= ncases
+    for i = 1:ncases
+        ithcase = cases[i]
+        mtype = ithcase.sig::DataType # checked within `handle_cases!`
+        case = ithcase.item
+        next_cond_bb = bbs[i]
         cond = true
-        @assert length(atype.parameters) == length(metharg.parameters)
-        for i in 1:length(atype.parameters)
-            a, m = atype.parameters[i], metharg.parameters[i]
-            # If this is always true, we don't need to check for it
-            a <: m && continue
-            # Generate isa check
-            isa_expr = Expr(:call, isa, argexprs[i], m)
-            ssa = insert_node_here!(compact, isa_expr, Bool, line)
-            if cond === true
-                cond = ssa
-            else
-                and_expr = Expr(:call, and_int, cond, ssa)
-                cond = insert_node_here!(compact, and_expr, Bool, line)
+        nparams = fieldcount(atype)
+        @assert nparams == fieldcount(mtype)
+        if i != ncases || !fully_covered || !params.trust_inference
+            for i = 1:nparams
+                a, m = fieldtype(atype, i), fieldtype(mtype, i)
+                # If this is always true, we don't need to check for it
+                a <: m && continue
+                # Generate isa check
+                isa_expr = Expr(:call, isa, argexprs[i], m)
+                ssa = insert_node_here!(compact, NewInstruction(isa_expr, Bool, line))
+                if cond === true
+                    cond = ssa
+                else
+                    and_expr = Expr(:call, and_int, cond, ssa)
+                    cond = insert_node_here!(compact, NewInstruction(and_expr, Bool, line))
+                end
             end
+            insert_node_here!(compact, NewInstruction(GotoIfNot(cond, next_cond_bb), Union{}, line))
         end
-        insert_node_here!(compact, GotoIfNot(cond, next_cond_bb), Union{}, line)
         bb = next_cond_bb - 1
         finish_current_bb!(compact, 0)
         argexprs′ = argexprs
         if !isa(case, ConstantCase)
             argexprs′ = copy(argexprs)
-            for i = 1:length(metharg.parameters)
-                a, m = atype.parameters[i], metharg.parameters[i]
-                (isa(argexprs[i], SSAValue) || isa(argexprs[i], Argument)) || continue
+            for i = 1:nparams
+                argex = argexprs[i]
+                (isa(argex, SSAValue) || isa(argex, Argument)) || continue
+                a, m = fieldtype(atype, i), fieldtype(mtype, i)
                 if !(a <: m)
-                    argexprs′[i] = insert_node_here!(compact, PiNode(argexprs′[i], m),
-                                                     m, line)
+                    argexprs′[i] = insert_node_here!(compact,
+                        NewInstruction(PiNode(argex, m), m, line))
                 end
             end
         end
         if isa(case, InliningTodo)
             val = ir_inline_item!(compact, idx, argexprs′, linetable, case, boundscheck, todo_bbs)
-        elseif isa(case, MethodInstance)
-            val = insert_node_here!(compact, Expr(:invoke, case, argexprs′...), typ, line)
+        elseif isa(case, InvokeCase)
+            effect_free = is_removable_if_unused(case.effects)
+            val = insert_node_here!(compact,
+                NewInstruction(Expr(:invoke, case.invoke, argexprs′...), typ, nothing,
+                    line, effect_free ? IR_FLAG_EFFECT_FREE : IR_FLAG_NULL, effect_free))
         else
             case = case::ConstantCase
             val = case.val
@@ -470,44 +583,47 @@ function ir_inline_unionsplit!(compact::IncrementalCompact, idx::Int,
         if !isempty(compact.result_bbs[bb].preds)
             push!(pn.edges, bb)
             push!(pn.values, val)
-            insert_node_here!(compact, GotoNode(join_bb), Union{}, line)
+            insert_node_here!(compact,
+                NewInstruction(GotoNode(join_bb), Union{}, line))
         else
-            insert_node_here!(compact, ReturnNode(), Union{}, line)
+            insert_node_here!(compact,
+                NewInstruction(ReturnNode(), Union{}, line))
         end
         finish_current_bb!(compact, 0)
     end
     bb += 1
     # We're now in the fall through block, decide what to do
-    if item.fully_covered
-        e = Expr(:call, GlobalRef(Core, :throw), fatal_type_bound_error)
-        insert_node_here!(compact, e, Union{}, line)
-        insert_node_here!(compact, ReturnNode(), Union{}, line)
-        finish_current_bb!(compact, 0)
+    if fully_covered
+        if !params.trust_inference
+            e = Expr(:call, GlobalRef(Core, :throw), FATAL_TYPE_BOUND_ERROR)
+            insert_node_here!(compact, NewInstruction(e, Union{}, line))
+            insert_node_here!(compact, NewInstruction(ReturnNode(), Union{}, line))
+            finish_current_bb!(compact, 0)
+        end
     else
-        ssa = insert_node_here!(compact, stmt, typ, line)
+        ssa = insert_node_here!(compact, NewInstruction(stmt, typ, line))
         push!(pn.edges, bb)
         push!(pn.values, ssa)
-        insert_node_here!(compact, GotoNode(join_bb), Union{}, line)
+        insert_node_here!(compact, NewInstruction(GotoNode(join_bb), Union{}, line))
         finish_current_bb!(compact, 0)
     end
 
     # We're now in the join block.
-    compact.ssa_rename[compact.idx-1] = insert_node_here!(compact, pn, typ, line)
-    nothing
+    return insert_node_here!(compact, NewInstruction(pn, typ, line))
 end
 
-function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool)
+function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vector{LineInfoNode}, propagate_inbounds::Bool, params::OptimizationParams)
     # Compute the new CFG first (modulo statement ranges, which will be computed below)
     state = CFGInliningState(ir)
     for (idx, item) in todo
         if isa(item, UnionSplit)
-            cfg_inline_unionsplit!(idx, item::UnionSplit, state)
+            cfg_inline_unionsplit!(ir, idx, item, state, params)
         else
             item = item::InliningTodo
             spec = item.spec::ResolvedInliningSpec
             # A linear inline does not modify the CFG
             spec.linear_inline_eligible && continue
-            cfg_inline_item!(idx, spec, state, false)
+            cfg_inline_item!(ir, idx, spec, state, false)
         end
     end
     finish_cfg_inline!(state)
@@ -532,23 +648,29 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect
         (inline_idx, item) = popfirst!(todo)
         for ((old_idx, idx), stmt) in compact
             if old_idx == inline_idx
+                stmt = stmt::Expr
                 argexprs = copy(stmt.args)
                 refinish = false
                 if compact.result_idx == first(compact.result_bbs[compact.active_result_bb].stmts)
                     compact.active_result_bb -= 1
                     refinish = true
                 end
-                # At the moment we will allow globalrefs in argument position, turn those into ssa values
+                # It is possible for GlobalRefs and Exprs to be in argument position
+                # at this point in the IR, though in that case they are required
+                # to be effect-free. However, we must still move them out of argument
+                # position, since `Argument` is allowed in PhiNodes, but `GlobalRef`
+                # and `Expr` are not, so a substitution could anger the verifier.
                 for aidx in 1:length(argexprs)
                     aexpr = argexprs[aidx]
-                    if isa(aexpr, GlobalRef) || isa(aexpr, Expr)
-                        argexprs[aidx] = insert_node_here!(compact, aexpr, compact_exprtype(compact, aexpr), compact.result[idx][:line])
+                    if isa(aexpr, Expr) || isa(aexpr, GlobalRef)
+                        ninst = effect_free(NewInstruction(aexpr, argextype(aexpr, compact), compact.result[idx][:line]))
+                        argexprs[aidx] = insert_node_here!(compact, ninst)
                     end
                 end
                 if isa(item, InliningTodo)
                     compact.ssa_rename[old_idx] = ir_inline_item!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs)
                 elseif isa(item, UnionSplit)
-                    ir_inline_unionsplit!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs)
+                    compact.ssa_rename[old_idx] = ir_inline_unionsplit!(compact, idx, argexprs, linetable, item, boundscheck, state.todo_bbs, params)
                 end
                 compact[idx] = nothing
                 refinish && finish_current_bb!(compact, 0)
@@ -560,7 +682,7 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect
             elseif isa(stmt, GotoNode)
                 compact[idx] = GotoNode(state.bb_rename[stmt.label])
             elseif isa(stmt, Expr) && stmt.head === :enter
-                compact[idx] = Expr(:enter, state.bb_rename[stmt.args[1]])
+                compact[idx] = Expr(:enter, state.bb_rename[stmt.args[1]::Int])
             elseif isa(stmt, GotoIfNot)
                 compact[idx] = GotoIfNot(stmt.cond, state.bb_rename[stmt.dest])
             elseif isa(stmt, PhiNode)
@@ -573,33 +695,33 @@ function batch_inline!(todo::Vector{Pair{Int, Any}}, ir::IRCode, linetable::Vect
     return ir
 end
 
-# This assumes the caller has verified that all arguments to the _apply call are Tuples.
-function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int,
-        argexprs::Vector{Any}, atypes::Vector{Any}, arginfos::Vector{Any},
-        arg_start::Int, et::Union{EdgeTracker, Nothing}, caches::Union{InferenceCaches, Nothing},
-        params::OptimizationParams)
-
+# This assumes the caller has verified that all arguments to the _apply_iterate call are Tuples.
+function rewrite_apply_exprargs!(
+    ir::IRCode, idx::Int, stmt::Expr, argtypes::Vector{Any},
+    arginfos::Vector{MaybeAbstractIterationInfo}, arg_start::Int, istate::InliningState, todo::Vector{Pair{Int, Any}})
+    flag = ir.stmts[idx][:flag]
+    argexprs = stmt.args
     new_argexprs = Any[argexprs[arg_start]]
-    new_atypes = Any[atypes[arg_start]]
+    new_argtypes = Any[argtypes[arg_start]]
     # loop over original arguments and flatten any known iterators
     for i in (arg_start+1):length(argexprs)
         def = argexprs[i]
-        def_type = atypes[i]
+        def_type = argtypes[i]
         thisarginfo = arginfos[i-arg_start]
         if thisarginfo === nothing
             if def_type isa PartialStruct
                 # def_type.typ <: Tuple is assumed
-                def_atypes = def_type.fields
+                def_argtypes = def_type.fields
             else
-                def_atypes = Any[]
+                def_argtypes = Any[]
                 if isa(def_type, Const) # && isa(def_type.val, Union{Tuple, SimpleVector}) is implied
                     for p in def_type.val
-                        push!(def_atypes, Const(p))
+                        push!(def_argtypes, Const(p))
                     end
                 else
-                    ti = widenconst(def_type)
+                    ti = widenconst(def_type)::DataType # checked by `is_valid_type_for_apply_rewrite`
                     if ti.name === NamedTuple_typename
-                        ti = ti.parameters[2]
+                        ti = ti.parameters[2]::DataType # checked by `is_valid_type_for_apply_rewrite`
                     end
                     for p in ti.parameters
                         if isa(p, DataType) && isdefined(p, :instance)
@@ -608,125 +730,157 @@ function rewrite_apply_exprargs!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::
                         elseif isconstType(p)
                             p = Const(p.parameters[1])
                         end
-                        push!(def_atypes, p)
+                        push!(def_argtypes, p)
                     end
                 end
             end
-            # now push flattened types into new_atypes and getfield exprs into new_argexprs
-            for j in 1:length(def_atypes)
-                def_atype = def_atypes[j]
+            # now push flattened types into new_argtypes and getfield exprs into new_argexprs
+            for j in 1:length(def_argtypes)
+                def_atype = def_argtypes[j]
                 if isa(def_atype, Const) && is_inlineable_constant(def_atype.val)
                     new_argexpr = quoted(def_atype.val)
                 else
                     new_call = Expr(:call, GlobalRef(Core, :getfield), def, j)
-                    new_argexpr = insert_node!(ir, idx, def_atype, new_call)
+                    new_argexpr = insert_node!(ir, idx, NewInstruction(new_call, def_atype))
                 end
                 push!(new_argexprs, new_argexpr)
-                push!(new_atypes, def_atype)
+                push!(new_argtypes, def_atype)
             end
         else
             state = Core.svec()
             for i = 1:length(thisarginfo.each)
                 call = thisarginfo.each[i]
                 new_stmt = Expr(:call, argexprs[2], def, state...)
-                state1 = insert_node!(ir, idx, call.rt, new_stmt)
-                new_sig = with_atype(call_sig(ir, new_stmt))
-                if isa(call.info, MethodMatchInfo) || isa(call.info, UnionSplitInfo)
-                    info = isa(call.info, MethodMatchInfo) ?
-                        MethodMatchInfo[call.info] : call.info.matches
+                state1 = insert_node!(ir, idx, NewInstruction(new_stmt, call.rt))
+                new_sig = call_sig(ir, new_stmt)::Signature
+                new_info = call.info
+                if isa(new_info, ConstCallInfo)
+                    handle_const_call!(
+                        ir, state1.id, new_stmt, new_info, flag,
+                        new_sig, istate, todo)
+                elseif isa(new_info, MethodMatchInfo) || isa(new_info, UnionSplitInfo)
+                    new_infos = isa(new_info, MethodMatchInfo) ? MethodMatchInfo[new_info] : new_info.matches
                     # See if we can inline this call to `iterate`
-                    analyze_single_call!(ir, todo, state1.id, new_stmt,
-                        new_sig, call.rt, info, et, caches, params)
+                    analyze_single_call!(
+                        ir, state1.id, new_stmt, new_infos, flag,
+                        new_sig, istate, todo)
                 end
                 if i != length(thisarginfo.each)
                     valT = getfield_tfunc(call.rt, Const(1))
-                    val_extracted = insert_node!(ir, idx, valT,
-                        Expr(:call, GlobalRef(Core, :getfield), state1, 1))
+                    val_extracted = insert_node!(ir, idx, NewInstruction(
+                        Expr(:call, GlobalRef(Core, :getfield), state1, 1),
+                        valT))
                     push!(new_argexprs, val_extracted)
-                    push!(new_atypes, valT)
-                    state_extracted = insert_node!(ir, idx, getfield_tfunc(call.rt, Const(2)),
-                        Expr(:call, GlobalRef(Core, :getfield), state1, 2))
+                    push!(new_argtypes, valT)
+                    state_extracted = insert_node!(ir, idx, NewInstruction(
+                        Expr(:call, GlobalRef(Core, :getfield), state1, 2),
+                        getfield_tfunc(call.rt, Const(2))))
                     state = Core.svec(state_extracted)
                 end
             end
         end
     end
-    return new_argexprs, new_atypes
+    stmt.args = new_argexprs
+    return new_argtypes
 end
 
-function rewrite_invoke_exprargs!(argexprs::Vector{Any})
-    argexpr0 = argexprs[2]
-    argexprs = argexprs[4:end]
-    pushfirst!(argexprs, argexpr0)
-    return argexprs
+function compileable_specialization(et::Union{EdgeTracker, Nothing}, match::MethodMatch, effects::Effects)
+    mi = specialize_method(match; compilesig=true)
+    mi !== nothing && et !== nothing && push!(et, mi::MethodInstance)
+    mi === nothing && return nothing
+    return InvokeCase(mi, effects)
 end
 
-function singleton_type(@nospecialize(ft))
-    if isa(ft, Const)
-        return ft.val
-    elseif ft isa DataType && isdefined(ft, :instance)
-        return ft.instance
-    end
-    return nothing
+function compileable_specialization(et::Union{EdgeTracker, Nothing}, linfo::MethodInstance, effects::Effects)
+    mi = specialize_method(linfo.def::Method, linfo.specTypes, linfo.sparam_vals; compilesig=true)
+    mi !== nothing && et !== nothing && push!(et, mi::MethodInstance)
+    mi === nothing && return nothing
+    return InvokeCase(mi, effects)
 end
 
-function compileable_specialization(et::Union{EdgeTracker, Nothing}, match::MethodMatch)
-    mi = specialize_method(match, false, true)
-    mi !== nothing && et !== nothing && push!(et, mi::MethodInstance)
-    return mi
+function compileable_specialization(et::Union{EdgeTracker, Nothing}, (; linfo)::InferenceResult, effects::Effects)
+    return compileable_specialization(et, linfo, effects)
 end
 
-function resolve_todo(todo::InliningTodo, et::Union{EdgeTracker, Nothing}, caches::InferenceCaches)
-    spec = todo.spec::DelayedInliningSpec
-    isconst, src = find_inferred(todo.mi, spec.atypes, caches, spec.stmttype)
+function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8)
+    mi = todo.mi
+    (; match, argtypes) = todo.spec::DelayedInliningSpec
+    et = state.et
+
+    #XXX: update_valid_age!(min_valid[1], max_valid[1], sv)
+    if isa(match, InferenceResult)
+        inferred_src = match.src
+        if isa(inferred_src, ConstAPI)
+            # use constant calling convention
+            et !== nothing && push!(et, mi)
+            return ConstantCase(quoted(inferred_src.val))
+        else
+            src = inferred_src
+        end
+        effects = match.ipo_effects
+    else
+        code = get(state.mi_cache, mi, nothing)
+        if code isa CodeInstance
+            if use_const_api(code)
+                # in this case function can be inlined to a constant
+                et !== nothing && push!(et, mi)
+                return ConstantCase(quoted(code.rettype_const))
+            else
+                src = code.inferred
+            end
+            effects = decode_effects(code.ipo_purity_bits)
+        else
+            effects = Effects()
+            src = code
+        end
+    end
 
-    if isconst
-        push!(et, todo.mi)
-        return ConstantCase(src)
+    # the duplicated check might have been done already within `analyze_method!`, but still
+    # we need it here too since we may come here directly using a constant-prop' result
+    if !state.params.inlining || is_stmt_noinline(flag)
+        return compileable_specialization(et, match, effects)
     end
 
+    src = inlining_policy(state.interp, src, flag, mi, argtypes)
+
     if src === nothing
-        return compileable_specialization(et, spec.match)
+        return compileable_specialization(et, match, effects)
     end
 
-    if isa(src, CodeInfo) || isa(src, Vector{UInt8})
-        src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
-        src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
-
-        if !(src_inferred && src_inlineable)
-            return compileable_specialization(et, spec.match)
-        end
-    elseif isa(src, IRCode)
+    if isa(src, IRCode)
         src = copy(src)
     end
 
-    et !== nothing && push!(et, todo.mi)
-    return InliningTodo(todo.mi, src)
+    et !== nothing && push!(et, mi)
+    return InliningTodo(mi, src, effects)
 end
 
-function resolve_todo(todo::UnionSplit, et::Union{EdgeTracker, Nothing}, caches::InferenceCaches)
-    UnionSplit(todo.fully_covered, todo.atype,
-        Pair{Any,Any}[sig=>resolve_todo(item, et, caches) for (sig, item) in todo.cases])
+function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8)
+    ncases = length(cases)
+    newcases = Vector{InliningCase}(undef, ncases)
+    for i in 1:ncases
+        (; sig, item) = cases[i]
+        newitem = resolve_todo(item, state, flag)
+        push!(newcases, InliningCase(sig, newitem))
+    end
+    return UnionSplit(fully_covered, atype, newcases)
 end
 
-function resolve_todo!(todo::Vector{Pair{Int, Any}}, et::Union{EdgeTracker, Nothing}, caches::InferenceCaches)
-    for i = 1:length(todo)
-        idx, item = todo[i]
-        todo[i] = idx=>resolve_todo(item, et, caches)
+function validate_sparams(sparams::SimpleVector)
+    for i = 1:length(sparams)
+        (isa(sparams[i], TypeVar) || isvarargtype(sparams[i])) && return false
     end
-    todo
+    return true
 end
 
-function analyze_method!(match::MethodMatch, atypes::Vector{Any},
-                         et::Union{EdgeTracker, Nothing},
-                         caches::Union{InferenceCaches, Nothing},
-                         params::OptimizationParams, @nospecialize(stmttyp))
+function analyze_method!(match::MethodMatch, argtypes::Vector{Any},
+                         flag::UInt8, state::InliningState)
     method = match.method
-    methsig = method.sig
+    spec_types = match.spec_types
 
-    # Check that we habe the correct number of arguments
+    # Check that we have the correct number of arguments
     na = Int(method.nargs)
-    npassedargs = length(atypes)
+    npassedargs = length(argtypes)
     if na != npassedargs && !(na > 0 && method.isva)
         # we have a method match only because an earlier
         # inference step shortened our call args list, even
@@ -734,113 +888,72 @@ function analyze_method!(match::MethodMatch, atypes::Vector{Any},
         # call this function
         return nothing
     end
+    if !match.fully_covers
+        # type-intersection was not able to give us a simple list of types, so
+        # ir_inline_unionsplit won't be able to deal with inlining this
+        if !(spec_types isa DataType && length(spec_types.parameters) == length(argtypes) && !isvarargtype(spec_types.parameters[end]))
+            return nothing
+        end
+    end
 
     # Bail out if any static parameters are left as TypeVar
-    ok = true
-    for i = 1:length(match.sparams)
-        isa(match.sparams[i], TypeVar) && return nothing
-    end
+    validate_sparams(match.sparams) || return nothing
 
-    if !params.inlining
-        return compileable_specialization(et, match)
-    end
+    et = state.et
 
     # See if there exists a specialization for this method signature
-    mi = specialize_method(match, true) # Union{Nothing, MethodInstance}
-    if !isa(mi, MethodInstance)
-        return compileable_specialization(et, match)
-    end
+    mi = specialize_method(match; preexisting=true) # Union{Nothing, MethodInstance}
+    isa(mi, MethodInstance) || return compileable_specialization(et, match, Effects())
 
-    todo = InliningTodo(mi, match, atypes, stmttyp)
+    todo = InliningTodo(mi, match, argtypes)
     # If we don't have caches here, delay resolving this MethodInstance
     # until the batch inlining step (or an external post-processing pass)
-    caches === nothing && return todo
-    return resolve_todo(todo, et, caches)
+    state.mi_cache === nothing && return todo
+    return resolve_todo(todo, state, flag)
 end
 
-function InliningTodo(mi::MethodInstance, ir::IRCode)
-    return InliningTodo(mi, ResolvedInliningSpec(ir, linear_inline_eligible(ir)))
+function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects)
+    return InliningTodo(mi, ResolvedInliningSpec(ir, linear_inline_eligible(ir), effects))
 end
 
-function InliningTodo(mi::MethodInstance, src::Union{CodeInfo, Array{UInt8, 1}})
+function InliningTodo(mi::MethodInstance, src::Union{CodeInfo, Array{UInt8, 1}}, effects::Effects)
     if !isa(src, CodeInfo)
         src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo
     end
 
-    @timeit "inline IR inflation" begin
-        return InliningTodo(mi, inflate_ir(src, mi)::IRCode)
+    @timeit "inline IR inflation" begin;
+        return InliningTodo(mi, inflate_ir(src, mi)::IRCode, effects)
     end
 end
 
-# Neither the product iterator not CartesianIndices are available
-# here, so use this poor man's version
-struct SimpleCartesian
-    ranges::Vector{UnitRange{Int}}
-end
-function iterate(s::SimpleCartesian, state::Vector{Int}=Int[1 for _ in 1:length(s.ranges)])
-    state[end] > last(s.ranges[end]) && return nothing
-    vals = copy(state)
-    any = false
-    for i = 1:length(s.ranges)
-        if state[i] < last(s.ranges[i])
-            for j = 1:(i-1)
-                state[j] = first(s.ranges[j])
-            end
-            state[i] += 1
-            any = true
-            break
-        end
-    end
-    if !any
-        state[end] += 1
-    end
-    (vals, state)
-end
-
-# Given a signure, iterate over the signatures to union split over
-struct UnionSplitSignature
-    it::SimpleCartesian
-    typs::Vector{Any}
-end
-
-function UnionSplitSignature(atypes::Vector{Any})
-    typs = Any[uniontypes(widenconst(atypes[i])) for i = 1:length(atypes)]
-    ranges = UnitRange{Int}[1:length(typs[i]) for i = 1:length(typs)]
-    return UnionSplitSignature(SimpleCartesian(ranges), typs)
-end
-
-function iterate(split::UnionSplitSignature, state::Vector{Int}...)
-    y = iterate(split.it, state...)
-    y === nothing && return nothing
-    idxs, state = y
-    sig = Any[split.typs[i][j] for (i, j) in enumerate(idxs)]
-    return (sig, state)
-end
-
-function handle_single_case!(ir::IRCode, stmt::Expr, idx::Int, @nospecialize(case), isinvoke::Bool, todo::Vector{Pair{Int, Any}})
+function handle_single_case!(
+    ir::IRCode, idx::Int, stmt::Expr,
+    @nospecialize(case), todo::Vector{Pair{Int, Any}}, params::OptimizationParams, isinvoke::Bool = false)
     if isa(case, ConstantCase)
-        ir[SSAValue(idx)] = case.val
-    elseif isa(case, MethodInstance)
-        if isinvoke
-            stmt.args = rewrite_invoke_exprargs!(stmt.args)
-        end
+        ir[SSAValue(idx)][:inst] = case.val
+    elseif isa(case, InvokeCase)
+        is_total(case.effects) && inline_const_if_inlineable!(ir[SSAValue(idx)]) && return nothing
+        isinvoke && rewrite_invoke_exprargs!(stmt)
         stmt.head = :invoke
-        pushfirst!(stmt.args, case)
+        pushfirst!(stmt.args, case.invoke)
+        if is_removable_if_unused(case.effects)
+            ir[SSAValue(idx)][:flag] |= IR_FLAG_EFFECT_FREE
+        end
     elseif case === nothing
         # Do, well, nothing
     else
-        if isinvoke
-            stmt.args = rewrite_invoke_exprargs!(stmt.args)
-        end
+        isinvoke && rewrite_invoke_exprargs!(stmt)
         push!(todo, idx=>(case::InliningTodo))
     end
     nothing
 end
 
+rewrite_invoke_exprargs!(expr::Expr) = (expr.args = invoke_rewrite(expr.args); expr)
+
 function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::OptimizationParams)
-    if isa(typ, Const) && isa(typ.val, SimpleVector)
-        length(typ.val) > params.MAX_TUPLE_SPLAT && return false
-        for p in typ.val
+    if isa(typ, Const) && (v = typ.val; isa(v, SimpleVector))
+        length(v) > params.MAX_TUPLE_SPLAT && return false
+        for p in v
             is_inlineable_constant(p) || return false
         end
         return true
@@ -848,9 +961,7 @@ function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::Optimizatio
     typ = widenconst(typ)
     if isa(typ, DataType) && typ.name === NamedTuple_typename
         typ = typ.parameters[2]
-        while isa(typ, TypeVar)
-            typ = typ.ub
-        end
+        typ = unwraptv(typ)
     end
     isa(typ, DataType) || return false
     if typ.name === Tuple.name
@@ -860,24 +971,22 @@ function is_valid_type_for_apply_rewrite(@nospecialize(typ), params::Optimizatio
     end
 end
 
-function inline_splatnew!(ir::IRCode, idx::Int)
-    stmt = ir.stmts[idx][:inst]
-    ty = ir.stmts[idx][:type]
-    nf = nfields_tfunc(ty)
+function inline_splatnew!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(rt))
+    nf = nfields_tfunc(rt)
     if nf isa Const
         eargs = stmt.args
         tup = eargs[2]
-        tt = argextype(tup, ir, ir.sptypes)
+        tt = argextype(tup, ir)
         tnf = nfields_tfunc(tt)
         # TODO: hoisting this tnf.val === nf.val check into codegen
         # would enable us to almost always do this transform
         if tnf isa Const && tnf.val === nf.val
-            n = tnf.val
+            n = tnf.val::Int
             new_argexprs = Any[eargs[1]]
             for j = 1:n
                 atype = getfield_tfunc(tt, Const(j))
                 new_call = Expr(:call, Core.getfield, tup, j)
-                new_argexpr = insert_node!(ir, idx, atype, new_call)
+                new_argexpr = insert_node!(ir, idx, NewInstruction(new_call, atype))
                 push!(new_argexprs, new_argexpr)
             end
             stmt.head = :new
@@ -889,52 +998,50 @@ end
 
 function call_sig(ir::IRCode, stmt::Expr)
     isempty(stmt.args) && return nothing
-    ft = argextype(stmt.args[1], ir, ir.sptypes)
+    ft = argextype(stmt.args[1], ir)
     has_free_typevars(ft) && return nothing
     f = singleton_type(ft)
     f === Core.Intrinsics.llvmcall && return nothing
     f === Core.Intrinsics.cglobal && return nothing
-    atypes = Vector{Any}(undef, length(stmt.args))
-    atypes[1] = ft
-    ok = true
+    argtypes = Vector{Any}(undef, length(stmt.args))
+    argtypes[1] = ft
     for i = 2:length(stmt.args)
-        a = argextype(stmt.args[i], ir, ir.sptypes)
+        a = argextype(stmt.args[i], ir)
         (a === Bottom || isvarargtype(a)) && return nothing
-        atypes[i] = a
+        argtypes[i] = a
     end
-
-    Signature(f, ft, atypes)
+    return Signature(f, ft, argtypes)
 end
 
-function inline_apply!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sig::Signature,
-                       et, caches, params::OptimizationParams)
-    stmt = ir.stmts[idx][:inst]
-    while sig.f === Core._apply || sig.f === Core._apply_iterate
+function inline_apply!(
+    ir::IRCode, idx::Int, stmt::Expr, sig::Signature,
+    state::InliningState, todo::Vector{Pair{Int, Any}})
+    while sig.f === Core._apply_iterate
         info = ir.stmts[idx][:info]
         if isa(info, UnionSplitApplyCallInfo)
             if length(info.infos) != 1
                 # TODO: Handle union split applies?
-                new_info = info = nothing
+                new_info = info = false
             else
                 info = info.infos[1]
                 new_info = info.call
             end
         else
             @assert info === nothing || info === false
-            new_info = info = nothing
+            new_info = info = false
         end
-        arg_start = sig.f === Core._apply ? 2 : 3
-        atypes = sig.atypes
-        if arg_start > length(atypes)
+        arg_start = 3
+        argtypes = sig.argtypes
+        if arg_start > length(argtypes)
             return nothing
         end
-        ft = atypes[arg_start]
+        ft = argtypes[arg_start]
         if ft isa Const && ft.val === Core.tuple
             # if one argument is a tuple already, and the rest are empty, we can just return it
             # e.g. rewrite `((t::Tuple)...,)` to `t`
             nonempty_idx = 0
-            for i = (arg_start + 1):length(atypes)
-                ti = atypes[i]
+            for i = (arg_start + 1):length(argtypes)
+                ti = argtypes[i]
                 ti ⊑ Tuple{} && continue
                 if ti ⊑ Tuple && nonempty_idx == 0
                     nonempty_idx = i
@@ -950,25 +1057,27 @@ function inline_apply!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, sig::
         end
         # Try to figure out the signature of the function being called
         # and if rewrite_apply_exprargs can deal with this form
-        infos = Any[]
-        for i = (arg_start + 1):length(atypes)
+        arginfos = MaybeAbstractIterationInfo[]
+        for i = (arg_start + 1):length(argtypes)
             thisarginfo = nothing
-            if !is_valid_type_for_apply_rewrite(atypes[i], params)
+            if !is_valid_type_for_apply_rewrite(argtypes[i], state.params)
                 if isa(info, ApplyCallInfo) && info.arginfo[i-arg_start] !== nothing
                     thisarginfo = info.arginfo[i-arg_start]
                 else
                     return nothing
                 end
             end
-            push!(infos, thisarginfo)
+            push!(arginfos, thisarginfo)
         end
         # Independent of whether we can inline, the above analysis allows us to rewrite
         # this apply call to a regular call
-        stmt.args, atypes = rewrite_apply_exprargs!(ir, todo, idx, stmt.args, atypes, infos, arg_start, et, caches, params)
+        argtypes = rewrite_apply_exprargs!(
+            ir, idx, stmt, argtypes,
+            arginfos, arg_start, state, todo)
         ir.stmts[idx][:info] = new_info
         has_free_typevars(ft) && return nothing
         f = singleton_type(ft)
-        sig = Signature(f, ft, atypes)
+        sig = Signature(f, ft, argtypes)
     end
     sig
 end
@@ -980,277 +1089,379 @@ is_builtin(s::Signature) =
     isa(s.f, Builtin) ||
     s.ft ⊑ Builtin
 
-function inline_invoke!(ir::IRCode, idx::Int, sig::Signature, invoke_data::InvokeData, state::InliningState, todo::Vector{Pair{Int, Any}})
-    stmt = ir.stmts[idx][:inst]
-    calltype = ir.stmts[idx][:type]
-    method = invoke_data.entry
-    (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any),
-            sig.atype, method.sig)::SimpleVector
-    methsp = methsp::SimpleVector
-    match = MethodMatch(metharg, methsp, method, true)
-    result = analyze_method!(match, sig.atypes, state.et, state.caches, state.params, calltype)
-    handle_single_case!(ir, stmt, idx, result, true, todo)
-    intersect!(state.et, WorldRange(invoke_data.min_valid, invoke_data.max_valid))
+function inline_invoke!(
+    ir::IRCode, idx::Int, stmt::Expr, info::InvokeCallInfo, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+    match = info.match
+    if !match.fully_covers
+        # TODO: We could union split out the signature check and continue on
+        return nothing
+    end
+    result = info.result
+    if isa(result, ConcreteResult)
+        item = concrete_result_item(result, state)
+    else
+        argtypes = invoke_rewrite(sig.argtypes)
+        if isa(result, ConstPropResult)
+            (; mi) = item = InliningTodo(result.result, argtypes)
+            validate_sparams(mi.sparam_vals) || return nothing
+            if argtypes_to_type(argtypes) <: mi.def.sig
+                state.mi_cache !== nothing && (item = resolve_todo(item, state, flag))
+                handle_single_case!(ir, idx, stmt, item, todo, state.params, true)
+                return nothing
+            end
+        end
+        item = analyze_method!(match, argtypes, flag, state)
+    end
+    handle_single_case!(ir, idx, stmt, item, todo, state.params, true)
     return nothing
 end
 
+function narrow_opaque_closure!(ir::IRCode, stmt::Expr, @nospecialize(info), state::InliningState)
+    if isa(info, OpaqueClosureCreateInfo)
+        lbt = argextype(stmt.args[2], ir)
+        lb, exact = instanceof_tfunc(lbt)
+        exact || return
+        ubt = argextype(stmt.args[3], ir)
+        ub, exact = instanceof_tfunc(ubt)
+        exact || return
+        # Narrow opaque closure type
+        newT = widenconst(tmeet(tmerge(lb, info.unspec.rt), ub))
+        if newT != ub
+            # N.B.: Narrowing the ub requires a backdge on the mi whose type
+            # information we're using, since a change in that function may
+            # invalidate ub result.
+            stmt.args[3] = newT
+        end
+    end
+end
+
+# As a matter of convenience, this pass also computes effect-freenes.
+# For primitives, we do that right here. For proper calls, we will
+# discover this when we consult the caches.
+function check_effect_free!(ir::IRCode, idx::Int, @nospecialize(stmt), @nospecialize(rt))
+    if stmt_effect_free(stmt, rt, ir)
+        ir.stmts[idx][:flag] |= IR_FLAG_EFFECT_FREE
+        return true
+    end
+    return false
+end
+
 # Handles all analysis and inlining of intrinsics and builtins. In particular,
 # this method does not access the method table or otherwise process generic
 # functions.
-function process_simple!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, state::InliningState)
+function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vector{Pair{Int, Any}})
     stmt = ir.stmts[idx][:inst]
-    stmt isa Expr || return nothing
-    if stmt.head === :splatnew
-        inline_splatnew!(ir, idx)
+    rt = ir.stmts[idx][:type]
+    if !(stmt isa Expr)
+        check_effect_free!(ir, idx, stmt, rt)
+        return nothing
+    end
+    head = stmt.head
+    if head !== :call
+        if head === :splatnew
+            inline_splatnew!(ir, idx, stmt, rt)
+        elseif head === :new_opaque_closure
+            narrow_opaque_closure!(ir, stmt, ir.stmts[idx][:info], state)
+        end
+        check_effect_free!(ir, idx, stmt, rt)
         return nothing
     end
-
-    stmt.head === :call || return nothing
 
     sig = call_sig(ir, stmt)
     sig === nothing && return nothing
 
-    # Handle _apply
-    sig = inline_apply!(ir, todo, idx, sig, state.et, state.caches, state.params)
+    # Handle _apply_iterate
+    sig = inline_apply!(ir, idx, stmt, sig, state, todo)
     sig === nothing && return nothing
 
     # Check if we match any of the early inliners
-    calltype = ir.stmts[idx][:type]
-    res = early_inline_special_case(ir, sig, stmt, state.params, calltype)
-    if res !== nothing
-        ir.stmts[idx][:inst] = res
+    earlyres = early_inline_special_case(ir, stmt, rt, sig, state.params)
+    if isa(earlyres, SomeCase)
+        ir.stmts[idx][:inst] = earlyres.val
         return nothing
     end
-
-    # Handle invoke
-    invoke_data = nothing
-    if sig.f === Core.invoke && length(sig.atypes) >= 3
-        res = compute_invoke_data(sig.atypes, state.method_table)
-        res === nothing && return nothing
-        (sig, invoke_data) = res
-    elseif is_builtin(sig)
-        # No inlining for builtins (other than what was previously handled)
+    if (sig.f === modifyfield! || sig.ft ⊑ typeof(modifyfield!)) && 5 <= length(stmt.args) <= 6
+        let info = ir.stmts[idx][:info]
+            info isa MethodResultPure && (info = info.info)
+            info isa ConstCallInfo && (info = info.call)
+            info isa MethodMatchInfo || return nothing
+            length(info.results) == 1 || return nothing
+            match = info.results[1]::MethodMatch
+            match.fully_covers || return nothing
+            case = compileable_specialization(state.et, match, Effects())
+            case === nothing && return nothing
+            stmt.head = :invoke_modify
+            pushfirst!(stmt.args, case.invoke)
+            ir.stmts[idx][:inst] = stmt
+        end
         return nothing
     end
 
-    sig = with_atype(sig)
+    if check_effect_free!(ir, idx, stmt, rt)
+        if sig.f === typeassert || sig.ft ⊑ typeof(typeassert)
+            # typeassert is a no-op if effect free
+            ir.stmts[idx][:inst] = stmt.args[2]
+            return nothing
+        end
+    end
 
-    # In :invoke, make sure that the arguments we're passing are a subtype of the
-    # signature we're invoking.
-    (invoke_data === nothing || sig.atype <: invoke_data.types0) || return nothing
+    if sig.f !== Core.invoke && is_builtin(sig)
+        # No inlining for builtins (other invoke/apply/typeassert)
+        return nothing
+    end
 
     # Special case inliners for regular functions
-    if late_inline_special_case!(ir, sig, idx, stmt, state.params) || is_return_type(sig.f)
+    lateres = late_inline_special_case!(ir, idx, stmt, rt, sig, state.params)
+    if isa(lateres, SomeCase)
+        ir[SSAValue(idx)][:inst] = lateres.val
+        check_effect_free!(ir, idx, lateres.val, rt)
+        return nothing
+    elseif is_return_type(sig.f)
+        check_effect_free!(ir, idx, stmt, rt)
         return nothing
     end
-    return (sig, invoke_data)
-end
 
-# This is not currently called in the regular course, but may be needed
-# if we ever want to re-run inlining again later in the pass pipeline after
-# additional type information was discovered.
-function recompute_method_matches(@nospecialize(atype), params::OptimizationParams, et::EdgeTracker, method_table::MethodTableView)
-    # Regular case: Retrieve matching methods from cache (or compute them)
-    # World age does not need to be taken into account in the cache
-    # because it is forwarded from type inference through `sv.params`
-    # in the case that the cache is nonempty, so it should be unchanged
-    # The max number of methods should be the same as in inference most
-    # of the time, and should not affect correctness otherwise.
-    results = findall(atype, method_table; limit=params.MAX_METHODS)
-    results !== missing && intersect!(et, results.valid_worlds)
-    MethodMatchInfo(results)
+    return stmt, sig
 end
 
-function analyze_single_call!(ir::IRCode, todo::Vector{Pair{Int, Any}}, idx::Int, @nospecialize(stmt),
-        sig::Signature, @nospecialize(calltype), infos::Vector{MethodMatchInfo},
-        et, caches, params)
-    cases = Pair{Any, Any}[]
-    signature_union = Union{}
-    only_method = nothing  # keep track of whether there is one matching method
-    too_many = false
-    local meth
-    local fully_covered = true
+# TODO inline non-`isdispatchtuple`, union-split callsites?
+function analyze_single_call!(
+    ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+    argtypes = sig.argtypes
+    cases = InliningCase[]
+    local any_fully_covered = false
+    local handled_all_cases = true
     for i in 1:length(infos)
-        info = infos[i]
-        meth = info.results
-        if meth === missing || meth.ambig
+        meth = infos[i].results
+        if meth.ambig
             # Too many applicable methods
             # Or there is a (partial?) ambiguity
-            too_many = true
-            break
+            return nothing
         elseif length(meth) == 0
             # No applicable methods; try next union split
+            handled_all_cases = false
             continue
-        elseif length(meth) == 1 && only_method !== false
-            if only_method === nothing
-                only_method = meth[1].method
-            elseif only_method !== meth[1].method
-                only_method = false
-            end
-        else
-            only_method = false
         end
         for match in meth
-            signature_union = Union{signature_union, match.spec_types}
-            if !isdispatchtuple(match.spec_types)
-                fully_covered = false
-                continue
-            end
-            case = analyze_method!(match, sig.atypes, et, caches, params, calltype)
-            if case === nothing
-                fully_covered = false
-                continue
-            elseif _any(p->p[1] === match.spec_types, cases)
-                continue
-            end
-            push!(cases, Pair{Any,Any}(match.spec_types, case))
+            handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true)
+            any_fully_covered |= match.fully_covers
         end
     end
 
-    too_many && return
+    if !handled_all_cases
+        # if we've not seen all candidates, union split is valid only for dispatch tuples
+        filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
+    end
 
-    signature_fully_covered = sig.atype <: signature_union
-    # If we're fully covered and there's only one applicable method,
-    # we inline, even if the signature is not a dispatch tuple
-    if signature_fully_covered && length(cases) == 0 && only_method isa Method
-        if length(infos) > 1
-            (metharg, methsp) = ccall(:jl_type_intersection_with_env, Any, (Any, Any),
-                sig.atype, only_method.sig)::SimpleVector
-            match = MethodMatch(metharg, methsp, only_method, true)
-        else
-            @assert length(meth) == 1
-            match = meth[1]
+    handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases,
+        handled_all_cases & any_fully_covered, todo, state.params)
+end
+
+# similar to `analyze_single_call!`, but with constant results
+function handle_const_call!(
+    ir::IRCode, idx::Int, stmt::Expr, cinfo::ConstCallInfo, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+    argtypes = sig.argtypes
+    (; call, results) = cinfo
+    infos = isa(call, MethodMatchInfo) ? MethodMatchInfo[call] : call.matches
+    cases = InliningCase[]
+    local any_fully_covered = false
+    local handled_all_cases = true
+    local j = 0
+    for i in 1:length(infos)
+        meth = infos[i].results
+        if meth.ambig
+            # Too many applicable methods
+            # Or there is a (partial?) ambiguity
+            return nothing
+        elseif length(meth) == 0
+            # No applicable methods; try next union split
+            handled_all_cases = false
+            continue
+        end
+        for match in meth
+            j += 1
+            result = results[j]
+            any_fully_covered |= match.fully_covers
+            if isa(result, ConcreteResult)
+                case = concrete_result_item(result, state)
+                push!(cases, InliningCase(result.mi.specTypes, case))
+            elseif isa(result, ConstPropResult)
+                handled_all_cases &= handle_const_prop_result!(result, argtypes, flag, state, cases, true)
+            else
+                @assert result === nothing
+                handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true)
+            end
         end
-        fully_covered = true
-        case = analyze_method!(match, sig.atypes, et, caches, params, calltype)
-        case === nothing && return
-        push!(cases, Pair{Any,Any}(match.spec_types, case))
     end
-    if !signature_fully_covered
-        fully_covered = false
+
+    if !handled_all_cases
+        # if we've not seen all candidates, union split is valid only for dispatch tuples
+        filter!(case::InliningCase->isdispatchtuple(case.sig), cases)
     end
 
+    handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases,
+        handled_all_cases & any_fully_covered, todo, state.params)
+end
+
+function handle_match!(
+    match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState,
+    cases::Vector{InliningCase}, allow_abstract::Bool = false)
+    spec_types = match.spec_types
+    allow_abstract || isdispatchtuple(spec_types) || return false
+    # we may see duplicated dispatch signatures here when a signature gets widened
+    # during abstract interpretation: for the purpose of inlining, we can just skip
+    # processing this dispatch candidate
+    _any(case->case.sig === spec_types, cases) && return true
+    item = analyze_method!(match, argtypes, flag, state)
+    item === nothing && return false
+    push!(cases, InliningCase(spec_types, item))
+    return true
+end
+
+function handle_const_prop_result!(
+    result::ConstPropResult, argtypes::Vector{Any}, flag::UInt8, state::InliningState,
+    cases::Vector{InliningCase}, allow_abstract::Bool = false)
+    (; mi) = item = InliningTodo(result.result, argtypes)
+    spec_types = mi.specTypes
+    allow_abstract || isdispatchtuple(spec_types) || return false
+    validate_sparams(mi.sparam_vals) || return false
+    state.mi_cache !== nothing && (item = resolve_todo(item, state, flag))
+    item === nothing && return false
+    push!(cases, InliningCase(spec_types, item))
+    return true
+end
+
+function concrete_result_item(result::ConcreteResult, state::InliningState)
+    if !isdefined(result, :result) || !is_inlineable_constant(result.result)
+        return compileable_specialization(state.et, result.mi, result.effects)
+    end
+    @assert result.effects === EFFECTS_TOTAL
+    return ConstantCase(quoted(result.result))
+end
+
+function handle_cases!(ir::IRCode, idx::Int, stmt::Expr, @nospecialize(atype),
+    cases::Vector{InliningCase}, fully_covered::Bool, todo::Vector{Pair{Int, Any}},
+    params::OptimizationParams)
     # If we only have one case and that case is fully covered, we may either
     # be able to do the inlining now (for constant cases), or push it directly
     # onto the todo list
     if fully_covered && length(cases) == 1
-        handle_single_case!(ir, stmt, idx, cases[1][2], false, todo)
-        return
+        handle_single_case!(ir, idx, stmt, cases[1].item, todo, params)
+    elseif length(cases) > 0
+        isa(atype, DataType) || return nothing
+        for case in cases
+            isa(case.sig, DataType) || return nothing
+        end
+        push!(todo, idx=>UnionSplit(fully_covered, atype, cases))
     end
-    length(cases) == 0 && return
-    push!(todo, idx=>UnionSplit(fully_covered, sig.atype, cases))
     return nothing
 end
 
+function handle_const_opaque_closure_call!(
+    ir::IRCode, idx::Int, stmt::Expr, result::ConstPropResult, flag::UInt8,
+    sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}})
+    item = InliningTodo(result.result, sig.argtypes)
+    isdispatchtuple(item.mi.specTypes) || return
+    validate_sparams(item.mi.sparam_vals) || return
+    state.mi_cache !== nothing && (item = resolve_todo(item, state, flag))
+    handle_single_case!(ir, idx, stmt, item, todo, state.params)
+    return nothing
+end
+
+function inline_const_if_inlineable!(inst::Instruction)
+    rt = inst[:type]
+    if rt isa Const && is_inlineable_constant(rt.val)
+        inst[:inst] = quoted(rt.val)
+        return true
+    end
+    inst[:flag] |= IR_FLAG_EFFECT_FREE
+    return false
+end
+
 function assemble_inline_todo!(ir::IRCode, state::InliningState)
     # todo = (inline_idx, (isva, isinvoke, na), method, spvals, inline_linetable, inline_ir, lie)
     todo = Pair{Int, Any}[]
-    if state.params.unoptimize_throw_blocks
-        skip = find_throw_blocks(ir.stmts.inst, RefValue(ir))
-    end
+    et = state.et
+
     for idx in 1:length(ir.stmts)
-        state.params.unoptimize_throw_blocks && idx in skip && continue
-        r = process_simple!(ir, todo, idx, state)
-        r === nothing && continue
+        simpleres = process_simple!(ir, idx, state, todo)
+        simpleres === nothing && continue
+        stmt, sig = simpleres
 
-        stmt = ir.stmts[idx][:inst]
-        calltype = ir.stmts[idx][:type]
         info = ir.stmts[idx][:info]
-        # Inference determined this couldn't be analyzed. Don't question it.
+
+        # Check whether this call was @pure and evaluates to a constant
+        if info isa MethodResultPure
+            inline_const_if_inlineable!(ir[SSAValue(idx)]) && continue
+            info = info.info
+        end
         if info === false
+            # Inference determined this couldn't be analyzed. Don't question it.
             continue
         end
 
-        (sig, invoke_data) = r
+        flag = ir.stmts[idx][:flag]
 
-        # Check whether this call was @pure and evaluates to a constant
-        if calltype isa Const && info isa MethodResultPure
-            if is_inlineable_constant(calltype.val)
-                ir.stmts[idx][:inst] = quoted(calltype.val)
-                continue
+        if isa(info, OpaqueClosureCallInfo)
+            result = info.result
+            if isa(result, ConstPropResult)
+                handle_const_opaque_closure_call!(
+                    ir, idx, stmt, result, flag,
+                    sig, state, todo)
+            else
+                if isa(result, ConcreteResult)
+                    item = concrete_result_item(result, state)
+                else
+                    item = analyze_method!(info.match, sig.argtypes, flag, state)
+                end
+                handle_single_case!(ir, idx, stmt, item, todo, state.params)
+            end
+            continue
+        end
+
+        # Handle invoke
+        if sig.f === Core.invoke
+            if isa(info, InvokeCallInfo)
+                inline_invoke!(ir, idx, stmt, info, flag, sig, state, todo)
             end
+            continue
         end
 
-        # Ok, now figure out what method to call
-        if invoke_data !== nothing
-            inline_invoke!(ir, idx, sig, invoke_data, state, todo)
+        # if inference arrived here with constant-prop'ed result(s),
+        # we can perform a specialized analysis for just this case
+        if isa(info, ConstCallInfo)
+            handle_const_call!(
+                ir, idx, stmt, info, flag,
+                sig, state, todo)
             continue
         end
 
-        nu = countunionsplit(sig.atypes)
-        if nu == 1 || nu > state.params.MAX_UNION_SPLITTING
-            if !isa(info, MethodMatchInfo)
-                if state.method_table === nothing
-                    continue
-                end
-                info = recompute_method_matches(sig.atype, state.params, state.et, state.method_table)
-            end
+        # Ok, now figure out what method to call
+        if isa(info, MethodMatchInfo)
             infos = MethodMatchInfo[info]
+        elseif isa(info, UnionSplitInfo)
+            infos = info.matches
         else
-            if !isa(info, UnionSplitInfo)
-                if state.method_table === nothing
-                    continue
-                end
-                infos = MethodMatchInfo[]
-                for union_sig in UnionSplitSignature(sig.atypes)
-                    push!(infos, recompute_method_matches(argtypes_to_type(union_sig), state.params, state.et, state.method_table))
-                end
-            else
-                infos = info.matches
-            end
+            continue # isa(info, ReturnTypeCallInfo), etc.
         end
 
-        analyze_single_call!(ir, todo, idx, stmt, sig, calltype, infos, state.et, state.caches, state.params)
+        analyze_single_call!(ir, idx, stmt, infos, flag, sig, state, todo)
     end
-    todo
-end
 
-function mk_tuplecall!(compact::IncrementalCompact, args::Vector{Any}, line_idx::Int32)
-    e = Expr(:call, TOP_TUPLE, args...)
-    etyp = tuple_tfunc(Any[compact_exprtype(compact, args[i]) for i in 1:length(args)])
-    return insert_node_here!(compact, e, etyp, line_idx)
+    return todo
 end
 
 function linear_inline_eligible(ir::IRCode)
     length(ir.cfg.blocks) == 1 || return false
-    terminator = ir[SSAValue(last(ir.cfg.blocks[1].stmts))]
+    terminator = ir[SSAValue(last(ir.cfg.blocks[1].stmts))][:inst]
     isa(terminator, ReturnNode) || return false
     isdefined(terminator, :val) || return false
     return true
 end
 
-function compute_invoke_data(@nospecialize(atypes), method_table)
-    ft = widenconst(atypes[2])
-    if !isdispatchelem(ft) || has_free_typevars(ft) || (ft <: Builtin)
-        # TODO: this can be rather aggressive at preventing inlining of closures
-        # but we need to check that `ft` can't have a subtype at runtime before using the supertype lookup below
-        return nothing
-    end
-    invoke_tt = widenconst(atypes[3])
-    if !isType(invoke_tt) || has_free_typevars(invoke_tt)
-        return nothing
-    end
-    invoke_tt = invoke_tt.parameters[1]
-    if !(isa(unwrap_unionall(invoke_tt), DataType) && invoke_tt <: Tuple)
-        return nothing
-    end
-    if method_table === nothing
-        # TODO: These should be forwarded in stmt_info, just like regular
-        # method lookup results
-        return nothing
-    end
-    invoke_types = rewrap_unionall(Tuple{ft, unwrap_unionall(invoke_tt).parameters...}, invoke_tt)
-    invoke_entry = findsup(invoke_types, method_table)
-    invoke_entry === nothing && return nothing
-    method, valid_worlds = invoke_entry
-    invoke_data = InvokeData(method, invoke_types, first(valid_worlds), last(valid_worlds))
-    atype0 = atypes[2]
-    atypes = atypes[4:end]
-    pushfirst!(atypes, atype0)
-    f = singleton_type(ft)
-    return (Signature(f, ft, atypes), invoke_data)
-end
-
 # Check for a number of functions known to be pure
 function ispuretopfunction(@nospecialize(f))
     return istopfunction(f, :typejoin) ||
@@ -1259,76 +1470,78 @@ function ispuretopfunction(@nospecialize(f))
         istopfunction(f, :promote_type)
 end
 
-function early_inline_special_case(ir::IRCode, s::Signature, e::Expr, params::OptimizationParams,
-                                   @nospecialize(etype))
-    f, ft, atypes = s.f, s.ft, s.atypes
-    if (f === typeassert || ft ⊑ typeof(typeassert)) && length(atypes) == 3
-        # typeassert(x::S, T) => x, when S<:T
-        a3 = atypes[3]
-        if (isType(a3) && !has_free_typevars(a3) && atypes[2] ⊑ a3.parameters[1]) ||
-            (isa(a3, Const) && isa(a3.val, Type) && atypes[2] ⊑ a3.val)
-            val = e.args[2]
-            val === nothing && return QuoteNode(val)
-            return val
-        end
-    end
-
-    if params.inlining
-        if isa(etype, Const) # || isconstType(etype)
-            val = etype.val
-            is_inlineable_constant(val) || return nothing
-            if isa(f, IntrinsicFunction)
-                if is_pure_intrinsic_infer(f) && intrinsic_nothrow(f, atypes[2:end])
-                    return quoted(val)
-                end
-            elseif ispuretopfunction(f) || contains_is(_PURE_BUILTINS, f)
-                return quoted(val)
-            elseif contains_is(_PURE_OR_ERROR_BUILTINS, f)
-                if _builtin_nothrow(f, atypes[2:end], etype)
-                    return quoted(val)
-                end
+function early_inline_special_case(
+    ir::IRCode, stmt::Expr, @nospecialize(type), sig::Signature,
+    params::OptimizationParams)
+    params.inlining || return nothing
+    (; f, ft, argtypes) = sig
+
+    if isa(type, Const) # || isconstType(type)
+        val = type.val
+        is_inlineable_constant(val) || return nothing
+        if isa(f, IntrinsicFunction)
+            if is_pure_intrinsic_infer(f) && intrinsic_nothrow(f, argtypes[2:end])
+                return SomeCase(quoted(val))
+            end
+        elseif ispuretopfunction(f) || contains_is(_PURE_BUILTINS, f)
+            return SomeCase(quoted(val))
+        elseif contains_is(_EFFECT_FREE_BUILTINS, f)
+            if _builtin_nothrow(f, argtypes[2:end], type)
+                return SomeCase(quoted(val))
+            end
+        elseif f === Core.get_binding_type
+            length(argtypes) == 3 || return nothing
+            if get_binding_type_effect_free(argtypes[2], argtypes[3])
+                return SomeCase(quoted(val))
             end
         end
     end
-
     return nothing
 end
 
-function late_inline_special_case!(ir::IRCode, sig::Signature, idx::Int, stmt::Expr, params::OptimizationParams)
-    f, ft, atypes = sig.f, sig.ft, sig.atypes
-    typ = ir.stmts[idx][:type]
-    if params.inlining && length(atypes) == 3 && istopfunction(f, :!==)
+# special-case some regular method calls whose results are not folded within `abstract_call_known`
+# (and thus `early_inline_special_case` doesn't handle them yet)
+# NOTE we manually inline the method bodies, and so the logic here needs to precisely sync with their definitions
+function late_inline_special_case!(
+    ir::IRCode, idx::Int, stmt::Expr, @nospecialize(type), sig::Signature,
+    params::OptimizationParams)
+    params.inlining || return nothing
+    (; f, ft, argtypes) = sig
+    if length(argtypes) == 3 && istopfunction(f, :!==)
         # special-case inliner for !== that precedes _methods_by_ftype union splitting
         # and that works, even though inference generally avoids inferring the `!==` Method
-        if isa(typ, Const)
-            ir[SSAValue(idx)] = quoted(typ.val)
-            return true
+        if isa(type, Const)
+            return SomeCase(quoted(type.val))
         end
         cmp_call = Expr(:call, GlobalRef(Core, :(===)), stmt.args[2], stmt.args[3])
-        cmp_call_ssa = insert_node!(ir, idx, Bool, cmp_call)
+        cmp_call_ssa = insert_node!(ir, idx, effect_free(NewInstruction(cmp_call, Bool)))
         not_call = Expr(:call, GlobalRef(Core.Intrinsics, :not_int), cmp_call_ssa)
-        ir[SSAValue(idx)] = not_call
-        return true
-    elseif params.inlining && length(atypes) == 3 && istopfunction(f, :(>:))
+        return SomeCase(not_call)
+    elseif length(argtypes) == 3 && istopfunction(f, :(>:))
         # special-case inliner for issupertype
         # that works, even though inference generally avoids inferring the `>:` Method
-        if isa(typ, Const)
-            ir[SSAValue(idx)] = quoted(typ.val)
-            return true
+        if isa(type, Const) && _builtin_nothrow(<:, Any[argtypes[3], argtypes[2]], type)
+            return SomeCase(quoted(type.val))
         end
         subtype_call = Expr(:call, GlobalRef(Core, :(<:)), stmt.args[3], stmt.args[2])
-        ir[SSAValue(idx)] = subtype_call
-        return true
+        return SomeCase(subtype_call)
+    elseif f === TypeVar && 2 <= length(argtypes) <= 4 && (argtypes[2] ⊑ Symbol)
+        typevar_call = Expr(:call, GlobalRef(Core, :_typevar), stmt.args[2],
+            length(stmt.args) < 4 ? Bottom : stmt.args[3],
+            length(stmt.args) == 2 ? Any : stmt.args[end])
+        return SomeCase(typevar_call)
+    elseif f === UnionAll && length(argtypes) == 3 && (argtypes[2] ⊑ TypeVar)
+        unionall_call = Expr(:foreigncall, QuoteNode(:jl_type_unionall), Any, svec(Any, Any),
+            0, QuoteNode(:ccall), stmt.args[2], stmt.args[3])
+        return SomeCase(unionall_call)
     elseif is_return_type(f)
-        if isconstType(typ)
-            ir[SSAValue(idx)] = quoted(typ.parameters[1])
-            return true
-        elseif isa(typ, Const)
-            ir[SSAValue(idx)] = quoted(typ.val)
-            return true
+        if isconstType(type)
+            return SomeCase(quoted(type.parameters[1]))
+        elseif isa(type, Const)
+            return SomeCase(quoted(type.val))
         end
     end
-    return false
+    return nothing
 end
 
 function ssa_substitute!(idx::Int, @nospecialize(val), arg_replacements::Vector{Any},
@@ -1348,25 +1561,22 @@ function ssa_substitute_op!(@nospecialize(val), arg_replacements::Vector{Any},
         e = val::Expr
         head = e.head
         if head === :static_parameter
-            return quoted(spvals[e.args[1]])
+            return quoted(spvals[e.args[1]::Int])
         elseif head === :cfunction
             @assert !isa(spsig, UnionAll) || !isempty(spvals)
             e.args[3] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[3], spsig, spvals)
             e.args[4] = svec(Any[
                 ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals)
-                for argt
-                in e.args[4] ]...)
+                for argt in e.args[4]::SimpleVector ]...)
         elseif head === :foreigncall
             @assert !isa(spsig, UnionAll) || !isempty(spvals)
             for i = 1:length(e.args)
                 if i == 2
                     e.args[2] = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), e.args[2], spsig, spvals)
                 elseif i == 3
-                    argtuple = Any[
+                    e.args[3] = svec(Any[
                         ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), argt, spsig, spvals)
-                        for argt
-                        in e.args[3] ]
-                    e.args[3] = svec(argtuple...)
+                        for argt in e.args[3]::SimpleVector ]...)
                 end
             end
         elseif head === :boundscheck
@@ -1385,46 +1595,3 @@ function ssa_substitute_op!(@nospecialize(val), arg_replacements::Vector{Any},
     end
     return urs[]
 end
-
-function find_inferred(mi::MethodInstance, atypes::Vector{Any}, caches::InferenceCaches, @nospecialize(rettype))
-    if caches.inf_cache !== nothing
-        # see if the method has a InferenceResult in the current cache
-        # or an existing inferred code info store in `.inferred`
-        haveconst = false
-        for i in 1:length(atypes)
-            if has_nontrivial_const_info(atypes[i])
-                # have new information from argtypes that wasn't available from the signature
-                haveconst = true
-                break
-            end
-        end
-        if haveconst || improvable_via_constant_propagation(rettype)
-            inf_result = cache_lookup(mi, atypes, caches.inf_cache) # Union{Nothing, InferenceResult}
-        else
-            inf_result = nothing
-        end
-        #XXX: update_valid_age!(min_valid[1], max_valid[1], sv)
-        if isa(inf_result, InferenceResult)
-            let inferred_src = inf_result.src
-                if isa(inferred_src, CodeInfo)
-                    return svec(false, inferred_src)
-                end
-                if isa(inferred_src, Const) && is_inlineable_constant(inferred_src.val)
-                    return svec(true, quoted(inferred_src.val),)
-                end
-            end
-        end
-    end
-
-    linfo = get(caches.mi_cache, mi, nothing)
-    if linfo isa CodeInstance
-        if invoke_api(linfo) == 2
-            # in this case function can be inlined to a constant
-            return svec(true, quoted(linfo.rettype_const))
-        end
-        return svec(false, linfo.inferred)
-    else
-        # `linfo` may be `nothing` or an IRCode here
-        return svec(false, linfo)
-    end
-end
diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl
index e7003473e1cbd3..2f1359e4002aea 100644
--- a/base/compiler/ssair/ir.jl
+++ b/base/compiler/ssair/ir.jl
@@ -1,45 +1,41 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-@inline isexpr(@nospecialize(stmt), head::Symbol) = isa(stmt, Expr) && stmt.head === head
 Core.PhiNode() = Core.PhiNode(Int32[], Any[])
 
-"""
-Like UnitRange{Int}, but can handle the `last` field, being temporarily
-< first (this can happen during compacting)
-"""
-struct StmtRange <: AbstractUnitRange{Int}
-    start::Int
-    stop::Int
-end
-first(r::StmtRange) = r.start
-last(r::StmtRange) = r.stop
-iterate(r::StmtRange, state=0) = (last(r) - first(r) < state) ? nothing : (first(r) + state, state + 1)
-
-StmtRange(range::UnitRange{Int}) = StmtRange(first(range), last(range))
-
-struct BasicBlock
-    stmts::StmtRange
-    preds::Vector{Int}
-    succs::Vector{Int}
-end
-function BasicBlock(stmts::StmtRange)
-    return BasicBlock(stmts, Int[], Int[])
-end
-function BasicBlock(old_bb, stmts)
-    return BasicBlock(stmts, old_bb.preds, old_bb.succs)
-end
-copy(bb::BasicBlock) = BasicBlock(bb.stmts, copy(bb.preds), copy(bb.succs))
+isterminator(@nospecialize(stmt)) = isa(stmt, GotoNode) || isa(stmt, GotoIfNot) || isa(stmt, ReturnNode)
 
 struct CFG
     blocks::Vector{BasicBlock}
     index::Vector{Int} # map from instruction => basic-block number
                        # TODO: make this O(1) instead of O(log(n_blocks))?
 end
+
 copy(c::CFG) = CFG(BasicBlock[copy(b) for b in c.blocks], copy(c.index))
 
+function cfg_insert_edge!(cfg::CFG, from::Int, to::Int)
+    # Assumes that this edge does not already exist
+    push!(cfg.blocks[to].preds, from)
+    push!(cfg.blocks[from].succs, to)
+    nothing
+end
+
+function cfg_delete_edge!(cfg::CFG, from::Int, to::Int)
+    preds = cfg.blocks[to].preds
+    succs = cfg.blocks[from].succs
+    # Assumes that blocks appear at most once in preds and succs
+    deleteat!(preds, findfirst(x->x === from, preds)::Int)
+    deleteat!(succs, findfirst(x->x === to, succs)::Int)
+    nothing
+end
+
 function block_for_inst(index::Vector{Int}, inst::Int)
     return searchsortedfirst(index, inst, lt=(<=))
 end
+
+function block_for_inst(index::Vector{BasicBlock}, inst::Int)
+    return searchsortedfirst(index, BasicBlock(StmtRange(inst, inst)), by=x->first(x.stmts), lt=(<=))-1
+end
+
 block_for_inst(cfg::CFG, inst::Int) = block_for_inst(cfg.index, inst)
 
 function basic_blocks_starts(stmts::Vector{Any})
@@ -92,7 +88,7 @@ function compute_basic_blocks(stmts::Vector{Any})
     bb_starts = basic_blocks_starts(stmts)
     # Compute ranges
     pop!(bb_starts, 1)
-    basic_block_index = collect(bb_starts)
+    basic_block_index = sort!(collect(bb_starts); alg=QuickSort)
     blocks = BasicBlock[]
     sizehint!(blocks, length(basic_block_index))
     let first = 1
@@ -129,9 +125,6 @@ function compute_basic_blocks(stmts::Vector{Any})
                 # :enter gets a virtual edge to the exception handler and
                 # the exception handler gets a virtual edge from outside
                 # the function.
-                # See the devdocs on exception handling in SSA form (or
-                # bug Keno to write them, if you're reading this and they
-                # don't exist)
                 block′ = block_for_inst(basic_block_index, terminator.args[1]::Int)
                 push!(blocks[block′].preds, num)
                 push!(blocks[block′].preds, 0)
@@ -147,6 +140,7 @@ function compute_basic_blocks(stmts::Vector{Any})
     return CFG(blocks, basic_block_index)
 end
 
+# this function assumes insert position exists
 function first_insert_for_bb(code, cfg::CFG, block::Int)
     for idx in cfg.blocks[block].stmts
         stmt = code[idx]
@@ -154,9 +148,54 @@ function first_insert_for_bb(code, cfg::CFG, block::Int)
             return idx
         end
     end
+    error("any insert position isn't found")
+end
+
+# SSA values that need renaming
+struct OldSSAValue
+    id::Int
+end
+
+# SSA values that are in `new_new_nodes` of an `IncrementalCompact` and are to
+# be actually inserted next time (they become `new_nodes` next time)
+struct NewSSAValue
+    id::Int
 end
 
+const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue}
+
+
 # SSA-indexed nodes
+
+struct NewInstruction
+    stmt::Any
+    type::Any
+    info::Any
+    # If nothing, copy the line from previous statement
+    # in the insertion location
+    line::Union{Int32, Nothing}
+    flag::UInt8
+
+    ## Insertion options
+
+    # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced).
+    # Don't bother redoing so on insertion.
+    effect_free_computed::Bool
+    NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info),
+            line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) =
+        new(stmt, type, info, line, flag, effect_free_computed)
+end
+NewInstruction(@nospecialize(stmt), @nospecialize(type)) =
+    NewInstruction(stmt, type, nothing)
+NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) =
+    NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false)
+
+effect_free(inst::NewInstruction) =
+    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true)
+non_effect_free(inst::NewInstruction) =
+    NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true)
+
+
 struct InstructionStream
     inst::Vector{Any}
     type::Vector{Any}
@@ -170,7 +209,7 @@ function InstructionStream(len::Int)
     info = Array{Any}(undef, len)
     fill!(info, nothing)
     lines = fill(Int32(0), len)
-    flags = fill(0x00, len)
+    flags = fill(IR_FLAG_NULL, len)
     return InstructionStream(insts, types, info, lines, flags)
 end
 InstructionStream() = InstructionStream(0)
@@ -198,7 +237,7 @@ function resize!(stmts::InstructionStream, len)
     resize!(stmts.flag, len)
     for i in (old_length + 1):len
         stmts.line[i] = 0
-        stmts.flag[i] = 0x00
+        stmts.flag[i] = IR_FLAG_NULL
         stmts.info[i] = nothing
     end
     return stmts
@@ -228,6 +267,10 @@ function setindex!(is::InstructionStream, newval::Instruction, idx::Int)
     is.flag[idx] = newval[:flag]
     return is
 end
+function setindex!(is::InstructionStream, newval::AnySSAValue, idx::Int)
+    is.inst[idx] = newval
+    return is
+end
 function setindex!(node::Instruction, newval::Instruction)
     node.data[node.idx] = newval
     return node
@@ -259,9 +302,9 @@ struct IRCode
     linetable::Vector{LineInfoNode}
     cfg::CFG
     new_nodes::NewNodeStream
-    meta::Vector{Any}
+    meta::Vector{Expr}
 
-    function IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Any}, sptypes::Vector{Any})
+    function IRCode(stmts::InstructionStream, cfg::CFG, linetable::Vector{LineInfoNode}, argtypes::Vector{Any}, meta::Vector{Expr}, sptypes::Vector{Any})
         return new(stmts, argtypes, sptypes, linetable, cfg, NewNodeStream(), meta)
     end
     function IRCode(ir::IRCode, stmts::InstructionStream, cfg::CFG, new_nodes::NewNodeStream)
@@ -272,152 +315,143 @@ struct IRCode
         copy(ir.linetable), copy(ir.cfg), copy(ir.new_nodes), copy(ir.meta))
 end
 
+function block_for_inst(ir::IRCode, inst::Int)
+    if inst > length(ir.stmts)
+        inst = ir.new_nodes.info[inst - length(ir.stmts)].pos
+    end
+    block_for_inst(ir.cfg, inst)
+end
+
 function getindex(x::IRCode, s::SSAValue)
     if s.id <= length(x.stmts)
-        return x.stmts[s.id][:inst]
+        return x.stmts[s.id]
     else
-        return x.new_nodes.stmts[s.id - length(x.stmts)][:inst]
+        return x.new_nodes.stmts[s.id - length(x.stmts)]
     end
 end
 
-function setindex!(x::IRCode, @nospecialize(repl), s::SSAValue)
+function setindex!(x::IRCode, repl::Union{Instruction, AnySSAValue}, s::SSAValue)
     if s.id <= length(x.stmts)
-        x.stmts[s.id][:inst] = repl
+        x.stmts[s.id] = repl
     else
-        x.new_nodes.stmts[s.id - length(x.stmts)][:inst] = repl
+        x.new_nodes.stmts[s.id - length(x.stmts)] = repl
     end
     return x
 end
 
-# SSA values that need renaming
-struct OldSSAValue
-    id::Int
-end
-
-# SSA values that are in `new_new_nodes` of an `IncrementalCompact` and are to
-# be actually inserted next time (they become `new_nodes` next time)
-struct NewSSAValue
-    id::Int
-end
-
-const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue}
-
-mutable struct UseRef
+mutable struct UseRefIterator
     stmt::Any
-    op::Int
-    UseRef(@nospecialize(a)) = new(a, 0)
-end
-struct UseRefIterator
-    use::Tuple{UseRef, Nothing}
     relevant::Bool
-    UseRefIterator(@nospecialize(a), relevant::Bool) = new((UseRef(a), nothing), relevant)
+    UseRefIterator(@nospecialize(a), relevant::Bool) = new(a, relevant)
 end
-getindex(it::UseRefIterator) = it.use[1].stmt
+getindex(it::UseRefIterator) = it.stmt
 
-# TODO: stack-allocation
-#struct UseRef
-#    urs::UseRefIterator
-#    use::Int
-#end
-
-struct OOBToken
-end
-
-struct UndefToken
+struct UseRef
+    urs::UseRefIterator
+    op::Int
+    UseRef(urs::UseRefIterator) = new(urs, 0)
+    UseRef(urs::UseRefIterator, op::Int) = new(urs, op)
 end
-const undef_token = UndefToken()
 
+struct OOBToken end; const OOB_TOKEN = OOBToken()
+struct UndefToken end; const UNDEF_TOKEN = UndefToken()
 
-function getindex(x::UseRef)
-    stmt = x.stmt
+@noinline function _useref_getindex(@nospecialize(stmt), op::Int)
     if isa(stmt, Expr) && stmt.head === :(=)
         rhs = stmt.args[2]
         if isa(rhs, Expr)
             if is_relevant_expr(rhs)
-                x.op > length(rhs.args) && return OOBToken()
-                return rhs.args[x.op]
+                op > length(rhs.args) && return OOB_TOKEN
+                return rhs.args[op]
             end
         end
-        x.op == 1 || return OOBToken()
+        op == 1 || return OOB_TOKEN
         return rhs
     elseif isa(stmt, Expr) # @assert is_relevant_expr(stmt)
-        x.op > length(stmt.args) && return OOBToken()
-        return stmt.args[x.op]
+        op > length(stmt.args) && return OOB_TOKEN
+        return stmt.args[op]
     elseif isa(stmt, GotoIfNot)
-        x.op == 1 || return OOBToken()
+        op == 1 || return OOB_TOKEN
         return stmt.cond
     elseif isa(stmt, ReturnNode)
-        isdefined(stmt, :val) || return OOBToken()
-        x.op == 1 || return OOBToken()
+        isdefined(stmt, :val) || return OOB_TOKEN
+        op == 1 || return OOB_TOKEN
         return stmt.val
     elseif isa(stmt, PiNode)
-        isdefined(stmt, :val) || return OOBToken()
-        x.op == 1 || return OOBToken()
+        isdefined(stmt, :val) || return OOB_TOKEN
+        op == 1 || return OOB_TOKEN
         return stmt.val
     elseif isa(stmt, UpsilonNode)
-        isdefined(stmt, :val) || return OOBToken()
-        x.op == 1 || return OOBToken()
+        isdefined(stmt, :val) || return OOB_TOKEN
+        op == 1 || return OOB_TOKEN
         return stmt.val
     elseif isa(stmt, PhiNode)
-        x.op > length(stmt.values) && return OOBToken()
-        isassigned(stmt.values, x.op) || return UndefToken()
-        return stmt.values[x.op]
+        op > length(stmt.values) && return OOB_TOKEN
+        isassigned(stmt.values, op) || return UNDEF_TOKEN
+        return stmt.values[op]
     elseif isa(stmt, PhiCNode)
-        x.op > length(stmt.values) && return OOBToken()
-        isassigned(stmt.values, x.op) || return UndefToken()
-        return stmt.values[x.op]
+        op > length(stmt.values) && return OOB_TOKEN
+        isassigned(stmt.values, op) || return UNDEF_TOKEN
+        return stmt.values[op]
     else
-        return OOBToken()
+        return OOB_TOKEN
     end
 end
+@inline getindex(x::UseRef) = _useref_getindex(x.urs.stmt, x.op)
 
 function is_relevant_expr(e::Expr)
-    return e.head in (:call, :invoke, :new, :splatnew, :(=), :(&),
+    return e.head in (:call, :invoke, :invoke_modify,
+                      :new, :splatnew, :(=), :(&),
                       :gc_preserve_begin, :gc_preserve_end,
                       :foreigncall, :isdefined, :copyast,
                       :undefcheck, :throw_undef_if_not,
-                      :cfunction, :method, :pop_exception)
+                      :cfunction, :method, :pop_exception,
+                      :new_opaque_closure)
 end
 
-function setindex!(x::UseRef, @nospecialize(v))
-    stmt = x.stmt
+@noinline function _useref_setindex!(@nospecialize(stmt), op::Int, @nospecialize(v))
     if isa(stmt, Expr) && stmt.head === :(=)
         rhs = stmt.args[2]
         if isa(rhs, Expr)
             if is_relevant_expr(rhs)
-                x.op > length(rhs.args) && throw(BoundsError())
-                rhs.args[x.op] = v
-                return v
+                op > length(rhs.args) && throw(BoundsError())
+                rhs.args[op] = v
+                return stmt
             end
         end
-        x.op == 1 || throw(BoundsError())
+        op == 1 || throw(BoundsError())
         stmt.args[2] = v
     elseif isa(stmt, Expr) # @assert is_relevant_expr(stmt)
-        x.op > length(stmt.args) && throw(BoundsError())
-        stmt.args[x.op] = v
+        op > length(stmt.args) && throw(BoundsError())
+        stmt.args[op] = v
     elseif isa(stmt, GotoIfNot)
-        x.op == 1 || throw(BoundsError())
-        x.stmt = GotoIfNot(v, stmt.dest)
+        op == 1 || throw(BoundsError())
+        stmt = GotoIfNot(v, stmt.dest)
     elseif isa(stmt, ReturnNode)
-        x.op == 1 || throw(BoundsError())
-        x.stmt = typeof(stmt)(v)
+        op == 1 || throw(BoundsError())
+        stmt = typeof(stmt)(v)
     elseif isa(stmt, UpsilonNode)
-        x.op == 1 || throw(BoundsError())
-        x.stmt = typeof(stmt)(v)
+        op == 1 || throw(BoundsError())
+        stmt = typeof(stmt)(v)
     elseif isa(stmt, PiNode)
-        x.op == 1 || throw(BoundsError())
-        x.stmt = typeof(stmt)(v, stmt.typ)
+        op == 1 || throw(BoundsError())
+        stmt = typeof(stmt)(v, stmt.typ)
     elseif isa(stmt, PhiNode)
-        x.op > length(stmt.values) && throw(BoundsError())
-        isassigned(stmt.values, x.op) || throw(BoundsError())
-        stmt.values[x.op] = v
+        op > length(stmt.values) && throw(BoundsError())
+        isassigned(stmt.values, op) || throw(BoundsError())
+        stmt.values[op] = v
     elseif isa(stmt, PhiCNode)
-        x.op > length(stmt.values) && throw(BoundsError())
-        isassigned(stmt.values, x.op) || throw(BoundsError())
-        stmt.values[x.op] = v
+        op > length(stmt.values) && throw(BoundsError())
+        isassigned(stmt.values, op) || throw(BoundsError())
+        stmt.values[op] = v
     else
         throw(BoundsError())
     end
+    return stmt
+end
+
+@inline function setindex!(x::UseRef, @nospecialize(v))
+    x.urs.stmt = _useref_setindex!(x.urs.stmt, x.op, v)
     return x
 end
 
@@ -428,18 +462,22 @@ function userefs(@nospecialize(x))
     return UseRefIterator(x, relevant)
 end
 
-iterate(it::UseRefIterator) = (it.use[1].op = 0; iterate(it, nothing))
-@noinline function iterate(it::UseRefIterator, ::Nothing)
-    it.relevant || return nothing
-    use = it.use[1]
+@noinline function _advance(@nospecialize(stmt), op)
     while true
-        use.op += 1
-        y = use[]
-        y === OOBToken() && return nothing
-        y === UndefToken() || return it.use
+        op += 1
+        y = _useref_getindex(stmt, op)
+        y === OOB_TOKEN && return nothing
+        y === UNDEF_TOKEN || return op
     end
 end
 
+@inline function iterate(it::UseRefIterator, op::Int=0)
+    it.relevant || return nothing
+    op = _advance(it.stmt, op)
+    op === nothing && return nothing
+    return (UseRef(it, op), op)
+end
+
 # This function is used from the show code, which may have a different
 # `push!`/`used` type since it's in Base.
 function scan_ssa_use!(push!, used, @nospecialize(stmt))
@@ -487,10 +525,16 @@ function foreachssa(f, @nospecialize(stmt))
     end
 end
 
-function insert_node!(ir::IRCode, pos::Int, @nospecialize(typ), @nospecialize(val), attach_after::Bool=false)
-    line = ir.stmts[pos][:line]
+function insert_node!(ir::IRCode, pos::Int, inst::NewInstruction, attach_after::Bool=false)
     node = add!(ir.new_nodes, pos, attach_after)
-    node[:inst], node[:type], node[:line], node[:flag] = val, typ, line, 0x00
+    node[:line] = something(inst.line, ir.stmts[pos][:line])
+    flag = inst.flag
+    if !inst.effect_free_computed
+        if stmt_effect_free(inst.stmt, inst.type, ir)
+            flag |= IR_FLAG_EFFECT_FREE
+        end
+    end
+    node[:inst], node[:type], node[:flag] = inst.stmt, inst.type, flag
     return SSAValue(length(ir.stmts) + node.idx)
 end
 
@@ -519,6 +563,7 @@ mutable struct IncrementalCompact
     new_nodes_idx::Int
     # This supports insertion while compacting
     new_new_nodes::NewNodeStream  # New nodes that were before the compaction point at insertion time
+    new_new_used_ssas::Vector{Int}
     # TODO: Switch these two to a min-heap of some sort
     pending_nodes::NewNodeStream  # New nodes that were after the compaction point at insertion time
     pending_perm::Vector{Int}
@@ -539,12 +584,14 @@ mutable struct IncrementalCompact
         new_len = length(code.stmts) + length(code.new_nodes)
         result = InstructionStream(new_len)
         used_ssas = fill(0, new_len)
+        new_new_used_ssas = Vector{Int}()
         blocks = code.cfg.blocks
         if allow_cfg_transforms
             bb_rename = Vector{Int}(undef, length(blocks))
             cur_bb = 1
+            domtree = construct_domtree(blocks)
             for i = 1:length(bb_rename)
-                if i != 1 && length(blocks[i].preds) == 0
+                if bb_unreachable(domtree, i)
                     bb_rename[i] = -1
                 else
                     bb_rename[i] = cur_bb
@@ -580,7 +627,7 @@ mutable struct IncrementalCompact
         pending_nodes = NewNodeStream()
         pending_perm = Int[]
         return new(code, result, result_bbs, ssa_rename, bb_rename, bb_rename, used_ssas, late_fixup, perm, 1,
-            new_new_nodes, pending_nodes, pending_perm,
+            new_new_nodes, new_new_used_ssas, pending_nodes, pending_perm,
             1, 1, 1, false, allow_cfg_transforms, allow_cfg_transforms)
     end
 
@@ -589,7 +636,7 @@ mutable struct IncrementalCompact
         perm = my_sortperm(Int[code.new_nodes.info[i].pos for i in 1:length(code.new_nodes)])
         new_len = length(code.stmts) + length(code.new_nodes)
         ssa_rename = Any[SSAValue(i) for i = 1:new_len]
-        used_ssas = fill(0, new_len)
+        new_new_used_ssas = Vector{Int}()
         late_fixup = Vector{Int}()
         bb_rename = Vector{Int}()
         new_new_nodes = NewNodeStream()
@@ -598,7 +645,7 @@ mutable struct IncrementalCompact
         return new(code, parent.result,
             parent.result_bbs, ssa_rename, bb_rename, bb_rename, parent.used_ssas,
             late_fixup, perm, 1,
-            new_new_nodes, pending_nodes, pending_perm,
+            new_new_nodes, new_new_used_ssas, pending_nodes, pending_perm,
             1, result_offset, parent.active_result_bb, false, false, false)
     end
 end
@@ -608,6 +655,7 @@ struct TypesView{T}
 end
 types(ir::Union{IRCode, IncrementalCompact}) = TypesView(ir)
 
+# TODO We can be a bit better about access here by using a pattern similar to InstructionStream
 function getindex(compact::IncrementalCompact, idx::Int)
     if idx < compact.result_idx
         return compact.result[idx][:inst]
@@ -623,7 +671,10 @@ end
 
 function getindex(compact::IncrementalCompact, ssa::OldSSAValue)
     id = ssa.id
-    if id <= length(compact.ir.stmts)
+    if id < compact.idx
+        new_idx = compact.ssa_rename[id]
+        return compact.result[new_idx][:inst]
+    elseif id <= length(compact.ir.stmts)
         return compact.ir.stmts[id][:inst]
     end
     id -= length(compact.ir.stmts)
@@ -638,21 +689,85 @@ function getindex(compact::IncrementalCompact, ssa::NewSSAValue)
     return compact.new_new_nodes.stmts[ssa.id][:inst]
 end
 
+function block_for_inst(compact::IncrementalCompact, idx::SSAValue)
+    id = idx.id
+    if id < compact.result_idx # if ssa within result
+        return block_for_inst(compact.result_bbs, id)
+    else
+        return block_for_inst(compact.ir.cfg, id)
+    end
+end
+
+function block_for_inst(compact::IncrementalCompact, idx::OldSSAValue)
+    id = idx.id
+    if id < compact.idx # if ssa within result
+        return block_for_inst(compact.result_bbs, compact.ssa_rename[id])
+    else
+        return block_for_inst(compact.ir.cfg, id)
+    end
+end
+
+function block_for_inst(compact::IncrementalCompact, idx::NewSSAValue)
+    block_for_inst(compact, SSAValue(compact.new_new_nodes.info[idx.id].pos))
+end
+
+function dominates_ssa(compact::IncrementalCompact, domtree::DomTree, x::AnySSAValue, y::AnySSAValue)
+    xb = block_for_inst(compact, x)
+    yb = block_for_inst(compact, y)
+    if xb == yb
+        xinfo = yinfo = nothing
+        if isa(x, OldSSAValue)
+            x′ = compact.ssa_rename[x.id]::SSAValue
+        elseif isa(x, NewSSAValue)
+            xinfo = compact.new_new_nodes.info[x.id]
+            x′ = SSAValue(xinfo.pos)
+        else
+            x′ = x
+        end
+        if isa(y, OldSSAValue)
+            y′ = compact.ssa_rename[y.id]::SSAValue
+        elseif isa(y, NewSSAValue)
+            yinfo = compact.new_new_nodes.info[y.id]
+            y′ = SSAValue(yinfo.pos)
+        else
+            y′ = y
+        end
+        if x′.id == y′.id && (xinfo !== nothing || yinfo !== nothing)
+            if xinfo !== nothing && yinfo !== nothing
+                if xinfo.attach_after == yinfo.attach_after
+                    return x.id < y.id
+                end
+                return yinfo.attach_after
+            elseif xinfo !== nothing
+                return !xinfo.attach_after
+            else
+                return yinfo.attach_after
+            end
+        end
+        return x′.id < y′.id
+    end
+    return dominates(domtree, xb, yb)
+end
+
 function count_added_node!(compact::IncrementalCompact, @nospecialize(v))
-    needs_late_fixup = isa(v, NewSSAValue)
+    needs_late_fixup = false
     if isa(v, SSAValue)
         compact.used_ssas[v.id] += 1
+    elseif isa(v, NewSSAValue)
+        compact.new_new_used_ssas[v.id] += 1
+        needs_late_fixup = true
     else
         for ops in userefs(v)
             val = ops[]
             if isa(val, SSAValue)
                 compact.used_ssas[val.id] += 1
             elseif isa(val, NewSSAValue)
+                compact.new_new_used_ssas[val.id] += 1
                 needs_late_fixup = true
             end
         end
     end
-    needs_late_fixup
+    return needs_late_fixup
 end
 
 function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool)
@@ -663,18 +778,20 @@ function add_pending!(compact::IncrementalCompact, pos::Int, attach_after::Bool)
     return node
 end
 
-function insert_node!(compact::IncrementalCompact, before, @nospecialize(typ), @nospecialize(val), attach_after::Bool=false)
+function insert_node!(compact::IncrementalCompact, before, inst::NewInstruction, attach_after::Bool=false)
+    @assert inst.effect_free_computed
     if isa(before, SSAValue)
         if before.id < compact.result_idx
-            count_added_node!(compact, val)
-            line = compact.result[before.id][:line]
+            count_added_node!(compact, inst.stmt)
+            line = something(inst.line, compact.result[before.id][:line])
             node = add!(compact.new_new_nodes, before.id, attach_after)
-            node[:inst], node[:type], node[:line] = val, typ, line
+            push!(compact.new_new_used_ssas, 0)
+            node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag
             return NewSSAValue(node.idx)
         else
-            line = compact.ir.stmts[before.id][:line]
+            line = something(inst.line, compact.ir.stmts[before.id][:line])
             node = add_pending!(compact, before.id, attach_after)
-            node[:inst], node[:type], node[:line] = val, typ, line
+            node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag
             os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes))
             push!(compact.ssa_rename, os)
             push!(compact.used_ssas, 0)
@@ -682,33 +799,47 @@ function insert_node!(compact::IncrementalCompact, before, @nospecialize(typ), @
         end
     elseif isa(before, OldSSAValue)
         pos = before.id
-        if pos > length(compact.ir.stmts)
-            #@assert attach_after
-            info = compact.pending_nodes.info[pos - length(compact.ir.stmts) - length(compact.ir.new_nodes)]
-            pos, attach_after = info.pos, info.attach_after
+        if pos < compact.idx
+            renamed = compact.ssa_rename[pos]::AnySSAValue
+            count_added_node!(compact, inst.stmt)
+            line = something(inst.line, compact.result[renamed.id][:line])
+            node = add!(compact.new_new_nodes, renamed.id, attach_after)
+            push!(compact.new_new_used_ssas, 0)
+            node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag
+            return NewSSAValue(node.idx)
+        else
+            if pos > length(compact.ir.stmts)
+                #@assert attach_after
+                info = compact.pending_nodes.info[pos - length(compact.ir.stmts) - length(compact.ir.new_nodes)]
+                pos, attach_after = info.pos, info.attach_after
+            end
+            line = something(inst.line, compact.ir.stmts[pos][:line])
+            node = add_pending!(compact, pos, attach_after)
+            node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, line, inst.flag
+            os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes))
+            push!(compact.ssa_rename, os)
+            push!(compact.used_ssas, 0)
+            return os
         end
-        line = compact.ir.stmts[pos][:line]
-        node = add_pending!(compact, pos, attach_after)
-        node[:inst], node[:type], node[:line] = val, typ, line
-        os = OldSSAValue(length(compact.ir.stmts) + length(compact.ir.new_nodes) + length(compact.pending_nodes))
-        push!(compact.ssa_rename, os)
-        push!(compact.used_ssas, 0)
-        return os
     elseif isa(before, NewSSAValue)
         before_entry = compact.new_new_nodes.info[before.id]
-        line = compact.new_new_nodes.stmts[before.id][:line]
+        line = something(inst.line, compact.new_new_nodes.stmts[before.id][:line])
         new_entry = add!(compact.new_new_nodes, before_entry.pos, attach_after)
-        new_entry[:inst], new_entry[:type], new_entry[:line] = val, typ, line
+        new_entry[:inst], new_entry[:type], new_entry[:line], new_entry[:flag] = inst.stmt, inst.type, line, inst.flag
+        push!(compact.new_new_used_ssas, 0)
         return NewSSAValue(new_entry.idx)
     else
         error("Unsupported")
     end
 end
 
-function insert_node_here!(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typ), ltable_idx::Int32, reverse_affinity::Bool=false)
+function insert_node_here!(compact::IncrementalCompact, inst::NewInstruction, reverse_affinity::Bool=false)
+    @assert inst.line !== nothing
     refinish = false
     result_idx = compact.result_idx
-    if result_idx == first(compact.result_bbs[compact.active_result_bb].stmts) && reverse_affinity
+    if reverse_affinity &&
+            ((compact.active_result_bb == length(compact.result_bbs) + 1) ||
+             result_idx == first(compact.result_bbs[compact.active_result_bb].stmts))
         compact.active_result_bb -= 1
         refinish = true
     end
@@ -716,11 +847,13 @@ function insert_node_here!(compact::IncrementalCompact, @nospecialize(val), @nos
         @assert result_idx == length(compact.result) + 1
         resize!(compact, result_idx)
     end
-    node = compact.result[result_idx]
-    node[:inst], node[:type], node[:line], node[:flag] = val, typ, ltable_idx, 0x00
-    if count_added_node!(compact, val)
-        push!(compact.late_fixup, result_idx)
+    flag = inst.flag
+    if !inst.effect_free_computed && stmt_effect_free(inst.stmt, inst.type, compact)
+        flag |= IR_FLAG_EFFECT_FREE
     end
+    node = compact.result[result_idx]
+    node[:inst], node[:type], node[:line], node[:flag] = inst.stmt, inst.type, inst.line, flag
+    count_added_node!(compact, inst.stmt) && push!(compact.late_fixup, result_idx)
     compact.result_idx = result_idx + 1
     inst = SSAValue(result_idx)
     refinish && finish_current_bb!(compact, 0)
@@ -742,22 +875,50 @@ function getindex(view::TypesView, v::OldSSAValue)
     return view.ir.pending_nodes.stmts[id][:type]
 end
 
-function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::SSAValue)
-    @assert idx.id < compact.result_idx
-    (compact.result[idx.id][:inst] === v) && return
-    # Kill count for current uses
-    for ops in userefs(compact.result[idx.id][:inst])
+function kill_current_uses(compact::IncrementalCompact, @nospecialize(stmt))
+    for ops in userefs(stmt)
         val = ops[]
         if isa(val, SSAValue)
             @assert compact.used_ssas[val.id] >= 1
             compact.used_ssas[val.id] -= 1
+        elseif isa(val, NewSSAValue)
+            @assert compact.new_new_used_ssas[val.id] >= 1
+            compact.new_new_used_ssas[val.id] -= 1
         end
     end
+end
+
+function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::SSAValue)
+    @assert idx.id < compact.result_idx
+    (compact.result[idx.id][:inst] === v) && return
+    # Kill count for current uses
+    kill_current_uses(compact, compact.result[idx.id][:inst])
     compact.result[idx.id][:inst] = v
     # Add count for new use
-    if count_added_node!(compact, v)
-        push!(compact.late_fixup, idx.id)
+    count_added_node!(compact, v) && push!(compact.late_fixup, idx.id)
+    return compact
+end
+
+function setindex!(compact::IncrementalCompact, @nospecialize(v), idx::OldSSAValue)
+    id = idx.id
+    if id < compact.idx
+        new_idx = compact.ssa_rename[id]
+        (compact.result[new_idx][:inst] === v) && return
+        kill_current_uses(compact, compact.result[new_idx][:inst])
+        compact.result[new_idx][:inst] = v
+        count_added_node!(compact, v) && push!(compact.late_fixup, new_idx)
+        return compact
+    elseif id <= length(compact.ir.stmts)  # ir.stmts, new_nodes, and pending_nodes uses aren't counted yet, so no need to adjust
+        compact.ir.stmts[id][:inst] = v
+        return compact
+    end
+    id -= length(compact.ir.stmts)
+    if id <= length(compact.ir.new_nodes)
+        compact.ir.new_nodes.stmts[id][:inst] = v
+        return compact
     end
+    id -= length(compact.ir.new_nodes)
+    compact.pending_nodes.stmts[id][:inst] = v
     return compact
 end
 
@@ -801,6 +962,7 @@ end
 function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int},
                                 processed_idx::Int, result_idx::Int,
                                 ssa_rename::Vector{Any}, used_ssas::Vector{Int},
+                                new_new_used_ssas::Vector{Int},
                                 do_rename_ssa::Bool)
     values = Vector{Any}(undef, length(old_values))
     for i = 1:length(old_values)
@@ -812,7 +974,7 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int}
                     push!(late_fixup, result_idx)
                     val = OldSSAValue(val.id)
                 else
-                    val = renumber_ssa2(val, ssa_rename, used_ssas, do_rename_ssa)
+                    val = renumber_ssa2(val, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa)
                 end
             else
                 used_ssas[val.id] += 1
@@ -822,17 +984,19 @@ function process_phinode_values(old_values::Vector{Any}, late_fixup::Vector{Int}
                 push!(late_fixup, result_idx)
             else
                 # Always renumber these. do_rename_ssa applies only to actual SSAValues
-                val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, true)
+                val = renumber_ssa2(SSAValue(val.id), ssa_rename, used_ssas, new_new_used_ssas, true)
             end
         elseif isa(val, NewSSAValue)
             push!(late_fixup, result_idx)
+            new_new_used_ssas[val.id] += 1
         end
         values[i] = val
     end
     return values
 end
 
-function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int}, do_rename_ssa::Bool)
+function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int},
+        new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool)
     id = val.id
     if id > length(ssanums)
         return val
@@ -841,22 +1005,26 @@ function renumber_ssa2(val::SSAValue, ssanums::Vector{Any}, used_ssas::Vector{In
         val = ssanums[id]
     end
     if isa(val, SSAValue)
-        if used_ssas !== nothing
-            used_ssas[val.id] += 1
-        end
+        used_ssas[val.id] += 1
     end
     return val
 end
 
-function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool)
+function renumber_ssa2(val::NewSSAValue, ssanums::Vector{Any}, used_ssas::Vector{Int},
+        new_new_used_ssas::Vector{Int}, do_rename_ssa::Bool)
+    new_new_used_ssas[val.id] += 1
+    return val
+end
+
+function renumber_ssa2!(@nospecialize(stmt), ssanums::Vector{Any}, used_ssas::Vector{Int}, new_new_used_ssas::Vector{Int}, late_fixup::Vector{Int}, result_idx::Int, do_rename_ssa::Bool)
     urs = userefs(stmt)
     for op in urs
         val = op[]
         if isa(val, OldSSAValue) || isa(val, NewSSAValue)
             push!(late_fixup, result_idx)
         end
-        if isa(val, SSAValue)
-            val = renumber_ssa2(val, ssanums, used_ssas, do_rename_ssa)
+        if isa(val, Union{SSAValue, NewSSAValue})
+            val = renumber_ssa2(val, ssanums, used_ssas, new_new_used_ssas, do_rename_ssa)
         end
         if isa(val, OldSSAValue) || isa(val, NewSSAValue)
             push!(late_fixup, result_idx)
@@ -890,7 +1058,7 @@ function kill_edge!(compact::IncrementalCompact, active_bb::Int, from::Int, to::
     # Check if the block is now dead
     if length(preds) == 0
         for succ in copy(compact.result_bbs[compact.bb_rename_succ[to]].succs)
-            kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred))
+            kill_edge!(compact, active_bb, to, findfirst(x->x === succ, compact.bb_rename_pred)::Int)
         end
         if to < active_bb
             # Kill all statements in the block
@@ -936,26 +1104,33 @@ end
 
 function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instruction, idx::Int, processed_idx::Int, active_bb::Int, do_rename_ssa::Bool)
     stmt = inst[:inst]
-    result = compact.result
-    ssa_rename = compact.ssa_rename
-    late_fixup = compact.late_fixup
-    used_ssas = compact.used_ssas
+    (; result, ssa_rename, late_fixup, used_ssas, new_new_used_ssas, cfg_transforms_enabled, fold_constant_branches) = compact
     ssa_rename[idx] = SSAValue(result_idx)
     if stmt === nothing
         ssa_rename[idx] = stmt
     elseif isa(stmt, OldSSAValue)
         ssa_rename[idx] = ssa_rename[stmt.id]
-    elseif isa(stmt, GotoNode) && compact.cfg_transforms_enabled
+    elseif isa(stmt, GotoNode) && cfg_transforms_enabled
         result[result_idx][:inst] = GotoNode(compact.bb_rename_succ[stmt.label])
         result_idx += 1
-    elseif isa(stmt, GlobalRef) || isa(stmt, GotoNode)
+    elseif isa(stmt, GlobalRef)
+        result[result_idx][:inst] = stmt
+        result[result_idx][:type] = argextype(stmt, compact)
+        result_idx += 1
+    elseif isa(stmt, GotoNode)
         result[result_idx][:inst] = stmt
         result_idx += 1
-    elseif isa(stmt, GotoIfNot) && compact.cfg_transforms_enabled
-        stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, late_fixup, result_idx, do_rename_ssa)::GotoIfNot
+    elseif isa(stmt, GotoIfNot) && cfg_transforms_enabled
+        stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::GotoIfNot
         result[result_idx][:inst] = stmt
         cond = stmt.cond
-        if isa(cond, Bool) && compact.fold_constant_branches
+        if fold_constant_branches
+            if !isa(cond, Bool)
+                condT = widenconditional(argextype(cond, compact))
+                isa(condT, Const) || @goto bail
+                cond = condT.val
+                isa(cond, Bool) || @goto bail
+            end
             if cond
                 result[result_idx][:inst] = nothing
                 kill_edge!(compact, active_bb, active_bb, stmt.dest)
@@ -966,12 +1141,13 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
                 result_idx += 1
             end
         else
+            @label bail
             result[result_idx][:inst] = GotoIfNot(cond, compact.bb_rename_succ[stmt.dest])
             result_idx += 1
         end
     elseif isa(stmt, Expr)
-        stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, late_fixup, result_idx, do_rename_ssa)::Expr
-        if compact.cfg_transforms_enabled && isexpr(stmt, :enter)
+        stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::Expr
+        if cfg_transforms_enabled && isexpr(stmt, :enter)
             stmt.args[1] = compact.bb_rename_succ[stmt.args[1]::Int]
         end
         result[result_idx][:inst] = stmt
@@ -980,10 +1156,11 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         # As an optimization, we eliminate any trivial pinodes. For performance, we use ===
         # type equality. We may want to consider using == in either a separate pass or if
         # performance turns out ok
-        stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, late_fixup, result_idx, do_rename_ssa)::PiNode
+        stmt = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)::PiNode
         pi_val = stmt.val
         if isa(pi_val, SSAValue)
-            if stmt.typ === compact.result[pi_val.id][:type]
+            if stmt.typ === result[pi_val.id][:type]
+                used_ssas[pi_val.id] -= 1
                 ssa_rename[idx] = pi_val
                 return result_idx
             end
@@ -1002,10 +1179,10 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
         result[result_idx][:inst] = stmt
         result_idx += 1
     elseif isa(stmt, ReturnNode) || isa(stmt, UpsilonNode) || isa(stmt, GotoIfNot)
-        result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, late_fixup, result_idx, do_rename_ssa)
+        result[result_idx][:inst] = renumber_ssa2!(stmt, ssa_rename, used_ssas, new_new_used_ssas, late_fixup, result_idx, do_rename_ssa)
         result_idx += 1
     elseif isa(stmt, PhiNode)
-        if compact.cfg_transforms_enabled
+        if cfg_transforms_enabled
             # Rename phi node edges
             map!(i -> compact.bb_rename_pred[i], stmt.edges, stmt.edges)
 
@@ -1039,27 +1216,31 @@ function process_node!(compact::IncrementalCompact, result_idx::Int, inst::Instr
             values = stmt.values
         end
 
-        values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa)
+        values = process_phinode_values(values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa)
         # Don't remove the phi node if it is before the definition of its value
         # because doing so can create forward references. This should only
         # happen with dead loops, but can cause problems when optimization
         # passes look at all code, dead or not. This check should be
         # unnecessary when DCE can remove those dead loops entirely, so this is
         # just to be safe.
-        before_def = isassigned(values, 1) && isa(values[1], OldSSAValue) &&
-            idx < values[1].id
+        before_def = isassigned(values, 1) && (v = values[1]; isa(v, OldSSAValue)) && idx < v.id
         if length(edges) == 1 && isassigned(values, 1) && !before_def &&
-                length(compact.cfg_transforms_enabled ?
+                length(cfg_transforms_enabled ?
                     compact.result_bbs[compact.bb_rename_succ[active_bb]].preds :
                     compact.ir.cfg.blocks[active_bb].preds) == 1
             # There's only one predecessor left - just replace it
-            ssa_rename[idx] = values[1]
+            v = values[1]
+            @assert !isa(v, NewSSAValue)
+            if isa(v, SSAValue)
+                used_ssas[v.id] -= 1
+            end
+            ssa_rename[idx] = v
         else
             result[result_idx][:inst] = PhiNode(edges, values)
             result_idx += 1
         end
     elseif isa(stmt, PhiCNode)
-        result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, do_rename_ssa))
+        result[result_idx][:inst] = PhiCNode(process_phinode_values(stmt.values, late_fixup, processed_idx, result_idx, ssa_rename, used_ssas, new_new_used_ssas, do_rename_ssa))
         result_idx += 1
     elseif isa(stmt, SSAValue)
         # identity assign, replace uses of this ssa value with its result
@@ -1196,8 +1377,8 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}=
         resize!(compact, old_result_idx)
     end
     bb = compact.ir.cfg.blocks[active_bb]
-    if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && length(bb.preds) == 0
-        # No predecessors, kill the entire block.
+    if compact.cfg_transforms_enabled && active_bb > 1 && active_bb <= length(compact.bb_rename_succ) && compact.bb_rename_succ[active_bb] == -1
+        # Dead block, so kill the entire block.
         compact.idx = last(bb.stmts)
         # Pop any remaining insertion nodes
         while compact.new_nodes_idx <= length(compact.perm)
@@ -1260,30 +1441,35 @@ function iterate(compact::IncrementalCompact, (idx, active_bb)::Tuple{Int, Int}=
         compact.result[old_result_idx][:inst]), (compact.idx, active_bb)
 end
 
-function maybe_erase_unused!(extra_worklist, compact, idx, callback = x->nothing)
-    stmt = compact.result[idx][:inst]
+function maybe_erase_unused!(
+    extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, in_worklist::Bool,
+    callback = null_dce_callback)
+
+    inst = idx <= length(compact.result) ? compact.result[idx] :
+        compact.new_new_nodes.stmts[idx - length(compact.result)]
+    stmt = inst[:inst]
     stmt === nothing && return false
-    if compact_exprtype(compact, SSAValue(idx)) === Bottom
+    if inst[:type] === Bottom
         effect_free = false
     else
-        effect_free = stmt_effect_free(stmt, compact.result[idx][:type], compact, compact.ir.sptypes)
+        effect_free = inst[:flag] & IR_FLAG_EFFECT_FREE != 0
     end
-    if effect_free
-        for ops in userefs(stmt)
-            val = ops[]
-            # If the pass we ran inserted new nodes, it's possible for those
-            # to be outside our used_ssas count.
-            if isa(val, SSAValue) && val.id <= length(compact.used_ssas)
-                if compact.used_ssas[val.id] == 1
-                    if val.id < idx
-                        push!(extra_worklist, val.id)
-                    end
-                end
-                compact.used_ssas[val.id] -= 1
-                callback(val)
+    function kill_ssa_value(val::SSAValue)
+        if compact.used_ssas[val.id] == 1
+            if val.id < idx || in_worklist
+                push!(extra_worklist, val.id)
             end
         end
-        compact.result[idx][:inst] = nothing
+        compact.used_ssas[val.id] -= 1
+        callback(val)
+    end
+    if effect_free
+        if isa(stmt, SSAValue)
+            kill_ssa_value(stmt)
+        else
+            foreachssa(kill_ssa_value, stmt)
+        end
+        inst[:inst] = nothing
         return true
     end
     return false
@@ -1294,13 +1480,8 @@ function fixup_phinode_values!(compact::IncrementalCompact, old_values::Vector{A
     for i = 1:length(old_values)
         isassigned(old_values, i) || continue
         val = old_values[i]
-        if isa(val, OldSSAValue)
-            val = compact.ssa_rename[val.id]
-            if isa(val, SSAValue)
-                compact.used_ssas[val.id] += 1
-            end
-        elseif isa(val, NewSSAValue)
-            val = SSAValue(length(compact.result) + val.id)
+        if isa(val, Union{OldSSAValue, NewSSAValue})
+            val = fixup_node(compact, val)
         end
         values[i] = val
     end
@@ -1315,29 +1496,30 @@ function fixup_node(compact::IncrementalCompact, @nospecialize(stmt))
     elseif isa(stmt, NewSSAValue)
         return SSAValue(length(compact.result) + stmt.id)
     elseif isa(stmt, OldSSAValue)
-        return compact.ssa_rename[stmt.id]
+        val = compact.ssa_rename[stmt.id]
+        if isa(val, SSAValue)
+            # If `val.id` is greater than the length of `compact.result` or
+            # `compact.used_ssas`, this SSA value is in `new_new_nodes`, so
+            # don't count the use
+            compact.used_ssas[val.id] += 1
+        end
+        return val
     else
         urs = userefs(stmt)
         for ur in urs
             val = ur[]
-            if isa(val, NewSSAValue)
-                val = SSAValue(length(compact.result) + val.id)
-            elseif isa(val, OldSSAValue)
-                val = compact.ssa_rename[val.id]
-            end
-            if isa(val, SSAValue) && val.id <= length(compact.used_ssas)
-                # If `val.id` is greater than the length of `compact.result` or
-                # `compact.used_ssas`, this SSA value is in `new_new_nodes`, so
-                # don't count the use
-                compact.used_ssas[val.id] += 1
+            if isa(val, Union{NewSSAValue, OldSSAValue})
+                ur[] = fixup_node(compact, val)
             end
-            ur[] = val
         end
         return urs[]
     end
 end
 
 function just_fixup!(compact::IncrementalCompact)
+    resize!(compact.used_ssas, length(compact.result))
+    append!(compact.used_ssas, compact.new_new_used_ssas)
+    empty!(compact.new_new_used_ssas)
     for idx in compact.late_fixup
         stmt = compact.result[idx][:inst]
         new_stmt = fixup_node(compact, stmt)
@@ -1353,19 +1535,21 @@ function just_fixup!(compact::IncrementalCompact)
     end
 end
 
-function simple_dce!(compact::IncrementalCompact)
+function simple_dce!(compact::IncrementalCompact, callback = null_dce_callback)
     # Perform simple DCE for unused values
+    @assert isempty(compact.new_new_used_ssas) # just_fixup! wasn't run?
     extra_worklist = Int[]
     for (idx, nused) in Iterators.enumerate(compact.used_ssas)
-        idx >= compact.result_idx && break
         nused == 0 || continue
-        maybe_erase_unused!(extra_worklist, compact, idx)
+        maybe_erase_unused!(extra_worklist, compact, idx, false, callback)
     end
     while !isempty(extra_worklist)
-        maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist))
+        maybe_erase_unused!(extra_worklist, compact, pop!(extra_worklist), true, callback)
     end
 end
 
+null_dce_callback(x::SSAValue) = return
+
 function non_dce_finish!(compact::IncrementalCompact)
     result_idx = compact.result_idx
     resize!(compact.result, result_idx - 1)
diff --git a/base/compiler/ssair/legacy.jl b/base/compiler/ssair/legacy.jl
index 1fa847734359b2..ffafa77d8fc58e 100644
--- a/base/compiler/ssair/legacy.jl
+++ b/base/compiler/ssair/legacy.jl
@@ -23,32 +23,31 @@ function inflate_ir(ci::CodeInfo, sptypes::Vector{Any}, argtypes::Vector{Any})
         elseif isa(stmt, PhiNode)
             code[i] = PhiNode(Int32[block_for_inst(cfg, Int(edge)) for edge in stmt.edges], stmt.values)
         elseif isa(stmt, Expr) && stmt.head === :enter
-            stmt.args[1] = block_for_inst(cfg, stmt.args[1])
-            code[i] = stmt
-        else
+            stmt.args[1] = block_for_inst(cfg, stmt.args[1]::Int)
             code[i] = stmt
         end
     end
-    ssavaluetypes = ci.ssavaluetypes
     nstmts = length(code)
-    ssavaluetypes = ci.ssavaluetypes isa Vector{Any} ? copy(ci.ssavaluetypes) : Any[ Any for i = 1:(ci.ssavaluetypes::Int) ]
+    ssavaluetypes = let ssavaluetypes = ci.ssavaluetypes
+        ssavaluetypes isa Vector{Any} ? copy(ssavaluetypes) : Any[ Any for i = 1:(ssavaluetypes::Int) ]
+    end
     stmts = InstructionStream(code, ssavaluetypes, Any[nothing for i = 1:nstmts], copy(ci.codelocs), copy(ci.ssaflags))
-    ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable), argtypes, Any[], sptypes)
+    ir = IRCode(stmts, cfg, collect(LineInfoNode, ci.linetable), argtypes, Expr[], sptypes)
     return ir
 end
 
 function replace_code_newstyle!(ci::CodeInfo, ir::IRCode, nargs::Int)
     @assert isempty(ir.new_nodes)
     # All but the first `nargs` slots will now be unused
-    resize!(ci.slotflags, nargs + 1)
+    resize!(ci.slotflags, nargs)
     stmts = ir.stmts
     ci.code, ci.ssavaluetypes, ci.codelocs, ci.ssaflags, ci.linetable =
         stmts.inst, stmts.type, stmts.line, stmts.flag, ir.linetable
     for metanode in ir.meta
         push!(ci.code, metanode)
         push!(ci.codelocs, 1)
-        push!(ci.ssavaluetypes, Any)
-        push!(ci.ssaflags, 0x00)
+        push!(ci.ssavaluetypes::Vector{Any}, Any)
+        push!(ci.ssaflags, IR_FLAG_NULL)
     end
     # Translate BB Edges to statement edges
     # (and undo normalization for now)
diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl
index 4af13d81b76d01..c2597363df2824 100644
--- a/base/compiler/ssair/passes.jl
+++ b/base/compiler/ssair/passes.jl
@@ -1,71 +1,93 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+function is_known_call(@nospecialize(x), @nospecialize(func), ir::Union{IRCode,IncrementalCompact})
+    isexpr(x, :call) || return false
+    ft = argextype(x.args[1], ir)
+    return singleton_type(ft) === func
+end
+
+struct SSAUse
+    kind::Symbol
+    idx::Int
+end
+GetfieldUse(idx::Int)  = SSAUse(:getfield, idx)
+PreserveUse(idx::Int)  = SSAUse(:preserve, idx)
+NoPreserve()           = SSAUse(:nopreserve, 0)
+IsdefinedUse(idx::Int) = SSAUse(:isdefined, idx)
+
 """
-    This struct keeps track of all uses of some mutable struct allocated
-    in the current function. `uses` are all instances of `getfield` on the
-    struct. `defs` are all instances of `setfield!` on the struct. The terminology
-    refers to the uses/defs of the ``slot bundle'' that the mutable struct represents.
-
-    In addition we keep track of all instances of a foreigncall preserve of this mutable
-    struct. Somewhat counterintuitively, we don't actually need to make sure that the
-    struct itself is live (or even allocated) at a ccall site. If there are no other places
-    where the struct escapes (and thus e.g. where its address is taken), it need not be
-    allocated. We do however, need to make sure to preserve any elements of this struct.
+    du::SSADefUse
+
+This struct keeps track of all uses of some mutable struct allocated in the current function:
+- `du.uses::Vector{SSAUse}` are some "usages" (like `getfield`) of the struct
+- `du.defs::Vector{Int}` are all instances of `setfield!` on the struct
+The terminology refers to the uses/defs of the "slot bundle" that the mutable struct represents.
+
+`du.uses` tracks all instances of `getfield` and `isdefined` calls on the struct.
+Additionally it also tracks all instances of a `:foreigncall` that preserves of this mutable
+struct. Somewhat counterintuitively, we don't actually need to make sure that the struct
+itself is live (or even allocated) at a `ccall` site. If there are no other places where
+the struct escapes (and thus e.g. where its address is taken), it need not be allocated.
+We do however, need to make sure to preserve any elements of this struct.
 """
 struct SSADefUse
-    uses::Vector{Int}
+    uses::Vector{SSAUse}
     defs::Vector{Int}
-    ccall_preserve_uses::Vector{Int}
 end
-SSADefUse() = SSADefUse(Int[], Int[], Int[])
+SSADefUse() = SSADefUse(SSAUse[], Int[])
 
-function try_compute_fieldidx_expr(@nospecialize(typ), @nospecialize(use_expr))
-    field = use_expr.args[3]
-    isa(field, QuoteNode) && (field = field.value)
-    isa(field, Union{Int, Symbol}) || return nothing
-    return try_compute_fieldidx(typ, field)
+function compute_live_ins(cfg::CFG, du::SSADefUse)
+    uses = Int[]
+    for use in du.uses
+        use.kind === :isdefined && continue # filter out `isdefined` usages
+        push!(uses, use.idx)
+    end
+    compute_live_ins(cfg, du.defs, uses)
 end
 
-function lift_defuse(cfg::CFG, ssa::SSADefUse)
-    # We remove from `uses` any block where all uses are dominated
-    # by a def. This prevents insertion of dead phi nodes at the top
-    # of such a block if that block happens to be in a loop
-    ordered = Tuple{Int, Int, Bool}[(x, block_for_inst(cfg, x), true) for x in ssa.uses]
-    for x in ssa.defs
-        push!(ordered, (x, block_for_inst(cfg, x), false))
-    end
-    ordered = sort(ordered, by=x->x[1])
-    bb_defs = Int[]
-    bb_uses = Int[]
-    last_bb = last_def_bb = 0
-    for (_, bb, is_use) in ordered
-        if bb != last_bb && is_use
-            push!(bb_uses, bb)
-        end
-        last_bb = bb
-        if last_def_bb != bb && !is_use
-            push!(bb_defs, bb)
-            last_def_bb = bb
+# assume `stmt == getfield(obj, field, ...)` or `stmt == setfield!(obj, field, val, ...)`
+try_compute_field_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr) =
+    try_compute_field(ir, stmt.args[3])
+
+function try_compute_field(ir::Union{IncrementalCompact,IRCode}, @nospecialize(field))
+    # fields are usually literals, handle them manually
+    if isa(field, QuoteNode)
+        field = field.value
+    elseif isa(field, Int) || isa(field, Symbol)
+    else
+        # try to resolve other constants, e.g. global reference
+        field = argextype(field, ir)
+        if isa(field, Const)
+            field = field.val
+        else
+            return nothing
         end
     end
-    SSADefUse(bb_uses, bb_defs, Int[])
+    return isa(field, Union{Int, Symbol}) ? field : nothing
+end
+
+function try_compute_fieldidx_stmt(ir::Union{IncrementalCompact,IRCode}, stmt::Expr, typ::DataType)
+    field = try_compute_field_stmt(ir, stmt)
+    return try_compute_fieldidx(typ, field)
 end
 
 function find_curblock(domtree::DomTree, allblocks::Vector{Int}, curblock::Int)
     # TODO: This can be much faster by looking at current level and only
     # searching for those blocks in a sorted order
     while !(curblock in allblocks)
-        curblock = domtree.idoms[curblock]
+        curblock = domtree.idoms_bb[curblock]
     end
     return curblock
 end
 
 function val_for_def_expr(ir::IRCode, def::Int, fidx::Int)
-    if isexpr(ir[SSAValue(def)], :new)
-        return ir[SSAValue(def)].args[1+fidx]
+    ex = ir[SSAValue(def)][:inst]
+    if isexpr(ex, :new)
+        return ex.args[1+fidx]
     else
+        @assert isa(ex, Expr)
         # The use is whatever the setfield was
-        return ir[SSAValue(def)].args[4]
+        return ex.args[4]
     end
 end
 
@@ -80,34 +102,14 @@ function compute_value_for_block(ir::IRCode, domtree::DomTree, allblocks::Vector
     def == 0 ? phinodes[curblock] : val_for_def_expr(ir, def, fidx)
 end
 
-function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use_idx::Int)
-    # Find the first dominating def
-    curblock = stmtblock = block_for_inst(ir.cfg, use_idx)
-    curblock = find_curblock(domtree, allblocks, curblock)
-    defblockdefs = let curblock = curblock
-        Int[stmt for stmt in du.defs if block_for_inst(ir.cfg, stmt) == curblock]
-    end
-    def = 0
-    if !isempty(defblockdefs)
-        if curblock != stmtblock
-            # Find the last def in this block
-            def = 0
-            for x in defblockdefs
-                def = max(def, x)
-            end
-        else
-            # Find the last def before our use
-            def = 0
-            for x in defblockdefs
-                def = max(def, x >= use_idx ? 0 : x)
-            end
-        end
-    end
+function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{Int},
+    du::SSADefUse, phinodes::IdDict{Int, SSAValue}, fidx::Int, use::Int)
+    def, useblock, curblock = find_def_for_use(ir, domtree, allblocks, du, use)
     if def == 0
         if !haskey(phinodes, curblock)
             # If this happens, we need to search the predecessors for defs. Which
             # one doesn't matter - if it did, we'd have had a phinode
-            return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[stmtblock].preds))
+            return compute_value_for_block(ir, domtree, allblocks, du, phinodes, fidx, first(ir.cfg.blocks[useblock].preds))
         end
         # The use is the phinode
         return phinodes[curblock]
@@ -116,7 +118,75 @@ function compute_value_for_use(ir::IRCode, domtree::DomTree, allblocks::Vector{I
     end
 end
 
-function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#), pi_callback=(pi, idx)->false)
+# even when the allocation contains an uninitialized field, we try an extra effort to check
+# if this load at `idx` have any "safe" `setfield!` calls that define the field
+function has_safe_def(
+    ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse,
+    newidx::Int, idx::Int)
+    def, _, _ = find_def_for_use(ir, domtree, allblocks, du, idx)
+    # will throw since we already checked this `:new` site doesn't define this field
+    def == newidx && return false
+    # found a "safe" definition
+    def ≠ 0 && return true
+    # we may still be able to replace this load with `PhiNode`
+    # examine if all predecessors of `block` have any "safe" definition
+    block = block_for_inst(ir, idx)
+    seen = BitSet(block)
+    worklist = BitSet(ir.cfg.blocks[block].preds)
+    isempty(worklist) && return false
+    while !isempty(worklist)
+        pred = pop!(worklist)
+        # if this block has already been examined, bail out to avoid infinite cycles
+        pred in seen && return false
+        idx = last(ir.cfg.blocks[pred].stmts)
+        # NOTE `idx` isn't a load, thus we can use inclusive coondition within the `find_def_for_use`
+        def, _, _ = find_def_for_use(ir, domtree, allblocks, du, idx, true)
+        # will throw since we already checked this `:new` site doesn't define this field
+        def == newidx && return false
+        push!(seen, pred)
+        # found a "safe" definition for this predecessor
+        def ≠ 0 && continue
+        # check for the predecessors of this predecessor
+        for newpred in ir.cfg.blocks[pred].preds
+            push!(worklist, newpred)
+        end
+    end
+    return true
+end
+
+# find the first dominating def for the given use
+function find_def_for_use(
+    ir::IRCode, domtree::DomTree, allblocks::Vector{Int}, du::SSADefUse, use::Int, inclusive::Bool=false)
+    useblock = block_for_inst(ir.cfg, use)
+    curblock = find_curblock(domtree, allblocks, useblock)
+    local def = 0
+    for idx in du.defs
+        if block_for_inst(ir.cfg, idx) == curblock
+            if curblock != useblock
+                # Find the last def in this block
+                def = max(def, idx)
+            else
+                # Find the last def before our use
+                if inclusive
+                    def = max(def, idx ≤ use ? idx : 0)
+                else
+                    def = max(def, idx < use ? idx : 0)
+                end
+            end
+        end
+    end
+    return def, useblock, curblock
+end
+
+function collect_leaves(compact::IncrementalCompact, @nospecialize(val), @nospecialize(typeconstraint))
+    if isa(val, Union{OldSSAValue, SSAValue})
+        val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint)
+    end
+    return walk_to_defs(compact, val, typeconstraint)
+end
+
+function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
+                     callback = (@nospecialize(pi), @nospecialize(idx)) -> false)
     while true
         if isa(defssa, OldSSAValue)
             if already_inserted(compact, defssa)
@@ -130,7 +200,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
         end
         def = compact[defssa]
         if isa(def, PiNode)
-            if pi_callback(def, defssa)
+            if callback(def, defssa)
                 return defssa
             end
             def = def.val
@@ -141,7 +211,7 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
             end
             defssa = def
         elseif isa(def, AnySSAValue)
-            pi_callback(def, defssa)
+            callback(def, defssa)
             if isa(def, SSAValue)
                 is_old(compact, defssa) && (def = OldSSAValue(def.id))
             end
@@ -154,33 +224,31 @@ function simple_walk(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSA
     end
 end
 
-function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defidx), @nospecialize(typeconstraint) = types(compact)[defidx])
+function simple_walk_constraint(compact::IncrementalCompact, @nospecialize(defssa#=::AnySSAValue=#),
+                                @nospecialize(typeconstraint))
     callback = function (@nospecialize(pi), @nospecialize(idx))
-        isa(pi, PiNode) && (typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ)))
+        if isa(pi, PiNode)
+            typeconstraint = typeintersect(typeconstraint, widenconst(pi.typ))
+        end
         return false
     end
-    def = simple_walk(compact, defidx, callback)
+    def = simple_walk(compact, defssa, callback)
     return Pair{Any, Any}(def, typeconstraint)
 end
 
 """
-    walk_to_defs(compact, val, intermediaries)
+    walk_to_defs(compact, val, typeconstraint)
 
-Starting at `val` walk use-def chains to get all the leaves feeding into
-this val (pruning those leaves rules out by path conditions).
+Starting at `val` walk use-def chains to get all the leaves feeding into this `val`
+(pruning those leaves rules out by path conditions).
 """
-function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint), visited_phinodes::Vector{Any}=Any[])
-    if !isa(defssa, AnySSAValue) || !isa(compact[defssa], PhiNode)
-        return Any[defssa]
-    end
-    # Step 2: Figure out what the struct is defined as
+function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospecialize(typeconstraint))
+    visited_phinodes = AnySSAValue[]
+    isa(defssa, AnySSAValue) || return Any[defssa], visited_phinodes
     def = compact[defssa]
-    ## Track definitions through PiNode/PhiNode
-    found_def = false
-    ## Track which PhiNodes, SSAValue intermediaries
-    ## we forwarded through.
-    visited = IdDict{Any, Any}()
-    worklist_defs = Any[]
+    isa(def, PhiNode) || return Any[defssa], visited_phinodes
+    visited_constraints = IdDict{AnySSAValue, Any}()
+    worklist_defs = AnySSAValue[]
     worklist_constraints = Any[]
     leaves = Any[]
     push!(worklist_defs, defssa)
@@ -188,7 +256,7 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
     while !isempty(worklist_defs)
         defssa = pop!(worklist_defs)
         typeconstraint = pop!(worklist_constraints)
-        visited[defssa] = typeconstraint
+        visited_constraints[defssa] = typeconstraint
         def = compact[defssa]
         if isa(def, PhiNode)
             push!(visited_phinodes, defssa)
@@ -199,8 +267,8 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
                 if is_old(compact, defssa) && isa(val, SSAValue)
                     val = OldSSAValue(val.id)
                 end
-                edge_typ = widenconst(compact_exprtype(compact, val))
-                typeintersect(edge_typ, typeconstraint) === Union{} && continue
+                edge_typ = widenconst(argextype(val, compact))
+                hasintersect(edge_typ, typeconstraint) || continue
                 push!(possible_predecessors, n)
             end
             for n in possible_predecessors
@@ -212,15 +280,15 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
                 if isa(val, AnySSAValue)
                     new_def, new_constraint = simple_walk_constraint(compact, val, typeconstraint)
                     if isa(new_def, AnySSAValue)
-                        if !haskey(visited, new_def)
+                        if !haskey(visited_constraints, new_def)
                             push!(worklist_defs, new_def)
                             push!(worklist_constraints, new_constraint)
-                        elseif !(new_constraint <: visited[new_def])
+                        elseif !(new_constraint <: visited_constraints[new_def])
                             # We have reached the same definition via a different
                             # path, with a different type constraint. We may have
                             # to redo some work here with the wider typeconstraint
                             push!(worklist_defs, new_def)
-                            push!(worklist_constraints, tmerge(new_constraint, visited[new_def]))
+                            push!(worklist_constraints, tmerge(new_constraint, visited_constraints[new_def]))
                         end
                         continue
                     end
@@ -240,12 +308,14 @@ function walk_to_defs(compact::IncrementalCompact, @nospecialize(defssa), @nospe
             push!(leaves, defssa)
         end
     end
-    leaves
+    return leaves, visited_phinodes
 end
 
-function process_immutable_preserve(new_preserves::Vector{Any}, compact::IncrementalCompact, def::Expr)
-    for arg in (isexpr(def, :new) ? def.args : def.args[2:end])
-        if !isbitstype(widenconst(compact_exprtype(compact, arg)))
+function record_immutable_preserve!(new_preserves::Vector{Any}, def::Expr, compact::IncrementalCompact)
+    args = isexpr(def, :new) ? def.args : def.args[2:end]
+    for i = 1:length(args)
+        arg = args[i]
+        if !isbitstype(widenconst(argextype(arg, compact)))
             push!(new_preserves, arg)
         end
     end
@@ -258,7 +328,7 @@ function already_inserted(compact::IncrementalCompact, old::OldSSAValue)
     end
     id -= length(compact.ir.stmts)
     if id < length(compact.ir.new_nodes)
-        error()
+        error("")
     end
     id -= length(compact.ir.new_nodes)
     @assert id <= length(compact.pending_nodes)
@@ -269,55 +339,52 @@ function is_pending(compact::IncrementalCompact, old::OldSSAValue)
     return old.id > length(compact.ir.stmts) + length(compact.ir.new_nodes)
 end
 
-function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
-        @nospecialize(result_t), field::Int, leaves::Vector{Any})
+function is_getfield_captures(@nospecialize(def), compact::IncrementalCompact)
+    isa(def, Expr) || return false
+    length(def.args) >= 3 || return false
+    is_known_call(def, getfield, compact) || return false
+    which = argextype(def.args[3], compact)
+    isa(which, Const) || return false
+    which.val === :captures || return false
+    oc = argextype(def.args[2], compact)
+    return oc ⊑ Core.OpaqueClosure
+end
+
+struct LiftedValue
+    x
+    LiftedValue(@nospecialize x) = new(x)
+end
+const LiftedLeaves = IdDict{Any, Union{Nothing,LiftedValue}}
+
+# try to compute lifted values that can replace `getfield(x, field)` call
+# where `x` is an immutable struct that are defined at any of `leaves`
+function lift_leaves(compact::IncrementalCompact,
+                     @nospecialize(result_t), field::Int, leaves::Vector{Any})
     # For every leaf, the lifted value
-    lifted_leaves = IdDict{Any, Any}()
+    lifted_leaves = LiftedLeaves()
     maybe_undef = false
-    for leaf in leaves
-        leaf_key = leaf
+    for i = 1:length(leaves)
+        leaf = leaves[i]
+        cache_key = leaf
         if isa(leaf, AnySSAValue)
-            if isa(leaf, OldSSAValue) && already_inserted(compact, leaf)
-                leaf = compact.ssa_rename[leaf.id]
-                if isa(leaf, AnySSAValue)
-                    leaf = simple_walk(compact, leaf)
-                end
-                if isa(leaf, AnySSAValue)
-                    def = compact[leaf]
-                else
-                    def = leaf
-                end
-            else
-                def = compact[leaf]
-            end
-            if is_tuple_call(compact, def) && isa(field, Int) && 1 <= field < length(def.args)
-                lifted = def.args[1+field]
-                if is_old(compact, leaf) && isa(lifted, SSAValue)
-                    lifted = OldSSAValue(lifted.id)
-                end
-                if isa(lifted, GlobalRef) || isa(lifted, Expr)
-                    lifted = insert_node!(compact, leaf, compact_exprtype(compact, lifted), lifted)
-                    def.args[1+field] = lifted
-                    (isa(leaf, SSAValue) && (leaf.id < compact.result_idx)) && push!(compact.late_fixup, leaf.id)
-                end
-                lifted_leaves[leaf_key] = RefValue{Any}(lifted)
+            (def, leaf) = walk_to_def(compact, leaf)
+            if is_known_call(def, tuple, compact) && 1 ≤ field < length(def.args)
+                lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves)
                 continue
             elseif isexpr(def, :new)
-                typ = widenconst(types(compact)[leaf])
-                if isa(typ, UnionAll)
-                    typ = unwrap_unionall(typ)
-                end
-                (isa(typ, DataType) && (!typ.abstract)) || return nothing
-                @assert !typ.mutable
-                field = try_compute_fieldidx_expr(typ, stmt)
-                field === nothing && return nothing
-                if length(def.args) < 1 + field
+                typ = unwrap_unionall(widenconst(types(compact)[leaf]))
+                (isa(typ, DataType) && !isabstracttype(typ)) || return nothing
+                @assert !ismutabletype(typ)
+                if length(def.args) < 1+field
+                    if field > fieldcount(typ)
+                        return nothing
+                    end
                     ftyp = fieldtype(typ, field)
                     if !isbitstype(ftyp)
                         # On this branch, this will be a guaranteed UndefRefError.
                         # We use the regular undef mechanic to lift this to a boolean slot
                         maybe_undef = true
-                        lifted_leaves[leaf_key] = nothing
+                        lifted_leaves[cache_key] = nothing
                         continue
                     end
                     return nothing
@@ -325,45 +392,36 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
                     compact[leaf] = nothing
                     for i = (length(def.args) + 1):(1+field)
                         ftyp = fieldtype(typ, i - 1)
-                        isbits(ftyp) || return nothing
-                        push!(def.args, insert_node!(compact, leaf, result_t, Expr(:new, ftyp)))
+                        isbitstype(ftyp) || return nothing
+                        ninst = effect_free(NewInstruction(Expr(:new, ftyp), result_t))
+                        push!(def.args, insert_node!(compact, leaf, ninst))
                     end
                     compact[leaf] = def
                 end
-                lifted = def.args[1+field]
-                if is_old(compact, leaf) && isa(lifted, SSAValue)
-                    lifted = OldSSAValue(lifted.id)
+                lift_arg!(compact, leaf, cache_key, def, 1+field, lifted_leaves)
+                continue
+            elseif is_getfield_captures(def, compact)
+                # Walk to new_opaque_closure
+                ocleaf = def.args[2]
+                if isa(ocleaf, AnySSAValue)
+                    ocleaf = simple_walk(compact, ocleaf)
                 end
-                if isa(lifted, GlobalRef) || isa(lifted, Expr)
-                    lifted = insert_node!(compact, leaf, compact_exprtype(compact, lifted), lifted)
-                    def.args[1+field] = lifted
-                    (isa(leaf, SSAValue) && (leaf.id < compact.result_idx)) && push!(compact.late_fixup, leaf.id)
+                ocdef, _ = walk_to_def(compact, ocleaf)
+                if isexpr(ocdef, :new_opaque_closure) && isa(field, Int) && 1 ≤ field ≤ length(ocdef.args)-4
+                    lift_arg!(compact, leaf, cache_key, ocdef, 4+field, lifted_leaves)
+                    continue
                 end
-                lifted_leaves[leaf_key] = RefValue{Any}(lifted)
-                continue
+                return nothing
             else
-                typ = compact_exprtype(compact, leaf)
+                typ = argextype(leaf, compact)
                 if !isa(typ, Const)
+                    # TODO: (disabled since #27126)
                     # If the leaf is an old ssa value, insert a getfield here
                     # We will revisit this getfield later when compaction gets
                     # to the appropriate point.
                     # N.B.: This can be a bit dangerous because it can lead to
                     # infinite loops if we accidentally insert a node just ahead
                     # of where we are
-                    if is_old(compact, leaf) && (isa(field, Int) || isa(field, Symbol))
-                        (isa(typ, DataType) && (!typ.abstract)) || return nothing
-                        @assert !typ.mutable
-                        # If there's the potential for an undefref error on access, we cannot insert a getfield
-                        if field > typ.ninitialized && !isbits(fieldtype(typ, field))
-                            return nothing
-                            lifted_leaves[leaf] = RefValue{Any}(insert_node!(compact, leaf, make_MaybeUndef(result_t), Expr(:call, :unchecked_getfield, SSAValue(leaf.id), field), true))
-                            maybe_undef = true
-                        else
-                            return nothing
-                            lifted_leaves[leaf] = RefValue{Any}(insert_node!(compact, leaf, result_t, Expr(:call, getfield, SSAValue(leaf.id), field), true))
-                        end
-                        continue
-                    end
                     return nothing
                 end
                 leaf = typ.val
@@ -374,72 +432,157 @@ function lift_leaves(compact::IncrementalCompact, @nospecialize(stmt),
         elseif isa(leaf, GlobalRef)
             mod, name = leaf.mod, leaf.name
             if isdefined(mod, name) && isconst(mod, name)
-                leaf = getfield(mod, name)
+                leaf = getglobal(mod, name)
             else
                 return nothing
             end
-        elseif isa(leaf, Union{Argument, Expr})
+        elseif isa(leaf, Argument) || isa(leaf, Expr)
             return nothing
         end
-        !ismutable(leaf) || return nothing
+        ismutable(leaf) && return nothing
         isdefined(leaf, field) || return nothing
         val = getfield(leaf, field)
         is_inlineable_constant(val) || return nothing
-        lifted_leaves[leaf_key] = RefValue{Any}(quoted(val))
+        lifted_leaves[cache_key] = LiftedValue(quoted(val))
+    end
+    return lifted_leaves, maybe_undef
+end
+
+function lift_arg!(
+    compact::IncrementalCompact, @nospecialize(leaf), @nospecialize(cache_key),
+    stmt::Expr, argidx::Int, lifted_leaves::LiftedLeaves)
+    lifted = stmt.args[argidx]
+    if is_old(compact, leaf) && isa(lifted, SSAValue)
+        lifted = OldSSAValue(lifted.id)
+        if already_inserted(compact, lifted)
+            lifted = compact.ssa_rename[lifted.id]
+        end
+    end
+    if isa(lifted, GlobalRef) || isa(lifted, Expr)
+        lifted = insert_node!(compact, leaf, effect_free(NewInstruction(lifted, argextype(lifted, compact))))
+        compact[leaf] = nothing
+        stmt.args[argidx] = lifted
+        compact[leaf] = stmt
+        if isa(leaf, SSAValue) && leaf.id < compact.result_idx
+            push!(compact.late_fixup, leaf.id)
+        end
     end
-    lifted_leaves, maybe_undef
+    lifted_leaves[cache_key] = LiftedValue(lifted)
+    nothing
+end
+
+function walk_to_def(compact::IncrementalCompact, @nospecialize(leaf))
+    if isa(leaf, OldSSAValue) && already_inserted(compact, leaf)
+        leaf = compact.ssa_rename[leaf.id]
+        if isa(leaf, AnySSAValue)
+            leaf = simple_walk(compact, leaf)
+        end
+        if isa(leaf, AnySSAValue)
+            def = compact[leaf]
+        else
+            def = leaf
+        end
+    elseif isa(leaf, AnySSAValue)
+        def = compact[leaf]
+    else
+        def = leaf
+    end
+    return Pair{Any, Any}(def, leaf)
 end
 
 make_MaybeUndef(@nospecialize(typ)) = isa(typ, MaybeUndef) ? typ : MaybeUndef(typ)
 
-function lift_comparison!(compact::IncrementalCompact, idx::Int,
-        @nospecialize(c1), @nospecialize(c2), stmt::Expr,
-        lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue})
-    if isa(c1, Const)
-        cmp = c1
-        typeconstraint = widenconst(c2)
-        val = stmt.args[3]
+"""
+    lift_comparison!(cmp, compact::IncrementalCompact, idx::Int, stmt::Expr)
+
+Replaces `cmp(φ(x, y)::Union{X,Y}, constant)` by `φ(cmp(x, constant), cmp(y, constant))`,
+where `cmp(x, constant)` and `cmp(y, constant)` can be replaced with constant `Bool`eans.
+It helps codegen avoid generating expensive code for `cmp` with `Union` types.
+In particular, this is supposed to improve the performance of the iteration protocol:
+```julia
+while x !== nothing
+    x = iterate(...)::Union{Nothing,Tuple{Any,Any}}
+end
+```
+"""
+function lift_comparison! end
+
+function lift_comparison!(::typeof(===), compact::IncrementalCompact,
+    idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue})
+    args = stmt.args
+    length(args) == 3 || return
+    lhs, rhs = args[2], args[3]
+    vl = argextype(lhs, compact)
+    vr = argextype(rhs, compact)
+    if isa(vl, Const)
+        isa(vr, Const) && return
+        val = rhs
+        cmp = vl
+    elseif isa(vr, Const)
+        val = lhs
+        cmp = vr
     else
-        cmp = c2
-        typeconstraint = widenconst(c1)
-        val = stmt.args[2]
+        return
     end
+    lift_comparison_leaves!(egal_tfunc, compact, val, cmp, lifting_cache, idx)
+end
 
-    is_type_only = isdefined(typeof(cmp), :instance)
+function lift_comparison!(::typeof(isa), compact::IncrementalCompact,
+    idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue})
+    args = stmt.args
+    length(args) == 3 || return
+    cmp = argextype(args[3], compact)
+    val = args[2]
+    lift_comparison_leaves!(isa_tfunc, compact, val, cmp, lifting_cache, idx)
+end
+
+function lift_comparison!(::typeof(isdefined), compact::IncrementalCompact,
+    idx::Int, stmt::Expr, lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue})
+    args = stmt.args
+    length(args) == 3 || return
+    cmp = argextype(args[3], compact)
+    isa(cmp, Const) || return # `isdefined_tfunc` won't return Const
+    val = args[2]
+    lift_comparison_leaves!(isdefined_tfunc, compact, val, cmp, lifting_cache, idx)
+end
 
+function lift_comparison_leaves!(@specialize(tfunc),
+    compact::IncrementalCompact, @nospecialize(val), @nospecialize(cmp),
+    lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, idx::Int)
+    typeconstraint = widenconst(argextype(val, compact))
     if isa(val, Union{OldSSAValue, SSAValue})
         val, typeconstraint = simple_walk_constraint(compact, val, typeconstraint)
     end
-
-    visited_phinodes = Any[]
-    leaves = walk_to_defs(compact, val, typeconstraint, visited_phinodes)
-
-    # Let's check if we evaluate the comparison for each one of the leaves
-    lifted_leaves = IdDict{Any, Any}()
-    for leaf in leaves
-        r = egal_tfunc(compact_exprtype(compact, leaf), cmp)
-        if isa(r, Const)
-            lifted_leaves[leaf] = RefValue{Any}(r.val)
+    isa(typeconstraint, Union) || return # bail out if there won't be a good chance for lifting
+    leaves, visited_phinodes = collect_leaves(compact, val, typeconstraint)
+    length(leaves) ≤ 1 && return # bail out if we don't have multiple leaves
+
+    # check if we can evaluate the comparison for each one of the leaves
+    lifted_leaves = nothing
+    for i = 1:length(leaves)
+        leaf = leaves[i]
+        result = tfunc(argextype(leaf, compact), cmp)
+        if isa(result, Const)
+            if lifted_leaves === nothing
+                lifted_leaves = LiftedLeaves()
+            end
+            lifted_leaves[leaf] = LiftedValue(result.val)
         else
-            # TODO: In some cases it might be profitable to hoist the ===
-            # here.
-            return
+            return # TODO In some cases it might be profitable to hoist the comparison here
         end
     end
 
-    lifted_val = perform_lifting!(compact, visited_phinodes, cmp, lifting_cache, Bool, lifted_leaves, val)
-    @assert lifted_val !== nothing
+    # perform lifting
+    lifted_val = perform_lifting!(compact,
+        visited_phinodes, cmp, lifting_cache, Bool,
+        lifted_leaves::LiftedLeaves, val, nothing)::LiftedValue
 
-    #global assertion_counter
-    #assertion_counter::Int += 1
-    #insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), lifted_val), nothing, 0, true)
-    #return
     compact[idx] = lifted_val.x
 end
 
 struct LiftedPhi
     ssa::AnySSAValue
-    node::Any
+    node::PhiNode
     need_argupdate::Bool
 end
 
@@ -449,40 +592,88 @@ function is_old(compact, @nospecialize(old_node_ssa))
         !already_inserted(compact, old_node_ssa)
 end
 
+mutable struct LazyDomtree
+    ir::IRCode
+    domtree::DomTree
+    LazyDomtree(ir::IRCode) = new(ir)
+end
+function get(x::LazyDomtree)
+    isdefined(x, :domtree) && return x.domtree
+    return @timeit "domtree 2" x.domtree = construct_domtree(x.ir.cfg.blocks)
+end
+
 function perform_lifting!(compact::IncrementalCompact,
-        visited_phinodes::Vector{Any}, @nospecialize(cache_key),
-        lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
-        @nospecialize(result_t), lifted_leaves::IdDict{Any, Any}, @nospecialize(stmt_val))
-    reverse_mapping = IdDict{Any, Any}(ssa => id for (id, ssa) in enumerate(visited_phinodes))
+    visited_phinodes::Vector{AnySSAValue}, @nospecialize(cache_key),
+    lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue},
+    @nospecialize(result_t), lifted_leaves::LiftedLeaves, @nospecialize(stmt_val),
+    lazydomtree::Union{LazyDomtree,Nothing})
+    reverse_mapping = IdDict{AnySSAValue, Int}(ssa => id for (id, ssa) in enumerate(visited_phinodes))
+
+    # Check if all the lifted leaves are the same
+    local the_leaf
+    all_same = true
+    for (_, val) in lifted_leaves
+        if !@isdefined(the_leaf)
+            the_leaf = val
+            continue
+        end
+        if val !== the_leaf
+            all_same = false
+        end
+    end
+
+    the_leaf_val = isa(the_leaf, LiftedValue) ? the_leaf.x : nothing
+    if !isa(the_leaf_val, SSAValue)
+        all_same = false
+    end
+
+    if all_same
+        dominates_all = true
+        if lazydomtree !== nothing
+            domtree = get(lazydomtree)
+            for item in visited_phinodes
+                if !dominates_ssa(compact, domtree, the_leaf_val, item)
+                    dominates_all = false
+                    break
+                end
+            end
+            if dominates_all
+                return the_leaf
+            end
+        end
+    end
 
     # Insert PhiNodes
     lifted_phis = LiftedPhi[]
     for item in visited_phinodes
-        if (item, cache_key) in keys(lifting_cache)
-            ssa = lifting_cache[Pair{AnySSAValue, Any}(item, cache_key)]
-            push!(lifted_phis, LiftedPhi(ssa, compact[ssa], false))
+        # FIXME this cache is broken somehow
+        # ckey = Pair{AnySSAValue, Any}(item, cache_key)
+        # cached = ckey in keys(lifting_cache)
+        cached = false
+        if cached
+            ssa = lifting_cache[ckey]
+            push!(lifted_phis, LiftedPhi(ssa, compact[ssa]::PhiNode, false))
             continue
         end
         n = PhiNode()
-        ssa = insert_node!(compact, item, result_t, n)
-        lifting_cache[Pair{AnySSAValue, Any}(item, cache_key)] = ssa
+        ssa = insert_node!(compact, item, effect_free(NewInstruction(n, result_t)))
+        # lifting_cache[ckey] = ssa
         push!(lifted_phis, LiftedPhi(ssa, n, true))
     end
 
     # Fix up arguments
     for (old_node_ssa, lf) in zip(visited_phinodes, lifted_phis)
-        old_node = compact[old_node_ssa]
+        old_node = compact[old_node_ssa]::PhiNode
         new_node = lf.node
         lf.need_argupdate || continue
         for i = 1:length(old_node.edges)
             edge = old_node.edges[i]
             isassigned(old_node.values, i) || continue
             val = old_node.values[i]
-            orig_val = val
             if is_old(compact, old_node_ssa) && isa(val, SSAValue)
                 val = OldSSAValue(val.id)
             end
-            if isa(val, Union{NewSSAValue, SSAValue, OldSSAValue})
+            if isa(val, AnySSAValue)
                 val = simple_walk(compact, val)
             end
             if val in keys(lifted_leaves)
@@ -492,22 +683,20 @@ function perform_lifting!(compact::IncrementalCompact,
                     resize!(new_node.values, length(new_node.values)+1)
                     continue
                 end
-                lifted_val = lifted_val.x
-                if isa(lifted_val, Union{NewSSAValue, SSAValue, OldSSAValue})
-                    lifted_val = simple_walk(compact, lifted_val, (pi, idx)->true)
+                val = lifted_val.x
+                if isa(val, AnySSAValue)
+                    callback = (@nospecialize(pi), @nospecialize(idx)) -> true
+                    val = simple_walk(compact, val, callback)
                 end
-                push!(new_node.values, lifted_val)
-            elseif isa(val, Union{NewSSAValue, SSAValue, OldSSAValue}) && val in keys(reverse_mapping)
+                push!(new_node.values, val)
+            elseif isa(val, AnySSAValue) && val in keys(reverse_mapping)
                 push!(new_node.edges, edge)
                 push!(new_node.values, lifted_phis[reverse_mapping[val]].ssa)
             else
                 # Probably ignored by path condition, skip this
             end
         end
-    end
-
-    for lf in lifted_phis
-        count_added_node!(compact, lf.node)
+        count_added_node!(compact, new_node)
     end
 
     # Fixup the stmt itself
@@ -516,81 +705,77 @@ function perform_lifting!(compact::IncrementalCompact,
     end
 
     if stmt_val in keys(lifted_leaves)
-        stmt_val = lifted_leaves[stmt_val]
-    else
-        isa(stmt_val, Union{SSAValue, OldSSAValue}) && stmt_val in keys(reverse_mapping)
-        stmt_val = RefValue{Any}(lifted_phis[reverse_mapping[stmt_val]].ssa)
+        return lifted_leaves[stmt_val]
+    elseif isa(stmt_val, AnySSAValue) && stmt_val in keys(reverse_mapping)
+        return LiftedValue(lifted_phis[reverse_mapping[stmt_val]].ssa)
     end
 
-    return stmt_val
+    return stmt_val # N.B. should never happen
 end
 
-assertion_counter = 0
-function getfield_elim_pass!(ir::IRCode)
+# NOTE we use `IdSet{Int}` instead of `BitSet` for in these passes since they work on IR after inlining,
+# which can be very large sometimes, and program counters in question are often very sparse
+const SPCSet = IdSet{Int}
+
+"""
+    sroa_pass!(ir::IRCode) -> newir::IRCode
+
+`getfield` elimination pass, a.k.a. Scalar Replacements of Aggregates optimization.
+
+This pass is based on a local field analysis by def-use chain walking.
+It looks for struct allocation sites ("definitions"), and `getfield` calls as well as
+`:foreigncall`s that preserve the structs ("usages"). If "definitions" have enough information,
+then this pass will replace corresponding usages with forwarded values.
+`mutable struct`s require additional cares and need to be handled separately from immutables.
+For `mutable struct`s, `setfield!` calls account for "definitions" also, and the pass should
+give up the lifting conservatively when there are any "intermediate usages" that may escape
+the mutable struct (e.g. non-inlined generic function call that takes the mutable struct as
+its argument).
+
+In a case when all usages are fully eliminated, `struct` allocation may also be erased as
+a result of succeeding dead code elimination.
+"""
+function sroa_pass!(ir::IRCode)
     compact = IncrementalCompact(ir)
-    insertions = Vector{Any}()
-    defuses = IdDict{Int, Tuple{IdSet{Int}, SSADefUse}}()
+    defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations
     lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}()
-    revisit_worklist = Int[]
-    #ndone, nmax = 0, 200
+    # initialization of domtree is delayed to avoid the expensive computation in many cases
+    lazydomtree = LazyDomtree(ir)
     for ((_, idx), stmt) in compact
+        # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement)
         isa(stmt, Expr) || continue
-        #ndone >= nmax && continue
-        #ndone += 1
-        result_t = compact_exprtype(compact, SSAValue(idx))
-        is_getfield = is_setfield = false
-        is_ccall = false
-        is_unchecked = false
-        # Step 1: Check whether the statement we're looking at is a getfield/setfield!
+        is_setfield = is_isdefined = false
+        field_ordering = :unspecified
         if is_known_call(stmt, setfield!, compact)
-            is_setfield = true
             4 <= length(stmt.args) <= 5 || continue
+            is_setfield = true
+            if length(stmt.args) == 5
+                field_ordering = argextype(stmt.args[5], compact)
+            end
         elseif is_known_call(stmt, getfield, compact)
-            is_getfield = true
+            3 <= length(stmt.args) <= 5 || continue
+            if length(stmt.args) == 5
+                field_ordering = argextype(stmt.args[5], compact)
+            elseif length(stmt.args) == 4
+                field_ordering = argextype(stmt.args[4], compact)
+                widenconst(field_ordering) === Bool && (field_ordering = :unspecified)
+            end
+        elseif is_known_call(stmt, isdefined, compact)
             3 <= length(stmt.args) <= 4 || continue
-        elseif is_known_call(stmt, isa, compact)
-            # TODO
-            continue
-        elseif is_known_call(stmt, typeassert, compact)
-            # Canonicalize
-            #   X = typeassert(Y, T)::S
-            # into
-            #   typeassert(Y, T)
-            #   X = PiNode(Y, S)
-            # N.B.: Inference may have a more precise type for `S`, than
-            #       just T, but from here on out, there's no problem with
-            #       using just using that.
-            # so subsequent analysis only has to deal with the latter
-            # form. TODO: This isn't the best place to put this.
-            # Also, we should probably have a version of typeassert
-            # that's defined not to return its value to make life easier
-            # for the backend.
-            pi = insert_node_here!(compact,
-                PiNode(stmt.args[2], compact.result[idx][:type]),
-                compact.result[idx][:type],
-                compact.result[idx][:line], true)
-            compact.ssa_rename[compact.idx-1] = pi
-            continue
-        elseif is_known_call(stmt, (===), compact)
-            c1 = compact_exprtype(compact, stmt.args[2])
-            c2 = compact_exprtype(compact, stmt.args[3])
-            if !(isa(c1, Const) || isa(c2, Const))
-                continue
+            is_isdefined = true
+            if length(stmt.args) == 4
+                field_ordering = argextype(stmt.args[4], compact)
+                widenconst(field_ordering) === Bool && (field_ordering = :unspecified)
             end
-            (isa(c1, Const) && isa(c2, Const)) && continue
-            lift_comparison!(compact, idx, c1, c2, stmt, lifting_cache)
-            continue
-        elseif isexpr(stmt, :call) && stmt.args[1] === :unchecked_getfield
-            is_getfield = true
-            is_unchecked = true
         elseif isexpr(stmt, :foreigncall)
             nccallargs = length(stmt.args[3]::SimpleVector)
+            preserved = Int[]
             new_preserves = Any[]
-            old_preserves = stmt.args[(6+nccallargs):end]
-            for (pidx, preserved_arg) in enumerate(old_preserves)
+            for pidx in (6+nccallargs):length(stmt.args)
+                preserved_arg = stmt.args[pidx]
                 isa(preserved_arg, SSAValue) || continue
-                let intermediaries = IdSet()
-                    callback = function(@nospecialize(pi), ssa::AnySSAValue)
+                let intermediaries = SPCSet()
+                    callback = function (@nospecialize(pi), @nospecialize(ssa))
                         push!(intermediaries, ssa.id)
                         return false
                     end
@@ -598,146 +783,160 @@ function getfield_elim_pass!(ir::IRCode)
                     isa(def, SSAValue) || continue
                     defidx = def.id
                     def = compact[defidx]
-                    if is_tuple_call(compact, def)
-                        process_immutable_preserve(new_preserves, compact, def)
-                        old_preserves[pidx] = nothing
+                    if is_known_call(def, tuple, compact)
+                        record_immutable_preserve!(new_preserves, def, compact)
+                        push!(preserved, preserved_arg.id)
                         continue
                     elseif isexpr(def, :new)
-                        typ = widenconst(compact_exprtype(compact, SSAValue(defidx)))
-                        if isa(typ, UnionAll)
-                            typ = unwrap_unionall(typ)
-                        end
-                        if typ isa DataType && !typ.mutable
-                            process_immutable_preserve(new_preserves, compact, def)
-                            old_preserves[pidx] = nothing
+                        typ = unwrap_unionall(widenconst(argextype(SSAValue(defidx), compact)))
+                        if typ isa DataType && !ismutabletype(typ)
+                            record_immutable_preserve!(new_preserves, def, compact)
+                            push!(preserved, preserved_arg.id)
                             continue
                         end
                     else
                         continue
                     end
-                    mid, defuse = get!(defuses, defidx, (IdSet{Int}(), SSADefUse()))
-                    push!(defuse.ccall_preserve_uses, idx)
+                    if defuses === nothing
+                        defuses = IdDict{Int, Tuple{SPCSet, SSADefUse}}()
+                    end
+                    mid, defuse = get!(()->(SPCSet(),SSADefUse()), defuses, defidx)
+                    push!(defuse.uses, PreserveUse(idx))
                     union!(mid, intermediaries)
                 end
                 continue
             end
             if !isempty(new_preserves)
-                old_preserves = filter(ssa->ssa !== nothing, old_preserves)
-                new_expr = Expr(:foreigncall, stmt.args[1:(6+nccallargs-1)]...,
-                    old_preserves..., new_preserves...)
-                compact[idx] = new_expr
+                compact[idx] = nothing
+                compact[idx] = form_new_preserves(stmt, preserved, new_preserves)
             end
             continue
-        else
+        else # TODO: This isn't the best place to put these
+            if is_known_call(stmt, typeassert, compact)
+                canonicalize_typeassert!(compact, idx, stmt)
+            elseif is_known_call(stmt, (===), compact)
+                lift_comparison!(===, compact, idx, stmt, lifting_cache)
+            elseif is_known_call(stmt, isa, compact)
+                lift_comparison!(isa, compact, idx, stmt, lifting_cache)
+            end
             continue
         end
-        ## Normalize the field argument to getfield/setfield
-        field = stmt.args[3]
-        isa(field, QuoteNode) && (field = field.value)
-        isa(field, Union{Int, Symbol}) || continue
 
-        struct_typ = unwrap_unionall(widenconst(compact_exprtype(compact, stmt.args[2])))
+        # analyze this `getfield` / `isdefined` / `setfield!` call
+
+        field = try_compute_field_stmt(compact, stmt)
+        field === nothing && continue
+
+        val = stmt.args[2]
+
+        struct_typ = unwrap_unionall(widenconst(argextype(val, compact)))
+        if isa(struct_typ, Union) && struct_typ <: Tuple
+            struct_typ = unswitchtupleunion(struct_typ)
+        end
+        if isa(struct_typ, Union) && is_isdefined
+            lift_comparison!(isdefined, compact, idx, stmt, lifting_cache)
+            continue
+        end
         isa(struct_typ, DataType) || continue
 
-        def, typeconstraint = stmt.args[2], struct_typ
+        struct_typ.name.atomicfields == C_NULL || continue # TODO: handle more
+        if !((field_ordering === :unspecified) ||
+             (field_ordering isa Const && field_ordering.val === :not_atomic))
+            continue
+        end
 
-        if struct_typ.mutable
-            isa(def, SSAValue) || continue
-            let intermediaries = IdSet()
-                callback = function(@nospecialize(pi), ssa::AnySSAValue)
+        # analyze this mutable struct here for the later pass
+        if ismutabletype(struct_typ)
+            isa(val, SSAValue) || continue
+            let intermediaries = SPCSet()
+                callback = function (@nospecialize(pi), @nospecialize(ssa))
                     push!(intermediaries, ssa.id)
                     return false
                 end
-                def = simple_walk(compact, def, callback)
+                def = simple_walk(compact, val, callback)
                 # Mutable stuff here
                 isa(def, SSAValue) || continue
-                mid, defuse = get!(defuses, def.id, (IdSet{Int}(), SSADefUse()))
+                if defuses === nothing
+                    defuses = IdDict{Int, Tuple{SPCSet, SSADefUse}}()
+                end
+                mid, defuse = get!(()->(SPCSet(),SSADefUse()), defuses, def.id)
                 if is_setfield
                     push!(defuse.defs, idx)
+                elseif is_isdefined
+                    push!(defuse.uses, IsdefinedUse(idx))
                 else
-                    push!(defuse.uses, idx)
+                    push!(defuse.uses, GetfieldUse(idx))
                 end
                 union!(mid, intermediaries)
             end
             continue
         elseif is_setfield
-            continue
+            continue # invalid `setfield!` call, but just ignore here
+        elseif is_isdefined
+            continue # TODO?
         end
 
-        if isa(def, Union{OldSSAValue, SSAValue})
-            def, typeconstraint = simple_walk_constraint(compact, def, typeconstraint)
-        end
+        # perform SROA on immutable structs here on
 
-        visited_phinodes = Any[]
-        leaves = walk_to_defs(compact, def, typeconstraint, visited_phinodes)
+        field = try_compute_fieldidx(struct_typ, field)
+        field === nothing && continue
 
+        leaves, visited_phinodes = collect_leaves(compact, val, struct_typ)
         isempty(leaves) && continue
 
-        field = try_compute_fieldidx_expr(struct_typ, stmt)
-        field === nothing && continue
-
-        r = lift_leaves(compact, stmt, result_t, field, leaves)
-        r === nothing && continue
-        lifted_leaves, any_undef = r
+        result_t = argextype(SSAValue(idx), compact)
+        lifted_result = lift_leaves(compact, result_t, field, leaves)
+        lifted_result === nothing && continue
+        lifted_leaves, any_undef = lifted_result
 
         if any_undef
             result_t = make_MaybeUndef(result_t)
         end
 
-#        @Base.show result_t
-#        @Base.show stmt
-#        for (k,v) in lifted_leaves
-#            @Base.show (k, v)
-#            if isa(k, AnySSAValue)
-#                @Base.show compact[k]
-#            end
-#            if isa(v, RefValue) && isa(v.x, AnySSAValue)
-#                @Base.show compact[v.x]
-#            end
-#        end
-        val = perform_lifting!(compact, visited_phinodes, field, lifting_cache, result_t, lifted_leaves, stmt.args[2])
+        val = perform_lifting!(compact,
+            visited_phinodes, field, lifting_cache, result_t, lifted_leaves, val, lazydomtree)
 
         # Insert the undef check if necessary
-        if any_undef && !is_unchecked
+        if any_undef
             if val === nothing
-                insert_node!(compact, SSAValue(idx), Nothing, Expr(:throw_undef_if_not, Symbol("##getfield##"), false))
+                insert_node!(compact, SSAValue(idx),
+                    non_effect_free(NewInstruction(Expr(:throw_undef_if_not, Symbol("##getfield##"), false), Nothing)))
             else
-                insert_node!(compact, SSAValue(idx), Nothing, Expr(:undefcheck, Symbol("##getfield##"), val.x))
+                # val must be defined
             end
         else
             @assert val !== nothing
         end
 
-        global assertion_counter
-        assertion_counter::Int += 1
-        #insert_node_here!(compact, Expr(:assert_egal, Symbol(string("assert_egal_", assertion_counter)), SSAValue(idx), val), nothing, 0, true)
-        #continue
         compact[idx] = val === nothing ? nothing : val.x
     end
 
-
     non_dce_finish!(compact)
-    # Copy the use count, `simple_dce!` may modify it and for our predicate
-    # below we need it consistent with the state of the IR here (after tracking
-    # phi node arguments, but before dce).
-    used_ssas = copy(compact.used_ssas)
-    simple_dce!(compact)
-    ir = complete(compact)
-
-    # Compute domtree, needed below, now that we have finished compacting the
-    # IR. This needs to be after we iterate through the IR with
-    # `IncrementalCompact` because removing dead blocks can invalidate the
-    # domtree.
-    @timeit "domtree 2" domtree = construct_domtree(ir.cfg)
-
-    # Now go through any mutable structs and see which ones we can eliminate
+    if defuses !== nothing
+        # now go through analyzed mutable structs and see which ones we can eliminate
+        # NOTE copy the use count here, because `simple_dce!` may modify it and we need it
+        # consistent with the state of the IR here (after tracking `PhiNode` arguments,
+        # but before the DCE) for our predicate within `sroa_mutables!`, but we also
+        # try an extra effort using a callback so that reference counts are updated
+        used_ssas = copy(compact.used_ssas)
+        simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1)
+        ir = complete(compact)
+        sroa_mutables!(ir, defuses, used_ssas, lazydomtree)
+        return ir
+    else
+        simple_dce!(compact)
+        return complete(compact)
+    end
+end
+
+function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree)
     for (idx, (intermediaries, defuse)) in defuses
         intermediaries = collect(intermediaries)
         # Check if there are any uses we did not account for. If so, the variable
         # escapes and we cannot eliminate the allocation. This works, because we're guaranteed
         # not to include any intermediaries that have dead uses. As a result, missing uses will only ever
         # show up in the nuses_total count.
-        nleaves = length(defuse.uses) + length(defuse.defs) + length(defuse.ccall_preserve_uses)
+        nleaves = length(defuse.uses) + length(defuse.defs)
         nuses = 0
         for idx in intermediaries
             nuses += used_ssas[idx]
@@ -745,112 +944,210 @@ function getfield_elim_pass!(ir::IRCode)
         nuses_total = used_ssas[idx] + nuses - length(intermediaries)
         nleaves == nuses_total || continue
         # Find the type for this allocation
-        defexpr = ir[SSAValue(idx)]
+        defexpr = ir[SSAValue(idx)][:inst]
         isexpr(defexpr, :new) || continue
-        typ = ir.stmts[idx][:type]
-        if isa(typ, UnionAll)
-            typ = unwrap_unionall(typ)
-        end
-        # Could still end up here if we tried to setfield! and immutable, which would
+        newidx = idx
+        typ = unwrap_unionall(ir.stmts[newidx][:type])
+        # Could still end up here if we tried to setfield! on an immutable, which would
         # error at runtime, but is not illegal to have in the IR.
-        typ.mutable || continue
+        ismutabletype(typ) || continue
+        typ = typ::DataType
         # Partition defuses by field
         fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)]
-        ok = true
+        all_eliminated = all_forwarded = true
         for use in defuse.uses
-            stmt = ir[SSAValue(use)]
+            if use.kind === :preserve
+                for du in fielddefuse
+                    push!(du.uses, use)
+                end
+                continue
+            end
+            stmt = ir[SSAValue(use.idx)][:inst] # == `getfield`/`isdefined` call
             # We may have discovered above that this use is dead
             # after the getfield elim of immutables. In that case,
             # it would have been deleted. That's fine, just ignore
             # the use in that case.
-            stmt === nothing && continue
-            field = try_compute_fieldidx_expr(typ, stmt)
-            field === nothing && (ok = false; break)
+            if stmt === nothing
+                all_forwarded = false
+                continue
+            end
+            field = try_compute_fieldidx_stmt(ir, stmt::Expr, typ)
+            field === nothing && @goto skip
             push!(fielddefuse[field].uses, use)
         end
-        ok || continue
-        for use in defuse.defs
-            field = try_compute_fieldidx_expr(typ, ir[SSAValue(use)])
-            field === nothing && (ok = false; break)
-            push!(fielddefuse[field].defs, use)
+        for def in defuse.defs
+            stmt = ir[SSAValue(def)][:inst]::Expr # == `setfield!` call
+            field = try_compute_fieldidx_stmt(ir, stmt, typ)
+            field === nothing && @goto skip
+            isconst(typ, field) && @goto skip # we discovered an attempt to mutate a const field, which must error
+            push!(fielddefuse[field].defs, def)
         end
-        ok || continue
         # Check that the defexpr has defined values for all the fields
         # we're accessing. In the future, we may want to relax this,
         # but we should come up with semantics for well defined semantics
         # for uninitialized fields first.
-        for (fidx, du) in pairs(fielddefuse)
+        ndefuse = length(fielddefuse)
+        blocks = Vector{Tuple{#=phiblocks=# Vector{Int}, #=allblocks=# Vector{Int}}}(undef, ndefuse)
+        for fidx in 1:ndefuse
+            du = fielddefuse[fidx]
             isempty(du.uses) && continue
+            push!(du.defs, newidx)
+            ldu = compute_live_ins(ir.cfg, du)
+            if isempty(ldu.live_in_bbs)
+                phiblocks = Int[]
+            else
+                phiblocks = iterated_dominance_frontier(ir.cfg, ldu, get(lazydomtree))
+            end
+            allblocks = sort(vcat(phiblocks, ldu.def_bbs))
+            blocks[fidx] = phiblocks, allblocks
             if fidx + 1 > length(defexpr.args)
-                ok = false
-                break
+                for i = 1:length(du.uses)
+                    use = du.uses[i]
+                    if use.kind === :isdefined
+                        if has_safe_def(ir, get(lazydomtree), allblocks, du, newidx, use.idx)
+                            ir[SSAValue(use.idx)][:inst] = true
+                        else
+                            all_eliminated = false
+                        end
+                        continue
+                    elseif use.kind === :preserve
+                        if length(du.defs) == 1 # allocation with this field unintialized
+                            # there is nothing to preserve, just ignore this use
+                            du.uses[i] = NoPreserve()
+                            continue
+                        end
+                    end
+                    has_safe_def(ir, get(lazydomtree), allblocks, du, newidx, use.idx) || @goto skip
+                end
+            else # always have some definition at the allocation site
+                for i = 1:length(du.uses)
+                    use = du.uses[i]
+                    if use.kind === :isdefined
+                        ir[SSAValue(use.idx)][:inst] = true
+                    end
+                end
             end
         end
-        ok || continue
-        preserve_uses = IdDict{Int, Vector{Any}}((idx=>Any[] for idx in IdSet{Int}(defuse.ccall_preserve_uses)))
-        # Everything accounted for. Go field by field and perform idf
-        for (fidx, du) in pairs(fielddefuse)
+        # Everything accounted for. Go field by field and perform idf:
+        # Compute domtree now, needed below, now that we have finished compacting the IR.
+        # This needs to be after we iterate through the IR with `IncrementalCompact`
+        # because removing dead blocks can invalidate the domtree.
+        domtree = get(lazydomtree)
+        local preserve_uses = nothing
+        for fidx in 1:ndefuse
+            du = fielddefuse[fidx]
             ftyp = fieldtype(typ, fidx)
             if !isempty(du.uses)
-                push!(du.defs, idx)
-                ldu = compute_live_ins(ir.cfg, du)
-                phiblocks = Int[]
-                if !isempty(ldu.live_in_bbs)
-                    phiblocks = idf(ir.cfg, ldu, domtree)
-                end
+                phiblocks, allblocks = blocks[fidx]
                 phinodes = IdDict{Int, SSAValue}()
                 for b in phiblocks
-                    n = PhiNode()
-                    phinodes[b] = insert_node!(ir, first(ir.cfg.blocks[b].stmts), ftyp, n)
+                    phinodes[b] = insert_node!(ir, first(ir.cfg.blocks[b].stmts),
+                        NewInstruction(PhiNode(), ftyp))
                 end
                 # Now go through all uses and rewrite them
-                allblocks = sort(vcat(phiblocks, ldu.def_bbs))
-                for stmt in du.uses
-                    ir[SSAValue(stmt)] = compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, stmt)
-                end
-                if !isbitstype(fieldtype(typ, fidx))
-                    for (use, list) in preserve_uses
-                        push!(list, compute_value_for_use(ir, domtree, allblocks, du, phinodes, fidx, use))
+                for use in du.uses
+                    if use.kind === :getfield
+                        ir[SSAValue(use.idx)][:inst] = compute_value_for_use(ir, domtree, allblocks,
+                            du, phinodes, fidx, use.idx)
+                    elseif use.kind === :isdefined
+                        continue # already rewritten if possible
+                    elseif use.kind === :nopreserve
+                        continue # nothing to preserve (may happen when there are unintialized fields)
+                    elseif use.kind === :preserve
+                        newval = compute_value_for_use(ir, domtree, allblocks,
+                            du, phinodes, fidx, use.idx)
+                        if !isbitstype(widenconst(argextype(newval, ir)))
+                            if preserve_uses === nothing
+                                preserve_uses = IdDict{Int, Vector{Any}}()
+                            end
+                            push!(get!(()->Any[], preserve_uses, use.idx), newval)
+                        end
+                    else
+                        @assert false "sroa_mutables!: unexpected use"
                     end
                 end
                 for b in phiblocks
+                    n = ir[phinodes[b]][:inst]::PhiNode
                     for p in ir.cfg.blocks[b].preds
-                        n = ir[phinodes[b]]
                         push!(n.edges, p)
                         push!(n.values, compute_value_for_block(ir, domtree,
                             allblocks, du, phinodes, fidx, p))
                     end
                 end
             end
+            all_eliminated || continue
+            # all "usages" (i.e. `getfield` and `isdefined` calls) are eliminated,
+            # now eliminate "definitions" (`setfield!`) calls
+            # (NOTE the allocation itself will be eliminated by DCE pass later)
             for stmt in du.defs
-                stmt == idx && continue
-                ir[SSAValue(stmt)] = nothing
+                stmt == newidx && continue
+                ir[SSAValue(stmt)][:inst] = nothing
             end
-            continue
         end
-        isempty(defuse.ccall_preserve_uses) && continue
-        push!(intermediaries, idx)
+        preserve_uses === nothing && continue
+        if all_forwarded
+            # this means all ccall preserves have been replaced with forwarded loads
+            # so we can potentially eliminate the allocation, otherwise we must preserve
+            # the whole allocation.
+            push!(intermediaries, newidx)
+        end
         # Insert the new preserves
-        for (use, new_preserves) in preserve_uses
-            useexpr = ir[SSAValue(use)]
-            nccallargs = length(useexpr.args[3]::SimpleVector)
-            old_preserves = let intermediaries = intermediaries
-                filter(ssa->!isa(ssa, SSAValue) || !(ssa.id in intermediaries), useexpr.args[(6+nccallargs):end])
-            end
-            new_expr = Expr(:foreigncall, useexpr.args[1:(6+nccallargs-1)]...,
-                old_preserves..., new_preserves...)
-            ir[SSAValue(use)] = new_expr
+        for (useidx, new_preserves) in preserve_uses
+            ir[SSAValue(useidx)][:inst] = form_new_preserves(ir[SSAValue(useidx)][:inst]::Expr,
+                intermediaries, new_preserves)
         end
+
+        @label skip
     end
-    ir
 end
 
-function adce_erase!(phi_uses::Vector{Int}, extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int)
+function form_new_preserves(origex::Expr, intermediates::Vector{Int}, new_preserves::Vector{Any})
+    newex = Expr(:foreigncall)
+    nccallargs = length(origex.args[3]::SimpleVector)
+    for i in 1:(6+nccallargs-1)
+        push!(newex.args, origex.args[i])
+    end
+    for i in (6+nccallargs):length(origex.args)
+        x = origex.args[i]
+        # don't need to preserve intermediaries
+        if isa(x, SSAValue) && x.id in intermediates
+            continue
+        end
+        push!(newex.args, x)
+    end
+    for i in 1:length(new_preserves)
+        push!(newex.args, new_preserves[i])
+    end
+    return newex
+end
+
+"""
+    canonicalize_typeassert!(compact::IncrementalCompact, idx::Int, stmt::Expr)
+
+Canonicalizes `X = typeassert(Y, T)::S` into `typeassert(Y, T); X = π(Y, S)`
+so that subsequent analysis only has to deal with the latter form.
+
+N.B. Inference may have a more precise type for `S`, than just `T`, but from here on out,
+there's no problem with just using that.
+We should probably have a version of `typeassert` that's defined not to return its value to
+make life easier for the backend.
+"""
+function canonicalize_typeassert!(compact::IncrementalCompact, idx::Int, stmt::Expr)
+    length(stmt.args) == 3 || return
+    pi = insert_node_here!(compact,
+        NewInstruction(
+            PiNode(stmt.args[2], compact.result[idx][:type]),
+            compact.result[idx][:type],
+            compact.result[idx][:line]), true)
+    compact.ssa_rename[compact.idx-1] = pi
+end
+
+function adce_erase!(phi_uses::Vector{Int}, extra_worklist::Vector{Int}, compact::IncrementalCompact, idx::Int, in_worklist::Bool)
     # return whether this made a change
     if isa(compact.result[idx][:inst], PhiNode)
-        return maybe_erase_unused!(extra_worklist, compact, idx, val -> phi_uses[val.id] -= 1)
+        return maybe_erase_unused!(extra_worklist, compact, idx, in_worklist, val::SSAValue -> phi_uses[val.id] -= 1)
     else
-        return maybe_erase_unused!(extra_worklist, compact, idx)
+        return maybe_erase_unused!(extra_worklist, compact, idx, in_worklist)
     end
 end
 
@@ -863,7 +1160,7 @@ function count_uses(@nospecialize(stmt), uses::Vector{Int})
     end
 end
 
-function mark_phi_cycles(compact::IncrementalCompact, safe_phis::BitSet, phi::Int)
+function mark_phi_cycles!(compact::IncrementalCompact, safe_phis::SPCSet, phi::Int)
     worklist = Int[]
     push!(worklist, phi)
     while !isempty(worklist)
@@ -879,38 +1176,145 @@ function mark_phi_cycles(compact::IncrementalCompact, safe_phis::BitSet, phi::In
     end
 end
 
+function is_some_union(@nospecialize(t))
+    isa(t, MaybeUndef) && (t = t.typ)
+    return isa(t, Union)
+end
+
+function is_union_phi(compact::IncrementalCompact, idx::Int)
+    inst = compact.result[idx]
+    return isa(inst[:inst], PhiNode) && is_some_union(inst[:type])
+end
+
+"""
+    adce_pass!(ir::IRCode) -> newir::IRCode
+
+Aggressive Dead Code Elimination pass.
+
+In addition to a simple DCE for unused values and allocations,
+this pass also nullifies `typeassert` calls that can be proved to be no-op,
+in order to allow LLVM to emit simpler code down the road.
+
+Note that this pass is more effective after SROA optimization (i.e. `sroa_pass!`),
+since SROA often allows this pass to:
+- eliminate allocation of object whose field references are all replaced with scalar values, and
+- nullify `typeassert` call whose first operand has been replaced with a scalar value
+  (, which may have introduced new type information that inference did not understand)
+
+Also note that currently this pass _needs_ to run after `sroa_pass!`, because
+the `typeassert` elimination depends on the transformation by `canonicalize_typeassert!` done
+within `sroa_pass!` which redirects references of `typeassert`ed value to the corresponding `PiNode`.
+"""
 function adce_pass!(ir::IRCode)
     phi_uses = fill(0, length(ir.stmts) + length(ir.new_nodes))
     all_phis = Int[]
+    unionphis = Pair{Int,Any}[] # sorted
     compact = IncrementalCompact(ir)
     for ((_, idx), stmt) in compact
         if isa(stmt, PhiNode)
             push!(all_phis, idx)
+            if is_some_union(compact.result[idx][:type])
+                push!(unionphis, Pair{Int,Any}(idx, Union{}))
+            end
+        elseif isa(stmt, PiNode)
+            val = stmt.val
+            if isa(val, SSAValue) && is_union_phi(compact, val.id)
+                r = searchsorted(unionphis, val.id; by = first)
+                if !isempty(r)
+                    unionphi = unionphis[first(r)]
+                    t = tmerge(unionphi[2], stmt.typ)
+                    unionphis[first(r)] = Pair{Int,Any}(unionphi[1], t)
+                end
+            end
+        else
+            if is_known_call(stmt, typeassert, compact) && length(stmt.args) == 3
+                # nullify safe `typeassert` calls
+                ty, isexact = instanceof_tfunc(argextype(stmt.args[3], compact))
+                if isexact && argextype(stmt.args[2], compact) ⊑ ty
+                    compact[idx] = nothing
+                    continue
+                end
+            end
+            for ur in userefs(stmt)
+                use = ur[]
+                if isa(use, SSAValue) && is_union_phi(compact, use.id)
+                    r = searchsorted(unionphis, use.id; by = first)
+                    if !isempty(r)
+                        deleteat!(unionphis, first(r))
+                    end
+                end
+            end
         end
     end
     non_dce_finish!(compact)
     for phi in all_phis
-        count_uses(compact.result[phi][:inst]::PhiNode, phi_uses)
+        inst = compact.result[phi]
+        for ur in userefs(inst[:inst]::PhiNode)
+            use = ur[]
+            if isa(use, SSAValue)
+                phi_uses[use.id] += 1
+                stmt = compact.result[use.id][:inst]
+                if isa(stmt, PhiNode)
+                    r = searchsorted(unionphis, use.id; by=first)
+                    if !isempty(r)
+                        unionphi = unionphis[first(r)]
+                        unionphis[first(r)] = Pair{Int,Any}(unionphi[1],
+                            tmerge(unionphi[2], inst[:type]))
+                    end
+                end
+            end
+        end
+    end
+    # Narrow any union phi nodes that have unused branches
+    for i = 1:length(unionphis)
+        unionphi = unionphis[i]
+        phi = unionphi[1]
+        t = unionphi[2]
+        if t === Union{}
+            compact.result[phi][:inst] = nothing
+            continue
+        elseif t === Any
+            continue
+        elseif compact.result[phi][:type] ⊑ t
+            continue
+        end
+        to_drop = Int[]
+        stmt = compact[phi]
+        stmt === nothing && continue
+        stmt = stmt::PhiNode
+        for i = 1:length(stmt.values)
+            if !isassigned(stmt.values, i)
+                # Should be impossible to have something used only by PiNodes that's undef
+                push!(to_drop, i)
+            elseif !hasintersect(widenconst(argextype(stmt.values[i], compact)),
+                                 widenconst(t))
+                push!(to_drop, i)
+            end
+        end
+        compact.result[phi][:type] = t
+        isempty(to_drop) && continue
+        deleteat!(stmt.values, to_drop)
+        deleteat!(stmt.edges, to_drop)
     end
     # Perform simple DCE for unused values
     extra_worklist = Int[]
     for (idx, nused) in Iterators.enumerate(compact.used_ssas)
         idx >= compact.result_idx && break
         nused == 0 || continue
-        adce_erase!(phi_uses, extra_worklist, compact, idx)
+        adce_erase!(phi_uses, extra_worklist, compact, idx, false)
     end
     while !isempty(extra_worklist)
-        adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist))
+        adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true)
     end
     # Go back and erase any phi cycles
     changed = true
     while changed
         changed = false
-        safe_phis = BitSet()
+        safe_phis = SPCSet()
         for phi in all_phis
             # Save any phi cycles that have non-phi uses
             if compact.used_ssas[phi] - phi_uses[phi] != 0
-                mark_phi_cycles(compact, safe_phis, phi)
+                mark_phi_cycles!(compact, safe_phis, phi)
             end
         end
         for phi in all_phis
@@ -919,7 +1323,7 @@ function adce_pass!(ir::IRCode)
             end
         end
         while !isempty(extra_worklist)
-            if adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist))
+            if adce_erase!(phi_uses, extra_worklist, compact, pop!(extra_worklist), true)
                 changed = true
             end
         end
@@ -928,24 +1332,36 @@ function adce_pass!(ir::IRCode)
 end
 
 function type_lift_pass!(ir::IRCode)
-    type_ctx_uses = Vector{Vector{Int}}[]
-    has_non_type_ctx_uses = IdSet{Int}()
     lifted_undef = IdDict{Int, Any}()
     insts = ir.stmts
     for idx in 1:length(insts)
         stmt = insts[idx][:inst]
         stmt isa Expr || continue
         if (stmt.head === :isdefined || stmt.head === :undefcheck)
-            val = (stmt.head === :isdefined) ? stmt.args[1] : stmt.args[2]
-            # undef can only show up by being introduced in a phi
-            # node (or an UpsilonNode() argument to a PhiC node),
-            # so lift all these nodes that have maybe undef values
+            # after optimization, undef can only show up by being introduced in
+            # a phi node (or an UpsilonNode() argument to a PhiC node), so lift
+            # all these nodes that have maybe undef values
+            val = stmt.args[(stmt.head === :isdefined) ? 1 : 2]
+            if stmt.head === :isdefined && (val isa Slot || val isa GlobalRef ||
+                    isexpr(val, :static_parameter) || val isa Argument || val isa Symbol)
+                # this is a legal node, so assume it was not introduced by
+                # slot2ssa (at worst, we might leave in a runtime check that
+                # shouldn't have been there)
+                continue
+            end
+            # otherwise, we definitely have a corrupt node from slot2ssa, and
+            # must fix or delete that now
             processed = IdDict{Int, Union{SSAValue, Bool}}()
-            while isa(val, SSAValue) && isa(insts[val.id][:inst], PiNode)
-                val = (insts[val.id][:inst]::PiNode).val
+            def = val
+            while true
+                # peek through PiNodes
+                isa(val, SSAValue) || break
+                def = insts[val.id][:inst]
+                isa(def, PiNode) || break
+                val = def.val
             end
-            if !isa(val, SSAValue) || (!isa(insts[val.id][:inst], PhiNode) && !isa(insts[val.id][:inst], PhiCNode))
-                (isa(val, GlobalRef) || isexpr(val, :static_parameter)) && continue
+            if !isa(val, SSAValue) || (!isa(def, PhiNode) && !isa(def, PhiCNode))
+                # in most cases, reaching this statement implies we had a value
                 if stmt.head === :undefcheck
                     insts[idx][:inst] = nothing
                 else
@@ -955,7 +1371,6 @@ function type_lift_pass!(ir::IRCode)
             end
             stmt_id = val.id
             worklist = Tuple{Int, Int, SSAValue, Int}[(stmt_id, 0, SSAValue(0), 0)]
-            def = insts[stmt_id][:inst]
             if !haskey(lifted_undef, stmt_id)
                 first = true
                 while !isempty(worklist)
@@ -964,10 +1379,19 @@ function type_lift_pass!(ir::IRCode)
                     if isa(def, PhiNode)
                         edges = copy(def.edges)
                         values = Vector{Any}(undef, length(edges))
-                        new_phi = length(values) == 0 ? false : insert_node!(ir, item, Bool, PhiNode(edges, values))
+                        new_phi = if length(values) == 0
+                            false
+                        else
+                            insert_node!(ir, item, NewInstruction(PhiNode(edges, values), Bool))
+                        end
                     else
+                        def = def::PhiCNode
                         values = Vector{Any}(undef, length(def.values))
-                        new_phi = length(values) == 0 ? false : insert_node!(ir, item, Bool, PhiCNode(values))
+                        new_phi = if length(values) == 0
+                            false
+                        else
+                            insert_node!(ir, item, NewInstruction(PhiCNode(values), Bool))
+                        end
                     end
                     processed[item] = new_phi
                     if first
@@ -981,7 +1405,7 @@ function type_lift_pass!(ir::IRCode)
                         elseif !isa(def.values[i], SSAValue)
                             val = true
                         else
-                            up_id = id = def.values[i].id
+                            up_id = id = (def.values[i]::SSAValue).id
                             @label restart
                             if !isa(ir.stmts[id][:type], MaybeUndef)
                                 val = true
@@ -993,7 +1417,7 @@ function type_lift_pass!(ir::IRCode)
                                     elseif !isa(node.val, SSAValue)
                                         val = true
                                     else
-                                        id = node.val.id
+                                        id = (node.val::SSAValue).id
                                         @goto restart
                                     end
                                 else
@@ -1005,7 +1429,7 @@ function type_lift_pass!(ir::IRCode)
                                         if haskey(processed, id)
                                             val = processed[id]
                                         else
-                                            push!(worklist, (id, up_id, new_phi, i))
+                                            push!(worklist, (id, up_id, new_phi::SSAValue, i))
                                             continue
                                         end
                                     else
@@ -1017,34 +1441,69 @@ function type_lift_pass!(ir::IRCode)
                         if isa(def, PhiNode)
                             values[i] = val
                         else
-                            values[i] = insert_node!(ir, up_id, Bool, UpsilonNode(val))
+                            values[i] = insert_node!(ir, up_id, NewInstruction(UpsilonNode(val), Bool))
                         end
                     end
                     if which !== SSAValue(0)
-                        phi = ir[which]
+                        phi = ir[which][:inst]
                         if isa(phi, PhiNode)
                             phi.values[use] = new_phi
                         else
                             phi = phi::PhiCNode
-                            phi.values[use] = insert_node!(ir, w_up_id, Bool, UpsilonNode(new_phi))
+                            phi.values[use] = insert_node!(ir, w_up_id, NewInstruction(UpsilonNode(new_phi), Bool))
                         end
                     end
                 end
             end
-            if stmt.head === :isdefined
-                insts[idx][:inst] = lifted_undef[stmt_id]
-            else
-                insts[idx][:inst] = Expr(:throw_undef_if_not, stmt.args[1], lifted_undef[stmt_id])
+            inst = lifted_undef[stmt_id]
+            if stmt.head === :undefcheck
+                inst = Expr(:throw_undef_if_not, stmt.args[1], inst)
             end
+            insts[idx][:inst] = inst
         end
     end
     ir
 end
 
+function is_bb_empty(ir::IRCode, bb::BasicBlock)
+    isempty(bb.stmts) && return true
+    if length(bb.stmts) == 1
+        stmt = ir[SSAValue(first(bb.stmts))][:inst]
+        return stmt === nothing || isa(stmt, GotoNode)
+    end
+    return false
+end
+
+# TODO: This is terrible, we should change the IR for GotoIfNot to gain an else case
+function is_legal_bb_drop(ir::IRCode, bbidx::Int, bb::BasicBlock)
+    # If the block we're going to is the same as the fallthrow, it's always legal to drop
+    # the block.
+    length(bb.stmts) == 0 && return true
+    if length(bb.stmts) == 1
+        stmt = ir[SSAValue(first(bb.stmts))][:inst]
+        stmt === nothing && return true
+        ((stmt::GotoNode).label == bbidx + 1) && return true
+    end
+    # Otherwise make sure we're not the fallthrough case of any predecessor
+    for pred in bb.preds
+        if pred == bbidx - 1
+            terminator = ir[SSAValue(first(bb.stmts)-1)][:inst]
+            if isa(terminator, GotoIfNot)
+                if terminator.dest != bbidx
+                    return false
+                end
+            end
+            break
+        end
+    end
+    return true
+end
+
 function cfg_simplify!(ir::IRCode)
     bbs = ir.cfg.blocks
     merge_into = zeros(Int, length(bbs))
     merged_succ = zeros(Int, length(bbs))
+    dropped_bbs = Vector{Int}() # sorted
     function follow_merge_into(idx::Int)
         while merge_into[idx] != 0
             idx = merge_into[idx]
@@ -1069,6 +1528,27 @@ function cfg_simplify!(ir::IRCode)
                     merge_into[succ] = idx
                     merged_succ[idx] = succ
                 end
+            elseif is_bb_empty(ir, bb) && is_legal_bb_drop(ir, idx, bb)
+                # If this BB is empty, we can still merge it as long as none of our successor's phi nodes
+                # reference our predecessors.
+                found_interference = false
+                for idx in bbs[succ].stmts
+                    stmt = ir[SSAValue(idx)][:inst]
+                    stmt === nothing && continue
+                    isa(stmt, PhiNode) || break
+                    for edge in stmt.edges
+                        for pred in bb.preds
+                            if pred == edge
+                                found_interference = true
+                                @goto done
+                            end
+                        end
+                    end
+                end
+                @label done
+                if !found_interference
+                    push!(dropped_bbs, idx)
+                end
             end
         end
     end
@@ -1085,6 +1565,10 @@ function cfg_simplify!(ir::IRCode)
         if i != 1 && length(ir.cfg.blocks[i].preds) == 0
             bb_rename_succ[i] = -1
         end
+        # Mark dropped blocks for fixup
+        if !isempty(searchsorted(dropped_bbs, i))
+            bb_rename_succ[i] = -bbs[i].succs[1]
+        end
 
         bb_rename_succ[i] != 0 && continue
 
@@ -1101,6 +1585,30 @@ function cfg_simplify!(ir::IRCode)
                 break
             end
             curr += 1
+            if !isempty(searchsorted(dropped_bbs, curr))
+                break
+            end
+        end
+    end
+
+    # Compute map from new to old blocks
+    result_bbs = Int[findfirst(j->i==j, bb_rename_succ) for i = 1:max_bb_num-1]
+
+    # Fixup dropped BBs
+    resolved_all = false
+    while !resolved_all
+        # TODO: There are faster ways to do this
+        resolved_all = true
+        for bb in dropped_bbs
+            obb = bb_rename_succ[bb]
+            if obb < -1
+                nsucc = bb_rename_succ[-obb]
+                if nsucc == -1
+                    nsucc = -merge_into[-obb]
+                end
+                bb_rename_succ[bb] = nsucc
+                resolved_all = false
+            end
         end
     end
 
@@ -1113,13 +1621,14 @@ function cfg_simplify!(ir::IRCode)
             bb_rename_pred[i] = -1
             continue
         end
-        bbnum = follow_merge_into(i)
+        pred = i
+        while pred !== 1 && !isempty(searchsorted(dropped_bbs, pred))
+            pred = bbs[pred].preds[1]
+        end
+        bbnum = follow_merge_into(pred)
         bb_rename_pred[i] = bb_rename_succ[bbnum]
     end
 
-    # Compute map from new to old blocks
-    result_bbs = Int[findfirst(j->i==j, bb_rename_succ) for i = 1:max_bb_num-1]
-
     # Compute new block lengths
     result_bbs_lengths = zeros(Int, max_bb_num-1)
     for (idx, orig_bb) in enumerate(result_bbs)
@@ -1146,12 +1655,12 @@ function cfg_simplify!(ir::IRCode)
         # Compute (renamed) successors and predecessors given (renamed) block
         function compute_succs(i)
             orig_bb = follow_merged_succ(result_bbs[i])
-            return map(i -> bb_rename_succ[i], bbs[orig_bb].succs)
+            return Int[bb_rename_succ[i] for i in bbs[orig_bb].succs]
         end
         function compute_preds(i)
             orig_bb = result_bbs[i]
             preds = bbs[orig_bb].preds
-            return map(pred -> bb_rename_pred[pred], preds)
+            return Int[bb_rename_pred[pred] for pred in preds]
         end
 
         BasicBlock[
@@ -1163,6 +1672,25 @@ function cfg_simplify!(ir::IRCode)
             for i = 1:length(result_bbs)]
     end
 
+    # Fixup terminators for any blocks that would have caused double edges
+    for (bbidx, (new_bb, old_bb)) in enumerate(zip(cresult_bbs, result_bbs))
+        @assert length(new_bb.succs) <= 2
+        length(new_bb.succs) <= 1 && continue
+        if new_bb.succs[1] == new_bb.succs[2]
+            terminator = ir[SSAValue(last(bbs[old_bb].stmts))]
+            @assert isa(terminator[:inst], GotoIfNot)
+            terminator[:inst] = GotoNode(terminator[:inst].dest)
+            pop!(new_bb.succs)
+            new_succ = cresult_bbs[new_bb.succs[1]]
+            for (i, nsp) in enumerate(new_succ.preds)
+                if nsp == bbidx
+                    deleteat!(new_succ.preds, i)
+                    break
+                end
+            end
+        end
+    end
+
     compact = IncrementalCompact(ir, true)
     # Run instruction compaction to produce the result,
     # but we're messing with the CFG
diff --git a/base/compiler/ssair/queries.jl b/base/compiler/ssair/queries.jl
deleted file mode 100644
index 6a6ac89c91e7c6..00000000000000
--- a/base/compiler/ssair/queries.jl
+++ /dev/null
@@ -1,87 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-Determine whether a statement is side-effect-free, i.e. may be removed if it has no uses.
-"""
-function stmt_effect_free(@nospecialize(stmt), @nospecialize(rt), src, sptypes::Vector{Any})
-    isa(stmt, PiNode) && return true
-    isa(stmt, PhiNode) && return true
-    isa(stmt, ReturnNode) && return false
-    isa(stmt, GotoNode) && return false
-    isa(stmt, GotoIfNot) && return false
-    isa(stmt, Slot) && return false # Slots shouldn't occur in the IR at this point, but let's be defensive here
-    isa(stmt, GlobalRef) && return isdefined(stmt.mod, stmt.name)
-    if isa(stmt, Expr)
-        e = stmt::Expr
-        head = e.head
-        if head === :static_parameter
-            etyp = sptypes[e.args[1]]
-            # if we aren't certain enough about the type, it might be an UndefVarError at runtime
-            return isa(etyp, Const)
-        end
-        ea = e.args
-        if head === :call
-            f = argextype(ea[1], src, sptypes)
-            f = singleton_type(f)
-            f === nothing && return false
-            is_return_type(f) && return true
-            if isa(f, IntrinsicFunction)
-                intrinsic_effect_free_if_nothrow(f) || return false
-                return intrinsic_nothrow(f,
-                        Any[argextype(ea[i], src, sptypes) for i = 2:length(ea)])
-            end
-            contains_is(_PURE_BUILTINS, f) && return true
-            contains_is(_PURE_OR_ERROR_BUILTINS, f) || return false
-            rt === Bottom && return false
-            return _builtin_nothrow(f, Any[argextype(ea[i], src, sptypes) for i = 2:length(ea)], rt)
-        elseif head === :new
-            a = ea[1]
-            typ = argextype(a, src, sptypes)
-            # `Expr(:new)` of unknown type could raise arbitrary TypeError.
-            typ, isexact = instanceof_tfunc(typ)
-            isexact || return false
-            isconcretedispatch(typ) || return false
-            typ = typ::DataType
-            fieldcount(typ) >= length(ea) - 1 || return false
-            for fld_idx in 1:(length(ea) - 1)
-                eT = argextype(ea[fld_idx + 1], src, sptypes)
-                fT = fieldtype(typ, fld_idx)
-                eT ⊑ fT || return false
-            end
-            return true
-        elseif head === :isdefined || head === :the_exception || head === :copyast || head === :inbounds || head === :boundscheck
-            return true
-        else
-            # e.g. :loopinfo
-            return false
-        end
-    end
-    return true
-end
-
-function abstract_eval_ssavalue(s::SSAValue, src::IRCode)
-    return types(src)[s]
-end
-
-function abstract_eval_ssavalue(s::SSAValue, src::IncrementalCompact)
-    return types(src)[s]
-end
-
-function compact_exprtype(compact::IncrementalCompact, @nospecialize(value))
-    if isa(value, AnySSAValue)
-        return types(compact)[value]
-    elseif isa(value, Argument)
-        return compact.ir.argtypes[value.n]
-    end
-    return argextype(value, compact.ir, compact.ir.sptypes)
-end
-
-is_tuple_call(ir::IRCode, @nospecialize(def)) = isa(def, Expr) && is_known_call(def, tuple, ir, ir.sptypes)
-is_tuple_call(compact::IncrementalCompact, @nospecialize(def)) = isa(def, Expr) && is_known_call(def, tuple, compact)
-function is_known_call(e::Expr, @nospecialize(func), src::IncrementalCompact)
-    if e.head !== :call
-        return false
-    end
-    f = compact_exprtype(src, e.args[1])
-    return singleton_type(f) === func
-end
diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl
index 861c75c7d888b6..f4c826a45156fd 100644
--- a/base/compiler/ssair/show.jl
+++ b/base/compiler/ssair/show.jl
@@ -47,7 +47,7 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng
         # XXX: this is wrong if `sig` is not a concretetype method
         # more correct would be to use `fieldtype(sig, i)`, but that would obscure / discard Varargs information in show
         sig = linfo.specTypes == Tuple ? Core.svec() : Base.unwrap_unionall(linfo.specTypes).parameters::Core.SimpleVector
-        print_arg(i) = sprint() do io
+        print_arg(i) = sprint(; context=io) do io
             show_unquoted(io, stmt.args[i], indent)
             if (i - 1) <= length(sig)
                 print(io, "::", sig[i - 1])
@@ -64,6 +64,10 @@ function print_stmt(io::IO, idx::Int, @nospecialize(stmt), used::BitSet, maxleng
         show_unquoted_phinode(io, stmt, indent, "#")
     elseif stmt isa GotoIfNot
         show_unquoted_gotoifnot(io, stmt, indent, "#")
+    elseif stmt isa TypedSlot
+        # call `show` with the type set to Any so it will not be shown, since
+        # we will show the type ourselves.
+        show_unquoted(io, SlotNumber(stmt.id), indent, show_type ? prec_decl : 0)
     # everything else in the IR, defer to the generic AST printer
     else
         show_unquoted(io, stmt, indent, show_type ? prec_decl : 0)
@@ -75,14 +79,15 @@ show_unquoted(io::IO, val::Argument, indent::Int, prec::Int) = show_unquoted(io,
 
 show_unquoted(io::IO, stmt::PhiNode, indent::Int, ::Int) = show_unquoted_phinode(io, stmt, indent, "%")
 function show_unquoted_phinode(io::IO, stmt::PhiNode, indent::Int, prefix::String)
-    args = map(1:length(stmt.edges)) do i
+    args = String[let
         e = stmt.edges[i]
         v = !isassigned(stmt.values, i) ? "#undef" :
-            sprint() do io′
+            sprint(; context=io) do io′
                 show_unquoted(io′, stmt.values[i], indent)
             end
-        return "$prefix$e => $v"
-    end
+        "$prefix$e => $v"
+        end for i in 1:length(stmt.edges)
+    ]
     print(io, "φ ", '(')
     join(io, args, ", ")
     print(io, ')')
@@ -188,7 +193,7 @@ example (taken from `@code_typed sin(1.0)`):
 ```
 
 The three annotations are indicated with `*`. The first one is the line number of the
-active function (printed once whenver the outer most line number changes). The second
+active function (printed once whenever the outer most line number changes). The second
 is the inlining indicator. The number of lines indicate the level of nesting, with a
 half-size line (╷) indicating the start of a scope and a full size line (│) indicating
 a continuing scope. The last annotation is the most complicated one. It is a heuristic
@@ -197,7 +202,7 @@ scope that hasn't been printed before. Let's work a number of examples to see th
 and tradeoffs involved.
 
 ```
-f() = leaf_function() # Delibarately not defined to end up in the IR verbatim
+f() = leaf_function() # Deliberately not defined to end up in the IR verbatim
 g() = f()
 h() = g()
 top_function() = h()
@@ -371,20 +376,40 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
                 nctx = i
             end
             update_line_only::Bool = false
-            if collapse && 0 < nctx
-                # check if we're adding more frames with the same method name,
-                # if so, drop all existing calls to it from the top of the context
-                # AND check if instead the context was previously printed that way
-                # but now has removed the recursive frames
-                let method = method_name(context[nctx])
-                    if (nctx < nframes && method_name(DI[nframes - nctx]) === method) ||
-                       (nctx < length(context) && method_name(context[nctx + 1]) === method)
-                        update_line_only = true
-                        while nctx > 0 && method_name(context[nctx]) === method
-                            nctx -= 1
+            if collapse
+                if nctx > 0
+                    # check if we're adding more frames with the same method name,
+                    # if so, drop all existing calls to it from the top of the context
+                    # AND check if instead the context was previously printed that way
+                    # but now has removed the recursive frames
+                    let method = method_name(context[nctx]) # last matching frame
+                        if (nctx < nframes && method_name(DI[nframes - nctx]) === method) ||
+                           (nctx < length(context) && method_name(context[nctx + 1]) === method)
+                            update_line_only = true
+                            while nctx > 0 && method_name(context[nctx]) === method
+                                nctx -= 1
+                            end
+                        end
+                    end
+                end
+                # look at the first non-matching element to see if we are only changing the line number
+                if !update_line_only && nctx < length(context) && nctx < nframes
+                    let CtxLine = context[nctx + 1],
+                        FrameLine = DI[nframes - nctx]
+                        if method_name(CtxLine) === method_name(FrameLine)
+                            update_line_only = true
                         end
                     end
                 end
+            elseif nctx < length(context) && nctx < nframes
+                # look at the first non-matching element to see if we are only changing the line number
+                let CtxLine = context[nctx + 1],
+                    FrameLine = DI[nframes - nctx]
+                    if CtxLine.file === FrameLine.file &&
+                            method_name(CtxLine) === method_name(FrameLine)
+                        update_line_only = true
+                    end
+                end
             end
             # examine what frames we're returning from
             if nctx < length(context)
@@ -400,16 +425,6 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
                     end
                 else
                     npops = length(context) - nctx
-                    # look at the first non-matching element to see if we are only changing the line number
-                    if !update_line_only && nctx < nframes
-                        let CtxLine = context[nctx + 1],
-                            FrameLine = DI[nframes - nctx]
-                            if CtxLine.file === FrameLine.file &&
-                                    method_name(CtxLine) === method_name(FrameLine)
-                                update_line_only = true
-                            end
-                        end
-                    end
                 end
                 resize!(context, nctx)
                 update_line_only && (npops -= 1)
@@ -448,7 +463,7 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
                     if frame.line != typemax(frame.line) && frame.line != 0
                         print(io, ":", frame.line)
                     end
-                    print(io, " within `", method_name(frame), "'")
+                    print(io, " within `", method_name(frame), "`")
                     if collapse
                         method = method_name(frame)
                         while nctx < nframes
@@ -480,31 +495,81 @@ function DILineInfoPrinter(linetable::Vector, showtypes::Bool=false)
     return emit_lineinfo_update
 end
 
+# line_info_preprinter(io::IO, indent::String, idx::Int) may print relevant info
+#   at the beginning of the line, and should at least print `indent`. It returns a
+#   string that will be printed after the final basic-block annotation.
+# line_info_postprinter(io::IO, typ, used::Bool) prints the type-annotation at the end
+#   of the statement
+# should_print_stmt(idx::Int) -> Bool: whether the statement at index `idx` should be
+#   printed as part of the IR or not
+# bb_color: color used for printing the basic block brackets on the left
+struct IRShowConfig
+    line_info_preprinter
+    line_info_postprinter
+    should_print_stmt
+    bb_color::Symbol
+    function IRShowConfig(line_info_preprinter, line_info_postprinter=default_expr_type_printer;
+                          should_print_stmt=Returns(true), bb_color::Symbol=:light_black)
+        return new(line_info_preprinter, line_info_postprinter, should_print_stmt, bb_color)
+    end
+end
 
-function show_ir(io::IO, code::IRCode, expr_type_printer=default_expr_type_printer; verbose_linetable=false)
-    cols = (displaysize(io)::Tuple{Int,Int})[2]
-    used = BitSet()
+struct _UNDEF
+    global const UNDEF = _UNDEF.instance
+end
+
+function _stmt(code::IRCode, idx::Int)
     stmts = code.stmts
-    isempty(stmts) && return # unlikely, but avoid errors from reducing over empty sets
-    cfg = code.cfg
-    max_bb_idx_size = length(string(length(cfg.blocks)))
-    new_nodes = code.new_nodes.stmts
-    new_nodes_info = code.new_nodes.info
-    bb_idx = 1
-    for stmt in stmts
-        scan_ssa_use!(push!, used, stmt[:inst])
-    end
-    if any(i -> !isassigned(new_nodes.inst, i), 1:length(new_nodes))
-        printstyled(io, "ERROR: New node array has unset entry\n", color=:red)
-        new_nodes_perm = filter(i -> isassigned(new_nodes.inst, i), 1:length(new_nodes))
-    else
-        new_nodes_perm = collect(1:length(new_nodes))
-    end
-    for nn in new_nodes_perm
-        scan_ssa_use!(push!, used, new_nodes[nn][:inst])
+    return isassigned(stmts.inst, idx) ? stmts[idx][:inst] : UNDEF
+end
+function _stmt(code::CodeInfo, idx::Int)
+    code = code.code
+    return isassigned(code, idx) ? code[idx] : UNDEF
+end
+
+function _type(code::IRCode, idx::Int)
+    stmts = code.stmts
+    return isassigned(stmts.type, idx) ? stmts[idx][:type] : UNDEF
+end
+function _type(code::CodeInfo, idx::Int)
+    types = code.ssavaluetypes
+    types isa Vector{Any} || return nothing
+    return isassigned(types, idx) ? types[idx] : UNDEF
+end
+
+function statement_indices_to_labels(stmt, cfg::CFG)
+    # convert statement index to labels, as expected by print_stmt
+    if stmt isa Expr
+        if stmt.head === :enter && length(stmt.args) == 1 && stmt.args[1] isa Int
+            stmt = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int))
+        end
+    elseif isa(stmt, GotoIfNot)
+        stmt = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest))
+    elseif stmt isa GotoNode
+        stmt = GotoNode(block_for_inst(cfg, stmt.label))
+    elseif stmt isa PhiNode
+        e = stmt.edges
+        stmt = PhiNode(Int32[block_for_inst(cfg, Int(e[i])) for i in 1:length(e)], stmt.values)
     end
-    sort!(new_nodes_perm, by = x -> (x = new_nodes_info[x]; (x.pos, x.attach_after)))
-    perm_idx = 1
+    return stmt
+end
+
+# Show a single statement, code.stmts[idx]/code.code[idx], in the context of the whole IRCode/CodeInfo.
+# Returns the updated value of bb_idx.
+# pop_new_node!(idx::Int) -> (node_idx, new_node_inst, new_node_type) may return a new
+#   node at the current index `idx`, which is printed before the statement at index
+#   `idx`. This function is repeatedly called until it returns `nothing`
+function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, config::IRShowConfig,
+                      used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing))
+    return show_ir_stmt(io, code, idx, config.line_info_preprinter, config.line_info_postprinter,
+                        used, cfg, bb_idx; pop_new_node!, config.bb_color)
+end
+
+function show_ir_stmt(io::IO, code::Union{IRCode, CodeInfo}, idx::Int, line_info_preprinter, line_info_postprinter,
+                      used::BitSet, cfg::CFG, bb_idx::Int; pop_new_node! = Returns(nothing), bb_color=:light_black)
+    stmt = _stmt(code, idx)
+    type = _type(code, idx)
+    max_bb_idx_size = length(string(length(cfg.blocks)))
 
     if isempty(used)
         maxlength_idx = 0
@@ -512,255 +577,233 @@ function show_ir(io::IO, code::IRCode, expr_type_printer=default_expr_type_print
         maxused = maximum(used)
         maxlength_idx = length(string(maxused))
     end
-    if !verbose_linetable
-        (loc_annotations, loc_methods, loc_lineno) = compute_ir_line_annotations(code)
-        max_loc_width = maximum(length(str) for str in loc_annotations)
-        max_lineno_width = maximum(length(str) for str in loc_lineno)
-        max_method_width = maximum(length(str) for str in loc_methods)
+
+    if stmt === UNDEF
+        # This is invalid, but do something useful rather
+        # than erroring, to make debugging easier
+        printstyled(io, "#UNDEF\n", color=:red)
+        return bb_idx
     end
-    max_depth = maximum(compute_inlining_depth(code.linetable, stmts[i][:line]) for i in 1:length(stmts.line))
-    last_stack = []
-    for idx in 1:length(stmts)
-        if !isassigned(stmts.inst, idx)
-            # This is invalid, but do something useful rather
-            # than erroring, to make debugging easier
-            printstyled(io, "#UNDEF\n", color=:red)
-            continue
-        end
-        stmt = stmts[idx]
+
+    i = 1
+    while true
+        next = pop_new_node!(idx)
         # Compute BB guard rail
         if bb_idx > length(cfg.blocks)
-            # Even if invariants are violated, try our best to still print
-            bbrange = (length(cfg.blocks) == 0 ? 1 : last(cfg.blocks[end].stmts) + 1):typemax(Int)
-            bb_idx_str = "!"
-            bb_type = "─"
+            # If invariants are violated, print a special leader
+            linestart = " "^(max_bb_idx_size + 2) # not inside a basic block bracket
+            inlining_indent = line_info_preprinter(io, linestart, i == 1 ? idx : 0)
+            printstyled(io, "!!! ", "─"^max_bb_idx_size, color=bb_color)
         else
             bbrange = cfg.blocks[bb_idx].stmts
             bbrange = bbrange.start:bbrange.stop
-            bb_idx_str = string(bb_idx)
-            bb_type = length(cfg.blocks[bb_idx].preds) <= 1 ? "─" : "┄"
-        end
-        bb_pad = max_bb_idx_size - length(bb_idx_str)
-        bb_start_str = string(bb_idx_str, " ", bb_type, "─"^bb_pad, " ")
-        bb_guard_rail_cont = string("│  ", " "^max_bb_idx_size)
-        if idx == first(bbrange)
-            bb_guard_rail = bb_start_str
-        else
-            bb_guard_rail = bb_guard_rail_cont
-        end
-        # Print linetable information
-        if verbose_linetable
-            stack = compute_loc_stack(code.linetable, stmt[:line])
-            # We need to print any stack frames that did not exist in the last stack
-            ndepth = max(1, length(stack))
-            rail = string(" "^(max_depth+1-ndepth), "│"^ndepth)
-            start_column = cols - max_depth - 10
-            for (i, x) in enumerate(stack)
-                if i > length(last_stack) || last_stack[i] != x
-                    entry = code.linetable[x]
-                    printstyled(io, "\e[$(start_column)G$(rail)\e[1G", color = :light_black)
-                    print(io, bb_guard_rail)
-                    ssa_guard = " "^(maxlength_idx + 4 + (i - 1))
-                    entry_label = "$(ssa_guard)$(method_name(entry)) at $(entry.file):$(entry[:line]) "
-                    hline = string("─"^(start_column-length(entry_label)-length(bb_guard_rail)+max_depth-i), "┐")
-                    printstyled(io, string(entry_label, hline), "\n"; color=:light_black)
-                    bb_guard_rail = bb_guard_rail_cont
-                end
-            end
-            printstyled(io, "\e[$(start_column)G$(rail)\e[1G", color = :light_black)
-            last_stack = stack
-        else
-            if idx <= length(loc_annotations)
-                # N.B.: The line array length not matching is invalid,
-                # but let's be robust here
-                annotation = loc_annotations[idx]
-                loc_method = loc_methods[idx]
-                lineno = loc_lineno[idx]
+            # Print line info update
+            linestart = idx == first(bbrange) ? "  " : sprint(io -> printstyled(io, "│ ", color=bb_color), context=io)
+            linestart *= " "^max_bb_idx_size
+            # idx == 0 means only indentation is printed, so we don't print linfos
+            # multiple times if the are new nodes
+            inlining_indent = line_info_preprinter(io, linestart, i == 1 ? idx : 0)
+
+            if i == 1 && idx == first(bbrange)
+                bb_idx_str = string(bb_idx)
+                bb_pad = max_bb_idx_size - length(bb_idx_str)
+                bb_type = length(cfg.blocks[bb_idx].preds) <= 1 ? "─" : "┄"
+                printstyled(io, bb_idx_str, " ", bb_type, "─"^bb_pad, color=bb_color)
+            elseif next === nothing && idx == last(bbrange) # print separator
+                printstyled(io, "└", "─"^(1 + max_bb_idx_size), color=bb_color)
             else
-                annotation = "!"
-                loc_method = ""
-                lineno = ""
-            end
-            # Print location information right aligned. If the line below is too long, it'll overwrite this,
-            # but that's what we want.
-            if get(io, :color, false)
-                method_start_column = cols - max_method_width - max_loc_width - 2
-                filler = " "^(max_loc_width-length(annotation))
-                printstyled(io, "\e[$(method_start_column)G$(annotation)$(filler)$(loc_method)\e[1G", color = :light_black)
+                printstyled(io, "│ ", " "^max_bb_idx_size, color=bb_color)
             end
-            printstyled(io, lineno, " "^(max_lineno_width - length(lineno) + 1); color = :light_black)
         end
-        idx != last(bbrange) && print(io, bb_guard_rail)
-        print_sep = false
-        if idx == last(bbrange)
-            print_sep = true
+        print(io, inlining_indent, " ")
+
+        if next === nothing
+            if bb_idx <= length(cfg.blocks) && idx == last(bbrange)
+                bb_idx += 1
+            end
+            break
         end
-        floop = true
+
         # print new nodes first in the right position
-        while perm_idx <= length(new_nodes_perm)
-            node_idx = new_nodes_perm[perm_idx]
-            if new_nodes_info[node_idx].pos != idx
-                break
-            end
-            perm_idx += 1
-            if !floop && !verbose_linetable
-                print(io, " "^(max_lineno_width + 1))
-            end
-            if print_sep
-                if idx == first(bbrange) && floop
-                    print(io, bb_start_str)
-                else
-                    print(io, "│  ", " "^max_bb_idx_size)
-                end
-            end
-            print_sep = true
-            floop = false
-            new_node = new_nodes[node_idx]
-            node_idx += length(stmts)
-            show_type = should_print_ssa_type(new_node[:inst])
+        node_idx, new_node_inst, new_node_type = next
+
+        @assert new_node_inst !== UNDEF # we filtered these out earlier
+        show_type = should_print_ssa_type(new_node_inst)
+        let maxlength_idx=maxlength_idx, show_type=show_type
             with_output_color(:green, io) do io′
-                print_stmt(io′, node_idx, new_node[:inst], used, maxlength_idx, false, show_type)
-            end
-            if !isassigned(stmts.type, idx) # try to be robust against errors
-                printstyled(io, "::#UNDEF", color=:red)
-            elseif show_type
-                expr_type_printer(io, new_node[:type], node_idx in used)
-            end
-            println(io)
-        end
-        if !floop && !verbose_linetable
-            print(io, " "^(max_lineno_width + 1))
-        end
-        if print_sep
-            if idx == first(bbrange) && floop
-                print(io, bb_start_str)
-            elseif idx == last(bbrange)
-                print(io, "└", "─"^(1 + max_bb_idx_size), " ")
-            else
-                print(io, "│  ", " "^max_bb_idx_size)
+                print_stmt(io′, node_idx, new_node_inst, used, maxlength_idx, false, show_type)
             end
         end
-        if idx == last(bbrange)
-            bb_idx += 1
-        end
-        show_type = should_print_ssa_type(stmt[:inst])
-        print_stmt(io, idx, stmt[:inst], used, maxlength_idx, true, show_type)
-        if !isassigned(stmts.type, idx) # try to be robust against errors
+
+        if new_node_type === UNDEF # try to be robust against errors
             printstyled(io, "::#UNDEF", color=:red)
         elseif show_type
-            expr_type_printer(io, stmt[:type], idx in used)
+            line_info_postprinter(IOContext(io, :idx => node_idx), new_node_type, node_idx in used)
         end
         println(io)
+        i += 1
     end
-end
-
-# Show a single statement, code.code[idx], in the context of the whole CodeInfo.
-# Returns the updated value of bb_idx.
-# line_info_preprinter(io::IO, indent::String, idx::Int) may print relevant info
-#   at the beginning of the line, and should at least print `indent`. It returns a
-#   string that will be printed after the final basic-block annotation.
-# line_info_postprinter(io::IO, typ, used::Bool) prints the type-annotation at the end
-#   of the statement
-function show_ir_stmt(io::IO, code::CodeInfo, idx::Int, line_info_preprinter, line_info_postprinter, used::BitSet, cfg::CFG, bb_idx::Int)
-    ds = get(io, :displaysize, (24, 80))::Tuple{Int,Int}
-    cols = ds[2]
-    stmts = code.code
-    types = code.ssavaluetypes
-    max_bb_idx_size = length(string(length(cfg.blocks)))
-
-    if isempty(used)
-        maxlength_idx = 0
-    else
-        maxused = maximum(used)
-        maxlength_idx = length(string(maxused))
-    end
-
-    if !isassigned(stmts, idx)
-        # This is invalid, but do something useful rather
-        # than erroring, to make debugging easier
-        printstyled(io, "#UNDEF\n", color=:red)
-        return bb_idx
-    end
-    stmt = stmts[idx]
-    # Compute BB guard rail
-    if bb_idx > length(cfg.blocks)
-        # If invariants are violated, print a special leader
-        linestart = " "^(max_bb_idx_size + 2) # not inside a basic block bracket
-        inlining_indent = line_info_preprinter(io, linestart, idx)
-        printstyled(io, "!!! ", "─"^max_bb_idx_size, color=:light_black)
-    else
-        bbrange = cfg.blocks[bb_idx].stmts
-        bbrange = bbrange.start:bbrange.stop
-        # Print line info update
-        linestart = idx == first(bbrange) ? "  " : sprint(io -> printstyled(io, "│ ", color=:light_black), context=io)
-        linestart *= " "^max_bb_idx_size
-        inlining_indent = line_info_preprinter(io, linestart, idx)
-        if idx == first(bbrange)
-            bb_idx_str = string(bb_idx)
-            bb_pad = max_bb_idx_size - length(bb_idx_str)
-            bb_type = length(cfg.blocks[bb_idx].preds) <= 1 ? "─" : "┄"
-            printstyled(io, bb_idx_str, " ", bb_type, "─"^bb_pad, color=:light_black)
-        elseif idx == last(bbrange) # print separator
-            printstyled(io, "└", "─"^(1 + max_bb_idx_size), color=:light_black)
-        else
-            printstyled(io, "│ ", " "^max_bb_idx_size, color=:light_black)
-        end
-        if idx == last(bbrange)
-            bb_idx += 1
-        end
-    end
-    print(io, inlining_indent, " ")
-    # convert statement index to labels, as expected by print_stmt
-    if stmt isa Expr
-        if stmt.head === :enter && length(stmt.args) == 1 && stmt.args[1] isa Int
-            stmt = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int))
-        end
-    elseif isa(stmt, GotoIfNot)
-        stmt = GotoIfNot(stmt.cond, block_for_inst(cfg, stmt.dest))
-    elseif stmt isa GotoNode
-        stmt = GotoNode(block_for_inst(cfg, stmt.label))
-    elseif stmt isa PhiNode
-        e = stmt.edges
-        stmt = PhiNode(Int32[block_for_inst(cfg, Int(e[i])) for i in 1:length(e)], stmt.values)
+    if code isa CodeInfo
+        stmt = statement_indices_to_labels(stmt, cfg)
     end
-    show_type = types isa Vector{Any} && should_print_ssa_type(stmt)
+    show_type = type !== nothing && should_print_ssa_type(stmt)
     print_stmt(io, idx, stmt, used, maxlength_idx, true, show_type)
-    if types isa Vector{Any} # ignore types for pre-inference code
-        if !isassigned(types, idx)
+    if type !== nothing # ignore types for pre-inference code
+        if type === UNDEF
             # This is an error, but can happen if passes don't update their type information
             printstyled(io, "::#UNDEF", color=:red)
         elseif show_type
-            typ = types[idx]
-            line_info_postprinter(io, typ, idx in used)
+            line_info_postprinter(IOContext(io, :idx => idx), type, idx in used)
         end
     end
     println(io)
     return bb_idx
 end
 
+function ircode_new_nodes_iter(code::IRCode)
+    stmts = code.stmts
+    new_nodes = code.new_nodes.stmts
+    new_nodes_info = code.new_nodes.info
+    new_nodes_perm = filter(i -> isassigned(new_nodes.inst, i), 1:length(new_nodes))
+    sort!(new_nodes_perm, by = x -> (x = new_nodes_info[x]; (x.pos, x.attach_after)))
+    perm_idx = Ref(1)
+
+    function (idx::Int)
+        perm_idx[] <= length(new_nodes_perm) || return nothing
+        node_idx = new_nodes_perm[perm_idx[]]
+        if new_nodes_info[node_idx].pos != idx
+            return nothing
+        end
+        perm_idx[] += 1
+        new_node = new_nodes[node_idx]
+        new_node_inst = isassigned(new_nodes.inst, node_idx) ? new_node[:inst] : UNDEF
+        new_node_type = isassigned(new_nodes.type, node_idx) ? new_node[:type] : UNDEF
+        node_idx += length(stmts)
+        return node_idx, new_node_inst, new_node_type
+    end
+end
+
+# print only line numbers on the left, some of the method names and nesting depth on the right
+function inline_linfo_printer(code::IRCode)
+    loc_annotations, loc_methods, loc_lineno = compute_ir_line_annotations(code)
+    max_loc_width = maximum(length, loc_annotations)
+    max_lineno_width = maximum(length, loc_lineno)
+    max_method_width = maximum(length, loc_methods)
+
+    function (io::IO, indent::String, idx::Int)
+        cols = (displaysize(io)::Tuple{Int,Int})[2]
+
+        if idx == 0
+            annotation = ""
+            loc_method = ""
+            lineno = ""
+        elseif idx <= length(loc_annotations)
+            # N.B.: The line array length not matching is invalid,
+            # but let's be robust here
+            annotation = loc_annotations[idx]
+            loc_method = loc_methods[idx]
+            lineno = loc_lineno[idx]
+        else
+            annotation = "!"
+            loc_method = ""
+            lineno = ""
+        end
+        # Print location information right aligned. If the line below is too long, it'll overwrite this,
+        # but that's what we want.
+        if get(io, :color, false)
+            method_start_column = cols - max_method_width - max_loc_width - 2
+            filler = " "^(max_loc_width-length(annotation))
+            printstyled(io, "\e[$(method_start_column)G$(annotation)$(filler)$(loc_method)\e[1G", color = :light_black)
+        end
+        printstyled(io, lineno, " "^(max_lineno_width - length(lineno) + 1); color = :light_black)
+        return ""
+    end
+end
+
+_strip_color(s::String) = replace(s, r"\e\[\d+m" => "")
+
+function statementidx_lineinfo_printer(f, code::IRCode)
+    printer = f(code.linetable)
+    function (io::IO, indent::String, idx::Int)
+        printer(io, indent, idx > 0 ? code.stmts[idx][:line] : typemin(Int32))
+    end
+end
 function statementidx_lineinfo_printer(f, code::CodeInfo)
     printer = f(code.linetable)
-    return (io::IO, indent::String, idx::Int) -> printer(io, indent, idx > 0 ? code.codelocs[idx] : typemin(Int32))
+    function (io::IO, indent::String, idx::Int)
+        printer(io, indent, idx > 0 ? code.codelocs[idx] : typemin(Int32))
+    end
 end
-statementidx_lineinfo_printer(code::CodeInfo) = statementidx_lineinfo_printer(DILineInfoPrinter, code)
+statementidx_lineinfo_printer(code) = statementidx_lineinfo_printer(DILineInfoPrinter, code)
 
-function show_ir(io::IO, code::CodeInfo, line_info_preprinter=statementidx_lineinfo_printer(code), line_info_postprinter=default_expr_type_printer)
-    ioctx = IOContext(io, :displaysize => displaysize(io)::Tuple{Int,Int})
+function stmts_used(io::IO, code::IRCode, warn_unset_entry=true)
+    stmts = code.stmts
+    used = BitSet()
+    for stmt in stmts
+        scan_ssa_use!(push!, used, stmt[:inst])
+    end
+    new_nodes = code.new_nodes.stmts
+    for nn in 1:length(new_nodes)
+        if isassigned(new_nodes.inst, nn)
+            scan_ssa_use!(push!, used, new_nodes[nn][:inst])
+        elseif warn_unset_entry
+            printstyled(io, "ERROR: New node array has unset entry\n", color=:red)
+            warn_unset_entry = false
+        end
+    end
+    return used
+end
+
+function stmts_used(::IO, code::CodeInfo)
     stmts = code.code
     used = BitSet()
-    cfg = compute_basic_blocks(stmts)
     for stmt in stmts
         scan_ssa_use!(push!, used, stmt)
     end
+    return used
+end
+
+function default_config(code::IRCode; verbose_linetable=false)
+    return IRShowConfig(verbose_linetable ? statementidx_lineinfo_printer(code)
+                                          : inline_linfo_printer(code);
+                        bb_color=:normal)
+end
+default_config(code::CodeInfo) = IRShowConfig(statementidx_lineinfo_printer(code))
+
+function show_ir(io::IO, code::Union{IRCode, CodeInfo}, config::IRShowConfig=default_config(code);
+                 pop_new_node! = code isa IRCode ? ircode_new_nodes_iter(code) : Returns(nothing))
+    stmts = code isa IRCode ? code.stmts : code.code
+    used = stmts_used(io, code)
+    cfg = code isa IRCode ? code.cfg : compute_basic_blocks(stmts)
     bb_idx = 1
 
     for idx in 1:length(stmts)
-        bb_idx = show_ir_stmt(ioctx, code, idx, line_info_preprinter, line_info_postprinter, used, cfg, bb_idx)
+        if config.should_print_stmt(code, idx, used)
+            bb_idx = show_ir_stmt(io, code, idx, config, used, cfg, bb_idx; pop_new_node!)
+        elseif bb_idx <= length(cfg.blocks) && idx == cfg.blocks[bb_idx].stmts.stop
+            bb_idx += 1
+        end
     end
 
     max_bb_idx_size = length(string(length(cfg.blocks)))
-    line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0)
+    config.line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0)
     nothing
 end
 
+tristate_letter(t::TriState) = t === ALWAYS_TRUE ? '+' : t === ALWAYS_FALSE ? '!' : '?'
+tristate_color(t::TriState) = t === ALWAYS_TRUE ? :green : t === ALWAYS_FALSE ? :red : :orange
+
+function Base.show(io::IO, e::Core.Compiler.Effects)
+    print(io, "(")
+    printstyled(io, string(tristate_letter(e.consistent), 'c'); color=tristate_color(e.consistent))
+    print(io, ',')
+    printstyled(io, string(tristate_letter(e.effect_free), 'e'); color=tristate_color(e.effect_free))
+    print(io, ',')
+    printstyled(io, string(tristate_letter(e.nothrow), 'n'); color=tristate_color(e.nothrow))
+    print(io, ',')
+    printstyled(io, string(tristate_letter(e.terminates), 't'); color=tristate_color(e.terminates))
+    print(io, ')')
+    e.nonoverlayed || printstyled(io, '′'; color=:red)
+end
+
 @specialize
diff --git a/base/compiler/ssair/slot2ssa.jl b/base/compiler/ssair/slot2ssa.jl
index 057bb72ff11521..a5dd6a0fd8f299 100644
--- a/base/compiler/ssair/slot2ssa.jl
+++ b/base/compiler/ssair/slot2ssa.jl
@@ -33,21 +33,11 @@ function scan_entry!(result::Vector{SlotInfo}, idx::Int, @nospecialize(stmt))
 end
 
 
-function lift_defuse(cfg::CFG, defuse)
-    map(defuse) do slot
-        SlotInfo(
-            Int[block_for_inst(cfg, x) for x in slot.defs],
-            Int[block_for_inst(cfg, x) for x in slot.uses],
-            slot.any_newvar
-        )
-    end
-end
-
 function scan_slot_def_use(nargs::Int, ci::CodeInfo, code::Vector{Any})
     nslots = length(ci.slotflags)
     result = SlotInfo[SlotInfo() for i = 1:nslots]
     # Set defs for arguments
-    for var in result[1:(1+nargs)]
+    for var in result[1:nargs]
         push!(var.defs, 0)
     end
     for idx in 1:length(code)
@@ -77,7 +67,7 @@ function make_ssa!(ci::CodeInfo, code::Vector{Any}, idx, slot, @nospecialize(typ
     stmt = code[idx]
     @assert isexpr(stmt, :(=))
     code[idx] = stmt.args[2]
-    ci.ssavaluetypes[idx] = typ
+    (ci.ssavaluetypes::Vector{Any})[idx] = typ
     idx
 end
 
@@ -98,17 +88,20 @@ end
 function fixup_slot!(ir::IRCode, ci::CodeInfo, idx::Int, slot::Int, @nospecialize(stmt::Union{SlotNumber, TypedSlot}), @nospecialize(ssa))
     # We don't really have the information here to get rid of these.
     # We'll do so later
-    if ssa === undef_token
-        insert_node!(ir, idx, Any, Expr(:throw_undef_if_not, ci.slotnames[slot], false))
-        return undef_token
+    if ssa === UNDEF_TOKEN
+        insert_node!(ir, idx, NewInstruction(
+            Expr(:throw_undef_if_not, ci.slotnames[slot], false), Any))
+        return UNDEF_TOKEN
     end
     if !isa(ssa, Argument) && !(ssa === nothing) && ((ci.slotflags[slot] & SLOT_USEDUNDEF) != 0)
-        insert_node!(ir, idx, Any, Expr(:undefcheck, ci.slotnames[slot], ssa))
+        # insert a temporary node. type_lift_pass! will remove it
+        insert_node!(ir, idx, NewInstruction(
+            Expr(:undefcheck, ci.slotnames[slot], ssa), Any))
     end
     if isa(stmt, SlotNumber)
         return ssa
     elseif isa(stmt, TypedSlot)
-        return NewSSAValue(insert_node!(ir, idx, stmt.typ, PiNode(ssa, stmt.typ)).id - length(ir.stmts))
+        return NewSSAValue(insert_node!(ir, idx, NewInstruction(PiNode(ssa, stmt.typ), stmt.typ)).id - length(ir.stmts))
     end
     @assert false # unreachable
 end
@@ -141,12 +134,13 @@ function fixemup!(cond, rename, ir::IRCode, ci::CodeInfo, idx::Int, @nospecializ
                 return true
             else
                 ssa = rename(val)
-                if ssa === undef_token
+                if ssa === UNDEF_TOKEN
                     return false
                 elseif !isa(ssa, SSAValue) && !isa(ssa, NewSSAValue)
                     return true
                 end
             end
+            # temporarily corrupt the isdefined node. type_lift_pass! will fix it
             stmt.args[1] = ssa
         end
         return stmt
@@ -158,12 +152,12 @@ function fixemup!(cond, rename, ir::IRCode, ci::CodeInfo, idx::Int, @nospecializ
             x = fixup_slot!(ir, ci, idx, slot_id(val), val, rename(val))
             # We inserted an undef error node. Delete subsequent statement
             # to avoid confusing the optimizer
-            if x === undef_token
+            if x === UNDEF_TOKEN
                 return nothing
             end
             op[] = x
-        elseif isa(val, GlobalRef) && !isdefined(val.mod, val.name)
-            op[] = NewSSAValue(insert_node!(ir, idx, Any, val).id - length(ir.stmts))
+        elseif isa(val, GlobalRef) && !(isdefined(val.mod, val.name) && isconst(val.mod, val.name))
+            op[] = NewSSAValue(insert_node!(ir, idx, NewInstruction(val, Any)).id - length(ir.stmts))
         end
     end
     return urs[]
@@ -179,16 +173,18 @@ function rename_uses!(ir::IRCode, ci::CodeInfo, idx::Int, @nospecialize(stmt), r
     return fixemup!(stmt->true, stmt->renames[slot_id(stmt)], ir, ci, idx, stmt)
 end
 
-function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}, flags::Vector{UInt8})
+function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any})
     # Remove `nothing`s at the end, we don't handle them well
     # (we expect the last instruction to be a terminator)
+    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
+    (; codelocs, ssaflags) = ci
     for i = length(code):-1:1
         if code[i] !== nothing
             resize!(code, i)
-            resize!(ci.ssavaluetypes, i)
-            resize!(ci.codelocs, i)
+            resize!(ssavaluetypes, i)
+            resize!(codelocs, i)
             resize!(info, i)
-            resize!(flags, i)
+            resize!(ssaflags, i)
             break
         end
     end
@@ -197,10 +193,10 @@ function strip_trailing_junk!(ci::CodeInfo, code::Vector{Any}, info::Vector{Any}
     term = code[end]
     if !isa(term, GotoIfNot) && !isa(term, GotoNode) && !isa(term, ReturnNode)
         push!(code, ReturnNode())
-        push!(ci.ssavaluetypes, Union{})
-        push!(ci.codelocs, 0)
+        push!(ssavaluetypes, Union{})
+        push!(codelocs, 0)
         push!(info, nothing)
-        push!(flags, 0x00)
+        push!(ssaflags, IR_FLAG_NULL)
     end
     nothing
 end
@@ -213,16 +209,16 @@ end
 function typ_for_val(@nospecialize(x), ci::CodeInfo, sptypes::Vector{Any}, idx::Int, slottypes::Vector{Any})
     if isa(x, Expr)
         if x.head === :static_parameter
-            return sptypes[x.args[1]]
+            return sptypes[x.args[1]::Int]
         elseif x.head === :boundscheck
             return Bool
         elseif x.head === :copyast
             return typ_for_val(x.args[1], ci, sptypes, idx, slottypes)
         end
-        return ci.ssavaluetypes[idx]
+        return (ci.ssavaluetypes::Vector{Any})[idx]
     end
     isa(x, GlobalRef) && return abstract_eval_global(x.mod, x.name)
-    isa(x, SSAValue) && return ci.ssavaluetypes[x.id]
+    isa(x, SSAValue) && return (ci.ssavaluetypes::Vector{Any})[x.id]
     isa(x, Argument) && return slottypes[x.n]
     isa(x, NewSSAValue) && return DelayedTyp(x)
     isa(x, QuoteNode) && return Const(x.value)
@@ -235,41 +231,49 @@ struct BlockLiveness
     live_in_bbs::Vector{Int}
 end
 
-# Run iterated dominance frontier
-#
-# The algorithm we have here essentially follows LLVM, which itself is a
-# a cleaned up version of the linear-time algorithm described in
-#
-#  A Linear Time Algorithm for Placing phi-Nodes (by Sreedhar and Gao)
-#
-# The algorithm here, is quite straightforward. Suppose we have a CFG:
-#
-# A -> B -> D -> F
-#  \-> C -------/
-#
-# and a corresponding dominator tree:
-#
-# A
-# |- B - D
-# |- C
-# |- F
-#
-# Now, for every definition of our slot, we simply walk down the dominator
-# tree and look for any edges that leave the sub-domtree rooted by our definition.
-#
-# E.g. in our example above, if we have a definition in `B`, we look at its successors,
-#      which is only `D`, which is dominated by `B` and hence doesn't need a phi node.
-#      Then we descend down the subtree rooted at `B` and end up in `D`. `D` has a successor
-#      `F`, which is not part of the current subtree, (i.e. not dominated by `B`), so it
-#      needs a phi node.
-#
-# Now, the key insight of that algorithm is that we have two defs, in blocks `A` and `B`,
-# and `A` dominates `B`, then we do not need to recurse into `B`, because the set of
-# potential backedges from a subtree rooted at `B` (to outside the subtree) is a strict
-# subset of those backedges from a subtree rooted at `A` (out outside the subtree rooted
-# at `A`). Note however that this does not work the other way. Thus, the algorithm
-# needs to make sure that we always visit `B` before `A`.
-function idf(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
+"""
+    iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
+        -> phinodes::Vector{Int}
+
+Run iterated dominance frontier.
+The algorithm we have here essentially follows LLVM, which itself is a
+a cleaned up version of the linear-time algorithm described in [^SG95].
+
+The algorithm here, is quite straightforward. Suppose we have a CFG:
+
+    A -> B -> D -> F
+     \\-> C ------>/
+
+and a corresponding dominator tree:
+
+    A
+    |- B - D
+    |- C
+    |- F
+
+Now, for every definition of our slot, we simply walk down the dominator
+tree and look for any edges that leave the sub-domtree rooted by our definition.
+
+In our example above, if we have a definition in `B`, we look at its successors,
+which is only `D`, which is dominated by `B` and hence doesn't need a ϕ-node.
+Then we descend down the subtree rooted at `B` and end up in `D`. `D` has a successor
+`F`, which is not part of the current subtree, (i.e. not dominated by `B`),
+so it needs a ϕ-node.
+
+Now, the key insight of that algorithm is that we have two defs, in blocks `A` and `B`,
+and `A` dominates `B`, then we do not need to recurse into `B`, because the set of
+potential backedges from a subtree rooted at `B` (to outside the subtree) is a strict
+subset of those backedges from a subtree rooted at `A` (out outside the subtree rooted
+at `A`). Note however that this does not work the other way. Thus, the algorithm
+needs to make sure that we always visit `B` before `A`.
+
+[^SG95]: Vugranam C. Sreedhar and Guang R. Gao. 1995.
+         A linear time algorithm for placing φ-nodes.
+         In Proceedings of the 22nd ACM SIGPLAN-SIGACT symposium on Principles of programming languages (POPL '95).
+         Association for Computing Machinery, New York, NY, USA, 62–73.
+         DOI: <https://doi.org/10.1145/199448.199464>.
+"""
+function iterated_dominance_frontier(cfg::CFG, liveness::BlockLiveness, domtree::DomTree)
     # This should be a priority queue, but TODO - sorted array for now
     defs = liveness.def_bbs
     pq = Tuple{Int, Int}[(defs[i], domtree.nodes[defs[i]].level) for i in 1:length(defs)]
@@ -367,11 +371,11 @@ function rename_phinode_edges(node, bb, result_order, bb_rename)
 end
 
 """
-    Sort the basic blocks in `ir` into domtree order (i.e. if bb`` is higher in
-    the domtree than bb2, it will come first in the linear order). The resulting
-    ir has the property that a linear traversal of basic blocks will also be a
-    RPO traversal and in particular, any use of an SSA value must come after (by linear
-    order) its definition.
+Sort the basic blocks in `ir` into domtree order (i.e. if `bb1` is higher in
+the domtree than `bb2`, it will come first in the linear order). The resulting
+`ir` has the property that a linear traversal of basic blocks will also be a
+RPO traversal and in particular, any use of an SSA value must come after
+(by linear order) its definition.
 """
 function domsort_ssa!(ir::IRCode, domtree::DomTree)
     # First compute the new order of basic blocks
@@ -519,12 +523,14 @@ function domsort_ssa!(ir::IRCode, domtree::DomTree)
     return new_ir
 end
 
-function compute_live_ins(cfg::CFG, defuse)
+compute_live_ins(cfg::CFG, slot::SlotInfo) = compute_live_ins(cfg, slot.defs, slot.uses)
+
+function compute_live_ins(cfg::CFG, defs::Vector{Int}, uses::Vector{Int})
     # We remove from `uses` any block where all uses are dominated
     # by a def. This prevents insertion of dead phi nodes at the top
     # of such a block if that block happens to be in a loop
-    ordered = Tuple{Int, Int, Bool}[(x, block_for_inst(cfg, x), true) for x in defuse.uses]
-    for x in defuse.defs
+    ordered = Tuple{Int, Int, Bool}[(x, block_for_inst(cfg, x), true) for x in uses]
+    for x in defs
         push!(ordered, (x, block_for_inst(cfg, x), false))
     end
     ordered = sort(ordered, by=x->x[1])
@@ -545,13 +551,13 @@ function compute_live_ins(cfg::CFG, defuse)
     extra_liveins = BitSet()
     worklist = Int[]
     for bb in bb_uses
-        append!(worklist, filter(p->p != 0 && !(p in bb_defs), cfg.blocks[bb].preds))
+        append!(worklist, Iterators.filter(p->p != 0 && !(p in bb_defs), cfg.blocks[bb].preds))
     end
     while !isempty(worklist)
         elem = pop!(worklist)
         (elem in bb_uses || elem in extra_liveins) && continue
         push!(extra_liveins, elem)
-        append!(worklist, filter(p->p != 0 && !(p in bb_defs), cfg.blocks[elem].preds))
+        append!(worklist, Iterators.filter(p->p != 0 && !(p in bb_defs), cfg.blocks[elem].preds))
     end
     append!(bb_uses, extra_liveins)
     BlockLiveness(bb_defs, bb_uses)
@@ -581,16 +587,15 @@ function recompute_type(node::Union{PhiNode, PhiCNode}, ci::CodeInfo, ir::IRCode
     return new_typ
 end
 
-function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, nargs::Int, sptypes::Vector{Any},
-                        slottypes::Vector{Any})
+function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree,
+                        defuses::Vector{SlotInfo}, slottypes::Vector{Any})
     code = ir.stmts.inst
     cfg = ir.cfg
-    left = Int[]
     catch_entry_blocks = Tuple{Int, Int}[]
     for idx in 1:length(code)
         stmt = code[idx]
         if isexpr(stmt, :enter)
-            push!(catch_entry_blocks, (block_for_inst(cfg, idx), block_for_inst(cfg, stmt.args[1])))
+            push!(catch_entry_blocks, (block_for_inst(cfg, idx), block_for_inst(cfg, stmt.args[1]::Int)))
         end
     end
 
@@ -611,7 +616,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
     for (_, exc) in catch_entry_blocks
         phicnodes[exc] = Vector{Tuple{SlotNumber, NewSSAValue, PhiCNode}}()
     end
-    @timeit "idf" for (idx, slot) in Iterators.enumerate(defuse)
+    @timeit "idf" for (idx, slot) in Iterators.enumerate(defuses)
         # No uses => no need for phi nodes
         isempty(slot.uses) && continue
         # TODO: Restore this optimization
@@ -624,12 +629,13 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 typ = MaybeUndef(Union{})
                 ssaval = nothing
                 for use in slot.uses[]
-                    insert_node!(ir, use, Union{}, Expr(:throw_undef_if_not, ci.slotnames[idx], false))
+                    insert_node!(ir, use,
+                        NewInstruction(Expr(:throw_undef_if_not, ci.slotnames[idx], false), Union{}))
                 end
                 fixup_uses!(ir, ci, code, slot.uses, idx, nothing)
             else
                 val = code[slot.defs[]].args[2]
-                typ = typ_for_val(val, ci, sptypes, slot.defs[], slottypes)
+                typ = typ_for_val(val, ci, ir.sptypes, slot.defs[], slottypes)
                 ssaval = SSAValue(make_ssa!(ci, code, slot.defs[], idx, typ))
                 fixup_uses!(ir, ci, code, slot.uses, idx, ssaval)
             end
@@ -643,7 +649,9 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 # Create a PhiC node in the catch entry block and
                 # an upsilon node in the corresponding enter block
                 node = PhiCNode(Any[])
-                phic_ssa = NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, li), Union{}, node).id - length(ir.stmts))
+                phic_ssa = NewSSAValue(
+                    insert_node!(ir, first_insert_for_bb(code, cfg, li),
+                        NewInstruction(node, Union{})).id - length(ir.stmts))
                 push!(phicnodes[li], (SlotNumber(idx), phic_ssa, node))
                 # Inform IDF that we now have a def in the catch block
                 if !(li in live.def_bbs)
@@ -651,21 +659,21 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 end
             end
         end
-        phiblocks = idf(cfg, live, domtree)
+        phiblocks = iterated_dominance_frontier(cfg, live, domtree)
         for block in phiblocks
             push!(phi_slots[block], idx)
             node = PhiNode()
-            ssa = NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, block), Union{}, node).id - length(ir.stmts))
+            ssa = NewSSAValue(insert_node!(ir,
+                first_insert_for_bb(code, cfg, block), NewInstruction(node, Union{})).id - length(ir.stmts))
             push!(phi_nodes[block], ssa=>node)
         end
-        push!(left, idx)
     end
     # Perform SSA renaming
     initial_incoming_vals = Any[
-        if 0 in defuse[x].defs
+        if 0 in defuses[x].defs
             Argument(x)
-        elseif !defuse[x].any_newvar
-            undef_token
+        elseif !defuses[x].any_newvar
+            UNDEF_TOKEN
         else
             SSAValue(-2)
         end for x in 1:length(ci.slotflags)
@@ -702,7 +710,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 continue
             end
             push!(node.edges, pred)
-            if incoming_val === undef_token
+            if incoming_val === UNDEF_TOKEN
                 resize!(node.values, length(node.values)+1)
             else
                 push!(node.values, incoming_val)
@@ -712,7 +720,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             if isa(incoming_val, NewSSAValue)
                 push!(type_refine_phi, ssaval.id)
             end
-            typ = incoming_val === undef_token ? MaybeUndef(Union{}) : typ_for_val(incoming_val, ci, sptypes, -1, slottypes)
+            typ = incoming_val === UNDEF_TOKEN ? MaybeUndef(Union{}) : typ_for_val(incoming_val, ci, ir.sptypes, -1, slottypes)
             old_entry = new_nodes.stmts[ssaval.id]
             if isa(typ, DelayedTyp)
                 push!(type_refine_phi, ssaval.id)
@@ -734,12 +742,12 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         if eidx !== nothing
             for (slot, _, node) in phicnodes[catch_entry_blocks[eidx][2]]
                 ival = incoming_vals[slot_id(slot)]
-                ivalundef = ival === undef_token
+                ivalundef = ival === UNDEF_TOKEN
                 unode = ivalundef ? UpsilonNode() : UpsilonNode(ival)
-                typ = ivalundef ? MaybeUndef(Union{}) : typ_for_val(ival, ci, sptypes, -1, slottypes)
+                typ = ivalundef ? MaybeUndef(Union{}) : typ_for_val(ival, ci, ir.sptypes, -1, slottypes)
                 push!(node.values,
                     NewSSAValue(insert_node!(ir, first_insert_for_bb(code, cfg, item),
-                                 typ, unode, true).id - length(ir.stmts)))
+                                 NewInstruction(unode, typ), true).id - length(ir.stmts)))
             end
         end
         push!(visited, item)
@@ -747,7 +755,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             stmt = code[idx]
             (isa(stmt, PhiNode) || (isexpr(stmt, :(=)) && isa(stmt.args[2], PhiNode))) && continue
             if isa(stmt, NewvarNode)
-                incoming_vals[slot_id(stmt.slot)] = undef_token
+                incoming_vals[slot_id(stmt.slot)] = UNDEF_TOKEN
                 code[idx] = nothing
             else
                 stmt = rename_uses!(ir, ci, idx, stmt, incoming_vals)
@@ -760,14 +768,14 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                 if isexpr(stmt, :(=)) && isa(stmt.args[1], SlotNumber)
                     id = slot_id(stmt.args[1])
                     val = stmt.args[2]
-                    typ = typ_for_val(val, ci, sptypes, idx, slottypes)
-                    # Having undef_token appear on the RHS is possible if we're on a dead branch.
+                    typ = typ_for_val(val, ci, ir.sptypes, idx, slottypes)
+                    # Having UNDEF_TOKEN appear on the RHS is possible if we're on a dead branch.
                     # Do something reasonable here, by marking the LHS as undef as well.
-                    if val !== undef_token
-                        incoming_vals[id] = SSAValue(make_ssa!(ci, code, idx, id, typ))
+                    if val !== UNDEF_TOKEN
+                        incoming_vals[id] = SSAValue(make_ssa!(ci, code, idx, id, typ)::Int)
                     else
                         code[idx] = nothing
-                        incoming_vals[id] = undef_token
+                        incoming_vals[id] = UNDEF_TOKEN
                     end
                     eidx = item
                     while haskey(exc_handlers, eidx)
@@ -775,12 +783,12 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
                         cidx = findfirst(x->slot_id(x[1]) == id, phicnodes[exc])
                         if cidx !== nothing
                             node = UpsilonNode(incoming_vals[id])
-                            if incoming_vals[id] === undef_token
+                            if incoming_vals[id] === UNDEF_TOKEN
                                 node = UpsilonNode()
                                 typ = MaybeUndef(Union{})
                             end
                             push!(phicnodes[exc][cidx][3].values,
-                                NewSSAValue(insert_node!(ir, idx, typ, node, true).id - length(ir.stmts)))
+                                NewSSAValue(insert_node!(ir, idx, NewInstruction(node, typ), true).id - length(ir.stmts)))
                         end
                     end
                 end
@@ -801,9 +809,10 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         end
     end
     # Convert into IRCode form
+    ssavaluetypes = ci.ssavaluetypes::Vector{Any}
     nstmts = length(ir.stmts)
     new_code = Vector{Any}(undef, nstmts)
-    ssavalmap = fill(SSAValue(-1), length(ci.ssavaluetypes) + 1)
+    ssavalmap = fill(SSAValue(-1), length(ssavaluetypes) + 1)
     result_types = Any[Any for _ in 1:nstmts]
     # Detect statement positions for assignments and construct array
     for (bb, idx) in bbidxiter(ir)
@@ -815,19 +824,19 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             new_dest = block_for_inst(cfg, stmt.dest)
             if new_dest == bb+1
                 # Drop this node - it's a noop
-                new_code[idx] = stmt.cond
+                new_code[idx] = Expr(:call, GlobalRef(Core, :typeassert), stmt.cond, GlobalRef(Core, :Bool))
             else
                 new_code[idx] = GotoIfNot(stmt.cond, new_dest)
             end
         elseif isexpr(stmt, :enter)
-            new_code[idx] = Expr(:enter, block_for_inst(cfg, stmt.args[1]))
+            new_code[idx] = Expr(:enter, block_for_inst(cfg, stmt.args[1]::Int))
             ssavalmap[idx] = SSAValue(idx) # Slot to store token for pop_exception
         elseif isexpr(stmt, :leave) || isexpr(stmt, :(=)) || isa(stmt, ReturnNode) ||
             isexpr(stmt, :meta) || isa(stmt, NewvarNode)
             new_code[idx] = stmt
         else
             ssavalmap[idx] = SSAValue(idx)
-            result_types[idx] = ci.ssavaluetypes[idx]
+            result_types[idx] = ssavaluetypes[idx]
             if isa(stmt, PhiNode)
                 edges = Int32[edge == 0 ? 0 : block_for_inst(cfg, Int(edge)) for edge in stmt.edges]
                 new_code[idx] = PhiNode(edges, stmt.values)
@@ -845,7 +854,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
             node = new_nodes.stmts[new_idx]
             phic_values = (node[:inst]::PhiCNode).values
             for i = 1:length(phic_values)
-                orig_typ = typ = typ_for_val(phic_values[i], ci, sptypes, -1, slottypes)
+                orig_typ = typ = typ_for_val(phic_values[i], ci, ir.sptypes, -1, slottypes)
                 @assert !isa(typ, MaybeUndef)
                 while isa(typ, DelayedTyp)
                     typ = types(ir)[typ.phi::NewSSAValue]
@@ -863,7 +872,7 @@ function construct_ssa!(ci::CodeInfo, ir::IRCode, domtree::DomTree, defuse, narg
         changed = false
         for new_idx in type_refine_phi
             node = new_nodes.stmts[new_idx]
-            new_typ = recompute_type(node[:inst], ci, ir, sptypes, slottypes)
+            new_typ = recompute_type(node[:inst]::Union{PhiNode,PhiCNode}, ci, ir, ir.sptypes, slottypes)
             if !(node[:type] ⊑ new_typ) || !(new_typ ⊑ node[:type])
                 node[:type] = new_typ
                 changed = true
diff --git a/base/compiler/ssair/verify.jl b/base/compiler/ssair/verify.jl
index 40cc8731ce4771..1578bdb9c348ac 100644
--- a/base/compiler/ssair/verify.jl
+++ b/base/compiler/ssair/verify.jl
@@ -14,17 +14,17 @@ end
 function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int, use_idx::Int, print::Bool)
     if isa(op, SSAValue)
         if op.id > length(ir.stmts)
-            def_bb = block_for_inst(ir.cfg, ir.new_nodes[op.id - length(ir.stmts)].pos)
+            def_bb = block_for_inst(ir.cfg, ir.new_nodes.info[op.id - length(ir.stmts)].pos)
         else
             def_bb = block_for_inst(ir.cfg, op.id)
         end
         if (def_bb == use_bb)
             if op.id > length(ir.stmts)
-                @assert ir.new_nodes[op.id - length(ir.stmts)].pos <= use_idx
+                @assert ir.new_nodes.info[op.id - length(ir.stmts)].pos <= use_idx
             else
                 if op.id >= use_idx
                     @verify_error "Def ($(op.id)) does not dominate use ($(use_idx)) in same BB"
-                    error()
+                    error("")
                 end
             end
         else
@@ -32,21 +32,21 @@ function check_op(ir::IRCode, domtree::DomTree, @nospecialize(op), use_bb::Int,
                 # At the moment, we allow GC preserve tokens outside the standard domination notion
                 #@Base.show ir
                 @verify_error "Basic Block $def_bb does not dominate block $use_bb (tried to use value $(op.id))"
-                error()
+                error("")
             end
         end
     elseif isa(op, GlobalRef)
-        if !isdefined(op.mod, op.name)
+        if !isdefined(op.mod, op.name) || !isconst(op.mod, op.name)
             @verify_error "Unbound GlobalRef not allowed in value position"
-            error()
+            error("")
         end
     elseif isa(op, Union{OldSSAValue, NewSSAValue})
         #@Base.show ir
         @verify_error "Left over SSA marker"
-        error()
+        error("")
     elseif isa(op, Union{SlotNumber, TypedSlot})
         @verify_error "Left over slot detected in converted IR"
-        error()
+        error("")
     end
 end
 
@@ -66,12 +66,12 @@ function verify_ir(ir::IRCode, print::Bool=true)
     # Verify CFG
     last_end = 0
     # Verify statements
-    domtree = construct_domtree(ir.cfg)
+    domtree = construct_domtree(ir.cfg.blocks)
     for (idx, block) in pairs(ir.cfg.blocks)
         if first(block.stmts) != last_end + 1
             #ranges = [(idx,first(bb.stmts),last(bb.stmts)) for (idx, bb) in pairs(ir.cfg.blocks)]
             @verify_error "First statement of BB $idx ($(first(block.stmts))) does not match end of previous ($last_end)"
-            error()
+            error("")
         end
         last_end = last(block.stmts)
         terminator = ir.stmts[last_end][:inst]
@@ -82,38 +82,38 @@ function verify_ir(ir::IRCode, print::Bool=true)
             c = count_int(idx, ir.cfg.blocks[p].succs)
             if c == 0
                 @verify_error "Predecessor $p of block $idx not in successor list"
-                error()
+                error("")
             elseif c == 2
                 if count_int(p, block.preds) != 2
                     @verify_error "Double edge from $p to $idx not correctly accounted"
-                    error()
+                    error("")
                 end
             end
         end
         if isa(terminator, ReturnNode)
             if !isempty(block.succs)
                 @verify_error "Block $idx ends in return or unreachable, but has successors"
-                error()
+                error("")
             end
         elseif isa(terminator, GotoNode)
             if length(block.succs) != 1 || block.succs[1] != terminator.label
                 @verify_error "Block $idx successors ($(block.succs)), does not match GotoNode terminator"
-                error()
+                error("")
             end
         elseif isa(terminator, GotoIfNot)
             if terminator.dest == idx + 1
                 @verify_error "Block $idx terminator forms a double edge to block $(idx+1)"
-                error()
+                error("")
             end
             if length(block.succs) != 2 || (block.succs != [terminator.dest, idx+1] && block.succs != [idx+1, terminator.dest])
                 @verify_error "Block $idx successors ($(block.succs)), does not match GotoIfNot terminator"
-                error()
+                error("")
             end
         elseif isexpr(terminator, :enter)
             @label enter_check
-            if length(block.succs) != 2 || (block.succs != [terminator.args[1], idx+1] && block.succs != [idx+1, terminator.args[1]])
+            if length(block.succs) != 2 || (block.succs != Int[terminator.args[1], idx+1] && block.succs != Int[idx+1, terminator.args[1]])
                 @verify_error "Block $idx successors ($(block.succs)), does not match :enter terminator"
-                error()
+                error("")
             end
         else
             if length(block.succs) != 1 || block.succs[1] != idx + 1
@@ -128,7 +128,7 @@ function verify_ir(ir::IRCode, print::Bool=true)
                     isa(stmt, PhiNode) || break
                 end
                 @verify_error "Block $idx successors ($(block.succs)), does not match fall-through terminator ($terminator)"
-                error()
+                error("")
             end
         end
         for s in block.succs
@@ -137,7 +137,7 @@ function verify_ir(ir::IRCode, print::Bool=true)
                 #@Base.show ir
                 #@Base.show ir.argtypes
                 @verify_error "Successor $s of block $idx not in predecessor list"
-                error()
+                error("")
             end
         end
     end
@@ -151,11 +151,19 @@ function verify_ir(ir::IRCode, print::Bool=true)
             @assert length(stmt.edges) == length(stmt.values)
             for i = 1:length(stmt.edges)
                 edge = stmt.edges[i]
+                for j = (i+1):length(stmt.edges)
+                    edge′ = stmt.edges[j]
+                    if edge == edge′
+                        # TODO: Move `unique` to Core.Compiler. For now we assume the predecessor list is
+                        @verify_error "Edge list φ node $idx in bb $bb not unique (double edge?)"
+                        error("")
+                    end
+                end
                 if !(edge == 0 && bb == 1) && !(edge in ir.cfg.blocks[bb].preds)
                     #@Base.show ir.argtypes
                     #@Base.show ir
                     @verify_error "Edge $edge of φ node $idx not in predecessor list"
-                    error()
+                    error("")
                 end
                 edge == 0 && continue
                 isassigned(stmt.values, i) || continue
@@ -168,11 +176,11 @@ function verify_ir(ir::IRCode, print::Bool=true)
                         #    PhiNode type was $phiT
                         #    Value type was $(ir.stmts[val.id][:type])
                         #"""
-                        #error()
+                        #error("")
                     end
                 elseif isa(val, GlobalRef) || isa(val, Expr)
                     @verify_error "GlobalRefs and Exprs are not allowed as PhiNode values"
-                    error()
+                    error("")
                 end
                 check_op(ir, domtree, val, Int(edge), last(ir.cfg.blocks[stmt.edges[i]].stmts)+1, print)
             end
@@ -181,11 +189,11 @@ function verify_ir(ir::IRCode, print::Bool=true)
                 val = stmt.values[i]
                 if !isa(val, SSAValue)
                     @verify_error "Operand $i of PhiC node $idx must be an SSA Value."
-                    error()
+                    error("")
                 end
-                if !isa(ir[val], UpsilonNode)
+                if !isa(ir[val][:inst], UpsilonNode)
                     @verify_error "Operand $i of PhiC node $idx must reference an Upsilon node."
-                    error()
+                    error("")
                 end
             end
         else
@@ -200,13 +208,20 @@ function verify_ir(ir::IRCode, print::Bool=true)
                 if stmt.head === :(=)
                     if stmt.args[1] isa SSAValue
                         @verify_error "SSAValue as assignment LHS"
-                        error()
+                        error("")
+                    end
+                    if stmt.args[2] isa GlobalRef
+                        # undefined GlobalRef as assignment RHS is OK
+                        continue
                     end
                 elseif stmt.head === :gc_preserve_end
                     # We allow gc_preserve_end tokens to span across try/catch
                     # blocks, which isn't allowed for regular SSA values, so
                     # we skip the validation below.
                     continue
+                elseif stmt.head === :isdefined && length(stmt.args) == 1 && stmt.args[1] isa GlobalRef
+                    # a GlobalRef isdefined check does not evaluate its argument
+                    continue
                 end
             end
             for op in userefs(stmt)
@@ -222,7 +237,7 @@ function verify_linetable(linetable::Vector{LineInfoNode}, print::Bool=true)
         line = linetable[i]
         if i <= line.inlined_at
             @verify_error "Misordered linetable"
-            error()
+            error("")
         end
     end
 end
diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl
index 214fd89a170788..4832ce1af4a3aa 100644
--- a/base/compiler/stmtinfo.jl
+++ b/base/compiler/stmtinfo.jl
@@ -1,65 +1,112 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+@nospecialize
+
 """
-    struct MethodMatchInfo
+    call::CallMeta
 
-Captures the result of a `method_matches` lookup for the given call. This
-info may then be used by the optimizer to inline the matches, without having
-to re-consult the method table. This info is illegal on any statement that is
-not a call to a generic function.
+A simple struct that captures both the return type (`call.rt`)
+and any additional information (`call.info`) for a given generic call.
 """
-struct MethodMatchInfo
-    results::Union{Missing, MethodLookupResult}
+struct CallMeta
+    rt::Any
+    info::Any
 end
 
 """
-    struct MethodResultPure
+    info::MethodMatchInfo
 
-This singleton represents a method result constant was proven to be
-effect-free, including being no-throw (typically because the value was computed
-by calling an `@pure` function).
+Captures the result of a `:jl_matching_methods` lookup for the given call (`info.results`).
+This info may then be used by the optimizer to inline the matches, without having
+to re-consult the method table. This info is illegal on any statement that is
+not a call to a generic function.
 """
-struct MethodResultPure end
+struct MethodMatchInfo
+    results::MethodLookupResult
+end
 
 """
-    struct UnionSplitInfo
+    info::UnionSplitInfo
 
 If inference decides to partition the method search space by splitting unions,
 it will issue a method lookup query for each such partition. This info indicates
-that such partitioning happened and wraps the corresponding MethodMatchInfo for
-each partition. This info is illegal on any statement that is not a call to a
-generic function.
+that such partitioning happened and wraps the corresponding `MethodMatchInfo` for
+each partition (`info.matches::Vector{MethodMatchInfo}`).
+This info is illegal on any statement that is not a call to a generic function.
 """
 struct UnionSplitInfo
     matches::Vector{MethodMatchInfo}
 end
 
+nmatches(info::MethodMatchInfo) = length(info.results)
+function nmatches(info::UnionSplitInfo)
+    n = 0
+    for mminfo in info.matches
+        n += nmatches(mminfo)
+    end
+    return n
+end
+
+struct ConstPropResult
+    result::InferenceResult
+end
+
+struct ConcreteResult
+    mi::MethodInstance
+    effects::Effects
+    result
+    ConcreteResult(mi::MethodInstance, effects::Effects) = new(mi, effects)
+    ConcreteResult(mi::MethodInstance, effects::Effects, @nospecialize val) = new(mi, effects, val)
+end
+
+const ConstResult = Union{ConstPropResult,ConcreteResult}
+
 """
-    struct CallMeta
+    info::ConstCallInfo
 
-A simple struct that captures both the return type (`rt`) and any additional information
-(`info`) for a given generic call.
+The precision of this call was improved using constant information.
+In addition to the original call information `info.call`, this info also keeps the results
+of constant inference `info.results::Vector{Union{Nothing,ConstResult}}`.
 """
-struct CallMeta
-    rt::Any
-    info::Any
+struct ConstCallInfo
+    call::Union{MethodMatchInfo,UnionSplitInfo}
+    results::Vector{Union{Nothing,ConstResult}}
+end
+
+"""
+    info::MethodResultPure
+
+This struct represents a method result constant was proven to be
+effect-free, including being no-throw (typically because the value was computed
+by calling an `@pure` function).
+"""
+struct MethodResultPure
+    info::Union{MethodMatchInfo,UnionSplitInfo,Bool}
+end
+let instance = MethodResultPure(false)
+    global MethodResultPure
+    MethodResultPure() = instance
 end
 
 """
-    struct AbstractIterationInfo
+    info::AbstractIterationInfo
 
 Captures all the information for abstract iteration analysis of a single value.
-Each (abstract) call to `iterate`, corresponds to one entry in `each`.
+Each (abstract) call to `iterate`, corresponds to one entry in `info.each::Vector{CallMeta}`.
 """
 struct AbstractIterationInfo
     each::Vector{CallMeta}
 end
 
+const MaybeAbstractIterationInfo = Union{Nothing, AbstractIterationInfo}
+
 """
-    struct ApplyCallInfo
+    info::ApplyCallInfo
 
 This info applies to any call of `_apply_iterate(...)` and captures both the
 info of the actual call being applied and the info for any implicit call
 to the `iterate` function. Note that it is possible for the call itself
-to be yet another `_apply_iterate`, in which case the `.call` field will
+to be yet another `_apply_iterate`, in which case the `info.call` field will
 be another `ApplyCallInfo`. This info is illegal on any statement that is
 not an `_apply_iterate` call.
 """
@@ -67,15 +114,72 @@ struct ApplyCallInfo
     # The info for the call itself
     call::Any
     # AbstractIterationInfo for each argument, if applicable
-    arginfo::Vector{Union{Nothing, AbstractIterationInfo}}
+    arginfo::Vector{MaybeAbstractIterationInfo}
 end
 
 """
-    struct UnionSplitApplyCallInfo
+    info::UnionSplitApplyCallInfo
 
-Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than MethodMatchInfo.
+Like `UnionSplitInfo`, but for `ApplyCallInfo` rather than `MethodMatchInfo`.
 This info is illegal on any statement that is not an `_apply_iterate` call.
 """
 struct UnionSplitApplyCallInfo
     infos::Vector{ApplyCallInfo}
 end
+
+"""
+    info::InvokeCallInfo
+
+Represents a resolved call to `Core.invoke`, carrying the `info.match::MethodMatch` of
+the method that has been processed.
+Optionally keeps `info.result::InferenceResult` that keeps constant information.
+"""
+struct InvokeCallInfo
+    match::MethodMatch
+    result::Union{Nothing,ConstResult}
+end
+
+"""
+    info::OpaqueClosureCallInfo
+
+Represents a resolved call of opaque closure, carrying the `info.match::MethodMatch` of
+the method that has been processed.
+Optionally keeps `info.result::InferenceResult` that keeps constant information.
+"""
+struct OpaqueClosureCallInfo
+    match::MethodMatch
+    result::Union{Nothing,ConstResult}
+end
+
+"""
+    info::OpaqueClosureCreateInfo
+
+This info may be constructed upon opaque closure construction, with `info.unspec::CallMeta`
+carrying out inference result of an unreal, partially specialized call (i.e. specialized on
+the closure environment, but not on the argument types of the opaque closure) in order to
+allow the optimizer to rewrite the return type parameter of the `OpaqueClosure` based on it.
+"""
+struct OpaqueClosureCreateInfo
+    unspec::CallMeta
+    function OpaqueClosureCreateInfo(unspec::CallMeta)
+        @assert isa(unspec.info, OpaqueClosureCallInfo)
+        return new(unspec)
+    end
+end
+
+# Stmt infos that are used by external consumers, but not by optimization.
+# These are not produced by default and must be explicitly opted into by
+# the AbstractInterpreter.
+
+"""
+    info::ReturnTypeCallInfo
+
+Represents a resolved call of `Core.Compiler.return_type`.
+`info.call` wraps the info corresponding to the call that `Core.Compiler.return_type` call
+was supposed to analyze.
+"""
+struct ReturnTypeCallInfo
+    info::Any
+end
+
+@specialize
diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl
index cbf8e4f0f591de..e6625e2d559259 100644
--- a/base/compiler/tfuncs.jl
+++ b/base/compiler/tfuncs.jl
@@ -10,7 +10,7 @@ const _NAMEDTUPLE_NAME = NamedTuple.body.body.name
 
 const INT_INF = typemax(Int) # integer infinity
 
-const N_IFUNC = reinterpret(Int32, arraylen) + 1
+const N_IFUNC = reinterpret(Int32, have_fma) + 1
 const T_IFUNC = Vector{Tuple{Int, Int, Any}}(undef, N_IFUNC)
 const T_IFUNC_COST = Vector{Int}(undef, N_IFUNC)
 const T_FFUNC_KEY = Vector{Any}()
@@ -24,16 +24,7 @@ function find_tfunc(@nospecialize f)
     end
 end
 
-const DATATYPE_NAME_FIELDINDEX = fieldindex(DataType, :name)
-const DATATYPE_PARAMETERS_FIELDINDEX = fieldindex(DataType, :parameters)
 const DATATYPE_TYPES_FIELDINDEX = fieldindex(DataType, :types)
-const DATATYPE_SUPER_FIELDINDEX = fieldindex(DataType, :super)
-const DATATYPE_MUTABLE_FIELDINDEX = fieldindex(DataType, :mutable)
-const DATATYPE_INSTANCE_FIELDINDEX = fieldindex(DataType, :instance)
-
-const TYPENAME_NAME_FIELDINDEX = fieldindex(Core.TypeName, :name)
-const TYPENAME_MODULE_FIELDINDEX = fieldindex(Core.TypeName, :module)
-const TYPENAME_WRAPPER_FIELDINDEX = fieldindex(Core.TypeName, :wrapper)
 
 ##########
 # tfuncs #
@@ -58,40 +49,55 @@ end
 add_tfunc(throw, 1, 1, (@nospecialize(x)) -> Bottom, 0)
 
 # the inverse of typeof_tfunc
-# returns (type, isexact)
+# returns (type, isexact, isconcrete, istype)
 # if isexact is false, the actual runtime type may (will) be a subtype of t
+# if isconcrete is true, the actual runtime type is definitely concrete (unreachable if not valid as a typeof)
+# if istype is true, the actual runtime value will definitely be a type (e.g. this is false for Union{Type{Int}, Int})
 function instanceof_tfunc(@nospecialize(t))
     if isa(t, Const)
-        if isa(t.val, Type)
-            return t.val, true
+        if isa(t.val, Type) && valid_as_lattice(t.val)
+            return t.val, true, isconcretetype(t.val), true
         end
-        return Bottom, true
+        return Bottom, true, false, false # runtime throws on non-Type
     end
     t = widenconst(t)
-    if t === Bottom || t === typeof(Bottom) || typeintersect(t, Type) === Bottom
-        return Bottom, true
+    if t === Bottom
+        return Bottom, true, true, false # runtime unreachable
+    elseif t === typeof(Bottom) || !hasintersect(t, Type)
+        return Bottom, true, false, false # literal Bottom or non-Type
     elseif isType(t)
         tp = t.parameters[1]
-        return tp, !has_free_typevars(tp)
+        valid_as_lattice(tp) || return Bottom, true, false, false # runtime unreachable / throws on non-Type
+        return tp, !has_free_typevars(tp), isconcretetype(tp), true
     elseif isa(t, UnionAll)
         t′ = unwrap_unionall(t)
-        t′′, isexact = instanceof_tfunc(t′)
+        t′′, isexact, isconcrete, istype = instanceof_tfunc(t′)
         tr = rewrap_unionall(t′′, t)
-        if t′′ isa DataType && !has_free_typevars(tr)
+        if t′′ isa DataType && t′′.name !== Tuple.name && !has_free_typevars(tr)
             # a real instance must be within the declared bounds of the type,
             # so we can intersect with the original wrapper.
             tr = typeintersect(tr, t′′.name.wrapper)
+            isconcrete = !isabstracttype(t′′)
+            if tr === Union{}
+                # runtime unreachable (our inference Type{T} where S is
+                # uninhabited with any runtime T that exists)
+                isexact = true
+            end
         end
-        return tr, isexact
+        return tr, isexact, isconcrete, istype
     elseif isa(t, Union)
-        ta, isexact_a = instanceof_tfunc(t.a)
-        tb, isexact_b = instanceof_tfunc(t.b)
-        ta === Union{} && return tb, isexact_b
-        tb === Union{} && return ta, isexact_a
-        ta == tb && return ta, isexact_a && isexact_b
-        return Union{ta, tb}, false # at runtime, will be exactly one of these
-    end
-    return Any, false
+        ta, isexact_a, isconcrete_a, istype_a = instanceof_tfunc(t.a)
+        tb, isexact_b, isconcrete_b, istype_b = instanceof_tfunc(t.b)
+        isconcrete = isconcrete_a && isconcrete_b
+        istype = istype_a && istype_b
+        # most users already handle the Union case, so here we assume that
+        # `isexact` only cares about the answers where there's actually a Type
+        # (and assuming other cases causing runtime errors)
+        ta === Union{} && return tb, isexact_b, isconcrete, istype
+        tb === Union{} && return ta, isexact_a, isconcrete, istype
+        return Union{ta, tb}, false, isconcrete, istype # at runtime, will be exactly one of these
+    end
+    return Any, false, false, false
 end
 bitcast_tfunc(@nospecialize(t), @nospecialize(x)) = instanceof_tfunc(t)[1]
 math_tfunc(@nospecialize(x)) = widenconst(x)
@@ -176,7 +182,6 @@ add_tfunc(ne_float, 2, 2, cmp_tfunc, 2)
 add_tfunc(lt_float, 2, 2, cmp_tfunc, 2)
 add_tfunc(le_float, 2, 2, cmp_tfunc, 2)
 add_tfunc(fpiseq, 2, 2, cmp_tfunc, 1)
-add_tfunc(fpislt, 2, 2, cmp_tfunc, 1)
 add_tfunc(eq_float_fast, 2, 2, cmp_tfunc, 1)
 add_tfunc(ne_float_fast, 2, 2, cmp_tfunc, 1)
 add_tfunc(lt_float_fast, 2, 2, cmp_tfunc, 1)
@@ -197,6 +202,7 @@ cglobal_tfunc(@nospecialize(fptr)) = Ptr{Cvoid}
 cglobal_tfunc(@nospecialize(fptr), @nospecialize(t)) = (isType(t) ? Ptr{t.parameters[1]} : Ptr)
 cglobal_tfunc(@nospecialize(fptr), t::Const) = (isa(t.val, Type) ? Ptr{t.val} : Ptr)
 add_tfunc(Core.Intrinsics.cglobal, 1, 2, cglobal_tfunc, 5)
+add_tfunc(Core.Intrinsics.have_fma, 1, 1, @nospecialize(x)->Bool, 1)
 
 function ifelse_tfunc(@nospecialize(cnd), @nospecialize(x), @nospecialize(y))
     if isa(cnd, Const)
@@ -214,7 +220,7 @@ function ifelse_tfunc(@nospecialize(cnd), @nospecialize(x), @nospecialize(y))
     end
     return tmerge(x, y)
 end
-add_tfunc(ifelse, 3, 3, ifelse_tfunc, 1)
+add_tfunc(Core.ifelse, 3, 3, ifelse_tfunc, 1)
 
 function egal_tfunc(@nospecialize(x), @nospecialize(y))
     xx = widenconditional(x)
@@ -229,7 +235,7 @@ function egal_tfunc(@nospecialize(x), @nospecialize(y))
         return Const(false)
     elseif isa(xx, Const) && isa(yy, Const)
         return Const(xx.val === yy.val)
-    elseif typeintersect(widenconst(xx), widenconst(yy)) === Bottom
+    elseif !hasintersect(widenconst(xx), widenconst(yy))
         return Const(false)
     elseif (isa(xx, Const) && y === typeof(xx.val) && isdefined(y, :instance)) ||
            (isa(yy, Const) && x === typeof(yy.val) && isdefined(x, :instance))
@@ -241,10 +247,11 @@ add_tfunc(===, 2, 2, egal_tfunc, 1)
 
 function isdefined_nothrow(argtypes::Array{Any, 1})
     length(argtypes) == 2 || return false
-    return typeintersect(widenconst(argtypes[1]), Module) === Union{} ?
-        (argtypes[2] ⊑ Symbol || argtypes[2] ⊑ Int) :
-         argtypes[2] ⊑ Symbol
+    return hasintersect(widenconst(argtypes[1]), Module) ?
+           argtypes[2] ⊑ Symbol :
+           (argtypes[2] ⊑ Symbol || argtypes[2] ⊑ Int)
 end
+isdefined_tfunc(arg1, sym, order) = (@nospecialize; isdefined_tfunc(arg1, sym))
 function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
     if isa(arg1, Const)
         a1 = typeof(arg1.val)
@@ -255,10 +262,11 @@ function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
         return Bool
     end
     a1 = unwrap_unionall(a1)
-    if isa(a1, DataType) && !a1.abstract
+    if isa(a1, DataType) && !isabstracttype(a1)
         if a1 === Module
-            Symbol <: widenconst(sym) || return Bottom
-            if isa(sym, Const) && isa(sym.val, Symbol) && isa(arg1, Const) && isdefined(arg1.val, sym.val)
+            hasintersect(widenconst(sym), Symbol) || return Bottom
+            if isa(sym, Const) && isa(sym.val, Symbol) && isa(arg1, Const) &&
+               isdefined(arg1.val::Module, sym.val::Symbol)
                 return Const(true)
             end
         elseif isa(sym, Const)
@@ -270,62 +278,76 @@ function isdefined_tfunc(@nospecialize(arg1), @nospecialize(sym))
             else
                 return Bottom
             end
-            if 1 <= idx <= a1.ninitialized
+            if 1 <= idx <= datatype_min_ninitialized(a1)
                 return Const(true)
             elseif a1.name === _NAMEDTUPLE_NAME
                 if isconcretetype(a1)
                     return Const(false)
+                else
+                    ns = a1.parameters[1]
+                    if isa(ns, Tuple)
+                        return Const(1 <= idx <= length(ns))
+                    end
                 end
             elseif idx <= 0 || (!isvatuple(a1) && idx > fieldcount(a1))
                 return Const(false)
-            elseif !isvatuple(a1) && isbitstype(fieldtype(a1, idx))
-                return Const(true)
             elseif isa(arg1, Const)
                 arg1v = (arg1::Const).val
-                if !ismutable(arg1v) || isdefined(arg1v, idx) || (isa(arg1v, DataType) && is_dt_const_field(idx))
+                if !ismutable(arg1v) || isdefined(arg1v, idx) || isconst(typeof(arg1v), idx)
                     return Const(isdefined(arg1v, idx))
                 end
+            elseif !isvatuple(a1)
+                fieldT = fieldtype(a1, idx)
+                if isa(fieldT, DataType) && isbitstype(fieldT)
+                    return Const(true)
+                end
             end
         end
+    elseif isa(a1, Union)
+        return tmerge(isdefined_tfunc(a1.a, sym),
+                      isdefined_tfunc(a1.b, sym))
     end
     return Bool
 end
-add_tfunc(isdefined, 2, 2, isdefined_tfunc, 1)
+add_tfunc(isdefined, 2, 3, isdefined_tfunc, 1)
 
 function sizeof_nothrow(@nospecialize(x))
     if isa(x, Const)
-        x = x.val
-        if !isa(x, Type) || x === DataType
+        if !isa(x.val, Type) || x.val === DataType
             return true
         end
     elseif isa(x, Conditional)
         return true
     end
-    if isa(x, Union)
-        return sizeof_nothrow(x.a) && sizeof_nothrow(x.b)
+    xu = unwrap_unionall(x)
+    if isa(xu, Union)
+        return sizeof_nothrow(rewrap_unionall(xu.a, x)) &&
+               sizeof_nothrow(rewrap_unionall(xu.b, x))
     end
-    t, exact = instanceof_tfunc(x)
-    if !exact
-        # Could always be bottom at runtime, which throws
-        return false
-    end
-    if t !== Bottom
-        t === DataType && return true
-        x = t
-        x = unwrap_unionall(x)
-        if isa(x, Union)
-            isinline, sz, _ = uniontype_layout(x)
-            return isinline
-        end
-        isa(x, DataType) || return false
-        x.layout == C_NULL && return false
-        (datatype_nfields(x) == 0 && !datatype_pointerfree(x)) && return false
-        return true
-    else
+    t, exact, isconcrete = instanceof_tfunc(x)
+    if t === Bottom
+        # x must be an instance (not a Type) or is the Bottom type object
         x = widenconst(x)
-        x === DataType && return false
-        return isconcretetype(x) || isprimitivetype(x)
+        return !hasintersect(x, Type)
+    end
+    x = unwrap_unionall(t)
+    if isconcrete
+        if isa(x, DataType) && x.layout != C_NULL
+            # there's just a few concrete types with an opaque layout
+            (datatype_nfields(x) == 0 && !datatype_pointerfree(x)) && return false
+        end
+        return true # these must always have a size of these
+    end
+    exact || return false # Could always be the type Bottom at runtime, for example, which throws
+    t === DataType && return true # DataType itself has a size
+    if isa(x, Union)
+        isinline = uniontype_layout(x)[1]
+        return isinline # even any subset of this union would have a size
     end
+    isa(x, DataType) || return false
+    x.layout == C_NULL && return false
+    (datatype_nfields(x) == 0 && !datatype_pointerfree(x)) && return false # is-layout-opaque
+    return true
 end
 
 function _const_sizeof(@nospecialize(x))
@@ -346,8 +368,10 @@ function sizeof_tfunc(@nospecialize(x),)
     isa(x, Const) && return _const_sizeof(x.val)
     isa(x, Conditional) && return _const_sizeof(Bool)
     isconstType(x) && return _const_sizeof(x.parameters[1])
-    if isa(x, Union)
-        return tmerge(sizeof_tfunc(x.a), sizeof_tfunc(x.b))
+    xu = unwrap_unionall(x)
+    if isa(xu, Union)
+        return tmerge(sizeof_tfunc(rewrap_unionall(xu.a, x)),
+                      sizeof_tfunc(rewrap_unionall(xu.b, x)))
     end
     # Core.sizeof operates on either a type or a value. First check which
     # case we're in.
@@ -356,9 +380,9 @@ function sizeof_tfunc(@nospecialize(x),)
         # The value corresponding to `x` at runtime could be a type.
         # Normalize the query to ask about that type.
         x = unwrap_unionall(t)
-        if isa(x, Union)
-            isinline, sz, _ = uniontype_layout(x)
-            return isinline ? Const(Int(sz)) : (exact ? Bottom : Int)
+        if exact && isa(x, Union)
+            isinline = uniontype_layout(x)[1]
+            return isinline ? Const(Int(Core.sizeof(x))) : Bottom
         end
         isa(x, DataType) || return Int
         (isconcretetype(x) || isprimitivetype(x)) && return _const_sizeof(x)
@@ -375,7 +399,7 @@ function nfields_tfunc(@nospecialize(x))
     isa(x, Conditional) && return Const(0)
     x = unwrap_unionall(widenconst(x))
     isconstType(x) && return Const(nfields(x.parameters[1]))
-    if isa(x, DataType) && !x.abstract
+    if isa(x, DataType) && !isabstracttype(x)
         if !(x.name === Tuple.name && isvatuple(x)) &&
            !(x.name === _NAMEDTUPLE_NAME && !isconcretetype(x))
             return Const(isdefined(x, :types) ? length(x.types) : length(x.name.names))
@@ -396,7 +420,8 @@ function typevar_tfunc(@nospecialize(n), @nospecialize(lb_arg), @nospecialize(ub
     ub = Any
     ub_certain = lb_certain = true
     if isa(n, Const)
-        isa(n.val, Symbol) || return Union{}
+        nval = n.val
+        isa(nval, Symbol) || return Union{}
         if isa(lb_arg, Const)
             lb = lb_arg.val
         elseif isType(lb_arg)
@@ -413,7 +438,7 @@ function typevar_tfunc(@nospecialize(n), @nospecialize(lb_arg), @nospecialize(ub
         else
             return TypeVar
         end
-        tv = TypeVar(n.val, lb, ub)
+        tv = TypeVar(nval, lb, ub)
         return PartialTypeVar(tv, lb_certain, ub_certain)
     end
     return TypeVar
@@ -422,9 +447,7 @@ function typebound_nothrow(b)
     b = widenconst(b)
     (b ⊑ TypeVar) && return true
     if isType(b)
-        b = unwrap_unionall(b.parameters[1])
-        b === Union{} && return true
-        return !isa(b, DataType) || b.name != _va_typename
+        return true
     end
     return false
 end
@@ -437,26 +460,105 @@ end
 add_tfunc(Core._typevar, 3, 3, typevar_tfunc, 100)
 add_tfunc(applicable, 1, INT_INF, (@nospecialize(f), args...)->Bool, 100)
 add_tfunc(Core.Intrinsics.arraylen, 1, 1, @nospecialize(x)->Int, 4)
-add_tfunc(arraysize, 2, 2, (@nospecialize(a), @nospecialize(d))->Int, 4)
+
+function arraysize_tfunc(@nospecialize(ary), @nospecialize(dim))
+    hasintersect(widenconst(ary), Array) || return Bottom
+    hasintersect(widenconst(dim), Int) || return Bottom
+    return Int
+end
+add_tfunc(arraysize, 2, 2, arraysize_tfunc, 4)
+
+function arraysize_nothrow(argtypes::Vector{Any})
+    length(argtypes) == 2 || return false
+    ary = argtypes[1]
+    dim = argtypes[2]
+    ary ⊑ Array || return false
+    if isa(dim, Const)
+        dimval = dim.val
+        return isa(dimval, Int) && dimval > 0
+    end
+    return false
+end
+
+struct MemoryOrder x::Cint end
+const MEMORY_ORDER_UNSPECIFIED = MemoryOrder(-2)
+const MEMORY_ORDER_INVALID     = MemoryOrder(-1)
+const MEMORY_ORDER_NOTATOMIC   = MemoryOrder(0)
+const MEMORY_ORDER_UNORDERED   = MemoryOrder(1)
+const MEMORY_ORDER_MONOTONIC   = MemoryOrder(2)
+const MEMORY_ORDER_CONSUME     = MemoryOrder(3)
+const MEMORY_ORDER_ACQUIRE     = MemoryOrder(4)
+const MEMORY_ORDER_RELEASE     = MemoryOrder(5)
+const MEMORY_ORDER_ACQ_REL     = MemoryOrder(6)
+const MEMORY_ORDER_SEQ_CST     = MemoryOrder(7)
+
+function get_atomic_order(order::Symbol, loading::Bool, storing::Bool)
+    if order === :not_atomic
+        return MEMORY_ORDER_NOTATOMIC
+    elseif order === :unordered && (loading ⊻ storing)
+        return MEMORY_ORDER_UNORDERED
+    elseif order === :monotonic && (loading | storing)
+        return MEMORY_ORDER_MONOTONIC
+    elseif order === :acquire && loading
+        return MEMORY_ORDER_ACQUIRE
+    elseif order === :release && storing
+        return MEMORY_ORDER_RELEASE
+    elseif order === :acquire_release && (loading & storing)
+        return MEMORY_ORDER_ACQ_REL
+    elseif order === :sequentially_consistent
+        return MEMORY_ORDER_SEQ_CST
+    end
+    return MEMORY_ORDER_INVALID
+end
+
 function pointer_eltype(@nospecialize(ptr))
     a = widenconst(ptr)
-    if a <: Ptr
-        if isa(a,DataType) && isa(a.parameters[1],Type)
-            return a.parameters[1]
-        elseif isa(a,UnionAll) && !has_free_typevars(a)
-            unw = unwrap_unionall(a)
-            if isa(unw,DataType)
-                return rewrap_unionall(unw.parameters[1], a)
-            end
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            valid_as_lattice(T) || return Bottom
+            return rewrap_unionall(T, a)
         end
     end
     return Any
 end
-add_tfunc(pointerref, 3, 3,
-          function (@nospecialize(a), @nospecialize(i), @nospecialize(align))
-            return pointer_eltype(a)
-          end, 4)
-add_tfunc(pointerset, 4, 4, (@nospecialize(a), @nospecialize(v), @nospecialize(i), @nospecialize(align)) -> a, 5)
+function atomic_pointermodify_tfunc(ptr, op, v, order)
+    @nospecialize
+    a = widenconst(ptr)
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            # note: we could sometimes refine this to a PartialStruct if we analyzed `op(T, T)::T`
+            valid_as_lattice(T) || return Bottom
+            return rewrap_unionall(Pair{T, T}, a)
+        end
+    end
+    return Pair
+end
+function atomic_pointerreplace_tfunc(ptr, x, v, success_order, failure_order)
+    @nospecialize
+    a = widenconst(ptr)
+    if !has_free_typevars(a)
+        unw = unwrap_unionall(a)
+        if isa(unw, DataType) && unw.name === Ptr.body.name
+            T = unw.parameters[1]
+            valid_as_lattice(T) || return Bottom
+            return rewrap_unionall(ccall(:jl_apply_cmpswap_type, Any, (Any,), T), a)
+        end
+    end
+    return ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+end
+add_tfunc(pointerref, 3, 3, (a, i, align) -> (@nospecialize; pointer_eltype(a)), 4)
+add_tfunc(pointerset, 4, 4, (a, v, i, align) -> (@nospecialize; a), 5)
+add_tfunc(atomic_fence, 1, 1, (order) -> (@nospecialize; Nothing), 4)
+add_tfunc(atomic_pointerref, 2, 2, (a, order) -> (@nospecialize; pointer_eltype(a)), 4)
+add_tfunc(atomic_pointerset, 3, 3, (a, v, order) -> (@nospecialize; a), 5)
+add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_eltype(a)), 5)
+add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5)
+add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5)
+add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0)
 
 # more accurate typeof_tfunc for vararg tuples abstract only in length
 function typeof_concrete_vararg(t::DataType)
@@ -464,9 +566,8 @@ function typeof_concrete_vararg(t::DataType)
     for i = 1:np
         p = t.parameters[i]
         if i == np && isvarargtype(p)
-            pp = unwrap_unionall(p)
-            if isconcretetype(pp.parameters[1]) && pp.parameters[2] isa TypeVar
-                return rewrap_unionall(Type{Tuple{t.parameters[1:np-1]..., pp}}, p)
+            if isdefined(p, :T) && !isdefined(p, :N) && isconcretetype(p.T)
+                return Type{Tuple{t.parameters[1:np-1]..., Vararg{p.T, N}}} where N
             end
         elseif !isconcretetype(p)
             break
@@ -496,14 +597,12 @@ function typeof_tfunc(@nospecialize(t))
             return Type{<:t}
         end
     elseif isa(t, Union)
-        a = widenconst(typeof_tfunc(t.a))
-        b = widenconst(typeof_tfunc(t.b))
+        a = widenconst(_typeof_tfunc(t.a))
+        b = widenconst(_typeof_tfunc(t.b))
         return Union{a, b}
-    elseif isa(t, TypeVar) && !(Any === t.ub)
-        return typeof_tfunc(t.ub)
     elseif isa(t, UnionAll)
         u = unwrap_unionall(t)
-        if isa(u, DataType) && !u.abstract
+        if isa(u, DataType) && !isabstracttype(u)
             if u.name === Tuple.name
                 uu = typeof_concrete_vararg(u)
                 if uu !== nothing
@@ -517,43 +616,19 @@ function typeof_tfunc(@nospecialize(t))
     end
     return DataType # typeof(anything)::DataType
 end
-add_tfunc(typeof, 1, 1, typeof_tfunc, 0)
-
-function typeassert_type_instance(@nospecialize(v), @nospecialize(t))
-    if isa(v, Const)
-        if !has_free_typevars(t) && !isa(v.val, t)
-            return Bottom
-        end
-        return v
-    elseif isa(v, PartialStruct)
-        has_free_typevars(t) && return v
-        widev = widenconst(v)
-        if widev <: t
-            return v
-        elseif typeintersect(widev, t) === Bottom
-            return Bottom
-        end
-        @assert widev <: Tuple
-        new_fields = Vector{Any}(undef, length(v.fields))
-        for i = 1:length(new_fields)
-            new_fields[i] = typeassert_type_instance(v.fields[i], getfield_tfunc(t, Const(i)))
-            if new_fields[i] === Bottom
-                return Bottom
-            end
-        end
-        return tuple_tfunc(new_fields)
-    elseif isa(v, Conditional)
-        if !(Bool <: t)
-            return Bottom
-        end
-        return v
+# helper function of `typeof_tfunc`, which accepts `TypeVar`
+function _typeof_tfunc(@nospecialize(t))
+    if isa(t, TypeVar)
+        return t.ub !== Any ? _typeof_tfunc(t.ub) : DataType
     end
-    return typeintersect(widenconst(v), t)
+    return typeof_tfunc(t)
 end
+add_tfunc(typeof, 1, 1, typeof_tfunc, 1)
+
 function typeassert_tfunc(@nospecialize(v), @nospecialize(t))
     t = instanceof_tfunc(t)[1]
     t === Any && return v
-    return typeassert_type_instance(v, t)
+    return tmeet(v, t)
 end
 add_tfunc(typeassert, 2, 2, typeassert_tfunc, 4)
 
@@ -562,9 +637,7 @@ function isa_tfunc(@nospecialize(v), @nospecialize(tt))
     if t === Bottom
         # check if t could be equivalent to typeof(Bottom), since that's valid in `isa`, but the set of `v` is empty
         # if `t` cannot have instances, it's also invalid on the RHS of isa
-        if typeintersect(widenconst(tt), Type) === Union{}
-            return Union{}
-        end
+        hasintersect(widenconst(tt), Type) || return Union{}
         return Const(false)
     end
     if !has_free_typevars(t)
@@ -580,7 +653,7 @@ function isa_tfunc(@nospecialize(v), @nospecialize(tt))
             end
             v = widenconst(v)
             isdispatchelem(v) && return Const(false)
-            if typeintersect(v, t) === Bottom
+            if !hasintersect(v, t)
                 # similar to `isnotbrokensubtype` check above, `typeintersect(v, t)`
                 # can't be trusted for kind types so we do an extra check here
                 if !iskindtype(v)
@@ -592,7 +665,7 @@ function isa_tfunc(@nospecialize(v), @nospecialize(tt))
     # TODO: handle non-leaftype(t) by testing against lower and upper bounds
     return Bool
 end
-add_tfunc(isa, 2, 2, isa_tfunc, 0)
+add_tfunc(isa, 2, 2, isa_tfunc, 1)
 
 function subtype_tfunc(@nospecialize(a), @nospecialize(b))
     a, isexact_a = instanceof_tfunc(a)
@@ -603,31 +676,14 @@ function subtype_tfunc(@nospecialize(a), @nospecialize(b))
                 return Const(true)
             end
         else
-            if isexact_a || (b !== Bottom && typeintersect(a, b) === Union{})
+            if isexact_a || (b !== Bottom && !hasintersect(a, b))
                 return Const(false)
             end
         end
     end
     return Bool
 end
-add_tfunc(<:, 2, 2, subtype_tfunc, 0)
-
-is_dt_const_field(fld::Int) = (
-     fld == DATATYPE_NAME_FIELDINDEX ||
-     fld == DATATYPE_PARAMETERS_FIELDINDEX ||
-     fld == DATATYPE_TYPES_FIELDINDEX ||
-     fld == DATATYPE_SUPER_FIELDINDEX ||
-     fld == DATATYPE_MUTABLE_FIELDINDEX ||
-     fld == DATATYPE_INSTANCE_FIELDINDEX
-    )
-function const_datatype_getfield_tfunc(@nospecialize(sv), fld::Int)
-    if fld == DATATYPE_INSTANCE_FIELDINDEX
-        return isdefined(sv, fld) ? Const(getfield(sv, fld)) : Union{}
-    elseif is_dt_const_field(fld) && isdefined(sv, fld)
-        return Const(getfield(sv, fld))
-    end
-    return nothing
-end
+add_tfunc(<:, 2, 2, subtype_tfunc, 10)
 
 function fieldcount_noerror(@nospecialize t)
     if t isa UnionAll || t isa Union
@@ -636,7 +692,7 @@ function fieldcount_noerror(@nospecialize t)
             return nothing
         end
         t = t::DataType
-    elseif t == Union{}
+    elseif t === Union{}
         return 0
     end
     if !(t isa DataType)
@@ -652,7 +708,7 @@ function fieldcount_noerror(@nospecialize t)
         end
         abstr = true
     else
-        abstr = t.abstract || (t.name === Tuple.name && isvatuple(t))
+        abstr = isabstracttype(t) || (t.name === Tuple.name && isvatuple(t))
     end
     if abstr
         return nothing
@@ -665,7 +721,8 @@ function try_compute_fieldidx(typ::DataType, @nospecialize(field))
     if isa(field, Symbol)
         field = fieldindex(typ, field, false)
         field == 0 && return nothing
-    elseif isa(field, Integer)
+    elseif isa(field, Int)
+        # Numerical field name can only be of type `Int`
         max_fields = fieldcount_noerror(typ)
         max_fields === nothing && return nothing
         (1 <= field <= max_fields) || return nothing
@@ -675,15 +732,36 @@ function try_compute_fieldidx(typ::DataType, @nospecialize(field))
     return field
 end
 
+function getfield_boundscheck(argtypes::Vector{Any}) # ::Union{Bool, Nothing, Type{Bool}}
+    if length(argtypes) == 2
+        boundscheck = Bool
+    elseif length(argtypes) == 3
+        boundscheck = argtypes[3]
+        if boundscheck === Const(:not_atomic) # TODO: this is assuming not atomic
+            boundscheck = Bool
+        end
+    elseif length(argtypes) == 4
+        boundscheck = argtypes[4]
+    else
+        return nothing
+    end
+    widenconst(boundscheck) !== Bool && return nothing
+    boundscheck = widenconditional(boundscheck)
+    if isa(boundscheck, Const)
+        return boundscheck.val
+    else
+        return Bool
+    end
+end
+
 function getfield_nothrow(argtypes::Vector{Any})
-    2 <= length(argtypes) <= 3 || return false
-    length(argtypes) == 2 && return getfield_nothrow(argtypes[1], argtypes[2], Const(true))
-    return getfield_nothrow(argtypes[1], argtypes[2], argtypes[3])
-end
-function getfield_nothrow(@nospecialize(s00), @nospecialize(name), @nospecialize(inbounds))
-    bounds_check_disabled = isa(inbounds, Const) && inbounds.val === false
-    # If we don't have invounds and don't know the field, don't even bother
-    if !bounds_check_disabled
+    boundscheck = getfield_boundscheck(argtypes)
+    boundscheck === nothing && return false
+    return getfield_nothrow(argtypes[1], argtypes[2], !(boundscheck === false))
+end
+function getfield_nothrow(@nospecialize(s00), @nospecialize(name), boundscheck::Bool)
+    # If we don't have boundscheck and don't know the field, don't even bother
+    if boundscheck
         isa(name, Const) || return false
     end
 
@@ -695,13 +773,14 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), @nospecialize
             sv = s00.val
         end
         if isa(name, Const)
-            if !isa(name.val, Symbol)
+            nval = name.val
+            if !isa(nval, Symbol)
                 isa(sv, Module) && return false
-                isa(name.val, Int) || return false
+                isa(nval, Int) || return false
             end
-            return isdefined(sv, name.val)
+            return isdefined(sv, nval)
         end
-        if bounds_check_disabled && !isa(sv, Module)
+        if !boundscheck && !isa(sv, Module)
             # If bounds checking is disabled and all fields are assigned,
             # we may assume that we don't throw
             for i = 1:fieldcount(typeof(sv))
@@ -712,101 +791,113 @@ function getfield_nothrow(@nospecialize(s00), @nospecialize(name), @nospecialize
         return false
     end
 
-    s = unwrap_unionall(widenconst(s00))
+    s0 = widenconst(s00)
+    s = unwrap_unionall(s0)
     if isa(s, Union)
-        return getfield_nothrow(rewrap(s.a, s00), name, inbounds) &&
-            getfield_nothrow(rewrap(s.b, s00), name, inbounds)
+        return getfield_nothrow(rewrap_unionall(s.a, s00), name, boundscheck) &&
+               getfield_nothrow(rewrap_unionall(s.b, s00), name, boundscheck)
     elseif isa(s, DataType)
         # Can't say anything about abstract types
-        s.abstract && return false
+        isabstracttype(s) && return false
+        s.name.atomicfields == C_NULL || return false # TODO: currently we're only testing for ordering == :not_atomic
         # If all fields are always initialized, and bounds check is disabled, we can assume
         # we don't throw
-        if bounds_check_disabled && !isvatuple(s) && s.name !== NamedTuple.body.body.name && fieldcount(s) == s.ninitialized
+        if !boundscheck && s.name.n_uninitialized == 0
             return true
         end
         # Else we need to know what the field is
         isa(name, Const) || return false
         field = try_compute_fieldidx(s, name.val)
         field === nothing && return false
-        field <= s.ninitialized && return true
+        field <= datatype_min_ninitialized(s) && return true
+        # `try_compute_fieldidx` already check for field index bound.
+        !isvatuple(s) && isbitstype(fieldtype(s0, field)) && return true
     end
 
     return false
 end
 
-getfield_tfunc(@nospecialize(s00), @nospecialize(name), @nospecialize(inbounds)) =
-    getfield_tfunc(s00, name)
-function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
-    s = unwrap_unionall(s00)
-    if isa(s, Union)
-        return tmerge(getfield_tfunc(rewrap(s.a,s00), name),
-                      getfield_tfunc(rewrap(s.b,s00), name))
-    elseif isa(s, Conditional)
+function getfield_tfunc(s00, name, boundscheck_or_order)
+    @nospecialize
+    t = isvarargtype(boundscheck_or_order) ? unwrapva(boundscheck_or_order) :
+        widenconst(boundscheck_or_order)
+    hasintersect(t, Symbol) || hasintersect(t, Bool) || return Bottom
+    return getfield_tfunc(s00, name)
+end
+function getfield_tfunc(s00, name, order, boundscheck)
+    @nospecialize
+    hasintersect(widenconst(order), Symbol) || return Bottom
+    if isvarargtype(boundscheck)
+        t = unwrapva(boundscheck)
+        hasintersect(t, Symbol) || hasintersect(t, Bool) || return Bottom
+    else
+        hasintersect(widenconst(boundscheck), Bool) || return Bottom
+    end
+    return getfield_tfunc(s00, name)
+end
+getfield_tfunc(@nospecialize(s00), @nospecialize(name)) = _getfield_tfunc(s00, name, false)
+function _getfield_tfunc(@nospecialize(s00), @nospecialize(name), setfield::Bool)
+    if isa(s00, Conditional)
         return Bottom # Bool has no fields
-    elseif isa(s, Const) || isconstType(s)
-        if !isa(s, Const)
-            sv = s.parameters[1]
+    elseif isa(s00, Const) || isconstType(s00)
+        if !isa(s00, Const)
+            sv = s00.parameters[1]
         else
-            sv = s.val
+            sv = s00.val
         end
         if isa(name, Const)
             nv = name.val
-            if !(isa(nv,Symbol) || isa(nv,Int))
+            if isa(sv, Module)
+                setfield && return Bottom
+                if isa(nv, Symbol)
+                    return abstract_eval_global(sv, nv)
+                end
                 return Bottom
             end
-            if isa(sv, UnionAll)
-                if nv === :var || nv === 1
-                    return Const(sv.var)
-                elseif nv === :body || nv === 2
-                    return Const(sv.body)
-                end
-            elseif isa(sv, DataType)
-                idx = nv
-                if isa(idx, Symbol)
-                    idx = fieldindex(DataType, idx, false)
-                end
-                if isa(idx, Int)
-                    t = const_datatype_getfield_tfunc(sv, idx)
-                    t === nothing || return t
-                end
-            elseif isa(sv, Core.TypeName)
-                fld = isa(nv, Symbol) ? fieldindex(Core.TypeName, nv, false) : nv
-                if (fld == TYPENAME_NAME_FIELDINDEX ||
-                    fld == TYPENAME_MODULE_FIELDINDEX ||
-                    fld == TYPENAME_WRAPPER_FIELDINDEX)
-                    return Const(getfield(sv, fld))
-                end
+            if isa(nv, Symbol)
+                nv = fieldindex(typeof(sv), nv, false)
             end
-            if isa(sv, Module) && isa(nv, Symbol)
-                return abstract_eval_global(sv, nv)
+            if !isa(nv, Int)
+                return Bottom
             end
-            if (isa(sv, SimpleVector) || !ismutable(sv)) && isdefined(sv, nv)
+            if isa(sv, DataType) && nv == DATATYPE_TYPES_FIELDINDEX && isdefined(sv, nv)
                 return Const(getfield(sv, nv))
             end
+            if isconst(typeof(sv), nv)
+                if isdefined(sv, nv)
+                    return Const(getfield(sv, nv))
+                end
+                return Union{}
+            end
         end
         s = typeof(sv)
-    elseif isa(s, PartialStruct)
+    elseif isa(s00, PartialStruct)
+        s = widenconst(s00)
+        sty = unwrap_unionall(s)::DataType
         if isa(name, Const)
             nv = name.val
             if isa(nv, Symbol)
-                nv = fieldindex(widenconst(s), nv, false)
+                nv = fieldindex(sty, nv, false)
             end
-            if isa(nv, Int) && 1 <= nv <= length(s.fields)
-                return s.fields[nv]
+            if isa(nv, Int) && 1 <= nv <= length(s00.fields)
+                return unwrapva(s00.fields[nv])
             end
         end
-        s = widenconst(s)
+    else
+        s = unwrap_unionall(s00)
     end
-    if isType(s) || !isa(s, DataType) || s.abstract
-        return Any
+    if isa(s, Union)
+        return tmerge(_getfield_tfunc(rewrap_unionall(s.a, s00), name, setfield),
+                      _getfield_tfunc(rewrap_unionall(s.b, s00), name, setfield))
     end
-    if s <: Tuple && name ⊑ Symbol
+    isa(s, DataType) || return Any
+    isabstracttype(s) && return Any
+    if s <: Tuple && !(Int <: widenconst(name))
         return Bottom
     end
     if s <: Module
-        if name ⊑ Int
-            return Bottom
-        end
+        setfield && return Bottom
+        hasintersect(widenconst(name), Symbol) || return Bottom
         return Any
     end
     if s.name === _NAMEDTUPLE_NAME && !isconcretetype(s)
@@ -819,33 +910,36 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
         elseif Symbol ⊑ name
             name = Int
         end
-        _ts = s.parameters[2]
-        while isa(_ts, TypeVar)
-            _ts = _ts.ub
-        end
+        _ts = unwraptv(s.parameters[2])
         _ts = rewrap_unionall(_ts, s00)
         if !(_ts <: Tuple)
             return Any
         end
-        return getfield_tfunc(_ts, name)
+        return _getfield_tfunc(_ts, name, setfield)
     end
     ftypes = datatype_fieldtypes(s)
-    if isempty(ftypes)
+    nf = length(ftypes)
+    # If no value has this type, then this statement should be unreachable.
+    # Bail quickly now.
+    if !has_concrete_subtype(s) || nf == 0
         return Bottom
     end
     if isa(name, Conditional)
         return Bottom # can't index fields with Bool
     end
     if !isa(name, Const)
+        name = widenconst(name)
         if !(Int <: name || Symbol <: name)
             return Bottom
         end
-        if length(ftypes) == 1
+        if nf == 1
             return rewrap_unionall(unwrapva(ftypes[1]), s00)
         end
         # union together types of all fields
         t = Bottom
-        for _ft in ftypes
+        for i in 1:nf
+            _ft = ftypes[i]
+            setfield && isconst(s, i) && continue
             t = tmerge(t, rewrap_unionall(unwrapva(_ft), s00))
             t === Any && break
         end
@@ -858,23 +952,13 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
     if !isa(fld, Int)
         return Bottom
     end
-    nf = length(ftypes)
     if s <: Tuple && fld >= nf && isvarargtype(ftypes[nf])
         return rewrap_unionall(unwrapva(ftypes[nf]), s00)
     end
     if fld < 1 || fld > nf
         return Bottom
-    end
-    if isconstType(s00)
-        sp = s00.parameters[1]
-    elseif isa(s00, Const)
-        sp = s00.val
-    else
-        sp = nothing
-    end
-    if isa(sp, DataType)
-        t = const_datatype_getfield_tfunc(sp, fld)
-        t !== nothing && return t
+    elseif setfield && isconst(s, fld)
+        return Bottom
     end
     R = ftypes[fld]
     if isempty(s.parameters)
@@ -882,10 +966,127 @@ function getfield_tfunc(@nospecialize(s00), @nospecialize(name))
     end
     return rewrap_unionall(R, s00)
 end
-add_tfunc(getfield, 2, 3, getfield_tfunc, 1)
-add_tfunc(setfield!, 3, 3, (@nospecialize(o), @nospecialize(f), @nospecialize(v)) -> v, 3)
-fieldtype_tfunc(@nospecialize(s0), @nospecialize(name), @nospecialize(inbounds)) =
-    fieldtype_tfunc(s0, name)
+
+function setfield!_tfunc(o, f, v, order)
+    @nospecialize
+    if !isvarargtype(order)
+        hasintersect(widenconst(order), Symbol) || return Bottom
+    end
+    return setfield!_tfunc(o, f, v)
+end
+function setfield!_tfunc(o, f, v)
+    @nospecialize
+    mutability_errorcheck(o) || return Bottom
+    ft = _getfield_tfunc(o, f, true)
+    ft === Bottom && return Bottom
+    hasintersect(widenconst(v), widenconst(ft)) || return Bottom
+    return v
+end
+function mutability_errorcheck(@nospecialize obj)
+    objt0 = widenconst(obj)
+    objt = unwrap_unionall(objt0)
+    if isa(objt, Union)
+        return mutability_errorcheck(rewrap_unionall(objt.a, objt0)) ||
+               mutability_errorcheck(rewrap_unionall(objt.b, objt0))
+    elseif isa(objt, DataType)
+        # Can't say anything about abstract types
+        isabstracttype(objt) && return true
+        return ismutabletype(objt)
+    end
+    return true
+end
+
+function setfield!_nothrow(argtypes::Vector{Any})
+    if length(argtypes) == 4
+        order = argtypes[4]
+        order === Const(:not_atomic) || return false # currently setfield!_nothrow is assuming not atomic
+    else
+        length(argtypes) == 3 || return false
+    end
+    return setfield!_nothrow(argtypes[1], argtypes[2], argtypes[3])
+end
+function setfield!_nothrow(s00, name, v)
+    @nospecialize
+    s0 = widenconst(s00)
+    s = unwrap_unionall(s0)
+    if isa(s, Union)
+        return setfield!_nothrow(rewrap_unionall(s.a, s00), name, v) &&
+               setfield!_nothrow(rewrap_unionall(s.b, s00), name, v)
+    elseif isa(s, DataType)
+        # Can't say anything about abstract types
+        isabstracttype(s) && return false
+        ismutabletype(s) || return false
+        s.name.atomicfields == C_NULL || return false # TODO: currently we're only testing for ordering == :not_atomic
+        isa(name, Const) || return false
+        field = try_compute_fieldidx(s, name.val)
+        field === nothing && return false
+        # `try_compute_fieldidx` already check for field index bound.
+        isconst(s, field) && return false
+        v_expected = fieldtype(s0, field)
+        return v ⊑ v_expected
+    end
+    return false
+end
+
+swapfield!_tfunc(o, f, v, order) = (@nospecialize; getfield_tfunc(o, f))
+swapfield!_tfunc(o, f, v) = (@nospecialize; getfield_tfunc(o, f))
+modifyfield!_tfunc(o, f, op, v, order) = (@nospecialize; modifyfield!_tfunc(o, f, op, v))
+function modifyfield!_tfunc(o, f, op, v)
+    @nospecialize
+    T = _fieldtype_tfunc(o, isconcretetype(o), f)
+    T === Bottom && return Bottom
+    PT = Const(Pair)
+    return instanceof_tfunc(apply_type_tfunc(PT, T, T))[1]
+end
+function abstract_modifyfield!(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState)
+    nargs = length(argtypes)
+    if !isempty(argtypes) && isvarargtype(argtypes[nargs])
+        nargs - 1 <= 6 || return CallMeta(Bottom, false)
+        nargs > 3 || return CallMeta(Any, false)
+    else
+        5 <= nargs <= 6 || return CallMeta(Bottom, false)
+    end
+    o = unwrapva(argtypes[2])
+    f = unwrapva(argtypes[3])
+    RT = modifyfield!_tfunc(o, f, Any, Any)
+    info = false
+    if nargs >= 5 && RT !== Bottom
+        # we may be able to refine this to a PartialStruct by analyzing `op(o.f, v)::T`
+        # as well as compute the info for the method matches
+        op = unwrapva(argtypes[4])
+        v = unwrapva(argtypes[5])
+        TF = getfield_tfunc(o, f)
+        push!(sv.ssavalue_uses[sv.currpc], sv.currpc) # temporarily disable `call_result_unused` check for this call
+        callinfo = abstract_call(interp, ArgInfo(nothing, Any[op, TF, v]), sv, #=max_methods=# 1)
+        pop!(sv.ssavalue_uses[sv.currpc], sv.currpc)
+        TF2 = tmeet(callinfo.rt, widenconst(TF))
+        if TF2 === Bottom
+            RT = Bottom
+        elseif isconcretetype(RT) && has_nontrivial_const_info(TF2) # isconcrete condition required to form a PartialStruct
+            RT = PartialStruct(RT, Any[TF, TF2])
+        end
+        info = callinfo.info
+    end
+    return CallMeta(RT, info)
+end
+replacefield!_tfunc(o, f, x, v, success_order, failure_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
+replacefield!_tfunc(o, f, x, v, success_order) = (@nospecialize; replacefield!_tfunc(o, f, x, v))
+function replacefield!_tfunc(o, f, x, v)
+    @nospecialize
+    T = _fieldtype_tfunc(o, isconcretetype(o), f)
+    T === Bottom && return Bottom
+    PT = Const(ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T)
+    return instanceof_tfunc(apply_type_tfunc(PT, T))[1]
+end
+
+# we could use tuple_tfunc instead of widenconst, but `o` is mutable, so that is unlikely to be beneficial
+
+add_tfunc(getfield, 2, 4, getfield_tfunc, 1)
+add_tfunc(setfield!, 3, 4, setfield!_tfunc, 3)
+
+add_tfunc(swapfield!, 3, 4, swapfield!_tfunc, 3)
+add_tfunc(modifyfield!, 4, 5, modifyfield!_tfunc, 3)
+add_tfunc(replacefield!, 4, 6, replacefield!_tfunc, 3)
 
 function fieldtype_nothrow(@nospecialize(s0), @nospecialize(name))
     s0 === Bottom && return true # unreachable
@@ -919,7 +1120,7 @@ function _fieldtype_nothrow(@nospecialize(s), exact::Bool, name::Const)
         return exact ? (a || b) : (a && b)
     end
     u isa DataType || return false
-    u.abstract && return false
+    isabstracttype(u) && return false
     if u.name === _NAMEDTUPLE_NAME && !isconcretetype(u)
         # TODO: better approximate inference
         return false
@@ -944,25 +1145,28 @@ function _fieldtype_nothrow(@nospecialize(s), exact::Bool, name::Const)
     return true
 end
 
+fieldtype_tfunc(s0, name, boundscheck) = (@nospecialize; fieldtype_tfunc(s0, name))
 function fieldtype_tfunc(@nospecialize(s0), @nospecialize(name))
     if s0 === Bottom
         return Bottom
     end
     if s0 === Any || s0 === Type || DataType ⊑ s0 || UnionAll ⊑ s0
-        return Type
+        # For a generic DataType, one of the fields could still be a TypeVar
+        # which is not a Type. Tuple{...} can also contain Symbols etc.
+        return Any
     end
-    # fieldtype only accepts Types, errors on `Module`
-    if isa(s0, Const) && (!(isa(s0.val, DataType) || isa(s0.val, UnionAll) || isa(s0.val, Union)) || s0.val === Module)
+    # fieldtype only accepts Types
+    if isa(s0, Const) && !(isa(s0.val, DataType) || isa(s0.val, UnionAll) || isa(s0.val, Union))
         return Bottom
     end
-    if (s0 isa Type && (s0 == Type{Module} || s0 == Type{Union{}})) || isa(s0, Conditional)
+    if (s0 isa Type && s0 == Type{Union{}}) || isa(s0, Conditional)
         return Bottom
     end
 
     su = unwrap_unionall(s0)
     if isa(su, Union)
-        return tmerge(fieldtype_tfunc(rewrap(su.a, s0), name),
-                      fieldtype_tfunc(rewrap(su.b, s0), name))
+        return tmerge(fieldtype_tfunc(rewrap_unionall(su.a, s0), name),
+                      fieldtype_tfunc(rewrap_unionall(su.b, s0), name))
     end
 
     s, exact = instanceof_tfunc(s0)
@@ -974,14 +1178,32 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
     exact = exact && !has_free_typevars(s)
     u = unwrap_unionall(s)
     if isa(u, Union)
-        return tmerge(_fieldtype_tfunc(rewrap(u.a, s), exact, name),
-                      _fieldtype_tfunc(rewrap(u.b, s), exact, name))
+        ta0 = _fieldtype_tfunc(rewrap_unionall(u.a, s), exact, name)
+        tb0 = _fieldtype_tfunc(rewrap_unionall(u.b, s), exact, name)
+        ta0 ⊑ tb0 && return tb0
+        tb0 ⊑ ta0 && return ta0
+        ta, exacta, _, istypea = instanceof_tfunc(ta0)
+        tb, exactb, _, istypeb = instanceof_tfunc(tb0)
+        if exact && exacta && exactb
+            return Const(Union{ta, tb})
+        end
+        if istypea && istypeb
+            return Type{<:Union{ta, tb}}
+        end
+        return Any
+    end
+    u isa DataType || return Any
+    if isabstracttype(u)
+        # Abstract types have no fields
+        exact && return Bottom
+        # Type{...} without free typevars has no subtypes, so it is actually
+        # exact, even if `exact` is false.
+        isType(u) && !has_free_typevars(u.parameters[1]) && return Bottom
+        return Any
     end
-    u isa DataType || return Type
-    u.abstract && return Type
     if u.name === _NAMEDTUPLE_NAME && !isconcretetype(u)
         # TODO: better approximate inference
-        return Type
+        return Union{Type, TypeVar}
     end
     ftypes = datatype_fieldtypes(u)
     if isempty(ftypes)
@@ -1004,8 +1226,15 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
                 else
                     ft1 = Type{ft1}
                 end
+            elseif ft1 isa Type || ft1 isa TypeVar
+                if ft1 === Any && u.name === Tuple.name
+                    # Tuple{:x} is possible in this case
+                    ft1 = Any
+                else
+                    ft1 = Type{ft} where ft<:ft1
+                end
             else
-                ft1 = Type{ft} where ft<:ft1
+                ft1 = Const(ft1)
             end
             t = tmerge(t, ft1)
             t === Any && break
@@ -1028,6 +1257,9 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
     else
         ft = ftypes[fld]
     end
+    if !isa(ft, Type) && !isa(ft, TypeVar)
+        return Const(ft)
+    end
 
     exactft = exact || (!has_free_typevars(ft) && u.name !== Tuple.name)
     ft = rewrap_unionall(ft, s)
@@ -1037,10 +1269,30 @@ function _fieldtype_tfunc(@nospecialize(s), exact::Bool, @nospecialize(name))
         end
         return Type{ft}
     end
+    if u.name === Tuple.name && ft === Any
+        # Tuple{:x} is possible
+        return Any
+    end
     return Type{<:ft}
 end
 add_tfunc(fieldtype, 2, 3, fieldtype_tfunc, 0)
 
+# Like `valid_tparam`, but in the type domain.
+function valid_tparam_type(T::DataType)
+    T === Symbol && return true
+    isbitstype(T) && return true
+    if T <: Tuple
+        isconcretetype(T) || return false
+        for P in T.parameters
+            (P === Symbol || isbitstype(P)) || return false
+        end
+        return true
+    end
+    return false
+end
+valid_tparam_type(U::Union) = valid_tparam_type(U.a) && valid_tparam_type(U.b)
+valid_tparam_type(U::UnionAll) = valid_tparam_type(unwrap_unionall(U))
+
 function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt))
     rt === Type && return false
     length(argtypes) >= 1 || return false
@@ -1052,7 +1304,7 @@ function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt))
     else
         return false
     end
-    # We know the apply_type is well formed. Oherwise our rt would have been
+    # We know the apply_type is well formed. Otherwise our rt would have been
     # Bottom (or Type).
     (headtype === Union) && return true
     isa(rt, Const) && return true
@@ -1060,14 +1312,14 @@ function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt))
     for i = 2:length(argtypes)
         isa(u, UnionAll) || return false
         ai = widenconditional(argtypes[i])
-        if ai ⊑ TypeVar
+        if ai ⊑ TypeVar || ai === DataType
             # We don't know anything about the bounds of this typevar, but as
             # long as the UnionAll is not constrained, that's ok.
             if !(u.var.lb === Union{} && u.var.ub === Any)
                 return false
             end
-        elseif isa(ai, Const) && isa(ai.val, Type)
-            ai = ai.val
+        elseif (isa(ai, Const) && isa(ai.val, Type)) || isconstType(ai)
+            ai = isa(ai, Const) ? ai.val : (ai::DataType).parameters[1]
             if has_free_typevars(u.var.lb) || has_free_typevars(u.var.ub)
                 return false
             end
@@ -1075,7 +1327,23 @@ function apply_type_nothrow(argtypes::Array{Any, 1}, @nospecialize(rt))
                 return false
             end
         else
-            return false
+            T, exact, _, istype = instanceof_tfunc(ai)
+            if T === Bottom
+                if !(u.var.lb === Union{} && u.var.ub === Any)
+                    return false
+                end
+                if !valid_tparam_type(widenconst(ai))
+                    return false
+                end
+            else
+                istype || return false
+                if !(T <: u.var.ub)
+                    return false
+                end
+                if exact ? !(u.var.lb <: T) : !(u.var.lb === Bottom)
+                    return false
+                end
+            end
         end
         u = u.body
     end
@@ -1092,10 +1360,10 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
     elseif isconstType(headtypetype)
         headtype = headtypetype.parameters[1]
     else
-        return Type
+        return Any
     end
     if !isempty(args) && isvarargtype(args[end])
-        return Type
+        return isvarargtype(headtype) ? TypeofVararg : Type
     end
     largs = length(args)
     if headtype === Union
@@ -1113,7 +1381,7 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
                 end
             else
                 if !isType(ai)
-                    if !isa(ai, Type) || typeintersect(ai, Type) !== Bottom || typeintersect(ai, TypeVar) !== Bottom
+                    if !isa(ai, Type) || hasintersect(ai, Type) || hasintersect(ai, TypeVar)
                         hasnonType = true
                     else
                         return Bottom
@@ -1121,7 +1389,11 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
                 end
             end
         end
-        largs == 1 && return isa(args[1], Type) ? typeintersect(args[1], Type) : Type
+        if largs == 1 # Union{T} --> T
+            u1 = typeintersect(widenconst(args[1]), Type)
+            valid_as_lattice(u1) || return Bottom
+            return u1
+        end
         hasnonType && return Type
         ty = Union{}
         allconst = true
@@ -1137,16 +1409,15 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
         end
         return allconst ? Const(ty) : Type{ty}
     end
-    istuple = (headtype == Tuple)
-    if !istuple && !isa(headtype, UnionAll)
+    istuple = isa(headtype, Type) && (headtype == Tuple)
+    if !istuple && !isa(headtype, UnionAll) && !isvarargtype(headtype)
         return Union{}
     end
     uw = unwrap_unionall(headtype)
-    isnamedtuple = isa(uw, DataType) && uw.name === _NAMEDTUPLE_NAME
     uncertain = false
     canconst = true
     tparams = Any[]
-    outervars = Any[]
+    outervars = TypeVar[]
     varnamectr = 1
     ua = headtype
     for i = 1:largs
@@ -1155,7 +1426,8 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
             aip1 = ai.parameters[1]
             canconst &= !has_free_typevars(aip1)
             push!(tparams, aip1)
-        elseif isa(ai, Const) && (isa(ai.val, Type) || isa(ai.val, TypeVar) || valid_tparam(ai.val))
+        elseif isa(ai, Const) && (isa(ai.val, Type) || isa(ai.val, TypeVar) ||
+                                  valid_tparam(ai.val) || (istuple && isvarargtype(ai.val)))
             push!(tparams, ai.val)
         elseif isa(ai, PartialTypeVar)
             canconst = false
@@ -1192,7 +1464,7 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
             #    end
             else
                 # Is this the second parameter to a NamedTuple?
-                if isnamedtuple && isa(ua, UnionAll) && uw.parameters[2] === ua.var
+                if isa(uw, DataType) && uw.name === _NAMEDTUPLE_NAME && isa(ua, UnionAll) && uw.parameters[2] === ua.var
                     # If the names are known, keep the upper bound, but otherwise widen to Tuple.
                     # This is a widening heuristic to avoid keeping type information
                     # that's unlikely to be useful.
@@ -1221,11 +1493,11 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
     catch ex
         # type instantiation might fail if one of the type parameters
         # doesn't match, which could happen if a type estimate is too coarse
-        return Type{<:headtype}
+        return isvarargtype(headtype) ? TypeofVararg : Type{<:headtype}
     end
     !uncertain && canconst && return Const(appl)
-    if isvarargtype(headtype)
-        return Type
+    if isvarargtype(appl)
+        return TypeofVararg
     end
     if istuple
         return Type{<:appl}
@@ -1238,58 +1510,58 @@ function apply_type_tfunc(@nospecialize(headtypetype), @nospecialize args...)
 end
 add_tfunc(apply_type, 1, INT_INF, apply_type_tfunc, 10)
 
-function invoke_tfunc(interp::AbstractInterpreter, @nospecialize(ft), @nospecialize(types), @nospecialize(argtype), sv::InferenceState)
-    argtype = typeintersect(types, argtype)
-    argtype === Bottom && return Bottom
-    argtype isa DataType || return Any # other cases are not implemented below
-    isdispatchelem(ft) || return Any # check that we might not have a subtype of `ft` at runtime, before doing supertype lookup below
-    types = rewrap_unionall(Tuple{ft, unwrap_unionall(types).parameters...}, types)
-    argtype = Tuple{ft, argtype.parameters...}
-    result = findsup(types, method_table(interp))
-    if result === nothing
-        return Any
-    end
-    method, valid_worlds = result
-    update_valid_age!(sv, valid_worlds)
-    (ti, env) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), argtype, method.sig)::SimpleVector
-    rt, edge = typeinf_edge(interp, method, ti, env, sv)
-    edge !== nothing && add_backedge!(edge::MethodInstance, sv)
-    return rt
+function has_struct_const_info(x)
+    isa(x, PartialTypeVar) && return true
+    isa(x, Conditional) && return true
+    return has_nontrivial_const_info(x)
 end
 
 # convert the dispatch tuple type argtype to the real (concrete) type of
 # the tuple of those values
-function tuple_tfunc(atypes::Vector{Any})
-    atypes = anymap(widenconditional, atypes)
+function tuple_tfunc(argtypes::Vector{Any})
+    argtypes = anymap(widenconditional, argtypes)
     all_are_const = true
-    for i in 1:length(atypes)
-        if !isa(atypes[i], Const)
+    for i in 1:length(argtypes)
+        if !isa(argtypes[i], Const)
             all_are_const = false
             break
         end
     end
     if all_are_const
-        return Const(ntuple(i -> atypes[i].val, length(atypes)))
+        return Const(ntuple(i -> argtypes[i].val, length(argtypes)))
     end
-    params = Vector{Any}(undef, length(atypes))
+    params = Vector{Any}(undef, length(argtypes))
     anyinfo = false
-    for i in 1:length(atypes)
-        x = atypes[i]
-        # TODO ignore singleton Const (don't forget to update cache logic if you implement this)
-        if !anyinfo
-            anyinfo = !isa(x, Type) || isType(x)
+    for i in 1:length(argtypes)
+        x = argtypes[i]
+        if has_struct_const_info(x)
+            anyinfo = true
+        else
+            if !isvarargtype(x)
+                x = widenconst(x)
+            end
+            argtypes[i] = x
         end
         if isa(x, Const)
             params[i] = typeof(x.val)
         else
-            x = widenconst(x)
+            x = isvarargtype(x) ? x : widenconst(x)
+            # since there don't exist any values whose runtime type are `Tuple{Type{...}}`,
+            # here we should turn such `Type{...}`-parameters to valid parameters, e.g.
+            # (::Type{Int},) -> Tuple{DataType} (or PartialStruct for more accuracy)
+            # (::Union{Type{Int32},Type{Int64}}) -> Tuple{Type}
             if isType(x)
+                anyinfo = true
                 xparam = x.parameters[1]
                 if hasuniquerep(xparam) || xparam === Bottom
                     params[i] = typeof(xparam)
                 else
                     params[i] = Type
                 end
+            elseif iskindtype(x)
+                params[i] = x
+            elseif !isvarargtype(x) && hasintersect(x, Type)
+                params[i] = Union{x, Type}
             else
                 params[i] = x
             end
@@ -1298,72 +1570,136 @@ function tuple_tfunc(atypes::Vector{Any})
     typ = Tuple{params...}
     # replace a singleton type with its equivalent Const object
     isdefined(typ, :instance) && return Const(typ.instance)
-    return anyinfo ? PartialStruct(typ, atypes) : typ
-end
-
-function arrayref_tfunc(@nospecialize(boundscheck), @nospecialize(a), @nospecialize i...)
-    a = widenconst(a)
-    if a <: Array
-        if isa(a, DataType) && (isa(a.parameters[1], Type) || isa(a.parameters[1], TypeVar))
-            # TODO: the TypeVar case should not be needed here
-            a = a.parameters[1]
-            return isa(a, TypeVar) ? a.ub : a
-        elseif isa(a, UnionAll) && !has_free_typevars(a)
-            unw = unwrap_unionall(a)
-            if isa(unw, DataType)
-                return rewrap_unionall(unw.parameters[1], a)
-            end
+    return anyinfo ? PartialStruct(typ, argtypes) : typ
+end
+
+arrayref_tfunc(@nospecialize(boundscheck), @nospecialize(ary), @nospecialize idxs...) =
+    _arrayref_tfunc(boundscheck, ary, idxs)
+function _arrayref_tfunc(@nospecialize(boundscheck), @nospecialize(ary),
+    @nospecialize idxs::Tuple)
+    isempty(idxs) && return Bottom
+    array_builtin_common_errorcheck(boundscheck, ary, idxs) || return Bottom
+    return array_elmtype(ary)
+end
+add_tfunc(arrayref, 3, INT_INF, arrayref_tfunc, 20)
+add_tfunc(const_arrayref, 3, INT_INF, arrayref_tfunc, 20)
+
+function arrayset_tfunc(@nospecialize(boundscheck), @nospecialize(ary), @nospecialize(item),
+    @nospecialize idxs...)
+    hasintersect(widenconst(item), _arrayref_tfunc(boundscheck, ary, idxs)) || return Bottom
+    return ary
+end
+add_tfunc(arrayset, 4, INT_INF, arrayset_tfunc, 20)
+
+function array_builtin_common_errorcheck(@nospecialize(boundscheck), @nospecialize(ary),
+    @nospecialize idxs::Tuple)
+    hasintersect(widenconst(boundscheck), Bool) || return false
+    hasintersect(widenconst(ary), Array) || return false
+    for i = 1:length(idxs)
+        idx = getfield(idxs, i)
+        idx = isvarargtype(idx) ? unwrapva(idx) : widenconst(idx)
+        hasintersect(idx, Int) || return false
+    end
+    return true
+end
+
+function array_elmtype(@nospecialize ary)
+    a = widenconst(ary)
+    if !has_free_typevars(a) && a <: Array
+        a0 = a
+        if isa(a, UnionAll)
+            a = unwrap_unionall(a0)
+        end
+        if isa(a, DataType)
+            T = a.parameters[1]
+            valid_as_lattice(T) || return Bottom
+            return rewrap_unionall(T, a0)
         end
     end
     return Any
 end
-add_tfunc(arrayref, 3, INT_INF, arrayref_tfunc, 20)
-add_tfunc(const_arrayref, 3, INT_INF, arrayref_tfunc, 20)
-add_tfunc(arrayset, 4, INT_INF, (@nospecialize(boundscheck), @nospecialize(a), @nospecialize(v), @nospecialize i...)->a, 20)
 
-function array_type_undefable(@nospecialize(a))
-    if isa(a, Union)
-        return array_type_undefable(a.a) || array_type_undefable(a.b)
-    elseif isa(a, UnionAll)
+function _opaque_closure_tfunc(@nospecialize(arg), @nospecialize(lb), @nospecialize(ub),
+        @nospecialize(source), env::Vector{Any}, linfo::MethodInstance)
+
+    argt, argt_exact = instanceof_tfunc(arg)
+    lbt, lb_exact = instanceof_tfunc(lb)
+    if !lb_exact
+        lbt = Union{}
+    end
+
+    ubt, ub_exact = instanceof_tfunc(ub)
+
+    t = (argt_exact ? Core.OpaqueClosure{argt, T} : Core.OpaqueClosure{<:argt, T}) where T
+    t = lbt == ubt ? t{ubt} : (t{T} where lbt <: T <: ubt)
+
+    (isa(source, Const) && isa(source.val, Method)) || return t
+
+    return PartialOpaque(t, tuple_tfunc(env), linfo, source.val)
+end
+
+# whether getindex for the elements can potentially throw UndefRef
+function array_type_undefable(@nospecialize(arytype))
+    if isa(arytype, Union)
+        return array_type_undefable(arytype.a) || array_type_undefable(arytype.b)
+    elseif isa(arytype, UnionAll)
         return true
     else
-        etype = (a::DataType).parameters[1]
-        return !(etype isa Type && (isbitstype(etype) || isbitsunion(etype)))
+        elmtype = (arytype::DataType).parameters[1]
+        return !(elmtype isa Type && (isbitstype(elmtype) || isbitsunion(elmtype)))
     end
 end
 
-function array_builtin_common_nothrow(argtypes::Array{Any,1}, first_idx_idx::Int)
+function array_builtin_common_nothrow(argtypes::Vector{Any}, first_idx_idx::Int)
     length(argtypes) >= 4 || return false
-    atype = argtypes[2]
-    (argtypes[1] ⊑ Bool && atype ⊑ Array) || return false
-    for i = first_idx_idx:length(argtypes)
-        argtypes[i] ⊑ Int || return false
-    end
+    boundscheck = argtypes[1]
+    arytype = argtypes[2]
+    array_builtin_common_typecheck(boundscheck, arytype, argtypes, first_idx_idx) || return false
     # If we could potentially throw undef ref errors, bail out now.
-    atype = widenconst(atype)
-    array_type_undefable(atype) && return false
+    arytype = widenconst(arytype)
+    array_type_undefable(arytype) && return false
     # If we have @inbounds (first argument is false), we're allowed to assume
     # we don't throw bounds errors.
-    (isa(argtypes[1], Const) && !argtypes[1].val) && return true
+    if isa(boundscheck, Const)
+        !(boundscheck.val::Bool) && return true
+    end
     # Else we can't really say anything here
     # TODO: In the future we may be able to track the shapes of arrays though
     # inference.
     return false
 end
 
+function array_builtin_common_typecheck(
+    @nospecialize(boundscheck), @nospecialize(arytype),
+    argtypes::Vector{Any}, first_idx_idx::Int)
+    (boundscheck ⊑ Bool && arytype ⊑ Array) || return false
+    for i = first_idx_idx:length(argtypes)
+        argtypes[i] ⊑ Int || return false
+    end
+    return true
+end
+
+function arrayset_typecheck(@nospecialize(arytype), @nospecialize(elmtype))
+    # Check that we can determine the element type
+    arytype = widenconst(arytype)
+    isa(arytype, DataType) || return false
+    elmtype_expected = arytype.parameters[1]
+    isa(elmtype_expected, Type) || return false
+    # Check that the element type is compatible with the element we're assigning
+    elmtype ⊑ elmtype_expected || return false
+    return true
+end
+
 # Query whether the given builtin is guaranteed not to throw given the argtypes
 function _builtin_nothrow(@nospecialize(f), argtypes::Array{Any,1}, @nospecialize(rt))
     if f === arrayset
         array_builtin_common_nothrow(argtypes, 4) || return true
         # Additionally check element type compatibility
-        a = widenconst(argtypes[2])
-        # Check that we can determine the element type
-        (isa(a, DataType) && isa(a.parameters[1], Type)) || return false
-        # Check that the element type is compatible with the element we're assigning
-        (argtypes[3] ⊑ a.parameters[1]::Type) || return false
-        return true
+        return arrayset_typecheck(argtypes[2], argtypes[3])
     elseif f === arrayref || f === const_arrayref
         return array_builtin_common_nothrow(argtypes, 3)
+    elseif f === arraysize
+        return arraysize_nothrow(argtypes)
     elseif f === Core._expr
         length(argtypes) >= 1 || return false
         return argtypes[1] ⊑ Symbol
@@ -1399,10 +1735,107 @@ function _builtin_nothrow(@nospecialize(f), argtypes::Array{Any,1}, @nospecializ
     elseif f === Core.ifelse
         length(argtypes) == 3 || return false
         return argtypes[1] ⊑ Bool
+    elseif f === typeassert
+        length(argtypes) == 2 || return false
+        a3 = argtypes[2]
+        if (isType(a3) && !has_free_typevars(a3) && argtypes[1] ⊑ a3.parameters[1]) ||
+            (isa(a3, Const) && isa(a3.val, Type) && argtypes[1] ⊑ a3.val)
+            return true
+        end
+        return false
+    elseif f === getglobal
+        return getglobal_nothrow(argtypes)
+    elseif f === Core.get_binding_type
+        length(argtypes) == 2 || return false
+        return argtypes[1] ⊑ Module && argtypes[2] ⊑ Symbol
+    elseif f === donotdelete
+        return true
     end
     return false
 end
 
+# known to be always effect-free (in particular nothrow)
+const _PURE_BUILTINS = Any[tuple, svec, ===, typeof, nfields]
+
+# known to be effect-free (but not necessarily nothrow)
+const _EFFECT_FREE_BUILTINS = [
+    fieldtype, apply_type, isa, UnionAll,
+    getfield, arrayref, const_arrayref, isdefined, Core.sizeof,
+    Core.kwfunc, Core.ifelse, Core._typevar, (<:),
+    typeassert, throw, arraysize, getglobal,
+]
+
+const _CONSISTENT_BUILTINS = Any[
+    tuple, # tuple is immutable, thus tuples of egal arguments are egal
+    ===,
+    typeof,
+    nfields,
+    fieldtype,
+    apply_type,
+    isa,
+    UnionAll,
+    Core.sizeof,
+    Core.kwfunc,
+    Core.ifelse,
+    (<:),
+    typeassert,
+    throw
+]
+
+const _SPECIAL_BUILTINS = Any[
+    Core._apply_iterate
+]
+
+function builtin_effects(f::Builtin, argtypes::Vector{Any}, rt)
+    if isa(f, IntrinsicFunction)
+        return intrinsic_effects(f, argtypes)
+    end
+
+    @assert !contains_is(_SPECIAL_BUILTINS, f)
+
+    nothrow = false
+    if (f === Core.getfield || f === Core.isdefined) && length(argtypes) >= 3
+        # consistent if the argtype is immutable
+        if isvarargtype(argtypes[2])
+            return Effects(; effect_free=ALWAYS_TRUE, terminates=ALWAYS_TRUE, nonoverlayed=true)
+        end
+        s = widenconst(argtypes[2])
+        if isType(s) || !isa(s, DataType) || isabstracttype(s)
+            return Effects(; effect_free=ALWAYS_TRUE, terminates=ALWAYS_TRUE, nonoverlayed=true)
+        end
+        s = s::DataType
+        ipo_consistent = !ismutabletype(s)
+        nothrow = false
+        if f === Core.getfield && !isvarargtype(argtypes[end]) &&
+                getfield_boundscheck(argtypes[2:end]) !== true
+            # If we cannot independently prove inboundsness, taint consistency.
+            # The inbounds-ness assertion requires dynamic reachability, while
+            # :consistent needs to be true for all input values.
+            # N.B. We do not taint for `--check-bounds=no` here -that happens in
+            # InferenceState.
+            nothrow = getfield_nothrow(argtypes[2], argtypes[3], true)
+            ipo_consistent &= nothrow
+        else
+            nothrow = isvarargtype(argtypes[end]) ? false :
+                builtin_nothrow(f, argtypes[2:end], rt)
+        end
+        effect_free = true
+    elseif f === getglobal && length(argtypes) >= 3
+        nothrow = getglobal_nothrow(argtypes[2:end])
+        ipo_consistent = nothrow && isconst((argtypes[2]::Const).val, (argtypes[3]::Const).val)
+        effect_free = true
+    else
+        ipo_consistent = contains_is(_CONSISTENT_BUILTINS, f)
+        effect_free = contains_is(_EFFECT_FREE_BUILTINS, f) || contains_is(_PURE_BUILTINS, f)
+        nothrow = isvarargtype(argtypes[end]) ? false : builtin_nothrow(f, argtypes[2:end], rt)
+    end
+
+    return Effects(EFFECTS_TOTAL;
+        consistent = ipo_consistent ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+        effect_free = effect_free ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+        nothrow = nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN)
+end
+
 function builtin_nothrow(@nospecialize(f), argtypes::Array{Any, 1}, @nospecialize(rt))
     rt === Bottom && return false
     contains_is(_PURE_BUILTINS, f) && return true
@@ -1413,30 +1846,10 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
                            sv::Union{InferenceState,Nothing})
     if f === tuple
         return tuple_tfunc(argtypes)
-    elseif f === invoke
-        if length(argtypes) > 1 && sv !== nothing && (isa(argtypes[1], Const) || isa(argtypes[1], Type))
-            if isa(argtypes[1], Const)
-                ft = Core.Typeof(argtypes[1].val)
-            else
-                ft = argtypes[1]
-            end
-            sig = argtypes[2]
-            if isa(sig, Const)
-                sigty = sig.val
-            elseif isType(sig)
-                sigty = sig.parameters[1]
-            else
-                sigty = nothing
-            end
-            if isa(sigty, Type) && !has_free_typevars(sigty) && sigty <: Tuple
-                return invoke_tfunc(interp, ft, sigty, argtypes_to_type(argtypes[3:end]), sv)
-            end
-        end
-        return Any
     end
     if isa(f, IntrinsicFunction)
         if is_pure_intrinsic_infer(f) && _all(@nospecialize(a) -> isa(a, Const), argtypes)
-            argvals = anymap(a::Const -> a.val, argtypes)
+            argvals = anymap(@nospecialize(a) -> (a::Const).val, argtypes)
             try
                 return Const(f(argvals...))
             catch
@@ -1465,7 +1878,7 @@ function builtin_tfunction(interp::AbstractInterpreter, @nospecialize(f), argtyp
         if length(argtypes) - 1 == tf[2]
             argtypes = argtypes[1:end-1]
         else
-            vatype = argtypes[end]
+            vatype = argtypes[end]::TypeofVararg
             argtypes = argtypes[1:end-1]
             while length(argtypes) < tf[1]
                 push!(argtypes, unwrapva(vatype))
@@ -1483,6 +1896,10 @@ end
 
 # Query whether the given intrinsic is nothrow
 
+_iszero(x) = x === Intrinsics.xor_int(x, x)
+_isneg1(x) = _iszero(Intrinsics.not_int(x))
+_istypemin(x) = !_iszero(x) && Intrinsics.neg_int(x) === x
+
 function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1})
     # First check that we have the correct number of arguments
     iidx = Int(reinterpret(Int32, f::IntrinsicFunction)) + 1
@@ -1509,11 +1926,10 @@ function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1})
             return false
         end
         den_val = argtypes[2].val
-        den_val !== zero(typeof(den_val)) || return false
+        _iszero(den_val) && return false
         f !== Intrinsics.checked_sdiv_int && return true
         # Nothrow as long as we additionally don't do typemin(T)/-1
-        return den_val !== -1 || (isa(argtypes[1], Const) &&
-            argtypes[1].val !== typemin(typeof(den_val)))
+        return !_isneg1(den_val) || (isa(argtypes[1], Const) && !_istypemin(argtypes[1].val))
     end
     if f === Intrinsics.pointerref
         # Nothrow as long as the types are ok. N.B.: dereferencability is not
@@ -1531,18 +1947,22 @@ function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1})
         return argtypes[1] ⊑ Array
     end
     if f === Intrinsics.bitcast
-        ty = instanceof_tfunc(argtypes[1])[1]
+        ty, isexact, isconcrete = instanceof_tfunc(argtypes[1])
         xty = widenconst(argtypes[2])
-        return isprimitivetype(ty) && isprimitivetype(xty) && ty.size === xty.size
+        return isconcrete && isprimitivetype(ty) && isprimitivetype(xty) && Core.sizeof(ty) === Core.sizeof(xty)
     end
     if f in (Intrinsics.sext_int, Intrinsics.zext_int, Intrinsics.trunc_int,
              Intrinsics.fptoui, Intrinsics.fptosi, Intrinsics.uitofp,
              Intrinsics.sitofp, Intrinsics.fptrunc, Intrinsics.fpext)
-        # If !isexact, `ty` may be Union{} at runtime even if we have
+        # If !isconcrete, `ty` may be Union{} at runtime even if we have
         # isprimitivetype(ty).
-        ty, isexact = instanceof_tfunc(argtypes[1])
+        ty, isexact, isconcrete = instanceof_tfunc(argtypes[1])
         xty = widenconst(argtypes[2])
-        return isexact && isprimitivetype(ty) && isprimitivetype(xty)
+        return isconcrete && isprimitivetype(ty) && isprimitivetype(xty)
+    end
+    if f === Intrinsics.have_fma
+        ty, isexact, isconcrete = instanceof_tfunc(argtypes[1])
+        return isconcrete && isprimitivetype(ty)
     end
     # The remaining intrinsics are math/bits/comparison intrinsics. They work on all
     # primitive types of the same type.
@@ -1558,6 +1978,42 @@ function intrinsic_nothrow(f::IntrinsicFunction, argtypes::Array{Any, 1})
     return true
 end
 
+# whether `f` is pure for inference
+function is_pure_intrinsic_infer(f::IntrinsicFunction)
+    return !(f === Intrinsics.pointerref || # this one is volatile
+             f === Intrinsics.pointerset || # this one is never effect-free
+             f === Intrinsics.llvmcall ||   # this one is never effect-free
+             f === Intrinsics.arraylen ||   # this one is volatile
+             f === Intrinsics.sqrt_llvm_fast ||  # this one may differ at runtime (by a few ulps)
+             f === Intrinsics.have_fma ||  # this one depends on the runtime environment
+             f === Intrinsics.cglobal)  # cglobal lookup answer changes at runtime
+end
+
+# whether `f` is effect free if nothrow
+intrinsic_effect_free_if_nothrow(f) = f === Intrinsics.pointerref ||
+    f === Intrinsics.have_fma || is_pure_intrinsic_infer(f)
+
+function intrinsic_effects(f::IntrinsicFunction, argtypes::Vector{Any})
+    if f === Intrinsics.llvmcall
+        # llvmcall can do arbitrary things
+        return Effects()
+    end
+
+    ipo_consistent = !(
+        f === Intrinsics.pointerref ||      # this one is volatile
+        f === Intrinsics.arraylen ||        # this one is volatile
+        f === Intrinsics.sqrt_llvm_fast ||  # this one may differ at runtime (by a few ulps)
+        f === Intrinsics.have_fma ||        # this one depends on the runtime environment
+        f === Intrinsics.cglobal)           # cglobal lookup answer changes at runtime
+    effect_free = !(f === Intrinsics.pointerset)
+    nothrow = !isvarargtype(argtypes[end]) && intrinsic_nothrow(f, argtypes[2:end])
+
+    return Effects(EFFECTS_TOTAL;
+        consistent = ipo_consistent ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+        effect_free = effect_free ? ALWAYS_TRUE : TRISTATE_UNKNOWN,
+        nothrow = nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN)
+end
+
 # TODO: this function is a very buggy and poor model of the return_type function
 # since abstract_call_gf_by_type is a very inaccurate model of _method and of typeinf_type,
 # while this assumes that it is an absolutely precise and accurate and exact model of both
@@ -1568,37 +2024,49 @@ function return_type_tfunc(interp::AbstractInterpreter, argtypes::Vector{Any}, s
             aft = argtypes[2]
             if isa(aft, Const) || (isType(aft) && !has_free_typevars(aft)) ||
                    (isconcretetype(aft) && !(aft <: Builtin))
-                af_argtype = isa(tt, Const) ? tt.val : tt.parameters[1]
+                af_argtype = isa(tt, Const) ? tt.val : (tt::DataType).parameters[1]
                 if isa(af_argtype, DataType) && af_argtype <: Tuple
                     argtypes_vec = Any[aft, af_argtype.parameters...]
                     if contains_is(argtypes_vec, Union{})
-                        return Const(Union{})
+                        return CallMeta(Const(Union{}), false)
                     end
-                    rt = abstract_call(interp, nothing, argtypes_vec, sv, -1).rt
+                    # Run the abstract_call without restricting abstract call
+                    # sites. Otherwise, our behavior model of abstract_call
+                    # below will be wrong.
+                    old_restrict = sv.restrict_abstract_call_sites
+                    sv.restrict_abstract_call_sites = false
+                    call = abstract_call(interp, ArgInfo(nothing, argtypes_vec), sv, -1)
+                    sv.restrict_abstract_call_sites = old_restrict
+                    info = verbose_stmt_info(interp) ? ReturnTypeCallInfo(call.info) : false
+                    rt = widenconditional(call.rt)
                     if isa(rt, Const)
                         # output was computed to be constant
-                        return Const(typeof(rt.val))
+                        return CallMeta(Const(typeof(rt.val)), info)
+                    end
+                    rt = widenconst(rt)
+                    if rt === Bottom || (isconcretetype(rt) && !iskindtype(rt))
+                        # output cannot be improved so it is known for certain
+                        return CallMeta(Const(rt), info)
+                    elseif !isempty(sv.pclimitations)
+                        # conservatively express uncertainty of this result
+                        # in two ways: both as being a subtype of this, and
+                        # because of LimitedAccuracy causes
+                        return CallMeta(Type{<:rt}, info)
+                    elseif (isa(tt, Const) || isconstType(tt)) &&
+                        (isa(aft, Const) || isconstType(aft))
+                        # input arguments were known for certain
+                        # XXX: this doesn't imply we know anything about rt
+                        return CallMeta(Const(rt), info)
+                    elseif isType(rt)
+                        return CallMeta(Type{rt}, info)
                     else
-                        rt = widenconst(rt)
-                        if hasuniquerep(rt) || rt === Bottom
-                            # output type was known for certain
-                            return Const(rt)
-                        elseif (isa(tt, Const) || isconstType(tt)) &&
-                            (isa(aft, Const) || isconstType(aft))
-                            # input arguments were known for certain
-                            # XXX: this doesn't imply we know anything about rt
-                            return Const(rt)
-                        elseif isType(rt)
-                            return Type{rt}
-                        else
-                            return Type{<:rt}
-                        end
+                        return CallMeta(Type{<:rt}, info)
                     end
                 end
             end
         end
     end
-    return nothing
+    return CallMeta(Type, false)
 end
 
 # N.B.: typename maps type equivalence classes to a single value
@@ -1609,4 +2077,66 @@ function typename_static(@nospecialize(t))
     return isType(t) ? _typename(t.parameters[1]) : Core.TypeName
 end
 
+function global_order_nothrow(@nospecialize(o), loading::Bool, storing::Bool)
+    o isa Const || return false
+    sym = o.val
+    if sym isa Symbol
+        order = get_atomic_order(sym, loading, storing)
+        return order !== MEMORY_ORDER_INVALID && order !== MEMORY_ORDER_NOTATOMIC
+    end
+    return false
+end
+function getglobal_nothrow(argtypes::Vector{Any})
+    2 ≤ length(argtypes) ≤ 3 || return false
+    if length(argtypes) == 3
+        global_order_nothrow(argtypes[3], true, false) || return false
+    end
+    M, s = argtypes
+    if M isa Const && s isa Const
+        M, s = M.val, s.val
+        if M isa Module && s isa Symbol
+            return isdefined(M, s)
+        end
+    end
+    return false
+end
+function getglobal_tfunc(@nospecialize(M), @nospecialize(s), @nospecialize(_=Symbol))
+    if M isa Const && s isa Const
+        M, s = M.val, s.val
+        if M isa Module && s isa Symbol
+            return abstract_eval_global(M, s)
+        end
+        return Bottom
+    elseif !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol))
+        return Bottom
+    end
+    return Any
+end
+function setglobal!_tfunc(@nospecialize(M), @nospecialize(s), @nospecialize(v),
+                          @nospecialize(_=Symbol))
+    if !(hasintersect(widenconst(M), Module) && hasintersect(widenconst(s), Symbol))
+        return Bottom
+    end
+    return v
+end
+add_tfunc(getglobal, 2, 3, getglobal_tfunc, 1)
+add_tfunc(setglobal!, 3, 4, setglobal!_tfunc, 3)
+
+function get_binding_type_effect_free(@nospecialize(M), @nospecialize(s))
+    if M isa Const && s isa Const
+        M, s = M.val, s.val
+        if M isa Module && s isa Symbol
+            return ccall(:jl_binding_type, Any, (Any, Any), M, s) !== nothing
+        end
+    end
+    return false
+end
+function get_binding_type_tfunc(@nospecialize(M), @nospecialize(s))
+    if get_binding_type_effect_free(M, s)
+        return Const(Core.get_binding_type((M::Const).val, (s::Const).val))
+    end
+    return Type
+end
+add_tfunc(Core.get_binding_type, 2, 2, get_binding_type_tfunc, 0)
+
 @specialize
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 2cd89d0442fdb9..fefa2669972faa 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -1,109 +1,324 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# build (and start inferring) the inference frame for the linfo
-function typeinf(interp::AbstractInterpreter, result::InferenceResult, cached::Bool)
-    frame = InferenceState(result, cached, interp)
+# Tracking of newly-inferred MethodInstances during precompilation
+const track_newly_inferred = RefValue{Bool}(false)
+const newly_inferred = MethodInstance[]
+
+# build (and start inferring) the inference frame for the top-level MethodInstance
+function typeinf(interp::AbstractInterpreter, result::InferenceResult, cache::Symbol)
+    frame = InferenceState(result, cache, interp)
     frame === nothing && return false
-    cached && lock_mi_inference(interp, result.linfo)
+    cache === :global && lock_mi_inference(interp, result.linfo)
     return typeinf(interp, frame)
 end
 
+"""
+The module `Core.Compiler.Timings` provides a simple implementation of nested timers that
+can be used to measure the exclusive time spent inferring each method instance that is
+recursively inferred during type inference.
+
+This is meant to be internal to the compiler, and makes some specific assumptions about
+being used for this purpose alone.
+"""
+module Timings
+
+using Core.Compiler: -, +, :, Vector, length, first, empty!, push!, pop!, @inline,
+    @inbounds, copy, backtrace
+
+# What we record for any given frame we infer during type inference.
+struct InferenceFrameInfo
+    mi::Core.MethodInstance
+    world::UInt64
+    sptypes::Vector{Any}
+    slottypes::Vector{Any}
+    nargs::Int
+end
+
+function _typeinf_identifier(frame::Core.Compiler.InferenceState)
+    mi_info = InferenceFrameInfo(
+        frame.linfo,
+        frame.world,
+        copy(frame.sptypes),
+        copy(frame.slottypes),
+        length(frame.result.argtypes),
+    )
+    return mi_info
+end
+
+"""
+    Core.Compiler.Timing(mi_info, start_time, ...)
+
+Internal type containing the timing result for running type inference on a single
+MethodInstance.
+"""
+struct Timing
+    mi_info::InferenceFrameInfo
+    start_time::UInt64
+    cur_start_time::UInt64
+    time::UInt64
+    children::Core.Array{Timing,1}
+    bt         # backtrace collected upon initial entry to typeinf
+end
+Timing(mi_info, start_time, cur_start_time, time, children) = Timing(mi_info, start_time, cur_start_time, time, children, nothing)
+Timing(mi_info, start_time) = Timing(mi_info, start_time, start_time, UInt64(0), Timing[])
+
+_time_ns() = ccall(:jl_hrtime, UInt64, ())  # Re-implemented here because Base not yet available.
+
+# We keep a stack of the Timings for each of the MethodInstances currently being timed.
+# Since type inference currently operates via a depth-first search (during abstract
+# evaluation), this vector operates like a call stack. The last node in _timings is the
+# node currently being inferred, and its parent is directly before it, etc.
+# Each Timing also contains its own vector for all of its children, so that the tree
+# call structure through type inference is recorded. (It's recorded as a tree, not a graph,
+# because we create a new node for duplicates.)
+const _timings = Timing[]
+# ROOT() is an empty function used as the top-level Timing node to measure all time spent
+# *not* in type inference during a given recording trace. It is used as a "dummy" node.
+function ROOT() end
+const ROOTmi = Core.Compiler.specialize_method(
+    first(Core.Compiler.methods(ROOT)), Tuple{typeof(ROOT)}, Core.svec())
+"""
+    Core.Compiler.reset_timings()
+
+Empty out the previously recorded type inference timings (`Core.Compiler._timings`), and
+start the ROOT() timer again. `ROOT()` measures all time spent _outside_ inference.
+"""
+function reset_timings()
+    empty!(_timings)
+    push!(_timings, Timing(
+        # The MethodInstance for ROOT(), and default empty values for other fields.
+        InferenceFrameInfo(ROOTmi, 0x0, Any[], Any[Core.Const(ROOT)], 1),
+        _time_ns()))
+    return nothing
+end
+reset_timings()
+
+# (This is split into a function so that it can be called both in this module, at the top
+# of `enter_new_timer()`, and once at the Very End of the operation, by whoever started
+# the operation and called `reset_timings()`.)
+# NOTE: the @inline annotations here are not to make it faster, but to reduce the gap between
+# timer manipulations and the tasks we're timing.
+@inline function close_current_timer()
+    stop_time = _time_ns()
+    parent_timer = _timings[end]
+    accum_time = stop_time - parent_timer.cur_start_time
+
+    # Add in accum_time ("modify" the immutable struct)
+    @inbounds begin
+        _timings[end] = Timing(
+            parent_timer.mi_info,
+            parent_timer.start_time,
+            parent_timer.cur_start_time,
+            parent_timer.time + accum_time,
+            parent_timer.children,
+            parent_timer.bt,
+        )
+    end
+    return nothing
+end
+
+@inline function enter_new_timer(frame)
+    # Very first thing, stop the active timer: get the current time and add in the
+    # time since it was last started to its aggregate exclusive time.
+    close_current_timer()
+
+    mi_info = _typeinf_identifier(frame)
+
+    # Start the new timer right before returning
+    push!(_timings, Timing(mi_info, UInt64(0)))
+    len = length(_timings)
+    new_timer = @inbounds _timings[len]
+    # Set the current time _after_ appending the node, to try to exclude the
+    # overhead from measurement.
+    start = _time_ns()
+
+    @inbounds begin
+        _timings[len] = Timing(
+            new_timer.mi_info,
+            start,
+            start,
+            new_timer.time,
+            new_timer.children,
+        )
+    end
+
+    return nothing
+end
+
+# _expected_frame_ is not needed within this function; it is used in the `@assert`, to
+# assert that indeed we are always returning to a parent after finishing all of its
+# children (that is, asserting that inference proceeds via depth-first-search).
+@inline function exit_current_timer(_expected_frame_)
+    # Finish the new timer
+    stop_time = _time_ns()
+
+    expected_mi_info = _typeinf_identifier(_expected_frame_)
+
+    # Grab the new timer again because it might have been modified in _timings
+    # (since it's an immutable struct)
+    # And remove it from the current timings stack
+    new_timer = pop!(_timings)
+    Core.Compiler.@assert new_timer.mi_info.mi === expected_mi_info.mi
+
+    # Prepare to unwind one level of the stack and record in the parent
+    parent_timer = _timings[end]
+
+    accum_time = stop_time - new_timer.cur_start_time
+    # Add in accum_time ("modify" the immutable struct)
+    new_timer = Timing(
+        new_timer.mi_info,
+        new_timer.start_time,
+        new_timer.cur_start_time,
+        new_timer.time + accum_time,
+        new_timer.children,
+        parent_timer.mi_info.mi === ROOTmi ? backtrace() : nothing,
+    )
+    # Record the final timing with the original parent timer
+    push!(parent_timer.children, new_timer)
+
+    # And finally restart the parent timer:
+    len = length(_timings)
+    @inbounds begin
+        _timings[len] = Timing(
+            parent_timer.mi_info,
+            parent_timer.start_time,
+            _time_ns(),
+            parent_timer.time,
+            parent_timer.children,
+            parent_timer.bt,
+        )
+    end
+
+    return nothing
+end
+
+end  # module Timings
+
+"""
+    Core.Compiler.__set_measure_typeinf(onoff::Bool)
+
+If set to `true`, record per-method-instance timings within type inference in the Compiler.
+"""
+__set_measure_typeinf(onoff::Bool) = __measure_typeinf__[] = onoff
+const __measure_typeinf__ = fill(false)
+
+# Wrapper around _typeinf that optionally records the exclusive time for each invocation.
 function typeinf(interp::AbstractInterpreter, frame::InferenceState)
+    if __measure_typeinf__[]
+        Timings.enter_new_timer(frame)
+        v = _typeinf(interp, frame)
+        Timings.exit_current_timer(frame)
+        return v
+    else
+        return _typeinf(interp, frame)
+    end
+end
+
+function finish!(interp::AbstractInterpreter, caller::InferenceResult)
+    # If we didn't transform the src for caching, we may have to transform
+    # it anyway for users like typeinf_ext. Do that here.
+    opt = caller.src
+    if opt isa OptimizationState # implies `may_optimize(interp) === true`
+        if opt.ir !== nothing
+            caller.src = ir_to_codeinf!(opt)
+        end
+    end
+    return caller.src
+end
+
+function _typeinf(interp::AbstractInterpreter, frame::InferenceState)
     typeinf_nocycle(interp, frame) || return false # frame is now part of a higher cycle
     # with no active ip's, frame is done
     frames = frame.callers_in_cycle
     isempty(frames) && push!(frames, frame)
+    valid_worlds = WorldRange()
     for caller in frames
         @assert !(caller.dont_work_on_me)
         caller.dont_work_on_me = true
+        # might might not fully intersect these earlier, so do that now
+        valid_worlds = intersect(caller.valid_worlds, valid_worlds)
     end
     for caller in frames
+        caller.valid_worlds = valid_worlds
         finish(caller, interp)
+        # finalize and record the linfo result
+        caller.inferred = true
     end
     # collect results for the new expanded frame
-    results = Tuple{InferenceResult, Bool}[ ( frames[i].result,
-        frames[i].cached || frames[i].parent !== nothing ) for i in 1:length(frames) ]
-    # empty!(frames)
-    valid_worlds = frame.valid_worlds
-    cached = frame.cached
-    if cached || frame.parent !== nothing
-        for (caller, doopt) in results
-            opt = caller.src
-            if opt isa OptimizationState
-                run_optimizer = doopt && may_optimize(interp)
-                if run_optimizer
-                    optimize(opt, OptimizationParams(interp), caller.result)
-                    finish(opt.src, interp)
-                    # finish updating the result struct
-                    validate_code_in_debug_mode(opt.linfo, opt.src, "optimized")
-                    if opt.const_api
-                        if caller.result isa Const
-                            caller.src = caller.result
-                        else
-                            @assert isconstType(caller.result)
-                            caller.src = Const(caller.result.parameters[1])
-                        end
-                    elseif opt.src.inferred
-                        caller.src = opt.src::CodeInfo # stash a copy of the code (for inlining)
-                    else
-                        caller.src = nothing
-                    end
-                end
-                # As a hack the et reuses frame_edges[1] to push any optimization
-                # edges into, so we don't need to handle them specially here
-                valid_worlds = intersect(valid_worlds, opt.inlining.et.valid_worlds[])
+    results = Tuple{InferenceResult, Vector{Any}, Bool}[
+            ( frames[i].result,
+              frames[i].stmt_edges[1]::Vector{Any},
+              frames[i].cached )
+        for i in 1:length(frames) ]
+    empty!(frames)
+    for (caller, _, _) in results
+        opt = caller.src
+        if opt isa OptimizationState # implies `may_optimize(interp) === true`
+            analyzed = optimize(interp, opt, OptimizationParams(interp), caller)
+            if isa(analyzed, ConstAPI)
+                # XXX: The work in ir_to_codeinf! is essentially wasted. The only reason
+                # we're doing it is so that code_llvm can return the code
+                # for the `return ...::Const` (which never runs anyway). We should do this
+                # as a post processing step instead.
+                ir_to_codeinf!(opt)
+                caller.src = analyzed
             end
+            caller.valid_worlds = (opt.inlining.et::EdgeTracker).valid_worlds[]
         end
     end
-    if last(valid_worlds) == get_world_counter()
-        valid_worlds = WorldRange(first(valid_worlds), typemax(UInt))
-    end
-    for caller in frames
-        caller.valid_worlds = valid_worlds
-        caller.src.min_world = first(valid_worlds)
-        caller.src.max_world = last(valid_worlds)
-        if cached
-            cache_result!(interp, caller.result, valid_worlds)
-        end
-        if last(valid_worlds) == typemax(UInt)
+    for (caller, edges, cached) in results
+        valid_worlds = caller.valid_worlds
+        if last(valid_worlds) >= get_world_counter()
             # if we aren't cached, we don't need this edge
             # but our caller might, so let's just make it anyways
-            for caller in frames
-                store_backedges(caller)
-            end
+            store_backedges(caller, edges)
         end
-        # finalize and record the linfo result
-        caller.inferred = true
+        if cached
+            cache_result!(interp, caller)
+        end
+        finish!(interp, caller)
     end
     return true
 end
 
-function CodeInstance(result::InferenceResult, @nospecialize(inferred_result::Any),
-                      valid_worlds::WorldRange)
+function CodeInstance(
+    result::InferenceResult, @nospecialize(inferred_result), valid_worlds::WorldRange)
     local const_flags::Int32
-    if inferred_result isa Const
+    result_type = result.result
+    @assert !(result_type isa LimitedAccuracy)
+    if inferred_result isa ConstAPI
         # use constant calling convention
-        rettype_const = (result.src::Const).val
+        rettype_const = inferred_result.val
         const_flags = 0x3
         inferred_result = nothing
     else
-        if isa(result.result, Const)
-            rettype_const = (result.result::Const).val
+        if isa(result_type, Const)
+            rettype_const = result_type.val
             const_flags = 0x2
-        elseif isconstType(result.result)
-            rettype_const = result.result.parameters[1]
+        elseif isa(result_type, PartialOpaque)
+            rettype_const = result_type
             const_flags = 0x2
-        elseif isa(result.result, PartialStruct)
-            rettype_const = (result.result::PartialStruct).fields
+        elseif isconstType(result_type)
+            rettype_const = result_type.parameters[1]
+            const_flags = 0x2
+        elseif isa(result_type, PartialStruct)
+            rettype_const = result_type.fields
+            const_flags = 0x2
+        elseif isa(result_type, InterConditional)
+            rettype_const = result_type
             const_flags = 0x2
         else
             rettype_const = nothing
             const_flags = 0x00
         end
     end
+    relocatability = isa(inferred_result, Vector{UInt8}) ? inferred_result[end] : UInt8(0)
     return CodeInstance(result.linfo,
-        widenconst(result.result), rettype_const, inferred_result,
-        const_flags, first(valid_worlds), last(valid_worlds))
+        widenconst(result_type), rettype_const, inferred_result,
+        const_flags, first(valid_worlds), last(valid_worlds),
+        # TODO: Actually do something with non-IPO effects
+	    encode_effects(result.ipo_effects), encode_effects(result.ipo_effects), result.argescapes,
+        relocatability)
 end
 
 # For the NativeInterpreter, we don't need to do an actual cache query to know
@@ -121,15 +336,17 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta
     if toplevel
         return ci
     end
-    cache_the_tree = !may_discard_trees(interp) || (ci.inferred &&
-        (ci.inlineable ||
-        ccall(:jl_isa_compileable_sig, Int32, (Any, Any), linfo.specTypes, def) != 0))
+    if may_discard_trees(interp)
+        cache_the_tree = ci.inferred && (ci.inlineable || isa_compileable_sig(linfo.specTypes, def))
+    else
+        cache_the_tree = true
+    end
     if cache_the_tree
         if may_compress(interp)
             nslots = length(ci.slotflags)
-            resize!(ci.slottypes, nslots)
+            resize!(ci.slottypes::Vector{Any}, nslots)
             resize!(ci.slotnames, nslots)
-            return ccall(:jl_compress_ir, Any, (Any, Any), def, ci)
+            return ccall(:jl_compress_ir, Vector{UInt8}, (Any, Any), def, ci)
         else
             return ci
         end
@@ -139,81 +356,181 @@ function maybe_compress_codeinfo(interp::AbstractInterpreter, linfo::MethodInsta
 end
 
 function transform_result_for_cache(interp::AbstractInterpreter, linfo::MethodInstance,
-                                    @nospecialize(inferred_result))
-    local const_flags::Int32
+                                    valid_worlds::WorldRange, @nospecialize(inferred_result),
+                                    ipo_effects::Effects)
     # If we decided not to optimize, drop the OptimizationState now.
     # External interpreters can override as necessary to cache additional information
     if inferred_result isa OptimizationState
-        inferred_result = inferred_result.src
+        inferred_result = ir_to_codeinf!(inferred_result)
     end
     if inferred_result isa CodeInfo
+        inferred_result.min_world = first(valid_worlds)
+        inferred_result.max_world = last(valid_worlds)
         inferred_result = maybe_compress_codeinfo(interp, linfo, inferred_result)
     end
     # The global cache can only handle objects that codegen understands
-    if !isa(inferred_result, Union{CodeInfo, Vector{UInt8}, Const})
+    if !isa(inferred_result, Union{CodeInfo, Vector{UInt8}, ConstAPI})
         inferred_result = nothing
     end
     return inferred_result
 end
 
-function cache_result!(interp::AbstractInterpreter, result::InferenceResult, valid_worlds::WorldRange)
+function cache_result!(interp::AbstractInterpreter, result::InferenceResult)
+    valid_worlds = result.valid_worlds
+    if last(valid_worlds) == get_world_counter()
+        # if we've successfully recorded all of the backedges in the global reverse-cache,
+        # we can now widen our applicability in the global cache too
+        valid_worlds = WorldRange(first(valid_worlds), typemax(UInt))
+    end
     # check if the existing linfo metadata is also sufficient to describe the current inference result
     # to decide if it is worth caching this
-    already_inferred = already_inferred_quick_test(interp, result.linfo)
-    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), result.linfo)
+    linfo = result.linfo
+    already_inferred = already_inferred_quick_test(interp, linfo)
+    if !already_inferred && haskey(WorldView(code_cache(interp), valid_worlds), linfo)
         already_inferred = true
     end
 
     # TODO: also don't store inferred code if we've previously decided to interpret this function
     if !already_inferred
-        inferred_result = transform_result_for_cache(interp, result.linfo, result.src)
-        code_cache(interp)[result.linfo] = CodeInstance(result, inferred_result, valid_worlds)
+        inferred_result = transform_result_for_cache(interp, linfo, valid_worlds, result.src, result.ipo_effects)
+        code_cache(interp)[linfo] = CodeInstance(result, inferred_result, valid_worlds)
+        if track_newly_inferred[]
+            m = linfo.def
+            if isa(m, Method)
+                m.module != Core && push!(newly_inferred, linfo)
+            end
+        end
     end
-    unlock_mi_inference(interp, result.linfo)
+    unlock_mi_inference(interp, linfo)
     nothing
 end
 
+function cycle_fix_limited(@nospecialize(typ), sv::InferenceState)
+    if typ isa LimitedAccuracy
+        if sv.parent === nothing
+            # when part of a cycle, we might have unintentionally introduced a limit marker
+            @assert !isempty(sv.callers_in_cycle)
+            return typ.typ
+        end
+        causes = copy(typ.causes)
+        delete!(causes, sv)
+        for caller in sv.callers_in_cycle
+            delete!(causes, caller)
+        end
+        if isempty(causes)
+            return typ.typ
+        end
+        if length(causes) != length(typ.causes)
+            return LimitedAccuracy(typ.typ, causes)
+        end
+    end
+    return typ
+end
+
+function adjust_effects(sv::InferenceState)
+    ipo_effects = Effects(sv)
+
+    # Always throwing an error counts or never returning both count as consistent,
+    # but we don't currently model idempontency using dataflow, so we don't notice.
+    # Fix that up here to improve precision.
+    if !ipo_effects.inbounds_taints_consistency && sv.bestguess === Union{}
+        ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
+    end
+
+    # override the analyzed effects using manually annotated effect settings
+    def = sv.linfo.def
+    if isa(def, Method)
+        override = decode_effects_override(def.purity)
+        if is_effect_overridden(override, :consistent)
+            ipo_effects = Effects(ipo_effects; consistent=ALWAYS_TRUE)
+        end
+        if is_effect_overridden(override, :effect_free)
+            ipo_effects = Effects(ipo_effects; effect_free=ALWAYS_TRUE)
+        end
+        if is_effect_overridden(override, :nothrow)
+            ipo_effects = Effects(ipo_effects; nothrow=ALWAYS_TRUE)
+        end
+        if is_effect_overridden(override, :terminates_globally)
+            ipo_effects = Effects(ipo_effects; terminates=ALWAYS_TRUE)
+        end
+    end
+
+    return ipo_effects
+end
+
 # inference completed on `me`
 # update the MethodInstance
 function finish(me::InferenceState, interp::AbstractInterpreter)
     # prepare to run optimization passes on fulltree
-    if me.limited && me.cached && me.parent !== nothing
-        # a top parent will be cached still, but not this intermediate work
+    s_edges = me.stmt_edges[1]
+    if s_edges === nothing
+        s_edges = me.stmt_edges[1] = []
+    end
+    for edges in me.stmt_edges
+        edges === nothing && continue
+        edges === s_edges && continue
+        append!(s_edges, edges)
+        empty!(edges)
+    end
+    if me.src.edges !== nothing
+        append!(s_edges, me.src.edges::Vector)
+        me.src.edges = nothing
+    end
+    # inspect whether our inference had a limited result accuracy,
+    # else it may be suitable to cache
+    me.bestguess = cycle_fix_limited(me.bestguess, me)
+    limited_ret = me.bestguess isa LimitedAccuracy
+    limited_src = false
+    if !limited_ret
+        gt = me.src.ssavaluetypes::Vector{Any}
+        for j = 1:length(gt)
+            gt[j] = gtj = cycle_fix_limited(gt[j], me)
+            if gtj isa LimitedAccuracy && me.parent !== nothing
+                limited_src = true
+                break
+            end
+        end
+    end
+    if limited_ret
+        # a parent may be cached still, but not this intermediate work:
         # we can throw everything else away now
+        me.result.src = nothing
         me.cached = false
+        me.src.inlineable = false
         unlock_mi_inference(interp, me.linfo)
+    elseif limited_src
+        # a type result will be cached still, but not this intermediate work:
+        # we can throw everything else away now
+        me.result.src = nothing
         me.src.inlineable = false
     else
-        # annotate fulltree with type information
-        type_annotate!(me)
-        me.result.src = OptimizationState(me, OptimizationParams(interp), interp)
+        # annotate fulltree with type information,
+        # either because we are the outermost code, or we might use this later
+        doopt = (me.cached || me.parent !== nothing)
+        type_annotate!(me, doopt)
+        if doopt && may_optimize(interp)
+            me.result.src = OptimizationState(me, OptimizationParams(interp), interp)
+        else
+            me.result.src = me.src::CodeInfo # stash a convenience copy of the code (e.g. for reflection)
+        end
     end
+    me.result.valid_worlds = me.valid_worlds
     me.result.result = me.bestguess
-    nothing
-end
-
-function finish(src::CodeInfo, interp::AbstractInterpreter)
-    # convert all type information into the form consumed by the cache for inlining and code-generation
-    widen_all_consts!(src)
-    src.inferred = true
+    me.ipo_effects = me.result.ipo_effects = adjust_effects(me)
+    validate_code_in_debug_mode(me.linfo, me.src, "inferred")
     nothing
 end
 
 # record the backedges
-function store_backedges(frame::InferenceState)
+function store_backedges(frame::InferenceResult, edges::Vector{Any})
     toplevel = !isa(frame.linfo.def, Method)
-    if !toplevel && (frame.cached || frame.parent !== nothing)
-        caller = frame.result.linfo
-        for edges in frame.stmt_edges
-            store_backedges(caller, edges)
-        end
-        store_backedges(caller, frame.src.edges)
-        frame.src.edges = nothing
+    if !toplevel
+        store_backedges(frame.linfo, edges)
     end
+    nothing
 end
 
-store_backedges(caller, edges::Nothing) = nothing
-function store_backedges(caller, edges::Vector)
+function store_backedges(caller::MethodInstance, edges::Vector{Any})
     i = 1
     while i <= length(edges)
         to = edges[i]
@@ -231,8 +548,9 @@ end
 
 # widen all Const elements in type annotations
 function widen_all_consts!(src::CodeInfo)
-    for i = 1:length(src.ssavaluetypes)
-        src.ssavaluetypes[i] = widenconst(src.ssavaluetypes[i])
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for i = 1:length(ssavaluetypes)
+        ssavaluetypes[i] = widenconst(ssavaluetypes[i])
     end
 
     for i = 1:length(src.code)
@@ -260,7 +578,7 @@ function annotate_slot_load!(e::Expr, vtypes::VarTable, sv::InferenceState, unde
         subex = e.args[i]
         if isa(subex, Expr)
             annotate_slot_load!(subex, vtypes, sv, undefs)
-        elseif isa(subex, Slot)
+        elseif isa(subex, SlotNumber)
             e.args[i] = visit_slot_load!(subex, vtypes, sv, undefs)
         end
     end
@@ -269,16 +587,16 @@ end
 function annotate_slot_load(@nospecialize(e), vtypes::VarTable, sv::InferenceState, undefs::Array{Bool,1})
     if isa(e, Expr)
         annotate_slot_load!(e, vtypes, sv, undefs)
-    elseif isa(e, Slot)
+    elseif isa(e, SlotNumber)
         return visit_slot_load!(e, vtypes, sv, undefs)
     end
     return e
 end
 
-function visit_slot_load!(sl::Slot, vtypes::VarTable, sv::InferenceState, undefs::Array{Bool,1})
+function visit_slot_load!(sl::SlotNumber, vtypes::VarTable, sv::InferenceState, undefs::Array{Bool,1})
     id = slot_id(sl)
     s = vtypes[id]
-    vt = widenconditional(s.typ)
+    vt = widenconditional(ignorelimited(s.typ))
     if s.undef
         # find used-undef variables
         undefs[id] = true
@@ -297,6 +615,7 @@ function record_slot_assign!(sv::InferenceState)
     states = sv.stmt_types
     body = sv.src.code::Vector{Any}
     slottypes = sv.slottypes::Vector{Any}
+    ssavaluetypes = sv.src.ssavaluetypes::Vector{Any}
     for i = 1:length(body)
         expr = body[i]
         st_i = states[i]
@@ -304,8 +623,8 @@ function record_slot_assign!(sv::InferenceState)
         if isa(st_i, VarTable) && isa(expr, Expr) && expr.head === :(=)
             lhs = expr.args[1]
             rhs = expr.args[2]
-            if isa(lhs, Slot)
-                vt = widenconst(sv.src.ssavaluetypes[i])
+            if isa(lhs, SlotNumber)
+                vt = widenconst(ssavaluetypes[i])
                 if vt !== Bottom
                     id = slot_id(lhs)
                     otherTy = slottypes[id]
@@ -323,51 +642,55 @@ function record_slot_assign!(sv::InferenceState)
 end
 
 # annotate types of all symbols in AST
-function type_annotate!(sv::InferenceState)
-    # delete dead statements only if we're building this IR to cache it
-    # (otherwise, we'll run the optimization passes later, outside of inference)
-    run_optimizer = (sv.cached || sv.parent !== nothing)
+function type_annotate!(sv::InferenceState, run_optimizer::Bool)
+    # as an optimization, we delete dead statements immediately if we're going to run the optimizer
+    # (otherwise, we'll perhaps run the optimization passes later, outside of inference)
 
     # remove all unused ssa values
-    gt = sv.src.ssavaluetypes
-    for j = 1:length(gt)
-        if gt[j] === NOT_FOUND
-            gt[j] = Union{}
-        end
-        gt[j] = widenconditional(gt[j])
+    src = sv.src
+    ssavaluetypes = src.ssavaluetypes::Vector{Any}
+    for j = 1:length(ssavaluetypes)
+        t = ssavaluetypes[j]
+        ssavaluetypes[j] = t === NOT_FOUND ? Union{} : widenconditional(t)
     end
 
     # compute the required type for each slot
     # to hold all of the items assigned into it
     record_slot_assign!(sv)
     sv.src.slottypes = sv.slottypes
+    @assert !(sv.bestguess isa LimitedAccuracy)
     sv.src.rettype = sv.bestguess
 
     # annotate variables load types
     # remove dead code optimization
     # and compute which variables may be used undef
-    src = sv.src
     states = sv.stmt_types
-    nargs = sv.nargs
-    nslots = length(states[1]::Array{Any,1})
+    nslots = length(states[1]::VarTable)
     undefs = fill(false, nslots)
     body = src.code::Array{Any,1}
     nexpr = length(body)
 
-    # replace GotoIfNot with its condition if the branch target is unreachable
-    for i = 1:nexpr
-        expr = body[i]
-        if isa(expr, GotoIfNot)
-            if !isa(states[expr.dest], VarTable)
-                body[i] = expr.cond
+    # eliminate GotoIfNot if either of branch target is unreachable
+    if run_optimizer
+        for idx = 1:nexpr
+            stmt = body[idx]
+            if isa(stmt, GotoIfNot) && widenconst(argextype(stmt.cond, src, sv.sptypes)) === Bool
+                # replace live GotoIfNot with:
+                # - GotoNode if the fallthrough target is unreachable
+                # - no-op if the branch target is unreachable
+                if states[idx+1] === nothing
+                    body[idx] = GotoNode(stmt.dest)
+                elseif states[stmt.dest] === nothing
+                    body[idx] = nothing
+                end
             end
         end
     end
 
+    # dead code elimination for unreachable regions
     i = 1
     oldidx = 0
     changemap = fill(0, nexpr)
-
     while i <= nexpr
         oldidx += 1
         st_i = states[i]
@@ -380,7 +703,7 @@ function type_annotate!(sv::InferenceState)
                 body[i] = ReturnNode(annotate_slot_load(expr.val, st_i, sv, undefs))
             elseif isa(expr, GotoIfNot)
                 body[i] = GotoIfNot(annotate_slot_load(expr.cond, st_i, sv, undefs), expr.dest)
-            elseif isa(expr, Slot)
+            elseif isa(expr, SlotNumber)
                 body[i] = visit_slot_load!(expr, st_i, sv, undefs)
             end
         else
@@ -389,13 +712,12 @@ function type_annotate!(sv::InferenceState)
             elseif run_optimizer
                 deleteat!(body, i)
                 deleteat!(states, i)
-                deleteat!(src.ssavaluetypes, i)
+                deleteat!(ssavaluetypes, i)
                 deleteat!(src.codelocs, i)
                 deleteat!(sv.stmt_info, i)
+                deleteat!(src.ssaflags, i)
                 nexpr -= 1
-                if oldidx < length(changemap)
-                    changemap[oldidx + 1] = -1
-                end
+                changemap[oldidx] = -1
                 continue
             else
                 body[i] = Const(expr) # annotate that this statement actually is dead
@@ -403,7 +725,6 @@ function type_annotate!(sv::InferenceState)
         end
         i += 1
     end
-
     if run_optimizer
         renumber_ir_elements!(body, changemap)
     end
@@ -436,22 +757,21 @@ function union_caller_cycle!(a::InferenceState, b::InferenceState)
     return
 end
 
-function merge_call_chain!(parent::InferenceState, ancestor::InferenceState, child::InferenceState, limited::Bool)
+function merge_call_chain!(parent::InferenceState, ancestor::InferenceState, child::InferenceState)
     # add backedge of parent <- child
     # then add all backedges of parent <- parent.parent
     # and merge all of the callers into ancestor.callers_in_cycle
     # and ensure that walking the parent list will get the same result (DAG) from everywhere
+    # Also taint the termination effect, because we can no longer guarantee the absence
+    # of recursion.
+    tristate_merge!(parent, Effects(EFFECTS_TOTAL; terminates=TRISTATE_UNKNOWN))
     while true
         add_cycle_backedge!(child, parent, parent.currpc)
         union_caller_cycle!(ancestor, child)
+        tristate_merge!(child, Effects(EFFECTS_TOTAL; terminates=TRISTATE_UNKNOWN))
         child = parent
-        parent = child.parent
         child === ancestor && break
-    end
-    if limited
-        for caller in ancestor.callers_in_cycle
-            caller.limited = true
-        end
+        parent = child.parent::InferenceState
     end
 end
 
@@ -459,6 +779,11 @@ function is_same_frame(interp::AbstractInterpreter, linfo::MethodInstance, frame
     return linfo === frame.linfo
 end
 
+function poison_callstack(infstate::InferenceState, topmost::InferenceState)
+    push!(infstate.pclimitations, topmost)
+    nothing
+end
+
 # Walk through `linfo`'s upstream call chain, starting at `parent`. If a parent
 # frame matching `linfo` is encountered, then there is a cycle in the call graph
 # (i.e. `linfo` is a descendant callee of itself). Upon encountering this cycle,
@@ -469,28 +794,26 @@ end
 function resolve_call_cycle!(interp::AbstractInterpreter, linfo::MethodInstance, parent::InferenceState)
     frame = parent
     uncached = false
-    limited = false
     while isa(frame, InferenceState)
         uncached |= !frame.cached # ensure we never add an uncached frame to a cycle
-        limited |= frame.limited
         if is_same_frame(interp, linfo, frame)
             if uncached
                 # our attempt to speculate into a constant call lead to an undesired self-cycle
                 # that cannot be converged: poison our call-stack (up to the discovered duplicate frame)
                 # with the limited flag and abort (set return type to Any) now
-                poison_callstack(parent, frame, false)
+                poison_callstack(parent, frame)
                 return true
             end
-            merge_call_chain!(parent, frame, frame, limited)
+            merge_call_chain!(parent, frame, frame)
             return frame
         end
         for caller in frame.callers_in_cycle
             if is_same_frame(interp, linfo, caller)
                 if uncached
-                    poison_callstack(parent, frame, false)
+                    poison_callstack(parent, frame)
                     return true
                 end
-                merge_call_chain!(parent, frame, caller, limited)
+                merge_call_chain!(parent, frame, caller)
                 return caller
             end
         end
@@ -499,24 +822,56 @@ function resolve_call_cycle!(interp::AbstractInterpreter, linfo::MethodInstance,
     return false
 end
 
+generating_sysimg() = ccall(:jl_generating_output, Cint, ()) != 0 && JLOptions().incremental == 0
+
+ipo_effects(code::CodeInstance) = decode_effects(code.ipo_purity_bits)
+
+struct EdgeCallResult
+    rt #::Type
+    edge::Union{Nothing,MethodInstance}
+    edge_effects::Effects
+    function EdgeCallResult(@nospecialize(rt),
+                            edge::Union{Nothing,MethodInstance},
+                            edge_effects::Effects)
+        return new(rt, edge, edge_effects)
+    end
+end
+
 # compute (and cache) an inferred AST and return the current best estimate of the result type
-function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atypes), sparams::SimpleVector, caller::InferenceState)
-    mi = specialize_method(method, atypes, sparams)::MethodInstance
+function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, caller::InferenceState)
+    mi = specialize_method(method, atype, sparams)::MethodInstance
     code = get(code_cache(interp), mi, nothing)
     if code isa CodeInstance # return existing rettype if the code is already inferred
-        update_valid_age!(caller, WorldRange(min_world(code), max_world(code)))
-        if isdefined(code, :rettype_const)
-            if isa(code.rettype_const, Vector{Any}) && !(Vector{Any} <: code.rettype)
-                return PartialStruct(code.rettype, code.rettype_const), mi
-            else
-                return Const(code.rettype_const), mi
-            end
+        if code.inferred === nothing && is_stmt_inline(get_curr_ssaflag(caller))
+            # we already inferred this edge before and decided to discard the inferred code,
+            # nevertheless we re-infer it here again and keep it around in the local cache
+            # since the inliner will request to use it later
+            cache = :local
         else
-            return code.rettype, mi
+            effects = ipo_effects(code)
+            update_valid_age!(caller, WorldRange(min_world(code), max_world(code)))
+            rettype = code.rettype
+            if isdefined(code, :rettype_const)
+                rettype_const = code.rettype_const
+                # the second subtyping conditions are necessary to distinguish usual cases
+                # from rare cases when `Const` wrapped those extended lattice type objects
+                if isa(rettype_const, Vector{Any}) && !(Vector{Any} <: rettype)
+                    rettype = PartialStruct(rettype, rettype_const)
+                elseif isa(rettype_const, PartialOpaque) && rettype <: Core.OpaqueClosure
+                    rettype = rettype_const
+                elseif isa(rettype_const, InterConditional) && !(InterConditional <: rettype)
+                    rettype = rettype_const
+                else
+                    rettype = Const(rettype_const)
+                end
+            end
+            return EdgeCallResult(rettype, mi, effects)
         end
+    else
+        cache = :global # cache edge targets by default
     end
-    if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0
-        return Any, nothing
+    if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg()
+        return EdgeCallResult(Any, nothing, Effects())
     end
     if !caller.cached && caller.parent === nothing
         # this caller exists to return to the user
@@ -529,51 +884,51 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
         # completely new
         lock_mi_inference(interp, mi)
         result = InferenceResult(mi)
-        frame = InferenceState(result, #=cached=#true, interp) # always use the cache for edge targets
+        frame = InferenceState(result, cache, interp) # always use the cache for edge targets
         if frame === nothing
             # can't get the source for this, so we know nothing
             unlock_mi_inference(interp, mi)
-            return Any, nothing
+            return EdgeCallResult(Any, nothing, Effects())
         end
-        if caller.cached || caller.limited # don't involve uncached functions in cycle resolution
+        if caller.cached || caller.parent !== nothing # don't involve uncached functions in cycle resolution
             frame.parent = caller
         end
         typeinf(interp, frame)
         update_valid_age!(frame, caller)
-        return widenconst_bestguess(frame.bestguess), frame.inferred ? mi : nothing
+        edge = frame.inferred ? mi : nothing
+        return EdgeCallResult(frame.bestguess, edge, Effects(frame)) # effects are adjusted already within `finish`
     elseif frame === true
         # unresolvable cycle
-        return Any, nothing
+        return EdgeCallResult(Any, nothing, Effects())
     end
     # return the current knowledge about this cycle
     frame = frame::InferenceState
     update_valid_age!(frame, caller)
-    return widenconst_bestguess(frame.bestguess), nothing
-end
-
-function widenconst_bestguess(bestguess)
-    !isa(bestguess, Const) && !isa(bestguess, PartialStruct) && !isa(bestguess, Type) && return widenconst(bestguess)
-    return bestguess
+    return EdgeCallResult(frame.bestguess, nothing, adjust_effects(frame))
 end
 
 #### entry points for inferring a MethodInstance given a type signature ####
 
 # compute an inferred AST and return type
-function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atypes), sparams::SimpleVector, run_optimizer::Bool)
-    mi = specialize_method(method, atypes, sparams)::MethodInstance
+function typeinf_code(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
+    frame = typeinf_frame(interp, method, atype, sparams, run_optimizer)
+    frame === nothing && return nothing, Any
+    frame.inferred || return nothing, Any
+    code = frame.src
+    rt = widenconst(ignorelimited(frame.result.result))
+    return code, rt
+end
+
+# compute an inferred frame
+function typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector, run_optimizer::Bool)
+    mi = specialize_method(method, atype, sparams)::MethodInstance
     ccall(:jl_typeinf_begin, Cvoid, ())
     result = InferenceResult(mi)
-    frame = InferenceState(result, false, interp)
-    frame === nothing && return (nothing, Any)
-    if typeinf(interp, frame) && run_optimizer
-        opt_params = OptimizationParams(interp)
-        opt = OptimizationState(frame, opt_params, interp)
-        optimize(opt, opt_params, result.result)
-        opt.src.inferred = true
-    end
+    frame = InferenceState(result, run_optimizer ? :global : :no, interp)
+    frame === nothing && return nothing
+    typeinf(interp, frame)
     ccall(:jl_typeinf_end, Cvoid, ())
-    frame.inferred || return (nothing, Any)
-    return (frame.src, widenconst(result.result))
+    return frame
 end
 
 # compute (and cache) an inferred AST and return type
@@ -585,22 +940,23 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
         if code isa CodeInstance
             # see if this code already exists in the cache
             inf = code.inferred
-            if invoke_api(code) == 2
+            if use_const_api(code)
                 i == 2 && ccall(:jl_typeinf_end, Cvoid, ())
                 tree = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
-                tree.code = Any[ ReturnNode(quoted(code.rettype_const)) ]
+                rettype_const = code.rettype_const
+                tree.code = Any[ ReturnNode(quoted(rettype_const)) ]
                 nargs = Int(method.nargs)
                 tree.slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), method.slot_syms)
-                tree.slotflags = fill(0x00, nargs)
+                tree.slotflags = fill(IR_FLAG_NULL, nargs)
                 tree.ssavaluetypes = 1
                 tree.codelocs = Int32[1]
-                tree.linetable = [LineInfoNode(method.module, method.name, method.file, Int(method.line), 0)]
+                tree.linetable = [LineInfoNode(method.module, method.name, method.file, method.line, Int32(0))]
                 tree.inferred = true
                 tree.ssaflags = UInt8[0]
                 tree.pure = true
                 tree.inlineable = true
                 tree.parent = mi
-                tree.rettype = Core.Typeof(code.rettype_const)
+                tree.rettype = Core.Typeof(rettype_const)
                 tree.min_world = code.min_world
                 tree.max_world = code.max_world
                 return tree
@@ -622,11 +978,11 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
             end
         end
     end
-    if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0
+    if ccall(:jl_get_module_infer, Cint, (Any,), method.module) == 0 && !generating_sysimg()
         return retrieve_code_info(mi)
     end
     lock_mi_inference(interp, mi)
-    frame = InferenceState(InferenceResult(mi), #=cached=#true, interp)
+    frame = InferenceState(InferenceResult(mi), #=cache=#:global, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
     ccall(:jl_typeinf_end, Cvoid, ())
@@ -635,11 +991,11 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
 end
 
 # compute (and cache) an inferred AST and return the inferred return type
-function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize(atypes), sparams::SimpleVector)
-    if contains_is(unwrap_unionall(atypes).parameters, Union{})
+function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize(atype), sparams::SimpleVector)
+    if contains_is(unwrap_unionall(atype).parameters, Union{})
         return Union{} # don't ask: it does weird and unnecessary things, if it occurs during bootstrap
     end
-    mi = specialize_method(method, atypes, sparams)::MethodInstance
+    mi = specialize_method(method, atype, sparams)::MethodInstance
     for i = 1:2 # test-and-lock-and-test
         i == 2 && ccall(:jl_typeinf_begin, Cvoid, ())
         code = get(code_cache(interp), mi, nothing)
@@ -649,11 +1005,11 @@ function typeinf_type(interp::AbstractInterpreter, method::Method, @nospecialize
             return code.rettype
         end
     end
-    frame = InferenceResult(mi)
-    typeinf(interp, frame, true)
+    result = InferenceResult(mi)
+    typeinf(interp, result, :global)
     ccall(:jl_typeinf_end, Cvoid, ())
-    frame.result isa InferenceState && return nothing
-    return widenconst(frame.result)
+    result.result isa InferenceState && return nothing
+    return widenconst(ignorelimited(result.result))
 end
 
 # This is a bridge for the C code calling `jl_typeinf_func()`
@@ -669,7 +1025,7 @@ function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance
             ccall(:jl_typeinf_begin, Cvoid, ())
             if !src.inferred
                 result = InferenceResult(linfo)
-                frame = InferenceState(result, src, #=cached=#true, interp)
+                frame = InferenceState(result, src, #=cache=#:global, interp)
                 typeinf(interp, frame)
                 @assert frame.inferred # TODO: deal with this better
                 src = frame.src
@@ -680,25 +1036,37 @@ function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance
     return src
 end
 
+function return_type(@nospecialize(f), t::DataType) # this method has a special tfunc
+    world = ccall(:jl_get_tls_world_age, UInt, ())
+    args = Any[_return_type, NativeInterpreter(world), Tuple{Core.Typeof(f), t.parameters...}]
+    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Ptr{Cvoid}}, Cint), args, length(args))
+end
 
-function return_type(@nospecialize(f), @nospecialize(t))
+function return_type(@nospecialize(f), t::DataType, world::UInt)
+    return return_type(Tuple{Core.Typeof(f), t.parameters...}, world)
+end
+
+function return_type(t::DataType)
     world = ccall(:jl_get_tls_world_age, UInt, ())
-    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Ptr{Cvoid}}, Cint), Any[_return_type, f, t, world], 4)
+    return return_type(t, world)
 end
 
-_return_type(@nospecialize(f), @nospecialize(t), world) = _return_type(NativeInterpreter(world), f, t)
+function return_type(t::DataType, world::UInt)
+    args = Any[_return_type, NativeInterpreter(world), t]
+    return ccall(:jl_call_in_typeinf_world, Any, (Ptr{Ptr{Cvoid}}, Cint), args, length(args))
+end
 
-function _return_type(interp::AbstractInterpreter, @nospecialize(f), @nospecialize(t))
+function _return_type(interp::AbstractInterpreter, t::DataType)
     rt = Union{}
+    f = singleton_type(t.parameters[1])
     if isa(f, Builtin)
-        rt = builtin_tfunction(interp, f, Any[t.parameters...], nothing)
-        if isa(rt, TypeVar)
-            rt = rt.ub
-        else
-            rt = widenconst(rt)
-        end
+        args = Any[t.parameters...]
+        popfirst!(args)
+        rt = builtin_tfunction(interp, f, args, nothing)
+        rt = widenconst(rt)
     else
-        for match in _methods(f, t, -1, get_world_counter(interp))
+        for match in _methods_by_ftype(t, -1, get_world_counter(interp))::Vector
+            match = match::MethodMatch
             ty = typeinf_type(interp, match.method, match.spec_types, match.sparams)
             ty === nothing && return Any
             rt = tmerge(rt, ty)
diff --git a/base/compiler/typelattice.jl b/base/compiler/typelattice.jl
index 17a444e840b770..79db3b6cf20b65 100644
--- a/base/compiler/typelattice.jl
+++ b/base/compiler/typelattice.jl
@@ -4,7 +4,7 @@
 # structs/constants #
 #####################
 
-# N.B.: Const/PartialStruct are defined in Core, to allow them to be used
+# N.B.: Const/PartialStruct/InterConditional are defined in Core, to allow them to be used
 # inside the global code cache.
 #
 # # The type of a value might be constant
@@ -18,9 +18,8 @@
 # end
 import Core: Const, PartialStruct
 
-
 # The type of this value might be Bool.
-# However, to enable a limited amount of back-propagagation,
+# However, to enable a limited amount of back-propagation,
 # we also keep some information about how this Bool value was created.
 # In particular, if you branch on this value, then may assume that in
 # the true branch, the type of `var` will be limited by `vtype` and in
@@ -34,7 +33,7 @@ import Core: Const, PartialStruct
 # end
 # ```
 struct Conditional
-    var::Slot
+    var::SlotNumber
     vtype
     elsetype
     function Conditional(
@@ -45,6 +44,18 @@ struct Conditional
     end
 end
 
+# # Similar to `Conditional`, but conveys inter-procedural constraints imposed on call arguments.
+# # This is separate from `Conditional` to catch logic errors: the lattice element name is InterConditional
+# # while processing a call, then Conditional everywhere else. Thus InterConditional does not appear in
+# # CompilerTypes—these type's usages are disjoint—though we define the lattice for InterConditional.
+# struct InterConditional
+#     slot::Int
+#     vtype
+#     elsetype
+# end
+import Core: InterConditional
+const AnyConditional = Union{Conditional,InterConditional}
+
 struct PartialTypeVar
     tv::TypeVar
     # N.B.: Currently unused, but would allow turning something back
@@ -56,27 +67,40 @@ end
 
 # Wraps a type and represents that the value may also be undef at this point.
 # (only used in optimize, not abstractinterpret)
+# N.B. in the lattice, this is epsilon bigger than `typ` (even Any)
 struct MaybeUndef
     typ
     MaybeUndef(@nospecialize(typ)) = new(typ)
 end
 
-# The type of a variable load is either a value or an UndefVarError
-# (only used in abstractinterpret, doesn't appear in optimize)
-struct VarState
-    typ
-    undef::Bool
-    VarState(@nospecialize(typ), undef::Bool) = new(typ, undef)
-end
-
-const VarTable = Array{Any,1}
-
 struct StateUpdate
-    var::Union{Slot,SSAValue}
+    var::SlotNumber
     vtype::VarState
     state::VarTable
+    conditional::Bool
 end
 
+# Represent that the type estimate has been approximated, due to "causes"
+# (only used in abstract interpretion, doesn't appear in optimization)
+# N.B. in the lattice, this is epsilon smaller than `typ` (except Union{})
+struct LimitedAccuracy
+    typ
+    causes::IdSet{InferenceState}
+    function LimitedAccuracy(@nospecialize(typ), causes::IdSet{InferenceState})
+        @assert !isa(typ, LimitedAccuracy) "malformed LimitedAccuracy"
+        return new(typ, causes)
+    end
+end
+
+"""
+    struct NotFound end
+    const NOT_FOUND = NotFound()
+
+A special sigleton that represents a variable has not been analyzed yet.
+Particularly, all SSA value types are initialized as `NOT_FOUND` when creating a new `InferenceState`.
+Note that this is only used for `smerge`, which updates abstract state `VarTable`,
+and thus we don't define the lattice for this.
+"""
 struct NotFound end
 
 const NOT_FOUND = NotFound()
@@ -90,11 +114,10 @@ const CompilerTypes = Union{MaybeUndef, Const, Conditional, NotFound, PartialStr
 # lattice logic #
 #################
 
-function issubconditional(a::Conditional, b::Conditional)
-    avar = a.var
-    bvar = b.var
-    if (isa(avar, Slot) && isa(bvar, Slot) && slot_id(avar) === slot_id(bvar)) ||
-       (isa(avar, SSAValue) && isa(bvar, SSAValue) && avar === bvar)
+# `Conditional` and `InterConditional` are valid in opposite contexts
+# (i.e. local inference and inter-procedural call), as such they will never be compared
+function issubconditional(a::C, b::C) where {C<:AnyConditional}
+    if is_same_conditionals(a, b)
         if a.vtype ⊑ b.vtype
             if a.elsetype ⊑ b.elsetype
                 return true
@@ -104,32 +127,54 @@ function issubconditional(a::Conditional, b::Conditional)
     return false
 end
 
-maybe_extract_const_bool(c::Const) = isa(c.val, Bool) ? c.val : nothing
-function maybe_extract_const_bool(c::Conditional)
+is_same_conditionals(a::Conditional,      b::Conditional)      = slot_id(a.var) === slot_id(b.var)
+is_same_conditionals(a::InterConditional, b::InterConditional) = a.slot === b.slot
+
+is_lattice_bool(@nospecialize(typ)) = typ !== Bottom && typ ⊑ Bool
+
+maybe_extract_const_bool(c::Const) = (val = c.val; isa(val, Bool)) ? val : nothing
+function maybe_extract_const_bool(c::AnyConditional)
     (c.vtype === Bottom && !(c.elsetype === Bottom)) && return false
     (c.elsetype === Bottom && !(c.vtype === Bottom)) && return true
     nothing
 end
 maybe_extract_const_bool(@nospecialize c) = nothing
 
-function ⊑(@nospecialize(a), @nospecialize(b))
+"""
+    a ⊑ b -> Bool
+
+The non-strict partial order over the type inference lattice.
+"""
+@nospecialize(a) ⊑ @nospecialize(b) = begin
+    if isa(b, LimitedAccuracy)
+        if !isa(a, LimitedAccuracy)
+            return false
+        end
+        if b.causes ⊈ a.causes
+            return false
+        end
+        b = b.typ
+    end
+    isa(a, LimitedAccuracy) && (a = a.typ)
     if isa(a, MaybeUndef) && !isa(b, MaybeUndef)
         return false
     end
     isa(a, MaybeUndef) && (a = a.typ)
     isa(b, MaybeUndef) && (b = b.typ)
-    (a === NOT_FOUND || b === Any) && return true
-    (a === Any || b === NOT_FOUND) && return false
+    b === Any && return true
+    a === Any && return false
     a === Union{} && return true
     b === Union{} && return false
-    if isa(a, Conditional)
-        if isa(b, Conditional)
+    @assert !isa(a, TypeVar) "invalid lattice item"
+    @assert !isa(b, TypeVar) "invalid lattice item"
+    if isa(a, AnyConditional)
+        if isa(b, AnyConditional)
             return issubconditional(a, b)
         elseif isa(b, Const) && isa(b.val, Bool)
             return maybe_extract_const_bool(a) === b.val
         end
         a = Bool
-    elseif isa(b, Conditional)
+    elseif isa(b, AnyConditional)
         return false
     end
     if isa(a, PartialStruct)
@@ -155,13 +200,21 @@ function ⊑(@nospecialize(a), @nospecialize(b))
             end
             for i in 1:nfields(a.val)
                 # XXX: let's handle varargs later
-                isdefined(a.val, i) || return false
+                isdefined(a.val, i) || continue # since ∀ T Union{} ⊑ T
                 ⊑(Const(getfield(a.val, i)), b.fields[i]) || return false
             end
             return true
         end
         return false
     end
+    if isa(a, PartialOpaque)
+        if isa(b, PartialOpaque)
+            (a.parent === b.parent && a.source === b.source) || return false
+            return (widenconst(a) <: widenconst(b)) &&
+                ⊑(a.env, b.env)
+        end
+        return widenconst(a) ⊑ b
+    end
     if isa(a, Const)
         if isa(b, Const)
             return a.val === b.val
@@ -177,14 +230,29 @@ function ⊑(@nospecialize(a), @nospecialize(b))
         return false
     elseif isa(a, PartialTypeVar) && b === TypeVar
         return true
-    elseif !(isa(a, Type) || isa(a, TypeVar)) ||
-           !(isa(b, Type) || isa(b, TypeVar))
-        return a === b
-    else
+    elseif isa(a, Type) && isa(b, Type)
         return a <: b
+    else # handle this conservatively in the remaining cases
+        return a === b
     end
 end
 
+"""
+    a ⊏ b -> Bool
+
+The strict partial order over the type inference lattice.
+This is defined as the irreflexive kernel of `⊑`.
+"""
+@nospecialize(a) ⊏ @nospecialize(b) = a ⊑ b && !⊑(b, a)
+
+"""
+    a ⋤ b -> Bool
+
+This order could be used as a slightly more efficient version of the strict order `⊏`,
+where we can safely assume `a ⊑ b` holds.
+"""
+@nospecialize(a) ⋤ @nospecialize(b) = !⊑(b, a)
+
 # Check if two lattice elements are partial order equivalent. This is basically
 # `a ⊑ b && b ⊑ a` but with extra performance optimizations.
 function is_lattice_equal(@nospecialize(a), @nospecialize(b))
@@ -199,27 +267,80 @@ function is_lattice_equal(@nospecialize(a), @nospecialize(b))
         return true
     end
     isa(b, PartialStruct) && return false
-    a isa Const && return false
-    b isa Const && return false
+    if a isa Const
+        if issingletontype(b)
+            return a.val === b.instance
+        end
+        return false
+    end
+    if b isa Const
+        if issingletontype(a)
+            return a.instance === b.val
+        end
+        return false
+    end
+    if isa(a, PartialOpaque)
+        isa(b, PartialOpaque) || return false
+        widenconst(a) == widenconst(b) || return false
+        a.source === b.source || return false
+        a.parent === b.parent || return false
+        return is_lattice_equal(a.env, b.env)
+    end
     return a ⊑ b && b ⊑ a
 end
 
-widenconst(c::Conditional) = Bool
-function widenconst(c::Const)
-    if isa(c.val, Type)
-        if isvarargtype(c.val)
-            return Type
+# compute typeintersect over the extended inference lattice,
+# as precisely as we can,
+# where v is in the extended lattice, and t is a Type.
+function tmeet(@nospecialize(v), @nospecialize(t))
+    if isa(v, Const)
+        if !has_free_typevars(t) && !isa(v.val, t)
+            return Bottom
+        end
+        return v
+    elseif isa(v, PartialStruct)
+        has_free_typevars(t) && return v
+        widev = widenconst(v)
+        if widev <: t
+            return v
+        end
+        ti = typeintersect(widev, t)
+        valid_as_lattice(ti) || return Bottom
+        @assert widev <: Tuple
+        new_fields = Vector{Any}(undef, length(v.fields))
+        for i = 1:length(new_fields)
+            vfi = v.fields[i]
+            if isvarargtype(vfi)
+                new_fields[i] = vfi
+            else
+                new_fields[i] = tmeet(vfi, widenconst(getfield_tfunc(t, Const(i))))
+                if new_fields[i] === Bottom
+                    return Bottom
+                end
+            end
         end
-        return Type{c.val}
-    else
-        return typeof(c.val)
+        return tuple_tfunc(new_fields)
+    elseif isa(v, Conditional)
+        if !(Bool <: t)
+            return Bottom
+        end
+        return v
     end
+    ti = typeintersect(widenconst(v), t)
+    valid_as_lattice(ti) || return Bottom
+    return ti
 end
+
+widenconst(c::AnyConditional) = Bool
+widenconst((; val)::Const) = isa(val, Type) ? Type{val} : typeof(val)
 widenconst(m::MaybeUndef) = widenconst(m.typ)
 widenconst(c::PartialTypeVar) = TypeVar
 widenconst(t::PartialStruct) = t.typ
+widenconst(t::PartialOpaque) = t.typ
 widenconst(t::Type) = t
-widenconst(t::TypeVar) = t
+widenconst(t::TypeVar) = error("unhandled TypeVar")
+widenconst(t::TypeofVararg) = error("unhandled Vararg")
+widenconst(t::LimitedAccuracy) = error("unhandled LimitedAccuracy")
 
 issubstate(a::VarState, b::VarState) = (a.typ ⊑ b.typ && a.undef <= b.undef)
 
@@ -233,31 +354,42 @@ function smerge(sa::Union{NotFound,VarState}, sb::Union{NotFound,VarState})
 end
 
 @inline tchanged(@nospecialize(n), @nospecialize(o)) = o === NOT_FOUND || (n !== NOT_FOUND && !(n ⊑ o))
-@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n, o)))
-
-widenconditional(@nospecialize typ) = typ
-function widenconditional(typ::Conditional)
-    if typ.vtype === Union{}
-        return Const(false)
-    elseif typ.elsetype === Union{}
-        return Const(true)
-    else
-        return Bool
+@inline schanged(@nospecialize(n), @nospecialize(o)) = (n !== o) && (o === NOT_FOUND || (n !== NOT_FOUND && !issubstate(n::VarState, o::VarState)))
+
+function widenconditional(@nospecialize typ)
+    if isa(typ, AnyConditional)
+        if typ.vtype === Union{}
+            return Const(false)
+        elseif typ.elsetype === Union{}
+            return Const(true)
+        else
+            return Bool
+        end
     end
+    return typ
 end
+widenconditional(t::LimitedAccuracy) = error("unhandled LimitedAccuracy")
+
+widenwrappedconditional(@nospecialize(typ))   = widenconditional(typ)
+widenwrappedconditional(typ::LimitedAccuracy) = LimitedAccuracy(widenconditional(typ.typ), typ.causes)
+
+ignorelimited(@nospecialize typ) = typ
+ignorelimited(typ::LimitedAccuracy) = typ.typ
 
 function stupdate!(state::Nothing, changes::StateUpdate)
     newst = copy(changes.state)
-    if isa(changes.var, Slot)
-        changeid = slot_id(changes.var::Slot)
-        newst[changeid] = changes.vtype
-        # remove any Conditional for this Slot from the vtable
+    changeid = slot_id(changes.var)
+    newst[changeid] = changes.vtype
+    # remove any Conditional for this slot from the vtable
+    # (unless this change is came from the conditional)
+    if !changes.conditional
         for i = 1:length(newst)
             newtype = newst[i]
             if isa(newtype, VarState)
-                newtypetyp = newtype.typ
+                newtypetyp = ignorelimited(newtype.typ)
                 if isa(newtypetyp, Conditional) && slot_id(newtypetyp.var) == changeid
-                    newst[i] = VarState(widenconditional(newtypetyp), newtype.undef)
+                    newtypetyp = widenwrappedconditional(newtype.typ)
+                    newst[i] = VarState(newtypetyp, newtype.undef)
                 end
             end
         end
@@ -266,11 +398,8 @@ function stupdate!(state::Nothing, changes::StateUpdate)
 end
 
 function stupdate!(state::VarTable, changes::StateUpdate)
-    if !isa(changes.var, Slot)
-        return stupdate!(state, changes.state)
-    end
-    newstate = false
-    changeid = slot_id(changes.var::Slot)
+    newstate = nothing
+    changeid = slot_id(changes.var)
     for i = 1:length(state)
         if i == changeid
             newtype = changes.vtype
@@ -278,11 +407,13 @@ function stupdate!(state::VarTable, changes::StateUpdate)
             newtype = changes.state[i]
         end
         oldtype = state[i]
-        # remove any Conditional for this Slot from the vtable
-        if isa(newtype, VarState)
-            newtypetyp = newtype.typ
+        # remove any Conditional for this slot from the vtable
+        # (unless this change is came from the conditional)
+        if !changes.conditional && isa(newtype, VarState)
+            newtypetyp = ignorelimited(newtype.typ)
             if isa(newtypetyp, Conditional) && slot_id(newtypetyp.var) == changeid
-                newtype = VarState(widenconditional(newtypetyp), newtype.undef)
+                newtypetyp = widenwrappedconditional(newtype.typ)
+                newtype = VarState(newtypetyp, newtype.undef)
             end
         end
         if schanged(newtype, oldtype)
@@ -294,7 +425,7 @@ function stupdate!(state::VarTable, changes::StateUpdate)
 end
 
 function stupdate!(state::VarTable, changes::VarTable)
-    newstate = false
+    newstate = nothing
     for i = 1:length(state)
         newtype = changes[i]
         oldtype = state[i]
@@ -308,20 +439,24 @@ end
 
 stupdate!(state::Nothing, changes::VarTable) = copy(changes)
 
-stupdate!(state::Nothing, changes::Nothing) = false
+stupdate!(state::Nothing, changes::Nothing) = nothing
 
 function stupdate1!(state::VarTable, change::StateUpdate)
-    if !isa(change.var, Slot)
-        return false
-    end
-    changeid = slot_id(change.var::Slot)
-    # remove any Conditional for this Slot from the catch block vtable
-    for i = 1:length(state)
-        oldtype = state[i]
-        if isa(oldtype, VarState)
-            oldtypetyp = oldtype.typ
-            if isa(oldtypetyp, Conditional) && slot_id(oldtypetyp.var) == changeid
-                state[i] = VarState(widenconditional(oldtypetyp), oldtype.undef)
+    changeid = slot_id(change.var)
+    # remove any Conditional for this slot from the catch block vtable
+    # (unless this change is came from the conditional)
+    if !change.conditional
+        for i = 1:length(state)
+            oldtype = state[i]
+            if isa(oldtype, VarState)
+                oldtypetyp = ignorelimited(oldtype.typ)
+                if isa(oldtypetyp, Conditional) && slot_id(oldtypetyp.var) == changeid
+                    oldtypetyp = widenconditional(oldtypetyp)
+                    if oldtype.typ isa LimitedAccuracy
+                        oldtypetyp = LimitedAccuracy(oldtypetyp, (oldtype.typ::LimitedAccuracy).causes)
+                    end
+                    state[i] = VarState(oldtypetyp, oldtype.undef)
+                end
             end
         end
     end
diff --git a/base/compiler/typelimits.jl b/base/compiler/typelimits.jl
index 22be265287fa51..2c5adb92e5a09c 100644
--- a/base/compiler/typelimits.jl
+++ b/base/compiler/typelimits.jl
@@ -21,7 +21,7 @@ function limit_type_size(@nospecialize(t), @nospecialize(compare), @nospecialize
     type_more_complex(t, compare, source, 1, allowed_tupledepth, allowed_tuplelen) || return t
     r = _limit_type_size(t, compare, source, 1, allowed_tuplelen)
     #@assert t <: r # this may fail if t contains a typevar in invariant and multiple times
-        # in covariant position and r looses the occurence in invariant position (see #36407)
+        # in covariant position and r looses the occurrence in invariant position (see #36407)
     if !(t <: r) # ideally, this should never happen
         # widen to minimum complexity to obtain a valid result
         r = _limit_type_size(t, Any, source, 1, allowed_tuplelen)
@@ -39,6 +39,8 @@ function is_derived_type(@nospecialize(t), @nospecialize(c), mindepth::Int)
     if t === c
         return mindepth <= 1
     end
+    isvarargtype(t) && (t = unwrapva(t))
+    isvarargtype(c) && (c = unwrapva(c))
     if isa(c, Union)
         # see if it is one of the elements of the union
         return is_derived_type(t, c.a, mindepth) || is_derived_type(t, c.b, mindepth)
@@ -77,6 +79,7 @@ end
 # The goal of this function is to return a type of greater "size" and less "complexity" than
 # both `t` or `c` over the lattice defined by `sources`, `depth`, and `allowed_tuplelen`.
 function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVector, depth::Int, allowed_tuplelen::Int)
+    @assert isa(t, Type) && isa(c, Type) "unhandled TypeVar / Vararg"
     if t === c
         return t # quick egal test
     elseif t === Union{}
@@ -85,7 +88,7 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
         return t # fast path: unparameterized are always simple
     else
         ut = unwrap_unionall(t)
-        if isa(ut, DataType) && ut.name !== _va_typename && isa(c, Type) && c !== Union{} && c <: t
+        if isa(ut, DataType) && isa(c, Type) && c !== Union{} && c <: t
             # TODO: need to check that the UnionAll bounds on t are limited enough too
             return t # t is already wider than the comparison in the type lattice
         elseif is_derived_type_from_any(ut, sources, depth)
@@ -96,41 +99,35 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
     # first attempt to turn `c` into a type that contributes meaningful information
     # by peeling off meaningless non-matching wrappers of comparison one at a time
     # then unwrap `t`
-    if isa(c, TypeVar)
-        if isa(t, TypeVar) && t.ub === c.ub && (t.lb === Union{} || t.lb === c.lb)
-            return t # it's ok to change the name, or widen `lb` to Union{}, so we can handle this immediately here
-        end
-        return _limit_type_size(t, c.ub, sources, depth, allowed_tuplelen)
-    end
+    # NOTE that `TypeVar` / `Vararg` are handled separately to catch the logic errors
     if isa(c, UnionAll)
-        return _limit_type_size(t, c.body, sources, depth, allowed_tuplelen)
+        return __limit_type_size(t, c.body, sources, depth, allowed_tuplelen)::Type
     end
     if isa(t, UnionAll)
-        tbody = _limit_type_size(t.body, c, sources, depth, allowed_tuplelen)
+        tbody = __limit_type_size(t.body, c, sources, depth, allowed_tuplelen)
         tbody === t.body && return t
-        return UnionAll(t.var, tbody)
-    elseif isa(t, TypeVar)
-        # don't have a matching TypeVar in comparison, so we keep just the upper bound
-        return _limit_type_size(t.ub, c, sources, depth, allowed_tuplelen)
+        return UnionAll(t.var, tbody)::Type
     elseif isa(t, Union)
         if isa(c, Union)
-            a = _limit_type_size(t.a, c.a, sources, depth, allowed_tuplelen)
-            b = _limit_type_size(t.b, c.b, sources, depth, allowed_tuplelen)
+            a = __limit_type_size(t.a, c.a, sources, depth, allowed_tuplelen)
+            b = __limit_type_size(t.b, c.b, sources, depth, allowed_tuplelen)
             return Union{a, b}
         end
     elseif isa(t, DataType)
-        if isa(c, DataType)
+        if isType(t) # see equivalent case in type_more_complex
+            tt = unwrap_unionall(t.parameters[1])
+            if isa(tt, Union) || isa(tt, TypeVar) || isType(tt)
+                is_derived_type_from_any(tt, sources, depth + 1) && return t
+            else
+                isType(c) && (c = unwrap_unionall(c.parameters[1]))
+                type_more_complex(tt, c, sources, depth, 0, 0) || return t
+            end
+            return Type
+        elseif isa(c, DataType)
             tP = t.parameters
             cP = c.parameters
             if t.name === c.name && !isempty(cP)
-                if isvarargtype(t)
-                    VaT = _limit_type_size(tP[1], cP[1], sources, depth + 1, 0)
-                    N = tP[2]
-                    if isa(N, TypeVar) || N === cP[2]
-                        return Vararg{VaT, N}
-                    end
-                    return Vararg{VaT}
-                elseif t.name === Tuple.name
+                if t.name === Tuple.name
                     # for covariant datatypes (Tuple),
                     # apply type-size limit element-wise
                     ltP = length(tP)
@@ -151,25 +148,12 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
                         else
                             cPi = Any
                         end
-                        Q[i] = _limit_type_size(Q[i], cPi, sources, depth + 1, 0)
+                        Q[i] = __limit_type_size(Q[i], cPi, sources, depth + 1, 0)
                     end
                     return Tuple{Q...}
                 end
-            elseif isvarargtype(c)
-                # Tuple{Vararg{T}} --> Tuple{T} is OK
-                return _limit_type_size(t, cP[1], sources, depth, 0)
             end
         end
-        if isType(t) # allow taking typeof as Type{...}, but ensure it doesn't start nesting
-            tt = unwrap_unionall(t.parameters[1])
-            if isa(tt, DataType) && !isType(tt)
-                is_derived_type_from_any(tt, sources, depth) && return t
-            end
-        end
-        if isvarargtype(t)
-            # never replace Vararg with non-Vararg
-            return Vararg
-        end
         if allowed_tuplelen < 1 && t.name === Tuple.name
             return Any
         end
@@ -185,6 +169,38 @@ function _limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVec
     return Any
 end
 
+# helper function of `_limit_type_size`, which has the right to take and return `TypeVar` / `Vararg`
+function __limit_type_size(@nospecialize(t), @nospecialize(c), sources::SimpleVector, depth::Int, allowed_tuplelen::Int)
+    cN = 0
+    if isvarargtype(c) # Tuple{Vararg{T}} --> Tuple{T} is OK
+        isdefined(c, :N) && (cN = c.N)
+        c = unwrapva(c)
+    end
+    if isa(c, TypeVar)
+        if isa(t, TypeVar) && t.ub === c.ub && (t.lb === Union{} || t.lb === c.lb)
+            return t # it's ok to change the name, or widen `lb` to Union{}, so we can handle this immediately here
+        end
+        return __limit_type_size(t, c.ub, sources, depth, allowed_tuplelen)
+    elseif isa(t, TypeVar)
+        # don't have a matching TypeVar in comparison, so we keep just the upper bound
+        return __limit_type_size(t.ub, c, sources, depth, allowed_tuplelen)
+    elseif isvarargtype(t)
+        # Tuple{Vararg{T,N}} --> Tuple{Vararg{S,M}} is OK
+        # Tuple{T} --> Tuple{Vararg{T}} is OK
+        # but S must be more limited than T, and must not introduce a new number for M
+        VaT = __limit_type_size(unwrapva(t), c, sources, depth + 1, 0)
+        if isdefined(t, :N)
+            tN = t.N
+            if isa(tN, TypeVar) || tN === cN
+                return Vararg{VaT, tN}
+            end
+        end
+        return Vararg{VaT}
+    else
+        return _limit_type_size(t, c, sources, depth, allowed_tuplelen)
+    end
+end
+
 function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVector, depth::Int, tupledepth::Int, allowed_tuplelen::Int)
     # detect cases where the comparison is trivial
     if t === c
@@ -200,6 +216,8 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
         return false # t isn't something new
     end
     # peel off wrappers
+    isvarargtype(t) && (t = unwrapva(t))
+    isvarargtype(c) && (c = unwrapva(c))
     if isa(c, UnionAll)
         # allow wrapping type with fewer UnionAlls than comparison if in a covariant context
         if !isa(t, UnionAll) && tupledepth == 0
@@ -230,7 +248,18 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
     # base case for data types
     if isa(t, DataType)
         tP = t.parameters
-        if isa(c, DataType) && t.name === c.name
+        if isType(t)
+            # Treat Type{T} and T as equivalent to allow taking typeof any
+            # source type (DataType) anywhere as Type{...}, as long as it isn't
+            # nesting as Type{Type{...}}
+            tt = unwrap_unionall(t.parameters[1])
+            if isa(tt, Union) || isa(tt, TypeVar) || isType(tt)
+                return !is_derived_type_from_any(tt, sources, depth + 1)
+            else
+                isType(c) && (c = unwrap_unionall(c.parameters[1]))
+                return type_more_complex(tt, c, sources, depth, 0, 0)
+            end
+        elseif isa(c, DataType) && t.name === c.name
             cP = c.parameters
             length(cP) < length(tP) && return true
             length(cP) > length(tP) && !isvarargtype(tP[end]) && depth == 1 && return false
@@ -238,7 +267,7 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
             # allow creating variation within a nested tuple, but only so deep
             if t.name === Tuple.name && tupledepth > 0
                 tupledepth -= 1
-            elseif !isvarargtype(t)
+            else
                 tupledepth = 0
             end
             isgenerator = (t.name.name === :Generator && t.name.module === _topmod(t.name.module))
@@ -249,7 +278,7 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
                     let tPi = unwrap_unionall(tPi),
                         cPi = unwrap_unionall(cPi)
                         if isa(tPi, DataType) && isa(cPi, DataType) &&
-                                !tPi.abstract && !cPi.abstract &&
+                            !isabstracttype(tPi) && !isabstracttype(cPi) &&
                                 sym_isless(cPi.name.name, tPi.name.name)
                             # allow collect on (anonymous) Generators to nest, provided that their functions are appropriately ordered
                             # TODO: is there a better way?
@@ -260,15 +289,6 @@ function type_more_complex(@nospecialize(t), @nospecialize(c), sources::SimpleVe
                 type_more_complex(tPi, cPi, sources, depth + 1, tupledepth, 0) && return true
             end
             return false
-        elseif isvarargtype(c)
-            return type_more_complex(t, unwrapva(c), sources, depth, tupledepth, 0)
-        end
-        if isType(t) # allow taking typeof any source type anywhere as Type{...}, as long as it isn't nesting Type{Type{...}}
-            tt = unwrap_unionall(t.parameters[1])
-            if isa(tt, DataType) && !isType(tt)
-                is_derived_type_from_any(tt, sources, depth) || return true
-                return false
-            end
         end
     end
     return true
@@ -278,7 +298,55 @@ union_count_abstract(x::Union) = union_count_abstract(x.a) + union_count_abstrac
 union_count_abstract(@nospecialize(x)) = !isdispatchelem(x)
 
 function issimpleenoughtype(@nospecialize t)
-    return unionlen(t)+union_count_abstract(t) <= MAX_TYPEUNION_LENGTH && unioncomplexity(t) <= MAX_TYPEUNION_COMPLEXITY
+    return unionlen(t) + union_count_abstract(t) <= MAX_TYPEUNION_LENGTH &&
+           unioncomplexity(t) <= MAX_TYPEUNION_COMPLEXITY
+end
+
+# A simplified type_more_complex query over the extended lattice
+# (assumes typeb ⊑ typea)
+function issimplertype(@nospecialize(typea), @nospecialize(typeb))
+    typea = ignorelimited(typea)
+    typeb = ignorelimited(typeb)
+    typea isa MaybeUndef && (typea = typea.typ) # n.b. does not appear in inference
+    typeb isa MaybeUndef && (typeb = typeb.typ) # n.b. does not appear in inference
+    typea === typeb && return true
+    if typea isa PartialStruct
+        aty = widenconst(typea)
+        for i = 1:length(typea.fields)
+            ai = unwrapva(typea.fields[i])
+            bi = fieldtype(aty, i)
+            is_lattice_equal(ai, bi) && continue
+            tni = _typename(widenconst(ai))
+            if tni isa Const
+                bi = (tni.val::Core.TypeName).wrapper
+                is_lattice_equal(ai, bi) && continue
+            end
+            bi = getfield_tfunc(typeb, Const(i))
+            is_lattice_equal(ai, bi) && continue
+            # It is not enough for ai to be simpler than bi: it must exactly equal
+            # (for this, an invariant struct field, by contrast to
+            # type_more_complex above which handles covariant tuples).
+            return false
+        end
+    elseif typea isa Type
+        return issimpleenoughtype(typea)
+    # elseif typea isa Const # fall-through good
+    elseif typea isa Conditional # follow issubconditional query
+      typeb isa Const && return true
+      typeb isa Conditional || return false
+      is_same_conditionals(typea, typeb) || return false
+      issimplertype(typea.vtype, typeb.vtype) || return false
+      issimplertype(typea.elsetype, typeb.elsetype) || return false
+    elseif typea isa InterConditional # ibid
+      typeb isa Const && return true
+      typeb isa InterConditional || return false
+      is_same_conditionals(typea, typeb) || return false
+      issimplertype(typea.vtype, typeb.vtype) || return false
+      issimplertype(typea.elsetype, typeb.elsetype) || return false
+    elseif typea isa PartialOpaque
+        # TODO
+    end
+    return true
 end
 
 # pick a wider type that contains both typea and typeb,
@@ -288,12 +356,32 @@ end
 function tmerge(@nospecialize(typea), @nospecialize(typeb))
     typea === Union{} && return typeb
     typeb === Union{} && return typea
+    typea === typeb && return typea
+
     suba = typea ⊑ typeb
-    suba && issimpleenoughtype(typeb) && return typeb
+    suba && issimplertype(typeb, typea) && return typeb
     subb = typeb ⊑ typea
     suba && subb && return typea
-    subb && issimpleenoughtype(typea) && return typea
+    subb && issimplertype(typea, typeb) && return typea
 
+    # type-lattice for LimitedAccuracy wrapper
+    # the merge create a slightly narrower type than needed, but we can't
+    # represent the precise intersection of causes and don't attempt to
+    # enumerate some of these cases where we could
+    if isa(typea, LimitedAccuracy) && isa(typeb, LimitedAccuracy)
+        if typea.causes ⊆ typeb.causes
+            causes = typeb.causes
+        elseif typeb.causes ⊆ typea.causes
+            causes = typea.causes
+        else
+            causes = union!(copy(typea.causes), typeb.causes)
+        end
+        return LimitedAccuracy(tmerge(typea.typ, typeb.typ), causes)
+    elseif isa(typea, LimitedAccuracy)
+        return LimitedAccuracy(tmerge(typea.typ, typeb), typea.causes)
+    elseif isa(typeb, LimitedAccuracy)
+        return LimitedAccuracy(tmerge(typea, typeb.typ), typeb.causes)
+    end
     # type-lattice for MaybeUndef wrapper
     if isa(typea, MaybeUndef) || isa(typeb, MaybeUndef)
         return MaybeUndef(tmerge(
@@ -316,10 +404,10 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb))
         end
     end
     if isa(typea, Conditional) && isa(typeb, Conditional)
-        if typea.var === typeb.var
+        if is_same_conditionals(typea, typeb)
             vtype = tmerge(typea.vtype, typeb.vtype)
             elsetype = tmerge(typea.elsetype, typeb.elsetype)
-            if vtype != elsetype
+            if vtype !== elsetype
                 return Conditional(typea.var, vtype, elsetype)
             end
         end
@@ -329,26 +417,93 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb))
         end
         return Bool
     end
-    if (isa(typea, PartialStruct) || isa(typea, Const)) &&
-       (isa(typeb, PartialStruct) || isa(typeb, Const)) &&
-        widenconst(typea) === widenconst(typeb)
-
-       typea_nfields = nfields_tfunc(typea)
-       typeb_nfields = nfields_tfunc(typeb)
-       if !isa(typea_nfields, Const) || !isa(typeb_nfields, Const) || typea_nfields.val !== typeb_nfields.val
+    # type-lattice for InterConditional wrapper, InterConditional will never be merged with Conditional
+    if isa(typea, InterConditional) && isa(typeb, Const)
+        if typeb.val === true
+            typeb = InterConditional(typea.slot, Any, Union{})
+        elseif typeb.val === false
+            typeb = InterConditional(typea.slot, Union{}, Any)
+        end
+    end
+    if isa(typeb, InterConditional) && isa(typea, Const)
+        if typea.val === true
+            typea = InterConditional(typeb.slot, Any, Union{})
+        elseif typea.val === false
+            typea = InterConditional(typeb.slot, Union{}, Any)
+        end
+    end
+    if isa(typea, InterConditional) && isa(typeb, InterConditional)
+        if is_same_conditionals(typea, typeb)
+            vtype = tmerge(typea.vtype, typeb.vtype)
+            elsetype = tmerge(typea.elsetype, typeb.elsetype)
+            if vtype !== elsetype
+                return InterConditional(typea.slot, vtype, elsetype)
+            end
+        end
+        val = maybe_extract_const_bool(typea)
+        if val isa Bool && val === maybe_extract_const_bool(typeb)
+            return Const(val)
+        end
+        return Bool
+    end
+    # type-lattice for Const and PartialStruct wrappers
+    if ((isa(typea, PartialStruct) || isa(typea, Const)) &&
+        (isa(typeb, PartialStruct) || isa(typeb, Const)))
+        aty = widenconst(typea)
+        bty = widenconst(typeb)
+        if aty === bty
+            # must have egal here, since we do not create PartialStruct for non-concrete types
+            typea_nfields = nfields_tfunc(typea)
+            typeb_nfields = nfields_tfunc(typeb)
+            isa(typea_nfields, Const) || return aty
+            isa(typeb_nfields, Const) || return aty
+            type_nfields = typea_nfields.val::Int
+            type_nfields === typeb_nfields.val::Int || return aty
+            type_nfields == 0 && return aty
+            fields = Vector{Any}(undef, type_nfields)
+            anyrefine = false
+            for i = 1:type_nfields
+                ai = getfield_tfunc(typea, Const(i))
+                bi = getfield_tfunc(typeb, Const(i))
+                ft = fieldtype(aty, i)
+                if is_lattice_equal(ai, bi) || is_lattice_equal(ai, ft)
+                    # Since ai===bi, the given type has no restrictions on complexity.
+                    # and can be used to refine ft
+                    tyi = ai
+                elseif is_lattice_equal(bi, ft)
+                    tyi = bi
+                else
+                    # Otherwise choose between using the fieldtype or some other simple merged type.
+                    # The wrapper type never has restrictions on complexity,
+                    # so try to use that to refine the estimated type too.
+                    tni = _typename(widenconst(ai))
+                    if tni isa Const && tni === _typename(widenconst(bi))
+                        # A tmeet call may cause tyi to become complex, but since the inputs were
+                        # strictly limited to being egal, this has no restrictions on complexity.
+                        # (Otherwise, we would need to use <: and take the narrower one without
+                        # intersection. See the similar comment in abstract_call_method.)
+                        tyi = typeintersect(ft, (tni.val::Core.TypeName).wrapper)
+                    else
+                        # Since aty===bty, the fieldtype has no restrictions on complexity.
+                        tyi = ft
+                    end
+                end
+                fields[i] = tyi
+                if !anyrefine
+                    anyrefine = has_nontrivial_const_info(tyi) || # constant information
+                                tyi ⋤ ft                          # just a type-level information, but more precise than the declared type
+                end
+            end
+            return anyrefine ? PartialStruct(aty, fields) : aty
+        end
+    end
+    if isa(typea, PartialOpaque) && isa(typeb, PartialOpaque) && widenconst(typea) == widenconst(typeb)
+        if !(typea.source === typeb.source &&
+             typea.parent === typeb.parent)
             return widenconst(typea)
-       end
-
-       type_nfields = typea_nfields.val::Int
-       fields = Vector{Any}(undef, type_nfields)
-       anyconst = false
-       for i = 1:type_nfields
-            fields[i] = tmerge(getfield_tfunc(typea, Const(i)),
-                               getfield_tfunc(typeb, Const(i)))
-            anyconst |= has_nontrivial_const_info(fields[i])
-       end
-       return anyconst ? PartialStruct(widenconst(typea), fields) :
-            widenconst(typea)
+        end
+        return PartialOpaque(typea.typ, tmerge(typea.env, typeb.env),
+            typea.parent, typea.source)
     end
     # no special type-inference lattice, join the types
     typea, typeb = widenconst(typea), widenconst(typeb)
@@ -372,9 +527,14 @@ function tmerge(@nospecialize(typea), @nospecialize(typeb))
         # bail if everything isn't a well-formed DataType
         ti = types[i]
         uw = unwrap_unionall(ti)
-        (uw isa DataType && ti <: uw.name.wrapper) || return Any
+        uw isa DataType || return Any
+        ti <: uw.name.wrapper || return Any
         typenames[i] = uw.name
     end
+    u = Union{types...}
+    if issimpleenoughtype(u)
+        return u
+    end
     # see if any of the union elements have the same TypeName
     # in which case, simplify this tmerge by replacing it with
     # the widest possible version of itself (the wrapper)
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index 45a5bfcb121697..e594c233353d92 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -6,16 +6,184 @@
 An abstract base class that allows multiple dispatch to determine the method of
 executing Julia code.  The native Julia LLVM pipeline is enabled by using the
 `NativeInterpreter` concrete instantiation of this abstract class, others can be
-swapped in as long as they follow the AbstractInterpreter API.
+swapped in as long as they follow the `AbstractInterpreter` API.
+
+If `interp` is an `AbstractInterpreter`, it is expected to provide at least the following methods:
+- `InferenceParams(interp)` - return an `InferenceParams` instance
+- `OptimizationParams(interp)` - return an `OptimizationParams` instance
+- `get_world_counter(interp)` - return the world age for this interpreter
+- `get_inference_cache(interp)` - return the runtime inference cache
+- `code_cache(interp)` - return the global inference cache
+"""
+abstract type AbstractInterpreter end
+
+struct ArgInfo
+    fargs::Union{Nothing,Vector{Any}}
+    argtypes::Vector{Any}
+end
+
+struct TriState; state::UInt8; end
+const ALWAYS_FALSE     = TriState(0x00)
+const ALWAYS_TRUE      = TriState(0x01)
+const TRISTATE_UNKNOWN = TriState(0x02)
 
-All AbstractInterpreters are expected to provide at least the following methods:
+function tristate_merge(old::TriState, new::TriState)
+    (old === ALWAYS_FALSE || new === ALWAYS_FALSE) && return ALWAYS_FALSE
+    old === TRISTATE_UNKNOWN && return old
+    return new
+end
 
-- InferenceParams(interp) - return an `InferenceParams` instance
-- OptimizationParams(interp) - return an `OptimizationParams` instance
-- get_world_counter(interp) - return the world age for this interpreter
-- get_inference_cache(interp) - return the runtime inference cache
 """
-abstract type AbstractInterpreter; end
+    effects::Effects
+
+Represents computational effects of a method call.
+
+The effects are composed of the following set of different properties:
+- `effects.consistent::TriState`: this method is guaranteed to return or terminate consistently
+- `effect_free::TriState`: this method is free from externally semantically visible side effects
+- `nothrow::TriState`: this method is guaranteed to not throw an exception
+- `terminates::TriState`: this method is guaranteed to terminate
+- `nonoverlayed::Bool`: indicates that any methods that may be called within this method
+  are not defined in an [overlayed method table](@ref OverlayMethodTable)
+See [`Base.@assume_effects`](@ref) for more detailed explanation on the definitions of these properties.
+
+Along the abstract interpretation, `Effects` at each statement are analyzed locally and
+they are merged into the single global `Effects` that represents the entire effects of
+the analyzed method (see `tristate_merge!`).
+Each effect property is represented as tri-state and managed separately.
+The tri-state consists of `ALWAYS_TRUE`, `TRISTATE_UNKNOWN` and `ALWAYS_FALSE`.
+An effect property is initialized with `ALWAYS_TRUE` and then transitioned towards
+`TRISTATE_UNKNOWN` or `ALWAYS_FALSE`. When we find a statement that has some effect,
+`ALWAYS_TRUE` is propagated if that effect is known to _always_ happen, otherwise
+`TRISTATE_UNKNOWN` is propagated. If a property is known to be `ALWAYS_FALSE`,
+there is no need to do additional analysis as it can not be refined anyway.
+Note that however, within the current data-flow analysis design, it is hard to derive a global
+conclusion from a local analysis on each statement, and as a result, the effect analysis
+usually propagates `TRISTATE_UNKNOWN` currently.
+"""
+struct Effects
+    consistent::TriState
+    effect_free::TriState
+    nothrow::TriState
+    terminates::TriState
+    nonoverlayed::Bool
+    # This effect is currently only tracked in inference and modified
+    # :consistent before caching. We may want to track it in the future.
+    inbounds_taints_consistency::Bool
+end
+function Effects(
+    consistent::TriState,
+    effect_free::TriState,
+    nothrow::TriState,
+    terminates::TriState,
+    nonoverlayed::Bool)
+    return Effects(
+        consistent,
+        effect_free,
+        nothrow,
+        terminates,
+        nonoverlayed,
+        false)
+end
+
+const EFFECTS_TOTAL    = Effects(ALWAYS_TRUE,      ALWAYS_TRUE,      ALWAYS_TRUE,      ALWAYS_TRUE,      true)
+const EFFECTS_THROWS   = Effects(ALWAYS_TRUE,      ALWAYS_TRUE,      TRISTATE_UNKNOWN, ALWAYS_TRUE,      true)
+const EFFECTS_UNKNOWN  = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, true)  # mostly unknown, but it's not overlayed at least (e.g. it's not a call)
+const EFFECTS_UNKNOWN′ = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, false) # unknown, really
+
+function Effects(e::Effects = EFFECTS_UNKNOWN′;
+    consistent::TriState = e.consistent,
+    effect_free::TriState = e.effect_free,
+    nothrow::TriState = e.nothrow,
+    terminates::TriState = e.terminates,
+    nonoverlayed::Bool = e.nonoverlayed,
+    inbounds_taints_consistency::Bool = e.inbounds_taints_consistency)
+    return Effects(
+        consistent,
+        effect_free,
+        nothrow,
+        terminates,
+        nonoverlayed,
+        inbounds_taints_consistency)
+end
+
+is_consistent(effects::Effects)   = effects.consistent === ALWAYS_TRUE
+is_effect_free(effects::Effects)  = effects.effect_free === ALWAYS_TRUE
+is_nothrow(effects::Effects)      = effects.nothrow === ALWAYS_TRUE
+is_terminates(effects::Effects)   = effects.terminates === ALWAYS_TRUE
+is_nonoverlayed(effects::Effects) = effects.nonoverlayed
+
+is_concrete_eval_eligible(effects::Effects) =
+    is_consistent(effects) &&
+    is_effect_free(effects) &&
+    is_terminates(effects)
+
+is_total(effects::Effects) =
+    is_concrete_eval_eligible(effects) &&
+    is_nothrow(effects)
+
+is_removable_if_unused(effects::Effects) =
+    is_effect_free(effects) &&
+    is_terminates(effects) &&
+    is_nothrow(effects)
+
+function encode_effects(e::Effects)
+    return (e.consistent.state << 0) |
+           (e.effect_free.state << 2) |
+           (e.nothrow.state << 4) |
+           (e.terminates.state << 6) |
+           (UInt32(e.nonoverlayed) << 8)
+end
+function decode_effects(e::UInt32)
+    return Effects(
+        TriState((e >> 0) & 0x03),
+        TriState((e >> 2) & 0x03),
+        TriState((e >> 4) & 0x03),
+        TriState((e >> 6) & 0x03),
+        _Bool(   (e >> 8) & 0x01),
+        false)
+end
+
+function tristate_merge(old::Effects, new::Effects)
+    return Effects(
+        tristate_merge(
+            old.consistent, new.consistent),
+        tristate_merge(
+            old.effect_free, new.effect_free),
+        tristate_merge(
+            old.nothrow, new.nothrow),
+        tristate_merge(
+            old.terminates, new.terminates),
+        old.nonoverlayed & new.nonoverlayed,
+        old.inbounds_taints_consistency | new.inbounds_taints_consistency)
+end
+
+struct EffectsOverride
+    consistent::Bool
+    effect_free::Bool
+    nothrow::Bool
+    terminates_globally::Bool
+    terminates_locally::Bool
+end
+
+function encode_effects_override(eo::EffectsOverride)
+    e = 0x00
+    eo.consistent && (e |= 0x01)
+    eo.effect_free && (e |= 0x02)
+    eo.nothrow && (e |= 0x04)
+    eo.terminates_globally && (e |= 0x08)
+    eo.terminates_locally && (e |= 0x10)
+    return e
+end
+
+function decode_effects_override(e::UInt8)
+    return EffectsOverride(
+        (e & 0x01) != 0x00,
+        (e & 0x02) != 0x00,
+        (e & 0x04) != 0x00,
+        (e & 0x08) != 0x00,
+        (e & 0x10) != 0x00)
+end
 
 """
     InferenceResult
@@ -26,15 +194,20 @@ mutable struct InferenceResult
     linfo::MethodInstance
     argtypes::Vector{Any}
     overridden_by_const::BitVector
-    result # ::Type, or InferenceState if WIP
-    src #::Union{CodeInfo, OptimizationState, Nothing} # if inferred copy is available
-    function InferenceResult(linfo::MethodInstance, given_argtypes = nothing)
-        argtypes, overridden_by_const = matching_cache_argtypes(linfo, given_argtypes)
-        return new(linfo, argtypes, overridden_by_const, Any, nothing)
+    result                   # ::Type, or InferenceState if WIP
+    src                      # ::Union{CodeInfo, OptimizationState} if inferred copy is available, nothing otherwise
+    valid_worlds::WorldRange # if inference and optimization is finished
+    ipo_effects::Effects     # if inference is finished
+    effects::Effects         # if optimization is finished
+    argescapes               # ::ArgEscapeCache if optimized, nothing otherwise
+    function InferenceResult(linfo::MethodInstance,
+                             arginfo#=::Union{Nothing,Tuple{ArgInfo,InferenceState}}=# = nothing)
+        argtypes, overridden_by_const = matching_cache_argtypes(linfo, arginfo)
+        return new(linfo, argtypes, overridden_by_const, Any, nothing,
+            WorldRange(), Effects(), Effects(), nothing)
     end
 end
 
-
 """
     OptimizationParams
 
@@ -44,27 +217,27 @@ struct OptimizationParams
     inlining::Bool              # whether inlining is enabled
     inline_cost_threshold::Int  # number of CPU cycles beyond which it's not worth inlining
     inline_nonleaf_penalty::Int # penalty for dynamic dispatch
-    inline_tupleret_bonus::Int  # extra willingness for non-isbits tuple return types
+    inline_tupleret_bonus::Int  # extra inlining willingness for non-concrete tuple return types (in hopes of splitting it up)
     inline_error_path_cost::Int # cost of (un-optimized) calls in blocks that throw
 
+    trust_inference::Bool
+
     # Duplicating for now because optimizer inlining requires it.
     # Keno assures me this will be removed in the near future
     MAX_METHODS::Int
     MAX_TUPLE_SPLAT::Int
     MAX_UNION_SPLITTING::Int
 
-    unoptimize_throw_blocks::Bool
-
     function OptimizationParams(;
             inlining::Bool = inlining_enabled(),
             inline_cost_threshold::Int = 100,
             inline_nonleaf_penalty::Int = 1000,
-            inline_tupleret_bonus::Int = 400,
+            inline_tupleret_bonus::Int = 250,
             inline_error_path_cost::Int = 20,
             max_methods::Int = 3,
             tuple_splat::Int = 32,
             union_splitting::Int = 4,
-            unoptimize_throw_blocks::Bool = true,
+            trust_inference::Bool = false
         )
         return new(
             inlining,
@@ -72,10 +245,10 @@ struct OptimizationParams
             inline_nonleaf_penalty,
             inline_tupleret_bonus,
             inline_error_path_cost,
+            trust_inference,
             max_methods,
             tuple_splat,
-            union_splitting,
-            unoptimize_throw_blocks,
+            union_splitting
         )
     end
 end
@@ -101,14 +274,14 @@ struct InferenceParams
     # before computing the set of matching methods
     MAX_UNION_SPLITTING::Int
     # the maximum number of union-tuples to swap / expand
-    # when inferring a call to _apply
+    # when inferring a call to _apply_iterate
     MAX_APPLY_UNION_ENUM::Int
 
     # parameters limiting large (tuple) types
     TUPLE_COMPLEXITY_LIMIT_DEPTH::Int
 
-    # when attempting to inlining _apply, abort the optimization if the tuple
-    # contains more than this many elements
+    # when attempting to inline _apply_iterate, abort the optimization if the
+    # tuple contains more than this many elements
     MAX_TUPLE_SPLAT::Int
 
     function InferenceParams(;
@@ -164,7 +337,6 @@ struct NativeInterpreter <: AbstractInterpreter
         # incorrect, fail out loudly.
         @assert world <= get_world_counter()
 
-
         return new(
             # Initially empty cache
             Vector{InferenceResult}(),
@@ -184,30 +356,70 @@ InferenceParams(ni::NativeInterpreter) = ni.inf_params
 OptimizationParams(ni::NativeInterpreter) = ni.opt_params
 get_world_counter(ni::NativeInterpreter) = ni.world
 get_inference_cache(ni::NativeInterpreter) = ni.cache
-
-code_cache(ni::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, ni.world)
+code_cache(ni::NativeInterpreter) = WorldView(GLOBAL_CI_CACHE, get_world_counter(ni))
 
 """
     lock_mi_inference(ni::NativeInterpreter, mi::MethodInstance)
 
-Hint that `mi` is in inference to help accelerate bootstrapping. This helps limit the amount of wasted work we might do when inference is working on initially inferring itself by letting us detect when inference is already in progress and not running a second copy on it. This creates a data-race, but the entry point into this code from C (jl_type_infer) already includes detection and restriction on recursion, so it is hopefully mostly a benign problem (since it should really only happen during the first phase of bootstrapping that we encounter this flag).
+Hint that `mi` is in inference to help accelerate bootstrapping.
+This helps us limit the amount of wasted work we might do when inference is working on initially inferring itself
+by letting us detect when inference is already in progress and not running a second copy on it.
+This creates a data-race, but the entry point into this code from C (`jl_type_infer`) already includes detection and restriction on recursion,
+so it is hopefully mostly a benign problem (since it should really only happen during the first phase of bootstrapping that we encounter this flag).
 """
-lock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing)
+lock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = true; nothing)
+lock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return
 
 """
-    See lock_mi_inference
+See `lock_mi_inference`.
 """
-unlock_mi_inference(ni::NativeInterpreter, mi::MethodInstance) = (mi.inInference = false; nothing)
+unlock_mi_inference(::NativeInterpreter, mi::MethodInstance) = (mi.inInference = false; nothing)
+unlock_mi_inference(::AbstractInterpreter, ::MethodInstance) = return
 
 """
-Emit an analysis remark during inference for the current line (`sv.pc`). These annotations are ignored
-by the native interpreter, but can be used by external tooling to annotate
-inference results.
+Emit an analysis remark during inference for the current line (`sv.pc`).
+These annotations are ignored by the native interpreter, but can be used by external tooling
+to annotate inference results.
 """
-add_remark!(ni::NativeInterpreter, sv, s) = nothing
+add_remark!(::AbstractInterpreter, sv#=::InferenceState=#, s) = return
 
-may_optimize(ni::NativeInterpreter) = true
-may_compress(ni::NativeInterpreter) = true
-may_discard_trees(ni::NativeInterpreter) = true
+may_optimize(::AbstractInterpreter) = true
+may_compress(::AbstractInterpreter) = true
+may_discard_trees(::AbstractInterpreter) = true
+verbose_stmt_info(::AbstractInterpreter) = false
 
-method_table(ai::AbstractInterpreter) = InternalMethodTable(get_world_counter(ai))
+"""
+    method_table(interp::AbstractInterpreter) -> MethodTableView
+
+Returns a method table this `interp` uses for method lookup.
+External `AbstractInterpreter` can optionally return `OverlayMethodTable` here
+to incorporate customized dispatches for the overridden methods.
+"""
+method_table(interp::AbstractInterpreter) = InternalMethodTable(get_world_counter(interp))
+
+"""
+By default `AbstractInterpreter` implements the following inference bail out logic:
+- `bail_out_toplevel_call(::AbstractInterpreter, sig, ::InferenceState)`: bail out from inter-procedural inference when inferring top-level and non-concrete call site `callsig`
+- `bail_out_call(::AbstractInterpreter, rt, ::InferenceState)`: bail out from inter-procedural inference when return type `rt` grows up to `Any`
+- `bail_out_apply(::AbstractInterpreter, rt, ::InferenceState)`: bail out from `_apply_iterate` inference when return type `rt` grows up to `Any`
+
+It also bails out from local statement/frame inference when any lattice element gets down to `Bottom`,
+but `AbstractInterpreter` doesn't provide a specific interface for configuring it.
+"""
+bail_out_toplevel_call(::AbstractInterpreter, @nospecialize(callsig), sv#=::InferenceState=#) =
+    return sv.restrict_abstract_call_sites && !isdispatchtuple(callsig)
+bail_out_call(::AbstractInterpreter, @nospecialize(rt), sv#=::InferenceState=#) =
+    return rt === Any
+bail_out_apply(::AbstractInterpreter, @nospecialize(rt), sv#=::InferenceState=#) =
+    return rt === Any
+
+"""
+    infer_compilation_signature(::AbstractInterpreter)::Bool
+
+For some call sites (for example calls to varargs methods), the signature to be compiled
+and executed at run time can differ from the argument types known at the call site.
+This flag controls whether we should always infer the compilation signature in addition
+to the call site signature.
+"""
+infer_compilation_signature(::AbstractInterpreter) = false
+infer_compilation_signature(::NativeInterpreter) = true
diff --git a/base/compiler/typeutils.jl b/base/compiler/typeutils.jl
index d6eb7305b1c8d9..75675e60e1ca42 100644
--- a/base/compiler/typeutils.jl
+++ b/base/compiler/typeutils.jl
@@ -4,13 +4,6 @@
 # lattice utilities #
 #####################
 
-function rewrap(@nospecialize(t), @nospecialize(u))
-    if isa(t, TypeVar) || isa(t, Type)
-        return rewrap_unionall(t, u)
-    end
-    return t
-end
-
 isType(@nospecialize t) = isa(t, DataType) && t.name === _TYPE_NAME
 
 # true if Type{T} is inlineable as constant T
@@ -34,20 +27,81 @@ end
 
 function has_nontrivial_const_info(@nospecialize t)
     isa(t, PartialStruct) && return true
-    return isa(t, Const) && !isdefined(typeof(t.val), :instance) && !(isa(t.val, Type) && hasuniquerep(t.val))
+    isa(t, PartialOpaque) && return true
+    isa(t, Const) || return false
+    val = t.val
+    return !isdefined(typeof(val), :instance) && !(isa(val, Type) && hasuniquerep(val))
 end
 
+has_const_info(@nospecialize x) = (!isa(x, Type) && !isvarargtype(x)) || isType(x)
+
 # Subtyping currently intentionally answers certain queries incorrectly for kind types. For
 # some of these queries, this check can be used to somewhat protect against making incorrect
 # decisions based on incorrect subtyping. Note that this check, itself, is broken for
 # certain combinations of `a` and `b` where one/both isa/are `Union`/`UnionAll` type(s)s.
-isnotbrokensubtype(@nospecialize(a), @nospecialize(b)) = (!iskindtype(b) || !isType(a) || hasuniquerep(a.parameters[1]))
+isnotbrokensubtype(@nospecialize(a), @nospecialize(b)) = (!iskindtype(b) || !isType(a) || hasuniquerep(a.parameters[1]) || b <: a)
 
-argtypes_to_type(argtypes::Array{Any,1}) = Tuple{anymap(widenconst, argtypes)...}
+argtypes_to_type(argtypes::Array{Any,1}) = Tuple{anymap(@nospecialize(a) -> isvarargtype(a) ? a : widenconst(a), argtypes)...}
 
 function isknownlength(t::DataType)
     isvatuple(t) || return true
-    return length(t.parameters) > 0 && isa(unwrap_unionall(t.parameters[end]).parameters[2], Int)
+    va = t.parameters[end]
+    return isdefined(va, :N) && va.N isa Int
+end
+
+# Compute the minimum number of initialized fields for a particular datatype
+# (therefore also a lower bound on the number of fields)
+function datatype_min_ninitialized(t::DataType)
+    isabstracttype(t) && return 0
+    if t.name === NamedTuple_typename
+        names, types = t.parameters[1], t.parameters[2]
+        if names isa Tuple
+            return length(names)
+        end
+        t = argument_datatype(types)
+        t isa DataType || return 0
+        t.name === Tuple.name || return 0
+    end
+    if t.name === Tuple.name
+        n = length(t.parameters)
+        n == 0 && return 0
+        va = t.parameters[n]
+        if isvarargtype(va)
+            n -= 1
+            if isdefined(va, :N)
+                va = va.N
+                if va isa Int
+                    n += va
+                end
+            end
+        end
+        return n
+    end
+    return length(t.name.names) - t.name.n_uninitialized
+end
+
+has_concrete_subtype(d::DataType) = d.flags & 0x20 == 0x20 # n.b. often computed only after setting the type and layout fields
+
+# determine whether x is a valid lattice element tag
+# For example, Type{v} is not valid if v is a value
+# Accepts TypeVars also, since it assumes the user will rewrap it correctly
+function valid_as_lattice(@nospecialize(x))
+    x === Bottom && false
+    x isa TypeVar && return valid_as_lattice(x.ub)
+    x isa UnionAll && (x = unwrap_unionall(x))
+    if x isa Union
+        # the Union constructor ensures this (and we'll recheck after
+        # operations that might remove the Union itself)
+        return true
+    end
+    if x isa DataType
+        if isType(x)
+            p = x.parameters[1]
+            p isa Type || p isa TypeVar || return false
+        end
+        return true
+    end
+    return false
 end
 
 # test if non-Type, non-TypeVar `x` can be used to parameterize a type
@@ -61,21 +115,59 @@ function valid_tparam(@nospecialize(x))
     return isa(x, Symbol) || isbits(x)
 end
 
+function compatible_vatuple(a::DataType, b::DataType)
+    vaa = a.parameters[end]
+    vab = a.parameters[end]
+    if !(isvarargtype(vaa) && isvarargtype(vab))
+        return isvarargtype(vaa) == isvarargtype(vab)
+    end
+    (isdefined(vaa, :N) == isdefined(vab, :N)) || return false
+    !isdefined(vaa, :N) && return true
+    return vaa.N === vab.N
+end
+
 # return an upper-bound on type `a` with type `b` removed
 # such that `return <: a` && `Union{return, b} == Union{a, b}`
-function typesubtract(@nospecialize(a), @nospecialize(b))
+function typesubtract(@nospecialize(a), @nospecialize(b), MAX_UNION_SPLITTING::Int)
     if a <: b && isnotbrokensubtype(a, b)
         return Bottom
     end
-    if isa(a, Union)
-        return Union{typesubtract(a.a, b),
-                     typesubtract(a.b, b)}
+    ua = unwrap_unionall(a)
+    if isa(ua, Union)
+        uua = typesubtract(rewrap_unionall(ua.a, a), b, MAX_UNION_SPLITTING)
+        uub = typesubtract(rewrap_unionall(ua.b, a), b, MAX_UNION_SPLITTING)
+        return Union{valid_as_lattice(uua) ? uua : Union{},
+                     valid_as_lattice(uub) ? uub : Union{}}
     elseif a isa DataType
-        if b isa DataType
-            if a.name === b.name === Tuple.name && length(a.types) == length(b.types)
-                ta = switchtupleunion(a)
-                if length(ta) > 1
-                    return typesubtract(Union{ta...}, b)
+        ub = unwrap_unionall(b)
+        if ub isa DataType
+            if a.name === ub.name === Tuple.name &&
+                    length(a.parameters) == length(ub.parameters)
+                if 1 < unionsplitcost(a.parameters) <= MAX_UNION_SPLITTING
+                    ta = switchtupleunion(a)
+                    return typesubtract(Union{ta...}, b, 0)
+                elseif b isa DataType
+                    if !compatible_vatuple(a, b)
+                        return a
+                    end
+                    # if exactly one element is not bottom after calling typesubtract
+                    # then the result is all of the elements as normal except that one
+                    notbottom = fill(false, length(a.parameters))
+                    for i = 1:length(notbottom)
+                        ap = unwrapva(a.parameters[i])
+                        bp = unwrapva(b.parameters[i])
+                        notbottom[i] = !(ap <: bp && isnotbrokensubtype(ap, bp))
+                    end
+                    let i = findfirst(notbottom)
+                        if i !== nothing && findnext(notbottom, i + 1) === nothing
+                            ta = collect(a.parameters)
+                            ap = a.parameters[i]
+                            bp = b.parameters[i]
+                            (isvarargtype(ap) || isvarargtype(bp)) && return a
+                            ta[i] = typesubtract(ap, bp, min(2, MAX_UNION_SPLITTING))
+                            return Tuple{ta...}
+                        end
+                    end
                 end
             end
         end
@@ -83,12 +175,7 @@ function typesubtract(@nospecialize(a), @nospecialize(b))
     return a # TODO: improve this bound?
 end
 
-function tvar_extent(@nospecialize t)
-    while t isa TypeVar
-        t = t.ub
-    end
-    return t
-end
+hasintersect(@nospecialize(a), @nospecialize(b)) = typeintersect(a, b) !== Bottom
 
 _typename(@nospecialize a) = Union{}
 _typename(a::TypeVar) = Core.TypeName
@@ -107,19 +194,28 @@ function tuple_tail_elem(@nospecialize(init), ct::Vector{Any})
     t = init
     for x in ct
         # FIXME: this is broken: it violates subtyping relations and creates invalid types with free typevars
-        t = tmerge(t, tvar_extent(unwrapva(x)))
+        t = tmerge(t, unwraptv(unwrapva(x)))
     end
     return Vararg{widenconst(t)}
 end
 
-function countunionsplit(atypes::Union{SimpleVector,Vector{Any}})
+# Gives a cost function over the effort to switch a tuple-union representation
+# as a cartesian product, relative to the size of the original representation.
+# Thus, we count the longest element as being roughly invariant to being inside
+# or outside of the Tuple/Union nesting, though somewhat more expensive to be
+# outside than inside because the representation is larger (because and it
+# informs the callee whether any splitting is possible).
+function unionsplitcost(argtypes::Union{SimpleVector,Vector{Any}})
     nu = 1
-    for ti in atypes
+    max = 2
+    for ti in argtypes
         if isa(ti, Union)
-            nu, ovf = Core.Intrinsics.checked_smul_int(nu, unionlen(ti::Union))
-            if ovf
-                return typemax(Int)
+            nti = unionlen(ti)
+            if nti > max
+                max, nti = nti, max
             end
+            nu, ovf = Core.Intrinsics.checked_smul_int(nu, nti)
+            ovf && return typemax(Int)
         end
     end
     return nu
@@ -133,10 +229,16 @@ function switchtupleunion(@nospecialize(ty))
     return _switchtupleunion(Any[tparams...], length(tparams), [], ty)
 end
 
+switchtupleunion(argtypes::Vector{Any}) = _switchtupleunion(argtypes, length(argtypes), [], nothing)
+
 function _switchtupleunion(t::Vector{Any}, i::Int, tunion::Vector{Any}, @nospecialize(origt))
     if i == 0
-        tpl = rewrap_unionall(Tuple{t...}, origt)
-        push!(tunion, tpl)
+        if origt === nothing
+            push!(tunion, copy(t))
+        else
+            tpl = rewrap_unionall(Tuple{t...}, origt)
+            push!(tunion, tpl)
+        end
     else
         ti = t[i]
         if isa(ti, Union)
@@ -154,25 +256,47 @@ end
 
 # unioncomplexity estimates the number of calls to `tmerge` to obtain the given type by
 # counting the Union instances, taking also into account those hidden in a Tuple or UnionAll
-function unioncomplexity(u::Union)
-    return unioncomplexity(u.a) + unioncomplexity(u.b) + 1
-end
-function unioncomplexity(t::DataType)
-    t.name === Tuple.name || isvarargtype(t) || return 0
-    c = 0
-    for ti in t.parameters
-        c = max(c, unioncomplexity(ti))
+unioncomplexity(@nospecialize x) = _unioncomplexity(x)::Int
+function _unioncomplexity(@nospecialize x)
+    if isa(x, DataType)
+        x.name === Tuple.name || isvarargtype(x) || return 0
+        c = 0
+        for ti in x.parameters
+            c = max(c, unioncomplexity(ti))
+        end
+        return c
+    elseif isa(x, Union)
+        return unioncomplexity(x.a) + unioncomplexity(x.b) + 1
+    elseif isa(x, UnionAll)
+        return max(unioncomplexity(x.body), unioncomplexity(x.var.ub))
+    elseif isa(x, TypeofVararg)
+        return isdefined(x, :T) ? unioncomplexity(x.T) : 0
+    else
+        return 0
     end
-    return c
 end
-unioncomplexity(u::UnionAll) = max(unioncomplexity(u.body), unioncomplexity(u.var.ub))
-unioncomplexity(@nospecialize(x)) = 0
 
-function improvable_via_constant_propagation(@nospecialize(t))
-    if isconcretetype(t) && t <: Tuple
-        for p in t.parameters
-            p === DataType && return true
+# convert a Union of Tuple types to a Tuple of Unions
+function unswitchtupleunion(u::Union)
+    ts = uniontypes(u)
+    n = -1
+    for t in ts
+        if t isa DataType && t.name === Tuple.name && length(t.parameters) != 0 && !isvarargtype(t.parameters[end])
+            if n == -1
+                n = length(t.parameters)
+            elseif n != length(t.parameters)
+                return u
+            end
+        else
+            return u
         end
     end
-    return false
+    Tuple{Any[ Union{Any[(t::DataType).parameters[i] for t in ts]...} for i in 1:n ]...}
+end
+
+function unwraptv(@nospecialize t)
+    while isa(t, TypeVar)
+        t = t.ub
+    end
+    return t
 end
diff --git a/base/compiler/utilities.jl b/base/compiler/utilities.jl
index 834d2759f3b753..07281a353dbb6b 100644
--- a/base/compiler/utilities.jl
+++ b/base/compiler/utilities.jl
@@ -4,7 +4,7 @@
 # generic #
 ###########
 
-if !isdefined(@__MODULE__, Symbol("@timeit"))
+if !@isdefined(var"@timeit")
     # This is designed to allow inserting timers when loading a second copy
     # of inference for performing performance experiments.
     macro timeit(args...)
@@ -19,6 +19,8 @@ function _any(@nospecialize(f), a)
     end
     return false
 end
+any(@nospecialize(f), itr) = _any(f, itr)
+any(itr) = _any(identity, itr)
 
 function _all(@nospecialize(f), a)
     for x in a
@@ -26,6 +28,8 @@ function _all(@nospecialize(f), a)
     end
     return true
 end
+all(@nospecialize(f), itr) = _all(f, itr)
+all(itr) = _all(identity, itr)
 
 function contains_is(itr, @nospecialize(x))
     for y in itr
@@ -48,7 +52,7 @@ function istopfunction(@nospecialize(f), name::Symbol)
     tn = typeof(f).name
     if tn.mt.name === name
         top = _topmod(tn.module)
-        return isdefined(top, name) && isconst(top, name) && f === getfield(top, name)
+        return isdefined(top, name) && isconst(top, name) && f === getglobal(top, name)
     end
     return false
 end
@@ -59,7 +63,7 @@ end
 
 # Meta expression head, these generally can't be deleted even when they are
 # in a dead branch but can be ignored when analyzing uses/liveness.
-is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo)
+is_meta_expr_head(head::Symbol) = head === :boundscheck || head === :meta || head === :loopinfo
 
 sym_isless(a::Symbol, b::Symbol) = ccall(:strcmp, Int32, (Ptr{UInt8}, Ptr{UInt8}), a, b) < 0
 
@@ -72,26 +76,43 @@ function quoted(@nospecialize(x))
     return is_self_quoting(x) ? x : QuoteNode(x)
 end
 
-function is_inlineable_constant(@nospecialize(x))
-    if x isa Type || x isa Symbol
-        return true
+function count_const_size(@nospecialize(x), count_self::Bool = true)
+    (x isa Type || x isa Symbol) && return 0
+    ismutable(x) && return MAX_INLINE_CONST_SIZE + 1
+    isbits(x) && return Core.sizeof(x)
+    dt = typeof(x)
+    sz = count_self ? sizeof(dt) : 0
+    sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1
+    dtfd = DataTypeFieldDesc(dt)
+    for i = 1:nfields(x)
+        isdefined(x, i) || continue
+        f = getfield(x, i)
+        if !dtfd[i].isptr && datatype_pointerfree(typeof(f))
+            continue
+        end
+        sz += count_const_size(f, dtfd[i].isptr)
+        sz > MAX_INLINE_CONST_SIZE && return MAX_INLINE_CONST_SIZE + 1
     end
-    return isbits(x) && Core.sizeof(x) <= MAX_INLINE_CONST_SIZE
+    return sz
+end
+
+function is_inlineable_constant(@nospecialize(x))
+    return count_const_size(x) <= MAX_INLINE_CONST_SIZE
 end
 
 ###########################
 # MethodInstance/CodeInfo #
 ###########################
 
-function invoke_api(li::CodeInstance)
-    return ccall(:jl_invoke_api, Cint, (Any,), li)
-end
+invoke_api(li::CodeInstance) = ccall(:jl_invoke_api, Cint, (Any,), li)
+use_const_api(li::CodeInstance) = invoke_api(li) == 2
 
-function get_staged(li::MethodInstance)
-    may_invoke_generator(li) || return nothing
+function get_staged(mi::MethodInstance)
+    may_invoke_generator(mi) || return nothing
     try
         # user code might throw errors – ignore them
-        return ccall(:jl_code_for_staged, Any, (Any,), li)::CodeInfo
+        ci = ccall(:jl_code_for_staged, Any, (Any,), mi)::CodeInfo
+        return ci
     catch
         return nothing
     end
@@ -116,92 +137,92 @@ function retrieve_code_info(linfo::MethodInstance)
         c.parent = linfo
         return c
     end
+    return nothing
 end
 
-# Get at the nonfunction_mt, which happens to be the mt of SimpleVector
-const nonfunction_mt = typename(SimpleVector).mt
-
-function get_compileable_sig(method::Method, @nospecialize(atypes), sparams::SimpleVector)
-    isa(atypes, DataType) || return Nothing
-    mt = ccall(:jl_method_table_for, Any, (Any,), atypes)
+function get_compileable_sig(method::Method, @nospecialize(atype), sparams::SimpleVector)
+    isa(atype, DataType) || return nothing
+    mt = ccall(:jl_method_table_for, Any, (Any,), atype)
     mt === nothing && return nothing
     return ccall(:jl_normalize_to_compilable_sig, Any, (Any, Any, Any, Any),
-        mt, atypes, sparams, method)
+        mt, atype, sparams, method)
+end
+
+isa_compileable_sig(@nospecialize(atype), method::Method) =
+    !iszero(ccall(:jl_isa_compileable_sig, Int32, (Any, Any), atype, method))
+
+# eliminate UnionAll vars that might be degenerate due to having identical bounds,
+# or a concrete upper bound and appearing covariantly.
+function subst_trivial_bounds(@nospecialize(atype))
+    if !isa(atype, UnionAll)
+        return atype
+    end
+    v = atype.var
+    if isconcretetype(v.ub) || v.lb === v.ub
+        subst = try
+            atype{v.ub}
+        catch
+            # Note in rare cases a var bound might not be valid to substitute.
+            nothing
+        end
+        if subst !== nothing
+            return subst_trivial_bounds(subst)
+        end
+    end
+    return UnionAll(v, subst_trivial_bounds(atype.body))
+end
+
+# If removing trivial vars from atype results in an equivalent type, use that
+# instead. Otherwise we can get a case like issue #38888, where a signature like
+#   f(x::S) where S<:Int
+# gets cached and matches a concrete dispatch case.
+function normalize_typevars(method::Method, @nospecialize(atype), sparams::SimpleVector)
+    at2 = subst_trivial_bounds(atype)
+    if at2 !== atype && at2 == atype
+        atype = at2
+        sp_ = ccall(:jl_type_intersection_with_env, Any, (Any, Any), at2, method.sig)::SimpleVector
+        sparams = sp_[2]::SimpleVector
+    end
+    return atype, sparams
 end
 
 # get a handle to the unique specialization object representing a particular instantiation of a call
-function specialize_method(method::Method, @nospecialize(atypes), sparams::SimpleVector, preexisting::Bool=false, compilesig::Bool=false)
+function specialize_method(method::Method, @nospecialize(atype), sparams::SimpleVector; preexisting::Bool=false, compilesig::Bool=false)
+    if isa(atype, UnionAll)
+        atype, sparams = normalize_typevars(method, atype, sparams)
+    end
     if compilesig
-        new_atypes = get_compileable_sig(method, atypes, sparams)
-        new_atypes === nothing && return nothing
-        atypes = new_atypes
+        new_atype = get_compileable_sig(method, atype, sparams)
+        new_atype === nothing && return nothing
+        atype = new_atype
     end
     if preexisting
         # check cached specializations
         # for an existing result stored there
-        return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atypes)
+        return ccall(:jl_specializations_lookup, Any, (Any, Any), method, atype)::Union{Nothing,MethodInstance}
     end
-    return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atypes, sparams)
+    return ccall(:jl_specializations_get_linfo, Ref{MethodInstance}, (Any, Any, Any), method, atype, sparams)
 end
 
-function specialize_method(match::MethodMatch, preexisting::Bool=false, compilesig::Bool=false)
-    return specialize_method(match.method, match.spec_types, match.sparams, preexisting, compilesig)
+function specialize_method(match::MethodMatch; kwargs...)
+    return specialize_method(match.method, match.spec_types, match.sparams; kwargs...)
 end
 
-# This function is used for computing alternate limit heuristics
-function method_for_inference_heuristics(method::Method, @nospecialize(sig), sparams::SimpleVector)
-    if isdefined(method, :generator) && method.generator.expand_early && may_invoke_generator(method, sig, sparams)
-        method_instance = specialize_method(method, sig, sparams, false)
-        if isa(method_instance, MethodInstance)
-            cinfo = get_staged(method_instance)
-            if isa(cinfo, CodeInfo)
-                method2 = cinfo.method_for_inference_limit_heuristics
-                if method2 isa Method
-                    return method2
-                end
-            end
-        end
+#########
+# types #
+#########
+
+function singleton_type(@nospecialize(ft))
+    if isa(ft, Const)
+        return ft.val
+    elseif isconstType(ft)
+        return ft.parameters[1]
+    elseif ft isa DataType && isdefined(ft, :instance)
+        return ft.instance
     end
     return nothing
 end
 
-argextype(@nospecialize(x), state) = argextype(x, state.src, state.sptypes, state.slottypes)
-
-const empty_slottypes = Any[]
-
-function argextype(@nospecialize(x), src, sptypes::Vector{Any}, slottypes::Vector{Any} = empty_slottypes)
-    if isa(x, Expr)
-        if x.head === :static_parameter
-            return sptypes[x.args[1]]
-        elseif x.head === :boundscheck
-            return Bool
-        elseif x.head === :copyast
-            return argextype(x.args[1], src, sptypes, slottypes)
-        end
-        @assert false "argextype only works on argument-position values"
-    elseif isa(x, SlotNumber)
-        return slottypes[(x::SlotNumber).id]
-    elseif isa(x, TypedSlot)
-        return (x::TypedSlot).typ
-    elseif isa(x, SSAValue)
-        return abstract_eval_ssavalue(x::SSAValue, src)
-    elseif isa(x, Argument)
-        return isa(src, IncrementalCompact) ? src.ir.argtypes[x.n] :
-            isa(src, IRCode) ? src.argtypes[x.n] :
-            slottypes[x.n]
-    elseif isa(x, QuoteNode)
-        return Const((x::QuoteNode).value)
-    elseif isa(x, GlobalRef)
-        return abstract_eval_global(x.mod, (x::GlobalRef).name)
-    elseif isa(x, PhiNode)
-        return Any
-    elseif isa(x, PiNode)
-        return x.typ
-    else
-        return Const(x)
-    end
-end
-
 ###################
 # SSAValues/Slots #
 ###################
@@ -219,6 +240,8 @@ function find_ssavalue_uses(body::Vector{Any}, nvals::Int)
             push!(uses[e.id], line)
         elseif isa(e, Expr)
             find_ssavalue_uses(e, uses, line)
+        elseif isa(e, PhiNode)
+            find_ssavalue_uses(e, uses, line)
         end
     end
     return uses
@@ -239,6 +262,14 @@ function find_ssavalue_uses(e::Expr, uses::Vector{BitSet}, line::Int)
     end
 end
 
+function find_ssavalue_uses(e::PhiNode, uses::Vector{BitSet}, line::Int)
+    for val in e.values
+        if isa(val, SSAValue)
+            push!(uses[val.id], line)
+        end
+    end
+end
+
 function is_throw_call(e::Expr)
     if e.head === :call
         f = e.args[1]
@@ -252,25 +283,27 @@ function is_throw_call(e::Expr)
     return false
 end
 
-function find_throw_blocks(code::Vector{Any}, ir = RefValue{IRCode}())
+function mark_throw_blocks!(src::CodeInfo, handler_at::Vector{Int})
+    for stmt in find_throw_blocks(src.code, handler_at)
+        src.ssaflags[stmt] |= IR_FLAG_THROW_BLOCK
+    end
+    return nothing
+end
+
+function find_throw_blocks(code::Vector{Any}, handler_at::Vector{Int})
     stmts = BitSet()
     n = length(code)
-    try_depth = 0
     for i in n:-1:1
         s = code[i]
         if isa(s, Expr)
-            if s.head === :enter
-                try_depth -= 1
-            elseif s.head === :leave
-                try_depth += (s.args[1]::Int)
-            elseif s.head === :gotoifnot
-                tgt = s.args[2]::Int
-                if i+1 in stmts && tgt in stmts
+            if s.head === :gotoifnot
+                if i+1 in stmts && s.args[2]::Int in stmts
                     push!(stmts, i)
                 end
             elseif s.head === :return
+                # see `ReturnNode` handling
             elseif is_throw_call(s)
-                if try_depth == 0
+                if handler_at[i] == 0
                     push!(stmts, i)
                 end
             elseif i+1 in stmts
@@ -281,22 +314,12 @@ function find_throw_blocks(code::Vector{Any}, ir = RefValue{IRCode}())
             # (where !isdefined(s, :val)) as `throw` points, but that can cause
             # worse codegen around the call site (issue #37558)
         elseif isa(s, GotoNode)
-            tgt = s.label
-            if isassigned(ir)
-                tgt = first(ir[].cfg.blocks[tgt].stmts)
-            end
-            if tgt in stmts
+            if s.label in stmts
                 push!(stmts, i)
             end
         elseif isa(s, GotoIfNot)
-            if i+1 in stmts
-                tgt = s.dest::Int
-                if isassigned(ir)
-                    tgt = first(ir[].cfg.blocks[tgt].stmts)
-                end
-                if tgt in stmts
-                    push!(stmts, i)
-                end
+            if i+1 in stmts && s.dest in stmts
+                push!(stmts, i)
             end
         elseif i+1 in stmts
             push!(stmts, i)
@@ -319,12 +342,12 @@ inlining_enabled() = (JLOptions().can_inline == 1)
 function coverage_enabled(m::Module)
     ccall(:jl_generating_output, Cint, ()) == 0 || return false # don't alter caches
     cov = JLOptions().code_coverage
-    if cov == 1
+    if cov == 1 # user
         m = moduleroot(m)
         m === Core && return false
         isdefined(Main, :Base) && m === Main.Base && return false
         return true
-    elseif cov == 2
+    elseif cov == 2 # all
         return true
     end
     return false
diff --git a/base/compiler/validation.jl b/base/compiler/validation.jl
index 1a232a290b3a7e..0931686184a2e0 100644
--- a/base/compiler/validation.jl
+++ b/base/compiler/validation.jl
@@ -1,9 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Expr head => argument count bounds
-const VALID_EXPR_HEADS = IdDict{Any,Any}(
+const VALID_EXPR_HEADS = IdDict{Symbol,UnitRange{Int}}(
     :call => 1:typemax(Int),
     :invoke => 2:typemax(Int),
+    :invoke_modify => 3:typemax(Int),
     :static_parameter => 1:1,
     :(&) => 1:1,
     :(=) => 2:2,
@@ -16,11 +17,13 @@ const VALID_EXPR_HEADS = IdDict{Any,Any}(
     :leave => 1:1,
     :pop_exception => 1:1,
     :inbounds => 1:1,
+    :inline => 1:1,
+    :noinline => 1:1,
     :boundscheck => 0:0,
     :copyast => 1:1,
     :meta => 0:typemax(Int),
     :global => 1:1,
-    :foreigncall => 5:typemax(Int), # name, RT, AT, nreq, cconv, args..., roots...
+    :foreigncall => 5:typemax(Int), # name, RT, AT, nreq, (cconv, effects), args..., roots...
     :cfunction => 5:5,
     :isdefined => 1:1,
     :code_coverage_effect => 0:0,
@@ -28,7 +31,10 @@ const VALID_EXPR_HEADS = IdDict{Any,Any}(
     :gc_preserve_begin => 0:typemax(Int),
     :gc_preserve_end => 0:typemax(Int),
     :thunk => 1:1,
-    :throw_undef_if_not => 2:2
+    :throw_undef_if_not => 2:2,
+    :aliasscope => 0:0,
+    :popaliasscope => 0:0,
+    :new_opaque_closure => 4:typemax(Int)
 )
 
 # @enum isn't defined yet, otherwise I'd use it for this
@@ -42,13 +48,15 @@ const EMPTY_SLOTNAMES = "slotnames field is empty"
 const SLOTFLAGS_MISMATCH = "length(slotnames) < length(slotflags)"
 const SSAVALUETYPES_MISMATCH = "not all SSAValues in AST have a type in ssavaluetypes"
 const SSAVALUETYPES_MISMATCH_UNINFERRED = "uninferred CodeInfo ssavaluetypes field does not equal the number of present SSAValues"
+const SSAFLAGS_MISMATCH = "not all SSAValues have a corresponding `ssaflags`"
 const NON_TOP_LEVEL_METHOD = "encountered `Expr` head `:method` in non-top-level code (i.e. `nargs` > 0)"
 const NON_TOP_LEVEL_GLOBAL = "encountered `Expr` head `:global` in non-top-level code (i.e. `nargs` > 0)"
 const SIGNATURE_NARGS_MISMATCH = "method signature does not match number of method arguments"
 const SLOTNAMES_NARGS_MISMATCH = "CodeInfo for method contains fewer slotnames than the number of method arguments"
+const INVALID_SIGNATURE_OPAQUE_CLOSURE = "invalid signature of method for opaque closure - `sig` field must always be set to `Tuple`"
 
 struct InvalidCodeError <: Exception
-    kind::AbstractString
+    kind::String
     meta::Any
 end
 InvalidCodeError(kind::AbstractString) = InvalidCodeError(kind, nothing)
@@ -73,7 +81,7 @@ end
 
 function _validate_val!(@nospecialize(x), errors, ssavals::BitSet)
     if isa(x, Expr)
-        if x.head === :call || x.head === :invoke
+        if x.head === :call || x.head === :invoke || x.head === :invoke_modify
             f = x.args[1]
             if f isa GlobalRef && (f.name === :cglobal) && x.head === :call
                 # TODO: these are not yet linearized
@@ -133,12 +141,13 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
                 end
                 validate_val!(lhs)
                 validate_val!(rhs)
-            elseif head === :call || head === :invoke || head === :gc_preserve_end || head === :meta ||
+            elseif head === :call || head === :invoke || x.head === :invoke_modify ||
+                head === :gc_preserve_end || head === :meta ||
                 head === :inbounds || head === :foreigncall || head === :cfunction ||
                 head === :const || head === :enter || head === :leave || head === :pop_exception ||
                 head === :method || head === :global || head === :static_parameter ||
                 head === :new || head === :splatnew || head === :thunk || head === :loopinfo ||
-                head === :throw_undef_if_not || head === :code_coverage_effect
+                head === :throw_undef_if_not || head === :code_coverage_effect || head === :inline || head === :noinline
                 validate_val!(x)
             else
                 # TODO: nothing is actually in statement position anymore
@@ -176,12 +185,16 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, c::CodeInfo, is_top_
     nssavals = length(c.code)
     !is_top_level && nslotnames == 0 && push!(errors, InvalidCodeError(EMPTY_SLOTNAMES))
     nslotnames < nslotflags && push!(errors, InvalidCodeError(SLOTFLAGS_MISMATCH, (nslotnames, nslotflags)))
-    if c.inferred
-        nssavaluetypes = length(c.ssavaluetypes)
+    ssavaluetypes = c.ssavaluetypes
+    if isa(ssavaluetypes, Vector{Any})
+        nssavaluetypes = length(ssavaluetypes)
         nssavaluetypes < nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH, (nssavals, nssavaluetypes)))
     else
-        c.ssavaluetypes != nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, c.ssavaluetypes)))
+        nssavaluetypes = ssavaluetypes::Int
+        nssavaluetypes ≠ nssavals && push!(errors, InvalidCodeError(SSAVALUETYPES_MISMATCH_UNINFERRED, (nssavals, nssavaluetypes)))
     end
+    nssaflags = length(c.ssaflags)
+    nssavals ≠ nssaflags && push!(errors, InvalidCodeError(SSAFLAGS_MISMATCH, (nssavals, nssaflags)))
     return errors
 end
 
@@ -202,8 +215,10 @@ function validate_code!(errors::Vector{>:InvalidCodeError}, mi::Core.MethodInsta
     else
         m = mi.def::Method
         mnargs = m.nargs
-        n_sig_params = length(Core.Compiler.unwrap_unionall(m.sig).parameters)
-        if (m.isva ? (n_sig_params < (mnargs - 1)) : (n_sig_params != mnargs))
+        n_sig_params = length((unwrap_unionall(m.sig)::DataType).parameters)
+        if m.is_for_opaque_closure
+            m.sig === Tuple || push!(errors, InvalidCodeError(INVALID_SIGNATURE_OPAQUE_CLOSURE, (m.sig, m.isva)))
+        elseif (m.isva ? (n_sig_params < (mnargs - 1)) : (n_sig_params != mnargs))
             push!(errors, InvalidCodeError(SIGNATURE_NARGS_MISMATCH, (m.isva, n_sig_params, mnargs)))
         end
     end
@@ -232,7 +247,7 @@ end
 
 function is_valid_rvalue(@nospecialize(x))
     is_valid_argument(x) && return true
-    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
+    if isa(x, Expr) && x.head in (:new, :splatnew, :the_exception, :isdefined, :call, :invoke, :invoke_modify, :foreigncall, :cfunction, :gc_preserve_begin, :copyast)
         return true
     end
     return false
diff --git a/base/complex.jl b/base/complex.jl
index 8aec9cf1301ee1..f68e519386d939 100644
--- a/base/complex.jl
+++ b/base/complex.jl
@@ -7,6 +7,8 @@ Complex number type with real and imaginary part of type `T`.
 
 `ComplexF16`, `ComplexF32` and `ComplexF64` are aliases for
 `Complex{Float16}`, `Complex{Float32}` and `Complex{Float64}` respectively.
+
+See also: [`Real`](@ref), [`complex`](@ref), [`real`](@ref).
 """
 struct Complex{T<:Real} <: Number
     re::T
@@ -20,10 +22,15 @@ Complex(x::Real) = Complex(x, zero(x))
 
 The imaginary unit.
 
+See also: [`imag`](@ref), [`angle`](@ref), [`complex`](@ref).
+
 # Examples
 ```jldoctest
 julia> im * im
 -1 + 0im
+
+julia> (2.0 + 3im)^2
+-5.0 + 12.0im
 ```
 """
 const im = Complex(false, true)
@@ -54,6 +61,8 @@ float(::Type{Complex{T}}) where {T} = Complex{float(T)}
 
 Return the real part of the complex number `z`.
 
+See also: [`imag`](@ref), [`reim`](@ref), [`complex`](@ref), [`isreal`](@ref), [`Real`](@ref).
+
 # Examples
 ```jldoctest
 julia> real(1 + 3im)
@@ -67,6 +76,8 @@ real(z::Complex) = z.re
 
 Return the imaginary part of the complex number `z`.
 
+See also: [`conj`](@ref), [`reim`](@ref), [`adjoint`](@ref), [`angle`](@ref).
+
 # Examples
 ```jldoctest
 julia> imag(1 + 3im)
@@ -80,7 +91,7 @@ imag(x::Real) = zero(x)
 """
     reim(z)
 
-Return both the real and imaginary parts of the complex number `z`.
+Return a tuple of the real and imaginary parts of the complex number `z`.
 
 # Examples
 ```jldoctest
@@ -254,6 +265,8 @@ end
 
 Compute the complex conjugate of a complex number `z`.
 
+See also: [`angle`](@ref), [`adjoint`](@ref).
+
 # Examples
 ```jldoctest
 julia> conj(1 + 3im)
@@ -334,30 +347,37 @@ muladd(z::Complex, w::Complex, x::Real) =
 
 function /(a::Complex{T}, b::Complex{T}) where T<:Real
     are = real(a); aim = imag(a); bre = real(b); bim = imag(b)
-    if abs(bre) <= abs(bim)
-        if isinf(bre) && isinf(bim)
-            r = sign(bre)/sign(bim)
-        else
-            r = bre / bim
+    if (isinf(bre) | isinf(bim))
+        if isfinite(a)
+            return complex(zero(T)*sign(are)*sign(bre), -zero(T)*sign(aim)*sign(bim))
         end
+        return T(NaN)+T(NaN)*im
+    end
+    if abs(bre) <= abs(bim)
+        r = bre / bim
         den = bim + r*bre
         Complex((are*r + aim)/den, (aim*r - are)/den)
     else
-        if isinf(bre) && isinf(bim)
-            r = sign(bim)/sign(bre)
-        else
-            r = bim / bre
-        end
+        r = bim / bre
         den = bre + r*bim
         Complex((are + aim*r)/den, (aim - are*r)/den)
     end
 end
 
-inv(z::Complex{<:Union{Float16,Float32}}) =
-    oftype(z, inv(widen(z)))
-
-/(z::Complex{T}, w::Complex{T}) where {T<:Union{Float16,Float32}} =
-    oftype(z, widen(z)*inv(widen(w)))
+function /(z::Complex{T}, w::Complex{T}) where {T<:Union{Float16,Float32}}
+    c, d = reim(widen(w))
+    a, b = reim(widen(z))
+    if (isinf(c) | isinf(d))
+        if isfinite(z)
+            return complex(zero(T)*sign(real(z))*sign(real(w)), -zero(T)*sign(imag(z))*sign(imag(w)))
+        end
+        return T(NaN)+T(NaN)*im
+    end
+    mag = inv(muladd(c, c, d^2))
+    re_part = muladd(a, c, b*d)
+    im_part = muladd(b, c, -a*d)
+    return oftype(z, Complex(re_part*mag, im_part*mag))
+end
 
 # robust complex division for double precision
 # variables are scaled & unscaled to avoid over/underflow, if necessary
@@ -369,7 +389,12 @@ function /(z::ComplexF64, w::ComplexF64)
     a, b = reim(z); c, d = reim(w)
     absa = abs(a); absb = abs(b);  ab = absa >= absb ? absa : absb # equiv. to max(abs(a),abs(b)) but without NaN-handling (faster)
     absc = abs(c); absd = abs(d);  cd = absc >= absd ? absc : absd
-
+    if (isinf(c) | isinf(d))
+        if isfinite(z)
+            return complex(0.0*sign(a)*sign(c), -0.0*sign(b)*sign(d))
+        end
+        return NaN+NaN*im
+    end
     halfov = 0.5*floatmax(Float64)              # overflow threshold
     twounϵ = floatmin(Float64)*2.0/eps(Float64) # underflow threshold
 
@@ -436,32 +461,42 @@ function robust_cdiv2(a::Float64, b::Float64, c::Float64, d::Float64, r::Float64
     end
 end
 
+function inv(z::Complex{T}) where T<:Union{Float16,Float32}
+    c, d = reim(widen(z))
+    (isinf(c) | isinf(d)) && return complex(copysign(zero(T), c), flipsign(-zero(T), d))
+    mag = inv(muladd(c, c, d^2))
+    return oftype(z, Complex(c*mag, -d*mag))
+end
 function inv(w::ComplexF64)
     c, d = reim(w)
     (isinf(c) | isinf(d)) && return complex(copysign(0.0, c), flipsign(-0.0, d))
-    half = 0.5
-    two = 2.0
-    cd = max(abs(c), abs(d))
-    ov = floatmax(c)
-    un = floatmin(c)
-    ϵ = eps(Float64)
-    bs = two/(ϵ*ϵ)
+    absc, absd = abs(c), abs(d)
+    cd = ifelse(absc>absd, absc, absd) # cheap `max`: don't need sign- and nan-checks here
+
+    ϵ  = eps(Float64)
+    bs = 2/(ϵ*ϵ)
+
+    # scaling
     s = 1.0
-    cd >= half*ov  && (c=half*c; d=half*d; s=s*half) # scale down c,d
-    cd <= un*two/ϵ && (c=c*bs; d=d*bs; s=s*bs      ) # scale up c,d
-    if abs(d)<=abs(c)
-        r = d/c
-        t = 1.0/(c+d*r)
-        p = t
-        q = -r * t
+    if cd >= floatmax(Float64)/2
+        c *= 0.5; d *= 0.5; s = 0.5 # scale down c, d
+    elseif cd <= 2floatmin(Float64)/ϵ
+        c *= bs;  d *= bs;  s = bs  # scale up c, d
+    end
+
+    # inversion operations
+    if absd <= absc
+        p, q = robust_cinv(c, d)
     else
-        c, d = d, c
-        r = d/c
-        t = 1.0/(c+d*r)
-        p = r * t
-        q = -t
+        q, p = robust_cinv(-d, -c)
     end
-    return ComplexF64(p*s,q*s) # undo scaling
+    return ComplexF64(p*s, q*s) # undo scaling
+end
+function robust_cinv(c::Float64, d::Float64)
+    r = d/c
+    p = inv(muladd(d, r, c))
+    q = -r*p
+    return p, q
 end
 
 function ssqs(x::T, y::T) where T<:Real
@@ -518,16 +553,12 @@ end
 #     return Complex(abs(iz)/r/2, copysign(r,iz))
 # end
 
-# compute exp(im*theta)
-function cis(theta::Real)
-    s, c = sincos(theta)
-    Complex(c, s)
-end
-
 """
-    cis(z)
+    cis(x)
+
+More efficient method for `exp(im*x)` by using Euler's formula: ``cos(x) + i sin(x) = \\exp(i x)``.
 
-Return ``\\exp(iz)``.
+See also [`cispi`](@ref), [`sincos`](@ref), [`exp`](@ref), [`angle`](@ref).
 
 # Examples
 ```jldoctest
@@ -535,17 +566,52 @@ julia> cis(π) ≈ -1
 true
 ```
 """
+function cis end
+function cis(theta::Real)
+    s, c = sincos(theta)
+    Complex(c, s)
+end
+
 function cis(z::Complex)
     v = exp(-imag(z))
     s, c = sincos(real(z))
     Complex(v * c, v * s)
 end
 
+"""
+    cispi(x)
+
+More accurate method for `cis(pi*x)` (especially for large `x`).
+
+See also [`cis`](@ref), [`sincospi`](@ref), [`exp`](@ref), [`angle`](@ref).
+
+# Examples
+```jldoctest
+julia> cispi(10000)
+1.0 + 0.0im
+
+julia> cispi(0.25 + 1im)
+0.030556854645952924 + 0.030556854645952924im
+```
+
+!!! compat "Julia 1.6"
+    This function requires Julia 1.6 or later.
+"""
+function cispi end
+cispi(theta::Real) = Complex(reverse(sincospi(theta))...)
+
+function cispi(z::Complex)
+    sipi, copi = sincospi(z)
+    return complex(real(copi) - imag(sipi), imag(copi) + real(sipi))
+end
+
 """
     angle(z)
 
 Compute the phase angle in radians of a complex number `z`.
 
+See also: [`atan`](@ref), [`cis`](@ref).
+
 # Examples
 ```jldoctest
 julia> rad2deg(angle(1 + im))
diff --git a/base/condition.jl b/base/condition.jl
index 0efbd2c897da9a..4965b43a7019b4 100644
--- a/base/condition.jl
+++ b/base/condition.jl
@@ -5,7 +5,7 @@
 @noinline function concurrency_violation()
     # can be useful for debugging
     #try; error(); catch; ccall(:jlbacktrace, Cvoid, ()); end
-    error("concurrency violation detected")
+    throw(ConcurrencyViolationError("lock must be held"))
 end
 
 """
@@ -34,7 +34,7 @@ assert_havelock(l::AbstractLock, tid::Nothing) = concurrency_violation()
 This struct does not implement a real lock, but instead
 pretends to be always locked on the original thread it was allocated on,
 and simply ignores all other interactions.
-It also does not synchronize tasks; for that use a real lock such as [`RecursiveLock`](@ref).
+It also does not synchronize tasks; for that use a real lock such as [`ReentrantLock`](@ref).
 This can be used in the place of a real lock to, instead, simply and cheaply assert
 that the operation is only occurring on a single cooperatively-scheduled thread.
 It is thus functionally equivalent to allocating a real, recursive, task-unaware lock
@@ -61,12 +61,12 @@ Abstract implementation of a condition object
 for synchronizing tasks objects with a given lock.
 """
 struct GenericCondition{L<:AbstractLock}
-    waitq::InvasiveLinkedList{Task}
+    waitq::IntrusiveLinkedList{Task}
     lock::L
 
-    GenericCondition{L}() where {L<:AbstractLock} = new{L}(InvasiveLinkedList{Task}(), L())
-    GenericCondition{L}(l::L) where {L<:AbstractLock} = new{L}(InvasiveLinkedList{Task}(), l)
-    GenericCondition(l::AbstractLock) = new{typeof(l)}(InvasiveLinkedList{Task}(), l)
+    GenericCondition{L}() where {L<:AbstractLock} = new{L}(IntrusiveLinkedList{Task}(), L())
+    GenericCondition{L}(l::L) where {L<:AbstractLock} = new{L}(IntrusiveLinkedList{Task}(), l)
+    GenericCondition(l::AbstractLock) = new{typeof(l)}(IntrusiveLinkedList{Task}(), l)
 end
 
 assert_havelock(c::GenericCondition) = assert_havelock(c.lock)
@@ -76,7 +76,25 @@ trylock(c::GenericCondition) = trylock(c.lock)
 islocked(c::GenericCondition) = islocked(c.lock)
 
 lock(f, c::GenericCondition) = lock(f, c.lock)
-unlock(f, c::GenericCondition) = unlock(f, c.lock)
+
+# have waiter wait for c
+function _wait2(c::GenericCondition, waiter::Task)
+    ct = current_task()
+    assert_havelock(c)
+    push!(c.waitq, waiter)
+    # since _wait2 is similar to schedule, we should observe the sticky bit now
+    if waiter.sticky && Threads.threadid(waiter) == 0
+        # Issue #41324
+        # t.sticky && tid == 0 is a task that needs to be co-scheduled with
+        # the parent task. If the parent (current_task) is not sticky we must
+        # set it to be sticky.
+        # XXX: Ideally we would be able to unset this
+        ct.sticky = true
+        tid = Threads.threadid()
+        ccall(:jl_set_task_tid, Cint, (Any, Cint), waiter, tid-1)
+    end
+    return
+end
 
 """
     wait([x])
@@ -84,7 +102,8 @@ unlock(f, c::GenericCondition) = unlock(f, c.lock)
 Block the current task until some event occurs, depending on the type of the argument:
 
 * [`Channel`](@ref): Wait for a value to be appended to the channel.
-* [`Condition`](@ref): Wait for [`notify`](@ref) on a condition.
+* [`Condition`](@ref): Wait for [`notify`](@ref) on a condition and return the `val`
+  parameter passed to `notify`.
 * `Process`: Wait for a process or process chain to exit. The `exitcode` field of a process
   can be used to determine success or failure.
 * [`Task`](@ref): Wait for a `Task` to finish. If the task fails with an exception, a
@@ -99,8 +118,7 @@ proceeding.
 """
 function wait(c::GenericCondition)
     ct = current_task()
-    assert_havelock(c)
-    push!(c.waitq, ct)
+    _wait2(c, ct)
     token = unlockall(c.lock)
     try
         return wait()
@@ -121,7 +139,7 @@ is raised as an exception in the woken tasks.
 
 Return the count of tasks woken up. Return 0 if no tasks are waiting on `condition`.
 """
-notify(c::GenericCondition, @nospecialize(arg = nothing); all=true, error=false) = notify(c, arg, all, error)
+@constprop :none notify(c::GenericCondition, @nospecialize(arg = nothing); all=true, error=false) = notify(c, arg, all, error)
 function notify(c::GenericCondition, @nospecialize(arg), all, error)
     assert_havelock(c)
     cnt = 0
diff --git a/base/coreio.jl b/base/coreio.jl
index 2796c53e759f54..3e508c64a0a64d 100644
--- a/base/coreio.jl
+++ b/base/coreio.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-print(xs...)   = print(stdout::IO, xs...)
-println(xs...) = println(stdout::IO, xs...)
+print(xs...)   = print(stdout, xs...)
+println(xs...) = println(stdout, xs...)
 println(io::IO) = print(io, '\n')
 
 function show end
@@ -9,24 +9,26 @@ function repr end
 
 struct DevNull <: IO end
 const devnull = DevNull()
-isreadable(::DevNull) = false
-iswritable(::DevNull) = true
-isopen(::DevNull) = true
-read(::DevNull, ::Type{UInt8}) = throw(EOFError())
 write(::DevNull, ::UInt8) = 1
 unsafe_write(::DevNull, ::Ptr{UInt8}, n::UInt)::Int = n
 close(::DevNull) = nothing
-flush(::DevNull) = nothing
-wait_readnb(::DevNull) = wait()
 wait_close(::DevNull) = wait()
-eof(::DevNull) = true
+bytesavailable(io::DevNull) = 0
 
 let CoreIO = Union{Core.CoreSTDOUT, Core.CoreSTDERR}
-    global write, unsafe_write
-    write(io::CoreIO, x::UInt8) = Core.write(io, x)
-    unsafe_write(io::CoreIO, x::Ptr{UInt8}, nb::UInt) = Core.unsafe_write(io, x, nb)
+    global write(io::CoreIO, x::UInt8) = Core.write(io, x)
+    global unsafe_write(io::CoreIO, x::Ptr{UInt8}, nb::UInt) = Core.unsafe_write(io, x, nb)
+
+    CoreIO = Union{CoreIO, DevNull}
+    global read(::CoreIO, ::Type{UInt8}) = throw(EOFError())
+    global isopen(::CoreIO) = true
+    global isreadable(::CoreIO) = false
+    global iswritable(::CoreIO) = true
+    global flush(::CoreIO) = nothing
+    global eof(::CoreIO) = true
+    global wait_readnb(::CoreIO, nb::Int) = nothing
 end
 
-stdin = devnull
-stdout = Core.stdout
-stderr = Core.stderr
+stdin::IO = devnull
+stdout::IO = Core.stdout
+stderr::IO = Core.stderr
diff --git a/base/cpuid.jl b/base/cpuid.jl
new file mode 100644
index 00000000000000..48930d8064ba99
--- /dev/null
+++ b/base/cpuid.jl
@@ -0,0 +1,115 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module CPUID
+
+export cpu_isa
+
+"""
+    ISA(features::Set{UInt32})
+
+A structure which represents the Instruction Set Architecture (ISA) of a
+computer.  It holds the `Set` of features of the CPU.
+
+The numerical values of the features are automatically generated from the C
+source code of Julia and stored in the `features_h.jl` Julia file.
+"""
+struct ISA
+    features::Set{UInt32}
+end
+
+Base.:<=(a::ISA, b::ISA) = a.features <= b.features
+Base.:<(a::ISA,  b::ISA) = a.features <  b.features
+Base.isless(a::ISA,  b::ISA) = a < b
+
+include(string(length(Core.ARGS) >= 2 ? Core.ARGS[2] : "", "features_h.jl"))  # include($BUILDROOT/base/features_h.jl)
+
+# Keep in sync with `arch_march_isa_mapping`.
+const ISAs_by_family = Dict(
+    "i686" => [
+        # Source: https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html.
+        # Implicit in all sets, because always required by Julia: mmx, sse, sse2
+        "pentium4" => ISA(Set{UInt32}()),
+        "prescott" => ISA(Set((JL_X86_sse3,))),
+    ],
+    "x86_64" => [
+        # Source: https://gcc.gnu.org/onlinedocs/gcc/x86-Options.html.
+        # Implicit in all sets, because always required by x86-64 architecture: mmx, sse, sse2
+        "x86_64" => ISA(Set{UInt32}()),
+        "core2" => ISA(Set((JL_X86_sse3, JL_X86_ssse3))),
+        "nehalem" => ISA(Set((JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt))),
+        "sandybridge" => ISA(Set((JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_aes, JL_X86_pclmul))),
+        "haswell" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c))),
+        "skylake" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c, JL_X86_rdseed, JL_X86_adx, JL_X86_prfchw, JL_X86_clflushopt, JL_X86_xsavec, JL_X86_xsaves))),
+        "skylake_avx512" => ISA(Set((JL_X86_movbe, JL_X86_sse3, JL_X86_ssse3, JL_X86_sse41, JL_X86_sse42, JL_X86_popcnt, JL_X86_pku, JL_X86_avx, JL_X86_avx2, JL_X86_aes, JL_X86_pclmul, JL_X86_fsgsbase, JL_X86_rdrnd, JL_X86_fma, JL_X86_bmi, JL_X86_bmi2, JL_X86_f16c, JL_X86_rdseed, JL_X86_adx, JL_X86_prfchw, JL_X86_clflushopt, JL_X86_xsavec, JL_X86_xsaves, JL_X86_avx512f, JL_X86_clwb, JL_X86_avx512vl, JL_X86_avx512bw, JL_X86_avx512dq, JL_X86_avx512cd))),
+    ],
+    "armv6l" => [
+        # The only armv6l processor we know of that runs Julia on armv6l
+        # We don't have a good way to tell the different armv6l variants apart through features,
+        # and honestly we don't care much since it's basically this one chip that people want to use with Julia.
+        "arm1176jzfs" => ISA(Set{UInt32}()),
+    ],
+    "armv7l" => [
+        "armv7l" => ISA(Set{UInt32}()),
+        "armv7l+neon" => ISA(Set((JL_AArch32_neon,))),
+        "armv7l+neon+vfpv4" => ISA(Set((JL_AArch32_neon, JL_AArch32_vfp4))),
+    ],
+    "aarch64" => [
+        # Implicit in all sets, because always required: fp, asimd
+        "armv8.0-a" => ISA(Set{UInt32}()),
+        "armv8.1-a" => ISA(Set((JL_AArch64_v8_1a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm))),
+        "armv8.2-a+crypto" => ISA(Set((JL_AArch64_v8_2a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_aes, JL_AArch64_sha2))),
+        "a64fx" => ISA(Set((JL_AArch64_v8_2a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_sha2, JL_AArch64_ccpp, JL_AArch64_complxnum, JL_AArch64_fullfp16, JL_AArch64_sve))),
+        "apple_m1" => ISA(Set((JL_AArch64_v8_5a, JL_AArch64_lse, JL_AArch64_crc, JL_AArch64_rdm, JL_AArch64_aes, JL_AArch64_sha2, JL_AArch64_sha3, JL_AArch64_ccpp, JL_AArch64_complxnum, JL_AArch64_fp16fml, JL_AArch64_fullfp16, JL_AArch64_dotprod, JL_AArch64_rcpc, JL_AArch64_altnzcv))),
+    ],
+    "powerpc64le" => [
+        # We have no way to test powerpc64le features yet, so we're only going to declare the lowest ISA:
+        "power8" => ISA(Set{UInt32}()),
+    ]
+)
+
+# Test a CPU feature exists on the currently-running host
+test_cpu_feature(feature::UInt32) = ccall(:jl_test_cpu_feature, Bool, (UInt32,), feature)
+
+# Normalize some variation in ARCH values (which typically come from `uname -m`)
+function normalize_arch(arch::String)
+    arch = lowercase(arch)
+    if arch ∈ ("amd64",)
+        arch = "x86_64"
+    elseif arch ∈ ("i386", "i486", "i586")
+        arch = "i686"
+    elseif arch ∈ ("armv6",)
+        arch = "armv6l"
+    elseif arch ∈ ("arm", "armv7", "armv8", "armv8l")
+        arch = "armv7l"
+    elseif arch ∈ ("arm64",)
+        arch = "aarch64"
+    elseif arch ∈ ("ppc64le",)
+        arch = "powerpc64le"
+    end
+    return arch
+end
+
+let
+    # Collect all relevant features for the current architecture, if any.
+    FEATURES = UInt32[]
+    arch = normalize_arch(String(Sys.ARCH))
+    if arch in keys(ISAs_by_family)
+        for isa in ISAs_by_family[arch]
+            unique!(append!(FEATURES, last(isa).features))
+        end
+    end
+
+    # Use `@eval` to inline the list of features.
+    @eval function cpu_isa()
+        return ISA(Set{UInt32}(feat for feat in $(FEATURES) if test_cpu_feature(feat)))
+    end
+end
+
+"""
+    cpu_isa()
+
+Return the [`ISA`](@ref) (instruction set architecture) of the current CPU.
+"""
+cpu_isa
+
+end # module CPUID
diff --git a/base/deepcopy.jl b/base/deepcopy.jl
index 36c9c399def541..317d999004c42f 100644
--- a/base/deepcopy.jl
+++ b/base/deepcopy.jl
@@ -53,7 +53,7 @@ end
 function deepcopy_internal(@nospecialize(x), stackdict::IdDict)
     T = typeof(x)::DataType
     nf = nfields(x)
-    if T.mutable
+    if ismutable(x)
         if haskey(stackdict, x)
             return stackdict[x]
         end
@@ -87,7 +87,7 @@ end
 
 function deepcopy_internal(x::Array, stackdict::IdDict)
     if haskey(stackdict, x)
-        return stackdict[x]
+        return stackdict[x]::typeof(x)
     end
     _deepcopy_array_t(x, eltype(x), stackdict)
 end
@@ -126,3 +126,21 @@ function deepcopy_internal(x::Union{Dict,IdDict}, stackdict::IdDict)
     end
     dest
 end
+
+function deepcopy_internal(x::AbstractLock, stackdict::IdDict)
+    if haskey(stackdict, x)
+        return stackdict[x]
+    end
+    y = typeof(x)()
+    stackdict[x] = y
+    return y
+end
+
+function deepcopy_internal(x::GenericCondition, stackdict::IdDict)
+    if haskey(stackdict, x)
+        return stackdict[x]
+    end
+    y = typeof(x)(deepcopy_internal(x.lock))
+    stackdict[x] = y
+    return y
+end
diff --git a/base/deprecated.jl b/base/deprecated.jl
index 45adac55a355cf..28a35e23635f4c 100644
--- a/base/deprecated.jl
+++ b/base/deprecated.jl
@@ -15,11 +15,12 @@
 # is only printed the first time for each call place.
 
 """
-    @deprecate old new [ex=true]
+    @deprecate old new [export_old=true]
 
-Deprecate method `old` and specify the replacement call `new`. Prevent `@deprecate` from
-exporting `old` by setting `ex` to `false`. `@deprecate` defines a new method with the same
-signature as `old`.
+Deprecate method `old` and specify the replacement call `new`, defining a new method `old`
+with the specified signature in the process.
+
+To prevent `old` from being exported, set `export_old` to `false`.
 
 !!! compat "Julia 1.5"
     As of Julia 1.5, functions defined by `@deprecate` do not print warning when `julia`
@@ -34,14 +35,31 @@ old (generic function with 1 method)
 julia> @deprecate old(x) new(x) false
 old (generic function with 1 method)
 ```
+
+Calls to `@deprecate` without explicit type-annotations will define deprecated methods
+accepting arguments of type `Any`. To restrict deprecation to a specific signature, annotate
+the arguments of `old`. For example,
+```jldoctest; filter = r"in Main at.*"
+julia> new(x::Int) = x;
+
+julia> new(x::Float64) = 2x;
+
+julia> @deprecate old(x::Int) new(x);
+
+julia> methods(old)
+# 1 method for generic function "old":
+[1] old(x::Int64) in Main at deprecated.jl:70
+```
+will define and deprecate a method `old(x::Int)` that mirrors `new(x::Int)` but will not
+define nor deprecate the method `old(x::Float64)`.
 """
-macro deprecate(old, new, ex=true)
+macro deprecate(old, new, export_old=true)
     meta = Expr(:meta, :noinline)
     if isa(old, Symbol)
         oldname = Expr(:quote, old)
         newname = Expr(:quote, new)
         Expr(:toplevel,
-            ex ? Expr(:export, esc(old)) : nothing,
+            export_old ? Expr(:export, esc(old)) : nothing,
             :(function $(esc(old))(args...)
                   $meta
                   depwarn($"`$old` is deprecated, use `$new` instead.", Core.Typeof($(esc(old))).name.mt.name)
@@ -65,7 +83,7 @@ macro deprecate(old, new, ex=true)
             error("invalid usage of @deprecate")
         end
         Expr(:toplevel,
-            ex ? Expr(:export, esc(oldsym)) : nothing,
+        export_old ? Expr(:export, esc(oldsym)) : nothing,
             :($(esc(old)) = begin
                   $meta
                   depwarn($"`$oldcall` is deprecated, use `$newcall` instead.", Core.Typeof($(esc(oldsym))).name.mt.name)
@@ -88,8 +106,13 @@ function depwarn(msg, funcsym; force::Bool=false)
         _module=begin
             bt = backtrace()
             frame, caller = firstcaller(bt, funcsym)
-            # TODO: Is it reasonable to attribute callers without linfo to Core?
-            caller.linfo isa Core.MethodInstance ? caller.linfo.def.module : Core
+            linfo = caller.linfo
+            if linfo isa Core.MethodInstance
+                def = linfo.def
+                def isa Module ? def : def.module
+            else
+                Core    # TODO: Is it reasonable to attribute callers without linfo to Core?
+            end
         end,
         _file=String(caller.file),
         _line=caller.line,
@@ -117,12 +140,14 @@ function firstcaller(bt::Vector, funcsyms)
             end
             found = lkup.func in funcsyms
             # look for constructor type name
-            if !found && lkup.linfo isa Core.MethodInstance
+            if !found
                 li = lkup.linfo
-                ft = ccall(:jl_first_argument_datatype, Any, (Any,), li.def.sig)
-                if isa(ft, DataType) && ft.name === Type.body.name
-                    ft = unwrap_unionall(ft.parameters[1])
-                    found = (isa(ft, DataType) && ft.name.name in funcsyms)
+                if li isa Core.MethodInstance
+                    ft = ccall(:jl_first_argument_datatype, Any, (Any,), (li.def::Method).sig)
+                    if isa(ft, DataType) && ft.name === Type.body.name
+                        ft = unwrap_unionall(ft.parameters[1])
+                        found = (isa(ft, DataType) && ft.name.name in funcsyms)
+                    end
                 end
             end
         end
@@ -226,16 +251,46 @@ getindex(match::Core.MethodMatch, field::Int) =
 tuple_type_head(T::Type) = fieldtype(T, 1)
 tuple_type_cons(::Type, ::Type{Union{}}) = Union{}
 function tuple_type_cons(::Type{S}, ::Type{T}) where T<:Tuple where S
-    @_pure_meta
+    @_total_may_throw_meta
     Tuple{S, T.parameters...}
 end
 function parameter_upper_bound(t::UnionAll, idx)
-    @_pure_meta
+    @_total_may_throw_meta
     return rewrap_unionall((unwrap_unionall(t)::DataType).parameters[idx], t)
 end
 
 # these were internal functions, but some packages seem to be relying on them
-@deprecate cat_shape(dims, shape::Tuple{}, shapes::Tuple...) cat_shape(dims, shapes)
+@deprecate cat_shape(dims, shape::Tuple{}, shapes::Tuple...) cat_shape(dims, shapes) false
 cat_shape(dims, shape::Tuple{}) = () # make sure `cat_shape(dims, ())` do not recursively calls itself
 
+@deprecate unsafe_indices(A) axes(A) false
+@deprecate unsafe_length(r) length(r) false
+
+# these were internal type aliases, but some pacakges seem to be relying on them
+const Any16{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
+                        Any,Any,Any,Any,Any,Any,Any,Any,Vararg{Any,N}}
+const All16{T,N} = Tuple{T,T,T,T,T,T,T,T,
+                         T,T,T,T,T,T,T,T,Vararg{T,N}}
+
 # END 1.6 deprecations
+
+# BEGIN 1.7 deprecations
+
+# the plan is to eventually overload getproperty to access entries of the dict
+@noinline function getproperty(x::Pairs, s::Symbol)
+    depwarn("use values(kwargs) and keys(kwargs) instead of kwargs.data and kwargs.itr", :getproperty, force=true)
+    return getfield(x, s)
+end
+
+# This function was marked as experimental and not exported.
+@deprecate catch_stack(task=current_task(); include_bt=true) current_exceptions(task; backtrace=include_bt) false
+
+# END 1.7 deprecations
+
+# BEGIN 1.8 deprecations
+
+const var"@_inline_meta" = var"@inline"
+const var"@_noinline_meta" = var"@noinline"
+@deprecate getindex(t::Tuple, i::Real) t[convert(Int, i)]
+
+# END 1.8 deprecations
diff --git a/base/dict.jl b/base/dict.jl
index 8f885e26c45aaa..199edba251cdbf 100644
--- a/base/dict.jl
+++ b/base/dict.jl
@@ -55,7 +55,8 @@ Dict{String, Int64} with 2 entries:
 ```
 """
 mutable struct Dict{K,V} <: AbstractDict{K,V}
-    slots::Array{UInt8,1}
+    # Metadata: empty => 0x00, removed => 0x7f, full => 0b1[7 most significant hash bits]
+    slots::Vector{UInt8}
     keys::Array{K,1}
     vals::Array{V,1}
     ndel::Int
@@ -145,13 +146,24 @@ end
 
 empty(a::AbstractDict, ::Type{K}, ::Type{V}) where {K, V} = Dict{K, V}()
 
-hashindex(key, sz) = (((hash(key)::UInt % Int) & (sz-1)) + 1)::Int
+# Gets 7 most significant bits from the hash (hsh), first bit is 1
+_shorthash7(hsh::UInt32) = (hsh >> UInt(25))%UInt8 | 0x80
+_shorthash7(hsh::UInt64) = (hsh >> UInt(57))%UInt8 | 0x80
 
-@propagate_inbounds isslotempty(h::Dict, i::Int) = h.slots[i] == 0x0
-@propagate_inbounds isslotfilled(h::Dict, i::Int) = h.slots[i] == 0x1
-@propagate_inbounds isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x2
+# hashindex (key, sz) - computes optimal position and shorthash7
+#     idx - optimal position in the hash table
+#     sh::UInt8 - short hash (7 highest hash bits)
+function hashindex(key, sz)
+    hsh = hash(key)::UInt
+    idx = (((hsh % Int) & (sz-1)) + 1)::Int
+    return idx, _shorthash7(hsh)
+end
+
+@propagate_inbounds isslotempty(h::Dict, i::Int) = h.slots[i] == 0x00
+@propagate_inbounds isslotfilled(h::Dict, i::Int) = (h.slots[i] & 0x80) != 0
+@propagate_inbounds isslotmissing(h::Dict, i::Int) = h.slots[i] == 0x7f
 
-function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K
+@constprop :none function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K
     olds = h.slots
     oldk = h.keys
     oldv = h.vals
@@ -161,7 +173,7 @@ function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K
     h.idxfloor = 1
     if h.count == 0
         resize!(h.slots, newsz)
-        fill!(h.slots, 0)
+        fill!(h.slots, 0x0)
         resize!(h.keys, newsz)
         resize!(h.vals, newsz)
         h.ndel = 0
@@ -176,51 +188,41 @@ function rehash!(h::Dict{K,V}, newsz = length(h.keys)) where V where K
     maxprobe = 0
 
     for i = 1:sz
-        @inbounds if olds[i] == 0x1
+        @inbounds if (olds[i] & 0x80) != 0
             k = oldk[i]
             v = oldv[i]
-            index0 = index = hashindex(k, newsz)
+            index, sh = hashindex(k, newsz)
+            index0 = index
             while slots[index] != 0
                 index = (index & (newsz-1)) + 1
             end
             probe = (index - index0) & (newsz-1)
             probe > maxprobe && (maxprobe = probe)
-            slots[index] = 0x1
+            slots[index] = olds[i]
             keys[index] = k
             vals[index] = v
             count += 1
-
-            if h.age != age0
-                # if `h` is changed by a finalizer, retry
-                return rehash!(h, newsz)
-            end
         end
     end
 
+    @assert h.age == age0 "Muliple concurent writes to Dict detected!"
+    h.age += 1
     h.slots = slots
     h.keys = keys
     h.vals = vals
     h.count = count
     h.ndel = 0
     h.maxprobe = maxprobe
-    @assert h.age == age0
-
     return h
 end
 
 function sizehint!(d::Dict{T}, newsz) where T
     oldsz = length(d.slots)
     # limit new element count to max_values of the key type
-    newsz = min(newsz, max_values(T)::Int)
+    newsz = min(max(newsz, length(d)), max_values(T)::Int)
     # need at least 1.5n space to hold n elements
-    newsz = cld(3 * newsz, 2)
-    if newsz <= oldsz
-        # todo: shrink
-        # be careful: rehash!() assumes everything fits. it was only designed
-        # for growing.
-        return d
-    end
-    rehash!(d, newsz)
+    newsz = _tablesz(cld(3 * newsz, 2))
+    return newsz == oldsz ? d : rehash!(d, newsz)
 end
 
 """
@@ -257,45 +259,44 @@ end
 
 # get the index where a key is stored, or -1 if not present
 function ht_keyindex(h::Dict{K,V}, key) where V where K
+    isempty(h) && return -1
     sz = length(h.keys)
     iter = 0
     maxprobe = h.maxprobe
-    index = hashindex(key, sz)
+    index, sh = hashindex(key, sz)
     keys = h.keys
 
     @inbounds while true
-        if isslotempty(h,index)
-            break
-        end
-        if !isslotmissing(h,index) && (key === keys[index] || isequal(key,keys[index]))
-            return index
+        isslotempty(h,index) && return -1
+        if h.slots[index] == sh
+            k = keys[index]
+            if (key ===  k || isequal(key, k))
+                return index
+            end
         end
 
         index = (index & (sz-1)) + 1
-        iter += 1
-        iter > maxprobe && break
+        (iter += 1) > maxprobe && return -1
     end
-    return -1
+    # This line is unreachable
 end
 
-# get the index where a key is stored, or -pos if not present
-# and the key would be inserted at pos
+# get (index, sh) for the key
+#     index - where a key is stored, or -pos if not present
+#             and the key would be inserted at pos
+#     sh::UInt8 - short hash (7 highest hash bits)
 # This version is for use by setindex! and get!
-function ht_keyindex2!(h::Dict{K,V}, key) where V where K
-    age0 = h.age
+function ht_keyindex2_shorthash!(h::Dict{K,V}, key) where V where K
     sz = length(h.keys)
     iter = 0
     maxprobe = h.maxprobe
-    index = hashindex(key, sz)
+    index, sh = hashindex(key, sz)
     avail = 0
     keys = h.keys
 
     @inbounds while true
         if isslotempty(h,index)
-            if avail < 0
-                return avail
-            end
-            return -index
+            return (avail < 0 ? avail : -index), sh
         end
 
         if isslotmissing(h,index)
@@ -304,8 +305,11 @@ function ht_keyindex2!(h::Dict{K,V}, key) where V where K
                 # in case "key" already exists in a later collided slot.
                 avail = -index
             end
-        elseif key === keys[index] || isequal(key, keys[index])
-            return index
+        elseif h.slots[index] == sh
+            k = keys[index]
+            if key === k || isequal(key, k)
+                return index, sh
+            end
         end
 
         index = (index & (sz-1)) + 1
@@ -313,14 +317,14 @@ function ht_keyindex2!(h::Dict{K,V}, key) where V where K
         iter > maxprobe && break
     end
 
-    avail < 0 && return avail
+    avail < 0 && return avail, sh
 
     maxallowed = max(maxallowedprobe, sz>>maxprobeshift)
     # Check if key is not present, may need to keep searching to find slot
     @inbounds while iter < maxallowed
         if !isslotfilled(h,index)
             h.maxprobe = iter
-            return -index
+            return -index, sh
         end
         index = (index & (sz-1)) + 1
         iter += 1
@@ -328,11 +332,14 @@ function ht_keyindex2!(h::Dict{K,V}, key) where V where K
 
     rehash!(h, h.count > 64000 ? sz*2 : sz*4)
 
-    return ht_keyindex2!(h, key)
+    return ht_keyindex2_shorthash!(h, key)
 end
 
-@propagate_inbounds function _setindex!(h::Dict, v, key, index)
-    h.slots[index] = 0x1
+# Only for better backward compatibility. It can be removed in the future.
+ht_keyindex2!(h::Dict, key) = ht_keyindex2_shorthash!(h, key)[1]
+
+@propagate_inbounds function _setindex!(h::Dict, v, key, index, sh = _shorthash7(hash(key)))
+    h.slots[index] = sh
     h.keys[index] = key
     h.vals[index] = v
     h.count += 1
@@ -347,6 +354,7 @@ end
         # > 3/4 deleted or > 2/3 full
         rehash!(h, h.count > 64000 ? h.count*2 : h.count*4)
     end
+    nothing
 end
 
 function setindex!(h::Dict{K,V}, v0, key0) where V where K
@@ -359,19 +367,35 @@ end
 
 function setindex!(h::Dict{K,V}, v0, key::K) where V where K
     v = convert(V, v0)
-    index = ht_keyindex2!(h, key)
+    index, sh = ht_keyindex2_shorthash!(h, key)
+
+    if index > 0
+        h.age += 1
+        @inbounds h.keys[index] = key
+        @inbounds h.vals[index] = v
+    else
+        @inbounds _setindex!(h, v, key, -index, sh)
+    end
+
+    return h
+end
+
+function setindex!(h::Dict{K,Any}, v, key::K) where K
+    @nospecialize v
+    index, sh = ht_keyindex2_shorthash!(h, key)
 
     if index > 0
         h.age += 1
         @inbounds h.keys[index] = key
         @inbounds h.vals[index] = v
     else
-        @inbounds _setindex!(h, v, key, -index)
+        @inbounds _setindex!(h, v, key, -index, sh)
     end
 
     return h
 end
 
+
 """
     get!(collection, key, default)
 
@@ -436,26 +460,25 @@ function get!(default::Callable, h::Dict{K,V}, key0) where V where K
 end
 
 function get!(default::Callable, h::Dict{K,V}, key::K) where V where K
-    index = ht_keyindex2!(h, key)
+    index, sh = ht_keyindex2_shorthash!(h, key)
 
     index > 0 && return h.vals[index]
 
     age0 = h.age
     v = convert(V, default())
     if h.age != age0
-        index = ht_keyindex2!(h, key)
+        index, sh = ht_keyindex2_shorthash!(h, key)
     end
     if index > 0
         h.age += 1
         @inbounds h.keys[index] = key
         @inbounds h.vals[index] = v
     else
-        @inbounds _setindex!(h, v, key, -index)
+        @inbounds _setindex!(h, v, key, -index, sh)
     end
     return v
 end
 
-
 function getindex(h::Dict{K,V}, key) where V where K
     index = ht_keyindex(h, key)
     @inbounds return (index < 0) ? throw(KeyError(key)) : h.vals[index]::V
@@ -467,6 +490,9 @@ end
 Return the value stored for the given key, or the given default value if no mapping for the
 key is present.
 
+!!! compat "Julia 1.7"
+    For tuples and numbers, this function requires at least Julia 1.7.
+
 # Examples
 ```jldoctest
 julia> d = Dict("a"=>1, "b"=>2);
@@ -603,7 +629,7 @@ function pop!(h::Dict)
 end
 
 function _delete!(h::Dict{K,V}, index) where {K,V}
-    @inbounds h.slots[index] = 0x2
+    @inbounds h.slots[index] = 0x7f
     @inbounds _unsetindex!(h.keys, index)
     @inbounds _unsetindex!(h.vals, index)
     h.ndel += 1
@@ -662,7 +688,7 @@ end
 
 @propagate_inbounds _iterate(t::Dict{K,V}, i) where {K,V} = i == 0 ? nothing : (Pair{K,V}(t.keys[i],t.vals[i]), i == typemax(Int) ? 0 : i+1)
 @propagate_inbounds function iterate(t::Dict)
-    _iterate(t, skip_deleted_floor!(t))
+    _iterate(t, skip_deleted(t, t.idxfloor))
 end
 @propagate_inbounds iterate(t::Dict, i) = _iterate(t, skip_deleted(t, i))
 
@@ -680,7 +706,7 @@ end
 function filter!(pred, h::Dict{K,V}) where {K,V}
     h.count == 0 && return h
     @inbounds for i=1:length(h.slots)
-        if h.slots[i] == 0x01 && !pred(Pair{K,V}(h.keys[i], h.vals[i]))
+        if ((h.slots[i] & 0x80) != 0) && !pred(Pair{K,V}(h.keys[i], h.vals[i]))
             _delete!(h, i)
         end
     end
@@ -696,7 +722,7 @@ end
 function map!(f, iter::ValueIterator{<:Dict})
     dict = iter.dict
     vals = dict.vals
-    # @inbounds is here so the it gets propagated to isslotfiled
+    # @inbounds is here so that it gets propagated to isslotfilled
     @inbounds for i = dict.idxfloor:lastindex(vals)
         if isslotfilled(dict, i)
             vals[i] = f(vals[i])
@@ -705,6 +731,22 @@ function map!(f, iter::ValueIterator{<:Dict})
     return iter
 end
 
+function mergewith!(combine, d1::Dict{K, V}, d2::AbstractDict) where {K, V}
+    haslength(d2) && sizehint!(d1, length(d1) + length(d2))
+    for (k, v) in d2
+        i, sh = ht_keyindex2_shorthash!(d1, k)
+        if i > 0
+            d1.vals[i] = combine(d1.vals[i], v)
+        else
+            if !isequal(k, convert(K, k))
+                throw(ArgumentError("$(limitrepr(k)) is not a valid key for type $K"))
+            end
+            @inbounds _setindex!(d1, convert(V, v), k, -i, sh)
+        end
+    end
+    return d1
+end
+
 struct ImmutableDict{K,V} <: AbstractDict{K,V}
     parent::ImmutableDict{K,V}
     key::K
@@ -771,15 +813,23 @@ function get(dict::ImmutableDict, key, default)
     return default
 end
 
+function get(default::Callable, dict::ImmutableDict, key)
+    while isdefined(dict, :parent)
+        isequal(dict.key, key) && return dict.value
+        dict = dict.parent
+    end
+    return default()
+end
+
 # this actually defines reverse iteration (e.g. it should not be used for merge/copy/filter type operations)
 function iterate(d::ImmutableDict{K,V}, t=d) where {K, V}
     !isdefined(t, :parent) && return nothing
     (Pair{K,V}(t.key, t.value), t.parent)
 end
-length(t::ImmutableDict) = count(x->true, t)
+length(t::ImmutableDict) = count(Returns(true), t)
 isempty(t::ImmutableDict) = !isdefined(t, :parent)
 empty(::ImmutableDict, ::Type{K}, ::Type{V}) where {K, V} = ImmutableDict{K,V}()
 
-_similar_for(c::Dict, ::Type{Pair{K,V}}, itr, isz) where {K, V} = empty(c, K, V)
-_similar_for(c::AbstractDict, ::Type{T}, itr, isz) where {T} =
+_similar_for(c::AbstractDict, ::Type{Pair{K,V}}, itr, isz, len) where {K, V} = empty(c, K, V)
+_similar_for(c::AbstractDict, ::Type{T}, itr, isz, len) where {T} =
     throw(ArgumentError("for AbstractDicts, similar requires an element type of Pair;\n  if calling map, consider a comprehension instead"))
diff --git a/base/div.jl b/base/div.jl
index 1923da121cdf91..7b172ecc95a631 100644
--- a/base/div.jl
+++ b/base/div.jl
@@ -5,14 +5,20 @@
 """
     div(x, y, r::RoundingMode=RoundToZero)
 
-The quotient from Euclidean division. Computes x/y, rounded to an integer according
-to the rounding mode `r`. In other words, the quantity
+The quotient from Euclidean (integer) division. Computes x/y, rounded to
+an integer according to the rounding mode `r`. In other words, the quantity
 
     round(x/y,r)
 
 without any intermediate rounding.
 
-See also: [`fld`](@ref), [`cld`](@ref) which are special cases of this function
+!!! compat "Julia 1.4"
+    The three-argument method taking a `RoundingMode` requires Julia 1.4 or later.
+
+See also [`fld`](@ref) and [`cld`](@ref), which are special cases of this function.
+
+!!! compat "Julia 1.9"
+    `RoundFromZero` requires at least Julia 1.9.
 
 # Examples:
 ```jldoctest
@@ -30,6 +36,10 @@ julia> div(-5, 2, RoundNearestTiesAway)
 -3
 julia> div(-5, 2, RoundNearestTiesUp)
 -2
+julia> div(4, 3, RoundFromZero)
+2
+julia> div(-4, 3, RoundFromZero)
+-2
 ```
 """
 div(x, y, r::RoundingMode)
@@ -60,6 +70,26 @@ without any intermediate rounding.
   `[0,-y)` otherwise. The result may not be exact if `x` and `y` have the same sign, and
   `abs(x) < abs(y)`. See also [`RoundUp`](@ref).
 
+- if `r == RoundFromZero`, then the result is in the interval `(-y, 0]` if `y` is positive, or
+  `[0, -y)` otherwise. The result may not be exact if `x` and `y` have the same sign, and
+  `abs(x) < abs(y)`. See also [`RoundFromZero`](@ref).
+
+!!! compat "Julia 1.9"
+    `RoundFromZero` requires at least Julia 1.9.
+
+# Examples:
+```jldoctest
+julia> x = 9; y = 4;
+
+julia> x % y  # same as rem(x, y)
+1
+
+julia> x ÷ y  # same as div(x, y)
+2
+
+julia> x == div(x, y) * y + rem(x, y)
+true
+```
 """
 rem(x, y, r::RoundingMode)
 
@@ -70,18 +100,41 @@ rem(x, y, ::RoundingMode{:Up}) = mod(x, -y)
 rem(x, y, r::RoundingMode{:Nearest}) = x - y*div(x, y, r)
 rem(x::Integer, y::Integer, r::RoundingMode{:Nearest}) = divrem(x, y, r)[2]
 
+function rem(x, y, ::typeof(RoundFromZero))
+    signbit(x) == signbit(y) ? rem(x, y, RoundUp) : rem(x, y, RoundDown)
+end
+
 """
     fld(x, y)
 
 Largest integer less than or equal to `x/y`. Equivalent to `div(x, y, RoundDown)`.
 
-See also: [`div`](@ref)
+See also [`div`](@ref), [`cld`](@ref), [`fld1`](@ref).
 
 # Examples
 ```jldoctest
 julia> fld(7.3,5.5)
 1.0
+
+julia> fld.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ -2  -2  -1  -1  -1  0  0  0  1  1  1
 ```
+Because `fld(x, y)` implements strictly correct floored rounding based on the true
+value of floating-point numbers, unintuitive situations can arise. For example:
+```jldoctest
+julia> fld(6.0,0.1)
+59.0
+julia> 6.0/0.1
+60.0
+julia> 6.0/big(0.1)
+59.99999999999999666933092612453056361837965690217069245739573412231113406246995
+```
+What is happening here is that the true value of the floating-point number written
+as `0.1` is slightly larger than the numerical value 1/10 while `6.0` represents
+the number 6 precisely. Therefore the true value of `6.0 / 0.1` is slightly less
+than 60. When doing division, this is rounded to precisely `60.0`, but
+`fld(6.0, 0.1)` always takes the floor of the true value, so the result is `59.0`.
 """
 fld(a, b) = div(a, b, RoundDown)
 
@@ -90,12 +143,16 @@ fld(a, b) = div(a, b, RoundDown)
 
 Smallest integer larger than or equal to `x/y`. Equivalent to `div(x, y, RoundUp)`.
 
-See also: [`div`](@ref)
+See also [`div`](@ref), [`fld`](@ref).
 
 # Examples
 ```jldoctest
 julia> cld(5.5,2.2)
 3.0
+
+julia> cld.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ -1  -1  -1  0  0  0  1  1  1  2  2
 ```
 """
 cld(a, b) = div(a, b, RoundUp)
@@ -108,6 +165,8 @@ The quotient and remainder from Euclidean division.
 Equivalent to `(div(x,y,r), rem(x,y,r))`. Equivalently, with the default
 value of `r`, this call is equivalent to `(x÷y, x%y)`.
 
+See also: [`fldmod`](@ref), [`cld`](@ref).
+
 # Examples
 ```jldoctest
 julia> divrem(3,7)
@@ -118,6 +177,8 @@ julia> divrem(7,3)
 ```
 """
 divrem(x, y) = divrem(x, y, RoundToZero)
+
+
 function divrem(a, b, r::RoundingMode)
     if r === RoundToZero
         # For compat. Remove in 2.0.
@@ -129,6 +190,25 @@ function divrem(a, b, r::RoundingMode)
         (div(a, b, r), rem(a, b, r))
     end
 end
+#avoids calling rem for Integers-Integers (all modes),
+#a-d*b not precise for Floats - AbstractFloat, AbstractIrrational. Rationals are still slower
+function divrem(a::Integer, b::Integer, r::Union{typeof(RoundUp),
+                                                typeof(RoundDown),
+                                                typeof(RoundToZero)})
+    if r === RoundToZero
+        # For compat. Remove in 2.0.
+        d = div(a, b)
+        (d, a - d*b)
+    elseif r === RoundDown
+        # For compat. Remove in 2.0.
+        d = fld(a, b)
+        (d, a - d*b)
+    elseif r === RoundUp
+        # For compat. Remove in 2.0.
+        d = div(a, b, r)
+        (d, a - d*b)
+    end
+end
 function divrem(x::Integer, y::Integer, rnd::typeof(RoundNearest))
     (q, r) = divrem(x, y)
     if x >= 0
@@ -178,11 +258,17 @@ function divrem(x::Integer, y::Integer, rnd::typeof(RoundNearestTiesUp))
     end
 end
 
+function divrem(x, y, ::typeof(RoundFromZero))
+    signbit(x) == signbit(y) ? divrem(x, y, RoundUp) : divrem(x, y, RoundDown)
+end
+
 """
     fldmod(x, y)
 
 The floored quotient and modulus after division. A convenience wrapper for
 `divrem(x, y, RoundDown)`. Equivalent to `(fld(x,y), mod(x,y))`.
+
+See also: [`fld`](@ref), [`cld`](@ref), [`fldmod1`](@ref).
 """
 fldmod(x,y) = divrem(x, y, RoundDown)
 
@@ -212,12 +298,16 @@ function div(x::Integer, y::Integer, rnd::Union{typeof(RoundNearest),
     divrem(x, y, rnd)[1]
 end
 
+function div(x::Integer, y::Integer, ::typeof(RoundFromZero))
+    signbit(x) == signbit(y) ? div(x, y, RoundUp) : div(x, y, RoundDown)
+end
+
 # For bootstrapping purposes, we define div for integers directly. Provide the
 # generic signature also
 div(a::T, b::T, ::typeof(RoundToZero)) where {T<:Union{BitSigned, BitUnsigned64}} = div(a, b)
 div(a::Bool, b::Bool, r::RoundingMode) = div(a, b)
 # Prevent ambiguities
-for rm in (RoundUp, RoundDown, RoundToZero)
+for rm in (RoundUp, RoundDown, RoundToZero, RoundFromZero)
     @eval div(a::Bool, b::Bool, r::$(typeof(rm))) = div(a, b)
 end
 function div(x::Bool, y::Bool, rnd::Union{typeof(RoundNearest),
diff --git a/base/docs/Docs.jl b/base/docs/Docs.jl
index da315cadf81b18..b84b3ee8d55f48 100644
--- a/base/docs/Docs.jl
+++ b/base/docs/Docs.jl
@@ -73,11 +73,16 @@ const modules = Module[]
 const META    = gensym(:meta)
 const METAType = IdDict{Any,Any}
 
-meta(m::Module) = isdefined(m, META) ? getfield(m, META)::METAType : METAType()
+function meta(m::Module)
+    if !isdefined(m, META) || getfield(m, META) === nothing
+        initmeta(m)
+    end
+    return getfield(m, META)::METAType
+end
 
 function initmeta(m::Module)
-    if !isdefined(m, META)
-        Core.eval(m, :(const $META = $(METAType())))
+    if !isdefined(m, META) || getfield(m, META) === nothing
+        Core.eval(m, :($META = $(METAType())))
         push!(modules, m)
     end
     nothing
@@ -286,14 +291,15 @@ end
 
 uncurly(@nospecialize ex) = isexpr(ex, :curly) ? ex.args[1] : ex
 
-namify(@nospecialize x) = astname(x, isexpr(x, :macro))
+namify(@nospecialize x) = astname(x, isexpr(x, :macro))::Union{Symbol,Expr,GlobalRef}
 
 function astname(x::Expr, ismacro::Bool)
-    if isexpr(x, :.)
+    head = x.head
+    if head === :.
         ismacro ? macroname(x) : x
     # Call overloading, e.g. `(a::A)(b) = b` or `function (a::A)(b) b end` should document `A(b)`
-    elseif (isexpr(x, :function) || isexpr(x, :(=))) && isexpr(x.args[1], :call) && isexpr(x.args[1].args[1], :(::))
-        return astname(x.args[1].args[1].args[end], ismacro)
+    elseif (head === :function || head === :(=)) && isexpr(x.args[1], :call) && isexpr((x.args[1]::Expr).args[1], :(::))
+        return astname(((x.args[1]::Expr).args[1]::Expr).args[end], ismacro)
     else
         n = isexpr(x, (:module, :struct)) ? 2 : 1
         astname(x.args[n], ismacro)
@@ -342,11 +348,11 @@ function metadata(__source__, __module__, expr, ismodule)
         P = Pair{Symbol,Any}
         fields = P[]
         last_docstr = nothing
-        for each in expr.args[3].args
+        for each in (expr.args[3]::Expr).args
             if isa(each, Symbol) || isexpr(each, :(::))
                 # a field declaration
                 if last_docstr !== nothing
-                    push!(fields, P(namify(each), last_docstr))
+                    push!(fields, P(namify(each::Union{Symbol,Expr}), last_docstr))
                     last_docstr = nothing
                 end
             elseif isexpr(each, :function) || isexpr(each, :(=))
@@ -354,7 +360,7 @@ function metadata(__source__, __module__, expr, ismodule)
             elseif isa(each, String) || isexpr(each, :string) || isexpr(each, :call) ||
                 (isexpr(each, :macrocall) && each.args[1] === Symbol("@doc_str"))
                 # forms that might be doc strings
-                last_docstr = each
+                last_docstr = each::Union{String,Expr}
             end
         end
         dict = :($(Dict{Symbol,Any})($([(:($(P)($(quot(f)), $d)))::Expr for (f, d) in fields]...)))
@@ -401,8 +407,7 @@ function moduledoc(__source__, __module__, meta, def, def′::Expr)
         def = unblock(def)
         block = def.args[3].args
         if !def.args[1]
-            isempty(block) && error("empty baremodules are not documentable.")
-            insert!(block, 2, :(import Base: @doc))
+            pushfirst!(block, :(import Base: @doc))
         end
         push!(block, docex)
         esc(Expr(:toplevel, def))
diff --git a/base/docs/basedocs.jl b/base/docs/basedocs.jl
index 9dcc50bc23eb0e..c547709ba2f07b 100644
--- a/base/docs/basedocs.jl
+++ b/base/docs/basedocs.jl
@@ -23,6 +23,9 @@ as well as many great tutorials and learning resources:
 For help on a specific function or macro, type `?` followed
 by its name, e.g. `?cos`, or `?@time`, and press enter.
 Type `;` to enter shell mode, `]` to enter package mode.
+
+To exit the interactive session, type `CTRL-D` (press the
+control key together with the `d` key), or type `exit()`.
 """
 kw"help", kw"Julia", kw"julia", kw""
 
@@ -70,7 +73,7 @@ abstract type Real <: Number end
 ```
 [`Number`](@ref) has no supertype, whereas [`Real`](@ref) is an abstract subtype of `Number`.
 """
-kw"abstract type"
+kw"abstract type", kw"abstract"
 
 """
     module
@@ -106,7 +109,7 @@ kw"module"
 `__init__()` function in your module would executes immediately *after* the module is loaded at
 runtime for the first time (i.e., it is only called once and only after all statements in the
 module have been executed). Because it is called *after* fully importing the module, `__init__`
-functions of submodules will be executed *first*. Two typical uses of __init__ are calling
+functions of submodules will be executed *first*. Two typical uses of `__init__` are calling
 runtime initialization functions of external C libraries and initializing global constants
 that involve pointers returned by external libraries.
 See the [manual section about modules](@ref modules) for more details.
@@ -127,7 +130,7 @@ kw"__init__"
     baremodule
 
 `baremodule` declares a module that does not contain `using Base` or local definitions of
-[`eval`](@ref Base.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words,
+[`eval`](@ref Base.MainInclude.eval) and [`include`](@ref Base.include). It does still import `Core`. In other words,
 
 ```julia
 module Mod
@@ -150,6 +153,7 @@ include(p) = Base.include(Mod, p)
 ...
 
 end
+```
 """
 kw"baremodule"
 
@@ -178,8 +182,8 @@ kw"primitive type"
 A macro maps a sequence of argument expressions to a returned expression, and the
 resulting expression is substituted directly into the program at the point where
 the macro is invoked.
-Macros are a way to run generated code without calling [`eval`](@ref Base.eval), since the generated
-code instead simply becomes part of the surrounding program.
+Macros are a way to run generated code without calling [`eval`](@ref Base.MainInclude.eval),
+since the generated code instead simply becomes part of the surrounding program.
 Macro arguments may include expressions, literal values, and symbols. Macros can be defined for
 variable number of arguments (varargs), but do not accept keyword arguments.
 Every macro also implicitly gets passed the arguments `__source__`, which contains the line number
@@ -207,6 +211,24 @@ Say: hey there friend
 """
 kw"macro"
 
+"""
+    __module__
+
+The argument `__module__` is only visible inside the macro, and it provides information
+(in the form of a `Module` object) about the expansion context of the macro invocation.
+See the manual section on [Macro invocation](@ref) for more information.
+"""
+kw"__module__"
+
+"""
+    __source__
+
+The argument `__source__` is only visible inside the macro, and it provides information
+(in the form of a `LineNumberNode` object) about the parser location of the `@` sign from
+the macro invocation. See the manual section on [Macro invocation](@ref) for more information.
+"""
+kw"__source__"
+
 """
     local
 
@@ -257,6 +279,19 @@ julia> z
 """
 kw"global"
 
+"""
+    ' '
+
+A pair of single-quote characters delimit a [`Char`](@ref) (that is, character) literal.
+
+# Examples
+```jldoctest
+julia> 'j'
+'j': ASCII/Unicode U+006A (category Ll: Letter, lowercase)
+```
+"""
+kw"''"
+
 """
     =
 
@@ -330,7 +365,7 @@ julia> push!(a, 2, 3)
 Assigning `[]` does not eliminate elements from a collection; instead use [`filter!`](@ref).
 ```jldoctest
 julia> a = collect(1:3); a[a .<= 1] = []
-ERROR: DimensionMismatch("tried to assign 0 elements to 1 destinations")
+ERROR: DimensionMismatch: tried to assign 0 elements to 1 destinations
 [...]
 
 julia> filter!(x -> x > 1, a) # in-place & thus more efficient than a = a[a .> 1]
@@ -395,21 +430,98 @@ kw"."
 """
     let
 
-`let` statements allocate new variable bindings each time they run. Whereas an
-assignment modifies an existing value location, `let` creates new locations. This
-difference is only detectable in the case of variables that outlive their scope via
-closures. The `let` syntax accepts a comma-separated series of assignments and variable
-names:
+`let` blocks create a new hard scope and optionally introduce new local bindings.
+
+Just like the [other scope constructs](@ref man-scope-table), `let` blocks define
+the block of code where newly introduced local variables are accessible.
+Additionally, the syntax has a special meaning for comma-separated assignments
+and variable names that may optionally appear on the same line as the `let`:
 
 ```julia
 let var1 = value1, var2, var3 = value3
     code
 end
 ```
-The assignments are evaluated in order, with each right-hand side evaluated in the scope
-before the new variable on the left-hand side has been introduced. Therefore it makes
-sense to write something like `let x = x`, since the two `x` variables are distinct and
-have separate storage.
+
+The variables introduced on this line are local to the `let` block and the assignments are
+evaluated in order, with each right-hand side evaluated in the scope
+without considering the name on the left-hand side. Therefore it makes
+sense to write something like `let x = x`, since the two `x` variables are distinct with
+the left-hand side locally shadowing the `x` from the outer scope. This can even
+be a useful idiom as new local variables are freshly created each time local scopes
+are entered, but this is only observable in the case of variables that outlive their
+scope via closures.
+
+By contrast, [`begin`](@ref) blocks also group multiple expressions together but do
+not introduce scope or have the special assignment syntax.
+
+### Examples
+
+In the function below, there is a single `x` that is iteratively updated three times by the `map`.
+The closures returned all reference that one `x` at its final value:
+
+```jldoctest
+julia> function test_outer_x()
+           x = 0
+           map(1:3) do _
+               x += 1
+               return ()->x
+           end
+       end
+test_outer_x (generic function with 1 method)
+
+julia> [f() for f in test_outer_x()]
+3-element Vector{Int64}:
+ 3
+ 3
+ 3
+```
+
+If, however, we add a `let` block that introduces a _new_ local variable we will end up
+with three distinct variables being captured (one at each iteration) even though we
+chose to use (shadow) the same name.
+
+```jldoctest
+julia> function test_let_x()
+           x = 0
+           map(1:3) do _
+               x += 1
+               let x = x
+                   return ()->x
+               end
+           end
+       end
+test_let_x (generic function with 1 method)
+
+julia> [f() for f in test_let_x()]
+3-element Vector{Int64}:
+ 1
+ 2
+ 3
+```
+
+All scope constructs that introduce new local variables behave this way
+when repeatedly run; the distinctive feature of `let` is its ability
+to succinctly declare new `local`s that may shadow outer variables of the same
+name. For example, directly using the argument of the `do` function similarly
+captures three distinct variables:
+
+```jldoctest
+julia> function test_do_x()
+           map(1:3) do x
+               return ()->x
+           end
+       end
+test_do_x (generic function with 1 method)
+
+julia> [f() for f in test_do_x()]
+3-element Vector{Int64}:
+ 1
+ 2
+ 3
+```
+
+
 """
 kw"let"
 
@@ -432,6 +544,18 @@ For other purposes, `:( ... )` and `quote .. end` blocks are treated identically
 """
 kw"quote"
 
+"""
+    @
+
+The at sign followed by a macro name marks a macro call. Macros provide the
+ability to include generated code in the final body of a program. A macro maps
+a tuple of arguments, expressed as space-separated expressions or a
+function-call-like argument list, to a returned *expression*. The resulting
+expression is compiled directly into the surrounding code. See
+[Metaprogramming](@ref man-macros) for more details and examples.
+"""
+kw"@"
+
 """
     {}
 
@@ -582,6 +706,32 @@ the last expression in the function body.
 """
 kw"function"
 
+"""
+    x -> y
+
+Create an anonymous function mapping argument(s) `x` to the function body `y`.
+
+```jldoctest
+julia> f = x -> x^2 + 2x - 1
+#1 (generic function with 1 method)
+
+julia> f(2)
+7
+```
+
+Anonymous functions can also be defined for multiple argumets.
+```jldoctest
+julia> g = (x,y) -> x^2 + y^2
+#2 (generic function with 1 method)
+
+julia> g(2,3)
+13
+```
+
+See the manual section on [anonymous functions](@ref man-anonymous-functions) for more details.
+"""
+kw"->"
+
 """
     return
 
@@ -646,6 +796,13 @@ otherwise the condition expression `x > y` is evaluated, and if it is true, the
 corresponding block is evaluated; if neither expression is true, the `else` block is
 evaluated. The `elseif` and `else` blocks are optional, and as many `elseif` blocks as
 desired can be used.
+
+In contrast to some other languages conditions must be of type `Bool`. It does not
+suffice for conditions to be convertible to `Bool`.
+```jldoctest
+julia> if 1 end
+ERROR: TypeError: non-boolean (Int64) used in boolean context
+```
 """
 kw"if", kw"elseif", kw"else"
 
@@ -665,7 +822,7 @@ See the manual section on [control flow](@ref man-conditional-evaluation) for mo
 ```
 julia> x = 1; y = 2;
 
-julia> println(x > y ? "x is larger" : "y is larger")
+julia> x > y ? println("x is larger") : println("y is larger")
 y is larger
 ```
 """
@@ -719,8 +876,9 @@ kw"while"
 `end` marks the conclusion of a block of expressions, for example
 [`module`](@ref), [`struct`](@ref), [`mutable struct`](@ref),
 [`begin`](@ref), [`let`](@ref), [`for`](@ref) etc.
-`end` may also be used when indexing into an array to represent
-the last index of a dimension.
+
+`end` may also be used when indexing to represent the last index of a
+collection or the last index of a dimension of an array.
 
 # Examples
 ```jldoctest
@@ -895,12 +1053,22 @@ kw"..."
     ;
 
 `;` has a similar role in Julia as in many C-like languages, and is used to delimit the
-end of the previous statement. `;` is not necessary after new lines, but can be used to
+end of the previous statement.
+
+`;` is not necessary at the end of a line, but can be used to
 separate statements on a single line or to join statements into a single expression.
-`;` is also used to suppress output printing in the REPL and similar interfaces.
+
+Adding `;` at the end of a line in the REPL will suppress printing the result of that expression.
+
+In function declarations, and optionally in calls, `;` separates regular arguments from keywords.
+
+While constructing arrays, if the arguments inside the square brackets are separated by `;`
+then their contents are vertically concatenated together.
+
+In the standard REPL, typing `;` on an empty line will switch to shell mode.
 
 # Examples
-```julia
+```jldoctest
 julia> function foo()
            x = "Hello, "; x *= "World!"
            return x
@@ -914,6 +1082,19 @@ julia> foo();
 
 julia> bar()
 "Hello, Mars!"
+
+julia> function plot(x, y; style="solid", width=1, color="black")
+           ###
+       end
+
+julia> [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> ; # upon typing ;, the prompt changes (in place) to: shell>
+shell> echo hello
+hello
 ```
 """
 kw";"
@@ -922,6 +1103,19 @@ kw";"
     x && y
 
 Short-circuiting boolean AND.
+
+See also [`&`](@ref), the ternary operator `? :`, and the manual section on [control flow](@ref man-conditional-evaluation).
+
+# Examples
+```jldoctest
+julia> x = 3;
+
+julia> x > 1 && x < 10 && x isa Int
+true
+
+julia> x < 0 && error("expected positive x")
+false
+```
 """
 kw"&&"
 
@@ -929,6 +1123,17 @@ kw"&&"
     x || y
 
 Short-circuiting boolean OR.
+
+See also: [`|`](@ref), [`xor`](@ref), [`&&`](@ref).
+
+# Examples
+```jldoctest
+julia> pi < 3 || ℯ < 3
+true
+
+julia> false || true || println("neither is true!")
+true
+```
 """
 kw"||"
 
@@ -965,7 +1170,7 @@ first argument:
   with arguments are available as consecutive unnamed SSA variables (%0, %1, etc.);
 - as a 2-element tuple, containing a string of module IR and a string representing the name
   of the entry-point function to call;
-- as a 2-element tuple, but with the module provided as an `Vector{UINt8}` with bitcode.
+- as a 2-element tuple, but with the module provided as an `Vector{UInt8}` with bitcode.
 
 Note that contrary to `ccall`, the argument types must be specified as a tuple type, and not
 a tuple of types. All types, as well as the LLVM code, should be specified as literals, and
@@ -990,6 +1195,22 @@ end
 
 Usually `begin` will not be necessary, since keywords such as [`function`](@ref) and [`let`](@ref)
 implicitly begin blocks of code. See also [`;`](@ref).
+
+`begin` may also be used when indexing to represent the first index of a
+collection or the first index of a dimension of an array.
+
+# Examples
+```jldoctest
+julia> A = [1 2; 3 4]
+2×2 Array{Int64,2}:
+ 1  2
+ 3  4
+
+julia> A[begin, :]
+2-element Array{Int64,1}:
+ 1
+ 2
+```
 """
 kw"begin"
 
@@ -1043,10 +1264,10 @@ fields of the type to be set after construction. See the manual section on
 kw"mutable struct"
 
 """
-    new
+    new, or new{A,B,...}
 
-Special function available to inner constructors which created a new object
-of the type.
+Special function available to inner constructors which creates a new object
+of the type. The form new{A,B,...} explicitly specifies values of parameters for parametric types.
 See the manual section on [Inner Constructor Methods](@ref man-inner-constructor-methods)
 for more information.
 """
@@ -1136,6 +1357,8 @@ devnull
     Nothing
 
 A type with no fields that is the type of [`nothing`](@ref).
+
+See also: [`isnothing`](@ref), [`Some`](@ref), [`Missing`](@ref).
 """
 Nothing
 
@@ -1144,6 +1367,8 @@ Nothing
 
 The singleton instance of type [`Nothing`](@ref), used by convention when there is no value to return
 (as in a C `void` function) or when a variable or field holds no value.
+
+See also: [`isnothing`](@ref), [`something`](@ref), [`missing`](@ref).
 """
 nothing
 
@@ -1215,7 +1440,7 @@ julia> isa(+, Function)
 true
 
 julia> typeof(sin)
-typeof(sin)
+typeof(sin) (singleton type of function sin, subtype of Function)
 
 julia> ans <: Function
 true
@@ -1384,8 +1609,9 @@ DomainError
 """
     Task(func)
 
-Create a `Task` (i.e. coroutine) to execute the given function `func` (which must be
-callable with no arguments). The task exits when this function returns.
+Create a `Task` (i.e. coroutine) to execute the given function `func` (which
+must be callable with no arguments). The task exits when this function returns.
+The task will run in the "world age" from the parent at construction when [`schedule`](@ref)d.
 
 # Examples
 ```jldoctest
@@ -1686,6 +1912,8 @@ NaN
 julia> false * NaN
 0.0
 ```
+
+See also: [`digits`](@ref), [`iszero`](@ref), [`NaN`](@ref).
 """
 Bool
 
@@ -1772,19 +2000,31 @@ Symbol(x...)
 
 Construct a tuple of the given objects.
 
+See also [`Tuple`](@ref), [`NamedTuple`](@ref).
+
 # Examples
 ```jldoctest
-julia> tuple(1, 'a', pi)
-(1, 'a', π)
+julia> tuple(1, 'b', pi)
+(1, 'b', π)
+
+julia> ans === (1, 'b', π)
+true
+
+julia> Tuple(Real[1, 2, pi])  # takes a collection
+(1, 2, π)
 ```
 """
 tuple
 
 """
-    getfield(value, name::Symbol)
-    getfield(value, i::Int)
+    getfield(value, name::Symbol, [order::Symbol])
+    getfield(value, i::Int, [order::Symbol])
 
-Extract a field from a composite `value` by name or position.
+Extract a field from a composite `value` by name or position. Optionally, an
+ordering can be defined for the operation. If the field was declared `@atomic`,
+the specification is strongly recommended to be compatible with the stores to
+that location. Otherwise, if not declared as `@atomic`, this parameter must be
+`:not_atomic` if specified.
 See also [`getproperty`](@ref Base.getproperty) and [`fieldnames`](@ref).
 
 # Examples
@@ -1805,10 +2045,14 @@ julia> getfield(a, 1)
 getfield
 
 """
-    setfield!(value, name::Symbol, x)
+    setfield!(value, name::Symbol, x, [order::Symbol])
+    setfield!(value, i::Int, x, [order::Symbol])
 
-Assign `x` to a named field in `value` of composite type.
-The `value` must be mutable and `x` must be a subtype of `fieldtype(typeof(value), name)`.
+Assign `x` to a named field in `value` of composite type. The `value` must be
+mutable and `x` must be a subtype of `fieldtype(typeof(value), name)`.
+Additionally, an ordering can be specified for this operation. If the field was
+declared `@atomic`, this specification is mandatory. Otherwise, if not declared
+as `@atomic`, it must be `:not_atomic` if specified.
 See also [`setproperty!`](@ref Base.setproperty!).
 
 # Examples
@@ -1828,16 +2072,68 @@ julia> a = 1//2
 1//2
 
 julia> setfield!(a, :num, 3);
-ERROR: setfield! immutable struct of type Rational cannot be changed
+ERROR: setfield!: immutable struct of type Rational cannot be changed
 ```
 """
 setfield!
 
+"""
+    swapfield!(value, name::Symbol, x, [order::Symbol])
+    swapfield!(value, i::Int, x, [order::Symbol])
+
+These atomically perform the operations to simultaneously get and set a field:
+
+    y = getfield(value, name)
+    setfield!(value, name, x)
+    return y
+"""
+swapfield!
+
+"""
+    modifyfield!(value, name::Symbol, op, x, [order::Symbol]) -> Pair
+    modifyfield!(value, i::Int, op, x, [order::Symbol]) -> Pair
+
+These atomically perform the operations to get and set a field after applying
+the function `op`.
+
+    y = getfield(value, name)
+    z = op(y, x)
+    setfield!(value, name, z)
+    return y => z
+
+If supported by the hardware (for example, atomic increment), this may be
+optimized to the appropriate hardware instruction, otherwise it'll use a loop.
+"""
+modifyfield!
+
+"""
+    replacefield!(value, name::Symbol, expected, desired,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]) -> (; old, success::Bool)
+    replacefield!(value, i::Int, expected, desired,
+                  [success_order::Symbol, [fail_order::Symbol=success_order]) -> (; old, success::Bool)
+
+These atomically perform the operations to get and conditionally set a field to
+a given value.
+
+    y = getfield(value, name, fail_order)
+    ok = y === expected
+    if ok
+        setfield!(value, name, desired, success_order)
+    end
+    return (; old = y, success = ok)
+
+If supported by the hardware, this may be optimized to the appropriate hardware
+instruction, otherwise it'll use a loop.
+"""
+replacefield!
+
 """
     typeof(x)
 
 Get the concrete type of `x`.
 
+See also [`eltype`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = 1//2;
@@ -1854,12 +2150,16 @@ Matrix{Float64} (alias for Array{Float64, 2})
 typeof
 
 """
-    isdefined(m::Module, s::Symbol)
-    isdefined(object, s::Symbol)
-    isdefined(object, index::Int)
+    isdefined(m::Module, s::Symbol, [order::Symbol])
+    isdefined(object, s::Symbol, [order::Symbol])
+    isdefined(object, index::Int, [order::Symbol])
 
-Tests whether a global variable or object field is defined. The arguments can be a module and a symbol
-or a composite object and field name (as a symbol) or index.
+Tests whether a global variable or object field is defined. The arguments can
+be a module and a symbol or a composite object and field name (as a symbol) or
+index. Optionally, an ordering can be defined for the operation. If the field
+was declared `@atomic`, the specification is strongly recommended to be
+compatible with the stores to that location. Otherwise, if not declared as
+`@atomic`, this parameter must be `:not_atomic` if specified.
 
 To test whether an array element is defined, use [`isassigned`](@ref) instead.
 
@@ -1894,7 +2194,7 @@ isdefined
 """
     Vector{T}(undef, n)
 
-Construct an uninitialized [`Vector{T}`](@ref) of length `n`. See [`undef`](@ref).
+Construct an uninitialized [`Vector{T}`](@ref) of length `n`.
 
 # Examples
 ```julia-repl
@@ -1944,14 +2244,19 @@ Vector{T}(::Missing, n)
 """
     Matrix{T}(undef, m, n)
 
-Construct an uninitialized [`Matrix{T}`](@ref) of size `m`×`n`. See [`undef`](@ref).
+Construct an uninitialized [`Matrix{T}`](@ref) of size `m`×`n`.
 
 # Examples
 ```julia-repl
 julia> Matrix{Float64}(undef, 2, 3)
 2×3 Array{Float64, 2}:
- 6.93517e-310  6.93517e-310  6.93517e-310
- 6.93517e-310  6.93517e-310  1.29396e-320
+ 2.36365e-314  2.28473e-314    5.0e-324
+ 2.26704e-314  2.26711e-314  NaN
+
+julia> similar(ans, Int32, 2, 2)
+2×2 Matrix{Int32}:
+ 490537216  1277177453
+         1  1936748399
 ```
 """
 Matrix{T}(::UndefInitializer, m, n)
@@ -1999,19 +2304,28 @@ containing elements of type `T`. `N` can either be supplied explicitly,
 as in `Array{T,N}(undef, dims)`, or be determined by the length or number of `dims`.
 `dims` may be a tuple or a series of integer arguments corresponding to the lengths
 in each dimension. If the rank `N` is supplied explicitly, then it must
-match the length or number of `dims`. See [`undef`](@ref).
+match the length or number of `dims`. Here [`undef`](@ref) is
+the [`UndefInitializer`](@ref).
 
 # Examples
 ```julia-repl
 julia> A = Array{Float64, 2}(undef, 2, 3) # N given explicitly
-2×3 Array{Float64, 2}:
+2×3 Matrix{Float64}:
  6.90198e-310  6.90198e-310  6.90198e-310
  6.90198e-310  6.90198e-310  0.0
 
-julia> B = Array{Float64}(undef, 2) # N determined by the input
-2-element Array{Float64, 1}:
- 1.87103e-320
- 0.0
+julia> B = Array{Float64}(undef, 4) # N determined by the input
+4-element Vector{Float64}:
+   2.360075077e-314
+ NaN
+   2.2671131793e-314
+   2.299821756e-314
+
+julia> similar(B, 2, 4, 1) # use typeof(B), and the given size
+2×4×1 Array{Float64, 3}:
+[:, :, 1] =
+ 2.26703e-314  2.26708e-314  0.0           2.80997e-314
+ 0.0           2.26703e-314  2.26708e-314  0.0
 ```
 """
 Array{T,N}(::UndefInitializer, dims)
@@ -2088,10 +2402,12 @@ Alias for `UndefInitializer()`, which constructs an instance of the singleton ty
 [`UndefInitializer`](@ref), used in array initialization to indicate the
 array-constructor-caller would like an uninitialized array.
 
+See also: [`missing`](@ref), [`similar`](@ref).
+
 # Examples
 ```julia-repl
 julia> Array{Float64, 1}(undef, 3)
-3-element Array{Float64, 1}:
+3-element Vector{Float64}:
  2.2752528595e-314
  2.202942107e-314
  2.275252907e-314
@@ -2127,6 +2443,8 @@ julia> +(1, 20, 4)
 
 Unary minus operator.
 
+See also: [`abs`](@ref), [`flipsign`](@ref).
+
 # Examples
 ```jldoctest
 julia> -1
@@ -2198,8 +2516,8 @@ julia> 4.5/2
 """
     ArgumentError(msg)
 
-The parameters to a function call do not match a valid signature. Argument `msg` is a
-descriptive error string.
+The arguments passed to a function are invalid.
+`msg` is a descriptive error message.
 """
 ArgumentError
 
@@ -2232,6 +2550,9 @@ AssertionError
 
 An error occurred while [`include`](@ref Base.include)ing, [`require`](@ref Base.require)ing, or [`using`](@ref) a file. The error specifics
 should be available in the `.error` field.
+
+!!! compat "Julia 1.7"
+    LoadErrors are no longer emitted by `@macroexpand`, `@macroexpand1`, and `macroexpand` as of Julia 1.7.
 """
 LoadError
 
@@ -2278,14 +2599,14 @@ union [`Union{}`](@ref) is the bottom type of Julia.
 julia> IntOrString = Union{Int,AbstractString}
 Union{Int64, AbstractString}
 
-julia> 1 :: IntOrString
-1
+julia> 1 isa IntOrString
+true
 
-julia> "Hello!" :: IntOrString
-"Hello!"
+julia> "Hello!" isa IntOrString
+true
 
-julia> 1.0 :: IntOrString
-ERROR: TypeError: in typeassert, expected Union{Int64, AbstractString}, got a value of type Float64
+julia> 1.0 isa IntOrString
+false
 ```
 """
 Union
@@ -2311,10 +2632,20 @@ UnionAll
 """
     ::
 
-With the `::`-operator type annotations are attached to expressions and variables in programs.
-See the manual section on [Type Declarations](@ref).
+The `::` operator either asserts that a value has the given type, or declares that
+a local variable or function return always has the given type.
+
+Given `expression::T`, `expression` is first evaluated. If the result is of type
+`T`, the value is simply returned. Otherwise, a [`TypeError`](@ref) is thrown.
+
+In local scope, the syntax `local x::T` or `x::T = expression` declares that local variable
+`x` always has type `T`. When a value is assigned to the variable, it will be
+converted to type `T` by calling [`convert`](@ref).
 
-Outside of declarations `::` is used to assert that expressions and variables in programs have a given type.
+In a method declaration, the syntax `function f(x)::T` causes any value returned by
+the method to be converted to type `T`.
+
+See the manual section on [Type Declarations](@ref).
 
 # Examples
 ```jldoctest
@@ -2323,6 +2654,13 @@ ERROR: TypeError: typeassert: expected AbstractFloat, got a value of type Int64
 
 julia> (1+2)::Int
 3
+
+julia> let
+           local x::Int
+           x = 2.0
+           x
+       end
+2
 ```
 """
 kw"::"
@@ -2330,15 +2668,17 @@ kw"::"
 """
     Vararg{T,N}
 
-The last parameter of a tuple type [`Tuple`](@ref) can be the special type `Vararg`, which denotes any
-number of trailing elements. The type `Vararg{T,N}` corresponds to exactly `N` elements of type `T`.
+The last parameter of a tuple type [`Tuple`](@ref) can be the special value `Vararg`, which denotes any
+number of trailing elements. `Vararg{T,N}` corresponds to exactly `N` elements of type `T`. Finally
 `Vararg{T}` corresponds to zero or more elements of type `T`. `Vararg` tuple types are used to represent the
 arguments accepted by varargs methods (see the section on [Varargs Functions](@ref) in the manual.)
 
+See also [`NTuple`](@ref).
+
 # Examples
 ```jldoctest
 julia> mytupletype = Tuple{AbstractString, Vararg{Int}}
-Tuple{AbstractString, Vararg{Int64, N} where N}
+Tuple{AbstractString, Vararg{Int64}}
 
 julia> isa(("1",), mytupletype)
 true
@@ -2367,6 +2707,8 @@ is considered an abstract type, and tuple types are only concrete if their param
 field names; fields are only accessed by index.
 
 See the manual section on [Tuple Types](@ref).
+
+See also [`Vararg`](@ref), [`NTuple`](@ref), [`tuple`](@ref), [`NamedTuple`](@ref).
 """
 Tuple
 
@@ -2422,8 +2764,11 @@ typeassert
 
 """
     getproperty(value, name::Symbol)
+    getproperty(value, name::Symbol, order::Symbol)
 
 The syntax `a.b` calls `getproperty(a, :b)`.
+The syntax `@atomic order a.b` calls `getproperty(a, :b, :order)` and
+the syntax `@atomic a.b` calls `getproperty(a, :b, :sequentially_consistent)`.
 
 # Examples
 ```jldoctest
@@ -2448,21 +2793,65 @@ julia> obj.x
 1
 ```
 
-See also [`propertynames`](@ref Base.propertynames) and
+See also [`getfield`](@ref Core.getfield),
+[`propertynames`](@ref Base.propertynames) and
 [`setproperty!`](@ref Base.setproperty!).
 """
 Base.getproperty
 
 """
     setproperty!(value, name::Symbol, x)
+    setproperty!(value, name::Symbol, x, order::Symbol)
 
 The syntax `a.b = c` calls `setproperty!(a, :b, c)`.
+The syntax `@atomic order a.b = c` calls `setproperty!(a, :b, c, :order)`
+and the syntax `@atomic a.b = c` calls `getproperty(a, :b, :sequentially_consistent)`.
+
+!!! compat "Julia 1.8"
+    `setproperty!` on modules requires at least Julia 1.8.
 
-See also [`propertynames`](@ref Base.propertynames) and
+See also [`setfield!`](@ref Core.setfield!),
+[`propertynames`](@ref Base.propertynames) and
 [`getproperty`](@ref Base.getproperty).
 """
 Base.setproperty!
 
+"""
+    swapproperty!(x, f::Symbol, v, order::Symbol=:not_atomic)
+
+The syntax `@atomic a.b, _ = c, a.b` returns `(c, swapproperty!(a, :b, c, :sequentially_consistent))`,
+where there must be one getfield expression common to both sides.
+
+See also [`swapfield!`](@ref Core.swapfield!)
+and [`setproperty!`](@ref Base.setproperty!).
+"""
+Base.swapproperty!
+
+"""
+    modifyproperty!(x, f::Symbol, op, v, order::Symbol=:not_atomic)
+
+The syntax `@atomic max(a().b, c)` returns `modifyproperty!(a(), :b,
+max, c, :sequentially_consistent))`, where the first argument must be a
+`getfield` expression and is modified atomically.
+
+See also [`modifyfield!`](@ref Core.modifyfield!)
+and [`setproperty!`](@ref Base.setproperty!).
+"""
+Base.modifyproperty!
+
+"""
+    replaceproperty!(x, f::Symbol, expected, desired, success_order::Symbol=:not_atomic, fail_order::Symbol=success_order)
+
+Perform a compare-and-swap operation on `x.f` from `expected` to `desired`, per
+egal. The syntax `@atomic_replace! x.f expected => desired` can be used instead
+of the function call form.
+
+See also [`replacefield!`](@ref Core.replacefield!)
+and [`setproperty!`](@ref Base.setproperty!).
+"""
+Base.replaceproperty!
+
+
 """
     StridedArray{T, N}
 
@@ -2503,6 +2892,13 @@ StridedVecOrMat
     Module
 
 A `Module` is a separate global variable workspace. See [`module`](@ref) and the [manual section about modules](@ref modules) for details.
+
+    Module(name::Symbol=:anonymous, std_imports=true, default_names=true)
+
+Return a module with the specified name. A `baremodule` corresponds to `Module(:ModuleName, false)`
+
+An empty module containing no names at all can be created with `Module(:ModuleName, false, false)`.
+This module will not import `Base` or `Core` and does not contain a reference to itself.
 """
 Module
 
@@ -2538,4 +2934,81 @@ A quoted piece of code, that does not support interpolation. See the [manual sec
 """
 QuoteNode
 
+
+"""
+    "
+`"` Is used to delimit string literals.
+
+# Examples
+
+```jldoctest
+julia> "Hello World!"
+"Hello World!"
+
+julia> "Hello World!\\n"
+"Hello World!\\n"
+```
+
+See also [`\"""`](@ref \"\"\").
+"""
+kw"\""
+
+"""
+    \"""
+`\"""` is used to delimit string literals. Strings created by triple quotation marks can contain `"` characters without escaping and are dedented to the level of the least-indented line. This is useful for defining strings within code that is indented.
+
+# Examples
+
+```jldoctest
+julia> \"""Hello World!\"""
+"Hello World!"
+
+julia> \"""Contains "quote" characters\"""
+"Contains \\"quote\\" characters"
+
+julia> \"""
+         Hello,
+         world.\"""
+"Hello,\\nworld."
+```
+
+See also [`"`](@ref \")
+"""
+kw"\"\"\""
+
+"""
+    donotdelete(args...)
+
+This function prevents dead-code elimination (DCE) of itself and any arguments
+passed to it, but is otherwise the lightest barrier possible. In particular,
+it is not a GC safepoint, does model an observable heap effect, does not expand
+to any code itself and may be re-ordered with respect to other side effects
+(though the total number of executions may not change).
+
+A useful model for this function is that it hashes all memory `reachable` from
+args and escapes this information through some observable side-channel that does
+not otherwise impact program behavior. Of course that's just a model. The
+function does nothing and returns `nothing`.
+
+This is intended for use in benchmarks that want to guarantee that `args` are
+actually computed. (Otherwise DCE may see that the result of the benchmark is
+unused and delete the entire benchmark code).
+
+**Note**: `donotdelete` does not affect constant folding. For example, in
+          `donotdelete(1+1)`, no add instruction needs to be executed at runtime and
+          the code is semantically equivalent to `donotdelete(2).`
+
+# Examples
+
+function loop()
+    for i = 1:1000
+        # The complier must guarantee that there are 1000 program points (in the correct
+        # order) at which the value of `i` is in a register, but has otherwise
+        # total control over the program.
+        donotdelete(i)
+    end
+end
+"""
+Base.donotdelete
+
 end
diff --git a/base/docs/utils.jl b/base/docs/utils.jl
index b1f3327086808c..928dfde01ccf00 100644
--- a/base/docs/utils.jl
+++ b/base/docs/utils.jl
@@ -18,6 +18,13 @@ You can also use a stream for large amounts of data:
     HTML() do io
       println(io, "<div>foo</div>")
     end
+
+!!! warning
+    `HTML` is currently exported to maintain
+    backwards compatibility, but this export
+    is deprecated. It is recommended to use
+    this type as `Docs.HTML` or to explicitly
+    import it from `Docs`.
 """
 mutable struct HTML{T}
     content::T
@@ -38,6 +45,12 @@ show(io::IO, ::MIME"text/html", h::HTML{<:Function}) = h.content(io)
     @html_str -> Docs.HTML
 
 Create an `HTML` object from a literal string.
+
+# Examples
+```jldoctest
+julia> html"Julia"
+HTML{String}("Julia")
+```
 """
 macro html_str(s)
     :(HTML($s))
@@ -63,6 +76,13 @@ You can also use a stream for large amounts of data:
     Text() do io
       println(io, "foo")
     end
+
+!!! warning
+    `Text` is currently exported to maintain
+    backwards compatibility, but this export
+    is deprecated. It is recommended to use
+    this type as `Docs.Text` or to explicitly
+    import it from `Docs`.
 """
 mutable struct Text{T}
     content::T
@@ -79,6 +99,12 @@ hash(t::T, h::UInt) where {T<:Union{HTML,Text}} = hash(T, hash(t.content, h))
     @text_str -> Docs.Text
 
 Create a `Text` object from a literal string.
+
+# Examples
+```jldoctest
+julia> text"Julia"
+Julia
+```
 """
 macro text_str(s)
     :(Text($s))
diff --git a/base/download.jl b/base/download.jl
index 60f4823c7a4e8d..59cbadcb2f6afb 100644
--- a/base/download.jl
+++ b/base/download.jl
@@ -1,14 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-const DOWNLOAD_HOOKS = Callable[]
-
-function download_url(url::AbstractString)
-    for hook in DOWNLOAD_HOOKS
-        url = String(hook(url)::AbstractString)
-    end
-    return url
-end
-
 Downloads() = require(PkgId(
         UUID((0xf43a241f_c20a_4ad4, 0x852c_f6b1247861c6)),
         "Downloads",
@@ -25,9 +16,10 @@ specified, a temporary path. Returns the path of the downloaded file.
     around `Downloads.download`. In new code, you should use that function
     directly instead of calling this.
 """
-function download(url::AbstractString, path::AbstractString)
-    invokelatest(Downloads().download, download_url(url), path)
-end
-function download(url::AbstractString)
-    invokelatest(Downloads().download, download_url(url))
+download(url::AbstractString, path::AbstractString) = do_download(url, path)
+download(url::AbstractString) = do_download(url, nothing)
+
+function do_download(url::AbstractString, path::Union{AbstractString, Nothing})
+    depwarn("Base.download is deprecated; use Downloads.download instead", :download)
+    invokelatest(Downloads().download, url, path)
 end
diff --git a/base/env.jl b/base/env.jl
index 8f5256f25915ee..4fdc02e582a4c6 100644
--- a/base/env.jl
+++ b/base/env.jl
@@ -32,7 +32,7 @@ if Sys.iswindows()
     function _unsetenv(svar::AbstractString)
         var = cwstring(svar)
         ret = ccall(:SetEnvironmentVariableW,stdcall,Int32,(Ptr{UInt16},Ptr{UInt16}),var,C_NULL)
-        windowserror(:setenv, ret == 0)
+        windowserror(:setenv, ret == 0 && Libc.GetLastError() != ERROR_ENVVAR_NOT_FOUND)
     end
 else # !windows
     _getenv(var::AbstractString) = ccall(:getenv, Cstring, (Cstring,), var)
@@ -77,7 +77,7 @@ variable may result in an uppercase `ENV` key.)
 const ENV = EnvDict()
 
 getindex(::EnvDict, k::AbstractString) = access_env(k->throw(KeyError(k)), k)
-get(::EnvDict, k::AbstractString, def) = access_env(k->def, k)
+get(::EnvDict, k::AbstractString, def) = access_env(Returns(def), k)
 get(f::Callable, ::EnvDict, k::AbstractString) = access_env(k->f(), k)
 in(k::AbstractString, ::KeySet{String, EnvDict}) = _hasenv(k)
 pop!(::EnvDict, k::AbstractString) = (v = ENV[k]; _unsetenv(k); v)
@@ -87,7 +87,7 @@ setindex!(::EnvDict, v, k::AbstractString) = _setenv(k,string(v))
 push!(::EnvDict, kv::Pair{<:AbstractString}) = setindex!(ENV, kv.second, kv.first)
 
 if Sys.iswindows()
-    GESW() = (pos = ccall(:GetEnvironmentStringsW,stdcall,Ptr{UInt16},()); (pos,pos))
+    GESW() = (pos = ccall(:GetEnvironmentStringsW, stdcall, Ptr{UInt16}, ()); (pos, pos))
     function winuppercase(s::AbstractString)
         isempty(s) && return s
         LOCALE_INVARIANT = 0x0000007f
@@ -99,32 +99,43 @@ if Sys.iswindows()
         return transcode(String, ws)
     end
     function iterate(hash::EnvDict, block::Tuple{Ptr{UInt16},Ptr{UInt16}} = GESW())
-        if unsafe_load(block[1]) == 0
-            ccall(:FreeEnvironmentStringsW, stdcall, Int32, (Ptr{UInt16},), block[2])
-            return nothing
+        while true
+            if unsafe_load(block[1]) == 0
+                ccall(:FreeEnvironmentStringsW, stdcall, Int32, (Ptr{UInt16},), block[2])
+                return nothing
+            end
+            pos = block[1]
+            blk = block[2]
+            len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)
+            buf = Vector{UInt16}(undef, len)
+            GC.@preserve buf unsafe_copyto!(pointer(buf), pos, len)
+            env = transcode(String, buf)
+            pos += (len + 1) * 2
+            if !isempty(env)
+                m = findnext('=', env, nextind(env, firstindex(env)))
+            else
+                m = nothing
+            end
+            if m === nothing
+                @warn "malformed environment entry: $env"
+                continue
+            end
+            return (Pair{String,String}(winuppercase(env[1:prevind(env, m)]), env[nextind(env, m):end]), (pos, blk))
         end
-        pos = block[1]
-        blk = block[2]
-        len = ccall(:wcslen, UInt, (Ptr{UInt16},), pos)
-        buf = Vector{UInt16}(undef, len)
-        GC.@preserve buf unsafe_copyto!(pointer(buf), pos, len)
-        env = transcode(String, buf)
-        m = match(r"^(=?[^=]+)=(.*)$"s, env)
-        if m === nothing
-            error("malformed environment entry: $env")
-        end
-        return (Pair{String,String}(winuppercase(m.captures[1]), m.captures[2]), (pos+(len+1)*2, blk))
     end
 else # !windows
     function iterate(::EnvDict, i=0)
-        env = ccall(:jl_environ, Any, (Int32,), i)
-        env === nothing && return nothing
-        env = env::String
-        m = match(r"^(.*?)=(.*)$"s, env)
-        if m === nothing
-            error("malformed environment entry: $env")
+        while true
+            env = ccall(:jl_environ, Any, (Int32,), i)
+            env === nothing && return nothing
+            env = env::String
+            m = findfirst('=', env)
+            if m === nothing
+                @warn "malformed environment entry: $env"
+                nothing
+            end
+            return (Pair{String,String}(env[1:prevind(env, m)], env[nextind(env, m):end]), i+1)
         end
-        return (Pair{String,String}(m.captures[1], m.captures[2]), i+1)
     end
 end # os-test
 
@@ -144,7 +155,7 @@ function show(io::IO, ::EnvDict)
 end
 
 """
-    withenv(f::Function, kv::Pair...)
+    withenv(f, kv::Pair...)
 
 Execute `f` in an environment that is temporarily modified (not replaced as in `setenv`)
 by zero or more `"var"=>val` arguments `kv`. `withenv` is generally used via the
@@ -152,7 +163,7 @@ by zero or more `"var"=>val` arguments `kv`. `withenv` is generally used via the
 environment variable (if it is set). When `withenv` returns, the original environment has
 been restored.
 """
-function withenv(f::Function, keyvals::Pair{T}...) where T<:AbstractString
+function withenv(f, keyvals::Pair{T}...) where T<:AbstractString
     old = Dict{T,Any}()
     for (key,val) in keyvals
         old[key] = get(ENV,key,nothing)
@@ -165,4 +176,4 @@ function withenv(f::Function, keyvals::Pair{T}...) where T<:AbstractString
         end
     end
 end
-withenv(f::Function) = f() # handle empty keyvals case; see #10853
+withenv(f) = f() # handle empty keyvals case; see #10853
diff --git a/base/error.jl b/base/error.jl
index 16b66af68be12d..4459e54def19b5 100644
--- a/base/error.jl
+++ b/base/error.jl
@@ -20,6 +20,8 @@
     throw(e)
 
 Throw an object as an exception.
+
+See also: [`rethrow`](@ref), [`error`](@ref).
 """
 throw
 
@@ -38,7 +40,7 @@ error(s::AbstractString) = throw(ErrorException(s))
 Raise an `ErrorException` with the given message.
 """
 function error(s::Vararg{Any,N}) where {N}
-    @_noinline_meta
+    @noinline
     throw(ErrorException(Main.Base.string(s...)))
 end
 
@@ -54,10 +56,10 @@ exception will continue propagation as if it had not been caught.
     the program state at the time of the error so you're encouraged to instead
     throw a new exception using `throw(e)`. In Julia 1.1 and above, using
     `throw(e)` will preserve the root cause exception on the stack, as
-    described in [`catch_stack`](@ref).
+    described in [`current_exceptions`](@ref).
 """
 rethrow() = ccall(:jl_rethrow, Bottom, ())
-rethrow(e) = ccall(:jl_rethrow_other, Bottom, (Any,), e)
+rethrow(@nospecialize(e)) = ccall(:jl_rethrow_other, Bottom, (Any,), e)
 
 struct InterpreterIP
     code::Union{CodeInfo,Core.MethodInstance,Nothing}
@@ -105,7 +107,7 @@ end
 Get a backtrace object for the current program point.
 """
 function backtrace()
-    @_noinline_meta
+    @noinline
     # skip frame for backtrace(). Note that for this to work properly,
     # backtrace() itself must not be interpreted nor inlined.
     skip = 1
@@ -123,37 +125,43 @@ function catch_backtrace()
     return _reformat_bt(bt::Vector{Ptr{Cvoid}}, bt2::Vector{Any})
 end
 
+struct ExceptionStack <: AbstractArray{Any,1}
+    stack::Array{Any,1}
+end
+
 """
-    catch_stack(task=current_task(); [inclue_bt=true])
+    current_exceptions(task::Task=current_task(); [backtrace::Bool=true])
 
 Get the stack of exceptions currently being handled. For nested catch blocks
 there may be more than one current exception in which case the most recently
-thrown exception is last in the stack. The stack is returned as a Vector of
-`(exception,backtrace)` pairs, or a Vector of exceptions if `include_bt` is
-false.
+thrown exception is last in the stack. The stack is returned as an
+`ExceptionStack` which is an AbstractVector of named tuples
+`(exception,backtrace)`. If `backtrace` is false, the backtrace in each pair
+will be set to `nothing`.
 
 Explicitly passing `task` will return the current exception stack on an
 arbitrary task. This is useful for inspecting tasks which have failed due to
 uncaught exceptions.
 
-!!! compat "Julia 1.1"
-    This function is experimental in Julia 1.1 and will likely be renamed in a
-    future release (see https://github.com/JuliaLang/julia/pull/29901).
+!!! compat "Julia 1.7"
+    This function went by the experimental name `catch_stack()` in Julia
+    1.1–1.6, and had a plain Vector-of-tuples as a return type.
 """
-function catch_stack(task=current_task(); include_bt=true)
-    raw = ccall(:jl_get_excstack, Any, (Any,Cint,Cint), task, include_bt, typemax(Cint))::Vector{Any}
+function current_exceptions(task::Task=current_task(); backtrace::Bool=true)
+    raw = ccall(:jl_get_excstack, Any, (Any,Cint,Cint), task, backtrace, typemax(Cint))::Vector{Any}
     formatted = Any[]
-    stride = include_bt ? 3 : 1
+    stride = backtrace ? 3 : 1
     for i = reverse(1:stride:length(raw))
-        e = raw[i]
-        push!(formatted, include_bt ? (e,Base._reformat_bt(raw[i+1],raw[i+2])) : e)
+        exc = raw[i]
+        bt = backtrace ? Base._reformat_bt(raw[i+1],raw[i+2]) : nothing
+        push!(formatted, (exception=exc,backtrace=bt))
     end
-    formatted
+    ExceptionStack(formatted)
 end
 
 ## keyword arg lowering generates calls to this ##
 function kwerr(kw, args::Vararg{Any,N}) where {N}
-    @_noinline_meta
+    @noinline
     throw(MethodError(typeof(args[1]).name.mt.kwsorter, (kw,args...)))
 end
 
@@ -253,7 +261,7 @@ function iterate(ebo::ExponentialBackOff, state= (ebo.n, min(ebo.first_delay, eb
     state[1] < 1 && return nothing
     next_n = state[1]-1
     curr_delay = state[2]
-    next_delay = min(ebo.max_delay, state[2] * ebo.factor * (1.0 - ebo.jitter + (rand(Float64) * 2.0 * ebo.jitter)))
+    next_delay = min(ebo.max_delay, state[2] * ebo.factor * (1.0 - ebo.jitter + (Libc.rand(Float64) * 2.0 * ebo.jitter)))
     (curr_delay, (next_n, next_delay))
 end
 length(ebo::ExponentialBackOff) = ebo.n
diff --git a/base/errorshow.jl b/base/errorshow.jl
index 9042d40b5c949b..2f6fa6604b775b 100644
--- a/base/errorshow.jl
+++ b/base/errorshow.jl
@@ -9,7 +9,7 @@ This method is used to display the exception after a call to [`throw`](@ref).
 # Examples
 ```jldoctest
 julia> struct MyException <: Exception
-           msg::AbstractString
+           msg::String
        end
 
 julia> function Base.showerror(io::IO, err::MyException)
@@ -31,7 +31,7 @@ showerror(io::IO, ex) = show(io, ex)
 
 show_index(io::IO, x::Any) = show(io, x)
 show_index(io::IO, x::Slice) = show_index(io, x.indices)
-show_index(io::IO, x::LogicalIndex) = show_index(io, x.mask)
+show_index(io::IO, x::LogicalIndex) = summary(io, x.mask)
 show_index(io::IO, x::OneTo) = print(io, "1:", x.stop)
 show_index(io::IO, x::Colon) = print(io, ':')
 
@@ -92,7 +92,7 @@ function showerror(io::IO, ex, bt; backtrace=true)
 end
 
 function showerror(io::IO, ex::LoadError, bt; backtrace=true)
-    print(io, "LoadError: ")
+    !isa(ex.error, LoadError) && print(io, "LoadError: ")
     showerror(io, ex.error, bt, backtrace=backtrace)
     print(io, "\nin expression starting at $(ex.file):$(ex.line)")
 end
@@ -152,6 +152,7 @@ showerror(io::IO, ex::KeyError) = (print(io, "KeyError: key ");
                                    print(io, " not found"))
 showerror(io::IO, ex::InterruptException) = print(io, "InterruptException:")
 showerror(io::IO, ex::ArgumentError) = print(io, "ArgumentError: ", ex.msg)
+showerror(io::IO, ex::DimensionMismatch) = print(io, "DimensionMismatch: ", ex.msg)
 showerror(io::IO, ex::AssertionError) = print(io, "AssertionError: ", ex.msg)
 showerror(io::IO, ex::OverflowError) = print(io, "OverflowError: ", ex.msg)
 
@@ -159,14 +160,8 @@ showerror(io::IO, ex::UndefKeywordError) =
     print(io, "UndefKeywordError: keyword argument $(ex.var) not assigned")
 
 function showerror(io::IO, ex::UndefVarError)
-    if ex.var in [:UTF16String, :UTF32String, :WString, :utf16, :utf32, :wstring, :RepString]
-        return showerror(io, ErrorException("""
-        `$(ex.var)` has been moved to the package LegacyStrings.jl:
-        Run Pkg.add("LegacyStrings") to install LegacyStrings on Julia v0.5-;
-        Then do `using LegacyStrings` to get `$(ex.var)`.
-        """))
-    end
     print(io, "UndefVarError: $(ex.var) not defined")
+    Experimental.show_error_hints(io, ex)
 end
 
 function showerror(io::IO, ex::InexactError)
@@ -176,6 +171,10 @@ function showerror(io::IO, ex::InexactError)
     Experimental.show_error_hints(io, ex)
 end
 
+function showerror(io::IO, ex::CanonicalIndexError)
+    print(io, "CanonicalIndexError: ", ex.func, " not defined for ", ex.type)
+end
+
 typesof(@nospecialize args...) = Tuple{Any[ Core.Typeof(args[i]) for i in 1:length(args) ]...}
 
 function print_with_compare(io::IO, @nospecialize(a::DataType), @nospecialize(b::DataType), color::Symbol)
@@ -205,19 +204,20 @@ function print_with_compare(io::IO, @nospecialize(a), @nospecialize(b), color::S
     end
 end
 
-function show_convert_error(io::IO, ex::MethodError, @nospecialize(arg_types_param))
+function show_convert_error(io::IO, ex::MethodError, arg_types_param)
     # See #13033
     T = striptype(ex.args[1])
     if T === nothing
         print(io, "First argument to `convert` must be a Type, got ", ex.args[1])
     else
-        print_one_line = isa(T, DataType) && isa(arg_types_param[2], DataType) && T.name != arg_types_param[2].name
+        p2 = arg_types_param[2]
+        print_one_line = isa(T, DataType) && isa(p2, DataType) && T.name != p2.name
         printstyled(io, "Cannot `convert` an object of type ")
         print_one_line || printstyled(io, "\n  ")
-        print_with_compare(io, arg_types_param[2], T, :light_green)
+        print_with_compare(io, p2, T, :light_green)
         printstyled(io, " to an object of type ")
         print_one_line || printstyled(io, "\n  ")
-        print_with_compare(io, T, arg_types_param[2], :light_red)
+        print_with_compare(io, T, p2, :light_red)
     end
 end
 
@@ -228,10 +228,11 @@ function showerror(io::IO, ex::MethodError)
     arg_types = (is_arg_types ? ex.args : typesof(ex.args...))::DataType
     f = ex.f
     meth = methods_including_ambiguous(f, arg_types)
-    if length(meth) > 1
+    if isa(meth, MethodList) && length(meth) > 1
         return showerror_ambiguous(io, meth, f, arg_types)
     end
     arg_types_param::SimpleVector = arg_types.parameters
+    show_candidates = true
     print(io, "MethodError: ")
     ft = typeof(f)
     name = ft.name.mt.name
@@ -248,7 +249,10 @@ function showerror(io::IO, ex::MethodError)
     if f === Base.convert && length(arg_types_param) == 2 && !is_arg_types
         f_is_function = true
         show_convert_error(io, ex, arg_types_param)
-    elseif isempty(methods(f)) && isa(f, DataType) && f.abstract
+    elseif f === mapreduce_empty || f === reduce_empty
+        print(io, "reducing over an empty collection is not allowed; consider supplying `init` to the reducer")
+        show_candidates = false
+    elseif isempty(methods(f)) && isa(f, DataType) && isabstracttype(f)
         print(io, "no constructors have been defined for ", f)
     elseif isempty(methods(f)) && !isa(f, Function) && !isa(f, Type)
         print(io, "objects of type ", ft, " are not callable")
@@ -281,14 +285,11 @@ function showerror(io::IO, ex::MethodError)
         if any(x -> x <: AbstractArray{<:Number}, arg_types_param) &&
             any(x -> x <: Number, arg_types_param)
 
-            nouns = Dict{Any,String}(
-                Base.:+ => "addition",
-                Base.:- => "subtraction",
-            )
+            nounf = f === Base.:+ ? "addition" : "subtraction"
             varnames = ("scalar", "array")
             first, second = arg_types_param[1] <: Number ? varnames : reverse(varnames)
             fstring = f === Base.:+ ? "+" : "-"  # avoid depending on show_default for functions (invalidation)
-            print(io, "\nFor element-wise $(nouns[f]), use broadcasting with dot syntax: $first .$fstring $second")
+            print(io, "\nFor element-wise $nounf, use broadcasting with dot syntax: $first .$fstring $second")
         end
     end
     if ft <: AbstractArray
@@ -316,14 +317,14 @@ function showerror(io::IO, ex::MethodError)
             hasrows |= isrow
             push!(vec_args, isrow ? vec(arg) : arg)
         end
-        if hasrows && applicable(f, vec_args...)
+        if hasrows && applicable(f, vec_args...) && isempty(kwargs)
             print(io, "\n\nYou might have used a 2d row vector where a 1d column vector was required.",
                       "\nNote the difference between 1d column vector [1,2,3] and 2d row vector [1 2 3].",
                       "\nYou can convert to a column vector with the vec() function.")
         end
     end
     Experimental.show_error_hints(io, ex, arg_types_param, kwargs)
-    try
+    show_candidates && try
         show_method_candidates(io, ex, kwargs)
     catch ex
         @error "Error showing method candidates, aborted" exception=ex,catch_backtrace()
@@ -349,13 +350,15 @@ function showerror_ambiguous(io::IO, meth, f, args)
         sigfix = typeintersect(m.sig, sigfix)
     end
     if isa(unwrap_unionall(sigfix), DataType) && sigfix <: Tuple
-        if all(m->morespecific(sigfix, m.sig), meth)
-            print(io, "\nPossible fix, define\n  ")
-            Base.show_tuple_as_call(io, :function,  sigfix)
-        else
-            println(io)
-            print(io, "To resolve the ambiguity, try making one of the methods more specific, or ")
-            print(io, "adding a new method more specific than any of the existing applicable methods.")
+        let sigfix=sigfix
+            if all(m->morespecific(sigfix, m.sig), meth)
+                print(io, "\nPossible fix, define\n  ")
+                Base.show_tuple_as_call(io, :function,  sigfix)
+            else
+                println(io)
+                print(io, "To resolve the ambiguity, try making one of the methods more specific, or ")
+                print(io, "adding a new method more specific than any of the existing applicable methods.")
+            end
         end
     end
     nothing
@@ -371,6 +374,13 @@ function showerror_nostdio(err, msg::AbstractString)
     ccall(:jl_printf, Cint, (Ptr{Cvoid},Cstring), stderr_stream, "\n")
 end
 
+stacktrace_expand_basepaths()::Bool =
+    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_EXPAND_BASEPATHS", "false")) === true
+stacktrace_contract_userdir()::Bool =
+    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_CONTRACT_HOMEDIR", "true")) === true
+stacktrace_linebreaks()::Bool =
+    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_LINEBREAKS", "false")) === true
+
 function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=())
     is_arg_types = isa(ex.args, DataType)
     arg_types = is_arg_types ? ex.args : typesof(ex.args...)
@@ -399,7 +409,11 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
             buf = IOBuffer()
             iob0 = iob = IOContext(buf, io)
             tv = Any[]
-            sig0 = method.sig
+            if func isa Core.OpaqueClosure
+                sig0 = signature_type(func, typeof(func).parameters[1])
+            else
+                sig0 = method.sig
+            end
             while isa(sig0, UnionAll)
                 push!(tv, sig0.var)
                 iob = IOContext(iob, :unionall_env => sig0.var)
@@ -423,7 +437,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                 # If isvarargtype then it checks whether the rest of the input arguments matches
                 # the varargtype
                 if Base.isvarargtype(sig[i])
-                    sigstr = (unwrap_unionall(sig[i]).parameters[1], "...")
+                    sigstr = (unwrapva(unwrap_unionall(sig[i])), "...")
                     j = length(t_i)
                 else
                     sigstr = (sig[i],)
@@ -460,7 +474,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                 # It ensures that methods like f(a::AbstractString...) gets the correct
                 # number of right_matches
                 for t in arg_types_param[length(sig):end]
-                    if t <: rewrap_unionall(unwrap_unionall(sig[end]).parameters[1], method.sig)
+                    if t <: rewrap_unionall(unwrapva(unwrap_unionall(sig[end])), method.sig)
                         right_matches += 1
                     end
                 end
@@ -473,7 +487,7 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                     for (k, sigtype) in enumerate(sig[length(t_i)+1:end])
                         sigtype = isvarargtype(sigtype) ? unwrap_unionall(sigtype) : sigtype
                         if Base.isvarargtype(sigtype)
-                            sigstr = ((sigtype::DataType).parameters[1], "...")
+                            sigstr = (unwrapva(sigtype::Core.TypeofVararg), "...")
                         else
                             sigstr = (sigtype,)
                         end
@@ -498,7 +512,12 @@ function show_method_candidates(io::IO, ex::MethodError, @nospecialize kwargs=()
                 end
                 print(iob, ")")
                 show_method_params(iob0, tv)
-                print(iob, " at ", method.file, ":", method.line)
+                file, line = updated_methodloc(method)
+                if file === nothing
+                    file = string(method.file)
+                end
+                stacktrace_contract_userdir() && (file = contractuser(file))
+                print(iob, " at ", file, ":", line)
                 if !isempty(kwargs)::Bool
                     unexpected = Symbol[]
                     if isempty(kwords) || !(any(endswith(string(kword), "...") for kword in kwords))
@@ -555,35 +574,21 @@ end
 # replace `sf` as needed.
 const update_stackframes_callback = Ref{Function}(identity)
 
-function replaceuserpath(str)
-    str = replace(str, homedir() => "~")
-    # seems to be necessary for some paths with small letter drive c:// etc
-    str = replace(str, lowercasefirst(homedir()) => "~")
-    return str
-end
-
-const STACKTRACE_MODULECOLORS = [:light_blue, :light_yellow,
-        :light_magenta, :light_green, :light_cyan, :light_red,
-        :blue, :yellow, :magenta, :green, :cyan, :red]
-stacktrace_expand_basepaths()::Bool =
-    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_EXPAND_BASEPATHS", "false")) === true
-stacktrace_contract_userdir()::Bool =
-    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_CONTRACT_HOMEDIR", "true")) === true
-stacktrace_linebreaks()::Bool =
-    tryparse(Bool, get(ENV, "JULIA_STACKTRACE_LINEBREAKS", "false")) === true
+const STACKTRACE_MODULECOLORS = [:magenta, :cyan, :green, :yellow]
+const STACKTRACE_FIXEDCOLORS = IdDict(Base => :light_black, Core => :light_black)
 
 function show_full_backtrace(io::IO, trace::Vector; print_linebreaks::Bool)
-    n = length(trace)
-    ndigits_max = ndigits(n)
+    num_frames = length(trace)
+    ndigits_max = ndigits(num_frames)
 
-    modulecolordict = Dict{Module, Symbol}()
+    modulecolordict = copy(STACKTRACE_FIXEDCOLORS)
     modulecolorcycler = Iterators.Stateful(Iterators.cycle(STACKTRACE_MODULECOLORS))
 
     println(io, "\nStacktrace:")
 
-    for (i, frame) in enumerate(trace)
-        print_stackframe(io, i, frame, 1, ndigits_max, modulecolordict, modulecolorcycler)
-        if i < n
+    for (i, (frame, n)) in enumerate(trace)
+        print_stackframe(io, i, frame, n, ndigits_max, modulecolordict, modulecolorcycler)
+        if i < num_frames
             println(io)
             print_linebreaks && println(io)
         end
@@ -678,7 +683,7 @@ end
 # Print a stack frame where the module color is determined by looking up the parent module in
 # `modulecolordict`. If the module does not have a color, yet, a new one can be drawn
 # from `modulecolorcycler`.
-function print_stackframe(io, i, frame, n, digit_align_width, modulecolordict, modulecolorcycler)
+function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, modulecolordict, modulecolorcycler)
     m = Base.parentmodule(frame)
     if m !== nothing
         while parentmodule(m) !== m
@@ -698,10 +703,11 @@ end
 
 
 # Print a stack frame where the module color is set manually with `modulecolor`.
-function print_stackframe(io, i, frame, n, digit_align_width, modulecolor)
+function print_stackframe(io, i, frame::StackFrame, n::Int, digit_align_width, modulecolor)
     file, line = string(frame.file), frame.line
+    file = fixup_stdlib_path(file)
     stacktrace_expand_basepaths() && (file = something(find_source_file(file), file))
-    stacktrace_contract_userdir() && (file = replaceuserpath(file))
+    stacktrace_contract_userdir() && (file = contractuser(file))
 
     # Used by the REPL to make it possible to open
     # the location of a stackframe/method in the editor.
@@ -741,13 +747,7 @@ function print_stackframe(io, i, frame, n, digit_align_width, modulecolor)
     # filename, separator, line
     # use escape codes for formatting, printstyled can't do underlined and color
     # codes are bright black (90) and underlined (4)
-    function print_underlined(io::IO, s...)
-        colored = get(io, :color, false)::Bool
-        start_s = colored ? "\033[90;4m" : ""
-        end_s   = colored ? "\033[0m"    : ""
-        print(io, start_s, s..., end_s)
-    end
-    print_underlined(io, pathparts[end], ":", line)
+    printstyled(io, pathparts[end], ":", line; color = :light_black, underline = true)
 
     # inlined
     printstyled(io, inlined ? " [inlined]" : "", color = :light_black)
@@ -782,8 +782,7 @@ function show_backtrace(io::IO, t::Vector)
 
     try invokelatest(update_stackframes_callback[], filtered) catch end
     # process_backtrace returns a Vector{Tuple{Frame, Int}}
-    frames = map(x->first(x)::StackFrame, filtered)
-    show_full_backtrace(io, frames; print_linebreaks = stacktrace_linebreaks())
+    show_full_backtrace(io, filtered; print_linebreaks = stacktrace_linebreaks())
     return
 end
 
@@ -796,10 +795,9 @@ end
 # For improved user experience, filter out frames for include() implementation
 # - see #33065. See also #35371 for extended discussion of internal frames.
 function _simplify_include_frames(trace)
-    i = length(trace)
-    kept_frames = trues(i)
+    kept_frames = trues(length(trace))
     first_ignored = nothing
-    while i >= 1
+    for i in length(trace):-1:1
         frame::StackFrame, _ = trace[i]
         mod = parentmodule(frame)
         if first_ignored === nothing
@@ -821,10 +819,9 @@ function _simplify_include_frames(trace)
                 first_ignored = nothing
             end
         end
-        i -= 1
     end
     if first_ignored !== nothing
-        kept_frames[i:first_ignored] .= false
+        kept_frames[1:first_ignored] .= false
     end
     return trace[kept_frames]
 end
@@ -872,7 +869,7 @@ function process_backtrace(t::Vector, limit::Int=typemax(Int); skipC = true)
     return _simplify_include_frames(ret)
 end
 
-function show_exception_stack(io::IO, stack::Vector)
+function show_exception_stack(io::IO, stack)
     # Display exception stack with the top of the stack first.  This ordering
     # means that the user doesn't have to scroll up in the REPL to discover the
     # root cause.
@@ -896,3 +893,28 @@ function show(io::IO, ip::InterpreterIP)
         print(io, " in $(ip.code) at statement $(Int(ip.stmt))")
     end
 end
+
+# handler for displaying a hint in case the user tries to call
+# the instance of a number (probably missing the operator)
+# eg: (1 + 2)(3 + 4)
+function noncallable_number_hint_handler(io, ex, arg_types, kwargs)
+    @nospecialize
+    if ex.f isa Number
+        print(io, "\nMaybe you forgot to use an operator such as ")
+        printstyled(io, "*, ^, %, / etc. ", color=:cyan)
+        print(io, "?")
+    end
+end
+
+Experimental.register_error_hint(noncallable_number_hint_handler, MethodError)
+
+# ExceptionStack implementation
+size(s::ExceptionStack) = size(s.stack)
+getindex(s::ExceptionStack, i::Int) = s.stack[i]
+
+function show(io::IO, ::MIME"text/plain", stack::ExceptionStack)
+    nexc = length(stack)
+    printstyled(io, nexc, "-element ExceptionStack", nexc == 0 ? "" : ":\n")
+    show_exception_stack(io, stack)
+end
+show(io::IO, stack::ExceptionStack) = show(io, MIME("text/plain"), stack)
diff --git a/base/essentials.jl b/base/essentials.jl
index fb360ea6482dbe..498c6f8f4f1967 100644
--- a/base/essentials.jl
+++ b/base/essentials.jl
@@ -1,11 +1,18 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Core: CodeInfo, SimpleVector
+using Core: CodeInfo, SimpleVector, donotdelete, arrayref
 
 const Callable = Union{Function,Type}
 
 const Bottom = Union{}
 
+# Define minimal array interface here to help code used in macros:
+length(a::Array) = arraylen(a)
+
+# This is more complicated than it needs to be in order to get Win64 through bootstrap
+eval(:(getindex(A::Array, i1::Int) = arrayref($(Expr(:boundscheck)), A, i1)))
+eval(:(getindex(A::Array, i1::Int, i2::Int, I::Int...) = (@inline; arrayref($(Expr(:boundscheck)), A, i1, i2, I...))))
+
 """
     AbstractSet{T}
 
@@ -23,14 +30,24 @@ An `AbstractDict{K, V}` should be an iterator of `Pair{K, V}`.
 """
 abstract type AbstractDict{K,V} end
 
-# The real @inline macro is not available until after array.jl, so this
-# internal macro splices the meta Expr directly into the function body.
-macro _inline_meta()
-    Expr(:meta, :inline)
-end
-macro _noinline_meta()
-    Expr(:meta, :noinline)
+"""
+    Iterators.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)}
+
+Transforms an indexable container into a Dictionary-view of the same data.
+Modifying the key-space of the underlying data may invalidate this object.
+"""
+struct Pairs{K, V, I, A} <: AbstractDict{K, V}
+    data::A
+    itr::I
 end
+Pairs{K, V}(data::A, itr::I) where {K, V, I, A} = Pairs{K, V, I, A}(data, itr)
+Pairs{K}(data::A, itr::I) where {K, I, A} = Pairs{K, eltype(A), I, A}(data, itr)
+Pairs(data::A, itr::I) where  {I, A} = Pairs{eltype(I), eltype(A), I, A}(data, itr)
+pairs(::Type{NamedTuple}) = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names, T}} where {V, N, names, T<:NTuple{N, Any}}
+
+## optional pretty printer:
+#const NamedTuplePair{N, V, names, T<:NTuple{N, Any}} = Pairs{Symbol, V, NTuple{N, Symbol}, NamedTuple{names, T}}
+#export NamedTuplePair
 
 macro _gc_preserve_begin(arg1)
     Expr(:gc_preserve_begin, esc(arg1))
@@ -44,12 +61,12 @@ end
     @nospecialize
 
 Applied to a function argument name, hints to the compiler that the method
-should not be specialized for different types of that argument,
-but instead to use precisely the declared type for each argument.
-This is only a hint for avoiding excess code generation.
-Can be applied to an argument within a formal argument list,
+implementation should not be specialized for different types of that argument,
+but instead use the declared type for that argument.
+It can be applied to an argument within a formal argument list,
 or in the function body.
-When applied to an argument, the macro must wrap the entire argument expression.
+When applied to an argument, the macro must wrap the entire argument expression, e.g.,
+`@nospecialize(x::Real)` or `@nospecialize(i::Integer...)` rather than wrapping just the argument name.
 When used in a function body, the macro must occur in statement position and
 before any code.
 
@@ -77,6 +94,38 @@ end
 f(y) = [x for x in y]
 @specialize
 ```
+
+!!! note
+    `@nospecialize` affects code generation but not inference: it limits the diversity
+    of the resulting native code, but it does not impose any limitations (beyond the
+    standard ones) on type-inference.
+
+# Example
+
+```julia
+julia> f(A::AbstractArray) = g(A)
+f (generic function with 1 method)
+
+julia> @noinline g(@nospecialize(A::AbstractArray)) = A[1]
+g (generic function with 1 method)
+
+julia> @code_typed f([1.0])
+CodeInfo(
+1 ─ %1 = invoke Main.g(_2::AbstractArray)::Float64
+└──      return %1
+) => Float64
+```
+
+Here, the `@nospecialize` annotation results in the equivalent of
+
+```julia
+f(A::AbstractArray) = invoke(g, Tuple{AbstractArray}, A)
+```
+
+ensuring that only one version of native code will be generated for `g`,
+one that is generic for any `AbstractArray`.
+However, the specific return type is still inferred for both `g` and `f`,
+and this is still used in optimizing the callers of `f` and `g`.
 """
 macro nospecialize(vars...)
     if nfields(vars) === 1
@@ -111,7 +160,8 @@ end
 
 Tests whether variable `s` is defined in the current scope.
 
-See also [`isdefined`](@ref).
+See also [`isdefined`](@ref) for field properties and [`isassigned`](@ref) for
+array indexes or [`haskey`](@ref) for other mappings.
 
 # Examples
 ```jldoctest
@@ -140,9 +190,37 @@ macro isdefined(s::Symbol)
     return Expr(:escape, Expr(:isdefined, s))
 end
 
+function _is_internal(__module__)
+    if ccall(:jl_base_relative_to, Any, (Any,), __module__)::Module === Core.Compiler ||
+       nameof(__module__) === :Base
+        return true
+    end
+    return false
+end
+
+# can be used in place of `@pure` (supposed to be used for bootstrapping)
 macro _pure_meta()
-    return Expr(:meta, :pure)
+    return _is_internal(__module__) && Expr(:meta, :pure)
+end
+# can be used in place of `@assume_effects :total` (supposed to be used for bootstrapping)
+macro _total_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#true,
+        #=:effect_free=#true,
+        #=:nothrow=#true,
+        #=:terminates_globally=#true,
+        #=:terminates_locally=#false))
+end
+# can be used in place of `@assume_effects :total_may_throw` (supposed to be used for bootstrapping)
+macro _total_may_throw_meta()
+    return _is_internal(__module__) && Expr(:meta, Expr(:purity,
+        #=:consistent=#true,
+        #=:effect_free=#true,
+        #=:nothrow=#false,
+        #=:terminates_globally=#true,
+        #=:terminates_locally=#false))
 end
+
 # another version of inlining that propagates an inbounds context
 macro _propagate_inbounds_meta()
     return Expr(:meta, :inline, :propagate_inbounds)
@@ -170,7 +248,7 @@ Stacktrace:
 [...]
 ```
 
-If `T` is a [`AbstractFloat`](@ref) or [`Rational`](@ref) type,
+If `T` is a [`AbstractFloat`](@ref) type,
 then it will return the closest value to `x` representable by `T`.
 
 ```jldoctest
@@ -180,11 +258,8 @@ julia> x = 1/3
 julia> convert(Float32, x)
 0.33333334f0
 
-julia> convert(Rational{Int32}, x)
-1//3
-
-julia> convert(Rational{Int64}, x)
-6004799503160661//18014398509481984
+julia> convert(BigFloat, x)
+0.333333333333333314829616256247390992939472198486328125
 ```
 
 If `T` is a collection type and `x` a collection, the result of
@@ -197,12 +272,12 @@ julia> y = convert(Vector{Int}, x);
 julia> y === x
 true
 ```
+
+See also: [`round`](@ref), [`trunc`](@ref), [`oftype`](@ref), [`reinterpret`](@ref).
 """
 function convert end
 
-convert(::Type{Union{}}, x) = throw(MethodError(convert, (Union{}, x)))
-convert(::Type{Any}, x) = x
-convert(::Type{T}, x::T) where {T} = x
+convert(::Type{Union{}}, @nospecialize x) = throw(MethodError(convert, (Union{}, x)))
 convert(::Type{Type}, x::Type) = x # the ssair optimizer is strongly dependent on this method existing to avoid over-specialization
                                    # in the absence of inlining-enabled
                                    # (due to fields typed as `Type`, which is generally a bad idea)
@@ -216,10 +291,10 @@ Evaluate an expression with values interpolated into it using `eval`.
 If two arguments are provided, the first is the module to evaluate in.
 """
 macro eval(ex)
-    :(Core.eval($__module__, $(Expr(:quote,ex))))
+    return Expr(:escape, Expr(:call, GlobalRef(Core, :eval), __module__, Expr(:quote, ex)))
 end
 macro eval(mod, ex)
-    :(Core.eval($(esc(mod)), $(Expr(:quote,ex))))
+    return Expr(:escape, Expr(:call, GlobalRef(Core, :eval), mod, Expr(:quote, ex)))
 end
 
 argtail(x, rest...) = rest
@@ -229,6 +304,8 @@ argtail(x, rest...) = rest
 
 Return a `Tuple` consisting of all but the first component of `x`.
 
+See also: [`front`](@ref Base.front), [`rest`](@ref Base.rest), [`first`](@ref), [`Iterators.peel`](@ref).
+
 # Examples
 ```jldoctest
 julia> Base.tail((1,2,3))
@@ -255,6 +332,18 @@ function rewrap_unionall(@nospecialize(t), @nospecialize(u))
     return UnionAll(u.var, rewrap_unionall(t, u.body))
 end
 
+function rewrap_unionall(t::Core.TypeofVararg, @nospecialize(u))
+    isdefined(t, :T) || return t
+    if !isa(u, UnionAll)
+        return t
+    end
+    T = rewrap_unionall(t.T, u)
+    if !isdefined(t, :N) || t.N === u.var
+        return Vararg{T}
+    end
+    return Vararg{T, t.N}
+end
+
 # replace TypeVars in all enclosing UnionAlls with fresh TypeVars
 function rename_unionall(@nospecialize(u))
     if !isa(u, UnionAll)
@@ -271,10 +360,8 @@ function rename_unionall(@nospecialize(u))
     return UnionAll(nv, body{nv})
 end
 
-const _va_typename = Vararg.body.body.name
 function isvarargtype(@nospecialize(t))
-    t = unwrap_unionall(t)
-    return isa(t, DataType) && (t::DataType).name === _va_typename
+    return isa(t, Core.TypeofVararg)
 end
 
 function isvatuple(@nospecialize(t))
@@ -286,18 +373,14 @@ function isvatuple(@nospecialize(t))
     return false
 end
 
-function unwrapva(@nospecialize(t))
-    # NOTE: this returns a related type, but it's NOT a subtype of the original tuple
-    t2 = unwrap_unionall(t)
-    return isvarargtype(t2) ? rewrap_unionall(t2.parameters[1], t) : t
-end
+unwrapva(t::Core.TypeofVararg) = isdefined(t, :T) ? t.T : Any
+unwrapva(@nospecialize(t)) = t
 
-function unconstrain_vararg_length(@nospecialize(va))
+function unconstrain_vararg_length(va::Core.TypeofVararg)
     # construct a new Vararg type where its length is unconstrained,
     # but its element type still captures any dependencies the input
     # element type may have had on the input length
-    T = unwrap_unionall(va).parameters[1]
-    return rewrap_unionall(Vararg{T}, va)
+    return Vararg{unwrapva(va)}
 end
 
 typename(a) = error("typename does not apply to this type")
@@ -310,7 +393,7 @@ function typename(a::Union)
 end
 typename(union::UnionAll) = typename(union.body)
 
-_tuple_error(T::Type, x) = (@_noinline_meta; throw(MethodError(convert, (T, x))))
+_tuple_error(T::Type, x) = (@noinline; throw(MethodError(convert, (T, x))))
 
 convert(::Type{T}, x::T) where {T<:Tuple} = x
 function convert(::Type{T}, x::NTuple{N,Any}) where {N, T<:Tuple}
@@ -319,7 +402,7 @@ function convert(::Type{T}, x::NTuple{N,Any}) where {N, T<:Tuple}
     if typeintersect(NTuple{N,Any}, T) === Union{}
         _tuple_error(T, x)
     end
-    cvt1(n) = (@_inline_meta; convert(fieldtype(T, n), getfield(x, n, #=boundscheck=#false)))
+    cvt1(n) = (@inline; convert(fieldtype(T, n), getfield(x, n, #=boundscheck=#false)))
     return ntuple(cvt1, Val(N))::NTuple{N,Any}
 end
 
@@ -420,15 +503,15 @@ julia> reinterpret(Float32, UInt32[1 2 3 4 5])
 ```
 """
 reinterpret(::Type{T}, x) where {T} = bitcast(T, x)
-reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16,x)
-reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16,x)
 
 """
     sizeof(T::DataType)
     sizeof(obj)
 
 Size, in bytes, of the canonical binary representation of the given `DataType` `T`, if any.
-Size, in bytes, of object `obj` if it is not `DataType`.
+Or the size, in bytes, of object `obj` if it is not a `DataType`.
+
+See also [`summarysize`](@ref).
 
 # Examples
 ```jldoctest
@@ -441,7 +524,7 @@ julia> sizeof(ComplexF64)
 julia> sizeof(1.0)
 8
 
-julia> sizeof([1.0:10.0;])
+julia> sizeof(collect(1.0:10.0))
 80
 ```
 
@@ -456,21 +539,24 @@ Stacktrace:
 """
 sizeof(x) = Core.sizeof(x)
 
-# simple Array{Any} operations needed for bootstrap
-@eval setindex!(A::Array{Any}, @nospecialize(x), i::Int) = arrayset($(Expr(:boundscheck)), A, x, i)
-
 """
-    precompile(f, args::Tuple{Vararg{Any}})
+    ifelse(condition::Bool, x, y)
+
+Return `x` if `condition` is `true`, otherwise return `y`. This differs from `?` or `if` in
+that it is an ordinary function, so all the arguments are evaluated first. In some cases,
+using `ifelse` instead of an `if` statement can eliminate the branch in generated code and
+provide higher performance in tight loops.
 
-Compile the given function `f` for the argument tuple (of types) `args`, but do not execute it.
+# Examples
+```jldoctest
+julia> ifelse(1 > 2, 1, 2)
+2
+```
 """
-function precompile(@nospecialize(f), args::Tuple)
-    ccall(:jl_compile_hint, Int32, (Any,), Tuple{Core.Typeof(f), args...}) != 0
-end
+ifelse(condition::Bool, x, y) = Core.ifelse(condition, x, y)
 
-function precompile(argt::Type)
-    ccall(:jl_compile_hint, Int32, (Any,), argt) != 0
-end
+# simple Array{Any} operations needed for bootstrap
+@eval setindex!(A::Array{Any}, @nospecialize(x), i::Int) = arrayset($(Expr(:boundscheck)), A, x, i)
 
 """
     esc(e)
@@ -521,7 +607,7 @@ julia> f2()
     As noted there, the caller must verify—using information they can access—that
     their accesses are valid before using `@inbounds`. For indexing into your
     [`AbstractArray`](@ref) subclasses, for example, this involves checking the
-    indices against its [`size`](@ref). Therefore, `@boundscheck` annotations
+    indices against its [`axes`](@ref). Therefore, `@boundscheck` annotations
     should only be added to a [`getindex`](@ref) or [`setindex!`](@ref)
     implementation after you are certain its behavior is correct.
 """
@@ -540,7 +626,7 @@ element `i` of array `A` is skipped to improve performance.
 ```julia
 function sum(A::AbstractArray)
     r = zero(eltype(A))
-    for i = 1:length(A)
+    for i in eachindex(A)
         @inbounds r += A[i]
     end
     return r
@@ -705,12 +791,11 @@ call obsolete versions of a function `f`.
 `f` directly, and the type of the result cannot be inferred by the compiler.)
 """
 function invokelatest(@nospecialize(f), @nospecialize args...; kwargs...)
+    kwargs = merge(NamedTuple(), kwargs)
     if isempty(kwargs)
-        return Core._apply_latest(f, args)
+        return Core._call_latest(f, args...)
     end
-    # We use a closure (`inner`) to handle kwargs.
-    inner() = f(args...; kwargs...)
-    Core._apply_latest(inner)
+    return Core._call_latest(Core.kwfunc(f), kwargs, f, args...)
 end
 
 """
@@ -740,21 +825,28 @@ of [`invokelatest`](@ref).
     world age refers to system state unrelated to the main Julia session.
 """
 function invoke_in_world(world::UInt, @nospecialize(f), @nospecialize args...; kwargs...)
+    kwargs = Base.merge(NamedTuple(), kwargs)
     if isempty(kwargs)
-        return Core._apply_in_world(world, f, args)
+        return Core._call_in_world(world, f, args...)
     end
-    inner() = f(args...; kwargs...)
-    Core._apply_in_world(world, inner)
+    return Core._call_in_world(world, Core.kwfunc(f), kwargs, f, args...)
 end
 
 # TODO: possibly make this an intrinsic
-inferencebarrier(@nospecialize(x)) = Ref{Any}(x)[]
+inferencebarrier(@nospecialize(x)) = RefValue{Any}(x).x
 
 """
     isempty(collection) -> Bool
 
 Determine whether a collection is empty (has no elements).
 
+!!! warning
+
+    `isempty(itr)` may consume the next element of a stateful iterator `itr`
+    unless an appropriate `Base.isdone(itr)` or `isempty` method is defined.
+    Use of `isempty` should therefore be avoided when writing generic
+    code which should support any iterator type.
+
 # Examples
 ```jldoctest
 julia> isempty([])
@@ -799,6 +891,8 @@ values(itr) = itr
 
 A type with no fields whose singleton instance [`missing`](@ref) is used
 to represent missing values.
+
+See also: [`skipmissing`](@ref), [`nonmissingtype`](@ref), [`Nothing`](@ref).
 """
 struct Missing end
 
@@ -806,6 +900,8 @@ struct Missing end
     missing
 
 The singleton instance of type [`Missing`](@ref) representing a missing value.
+
+See also: [`NaN`](@ref), [`skipmissing`](@ref), [`nonmissingtype`](@ref).
 """
 const missing = Missing()
 
@@ -813,9 +909,10 @@ const missing = Missing()
     ismissing(x)
 
 Indicate whether `x` is [`missing`](@ref).
+
+See also: [`skipmissing`](@ref), [`isnothing`](@ref), [`isnan`](@ref).
 """
-ismissing(::Any) = false
-ismissing(::Missing) = true
+ismissing(x) = x === missing
 
 function popfirst! end
 
diff --git a/base/experimental.jl b/base/experimental.jl
index 3e5038fb997391..9edd197c198e9c 100644
--- a/base/experimental.jl
+++ b/base/experimental.jl
@@ -10,6 +10,7 @@
 module Experimental
 
 using Base: Threads, sync_varname
+using Base.Meta
 
 """
     Const(A::Array)
@@ -28,9 +29,9 @@ Base.IndexStyle(::Type{<:Const}) = IndexLinear()
 Base.size(C::Const) = size(C.a)
 Base.axes(C::Const) = axes(C.a)
 @eval Base.getindex(A::Const, i1::Int) =
-    (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1))
+    (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1))
 @eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =
-  (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
+  (Base.@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
 
 """
     @aliasscope expr
@@ -114,14 +115,43 @@ parent module.
 Supported values are 0, 1, 2, and 3.
 
 The effective optimization level is the minimum of that specified on the
-command line and in per-module settings.
+command line and in per-module settings. If a `--min-optlevel` value is
+set on the command line, that is enforced as a lower bound.
 """
 macro optlevel(n::Int)
     return Expr(:meta, :optlevel, n)
 end
 
 """
-    Experimental.@compiler_options optimize={0,1,2,3} compile={yes,no,all,min} infer={yes,no}
+    Experimental.@max_methods n::Int
+
+Set the maximum number of potentially-matching methods considered when running inference
+for methods defined in the current module. This setting affects inference of calls with
+incomplete knowledge of the argument types.
+
+Supported values are `1`, `2`, `3`, `4`, and `default` (currently equivalent to `3`).
+"""
+macro max_methods(n::Int)
+    0 < n < 5 || error("We must have that `1 <= max_methods <= 4`, but `max_methods = $n`.")
+    return Expr(:meta, :max_methods, n)
+end
+
+"""
+    Experimental.@max_methods n::Int function fname end
+
+Set the maximum number of potentially-matching methods considered when running inference
+for the generic function `fname`. Overrides any module-level or global inference settings
+for max_methods. This setting is global for the entire generic function (or more precisely
+the MethodTable).
+"""
+macro max_methods(n::Int, fdef::Expr)
+    0 < n <= 255 || error("We must have that `1 <= max_methods <= 255`, but `max_methods = $n`.")
+    (fdef.head == :function && length(fdef.args) == 1) || error("Second argument must be a function forward declaration")
+    return :(typeof($(esc(fdef))).name.max_methods = $(UInt8(n)))
+end
+
+"""
+    Experimental.@compiler_options optimize={0,1,2,3} compile={yes,no,all,min} infer={yes,no} max_methods={default,1,2,3,...}
 
 Set compiler options for code in the enclosing module. Options correspond directly to
 command-line options with the same name, where applicable. The following options
@@ -131,6 +161,7 @@ are currently supported:
   * `compile`: Toggle native code compilation. Currently only `min` is supported, which
     requests the minimum possible amount of compilation.
   * `infer`: Enable or disable type inference. If disabled, implies [`@nospecialize`](@ref).
+  * `max_methods`: Maximum number of matching methods considered when running type inference.
 """
 macro compiler_options(args...)
     opts = Expr(:block)
@@ -150,6 +181,12 @@ macro compiler_options(args...)
                 a = a === false || a === :no  ? 0 :
                     a === true  || a === :yes ? 1 : error("invalid argument to \"infer\" option")
                 push!(opts.args, Expr(:meta, :infer, a))
+            elseif ex.args[1] === :max_methods
+                a = ex.args[2]
+                a = a === :default ? 3 :
+                  a isa Int ? ((0 < a < 5) ? a : error("We must have that `1 <= max_methods <= 4`, but `max_methods = $a`.")) :
+                  error("invalid argument to \"max_methods\" option")
+                push!(opts.args, Expr(:meta, :max_methods, a))
             else
                 error("unknown option \"$(ex.args[1])\"")
             end
@@ -160,6 +197,30 @@ macro compiler_options(args...)
     return opts
 end
 
+"""
+    Experimental.@force_compile
+
+Force compilation of the block or function (Julia's built-in interpreter is blocked from executing it).
+
+# Examples
+
+```
+julia> occursin("interpreter", string(stacktrace(begin
+           # with forced compilation
+           Base.Experimental.@force_compile
+           backtrace()
+       end, true)))
+false
+
+julia> occursin("interpreter", string(stacktrace(begin
+           # without forced compilation
+           backtrace()
+       end, true)))
+true
+```
+"""
+macro force_compile() Expr(:meta, :force_compile) end
+
 # UI features for errors
 
 """
@@ -241,7 +302,8 @@ the handler for that type.
     This interface is experimental and subject to change or removal without notice.
 """
 function show_error_hints(io, ex, args...)
-    hinters = get!(()->[], _hint_handlers, typeof(ex))
+    hinters = get(_hint_handlers, typeof(ex), nothing)
+    isnothing(hinters) && return
     for handler in hinters
         try
             Base.invokelatest(handler, io, ex, args...)
@@ -252,4 +314,49 @@ function show_error_hints(io, ex, args...)
     end
 end
 
+# OpaqueClosure
+include("opaque_closure.jl")
+
+"""
+    Experimental.@overlay mt [function def]
+
+Define a method and add it to the method table `mt` instead of to the global method table.
+This can be used to implement a method override mechanism. Regular compilation will not
+consider these methods, and you should customize the compilation flow to look in these
+method tables (e.g., using [`Core.Compiler.OverlayMethodTable`](@ref)).
+
+"""
+macro overlay(mt, def)
+    def = macroexpand(__module__, def) # to expand @inline, @generated, etc
+    if !isexpr(def, [:function, :(=)])
+        error("@overlay requires a function Expr")
+    end
+    if isexpr(def.args[1], :call)
+        def.args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1])
+    elseif isexpr(def.args[1], :where)
+        def.args[1].args[1].args[1] = Expr(:overlay, mt, def.args[1].args[1].args[1])
+    else
+        error("@overlay requires a function Expr")
+    end
+    esc(def)
+end
+
+let new_mt(name::Symbol, mod::Module) = begin
+        ccall(:jl_check_top_level_effect, Cvoid, (Any, Cstring), mod, "@MethodTable")
+        ccall(:jl_new_method_table, Any, (Any, Any), name, mod)
+    end
+    @eval macro MethodTable(name::Symbol)
+        esc(:(const $name = $$new_mt($(quot(name)), $(__module__))))
+    end
+end
+
+"""
+    Experimental.@MethodTable(name)
+
+Create a new MethodTable in the current module, bound to `name`. This method table can be
+used with the [`Experimental.@overlay`](@ref) macro to define methods for a function without
+adding them to the global method table.
+"""
+:@MethodTable
+
 end
diff --git a/base/exports.jl b/base/exports.jl
index 121f42db09af97..dff6b0c9bc2081 100644
--- a/base/exports.jl
+++ b/base/exports.jl
@@ -22,6 +22,8 @@ export
     AbstractVector,
     AbstractVecOrMat,
     Array,
+    AbstractMatch,
+    AbstractPattern,
     AbstractDict,
     BigFloat,
     BigInt,
@@ -38,6 +40,7 @@ export
     ComplexF64,
     ComplexF32,
     ComplexF16,
+    ComposedFunction,
     DenseMatrix,
     DenseVecOrMat,
     DenseVector,
@@ -54,6 +57,7 @@ export
     IOStream,
     LinRange,
     Irrational,
+    LazyString,
     Matrix,
     MergeSort,
     Missing,
@@ -67,6 +71,7 @@ export
     Rational,
     Regex,
     RegexMatch,
+    Returns,
     RoundFromZero,
     RoundDown,
     RoundingMode,
@@ -117,6 +122,7 @@ export
     Cwstring,
 
 # Exceptions
+    CanonicalIndexError,
     CapturedException,
     CompositeException,
     DimensionMismatch,
@@ -165,6 +171,10 @@ export
     ≢,
     xor,
     ⊻,
+    nand,
+    nor,
+    ⊼,
+    ⊽,
     %,
     ÷,
     &,
@@ -226,6 +236,7 @@ export
     cbrt,
     ceil,
     cis,
+    cispi,
     clamp,
     cld,
     cmp,
@@ -376,12 +387,14 @@ export
     eachindex,
     eachrow,
     eachslice,
+    extrema!,
     extrema,
     fill!,
     fill,
     first,
     hcat,
     hvcat,
+    hvncat,
     indexin,
     argmax,
     argmin,
@@ -457,6 +470,7 @@ export
     searchsorted,
     searchsortedfirst,
     searchsortedlast,
+    insorted,
     startswith,
 
 # linear algebra
@@ -484,6 +498,7 @@ export
 # collections
     all!,
     all,
+    allequal,
     allunique,
     any!,
     any,
@@ -493,6 +508,7 @@ export
     count,
     delete!,
     deleteat!,
+    keepat!,
     eltype,
     empty!,
     empty,
@@ -561,11 +577,14 @@ export
     bytes2hex,
     chomp,
     chop,
+    chopprefix,
+    chopsuffix,
     codepoint,
     codeunit,
     codeunits,
     digits,
     digits!,
+    eachsplit,
     escape_string,
     hex2bytes,
     hex2bytes!,
@@ -650,7 +669,9 @@ export
     isbits,
     isequal,
     ismutable,
+    ismutabletype,
     isless,
+    isunordered,
     ifelse,
     objectid,
     sizeof,
@@ -663,6 +684,7 @@ export
     istaskstarted,
     istaskfailed,
     lock,
+    @lock,
     notify,
     ReentrantLock,
     schedule,
@@ -675,6 +697,7 @@ export
     timedwait,
     asyncmap,
     asyncmap!,
+    errormonitor,
 
 # channels
     take!,
@@ -685,9 +708,11 @@ export
 
 # missing values
     coalesce,
+    @coalesce,
     ismissing,
     missing,
     skipmissing,
+    @something,
     something,
     isnothing,
     nonmissingtype,
@@ -700,6 +725,7 @@ export
 # errors
     backtrace,
     catch_backtrace,
+    current_exceptions,
     error,
     rethrow,
     retry,
@@ -712,6 +738,9 @@ export
     convert,
     getproperty,
     setproperty!,
+    swapproperty!,
+    modifyproperty!,
+    replaceproperty!,
     fieldoffset,
     fieldname,
     fieldnames,
@@ -781,6 +810,7 @@ export
 
 # I/O and events
     close,
+    closewrite,
     countlines,
     eachline,
     readeach,
@@ -814,6 +844,7 @@ export
     readline,
     readlines,
     readuntil,
+    redirect_stdio,
     redirect_stderr,
     redirect_stdin,
     redirect_stdout,
@@ -868,10 +899,12 @@ export
     chown,
     cp,
     ctime,
+    diskstat,
     download,
     filemode,
     filesize,
     gperm,
+    hardlink,
     isblockdev,
     ischardev,
     isdir,
@@ -905,7 +938,7 @@ export
     uperm,
     walkdir,
 
-# external processes ## TODO: whittle down these exports.
+# external processes
     detach,
     getpid,
     ignorestatus,
@@ -915,6 +948,7 @@ export
     run,
     setenv,
     addenv,
+    setcpuaffinity,
     success,
     withenv,
 
@@ -955,6 +989,7 @@ export
     @v_str,    # version number
     @raw_str,  # raw string with no interpolation/unescaping
     @NamedTuple,
+    @lazy_str, # lazy string
 
     # documentation
     @text_str,
@@ -966,6 +1001,7 @@ export
 
     # profiling
     @time,
+    @showtime,
     @timed,
     @timev,
     @elapsed,
@@ -995,6 +1031,9 @@ export
     @polly,
 
     @assert,
+    @atomic,
+    @atomicswap,
+    @atomicreplace,
     @__dot__,
     @enum,
     @label,
diff --git a/base/expr.jl b/base/expr.jl
index ff5e92005b8dd0..e0cd8a9b0a32c6 100644
--- a/base/expr.jl
+++ b/base/expr.jl
@@ -1,11 +1,15 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+isexpr(@nospecialize(ex), heads) = isa(ex, Expr) && in(ex.head, heads)
+isexpr(@nospecialize(ex), heads, n::Int) = isa(ex, Expr) && in(ex.head, heads) && length(ex.args) == n
+const is_expr = isexpr
+
 ## symbols ##
 
 """
     gensym([tag])
 
-Generates a symbol which will not conflict with other variable names.
+Generates a symbol which will not conflict with other variable names (in the same module).
 """
 gensym() = ccall(:jl_gensym, Ref{Symbol}, ())
 
@@ -31,6 +35,9 @@ end
 
 ## expressions ##
 
+isexpr(@nospecialize(ex), head::Symbol) = isa(ex, Expr) && ex.head === head
+isexpr(@nospecialize(ex), head::Symbol, n::Int) = isa(ex, Expr) && ex.head === head && length(ex.args) == n
+
 copy(e::Expr) = exprarray(e.head, copy_exprargs(e.args))
 
 # copy parts of an AST that the compiler mutates
@@ -185,18 +192,74 @@ Give a hint to the compiler that this function is worth inlining.
 Small functions typically do not need the `@inline` annotation,
 as the compiler does it automatically. By using `@inline` on bigger functions,
 an extra nudge can be given to the compiler to inline it.
-This is shown in the following example:
+
+`@inline` can be applied immediately before the definition or in its function body.
 
 ```julia
-@inline function bigfunction(x)
-    #=
-        Function Definition
-    =#
+# annotate long-form definition
+@inline function longdef(x)
+    ...
+end
+
+# annotate short-form definition
+@inline shortdef(x) = ...
+
+# annotate anonymous function that a `do` block creates
+f() do
+    @inline
+    ...
 end
 ```
+
+!!! compat "Julia 1.8"
+    The usage within a function body requires at least Julia 1.8.
+
+---
+    @inline block
+
+Give a hint to the compiler that calls within `block` are worth inlining.
+
+```julia
+# The compiler will try to inline `f`
+@inline f(...)
+
+# The compiler will try to inline `f`, `g` and `+`
+@inline f(...) + g(...)
+```
+
+!!! note
+    A callsite annotation always has the precedence over the annotation applied to the
+    definition of the called function:
+    ```julia
+    @noinline function explicit_noinline(args...)
+        # body
+    end
+
+    let
+        @inline explicit_noinline(args...) # will be inlined
+    end
+    ```
+
+!!! note
+    When there are nested callsite annotations, the innermost annotation has the precedence:
+    ```julia
+    @noinline let a0, b0 = ...
+        a = @inline f(a0)  # the compiler will try to inline this call
+        b = f(b0)          # the compiler will NOT try to inline this call
+        return a, b
+    end
+    ```
+
+!!! warning
+    Although a callsite annotation will try to force inlining in regardless of the cost model,
+    there are still chances it can't succeed in it. Especially, recursive calls can not be
+    inlined even if they are annotated as `@inline`d.
+
+!!! compat "Julia 1.8"
+    The callsite annotation requires at least Julia 1.8.
 """
-macro inline(ex)
-    esc(isa(ex, Expr) ? pushmeta!(ex, :inline) : ex)
+macro inline(x)
+    return annotate_meta_def_or_block(x, :inline)
 end
 
 """
@@ -206,42 +269,339 @@ Give a hint to the compiler that it should not inline a function.
 
 Small functions are typically inlined automatically.
 By using `@noinline` on small functions, auto-inlining can be
-prevented. This is shown in the following example:
+prevented.
+
+`@noinline` can be applied immediately before the definition or in its function body.
 
 ```julia
-@noinline function smallfunction(x)
-    #=
-        Function Definition
-    =#
+# annotate long-form definition
+@noinline function longdef(x)
+    ...
+end
+
+# annotate short-form definition
+@noinline shortdef(x) = ...
+
+# annotate anonymous function that a `do` block creates
+f() do
+    @noinline
+    ...
 end
 ```
 
+!!! compat "Julia 1.8"
+    The usage within a function body requires at least Julia 1.8.
+
+---
+    @noinline block
+
+Give a hint to the compiler that it should not inline the calls within `block`.
+
+```julia
+# The compiler will try to not inline `f`
+@noinline f(...)
+
+# The compiler will try to not inline `f`, `g` and `+`
+@noinline f(...) + g(...)
+```
+
+!!! note
+    A callsite annotation always has the precedence over the annotation applied to the
+    definition of the called function:
+    ```julia
+    @inline function explicit_inline(args...)
+        # body
+    end
+
+    let
+        @noinline explicit_inline(args...) # will not be inlined
+    end
+    ```
+
+!!! note
+    When there are nested callsite annotations, the innermost annotation has the precedence:
+    ```julia
+    @inline let a0, b0 = ...
+        a = @noinline f(a0)  # the compiler will NOT try to inline this call
+        b = f(b0)            # the compiler will try to inline this call
+        return a, b
+    end
+    ```
+
+!!! compat "Julia 1.8"
+    The callsite annotation requires at least Julia 1.8.
+
+---
 !!! note
     If the function is trivial (for example returning a constant) it might get inlined anyway.
 """
-macro noinline(ex)
-    esc(isa(ex, Expr) ? pushmeta!(ex, :noinline) : ex)
+macro noinline(x)
+    return annotate_meta_def_or_block(x, :noinline)
 end
 
 """
     @pure ex
-    @pure(ex)
 
 `@pure` gives the compiler a hint for the definition of a pure function,
 helping for type inference.
 
-A pure function can only depend on immutable information.
-This also means a `@pure` function cannot use any global mutable state, including
-generic functions. Calls to generic functions depend on method tables which are
-mutable global state.
-Use with caution, incorrect `@pure` annotation of a function may introduce
-hard to identify bugs. Double check for calls to generic functions.
-This macro is intended for internal compiler use and may be subject to changes.
+!!! warning
+    This macro is intended for internal compiler use and may be subject to changes.
+
+!!! warning
+    In Julia 1.8 and higher, it is favorable to use [`@assume_effects`](@ref) instead of `@pure`.
+    This is because `@assume_effects` allows a finer grained control over Julia's purity
+    modeling and the effect system enables a wider range of optimizations.
 """
 macro pure(ex)
     esc(isa(ex, Expr) ? pushmeta!(ex, :pure) : ex)
 end
 
+"""
+    @constprop setting ex
+
+`@constprop` controls the mode of interprocedural constant propagation for the
+annotated function. Two `setting`s are supported:
+
+- `@constprop :aggressive ex`: apply constant propagation aggressively.
+  For a method where the return type depends on the value of the arguments,
+  this can yield improved inference results at the cost of additional compile time.
+- `@constprop :none ex`: disable constant propagation. This can reduce compile
+  times for functions that Julia might otherwise deem worthy of constant-propagation.
+  Common cases are for functions with `Bool`- or `Symbol`-valued arguments or keyword arguments.
+"""
+macro constprop(setting, ex)
+    if isa(setting, QuoteNode)
+        setting = setting.value
+    end
+    setting === :aggressive && return esc(isa(ex, Expr) ? pushmeta!(ex, :aggressive_constprop) : ex)
+    setting === :none && return esc(isa(ex, Expr) ? pushmeta!(ex, :no_constprop) : ex)
+    throw(ArgumentError("@constprop $setting not supported"))
+end
+
+"""
+    @assume_effects setting... ex
+
+`@assume_effects` overrides the compiler's effect modeling for the given method.
+`ex` must be a method definition or `@ccall` expression.
+
+```jldoctest
+julia> Base.@assume_effects :terminates_locally function pow(x)
+           # this :terminates_locally allows `pow` to be constant-folded
+           res = 1
+           1 < x < 20 || error("bad pow")
+           while x > 1
+               res *= x
+               x -= 1
+           end
+           return res
+       end
+pow (generic function with 1 method)
+
+julia> code_typed() do
+           pow(12)
+       end
+1-element Vector{Any}:
+ CodeInfo(
+1 ─     return 479001600
+) => Int64
+
+julia> Base.@assume_effects :total_may_throw @ccall jl_type_intersection(Vector{Int}::Any, Vector{<:Integer}::Any)::Any
+Vector{Int64} (alias for Array{Int64, 1})
+```
+
+!!! warning
+    Improper use of this macro causes undefined behavior (including crashes,
+    incorrect answers, or other hard to track bugs). Use with care and only if
+    absolutely required.
+
+In general, each `setting` value makes an assertion about the behavior of the
+function, without requiring the compiler to prove that this behavior is indeed
+true. These assertions are made for all world ages. It is thus advisable to limit
+the use of generic functions that may later be extended to invalidate the
+assumption (which would cause undefined behavior).
+
+The following `setting`s are supported.
+- `:consistent`
+- `:effect_free`
+- `:nothrow`
+- `:terminates_globally`
+- `:terminates_locally`
+- `:total`
+
+---
+# `:consistent`
+
+The `:consistent` setting asserts that for egal (`===`) inputs:
+- The manner of termination (return value, exception, non-termination) will always be the same.
+- If the method returns, the results will always be egal.
+
+!!! note
+    This in particular implies that the return value of the method must be
+    immutable. Multiple allocations of mutable objects (even with identical
+    contents) are not egal.
+
+!!! note
+    The `:consistent`-cy assertion is made world-age wise. More formally, write
+    ``fᵢ`` for the evaluation of ``f`` in world-age ``i``, then we require:
+    ```math
+    ∀ i, x, y: x ≡ y → fᵢ(x) ≡ fᵢ(y)
+    ```
+    However, for two world ages ``i``, ``j`` s.t. ``i ≠ j``, we may have ``fᵢ(x) ≢ fⱼ(y)``.
+
+    A further implication is that `:consistent` functions may not make their
+    return value dependent on the state of the heap or any other global state
+    that is not constant for a given world age.
+
+!!! note
+    The `:consistent`-cy includes all legal rewrites performed by the optimizer.
+    For example, floating-point fastmath operations are not considered `:consistent`,
+    because the optimizer may rewrite them causing the output to not be `:consistent`,
+    even for the same world age (e.g. because one ran in the interpreter, while
+    the other was optimized).
+
+!!! note
+    If `:consistent` functions terminate by throwing an exception, that exception
+    itself is not required to meet the egality requirement specified above.
+
+---
+# `:effect_free`
+
+The `:effect_free` setting asserts that the method is free of externally semantically
+visible side effects. The following is an incomplete list of externally semantically
+visible side effects:
+- Changing the value of a global variable.
+- Mutating the heap (e.g. an array or mutable value), except as noted below
+- Changing the method table (e.g. through calls to eval)
+- File/Network/etc. I/O
+- Task switching
+
+However, the following are explicitly not semantically visible, even if they
+may be observable:
+- Memory allocations (both mutable and immutable)
+- Elapsed time
+- Garbage collection
+- Heap mutations of objects whose lifetime does not exceed the method (i.e.
+  were allocated in the method and do not escape).
+- The returned value (which is externally visible, but not a side effect)
+
+The rule of thumb here is that an externally visible side effect is anything
+that would affect the execution of the remainder of the program if the function
+were not executed.
+
+!!! note
+    The `:effect_free` assertion is made both for the method itself and any code
+    that is executed by the method. Keep in mind that the assertion must be
+    valid for all world ages and limit use of this assertion accordingly.
+
+---
+# `:nothrow`
+
+The `:nothrow` settings asserts that this method does not terminate abnormally
+(i.e. will either always return a value or never return).
+
+!!! note
+    It is permissible for `:nothrow` annotated methods to make use of exception
+    handling internally as long as the exception is not rethrown out of the
+    method itself.
+
+!!! note
+    `MethodErrors` and similar exceptions count as abnormal termination.
+
+---
+# `:terminates_globally`
+
+The `:terminates_globally` settings asserts that this method will eventually terminate
+(either normally or abnormally), i.e. does not loop indefinitely.
+
+!!! note
+    This `:terminates_globally` assertion covers any other methods called by the annotated method.
+
+!!! note
+    The compiler will consider this a strong indication that the method will
+    terminate relatively *quickly* and may (if otherwise legal), call this
+    method at compile time. I.e. it is a bad idea to annotate this setting
+    on a method that *technically*, but not *practically*, terminates.
+
+---
+# `:terminates_locally`
+
+The `:terminates_locally` setting is like `:terminates_globally`, except that it only
+applies to syntactic control flow *within* the annotated method. It is thus
+a much weaker (and thus safer) assertion that allows for the possibility of
+non-termination if the method calls some other method that does not terminate.
+
+!!! note
+    `:terminates_globally` implies `:terminates_locally`.
+
+---
+# `:total`
+
+This `setting` combines the following other assertions:
+- `:consistent`
+- `:effect_free`
+- `:nothrow`
+- `:terminates_globally`
+and is a convenient shortcut.
+
+---
+# `:total_may_throw`
+
+This `setting` combines the following other assertions:
+- `:consistent`
+- `:effect_free`
+- `:terminates_globally`
+and is a convenient shortcut.
+
+!!! note
+    This setting is particularly useful since it allows the compiler to evaluate a call of
+    the applied method when all the call arguments are fully known to be constant, no matter
+    if the call results in an error or not.
+
+    `@assume_effects :total_may_throw` is similar to [`@pure`](@ref) with the primary
+    distinction that the `:consistent`-cy requirement applies world-age wise rather
+    than globally as described above. However, in particular, a method annotated
+    `@pure` should always be `:total` or `:total_may_throw`.
+    Another advantage is that effects introduced by `@assume_effects` are propagated to
+    callers interprocedurally while a purity defined by `@pure` is not.
+"""
+macro assume_effects(args...)
+    (consistent, effect_free, nothrow, terminates_globally, terminates_locally) =
+        (false, false, false, false, false, false)
+    for setting in args[1:end-1]
+        if isa(setting, QuoteNode)
+            setting = setting.value
+        end
+        if setting === :consistent
+            consistent = true
+        elseif setting === :effect_free
+            effect_free = true
+        elseif setting === :nothrow
+            nothrow = true
+        elseif setting === :terminates_globally
+            terminates_globally = true
+        elseif setting === :terminates_locally
+            terminates_locally = true
+        elseif setting === :total
+            consistent = effect_free = nothrow = terminates_globally = true
+        elseif setting === :total_may_throw
+            consistent = effect_free = terminates_globally = true
+        else
+            throw(ArgumentError("@assume_effects $setting not supported"))
+        end
+    end
+    ex = args[end]
+    isa(ex, Expr) || throw(ArgumentError("Bad expression `$ex` in `@assume_effects [settings] ex`"))
+    if ex.head === :macrocall && ex.args[1] == Symbol("@ccall")
+        ex.args[1] = GlobalRef(Base, Symbol("@ccall_effects"))
+        insert!(ex.args, 3, Core.Compiler.encode_effects_override(Core.Compiler.EffectsOverride(
+            consistent, effect_free, nothrow, terminates_globally, terminates_locally
+        )))
+        return esc(ex)
+    end
+    return esc(pushmeta!(ex, :purity, consistent, effect_free, nothrow, terminates_globally, terminates_locally))
+end
+
 """
     @propagate_inbounds
 
@@ -266,6 +626,15 @@ end
 
 ## some macro utilities ##
 
+unwrap_macrocalls(@nospecialize(x)) = x
+function unwrap_macrocalls(ex::Expr)
+    inner = ex
+    while inner.head === :macrocall
+        inner = inner.args[end]::Expr
+    end
+    return inner
+end
+
 function pushmeta!(ex::Expr, sym::Symbol, args::Any...)
     if isempty(args)
         tag = sym
@@ -273,10 +642,7 @@ function pushmeta!(ex::Expr, sym::Symbol, args::Any...)
         tag = Expr(sym, args...)::Expr
     end
 
-    inner = ex
-    while inner.head === :macrocall
-        inner = inner.args[end]::Expr
-    end
+    inner = unwrap_macrocalls(ex)
 
     idx, exargs = findmeta(inner)
     if idx != 0
@@ -326,8 +692,23 @@ function findmetaarg(metaargs, sym)
     return 0
 end
 
-function is_short_function_def(ex)
-    ex.head === :(=) || return false
+function annotate_meta_def_or_block(@nospecialize(ex), meta::Symbol)
+    inner = unwrap_macrocalls(ex)
+    if is_function_def(inner)
+        # annotation on a definition
+        return esc(pushmeta!(ex, meta))
+    else
+        # annotation on a block
+        return Expr(:block,
+                    Expr(meta, true),
+                    Expr(:local, Expr(:(=), :val, esc(ex))),
+                    Expr(meta, false),
+                    :val)
+    end
+end
+
+function is_short_function_def(@nospecialize(ex))
+    isexpr(ex, :(=)) || return false
     while length(ex.args) >= 1 && isa(ex.args[1], Expr)
         (ex.args[1].head === :call) && return true
         (ex.args[1].head === :where || ex.args[1].head === :(::)) || return false
@@ -335,9 +716,11 @@ function is_short_function_def(ex)
     end
     return false
 end
+is_function_def(@nospecialize(ex)) =
+    return isexpr(ex, :function) || is_short_function_def(ex) || isexpr(ex, :->)
 
 function findmeta(ex::Expr)
-    if ex.head === :function || is_short_function_def(ex) || ex.head === :->
+    if is_function_def(ex)
         body = ex.args[2]::Expr
         body.head === :block || error(body, " is not a block expression")
         return findmeta_block(ex.args)
@@ -391,7 +774,7 @@ end
 
 """
     @generated f
-    @generated(f)
+
 `@generated` is used to annotate a function which will be generated.
 In the body of the generated function, only types of arguments can be read
 (not the values). The function returns a quoted expression evaluated when the
@@ -401,7 +784,7 @@ the global scope or depending on mutable elements.
 See [Metaprogramming](@ref) for further details.
 
 ## Example:
-```julia
+```jldoctest
 julia> @generated function bar(x)
            if x <: Integer
                return :(x ^ 2)
@@ -422,12 +805,16 @@ macro generated(f)
     if isa(f, Expr) && (f.head === :function || is_short_function_def(f))
         body = f.args[2]
         lno = body.args[1]
+        tmp = gensym("tmp")
         return Expr(:escape,
                     Expr(f.head, f.args[1],
                          Expr(:block,
                               lno,
                               Expr(:if, Expr(:generated),
-                                   body,
+                                   # https://github.com/JuliaLang/julia/issues/25678
+                                   Expr(:block,
+                                        :(local $tmp = $body),
+                                        :(if $tmp isa $(GlobalRef(Core, :CodeInfo)); return $tmp; else $tmp; end)),
                                    Expr(:block,
                                         Expr(:meta, :generated_only),
                                         Expr(:return, nothing))))))
@@ -435,3 +822,234 @@ macro generated(f)
         error("invalid syntax; @generated must be used with a function definition")
     end
 end
+
+
+"""
+    @atomic var
+    @atomic order ex
+
+Mark `var` or `ex` as being performed atomically, if `ex` is a supported expression.
+
+    @atomic a.b.x = new
+    @atomic a.b.x += addend
+    @atomic :release a.b.x = new
+    @atomic :acquire_release a.b.x += addend
+
+Perform the store operation expressed on the right atomically and return the
+new value.
+
+With `=`, this operation translates to a `setproperty!(a.b, :x, new)` call.
+With any operator also, this operation translates to a `modifyproperty!(a.b,
+:x, +, addend)[2]` call.
+
+    @atomic a.b.x max arg2
+    @atomic a.b.x + arg2
+    @atomic max(a.b.x, arg2)
+    @atomic :acquire_release max(a.b.x, arg2)
+    @atomic :acquire_release a.b.x + arg2
+    @atomic :acquire_release a.b.x max arg2
+
+Perform the binary operation expressed on the right atomically. Store the
+result into the field in the first argument and return the values `(old, new)`.
+
+This operation translates to a `modifyproperty!(a.b, :x, func, arg2)` call.
+
+
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
+
+```jldoctest
+julia> mutable struct Atomic{T}; @atomic x::T; end
+
+julia> a = Atomic(1)
+Atomic{Int64}(1)
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+1
+
+julia> @atomic :sequentially_consistent a.x = 2 # set field x of a, with sequential consistency
+2
+
+julia> @atomic a.x += 1 # increment field x of a, with sequential consistency
+3
+
+julia> @atomic a.x + 1 # increment field x of a, with sequential consistency
+3 => 4
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+4
+
+julia> @atomic max(a.x, 10) # change field x of a to the max value, with sequential consistency
+4 => 10
+
+julia> @atomic a.x max 5 # again change field x of a to the max value, with sequential consistency
+10 => 10
+```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
+"""
+macro atomic(ex)
+    if !isa(ex, Symbol) && !is_expr(ex, :(::))
+        return make_atomic(QuoteNode(:sequentially_consistent), ex)
+    end
+    return esc(Expr(:atomic, ex))
+end
+macro atomic(order, ex)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomic(order, ex)
+end
+macro atomic(a1, op, a2)
+    return make_atomic(QuoteNode(:sequentially_consistent), a1, op, a2)
+end
+macro atomic(order, a1, op, a2)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomic(order, a1, op, a2)
+end
+function make_atomic(order, ex)
+    @nospecialize
+    if ex isa Expr
+        if isexpr(ex, :., 2)
+            l, r = esc(ex.args[1]), esc(ex.args[2])
+            return :(getproperty($l, $r, $order))
+        elseif isexpr(ex, :call, 3)
+            return make_atomic(order, ex.args[2], ex.args[1], ex.args[3])
+        elseif ex.head === :(=)
+            l, r = ex.args[1], esc(ex.args[2])
+            if is_expr(l, :., 2)
+                ll, lr = esc(l.args[1]), esc(l.args[2])
+                return :(setproperty!($ll, $lr, $r, $order))
+            end
+        end
+        if length(ex.args) == 2
+            if ex.head === :(+=)
+                op = :+
+            elseif ex.head === :(-=)
+                op = :-
+            elseif @isdefined string
+                shead = string(ex.head)
+                if endswith(shead, '=')
+                    op = Symbol(shead[1:prevind(shead, end)])
+                end
+            end
+            if @isdefined(op)
+                return Expr(:ref, make_atomic(order, ex.args[1], op, ex.args[2]), 2)
+            end
+        end
+    end
+    error("could not parse @atomic expression $ex")
+end
+function make_atomic(order, a1, op, a2)
+    @nospecialize
+    is_expr(a1, :., 2) || error("@atomic modify expression missing field access")
+    a1l, a1r, op, a2 = esc(a1.args[1]), esc(a1.args[2]), esc(op), esc(a2)
+    return :(modifyproperty!($a1l, $a1r, $op, $a2, $order))
+end
+
+
+"""
+    @atomicswap a.b.x = new
+    @atomicswap :sequentially_consistent a.b.x = new
+
+Stores `new` into `a.b.x` and returns the old value of `a.b.x`.
+
+This operation translates to a `swapproperty!(a.b, :x, new)` call.
+
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
+
+```jldoctest
+julia> mutable struct Atomic{T}; @atomic x::T; end
+
+julia> a = Atomic(1)
+Atomic{Int64}(1)
+
+julia> @atomicswap a.x = 2+2 # replace field x of a with 4, with sequential consistency
+1
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+4
+```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
+"""
+macro atomicswap(order, ex)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomicswap(order, ex)
+end
+macro atomicswap(ex)
+    return make_atomicswap(QuoteNode(:sequentially_consistent), ex)
+end
+function make_atomicswap(order, ex)
+    @nospecialize
+    is_expr(ex, :(=), 2) || error("@atomicswap expression missing assignment")
+    l, val = ex.args[1], esc(ex.args[2])
+    is_expr(l, :., 2) || error("@atomicswap expression missing field access")
+    ll, lr = esc(l.args[1]), esc(l.args[2])
+    return :(swapproperty!($ll, $lr, $val, $order))
+end
+
+
+"""
+    @atomicreplace a.b.x expected => desired
+    @atomicreplace :sequentially_consistent a.b.x expected => desired
+    @atomicreplace :sequentially_consistent :monotonic a.b.x expected => desired
+
+Perform the conditional replacement expressed by the pair atomically, returning
+the values `(old, success::Bool)`. Where `success` indicates whether the
+replacement was completed.
+
+This operation translates to a `replaceproperty!(a.b, :x, expected, desired)` call.
+
+See [Per-field atomics](@ref man-atomics) section in the manual for more details.
+
+```jldoctest
+julia> mutable struct Atomic{T}; @atomic x::T; end
+
+julia> a = Atomic(1)
+Atomic{Int64}(1)
+
+julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
+(old = 1, success = true)
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+2
+
+julia> @atomicreplace a.x 1 => 2 # replace field x of a with 2 if it was 1, with sequential consistency
+(old = 2, success = false)
+
+julia> xchg = 2 => 0; # replace field x of a with 0 if it was 1, with sequential consistency
+
+julia> @atomicreplace a.x xchg
+(old = 2, success = true)
+
+julia> @atomic a.x # fetch field x of a, with sequential consistency
+0
+```
+
+!!! compat "Julia 1.7"
+    This functionality requires at least Julia 1.7.
+"""
+macro atomicreplace(success_order, fail_order, ex, old_new)
+    fail_order isa QuoteNode || (fail_order = esc(fail_order))
+    success_order isa QuoteNode || (success_order = esc(success_order))
+    return make_atomicreplace(success_order, fail_order, ex, old_new)
+end
+macro atomicreplace(order, ex, old_new)
+    order isa QuoteNode || (order = esc(order))
+    return make_atomicreplace(order, order, ex, old_new)
+end
+macro atomicreplace(ex, old_new)
+    return make_atomicreplace(QuoteNode(:sequentially_consistent), QuoteNode(:sequentially_consistent), ex, old_new)
+end
+function make_atomicreplace(success_order, fail_order, ex, old_new)
+    @nospecialize
+    is_expr(ex, :., 2) || error("@atomicreplace expression missing field access")
+    ll, lr = esc(ex.args[1]), esc(ex.args[2])
+    if is_expr(old_new, :call, 3) && old_new.args[1] === :(=>)
+        exp, rep = esc(old_new.args[2]), esc(old_new.args[3])
+        return :(replaceproperty!($ll, $lr, $exp, $rep, $success_order, $fail_order))
+    else
+        old_new = esc(old_new)
+        return :(replaceproperty!($ll, $lr, $old_new::Pair..., $success_order, $fail_order))
+    end
+end
diff --git a/base/fastmath.jl b/base/fastmath.jl
index 62223d5b88516c..05a5ce0503e689 100644
--- a/base/fastmath.jl
+++ b/base/fastmath.jl
@@ -158,7 +158,7 @@ end
 
 # Basic arithmetic
 
-const FloatTypes = Union{Float32,Float64}
+const FloatTypes = Union{Float16,Float32,Float64}
 
 sub_fast(x::FloatTypes) = neg_float_fast(x)
 
@@ -273,37 +273,18 @@ end
 
 
 # Math functions
+exp2_fast(x::Union{Float32,Float64})  = Base.Math.exp2_fast(x)
+exp_fast(x::Union{Float32,Float64})   = Base.Math.exp_fast(x)
+exp10_fast(x::Union{Float32,Float64}) = Base.Math.exp10_fast(x)
 
 # builtins
 
-pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32", llvmcall, Float32, (Float32, Int32), x, y)
-pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64", llvmcall, Float64, (Float64, Int32), x, y)
+pow_fast(x::Float32, y::Integer) = ccall("llvm.powi.f32.i32", llvmcall, Float32, (Float32, Int32), x, y)
+pow_fast(x::Float64, y::Integer) = ccall("llvm.powi.f64.i32", llvmcall, Float64, (Float64, Int32), x, y)
 pow_fast(x::FloatTypes, ::Val{p}) where {p} = pow_fast(x, p) # inlines already via llvm.powi
 @inline pow_fast(x, v::Val) = Base.literal_pow(^, x, v)
 
 sqrt_fast(x::FloatTypes) = sqrt_llvm_fast(x)
-
-# libm
-
-const libm = Base.libm_name
-
-for f in (:acosh, :asinh, :atanh, :cbrt,
-          :cosh, :exp2, :expm1, :log10, :log1p, :log2,
-          :log, :sinh, :tanh)
-    f_fast = fast_op[f]
-    @eval begin
-        $f_fast(x::Float32) =
-            ccall(($(string(f,"f")),libm), Float32, (Float32,), x)
-        $f_fast(x::Float64) =
-            ccall(($(string(f)),libm), Float64, (Float64,), x)
-    end
-end
-
-pow_fast(x::Float32, y::Float32) =
-    ccall(("powf",libm), Float32, (Float32,Float32), x, y)
-pow_fast(x::Float64, y::Float64) =
-    ccall(("pow",libm), Float64, (Float64,Float64), x, y)
-
 sincos_fast(v::FloatTypes) = sincos(v)
 
 @inline function sincos_fast(v::Float16)
diff --git a/base/file.jl b/base/file.jl
index a844f06d0cf546..371a56acf753aa 100644
--- a/base/file.jl
+++ b/base/file.jl
@@ -8,6 +8,8 @@ export
     chown,
     cp,
     cptree,
+    diskstat,
+    hardlink,
     mkdir,
     mkpath,
     mktemp,
@@ -34,6 +36,8 @@ export
 
 Get the current working directory.
 
+See also: [`cd`](@ref), [`tempdir`](@ref).
+
 # Examples
 ```julia-repl
 julia> pwd()
@@ -56,7 +60,7 @@ function pwd()
         elseif rc == Base.UV_ENOBUFS
             resize!(buf, sz[] - 1) # space for null-terminator implied by StringVector
         else
-            uv_error(:cwd, rc)
+            uv_error("pwd()", rc)
         end
     end
 end
@@ -67,6 +71,8 @@ end
 
 Set the current working directory.
 
+See also: [`pwd`](@ref), [`mkdir`](@ref), [`mkpath`](@ref), [`mktempdir`](@ref).
+
 # Examples
 ```julia-repl
 julia> cd("/home/JuliaUser/Projects/julia")
@@ -81,7 +87,9 @@ julia> pwd()
 ```
 """
 function cd(dir::AbstractString)
-    uv_error("chdir $dir", ccall(:uv_chdir, Cint, (Cstring,), dir))
+    err = ccall(:uv_chdir, Cint, (Cstring,), dir)
+    err < 0 && uv_error("cd($(repr(dir)))", err)
+    return nothing
 end
 cd() = cd(homedir())
 
@@ -173,10 +181,10 @@ function mkdir(path::AbstractString; mode::Integer = 0o777)
                     (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Cint, Ptr{Cvoid}),
                     C_NULL, req, path, checkmode(mode), C_NULL)
         if ret < 0
-            ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
-            uv_error("mkdir", ret)
+            uv_fs_req_cleanup(req)
+            uv_error("mkdir($(repr(path)); mode=0o$(string(mode,base=8)))", ret)
         end
-        ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
+        uv_fs_req_cleanup(req)
         return path
     finally
         Libc.free(req)
@@ -186,21 +194,20 @@ end
 """
     mkpath(path::AbstractString; mode::Unsigned = 0o777)
 
-Create all directories in the given `path`, with permissions `mode`. `mode` defaults to
-`0o777`, modified by the current file creation mask.
-Return `path`.
+Create all intermediate directories in the `path` as required. Directories are created with
+the permissions `mode` which defaults to `0o777` and is modified by the current file
+creation mask. Unlike [`mkdir`](@ref), `mkpath` does not error if `path` (or parts of it)
+already exists. However, an error will be thrown if `path` (or parts of it) points to an
+existing file. Return `path`.
+
+If `path` includes a filename you will probably want to use `mkpath(dirname(path))` to
+avoid creating a directory using the filename.
 
 # Examples
 ```julia-repl
-julia> mkdir("testingdir")
-"testingdir"
+julia> cd(mktempdir())
 
-julia> cd("testingdir")
-
-julia> pwd()
-"/home/JuliaUser/testingdir"
-
-julia> mkpath("my/test/dir")
+julia> mkpath("my/test/dir") # creates three directories
 "my/test/dir"
 
 julia> readdir()
@@ -216,6 +223,13 @@ julia> readdir()
 julia> readdir("test")
 1-element Array{String,1}:
  "dir"
+
+julia> mkpath("intermediate_dir/actually_a_directory.txt") # creates two directories
+"intermediate_dir/actually_a_directory.txt"
+
+julia> isdir("intermediate_dir/actually_a_directory.txt")
+true
+
 ```
 """
 function mkpath(path::AbstractString; mode::Integer = 0o777)
@@ -251,7 +265,7 @@ julia> rm("my", recursive=true)
 julia> rm("this_file_does_not_exist", force=true)
 
 julia> rm("this_file_does_not_exist")
-ERROR: IOError: unlink: no such file or directory (ENOENT)
+ERROR: IOError: unlink("this_file_does_not_exist"): no such file or directory (ENOENT)
 Stacktrace:
 [...]
 ```
@@ -261,7 +275,8 @@ function rm(path::AbstractString; force::Bool=false, recursive::Bool=false)
         try
             @static if Sys.iswindows()
                 # is writable on windows actually means "is deletable"
-                if (filemode(lstat(path)) & 0o222) == 0
+                st = lstat(path)
+                if ispath(st) && (filemode(st) & 0o222) == 0
                     chmod(path, 0o777)
                 end
             end
@@ -274,16 +289,25 @@ function rm(path::AbstractString; force::Bool=false, recursive::Bool=false)
         end
     else
         if recursive
-            for p in readdir(path)
-                rm(joinpath(path, p), force=force, recursive=true)
+            try
+                for p in readdir(path)
+                    rm(joinpath(path, p), force=force, recursive=true)
+                end
+            catch err
+                if !(force && isa(err, IOError) && err.code==Base.UV_EACCES)
+                    rethrow(err)
+                end
             end
         end
-        @static if Sys.iswindows()
-            ret = ccall(:_wrmdir, Int32, (Cwstring,), path)
-        else
-            ret = ccall(:rmdir, Int32, (Cstring,), path)
+        req = Libc.malloc(_sizeof_uv_fs)
+        try
+            ret = ccall(:uv_fs_rmdir, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}), C_NULL, req, path, C_NULL)
+            uv_fs_req_cleanup(req)
+            ret < 0 && uv_error("rm($(repr(path)))", ret)
+            nothing
+        finally
+            Libc.free(req)
         end
-        systemerror(:rmdir, ret != 0, extrainfo=path)
     end
 end
 
@@ -298,12 +322,12 @@ function checkfor_mv_cp_cptree(src::AbstractString, dst::AbstractString, txt::Ab
             if Base.samefile(src, dst)
                 abs_src = islink(src) ? abspath(readlink(src)) : abspath(src)
                 abs_dst = islink(dst) ? abspath(readlink(dst)) : abspath(dst)
-                throw(ArgumentError(string("'src' and 'dst' refer to the same file/dir.",
+                throw(ArgumentError(string("'src' and 'dst' refer to the same file/dir. ",
                                            "This is not supported.\n  ",
                                            "`src` refers to: $(abs_src)\n  ",
                                            "`dst` refers to: $(abs_dst)\n")))
             end
-            rm(dst; recursive=true)
+            rm(dst; recursive=true, force=true)
         else
             throw(ArgumentError(string("'$dst' exists. `force=true` ",
                                        "is required to remove '$dst' before $(txt).")))
@@ -311,8 +335,8 @@ function checkfor_mv_cp_cptree(src::AbstractString, dst::AbstractString, txt::Ab
     end
 end
 
-function cptree(src::AbstractString, dst::AbstractString; force::Bool=false,
-                                                          follow_symlinks::Bool=false)
+function cptree(src::String, dst::String; force::Bool=false,
+                                          follow_symlinks::Bool=false)
     isdir(src) || throw(ArgumentError("'$src' is not a directory. Use `cp(src, dst)`"))
     checkfor_mv_cp_cptree(src, dst, "copying"; force=force)
     mkdir(dst)
@@ -328,6 +352,8 @@ function cptree(src::AbstractString, dst::AbstractString; force::Bool=false,
         end
     end
 end
+cptree(src::AbstractString, dst::AbstractString; kwargs...) =
+    cptree(String(src)::String, String(dst)::String; kwargs...)
 
 """
     cp(src::AbstractString, dst::AbstractString; force::Bool=false, follow_symlinks::Bool=false)
@@ -339,6 +365,13 @@ If `follow_symlinks=false`, and `src` is a symbolic link, `dst` will be created
 symbolic link. If `follow_symlinks=true` and `src` is a symbolic link, `dst` will be a copy
 of the file or directory `src` refers to.
 Return `dst`.
+
+!!! note
+    The `cp` function is different from the `cp` command. The `cp` function always operates on
+    the assumption that `dst` is a file, while the command does different things depending
+    on whether `dst` is a directory or a file.
+    Using `force=true` when `dst` is a directory will result in loss of all the contents present
+    in the `dst` directory, and `dst` will become a file that has the contents of `src` instead.
 """
 function cp(src::AbstractString, dst::AbstractString; force::Bool=false,
                                                       follow_symlinks::Bool=false)
@@ -396,6 +429,7 @@ end
 
 """
     touch(path::AbstractString)
+    touch(fd::File)
 
 Update the last-modified timestamp on a file to the current time.
 
@@ -421,19 +455,14 @@ We can see the [`mtime`](@ref) has been modified by `touch`.
 function touch(path::AbstractString)
     f = open(path, JL_O_WRONLY | JL_O_CREAT, 0o0666)
     try
-        if Sys.isunix()
-            ret = ccall(:futimes, Cint, (Cint, Ptr{Cvoid}), fd(f), C_NULL)
-            systemerror(:futimes, ret != 0, extrainfo=path)
-        else
-            t = time()
-            futime(f,t,t)
-        end
+        touch(f)
     finally
         close(f)
     end
     path
 end
 
+
 """
     tempdir()
 
@@ -453,7 +482,7 @@ function tempdir()
         elseif rc == Base.UV_ENOBUFS
             resize!(buf, sz[] - 1)  # space for null-terminator implied by StringVector
         else
-            uv_error(:tmpdir, rc)
+            uv_error("tempdir()", rc)
         end
     end
 end
@@ -475,7 +504,7 @@ function prepare_for_deletion(path::AbstractString)
 
     try chmod(path, filemode(path) | 0o333)
     catch; end
-    for (root, dirs, files) in walkdir(path)
+    for (root, dirs, files) in walkdir(path; onerror=x->())
         for dir in dirs
             dpath = joinpath(root, dir)
             try chmod(dpath, filemode(dpath) | 0o333)
@@ -522,15 +551,52 @@ end
 
 const temp_prefix = "jl_"
 
-if Sys.iswindows()
+# Use `Libc.rand()` to generate random strings
+function _rand_filename(len = 10)
+    slug = Base.StringVector(len)
+    chars = b"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    for i = 1:len
+        slug[i] = chars[(Libc.rand() % length(chars)) + 1]
+    end
+    return String(slug)
+end
+
+
+# Obtain a temporary filename.
+function tempname(parent::AbstractString=tempdir(); max_tries::Int = 100, cleanup::Bool=true)
+    isdir(parent) || throw(ArgumentError("$(repr(parent)) is not a directory"))
+
+    prefix = joinpath(parent, temp_prefix)
+    filename = nothing
+    for i in 1:max_tries
+        filename = string(prefix, _rand_filename())
+        if ispath(filename)
+            filename = nothing
+        else
+            break
+        end
+    end
 
-function _win_tempname(temppath::AbstractString, uunique::UInt32)
+    if filename === nothing
+        error("tempname: max_tries exhausted")
+    end
+
+    cleanup && temp_cleanup_later(filename)
+    return filename
+end
+
+if Sys.iswindows()
+# While this isn't a true analog of `mkstemp`, it _does_ create an
+# empty file for us, ensuring that other simultaneous calls to
+# `_win_mkstemp()` won't collide, so it's a better name for the
+# function than `tempname()`.
+function _win_mkstemp(temppath::AbstractString)
     tempp = cwstring(temppath)
     temppfx = cwstring(temp_prefix)
     tname = Vector{UInt16}(undef, 32767)
     uunique = ccall(:GetTempFileNameW, stdcall, UInt32,
                     (Ptr{UInt16}, Ptr{UInt16}, UInt32, Ptr{UInt16}),
-                    tempp, temppfx, uunique, tname)
+                    tempp, temppfx, UInt32(0), tname)
     windowserror("GetTempFileName", uunique == 0)
     lentname = something(findfirst(iszero, tname))
     @assert lentname > 0
@@ -539,49 +605,13 @@ function _win_tempname(temppath::AbstractString, uunique::UInt32)
 end
 
 function mktemp(parent::AbstractString=tempdir(); cleanup::Bool=true)
-    filename = _win_tempname(parent, UInt32(0))
+    filename = _win_mkstemp(parent)
     cleanup && temp_cleanup_later(filename)
     return (filename, Base.open(filename, "r+"))
 end
 
-# generate a random string from random bytes
-function _rand_string()
-    nchars = 10
-    A = Vector{UInt8}(undef, nchars)
-    windowserror("SystemFunction036 (RtlGenRandom)", 0 == ccall(
-        (:SystemFunction036, :Advapi32), stdcall, UInt8, (Ptr{Cvoid}, UInt32),
-            A, sizeof(A)))
-
-    slug = Base.StringVector(10)
-    chars = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
-    for i = 1:nchars
-        slug[i] = chars[(A[i] % length(chars)) + 1]
-    end
-    return name = String(slug)
-end
-
-function tempname(parent::AbstractString=tempdir(); cleanup::Bool=true)
-    isdir(parent) || throw(ArgumentError("$(repr(parent)) is not a directory"))
-    name = _rand_string()
-    filename = joinpath(parent, temp_prefix * name)
-    @assert !ispath(filename)
-    cleanup && temp_cleanup_later(filename)
-    return filename
-end
-
 else # !windows
 
-# Obtain a temporary filename.
-function tempname(parent::AbstractString=tempdir(); cleanup::Bool=true)
-    isdir(parent) || throw(ArgumentError("$(repr(parent)) is not a directory"))
-    p = ccall(:tempnam, Cstring, (Cstring, Cstring), parent, temp_prefix)
-    systemerror(:tempnam, p == C_NULL)
-    s = unsafe_string(p)
-    Libc.free(p)
-    cleanup && temp_cleanup_later(s)
-    return s
-end
-
 # Create and return the name of a temporary file along with an IOStream
 function mktemp(parent::AbstractString=tempdir(); cleanup::Bool=true)
     b = joinpath(parent, temp_prefix * "XXXXXX")
@@ -591,7 +621,6 @@ function mktemp(parent::AbstractString=tempdir(); cleanup::Bool=true)
     return (b, fdio(p, true))
 end
 
-
 end # os-test
 
 
@@ -634,6 +663,11 @@ tempname()
 Return `(path, io)`, where `path` is the path of a new temporary file in `parent`
 and `io` is an open file object for this path. The `cleanup` option controls whether
 the temporary file is automatically deleted when the process exits.
+
+!!! compat "Julia 1.3"
+    The `cleanup` keyword argument was added in Julia 1.3. Relatedly, starting from 1.3,
+    Julia will remove the temporary paths created by `mktemp` when the Julia process exits,
+    unless `cleanup` is explicitly set to `false`.
 """
 mktemp(parent)
 
@@ -645,6 +679,16 @@ constructed from the given prefix and a random suffix, and return its path.
 Additionally, any trailing `X` characters may be replaced with random characters.
 If `parent` does not exist, throw an error. The `cleanup` option controls whether
 the temporary directory is automatically deleted when the process exits.
+
+!!! compat "Julia 1.2"
+    The `prefix` keyword argument was added in Julia 1.2.
+
+!!! compat "Julia 1.3"
+    The `cleanup` keyword argument was added in Julia 1.3. Relatedly, starting from 1.3,
+    Julia will remove the temporary paths created by `mktempdir` when the Julia process
+    exits, unless `cleanup` is explicitly set to `false`.
+
+See also: [`mktemp`](@ref), [`mkdir`](@ref).
 """
 function mktempdir(parent::AbstractString=tempdir();
     prefix::AbstractString=temp_prefix, cleanup::Bool=true)
@@ -661,11 +705,11 @@ function mktempdir(parent::AbstractString=tempdir();
                     (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
                     C_NULL, req, tpath, C_NULL)
         if ret < 0
-            ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
-            uv_error("mktempdir", ret)
+            uv_fs_req_cleanup(req)
+            uv_error("mktempdir($(repr(parent)))", ret)
         end
         path = unsafe_string(ccall(:jl_uv_fs_t_path, Cstring, (Ptr{Cvoid},), req))
-        ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
+        uv_fs_req_cleanup(req)
         cleanup && temp_cleanup_later(path)
         return path
     finally
@@ -679,6 +723,8 @@ end
 
 Apply the function `f` to the result of [`mktemp(parent)`](@ref) and remove the
 temporary file upon completion.
+
+See also: [`mktempdir`](@ref).
 """
 function mktemp(fn::Function, parent::AbstractString=tempdir())
     (tmp_path, tmp_io) = mktemp(parent, cleanup=false)
@@ -701,6 +747,11 @@ end
 
 Apply the function `f` to the result of [`mktempdir(parent; prefix)`](@ref) and remove the
 temporary directory all of its contents upon completion.
+
+See also: [`mktemp`](@ref), [`mkdir`](@ref).
+
+!!! compat "Julia 1.2"
+    The `prefix` keyword argument was added in Julia 1.2.
 """
 function mktempdir(fn::Function, parent::AbstractString=tempdir();
     prefix::AbstractString=temp_prefix)
@@ -802,28 +853,31 @@ julia> readdir(abspath("base"), join=true)
 """
 function readdir(dir::AbstractString; join::Bool=false, sort::Bool=true)
     # Allocate space for uv_fs_t struct
-    uv_readdir_req = zeros(UInt8, ccall(:jl_sizeof_uv_fs_t, Int32, ()))
-
-    # defined in sys.c, to call uv_fs_readdir, which sets errno on error.
-    err = ccall(:uv_fs_scandir, Int32, (Ptr{Cvoid}, Ptr{UInt8}, Cstring, Cint, Ptr{Cvoid}),
-                C_NULL, uv_readdir_req, dir, 0, C_NULL)
-    err < 0 && throw(_UVError("readdir", err, "with ", repr(dir)))
-
-    # iterate the listing into entries
-    entries = String[]
-    ent = Ref{uv_dirent_t}()
-    while Base.UV_EOF != ccall(:uv_fs_scandir_next, Cint, (Ptr{Cvoid}, Ptr{uv_dirent_t}), uv_readdir_req, ent)
-        name = unsafe_string(ent[].name)
-        push!(entries, join ? joinpath(dir, name) : name)
-    end
+    req = Libc.malloc(_sizeof_uv_fs)
+    try
+        # defined in sys.c, to call uv_fs_readdir, which sets errno on error.
+        err = ccall(:uv_fs_scandir, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Cint, Ptr{Cvoid}),
+                    C_NULL, req, dir, 0, C_NULL)
+        err < 0 && uv_error("readdir($(repr(dir)))", err)
+
+        # iterate the listing into entries
+        entries = String[]
+        ent = Ref{uv_dirent_t}()
+        while Base.UV_EOF != ccall(:uv_fs_scandir_next, Cint, (Ptr{Cvoid}, Ptr{uv_dirent_t}), req, ent)
+            name = unsafe_string(ent[].name)
+            push!(entries, join ? joinpath(dir, name) : name)
+        end
 
-    # Clean up the request string
-    ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{UInt8},), uv_readdir_req)
+        # Clean up the request string
+        uv_fs_req_cleanup(req)
 
-    # sort entries unless opted out
-    sort && sort!(entries)
+        # sort entries unless opted out
+        sort && sort!(entries)
 
-    return entries
+        return entries
+    finally
+        Libc.free(req)
+    end
 end
 readdir(; join::Bool=false, sort::Bool=true) =
     readdir(join ? pwd() : ".", join=join, sort=sort)
@@ -911,7 +965,7 @@ end
 
 function unlink(p::AbstractString)
     err = ccall(:jl_fs_unlink, Int32, (Cstring,), p)
-    uv_error("unlink", err)
+    err < 0 && uv_error("unlink($(repr(p)))", err)
     nothing
 end
 
@@ -949,19 +1003,66 @@ function sendfile(src::AbstractString, dst::AbstractString)
 end
 
 if Sys.iswindows()
+    const UV_FS_SYMLINK_DIR      = 0x0001
     const UV_FS_SYMLINK_JUNCTION = 0x0002
+    const UV__EPERM              = -4048
+end
+
+"""
+    hardlink(src::AbstractString, dst::AbstractString)
+
+Creates a hard link to an existing source file `src` with the name `dst`. The
+destination, `dst`, must not exist.
+
+See also: [`symlink`](@ref).
+
+!!! compat "Julia 1.8"
+    This method was added in Julia 1.8.
+"""
+function hardlink(src::AbstractString, dst::AbstractString)
+    err = ccall(:jl_fs_hardlink, Int32, (Cstring, Cstring), src, dst)
+    if err < 0
+        msg = "hardlink($(repr(src)), $(repr(dst)))"
+        uv_error(msg, err)
+    end
+    return nothing
 end
 
 """
-    symlink(target::AbstractString, link::AbstractString)
+    symlink(target::AbstractString, link::AbstractString; dir_target = false)
 
 Creates a symbolic link to `target` with the name `link`.
 
+On Windows, symlinks must be explicitly declared as referring to a directory
+or not.  If `target` already exists, by default the type of `link` will be auto-
+detected, however if `target` does not exist, this function defaults to creating
+a file symlink unless `dir_target` is set to `true`.  Note that if the user
+sets `dir_target` but `target` exists and is a file, a directory symlink will
+still be created, but dereferencing the symlink will fail, just as if the user
+creates a file symlink (by calling `symlink()` with `dir_target` set to `false`
+before the directory is created) and tries to dereference it to a directory.
+
+Additionally, there are two methods of making a link on Windows; symbolic links
+and junction points.  Junction points are slightly more efficient, but do not
+support relative paths, so if a relative directory symlink is requested (as
+denoted by `isabspath(target)` returning `false`) a symlink will be used, else
+a junction point will be used.  Best practice for creating symlinks on Windows
+is to create them only after the files/directories they reference are already
+created.
+
+See also: [`hardlink`](@ref).
+
 !!! note
     This function raises an error under operating systems that do not support
     soft symbolic links, such as Windows XP.
+
+!!! compat "Julia 1.6"
+    The `dir_target` keyword argument was added in Julia 1.6.  Prior to this,
+    symlinks to nonexistant paths on windows would always be file symlinks, and
+    relative symlinks to directories were not supported.
 """
-function symlink(p::AbstractString, np::AbstractString)
+function symlink(target::AbstractString, link::AbstractString;
+                 dir_target::Bool = false)
     @static if Sys.iswindows()
         if Sys.windows_version() < Sys.WINDOWS_VISTA_VER
             error("Windows XP does not support soft symlinks")
@@ -969,18 +1070,38 @@ function symlink(p::AbstractString, np::AbstractString)
     end
     flags = 0
     @static if Sys.iswindows()
-        if isdir(p)
-            flags |= UV_FS_SYMLINK_JUNCTION
-            p = abspath(p)
+        # If we're going to create a directory link, we need to know beforehand.
+        # First, if `target` is not an absolute path, let's immediately resolve
+        # it so that we can peek and see if it's a directory.
+        resolved_target = target
+        if !isabspath(target)
+            resolved_target = joinpath(dirname(link), target)
+        end
+
+        # If it is a directory (or `dir_target` is set), we'll need to add one
+        # of `UV_FS_SYMLINK_{DIR,JUNCTION}` to the flags, depending on whether
+        # `target` is an absolute path or not.
+        if (ispath(resolved_target) && isdir(resolved_target)) || dir_target
+            if isabspath(target)
+                flags |= UV_FS_SYMLINK_JUNCTION
+            else
+                flags |= UV_FS_SYMLINK_DIR
+            end
         end
     end
-    err = ccall(:jl_fs_symlink, Int32, (Cstring, Cstring, Cint), p, np, flags)
-    @static if Sys.iswindows()
-        if err < 0 && !isdir(p)
-            @warn "On Windows, creating file symlinks requires Administrator privileges" maxlog=1 _group=:file
+    err = ccall(:jl_fs_symlink, Int32, (Cstring, Cstring, Cint), target, link, flags)
+    if err < 0
+        msg = "symlink($(repr(target)), $(repr(link)))"
+        @static if Sys.iswindows()
+            # creating file/directory symlinks requires Administrator privileges
+            # while junction points apparently do not
+            if flags & UV_FS_SYMLINK_JUNCTION == 0 && err == UV__EPERM
+                msg = "On Windows, creating symlinks requires Administrator privileges.\n$msg"
+            end
         end
+        uv_error(msg, err)
     end
-    uv_error("symlink",err)
+    return nothing
 end
 
 """
@@ -995,12 +1116,12 @@ function readlink(path::AbstractString)
             (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
             C_NULL, req, path, C_NULL)
         if ret < 0
-            ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
-            uv_error("readlink", ret)
+            uv_fs_req_cleanup(req)
+            uv_error("readlink($(repr(path)))", ret)
             @assert false
         end
         tgt = unsafe_string(ccall(:jl_uv_fs_t_ptr, Cstring, (Ptr{Cvoid},), req))
-        ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
+        uv_fs_req_cleanup(req)
         return tgt
     finally
         Libc.free(req)
@@ -1022,7 +1143,7 @@ Return `path`.
 """
 function chmod(path::AbstractString, mode::Integer; recursive::Bool=false)
     err = ccall(:jl_fs_chmod, Int32, (Cstring, Cint), path, mode)
-    uv_error("chmod", err)
+    err < 0 && uv_error("chmod($(repr(path)), 0o$(string(mode, base=8)))", err)
     if recursive && isdir(path)
         for p in readdir(path)
             if !islink(joinpath(path, p))
@@ -1042,6 +1163,60 @@ Return `path`.
 """
 function chown(path::AbstractString, owner::Integer, group::Integer=-1)
     err = ccall(:jl_fs_chown, Int32, (Cstring, Cint, Cint), path, owner, group)
-    uv_error("chown",err)
+    err < 0 && uv_error("chown($(repr(path)), $owner, $group)", err)
     path
 end
+
+
+# - http://docs.libuv.org/en/v1.x/fs.html#c.uv_fs_statfs (libuv function docs)
+# - http://docs.libuv.org/en/v1.x/fs.html#c.uv_statfs_t (libuv docs of the returned struct)
+"""
+    DiskStat
+
+Stores information about the disk in bytes. Populate by calling `diskstat`.
+"""
+struct DiskStat
+    ftype::UInt64
+    bsize::UInt64
+    blocks::UInt64
+    bfree::UInt64
+    bavail::UInt64
+    files::UInt64
+    ffree::UInt64
+    fspare::NTuple{4, UInt64} # reserved
+end
+
+function Base.getproperty(stats::DiskStat, field::Symbol)
+    total = Int64(getfield(stats, :bsize) * getfield(stats, :blocks))
+    available = Int64(getfield(stats, :bsize) * getfield(stats, :bavail))
+    field === :total && return total
+    field === :available && return available
+    field === :used && return total - available
+    return getfield(stats, field)
+end
+
+@eval Base.propertynames(stats::DiskStat) =
+    $((fieldnames(DiskStat)[1:end-1]..., :available, :total, :used))
+
+Base.show(io::IO, x::DiskStat) =
+    print(io, "DiskStat(total=$(x.total), used=$(x.used), available=$(x.available))")
+
+"""
+    diskstat(path=pwd())
+
+Returns statistics in bytes about the disk that contains the file or directory pointed at by
+`path`. If no argument is passed, statistics about the disk that contains the current
+working directory are returned.
+
+!!! compat "Julia 1.8"
+    This method was added in Julia 1.8.
+"""
+function diskstat(path::AbstractString=pwd())
+    req = zeros(UInt8, _sizeof_uv_fs)
+    err = ccall(:uv_fs_statfs, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
+                C_NULL, req, path, C_NULL)
+    err < 0 && uv_error("diskstat($(repr(path)))", err)
+    statfs_ptr = ccall(:jl_uv_fs_t_ptr, Ptr{Nothing}, (Ptr{Cvoid},), req)
+
+    return unsafe_load(reinterpret(Ptr{DiskStat}, statfs_ptr))
+end
diff --git a/base/filesystem.jl b/base/filesystem.jl
index fcc3b3427a7e0c..f338f8733523f4 100644
--- a/base/filesystem.jl
+++ b/base/filesystem.jl
@@ -4,18 +4,32 @@
 
 module Filesystem
 
-const S_IRUSR = 0o400
-const S_IWUSR = 0o200
-const S_IXUSR = 0o100
-const S_IRWXU = 0o700
-const S_IRGRP = 0o040
-const S_IWGRP = 0o020
-const S_IXGRP = 0o010
-const S_IRWXG = 0o070
-const S_IROTH = 0o004
-const S_IWOTH = 0o002
-const S_IXOTH = 0o001
-const S_IRWXO = 0o007
+const S_IFDIR  = 0o040000  # directory
+const S_IFCHR  = 0o020000  # character device
+const S_IFBLK  = 0o060000  # block device
+const S_IFREG  = 0o100000  # regular file
+const S_IFIFO  = 0o010000  # fifo (named pipe)
+const S_IFLNK  = 0o120000  # symbolic link
+const S_IFSOCK = 0o140000  # socket file
+const S_IFMT   = 0o170000
+
+const S_ISUID = 0o4000  # set UID bit
+const S_ISGID = 0o2000  # set GID bit
+const S_ENFMT = S_ISGID # file locking enforcement
+const S_ISVTX = 0o1000  # sticky bit
+
+const S_IRUSR = 0o0400  # read by owner
+const S_IWUSR = 0o0200  # write by owner
+const S_IXUSR = 0o0100  # execute by owner
+const S_IRWXU = 0o0700  # mask for owner permissions
+const S_IRGRP = 0o0040  # read by group
+const S_IWGRP = 0o0020  # write by group
+const S_IXGRP = 0o0010  # execute by group
+const S_IRWXG = 0o0070  # mask for group permissions
+const S_IROTH = 0o0004  # read by other
+const S_IWOTH = 0o0002  # write by other
+const S_IXOTH = 0o0001  # execute by other
+const S_IRWXO = 0o0007  # mask for other permissions
 
 export File,
        StatStruct,
@@ -42,7 +56,7 @@ import .Base:
     IOError, _UVError, _sizeof_uv_fs, check_open, close, eof, eventloop, fd, isopen,
     bytesavailable, position, read, read!, readavailable, seek, seekend, show,
     skip, stat, unsafe_read, unsafe_write, write, transcode, uv_error,
-    rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
+    setup_stdio, rawhandle, OS_HANDLE, INVALID_OS_HANDLE, windowserror, filesize
 
 import .Base.RefValue
 
@@ -54,6 +68,9 @@ end
 # On Windows we use the MAX_PATH = 260 value on Win32.
 const AVG_PATH = Sys.iswindows() ? 260 : 512
 
+# helper function to clean up libuv request
+uv_fs_req_cleanup(req) = ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
+
 include("path.jl")
 include("stat.jl")
 include("file.jl")
@@ -73,6 +90,7 @@ if OS_HANDLE !== RawFD
 end
 
 rawhandle(file::File) = file.handle
+setup_stdio(file::File, ::Bool) = (file, false)
 
 # Filesystem.open, not Base.open
 function open(path::AbstractString, flags::Integer, mode::Integer=0)
@@ -83,8 +101,8 @@ function open(path::AbstractString, flags::Integer, mode::Integer=0)
                     (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32, Int32, Ptr{Cvoid}),
                     C_NULL, req, path, flags, mode, C_NULL)
         handle = ccall(:uv_fs_get_result, Cssize_t, (Ptr{Cvoid},), req)
-        ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
-        uv_error("open", ret)
+        uv_fs_req_cleanup(req)
+        ret < 0 && uv_error("open($(repr(path)), $flags, $mode)", ret)
     finally # conversion to Cstring could cause an exception
         Libc.free(req)
     end
@@ -245,4 +263,16 @@ end
 fd(f::File) = f.handle
 stat(f::File) = stat(f.handle)
 
+function touch(f::File)
+    @static if Sys.isunix()
+        ret = ccall(:futimes, Cint, (Cint, Ptr{Cvoid}), fd(f), C_NULL)
+        systemerror(:futimes, ret != 0)
+    else
+        t = time()
+        futime(f, t, t)
+    end
+    f
+end
+
+
 end
diff --git a/base/float.jl b/base/float.jl
index 0857d172eaff0a..60850b7e02f64a 100644
--- a/base/float.jl
+++ b/base/float.jl
@@ -36,6 +36,20 @@ const Inf = Inf64
     Inf, Inf64
 
 Positive infinity of type [`Float64`](@ref).
+
+See also: [`isfinite`](@ref), [`typemax`](@ref), [`NaN`](@ref), [`Inf32`](@ref).
+
+# Examples
+```jldoctest
+julia> π/0
+Inf
+
+julia> +1.0 / -0.0
+-Inf
+
+julia> ℯ^-Inf
+0.0
+```
 """
 Inf, Inf64
 
@@ -44,17 +58,89 @@ const NaN = NaN64
     NaN, NaN64
 
 A not-a-number value of type [`Float64`](@ref).
+
+See also: [`isnan`](@ref), [`missing`](@ref), [`NaN32`](@ref), [`Inf`](@ref).
+
+# Examples
+```jldoctest
+julia> 0/0
+NaN
+
+julia> Inf - Inf
+NaN
+
+julia> NaN == NaN, isequal(NaN, NaN), NaN === NaN
+(false, true, true)
+```
 """
 NaN, NaN64
 
+# bit patterns
+reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64, x)
+reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32, x)
+reinterpret(::Type{Unsigned}, x::Float16) = reinterpret(UInt16, x)
+reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64, x)
+reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32, x)
+reinterpret(::Type{Signed}, x::Float16) = reinterpret(Int16, x)
+
+sign_mask(::Type{Float64}) =        0x8000_0000_0000_0000
+exponent_mask(::Type{Float64}) =    0x7ff0_0000_0000_0000
+exponent_one(::Type{Float64}) =     0x3ff0_0000_0000_0000
+exponent_half(::Type{Float64}) =    0x3fe0_0000_0000_0000
+significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff
+
+sign_mask(::Type{Float32}) =        0x8000_0000
+exponent_mask(::Type{Float32}) =    0x7f80_0000
+exponent_one(::Type{Float32}) =     0x3f80_0000
+exponent_half(::Type{Float32}) =    0x3f00_0000
+significand_mask(::Type{Float32}) = 0x007f_ffff
+
+sign_mask(::Type{Float16}) =        0x8000
+exponent_mask(::Type{Float16}) =    0x7c00
+exponent_one(::Type{Float16}) =     0x3c00
+exponent_half(::Type{Float16}) =    0x3800
+significand_mask(::Type{Float16}) = 0x03ff
+
+for T in (Float16, Float32, Float64)
+    @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T)))
+    @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1)
+    @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T)))
+    # maximum float exponent
+    @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T) - 1)
+    # maximum float exponent without bias
+    @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)))
+end
+
+"""
+    exponent_max(T)
+
+Maximum [`exponent`](@ref) value for a floating point number of type `T`.
+
+# Examples
+```jldoctest
+julia> Base.exponent_max(Float64)
+1023
+```
+
+Note, `exponent_max(T) + 1` is a possible value of the exponent field
+with bias, which might be used as sentinel value for `Inf` or `NaN`.
+"""
+function exponent_max end
+
+"""
+    exponent_raw_max(T)
+
+Maximum value of the [`exponent`](@ref) field for a floating point number of type `T` without bias,
+i.e. the maximum integer value representable by [`exponent_bits(T)`](@ref) bits.
+"""
+function exponent_raw_max end
+
 ## conversions to floating-point ##
+
+# TODO: deprecate in 2.0
 Float16(x::Integer) = convert(Float16, convert(Float32, x)::Float32)
-for t in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
-    @eval promote_rule(::Type{Float16}, ::Type{$t}) = Float16
-end
-promote_rule(::Type{Float16}, ::Type{Bool}) = Float16
 
-for t1 in (Float32, Float64)
+for t1 in (Float16, Float32, Float64)
     for st in (Int8, Int16, Int32, Int64)
         @eval begin
             (::Type{$t1})(x::($st)) = sitofp($t1, x)
@@ -68,7 +154,6 @@ for t1 in (Float32, Float64)
         end
     end
 end
-(::Type{T})(x::Float16) where {T<:Integer} = T(Float32(x))
 
 Bool(x::Real) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool, x))
 
@@ -76,6 +161,8 @@ promote_rule(::Type{Float64}, ::Type{UInt128}) = Float64
 promote_rule(::Type{Float64}, ::Type{Int128}) = Float64
 promote_rule(::Type{Float32}, ::Type{UInt128}) = Float32
 promote_rule(::Type{Float32}, ::Type{Int128}) = Float32
+promote_rule(::Type{Float16}, ::Type{UInt128}) = Float16
+promote_rule(::Type{Float16}, ::Type{Int128}) = Float16
 
 function Float64(x::UInt128)
     x == 0 && return 0.0
@@ -137,123 +224,17 @@ function Float32(x::Int128)
     reinterpret(Float32, s | d + y)
 end
 
-# Float32 -> Float16 algorithm from:
-#   "Fast Half Float Conversion" by Jeroen van der Zijp
-#   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
-#
-# With adjustments for round-to-nearest, ties to even.
-#
-let _basetable = Vector{UInt16}(undef, 512),
-    _shifttable = Vector{UInt8}(undef, 512)
-    for i = 0:255
-        e = i - 127
-        if e < -25  # Very small numbers map to zero
-            _basetable[i|0x000+1] = 0x0000
-            _basetable[i|0x100+1] = 0x8000
-            _shifttable[i|0x000+1] = 25
-            _shifttable[i|0x100+1] = 25
-        elseif e < -14  # Small numbers map to denorms
-            _basetable[i|0x000+1] = 0x0000
-            _basetable[i|0x100+1] = 0x8000
-            _shifttable[i|0x000+1] = -e-1
-            _shifttable[i|0x100+1] = -e-1
-        elseif e <= 15  # Normal numbers just lose precision
-            _basetable[i|0x000+1] = ((e+15)<<10)
-            _basetable[i|0x100+1] = ((e+15)<<10) | 0x8000
-            _shifttable[i|0x000+1] = 13
-            _shifttable[i|0x100+1] = 13
-        elseif e < 128  # Large numbers map to Infinity
-            _basetable[i|0x000+1] = 0x7C00
-            _basetable[i|0x100+1] = 0xFC00
-            _shifttable[i|0x000+1] = 24
-            _shifttable[i|0x100+1] = 24
-        else  # Infinity and NaN's stay Infinity and NaN's
-            _basetable[i|0x000+1] = 0x7C00
-            _basetable[i|0x100+1] = 0xFC00
-            _shifttable[i|0x000+1] = 13
-            _shifttable[i|0x100+1] = 13
-        end
-    end
-    global const shifttable = (_shifttable...,)
-    global const basetable = (_basetable...,)
-end
+# TODO: optimize
+Float16(x::UInt128) = convert(Float16, Float32(x))
+Float16(x::Int128)  = convert(Float16, Float32(x))
 
-function Float16(val::Float32)
-    f = reinterpret(UInt32, val)
-    if isnan(val)
-        t = 0x8000 ⊻ (0x8000 & ((f >> 0x10) % UInt16))
-        return reinterpret(Float16, t ⊻ ((f >> 0xd) % UInt16))
-    end
-    i = ((f & ~significand_mask(Float32)) >> significand_bits(Float32)) + 1
-    @inbounds sh = shifttable[i]
-    f &= significand_mask(Float32)
-    # If `val` is subnormal, the tables are set up to force the
-    # result to 0, so the significand has an implicit `1` in the
-    # cases we care about.
-    f |= significand_mask(Float32) + 0x1
-    @inbounds h = (basetable[i] + (f >> sh) & significand_mask(Float16)) % UInt16
-    # round
-    # NOTE: we maybe should ignore NaNs here, but the payload is
-    # getting truncated anyway so "rounding" it might not matter
-    nextbit = (f >> (sh-1)) & 1
-    if nextbit != 0 && (h & 0x7C00) != 0x7C00
-        # Round halfway to even or check lower bits
-        if h&1 == 1 || (f & ((1<<(sh-1))-1)) != 0
-            h += UInt16(1)
-        end
-    end
-    reinterpret(Float16, h)
-end
-
-function Float32(val::Float16)
-    local ival::UInt32 = reinterpret(UInt16, val)
-    local sign::UInt32 = (ival & 0x8000) >> 15
-    local exp::UInt32  = (ival & 0x7c00) >> 10
-    local sig::UInt32  = (ival & 0x3ff) >> 0
-    local ret::UInt32
-
-    if exp == 0
-        if sig == 0
-            sign = sign << 31
-            ret = sign | exp | sig
-        else
-            n_bit = 1
-            bit = 0x0200
-            while (bit & sig) == 0
-                n_bit = n_bit + 1
-                bit = bit >> 1
-            end
-            sign = sign << 31
-            exp = ((-14 - n_bit + 127) << 23) % UInt32
-            sig = ((sig & (~bit)) << n_bit) << (23 - 10)
-            ret = sign | exp | sig
-        end
-    elseif exp == 0x1f
-        if sig == 0  # Inf
-            if sign == 0
-                ret = 0x7f800000
-            else
-                ret = 0xff800000
-            end
-        else  # NaN
-            ret = 0x7fc00000 | (sign<<31) | (sig<<(23-10))
-        end
-    else
-        sign = sign << 31
-        exp  = ((exp - 15 + 127) << 23) % UInt32
-        sig  = sig << (23 - 10)
-        ret = sign | exp | sig
-    end
-    return reinterpret(Float32, ret)
-end
-
-#convert(::Type{Float16}, x::Float32) = fptrunc(Float16, x)
+Float16(x::Float32) = fptrunc(Float16, x)
+Float16(x::Float64) = fptrunc(Float16, x)
 Float32(x::Float64) = fptrunc(Float32, x)
-Float16(x::Float64) = Float16(Float32(x))
 
-#convert(::Type{Float32}, x::Float16) = fpext(Float32, x)
+Float32(x::Float16) = fpext(Float32, x)
 Float64(x::Float32) = fpext(Float64, x)
-Float64(x::Float16) = Float64(Float32(x))
+Float64(x::Float16) = fpext(Float64, x)
 
 AbstractFloat(x::Bool)    = Float64(x)
 AbstractFloat(x::Int8)    = Float64(x)
@@ -273,6 +254,17 @@ Bool(x::Float16) = x==0 ? false : x==1 ? true : throw(InexactError(:Bool, Bool,
     float(x)
 
 Convert a number or array to a floating point data type.
+
+See also: [`complex`](@ref), [`oftype`](@ref), [`convert`](@ref).
+
+# Examples
+```jldoctest
+julia> float(1:1000)
+1.0:1.0:1000.0
+
+julia> float(typemax(Int32))
+2.147483647e9
+```
 """
 float(x) = AbstractFloat(x)
 
@@ -298,23 +290,29 @@ float(::Type{T}) where {T<:AbstractFloat} = T
     unsafe_trunc(T, x)
 
 Return the nearest integral value of type `T` whose absolute value is
-less than or equal to `x`. If the value is not representable by `T`, an arbitrary value will
-be returned.
+less than or equal to the absolute value of `x`. If the value is not representable by `T`,
+an arbitrary value will be returned.
+See also [`trunc`](@ref).
+
+# Examples
+```jldoctest
+julia> unsafe_trunc(Int, -2.2)
+-2
+
+julia> unsafe_trunc(Int, NaN)
+-9223372036854775808
+```
 """
 function unsafe_trunc end
 
 for Ti in (Int8, Int16, Int32, Int64)
     @eval begin
-        unsafe_trunc(::Type{$Ti}, x::Float16) = unsafe_trunc($Ti, Float32(x))
-        unsafe_trunc(::Type{$Ti}, x::Float32) = fptosi($Ti, x)
-        unsafe_trunc(::Type{$Ti}, x::Float64) = fptosi($Ti, x)
+        unsafe_trunc(::Type{$Ti}, x::IEEEFloat) = fptosi($Ti, x)
     end
 end
 for Ti in (UInt8, UInt16, UInt32, UInt64)
     @eval begin
-        unsafe_trunc(::Type{$Ti}, x::Float16) = unsafe_trunc($Ti, Float32(x))
-        unsafe_trunc(::Type{$Ti}, x::Float32) = fptoui($Ti, x)
-        unsafe_trunc(::Type{$Ti}, x::Float64) = fptoui($Ti, x)
+        unsafe_trunc(::Type{$Ti}, x::IEEEFloat) = fptoui($Ti, x)
     end
 end
 
@@ -351,35 +349,25 @@ unsafe_trunc(::Type{Int128}, x::Float16) = unsafe_trunc(Int128, Float32(x))
 
 # matches convert methods
 # also determines floor, ceil, round
-trunc(::Type{Signed}, x::Float32) = trunc(Int,x)
-trunc(::Type{Signed}, x::Float64) = trunc(Int,x)
-trunc(::Type{Unsigned}, x::Float32) = trunc(UInt,x)
-trunc(::Type{Unsigned}, x::Float64) = trunc(UInt,x)
-trunc(::Type{Integer}, x::Float32) = trunc(Int,x)
-trunc(::Type{Integer}, x::Float64) = trunc(Int,x)
-trunc(::Type{T}, x::Float16) where {T<:Integer} = trunc(T, Float32(x))
+trunc(::Type{Signed}, x::IEEEFloat) = trunc(Int,x)
+trunc(::Type{Unsigned}, x::IEEEFloat) = trunc(UInt,x)
+trunc(::Type{Integer}, x::IEEEFloat) = trunc(Int,x)
 
 # fallbacks
 floor(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundDown))
-floor(::Type{T}, x::Float16) where {T<:Integer} = floor(T, Float32(x))
 ceil(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundUp))
-ceil(::Type{T}, x::Float16) where {T<:Integer} = ceil(T, Float32(x))
 round(::Type{T}, x::AbstractFloat) where {T<:Integer} = trunc(T,round(x, RoundNearest))
-round(::Type{T}, x::Float16) where {T<:Integer} = round(T, Float32(x))
-
-round(x::Float64, r::RoundingMode{:ToZero})  = trunc_llvm(x)
-round(x::Float32, r::RoundingMode{:ToZero})  = trunc_llvm(x)
-round(x::Float64, r::RoundingMode{:Down})    = floor_llvm(x)
-round(x::Float32, r::RoundingMode{:Down})    = floor_llvm(x)
-round(x::Float64, r::RoundingMode{:Up})      = ceil_llvm(x)
-round(x::Float32, r::RoundingMode{:Up})      = ceil_llvm(x)
-round(x::Float64, r::RoundingMode{:Nearest}) = rint_llvm(x)
-round(x::Float32, r::RoundingMode{:Nearest}) = rint_llvm(x)
-
-round(x::Float16, r::RoundingMode{:ToZero}) = Float16(round(Float32(x), r))
-round(x::Float16, r::RoundingMode{:Down}) = Float16(round(Float32(x), r))
-round(x::Float16, r::RoundingMode{:Up}) = Float16(round(Float32(x), r))
-round(x::Float16, r::RoundingMode{:Nearest}) = Float16(round(Float32(x), r))
+
+# Bool
+trunc(::Type{Bool}, x::AbstractFloat) = (-1 < x < 2) ? 1 <= x : throw(InexactError(:trunc, Bool, x))
+floor(::Type{Bool}, x::AbstractFloat) = (0 <= x < 2) ? 1 <= x : throw(InexactError(:floor, Bool, x))
+ceil(::Type{Bool}, x::AbstractFloat)  = (-1 < x <= 1) ? 0 < x : throw(InexactError(:ceil, Bool, x))
+round(::Type{Bool}, x::AbstractFloat) = (-0.5 <= x < 1.5) ? 0.5 < x : throw(InexactError(:round, Bool, x))
+
+round(x::IEEEFloat, r::RoundingMode{:ToZero})  = trunc_llvm(x)
+round(x::IEEEFloat, r::RoundingMode{:Down})    = floor_llvm(x)
+round(x::IEEEFloat, r::RoundingMode{:Up})      = ceil_llvm(x)
+round(x::IEEEFloat, r::RoundingMode{:Nearest}) = rint_llvm(x)
 
 ## floating point promotions ##
 promote_rule(::Type{Float32}, ::Type{Float16}) = Float32
@@ -390,40 +378,20 @@ widen(::Type{Float16}) = Float32
 widen(::Type{Float32}) = Float64
 
 ## floating point arithmetic ##
--(x::Float64) = neg_float(x)
--(x::Float32) = neg_float(x)
--(x::Float16) = reinterpret(Float16, reinterpret(UInt16, x) ⊻ 0x8000)
+-(x::IEEEFloat) = neg_float(x)
 
-for op in (:+, :-, :*, :/, :\, :^)
-    @eval ($op)(a::Float16, b::Float16) = Float16(($op)(Float32(a), Float32(b)))
-end
-+(x::Float32, y::Float32) = add_float(x, y)
-+(x::Float64, y::Float64) = add_float(x, y)
--(x::Float32, y::Float32) = sub_float(x, y)
--(x::Float64, y::Float64) = sub_float(x, y)
-*(x::Float32, y::Float32) = mul_float(x, y)
-*(x::Float64, y::Float64) = mul_float(x, y)
-/(x::Float32, y::Float32) = div_float(x, y)
-/(x::Float64, y::Float64) = div_float(x, y)
-
-muladd(x::Float32, y::Float32, z::Float32) = muladd_float(x, y, z)
-muladd(x::Float64, y::Float64, z::Float64) = muladd_float(x, y, z)
-function muladd(a::Float16, b::Float16, c::Float16)
-    Float16(muladd(Float32(a), Float32(b), Float32(c)))
-end
++(x::T, y::T) where {T<:IEEEFloat} = add_float(x, y)
+-(x::T, y::T) where {T<:IEEEFloat} = sub_float(x, y)
+*(x::T, y::T) where {T<:IEEEFloat} = mul_float(x, y)
+/(x::T, y::T) where {T<:IEEEFloat} = div_float(x, y)
+
+muladd(x::T, y::T, z::T) where {T<:IEEEFloat} = muladd_float(x, y, z)
 
 # TODO: faster floating point div?
 # TODO: faster floating point fld?
 # TODO: faster floating point mod?
 
-for func in (:div,:fld,:cld,:rem,:mod)
-    @eval begin
-        $func(a::Float16,b::Float16) = Float16($func(Float32(a),Float32(b)))
-    end
-end
-
-rem(x::Float32, y::Float32) = rem_float(x, y)
-rem(x::Float64, y::Float64) = rem_float(x, y)
+rem(x::T, y::T) where {T<:IEEEFloat} = rem_float(x, y)
 
 cld(x::T, y::T) where {T<:AbstractFloat} = -fld(-x,y)
 
@@ -439,32 +407,24 @@ function mod(x::T, y::T) where T<:AbstractFloat
 end
 
 ## floating point comparisons ##
-function ==(x::Float16, y::Float16)
-    ix = reinterpret(UInt16,x)
-    iy = reinterpret(UInt16,y)
-    if (ix|iy)&0x7fff > 0x7c00 #isnan(x) || isnan(y)
-        return false
-    end
-    if (ix|iy)&0x7fff == 0x0000
-        return true
-    end
-    return ix == iy
+==(x::T, y::T) where {T<:IEEEFloat} = eq_float(x, y)
+!=(x::T, y::T) where {T<:IEEEFloat} = ne_float(x, y)
+<( x::T, y::T) where {T<:IEEEFloat} = lt_float(x, y)
+<=(x::T, y::T) where {T<:IEEEFloat} = le_float(x, y)
+
+isequal(x::T, y::T) where {T<:IEEEFloat} = fpiseq(x, y)
+
+# interpret as sign-magnitude integer
+@inline function _fpint(x)
+    IntT = inttype(typeof(x))
+    ix = reinterpret(IntT, x)
+    return ifelse(ix < zero(IntT), ix ⊻ typemax(IntT), ix)
 end
-==(x::Float32, y::Float32) = eq_float(x, y)
-==(x::Float64, y::Float64) = eq_float(x, y)
-!=(x::Float32, y::Float32) = ne_float(x, y)
-!=(x::Float64, y::Float64) = ne_float(x, y)
-<( x::Float32, y::Float32) = lt_float(x, y)
-<( x::Float64, y::Float64) = lt_float(x, y)
-<=(x::Float32, y::Float32) = le_float(x, y)
-<=(x::Float64, y::Float64) = le_float(x, y)
-
-isequal(x::Float32, y::Float32) = fpiseq(x, y)
-isequal(x::Float64, y::Float64) = fpiseq(x, y)
-isless( x::Float32, y::Float32) = fpislt(x, y)
-isless( x::Float64, y::Float64) = fpislt(x, y)
-for op in (:<, :<=, :isless)
-    @eval ($op)(a::Float16, b::Float16) = ($op)(Float32(a), Float32(b))
+
+@inline function isless(a::T, b::T) where T<:IEEEFloat
+    (isnan(a) || isnan(b)) && return !isnan(a)
+
+    return _fpint(a) < _fpint(b)
 end
 
 # Exact Float (Tf) vs Integer (Ti) comparisons
@@ -523,22 +483,20 @@ for op in (:(==), :<, :<=)
 end
 
 
-abs(x::Float16) = reinterpret(Float16, reinterpret(UInt16, x) & 0x7fff)
-abs(x::Float32) = abs_float(x)
-abs(x::Float64) = abs_float(x)
+abs(x::IEEEFloat) = abs_float(x)
 
 """
     isnan(f) -> Bool
 
 Test whether a number value is a NaN, an indeterminate value which is neither an infinity
 nor a finite number ("not a number").
+
+See also: [`iszero`](@ref), [`isone`](@ref), [`isinf`](@ref), [`ismissing`](@ref).
 """
 isnan(x::AbstractFloat) = (x != x)::Bool
-isnan(x::Float16) = reinterpret(UInt16,x)&0x7fff > 0x7c00
-isnan(x::Real) = false
+isnan(x::Number) = false
 
 isfinite(x::AbstractFloat) = x - x == 0
-isfinite(x::Float16) = reinterpret(UInt16,x)&0x7c00 != 0x7c00
 isfinite(x::Real) = decompose(x)[3] != 0
 isfinite(x::Integer) = true
 
@@ -546,33 +504,165 @@ isfinite(x::Integer) = true
     isinf(f) -> Bool
 
 Test whether a number is infinite.
+
+See also: [`Inf`](@ref), [`iszero`](@ref), [`isfinite`](@ref), [`isnan`](@ref).
 """
 isinf(x::Real) = !isnan(x) & !isfinite(x)
 
-## hashing small, built-in numeric types ##
+const hx_NaN = hash_uint64(reinterpret(UInt64, NaN))
+let Tf = Float64, Tu = UInt64, Ti = Int64
+    @eval function hash(x::$Tf, h::UInt)
+        # see comments on trunc and hash(Real, UInt)
+        if $(Tf(typemin(Ti))) <= x < $(Tf(typemax(Ti)))
+            xi = fptosi($Ti, x)
+            if isequal(xi, x)
+                return hash(xi, h)
+            end
+        elseif $(Tf(typemin(Tu))) <= x < $(Tf(typemax(Tu)))
+            xu = fptoui($Tu, x)
+            if isequal(xu, x)
+                return hash(xu, h)
+            end
+        elseif isnan(x)
+            return hx_NaN ⊻ h # NaN does not have a stable bit pattern
+        end
+        return hash_uint64(bitcast(UInt64, x)) - 3h
+    end
+end
+
+hash(x::Float32, h::UInt) = hash(Float64(x), h)
+hash(x::Float16, h::UInt) = hash(Float64(x), h)
 
-hx(a::UInt64, b::Float64, h::UInt) = hash_uint64((3a + reinterpret(UInt64,b)) - h)
-const hx_NaN = hx(UInt64(0), NaN, UInt(0  ))
+## generic hashing for rational values ##
 
-hash(x::UInt64,  h::UInt) = hx(x, Float64(x), h)
-hash(x::Int64,   h::UInt) = hx(reinterpret(UInt64, abs(x)), Float64(x), h)
-hash(x::Float64, h::UInt) = isnan(x) ? (hx_NaN ⊻ h) : hx(fptoui(UInt64, abs(x)), x, h)
+function hash(x::Real, h::UInt)
+    # decompose x as num*2^pow/den
+    num, pow, den = decompose(x)
+
+    # handle special values
+    num == 0 && den == 0 && return hash(NaN, h)
+    num == 0 && return hash(ifelse(den > 0, 0.0, -0.0), h)
+    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)
+
+    # normalize decomposition
+    if den < 0
+        num = -num
+        den = -den
+    end
+    z = trailing_zeros(num)
+    if z != 0
+        num >>= z
+        pow += z
+    end
+    z = trailing_zeros(den)
+    if z != 0
+        den >>= z
+        pow -= z
+    end
+
+    # handle values representable as Int64, UInt64, Float64
+    if den == 1
+        left = ndigits0z(num,2) + pow
+        right = trailing_zeros(num) + pow
+        if -1074 <= right
+            if 0 <= right && left <= 64
+                left <= 63                     && return hash(Int64(num) << Int(pow), h)
+                signbit(num) == signbit(den)   && return hash(UInt64(num) << Int(pow), h)
+            end # typemin(Int64) handled by Float64 case
+            left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num),pow), h)
+        end
+    end
+
+    # handle generic rational values
+    h = hash_integer(den, h)
+    h = hash_integer(pow, h)
+    h = hash_integer(num, h)
+    return h
+end
+
+#=
+`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`.
+
+The decompose function is the point where rational-valued numeric types that support
+hashing hook into the hashing protocol. `decompose(x)` should return three integer
+values `num, pow, den`, such that the value of `x` is mathematically equal to
+
+    num*2^pow/den
+
+The decomposition need not be canonical in the sense that it just needs to be *some*
+way to express `x` in this form, not any particular way – with the restriction that
+`num` and `den` may not share any odd common factors. They may, however, have powers
+of two in common – the generic hashing code will normalize those as necessary.
+
+Special values:
+
+ - `x` is zero: `num` should be zero and `den` should have the same sign as `x`
+ - `x` is infinite: `den` should be zero and `num` should have the same sign as `x`
+ - `x` is not a number: `num` and `den` should both be zero
+=#
+
+decompose(x::Integer) = x, 0, 1
+
+function decompose(x::Float16)::NTuple{3,Int}
+    isnan(x) && return 0, 0, 0
+    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
+    n = reinterpret(UInt16, x)
+    s = (n & 0x03ff) % Int16
+    e = ((n & 0x7c00) >> 10) % Int
+    s |= Int16(e != 0) << 10
+    d = ifelse(signbit(x), -1, 1)
+    s, e - 25 + (e == 0), d
+end
+
+function decompose(x::Float32)::NTuple{3,Int}
+    isnan(x) && return 0, 0, 0
+    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
+    n = reinterpret(UInt32, x)
+    s = (n & 0x007fffff) % Int32
+    e = ((n & 0x7f800000) >> 23) % Int
+    s |= Int32(e != 0) << 23
+    d = ifelse(signbit(x), -1, 1)
+    s, e - 150 + (e == 0), d
+end
+
+function decompose(x::Float64)::Tuple{Int64, Int, Int}
+    isnan(x) && return 0, 0, 0
+    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
+    n = reinterpret(UInt64, x)
+    s = (n & 0x000fffffffffffff) % Int64
+    e = ((n & 0x7ff0000000000000) >> 52) % Int
+    s |= Int64(e != 0) << 52
+    d = ifelse(signbit(x), -1, 1)
+    s, e - 1075 + (e == 0), d
+end
 
-hash(x::Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32}, h::UInt) = hash(Int64(x), h)
-hash(x::Float32, h::UInt) = hash(Float64(x), h)
 
 """
-    precision(num::AbstractFloat)
+    precision(num::AbstractFloat; base::Integer=2)
+    precision(T::Type; base::Integer=2)
 
 Get the precision of a floating point number, as defined by the effective number of bits in
-the significand.
+the significand, or the precision of a floating-point type `T` (its current default, if
+`T` is a variable-precision type like [`BigFloat`](@ref)).
+
+If `base` is specified, then it returns the maximum corresponding
+number of significand digits in that base.
+
+!!! compat "Julia 1.8"
+    The `base` keyword requires at least Julia 1.8.
 """
 function precision end
 
-precision(::Type{Float16}) = 11
-precision(::Type{Float32}) = 24
-precision(::Type{Float64}) = 53
-precision(::T) where {T<:AbstractFloat} = precision(T)
+_precision(::Type{Float16}) = 11
+_precision(::Type{Float32}) = 24
+_precision(::Type{Float64}) = 53
+function _precision(x, base::Integer=2)
+    base > 1 || throw(DomainError(base, "`base` cannot be less than 2."))
+    p = _precision(x)
+    return base == 2 ? Int(p) : floor(Int, p / log2(base))
+end
+precision(::Type{T}; base::Integer=2) where {T<:AbstractFloat} = _precision(T, base)
+precision(::T; base::Integer=2) where {T<:AbstractFloat} = precision(T; base)
 
 """
     uabs(x::Integer)
@@ -590,7 +680,7 @@ uabs(x::BitSigned) = unsigned(abs(x))
     nextfloat(x::AbstractFloat, n::Integer)
 
 The result of `n` iterative applications of `nextfloat` to `x` if `n >= 0`, or `-n`
-applications of `prevfloat` if `n < 0`.
+applications of [`prevfloat`](@ref) if `n < 0`.
 """
 function nextfloat(f::IEEEFloat, d::Integer)
     F = typeof(f)
@@ -635,6 +725,8 @@ end
 
 Return the smallest floating point number `y` of the same type as `x` such `x < y`. If no
 such `y` exists (e.g. if `x` is `Inf` or `NaN`), then return `x`.
+
+See also: [`prevfloat`](@ref), [`eps`](@ref), [`issubnormal`](@ref).
 """
 nextfloat(x::AbstractFloat) = nextfloat(x,1)
 
@@ -642,7 +734,7 @@ nextfloat(x::AbstractFloat) = nextfloat(x,1)
     prevfloat(x::AbstractFloat, n::Integer)
 
 The result of `n` iterative applications of `prevfloat` to `x` if `n >= 0`, or `-n`
-applications of `nextfloat` if `n < 0`.
+applications of [`nextfloat`](@ref) if `n < 0`.
 """
 prevfloat(x::AbstractFloat, d::Integer) = nextfloat(x, -d)
 
@@ -655,7 +747,7 @@ such `y` exists (e.g. if `x` is `-Inf` or `NaN`), then return `x`.
 prevfloat(x::AbstractFloat) = nextfloat(x,-1)
 
 for Ti in (Int8, Int16, Int32, Int64, Int128, UInt8, UInt16, UInt32, UInt64, UInt128)
-    for Tf in (Float32, Float64)
+    for Tf in (Float16, Float32, Float64)
         if Ti <: Unsigned || sizeof(Ti) < sizeof(Tf)
             # Here `Tf(typemin(Ti))-1` is exact, so we can compare the lower-bound
             # directly. `Tf(typemax(Ti))+1` is either always exactly representable, or
@@ -711,6 +803,10 @@ function issubnormal(x::T) where {T<:IEEEFloat}
     (y & exponent_mask(T) == 0) & (y & significand_mask(T) != 0)
 end
 
+ispow2(x::AbstractFloat) = !iszero(x) && frexp(x)[1] == 0.5
+iseven(x::AbstractFloat) = isinteger(x) && (abs(x) > maxintfloat(x) || iseven(Integer(x)))
+isodd(x::AbstractFloat) = isinteger(x) && abs(x) ≤ maxintfloat(x) && isodd(Integer(x))
+
 @eval begin
     typemin(::Type{Float16}) = $(bitcast(Float16, 0xfc00))
     typemax(::Type{Float16}) = $(Inf16)
@@ -760,6 +856,8 @@ floatmin(x::T) where {T<:AbstractFloat} = floatmin(T)
 
 Return the largest finite number representable by the floating-point type `T`.
 
+See also: [`typemax`](@ref), [`floatmin`](@ref), [`eps`](@ref).
+
 # Examples
 ```jldoctest
 julia> floatmax(Float16)
@@ -770,6 +868,9 @@ julia> floatmax(Float32)
 
 julia> floatmax()
 1.7976931348623157e308
+
+julia> typemax(Float64)
+Inf
 ```
 """
 floatmax(x::T) where {T<:AbstractFloat} = floatmax(T)
@@ -824,6 +925,8 @@ is the nearest floating point number to ``y``, then
 |y-x| \\leq \\operatorname{eps}(x)/2.
 ```
 
+See also: [`nextfloat`](@ref), [`issubnormal`](@ref), [`floatmax`](@ref).
+
 # Examples
 ```jldoctest
 julia> eps(1.0)
@@ -851,46 +954,21 @@ eps(::AbstractFloat)
 ## byte order swaps for arbitrary-endianness serialization/deserialization ##
 bswap(x::IEEEFloat) = bswap_int(x)
 
-# bit patterns
-reinterpret(::Type{Unsigned}, x::Float64) = reinterpret(UInt64, x)
-reinterpret(::Type{Unsigned}, x::Float32) = reinterpret(UInt32, x)
-reinterpret(::Type{Signed}, x::Float64) = reinterpret(Int64, x)
-reinterpret(::Type{Signed}, x::Float32) = reinterpret(Int32, x)
-
-sign_mask(::Type{Float64}) =        0x8000_0000_0000_0000
-exponent_mask(::Type{Float64}) =    0x7ff0_0000_0000_0000
-exponent_one(::Type{Float64}) =     0x3ff0_0000_0000_0000
-exponent_half(::Type{Float64}) =    0x3fe0_0000_0000_0000
-significand_mask(::Type{Float64}) = 0x000f_ffff_ffff_ffff
-
-sign_mask(::Type{Float32}) =        0x8000_0000
-exponent_mask(::Type{Float32}) =    0x7f80_0000
-exponent_one(::Type{Float32}) =     0x3f80_0000
-exponent_half(::Type{Float32}) =    0x3f00_0000
-significand_mask(::Type{Float32}) = 0x007f_ffff
-
-sign_mask(::Type{Float16}) =        0x8000
-exponent_mask(::Type{Float16}) =    0x7c00
-exponent_one(::Type{Float16}) =     0x3c00
-exponent_half(::Type{Float16}) =    0x3800
-significand_mask(::Type{Float16}) = 0x03ff
-
-for T in (Float16, Float32, Float64)
-    @eval significand_bits(::Type{$T}) = $(trailing_ones(significand_mask(T)))
-    @eval exponent_bits(::Type{$T}) = $(sizeof(T)*8 - significand_bits(T) - 1)
-    @eval exponent_bias(::Type{$T}) = $(Int(exponent_one(T) >> significand_bits(T)))
-    # maximum float exponent
-    @eval exponent_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)) - exponent_bias(T))
-    # maximum float exponent without bias
-    @eval exponent_raw_max(::Type{$T}) = $(Int(exponent_mask(T) >> significand_bits(T)))
-end
-
 # integer size of float
 uinttype(::Type{Float64}) = UInt64
 uinttype(::Type{Float32}) = UInt32
 uinttype(::Type{Float16}) = UInt16
+inttype(::Type{Float64}) = Int64
+inttype(::Type{Float32}) = Int32
+inttype(::Type{Float16}) = Int16
+# float size of integer
+floattype(::Type{UInt64}) = Float64
+floattype(::Type{UInt32}) = Float32
+floattype(::Type{UInt16}) = Float16
+floattype(::Type{Int64}) = Float64
+floattype(::Type{Int32}) = Float32
+floattype(::Type{Int16}) = Float16
 
-Base.iszero(x::Float16) = reinterpret(UInt16, x) & ~sign_mask(Float16) == 0x0000
 
 ## Array operations on floating point numbers ##
 
diff --git a/base/floatfuncs.jl b/base/floatfuncs.jl
index 97f72134c28f45..4276ec0daecaf1 100644
--- a/base/floatfuncs.jl
+++ b/base/floatfuncs.jl
@@ -97,9 +97,8 @@ julia> round(357.913; sigdigits=4, base=2)
     Rounding to specified digits in bases other than 2 can be inexact when
     operating on binary floating point numbers. For example, the [`Float64`](@ref)
     value represented by `1.15` is actually *less* than 1.15, yet will be
-    rounded to 1.2.
+    rounded to 1.2. For example:
 
-    # Examples
     ```jldoctest; setup = :(using Printf)
     julia> x = 1.15
     1.15
@@ -166,6 +165,16 @@ function _round_invstep(x, invstep, r::RoundingMode)
     return y
 end
 
+# round x to multiples of 1/(invstepsqrt^2)
+# Using square root of step prevents overflowing
+function _round_invstepsqrt(x, invstepsqrt, r::RoundingMode)
+    y = round((x * invstepsqrt) * invstepsqrt, r) / invstepsqrt / invstepsqrt
+    if !isfinite(y)
+        return x
+    end
+    return y
+end
+
 # round x to multiples of step
 function _round_step(x, step, r::RoundingMode)
     # TODO: use div with rounding mode
@@ -186,10 +195,15 @@ function _round_digits(x, r::RoundingMode, digits::Integer, base)
     fx = float(x)
     if digits >= 0
         invstep = oftype(fx, base)^digits
-        _round_invstep(fx, invstep, r)
+        if isfinite(invstep)
+            return _round_invstep(fx, invstep, r)
+        else
+            invstepsqrt = oftype(fx, base)^oftype(fx, digits/2)
+            return _round_invstepsqrt(fx, invstepsqrt, r)
+        end
     else
         step = oftype(fx, base)^-digits
-        _round_step(fx, step, r)
+        return _round_step(fx, step, r)
     end
 end
 
@@ -221,17 +235,23 @@ function round(x::T, ::RoundingMode{:NearestTiesUp}) where {T <: AbstractFloat}
     copysign(floor((x + (T(0.25) - eps(T(0.5)))) + (T(0.25) + eps(T(0.5)))), x)
 end
 
+function Base.round(x::AbstractFloat, ::typeof(RoundFromZero))
+    signbit(x) ? round(x, RoundDown) : round(x, RoundUp)
+end
+
 # isapprox: approximate equality of numbers
 """
     isapprox(x, y; atol::Real=0, rtol::Real=atol>0 ? 0 : √eps, nans::Bool=false[, norm::Function])
 
-Inexact equality comparison: `true` if `norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))`. The
-default `atol` is zero and the default `rtol` depends on the types of `x` and `y`. The keyword
-argument `nans` determines whether or not NaN values are considered equal (defaults to false).
+Inexact equality comparison. Two numbers compare equal if their relative distance *or* their
+absolute distance is within tolerance bounds: `isapprox` returns `true` if
+`norm(x-y) <= max(atol, rtol*max(norm(x), norm(y)))`. The default `atol` is zero and the
+default `rtol` depends on the types of `x` and `y`. The keyword argument `nans` determines
+whether or not NaN values are considered equal (defaults to false).
 
 For real or complex floating-point values, if an `atol > 0` is not specified, `rtol` defaults to
 the square root of [`eps`](@ref) of the type of `x` or `y`, whichever is bigger (least precise).
-This corresponds to requiring equality of about half of the significand digits. Otherwise,
+This corresponds to requiring equality of about half of the significant digits. Otherwise,
 e.g. for integer arguments or if an `atol > 0` is supplied, `rtol` defaults to zero.
 
 The `norm` keyword defaults to `abs` for numeric `(x,y)` and to `LinearAlgebra.norm` for
@@ -253,16 +273,22 @@ for example, in `x - y ≈ 0`, `atol=1e-9` is an absurdly small tolerance if `x`
 but an absurdly large tolerance if `x` is the
 [radius of a Hydrogen atom](https://en.wikipedia.org/wiki/Bohr_radius) in meters.
 
+!!! compat "Julia 1.6"
+    Passing the `norm` keyword argument when comparing numeric (non-array) arguments
+    requires Julia 1.6 or later.
 
 # Examples
 ```jldoctest
-julia> 0.1 ≈ (0.1 - 1e-10)
+julia> isapprox(0.1, 0.15; atol=0.05)
 true
 
-julia> isapprox(10, 11; atol = 2)
+julia> isapprox(0.1, 0.15; rtol=0.34)
 true
 
-julia> isapprox([10.0^9, 1.0], [10.0^9, 2.0])
+julia> isapprox(0.1, 0.15; rtol=0.33)
+false
+
+julia> 0.1 + 1e-10 ≈ 0.1
 true
 
 julia> 1e-10 ≈ 0
@@ -270,6 +296,9 @@ false
 
 julia> isapprox(1e-10, 0, atol=1e-8)
 true
+
+julia> isapprox([10.0^9, 1.0], [10.0^9, 2.0]) # using `norm`
+true
 ```
 """
 function isapprox(x::Number, y::Number;
@@ -284,6 +313,9 @@ end
 Create a function that compares its argument to `x` using `≈`, i.e. a function equivalent to `y -> y ≈ x`.
 
 The keyword arguments supported here are the same as those in the 2-argument `isapprox`.
+
+!!! compat "Julia 1.5"
+    This method requires Julia 1.5 or later.
 """
 isapprox(y; kwargs...) = x -> isapprox(x, y; kwargs...)
 
@@ -313,30 +345,88 @@ significantly more expensive than `x*y+z`. `fma` is used to improve accuracy in
 algorithms. See [`muladd`](@ref).
 """
 function fma end
+function fma_emulated(a::Float32, b::Float32, c::Float32)::Float32
+    ab = Float64(a) * b
+    res = ab+c
+    reinterpret(UInt64, res)&0x1fff_ffff!=0x1000_0000 && return res
+    # yes error compensation is necessary. It sucks
+    reslo = abs(c)>abs(ab) ? ab-(res - c) : c-(res - ab)
+    res = iszero(reslo) ? res : (signbit(reslo) ? prevfloat(res) : nextfloat(res))
+    return res
+end
+
+""" Splits a Float64 into a hi bit and a low bit where the high bit has 27 trailing 0s and the low bit has 26 trailing 0s"""
+@inline function splitbits(x::Float64)
+    hi = reinterpret(Float64, reinterpret(UInt64, x) & 0xffff_ffff_f800_0000)
+    return hi, x-hi
+end
 
-fma_libm(x::Float32, y::Float32, z::Float32) =
-    ccall(("fmaf", libm_name), Float32, (Float32,Float32,Float32), x, y, z)
-fma_libm(x::Float64, y::Float64, z::Float64) =
-    ccall(("fma", libm_name), Float64, (Float64,Float64,Float64), x, y, z)
+function twomul(a::Float64, b::Float64)
+    ahi, alo = splitbits(a)
+    bhi, blo = splitbits(b)
+    abhi = a*b
+    blohi, blolo = splitbits(blo)
+    ablo = alo*blohi - (((abhi - ahi*bhi) - alo*bhi) - ahi*blo) + blolo*alo
+    return abhi, ablo
+end
+
+function fma_emulated(a::Float64, b::Float64,c::Float64)
+    abhi, ablo = @inline twomul(a,b)
+    if !isfinite(abhi+c) || isless(abs(abhi), nextfloat(0x1p-969)) || issubnormal(a) || issubnormal(b)
+        aandbfinite = isfinite(a) && isfinite(b)
+        if !(isfinite(c) && aandbfinite)
+            return aandbfinite ? c : abhi+c
+        end
+        (iszero(a) || iszero(b)) && return abhi+c
+        # The checks above satisfy exponent's nothrow precondition
+        bias = Math._exponent_finite_nonzero(a) + Math._exponent_finite_nonzero(b)
+        c_denorm = ldexp(c, -bias)
+        if isfinite(c_denorm)
+            # rescale a and b to [1,2), equivalent to ldexp(a, -exponent(a))
+            issubnormal(a) && (a *= 0x1p52)
+            issubnormal(b) && (b *= 0x1p52)
+            a = reinterpret(Float64, (reinterpret(UInt64, a) & ~Base.exponent_mask(Float64)) | Base.exponent_one(Float64))
+            b = reinterpret(Float64, (reinterpret(UInt64, b) & ~Base.exponent_mask(Float64)) | Base.exponent_one(Float64))
+            c = c_denorm
+            abhi, ablo = twomul(a,b)
+            # abhi <= 4 -> isfinite(r)      (α)
+            r = abhi+c
+            # s ≈ 0                         (β)
+            s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo)
+            # α ⩓ β -> isfinite(sumhi)      (γ)
+            sumhi = r+s
+            # If result is subnormal, ldexp will cause double rounding because subnormals have fewer mantisa bits.
+            # As such, we need to check whether round to even would lead to double rounding and manually round sumhi to avoid it.
+            if issubnormal(ldexp(sumhi, bias))
+                sumlo = r-sumhi+s
+                # finite: See γ
+                # non-zero: If sumhi == ±0., then ldexp(sumhi, bias) == ±0,
+                # so we don't take this branch.
+                bits_lost = -bias-Math._exponent_finite_nonzero(sumhi)-1022
+                sumhiInt = reinterpret(UInt64, sumhi)
+                if (bits_lost != 1) ⊻ (sumhiInt&1 == 1)
+                    sumhi = nextfloat(sumhi, cmp(sumlo,0))
+                end
+            end
+            return ldexp(sumhi, bias)
+        end
+        isinf(abhi) && signbit(c) == signbit(a*b) && return abhi
+        # fall through
+    end
+    r = abhi+c
+    s = (abs(abhi) > abs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo)
+    return r+s
+end
 fma_llvm(x::Float32, y::Float32, z::Float32) = fma_float(x, y, z)
 fma_llvm(x::Float64, y::Float64, z::Float64) = fma_float(x, y, z)
+
 # Disable LLVM's fma if it is incorrect, e.g. because LLVM falls back
-# onto a broken system libm; if so, use openlibm's fma instead
-# 1.0000305f0 = 1 + 1/2^15
-# 1.0000000009313226 = 1 + 1/2^30
-# If fma_llvm() clobbers the rounding mode, the result of 0.1 + 0.2 will be 0.3
-# instead of the properly-rounded 0.30000000000000004; check after calling fma
-if (Sys.ARCH !== :i686 && fma_llvm(1.0000305f0, 1.0000305f0, -1.0f0) == 6.103609f-5 &&
-    (fma_llvm(1.0000000009313226, 1.0000000009313226, -1.0) ==
-     1.8626451500983188e-9) && 0.1 + 0.2 == 0.30000000000000004)
-    fma(x::Float32, y::Float32, z::Float32) = fma_llvm(x,y,z)
-    fma(x::Float64, y::Float64, z::Float64) = fma_llvm(x,y,z)
-else
-    fma(x::Float32, y::Float32, z::Float32) = fma_libm(x,y,z)
-    fma(x::Float64, y::Float64, z::Float64) = fma_libm(x,y,z)
-end
+# onto a broken system libm; if so, use a software emulated fma
+@assume_effects :consistent fma(x::Float32, y::Float32, z::Float32) = Core.Intrinsics.have_fma(Float32) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
+@assume_effects :consistent fma(x::Float64, y::Float64, z::Float64) = Core.Intrinsics.have_fma(Float64) ? fma_llvm(x,y,z) : fma_emulated(x,y,z)
+
 function fma(a::Float16, b::Float16, c::Float16)
-    Float16(fma(Float32(a), Float32(b), Float32(c)))
+    Float16(muladd(Float32(a), Float32(b), Float32(c))) #don't use fma if the hardware doesn't have it.
 end
 
 # This is necessary at least on 32-bit Intel Linux, since fma_llvm may
diff --git a/base/gcutils.jl b/base/gcutils.jl
index 51e3943877444c..d17301a1be9b07 100644
--- a/base/gcutils.jl
+++ b/base/gcutils.jl
@@ -50,8 +50,7 @@ function finalizer(@nospecialize(f), @nospecialize(o))
     return o
 end
 
-function finalizer(f::Ptr{Cvoid}, o::T) where T
-    @_inline_meta
+function finalizer(f::Ptr{Cvoid}, o::T) where T @inline
     if !ismutable(o)
         error("objects of type ", typeof(o), " cannot be finalized")
     end
@@ -65,8 +64,8 @@ end
 
 Immediately run finalizers registered for object `x`.
 """
-finalize(@nospecialize(o)) = ccall(:jl_finalize_th, Cvoid, (Ptr{Cvoid}, Any,),
-                                   Core.getptls(), o)
+finalize(@nospecialize(o)) = ccall(:jl_finalize_th, Cvoid, (Any, Any,),
+                                   current_task(), o)
 
 """
     Base.GC
@@ -106,6 +105,27 @@ Control whether garbage collection is enabled using a boolean argument (`true` f
 """
 enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0
 
+"""
+    GC.enable_finalizers(on::Bool)
+
+Increment or decrement the counter that controls the running of finalizers on
+the current Task. Finalizers will only run when the counter is at zero. (Set
+`true` for enabling, `false` for disabling). They may still run concurrently on
+another Task or thread.
+"""
+enable_finalizers(on::Bool) = on ? enable_finalizers() : disable_finalizers()
+
+function enable_finalizers() @inline
+    ccall(:jl_gc_enable_finalizers_internal, Cvoid, ())
+    if Core.Intrinsics.atomic_pointerref(cglobal(:jl_gc_have_pending_finalizers, Cint), :monotonic) != 0
+        ccall(:jl_gc_run_pending_finalizers, Cvoid, (Ptr{Cvoid},), C_NULL)
+    end
+end
+
+function disable_finalizers() @inline
+    ccall(:jl_gc_disable_finalizers_internal, Cvoid, ())
+end
+
 """
     GC.@preserve x1 x2 ... xn expr
 
@@ -177,4 +197,13 @@ collection to run.
 """
 safepoint() = ccall(:jl_gc_safepoint, Cvoid, ())
 
+"""
+    GC.enable_logging(on::Bool)
+
+When turned on, print statistics about each GC to stderr.
+"""
+function enable_logging(on::Bool=true)
+    ccall(:jl_enable_gc_logging, Cvoid, (Cint,), on)
+end
+
 end # module GC
diff --git a/base/generator.jl b/base/generator.jl
index 203d1dbba11990..d11742fe5b72f0 100644
--- a/base/generator.jl
+++ b/base/generator.jl
@@ -40,7 +40,7 @@ Generator(::Type{T}, iter::I) where {T,I} = Generator{I,Type{T}}(T, iter)
 Generator(::Type{T}, I1, I2, Is...) where {T} = Generator(a->T(a...), zip(I1, I2, Is...))
 
 function iterate(g::Generator, s...)
-    @_inline_meta
+    @inline
     y = iterate(g.iter, s...)
     y === nothing && return nothing
     y = y::Tuple{Any, Any} # try to give inference some idea of what to expect about the behavior of the next line
@@ -51,7 +51,10 @@ length(g::Generator) = length(g.iter)
 size(g::Generator) = size(g.iter)
 axes(g::Generator) = axes(g.iter)
 ndims(g::Generator) = ndims(g.iter)
-
+keys(g::Generator) = keys(g.iter)
+last(g::Generator) = g.f(last(g.iter))
+isempty(g::Generator) = isempty(g.iter)
+isdone(g::Generator, state...) = isdone(g.iter, state...)
 
 ## iterator traits
 
diff --git a/base/gmp.jl b/base/gmp.jl
index be2d8128edde52..5d3cabac87e40f 100644
--- a/base/gmp.jl
+++ b/base/gmp.jl
@@ -4,13 +4,13 @@ module GMP
 
 export BigInt
 
-import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor,
+import .Base: *, +, -, /, <, <<, >>, >>>, <=, ==, >, >=, ^, (~), (&), (|), xor, nand, nor,
              binomial, cmp, convert, div, divrem, factorial, cld, fld, gcd, gcdx, lcm, mod,
-             ndigits, promote_rule, rem, show, isqrt, string, powermod,
-             sum, trailing_zeros, trailing_ones, count_ones, tryparse_internal,
+             ndigits, promote_rule, rem, show, isqrt, string, powermod, sum, prod,
+             trailing_zeros, trailing_ones, count_ones, count_zeros, tryparse_internal,
              bin, oct, dec, hex, isequal, invmod, _prevpow2, _nextpow2, ndigits0zpb,
              widen, signed, unsafe_trunc, trunc, iszero, isone, big, flipsign, signbit,
-             sign, hastypemax, isodd, digits!
+             sign, hastypemax, isodd, iseven, digits!, hash, hash_integer
 
 if Clong == Int32
     const ClongMax = Union{Int8, Int16, Int32}
@@ -94,10 +94,10 @@ const ALLOC_OVERFLOW_FUNCTION = Ref(false)
 function __init__()
     try
         if version().major != VERSION.major || bits_per_limb() != BITS_PER_LIMB
-            msg = bits_per_limb() != BITS_PER_LIMB ? error : warn
-            msg("The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))\n",
-                "does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).\n",
-                "Please rebuild Julia.")
+            msg = """The dynamically loaded GMP library (v\"$(version())\" with __gmp_bits_per_limb == $(bits_per_limb()))
+                     does not correspond to the compile time version (v\"$VERSION\" with __gmp_bits_per_limb == $BITS_PER_LIMB).
+                     Please rebuild Julia."""
+            bits_per_limb() != BITS_PER_LIMB ? @error(msg) : @warn(msg)
         end
 
         ccall((:__gmp_set_memory_functions, :libgmp), Cvoid,
@@ -132,7 +132,7 @@ module MPZ
 # - a method modifying its input has a "!" appendend to its name, according to Julia's conventions
 # - some convenient methods are added (in addition to the pure MPZ ones), e.g. `add(a, b) = add!(BigInt(), a, b)`
 #   and `add!(x, a) = add!(x, x, a)`.
-using .Base.GMP: BigInt, Limb, BITS_PER_LIMB
+using ..GMP: BigInt, Limb, BITS_PER_LIMB
 
 const mpz_t = Ref{BigInt}
 const bitcnt_t = Culong
@@ -178,7 +178,9 @@ ui_sub!(x::BigInt, a, b::BigInt) = (ccall((:__gmpz_ui_sub, :libgmp), Cvoid, (mpz
 ui_sub(a, b::BigInt) = ui_sub!(BigInt(), a, b)
 
 for op in (:scan1, :scan0)
-    @eval $op(a::BigInt, b) = Int(ccall($(gmpz(op)), Culong, (mpz_t, Culong), a, b))
+    # when there is no meaningful answer, ccall returns typemax(Culong), where Culong can
+    # be UInt32 (Windows) or UInt64; we return -1 in this case for all architectures
+    @eval $op(a::BigInt, b) = Int(signed(ccall($(gmpz(op)), Culong, (mpz_t, Culong), a, b)))
 end
 
 mul_si!(x::BigInt, a::BigInt, b) = (ccall((:__gmpz_mul_si, :libgmp), Cvoid, (mpz_t, mpz_t, Clong), x, a, b); x)
@@ -203,7 +205,7 @@ for (op, T) in ((:fac_ui, Culong), (:set_ui, Culong), (:set_si, Clong), (:set_d,
     end
 end
 
-popcount(a::BigInt) = Int(ccall((:__gmpz_popcount, :libgmp), Culong, (mpz_t,), a))
+popcount(a::BigInt) = Int(signed(ccall((:__gmpz_popcount, :libgmp), Culong, (mpz_t,), a)))
 
 mpn_popcount(d::Ptr{Limb}, s::Integer) = Int(ccall((:__gmpn_popcount, :libgmp), Culong, (Ptr{Limb}, Csize_t), d, s))
 mpn_popcount(a::BigInt) = mpn_popcount(a.d, abs(a.size))
@@ -292,14 +294,14 @@ BigInt(x::Union{Clong,Int32}) = MPZ.set_si(x)
 BigInt(x::Union{Culong,UInt32}) = MPZ.set_ui(x)
 BigInt(x::Bool) = BigInt(UInt(x))
 
-unsafe_trunc(::Type{BigInt}, x::Union{Float32,Float64}) = MPZ.set_d(x)
+unsafe_trunc(::Type{BigInt}, x::Union{Float16,Float32,Float64}) = MPZ.set_d(x)
 
-function BigInt(x::Union{Float32,Float64})
+function BigInt(x::Float64)
     isinteger(x) || throw(InexactError(:BigInt, BigInt, x))
     unsafe_trunc(BigInt,x)
 end
 
-function trunc(::Type{BigInt}, x::Union{Float32,Float64})
+function trunc(::Type{BigInt}, x::Union{Float16,Float32,Float64})
     isfinite(x) || throw(InexactError(:trunc, BigInt, x))
     unsafe_trunc(BigInt,x)
 end
@@ -308,21 +310,21 @@ BigInt(x::Float16) = BigInt(Float64(x))
 BigInt(x::Float32) = BigInt(Float64(x))
 
 function BigInt(x::Integer)
-    x == 0 && return BigInt(Culong(0))
+    # On 64-bit Windows, `Clong` is `Int32`, not `Int64`, so construction of
+    # `Int64` constants, e.g. `BigInt(3)`, uses this method.
+    isbits(x) && typemin(Clong) <= x <= typemax(Clong) && return BigInt((x % Clong)::Clong)
     nd = ndigits(x, base=2)
     z = MPZ.realloc2(nd)
-    s = sign(x)
-    s == -1 && (x = -x)
-    x = unsigned(x)
+    ux = unsigned(x < 0 ? -x : x)
     size = 0
     limbnbits = sizeof(Limb) << 3
     while nd > 0
         size += 1
-        unsafe_store!(z.d, x % Limb, size)
-        x >>>= limbnbits
+        unsafe_store!(z.d, ux % Limb, size)
+        ux >>= limbnbits
         nd -= limbnbits
     end
-    z.size = s*size
+    z.size = x < 0 ? -size : size
     z
 end
 
@@ -343,6 +345,7 @@ end
 rem(x::Integer, ::Type{BigInt}) = BigInt(x)
 
 isodd(x::BigInt) = MPZ.tstbit(x, 0)
+iseven(x::BigInt) = !isodd(x)
 
 function (::Type{T})(x::BigInt) where T<:Base.BitUnsigned
     if sizeof(T) < sizeof(Limb)
@@ -551,10 +554,30 @@ end
 >>(x::BigInt, c::UInt) = c == 0 ? x : MPZ.fdiv_q_2exp(x, c)
 >>>(x::BigInt, c::UInt) = x >> c
 
-trailing_zeros(x::BigInt) = MPZ.scan1(x, 0)
-trailing_ones(x::BigInt) = MPZ.scan0(x, 0)
+function trailing_zeros(x::BigInt)
+    c = MPZ.scan1(x, 0)
+    c == -1 && throw(DomainError(x, "`x` must be non-zero"))
+    c
+end
+
+function trailing_ones(x::BigInt)
+    c = MPZ.scan0(x, 0)
+    c == -1 && throw(DomainError(x, "`x` must not be equal to -1"))
+    c
+end
+
+function count_ones(x::BigInt)
+    c = MPZ.popcount(x)
+    c == -1 && throw(DomainError(x, "`x` cannot be negative"))
+    c
+end
 
-count_ones(x::BigInt) = MPZ.popcount(x)
+# generic definition is not used to provide a better error message
+function count_zeros(x::BigInt)
+    c = MPZ.popcount(~x)
+    c == -1 && throw(DomainError(x, "`x` must be negative"))
+    c
+end
 
 """
     count_ones_abs(x::BigInt)
@@ -564,6 +587,7 @@ Number of ones in the binary representation of abs(x).
 count_ones_abs(x::BigInt) = iszero(x) ? 0 : MPZ.mpn_popcount(x)
 
 divrem(x::BigInt, y::BigInt) = MPZ.tdiv_qr(x, y)
+divrem(x::BigInt, y::Integer) = MPZ.tdiv_qr(x, big(y))
 
 cmp(x::BigInt, y::BigInt) = sign(MPZ.cmp(x, y))
 cmp(x::BigInt, y::ClongMax) = sign(MPZ.cmp_si(x, y))
@@ -631,13 +655,26 @@ function gcdx(a::BigInt, b::BigInt)
     g, s, t
 end
 
-sum(arr::AbstractArray{BigInt}) = foldl(MPZ.add!, arr; init=BigInt(0))
-# Note: a similar implementation for `prod` won't be efficient:
-# 1) the time complexity of the allocations is negligible compared to the multiplications
-# 2) assuming arr contains similarly sized BigInts, the multiplications are much more
-# performant when doing e.g. ((a1*a2)*(a3*a4))*(...) rather than a1*(a2*(a3*(...))),
-# which is exactly what the default implementation of `prod` does, via `mapreduce`
-# (which maybe could be slightly optimized for BigInt).
++(x::BigInt, y::BigInt, rest::BigInt...) = sum(tuple(x, y, rest...))
+sum(arr::Union{AbstractArray{BigInt}, Tuple{BigInt, Vararg{BigInt}}}) =
+    foldl(MPZ.add!, arr; init=BigInt(0))
+
+function prod(arr::AbstractArray{BigInt})
+    # compute first the needed number of bits for the result,
+    # to avoid re-allocations;
+    # GMP will always request n+m limbs for the result in MPZ.mul!,
+    # if the arguments have n and m limbs; so we add all the bits
+    # taken by the array elements, and add BITS_PER_LIMB to that,
+    # to account for the rounding to limbs in MPZ.mul!
+    # (BITS_PER_LIMB-1 would typically be enough, to which we add
+    # 1 for the initial multiplication by init=1 in foldl)
+    nbits = GC.@preserve arr sum(arr; init=BITS_PER_LIMB) do x
+        abs(x.size) * BITS_PER_LIMB - leading_zeros(unsafe_load(x.d))
+    end
+    init = BigInt(; nbits)
+    MPZ.set_si!(init, 1)
+    foldl(MPZ.mul!, arr; init)
+end
 
 factorial(x::BigInt) = isneg(x) ? BigInt(0) : MPZ.fac_ui(x)
 
@@ -700,7 +737,7 @@ function digits!(a::AbstractVector{T}, n::BigInt; base::Integer = 10) where {T<:
         i, j = firstindex(a)-1, length(s)+1
         lasti = min(lastindex(a), firstindex(a) + length(s)-1 - isneg(n))
         while i < lasti
-            # base ≤ 36: 0-9, plus a-z for 10-35
+            # base ≤ 36: 0-9, plus a-z for 10-35
             # base > 36: 0-9, plus A-Z for 10-35 and a-z for 36..61
             x = s[j -= 1]
             a[i += 1] = base ≤ 36 ? (x>0x39 ? x-0x57 : x-0x30) : (x>0x39 ? (x>0x60 ? x-0x3d : x-0x37) : x-0x30)
@@ -755,13 +792,237 @@ Base.add_with_overflow(a::BigInt, b::BigInt) = a + b, false
 Base.sub_with_overflow(a::BigInt, b::BigInt) = a - b, false
 Base.mul_with_overflow(a::BigInt, b::BigInt) = a * b, false
 
-function Base.deepcopy_internal(x::BigInt, stackdict::IdDict)
-    if haskey(stackdict, x)
-        return stackdict[x]
+Base.deepcopy_internal(x::BigInt, stackdict::IdDict) = get!(() -> MPZ.set(x), stackdict, x)
+
+## streamlined hashing for BigInt, by avoiding allocation from shifts ##
+
+if Limb === UInt
+    # this condition is true most (all?) of the time, and in this case we can define
+    # an optimized version for BigInt of hash_integer (used e.g. for Rational{BigInt}),
+    # and of hash
+
+    using .Base: hash_uint
+
+    function hash_integer(n::BigInt, h::UInt)
+        GC.@preserve n begin
+            s = n.size
+            s == 0 && return hash_integer(0, h)
+            p = convert(Ptr{UInt}, n.d)
+            b = unsafe_load(p)
+            h ⊻= hash_uint(ifelse(s < 0, -b, b) ⊻ h)
+            for k = 2:abs(s)
+                h ⊻= hash_uint(unsafe_load(p, k) ⊻ h)
+            end
+            return h
+        end
+    end
+
+    _divLimb(n) = UInt === UInt64 ? n >>> 6 : n >>> 5
+    _modLimb(n) = UInt === UInt64 ? n & 63 : n & 31
+
+    function hash(x::BigInt, h::UInt)
+        GC.@preserve x begin
+            sz = x.size
+            sz == 0 && return hash(0, h)
+            ptr = Ptr{UInt}(x.d)
+            if sz == 1
+                return hash(unsafe_load(ptr), h)
+            elseif sz == -1
+                limb = unsafe_load(ptr)
+                limb <= typemin(Int) % UInt && return hash(-(limb % Int), h)
+            end
+            pow = trailing_zeros(x)
+            nd = Base.ndigits0z(x, 2)
+            idx = _divLimb(pow) + 1
+            shift = _modLimb(pow) % UInt
+            upshift = BITS_PER_LIMB - shift
+            asz = abs(sz)
+            if shift == 0
+                limb = unsafe_load(ptr, idx)
+            else
+                limb1 = unsafe_load(ptr, idx)
+                limb2 = idx < asz ? unsafe_load(ptr, idx+1) : UInt(0)
+                limb = limb2 << upshift | limb1 >> shift
+            end
+            if nd <= 1024 && nd - pow <= 53
+                return hash(ldexp(flipsign(Float64(limb), sz), pow), h)
+            end
+            h = hash_integer(1, h)
+            h = hash_integer(pow, h)
+            h ⊻= hash_uint(flipsign(limb, sz) ⊻ h)
+            for idx = idx+1:asz
+                if shift == 0
+                    limb = unsafe_load(ptr, idx)
+                else
+                    limb1 = limb2
+                    if idx == asz
+                        limb = limb1 >> shift
+                        limb == 0 && break # don't hash leading zeros
+                    else
+                        limb2 = unsafe_load(ptr, idx+1)
+                        limb = limb2 << upshift | limb1 >> shift
+                    end
+                end
+                h ⊻= hash_uint(limb ⊻ h)
+            end
+            return h
+        end
+    end
+end
+
+module MPQ
+
+# Rational{BigInt}
+import .Base: unsafe_rational, __throw_rational_argerror_zero
+import ..GMP: BigInt, MPZ, Limb, isneg
+
+gmpq(op::Symbol) = (Symbol(:__gmpq_, op), :libgmp)
+
+mutable struct _MPQ
+    num_alloc::Cint
+    num_size::Cint
+    num_d::Ptr{Limb}
+    den_alloc::Cint
+    den_size::Cint
+    den_d::Ptr{Limb}
+    # to prevent GC
+    rat::Rational{BigInt}
+end
+
+const mpq_t = Ref{_MPQ}
+
+_MPQ(x::BigInt,y::BigInt) = _MPQ(x.alloc, x.size, x.d,
+                                 y.alloc, y.size, y.d,
+                                 unsafe_rational(BigInt, x, y))
+_MPQ() = _MPQ(BigInt(), BigInt())
+_MPQ(x::Rational{BigInt}) = _MPQ(x.num, x.den)
+
+function sync_rational!(xq::_MPQ)
+    xq.rat.num.alloc = xq.num_alloc
+    xq.rat.num.size  = xq.num_size
+    xq.rat.num.d     = xq.num_d
+    xq.rat.den.alloc = xq.den_alloc
+    xq.rat.den.size  = xq.den_size
+    xq.rat.den.d     = xq.den_d
+    return xq.rat
+end
+
+function Rational{BigInt}(num::BigInt, den::BigInt)
+    if iszero(den)
+        iszero(num) && __throw_rational_argerror_zero(BigInt)
+        return set_si(flipsign(1, num), 0)
+    end
+    xq = _MPQ(MPZ.set(num), MPZ.set(den))
+    ccall((:__gmpq_canonicalize, :libgmp), Cvoid, (mpq_t,), xq)
+    return sync_rational!(xq)
+end
+
+# define set, set_ui, set_si, set_z, and their inplace versions
+function set!(z::Rational{BigInt}, x::Rational{BigInt})
+    zq = _MPQ(z)
+    ccall((:__gmpq_set, :libgmp), Cvoid, (mpq_t, mpq_t), zq, _MPQ(x))
+    return sync_rational!(zq)
+end
+
+function set_z!(z::Rational{BigInt}, x::BigInt)
+    zq = _MPQ(z)
+    ccall((:__gmpq_set_z, :libgmp), Cvoid, (mpq_t, MPZ.mpz_t), zq, x)
+    return sync_rational!(zq)
+end
+
+for (op, T) in ((:set, Rational{BigInt}), (:set_z, BigInt))
+    op! = Symbol(op, :!)
+    @eval $op(a::$T) = $op!(unsafe_rational(BigInt(), BigInt()), a)
+end
+
+# note that rationals returned from set_ui and set_si are not checked,
+# set_ui(0, 0) will return 0//0 without errors, just like unsafe_rational
+for (op, T1, T2) in ((:set_ui, Culong, Culong), (:set_si, Clong, Culong))
+    op! = Symbol(op, :!)
+    @eval begin
+        function $op!(z::Rational{BigInt}, a, b)
+            zq = _MPQ(z)
+            ccall($(gmpq(op)), Cvoid, (mpq_t, $T1, $T2), zq, a, b)
+            return sync_rational!(zq)
+        end
+        $op(a, b) = $op!(unsafe_rational(BigInt(), BigInt()), a, b)
+    end
+end
+
+# define add, sub, mul, div, and their inplace versions
+function add!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt})
+    if iszero(x.den) || iszero(y.den)
+        if iszero(x.den) && iszero(y.den) && isneg(x.num) != isneg(y.num)
+            throw(DivideError())
+        end
+        return set!(z, iszero(x.den) ? x : y)
+    end
+    zq = _MPQ(z)
+    ccall((:__gmpq_add, :libgmp), Cvoid,
+          (mpq_t,mpq_t,mpq_t), zq, _MPQ(x), _MPQ(y))
+    return sync_rational!(zq)
+end
+
+function sub!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt})
+    if iszero(x.den) || iszero(y.den)
+        if iszero(x.den) && iszero(y.den) && isneg(x.num) == isneg(y.num)
+            throw(DivideError())
+        end
+        iszero(x.den) && return set!(z, x)
+        return set_si!(z, flipsign(-1, y.num), 0)
+    end
+    zq = _MPQ(z)
+    ccall((:__gmpq_sub, :libgmp), Cvoid,
+          (mpq_t,mpq_t,mpq_t), zq, _MPQ(x), _MPQ(y))
+    return sync_rational!(zq)
+end
+
+function mul!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt})
+    if iszero(x.den) || iszero(y.den)
+        if iszero(x.num) || iszero(y.num)
+            throw(DivideError())
+        end
+        return set_si!(z, ifelse(xor(isneg(x.num), isneg(y.num)), -1, 1), 0)
+    end
+    zq = _MPQ(z)
+    ccall((:__gmpq_mul, :libgmp), Cvoid,
+          (mpq_t,mpq_t,mpq_t), zq, _MPQ(x), _MPQ(y))
+    return sync_rational!(zq)
+end
+
+function div!(z::Rational{BigInt}, x::Rational{BigInt}, y::Rational{BigInt})
+    if iszero(x.den)
+        if iszero(y.den)
+            throw(DivideError())
+        end
+        isneg(y.num) || return set!(z, x)
+        return set_si!(z, flipsign(-1, x.num), 0)
+    elseif iszero(y.den)
+        return set_si!(z, 0, 1)
+    elseif iszero(y.num)
+        if iszero(x.num)
+            throw(DivideError())
+        end
+        return set_si!(z, flipsign(1, x.num), 0)
+    end
+    zq = _MPQ(z)
+    ccall((:__gmpq_div, :libgmp), Cvoid,
+          (mpq_t,mpq_t,mpq_t), zq, _MPQ(x), _MPQ(y))
+    return sync_rational!(zq)
+end
+
+for (fJ, fC) in ((:+, :add), (:-, :sub), (:*, :mul), (://, :div))
+    fC! = Symbol(fC, :!)
+    @eval begin
+        ($fC!)(x::Rational{BigInt}, y::Rational{BigInt}) = $fC!(x, x, y)
+        (Base.$fJ)(x::Rational{BigInt}, y::Rational{BigInt}) = $fC!(unsafe_rational(BigInt(), BigInt()), x, y)
     end
-    y = MPZ.set(x)
-    stackdict[x] = y
-    return y
 end
 
+function Base.cmp(x::Rational{BigInt}, y::Rational{BigInt})
+    Int(ccall((:__gmpq_cmp, :libgmp), Cint, (mpq_t, mpq_t), _MPQ(x), _MPQ(y)))
+end
+
+end # MPQ module
+
 end # module
diff --git a/base/hashing.jl b/base/hashing.jl
index f40ccb50f0f5bd..746017f978dcb0 100644
--- a/base/hashing.jl
+++ b/base/hashing.jl
@@ -3,17 +3,19 @@
 ## hashing a single value ##
 
 """
-    hash(x[, h::UInt])
+    hash(x[, h::UInt]) -> UInt
 
 Compute an integer hash code such that `isequal(x,y)` implies `hash(x)==hash(y)`. The
 optional second argument `h` is a hash code to be mixed with the result.
 
 New types should implement the 2-argument form, typically by calling the 2-argument `hash`
 method recursively in order to mix hashes of the contents with each other (and with `h`).
-Typically, any type that implements `hash` should also implement its own `==` (hence
-`isequal`) to guarantee the property mentioned above. Types supporting subtraction
+Typically, any type that implements `hash` should also implement its own [`==`](@ref) (hence
+[`isequal`](@ref)) to guarantee the property mentioned above. Types supporting subtraction
 (operator `-`) should also implement [`widen`](@ref), which is required to hash
 values inside heterogeneous arrays.
+
+See also: [`objectid`](@ref), [`Dict`](@ref), [`Set`](@ref).
 """
 hash(x::Any) = hash(x, zero(UInt))
 hash(w::WeakRef, h::UInt) = hash(w.value, h)
@@ -22,6 +24,8 @@ hash(w::WeakRef, h::UInt) = hash(w.value, h)
 
 hash(@nospecialize(x), h::UInt) = hash_uint(3h - objectid(x))
 
+hash(x::Symbol) = objectid(x)
+
 ## core data hashing functions ##
 
 function hash_64_64(n::UInt64)
@@ -66,6 +70,23 @@ else
     hash_uint(x::UInt)     = hash_32_32(x)
 end
 
+## efficient value-based hashing of integers ##
+
+hash(x::Int64,  h::UInt) = hash_uint64(bitcast(UInt64, x)) - 3h
+hash(x::UInt64, h::UInt) = hash_uint64(x) - 3h
+hash(x::Union{Bool,Int8,UInt8,Int16,UInt16,Int32,UInt32}, h::UInt) = hash(Int64(x), h)
+
+function hash_integer(n::Integer, h::UInt)
+    h ⊻= hash_uint((n % UInt) ⊻ h)
+    n = abs(n)
+    n >>>= sizeof(UInt) << 3
+    while n != 0
+        h ⊻= hash_uint((n % UInt) ⊻ h)
+        n >>>= sizeof(UInt) << 3
+    end
+    return h
+end
+
 ## symbol & expression hashing ##
 
 if UInt === UInt64
diff --git a/base/hashing2.jl b/base/hashing2.jl
deleted file mode 100644
index f7ea3838aa0969..00000000000000
--- a/base/hashing2.jl
+++ /dev/null
@@ -1,232 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## efficient value-based hashing of integers ##
-
-function hash_integer(n::Integer, h::UInt)
-    h ⊻= hash_uint((n % UInt) ⊻ h)
-    n = abs(n)
-    n >>>= sizeof(UInt) << 3
-    while n != 0
-        h ⊻= hash_uint((n % UInt) ⊻ h)
-        n >>>= sizeof(UInt) << 3
-    end
-    return h
-end
-
-# this condition is true most (all?) of the time, and in this case we can define
-# an optimized version of the above hash_integer(::Integer, ::UInt) method for BigInt
-if GMP.Limb === UInt
-    # used e.g. for Rational{BigInt}
-    function hash_integer(n::BigInt, h::UInt)
-        GC.@preserve n begin
-            s = n.size
-            s == 0 && return hash_integer(0, h)
-            p = convert(Ptr{UInt}, n.d)
-            b = unsafe_load(p)
-            h ⊻= hash_uint(ifelse(s < 0, -b, b) ⊻ h)
-            for k = 2:abs(s)
-                h ⊻= hash_uint(unsafe_load(p, k) ⊻ h)
-            end
-            return h
-        end
-    end
-end
-
-## generic hashing for rational values ##
-
-function hash(x::Real, h::UInt)
-    # decompose x as num*2^pow/den
-    num, pow, den = decompose(x)
-
-    # handle special values
-    num == 0 && den == 0 && return hash(NaN, h)
-    num == 0 && return hash(ifelse(den > 0, 0.0, -0.0), h)
-    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)
-
-    # normalize decomposition
-    if den < 0
-        num = -num
-        den = -den
-    end
-    z = trailing_zeros(num)
-    if z != 0
-        num >>= z
-        pow += z
-    end
-    z = trailing_zeros(den)
-    if z != 0
-        den >>= z
-        pow -= z
-    end
-
-    # handle values representable as Int64, UInt64, Float64
-    if den == 1
-        left = ndigits0z(num,2) + pow
-        right = trailing_zeros(num) + pow
-        if -1074 <= right
-            if 0 <= right && left <= 64
-                left <= 63                     && return hash(Int64(num) << Int(pow), h)
-                signbit(num) == signbit(den)   && return hash(UInt64(num) << Int(pow), h)
-            end # typemin(Int64) handled by Float64 case
-            left <= 1024 && left - right <= 53 && return hash(ldexp(Float64(num),pow), h)
-        end
-    end
-
-    # handle generic rational values
-    h = hash_integer(den, h)
-    h = hash_integer(pow, h)
-    h = hash_integer(num, h)
-    return h
-end
-
-## streamlined hashing for BigInt, by avoiding allocation from shifts ##
-
-if GMP.Limb === UInt
-    _divLimb(n) = UInt === UInt64 ? n >>> 6 : n >>> 5
-    _modLimb(n) = UInt === UInt64 ? n & 63 : n & 31
-
-    function hash(x::BigInt, h::UInt)
-        GC.@preserve x begin
-            sz = x.size
-            sz == 0 && return hash(0, h)
-            ptr = Ptr{UInt}(x.d)
-            if sz == 1
-                return hash(unsafe_load(ptr), h)
-            elseif sz == -1
-                limb = unsafe_load(ptr)
-                limb <= typemin(Int) % UInt && return hash(-(limb % Int), h)
-            end
-            pow = trailing_zeros(x)
-            nd = ndigits0z(x, 2)
-            idx = _divLimb(pow) + 1
-            shift = _modLimb(pow) % UInt
-            upshift = GMP.BITS_PER_LIMB - shift
-            asz = abs(sz)
-            if shift == 0
-                limb = unsafe_load(ptr, idx)
-            else
-                limb1 = unsafe_load(ptr, idx)
-                limb2 = idx < asz ? unsafe_load(ptr, idx+1) : UInt(0)
-                limb = limb2 << upshift | limb1 >> shift
-            end
-            if nd <= 1024 && nd - pow <= 53
-                return hash(ldexp(flipsign(Float64(limb), sz), pow), h)
-            end
-            h = hash_integer(1, h)
-            h = hash_integer(pow, h)
-            h ⊻= hash_uint(flipsign(limb, sz) ⊻ h)
-            for idx = idx+1:asz
-                if shift == 0
-                    limb = unsafe_load(ptr, idx)
-                else
-                    limb1 = limb2
-                    if idx == asz
-                        limb = limb1 >> shift
-                        limb == 0 && break # don't hash leading zeros
-                    else
-                        limb2 = unsafe_load(ptr, idx+1)
-                        limb = limb2 << upshift | limb1 >> shift
-                    end
-                end
-                h ⊻= hash_uint(limb ⊻ h)
-            end
-            return h
-        end
-    end
-end
-
-#=
-`decompose(x)`: non-canonical decomposition of rational values as `num*2^pow/den`.
-
-The decompose function is the point where rational-valued numeric types that support
-hashing hook into the hashing protocol. `decompose(x)` should return three integer
-values `num, pow, den`, such that the value of `x` is mathematically equal to
-
-    num*2^pow/den
-
-The decomposition need not be canonical in the sense that it just needs to be *some*
-way to express `x` in this form, not any particular way – with the restriction that
-`num` and `den` may not share any odd common factors. They may, however, have powers
-of two in common – the generic hashing code will normalize those as necessary.
-
-Special values:
-
- - `x` is zero: `num` should be zero and `den` should have the same sign as `x`
- - `x` is infinite: `den` should be zero and `num` should have the same sign as `x`
- - `x` is not a number: `num` and `den` should both be zero
-=#
-
-decompose(x::Integer) = x, 0, 1
-decompose(x::Rational) = numerator(x), 0, denominator(x)
-
-function decompose(x::Float16)::NTuple{3,Int}
-    isnan(x) && return 0, 0, 0
-    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
-    n = reinterpret(UInt16, x)
-    s = (n & 0x03ff) % Int16
-    e = ((n & 0x7c00) >> 10) % Int
-    s |= Int16(e != 0) << 10
-    d = ifelse(signbit(x), -1, 1)
-    s, e - 25 + (e == 0), d
-end
-
-function decompose(x::Float32)::NTuple{3,Int}
-    isnan(x) && return 0, 0, 0
-    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
-    n = reinterpret(UInt32, x)
-    s = (n & 0x007fffff) % Int32
-    e = ((n & 0x7f800000) >> 23) % Int
-    s |= Int32(e != 0) << 23
-    d = ifelse(signbit(x), -1, 1)
-    s, e - 150 + (e == 0), d
-end
-
-function decompose(x::Float64)::Tuple{Int64, Int, Int}
-    isnan(x) && return 0, 0, 0
-    isinf(x) && return ifelse(x < 0, -1, 1), 0, 0
-    n = reinterpret(UInt64, x)
-    s = (n & 0x000fffffffffffff) % Int64
-    e = ((n & 0x7ff0000000000000) >> 52) % Int
-    s |= Int64(e != 0) << 52
-    d = ifelse(signbit(x), -1, 1)
-    s, e - 1075 + (e == 0), d
-end
-
-function decompose(x::BigFloat)::Tuple{BigInt, Int, Int}
-    isnan(x) && return 0, 0, 0
-    isinf(x) && return x.sign, 0, 0
-    x == 0 && return 0, 0, x.sign
-    s = BigInt()
-    s.size = cld(x.prec, 8*sizeof(GMP.Limb)) # limbs
-    b = s.size * sizeof(GMP.Limb)            # bytes
-    ccall((:__gmpz_realloc2, :libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits
-    ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), s.d, x.d, b) # bytes
-    s, x.exp - 8b, x.sign
-end
-
-## streamlined hashing for smallish rational types ##
-
-function hash(x::Rational{<:BitInteger64}, h::UInt)
-    num, den = Base.numerator(x), Base.denominator(x)
-    den == 1 && return hash(num, h)
-    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)
-    if isodd(den)
-        pow = trailing_zeros(num)
-        num >>= pow
-    else
-        pow = trailing_zeros(den)
-        den >>= pow
-        pow = -pow
-        if den == 1 && abs(num) < 9007199254740992
-            return hash(ldexp(Float64(num),pow),h)
-        end
-    end
-    h = hash_integer(den, h)
-    h = hash_integer(pow, h)
-    h = hash_integer(num, h)
-    return h
-end
-
-## hashing Float16s ##
-
-hash(x::Float16, h::UInt) = hash(Float64(x), h)
diff --git a/base/iddict.jl b/base/iddict.jl
index 3ea3a01f6244b0..7247a85c9afc80 100644
--- a/base/iddict.jl
+++ b/base/iddict.jl
@@ -1,10 +1,27 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 """
     IdDict([itr])
 
-`IdDict{K,V}()` constructs a hash table using object-id as hash and
+`IdDict{K,V}()` constructs a hash table using [`objectid`](@ref) as hash and
 `===` as equality with keys of type `K` and values of type `V`.
 
-See [`Dict`](@ref) for further help.
+See [`Dict`](@ref) for further help. In the example below, The `Dict`
+keys are all `isequal` and therefore get hashed the same, so they get overwritten.
+The `IdDict` hashes by object-id, and thus preserves the 3 different keys.
+
+# Examples
+```julia-repl
+julia> Dict(true => "yes", 1 => "no", 1.0 => "maybe")
+Dict{Real, String} with 1 entry:
+  1.0 => "maybe"
+
+julia> IdDict(true => "yes", 1 => "no", 1.0 => "maybe")
+IdDict{Any, String} with 3 entries:
+  true => "yes"
+  1.0  => "maybe"
+  1    => "no"
+```
 """
 mutable struct IdDict{K,V} <: AbstractDict{K,V}
     ht::Vector{Any}
diff --git a/base/idset.jl b/base/idset.jl
index cec8ed96caff8d..0a4d4275b42315 100644
--- a/base/idset.jl
+++ b/base/idset.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # Like Set, but using IdDict
 mutable struct IdSet{T} <: AbstractSet{T}
     dict::IdDict{T,Nothing}
@@ -10,6 +12,7 @@ IdSet{T}(itr) where {T} = union!(IdSet{T}(), itr)
 IdSet() = IdSet{Any}()
 
 copymutable(s::IdSet) = typeof(s)(s)
+emptymutable(s::IdSet{T}, ::Type{U}=T) where {T,U} = IdSet{U}()
 copy(s::IdSet) = typeof(s)(s)
 
 isempty(s::IdSet) = isempty(s.dict)
diff --git a/base/indices.jl b/base/indices.jl
index d337d7979b20af..8cea043569ae62 100644
--- a/base/indices.jl
+++ b/base/indices.jl
@@ -23,11 +23,11 @@ A linear indexing style uses one integer index to describe the position in the a
 (even if it's a multidimensional array) and column-major
 ordering is used to efficiently access the elements. This means that
 requesting [`eachindex`](@ref) from an array that is `IndexLinear` will return
-a simple one-dimensional range, even if it is multidimensional.
+a simple one-dimensional range, even if it is multidimensional.
 
 A custom array that reports its `IndexStyle` as `IndexLinear` only needs
 to implement indexing (and indexed assignment) with a single `Int` index;
-all other indexing expressions — including multidimensional accesses — will
+all other indexing expressions — including multidimensional accesses — will
 be recomputed to the linear index.  For example, if `A` were a `2×3` custom
 matrix with linear indexing, and we referenced `A[1, 3]`, this would be
 recomputed to the equivalent linear index and call `A[5]` since `2*1 + 3 = 5`.
@@ -50,13 +50,13 @@ a range of [`CartesianIndices`](@ref).
 
 A `N`-dimensional custom array that reports its `IndexStyle` as `IndexCartesian` needs
 to implement indexing (and indexed assignment) with exactly `N` `Int` indices;
-all other indexing expressions — including linear indexing — will
+all other indexing expressions — including linear indexing — will
 be recomputed to the equivalent Cartesian location.  For example, if `A` were a `2×3` custom
 matrix with cartesian indexing, and we referenced `A[5]`, this would be
 recomputed to the equivalent Cartesian index and call `A[1, 3]` since `5 = 2*1 + 3`.
 
 It is significantly more expensive to compute Cartesian indices from a linear index than it is
-to go the other way.  The former operation requires division — a very costly operation — whereas
+to go the other way.  The former operation requires division — a very costly operation — whereas
 the latter only uses multiplication and addition and is essentially free. This asymmetry means it
 is far more costly to use linear indexing with an `IndexCartesian` array than it is to use
 Cartesian indexing with an `IndexLinear` array.
@@ -239,6 +239,9 @@ setindex_shape_check(X::AbstractArray) =
 setindex_shape_check(X::AbstractArray, i::Integer) =
     (length(X)==i || throw_setindex_mismatch(X, (i,)))
 
+setindex_shape_check(X::AbstractArray{<:Any, 0}, i::Integer...) =
+    (length(X) == prod(i) || throw_setindex_mismatch(X, i))
+
 setindex_shape_check(X::AbstractArray{<:Any,1}, i::Integer) =
     (length(X)==i || throw_setindex_mismatch(X, (i,)))
 
@@ -256,7 +259,7 @@ function setindex_shape_check(X::AbstractArray{<:Any,2}, i::Integer, j::Integer)
 end
 
 setindex_shape_check(::Any...) =
-    throw(ArgumentError("indexed assignment with a single value to many locations is not supported; perhaps use broadcasting `.=` instead?"))
+    throw(ArgumentError("indexed assignment with a single value to possibly many locations is not supported; perhaps use broadcasting `.=` instead?"))
 
 # convert to a supported index type (array or Int)
 """
@@ -318,16 +321,16 @@ which they index. To support those cases, `to_indices(A, I)` calls
 given tuple of indices and the dimensional indices of `A` in tandem. As such,
 not all index types are guaranteed to propagate to `Base.to_index`.
 """
-to_indices(A, I::Tuple) = (@_inline_meta; to_indices(A, axes(A), I))
-to_indices(A, I::Tuple{Any}) = (@_inline_meta; to_indices(A, (eachindex(IndexLinear(), A),), I))
+to_indices(A, I::Tuple) = (@inline; to_indices(A, axes(A), I))
+to_indices(A, I::Tuple{Any}) = (@inline; to_indices(A, (eachindex(IndexLinear(), A),), I))
 # In simple cases, we know that we don't need to use axes(A), optimize those.
 # Having this here avoids invalidations from multidimensional.jl: to_indices(A, I::Tuple{Vararg{Union{Integer, CartesianIndex}}})
 to_indices(A, I::Tuple{}) = ()
 to_indices(A, I::Tuple{Vararg{Int}}) = I
-to_indices(A, I::Tuple{Vararg{Integer}}) = (@_inline_meta; to_indices(A, (), I))
+to_indices(A, I::Tuple{Vararg{Integer}}) = (@inline; to_indices(A, (), I))
 to_indices(A, inds, ::Tuple{}) = ()
 to_indices(A, inds, I::Tuple{Any, Vararg{Any}}) =
-    (@_inline_meta; (to_index(A, I[1]), to_indices(A, _maybetail(inds), tail(I))...))
+    (@inline; (to_index(A, I[1]), to_indices(A, _maybetail(inds), tail(I))...))
 
 _maybetail(::Tuple{}) = ()
 _maybetail(t::Tuple) = tail(t)
@@ -348,25 +351,23 @@ struct Slice{T<:AbstractUnitRange} <: AbstractUnitRange{Int}
     indices::T
 end
 Slice(S::Slice) = S
+Slice{T}(S::Slice) where {T<:AbstractUnitRange} = Slice{T}(T(S.indices))
+
 axes(S::Slice) = (IdentityUnitRange(S.indices),)
-unsafe_indices(S::Slice) = (IdentityUnitRange(S.indices),)
 axes1(S::Slice) = IdentityUnitRange(S.indices)
 axes(S::Slice{<:OneTo}) = (S.indices,)
-unsafe_indices(S::Slice{<:OneTo}) = (S.indices,)
 axes1(S::Slice{<:OneTo}) = S.indices
 
 first(S::Slice) = first(S.indices)
 last(S::Slice) = last(S.indices)
 size(S::Slice) = (length(S.indices),)
 length(S::Slice) = length(S.indices)
-unsafe_length(S::Slice) = unsafe_length(S.indices)
-getindex(S::Slice, i::Int) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::Slice, i::AbstractUnitRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::Slice, i::StepRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::Slice, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
 show(io::IO, r::Slice) = print(io, "Base.Slice(", r.indices, ")")
 iterate(S::Slice, s...) = iterate(S.indices, s...)
 
-
 """
     IdentityUnitRange(range::AbstractUnitRange)
 
@@ -378,25 +379,28 @@ struct IdentityUnitRange{T<:AbstractUnitRange} <: AbstractUnitRange{Int}
     indices::T
 end
 IdentityUnitRange(S::IdentityUnitRange) = S
+IdentityUnitRange{T}(S::IdentityUnitRange) where {T<:AbstractUnitRange} = IdentityUnitRange{T}(T(S.indices))
+
 # IdentityUnitRanges are offset and thus have offset axes, so they are their own axes
 axes(S::IdentityUnitRange) = (S,)
-unsafe_indices(S::IdentityUnitRange) = (S,)
 axes1(S::IdentityUnitRange) = S
 axes(S::IdentityUnitRange{<:OneTo}) = (S.indices,)
-unsafe_indices(S::IdentityUnitRange{<:OneTo}) = (S.indices,)
 axes1(S::IdentityUnitRange{<:OneTo}) = S.indices
 
 first(S::IdentityUnitRange) = first(S.indices)
 last(S::IdentityUnitRange) = last(S.indices)
 size(S::IdentityUnitRange) = (length(S.indices),)
 length(S::IdentityUnitRange) = length(S.indices)
-unsafe_length(S::IdentityUnitRange) = unsafe_length(S.indices)
-getindex(S::IdentityUnitRange, i::Int) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
-getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@_inline_meta; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::Int) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::AbstractUnitRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
+getindex(S::IdentityUnitRange, i::StepRange{<:Integer}) = (@inline; @boundscheck checkbounds(S, i); i)
 show(io::IO, r::IdentityUnitRange) = print(io, "Base.IdentityUnitRange(", r.indices, ")")
 iterate(S::IdentityUnitRange, s...) = iterate(S.indices, s...)
 
+# For OneTo, the values and indices of the values are identical, so this may be defined in Base.
+# In general such an indexing operation would produce offset ranges
+getindex(S::OneTo, I::IdentityUnitRange{<:AbstractUnitRange{<:Integer}}) = (@inline; @boundscheck checkbounds(S, I); I)
+
 """
     LinearIndices(A::AbstractArray)
 
@@ -447,37 +451,42 @@ julia> linear[1,2]
 struct LinearIndices{N,R<:NTuple{N,AbstractUnitRange{Int}}} <: AbstractArray{Int,N}
     indices::R
 end
+convert(::Type{LinearIndices{N,R}}, inds::LinearIndices{N}) where {N,R<:NTuple{N,AbstractUnitRange{Int}}} =
+    LinearIndices{N,R}(convert(R, inds.indices))
 
 LinearIndices(::Tuple{}) = LinearIndices{0,typeof(())}(())
 LinearIndices(inds::NTuple{N,AbstractUnitRange{<:Integer}}) where {N} =
     LinearIndices(map(r->convert(AbstractUnitRange{Int}, r), inds))
-LinearIndices(sz::NTuple{N,<:Integer}) where {N} = LinearIndices(map(Base.OneTo, sz))
 LinearIndices(inds::NTuple{N,Union{<:Integer,AbstractUnitRange{<:Integer}}}) where {N} =
-    LinearIndices(map(i->first(i):last(i), inds))
+    LinearIndices(map(_convert2ind, inds))
 LinearIndices(A::Union{AbstractArray,SimpleVector}) = LinearIndices(axes(A))
 
-promote_rule(::Type{LinearIndices{N,R1}}, ::Type{LinearIndices{N,R2}}) where {N,R1,R2} =
-    LinearIndices{N,indices_promote_type(R1,R2)}
+_convert2ind(i::Integer) = Base.OneTo(i)
+_convert2ind(ind::AbstractUnitRange) = first(ind):last(ind)
 
 function indices_promote_type(::Type{Tuple{R1,Vararg{R1,N}}}, ::Type{Tuple{R2,Vararg{R2,N}}}) where {R1,R2,N}
     R = promote_type(R1, R2)
-    Tuple{R,Vararg{R,N}}
+    return Tuple{R, Vararg{R, N}}
 end
 
-convert(::Type{LinearIndices{N,R}}, inds::LinearIndices{N}) where {N,R} =
-    LinearIndices(convert(R, inds.indices))
+promote_rule(::Type{LinearIndices{N,R1}}, ::Type{LinearIndices{N,R2}}) where {N,R1,R2} =
+    LinearIndices{N,indices_promote_type(R1,R2)}
+promote_rule(a::Type{Slice{T1}}, b::Type{Slice{T2}}) where {T1,T2} =
+    el_same(promote_type(T1, T2), a, b)
+promote_rule(a::Type{IdentityUnitRange{T1}}, b::Type{IdentityUnitRange{T2}}) where {T1,T2} =
+    el_same(promote_type(T1, T2), a, b)
 
 # AbstractArray implementation
 IndexStyle(::Type{<:LinearIndices}) = IndexLinear()
 axes(iter::LinearIndices) = map(axes1, iter.indices)
-size(iter::LinearIndices) = map(unsafe_length, iter.indices)
+size(iter::LinearIndices) = map(length, iter.indices)
 function getindex(iter::LinearIndices, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(iter, i)
     i
 end
 function getindex(iter::LinearIndices, i::AbstractRange{<:Integer})
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(iter, i)
     @inbounds isa(iter, LinearIndices{1}) ? iter.indices[1][i] : (first(iter):last(iter))[i]
 end
@@ -488,6 +497,6 @@ iterate(iter::LinearIndices, i=1) = i > length(iter) ? nothing : (i, i+1)
 
 # Needed since firstindex and lastindex are defined in terms of LinearIndices
 first(iter::LinearIndices) = 1
-first(iter::LinearIndices{1}) = (@_inline_meta; first(axes1(iter.indices[1])))
-last(iter::LinearIndices) = (@_inline_meta; length(iter))
-last(iter::LinearIndices{1}) = (@_inline_meta; last(axes1(iter.indices[1])))
+first(iter::LinearIndices{1}) = (@inline; first(axes1(iter.indices[1])))
+last(iter::LinearIndices) = (@inline; length(iter))
+last(iter::LinearIndices{1}) = (@inline; last(axes1(iter.indices[1])))
diff --git a/base/initdefs.jl b/base/initdefs.jl
index 9a6c1a7a707041..4106ef4eb7777d 100644
--- a/base/initdefs.jl
+++ b/base/initdefs.jl
@@ -81,9 +81,8 @@ Here is an overview of some of the subdirectories that may exist in a depot:
 * `packages`: Contains packages, some of which were explicitly installed and some which are implicit dependencies. Maintained by `Pkg.jl`.
 * `registries`: Contains package registries. By default only `General`. Maintained by `Pkg.jl`.
 
-See also:
-[`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and
-[Code Loading](@ref Code-Loading).
+See also [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and
+[Code Loading](@ref code-loading).
 """
 const DEPOT_PATH = String[]
 
@@ -101,7 +100,7 @@ function init_depot_path()
     if haskey(ENV, "JULIA_DEPOT_PATH")
         str = ENV["JULIA_DEPOT_PATH"]
         isempty(str) && return
-        for path in split(str, Sys.iswindows() ? ';' : ':')
+        for path in eachsplit(str, Sys.iswindows() ? ';' : ':')
             if isempty(path)
                 append_default_depot_path!(DEPOT_PATH)
             else
@@ -161,16 +160,20 @@ have special meanings:
 The fully expanded value of `LOAD_PATH` that is searched for projects and packages
 can be seen by calling the `Base.load_path()` function.
 
-See also:
+See also
 [`JULIA_LOAD_PATH`](@ref JULIA_LOAD_PATH),
 [`JULIA_PROJECT`](@ref JULIA_PROJECT),
 [`JULIA_DEPOT_PATH`](@ref JULIA_DEPOT_PATH), and
-[Code Loading](@ref Code-Loading).
+[Code Loading](@ref code-loading).
 """
 const LOAD_PATH = copy(DEFAULT_LOAD_PATH)
 # HOME_PROJECT is no longer used, here just to avoid breaking things
 const HOME_PROJECT = Ref{Union{String,Nothing}}(nothing)
-const ACTIVE_PROJECT = Ref{Union{String,Nothing}}(nothing)
+const ACTIVE_PROJECT = Ref{Union{String,Nothing}}(nothing) # Modify this only via `Base.set_active_project(proj)`
+## Watchers for when the active project changes (e.g., Revise)
+# Each should be a thunk, i.e., `f()`. To determine the current active project,
+# the thunk can query `Base.active_project()`.
+const active_project_callbacks = []
 
 function current_project(dir::AbstractString)
     # look for project file in current dir and parents
@@ -199,7 +202,7 @@ end
 function parse_load_path(str::String)
     envs = String[]
     isempty(str) && return envs
-    for env in split(str, Sys.iswindows() ? ';' : ':')
+    for env in eachsplit(str, Sys.iswindows() ? ';' : ':')
         if isempty(env)
             for env′ in DEFAULT_LOAD_PATH
                 env′ in envs || push!(envs, env′)
@@ -232,10 +235,11 @@ function init_active_project()
     project = (JLOptions().project != C_NULL ?
         unsafe_string(Base.JLOptions().project) :
         get(ENV, "JULIA_PROJECT", nothing))
-    ACTIVE_PROJECT[] =
+    set_active_project(
         project === nothing ? nothing :
         project == "" ? nothing :
-        project == "@." ? current_project() : abspath(expanduser(project))
+        startswith(project, "@") ? load_path_expand(project) : abspath(expanduser(project))
+    )
 end
 
 ## load path expansion: turn LOAD_PATH entries into concrete paths ##
@@ -247,7 +251,7 @@ function load_path_expand(env::AbstractString)::Union{String, Nothing}
         # if you put a `@` in LOAD_PATH manually, it's expanded late
         env == "@" && return active_project(false)
         env == "@." && return current_project()
-        env == "@stdlib" && return Sys.STDLIB::String
+        env == "@stdlib" && return Sys.STDLIB
         env = replace(env, '#' => VERSION.major, count=1)
         env = replace(env, '#' => VERSION.minor, count=1)
         env = replace(env, '#' => VERSION.patch, count=1)
@@ -278,6 +282,11 @@ function load_path_expand(env::AbstractString)::Union{String, Nothing}
 end
 load_path_expand(::Nothing) = nothing
 
+"""
+    active_project()
+
+Return the path of the active `Project.toml` file. See also [`Base.set_active_project`](@ref).
+"""
 function active_project(search_load_path::Bool=true)
     for project in (ACTIVE_PROJECT[],)
         project == "@" && continue
@@ -302,7 +311,32 @@ function active_project(search_load_path::Bool=true)
     end
 end
 
+"""
+    set_active_project(projfile::Union{AbstractString,Nothing})
+
+Set the active `Project.toml` file to `projfile`. See also [`Base.active_project`](@ref).
+"""
+function set_active_project(projfile::Union{AbstractString,Nothing})
+    ACTIVE_PROJECT[] = projfile
+    for f in active_project_callbacks
+        try
+            Base.invokelatest(f)
+        catch
+            @error "active project callback $f failed" maxlog=1
+        end
+    end
+end
+
+
+"""
+    load_path()
+
+Return the fully expanded value of [`LOAD_PATH`](@ref) that is searched for projects and
+packages.
+"""
 function load_path()
+    cache = LOADING_CACHE[]
+    cache !== nothing && return cache.load_path
     paths = String[]
     for env in LOAD_PATH
         path = load_path_expand(env)
diff --git a/base/int.jl b/base/int.jl
index b3acd29c6492fd..41e53e990be5b3 100644
--- a/base/int.jl
+++ b/base/int.jl
@@ -87,15 +87,22 @@ signed(::Type{T}) where {T<:Signed} = T
 (+)(x::T, y::T) where {T<:BitInteger} = add_int(x, y)
 (*)(x::T, y::T) where {T<:BitInteger} = mul_int(x, y)
 
+negate(x) = -x
+negate(x::Unsigned) = -convert(Signed, x)
+#widenegate(x) = -convert(widen(signed(typeof(x))), x)
+
 inv(x::Integer) = float(one(x)) / float(x)
 (/)(x::T, y::T) where {T<:Integer} = float(x) / float(y)
 # skip promotion for system integer types
 (/)(x::BitInteger, y::BitInteger) = float(x) / float(y)
 
 """
-    isodd(x::Integer) -> Bool
+    isodd(x::Number) -> Bool
+
+Return `true` if `x` is an odd integer (that is, an integer not divisible by 2), and `false` otherwise.
 
-Return `true` if `x` is odd (that is, not divisible by 2), and `false` otherwise.
+!!! compat "Julia 1.7"
+    Non-`Integer` arguments require Julia 1.7 or later.
 
 # Examples
 ```jldoctest
@@ -106,12 +113,16 @@ julia> isodd(10)
 false
 ```
 """
-isodd(n::Integer) = rem(n, 2) != 0
+isodd(n::Number) = isreal(n) && isodd(real(n))
+isodd(n::Real) = isinteger(n) && !iszero(rem(Integer(n), 2))
 
 """
-    iseven(x::Integer) -> Bool
+    iseven(x::Number) -> Bool
+
+Return `true` if `x` is an even integer (that is, an integer divisible by 2), and `false` otherwise.
 
-Return `true` if `x` is even (that is, divisible by 2), and `false` otherwise.
+!!! compat "Julia 1.7"
+    Non-`Integer` arguments require Julia 1.7 or later.
 
 # Examples
 ```jldoctest
@@ -122,7 +133,8 @@ julia> iseven(10)
 true
 ```
 """
-iseven(n::Integer) = !isodd(n)
+iseven(n::Number) = isreal(n) && iseven(real(n))
+iseven(n::Real) = isinteger(n) && iszero(rem(Integer(n), 2))
 
 signbit(x::Integer) = x < 0
 signbit(x::Unsigned) = false
@@ -152,6 +164,8 @@ when `abs` is applied to the minimum representable value of a signed
 integer. That is, when `x == typemin(typeof(x))`, `abs(x) == x < 0`,
 not `-x` as might be expected.
 
+See also: [`abs2`](@ref), [`unsigned`](@ref), [`sign`](@ref).
+
 # Examples
 ```jldoctest
 julia> abs(-3)
@@ -176,12 +190,17 @@ abs(x::Signed) = flipsign(x,x)
 
 Convert a number to an unsigned integer. If the argument is signed, it is reinterpreted as
 unsigned without checking for negative values.
+
+See also: [`signed`](@ref), [`sign`](@ref), [`signbit`](@ref).
+
 # Examples
 ```jldoctest
 julia> unsigned(-2)
 0xfffffffffffffffe
+
 julia> unsigned(2)
 0x0000000000000002
+
 julia> signed(unsigned(-2))
 -2
 ```
@@ -194,6 +213,8 @@ unsigned(x::BitSigned) = reinterpret(typeof(convert(Unsigned, zero(x))), x)
 
 Convert a number to a signed integer. If the argument is unsigned, it is reinterpreted as
 signed without checking for overflow.
+
+See also: [`unsigned`](@ref), [`sign`](@ref), [`signbit`](@ref).
 """
 signed(x) = x % typeof(convert(Signed, zero(x)))
 signed(x::BitUnsigned) = reinterpret(typeof(convert(Signed, zero(x))), x)
@@ -231,6 +252,8 @@ exceptions, see note below).
     type, and so rounding error may occur. In particular, if the exact result is very
     close to `y`, then it may be rounded to `y`.
 
+See also: [`rem`](@ref), [`div`](@ref), [`fld`](@ref), [`mod1`](@ref), [`invmod`](@ref).
+
 ```jldoctest
 julia> mod(8, 3)
 2
@@ -246,6 +269,10 @@ julia> mod(eps(), 3)
 
 julia> mod(-eps(), 3)
 3.0
+
+julia> mod.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ 1  2  0  1  2  0  1  2  0  1  2
 ```
 """
 function mod(x::T, y::T) where T<:Integer
@@ -270,6 +297,8 @@ rem(x::T, y::T) where {T<:BitUnsigned64} = checked_urem_int(x, y)
 
 Bitwise not.
 
+See also: [`!`](@ref), [`&`](@ref), [`|`](@ref).
+
 # Examples
 ```jldoctest
 julia> ~4
@@ -291,6 +320,8 @@ Bitwise and. Implements [three-valued logic](https://en.wikipedia.org/wiki/Three
 returning [`missing`](@ref) if one operand is `missing` and the other is `true`. Add parentheses for
 function application form: `(&)(x, y)`.
 
+See also: [`|`](@ref), [`xor`](@ref), [`&&`](@ref).
+
 # Examples
 ```jldoctest
 julia> 4 & 10
@@ -314,6 +345,8 @@ false
 Bitwise or. Implements [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic),
 returning [`missing`](@ref) if one operand is `missing` and the other is `false`.
 
+See also: [`&`](@ref), [`xor`](@ref), [`||`](@ref).
+
 # Examples
 ```jldoctest
 julia> 4 | 10
@@ -367,6 +400,9 @@ Number of ones in the binary representation of `x`.
 ```jldoctest
 julia> count_ones(7)
 3
+
+julia> count_ones(Int32(-1))
+32
 ```
 """
 count_ones(x::BitInteger) = (ctpop_int(x) % Int)::Int
@@ -406,6 +442,9 @@ Number of zeros in the binary representation of `x`.
 ```jldoctest
 julia> count_zeros(Int32(2 ^ 16 - 1))
 16
+
+julia> count_zeros(-1)
+0
 ```
 """
 count_zeros(x::Integer) = count_ones(~x)
@@ -496,6 +535,8 @@ A negative value of `k` will rotate to the right instead.
 !!! compat "Julia 1.5"
     This function requires Julia 1.5 or later.
 
+See also: [`<<`](@ref), [`circshift`](@ref), [`BitArray`](@ref).
+
 ```jldoctest
 julia> bitrotate(UInt8(114), 2)
 0xc9
@@ -552,12 +593,26 @@ unsafe_trunc(::Type{T}, x::Integer) where {T<:Integer} = rem(x, T)
     trunc(x; sigdigits::Integer= [, base = 10])
 
 `trunc(x)` returns the nearest integral value of the same type as `x` whose absolute value
-is less than or equal to `x`.
+is less than or equal to the absolute value of `x`.
 
 `trunc(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is
 not representable.
 
-`digits`, `sigdigits` and `base` work as for [`round`](@ref).
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
+
+See also: [`%`](@ref rem), [`floor`](@ref), [`unsigned`](@ref), [`unsafe_trunc`](@ref).
+
+# Examples
+```jldoctest
+julia> trunc(2.22)
+2.0
+
+julia> trunc(-2.22, digits=1)
+-2.2
+
+julia> trunc(Int, -2.22)
+-2
+```
 """
 function trunc end
 
@@ -572,7 +627,7 @@ equal to `x`.
 `floor(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is
 not representable.
 
-`digits`, `sigdigits` and `base` work as for [`round`](@ref).
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
 """
 function floor end
 
@@ -587,7 +642,7 @@ equal to `x`.
 `ceil(T, x)` converts the result to type `T`, throwing an `InexactError` if the value is not
 representable.
 
-`digits`, `sigdigits` and `base` work as for [`round`](@ref).
+Keywords `digits`, `sigdigits` and `base` work as for [`round`](@ref).
 """
 function ceil end
 
@@ -600,10 +655,19 @@ floor(::Type{T}, x::Integer) where {T<:Integer} = convert(T, x)
 
 """
     @int128_str str
-    @int128_str(str)
 
-`@int128_str` parses a string into a Int128
-Throws an `ArgumentError` if the string is not a valid integer
+Parse `str` as an [`Int128`](@ref).
+Throw an `ArgumentError` if the string is not a valid integer.
+
+# Examples
+```jldoctest
+julia> int128"123456789123"
+123456789123
+
+julia> int128"123456789123.4"
+ERROR: LoadError: ArgumentError: invalid base 10 digit '.' in "123456789123.4"
+[...]
+```
 """
 macro int128_str(s)
     return parse(Int128, s)
@@ -611,10 +675,19 @@ end
 
 """
     @uint128_str str
-    @uint128_str(str)
 
-`@uint128_str` parses a string into a UInt128
-Throws an `ArgumentError` if the string is not a valid integer
+Parse `str` as an [`UInt128`](@ref).
+Throw an `ArgumentError` if the string is not a valid integer.
+
+# Examples
+```
+julia> uint128"123456789123"
+0x00000000000000000000001cbe991a83
+
+julia> uint128"-123456789123"
+ERROR: LoadError: ArgumentError: invalid base 10 digit '-' in "-123456789123"
+[...]
+```
 """
 macro uint128_str(s)
     return parse(UInt128, s)
@@ -622,7 +695,6 @@ end
 
 """
     @big_str str
-    @big_str(str)
 
 Parse a string into a [`BigInt`](@ref) or [`BigFloat`](@ref),
 and throw an `ArgumentError` if the string is not a valid number.
@@ -635,28 +707,37 @@ julia> big"123_456"
 
 julia> big"7891.5"
 7891.5
+
+julia> big"_"
+ERROR: ArgumentError: invalid number format _ for BigInt or BigFloat
+[...]
 ```
 """
 macro big_str(s)
+    message = "invalid number format $s for BigInt or BigFloat"
+    throw_error =  :(throw(ArgumentError($message)))
     if '_' in s
         # remove _ in s[2:end-1]
         bf = IOBuffer(maxsize=lastindex(s))
-        print(bf, s[1])
+        c = s[1]
+        print(bf, c)
+        is_prev_underscore = (c == '_')
+        is_prev_dot = (c == '.')
         for c in SubString(s, 2, lastindex(s)-1)
             c != '_' && print(bf, c)
+            c == '_' && is_prev_dot && return throw_error
+            c == '.' && is_prev_underscore && return throw_error
+            is_prev_underscore = (c == '_')
+            is_prev_dot = (c == '.')
         end
         print(bf, s[end])
-        seekstart(bf)
-        n = tryparse(BigInt, String(take!(bf)))
-        n === nothing || return n
-    else
-        n = tryparse(BigInt, s)
-        n === nothing || return n
-        n = tryparse(BigFloat, s)
-        n === nothing || return n
+        s = String(take!(bf))
     end
-    message = "invalid number format $s for BigInt or BigFloat"
-    return :(throw(ArgumentError($message)))
+    n = tryparse(BigInt, s)
+    n === nothing || return n
+    n = tryparse(BigFloat, s)
+    n === nothing || return n
+    return throw_error
 end
 
 ## integer promotions ##
@@ -700,6 +781,8 @@ function typemin end
 
 The highest value representable by the given (real) numeric `DataType`.
 
+See also: [`floatmax`](@ref), [`typemin`](@ref), [`eps`](@ref).
+
 # Examples
 ```jldoctest
 julia> typemax(Int8)
@@ -707,6 +790,12 @@ julia> typemax(Int8)
 
 julia> typemax(UInt32)
 0xffffffff
+
+julia> typemax(Float64)
+Inf
+
+julia> floatmax(Float32)  # largest finite floating point number
+3.4028235f38
 ```
 """
 function typemax end
diff --git a/base/intfuncs.jl b/base/intfuncs.jl
index 198f73191ecb8d..059091b8bf8b1b 100644
--- a/base/intfuncs.jl
+++ b/base/intfuncs.jl
@@ -3,9 +3,9 @@
 ## number-theoretic functions ##
 
 """
-    gcd(x,y)
+    gcd(x, y...)
 
-Greatest common (positive) divisor (or zero if `x` and `y` are both zero).
+Greatest common (positive) divisor (or zero if all arguments are zero).
 The arguments may be integer and rational numbers.
 
 !!! compat "Julia 1.4"
@@ -13,26 +13,29 @@ The arguments may be integer and rational numbers.
 
 # Examples
 ```jldoctest
-julia> gcd(6,9)
+julia> gcd(6, 9)
 3
 
-julia> gcd(6,-9)
+julia> gcd(6, -9)
 3
 
-julia> gcd(6,0)
+julia> gcd(6, 0)
 6
 
-julia> gcd(0,0)
+julia> gcd(0, 0)
 0
 
-julia> gcd(1//3,2//3)
+julia> gcd(1//3, 2//3)
 1//3
 
-julia> gcd(1//3,-2//3)
+julia> gcd(1//3, -2//3)
 1//3
 
-julia> gcd(1//3,2)
+julia> gcd(1//3, 2)
 1//3
+
+julia> gcd(0, 0, 10, 15)
+5
 ```
 """
 function gcd(a::T, b::T) where T<:Integer
@@ -44,11 +47,21 @@ function gcd(a::T, b::T) where T<:Integer
     checked_abs(a)
 end
 
-# binary GCD (aka Stein's) algorithm
-# about 1.7x (2.1x) faster for random Int64s (Int128s)
 function gcd(a::T, b::T) where T<:BitInteger
     a == 0 && return checked_abs(b)
     b == 0 && return checked_abs(a)
+    r = _gcd(a, b)
+    signbit(r) && __throw_gcd_overflow(a, b)
+    return r
+end
+@noinline __throw_gcd_overflow(a, b) =
+    throw(OverflowError(LazyString("gcd(", a, ", ", b, ") overflows")))
+
+# binary GCD (aka Stein's) algorithm
+# about 1.7x (2.1x) faster for random Int64s (Int128s)
+# Unfortunately, we need to manually annotate this as `@assume_effects :terminates_locally` to work around #41694.
+# Since this is used in the Rational constructor, constant folding is something we do care about here.
+@assume_effects :terminates_locally function _gcd(a::T, b::T) where T<:BitInteger
     za = trailing_zeros(a)
     zb = trailing_zeros(b)
     k = min(za, zb)
@@ -62,16 +75,13 @@ function gcd(a::T, b::T) where T<:BitInteger
         v >>= trailing_zeros(v)
     end
     r = u << k
-    # T(r) would throw InexactError; we want OverflowError instead
-    r > typemax(T) && __throw_gcd_overflow(a, b)
-    r % T
+    return r % T
 end
-@noinline __throw_gcd_overflow(a, b) = throw(OverflowError("gcd($a, $b) overflows"))
 
 """
-    lcm(x,y)
+    lcm(x, y...)
 
-Least common (non-negative) multiple.
+Least common (positive) multiple (or zero if any argument is zero).
 The arguments may be integer and rational numbers.
 
 !!! compat "Julia 1.4"
@@ -79,30 +89,33 @@ The arguments may be integer and rational numbers.
 
 # Examples
 ```jldoctest
-julia> lcm(2,3)
+julia> lcm(2, 3)
 6
 
-julia> lcm(-2,3)
+julia> lcm(-2, 3)
 6
 
-julia> lcm(0,3)
+julia> lcm(0, 3)
 0
 
-julia> lcm(0,0)
+julia> lcm(0, 0)
 0
 
-julia> lcm(1//3,2//3)
+julia> lcm(1//3, 2//3)
 2//3
 
-julia> lcm(1//3,-2//3)
+julia> lcm(1//3, -2//3)
 2//3
 
-julia> lcm(1//3,2)
+julia> lcm(1//3, 2)
 2//1
+
+julia> lcm(1, 3, 5, 7)
+105
 ```
 """
 function lcm(a::T, b::T) where T<:Integer
-    # explicit a==0 test is to handle case of lcm(0,0) correctly
+    # explicit a==0 test is to handle case of lcm(0, 0) correctly
     # explicit b==0 test is to handle case of lcm(typemin(T),0) correctly
     if a == 0 || b == 0
         return zero(a)
@@ -111,8 +124,9 @@ function lcm(a::T, b::T) where T<:Integer
     end
 end
 
-gcd(a::Union{Integer,Rational}) = a
-lcm(a::Union{Integer,Rational}) = a
+gcd(a::Integer) = checked_abs(a)
+gcd(a::Rational) = checked_abs(a.num) // a.den
+lcm(a::Union{Integer,Rational}) = gcd(a)
 gcd(a::Unsigned, b::Signed) = gcd(promote(a, abs(b))...)
 gcd(a::Signed, b::Unsigned) = gcd(promote(abs(a), b)...)
 gcd(a::Real, b::Real) = gcd(promote(a,b)...)
@@ -128,7 +142,7 @@ lcm(abc::AbstractArray{<:Real}) = reduce(lcm, abc; init=one(eltype(abc)))
 function gcd(abc::AbstractArray{<:Integer})
     a = zero(eltype(abc))
     for b in abc
-        a = gcd(a,b)
+        a = gcd(a, b)
         if a == 1
             return a
         end
@@ -136,13 +150,13 @@ function gcd(abc::AbstractArray{<:Integer})
     return a
 end
 
-# return (gcd(a,b),x,y) such that ax+by == gcd(a,b)
+# return (gcd(a, b), x, y) such that ax+by == gcd(a, b)
 """
-    gcdx(x,y)
+    gcdx(a, b)
 
-Computes the greatest common (positive) divisor of `x` and `y` and their Bézout
+Computes the greatest common (positive) divisor of `a` and `b` and their Bézout
 coefficients, i.e. the integer coefficients `u` and `v` that satisfy
-``ux+vy = d = gcd(x,y)``. ``gcdx(x,y)`` returns ``(d,u,v)``.
+``ua+vb = d = gcd(a, b)``. ``gcdx(a, b)`` returns ``(d, u, v)``.
 
 The arguments may be integer and rational numbers.
 
@@ -169,8 +183,8 @@ julia> gcdx(240, 46)
     their `typemax`, and the identity then holds only via the unsigned
     integers' modulo arithmetic.
 """
-function gcdx(a::U, b::V) where {U<:Integer, V<:Integer}
-    T = promote_type(U, V)
+function gcdx(a::Integer, b::Integer)
+    T = promote_type(typeof(a), typeof(b))
     # a0, b0 = a, b
     s0, s1 = oneunit(T), zero(T)
     t0, t1 = s1, s0
@@ -191,33 +205,43 @@ gcdx(a::T, b::T) where T<:Real = throw(MethodError(gcdx, (a,b)))
 # multiplicative inverse of n mod m, error if none
 
 """
-    invmod(x,m)
+    invmod(n, m)
 
-Take the inverse of `x` modulo `m`: `y` such that ``x y = 1 \\pmod m``,
-with ``div(x,y) = 0``. This is undefined for ``m = 0``, or if
-``gcd(x,m) \\neq 1``.
+Take the inverse of `n` modulo `m`: `y` such that ``n y = 1 \\pmod m``,
+and ``div(y,m) = 0``. This will throw an error if ``m = 0``, or if
+``gcd(n,m) \\neq 1``.
 
 # Examples
 ```jldoctest
-julia> invmod(2,5)
+julia> invmod(2, 5)
 3
 
-julia> invmod(2,3)
+julia> invmod(2, 3)
 2
 
-julia> invmod(5,6)
+julia> invmod(5, 6)
 5
 ```
 """
 function invmod(n::Integer, m::Integer)
+    iszero(m) && throw(DomainError(m, "`m` must not be 0."))
+    if n isa Signed && hastypemax(typeof(n))
+        # work around inconsistencies in gcdx
+        # https://github.com/JuliaLang/julia/issues/33781
+        T = promote_type(typeof(n), typeof(m))
+        n == typemin(typeof(n)) && m == typeof(n)(-1) && return T(0)
+        n == typeof(n)(-1) && m == typemin(typeof(n)) && return T(-1)
+    end
     g, x, y = gcdx(n, m)
     g != 1 && throw(DomainError((n, m), "Greatest common divisor is $g."))
-    m == 0 && throw(DomainError(m, "`m` must not be 0."))
     # Note that m might be negative here.
-    # For unsigned T, x might be close to typemax; add m to force a wrap-around.
-    r = mod(x + m, m)
-    # The postcondition is: mod(r * n, m) == mod(T(1), m) && div(r, m) == 0
-    r
+    if n isa Unsigned && hastypemax(typeof(n)) && x > typemax(n)>>1
+        # x might have wrapped if it would have been negative
+        # adding back m forces a correction
+        x += m
+    end
+    # The postcondition is: mod(result * n, m) == mod(T(1), m) && div(result, m) == 0
+    return mod(x, m)
 end
 
 # ^ for any x supporting *
@@ -287,14 +311,15 @@ end
 const HWReal = Union{Int8,Int16,Int32,Int64,UInt8,UInt16,UInt32,UInt64,Float32,Float64}
 const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
 
-# Core.Compiler has complicated logic to inline x^2 and x^3 for
-# numeric types.  In terms of Val we can do it much more simply.
+# Inline x^2 and x^3 for Val
 # (The first argument prevents unexpected behavior if a function ^
 # is defined that is not equal to Base.^)
 @inline literal_pow(::typeof(^), x::HWNumber, ::Val{0}) = one(x)
 @inline literal_pow(::typeof(^), x::HWNumber, ::Val{1}) = x
 @inline literal_pow(::typeof(^), x::HWNumber, ::Val{2}) = x*x
 @inline literal_pow(::typeof(^), x::HWNumber, ::Val{3}) = x*x*x
+@inline literal_pow(::typeof(^), x::HWNumber, ::Val{-1}) = inv(x)
+@inline literal_pow(::typeof(^), x::HWNumber, ::Val{-2}) = (i=inv(x); i*i)
 
 # don't use the inv(x) transformation here since float^p is slightly more accurate
 @inline literal_pow(::typeof(^), x::AbstractFloat, ::Val{p}) where {p} = x^p
@@ -302,11 +327,15 @@ const HWNumber = Union{HWReal, Complex{<:HWReal}, Rational{<:HWReal}}
 
 # for other types, define x^-n as inv(x)^n so that negative literal powers can
 # be computed in a type-stable way even for e.g. integers.
-@inline @generated function literal_pow(f::typeof(^), x, ::Val{p}) where {p}
+@inline function literal_pow(f::typeof(^), x, ::Val{p}) where {p}
     if p < 0
-        :(literal_pow(^, inv(x), $(Val{-p}())))
+        if x isa BitInteger64
+            f(Float64(x), p) # inv would cause rounding, while Float64^Integer is able to compensate the inverse
+        else
+            f(inv(x), -p)
+        end
     else
-        :(f(x,$p))
+        f(x, p)
     end
 end
 
@@ -368,9 +397,11 @@ _prevpow2(x::Unsigned) = one(x) << unsigned((sizeof(x)<<3)-leading_zeros(x)-1)
 _prevpow2(x::Integer) = reinterpret(typeof(x),x < 0 ? -_prevpow2(unsigned(-x)) : _prevpow2(unsigned(x)))
 
 """
-    ispow2(n::Integer) -> Bool
+    ispow2(n::Number) -> Bool
 
-Test whether `n` is a power of two.
+Test whether `n` is an integer power of two.
+
+See also [`count_ones`](@ref), [`prevpow`](@ref), [`nextpow`](@ref).
 
 # Examples
 ```jldoctest
@@ -379,8 +410,22 @@ true
 
 julia> ispow2(5)
 false
+
+julia> ispow2(4.5)
+false
+
+julia> ispow2(0.25)
+true
+
+julia> ispow2(1//8)
+true
 ```
+
+!!! compat "Julia 1.6"
+    Support for non-`Integer` arguments was added in Julia 1.6.
 """
+ispow2(x::Number) = isreal(x) && ispow2(real(x))
+
 ispow2(x::Integer) = x > 0 && count_ones(x) == 1
 
 """
@@ -389,6 +434,8 @@ ispow2(x::Integer) = x > 0 && count_ones(x) == 1
 The smallest `a^n` not less than `x`, where `n` is a non-negative integer. `a` must be
 greater than 1, and `x` must be greater than 0.
 
+See also [`prevpow`](@ref).
+
 # Examples
 ```jldoctest
 julia> nextpow(2, 7)
@@ -403,8 +450,6 @@ julia> nextpow(5, 20)
 julia> nextpow(4, 16)
 16
 ```
-
-See also [`prevpow`](@ref).
 """
 function nextpow(a::Real, x::Real)
     x <= 0 && throw(DomainError(x, "`x` must be positive."))
@@ -415,9 +460,16 @@ function nextpow(a::Real, x::Real)
     a <= 1 && throw(DomainError(a, "`a` must be greater than 1."))
     x <= 1 && return one(a)
     n = ceil(Integer,log(a, x))
+    # round-off error of log can go either direction, so need some checks
     p = a^(n-1)
-    # guard against roundoff error, e.g., with a=5 and x=125
-    p >= x ? p : a^n
+    x > typemax(p) && throw(DomainError(x,"argument is beyond the range of type of the base"))
+    p >= x && return p
+    wp = a^n
+    wp > p || throw(OverflowError("result is beyond the range of type of the base"))
+    wp >= x && return wp
+    wwp = a^(n+1)
+    wwp > wp || throw(OverflowError("result is beyond the range of type of the base"))
+    return wwp
 end
 
 """
@@ -426,6 +478,8 @@ end
 The largest `a^n` not greater than `x`, where `n` is a non-negative integer.
 `a` must be greater than 1, and `x` must not be less than 1.
 
+See also [`nextpow`](@ref), [`isqrt`](@ref).
+
 # Examples
 ```jldoctest
 julia> prevpow(2, 7)
@@ -440,16 +494,25 @@ julia> prevpow(5, 20)
 julia> prevpow(4, 16)
 16
 ```
-See also [`nextpow`](@ref).
 """
-function prevpow(a::Real, x::Real)
+function prevpow(a::T, x::Real) where T <: Real
     x < 1 && throw(DomainError(x, "`x` must be ≥ 1."))
     # See comment in nextpos() for a == special case.
     a == 2 && isa(x, Integer) && return _prevpow2(x)
     a <= 1 && throw(DomainError(a, "`a` must be greater than 1."))
     n = floor(Integer,log(a, x))
-    p = a^(n+1)
-    p <= x ? p : a^n
+    # round-off error of log can go either direction, so need some checks
+    p = a^n
+    x > typemax(p) && throw(DomainError(x,"argument is beyond the range of type of the base"))
+    if a isa Integer
+        wp, overflow = mul_with_overflow(a, p)
+        wp <= x && !overflow && return wp
+    else
+        wp = a^(n+1)
+        wp <= x && return wp
+    end
+    p <= x && return p
+    return a^(n-1)
 end
 
 ## ndigits (number of digits) in base 10 ##
@@ -582,6 +645,8 @@ Compute the number of digits in integer `n` written in base `base`
 (`base` must not be in `[-1, 0, 1]`), optionally padded with zeros
 to a specified size (the result will never be less than `pad`).
 
+See also [`digits`](@ref), [`count_ones`](@ref).
+
 # Examples
 ```jldoctest
 julia> ndigits(12345)
@@ -595,81 +660,121 @@ julia> string(1022, base=16)
 
 julia> ndigits(123, pad=5)
 5
+
+julia> ndigits(-123)
+3
 ```
 """
 ndigits(x::Integer; base::Integer=10, pad::Integer=1) = max(pad, ndigits0z(x, base))
 
 ## integer to string functions ##
 
-function bin(x::Unsigned, pad::Integer, neg::Bool)
-    i = neg + max(pad,sizeof(x)<<3-leading_zeros(x))
-    a = StringVector(i)
+function bin(x::Unsigned, pad::Int, neg::Bool)
+    m = 8 * sizeof(x) - leading_zeros(x)
+    n = neg + max(pad, m)
+    a = StringVector(n)
+    # for i in 0x0:UInt(n-1) # automatic vectorization produces redundant codes
+    #     @inbounds a[n - i] = 0x30 + (((x >> i) % UInt8)::UInt8 & 0x1)
+    # end
+    i = n
+    @inbounds while i >= 4
+        b = UInt32((x % UInt8)::UInt8)
+        d = 0x30303030 + ((b * 0x08040201) >> 0x3) & 0x01010101
+        a[i-3] = (d >> 0x00) % UInt8
+        a[i-2] = (d >> 0x08) % UInt8
+        a[i-1] = (d >> 0x10) % UInt8
+        a[i]   = (d >> 0x18) % UInt8
+        x >>= 0x4
+        i -= 4
+    end
     while i > neg
-        @inbounds a[i] = 48+(x&0x1)
-        x >>= 1
+        @inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x1)
+        x >>= 0x1
         i -= 1
     end
     if neg; @inbounds a[1]=0x2d; end
     String(a)
 end
 
-function oct(x::Unsigned, pad::Integer, neg::Bool)
-    i = neg + max(pad,div((sizeof(x)<<3)-leading_zeros(x)+2,3))
-    a = StringVector(i)
+function oct(x::Unsigned, pad::Int, neg::Bool)
+    m = div(8 * sizeof(x) - leading_zeros(x) + 2, 3)
+    n = neg + max(pad, m)
+    a = StringVector(n)
+    i = n
     while i > neg
-        @inbounds a[i] = 48+(x&0x7)
-        x >>= 3
+        @inbounds a[i] = 0x30 + ((x % UInt8)::UInt8 & 0x7)
+        x >>= 0x3
         i -= 1
     end
     if neg; @inbounds a[1]=0x2d; end
     String(a)
 end
 
-function dec(x::Unsigned, pad::Integer, neg::Bool)
-    i = neg + ndigits(x, base=10, pad=pad)
-    a = StringVector(i)
-    while i > neg
-        @inbounds a[i] = 48+rem(x,10)
-        x = oftype(x,div(x,10))
-        i -= 1
+# 2-digit decimal characters ("00":"99")
+const _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99]
+
+function dec(x::Unsigned, pad::Int, neg::Bool)
+    n = neg + ndigits(x, pad=pad)
+    a = StringVector(n)
+    i = n
+    @inbounds while i >= 2
+        d, r = divrem(x, 0x64)
+        d100 = _dec_d100[(r % Int)::Int + 1]
+        a[i-1] = d100 % UInt8
+        a[i] = (d100 >> 0x8) % UInt8
+        x = oftype(x, d)
+        i -= 2
+    end
+    if i > neg
+        @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8
     end
     if neg; @inbounds a[1]=0x2d; end
     String(a)
 end
 
-function hex(x::Unsigned, pad::Integer, neg::Bool)
-    i = neg + max(pad,(sizeof(x)<<1)-(leading_zeros(x)>>2))
-    a = StringVector(i)
-    while i > neg
-        d = x & 0xf
-        @inbounds a[i] = 48+d+39*(d>9)
-        x >>= 4
-        i -= 1
+function hex(x::Unsigned, pad::Int, neg::Bool)
+    m = 2 * sizeof(x) - (leading_zeros(x) >> 2)
+    n = neg + max(pad, m)
+    a = StringVector(n)
+    i = n
+    while i >= 2
+        b = (x % UInt8)::UInt8
+        d1, d2 = b >> 0x4, b & 0xf
+        @inbounds a[i-1] = d1 + ifelse(d1 > 0x9, 0x57, 0x30)
+        @inbounds a[i]   = d2 + ifelse(d2 > 0x9, 0x57, 0x30)
+        x >>= 0x8
+        i -= 2
+    end
+    if i > neg
+        d = (x % UInt8)::UInt8 & 0xf
+        @inbounds a[i] = d + ifelse(d > 0x9, 0x57, 0x30)
     end
     if neg; @inbounds a[1]=0x2d; end
     String(a)
 end
 
-const base36digits = ['0':'9';'a':'z']
-const base62digits = ['0':'9';'A':'Z';'a':'z']
+const base36digits = UInt8['0':'9';'a':'z']
+const base62digits = UInt8['0':'9';'A':'Z';'a':'z']
 
-function _base(b::Integer, x::Integer, pad::Integer, neg::Bool)
-    (x >= 0) | (b < 0) || throw(DomainError(x, "For negative `x`, `b` must be negative."))
-    2 <= abs(b) <= 62 || throw(DomainError(b, "base must satisfy 2 ≤ abs(base) ≤ 62"))
+function _base(base::Integer, x::Integer, pad::Int, neg::Bool)
+    (x >= 0) | (base < 0) || throw(DomainError(x, "For negative `x`, `base` must be negative."))
+    2 <= abs(base) <= 62 || throw(DomainError(base, "base must satisfy 2 ≤ abs(base) ≤ 62"))
+    b = (base % Int)::Int
     digits = abs(b) <= 36 ? base36digits : base62digits
-    i = neg + ndigits(x, base=b, pad=pad)
-    a = StringVector(i)
+    n = neg + ndigits(x, base=b, pad=pad)
+    a = StringVector(n)
+    i = n
     @inbounds while i > neg
         if b > 0
-            a[i] = digits[1+rem(x,b)]
+            a[i] = digits[1 + (rem(x, b) % Int)::Int]
             x = div(x,b)
         else
-            a[i] = digits[1+mod(x,-b)]
+            a[i] = digits[1 + (mod(x, -b) % Int)::Int]
             x = cld(x,b)
         end
         i -= 1
     end
-    if neg; a[1]='-'; end
+    if neg; @inbounds a[1]=0x2d; end
     String(a)
 end
 
@@ -682,15 +787,19 @@ split_sign(n::Unsigned) = n, false
 Convert an integer `n` to a string in the given `base`,
 optionally specifying a number of digits to pad to.
 
+See also [`digits`](@ref), [`bitstring`](@ref), [`count_zeros`](@ref).
+
+# Examples
 ```jldoctest
 julia> string(5, base = 13, pad = 4)
 "0005"
 
-julia> string(13, base = 5, pad = 4)
-"0023"
+julia> string(-13, base = 5, pad = 4)
+"-0023"
 ```
 """
 function string(n::Integer; base::Integer = 10, pad::Integer = 1)
+    pad = (min(max(pad, typemin(Int)), typemax(Int)) % Int)::Int
     if base == 2
         (n_positive, neg) = split_sign(n)
         bin(n_positive, pad, neg)
@@ -713,24 +822,36 @@ string(b::Bool) = b ? "true" : "false"
 """
     bitstring(n)
 
-A string giving the literal bit representation of a number.
+A string giving the literal bit representation of a primitive type.
+
+See also [`count_ones`](@ref), [`count_zeros`](@ref), [`digits`](@ref).
 
 # Examples
 ```jldoctest
-julia> bitstring(4)
-"0000000000000000000000000000000000000000000000000000000000000100"
+julia> bitstring(Int32(4))
+"00000000000000000000000000000100"
 
 julia> bitstring(2.2)
 "0100000000000001100110011001100110011001100110011001100110011010"
 ```
 """
-function bitstring end
-
-bitstring(x::Union{Bool,Int8,UInt8})           = string(reinterpret(UInt8,x), pad = 8, base = 2)
-bitstring(x::Union{Int16,UInt16,Float16})      = string(reinterpret(UInt16,x), pad = 16, base = 2)
-bitstring(x::Union{Char,Int32,UInt32,Float32}) = string(reinterpret(UInt32,x), pad = 32, base = 2)
-bitstring(x::Union{Int64,UInt64,Float64})      = string(reinterpret(UInt64,x), pad = 64, base = 2)
-bitstring(x::Union{Int128,UInt128})            = string(reinterpret(UInt128,x), pad = 128, base = 2)
+function bitstring(x::T) where {T}
+    isprimitivetype(T) || throw(ArgumentError("$T not a primitive type"))
+    sz = sizeof(T) * 8
+    str = StringVector(sz)
+    i = sz
+    @inbounds while i >= 4
+        b = UInt32(sizeof(T) == 1 ? bitcast(UInt8, x) : trunc_int(UInt8, x))
+        d = 0x30303030 + ((b * 0x08040201) >> 0x3) & 0x01010101
+        str[i-3] = (d >> 0x00) % UInt8
+        str[i-2] = (d >> 0x08) % UInt8
+        str[i-1] = (d >> 0x10) % UInt8
+        str[i]   = (d >> 0x18) % UInt8
+        x = lshr_int(x, 4)
+        i -= 4
+    end
+    return String(str)
+end
 
 """
     digits([T<:Integer], n::Integer; base::T = 10, pad::Integer = 1)
@@ -739,9 +860,12 @@ Return an array with element type `T` (default `Int`) of the digits of `n` in th
 base, optionally padded with zeros to a specified size. More significant digits are at
 higher indices, such that `n == sum(digits[k]*base^(k-1) for k=1:length(digits))`.
 
+See also [`ndigits`](@ref), [`digits!`](@ref),
+and for base 2 also [`bitstring`](@ref), [`count_ones`](@ref).
+
 # Examples
 ```jldoctest
-julia> digits(10, base = 10)
+julia> digits(10)
 2-element Vector{Int64}:
  0
  1
@@ -753,14 +877,18 @@ julia> digits(10, base = 2)
  0
  1
 
-julia> digits(10, base = 2, pad = 6)
-6-element Vector{Int64}:
- 0
- 1
- 0
- 1
- 0
- 0
+julia> digits(-256, base = 10, pad = 5)
+5-element Vector{Int64}:
+ -6
+ -5
+ -2
+  0
+  0
+
+julia> n = rand(-999:999);
+
+julia> n == evalpoly(13, digits(n, base = 13))
+true
 ```
 """
 digits(n::Integer; base::Integer = 10, pad::Integer = 1) =
@@ -773,10 +901,11 @@ end
 """
     hastypemax(T::Type) -> Bool
 
-Return `true` if and only if `typemax(T)` is defined.
+Return true if and only if the extrema `typemax(T)` and `typemin(T)` are defined.
 """
 hastypemax(::Base.BitIntegerType) = true
-hastypemax(::Type{T}) where {T} = applicable(typemax, T)
+hastypemax(::Type{Bool}) = true
+hastypemax(::Type{T}) where {T} = applicable(typemax, T) && applicable(typemin, T)
 
 """
     digits!(array, n::Integer; base::Integer = 10)
@@ -787,14 +916,14 @@ the array length. If the array length is excessive, the excess portion is filled
 
 # Examples
 ```jldoctest
-julia> digits!([2,2,2,2], 10, base = 2)
+julia> digits!([2, 2, 2, 2], 10, base = 2)
 4-element Vector{Int64}:
  0
  1
  0
  1
 
-julia> digits!([2,2,2,2,2,2], 10, base = 2)
+julia> digits!([2, 2, 2, 2, 2, 2], 10, base = 2)
 6-element Vector{Int64}:
  0
  1
@@ -866,6 +995,8 @@ Factorial of `n`. If `n` is an [`Integer`](@ref), the factorial is computed as a
 integer (promoted to at least 64 bits). Note that this may overflow if `n` is not small,
 but you can use `factorial(big(n))` to compute the result exactly in arbitrary precision.
 
+See also [`binomial`](@ref).
+
 # Examples
 ```jldoctest
 julia> factorial(6)
@@ -880,9 +1011,6 @@ julia> factorial(big(21))
 51090942171709440000
 ```
 
-# See also
-* [`binomial`](@ref)
-
 # External links
 * [Factorial](https://en.wikipedia.org/wiki/Factorial) on Wikipedia.
 """
@@ -912,6 +1040,8 @@ If ``n`` is negative, then it is defined in terms of the identity
 \\binom{n}{k} = (-1)^k \\binom{k-n-1}{k}
 ```
 
+See also [`factorial`](@ref).
+
 # Examples
 ```jldoctest
 julia> binomial(5, 3)
@@ -924,9 +1054,6 @@ julia> binomial(-5, 3)
 -35
 ```
 
-# See also
-* [`factorial`](@ref)
-
 # External links
 * [Binomial coefficient](https://en.wikipedia.org/wiki/Binomial_coefficient) on Wikipedia.
 """
@@ -935,7 +1062,7 @@ function binomial(n::T, k::T) where T<:Integer
     k < 0 && return zero(T)
     sgn = one(T)
     if n < 0
-        n = -n + k -1
+        n = -n + k - one(T)
         if isodd(k)
             sgn = -sgn
         end
@@ -946,15 +1073,15 @@ function binomial(n::T, k::T) where T<:Integer
     if k > (n>>1)
         k = (n - k)
     end
-    x::T = nn = n - k + 1
-    nn += 1
-    rr = 2
+    x = nn = n - k + one(T)
+    nn += one(T)
+    rr = T(2)
     while rr <= k
         xt = div(widemul(x, nn), rr)
         x = xt % T
         x == xt || throw(OverflowError("binomial($n0, $k0) overflows"))
-        rr += 1
-        nn += 1
+        rr += one(T)
+        nn += one(T)
     end
-    convert(T, copysign(x, sgn))
+    copysign(x, sgn)
 end
diff --git a/base/io.jl b/base/io.jl
index 4c0cf6d7cf19e7..3fac2287bdabf4 100644
--- a/base/io.jl
+++ b/base/io.jl
@@ -15,12 +15,12 @@ struct EOFError <: Exception end
 A system call failed with an error code (in the `errno` global variable).
 """
 struct SystemError <: Exception
-    prefix::AbstractString
+    prefix::String
     errnum::Int32
     extrainfo
     SystemError(p::AbstractString, e::Integer, extrainfo) = new(p, e, extrainfo)
     SystemError(p::AbstractString, e::Integer) = new(p, e, nothing)
-    SystemError(p::AbstractString) = new(p, Libc.errno())
+    SystemError(p::AbstractString) = new(p, Libc.errno(), nothing)
 end
 
 lock(::IO) = nothing
@@ -60,9 +60,50 @@ function isopen end
 Close an I/O stream. Performs a [`flush`](@ref) first.
 """
 function close end
+
+"""
+    closewrite(stream)
+
+Shutdown the write half of a full-duplex I/O stream. Performs a [`flush`](@ref)
+first. Notify the other end that no more data will be written to the underlying
+file. This is not supported by all IO types.
+
+# Examples
+```jldoctest
+julia> io = Base.BufferStream(); # this never blocks, so we can read and write on the same Task
+
+julia> write(io, "request");
+
+julia> # calling `read(io)` here would block forever
+
+julia> closewrite(io);
+
+julia> read(io, String)
+"request"
+```
+"""
+function closewrite end
+
+"""
+    flush(stream)
+
+Commit all currently buffered writes to the given stream.
+"""
 function flush end
-function wait_readnb end
-function wait_close end
+
+"""
+    bytesavailable(io)
+
+Return the number of bytes available for reading before a read from this stream or buffer will block.
+
+# Examples
+```jldoctest
+julia> io = IOBuffer("JuliaLang is a GitHub organization");
+
+julia> bytesavailable(io)
+34
+```
+"""
 function bytesavailable end
 
 """
@@ -73,15 +114,15 @@ data has already been buffered. The result is a `Vector{UInt8}`.
 
 !!! warning
     The amount of data returned is implementation-dependent; for example it can
-depend on the internal choice of buffer size. Other functions such as [`read`](@ref)
-should generally be used instead.
+    depend on the internal choice of buffer size. Other functions such as [`read`](@ref)
+    should generally be used instead.
 """
 function readavailable end
 
 """
     isreadable(io) -> Bool
 
-Return `true` if the specified IO object is readable (if that can be determined).
+Return `false` if the specified IO object is not readable.
 
 # Examples
 ```jldoctest
@@ -99,12 +140,12 @@ true
 julia> rm("myfile.txt")
 ```
 """
-function isreadable end
+isreadable(io::IO) = isopen(io)
 
 """
     iswritable(io) -> Bool
 
-Return `true` if the specified IO object is writable (if that can be determined).
+Return `false` if the specified IO object is not writable.
 
 # Examples
 ```jldoctest
@@ -122,10 +163,23 @@ false
 julia> rm("myfile.txt")
 ```
 """
-function iswritable end
-function copy end
+iswritable(io::IO) = isopen(io)
+
+"""
+    eof(stream) -> Bool
+
+Test whether an I/O stream is at end-of-file. If the stream is not yet exhausted, this
+function will block to wait for more data if necessary, and then return `false`. Therefore
+it is always safe to read one byte after seeing `eof` return `false`. `eof` will return
+`false` as long as buffered data is still available, even if the remote end of a connection
+is closed.
+"""
 function eof end
 
+function copy end
+function wait_readnb end
+function wait_close end
+
 """
     read(io::IO, T)
 
@@ -136,7 +190,7 @@ Note that Julia does not convert the endianness for you. Use [`ntoh`](@ref) or
 
     read(io::IO, String)
 
-Read the entirety of `io`, as a `String`.
+Read the entirety of `io`, as a `String` (see also [`readchomp`](@ref)).
 
 # Examples
 ```jldoctest
@@ -307,7 +361,7 @@ function open_flags(;
 end
 
 """
-    open(f::Function, args...; kwargs....)
+    open(f::Function, args...; kwargs...)
 
 Apply the function `f` to the result of `open(args...; kwargs...)` and close the resulting file
 descriptor upon completion.
@@ -357,67 +411,41 @@ end
 function pipe_reader end
 function pipe_writer end
 
+for f in (:flush, :closewrite, :iswritable)
+    @eval $(f)(io::AbstractPipe) = $(f)(pipe_writer(io)::IO)
+end
 write(io::AbstractPipe, byte::UInt8) = write(pipe_writer(io)::IO, byte)
+write(to::IO, from::AbstractPipe) = write(to, pipe_reader(from))
 unsafe_write(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_write(pipe_writer(io)::IO, p, nb)::Union{Int,UInt}
 buffer_writes(io::AbstractPipe, args...) = buffer_writes(pipe_writer(io)::IO, args...)
-flush(io::AbstractPipe) = flush(pipe_writer(io)::IO)
 
+for f in (
+        # peek/mark interface
+        :mark, :unmark, :reset, :ismarked,
+        # Simple reader functions
+        :read, :readavailable, :bytesavailable, :reseteof, :isreadable)
+    @eval $(f)(io::AbstractPipe) = $(f)(pipe_reader(io)::IO)
+end
 read(io::AbstractPipe, byte::Type{UInt8}) = read(pipe_reader(io)::IO, byte)::UInt8
 unsafe_read(io::AbstractPipe, p::Ptr{UInt8}, nb::UInt) = unsafe_read(pipe_reader(io)::IO, p, nb)
-read(io::AbstractPipe) = read(pipe_reader(io)::IO)
 readuntil(io::AbstractPipe, arg::UInt8; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractChar; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractString; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil(io::AbstractPipe, arg::AbstractVector; kw...) = readuntil(pipe_reader(io)::IO, arg; kw...)
 readuntil_vector!(io::AbstractPipe, target::AbstractVector, keep::Bool, out) = readuntil_vector!(pipe_reader(io)::IO, target, keep, out)
 readbytes!(io::AbstractPipe, target::AbstractVector{UInt8}, n=length(target)) = readbytes!(pipe_reader(io)::IO, target, n)
-
-for f in (
-        # peek/mark interface
-        :mark, :unmark, :reset, :ismarked,
-        # Simple reader functions
-        :readavailable, :isreadable)
-    @eval $(f)(io::AbstractPipe) = $(f)(pipe_reader(io)::IO)
-end
 peek(io::AbstractPipe, ::Type{T}) where {T} = peek(pipe_reader(io)::IO, T)::T
+wait_readnb(io::AbstractPipe, nb::Int) = wait_readnb(pipe_reader(io)::IO, nb)
+eof(io::AbstractPipe) = eof(pipe_reader(io)::IO)::Bool
 
-iswritable(io::AbstractPipe) = iswritable(pipe_writer(io)::IO)
 isopen(io::AbstractPipe) = isopen(pipe_writer(io)::IO) || isopen(pipe_reader(io)::IO)
 close(io::AbstractPipe) = (close(pipe_writer(io)::IO); close(pipe_reader(io)::IO))
-wait_readnb(io::AbstractPipe, nb::Int) = wait_readnb(pipe_reader(io)::IO, nb)
 wait_close(io::AbstractPipe) = (wait_close(pipe_writer(io)::IO); wait_close(pipe_reader(io)::IO))
 
-"""
-    bytesavailable(io)
-
-Return the number of bytes available for reading before a read from this stream or buffer will block.
-
-# Examples
-```jldoctest
-julia> io = IOBuffer("JuliaLang is a GitHub organization");
-
-julia> bytesavailable(io)
-34
-```
-"""
-bytesavailable(io::AbstractPipe) = bytesavailable(pipe_reader(io)::IO)
-
-"""
-    eof(stream) -> Bool
-
-Test whether an I/O stream is at end-of-file. If the stream is not yet exhausted, this
-function will block to wait for more data if necessary, and then return `false`. Therefore
-it is always safe to read one byte after seeing `eof` return `false`. `eof` will return
-`false` as long as buffered data is still available, even if the remote end of a connection
-is closed.
-"""
-eof(io::AbstractPipe) = eof(pipe_reader(io)::IO)::Bool
-reseteof(io::AbstractPipe) = reseteof(pipe_reader(io)::IO)
-
 
 # Exception-safe wrappers (io = open(); try f(io) finally close(io))
 
-write(filename::AbstractString, a1, args...) = open(io->write(io, a1, args...), filename, "w")
+write(filename::AbstractString, a1, args...) = open(io->write(io, a1, args...), convert(String, filename)::String, "w")
 
 """
     read(filename::AbstractString, args...)
@@ -429,9 +457,9 @@ Open a file and read its contents. `args` is passed to `read`: this is equivalen
 
 Read the entire contents of a file as a string.
 """
-read(filename::AbstractString, args...) = open(io->read(io, args...), filename)
+read(filename::AbstractString, args...) = open(io->read(io, args...), convert(String, filename)::String)
 
-read(filename::AbstractString, ::Type{T}) where {T} = open(io->read(io, T), filename)
+read(filename::AbstractString, ::Type{T}) where {T} = open(io->read(io, T), convert(String, filename)::String)
 
 """
     read!(stream::IO, array::AbstractArray)
@@ -441,7 +469,7 @@ Read binary data from an I/O stream or file, filling in `array`.
 """
 function read! end
 
-read!(filename::AbstractString, a) = open(io->read!(io, a), filename)
+read!(filename::AbstractString, a) = open(io->read!(io, a), convert(String, filename)::String)
 
 """
     readuntil(stream::IO, delim; keep::Bool = false)
@@ -468,7 +496,7 @@ julia> readuntil("my_file.txt", '.', keep = true)
 julia> rm("my_file.txt")
 ```
 """
-readuntil(filename::AbstractString, args...; kw...) = open(io->readuntil(io, args...; kw...), filename)
+readuntil(filename::AbstractString, args...; kw...) = open(io->readuntil(io, args...; kw...), convert(String, filename)::String)
 
 """
     readline(io::IO=stdin; keep::Bool=false)
@@ -496,6 +524,14 @@ julia> readline("my_file.txt", keep=true)
 
 julia> rm("my_file.txt")
 ```
+```julia-repl
+julia> print("Enter your name: ")
+Enter your name:
+
+julia> your_name = readline()
+Logan
+"Logan"
+```
 """
 function readline(filename::AbstractString; keep::Bool=false)
     open(filename) do f
@@ -521,7 +557,8 @@ end
 
 Read all lines of an I/O stream or a file as a vector of strings. Behavior is
 equivalent to saving the result of reading [`readline`](@ref) repeatedly with the same
-arguments and saving the resulting lines as a vector of strings.
+arguments and saving the resulting lines as a vector of strings.  See also
+[`eachline`](@ref) to iterate over the lines without reading them all at once.
 
 # Examples
 ```jldoctest
@@ -883,8 +920,9 @@ end
 
 function readuntil(io::IO, target::AbstractString; keep::Bool=false)
     # small-string target optimizations
-    isempty(target) && return ""
-    c, rest = Iterators.peel(target)
+    x = Iterators.peel(target)
+    isnothing(x) && return ""
+    c, rest = x
     if isempty(rest) && c <= '\x7f'
         return readuntil_string(io, c % UInt8, keep)
     end
@@ -987,6 +1025,13 @@ retained. When called with a file name, the file is opened once at the beginning
 iteration and closed at the end. If iteration is interrupted, the file will be
 closed when the `EachLine` object is garbage collected.
 
+To iterate over each line of a `String`, `eachline(IOBuffer(str))` can be used.
+
+[`Iterators.reverse`](@ref) can be used on an `EachLine` object to read the lines
+in reverse order (for files, buffers, and other I/O streams supporting [`seek`](@ref)),
+and [`first`](@ref) or [`last`](@ref) can be used to extract the initial or final
+lines, respectively.
+
 # Examples
 ```jldoctest
 julia> open("my_file.txt", "w") do io
@@ -1000,6 +1045,9 @@ JuliaLang is a GitHub organization. It has many members.
 
 julia> rm("my_file.txt");
 ```
+
+!!! compat "Julia 1.8"
+       Julia 1.8 is required to use `Iterators.reverse` or `last` with `eachline` iterators.
 """
 function eachline(stream::IO=stdin; keep::Bool=false)
     EachLine(stream, keep=keep)::EachLine
@@ -1019,6 +1067,117 @@ eltype(::Type{<:EachLine}) = String
 
 IteratorSize(::Type{<:EachLine}) = SizeUnknown()
 
+isdone(itr::EachLine, state...) = eof(itr.stream)
+
+# Reverse-order iteration for the EachLine iterator for seekable streams,
+# which works by reading the stream from the end in 4kiB chunks.
+function iterate(r::Iterators.Reverse{<:EachLine})
+    p0 = position(r.itr.stream)
+    seekend(r.itr.stream) # may throw if io is non-seekable
+    p = position(r.itr.stream)
+    # chunks = circular buffer of 4kiB blocks read from end of stream
+    chunks = empty!(Vector{Vector{UInt8}}(undef, 2)) # allocate space for 2 buffers (common case)
+    inewline = jnewline = 0
+    while p > p0 && inewline == 0 # read chunks until we find a newline or we read whole file
+        chunk = Vector{UInt8}(undef, min(4096, p-p0))
+        p -= length(chunk)
+        readbytes!(seek(r.itr.stream, p), chunk)
+        pushfirst!(chunks, chunk)
+        inewline = something(findlast(==(UInt8('\n')), chunk), 0)
+        if length(chunks) == 1 && inewline == length(chunks[1])
+            # found newline at end of file … keep looking
+            jnewline = inewline
+            inewline = something(findprev(==(UInt8('\n')), chunk, inewline-1), 0)
+        end
+    end
+    return iterate(r, (; p0, p, chunks, ichunk=1, inewline, jchunk=length(chunks), jnewline = jnewline == 0 && !isempty(chunks) ? length(chunks[end]) : jnewline))
+end
+function iterate(r::Iterators.Reverse{<:EachLine}, state)
+    function _stripnewline(keep, pos, data)
+        # strip \n or \r\n from data[pos] by decrementing pos
+        if !keep && pos > 0 && data[pos] == UInt8('\n')
+            pos -= 1
+            pos -= pos > 0 && data[pos] == UInt8('\r')
+        end
+        return pos
+    end
+    # state tuple: p0 = initial file position, p = current position,
+    #              chunks = circular array of chunk buffers,
+    #              current line is from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline]
+    p0, p, chunks, ichunk, inewline, jchunk, jnewline = state
+    if inewline == 0 # no newline found, remaining line = rest of chunks (if any)
+        isempty(chunks) && return (r.itr.ondone(); nothing)
+        buf = IOBuffer(sizehint = ichunk==jchunk ? jnewline : 4096)
+        while ichunk != jchunk
+            write(buf, chunks[ichunk])
+            ichunk = ichunk == length(chunks) ? 1 : ichunk + 1
+        end
+        chunk = chunks[jchunk]
+        write(buf, view(chunk, 1:jnewline))
+        buf.size = _stripnewline(r.itr.keep, buf.size, buf.data)
+        empty!(chunks) # will cause next iteration to terminate
+        seekend(r.itr.stream) # reposition to end of stream for isdone
+        s = String(take!(buf))
+    else
+        # extract the string from chunks[ichunk][inewline+1] to chunks[jchunk][jnewline]
+        if ichunk == jchunk # common case: current and previous newline in same chunk
+            chunk = chunks[ichunk]
+            s = String(view(chunk, inewline+1:_stripnewline(r.itr.keep, jnewline, chunk)))
+        else
+            buf = IOBuffer(sizehint=max(128, length(chunks[ichunk])-inewline+jnewline))
+            write(buf, view(chunks[ichunk], inewline+1:length(chunks[ichunk])))
+            i = ichunk
+            while true
+                i = i == length(chunks) ? 1 : i + 1
+                i == jchunk && break
+                write(buf, chunks[i])
+            end
+            write(buf, view(chunks[jchunk], 1:jnewline))
+            buf.size = _stripnewline(r.itr.keep, buf.size, buf.data)
+            s = String(take!(buf))
+
+            # overwrite obsolete chunks (ichunk+1:jchunk)
+            i = jchunk
+            while i != ichunk
+                chunk = chunks[i]
+                p -= length(resize!(chunk, min(4096, p-p0)))
+                readbytes!(seek(r.itr.stream, p), chunk)
+                i = i == 1 ? length(chunks) : i - 1
+            end
+        end
+
+        # find the newline previous to inewline
+        jchunk = ichunk
+        jnewline = inewline
+        while true
+            inewline = something(findprev(==(UInt8('\n')), chunks[ichunk], inewline-1), 0)
+            inewline > 0 && break
+            ichunk = ichunk == 1 ? length(chunks) : ichunk - 1
+            ichunk == jchunk && break # found nothing — may need to read more chunks
+            inewline = length(chunks[ichunk])+1 # start for next findprev
+        end
+
+        # read more chunks to look for a newline (should rarely happen)
+        if inewline == 0 && p > p0
+            ichunk = jchunk + 1
+            while true
+                chunk = Vector{UInt8}(undef, min(4096, p-p0))
+                p -= length(chunk)
+                readbytes!(seek(r.itr.stream, p), chunk)
+                insert!(chunks, ichunk, chunk)
+                inewline = something(findlast(==(UInt8('\n')), chunk), 0)
+                (p == p0 || inewline > 0) && break
+            end
+        end
+    end
+    return (s, (; p0, p, chunks, ichunk, inewline, jchunk, jnewline))
+end
+isdone(r::Iterators.Reverse{<:EachLine}, state) = isempty(state.chunks)
+isdone(r::Iterators.Reverse{<:EachLine}) = isdone(r.itr)
+
+# use reverse iteration to get end of EachLines (if possible)
+last(itr::EachLine) = first(Iterators.reverse(itr))
+
 struct ReadEachIterator{T, IOT <: IO}
     stream::IOT
 end
@@ -1028,7 +1187,7 @@ end
 
 Return an iterable object yielding [`read(io, T)`](@ref).
 
-See also: [`skipchars`](@ref), [`eachline`](@ref), [`readuntil`](@ref)
+See also [`skipchars`](@ref), [`eachline`](@ref), [`readuntil`](@ref).
 
 !!! compat "Julia 1.6"
     `readeach` requires Julia 1.6 or later.
@@ -1053,13 +1212,15 @@ eltype(::Type{ReadEachIterator{T}}) where T = T
 
 IteratorSize(::Type{<:ReadEachIterator}) = SizeUnknown()
 
+isdone(itr::ReadEachIterator, state...) = eof(itr.stream)
+
 # IOStream Marking
 # Note that these functions expect that io.mark exists for
 # the concrete IO type. This may not be true for IO types
 # not in base.
 
 """
-    mark(s)
+    mark(s::IO)
 
 Add a mark at the current position of stream `s`. Return the marked position.
 
@@ -1070,7 +1231,7 @@ function mark(io::IO)
 end
 
 """
-    unmark(s)
+    unmark(s::IO)
 
 Remove a mark from stream `s`. Return `true` if the stream was marked, `false` otherwise.
 
@@ -1083,7 +1244,7 @@ function unmark(io::IO)
 end
 
 """
-    reset(s)
+    reset(s::IO)
 
 Reset a stream `s` to a previously marked position, and remove the mark. Return the
 previously marked position. Throw an error if the stream is not marked.
@@ -1099,7 +1260,7 @@ function reset(io::T) where T<:IO
 end
 
 """
-    ismarked(s)
+    ismarked(s::IO)
 
 Return `true` if stream `s` is marked.
 
@@ -1110,11 +1271,6 @@ ismarked(io::IO) = io.mark >= 0
 # Make sure all IO streams support flush, even if only as a no-op,
 # to make it easier to write generic I/O code.
 
-"""
-    flush(stream)
-
-Commit all currently buffered writes to the given stream.
-"""
 flush(io::IO) = nothing
 
 """
@@ -1156,6 +1312,8 @@ pass the filename as the first argument. EOL markers other than `'\\n'` are supp
 passing them as the second argument.  The last non-empty line of `io` is counted even if it does not
 end with the EOL, matching the length returned by [`eachline`](@ref) and [`readlines`](@ref).
 
+To count lines of a `String`, `countlines(IOBuffer(str))` can be used.
+
 # Examples
 ```jldoctest
 julia> io = IOBuffer("JuliaLang is a GitHub organization.\\n");
@@ -1168,8 +1326,13 @@ julia> io = IOBuffer("JuliaLang is a GitHub organization.");
 julia> countlines(io)
 1
 
+julia> eof(io) # counting lines moves the file pointer
+true
+
+julia> io = IOBuffer("JuliaLang is a GitHub organization.");
+
 julia> countlines(io, eol = '.')
-0
+1
 ```
 """
 function countlines(io::IO; eol::AbstractChar='\n')
diff --git a/base/iobuffer.jl b/base/iobuffer.jl
index a1504b4bd4f638..e08a019d84a2ca 100644
--- a/base/iobuffer.jl
+++ b/base/iobuffer.jl
@@ -334,6 +334,12 @@ end
 
 eof(io::GenericIOBuffer) = (io.ptr-1 == io.size)
 
+function closewrite(io::GenericIOBuffer)
+    io.writable = false
+    # OR throw(_UVError("closewrite", UV_ENOTSOCK))
+    nothing
+end
+
 @noinline function close(io::GenericIOBuffer{T}) where T
     io.readable = false
     io.writable = false
@@ -353,8 +359,7 @@ isopen(io::GenericIOBuffer) = io.readable || io.writable || io.seekable || bytes
 """
     take!(b::IOBuffer)
 
-Obtain the contents of an `IOBuffer` as an array, without copying. Afterwards, the
-`IOBuffer` is reset to its initial state.
+Obtain the contents of an `IOBuffer` as an array. Afterwards, the `IOBuffer` is reset to its initial state.
 
 # Examples
 ```jldoctest
@@ -405,12 +410,12 @@ function take!(io::IOBuffer)
     return data
 end
 
-function write(to::GenericIOBuffer, from::GenericIOBuffer)
+function write(to::IO, from::GenericIOBuffer)
     if to === from
         from.ptr = from.size + 1
         return 0
     end
-    written::Int = write_sub(to, from.data, from.ptr, bytesavailable(from))
+    written::Int = GC.@preserve from unsafe_write(to, pointer(from.data, from.ptr), UInt(bytesavailable(from)))
     from.ptr += written
     return written
 end
@@ -434,14 +439,6 @@ function unsafe_write(to::GenericIOBuffer, p::Ptr{UInt8}, nb::UInt)
     return written
 end
 
-function write_sub(to::GenericIOBuffer, a::AbstractArray{UInt8}, offs, nel)
-    require_one_based_indexing(a)
-    if offs+nel-1 > length(a) || offs < 1 || nel < 0
-        throw(BoundsError())
-    end
-    GC.@preserve a unsafe_write(to, pointer(a, offs), UInt(nel))
-end
-
 @inline function write(to::GenericIOBuffer, a::UInt8)
     ensureroom(to, UInt(1))
     ptr = (to.append ? to.size+1 : to.ptr)
diff --git a/base/iostream.jl b/base/iostream.jl
index 4a52a4f4ce5789..23dfb53256e826 100644
--- a/base/iostream.jl
+++ b/base/iostream.jl
@@ -13,7 +13,7 @@ Mostly used to represent files returned by [`open`](@ref).
 mutable struct IOStream <: IO
     handle::Ptr{Cvoid}
     ios::Array{UInt8,1}
-    name::AbstractString
+    name::String
     mark::Int64
     lock::ReentrantLock
     _dolock::Bool
@@ -272,7 +272,7 @@ safe multi-threaded access.
 !!! compat "Julia 1.5"
     The `lock` argument is available as of Julia 1.5.
 """
-function open(fname::AbstractString; lock = true,
+function open(fname::String; lock = true,
     read     :: Union{Bool,Nothing} = nothing,
     write    :: Union{Bool,Nothing} = nothing,
     create   :: Union{Bool,Nothing} = nothing,
@@ -299,6 +299,7 @@ function open(fname::AbstractString; lock = true,
     end
     return s
 end
+open(fname::AbstractString; kwargs...) = open(convert(String, fname)::String; kwargs...)
 
 """
     open(filename::AbstractString, [mode::AbstractString]; lock = true) -> IOStream
@@ -404,13 +405,15 @@ end
 if ENDIAN_BOM == 0x04030201
 function read(s::IOStream, T::Union{Type{Int16},Type{UInt16},Type{Int32},Type{UInt32},Type{Int64},Type{UInt64}})
     n = sizeof(T)
-    lock(s.lock)
+    l = s._dolock
+    _lock = s.lock
+    l && lock(_lock)
     if ccall(:jl_ios_buffer_n, Cint, (Ptr{Cvoid}, Csize_t), s.ios, n) != 0
-        unlock(s.lock)
+        l && unlock(_lock)
         throw(EOFError())
     end
     x = ccall(:jl_ios_get_nbyte_int, UInt64, (Ptr{Cvoid}, Csize_t), s.ios, n) % T
-    unlock(s.lock)
+    l && unlock(_lock)
     return x
 end
 
@@ -450,17 +453,24 @@ function readbytes_all!(s::IOStream,
                         nb::Integer)
     olb = lb = length(b)
     nr = 0
-    @_lock_ios s begin
-    GC.@preserve b while nr < nb
-        if lb < nr+1
-            lb = max(65536, (nr+1) * 2)
-            resize!(b, lb)
+    let l = s._dolock, slock = s.lock
+        l && lock(slock)
+        GC.@preserve b while nr < nb
+            if lb < nr+1
+                try
+                    lb = max(65536, (nr+1) * 2)
+                    resize!(b, lb)
+                catch
+                    l && unlock(slock)
+                    rethrow()
+                end
+            end
+            thisr = Int(ccall(:ios_readall, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
+                            s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
+            nr += thisr
+            (nr == nb || thisr == 0 || _eof_nolock(s)) && break
         end
-        thisr = Int(ccall(:ios_readall, Csize_t, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t),
-                          s.ios, pointer(b, nr+1), min(lb-nr, nb-nr)))
-        nr += thisr
-        (nr == nb || thisr == 0 || _eof_nolock(s)) && break
-    end
+        l && unlock(slock)
     end
     if lb > olb && lb > nr
         resize!(b, max(olb, nr)) # shrink to just contain input data if was resized
diff --git a/base/irrationals.jl b/base/irrationals.jl
index 1f7b1358dd70e3..ecc3aff6138c1a 100644
--- a/base/irrationals.jl
+++ b/base/irrationals.jl
@@ -22,7 +22,9 @@ abstract type AbstractIrrational <: Real end
     Irrational{sym} <: AbstractIrrational
 
 Number type representing an exact irrational value denoted by the
-symbol `sym`.
+symbol `sym`, such as [`π`](@ref pi), [`ℯ`](@ref) and [`γ`](@ref Base.MathConstants.eulergamma).
+
+See also [`@irrational`], [`AbstractIrrational`](@ref).
 """
 struct Irrational{sym} <: AbstractIrrational end
 
@@ -46,7 +48,8 @@ AbstractFloat(x::AbstractIrrational) = Float64(x)::Float64
 Float16(x::AbstractIrrational) = Float16(Float32(x)::Float32)
 Complex{T}(x::AbstractIrrational) where {T<:Real} = Complex{T}(T(x))
 
-@pure function Rational{T}(x::AbstractIrrational) where T<:Integer
+# XXX this may change `DEFAULT_PRECISION`, thus not effect free
+@assume_effects :total function Rational{T}(x::AbstractIrrational) where T<:Integer
     o = precision(BigFloat)
     p = 256
     while true
@@ -62,7 +65,7 @@ Complex{T}(x::AbstractIrrational) where {T<:Real} = Complex{T}(T(x))
 end
 Rational{BigInt}(x::AbstractIrrational) = throw(ArgumentError("Cannot convert an AbstractIrrational to a Rational{BigInt}: use rationalize(BigInt, x) instead"))
 
-@pure function (t::Type{T})(x::AbstractIrrational, r::RoundingMode) where T<:Union{Float32,Float64}
+@assume_effects :total function (t::Type{T})(x::AbstractIrrational, r::RoundingMode) where T<:Union{Float32,Float64}
     setprecision(BigFloat, 256) do
         T(BigFloat(x)::BigFloat, r)
     end
@@ -104,11 +107,11 @@ end
 <=(x::AbstractFloat, y::AbstractIrrational) = x < y
 
 # Irrational vs Rational
-@pure function rationalize(::Type{T}, x::AbstractIrrational; tol::Real=0) where T
+@assume_effects :total function rationalize(::Type{T}, x::AbstractIrrational; tol::Real=0) where T
     return rationalize(T, big(x), tol=tol)
 end
-@pure function lessrational(rx::Rational{<:Integer}, x::AbstractIrrational)
-    # an @pure version of `<` for determining if the rationalization of
+@assume_effects :total function lessrational(rx::Rational{<:Integer}, x::AbstractIrrational)
+    # an @assume_effects :total version of `<` for determining if the rationalization of
     # an irrational number required rounding up or down
     return rx < big(x)
 end
@@ -151,6 +154,8 @@ zero(::Type{<:AbstractIrrational}) = false
 one(::AbstractIrrational) = true
 one(::Type{<:AbstractIrrational}) = true
 
+sign(x::AbstractIrrational) = ifelse(x < zero(x), -1.0, 1.0)
+
 -(x::AbstractIrrational) = -Float64(x)
 for op in Symbol[:+, :-, :*, :/, :^]
     @eval $op(x::AbstractIrrational, y::AbstractIrrational) = $op(Float64(x),Float64(y))
@@ -160,8 +165,8 @@ end
 round(x::Irrational, r::RoundingMode) = round(float(x), r)
 
 """
-	@irrational sym val def
-	@irrational(sym, val, def)
+    @irrational sym val def
+    @irrational(sym, val, def)
 
 Define a new `Irrational` value, `sym`, with pre-computed `Float64` value `val`,
 and arbitrary-precision definition in terms of `BigFloat`s given by the expression `def`.
@@ -201,7 +206,7 @@ big(::Type{<:AbstractIrrational}) = BigFloat
 function alignment(io::IO, x::AbstractIrrational)
     m = match(r"^(.*?)(=.*)$", sprint(show, x, context=io, sizehint=0))
     m === nothing ? (length(sprint(show, x, context=io, sizehint=0)), 0) :
-    (length(m.captures[1]), length(m.captures[2]))
+    (length(something(m.captures[1])), length(something(m.captures[2])))
 end
 
 # inv
diff --git a/base/iterators.jl b/base/iterators.jl
index 6480b2b799e7fe..2702375d0f6304 100644
--- a/base/iterators.jl
+++ b/base/iterators.jl
@@ -9,7 +9,7 @@ module Iterators
 import ..@__MODULE__, ..parentmodule
 const Base = parentmodule(@__MODULE__)
 using .Base:
-    @inline, Pair, AbstractDict, IndexLinear, IndexCartesian, IndexStyle, AbstractVector, Vector,
+    @inline, Pair, Pairs, AbstractDict, IndexLinear, IndexCartesian, IndexStyle, AbstractVector, Vector,
     tail, SizeUnknown, HasLength, HasShape, IsInfinite, EltypeUnknown, HasEltype, OneTo,
     @propagate_inbounds, @isdefined, @boundscheck, @inbounds, Generator, AbstractRange,
     LinearIndices, (:), |, +, -, !==, !, <=, <, missing, any, _counttuple
@@ -22,7 +22,7 @@ import .Base:
     getindex, setindex!, get, iterate,
     popfirst!, isdone, peek
 
-export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, partition
+export enumerate, zip, rest, countfrom, take, drop, takewhile, dropwhile, cycle, repeated, product, flatten, partition, flatmap
 
 """
     Iterators.map(f, iterators...)
@@ -103,7 +103,6 @@ size(r::Reverse) = size(r.itr)
 IteratorSize(::Type{Reverse{T}}) where {T} = IteratorSize(T)
 IteratorEltype(::Type{Reverse{T}}) where {T} = IteratorEltype(T)
 last(r::Reverse) = first(r.itr) # the first shall be last
-first(r::Reverse) = last(r.itr) # and the last shall be first
 
 # reverse-order array iterators: assumes more-specialized Reverse for eachindex
 @propagate_inbounds function iterate(A::Reverse{<:AbstractArray}, state=(reverse(eachindex(A.itr)),))
@@ -113,13 +112,22 @@ first(r::Reverse) = last(r.itr) # and the last shall be first
     (A.itr[idx], (state[1], itrs))
 end
 
+# Fallback method of `iterate(::Reverse{T})` which assumes the collection has `getindex(::T) and `reverse(eachindex(::T))`
+# don't propagate inbounds for this just in case
+function iterate(A::Reverse, state=(reverse(eachindex(A.itr)),))
+    y = iterate(state...)
+    y === nothing && return y
+    idx, itrs = y
+    (A.itr[idx], (state[1], itrs))
+end
+
 reverse(R::AbstractRange) = Base.reverse(R) # copying ranges is cheap
 reverse(G::Generator) = Generator(G.f, reverse(G.iter))
 reverse(r::Reverse) = r.itr
 reverse(x::Union{Number,AbstractChar}) = x
 reverse(p::Pair) = Base.reverse(p) # copying pairs is cheap
 
-iterate(r::Reverse{<:Tuple}, i::Int = length(r.itr)) = i < 1 ? nothing : (r.itr[i], i-1)
+iterate(r::Reverse{<:Union{Tuple, NamedTuple}}, i::Int = length(r.itr)) = i < 1 ? nothing : (r.itr[i], i-1)
 
 # enumerate
 
@@ -160,6 +168,7 @@ size(e::Enumerate) = size(e.itr)
     n === nothing && return n
     (i, n[1]), (i+1, n[2])
 end
+last(e::Enumerate) = (length(e.itr), e.itr[end])
 
 eltype(::Type{Enumerate{I}}) where {I} = Tuple{Int, eltype(I)}
 
@@ -177,18 +186,6 @@ end
     (i, n[1]), (i-1, ri, n[2])
 end
 
-"""
-    Iterators.Pairs(values, keys) <: AbstractDict{eltype(keys), eltype(values)}
-
-Transforms an indexable container into an Dictionary-view of the same data.
-Modifying the key-space of the underlying data may invalidate this object.
-"""
-struct Pairs{K, V, I, A} <: AbstractDict{K, V}
-    data::A
-    itr::I
-    Pairs(data::A, itr::I) where {A, I} = new{eltype(I), eltype(A), I, A}(data, itr)
-end
-
 """
     pairs(IndexLinear(), A)
     pairs(IndexCartesian(), A)
@@ -233,44 +230,59 @@ CartesianIndex(1, 2) d
 CartesianIndex(2, 2) e
 ```
 
-See also: [`IndexStyle`](@ref), [`axes`](@ref).
+See also [`IndexStyle`](@ref), [`axes`](@ref).
 """
 pairs(::IndexLinear,    A::AbstractArray) = Pairs(A, LinearIndices(A))
 pairs(::IndexCartesian, A::AbstractArray) = Pairs(A, CartesianIndices(axes(A)))
 
 # preserve indexing capabilities for known indexable types
 # faster than zip(keys(a), values(a)) for arrays
+pairs(tuple::Tuple) = Pairs{Int}(tuple, keys(tuple))
+pairs(nt::NamedTuple) = Pairs{Symbol}(nt, keys(nt))
+pairs(v::Core.SimpleVector) = Pairs(v, LinearIndices(v))
 pairs(A::AbstractArray)  = pairs(IndexCartesian(), A)
 pairs(A::AbstractVector) = pairs(IndexLinear(), A)
-pairs(tuple::Tuple) = Pairs(tuple, keys(tuple))
-pairs(nt::NamedTuple) = Pairs(nt, keys(nt))
-pairs(v::Core.SimpleVector) = Pairs(v, LinearIndices(v))
 # pairs(v::Pairs) = v # listed for reference, but already defined from being an AbstractDict
 
-length(v::Pairs) = length(v.itr)
-axes(v::Pairs) = axes(v.itr)
-size(v::Pairs) = size(v.itr)
-@propagate_inbounds function iterate(v::Pairs{K, V}, state...) where {K, V}
-    x = iterate(v.itr, state...)
+length(v::Pairs) = length(getfield(v, :itr))
+axes(v::Pairs) = axes(getfield(v, :itr))
+size(v::Pairs) = size(getfield(v, :itr))
+
+@propagate_inbounds function _pairs_elt(p::Pairs{K, V}, idx) where {K, V}
+    return Pair{K, V}(idx, getfield(p, :data)[idx])
+end
+
+@propagate_inbounds function iterate(p::Pairs{K, V}, state...) where {K, V}
+    x = iterate(getfield(p, :itr), state...)
+    x === nothing && return x
+    idx, next = x
+    return (_pairs_elt(p, idx), next)
+end
+
+@propagate_inbounds function iterate(r::Reverse{<:Pairs}, state=(reverse(getfield(r.itr, :itr)),))
+    x = iterate(state...)
     x === nothing && return x
-    indx, n = x
-    item = v.data[indx]
-    return (Pair{K, V}(indx, item), n)
+    idx, next = x
+    return (_pairs_elt(r.itr, idx), (state[1], next))
 end
-@inline isdone(v::Pairs, state...) = isdone(v.itr, state...)
+
+@inline isdone(v::Pairs, state...) = isdone(getfield(v, :itr), state...)
 
 IteratorSize(::Type{<:Pairs{<:Any, <:Any, I}}) where {I} = IteratorSize(I)
 IteratorSize(::Type{<:Pairs{<:Any, <:Any, <:Base.AbstractUnitRange, <:Tuple}}) = HasLength()
 
-reverse(v::Pairs) = Pairs(v.data, reverse(v.itr))
+function last(v::Pairs{K, V}) where {K, V}
+    idx = last(getfield(v, :itr))
+    return Pair{K, V}(idx, v[idx])
+end
 
-haskey(v::Pairs, key) = (key in v.itr)
-keys(v::Pairs) = v.itr
-values(v::Pairs) = v.data
-getindex(v::Pairs, key) = v.data[key]
-setindex!(v::Pairs, value, key) = (v.data[key] = value; v)
-get(v::Pairs, key, default) = get(v.data, key, default)
-get(f::Base.Callable, v::Pairs, key) = get(f, v.data, key)
+haskey(v::Pairs, key) = (key in getfield(v, :itr))
+keys(v::Pairs) = getfield(v, :itr)
+values(v::Pairs) = getfield(v, :data) # TODO: this should be a view of data subset by itr
+getindex(v::Pairs, key) = getfield(v, :data)[key]
+setindex!(v::Pairs, value, key) = (getfield(v, :data)[key] = value; v)
+get(v::Pairs, key, default) = get(getfield(v, :data), key, default)
+get(f::Base.Callable, v::Pairs, key) = get(f, getfield(v, :data), key)
 
 # zip
 
@@ -288,6 +300,8 @@ the `zip` iterator is a tuple of values of its subiterators.
     `zip` orders the calls to its subiterators in such a way that stateful iterators will
     not advance when another iterator finishes in the current iteration.
 
+See also: [`enumerate`](@ref), [`splat`](@ref Base.splat).
+
 # Examples
 ```jldoctest
 julia> a = 1:5
@@ -408,7 +422,8 @@ zip_iteratoreltype() = HasEltype()
 zip_iteratoreltype(a) = a
 zip_iteratoreltype(a, tail...) = and_iteratoreltype(a, zip_iteratoreltype(tail...))
 
-reverse(z::Zip) = Zip(Base.map(reverse, z.is))
+reverse(z::Zip) = Zip(Base.map(reverse, z.is)) # n.b. we assume all iterators are the same length
+last(z::Zip) = getindex.(z.is, minimum(Base.map(lastindex, z.is)))
 
 # filter
 
@@ -441,6 +456,12 @@ julia> foreach(println, f)
 1
 3
 5
+
+julia> [x for x in [1, 2, 3, 4, 5] if isodd(x)]  # collects a generator over Iterators.filter
+3-element Vector{Int64}:
+ 1
+ 3
+ 5
 ```
 """
 filter(flt, itr) = Filter(flt, itr)
@@ -461,6 +482,7 @@ IteratorEltype(::Type{Filter{F,I}}) where {F,I} = IteratorEltype(I)
 IteratorSize(::Type{<:Filter}) = SizeUnknown()
 
 reverse(f::Filter) = Filter(f.flt, reverse(f.itr))
+last(f::Filter) = first(reverse(f))
 
 # Accumulate -- partial reductions of a function over an iterator
 
@@ -484,20 +506,22 @@ This is effectively a lazy version of [`Base.accumulate`](@ref).
 
 # Examples
 ```jldoctest
-julia> f = Iterators.accumulate(+, [1,2,3,4]);
+julia> a = Iterators.accumulate(+, [1,2,3,4]);
 
-julia> foreach(println, f)
+julia> foreach(println, a)
 1
 3
 6
 10
 
-julia> f = Iterators.accumulate(+, [1,2,3]; init = 100);
+julia> b = Iterators.accumulate(/, (2, 5, 2, 5); init = 100);
 
-julia> foreach(println, f)
-101
-103
-106
+julia> collect(b)
+4-element Vector{Float64}:
+ 50.0
+ 10.0
+  5.0
+  1.0
 ```
 """
 accumulate(f, itr; init = Base._InitialValue()) = Accumulate(f, itr, init)
@@ -538,6 +562,8 @@ end
 
 An iterator that yields the same elements as `iter`, but starting at the given `state`.
 
+See also: [`Iterators.drop`](@ref), [`Iterators.peel`](@ref), [`Base.rest`](@ref).
+
 # Examples
 ```jldoctest
 julia> collect(Iterators.rest([1,2,3,4], 2))
@@ -556,6 +582,13 @@ rest(itr) = itr
 
 Returns the first element and an iterator over the remaining elements.
 
+If the iterator is empty return `nothing` (like `iterate`).
+
+!!! compat "Julia 1.7"
+    Prior versions throw a BoundsError if the iterator is empty.
+
+See also: [`Iterators.drop`](@ref), [`Iterators.take`](@ref).
+
 # Examples
 ```jldoctest
 julia> (a, rest) = Iterators.peel("abc");
@@ -571,7 +604,7 @@ julia> collect(rest)
 """
 function peel(itr)
     y = iterate(itr)
-    y === nothing && throw(BoundsError())
+    y === nothing && return y
     val, s = y
     val, rest(itr, s)
 end
@@ -587,8 +620,8 @@ IteratorSize(::Type{<:Rest{I}}) where {I} = rest_iteratorsize(IteratorSize(I))
 
 # Count -- infinite counting
 
-struct Count{S<:Number}
-    start::S
+struct Count{T,S}
+    start::T
     step::S
 end
 
@@ -608,11 +641,13 @@ julia> for v in Iterators.countfrom(5, 2)
 9
 ```
 """
-countfrom(start::Number, step::Number) = Count(promote(start, step)...)
-countfrom(start::Number)               = Count(start, oneunit(start))
-countfrom()                            = Count(1, 1)
+countfrom(start::T, step::S) where {T,S} = Count{typeof(start+step),S}(start, step)
+countfrom(start::Number, step::Number)   = Count(promote(start, step)...)
+countfrom(start)                         = Count(start, oneunit(start))
+countfrom()                              = Count(1, 1)
+
 
-eltype(::Type{Count{S}}) where {S} = S
+eltype(::Type{<:Count{T}}) where {T} = T
 
 iterate(it::Count, state=it.start) = (state, state + it.step)
 
@@ -634,6 +669,8 @@ end
 
 An iterator that generates at most the first `n` elements of `iter`.
 
+See also: [`drop`](@ref Iterators.drop), [`peel`](@ref Iterators.peel), [`first`](@ref), [`take!`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = 1:2:11
@@ -779,7 +816,7 @@ end
 
 IteratorSize(::Type{<:TakeWhile}) = SizeUnknown()
 eltype(::Type{TakeWhile{I,P}} where P) where {I} = eltype(I)
-IteratorEltype(::Type{TakeWhile{I}} where P) where {I} = IteratorEltype(I)
+IteratorEltype(::Type{TakeWhile{I, P}} where P) where {I} = IteratorEltype(I)
 
 
 # dropwhile
@@ -845,6 +882,8 @@ end
 An iterator that cycles through `iter` forever.
 If `iter` is empty, so is `cycle(iter)`.
 
+See also: [`Iterators.repeated`](@ref), [`repeat`](@ref).
+
 # Examples
 ```jldoctest
 julia> for (i, v) in enumerate(Iterators.cycle("hello"))
@@ -870,6 +909,7 @@ function iterate(it::Cycle, state)
 end
 
 reverse(it::Cycle) = Cycle(reverse(it.xs))
+last(it::Cycle) = last(it.xs)
 
 # Repeated - repeat an object infinitely many times
 
@@ -884,6 +924,8 @@ repeated(x) = Repeated(x)
 An iterator that generates the value `x` forever. If `n` is specified, generates `x` that
 many times (equivalent to `take(repeated(x), n)`).
 
+See also: [`Iterators.cycle`](@ref), [`repeat`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = Iterators.repeated([1 2], 4);
@@ -906,6 +948,7 @@ IteratorSize(::Type{<:Repeated}) = IsInfinite()
 IteratorEltype(::Type{<:Repeated}) = HasEltype()
 
 reverse(it::Union{Repeated,Take{<:Repeated}}) = it
+last(it::Union{Repeated,Take{<:Repeated}}) = first(it)
 
 # Product -- cartesian product of iterators
 struct ProductIterator{T<:Tuple}
@@ -919,12 +962,17 @@ Return an iterator over the product of several iterators. Each generated element
 a tuple whose `i`th element comes from the `i`th argument iterator. The first iterator
 changes the fastest.
 
+See also: [`zip`](@ref), [`Iterators.flatten`](@ref).
+
 # Examples
 ```jldoctest
 julia> collect(Iterators.product(1:2, 3:5))
 2×3 Matrix{Tuple{Int64, Int64}}:
  (1, 3)  (1, 4)  (1, 5)
  (2, 3)  (2, 4)  (2, 5)
+
+julia> ans == [(x,y) for x in 1:2, y in 3:5]  # collects a generator involving Iterators.product
+true
 ```
 """
 product(iters...) = ProductIterator(iters)
@@ -1032,6 +1080,7 @@ end
 end
 
 reverse(p::ProductIterator) = ProductIterator(Base.map(reverse, p.iterators))
+last(p::ProductIterator) = Base.map(last, p.iterators)
 
 # flatten an iterator of iterators
 
@@ -1054,6 +1103,15 @@ julia> collect(Iterators.flatten((1:2, 8:9)))
  2
  8
  9
+
+julia> [(x,y) for x in 0:1 for y in 'a':'c']  # collects generators involving Iterators.flatten
+6-element Vector{Tuple{Int64, Char}}:
+ (0, 'a')
+ (0, 'b')
+ (0, 'c')
+ (1, 'a')
+ (1, 'b')
+ (1, 'c')
 ```
 """
 flatten(itr) = Flatten(itr)
@@ -1102,6 +1160,34 @@ length(f::Flatten{Tuple{}}) = 0
 end
 
 reverse(f::Flatten) = Flatten(reverse(itr) for itr in reverse(f.it))
+last(f::Flatten) = last(last(f.it))
+
+"""
+    Iterators.flatmap(f, iterators...)
+
+Equivalent to `flatten(map(f, iterators...))`.
+
+See also [`Iterators.flatten`](@ref), [`Iterators.map`](@ref).
+
+!!! compat "Julia 1.9"
+    This function was added in Julia 1.9.
+
+# Examples
+```jldoctest
+julia> Iterators.flatmap(n->-n:2:n, 1:3) |> collect
+9-element Vector{Int64}:
+ -1
+  1
+ -2
+  0
+  2
+ -3
+ -1
+  1
+  3
+```
+"""
+flatmap(f, c...) = flatten(map(f, c...))
 
 """
     partition(collection, n)
@@ -1148,29 +1234,29 @@ end
 
 function length(itr::PartitionIterator)
     l = length(itr.c)
-    return div(l, itr.n) + ((mod(l, itr.n) > 0) ? 1 : 0)
+    return cld(l, itr.n)
 end
 
-function iterate(itr::PartitionIterator{<:AbstractRange}, state=1)
-    state > length(itr.c) && return nothing
-    r = min(state + itr.n - 1, length(itr.c))
+function iterate(itr::PartitionIterator{<:AbstractRange}, state = firstindex(itr.c))
+    state > lastindex(itr.c) && return nothing
+    r = min(state + itr.n - 1, lastindex(itr.c))
     return @inbounds itr.c[state:r], r + 1
 end
 
-function iterate(itr::PartitionIterator{<:AbstractArray}, state=1)
-    state > length(itr.c) && return nothing
-    r = min(state + itr.n - 1, length(itr.c))
+function iterate(itr::PartitionIterator{<:AbstractArray}, state = firstindex(itr.c))
+    state > lastindex(itr.c) && return nothing
+    r = min(state + itr.n - 1, lastindex(itr.c))
     return @inbounds view(itr.c, state:r), r + 1
 end
 
 struct IterationCutShort; end
 
 function iterate(itr::PartitionIterator, state...)
-    v = Vector{eltype(itr.c)}(undef, itr.n)
     # This is necessary to remember whether we cut the
     # last element short. In such cases, we do return that
     # element, but not the next one
     state === (IterationCutShort(),) && return nothing
+    v = Vector{eltype(itr.c)}(undef, itr.n)
     i = 0
     y = iterate(itr.c, state...)
     while y !== nothing
@@ -1223,6 +1309,12 @@ julia> collect(a)
 2-element Vector{Char}:
  'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)
  'f': ASCII/Unicode U+0066 (category Ll: Letter, lowercase)
+
+julia> Iterators.reset!(a); popfirst!(a)
+'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)
+
+julia> Iterators.reset!(a, "hello"); popfirst!(a)
+'h': ASCII/Unicode U+0068 (category Ll: Letter, lowercase)
 ```
 
 ```jldoctest
@@ -1251,7 +1343,7 @@ mutable struct Stateful{T, VS}
     end
 end
 
-function reset!(s::Stateful{T,VS}, itr::T) where {T,VS}
+function reset!(s::Stateful{T,VS}, itr::T=s.itr) where {T,VS}
     s.itr = itr
     setfield!(s, :nextvalstate, iterate(itr))
     s.taken = 0
@@ -1266,7 +1358,7 @@ else
     # fixpoint.
     approx_iter_type(itrT::Type) = _approx_iter_type(itrT, Base._return_type(iterate, Tuple{itrT}))
     # Not actually called, just passed to return type to avoid
-    # having to typesubtract
+    # having to typesplit on Nothing
     function doiterate(itr, valstate::Union{Nothing, Tuple{Any, Any}})
         valstate === nothing && return nothing
         val, st = valstate
@@ -1296,7 +1388,10 @@ convert(::Type{Stateful}, itr) = Stateful(itr)
     end
 end
 
-@inline peek(s::Stateful, sentinel=nothing) = s.nextvalstate !== nothing ? s.nextvalstate[1] : sentinel
+@inline function peek(s::Stateful, sentinel=nothing)
+    ns = s.nextvalstate
+    return ns !== nothing ? ns[1] : sentinel
+end
 @inline iterate(s::Stateful, state=nothing) = s.nextvalstate === nothing ? nothing : (popfirst!(s), nothing)
 IteratorSize(::Type{Stateful{T,VS}}) where {T,VS} = IteratorSize(T) isa HasShape ? HasLength() : IteratorSize(T)
 eltype(::Type{Stateful{T, VS}} where VS) where {T} = eltype(T)
@@ -1306,20 +1401,39 @@ length(s::Stateful) = length(s.itr) - s.taken
 """
     only(x)
 
-Returns the one and only element of collection `x`, and throws an `ArgumentError` if the
+Return the one and only element of collection `x`, or throw an [`ArgumentError`](@ref) if the
 collection has zero or multiple elements.
 
-See also: [`first`](@ref), [`last`](@ref).
+See also [`first`](@ref), [`last`](@ref).
 
 !!! compat "Julia 1.4"
     This method requires at least Julia 1.4.
+
+# Examples
+```jldoctest
+julia> only(["a"])
+"a"
+
+julia> only("a")
+'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)
+
+julia> only(())
+ERROR: ArgumentError: Tuple contains 0 elements, must contain exactly 1 element
+Stacktrace:
+[...]
+
+julia> only(('a', 'b'))
+ERROR: ArgumentError: Tuple contains 2 elements, must contain exactly 1 element
+Stacktrace:
+[...]
+```
 """
 @propagate_inbounds function only(x)
     i = iterate(x)
     @boundscheck if i === nothing
         throw(ArgumentError("Collection is empty, must contain exactly 1 element"))
     end
-    (ret, state) = i
+    (ret, state) = i::NTuple{2,Any}
     @boundscheck if iterate(x, state) !== nothing
         throw(ArgumentError("Collection has multiple elements, must contain exactly 1 element"))
     end
@@ -1340,4 +1454,7 @@ only(x::NamedTuple) = throw(
     ArgumentError("NamedTuple contains $(length(x)) elements, must contain exactly 1 element")
 )
 
+
+Base.intersect(a::ProductIterator, b::ProductIterator) = ProductIterator(intersect.(a.iterators, b.iterators))
+
 end
diff --git a/base/libc.jl b/base/libc.jl
index 547561ac964bac..7d88e89bf605a4 100644
--- a/base/libc.jl
+++ b/base/libc.jl
@@ -131,7 +131,7 @@ Suspends execution for `s` seconds.
 This function does not yield to Julia's scheduler and therefore blocks
 the Julia thread that it is running on for the duration of the sleep time.
 
-See also: [`sleep`](@ref)
+See also [`sleep`](@ref).
 """
 systemsleep
 
@@ -255,7 +255,7 @@ time() = ccall(:jl_clock_now, Float64, ())
 
 Get Julia's process ID.
 """
-getpid() = ccall(:jl_getpid, Int32, ())
+getpid() = ccall(:uv_os_getpid, Int32, ())
 
 ## network functions ##
 
@@ -376,31 +376,111 @@ free(p::Cwstring) = free(convert(Ptr{Cwchar_t}, p))
 
 ## Random numbers ##
 
+# Access to very high quality (kernel) randomness
+function getrandom!(A::Union{Array,Base.RefValue})
+    ret = ccall(:uv_random, Cint, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}, Csize_t,   Cuint, Ptr{Cvoid}),
+                                   C_NULL,     C_NULL,     A,          sizeof(A), 0,     C_NULL)
+    Base.uv_error("getrandom", ret)
+    return A
+end
+_make_uint64_seed() = getrandom!(Base.RefValue{UInt64}())[]
+
 # To limit dependency on rand functionality implemented in the Random module,
-# Libc.rand is used in file.jl, and could be used in error.jl (but it breaks a test)
+# Libc.rand is used in Base (it also is independent from Random.seed, so is
+# only affected by `Libc.srand(seed)` calls)
 """
-    rand([T::Type])
+    rand([T::Type]=UInt32)
 
-Interface to the C `rand()` function. If `T` is provided, generate a value of type `T`
-by composing two calls to `rand()`. `T` can be `UInt32` or `Float64`.
+Generate a random number of type `T`. `T` can be `UInt32` or `Float64`.
 """
-rand() = ccall(:rand, Cint, ())
-@static if Sys.iswindows()
-    # Windows RAND_MAX is 2^15-1
-    rand(::Type{UInt32}) = ((rand() % UInt32) << 17) ⊻ ((rand() % UInt32) << 8) ⊻ (rand() % UInt32)
-else
-    # RAND_MAX is at least 2^15-1 in theory, but we assume 2^16-1
-    # on non-Windows systems (in practice, it's 2^31-1)
-    rand(::Type{UInt32}) = ((rand() % UInt32) << 16) ⊻ (rand() % UInt32)
-end
-rand(::Type{Float64}) = rand(UInt32) * 2.0^-32
+rand() = ccall(:jl_rand, UInt64, ()) % UInt32
+rand(::Type{UInt32}) = rand()
+rand(::Type{Float64}) = rand() * 2.0^-32
 
 """
     srand([seed])
 
-Interface to the C `srand(seed)` function.
+Set a value for the current global `seed`.
 """
-srand(seed=floor(Int, time()) % Cuint) = ccall(:srand, Cvoid, (Cuint,), seed)
+function srand(seed::Integer=_make_uint64_seed())
+    ccall(:jl_srand, Cvoid, (UInt64,), seed % UInt64)
+end
+
+struct Cpasswd
+   username::Cstring
+   uid::Culong
+   gid::Culong
+   shell::Cstring
+   homedir::Cstring
+   gecos::Cstring
+   Cpasswd() = new(C_NULL, typemax(Culong), typemax(Culong), C_NULL, C_NULL, C_NULL)
+end
+mutable struct Cgroup
+    groupname::Cstring # group name
+    gid::Culong        # group ID
+    mem::Ptr{Cstring}  # group members
+    Cgroup() = new(C_NULL, typemax(Culong), C_NULL)
+end
+struct Passwd
+    username::String
+    uid::UInt
+    gid::UInt
+    shell::String
+    homedir::String
+    gecos::String
+end
+struct Group
+    groupname::String
+    gid::UInt
+    mem::Vector{String}
+end
+
+function getpwuid(uid::Unsigned, throw_error::Bool=true)
+    ref_pd = Ref(Cpasswd())
+    ret = ccall(:uv_os_get_passwd2, Cint, (Ref{Cpasswd}, Culong), ref_pd, uid)
+    if ret != 0
+        throw_error && Base.uv_error("getpwuid", ret)
+        return
+    end
+    pd = ref_pd[]
+    pd = Passwd(
+        pd.username == C_NULL ? "" : unsafe_string(pd.username),
+        pd.uid,
+        pd.gid,
+        pd.shell == C_NULL ? "" : unsafe_string(pd.shell),
+        pd.homedir == C_NULL ? "" : unsafe_string(pd.homedir),
+        pd.gecos == C_NULL ? "" : unsafe_string(pd.gecos),
+    )
+    ccall(:uv_os_free_passwd, Cvoid, (Ref{Cpasswd},), ref_pd)
+    return pd
+end
+function getgrgid(gid::Unsigned, throw_error::Bool=true)
+    ref_gp = Ref(Cgroup())
+    ret = ccall(:uv_os_get_group, Cint, (Ref{Cgroup}, Culong), ref_gp, gid)
+    if ret != 0
+        throw_error && Base.uv_error("getgrgid", ret)
+        return
+    end
+    gp = ref_gp[]
+    members = String[]
+    if gp.mem != C_NULL
+        while true
+            mem = unsafe_load(gp.mem, length(members) + 1)
+            mem == C_NULL && break
+            push!(members, unsafe_string(mem))
+        end
+    end
+    gp = Group(
+         gp.groupname == C_NULL ? "" : unsafe_string(gp.groupname),
+         gp.gid,
+         members,
+    )
+    ccall(:uv_os_free_group, Cvoid, (Ref{Cgroup},), ref_gp)
+    return gp
+end
+
+getuid() = ccall(:jl_getuid, Culong, ())
+geteuid() = ccall(:jl_geteuid, Culong, ())
 
 # Include dlopen()/dlpath() code
 include("libdl.jl")
diff --git a/base/libdl.jl b/base/libdl.jl
index c20b8168e2fec9..4f29260bb24f82 100644
--- a/base/libdl.jl
+++ b/base/libdl.jl
@@ -46,9 +46,12 @@ applicable.
 (RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL, RTLD_NODELETE, RTLD_NOLOAD, RTLD_NOW)
 
 """
-    dlsym(handle, sym)
+    dlsym(handle, sym; throw_error::Bool = true)
 
 Look up a symbol from a shared library handle, return callable function pointer on success.
+
+If the symbol cannot be found, this method throws an error, unless the keyword argument
+`throw_error` is set to `false`, in which case this method returns `nothing`.
 """
 function dlsym(hnd::Ptr, s::Union{Symbol,AbstractString}; throw_error::Bool = true)
     hnd == C_NULL && throw(ArgumentError("NULL library handle"))
diff --git a/base/libuv.jl b/base/libuv.jl
index 82298516f4a1b9..64b228c6500e75 100644
--- a/base/libuv.jl
+++ b/base/libuv.jl
@@ -61,8 +61,11 @@ function preserve_handle(x)
 end
 function unpreserve_handle(x)
     lock(preserve_handle_lock)
-    v = uvhandles[x]::Int
-    if v == 1
+    v = get(uvhandles, x, 0)::Int
+    if v == 0
+        unlock(preserve_handle_lock)
+        error("unbalanced call to unpreserve_handle for $(typeof(x))")
+    elseif v == 1
         pop!(uvhandles, x)
     else
         uvhandles[x] = v - 1
@@ -74,7 +77,7 @@ end
 ## Libuv error handling ##
 
 struct IOError <: Exception
-    msg::AbstractString
+    msg::String
     code::Int32
     IOError(msg::AbstractString, code::Integer) = new(msg, code)
 end
@@ -107,6 +110,7 @@ end
 function uv_alloc_buf end
 function uv_readcb end
 function uv_writecb_task end
+function uv_shutdowncb_task end
 function uv_return_spawn end
 function uv_asynccb end
 function uv_timercb end
@@ -129,21 +133,21 @@ function reinit_stdio()
 end
 
 """
-    stdin
+    stdin::IO
 
 Global variable referring to the standard input stream.
 """
 :stdin
 
 """
-    stdout
+    stdout::IO
 
 Global variable referring to the standard out stream.
 """
 :stdout
 
 """
-    stderr
+    stderr::IO
 
 Global variable referring to the standard error stream.
 """
diff --git a/base/linked_list.jl b/base/linked_list.jl
index beceb24a27f40e..c477dc56bdb2b6 100644
--- a/base/linked_list.jl
+++ b/base/linked_list.jl
@@ -1,23 +1,23 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-mutable struct InvasiveLinkedList{T}
+mutable struct IntrusiveLinkedList{T}
     # Invasive list requires that T have a field `.next >: U{T, Nothing}` and `.queue >: U{ILL{T}, Nothing}`
     head::Union{T, Nothing}
     tail::Union{T, Nothing}
-    InvasiveLinkedList{T}() where {T} = new{T}(nothing, nothing)
+    IntrusiveLinkedList{T}() where {T} = new{T}(nothing, nothing)
 end
 
 #const list_append!! = append!
 #const list_deletefirst! = delete!
 
-eltype(::Type{<:InvasiveLinkedList{T}}) where {T} = @isdefined(T) ? T : Any
+eltype(::Type{<:IntrusiveLinkedList{T}}) where {T} = @isdefined(T) ? T : Any
 
-iterate(q::InvasiveLinkedList) = (h = q.head; h === nothing ? nothing : (h, h))
-iterate(q::InvasiveLinkedList{T}, v::T) where {T} = (h = v.next; h === nothing ? nothing : (h, h))
+iterate(q::IntrusiveLinkedList) = (h = q.head; h === nothing ? nothing : (h, h))
+iterate(q::IntrusiveLinkedList{T}, v::T) where {T} = (h = v.next; h === nothing ? nothing : (h, h))
 
-isempty(q::InvasiveLinkedList) = (q.head === nothing)
+isempty(q::IntrusiveLinkedList) = (q.head === nothing)
 
-function length(q::InvasiveLinkedList)
+function length(q::IntrusiveLinkedList)
     i = 0
     head = q.head
     while head !== nothing
@@ -27,7 +27,7 @@ function length(q::InvasiveLinkedList)
     return i
 end
 
-function list_append!!(q::InvasiveLinkedList{T}, q2::InvasiveLinkedList{T}) where T
+function list_append!!(q::IntrusiveLinkedList{T}, q2::IntrusiveLinkedList{T}) where T
     q === q2 && error("can't append list to itself")
     head2 = q2.head
     if head2 !== nothing
@@ -49,7 +49,7 @@ function list_append!!(q::InvasiveLinkedList{T}, q2::InvasiveLinkedList{T}) wher
     return q
 end
 
-function push!(q::InvasiveLinkedList{T}, val::T) where T
+function push!(q::IntrusiveLinkedList{T}, val::T) where T
     val.queue === nothing || error("val already in a list")
     val.queue = q
     tail = q.tail
@@ -62,7 +62,7 @@ function push!(q::InvasiveLinkedList{T}, val::T) where T
     return q
 end
 
-function pushfirst!(q::InvasiveLinkedList{T}, val::T) where T
+function pushfirst!(q::IntrusiveLinkedList{T}, val::T) where T
     val.queue === nothing || error("val already in a list")
     val.queue = q
     head = q.head
@@ -75,19 +75,20 @@ function pushfirst!(q::InvasiveLinkedList{T}, val::T) where T
     return q
 end
 
-function pop!(q::InvasiveLinkedList{T}) where {T}
+function pop!(q::IntrusiveLinkedList{T}) where {T}
     val = q.tail::T
     list_deletefirst!(q, val) # expensive!
     return val
 end
 
-function popfirst!(q::InvasiveLinkedList{T}) where {T}
+function popfirst!(q::IntrusiveLinkedList{T}) where {T}
     val = q.head::T
     list_deletefirst!(q, val) # cheap
     return val
 end
 
-function list_deletefirst!(q::InvasiveLinkedList{T}, val::T) where T
+# this function assumes `val` is found in `q`
+function list_deletefirst!(q::IntrusiveLinkedList{T}, val::T) where T
     val.queue === q || return
     head = q.head::T
     if head === val
@@ -97,10 +98,10 @@ function list_deletefirst!(q::InvasiveLinkedList{T}, val::T) where T
             q.head = val.next::T
         end
     else
-        head_next = head.next
+        head_next = head.next::T
         while head_next !== val
             head = head_next
-            head_next = head.next::Union{T, Nothing}
+            head_next = head.next::T
         end
         if q.tail::T === val
             head.next = nothing
@@ -124,20 +125,20 @@ end
 mutable struct LinkedListItem{T}
     # Adapter class to use any `T` in a LinkedList
     next::Union{LinkedListItem{T}, Nothing}
-    queue::Union{InvasiveLinkedList{LinkedListItem{T}}, Nothing}
+    queue::Union{IntrusiveLinkedList{LinkedListItem{T}}, Nothing}
     value::T
     LinkedListItem{T}(value::T) where {T} = new{T}(nothing, nothing, value)
 end
-const LinkedList{T} = InvasiveLinkedList{LinkedListItem{T}}
+const LinkedList{T} = IntrusiveLinkedList{LinkedListItem{T}}
 
 # delegate methods, as needed
 eltype(::Type{<:LinkedList{T}}) where {T} = @isdefined(T) ? T : Any
 iterate(q::LinkedList) = (h = q.head; h === nothing ? nothing : (h.value, h))
-iterate(q::InvasiveLinkedList{LLT}, v::LLT) where {LLT<:LinkedListItem} = (h = v.next; h === nothing ? nothing : (h.value, h))
+iterate(q::IntrusiveLinkedList{LLT}, v::LLT) where {LLT<:LinkedListItem} = (h = v.next; h === nothing ? nothing : (h.value, h))
 push!(q::LinkedList{T}, val::T) where {T} = push!(q, LinkedListItem{T}(val))
 pushfirst!(q::LinkedList{T}, val::T) where {T} = pushfirst!(q, LinkedListItem{T}(val))
-pop!(q::LinkedList) = invoke(pop!, Tuple{InvasiveLinkedList,}, q).value
-popfirst!(q::LinkedList) = invoke(popfirst!, Tuple{InvasiveLinkedList,}, q).value
+pop!(q::LinkedList) = invoke(pop!, Tuple{IntrusiveLinkedList,}, q).value
+popfirst!(q::LinkedList) = invoke(popfirst!, Tuple{IntrusiveLinkedList,}, q).value
 function list_deletefirst!(q::LinkedList{T}, val::T) where T
     h = q.head
     while h !== nothing
diff --git a/base/loading.jl b/base/loading.jl
index 84150004e48106..7588aaa3cbc17e 100644
--- a/base/loading.jl
+++ b/base/loading.jl
@@ -1,16 +1,17 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Base.require is the implementation for the `import` statement
+const require_lock = ReentrantLock()
 
 # Cross-platform case-sensitive path canonicalization
 
 if Sys.isunix() && !Sys.isapple()
     # assume case-sensitive filesystems, don't have to do anything
-    isfile_casesensitive(path) = isfile(path)
+    isfile_casesensitive(path) = isaccessiblefile(path)
 elseif Sys.iswindows()
     # GetLongPathName Win32 function returns the case-preserved filename on NTFS.
     function isfile_casesensitive(path)
-        isfile(path) || return false  # Fail fast
+        isaccessiblefile(path) || return false  # Fail fast
         basename(Filesystem.longpath(path)) == basename(path)
     end
 elseif Sys.isapple()
@@ -43,7 +44,7 @@ elseif Sys.isapple()
     # Buffer buf;
     # getattrpath(path, &attr_list, &buf, sizeof(buf), FSOPT_NOFOLLOW);
     function isfile_casesensitive(path)
-        isfile(path) || return false
+        isaccessiblefile(path) || return false
         path_basename = String(basename(path))
         local casepreserved_basename
         header_size = 12
@@ -74,21 +75,50 @@ elseif Sys.isapple()
 else
     # Generic fallback that performs a slow directory listing.
     function isfile_casesensitive(path)
-        isfile(path) || return false
+        isaccessiblefile(path) || return false
         dir, filename = splitdir(path)
         any(readdir(dir) .== filename)
     end
 end
 
+# Check if the file is accessible. If stat fails return `false`
+
+function isaccessibledir(dir)
+    return try
+        isdir(dir)
+    catch err
+        err isa IOError || rethrow()
+        false
+    end
+end
+
+function isaccessiblefile(file)
+    return try
+        isfile(file)
+    catch err
+        err isa IOError || rethrow()
+        false
+    end
+end
+
+function isaccessiblepath(path)
+    return try
+        ispath(path)
+    catch err
+        err isa IOError || rethrow()
+        false
+    end
+end
+
 ## SHA1 ##
 
 struct SHA1
-    bytes::Vector{UInt8}
-    function SHA1(bytes::Vector{UInt8})
-        length(bytes) == 20 ||
-            throw(ArgumentError("wrong number of bytes for SHA1 hash: $(length(bytes))"))
-        return new(bytes)
-    end
+    bytes::NTuple{20, UInt8}
+end
+function SHA1(bytes::Vector{UInt8})
+    length(bytes) == 20 ||
+        throw(ArgumentError("wrong number of bytes for SHA1 hash: $(length(bytes))"))
+    return SHA1(ntuple(i->bytes[i], Val(20)))
 end
 SHA1(s::AbstractString) = SHA1(hex2bytes(s))
 parse(::Type{SHA1}, s::AbstractString) = SHA1(s)
@@ -129,12 +159,23 @@ end
 const ns_dummy_uuid = UUID("fe0723d6-3a44-4c41-8065-ee0f42c8ceab")
 
 function dummy_uuid(project_file::String)
+    @lock require_lock begin
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        uuid = get(cache.dummy_uuid, project_file, nothing)
+        uuid === nothing || return uuid
+    end
     project_path = try
         realpath(project_file)
     catch
         project_file
     end
-    return uuid5(ns_dummy_uuid, project_path)
+    uuid = uuid5(ns_dummy_uuid, project_path)
+    if cache !== nothing
+        cache.dummy_uuid[project_file] = uuid
+    end
+    return uuid
+    end
 end
 
 ## package path slugs: turning UUID + SHA1 into a pair of 4-byte "slugs" ##
@@ -163,16 +204,92 @@ function version_slug(uuid::UUID, sha1::SHA1, p::Int=5)
     return slug(crc, p)
 end
 
-struct TOMLCache
-    p::TOML.Parser
-    d::Dict{String, Dict{String, Any}}
+mutable struct CachedTOMLDict
+    path::String
+    inode::UInt64
+    mtime::Float64
+    size::Int64
+    hash::UInt32
+    d::Dict{String, Any}
+end
+
+function CachedTOMLDict(p::TOML.Parser, path::String)
+    s = stat(path)
+    content = read(path)
+    crc32 = _crc32c(content)
+    TOML.reinit!(p, String(content); filepath=path)
+    d = TOML.parse(p)
+    return CachedTOMLDict(
+        path,
+        s.inode,
+        s.mtime,
+        s.size,
+        crc32,
+        d,
+   )
+end
+
+function get_updated_dict(p::TOML.Parser, f::CachedTOMLDict)
+    s = stat(f.path)
+    time_since_cached = time() - f.mtime
+    rough_mtime_granularity = 0.1 # seconds
+    # In case the file is being updated faster than the mtime granularity,
+    # and have the same size after the update we might miss that it changed. Therefore
+    # always check the hash in case we recently created the cache.
+    if time_since_cached < rough_mtime_granularity || s.inode != f.inode || s.mtime != f.mtime || f.size != s.size
+        content = read(f.path)
+        new_hash = _crc32c(content)
+        if new_hash != f.hash
+            f.inode = s.inode
+            f.mtime = s.mtime
+            f.size = s.size
+            f.hash = new_hash
+            TOML.reinit!(p, String(content); filepath=f.path)
+            return f.d = TOML.parse(p)
+        end
+    end
+    return f.d
+end
+
+struct LoadingCache
+    load_path::Vector{String}
+    dummy_uuid::Dict{String, UUID}
+    env_project_file::Dict{String, Union{Bool, String}}
+    project_file_manifest_path::Dict{String, Union{Nothing, String}}
+    require_parsed::Set{String}
 end
-TOMLCache() = TOMLCache(TOML.Parser(), Dict{String, Dict{String, Any}}())
+const LOADING_CACHE = Ref{Union{LoadingCache, Nothing}}(nothing)
+LoadingCache() = LoadingCache(load_path(), Dict(), Dict(), Dict(), Set())
 
-function parsed_toml(cache::TOMLCache, project_file::String)
-    get!(cache.d, project_file) do
-        TOML.reinit!(cache.p, read(project_file, String); filepath=project_file)
-        TOML.parse(cache.p)
+
+struct TOMLCache
+    p::TOML.Parser
+    d::Dict{String, CachedTOMLDict}
+end
+const TOML_CACHE = TOMLCache(TOML.Parser(), Dict{String, Dict{String, Any}}())
+
+parsed_toml(project_file::AbstractString) = parsed_toml(project_file, TOML_CACHE, require_lock)
+function parsed_toml(project_file::AbstractString, toml_cache::TOMLCache, toml_lock::ReentrantLock)
+    lock(toml_lock) do
+        cache = LOADING_CACHE[]
+        dd = if !haskey(toml_cache.d, project_file)
+            d = CachedTOMLDict(toml_cache.p, project_file)
+            toml_cache.d[project_file] = d
+            d.d
+        else
+            d = toml_cache.d[project_file]
+            # We are in a require call and have already parsed this TOML file
+            # assume that it is unchanged to avoid hitting disk
+            if cache !== nothing && project_file in cache.require_parsed
+                d.d
+            else
+                get_updated_dict(toml_cache.p, d)
+            end
+        end
+        if cache !== nothing
+            push!(cache.require_parsed, project_file)
+        end
+        return dd
     end
 end
 
@@ -180,22 +297,21 @@ end
 
 # Used by Pkg but not used in loading itself
 function find_package(arg)
-    cache = TOMLCache()
-    pkg = identify_package(arg, cache)
+    pkg = identify_package(arg)
     pkg === nothing && return nothing
-    return locate_package(pkg, cache)
+    return locate_package(pkg)
 end
 
 ## package identity: given a package name and a context, try to return its identity ##
-identify_package(where::Module, name::String, cache::TOMLCache = TOMLCache()) = identify_package(PkgId(where), name, cache)
+identify_package(where::Module, name::String) = identify_package(PkgId(where), name)
 
 # identify_package computes the PkgId for `name` from the context of `where`
 # or return `nothing` if no mapping exists for it
-function identify_package(where::PkgId, name::String, cache::TOMLCache=TOMLCache())::Union{Nothing,PkgId}
+function identify_package(where::PkgId, name::String)::Union{Nothing,PkgId}
     where.name === name && return where
-    where.uuid === nothing && return identify_package(name, cache) # ignore `where`
+    where.uuid === nothing && return identify_package(name) # ignore `where`
     for env in load_path()
-        uuid = manifest_deps_get(env, where, name, cache)
+        uuid = manifest_deps_get(env, where, name)
         uuid === nothing && continue # not found--keep looking
         uuid.uuid === nothing || return uuid # found in explicit environment--use it
         return nothing # found in implicit environment--return "not found"
@@ -205,35 +321,39 @@ end
 
 # identify_package computes the PkgId for `name` from toplevel context
 # by looking through the Project.toml files and directories
-function identify_package(name::String, cache::TOMLCache=TOMLCache())::Union{Nothing,PkgId}
+function identify_package(name::String)::Union{Nothing,PkgId}
     for env in load_path()
-        uuid = project_deps_get(env, name, cache)
+        uuid = project_deps_get(env, name)
         uuid === nothing || return uuid # found--return it
     end
     return nothing
 end
 
 ## package location: given a package identity, find file to load ##
-function locate_package(pkg::PkgId, cache::TOMLCache=TOMLCache())::Union{Nothing,String}
+function locate_package(pkg::PkgId)::Union{Nothing,String}
     if pkg.uuid === nothing
         for env in load_path()
             # look for the toplevel pkg `pkg.name` in this entry
-            found = project_deps_get(env, pkg.name, cache)
+            found = project_deps_get(env, pkg.name)
             found === nothing && continue
             if pkg == found
                 # pkg.name is present in this directory or project file,
                 # return the path the entry point for the code, if it could be found
                 # otherwise, signal failure
-                return implicit_manifest_uuid_path(env, pkg, cache)
+                return implicit_manifest_uuid_path(env, pkg)
             end
             @assert found.uuid !== nothing
-            return locate_package(found, cache) # restart search now that we know the uuid for pkg
+            return locate_package(found) # restart search now that we know the uuid for pkg
         end
     else
         for env in load_path()
-            path = manifest_uuid_path(env, pkg, cache)
+            path = manifest_uuid_path(env, pkg)
             path === nothing || return entry_path(path, pkg.name)
         end
+        # Allow loading of stdlibs if the name/uuid are given
+        # e.g. if they have been explicitly added to the project/manifest
+        path = manifest_uuid_path(Sys.STDLIB, pkg)
+        path === nothing || return entry_path(path, pkg.name)
     end
     return nothing
 end
@@ -248,125 +368,179 @@ Use [`dirname`](@ref) to get the directory part and [`basename`](@ref)
 to get the file name part of the path.
 """
 function pathof(m::Module)
-    pkgid = get(Base.module_keys, m, nothing)
+    @lock require_lock begin
+    pkgid = get(module_keys, m, nothing)
     pkgid === nothing && return nothing
-    origin = get(Base.pkgorigins, pkgid, nothing)
+    origin = get(pkgorigins, pkgid, nothing)
     origin === nothing && return nothing
-    return origin.path
+    path = origin.path
+    path === nothing && return nothing
+    return fixup_stdlib_path(path)
+    end
 end
 
 """
-    pkgdir(m::Module)
+    pkgdir(m::Module[, paths::String...])
+
+Return the root directory of the package that imported module `m`,
+or `nothing` if `m` was not imported from a package. Optionally further
+path component strings can be provided to construct a path within the
+package root.
 
- Return the root directory of the package that imported module `m`,
- or `nothing` if `m` was not imported from a package.
- """
-function pkgdir(m::Module)
-    rootmodule = Base.moduleroot(m)
+```julia-repl
+julia> pkgdir(Foo)
+"/path/to/Foo.jl"
+
+julia> pkgdir(Foo, "src", "file.jl")
+"/path/to/Foo.jl/src/file.jl"
+```
+
+!!! compat "Julia 1.7"
+    The optional argument `paths` requires at least Julia 1.7.
+"""
+function pkgdir(m::Module, paths::String...)
+    rootmodule = moduleroot(m)
     path = pathof(rootmodule)
     path === nothing && return nothing
-    return dirname(dirname(path))
+    return joinpath(dirname(dirname(path)), paths...)
 end
 
 ## generic project & manifest API ##
 
 const project_names = ("JuliaProject.toml", "Project.toml")
 const manifest_names = ("JuliaManifest.toml", "Manifest.toml")
+const preferences_names = ("JuliaLocalPreferences.toml", "LocalPreferences.toml")
+
+function locate_project_file(env::String)
+    for proj in project_names
+        project_file = joinpath(env, proj)
+        if isfile_casesensitive(project_file)
+            return project_file
+        end
+    end
+    return true
+end
 
 # classify the LOAD_PATH entry to be one of:
 #  - `false`: nonexistant / nothing to see here
 #  - `true`: `env` is an implicit environment
 #  - `path`: the path of an explicit project file
 function env_project_file(env::String)::Union{Bool,String}
+    @lock require_lock begin
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        project_file = get(cache.env_project_file, env, nothing)
+        project_file === nothing || return project_file
+    end
     if isdir(env)
-        for proj in project_names
-            project_file = joinpath(env, proj)
-            isfile_casesensitive(project_file) && return project_file
-        end
-        return true
+        project_file = locate_project_file(env)
     elseif basename(env) in project_names && isfile_casesensitive(env)
-        return env
+        project_file = env
+    else
+        project_file = false
+    end
+    if cache !== nothing
+        cache.env_project_file[env] = project_file
+    end
+    return project_file
     end
-    return false
 end
 
-function project_deps_get(env::String, name::String, cache::TOMLCache)::Union{Nothing,PkgId}
+function project_deps_get(env::String, name::String)::Union{Nothing,PkgId}
     project_file = env_project_file(env)
     if project_file isa String
-        pkg_uuid = explicit_project_deps_get(project_file, name, cache)
+        pkg_uuid = explicit_project_deps_get(project_file, name)
         pkg_uuid === nothing || return PkgId(pkg_uuid, name)
     elseif project_file
-        return implicit_project_deps_get(env, name, cache)
+        return implicit_project_deps_get(env, name)
     end
     return nothing
 end
 
-function manifest_deps_get(env::String, where::PkgId, name::String, cache::TOMLCache)::Union{Nothing,PkgId}
-    @assert where.uuid !== nothing
+function manifest_deps_get(env::String, where::PkgId, name::String)::Union{Nothing,PkgId}
+    uuid = where.uuid
+    @assert uuid !== nothing
     project_file = env_project_file(env)
     if project_file isa String
         # first check if `where` names the Project itself
-        proj = project_file_name_uuid(project_file, where.name, cache)
+        proj = project_file_name_uuid(project_file, where.name)
         if proj == where
             # if `where` matches the project, use [deps] section as manifest, and stop searching
-            pkg_uuid = explicit_project_deps_get(project_file, name, cache)
+            pkg_uuid = explicit_project_deps_get(project_file, name)
             return PkgId(pkg_uuid, name)
         end
         # look for manifest file and `where` stanza
-        return explicit_manifest_deps_get(project_file, where.uuid, name, cache)
+        return explicit_manifest_deps_get(project_file, uuid, name)
     elseif project_file
         # if env names a directory, search it
-        return implicit_manifest_deps_get(env, where, name, cache)
+        return implicit_manifest_deps_get(env, where, name)
     end
     return nothing
 end
 
-function manifest_uuid_path(env::String, pkg::PkgId, cache::TOMLCache)::Union{Nothing,String}
+function manifest_uuid_path(env::String, pkg::PkgId)::Union{Nothing,String}
     project_file = env_project_file(env)
     if project_file isa String
-        proj = project_file_name_uuid(project_file, pkg.name, cache)
+        proj = project_file_name_uuid(project_file, pkg.name)
         if proj == pkg
             # if `pkg` matches the project, return the project itself
-            return project_file_path(project_file, pkg.name, cache)
+            return project_file_path(project_file, pkg.name)
         end
         # look for manifest file and `where` stanza
-        return explicit_manifest_uuid_path(project_file, pkg, cache)
+        return explicit_manifest_uuid_path(project_file, pkg)
     elseif project_file
         # if env names a directory, search it
-        return implicit_manifest_uuid_path(env, pkg, cache)
+        return implicit_manifest_uuid_path(env, pkg)
     end
     return nothing
 end
 
 # find project file's top-level UUID entry (or nothing)
-function project_file_name_uuid(project_file::String, name::String, cache::TOMLCache)::PkgId
-    uuid = dummy_uuid(project_file)
-    d = parsed_toml(cache, project_file)
+function project_file_name_uuid(project_file::String, name::String)::PkgId
+    d = parsed_toml(project_file)
     uuid′ = get(d, "uuid", nothing)::Union{String, Nothing}
-    uuid′ === nothing || (uuid = UUID(uuid′))
+    uuid = uuid′ === nothing ? dummy_uuid(project_file) : UUID(uuid′)
     name = get(d, "name", name)::String
     return PkgId(uuid, name)
 end
 
-function project_file_path(project_file::String, name::String, cache)
-    d = parsed_toml(cache, project_file)
+function project_file_path(project_file::String, name::String)
+    d = parsed_toml(project_file)
     joinpath(dirname(project_file), get(d, "path", "")::String)
 end
 
 # find project file's corresponding manifest file
-function project_file_manifest_path(project_file::String, cache::TOMLCache)::Union{Nothing,String}
+function project_file_manifest_path(project_file::String)::Union{Nothing,String}
+    @lock require_lock begin
+    cache = LOADING_CACHE[]
+    if cache !== nothing
+        manifest_path = get(cache.project_file_manifest_path, project_file, missing)
+        manifest_path === missing || return manifest_path
+    end
     dir = abspath(dirname(project_file))
-    d = parsed_toml(cache, project_file)
+    d = parsed_toml(project_file)
     explicit_manifest = get(d, "manifest", nothing)::Union{String, Nothing}
+    manifest_path = nothing
     if explicit_manifest !== nothing
         manifest_file = normpath(joinpath(dir, explicit_manifest))
-        isfile_casesensitive(manifest_file) && return manifest_file
+        if isfile_casesensitive(manifest_file)
+            manifest_path = manifest_file
+        end
+    end
+    if manifest_path === nothing
+        for mfst in manifest_names
+            manifest_file = joinpath(dir, mfst)
+            if isfile_casesensitive(manifest_file)
+                manifest_path = manifest_file
+                break
+            end
+        end
     end
-    for mfst in manifest_names
-        manifest_file = joinpath(dir, mfst)
-        isfile_casesensitive(manifest_file) && return manifest_file
+    if cache !== nothing
+        cache.project_file_manifest_path[project_file] = manifest_path
+    end
+    return manifest_path
     end
-    return nothing
 end
 
 # given a directory (implicit env from LOAD_PATH) and a name,
@@ -408,8 +582,8 @@ end
 
 # find project file root or deps `name => uuid` mapping
 # return `nothing` if `name` is not found
-function explicit_project_deps_get(project_file::String, name::String, cache::TOMLCache)::Union{Nothing,UUID}
-    d = parsed_toml(cache, project_file)
+function explicit_project_deps_get(project_file::String, name::String)::Union{Nothing,UUID}
+    d = parsed_toml(project_file)
     root_uuid = dummy_uuid(project_file)
     if get(d, "name", nothing)::Union{String, Nothing} === name
         uuid = get(d, "uuid", nothing)::Union{String, Nothing}
@@ -423,18 +597,41 @@ function explicit_project_deps_get(project_file::String, name::String, cache::TO
     return nothing
 end
 
+function is_v1_format_manifest(raw_manifest::Dict)
+    if haskey(raw_manifest, "manifest_format")
+        mf = raw_manifest["manifest_format"]
+        if mf isa Dict && haskey(mf, "uuid")
+            # the off-chance where an old format manifest has a dep called "manifest_format"
+            return true
+        end
+        return false
+    else
+        return true
+    end
+end
+
+# returns a deps list for both old and new manifest formats
+function get_deps(raw_manifest::Dict)
+    if is_v1_format_manifest(raw_manifest)
+        return raw_manifest
+    else
+        # if the manifest has no deps, there won't be a `deps` field
+        return get(Dict{String, Any}, raw_manifest, "deps")::Dict{String, Any}
+    end
+end
+
 # find `where` stanza and return the PkgId for `name`
 # return `nothing` if it did not find `where` (indicating caller should continue searching)
-function explicit_manifest_deps_get(project_file::String, where::UUID, name::String, cache::TOMLCache)::Union{Nothing,PkgId}
-    manifest_file = project_file_manifest_path(project_file, cache)
+function explicit_manifest_deps_get(project_file::String, where::UUID, name::String)::Union{Nothing,PkgId}
+    manifest_file = project_file_manifest_path(project_file)
     manifest_file === nothing && return nothing # manifest not found--keep searching LOAD_PATH
-    d = parsed_toml(cache, manifest_file)
+    d = get_deps(parsed_toml(manifest_file))
     found_where = false
     found_name = false
     for (dep_name, entries) in d
         entries::Vector{Any}
         for entry in entries
-            entry::Dict{String, Any}
+            entry = entry::Dict{String, Any}
             uuid = get(entry, "uuid", nothing)::Union{String, Nothing}
             uuid === nothing && continue
             if UUID(uuid) === where
@@ -447,7 +644,7 @@ function explicit_manifest_deps_get(project_file::String, where::UUID, name::Str
                     found_name = name in deps
                     break
                 else
-                    deps::Dict{String, Any}
+                    deps = deps::Dict{String, Any}
                     for (dep, uuid) in deps
                         uuid::String
                         if dep === name
@@ -472,15 +669,15 @@ function explicit_manifest_deps_get(project_file::String, where::UUID, name::Str
 end
 
 # find `uuid` stanza, return the corresponding path
-function explicit_manifest_uuid_path(project_file::String, pkg::PkgId, cache::TOMLCache)::Union{Nothing,String}
-    manifest_file = project_file_manifest_path(project_file, cache)
+function explicit_manifest_uuid_path(project_file::String, pkg::PkgId)::Union{Nothing,String}
+    manifest_file = project_file_manifest_path(project_file)
     manifest_file === nothing && return nothing # no manifest, skip env
 
-    d = parsed_toml(cache, manifest_file)
+    d = get_deps(parsed_toml(manifest_file))
     entries = get(d, pkg.name, nothing)::Union{Nothing, Vector{Any}}
     entries === nothing && return nothing # TODO: allow name to mismatch?
     for entry in entries
-        entry::Dict{String, Any}
+        entry = entry::Dict{String, Any}
         uuid = get(entry, "uuid", nothing)::Union{Nothing, String}
         uuid === nothing && continue
         if UUID(uuid) === pkg.uuid
@@ -500,10 +697,11 @@ function explicit_manifest_entry_path(manifest_file::String, pkg::PkgId, entry::
     hash === nothing && return nothing
     hash = SHA1(hash)
     # Keep the 4 since it used to be the default
-    for slug in (version_slug(pkg.uuid, hash, 4), version_slug(pkg.uuid, hash))
+    uuid = pkg.uuid::UUID # checked within `explicit_manifest_uuid_path`
+    for slug in (version_slug(uuid, hash), version_slug(uuid, hash, 4))
         for depot in DEPOT_PATH
-            path = abspath(depot, "packages", pkg.name, slug)
-            ispath(path) && return path
+            path = joinpath(depot, "packages", pkg.name, slug)
+            ispath(path) && return abspath(path)
         end
     end
     return nothing
@@ -513,13 +711,13 @@ end
 
 # look for an entry point for `name` from a top-level package (no environment)
 # otherwise return `nothing` to indicate the caller should keep searching
-function implicit_project_deps_get(dir::String, name::String, cache::TOMLCache)::Union{Nothing,PkgId}
+function implicit_project_deps_get(dir::String, name::String)::Union{Nothing,PkgId}
     path, project_file = entry_point_and_project_file(dir, name)
     if project_file === nothing
         path === nothing && return nothing
         return PkgId(name)
     end
-    proj = project_file_name_uuid(project_file, name, cache)
+    proj = project_file_name_uuid(project_file, name)
     proj.name == name || return nothing
     return proj
 end
@@ -527,25 +725,25 @@ end
 # look for an entry-point for `name`, check that UUID matches
 # if there's a project file, look up `name` in its deps and return that
 # otherwise return `nothing` to indicate the caller should keep searching
-function implicit_manifest_deps_get(dir::String, where::PkgId, name::String, cache::TOMLCache)::Union{Nothing,PkgId}
+function implicit_manifest_deps_get(dir::String, where::PkgId, name::String)::Union{Nothing,PkgId}
     @assert where.uuid !== nothing
     project_file = entry_point_and_project_file(dir, where.name)[2]
     project_file === nothing && return nothing # a project file is mandatory for a package with a uuid
-    proj = project_file_name_uuid(project_file, where.name, cache)
+    proj = project_file_name_uuid(project_file, where.name)
     proj == where || return nothing # verify that this is the correct project file
     # this is the correct project, so stop searching here
-    pkg_uuid = explicit_project_deps_get(project_file, name, cache)
+    pkg_uuid = explicit_project_deps_get(project_file, name)
     return PkgId(pkg_uuid, name)
 end
 
 # look for an entry-point for `pkg` and return its path if UUID matches
-function implicit_manifest_uuid_path(dir::String, pkg::PkgId, cache::TOMLCache)::Union{Nothing,String}
+function implicit_manifest_uuid_path(dir::String, pkg::PkgId)::Union{Nothing,String}
     path, project_file = entry_point_and_project_file(dir, pkg.name)
     if project_file === nothing
         pkg.uuid === nothing || return nothing
         return path
     end
-    proj = project_file_name_uuid(project_file, pkg.name, cache)
+    proj = project_file_name_uuid(project_file, pkg.name)
     proj == pkg || return nothing
     return path
 end
@@ -554,7 +752,7 @@ end
 
 function find_source_file(path::AbstractString)
     (isabspath(path) || isfile(path)) && return path
-    base_path = joinpath(Sys.BINDIR::String, DATAROOTDIR, "julia", "base", path)
+    base_path = joinpath(Sys.BINDIR, DATAROOTDIR, "julia", "base", path)
     return isfile(base_path) ? normpath(base_path) : nothing
 end
 
@@ -569,7 +767,7 @@ function find_all_in_cache_path(pkg::PkgId)
     entrypath, entryfile = cache_file_entry(pkg)
     for path in joinpath.(DEPOT_PATH, entrypath)
         isdir(path) || continue
-        for file in readdir(path)
+        for file in readdir(path, sort = false) # no sort given we sort later
             if !((pkg.uuid === nothing && file == entryfile * ".ji") ||
                  (pkg.uuid !== nothing && startswith(file, entryfile * "_")))
                  continue
@@ -578,34 +776,69 @@ function find_all_in_cache_path(pkg::PkgId)
             isfile_casesensitive(filepath) && push!(paths, filepath)
         end
     end
-    return paths
+    if length(paths) > 1
+        # allocating the sort vector is less expensive than using sort!(.. by=mtime), which would
+        # call the relatively slow mtime multiple times per path
+        p = sortperm(mtime.(paths), rev = true)
+        return paths[p]
+    else
+        return paths
+    end
 end
 
 # these return either the array of modules loaded from the path / content given
 # or an Exception that describes why it couldn't be loaded
 # and it reconnects the Base.Docs.META
-function _include_from_serialized(path::String, depmods::Vector{Any})
+function _include_from_serialized(pkg::PkgId, path::String, depmods::Vector{Any})
     sv = ccall(:jl_restore_incremental, Any, (Cstring, Any), path, depmods)
     if isa(sv, Exception)
         return sv
     end
-    restored = sv[1]
-    if !isa(restored, Exception)
-        for M in restored::Vector{Any}
-            M = M::Module
-            if isdefined(M, Base.Docs.META)
-                push!(Base.Docs.modules, M)
-            end
-            if parentmodule(M) === M
-                register_root_module(M)
-            end
+    sv = sv::SimpleVector
+    restored = sv[1]::Vector{Any}
+    for M in restored
+        M = M::Module
+        if isdefined(M, Base.Docs.META) && getfield(M, Base.Docs.META) !== nothing
+            push!(Base.Docs.modules, M)
+        end
+        if parentmodule(M) === M
+            register_root_module(M)
+        end
+    end
+
+    # Register this cache path now - If Requires.jl is loaded, Revise may end
+    # up looking at the cache path during the init callback.
+    get!(PkgOrigin, pkgorigins, pkg).cachepath = path
+
+    inits = sv[2]::Vector{Any}
+    if !isempty(inits)
+        unlock(require_lock) # temporarily _unlock_ during these callbacks
+        try
+            ccall(:jl_init_restored_modules, Cvoid, (Any,), inits)
+        finally
+            lock(require_lock)
         end
     end
-    isassigned(sv, 2) && ccall(:jl_init_restored_modules, Cvoid, (Any,), sv[2])
     return restored
 end
 
-function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::Union{Nothing, String}, cache::TOMLCache)
+function run_package_callbacks(modkey::PkgId)
+    unlock(require_lock)
+    try
+        for callback in package_callbacks
+            invokelatest(callback, modkey)
+        end
+    catch
+        # Try to continue loading if a callback errors
+        errs = current_exceptions()
+        @error "Error during package callback" exception=errs
+    finally
+        lock(require_lock)
+    end
+    nothing
+end
+
+function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::Union{Nothing, String}, depth::Int = 0)
     if root_module_exists(modkey)
         M = root_module(modkey)
         if PkgId(M) == modkey && module_build_id(M) === build_id
@@ -613,14 +846,13 @@ function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::U
         end
     else
         if modpath === nothing
-            modpath = locate_package(modkey, cache)
+            modpath = locate_package(modkey)
             modpath === nothing && return nothing
         end
-        mod = _require_search_from_serialized(modkey, String(modpath), cache)
+        mod = _require_search_from_serialized(modkey, String(modpath), depth)
+        get!(PkgOrigin, pkgorigins, modkey).path = modpath
         if !isa(mod, Bool)
-            for callback in package_callbacks
-                invokelatest(callback, modkey)
-            end
+            run_package_callbacks(modkey)
             for M in mod::Vector{Any}
                 M = M::Module
                 if PkgId(M) == modkey && module_build_id(M) === build_id
@@ -632,7 +864,7 @@ function _tryrequire_from_serialized(modkey::PkgId, build_id::UInt64, modpath::U
     return nothing
 end
 
-function _require_from_serialized(path::String, cache::TOMLCache)
+function _require_from_serialized(pkg::PkgId, path::String)
     # loads a precompile cache file, ignoring stale_cachfile tests
     # load all of the dependent modules first
     local depmodnames
@@ -648,24 +880,35 @@ function _require_from_serialized(path::String, cache::TOMLCache)
     depmods = Vector{Any}(undef, ndeps)
     for i in 1:ndeps
         modkey, build_id = depmodnames[i]
-        dep = _tryrequire_from_serialized(modkey, build_id, nothing, cache)
+        dep = _tryrequire_from_serialized(modkey, build_id, nothing)
         dep === nothing && return ErrorException("Required dependency $modkey failed to load from a cache file.")
         depmods[i] = dep::Module
     end
     # then load the file
-    return _include_from_serialized(path, depmods)
+    return _include_from_serialized(pkg, path, depmods)
 end
 
+# use an Int counter so that nested @time_imports calls all remain open
+const TIMING_IMPORTS = Threads.Atomic{Int}(0)
+
 # returns `true` if require found a precompile cache for this sourcepath, but couldn't load it
 # returns `false` if the module isn't known to be precompilable
 # returns the set of modules restored if the cache load succeeded
-function _require_search_from_serialized(pkg::PkgId, sourcepath::String, cache::TOMLCache)
+@constprop :none function _require_search_from_serialized(pkg::PkgId, sourcepath::String, depth::Int = 0)
+    timing_imports = TIMING_IMPORTS[] > 0
+    try
+    if timing_imports
+        t_before = time_ns()
+        cumulative_compile_timing(true)
+        t_comp_before = cumulative_compile_time_ns()
+    end
     paths = find_all_in_cache_path(pkg)
     for path_to_try in paths::Vector{String}
-        staledeps = stale_cachefile(sourcepath, path_to_try, cache)
+        staledeps = stale_cachefile(sourcepath, path_to_try)
         if staledeps === true
             continue
         end
+        staledeps = staledeps::Vector{Any}
         try
             touch(path_to_try) # update timestamp of precompilation file
         catch # file might be read-only and then we fail to update timestamp, which is fine
@@ -675,7 +918,7 @@ function _require_search_from_serialized(pkg::PkgId, sourcepath::String, cache::
             dep = staledeps[i]
             dep isa Module && continue
             modpath, modkey, build_id = dep::Tuple{String, PkgId, UInt64}
-            dep = _tryrequire_from_serialized(modkey, build_id, modpath, cache)
+            dep = _tryrequire_from_serialized(modkey, build_id, modpath, depth + 1)
             if dep === nothing
                 @debug "Required dependency $modkey failed to load from cache file for $modpath."
                 staledeps = true
@@ -686,18 +929,37 @@ function _require_search_from_serialized(pkg::PkgId, sourcepath::String, cache::
         if staledeps === true
             continue
         end
-        restored = _include_from_serialized(path_to_try, staledeps)
+        restored = _include_from_serialized(pkg, path_to_try, staledeps)
         if isa(restored, Exception)
             @debug "Deserialization checks failed while attempting to load cache from $path_to_try" exception=restored
         else
+            if timing_imports
+                elapsed = round((time_ns() - t_before) / 1e6, digits = 1)
+                comp_time, recomp_time = cumulative_compile_time_ns() .- t_comp_before
+                tree_prefix = depth == 0 ? "" : "  "^(depth-1)*"┌ "
+                print(lpad(elapsed, 9), " ms  ")
+                printstyled(tree_prefix, color = :light_black)
+                print(pkg.name)
+                if comp_time > 0
+                    printstyled(" ", Ryu.writefixed(Float64(100 * comp_time / (elapsed * 1e6)), 2), "% compilation time", color = Base.info_color())
+                end
+                if recomp_time > 0
+                    perc = Float64(100 * recomp_time / comp_time)
+                    printstyled(" (", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% recompilation)", color = Base.warn_color())
+                end
+                println()
+            end
             return restored
         end
     end
     return !isempty(paths)
+    finally
+        timing_imports && cumulative_compile_timing(false)
+    end
 end
 
 # to synchronize multiple tasks trying to import/using something
-const package_locks = Dict{PkgId,Condition}()
+const package_locks = Dict{PkgId,Threads.Condition}()
 
 # to notify downstream consumers that a module was successfully loaded
 # Callbacks take the form (mod::Base.PkgId) -> nothing.
@@ -720,7 +982,9 @@ function _include_dependency(mod::Module, _path::AbstractString)
         path = normpath(joinpath(dirname(prev), _path))
     end
     if _track_dependencies[]
+        @lock require_lock begin
         push!(_require_dependencies, (mod, path, mtime(path)))
+        end
     end
     return path, prev
 end
@@ -767,9 +1031,6 @@ end
 # require always works in Main scope and loads files from node 1
 const toplevel_load = Ref(true)
 
-const full_warning_showed = Ref(false)
-const modules_warned_for = Set{PkgId}()
-
 """
     require(into::Module, module::Symbol)
 
@@ -792,63 +1053,70 @@ For more details regarding code loading, see the manual sections on [modules](@r
 [parallel computing](@ref code-availability).
 """
 function require(into::Module, mod::Symbol)
-    cache = TOMLCache()
-    uuidkey = identify_package(into, String(mod), cache)
-    # Core.println("require($(PkgId(into)), $mod) -> $uuidkey")
-    if uuidkey === nothing
-        where = PkgId(into)
-        if where.uuid === nothing
-            throw(ArgumentError("""
-                Package $mod not found in current path:
-                - Run `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package.
-                """))
-        else
-            s = """
-            Package $(where.name) does not have $mod in its dependencies:
-            - If you have $(where.name) checked out for development and have
-              added $mod as a dependency but haven't updated your primary
-              environment's manifest file, try `Pkg.resolve()`.
-            - Otherwise you may need to report an issue with $(where.name)"""
-
-            uuidkey = identify_package(PkgId(string(into)), String(mod))
-            uuidkey === nothing && throw(ArgumentError(s))
-
-            # fall back to toplevel loading with a warning
-            if !(where in modules_warned_for)
-                @warn string(
-                    full_warning_showed[] ? "" : s, "\n",
-                    string("Loading $(mod) into $(where.name) from project dependency, ",
-                           "future warnings for $(where.name) are suppressed.")
-                ) _module = nothing _file = nothing _group = nothing
-                push!(modules_warned_for, where)
+    @lock require_lock begin
+    LOADING_CACHE[] = LoadingCache()
+    try
+        uuidkey = identify_package(into, String(mod))
+        # Core.println("require($(PkgId(into)), $mod) -> $uuidkey")
+        if uuidkey === nothing
+            where = PkgId(into)
+            if where.uuid === nothing
+                hint, dots = begin
+                    if isdefined(into, mod) && getfield(into, mod) isa Module
+                        true, "."
+                    elseif isdefined(parentmodule(into), mod) && getfield(parentmodule(into), mod) isa Module
+                        true, ".."
+                    else
+                        false, ""
+                    end
+                end
+                hint_message = hint ? ", maybe you meant `import/using $(dots)$(mod)`" : ""
+                start_sentence = hint ? "Otherwise, run" : "Run"
+                throw(ArgumentError("""
+                    Package $mod not found in current path$hint_message.
+                    - $start_sentence `import Pkg; Pkg.add($(repr(String(mod))))` to install the $mod package."""))
+            else
+                throw(ArgumentError("""
+                Package $(where.name) does not have $mod in its dependencies:
+                - You may have a partially installed environment. Try `Pkg.instantiate()`
+                  to ensure all packages in the environment are installed.
+                - Or, if you have $(where.name) checked out for development and have
+                  added $mod as a dependency but haven't updated your primary
+                  environment's manifest file, try `Pkg.resolve()`.
+                - Otherwise you may need to report an issue with $(where.name)"""))
             end
-            full_warning_showed[] = true
         end
+        if _track_dependencies[]
+            push!(_require_dependencies, (into, binpack(uuidkey), 0.0))
+        end
+        return _require_prelocked(uuidkey)
+    finally
+        LOADING_CACHE[] = nothing
     end
-    if _track_dependencies[]
-        push!(_require_dependencies, (into, binpack(uuidkey), 0.0))
     end
-    return require(uuidkey, cache)
 end
 
 mutable struct PkgOrigin
-    # version::VersionNumber
     path::Union{String,Nothing}
     cachepath::Union{String,Nothing}
+    version::Union{VersionNumber,Nothing}
 end
-PkgOrigin() = PkgOrigin(nothing, nothing)
+PkgOrigin() = PkgOrigin(nothing, nothing, nothing)
 const pkgorigins = Dict{PkgId,PkgOrigin}()
 
-function require(uuidkey::PkgId, cache::TOMLCache=TOMLCache())
+require(uuidkey::PkgId) = @lock require_lock _require_prelocked(uuidkey)
+
+function _require_prelocked(uuidkey::PkgId)
+    just_loaded_pkg = false
     if !root_module_exists(uuidkey)
-        cachefile = _require(uuidkey, cache)
-        if cachefile !== nothing
-            get!(PkgOrigin, pkgorigins, uuidkey).cachepath = cachefile
-        end
+        _require(uuidkey)
         # After successfully loading, notify downstream consumers
-        for callback in package_callbacks
-            invokelatest(callback, uuidkey)
-        end
+        run_package_callbacks(uuidkey)
+        just_loaded_pkg = true
+    end
+    if just_loaded_pkg && !root_module_exists(uuidkey)
+        error("package `$(uuidkey.name)` did not define the expected \
+              module `$(uuidkey.name)`, check for typos in package module name")
     end
     return root_module(uuidkey)
 end
@@ -856,10 +1124,13 @@ end
 const loaded_modules = Dict{PkgId,Module}()
 const module_keys = IdDict{Module,PkgId}() # the reverse
 
-is_root_module(m::Module) = haskey(module_keys, m)
-root_module_key(m::Module) = module_keys[m]
+is_root_module(m::Module) = @lock require_lock haskey(module_keys, m)
+root_module_key(m::Module) = @lock require_lock module_keys[m]
 
-function register_root_module(m::Module)
+@constprop :none function register_root_module(m::Module)
+    # n.b. This is called from C after creating a new module in `Base.__toplevel__`,
+    # instead of adding them to the binding table there.
+    @lock require_lock begin
     key = PkgId(m, String(nameof(m)))
     if haskey(loaded_modules, key)
         oldm = loaded_modules[key]
@@ -869,6 +1140,7 @@ function register_root_module(m::Module)
     end
     loaded_modules[key] = m
     module_keys[m] = key
+    end
     nothing
 end
 
@@ -884,12 +1156,13 @@ using Base
 end
 
 # get a top-level Module from the given key
-root_module(key::PkgId) = loaded_modules[key]
+root_module(key::PkgId) = @lock require_lock loaded_modules[key]
 root_module(where::Module, name::Symbol) =
     root_module(identify_package(where, String(name)))
+maybe_root_module(key::PkgId) = @lock require_lock get(loaded_modules, key, nothing)
 
-root_module_exists(key::PkgId) = haskey(loaded_modules, key)
-loaded_modules_array() = collect(values(loaded_modules))
+root_module_exists(key::PkgId) = @lock require_lock haskey(loaded_modules, key)
+loaded_modules_array() = @lock require_lock collect(values(loaded_modules))
 
 function unreference_module(key::PkgId)
     if haskey(loaded_modules, key)
@@ -899,8 +1172,23 @@ function unreference_module(key::PkgId)
     end
 end
 
+function set_pkgorigin_version_path(pkg, path)
+    pkgorigin = get!(PkgOrigin, pkgorigins, pkg)
+    if path !== nothing
+        project_file = locate_project_file(joinpath(dirname(path), ".."))
+        if project_file isa String
+            d = parsed_toml(project_file)
+            v = get(d, "version", nothing)
+            if v !== nothing
+                pkgorigin.version = VersionNumber(v::AbstractString)
+            end
+        end
+    end
+    pkgorigin.path = path
+end
+
 # Returns `nothing` or the name of the newly-created cachefile
-function _require(pkg::PkgId, cache::TOMLCache)
+function _require(pkg::PkgId)
     # handle recursive calls to require
     loading = get(package_locks, pkg, false)
     if loading !== false
@@ -908,14 +1196,14 @@ function _require(pkg::PkgId, cache::TOMLCache)
         wait(loading)
         return
     end
-    package_locks[pkg] = Condition()
+    package_locks[pkg] = Threads.Condition(require_lock)
 
     last = toplevel_load[]
     try
         toplevel_load[] = false
         # perform the search operation to select the module file require intends to load
-        path = locate_package(pkg, cache)
-        get!(PkgOrigin, pkgorigins, pkg).path = path
+        path = locate_package(pkg)
+        set_pkgorigin_version_path(pkg, path)
         if path === nothing
             throw(ArgumentError("""
                 Package $pkg is required but does not seem to be installed:
@@ -925,7 +1213,7 @@ function _require(pkg::PkgId, cache::TOMLCache)
 
         # attempt to load the module file via the precompile cache locations
         if JLOptions().use_compiled_modules != 0
-            m = _require_search_from_serialized(pkg, path, cache)
+            m = _require_search_from_serialized(pkg, path)
             if !isa(m, Bool)
                 return
             end
@@ -958,11 +1246,11 @@ function _require(pkg::PkgId, cache::TOMLCache)
                     end
                     # fall-through to loading the file locally
                 else
-                    m = _require_from_serialized(cachefile, cache)
+                    m = _require_from_serialized(pkg, cachefile)
                     if isa(m, Exception)
                         @warn "The call to compilecache failed to create a usable precompiled cache file for $pkg" exception=m
                     else
-                        return cachefile
+                        return
                     end
                 end
             end
@@ -976,10 +1264,12 @@ function _require(pkg::PkgId, cache::TOMLCache)
         if uuid !== old_uuid
             ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, uuid)
         end
+        unlock(require_lock)
         try
             include(__toplevel__, path)
             return
         finally
+            lock(require_lock)
             if uuid !== old_uuid
                 ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), __toplevel__, old_uuid)
             end
@@ -1002,6 +1292,9 @@ Like [`include`](@ref), except reads code from the given string rather than from
 The optional first argument `mapexpr` can be used to transform the included code before
 it is evaluated: for each parsed expression `expr` in `code`, the `include_string` function
 actually evaluates `mapexpr(expr)`.  If it is omitted, `mapexpr` defaults to [`identity`](@ref).
+
+!!! compat "Julia 1.5"
+    Julia 1.5 is required for passing the `mapexpr` argument.
 """
 function include_string(mapexpr::Function, mod::Module, code::AbstractString,
                         filename::AbstractString="string")
@@ -1036,8 +1329,11 @@ include_string(m::Module, txt::AbstractString, fname::AbstractString="string") =
 
 function source_path(default::Union{AbstractString,Nothing}="")
     s = current_task().storage
-    if s !== nothing && haskey(s::IdDict{Any,Any}, :SOURCE_PATH)
-        return s[:SOURCE_PATH]::Union{Nothing,String}
+    if s !== nothing
+        s = s::IdDict{Any,Any}
+        if haskey(s, :SOURCE_PATH)
+            return s[:SOURCE_PATH]::Union{Nothing,String}
+        end
     end
     return default
 end
@@ -1061,12 +1357,15 @@ interactively, or to combine files in packages that are broken into multiple sou
 The optional first argument `mapexpr` can be used to transform the included code before
 it is evaluated: for each parsed expression `expr` in `path`, the `include` function
 actually evaluates `mapexpr(expr)`.  If it is omitted, `mapexpr` defaults to [`identity`](@ref).
+
+!!! compat "Julia 1.5"
+    Julia 1.5 is required for passing the `mapexpr` argument.
 """
 Base.include # defined in Base.jl
 
 # Full include() implementation which is used after bootstrap
 function _include(mapexpr::Function, mod::Module, _path::AbstractString)
-    @_noinline_meta # Workaround for module availability in _simplify_include_frames
+    @noinline # Workaround for module availability in _simplify_include_frames
     path, prev = _include_dependency(mod, _path)
     for callback in include_callbacks # to preserve order, must come before eval in include_string
         invokelatest(callback, mod, path)
@@ -1115,40 +1414,46 @@ function load_path_setup_code(load_path::Bool=true)
         code *= """
         append!(empty!(Base.LOAD_PATH), $(repr(load_path)))
         ENV["JULIA_LOAD_PATH"] = $(repr(join(load_path, Sys.iswindows() ? ';' : ':')))
-        Base.ACTIVE_PROJECT[] = nothing
+        Base.set_active_project(nothing)
         """
     end
     return code
 end
 
 # this is called in the external process that generates precompiled package files
-function include_package_for_output(input::String, depot_path::Vector{String}, dl_load_path::Vector{String}, load_path::Vector{String}, concrete_deps::typeof(_concrete_dependencies), uuid_tuple::NTuple{2,UInt64}, source::Union{Nothing,String})
+function include_package_for_output(pkg::PkgId, input::String, depot_path::Vector{String}, dl_load_path::Vector{String}, load_path::Vector{String},
+                                    concrete_deps::typeof(_concrete_dependencies), source::Union{Nothing,String})
     append!(empty!(Base.DEPOT_PATH), depot_path)
     append!(empty!(Base.DL_LOAD_PATH), dl_load_path)
     append!(empty!(Base.LOAD_PATH), load_path)
     ENV["JULIA_LOAD_PATH"] = join(load_path, Sys.iswindows() ? ';' : ':')
-    Base.ACTIVE_PROJECT[] = nothing
+    set_active_project(nothing)
     Base._track_dependencies[] = true
+    get!(Base.PkgOrigin, Base.pkgorigins, pkg).path = input
     append!(empty!(Base._concrete_dependencies), concrete_deps)
+    uuid_tuple = pkg.uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, pkg.uuid)
 
     ccall(:jl_set_module_uuid, Cvoid, (Any, NTuple{2, UInt64}), Base.__toplevel__, uuid_tuple)
     if source !== nothing
         task_local_storage()[:SOURCE_PATH] = source
     end
 
+    Core.Compiler.track_newly_inferred.x = true
     try
         Base.include(Base.__toplevel__, input)
     catch ex
         precompilableerror(ex) || rethrow()
         @debug "Aborting `create_expr_cache'" exception=(ErrorException("Declaration of __precompile__(false) not allowed"), catch_backtrace())
         exit(125) # we define status = 125 means PrecompileableError
+    finally
+        Core.Compiler.track_newly_inferred.x = false
     end
+    ccall(:jl_set_newly_inferred, Cvoid, (Any,), Core.Compiler.newly_inferred)
 end
 
-@assert precompile(include_package_for_output, (String,Vector{String},Vector{String},Vector{String},typeof(_concrete_dependencies),NTuple{2,UInt64},Nothing))
-@assert precompile(include_package_for_output, (String,Vector{String},Vector{String},Vector{String},typeof(_concrete_dependencies),NTuple{2,UInt64},String))
-
-function create_expr_cache(input::String, output::String, concrete_deps::typeof(_concrete_dependencies), uuid::Union{Nothing,UUID})
+const PRECOMPILE_TRACE_COMPILE = Ref{String}()
+function create_expr_cache(pkg::PkgId, input::String, output::String, concrete_deps::typeof(_concrete_dependencies), internal_stderr::IO = stderr, internal_stdout::IO = stdout)
+    @nospecialize internal_stderr internal_stdout
     rm(output, force=true)   # Remove file if it exists
     depot_path = map(abspath, DEPOT_PATH)
     dl_load_path = map(abspath, DL_LOAD_PATH)
@@ -1158,37 +1463,41 @@ function create_expr_cache(input::String, output::String, concrete_deps::typeof(
         error("LOAD_PATH entries cannot contain $(repr(path_sep))")
 
     deps_strs = String[]
-    for (pkg, build_id) in concrete_deps
-        pkg_str = if pkg.uuid === nothing
-            "Base.PkgId($(repr(pkg.name)))"
+    function pkg_str(_pkg::PkgId)
+        if _pkg.uuid === nothing
+            "Base.PkgId($(repr(_pkg.name)))"
         else
-            "Base.PkgId(Base.UUID(\"$(pkg.uuid)\"), $(repr(pkg.name)))"
+            "Base.PkgId(Base.UUID(\"$(_pkg.uuid)\"), $(repr(_pkg.name)))"
         end
-        push!(deps_strs, "$pkg_str => $(repr(build_id))")
     end
-    deps = repr(eltype(concrete_deps)) * "[" * join(deps_strs, ",") * "]"
-
-    uuid_tuple = uuid === nothing ? (UInt64(0), UInt64(0)) : convert(NTuple{2, UInt64}, uuid)
-
+    for (pkg, build_id) in concrete_deps
+        push!(deps_strs, "$(pkg_str(pkg)) => $(repr(build_id))")
+    end
+    deps_eltype = sprint(show, eltype(concrete_deps); context = :module=>nothing)
+    deps = deps_eltype * "[" * join(deps_strs, ",") * "]"
+    trace = isassigned(PRECOMPILE_TRACE_COMPILE) ? `--trace-compile=$(PRECOMPILE_TRACE_COMPILE[])` : ``
     io = open(pipeline(`$(julia_cmd()::Cmd) -O0
                        --output-ji $output --output-incremental=yes
                        --startup-file=no --history-file=no --warn-overwrite=yes
                        --color=$(have_color === nothing ? "auto" : have_color ? "yes" : "no")
-                       --eval 'eval(Meta.parse(read(stdin,String)))'`, stderr=stderr),
+                       $trace
+                       -`, stderr = internal_stderr, stdout = internal_stdout),
               "w", stdout)
     # write data over stdin to avoid the (unlikely) case of exceeding max command line size
     write(io.in, """
-        Base.include_package_for_output($(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)),
-            $(repr(load_path)), $deps, $(repr(uuid_tuple)), $(repr(source_path(nothing))))
+        Base.include_package_for_output($(pkg_str(pkg)), $(repr(abspath(input))), $(repr(depot_path)), $(repr(dl_load_path)),
+            $(repr(load_path)), $deps, $(repr(source_path(nothing))))
         """)
     close(io.in)
     return io
 end
 
-@assert precompile(create_expr_cache, (String, String, typeof(_concrete_dependencies), Nothing))
-@assert precompile(create_expr_cache, (String, String, typeof(_concrete_dependencies), UUID))
+function compilecache_dir(pkg::PkgId)
+    entrypath, entryfile = cache_file_entry(pkg)
+    return joinpath(DEPOT_PATH[1], entrypath)
+end
 
-function compilecache_path(pkg::PkgId)::String
+function compilecache_path(pkg::PkgId, prefs_hash::UInt64)::String
     entrypath, entryfile = cache_file_entry(pkg)
     cachepath = joinpath(DEPOT_PATH[1], entrypath)
     isdir(cachepath) || mkpath(cachepath)
@@ -1198,6 +1507,7 @@ function compilecache_path(pkg::PkgId)::String
         crc = _crc32c(something(Base.active_project(), ""))
         crc = _crc32c(unsafe_string(JLOptions().image_file), crc)
         crc = _crc32c(unsafe_string(JLOptions().julia_bin), crc)
+        crc = _crc32c(prefs_hash, crc)
         project_precompile_slug = slug(crc, 5)
         abspath(cachepath, string(entryfile, "_", project_precompile_slug, ".ji"))
     end
@@ -1211,32 +1521,29 @@ This can be used to reduce package load times. Cache files are stored in
 `DEPOT_PATH[1]/compiled`. See [Module initialization and precompilation](@ref)
 for important notes.
 """
-function compilecache(pkg::PkgId, cache::TOMLCache = TOMLCache())
-    path = locate_package(pkg, cache)
+function compilecache(pkg::PkgId, internal_stderr::IO = stderr, internal_stdout::IO = stdout)
+    @nospecialize internal_stderr internal_stdout
+    path = locate_package(pkg)
     path === nothing && throw(ArgumentError("$pkg not found during precompilation"))
-    return compilecache(pkg, path)
+    return compilecache(pkg, path, internal_stderr, internal_stdout)
 end
 
-const MAX_NUM_PRECOMPILE_FILES = 10
+const MAX_NUM_PRECOMPILE_FILES = Ref(10)
+
+function compilecache(pkg::PkgId, path::String, internal_stderr::IO = stderr, internal_stdout::IO = stdout,
+                      ignore_loaded_modules::Bool = true)
 
-function compilecache(pkg::PkgId, path::String)
+    @nospecialize internal_stderr internal_stdout
     # decide where to put the resulting cache file
-    cachefile = compilecache_path(pkg)
-    cachepath = dirname(cachefile)
-    # prune the directory with cache files
-    if pkg.uuid !== nothing
-        entrypath, entryfile = cache_file_entry(pkg)
-        cachefiles = filter!(x -> startswith(x, entryfile * "_"), readdir(cachepath))
-        if length(cachefiles) >= MAX_NUM_PRECOMPILE_FILES
-            idx = findmin(mtime.(joinpath.(cachepath, cachefiles)))[2]
-            rm(joinpath(cachepath, cachefiles[idx]))
-        end
-    end
+    cachepath = compilecache_dir(pkg)
+
     # build up the list of modules that we want the precompile process to preserve
     concrete_deps = copy(_concrete_dependencies)
-    for (key, mod) in loaded_modules
-        if !(mod === Main || mod === Core || mod === Base)
-            push!(concrete_deps, key => module_build_id(mod))
+    if ignore_loaded_modules
+        for (key, mod) in loaded_modules
+            if !(mod === Main || mod === Core || mod === Base)
+                push!(concrete_deps, key => module_build_id(mod))
+            end
         end
     end
     # run the expression and cache the result
@@ -1245,18 +1552,34 @@ function compilecache(pkg::PkgId, path::String)
 
     # create a temporary file in `cachepath` directory, write the cache in it,
     # write the checksum, _and then_ atomically move the file to `cachefile`.
+    mkpath(cachepath)
     tmppath, tmpio = mktemp(cachepath)
     local p
     try
         close(tmpio)
-        p = create_expr_cache(path, tmppath, concrete_deps, pkg.uuid)
+        p = create_expr_cache(pkg, path, tmppath, concrete_deps, internal_stderr, internal_stdout)
         if success(p)
             # append checksum to the end of the .ji file:
             open(tmppath, "a+") do f
                 write(f, _crc32c(seekstart(f)))
             end
-            # inherit permission from the source file
-            chmod(tmppath, filemode(path) & 0o777)
+            # inherit permission from the source file (and make them writable)
+            chmod(tmppath, filemode(path) & 0o777 | 0o200)
+
+            # Read preferences hash back from .ji file (we can't precompute because
+            # we don't actually know what the list of compile-time preferences are without compiling)
+            prefs_hash = preferences_hash(tmppath)
+            cachefile = compilecache_path(pkg, prefs_hash)
+
+            # prune the directory with cache files
+            if pkg.uuid !== nothing
+                entrypath, entryfile = cache_file_entry(pkg)
+                cachefiles = filter!(x -> startswith(x, entryfile * "_"), readdir(cachepath))
+                if length(cachefiles) >= MAX_NUM_PRECOMPILE_FILES[]
+                    idx = findmin(mtime.(joinpath.(cachepath, cachefiles)))[2]
+                    rm(joinpath(cachepath, cachefiles[idx]); force=true)
+                end
+            end
 
             # this is atomic according to POSIX:
             rename(tmppath, cachefile; force=true)
@@ -1268,7 +1591,7 @@ function compilecache(pkg::PkgId, path::String)
     if p.exitcode == 125
         return PrecompilableError()
     else
-        error("Failed to precompile $pkg to $cachefile.")
+        error("Failed to precompile $pkg to $tmppath.")
     end
 end
 
@@ -1294,17 +1617,23 @@ function parse_cache_header(f::IO)
         build_id = read(f, UInt64) # build UUID (mostly just a timestamp)
         push!(modules, PkgId(uuid, sym) => build_id)
     end
-    totbytes = read(f, Int64) # total bytes for file dependencies
+    totbytes = read(f, Int64) # total bytes for file dependencies + preferences
     # read the list of requirements
     # and split the list into include and requires statements
     includes = CacheHeaderIncludes[]
     requires = Pair{PkgId, PkgId}[]
     while true
         n2 = read(f, Int32)
-        n2 == 0 && break
+        totbytes -= 4
+        if n2 == 0
+            break
+        end
         depname = String(read(f, n2))
+        totbytes -= n2
         mtime = read(f, Float64)
+        totbytes -= 8
         n1 = read(f, Int32)
+        totbytes -= 4
         # map ids to keys
         modkey = (n1 == 0) ? PkgId("") : modules[n1].first
         modpath = String[]
@@ -1313,7 +1642,9 @@ function parse_cache_header(f::IO)
             while true
                 n1 = read(f, Int32)
                 totbytes -= 4
-                n1 == 0 && break
+                if n1 == 0
+                    break
+                end
                 push!(modpath, String(read(f, n1)))
                 totbytes -= n1
             end
@@ -1323,10 +1654,22 @@ function parse_cache_header(f::IO)
         else
             push!(includes, CacheHeaderIncludes(modkey, depname, mtime, modpath))
         end
-        totbytes -= 4 + 4 + n2 + 8
     end
-    @assert totbytes == 12 "header of cache file appears to be corrupt"
+    prefs = String[]
+    while true
+        n2 = read(f, Int32)
+        totbytes -= 4
+        if n2 == 0
+            break
+        end
+        push!(prefs, String(read(f, n2)))
+        totbytes -= n2
+    end
+    prefs_hash = read(f, UInt64)
+    totbytes -= 8
     srctextpos = read(f, Int64)
+    totbytes -= 8
+    @assert totbytes == 0 "header of cache file appears to be corrupt (totbytes == $(totbytes))"
     # read the list of modules that are required to be present during loading
     required_modules = Vector{Pair{PkgId, UInt64}}()
     while true
@@ -1337,7 +1680,7 @@ function parse_cache_header(f::IO)
         build_id = read(f, UInt64) # build id
         push!(required_modules, PkgId(uuid, sym) => build_id)
     end
-    return modules, (includes, requires), required_modules, srctextpos
+    return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash
 end
 
 function parse_cache_header(cachefile::String; srcfiles_only::Bool=false)
@@ -1346,21 +1689,37 @@ function parse_cache_header(cachefile::String; srcfiles_only::Bool=false)
         !isvalid_cache_header(io) && throw(ArgumentError("Invalid header in cache file $cachefile."))
         ret = parse_cache_header(io)
         srcfiles_only || return ret
-        modules, (includes, requires), required_modules, srctextpos = ret
+        modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = ret
         srcfiles = srctext_files(io, srctextpos)
         delidx = Int[]
         for (i, chi) in enumerate(includes)
             chi.filename ∈ srcfiles || push!(delidx, i)
         end
         deleteat!(includes, delidx)
-        return modules, (includes, requires), required_modules, srctextpos
+        return modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash
     finally
         close(io)
     end
 end
 
+
+
+preferences_hash(f::IO) = parse_cache_header(f)[end]
+function preferences_hash(cachefile::String)
+    io = open(cachefile, "r")
+    try
+        if !isvalid_cache_header(io)
+            throw(ArgumentError("Invalid header in cache file $cachefile."))
+        end
+        return preferences_hash(io)
+    finally
+        close(io)
+    end
+end
+
+
 function cache_dependencies(f::IO)
-    defs, (includes, requires), modules = parse_cache_header(f)
+    defs, (includes, requires), modules, srctextpos, prefs, prefs_hash = parse_cache_header(f)
     return modules, map(chi -> (chi.filename, chi.mtime), includes)  # return just filename and mtime
 end
 
@@ -1375,7 +1734,7 @@ function cache_dependencies(cachefile::String)
 end
 
 function read_dependency_src(io::IO, filename::AbstractString)
-    modules, (includes, requires), required_modules, srctextpos = parse_cache_header(io)
+    modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io)
     srctextpos == 0 && error("no source-text stored in cache file")
     seek(io, srctextpos)
     return _read_dependency_src(io, filename)
@@ -1420,17 +1779,181 @@ function srctext_files(f::IO, srctextpos::Int64)
     return files
 end
 
+# Test to see if this UUID is mentioned in this `Project.toml`; either as
+# the top-level UUID (e.g. that of the project itself), as a dependency,
+# or as an extra for Preferences.
+function get_uuid_name(project::Dict{String, Any}, uuid::UUID)
+    uuid_p = get(project, "uuid", nothing)::Union{Nothing, String}
+    name = get(project, "name", nothing)::Union{Nothing, String}
+    if name !== nothing && uuid_p !== nothing && UUID(uuid_p) == uuid
+        return name
+    end
+    deps = get(project, "deps", nothing)::Union{Nothing, Dict{String, Any}}
+    if deps !== nothing
+        for (k, v) in deps
+            if uuid == UUID(v::String)
+                return k
+            end
+        end
+    end
+    for subkey in ("deps", "extras")
+        subsection = get(project, subkey, nothing)::Union{Nothing, Dict{String, Any}}
+        if subsection !== nothing
+            for (k, v) in subsection
+                if uuid == UUID(v::String)
+                    return k
+                end
+            end
+        end
+    end
+    return nothing
+end
+
+function get_uuid_name(project_toml::String, uuid::UUID)
+    project = parsed_toml(project_toml)
+    return get_uuid_name(project, uuid)
+end
+
+# If we've asked for a specific UUID, this function will extract the prefs
+# for that particular UUID.  Otherwise, it returns all preferences.
+function filter_preferences(prefs::Dict{String, Any}, pkg_name)
+    if pkg_name === nothing
+        return prefs
+    else
+        return get(Dict{String, Any}, prefs, pkg_name)::Dict{String, Any}
+    end
+end
+
+function collect_preferences(project_toml::String, uuid::Union{UUID,Nothing})
+    # We'll return a list of dicts to be merged
+    dicts = Dict{String, Any}[]
+
+    project = parsed_toml(project_toml)
+    pkg_name = nothing
+    if uuid !== nothing
+        # If we've been given a UUID, map that to the name of the package as
+        # recorded in the preferences section.  If we can't find that mapping,
+        # exit out, as it means there's no way preferences can be set for that
+        # UUID, as we only allow actual dependencies to have preferences set.
+        pkg_name = get_uuid_name(project, uuid)
+        if pkg_name === nothing
+            return dicts
+        end
+    end
+
+    # Look first inside of `Project.toml` to see we have preferences embedded within there
+    proj_preferences = get(Dict{String, Any}, project, "preferences")::Dict{String, Any}
+    push!(dicts, filter_preferences(proj_preferences, pkg_name))
+
+    # Next, look for `(Julia)LocalPreferences.toml` files next to this `Project.toml`
+    project_dir = dirname(project_toml)
+    for name in preferences_names
+        toml_path = joinpath(project_dir, name)
+        if isfile(toml_path)
+            prefs = parsed_toml(toml_path)
+            push!(dicts, filter_preferences(prefs, pkg_name))
+
+            # If we find `JuliaLocalPreferences.toml`, don't look for `LocalPreferences.toml`
+            break
+        end
+    end
+
+    return dicts
+end
+
+"""
+    recursive_prefs_merge(base::Dict, overrides::Dict...)
+
+Helper function to merge preference dicts recursively, honoring overrides in nested
+dictionaries properly.
+"""
+function recursive_prefs_merge(base::Dict{String, Any}, overrides::Dict{String, Any}...)
+    new_base = Base._typeddict(base, overrides...)
+
+    for override in overrides
+        # Clear entries are keys that should be deleted from any previous setting.
+        override_clear = get(override, "__clear__", nothing)
+        if override_clear isa Vector{String}
+            for k in override_clear
+                delete!(new_base, k)
+            end
+        end
+
+        for (k, override_k) in override
+            # Note that if `base` has a mapping that is _not_ a `Dict`, and `override`
+            new_base_k = get(new_base, k, nothing)
+            if new_base_k isa Dict{String, Any} && override_k isa Dict{String, Any}
+                new_base[k] = recursive_prefs_merge(new_base_k, override_k)
+            else
+                new_base[k] = override_k
+            end
+        end
+    end
+    return new_base
+end
+
+function get_preferences(uuid::Union{UUID,Nothing} = nothing)
+    merged_prefs = Dict{String,Any}()
+    for env in reverse(load_path())
+        project_toml = env_project_file(env)
+        if !isa(project_toml, String)
+            continue
+        end
+
+        # Collect all dictionaries from the current point in the load path, then merge them in
+        dicts = collect_preferences(project_toml, uuid)
+        merged_prefs = recursive_prefs_merge(merged_prefs, dicts...)
+    end
+    return merged_prefs
+end
+
+function get_preferences_hash(uuid::Union{UUID, Nothing}, prefs_list::Vector{String})
+    # Start from a predictable hash point to ensure that the same preferences always
+    # hash to the same value, modulo changes in how Dictionaries are hashed.
+    h = UInt(0)
+    uuid === nothing && return UInt64(h)
+
+    # Load the preferences
+    prefs = get_preferences(uuid)
+
+    # Walk through each name that's called out as a compile-time preference
+    for name in prefs_list
+        prefs_value = get(prefs, name, nothing)
+        if prefs_value !== nothing
+            h = hash(prefs_value, h)::UInt
+        end
+    end
+    # We always return a `UInt64` so that our serialization format is stable
+    return UInt64(h)
+end
+
+get_preferences_hash(m::Module, prefs_list::Vector{String}) = get_preferences_hash(PkgId(m).uuid, prefs_list)
+
+# This is how we keep track of who is using what preferences at compile-time
+const COMPILETIME_PREFERENCES = Dict{UUID,Set{String}}()
+
+# In `Preferences.jl`, if someone calls `load_preference(@__MODULE__, key)` while we're precompiling,
+# we mark that usage as a usage at compile-time and call this method, so that at the end of `.ji` generation,
+# we can record the list of compile-time preferences and embed that into the `.ji` header
+function record_compiletime_preference(uuid::UUID, key::String)
+    pref = get!(Set{String}, COMPILETIME_PREFERENCES, uuid)
+    push!(pref, key)
+    return nothing
+end
+get_compiletime_preferences(uuid::UUID) = collect(get(Vector{String}, COMPILETIME_PREFERENCES, uuid))
+get_compiletime_preferences(m::Module) = get_compiletime_preferences(PkgId(m).uuid)
+get_compiletime_preferences(::Nothing) = String[]
+
 # returns true if it "cachefile.ji" is stale relative to "modpath.jl"
 # otherwise returns the list of dependencies to also check
-stale_cachefile(modpath::String, cachefile::String) = stale_cachefile(modpath, cachefile, TOMLCache())
-function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)
+@constprop :none function stale_cachefile(modpath::String, cachefile::String; ignore_loaded::Bool = false)
     io = open(cachefile, "r")
     try
         if !isvalid_cache_header(io)
             @debug "Rejecting cache file $cachefile due to it containing an invalid cache header"
             return true # invalid cache file
         end
-        (modules, (includes, requires), required_modules) = parse_cache_header(io)
+        modules, (includes, requires), required_modules, srctextpos, prefs, prefs_hash = parse_cache_header(io)
         id = isempty(modules) ? nothing : first(modules).first
         modules = Dict{PkgId, UInt64}(modules)
 
@@ -1444,17 +1967,21 @@ function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)
                 M = root_module(req_key)
                 if PkgId(M) == req_key && module_build_id(M) === req_build_id
                     depmods[i] = M
+                elseif ignore_loaded
+                    # Used by Pkg.precompile given that there it's ok to precompile different versions of loaded packages
+                    @goto locate_branch
                 else
                     @debug "Rejecting cache file $cachefile because module $req_key is already loaded and incompatible."
                     return true # Won't be able to fulfill dependency
                 end
             else
-                path = locate_package(req_key, cache)
-                get!(PkgOrigin, pkgorigins, req_key).path = path
+                @label locate_branch
+                path = locate_package(req_key)
                 if path === nothing
                     @debug "Rejecting cache file $cachefile because dependency $req_key not found."
                     return true # Won't be able to fulfill dependency
                 end
+                set_pkgorigin_version_path(req_key, path)
                 depmods[i] = (path, req_key, req_build_id)
             end
         end
@@ -1478,12 +2005,12 @@ function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)
         # now check if this file is fresh relative to its source files
         if !skip_timecheck
             if !samefile(includes[1].filename, modpath)
-                @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename)) not file $modpath"
+                @debug "Rejecting cache file $cachefile because it is for file $(includes[1].filename) not file $modpath"
                 return true # cache file was compiled from a different path
             end
             for (modkey, req_modkey) in requires
                 # verify that `require(modkey, name(req_modkey))` ==> `req_modkey`
-                if identify_package(modkey, req_modkey.name, cache) != req_modkey
+                if identify_package(modkey, req_modkey.name) != req_modkey
                     @debug "Rejecting cache file $cachefile because uuid mapping for $modkey => $req_modkey has changed"
                     return true
                 end
@@ -1492,8 +2019,9 @@ function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)
                 f, ftime_req = chi.filename, chi.mtime
                 # Issue #13606: compensate for Docker images rounding mtimes
                 # Issue #20837: compensate for GlusterFS truncating mtimes to microseconds
+                # The `ftime != 1.0` condition below provides compatibility with Nix mtime.
                 ftime = mtime(f)
-                if ftime != ftime_req && ftime != floor(ftime_req) && ftime != trunc(ftime_req, digits=6)
+                if ftime != ftime_req && ftime != floor(ftime_req) && ftime != trunc(ftime_req, digits=6) && ftime != 1.0
                     @debug "Rejecting stale cache file $cachefile (mtime $ftime_req) because file $f (mtime $ftime) has changed"
                     return true
                 end
@@ -1506,7 +2034,11 @@ function stale_cachefile(modpath::String, cachefile::String, cache::TOMLCache)
         end
 
         if isa(id, PkgId)
-            get!(PkgOrigin, pkgorigins, id).cachepath = cachefile
+            curr_prefs_hash = get_preferences_hash(id.uuid, prefs)
+            if prefs_hash != curr_prefs_hash
+                @debug "Rejecting cache file $cachefile because preferences hash does not match 0x$(string(prefs_hash, base=16)) != 0x$(string(curr_prefs_hash, base=16))"
+                return true
+            end
         end
 
         return depmods # fresh cachefile
@@ -1540,3 +2072,25 @@ macro __DIR__()
     _dirname = dirname(String(__source__.file::Symbol))
     return isempty(_dirname) ? pwd() : abspath(_dirname)
 end
+
+"""
+    precompile(f, args::Tuple{Vararg{Any}})
+
+Compile the given function `f` for the argument tuple (of types) `args`, but do not execute it.
+"""
+function precompile(@nospecialize(f), @nospecialize(args::Tuple))
+    precompile(Tuple{Core.Typeof(f), args...})
+end
+
+const ENABLE_PRECOMPILE_WARNINGS = Ref(false)
+function precompile(@nospecialize(argt::Type))
+    ret = ccall(:jl_compile_hint, Int32, (Any,), argt) != 0
+    if !ret && ENABLE_PRECOMPILE_WARNINGS[]
+        @warn "Inactive precompile statement" maxlog=100 form=argt _module=nothing _file=nothing _line=0
+    end
+    return ret
+end
+
+precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), Nothing))
+precompile(include_package_for_output, (PkgId, String, Vector{String}, Vector{String}, Vector{String}, typeof(_concrete_dependencies), String))
+precompile(create_expr_cache, (PkgId, String, String, typeof(_concrete_dependencies), IO, IO))
diff --git a/base/lock.jl b/base/lock.jl
index 7033fdd80cff9f..8a15d3f95b2391 100644
--- a/base/lock.jl
+++ b/base/lock.jl
@@ -6,16 +6,47 @@ const ThreadSynchronizer = GenericCondition{Threads.SpinLock}
 """
     ReentrantLock()
 
-Creates a re-entrant lock for synchronizing [`Task`](@ref)s.
-The same task can acquire the lock as many times as required.
-Each [`lock`](@ref) must be matched with an [`unlock`](@ref).
+Creates a re-entrant lock for synchronizing [`Task`](@ref)s. The same task can
+acquire the lock as many times as required. Each [`lock`](@ref) must be matched
+with an [`unlock`](@ref).
+
+Calling 'lock' will also inhibit running of finalizers on that thread until the
+corresponding 'unlock'. Use of the standard lock pattern illustrated below
+should naturally be supported, but beware of inverting the try/lock order or
+missing the try block entirely (e.g. attempting to return with the lock still
+held):
+
+This provides a acquire/release memory ordering on lock/unlock calls.
+
+```
+lock(l)
+try
+    <atomic work>
+finally
+    unlock(l)
+end
+```
+
+If [`!islocked(lck::ReentrantLock)`](@ref islocked) holds, [`trylock(lck)`](@ref trylock)
+succeeds unless there are other tasks attempting to hold the lock "at the same time."
 """
 mutable struct ReentrantLock <: AbstractLock
-    locked_by::Union{Task, Nothing}
-    cond_wait::ThreadSynchronizer
-    reentrancy_cnt::Int
-
-    ReentrantLock() = new(nothing, ThreadSynchronizer(), 0)
+    # offset = 16
+    @atomic locked_by::Union{Task, Nothing}
+    # offset32 = 20, offset64 = 24
+    reentrancy_cnt::UInt32
+    # offset32 = 24, offset64 = 28
+    @atomic havelock::UInt8 # 0x0 = none, 0x1 = lock, 0x2 = conflict
+    # offset32 = 28, offset64 = 32
+    cond_wait::ThreadSynchronizer # 2 words
+    # offset32 = 36, offset64 = 48
+    # sizeof32 = 20, sizeof64 = 32
+    # now add padding to make this a full cache line to minimize false sharing between objects
+    _::NTuple{Int === Int32 ? 2 : 3, Int}
+    # offset32 = 44, offset64 = 72 == sizeof+offset
+    # sizeof32 = 28, sizeof64 = 56
+
+    ReentrantLock() = new(nothing, 0x0000_0000, 0x00, ThreadSynchronizer())
 end
 
 assert_havelock(l::ReentrantLock) = assert_havelock(l, l.locked_by)
@@ -24,10 +55,43 @@ assert_havelock(l::ReentrantLock) = assert_havelock(l, l.locked_by)
     islocked(lock) -> Status (Boolean)
 
 Check whether the `lock` is held by any task/thread.
-This should not be used for synchronization (see instead [`trylock`](@ref)).
+This function alone should not be used for synchronization. However, `islocked` combined
+with [`trylock`](@ref) can be used for writing the test-and-test-and-set or exponential
+backoff algorithms *if it is supported by the `typeof(lock)`* (read its documentation).
+
+# Extended help
+
+For example, an exponential backoff can be implemented as follows if the `lock`
+implementation satisfied the properties documented below.
+
+```julia
+nspins = 0
+while true
+    while islocked(lock)
+        GC.safepoint()
+        nspins += 1
+        nspins > LIMIT && error("timeout")
+    end
+    trylock(lock) && break
+    backoff()
+end
+```
+
+## Implementation
+
+A lock implementation is advised to define `islocked` with the following properties and note
+it in its docstring.
+
+* `islocked(lock)` is data-race-free.
+* If `islocked(lock)` returns `false`, an immediate invocation of `trylock(lock)` must
+  succeed (returns `true`) if there is no interference from other tasks.
 """
+function islocked end
+# Above docstring is a documentation for the abstract interface and not the one specific to
+# `ReentrantLock`.
+
 function islocked(rl::ReentrantLock)
-    return rl.reentrancy_cnt != 0
+    return (@atomic :monotonic rl.havelock) != 0
 end
 
 """
@@ -39,23 +103,35 @@ If the lock is already locked by a different task/thread,
 return `false`.
 
 Each successful `trylock` must be matched by an [`unlock`](@ref).
+
+Function `trylock` combined with [`islocked`](@ref) can be used for writing the
+test-and-test-and-set or exponential backoff algorithms *if it is supported by the
+`typeof(lock)`* (read its documentation).
 """
-function trylock(rl::ReentrantLock)
-    t = current_task()
-    if t === rl.locked_by
-        rl.reentrancy_cnt += 1
+function trylock end
+# Above docstring is a documentation for the abstract interface and not the one specific to
+# `ReentrantLock`.
+
+@inline function trylock(rl::ReentrantLock)
+    ct = current_task()
+    if rl.locked_by === ct
+        #@assert rl.havelock !== 0x00
+        rl.reentrancy_cnt += 0x0000_0001
         return true
     end
-    lock(rl.cond_wait)
-    if rl.reentrancy_cnt == 0
-        rl.locked_by = t
-        rl.reentrancy_cnt = 1
-        got = true
-    else
-        got = false
+    return _trylock(rl, ct)
+end
+@noinline function _trylock(rl::ReentrantLock, ct::Task)
+    GC.disable_finalizers()
+    if (@atomicreplace :acquire rl.havelock 0x00 => 0x01).success
+        #@assert rl.locked_by === nothing
+        #@assert rl.reentrancy_cnt === 0
+        rl.reentrancy_cnt = 0x0000_0001
+        @atomic :release rl.locked_by = ct
+        return true
     end
-    unlock(rl.cond_wait)
-    return got
+    GC.enable_finalizers()
+    return false
 end
 
 """
@@ -67,27 +143,23 @@ wait for it to become available.
 
 Each `lock` must be matched by an [`unlock`](@ref).
 """
-function lock(rl::ReentrantLock)
-    t = current_task()
-    if t === rl.locked_by
-        rl.reentrancy_cnt += 1
-    else
-        lock(rl.cond_wait)
-        while true
-            if rl.reentrancy_cnt == 0
-                rl.locked_by = t
-                rl.reentrancy_cnt = 1
-                break
-            end
-            try
-                wait(rl.cond_wait)
-            catch
-                unlock(rl.cond_wait)
-                rethrow()
+@inline function lock(rl::ReentrantLock)
+    trylock(rl) || (@noinline function slowlock(rl::ReentrantLock)
+        c = rl.cond_wait
+        lock(c.lock)
+        try
+            while true
+                if (@atomicreplace rl.havelock 0x01 => 0x02).old == 0x00 # :sequentially_consistent ? # now either 0x00 or 0x02
+                    # it was unlocked, so try to lock it ourself
+                    _trylock(rl, current_task()) && break
+                else # it was locked, so now wait for the release to notify us
+                    wait(c)
+                end
             end
+        finally
+            unlock(c.lock)
         end
-        unlock(rl.cond_wait)
-    end
+    end)(rl)
     return
 end
 
@@ -99,56 +171,42 @@ Releases ownership of the `lock`.
 If this is a recursive lock which has been acquired before, decrement an
 internal counter and return immediately.
 """
-function unlock(rl::ReentrantLock)
-    t = current_task()
-    n = rl.reentrancy_cnt
-    n == 0 && error("unlock count must match lock count")
-    rl.locked_by === t || error("unlock from wrong thread")
-    if n > 1
-        rl.reentrancy_cnt = n - 1
-    else
-        lock(rl.cond_wait)
-        rl.reentrancy_cnt = 0
-        rl.locked_by = nothing
-        if !isempty(rl.cond_wait.waitq)
-            try
-                notify(rl.cond_wait)
-            catch
-                unlock(rl.cond_wait)
-                rethrow()
+@inline function unlock(rl::ReentrantLock)
+    rl.locked_by === current_task() ||
+        error(rl.reentrancy_cnt == 0x0000_0000 ? "unlock count must match lock count" : "unlock from wrong thread")
+    (@noinline function _unlock(rl::ReentrantLock)
+        n = rl.reentrancy_cnt - 0x0000_0001
+        rl.reentrancy_cnt = n
+        if n == 0x0000_00000
+            @atomic :monotonic rl.locked_by = nothing
+            if (@atomicswap :release rl.havelock = 0x00) == 0x02
+                (@noinline function notifywaiters(rl)
+                    cond_wait = rl.cond_wait
+                    lock(cond_wait)
+                    try
+                        notify(cond_wait)
+                    finally
+                        unlock(cond_wait)
+                    end
+                end)(rl)
             end
+            return true
         end
-        unlock(rl.cond_wait)
-    end
-    return
+        return false
+    end)(rl) && GC.enable_finalizers()
+    nothing
 end
 
 function unlockall(rl::ReentrantLock)
-    t = current_task()
-    n = rl.reentrancy_cnt
-    rl.locked_by === t || error("unlock from wrong thread")
-    n == 0 && error("unlock count must match lock count")
-    lock(rl.cond_wait)
-    rl.reentrancy_cnt = 0
-    rl.locked_by = nothing
-    if !isempty(rl.cond_wait.waitq)
-        try
-            notify(rl.cond_wait)
-        catch
-            unlock(rl.cond_wait)
-            rethrow()
-        end
-    end
-    unlock(rl.cond_wait)
+    n = @atomicswap :not_atomic rl.reentrancy_cnt = 0x0000_0001
+    unlock(rl)
     return n
 end
 
-function relockall(rl::ReentrantLock, n::Int)
-    t = current_task()
+function relockall(rl::ReentrantLock, n::UInt32)
     lock(rl)
-    n1 = rl.reentrancy_cnt
-    rl.reentrancy_cnt = n
-    n1 == 1 || concurrency_violation()
+    old = @atomicswap :not_atomic rl.reentrancy_cnt = n
+    old == 0x0000_0001 || concurrency_violation()
     return
 end
 
@@ -161,6 +219,9 @@ available.
 
 When this function returns, the `lock` has been released, so the caller should
 not attempt to `unlock` it.
+
+!!! compat "Julia 1.7"
+    Using a [`Channel`](@ref) as the second argument requires Julia 1.7 or later.
 """
 function lock(f, l::AbstractLock)
     lock(l)
@@ -182,6 +243,22 @@ function trylock(f, l::AbstractLock)
     return false
 end
 
+"""
+    @lock l expr
+
+Macro version of `lock(f, l::AbstractLock)` but with `expr` instead of `f` function.
+Expands to:
+```julia
+lock(l)
+try
+    expr
+finally
+    unlock(l)
+end
+```
+This is similar to using [`lock`](@ref) with a `do` block, but avoids creating a closure
+and thus can improve the performance.
+"""
 macro lock(l, expr)
     quote
         temp = $(esc(l))
@@ -194,6 +271,13 @@ macro lock(l, expr)
     end
 end
 
+"""
+    @lock_nofail l expr
+
+Equivalent to `@lock l expr` for cases in which we can guarantee that the function
+will not throw any error. In this case, avoiding try-catch can improve the performance.
+See [`@lock`](@ref).
+"""
 macro lock_nofail(l, expr)
     quote
         temp = $(esc(l))
@@ -249,6 +333,8 @@ end
 Create a counting semaphore that allows at most `sem_size`
 acquires to be in use at any time.
 Each acquire must be matched with a release.
+
+This provides a acquire & release memory ordering on acquire/release calls.
 """
 mutable struct Semaphore
     sem_size::Int
@@ -276,6 +362,39 @@ function acquire(s::Semaphore)
     return
 end
 
+"""
+    acquire(f, s::Semaphore)
+
+Execute `f` after acquiring from Semaphore `s`,
+and `release` on completion or error.
+
+For example, a do-block form that ensures only 2
+calls of `foo` will be active at the same time:
+
+```julia
+s = Base.Semaphore(2)
+@sync for _ in 1:100
+    Threads.@spawn begin
+        Base.acquire(s) do
+            foo()
+        end
+    end
+end
+```
+
+!!! compat "Julia 1.8"
+    This method requires at least Julia 1.8.
+
+"""
+function acquire(f, s::Semaphore)
+    acquire(s)
+    try
+        return f()
+    finally
+        release(s)
+    end
+end
+
 """
     release(s::Semaphore)
 
@@ -297,40 +416,60 @@ end
 
 
 """
-    Event()
+    Event([autoreset=false])
 
 Create a level-triggered event source. Tasks that call [`wait`](@ref) on an
-`Event` are suspended and queued until `notify` is called on the `Event`.
+`Event` are suspended and queued until [`notify`](@ref) is called on the `Event`.
 After `notify` is called, the `Event` remains in a signaled state and
-tasks will no longer block when waiting for it.
+tasks will no longer block when waiting for it, until `reset` is called.
+
+If `autoreset` is true, at most one task will be released from `wait` for
+each call to `notify`.
+
+This provides an acquire & release memory ordering on notify/wait.
 
 !!! compat "Julia 1.1"
     This functionality requires at least Julia 1.1.
+
+!!! compat "Julia 1.8"
+    The `autoreset` functionality and memory ordering guarantee requires at least Julia 1.8.
 """
 mutable struct Event
     notify::Threads.Condition
-    set::Bool
-    Event() = new(Threads.Condition(), false)
+    autoreset::Bool
+    @atomic set::Bool
+    Event(autoreset::Bool=false) = new(Threads.Condition(), autoreset, false)
 end
 
 function wait(e::Event)
-    e.set && return
-    lock(e.notify)
+    if e.autoreset
+        (@atomicswap :acquire_release e.set = false) && return
+    else
+        (@atomic e.set) && return # full barrier also
+    end
+    lock(e.notify) # acquire barrier
     try
-        while !e.set
-            wait(e.notify)
+        if e.autoreset
+            (@atomicswap :acquire_release e.set = false) && return
+        else
+            e.set && return
         end
+        wait(e.notify)
     finally
-        unlock(e.notify)
+        unlock(e.notify) # release barrier
     end
     nothing
 end
 
 function notify(e::Event)
-    lock(e.notify)
+    lock(e.notify) # acquire barrier
     try
-        if !e.set
-            e.set = true
+        if e.autoreset
+            if notify(e.notify, all=false) == 0
+                @atomic :release e.set = true
+            end
+        elseif !e.set
+            @atomic :release e.set = true
             notify(e.notify)
         end
     finally
@@ -339,6 +478,17 @@ function notify(e::Event)
     nothing
 end
 
+"""
+    reset(::Event)
+
+Reset an Event back into an un-set state. Then any future calls to `wait` will
+block until `notify` is called again.
+"""
+function reset(e::Event)
+    @atomic e.set = false # full barrier
+    nothing
+end
+
 @eval Threads begin
     import .Base: Event
     export Event
diff --git a/base/locks-mt.jl b/base/locks-mt.jl
index 49c7d2c0f90118..bfa3ac1b8352e4 100644
--- a/base/locks-mt.jl
+++ b/base/locks-mt.jl
@@ -21,50 +21,27 @@ to execute and does not block (e.g. perform I/O).
 In general, [`ReentrantLock`](@ref) should be used instead.
 
 Each [`lock`](@ref) must be matched with an [`unlock`](@ref).
+If [`!islocked(lck::SpinLock)`](@ref islocked) holds, [`trylock(lck)`](@ref trylock)
+succeeds unless there are other tasks attempting to hold the lock "at the same time."
 
 Test-and-test-and-set spin locks are quickest up to about 30ish
 contending threads. If you have more contention than that, different
 synchronization approaches should be considered.
 """
 mutable struct SpinLock <: AbstractLock
-    owned::Int
+    # we make this much larger than necessary to minimize false-sharing
+    @atomic owned::Int
     SpinLock() = new(0)
 end
 
-import Base.Sys.WORD_SIZE
-
-@eval _xchg!(x::SpinLock, v::Int) =
-    llvmcall($"""
-             %ptr = inttoptr i$WORD_SIZE %0 to i$WORD_SIZE*
-             %rv = atomicrmw xchg i$WORD_SIZE* %ptr, i$WORD_SIZE %1 acq_rel
-             ret i$WORD_SIZE %rv
-             """, Int, Tuple{Ptr{Int}, Int}, unsafe_convert(Ptr{Int}, pointer_from_objref(x)), v)
-
-@eval _get(x::SpinLock) =
-    llvmcall($"""
-             %ptr = inttoptr i$WORD_SIZE %0 to i$WORD_SIZE*
-             %rv = load atomic i$WORD_SIZE, i$WORD_SIZE* %ptr monotonic, align $(gc_alignment(Int))
-             ret i$WORD_SIZE %rv
-             """, Int, Tuple{Ptr{Int}}, unsafe_convert(Ptr{Int}, pointer_from_objref(x)))
-
-@eval _set!(x::SpinLock, v::Int) =
-    llvmcall($"""
-             %ptr = inttoptr i$WORD_SIZE %0 to i$WORD_SIZE*
-             store atomic i$WORD_SIZE %1, i$WORD_SIZE* %ptr release, align $(gc_alignment(Int))
-             ret void
-             """, Cvoid, Tuple{Ptr{Int}, Int}, unsafe_convert(Ptr{Int}, pointer_from_objref(x)), v)
-
 # Note: this cannot assert that the lock is held by the correct thread, because we do not
 # track which thread locked it. Users beware.
 Base.assert_havelock(l::SpinLock) = islocked(l) ? nothing : Base.concurrency_violation()
 
 function lock(l::SpinLock)
     while true
-        if _get(l) == 0
-            p = _xchg!(l, 1)
-            if p == 0
-                return
-            end
+        if @inline trylock(l)
+            return
         end
         ccall(:jl_cpu_pause, Cvoid, ())
         # Temporary solution before we have gc transition support in codegen.
@@ -73,18 +50,26 @@ function lock(l::SpinLock)
 end
 
 function trylock(l::SpinLock)
-    if _get(l) == 0
-        return _xchg!(l, 1) == 0
+    if l.owned == 0
+        GC.disable_finalizers()
+        p = @atomicswap :acquire l.owned = 1
+        if p == 0
+            return true
+        end
+        GC.enable_finalizers()
     end
     return false
 end
 
 function unlock(l::SpinLock)
-    _set!(l, 0)
+    if (@atomicswap :release l.owned = 0) == 0
+        error("unlock count must match lock count")
+    end
+    GC.enable_finalizers()
     ccall(:jl_cpu_wake, Cvoid, ())
     return
 end
 
 function islocked(l::SpinLock)
-    return _get(l) != 0
+    return (@atomic :monotonic l.owned) != 0
 end
diff --git a/base/logging.jl b/base/logging.jl
index 653ded32e443d2..d2b6fa637c1bc9 100644
--- a/base/logging.jl
+++ b/base/logging.jl
@@ -73,21 +73,21 @@ catch_exceptions(logger) = true
 
 # Prevent invalidation when packages define custom loggers
 # Using invoke in combination with @nospecialize eliminates backedges to these methods
-function _invoked_shouldlog(logger, level, _module, group, id)
+Base.@constprop :none function _invoked_shouldlog(logger, level, _module, group, id)
     @nospecialize
     return invoke(
         shouldlog,
         Tuple{typeof(logger), typeof(level), typeof(_module), typeof(group), typeof(id)},
         logger, level, _module, group, id
-    )
+    )::Bool
 end
 
 function _invoked_min_enabled_level(@nospecialize(logger))
-    return invoke(min_enabled_level, Tuple{typeof(logger)}, logger)
+    return invoke(min_enabled_level, Tuple{typeof(logger)}, logger)::LogLevel
 end
 
 function _invoked_catch_exceptions(@nospecialize(logger))
-    return invoke(catch_exceptions, Tuple{typeof(logger)}, logger)
+    return invoke(catch_exceptions, Tuple{typeof(logger)}, logger)::Bool
 end
 
 """
@@ -101,7 +101,7 @@ struct NullLogger <: AbstractLogger; end
 min_enabled_level(::NullLogger) = AboveMaxLevel
 shouldlog(::NullLogger, args...) = false
 handle_message(::NullLogger, args...; kwargs...) =
-    error("Null logger handle_message() should not be called")
+    (@nospecialize; error("Null logger handle_message() should not be called"))
 
 
 #-------------------------------------------------------------------------------
@@ -116,7 +116,7 @@ filtered, before any other work is done to construct the log record data
 structure itself.
 
 # Examples
-```
+```julia-repl
 julia> Logging.LogLevel(0) == Logging.Info
 true
 ```
@@ -133,9 +133,29 @@ isless(a::LogLevel, b::LogLevel) = isless(a.level, b.level)
 convert(::Type{LogLevel}, level::Integer) = LogLevel(level)
 
 const BelowMinLevel = LogLevel(-1000001)
+"""
+    Debug
+
+Alias for [`LogLevel(-1000)`](@ref LogLevel).
+"""
 const Debug         = LogLevel(   -1000)
+"""
+    Info
+
+Alias for [`LogLevel(0)`](@ref LogLevel).
+"""
 const Info          = LogLevel(       0)
+"""
+    Warn
+
+Alias for [`LogLevel(1000)`](@ref LogLevel).
+"""
 const Warn          = LogLevel(    1000)
+"""
+    Error
+
+Alias for [`LogLevel(2000)`](@ref LogLevel).
+"""
 const Error         = LogLevel(    2000)
 const AboveMaxLevel = LogLevel( 1000001)
 
@@ -202,7 +222,7 @@ There's also some key value pairs which have conventional meaning:
 
 # Examples
 
-```
+```julia
 @debug "Verbose debugging information.  Invisible by default"
 @info  "An informational message"
 @warn  "Something was odd.  You should pay attention"
@@ -254,10 +274,11 @@ _log_record_ids = Set{Symbol}()
 # across versions of the originating module, provided the log generating
 # statement itself doesn't change.
 function log_record_id(_module, level, message, log_kws)
+    @nospecialize
     modname = _module === nothing ?  "" : join(fullname(_module), "_")
     # Use an arbitrarily chosen eight hex digits here. TODO: Figure out how to
     # make the id exactly the same on 32 and 64 bit systems.
-    h = UInt32(hash(string(modname, level, message, log_kws)) & 0xFFFFFFFF)
+    h = UInt32(hash(string(modname, level, message, log_kws)::String) & 0xFFFFFFFF)
     while true
         id = Symbol(modname, '_', string(h, base = 16, pad = 8))
         # _log_record_ids is a registry of log record ids for use during
@@ -274,48 +295,115 @@ end
 
 default_group(file) = Symbol(splitext(basename(file))[1])
 
+function issimple(@nospecialize val)
+    val isa String && return true
+    val isa Symbol && return true
+    val isa QuoteNode && return true
+    val isa Number && return true
+    val isa Char && return true
+    if val isa Expr
+        val.head === :quote && issimple(val.args[1]) && return true
+        val.head === :inert && return true
+    end
+    return false
+end
+function issimplekw(@nospecialize val)
+    if val isa Expr
+        if val.head === :kw
+            val = val.args[2]
+            if val isa Expr && val.head === :escape
+                issimple(val.args[1]) && return true
+            end
+        end
+    end
+    return false
+end
+
 # Generate code for logging macros
 function logmsg_code(_module, file, line, level, message, exs...)
+    @nospecialize
     log_data = process_logmsg_exs(_module, file, line, level, message, exs...)
-    quote
-    let
-        level = $level
-        std_level = convert(LogLevel, level)
-        if std_level >= getindex(_min_enabled_level)
-            group = $(log_data._group)
-            _module = $(log_data._module)
-            logger = current_logger_for_env(std_level, group, _module)
-            if !(logger === nothing)
-                id = $(log_data._id)
-                # Second chance at an early bail-out (before computing the message),
-                # based on arbitrary logger-specific logic.
-                if _invoked_shouldlog(logger, level, _module, group, id)
-                    file = $(log_data._file)
-                    line = $(log_data._line)
-                    try
-                        msg = $(esc(message))
-                        handle_message(
+    if !isa(message, Symbol) && issimple(message) && isempty(log_data.kwargs)
+        logrecord = quote
+            msg = $(message)
+            kwargs = (;)
+            true
+        end
+    elseif issimple(message) && all(issimplekw, log_data.kwargs)
+        # if message and kwargs are just values and variables, we can avoid try/catch
+        # complexity by adding the code for testing the UndefVarError by hand
+        checkerrors = nothing
+        for kwarg in reverse(log_data.kwargs)
+            if isa(kwarg.args[2].args[1], Symbol)
+                checkerrors = Expr(:if, Expr(:isdefined, kwarg.args[2]), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(kwarg.args[2].args[1])))
+            end
+        end
+        if isa(message, Symbol)
+            message = esc(message)
+            checkerrors = Expr(:if, Expr(:isdefined, message), checkerrors, Expr(:call, Expr(:core, :UndefVarError), QuoteNode(message.args[1])))
+        end
+        logrecord = quote
+            let err = $checkerrors
+                if err === nothing
+                    msg = $(message)
+                    kwargs = (;$(log_data.kwargs...))
+                    true
+                else
+                    logging_error(logger, level, _module, group, id, file, line, err, false)
+                    false
+                end
+            end
+        end
+    else
+        logrecord = quote
+            try
+                msg = $(esc(message))
+                kwargs = (;$(log_data.kwargs...))
+                true
+            catch err
+                logging_error(logger, level, _module, group, id, file, line, err, true)
+                false
+            end
+        end
+    end
+    return quote
+        let
+            level = $level
+            std_level = convert(LogLevel, level)
+            if std_level >= _min_enabled_level[]
+                group = $(log_data._group)
+                _module = $(log_data._module)
+                logger = current_logger_for_env(std_level, group, _module)
+                if !(logger === nothing)
+                    id = $(log_data._id)
+                    # Second chance at an early bail-out (before computing the message),
+                    # based on arbitrary logger-specific logic.
+                    if _invoked_shouldlog(logger, level, _module, group, id)
+                        file = $(log_data._file)
+                        if file isa String
+                            file = Base.fixup_stdlib_path(file)
+                        end
+                        line = $(log_data._line)
+                        local msg, kwargs
+                        $(logrecord) && handle_message(
                             logger, level, msg, _module, group, id, file, line;
-                            $(log_data.kwargs...)
-                        )
-                    catch err
-                        logging_error(logger, level, _module, group, id, file, line, err)
+                            kwargs...)
                     end
                 end
             end
+            nothing
         end
-        nothing
-    end
     end
 end
 
 function process_logmsg_exs(_orig_module, _file, _line, level, message, exs...)
+    @nospecialize
     local _group, _id
     _module = _orig_module
     kwargs = Any[]
     for ex in exs
         if ex isa Expr && ex.head === :(=)
-            k,v = ex.args
+            k, v = ex.args
             if !(k isa Symbol)
                 k = Symbol(k)
             end
@@ -352,6 +440,7 @@ function process_logmsg_exs(_orig_module, _file, _line, level, message, exs...)
 end
 
 function default_group_code(file)
+    @nospecialize
     if file isa String && isdefined(Base, :basename)
         QuoteNode(default_group(file))  # precompute if we can
     else
@@ -361,43 +450,38 @@ function default_group_code(file)
 end
 
 
-# Report an error in log message creation (or in the logger itself).
+# Report an error in log message creation
 @noinline function logging_error(logger, level, _module, group, id,
-                                 filepath, line, @nospecialize(err))
+                                 filepath, line, @nospecialize(err), real::Bool)
+    @nospecialize
     if !_invoked_catch_exceptions(logger)
-        rethrow(err)
-    end
-    try
-        msg = "Exception while generating log record in module $_module at $filepath:$line"
-        handle_message(
-            logger, Error, msg, _module, :logevent_error, id, filepath, line;
-            exception=(err,catch_backtrace())
-        )
-    catch err2
-        try
-            # Give up and write to stderr, in three independent calls to
-            # increase the odds of it getting through.
-            print(stderr, "Exception handling log message: ")
-            println(stderr, err)
-            println(stderr, "  module=$_module  file=$filepath  line=$line")
-            println(stderr, "  Second exception: ", err2)
-        catch
-        end
+        real ? rethrow(err) : throw(err)
     end
+    msg = try
+              "Exception while generating log record in module $_module at $filepath:$line"
+          catch ex
+              LazyString("Exception handling log message: ", ex)
+          end
+    bt = real ? catch_backtrace() : backtrace()
+    handle_message(
+        logger, Error, msg, _module, :logevent_error, id, filepath, line;
+        exception=(err,bt))
     nothing
 end
 
 # Log a message. Called from the julia C code; kwargs is in the format
 # Any[key1,val1, ...] for simplicity in construction on the C side.
 function logmsg_shim(level, message, _module, group, id, file, line, kwargs)
-    real_kws = Any[(kwargs[i],kwargs[i+1]) for i in 1:2:length(kwargs)]
+    @nospecialize
+    real_kws = Any[(kwargs[i], kwargs[i+1]) for i in 1:2:length(kwargs)]
     @logmsg(convert(LogLevel, level), message,
             _module=_module, _id=id, _group=group,
             _file=String(file), _line=line, real_kws...)
+    nothing
 end
 
 # Global log limiting mechanism for super fast but inflexible global log limiting.
-const _min_enabled_level = Ref(Debug)
+const _min_enabled_level = Ref{LogLevel}(Debug)
 
 # LogState - a cache of data extracted from the logger, plus the logger itself.
 struct LogState
@@ -413,7 +497,7 @@ function current_logstate()
 end
 
 # helper function to get the current logger, if enabled for the specified message type
-@noinline function current_logger_for_env(std_level::LogLevel, group, _module)
+@noinline Base.@constprop :none function current_logger_for_env(std_level::LogLevel, group, _module)
     logstate = current_logstate()
     if std_level >= logstate.min_enabled_level || env_override_minlevel(group, _module)
         return logstate.logger
@@ -444,7 +528,7 @@ a *global* setting, intended to make debug logging extremely cheap when
 disabled.
 
 # Examples
-```
+```julia
 Logging.disable_logging(Logging.Info) # Disable debug and info
 ```
 """
@@ -455,7 +539,7 @@ end
 let _debug_groups_include::Vector{Symbol} = Symbol[],
     _debug_groups_exclude::Vector{Symbol} = Symbol[],
     _debug_str::String = ""
-global function env_override_minlevel(group, _module)
+global Base.@constprop :none function env_override_minlevel(group, _module)
     debug = get(ENV, "JULIA_DEBUG", "")
     if !(debug === _debug_str)
         _debug_str = debug
@@ -550,21 +634,25 @@ attached to the task.
 """
 current_logger() = current_logstate().logger
 
+const closed_stream = IOBuffer(UInt8[])
+close(closed_stream)
 
 #-------------------------------------------------------------------------------
 # SimpleLogger
 """
-    SimpleLogger(stream=stderr, min_level=Info)
+    SimpleLogger([stream,] min_level=Info)
 
 Simplistic logger for logging all messages with level greater than or equal to
-`min_level` to `stream`.
+`min_level` to `stream`. If stream is closed then messages with log level
+greater or equal to `Warn` will be logged to `stderr` and below to `stdout`.
 """
 struct SimpleLogger <: AbstractLogger
     stream::IO
     min_level::LogLevel
     message_limits::Dict{Any,Int}
 end
-SimpleLogger(stream::IO=stderr, level=Info) = SimpleLogger(stream, level, Dict{Any,Int}())
+SimpleLogger(stream::IO, level=Info) = SimpleLogger(stream, level, Dict{Any,Int}())
+SimpleLogger(level=Info) = SimpleLogger(closed_stream, level)
 
 shouldlog(logger::SimpleLogger, level, _module, group, id) =
     get(logger.message_limits, id, 1) > 0
@@ -573,30 +661,37 @@ min_enabled_level(logger::SimpleLogger) = logger.min_level
 
 catch_exceptions(logger::SimpleLogger) = false
 
-function handle_message(logger::SimpleLogger, level, message, _module, group, id,
-                        filepath, line; maxlog=nothing, kwargs...)
-    if maxlog !== nothing && maxlog isa Integer
-        remaining = get!(logger.message_limits, id, maxlog)
+function handle_message(logger::SimpleLogger, level::LogLevel, message, _module, group, id,
+                        filepath, line; kwargs...)
+    @nospecialize
+    maxlog = get(kwargs, :maxlog, nothing)
+    if maxlog isa Core.BuiltinInts
+        remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
         logger.message_limits[id] = remaining - 1
         remaining > 0 || return
     end
     buf = IOBuffer()
-    iob = IOContext(buf, logger.stream)
+    stream = logger.stream
+    if !isopen(stream)
+        stream = stderr
+    end
+    iob = IOContext(buf, stream)
     levelstr = level == Warn ? "Warning" : string(level)
-    msglines = split(chomp(string(message)), '\n')
-    println(iob, "┌ ", levelstr, ": ", msglines[1])
-    for i in 2:length(msglines)
-        println(iob, "│ ", msglines[i])
+    msglines = eachsplit(chomp(convert(String, string(message))::String), '\n')
+    msg1, rest = Iterators.peel(msglines)
+    println(iob, "┌ ", levelstr, ": ", msg1)
+    for msg in rest
+        println(iob, "│ ", msg)
     end
     for (key, val) in kwargs
+        key === :maxlog && continue
         println(iob, "│   ", key, " = ", val)
     end
-    println(iob, "└ @ ", something(_module, "nothing"), " ",
-            something(filepath, "nothing"), ":", something(line, "nothing"))
-    write(logger.stream, take!(buf))
+    println(iob, "└ @ ", _module, " ", filepath, ":", line)
+    write(stream, take!(buf))
     nothing
 end
 
-_global_logstate = LogState(SimpleLogger(Core.stderr, CoreLogging.Info))
+_global_logstate = LogState(SimpleLogger())
 
 end # CoreLogging
diff --git a/base/math.jl b/base/math.jl
index ab77d95b97df6d..9550a0a54b4963 100644
--- a/base/math.jl
+++ b/base/math.jl
@@ -18,7 +18,7 @@ export sin, cos, sincos, tan, sinh, cosh, tanh, asin, acos, atan,
 import .Base: log, exp, sin, cos, tan, sinh, cosh, tanh, asin,
              acos, atan, asinh, acosh, atanh, sqrt, log2, log10,
              max, min, minmax, ^, exp2, muladd, rem,
-             exp10, expm1, log1p
+             exp10, expm1, log1p, @constprop, @assume_effects
 
 using .Base: sign_mask, exponent_mask, exponent_one,
             exponent_half, uinttype, significand_mask,
@@ -30,32 +30,56 @@ using Core.Intrinsics: sqrt_llvm
 using .Base: IEEEFloat
 
 @noinline function throw_complex_domainerror(f::Symbol, x)
-    throw(DomainError(x, string("$f will only return a complex result if called with a ",
-                                "complex argument. Try $f(Complex(x)).")))
+    throw(DomainError(x,
+        LazyString(f," will only return a complex result if called with a complex argument. Try ", f,"(Complex(x)).")))
 end
 @noinline function throw_exp_domainerror(x)
-    throw(DomainError(x, string("Exponentiation yielding a complex result requires a ",
-                                "complex argument.\nReplace x^y with (x+0im)^y, ",
-                                "Complex(x)^y, or similar.")))
+    throw(DomainError(x, LazyString(
+        "Exponentiation yielding a complex result requires a ",
+        "complex argument.\nReplace x^y with (x+0im)^y, ",
+        "Complex(x)^y, or similar.")))
 end
 
 # non-type specific math functions
 
+@inline function two_mul(x::Float64, y::Float64)
+    if Core.Intrinsics.have_fma(Float64)
+        xy = x*y
+        return xy, fma(x, y, -xy)
+    end
+    return Base.twomul(x,y)
+end
+
+@inline function two_mul(x::T, y::T) where T<: Union{Float16, Float32}
+    if Core.Intrinsics.have_fma(T)
+        xy = x*y
+        return xy, fma(x, y, -xy)
+    end
+    xy = widen(x)*y
+    Txy = T(xy)
+    return Txy, T(xy-Txy)
+end
+
 """
     clamp(x, lo, hi)
 
 Return `x` if `lo <= x <= hi`. If `x > hi`, return `hi`. If `x < lo`, return `lo`. Arguments
 are promoted to a common type.
 
+See also [`clamp!`](@ref), [`min`](@ref), [`max`](@ref).
+
+!!! compat "Julia 1.3"
+    `missing` as the first argument requires at least Julia 1.3.
+
 # Examples
 ```jldoctest
-julia> clamp.([pi, 1.0, big(10.)], 2., 9.)
+julia> clamp.([pi, 1.0, big(10)], 2.0, 9.0)
 3-element Vector{BigFloat}:
  3.141592653589793238462643383279502884197169399375105820974944592307816406286198
  2.0
  9.0
 
-julia> clamp.([11,8,5],10,6) # an example where lo > hi
+julia> clamp.([11, 8, 5], 10, 6)  # an example where lo > hi
 3-element Vector{Int64}:
   6
   6
@@ -73,12 +97,18 @@ clamp(x::X, lo::L, hi::H) where {X,L,H} =
 
 Clamp `x` between `typemin(T)` and `typemax(T)` and convert the result to type `T`.
 
+See also [`trunc`](@ref).
+
 # Examples
 ```jldoctest
 julia> clamp(200, Int8)
 127
+
 julia> clamp(-200, Int8)
 -128
+
+julia> trunc(Int, 4pi^2)
+39
 ```
 """
 clamp(x, ::Type{T}) where {T<:Integer} = clamp(x, typemin(T), typemax(T)) % T
@@ -89,6 +119,22 @@ clamp(x, ::Type{T}) where {T<:Integer} = clamp(x, typemin(T), typemax(T)) % T
 
 Restrict values in `array` to the specified range, in-place.
 See also [`clamp`](@ref).
+
+!!! compat "Julia 1.3"
+    `missing` entries in `array` require at least Julia 1.3.
+
+# Examples
+```jldoctest
+julia> row = collect(-4:4)';
+
+julia> clamp!(row, 0, Inf)
+1×9 adjoint(::Vector{Int64}) with eltype Int64:
+ 0  0  0  0  0  1  2  3  4
+
+julia> clamp.((-4:4)', 0, Inf)
+1×9 Matrix{Float64}:
+ 0.0  0.0  0.0  0.0  0.0  1.0  2.0  3.0  4.0
+```
 """
 function clamp!(x::AbstractArray, lo, hi)
     @inbounds for i in eachindex(x)
@@ -97,6 +143,15 @@ function clamp!(x::AbstractArray, lo, hi)
     x
 end
 
+"""
+    clamp(x::Integer, r::AbstractUnitRange)
+
+Clamp `x` to lie within range `r`.
+
+!!! compat "Julia 1.6"
+     This method requires at least Julia 1.6.
+"""
+clamp(x::Integer, r::AbstractUnitRange{<:Integer}) = clamp(x, first(r), last(r))
 
 """
     evalpoly(x, p)
@@ -196,6 +251,8 @@ end
     @horner(x, p...)
 
 Evaluate `p[1] + x * (p[2] + x * (....))`, i.e. a polynomial via Horner's rule.
+
+See also [`@evalpoly`](@ref), [`evalpoly`](@ref).
 """
 macro horner(x, p...)
      xesc, pesc = esc(x), esc.(p)
@@ -215,6 +272,8 @@ that is, the coefficients are given in ascending order by power of `z`.  This ma
 to efficient inline code that uses either Horner's method or, for complex `z`, a more
 efficient Goertzel-like algorithm.
 
+See also [`evalpoly`](@ref).
+
 # Examples
 ```jldoctest
 julia> @evalpoly(3, 1, 0, 1)
@@ -232,6 +291,19 @@ macro evalpoly(z, p...)
     :(evalpoly($zesc, ($(pesc...),)))
 end
 
+# polynomial evaluation using compensated summation.
+# much more accurate, especially when lo can be combined with other rounding errors
+@inline function exthorner(x, p::Tuple)
+    hi, lo = p[end], zero(x)
+    for i in length(p)-1:-1:1
+        pi = p[i]
+        prod, err = two_mul(hi,x)
+        hi = pi+prod
+        lo = fma(lo, x, prod - (hi - pi) + err)
+    end
+    return hi, lo
+end
+
 """
     rad2deg(x)
 
@@ -250,6 +322,8 @@ rad2deg(z::AbstractFloat) = z * (180 / oftype(z, pi))
 
 Convert `x` from degrees to radians.
 
+See also: [`rad2deg`](@ref), [`sind`](@ref).
+
 # Examples
 ```jldoctest
 julia> deg2rad(90)
@@ -356,64 +430,6 @@ Compute the inverse hyperbolic sine of `x`.
 """
 asinh(x::Number)
 
-"""
-    expm1(x)
-
-Accurately compute ``e^x-1``.
-"""
-expm1(x)
-for f in (:exp2, :expm1)
-    @eval begin
-        ($f)(x::Float64) = ccall(($(string(f)),libm), Float64, (Float64,), x)
-        ($f)(x::Float32) = ccall(($(string(f,"f")),libm), Float32, (Float32,), x)
-        ($f)(x::Real) = ($f)(float(x))
-    end
-end
-
-"""
-    exp2(x)
-
-Compute the base 2 exponential of `x`, in other words ``2^x``.
-
-# Examples
-```jldoctest
-julia> exp2(5)
-32.0
-```
-"""
-exp2(x::AbstractFloat) = 2^x
-
-"""
-    exp10(x)
-
-Compute the base 10 exponential of `x`, in other words ``10^x``.
-
-# Examples
-```jldoctest
-julia> exp10(2)
-100.0
-```
-"""
-exp10(x::AbstractFloat) = 10^x
-
-for f in (:sinh, :cosh, :tanh, :atan, :asinh, :exp, :expm1)
-    @eval ($f)(x::AbstractFloat) = error("not implemented for ", typeof(x))
-end
-
-# functions with special cases for integer arguments
-@inline function exp2(x::Base.BitInteger)
-    if x > 1023
-        Inf64
-    elseif x <= -1023
-        # if -1073 < x <= -1023 then Result will be a subnormal number
-        # Hex literal with padding must be used to work on 32bit machine
-        reinterpret(Float64, 0x0000_0000_0000_0001 << ((x + 1074) % UInt))
-    else
-        # We will cast everything to Int64 to avoid errors in case of Int128
-        # If x is a Int128, and is outside the range of Int64, then it is not -1023<x<=1023
-        reinterpret(Float64, (exponent_bias(Float64) + (x % Int64)) << (significand_bits(Float64) % UInt))
-    end
-end
 
 # utility for converting NaN return to DomainError
 # the branch in nan_dom_err prevents its callers from inlining, so be sure to force it
@@ -425,6 +441,8 @@ end
     sin(x)
 
 Compute sine of `x`, where `x` is in radians.
+
+See also [`sind`](@ref), [`sinpi`](@ref), [`sincos`](@ref), [`cis`](@ref).
 """
 sin(x::Number)
 
@@ -432,6 +450,8 @@ sin(x::Number)
     cos(x)
 
 Compute cosine of `x`, where `x` is in radians.
+
+See also [`cosd`](@ref), [`cospi`](@ref), [`sincos`](@ref), [`cis`](@ref).
 """
 cos(x::Number)
 
@@ -476,6 +496,8 @@ atanh(x::Number)
 Compute the natural logarithm of `x`. Throws [`DomainError`](@ref) for negative
 [`Real`](@ref) arguments. Use complex negative arguments to obtain complex results.
 
+See also [`log1p`](@ref), [`log2`](@ref), [`log10`](@ref).
+
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
 julia> log(2)
@@ -497,6 +519,8 @@ log(x::Number)
 Compute the logarithm of `x` to base 2. Throws [`DomainError`](@ref) for negative
 [`Real`](@ref) arguments.
 
+See also: [`exp2`](@ref), [`ldexp`](@ref), [`ispow2`](@ref).
+
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
 julia> log2(4)
@@ -507,9 +531,9 @@ julia> log2(10)
 
 julia> log2(-2)
 ERROR: DomainError with -2.0:
-NaN result for non-NaN input.
+log2 will only return a complex result if called with a complex argument. Try log2(Complex(x)).
 Stacktrace:
- [1] nan_dom_err at ./math.jl:325 [inlined]
+ [1] throw_complex_domainerror(f::Symbol, x::Float64) at ./math.jl:31
 [...]
 ```
 """
@@ -531,9 +555,9 @@ julia> log10(2)
 
 julia> log10(-2)
 ERROR: DomainError with -2.0:
-NaN result for non-NaN input.
+log10 will only return a complex result if called with a complex argument. Try log10(Complex(x)).
 Stacktrace:
- [1] nan_dom_err at ./math.jl:325 [inlined]
+ [1] throw_complex_domainerror(f::Symbol, x::Float64) at ./math.jl:31
 [...]
 ```
 """
@@ -562,13 +586,6 @@ Stacktrace:
 ```
 """
 log1p(x)
-for f in (:log2, :log10)
-    @eval begin
-        @inline ($f)(x::Float64) = nan_dom_err(ccall(($(string(f)), libm), Float64, (Float64,), x), x)
-        @inline ($f)(x::Float32) = nan_dom_err(ccall(($(string(f, "f")), libm), Float32, (Float32,), x), x)
-        @inline ($f)(x::Real) = ($f)(float(x))
-    end
-end
 
 @inline function sqrt(x::Union{Float32,Float64})
     x < zero(x) && throw_complex_domainerror(:sqrt, x)
@@ -581,6 +598,8 @@ end
 Return ``\\sqrt{x}``. Throws [`DomainError`](@ref) for negative [`Real`](@ref) arguments.
 Use complex negative arguments instead. The prefix operator `√` is equivalent to `sqrt`.
 
+See also: [`hypot`](@ref).
+
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
 julia> sqrt(big(81))
@@ -595,9 +614,16 @@ Stacktrace:
 
 julia> sqrt(big(complex(-81)))
 0.0 + 9.0im
+
+julia> .√(1:4)
+4-element Vector{Float64}:
+ 1.0
+ 1.4142135623730951
+ 1.7320508075688772
+ 2.0
 ```
 """
-sqrt(x::Real) = sqrt(float(x))
+sqrt(x)
 
 """
     hypot(x, y)
@@ -607,9 +633,15 @@ Compute the hypotenuse ``\\sqrt{|x|^2+|y|^2}`` avoiding overflow and underflow.
 This code is an implementation of the algorithm described in:
 An Improved Algorithm for `hypot(a,b)`
 by Carlos F. Borges
-The article is available online at ArXiv at the link
+The article is available online at arXiv at the link
   https://arxiv.org/abs/1904.09481
 
+    hypot(x...)
+
+Compute the hypotenuse ``\\sqrt{\\sum |x_i|^2}`` avoiding overflow and underflow.
+
+See also `norm` in the [`LinearAlgebra`](@ref man-linalg) standard library.
+
 # Examples
 ```jldoctest; filter = r"Stacktrace:(\\n \\[[0-9]+\\].*)*"
 julia> a = Int64(10)^10;
@@ -625,85 +657,103 @@ Stacktrace:
 
 julia> hypot(3, 4im)
 5.0
+
+julia> hypot(-5.7)
+5.7
+
+julia> hypot(3, 4im, 12.0)
+13.0
+
+julia> using LinearAlgebra
+
+julia> norm([a, a, a, a]) == hypot(a, a, a, a)
+true
 ```
 """
-hypot(x::Number, y::Number) = hypot(promote(x, y)...)
-hypot(x::Complex, y::Complex) = hypot(abs(x), abs(y))
-hypot(x::T, y::T) where {T<:Real} = hypot(float(x), float(y))
-function hypot(x::T, y::T) where {T<:Number}
-    if !iszero(x)
-        z = y/x
-        z2 = z*z
+hypot(x::Number) = abs(float(x))
+hypot(x::Number, y::Number) = _hypot(promote(float(x), y)...)
+hypot(x::Number, y::Number, xs::Number...) = _hypot(promote(float(x), y, xs...))
+function _hypot(x, y)
+    # preserves unit
+    axu = abs(x)
+    ayu = abs(y)
 
-        abs(x) * sqrt(oneunit(z2) + z2)
-    else
-        abs(y)
-    end
-end
+    # unitless
+    ax = axu / oneunit(axu)
+    ay = ayu / oneunit(ayu)
 
-function hypot(x::T, y::T) where T<:AbstractFloat
     # Return Inf if either or both inputs is Inf (Compliance with IEEE754)
-    if isinf(x) || isinf(y)
-        return T(Inf)
+    if isinf(ax) || isinf(ay)
+        return typeof(axu)(Inf)
     end
 
     # Order the operands
-    ax,ay = abs(x), abs(y)
     if ay > ax
-        ax,ay = ay,ax
+        axu, ayu = ayu, axu
+        ax, ay = ay, ax
     end
 
     # Widely varying operands
-    if ay <= ax*sqrt(eps(T)/2)  #Note: This also gets ay == 0
-        return ax
+    if ay <= ax*sqrt(eps(typeof(ax))/2)  #Note: This also gets ay == 0
+        return axu
     end
 
     # Operands do not vary widely
-    scale = eps(T)*sqrt(floatmin(T))  #Rescaling constant
-    if ax > sqrt(floatmax(T)/2)
+    scale = eps(typeof(ax))*sqrt(floatmin(ax))  #Rescaling constant
+    if ax > sqrt(floatmax(ax)/2)
         ax = ax*scale
         ay = ay*scale
         scale = inv(scale)
-    elseif ay < sqrt(floatmin(T))
+    elseif ay < sqrt(floatmin(ax))
         ax = ax/scale
         ay = ay/scale
     else
-        scale = one(scale)
+        scale = oneunit(scale)
     end
-    h = sqrt(muladd(ax,ax,ay*ay))
+    h = sqrt(muladd(ax, ax, ay*ay))
     # This branch is correctly rounded but requires a native hardware fma.
-    if Base.Math.FMA_NATIVE
+    if Core.Intrinsics.have_fma(typeof(h))
         hsquared = h*h
         axsquared = ax*ax
-        h -= (fma(-ay,ay,hsquared-axsquared) + fma(h,h,-hsquared) - fma(ax,ax,-axsquared))/(2*h)
+        h -= (fma(-ay, ay, hsquared-axsquared) + fma(h, h,-hsquared) - fma(ax, ax, -axsquared))/(2*h)
     # This branch is within one ulp of correctly rounded.
     else
         if h <= 2*ay
             delta = h-ay
-            h -= muladd(delta,delta-2*(ax-ay),ax*(2*delta - ax))/(2*h)
+            h -= muladd(delta, delta-2*(ax-ay), ax*(2*delta - ax))/(2*h)
         else
             delta = h-ax
-            h -= muladd(delta,delta,muladd(ay,(4*delta-ay),2*delta*(ax-2*ay)))/(2*h)
+            h -= muladd(delta, delta, muladd(ay, (4*delta - ay), 2*delta*(ax - 2*ay)))/(2*h)
         end
     end
-    return h*scale
+    return h*scale*oneunit(axu)
+end
+@inline function _hypot(x::Float32, y::Float32)
+    if isinf(x) || isinf(y)
+        return Inf32
+    end
+    _x, _y = Float64(x), Float64(y)
+    return Float32(sqrt(muladd(_x, _x, _y*_y)))
+end
+@inline function _hypot(x::Float16, y::Float16)
+    if isinf(x) || isinf(y)
+        return Inf16
+    end
+    _x, _y = Float32(x), Float32(y)
+    return Float16(sqrt(muladd(_x, _x, _y*_y)))
+end
+_hypot(x::ComplexF16, y::ComplexF16) = Float16(_hypot(ComplexF32(x), ComplexF32(y)))
+
+function _hypot(x::NTuple{N,<:Number}) where {N}
+    maxabs = maximum(abs, x)
+    if isnan(maxabs) && any(isinf, x)
+        return typeof(maxabs)(Inf)
+    elseif (iszero(maxabs) || isinf(maxabs))
+        return maxabs
+    else
+        return maxabs * sqrt(sum(y -> abs2(y / maxabs), x))
+    end
 end
-
-"""
-    hypot(x...)
-
-Compute the hypotenuse ``\\sqrt{\\sum |x_i|^2}`` avoiding overflow and underflow.
-
-# Examples
-```jldoctest
-julia> hypot(-5.7)
-5.7
-
-julia> hypot(3, 4im, 12.0)
-13.0
-```
-"""
-hypot(x::Number...) = sqrt(sum(abs2(y) for y in x))
 
 atan(y::Real, x::Real) = atan(promote(float(y),float(x))...)
 atan(y::T, x::T) where {T<:AbstractFloat} = Base.no_op_err("atan", T)
@@ -735,7 +785,7 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
     xu = reinterpret(Unsigned, x)
     xs = xu & ~sign_mask(T)
     xs >= exponent_mask(T) && return x # NaN or Inf
-    k = Int(xs >> significand_bits(T))
+    k = (xs >> significand_bits(T)) % Int
     if k == 0 # x is subnormal
         xs == 0 && return x # +-0
         m = leading_zeros(xs) - exponent_bits(T)
@@ -768,7 +818,8 @@ function ldexp(x::T, e::Integer) where T<:IEEEFloat
             return flipsign(T(0.0), x)
         end
         k += significand_bits(T)
-        z = T(2.0)^-significand_bits(T)
+        # z = T(2.0) ^ (-significand_bits(T))
+        z = reinterpret(T, rem(exponent_bias(T)-significand_bits(T), uinttype(T)) << significand_bits(T))
         xu = (xu & ~exponent_mask(T)) | (rem(k, uinttype(T)) << significand_bits(T))
         return z*reinterpret(T, xu)
     end
@@ -780,10 +831,19 @@ ldexp(x::Float16, q::Integer) = Float16(ldexp(Float32(x), q))
 
 Get the exponent of a normalized floating-point number.
 Returns the largest integer `y` such that `2^y ≤ abs(x)`.
+
+# Examples
+```jldoctest
+julia> exponent(6.5)
+2
+
+julia> exponent(16.0)
+4
+```
 """
 function exponent(x::T) where T<:IEEEFloat
     @noinline throw1(x) = throw(DomainError(x, "Cannot be NaN or Inf."))
-    @noinline throw2(x) = throw(DomainError(x, "Cannot be subnormal converted to 0."))
+    @noinline throw2(x) = throw(DomainError(x, "Cannot be ±0.0."))
     xs = reinterpret(Unsigned, x) & ~sign_mask(T)
     xs >= exponent_mask(T) && throw1(x)
     k = Int(xs >> significand_bits(T))
@@ -795,20 +855,42 @@ function exponent(x::T) where T<:IEEEFloat
     return k - exponent_bias(T)
 end
 
+# Like exponent, but assumes the nothrow precondition. For
+# internal use only. Could be written as
+# @assume_effects :nothrow exponent()
+# but currently this form is easier on the compiler.
+function _exponent_finite_nonzero(x::T) where T<:IEEEFloat
+    # @precond :nothrow !isnan(x) && !isinf(x) && !iszero(x)
+    xs = reinterpret(Unsigned, x) & ~sign_mask(T)
+    k = rem(xs >> significand_bits(T), Int)
+    if k == 0 # x is subnormal
+        m = leading_zeros(xs) - exponent_bits(T)
+        k = 1 - m
+    end
+    return k - exponent_bias(T)
+end
+
 """
     significand(x)
 
-Extract the `significand(s)` (a.k.a. mantissa), in binary representation, of a
-floating-point number. If `x` is a non-zero finite number, then the result will be
-a number of the same type on the interval ``[1,2)``. Otherwise `x` is returned.
+Extract the significand (a.k.a. mantissa) of a floating-point number. If `x` is
+a non-zero finite number, then the result will be a number of the same type and
+sign as `x`, and whose absolute value is on the interval ``[1,2)``. Otherwise
+`x` is returned.
 
 # Examples
 ```jldoctest
-julia> significand(15.2)/15.2
-0.125
+julia> significand(15.2)
+1.9
+
+julia> significand(-15.2)
+-1.9
+
+julia> significand(-15.2) * 2^3
+-15.2
 
-julia> significand(15.2)*8
-15.2
+julia> significand(-Inf), significand(Inf), significand(NaN)
+(-Inf, Inf, NaN)
 ```
 """
 function significand(x::T) where T<:IEEEFloat
@@ -830,6 +912,11 @@ end
 
 Return `(x,exp)` such that `x` has a magnitude in the interval ``[1/2, 1)`` or 0,
 and `val` is equal to ``x \\times 2^{exp}``.
+# Examples
+```jldoctest
+julia> frexp(12.8)
+(0.8, 4)
+```
 """
 function frexp(x::T) where T<:IEEEFloat
     xu = reinterpret(Unsigned, x)
@@ -848,11 +935,39 @@ function frexp(x::T) where T<:IEEEFloat
     return reinterpret(T, xu), k
 end
 
-rem(x::Float64, y::Float64, ::RoundingMode{:Nearest}) =
-    ccall((:remainder, libm),Float64,(Float64,Float64),x,y)
-rem(x::Float32, y::Float32, ::RoundingMode{:Nearest}) =
-    ccall((:remainderf, libm),Float32,(Float32,Float32),x,y)
-rem(x::Float16, y::Float16, r::RoundingMode{:Nearest}) = Float16(rem(Float32(x), Float32(y), r))
+# NOTE: This `rem` method is adapted from the msun `remainder` and `remainderf`
+# functions, which are under the following license:
+#
+# Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+#
+# Developed at SunSoft, a Sun Microsystems, Inc. business.
+# Permission to use, copy, modify, and distribute this
+# software is freely granted, provided that this notice
+# is preserved.
+function rem(x::T, p::T, ::RoundingMode{:Nearest}) where T<:IEEEFloat
+    (iszero(p) || !isfinite(x) || isnan(p)) && return T(NaN)
+    x == p && return copysign(zero(T), x)
+    oldx = x
+    x = abs(rem(x, 2p))  # 2p may overflow but that's okay
+    p = abs(p)
+    if p < 2 * floatmin(T)  # Check whether dividing p by 2 will underflow
+        if 2x > p
+            x -= p
+            if 2x >= p
+                x -= p
+            end
+        end
+    else
+        p_half = p / 2
+        if x > p_half
+            x -= p
+            if x >= p_half
+                x -= p
+            end
+        end
+    end
+    return flipsign(x, oldx)
+end
 
 
 """
@@ -872,47 +987,94 @@ julia> modf(-3.5)
 """
 modf(x) = isinf(x) ? (flipsign(zero(x), x), x) : (rem(x, one(x)), trunc(x))
 
-function modf(x::Float32)
-    temp = Ref{Float32}()
-    f = ccall((:modff, libm), Float32, (Float32, Ptr{Float32}), x, temp)
-    f, temp[]
+function modf(x::T) where T<:IEEEFloat
+    isinf(x) && return (copysign(zero(T), x), x)
+    ix = trunc(x)
+    rx = copysign(x - ix, x)
+    return (rx, ix)
 end
 
-function modf(x::Float64)
-    temp = Ref{Float64}()
-    f = ccall((:modf, libm), Float64, (Float64, Ptr{Float64}), x, temp)
-    f, temp[]
+# @constprop aggressive to help the compiler see the switch between the integer and float
+# variants for callers with constant `y`
+@constprop :aggressive function ^(x::Float64, y::Float64)
+    yint = unsafe_trunc(Int, y) # Note, this is actually safe since julia freezes the result
+    y == yint && return x^yint
+    #numbers greater than 2*inv(eps(T)) must be even, and the pow will overflow
+    y >= 2*inv(eps()) && return x^(typemax(Int64)-1)
+    xu = reinterpret(UInt64, x)
+    x<0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
+    x === 1.0 && return 1.0
+    x==0 && return abs(y)*Inf*(!(y>0))
+    !isfinite(x) && return x*(y>0 || isnan(x))           # x is inf or NaN
+    if xu < (UInt64(1)<<52) # x is subnormal
+        xu = reinterpret(UInt64, x * 0x1p52) # normalize x
+        xu &= ~sign_mask(Float64)
+        xu -= UInt64(52) << 52 # mess with the exponent
+    end
+    return pow_body(xu, y)
 end
 
-@inline function ^(x::Float64, y::Float64)
-    z = ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, y)
-    if isnan(z) & !isnan(x+y)
-        throw_exp_domainerror(x)
-    end
-    z
+@inline function pow_body(xu::UInt64, y::Float64)
+    logxhi,logxlo = Base.Math._log_ext(xu)
+    xyhi, xylo = two_mul(logxhi,y)
+    xylo = muladd(logxlo, y, xylo)
+    hi = xyhi+xylo
+    return Base.Math.exp_impl(hi, xylo-(hi-xyhi), Val(:ℯ))
 end
-@inline function ^(x::Float32, y::Float32)
-    z = ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, y)
-    if isnan(z) & !isnan(x+y)
-        throw_exp_domainerror(x)
-    end
-    z
+
+@constprop :aggressive function ^(x::T, y::T) where T <: Union{Float16, Float32}
+    yint = unsafe_trunc(Int64, y) # Note, this is actually safe since julia freezes the result
+    y == yint && return x^yint
+    #numbers greater than 2*inv(eps(T)) must be even, and the pow will overflow
+    y >= 2*inv(eps(T)) && return x^(typemax(Int64)-1)
+    x < 0 && y > -4e18 && throw_exp_domainerror(x) # |y| is small enough that y isn't an integer
+    return pow_body(x, y)
+end
+
+@inline function pow_body(x::T, y::T) where T <: Union{Float16, Float32}
+    x == 1 && return one(T)
+    !isfinite(x) && return x*(y>0 || isnan(x))
+    x==0 && return abs(y)*T(Inf)*(!(y>0))
+    return T(exp2(log2(abs(widen(x))) * y))
+end
+
+# compensated power by squaring
+@constprop :aggressive @inline function ^(x::Float64, n::Integer)
+    n == 0 && return one(x)
+    return pow_body(x, n)
 end
-@inline function ^(x::Float64, y::Integer)
-    y == -1 && return inv(x)
-    y == 0 && return one(x)
-    y == 1 && return x
-    y == 2 && return x*x
-    y == 3 && return x*x*x
-    ccall("llvm.pow.f64", llvmcall, Float64, (Float64, Float64), x, Float64(y))
+
+@assume_effects :terminates_locally @noinline function pow_body(x::Float64, n::Integer)
+    y = 1.0
+    xnlo = ynlo = 0.0
+    n == 3 && return x*x*x # keep compatibility with literal_pow
+    if n < 0
+        rx = inv(x)
+        n==-2 && return rx*rx #keep compatability with literal_pow
+        isfinite(x) && (xnlo = -fma(x, rx, -1.) * rx)
+        x = rx
+        n = -n
+    end
+    while n > 1
+        if n&1 > 0
+            err = muladd(y, xnlo, x*ynlo)
+            y, ynlo = two_mul(x,y)
+            ynlo += err
+        end
+        err = x*2*xnlo
+        x, xnlo = two_mul(x, x)
+        xnlo += err
+        n >>>= 1
+    end
+    !isfinite(x) && return x*y
+    return muladd(x, y, muladd(y, xnlo, x*ynlo))
 end
-@inline function ^(x::Float32, y::Integer)
-    y == -1 && return inv(x)
-    y == 0 && return one(x)
-    y == 1 && return x
-    y == 2 && return x*x
-    y == 3 && return x*x*x
-    ccall("llvm.pow.f32", llvmcall, Float32, (Float32, Float32), x, Float32(y))
+
+function ^(x::Float32, n::Integer)
+    n == -2 && return (i=inv(x); i*i)
+    n == 3 && return x*x*x #keep compatibility with literal_pow
+    n < 0 && return Float32(Base.power_by_squaring(inv(Float64(x)),-n))
+    Float32(Base.power_by_squaring(Float64(x),n))
 end
 @inline ^(x::Float16, y::Integer) = Float16(Float32(x) ^ y)
 @inline literal_pow(::typeof(^), x::Float16, ::Val{p}) where {p} = Float16(literal_pow(^,Float32(x),Val(p)))
@@ -1138,24 +1300,6 @@ julia> 3 * 2 + 1
 """
 muladd(x,y,z) = x*y+z
 
-# Float16 definitions
-
-for func in (:sin,:cos,:tan,:asin,:acos,:atan,:sinh,:cosh,:tanh,:asinh,:acosh,
-             :atanh,:exp,:exp2,:exp10,:log,:log2,:log10,:sqrt,:lgamma,:log1p)
-    @eval begin
-        $func(a::Float16) = Float16($func(Float32(a)))
-        $func(a::ComplexF16) = ComplexF16($func(ComplexF32(a)))
-    end
-end
-
-for func in (:atan,:hypot)
-    @eval begin
-        $func(a::Float16,b::Float16) = Float16($func(Float32(a),Float32(b)))
-    end
-end
-
-cbrt(a::Float16) = Float16(cbrt(Float32(a)))
-sincos(a::Float16) = Float16.(sincos(Float32(a)))
 
 # helper functions for Libm functionality
 
@@ -1184,20 +1328,50 @@ Return positive part of the high word of `x` as a `UInt32`.
 # More special functions
 include("special/cbrt.jl")
 include("special/exp.jl")
-include("special/exp10.jl")
-include("special/ldexp_exp.jl")
 include("special/hyperbolic.jl")
 include("special/trig.jl")
 include("special/rem_pio2.jl")
 include("special/log.jl")
 
-# `missing` definitions for functions in this module
-for f in (:(acos), :(acosh), :(asin), :(asinh), :(atan), :(atanh),
-          :(sin), :(sinh), :(cos), :(cosh), :(tan), :(tanh),
-          :(exp), :(exp2), :(expm1), :(log), :(log10), :(log1p),
-          :(log2), :(exponent), :(sqrt))
+
+# Float16 definitions
+
+for func in (:sin,:cos,:tan,:asin,:acos,:atan,:cosh,:tanh,:asinh,:acosh,
+             :atanh,:log,:log2,:log10,:sqrt,:lgamma,:log1p)
+    @eval begin
+        $func(a::Float16) = Float16($func(Float32(a)))
+        $func(a::ComplexF16) = ComplexF16($func(ComplexF32(a)))
+    end
+end
+
+for func in (:exp,:exp2,:exp10,:sinh)
+     @eval $func(a::ComplexF16) = ComplexF16($func(ComplexF32(a)))
+end
+
+
+atan(a::Float16,b::Float16) = Float16(atan(Float32(a),Float32(b)))
+sincos(a::Float16) = Float16.(sincos(Float32(a)))
+
+for f in (:sin, :cos, :tan, :asin, :atan, :acos,
+          :sinh, :cosh, :tanh, :asinh, :acosh, :atanh,
+          :exp, :exp2, :exp10, :expm1, :log, :log2, :log10, :log1p,
+          :exponent, :sqrt, :cbrt)
+    @eval function ($f)(x::Real)
+        xf = float(x)
+        x === xf && throw(MethodError($f, (x,)))
+        return ($f)(xf)
+    end
     @eval $(f)(::Missing) = missing
 end
+
+for f in (:atan, :hypot, :log)
+    @eval $(f)(::Missing, ::Missing) = missing
+    @eval $(f)(::Number, ::Missing) = missing
+    @eval $(f)(::Missing, ::Number) = missing
+end
+
+exp2(x::AbstractFloat) = 2^x
+exp10(x::AbstractFloat) = 10^x
 clamp(::Missing, lo, hi) = missing
 
 end # module
diff --git a/base/mathconstants.jl b/base/mathconstants.jl
index a3d1be99becbb5..3bb4bb52ad07f7 100644
--- a/base/mathconstants.jl
+++ b/base/mathconstants.jl
@@ -23,10 +23,17 @@ Base.@irrational catalan  0.91596559417721901505  catalan
 
 The constant pi.
 
+Unicode `π` can be typed by writing `\\pi` then pressing tab in the Julia REPL, and in many editors.
+
+See also: [`sinpi`](@ref), [`sincospi`](@ref), [`deg2rad`](@ref).
+
 # Examples
 ```jldoctest
 julia> pi
 π = 3.1415926535897...
+
+julia> 1/2pi
+0.15915494309189535
 ```
 """
 π, const pi = π
@@ -37,10 +44,20 @@ julia> pi
 
 The constant ℯ.
 
+Unicode `ℯ` can be typed by writing `\\euler` and pressing tab in the Julia REPL, and in many editors.
+
+See also: [`exp`](@ref), [`cis`](@ref), [`cispi`](@ref).
+
 # Examples
 ```jldoctest
 julia> ℯ
 ℯ = 2.7182818284590...
+
+julia> log(ℯ)
+1
+
+julia> ℯ^(im)π ≈ -1
+true
 ```
 """
 ℯ, const e = ℯ
@@ -55,6 +72,11 @@ Euler's constant.
 ```jldoctest
 julia> Base.MathConstants.eulergamma
 γ = 0.5772156649015...
+
+julia> dx = 10^-6;
+
+julia> sum(-exp(-x) * log(x) for x in dx:dx:100) * dx
+0.5772078382499133
 ```
 """
 γ, const eulergamma = γ
@@ -69,6 +91,9 @@ The golden ratio.
 ```jldoctest
 julia> Base.MathConstants.golden
 φ = 1.6180339887498...
+
+julia> (2ans - 1)^2 ≈ 5
+true
 ```
 """
 φ, const golden = φ
@@ -82,6 +107,9 @@ Catalan's constant.
 ```jldoctest
 julia> Base.MathConstants.catalan
 catalan = 0.9159655941772...
+
+julia> sum(log(x)/(1+x^2) for x in 1:0.01:10^6) * 0.01
+0.9159466120554123
 ```
 """
 catalan
@@ -95,4 +123,10 @@ Base.literal_pow(::typeof(^), ::Irrational{:ℯ}, ::Val{p}) where {p} = exp(p)
 Base.log(::Irrational{:ℯ}) = 1 # use 1 to correctly promote expressions like log(x)/log(ℯ)
 Base.log(::Irrational{:ℯ}, x::Number) = log(x)
 
+Base.sin(::Irrational{:π}) = 0.0
+Base.cos(::Irrational{:π}) = -1.0
+Base.sincos(::Irrational{:π}) = (0.0, -1.0)
+Base.tan(::Irrational{:π}) = 0.0
+Base.cot(::Irrational{:π}) = -1/0
+
 end # module
diff --git a/base/meta.jl b/base/meta.jl
index ff2ea563cb3234..cf59d3fa3274e1 100644
--- a/base/meta.jl
+++ b/base/meta.jl
@@ -9,14 +9,24 @@ using ..CoreLogging
 
 export quot,
        isexpr,
+       isidentifier,
+       isoperator,
+       isunaryoperator,
+       isbinaryoperator,
+       ispostfixoperator,
+       replace_sourceloc!,
        show_sexpr,
        @dump
 
+using Base: isidentifier, isoperator, isunaryoperator, isbinaryoperator, ispostfixoperator
+import Base: isexpr
+
 """
     Meta.quot(ex)::Expr
 
-Quote expression `ex` to produce an expression with head `quote`. This can for instance be used to represent objects of type `Expr` in the AST.
-See also the manual section about [QuoteNode](@ref man-quote-node).
+Quote expression `ex` to produce an expression with head `quote`. This can for
+instance be used to represent objects of type `Expr` in the AST. See also the
+manual section about [QuoteNode](@ref man-quote-node).
 
 # Examples
 ```jldoctest
@@ -38,7 +48,10 @@ quot(ex) = Expr(:quote, ex)
 """
     Meta.isexpr(ex, head[, n])::Bool
 
-Check if `ex` is an expression with head `head` and `n` arguments.
+Return true if `ex` is an `Expr` with the given type `head` and optionally that
+the argument list is of length `n`. `head` may be a `Symbol` or collection of
+`Symbol`s. For example, to check that a macro was passed a function call
+expression, you might use `isexpr(ex, :call)`.
 
 # Examples
 ```jldoctest
@@ -61,10 +74,36 @@ julia> Meta.isexpr(ex, :call, 2)
 true
 ```
 """
-isexpr(@nospecialize(ex), head::Symbol) = isa(ex, Expr) && ex.head === head
-isexpr(@nospecialize(ex), heads) = isa(ex, Expr) && in(ex.head, heads)
-isexpr(@nospecialize(ex), head::Symbol, n::Int) = isa(ex, Expr) && ex.head === head && length(ex.args) == n
-isexpr(@nospecialize(ex), heads, n::Int) = isa(ex, Expr) && in(ex.head, heads) && length(ex.args) == n
+isexpr
+
+"""
+    replace_sourceloc!(location, expr)
+
+Overwrite the caller source location for each macro call in `expr`, returning
+the resulting AST.  This is useful when you need to wrap a macro inside a
+macro, and want the inner macro to see the `__source__` location of the outer
+macro.  For example:
+
+```
+macro test_is_one(ex)
+    replace_sourceloc!(__source__, :(@test \$(esc(ex)) == 1))
+end
+@test_is_one 2
+```
+
+`@test` now reports the location of the call `@test_is_one 2` to the user,
+rather than line 2 where `@test` is used as an implementation detail.
+"""
+function replace_sourceloc!(sourceloc, @nospecialize(ex))
+    if ex isa Expr
+        if ex.head == :macrocall
+            ex.args[2] = sourceloc
+        end
+        map!(e -> replace_sourceloc!(sourceloc, e), ex.args, ex.args)
+    end
+    return ex
+end
+
 
 """
     Meta.show_sexpr([io::IO,], ex)
@@ -147,41 +186,52 @@ The expression passed to the [`parse`](@ref) function could not be interpreted a
 expression.
 """
 struct ParseError <: Exception
-    msg::AbstractString
+    msg::String
 end
 
 function _parse_string(text::AbstractString, filename::AbstractString,
-                       index::Integer, options)
+                       lineno::Integer, index::Integer, options)
     if index < 1 || index > ncodeunits(text) + 1
         throw(BoundsError(text, index))
     end
-    ex, offset::Int = Core._parse(text, filename, index-1, options)
+    ex, offset::Int = Core._parse(text, filename, lineno, index-1, options)
     ex, offset+1
 end
 
 """
     parse(str, start; greedy=true, raise=true, depwarn=true)
 
-Parse the expression string and return an expression (which could later be passed to eval
-for execution). `start` is the index of the first character to start parsing. If `greedy` is
-`true` (default), `parse` will try to consume as much input as it can; otherwise, it will
-stop as soon as it has parsed a valid expression. Incomplete but otherwise syntactically
-valid expressions will return `Expr(:incomplete, "(error message)")`. If `raise` is `true`
-(default), syntax errors other than incomplete expressions will raise an error. If `raise`
-is `false`, `parse` will return an expression that will raise an error upon evaluation. If
-`depwarn` is `false`, deprecation warnings will be suppressed.
+Parse the expression string and return an expression (which could later be
+passed to eval for execution). `start` is the code unit index into `str` of the
+first character to start parsing at (as with all string indexing, these are not
+character indices). If `greedy` is `true` (default), `parse` will try to consume
+as much input as it can; otherwise, it will stop as soon as it has parsed a
+valid expression. Incomplete but otherwise syntactically valid expressions will
+return `Expr(:incomplete, "(error message)")`. If `raise` is `true` (default),
+syntax errors other than incomplete expressions will raise an error. If `raise`
+is `false`, `parse` will return an expression that will raise an error upon
+evaluation. If `depwarn` is `false`, deprecation warnings will be suppressed.
 
 ```jldoctest
-julia> Meta.parse("x = 3, y = 5", 7)
-(:(y = 5), 13)
+julia> Meta.parse("(α, β) = 3, 5", 1) # start of string
+(:((α, β) = (3, 5)), 16)
+
+julia> Meta.parse("(α, β) = 3, 5", 1, greedy=false)
+(:((α, β)), 9)
+
+julia> Meta.parse("(α, β) = 3, 5", 16) # end of string
+(nothing, 16)
+
+julia> Meta.parse("(α, β) = 3, 5", 11) # index of 3
+(:((3, 5)), 16)
 
-julia> Meta.parse("x = 3, y = 5", 5)
-(:((3, y) = 5), 13)
+julia> Meta.parse("(α, β) = 3, 5", 11, greedy=false)
+(3, 13)
 ```
 """
 function parse(str::AbstractString, pos::Integer; greedy::Bool=true, raise::Bool=true,
                depwarn::Bool=true)
-    ex, pos = _parse_string(str, "none", pos, greedy ? :statement : :atom)
+    ex, pos = _parse_string(str, "none", 1, pos, greedy ? :statement : :atom)
     if raise && isa(ex,Expr) && ex.head === :error
         throw(ParseError(ex.args[1]))
     end
@@ -225,12 +275,12 @@ function parse(str::AbstractString; raise::Bool=true, depwarn::Bool=true)
     return ex
 end
 
-function parseatom(text::AbstractString, pos::Integer; filename="none")
-    return _parse_string(text, filename, pos, :atom)
+function parseatom(text::AbstractString, pos::Integer; filename="none", lineno=1)
+    return _parse_string(text, String(filename), lineno, pos, :atom)
 end
 
-function parseall(text::AbstractString; filename="none")
-    ex,_ = _parse_string(text, filename, 1, :all)
+function parseall(text::AbstractString; filename="none", lineno=1)
+    ex,_ = _parse_string(text, String(filename), lineno, 1, :all)
     return ex
 end
 
@@ -303,10 +353,26 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
         x.edges .+= slot_offset
         return x
     end
+    if isa(x, Core.ReturnNode)
+        return Core.ReturnNode(
+            _partially_inline!(x.val, slot_replacements, type_signature, static_param_values,
+                               slot_offset, statement_offset, boundscheck),
+        )
+    end
+    if isa(x, Core.GotoIfNot)
+        return Core.GotoIfNot(
+            _partially_inline!(x.cond, slot_replacements, type_signature, static_param_values,
+                               slot_offset, statement_offset, boundscheck),
+            x.dest + statement_offset,
+        )
+    end
     if isa(x, Expr)
         head = x.head
         if head === :static_parameter
-            return QuoteNode(static_param_values[x.args[1]])
+            if isassigned(static_param_values, x.args[1])
+                return QuoteNode(static_param_values[x.args[1]])
+            end
+            return x
         elseif head === :cfunction
             @assert !isa(type_signature, UnionAll) || !isempty(spvals)
             if !isa(x.args[2], QuoteNode) # very common no-op
@@ -326,7 +392,7 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
                 elseif i == 4
                     @assert isa(x.args[4], Int)
                 elseif i == 5
-                    @assert isa((x.args[5]::QuoteNode).value, Symbol)
+                    @assert isa((x.args[5]::QuoteNode).value, Union{Symbol, Tuple{Symbol, UInt8}})
                 else
                     x.args[i] = _partially_inline!(x.args[i], slot_replacements,
                                                    type_signature, static_param_values,
@@ -349,7 +415,31 @@ function _partially_inline!(@nospecialize(x), slot_replacements::Vector{Any},
             x.args[2] += statement_offset
         elseif head === :enter
             x.args[1] += statement_offset
-        elseif !is_meta_expr_head(head)
+        elseif head === :isdefined
+            arg = x.args[1]
+            # inlining a QuoteNode or literal into `Expr(:isdefined, x)` is invalid, replace with true
+            if isa(arg, Core.SlotNumber)
+                id = arg.id
+                if 1 <= id <= length(slot_replacements)
+                    replacement = slot_replacements[id]
+                    if isa(replacement, Union{Core.SlotNumber, GlobalRef, Symbol})
+                        return Expr(:isdefined, replacement)
+                    else
+                        @assert !isa(replacement, Expr)
+                        return true
+                    end
+                end
+                return Expr(:isdefined, Core.SlotNumber(id + slot_offset))
+            elseif isexpr(arg, :static_parameter)
+                if isassigned(static_param_values, arg.args[1])
+                    return true
+                end
+                return x
+            else
+                @assert isa(arg, Union{GlobalRef, Symbol})
+                return x
+            end
+        elseif !Core.Compiler.is_meta_expr_head(head)
             partially_inline!(x.args, slot_replacements, type_signature, static_param_values,
                               slot_offset, statement_offset, boundscheck)
         end
@@ -359,6 +449,4 @@ end
 
 _instantiate_type_in_env(x, spsig, spvals) = ccall(:jl_instantiate_type_in_env, Any, (Any, Any, Ptr{Any}), x, spsig, spvals)
 
-is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || head === :loopinfo)
-
 end # module
diff --git a/base/methodshow.jl b/base/methodshow.jl
index 1894e62b4a3765..1fe12d718457d0 100644
--- a/base/methodshow.jl
+++ b/base/methodshow.jl
@@ -2,48 +2,52 @@
 
 # Method and method table pretty-printing
 
+const empty_sym = Symbol("")
+function strip_gensym(sym)
+    if sym === :var"#self#" || sym === :var"#unused#"
+        return empty_sym
+    end
+    return Symbol(replace(String(sym), r"^(.*)#(.*#)?\d+$" => s"\1"))
+end
+
 function argtype_decl(env, n, @nospecialize(sig::DataType), i::Int, nargs, isva::Bool) # -> (argname, argtype)
-    t = sig.parameters[i]
-    if i == nargs && isva && !isvarargtype(t)
-        t = Vararg{t,length(sig.parameters)-nargs+1}
+    t = sig.parameters[unwrapva(min(i, end))]
+    if i == nargs && isva
+        va = sig.parameters[end]
+        if isvarargtype(va) && (!isdefined(va, :N) || !isa(va.N, Int))
+            t = va
+        else
+            ntotal = length(sig.parameters)
+            isvarargtype(va) && (ntotal += va.N - 1)
+            t = Vararg{t,ntotal-nargs+1}
+        end
     end
     if isa(n,Expr)
         n = n.args[1]  # handle n::T in arg list
     end
-    s = string(n)::String
-    i = findfirst(isequal('#'), s)
-    if i !== nothing
-        s = s[1:prevind(s, i)::Int]
-    end
-    if t === Any && !isempty(s)
-        return s, ""
+    n = strip_gensym(n)
+    local s
+    if n === empty_sym
+        s = ""
+    else
+        s = sprint(show_sym, n)
+        t === Any && return s, ""
     end
     if isvarargtype(t)
-        v1, v2 = nothing, nothing
-        if isa(t, UnionAll)
-            v1 = t.var
-            t = t.body
-            if isa(t, UnionAll)
-                v2 = t.var
-                t = t.body
-            end
-        end
-        ut = unwrap_unionall(t)
-        tt, tn = ut.parameters[1], ut.parameters[2]
-        if isa(tn, TypeVar) && (tn === v1 || tn === v2)
-            if tt === Any || (isa(tt, TypeVar) && (tt === v1 || tt === v2))
+        if !isdefined(t, :N)
+            if unwrapva(t) === Any
                 return string(s, "..."), ""
             else
-                return s, string_with_env(env, tt) * "..."
+                return s, string_with_env(env, unwrapva(t)) * "..."
             end
         end
-        return s, string_with_env(env, "Vararg{", tt, ", ", tn, "}")
+        return s, string_with_env(env, "Vararg{", t.T, ", ", t.N, "}")
     end
     return s, string_with_env(env, t)
 end
 
 function method_argnames(m::Method)
-    argnames = ccall(:jl_uncompress_argnames, Vector{Any}, (Any,), m.slot_syms)
+    argnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), m.slot_syms)
     isempty(argnames) && return argnames
     return argnames[1:m.nargs]
 end
@@ -65,7 +69,7 @@ function arg_decl_parts(m::Method, html=false)
         end
         decls = Tuple{String,String}[argtype_decl(show_env, argnames[i], sig, i, m.nargs, m.isva)
                     for i = 1:m.nargs]
-        decls[1] = ("", sprint(show_signature_function, sig.parameters[1], false, decls[1][1], html,
+        decls[1] = ("", sprint(show_signature_function, unwrapva(sig.parameters[1]), false, decls[1][1], html,
                                context = show_env))
     else
         decls = Tuple{String,String}[("", "") for i = 1:length(sig.parameters::SimpleVector)]
@@ -73,10 +77,11 @@ function arg_decl_parts(m::Method, html=false)
     return tv, decls, file, line
 end
 
-const empty_sym = Symbol("")
-
 # NOTE: second argument is deprecated and is no longer used
 function kwarg_decl(m::Method, kwtype = nothing)
+    if m.sig === Tuple # OpaqueClosure
+        return Symbol[]
+    end
     mt = get_methodtable(m)
     if isdefined(mt, :kwsorter)
         kwtype = typeof(mt.kwsorter)
@@ -84,7 +89,7 @@ function kwarg_decl(m::Method, kwtype = nothing)
         kwli = ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), kwtype.name.mt, sig, get_world_counter())
         if kwli !== nothing
             kwli = kwli::Method
-            slotnames = ccall(:jl_uncompress_argnames, Vector{Any}, (Any,), kwli.slot_syms)
+            slotnames = ccall(:jl_uncompress_argnames, Vector{Symbol}, (Any,), kwli.slot_syms)
             kws = filter(x -> !(x === empty_sym || '#' in string(x)), slotnames[(kwli.nargs + 1):end])
             # ensure the kwarg... is always printed last. The order of the arguments are not
             # necessarily the same as defined in the function
@@ -96,7 +101,7 @@ function kwarg_decl(m::Method, kwtype = nothing)
             return kws
         end
     end
-    return Any[]
+    return Symbol[]
 end
 
 function show_method_params(io::IO, tv)
@@ -122,24 +127,37 @@ end
 # In case the line numbers in the source code have changed since the code was compiled,
 # allow packages to set a callback function that corrects them.
 # (Used by Revise and perhaps other packages.)
+# Any function `f` stored here must be consistent with the signature
+#    f(m::Method)::Tuple{Union{Symbol,String}, Union{Int32,Int64}}
 const methodloc_callback = Ref{Union{Function, Nothing}}(nothing)
 
+function fixup_stdlib_path(path::String)
+    # The file defining Base.Sys gets included after this file is included so make sure
+    # this function is valid even in this intermediary state
+    if isdefined(@__MODULE__, :Sys)
+        BUILD_STDLIB_PATH = Sys.BUILD_STDLIB_PATH::String
+        STDLIB = Sys.STDLIB::String
+        if BUILD_STDLIB_PATH != STDLIB
+            # BUILD_STDLIB_PATH gets defined in sysinfo.jl
+            npath = normpath(path)
+            npath′ = replace(npath, normpath(BUILD_STDLIB_PATH) => normpath(STDLIB))
+            return npath == npath′ ? path : npath′
+        end
+    end
+    return path
+end
+
 # This function does the method location updating
 function updated_methodloc(m::Method)::Tuple{String, Int32}
     file, line = m.file, m.line
     if methodloc_callback[] !== nothing
         try
-            file, line = invokelatest(methodloc_callback[], m)
+            file, line = invokelatest(methodloc_callback[], m)::Tuple{Union{Symbol,String}, Union{Int32,Int64}}
         catch
         end
     end
-    # The file defining Base.Sys gets included after this file is included so make sure
-    # this function is valid even in this intermediary state
-    if isdefined(@__MODULE__, :Sys) && Sys.BUILD_STDLIB_PATH != Sys.STDLIB
-        # BUILD_STDLIB_PATH gets defined in sysinfo.jl
-        file = replace(string(file), normpath(Sys.BUILD_STDLIB_PATH) => normpath(Sys.STDLIB))
-    end
-    return string(file), line
+    file = fixup_stdlib_path(string(file))
+    return file, Int32(line)
 end
 
 functionloc(m::Core.MethodInstance) = functionloc(m.def)
@@ -179,6 +197,15 @@ function functionloc(@nospecialize(f))
     return functionloc(first(mt))
 end
 
+function sym_to_string(sym)
+    s = String(sym)
+    if endswith(s, "...")
+        return string(sprint(show_sym, Symbol(s[1:end-3])), "...")
+    else
+        return sprint(show_sym, sym)
+    end
+end
+
 function show(io::IO, m::Method)
     tv, decls, file, line = arg_decl_parts(m)
     sig = unwrap_unionall(m.sig)
@@ -188,12 +215,16 @@ function show(io::IO, m::Method)
         return
     end
     print(io, decls[1][2], "(")
-    join(io, String[isempty(d[2]) ? d[1] : d[1]*"::"*d[2] for d in decls[2:end]],
-                 ", ", ", ")
+    join(
+        io,
+        String[isempty(d[2]) ? d[1] : string(d[1], "::", d[2]) for d in decls[2:end]],
+        ", ",
+        ", ",
+    )
     kwargs = kwarg_decl(m)
     if !isempty(kwargs)
         print(io, "; ")
-        join(io, kwargs, ", ", ", ")
+        join(io, map(sym_to_string, kwargs), ", ", ", ")
     end
     print(io, ")")
     show_method_params(io, tv)
@@ -202,6 +233,7 @@ function show(io::IO, m::Method)
         file, line = updated_methodloc(m)
         print(io, " at ", file, ":", line)
     end
+    nothing
 end
 
 function show_method_list_header(io::IO, ms::MethodList, namefmt::Function)
@@ -210,24 +242,27 @@ function show_method_list_header(io::IO, ms::MethodList, namefmt::Function)
     hasname = isdefined(mt.module, name) &&
               typeof(getfield(mt.module, name)) <: Function
     n = length(ms)
-    if mt.module === Core && n == 0 && mt.defs === nothing && mt.cache !== nothing
-        # try to detect Builtin
-        print(io, "# built-in function; no methods")
+    m = n==1 ? "method" : "methods"
+    print(io, "# $n $m")
+    sname = string(name)
+    namedisplay = namefmt(sname)
+    if hasname
+        what = (startswith(sname, '@') ?
+                    "macro"
+               : mt.module === Core && last(ms).sig === Tuple ?
+                    "builtin function"
+               : # else
+                    "generic function")
+        print(io, " for ", what, " ", namedisplay)
+    elseif '#' in sname
+        print(io, " for anonymous function ", namedisplay)
+    elseif mt === _TYPE_NAME.mt
+        print(io, " for type constructor")
     else
-        m = n==1 ? "method" : "methods"
-        print(io, "# $n $m")
-        sname = string(name)
-        namedisplay = namefmt(sname)
-        if hasname
-            what = startswith(sname, '@') ? "macro" : "generic function"
-            print(io, " for ", what, " ", namedisplay)
-        elseif '#' in sname
-            print(io, " for anonymous function ", namedisplay)
-        elseif mt === _TYPE_NAME.mt
-            print(io, " for type constructor")
-        end
-        print(io, ":")
+        print(io, " for callable object")
     end
+    n > 0 && print(io, ":")
+    nothing
 end
 
 function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=true)
@@ -245,7 +280,7 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru
     last_shown_line_infos === nothing || empty!(last_shown_line_infos)
 
     for meth in ms
-        if max==-1 || n<max
+        if max == -1 || n < max
             n += 1
             println(io)
             print(io, "[$n] ")
@@ -270,9 +305,11 @@ function show_method_table(io::IO, ms::MethodList, max::Int=-1, header::Bool=tru
             end
         end
     end
+    nothing
 end
 
 show(io::IO, ms::MethodList) = show_method_table(io, ms)
+show(io::IO, ::MIME"text/plain", ms::MethodList) = show_method_table(io, ms)
 show(io::IO, mt::Core.MethodTable) = show_method_table(io, MethodList(mt))
 
 function inbase(m::Module)
@@ -334,12 +371,18 @@ function show(io::IO, ::MIME"text/html", m::Method)
         return
     end
     print(io, decls[1][2], "(")
-    join(io, String[isempty(d[2]) ? d[1] : d[1]*"::<b>"*d[2]*"</b>"
-                      for d in decls[2:end]], ", ", ", ")
+    join(
+        io,
+        String[
+            isempty(d[2]) ? d[1] : string(d[1], "::<b>", d[2], "</b>") for d in decls[2:end]
+        ],
+        ", ",
+        ", ",
+    )
     kwargs = kwarg_decl(m)
     if !isempty(kwargs)
         print(io, "; <i>")
-        join(io, kwargs, ", ", ", ")
+        join(io, map(sym_to_string, kwargs), ", ", ", ")
         print(io, "</i>")
     end
     print(io, ")")
diff --git a/base/missing.jl b/base/missing.jl
index 1d42188a656c01..e1988064aadc12 100644
--- a/base/missing.jl
+++ b/base/missing.jl
@@ -12,7 +12,7 @@ where it is not supported. The error message, in the `msg` field
 may provide more specific details.
 """
 struct MissingException <: Exception
-    msg::AbstractString
+    msg::String
 end
 
 showerror(io::IO, ex::MissingException) =
@@ -36,7 +36,7 @@ Any
 !!! compat "Julia 1.3"
     This function is exported as of Julia 1.3.
 """
-nonmissingtype(::Type{T}) where {T} = Core.Compiler.typesubtract(T, Missing)
+nonmissingtype(::Type{T}) where {T} = typesplit(T, Missing)
 
 function nonmissingtype_checked(T::Type)
     R = nonmissingtype(T)
@@ -91,10 +91,11 @@ isapprox(::Missing, ::Any; kwargs...) = missing
 isapprox(::Any, ::Missing; kwargs...) = missing
 
 # Unary operators/functions
-for f in (:(!), :(~), :(+), :(-), :(zero), :(one), :(oneunit),
+for f in (:(!), :(~), :(+), :(-), :(*), :(&), :(|), :(xor),
+          :(zero), :(one), :(oneunit),
           :(isfinite), :(isinf), :(isodd),
           :(isinteger), :(isreal), :(isnan),
-          :(iszero), :(transpose), :(adjoint), :(float), :(conj),
+          :(iszero), :(transpose), :(adjoint), :(float), :(complex), :(conj),
           :(abs), :(abs2), :(iseven), :(ispow2),
           :(real), :(imag), :(sign), :(inv))
     @eval ($f)(::Missing) = missing
@@ -106,6 +107,13 @@ for f in (:(Base.zero), :(Base.one), :(Base.oneunit))
         $f(T)
     end
 end
+for f in (:(Base.float), :(Base.complex))
+    @eval $f(::Type{Missing}) = Missing
+    @eval function $f(::Type{Union{T, Missing}}) where T
+        T === Any && throw(MethodError($f, (Any,)))  # To prevent StackOverflowError
+        Union{$f(T), Missing}
+    end
+end
 
 # Binary operators/functions
 for f in (:(+), :(-), :(*), :(/), :(^), :(mod), :(rem))
@@ -171,8 +179,8 @@ xor(b::Bool, a::Missing) = missing
 xor(::Missing, ::Integer) = missing
 xor(::Integer, ::Missing) = missing
 
-*(d::Missing, x::AbstractString) = missing
-*(d::AbstractString, x::Missing) = missing
+*(d::Missing, x::Union{AbstractString,AbstractChar}) = missing
+*(d::Union{AbstractString,AbstractChar}, x::Missing) = missing
 
 function float(A::AbstractArray{Union{T, Missing}}) where {T}
     U = typeof(float(zero(T)))
@@ -193,6 +201,8 @@ Use [`collect`](@ref) to obtain an `Array` containing the non-`missing` values i
 be a `Vector` since it is not possible to remove missings while preserving dimensions
 of the input.
 
+See also [`coalesce`](@ref), [`ismissing`](@ref), [`something`](@ref).
+
 # Examples
 ```jldoctest
 julia> x = skipmissing([1, missing, 2])
@@ -273,24 +283,24 @@ mapreduce(f, op, itr::SkipMissing{<:AbstractArray}) =
 
 function _mapreduce(f, op, ::IndexLinear, itr::SkipMissing{<:AbstractArray})
     A = itr.x
-    local ai
+    ai = missing
     inds = LinearIndices(A)
     i = first(inds)
     ilast = last(inds)
-    while i <= ilast
+    for outer i in i:ilast
         @inbounds ai = A[i]
-        ai === missing || break
-        i += 1
+        ai !== missing && break
     end
-    i > ilast && return mapreduce_empty(f, op, eltype(itr))
+    ai === missing && return mapreduce_empty(f, op, eltype(itr))
     a1::eltype(itr) = ai
+    i == typemax(typeof(i)) && return mapreduce_first(f, op, a1)
     i += 1
-    while i <= ilast
+    ai = missing
+    for outer i in i:ilast
         @inbounds ai = A[i]
-        ai === missing || break
-        i += 1
+        ai !== missing && break
     end
-    i > ilast && return mapreduce_first(f, op, a1)
+    ai === missing && return mapreduce_first(f, op, a1)
     # We know A contains at least two non-missing entries: the result cannot be nothing
     something(mapreduce_impl(f, op, itr, first(inds), last(inds)))
 end
@@ -304,32 +314,35 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
 @noinline function mapreduce_impl(f, op, itr::SkipMissing{<:AbstractArray},
                                   ifirst::Integer, ilast::Integer, blksize::Int)
     A = itr.x
-    if ifirst == ilast
+    if ifirst > ilast
+        return nothing
+    elseif ifirst == ilast
         @inbounds a1 = A[ifirst]
         if a1 === missing
             return nothing
         else
             return Some(mapreduce_first(f, op, a1))
         end
-    elseif ifirst + blksize > ilast
+    elseif ilast - ifirst < blksize
         # sequential portion
-        local ai
+        ai = missing
         i = ifirst
-        while i <= ilast
+        for outer i in i:ilast
             @inbounds ai = A[i]
-            ai === missing || break
-            i += 1
+            ai !== missing && break
         end
-        i > ilast && return nothing
+        ai === missing && return nothing
         a1 = ai::eltype(itr)
+        i == typemax(typeof(i)) && return Some(mapreduce_first(f, op, a1))
         i += 1
-        while i <= ilast
+        ai = missing
+        for outer i in i:ilast
             @inbounds ai = A[i]
-            ai === missing || break
-            i += 1
+            ai !== missing && break
         end
-        i > ilast && return Some(mapreduce_first(f, op, a1))
+        ai === missing && return Some(mapreduce_first(f, op, a1))
         a2 = ai::eltype(itr)
+        i == typemax(typeof(i)) && return Some(op(f(a1), f(a2)))
         i += 1
         v = op(f(a1), f(a2))
         @simd for i = i:ilast
@@ -341,7 +354,7 @@ mapreduce_impl(f, op, A::SkipMissing, ifirst::Integer, ilast::Integer) =
         return Some(v)
     else
         # pairwise portion
-        imid = (ifirst + ilast) >> 1
+        imid = ifirst + (ilast - ifirst) >> 1
         v1 = mapreduce_impl(f, op, itr, ifirst, imid, blksize)
         v2 = mapreduce_impl(f, op, itr, imid+1, ilast, blksize)
         if v1 === nothing && v2 === nothing
@@ -388,12 +401,12 @@ function filter(f, itr::SkipMissing{<:AbstractArray})
 end
 
 """
-    coalesce(x, y...)
+    coalesce(x...)
 
 Return the first value in the arguments which is not equal to [`missing`](@ref),
 if any. Otherwise return `missing`.
 
-See also [`something`](@ref).
+See also [`skipmissing`](@ref), [`something`](@ref), [`@coalesce`](@ref).
 
 # Examples
 
@@ -415,3 +428,38 @@ function coalesce end
 coalesce() = missing
 coalesce(x::Missing, y...) = coalesce(y...)
 coalesce(x::Any, y...) = x
+
+
+"""
+    @coalesce(x...)
+
+Short-circuiting version of [`coalesce`](@ref).
+
+# Examples
+```jldoctest
+julia> f(x) = (println("f(\$x)"); missing);
+
+julia> a = 1;
+
+julia> a = @coalesce a f(2) f(3) error("`a` is still missing")
+1
+
+julia> b = missing;
+
+julia> b = @coalesce b f(2) f(3) error("`b` is still missing")
+f(2)
+f(3)
+ERROR: `b` is still missing
+[...]
+```
+
+!!! compat "Julia 1.7"
+    This macro is available as of Julia 1.7.
+"""
+macro coalesce(args...)
+    expr = :(missing)
+    for arg in reverse(args)
+        expr = :((val = $arg) !== missing ? val : $expr)
+    end
+    return esc(:(let val; $expr; end))
+end
diff --git a/base/mpfr.jl b/base/mpfr.jl
index 4fc190691ecd8b..60f59cdb0af7ea 100644
--- a/base/mpfr.jl
+++ b/base/mpfr.jl
@@ -11,19 +11,18 @@ import
         inv, exp, exp2, exponent, factorial, floor, fma, hypot, isinteger,
         isfinite, isinf, isnan, ldexp, log, log2, log10, max, min, mod, modf,
         nextfloat, prevfloat, promote_rule, rem, rem2pi, round, show, float,
-        sum, sqrt, string, print, trunc, precision, exp10, expm1,
-        log1p,
+        sum, sqrt, string, print, trunc, precision, _precision, exp10, expm1, log1p,
         eps, signbit, sign, sin, cos, sincos, tan, sec, csc, cot, acos, asin, atan,
-        cosh, sinh, tanh, sech, csch, coth, acosh, asinh, atanh,
+        cosh, sinh, tanh, sech, csch, coth, acosh, asinh, atanh, lerpi,
         cbrt, typemax, typemin, unsafe_trunc, floatmin, floatmax, rounding,
         setrounding, maxintfloat, widen, significand, frexp, tryparse, iszero,
-        isone, big, _string_n
+        isone, big, _string_n, decompose
 
-import .Base.Rounding: rounding_raw, setrounding_raw
+import ..Rounding: rounding_raw, setrounding_raw
 
-import .Base.GMP: ClongMax, CulongMax, CdoubleMax, Limb
+import ..GMP: ClongMax, CulongMax, CdoubleMax, Limb
 
-import .Base.FastMath.sincos_fast
+import ..FastMath.sincos_fast
 
 version() = VersionNumber(unsafe_string(ccall((:mpfr_get_version,:libmpfr), Ptr{Cchar}, ())))
 patches() = split(unsafe_string(ccall((:mpfr_get_patches,:libmpfr), Ptr{Cchar}, ())),' ')
@@ -152,6 +151,11 @@ global precision; `convert` will always return `x`.
 convenience since decimal literals are converted to `Float64` when parsed, so
 `BigFloat(2.1)` may not yield what you expect.
 
+See also:
+- [`@big_str`](@ref)
+- [`rounding`](@ref) and [`setrounding`](@ref)
+- [`precision`](@ref) and [`setprecision`](@ref)
+
 !!! compat "Julia 1.1"
     `precision` as a keyword argument requires at least Julia 1.1.
     In Julia 1.0 `precision` is the second positional argument (`BigFloat(x, precision)`).
@@ -170,11 +174,6 @@ julia> BigFloat("2.1", RoundUp)
 julia> BigFloat("2.1", RoundUp, precision=128)
 2.100000000000000000000000000000000000007
 ```
-
-# See also
-- [`@big_str`](@ref)
-- [`rounding`](@ref) and [`setrounding`](@ref)
-- [`precision`](@ref) and [`setprecision`](@ref)
 """
 BigFloat(x, r::RoundingMode)
 
@@ -182,7 +181,7 @@ widen(::Type{Float64}) = BigFloat
 widen(::Type{BigFloat}) = BigFloat
 
 function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::Integer=DEFAULT_PRECISION[])
-    if precision == MPFR.precision(x)
+    if precision == _precision(x)
         return x
     else
         z = BigFloat(;precision=precision)
@@ -193,7 +192,7 @@ function BigFloat(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]; precision::I
 end
 
 function _duplicate(x::BigFloat)
-    z = BigFloat(;precision=precision(x))
+    z = BigFloat(;precision=_precision(x))
     ccall((:mpfr_set, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Int32), z, x, 0)
     return z
 end
@@ -295,7 +294,14 @@ function round(::Type{T}, x::BigFloat, r::Union{RoundingMode, MPFRRoundingMode})
     end
     return unsafe_trunc(T, res)
 end
-round(::Type{BigInt}, x::BigFloat, r::Union{RoundingMode, MPFRRoundingMode}) = _unchecked_cast(BigInt, x, r)
+
+function round(::Type{BigInt}, x::BigFloat, r::Union{RoundingMode, MPFRRoundingMode})
+    clear_flags()
+    res = _unchecked_cast(BigInt, x, r)
+    had_range_exception() && throw(InexactError(:round, BigInt, x))
+    return res
+end
+
 round(::Type{T}, x::BigFloat, r::RoundingMode) where T<:Union{Signed, Unsigned} =
     invoke(round, Tuple{Type{<:Union{Signed, Unsigned}}, BigFloat, Union{RoundingMode, MPFRRoundingMode}}, T, x, r)
 round(::Type{BigInt}, x::BigFloat, r::RoundingMode) =
@@ -339,8 +345,23 @@ Float32(x::BigFloat, r::MPFRRoundingMode=ROUNDING_MODE[]) =
     _cpynansgn(ccall((:mpfr_get_flt,:libmpfr), Float32, (Ref{BigFloat}, MPFRRoundingMode), x, r), x)
 Float32(x::BigFloat, r::RoundingMode) = Float32(x, convert(MPFRRoundingMode, r))
 
-# TODO: avoid double rounding
-Float16(x::BigFloat) = Float16(Float32(x))
+function Float16(x::BigFloat) :: Float16
+    res = Float32(x)
+    resi = reinterpret(UInt32, res)
+    if (resi&0x7fffffff) < 0x38800000 # if Float16(res) is subnormal
+        #shift so that the mantissa lines up where it would for normal Float16
+        shift = 113-((resi & 0x7f800000)>>23)
+        if shift<23
+            resi |= 0x0080_0000 # set implicit bit
+            resi >>= shift
+        end
+    end
+    if (resi & 0x1fff == 0x1000) # if we are halfway between 2 Float16 values
+        # adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
+        res = nextfloat(res, cmp(x, res))
+    end
+    return res
+end
 
 promote_rule(::Type{BigFloat}, ::Type{<:Real}) = BigFloat
 promote_rule(::Type{BigInt}, ::Type{<:AbstractFloat}) = BigFloat
@@ -793,37 +814,37 @@ function sign(x::BigFloat)
     return c < 0 ? -one(x) : one(x)
 end
 
-function precision(x::BigFloat)  # precision of an object of type BigFloat
+function _precision(x::BigFloat)  # precision of an object of type BigFloat
     return ccall((:mpfr_get_prec, :libmpfr), Clong, (Ref{BigFloat},), x)
 end
+precision(x::BigFloat; base::Integer=2) = _precision(x, base)
 
-"""
-    precision(BigFloat)
-
-Get the precision (in bits) currently used for [`BigFloat`](@ref) arithmetic.
-"""
-precision(::Type{BigFloat}) = Int(DEFAULT_PRECISION[]) # precision of the type BigFloat itself
+_precision(::Type{BigFloat}) = Int(DEFAULT_PRECISION[]) # default precision of the type BigFloat itself
 
 """
-    setprecision([T=BigFloat,] precision::Int)
+    setprecision([T=BigFloat,] precision::Int; base=2)
 
-Set the precision (in bits) to be used for `T` arithmetic.
+Set the precision (in bits, by default) to be used for `T` arithmetic.
+If `base` is specified, then the precision is the minimum required to give
+at least `precision` digits in the given `base`.
 
 !!! warning
 
     This function is not thread-safe. It will affect code running on all threads, but
     its behavior is undefined if called concurrently with computations that use the
     setting.
+
+!!! compat "Julia 1.8"
+    The `base` keyword requires at least Julia 1.8.
 """
-function setprecision(::Type{BigFloat}, precision::Integer)
-    if precision < 2
-        throw(DomainError(precision, "`precision` cannot be less than 2."))
-    end
-    DEFAULT_PRECISION[] = precision
+function setprecision(::Type{BigFloat}, precision::Integer; base::Integer=2)
+    base > 1 || throw(DomainError(base, "`base` cannot be less than 2."))
+    precision > 0 || throw(DomainError(precision, "`precision` cannot be less than 1."))
+    DEFAULT_PRECISION[] = base == 2 ? precision : ceil(Int, precision * log2(base))
     return precision
 end
 
-setprecision(precision::Integer) = setprecision(BigFloat, precision)
+setprecision(precision::Integer; base::Integer=2) = setprecision(BigFloat, precision; base)
 
 maxintfloat(x::BigFloat) = BigFloat(2)^precision(x)
 maxintfloat(::Type{BigFloat}) = BigFloat(2)^precision(BigFloat)
@@ -917,9 +938,9 @@ floatmin(::Type{BigFloat}) = nextfloat(zero(BigFloat))
 floatmax(::Type{BigFloat}) = prevfloat(BigFloat(Inf))
 
 """
-    setprecision(f::Function, [T=BigFloat,] precision::Integer)
+    setprecision(f::Function, [T=BigFloat,] precision::Integer; base=2)
 
-Change the `T` arithmetic precision (in bits) for the duration of `f`.
+Change the `T` arithmetic precision (in the given `base`) for the duration of `f`.
 It is logically equivalent to:
 
     old = precision(BigFloat)
@@ -930,11 +951,14 @@ It is logically equivalent to:
 Often used as `setprecision(T, precision) do ... end`
 
 Note: `nextfloat()`, `prevfloat()` do not use the precision mentioned by
-`setprecision`
+`setprecision`.
+
+!!! compat "Julia 1.8"
+    The `base` keyword requires at least Julia 1.8.
 """
-function setprecision(f::Function, ::Type{T}, prec::Integer) where T
+function setprecision(f::Function, ::Type{T}, prec::Integer; kws...) where T
     old_prec = precision(T)
-    setprecision(T, prec)
+    setprecision(T, prec; kws...)
     try
         return f()
     finally
@@ -942,7 +966,7 @@ function setprecision(f::Function, ::Type{T}, prec::Integer) where T
     end
 end
 
-setprecision(f::Function, prec::Integer) = setprecision(f, BigFloat, prec)
+setprecision(f::Function, prec::Integer; base::Integer=2) = setprecision(f, BigFloat, prec; base)
 
 function string_mpfr(x::BigFloat, fmt::String)
     pc = Ref{Ptr{UInt8}}()
@@ -963,7 +987,7 @@ function string_mpfr(x::BigFloat, fmt::String)
 end
 
 function _prettify_bigfloat(s::String)::String
-    mantissa, exponent = split(s, 'e')
+    mantissa, exponent = eachsplit(s, 'e')
     if !occursin('.', mantissa)
         mantissa = string(mantissa, '.')
     end
@@ -974,7 +998,7 @@ function _prettify_bigfloat(s::String)::String
     expo = parse(Int, exponent)
     if -5 < expo < 6
         expo == 0 && return mantissa
-        int, frac = split(mantissa, '.')
+        int, frac = eachsplit(mantissa, '.')
         if expo > 0
             expo < length(frac) ?
                 string(int, frac[1:expo], '.', frac[expo+1:end]) :
@@ -983,7 +1007,7 @@ function _prettify_bigfloat(s::String)::String
             neg = startswith(int, '-')
             neg == true && (int = lstrip(int, '-'))
             @assert length(int) == 1
-            string(neg ? '-' : "", '0', '.', '0'^(-expo-1), int, frac)
+            string(neg ? '-' : "", '0', '.', '0'^(-expo-1), int, frac == "0" ? "" : frac)
         end
     else
         string(mantissa, 'e', exponent)
@@ -1022,17 +1046,29 @@ set_emax!(x) = check_exponent_err(ccall((:mpfr_set_emax, :libmpfr), Cint, (Clong
 set_emin!(x) = check_exponent_err(ccall((:mpfr_set_emin, :libmpfr), Cint, (Clong,), x))
 
 function Base.deepcopy_internal(x::BigFloat, stackdict::IdDict)
-    haskey(stackdict, x) && return stackdict[x]
-    # d = copy(x._d)
-    d = x._d
-    d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
-    y = _BigFloat(x.prec, x.sign, x.exp, d′)
-    #ccall((:mpfr_custom_move,:libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary
-    stackdict[x] = y
-    return y
+    get!(stackdict, x) do
+        # d = copy(x._d)
+        d = x._d
+        d′ = GC.@preserve d unsafe_string(pointer(d), sizeof(d)) # creates a definitely-new String
+        y = _BigFloat(x.prec, x.sign, x.exp, d′)
+        #ccall((:mpfr_custom_move,:libmpfr), Cvoid, (Ref{BigFloat}, Ptr{Limb}), y, d) # unnecessary
+        return y
+    end
+end
+
+function decompose(x::BigFloat)::Tuple{BigInt, Int, Int}
+    isnan(x) && return 0, 0, 0
+    isinf(x) && return x.sign, 0, 0
+    x == 0 && return 0, 0, x.sign
+    s = BigInt()
+    s.size = cld(x.prec, 8*sizeof(Limb)) # limbs
+    b = s.size * sizeof(Limb)            # bytes
+    ccall((:__gmpz_realloc2, :libgmp), Cvoid, (Ref{BigInt}, Culong), s, 8b) # bits
+    ccall(:memcpy, Ptr{Cvoid}, (Ptr{Cvoid}, Ptr{Cvoid}, Csize_t), s.d, x.d, b) # bytes
+    s, x.exp - 8b, x.sign
 end
 
-function Base.lerpi(j::Integer, d::Integer, a::BigFloat, b::BigFloat)
+function lerpi(j::Integer, d::Integer, a::BigFloat, b::BigFloat)
     t = BigFloat(j)/d
     fma(t, b, fma(-t, a, a))
 end
diff --git a/base/multidimensional.jl b/base/multidimensional.jl
index f19afd78d481b6..b5e401a7834e7e 100644
--- a/base/multidimensional.jl
+++ b/base/multidimensional.jl
@@ -2,15 +2,16 @@
 
 ### Multidimensional iterators
 module IteratorsMD
-    import .Base: eltype, length, size, first, last, in, getindex,
-                 setindex!, IndexStyle, min, max, zero, oneunit, isless, eachindex,
-                 ndims, IteratorSize, convert, show, iterate, promote_rule, to_indices
+    import .Base: eltype, length, size, first, last, in, getindex, setindex!, IndexStyle,
+                  min, max, zero, oneunit, isless, eachindex, ndims, IteratorSize,
+                  convert, show, iterate, promote_rule, to_indices, to_index
 
     import .Base: +, -, *, (:)
     import .Base: simd_outer_range, simd_inner_length, simd_index, setindex
     using .Base: IndexLinear, IndexCartesian, AbstractCartesianIndex, fill_to_length, tail,
         ReshapedArray, ReshapedArrayLF, OneTo
     using .Base.Iterators: Reverse, PartitionIterator
+    using .Base: @propagate_inbounds
 
     export CartesianIndex, CartesianIndices
 
@@ -103,9 +104,9 @@ module IteratorsMD
 
     # zeros and ones
     zero(::CartesianIndex{N}) where {N} = zero(CartesianIndex{N})
-    zero(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(x -> 0, Val(N)))
+    zero(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(Returns(0), Val(N)))
     oneunit(::CartesianIndex{N}) where {N} = oneunit(CartesianIndex{N})
-    oneunit(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(x -> 1, Val(N)))
+    oneunit(::Type{CartesianIndex{N}}) where {N} = CartesianIndex(ntuple(Returns(1), Val(N)))
 
     # arithmetic, min/max
     @inline (-)(index::CartesianIndex{N}) where {N} =
@@ -123,13 +124,7 @@ module IteratorsMD
     @inline (*)(index::CartesianIndex, a::Integer) = *(a,index)
 
     # comparison
-    @inline isless(I1::CartesianIndex{N}, I2::CartesianIndex{N}) where {N} = _isless(0, I1.I, I2.I)
-    @inline function _isless(ret, I1::NTuple{N,Int}, I2::NTuple{N,Int}) where N
-        newret = ifelse(ret==0, icmp(I1[N], I2[N]), ret)
-        _isless(newret, Base.front(I1), Base.front(I2))
-    end
-    _isless(ret, ::Tuple{}, ::Tuple{}) = ifelse(ret==1, true, false)
-    icmp(a, b) = ifelse(isless(a,b), 1, ifelse(a==b, 0, -1))
+    isless(I1::CartesianIndex{N}, I2::CartesianIndex{N}) where {N} = isless(reverse(I1.I), reverse(I2.I))
 
     # conversions
     convert(::Type{T}, index::CartesianIndex{1}) where {T<:Number} = convert(T, index[1])
@@ -149,13 +144,13 @@ module IteratorsMD
     function Base.nextind(a::AbstractArray{<:Any,N}, i::CartesianIndex{N}) where {N}
         iter = CartesianIndices(axes(a))
         # might overflow
-        I = inc(i.I, first(iter).I, last(iter).I)
+        I = inc(i.I, iter.indices)
         return I
     end
     function Base.prevind(a::AbstractArray{<:Any,N}, i::CartesianIndex{N}) where {N}
         iter = CartesianIndices(axes(a))
         # might underflow
-        I = dec(i.I, last(iter).I, first(iter).I)
+        I = dec(i.I, iter.indices)
         return I
     end
 
@@ -167,17 +162,18 @@ module IteratorsMD
         error("iteration is deliberately unsupported for CartesianIndex. Use `I` rather than `I...`, or use `Tuple(I)...`")
 
     # Iteration
+    const OrdinalRangeInt = OrdinalRange{Int, Int}
     """
         CartesianIndices(sz::Dims) -> R
-        CartesianIndices((istart:istop, jstart:jstop, ...)) -> R
+        CartesianIndices((istart:[istep:]istop, jstart:[jstep:]jstop, ...)) -> R
 
     Define a region `R` spanning a multidimensional rectangular range
     of integer indices. These are most commonly encountered in the
     context of iteration, where `for I in R ... end` will return
     [`CartesianIndex`](@ref) indices `I` equivalent to the nested loops
 
-        for j = jstart:jstop
-            for i = istart:istop
+        for j = jstart:jstep:jstop
+            for i = istart:istep:istop
                 ...
             end
         end
@@ -190,6 +186,10 @@ module IteratorsMD
     As a convenience, constructing a `CartesianIndices` from an array makes a
     range of its indices.
 
+    !!! compat "Julia 1.6"
+        The step range method `CartesianIndices((istart:istep:istop, jstart:[jstep:]jstop, ...))`
+        requires at least Julia 1.6.
+
     # Examples
     ```jldoctest
     julia> foreach(println, CartesianIndices((2, 2, 2)))
@@ -203,9 +203,7 @@ module IteratorsMD
     CartesianIndex(2, 2, 2)
 
     julia> CartesianIndices(fill(1, (2,3)))
-    2×3 CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}:
-     CartesianIndex(1, 1)  CartesianIndex(1, 2)  CartesianIndex(1, 3)
-     CartesianIndex(2, 1)  CartesianIndex(2, 2)  CartesianIndex(2, 3)
+    CartesianIndices((2, 3))
     ```
 
     ## Conversion between linear and cartesian indices
@@ -215,13 +213,16 @@ module IteratorsMD
 
     ```jldoctest
     julia> cartesian = CartesianIndices((1:3, 1:2))
-    3×2 CartesianIndices{2, Tuple{UnitRange{Int64}, UnitRange{Int64}}}:
-     CartesianIndex(1, 1)  CartesianIndex(1, 2)
-     CartesianIndex(2, 1)  CartesianIndex(2, 2)
-     CartesianIndex(3, 1)  CartesianIndex(3, 2)
+    CartesianIndices((1:3, 1:2))
 
     julia> cartesian[4]
     CartesianIndex(1, 2)
+
+    julia> cartesian = CartesianIndices((1:2:5, 1:2))
+    CartesianIndices((1:2:5, 1:2))
+
+    julia> cartesian[2, 2]
+    CartesianIndex(3, 2)
     ```
 
     ## Broadcasting
@@ -233,44 +234,58 @@ module IteratorsMD
 
     ```jldoctest
     julia> CIs = CartesianIndices((2:3, 5:6))
-    2×2 CartesianIndices{2, Tuple{UnitRange{Int64}, UnitRange{Int64}}}:
-     CartesianIndex(2, 5)  CartesianIndex(2, 6)
-     CartesianIndex(3, 5)  CartesianIndex(3, 6)
+    CartesianIndices((2:3, 5:6))
 
     julia> CI = CartesianIndex(3, 4)
     CartesianIndex(3, 4)
 
     julia> CIs .+ CI
-    2×2 CartesianIndices{2, Tuple{UnitRange{Int64}, UnitRange{Int64}}}:
-     CartesianIndex(5, 9)  CartesianIndex(5, 10)
-     CartesianIndex(6, 9)  CartesianIndex(6, 10)
+    CartesianIndices((5:6, 9:10))
     ```
 
     For cartesian to linear index conversion, see [`LinearIndices`](@ref).
     """
-    struct CartesianIndices{N,R<:NTuple{N,AbstractUnitRange{Int}}} <: AbstractArray{CartesianIndex{N},N}
+    struct CartesianIndices{N,R<:NTuple{N,OrdinalRangeInt}} <: AbstractArray{CartesianIndex{N},N}
         indices::R
     end
 
     CartesianIndices(::Tuple{}) = CartesianIndices{0,typeof(())}(())
-    CartesianIndices(inds::NTuple{N,AbstractUnitRange{<:Integer}}) where {N} =
-        CartesianIndices(map(r->convert(AbstractUnitRange{Int}, r), inds))
+    function CartesianIndices(inds::NTuple{N,OrdinalRange{<:Integer, <:Integer}}) where {N}
+        indices = map(r->convert(OrdinalRangeInt, r), inds)
+        CartesianIndices{N, typeof(indices)}(indices)
+    end
 
     CartesianIndices(index::CartesianIndex) = CartesianIndices(index.I)
-    CartesianIndices(sz::NTuple{N,<:Integer}) where {N} = CartesianIndices(map(Base.OneTo, sz))
-    CartesianIndices(inds::NTuple{N,Union{<:Integer,AbstractUnitRange{<:Integer}}}) where {N} =
-        CartesianIndices(map(i->first(i):last(i), inds))
+    CartesianIndices(inds::NTuple{N,Union{<:Integer,OrdinalRange{<:Integer}}}) where {N} =
+        CartesianIndices(map(_convert2ind, inds))
 
     CartesianIndices(A::AbstractArray) = CartesianIndices(axes(A))
 
+    _convert2ind(sz::Bool) = Base.OneTo(Int8(sz))
+    _convert2ind(sz::Integer) = Base.OneTo(sz)
+    _convert2ind(sz::AbstractUnitRange) = first(sz):last(sz)
+    _convert2ind(sz::OrdinalRange) = first(sz):step(sz):last(sz)
+
+    function show(io::IO, iter::CartesianIndices)
+        print(io, "CartesianIndices(")
+        show(io, map(_xform_index, iter.indices))
+        print(io, ")")
+    end
+    _xform_index(i) = i
+    _xform_index(i::OneTo) = i.stop
+    show(io::IO, ::MIME"text/plain", iter::CartesianIndices) = show(io, iter)
+
     """
-        (:)(I::CartesianIndex, J::CartesianIndex)
+        (:)(start::CartesianIndex, [step::CartesianIndex], stop::CartesianIndex)
 
-    Construct [`CartesianIndices`](@ref) from two `CartesianIndex`.
+    Construct [`CartesianIndices`](@ref) from two `CartesianIndex` and an optional step.
 
     !!! compat "Julia 1.1"
         This method requires at least Julia 1.1.
 
+    !!! compat "Julia 1.6"
+        The step range method start:step:stop requires at least Julia 1.6.
+
     # Examples
     ```jldoctest
     julia> I = CartesianIndex(2,1);
@@ -278,20 +293,25 @@ module IteratorsMD
     julia> J = CartesianIndex(3,3);
 
     julia> I:J
-    2×3 CartesianIndices{2, Tuple{UnitRange{Int64}, UnitRange{Int64}}}:
-     CartesianIndex(2, 1)  CartesianIndex(2, 2)  CartesianIndex(2, 3)
-     CartesianIndex(3, 1)  CartesianIndex(3, 2)  CartesianIndex(3, 3)
+    CartesianIndices((2:3, 1:3))
+
+    julia> I:CartesianIndex(1, 2):J
+    CartesianIndices((2:1:3, 1:2:3))
     ```
     """
     (:)(I::CartesianIndex{N}, J::CartesianIndex{N}) where N =
         CartesianIndices(map((i,j) -> i:j, Tuple(I), Tuple(J)))
+    (:)(I::CartesianIndex{N}, S::CartesianIndex{N}, J::CartesianIndex{N}) where N =
+        CartesianIndices(map((i,s,j) -> i:s:j, Tuple(I), Tuple(S), Tuple(J)))
 
     promote_rule(::Type{CartesianIndices{N,R1}}, ::Type{CartesianIndices{N,R2}}) where {N,R1,R2} =
         CartesianIndices{N,Base.indices_promote_type(R1,R2)}
 
     convert(::Type{Tuple{}}, R::CartesianIndices{0}) = ()
-    convert(::Type{NTuple{N,AbstractUnitRange{Int}}}, R::CartesianIndices{N}) where {N} =
-        R.indices
+    for RT in (OrdinalRange{Int, Int}, StepRange{Int, Int}, AbstractUnitRange{Int})
+        @eval convert(::Type{NTuple{N,$RT}}, R::CartesianIndices{N}) where {N} =
+            map(x->convert($RT, x), R.indices)
+    end
     convert(::Type{NTuple{N,AbstractUnitRange}}, R::CartesianIndices{N}) where {N} =
         convert(NTuple{N,AbstractUnitRange{Int}}, R)
     convert(::Type{NTuple{N,UnitRange{Int}}}, R::CartesianIndices{N}) where {N} =
@@ -318,13 +338,43 @@ module IteratorsMD
     # AbstractArray implementation
     Base.axes(iter::CartesianIndices{N,R}) where {N,R} = map(Base.axes1, iter.indices)
     Base.IndexStyle(::Type{CartesianIndices{N,R}}) where {N,R} = IndexCartesian()
-    @inline function Base.getindex(iter::CartesianIndices{N,<:NTuple{N,Base.OneTo}}, I::Vararg{Int, N}) where {N}
-        @boundscheck checkbounds(iter, I...)
-        CartesianIndex(I)
+    # getindex for a 0D CartesianIndices is necessary for disambiguation
+    @propagate_inbounds function Base.getindex(iter::CartesianIndices{0,R}) where {R}
+        CartesianIndex()
     end
     @inline function Base.getindex(iter::CartesianIndices{N,R}, I::Vararg{Int, N}) where {N,R}
+        # Eagerly do boundscheck before calculating each item of the CartesianIndex so that
+        # we can pass `@inbounds` hint to inside the map and generates more efficient SIMD codes (#42115)
+        @boundscheck checkbounds(iter, I...)
+        index = map(iter.indices, I) do r, i
+            @inbounds getindex(r, i)
+        end
+        CartesianIndex(index)
+    end
+
+    # CartesianIndices act as a multidimensional range, so cartesian indexing of CartesianIndices
+    # with compatible dimensions may be seen as indexing into the component ranges.
+    # This may use the special indexing behavior implemented for ranges to return another CartesianIndices
+    @inline function Base.getindex(iter::CartesianIndices{N,R},
+        I::Vararg{Union{OrdinalRange{<:Integer, <:Integer}, Colon}, N}) where {N,R}
         @boundscheck checkbounds(iter, I...)
-        CartesianIndex(I .- first.(Base.axes1.(iter.indices)) .+ first.(iter.indices))
+        indices = map(iter.indices, I) do r, i
+            @inbounds getindex(r, i)
+        end
+        CartesianIndices(indices)
+    end
+    @propagate_inbounds function Base.getindex(iter::CartesianIndices{N},
+        C::CartesianIndices{N}) where {N}
+        getindex(iter, C.indices...)
+    end
+    @inline Base.getindex(iter::CartesianIndices{0}, ::CartesianIndices{0}) = iter
+
+    # If dimensions permit, we may index into a CartesianIndices directly instead of constructing a SubArray wrapper
+    @propagate_inbounds function Base.view(c::CartesianIndices{N}, r::Vararg{Union{OrdinalRange{<:Integer, <:Integer}, Colon},N}) where {N}
+        getindex(c, r...)
+    end
+    @propagate_inbounds function Base.view(c::CartesianIndices{N}, C::CartesianIndices{N}) where {N}
+        getindex(c, C)
     end
 
     ndims(R::CartesianIndices) = ndims(typeof(R))
@@ -344,62 +394,79 @@ module IteratorsMD
     IteratorSize(::Type{<:CartesianIndices{N}}) where {N} = Base.HasShape{N}()
 
     @inline function iterate(iter::CartesianIndices)
-        iterfirst, iterlast = first(iter), last(iter)
-        if any(map(>, iterfirst.I, iterlast.I))
+        iterfirst = first(iter)
+        if !all(map(in, iterfirst.I, iter.indices))
             return nothing
         end
         iterfirst, iterfirst
     end
     @inline function iterate(iter::CartesianIndices, state)
-        valid, I = __inc(state.I, first(iter).I, last(iter).I)
+        valid, I = __inc(state.I, iter.indices)
         valid || return nothing
         return CartesianIndex(I...), CartesianIndex(I...)
     end
 
     # increment & carry
-    @inline function inc(state, start, stop)
-        _, I = __inc(state, start, stop)
+    @inline function inc(state, indices)
+        _, I = __inc(state, indices)
         return CartesianIndex(I...)
     end
 
-    # increment post check to avoid integer overflow
-    @inline __inc(::Tuple{}, ::Tuple{}, ::Tuple{}) = false, ()
-    @inline function __inc(state::Tuple{Int}, start::Tuple{Int}, stop::Tuple{Int})
-        valid = state[1] < stop[1]
-        return valid, (state[1]+1,)
+    # Unlike ordinary ranges, CartesianIndices continues the iteration in the next column when the
+    # current column is consumed. The implementation is written recursively to achieve this.
+    # `iterate` returns `Union{Nothing, Tuple}`, we explicitly pass a `valid` flag to eliminate
+    # the type instability inside the core `__inc` logic, and this gives better runtime performance.
+    __inc(::Tuple{}, ::Tuple{}) = false, ()
+    @inline function __inc(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt})
+        rng = indices[1]
+        I = state[1] + step(rng)
+        valid = __is_valid_range(I, rng) && state[1] != last(rng)
+        return valid, (I, )
+    end
+    @inline function __inc(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
+        rng = indices[1]
+        I = state[1] + step(rng)
+        if __is_valid_range(I, rng) && state[1] != last(rng)
+            return true, (I, tail(state)...)
+        end
+        valid, I = __inc(tail(state), tail(indices))
+        return valid, (first(rng), I...)
     end
 
-    @inline function __inc(state, start, stop)
-        if state[1] < stop[1]
-            return true, (state[1]+1, tail(state)...)
+    @inline __is_valid_range(I, rng::AbstractUnitRange) = I in rng
+    @inline function __is_valid_range(I, rng::OrdinalRange)
+        if step(rng) > 0
+            lo, hi = first(rng), last(rng)
+        else
+            lo, hi = last(rng), first(rng)
         end
-        valid, I = __inc(tail(state), tail(start), tail(stop))
-        return valid, (start[1], I...)
+        lo <= I <= hi
     end
 
     # 0-d cartesian ranges are special-cased to iterate once and only once
     iterate(iter::CartesianIndices{0}, done=false) = done ? nothing : (CartesianIndex(), true)
 
-    size(iter::CartesianIndices) = map(dimlength, first(iter).I, last(iter).I)
-    dimlength(start, stop) = stop-start+1
+    size(iter::CartesianIndices) = map(length, iter.indices)
 
     length(iter::CartesianIndices) = prod(size(iter))
 
+    # make CartesianIndices a multidimensional range
+    Base.step(iter::CartesianIndices) = CartesianIndex(map(step, iter.indices))
+
     first(iter::CartesianIndices) = CartesianIndex(map(first, iter.indices))
     last(iter::CartesianIndices)  = CartesianIndex(map(last, iter.indices))
 
     # When used as indices themselves, CartesianIndices can simply become its tuple of ranges
-    @inline to_indices(A, inds, I::Tuple{CartesianIndices, Vararg{Any}}) =
-        to_indices(A, inds, (I[1].indices..., tail(I)...))
+    @inline function to_indices(A, inds, I::Tuple{CartesianIndices{N}, Vararg{Any}}) where N
+        _, indstail = split(inds, Val(N))
+        (map(i -> to_index(A, i), I[1].indices)..., to_indices(A, indstail, tail(I))...)
+    end
     # but preserve CartesianIndices{0} as they consume a dimension.
     @inline to_indices(A, inds, I::Tuple{CartesianIndices{0},Vararg{Any}}) =
         (first(I), to_indices(A, inds, tail(I))...)
 
-    @inline function in(i::CartesianIndex{N}, r::CartesianIndices{N}) where {N}
-        _in(true, i.I, first(r).I, last(r).I)
-    end
-    _in(b, ::Tuple{}, ::Tuple{}, ::Tuple{}) = b
-    @inline _in(b, i, start, stop) = _in(b & (start[1] <= i[1] <= stop[1]), tail(i), tail(start), tail(stop))
+    @inline in(i::CartesianIndex, r::CartesianIndices) = false
+    @inline in(i::CartesianIndex{N}, r::CartesianIndices{N}) where {N} = all(map(in, i.I, r.indices))
 
     simd_outer_range(iter::CartesianIndices{0}) = iter
     function simd_outer_range(iter::CartesianIndices)
@@ -410,13 +477,12 @@ module IteratorsMD
     simd_inner_length(iter::CartesianIndices, I::CartesianIndex) = Base.length(iter.indices[1])
 
     simd_index(iter::CartesianIndices{0}, ::CartesianIndex, I1::Int) = first(iter)
-    @inline function simd_index(iter::CartesianIndices, Ilast::CartesianIndex, I1::Int)
-        CartesianIndex((I1+first(iter.indices[1]), Ilast.I...))
-    end
+    @propagate_inbounds simd_index(iter::CartesianIndices, Ilast::CartesianIndex, I1::Int) =
+        CartesianIndex(iter.indices[1][I1+firstindex(iter.indices[1])], Ilast)
 
     # Split out the first N elements of a tuple
     @inline function split(t, V::Val)
-        ref = ntuple(d->true, V)  # create a reference tuple of length N
+        ref = ntuple(Returns(true), V)  # create a reference tuple of length N
         _split1(t, ref), _splitrest(t, ref)
     end
     @inline _split1(t, ref) = (t[1], _split1(tail(t), tail(ref))...)
@@ -440,51 +506,85 @@ module IteratorsMD
 
     # reversed CartesianIndices iteration
 
+    Base.reverse(iter::CartesianIndices) = CartesianIndices(reverse.(iter.indices))
+
     @inline function iterate(r::Reverse{<:CartesianIndices})
-        iterfirst, iterlast = last(r.itr), first(r.itr)
-        if any(map(<, iterfirst.I, iterlast.I))
+        iterfirst = last(r.itr)
+        if !all(map(in, iterfirst.I, r.itr.indices))
             return nothing
         end
         iterfirst, iterfirst
     end
     @inline function iterate(r::Reverse{<:CartesianIndices}, state)
-        valid, I = __dec(state.I, last(r.itr).I, first(r.itr).I)
+        valid, I = __dec(state.I, r.itr.indices)
         valid || return nothing
         return CartesianIndex(I...), CartesianIndex(I...)
     end
 
     # decrement & carry
-    @inline function dec(state, start, stop)
-        _, I = __dec(state, start, stop)
+    @inline function dec(state, indices)
+        _, I = __dec(state, indices)
         return CartesianIndex(I...)
     end
 
     # decrement post check to avoid integer overflow
-    @inline __dec(::Tuple{}, ::Tuple{}, ::Tuple{}) = false, ()
-    @inline function __dec(state::Tuple{Int}, start::Tuple{Int}, stop::Tuple{Int})
-        valid = state[1] > stop[1]
-        return valid, (state[1]-1,)
+    @inline __dec(::Tuple{}, ::Tuple{}) = false, ()
+    @inline function __dec(state::Tuple{Int}, indices::Tuple{OrdinalRangeInt})
+        rng = indices[1]
+        I = state[1] - step(rng)
+        valid = __is_valid_range(I, rng) && state[1] != first(rng)
+        return valid, (I,)
     end
-
-    @inline function __dec(state, start, stop)
-        if state[1] > stop[1]
-            return true, (state[1]-1, tail(state)...)
+    @inline function __dec(state::Tuple{Int,Int,Vararg{Int}}, indices::Tuple{OrdinalRangeInt,OrdinalRangeInt,Vararg{OrdinalRangeInt}})
+        rng = indices[1]
+        I = state[1] - step(rng)
+        if __is_valid_range(I, rng) && state[1] != first(rng)
+            return true, (I, tail(state)...)
         end
-        valid, I = __dec(tail(state), tail(start), tail(stop))
-        return valid, (start[1], I...)
+        valid, I = __dec(tail(state), tail(indices))
+        return valid, (last(rng), I...)
     end
 
     # 0-d cartesian ranges are special-cased to iterate once and only once
     iterate(iter::Reverse{<:CartesianIndices{0}}, state=false) = state ? nothing : (CartesianIndex(), true)
 
-    Base.LinearIndices(inds::CartesianIndices{N,R}) where {N,R} = LinearIndices{N,R}(inds.indices)
+    function Base.LinearIndices(inds::CartesianIndices{N,R}) where {N,R<:NTuple{N, AbstractUnitRange}}
+        LinearIndices{N,R}(inds.indices)
+    end
+    function Base.LinearIndices(inds::CartesianIndices)
+        indices = inds.indices
+        if all(x->step(x)==1, indices)
+            indices = map(rng->first(rng):last(rng), indices)
+            LinearIndices{length(indices), typeof(indices)}(indices)
+        else
+            # Given the fact that StepRange 1:2:4 === 1:2:3, we lost the original size information
+            # and thus cannot calculate the correct linear indices when the steps are not 1.
+            throw(ArgumentError("LinearIndices for $(typeof(inds)) with non-1 step size is not yet supported."))
+        end
+    end
+
+    # This is currently needed because converting to LinearIndices is only available when steps are
+    # all 1
+    # NOTE: this is only a temporary patch and could be possibly removed when StepRange support to
+    # LinearIndices is done
+    function Base.collect(inds::CartesianIndices{N, R}) where {N,R<:NTuple{N, AbstractUnitRange}}
+        Base._collect_indices(axes(inds), inds)
+    end
+    function Base.collect(inds::CartesianIndices)
+        dest = Array{eltype(inds), ndims(inds)}(undef, size(inds))
+        i = 0
+        @inbounds for a in inds
+            dest[i+=1] = a
+        end
+        dest
+    end
 
     # array operations
     Base.intersect(a::CartesianIndices{N}, b::CartesianIndices{N}) where N =
         CartesianIndices(intersect.(a.indices, b.indices))
 
     # Views of reshaped CartesianIndices are used for partitions — ensure these are fast
-    const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,Tuple{UnitRange{Int}},false}
+    const CartesianPartition{T<:CartesianIndex, P<:CartesianIndices, R<:ReshapedArray{T,1,P}} = SubArray{T,1,R,<:Tuple{AbstractUnitRange{Int}},false}
     eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArrayLF} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, true}
     eltype(::Type{PartitionIterator{T}}) where {T<:ReshapedArray} = SubArray{eltype(T), 1, T, Tuple{UnitRange{Int}}, false}
     Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:ReshapedArray} = Iterators.IteratorEltype(T)
@@ -493,7 +593,6 @@ module IteratorsMD
     eltype(::Type{PartitionIterator{T}}) where {T<:Union{UnitRange, StepRange, StepRangeLen, LinRange}} = T
     Iterators.IteratorEltype(::Type{<:PartitionIterator{T}}) where {T<:Union{OneTo, UnitRange, StepRange, StepRangeLen, LinRange}} = Iterators.IteratorEltype(T)
 
-
     @inline function iterate(iter::CartesianPartition)
         isempty(iter) && return nothing
         f = first(iter)
@@ -501,7 +600,7 @@ module IteratorsMD
     end
     @inline function iterate(iter::CartesianPartition, (state, n))
         n >= length(iter) && return nothing
-        I = IteratorsMD.inc(state.I, first(iter.parent.parent).I, last(iter.parent.parent).I)
+        I = IteratorsMD.inc(state.I, iter.parent.parent.indices)
         return I, (I, n+1)
     end
 
@@ -509,33 +608,45 @@ module IteratorsMD
         # In general, the Cartesian Partition might start and stop in the middle of the outer
         # dimensions — thus the outer range of a CartesianPartition is itself a
         # CartesianPartition.
-        t = tail(iter.parent.parent.indices)
-        ci = CartesianIndices(t)
-        li = LinearIndices(t)
-        return @inbounds view(ci, li[tail(iter[1].I)...]:li[tail(iter[end].I)...])
+        mi = iter.parent.mi
+        ci = iter.parent.parent
+        ax, ax1 = axes(ci), Base.axes1(ci)
+        subs = Base.ind2sub_rs(ax, mi, first(iter.indices[1]))
+        vl, fl = Base._sub2ind(tail(ax), tail(subs)...), subs[1]
+        vr, fr = divrem(last(iter.indices[1]) - 1, mi[end]) .+ (1, first(ax1))
+        oci = CartesianIndices(tail(ci.indices))
+        # A fake CartesianPartition to reuse the outer iterate fallback
+        outer = @inbounds view(ReshapedArray(oci, (length(oci),), mi), vl:vr)
+        init = @inbounds dec(oci[tail(subs)...].I, oci.indices) # real init state
+        # Use Generator to make inner loop branchless
+        @inline function skip_len_I(i::Int, I::CartesianIndex)
+            l = i == 1 ? fl : first(ax1)
+            r = i == length(outer) ? fr : last(ax1)
+            l - first(ax1), r - l + 1, I
+        end
+        (skip_len_I(i, I) for (i, I) in Iterators.enumerate(Iterators.rest(outer, (init, 0))))
     end
-    function simd_outer_range(iter::CartesianPartition{CartesianIndex{2}})
+    @inline function simd_outer_range(iter::CartesianPartition{CartesianIndex{2}})
         # But for two-dimensional Partitions the above is just a simple one-dimensional range
         # over the second dimension; we don't need to worry about non-rectangular staggers in
         # higher dimensions.
-        return @inbounds CartesianIndices((iter[1][2]:iter[end][2],))
-    end
-    @inline function simd_inner_length(iter::CartesianPartition, I::CartesianIndex)
-        inner = iter.parent.parent.indices[1]
-        @inbounds fi = iter[1].I
-        @inbounds li = iter[end].I
-        inner_start = I.I == tail(fi) ? fi[1] : first(inner)
-        inner_end   = I.I == tail(li) ? li[1] : last(inner)
-        return inner_end - inner_start + 1
-    end
-    @inline function simd_index(iter::CartesianPartition, Ilast::CartesianIndex, I1::Int)
-        # I1 is the 0-based distance from the first dimension's offest
-        offset = first(iter.parent.parent.indices[1]) # (this is 1 for 1-based arrays)
-        # In the first column we need to also add in the iter's starting point (branchlessly)
-        f = @inbounds iter[1]
-        startoffset = (Ilast.I == tail(f.I))*(f[1] - 1)
-        CartesianIndex((I1 + offset + startoffset, Ilast.I...))
+        mi = iter.parent.mi
+        ci = iter.parent.parent
+        ax, ax1 = axes(ci), Base.axes1(ci)
+        fl, vl = Base.ind2sub_rs(ax, mi, first(iter.indices[1]))
+        fr, vr = Base.ind2sub_rs(ax, mi, last(iter.indices[1]))
+        outer = @inbounds CartesianIndices((ci.indices[2][vl:vr],))
+        # Use Generator to make inner loop branchless
+        @inline function skip_len_I(I::CartesianIndex{1})
+            l = I == first(outer) ? fl : first(ax1)
+            r = I == last(outer) ? fr : last(ax1)
+            l - first(ax1), r - l + 1, I
+        end
+        (skip_len_I(I) for I in outer)
     end
+    @inline simd_inner_length(iter::CartesianPartition, (_, len, _)::Tuple{Int,Int,CartesianIndex}) = len
+    @propagate_inbounds simd_index(iter::CartesianPartition, (skip, _, I)::Tuple{Int,Int,CartesianIndex}, n::Int) =
+        simd_index(iter.parent.parent, I, n + skip)
 end  # IteratorsMD
 
 
@@ -544,7 +655,7 @@ using .IteratorsMD
 ## Bounds-checking with CartesianIndex
 # Disallow linear indexing with CartesianIndex
 function checkbounds(::Type{Bool}, A::AbstractArray, i::Union{CartesianIndex, AbstractArray{<:CartesianIndex}})
-    @_inline_meta
+    @inline
     checkbounds_indices(Bool, axes(A), (i,))
 end
 
@@ -584,6 +695,16 @@ end
     checkindex(Bool, IA1, I[1]) & checkbounds_indices(Bool, IArest, tail(I))
 end
 
+
+@inline function checkbounds_indices(::Type{Bool}, IA::Tuple{},
+    I::Tuple{AbstractArray{Bool,N},Vararg{Any}}) where N
+    return checkbounds_indices(Bool, IA, (LogicalIndex(I[1]), tail(I)...))
+end
+@inline function checkbounds_indices(::Type{Bool}, IA::Tuple,
+    I::Tuple{AbstractArray{Bool,N},Vararg{Any}}) where N
+    return checkbounds_indices(Bool, IA, (LogicalIndex(I[1]), tail(I)...))
+end
+
 function checkindex(::Type{Bool}, inds::Tuple, I::AbstractArray{<:CartesianIndex})
     b = true
     for i in I
@@ -598,10 +719,10 @@ checkindex(::Type{Bool}, inds::Tuple, I::CartesianIndices) = all(checkindex.(Boo
 # rather than returning N, it returns an NTuple{N,Bool} so the result is inferrable
 @inline index_ndims(i1, I...) = (true, index_ndims(I...)...)
 @inline function index_ndims(i1::CartesianIndex, I...)
-    (map(x->true, i1.I)..., index_ndims(I...)...)
+    (map(Returns(true), i1.I)..., index_ndims(I...)...)
 end
 @inline function index_ndims(i1::AbstractArray{CartesianIndex{N}}, I...) where N
-    (ntuple(x->true, Val(N))..., index_ndims(I...)...)
+    (ntuple(Returns(true), Val(N))..., index_ndims(I...)...)
 end
 index_ndims() = ()
 
@@ -611,7 +732,7 @@ index_ndims() = ()
 @inline index_dimsum(::Colon, I...) = (true, index_dimsum(I...)...)
 @inline index_dimsum(::AbstractArray{Bool}, I...) = (true, index_dimsum(I...)...)
 @inline function index_dimsum(::AbstractArray{<:Any,N}, I...) where N
-    (ntuple(x->true, Val(N))..., index_dimsum(I...)...)
+    (ntuple(Returns(true), Val(N))..., index_dimsum(I...)...)
 end
 index_dimsum() = ()
 
@@ -675,29 +796,35 @@ end
     end
 end
 # When wrapping a BitArray, lean heavily upon its internals.
-@inline function iterate(L::Base.LogicalIndex{Int,<:BitArray})
+@inline function iterate(L::LogicalIndex{Int,<:BitArray})
+    L.sum == 0 && return nothing
+    Bc = L.mask.chunks
+    return iterate(L, (1, 1, (), @inbounds Bc[1]))
+end
+@inline function iterate(L::LogicalIndex{<:CartesianIndex,<:BitArray})
     L.sum == 0 && return nothing
     Bc = L.mask.chunks
-    return iterate(L, (1, @inbounds Bc[1]))
+    irest = ntuple(one, ndims(L.mask)-1)
+    return iterate(L, (1, 1, irest, @inbounds Bc[1]))
 end
-@inline function iterate(L::Base.LogicalIndex{Int,<:BitArray}, s)
+@inline function iterate(L::LogicalIndex{<:Any,<:BitArray}, (i1, Bi, irest, c))
     Bc = L.mask.chunks
-    i1, c = s
-    while c==0
-        i1 % UInt >= length(Bc) % UInt && return nothing
-        i1 += 1
-        @inbounds c = Bc[i1]
+    while c == 0
+        Bi >= length(Bc) && return nothing
+        i1 += 64
+        @inbounds c = Bc[Bi+=1]
     end
-    tz = trailing_zeros(c) + 1
+    tz = trailing_zeros(c)
     c = _blsr(c)
-    return ((i1-1)<<6 + tz, (i1, c))
+    i1, irest = _overflowind(i1 + tz, irest, size(L.mask))
+    return eltype(L)(i1, irest...), (i1 - tz, Bi, irest, c)
 end
 
 @inline checkbounds(::Type{Bool}, A::AbstractArray, I::LogicalIndex{<:Any,<:AbstractArray{Bool,1}}) =
     eachindex(IndexLinear(), A) == eachindex(IndexLinear(), I.mask)
 @inline checkbounds(::Type{Bool}, A::AbstractArray, I::LogicalIndex) = axes(A) == axes(I.mask)
 @inline checkindex(::Type{Bool}, indx::AbstractUnitRange, I::LogicalIndex) = (indx,) == axes(I.mask)
-checkindex(::Type{Bool}, inds::Tuple, I::LogicalIndex) = false
+checkindex(::Type{Bool}, inds::Tuple, I::LogicalIndex) = checkbounds_indices(Bool, inds, axes(I.mask))
 
 ensure_indexable(I::Tuple{}) = ()
 @inline ensure_indexable(I::Tuple{Any, Vararg{Any}}) = (I[1], ensure_indexable(tail(I))...)
@@ -707,10 +834,12 @@ ensure_indexable(I::Tuple{}) = ()
 # until Julia gets smart enough to elide the call on its own:
 @inline to_indices(A, I::Tuple{Vararg{Union{Integer, CartesianIndex}}}) = to_indices(A, (), I)
 # But some index types require more context spanning multiple indices
-# CartesianIndexes are simple; they just splat out
-@inline to_indices(A, inds, I::Tuple{CartesianIndex, Vararg{Any}}) =
-    to_indices(A, inds, (I[1].I..., tail(I)...))
-# But for arrays of CartesianIndex, we just skip the appropriate number of inds
+# CartesianIndex is unfolded outside the inner to_indices for better inference
+@inline function to_indices(A, inds, I::Tuple{CartesianIndex{N}, Vararg{Any}}) where N
+    _, indstail = IteratorsMD.split(inds, Val(N))
+    (map(i -> to_index(A, i), I[1].I)..., to_indices(A, indstail, tail(I))...)
+end
+# For arrays of CartesianIndex, we just skip the appropriate number of inds
 @inline function to_indices(A, inds, I::Tuple{AbstractArray{CartesianIndex{N}}, Vararg{Any}}) where N
     _, indstail = IteratorsMD.split(inds, Val(N))
     (to_index(A, I[1]), to_indices(A, indstail, tail(I))...)
@@ -735,7 +864,7 @@ uncolon(inds::Tuple{},    I::Tuple{Colon, Vararg{Any}}) = Slice(OneTo(1))
 uncolon(inds::Tuple,      I::Tuple{Colon, Vararg{Any}}) = Slice(inds[1])
 
 ### From abstractarray.jl: Internal multidimensional indexing definitions ###
-getindex(x::Number, i::CartesianIndex{0}) = x
+getindex(x::Union{Number,AbstractChar}, ::CartesianIndex{0}) = x
 getindex(t::Tuple,  i::CartesianIndex{1}) = getindex(t, i.I[1])
 
 # These are not defined on directly on getindex to avoid
@@ -756,14 +885,14 @@ function _unsafe_getindex(::IndexStyle, A::AbstractArray, I::Vararg{Union{Real,
     # This is specifically not inlined to prevent excessive allocations in type unstable code
     shape = index_shape(I...)
     dest = similar(A, shape)
-    map(unsafe_length, axes(dest)) == map(unsafe_length, shape) || throw_checksize_error(dest, shape)
+    map(length, axes(dest)) == map(length, shape) || throw_checksize_error(dest, shape)
     _unsafe_getindex!(dest, A, I...) # usually a generated function, don't allow it to impact inference result
     return dest
 end
 
 function _generate_unsafe_getindex!_body(N::Int)
     quote
-        @_inline_meta
+        @inline
         D = eachindex(dest)
         Dy = iterate(D)
         @inbounds @nloops $N j d->I[d] begin
@@ -796,7 +925,7 @@ end
 
 ## setindex! ##
 function _setindex!(l::IndexStyle, A::AbstractArray, x, I::Union{Real, AbstractArray}...)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(A, I...)
     _unsafe_setindex!(l, _maybe_reshape(l, A, I...), x, I...)
     A
@@ -877,7 +1006,7 @@ function diff(a::AbstractArray{T,N}; dims::Integer) where {T,N}
 end
 function diff(r::AbstractRange{T}; dims::Integer=1) where {T}
     dims == 1 || throw(ArgumentError("dimension $dims out of range (1:1)"))
-    return T[@inbounds r[i+1] - r[i] for i in firstindex(r):lastindex(r)-1]
+    return [@inbounds r[i+1] - r[i] for i in firstindex(r):lastindex(r)-1]
 end
 
 ### from abstractarray.jl
@@ -1004,6 +1133,25 @@ end
 
 Copy the block of `src` in the range of `Rsrc` to the block of `dest`
 in the range of `Rdest`. The sizes of the two regions must match.
+
+# Examples
+```jldoctest
+julia> A = zeros(5, 5);
+
+julia> B = [1 2; 3 4];
+
+julia> Ainds = CartesianIndices((2:3, 2:3));
+
+julia> Binds = CartesianIndices(B);
+
+julia> copyto!(A, Ainds, B, Binds)
+5×5 Matrix{Float64}:
+ 0.0  0.0  0.0  0.0  0.0
+ 0.0  1.0  2.0  0.0  0.0
+ 0.0  3.0  4.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  0.0
+ 0.0  0.0  0.0  0.0  0.0
+```
 """
 copyto!(::AbstractArray, ::CartesianIndices, ::AbstractArray, ::CartesianIndices)
 
@@ -1024,6 +1172,7 @@ See also [`circshift`](@ref).
     dest === src && throw(ArgumentError("dest and src must be separate arrays"))
     inds = axes(src)
     axes(dest) == inds || throw(ArgumentError("indices of src and dest must match (got $inds and $(axes(dest)))"))
+    isempty(src) && return dest
     _circshift!(dest, (), src, (), inds, fill_to_length(shiftamt, 0, Val(N)))
 end
 
@@ -1073,6 +1222,8 @@ their indices; any offset results in a (circular) wraparound. If the
 arrays have overlapping indices, then on the domain of the overlap
 `dest` agrees with `src`.
 
+See also: [`circshift`](@ref).
+
 # Examples
 ```julia-repl
 julia> src = reshape(Vector(1:16), (4,4))
@@ -1132,14 +1283,14 @@ end
 
 # contiguous multidimensional indexing: if the first dimension is a range,
 # we can get some performance from using copy_chunks!
-@inline function _unsafe_getindex!(X::BitArray, B::BitArray, I0::Union{UnitRange{Int},Slice})
+@inline function _unsafe_getindex!(X::BitArray, B::BitArray, I0::Union{AbstractUnitRange{Int},Slice})
     copy_chunks!(X.chunks, 1, B.chunks, indexoffset(I0)+1, length(I0))
     return X
 end
 
 # Optimization where the inner dimension is contiguous improves perf dramatically
 @generated function _unsafe_getindex!(X::BitArray, B::BitArray,
-        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Slice}...)
+        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Slice}...)
     N = length(I)
     quote
         $(Expr(:meta, :inline))
@@ -1274,7 +1425,7 @@ end
 # contiguous multidimensional indexing: if the first dimension is a range,
 # we can get some performance from using copy_chunks!
 
-@inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray}, J0::Union{Colon,UnitRange{Int}})
+@inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray}, J0::Union{Colon,AbstractUnitRange{Int}})
     I0 = to_indices(B, (J0,))[1]
     @boundscheck checkbounds(B, I0)
     l0 = length(I0)
@@ -1286,13 +1437,13 @@ end
 end
 
 @inline function setindex!(B::BitArray, X::Union{StridedArray,BitArray},
-        I0::Union{Colon,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Colon}...)
+        I0::Union{Colon,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Colon}...)
     J = to_indices(B, (I0, I...))
     @boundscheck checkbounds(B, J...)
     _unsafe_setindex!(B, X, J...)
 end
 @generated function _unsafe_setindex!(B::BitArray, X::Union{StridedArray,BitArray},
-        I0::Union{Slice,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Slice}...)
+        I0::Union{Slice,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Slice}...)
     N = length(I)
     quote
         idxlens = @ncall $N index_lengths I0 d->I[d]
@@ -1327,12 +1478,12 @@ end
 end
 
 @propagate_inbounds function setindex!(B::BitArray, X::AbstractArray,
-        I0::Union{Colon,UnitRange{Int}}, I::Union{Int,UnitRange{Int},Colon}...)
+        I0::Union{Colon,AbstractUnitRange{Int}}, I::Union{Int,AbstractUnitRange{Int},Colon}...)
     _setindex!(IndexStyle(B), B, X, to_indices(B, (I0, I...))...)
 end
 
 ## fill! contiguous views of BitArrays with a single value
-function fill!(V::SubArray{Bool, <:Any, <:BitArray, Tuple{AbstractUnitRange{Int}}}, x)
+function fill!(V::SubArray{Bool, <:Any, <:BitArray, <:Tuple{AbstractUnitRange{Int}}}, x)
     B = V.parent
     I0 = V.indices[1]
     l0 = length(I0)
@@ -1341,7 +1492,7 @@ function fill!(V::SubArray{Bool, <:Any, <:BitArray, Tuple{AbstractUnitRange{Int}
     return V
 end
 
-fill!(V::SubArray{Bool, <:Any, <:BitArray, Tuple{AbstractUnitRange{Int}, Vararg{Union{Int,AbstractUnitRange{Int}}}}}, x) =
+fill!(V::SubArray{Bool, <:Any, <:BitArray, <:Tuple{AbstractUnitRange{Int}, Vararg{Union{Int,AbstractUnitRange{Int}}}}}, x) =
     _unsafe_fill_indices!(V.parent, x, V.indices...)
 
 @generated function _unsafe_fill_indices!(B::BitArray, x,
@@ -1432,13 +1583,12 @@ for (V, PT, BT) in Any[((:N,), BitArray, BitArray), ((:T,:N), Array, StridedArra
             #Creates offset, because indexing starts at 1
             offset = 1 - sum(@ntuple $N d->strides_{d+1})
 
+            sumc = 0
             ind = 1
-            @nexprs 1 d->(counts_{$N+1} = strides_{$N+1}) # a trick to set counts_($N+1)
             @nloops($N, i, P,
-                    d->(counts_d = strides_d), # PRE
-                    d->(counts_{d+1} += strides_{d+1}), # POST
+                    d->(sumc += i_d*strides_{d+1}), # PRE
+                    d->(sumc -= i_d*strides_{d+1}), # POST
                     begin # BODY
-                        sumc = sum(@ntuple $N d->counts_{d+1})
                         @inbounds P[ind] = B[sumc+offset]
                         ind += 1
                     end)
@@ -1528,7 +1678,7 @@ _unique_dims(A::AbstractArray, dims::Colon) = invoke(unique, Tuple{Any}, A)
             else
                 j_d = i_d
             end) begin
-                if (@nref $N A j) != (@nref $N A i)
+                if !isequal((@nref $N A j), (@nref $N A i))
                     collided[k] = true
                 end
             end
@@ -1558,7 +1708,7 @@ _unique_dims(A::AbstractArray, dims::Colon) = invoke(unique, Tuple{Any}, A)
                         j_d = i_d
                     end
                 end begin
-                    if (@nref $N A j) != (@nref $N A i)
+                    if !isequal((@nref $N A j), (@nref $N A i))
                         nowcollided[k] = true
                     end
                 end
@@ -1570,80 +1720,6 @@ _unique_dims(A::AbstractArray, dims::Colon) = invoke(unique, Tuple{Any}, A)
     end
 end
 
-"""
-    extrema(A::AbstractArray; dims) -> Array{Tuple}
-
-Compute the minimum and maximum elements of an array over the given dimensions.
-
-# Examples
-```jldoctest
-julia> A = reshape(Vector(1:2:16), (2,2,2))
-2×2×2 Array{Int64, 3}:
-[:, :, 1] =
- 1  5
- 3  7
-
-[:, :, 2] =
-  9  13
- 11  15
-
-julia> extrema(A, dims = (1,2))
-1×1×2 Array{Tuple{Int64, Int64}, 3}:
-[:, :, 1] =
- (1, 7)
-
-[:, :, 2] =
- (9, 15)
-```
-"""
-extrema(A::AbstractArray; dims = :) = _extrema_dims(identity, A, dims)
-
-"""
-    extrema(f, A::AbstractArray; dims) -> Array{Tuple}
-
-Compute the minimum and maximum of `f` applied to each element in the given dimensions
-of `A`.
-
-!!! compat "Julia 1.2"
-    This method requires Julia 1.2 or later.
-"""
-extrema(f, A::AbstractArray; dims=:) = _extrema_dims(f, A, dims)
-
-_extrema_dims(f, A::AbstractArray, ::Colon) = _extrema_itr(f, A)
-
-function _extrema_dims(f, A::AbstractArray, dims)
-    sz = [size(A)...]
-    for d in dims
-        sz[d] = 1
-    end
-    T = promote_op(f, eltype(A))
-    B = Array{Tuple{T,T}}(undef, sz...)
-    return extrema!(f, B, A)
-end
-
-@noinline function extrema!(f, B, A)
-    require_one_based_indexing(B, A)
-    sA = size(A)
-    sB = size(B)
-    for I in CartesianIndices(sB)
-        fAI = f(A[I])
-        B[I] = (fAI, fAI)
-    end
-    Bmax = CartesianIndex(sB)
-    @inbounds @simd for I in CartesianIndices(sA)
-        J = min(Bmax,I)
-        BJ = B[J]
-        fAI = f(A[I])
-        if fAI < BJ[1]
-            B[J] = (fAI, BJ[2])
-        elseif fAI > BJ[2]
-            B[J] = (BJ[1], fAI)
-        end
-    end
-    return B
-end
-extrema!(B, A) = extrema!(identity, B, A)
-
 # Show for pairs() with Cartesian indices. Needs to be here rather than show.jl for bootstrap order
 function Base.showarg(io::IO, r::Iterators.Pairs{<:Integer, <:Any, <:Any, T}, toplevel) where T <: Union{AbstractVector, Tuple}
     print(io, "pairs(::$T)")
diff --git a/base/multimedia.jl b/base/multimedia.jl
index 45e6b9532e9fae..d15768affd012b 100644
--- a/base/multimedia.jl
+++ b/base/multimedia.jl
@@ -69,7 +69,7 @@ methods; for example, if the available MIME formats depend on the *value* of `x`
 julia> showable(MIME("text/plain"), rand(5))
 true
 
-julia> showable("img/png", rand(5))
+julia> showable("image/png", rand(5))
 false
 ```
 """
@@ -176,7 +176,7 @@ data except for a set of types known to be text data (possibly Unicode).
 julia> istextmime(MIME("text/plain"))
 true
 
-julia> istextmime(MIME("img/png"))
+julia> istextmime(MIME("image/png"))
 false
 ```
 """
@@ -239,14 +239,14 @@ objects are printed in the Julia REPL.)
 struct TextDisplay <: AbstractDisplay
     io::IO
 end
-display(d::TextDisplay, M::MIME"text/plain", @nospecialize x) = show(d.io, M, x)
+display(d::TextDisplay, M::MIME"text/plain", @nospecialize x) = (show(d.io, M, x); println(d.io))
 display(d::TextDisplay, @nospecialize x) = display(d, MIME"text/plain"(), x)
 
 # if you explicitly call display("text/foo", x), it should work on a TextDisplay:
 displayable(d::TextDisplay, M::MIME) = istextmime(M)
 function display(d::TextDisplay, M::MIME, @nospecialize x)
     displayable(d, M) || throw(MethodError(display, (d, M, x)))
-    show(d.io, M, x)
+    show(d.io, M, x); println(d.io)
 end
 
 import Base: close, flush
@@ -300,7 +300,7 @@ xdisplayable(D::AbstractDisplay, @nospecialize args...) = applicable(display, D,
     display(mime, x)
     display(d::AbstractDisplay, mime, x)
 
-AbstractDisplay `x` using the topmost applicable display in the display stack, typically using the
+Display `x` using the topmost applicable display in the display stack, typically using the
 richest supported multimedia output for `x`, with plain-text [`stdout`](@ref) output as a fallback.
 The `display(d, x)` variant attempts to display `x` on the given display `d` only, throwing
 a [`MethodError`](@ref) if `d` cannot display objects of this type.
diff --git a/base/namedtuple.jl b/base/namedtuple.jl
index e8829086207702..b2ebb3f9d0d7e3 100644
--- a/base/namedtuple.jl
+++ b/base/namedtuple.jl
@@ -9,9 +9,9 @@ tuple-like collection of values, where each entry has a unique name, represented
 can be modified in place after construction.
 
 Accessing the value associated with a name in a named tuple can be done using field
-access syntax, e.g. `x.a`, or using [`getindex`](@ref), e.g. `x[:a]`. A tuple of the
-names can be obtained using [`keys`](@ref), and a tuple of the values can be obtained
-using [`values`](@ref).
+access syntax, e.g. `x.a`, or using [`getindex`](@ref), e.g. `x[:a]` or `x[(:a, :b)]`.
+A tuple of the names can be obtained using [`keys`](@ref), and a tuple of the values
+can be obtained using [`values`](@ref).
 
 !!! note
     Iteration over `NamedTuple`s produces the *values* without the names. (See example
@@ -30,6 +30,9 @@ julia> x.a
 julia> x[:a]
 1
 
+julia> x[(:a,)]
+(a = 1,)
+
 julia> keys(x)
 (:a, :b)
 
@@ -76,6 +79,9 @@ julia> (; t.x)
 
 !!! compat "Julia 1.5"
     Implicit names from identifiers and dot expressions are available as of Julia 1.5.
+
+!!! compat "Julia 1.7"
+    Use of `getindex` methods with multiple `Symbol`s is available as of Julia 1.7.
 """
 Core.NamedTuple
 
@@ -90,6 +96,15 @@ if nameof(@__MODULE__) === :Base
     $(Expr(:splatnew, :(NamedTuple{names,T}), :(T(args))))
 end
 
+function NamedTuple{names, T}(nt::NamedTuple) where {names, T <: Tuple}
+    if @generated
+        Expr(:new, :(NamedTuple{names, T}),
+             Any[ :(convert(fieldtype(T, $n), getfield(nt, $(QuoteNode(names[n]))))) for n in 1:length(names) ]...)
+    else
+        NamedTuple{names, T}(map(Fix1(getfield, nt), names))
+    end
+end
+
 function NamedTuple{names}(nt::NamedTuple) where {names}
     if @generated
         idx = Int[ fieldindex(nt, names[n]) for n in 1:length(names) ]
@@ -97,7 +112,7 @@ function NamedTuple{names}(nt::NamedTuple) where {names}
         Expr(:new, :(NamedTuple{names, $types}), Any[ :(getfield(nt, $(idx[n]))) for n in 1:length(idx) ]...)
     else
         types = Tuple{(fieldtype(typeof(nt), names[n]) for n in 1:length(names))...}
-        NamedTuple{names, types}(Tuple(getfield(nt, n) for n in 1:length(names)))
+        NamedTuple{names, types}(map(Fix1(getfield, nt), names))
     end
 end
 
@@ -106,19 +121,29 @@ NamedTuple{names}(itr) where {names} = NamedTuple{names}(Tuple(itr))
 
 NamedTuple(itr) = (; itr...)
 
+# avoids invalidating Union{}(...)
+NamedTuple{names, Union{}}(itr::Tuple) where {names} = throw(MethodError(NamedTuple{names, Union{}}, (itr,)))
+
 end # if Base
 
 length(t::NamedTuple) = nfields(t)
 iterate(t::NamedTuple, iter=1) = iter > nfields(t) ? nothing : (getfield(t, iter), iter + 1)
+rest(t::NamedTuple) = t
+@inline rest(t::NamedTuple{names}, i::Int) where {names} = NamedTuple{rest(names,i)}(t)
 firstindex(t::NamedTuple) = 1
 lastindex(t::NamedTuple) = nfields(t)
 getindex(t::NamedTuple, i::Int) = getfield(t, i)
 getindex(t::NamedTuple, i::Symbol) = getfield(t, i)
+@inline getindex(t::NamedTuple, idxs::Tuple{Vararg{Symbol}}) = NamedTuple{idxs}(t)
+@inline getindex(t::NamedTuple, idxs::AbstractVector{Symbol}) = NamedTuple{Tuple(idxs)}(t)
 indexed_iterate(t::NamedTuple, i::Int, state=1) = (getfield(t, i), i+1)
 isempty(::NamedTuple{()}) = true
 isempty(::NamedTuple) = false
 empty(::NamedTuple) = NamedTuple()
 
+prevind(@nospecialize(t::NamedTuple), i::Integer) = Int(i)-1
+nextind(@nospecialize(t::NamedTuple), i::Integer) = Int(i)+1
+
 convert(::Type{NamedTuple{names,T}}, nt::NamedTuple{names,T}) where {names,T<:Tuple} = nt
 convert(::Type{NamedTuple{names}}, nt::NamedTuple{names}) where {names} = nt
 
@@ -149,7 +174,8 @@ function show(io::IO, t::NamedTuple)
         typeinfo = get(io, :typeinfo, Any)
         print(io, "(")
         for i = 1:n
-            print(io, fieldname(typeof(t),i), " = ")
+            show_sym(io, fieldname(typeof(t), i))
+            print(io, " = ")
             show(IOContext(io, :typeinfo =>
                            t isa typeinfo <: NamedTuple ? fieldtype(typeinfo, i) : Any),
                  getfield(t, i))
@@ -178,8 +204,8 @@ _nt_names(::Type{T}) where {names,T<:NamedTuple{names}} = names
 
 hash(x::NamedTuple, h::UInt) = xor(objectid(_nt_names(x)), hash(Tuple(x), h))
 
+(<)(a::NamedTuple{n}, b::NamedTuple{n}) where {n} = Tuple(a) < Tuple(b)
 isless(a::NamedTuple{n}, b::NamedTuple{n}) where {n} = isless(Tuple(a), Tuple(b))
-# TODO: case where one argument's names are a prefix of the other's
 
 same_names(::NamedTuple{names}...) where {names} = true
 same_names(::NamedTuple...) = false
@@ -192,7 +218,7 @@ function map(f, nt::NamedTuple{names}, nts::NamedTuple...) where names
     NamedTuple{names}(map(f, map(Tuple, (nt, nts...))...))
 end
 
-@pure function merge_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
+@assume_effects :total function merge_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
     @nospecialize an bn
     names = Symbol[an...]
     for n in bn
@@ -203,7 +229,7 @@ end
     (names...,)
 end
 
-@pure function merge_types(names::Tuple{Vararg{Symbol}}, a::Type{<:NamedTuple}, b::Type{<:NamedTuple})
+@assume_effects :total function merge_types(names::Tuple{Vararg{Symbol}}, a::Type{<:NamedTuple}, b::Type{<:NamedTuple})
     @nospecialize names a b
     bn = _nt_names(b)
     return Tuple{Any[ fieldtype(sym_in(names[n], bn) ? b : a, names[n]) for n in 1:length(names) ]...}
@@ -251,7 +277,7 @@ merge(a::NamedTuple,     b::NamedTuple{()}) = a
 merge(a::NamedTuple{()}, b::NamedTuple{()}) = a
 merge(a::NamedTuple{()}, b::NamedTuple)     = b
 
-merge(a::NamedTuple, b::Iterators.Pairs{<:Any,<:Any,<:Any,<:NamedTuple}) = merge(a, b.data)
+merge(a::NamedTuple, b::Iterators.Pairs{<:Any,<:Any,<:Any,<:NamedTuple}) = merge(a, getfield(b, :data))
 
 merge(a::NamedTuple, b::Iterators.Zip{<:Tuple{Any,Any}}) = merge(a, NamedTuple{Tuple(b.is[1])}(b.is[2]))
 
@@ -273,7 +299,8 @@ function merge(a::NamedTuple, itr)
     names = Symbol[]
     vals = Any[]
     inds = IdDict{Symbol,Int}()
-    for (k::Symbol, v) in itr
+    for (k, v) in itr
+        k = k::Symbol
         oldind = get(inds, k, 0)
         if oldind > 0
             vals[oldind] = v
@@ -289,12 +316,12 @@ end
 keys(nt::NamedTuple{names}) where {names} = names
 values(nt::NamedTuple) = Tuple(nt)
 haskey(nt::NamedTuple, key::Union{Integer, Symbol}) = isdefined(nt, key)
-get(nt::NamedTuple, key::Union{Integer, Symbol}, default) = haskey(nt, key) ? getfield(nt, key) : default
-get(f::Callable, nt::NamedTuple, key::Union{Integer, Symbol}) = haskey(nt, key) ? getfield(nt, key) : f()
+get(nt::NamedTuple, key::Union{Integer, Symbol}, default) = isdefined(nt, key) ? getfield(nt, key) : default
+get(f::Callable, nt::NamedTuple, key::Union{Integer, Symbol}) = isdefined(nt, key) ? getfield(nt, key) : f()
 tail(t::NamedTuple{names}) where names = NamedTuple{tail(names)}(t)
 front(t::NamedTuple{names}) where names = NamedTuple{front(names)}(t)
 
-@pure function diff_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
+@assume_effects :total function diff_names(an::Tuple{Vararg{Symbol}}, bn::Tuple{Vararg{Symbol}})
     @nospecialize an bn
     names = Symbol[]
     for n in an
@@ -321,7 +348,7 @@ function structdiff(a::NamedTuple{an}, b::Union{NamedTuple{bn}, Type{NamedTuple{
     else
         names = diff_names(an, bn)
         types = Tuple{Any[ fieldtype(typeof(a), names[n]) for n in 1:length(names) ]...}
-        NamedTuple{names,types}(map(n->getfield(a, n), names))
+        NamedTuple{names,types}(map(Fix1(getfield, a), names))
     end
 end
 
@@ -388,3 +415,9 @@ macro NamedTuple(ex)
     types = [esc(e isa Symbol ? :Any : e.args[2]) for e in decls]
     return :(NamedTuple{($(vars...),), Tuple{$(types...)}})
 end
+
+function split_rest(t::NamedTuple{names}, n::Int, st...) where {names}
+    _check_length_split_rest(length(t), n)
+    names_front, names_last_n = split_rest(names, n, st...)
+    return NamedTuple{names_front}(t), NamedTuple{names_last_n}(t)
+end
diff --git a/base/ntuple.jl b/base/ntuple.jl
index a5608dfa927c33..6f70b494812230 100644
--- a/base/ntuple.jl
+++ b/base/ntuple.jl
@@ -32,22 +32,22 @@ julia> ntuple(i -> 2*i, 4)
 end
 
 function _ntuple(f::F, n) where F
-    @_noinline_meta
+    @noinline
     (n >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", n)))
     ([f(i) for i = 1:n]...,)
 end
 
 function ntupleany(f, n)
-    @_noinline_meta
+    @noinline
     (n >= 0) || throw(ArgumentError(string("tuple length should be ≥ 0, got ", n)))
     (Any[f(i) for i = 1:n]...,)
 end
 
 # inferrable ntuple (enough for bootstrapping)
 ntuple(f, ::Val{0}) = ()
-ntuple(f, ::Val{1}) = (@_inline_meta; (f(1),))
-ntuple(f, ::Val{2}) = (@_inline_meta; (f(1), f(2)))
-ntuple(f, ::Val{3}) = (@_inline_meta; (f(1), f(2), f(3)))
+ntuple(f, ::Val{1}) = (@inline; (f(1),))
+ntuple(f, ::Val{2}) = (@inline; (f(1), f(2)))
+ntuple(f, ::Val{3}) = (@inline; (f(1), f(2), f(3)))
 
 """
     ntuple(f, ::Val{N})
diff --git a/base/number.jl b/base/number.jl
index 142796d3903ac5..7436655bfad38a 100644
--- a/base/number.jl
+++ b/base/number.jl
@@ -25,6 +25,8 @@ isinteger(x::Integer) = true
 Return `true` if `x == zero(x)`; if `x` is an array, this checks whether
 all of the elements of `x` are zero.
 
+See also: [`isone`](@ref), [`isinteger`](@ref), [`isfinite`](@ref), [`isnan`](@ref).
+
 # Examples
 ```jldoctest
 julia> iszero(0.0)
@@ -92,15 +94,20 @@ keys(::Number) = OneTo(1)
 
 getindex(x::Number) = x
 function getindex(x::Number, i::Integer)
-    @_inline_meta
+    @inline
     @boundscheck i == 1 || throw(BoundsError())
     x
 end
 function getindex(x::Number, I::Integer...)
-    @_inline_meta
+    @inline
     @boundscheck all(isone, I) || throw(BoundsError())
     x
 end
+get(x::Number, i::Integer, default) = isone(i) ? x : default
+get(x::Number, ind::Tuple, default) = all(isone, ind) ? x : default
+get(f::Callable, x::Number, i::Integer) = isone(i) ? x : f()
+get(f::Callable, x::Number, ind::Tuple) = all(isone, ind) ? x : f()
+
 first(x::Number) = x
 last(x::Number) = x
 copy(x::Number) = x # some code treats numbers as collection-like
@@ -110,6 +117,8 @@ copy(x::Number) = x # some code treats numbers as collection-like
 
 Returns `true` if the value of the sign of `x` is negative, otherwise `false`.
 
+See also [`sign`](@ref) and [`copysign`](@ref).
+
 # Examples
 ```jldoctest
 julia> signbit(-4)
@@ -131,6 +140,23 @@ signbit(x::Real) = x < 0
     sign(x)
 
 Return zero if `x==0` and ``x/|x|`` otherwise (i.e., ±1 for real `x`).
+
+See also [`signbit`](@ref), [`zero`](@ref), [`copysign`](@ref), [`flipsign`](@ref).
+
+# Examples
+```jldoctest
+julia> sign(-4.0)
+-1.0
+
+julia> sign(99)
+1
+
+julia> sign(-0.0)
+-0.0
+
+julia> sign(0 + im)
+0.0 + 1.0im
+```
 """
 sign(x::Number) = iszero(x) ? x/abs(oneunit(x)) : x/abs(x)
 sign(x::Real) = ifelse(x < zero(x), oftype(one(x),-1), ifelse(x > zero(x), one(x), typeof(one(x))(x)))
@@ -148,6 +174,7 @@ julia> abs2(-3)
 9
 ```
 """
+abs2(x::Number) = abs(x)^2
 abs2(x::Real) = x*x
 
 """
@@ -222,10 +249,18 @@ inv(x::Number) = one(x)/x
 
 Multiply `x` and `y`, giving the result as a larger type.
 
+See also [`promote`](@ref), [`Base.add_sum`](@ref).
+
 # Examples
 ```jldoctest
-julia> widemul(Float32(3.), 4.)
-12.0
+julia> widemul(Float32(3.0), 4.0) isa BigFloat
+true
+
+julia> typemax(Int8) * typemax(Int8)
+1
+
+julia> widemul(typemax(Int8), typemax(Int8))  # == 127^2
+16129
 ```
 """
 widemul(x::Number, y::Number) = widen(x)*widen(y)
@@ -243,6 +278,8 @@ map(f, x::Number, ys::Number...) = f(x, ys...)
 
 Get the additive identity element for the type of `x` (`x` can also specify the type itself).
 
+See also [`iszero`](@ref), [`one`](@ref), [`oneunit`](@ref), [`oftype`](@ref).
+
 # Examples
 ```jldoctest
 julia> zero(1)
@@ -280,6 +317,9 @@ should return an identity value of the same precision
 If you want a quantity that is of the same type as `x`, or of type `T`,
 even if `x` is dimensionful, use [`oneunit`](@ref) instead.
 
+See also the [`identity`](@ref) function,
+and `I` in [`LinearAlgebra`](@ref man-linalg) for the identity matrix.
+
 # Examples
 ```jldoctest
 julia> one(3.7)
diff --git a/base/opaque_closure.jl b/base/opaque_closure.jl
new file mode 100644
index 00000000000000..ac2ae2e8bf3c04
--- /dev/null
+++ b/base/opaque_closure.jl
@@ -0,0 +1,69 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+"""
+    @opaque ([type, ]args...) -> body
+
+Marks a given closure as "opaque". Opaque closures capture the
+world age of their creation (as opposed to their invocation).
+This allows for more aggressive optimization of the capture
+list, but trades off against the ability to inline opaque
+closures at the call site, if their creation is not statically
+visible.
+
+An argument tuple type (`type`) may optionally be specified, to
+specify allowed argument types in a more flexible way. In particular,
+the argument type may be fixed length even if the function is variadic.
+
+!!! warning
+    This interface is experimental and subject to change or removal without notice.
+"""
+macro opaque(ex)
+    esc(Expr(:opaque_closure, ex))
+end
+
+macro opaque(ty, ex)
+    esc(Expr(:opaque_closure, ty, ex))
+end
+
+# OpaqueClosure construction from pre-inferred CodeInfo/IRCode
+using Core.Compiler: IRCode
+using Core: CodeInfo
+
+function compute_ir_rettype(ir::IRCode)
+    rt = Union{}
+    for i = 1:length(ir.stmts)
+        stmt = ir.stmts[i][:inst]
+        if isa(stmt, Core.Compiler.ReturnNode) && isdefined(stmt, :val)
+            rt = Core.Compiler.tmerge(Core.Compiler.argextype(stmt.val, ir), rt)
+        end
+    end
+    return Core.Compiler.widenconst(rt)
+end
+
+function Core.OpaqueClosure(ir::IRCode, env...;
+        nargs::Int = length(ir.argtypes)-1,
+        isva::Bool = false,
+        rt = compute_ir_rettype(ir))
+    if (isva && nargs > length(ir.argtypes)) || (!isva && nargs != length(ir.argtypes)-1)
+        throw(ArgumentError("invalid argument count"))
+    end
+    src = ccall(:jl_new_code_info_uninit, Ref{CodeInfo}, ())
+    src.slotflags = UInt8[]
+    src.slotnames = fill(:none, nargs+1)
+    src.slottypes = copy(ir.argtypes)
+    Core.Compiler.replace_code_newstyle!(src, ir, nargs+1)
+    Core.Compiler.widen_all_consts!(src)
+    src.inferred = true
+    # NOTE: we need ir.argtypes[1] == typeof(env)
+
+    ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any),
+          Tuple{ir.argtypes[2:end]...}, Union{}, rt, @__MODULE__, src, 0, nothing, nargs, isva, env)
+end
+
+function Core.OpaqueClosure(src::CodeInfo, env...)
+    M = src.parent.def
+    sig = Base.tuple_type_tail(src.parent.specTypes)
+
+    ccall(:jl_new_opaque_closure_from_code_info, Any, (Any, Any, Any, Any, Any, Cint, Any, Cint, Cint, Any),
+          sig, Union{}, src.rettype, @__MODULE__, src, 0, nothing, M.nargs - 1, M.isva, env)
+end
diff --git a/base/operators.jl b/base/operators.jl
index 3ac24abfb611c2..92c016d00bf030 100644
--- a/base/operators.jl
+++ b/base/operators.jl
@@ -40,15 +40,8 @@ julia> supertype(Int32)
 Signed
 ```
 """
-function supertype(T::DataType)
-    @_pure_meta
-    T.super
-end
-
-function supertype(T::UnionAll)
-    @_pure_meta
-    UnionAll(T.var, supertype(T.body))
-end
+supertype(T::DataType) = (@_total_meta; T.super)
+supertype(T::UnionAll) = (@_total_meta; UnionAll(T.var, supertype(T.body)))
 
 ## generic comparison ##
 
@@ -79,6 +72,9 @@ handle comparison to other types via promotion rules where possible.
 [`isequal`](@ref) falls back to `==`, so new methods of `==` will be used by the
 [`Dict`](@ref) type to compare keys. If your type will be used as a dictionary key, it
 should therefore also implement [`hash`](@ref).
+
+If some type defines `==`, [`isequal`](@ref), and [`isless`](@ref) then it should
+also implement [`<`](@ref) to ensure consistency of comparisons.
 """
 ==
 
@@ -90,6 +86,10 @@ and of missing values. `isequal` treats all floating-point `NaN` values as equal
 to each other, treats `-0.0` as unequal to `0.0`, and [`missing`](@ref) as equal
 to `missing`. Always returns a `Bool` value.
 
+`isequal` is an equivalence relation - it is reflexive (`===` implies `isequal`), symmetric
+(`isequal(a, b)` implies `isequal(b, a)`) and transitive (`isequal(a, b)` and
+`isequal(b, c)` implies `isequal(a, c)`).
+
 # Implementation
 The default implementation of `isequal` calls `==`, so a type that does not involve
 floating-point values generally only needs to define `==`.
@@ -98,8 +98,12 @@ floating-point values generally only needs to define `==`.
 that `hash(x) == hash(y)`.
 
 This typically means that types for which a custom `==` or `isequal` method exists must
-implement a corresponding `hash` method (and vice versa). Collections typically implement
-`isequal` by calling `isequal` recursively on all contents.
+implement a corresponding [`hash`](@ref) method (and vice versa). Collections typically
+implement `isequal` by calling `isequal` recursively on all contents.
+
+Furthermore, `isequal` is linked with [`isless`](@ref), and they work together to
+define a fixed total ordering, where exactly one of `isequal(x, y)`, `isless(x, y)`, or
+`isless(y, x)` must be `true` (and the other two `false`).
 
 Scalar types generally do not need to implement `isequal` separate from `==`, unless they
 represent floating-point numbers amenable to a more efficient implementation than that
@@ -118,9 +122,15 @@ true
 
 julia> isequal(0.0, -0.0)
 false
+
+julia> missing == missing
+missing
+
+julia> isequal(missing, missing)
+true
 ```
 """
-isequal(x, y) = x == y
+isequal(x, y) = (x == y)::Bool # all `missing` cases are handled in missing.jl
 
 signequal(x, y) = signbit(x)::Bool == signbit(y)::Bool
 signless(x, y) = signbit(x)::Bool & !signbit(y)::Bool
@@ -132,8 +142,8 @@ isequal(x::AbstractFloat, y::Real         ) = (isnan(x) & isnan(y)) | signequal(
 """
     isless(x, y)
 
-Test whether `x` is less than `y`, according to a fixed total order.
-`isless` is not defined on all pairs of values `(x, y)`. However, if it
+Test whether `x` is less than `y`, according to a fixed total order (defined together with
+[`isequal`](@ref)). `isless` is not defined on all pairs of values `(x, y)`. However, if it
 is defined, it is expected to satisfy the following:
 - If `isless(x, y)` is defined, then so is `isless(y, x)` and `isequal(x, y)`,
   and exactly one of those three yields `true`.
@@ -141,7 +151,7 @@ is defined, it is expected to satisfy the following:
   `isless(x, y) && isless(y, z)` implies `isless(x, z)`.
 
 Values that are normally unordered, such as `NaN`,
-are ordered in an arbitrary but consistent fashion.
+are ordered after regular values.
 [`missing`](@ref) values are ordered last.
 
 This is the default comparison used by [`sort`](@ref).
@@ -150,15 +160,17 @@ This is the default comparison used by [`sort`](@ref).
 Non-numeric types with a total order should implement this function.
 Numeric types only need to implement it if they have special values such as `NaN`.
 Types with a partial order should implement [`<`](@ref).
+See the documentation on [Alternate orderings](@ref) for how to define alternate
+ordering methods that can be used in sorting and related functions.
 
 # Examples
- ```jldoctest
- julia> isless(1, 3)
- true
+```jldoctest
+julia> isless(1, 3)
+true
 
- julia> isless("Red", "Blue")
- false
- ```
+julia> isless("Red", "Blue")
+false
+```
 """
 function isless end
 
@@ -166,15 +178,70 @@ isless(x::AbstractFloat, y::AbstractFloat) = (!isnan(x) & (isnan(y) | signless(x
 isless(x::Real,          y::AbstractFloat) = (!isnan(x) & (isnan(y) | signless(x, y))) | (x < y)
 isless(x::AbstractFloat, y::Real         ) = (!isnan(x) & (isnan(y) | signless(x, y))) | (x < y)
 
+"""
+    isgreater(x, y)
 
-function ==(T::Type, S::Type)
-    @_pure_meta
-    return ccall(:jl_types_equal, Cint, (Any, Any), T, S) != 0
-end
-function !=(T::Type, S::Type)
-    @_pure_meta
-    return !(T == S)
-end
+Not the inverse of `isless`! Test whether `x` is greater than `y`, according to
+a fixed total order compatible with `min`.
+
+Defined with `isless`, this function is usually `isless(y, x)`, but `NaN` and
+[`missing`](@ref) are ordered as smaller than any ordinary value with `missing`
+smaller than `NaN`.
+
+So `isless` defines an ascending total order with `NaN` and `missing` as the
+largest values and `isgreater` defines a descending total order with `NaN` and
+`missing` as the smallest values.
+
+!!! note
+
+    Like `min`, `isgreater` orders containers (tuples, vectors, etc)
+    lexicographically with `isless(y, x)` rather than recursively with itself:
+
+    ```jldoctest
+    julia> Base.isgreater(1, NaN) # 1 is greater than NaN
+    true
+
+    julia> Base.isgreater((1,), (NaN,)) # But (1,) is not greater than (NaN,)
+    false
+
+    julia> sort([1, 2, 3, NaN]; lt=Base.isgreater)
+    4-element Vector{Float64}:
+       3.0
+       2.0
+       1.0
+     NaN
+
+    julia> sort(tuple.([1, 2, 3, NaN]); lt=Base.isgreater)
+    4-element Vector{Tuple{Float64}}:
+     (NaN,)
+     (3.0,)
+     (2.0,)
+     (1.0,)
+    ```
+
+# Implementation
+This is unexported. Types should not usually implement this function. Instead, implement `isless`.
+"""
+isgreater(x, y) = isunordered(x) || isunordered(y) ? isless(x, y) : isless(y, x)
+
+"""
+    isunordered(x)
+
+Return `true` if `x` is a value that is not orderable according to [`<`](@ref), such as `NaN`
+or `missing`.
+
+The values that evaluate to `true` with this predicate may be orderable with respect to other
+orderings such as [`isless`](@ref).
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
+"""
+isunordered(x) = false
+isunordered(x::AbstractFloat) = isnan(x)
+isunordered(x::Missing) = true
+
+==(T::Type, S::Type) = (@_total_meta; ccall(:jl_types_equal, Cint, (Any, Any), T, S) != 0)
+!=(T::Type, S::Type) = (@_total_meta; !(T == S))
 ==(T::TypeVar, S::Type) = false
 ==(T::Type, S::TypeVar) = false
 
@@ -260,7 +327,6 @@ a partial order.
 New numeric types with a canonical partial order should implement this function for
 two arguments of the new type.
 Types with a canonical total order should implement [`isless`](@ref) instead.
-(x < y) | (x == y)
 
 # Examples
 ```jldoctest
@@ -354,22 +420,6 @@ const ≥ = >=
 # which is more idiomatic:
 isless(x::Real, y::Real) = x<y
 
-"""
-    ifelse(condition::Bool, x, y)
-
-Return `x` if `condition` is `true`, otherwise return `y`. This differs from `?` or `if` in
-that it is an ordinary function, so all the arguments are evaluated first. In some cases,
-using `ifelse` instead of an `if` statement can eliminate the branch in generated code and
-provide higher performance in tight loops.
-
-# Examples
-```jldoctest
-julia> ifelse(1 > 2, 1, 2)
-2
-```
-"""
-ifelse
-
 """
     cmp(x,y)
 
@@ -405,7 +455,7 @@ cmp(x::Integer, y::Integer) = ifelse(isless(x, y), -1, ifelse(isless(y, x), 1, 0
 """
     max(x, y, ...)
 
-Return the maximum of the arguments. See also the [`maximum`](@ref) function
+Return the maximum of the arguments (with respect to [`isless`](@ref)). See also the [`maximum`](@ref) function
 to take the maximum element from a collection.
 
 # Examples
@@ -419,7 +469,7 @@ max(x, y) = ifelse(isless(y, x), x, y)
 """
     min(x, y, ...)
 
-Return the minimum of the arguments. See also the [`minimum`](@ref) function
+Return the minimum of the arguments (with respect to [`isless`](@ref)). See also the [`minimum`](@ref) function
 to take the minimum element from a collection.
 
 # Examples
@@ -433,7 +483,9 @@ min(x,y) = ifelse(isless(y, x), y, x)
 """
     minmax(x, y)
 
-Return `(min(x,y), max(x,y))`. See also: [`extrema`](@ref) that returns `(minimum(x), maximum(x))`.
+Return `(min(x,y), max(x,y))`.
+
+See also [`extrema`](@ref) that returns `(minimum(x), maximum(x))`.
 
 # Examples
 ```jldoctest
@@ -443,58 +495,6 @@ julia> minmax('c','b')
 """
 minmax(x,y) = isless(y, x) ? (y, x) : (x, y)
 
-"""
-    extrema(itr) -> Tuple
-
-Compute both the minimum and maximum element in a single pass, and return them as a 2-tuple.
-
-# Examples
-```jldoctest
-julia> extrema(2:10)
-(2, 10)
-
-julia> extrema([9,pi,4.5])
-(3.141592653589793, 9.0)
-```
-"""
-extrema(itr) = _extrema_itr(identity, itr)
-
-"""
-    extrema(f, itr) -> Tuple
-
-Compute both the minimum and maximum of `f` applied to each element in `itr` and return
-them as a 2-tuple. Only one pass is made over `itr`.
-
-!!! compat "Julia 1.2"
-    This method requires Julia 1.2 or later.
-
-# Examples
-```jldoctest
-julia> extrema(sin, 0:π)
-(0.0, 0.9092974268256817)
-```
-"""
-extrema(f, itr) = _extrema_itr(f, itr)
-
-function _extrema_itr(f, itr)
-    y = iterate(itr)
-    y === nothing && throw(ArgumentError("collection must be non-empty"))
-    (v, s) = y
-    vmin = vmax = f(v)
-    while true
-        y = iterate(itr, s)
-        y === nothing && break
-        (x, s) = y
-        fx = f(x)
-        vmax = max(fx, vmax)
-        vmin = min(fx, vmin)
-    end
-    return (vmin, vmax)
-end
-
-extrema(x::Real) = (x, x)
-extrema(f, x::Real) = (y = f(x); (y, y))
-
 ## definitions providing basic traits of arithmetic operators ##
 
 """
@@ -502,13 +502,15 @@ extrema(f, x::Real) = (y = f(x); (y, y))
 
 The identity function. Returns its argument.
 
+See also: [`one`](@ref), [`oneunit`](@ref), and [`LinearAlgebra`](@ref man-linalg)'s `I`.
+
 # Examples
 ```jldoctest
 julia> identity("Well, what did you expect?")
 "Well, what did you expect?"
 ```
 """
-identity(x) = x
+identity(@nospecialize x) = x
 
 +(x::Number) = x
 *(x::Number) = x
@@ -517,25 +519,63 @@ identity(x) = x
 xor(x::Integer) = x
 
 const ⊻ = xor
+const ⊼ = nand
+const ⊽ = nor
 
-# foldl for argument lists. expand recursively up to a point, then
-# switch to a loop. this allows small cases like `a+b+c+d` to be inlined
+# foldl for argument lists. expand fully up to a point, then
+# switch to a loop. this allows small cases like `a+b+c+d` to be managed
 # efficiently, without a major slowdown for `+(x...)` when `x` is big.
-afoldl(op,a) = a
-afoldl(op,a,b) = op(a,b)
-afoldl(op,a,b,c...) = afoldl(op, op(a,b), c...)
-function afoldl(op,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,qs...)
-    y = op(op(op(op(op(op(op(op(op(op(op(op(op(op(op(a,b),c),d),e),f),g),h),i),j),k),l),m),n),o),p)
-    for x in qs; y = op(y,x); end
-    y
+# n.b.: keep this method count small, so it can be inferred without hitting the
+# method count limit in inference
+afoldl(op, a) = a
+function afoldl(op, a, bs...)
+    l = length(bs)
+    i =  0; y = a;            l == i && return y
+    #@nexprs 31 i -> (y = op(y, bs[i]); l == i && return y)
+    i =  1; y = op(y, bs[i]); l == i && return y
+    i =  2; y = op(y, bs[i]); l == i && return y
+    i =  3; y = op(y, bs[i]); l == i && return y
+    i =  4; y = op(y, bs[i]); l == i && return y
+    i =  5; y = op(y, bs[i]); l == i && return y
+    i =  6; y = op(y, bs[i]); l == i && return y
+    i =  7; y = op(y, bs[i]); l == i && return y
+    i =  8; y = op(y, bs[i]); l == i && return y
+    i =  9; y = op(y, bs[i]); l == i && return y
+    i = 10; y = op(y, bs[i]); l == i && return y
+    i = 11; y = op(y, bs[i]); l == i && return y
+    i = 12; y = op(y, bs[i]); l == i && return y
+    i = 13; y = op(y, bs[i]); l == i && return y
+    i = 14; y = op(y, bs[i]); l == i && return y
+    i = 15; y = op(y, bs[i]); l == i && return y
+    i = 16; y = op(y, bs[i]); l == i && return y
+    i = 17; y = op(y, bs[i]); l == i && return y
+    i = 18; y = op(y, bs[i]); l == i && return y
+    i = 19; y = op(y, bs[i]); l == i && return y
+    i = 20; y = op(y, bs[i]); l == i && return y
+    i = 21; y = op(y, bs[i]); l == i && return y
+    i = 22; y = op(y, bs[i]); l == i && return y
+    i = 23; y = op(y, bs[i]); l == i && return y
+    i = 24; y = op(y, bs[i]); l == i && return y
+    i = 25; y = op(y, bs[i]); l == i && return y
+    i = 26; y = op(y, bs[i]); l == i && return y
+    i = 27; y = op(y, bs[i]); l == i && return y
+    i = 28; y = op(y, bs[i]); l == i && return y
+    i = 29; y = op(y, bs[i]); l == i && return y
+    i = 30; y = op(y, bs[i]); l == i && return y
+    i = 31; y = op(y, bs[i]); l == i && return y
+    for i in (i + 1):l
+        y = op(y, bs[i])
+    end
+    return y
 end
+typeof(afoldl).name.mt.max_args = 34
 
 for op in (:+, :*, :&, :|, :xor, :min, :max, :kron)
     @eval begin
         # note: these definitions must not cause a dispatch loop when +(a,b) is
         # not defined, and must only try to call 2-argument definitions, so
         # that defining +(a,b) is sufficient for full functionality.
-        ($op)(a, b, c, xs...) = afoldl($op, ($op)(($op)(a,b),c), xs...)
+        ($op)(a, b, c, xs...) = (@inline; afoldl($op, ($op)(($op)(a,b),c), xs...))
         # a further concern is that it's easy for a type like (Int,Int...)
         # to match many definitions, so we need to keep the number of
         # definitions down to avoid losing type information.
@@ -597,16 +637,16 @@ julia> bitstring(Int8(3))
 julia> bitstring(Int8(12))
 "00001100"
 ```
-See also [`>>`](@ref), [`>>>`](@ref).
+See also [`>>`](@ref), [`>>>`](@ref), [`exp2`](@ref), [`ldexp`](@ref).
 """
 function <<(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     typemin(Int) <= c <= typemax(Int) && return x << (c % Int)
     (x >= 0 || c >= 0) && return zero(x) << 0  # for type stability
     oftype(x, -1)
 end
 function <<(x::Integer, c::Unsigned)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(<<, (x, c)))
     end
@@ -645,7 +685,7 @@ julia> bitstring(Int8(-4))
 See also [`>>>`](@ref), [`<<`](@ref).
 """
 function >>(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(>>, (x, c)))
     end
@@ -683,11 +723,11 @@ is equivalent to [`>>`](@ref).
 See also [`>>`](@ref), [`<<`](@ref).
 """
 function >>>(x::Integer, c::Integer)
-    @_inline_meta
+    @inline
     typemin(Int) <= c <= typemax(Int) ? x >>> (c % Int) : zero(x) >>> 0
 end
 function >>>(x::Integer, c::Unsigned)
-    @_inline_meta
+    @inline
     if c isa UInt
         throw(MethodError(>>>, (x, c)))
     end
@@ -704,6 +744,8 @@ end
 Remainder from Euclidean division, returning a value of the same sign as `x`, and smaller in
 magnitude than `y`. This value is always exact.
 
+See also: [`div`](@ref), [`mod`](@ref), [`mod1`](@ref), [`divrem`](@ref).
+
 # Examples
 ```jldoctest
 julia> x = 15; y = 4;
@@ -713,6 +755,10 @@ julia> x % y
 
 julia> x == div(x, y) * y + rem(x, y)
 true
+
+julia> rem.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ -2  -1  0  -2  -1  0  1  2  0  1  2
 ```
 """
 rem
@@ -722,7 +768,10 @@ const % = rem
     div(x, y)
     ÷(x, y)
 
-The quotient from Euclidean division. Computes `x/y`, truncated to an integer.
+The quotient from Euclidean (integer) division. Generally equivalent
+to a mathematical operation x/y without a fractional part.
+
+See also: [`cld`](@ref), [`fld`](@ref), [`rem`](@ref), [`divrem`](@ref).
 
 # Examples
 ```jldoctest
@@ -734,6 +783,10 @@ julia> -5 ÷ 3
 
 julia> 5.0 ÷ 2
 2.0
+
+julia> div.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ -1  -1  -1  0  0  0  0  0  1  1  1
 ```
 """
 div
@@ -745,15 +798,24 @@ const ÷ = div
 Modulus after flooring division, returning a value `r` such that `mod(r, y) == mod(x, y)`
 in the range ``(0, y]`` for positive `y` and in the range ``[y,0)`` for negative `y`.
 
-See also: [`fld1`](@ref), [`fldmod1`](@ref).
+With integer arguments and positive `y`, this is equal to `mod(x, 1:y)`, and hence natural
+for 1-based indexing. By comparison, `mod(x, y) == mod(x, 0:y-1)` is natural for computations with
+offsets or strides.
+
+See also [`mod`](@ref), [`fld1`](@ref), [`fldmod1`](@ref).
 
 # Examples
 ```jldoctest
 julia> mod1(4, 2)
 2
 
-julia> mod1(4, 3)
-1
+julia> mod1.(-5:5, 3)'
+1×11 adjoint(::Vector{Int64}) with eltype Int64:
+ 1  2  3  1  2  3  1  2  3  1  2
+
+julia> mod1.([-0.1, 0, 0.1, 1, 2, 2.9, 3, 3.1]', 3)
+1×8 Matrix{Float64}:
+ 2.9  3.0  0.1  1.0  2.0  2.9  3.0  0.1
 ```
 """
 mod1(x::T, y::T) where {T<:Real} = (m = mod(x, y); ifelse(m == 0, y, m))
@@ -764,7 +826,7 @@ mod1(x::T, y::T) where {T<:Real} = (m = mod(x, y); ifelse(m == 0, y, m))
 
 Flooring division, returning a value consistent with `mod1(x,y)`
 
-See also: [`mod1`](@ref), [`fldmod1`](@ref).
+See also [`mod1`](@ref), [`fldmod1`](@ref).
 
 # Examples
 ```jldoctest
@@ -791,7 +853,7 @@ end
 
 Return `(fld1(x,y), mod1(x,y))`.
 
-See also: [`fld1`](@ref), [`mod1`](@ref).
+See also [`fld1`](@ref), [`mod1`](@ref).
 """
 fldmod1(x, y) = (fld1(x, y), mod1(x, y))
 
@@ -826,21 +888,53 @@ widen(x::Type{T}) where {T} = throw(MethodError(widen, (T,)))
     |>(x, f)
 
 Applies a function to the preceding argument. This allows for easy function chaining.
+When used with anonymous functions, parentheses are typically required around the definition to get the intended chain.
 
 # Examples
 ```jldoctest
-julia> [1:5;] |> x->x.^2 |> sum |> inv
+julia> [1:5;] .|> (x -> x^2) |> sum |> inv
 0.01818181818181818
 ```
 """
 |>(x, f) = f(x)
 
+"""
+    f = Returns(value)
+
+Create a callable `f` such that `f(args...; kw...) === value` holds.
+
+# Examples
+
+```jldoctest
+julia> f = Returns(42);
+
+julia> f(1)
+42
+
+julia> f("hello", x=32)
+42
+
+julia> f.value
+42
+```
+
+!!! compat "Julia 1.7"
+    `Returns` requires at least Julia 1.7.
+"""
+struct Returns{V} <: Function
+    value::V
+    Returns{V}(value) where {V} = new{V}(value)
+    Returns(value) = new{Core.Typeof(value)}(value)
+end
+
+(obj::Returns)(@nospecialize(args...); @nospecialize(kw...)) = obj.value
+
 # function composition
 
 """
     f ∘ g
 
-Compose functions: i.e. `(f ∘ g)(args...)` means `f(g(args...))`. The `∘` symbol can be
+Compose functions: i.e. `(f ∘ g)(args...; kwargs...)` means `f(g(args...; kwargs...))`. The `∘` symbol can be
 entered in the Julia REPL (and most editors, appropriately configured) by typing `\\circ<tab>`.
 
 Function composition also works in prefix form: `∘(f, g)` is the same as `f ∘ g`.
@@ -851,7 +945,10 @@ and splatting `∘(fs...)` for composing an iterable collection of functions.
     Multiple function composition requires at least Julia 1.4.
 
 !!! compat "Julia 1.5"
-    Composition of one function ∘(f)  requires at least Julia 1.5.
+    Composition of one function ∘(f) requires at least Julia 1.5.
+
+!!! compat "Julia 1.7"
+    Using keyword arguments requires at least Julia 1.7.
 
 # Examples
 ```jldoctest
@@ -871,33 +968,80 @@ julia> fs = [
 julia> ∘(fs...)(3)
 3.0
 ```
+See also [`ComposedFunction`](@ref), [`!f::Function`](@ref).
 """
 function ∘ end
 
-struct ComposedFunction{F,G} <: Function
-    f::F
-    g::G
-    ComposedFunction{F, G}(f, g) where {F, G} = new{F, G}(f, g)
-    ComposedFunction(f, g) = new{Core.Typeof(f),Core.Typeof(g)}(f, g)
+"""
+    ComposedFunction{Outer,Inner} <: Function
+
+Represents the composition of two callable objects `outer::Outer` and `inner::Inner`. That is
+```julia
+ComposedFunction(outer, inner)(args...; kw...) === outer(inner(args...; kw...))
+```
+The preferred way to construct instance of `ComposedFunction` is to use the composition operator [`∘`](@ref):
+```jldoctest
+julia> sin ∘ cos === ComposedFunction(sin, cos)
+true
+
+julia> typeof(sin∘cos)
+ComposedFunction{typeof(sin), typeof(cos)}
+```
+The composed pieces are stored in the fields of `ComposedFunction` and can be retrieved as follows:
+```jldoctest
+julia> composition = sin ∘ cos
+sin ∘ cos
+
+julia> composition.outer === sin
+true
+
+julia> composition.inner === cos
+true
+```
+!!! compat "Julia 1.6"
+    ComposedFunction requires at least Julia 1.6. In earlier versions `∘` returns an anonymous function instead.
+
+See also [`∘`](@ref).
+"""
+struct ComposedFunction{O,I} <: Function
+    outer::O
+    inner::I
+    ComposedFunction{O, I}(outer, inner) where {O, I} = new{O, I}(outer, inner)
+    ComposedFunction(outer, inner) = new{Core.Typeof(outer),Core.Typeof(inner)}(outer, inner)
 end
 
-(c::ComposedFunction)(x...) = c.f(c.g(x...))
+(c::ComposedFunction)(x...; kw...) = c.outer(c.inner(x...; kw...))
 
 ∘(f) = f
 ∘(f, g) = ComposedFunction(f, g)
 ∘(f, g, h...) = ∘(f ∘ g, h...)
 
 function show(io::IO, c::ComposedFunction)
-    show(io, c.f)
+    c.outer isa ComposedFunction ? show(io, c.outer) : _showcomposed(io, c.outer)
     print(io, " ∘ ")
-    show(io, c.g)
+    _showcomposed(io, c.inner)
+end
+
+#shows !f instead of (!) ∘ f when ! is the outermost function
+function show(io::IO, c::ComposedFunction{typeof(!)})
+    print(io, '!')
+    _showcomposed(io, c.inner)
 end
 
+_showcomposed(io::IO, x) = show(io, x)
+#display operators like + and - inside parens
+_showcomposed(io::IO, f::Function) = isoperator(Symbol(f)) ? (print(io, '('); show(io, f); print(io, ')')) : show(io, f)
+#nesting for chained composition
+_showcomposed(io::IO, f::ComposedFunction) = (print(io, '('); show(io, f); print(io, ')'))
+#no nesting when ! is the outer function in a composition chain
+_showcomposed(io::IO, f::ComposedFunction{typeof(!)}) = show(io, f)
+
 """
     !f::Function
 
-Predicate function negation: when the argument of `!` is a function, it returns a
-function which computes the boolean negation of `f`.
+Predicate function negation: when the argument of `!` is a function, it returns a composed function which computes the boolean negation of `f`.
+
+See also [`∘`](@ref).
 
 # Examples
 ```jldoctest
@@ -910,8 +1054,12 @@ julia> filter(isletter, str)
 julia> filter(!isletter, str)
 "∀  > 0, ∃  > 0: |-| <  ⇒ |()-()| < "
 ```
+
+!!! compat "Julia 1.9"
+    Starting with Julia 1.9, `!f` returns a [`ComposedFunction`](@ref) instead of an anonymous function.
 """
-!(f::Function) = (x...)->!f(x...)
+!(f::Function) = (!) ∘ f
+!(f::ComposedFunction{typeof(!)}) = f.inner #allows !!f === f
 
 """
     Fix1(f, x)
@@ -919,6 +1067,8 @@ julia> filter(!isletter, str)
 A type representing a partially-applied version of the two-argument function
 `f`, with the first argument fixed to the value "x". In other words,
 `Fix1(f, x)` behaves similarly to `y->f(x, y)`.
+
+See also [`Fix2`](@ref Base.Fix2).
 """
 struct Fix1{F,T} <: Function
     f::F
@@ -1057,13 +1207,15 @@ julia> map(Base.splat(+), zip(1:3,4:6))
 """
 splat(f) = args->f(args...)
 
-## in & contains
+## in and related operators
 
 """
-    in(x)
+    in(collection)
+    ∈(collection)
 
-Create a function that checks whether its argument is [`in`](@ref) `x`, i.e.
-a function equivalent to `y -> y in x`.
+Create a function that checks whether its argument is [`in`](@ref) `collection`, i.e.
+a function equivalent to `y -> y in collection`. See also [`insorted`](@ref) for use
+with sorted collections.
 
 The returned function is of type `Base.Fix2{typeof(in)}`, which can be
 used to implement specialized methods.
@@ -1084,14 +1236,34 @@ function in(x, itr)
 end
 
 const ∈ = in
-∋(itr, x) = ∈(x, itr)
 ∉(x, itr) = !∈(x, itr)
+∉(itr) = Fix2(∉, itr)
+
+"""
+    ∋(collection, item) -> Bool
+
+Like [`in`](@ref), but with arguments in reverse order.
+Avoid adding methods to this function; define `in` instead.
+"""
+∋(itr, x) = in(x, itr)
+
+"""
+    ∋(item)
+
+Create a function that checks whether its argument contains the given `item`, i.e.
+a function equivalent to `y -> item in y`.
+
+!!! compat "Julia 1.6"
+    This method requires Julia 1.6 or later.
+"""
+∋(x) = Fix2(∋, x)
+
 ∌(itr, x) = !∋(itr, x)
+∌(x) = Fix2(∌, x)
 
 """
     in(item, collection) -> Bool
     ∈(item, collection) -> Bool
-    ∋(collection, item) -> Bool
 
 Determine whether an item is in the given collection, in the sense that it is
 [`==`](@ref) to one of the values generated by iterating over the collection.
@@ -1157,8 +1329,10 @@ julia> [1, 2] .∈ ([2, 3],)
  0
  1
 ```
+
+See also: [`insorted`](@ref), [`contains`](@ref), [`occursin`](@ref), [`issubset`](@ref).
 """
-in, ∋
+in
 
 """
     ∉(item, collection) -> Bool
diff --git a/base/options.jl b/base/options.jl
index a23dd62f78b438..63f73982b2e8ec 100644
--- a/base/options.jl
+++ b/base/options.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# NOTE: This type needs to be kept in sync with jl_options in src/julia.h
+# NOTE: This type needs to be kept in sync with jl_options in src/jloptions.h
 struct JLOptions
     quiet::Int8
     banner::Int8
@@ -9,7 +9,9 @@ struct JLOptions
     commands::Ptr{Ptr{UInt8}} # (e)eval, (E)print, (L)load
     image_file::Ptr{UInt8}
     cpu_target::Ptr{UInt8}
-    nthreads::Int32
+    nthreadpools::Int16
+    nthreads::Int16
+    nthreads_per_pool::Ptr{Int16}
     nprocs::Int32
     machine_file::Ptr{UInt8}
     project::Ptr{UInt8}
@@ -20,7 +22,9 @@ struct JLOptions
     compile_enabled::Int8
     code_coverage::Int8
     malloc_log::Int8
+    tracked_path::Ptr{UInt8}
     opt_level::Int8
+    opt_level_min::Int8
     debug_level::Int8
     check_bounds::Int8
     depwarn::Int8
@@ -45,6 +49,9 @@ struct JLOptions
     image_file_specified::Int8
     warn_scope::Int8
     image_codegen::Int8
+    rr_detach::Int8
+    strip_metadata::Int8
+    strip_ir::Int8
 end
 
 # This runs early in the sysimage != is not defined yet
@@ -84,3 +91,7 @@ function unsafe_load_commands(v::Ptr{Ptr{UInt8}})
     end
     return cmds
 end
+
+function is_file_tracked(file::Symbol)
+    return ccall(:jl_is_file_tracked, Cint, (Any,), file) == 1
+end
diff --git a/base/ordering.jl b/base/ordering.jl
index dc2f2c8595be23..e49102159c9620 100644
--- a/base/ordering.jl
+++ b/base/ordering.jl
@@ -6,7 +6,7 @@ module Order
 import ..@__MODULE__, ..parentmodule
 const Base = parentmodule(@__MODULE__)
 import .Base:
-    AbstractVector, @propagate_inbounds, isless, identity, getindex,
+    AbstractVector, @propagate_inbounds, isless, identity, getindex, reverse,
     +, -, !, &, <, |
 
 ## notions of element ordering ##
@@ -18,9 +18,26 @@ export # not exported by Base
     DirectOrdering,
     lt, ord, ordtype
 
+"""
+    Base.Order.Ordering
+
+Abstract type which represents a total order on some set of elements.
+
+Use [`Base.Order.lt`](@ref) to compare two elements according to the ordering.
+"""
 abstract type Ordering end
 
 struct ForwardOrdering <: Ordering end
+
+"""
+    ReverseOrdering(fwd::Ordering=Forward)
+
+A wrapper which reverses an ordering.
+
+For a given `Ordering` `o`, the following holds for all  `a`, `b`:
+
+    lt(ReverseOrdering(o), a, b) == lt(o, b, a)
+"""
 struct ReverseOrdering{Fwd<:Ordering} <: Ordering
     fwd::Fwd
 end
@@ -29,11 +46,36 @@ ReverseOrdering(rev::ReverseOrdering) = rev.fwd
 ReverseOrdering(fwd::Fwd) where {Fwd} = ReverseOrdering{Fwd}(fwd)
 ReverseOrdering() = ReverseOrdering(ForwardOrdering())
 
+"""
+    reverse(o::Base.Ordering)
+
+reverses ordering specified by `o`.
+
+"""
+reverse(o::Ordering) = ReverseOrdering(o)
+
 const DirectOrdering = Union{ForwardOrdering,ReverseOrdering{ForwardOrdering}}
 
+"""
+    Base.Order.Forward
+
+Default ordering according to [`isless`](@ref).
+"""
 const Forward = ForwardOrdering()
+
+"""
+    Base.Order.Reverse
+
+Reverse ordering according to [`isless`](@ref).
+"""
 const Reverse = ReverseOrdering()
 
+"""
+    By(by, order::Ordering=Forward)
+
+`Ordering` which applies `order` to elements after they have been transformed
+by the function `by`.
+"""
 struct By{T, O} <: Ordering
     by::T
     order::O
@@ -42,10 +84,23 @@ end
 # backwards compatibility with VERSION < v"1.5-"
 By(by) = By(by, Forward)
 
+"""
+    Lt(lt)
+
+`Ordering` which calls `lt(a, b)` to compare elements. `lt` should
+obey the same rules as implementations of [`isless`](@ref).
+"""
 struct Lt{T} <: Ordering
     lt::T
 end
 
+"""
+    Perm(order::Ordering, data::AbstractVector)
+
+`Ordering` on the indices of `data` where `i` is less than `j` if `data[i]` is
+less than `data[j]` according to `order`. In the case that `data[i]` and
+`data[j]` are equal, `i` and `j` are compared by numeric value.
+"""
 struct Perm{O<:Ordering,V<:AbstractVector} <: Ordering
     order::O
     data::V
@@ -54,6 +109,11 @@ end
 ReverseOrdering(by::By) = By(by.by, ReverseOrdering(by.order))
 ReverseOrdering(perm::Perm) = Perm(ReverseOrdering(perm.order), perm.data)
 
+"""
+    lt(o::Ordering, a, b)
+
+Test whether `a` is less than `b` according to the ordering `o`.
+"""
 lt(o::ForwardOrdering,       a, b) = isless(a,b)
 lt(o::ReverseOrdering,       a, b) = lt(o.fwd,b,a)
 lt(o::By,                    a, b) = lt(o.order,o.by(a),o.by(b))
@@ -78,6 +138,22 @@ function _ord(lt, by, order::Ordering)
     end
 end
 
+"""
+    ord(lt, by, rev::Union{Bool, Nothing}, order::Ordering=Forward)
+
+Construct an [`Ordering`](@ref) object from the same arguments used by
+[`sort!`](@ref).
+Elements are first transformed by the function `by` (which may be
+[`identity`](@ref)) and are then compared according to either the function `lt`
+or an existing ordering `order`. `lt` should be [`isless`](@ref) or a function
+which obeys similar rules. Finally, the resulting order is reversed if
+`rev=true`.
+
+Passing an `lt` other than `isless` along with an `order` other than
+[`Base.Order.Forward`](@ref) or [`Base.Order.Reverse`](@ref) is not permitted,
+otherwise all options are independent and can be used together in all possible
+combinations.
+"""
 ord(lt, by, rev::Nothing, order::Ordering=Forward) = _ord(lt, by, order)
 
 function ord(lt, by, rev::Bool, order::Ordering=Forward)
diff --git a/base/pair.jl b/base/pair.jl
index 30fd91892ce4b5..b5dffbb4e7e866 100644
--- a/base/pair.jl
+++ b/base/pair.jl
@@ -1,18 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-struct Pair{A, B}
-    first::A
-    second::B
-    function Pair{A, B}(@nospecialize(a), @nospecialize(b)) where {A, B}
-        @_inline_meta
-        # if we didn't inline this, it's probably because the callsite was actually dynamic
-        # to avoid potentially compiling many copies of this, we mark the arguments with `@nospecialize`
-        # but also mark the whole function with `@inline` to ensure we will inline it whenever possible
-        # (even if `convert(::Type{A}, a::A)` for some reason was expensive)
-        return new(a, b)
-    end
-end
-Pair(a, b) = Pair{typeof(a), typeof(b)}(a, b)
 const => = Pair
 
 """
@@ -23,7 +10,7 @@ Construct a `Pair` object with type `Pair{typeof(x), typeof(y)}`. The elements
 are stored in the fields `first` and `second`. They can also be accessed via
 iteration (but a `Pair` is treated as a single "scalar" for broadcasting operations).
 
-See also: [`Dict`](@ref)
+See also [`Dict`](@ref).
 
 # Examples
 ```jldoctest
diff --git a/base/parse.jl b/base/parse.jl
index 1097e8a19b8040..1c911c96e1479c 100644
--- a/base/parse.jl
+++ b/base/parse.jl
@@ -194,10 +194,10 @@ function tryparse_internal(::Type{Bool}, sbuff::Union{String,SubString{String}},
     orig_end   = endpos
 
     # Ignore leading and trailing whitespace
-    while isspace(sbuff[startpos]) && startpos <= endpos
+    while startpos <= endpos && isspace(sbuff[startpos])
         startpos = nextind(sbuff, startpos)
     end
-    while isspace(sbuff[endpos]) && endpos >= startpos
+    while endpos >= startpos && isspace(sbuff[endpos])
         endpos = prevind(sbuff, endpos)
     end
 
diff --git a/base/partr.jl b/base/partr.jl
new file mode 100644
index 00000000000000..a4cfcb60fe5201
--- /dev/null
+++ b/base/partr.jl
@@ -0,0 +1,194 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module Partr
+
+using ..Threads: SpinLock, nthreads, threadid
+
+# a task minheap
+mutable struct taskheap
+    const lock::SpinLock
+    const tasks::Vector{Task}
+    @atomic ntasks::Int32
+    @atomic priority::UInt16
+    taskheap() = new(SpinLock(), Vector{Task}(undef, 256), zero(Int32), typemax(UInt16))
+end
+
+
+# multiqueue minheap state
+const heap_d = UInt32(8)
+const heaps = [Vector{taskheap}(undef, 0), Vector{taskheap}(undef, 0)]
+const heaps_lock = [SpinLock(), SpinLock()]
+const cong_unbias = [typemax(UInt32), typemax(UInt32)]
+
+
+cong(max::UInt32, unbias::UInt32) =
+    ccall(:jl_rand_ptls, UInt32, (UInt32, UInt32), max, unbias) + UInt32(1)
+
+function unbias_cong(max::UInt32)
+    return typemax(UInt32) - ((typemax(UInt32) % max) + UInt32(1))
+end
+
+
+function multiq_sift_up(heap::taskheap, idx::Int32)
+    while idx > Int32(1)
+        parent = (idx - Int32(2)) ÷ heap_d + Int32(1)
+        if heap.tasks[idx].priority < heap.tasks[parent].priority
+            t = heap.tasks[parent]
+            heap.tasks[parent] = heap.tasks[idx]
+            heap.tasks[idx] = t
+            idx = parent
+        else
+            break
+        end
+    end
+end
+
+
+function multiq_sift_down(heap::taskheap, idx::Int32)
+    if idx <= heap.ntasks
+        for child = (heap_d * idx - heap_d + 2):(heap_d * idx + 1)
+            child = Int(child)
+            child > length(heap.tasks) && break
+            if isassigned(heap.tasks, child) &&
+                    heap.tasks[child].priority < heap.tasks[idx].priority
+                t = heap.tasks[idx]
+                heap.tasks[idx] = heap.tasks[child]
+                heap.tasks[child] = t
+                multiq_sift_down(heap, Int32(child))
+            end
+        end
+    end
+end
+
+
+function multiq_size(tpid::Int8)
+    nt = UInt32(Threads._nthreads_in_pool(tpid))
+    tp = tpid + 1
+    tpheaps = heaps[tp]
+    heap_c = UInt32(2)
+    heap_p = UInt32(length(tpheaps))
+
+    if heap_c * nt <= heap_p
+        return heap_p
+    end
+
+    @lock heaps_lock[tp] begin
+        heap_p = UInt32(length(tpheaps))
+        nt = UInt32(Threads._nthreads_in_pool(tpid))
+        if heap_c * nt <= heap_p
+            return heap_p
+        end
+
+        heap_p += heap_c * nt
+        newheaps = Vector{taskheap}(undef, heap_p)
+        copyto!(newheaps, tpheaps)
+        for i = (1 + length(tpheaps)):heap_p
+            newheaps[i] = taskheap()
+        end
+        heaps[tp] = newheaps
+        cong_unbias[tp] = unbias_cong(heap_p)
+    end
+
+    return heap_p
+end
+
+
+function multiq_insert(task::Task, priority::UInt16)
+    tpid = ccall(:jl_get_task_threadpoolid, Int8, (Any,), task)
+    heap_p = multiq_size(tpid)
+    tp = tpid + 1
+
+    task.priority = priority
+
+    rn = cong(heap_p, cong_unbias[tp])
+    tpheaps = heaps[tp]
+    while !trylock(tpheaps[rn].lock)
+        rn = cong(heap_p, cong_unbias[tp])
+    end
+
+    heap = tpheaps[rn]
+    if heap.ntasks >= length(heap.tasks)
+        resize!(heap.tasks, length(heap.tasks) * 2)
+    end
+
+    ntasks = heap.ntasks + Int32(1)
+    @atomic :monotonic heap.ntasks = ntasks
+    heap.tasks[ntasks] = task
+    multiq_sift_up(heap, ntasks)
+    priority = heap.priority
+    if task.priority < priority
+        @atomic :monotonic heap.priority = task.priority
+    end
+    unlock(heap.lock)
+
+    return true
+end
+
+
+function multiq_deletemin()
+    local rn1, rn2
+    local prio1, prio2
+
+    tid = Threads.threadid()
+    tp = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1) + 1
+    tpheaps = heaps[tp]
+
+    @label retry
+    GC.safepoint()
+    heap_p = UInt32(length(tpheaps))
+    for i = UInt32(0):heap_p
+        if i == heap_p
+            return nothing
+        end
+        rn1 = cong(heap_p, cong_unbias[tp])
+        rn2 = cong(heap_p, cong_unbias[tp])
+        prio1 = tpheaps[rn1].priority
+        prio2 = tpheaps[rn2].priority
+        if prio1 > prio2
+            prio1 = prio2
+            rn1 = rn2
+        elseif prio1 == prio2 && prio1 == typemax(UInt16)
+            continue
+        end
+        if trylock(tpheaps[rn1].lock)
+            if prio1 == tpheaps[rn1].priority
+                break
+            end
+            unlock(tpheaps[rn1].lock)
+        end
+    end
+
+    heap = tpheaps[rn1]
+    task = heap.tasks[1]
+    if ccall(:jl_set_task_tid, Cint, (Any, Cint), task, tid-1) == 0
+        unlock(heap.lock)
+        @goto retry
+    end
+    ntasks = heap.ntasks
+    @atomic :monotonic heap.ntasks = ntasks - Int32(1)
+    heap.tasks[1] = heap.tasks[ntasks]
+    Base._unsetindex!(heap.tasks, Int(ntasks))
+    prio1 = typemax(UInt16)
+    if ntasks > 1
+        multiq_sift_down(heap, Int32(1))
+        prio1 = heap.tasks[1].priority
+    end
+    @atomic :monotonic heap.priority = prio1
+    unlock(heap.lock)
+
+    return task
+end
+
+
+function multiq_check_empty()
+    for j = UInt32(1):length(heaps)
+        for i = UInt32(1):length(heaps[j])
+            if heaps[j][i].ntasks != 0
+                return false
+            end
+        end
+    end
+    return true
+end
+
+end
diff --git a/base/path.jl b/base/path.jl
index 231f772923eee1..454fe5bd65d32f 100644
--- a/base/path.jl
+++ b/base/path.jl
@@ -36,7 +36,7 @@ elseif Sys.iswindows()
 
     function splitdrive(path::String)
         m = match(r"^([^\\]+:|\\\\[^\\]+\\[^\\]+|\\\\\?\\UNC\\[^\\]+\\[^\\]+|\\\\\?\\[^\\]+:|)(.*)$"s, path)
-        String(m.captures[1]), String(m.captures[2])
+        String(something(m.captures[1])), String(something(m.captures[2]))
     end
 else
     error("path primitives for this OS need to be defined")
@@ -72,7 +72,7 @@ function homedir()
         elseif rc == Base.UV_ENOBUFS
             resize!(buf, sz[] - 1) # space for null-terminator implied by StringVector
         else
-            uv_error(:homedir, rc)
+            uv_error("homedir()", rc)
         end
     end
 end
@@ -137,8 +137,11 @@ _splitdir_nodrive(path::String) = _splitdir_nodrive("", path)
 function _splitdir_nodrive(a::String, b::String)
     m = match(path_dir_splitter,b)
     m === nothing && return (a,b)
-    a = string(a, isempty(m.captures[1]) ? m.captures[2][1] : m.captures[1])
-    a, String(m.captures[3])
+    cs = m.captures
+    getcapture(cs, i) = cs[i]::AbstractString
+    c1, c2, c3 = getcapture(cs, 1), getcapture(cs, 2), getcapture(cs, 3)
+    a = string(a, isempty(c1) ? c2[1] : c1)
+    a, String(c3)
 end
 
 """
@@ -156,7 +159,7 @@ julia> dirname("/home/myuser/")
 "/home/myuser"
 ```
 
-See also: [`basename`](@ref)
+See also [`basename`](@ref).
 """
  dirname(path::AbstractString) = splitdir(path)[1]
 
@@ -165,21 +168,28 @@ See also: [`basename`](@ref)
 
 Get the file name part of a path.
 
+!!! note
+    This function differs slightly from the Unix `basename` program, where trailing slashes are ignored,
+    i.e. `\$ basename /foo/bar/` returns `bar`, whereas `basename` in Julia returns an empty string `""`.
+
 # Examples
 ```jldoctest
 julia> basename("/home/myuser/example.jl")
 "example.jl"
+
+julia> basename("/home/myuser/")
+""
 ```
 
-See also: [`dirname`](@ref)
+See also [`dirname`](@ref).
 """
 basename(path::AbstractString) = splitdir(path)[2]
 
 """
     splitext(path::AbstractString) -> (AbstractString, AbstractString)
 
-If the last component of a path contains a dot, split the path into everything before the
-dot and everything including and after the dot. Otherwise, return a tuple of the argument
+If the last component of a path contains one or more dots, split the path into everything before the
+last dot and everything including and after the dot. Otherwise, return a tuple of the argument
 unmodified and the empty string. "splitext" is short for "split extension".
 
 # Examples
@@ -187,15 +197,18 @@ unmodified and the empty string. "splitext" is short for "split extension".
 julia> splitext("/home/myuser/example.jl")
 ("/home/myuser/example", ".jl")
 
-julia> splitext("/home/myuser/example")
-("/home/myuser/example", "")
+julia> splitext("/home/myuser/example.tar.gz")
+("/home/myuser/example.tar", ".gz")
+
+julia> splitext("/home/my.user/example")
+("/home/my.user/example", "")
 ```
 """
 function splitext(path::String)
     a, b = splitdrive(path)
     m = match(path_ext_splitter, b)
     m === nothing && return (path,"")
-    a*m.captures[1], String(m.captures[2])
+    (a*something(m.captures[1])), String(something(m.captures[2]))
 end
 
 # NOTE: deprecated in 1.4
@@ -241,16 +254,19 @@ function splitpath(p::String)
     return out
 end
 
-joinpath(path::AbstractString)::String = path
-
 if Sys.iswindows()
 
-function joinpath(path::AbstractString, paths::AbstractString...)::String
-    result_drive, result_path = splitdrive(path)
+function joinpath(paths::Union{Tuple, AbstractVector})::String
+    assertstring(x) = x isa AbstractString || throw(ArgumentError("path component is not a string: $(repr(x))"))
+
+    isempty(paths) && throw(ArgumentError("collection of path components must be non-empty"))
+    assertstring(paths[1])
+    result_drive, result_path = splitdrive(paths[1])
 
-    local p_drive, p_path
-    for p in paths
-        p_drive, p_path = splitdrive(p)
+    p_path = ""
+    for i in firstindex(paths)+1:lastindex(paths)
+        assertstring(paths[i])
+        p_drive, p_path = splitdrive(paths[i])
 
         if startswith(p_path, ('\\', '/'))
             # second path is absolute
@@ -286,8 +302,15 @@ end
 
 else
 
-function joinpath(path::AbstractString, paths::AbstractString...)::String
-    for p in paths
+function joinpath(paths::Union{Tuple, AbstractVector})::String
+    assertstring(x) = x isa AbstractString || throw(ArgumentError("path component is not a string: $(repr(x))"))
+
+    isempty(paths) && throw(ArgumentError("collection of path components must be non-empty"))
+    assertstring(paths[1])
+    path = paths[1]
+    for i in firstindex(paths)+1:lastindex(paths)
+        p = paths[i]
+        assertstring(p)
         if isabspath(p)
             path = p
         elseif isempty(path) || path[end] == '/'
@@ -301,8 +324,12 @@ end
 
 end # os-test
 
+joinpath(paths::AbstractString...)::String = joinpath(paths)
+
 """
     joinpath(parts::AbstractString...) -> String
+    joinpath(parts::Vector{AbstractString}) -> String
+    joinpath(parts::Tuple{AbstractString}) -> String
 
 Join path components into a full path. If some argument is an absolute path or
 (on Windows) has a drive specification that doesn't match the drive computed for
@@ -318,26 +345,35 @@ letter casing, hence `joinpath("C:\\A","c:b") = "C:\\A\\b"`.
 julia> joinpath("/home/myuser", "example.jl")
 "/home/myuser/example.jl"
 ```
+
+```jldoctest
+julia> joinpath(["/home/myuser", "example.jl"])
+"/home/myuser/example.jl"
+```
 """
 joinpath
 
 """
     normpath(path::AbstractString) -> String
 
-Normalize a path, removing "." and ".." entries.
+Normalize a path, removing "." and ".." entries and changing "/" to the canonical path separator
+for the system.
 
 # Examples
 ```jldoctest
 julia> normpath("/home/myuser/../example.jl")
 "/home/example.jl"
+
+julia> normpath("Documents/Julia") == joinpath("Documents", "Julia")
+true
 ```
 """
 function normpath(path::String)
     isabs = isabspath(path)
     isdir = isdirpath(path)
     drive, path = splitdrive(path)
-    parts = split(path, path_separator_re)
-    filter!(x->!isempty(x) && x!=".", parts)
+    parts = split(path, path_separator_re; keepempty=false)
+    filter!(!=("."), parts)
     while true
         clean = true
         for j = 1:length(parts)-1
@@ -380,7 +416,19 @@ normpath(a::AbstractString, b::AbstractString...) = normpath(joinpath(a,b...))
 Convert a path to an absolute path by adding the current directory if necessary.
 Also normalizes the path as in [`normpath`](@ref).
 """
-abspath(a::String) = normpath(isabspath(a) ? a : joinpath(pwd(),a))
+function abspath(a::String)::String
+    if !isabspath(a)
+        cwd = pwd()
+        a_drive, a_nodrive = splitdrive(a)
+        if a_drive != "" && lowercase(splitdrive(cwd)[1]) != lowercase(a_drive)
+            cwd = a_drive * path_separator
+            a = joinpath(cwd, a_nodrive)
+        else
+            a = joinpath(cwd, a)
+        end
+    end
+    return normpath(a)
+end
 
 """
     abspath(path::AbstractString, paths::AbstractString...) -> String
@@ -425,11 +473,11 @@ function realpath(path::AbstractString)
                     (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Ptr{Cvoid}),
                     C_NULL, req, path, C_NULL)
         if ret < 0
-            ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
-            uv_error("realpath", ret)
+            uv_fs_req_cleanup(req)
+            uv_error("realpath($(repr(path)))", ret)
         end
         path = unsafe_string(ccall(:jl_uv_fs_t_ptr, Cstring, (Ptr{Cvoid},), req))
-        ccall(:uv_fs_req_cleanup, Cvoid, (Ptr{Cvoid},), req)
+        uv_fs_req_cleanup(req)
         return path
     finally
         Libc.free(req)
@@ -485,6 +533,9 @@ contractuser(path::AbstractString)
 Return a relative filepath to `path` either from the current directory or from an optional
 start directory. This is a path computation: the filesystem is not accessed to confirm the
 existence or nature of `path` or `startpath`.
+
+On Windows, case sensitivity is applied to every part of the path except drive letters. If
+`path` and `startpath` refer to different drives, the absolute path of `path` is returned.
 """
 function relpath(path::String, startpath::String = ".")
     isempty(path) && throw(ArgumentError("`path` must be specified"))
@@ -492,8 +543,17 @@ function relpath(path::String, startpath::String = ".")
     curdir = "."
     pardir = ".."
     path == startpath && return curdir
-    path_arr  = split(abspath(path),      path_separator_re)
-    start_arr = split(abspath(startpath), path_separator_re)
+    if Sys.iswindows()
+        path_drive, path_without_drive = splitdrive(path)
+        startpath_drive, startpath_without_drive = splitdrive(startpath)
+        isempty(startpath_drive) && (startpath_drive = path_drive) # by default assume same as path drive
+        uppercase(path_drive) == uppercase(startpath_drive) || return abspath(path) # if drives differ return first path
+        path_arr  = split(abspath(path_drive * path_without_drive),      path_separator_re)
+        start_arr = split(abspath(path_drive * startpath_without_drive), path_separator_re)
+    else
+        path_arr  = split(abspath(path),      path_separator_re)
+        start_arr = split(abspath(startpath), path_separator_re)
+    end
     i = 0
     while i < min(length(path_arr), length(start_arr))
         i += 1
diff --git a/base/pcre.jl b/base/pcre.jl
index 1508eb90b19931..d689e9be29113a 100644
--- a/base/pcre.jl
+++ b/base/pcre.jl
@@ -24,69 +24,106 @@ function create_match_context()
     return ctx
 end
 
-const THREAD_MATCH_CONTEXTS = Ptr{Cvoid}[C_NULL]
+THREAD_MATCH_CONTEXTS::Vector{Ptr{Cvoid}} = [C_NULL]
 
 PCRE_COMPILE_LOCK = nothing
 
-_tid() = Int(ccall(:jl_threadid, Int16, ())+1)
+_tid() = Int(ccall(:jl_threadid, Int16, ())) + 1
 _nth() = Int(unsafe_load(cglobal(:jl_n_threads, Cint)))
 
 function get_local_match_context()
     tid = _tid()
-    ctx = @inbounds THREAD_MATCH_CONTEXTS[tid]
+    ctxs = THREAD_MATCH_CONTEXTS
+    if length(ctxs) < tid
+        # slow path to allocate it
+        l = PCRE_COMPILE_LOCK::Threads.SpinLock
+        lock(l)
+        try
+            ctxs = THREAD_MATCH_CONTEXTS
+            if length(ctxs) < tid
+                global THREAD_MATCH_CONTEXTS = ctxs = copyto!(fill(C_NULL, _nth()), ctxs)
+            end
+        finally
+            unlock(l)
+        end
+    end
+    ctx = @inbounds ctxs[tid]
     if ctx == C_NULL
-        @inbounds THREAD_MATCH_CONTEXTS[tid] = ctx = create_match_context()
+        # slow path to allocate it
+        ctx = create_match_context()
+        THREAD_MATCH_CONTEXTS[tid] = ctx
     end
     return ctx
 end
 
-function __init__()
-    resize!(THREAD_MATCH_CONTEXTS, _nth())
-    fill!(THREAD_MATCH_CONTEXTS, C_NULL)
-    global PCRE_COMPILE_LOCK = Threads.SpinLock()
-end
-
 # supported options for different use cases
 
+# arguments to pcre2_compile
 const COMPILE_MASK      =
+      ALT_BSUX          |
+      ALT_CIRCUMFLEX    |
+      ALT_VERBNAMES     |
       ANCHORED          |
+      # AUTO_CALLOUT    |
       CASELESS          |
       DOLLAR_ENDONLY    |
       DOTALL            |
+      # DUPNAMES        |
       ENDANCHORED       |
       EXTENDED          |
+      EXTENDED_MORE     |
       FIRSTLINE         |
+      LITERAL           |
+      MATCH_INVALID_UTF |
+      MATCH_UNSET_BACKREF |
       MULTILINE         |
-      NEWLINE_ANY       |
-      NEWLINE_ANYCRLF   |
-      NEWLINE_CR        |
-      NEWLINE_CRLF      |
-      NEWLINE_LF        |
+      NEVER_BACKSLASH_C |
+      NEVER_UCP         |
+      NEVER_UTF         |
       NO_AUTO_CAPTURE   |
+      NO_AUTO_POSSESS   |
+      NO_DOTSTAR_ANCHOR |
       NO_START_OPTIMIZE |
       NO_UTF_CHECK      |
+      UCP               |
       UNGREEDY          |
-      UTF               |
-      UCP
-
+      USE_OFFSET_LIMIT  |
+      UTF
+
+# arguments to pcre2_set_newline
+const COMPILE_NEWLINE_MASK = (
+      NEWLINE_CR,
+      NEWLINE_LF,
+      NEWLINE_CRLF,
+      NEWLINE_ANY,
+      NEWLINE_ANYCRLF,
+      NEWLINE_NUL)
+
+# arguments to pcre2_set_compile_extra_options
+const COMPILE_EXTRA_MASK            =
+      EXTRA_ALLOW_SURROGATE_ESCAPES |
+      EXTRA_ALT_BSUX                |
+      EXTRA_BAD_ESCAPE_IS_LITERAL   |
+      EXTRA_ESCAPED_CR_IS_LF        |
+      EXTRA_MATCH_LINE              |
+      EXTRA_MATCH_WORD
+
+# arguments to match
 const EXECUTE_MASK      =
-      NEWLINE_ANY       |
-      NEWLINE_ANYCRLF   |
-      NEWLINE_CR        |
-      NEWLINE_CRLF      |
-      NEWLINE_LF        |
+      # ANCHORED        |
+      # COPY_MATCHED_SUBJECT |
+      # ENDANCHORED     |
       NOTBOL            |
       NOTEMPTY          |
       NOTEMPTY_ATSTART  |
       NOTEOL            |
+      # NO_JIT          |
       NO_START_OPTIMIZE |
       NO_UTF_CHECK      |
       PARTIAL_HARD      |
       PARTIAL_SOFT
 
 
-const OPTIONS_MASK = COMPILE_MASK | EXECUTE_MASK
-
 const UNSET = ~Csize_t(0)  # Indicates that an output vector element is unset
 
 function info(regex::Ptr{Cvoid}, what::Integer, ::Type{T}) where T
@@ -201,7 +238,10 @@ function substring_length_bynumber(match_data, number)
     s = RefValue{Csize_t}()
     rc = ccall((:pcre2_substring_length_bynumber_8, PCRE_LIB), Cint,
                (Ptr{Cvoid}, Cint, Ref{Csize_t}), match_data, number, s)
-    rc < 0 && error("PCRE error: $(err_message(rc))")
+    if rc < 0
+        rc == ERROR_UNSET && return 0
+        error("PCRE error: $(err_message(rc))")
+    end
     return Int(s[])
 end
 
diff --git a/base/permuteddimsarray.jl b/base/permuteddimsarray.jl
index d951daa1252738..ea966c44efc38b 100644
--- a/base/permuteddimsarray.jl
+++ b/base/permuteddimsarray.jl
@@ -24,7 +24,7 @@ Given an AbstractArray `A`, create a view `B` such that the
 dimensions appear to be permuted. Similar to `permutedims`, except
 that no copying occurs (`B` shares storage with `A`).
 
-See also: [`permutedims`](@ref).
+See also [`permutedims`](@ref), [`invperm`](@ref).
 
 # Examples
 ```jldoctest
@@ -83,10 +83,10 @@ end
 """
     permutedims(A::AbstractArray, perm)
 
-Permute the dimensions of array `A`. `perm` is a vector specifying a permutation of length
-`ndims(A)`.
+Permute the dimensions of array `A`. `perm` is a vector or a tuple of length `ndims(A)`
+specifying the permutation.
 
-See also: [`PermutedDimsArray`](@ref).
+See also [`permutedims!`](@ref), [`PermutedDimsArray`](@ref), [`transpose`](@ref), [`invperm`](@ref).
 
 # Examples
 ```jldoctest
@@ -100,7 +100,7 @@ julia> A = reshape(Vector(1:8), (2,2,2))
  5  7
  6  8
 
-julia> permutedims(A, [3, 2, 1])
+julia> permutedims(A, (3, 2, 1))
 2×2×2 Array{Int64, 3}:
 [:, :, 1] =
  1  3
@@ -109,6 +109,16 @@ julia> permutedims(A, [3, 2, 1])
 [:, :, 2] =
  2  4
  6  8
+
+julia> B = randn(5, 7, 11, 13);
+
+julia> perm = [4,1,3,2];
+
+julia> size(permutedims(B, perm))
+(13, 5, 11, 7)
+
+julia> size(B)[perm] == ans
+true
 ```
 """
 function permutedims(A::AbstractArray, perm)
@@ -144,7 +154,7 @@ julia> permutedims(X)
  [5 6; 7 8]  [13 14; 15 16]
 
 julia> transpose(X)
-2×2 Transpose{Transpose{Int64, Matrix{Int64}}, Matrix{Matrix{Int64}}}:
+2×2 transpose(::Matrix{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}:
  [1 3; 2 4]  [9 11; 10 12]
  [5 7; 6 8]  [13 15; 14 16]
 ```
@@ -174,7 +184,7 @@ julia> permutedims(V)
  [1 2; 3 4]  [5 6; 7 8]
 
 julia> transpose(V)
-1×2 Transpose{Transpose{Int64, Matrix{Int64}}, Vector{Matrix{Int64}}}:
+1×2 transpose(::Vector{Matrix{Int64}}) with eltype Transpose{Int64, Matrix{Int64}}:
  [1 3; 2 4]  [5 7; 6 8]
 ```
 """
@@ -253,11 +263,22 @@ end
     P
 end
 
+function Base._mapreduce_dim(f, op, init::Base._InitialValue, A::PermutedDimsArray, dims::Colon)
+    Base._mapreduce_dim(f, op, init, parent(A), dims)
+end
+
+function Base.mapreducedim!(f, op, B::AbstractArray{T,N}, A::PermutedDimsArray{T,N,perm,iperm}) where {T,N,perm,iperm}
+    C = PermutedDimsArray{T,N,iperm,perm,typeof(B)}(B) # make the inverse permutation for the output
+    Base.mapreducedim!(f, op, C, parent(A))
+    B
+end
+
 function Base.showarg(io::IO, A::PermutedDimsArray{T,N,perm}, toplevel) where {T,N,perm}
     print(io, "PermutedDimsArray(")
     Base.showarg(io, parent(A), false)
     print(io, ", ", perm, ')')
     toplevel && print(io, " with eltype ", eltype(A))
+    return nothing
 end
 
 end
diff --git a/base/pkgid.jl b/base/pkgid.jl
index 6d588ffe6647dd..20d9de559b3341 100644
--- a/base/pkgid.jl
+++ b/base/pkgid.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 struct PkgId
     uuid::Union{UUID,Nothing}
     name::String
@@ -40,4 +42,3 @@ function binunpack(s::String)
     name = read(io, String)
     return PkgId(UUID(uuid), name)
 end
-
diff --git a/base/pointer.jl b/base/pointer.jl
index 0813d0a0c97350..60db18f2ca8555 100644
--- a/base/pointer.jl
+++ b/base/pointer.jl
@@ -77,7 +77,10 @@ element type. `dims` is either an integer (for a 1d array) or a tuple of the arr
 calling `free` on the pointer when the array is no longer referenced.
 
 This function is labeled "unsafe" because it will crash if `pointer` is not
-a valid memory address to data of the requested length.
+a valid memory address to data of the requested length. Unlike [`unsafe_load`](@ref)
+and [`unsafe_store!`](@ref), the programmer is responsible also for ensuring that the
+underlying data is not accessed through two arrays of different element type, similar
+to the strict aliasing rule in C.
 """
 function unsafe_wrap(::Union{Type{Array},Type{Array{T}},Type{Array{T,N}}},
                      p::Ptr{T}, dims::NTuple{N,Int}; own::Bool = false) where {T,N}
@@ -99,8 +102,10 @@ Load a value of type `T` from the address of the `i`th element (1-indexed) start
 This is equivalent to the C expression `p[i-1]`.
 
 The `unsafe` prefix on this function indicates that no validation is performed on the
-pointer `p` to ensure that it is valid. Incorrect usage may segfault your program or return
-garbage answers, in the same manner as C.
+pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring
+that referenced memory is not freed or garbage collected while invoking this function.
+Incorrect usage may segfault your program or return garbage answers. Unlike C, dereferencing
+memory region allocated as different type may be valid provided that the types are compatible.
 """
 unsafe_load(p::Ptr, i::Integer=1) = pointerref(p, Int(i), 1)
 
@@ -111,8 +116,10 @@ Store a value of type `T` to the address of the `i`th element (1-indexed) starti
 This is equivalent to the C expression `p[i-1] = x`.
 
 The `unsafe` prefix on this function indicates that no validation is performed on the
-pointer `p` to ensure that it is valid. Incorrect usage may corrupt or segfault your
-program, in the same manner as C.
+pointer `p` to ensure that it is valid. Like C, the programmer is responsible for ensuring
+that referenced memory is not freed or garbage collected while invoking this function.
+Incorrect usage may segfault your program. Unlike C, storing memory region allocated as
+different type may be valid provided that that the types are compatible.
 """
 unsafe_store!(p::Ptr{Any}, @nospecialize(x), i::Integer=1) = pointerset(p, x, Int(i), 1)
 unsafe_store!(p::Ptr{T}, x, i::Integer=1) where {T} = pointerset(p, convert(T,x), Int(i), 1)
@@ -125,7 +132,7 @@ Convert a `Ptr` to an object reference. Assumes the pointer refers to a valid he
 Julia object. If this is not the case, undefined behavior results, hence this function is
 considered "unsafe" and should be used with care.
 
-See also: [`pointer_from_objref`](@ref).
+See also [`pointer_from_objref`](@ref).
 """
 unsafe_pointer_to_objref(x::Ptr) = ccall(:jl_value_ptr, Any, (Ptr{Cvoid},), x)
 
@@ -139,11 +146,11 @@ remains referenced for the whole time that the `Ptr` will be used.
 This function may not be called on immutable objects, since they do not have
 stable memory addresses.
 
-See also: [`unsafe_pointer_to_objref`](@ref).
+See also [`unsafe_pointer_to_objref`](@ref).
 """
 function pointer_from_objref(@nospecialize(x))
-    @_inline_meta
-    typeof(x).mutable || error("pointer_from_objref cannot be used on immutable objects")
+    @inline
+    ismutable(x) || error("pointer_from_objref cannot be used on immutable objects")
     ccall(:jl_value_ptr, Ptr{Cvoid}, (Any,), x)
 end
 
diff --git a/base/process.jl b/base/process.jl
index 302387ce5b3a6d..aa378e72b2dce2 100644
--- a/base/process.jl
+++ b/base/process.jl
@@ -38,9 +38,13 @@ pipe_writer(p::ProcessChain) = p.in
 # release ownership of the libuv handle
 function uvfinalize(proc::Process)
     if proc.handle != C_NULL
-        disassociate_julia_struct(proc.handle)
-        ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), proc.handle)
-        proc.handle = C_NULL
+        iolock_begin()
+        if proc.handle != C_NULL
+            disassociate_julia_struct(proc.handle)
+            ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), proc.handle)
+            proc.handle = C_NULL
+        end
+        iolock_end()
     end
     nothing
 end
@@ -52,6 +56,7 @@ function uv_return_spawn(p::Ptr{Cvoid}, exit_status::Int64, termsignal::Int32)
     proc = unsafe_pointer_to_objref(data)::Process
     proc.exitcode = exit_status
     proc.termsignal = termsignal
+    disassociate_julia_struct(proc.handle) # ensure that data field is set to C_NULL
     ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), proc.handle)
     proc.handle = C_NULL
     lock(proc.exitnotify)
@@ -65,45 +70,67 @@ end
 
 # called when the libuv handle is destroyed
 function _uv_hook_close(proc::Process)
-    proc.handle = C_NULL
+    Libc.free(@atomicswap :not_atomic proc.handle = C_NULL)
     nothing
 end
 
-const SpawnIOs = Vector{Any} # convenience name for readability
+const SpawnIO  = Union{IO, RawFD, OS_HANDLE}
+const SpawnIOs = Vector{SpawnIO} # convenience name for readability
+
+function as_cpumask(cpus::Vector{UInt16})
+    n = max(Int(maximum(cpus)), Int(ccall(:uv_cpumask_size, Cint, ())))
+    cpumask = zeros(Bool, n)
+    for i in cpus
+        cpumask[i] = true
+    end
+    return cpumask
+end
 
 # handle marshalling of `Cmd` arguments from Julia to C
 @noinline function _spawn_primitive(file, cmd::Cmd, stdio::SpawnIOs)
     loop = eventloop()
-    iohandles = Tuple{Cint, UInt}[ # assuming little-endian layout
-        let h = rawhandle(io)
-            h === C_NULL     ? (0x00, UInt(0)) :
-            h isa OS_HANDLE  ? (0x02, UInt(cconvert(@static(Sys.iswindows() ? Ptr{Cvoid} : Cint), h))) :
-            h isa Ptr{Cvoid} ? (0x04, UInt(h)) :
-            error("invalid spawn handle $h from $io")
+    cpumask = cmd.cpus
+    cpumask === nothing || (cpumask = as_cpumask(cpumask))
+    GC.@preserve stdio begin
+        iohandles = Tuple{Cint, UInt}[ # assuming little-endian layout
+            let h = rawhandle(io)
+                h === C_NULL     ? (0x00, UInt(0)) :
+                h isa OS_HANDLE  ? (0x02, UInt(cconvert(@static(Sys.iswindows() ? Ptr{Cvoid} : Cint), h))) :
+                h isa Ptr{Cvoid} ? (0x04, UInt(h)) :
+                error("invalid spawn handle $h from $io")
+            end
+            for io in stdio]
+        handle = Libc.malloc(_sizeof_uv_process)
+        disassociate_julia_struct(handle)
+        (; exec, flags, env, dir) = cmd
+        iolock_begin()
+        err = ccall(:jl_spawn, Int32,
+                  (Cstring, Ptr{Cstring}, Ptr{Cvoid}, Ptr{Cvoid},
+                   Ptr{Tuple{Cint, UInt}}, Int,
+                   UInt32, Ptr{Cstring}, Cstring, Ptr{Bool}, Csize_t, Ptr{Cvoid}),
+            file, exec, loop, handle,
+            iohandles, length(iohandles),
+            flags,
+            env === nothing ? C_NULL : env,
+            isempty(dir) ? C_NULL : dir,
+            cpumask === nothing ? C_NULL : cpumask,
+            cpumask === nothing ? 0 : length(cpumask),
+            @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32)))
+        if err == 0
+            pp = Process(cmd, handle)
+            associate_julia_struct(handle, pp)
+        else
+            ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), handle) # will call free on handle eventually
         end
-        for io in stdio]
-    handle = Libc.malloc(_sizeof_uv_process)
-    disassociate_julia_struct(handle) # ensure that data field is set to C_NULL
-    err = ccall(:jl_spawn, Int32,
-              (Cstring, Ptr{Cstring}, Ptr{Cvoid}, Ptr{Cvoid},
-               Ptr{Tuple{Cint, UInt}}, Int,
-               UInt32, Ptr{Cstring}, Cstring, Ptr{Cvoid}),
-        file, cmd.exec, loop, handle,
-        iohandles, length(iohandles),
-        cmd.flags,
-        cmd.env === nothing ? C_NULL : cmd.env,
-        isempty(cmd.dir) ? C_NULL : cmd.dir,
-        @cfunction(uv_return_spawn, Cvoid, (Ptr{Cvoid}, Int64, Int32)))
+        iolock_end()
+    end
     if err != 0
-        ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), handle) # will call free on handle eventually
         throw(_UVError("could not spawn " * repr(cmd), err))
     end
-    pp = Process(cmd, handle)
-    associate_julia_struct(handle, pp)
     return pp
 end
 
-_spawn(cmds::AbstractCmd) = _spawn(cmds, Any[])
+_spawn(cmds::AbstractCmd) = _spawn(cmds, SpawnIO[])
 
 # optimization: we can spawn `Cmd` directly without allocating the ProcessChain
 function _spawn(cmd::Cmd, stdios::SpawnIOs)
@@ -187,7 +214,7 @@ end
 # open the child end of each element of `stdios`, and initialize the parent end
 function setup_stdios(f, stdios::SpawnIOs)
     nstdio = length(stdios)
-    open_io = Vector{Any}(undef, nstdio)
+    open_io = SpawnIOs(undef, nstdio)
     close_io = falses(nstdio)
     try
         for i in 1:nstdio
@@ -209,10 +236,10 @@ function setup_stdio(stdio::PipeEndpoint, child_readable::Bool)
         rd, wr = link_pipe(!child_readable, child_readable)
         try
             open_pipe!(stdio, child_readable ? wr : rd)
-        catch ex
+        catch
             close_pipe_sync(rd)
             close_pipe_sync(wr)
-            rethrow(ex)
+            rethrow()
         end
         child = child_readable ? rd : wr
         return (child, true)
@@ -251,18 +278,19 @@ function setup_stdio(stdio::FileRedirect, child_readable::Bool)
     return (io, true)
 end
 
-# incrementally move data between an IOBuffer and a system Pipe
+# incrementally move data between an arbitrary IO and a system Pipe,
+# including copying the EOF (shutdown) when finished
 # TODO: probably more efficient (when valid) to use `stdio` directly as the
 #       PipeEndpoint buffer field in some cases
-function setup_stdio(stdio::Union{IOBuffer, BufferStream}, child_readable::Bool)
+function setup_stdio(stdio::IO, child_readable::Bool)
     parent = PipeEndpoint()
     rd, wr = link_pipe(!child_readable, child_readable)
     try
         open_pipe!(parent, child_readable ? wr : rd)
-    catch ex
+    catch
         close_pipe_sync(rd)
         close_pipe_sync(wr)
-        rethrow(ex)
+        rethrow()
     end
     child = child_readable ? rd : wr
     try
@@ -271,24 +299,19 @@ function setup_stdio(stdio::Union{IOBuffer, BufferStream}, child_readable::Bool)
             @async try
                 write(in, out)
             catch ex
-                @warn "Process error" exception=(ex, catch_backtrace())
+                @warn "Process I/O error" exception=(ex, catch_backtrace())
             finally
                 close(parent)
+                child_readable || closewrite(stdio)
             end
         end
-    catch ex
+    catch
         close_pipe_sync(child)
-        rethrow(ex)
+        rethrow()
     end
     return (child, true)
 end
 
-function setup_stdio(io, child_readable::Bool)
-    # if there is no specialization,
-    # assume that rawhandle is defined for it
-    return (io, false)
-end
-
 close_stdio(stdio::OS_HANDLE) = close_pipe_sync(stdio)
 close_stdio(stdio) = close(stdio)
 
@@ -302,19 +325,19 @@ close_stdio(stdio) = close(stdio)
 #   - An Filesystem.File or IOStream object to redirect the output to
 #   - A FileRedirect, containing a string specifying a filename to be opened for the child
 
-spawn_opts_swallow(stdios::StdIOSet) = Any[stdios...]
-spawn_opts_inherit(stdios::StdIOSet) = Any[stdios...]
+spawn_opts_swallow(stdios::StdIOSet) = SpawnIO[stdios...]
+spawn_opts_inherit(stdios::StdIOSet) = SpawnIO[stdios...]
 spawn_opts_swallow(in::Redirectable=devnull, out::Redirectable=devnull, err::Redirectable=devnull) =
-    Any[in, out, err]
+    SpawnIO[in, out, err]
 # pass original descriptors to child processes by default, because we might
 # have already exhausted and closed the libuv object for our standard streams.
 # ref issue #8529
 spawn_opts_inherit(in::Redirectable=RawFD(0), out::Redirectable=RawFD(1), err::Redirectable=RawFD(2)) =
-    Any[in, out, err]
+    SpawnIO[in, out, err]
 
 function eachline(cmd::AbstractCmd; keep::Bool=false)
     out = PipeEndpoint()
-    processes = _spawn(cmd, Any[devnull, out, stderr])
+    processes = _spawn(cmd, SpawnIO[devnull, out, stderr])
     # if the user consumes all the data, also check process exit status for success
     ondone = () -> (success(processes) || pipeline_error(processes); nothing)
     return EachLine(out, keep=keep, ondone=ondone)::EachLine
@@ -362,20 +385,20 @@ function open(cmds::AbstractCmd, stdio::Redirectable=devnull; write::Bool=false,
         stdio === devnull || throw(ArgumentError("no stream can be specified for `stdio` in read-write mode"))
         in = PipeEndpoint()
         out = PipeEndpoint()
-        processes = _spawn(cmds, Any[in, out, stderr])
+        processes = _spawn(cmds, SpawnIO[in, out, stderr])
         processes.in = in
         processes.out = out
     elseif read
         out = PipeEndpoint()
-        processes = _spawn(cmds, Any[stdio, out, stderr])
+        processes = _spawn(cmds, SpawnIO[stdio, out, stderr])
         processes.out = out
     elseif write
         in = PipeEndpoint()
-        processes = _spawn(cmds, Any[in, stdio, stderr])
+        processes = _spawn(cmds, SpawnIO[in, stdio, stderr])
         processes.in = in
     else
         stdio === devnull || throw(ArgumentError("no stream can be specified for `stdio` in no-access mode"))
-        processes = _spawn(cmds, Any[devnull, devnull, stderr])
+        processes = _spawn(cmds, SpawnIO[devnull, devnull, stderr])
     end
     return processes
 end
@@ -383,19 +406,33 @@ end
 """
     open(f::Function, command, args...; kwargs...)
 
-Similar to `open(command, args...; kwargs...)`, but calls `f(stream)` on the resulting process
-stream, then closes the input stream and waits for the process to complete.
-Returns the value returned by `f`.
+Similar to `open(command, args...; kwargs...)`, but calls `f(stream)` on the
+resulting process stream, then closes the input stream and waits for the process
+to complete. Return the value returned by `f` on success. Throw an error if the
+process failed, or if the process attempts to print anything to stdout.
 """
 function open(f::Function, cmds::AbstractCmd, args...; kwargs...)
     P = open(cmds, args...; kwargs...)
+    function waitkill(P::Process)
+        close(P)
+        # 0.1 seconds after we hope it dies (from closing stdio),
+        # we kill the process with SIGTERM (15)
+        local t = Timer(0.1) do t
+            process_running(P) && kill(P)
+        end
+        wait(P)
+        close(t)
+    end
     ret = try
         f(P)
     catch
-        kill(P)
+        waitkill(P)
         rethrow()
-    finally
-        close(P.in)
+    end
+    close(P.in)
+    if !eof(P.out)
+        waitkill(P)
+        throw(_UVError("open(do)", UV_EPIPE))
     end
     success(P) || pipeline_error(P)
     return ret
@@ -418,7 +455,7 @@ end
 
 Run `command` and return the resulting output as a `String`.
 """
-read(cmd::AbstractCmd, ::Type{String}) = String(read(cmd))
+read(cmd::AbstractCmd, ::Type{String}) = String(read(cmd))::String
 
 """
     run(command, args...; wait::Bool = true)
@@ -476,7 +513,7 @@ function test_success(proc::Process)
         #TODO: this codepath is not currently tested
         throw(_UVError("could not start process " * repr(proc.cmd), proc.exitcode))
     end
-    return proc.exitcode == 0 && (proc.termsignal == 0 || proc.termsignal == SIGPIPE)
+    return proc.exitcode == 0 && proc.termsignal == 0
 end
 
 function success(x::Process)
@@ -570,10 +607,10 @@ Get the child process ID, if it still exists.
     This function requires at least Julia 1.1.
 """
 function Libc.getpid(p::Process)
-    # TODO: due to threading, this method is no longer synchronized with the user application
+    # TODO: due to threading, this method is only weakly synchronized with the user application
     iolock_begin()
     ppid = Int32(0)
-    if p.handle != C_NULL
+    if p.handle != C_NULL # e.g. process_running
         ppid = ccall(:jl_uv_process_pid, Int32, (Ptr{Cvoid},), p.handle)
     end
     iolock_end()
@@ -637,6 +674,7 @@ show(io::IO, p::Process) = print(io, "Process(", p.cmd, ", ", process_status(p),
 for f in (:length, :firstindex, :lastindex, :keys, :first, :last, :iterate)
     @eval $f(cmd::Cmd) = $f(cmd.exec)
 end
+Iterators.reverse(cmd::Cmd) = Iterators.reverse(cmd.exec)
 eltype(::Type{Cmd}) = eltype(fieldtype(Cmd, :exec))
 for f in (:iterate, :getindex)
     @eval $f(cmd::Cmd, i) = $f(cmd.exec, i)
diff --git a/base/promotion.jl b/base/promotion.jl
index 1b9c8c882324a7..8e05a86b8b7630 100644
--- a/base/promotion.jl
+++ b/base/promotion.jl
@@ -5,15 +5,14 @@
 """
     typejoin(T, S)
 
-
 Return the closest common ancestor of `T` and `S`, i.e. the narrowest type from which
 they both inherit.
 """
 typejoin() = Bottom
 typejoin(@nospecialize(t)) = t
-typejoin(@nospecialize(t), ts...) = (@_pure_meta; typejoin(t, typejoin(ts...)))
+typejoin(@nospecialize(t), ts...) = (@_total_meta; typejoin(t, typejoin(ts...)))
 function typejoin(@nospecialize(a), @nospecialize(b))
-    @_pure_meta
+    @_total_meta
     if isa(a, TypeVar)
         return typejoin(a.ub, b)
     elseif isa(b, TypeVar)
@@ -30,11 +29,15 @@ function typejoin(@nospecialize(a), @nospecialize(b))
         return typejoin(typejoin(a.a, a.b), b)
     elseif isa(b, Union)
         return typejoin(a, typejoin(b.a, b.b))
-    elseif a <: Tuple
+    end
+    # a and b are DataTypes
+    # We have to hide Constant info from inference, see #44390
+    a, b = inferencebarrier(a)::DataType, inferencebarrier(b)::DataType
+    if a <: Tuple
         if !(b <: Tuple)
             return Any
         end
-        ap, bp = a.parameters::Core.SimpleVector, b.parameters::Core.SimpleVector
+        ap, bp = a.parameters, b.parameters
         lar = length(ap)
         lbr = length(bp)
         if lar == 0
@@ -78,7 +81,6 @@ function typejoin(@nospecialize(a), @nospecialize(b))
     elseif b <: Tuple
         return Any
     end
-    a, b = a::DataType, b::DataType
     while b !== Any
         if a <: b.name.wrapper
             while a.name !== b.name
@@ -121,32 +123,104 @@ function typejoin(@nospecialize(a), @nospecialize(b))
     return Any
 end
 
+# return an upper-bound on type `a` with type `b` removed
+# such that `return <: a` && `Union{return, b} == Union{a, b}`
+# WARNING: this is wrong for some objects for which subtyping is broken
+#          (Core.Compiler.isnotbrokensubtype), use only simple types for `b`
+function typesplit(@nospecialize(a), @nospecialize(b))
+    @_total_may_throw_meta
+    if a <: b
+        return Bottom
+    end
+    if isa(a, Union)
+        return Union{typesplit(a.a, b),
+                     typesplit(a.b, b)}
+    end
+    return a
+end
+
+
 """
     promote_typejoin(T, S)
 
 Compute a type that contains both `T` and `S`, which could be
 either a parent of both types, or a `Union` if appropriate.
 Falls back to [`typejoin`](@ref).
+
+See instead [`promote`](@ref), [`promote_type`](@ref).
+
+# Examples
+```jldoctest
+julia> Base.promote_typejoin(Int, Float64)
+Real
+
+julia> Base.promote_type(Int, Float64)
+Float64
+```
 """
 function promote_typejoin(@nospecialize(a), @nospecialize(b))
     c = typejoin(_promote_typesubtract(a), _promote_typesubtract(b))
     return Union{a, b, c}::Type
 end
-_promote_typesubtract(@nospecialize(a)) = Core.Compiler.typesubtract(a, Union{Nothing, Missing})
+_promote_typesubtract(@nospecialize(a)) = typesplit(a, Union{Nothing, Missing})
+
+function promote_typejoin_union(::Type{T}) where T
+    if T === Union{}
+        return Union{}
+    elseif T isa UnionAll
+        return Any # TODO: compute more precise bounds
+    elseif T isa Union
+        return promote_typejoin(promote_typejoin_union(T.a), promote_typejoin_union(T.b))
+    elseif T isa DataType
+        T <: Tuple && return typejoin_union_tuple(T)
+        return T
+    else
+        error("unreachable") # not a type??
+    end
+end
 
+function typejoin_union_tuple(T::DataType)
+    @_total_may_throw_meta
+    u = Base.unwrap_unionall(T)
+    p = (u::DataType).parameters
+    lr = length(p)::Int
+    if lr == 0
+        return Tuple{}
+    end
+    c = Vector{Any}(undef, lr)
+    for i = 1:lr
+        pi = p[i]
+        U = Core.Compiler.unwrapva(pi)
+        if U === Union{}
+            ci = Union{}
+        elseif U isa Union
+            ci = typejoin(U.a, U.b)
+        elseif U isa UnionAll
+            return Any # TODO: compute more precise bounds
+        else
+            ci = promote_typejoin_union(U)
+        end
+        if i == lr && Core.Compiler.isvarargtype(pi)
+            c[i] = isdefined(pi, :N) ? Vararg{ci, pi.N} : Vararg{ci}
+        else
+            c[i] = ci
+        end
+    end
+    return Base.rewrap_unionall(Tuple{c...}, T)
+end
 
 # Returns length, isfixed
-function full_va_len(p)
+function full_va_len(p::Core.SimpleVector)
     isempty(p) && return 0, true
     last = p[end]
     if isvarargtype(last)
-        N = unwrap_unionall(last).parameters[2]
-        if isa(N, Int)
-            return length(p)::Int + N - 1, true
+        if isdefined(last, :N)
+            N = last.N
+            isa(N, Int) && return length(p) + N - 1, true
         end
-        return length(p)::Int, false
+        return length(p), false
     end
-    return length(p)::Int, true
+    return length(p), true
 end
 
 # reduce typejoin over A[i:end]
@@ -164,7 +238,7 @@ end
 ## promotion mechanism ##
 
 """
-    promote_type(type1, type2)
+    promote_type(type1, type2, ...)
 
 Promotion refers to converting values of mixed types to a single common type.
 `promote_type` represents the default promotion behavior in Julia when
@@ -175,6 +249,9 @@ tolerated; for example, `promote_type(Int64, Float64)` returns
 [`Float64`](@ref) even though strictly, not all [`Int64`](@ref) values can be
 represented exactly as `Float64` values.
 
+See also: [`promote`](@ref), [`promote_typejoin`](@ref), [`promote_rule`](@ref).
+
+# Examples
 ```jldoctest
 julia> promote_type(Int64, Float64)
 Float64
@@ -194,12 +271,17 @@ Float16
 julia> promote_type(Int8, UInt16)
 UInt16
 ```
+
+!!! warning "Don't overload this directly"
+    To overload promotion for your own types you should overload [`promote_rule`](@ref).
+    `promote_type` calls `promote_rule` internally to determine the type.
+    Overloading `promote_type` directly can cause ambiguity errors.
 """
 function promote_type end
 
 promote_type()  = Bottom
 promote_type(T) = T
-promote_type(T, S, U, V...) = (@_inline_meta; promote_type(T, promote_type(S, U, V...)))
+promote_type(T, S, U, V...) = (@inline; promote_type(T, promote_type(S, U, V...)))
 
 promote_type(::Type{Bottom}, ::Type{Bottom}) = Bottom
 promote_type(::Type{T}, ::Type{T}) where {T} = T
@@ -207,7 +289,7 @@ promote_type(::Type{T}, ::Type{Bottom}) where {T} = T
 promote_type(::Type{Bottom}, ::Type{T}) where {T} = T
 
 function promote_type(::Type{T}, ::Type{S}) where {T,S}
-    @_inline_meta
+    @inline
     # Try promote_rule in both orders. Typically only one is defined,
     # and there is a fallback returning Bottom below, so the common case is
     #   promote_type(T, S) =>
@@ -225,12 +307,12 @@ it for new types as appropriate.
 """
 function promote_rule end
 
-promote_rule(::Type{<:Any}, ::Type{<:Any}) = Bottom
+promote_rule(::Type, ::Type) = Bottom
 
-promote_result(::Type{<:Any},::Type{<:Any},::Type{T},::Type{S}) where {T,S} = (@_inline_meta; promote_type(T,S))
+promote_result(::Type,::Type,::Type{T},::Type{S}) where {T,S} = (@inline; promote_type(T,S))
 # If no promote_rule is defined, both directions give Bottom. In that
 # case use typejoin on the original types instead.
-promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} = (@_inline_meta; typejoin(T, S))
+promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} = (@inline; typejoin(T, S))
 
 """
     promote(xs...)
@@ -238,6 +320,8 @@ promote_result(::Type{T},::Type{S},::Type{Bottom},::Type{Bottom}) where {T,S} =
 Convert all arguments to a common type, and return them all (as a tuple).
 If no arguments can be converted, an error is raised.
 
+See also: [`promote_type`], [`promote_rule`].
+
 # Examples
 ```jldoctest
 julia> promote(Int8(1), Float16(4.5), Float32(4.1))
@@ -247,19 +331,19 @@ julia> promote(Int8(1), Float16(4.5), Float32(4.1))
 function promote end
 
 function _promote(x::T, y::S) where {T,S}
-    @_inline_meta
+    @inline
     R = promote_type(T, S)
     return (convert(R, x), convert(R, y))
 end
 promote_typeof(x) = typeof(x)
-promote_typeof(x, xs...) = (@_inline_meta; promote_type(typeof(x), promote_typeof(xs...)))
+promote_typeof(x, xs...) = (@inline; promote_type(typeof(x), promote_typeof(xs...)))
 function _promote(x, y, z)
-    @_inline_meta
+    @inline
     R = promote_typeof(x, y, z)
     return (convert(R, x), convert(R, y), convert(R, z))
 end
 function _promote(x, y, zs...)
-    @_inline_meta
+    @inline
     R = promote_typeof(x, y, zs...)
     return (convert(R, x), convert(R, y), convert(Tuple{Vararg{R}}, zs)...)
 end
@@ -271,13 +355,13 @@ promote() = ()
 promote(x) = (x,)
 
 function promote(x, y)
-    @_inline_meta
+    @inline
     px, py = _promote(x, y)
     not_sametype((x,y), (px,py))
     px, py
 end
 function promote(x, y, z)
-    @_inline_meta
+    @inline
     px, py, pz = _promote(x, y, z)
     not_sametype((x,y,z), (px,py,pz))
     px, py, pz
@@ -295,7 +379,7 @@ not_sametype(x::T, y::T) where {T} = sametype_error(x)
 not_sametype(x, y) = nothing
 
 function sametype_error(input)
-    @_noinline_meta
+    @noinline
     error("promotion of types ",
           join(map(x->string(typeof(x)), input), ", ", " and "),
           " failed to change any arguments")
@@ -316,8 +400,10 @@ If `y` is an `Int` literal (e.g. `2` in `x^2` or `-3` in `x^-3`), the Julia code
 enable compile-time specialization on the value of the exponent.
 (As a default fallback we have `Base.literal_pow(^, x, Val(y)) = ^(x,y)`,
 where usually `^ == Base.^` unless `^` has been defined in the calling
-namespace.)
+namespace.) If `y` is a negative integer literal, then `Base.literal_pow`
+transforms the operation to `inv(x)^-y` by default, where `-y` is positive.
 
+# Examples
 ```jldoctest
 julia> 3^5
 243
diff --git a/base/range.jl b/base/range.jl
index 8f1a1443b22fc9..23735aaa87f1c7 100644
--- a/base/range.jl
+++ b/base/range.jl
@@ -1,21 +1,21 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-(:)(a::Real, b::Real) = (:)(promote(a,b)...)
+(:)(a::Real, b::Real) = (:)(promote(a, b)...)
 
 (:)(start::T, stop::T) where {T<:Real} = UnitRange{T}(start, stop)
 
-(:)(start::T, stop::T) where {T} = (:)(start, oftype(stop-start, 1), stop)
+(:)(start::T, stop::T) where {T} = (:)(start, oftype(stop >= start ? stop - start : start - stop, 1), stop)
 
 # promote start and stop, leaving step alone
-(:)(start::A, step, stop::C) where {A<:Real,C<:Real} =
-    (:)(convert(promote_type(A,C),start), step, convert(promote_type(A,C),stop))
+(:)(start::A, step, stop::C) where {A<:Real, C<:Real} =
+    (:)(convert(promote_type(A, C), start), step, convert(promote_type(A, C), stop))
 
 # AbstractFloat specializations
 (:)(a::T, b::T) where {T<:AbstractFloat} = (:)(a, T(1), b)
 
-(:)(a::T, b::AbstractFloat, c::T) where {T<:Real} = (:)(promote(a,b,c)...)
-(:)(a::T, b::AbstractFloat, c::T) where {T<:AbstractFloat} = (:)(promote(a,b,c)...)
-(:)(a::T, b::Real, c::T) where {T<:AbstractFloat} = (:)(promote(a,b,c)...)
+(:)(a::T, b::AbstractFloat, c::T) where {T<:Real} = (:)(promote(a, b, c)...)
+(:)(a::T, b::AbstractFloat, c::T) where {T<:AbstractFloat} = (:)(promote(a, b, c)...)
+(:)(a::T, b::Real, c::T) where {T<:AbstractFloat} = (:)(promote(a, b, c)...)
 
 (:)(start::T, step::T, stop::T) where {T<:AbstractFloat} =
     _colon(OrderStyle(T), ArithmeticStyle(T), start, step, stop)
@@ -24,9 +24,9 @@
 _colon(::Ordered, ::Any, start::T, step, stop::T) where {T} = StepRange(start, step, stop)
 # for T<:Union{Float16,Float32,Float64} see twiceprecision.jl
 _colon(::Ordered, ::ArithmeticRounds, start::T, step, stop::T) where {T} =
-    StepRangeLen(start, step, floor(Int, (stop-start)/step)+1)
+    StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1)
 _colon(::Any, ::Any, start::T, step, stop::T) where {T} =
-    StepRangeLen(start, step, floor(Int, (stop-start)/step)+1)
+    StepRangeLen(start, step, floor(Integer, (stop-start)/step)+1)
 
 """
     (:)(start, [step], stop)
@@ -47,25 +47,20 @@ function _colon(start::T, step, stop::T) where T
 end
 
 """
-    range(start[, stop]; length, stop, step=1)
+    range(start, stop, length)
+    range(start, stop; length, step)
+    range(start; length, stop, step)
+    range(;start, length, stop, step)
 
-Given a starting value, construct a range either by length or from `start` to `stop`,
-optionally with a given step (defaults to 1, a [`UnitRange`](@ref)).
-One of `length` or `stop` is required.  If `length`, `stop`, and `step` are all specified, they must agree.
+Construct a specialized array with evenly spaced elements and optimized storage (an [`AbstractRange`](@ref)) from the arguments.
+Mathematically a range is uniquely determined by any three of `start`, `step`, `stop` and `length`.
+Valid invocations of range are:
+* Call `range` with any three of `start`, `step`, `stop`, `length`.
+* Call `range` with two of `start`, `stop`, `length`. In this case `step` will be assumed
+  to be one. If both arguments are Integers, a [`UnitRange`](@ref) will be returned.
+* Call `range` with one of `stop` or `length`. `start` and `step` will be assumed to be one.
 
-If `length` and `stop` are provided and `step` is not, the step size will be computed
-automatically such that there are `length` linearly spaced elements in the range.
-
-If `step` and `stop` are provided and `length` is not, the overall range length will be computed
-automatically such that the elements are `step` spaced.
-
-Special care is taken to ensure intermediate values are computed rationally.
-To avoid this induced overhead, see the [`LinRange`](@ref) constructor.
-
-`stop` may be specified as either a positional or keyword argument.
-
-!!! compat "Julia 1.1"
-    `stop` as a positional argument requires at least Julia 1.1.
+See Extended Help for additional details on the returned type.
 
 # Examples
 ```jldoctest
@@ -86,51 +81,164 @@ julia> range(1, 10, length=101)
 
 julia> range(1, 100, step=5)
 1:5:96
+
+julia> range(stop=10, length=5)
+6:10
+
+julia> range(stop=10, step=1, length=5)
+6:1:10
+
+julia> range(start=1, step=1, stop=10)
+1:1:10
+
+julia> range(; length = 10)
+Base.OneTo(10)
+
+julia> range(; stop = 6)
+Base.OneTo(6)
+
+julia> range(; stop = 6.5)
+1.0:1.0:6.0
 ```
+If `length` is not specified and `stop - start` is not an integer multiple of `step`, a range that ends before `stop` will be produced.
+```jldoctest
+julia> range(1, 3.5, step=2)
+1.0:2.0:3.0
+```
+
+Special care is taken to ensure intermediate values are computed rationally.
+To avoid this induced overhead, see the [`LinRange`](@ref) constructor.
+
+!!! compat "Julia 1.1"
+    `stop` as a positional argument requires at least Julia 1.1.
+
+!!! compat "Julia 1.7"
+    The versions without keyword arguments and `start` as a keyword argument
+    require at least Julia 1.7.
+
+!!! compat "Julia 1.8"
+    The versions with `stop` as a sole keyword argument,
+    or `length` as a sole keyword argument require at least Julia 1.8.
+
+
+# Extended Help
+
+`range` will produce a `Base.OneTo` when the arguments are Integers and
+* Only `length` is provided
+* Only `stop` is provided
+
+`range` will produce a `UnitRange` when the arguments are Integers and
+* Only `start`  and `stop` are provided
+* Only `length` and `stop` are provided
+
+A `UnitRange` is not produced if `step` is provided even if specified as one.
 """
-range(start; length::Union{Integer,Nothing}=nothing, stop=nothing, step=nothing) =
+function range end
+
+range(start; stop=nothing, length::Union{Integer,Nothing}=nothing, step=nothing) =
     _range(start, step, stop, length)
+range(start, stop; length::Union{Integer,Nothing}=nothing, step=nothing) = _range(start, step, stop, length)
+range(start, stop, length::Integer) = _range(start, nothing, stop, length)
 
-range(start, stop; length::Union{Integer,Nothing}=nothing, step=nothing) =
-    _range2(start, step, stop, length)
-
-_range2(start, ::Nothing, stop, ::Nothing) =
-    throw(ArgumentError("At least one of `length` or `step` must be specified"))
-
-_range2(start, step, stop, length) = _range(start, step, stop, length)
-
-# Range from start to stop: range(a, [step=s,] stop=b), no length
-_range(start, step,      stop, ::Nothing) = (:)(start, step, stop)
-_range(start, ::Nothing, stop, ::Nothing) = (:)(start, stop)
-
-# Range of a given length: range(a, [step=s,] length=l), no stop
-_range(a::Real,          ::Nothing,         ::Nothing, len::Integer) = UnitRange{typeof(a)}(a, oftype(a, a+len-1))
-_range(a::AbstractFloat, ::Nothing,         ::Nothing, len::Integer) = _range(a, oftype(a, 1),   nothing, len)
-_range(a::AbstractFloat, st::AbstractFloat, ::Nothing, len::Integer) = _range(promote(a, st)..., nothing, len)
-_range(a::Real,          st::AbstractFloat, ::Nothing, len::Integer) = _range(float(a), st,      nothing, len)
-_range(a::AbstractFloat, st::Real,          ::Nothing, len::Integer) = _range(a, float(st),      nothing, len)
-_range(a,                ::Nothing,         ::Nothing, len::Integer) = _range(a, oftype(a-a, 1), nothing, len)
-
-_range(a::T, step::T, ::Nothing, len::Integer) where {T <: AbstractFloat} =
-    _rangestyle(OrderStyle(T), ArithmeticStyle(T), a, step, len)
-_range(a::T, step, ::Nothing, len::Integer) where {T} =
-    _rangestyle(OrderStyle(T), ArithmeticStyle(T), a, step, len)
-_rangestyle(::Ordered, ::ArithmeticWraps, a::T, step::S, len::Integer) where {T,S} =
-    StepRange{T,S}(a, step, convert(T, a+step*(len-1)))
-_rangestyle(::Any, ::Any, a::T, step::S, len::Integer) where {T,S} =
-    StepRangeLen{typeof(a+0*step),T,S}(a, step, len)
-
-# Malformed calls
-_range(start,     step,      ::Nothing, ::Nothing) = # range(a, step=s)
-    throw(ArgumentError("At least one of `length` or `stop` must be specified"))
-_range(start,     ::Nothing, ::Nothing, ::Nothing) = # range(a)
-    throw(ArgumentError("At least one of `length` or `stop` must be specified"))
-_range(::Nothing, ::Nothing, ::Nothing, ::Nothing) = # range(nothing)
-    throw(ArgumentError("At least one of `length` or `stop` must be specified"))
-_range(start::Real, step::Real, stop::Real, length::Integer) = # range(a, step=s, stop=b, length=l)
-    throw(ArgumentError("Too many arguments specified; try passing only one of `stop` or `length`"))
-_range(::Nothing, ::Nothing, ::Nothing, ::Integer) = # range(nothing, length=l)
-    throw(ArgumentError("Can't start a range at `nothing`"))
+range(;start=nothing, stop=nothing, length::Union{Integer, Nothing}=nothing, step=nothing) =
+    _range(start, step, stop, length)
+
+_range(start::Nothing, step::Nothing, stop::Nothing, len::Nothing) = range_error(start, step, stop, len)
+_range(start::Nothing, step::Nothing, stop::Nothing, len::Any    ) = range_length(len)
+_range(start::Nothing, step::Nothing, stop::Any    , len::Nothing) = range_stop(stop)
+_range(start::Nothing, step::Nothing, stop::Any    , len::Any    ) = range_stop_length(stop, len)
+_range(start::Nothing, step::Any    , stop::Nothing, len::Nothing) = range_error(start, step, stop, len)
+_range(start::Nothing, step::Any    , stop::Nothing, len::Any    ) = range_error(start, step, stop, len)
+_range(start::Nothing, step::Any    , stop::Any    , len::Nothing) = range_error(start, step, stop, len)
+_range(start::Nothing, step::Any    , stop::Any    , len::Any    ) = range_step_stop_length(step, stop, len)
+_range(start::Any    , step::Nothing, stop::Nothing, len::Nothing) = range_error(start, step, stop, len)
+_range(start::Any    , step::Nothing, stop::Nothing, len::Any    ) = range_start_length(start, len)
+_range(start::Any    , step::Nothing, stop::Any    , len::Nothing) = range_start_stop(start, stop)
+_range(start::Any    , step::Nothing, stop::Any    , len::Any    ) = range_start_stop_length(start, stop, len)
+_range(start::Any    , step::Any    , stop::Nothing, len::Nothing) = range_error(start, step, stop, len)
+_range(start::Any    , step::Any    , stop::Nothing, len::Any    ) = range_start_step_length(start, step, len)
+_range(start::Any    , step::Any    , stop::Any    , len::Nothing) = range_start_step_stop(start, step, stop)
+_range(start::Any    , step::Any    , stop::Any    , len::Any    ) = range_error(start, step, stop, len)
+
+# Length as the only argument
+range_length(len::Integer) = OneTo(len)
+
+# Stop as the only argument
+range_stop(stop) = range_start_stop(oftype(stop, 1), stop)
+range_stop(stop::Integer) = range_length(stop)
+
+function range_step_stop_length(step, a, len::Integer)
+    start = a - step * (len - oneunit(len))
+    if start isa Signed
+        # overflow in recomputing length from stop is okay
+        return StepRange{typeof(start),typeof(step)}(start, step, convert(typeof(start), a))
+    end
+    return StepRangeLen{typeof(start),typeof(start),typeof(step)}(start, step, len)
+end
+
+# Stop and length as the only argument
+function range_stop_length(a, len::Integer)
+    step = oftype(a - a, 1) # assert that step is representable
+    start = a - (len - oneunit(len))
+    if start isa Signed
+        # overflow in recomputing length from stop is okay
+        return UnitRange(start, oftype(start, a))
+    end
+    return StepRangeLen{typeof(start),typeof(start),typeof(step)}(start, step, len)
+end
+
+# Start and length as the only argument
+function range_start_length(a, len::Integer)
+    step = oftype(a - a, 1) # assert that step is representable
+    stop = a + (len - oneunit(len))
+    if stop isa Signed
+        # overflow in recomputing length from stop is okay
+        return UnitRange(oftype(stop, a), stop)
+    end
+    return StepRangeLen{typeof(stop),typeof(a),typeof(step)}(a, step, len)
+end
+
+range_start_stop(start, stop) = start:stop
+
+function range_start_step_length(a, step, len::Integer)
+    stop = a + step * (len - oneunit(len))
+    if stop isa Signed
+        # overflow in recomputing length from stop is okay
+        return StepRange{typeof(stop),typeof(step)}(convert(typeof(stop), a), step, stop)
+    end
+    return StepRangeLen{typeof(stop),typeof(a),typeof(step)}(a, step, len)
+end
+
+range_start_step_stop(start, step, stop) = start:step:stop
+
+function range_error(start, step, stop, length)
+    hasstart  = start !== nothing
+    hasstep   = step  !== nothing
+    hasstop   = stop  !== nothing
+    haslength = start !== nothing
+
+    hint = if hasstart && hasstep && hasstop && haslength
+        "Try specifying only three arguments"
+    elseif !hasstop && !haslength
+        "At least one of `length` or `stop` must be specified."
+    elseif !hasstep && !haslength
+        "At least one of `length` or `step` must be specified."
+    elseif !hasstart && !hasstop
+        "At least one of `start` or `stop` must be specified."
+    else
+        "Try specifying more arguments."
+    end
+
+    msg = """
+    Cannot construct range from arguments:
+    start = $start
+    step = $step
+    stop = $stop
+    length = $length
+    $hint
+    """
+    throw(ArgumentError(msg))
+end
 
 ## 1-dimensional ranges ##
 
@@ -202,18 +310,21 @@ struct StepRange{T,S} <: OrdinalRange{T,S}
     stop::T
 
     function StepRange{T,S}(start, step, stop) where {T,S}
-        sta = convert(T, start)
-        ste = convert(S, step)
-        sto = convert(T, stop)
-        new(sta, ste, steprange_last(sta,ste,sto))
+        start = convert(T, start)
+        step = convert(S, step)
+        stop = convert(T, stop)
+        return new(start, step, steprange_last(start, step, stop))
     end
 end
 
 # to make StepRange constructor inlineable, so optimizer can see `step` value
-function steprange_last(start::T, step, stop) where T
-    if isa(start,AbstractFloat) || isa(step,AbstractFloat)
+function steprange_last(start, step, stop)::typeof(stop)
+    if isa(start, AbstractFloat) || isa(step, AbstractFloat)
         throw(ArgumentError("StepRange should not be used with floating point"))
     end
+    if isa(start, Integer) && !isinteger(start + step)
+        throw(ArgumentError("StepRange{<:Integer} cannot have non-integer step"))
+    end
     z = zero(step)
     step == z && throw(ArgumentError("step cannot be zero"))
 
@@ -228,33 +339,31 @@ function steprange_last(start::T, step, stop) where T
             absdiff, absstep = stop > start ? (stop - start, step) : (start - stop, -step)
 
             # Compute remainder as a nonnegative number:
-            if T <: Signed && absdiff < zero(absdiff)
-                # handle signed overflow with unsigned rem
-                remain = convert(T, unsigned(absdiff) % absstep)
+            if absdiff isa Signed && absdiff < zero(absdiff)
+                # unlikely, but handle the signed overflow case with unsigned rem
+                remain = convert(typeof(absdiff), unsigned(absdiff) % absstep)
             else
-                remain = absdiff % absstep
+                remain = convert(typeof(absdiff), absdiff % absstep)
             end
             # Move `stop` closer to `start` if there is a remainder:
             last = stop > start ? stop - remain : stop + remain
         end
     end
-    last
+    return last
 end
 
-function steprange_last_empty(start::Integer, step, stop)
-    # empty range has a special representation where stop = start-1
-    # this is needed to avoid the wrap-around that can happen computing
-    # start - step, which leads to a range that looks very large instead
-    # of empty.
+function steprange_last_empty(start::Integer, step, stop)::typeof(stop)
+    # empty range has a special representation where stop = start-1,
+    # which simplifies arithmetic for Signed numbers
     if step > zero(step)
-        last = start - oneunit(stop-start)
+        last = start - oneunit(step)
     else
-        last = start + oneunit(stop-start)
+        last = start + oneunit(step)
     end
-    last
+    return last
 end
-# For types where x+oneunit(x) may not be well-defined
-steprange_last_empty(start, step, stop) = start - step
+# For types where x+oneunit(x) may not be well-defined use the user-given value for stop
+steprange_last_empty(start, step, stop) = stop
 
 StepRange{T}(start, step::S, stop) where {T,S} = StepRange{T,S}(start, step, stop)
 StepRange(start::T, step::S, stop::T) where {T,S} = StepRange{T,S}(start, step, stop)
@@ -281,29 +390,36 @@ UnitRange{Int64}
 struct UnitRange{T<:Real} <: AbstractUnitRange{T}
     start::T
     stop::T
-    UnitRange{T}(start, stop) where {T<:Real} = new(start, unitrange_last(start,stop))
+    UnitRange{T}(start::T, stop::T) where {T<:Real} = new(start, unitrange_last(start, stop))
 end
+UnitRange{T}(start, stop) where {T<:Real} = UnitRange{T}(convert(T, start), convert(T, stop))
 UnitRange(start::T, stop::T) where {T<:Real} = UnitRange{T}(start, stop)
+UnitRange(start, stop) = UnitRange(promote(start, stop)...)
 
-unitrange_last(::Bool, stop::Bool) = stop
-unitrange_last(start::T, stop::T) where {T<:Integer} =
-    ifelse(stop >= start, stop, convert(T,start-oneunit(stop-start)))
-unitrange_last(start::T, stop::T) where {T} =
-    ifelse(stop >= start, convert(T,start+floor(stop-start)),
-                          convert(T,start-oneunit(stop-start)))
+# if stop and start are integral, we know that their difference is a multiple of 1
+unitrange_last(start::Integer, stop::Integer) =
+    stop >= start ? stop : convert(typeof(stop), start - oneunit(start - stop))
+# otherwise, use `floor` as a more efficient way to compute modulus with step=1
+unitrange_last(start, stop) =
+    stop >= start ? convert(typeof(stop), start + floor(stop - start)) :
+                    convert(typeof(stop), start - oneunit(start - stop))
+
+unitrange(x::AbstractUnitRange) = UnitRange(x) # convenience conversion for promoting the range type
 
 if isdefined(Main, :Base)
     # Constant-fold-able indexing into tuples to functionally expose Base.tail and Base.front
-    function getindex(@nospecialize(t::Tuple), r::UnitRange)
-        @_inline_meta
-        r.start > r.stop && return ()
-        if r.start == 1
-            r.stop == length(t)   && return t
-            r.stop == length(t)-1 && return front(t)
-            r.stop == length(t)-2 && return front(front(t))
-        elseif r.stop == length(t)
-            r.start == 2 && return tail(t)
-            r.start == 3 && return tail(tail(t))
+    function getindex(@nospecialize(t::Tuple), r::AbstractUnitRange)
+        @inline
+        require_one_based_indexing(r)
+        if length(r) <= 10
+            return ntuple(i -> t[i + first(r) - 1], length(r))
+        elseif first(r) == 1
+            last(r) == length(t)   && return t
+            last(r) == length(t)-1 && return front(t)
+            last(r) == length(t)-2 && return front(front(t))
+        elseif last(r) == length(t)
+            first(r) == 2 && return tail(t)
+            first(r) == 3 && return tail(tail(t))
         end
         return (eltype(t)[t[ri] for ri in r]...,)
     end
@@ -318,25 +434,34 @@ be 1.
 """
 struct OneTo{T<:Integer} <: AbstractUnitRange{T}
     stop::T
-    OneTo{T}(stop) where {T<:Integer} = new(max(zero(T), stop))
+    function OneTo{T}(stop) where {T<:Integer}
+        throwbool(r)  = (@noinline; throw(ArgumentError("invalid index: $r of type Bool")))
+        T === Bool && throwbool(stop)
+        return new(max(zero(T), stop))
+    end
+
     function OneTo{T}(r::AbstractRange) where {T<:Integer}
-        throwstart(r) = (@_noinline_meta; throw(ArgumentError("first element must be 1, got $(first(r))")))
-        throwstep(r)  = (@_noinline_meta; throw(ArgumentError("step must be 1, got $(step(r))")))
+        throwstart(r) = (@noinline; throw(ArgumentError("first element must be 1, got $(first(r))")))
+        throwstep(r)  = (@noinline; throw(ArgumentError("step must be 1, got $(step(r))")))
+        throwbool(r)  = (@noinline; throw(ArgumentError("invalid index: $r of type Bool")))
         first(r) == 1 || throwstart(r)
         step(r)  == 1 || throwstep(r)
+        T === Bool && throwbool(r)
         return new(max(zero(T), last(r)))
     end
 end
 OneTo(stop::T) where {T<:Integer} = OneTo{T}(stop)
 OneTo(r::AbstractRange{T}) where {T<:Integer} = OneTo{T}(r)
+oneto(r) = OneTo(r)
 
 ## Step ranges parameterized by length
 
 """
-    StepRangeLen{T,R,S}(ref::R, step::S, len, [offset=1]) where {T,R,S}
-    StepRangeLen(       ref::R, step::S, len, [offset=1]) where {  R,S}
+    StepRangeLen(         ref::R, step::S, len, [offset=1]) where {  R,S}
+    StepRangeLen{T,R,S}(  ref::R, step::S, len, [offset=1]) where {T,R,S}
+    StepRangeLen{T,R,S,L}(ref::R, step::S, len, [offset=1]) where {T,R,S,L}
 
-A range `r` where `r[i]` produces values of type `T` (in the second
+A range `r` where `r[i]` produces values of type `T` (in the first
 form, `T` is deduced automatically), parameterized by a `ref`erence
 value, a `step`, and the `len`gth. By default `ref` is the starting
 value `r[1]`, but alternatively you can supply it as the value of
@@ -344,45 +469,53 @@ value `r[1]`, but alternatively you can supply it as the value of
 with `TwicePrecision` this can be used to implement ranges that are
 free of roundoff error.
 """
-struct StepRangeLen{T,R,S} <: AbstractRange{T}
+struct StepRangeLen{T,R,S,L<:Integer} <: AbstractRange{T}
     ref::R       # reference value (might be smallest-magnitude value in the range)
     step::S      # step value
-    len::Int     # length of the range
-    offset::Int  # the index of ref
+    len::L       # length of the range
+    offset::L    # the index of ref
 
-    function StepRangeLen{T,R,S}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S}
-        len >= 0 || throw(ArgumentError("length cannot be negative, got $len"))
-        1 <= offset <= max(1,len) || throw(ArgumentError("StepRangeLen: offset must be in [1,$len], got $offset"))
-        new(ref, step, len, offset)
+    function StepRangeLen{T,R,S,L}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S,L}
+        if T <: Integer && !isinteger(ref + step)
+            throw(ArgumentError("StepRangeLen{<:Integer} cannot have non-integer step"))
+        end
+        len = convert(L, len)
+        len >= zero(len) || throw(ArgumentError("length cannot be negative, got $len"))
+        offset = convert(L, offset)
+        L1 = oneunit(typeof(len))
+        L1 <= offset <= max(L1, len) || throw(ArgumentError("StepRangeLen: offset must be in [1,$len], got $offset"))
+        return new(ref, step, len, offset)
     end
 end
 
+StepRangeLen{T,R,S}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S} =
+    StepRangeLen{T,R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 StepRangeLen(ref::R, step::S, len::Integer, offset::Integer = 1) where {R,S} =
-    StepRangeLen{typeof(ref+0*step),R,S}(ref, step, len, offset)
+    StepRangeLen{typeof(ref+zero(step)),R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 StepRangeLen{T}(ref::R, step::S, len::Integer, offset::Integer = 1) where {T,R,S} =
-    StepRangeLen{T,R,S}(ref, step, len, offset)
+    StepRangeLen{T,R,S,promote_type(Int,typeof(len))}(ref, step, len, offset)
 
 ## range with computed step
 
 """
-    LinRange{T}
+    LinRange{T,L}
 
 A range with `len` linearly spaced elements between its `start` and `stop`.
 The size of the spacing is controlled by `len`, which must
-be an `Int`.
+be an `Integer`.
 
 # Examples
 ```jldoctest
 julia> LinRange(1.5, 5.5, 9)
-9-element LinRange{Float64}:
- 1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5
+9-element LinRange{Float64, Int64}:
+ 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5
 ```
 
 Compared to using [`range`](@ref), directly constructing a `LinRange` should
 have less overhead but won't try to correct for floating point errors:
-```julia
+```jldoctest
 julia> collect(range(-0.1, 0.3, length=5))
-5-element Array{Float64,1}:
+5-element Vector{Float64}:
  -0.1
   0.0
   0.1
@@ -390,7 +523,7 @@ julia> collect(range(-0.1, 0.3, length=5))
   0.3
 
 julia> collect(LinRange(-0.1, 0.3, 5))
-5-element Array{Float64,1}:
+5-element Vector{Float64}:
  -0.1
  -1.3877787807814457e-17
   0.09999999999999999
@@ -398,45 +531,57 @@ julia> collect(LinRange(-0.1, 0.3, 5))
   0.3
 ```
 """
-struct LinRange{T} <: AbstractRange{T}
+struct LinRange{T,L<:Integer} <: AbstractRange{T}
     start::T
     stop::T
-    len::Int
-    lendiv::Int
+    len::L
+    lendiv::L
 
-    function LinRange{T}(start,stop,len) where T
+    function LinRange{T,L}(start::T, stop::T, len::L) where {T,L<:Integer}
         len >= 0 || throw(ArgumentError("range($start, stop=$stop, length=$len): negative length"))
-        if len == 1
+        onelen = oneunit(typeof(len))
+        if len == onelen
             start == stop || throw(ArgumentError("range($start, stop=$stop, length=$len): endpoints differ"))
-            return new(start, stop, 1, 1)
+            return new(start, stop, len, len)
+        end
+        lendiv = max(len - onelen, onelen)
+        if T <: Integer && !iszero(mod(stop-start, lendiv))
+            throw(ArgumentError("LinRange{<:Integer} cannot have non-integer step"))
         end
-        new(start,stop,len,max(len-1,1))
+        return new(start, stop, len, lendiv)
     end
 end
 
+function LinRange{T,L}(start, stop, len::Integer) where {T,L}
+    LinRange{T,L}(convert(T, start), convert(T, stop), convert(L, len))
+end
+
+function LinRange{T}(start, stop, len::Integer) where T
+    LinRange{T,promote_type(Int,typeof(len))}(start, stop, len)
+end
+
 function LinRange(start, stop, len::Integer)
-    T = typeof((stop-start)/len)
+    T = typeof((zero(stop) - zero(start)) / oneunit(len))
     LinRange{T}(start, stop, len)
 end
 
-function _range(start::T, ::Nothing, stop::S, len::Integer) where {T,S}
-    a, b = promote(start, stop)
-    _range(a, nothing, b, len)
-end
-_range(start::T, ::Nothing, stop::T, len::Integer) where {T<:Real} = LinRange{T}(start, stop, len)
-_range(start::T, ::Nothing, stop::T, len::Integer) where {T} = LinRange{T}(start, stop, len)
-_range(start::T, ::Nothing, stop::T, len::Integer) where {T<:Integer} =
+range_start_stop_length(start, stop, len::Integer) =
+    range_start_stop_length(promote(start, stop)..., len)
+range_start_stop_length(start::T, stop::T, len::Integer) where {T} = LinRange(start, stop, len)
+range_start_stop_length(start::T, stop::T, len::Integer) where {T<:Integer} =
     _linspace(float(T), start, stop, len)
 ## for Float16, Float32, and Float64 we hit twiceprecision.jl to lift to higher precision StepRangeLen
 # for all other types we fall back to a plain old LinRange
 _linspace(::Type{T}, start::Integer, stop::Integer, len::Integer) where T = LinRange{T}(start, stop, len)
 
-function show(io::IO, r::LinRange)
-    print(io, "range(")
+function show(io::IO, r::LinRange{T}) where {T}
+    print(io, "LinRange{")
+    show(io, T)
+    print(io, "}(")
     show(io, first(r))
-    print(io, ", stop=")
+    print(io, ", ")
     show(io, last(r))
-    print(io, ", length=")
+    print(io, ", ")
     show(io, length(r))
     print(io, ')')
 end
@@ -447,7 +592,7 @@ as if it were `collect(r)`, dependent on the size of the
 terminal, and taking into account whether compact numbers should be shown.
 It figures out the width in characters of each element, and if they
 end up too wide, it shows the first and last elements separated by a
-horizontal ellipsis. Typical output will look like `1.0,2.0,3.0,…,4.0,5.0,6.0`.
+horizontal ellipsis. Typical output will look like `1.0, 2.0, …, 5.0, 6.0`.
 
 `print_range(io, r, pre, sep, post, hdots)` uses optional
 parameters `pre` and `post` characters for each printed row,
@@ -456,9 +601,9 @@ parameters `pre` and `post` characters for each printed row,
 """
 function print_range(io::IO, r::AbstractRange,
                      pre::AbstractString = " ",
-                     sep::AbstractString = ",",
+                     sep::AbstractString = ", ",
                      post::AbstractString = "",
-                     hdots::AbstractString = ",\u2026,") # horiz ellipsis
+                     hdots::AbstractString = ", \u2026, ") # horiz ellipsis
     # This function borrows from print_matrix() in show.jl
     # and should be called by show and display
     sz = displaysize(io)
@@ -477,31 +622,34 @@ function print_range(io::IO, r::AbstractRange,
     maxpossiblecols = div(screenwidth, 1+sepsize) # assume each element is at least 1 char + 1 separator
     colsr = n <= maxpossiblecols ? (1:n) : [1:div(maxpossiblecols,2)+1; (n-div(maxpossiblecols,2)):n]
     rowmatrix = reshape(r[colsr], 1, length(colsr)) # treat the range as a one-row matrix for print_matrix_row
-    A = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), screenwidth, screenwidth, sepsize) # how much space range takes
+    nrow, idxlast = size(rowmatrix, 2), last(axes(rowmatrix, 2))
+    A = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), screenwidth, screenwidth, sepsize, nrow) # how much space range takes
     if n <= length(A) # cols fit screen, so print out all elements
         print(io, pre) # put in pre chars
-        print_matrix_row(io,rowmatrix,A,1,1:n,sep) # the entire range
+        print_matrix_row(io,rowmatrix,A,1,1:n,sep,idxlast) # the entire range
         print(io, post) # add the post characters
     else # cols don't fit so put horiz ellipsis in the middle
         # how many chars left after dividing width of screen in half
         # and accounting for the horiz ellipsis
         c = div(screenwidth-length(hdots)+1,2)+1 # chars remaining for each side of rowmatrix
-        alignR = reverse(alignment(io, rowmatrix, 1:m, length(rowmatrix):-1:1, c, c, sepsize)) # which cols of rowmatrix to put on the right
+        alignR = reverse(alignment(io, rowmatrix, 1:m, length(rowmatrix):-1:1, c, c, sepsize, nrow)) # which cols of rowmatrix to put on the right
         c = screenwidth - sum(map(sum,alignR)) - (length(alignR)-1)*sepsize - length(hdots)
-        alignL = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), c, c, sepsize) # which cols of rowmatrix to put on the left
+        alignL = alignment(io, rowmatrix, 1:m, 1:length(rowmatrix), c, c, sepsize, nrow) # which cols of rowmatrix to put on the left
         print(io, pre)   # put in pre chars
-        print_matrix_row(io, rowmatrix,alignL,1,1:length(alignL),sep) # left part of range
+        print_matrix_row(io, rowmatrix,alignL,1,1:length(alignL),sep,idxlast) # left part of range
         print(io, hdots) # horizontal ellipsis
-        print_matrix_row(io, rowmatrix,alignR,1,length(rowmatrix)-length(alignR)+1:length(rowmatrix),sep) # right part of range
+        print_matrix_row(io, rowmatrix,alignR,1,length(rowmatrix)-length(alignR)+1:length(rowmatrix),sep,idxlast) # right part of range
         print(io, post)  # post chars
     end
 end
 
 ## interface implementations
 
+length(r::AbstractRange) = error("length implementation missing") # catch mistakes
 size(r::AbstractRange) = (length(r),)
 
 isempty(r::StepRange) =
+    # steprange_last(r.start, r.step, r.stop) == r.stop
     (r.start != r.stop) & ((r.step > zero(r.step)) != (r.stop > r.start))
 isempty(r::AbstractUnitRange) = first(r) > last(r)
 isempty(r::StepRangeLen) = length(r) == 0
@@ -528,68 +676,141 @@ julia> step(range(2.5, stop=10.9, length=85))
 ```
 """
 step(r::StepRange) = r.step
-step(r::AbstractUnitRange{T}) where{T} = oneunit(T) - zero(T)
+step(r::AbstractUnitRange{T}) where {T} = oneunit(T) - zero(T)
 step(r::StepRangeLen) = r.step
 step(r::StepRangeLen{T}) where {T<:AbstractFloat} = T(r.step)
 step(r::LinRange) = (last(r)-first(r))/r.lendiv
 
+# high-precision step
 step_hp(r::StepRangeLen) = r.step
 step_hp(r::AbstractRange) = step(r)
 
-unsafe_length(r::AbstractRange) = length(r)  # generic fallback
-
-function unsafe_length(r::StepRange)
-    n = Integer(div((r.stop - r.start) + r.step, r.step))
-    isempty(r) ? zero(n) : n
-end
-length(r::StepRange) = unsafe_length(r)
-unsafe_length(r::AbstractUnitRange) = Integer(last(r) - first(r) + step(r))
-unsafe_length(r::OneTo) = Integer(r.stop - zero(r.stop))
-length(r::AbstractUnitRange) = unsafe_length(r)
-length(r::OneTo) = unsafe_length(r)
-length(r::StepRangeLen) = r.len
-length(r::LinRange) = r.len
+axes(r::AbstractRange) = (oneto(length(r)),)
 
 # Needed to fold the `firstindex` call in SimdLoop.simd_index
 firstindex(::UnitRange) = 1
 firstindex(::StepRange) = 1
 firstindex(::LinRange) = 1
 
-function length(r::StepRange{T}) where T<:Union{Int,UInt,Int64,UInt64,Int128,UInt128}
-    isempty(r) && return zero(T)
-    if r.step > 1
-        return checked_add(convert(T, div(unsigned(r.stop - r.start), r.step)), one(T))
-    elseif r.step < -1
-        return checked_add(convert(T, div(unsigned(r.start - r.stop), -r.step)), one(T))
-    elseif r.step > 0
-        return checked_add(div(checked_sub(r.stop, r.start), r.step), one(T))
+# n.b. checked_length for these is defined iff checked_add and checked_sub are
+# defined between the relevant types
+function checked_length(r::OrdinalRange{T}) where T
+    s = step(r)
+    start = first(r)
+    if isempty(r)
+        return Integer(div(start - start, oneunit(s)))
+    end
+    stop = last(r)
+    if isless(s, zero(s))
+        diff = checked_sub(start, stop)
+        s = -s
     else
-        return checked_add(div(checked_sub(r.start, r.stop), -r.step), one(T))
+        diff = checked_sub(stop, start)
     end
+    a = div(diff, s)
+    return Integer(checked_add(a, oneunit(a)))
 end
 
-function length(r::AbstractUnitRange{T}) where T<:Union{Int,Int64,Int128}
-    @_inline_meta
-    checked_add(checked_sub(last(r), first(r)), one(T))
+function checked_length(r::AbstractUnitRange{T}) where T
+    # compiler optimization: remove dead cases from above
+    if isempty(r)
+        return Integer(first(r) - first(r))
+    end
+    a = checked_sub(last(r), first(r))
+    return Integer(checked_add(a, oneunit(a)))
 end
-length(r::OneTo{T}) where {T<:Union{Int,Int64}} = T(r.stop)
 
-length(r::AbstractUnitRange{T}) where {T<:Union{UInt,UInt64,UInt128}} =
-    r.stop < r.start ? zero(T) : checked_add(last(r) - first(r), one(T))
+function length(r::OrdinalRange{T}) where T
+    s = step(r)
+    start = first(r)
+    if isempty(r)
+        return Integer(div(start - start, oneunit(s)))
+    end
+    stop = last(r)
+    if isless(s, zero(s))
+        diff = start - stop
+        s = -s
+    else
+        diff = stop - start
+    end
+    a = div(diff, s)
+    return Integer(a + oneunit(a))
+end
 
-# some special cases to favor default Int type
-let smallint = (Int === Int64 ?
-                Union{Int8,UInt8,Int16,UInt16,Int32,UInt32} :
-                Union{Int8,UInt8,Int16,UInt16})
-    global length
-
-    function length(r::StepRange{<:smallint})
-        isempty(r) && return Int(0)
-        div(Int(r.stop)+Int(r.step) - Int(r.start), Int(r.step))
+function length(r::AbstractUnitRange{T}) where T
+    @inline
+    start, stop = first(r), last(r)
+    a = oneunit(zero(stop) - zero(start))
+    if a isa Signed || stop >= start
+        a += stop - start # Signed are allowed to go negative
+    else
+        a = zero(a) # Unsigned don't necessarily underflow
+    end
+    return Integer(a)
+end
+
+length(r::OneTo) = Integer(r.stop - zero(r.stop))
+length(r::StepRangeLen) = r.len
+length(r::LinRange) = r.len
+
+let bigints = Union{Int, UInt, Int64, UInt64, Int128, UInt128}
+    global length, checked_length
+    # compile optimization for which promote_type(T, Int) == T
+    length(r::OneTo{T}) where {T<:bigints} = r.stop
+    # slightly more accurate length and checked_length in extreme cases
+    # (near typemax) for types with known `unsigned` functions
+    function length(r::OrdinalRange{T}) where T<:bigints
+        s = step(r)
+        isempty(r) && return zero(T)
+        diff = last(r) - first(r)
+        # if |s| > 1, diff might have overflowed, but unsigned(diff)÷s should
+        # therefore still be valid (if the result is representable at all)
+        # n.b. !(s isa T)
+        if s isa Unsigned || -1 <= s <= 1 || s == -s
+            a = div(diff, s)
+        elseif s < 0
+            a = div(unsigned(-diff), -s) % typeof(diff)
+        else
+            a = div(unsigned(diff), s) % typeof(diff)
+        end
+        return Integer(a) + oneunit(a)
     end
+    function checked_length(r::OrdinalRange{T}) where T<:bigints
+        s = step(r)
+        isempty(r) && return zero(T)
+        stop, start = last(r), first(r)
+        # n.b. !(s isa T)
+        if s > 1
+            diff = stop - start
+            a = convert(T, div(unsigned(diff), s))
+        elseif s < -1
+            diff = start - stop
+            a = convert(T, div(unsigned(diff), -s))
+        elseif s > 0
+            a = div(checked_sub(stop, start), s)
+        else
+            a = div(checked_sub(start, stop), -s)
+        end
+        return checked_add(a, oneunit(a))
+    end
+end
 
-    length(r::AbstractUnitRange{<:smallint}) = Int(last(r)) - Int(first(r)) + 1
-    length(r::OneTo{<:smallint}) = Int(r.stop)
+# some special cases to favor default Int type
+let smallints = (Int === Int64 ?
+                Union{Int8, UInt8, Int16, UInt16, Int32, UInt32} :
+                Union{Int8, UInt8, Int16, UInt16})
+    global length, checked_length
+    # n.b. !(step isa T)
+    function length(r::OrdinalRange{<:smallints})
+        s = step(r)
+        isempty(r) && return 0
+        return div(Int(last(r)) - Int(first(r)), s) + 1
+    end
+    length(r::AbstractUnitRange{<:smallints}) = Int(last(r)) - Int(first(r)) + 1
+    length(r::OneTo{<:smallints}) = Int(r.stop)
+    checked_length(r::OrdinalRange{<:smallints}) = length(r)
+    checked_length(r::AbstractUnitRange{<:smallints}) = length(r)
+    checked_length(r::OneTo{<:smallints}) = length(r)
 end
 
 first(r::OrdinalRange{T}) where {T} = convert(T, r.start)
@@ -597,7 +818,7 @@ first(r::OneTo{T}) where {T} = oneunit(T)
 first(r::StepRangeLen) = unsafe_getindex(r, 1)
 first(r::LinRange) = r.start
 
-last(r::OrdinalRange{T}) where {T} = convert(T, r.stop)
+last(r::OrdinalRange{T}) where {T} = convert(T, r.stop) # via steprange_last
 last(r::StepRangeLen) = unsafe_getindex(r, length(r))
 last(r::LinRange) = r.stop
 
@@ -606,6 +827,40 @@ maximum(r::AbstractUnitRange) = isempty(r) ? throw(ArgumentError("range must be
 minimum(r::AbstractRange)  = isempty(r) ? throw(ArgumentError("range must be non-empty")) : min(first(r), last(r))
 maximum(r::AbstractRange)  = isempty(r) ? throw(ArgumentError("range must be non-empty")) : max(first(r), last(r))
 
+"""
+    argmin(r::AbstractRange)
+
+Ranges can have multiple minimal elements. In that case
+`argmin` will return a minimal index, but not necessarily the
+first one.
+"""
+function argmin(r::AbstractRange)
+    if isempty(r)
+        throw(ArgumentError("range must be non-empty"))
+    elseif step(r) > 0
+        firstindex(r)
+    else
+        lastindex(r)
+    end
+end
+
+"""
+    argmax(r::AbstractRange)
+
+Ranges can have multiple maximal elements. In that case
+`argmax` will return a maximal index, but not necessarily the
+first one.
+"""
+function argmax(r::AbstractRange)
+    if isempty(r)
+        throw(ArgumentError("range must be non-empty"))
+    elseif step(r) > 0
+        lastindex(r)
+    else
+        firstindex(r)
+    end
+end
+
 extrema(r::AbstractRange) = (minimum(r), maximum(r))
 
 # Ranges are immutable
@@ -614,16 +869,17 @@ copy(r::AbstractRange) = r
 
 ## iteration
 
-function iterate(r::Union{LinRange,StepRangeLen}, i::Int=1)
-    @_inline_meta
+function iterate(r::Union{StepRangeLen,LinRange}, i::Integer=zero(length(r)))
+    @inline
+    i += oneunit(i)
     length(r) < i && return nothing
-    unsafe_getindex(r, i), i + 1
+    unsafe_getindex(r, i), i
 end
 
 iterate(r::OrdinalRange) = isempty(r) ? nothing : (first(r), first(r))
 
 function iterate(r::OrdinalRange{T}, i) where {T}
-    @_inline_meta
+    @inline
     i == last(r) && return nothing
     next = convert(T, i + step(r))
     (next, next)
@@ -634,8 +890,9 @@ end
 _in_unit_range(v::UnitRange, val, i::Integer) = i > 0 && val <= v.stop && val >= v.start
 
 function getindex(v::UnitRange{T}, i::Integer) where T
-    @_inline_meta
-    val = convert(T, v.start + (i - 1))
+    @inline
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
+    val = convert(T, v.start + (i - oneunit(i)))
     @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i)
     val
 end
@@ -644,21 +901,24 @@ const OverflowSafe = Union{Bool,Int8,Int16,Int32,Int64,Int128,
                            UInt8,UInt16,UInt32,UInt64,UInt128}
 
 function getindex(v::UnitRange{T}, i::Integer) where {T<:OverflowSafe}
-    @_inline_meta
-    val = v.start + (i - 1)
+    @inline
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
+    val = v.start + (i - oneunit(i))
     @boundscheck _in_unit_range(v, val, i) || throw_boundserror(v, i)
     val % T
 end
 
 function getindex(v::OneTo{T}, i::Integer) where T
-    @_inline_meta
+    @inline
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     @boundscheck ((i > 0) & (i <= v.stop)) || throw_boundserror(v, i)
     convert(T, i)
 end
 
 function getindex(v::AbstractRange{T}, i::Integer) where T
-    @_inline_meta
-    ret = convert(T, first(v) + (i - 1)*step_hp(v))
+    @inline
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
+    ret = convert(T, first(v) + (i - oneunit(i))*step_hp(v))
     ok = ifelse(step(v) > zero(step(v)),
                 (ret <= last(v)) & (ret >= first(v)),
                 (ret <= first(v)) & (ret >= last(v)))
@@ -667,83 +927,171 @@ function getindex(v::AbstractRange{T}, i::Integer) where T
 end
 
 function getindex(r::Union{StepRangeLen,LinRange}, i::Integer)
-    @_inline_meta
+    @inline
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     @boundscheck checkbounds(r, i)
     unsafe_getindex(r, i)
 end
 
 # This is separate to make it useful even when running with --check-bounds=yes
 function unsafe_getindex(r::StepRangeLen{T}, i::Integer) where T
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     u = i - r.offset
     T(r.ref + u*r.step)
 end
 
 function _getindex_hiprec(r::StepRangeLen, i::Integer)  # without rounding by T
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     u = i - r.offset
     r.ref + u*r.step
 end
 
 function unsafe_getindex(r::LinRange, i::Integer)
-    lerpi(i-1, r.lendiv, r.start, r.stop)
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
+    lerpi(i-oneunit(i), r.lendiv, r.start, r.stop)
 end
 
 function lerpi(j::Integer, d::Integer, a::T, b::T) where T
-    @_inline_meta
-    t = j/d
-    T((1-t)*a + t*b)
+    @inline
+    t = j/d # ∈ [0,1]
+    # compute approximately fma(t, b, -fma(t, a, a))
+    return T((1-t)*a + t*b)
 end
 
 getindex(r::AbstractRange, ::Colon) = copy(r)
 
-function getindex(r::AbstractUnitRange, s::AbstractUnitRange{<:Integer})
-    @_inline_meta
+function getindex(r::AbstractUnitRange, s::AbstractUnitRange{T}) where {T<:Integer}
+    @inline
     @boundscheck checkbounds(r, s)
-    f = first(r)
-    st = oftype(f, f + first(s)-1)
-    range(st, length=length(s))
+
+    if T === Bool
+        return range(first(s) ? first(r) : last(r), length = last(s))
+    else
+        f = first(r)
+        start = oftype(f, f + first(s) - firstindex(r))
+        len = length(s)
+        stop = oftype(f, start + (len - oneunit(len)))
+        return range(start, stop)
+    end
 end
 
 function getindex(r::OneTo{T}, s::OneTo) where T
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(r, s)
-    OneTo(T(s.stop))
+    return OneTo(T(s.stop))
 end
 
-function getindex(r::AbstractUnitRange, s::StepRange{<:Integer})
-    @_inline_meta
+function getindex(r::AbstractUnitRange, s::StepRange{T}) where {T<:Integer}
+    @inline
     @boundscheck checkbounds(r, s)
-    st = oftype(first(r), first(r) + s.start-1)
-    range(st, step=step(s), length=length(s))
+
+    if T === Bool
+        return range(first(s) ? first(r) : last(r), step=oneunit(eltype(r)), length=last(s))
+    else
+        f = first(r)
+        start = oftype(f, f + s.start - firstindex(r))
+        st = step(s)
+        len = length(s)
+        stop = oftype(f, start + (len - oneunit(len)) * st)
+        return range(start, stop; step=st)
+    end
 end
 
-function getindex(r::StepRange, s::AbstractRange{<:Integer})
-    @_inline_meta
+function getindex(r::StepRange, s::AbstractRange{T}) where {T<:Integer}
+    @inline
     @boundscheck checkbounds(r, s)
-    st = oftype(r.start, r.start + (first(s)-1)*step(r))
-    range(st, step=step(r)*step(s), length=length(s))
+
+    if T === Bool
+        if length(s) == 0
+            start, len = first(r), 0
+        elseif length(s) == 1
+            if first(s)
+                start, len = first(r), 1
+            else
+                start, len = first(r), 0
+            end
+        else # length(s) == 2
+            start, len = last(r), 1
+        end
+        return range(start, step=step(r); length=len)
+    else
+        f = r.start
+        fs = first(s)
+        st = r.step
+        start = oftype(f, f + (fs - oneunit(fs)) * st)
+        st = st * step(s)
+        len = length(s)
+        stop = oftype(f, start + (len - oneunit(len)) * st)
+        return range(start, stop; step=st)
+    end
 end
 
-function getindex(r::StepRangeLen{T}, s::OrdinalRange{<:Integer}) where {T}
-    @_inline_meta
+function getindex(r::StepRangeLen{T}, s::OrdinalRange{S}) where {T, S<:Integer}
+    @inline
     @boundscheck checkbounds(r, s)
-    # Find closest approach to offset by s
-    ind = LinearIndices(s)
-    offset = max(min(1 + round(Int, (r.offset - first(s))/step(s)), last(ind)), first(ind))
-    ref = _getindex_hiprec(r, first(s) + (offset-1)*step(s))
-    return StepRangeLen{T}(ref, r.step*step(s), length(s), offset)
+
+    len = length(s)
+    sstep = step_hp(s)
+    rstep = step_hp(r)
+    L = typeof(len)
+    if S === Bool
+        rstep *= one(sstep)
+        if len == 0
+            return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
+        elseif len == 1
+            if first(s)
+                return StepRangeLen{T}(first(r), rstep, oneunit(L), oneunit(L))
+            else
+                return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
+            end
+        else # len == 2
+            return StepRangeLen{T}(last(r), rstep, oneunit(L), oneunit(L))
+        end
+    else
+        # Find closest approach to offset by s
+        ind = LinearIndices(s)
+        offset = L(max(min(1 + round(L, (r.offset - first(s))/sstep), last(ind)), first(ind)))
+        ref = _getindex_hiprec(r, first(s) + (offset - oneunit(offset)) * sstep)
+        return StepRangeLen{T}(ref, rstep*sstep, len, offset)
+    end
 end
 
-function getindex(r::LinRange{T}, s::OrdinalRange{<:Integer}) where {T}
-    @_inline_meta
+function getindex(r::LinRange{T}, s::OrdinalRange{S}) where {T, S<:Integer}
+    @inline
     @boundscheck checkbounds(r, s)
-    vfirst = unsafe_getindex(r, first(s))
-    vlast  = unsafe_getindex(r, last(s))
-    return LinRange{T}(vfirst, vlast, length(s))
+
+    len = length(s)
+    L = typeof(len)
+    if S === Bool
+        if len == 0
+            return LinRange{T}(first(r), first(r), len)
+        elseif len == 1
+            if first(s)
+                return LinRange{T}(first(r), first(r), len)
+            else
+                return LinRange{T}(first(r), first(r), zero(L))
+            end
+        else # length(s) == 2
+            return LinRange{T}(last(r), last(r), oneunit(L))
+        end
+    else
+        vfirst = unsafe_getindex(r, first(s))
+        vlast  = unsafe_getindex(r, last(s))
+        return LinRange{T}(vfirst, vlast, len)
+    end
 end
 
 show(io::IO, r::AbstractRange) = print(io, repr(first(r)), ':', repr(step(r)), ':', repr(last(r)))
 show(io::IO, r::UnitRange) = print(io, repr(first(r)), ':', repr(last(r)))
 show(io::IO, r::OneTo) = print(io, "Base.OneTo(", r.stop, ")")
+function show(io::IO, r::StepRangeLen)
+    if step(r) != 0
+        print(io, repr(first(r)), ':', repr(step(r)), ':', repr(last(r)))
+    else
+        # ugly temporary printing, to avoid 0:0:0 etc.
+        print(io, "StepRangeLen(", repr(first(r)), ", ", repr(step(r)), ", ", repr(length(r)), ")")
+    end
+end
 
 function ==(r::T, s::T) where {T<:AbstractRange}
     isempty(r) && return isempty(s)
@@ -757,6 +1105,11 @@ function ==(r::OrdinalRange, s::OrdinalRange)
     (first(r) == first(s)) & (step(r) == step(s)) & (last(r) == last(s))
 end
 
+==(r::AbstractUnitRange, s::AbstractUnitRange) =
+    (isempty(r) & isempty(s)) | ((first(r) == first(s)) & (last(r) == last(s)))
+
+==(r::OneTo, s::OneTo) = last(r) == last(s)
+
 ==(r::T, s::T) where {T<:Union{StepRangeLen,LinRange}} =
     (isempty(r) & isempty(s)) | ((first(r) == first(s)) & (length(r) == length(s)) & (last(r) == last(s)))
 
@@ -855,6 +1208,16 @@ function intersect(r::StepRange, s::StepRange)
     step(r) < zero(step(r)) ? StepRange{T,S}(n, -a, m) : StepRange{T,S}(m, a, n)
 end
 
+function intersect(r1::AbstractRange, r2::AbstractRange)
+    # To iterate over the shorter range
+    length(r1) > length(r2) && return intersect(r2, r1)
+
+    r1 = unique(r1)
+    T = promote_eltype(r1, r2)
+
+    return T[x for x in r1 if x ∈ r2]
+end
+
 function intersect(r1::AbstractRange, r2::AbstractRange, r3::AbstractRange, r::AbstractRange...)
     i = intersect(intersect(r1, r2), r3)
     for t in r
@@ -888,32 +1251,36 @@ end
 issubset(r::OneTo, s::OneTo) = r.stop <= s.stop
 
 issubset(r::AbstractUnitRange{<:Integer}, s::AbstractUnitRange{<:Integer}) =
-    isempty(r) || first(r) >= first(s) && last(r) <= last(s)
+    isempty(r) || (first(r) >= first(s) && last(r) <= last(s))
 
 ## linear operations on ranges ##
 
--(r::OrdinalRange) = range(-first(r), step=-step(r), length=length(r))
--(r::StepRangeLen{T,R,S}) where {T,R,S} =
-    StepRangeLen{T,R,S}(-r.ref, -r.step, length(r), r.offset)
+-(r::OrdinalRange) = range(-first(r), step=negate(step(r)), length=length(r))
+-(r::StepRangeLen{T,R,S,L}) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(-r.ref, -r.step, r.len, r.offset)
 function -(r::LinRange)
     start = -r.start
     LinRange{typeof(start)}(start, -r.stop, length(r))
 end
 
-
 # promote eltype if at least one container wouldn't change, otherwise join container types.
-el_same(::Type{T}, a::Type{<:AbstractArray{T,n}}, b::Type{<:AbstractArray{T,n}}) where {T,n}   = a
+el_same(::Type{T}, a::Type{<:AbstractArray{T,n}}, b::Type{<:AbstractArray{T,n}}) where {T,n}   = a # we assume a === b
 el_same(::Type{T}, a::Type{<:AbstractArray{T,n}}, b::Type{<:AbstractArray{S,n}}) where {T,S,n} = a
 el_same(::Type{T}, a::Type{<:AbstractArray{S,n}}, b::Type{<:AbstractArray{T,n}}) where {T,S,n} = b
 el_same(::Type, a, b) = promote_typejoin(a, b)
 
+promote_result(::Type{<:AbstractArray}, ::Type{<:AbstractArray}, ::Type{T}, ::Type{S}) where {T,S} = (@inline; promote_type(T,S))
+promote_result(::Type{T}, ::Type{S}, ::Type{Bottom}, ::Type{Bottom}) where {T<:AbstractArray,S<:AbstractArray} = (@inline; promote_typejoin(T,S))
+# If no promote_rule is defined, both directions give Bottom. In that case use typejoin on the eltypes instead and give Array as the container.
+promote_result(::Type{<:AbstractArray{T,n}}, ::Type{<:AbstractArray{S,n}}, ::Type{Bottom}, ::Type{Bottom}) where {T,S,n} = (@inline; Array{promote_type(T,S),n})
+
 promote_rule(a::Type{UnitRange{T1}}, b::Type{UnitRange{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
+    el_same(promote_type(T1, T2), a, b)
 UnitRange{T}(r::UnitRange{T}) where {T<:Real} = r
 UnitRange{T}(r::UnitRange) where {T<:Real} = UnitRange{T}(r.start, r.stop)
 
 promote_rule(a::Type{OneTo{T1}}, b::Type{OneTo{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
+    el_same(promote_type(T1, T2), a, b)
 OneTo{T}(r::OneTo{T}) where {T<:Integer} = r
 OneTo{T}(r::OneTo) where {T<:Integer} = OneTo{T}(r.stop)
 
@@ -926,11 +1293,14 @@ AbstractUnitRange{T}(r::AbstractUnitRange{T}) where {T} = r
 AbstractUnitRange{T}(r::UnitRange) where {T} = UnitRange{T}(r)
 AbstractUnitRange{T}(r::OneTo) where {T} = OneTo{T}(r)
 
-promote_rule(::Type{StepRange{T1a,T1b}}, ::Type{StepRange{T2a,T2b}}) where {T1a,T1b,T2a,T2b} =
-    el_same(promote_type(T1a,T2a),
-            # el_same only operates on array element type, so just promote second type parameter
-            StepRange{T1a, promote_type(T1b,T2b)},
-            StepRange{T2a, promote_type(T1b,T2b)})
+OrdinalRange{T, S}(r::OrdinalRange) where {T, S} = StepRange{T, S}(r)
+OrdinalRange{T, T}(r::AbstractUnitRange) where {T} = AbstractUnitRange{T}(r)
+
+function promote_rule(::Type{StepRange{T1a,T1b}}, ::Type{StepRange{T2a,T2b}}) where {T1a,T1b,T2a,T2b}
+    Tb = promote_type(T1b, T2b)
+    # el_same only operates on array element type, so just promote second type parameter
+    el_same(promote_type(T1a, T2a), StepRange{T1a,Tb}, StepRange{T2a,Tb})
+end
 StepRange{T1,T2}(r::StepRange{T1,T2}) where {T1,T2} = r
 
 promote_rule(a::Type{StepRange{T1a,T1b}}, ::Type{UR}) where {T1a,T1b,UR<:AbstractUnitRange} =
@@ -941,35 +1311,38 @@ StepRange(r::AbstractUnitRange{T}) where {T} =
     StepRange{T,T}(first(r), step(r), last(r))
 (StepRange{T1,T2} where T1)(r::AbstractRange) where {T2} = StepRange{eltype(r),T2}(r)
 
-promote_rule(::Type{StepRangeLen{T1,R1,S1}},::Type{StepRangeLen{T2,R2,S2}}) where {T1,T2,R1,R2,S1,S2} =
-    el_same(promote_type(T1,T2),
-            StepRangeLen{T1,promote_type(R1,R2),promote_type(S1,S2)},
-            StepRangeLen{T2,promote_type(R1,R2),promote_type(S1,S2)})
-StepRangeLen{T,R,S}(r::StepRangeLen{T,R,S}) where {T,R,S} = r
-StepRangeLen{T,R,S}(r::StepRangeLen) where {T,R,S} =
-    StepRangeLen{T,R,S}(convert(R, r.ref), convert(S, r.step), length(r), r.offset)
+function promote_rule(::Type{StepRangeLen{T1,R1,S1,L1}},::Type{StepRangeLen{T2,R2,S2,L2}}) where {T1,T2,R1,R2,S1,S2,L1,L2}
+    R, S, L = promote_type(R1, R2), promote_type(S1, S2), promote_type(L1, L2)
+    el_same(promote_type(T1, T2), StepRangeLen{T1,R,S,L}, StepRangeLen{T2,R,S,L})
+end
+StepRangeLen{T,R,S,L}(r::StepRangeLen{T,R,S,L}) where {T,R,S,L} = r
+StepRangeLen{T,R,S,L}(r::StepRangeLen) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(convert(R, r.ref), convert(S, r.step), convert(L, r.len), convert(L, r.offset))
 StepRangeLen{T}(r::StepRangeLen) where {T} =
-    StepRangeLen(convert(T, r.ref), convert(T, r.step), length(r), r.offset)
+    StepRangeLen(convert(T, r.ref), convert(T, r.step), r.len, r.offset)
 
-promote_rule(a::Type{StepRangeLen{T,R,S}}, ::Type{OR}) where {T,R,S,OR<:AbstractRange} =
-    promote_rule(a, StepRangeLen{eltype(OR), eltype(OR), eltype(OR)})
-StepRangeLen{T,R,S}(r::AbstractRange) where {T,R,S} =
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+promote_rule(a::Type{StepRangeLen{T,R,S,L}}, ::Type{OR}) where {T,R,S,L,OR<:AbstractRange} =
+    promote_rule(a, StepRangeLen{eltype(OR), eltype(OR), eltype(OR), Int})
+StepRangeLen{T,R,S,L}(r::AbstractRange) where {T,R,S,L} =
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), length(r))
 StepRangeLen{T}(r::AbstractRange) where {T} =
     StepRangeLen(T(first(r)), T(step(r)), length(r))
 StepRangeLen(r::AbstractRange) = StepRangeLen{eltype(r)}(r)
 
-promote_rule(a::Type{LinRange{T1}}, b::Type{LinRange{T2}}) where {T1,T2} =
-    el_same(promote_type(T1,T2), a, b)
-LinRange{T}(r::LinRange{T}) where {T} = r
-LinRange{T}(r::AbstractRange) where {T} = LinRange{T}(first(r), last(r), length(r))
+function promote_rule(a::Type{LinRange{T1,L1}}, b::Type{LinRange{T2,L2}}) where {T1,T2,L1,L2}
+    L = promote_type(L1, L2)
+    el_same(promote_type(T1, T2), LinRange{T1,L}, LinRange{T2,L})
+end
+LinRange{T,L}(r::LinRange{T,L}) where {T,L} = r
+LinRange{T,L}(r::AbstractRange) where {T,L} = LinRange{T,L}(first(r), last(r), length(r))
+LinRange{T}(r::AbstractRange) where {T} = LinRange{T,typeof(length(r))}(first(r), last(r), length(r))
 LinRange(r::AbstractRange{T}) where {T} = LinRange{T}(r)
 
-promote_rule(a::Type{LinRange{T}}, ::Type{OR}) where {T,OR<:OrdinalRange} =
-    promote_rule(a, LinRange{eltype(OR)})
+promote_rule(a::Type{LinRange{T,L}}, ::Type{OR}) where {T,L,OR<:OrdinalRange} =
+    promote_rule(a, LinRange{eltype(OR),L})
 
-promote_rule(::Type{LinRange{L}}, b::Type{StepRangeLen{T,R,S}}) where {L,T,R,S} =
-    promote_rule(StepRangeLen{L,L,L}, b)
+promote_rule(::Type{LinRange{A,L}}, b::Type{StepRangeLen{T2,R2,S2,L2}}) where {A,L,T2,R2,S2,L2} =
+    promote_rule(StepRangeLen{A,A,A,L}, b)
 
 ## concatenation ##
 
@@ -990,15 +1363,15 @@ end
 Array{T,1}(r::AbstractRange{T}) where {T} = vcat(r)
 collect(r::AbstractRange) = vcat(r)
 
-_reverse(r::OrdinalRange, ::Colon) = (:)(last(r), -step(r), first(r))
+_reverse(r::OrdinalRange, ::Colon) = (:)(last(r), negate(step(r)), first(r))
 function _reverse(r::StepRangeLen, ::Colon)
     # If `r` is empty, `length(r) - r.offset + 1 will be nonpositive hence
     # invalid. As `reverse(r)` is also empty, any offset would work so we keep
     # `r.offset`
     offset = isempty(r) ? r.offset : length(r)-r.offset+1
-    StepRangeLen(r.ref, -r.step, length(r), offset)
+    return typeof(r)(r.ref, negate(r.step), length(r), offset)
 end
-_reverse(r::LinRange{T}, ::Colon) where {T} = LinRange{T}(r.stop, r.start, length(r))
+_reverse(r::LinRange{T}, ::Colon) where {T} = typeof(r)(r.stop, r.start, length(r))
 
 ## sorting ##
 
@@ -1021,7 +1394,9 @@ function sum(r::AbstractRange{<:Real})
 end
 
 function _in_range(x, r::AbstractRange)
-    if step(r) == 0
+    if !isfinite(x)
+        return false
+    elseif iszero(step(r))
         return !isempty(r) && first(r) == x
     else
         n = round(Integer, (x - first(r)) / step(r)) + 1
@@ -1036,11 +1411,13 @@ in(x::T, r::AbstractRange{T}) where {T} = _in_range(x, r)
 in(x::Integer, r::AbstractUnitRange{<:Integer}) = (first(r) <= x) & (x <= last(r))
 
 in(x::Real, r::AbstractRange{T}) where {T<:Integer} =
-    isinteger(x) && !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
-        (mod(convert(T,x),step(r))-mod(first(r),step(r)) == 0)
+    isinteger(x) && !isempty(r) &&
+    (iszero(step(r)) ? x == first(r) : (x >= minimum(r) && x <= maximum(r) &&
+        (mod(convert(T,x),step(r))-mod(first(r),step(r)) == 0)))
 in(x::AbstractChar, r::AbstractRange{<:AbstractChar}) =
-    !isempty(r) && x >= minimum(r) && x <= maximum(r) &&
-        (mod(Int(x) - Int(first(r)), step(r)) == 0)
+    !isempty(r) &&
+    (iszero(step(r)) ? x == first(r) : (x >= minimum(r) && x <= maximum(r) &&
+        (mod(Int(x) - Int(first(r)), step(r)) == 0)))
 
 # Addition/subtraction of ranges
 
@@ -1050,7 +1427,7 @@ function _define_range_op(@nospecialize f)
             r1l = length(r1)
             (r1l == length(r2) ||
              throw(DimensionMismatch("argument dimensions must match: length of r1 is $r1l, length of r2 is $(length(r2))")))
-            range($f(first(r1), first(r2)), step=$f(step(r1), step(r2)), length=r1l)
+            StepRangeLen($f(first(r1), first(r2)), $f(step(r1), step(r2)), r1l)
         end
 
         function $f(r1::LinRange{T}, r2::LinRange{T}) where T
@@ -1086,14 +1463,14 @@ end
 Find `y` in the range `r` such that ``x ≡ y (mod n)``, where `n = length(r)`,
 i.e. `y = mod(x - first(r), n) + first(r)`.
 
-See also: [`mod1`](@ref).
+See also [`mod1`](@ref).
 
 # Examples
 ```jldoctest
-julia> mod(0, Base.OneTo(3))
+julia> mod(0, Base.OneTo(3))  # mod1(0, 3)
 3
 
-julia> mod(3, 0:2)
+julia> mod(3, 0:2)  # mod(3, 3)
 0
 ```
 
diff --git a/base/rational.jl b/base/rational.jl
index 1f7b0bea79ca41..9e887bdaefa91a 100644
--- a/base/rational.jl
+++ b/base/rational.jl
@@ -18,21 +18,22 @@ unsafe_rational(num::T, den::T) where {T<:Integer} = unsafe_rational(T, num, den
 unsafe_rational(num::Integer, den::Integer) = unsafe_rational(promote(num, den)...)
 
 @noinline __throw_rational_argerror_typemin(T) = throw(ArgumentError("invalid rational: denominator can't be typemin($T)"))
-function checked_den(num::T, den::T) where T<:Integer
+function checked_den(::Type{T}, num::T, den::T) where T<:Integer
     if signbit(den)
         den = -den
-        signbit(den) && __throw_rational_argerror_typemin(T)
+        signbit(den) && __throw_rational_argerror_typemin(typeof(den))
         num = -num
     end
     return unsafe_rational(T, num, den)
 end
+checked_den(num::T, den::T) where T<:Integer = checked_den(T, num, den)
 checked_den(num::Integer, den::Integer) = checked_den(promote(num, den)...)
 
 @noinline __throw_rational_argerror_zero(T) = throw(ArgumentError("invalid rational: zero($T)//zero($T)"))
 function Rational{T}(num::Integer, den::Integer) where T<:Integer
     iszero(den) && iszero(num) && __throw_rational_argerror_zero(T)
     num, den = divgcd(num, den)
-    return checked_den(T(num), T(den))
+    return checked_den(T, T(num), T(den))
 end
 
 Rational(n::T, d::T) where {T<:Integer} = Rational{T}(n, d)
@@ -215,6 +216,8 @@ function rationalize(::Type{T}, x::AbstractFloat, tol::Real) where T<:Integer
 end
 rationalize(::Type{T}, x::AbstractFloat; tol::Real = eps(x)) where {T<:Integer} = rationalize(T, x, tol)::Rational{T}
 rationalize(x::AbstractFloat; kvs...) = rationalize(Int, x; kvs...)
+rationalize(::Type{T}, x::Complex; kvs...) where {T<:Integer} = Complex(rationalize(T, x.re, kvs...)::Rational{T}, rationalize(T, x.im, kvs...)::Rational{T})
+rationalize(x::Complex; kvs...) = Complex(rationalize(Int, x.re, kvs...), rationalize(Int, x.im, kvs...))
 
 """
     numerator(x)
@@ -262,6 +265,7 @@ typemin(::Type{Rational{T}}) where {T<:Integer} = unsafe_rational(T, zero(T), on
 typemax(::Type{Rational{T}}) where {T<:Integer} = unsafe_rational(T, one(T), zero(T))
 
 isinteger(x::Rational) = x.den == 1
+ispow2(x::Rational) = ispow2(x.num) & ispow2(x.den)
 
 +(x::Rational) = unsafe_rational(+x.num, x.den)
 -(x::Rational) = unsafe_rational(-x.num, x.den)
@@ -277,13 +281,35 @@ function -(x::Rational{T}) where T<:Unsigned
     x
 end
 
-for (op,chop) in ((:+,:checked_add), (:-,:checked_sub), (:rem,:rem), (:mod,:mod))
+function +(x::Rational, y::Rational)
+    xp, yp = promote(x, y)::NTuple{2,Rational}
+    if isinf(x) && x == y
+        return xp
+    end
+    xd, yd = divgcd(promote(x.den, y.den)...)
+    Rational(checked_add(checked_mul(x.num,yd), checked_mul(y.num,xd)), checked_mul(x.den,yd))
+end
+
+function -(x::Rational, y::Rational)
+    xp, yp = promote(x, y)::NTuple{2,Rational}
+    if isinf(x) && x == -y
+        return xp
+    end
+    xd, yd = divgcd(promote(x.den, y.den)...)
+    Rational(checked_sub(checked_mul(x.num,yd), checked_mul(y.num,xd)), checked_mul(x.den,yd))
+end
+
+for (op,chop) in ((:rem,:rem), (:mod,:mod))
     @eval begin
         function ($op)(x::Rational, y::Rational)
             xd, yd = divgcd(promote(x.den, y.den)...)
             Rational(($chop)(checked_mul(x.num,yd), checked_mul(y.num,xd)), checked_mul(x.den,yd))
         end
+    end
+end
 
+for (op,chop) in ((:+,:checked_add), (:-,:checked_sub), (:rem,:rem), (:mod,:mod))
+    @eval begin
         function ($op)(x::Rational, y::Integer)
             unsafe_rational(($chop)(x.num, checked_mul(x.den, y)), x.den)
         end
@@ -485,3 +511,45 @@ function gcdx(x::Rational, y::Rational)
     end
     c, a, b
 end
+
+## streamlined hashing for smallish rational types ##
+
+decompose(x::Rational) = numerator(x), 0, denominator(x)
+function hash(x::Rational{<:BitInteger64}, h::UInt)
+    num, den = Base.numerator(x), Base.denominator(x)
+    den == 1 && return hash(num, h)
+    den == 0 && return hash(ifelse(num > 0, Inf, -Inf), h)
+    if isodd(den)
+        pow = trailing_zeros(num)
+        num >>= pow
+    else
+        pow = trailing_zeros(den)
+        den >>= pow
+        pow = -pow
+        if den == 1 && abs(num) < 9007199254740992
+            return hash(ldexp(Float64(num),pow),h)
+        end
+    end
+    h = hash_integer(den, h)
+    h = hash_integer(pow, h)
+    h = hash_integer(num, h)
+    return h
+end
+
+# These methods are only needed for performance. Since `first(r)` and `last(r)` have the
+# same denominator (because their difference is an integer), `length(r)` can be calulated
+# without calling `gcd`.
+function length(r::AbstractUnitRange{T}) where T<:Rational
+    @inline
+    f = first(r)
+    l = last(r)
+    return div(l.num - f.num + f.den, f.den)
+end
+function checked_length(r::AbstractUnitRange{T}) where T<:Rational
+    f = first(r)
+    l = last(r)
+    if isempty(r)
+        return f.num - f.num
+    end
+    return div(checked_add(checked_sub(l.num, f.num), f.den), f.den)
+end
diff --git a/base/reduce.jl b/base/reduce.jl
index 4a6f27a5d947a0..1f59c61ea5d5b0 100644
--- a/base/reduce.jl
+++ b/base/reduce.jl
@@ -88,6 +88,8 @@ Create a mapping reducing function `rf′(acc, x) = rf(acc, f(x))`.
 struct MappingRF{F, T}
     f::F
     rf::T
+    MappingRF(f::F, rf::T) where {F,T} = new{F,T}(f, rf)
+    MappingRF(::Type{f}, rf::T) where {f,T} = new{Type{f},T}(f, rf)
 end
 
 @inline (op::MappingRF)(acc, x) = op.rf(acc, op.f(x))
@@ -166,6 +168,8 @@ Like [`reduce`](@ref), but with guaranteed left associativity. If provided, the
 argument `init` will be used exactly once. In general, it will be necessary to provide
 `init` to work with empty collections.
 
+See also [`mapfoldl`](@ref), [`foldr`](@ref), [`accumulate`](@ref).
+
 # Examples
 ```jldoctest
 julia> foldl(=>, 1:4)
@@ -173,6 +177,9 @@ julia> foldl(=>, 1:4)
 
 julia> foldl(=>, 1:4; init=0)
 (((0 => 1) => 2) => 3) => 4
+
+julia> accumulate(=>, (1,2,3,4))
+(1, 1 => 2, (1 => 2) => 3, ((1 => 2) => 3) => 4)
 ```
 """
 foldl(op, itr; kw...) = mapfoldl(identity, op, itr; kw...)
@@ -235,7 +242,7 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
     if ifirst == ilast
         @inbounds a1 = A[ifirst]
         return mapreduce_first(f, op, a1)
-    elseif ifirst + blksize > ilast
+    elseif ilast - ifirst < blksize
         # sequential portion
         @inbounds a1 = A[ifirst]
         @inbounds a2 = A[ifirst+1]
@@ -247,7 +254,7 @@ foldr(op, itr; kw...) = mapfoldr(identity, op, itr; kw...)
         return v
     else
         # pairwise portion
-        imid = (ifirst + ilast) >> 1
+        imid = ifirst + (ilast - ifirst) >> 1
         v1 = mapreduce_impl(f, op, A, ifirst, imid, blksize)
         v2 = mapreduce_impl(f, op, A, imid+1, ilast, blksize)
         return op(v1, v2)
@@ -297,6 +304,9 @@ pairwise_blocksize(::typeof(abs2), ::typeof(+)) = 4096
 
 # handling empty arrays
 _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection is not allowed"))
+_empty_reduce_error(@nospecialize(f), @nospecialize(T::Type)) = throw(ArgumentError("""
+    reducing with $f over an empty collection of element type $T is not allowed.
+    You may be able to prevent this error by supplying an `init` value to the reducer."""))
 
 """
     Base.reduce_empty(op, T)
@@ -304,23 +314,32 @@ _empty_reduce_error() = throw(ArgumentError("reducing over an empty collection i
 The value to be returned when calling [`reduce`](@ref), [`foldl`](@ref) or [`foldr`](@ref)
 with reduction `op` over an empty array with element type of `T`.
 
-If not defined, this will throw an `ArgumentError`.
+This should only be defined in unambiguous cases; for example,
+
+```julia
+Base.reduce_empty(::typeof(+), ::Type{T}) where T = zero(T)
+```
+
+is justified (the sum of zero elements is zero), whereas
+`reduce_empty(::typeof(max), ::Type{Any})` is not (the maximum value of an empty collection
+is generally ambiguous, and especially so when the element type is unknown).
+
+As an alternative, consider supplying an `init` value to the reducer.
 """
-reduce_empty(op, ::Type{T}) where {T} = _empty_reduce_error()
-reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(+), ::Type{Union{}}) = _empty_reduce_error(+, Union{})
 reduce_empty(::typeof(+), ::Type{T}) where {T} = zero(T)
 reduce_empty(::typeof(+), ::Type{Bool}) = zero(Int)
-reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(*), ::Type{Union{}}) = _empty_reduce_error(*, Union{})
 reduce_empty(::typeof(*), ::Type{T}) where {T} = one(T)
 reduce_empty(::typeof(*), ::Type{<:AbstractChar}) = ""
 reduce_empty(::typeof(&), ::Type{Bool}) = true
 reduce_empty(::typeof(|), ::Type{Bool}) = false
 
-reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(add_sum), ::Type{Union{}}) = _empty_reduce_error(add_sum, Union{})
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T} = reduce_empty(+, T)
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallSigned}  = zero(Int)
 reduce_empty(::typeof(add_sum), ::Type{T}) where {T<:SmallUnsigned} = zero(UInt)
-reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error()
+reduce_empty(::typeof(mul_prod), ::Type{Union{}}) = _empty_reduce_error(mul_prod, Union{})
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T} = reduce_empty(*, T)
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallSigned}  = one(Int)
 reduce_empty(::typeof(mul_prod), ::Type{T}) where {T<:SmallUnsigned} = one(UInt)
@@ -335,11 +354,8 @@ reduce_empty(op::FlipArgs, ::Type{T}) where {T} = reduce_empty(op.f, T)
 
 The value to be returned when calling [`mapreduce`](@ref), [`mapfoldl`](@ref`) or
 [`mapfoldr`](@ref) with map `f` and reduction `op` over an empty array with element type
-of `T`.
-
-If not defined, this will throw an `ArgumentError`.
+of `T`. See [`Base.reduce_empty`](@ref) for more information.
 """
-mapreduce_empty(f, op, T) = _empty_reduce_error()
 mapreduce_empty(::typeof(identity), op, T) = reduce_empty(op, T)
 mapreduce_empty(::typeof(abs), op, T)      = abs(reduce_empty(op, T))
 mapreduce_empty(::typeof(abs2), op, T)     = abs2(reduce_empty(op, T))
@@ -353,7 +369,10 @@ mapreduce_empty_iter(f, op, itr, ItrEltype) =
 
 @inline reduce_empty_iter(op, itr) = reduce_empty_iter(op, itr, IteratorEltype(itr))
 @inline reduce_empty_iter(op, itr, ::HasEltype) = reduce_empty(op, eltype(itr))
-reduce_empty_iter(op, itr, ::EltypeUnknown) = _empty_reduce_error()
+reduce_empty_iter(op, itr, ::EltypeUnknown) = throw(ArgumentError("""
+    reducing over an empty collection of unknown element type is not allowed.
+    You may be able to prevent this error by supplying an `init` value to the reducer."""))
+
 
 # handling of single-element iterators
 """
@@ -516,6 +535,8 @@ for non-empty collections.
 !!! compat "Julia 1.6"
     Keyword argument `init` requires Julia 1.6 or later.
 
+See also: [`reduce`](@ref), [`mapreduce`](@ref), [`count`](@ref), [`union`](@ref).
+
 # Examples
 ```jldoctest
 julia> sum(1:20)
@@ -527,7 +548,7 @@ julia> sum(1:20; init = 0.0)
 """
 sum(a; kw...) = sum(identity, a; kw...)
 sum(a::AbstractArray{Bool}; kw...) =
-    kw.data === NamedTuple() ? count(a) : reduce(add_sum, a; kw...)
+    isempty(kw) ? count(a) : reduce(add_sum, a; kw...)
 
 ## prod
 """
@@ -570,6 +591,8 @@ for non-empty collections.
 !!! compat "Julia 1.6"
     Keyword argument `init` requires Julia 1.6 or later.
 
+See also: [`reduce`](@ref), [`cumprod`](@ref), [`any`](@ref).
+
 # Examples
 ```jldoctest
 julia> prod(1:5)
@@ -581,7 +604,7 @@ julia> prod(1:5; init = 1.0)
 """
 prod(a; kw...) = mapreduce(identity, mul_prod, a; kw...)
 
-## maximum & minimum
+## maximum, minimum, & extrema
 _fast(::typeof(min),x,y) = min(x,y)
 _fast(::typeof(max),x,y) = max(x,y)
 function _fast(::typeof(max), x::AbstractFloat, y::AbstractFloat)
@@ -611,11 +634,6 @@ function mapreduce_impl(f, op::Union{typeof(max), typeof(min)},
     start = first + 1
     simdstop  = start + chunk_len - 4
     while simdstop <= last - 3
-        # short circuit in case of NaN or missing
-        (v1 == v1) === true || return v1
-        (v2 == v2) === true || return v2
-        (v3 == v3) === true || return v3
-        (v4 == v4) === true || return v4
         @inbounds for i in start:4:simdstop
             v1 = _fast(op, v1, f(A[i+0]))
             v2 = _fast(op, v2, f(A[i+1]))
@@ -720,7 +738,7 @@ julia> maximum([1,2,3])
 3
 
 julia> maximum(())
-ERROR: ArgumentError: reducing over an empty collection is not allowed
+ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
 Stacktrace:
 [...]
 
@@ -752,7 +770,7 @@ julia> minimum([1,2,3])
 1
 
 julia> minimum([])
-ERROR: ArgumentError: reducing over an empty collection is not allowed
+ERROR: MethodError: reducing over an empty collection is not allowed; consider supplying `init` to the reducer
 Stacktrace:
 [...]
 
@@ -762,18 +780,317 @@ Inf
 """
 minimum(a; kw...) = mapreduce(identity, min, a; kw...)
 
+"""
+    extrema(itr; [init]) -> (mn, mx)
+
+Compute both the minimum `mn` and maximum `mx` element in a single pass, and return them
+as a 2-tuple.
+
+The value returned for empty `itr` can be specified by `init`. It must be a 2-tuple whose
+first and second elements are neutral elements for `min` and `max` respectively
+(i.e. which are greater/less than or equal to any other element). As a consequence, when
+`itr` is empty the returned `(mn, mx)` tuple will satisfy `mn ≥ mx`. When `init` is
+specified it may be used even for non-empty `itr`.
+
+!!! compat "Julia 1.8"
+    Keyword argument `init` requires Julia 1.8 or later.
+
+# Examples
+```jldoctest
+julia> extrema(2:10)
+(2, 10)
+
+julia> extrema([9,pi,4.5])
+(3.141592653589793, 9.0)
+
+julia> extrema([]; init = (Inf, -Inf))
+(Inf, -Inf)
+```
+"""
+extrema(itr; kw...) = extrema(identity, itr; kw...)
+
+"""
+    extrema(f, itr; [init]) -> (mn, mx)
+
+Compute both the minimum `mn` and maximum `mx` of `f` applied to each element in `itr` and
+return them as a 2-tuple. Only one pass is made over `itr`.
+
+The value returned for empty `itr` can be specified by `init`. It must be a 2-tuple whose
+first and second elements are neutral elements for `min` and `max` respectively
+(i.e. which are greater/less than or equal to any other element). It is used for non-empty
+collections. Note: it implies that, for empty `itr`, the returned value `(mn, mx)` satisfies
+`mn ≥ mx` even though for non-empty `itr` it  satisfies `mn ≤ mx`.  This is a "paradoxical"
+but yet expected result.
+
+!!! compat "Julia 1.2"
+    This method requires Julia 1.2 or later.
+
+!!! compat "Julia 1.8"
+    Keyword argument `init` requires Julia 1.8 or later.
+
+# Examples
+```jldoctest
+julia> extrema(sin, 0:π)
+(0.0, 0.9092974268256817)
+
+julia> extrema(sin, Real[]; init = (1.0, -1.0))  # good, since -1 ≤ sin(::Real) ≤ 1
+(1.0, -1.0)
+```
+"""
+extrema(f, itr; kw...) = mapreduce(ExtremaMap(f), _extrema_rf, itr; kw...)
+
+# Not using closure since `extrema(type, itr)` is a very likely use-case and it's better
+# to avoid type-instability (#23618).
+struct ExtremaMap{F} <: Function
+    f::F
+end
+ExtremaMap(::Type{T}) where {T} = ExtremaMap{Type{T}}(T)
+@inline (f::ExtremaMap)(x) = (y = f.f(x); (y, y))
+
+# TODO: optimize for inputs <: AbstractFloat
+@inline _extrema_rf((min1, max1), (min2, max2)) = (min(min1, min2), max(max1, max2))
+
+## findmax, findmin, argmax & argmin
+
+"""
+    findmax(f, domain) -> (f(x), index)
+
+Returns a pair of a value in the codomain (outputs of `f`) and the index of
+the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is maximised.
+If there are multiple maximal points, then the first one will be returned.
+
+`domain` must be a non-empty iterable.
+
+Values are compared with `isless`.
+
+!!! compat "Julia 1.7"
+    This method requires Julia 1.7 or later.
+
+# Examples
+
+```jldoctest
+julia> findmax(identity, 5:9)
+(9, 5)
+
+julia> findmax(-, 1:10)
+(-1, 1)
+
+julia> findmax(first, [(1, :a), (3, :b), (3, :c)])
+(3, 2)
+
+julia> findmax(cos, 0:π/2:2π)
+(1.0, 1)
+```
+"""
+findmax(f, domain) = mapfoldl( ((k, v),) -> (f(v), k), _rf_findmax, pairs(domain) )
+_rf_findmax((fm, im), (fx, ix)) = isless(fm, fx) ? (fx, ix) : (fm, im)
+
+"""
+    findmax(itr) -> (x, index)
+
+Return the maximal element of the collection `itr` and its index or key.
+If there are multiple maximal elements, then the first one will be returned.
+Values are compared with `isless`.
+
+See also: [`findmin`](@ref), [`argmax`](@ref), [`maximum`](@ref).
+
+# Examples
+
+```jldoctest
+julia> findmax([8, 0.1, -9, pi])
+(8.0, 1)
+
+julia> findmax([1, 7, 7, 6])
+(7, 2)
+
+julia> findmax([1, 7, 7, NaN])
+(NaN, 4)
+```
+"""
+findmax(itr) = _findmax(itr, :)
+_findmax(a, ::Colon) = findmax(identity, a)
+
+"""
+    findmin(f, domain) -> (f(x), index)
+
+Returns a pair of a value in the codomain (outputs of `f`) and the index of
+the corresponding value in the `domain` (inputs to `f`) such that `f(x)` is minimised.
+If there are multiple minimal points, then the first one will be returned.
+
+`domain` must be a non-empty iterable.
+
+`NaN` is treated as less than all other values except `missing`.
+
+!!! compat "Julia 1.7"
+    This method requires Julia 1.7 or later.
+
+# Examples
+
+```jldoctest
+julia> findmin(identity, 5:9)
+(5, 1)
+
+julia> findmin(-, 1:10)
+(-10, 10)
+
+julia> findmin(first, [(2, :a), (2, :b), (3, :c)])
+(2, 1)
+
+julia> findmin(cos, 0:π/2:2π)
+(-1.0, 3)
+```
+
+"""
+findmin(f, domain) = mapfoldl( ((k, v),) -> (f(v), k), _rf_findmin, pairs(domain) )
+_rf_findmin((fm, im), (fx, ix)) = isgreater(fm, fx) ? (fx, ix) : (fm, im)
+
+"""
+    findmin(itr) -> (x, index)
+
+Return the minimal element of the collection `itr` and its index or key.
+If there are multiple minimal elements, then the first one will be returned.
+`NaN` is treated as less than all other values except `missing`.
+
+See also: [`findmax`](@ref), [`argmin`](@ref), [`minimum`](@ref).
+
+# Examples
+
+```jldoctest
+julia> findmin([8, 0.1, -9, pi])
+(-9.0, 3)
+
+julia> findmin([1, 7, 7, 6])
+(1, 1)
+
+julia> findmin([1, 7, 7, NaN])
+(NaN, 4)
+```
+"""
+findmin(itr) = _findmin(itr, :)
+_findmin(a, ::Colon) = findmin(identity, a)
+
+"""
+    argmax(f, domain)
+
+Return a value `x` in the domain of `f` for which `f(x)` is maximised.
+If there are multiple maximal values for `f(x)` then the first one will be found.
+
+`domain` must be a non-empty iterable.
+
+Values are compared with `isless`.
+
+!!! compat "Julia 1.7"
+    This method requires Julia 1.7 or later.
+
+See also [`argmin`](@ref), [`findmax`](@ref).
+
+# Examples
+```jldoctest
+julia> argmax(abs, -10:5)
+-10
+
+julia> argmax(cos, 0:π/2:2π)
+0.0
+```
+"""
+argmax(f, domain) = mapfoldl(x -> (f(x), x), _rf_findmax, domain)[2]
+
+"""
+    argmax(itr)
+
+Return the index or key of the maximal element in a collection.
+If there are multiple maximal elements, then the first one will be returned.
+
+The collection must not be empty.
+
+Values are compared with `isless`.
+
+See also: [`argmin`](@ref), [`findmax`](@ref).
+
+# Examples
+```jldoctest
+julia> argmax([8, 0.1, -9, pi])
+1
+
+julia> argmax([1, 7, 7, 6])
+2
+
+julia> argmax([1, 7, 7, NaN])
+4
+```
+"""
+argmax(itr) = findmax(itr)[2]
+
+"""
+    argmin(f, domain)
+
+Return a value `x` in the domain of `f` for which `f(x)` is minimised.
+If there are multiple minimal values for `f(x)` then the first one will be found.
+
+`domain` must be a non-empty iterable.
+
+`NaN` is treated as less than all other values except `missing`.
+
+!!! compat "Julia 1.7"
+    This method requires Julia 1.7 or later.
+
+See also [`argmax`](@ref), [`findmin`](@ref).
+
+# Examples
+```jldoctest
+julia> argmin(sign, -10:5)
+-10
+
+julia> argmin(x -> -x^3 + x^2 - 10, -5:5)
+5
+
+julia> argmin(acos, 0:0.1:1)
+1.0
+```
+"""
+argmin(f, domain) = mapfoldl(x -> (f(x), x), _rf_findmin, domain)[2]
+
+"""
+    argmin(itr)
+
+Return the index or key of the minimal element in a collection.
+If there are multiple minimal elements, then the first one will be returned.
+
+The collection must not be empty.
+
+`NaN` is treated as less than all other values except `missing`.
+
+See also: [`argmax`](@ref), [`findmin`](@ref).
+
+# Examples
+```jldoctest
+julia> argmin([8, 0.1, -9, pi])
+3
+
+julia> argmin([7, 1, 1, 6])
+2
+
+julia> argmin([7, 1, 1, NaN])
+4
+```
+"""
+argmin(itr) = findmin(itr)[2]
+
 ## all & any
 
 """
     any(itr) -> Bool
 
 Test whether any elements of a boolean collection are `true`, returning `true` as
-soon as the first `true` value in `itr` is encountered (short-circuiting).
+soon as the first `true` value in `itr` is encountered (short-circuiting). To
+short-circuit on `false`, use [`all`](@ref).
 
 If the input contains [`missing`](@ref) values, return `missing` if all non-missing
 values are `false` (or equivalently, if the input contains no `true` value), following
 [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
 
+See also: [`all`](@ref), [`count`](@ref), [`sum`](@ref), [`|`](@ref), , [`||`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = [true,false,false,true]
@@ -803,12 +1120,15 @@ any(itr) = any(identity, itr)
     all(itr) -> Bool
 
 Test whether all elements of a boolean collection are `true`, returning `false` as
-soon as the first `false` value in `itr` is encountered (short-circuiting).
+soon as the first `false` value in `itr` is encountered (short-circuiting). To
+short-circuit on `true`, use [`any`](@ref).
 
 If the input contains [`missing`](@ref) values, return `missing` if all non-missing
 values are `true` (or equivalently, if the input contains no `false` value), following
 [three-valued logic](https://en.wikipedia.org/wiki/Three-valued_logic).
 
+See also: [`all!`](@ref), [`any`](@ref), [`count`](@ref), [`&`](@ref), , [`&&`](@ref), [`allunique`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = [true,false,false,true]
@@ -840,7 +1160,7 @@ all(itr) = all(identity, itr)
 
 Determine whether predicate `p` returns `true` for any elements of `itr`, returning
 `true` as soon as the first item in `itr` for which `p` returns `true` is encountered
-(short-circuiting).
+(short-circuiting). To short-circuit on `false`, use [`all`](@ref).
 
 If the input contains [`missing`](@ref) values, return `missing` if all non-missing
 values are `false` (or equivalently, if the input contains no `true` value), following
@@ -883,12 +1203,33 @@ function _any(f, itr, ::Colon)
     return anymissing ? missing : false
 end
 
+# Specialized versions of any(f, ::Tuple), avoiding type instabilities for small tuples
+# containing mixed types.
+# We fall back to the for loop implementation all elements have the same type or
+# if the tuple is too large.
+any(f, itr::NTuple) = _any(f, itr, :)  # case of homogeneous tuple
+function any(f, itr::Tuple)            # case of tuple with mixed types
+    length(itr) > 32 && return _any(f, itr, :)
+    _any_tuple(f, false, itr...)
+end
+
+@inline function _any_tuple(f, anymissing, x, rest...)
+    v = f(x)
+    if ismissing(v)
+        anymissing = true
+    elseif v
+        return true
+    end
+    return _any_tuple(f, anymissing, rest...)
+end
+@inline _any_tuple(f, anymissing) = anymissing ? missing : false
+
 """
     all(p, itr) -> Bool
 
 Determine whether predicate `p` returns `true` for all elements of `itr`, returning
 `false` as soon as the first item in `itr` for which `p` returns `false` is encountered
-(short-circuiting).
+(short-circuiting). To short-circuit on `true`, use [`any`](@ref).
 
 If the input contains [`missing`](@ref) values, return `missing` if all non-missing
 values are `true` (or equivalently, if the input contains no `false` value), following
@@ -933,17 +1274,44 @@ function _all(f, itr, ::Colon)
     return anymissing ? missing : true
 end
 
+# Specialized versions of all(f, ::Tuple), avoiding type instabilities for small tuples
+# containing mixed types. This is similar to any(f, ::Tuple) defined above.
+all(f, itr::NTuple) = _all(f, itr, :)
+function all(f, itr::Tuple)
+    length(itr) > 32 && return _all(f, itr, :)
+    _all_tuple(f, false, itr...)
+end
+
+@inline function _all_tuple(f, anymissing, x, rest...)
+    v = f(x)
+    if ismissing(v)
+        anymissing = true
+    # this syntax allows throwing a TypeError for non-Bool, for consistency with any
+    elseif v
+        nothing
+    else
+        return false
+    end
+    return _all_tuple(f, anymissing, rest...)
+end
+@inline _all_tuple(f, anymissing) = anymissing ? missing : true
+
 ## count
 
 _bool(f) = x->f(x)::Bool
 
 """
-    count(p, itr) -> Integer
-    count(itr) -> Integer
+    count([f=identity,] itr; init=0) -> Integer
+
+Count the number of elements in `itr` for which the function `f` returns `true`.
+If `f` is omitted, count the number of `true` elements in `itr` (which
+should be a collection of boolean values). `init` optionally specifies the value
+to start counting from and therefore also determines the output type.
 
-Count the number of elements in `itr` for which predicate `p` returns `true`.
-If `p` is omitted, counts the number of `true` elements in `itr` (which
-should be a collection of boolean values).
+!!! compat "Julia 1.6"
+    `init` keyword was added in Julia 1.6.
+
+See also: [`any`](@ref), [`sum`](@ref).
 
 # Examples
 ```jldoctest
@@ -952,32 +1320,37 @@ julia> count(i->(4<=i<=6), [2,3,4,5,6])
 
 julia> count([true, false, true, true])
 3
+
+julia> count(>(3), 1:7, init=0x03)
+0x07
 ```
 """
-count(itr) = count(identity, itr)
+count(itr; init=0) = count(identity, itr; init)
 
-count(f, itr) = _simple_count(f, itr)
+count(f, itr; init=0) = _simple_count(f, itr, init)
 
-function _simple_count(pred, itr)
-    n = 0
-    for x in itr
-        n += pred(x)::Bool
+_simple_count(pred, itr, init) = _simple_count_helper(Generator(pred, itr), init)
+
+function _simple_count_helper(g, init::T) where {T}
+    n::T = init
+    for x in g
+        n += x::Bool
     end
     return n
 end
 
-function count(::typeof(identity), x::Array{Bool})
-    n = 0
+function _simple_count(::typeof(identity), x::Array{Bool}, init::T=0) where {T}
+    n::T = init
     chunks = length(x) ÷ sizeof(UInt)
     mask = 0x0101010101010101 % UInt
     GC.@preserve x begin
         ptr = Ptr{UInt}(pointer(x))
         for i in 1:chunks
-            n += count_ones(unsafe_load(ptr, i) & mask)
+            n = (n + count_ones(unsafe_load(ptr, i) & mask)) % T
         end
     end
     for i in sizeof(UInt)*chunks+1:length(x)
-        n += x[i]
+        n = (n + x[i]) % T
     end
     return n
 end
diff --git a/base/reducedim.jl b/base/reducedim.jl
index c889392ececd70..4ccf826df5865e 100644
--- a/base/reducedim.jl
+++ b/base/reducedim.jl
@@ -3,7 +3,7 @@
 ## Functions to compute the reduced shape
 
 # for reductions that expand 0 dims to 1
-reduced_index(i::OneTo) = OneTo(1)
+reduced_index(i::OneTo{T}) where {T} = OneTo(one(T))
 reduced_index(i::Union{Slice, IdentityUnitRange}) = oftype(i, first(i):first(i))
 reduced_index(i::AbstractUnitRange) =
     throw(ArgumentError(
@@ -44,7 +44,7 @@ function reduced_indices0(inds::Indices{N}, d::Int) where N
 end
 
 function reduced_indices(inds::Indices{N}, region) where N
-    rinds = [inds...]
+    rinds = collect(inds)
     for i in region
         isa(i, Integer) || throw(ArgumentError("reduced dimension(s) must be integers"))
         d = Int(i)
@@ -58,7 +58,7 @@ function reduced_indices(inds::Indices{N}, region) where N
 end
 
 function reduced_indices0(inds::Indices{N}, region) where N
-    rinds = [inds...]
+    rinds = collect(inds)
     for i in region
         isa(i, Integer) || throw(ArgumentError("reduced dimension(s) must be integers"))
         d = Int(i)
@@ -77,15 +77,14 @@ end
 ## initialization
 # initarray! is only called by sum!, prod!, etc.
 for (Op, initfun) in ((:(typeof(add_sum)), :zero), (:(typeof(mul_prod)), :one))
-    @eval initarray!(a::AbstractArray{T}, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && fill!(a, $(initfun)(T)); a)
+    @eval initarray!(a::AbstractArray{T}, ::Any, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && fill!(a, $(initfun)(T)); a)
 end
 
-for Op in (:(typeof(max)), :(typeof(min)))
-    @eval initarray!(a::AbstractArray{T}, ::$(Op), init::Bool, src::AbstractArray) where {T} = (init && copyfirst!(a, src); a)
-end
+initarray!(a::AbstractArray{T}, f, ::Union{typeof(min),typeof(max),typeof(_extrema_rf)},
+    init::Bool, src::AbstractArray) where {T} = (init && mapfirst!(f, a, src); a)
 
 for (Op, initval) in ((:(typeof(&)), true), (:(typeof(|)), false))
-    @eval initarray!(a::AbstractArray, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a)
+    @eval initarray!(a::AbstractArray, ::Any, ::$(Op), init::Bool, src::AbstractArray) = (init && fill!(a, $initval); a)
 end
 
 # reducedim_initarray is called by
@@ -125,7 +124,7 @@ function _reducedim_init(f, op, fv, fop, A, region)
 end
 
 # initialization when computing minima and maxima requires a little care
-for (f1, f2, initval) in ((:min, :max, :Inf), (:max, :min, :(-Inf)))
+for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min, :(-Inf), :typemin))
     @eval function reducedim_init(f, op::typeof($f1), A::AbstractArray, region)
         # First compute the reduce indices. This will throw an ArgumentError
         # if any region is invalid
@@ -139,20 +138,68 @@ for (f1, f2, initval) in ((:min, :max, :Inf), (:max, :min, :(-Inf)))
 
         if isempty(A1)
             # If the slice is empty just return non-view version as the initial array
-            return copy(A1)
+            return map(f, A1)
         else
             # otherwise use the min/max of the first slice as initial value
             v0 = mapreduce(f, $f2, A1)
 
-            # but NaNs need to be avoided as initial values
-            v0 = v0 != v0 ? typeof(v0)($initval) : v0
-
             T = _realtype(f, promote_union(eltype(A)))
             Tr = v0 isa T ? T : typeof(v0)
+
+            # but NaNs and missing need to be avoided as initial values
+            if v0 isa Number && isnan(v0)
+                # v0 is NaN
+                v0 = oftype(v0, $initval)
+            elseif isunordered(v0)
+                # v0 is missing or a third-party unordered value
+                Tnm = nonmissingtype(Tr)
+                # TODO: Some types, like BigInt, don't support typemin/typemax.
+                # So a Matrix{Union{BigInt, Missing}} can still error here.
+                v0 = $typeextreme(Tnm)
+            end
+            # v0 may have changed type.
+            Tr = v0 isa T ? T : typeof(v0)
+
             return reducedim_initarray(A, region, v0, Tr)
         end
     end
 end
+
+function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray, region)
+    # First compute the reduce indices. This will throw an ArgumentError
+    # if any region is invalid
+    ri = reduced_indices(A, region)
+
+    # Next, throw if reduction is over a region with length zero
+    any(i -> isempty(axes(A, i)), region) && _empty_reduce_error()
+
+    # Make a view of the first slice of the region
+    A1 = view(A, ri...)
+
+    isempty(A1) && return map(f, A1)
+    # use the max/min of the first slice as initial value for non-empty cases
+    v0 = reverse(mapreduce(f, op, A1)) # turn minmax to maxmin
+
+    T = _realtype(f.f, promote_union(eltype(A)))
+    Tmin = v0[1] isa T ? T : typeof(v0[1])
+    Tmax = v0[2] isa T ? T : typeof(v0[2])
+
+    # but NaNs and missing need to be avoided as initial values
+    if v0[1] isa Number && isnan(v0[1])
+        v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf)
+    elseif isunordered(v0[1])
+        # v0 is missing or a third-party unordered value
+        # TODO: Some types, like BigInt, don't support typemin/typemax.
+        # So a Matrix{Union{BigInt, Missing}} can still error here.
+        v0 = typemax(nonmissingtype(Tmin)), typemin(nonmissingtype(Tmax))
+    end
+    # v0 may have changed type.
+    Tmin = v0[1] isa T ? T : typeof(v0[1])
+    Tmax = v0[2] isa T ? T : typeof(v0[2])
+
+    return reducedim_initarray(A, region, v0, Tuple{Tmin,Tmax})
+end
+
 reducedim_init(f::Union{typeof(abs),typeof(abs2)}, op::typeof(max), A::AbstractArray{T}, region) where {T} =
     reducedim_initarray(A, region, zero(f(zero(T))), _realtype(f, T))
 
@@ -181,7 +228,7 @@ end
 
 has_fast_linear_indexing(a::AbstractArrayOrBroadcasted) = false
 has_fast_linear_indexing(a::Array) = true
-has_fast_linear_indexing(::Number) = true  # for Broadcasted
+has_fast_linear_indexing(::Union{Number,Ref,AbstractChar}) = true  # 0d objects, for Broadcasted
 has_fast_linear_indexing(bc::Broadcast.Broadcasted) =
     all(has_fast_linear_indexing, bc.args)
 
@@ -283,7 +330,7 @@ reducedim!(op, R::AbstractArray{RT}, A::AbstractArrayOrBroadcasted) where {RT} =
 """
     mapreduce(f, op, A::AbstractArray...; dims=:, [init])
 
-Evaluates to the same as `reduce(op, map(f, A); dims=dims, init=init)`, but is generally
+Evaluates to the same as `reduce(op, map(f, A...); dims=dims, init=init)`, but is generally
 faster because the intermediate array is avoided.
 
 !!! compat "Julia 1.2"
@@ -369,6 +416,9 @@ dimensions.
 !!! compat "Julia 1.5"
     `dims` keyword was added in Julia 1.5.
 
+!!! compat "Julia 1.6"
+    `init` keyword was added in Julia 1.6.
+
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
@@ -386,11 +436,11 @@ julia> count(<=(2), A, dims=2)
  0
 ```
 """
-count(A::AbstractArrayOrBroadcasted; dims=:) = count(identity, A, dims=dims)
-count(f, A::AbstractArrayOrBroadcasted; dims=:) = _count(f, A, dims)
+count(A::AbstractArrayOrBroadcasted; dims=:, init=0) = count(identity, A; dims, init)
+count(f, A::AbstractArrayOrBroadcasted; dims=:, init=0) = _count(f, A, dims, init)
 
-_count(f, A::AbstractArrayOrBroadcasted, dims::Colon) = _simple_count(f, A)
-_count(f, A::AbstractArrayOrBroadcasted, dims) = mapreduce(_bool(f), add_sum, A, dims=dims, init=0)
+_count(f, A::AbstractArrayOrBroadcasted, dims::Colon, init) = _simple_count(f, A, init)
+_count(f, A::AbstractArrayOrBroadcasted, dims, init) = mapreduce(_bool(f), add_sum, A; dims, init)
 
 """
     count!([f=identity,] r, A)
@@ -420,7 +470,7 @@ julia> count!(<=(2), [1; 1], A)
 """
 count!(r::AbstractArray, A::AbstractArrayOrBroadcasted; init::Bool=true) = count!(identity, r, A; init=init)
 count!(f, r::AbstractArray, A::AbstractArrayOrBroadcasted; init::Bool=true) =
-    mapreducedim!(_bool(f), add_sum, initarray!(r, add_sum, init, A), A)
+    mapreducedim!(_bool(f), add_sum, initarray!(r, f, add_sum, init, A), A)
 
 """
     sum(A::AbstractArray; dims)
@@ -575,6 +625,8 @@ Compute the maximum value of an array over the given dimensions. See also the
 [`max(a,b)`](@ref) function to take the maximum of two or more arguments,
 which can be applied elementwise to arrays via `max.(a,b)`.
 
+See also: [`maximum!`](@ref), [`extrema`](@ref), [`findmax`](@ref), [`argmax`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
@@ -597,7 +649,7 @@ maximum(A::AbstractArray; dims)
 """
     maximum(f, A::AbstractArray; dims)
 
-Compute the maximum value from of calling the function `f` on each element of an array over the given
+Compute the maximum value by calling the function `f` on each element of an array over the given
 dimensions.
 
 # Examples
@@ -650,6 +702,8 @@ Compute the minimum value of an array over the given dimensions. See also the
 [`min(a,b)`](@ref) function to take the minimum of two or more arguments,
 which can be applied elementwise to arrays via `min.(a,b)`.
 
+See also: [`minimum!`](@ref), [`extrema`](@ref), [`findmin`](@ref), [`argmin`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 2; 3 4]
@@ -672,7 +726,7 @@ minimum(A::AbstractArray; dims)
 """
     minimum(f, A::AbstractArray; dims)
 
-Compute the minimum value from of calling the function `f` on each element of an array over the given
+Compute the minimum value by calling the function `f` on each element of an array over the given
 dimensions.
 
 # Examples
@@ -718,6 +772,74 @@ julia> minimum!([1 1], A)
 """
 minimum!(r, A)
 
+"""
+    extrema(A::AbstractArray; dims) -> Array{Tuple}
+
+Compute the minimum and maximum elements of an array over the given dimensions.
+
+See also: [`minimum`](@ref), [`maximum`](@ref), [`extrema!`](@ref).
+
+# Examples
+```jldoctest
+julia> A = reshape(Vector(1:2:16), (2,2,2))
+2×2×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  5
+ 3  7
+
+[:, :, 2] =
+  9  13
+ 11  15
+
+julia> extrema(A, dims = (1,2))
+1×1×2 Array{Tuple{Int64, Int64}, 3}:
+[:, :, 1] =
+ (1, 7)
+
+[:, :, 2] =
+ (9, 15)
+```
+"""
+extrema(A::AbstractArray; dims)
+
+"""
+    extrema(f, A::AbstractArray; dims) -> Array{Tuple}
+
+Compute the minimum and maximum of `f` applied to each element in the given dimensions
+of `A`.
+
+!!! compat "Julia 1.2"
+    This method requires Julia 1.2 or later.
+"""
+extrema(f, A::AbstractArray; dims)
+
+"""
+    extrema!(r, A)
+
+Compute the minimum and maximum value of `A` over the singleton dimensions of `r`, and write results to `r`.
+
+!!! compat "Julia 1.8"
+    This method requires Julia 1.8 or later.
+
+# Examples
+```jldoctest
+julia> A = [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> extrema!([(1, 1); (1, 1)], A)
+2-element Vector{Tuple{Int64, Int64}}:
+ (1, 2)
+ (3, 4)
+
+julia> extrema!([(1, 1);; (1, 1)], A)
+1×2 Matrix{Tuple{Int64, Int64}}:
+ (1, 3)  (2, 4)
+```
+"""
+extrema!(r, A)
+
 """
     all(A; dims)
 
@@ -745,7 +867,7 @@ all(A::AbstractArray; dims)
 """
     all(p, A; dims)
 
-Determine whether predicate p returns true for all elements along the given dimensions of an array.
+Determine whether predicate `p` returns `true` for all elements along the given dimensions of an array.
 
 # Examples
 ```jldoctest
@@ -817,7 +939,7 @@ any(::AbstractArray; dims)
 """
     any(p, A; dims)
 
-Determine whether predicate p returns true for any elements along the given dimensions of an array.
+Determine whether predicate `p` returns `true` for any elements along the given dimensions of an array.
 
 # Examples
 ```jldoctest
@@ -864,7 +986,9 @@ julia> any!([1 1], A)
 any!(r, A)
 
 for (fname, _fname, op) in [(:sum,     :_sum,     :add_sum), (:prod,    :_prod,    :mul_prod),
-                            (:maximum, :_maximum, :max),     (:minimum, :_minimum, :min)]
+                            (:maximum, :_maximum, :max),     (:minimum, :_minimum, :min),
+                            (:extrema, :_extrema, :_extrema_rf)]
+    mapf = fname === :extrema ? :(ExtremaMap(f)) : :f
     @eval begin
         # User-facing methods with keyword arguments
         @inline ($fname)(a::AbstractArray; dims=:, kw...) = ($_fname)(a, dims; kw...)
@@ -872,7 +996,7 @@ for (fname, _fname, op) in [(:sum,     :_sum,     :add_sum), (:prod,    :_prod,
 
         # Underlying implementations using dispatch
         ($_fname)(a, ::Colon; kw...) = ($_fname)(identity, a, :; kw...)
-        ($_fname)(f, a, ::Colon; kw...) = mapreduce(f, $op, a; kw...)
+        ($_fname)(f, a, ::Colon; kw...) = mapreduce($mapf, $op, a; kw...)
     end
 end
 
@@ -885,16 +1009,18 @@ _all(a, ::Colon)                           = _all(identity, a, :)
 
 for (fname, op) in [(:sum, :add_sum), (:prod, :mul_prod),
                     (:maximum, :max), (:minimum, :min),
-                    (:all, :&),       (:any, :|)]
+                    (:all, :&),       (:any, :|),
+                    (:extrema, :_extrema_rf)]
     fname! = Symbol(fname, '!')
     _fname = Symbol('_', fname)
+    mapf = fname === :extrema ? :(ExtremaMap(f)) : :f
     @eval begin
         $(fname!)(f::Function, r::AbstractArray, A::AbstractArray; init::Bool=true) =
-            mapreducedim!(f, $(op), initarray!(r, $(op), init, A), A)
+            mapreducedim!($mapf, $(op), initarray!(r, $mapf, $(op), init, A), A)
         $(fname!)(r::AbstractArray, A::AbstractArray; init::Bool=true) = $(fname!)(identity, r, A; init=init)
 
         $(_fname)(A, dims; kw...)    = $(_fname)(identity, A, dims; kw...)
-        $(_fname)(f, A, dims; kw...) = mapreduce(f, $(op), A; dims=dims, kw...)
+        $(_fname)(f, A, dims; kw...) = mapreduce($mapf, $(op), A; dims=dims, kw...)
     end
 end
 
@@ -923,7 +1049,7 @@ function findminmax!(f, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
             for i in axes(A,1)
                 k, kss = y::Tuple
                 tmpAv = A[i,IA]
-                if tmpRi == zi || (tmpRv == tmpRv && (tmpAv != tmpAv || f(tmpAv, tmpRv)))
+                if tmpRi == zi || f(tmpRv, tmpAv)
                     tmpRv = tmpAv
                     tmpRi = k
                 end
@@ -940,7 +1066,7 @@ function findminmax!(f, Rval, Rind, A::AbstractArray{T,N}) where {T,N}
                 tmpAv = A[i,IA]
                 tmpRv = Rval[i,IR]
                 tmpRi = Rind[i,IR]
-                if tmpRi == zi || (tmpRv == tmpRv && (tmpAv != tmpAv || f(tmpAv, tmpRv)))
+                if tmpRi == zi || f(tmpRv, tmpAv)
                     Rval[i,IR] = tmpAv
                     Rind[i,IR] = k
                 end
@@ -956,18 +1082,18 @@ end
 
 Find the minimum of `A` and the corresponding linear index along singleton
 dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
-`NaN` is treated as less than all other values.
+`NaN` is treated as less than all other values except `missing`.
 """
 function findmin!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
                   init::Bool=true)
-    findminmax!(isless, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
+    findminmax!(isgreater, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
 end
 
 """
     findmin(A; dims) -> (minval, index)
 
 For an array input, returns the value and index of the minimum over the given dimensions.
-`NaN` is treated as less than all other values.
+`NaN` is treated as less than all other values except `missing`.
 
 # Examples
 ```jldoctest
@@ -980,7 +1106,7 @@ julia> findmin(A, dims=1)
 ([1.0 2.0], CartesianIndex{2}[CartesianIndex(1, 1) CartesianIndex(1, 2)])
 
 julia> findmin(A, dims=2)
-([1.0; 3.0], CartesianIndex{2}[CartesianIndex(1, 1); CartesianIndex(2, 1)])
+([1.0; 3.0;;], CartesianIndex{2}[CartesianIndex(1, 1); CartesianIndex(2, 1);;])
 ```
 """
 findmin(A::AbstractArray; dims=:) = _findmin(A, dims)
@@ -993,30 +1119,28 @@ function _findmin(A, region)
         end
         (similar(A, ri), zeros(eltype(keys(A)), ri))
     else
-        findminmax!(isless, fill!(similar(A, ri), first(A)),
+        findminmax!(isgreater, fill!(similar(A, ri), first(A)),
                     zeros(eltype(keys(A)), ri), A)
     end
 end
 
-isgreater(a, b) = isless(b,a)
-
 """
     findmax!(rval, rind, A) -> (maxval, index)
 
 Find the maximum of `A` and the corresponding linear index along singleton
 dimensions of `rval` and `rind`, and store the results in `rval` and `rind`.
-`NaN` is treated as greater than all other values.
+`NaN` is treated as greater than all other values except `missing`.
 """
 function findmax!(rval::AbstractArray, rind::AbstractArray, A::AbstractArray;
                   init::Bool=true)
-    findminmax!(isgreater, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
+    findminmax!(isless, init && !isempty(A) ? fill!(rval, first(A)) : rval, fill!(rind,zero(eltype(keys(A)))), A)
 end
 
 """
     findmax(A; dims) -> (maxval, index)
 
 For an array input, returns the value and index of the maximum over the given dimensions.
-`NaN` is treated as greater than all other values.
+`NaN` is treated as greater than all other values except `missing`.
 
 # Examples
 ```jldoctest
@@ -1029,7 +1153,7 @@ julia> findmax(A, dims=1)
 ([3.0 4.0], CartesianIndex{2}[CartesianIndex(2, 1) CartesianIndex(2, 2)])
 
 julia> findmax(A, dims=2)
-([2.0; 4.0], CartesianIndex{2}[CartesianIndex(1, 2); CartesianIndex(2, 2)])
+([2.0; 4.0;;], CartesianIndex{2}[CartesianIndex(1, 2); CartesianIndex(2, 2);;])
 ```
 """
 findmax(A::AbstractArray; dims=:) = _findmax(A, dims)
@@ -1042,7 +1166,7 @@ function _findmax(A, region)
         end
         similar(A, ri), zeros(eltype(keys(A)), ri)
     else
-        findminmax!(isgreater, fill!(similar(A, ri), first(A)),
+        findminmax!(isless, fill!(similar(A, ri), first(A)),
                     zeros(eltype(keys(A)), ri), A)
     end
 end
@@ -1053,7 +1177,7 @@ reducedim1(R, A) = length(axes1(R)) == 1
     argmin(A; dims) -> indices
 
 For an array input, return the indices of the minimum elements over the given dimensions.
-`NaN` is treated as less than all other values.
+`NaN` is treated as less than all other values except `missing`.
 
 # Examples
 ```jldoctest
@@ -1078,7 +1202,7 @@ argmin(A::AbstractArray; dims=:) = findmin(A; dims=dims)[2]
     argmax(A; dims) -> indices
 
 For an array input, return the indices of the maximum elements over the given dimensions.
-`NaN` is treated as greater than all other values.
+`NaN` is treated as greater than all other values except `missing`.
 
 # Examples
 ```jldoctest
diff --git a/base/reflection.jl b/base/reflection.jl
index 031426531b9e10..7e0003c0e651b6 100644
--- a/base/reflection.jl
+++ b/base/reflection.jl
@@ -20,6 +20,8 @@ nameof(m::Module) = ccall(:jl_module_name, Ref{Symbol}, (Any,), m)
 
 Get a module's enclosing `Module`. `Main` is its own parent.
 
+See also: [`names`](@ref), [`nameof`](@ref), [`fullname`](@ref), [`@__MODULE__`](@ref).
+
 # Examples
 ```jldoctest
 julia> parentmodule(Main)
@@ -94,6 +96,8 @@ are also included.
 
 As a special case, all names defined in `Main` are considered \"exported\",
 since it is not idiomatic to explicitly export names from `Main`.
+
+See also: [`@locals`](@ref Base.@locals), [`@__MODULE__`](@ref).
 """
 names(m::Module; all::Bool = false, imported::Bool = false) =
     sort!(ccall(:jl_module_names, Array{Symbol,1}, (Any, Cint, Cint), m, all, imported))
@@ -125,7 +129,7 @@ function _fieldnames(@nospecialize t)
             throw(ArgumentError("type does not have definite field names"))
         end
     end
-    isdefined(t, :names) ? t.names : t.name.names
+    return t.name.names
 end
 
 """
@@ -143,15 +147,19 @@ julia> fieldname(Rational, 2)
 ```
 """
 function fieldname(t::DataType, i::Integer)
-    if t.abstract
-        throw(ArgumentError("type does not have definite field names"))
+    throw_not_def_field() = throw(ArgumentError("type does not have definite field names"))
+    function throw_field_access(t, i, n_fields)
+        field_label = n_fields == 1 ? "field" : "fields"
+        throw(ArgumentError("Cannot access field $i since type $t only has $n_fields $field_label."))
     end
+    throw_need_pos_int(i) = throw(ArgumentError("Field numbers must be positive integers. $i is invalid."))
+
+    isabstracttype(t) && throw_not_def_field()
     names = _fieldnames(t)
     n_fields = length(names)::Int
-    field_label = n_fields == 1 ? "field" : "fields"
-    i > n_fields && throw(ArgumentError("Cannot access field $i since type $t only has $n_fields $field_label."))
-    i < 1 && throw(ArgumentError("Field numbers must be positive integers. $i is invalid."))
-    return names[i]::Symbol
+    i > n_fields && throw_field_access(t, i, n_fields)
+    i < 1 && throw_need_pos_int(i)
+    return @inbounds names[i]::Symbol
 end
 
 fieldname(t::UnionAll, i::Integer) = fieldname(unwrap_unionall(t), i)
@@ -163,10 +171,15 @@ fieldname(t::Type{<:Tuple}, i::Integer) =
 
 Get a tuple with the names of the fields of a `DataType`.
 
+See also [`propertynames`](@ref), [`hasfield`](@ref).
+
 # Examples
 ```jldoctest
 julia> fieldnames(Rational)
 (:num, :den)
+
+julia> fieldnames(typeof(1+im))
+(:re, :im)
 ```
 """
 fieldnames(t::DataType) = (fieldcount(t); # error check to make sure type is specific enough
@@ -181,13 +194,25 @@ fieldnames(t::Type{<:Tuple}) = ntuple(identity, fieldcount(t))
 
 Return a boolean indicating whether `T` has `name` as one of its own fields.
 
+See also [`fieldnames`](@ref), [`fieldcount`](@ref), [`hasproperty`](@ref).
+
 !!! compat "Julia 1.2"
      This function requires at least Julia 1.2.
+
+# Examples
+```jldoctest
+julia> struct Foo
+            bar::Int
+       end
+
+julia> hasfield(Foo, :bar)
+true
+
+julia> hasfield(Foo, :x)
+false
+```
 """
-function hasfield(T::Type, name::Symbol)
-    @_pure_meta
-    return fieldindex(T, name, false) > 0
-end
+hasfield(T::Type, name::Symbol) = fieldindex(T, name, false) > 0
 
 """
     nameof(t::DataType) -> Symbol
@@ -235,11 +260,34 @@ parentmodule(t::UnionAll) = parentmodule(unwrap_unionall(t))
 """
     isconst(m::Module, s::Symbol) -> Bool
 
-Determine whether a global is declared `const` in a given `Module`.
+Determine whether a global is declared `const` in a given module `m`.
 """
 isconst(m::Module, s::Symbol) =
     ccall(:jl_is_const, Cint, (Any, Any), m, s) != 0
 
+"""
+    isconst(t::DataType, s::Union{Int,Symbol}) -> Bool
+
+Determine whether a field `s` is declared `const` in a given type `t`.
+"""
+function isconst(@nospecialize(t::Type), s::Symbol)
+    t = unwrap_unionall(t)
+    isa(t, DataType) || return false
+    return isconst(t, fieldindex(t, s, false))
+end
+function isconst(@nospecialize(t::Type), s::Int)
+    t = unwrap_unionall(t)
+    # TODO: what to do for `Union`?
+    isa(t, DataType) || return false # uncertain
+    ismutabletype(t) || return true # immutable structs are always const
+    1 <= s <= length(t.name.names) || return true # OOB reads are "const" since they always throw
+    constfields = t.name.constfields
+    constfields === C_NULL && return false
+    s -= 1
+    return unsafe_load(Ptr{UInt32}(constfields), 1 + s÷32) & (1 << (s%32)) != 0
+end
+
+
 """
     @locals()
 
@@ -280,15 +328,17 @@ macro locals()
 end
 
 """
-    objectid(x)
+    objectid(x) -> UInt
 
 Get a hash value for `x` based on object identity. `objectid(x)==objectid(y)` if `x === y`.
+
+See also [`hash`](@ref), [`IdDict`](@ref).
 """
 objectid(@nospecialize(x)) = ccall(:jl_object_id, UInt, (Any,), x)
 
 # concrete datatype predicates
 
-datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Any, (Any,), x)
+datatype_fieldtypes(x::DataType) = ccall(:jl_get_fieldtypes, Core.SimpleVector, (Any,), x)
 
 struct DataTypeLayout
     nfields::UInt32
@@ -307,28 +357,30 @@ Memory allocation minimum alignment for instances of this type.
 Can be called on any `isconcretetype`.
 """
 function datatype_alignment(dt::DataType)
-    @_pure_meta
+    @_total_may_throw_meta
     dt.layout == C_NULL && throw(UndefRefError())
     alignment = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).alignment
     return Int(alignment)
 end
 
-function uniontype_layout(T::Type)
+function uniontype_layout(@nospecialize T::Type)
     sz = RefValue{Csize_t}(0)
     algn = RefValue{Csize_t}(0)
     isinline = ccall(:jl_islayout_inline, Cint, (Any, Ptr{Csize_t}, Ptr{Csize_t}), T, sz, algn) != 0
-    (isinline, sz[], algn[])
+    (isinline, Int(sz[]), Int(algn[]))
 end
 
+LLT_ALIGN(x, sz) = (x + sz - 1) & -sz
+
 # amount of total space taken by T when stored in a container
-function aligned_sizeof(T::Type)
-    @_pure_meta
+function aligned_sizeof(@nospecialize T::Type)
+    @_total_may_throw_meta
     if isbitsunion(T)
         _, sz, al = uniontype_layout(T)
-        return (sz + al - 1) & -al
+        return LLT_ALIGN(sz, al)
     elseif allocatedinline(T)
         al = datatype_alignment(T)
-        return (Core.sizeof(T) + al - 1) & -al
+        return LLT_ALIGN(Core.sizeof(T), al)
     else
         return Core.sizeof(Ptr{Cvoid})
     end
@@ -345,7 +397,7 @@ with no intervening padding bytes.
 Can be called on any `isconcretetype`.
 """
 function datatype_haspadding(dt::DataType)
-    @_pure_meta
+    @_total_may_throw_meta
     dt.layout == C_NULL && throw(UndefRefError())
     flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
     return flags & 1 == 1
@@ -358,12 +410,11 @@ Return the number of fields known to this datatype's layout.
 Can be called on any `isconcretetype`.
 """
 function datatype_nfields(dt::DataType)
-    @_pure_meta
+    @_total_may_throw_meta
     dt.layout == C_NULL && throw(UndefRefError())
     return unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).nfields
 end
 
-
 """
     Base.datatype_pointerfree(dt::DataType) -> Bool
 
@@ -371,7 +422,7 @@ Return whether instances of this type can contain references to gc-managed memor
 Can be called on any `isconcretetype`.
 """
 function datatype_pointerfree(dt::DataType)
-    @_pure_meta
+    @_total_may_throw_meta
     dt.layout == C_NULL && throw(UndefRefError())
     npointers = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).npointers
     return npointers == 0
@@ -387,19 +438,64 @@ Can be called on any `isconcretetype`.
 See also [`fieldoffset`](@ref).
 """
 function datatype_fielddesc_type(dt::DataType)
-    @_pure_meta
+    @_total_may_throw_meta
     dt.layout == C_NULL && throw(UndefRefError())
     flags = unsafe_load(convert(Ptr{DataTypeLayout}, dt.layout)).flags
     return (flags >> 1) & 3
 end
 
+# For type stability, we only expose a single struct that describes everything
+struct FieldDesc
+    isforeign::Bool
+    isptr::Bool
+    size::UInt32
+    offset::UInt32
+end
+
+struct FieldDescStorage{T}
+    ptrsize::T
+    offset::T
+end
+FieldDesc(fd::FieldDescStorage{T}) where {T} =
+    FieldDesc(false, fd.ptrsize & 1 != 0,
+              fd.ptrsize >> 1, fd.offset)
+
+struct DataTypeFieldDesc
+    dt::DataType
+    function DataTypeFieldDesc(dt::DataType)
+        dt.layout == C_NULL && throw(UndefRefError())
+        new(dt)
+    end
+end
+
+function getindex(dtfd::DataTypeFieldDesc, i::Int)
+    layout_ptr = convert(Ptr{DataTypeLayout}, dtfd.dt.layout)
+    fd_ptr = layout_ptr + sizeof(DataTypeLayout)
+    layout = unsafe_load(layout_ptr)
+    fielddesc_type = (layout.flags >> 1) & 3
+    nfields = layout.nfields
+    @boundscheck ((1 <= i <= nfields) || throw(BoundsError(dtfd, i)))
+    if fielddesc_type == 0
+        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt8}}(fd_ptr), i))
+    elseif fielddesc_type == 1
+        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt16}}(fd_ptr), i))
+    elseif fielddesc_type == 2
+        return FieldDesc(unsafe_load(Ptr{FieldDescStorage{UInt32}}(fd_ptr), i))
+    else
+        # fielddesc_type == 3
+        return FieldDesc(true, true, 0, 0)
+    end
+end
+
 """
     ismutable(v) -> Bool
 
-Return `true` iff value `v` is mutable.  See [Mutable Composite Types](@ref)
+Return `true` if and only if value `v` is mutable.  See [Mutable Composite Types](@ref)
 for a discussion of immutability. Note that this function works on values, so if you give it
 a type, it will tell you that a value of `DataType` is mutable.
 
+See also [`isbits`](@ref), [`isstructtype`](@ref).
+
 # Examples
 ```jldoctest
 julia> ismutable(1)
@@ -412,7 +508,23 @@ true
 !!! compat "Julia 1.5"
     This function requires at least Julia 1.5.
 """
-ismutable(@nospecialize(x)) = (@_pure_meta; typeof(x).mutable)
+ismutable(@nospecialize(x)) = (@_total_meta; typeof(x).name.flags & 0x2 == 0x2)
+
+"""
+    ismutabletype(T) -> Bool
+
+Determine whether type `T` was declared as a mutable type
+(i.e. using `mutable struct` keyword).
+
+!!! compat "Julia 1.7"
+    This function requires at least Julia 1.7.
+"""
+function ismutabletype(@nospecialize t)
+    @_total_meta
+    t = unwrap_unionall(t)
+    # TODO: what to do for `Union`?
+    return isa(t, DataType) && t.name.flags & 0x2 == 0x2
+end
 
 """
     isstructtype(T) -> Bool
@@ -420,13 +532,13 @@ ismutable(@nospecialize(x)) = (@_pure_meta; typeof(x).mutable)
 Determine whether type `T` was declared as a struct type
 (i.e. using the `struct` or `mutable struct` keyword).
 """
-function isstructtype(@nospecialize(t::Type))
-    @_pure_meta
+function isstructtype(@nospecialize t)
+    @_total_meta
     t = unwrap_unionall(t)
     # TODO: what to do for `Union`?
     isa(t, DataType) || return false
     hasfield = !isdefined(t, :types) || !isempty(t.types)
-    return hasfield || (t.size == 0 && !t.abstract)
+    return hasfield || (t.size == 0 && !isabstracttype(t))
 end
 
 """
@@ -435,13 +547,13 @@ end
 Determine whether type `T` was declared as a primitive type
 (i.e. using the `primitive` keyword).
 """
-function isprimitivetype(@nospecialize(t::Type))
-    @_pure_meta
+function isprimitivetype(@nospecialize t)
+    @_total_meta
     t = unwrap_unionall(t)
     # TODO: what to do for `Union`?
     isa(t, DataType) || return false
     hasfield = !isdefined(t, :types) || !isempty(t.types)
-    return !hasfield && t.size != 0 && !t.abstract
+    return !hasfield && t.size != 0 && !isabstracttype(t)
 end
 
 """
@@ -456,6 +568,8 @@ This category of types is significant since they are valid as type parameters,
 may not track [`isdefined`](@ref) / [`isassigned`](@ref) status,
 and have a defined layout that is compatible with C.
 
+See also [`isbits`](@ref), [`isprimitivetype`](@ref), [`ismutable`](@ref).
+
 # Examples
 ```jldoctest
 julia> isbitstype(Complex{Float64})
@@ -465,14 +579,14 @@ julia> isbitstype(Complex)
 false
 ```
 """
-isbitstype(@nospecialize(t::Type)) = (@_pure_meta; isa(t, DataType) && t.isbitstype)
+isbitstype(@nospecialize t) = (@_total_meta; isa(t, DataType) && (t.flags & 0x8) == 0x8)
 
 """
     isbits(x)
 
-Return `true` if `x` is an instance of an `isbitstype` type.
+Return `true` if `x` is an instance of an [`isbitstype`](@ref) type.
 """
-isbits(@nospecialize x) = (@_pure_meta; typeof(x).isbitstype)
+isbits(@nospecialize x) = (@_total_meta; typeof(x).flags & 0x8 == 0x8)
 
 """
     isdispatchtuple(T)
@@ -481,7 +595,7 @@ Determine whether type `T` is a tuple "leaf type",
 meaning it could appear as a type signature in dispatch
 and has no subtypes (or supertypes) which could appear in a call.
 """
-isdispatchtuple(@nospecialize(t)) = (@_pure_meta; isa(t, DataType) && t.isdispatchtuple)
+isdispatchtuple(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x4) == 0x4)
 
 iskindtype(@nospecialize t) = (t === DataType || t === UnionAll || t === Union || t === typeof(Bottom))
 isconcretedispatch(@nospecialize t) = isconcretetype(t) && !iskindtype(t)
@@ -501,6 +615,8 @@ end
 Determine whether type `T` is a concrete type, meaning it could have direct instances
 (values `x` such that `typeof(x) === T`).
 
+See also: [`isbits`](@ref), [`isabstracttype`](@ref), [`issingletontype`](@ref).
+
 # Examples
 ```jldoctest
 julia> isconcretetype(Complex)
@@ -522,7 +638,7 @@ julia> isconcretetype(Union{Int,String})
 false
 ```
 """
-isconcretetype(@nospecialize(t)) = (@_pure_meta; isa(t, DataType) && t.isconcretetype)
+isconcretetype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && (t.flags & 0x2) == 0x2)
 
 """
     isabstracttype(T)
@@ -540,10 +656,10 @@ false
 ```
 """
 function isabstracttype(@nospecialize(t))
-    @_pure_meta
+    @_total_meta
     t = unwrap_unionall(t)
     # TODO: what to do for `Union`?
-    return isa(t, DataType) && t.abstract
+    return isa(t, DataType) && (t.name.flags & 0x1) == 0x1
 end
 
 """
@@ -552,17 +668,17 @@ end
 Determine whether type `T` has exactly one possible instance; for example, a
 struct type with no fields.
 """
-issingletontype(@nospecialize(t)) = (@_pure_meta; isa(t, DataType) && isdefined(t, :instance))
+issingletontype(@nospecialize(t)) = (@_total_meta; isa(t, DataType) && isdefined(t, :instance))
 
 """
-    typeintersect(T, S)
+    typeintersect(T::Type, S::Type)
 
 Compute a type that contains the intersection of `T` and `S`. Usually this will be the
 smallest such type or one close to it.
 """
-typeintersect(@nospecialize(a), @nospecialize(b)) = (@_pure_meta; ccall(:jl_type_intersection, Any, (Any, Any), a, b))
+typeintersect(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_intersection, Any, (Any, Any), a::Type, b::Type))
 
-morespecific(@nospecialize(a), @nospecialize(b)) = ccall(:jl_type_morespecific, Cint, (Any, Any), a, b) != 0
+morespecific(@nospecialize(a), @nospecialize(b)) = (@_total_meta; ccall(:jl_type_morespecific, Cint, (Any, Any), a::Type, b::Type) != 0)
 
 """
     fieldoffset(type, i)
@@ -574,22 +690,23 @@ use it in the following manner to summarize information about a struct:
 julia> structinfo(T) = [(fieldoffset(T,i), fieldname(T,i), fieldtype(T,i)) for i = 1:fieldcount(T)];
 
 julia> structinfo(Base.Filesystem.StatStruct)
-12-element Vector{Tuple{UInt64, Symbol, DataType}}:
- (0x0000000000000000, :device, UInt64)
- (0x0000000000000008, :inode, UInt64)
- (0x0000000000000010, :mode, UInt64)
- (0x0000000000000018, :nlink, Int64)
- (0x0000000000000020, :uid, UInt64)
- (0x0000000000000028, :gid, UInt64)
- (0x0000000000000030, :rdev, UInt64)
- (0x0000000000000038, :size, Int64)
- (0x0000000000000040, :blksize, Int64)
- (0x0000000000000048, :blocks, Int64)
- (0x0000000000000050, :mtime, Float64)
- (0x0000000000000058, :ctime, Float64)
+13-element Vector{Tuple{UInt64, Symbol, Type}}:
+ (0x0000000000000000, :desc, Union{RawFD, String})
+ (0x0000000000000008, :device, UInt64)
+ (0x0000000000000010, :inode, UInt64)
+ (0x0000000000000018, :mode, UInt64)
+ (0x0000000000000020, :nlink, Int64)
+ (0x0000000000000028, :uid, UInt64)
+ (0x0000000000000030, :gid, UInt64)
+ (0x0000000000000038, :rdev, UInt64)
+ (0x0000000000000040, :size, Int64)
+ (0x0000000000000048, :blksize, Int64)
+ (0x0000000000000050, :blocks, Int64)
+ (0x0000000000000058, :mtime, Float64)
+ (0x0000000000000060, :ctime, Float64)
 ```
 """
-fieldoffset(x::DataType, idx::Integer) = (@_pure_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx))
+fieldoffset(x::DataType, idx::Integer) = (@_total_may_throw_meta; ccall(:jl_get_field_offset, Csize_t, (Any, Cint), x, idx))
 
 """
     fieldtype(T, name::Symbol | index::Int)
@@ -635,12 +752,22 @@ julia> Base.fieldindex(Foo, :z, false)
 ```
 """
 function fieldindex(T::DataType, name::Symbol, err::Bool=true)
+    @_total_may_throw_meta
     return Int(ccall(:jl_field_index, Cint, (Any, Any, Cint), T, name, err)+1)
 end
 
-fieldindex(t::UnionAll, name::Symbol, err::Bool=true) = fieldindex(something(argument_datatype(t)), name, err)
+function fieldindex(t::UnionAll, name::Symbol, err::Bool=true)
+    t = argument_datatype(t)
+    if t === nothing
+        throw(ArgumentError("type does not have definite fields"))
+    end
+    return fieldindex(t, name, err)
+end
 
-argument_datatype(@nospecialize t) = ccall(:jl_argument_datatype, Any, (Any,), t)
+function argument_datatype(@nospecialize t)
+    @_total_meta
+    return ccall(:jl_argument_datatype, Any, (Any,), t)::Union{Nothing,DataType}
+end
 
 """
     fieldcount(t::Type)
@@ -649,20 +776,20 @@ Get the number of fields that an instance of the given type would have.
 An error is thrown if the type is too abstract to determine this.
 """
 function fieldcount(@nospecialize t)
+    @_total_may_throw_meta
     if t isa UnionAll || t isa Union
         t = argument_datatype(t)
         if t === nothing
             throw(ArgumentError("type does not have a definite number of fields"))
         end
-        t = t::DataType
-    elseif t == Union{}
+    elseif t === Union{}
         throw(ArgumentError("The empty type does not have a well-defined number of fields since it does not have instances."))
     end
     if !(t isa DataType)
         throw(TypeError(:fieldcount, DataType, t))
     end
     if t.name === NamedTuple_typename
-        names, types = t.parameters
+        names, types = t.parameters[1], t.parameters[2]
         if names isa Tuple
             return length(names)
         end
@@ -671,7 +798,7 @@ function fieldcount(@nospecialize t)
         end
         abstr = true
     else
-        abstr = t.abstract || (t.name === Tuple.name && isvatuple(t))
+        abstr = isabstracttype(t) || (t.name === Tuple.name && isvatuple(t))
     end
     if abstr
         throw(ArgumentError("type does not have a definite number of fields"))
@@ -701,7 +828,7 @@ julia> fieldtypes(Foo)
 (Int64, String)
 ```
 """
-fieldtypes(T::Type) = ntupleany(i -> fieldtype(T, i), fieldcount(T))
+fieldtypes(T::Type) = (@_total_may_throw_meta; ntupleany(i -> fieldtype(T, i), fieldcount(T)))
 
 # return all instances, for types that can be enumerated
 
@@ -727,6 +854,9 @@ function to_tuple_type(@nospecialize(t))
     end
     if isa(t, Type) && t <: Tuple
         for p in unwrap_unionall(t).parameters
+            if isa(p, Core.TypeofVararg)
+                p = p.T
+            end
             if !(isa(p, Type) || isa(p, TypeVar))
                 error("argument tuple type must contain only types")
             end
@@ -773,7 +903,7 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=
         throw(ArgumentError("'debuginfo' must be either :source or :none"))
     end
     return map(method_instances(f, t)) do m
-        if generated && isgenerated(m)
+        if generated && hasgenerator(m)
             if may_invoke_generator(m)
                 return ccall(:jl_code_for_staged, Any, (Any,), m)::CodeInfo
             else
@@ -788,8 +918,8 @@ function code_lowered(@nospecialize(f), @nospecialize(t=Tuple); generated::Bool=
     end
 end
 
-isgenerated(m::Method) = isdefined(m, :generator)
-isgenerated(m::Core.MethodInstance) = isgenerated(m.def)
+hasgenerator(m::Method) = isdefined(m, :generator)
+hasgenerator(m::Core.MethodInstance) = hasgenerator(m.def::Method)
 
 # low-level method lookup functions used by the compiler
 
@@ -806,27 +936,25 @@ function _methods(@nospecialize(f), @nospecialize(t), lim::Int, world::UInt)
 end
 
 function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt)
-    return _methods_by_ftype(t, lim, world, false, UInt[typemin(UInt)], UInt[typemax(UInt)], Cint[0])
+    return _methods_by_ftype(t, nothing, lim, world)
 end
-function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt, ambig::Bool, min::Array{UInt,1}, max::Array{UInt,1}, has_ambig::Array{Int32,1})
-    return ccall(:jl_matching_methods, Any, (Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
+function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt)
+    return _methods_by_ftype(t, mt, lim, world, false, RefValue{UInt}(typemin(UInt)), RefValue{UInt}(typemax(UInt)), Ptr{Int32}(C_NULL))
 end
-function _methods_by_ftype(@nospecialize(t), lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32})
-    return ccall(:jl_matching_methods, Any, (Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
+function _methods_by_ftype(@nospecialize(t), mt::Union{Core.MethodTable, Nothing}, lim::Int, world::UInt, ambig::Bool, min::Ref{UInt}, max::Ref{UInt}, has_ambig::Ref{Int32})
+    return ccall(:jl_matching_methods, Any, (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ptr{Int32}), t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
 end
 
 # high-level, more convenient method lookup functions
 
 # type for reflecting and pretty-printing a subset of methods
-mutable struct MethodList
+mutable struct MethodList <: AbstractArray{Method,1}
     ms::Array{Method,1}
     mt::Core.MethodTable
 end
 
-length(m::MethodList) = length(m.ms)
-isempty(m::MethodList) = isempty(m.ms)
-iterate(m::MethodList, s...) = iterate(m.ms, s...)
-eltype(::Type{MethodList}) = Method
+size(m::MethodList) = size(m.ms)
+getindex(m::MethodList, i::Integer) = m.ms[i]
 
 function MethodList(mt::Core.MethodTable)
     ms = Method[]
@@ -847,39 +975,34 @@ A list of modules can also be specified as an array.
 
 !!! compat "Julia 1.4"
     At least Julia 1.4 is required for specifying a module.
+
+See also: [`which`](@ref) and `@which`.
 """
 function methods(@nospecialize(f), @nospecialize(t),
-                 @nospecialize(mod::Union{Tuple{Module},AbstractArray{Module},Nothing}=nothing))
-    if isa(f, Core.Builtin)
-        throw(ArgumentError("argument is not a generic function"))
-    end
+                 mod::Union{Tuple{Module},AbstractArray{Module},Nothing}=nothing)
     t = to_tuple_type(t)
-    world = typemax(UInt)
+    world = get_world_counter()
     # Lack of specialization => a comprehension triggers too many invalidations via _collect, so collect the methods manually
     ms = Method[]
-    for m in _methods(f, t, -1, world)
-        m::Core.MethodMatch
+    for m in _methods(f, t, -1, world)::Vector
+        m = m::Core.MethodMatch
         (mod === nothing || m.method.module ∈ mod) && push!(ms, m.method)
     end
     MethodList(ms, typeof(f).name.mt)
 end
 methods(@nospecialize(f), @nospecialize(t), mod::Module) = methods(f, t, (mod,))
 
-methods(f::Core.Builtin) = MethodList(Method[], typeof(f).name.mt)
-
 function methods_including_ambiguous(@nospecialize(f), @nospecialize(t))
     tt = signature_type(f, t)
-    world = typemax(UInt)
-    min = UInt[typemin(UInt)]
-    max = UInt[typemax(UInt)]
-    has_ambig = Int32[0]
-    ms = _methods_by_ftype(tt, -1, world, true, min, max, has_ambig)
-    isa(ms, Bool) && return ms
+    world = get_world_counter()
+    min = RefValue{UInt}(typemin(UInt))
+    max = RefValue{UInt}(typemax(UInt))
+    ms = _methods_by_ftype(tt, nothing, -1, world, true, min, max, Ptr{Int32}(C_NULL))::Vector
     return MethodList(Method[(m::Core.MethodMatch).method for m in ms], typeof(f).name.mt)
 end
 
 function methods(@nospecialize(f),
-                 @nospecialize(mod::Union{Module,AbstractArray{Module},Nothing}=nothing))
+                 mod::Union{Module,AbstractArray{Module},Nothing}=nothing)
     # return all matches
     return methods(f, Tuple{Vararg{Any}}, mod)
 end
@@ -944,12 +1067,11 @@ _uncompressed_ir(ci::Core.CodeInstance, s::Array{UInt8,1}) = ccall(:jl_uncompres
 const uncompressed_ast = uncompressed_ir
 const _uncompressed_ast = _uncompressed_ir
 
-function method_instances(@nospecialize(f), @nospecialize(t), world::UInt = typemax(UInt))
+function method_instances(@nospecialize(f), @nospecialize(t), world::UInt=get_world_counter())
     tt = signature_type(f, t)
     results = Core.MethodInstance[]
-    for match in _methods_by_ftype(tt, -1, world)
-        instance = ccall(:jl_specializations_get_linfo, Ref{MethodInstance},
-            (Any, Any, Any), match.method, match.spec_types, match.sparams)
+    for match in _methods_by_ftype(tt, -1, world)::Vector
+        instance = Core.Compiler.specialize_method(match)
         push!(results, instance)
     end
     return results
@@ -986,10 +1108,10 @@ const SLOT_USED = 0x8
 ast_slotflag(@nospecialize(code), i) = ccall(:jl_ir_slotflag, UInt8, (Any, Csize_t), code, i - 1)
 
 """
-    may_invoke_generator(method, atypes, sparams)
+    may_invoke_generator(method, atype, sparams)
 
 Computes whether or not we may invoke the generator for the given `method` on
-the given atypes and sparams. For correctness, all generated function are
+the given atype and sparams. For correctness, all generated function are
 required to return monotonic answers. However, since we don't expect users to
 be able to successfully implement this criterion, we only call generated
 functions on concrete types. The one exception to this is that we allow calling
@@ -1003,9 +1125,9 @@ in some cases, but this may still allow inference not to fall over in some limit
 function may_invoke_generator(method::MethodInstance)
     return may_invoke_generator(method.def::Method, method.specTypes, method.sparam_vals)
 end
-function may_invoke_generator(method::Method, @nospecialize(atypes), sparams::SimpleVector)
+function may_invoke_generator(method::Method, @nospecialize(atype), sparams::SimpleVector)
     # If we have complete information, we may always call the generator
-    isdispatchtuple(atypes) && return true
+    isdispatchtuple(atype) && return true
 
     # We don't have complete information, but it is possible that the generator
     # syntactically doesn't make use of the information we don't have. Check
@@ -1023,7 +1145,7 @@ function may_invoke_generator(method::Method, @nospecialize(atypes), sparams::Si
     isdefined(generator_method, :source) || return false
     code = generator_method.source
     nslots = ccall(:jl_ir_nslots, Int, (Any,), code)
-    at = unwrap_unionall(atypes)::DataType
+    at = unwrap_unionall(atype)::DataType
     (nslots >= 1 + length(sparams) + length(at.parameters)) || return false
 
     for i = 1:nsparams
@@ -1053,21 +1175,47 @@ function func_for_method_checked(m::Method, @nospecialize(types), sparams::Simpl
 end
 
 """
-    code_typed(f, types; optimize=true, debuginfo=:default)
+    code_typed(f, types; kw...)
 
 Returns an array of type-inferred lowered form (IR) for the methods matching the given
-generic function and type signature. The keyword argument `optimize` controls whether
-additional optimizations, such as inlining, are also applied.
-The keyword `debuginfo` controls the amount of code metadata present in the output,
+generic function and type signature.
+
+# Keyword Arguments
+
+- `optimize=true`: controls whether additional optimizations, such as inlining, are also applied.
+- `debuginfo=:default`: controls the amount of code metadata present in the output,
 possible options are `:source` or `:none`.
+
+# Internal Keyword Arguments
+
+This section should be considered internal, and is only for who understands Julia compiler
+internals.
+
+- `world=Base.get_world_counter()`: optional, controls the world age to use when looking up methods,
+use current world age if not specified.
+- `interp=Core.Compiler.NativeInterpreter(world)`: optional, controls the interpreter to use,
+use the native interpreter Julia uses if not specified.
+
+# Example
+
+One can put the argument types in a tuple to get the corresponding `code_typed`.
+
+```julia
+julia> code_typed(+, (Float64, Float64))
+1-element Vector{Any}:
+ CodeInfo(
+1 ─ %1 = Base.add_float(x, y)::Float64
+└──      return %1
+) => Float64
+```
 """
-function code_typed(@nospecialize(f), @nospecialize(types=Tuple);
+function code_typed(@nospecialize(f), @nospecialize(types=default_tt(f));
                     optimize=true,
                     debuginfo::Symbol=:default,
                     world = get_world_counter(),
                     interp = Core.Compiler.NativeInterpreter(world))
-    if isa(f, Core.Builtin)
-        throw(ArgumentError("argument is not a generic function"))
+    if isa(f, Core.OpaqueClosure)
+        return code_typed_opaque_closure(f; optimize, debuginfo, interp)
     end
     ft = Core.Typeof(f)
     if isa(types, Type)
@@ -1079,6 +1227,18 @@ function code_typed(@nospecialize(f), @nospecialize(types=Tuple);
     return code_typed_by_type(tt; optimize, debuginfo, world, interp)
 end
 
+# returns argument tuple type which is supposed to be used for `code_typed` and its family;
+# if there is a single method this functions returns the method argument signature,
+# otherwise returns `Tuple` that doesn't match with any signature
+function default_tt(@nospecialize(f))
+    ms = methods(f)
+    if length(ms) == 1
+        return tuple_type_tail(only(ms).sig)
+    else
+        return Tuple
+    end
+end
+
 """
     code_typed_by_type(types::Type{<:Tuple}; ...)
 
@@ -1100,38 +1260,85 @@ function code_typed_by_type(@nospecialize(tt::Type);
         throw(ArgumentError("'debuginfo' must be either :source or :none"))
     end
     tt = to_tuple_type(tt)
-    matches = _methods_by_ftype(tt, -1, world)
-    if matches === false
-        error("signature does not correspond to a generic function")
-    end
+    matches = _methods_by_ftype(tt, -1, world)::Vector
     asts = []
     for match in matches
+        match = match::Core.MethodMatch
         meth = func_for_method_checked(match.method, tt, match.sparams)
         (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, optimize)
-        code === nothing && error("inference not successful") # inference disabled?
-        debuginfo === :none && remove_linenums!(code)
-        push!(asts, code => ty)
+        if code === nothing
+            push!(asts, meth => Any)
+        else
+            debuginfo === :none && remove_linenums!(code)
+            push!(asts, code => ty)
+        end
     end
     return asts
 end
 
-function return_types(@nospecialize(f), @nospecialize(types=Tuple), interp=Core.Compiler.NativeInterpreter())
+function code_typed_opaque_closure(@nospecialize(oc::Core.OpaqueClosure);
+    debuginfo::Symbol=:default, __...)
     ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
-    if isa(f, Core.Builtin)
-        throw(ArgumentError("argument is not a generic function"))
+    m = oc.source
+    if isa(m, Method)
+        code = _uncompressed_ir(m, m.source)
+        debuginfo === :none && remove_linenums!(code)
+        # intersect the declared return type and the inferred return type (if available)
+        rt = typeintersect(code.rettype, typeof(oc).parameters[2])
+        return Any[code => rt]
+    else
+        error("encountered invalid Core.OpaqueClosure object")
+    end
+end
+
+function return_types(@nospecialize(f), @nospecialize(types=default_tt(f));
+                      world = get_world_counter(),
+                      interp = Core.Compiler.NativeInterpreter(world))
+    ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
+    if isa(f, Core.OpaqueClosure)
+        _, rt = only(code_typed_opaque_closure(f))
+        return Any[rt]
     end
     types = to_tuple_type(types)
     rt = []
-    world = get_world_counter()
-    for match in _methods(f, types, -1, world)
+    for match in _methods(f, types, -1, world)::Vector
+        match = match::Core.MethodMatch
         meth = func_for_method_checked(match.method, types, match.sparams)
         ty = Core.Compiler.typeinf_type(interp, meth, match.spec_types, match.sparams)
-        ty === nothing && error("inference not successful") # inference disabled?
-        push!(rt, ty)
+        push!(rt, something(ty, Any))
     end
     return rt
 end
 
+function infer_effects(@nospecialize(f), @nospecialize(types=default_tt(f));
+                       world = get_world_counter(),
+                       interp = Core.Compiler.NativeInterpreter(world))
+    ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
+    types = to_tuple_type(types)
+    if isa(f, Core.Builtin)
+        args = Any[types.parameters...]
+        rt = Core.Compiler.builtin_tfunction(interp, f, args, nothing)
+        return Core.Compiler.builtin_effects(f, args, rt)
+    else
+        effects = Core.Compiler.EFFECTS_TOTAL
+        matches = _methods(f, types, -1, world)::Vector
+        if isempty(matches)
+            # although this call is known to throw MethodError (thus `nothrow=ALWAYS_FALSE`),
+            # still mark it `TRISTATE_UNKNOWN` just in order to be consistent with a result
+            # derived by the effect analysis, which can't prove guaranteed throwness at this moment
+            return Core.Compiler.Effects(effects; nothrow=Core.Compiler.TRISTATE_UNKNOWN)
+        end
+        for match in matches
+            match = match::Core.MethodMatch
+            frame = Core.Compiler.typeinf_frame(interp,
+                match.method, match.spec_types, match.sparams, #=run_optimizer=#false)
+            frame === nothing && return Core.Compiler.Effects()
+            effects = Core.Compiler.tristate_merge(effects, frame.ipo_effects)
+        end
+        return effects
+    end
+end
+
 """
     print_statement_costs(io::IO, f, types)
 
@@ -1139,9 +1346,6 @@ Print type-inferred and optimized code for `f` given argument types `types`,
 prepending each line with its cost as estimated by the compiler's inlining engine.
 """
 function print_statement_costs(io::IO, @nospecialize(f), @nospecialize(t); kwargs...)
-    if isa(f, Core.Builtin)
-        throw(ArgumentError("argument is not a generic function"))
-    end
     tt = signature_type(f, t)
     print_statement_costs(io, tt; kwargs...)
 end
@@ -1149,39 +1353,51 @@ end
 function print_statement_costs(io::IO, @nospecialize(tt::Type);
                                world = get_world_counter(),
                                interp = Core.Compiler.NativeInterpreter(world))
-    matches = _methods_by_ftype(tt, -1, world)
-    if matches === false
-        error("signature does not correspond to a generic function")
-    end
+    matches = _methods_by_ftype(tt, -1, world)::Vector
     params = Core.Compiler.OptimizationParams(interp)
     cst = Int[]
     for match in matches
+        match = match::Core.MethodMatch
         meth = func_for_method_checked(match.method, tt, match.sparams)
-        (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, true)
-        code === nothing && error("inference not successful") # inference disabled?
-        empty!(cst)
-        resize!(cst, length(code.code))
-        maxcost = Core.Compiler.statement_costs!(cst, code.code, code, Any[match.sparams...], false, params)
-        nd = ndigits(maxcost)
         println(io, meth)
-        IRShow.show_ir(io, code, (io, linestart, idx) -> (print(io, idx > 0 ? lpad(cst[idx], nd+1) : " "^(nd+1), " "); return ""))
-        println()
+        (code, ty) = Core.Compiler.typeinf_code(interp, meth, match.spec_types, match.sparams, true)
+        if code === nothing
+            println(io, "  inference not successful")
+        else
+            empty!(cst)
+            resize!(cst, length(code.code))
+            maxcost = Core.Compiler.statement_costs!(cst, code.code, code, Any[match.sparams...], false, params)
+            nd = ndigits(maxcost)
+            irshow_config = IRShow.IRShowConfig() do io, linestart, idx
+                print(io, idx > 0 ? lpad(cst[idx], nd+1) : " "^(nd+1), " ")
+                return ""
+            end
+            IRShow.show_ir(io, code, irshow_config)
+        end
+        println(io)
     end
 end
 
 print_statement_costs(args...; kwargs...) = print_statement_costs(stdout, args...; kwargs...)
 
+function _which(@nospecialize(tt::Type), world=get_world_counter())
+    match, _ = Core.Compiler._findsup(tt, nothing, world)
+    if match === nothing
+        error("no unique matching method found for the specified argument types")
+    end
+    return match
+end
+
 """
     which(f, types)
 
 Returns the method of `f` (a `Method` object) that would be called for arguments of the given `types`.
 
 If `types` is an abstract type, then the method that would be called by `invoke` is returned.
+
+See also: [`parentmodule`](@ref), and `@which` and `@edit` in [`InteractiveUtils`](@ref man-interactive-utils).
 """
 function which(@nospecialize(f), @nospecialize(t))
-    if isa(f, Core.Builtin)
-        throw(ArgumentError("argument is not a generic function"))
-    end
     t = to_tuple_type(t)
     tt = signature_type(f, t)
     return which(tt)
@@ -1192,12 +1408,8 @@ end
 
 Returns the method that would be called by the given type signature (as a tuple type).
 """
-function which(@nospecialize(tt::Type))
-    m = ccall(:jl_gf_invoke_lookup, Any, (Any, UInt), tt, typemax(UInt))
-    if m === nothing
-        error("no unique matching method found for the specified argument types")
-    end
-    return m::Method
+function which(@nospecialize(tt#=::Type=#))
+    return _which(tt).method
 end
 
 """
@@ -1258,7 +1470,7 @@ function parentmodule(@nospecialize(f), @nospecialize(types))
 end
 
 """
-    hasmethod(f, t::Type{<:Tuple}[, kwnames]; world=typemax(UInt)) -> Bool
+    hasmethod(f, t::Type{<:Tuple}[, kwnames]; world=get_world_counter()) -> Bool
 
 Determine whether the given generic function has a method matching the given
 `Tuple` of argument types with the upper bound of world age given by `world`.
@@ -1293,13 +1505,13 @@ julia> hasmethod(g, Tuple{}, (:a, :b, :c, :d))  # g accepts arbitrary kwargs
 true
 ```
 """
-function hasmethod(@nospecialize(f), @nospecialize(t); world=typemax(UInt))
+function hasmethod(@nospecialize(f), @nospecialize(t); world::UInt=get_world_counter())
     t = to_tuple_type(t)
     t = signature_type(f, t)
-    return ccall(:jl_gf_invoke_lookup, Any, (Any, UInt), t, world) !== nothing
+    return ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), t, nothing, world) !== nothing
 end
 
-function hasmethod(@nospecialize(f), @nospecialize(t), kwnames::Tuple{Vararg{Symbol}}; world=typemax(UInt))
+function hasmethod(@nospecialize(f), @nospecialize(t), kwnames::Tuple{Vararg{Symbol}}; world::UInt=get_world_counter())
     # TODO: this appears to be doing the wrong queries
     hasmethod(f, t, world=world) || return false
     isempty(kwnames) && return true
@@ -1330,7 +1542,7 @@ function bodyfunction(basemethod::Method)
     #   %1 = mkw(kwvalues..., #self#, args...)
     #        return %1
     # where `mkw` is the name of the "active" keyword body-function.
-    ast = Base.uncompressed_ast(basemethod)
+    ast = uncompressed_ast(basemethod)
     f = nothing
     if isa(ast, Core.CodeInfo) && length(ast.code) >= 2
         callexpr = ast.code[end-1]
@@ -1363,9 +1575,11 @@ Determine whether two methods `m1` and `m2` may be ambiguous for some call
 signature. This test is performed in the context of other methods of the same
 function; in isolation, `m1` and `m2` might be ambiguous, but if a third method
 resolving the ambiguity has been defined, this returns `false`.
+Alternatively, in isolation `m1` and `m2` might be ordered, but if a third
+method cannot be sorted with them, they may cause an ambiguity together.
 
 For parametric types, the `ambiguous_bottom` keyword argument controls whether
-`Union{}` counts as an ambiguous intersection of type parameters – when `true`,
+`Union{}` counts as an ambiguous intersection of type parameters – when `true`,
 it is considered ambiguous, when `false` it is not.
 
 # Examples
@@ -1389,27 +1603,89 @@ false
 ```
 """
 function isambiguous(m1::Method, m2::Method; ambiguous_bottom::Bool=false)
-    # TODO: eagerly returning `morespecific` is wrong, and fails to consider
-    # the possibility of an ambiguity caused by a third method:
-    # see the precise algorithm in ml_matches for a more correct computation
-    if m1 === m2 || morespecific(m1.sig, m2.sig) || morespecific(m2.sig, m1.sig)
-        return false
-    end
+    m1 === m2 && return false
     ti = typeintersect(m1.sig, m2.sig)
-    (ti <: m1.sig && ti <: m2.sig) || return false # XXX: completely wrong, obviously
     ti === Bottom && return false
-    if !ambiguous_bottom
-        has_bottom_parameter(ti) && return false
-    end
-    matches = _methods_by_ftype(ti, -1, typemax(UInt))
-    for match in matches
-        m = match.method
-        m === m1 && continue
-        m === m2 && continue
-        if ti <: m.sig && morespecific(m.sig, m1.sig) && morespecific(m.sig, m2.sig)
+    function inner(ti)
+        ti === Bottom && return false
+        if !ambiguous_bottom
+            has_bottom_parameter(ti) && return false
+        end
+        world = get_world_counter()
+        min = Ref{UInt}(typemin(UInt))
+        max = Ref{UInt}(typemax(UInt))
+        has_ambig = Ref{Int32}(0)
+        ms = _methods_by_ftype(ti, nothing, -1, world, true, min, max, has_ambig)::Vector
+        has_ambig[] == 0 && return false
+        if !ambiguous_bottom
+            filter!(ms) do m::Core.MethodMatch
+                return !has_bottom_parameter(m.spec_types)
+            end
+        end
+        # if ml-matches reported the existence of an ambiguity over their
+        # intersection, see if both m1 and m2 may be involved in it
+        have_m1 = have_m2 = false
+        for match in ms
+            match = match::Core.MethodMatch
+            m = match.method
+            m === m1 && (have_m1 = true)
+            m === m2 && (have_m2 = true)
+        end
+        if !have_m1 || !have_m2
+            # ml-matches did not need both methods to expose the reported ambiguity
+            return false
+        end
+        if !ambiguous_bottom
+            # since we're intentionally ignoring certain ambiguities (via the
+            # filter call above), see if we can now declare the intersection fully
+            # covered even though it is partially ambiguous over Union{} as a type
+            # parameter somewhere
+            minmax = nothing
+            for match in ms
+                m = match.method
+                match.fully_covers || continue
+                if minmax === nothing || morespecific(m.sig, minmax.sig)
+                    minmax = m
+                end
+            end
+            if minmax === nothing
+                return true
+            end
+            for match in ms
+                m = match.method
+                m === minmax && continue
+                if match.fully_covers
+                    if !morespecific(minmax.sig, m.sig)
+                        return true
+                    end
+                else
+                    if morespecific(m.sig, minmax.sig)
+                        return true
+                    end
+                end
+            end
             return false
         end
+        return true
+    end
+    if !(ti <: m1.sig && ti <: m2.sig)
+        # When type-intersection fails, it's often also not commutative. Thus
+        # checking the reverse may allow detecting ambiguity solutions
+        # correctly in more cases (and faster).
+        ti2 = typeintersect(m2.sig, m1.sig)
+        if ti2 <: m1.sig && ti2 <: m2.sig
+            ti = ti2
+        elseif ti != ti2
+            # TODO: this would be the correct way to handle this case, but
+            #       people complained so we don't do it
+            # inner(ti2) || return false
+            return false # report that the type system failed to decide if it was ambiguous by saying they definitely aren't
+        else
+            return false # report that the type system failed to decide if it was ambiguous by saying they definitely aren't
+        end
     end
+    inner(ti) || return false
+    # otherwise type-intersection reported an ambiguity we couldn't solve
     return true
 end
 
@@ -1449,7 +1725,6 @@ min_world(m::Core.CodeInfo) = m.min_world
 max_world(m::Core.CodeInfo) = m.max_world
 get_world_counter() = ccall(:jl_get_world_counter, UInt, ())
 
-
 """
     propertynames(x, private=false)
 
@@ -1462,10 +1737,12 @@ as well to get the properties of an instance of the type.
 of the documented interface of `x`.   If you want it to also return "private"
 fieldnames intended for internal use, pass `true` for the optional second argument.
 REPL tab completion on `x.` shows only the `private=false` properties.
+
+See also: [`hasproperty`](@ref), [`hasfield`](@ref).
 """
 propertynames(x) = fieldnames(typeof(x))
 propertynames(m::Module) = names(m)
-propertynames(x, private) = propertynames(x) # ignore private flag by default
+propertynames(x, private::Bool) = propertynames(x) # ignore private flag by default
 
 """
     hasproperty(x, s::Symbol)
@@ -1474,5 +1751,70 @@ Return a boolean indicating whether the object `x` has `s` as one of its own pro
 
 !!! compat "Julia 1.2"
      This function requires at least Julia 1.2.
+
+See also: [`propertynames`](@ref), [`hasfield`](@ref).
 """
 hasproperty(x, s::Symbol) = s in propertynames(x)
+
+"""
+    @invoke f(arg::T, ...; kwargs...)
+
+Provides a convenient way to call [`invoke`](@ref);
+`@invoke f(arg1::T1, arg2::T2; kwargs...)` will be expanded into `invoke(f, Tuple{T1,T2}, arg1, arg2; kwargs...)`.
+When an argument's type annotation is omitted, it's specified as `Any` argument, e.g.
+`@invoke f(arg1::T, arg2)` will be expanded into `invoke(f, Tuple{T,Any}, arg1, arg2)`.
+
+!!! compat "Julia 1.7"
+    This macro requires Julia 1.7 or later.
+"""
+macro invoke(ex)
+    f, args, kwargs = destructure_callex(ex)
+    newargs, newargtypes = Any[], Any[]
+    for i = 1:length(args)
+        x = args[i]
+        if isexpr(x, :(::))
+            a = x.args[1]
+            t = x.args[2]
+        else
+            a = x
+            t = GlobalRef(Core, :Any)
+        end
+        push!(newargs, a)
+        push!(newargtypes, t)
+    end
+    return esc(:($(GlobalRef(Core, :invoke))($(f), Tuple{$(newargtypes...)}, $(newargs...); $(kwargs...))))
+end
+
+"""
+    @invokelatest f(args...; kwargs...)
+
+Provides a convenient way to call [`Base.invokelatest`](@ref).
+`@invokelatest f(args...; kwargs...)` will simply be expanded into
+`Base.invokelatest(f, args...; kwargs...)`.
+
+!!! compat "Julia 1.7"
+    This macro requires Julia 1.7 or later.
+"""
+macro invokelatest(ex)
+    f, args, kwargs = destructure_callex(ex)
+    return esc(:($(GlobalRef(@__MODULE__, :invokelatest))($(f), $(args...); $(kwargs...))))
+end
+
+function destructure_callex(ex)
+    isexpr(ex, :call) || throw(ArgumentError("a call expression f(args...; kwargs...) should be given"))
+
+    f = first(ex.args)
+    args = []
+    kwargs = []
+    for x in ex.args[2:end]
+        if isexpr(x, :parameters)
+            append!(kwargs, x.args)
+        elseif isexpr(x, :kw)
+            push!(kwargs, x)
+        else
+            push!(args, x)
+        end
+    end
+
+    return f, args, kwargs
+end
diff --git a/base/refpointer.jl b/base/refpointer.jl
index 8ce30c6bded3a0..cd179c87b30d5d 100644
--- a/base/refpointer.jl
+++ b/base/refpointer.jl
@@ -24,6 +24,15 @@ If `T` is a bitstype, `isassigned(Ref{T}())` will always be true.
 
 When passed as a `ccall` argument (either as a `Ptr` or `Ref` type), a `Ref`
 object will be converted to a native pointer to the data it references.
+For most `T`, or when converted to a `Ptr{Cvoid}`, this is a pointer to the
+object data. When `T` is an `isbits` type, this value may be safely mutated,
+otherwise mutation is strictly undefined behavior.
+
+As a special case, setting `T = Any` will instead cause the creation of a
+pointer to the reference itself when converted to a `Ptr{Any}`
+(a `jl_value_t const* const*` if T is immutable, else a `jl_value_t *const *`).
+When converted to a `Ptr{Cvoid}`, it will still return a pointer to the data
+region as for any other `T`.
 
 A `C_NULL` instance of `Ptr` can be passed to a `ccall` `Ref` argument to initialize it.
 
@@ -105,21 +114,20 @@ RefArray(x::AbstractArray{T}, i::Int, roots::Any) where {T} = RefArray{T,typeof(
 RefArray(x::AbstractArray{T}, i::Int=1, roots::Nothing=nothing) where {T} = RefArray{T,typeof(x),Nothing}(x, i, nothing)
 convert(::Type{Ref{T}}, x::AbstractArray{T}) where {T} = RefArray(x, 1)
 
-function unsafe_convert(P::Type{Ptr{T}}, b::RefArray{T}) where T
+function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefArray{T})::P where T
     if allocatedinline(T)
         p = pointer(b.x, b.i)
-    elseif isconcretetype(T) && T.mutable
+    elseif isconcretetype(T) && ismutabletype(T)
         p = pointer_from_objref(b.x[b.i])
     else
         # see comment on equivalent branch for RefValue
         p = pointerref(Ptr{Ptr{Cvoid}}(pointer(b.x, b.i)), 1, Core.sizeof(Ptr{Cvoid}))
     end
-    return convert(P, p)
+    return p
 end
-function unsafe_convert(P::Type{Ptr{Any}}, b::RefArray{Any})
-    return convert(P, pointer(b.x, b.i))
+function unsafe_convert(::Type{Ptr{Any}}, b::RefArray{Any})::Ptr{Any}
+    return pointer(b.x, b.i)
 end
-unsafe_convert(::Type{Ptr{Cvoid}}, b::RefArray{T}) where {T} = convert(Ptr{Cvoid}, unsafe_convert(Ptr{T}, b))
 
 ###
 if is_primary_base_module
diff --git a/base/refvalue.jl b/base/refvalue.jl
index 47659ddd31723e..7cbb651d41aee7 100644
--- a/base/refvalue.jl
+++ b/base/refvalue.jl
@@ -35,10 +35,10 @@ true
 """
 isassigned(x::RefValue) = isdefined(x, :x)
 
-function unsafe_convert(P::Type{Ptr{T}}, b::RefValue{T}) where T
+function unsafe_convert(P::Union{Type{Ptr{T}},Type{Ptr{Cvoid}}}, b::RefValue{T})::P where T
     if allocatedinline(T)
         p = pointer_from_objref(b)
-    elseif isconcretetype(T) && T.mutable
+    elseif isconcretetype(T) && ismutabletype(T)
         p = pointer_from_objref(b.x)
     else
         # If the slot is not leaf type, it could be either immutable or not.
@@ -47,12 +47,11 @@ function unsafe_convert(P::Type{Ptr{T}}, b::RefValue{T}) where T
         # which also ensures this returns same pointer as the one rooted in the `RefValue` object.
         p = pointerref(Ptr{Ptr{Cvoid}}(pointer_from_objref(b)), 1, Core.sizeof(Ptr{Cvoid}))
     end
-    return convert(P, p)::Ptr{T}
+    return p
 end
-function unsafe_convert(P::Type{Ptr{Any}}, b::RefValue{Any})
-    return convert(P, pointer_from_objref(b))::Ptr{Any}
+function unsafe_convert(::Type{Ptr{Any}}, b::RefValue{Any})::Ptr{Any}
+    return pointer_from_objref(b)
 end
-unsafe_convert(::Type{Ptr{Cvoid}}, b::RefValue{T}) where {T} = convert(Ptr{Cvoid}, unsafe_convert(Ptr{T}, b))::Ptr{Cvoid}
 
 getindex(b::RefValue) = b.x
 setindex!(b::RefValue, x) = (b.x = x; b)
diff --git a/base/regex.jl b/base/regex.jl
index 75c3777fd681a0..6433eab40006de 100644
--- a/base/regex.jl
+++ b/base/regex.jl
@@ -4,9 +4,16 @@
 
 include("pcre.jl")
 
-const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.NO_UTF_CHECK | PCRE.ALT_BSUX | PCRE.UCP
+const DEFAULT_COMPILER_OPTS = PCRE.UTF | PCRE.MATCH_INVALID_UTF | PCRE.ALT_BSUX | PCRE.UCP
 const DEFAULT_MATCH_OPTS = PCRE.NO_UTF_CHECK
 
+"""
+An abstract type representing any sort of pattern matching expression
+(typically a regular expression). `AbstractPattern` objects can be used to
+match strings with [`match`](@ref).
+"""
+abstract type AbstractPattern end
+
 """
     Regex(pattern[, flags])
 
@@ -16,8 +23,11 @@ with [`match`](@ref).
 `Regex` objects can be created using the [`@r_str`](@ref) string macro. The
 `Regex(pattern[, flags])` constructor is usually used if the `pattern` string needs
 to be interpolated. See the documentation of the string macro for details on flags.
+
+!!! note
+    To escape interpolated variables use `\\Q` and `\\E` (e.g. `Regex("\\\\Q\$x\\\\E")`)
 """
-mutable struct Regex
+mutable struct Regex <: AbstractPattern
     pattern::String
     compile_options::UInt32
     match_options::UInt32
@@ -97,7 +107,7 @@ listed after the ending quote, to change its behaviour:
   `\\s`, `\\W`, `\\w`, etc. match based on Unicode character properties. With this option,
   these sequences only match ASCII characters.
 
-See `Regex` if interpolation is needed.
+See [`Regex`](@ref) if interpolation is needed.
 
 # Examples
 ```jldoctest
@@ -112,8 +122,9 @@ function show(io::IO, re::Regex)
     imsxa = PCRE.CASELESS|PCRE.MULTILINE|PCRE.DOTALL|PCRE.EXTENDED|PCRE.UCP
     opts = re.compile_options
     if (opts & ~imsxa) == (DEFAULT_COMPILER_OPTS & ~imsxa)
-        print(io, 'r')
-        print_quoted_literal(io, re.pattern)
+        print(io, "r\"")
+        escape_raw_string(io, re.pattern)
+        print(io, "\"")
         if (opts & PCRE.CASELESS ) != 0; print(io, 'i'); end
         if (opts & PCRE.MULTILINE) != 0; print(io, 'm'); end
         if (opts & PCRE.DOTALL   ) != 0; print(io, 's'); end
@@ -128,10 +139,54 @@ function show(io::IO, re::Regex)
     end
 end
 
-# TODO: map offsets into strings in other encodings back to original indices.
-# or maybe it's better to just fail since that would be quite slow
+"""
+`AbstractMatch` objects are used to represent information about matches found
+in a string using an `AbstractPattern`.
+"""
+abstract type AbstractMatch end
+
+"""
+    RegexMatch
+
+A type representing a single match to a `Regex` found in a string.
+Typically created from the [`match`](@ref) function.
+
+The `match` field stores the substring of the entire matched string.
+The `captures` field stores the substrings for each capture group, indexed by number.
+To index by capture group name, the entire match object should be indexed instead,
+as shown in the examples.
+The location of the start of the match is stored in the `offset` field.
+The `offsets` field stores the locations of the start of each capture group,
+with 0 denoting a group that was not captured.
+
+This type can be used as an iterator over the capture groups of the `Regex`,
+yielding the substrings captured in each group.
+Because of this, the captures of a match can be destructured.
+If a group was not captured, `nothing` will be yielded instead of a substring.
+
+Methods that accept a `RegexMatch` object are defined for [`iterate`](@ref),
+[`length`](@ref), [`eltype`](@ref), [`keys`](@ref keys(::RegexMatch)), [`haskey`](@ref), and
+[`getindex`](@ref), where keys are the the names or numbers of a capture group.
+See [`keys`](@ref keys(::RegexMatch)) for more information.
+
+# Examples
+```jldoctest
+julia> m = match(r"(?<hour>\\d+):(?<minute>\\d+)(am|pm)?", "11:30 in the morning")
+RegexMatch("11:30", hour="11", minute="30", 3=nothing)
+
+julia> hr, min, ampm = m;
+
+julia> hr
+"11"
 
-struct RegexMatch
+julia> m["minute"]
+"30"
+
+julia> m.match
+"11:30"
+```
+"""
+struct RegexMatch <: AbstractMatch
     match::SubString{String}
     captures::Vector{Union{Nothing,SubString{String}}}
     offset::Int
@@ -139,19 +194,46 @@ struct RegexMatch
     regex::Regex
 end
 
+"""
+    keys(m::RegexMatch) -> Vector
+
+Return a vector of keys for all capture groups of the underlying regex.
+A key is included even if the capture group fails to match.
+That is, `idx` will be in the return value even if `m[idx] == nothing`.
+
+Unnamed capture groups will have integer keys corresponding to their index.
+Named capture groups will have string keys.
+
+!!! compat "Julia 1.6"
+    This method was added in Julia 1.6
+
+# Examples
+```jldoctest
+julia> keys(match(r"(?<hour>\\d+):(?<minute>\\d+)(am|pm)?", "11:30"))
+3-element Vector{Any}:
+  "hour"
+  "minute"
+ 3
+```
+"""
+function keys(m::RegexMatch)
+    idx_to_capture_name = PCRE.capture_names(m.regex.regex)
+    return map(eachindex(m.captures)) do i
+        # If the capture group is named, return it's name, else return it's index
+        get(idx_to_capture_name, i, i)
+    end
+end
+
 function show(io::IO, m::RegexMatch)
     print(io, "RegexMatch(")
     show(io, m.match)
-    idx_to_capture_name = PCRE.capture_names(m.regex.regex)
-    if !isempty(m.captures)
+    capture_keys = keys(m)
+    if !isempty(capture_keys)
         print(io, ", ")
-        for i = 1:length(m.captures)
-            # If the capture group is named, show the name.
-            # Otherwise show its index.
-            capture_name = get(idx_to_capture_name, i, i)
+        for (i, capture_name) in enumerate(capture_keys)
             print(io, capture_name, "=")
             show(io, m.captures[i])
-            if i < length(m.captures)
+            if i < length(m)
                 print(io, ", ")
             end
         end
@@ -175,6 +257,10 @@ function haskey(m::RegexMatch, name::Symbol)
 end
 haskey(m::RegexMatch, name::AbstractString) = haskey(m, Symbol(name))
 
+iterate(m::RegexMatch, args...) = iterate(m.captures, args...)
+length(m::RegexMatch) = length(m.captures)
+eltype(m::RegexMatch) = eltype(m.captures)
+
 function occursin(r::Regex, s::AbstractString; offset::Integer=0)
     compile(r)
     return PCRE.exec_r(r.regex, String(s), offset, r.match_options)
@@ -199,7 +285,7 @@ Return `true` if `s` starts with the regex pattern, `prefix`.
 See also [`occursin`](@ref) and [`endswith`](@ref).
 
 !!! compat "Julia 1.2"
-     This method requires at least Julia 1.2.
+    This method requires at least Julia 1.2.
 
 # Examples
 ```jldoctest
@@ -231,7 +317,7 @@ Return `true` if `s` ends with the regex pattern, `suffix`.
 See also [`occursin`](@ref) and [`startswith`](@ref).
 
 !!! compat "Julia 1.2"
-     This method requires at least Julia 1.2.
+    This method requires at least Julia 1.2.
 
 # Examples
 ```jldoctest
@@ -249,10 +335,24 @@ function endswith(s::SubString, r::Regex)
     return PCRE.exec_r(r.regex, s, 0, r.match_options | PCRE.ENDANCHORED)
 end
 
+function chopprefix(s::AbstractString, prefix::Regex)
+    m = match(prefix, s, firstindex(s), PCRE.ANCHORED)
+    m === nothing && return SubString(s)
+    return SubString(s, ncodeunits(m.match) + 1)
+end
+
+function chopsuffix(s::AbstractString, suffix::Regex)
+    m = match(suffix, s, firstindex(s), PCRE.ENDANCHORED)
+    m === nothing && return SubString(s)
+    isempty(m.match) && return SubString(s)
+    return SubString(s, firstindex(s), prevind(s, m.offset))
+end
+
+
 """
     match(r::Regex, s::AbstractString[, idx::Integer[, addopts]])
 
-Search for the first match of the regular expression `r` in `s` and return a `RegexMatch`
+Search for the first match of the regular expression `r` in `s` and return a [`RegexMatch`](@ref)
 object containing the match, or nothing if the match failed. The matching substring can be
 retrieved by accessing `m.match` and the captured sequences can be retrieved by accessing
 `m.captures` The optional `idx` argument specifies an index at which to start the search.
@@ -278,7 +378,8 @@ true
 """
 function match end
 
-function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer, add_opts::UInt32=UInt32(0))
+function match(re::Regex, str::Union{SubString{String}, String}, idx::Integer,
+               add_opts::UInt32=UInt32(0))
     compile(re)
     opts = re.match_options | add_opts
     matched, data = PCRE.exec_r_data(re.regex, str, idx-1, opts)
@@ -336,7 +437,7 @@ findfirst(r::Regex, s::AbstractString) = findnext(r,s,firstindex(s))
 
 """
     findall(
-        pattern::Union{AbstractString,Regex},
+        pattern::Union{AbstractString,AbstractPattern},
         string::AbstractString;
         overlap::Bool = false,
     )
@@ -364,8 +465,11 @@ julia> findall("a", "banana")
  4:4
  6:6
 ```
+
+!!! compat "Julia 1.3"
+     This method requires at least Julia 1.3.
 """
-function findall(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false)
+function findall(t::Union{AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
     found = UnitRange{Int}[]
     i, e = firstindex(s), lastindex(s)
     while true
@@ -379,9 +483,29 @@ function findall(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Boo
     return found
 end
 
+"""
+    findall(c::AbstractChar, s::AbstractString)
+
+Return a vector `I` of the indices of `s` where `s[i] == c`. If there are no such
+elements in `s`, return an empty array.
+
+# Examples
+```jldoctest
+julia> findall('a', "batman")
+2-element Vector{Int64}:
+ 2
+ 5
+```
+
+!!! compat "Julia 1.7"
+     This method requires at least Julia 1.7.
+"""
+findall(c::AbstractChar, s::AbstractString) = findall(isequal(c),s)
+
+
 """
     count(
-        pattern::Union{AbstractString,Regex},
+        pattern::Union{AbstractChar,AbstractString,AbstractPattern},
         string::AbstractString;
         overlap::Bool = false,
     )
@@ -391,8 +515,14 @@ calling `length(findall(pattern, string))` but more efficient.
 
 If `overlap=true`, the matching sequences are allowed to overlap indices in the
 original string, otherwise they must be from disjoint character ranges.
+
+!!! compat "Julia 1.3"
+     This method requires at least Julia 1.3.
+
+!!! compat "Julia 1.7"
+      Using a character as the pattern requires at least Julia 1.7.
 """
-function count(t::Union{AbstractString,Regex}, s::AbstractString; overlap::Bool=false)
+function count(t::Union{AbstractChar,AbstractString,AbstractPattern}, s::AbstractString; overlap::Bool=false)
     n = 0
     i, e = firstindex(s), lastindex(s)
     while true
@@ -412,18 +542,17 @@ end
 Stores the given string `substr` as a `SubstitutionString`, for use in regular expression
 substitutions. Most commonly constructed using the [`@s_str`](@ref) macro.
 
+# Examples
 ```jldoctest
 julia> SubstitutionString("Hello \\\\g<name>, it's \\\\1")
-s"Hello \\\\g<name>, it's \\\\1"
+s"Hello \\g<name>, it's \\1"
 
 julia> subst = s"Hello \\g<name>, it's \\1"
-s"Hello \\\\g<name>, it's \\\\1"
+s"Hello \\g<name>, it's \\1"
 
 julia> typeof(subst)
 SubstitutionString{String}
-
 ```
-
 """
 struct SubstitutionString{T<:AbstractString} <: AbstractString
     string::T
@@ -436,8 +565,9 @@ isvalid(s::SubstitutionString, i::Integer) = isvalid(s.string, i)::Bool
 iterate(s::SubstitutionString, i::Integer...) = iterate(s.string, i...)::Union{Nothing,Tuple{AbstractChar,Int}}
 
 function show(io::IO, s::SubstitutionString)
-    print(io, "s")
-    show(io, s.string)
+    print(io, "s\"")
+    escape_raw_string(io, s.string)
+    print(io, "\"")
 end
 
 """
@@ -447,6 +577,7 @@ Construct a substitution string, used for regular expression substitutions.  Wit
 string, sequences of the form `\\N` refer to the Nth capture group in the regex, and
 `\\g<groupname>` refers to a named capture group with name `groupname`.
 
+# Examples
 ```jldoctest
 julia> msg = "#Hello# from Julia";
 
@@ -472,13 +603,20 @@ _free_pat_replacer(r::RegexAndMatchData) = PCRE.free_match_data(r.match_data)
 
 replace_err(repl) = error("Bad replacement string: $repl")
 
-function _write_capture(io, re::RegexAndMatchData, group)
+function _write_capture(io::IO, group::Int, str, r, re::RegexAndMatchData)
     len = PCRE.substring_length_bynumber(re.match_data, group)
+    # in the case of an optional group that doesn't match, len == 0
+    len == 0 && return
     ensureroom(io, len+1)
     PCRE.substring_copy_bynumber(re.match_data, group,
         pointer(io.data, io.ptr), len+1)
     io.ptr += len
     io.size = max(io.size, io.ptr - 1)
+    nothing
+end
+function _write_capture(io::IO, group::Int, str, r, re)
+    group == 0 || replace_err("pattern is not a Regex")
+    return print(io, SubString(str, r))
 end
 
 
@@ -486,7 +624,7 @@ const SUB_CHAR = '\\'
 const GROUP_CHAR = 'g'
 const KEEP_ESC = [SUB_CHAR, GROUP_CHAR, '0':'9'...]
 
-function _replace(io, repl_s::SubstitutionString, str, r, re::RegexAndMatchData)
+function _replace(io, repl_s::SubstitutionString, str, r, re)
     LBRACKET = '<'
     RBRACKET = '>'
     repl = unescape_string(repl_s.string, KEEP_ESC)
@@ -510,7 +648,7 @@ function _replace(io, repl_s::SubstitutionString, str, r, re::RegexAndMatchData)
                         break
                     end
                 end
-                _write_capture(io, re, group)
+                _write_capture(io, group, str, r, re)
             elseif repl[next_i] == GROUP_CHAR
                 i = nextind(repl, next_i)
                 if i > e || repl[i] != LBRACKET
@@ -523,15 +661,16 @@ function _replace(io, repl_s::SubstitutionString, str, r, re::RegexAndMatchData)
                     i = nextind(repl, i)
                     i > e && replace_err(repl)
                 end
-                #  TODO: avoid this allocation
                 groupname = SubString(repl, groupstart, prevind(repl, i))
                 if all(isdigit, groupname)
-                    _write_capture(io, re, parse(Int, groupname))
-                else
+                    group = parse(Int, groupname)
+                elseif re isa RegexAndMatchData
                     group = PCRE.substring_number_from_name(re.re.regex, groupname)
                     group < 0 && replace_err("Group $groupname not found in regex $(re.re)")
-                    _write_capture(io, re, group)
+                else
+                    group = -1
                 end
+                _write_capture(io, group, str, r, re)
                 i = nextind(repl, i)
             else
                 replace_err(repl)
@@ -643,7 +782,7 @@ meaning that the contained characters are devoid of any special meaning
 (they are quoted with "\\Q" and "\\E").
 
 !!! compat "Julia 1.3"
-     This method requires at least Julia 1.3.
+    This method requires at least Julia 1.3.
 
 # Examples
 ```jldoctest
@@ -722,7 +861,7 @@ end
 Repeat a regex `n` times.
 
 !!! compat "Julia 1.3"
-     This method requires at least Julia 1.3.
+    This method requires at least Julia 1.3.
 
 # Examples
 ```jldoctest
diff --git a/base/reinterpretarray.jl b/base/reinterpretarray.jl
index 4a11a4d55ae7d3..7dc6607285fd0a 100644
--- a/base/reinterpretarray.jl
+++ b/base/reinterpretarray.jl
@@ -3,53 +3,143 @@
 """
 Gives a reinterpreted view (of element type T) of the underlying array (of element type S).
 If the size of `T` differs from the size of `S`, the array will be compressed/expanded in
-the first dimension.
+the first dimension. The variant `reinterpret(reshape, T, a)` instead adds or consumes the first dimension
+depending on the ratio of element sizes.
 """
-struct ReinterpretArray{T,N,S,A<:AbstractArray{S, N}} <: AbstractArray{T, N}
+struct ReinterpretArray{T,N,S,A<:AbstractArray{S},IsReshaped} <: AbstractArray{T, N}
     parent::A
     readable::Bool
     writable::Bool
+
+    function throwbits(S::Type, T::Type, U::Type)
+        @noinline
+        throw(ArgumentError("cannot reinterpret `$(S)` as `$(T)`, type `$(U)` is not a bits type"))
+    end
+    function throwsize0(S::Type, T::Type, msg)
+        @noinline
+        throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a $msg size"))
+    end
+    function throwsingleton(S::Type, T::Type, kind)
+        @noinline
+        throw(ArgumentError("cannot reinterpret $kind `$(S)` array to `$(T)` which is a singleton type"))
+    end
+
     global reinterpret
     function reinterpret(::Type{T}, a::A) where {T,N,S,A<:AbstractArray{S, N}}
-        function throwbits(::Type{S}, ::Type{T}, ::Type{U}) where {S,T,U}
-            @_noinline_meta
-            throw(ArgumentError("cannot reinterpret `$(S)` `$(T)`, type `$(U)` is not a bits type"))
-        end
-        function throwsize0(::Type{S}, ::Type{T})
-            @_noinline_meta
-            throw(ArgumentError("cannot reinterpret a zero-dimensional `$(S)` array to `$(T)` which is of a different size"))
-        end
-        function thrownonint(::Type{S}, ::Type{T}, dim)
-            @_noinline_meta
+        function thrownonint(S::Type, T::Type, dim)
+            @noinline
             throw(ArgumentError("""
                 cannot reinterpret an `$(S)` array to `$(T)` whose first dimension has size `$(dim)`.
                 The resulting array would have non-integral first dimension.
                 """))
         end
-        function throwaxes1(::Type{S}, ::Type{T}, ax1)
-            @_noinline_meta
+        function throwaxes1(S::Type, T::Type, ax1)
+            @noinline
             throw(ArgumentError("cannot reinterpret a `$(S)` array to `$(T)` when the first axis is $ax1. Try reshaping first."))
         end
         isbitstype(T) || throwbits(S, T, T)
         isbitstype(S) || throwbits(S, T, S)
-        (N != 0 || sizeof(T) == sizeof(S)) || throwsize0(S, T)
+        (N != 0 || sizeof(T) == sizeof(S)) || throwsize0(S, T, "different")
         if N != 0 && sizeof(S) != sizeof(T)
             ax1 = axes(a)[1]
             dim = length(ax1)
-            rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim)
+            if issingletontype(T)
+                dim == 0 || throwsingleton(S, T, "a non-empty")
+            else
+                rem(dim*sizeof(S),sizeof(T)) == 0 || thrownonint(S, T, dim)
+            end
             first(ax1) == 1 || throwaxes1(S, T, ax1)
         end
         readable = array_subpadding(T, S)
         writable = array_subpadding(S, T)
-        new{T, N, S, A}(a, readable, writable)
+        new{T, N, S, A, false}(a, readable, writable)
+    end
+    reinterpret(::Type{T}, a::AbstractArray{T}) where {T} = a
+
+    # With reshaping
+    function reinterpret(::typeof(reshape), ::Type{T}, a::A) where {T,S,A<:AbstractArray{S}}
+        function throwintmult(S::Type, T::Type)
+            @noinline
+            throw(ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got $(sizeof(T))) and `sizeof(eltype(a))` (got $(sizeof(S))) be an integer multiple of the other"))
+        end
+        function throwsize1(a::AbstractArray, T::Type)
+            @noinline
+            throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $(eltype(a)) requires that `axes(a, 1)` (got $(axes(a, 1))) be equal to 1:$(sizeof(T) ÷ sizeof(eltype(a))) (from the ratio of element sizes)"))
+        end
+        function throwfromsingleton(S, T)
+            @noinline
+            throw(ArgumentError("`reinterpret(reshape, $T, a)` where `eltype(a)` is $S requires that $T be a singleton type, since $S is one"))
+        end
+        isbitstype(T) || throwbits(S, T, T)
+        isbitstype(S) || throwbits(S, T, S)
+        if sizeof(S) == sizeof(T)
+            N = ndims(a)
+        elseif sizeof(S) > sizeof(T)
+            issingletontype(T) && throwsingleton(S, T, "with reshape a")
+            rem(sizeof(S), sizeof(T)) == 0 || throwintmult(S, T)
+            N = ndims(a) + 1
+        else
+            issingletontype(S) && throwfromsingleton(S, T)
+            rem(sizeof(T), sizeof(S)) == 0 || throwintmult(S, T)
+            N = ndims(a) - 1
+            N > -1 || throwsize0(S, T, "larger")
+            axes(a, 1) == OneTo(sizeof(T) ÷ sizeof(S)) || throwsize1(a, T)
+        end
+        readable = array_subpadding(T, S)
+        writable = array_subpadding(S, T)
+        new{T, N, S, A, true}(a, readable, writable)
     end
+    reinterpret(::typeof(reshape), ::Type{T}, a::AbstractArray{T}) where {T} = a
 end
 
-reinterpret(::Type{T}, a::ReinterpretArray) where {T} = reinterpret(T, a.parent)
+ReshapedReinterpretArray{T,N,S,A<:AbstractArray{S}} = ReinterpretArray{T,N,S,A,true}
+NonReshapedReinterpretArray{T,N,S,A<:AbstractArray{S, N}} = ReinterpretArray{T,N,S,A,false}
+
+"""
+    reinterpret(reshape, T, A::AbstractArray{S}) -> B
+
+Change the type-interpretation of `A` while consuming or adding a "channel dimension."
+
+If `sizeof(T) = n*sizeof(S)` for `n>1`, `A`'s first dimension must be
+of size `n` and `B` lacks `A`'s first dimension. Conversely, if `sizeof(S) = n*sizeof(T)` for `n>1`,
+`B` gets a new first dimension of size `n`. The dimensionality is unchanged if `sizeof(T) == sizeof(S)`.
+
+!!! compat "Julia 1.6"
+    This method requires at least Julia 1.6.
+
+# Examples
+
+```jldoctest
+julia> A = [1 2; 3 4]
+2×2 Matrix{$Int}:
+ 1  2
+ 3  4
+
+julia> reinterpret(reshape, Complex{Int}, A)    # the result is a vector
+2-element reinterpret(reshape, Complex{$Int}, ::Matrix{$Int}) with eltype Complex{$Int}:
+ 1 + 3im
+ 2 + 4im
+
+julia> a = [(1,2,3), (4,5,6)]
+2-element Vector{Tuple{$Int, $Int, $Int}}:
+ (1, 2, 3)
+ (4, 5, 6)
+
+julia> reinterpret(reshape, Int, a)             # the result is a matrix
+3×2 reinterpret(reshape, $Int, ::Vector{Tuple{$Int, $Int, $Int}}) with eltype $Int:
+ 1  4
+ 2  5
+ 3  6
+```
+"""
+reinterpret(::typeof(reshape), T::Type, a::AbstractArray)
+
+reinterpret(::Type{T}, a::NonReshapedReinterpretArray) where {T} = reinterpret(T, a.parent)
+reinterpret(::typeof(reshape), ::Type{T}, a::ReshapedReinterpretArray) where {T} = reinterpret(reshape, T, a.parent)
 
 # Definition of StridedArray
 StridedFastContiguousSubArray{T,N,A<:DenseArray} = FastContiguousSubArray{T,N,A}
-StridedReinterpretArray{T,N,A<:Union{DenseArray,StridedFastContiguousSubArray}} = ReinterpretArray{T,N,S,A} where S
+StridedReinterpretArray{T,N,A<:Union{DenseArray,StridedFastContiguousSubArray},IsReshaped} = ReinterpretArray{T,N,S,A,IsReshaped} where S
 StridedReshapedArray{T,N,A<:Union{DenseArray,StridedFastContiguousSubArray,StridedReinterpretArray}} = ReshapedArray{T,N,A}
 StridedSubArray{T,N,A<:Union{DenseArray,StridedReshapedArray,StridedReinterpretArray},
     I<:Tuple{Vararg{Union{RangeIndex, ReshapedUnitRange, AbstractCartesianIndex}}}} = SubArray{T,N,A,I}
@@ -58,33 +148,43 @@ StridedVector{T} = StridedArray{T,1}
 StridedMatrix{T} = StridedArray{T,2}
 StridedVecOrMat{T} = Union{StridedVector{T}, StridedMatrix{T}}
 
-# the definition of strides for Array{T,N} is tuple() if N = 0, otherwise it is
-# a tuple containing 1 and a cumulative product of the first N-1 sizes
-# this definition is also used for StridedReshapedArray and StridedReinterpretedArray
-# which have the same memory storage as Array
-stride(a::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}, i::Int) = _stride(a, i)
+strides(a::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}) = size_to_strides(1, size(a)...)
+stride(A::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}, k::Integer) =
+    k ≤ ndims(A) ? strides(A)[k] : length(A)
+
+function strides(a::ReshapedReinterpretArray)
+    ap = parent(a)
+    els, elp = elsize(a), elsize(ap)
+    stp = strides(ap)
+    els == elp && return stp
+    els < elp && return (1, _checked_strides(stp, els, elp)...)
+    stp[1] == 1 || throw(ArgumentError("Parent must be contiguous in the 1st dimension!"))
+    return _checked_strides(tail(stp), els, elp)
+end
 
-function stride(a::ReinterpretArray, i::Int)
-    a.parent isa StridedArray || ArgumentError("Parent must be strided.") |> throw
-    return _stride(a, i)
+function strides(a::NonReshapedReinterpretArray)
+    ap = parent(a)
+    els, elp = elsize(a), elsize(ap)
+    stp = strides(ap)
+    els == elp && return stp
+    stp[1] == 1 || throw(ArgumentError("Parent must be contiguous in the 1st dimension!"))
+    return (1, _checked_strides(tail(stp), els, elp)...)
 end
 
-function _stride(a, i)
-    if i > ndims(a)
-        return length(a)
+@inline function _checked_strides(stp::Tuple, els::Integer, elp::Integer)
+    if elp > els && rem(elp, els) == 0
+        N = div(elp, els)
+        return map(i -> N * i, stp)
     end
-    s = 1
-    for n = 1:(i-1)
-        s *= size(a, n)
-    end
-    return s
+    drs = map(i -> divrem(elp * i, els), stp)
+    all(i->iszero(i[2]), drs) ||
+        throw(ArgumentError("Parent's strides could not be exactly divided!"))
+    map(first, drs)
 end
 
-function strides(a::ReinterpretArray)
-    a.parent isa StridedArray || ArgumentError("Parent must be strided.") |> throw
-    size_to_strides(1, size(a)...)
-end
-strides(a::Union{DenseArray,StridedReshapedArray,StridedReinterpretArray}) = size_to_strides(1, size(a)...)
+_checkcontiguous(::Type{Bool}, A::ReinterpretArray) = _checkcontiguous(Bool, parent(A))
+
+similar(a::ReinterpretArray, T::Type, d::Dims) = similar(a.parent, T, d)
 
 function check_readable(a::ReinterpretArray{T, N, S} where N) where {T,S}
     # See comment in check_writable
@@ -105,32 +205,146 @@ function check_writable(a::ReinterpretArray{T, N, S} where N) where {T,S}
     end
 end
 
-IndexStyle(a::ReinterpretArray) = IndexStyle(a.parent)
+## IndexStyle specializations
+
+# For `reinterpret(reshape, T, a)` where we're adding a channel dimension and with
+# `IndexStyle(a) == IndexLinear()`, it's advantageous to retain pseudo-linear indexing.
+struct IndexSCartesian2{K} <: IndexStyle end   # K = sizeof(S) ÷ sizeof(T), a static-sized 2d cartesian iterator
+
+IndexStyle(::Type{ReinterpretArray{T,N,S,A,false}}) where {T,N,S,A<:AbstractArray{S,N}} = IndexStyle(A)
+function IndexStyle(::Type{ReinterpretArray{T,N,S,A,true}}) where {T,N,S,A<:AbstractArray{S}}
+    if sizeof(T) < sizeof(S)
+        IndexStyle(A) === IndexLinear() && return IndexSCartesian2{sizeof(S) ÷ sizeof(T)}()
+        return IndexCartesian()
+    end
+    return IndexStyle(A)
+end
+IndexStyle(::IndexSCartesian2{K}, ::IndexSCartesian2{K}) where {K} = IndexSCartesian2{K}()
+
+struct SCartesianIndex2{K}   # can't make <:AbstractCartesianIndex without N, and 2 would be a bit misleading
+    i::Int
+    j::Int
+end
+to_index(i::SCartesianIndex2) = i
+
+struct SCartesianIndices2{K,R<:AbstractUnitRange{Int}} <: AbstractMatrix{SCartesianIndex2{K}}
+    indices2::R
+end
+SCartesianIndices2{K}(indices2::AbstractUnitRange{Int}) where {K} = (@assert K::Int > 1; SCartesianIndices2{K,typeof(indices2)}(indices2))
+
+eachindex(::IndexSCartesian2{K}, A::ReshapedReinterpretArray) where {K} = SCartesianIndices2{K}(eachindex(IndexLinear(), parent(A)))
+@inline function eachindex(style::IndexSCartesian2{K}, A::AbstractArray, B::AbstractArray...) where {K}
+    iter = eachindex(style, A)
+    _all_match_first(C->eachindex(style, C), iter, B...) || throw_eachindex_mismatch_indices(IndexSCartesian2{K}(), axes(A), axes.(B)...)
+    return iter
+end
+
+size(iter::SCartesianIndices2{K}) where K = (K, length(iter.indices2))
+axes(iter::SCartesianIndices2{K}) where K = (OneTo(K), iter.indices2)
+
+first(iter::SCartesianIndices2{K}) where {K} = SCartesianIndex2{K}(1, first(iter.indices2))
+last(iter::SCartesianIndices2{K}) where {K}  = SCartesianIndex2{K}(K, last(iter.indices2))
+
+@inline function getindex(iter::SCartesianIndices2{K}, i::Int, j::Int) where {K}
+    @boundscheck checkbounds(iter, i, j)
+    return SCartesianIndex2{K}(i, iter.indices2[j])
+end
+
+function iterate(iter::SCartesianIndices2{K}) where {K}
+    ret = iterate(iter.indices2)
+    ret === nothing && return nothing
+    item2, state2 = ret
+    return SCartesianIndex2{K}(1, item2), (1, item2, state2)
+end
+
+function iterate(iter::SCartesianIndices2{K}, (state1, item2, state2)) where {K}
+    if state1 < K
+        item1 = state1 + 1
+        return SCartesianIndex2{K}(item1, item2), (item1, item2, state2)
+    end
+    ret = iterate(iter.indices2, state2)
+    ret === nothing && return nothing
+    item2, state2 = ret
+    return SCartesianIndex2{K}(1, item2), (1, item2, state2)
+end
+
+SimdLoop.simd_outer_range(iter::SCartesianIndices2) = iter.indices2
+SimdLoop.simd_inner_length(::SCartesianIndices2{K}, ::Any) where K = K
+@inline function SimdLoop.simd_index(::SCartesianIndices2{K}, Ilast::Int, I1::Int) where {K}
+    SCartesianIndex2{K}(I1+1, Ilast)
+end
+
+_maybe_reshape(::IndexSCartesian2, A::ReshapedReinterpretArray, I...) = A
+
+# fallbacks
+function _getindex(::IndexSCartesian2, A::AbstractArray{T,N}, I::Vararg{Int, N}) where {T,N}
+    @_propagate_inbounds_meta
+    getindex(A, I...)
+end
+function _setindex!(::IndexSCartesian2, A::AbstractArray{T,N}, v, I::Vararg{Int, N}) where {T,N}
+    @_propagate_inbounds_meta
+    setindex!(A, v, I...)
+end
+# fallbacks for array types that use "pass-through" indexing (e.g., `IndexStyle(A) = IndexStyle(parent(A))`)
+# but which don't handle SCartesianIndex2
+function _getindex(::IndexSCartesian2, A::AbstractArray{T,N}, ind::SCartesianIndex2) where {T,N}
+    @_propagate_inbounds_meta
+    J = _ind2sub(tail(axes(A)), ind.j)
+    getindex(A, ind.i, J...)
+end
+function _setindex!(::IndexSCartesian2, A::AbstractArray{T,N}, v, ind::SCartesianIndex2) where {T,N}
+    @_propagate_inbounds_meta
+    J = _ind2sub(tail(axes(A)), ind.j)
+    setindex!(A, v, ind.i, J...)
+end
+eachindex(style::IndexSCartesian2, A::AbstractArray) = eachindex(style, parent(A))
+
+## AbstractArray interface
 
 parent(a::ReinterpretArray) = a.parent
 dataids(a::ReinterpretArray) = dataids(a.parent)
-unaliascopy(a::ReinterpretArray{T}) where {T} = reinterpret(T, unaliascopy(a.parent))
+unaliascopy(a::NonReshapedReinterpretArray{T}) where {T} = reinterpret(T, unaliascopy(a.parent))
+unaliascopy(a::ReshapedReinterpretArray{T}) where {T} = reinterpret(reshape, T, unaliascopy(a.parent))
 
-function size(a::ReinterpretArray{T,N,S} where {N}) where {T,S}
+function size(a::NonReshapedReinterpretArray{T,N,S} where {N}) where {T,S}
     psize = size(a.parent)
-    size1 = div(psize[1]*sizeof(S), sizeof(T))
+    size1 = issingletontype(T) ? psize[1] : div(psize[1]*sizeof(S), sizeof(T))
     tuple(size1, tail(psize)...)
 end
-size(a::ReinterpretArray{T,0}) where {T} = ()
+function size(a::ReshapedReinterpretArray{T,N,S} where {N}) where {T,S}
+    psize = size(a.parent)
+    sizeof(S) > sizeof(T) && return (div(sizeof(S), sizeof(T)), psize...)
+    sizeof(S) < sizeof(T) && return tail(psize)
+    return psize
+end
+size(a::NonReshapedReinterpretArray{T,0}) where {T} = ()
 
-function axes(a::ReinterpretArray{T,N,S} where {N}) where {T,S}
+function axes(a::NonReshapedReinterpretArray{T,N,S} where {N}) where {T,S}
     paxs = axes(a.parent)
     f, l = first(paxs[1]), length(paxs[1])
-    size1 = div(l*sizeof(S), sizeof(T))
+    size1 = issingletontype(T) ? l : div(l*sizeof(S), sizeof(T))
     tuple(oftype(paxs[1], f:f+size1-1), tail(paxs)...)
 end
-axes(a::ReinterpretArray{T,0}) where {T} = ()
+function axes(a::ReshapedReinterpretArray{T,N,S} where {N}) where {T,S}
+    paxs = axes(a.parent)
+    sizeof(S) > sizeof(T) && return (OneTo(div(sizeof(S), sizeof(T))), paxs...)
+    sizeof(S) < sizeof(T) && return tail(paxs)
+    return paxs
+end
+axes(a::NonReshapedReinterpretArray{T,0}) where {T} = ()
 
 elsize(::Type{<:ReinterpretArray{T}}) where {T} = sizeof(T)
 unsafe_convert(::Type{Ptr{T}}, a::ReinterpretArray{T,N,S} where N) where {T,S} = Ptr{T}(unsafe_convert(Ptr{S},a.parent))
 
-@inline @propagate_inbounds getindex(a::ReinterpretArray{T,0}) where {T} = reinterpret(T, a.parent[])
-@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[1]
+@inline @propagate_inbounds function getindex(a::NonReshapedReinterpretArray{T,0,S}) where {T,S}
+    if isprimitivetype(T) && isprimitivetype(S)
+        reinterpret(T, a.parent[])
+    else
+        a[firstindex(a)]
+    end
+end
+
+@inline @propagate_inbounds getindex(a::ReinterpretArray) = a[firstindex(a)]
 
 @inline @propagate_inbounds function getindex(a::ReinterpretArray{T,N,S}, inds::Vararg{Int, N}) where {T,N,S}
     check_readable(a)
@@ -145,41 +359,62 @@ end
     # Convert to full indices here, to avoid needing multiple conversions in
     # the loop in _getindex_ra
     inds = _to_subscript_indices(a, i)
-    _getindex_ra(a, inds[1], tail(inds))
+    isempty(inds) ? _getindex_ra(a, 1, ()) : _getindex_ra(a, inds[1], tail(inds))
+end
+
+@inline @propagate_inbounds function getindex(a::ReshapedReinterpretArray{T,N,S}, ind::SCartesianIndex2) where {T,N,S}
+    check_readable(a)
+    s = Ref{S}(a.parent[ind.j])
+    GC.@preserve s begin
+        tptr = Ptr{T}(unsafe_convert(Ref{S}, s))
+        return unsafe_load(tptr, ind.i)
+    end
 end
 
 @inline _memcpy!(dst, src, n) = ccall(:memcpy, Cvoid, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), dst, src, n)
 
-@inline @propagate_inbounds function _getindex_ra(a::ReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT}
+@inline @propagate_inbounds function _getindex_ra(a::NonReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT}
     # Make sure to match the scalar reinterpret if that is applicable
     if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
+        if issingletontype(T) # singleton types
+            @boundscheck checkbounds(a, i1, tailinds...)
+            return T.instance
+        end
         return reinterpret(T, a.parent[i1, tailinds...])
     else
         @boundscheck checkbounds(a, i1, tailinds...)
         ind_start, sidx = divrem((i1-1)*sizeof(T), sizeof(S))
-        t = Ref{T}()
-        s = Ref{S}()
-        GC.@preserve t s begin
-            tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t))
-            sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s))
-            # Optimizations that avoid branches
-            if sizeof(T) % sizeof(S) == 0
-                # T is bigger than S and contains an integer number of them
-                n = sizeof(T) ÷ sizeof(S)
+        # Optimizations that avoid branches
+        if sizeof(T) % sizeof(S) == 0
+            # T is bigger than S and contains an integer number of them
+            n = sizeof(T) ÷ sizeof(S)
+            t = Ref{T}()
+            GC.@preserve t begin
+                sptr = Ptr{S}(unsafe_convert(Ref{T}, t))
                 for i = 1:n
-                    s[] = a.parent[ind_start + i, tailinds...]
-                    _memcpy!(tptr + (i-1)*sizeof(S), sptr, sizeof(S))
+                     s = a.parent[ind_start + i, tailinds...]
+                     unsafe_store!(sptr, s, i)
                 end
-            elseif sizeof(S) % sizeof(T) == 0
-                # S is bigger than T and contains an integer number of them
-                s[] = a.parent[ind_start + 1, tailinds...]
-                _memcpy!(tptr, sptr + sidx, sizeof(T))
-            else
-                i = 1
-                nbytes_copied = 0
-                # This is a bit complicated to deal with partial elements
-                # at both the start and the end. LLVM will fold as appropriate,
-                # once it knows the data layout
+            end
+            return t[]
+        elseif sizeof(S) % sizeof(T) == 0
+            # S is bigger than T and contains an integer number of them
+            s = Ref{S}(a.parent[ind_start + 1, tailinds...])
+            GC.@preserve s begin
+                tptr = Ptr{T}(unsafe_convert(Ref{S}, s))
+                return unsafe_load(tptr + sidx)
+            end
+        else
+            i = 1
+            nbytes_copied = 0
+            # This is a bit complicated to deal with partial elements
+            # at both the start and the end. LLVM will fold as appropriate,
+            # once it knows the data layout
+            s = Ref{S}()
+            t = Ref{T}()
+            GC.@preserve s t begin
+                sptr = Ptr{S}(unsafe_convert(Ref{S}, s))
+                tptr = Ptr{T}(unsafe_convert(Ref{T}, t))
                 while nbytes_copied < sizeof(T)
                     s[] = a.parent[ind_start + i, tailinds...]
                     nb = min(sizeof(S) - sidx, sizeof(T)-nbytes_copied)
@@ -189,14 +424,67 @@ end
                     i += 1
                 end
             end
+            return t[]
+        end
+    end
+end
+
+@inline @propagate_inbounds function _getindex_ra(a::ReshapedReinterpretArray{T,N,S}, i1::Int, tailinds::TT) where {T,N,S,TT}
+    # Make sure to match the scalar reinterpret if that is applicable
+    if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
+        if issingletontype(T) # singleton types
+            @boundscheck checkbounds(a, i1, tailinds...)
+            return T.instance
+        end
+        return reinterpret(T, a.parent[i1, tailinds...])
+    end
+    @boundscheck checkbounds(a, i1, tailinds...)
+    if sizeof(T) >= sizeof(S)
+        t = Ref{T}()
+        GC.@preserve t begin
+            sptr = Ptr{S}(unsafe_convert(Ref{T}, t))
+            if sizeof(T) > sizeof(S)
+                # Extra dimension in the parent array
+                n = sizeof(T) ÷ sizeof(S)
+                if isempty(tailinds) && IndexStyle(a.parent) === IndexLinear()
+                    offset = n * (i1 - firstindex(a))
+                    for i = 1:n
+                        s = a.parent[i + offset]
+                        unsafe_store!(sptr, s, i)
+                    end
+                else
+                    for i = 1:n
+                        s = a.parent[i, i1, tailinds...]
+                        unsafe_store!(sptr, s, i)
+                    end
+                end
+            else
+                # No extra dimension
+                s = a.parent[i1, tailinds...]
+                unsafe_store!(sptr, s)
+            end
         end
         return t[]
     end
+    # S is bigger than T and contains an integer number of them
+    # n = sizeof(S) ÷ sizeof(T)
+    s = Ref{S}()
+    GC.@preserve s begin
+        tptr = Ptr{T}(unsafe_convert(Ref{S}, s))
+        s[] = a.parent[tailinds...]
+        return unsafe_load(tptr, i1)
+    end
 end
 
+@inline @propagate_inbounds function setindex!(a::NonReshapedReinterpretArray{T,0,S}, v) where {T,S}
+    if isprimitivetype(S) && isprimitivetype(T)
+        a.parent[] = reinterpret(S, v)
+        return a
+    end
+    setindex!(a, v, firstindex(a))
+end
 
-@inline @propagate_inbounds setindex!(a::ReinterpretArray{T,0,S} where T, v) where {S} = (a.parent[] = reinterpret(S, v))
-@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = (a[1] = v)
+@inline @propagate_inbounds setindex!(a::ReinterpretArray, v) = setindex!(a, v, firstindex(a))
 
 @inline @propagate_inbounds function setindex!(a::ReinterpretArray{T,N,S}, v, inds::Vararg{Int, N}) where {T,N,S}
     check_writable(a)
@@ -212,33 +500,57 @@ end
     _setindex_ra!(a, v, inds[1], tail(inds))
 end
 
-@inline @propagate_inbounds function _setindex_ra!(a::ReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT}
+@inline @propagate_inbounds function setindex!(a::ReshapedReinterpretArray{T,N,S}, v, ind::SCartesianIndex2) where {T,N,S}
+    check_writable(a)
+    v = convert(T, v)::T
+    s = Ref{S}(a.parent[ind.j])
+    GC.@preserve s begin
+        tptr = Ptr{T}(unsafe_convert(Ref{S}, s))
+        unsafe_store!(tptr, v, ind.i)
+    end
+    a.parent[ind.j] = s[]
+    return a
+end
+
+@inline @propagate_inbounds function _setindex_ra!(a::NonReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT}
     v = convert(T, v)::T
     # Make sure to match the scalar reinterpret if that is applicable
     if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
-        return setindex!(a.parent, reinterpret(S, v), i1, tailinds...)
+        if issingletontype(T) # singleton types
+            @boundscheck checkbounds(a, i1, tailinds...)
+            # setindex! is a noop except for the index check
+        else
+            setindex!(a.parent, reinterpret(S, v), i1, tailinds...)
+        end
     else
         @boundscheck checkbounds(a, i1, tailinds...)
         ind_start, sidx = divrem((i1-1)*sizeof(T), sizeof(S))
-        t = Ref{T}(v)
-        s = Ref{S}()
-        GC.@preserve t s begin
-            tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t))
-            sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s))
-            # Optimizations that avoid branches
-            if sizeof(T) % sizeof(S) == 0
-                # T is bigger than S and contains an integer number of them
+        # Optimizations that avoid branches
+        if sizeof(T) % sizeof(S) == 0
+            # T is bigger than S and contains an integer number of them
+            t = Ref{T}(v)
+            GC.@preserve t begin
+                sptr = Ptr{S}(unsafe_convert(Ref{T}, t))
                 n = sizeof(T) ÷ sizeof(S)
-                for i = 0:n-1
-                    _memcpy!(sptr, tptr + i*sizeof(S), sizeof(S))
-                    a.parent[ind_start + i + 1, tailinds...] = s[]
+                for i = 1:n
+                    s = unsafe_load(sptr, i)
+                    a.parent[ind_start + i, tailinds...] = s
                 end
-            elseif sizeof(S) % sizeof(T) == 0
-                # S is bigger than T and contains an integer number of them
-                s[] = a.parent[ind_start + 1, tailinds...]
-                _memcpy!(sptr + sidx, tptr, sizeof(T))
+            end
+        elseif sizeof(S) % sizeof(T) == 0
+            # S is bigger than T and contains an integer number of them
+            s = Ref{S}(a.parent[ind_start + 1, tailinds...])
+            GC.@preserve s begin
+                tptr = Ptr{T}(unsafe_convert(Ref{S}, s))
+                unsafe_store!(tptr + sidx, v)
                 a.parent[ind_start + 1, tailinds...] = s[]
-            else
+            end
+        else
+            t = Ref{T}(v)
+            s = Ref{S}()
+            GC.@preserve t s begin
+                tptr = Ptr{UInt8}(unsafe_convert(Ref{T}, t))
+                sptr = Ptr{UInt8}(unsafe_convert(Ref{S}, s))
                 nbytes_copied = 0
                 i = 1
                 # Deal with any partial elements at the start. We'll have to copy in the
@@ -273,6 +585,56 @@ end
     return a
 end
 
+@inline @propagate_inbounds function _setindex_ra!(a::ReshapedReinterpretArray{T,N,S}, v, i1::Int, tailinds::TT) where {T,N,S,TT}
+    v = convert(T, v)::T
+    # Make sure to match the scalar reinterpret if that is applicable
+    if sizeof(T) == sizeof(S) && (fieldcount(T) + fieldcount(S)) == 0
+        if issingletontype(T) # singleton types
+            @boundscheck checkbounds(a, i1, tailinds...)
+            # setindex! is a noop except for the index check
+        else
+            setindex!(a.parent, reinterpret(S, v), i1, tailinds...)
+        end
+    end
+    @boundscheck checkbounds(a, i1, tailinds...)
+    if sizeof(T) >= sizeof(S)
+        t = Ref{T}(v)
+        GC.@preserve t begin
+            sptr = Ptr{S}(unsafe_convert(Ref{T}, t))
+            if sizeof(T) > sizeof(S)
+                # Extra dimension in the parent array
+                n = sizeof(T) ÷ sizeof(S)
+                if isempty(tailinds) && IndexStyle(a.parent) === IndexLinear()
+                    offset = n * (i1 - firstindex(a))
+                    for i = 1:n
+                        s = unsafe_load(sptr, i)
+                        a.parent[i + offset] = s
+                    end
+                else
+                    for i = 1:n
+                        s = unsafe_load(sptr, i)
+                        a.parent[i, i1, tailinds...] = s
+                    end
+                end
+            else # sizeof(T) == sizeof(S)
+                # No extra dimension
+                s = unsafe_load(sptr)
+                a.parent[i1, tailinds...] = s
+            end
+        end
+    else
+        # S is bigger than T and contains an integer number of them
+        s = Ref{S}()
+        GC.@preserve s begin
+            tptr = Ptr{T}(unsafe_convert(Ref{S}, s))
+            s[] = a.parent[tailinds...]
+            unsafe_store!(tptr, v, i1)
+            a.parent[tailinds...] = s[]
+        end
+    end
+    return a
+end
+
 # Padding
 struct Padding
     offset::Int
@@ -345,7 +707,7 @@ function CyclePadding(T::DataType)
 end
 
 using .Iterators: Stateful
-@pure function array_subpadding(S, T)
+@assume_effects :total function array_subpadding(S, T)
     checked_size = 0
     lcm_size = lcm(sizeof(S), sizeof(T))
     s, t = Stateful{<:Any, Any}(CyclePadding(S)),
@@ -366,3 +728,46 @@ using .Iterators: Stateful
     end
     return true
 end
+
+# Reductions with IndexSCartesian2
+
+function _mapreduce(f::F, op::OP, style::IndexSCartesian2{K}, A::AbstractArrayOrBroadcasted) where {F,OP,K}
+    inds = eachindex(style, A)
+    n = size(inds)[2]
+    if n == 0
+        return mapreduce_empty_iter(f, op, A, IteratorEltype(A))
+    else
+        return mapreduce_impl(f, op, A, first(inds), last(inds))
+    end
+end
+
+@noinline function mapreduce_impl(f::F, op::OP, A::AbstractArrayOrBroadcasted,
+                                  ifirst::SCI, ilast::SCI, blksize::Int) where {F,OP,SCI<:SCartesianIndex2{K}} where K
+    if ilast.j - ifirst.j < blksize
+        # sequential portion
+        @inbounds a1 = A[ifirst]
+        @inbounds a2 = A[SCI(2,ifirst.j)]
+        v = op(f(a1), f(a2))
+        @simd for i = ifirst.i + 2 : K
+            @inbounds ai = A[SCI(i,ifirst.j)]
+            v = op(v, f(ai))
+        end
+        # Remaining columns
+        for j = ifirst.j+1 : ilast.j
+            @simd for i = 1:K
+                @inbounds ai = A[SCI(i,j)]
+                v = op(v, f(ai))
+            end
+        end
+        return v
+    else
+        # pairwise portion
+        jmid = ifirst.j + (ilast.j - ifirst.j) >> 1
+        v1 = mapreduce_impl(f, op, A, ifirst, SCI(K,jmid), blksize)
+        v2 = mapreduce_impl(f, op, A, SCI(1,jmid+1), ilast, blksize)
+        return op(v1, v2)
+    end
+end
+
+mapreduce_impl(f::F, op::OP, A::AbstractArrayOrBroadcasted, ifirst::SCartesianIndex2, ilast::SCartesianIndex2) where {F,OP} =
+    mapreduce_impl(f, op, A, ifirst, ilast, pairwise_blocksize(f, op))
diff --git a/base/reshapedarray.jl b/base/reshapedarray.jl
index c137afc06e5e42..82d293249afc6e 100644
--- a/base/reshapedarray.jl
+++ b/base/reshapedarray.jl
@@ -113,9 +113,10 @@ reshape(parent::AbstractArray, dims::Dims)        = _reshape(parent, dims)
 
 # Allow missing dimensions with Colon():
 reshape(parent::AbstractVector, ::Colon) = parent
+reshape(parent::AbstractVector, ::Tuple{Colon}) = parent
 reshape(parent::AbstractArray, dims::Int...) = reshape(parent, dims)
 reshape(parent::AbstractArray, dims::Union{Int,Colon}...) = reshape(parent, dims)
-reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = _reshape(parent, _reshape_uncolon(parent, dims))
+reshape(parent::AbstractArray, dims::Tuple{Vararg{Union{Int,Colon}}}) = reshape(parent, _reshape_uncolon(parent, dims))
 @inline function _reshape_uncolon(A, dims)
     @noinline throw1(dims) = throw(DimensionMismatch(string("new dimensions $(dims) ",
         "may have at most one omitted dimension specified by `Colon()`")))
@@ -146,14 +147,14 @@ end
 # product of trailing dims into the last element
 rdims_trailing(l, inds...) = length(l) * rdims_trailing(inds...)
 rdims_trailing(l) = length(l)
-rdims(out::Val{N}, inds::Tuple) where {N} = rdims(ntuple(i -> OneTo(1), Val(N)), inds)
+rdims(out::Val{N}, inds::Tuple) where {N} = rdims(ntuple(Returns(OneTo(1)), Val(N)), inds)
 rdims(out::Tuple{}, inds::Tuple{}) = () # N == 0, M == 0
 rdims(out::Tuple{}, inds::Tuple{Any}) = ()
 rdims(out::Tuple{}, inds::NTuple{M,Any}) where {M} = ()
 rdims(out::Tuple{Any}, inds::Tuple{}) = out # N == 1, M == 0
 rdims(out::NTuple{N,Any}, inds::Tuple{}) where {N} = out # N > 1, M == 0
 rdims(out::Tuple{Any}, inds::Tuple{Any}) = inds # N == 1, M == 1
-rdims(out::Tuple{Any}, inds::NTuple{M,Any}) where {M} = (OneTo(rdims_trailing(inds...)),) # N == 1, M > 1
+rdims(out::Tuple{Any}, inds::NTuple{M,Any}) where {M} = (oneto(rdims_trailing(inds...)),) # N == 1, M > 1
 rdims(out::NTuple{N,Any}, inds::NTuple{N,Any}) where {N} = inds # N > 1, M == N
 rdims(out::NTuple{N,Any}, inds::NTuple{M,Any}) where {N,M} = (first(inds), rdims(tail(out), tail(inds))...) # N > 1, M > 1, M != N
 
@@ -185,7 +186,7 @@ end
 _reshape(v::ReshapedArray{<:Any,1}, dims::Dims{1}) = _reshape(v.parent, dims)
 _reshape(R::ReshapedArray, dims::Dims) = _reshape(R.parent, dims)
 
-function __reshape(p::Tuple{AbstractArray,IndexCartesian}, dims::Dims)
+function __reshape(p::Tuple{AbstractArray,IndexStyle}, dims::Dims)
     parent = p[1]
     strds = front(size_to_strides(map(length, axes(parent))..., 1))
     strds1 = map(s->max(1,Int(s)), strds)  # for resizing empty arrays
@@ -207,7 +208,7 @@ size(A::ReshapedArray) = A.dims
 similar(A::ReshapedArray, eltype::Type, dims::Dims) = similar(parent(A), eltype, dims)
 IndexStyle(::Type{<:ReshapedArrayLF}) = IndexLinear()
 parent(A::ReshapedArray) = A.parent
-parentindices(A::ReshapedArray) = map(OneTo, size(parent(A)))
+parentindices(A::ReshapedArray) = map(oneto, size(parent(A)))
 reinterpret(::Type{T}, A::ReshapedArray, dims::Dims) where {T} = reinterpret(T, parent(A), dims)
 elsize(::Type{<:ReshapedArray{<:Any,<:Any,P}}) where {P} = elsize(P)
 
@@ -241,7 +242,7 @@ end
 
 @inline function _unsafe_getindex(A::ReshapedArray{T,N}, indices::Vararg{Int,N}) where {T,N}
     axp = axes(A.parent)
-    i = offset_if_vec(Base._sub2ind(size(A), indices...), axp)
+    i = offset_if_vec(_sub2ind(size(A), indices...), axp)
     I = ind2sub_rs(axp, A.mi, i)
     _unsafe_getindex_rs(parent(A), I)
 end
@@ -265,7 +266,7 @@ end
 
 @inline function _unsafe_setindex!(A::ReshapedArray{T,N}, val, indices::Vararg{Int,N}) where {T,N}
     axp = axes(A.parent)
-    i = offset_if_vec(Base._sub2ind(size(A), indices...), axp)
+    i = offset_if_vec(_sub2ind(size(A), indices...), axp)
     @inbounds parent(A)[ind2sub_rs(axes(A.parent), A.mi, i)...] = val
     val
 end
@@ -286,8 +287,21 @@ viewindexing(I::Tuple{Slice, ReshapedUnitRange, Vararg{ScalarIndex}}) = IndexLin
 viewindexing(I::Tuple{ReshapedRange, Vararg{ScalarIndex}}) = IndexLinear()
 compute_stride1(s, inds, I::Tuple{ReshapedRange, Vararg{Any}}) = s*step(I[1].parent)
 compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{ReshapedRange}) =
-    (@_inline_meta; first(I[1]) - first(axes1(I[1]))*stride1)
+    (@inline; first(I[1]) - first(axes1(I[1]))*stride1)
 substrides(strds::NTuple{N,Int}, I::Tuple{ReshapedUnitRange, Vararg{Any}}) where N =
     (size_to_strides(strds[1], size(I[1])...)..., substrides(tail(strds), tail(I))...)
 unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{Union{RangeIndex,ReshapedUnitRange}}}}) where {T,N,P} =
     unsafe_convert(Ptr{T}, V.parent) + (first_index(V)-1)*sizeof(T)
+
+
+_checkcontiguous(::Type{Bool}, A::AbstractArray) = size_to_strides(1, size(A)...) == strides(A)
+_checkcontiguous(::Type{Bool}, A::Array) = true
+_checkcontiguous(::Type{Bool}, A::ReshapedArray) = _checkcontiguous(Bool, parent(A))
+_checkcontiguous(::Type{Bool}, A::FastContiguousSubArray) = _checkcontiguous(Bool, parent(A))
+
+function strides(a::ReshapedArray)
+    # We can handle non-contiguous parent if it's a StridedVector
+    ndims(parent(a)) == 1 && return size_to_strides(only(strides(parent(a))), size(a)...)
+    _checkcontiguous(Bool, a) || throw(ArgumentError("Parent must be contiguous."))
+    size_to_strides(1, size(a)...)
+end
diff --git a/base/rounding.jl b/base/rounding.jl
index 1628d7a01ec1d4..25cfe2dc09829f 100644
--- a/base/rounding.jl
+++ b/base/rounding.jl
@@ -37,9 +37,13 @@ Currently supported rounding modes are:
 - [`RoundNearestTiesAway`](@ref)
 - [`RoundNearestTiesUp`](@ref)
 - [`RoundToZero`](@ref)
-- [`RoundFromZero`](@ref) ([`BigFloat`](@ref) only)
+- [`RoundFromZero`](@ref)
 - [`RoundUp`](@ref)
 - [`RoundDown`](@ref)
+
+!!! compat "Julia 1.9"
+    `RoundFromZero` requires at least Julia 1.9. Prior versions support
+    `RoundFromZero` for `BigFloat`s only.
 """
 struct RoundingMode{T} end
 
@@ -76,7 +80,10 @@ const RoundDown = RoundingMode{:Down}()
     RoundFromZero
 
 Rounds away from zero.
-This rounding mode may only be used with `T == BigFloat` inputs to [`round`](@ref).
+
+!!! compat "Julia 1.9"
+    `RoundFromZero` requires at least Julia 1.9. Prior versions support
+    `RoundFromZero` for `BigFloat`s only.
 
 # Examples
 ```jldoctest
@@ -84,7 +91,7 @@ julia> BigFloat("1.0000000000000001", 5, RoundFromZero)
 1.06
 ```
 """
-const RoundFromZero = RoundingMode{:FromZero}() # mpfr only
+const RoundFromZero = RoundingMode{:FromZero}()
 
 """
     RoundNearestTiesAway
@@ -151,8 +158,8 @@ See [`RoundingMode`](@ref) for available modes.
 """
 :rounding
 
-setrounding_raw(::Type{<:Union{Float32,Float64}}, i::Integer) = ccall(:fesetround, Int32, (Int32,), i)
-rounding_raw(::Type{<:Union{Float32,Float64}}) = ccall(:fegetround, Int32, ())
+setrounding_raw(::Type{<:Union{Float32,Float64}}, i::Integer) = ccall(:jl_set_fenv_rounding, Int32, (Int32,), i)
+rounding_raw(::Type{<:Union{Float32,Float64}}) = ccall(:jl_get_fenv_rounding, Int32, ())
 
 rounding(::Type{T}) where {T<:Union{Float32,Float64}} = from_fenv(rounding_raw(T))
 
diff --git a/base/ryu/LICENSE.md b/base/ryu/LICENSE.md
index 74c718646a08d8..cab89eec22785d 100644
--- a/base/ryu/LICENSE.md
+++ b/base/ryu/LICENSE.md
@@ -22,4 +22,4 @@ FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
 SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
 FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+DEALINGS IN THE SOFTWARE.
diff --git a/base/ryu/Ryu.jl b/base/ryu/Ryu.jl
index 1d260fe9b3696e..81d1c41f4c19f0 100644
--- a/base/ryu/Ryu.jl
+++ b/base/ryu/Ryu.jl
@@ -64,7 +64,7 @@ Various options for the output format include:
   * `hash`: whether the decimal point should be written, even if no additional digits are needed for precision
   * `precision`: minimum number of significant digits to be included in the decimal string; extra `'0'` characters will be added for padding if necessary
   * `decchar`: decimal point character to be used
-  * `trimtrailingzeros`: whether trailing zeros should be removed
+  * `trimtrailingzeros`: whether trailing zeros of fractional part should be removed
 """
 function writefixed(x::T,
     precision::Integer,
diff --git a/base/ryu/exp.jl b/base/ryu/exp.jl
index cf1fe23105b8c9..30291212d014d3 100644
--- a/base/ryu/exp.jl
+++ b/base/ryu/exp.jl
@@ -1,25 +1,16 @@
-@inline function writeexp(buf, pos, v::T,
+function writeexp(buf, pos, v::T,
     precision=-1, plus=false, space=false, hash=false,
     expchar=UInt8('e'), decchar=UInt8('.'), trimtrailingzeros=false) where {T <: Base.IEEEFloat}
     @assert 0 < pos <= length(buf)
     startpos = pos
     x = Float64(v)
-    neg = signbit(x)
+    pos = append_sign(x, plus, space, buf, pos)
+
     # special cases
     if x == 0
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
         buf[pos] = UInt8('0')
         pos += 1
-        if precision > 0
+        if precision > 0 && !trimtrailingzeros
             buf[pos] = decchar
             pos += 1
             for _ = 1:precision
@@ -41,16 +32,6 @@
         buf[pos + 2] = UInt8('N')
         return pos + 3
     elseif !isfinite(x)
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
         buf[pos] = UInt8('I')
         buf[pos + 1] = UInt8('n')
         buf[pos + 2] = UInt8('f')
@@ -70,16 +51,6 @@
     end
     nonzero = false
     precision += 1
-    if neg
-        buf[pos] = UInt8('-')
-        pos += 1
-    elseif plus
-        buf[pos] = UInt8('+')
-        pos += 1
-    elseif space
-        buf[pos] = UInt8(' ')
-        pos += 1
-    end
     digits = 0
     printedDigits = 0
     availableDigits = 0
@@ -213,7 +184,7 @@
         roundPos = pos
         while true
             roundPos -= 1
-            if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-')
+            if roundPos == (startpos - 1) || buf[roundPos] == UInt8('-') || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
                 buf[roundPos + 1] = UInt8('1')
                 e += 1
                 break
diff --git a/base/ryu/fixed.jl b/base/ryu/fixed.jl
index 4be1b3741832eb..e0085f5c66dab1 100644
--- a/base/ryu/fixed.jl
+++ b/base/ryu/fixed.jl
@@ -1,30 +1,18 @@
-@inline function writefixed(buf, pos, v::T,
+function writefixed(buf, pos, v::T,
     precision=-1, plus=false, space=false, hash=false,
     decchar=UInt8('.'), trimtrailingzeros=false) where {T <: Base.IEEEFloat}
     @assert 0 < pos <= length(buf)
     startpos = pos
     x = Float64(v)
-    neg = signbit(x)
+    pos = append_sign(x, plus, space, buf, pos)
+
     # special cases
     if x == 0
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
         buf[pos] = UInt8('0')
         pos += 1
-        if precision > 0
+        if precision > 0 && !trimtrailingzeros
             buf[pos] = decchar
             pos += 1
-            if trimtrailingzeros
-                precision = 1
-            end
             for _ = 1:precision
                 buf[pos] = UInt8('0')
                 pos += 1
@@ -40,16 +28,6 @@
         buf[pos + 2] = UInt8('N')
         return pos + 3
     elseif !isfinite(x)
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
-        end
         buf[pos] = UInt8('I')
         buf[pos + 1] = UInt8('n')
         buf[pos + 2] = UInt8('f')
@@ -68,16 +46,6 @@
         m2 = (Int64(1) << 52) | mant
     end
     nonzero = false
-    if neg
-        buf[pos] = UInt8('-')
-        pos += 1
-    elseif plus
-        buf[pos] = UInt8('+')
-        pos += 1
-    elseif space
-        buf[pos] = UInt8(' ')
-        pos += 1
-    end
     if e2 >= -52
         idx = e2 < 0 ? 0 : indexforexp(e2)
         p10bits = pow10bitsforindex(idx)
@@ -101,9 +69,11 @@
         buf[pos] = UInt8('0')
         pos += 1
     end
+    hasfractional = false
     if precision > 0 || hash
         buf[pos] = decchar
         pos += 1
+        hasfractional = true
     end
     if e2 < 0
         idx = div(-e2, 16)
@@ -166,11 +136,12 @@
             dotPos = 1
             while true
                 roundPos -= 1
-                if roundPos == (startpos - 1) || (buf[roundPos] == UInt8('-'))
+                if roundPos == (startpos - 1) || (buf[roundPos] == UInt8('-')) || (plus && buf[roundPos] == UInt8('+')) || (space && buf[roundPos] == UInt8(' '))
                     buf[roundPos + 1] = UInt8('1')
                     if dotPos > 1
                         buf[dotPos] = UInt8('0')
                         buf[dotPos + 1] = decchar
+                        hasfractional = true
                     end
                     buf[pos] = UInt8('0')
                     pos += 1
@@ -199,7 +170,7 @@
             pos += 1
         end
     end
-    if trimtrailingzeros
+    if trimtrailingzeros && hasfractional
         while buf[pos - 1] == UInt8('0')
             pos -= 1
         end
diff --git a/base/ryu/shortest.jl b/base/ryu/shortest.jl
index 21ef2e8c02e85a..f95c09d235e6df 100644
--- a/base/ryu/shortest.jl
+++ b/base/ryu/shortest.jl
@@ -224,25 +224,25 @@ integer. If a `maxsignif` argument is provided, then `b < maxsignif`.
     return b, e10
 end
 
-
-@inline function writeshortest(buf::Vector{UInt8}, pos, x::T,
-                               plus=false, space=false, hash=true,
-                               precision=-1, expchar=UInt8('e'), padexp=false, decchar=UInt8('.'),
-                               typed=false, compact=false) where {T}
+function writeshortest(buf::Vector{UInt8}, pos, x::T,
+                       plus=false, space=false, hash=true,
+                       precision=-1, expchar=UInt8('e'), padexp=false, decchar=UInt8('.'),
+                       typed=false, compact=false) where {T}
     @assert 0 < pos <= length(buf)
-    neg = signbit(x)
     # special cases
     if x == 0
-        if neg
-            buf[pos] = UInt8('-')
-            pos += 1
-        elseif plus
-            buf[pos] = UInt8('+')
-            pos += 1
-        elseif space
-            buf[pos] = UInt8(' ')
-            pos += 1
+        if typed && x isa Float16
+            buf[pos] = UInt8('F')
+            buf[pos + 1] = UInt8('l')
+            buf[pos + 2] = UInt8('o')
+            buf[pos + 3] = UInt8('a')
+            buf[pos + 4] = UInt8('t')
+            buf[pos + 5] = UInt8('1')
+            buf[pos + 6] = UInt8('6')
+            buf[pos + 7] = UInt8('(')
+            pos += 8
         end
+        pos = append_sign(x, plus, space, buf, pos)
         buf[pos] = UInt8('0')
         pos += 1
         if hash
@@ -257,6 +257,10 @@ end
                 buf[pos + 1] = UInt8('0')
                 pos += 2
             end
+            if typed && x isa Float16
+                buf[pos] = UInt8(')')
+                pos += 1
+            end
             return pos
         end
         while hash && precision > 1
@@ -269,8 +273,13 @@ end
             buf[pos + 1] = UInt8('0')
             pos += 2
         end
+        if typed && x isa Float16
+            buf[pos] = UInt8(')')
+            pos += 1
+        end
         return pos
     elseif isnan(x)
+        pos = append_sign(x, plus, space, buf, pos)
         buf[pos] = UInt8('N')
         buf[pos + 1] = UInt8('a')
         buf[pos + 2] = UInt8('N')
@@ -285,22 +294,20 @@ end
         end
         return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0)
     elseif !isfinite(x)
-        if neg
-            buf[pos] = UInt8('-')
-        end
-        buf[pos + neg] = UInt8('I')
-        buf[pos + neg + 1] = UInt8('n')
-        buf[pos + neg + 2] = UInt8('f')
+        pos = append_sign(x, plus, space, buf, pos)
+        buf[pos] = UInt8('I')
+        buf[pos + 1] = UInt8('n')
+        buf[pos + 2] = UInt8('f')
         if typed
             if x isa Float32
-                buf[pos + neg + 3] = UInt8('3')
-                buf[pos + neg + 4] = UInt8('2')
+                buf[pos + 3] = UInt8('3')
+                buf[pos + 4] = UInt8('2')
             elseif x isa Float16
-                buf[pos + neg + 3] = UInt8('1')
-                buf[pos + neg + 4] = UInt8('6')
+                buf[pos + 3] = UInt8('1')
+                buf[pos + 4] = UInt8('6')
             end
         end
-        return pos + neg + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0)
+        return pos + 3 + (typed && x isa Union{Float32, Float16} ? 2 : 0)
     end
 
     output, nexp = reduce_shortest(x, compact ? 999_999 : nothing)
@@ -316,16 +323,7 @@ end
         buf[pos + 7] = UInt8('(')
         pos += 8
     end
-    if neg
-        buf[pos] = UInt8('-')
-        pos += 1
-    elseif plus
-        buf[pos] = UInt8('+')
-        pos += 1
-    elseif space
-        buf[pos] = UInt8(' ')
-        pos += 1
-    end
+    pos = append_sign(x, plus, space, buf, pos)
 
     olength = decimallength(output)
     exp_form = true
diff --git a/base/ryu/utils.jl b/base/ryu/utils.jl
index 3980130305837c..352f8f19cb9bed 100644
--- a/base/ryu/utils.jl
+++ b/base/ryu/utils.jl
@@ -195,6 +195,20 @@ Compute `(m * mul) >> j % 10^9` where `mul = mula + mulb<<64 + mulc<<128`, and `
     return (v % UInt32) - UInt32(1000000000) * shifted
 end
 
+@inline function append_sign(x, plus, space, buf, pos)
+    if signbit(x) && !isnan(x)  # suppress minus sign for signaling NaNs
+        buf[pos] = UInt8('-')
+        pos += 1
+    elseif plus
+        buf[pos] = UInt8('+')
+        pos += 1
+    elseif space
+        buf[pos] = UInt8(' ')
+        pos += 1
+    end
+    return pos
+end
+
 @inline function append_n_digits(olength, digits, buf, pos)
     i = 0
     while digits >= 10000
@@ -353,10 +367,11 @@ end
 """
 function pow5invsplit_lookup end
 for T in (Float64, Float32, Float16)
-    e2_max = exponent_max(T) - precision(T) - 2
+    e2_max = exponent_max(T) - precision(T) - 1
     i_max = log10pow2(e2_max)
-    table = Any[pow5invsplit(T, i) for i = 0:i_max]
-    @eval pow5invsplit_lookup(::Type{$T}, i) = @inbounds($table[i+1])
+    table_sym = Symbol("pow5invsplit_table_", string(T))
+    @eval const $table_sym = Tuple(Any[pow5invsplit($T, i) for i = 0:$i_max])
+    @eval pow5invsplit_lookup(::Type{$T}, i) = @inbounds($table_sym[i+1])
 end
 
 
@@ -382,8 +397,9 @@ function pow5split_lookup end
 for T in (Float64, Float32, Float16)
     e2_min = 1 - exponent_bias(T) - significand_bits(T) - 2
     i_max = 1 - e2_min - log10pow5(-e2_min)
-    table = Any[pow5split(T, i) for i = 0:i_max]
-    @eval pow5split_lookup(::Type{$T}, i) = @inbounds($table[i+1])
+    table_sym = Symbol("pow5split_table_", string(T))
+    @eval const $table_sym = Tuple(Any[pow5split($T, i) for i = 0:$i_max])
+    @eval pow5split_lookup(::Type{$T}, i) = @inbounds($table_sym[i+1])
 end
 
 const DIGIT_TABLE = UInt8[
diff --git a/base/set.jl b/base/set.jl
index 75e2d5a744bb3a..66b5ef33fb4f31 100644
--- a/base/set.jl
+++ b/base/set.jl
@@ -3,13 +3,21 @@
 struct Set{T} <: AbstractSet{T}
     dict::Dict{T,Nothing}
 
-    Set{T}() where {T} = new(Dict{T,Nothing}())
-    Set{T}(s::Set{T}) where {T} = new(Dict{T,Nothing}(s.dict))
+    global _Set(dict::Dict{T,Nothing}) where {T} = new{T}(dict)
 end
 
+Set{T}() where {T} = _Set(Dict{T,Nothing}())
+Set{T}(s::Set{T}) where {T} = _Set(Dict{T,Nothing}(s.dict))
 Set{T}(itr) where {T} = union!(Set{T}(), itr)
 Set() = Set{Any}()
 
+function Set{T}(s::KeySet{T, <:Dict{T}}) where {T}
+    d = s.dict
+    slots = copy(d.slots)
+    keys = copy(d.keys)
+    vals = similar(d.vals, Nothing)
+    _Set(Dict{T,Nothing}(slots, keys, vals, d.ndel, d.count, d.age, d.idxfloor, d.maxprobe))
+end
 
 """
     Set([itr])
@@ -17,6 +25,8 @@ Set() = Set{Any}()
 Construct a [`Set`](@ref) of the values generated by the given iterable object, or an
 empty set. Should be used instead of [`BitSet`](@ref) for sparse integer sets, or
 for sets of arbitrary objects.
+
+See also: [`push!`](@ref), [`empty!`](@ref), [`union!`](@ref), [`in`](@ref).
 """
 Set(itr) = _Set(itr, IteratorEltype(itr))
 
@@ -34,7 +44,7 @@ empty(s::AbstractSet{T}, ::Type{U}=T) where {T,U} = Set{U}()
 # by default, a Set is returned
 emptymutable(s::AbstractSet{T}, ::Type{U}=T) where {T,U} = Set{U}()
 
-_similar_for(c::AbstractSet, ::Type{T}, itr, isz) where {T} = empty(c, T)
+_similar_for(c::AbstractSet, ::Type{T}, itr, isz, len) where {T} = empty(c, T)
 
 function show(io::IO, s::Set)
     if isempty(s)
@@ -54,6 +64,16 @@ end
 isempty(s::Set) = isempty(s.dict)
 length(s::Set)  = length(s.dict)
 in(x, s::Set) = haskey(s.dict, x)
+
+# This avoids hashing and probing twice and it works the same as
+# in!(x, s::Set) = in(x, s) ? true : (push!(s, x); false)
+function in!(x, s::Set)
+    idx, sh = ht_keyindex2_shorthash!(s.dict, x)
+    idx > 0 && return true
+    _setindex!(s.dict, nothing, x, -idx, sh)
+    return false
+end
+
 push!(s::Set, x) = (s.dict[x] = nothing; s)
 pop!(s::Set, x) = (pop!(s.dict, x); x)
 pop!(s::Set, x, default) = (x in s ? pop!(s, x) : default)
@@ -105,6 +125,8 @@ as determined by [`isequal`](@ref), in the order that the first of each
 set of equivalent elements originally appears. The element type of the
 input is preserved.
 
+See also: [`unique!`](@ref), [`allunique`](@ref), [`allequal`](@ref).
+
 # Examples
 ```jldoctest
 julia> unique([1, 2, 6, 2])
@@ -125,10 +147,7 @@ function unique(itr)
         out = Vector{T}()
         seen = Set{T}()
         for x in itr
-            if !in(x, seen)
-                push!(seen, x)
-                push!(out, x)
-            end
+            !in!(x, seen) && push!(out, x)
         end
         return out
     end
@@ -152,16 +171,10 @@ _unique_from(itr, out, seen, i) = unique_from(itr, out, seen, i)
             R = promote_typejoin(S, T)
             seenR = convert(Set{R}, seen)
             outR = convert(Vector{R}, out)
-            if !in(x, seenR)
-                push!(seenR, x)
-                push!(outR, x)
-            end
+            !in!(x, seenR) && push!(outR, x)
             return _unique_from(itr, outR, seenR, i)
         end
-        if !in(x, seen)
-            push!(seen, x)
-            push!(out, x)
-        end
+        !in!(x, seen) && push!(out, x)
     end
     return out
 end
@@ -187,11 +200,7 @@ function unique(f, C; seen::Union{Nothing,Set}=nothing)
     out = Vector{eltype(C)}()
     if seen !== nothing
         for x in C
-            y = f(x)
-            if y ∉ seen
-                push!(out, x)
-                push!(seen, y)
-            end
+            !in!(f(x), seen) && push!(out, x)
         end
         return out
     end
@@ -308,10 +317,12 @@ function _groupedunique!(A::AbstractVector)
     idxs = eachindex(A)
     y = first(A)
     # We always keep the first element
-    it = iterate(idxs, iterate(idxs)[2])
+    T = NTuple{2,Any} # just to eliminate `iterate(idxs)::Nothing` candidate
+    it = iterate(idxs, (iterate(idxs)::T)[2])
     count = 1
     for x in Iterators.drop(A, 1)
         if !isequal(x, y)
+            it = it::T
             y = A[it[1]] = x
             count += 1
             it = iterate(idxs, it[2])
@@ -369,6 +380,8 @@ end
 
 Return `true` if all values from `itr` are distinct when compared with [`isequal`](@ref).
 
+See also: [`unique`](@ref), [`issorted`](@ref), [`allequal`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = [1; 2; 3]
@@ -377,28 +390,27 @@ julia> a = [1; 2; 3]
  2
  3
 
+julia> allunique(a)
+true
+
 julia> allunique([a, a])
 false
 ```
 """
 function allunique(C)
-    seen = Dict{eltype(C), Nothing}()
+    seen = Set{eltype(C)}()
     x = iterate(C)
     if haslength(C) && length(C) > 1000
         for i in OneTo(1000)
             v, s = x
-            idx = ht_keyindex2!(seen, v)
-            idx > 0 && return false
-            _setindex!(seen, nothing, v, -idx)
+            in!(v, seen) && return false
             x = iterate(C, s)
         end
         sizehint!(seen, length(C))
     end
     while x !== nothing
         v, s = x
-        idx = ht_keyindex2!(seen, v)
-        idx > 0 && return false
-        _setindex!(seen, nothing, v, -idx)
+        in!(v, seen) && return false
         x = iterate(C, s)
     end
     return true
@@ -408,6 +420,40 @@ allunique(::Union{AbstractSet,AbstractDict}) = true
 
 allunique(r::AbstractRange) = !iszero(step(r)) || length(r) <= 1
 
+"""
+    allequal(itr) -> Bool
+
+Return `true` if all values from `itr` are equal when compared with [`isequal`](@ref).
+
+See also: [`unique`](@ref), [`allunique`](@ref).
+
+!!! compat "Julia 1.8"
+    The `allequal` function requires at least Julia 1.8.
+
+# Examples
+```jldoctest
+julia> allequal([])
+true
+
+julia> allequal([1])
+true
+
+julia> allequal([1, 1])
+true
+
+julia> allequal([1, 2])
+false
+
+julia> allequal(Dict(:a => 1, :b => 1))
+false
+```
+"""
+allequal(itr) = isempty(itr) ? true : all(isequal(first(itr)), itr)
+
+allequal(c::Union{AbstractSet,AbstractDict}) = length(c) <= 1
+
+allequal(r::AbstractRange) = iszero(step(r)) || length(r) <= 1
+
 filter!(f, s::Set) = unsafe_filter!(f, s)
 
 const hashs_seed = UInt === UInt64 ? 0x852ada37cfe8e0ce : 0xcfe8e0ce
@@ -433,7 +479,7 @@ end
 # TODO: use copy!, which is currently unavailable from here since it is defined in Future
 _copy_oftype(x, ::Type{T}) where {T} = copyto!(similar(x, T), x)
 # TODO: use similar() once deprecation is removed and it preserves keys
-_copy_oftype(x::AbstractDict, ::Type{T}) where {T} = merge!(empty(x, T), x)
+_copy_oftype(x::AbstractDict, ::Type{Pair{K,V}}) where {K,V} = merge!(empty(x, K, V), x)
 _copy_oftype(x::AbstractSet, ::Type{T}) where {T} = union!(empty(x, T), x)
 
 _copy_oftype(x::AbstractArray{T}, ::Type{T}) where {T} = copy(x)
@@ -535,7 +581,10 @@ of the result will not include singleton types which are replaced with values of
 a different type: for example, `Union{T,Missing}` will become `T` if `missing` is
 replaced.
 
-See also [`replace!`](@ref).
+See also [`replace!`](@ref), [`splice!`](@ref), [`delete!`](@ref), [`insert!`](@ref).
+
+!!! compat "Julia 1.7"
+    Version 1.7 is required to replace elements of a `Tuple`.
 
 # Examples
 ```jldoctest
@@ -570,7 +619,7 @@ promote_valuetype(x::Pair{K, V}, y::Pair...) where {K, V} =
 # Subtract singleton types which are going to be replaced
 function subtract_singletontype(::Type{T}, x::Pair{K}) where {T, K}
     if issingletontype(K)
-        Core.Compiler.typesubtract(T, K)
+        typesplit(T, K)
     else
         T
     end
@@ -585,6 +634,9 @@ Return a copy of `A` where each value `x` in `A` is replaced by `new(x)`.
 If `count` is specified, then replace at most `count` values in total
 (replacements being defined as `new(x) !== x`).
 
+!!! compat "Julia 1.7"
+    Version 1.7 is required to replace elements of a `Tuple`.
+
 # Examples
 ```jldoctest
 julia> replace(x -> isodd(x) ? 2x : x, [1, 2, 3, 4])
@@ -610,15 +662,16 @@ replace!(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace!,
 replace!(a::Callable, b::Pair, c::Pair; count::Integer=-1) = throw(MethodError(replace!, (a, b, c)))
 replace(a::Callable, b::Pair; count::Integer=-1) = throw(MethodError(replace, (a, b)))
 replace(a::Callable, b::Pair, c::Pair; count::Integer=-1) = throw(MethodError(replace, (a, b, c)))
-replace(a::AbstractString, b::Pair, c::Pair) = throw(MethodError(replace, (a, b, c)))
 
 ### replace! for AbstractDict/AbstractSet
 
 askey(k, ::AbstractDict) = k.first
 askey(k, ::AbstractSet) = k
 
-function _replace!(new::Callable, res::T, A::T,
-                   count::Int) where T<:Union{AbstractDict,AbstractSet}
+function _replace!(new::Callable, res::Union{AbstractDict,AbstractSet},
+                   A::Union{AbstractDict,AbstractSet}, count::Int)
+    @assert res isa AbstractDict && A isa AbstractDict ||
+        res isa AbstractSet && A isa AbstractSet
     count == 0 && return res
     c = 0
     if res === A # cannot replace elements while iterating over A
@@ -683,3 +736,73 @@ function _replace!(new::Callable, res::AbstractArray, A::AbstractArray, count::I
     end
     res
 end
+
+### specialization for Dict / Set
+
+function _replace!(new::Callable, t::Dict{K,V}, A::AbstractDict, count::Int) where {K,V}
+    # we ignore A, which is supposed to be equal to the destination t,
+    # as it can generally be faster to just replace inline
+    count == 0 && return t
+    c = 0
+    news = Pair{K,V}[]
+    i = skip_deleted_floor!(t)
+    @inbounds while i != 0
+        k1, v1 = t.keys[i], t.vals[i]
+        x1 = Pair{K,V}(k1, v1)
+        x2 = new(x1)
+        if x1 !== x2
+            k2, v2 = first(x2), last(x2)
+            if isequal(k1, k2)
+                t.keys[i] = k2
+                t.vals[i] = v2
+                t.age += 1
+            else
+                _delete!(t, i)
+                push!(news, x2)
+            end
+            c += 1
+            c == count && break
+        end
+        i = i == typemax(Int) ? 0 : skip_deleted(t, i+1)
+    end
+    for n in news
+        push!(t, n)
+    end
+    t
+end
+
+function _replace!(new::Callable, t::Set{T}, ::AbstractSet, count::Int) where {T}
+    _replace!(t.dict, t.dict, count) do kv
+        k = first(kv)
+        k2 = new(k)
+        k2 === k ? kv : k2 => nothing
+    end
+    t
+end
+
+### replace for tuples
+
+function _replace(f::Callable, t::Tuple, count::Int)
+    if count == 0 || isempty(t)
+        t
+    else
+        x = f(t[1])
+        (x, _replace(f, tail(t), count - !==(x, t[1]))...)
+    end
+end
+
+replace(f::Callable, t::Tuple; count::Integer=typemax(Int)) =
+    _replace(f, t, check_count(count))
+
+function _replace(t::Tuple, count::Int, old_new::Tuple{Vararg{Pair}})
+    _replace(t, count) do x
+        @inline
+        for o_n in old_new
+            isequal(first(o_n), x) && return last(o_n)
+        end
+        return x
+    end
+end
+
+replace(t::Tuple, old_new::Pair...; count::Integer=typemax(Int)) =
+    _replace(t, check_count(count), old_new)
diff --git a/base/shell.jl b/base/shell.jl
index a58f48034b6d21..f443a1f9c094ad 100644
--- a/base/shell.jl
+++ b/base/shell.jl
@@ -49,22 +49,24 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
         empty!(innerlist)
     end
 
+    C = eltype(str)
+    P = Pair{Int,C}
     for (j, c) in st
-        j, c = j::Int, c::eltype(str)
+        j, c = j::Int, c::C
         if !in_single_quotes && !in_double_quotes && isspace(c)
             i = consume_upto!(arg, s, i, j)
             append_2to1!(args, arg)
             while !isempty(st)
                 # We've made sure above that we don't end in whitespace,
                 # so updating `i` here is ok
-                (i, c) = peek(st)::Pair{Int,eltype(str)}
+                (i, c) = peek(st)::P
                 isspace(c) || break
                 popfirst!(st)
             end
         elseif interpolate && !in_single_quotes && c == '$'
             i = consume_upto!(arg, s, i, j)
             isempty(st) && error("\$ right before end of command")
-            stpos, c = popfirst!(st)::Pair{Int,eltype(str)}
+            stpos, c = popfirst!(st)::P
             isspace(c) && error("space not allowed right after \$")
             if startswith(SubString(s, stpos), "var\"")
                 # Disallow var"#" syntax in cmd interpolations.
@@ -87,15 +89,25 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
             elseif !in_single_quotes && c == '"'
                 in_double_quotes = !in_double_quotes
                 i = consume_upto!(arg, s, i, j)
-            elseif c == '\\'
-                if in_double_quotes
+            elseif !in_single_quotes && c == '\\'
+                if !isempty(st) && (peek(st)::P)[2] in ('\n', '\r')
+                    i = consume_upto!(arg, s, i, j) + 1
+                    if popfirst!(st)[2] == '\r' && (peek(st)::P)[2] == '\n'
+                        i += 1
+                        popfirst!(st)
+                    end
+                    while !isempty(st) && (peek(st)::P)[2] in (' ', '\t')
+                        i = nextind(str, i)
+                        _ = popfirst!(st)
+                    end
+                elseif in_double_quotes
                     isempty(st) && error("unterminated double quote")
-                    k, c′ = peek(st)
+                    k, c′ = peek(st)::P
                     if c′ == '"' || c′ == '$' || c′ == '\\'
                         i = consume_upto!(arg, s, i, j)
                         _ = popfirst!(st)
                     end
-                elseif !in_single_quotes
+                else
                     isempty(st) && error("dangling backslash")
                     i = consume_upto!(arg, s, i, j)
                     _ = popfirst!(st)
@@ -199,8 +211,8 @@ function print_shell_escaped_posixly(io::IO, args::AbstractString...)
         first || print(io, ' ')
         # avoid printing quotes around simple enough strings
         # that any (reasonable) shell will definitely never consider them to be special
-        have_single = false
-        have_double = false
+        have_single::Bool = false
+        have_double::Bool = false
         function isword(c::AbstractChar)
             if '0' <= c <= '9' || 'a' <= c <= 'z' || 'A' <= c <= 'Z'
                 # word characters
@@ -251,61 +263,200 @@ julia> Base.shell_escape_posixly("echo", "this", "&&", "that")
 shell_escape_posixly(args::AbstractString...) =
     sprint(print_shell_escaped_posixly, args...)
 
-
-function print_shell_escaped_winsomely(io::IO, args::AbstractString...)
+"""
+    shell_escape_csh(args::Union{Cmd,AbstractString...})
+    shell_escape_csh(io::IO, args::Union{Cmd,AbstractString...})
+
+This function quotes any metacharacters in the string arguments such
+that the string returned can be inserted into a command-line for
+interpretation by the Unix C shell (csh, tcsh), where each string
+argument will form one word.
+
+In contrast to a POSIX shell, csh does not support the use of the
+backslash as a general escape character in double-quoted strings.
+Therefore, this function wraps strings that might contain
+metacharacters in single quotes, except for parts that contain single
+quotes, which it wraps in double quotes instead. It switches between
+these types of quotes as needed. Linefeed characters are escaped with
+a backslash.
+
+This function should also work for a POSIX shell, except if the input
+string contains a linefeed (`"\\n"`) character.
+
+See also: [`shell_escape_posixly`](@ref)
+"""
+function shell_escape_csh(io::IO, args::AbstractString...)
     first = true
     for arg in args
         first || write(io, ' ')
         first = false
-        # Quote any arg that contains a whitespace (' ' or '\t') or a double quote mark '"'.
-        # It's also valid to quote an arg with just a whitespace,
-        # but the following may be 'safer', and both implementations are valid anyways.
-        quotes = any(c -> c in (' ', '\t', '"'), arg) || isempty(arg)
-        quotes && write(io, '"')
-        backslashes = 0
-        for c in arg
-            if c == '\\'
-                backslashes += 1
-            else
-                # escape all backslashes and the following double quote
-                c == '"' && (backslashes = backslashes * 2 + 1)
-                for j = 1:backslashes
-                    # backslashes aren't special here
-                    write(io, '\\')
+        i = 1
+        while true
+            for (r,e) = (r"^[A-Za-z0-9/\._-]+\z" => "",
+                         r"^[^']*\z" => "'", r"^[^\$\`\"]*\z" => "\"",
+                         r"^[^']+"  => "'", r"^[^\$\`\"]+"  => "\"")
+                if ((m = match(r, SubString(arg, i))) !== nothing)
+                    write(io, e)
+                    write(io, replace(m.match, '\n' => "\\\n"))
+                    write(io, e)
+                    i += ncodeunits(m.match)
+                    break
                 end
-                backslashes = 0
-                write(io, c)
             end
+            i <= lastindex(arg) || break
         end
-        # escape all backslashes, letting the terminating double quote we add below to then be interpreted as a special char
-        quotes && (backslashes *= 2)
-        for j = 1:backslashes
-            write(io, '\\')
-        end
-        quotes && write(io, '"')
     end
-    return nothing
 end
-
+shell_escape_csh(args::AbstractString...) =
+    sprint(shell_escape_csh, args...;
+           sizehint = sum(sizeof.(args)) + length(args) * 3)
 
 """
-     shell_escaped_winsomely(args::Union{Cmd,AbstractString...})::String
-
-Convert the collection of strings `args` into single string suitable for passing as the argument
-string for a Windows command line. Windows passes the entire command line as a single string to
-the application (unlike POSIX systems, where the list of arguments are passed separately).
-Many Windows API applications (including julia.exe), use the conventions of the [Microsoft C
-runtime](https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments) to
-split that command line into a list of strings. This function implements the inverse of such a
-C runtime command-line parser. It joins command-line arguments to be passed to a Windows console
-application into a command line, escaping or quoting meta characters such as space,
-double quotes and backslash where needed. This may be useful in concert with the `windows_verbatim`
-flag to [`Cmd`](@ref) when constructing process pipelines.
+    shell_escape_wincmd(s::AbstractString)
+    shell_escape_wincmd(io::IO, s::AbstractString)
+
+The unexported `shell_escape_wincmd` function escapes Windows `cmd.exe` shell
+meta characters. It escapes `()!^<>&|` by placing a `^` in front. An `@` is
+only escaped at the start of the string. Pairs of `"` characters and the
+strings they enclose are passed through unescaped. Any remaining `"` is escaped
+with `^` to ensure that the number of unescaped `"` characters in the result
+remains even.
+
+Since `cmd.exe` substitutes variable references (like `%USER%`) _before_
+processing the escape characters `^` and `"`, this function makes no attempt to
+escape the percent sign (`%`), the presence of `%` in the input may cause
+severe breakage, depending on where the result is used.
+
+Input strings with ASCII control characters that cannot be escaped (NUL, CR,
+LF) will cause an `ArgumentError` exception.
+
+The result is safe to pass as an argument to a command call being processed by
+`CMD.exe /S /C " ... "` (with surrounding double-quote pair) and will be
+received verbatim by the target application if the input does not contain `%`
+(else this function will fail with an ArgumentError). The presence of `%` in
+the input string may result in command injection vulnerabilities and may
+invalidate any claim of suitability of the output of this function for use as
+an argument to cmd (due to the ordering described above), so use caution when
+assembling a string from various sources.
+
+This function may be useful in concert with the `windows_verbatim` flag to
+[`Cmd`](@ref) when constructing process pipelines.
+
+```julia
+wincmd(c::String) =
+   run(Cmd(Cmd(["cmd.exe", "/s /c \\" \$c \\""]);
+           windows_verbatim=true))
+wincmd_echo(s::String) =
+   wincmd("echo " * Base.shell_escape_wincmd(s))
+wincmd_echo("hello \$(ENV["USERNAME"]) & the \\"whole\\" world! (=^I^=)")
+```
+
+But take note that if the input string `s` contains a `%`, the argument list
+and echo'ed text may get corrupted, resulting in arbitrary command execution.
+The argument can alternatively be passed as an environment variable, which
+avoids the problem with `%` and the need for the `windows_verbatim` flag:
+
+```julia
+cmdargs = Base.shell_escape_wincmd("Passing args with %cmdargs% works 100%!")
+run(setenv(`cmd /C echo %cmdargs%`, "cmdargs" => cmdargs))
+```
+
+!warning
+    The argument parsing done by CMD when calling batch files (either inside
+    `.bat` files or as arguments to them) is not fully compatible with the
+    output of this function. In particular, the processing of `%` is different.
+
+!important
+    Due to a peculiar behavior of the CMD parser/interpreter, each command
+    after a literal `|` character (indicating a command pipeline) must have
+    `shell_escape_wincmd` applied twice since it will be parsed twice by CMD.
+    This implies ENV variables would also be expanded twice!
+    For example:
+    ```julia
+    to_print = "All for 1 & 1 for all!"
+    to_print_esc = Base.shell_escape_wincmd(Base.shell_escape_wincmd(to_print))
+    run(Cmd(Cmd(["cmd", "/S /C \\" break | echo \$(to_print_esc) \\""]), windows_verbatim=true))
+    ```
+
+With an I/O stream parameter `io`, the result will be written there,
+rather than returned as a string.
+
+See also [`escape_microsoft_c_args`](@ref), [`shell_escape_posixly`](@ref).
 
 # Example
 ```jldoctest
-julia> println(shell_escaped_winsomely("A B\\", "C"))
-"A B\\" C
+julia> Base.shell_escape_wincmd("a^\\"^o\\"^u\\"")
+"a^^\\"^o\\"^^u^\\""
+```
+"""
+function shell_escape_wincmd(io::IO, s::AbstractString)
+    # https://stackoverflow.com/a/4095133/1990689
+    occursin(r"[\r\n\0]", s) &&
+        throw(ArgumentError("control character unsupported by CMD.EXE"))
+    i = 1
+    len = ncodeunits(s)
+    if len > 0 && s[1] == '@'
+        write(io, '^')
+    end
+    while i <= len
+        c = s[i]
+        if c == '"' && (j = findnext('"', s, nextind(s,i))) !== nothing
+            write(io, SubString(s,i,j))
+            i = j
+        else
+            if c in ('"', '(', ')', '!', '^', '<', '>', '&', '|')
+                write(io, '^', c)
+            else
+                write(io, c)
+            end
+        end
+        i = nextind(s,i)
+    end
+end
+shell_escape_wincmd(s::AbstractString) = sprint(shell_escape_wincmd, s;
+                                                sizehint = 2*sizeof(s))
+
 """
-shell_escape_winsomely(args::AbstractString...) =
-    sprint(print_shell_escaped_winsomely, args..., sizehint=(sum(length, args)) + 3*length(args))
+    escape_microsoft_c_args(args::Union{Cmd,AbstractString...})
+    escape_microsoft_c_args(io::IO, args::Union{Cmd,AbstractString...})
+
+Convert a collection of string arguments into a string that can be
+passed to many Windows command-line applications.
+
+Microsoft Windows passes the entire command line as a single string to
+the application (unlike POSIX systems, where the shell splits the
+command line into a list of arguments). Many Windows API applications
+(including julia.exe), use the conventions of the [Microsoft C/C++
+runtime](https://docs.microsoft.com/en-us/cpp/c-language/parsing-c-command-line-arguments)
+to split that command line into a list of strings.
+
+This function implements an inverse for a parser compatible with these rules.
+It joins command-line arguments to be passed to a Windows
+C/C++/Julia application into a command line, escaping or quoting the
+meta characters space, TAB, double quote and backslash where needed.
+
+See also [`shell_escape_wincmd`](@ref), [`escape_raw_string`](@ref).
+"""
+function escape_microsoft_c_args(io::IO, args::AbstractString...)
+    # http://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
+    first = true
+    for arg in args
+        if first
+            first = false
+        else
+            write(io, ' ')  # separator
+        end
+        if isempty(arg) || occursin(r"[ \t\"]", arg)
+            # Julia raw strings happen to use the same escaping convention
+            # as the argv[] parser in Microsoft's C runtime library.
+            write(io, '"')
+            escape_raw_string(io, arg)
+            write(io, '"')
+        else
+            write(io, arg)
+        end
+    end
+end
+escape_microsoft_c_args(args::AbstractString...) =
+    sprint(escape_microsoft_c_args, args...;
+           sizehint = (sum(sizeof.(args)) + 3*length(args)))
diff --git a/base/show.jl b/base/show.jl
index f4a65dfbf6d0f6..e59f2c8a9ce8ee 100644
--- a/base/show.jl
+++ b/base/show.jl
@@ -45,6 +45,7 @@ function show(io::IO, ::MIME"text/plain", f::Function)
 end
 
 show(io::IO, ::MIME"text/plain", c::ComposedFunction) = show(io, c)
+show(io::IO, ::MIME"text/plain", c::Returns) = show(io, c)
 
 const ansi_regex = r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])"
 # An iterator similar to `pairs` but skips over "tokens" corresponding to
@@ -393,6 +394,8 @@ getindex(io::IOContext, key) = getindex(io.dict, key)
 getindex(io::IO, key) = throw(KeyError(key))
 get(io::IOContext, key, default) = get(io.dict, key, default)
 get(io::IO, key, default) = default
+keys(io::IOContext) = keys(io.dict)
+keys(io::IO) = keys(ImmutableDict{Symbol,Any}())
 
 displaysize(io::IOContext) = haskey(io, :displaysize) ? io[:displaysize]::Tuple{Int,Int} : displaysize(io.io)
 
@@ -439,14 +442,14 @@ Hello World!
 """
 show(io::IO, @nospecialize(x)) = show_default(io, x)
 
-show(x) = show(stdout::IO, x)
+show(x) = show(stdout, x)
 
 # avoid inferring show_default on the type of `x`
 show_default(io::IO, @nospecialize(x)) = _show_default(io, inferencebarrier(x))
 
 function _show_default(io::IO, @nospecialize(x))
     t = typeof(x)
-    show(io, inferencebarrier(t))
+    show(io, inferencebarrier(t)::DataType)
     print(io, '(')
     nf = nfields(x)
     nb = sizeof(x)::Int
@@ -468,7 +471,7 @@ function _show_default(io::IO, @nospecialize(x))
         end
     else
         print(io, "0x")
-        r = Ref(x)
+        r = Ref{Any}(x)
         GC.@preserve r begin
             p = unsafe_convert(Ptr{Cvoid}, r)
             for i in (nb - 1):-1:0
@@ -493,28 +496,29 @@ function is_exported_from_stdlib(name::Symbol, mod::Module)
     return isexported(mod, name) && isdefined(mod, name) && !isdeprecated(mod, name) && getfield(mod, name) === orig
 end
 
-function show_function(io::IO, f::Function, compact::Bool)
+function show_function(io::IO, f::Function, compact::Bool, fallback::Function)
     ft = typeof(f)
     mt = ft.name.mt
     if mt === Symbol.name.mt
         # uses shared method table
-        show_default(io, f)
+        fallback(io, f)
     elseif compact
         print(io, mt.name)
     elseif isdefined(mt, :module) && isdefined(mt.module, mt.name) &&
         getfield(mt.module, mt.name) === f
         if is_exported_from_stdlib(mt.name, mt.module) || mt.module === Main
-            print(io, mt.name)
+            show_sym(io, mt.name)
         else
-            print(io, mt.module, ".", mt.name)
+            print(io, mt.module, ".")
+            show_sym(io, mt.name)
         end
     else
-        show_default(io, f)
+        fallback(io, f)
     end
 end
 
-show(io::IO, f::Function) = show_function(io, f, get(io, :compact, false)::Bool)
-print(io::IO, f::Function) = show_function(io, f, true)
+show(io::IO, f::Function) = show_function(io, f, get(io, :compact, false)::Bool, show_default)
+print(io::IO, f::Function) = show_function(io, f, true, show)
 
 function show(io::IO, f::Core.IntrinsicFunction)
     if !(get(io, :compact, false)::Bool)
@@ -561,36 +565,21 @@ end
 # we're attempting to represent.
 # Union{T} where T is a degenerate case and is equal to T.ub, but we don't want
 # to print them that way, so filter those out from our aliases completely.
-function makeproper(io::IO, x::Type)
-    properx = x
-    x = unwrap_unionall(x)
+function makeproper(io::IO, @nospecialize(x::Type))
     if io isa IOContext
         for (key, val) in io.dict
             if key === :unionall_env && val isa TypeVar
-                properx = UnionAll(val, properx)
+                x = UnionAll(val, x)
             end
         end
     end
-    if x isa Union
-        y = []
-        normal = true
-        for typ in uniontypes(x)
-            if isa(typ, TypeVar)
-                normal = false
-            else
-                push!(y, typ)
-            end
-        end
-        normal || (x = Union{y...})
-        properx = rewrap_unionall(x, properx)
-    end
-    has_free_typevars(properx) && return Any
-    return properx
+    has_free_typevars(x) && return Any
+    return x
 end
 
 function make_typealias(@nospecialize(x::Type))
-    Any <: x && return
-    x <: Tuple && return
+    Any === x && return nothing
+    x <: Tuple && return nothing
     mods = modulesof!(Set{Module}(), x)
     Core in mods && push!(mods, Base)
     aliases = Tuple{GlobalRef,SimpleVector}[]
@@ -603,7 +592,7 @@ function make_typealias(@nospecialize(x::Type))
         for name in names(mod)
             if isdefined(mod, name) && !isdeprecated(mod, name) && isconst(mod, name)
                 alias = getfield(mod, name)
-                if alias isa Type && !has_free_typevars(alias) && !isvarargtype(alias) && !print_without_params(alias) && x <: alias
+                if alias isa Type && !has_free_typevars(alias) && !print_without_params(alias) && x <: alias
                     if alias isa UnionAll
                         (ti, env) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), x, alias)::SimpleVector
                         # ti === Union{} && continue # impossible, since we already checked that x <: alias
@@ -629,8 +618,8 @@ function make_typealias(@nospecialize(x::Type))
                             applied = rewrap_unionall(applied, p)
                         end
                         has_free_typevars(applied) && continue
-                        applied == x || continue # it couldn't figure out the parameter matching
-                    elseif alias <: x
+                        applied === x || continue # it couldn't figure out the parameter matching
+                    elseif alias === x
                         env = Core.svec()
                     else
                         continue # not a complete match
@@ -645,7 +634,70 @@ function make_typealias(@nospecialize(x::Type))
     end
 end
 
-function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector)
+isgensym(s::Symbol) = '#' in string(s)
+
+function show_can_elide(p::TypeVar, wheres::Vector, elide::Int, env::SimpleVector, skip::Int)
+    elide == 0 && return false
+    wheres[elide] === p || return false
+    for i = (elide + 1):length(wheres)
+        v = wheres[i]::TypeVar
+        has_typevar(v.lb, p) && return false
+        has_typevar(v.ub, p) && return false
+    end
+    for i = 1:length(env)
+        i == skip && continue
+        has_typevar(env[i], p) && return false
+    end
+    return true
+end
+
+function show_typeparams(io::IO, env::SimpleVector, orig::SimpleVector, wheres::Vector)
+    n = length(env)
+    elide = length(wheres)
+    function egal_var(p::TypeVar, @nospecialize o)
+        return o isa TypeVar &&
+            ccall(:jl_types_egal, Cint, (Any, Any), p.ub, o.ub) != 0 &&
+            ccall(:jl_types_egal, Cint, (Any, Any), p.lb, o.lb) != 0
+    end
+    for i = n:-1:1
+        p = env[i]
+        if p isa TypeVar
+            if i == n && egal_var(p, orig[i]) && show_can_elide(p, wheres, elide, env, i)
+                n -= 1
+                elide -= 1
+            elseif p.lb === Union{} && isgensym(p.name) && show_can_elide(p, wheres, elide, env, i)
+                elide -= 1
+            elseif p.ub === Any && isgensym(p.name) && show_can_elide(p, wheres, elide, env, i)
+                elide -= 1
+            end
+        end
+    end
+    if n > 0
+        print(io, "{")
+        for i = 1:n
+            p = env[i]
+            if p isa TypeVar
+                if p.lb === Union{} && something(findfirst(@nospecialize(w) -> w === p, wheres), 0) > elide
+                    print(io, "<:")
+                    show(io, p.ub)
+                elseif p.ub === Any && something(findfirst(@nospecialize(w) -> w === p, wheres), 0) > elide
+                    print(io, ">:")
+                    show(io, p.lb)
+                else
+                    show(io, p)
+                end
+            else
+                show(io, p)
+            end
+            i < n && print(io, ", ")
+        end
+        print(io, "}")
+    end
+    resize!(wheres, elide)
+    nothing
+end
+
+function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector, wheres::Vector)
     if !(get(io, :compact, false)::Bool)
         # Print module prefix unless alias is visible from module passed to
         # IOContext. If :module is not set, default to Main. nothing can be used
@@ -657,61 +709,99 @@ function show_typealias(io::IO, name::GlobalRef, x::Type, env::SimpleVector)
         end
     end
     print(io, name.name)
-    n = length(env)
-    n == 0 && return
+    isempty(env) && return
+    io = IOContext(io)
+    for p in wheres
+        io = IOContext(io, :unionall_env => p)
+    end
+    orig = getfield(name.mod, name.name)
+    vars = TypeVar[]
+    while orig isa UnionAll
+        push!(vars, orig.var)
+        orig = orig.body
+    end
+    show_typeparams(io, env, Core.svec(vars...), wheres)
+    nothing
+end
 
-    print(io, "{")
-    let io = IOContext(io)
-        for i = n:-1:1
-            p = env[i]
-            if p isa TypeVar
-                io = IOContext(io, :unionall_env => p)
+function make_wheres(io::IO, env::SimpleVector, @nospecialize(x::Type))
+    seen = IdSet()
+    wheres = TypeVar[]
+    # record things printed by the context
+    if io isa IOContext
+        for (key, val) in io.dict
+            if key === :unionall_env && val isa TypeVar && has_typevar(x, val)
+                push!(seen, val)
             end
         end
-        for i = 1:n
-            p = env[i]
-            show(io, p)
-            i < n && print(io, ", ")
+    end
+    # record things in x to print outermost
+    while x isa UnionAll
+        if !(x.var in seen)
+            push!(seen, x.var)
+            push!(wheres, x.var)
         end
+        x = x.body
     end
-    print(io, "}")
-    for i = n:-1:1
+    # record remaining things in env to print innermost
+    for i = length(env):-1:1
         p = env[i]
-        if p isa TypeVar && !io_has_tvar_name(io, p.name, x)
-            print(io, " where ")
-            show(io, p)
+        if p isa TypeVar && !(p in seen)
+            push!(seen, p)
+            pushfirst!(wheres, p)
         end
     end
+    return wheres
+end
+
+function show_wheres(io::IO, wheres::Vector{TypeVar})
+    isempty(wheres) && return
+    io = IOContext(io)
+    n = length(wheres)
+    for i = 1:n
+        p = wheres[i]
+        print(io, n == 1 ? " where " : i == 1 ? " where {" : ", ")
+        show(io, p)
+        io = IOContext(io, :unionall_env => p)
+    end
+    n > 1 && print(io, "}")
+    nothing
 end
 
-function show_typealias(io::IO, x::Type)
+function show_typealias(io::IO, @nospecialize(x::Type))
     properx = makeproper(io, x)
     alias = make_typealias(properx)
     alias === nothing && return false
-    show_typealias(io, alias[1], x, alias[2])
+    wheres = make_wheres(io, alias[2], x)
+    show_typealias(io, alias[1], x, alias[2], wheres)
+    show_wheres(io, wheres)
     return true
 end
 
 function make_typealiases(@nospecialize(x::Type))
-    Any <: x && return Core.svec(), Union{}
-    x <: Tuple && return Core.svec(), Union{}
+    aliases = SimpleVector[]
+    Any === x && return aliases, Union{}
+    x <: Tuple && return aliases, Union{}
     mods = modulesof!(Set{Module}(), x)
     Core in mods && push!(mods, Base)
-    aliases = SimpleVector[]
     vars = Dict{Symbol,TypeVar}()
     xenv = UnionAll[]
+    each = Any[]
     for p in uniontypes(unwrap_unionall(x))
         p isa UnionAll && push!(xenv, p)
+        push!(each, rewrap_unionall(p, x))
     end
     x isa UnionAll && push!(xenv, x)
     for mod in mods
         for name in names(mod)
             if isdefined(mod, name) && !isdeprecated(mod, name) && isconst(mod, name)
                 alias = getfield(mod, name)
-                if alias isa Type && !has_free_typevars(alias) && !isvarargtype(alias) && !print_without_params(alias) && !(alias <: Tuple)
+                if alias isa Type && !has_free_typevars(alias) && !print_without_params(alias) && !(alias <: Tuple)
                     (ti, env) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), x, alias)::SimpleVector
                     ti === Union{} && continue
-                    mod in modulesof!(Set{Module}(), alias) || continue # make sure this alias wasn't from an unrelated part of the Union
+                    # make sure this alias wasn't from an unrelated part of the Union
+                    mod2 = modulesof!(Set{Module}(), alias)
+                    mod in mod2 || (mod === Base && Core in mods) || continue
                     env = env::SimpleVector
                     applied = alias
                     if !isempty(env)
@@ -734,29 +824,35 @@ function make_typealiases(@nospecialize(x::Type))
                     has_free_typevars(applied) && continue
                     applied <: x || continue # parameter matching didn't make a subtype
                     print_without_params(x) && (env = Core.svec())
-                    push!(aliases, Core.svec(GlobalRef(mod, name), env, applied, (ul, -length(env))))
+                    for typ in each # check that the alias also fully subsumes at least component of the input
+                        if typ <: applied
+                            push!(aliases, Core.svec(GlobalRef(mod, name), env, applied, (ul, -length(env))))
+                            break
+                        end
+                    end
                 end
             end
         end
     end
     if isempty(aliases)
-        return Core.svec(), Union{}
+        return aliases, Union{}
     end
-    sort!(aliases, by = x -> x[4], rev = true) # heuristic sort by "best" environment
+    sort!(aliases, by = x -> x[4]::Tuple{Int,Int}, rev = true) # heuristic sort by "best" environment
     let applied = Union{}
         applied1 = Union{}
         keep = SimpleVector[]
         prev = (0, 0)
         for alias in aliases
-            if alias[4][1] < 2
+            alias4 = alias[4]::Tuple{Int,Int}
+            if alias4[1] < 2
                 if !(alias[3] <: applied)
                     applied1 = Union{applied1, alias[3]}
                     push!(keep, alias)
                 end
-            elseif alias[4] == prev || !(alias[3] <: applied)
+            elseif alias4 == prev || !(alias[3] <: applied)
                 applied = applied1 = Union{applied1, alias[3]}
                 push!(keep, alias)
-                prev = alias[4]
+                prev = alias4
             end
         end
         return keep, applied1
@@ -766,51 +862,78 @@ end
 function show_unionaliases(io::IO, x::Union)
     properx = makeproper(io, x)
     aliases, applied = make_typealiases(properx)
+    isempty(aliases) && return false
     first = true
+    tvar = false
     for typ in uniontypes(x)
-        if !isa(typ, TypeVar) && rewrap_unionall(typ, properx) <: applied
+        if isa(typ, TypeVar)
+            tvar = true # sort bare TypeVars to the end
+            continue
+        elseif rewrap_unionall(typ, properx) <: applied
             continue
         end
         print(io, first ? "Union{" : ", ")
         first = false
         show(io, typ)
     end
-    if first && length(aliases) == 1
+    if first && !tvar && length(aliases) == 1
         alias = aliases[1]
-        show_typealias(io, alias[1], x, alias[2])
+        env = alias[2]::SimpleVector
+        wheres = make_wheres(io, env, x)
+        show_typealias(io, alias[1], x, env, wheres)
+        show_wheres(io, wheres)
     else
         for alias in aliases
             print(io, first ? "Union{" : ", ")
             first = false
-            env = alias[2]
-            show_typealias(io, alias[1], x, alias[2])
+            env = alias[2]::SimpleVector
+            wheres = make_wheres(io, env, x)
+            show_typealias(io, alias[1], x, env, wheres)
+            show_wheres(io, wheres)
+        end
+        if tvar
+            for typ in uniontypes(x)
+                if isa(typ, TypeVar)
+                    print(io, ", ")
+                    show(io, typ)
+                end
+            end
         end
         print(io, "}")
     end
+    return true
 end
 
 function show(io::IO, ::MIME"text/plain", @nospecialize(x::Type))
-    show(io, x)
-    if !print_without_params(x) && get(io, :compact, true)
+    if !print_without_params(x)
         properx = makeproper(io, x)
-        if make_typealias(properx) !== nothing || x <: make_typealiases(properx)[2]
-            print(io, " (alias for ")
-            show(IOContext(io, :compact => false), x)
-            print(io, ")")
+        if make_typealias(properx) !== nothing || (unwrap_unionall(x) isa Union && x <: make_typealiases(properx)[2])
+            show(IOContext(io, :compact => true), x)
+            if !(get(io, :compact, false)::Bool)
+                printstyled(io, " (alias for "; color = :light_black)
+                printstyled(IOContext(io, :compact => false), x, color = :light_black)
+                printstyled(io, ")"; color = :light_black)
+            end
+            return
+        end
+    end
+    show(io, x)
+    # give a helpful hint for function types
+    if x isa DataType && x !== UnionAll && !(get(io, :compact, false)::Bool)
+        tn = x.name::Core.TypeName
+        globname = isdefined(tn, :mt) ? tn.mt.name : nothing
+        if is_global_function(tn, globname)
+            print(io, " (singleton type of function ")
+            show_sym(io, globname)
+            print(io, ", subtype of Function)")
         end
     end
-
-    #s1 = sprint(show, x, context = io)
-    #s2 = sprint(show, x, context = IOContext(io, :compact => false))
-    #print(io, s1)
-    #if s1 != s2
-    #    print(io, " = ", s2)
-    #end
 end
 
-function show(io::IO, @nospecialize(x::Type))
+show(io::IO, @nospecialize(x::Type)) = _show_type(io, inferencebarrier(x))
+function _show_type(io::IO, @nospecialize(x::Type))
     if print_without_params(x)
-        show_type_name(io, unwrap_unionall(x).name)
+        show_type_name(io, (unwrap_unionall(x)::DataType).name)
         return
     elseif get(io, :compact, true) && show_typealias(io, x)
         return
@@ -818,32 +941,43 @@ function show(io::IO, @nospecialize(x::Type))
         show_datatype(io, x)
         return
     elseif x isa Union
-        if get(io, :compact, true)
-            show_unionaliases(io, x)
-        else
-            print(io, "Union")
-            show_delim_array(io, uniontypes(x), '{', ',', '}', false)
+        if get(io, :compact, true) && show_unionaliases(io, x)
+            return
         end
+        print(io, "Union")
+        show_delim_array(io, uniontypes(x), '{', ',', '}', false)
         return
     end
 
     x = x::UnionAll
-    if x.var.name === :_ || io_has_tvar_name(io, x.var.name, x)
-        counter = 1
-        while true
-            newname = Symbol(x.var.name, counter)
-            if !io_has_tvar_name(io, newname, x)
-                newtv = TypeVar(newname, x.var.lb, x.var.ub)
-                x = UnionAll(newtv, x{newtv})
-                break
+    wheres = TypeVar[]
+    let io = IOContext(io)
+        while x isa UnionAll
+            var = x.var
+            if var.name === :_ || io_has_tvar_name(io, var.name, x)
+                counter = 1
+                while true
+                    newname = Symbol(var.name, counter)
+                    if !io_has_tvar_name(io, newname, x)
+                        var = TypeVar(newname, var.lb, var.ub)
+                        x = x{var}
+                        break
+                    end
+                    counter += 1
+                end
+            else
+                x = x.body
             end
-            counter += 1
+            push!(wheres, var)
+            io = IOContext(io, :unionall_env => var)
+        end
+        if x isa DataType
+            show_datatype(io, x, wheres)
+        else
+            show(io, x)
         end
     end
-
-    show(IOContext(io, :unionall_env => x.var), x.body)
-    print(io, " where ")
-    show(io, x.var)
+    show_wheres(io, wheres)
 end
 
 # Check whether 'sym' (defined in module 'parent') is visible from module 'from'
@@ -857,22 +991,26 @@ function isvisible(sym::Symbol, parent::Module, from::Module)
         isdefined(from, sym) # if we're going to return true, force binding resolution
 end
 
-function show_type_name(io::IO, tn::Core.TypeName)
-    if tn === UnionAll.name
-        # by coincidence, `typeof(Type)` is a valid representation of the UnionAll type.
-        # intercept this case and print `UnionAll` instead.
-        return print(io, "UnionAll")
-    end
-    globname = isdefined(tn, :mt) ? tn.mt.name : nothing
-    globfunc = false
+function is_global_function(tn::Core.TypeName, globname::Union{Symbol,Nothing})
     if globname !== nothing
         globname_str = string(globname::Symbol)
         if ('#' ∉ globname_str && '@' ∉ globname_str && isdefined(tn, :module) &&
                 isbindingresolved(tn.module, globname) && isdefined(tn.module, globname) &&
                 isconcretetype(tn.wrapper) && isa(getfield(tn.module, globname), tn.wrapper))
-            globfunc = true
+            return true
         end
     end
+    return false
+end
+
+function show_type_name(io::IO, tn::Core.TypeName)
+    if tn === UnionAll.name
+        # by coincidence, `typeof(Type)` is a valid representation of the UnionAll type.
+        # intercept this case and print `UnionAll` instead.
+        return print(io, "UnionAll")
+    end
+    globname = isdefined(tn, :mt) ? tn.mt.name : nothing
+    globfunc = is_global_function(tn, globname)
     sym = (globfunc ? globname : tn.name)::Symbol
     globfunc && print(io, "typeof(")
     quo = false
@@ -899,29 +1037,42 @@ function show_type_name(io::IO, tn::Core.TypeName)
     nothing
 end
 
-function show_datatype(io::IO, @nospecialize(x::DataType))
+function show_datatype(io::IO, x::DataType, wheres::Vector{TypeVar}=TypeVar[])
     parameters = x.parameters::SimpleVector
     istuple = x.name === Tuple.name
     n = length(parameters)
 
-    # Print homogeneous tuples with more than 3 elements compactly as NTuple{N, T}
-    if istuple && n > 3 && all(i -> (parameters[1] === i), parameters)
-        print(io, "NTuple{", n, ", ", parameters[1], "}")
-    else
-        show_type_name(io, x.name)
-        if (n > 0 || istuple) && x !== Tuple
-            # Do not print the type parameters for the primary type if we are
-            # printing a method signature or type parameter.
-            # Always print the type parameter if we are printing the type directly
-            # since this information is still useful.
-            print(io, '{')
-            for i = 1:n
-                p = parameters[i]
-                show(io, p)
-                i < n && print(io, ", ")
+    # Print tuple types with homogeneous tails longer than max_n compactly using `NTuple` or `Vararg`
+    max_n = 3
+    if istuple
+        taillen = 1
+        for i in (n-1):-1:1
+            if parameters[i] === parameters[n]
+                taillen += 1
+            else
+                break
+            end
+        end
+        if n == taillen > max_n
+            print(io, "NTuple{", n, ", ")
+            show(io, parameters[1])
+            print(io, "}")
+        else
+            print(io, "Tuple{")
+            for i = 1:(taillen > max_n ? n-taillen : n)
+                i > 1 && print(io, ", ")
+                show(io, parameters[i])
             end
-            print(io, '}')
+            if taillen > max_n
+                print(io, ", Vararg{")
+                show(io, parameters[n])
+                print(io, ", ", taillen, "}")
+            end
+            print(io, "}")
         end
+    else
+        show_type_name(io, x.name)
+        show_typeparams(io, parameters, (unwrap_unionall(x.name.wrapper)::DataType).parameters, wheres)
     end
 end
 
@@ -936,9 +1087,22 @@ end
 show_supertypes(typ::DataType) = show_supertypes(stdout, typ)
 
 """
-    @show
+    @show exs...
+
+Prints one or more expressions, and their results, to `stdout`, and returns the last result.
+
+See also: [`show`](@ref), [`@info`](@ref man-logging), [`println`](@ref).
+
+# Examples
+```jldoctest
+julia> x = @show 1+2
+1 + 2 = 3
+3
 
-Show an expression and result, returning the result. See also [`show`](@ref).
+julia> @show x^2 x/2;
+x ^ 2 = 9
+x / 2 = 1.5
+```
 """
 macro show(exs...)
     blk = Expr(:block)
@@ -1003,7 +1167,20 @@ function show(io::IO, m::Module)
     if is_root_module(m)
         print(io, nameof(m))
     else
-        print(io, join(fullname(m),"."))
+        print_fullname(io, m)
+    end
+end
+# The call to print_fullname above was originally `print(io, join(fullname(m),"."))`,
+# which allocates. The method below provides the same behavior without allocating.
+# See https://github.com/JuliaLang/julia/pull/42773 for perf information.
+function print_fullname(io::IO, m::Module)
+    mp = parentmodule(m)
+    if m === Main || m === Base || m === Core || mp === m
+        show_sym(io, nameof(m))
+    else
+        print_fullname(io, mp)
+        print(io, '.')
+        show_sym(io, nameof(m))
     end
 end
 
@@ -1012,6 +1189,10 @@ function sourceinfo_slotnames(src::CodeInfo)
     names = Dict{String,Int}()
     printnames = Vector{String}(undef, length(slotnames))
     for i in eachindex(slotnames)
+        if slotnames[i] == :var"#unused#"
+            printnames[i] = "_"
+            continue
+        end
         name = string(slotnames[i])
         idx = get!(names, name, i)
         if idx != i || isempty(name)
@@ -1026,7 +1207,9 @@ function sourceinfo_slotnames(src::CodeInfo)
     return printnames
 end
 
-function show(io::IO, l::Core.MethodInstance)
+show(io::IO, l::Core.MethodInstance) = show_mi(io, l)
+
+function show_mi(io::IO, l::Core.MethodInstance, from_stackframe::Bool=false)
     def = l.def
     if isa(def, Method)
         if isdefined(def, :generator) && l === def.generator
@@ -1034,13 +1217,49 @@ function show(io::IO, l::Core.MethodInstance)
             show(io, def)
         else
             print(io, "MethodInstance for ")
-            show_tuple_as_call(io, def.name, l.specTypes)
+            show_tuple_as_call(io, def.name, l.specTypes; qualified=true)
         end
     else
         print(io, "Toplevel MethodInstance thunk")
+        # `thunk` is not very much information to go on. If this
+        # MethodInstance is part of a stacktrace, it gets location info
+        # added by other means.  But if it isn't, then we should try
+        # to print a little more identifying information.
+        if !from_stackframe
+            linetable = l.uninferred.linetable
+            line = isempty(linetable) ? "unknown" : (lt = linetable[1]::Union{LineNumberNode,Core.LineInfoNode}; string(lt.file, ':', lt.line))
+            print(io, " from ", def, " starting at ", line)
+        end
+    end
+end
+
+# These sometimes show up as Const-values in InferenceFrameInfo signatures
+show(io::IO, r::Core.Compiler.UnitRange) = show(io, r.start : r.stop)
+show(io::IO, mime::MIME{Symbol("text/plain")}, r::Core.Compiler.UnitRange) = show(io, mime, r.start : r.stop)
+
+function show(io::IO, mi_info::Core.Compiler.Timings.InferenceFrameInfo)
+    mi = mi_info.mi
+    def = mi.def
+    if isa(def, Method)
+        if isdefined(def, :generator) && mi === def.generator
+            print(io, "InferenceFrameInfo generator for ")
+            show(io, def)
+        else
+            print(io, "InferenceFrameInfo for ")
+            argnames = [isa(a, Core.Const) ? (isa(a.val, Type) ? "" : a.val) : "" for a in mi_info.slottypes[1:mi_info.nargs]]
+            show_tuple_as_call(io, def.name, mi.specTypes; argnames, qualified=true)
+        end
+    else
+        linetable = mi.uninferred.linetable
+        line = isempty(linetable) ? "" : (lt = linetable[1]; string(lt.file, ':', lt.line))
+        print(io, "Toplevel InferenceFrameInfo thunk from ", def, " starting at ", line)
     end
 end
 
+function show(io::IO, tinf::Core.Compiler.Timings.Timing)
+    print(io, "Core.Compiler.Timings.Timing(", tinf.mi_info, ") with ", length(tinf.children), " children")
+end
+
 function show_delim_array(io::IO, itr::Union{AbstractArray,SimpleVector}, op, delim, cl,
                           delim_one, i1=first(LinearIndices(itr)), l=last(LinearIndices(itr)))
     print(io, op)
@@ -1142,7 +1361,7 @@ const ExprNode = Union{Expr, QuoteNode, Slot, LineNumberNode, SSAValue,
 # IOContext(io, :unquote_fallback => false) tells show_unquoted to treat any
 # Expr whose head is :$ as if it is inside a quote, preventing fallback to the
 # "unhandled" case: this is used by print/string to be lawful to Rule 1 above.
-# On the countrary, show/repr have to follow Rule 2, requiring any Expr whose
+# On the contrary, show/repr have to follow Rule 2, requiring any Expr whose
 # head is :$ and which is not inside a quote to fallback to the "unhandled" case:
 # this is behavior is triggered by IOContext(io, :unquote_fallback => true)
 print(        io::IO, ex::ExprNode)    = (show_unquoted(IOContext(io, :unquote_fallback => false), ex, 0, -1); nothing)
@@ -1157,32 +1376,60 @@ show_unquoted(io::IO, ex, indent::Int, prec::Int, ::Int) = show_unquoted(io, ex,
 const indent_width = 4
 const quoted_syms = Set{Symbol}([:(:),:(::),:(:=),:(=),:(==),:(===),:(=>)])
 const uni_syms = Set{Symbol}([:(::), :(<:), :(>:)])
-const uni_ops = Set{Symbol}([:(+), :(-), :(!), :(¬), :(~), :(<:), :(>:), :(√), :(∛), :(∜)])
+const uni_ops = Set{Symbol}([:(+), :(-), :(!), :(¬), :(~), :(<:), :(>:), :(√), :(∛), :(∜), :(∓), :(±)])
 const expr_infix_wide = Set{Symbol}([
     :(=), :(+=), :(-=), :(*=), :(/=), :(\=), :(^=), :(&=), :(|=), :(÷=), :(%=), :(>>>=), :(>>=), :(<<=),
     :(.=), :(.+=), :(.-=), :(.*=), :(./=), :(.\=), :(.^=), :(.&=), :(.|=), :(.÷=), :(.%=), :(.>>>=), :(.>>=), :(.<<=),
-    :(&&), :(||), :(<:), :($=), :(⊻=), :(>:), :(-->)])
-const expr_infix = Set{Symbol}([:(:), :(->), Symbol("::")])
+    :(&&), :(||), :(<:), :($=), :(⊻=), :(>:), :(-->),
+    :(:=), :(≔), :(⩴), :(≕)])
+const expr_infix = Set{Symbol}([:(:), :(->), :(::)])
 const expr_infix_any = union(expr_infix, expr_infix_wide)
 const expr_calls  = Dict(:call => ('(',')'), :calldecl => ('(',')'),
                          :ref => ('[',']'), :curly => ('{','}'), :(.) => ('(',')'))
 const expr_parens = Dict(:tuple=>('(',')'), :vcat=>('[',']'),
                          :hcat =>('[',']'), :row =>('[',']'), :vect=>('[',']'),
+                         :ncat =>('[',']'), :nrow =>('[',']'),
                          :braces=>('{','}'), :bracescat=>('{','}'))
 
 ## AST decoding helpers ##
 
 is_id_start_char(c::AbstractChar) = ccall(:jl_id_start_char, Cint, (UInt32,), c) != 0
 is_id_char(c::AbstractChar) = ccall(:jl_id_char, Cint, (UInt32,), c) != 0
+
+"""
+     isidentifier(s) -> Bool
+
+Return whether the symbol or string `s` contains characters that are parsed as
+a valid ordinary identifier (not a binary/unary operator) in Julia code;
+see also [`Base.isoperator`](@ref).
+
+Internally Julia allows any sequence of characters in a `Symbol` (except `\\0`s),
+and macros automatically use variable names containing `#` in order to avoid
+naming collision with the surrounding code. In order for the parser to
+recognize a variable, it uses a limited set of characters (greatly extended by
+Unicode). `isidentifier()` makes it possible to query the parser directly
+whether a symbol contains valid characters.
+
+# Examples
+```jldoctest
+julia> Meta.isidentifier(:x), Meta.isidentifier("1x")
+(true, false)
+```
+"""
 function isidentifier(s::AbstractString)
-    isempty(s) && return false
+    x = Iterators.peel(s)
+    isnothing(x) && return false
     (s == "true" || s == "false") && return false
-    c, rest = Iterators.peel(s)
+    c, rest = x
     is_id_start_char(c) || return false
     return all(is_id_char, rest)
 end
 isidentifier(s::Symbol) = isidentifier(string(s))
 
+is_op_suffix_char(c::AbstractChar) = ccall(:jl_op_suffix_char, Cint, (UInt32,), c) != 0
+
+_isoperator(s) = ccall(:jl_is_operator, Cint, (Cstring,), s) != 0
+
 """
     isoperator(s::Symbol)
 
@@ -1190,11 +1437,11 @@ Return `true` if the symbol can be used as an operator, `false` otherwise.
 
 # Examples
 ```jldoctest
-julia> Base.isoperator(:+), Base.isoperator(:f)
+julia> Meta.isoperator(:+), Meta.isoperator(:f)
 (true, false)
 ```
 """
-isoperator(s::Union{Symbol,AbstractString}) = ccall(:jl_is_operator, Cint, (Cstring,), s) != 0
+isoperator(s::Union{Symbol,AbstractString}) = _isoperator(s) || ispostfixoperator(s)
 
 """
     isunaryoperator(s::Symbol)
@@ -1203,12 +1450,13 @@ Return `true` if the symbol can be used as a unary (prefix) operator, `false` ot
 
 # Examples
 ```jldoctest
-julia> Base.isunaryoperator(:-), Base.isunaryoperator(:√), Base.isunaryoperator(:f)
+julia> Meta.isunaryoperator(:-), Meta.isunaryoperator(:√), Meta.isunaryoperator(:f)
 (true, true, false)
 ```
 """
 isunaryoperator(s::Symbol) = ccall(:jl_is_unary_operator, Cint, (Cstring,), s) != 0
 is_unary_and_binary_operator(s::Symbol) = ccall(:jl_is_unary_and_binary_operator, Cint, (Cstring,), s) != 0
+is_syntactic_operator(s::Symbol) = ccall(:jl_is_syntactic_operator, Cint, (Cstring,), s) != 0
 
 """
     isbinaryoperator(s::Symbol)
@@ -1217,11 +1465,30 @@ Return `true` if the symbol can be used as a binary (infix) operator, `false` ot
 
 # Examples
 ```jldoctest
-julia> Base.isbinaryoperator(:-), Base.isbinaryoperator(:√), Base.isbinaryoperator(:f)
+julia> Meta.isbinaryoperator(:-), Meta.isbinaryoperator(:√), Meta.isbinaryoperator(:f)
 (true, false, false)
 ```
 """
-isbinaryoperator(s::Symbol) = isoperator(s) && (!isunaryoperator(s) || is_unary_and_binary_operator(s))
+function isbinaryoperator(s::Symbol)
+    return _isoperator(s) && (!isunaryoperator(s) || is_unary_and_binary_operator(s)) &&
+        s !== Symbol("'")
+end
+
+"""
+    ispostfixoperator(s::Union{Symbol,AbstractString})
+
+Return `true` if the symbol can be used as a postfix operator, `false` otherwise.
+
+# Examples
+```jldoctest
+julia> Meta.ispostfixoperator(Symbol("'")), Meta.ispostfixoperator(Symbol("'ᵀ")), Meta.ispostfixoperator(:-)
+(true, true, false)
+```
+"""
+function ispostfixoperator(s::Union{Symbol,AbstractString})
+    s = String(s)::String
+    return startswith(s, '\'') && all(is_op_suffix_char, SubString(s, 2))
+end
 
 """
     operator_precedence(s::Symbol)
@@ -1275,9 +1542,6 @@ function operator_associativity(s::Symbol)
     return :left
 end
 
-is_expr(@nospecialize(ex), head::Symbol)         = isa(ex, Expr) && (ex.head === head)
-is_expr(@nospecialize(ex), head::Symbol, n::Int) = is_expr(ex, head) && length((ex::Expr).args) == n
-
 is_quoted(ex)            = false
 is_quoted(ex::QuoteNode) = true
 is_quoted(ex::Expr)      = is_expr(ex, :quote, 1) || is_expr(ex, :inert, 1)
@@ -1290,16 +1554,14 @@ unquoted(ex::Expr)       = ex.args[1]
 function printstyled end
 function with_output_color end
 
-const indent_width = 4
-
 is_expected_union(u::Union) = u.a == Nothing || u.b == Nothing || u.a == Missing || u.b == Missing
 
 emphasize(io, str::AbstractString, col = Base.error_color()) = get(io, :color, false) ?
     printstyled(io, str; color=col, bold=true) :
     print(io, uppercase(str))
 
-show_linenumber(io::IO, line)       = print(io, "#= line ", line, " =#")
-show_linenumber(io::IO, line, file) = print(io, "#= ", file, ":", line, " =#")
+show_linenumber(io::IO, line)       = printstyled(io, "#= line ", line, " =#", color=:light_black)
+show_linenumber(io::IO, line, file) = printstyled(io, "#= ", file, ":", line, " =#", color=:light_black)
 show_linenumber(io::IO, line, file::Nothing) = show_linenumber(io, line)
 
 # show a block, e g if/for/etc
@@ -1344,7 +1606,7 @@ function show_list(io::IO, items, sep, indent::Int, prec::Int=0, quote_level::In
             (first && prec >= prec_power &&
              ((item isa Expr && item.head === :call && (callee = item.args[1]; isa(callee, Symbol) && callee in uni_ops)) ||
               (item isa Real && item < 0))) ||
-              (enclose_operators && item isa Symbol && isoperator(item))
+            (enclose_operators && item isa Symbol && isoperator(item) && is_valid_identifier(item))
         parens && print(io, '(')
         if kw && is_expr(item, :kw, 2)
             item = item::Expr
@@ -1366,11 +1628,20 @@ function show_enclosed_list(io::IO, op, items, sep, cl, indent, prec=0, quote_le
     print(io, cl)
 end
 
+function is_valid_identifier(sym)
+    return isidentifier(sym) || (
+        _isoperator(sym) &&
+        !(sym in (Symbol("'"), :(::), :?)) &&
+        !is_syntactic_operator(sym)
+    )
+end
+
 # show a normal (non-operator) function call, e.g. f(x, y) or A[z]
 # kw: `=` expressions are parsed with head `kw` in this context
 function show_call(io::IO, head, func, func_args, indent, quote_level, kw::Bool)
     op, cl = expr_calls[head]
     if (isa(func, Symbol) && func !== :(:) && !(head === :. && isoperator(func))) ||
+            (isa(func, Symbol) && !is_valid_identifier(func)) ||
             (isa(func, Expr) && (func.head === :. || func.head === :curly || func.head === :macroname)) ||
             isa(func, GlobalRef)
         show_unquoted(io, func, indent, 0, quote_level)
@@ -1396,12 +1667,12 @@ end
 # Print `sym` as it would appear as an identifier name in code
 # * Print valid identifiers & operators literally; also macros names if allow_macroname=true
 # * Escape invalid identifiers with var"" syntax
-function show_sym(io::IO, sym; allow_macroname=false)
-    if isidentifier(sym) || (isoperator(sym) && sym !== Symbol("'"))
+function show_sym(io::IO, sym::Symbol; allow_macroname=false)
+    if is_valid_identifier(sym)
         print(io, sym)
     elseif allow_macroname && (sym_str = string(sym); startswith(sym_str, '@'))
         print(io, '@')
-        show_sym(io, sym_str[2:end])
+        show_sym(io, Symbol(sym_str[2:end]))
     else
         print(io, "var", repr(string(sym)))
     end
@@ -1456,14 +1727,16 @@ function show_unquoted(io::IO, ex::QuoteNode, indent::Int, prec::Int)
 end
 
 function show_unquoted_quote_expr(io::IO, @nospecialize(value), indent::Int, prec::Int, quote_level::Int)
-    if isa(value, Symbol) && !(value in quoted_syms)
-        value = value::Symbol
-        s = string(value)
-        if isidentifier(s) || (isoperator(value) && value !== Symbol("'"))
-            print(io, ":")
-            print(io, value)
+    if isa(value, Symbol)
+        sym = value::Symbol
+        if value in quoted_syms
+            print(io, ":(", sym, ")")
         else
-            print(io, "Symbol(", repr(s), ")")
+            if isidentifier(sym) || (_isoperator(sym) && sym !== Symbol("'"))
+                print(io, ":", sym)
+            else
+                print(io, "Symbol(", repr(String(sym)), ")")
+            end
         end
     else
         if isa(value,Expr) && value.head === :block
@@ -1499,7 +1772,10 @@ function show_generator(io, ex::Expr, indent, quote_level)
     end
 end
 
-function valid_import_path(@nospecialize ex)
+function valid_import_path(@nospecialize(ex), allow_as = true)
+    if allow_as && is_expr(ex, :as) && length((ex::Expr).args) == 2
+        ex = (ex::Expr).args[1]
+    end
     return is_expr(ex, :(.)) && length((ex::Expr).args) > 0 && all(a->isa(a,Symbol), (ex::Expr).args)
 end
 
@@ -1517,10 +1793,12 @@ function show_import_path(io::IO, ex, quote_level)
         end
     elseif ex.head === :(.)
         for i = 1:length(ex.args)
-            if i > 1 && ex.args[i-1] !== :(.)
+            if ex.args[i] === :(.)
                 print(io, '.')
+            else
+                show_sym(io, ex.args[i]::Symbol, allow_macroname=(i==length(ex.args)))
+                i < length(ex.args) && print(io, '.')
             end
-            show_sym(io, ex.args[i]::Symbol, allow_macroname=(i==length(ex.args)))
         end
     else
         show_unquoted(io, ex, 0, 0, quote_level)
@@ -1563,7 +1841,10 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     unhandled = false
     # dot (i.e. "x.y"), but not compact broadcast exps
     if head === :(.) && (nargs != 2 || !is_expr(args[2], :tuple))
-        if nargs == 2 && is_quoted(args[2])
+        # standalone .op
+        if nargs == 1 && args[1] isa Symbol && isoperator(args[1]::Symbol)
+            print(io, "(.", args[1], ")")
+        elseif nargs == 2 && is_quoted(args[2])
             item = args[1]
             # field
             field = unquoted(args[2])
@@ -1613,14 +1894,16 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
 
     # list-like forms, e.g. "[1, 2, 3]"
     elseif haskey(expr_parens, head) ||                          # :vcat etc.
-        head === :typed_vcat || head === :typed_hcat
+        head === :typed_vcat || head === :typed_hcat || head === :typed_ncat
         # print the type and defer to the untyped case
-        if head === :typed_vcat || head === :typed_hcat
+        if head === :typed_vcat || head === :typed_hcat || head === :typed_ncat
             show_unquoted(io, args[1], indent, prec, quote_level)
             if head === :typed_vcat
                 head = :vcat
-            else
+            elseif head === :typed_hcat
                 head = :hcat
+            else
+                head = :ncat
             end
             args = args[2:end]
             nargs = nargs - 1
@@ -1630,15 +1913,33 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
             sep = "; "
         elseif head === :hcat || head === :row
             sep = " "
+        elseif head === :ncat || head === :nrow
+            sep = ";"^args[1]::Int * " "
+            args = args[2:end]
+            nargs = nargs - 1
         else
             sep = ", "
         end
-        head !== :row && print(io, op)
+        head !== :row && head !== :nrow && print(io, op)
         show_list(io, args, sep, indent, 0, quote_level)
-        if nargs == 1 && head === :vcat
-            print(io, ';')
+        if nargs <= 1 && (head === :vcat || head === :ncat)
+            print(io, sep[1:end-1])
         end
-        head !== :row && print(io, cl)
+        head !== :row && head !== :nrow && print(io, cl)
+
+    # transpose
+    elseif (head === Symbol("'") && nargs == 1) || (
+        # ' with unicode suffix is a call expression
+        head === :call && nargs == 2 && args[1] isa Symbol &&
+        ispostfixoperator(args[1]::Symbol) && args[1]::Symbol !== Symbol("'")
+    )
+        op, arg1 = head === Symbol("'") ? (head, args[1]) : (args[1], args[2])
+        if isa(arg1, Expr) || (isa(arg1, Symbol) && isoperator(arg1::Symbol))
+            show_enclosed_list(io, '(', [arg1::Union{Expr, Symbol}], ", ", ')', indent, 0)
+        else
+            show_unquoted(io, arg1, indent, 0, quote_level)
+        end
+        print(io, op)
 
     # function call
     elseif head === :call && nargs >= 1
@@ -1668,10 +1969,10 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
             end
 
         # unary operator (i.e. "!z")
-        elseif isa(func,Symbol) && func in uni_ops && length(func_args) == 1
+        elseif isa(func,Symbol) && length(func_args) == 1 && func in uni_ops
             show_unquoted(io, func, indent, 0, quote_level)
             arg1 = func_args[1]
-            if isa(arg1, Expr) || (isa(arg1, Symbol) && isoperator(arg1))
+            if isa(arg1, Expr) || (isa(arg1, Symbol) && isoperator(arg1) && is_valid_identifier(arg1))
                 show_enclosed_list(io, '(', func_args, ", ", ')', indent, func_prec)
             else
                 show_unquoted(io, arg1, indent, func_prec, quote_level)
@@ -1681,8 +1982,8 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
         elseif func_prec > 0 # is a binary operator
             na = length(func_args)
             if (na == 2 || (na > 2 && isa(func, Symbol) && func in (:+, :++, :*)) || (na == 3 && func === :(:))) &&
-                    all(!isa(a, Expr) || a.head !== :... for a in func_args)
-                sep = func === :(:) ? "$func" : " $func "
+                    all(a -> !isa(a, Expr) || a.head !== :..., func_args)
+                sep = func === :(:) ? "$func" : " " * convert(String, string(func))::String * " "   # if func::Any, avoid string interpolation (invalidation)
 
                 if func_prec <= prec
                     show_enclosed_list(io, '(', func_args, sep, ')', indent, func_prec, quote_level, true)
@@ -1819,8 +2120,12 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     # var-arg declaration or expansion
     # (i.e. "function f(L...) end" or "f(B...)")
     elseif head === :(...) && nargs == 1
-        show_unquoted(io, args[1], indent, 0, quote_level)
+        dotsprec = operator_precedence(:(:)) - 1
+        parens = dotsprec <= prec
+        parens && print(io, "(")
+        show_unquoted(io, args[1], indent, dotsprec, quote_level)
         print(io, "...")
+        parens && print(io, ")")
 
     elseif (nargs == 0 && head in (:break, :continue))
         print(io, head)
@@ -1908,12 +2213,15 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
     elseif head === :line && 1 <= nargs <= 2
         show_linenumber(io, args...)
 
-    elseif head === :try && 3 <= nargs <= 4
+    elseif head === :try && 3 <= nargs <= 5
         iob = IOContext(io, beginsym=>false)
         show_block(iob, "try", args[1], indent, quote_level)
         if is_expr(args[3], :block)
             show_block(iob, "catch", args[2] === false ? Any[] : args[2], args[3]::Expr, indent, quote_level)
         end
+        if nargs >= 5 && is_expr(args[5], :block)
+            show_block(iob, "else", Any[], args[5]::Expr, indent, quote_level)
+        end
         if nargs >= 4 && is_expr(args[4], :block)
             show_block(iob, "finally", Any[], args[4]::Expr, indent, quote_level)
         end
@@ -2005,17 +2313,6 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
             parens && print(io, ")")
         end
 
-    # transpose
-    elseif head === Symbol('\'') && nargs == 1
-        if isa(args[1], Symbol)
-            show_unquoted(io, args[1], 0, 0, quote_level)
-        else
-            print(io, "(")
-            show_unquoted(io, args[1], 0, 0, quote_level)
-            print(io, ")")
-        end
-        print(io, head)
-
     # `where` syntax
     elseif head === :where && nargs > 1
         parens = 1 <= prec
@@ -2047,12 +2344,16 @@ function show_unquoted(io::IO, ex::Expr, indent::Int, prec::Int, quote_level::In
             first = false
             show_import_path(io, a, quote_level)
         end
+    elseif head === :as && nargs == 2 && valid_import_path(args[1], false)
+        show_import_path(io, args[1], quote_level)
+        print(io, " as ")
+        show_unquoted(io, args[2], indent, 0, quote_level)
     elseif head === :meta && nargs >= 2 && args[1] === :push_loc
         print(io, "# meta: location ", join(args[2:end], " "))
     elseif head === :meta && nargs == 1 && args[1] === :pop_loc
         print(io, "# meta: pop location")
     elseif head === :meta && nargs == 2 && args[1] === :pop_loc
-        print(io, "# meta: pop locations ($(args[2]))")
+        print(io, "# meta: pop locations ($(args[2]::Int))")
     # print anything else as "Expr(head, args...)"
     else
         unhandled = true
@@ -2075,37 +2376,44 @@ end
 
 # show the called object in a signature, given its type `ft`
 # `io` should contain the UnionAll env of the signature
-function show_signature_function(io::IO, @nospecialize(ft), demangle=false, fargname="", html=false)
+function show_signature_function(io::IO, @nospecialize(ft), demangle=false, fargname="", html=false, qualified=false)
     uw = unwrap_unionall(ft)
     if ft <: Function && isa(uw, DataType) && isempty(uw.parameters) &&
         isdefined(uw.name.module, uw.name.mt.name) &&
         ft == typeof(getfield(uw.name.module, uw.name.mt.name))
-        print(io, (demangle ? demangle_function_name : identity)(uw.name.mt.name))
+        if qualified && !is_exported_from_stdlib(uw.name.mt.name, uw.name.module) && uw.name.module !== Main
+            print_within_stacktrace(io, uw.name.module, '.', bold=true)
+        end
+        s = sprint(show_sym, (demangle ? demangle_function_name : identity)(uw.name.mt.name), context=io)
+        print_within_stacktrace(io, s, bold=true)
     elseif isa(ft, DataType) && ft.name === Type.body.name &&
         (f = ft.parameters[1]; !isa(f, TypeVar))
         uwf = unwrap_unionall(f)
         parens = isa(f, UnionAll) && !(isa(uwf, DataType) && f === uwf.name.wrapper)
         parens && print(io, "(")
-        show(io, f)
+        print_within_stacktrace(io, f, bold=true)
         parens && print(io, ")")
     else
         if html
             print(io, "($fargname::<b>", ft, "</b>)")
         else
-            print(io, "($fargname::", ft, ")")
+            print_within_stacktrace(io, "($fargname::", ft, ")", bold=true)
         end
     end
     nothing
 end
 
-function print_within_stacktrace(io, s...; color, bold=false)
+function print_within_stacktrace(io, s...; color=:normal, bold=false)
     if get(io, :backtrace, false)::Bool
         printstyled(io, s...; color, bold)
     else
         print(io, s...)
     end
 end
-function show_tuple_as_call(io::IO, name::Symbol, sig::Type, demangle=false, kwargs=nothing, argnames=nothing)
+
+function show_tuple_as_call(io::IO, name::Symbol, sig::Type;
+                            demangle=false, kwargs=nothing, argnames=nothing,
+                            qualified=false, hasfirst=true)
     # print a method signature tuple for a lambda definition
     if sig === Tuple
         print(io, demangle ? demangle_function_name(name) : name, "(...)")
@@ -2118,19 +2426,23 @@ function show_tuple_as_call(io::IO, name::Symbol, sig::Type, demangle=false, kwa
         env_io = IOContext(env_io, :unionall_env => sig.var)
         sig = sig.body
     end
+    n = 1
     sig = (sig::DataType).parameters
-    show_signature_function(env_io, sig[1], demangle)
+    if hasfirst
+        show_signature_function(env_io, sig[1], demangle, "", false, qualified)
+        n += 1
+    end
     first = true
-    print_within_stacktrace(io, "(", color=:light_black)
+    print_within_stacktrace(io, "(", bold=true)
     show_argnames = argnames !== nothing && length(argnames) == length(sig)
-    for i = 2:length(sig)  # fixme (iter): `eachindex` with offset?
+    for i = n:length(sig)  # fixme (iter): `eachindex` with offset?
         first || print(io, ", ")
         first = false
         if show_argnames
-            print_within_stacktrace(io, argnames[i]; bold=true, color=:light_black)
+            print_within_stacktrace(io, argnames[i]; color=:light_black)
         end
         print(io, "::")
-        print_within_stacktrace(env_io, sig[i]; color=:light_black)
+        print_type_stacktrace(env_io, sig[i])
     end
     if kwargs !== nothing
         print(io, "; ")
@@ -2138,16 +2450,29 @@ function show_tuple_as_call(io::IO, name::Symbol, sig::Type, demangle=false, kwa
         for (k, t) in kwargs
             first || print(io, ", ")
             first = false
-            print_within_stacktrace(io, k; bold=true, color=:light_black)
+            print_within_stacktrace(io, k; color=:light_black)
             print(io, "::")
-            print_within_stacktrace(io, t; color=:light_black)
+            print_type_stacktrace(io, t)
         end
     end
-    print_within_stacktrace(io, ")", color=:light_black)
+    print_within_stacktrace(io, ")", bold=true)
     show_method_params(io, tv)
     nothing
 end
 
+function print_type_stacktrace(io, type; color=:normal)
+    str = sprint(show, type, context=io)
+    i = findfirst('{', str)
+    if !get(io, :backtrace, false)::Bool
+        print(io, str)
+    elseif i === nothing
+        printstyled(io, str; color=color)
+    else
+        printstyled(io, str[1:prevind(str,i)]; color=color)
+        printstyled(io, str[i:end]; color=:light_black)
+    end
+end
+
 resolvebinding(@nospecialize(ex)) = ex
 resolvebinding(ex::QuoteNode) = ex.value
 resolvebinding(ex::Symbol) = resolvebinding(GlobalRef(Main, ex))
@@ -2208,11 +2533,26 @@ function show(io::IO, tv::TypeVar)
     nothing
 end
 
+function show(io::IO, vm::Core.TypeofVararg)
+    print(io, "Vararg")
+    if isdefined(vm, :T)
+        print(io, "{")
+        show(io, vm.T)
+        if isdefined(vm, :N)
+            print(io, ", ")
+            show(io, vm.N)
+        end
+        print(io, "}")
+    end
+end
+
 module IRShow
     const Compiler = Core.Compiler
     using Core.IR
     import ..Base
-    import .Compiler: IRCode, ReturnNode, GotoIfNot, CFG, scan_ssa_use!, Argument, isexpr, compute_basic_blocks, block_for_inst
+    import .Compiler: IRCode, ReturnNode, GotoIfNot, CFG, scan_ssa_use!, Argument,
+        isexpr, compute_basic_blocks, block_for_inst,
+        TriState, Effects, ALWAYS_TRUE, ALWAYS_FALSE
     Base.getindex(r::Compiler.StmtRange, ind::Integer) = Compiler.getindex(r, ind)
     Base.size(r::Compiler.StmtRange) = Compiler.size(r)
     Base.first(r::Compiler.StmtRange) = Compiler.first(r)
@@ -2245,7 +2585,7 @@ function show(io::IO, src::CodeInfo; debuginfo::Symbol=:source)
         # TODO: static parameter values?
         # only accepts :source or :none, we can't have a fallback for default since
         # that would break code_typed(, debuginfo=:source) iff IRShow.default_debuginfo[] = :none
-        IRShow.show_ir(lambda_io, src, IRShow.__debuginfo[debuginfo](src))
+        IRShow.show_ir(lambda_io, src, IRShow.IRShowConfig(IRShow.__debuginfo[debuginfo](src)))
     else
         # this is a CodeInfo that has not been used as a method yet, so its locations are still LineNumberNodes
         body = Expr(:block)
@@ -2372,7 +2712,7 @@ function dump(io::IOContext, x::DataType, n::Int, indent)
     if x !== Any
         print(io, " <: ", supertype(x))
     end
-    if n > 0 && !(x <: Tuple) && !x.abstract
+    if n > 0 && !(x <: Tuple) && !isabstracttype(x)
         tvar_io::IOContext = io
         for tparam in x.parameters
             # approximately recapture the list of tvar parameterization
@@ -2434,7 +2774,7 @@ MyStruct
 function dump(arg; maxdepth=DUMP_DEFAULT_MAXDEPTH)
     # this is typically used interactively, so default to being in Main
     mod = get(stdout, :module, Main)
-    dump(IOContext(stdout::IO, :limit => true, :module => mod), arg; maxdepth=maxdepth)
+    dump(IOContext(stdout, :limit => true, :module => mod), arg; maxdepth=maxdepth)
 end
 
 
@@ -2538,6 +2878,9 @@ function array_summary(io::IO, a, inds)
     print(io, " with indices ", inds2string(inds))
 end
 
+## `summary` for Function
+summary(io::IO, f::Function) = show(io, MIME"text/plain"(), f)
+
 """
     showarg(io::IO, x, toplevel)
 
@@ -2599,8 +2942,9 @@ function showarg(io::IO, v::SubArray, toplevel)
     showindices(io, v.indices...)
     print(io, ')')
     toplevel && print(io, " with eltype ", eltype(v))
+    return nothing
 end
-showindices(io, ::Union{Slice,IdentityUnitRange}, inds...) =
+showindices(io, ::Slice, inds...) =
     (print(io, ", :"); showindices(io, inds...))
 showindices(io, ind1, inds...) =
     (print(io, ", ", ind1); showindices(io, inds...))
@@ -2612,14 +2956,23 @@ function showarg(io::IO, r::ReshapedArray, toplevel)
     print(io, ", ", join(r.dims, ", "))
     print(io, ')')
     toplevel && print(io, " with eltype ", eltype(r))
+    return nothing
 end
 
-function showarg(io::IO, r::ReinterpretArray{T}, toplevel) where {T}
+function showarg(io::IO, r::NonReshapedReinterpretArray{T}, toplevel) where {T}
     print(io, "reinterpret(", T, ", ")
     showarg(io, parent(r), false)
     print(io, ')')
 end
 
+function showarg(io::IO, r::ReshapedReinterpretArray{T}, toplevel) where {T}
+    print(io, "reinterpret(reshape, ", T, ", ")
+    showarg(io, parent(r), false)
+    print(io, ')')
+    toplevel && print(io, " with eltype ", eltype(r))
+    return nothing
+end
+
 # printing iterators from Base.Iterators
 
 function show(io::IO, e::Iterators.Enumerate)
@@ -2672,3 +3025,15 @@ end
 bitshow(B::BitArray) = bitshow(stdout, B)
 
 bitstring(B::BitArray) = sprint(bitshow, B)
+
+# printing OpaqueClosure
+function show(io::IO, oc::Core.OpaqueClosure)
+    A, R = typeof(oc).parameters
+    show_tuple_as_call(io, Symbol(""), A; hasfirst=false)
+    print(io, "::", R)
+    print(io, "->◌")
+end
+
+function show(io::IO, ::MIME"text/plain", oc::Core.OpaqueClosure{A, R}) where {A, R}
+    show(io, oc)
+end
diff --git a/base/simdloop.jl b/base/simdloop.jl
index e0b6d89d972775..29e2382cf39aa8 100644
--- a/base/simdloop.jl
+++ b/base/simdloop.jl
@@ -8,7 +8,7 @@ export @simd, simd_outer_range, simd_inner_length, simd_index
 
 # Error thrown from ill-formed uses of @simd
 struct SimdError <: Exception
-    msg::AbstractString
+    msg::String
 end
 
 # Parse iteration space expression
diff --git a/base/some.jl b/base/some.jl
index 82638e4250d2e7..8be58739a4df41 100644
--- a/base/some.jl
+++ b/base/some.jl
@@ -16,7 +16,7 @@ Some(::Type{T}) where {T} = Some{Type{T}}(T)
 
 promote_rule(::Type{Some{T}}, ::Type{Some{S}}) where {T, S<:T} = Some{T}
 
-nonnothingtype(::Type{T}) where {T} = Core.Compiler.typesubtract(T, Nothing)
+nonnothingtype(::Type{T}) where {T} = typesplit(T, Nothing)
 promote_rule(T::Type{Nothing}, S::Type) = Union{S, Nothing}
 function promote_rule(T::Type{>:Nothing}, S::Type)
     R = nonnothingtype(T)
@@ -34,6 +34,8 @@ end
 
 convert(::Type{T}, x::T) where {T>:Nothing} = x
 convert(::Type{T}, x) where {T>:Nothing} = convert(nonnothingtype_checked(T), x)
+convert(::Type{Nothing}, x) = throw(MethodError(convert, (Nothing, x)))
+convert(::Type{Nothing}, ::Nothing) = nothing
 convert(::Type{Some{T}}, x::Some{T}) where {T} = x
 convert(::Type{Some{T}}, x::Some) where {T} = Some{T}(convert(T, x.value))
 
@@ -62,19 +64,20 @@ Return `true` if `x === nothing`, and return `false` if not.
 
 !!! compat "Julia 1.1"
     This function requires at least Julia 1.1.
+
+See also [`something`](@ref), [`notnothing`](@ref), [`ismissing`](@ref).
 """
-isnothing(::Any) = false
-isnothing(::Nothing) = true
+isnothing(x) = x === nothing
 
 
 """
-    something(x, y...)
+    something(x...)
 
 Return the first value in the arguments which is not equal to [`nothing`](@ref),
 if any. Otherwise throw an error.
 Arguments of type [`Some`](@ref) are unwrapped.
 
-See also [`coalesce`](@ref).
+See also [`coalesce`](@ref), [`skipmissing`](@ref), [`@something`](@ref).
 
 # Examples
 ```jldoctest
@@ -97,3 +100,46 @@ something() = throw(ArgumentError("No value arguments present"))
 something(x::Nothing, y...) = something(y...)
 something(x::Some, y...) = x.value
 something(x::Any, y...) = x
+
+
+"""
+    @something(x...)
+
+Short-circuiting version of [`something`](@ref).
+
+# Examples
+```jldoctest
+julia> f(x) = (println("f(\$x)"); nothing);
+
+julia> a = 1;
+
+julia> a = @something a f(2) f(3) error("Unable to find default for `a`")
+1
+
+julia> b = nothing;
+
+julia> b = @something b f(2) f(3) error("Unable to find default for `b`")
+f(2)
+f(3)
+ERROR: Unable to find default for `b`
+[...]
+
+julia> b = @something b f(2) f(3) Some(nothing)
+f(2)
+f(3)
+
+julia> b === nothing
+true
+```
+
+!!! compat "Julia 1.7"
+    This macro is available as of Julia 1.7.
+"""
+macro something(args...)
+    expr = :(nothing)
+    for arg in reverse(args)
+        expr = :(val = $(esc(arg)); val !== nothing ? val : ($expr))
+    end
+    something = GlobalRef(Base, :something)
+    return :($something($expr))
+end
diff --git a/base/sort.jl b/base/sort.jl
index ddd09e89cc2ec9..23579abd77547e 100644
--- a/base/sort.jl
+++ b/base/sort.jl
@@ -5,12 +5,13 @@ module Sort
 import ..@__MODULE__, ..parentmodule
 const Base = parentmodule(@__MODULE__)
 using .Base.Order
-using .Base: copymutable, LinearIndices, length, (:),
+using .Base: copymutable, LinearIndices, length, (:), iterate,
     eachindex, axes, first, last, similar, zip, OrdinalRange,
     AbstractVector, @inbounds, AbstractRange, @eval, @inline, Vector, @noinline,
     AbstractMatrix, AbstractUnitRange, isless, identity, eltype, >, <, <=, >=, |, +, -, *, !,
     extrema, sub_with_overflow, add_with_overflow, oneunit, div, getindex, setindex!,
-    length, resize!, fill, Missing, require_one_based_indexing, keytype
+    length, resize!, fill, Missing, require_one_based_indexing, keytype, UnitRange,
+    min, max, reinterpret, signed, unsigned, Signed, Unsigned, typemin, xor, Type, BitSigned
 
 using .Base: >>>, !==
 
@@ -27,6 +28,7 @@ export # also exported by Base
     searchsorted,
     searchsortedfirst,
     searchsortedlast,
+    insorted,
     # order & algorithm:
     sort,
     sort!,
@@ -67,7 +69,7 @@ function issorted(itr, order::Ordering)
 end
 
 """
-    issorted(v, lt=isless, by=identity, rev:Bool=false, order::Ordering=Forward)
+    issorted(v, lt=isless, by=identity, rev::Bool=false, order::Ordering=Forward)
 
 Test whether a vector is in sorted order. The `lt`, `by` and `rev` keywords modify what
 order is considered to be sorted just as they do for [`sort`](@ref).
@@ -230,92 +232,62 @@ end
 
 function searchsortedlast(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a)
     require_one_based_indexing(a)
-    if step(a) == 0
-        lt(o, x, first(a)) ? 0 : length(a)
+    f, h, l = first(a), step(a), last(a)
+    if lt(o, x, f)
+        0
+    elseif h == 0 || !lt(o, x, l)
+        length(a)
     else
-        n = round(Integer, clamp((x - first(a)) / step(a) + 1, 1, length(a)))
+        n = round(Integer, (x - f) / h + 1)
         lt(o, x, a[n]) ? n - 1 : n
     end
 end
 
 function searchsortedfirst(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering)::keytype(a)
     require_one_based_indexing(a)
-    if step(a) == 0
-        lt(o, first(a), x) ? length(a) + 1 : 1
+    f, h, l = first(a), step(a), last(a)
+    if !lt(o, f, x)
+        1
+    elseif h == 0 || lt(o, l, x)
+        length(a) + 1
     else
-        n = round(Integer, clamp((x - first(a)) / step(a) + 1, 1, length(a)))
+        n = round(Integer, (x - f) / h + 1)
         lt(o, a[n], x) ? n + 1 : n
     end
 end
 
 function searchsortedlast(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a)
     require_one_based_indexing(a)
-    h = step(a)
-    if h == 0
-        lt(o, x, first(a)) ? 0 : length(a)
-    elseif h > 0 && x < first(a)
-        firstindex(a) - 1
-    elseif h > 0 && x >= last(a)
-        lastindex(a)
-    elseif h < 0 && x > first(a)
-        firstindex(a) - 1
-    elseif h < 0 && x <= last(a)
-        lastindex(a)
+    f, h, l = first(a), step(a), last(a)
+    if lt(o, x, f)
+        0
+    elseif h == 0 || !lt(o, x, l)
+        length(a)
     else
         if o isa ForwardOrdering
-            fld(floor(Integer, x) - first(a), h) + 1
+            fld(floor(Integer, x) - f, h) + 1
         else
-            fld(ceil(Integer, x) - first(a), h) + 1
+            fld(ceil(Integer, x) - f, h) + 1
         end
     end
 end
 
 function searchsortedfirst(a::AbstractRange{<:Integer}, x::Real, o::DirectOrdering)::keytype(a)
     require_one_based_indexing(a)
-    h = step(a)
-    if h == 0
-        lt(o, first(a), x) ? length(a)+1 : 1
-    elseif h > 0 && x <= first(a)
-        firstindex(a)
-    elseif h > 0 && x > last(a)
-        lastindex(a) + 1
-    elseif h < 0 && x >= first(a)
-        firstindex(a)
-    elseif h < 0 && x < last(a)
-        lastindex(a) + 1
+    f, h, l = first(a), step(a), last(a)
+    if !lt(o, f, x)
+        1
+    elseif h == 0 || lt(o, l, x)
+        length(a) + 1
     else
         if o isa ForwardOrdering
-            -fld(floor(Integer, -x) + Signed(first(a)), h) + 1
+            cld(ceil(Integer, x) - f, h) + 1
         else
-            -fld(ceil(Integer, -x) + Signed(first(a)), h) + 1
+            cld(floor(Integer, x) - f, h) + 1
         end
     end
 end
 
-function searchsortedfirst(a::AbstractRange{<:Integer}, x::Unsigned, o::DirectOrdering)::keytype(a)
-    require_one_based_indexing(a)
-    if lt(o, first(a), x)
-        if step(a) == 0
-            length(a) + 1
-        else
-            min(cld(x - first(a), step(a)), length(a)) + 1
-        end
-    else
-        1
-    end
-end
-
-function searchsortedlast(a::AbstractRange{<:Integer}, x::Unsigned, o::DirectOrdering)::keytype(a)
-    require_one_based_indexing(a)
-    if lt(o, x, first(a))
-        0
-    elseif step(a) == 0
-        length(a)
-    else
-        min(fld(x - first(a), step(a)) + 1, length(a))
-    end
-end
-
 searchsorted(a::AbstractRange{<:Real}, x::Real, o::DirectOrdering) =
     searchsortedfirst(a, x, o) : searchsortedlast(a, x, o)
 
@@ -336,6 +308,8 @@ according to the order specified by the `by`, `lt` and `rev` keywords, assuming
 is already sorted in that order. Return an empty range located at the insertion point
 if `a` does not contain values equal to `x`.
 
+See also: [`insorted`](@ref), [`searchsortedfirst`](@ref), [`sort`](@ref), [`findall`](@ref).
+
 # Examples
 ```jldoctest
 julia> searchsorted([1, 2, 4, 5, 5, 7], 4) # single match
@@ -359,9 +333,11 @@ julia> searchsorted([1, 2, 4, 5, 5, 7], 0) # no match, insert at start
     searchsortedfirst(a, x; by=<transform>, lt=<comparison>, rev=false)
 
 Return the index of the first value in `a` greater than or equal to `x`, according to the
-specified order. Return `length(a) + 1` if `x` is greater than all values in `a`.
+specified order. Return `lastindex(a) + 1` if `x` is greater than all values in `a`.
 `a` is assumed to be sorted.
 
+See also: [`searchsortedlast`](@ref), [`searchsorted`](@ref), [`findfirst`](@ref).
+
 # Examples
 ```jldoctest
 julia> searchsortedfirst([1, 2, 4, 5, 5, 7], 4) # single match
@@ -385,8 +361,8 @@ julia> searchsortedfirst([1, 2, 4, 5, 5, 7], 0) # no match, insert at start
     searchsortedlast(a, x; by=<transform>, lt=<comparison>, rev=false)
 
 Return the index of the last value in `a` less than or equal to `x`, according to the
-specified order. Return `0` if `x` is less than all values in `a`. `a` is assumed to
-be sorted.
+specified order. Return `firstindex(a) - 1` if `x` is less than all values in `a`. `a` is
+assumed to be sorted.
 
 # Examples
 ```jldoctest
@@ -407,6 +383,40 @@ julia> searchsortedlast([1, 2, 4, 5, 5, 7], 0) # no match, insert at start
 ```
 """ searchsortedlast
 
+"""
+    insorted(a, x; by=<transform>, lt=<comparison>, rev=false) -> Bool
+
+Determine whether an item is in the given sorted collection, in the sense that
+it is [`==`](@ref) to one of the values of the collection according to the order
+specified by the `by`, `lt` and `rev` keywords, assuming that `a` is already
+sorted in that order, see [`sort`](@ref) for the keywords.
+
+See also [`in`](@ref).
+
+# Examples
+```jldoctest
+julia> insorted(4, [1, 2, 4, 5, 5, 7]) # single match
+true
+
+julia> insorted(5, [1, 2, 4, 5, 5, 7]) # multiple matches
+true
+
+julia> insorted(3, [1, 2, 4, 5, 5, 7]) # no match
+false
+
+julia> insorted(9, [1, 2, 4, 5, 5, 7]) # no match
+false
+
+julia> insorted(0, [1, 2, 4, 5, 5, 7]) # no match
+false
+```
+
+!!! compat "Julia 1.6"
+     `insorted` was added in Julia 1.6.
+"""
+function insorted end
+insorted(x, v::AbstractVector; kw...) = !isempty(searchsorted(v, x; kw...))
+insorted(x, r::AbstractRange) = in(x, r)
 
 ## sorting algorithms ##
 
@@ -416,6 +426,22 @@ struct InsertionSortAlg <: Algorithm end
 struct QuickSortAlg     <: Algorithm end
 struct MergeSortAlg     <: Algorithm end
 
+"""
+    AdaptiveSort(fallback)
+
+Indicate that a sorting function should use the fastest available algorithm.
+
+Adaptive sort will use the algorithm specified by `fallback` for types and orders that are
+not [`UIntMappable`](@ref). Otherwise, it will typically use:
+  * Insertion sort for short vectors
+  * Radix sort for long vectors
+  * Counting sort for vectors of integers spanning a short range
+
+Adaptive sort is guaranteed to be stable if the fallback algorithm is stable.
+"""
+struct AdaptiveSort{Fallback <: Algorithm} <: Algorithm
+    fallback::Fallback
+end
 """
     PartialQuickSort{T <: Union{Integer,OrdinalRange}}
 
@@ -441,7 +467,7 @@ end
 Indicate that a sorting function should use the insertion sort
 algorithm. Insertion sort traverses the collection one element
 at a time, inserting each element into its correct, sorted position in
-the output list.
+the output vector.
 
 Characteristics:
   * *stable*: preserves the ordering of elements which
@@ -485,8 +511,8 @@ Characteristics:
 """
 const MergeSort     = MergeSortAlg()
 
-const DEFAULT_UNSTABLE = QuickSort
-const DEFAULT_STABLE   = MergeSort
+const DEFAULT_UNSTABLE = AdaptiveSort(QuickSort)
+const DEFAULT_STABLE   = AdaptiveSort(MergeSort)
 const SMALL_ALGORITHM  = InsertionSort
 const SMALL_THRESHOLD  = 20
 
@@ -494,13 +520,9 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, ::InsertionSortAlg,
     @inbounds for i = lo+1:hi
         j = i
         x = v[i]
-        while j > lo
-            if lt(o, x, v[j-1])
-                v[j] = v[j-1]
-                j -= 1
-                continue
-            end
-            break
+        while j > lo && lt(o, x, v[j-1])
+            v[j] = v[j-1]
+            j -= 1
         end
         v[j] = x
     end
@@ -616,40 +638,20 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::MergeSortAlg, o::
     return v
 end
 
-function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort{<:Integer},
+function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort,
                o::Ordering)
     @inbounds while lo < hi
         hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
         j = partition!(v, lo, hi, o)
-        if j >= a.k
-            # we don't need to sort anything bigger than j
-            hi = j-1
-        elseif j-lo < hi-j
-            # recurse on the smaller chunk
-            # this is necessary to preserve O(log(n))
-            # stack space in the worst case (rather than O(n))
-            lo < (j-1) && sort!(v, lo, j-1, a, o)
-            lo = j+1
-        else
-            (j+1) < hi && sort!(v, j+1, hi, a, o)
-            hi = j-1
-        end
-    end
-    return v
-end
-
-
-function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort{T},
-               o::Ordering) where T<:OrdinalRange
-    @inbounds while lo < hi
-        hi-lo <= SMALL_THRESHOLD && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
-        j = partition!(v, lo, hi, o)
 
         if j <= first(a.k)
             lo = j+1
         elseif j >= last(a.k)
             hi = j-1
         else
+            # recurse on the smaller chunk
+            # this is necessary to preserve O(log(n))
+            # stack space in the worst case (rather than O(n))
             if j-lo < hi-j
                 lo < (j-1) && sort!(v, lo, j-1, a, o)
                 lo = j+1
@@ -662,11 +664,202 @@ function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::PartialQuickSort{
     return v
 end
 
+# This is a stable least significant bit first radix sort.
+#
+# That is, it first sorts the entire vector by the last chunk_size bits, then by the second
+# to last chunk_size bits, and so on. Stability means that it will not reorder two elements
+# that compare equal. This is essential so that the order introduced by earlier,
+# less significant passes is preserved by later passes.
+#
+# Each pass divides the input into 2^chunk_size == mask+1 buckets. To do this, it
+#  * counts the number of entries that fall into each bucket
+#  * uses those counts to compute the indices to move elements of those buckets into
+#  * moves elements into the computed indices in the swap array
+#  * switches the swap and working array
+#
+# In the case of an odd number of passes, the returned vector will === the input vector t,
+# not v. This is one of the many reasons radix_sort! is not exported.
+function radix_sort!(v::AbstractVector{U}, lo::Integer, hi::Integer, bits::Unsigned,
+                     t::AbstractVector{U}, chunk_size=radix_chunk_size_heuristic(lo, hi, bits)) where U <: Unsigned
+    # bits is unsigned for performance reasons.
+    mask = UInt(1) << chunk_size - 0x1
+    counts = Vector{UInt}(undef, mask+2)
+
+    @inbounds for shift in 0:chunk_size:bits-1
+
+        # counts[2:mask+2] will store the number of elements that fall into each bucket.
+        # if chunk_size = 8, counts[2] is bucket 0x00 and counts[257] is bucket 0xff.
+        counts .= 0
+        for k in lo:hi
+            x = v[k]                  # lookup the element
+            i = (x >> shift)&mask + 2 # compute its bucket's index for this pass
+            counts[i] += 1            # increment that bucket's count
+        end
+
+        counts[1] = lo                # set target index for the first bucket
+        cumsum!(counts, counts)       # set target indices for subsequent buckets
+        # counts[1:mask+1] now stores indices where the first member of each bucket
+        # belongs, not the number of elements in each bucket. We will put the first element
+        # of bucket 0x00 in t[counts[1]], the next element of bucket 0x00 in t[counts[1]+1],
+        # and the last element of bucket 0x00 in t[counts[2]-1].
+
+        for k in lo:hi
+            x = v[k]                  # lookup the element
+            i = (x >> shift)&mask + 1 # compute its bucket's index for this pass
+            j = counts[i]             # lookup the target index
+            t[j] = x                  # put the element where it belongs
+            counts[i] = j + 1         # increment the target index for the next
+        end                           #  ↳ element in this bucket
+
+        v, t = t, v # swap the now sorted destination vector t back into primary vector v
+
+    end
+
+    v
+end
+function radix_chunk_size_heuristic(lo::Integer, hi::Integer, bits::Unsigned)
+    # chunk_size is the number of bits to radix over at once.
+    # We need to allocate an array of size 2^chunk size, and on the other hand the higher
+    # the chunk size the fewer passes we need. Theoretically, chunk size should be based on
+    # the Lambert W function applied to length. Empirically, we use this heuristic:
+    guess = min(10, log(maybe_unsigned(hi-lo))*3/4+3)
+    # TODO the maximum chunk size should be based on archetecture cache size.
+
+    # We need iterations * chunk size ≥ bits, and these cld's
+    # make an effort to get iterations * chunk size ≈ bits
+    UInt8(cld(bits, cld(bits, guess)))
+end
+
+# For AbstractVector{Bool}, counting sort is always best.
+# This is an implementation of counting sort specialized for Bools.
+function sort!(v::AbstractVector{<:Bool}, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering)
+    first = lt(o, false, true) ? false : lt(o, true, false) ? true : return v
+    count = 0
+    @inbounds for i in lo:hi
+        if v[i] == first
+            count += 1
+        end
+    end
+    @inbounds v[lo:lo+count-1] .= first
+    @inbounds v[lo+count:hi] .= !first
+    v
+end
+
+maybe_unsigned(x::Integer) = x # this is necessary to avoid calling unsigned on BigInt
+maybe_unsigned(x::BitSigned) = unsigned(x)
+function _extrema(v::AbstractArray, lo::Integer, hi::Integer, o::Ordering)
+    mn = mx = v[lo]
+    @inbounds for i in (lo+1):hi
+        vi = v[i]
+        lt(o, vi, mn) && (mn = vi)
+        lt(o, mx, vi) && (mx = vi)
+    end
+    mn, mx
+end
+function sort!(v::AbstractVector, lo::Integer, hi::Integer, a::AdaptiveSort, o::Ordering)
+    # if the sorting task is not UIntMappable, then we can't radix sort or sort_int_range!
+    # so we skip straight to the fallback algorithm which is comparison based.
+    U = UIntMappable(eltype(v), o)
+    U === nothing && return sort!(v, lo, hi, a.fallback, o)
+
+    # to avoid introducing excessive detection costs for the trivial sorting problem
+    # and to avoid overflow, we check for small inputs before any other runtime checks
+    hi <= lo && return v
+    lenm1 = maybe_unsigned(hi-lo) # adding 1 would risk overflow
+    # only count sort on a short range can compete with insertion sort when lenm1 < 40
+    # and the optimization is not worth the detection cost, so we use insertion sort.
+    lenm1 < 40 && return sort!(v, lo, hi, SMALL_ALGORITHM, o)
+
+    # For most arrays, a presorted check is cheap (overhead < 5%) and for most large
+    # arrays it is essentially free (<1%). Insertion sort runs in a fast O(n) on presorted
+    # input and this guarantees presorted input will always be efficiently handled
+    issorted(view(v, lo:hi), o) && return v
+
+    # For large arrays, a reverse-sorted check is essentially free (overhead < 1%)
+    if lenm1 >= 500 && issorted(view(v, lo:hi), ReverseOrdering(o))
+        reverse!(view(v, lo:hi))
+        return v
+    end
+
+    # UInt128 does not support fast bit shifting so we never
+    # dispatch to radix sort but we may still perform count sort
+    if sizeof(U) > 8
+        if eltype(v) <: Integer && o isa DirectOrdering
+            v_min, v_max = _extrema(v, lo, hi, Forward)
+            v_range = maybe_unsigned(v_max-v_min)
+            v_range == 0 && return v # all same
+
+            # we know lenm1 ≥ 40, so this will never underflow.
+            # if lenm1 > 3.7e18 (59 exabytes), then this may incorrectly dispatch to fallback
+            if v_range < 5lenm1-100 # count sort will outperform comparison sort if v's range is small
+                return sort_int_range!(v, Int(v_range+1), v_min, o === Forward ? identity : reverse, lo, hi)
+            end
+        end
+        return sort!(v, lo, hi, a.fallback, o)
+    end
+
+    v_min, v_max = _extrema(v, lo, hi, o)
+    lt(o, v_min, v_max) || return v # all same
+    if eltype(v) <: Integer && o isa DirectOrdering
+        R = o === Reverse
+        v_range = maybe_unsigned(R ? v_min-v_max : v_max-v_min)
+        if v_range < div(lenm1, 2) # count sort will be superior if v's range is very small
+            return sort_int_range!(v, Int(v_range+1), R ? v_max : v_min, R ? reverse : identity, lo, hi)
+        end
+    end
+
+    u_min, u_max = uint_map(v_min, o), uint_map(v_max, o)
+    u_range = maybe_unsigned(u_max-u_min)
+    if u_range < div(lenm1, 2) # count sort will be superior if u's range is very small
+        u = uint_map!(v, lo, hi, o)
+        sort_int_range!(u, Int(u_range+1), u_min, identity, lo, hi)
+        return uint_unmap!(v, u, lo, hi, o)
+    end
+
+    # if u's range is small, then once we subtract out v_min, we'll get a vector like
+    # UInt16[0x001a, 0x0015, 0x0006, 0x001b, 0x0008, 0x000c, 0x0001, 0x000e, 0x001c, 0x0009]
+    # where we only need to radix over the last few bits (5, in the example).
+    bits = unsigned(8sizeof(u_range) - leading_zeros(u_range))
+
+    # radix sort runs in O(bits * lenm1), insertion sort runs in O(lenm1^2). Radix sort
+    # has a constant factor that is three times higher, so radix runtime is 3bits * lenm1
+    # and insertion runtime is lenm1^2. Empirically, insertion is faster than radix iff
+    # lenm1 < 3bits.
+    # Insertion < Radix
+    #   lenm1^2 < 3 * bits * lenm1
+    #     lenm1 < 3bits
+    if lenm1 < 3bits
+        # at lenm1 = 64*3-1, QuickSort is about 20% faster than InsertionSort.
+        alg = a.fallback === QuickSort && lenm1 > 120 ? QuickSort : SMALL_ALGORITHM
+        return sort!(v, lo, hi, alg, o)
+    end
+
+    # At this point, we are committed to radix sort.
+    u = uint_map!(v, lo, hi, o)
+
+    # we subtract u_min to avoid radixing over unnecessary bits. For example,
+    # Int32[3, -1, 2] uint_maps to UInt32[0x80000003, 0x7fffffff, 0x80000002]
+    # which uses all 32 bits, but once we subtract u_min = 0x7fffffff, we are left with
+    # UInt32[0x00000004, 0x00000000, 0x00000003] which uses only 3 bits, and
+    # Float32[2.012, 400.0, 12.345] uint_maps to UInt32[0x3fff3b63, 0x3c37ffff, 0x414570a4]
+    # which is reduced to UInt32[0x03c73b64, 0x00000000, 0x050d70a5] using only 26 bits.
+    # the overhead for this subtraction is small enough that it is worthwhile in many cases.
+
+    # this is faster than u[lo:hi] .-= u_min as of v1.9.0-DEV.100
+    @inbounds for i in lo:hi
+        u[i] -= u_min
+    end
+
+    u2 = radix_sort!(u, lo, hi, bits, similar(u))
+    uint_unmap!(v, u2, lo, hi, o, u_min)
+end
 
 ## generic sorting methods ##
 
 defalg(v::AbstractArray) = DEFAULT_STABLE
 defalg(v::AbstractArray{<:Union{Number, Missing}}) = DEFAULT_UNSTABLE
+defalg(v::AbstractArray{Missing}) = DEFAULT_UNSTABLE # for method disambiguation
+defalg(v::AbstractArray{Union{}}) = DEFAULT_UNSTABLE # for method disambiguation
 
 function sort!(v::AbstractVector, alg::Algorithm, order::Ordering)
     inds = axes(v,1)
@@ -680,7 +873,8 @@ Sort the vector `v` in place. [`QuickSort`](@ref) is used by default for numeric
 [`MergeSort`](@ref) is used for other arrays. You can specify an algorithm to use via the `alg`
 keyword (see [Sorting Algorithms](@ref) for available algorithms). The `by` keyword lets you provide
 a function that will be applied to each element before comparison; the `lt` keyword allows
-providing a custom "less than" function; use `rev=true` to reverse the sorting order. These
+providing a custom "less than" function (note that for every `x` and `y`, only one of `lt(x,y)`
+and `lt(y,x)` can return `true`); use `rev=true` to reverse the sorting order. These
 options are independent and can be used together in all possible combinations: if both `by`
 and `lt` are specified, the `lt` function is applied to the result of the `by` function;
 `rev=true` reverses whatever ordering specified via the `by` and `lt` keywords.
@@ -718,33 +912,22 @@ function sort!(v::AbstractVector;
                by=identity,
                rev::Union{Bool,Nothing}=nothing,
                order::Ordering=Forward)
-    ordr = ord(lt,by,rev,order)
-    if (ordr === Forward || ordr === Reverse) && eltype(v)<:Integer
-        n = length(v)
-        if n > 1
-            min, max = extrema(v)
-            (diff, o1) = sub_with_overflow(max, min)
-            (rangelen, o2) = add_with_overflow(diff, oneunit(diff))
-            if !o1 && !o2 && rangelen < div(n,2)
-                return sort_int_range!(v, rangelen, min, ordr === Reverse ? reverse : identity)
-            end
-        end
-    end
-    sort!(v, alg, ordr)
+    sort!(v, alg, ord(lt,by,rev,order))
 end
 
 # sort! for vectors of few unique integers
-function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse)
+function sort_int_range!(x::AbstractVector{<:Integer}, rangelen, minval, maybereverse,
+                         lo=firstindex(x), hi=lastindex(x))
     offs = 1 - minval
 
-    where = fill(0, rangelen)
-    @inbounds for i = eachindex(x)
-        where[x[i] + offs] += 1
+    counts = fill(0, rangelen)
+    @inbounds for i = lo:hi
+        counts[x[i] + offs] += 1
     end
 
-    idx = firstindex(x)
+    idx = lo
     @inbounds for i = maybereverse(1:rangelen)
-        lastidx = idx + where[i] - 1
+        lastidx = idx + counts[i] - 1
         val = i-offs
         for j = idx:lastidx
             x[j] = val
@@ -893,7 +1076,7 @@ using the same keywords as [`sort!`](@ref). The permutation is guaranteed to be
 if the sorting algorithm is unstable, meaning that indices of equal elements appear in
 ascending order.
 
-See also [`sortperm!`](@ref).
+See also [`sortperm!`](@ref), [`partialsortperm`](@ref), [`invperm`](@ref), [`indexin`](@ref).
 
 # Examples
 ```jldoctest
@@ -985,22 +1168,22 @@ function sortperm_int_range(x::Vector{<:Integer}, rangelen, minval)
     offs = 1 - minval
     n = length(x)
 
-    where = fill(0, rangelen+1)
-    where[1] = 1
+    counts = fill(0, rangelen+1)
+    counts[1] = 1
     @inbounds for i = 1:n
-        where[x[i] + offs + 1] += 1
+        counts[x[i] + offs + 1] += 1
     end
 
-    #cumsum!(where, where)
-    @inbounds for i = 2:length(where)
-        where[i] += where[i-1]
+    #cumsum!(counts, counts)
+    @inbounds for i = 2:length(counts)
+        counts[i] += counts[i-1]
     end
 
     P = Vector{Int}(undef, n)
     @inbounds for i = 1:n
         label = x[i] + offs
-        P[where[label]] = i
-        where[label] += 1
+        P[counts[label]] = i
+        counts[label] += 1
     end
 
     return P
@@ -1108,7 +1291,7 @@ function sort!(A::AbstractArray;
 
     1 <= k <= nd || throw(ArgumentError("dimension out of range"))
 
-    remdims = ntuple(i -> i == k ? 1 : size(A, i), nd)
+    remdims = ntuple(i -> i == k ? 1 : axes(A, i), nd)
     for idx in CartesianIndices(remdims)
         Av = view(A, ntuple(i -> i == k ? Colon() : idx[i], nd)...)
         sort!(Av, alg, ordr)
@@ -1116,18 +1299,113 @@ function sort!(A::AbstractArray;
     A
 end
 
+
+## uint mapping to allow radix sorting primitives other than UInts ##
+
+"""
+    UIntMappable(T::Type, order::Ordering)
+
+Return `typeof(uint_map(x::T, order))` if [`uint_map`](@ref) and
+[`uint_unmap`](@ref) are implemented.
+
+If either is not implemented, return `nothing`.
+"""
+UIntMappable(T::Type, order::Ordering) = nothing
+
+"""
+    uint_map(x, order::Ordering)::Unsigned
+
+Map `x` to an un unsigned integer, maintaining sort order.
+
+The map should be reversible with [`uint_unmap`](@ref), so `isless(order, a, b)` must be
+a linear ordering for `a, b <: typeof(x)`. Satisfies
+`isless(order, a, b) === (uint_map(a, order) < uint_map(b, order))`
+and `x === uint_unmap(typeof(x), uint_map(x, order), order)`
+
+See also: [`UIntMappable`](@ref) [`uint_unmap`](@ref)
+"""
+function uint_map end
+
+"""
+    uint_unmap(T::Type, u::Unsigned, order::Ordering)
+
+Reconstruct the unique value `x::T` that uint_maps to `u`. Satisfies
+`x === uint_unmap(T, uint_map(x::T, order), order)` for all `x <: T`.
+
+See also: [`uint_map`](@ref) [`UIntMappable`](@ref)
+"""
+function uint_unmap end
+
+
+### Primitive Types
+
+# Integers
+uint_map(x::Unsigned, ::ForwardOrdering) = x
+uint_unmap(::Type{T}, u::T, ::ForwardOrdering) where T <: Unsigned = u
+
+uint_map(x::Signed, ::ForwardOrdering) =
+    unsigned(xor(x, typemin(x)))
+uint_unmap(::Type{T}, u::Unsigned, ::ForwardOrdering) where T <: Signed =
+    xor(signed(u), typemin(T))
+
+# unsigned(Int) is not available during bootstrapping.
+for (U, S) in [(UInt8, Int8), (UInt16, Int16), (UInt32, Int32), (UInt64, Int64), (UInt128, Int128)]
+    @eval UIntMappable(::Type{<:Union{$U, $S}}, ::ForwardOrdering) = $U
+end
+
+# Floats are not UIntMappable under regular orderings because they fail on NaN edge cases.
+# uint mappings for floats are defined in Float, where the Left and Right orderings
+# guarantee that there are no NaN values
+
+# Chars
+uint_map(x::Char, ::ForwardOrdering) = reinterpret(UInt32, x)
+uint_unmap(::Type{Char}, u::UInt32, ::ForwardOrdering) = reinterpret(Char, u)
+UIntMappable(::Type{Char}, ::ForwardOrdering) = UInt32
+
+### Reverse orderings
+uint_map(x, rev::ReverseOrdering) = ~uint_map(x, rev.fwd)
+uint_unmap(T::Type, u::Unsigned, rev::ReverseOrdering) = uint_unmap(T, ~u, rev.fwd)
+UIntMappable(T::Type, order::ReverseOrdering) = UIntMappable(T, order.fwd)
+
+
+### Vectors
+
+# Convert v to unsigned integers in place, maintaining sort order.
+function uint_map!(v::AbstractVector, lo::Integer, hi::Integer, order::Ordering)
+    u = reinterpret(UIntMappable(eltype(v), order), v)
+    @inbounds for i in lo:hi
+        u[i] = uint_map(v[i], order)
+    end
+    u
+end
+
+function uint_unmap!(v::AbstractVector, u::AbstractVector{U}, lo::Integer, hi::Integer,
+                     order::Ordering, offset::U=zero(U)) where U <: Unsigned
+    @inbounds for i in lo:hi
+        v[i] = uint_unmap(eltype(v), u[i]+offset, order)
+    end
+    v
+end
+
+
 ## fast clever sorting for floats ##
 
 module Float
 using ..Sort
 using ...Order
-using ..Base: @inbounds, AbstractVector, Vector, last, axes
+using ..Base: @inbounds, AbstractVector, Vector, last, axes, Missing, Type, reinterpret
 
 import Core.Intrinsics: slt_int
-import ..Sort: sort!
+import ..Sort: sort!, UIntMappable, uint_map, uint_unmap
 import ...Order: lt, DirectOrdering
 
 const Floats = Union{Float32,Float64}
+const FPSortable = Union{ # Mixed Float32 and Float64 are not allowed.
+    AbstractVector{Union{Float32, Missing}},
+    AbstractVector{Union{Float64, Missing}},
+    AbstractVector{Float32},
+    AbstractVector{Float64},
+    AbstractVector{Missing}}
 
 struct Left <: Ordering end
 struct Right <: Ordering end
@@ -1141,17 +1419,40 @@ right(o::Perm) = Perm(right(o.order), o.data)
 lt(::Left, x::T, y::T) where {T<:Floats} = slt_int(y, x)
 lt(::Right, x::T, y::T) where {T<:Floats} = slt_int(x, y)
 
+uint_map(x::Float32, ::Left) = ~reinterpret(UInt32, x)
+uint_unmap(::Type{Float32}, u::UInt32, ::Left) = reinterpret(Float32, ~u)
+uint_map(x::Float32, ::Right) = reinterpret(UInt32, x)
+uint_unmap(::Type{Float32}, u::UInt32, ::Right) = reinterpret(Float32, u)
+UIntMappable(::Type{Float32}, ::Union{Left, Right}) = UInt32
+
+uint_map(x::Float64, ::Left) = ~reinterpret(UInt64, x)
+uint_unmap(::Type{Float64}, u::UInt64, ::Left) = reinterpret(Float64, ~u)
+uint_map(x::Float64, ::Right) = reinterpret(UInt64, x)
+uint_unmap(::Type{Float64}, u::UInt64, ::Right) = reinterpret(Float64, u)
+UIntMappable(::Type{Float64}, ::Union{Left, Right}) = UInt64
+
 isnan(o::DirectOrdering, x::Floats) = (x!=x)
+isnan(o::DirectOrdering, x::Missing) = false
 isnan(o::Perm, i::Integer) = isnan(o.order,o.data[i])
 
-function nans2left!(v::AbstractVector, o::Ordering, lo::Integer=first(axes(v,1)), hi::Integer=last(axes(v,1)))
+ismissing(o::DirectOrdering, x::Floats) = false
+ismissing(o::DirectOrdering, x::Missing) = true
+ismissing(o::Perm, i::Integer) = ismissing(o.order,o.data[i])
+
+allowsmissing(::AbstractVector{T}, ::DirectOrdering) where {T} = T >: Missing
+allowsmissing(::AbstractVector{<:Integer},
+              ::Perm{<:DirectOrdering,<:AbstractVector{T}}) where {T} =
+    T >: Missing
+
+function specials2left!(testf::Function, v::AbstractVector, o::Ordering,
+                        lo::Integer=first(axes(v,1)), hi::Integer=last(axes(v,1)))
     i = lo
-    @inbounds while i <= hi && isnan(o,v[i])
+    @inbounds while i <= hi && testf(o,v[i])
         i += 1
     end
     j = i + 1
     @inbounds while j <= hi
-        if isnan(o,v[j])
+        if testf(o,v[j])
             v[i], v[j] = v[j], v[i]
             i += 1
         end
@@ -1159,14 +1460,15 @@ function nans2left!(v::AbstractVector, o::Ordering, lo::Integer=first(axes(v,1))
     end
     return i, hi
 end
-function nans2right!(v::AbstractVector, o::Ordering, lo::Integer=first(axes(v,1)), hi::Integer=last(axes(v,1)))
+function specials2right!(testf::Function, v::AbstractVector, o::Ordering,
+                         lo::Integer=first(axes(v,1)), hi::Integer=last(axes(v,1)))
     i = hi
-    @inbounds while lo <= i && isnan(o,v[i])
+    @inbounds while lo <= i && testf(o,v[i])
         i -= 1
     end
     j = i - 1
     @inbounds while lo <= j
-        if isnan(o,v[j])
+        if testf(o,v[j])
             v[i], v[j] = v[j], v[i]
             i -= 1
         end
@@ -1175,17 +1477,46 @@ function nans2right!(v::AbstractVector, o::Ordering, lo::Integer=first(axes(v,1)
     return lo, i
 end
 
-nans2end!(v::AbstractVector, o::ForwardOrdering) = nans2right!(v,o)
-nans2end!(v::AbstractVector, o::ReverseOrdering) = nans2left!(v,o)
-nans2end!(v::AbstractVector{<:Integer}, o::Perm{<:ForwardOrdering}) = nans2right!(v,o)
-nans2end!(v::AbstractVector{<:Integer}, o::Perm{<:ReverseOrdering}) = nans2left!(v,o)
+function specials2left!(v::AbstractVector, a::Algorithm, o::Ordering)
+    lo, hi = first(axes(v,1)), last(axes(v,1))
+    if allowsmissing(v, o)
+        i, _ = specials2left!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
+        sort!(v, lo, i-1, a, o)
+        return i, hi
+    else
+        return specials2left!(isnan, v, o, lo, hi)
+    end
+end
+function specials2right!(v::AbstractVector, a::Algorithm, o::Ordering)
+    lo, hi = first(axes(v,1)), last(axes(v,1))
+    if allowsmissing(v, o)
+        _, i = specials2right!((v, o) -> ismissing(v, o) || isnan(v, o), v, o, lo, hi)
+        sort!(v, i+1, hi, a, o)
+        return lo, i
+    else
+        return specials2right!(isnan, v, o, lo, hi)
+    end
+end
+
+specials2end!(v::AbstractVector, a::Algorithm, o::ForwardOrdering) =
+    specials2right!(v, a, o)
+specials2end!(v::AbstractVector, a::Algorithm, o::ReverseOrdering) =
+    specials2left!(v, a, o)
+specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ForwardOrdering}) =
+    specials2right!(v, a, o)
+specials2end!(v::AbstractVector{<:Integer}, a::Algorithm, o::Perm{<:ReverseOrdering}) =
+    specials2left!(v, a, o)
 
 issignleft(o::ForwardOrdering, x::Floats) = lt(o, x, zero(x))
 issignleft(o::ReverseOrdering, x::Floats) = lt(o, x, -zero(x))
 issignleft(o::Perm, i::Integer) = issignleft(o.order, o.data[i])
 
 function fpsort!(v::AbstractVector, a::Algorithm, o::Ordering)
-    i, j = lo, hi = nans2end!(v,o)
+    # fpsort!'s optimizations speed up comparisons, of which there are O(nlogn).
+    # The overhead is O(n). For n < 10, it's not worth it.
+    length(v) < 10 && return sort!(v, first(axes(v,1)), last(axes(v,1)), SMALL_ALGORITHM, o)
+
+    i, j = lo, hi = specials2end!(v,a,o)
     @inbounds while true
         while i <= j &&  issignleft(o,v[i]); i += 1; end
         while i <= j && !issignleft(o,v[j]); j -= 1; end
@@ -1202,8 +1533,10 @@ end
 fpsort!(v::AbstractVector, a::Sort.PartialQuickSort, o::Ordering) =
     sort!(v, first(axes(v,1)), last(axes(v,1)), a, o)
 
-sort!(v::AbstractVector{<:Floats}, a::Algorithm, o::DirectOrdering) = fpsort!(v,a,o)
-sort!(v::Vector{Int}, a::Algorithm, o::Perm{<:DirectOrdering,<:Vector{<:Floats}}) = fpsort!(v,a,o)
+sort!(v::FPSortable, a::Algorithm, o::DirectOrdering) =
+    fpsort!(v, a, o)
+sort!(v::AbstractVector{<:Union{Signed, Unsigned}}, a::Algorithm, o::Perm{<:DirectOrdering,<:FPSortable}) =
+    fpsort!(v, a, o)
 
 end # module Sort.Float
 
diff --git a/base/special/cbrt.jl b/base/special/cbrt.jl
index 23b518a87a9a7d..9fda5c41fb09e0 100644
--- a/base/special/cbrt.jl
+++ b/base/special/cbrt.jl
@@ -31,7 +31,6 @@ julia> cbrt(big(-27))
 -3.0
 ```
 """
-cbrt(x::Real) = cbrt(float(x))
 cbrt(x::AbstractFloat) = x < 0 ? -(-x)^(1//3) : x^(1//3)
 
 """
@@ -147,3 +146,20 @@ function cbrt(x::Union{Float32,Float64})
     t = _approx_cbrt(x)
     return _improve_cbrt(x, t)
 end
+
+function cbrt(a::Float16)
+    if !isfinite(a) || iszero(a)
+        return a
+    end
+    x = Float32(a)
+
+    # 5 bit approximation. Simpler than _approx_cbrt since subnormals can not appear
+    u = highword(x) & 0x7fff_ffff
+    v = div(u, UInt32(3)) + 0x2a5119f2
+    t = copysign(fromhighword(Float32, v), x)
+
+    # 2 newton iterations
+    t = 0.33333334f0 * (2f0*t + x/(t*t))
+    t = 0.33333334f0 * (2f0*t + x/(t*t))
+    return Float16(t)
+end
diff --git a/base/special/exp.jl b/base/special/exp.jl
index 493e5167f1e664..837310bc7ed197 100644
--- a/base/special/exp.jl
+++ b/base/special/exp.jl
@@ -1,138 +1,499 @@
-# Based on FDLIBM http://www.netlib.org/fdlibm/e_exp.c
-# which is made available under the following licence
-
-## Copyright (C) 2004 by Sun Microsystems, Inc. All rights reserved. Permission
-## to use, copy, modify, and distribute this software is freely granted,
-## provided that this notice is preserved.
-
-# Method
-# 1. Argument reduction: Reduce x to an r so that |r| <= 0.5*ln(2). Given x,
-#    find r and integer k such that
-#       x = k*ln(2) + r,  |r| <= 0.5*ln(2).
-#    Here r is represented as r = hi - lo for better accuracy.
-#
-# 2. Approximate exp(r) by a special rational function on [0, 0.5*ln(2)]:
-#       R(r^2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r^4/360 + ...
-#
-#    A special Remez algorithm on [0, 0.5*ln(2)] is used to generate a
-#    polynomial to approximate R.
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# magic rounding constant: 1.5*2^52 Adding, then subtracting it from a float rounds it to an Int.
+# This works because eps(MAGIC_ROUND_CONST(T)) == one(T), so adding it to a smaller number aligns the lsb to the 1s place.
+# Values for which this trick doesn't work are going to have outputs of 0 or Inf.
+MAGIC_ROUND_CONST(::Type{Float64}) = 6.755399441055744e15
+MAGIC_ROUND_CONST(::Type{Float32}) = 1.048576f7
+
+# max, min, and subnormal arguments
+# max_exp = T(exponent_bias(T)*log(base, big(2)) + log(base, 2 - big(2.0)^-significand_bits(T)))
+MAX_EXP(n::Val{2}, ::Type{Float32}) = 128.0f0
+MAX_EXP(n::Val{2}, ::Type{Float64}) = 1024.0
+MAX_EXP(n::Val{:ℯ}, ::Type{Float32}) = 88.72284f0
+MAX_EXP(n::Val{:ℯ}, ::Type{Float64}) = 709.7827128933841
+MAX_EXP(n::Val{10}, ::Type{Float32}) = 38.53184f0
+MAX_EXP(n::Val{10}, ::Type{Float64}) = 308.25471555991675
+
+# min_exp = T(-(exponent_bias(T)+significand_bits(T)) * log(base, big(2)))
+MIN_EXP(n::Val{2}, ::Type{Float32}) = -150.0f0
+MIN_EXP(n::Val{2}, ::Type{Float64}) = -1075.0
+MIN_EXP(n::Val{:ℯ}, ::Type{Float32}) = -103.97208f0
+MIN_EXP(n::Val{:ℯ}, ::Type{Float64}) = -745.1332191019412
+MIN_EXP(n::Val{10}, ::Type{Float32}) = -45.1545f0
+MIN_EXP(n::Val{10}, ::Type{Float64}) = -323.60724533877976
+
+# subnorm_exp = abs(log(base, floatmin(T)))
+# these vals are positive since it's easier to take abs(x) than -abs(x)
+SUBNORM_EXP(n::Val{2}, ::Type{Float32}) = 126.00001f0
+SUBNORM_EXP(n::Val{2}, ::Type{Float64}) = 1022.0
+SUBNORM_EXP(n::Val{:ℯ}, ::Type{Float32}) = 87.33655f0
+SUBNORM_EXP(n::Val{:ℯ}, ::Type{Float64}) = 708.3964185322641
+SUBNORM_EXP(n::Val{10}, ::Type{Float32}) = 37.92978f0
+SUBNORM_EXP(n::Val{10}, ::Type{Float64}) = 307.6526555685887
+
+# 256/log(base, 2) (For Float64 reductions)
+LogBo256INV(::Val{2}, ::Type{Float64}) = 256.0
+LogBo256INV(::Val{:ℯ}, ::Type{Float64}) = 369.3299304675746
+LogBo256INV(::Val{10}, ::Type{Float64}) = 850.4135922911647
+
+# -log(base, 2)/256 in upper and lower bits
+# Upper is truncated to only have 34 bits of significand since N has at most
+# ceil(log2(-MIN_EXP(base, Float64)*LogBo256INV(Val(2), Float64))) = 19 bits.
+# This ensures no rounding when multiplying LogBo256U*N for FMAless hardware
+LogBo256U(::Val{2}, ::Type{Float64}) = -0.00390625
+LogBo256U(::Val{:ℯ}, ::Type{Float64}) = -0.002707606173999011
+LogBo256U(::Val{10}, ::Type{Float64}) = -0.0011758984204561784
+LogBo256L(::Val{2}, ::Type{Float64}) = 0.0
+LogBo256L(::Val{:ℯ}, ::Type{Float64}) = -6.327543041662719e-14
+LogBo256L(::Val{10}, ::Type{Float64}) = -1.0624811566412999e-13
+
+# 1/log(base, 2) (For Float32 reductions)
+LogBINV(::Val{2}, ::Type{Float32}) = 1.0f0
+LogBINV(::Val{:ℯ}, ::Type{Float32}) = 1.442695f0
+LogBINV(::Val{10}, ::Type{Float32}) = 3.321928f0
+
+# -log(base, 2) in upper and lower bits
+# Upper is truncated to only have 16 bits of significand since N has at most
+# ceil(log2(-MIN_EXP(n, Float32)*LogBINV(Val(2), Float32))) = 8 bits.
+# This ensures no rounding when multiplying LogBU*N for FMAless hardware
+LogBU(::Val{2}, ::Type{Float32}) = -1.0f0
+LogBU(::Val{:ℯ}, ::Type{Float32}) = -0.69314575f0
+LogBU(::Val{10}, ::Type{Float32}) = -0.3010254f0
+LogBL(::Val{2}, ::Type{Float32}) = 0.0f0
+LogBL(::Val{:ℯ}, ::Type{Float32}) = -1.4286068f-6
+LogBL(::Val{10}, ::Type{Float32}) = -4.605039f-6
+
+# -log(base, 2) as a Float32 for Float16 version.
+LogB(::Val{2}, ::Type{Float16}) = -1.0f0
+LogB(::Val{:ℯ}, ::Type{Float16}) = -0.6931472f0
+LogB(::Val{10}, ::Type{Float16}) = -0.30103f0
+
+# Range reduced kernels
+@inline function expm1b_kernel(::Val{2}, x::Float64)
+    return x * evalpoly(x, (0.6931471805599393, 0.24022650695910058,
+                            0.05550411502333161, 0.009618129548366803))
+end
+@inline function expm1b_kernel(::Val{:ℯ}, x::Float64)
+    return x * evalpoly(x, (0.9999999999999912, 0.4999999999999997,
+                            0.1666666857598779, 0.04166666857598777))
+end
+
+@inline function expm1b_kernel(::Val{10}, x::Float64)
+    return x * evalpoly(x, (2.3025850929940255, 2.6509490552391974,
+                            2.034678825384765, 1.1712552025835192))
+end
+
+@inline function expb_kernel(::Val{2}, x::Float32)
+    return evalpoly(x, (1.0f0, 0.6931472f0, 0.2402265f0,
+                        0.05550411f0, 0.009618025f0,
+                        0.0013333423f0, 0.00015469732f0, 1.5316464f-5))
+end
+@inline function expb_kernel(::Val{:ℯ}, x::Float32)
+    return evalpoly(x, (1.0f0, 1.0f0, 0.5f0, 0.16666667f0,
+                        0.041666217f0, 0.008333249f0,
+                        0.001394858f0, 0.00019924171f0))
+end
+@inline function expb_kernel(::Val{10}, x::Float32)
+    return evalpoly(x, (1.0f0, 2.3025851f0, 2.650949f0,
+                        2.0346787f0, 1.1712426f0, 0.53937745f0,
+                        0.20788547f0, 0.06837386f0))
+end
+
+# Table stores data with 60 sig figs by using the fact that the first 12 bits of all the
+# values would be the same if stored as regular Float64.
+# This only gains 8 bits since the least significant 4 bits of the exponent
+# of the small part are not the same for all table entries
+const JU_MASK = typemax(UInt64)>>12
+const JL_MASK = typemax(UInt64)>>8
+const JU_CONST = 0x3FF0000000000000
+const JL_CONST = 0x3C00000000000000
+
+
+#function make_table(size)
+#    t_array = zeros(UInt64, size);
+#    for j in 1:size
+#        val = 2.0^(BigFloat(j-1)/size)
+#        valU = Float64(val, RoundDown)
+#        valL = Float64(val-valU)
+#        valU = reinterpret(UInt64, valU) & JU_MASK
+#        valL = ((reinterpret(UInt64, valL) & JL_MASK)>>44)<<52
+#        t_array[j] = valU | valL
+#    end
+#    return Tuple(t_array)
+#end
+#const J_TABLE = make_table(256);
+const J_TABLE = (0x0000000000000000, 0xaac00b1afa5abcbe, 0x9b60163da9fb3335, 0xab502168143b0280, 0xadc02c9a3e778060,
+                 0x656037d42e11bbcc, 0xa7a04315e86e7f84, 0x84c04e5f72f654b1, 0x8d7059b0d3158574, 0xa510650a0e3c1f88,
+                 0xa8d0706b29ddf6dd, 0x83207bd42b72a836, 0x6180874518759bc8, 0xa4b092bdf66607df, 0x91409e3ecac6f383,
+                 0x85d0a9c79b1f3919, 0x98a0b5586cf9890f, 0x94f0c0f145e46c85, 0x9010cc922b7247f7, 0xa210d83b23395deb,
+                 0x4030e3ec32d3d1a2, 0xa5b0efa55fdfa9c4, 0xae40fb66affed31a, 0x8d41073028d7233e, 0xa4911301d0125b50,
+                 0xa1a11edbab5e2ab5, 0xaf712abdc06c31cb, 0xae8136a814f204aa, 0xa661429aaea92ddf, 0xa9114e95934f312d,
+                 0x82415a98c8a58e51, 0x58f166a45471c3c2, 0xab9172b83c7d517a, 0x70917ed48695bbc0, 0xa7718af9388c8de9,
+                 0x94a1972658375d2f, 0x8e51a35beb6fcb75, 0x97b1af99f8138a1c, 0xa351bbe084045cd3, 0x9001c82f95281c6b,
+                 0x9e01d4873168b9aa, 0xa481e0e75eb44026, 0xa711ed5022fcd91c, 0xa201f9c18438ce4c, 0x8dc2063b88628cd6,
+                 0x935212be3578a819, 0x82a21f49917ddc96, 0x8d322bdda27912d1, 0x99b2387a6e756238, 0x8ac2451ffb82140a,
+                 0x8ac251ce4fb2a63f, 0x93e25e85711ece75, 0x82b26b4565e27cdd, 0x9e02780e341ddf29, 0xa2d284dfe1f56380,
+                 0xab4291ba7591bb6f, 0x86129e9df51fdee1, 0xa352ab8a66d10f12, 0xafb2b87fd0dad98f, 0xa572c57e39771b2e,
+                 0x9002d285a6e4030b, 0x9d12df961f641589, 0x71c2ecafa93e2f56, 0xaea2f9d24abd886a, 0x86f306fe0a31b715,
+                 0x89531432edeeb2fd, 0x8a932170fc4cd831, 0xa1d32eb83ba8ea31, 0x93233c08b26416ff, 0xab23496266e3fa2c,
+                 0xa92356c55f929ff0, 0xa8f36431a2de883a, 0xa4e371a7373aa9ca, 0xa3037f26231e7549, 0xa0b38cae6d05d865,
+                 0xa3239a401b7140ee, 0xad43a7db34e59ff6, 0x9543b57fbfec6cf4, 0xa083c32dc313a8e4, 0x7fe3d0e544ede173,
+                 0x8ad3dea64c123422, 0xa943ec70df1c5174, 0xa413fa4504ac801b, 0x8bd40822c367a024, 0xaf04160a21f72e29,
+                 0xa3d423fb27094689, 0xab8431f5d950a896, 0x88843ffa3f84b9d4, 0x48944e086061892d, 0xae745c2042a7d231,
+                 0x9c946a41ed1d0057, 0xa1e4786d668b3236, 0x73c486a2b5c13cd0, 0xab1494e1e192aed1, 0x99c4a32af0d7d3de,
+                 0xabb4b17dea6db7d6, 0x7d44bfdad5362a27, 0x9054ce41b817c114, 0x98e4dcb299fddd0d, 0xa564eb2d81d8abfe,
+                 0xa5a4f9b2769d2ca6, 0x7a2508417f4531ee, 0xa82516daa2cf6641, 0xac65257de83f4eee, 0xabe5342b569d4f81,
+                 0x879542e2f4f6ad27, 0xa8a551a4ca5d920e, 0xa7856070dde910d1, 0x99b56f4736b527da, 0xa7a57e27dbe2c4ce,
+                 0x82958d12d497c7fd, 0xa4059c0827ff07cb, 0x9635ab07dd485429, 0xa245ba11fba87a02, 0x3c45c9268a5946b7,
+                 0xa195d84590998b92, 0x9ba5e76f15ad2148, 0xa985f6a320dceb70, 0xa60605e1b976dc08, 0x9e46152ae6cdf6f4,
+                 0xa636247eb03a5584, 0x984633dd1d1929fd, 0xa8e6434634ccc31f, 0xa28652b9febc8fb6, 0xa226623882552224,
+                 0xa85671c1c70833f5, 0x60368155d44ca973, 0x880690f4b19e9538, 0xa216a09e667f3bcc, 0x7a36b052fa75173e,
+                 0xada6c012750bdabe, 0x9c76cfdcddd47645, 0xae46dfb23c651a2e, 0xa7a6ef9298593ae4, 0xa9f6ff7df9519483,
+                 0x59d70f7466f42e87, 0xaba71f75e8ec5f73, 0xa6f72f8286ead089, 0xa7a73f9a48a58173, 0x90474fbd35d7cbfd,
+                 0xa7e75feb564267c8, 0x9b777024b1ab6e09, 0x986780694fde5d3f, 0x934790b938ac1cf6, 0xaaf7a11473eb0186,
+                 0xa207b17b0976cfda, 0x9f17c1ed0130c132, 0x91b7d26a62ff86f0, 0x7057e2f336cf4e62, 0xabe7f3878491c490,
+                 0xa6c80427543e1a11, 0x946814d2add106d9, 0xa1582589994cce12, 0x9998364c1eb941f7, 0xa9c8471a4623c7ac,
+                 0xaf2857f4179f5b20, 0xa01868d99b4492ec, 0x85d879cad931a436, 0x99988ac7d98a6699, 0x9d589bd0a478580f,
+                 0x96e8ace5422aa0db, 0x9ec8be05bad61778, 0xade8cf3216b5448b, 0xa478e06a5e0866d8, 0x85c8f1ae99157736,
+                 0x959902fed0282c8a, 0xa119145b0b91ffc5, 0xab2925c353aa2fe1, 0xae893737b0cdc5e4, 0xa88948b82b5f98e4,
+                 0xad395a44cbc8520e, 0xaf296bdd9a7670b2, 0xa1797d829fde4e4f, 0x7ca98f33e47a22a2, 0xa749a0f170ca07b9,
+                 0xa119b2bb4d53fe0c, 0x7c79c49182a3f090, 0xa579d674194bb8d4, 0x7829e86319e32323, 0xaad9fa5e8d07f29d,
+                 0xa65a0c667b5de564, 0x9c6a1e7aed8eb8bb, 0x963a309bec4a2d33, 0xa2aa42c980460ad7, 0xa16a5503b23e255c,
+                 0x650a674a8af46052, 0x9bca799e1330b358, 0xa58a8bfe53c12e58, 0x90fa9e6b5579fdbf, 0x889ab0e521356eba,
+                 0xa81ac36bbfd3f379, 0x97ead5ff3a3c2774, 0x97aae89f995ad3ad, 0xa5aafb4ce622f2fe, 0xa21b0e07298db665,
+                 0x94db20ce6c9a8952, 0xaedb33a2b84f15fa, 0xac1b468415b749b0, 0xa1cb59728de55939, 0x92ab6c6e29f1c52a,
+                 0xad5b7f76f2fb5e46, 0xa24b928cf22749e3, 0xa08ba5b030a10649, 0xafcbb8e0b79a6f1e, 0x823bcc1e904bc1d2,
+                 0xafcbdf69c3f3a206, 0xa08bf2c25bd71e08, 0xa89c06286141b33c, 0x811c199bdd85529c, 0xa48c2d1cd9fa652b,
+                 0x9b4c40ab5fffd07a, 0x912c544778fafb22, 0x928c67f12e57d14b, 0xa86c7ba88988c932, 0x71ac8f6d9406e7b5,
+                 0xaa0ca3405751c4da, 0x750cb720dcef9069, 0xac5ccb0f2e6d1674, 0xa88cdf0b555dc3f9, 0xa2fcf3155b5bab73,
+                 0xa1ad072d4a07897b, 0x955d1b532b08c968, 0xa15d2f87080d89f1, 0x93dd43c8eacaa1d6, 0x82ed5818dcfba487,
+                 0x5fed6c76e862e6d3, 0xa77d80e316c98397, 0x9a0d955d71ff6075, 0x9c2da9e603db3285, 0xa24dbe7cd63a8314,
+                 0x92ddd321f301b460, 0xa1ade7d5641c0657, 0xa72dfc97337b9b5e, 0xadae11676b197d16, 0xa42e264614f5a128,
+                 0xa30e3b333b16ee11, 0x839e502ee78b3ff6, 0xaa7e653924676d75, 0x92de7a51fbc74c83, 0xa77e8f7977cdb73f,
+                 0xa0bea4afa2a490d9, 0x948eb9f4867cca6e, 0xa1becf482d8e67f0, 0x91cee4aaa2188510, 0x9dcefa1bee615a27,
+                 0xa66f0f9c1cb64129, 0x93af252b376bba97, 0xacdf3ac948dd7273, 0x99df50765b6e4540, 0x9faf6632798844f8,
+                 0xa12f7bfdad9cbe13, 0xaeef91d802243c88, 0x874fa7c1819e90d8, 0xacdfbdba3692d513, 0x62efd3c22b8f71f1, 0x74afe9d96b2a23d9)
+
+@inline function table_unpack(ind)
+    j = @inbounds J_TABLE[ind]
+    jU = reinterpret(Float64, JU_CONST | (j&JU_MASK))
+    jL = reinterpret(Float64, JL_CONST | (j>>8))
+    return jU, jL
+end
+
+# Method for Float64
+# 1. Argument reduction: Reduce x to an r so that |r| <= log(b, 2)/512. Given x, base b,
+#    find r and integers k, j such that
+#       x = (k + j/256)*log(b, 2) + r,  0 <= j < 256, |r| <= log(b,2)/512.
 #
-#    The computation of exp(r) thus becomes
-#                       2*r
-#       exp(r) = 1 + ----------
-#                     R(r) - r
-#                          r*c(r)
-#              = 1 + r + ----------- (for better accuracy)
-#                         2 - c(r)
-#    where
-#       c(r) = r - (P1*r^2  + P2*r^4  + ... + P5*r^10 + ...).
+# 2. Approximate b^r-1 by 3rd-degree minimax polynomial p_b(r) on the interval [-log(b,2)/512, log(b,2)/512].
+#    Since the bounds on r are very tight, this is sufficient to be accurate to floating point epsilon.
 #
-# 3. Scale back: exp(x) = 2^k * exp(r)
+# 3. Scale back: b^x = 2^k * 2^(j/256) * (1 + p_b(r))
+#    Since the range of possible j is small, 2^(j/256) is stored for all possible values in slightly extended precision.
 
-# log(2)
-const LN2 = 6.931471805599453094172321214581765680755001343602552541206800094933936219696955e-01
-# log2(e)
-const LOG2_E = 1.442695040888963407359924681001892137426646
-
-# log(2) into upper and lower bits
-LN2U(::Type{Float64}) = 6.93147180369123816490e-1
-LN2U(::Type{Float32}) = 6.9313812256f-1
+# Method for Float32
+# 1. Argument reduction: Reduce x to an r so that |r| <= log(b, 2)/2. Given x, base b,
+#    find r and integer N such that
+#       x = N*log(b, 2) + r,  |r| <= log(b,2)/2.
+#
+# 2. Approximate b^r by 7th-degree minimax polynomial p_b(r) on the interval [-log(b,2)/2, log(b,2)/2].
+# 3. Scale back: b^x = 2^N * p_b(r)
+# For both, a little extra care needs to be taken if b^r is subnormal.
+# The solution is to do the scaling back in 2 steps as just messing with the exponent wouldn't work.
 
-LN2L(::Type{Float64}) = 1.90821492927058770002e-10
-LN2L(::Type{Float32}) = 9.0580006145f-6
+@inline function exp_impl(x::Float64, base)
+    T = Float64
+    N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(base, T))
+    r = muladd(N_float, LogBo256U(base, T), x)
+    r = muladd(N_float, LogBo256L(base, T), r)
+    k = N >> 8
+    jU, jL = table_unpack(N&255 + 1)
+    small_part =  muladd(jU, expm1b_kernel(base, r), jL) + jU
 
-# max and min arguments
-MAX_EXP(::Type{Float64}) = 7.09782712893383996732e2 # log 2^1023*(2-2^-52)
-MAX_EXP(::Type{Float32}) = 88.72283905206835f0      # log 2^127 *(2-2^-23)
+    if !(abs(x) <= SUBNORM_EXP(base, T))
+        x >= MAX_EXP(base, T) && return Inf
+        x <= MIN_EXP(base, T) && return 0.0
+        if k <= -53
+            # The UInt64 forces promotion. (Only matters for 32 bit systems.)
+            twopk = (k + UInt64(53)) << 52
+            return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53)
+        end
+        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+    end
+    twopk = Int64(k) << 52
+    return reinterpret(T, twopk + reinterpret(Int64, small_part))
+end
+# Computes base^(x+xlo). Used for pow.
+@inline function exp_impl(x::Float64, xlo::Float64, base)
+    T = Float64
+    N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(base, T))
+    r = muladd(N_float, LogBo256U(base, T), x)
+    r = muladd(N_float, LogBo256L(base, T), r)
+    k = N >> 8
+    jU, jL = table_unpack(N&255 + 1)
+    very_small = muladd(jU, expm1b_kernel(base, r), jL)
+    small_part =  muladd(jU,xlo,very_small) + jU
+    if !(abs(x) <= SUBNORM_EXP(base, T))
+        x >= MAX_EXP(base, T) && return Inf
+        x <= MIN_EXP(base, T) && return 0.0
+        if k <= -53
+            # The UInt64 forces promotion. (Only matters for 32 bit systems.)
+            twopk = (k + UInt64(53)) << 52
+            return reinterpret(T, twopk + reinterpret(UInt64, small_part))*(2.0^-53)
+        end
+        #k == 1024 && return (small_part * 2.0) * 2.0^1023
+    end
+    twopk = Int64(k) << 52
+    return reinterpret(T, twopk + reinterpret(Int64, small_part))
+end
+@inline function exp_impl_fast(x::Float64, base)
+    T = Float64
+    x >= MAX_EXP(base, T) && return Inf
+    x <= -SUBNORM_EXP(base, T) && return 0.0
+    N_float = muladd(x, LogBo256INV(base, T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(base, T))
+    r = muladd(N_float, LogBo256U(base, T), x)
+    r = muladd(N_float, LogBo256L(base, T), r)
+    k = N >> 8
+    jU = reinterpret(Float64, JU_CONST | (@inbounds J_TABLE[N&255 + 1] & JU_MASK))
+    small_part =  muladd(jU, expm1b_kernel(base, r), jU)
+    twopk = Int64(k) << 52
+    return reinterpret(T, twopk + reinterpret(Int64, small_part))
+end
 
-# one less than the min exponent since we can sqeeze a bit more from the exp function
-MIN_EXP(::Type{Float64}) = -7.451332191019412076235e2 # log 2^-1075
-MIN_EXP(::Type{Float32}) = -103.97207708f0            # log 2^-150
+@inline function exp_impl(x::Float32, base)
+    T = Float32
+    N_float = round(x*LogBINV(base, T))
+    N = unsafe_trunc(Int32, N_float)
+    r = muladd(N_float, LogBU(base, T), x)
+    r = muladd(N_float, LogBL(base, T), r)
+    small_part = expb_kernel(base, r)
+    power = (N+Int32(127))
+    x > MAX_EXP(base, T) && return Inf32
+    x < MIN_EXP(base, T) && return 0.0f0
+    if x <= -SUBNORM_EXP(base, T)
+        power += Int32(24)
+        small_part *= Float32(0x1p-24)
+    end
+    if N == 128
+        power -= Int32(1)
+        small_part *= 2f0
+    end
+    return small_part * reinterpret(T, power << Int32(23))
+end
 
-@inline exp_kernel(x::Float64) = @horner(x, 1.66666666666666019037e-1,
-    -2.77777777770155933842e-3, 6.61375632143793436117e-5,
-    -1.65339022054652515390e-6, 4.13813679705723846039e-8)
+@inline function exp_impl_fast(x::Float32, base)
+    T = Float32
+    x >= MAX_EXP(base, T) && return Inf32
+    x <= -SUBNORM_EXP(base, T) && return 0f0
+    N_float = round(x*LogBINV(base, T))
+    N = unsafe_trunc(Int32, N_float)
+    r = muladd(N_float, LogBU(base, T), x)
+    r = muladd(N_float, LogBL(base, T), r)
+    small_part = expb_kernel(base, r)
+    twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
+    return twopk*small_part
+end
 
-@inline exp_kernel(x::Float32) = @horner(x, 1.6666625440f-1, -2.7667332906f-3)
+@inline function exp_impl(a::Float16, base)
+    T = Float32
+    x = T(a)
+    N_float = round(x*LogBINV(base, T))
+    N = unsafe_trunc(Int32, N_float)
+    r = muladd(N_float, LogB(base, Float16), x)
+    small_part = expb_kernel(base, r)
+    if !(abs(x) <= 25)
+        x > 16 && return Inf16
+        x < 25 && return zero(Float16)
+    end
+    twopk = reinterpret(T, (N+Int32(127)) << Int32(23))
+    return Float16(twopk*small_part)
+end
 
-# for values smaller than this threshold just use a Taylor expansion
-@eval exp_small_thres(::Type{Float64}) = $(2.0^-28)
-@eval exp_small_thres(::Type{Float32}) = $(2.0f0^-13)
+for (func, fast_func, base) in ((:exp2,  :exp2_fast,  Val(2)),
+                                (:exp,   :exp_fast,   Val(:ℯ)),
+                                (:exp10, :exp10_fast, Val(10)))
+    @eval begin
+        $func(x::Union{Float16,Float32,Float64}) = exp_impl(x, $base)
+        $fast_func(x::Union{Float32,Float64}) = exp_impl_fast(x, $base)
+    end
+end
 
-"""
+@doc """
     exp(x)
 
-Compute the natural base exponential of `x`, in other words ``e^x``.
+Compute the natural base exponential of `x`, in other words ``ℯ^x``.
+
+See also [`exp2`](@ref), [`exp10`](@ref) and [`cis`](@ref).
 
 # Examples
 ```jldoctest
 julia> exp(1.0)
 2.718281828459045
+
+julia> exp(im * pi) ≈ cis(pi)
+true
 ```
+""" exp(x::Real)
+
 """
-exp(x::Real) = exp(float(x))
-function exp(x::T) where T<:Union{Float32,Float64}
-    xa = reinterpret(Unsigned, x) & ~sign_mask(T)
-    xsb = signbit(x)
-
-    # filter out non-finite arguments
-    if xa > reinterpret(Unsigned, MAX_EXP(T))
-        if xa >= exponent_mask(T)
-            xa & significand_mask(T) != 0 && return T(NaN)
-            return xsb ? T(0.0) : T(Inf) # exp(+-Inf)
-        end
-        x > MAX_EXP(T) && return T(Inf)
-        x < MIN_EXP(T) && return T(0.0)
+    exp2(x)
+
+Compute the base 2 exponential of `x`, in other words ``2^x``.
+
+See also [`ldexp`](@ref), [`<<`](@ref).
+
+# Examples
+```jldoctest
+julia> exp2(5)
+32.0
+
+julia> 2^5
+32
+
+julia> exp2(63) > typemax(Int)
+true
+```
+"""
+exp2(x)
+
+"""
+    exp10(x)
+
+Compute the base 10 exponential of `x`, in other words ``10^x``.
+
+# Examples
+```jldoctest
+julia> exp10(2)
+100.0
+
+julia> 10^2
+100
+```
+"""
+exp10(x)
+
+# functions with special cases for integer arguments
+@inline function exp2(x::Base.BitInteger)
+    if x > 1023
+        Inf64
+    elseif x <= -1023
+        # if -1073 < x <= -1023 then Result will be a subnormal number
+        # Hex literal with padding must be used to work on 32bit machine
+        reinterpret(Float64, 0x0000_0000_0000_0001 << ((x + 1074) % UInt))
+    else
+        # We will cast everything to Int64 to avoid errors in case of Int128
+        # If x is a Int128, and is outside the range of Int64, then it is not -1023<x<=1023
+        reinterpret(Float64, (exponent_bias(Float64) + (x % Int64)) << (significand_bits(Float64) % UInt))
+    end
+end
+
+# min and max arguments for expm1 by type
+MAX_EXP(::Type{Float64}) =  709.7827128933845   # log 2^1023*(2-2^-52)
+MIN_EXP(::Type{Float64}) = -37.42994775023705   # log 2^-54
+MAX_EXP(::Type{Float32}) =  88.72284f0          # log 2^127 *(2-2^-23)
+MIN_EXP(::Type{Float32}) = -17.32868f0          # log 2^-25
+MAX_EXP(::Type{Float16}) =  Float16(11.09)      # log 2^15 *(2-2^-10)
+MIN_EXP(::Type{Float16}) = -Float16(8.32)       # log 2^-12
+
+Ln2INV(::Type{Float64}) = 1.4426950408889634
+Ln2(::Type{Float64}) = -0.6931471805599453
+Ln2INV(::Type{Float32}) = 1.442695f0
+Ln2(::Type{Float32}) = -0.6931472f0
+
+# log(.75) <= x <= log(1.25)
+@inline function expm1_small(x::Float64)
+    p = evalpoly(x, (0.16666666666666632, 0.04166666666666556, 0.008333333333401227,
+                     0.001388888889068783, 0.00019841269447671544, 2.480157691845342e-5,
+                     2.7558212415361945e-6, 2.758218402815439e-7, 2.4360682937111612e-8))
+    p2 = exthorner(x, (1.0, .5, p))
+    return fma(x, p2[1], x*p2[2])
+end
+@inline function expm1_small(x::Float32)
+    p = evalpoly(x, (0.16666666f0, 0.041666627f0, 0.008333682f0,
+                     0.0013908712f0, 0.0001933096f0))
+    p2 = exthorner(x, (1f0, .5f0, p))
+    return fma(x, p2[1], x*p2[2])
+end
+
+function expm1(x::Float64)
+    T = Float64
+    if -0.2876820724517809 <= x <= 0.22314355131420976
+        return expm1_small(x)
+    elseif !(abs(x)<=MIN_EXP(Float64))
+        isnan(x) && return x
+        x > MAX_EXP(Float64) && return Inf
+        x < MIN_EXP(Float64) && return -1.0
     end
-    # This implementation gives 2.7182818284590455 for exp(1.0) when T ==
-    # Float64, which is well within the allowable error; however,
-    # 2.718281828459045 is closer to the true value so we prefer that answer,
-    # given that 1.0 is such an important argument value.
-    if x == T(1.0) && T == Float64
-        return 2.718281828459045235360
+
+    N_float = muladd(x, LogBo256INV(Val(:ℯ), T), MAGIC_ROUND_CONST(T))
+    N = reinterpret(UInt64, N_float) % Int32
+    N_float -=  MAGIC_ROUND_CONST(T) #N_float now equals round(x*LogBo256INV(Val(:ℯ), T))
+    r = muladd(N_float, LogBo256U(Val(:ℯ), T), x)
+    r = muladd(N_float, LogBo256L(Val(:ℯ), T), r)
+    k = Int64(N >> 8)
+    jU, jL = table_unpack(N&255 +1)
+    p = expm1b_kernel(Val(:ℯ), r)
+    twopk  = reinterpret(Float64, (1023+k) << 52)
+    twopnk = reinterpret(Float64, (1023-k) << 52)
+    k>=106 && return reinterpret(Float64, (1022+k) << 52)*(jU + muladd(jU, p, jL))*2
+    k>=53 && return twopk*(jU + muladd(jU, p, (jL-twopnk)))
+    k<=-2 && return twopk*(jU + muladd(jU, p, jL))-1
+    return twopk*((jU-twopnk) + fma(jU, p, jL))
+end
+
+function expm1(x::Float32)
+    x > MAX_EXP(Float32) && return Inf32
+    x < MIN_EXP(Float32) && return -1f0
+    if -0.2876821f0 <=x <= 0.22314355f0
+        return expm1_small(x)
     end
-    # compute approximation
-    if xa > reinterpret(Unsigned, T(0.5)*T(LN2)) # |x| > 0.5 log(2)
-        # argument reduction
-        if xa < reinterpret(Unsigned, T(1.5)*T(LN2)) # |x| < 1.5 log(2)
-            if xsb
-                k = -1
-                hi = x + LN2U(T)
-                lo = -LN2L(T)
-            else
-                k = 1
-                hi = x - LN2U(T)
-                lo = LN2L(T)
-            end
-        else
-            n = round(T(LOG2_E)*x)
-            k = unsafe_trunc(Int,n)
-            hi = muladd(n, -LN2U(T), x)
-            lo = n*LN2L(T)
-        end
-        # compute approximation on reduced argument
-        r = hi - lo
-        z = r*r
-        p = r - z*exp_kernel(z)
-        y = T(1.0) - ((lo - (r*p)/(T(2.0) - p)) - hi)
-        # scale back
-        if k > -significand_bits(T)
-            # multiply by 2.0 first to prevent overflow, which helps extends the range
-            k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
-            twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T))
-            return y*twopk
-        else
-            # add significand_bits(T) + 1 to lift the range outside the subnormals
-            twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T))
-            return y * twopk * T(2.0)^(-significand_bits(T) - 1)
-        end
-    elseif xa < reinterpret(Unsigned, exp_small_thres(T)) # |x| < exp_small_thres
-        # Taylor approximation for small values: exp(x) ≈ 1.0 + x
-        return T(1.0) + x
-    else
-        # primary range with k = 0, so compute approximation directly
-        z = x*x
-        p = x - z*exp_kernel(z)
-        return T(1.0) - ((x*p)/(p - T(2.0)) - x)
+    x = Float64(x)
+    N_float = round(x*Ln2INV(Float64))
+    N = unsafe_trunc(UInt64, N_float)
+    r = muladd(N_float, Ln2(Float64), x)
+    hi = evalpoly(r, (1.0, .5, 0.16666667546642386, 0.041666183019487026,
+                      0.008332997481506921, 0.0013966479175977883, 0.0002004037059220124))
+    small_part = r*hi
+    twopk = reinterpret(Float64, (N+1023) << 52)
+    return Float32(muladd(twopk, small_part, twopk-1.0))
+end
+
+function expm1(x::Float16)
+    x > MAX_EXP(Float16) && return Inf16
+    x < MIN_EXP(Float16) && return Float16(-1.0)
+    x = Float32(x)
+    if -0.2876821f0 <=x <= 0.22314355f0
+        return Float16(x*evalpoly(x, (1f0, .5f0, 0.16666628f0, 0.04166785f0, 0.008351848f0, 0.0013675707f0)))
     end
+    N_float = round(x*Ln2INV(Float32))
+    N = unsafe_trunc(UInt32, N_float)
+    r = muladd(N_float, Ln2(Float32), x)
+    hi = evalpoly(r, (1f0, .5f0, 0.16666667f0, 0.041665863f0, 0.008333111f0, 0.0013981499f0, 0.00019983904f0))
+    small_part = r*hi
+    twopk = reinterpret(Float32, (N+Int32(127)) << Int32(23))
+    return Float16(muladd(twopk, small_part, twopk-1f0))
 end
+
+"""
+    expm1(x)
+
+Accurately compute ``e^x-1``. It avoids the loss of precision involved in the direct
+evaluation of exp(x)-1 for small values of x.
+# Examples
+```jldoctest
+julia> expm1(1e-16)
+1.0e-16
+
+julia> exp(1e-16) - 1
+0.0
+```
+"""
+expm1(x)
diff --git a/base/special/exp10.jl b/base/special/exp10.jl
deleted file mode 100644
index c32d0a98702ee0..00000000000000
--- a/base/special/exp10.jl
+++ /dev/null
@@ -1,139 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#  Method
-#  1. Argument reduction: Reduce x to an r so that |r| <= 0.5*log10(2). Given x,
-#     find r and integer k such that
-#
-#                x = k*log10(2) + r,  |r| <= 0.5*log10(2).
-#
-# 2. Approximate exp10(r) by a polynomial on the interval [-0.5*log10(2), 0.5*log10(2)]:
-#
-#           exp10(x) = 1.0 + polynomial(x),
-#
-#    sup norm relative error within the interval of the polynomial approximations:
-#    Float64 : [2.7245504724394698952e-18; 2.7245529895753476720e-18]
-#    Float32 : [9.6026471477842205871e-10; 9.6026560194009888672e-10]
-#
-# 3. Scale back: exp10(x) = 2^k * exp10(r)
-
-# log2(10)
-const LOG2_10 = 3.321928094887362347870319429489390175864831393024580612054756395815934776608624
-# log10(2)
-const LOG10_2 = 3.010299956639811952137388947244930267681898814621085413104274611271081892744238e-01
-# log(10)
-const LN10 = 2.302585092994045684017991454684364207601101488628772976033327900967572609677367
-
-# log10(2) into upper and lower bits
-LOG10_2U(::Type{Float64}) = 3.01025390625000000000e-1
-LOG10_2U(::Type{Float32}) = 3.00781250000000000000f-1
-
-LOG10_2L(::Type{Float64}) = 4.60503898119521373889e-6
-LOG10_2L(::Type{Float32}) = 2.48745663981195213739f-4
-
-# max and min arguments
-MAX_EXP10(::Type{Float64}) = 3.08254715559916743851e2 # log 2^1023*(2-2^-52)
-MAX_EXP10(::Type{Float32}) = 38.531839419103626f0     # log 2^127 *(2-2^-23)
-
-# one less than the min exponent since we can sqeeze a bit more from the exp10 function
-MIN_EXP10(::Type{Float64}) = -3.23607245338779784854769e2 # log10 2^-1075
-MIN_EXP10(::Type{Float32}) = -45.15449934959718f0         # log10 2^-150
-
-@inline exp10_kernel(x::Float64) =
-    @horner(x, 1.0,
-    2.30258509299404590109361379290930926799774169921875,
-    2.6509490552391992146397114993305876851081848144531,
-    2.03467859229323178027470930828712880611419677734375,
-    1.17125514891212478829629617393948137760162353515625,
-    0.53938292928868392106522833273629657924175262451172,
-    0.20699584873167015119932443667494226247072219848633,
-    6.8089348259156870502017966373387025669217109680176e-2,
-    1.9597690535095281527677713029333972372114658355713e-2,
-    5.015553121397981796436571499953060992993414402008e-3,
-    1.15474960721768829356725927226534622604958713054657e-3,
-    1.55440426715227567738830671828509366605430841445923e-4,
-    3.8731032432074128681303432086835414338565897196531e-5,
-    2.3804466459036747669197886523306806338950991630554e-3,
-    9.3881392238209649520573607528461934634833596646786e-5,
-    -2.64330486232183387018679354696359951049089431762695e-2)
-
-@inline exp10_kernel(x::Float32) =
-    @horner(x, 1.0f0,
-    2.302585124969482421875f0,
-    2.650949001312255859375f0,
-    2.0346698760986328125f0,
-    1.17125606536865234375f0,
-    0.5400512218475341796875f0,
-    0.20749187469482421875f0,
-    5.2789829671382904052734375f-2)
-
-@eval exp10_small_thres(::Type{Float64}) = $(2.0^-29)
-@eval exp10_small_thres(::Type{Float32}) = $(2.0f0^-14)
-
-"""
-    exp10(x)
-
-Compute ``10^x``.
-
-# Examples
-```jldoctest
-julia> exp10(2)
-100.0
-
-julia> exp10(0.2)
-1.5848931924611136
-```
-"""
-exp10(x::Real) = exp10(float(x))
-function exp10(x::T) where T<:Union{Float32,Float64}
-    xa = reinterpret(Unsigned, x) & ~sign_mask(T)
-    xsb = signbit(x)
-
-    # filter out non-finite arguments
-    if xa > reinterpret(Unsigned, MAX_EXP10(T))
-        if xa >= exponent_mask(T)
-            xa & significand_mask(T) != 0 && return T(NaN)
-            return xsb ? T(0.0) : T(Inf) # exp10(+-Inf)
-        end
-        x > MAX_EXP10(T) && return T(Inf)
-        x < MIN_EXP10(T) && return T(0.0)
-    end
-    # compute approximation
-    if xa > reinterpret(Unsigned, T(0.5)*T(LOG10_2)) # |x| > 0.5 log10(2).
-        # argument reduction
-        if xa < reinterpret(Unsigned, T(1.5)*T(LOG10_2)) # |x| <= 1.5 log10(2)
-            if xsb
-                k = -1
-                r = LOG10_2U(T) + x
-                r = LOG10_2L(T) + r
-            else
-                k = 1
-                r = x - LOG10_2U(T)
-                r = r - LOG10_2L(T)
-            end
-        else
-            n = round(T(LOG2_10)*x)
-            k = unsafe_trunc(Int,n)
-            r = muladd(n, -LOG10_2U(T), x)
-            r = muladd(n, -LOG10_2L(T), r)
-        end
-        # compute approximation on reduced argument
-        y = exp10_kernel(r)
-        # scale back
-        if k > -significand_bits(T)
-            # multiply by 2.0 first to prevent overflow, extending the range
-            k == exponent_max(T) && return y * T(2.0) * T(2.0)^(exponent_max(T) - 1)
-            twopk = reinterpret(T, rem(exponent_bias(T) + k, uinttype(T)) << significand_bits(T))
-            return y*twopk
-        else
-            # add significand_bits(T) + 1 to lift the range outside the subnormals
-            twopk = reinterpret(T, rem(exponent_bias(T) + significand_bits(T) + 1 + k, uinttype(T)) << significand_bits(T))
-            return y * twopk * T(2.0)^(-significand_bits(T) - 1)
-        end
-    elseif xa < reinterpret(Unsigned, exp10_small_thres(T))  # |x| < exp10_small_thres
-        # Taylor approximation for small values: exp10(x) ≈ 1.0 + log(10)*x
-        return muladd(x, T(LN10), T(1.0))
-    else
-        # primary range with k = 0, so compute approximation directly
-        return exp10_kernel(x)
-    end
-end
diff --git a/base/special/hyperbolic.jl b/base/special/hyperbolic.jl
index 4b0e994b7e610a..74f750064c7c25 100644
--- a/base/special/hyperbolic.jl
+++ b/base/special/hyperbolic.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# sinh, cosh, tanh, asinh, acosh, and atanh are heavily based on FDLIBM code:
+# asinh, acosh, and atanh are heavily based on FDLIBM code:
 # e_sinh.c, e_sinhf, e_cosh.c, e_coshf, s_tanh.c, s_tanhf.c, s_asinh.c,
 # s_asinhf.c, e_acosh.c, e_coshf.c, e_atanh.c, and e_atanhf.c
 # that are made available under the following licence:
@@ -14,6 +14,7 @@
 # is preserved.
 # ====================================================
 
+
 # Hyperbolic functions
 # sinh methods
 H_SMALL_X(::Type{Float64}) = 2.0^-28
@@ -23,159 +24,139 @@ H_SMALL_X(::Type{Float32}) = 2f-12
 H_MEDIUM_X(::Type{Float32}) = 9f0
 
 H_LARGE_X(::Type{Float64}) = 709.7822265633563 # nextfloat(709.7822265633562)
-H_OVERFLOW_X(::Type{Float64}) = 710.475860073944 # nextfloat(710.4758600739439)
 
 H_LARGE_X(::Type{Float32}) = 88.72283f0
-H_OVERFLOW_X(::Type{Float32}) = 89.415985f0
-function sinh(x::T) where T <: Union{Float32, Float64}
+
+SINH_SMALL_X(::Type{Float64}) = 2.1
+SINH_SMALL_X(::Type{Float32}) = 3.0f0
+
+# For Float64, use DoubleFloat scheme for extra accuracy
+function sinh_kernel(x::Float64)
+    x2, x2lo = two_mul(x,x)
+    hi_order = evalpoly(x2, (8.333333333336817e-3, 1.9841269840165435e-4,
+                             2.7557319381151335e-6, 2.5052096530035283e-8,
+                             1.6059550718903307e-10, 7.634842144412119e-13,
+                             2.9696954760355812e-15))
+    hi,lo = exthorner(x2, (1.0, 0.16666666666666635, hi_order))
+    return muladd(x, hi, muladd(x, lo, x*x2lo*0.16666666666666635))
+end
+# For Float32, using Float64 is simpler, faster, and doesn't require FMA
+function sinh_kernel(x::Float32)
+    x=Float64(x)
+    res = evalpoly(x*x, (1.0, 0.1666666779967941, 0.008333336726447933,
+                         0.00019841001151414065, 2.7555538207080807e-6,
+                         2.5143389765825282e-8, 1.6260094552031644e-10))
+    return Float32(res*x)
+end
+
+@inline function sinh16_kernel(x::Float32)
+    res = evalpoly(x*x, (1.0f0, 0.16666667f0, 0.008333337f0, 0.00019841001f0,
+                         2.7555539f-6, 2.514339f-8, 1.6260095f-10))
+    return Float16(res*x)
+end
+
+function sinh(x::T) where T<:Union{Float32,Float64}
     # Method
     # mathematically sinh(x) is defined to be (exp(x)-exp(-x))/2
-    #    1. Replace x by |x| (sinh(-x) = -sinh(x)).
+    #    1. Sometimes replace x by |x| (sinh(-x) = -sinh(x)).
     #    2. Find the branch and the expression to calculate and return it
-    #      a)   0 <= x < H_SMALL_X
-    #               return x
-    #      b)   H_SMALL_X <= x < H_MEDIUM_X
-    #               return sinh(x) = (E + E/(E+1))/2, where E=expm1(x)
-    #      c)   H_MEDIUM_X <= x < H_LARGE_X
-    #               return sinh(x) = exp(x)/2
-    #      d)   H_LARGE_X  <= x < H_OVERFLOW_X
+    #      a)   0 <= x < SINH_SMALL_X
+    #               approximate sinh(x) with a  minimax polynomial
+    #      b)   SINH_SMALL_X <= x < H_LARGE_X
+    #               return sinh(x) = (exp(x) - exp(-x))/2
+    #      d)   H_LARGE_X  <= x
     #               return sinh(x) = exp(x/2)/2 * exp(x/2)
-    #      e)   H_OVERFLOW_X <=  x
-    #               return sinh(x) = T(Inf)
-    #
-    # Notes:
-    #    only sinh(0) = 0 is exact for finite x.
+    #               Note that this branch automatically deals with Infs and NaNs
 
-    isnan(x) && return x
+    absx = abs(x)
+    if absx <= SINH_SMALL_X(T)
+        return sinh_kernel(x)
+    elseif absx >= H_LARGE_X(T)
+        E = exp(T(.5)*absx)
+        return copysign(T(.5)*E*E, x)
+    end
+    E = exp(absx)
+    return copysign(T(.5)*(E - 1/E),x)
+end
 
+function Base.sinh(a::Float16)
+    x = Float32(a)
     absx = abs(x)
+    absx <= SINH_SMALL_X(Float32) && return sinh16_kernel(x)
+    E = exp(absx)
+    return Float16(copysign(.5f0*(E - 1/E),x))
+end
 
-    h = T(0.5)
-    if x < 0
-        h = -h
-    end
-    # in a) or b)
-    if absx < H_MEDIUM_X(T)
-        # in a)
-        if absx < H_SMALL_X(T)
-            return x
-        end
-        t = expm1(absx)
-        if absx < T(1)
-            return h*(T(2)*t - t*t/(t + T(1)))
-        end
-        return h*(t + t/(t + T(1)))
-    end
-    # in c)
-    if absx < H_LARGE_X(T)
-        return h*exp(absx)
-    end
-    # in d)
-    if absx < H_OVERFLOW_X(T)
-        return h*T(2)*_ldexp_exp(absx, Int32(-1))
-    end
-    # in e)
-    return copysign(T(Inf), x)
+COSH_SMALL_X(::Type{T}) where T= one(T)
+
+function cosh_kernel(x2::Float32)
+    return evalpoly(x2, (1.0f0, 0.49999997f0, 0.041666888f0, 0.0013882756f0, 2.549933f-5))
+end
+
+function cosh_kernel(x2::Float64)
+    return evalpoly(x2, (1.0, 0.5000000000000002, 0.04166666666666269,
+                         1.3888888889206764e-3, 2.4801587176784207e-5,
+                         2.7557345825742837e-7, 2.0873617441235094e-9,
+                         1.1663435515945578e-11))
 end
-sinh(x::Real) = sinh(float(x))
 
-# cosh methods
-COSH_SMALL_X(::Type{Float32}) = 0.00024414062f0
-COSH_SMALL_X(::Type{Float64}) = 2.7755602085408512e-17
-function cosh(x::T) where T <: Union{Float32, Float64}
+function cosh(x::T) where T<:Union{Float32,Float64}
     # Method
     # mathematically cosh(x) is defined to be (exp(x)+exp(-x))/2
     #    1. Replace x by |x| (cosh(x) = cosh(-x)).
     #    2. Find the branch and the expression to calculate and return it
     #      a)   x <= COSH_SMALL_X
-    #               return T(1)
-    #      b)   COSH_SMALL_X <= x <= ln2/2
-    #               return 1+expm1(|x|)^2/(2*exp(|x|))
-    #      c)   ln2/2 <= x <= H_MEDIUM_X
-    #               return (exp(|x|)+1/exp(|x|)/2
-    #      d)   H_MEDIUM_X <= x < H_LARGE_X
-    #               return cosh(x) = exp(x)/2
-    #      e)   H_LARGE_X  <= x < H_OVERFLOW_X
+    #               approximate sinh(x) with a minimax polynomial
+    #      b)   COSH_SMALL_X <= x < H_LARGE_X
+    #               return cosh(x) = = (exp(x) + exp(-x))/2
+    #      e)   H_LARGE_X  <= x
     #               return cosh(x) = exp(x/2)/2 * exp(x/2)
-    #      f)   H_OVERFLOW_X <=  x
-    #               return cosh(x) = T(Inf)
-
-    isnan(x) && return x
+    #               Note that this branch automatically deals with Infs and NaNs
 
     absx = abs(x)
-    h = T(0.5)
-    # in a) or b)
-    if absx < log(T(2))/2
-        # in a)
-        if absx < COSH_SMALL_X(T)
-            return T(1)
-        end
-        t = expm1(absx)
-        w = T(1) + t
-        return T(1) + (t*t)/(w + w)
-    end
-    # in c)
-    if absx < H_MEDIUM_X(T)
-        t = exp(absx)
-        return h*t + h/t
-    end
-    # in d)
-    if absx < H_LARGE_X(T)
-        return h*exp(absx)
+    if absx <= COSH_SMALL_X(T)
+        return cosh_kernel(x*x)
+    elseif absx >= H_LARGE_X(T)
+        E = exp(T(.5)*absx)
+        return T(.5)*E*E
     end
-    # in e)
-    if absx < H_OVERFLOW_X(T)
-        return _ldexp_exp(absx, Int32(-1))
-    end
-    # in f)
-    return T(Inf)
+    E = exp(absx)
+    return T(.5)*(E + 1/E)
 end
-cosh(x::Real) = cosh(float(x))
 
 # tanh methods
-TANH_LARGE_X(::Type{Float64}) = 22.0
-TANH_LARGE_X(::Type{Float32}) = 9.0f0
+TANH_LARGE_X(::Type{Float64}) = 44.0
+TANH_LARGE_X(::Type{Float32}) = 18.0f0
+TANH_SMALL_X(::Type{Float64}) = 1.0
+TANH_SMALL_X(::Type{Float32}) = 1.3862944f0       #2*log(2)
+@inline function tanh_kernel(x::Float64)
+    return evalpoly(x, (1.0, -0.33333333333332904, 0.13333333333267555,
+                        -0.05396825393066753, 0.02186948742242217,
+                        -0.008863215974794633, 0.003591910693118715,
+                        -0.0014542587440487815, 0.0005825521659411748,
+                        -0.00021647574085351332, 5.5752458452673005e-5))
+end
+@inline function tanh_kernel(x::Float32)
+    return evalpoly(x, (1.0f0, -0.3333312f0, 0.13328037f0,
+                        -0.05350336f0, 0.019975215f0, -0.0050525228f0))
+end
 function tanh(x::T) where T<:Union{Float32, Float64}
     # Method
     # mathematically tanh(x) is defined to be (exp(x)-exp(-x))/(exp(x)+exp(-x))
     #    1. reduce x to non-negative by tanh(-x) = -tanh(x).
     #    2. Find the branch and the expression to calculate and return it
     #      a) 0 <= x < H_SMALL_X
-    #             return x
-    #      b) H_SMALL_X <= x < 1
-    #            -expm1(-2x)/(expm1(-2x) + 2)
-    #      c) 1 <= x < TANH_LARGE_X
-    #           1 - 2/(expm1(2x) + 2)
-    #      d) TANH_LARGE_X <= x
+    #             Use a minimax polynomial over the range
+    #      b) H_SMALL_X <= x < TANH_LARGE_X
+    #           1 - 2/(exp(2x) + 1)
+    #      c) TANH_LARGE_X <= x
     #            return 1
-    if isnan(x)
-        return x
-    elseif isinf(x)
-        return copysign(T(1), x)
-    end
-
-    absx = abs(x)
-    if absx < TANH_LARGE_X(T)
-        # in a)
-        if absx < H_SMALL_X(T)
-            return x
-        end
-        if absx >= T(1)
-            # in c)
-            t = expm1(T(2)*absx)
-            z = T(1) - T(2)/(t + T(2))
-        else
-            # in b)
-            t = expm1(-T(2)*absx)
-            z = -t/(t + T(2))
-        end
-    else
-        # in d)
-        z = T(1)
-    end
-    return copysign(z, x)
+    abs2x = abs(2x)
+    abs2x >= TANH_LARGE_X(T) && return copysign(one(T), x)
+    abs2x <= TANH_SMALL_X(T) && return x*tanh_kernel(x*x)
+    k = exp(abs2x)
+    return copysign(1 - 2/(k+1), x)
 end
-tanh(x::Real) = tanh(float(x))
 
 # Inverse hyperbolic functions
 AH_LN2(::Type{Float64}) = 6.93147180559945286227e-01
@@ -216,7 +197,6 @@ function asinh(x::T) where T <: Union{Float32, Float64}
     end
     return copysign(w, x)
 end
-asinh(x::Real) = asinh(float(x))
 
 # acosh methods
 @noinline acosh_domain_error(x) = throw(DomainError(x, "acosh(x) is only defined for x ≥ 1."))
@@ -255,7 +235,6 @@ function acosh(x::T) where T <: Union{Float32, Float64}
         return log(x) + AH_LN2(T)
     end
 end
-acosh(x::Real) = acosh(float(x))
 
 # atanh methods
 @noinline atanh_domain_error(x) = throw(DomainError(x, "atanh(x) is only defined for |x| ≤ 1."))
@@ -263,14 +242,10 @@ function atanh(x::T) where T <: Union{Float32, Float64}
     # Method
     # 1.Reduced x to positive by atanh(-x) = -atanh(x)
     # 2. Find the branch and the expression to calculate and return it
-    #     a) 0 <= x < 2^-28
-    #         return x
-    #     b) 2^-28 <= x < 0.5
-    #         return 0.5*log1p(2x+2x*x/(1-x))
-    #     c) 0.5 <= x < 1
-    #         return 0.5*log1p(2x/1-x)
-    #     d) x = 1
-    #         return Inf
+    #     a) 0 <= x < 0.5
+    #         return 0.5*log1p(2x/(1-x))
+    #     b) 0.5 <= x <= 1
+    #         return 0.5*log((x+1)/(1-x))
     # Special cases:
     #    if |x| > 1 throw DomainError
     isnan(x) && return x
@@ -280,21 +255,12 @@ function atanh(x::T) where T <: Union{Float32, Float64}
     if absx > 1
         atanh_domain_error(x)
     end
-    if absx < T(2)^-28
-        # in a)
-        return x
-    end
     if absx < T(0.5)
+        # in a)
+        t = log1p(T(2)*absx/(T(1)-absx))
+    else
         # in b)
-        t = absx+absx
-        t = T(0.5)*log1p(t+t*absx/(T(1)-absx))
-    elseif absx < T(1)
-        # in c)
-        t = T(0.5)*log1p((absx + absx)/(T(1)-absx))
-    elseif absx == T(1)
-        # in d)
-        return copysign(T(Inf), x)
+        t = log((T(1)+absx)/(T(1)-absx))
     end
-    return copysign(t, x)
+    return T(0.5)*copysign(t, x)
 end
-atanh(x::Real) = atanh(float(x))
diff --git a/base/special/ldexp_exp.jl b/base/special/ldexp_exp.jl
deleted file mode 100644
index 3ea0f39373ecec..00000000000000
--- a/base/special/ldexp_exp.jl
+++ /dev/null
@@ -1,105 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# This code is a Julia translation of the C code from Openlibm (http://www.openlibm.org/)
-# with the following license:
-
-# Copyright (c) 2011 David Schultz <das@FreeBSD.ORG>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-#
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
-
-modify_highword(x::Float32, hw) = reinterpret(Float32, hw)
-modify_highword(x::Float64, hw) = reinterpret(Float64, (UInt64(hw)<<32)|(reinterpret(UInt64, x)<<32)>>32)
-
-exponent_rshift(T::Type{Float32}, hw) = hw >> 23 # this comes from 32 (bits in UInt32) minus 9 bits for the sign and exponent
-exponent_rshift(T::Type{Float64}, hw) = hw >> 20 # this comes from 32 (bits in UInt32) minus 12 bits for the sign and exponent
-exponent_lshift(T::Type{Float32}, hw) = hw << 23 # this comes from 32 (bits in UInt32) minus 9 bits for the sign and exponent
-exponent_lshift(T::Type{Float64}, hw) = hw << 20 # this comes from 32 (bits in UInt32) minus 12 bits for the sign and exponent
-
-function modify_exponent(x::T, expnt_x) where T <: Union{Float32, Float64}
-    # mask away exponent; "100...0111..111" with 9 or 12 leading 0's
-    high_mask = T == Float32 ? 0x807fffff : 0x800fffff # don't mask away the sign
-    # use mask to replace with first 9 or 12 bits with expnt_x << appropriately
-    modify_highword(x, (highword(x) & high_mask) | exponent_lshift(T, expnt_x))
-end
-
-"""
-    _ldexp_exp(x, l2)
-Returns exp(x) * 2^l2. The function is intended for large arguments, x, where
-x >= ln(prevfloat(typemax(x)) and care is needed to avoid overflow.
-
-The present implementation is narrowly tailored for our hyperbolic and
-exponential functions.  We assume l2 is small (0 or -1), and the caller
-has filtered out very large x, for which overflow would be inevitable.
-"""
-function _ldexp_exp(x::T, l2) where T <: Union{Float32, Float64}
-    # This function is intended for use in our hyperbolic and exponential functions.
-
-    # Calculate exp(x) = (exp(x-kr*log(2))*2^ks*)2^k2 = exp_x*2^k2
-    exp_x, k2 = _frexp_exp(x)
-
-    # Add the two exponents together to form (2^l2)*(2^k2) = 2^(l2+k2) = 2^L
-    l2 += k2
-    L_as_hw = exponent_lshift(T, UInt32(exponent_bias(T) + l2))
-    # Form 2^L
-    scale = fromhighword(T, L_as_hw)
-    # Return exp(x)*2^l2
-    return exp_x * scale
-end
-
-"""
-    exp_x, k2 = _frexp_exp(x)
-
-Calculate exp(x) as exp_x*2^k2 and return exp_x = exp(x-kr*log(w))*2^ks where kr
-is a type dependant range reduction constant, ks scales exp_x towards the largest
-finite number, and k2 is used to absorb the remaning scale to allow for exp(x)
-to be outside the normal floating point range.
-
-This function is intended for use in our hyperbolic and exponential functions.
-"""
-function _frexp_exp(x::T) where T<:Union{Float32, Float64}
-    # and should only be used for values in the range (let T = typeof(x)):
-    #
-    #     log(prevfloat(typemax(x))) <= x < log(2 * prevfloat(typemax(x) / nextfloat(T(0)))
-    #
-    # where the upper bound is around 192.7f0 and ~= 1454.91. The function outputs
-    # exp_x in the ranges
-    #     [2f0^127, 2f0^128) and
-    #     [2.0^1023, 2.0^1024)
-    # respectively.
-
-    # We use exp(x) = exp(x - kln2) * 2**k, carefully chosen to
-    # minimize |exp(kln2) - 2**k|.
-    kr = T == Float32 ? UInt32(235) : UInt32(1799)
-
-    # We also scale the exponent of exp_x to exponent_bias + the largest finite
-    # exponent (exponent of T(Inf)-1, so that the result can be multiplied by
-    # a tiny number without losing accuracy due to denormalization.
-    exp_x = exp(x - kr*log(T(2))) # exp_x*2^k = exp(x)
-
-    # Calculate the ks in exp_x*2^ks
-    ks = exponent_rshift(T, highword(exp_x)) - (exponent_bias(T) + (exponent_max(T) - 1)) + kr
-
-    # Rescale exp_x to have exponent k2 = exponent_max(T) - 1
-    exp_x = modify_exponent(exp_x, UInt32(exponent_bias(T) + (exponent_max(T) - 1)))
-    return exp_x, ks
-end
diff --git a/base/special/log.jl b/base/special/log.jl
index ef578edb56795e..440a32f8da0f02 100644
--- a/base/special/log.jl
+++ b/base/special/log.jl
@@ -139,19 +139,24 @@ const t_log_Float32 = (0.0,0.007782140442054949,0.015504186535965254,0.023167059
     0.6773988235918061,0.6813592248079031,0.6853040030989194,0.689233281238809,
     0.6931471805599453)
 
-# determine if hardware FMA is available
-# should probably check with LLVM, see #9855.
-const FMA_NATIVE = muladd(nextfloat(1.0),nextfloat(1.0),-nextfloat(1.0,2)) != 0
-
 # truncate lower order bits (up to 26)
 # ideally, this should be able to use ANDPD instructions, see #9868.
 @inline function truncbits(x::Float64)
     reinterpret(Float64, reinterpret(UInt64,x) & 0xffff_ffff_f800_0000)
 end
 
+logb(::Type{Float32},::Val{2})  = 1.4426950408889634
+logb(::Type{Float32},::Val{:ℯ}) = 1.0
+logb(::Type{Float32},::Val{10}) = 0.4342944819032518
+logbU(::Type{Float64},::Val{2})  = 1.4426950408889634
+logbL(::Type{Float64},::Val{2})  = 2.0355273740931033e-17
+logbU(::Type{Float64},::Val{:ℯ}) = 1.0
+logbL(::Type{Float64},::Val{:ℯ}) = 0.0
+logbU(::Type{Float64},::Val{10}) = 0.4342944819032518
+logbL(::Type{Float64},::Val{10}) = 1.098319650216765e-17
 
 # Procedure 1
-@inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,jp::Int)
+@inline function log_proc1(y::Float64,mf::Float64,F::Float64,f::Float64,jp::Int,base=Val(:ℯ))
     ## Steps 1 and 2
     @inbounds hi,lo = t_log_Float64[jp]
     l_hi = mf* 0.6931471805601177 + hi
@@ -175,11 +180,13 @@ end
                     0.012500053168098584)
 
     ## Step 4
-    l_hi + (u + (q + l_lo))
+    m_hi = logbU(Float64, base)
+    m_lo = logbL(Float64, base)
+    return fma(m_hi, l_hi, fma(m_hi, (u + (q + l_lo)), m_lo*l_hi))
 end
 
 # Procedure 2
-@inline function log_proc2(f::Float64)
+@inline function log_proc2(f::Float64,base=Val(:ℯ))
     ## Step 1
     g = 1.0/(2.0+f)
     u = 2.0*f*g
@@ -198,20 +205,14 @@ end
     #   2(f-u1-u2) - f*(u1+u2) = 0
     #   2(f-u1) - f*u1 = (2+f)u2
     #   u2 = (2(f-u1) - f*u1)/(2+f)
-    if FMA_NATIVE
-        return u + fma(fma(-u,f,2(f-u)), g, q)
-    else
-        u1 = truncbits(u) # round to 24 bits
-        f1 = truncbits(f)
-        f2 = f-f1
-        u2 = ((2.0*(f-u1)-u1*f1)-u1*f2)*g
-        ## Step 4
-        return u1 + (u2 + q)
-    end
+
+    m_hi = logbU(Float64, base)
+    m_lo = logbL(Float64, base)
+    return fma(m_hi, u, fma(m_lo, u, m_hi*fma(fma(-u,f,2(f-u)), g, q)))
 end
 
 
-@inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,jp::Int)
+@inline function log_proc1(y::Float32,mf::Float32,F::Float32,f::Float32,jp::Int,base=Val(:ℯ))
     ## Steps 1 and 2
     @inbounds hi = t_log_Float32[jp]
     l = mf*0.6931471805599453 + hi
@@ -228,10 +229,10 @@ end
     q = u*v*0.08333351f0
 
     ## Step 4
-    Float32(l + (u + q))
+    Float32(logb(Float32, base)*(l + (u + q)))
 end
 
-@inline function log_proc2(f::Float32)
+@inline function log_proc2(f::Float32,base=Val(:ℯ))
     ## Step 1
     # compute in higher precision
     u64 = Float64(2f0*f)/(2.0+f)
@@ -246,18 +247,24 @@ end
     ## Step 3: not required
 
     ## Step 4
-    Float32(u64 + q)
+    Float32(logb(Float32, base)*(u64 + q))
 end
 
+log2(x::Float32)  = _log(x, Val(2),  :log2)
+log(x::Float32)   = _log(x, Val(:ℯ), :log)
+log10(x::Float32) = _log(x, Val(10), :log10)
+log2(x::Float64)  = _log(x, Val(2),  :log2)
+log(x::Float64)   = _log(x, Val(:ℯ), :log)
+log10(x::Float64) = _log(x, Val(10), :log10)
 
-function log(x::Float64)
+function _log(x::Float64, base, func)
     if x > 0.0
         x == Inf && return x
 
         # Step 2
         if 0.9394130628134757 < x < 1.0644944589178595
             f = x-1.0
-            return log_proc2(f)
+            return log_proc2(f, base)
         end
 
         # Step 3
@@ -276,24 +283,24 @@ function log(x::Float64)
         f = y-F
         jp = unsafe_trunc(Int,128.0*F)-127
 
-        return log_proc1(y,mf,F,f,jp)
+        return log_proc1(y,mf,F,f,jp,base)
     elseif x == 0.0
         -Inf
     elseif isnan(x)
         NaN
     else
-        throw_complex_domainerror(:log, x)
+        throw_complex_domainerror(func, x)
     end
 end
 
-function log(x::Float32)
+function _log(x::Float32, base, func)
     if x > 0f0
         x == Inf32 && return x
 
         # Step 2
         if 0.939413f0 < x < 1.0644945f0
             f = x-1f0
-            return log_proc2(f)
+            return log_proc2(f, base)
         end
 
         # Step 3
@@ -312,13 +319,13 @@ function log(x::Float32)
         f = y-F
         jp = unsafe_trunc(Int,128.0f0*F)-127
 
-        log_proc1(y,mf,F,f,jp)
+        log_proc1(y,mf,F,f,jp,base)
     elseif x == 0f0
         -Inf32
     elseif isnan(x)
         NaN32
     else
-        throw_complex_domainerror(:log, x)
+        throw_complex_domainerror(func, x)
     end
 end
 
@@ -390,8 +397,190 @@ function log1p(x::Float32)
     end
 end
 
-for f in (:log,:log1p)
-    @eval begin
-        ($f)(x::Real) = ($f)(float(x))
-    end
+#function make_compact_table(N)
+#    table = Tuple{UInt64,Float64}[]
+#    lo, hi = 0x1.69555p-1, 0x1.69555p0
+#    for i in 0:N-1
+#        # I am not fully sure why this is the right formula to use, but it apparently is
+#        center = i/(2*N) + lo < 1 ? (i+.5)/(2*N) + lo : (i+.5)/N + hi -1
+#        invc = Float64(center < 1 ? round(N/center)/N : round(2*N/center)/(N*2))
+#        c = inv(big(invc))
+#        logc = Float64(round(0x1p43*log(c))/0x1p43)
+#        logctail = reinterpret(Float64, Float64(log(c) - logc))
+#        p1 = (reinterpret(UInt64,invc) >> 45) % UInt8
+#        push!(table, (p1|reinterpret(UInt64,logc),logctail))
+#    end
+#    return Tuple(table)
+#end
+#const t_log_table_compat = make_compact_table(128)
+const t_log_table_compat = (
+    (0xbfd62c82f2b9c8b5, 5.929407345889625e-15),
+    (0xbfd5d1bdbf5808b4, -2.544157440035963e-14),
+    (0xbfd57677174558b3, -3.443525940775045e-14),
+    (0xbfd51aad872df8b2, -2.500123826022799e-15),
+    (0xbfd4be5f957778b1, -8.929337133850617e-15),
+    (0xbfd4618bc21c60b0, 1.7625431312172662e-14),
+    (0xbfd404308686a8af, 1.5688303180062087e-15),
+    (0xbfd3a64c556948ae, 2.9655274673691784e-14),
+    (0xbfd347dd9a9880ad, 3.7923164802093147e-14),
+    (0xbfd2e8e2bae120ac, 3.993416384387844e-14),
+    (0xbfd2895a13de88ab, 1.9352855826489123e-14),
+    (0xbfd2895a13de88ab, 1.9352855826489123e-14),
+    (0xbfd22941fbcf78aa, -1.9852665484979036e-14),
+    (0xbfd1c898c16998a9, -2.814323765595281e-14),
+    (0xbfd1675cababa8a8, 2.7643769993528702e-14),
+    (0xbfd1058bf9ae48a7, -4.025092402293806e-14),
+    (0xbfd0a324e27390a6, -1.2621729398885316e-14),
+    (0xbfd0402594b4d0a5, -3.600176732637335e-15),
+    (0xbfd0402594b4d0a5, -3.600176732637335e-15),
+    (0xbfcfb9186d5e40a4, 1.3029797173308663e-14),
+    (0xbfcef0adcbdc60a3, 4.8230289429940886e-14),
+    (0xbfce27076e2af0a2, -2.0592242769647135e-14),
+    (0xbfcd5c216b4fc0a1, 3.149265065191484e-14),
+    (0xbfcc8ff7c79aa0a0, 4.169796584527195e-14),
+    (0xbfcc8ff7c79aa0a0, 4.169796584527195e-14),
+    (0xbfcbc286742d909f, 2.2477465222466186e-14),
+    (0xbfcaf3c94e80c09e, 3.6507188831790577e-16),
+    (0xbfca23bc1fe2b09d, -3.827767260205414e-14),
+    (0xbfca23bc1fe2b09d, -3.827767260205414e-14),
+    (0xbfc9525a9cf4509c, -4.7641388950792196e-14),
+    (0xbfc87fa06520d09b, 4.9278276214647115e-14),
+    (0xbfc7ab890210e09a, 4.9485167661250996e-14),
+    (0xbfc7ab890210e09a, 4.9485167661250996e-14),
+    (0xbfc6d60fe719d099, -1.5003333854266542e-14),
+    (0xbfc5ff3070a79098, -2.7194441649495324e-14),
+    (0xbfc5ff3070a79098, -2.7194441649495324e-14),
+    (0xbfc526e5e3a1b097, -2.99659267292569e-14),
+    (0xbfc44d2b6ccb8096, 2.0472357800461955e-14),
+    (0xbfc44d2b6ccb8096, 2.0472357800461955e-14),
+    (0xbfc371fc201e9095, 3.879296723063646e-15),
+    (0xbfc29552f81ff094, -3.6506824353335045e-14),
+    (0xbfc1b72ad52f6093, -5.4183331379008994e-14),
+    (0xbfc1b72ad52f6093, -5.4183331379008994e-14),
+    (0xbfc0d77e7cd09092, 1.1729485484531301e-14),
+    (0xbfc0d77e7cd09092, 1.1729485484531301e-14),
+    (0xbfbfec9131dbe091, -3.811763084710266e-14),
+    (0xbfbe27076e2b0090, 4.654729747598445e-14),
+    (0xbfbe27076e2b0090, 4.654729747598445e-14),
+    (0xbfbc5e548f5bc08f, -2.5799991283069902e-14),
+    (0xbfba926d3a4ae08e, 3.7700471749674615e-14),
+    (0xbfba926d3a4ae08e, 3.7700471749674615e-14),
+    (0xbfb8c345d631a08d, 1.7306161136093256e-14),
+    (0xbfb8c345d631a08d, 1.7306161136093256e-14),
+    (0xbfb6f0d28ae5608c, -4.012913552726574e-14),
+    (0xbfb51b073f06208b, 2.7541708360737882e-14),
+    (0xbfb51b073f06208b, 2.7541708360737882e-14),
+    (0xbfb341d7961be08a, 5.0396178134370583e-14),
+    (0xbfb341d7961be08a, 5.0396178134370583e-14),
+    (0xbfb16536eea38089, 1.8195060030168815e-14),
+    (0xbfaf0a30c0118088, 5.213620639136504e-14),
+    (0xbfaf0a30c0118088, 5.213620639136504e-14),
+    (0xbfab42dd71198087, 2.532168943117445e-14),
+    (0xbfab42dd71198087, 2.532168943117445e-14),
+    (0xbfa77458f632c086, -5.148849572685811e-14),
+    (0xbfa77458f632c086, -5.148849572685811e-14),
+    (0xbfa39e87b9fec085, 4.6652946995830086e-15),
+    (0xbfa39e87b9fec085, 4.6652946995830086e-15),
+    (0xbf9f829b0e780084, -4.529814257790929e-14),
+    (0xbf9f829b0e780084, -4.529814257790929e-14),
+    (0xbf97b91b07d58083, -4.361324067851568e-14),
+    (0xbf8fc0a8b0fc0082, -1.7274567499706107e-15),
+    (0xbf8fc0a8b0fc0082, -1.7274567499706107e-15),
+    (0xbf7fe02a6b100081, -2.298941004620351e-14),
+    (0xbf7fe02a6b100081, -2.298941004620351e-14),
+    (0x0000000000000080, 0.0),
+    (0x0000000000000080, 0.0),
+    (0x3f8010157589007e, -1.4902732911301337e-14),
+    (0x3f9020565893807c, -3.527980389655325e-14),
+    (0x3f98492528c9007a, -4.730054772033249e-14),
+    (0x3fa0415d89e74078, 7.580310369375161e-15),
+    (0x3fa466aed42e0076, -4.9893776716773285e-14),
+    (0x3fa894aa149fc074, -2.262629393030674e-14),
+    (0x3faccb73cdddc072, -2.345674491018699e-14),
+    (0x3faeea31c006c071, -1.3352588834854848e-14),
+    (0x3fb1973bd146606f, -3.765296820388875e-14),
+    (0x3fb3bdf5a7d1e06d, 5.1128335719851986e-14),
+    (0x3fb5e95a4d97a06b, -5.046674438470119e-14),
+    (0x3fb700d30aeac06a, 3.1218748807418837e-15),
+    (0x3fb9335e5d594068, 3.3871241029241416e-14),
+    (0x3fbb6ac88dad6066, -1.7376727386423858e-14),
+    (0x3fbc885801bc4065, 3.957125899799804e-14),
+    (0x3fbec739830a2063, -5.2849453521890294e-14),
+    (0x3fbfe89139dbe062, -3.767012502308738e-14),
+    (0x3fc1178e8227e060, 3.1859736349078334e-14),
+    (0x3fc1aa2b7e23f05f, 5.0900642926060466e-14),
+    (0x3fc2d1610c86805d, 8.710783796122478e-15),
+    (0x3fc365fcb015905c, 6.157896229122976e-16),
+    (0x3fc4913d8333b05a, 3.821577743916796e-14),
+    (0x3fc527e5e4a1b059, 3.9440046718453496e-14),
+    (0x3fc6574ebe8c1057, 2.2924522154618074e-14),
+    (0x3fc6f0128b757056, -3.742530094732263e-14),
+    (0x3fc7898d85445055, -2.5223102140407338e-14),
+    (0x3fc8beafeb390053, -1.0320443688698849e-14),
+    (0x3fc95a5adcf70052, 1.0634128304268335e-14),
+    (0x3fca93ed3c8ae050, -4.3425422595242564e-14),
+    (0x3fcb31d8575bd04f, -1.2527395755711364e-14),
+    (0x3fcbd087383be04e, -5.204008743405884e-14),
+    (0x3fcc6ffbc6f0104d, -3.979844515951702e-15),
+    (0x3fcdb13db0d4904b, -4.7955860343296286e-14),
+    (0x3fce530effe7104a, 5.015686013791602e-16),
+    (0x3fcef5ade4dd0049, -7.252318953240293e-16),
+    (0x3fcf991c6cb3b048, 2.4688324156011588e-14),
+    (0x3fd07138604d5846, 5.465121253624792e-15),
+    (0x3fd0c42d67616045, 4.102651071698446e-14),
+    (0x3fd1178e8227e844, -4.996736502345936e-14),
+    (0x3fd16b5ccbacf843, 4.903580708156347e-14),
+    (0x3fd1bf99635a6842, 5.089628039500759e-14),
+    (0x3fd214456d0eb841, 1.1782016386565151e-14),
+    (0x3fd2bef07cdc903f, 4.727452940514406e-14),
+    (0x3fd314f1e1d3603e, -4.4204083338755686e-14),
+    (0x3fd36b6776be103d, 1.548345993498083e-14),
+    (0x3fd3c2527733303c, 2.1522127491642888e-14),
+    (0x3fd419b423d5e83b, 1.1054030169005386e-14),
+    (0x3fd4718dc271c83a, -5.534326352070679e-14),
+    (0x3fd4c9e09e173039, -5.351646604259541e-14),
+    (0x3fd522ae0738a038, 5.4612144489920215e-14),
+    (0x3fd57bf753c8d037, 2.8136969901227338e-14),
+    (0x3fd5d5bddf596036, -1.156568624616423e-14))
+
+ @inline function log_tab_unpack(t::UInt64)
+    invc = UInt64(t&UInt64(0xff)|0x1ff00)<<45
+    logc = t&(~UInt64(0xff))
+    return (reinterpret(Float64, invc), reinterpret(Float64, logc))
+end
+
+# Log implementation that returns 2 numbers which sum to give true value with about 68 bits of precision
+# Since `log` only makes sense for positive exponents, we speed up the implimentation by stealing the sign bit
+# of the input for an extra bit of the exponent which is used to normalize subnormal inputs.
+# Does not normalize results.
+# Adapted and modified from https://github.com/ARM-software/optimized-routines/blob/master/math/pow.c
+# Copyright (c) 2018-2020, Arm Limited. (which is also MIT licensed)
+# note that this isn't an exact translation as this version compacts the table to reduce cache pressure.
+function _log_ext(xu)
+    # x = 2^k z; where z is in range [0x1.69555p-1,0x1.69555p-0) and exact.
+    # The range is split into N subintervals.
+    # The ith subinterval contains z and c is near the center of the interval.
+    tmp = reinterpret(Int64, xu - 0x3fe6955500000000) #0x1.69555p-1
+    i = (tmp >> 45) & 127
+    z = reinterpret(Float64, xu - (tmp & 0xfff0000000000000))
+    k = Float64(tmp >> 52)
+    # log(x) = k*Ln2 + log(c) + log1p(z/c-1).
+    t, logctail = t_log_table_compat[i+1]
+    invc, logc = log_tab_unpack(t)
+    # Note: invc is j/N or j/N/2 where j is an integer in [N,2N) and
+    # |z/c - 1| < 1/N, so r = z/c - 1 is exactly representible.
+    r = fma(z, invc, -1.0)
+    # k*Ln2 + log(c) + r.
+    t1 = muladd(k, 0.6931471805598903, logc) #ln(2) hi part
+    t2 = t1 + r
+    lo1 = muladd(k, 5.497923018708371e-14, logctail) #ln(2) lo part
+    lo2 = t1 - t2 + r
+    ar = -0.5 * r
+    ar2, lo3 = two_mul(r, ar)
+    # k*Ln2 + log(c) + r + .5*r*r.
+    hi = t2 + ar2
+    lo4 = t2 - hi + ar2
+    p = evalpoly(r, (-0x1.555555555556p-1, 0x1.0000000000006p-1, -0x1.999999959554ep-2, 0x1.555555529a47ap-2, -0x1.2495b9b4845e9p-2, 0x1.0002b8b263fc3p-2))
+    lo = lo1 + lo2 + lo3 + muladd(r*ar2, p, lo4)
+    return hi, lo
 end
diff --git a/base/special/rem_pio2.jl b/base/special/rem_pio2.jl
index 7242eb8f17b69a..4ec9945885e7e1 100644
--- a/base/special/rem_pio2.jl
+++ b/base/special/rem_pio2.jl
@@ -23,7 +23,7 @@
 #        @printf "0x%016x,\n" k
 #        I -= k
 #    end
-const INV_2PI = UInt64[
+const INV_2PI = (
     0x28be_60db_9391_054a,
     0x7f09_d5f4_7d4d_3770,
     0x36d8_a566_4f10_e410,
@@ -42,7 +42,7 @@ const INV_2PI = UInt64[
     0x5d49_eeb1_faf9_7c5e,
     0xcf41_ce7d_e294_a4ba,
     0x9afe_d7ec_47e3_5742,
-    0x1580_cc11_bf1e_daea]
+    0x1580_cc11_bf1e_daea)
 
 @inline function cody_waite_2c_pio2(x::Float64, fn, n)
     pio2_1 = 1.57079632673412561417e+00
diff --git a/base/special/trig.jl b/base/special/trig.jl
index 40feaeb79522dc..e3033aab6c2720 100644
--- a/base/special/trig.jl
+++ b/base/special/trig.jl
@@ -50,7 +50,6 @@ function sin(x::T) where T<:Union{Float32, Float64}
         return -cos_kernel(y)
     end
 end
-sin(x::Real) = sin(float(x))
 
 # Coefficients in 13th order polynomial approximation on [0; π/4]
 #     sin(x) ≈ x + S1*x³ + S2*x⁵ + S3*x⁷ + S4*x⁹ + S5*x¹¹ + S6*x¹³
@@ -121,7 +120,6 @@ function cos(x::T) where T<:Union{Float32, Float64}
         end
     end
 end
-cos(x::Real) = cos(float(x))
 
 const DC1 = 4.16666666666666019037e-02
 const DC2 = -1.38888888888741095749e-03
@@ -168,7 +166,10 @@ end
 """
     sincos(x)
 
-Simultaneously compute the sine and cosine of `x`, where the `x` is in radians.
+Simultaneously compute the sine and cosine of `x`, where `x` is in radians, returning
+a tuple `(sine, cosine)`.
+
+See also [`cis`](@ref), [`sincospi`](@ref), [`sincosd`](@ref).
 """
 function sincos(x::T) where T<:Union{Float32, Float64}
     if abs(x) < T(pi)/4
@@ -230,7 +231,6 @@ function tan(x::T) where T<:Union{Float32, Float64}
         return tan_kernel(y,-1)
     end
 end
-tan(x::Real) = tan(float(x))
 
 @inline tan_kernel(y::Float64) = tan_kernel(DoubleFloat64(y, 0.0), 1)
 @inline function tan_kernel(y::DoubleFloat64, k)
@@ -423,7 +423,7 @@ end
     flipsign(Float32(pi/2 - 2*(s + s*tRt)), x)
 end
 
-@noinline asin_domain_error(x) = throw(DomainError(x, "asin(x) is not defined for |x|>1."))
+@noinline asin_domain_error(x) = throw(DomainError(x, "asin(x) is not defined for |x| > 1."))
 function asin(x::T) where T<:Union{Float32, Float64}
     # Since  asin(x) = x + x^3/6 + x^5*3/40 + x^7*15/336 + ...
     # we approximate asin(x) on [0,0.5] by
@@ -449,7 +449,6 @@ function asin(x::T) where T<:Union{Float32, Float64}
     t = (T(1.0) - absx)/2
     return asin_kernel(t, x)
 end
-asin(x::Real) = asin(float(x))
 
 # atan methods
 ATAN_1_O_2_HI(::Type{Float64}) = 4.63647609000806093515e-01 # atan(0.5).hi
@@ -499,7 +498,6 @@ atan_q(w::Float32) = w*@horner(w, -1.9999158382f-01, -1.0648017377f-01)
     atan_p(x², x⁴), atan_q(x⁴)
 end
 
-atan(x::Real) = atan(float(x))
 function atan(x::T) where T<:Union{Float32, Float64}
     # Method
     #   1. Reduce x to positive by atan(x) = -atan(-x).
@@ -723,7 +721,6 @@ function acos(x::T) where T <: Union{Float32, Float64}
         return T(2.0)*(df + (zRz*s + c))
     end
 end
-acos(x::Real) = acos(float(x))
 
 # multiply in extended precision
 function mulpi_ext(x::Float64)
@@ -747,6 +744,8 @@ mulpi_ext(x::Real) = pi*x # Fallback
     sinpi(x)
 
 Compute ``\\sin(\\pi x)`` more accurately than `sin(pi*x)`, especially for large `x`.
+
+See also [`sind`](@ref), [`cospi`](@ref), [`sincospi`](@ref).
 """
 function sinpi(x::T) where T<:AbstractFloat
     if !isfinite(x)
@@ -863,10 +862,13 @@ end
 """
     sincospi(x)
 
-Simultaneously compute `sinpi(x)` and `cospi(x)`, where the `x` is in radians.
+Simultaneously compute [`sinpi(x)`](@ref) and [`cospi(x)`](@ref) (the sine and cosine of `π*x`,
+where `x` is in radians), returning a tuple `(sine, cosine)`.
 
 !!! compat "Julia 1.6"
     This function requires Julia 1.6 or later.
+
+See also: [`cispi`](@ref), [`sincosd`](@ref), [`sinpi`](@ref).
 """
 function sincospi(x::T) where T<:AbstractFloat
     if !isfinite(x)
@@ -1073,6 +1075,8 @@ isinf_real(x::Number) = false
     sinc(x)
 
 Compute ``\\sin(\\pi x) / (\\pi x)`` if ``x \\neq 0``, and ``1`` if ``x = 0``.
+
+See also [`cosc`](@ref), its derivative.
 """
 sinc(x::Number) = _sinc(float(x))
 sinc(x::Integer) = iszero(x) ? one(x) : zero(x)
@@ -1250,37 +1254,45 @@ Simultaneously compute the sine and cosine of `x`, where `x` is in degrees.
 !!! compat "Julia 1.3"
     This function requires at least Julia 1.3.
 """
-function sincosd(x::Real)
-    if isinf(x)
-        return throw(DomainError(x, "sincosd(x) is only defined for finite `x`."))
-    elseif isnan(x)
-        return (oftype(x,NaN), oftype(x,NaN))
-    end
-
-    # It turns out that calling those functions separately yielded better
-    # performance than considering each case and calling `sincos_kernel`.
-    return (sind(x), cosd(x))
-end
+sincosd(x) = (sind(x), cosd(x))
+# It turns out that calling these functions separately yields better
+# performance than considering each case and calling `sincos_kernel`.
 
 sincosd(::Missing) = (missing, missing)
 
 for (fd, f, fn) in ((:sind, :sin, "sine"), (:cosd, :cos, "cosine"), (:tand, :tan, "tangent"))
-    name = string(fd)
-    @eval begin
-        @doc """
-            $($name)(x)
-        Compute $($fn) of `x`, where `x` is in degrees. """ ($fd)(z) = ($f)(deg2rad(z))
+    for (fu, un) in ((:deg2rad, "degrees"),)
+        name = string(fd)
+        @eval begin
+            @doc """
+                $($name)(x)
+
+            Compute $($fn) of `x`, where `x` is in $($un).
+            If `x` is a matrix, `x` needs to be a square matrix.
+
+            !!! compat "Julia 1.7"
+                Matrix arguments require Julia 1.7 or later.
+            """ ($fd)(x) = ($f)(($fu).(x))
+        end
     end
 end
 
 for (fd, f, fn) in ((:asind, :asin, "sine"), (:acosd, :acos, "cosine"),
                     (:asecd, :asec, "secant"), (:acscd, :acsc, "cosecant"), (:acotd, :acot, "cotangent"))
-    name = string(fd)
-    @eval begin
-        @doc """
-            $($name)(x)
 
-        Compute the inverse $($fn) of `x`, where the output is in degrees. """ ($fd)(y) = rad2deg(($f)(y))
+    for (fu, un) in ((:rad2deg, "degrees"),)
+        name = string(fd)
+        @eval begin
+            @doc """
+                $($name)(x)
+
+            Compute the inverse $($fn) of `x`, where the output is in $($un).
+            If `x` is a matrix, `x` needs to be a square matrix.
+
+            !!! compat "Julia 1.7"
+                Matrix arguments require Julia 1.7 or later.
+            """ ($fd)(x) = ($fu).(($f)(x))
+        end
     end
 end
 
@@ -1289,6 +1301,9 @@ end
     atand(y,x)
 
 Compute the inverse tangent of `y` or `y/x`, respectively, where the output is in degrees.
+
+!!! compat "Julia 1.7"
+    The one-argument method supports square matrix arguments as of Julia 1.7.
 """
-atand(y)    = rad2deg(atan(y))
-atand(y, x) = rad2deg(atan(y,x))
+atand(y)    = rad2deg.(atan(y))
+atand(y, x) = rad2deg.(atan(y,x))
diff --git a/base/stacktraces.jl b/base/stacktraces.jl
index ec366b0def1985..3cb81d82bd3f79 100644
--- a/base/stacktraces.jl
+++ b/base/stacktraces.jl
@@ -103,7 +103,7 @@ Given a pointer to an execution context (usually generated by a call to `backtra
 up stack frame context information. Returns an array of frame information for all functions
 inlined at that point, innermost function first.
 """
-function lookup(pointer::Ptr{Cvoid})
+Base.@constprop :none function lookup(pointer::Ptr{Cvoid})
     infos = ccall(:jl_lookup_code_address, Any, (Ptr{Cvoid}, Cint), pointer, false)::Core.SimpleVector
     pointer = convert(UInt64, pointer)
     isempty(infos) && return [StackFrame(empty_sym, empty_sym, -1, nothing, true, false, pointer)] # this is equal to UNKNOWN
@@ -111,41 +111,41 @@ function lookup(pointer::Ptr{Cvoid})
     for i in 1:length(infos)
         info = infos[i]::Core.SimpleVector
         @assert(length(info) == 6)
-        res[i] = StackFrame(info[1], info[2], info[3], info[4], info[5], info[6], pointer)
+        res[i] = StackFrame(info[1]::Symbol, info[2]::Symbol, info[3]::Int, info[4], info[5]::Bool, info[6]::Bool, pointer)
     end
     return res
 end
 
 const top_level_scope_sym = Symbol("top-level scope")
 
-function lookup(ip::Base.InterpreterIP)
-    if ip.code isa MethodInstance && ip.code.def isa Method
-        codeinfo = ip.code.uninferred
-        func = ip.code.def.name
-        file = ip.code.def.file
-        line = ip.code.def.line
-    elseif ip.code === nothing
+function lookup(ip::Union{Base.InterpreterIP,Core.Compiler.InterpreterIP})
+    code = ip.code
+    if code === nothing
         # interpreted top-level expression with no CodeInfo
         return [StackFrame(top_level_scope_sym, empty_sym, 0, nothing, false, false, 0)]
+    end
+    codeinfo = (code isa MethodInstance ? code.uninferred : code)::CodeInfo
+    # prepare approximate code info
+    if code isa MethodInstance && (meth = code.def; meth isa Method)
+        func = meth.name
+        file = meth.file
+        line = meth.line
     else
-        @assert ip.code isa CodeInfo
-        codeinfo = ip.code
         func = top_level_scope_sym
         file = empty_sym
-        line = 0
+        line = Int32(0)
     end
     i = max(ip.stmt+1, 1)  # ip.stmt is 0-indexed
     if i > length(codeinfo.codelocs) || codeinfo.codelocs[i] == 0
-        return [StackFrame(func, file, line, ip.code, false, false, 0)]
+        return [StackFrame(func, file, line, code, false, false, 0)]
     end
-    lineinfo = codeinfo.linetable[codeinfo.codelocs[i]]
+    lineinfo = codeinfo.linetable[codeinfo.codelocs[i]]::Core.LineInfoNode
     scopes = StackFrame[]
     while true
-        push!(scopes, StackFrame(lineinfo.method, lineinfo.file, lineinfo.line, ip.code, false, false, 0))
-        if lineinfo.inlined_at == 0
-            break
-        end
-        lineinfo = codeinfo.linetable[lineinfo.inlined_at]
+        inlined = lineinfo.inlined_at != 0
+        push!(scopes, StackFrame(Base.IRShow.method_name(lineinfo)::Symbol, lineinfo.file, lineinfo.line, inlined ? nothing : code, false, inlined, 0))
+        inlined || break
+        lineinfo = codeinfo.linetable[lineinfo.inlined_at]::Core.LineInfoNode
     end
     return scopes
 end
@@ -157,7 +157,7 @@ Returns a stack trace in the form of a vector of `StackFrame`s. (By default stac
 doesn't return C functions, but this can be enabled.) When called without specifying a
 trace, `stacktrace` first calls `backtrace`.
 """
-function stacktrace(trace::Vector{<:Union{Base.InterpreterIP,Ptr{Cvoid}}}, c_funcs::Bool=false)
+Base.@constprop :none function stacktrace(trace::Vector{<:Union{Base.InterpreterIP,Core.Compiler.InterpreterIP,Ptr{Cvoid}}}, c_funcs::Bool=false)
     stack = StackTrace()
     for ip in trace
         for frame in lookup(ip)
@@ -170,7 +170,7 @@ function stacktrace(trace::Vector{<:Union{Base.InterpreterIP,Ptr{Cvoid}}}, c_fun
     return stack
 end
 
-function stacktrace(c_funcs::Bool=false)
+Base.@constprop :none function stacktrace(c_funcs::Bool=false)
     stack = stacktrace(backtrace(), c_funcs)
     # Remove frame for this function (and any functions called by this function).
     remove_frames!(stack, :stacktrace)
@@ -217,7 +217,7 @@ function show_spec_linfo(io::IO, frame::StackFrame)
         elseif frame.func === top_level_scope_sym
             print(io, "top-level scope")
         else
-            print(io, Base.demangle_function_name(string(frame.func)))
+            Base.print_within_stacktrace(io, Base.demangle_function_name(string(frame.func)), bold=true)
         end
     elseif linfo isa MethodInstance
         def = linfo.def
@@ -236,12 +236,15 @@ function show_spec_linfo(io::IO, frame::StackFrame)
                         kwnames[i] = Symbol(str[1:end-3])
                     end
                 end
-                Base.show_tuple_as_call(io, def.name, pos_sig, true, zip(kwnames, kwarg_types), argnames[def.nkw+2:end])
+                Base.show_tuple_as_call(io, def.name, pos_sig;
+                                        demangle=true,
+                                        kwargs=zip(kwnames, kwarg_types),
+                                        argnames=argnames[def.nkw+2:end])
             else
-                Base.show_tuple_as_call(io, def.name, sig, true, nothing, argnames)
+                Base.show_tuple_as_call(io, def.name, sig; demangle=true, argnames)
             end
         else
-            Base.show(io, linfo)
+            Base.show_mi(io, linfo, true)
         end
     elseif linfo isa CodeInfo
         print(io, "top-level scope")
@@ -266,8 +269,9 @@ function show(io::IO, frame::StackFrame)
 end
 
 function Base.parentmodule(frame::StackFrame)
-    if frame.linfo isa MethodInstance
-        def = frame.linfo.def
+    linfo = frame.linfo
+    if linfo isa MethodInstance
+        def = linfo.def
         if def isa Module
             return def
         else
diff --git a/base/stat.jl b/base/stat.jl
index 15bbe0b34d2dd2..f38a82634dc2fe 100644
--- a/base/stat.jl
+++ b/base/stat.jl
@@ -26,6 +26,7 @@ export
     uperm
 
 struct StatStruct
+    desc    :: Union{String, OS_HANDLE} # for show method, not included in equality or hash
     device  :: UInt
     inode   :: UInt
     mode    :: UInt
@@ -40,9 +41,25 @@ struct StatStruct
     ctime   :: Float64
 end
 
-StatStruct() = StatStruct(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+@eval function Base.:(==)(x::StatStruct, y::StatStruct) # do not include `desc` in equality or hash
+  $(let ex = true
+        for fld in fieldnames(StatStruct)[2:end]
+            ex = :(getfield(x, $(QuoteNode(fld))) === getfield(y, $(QuoteNode(fld))) && $ex)
+        end
+        Expr(:return, ex)
+    end)
+end
+@eval function Base.hash(obj::StatStruct, h::UInt)
+  $(quote
+        $(Any[:(h = hash(getfield(obj, $(QuoteNode(fld))), h)) for fld in fieldnames(StatStruct)[2:end]]...)
+        return h
+    end)
+end
 
-StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
+StatStruct() = StatStruct("", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
+StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct("", buf)
+StatStruct(desc::Union{AbstractString, OS_HANDLE}, buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
+    desc isa OS_HANDLE ? desc : String(desc),
     ccall(:jl_stat_dev,     UInt32,  (Ptr{UInt8},), buf),
     ccall(:jl_stat_ino,     UInt32,  (Ptr{UInt8},), buf),
     ccall(:jl_stat_mode,    UInt32,  (Ptr{UInt8},), buf),
@@ -57,18 +74,84 @@ StatStruct(buf::Union{Vector{UInt8},Ptr{UInt8}}) = StatStruct(
     ccall(:jl_stat_ctime,   Float64, (Ptr{UInt8},), buf),
 )
 
-show(io::IO, st::StatStruct) = print(io, "StatStruct(mode=0o$(string(filemode(st), base = 8, pad = 6)), size=$(filesize(st)))")
+function iso_datetime_with_relative(t, tnow)
+    str = Libc.strftime("%FT%T%z", t)
+    secdiff = t - tnow
+    for (d, name) in ((24*60*60, "day"), (60*60, "hour"), (60, "minute"), (1, "second"))
+        tdiff = round(Int, div(abs(secdiff), d))
+        if tdiff != 0 # find first unit difference
+            plural = tdiff == 1 ? "" : "s"
+            when = secdiff < 0 ? "ago" : "in the future"
+            return "$str ($tdiff $name$plural $when)"
+        end
+    end
+    return "$str (just now)"
+end
+
+
+function getusername(uid::Unsigned)
+    pwd = Libc.getpwuid(uid, false)
+    pwd === nothing && return
+    isempty(pwd.username) && return
+    return pwd.username
+end
+
+function getgroupname(gid::Unsigned)
+    gp = Libc.getgrgid(gid, false)
+    gp === nothing && return
+    isempty(gp.groupname) && return
+    return gp.groupname
+end
+
+function show_statstruct(io::IO, st::StatStruct, oneline::Bool)
+    print(io, oneline ? "StatStruct(" : "StatStruct for ")
+    show(io, st.desc)
+    oneline || print(io, "\n  ")
+    print(io, " size: ", st.size, " bytes")
+    oneline || print(io, "\n")
+    print(io, " device: ", st.device)
+    oneline || print(io, "\n ")
+    print(io, " inode: ", st.inode)
+    oneline || print(io, "\n  ")
+    print(io, " mode: 0o", string(filemode(st), base = 8, pad = 6), " (", filemode_string(st), ")")
+    oneline || print(io, "\n ")
+    print(io, " nlink: ", st.nlink)
+    oneline || print(io, "\n   ")
+    print(io, " uid: $(st.uid)")
+    username = getusername(st.uid)
+    username === nothing || print(io, " (", username, ")")
+    oneline || print(io, "\n   ")
+    print(io, " gid: ", st.gid)
+    groupname = getgroupname(st.gid)
+    groupname === nothing || print(io, " (", groupname, ")")
+    oneline || print(io, "\n  ")
+    print(io, " rdev: ", st.rdev)
+    oneline || print(io, "\n ")
+    print(io, " blksz: ", st.blksize)
+    oneline || print(io, "\n")
+    print(io, " blocks: ", st.blocks)
+    tnow = round(UInt, time())
+    oneline || print(io, "\n ")
+    print(io, " mtime: ", iso_datetime_with_relative(st.mtime, tnow))
+    oneline || print(io, "\n ")
+    print(io, " ctime: ", iso_datetime_with_relative(st.ctime, tnow))
+    oneline && print(io, ")")
+    return nothing
+end
+
+show(io::IO, st::StatStruct) = show_statstruct(io, st, true)
+show(io::IO, ::MIME"text/plain", st::StatStruct) = show_statstruct(io, st, false)
 
 # stat & lstat functions
 
 macro stat_call(sym, arg1type, arg)
     return quote
-        stat_buf = zeros(UInt8, ccall(:jl_sizeof_stat, Int32, ()))
+        stat_buf = zeros(UInt8, Int(ccall(:jl_sizeof_stat, Int32, ())))
         r = ccall($(Expr(:quote, sym)), Int32, ($(esc(arg1type)), Ptr{UInt8}), $(esc(arg)), stat_buf)
         if !(r in (0, Base.UV_ENOENT, Base.UV_ENOTDIR, Base.UV_EINVAL))
-            throw(_UVError("stat", r, "for file ", repr($(esc(arg)))))
+            uv_error(string("stat(", repr($(esc(arg))), ")"), r)
         end
-        st = StatStruct(stat_buf)
+        st = StatStruct($(esc(arg)), stat_buf)
         if ispath(st) != (r == 0)
             error("stat returned zero type for a valid path")
         end
@@ -92,6 +175,7 @@ The fields of the structure are:
 
 | Name    | Description                                                        |
 |:--------|:-------------------------------------------------------------------|
+| desc    | The path or OS file descriptor                                     |
 | size    | The size (in bytes) of the file                                    |
 | device  | ID of the device that contains the file                            |
 | inode   | The inode number of the file                                       |
@@ -103,7 +187,7 @@ The fields of the structure are:
 | blksize | The file-system preferred block size for the file                  |
 | blocks  | The number of such blocks allocated                                |
 | mtime   | Unix timestamp of when the file was last modified                  |
-| ctime   | Unix timestamp of when the file was created                        |
+| ctime   | Unix timestamp of when the file's metadata was changed             |
 
 """
 stat(path...) = stat(joinpath(path...))
@@ -120,12 +204,73 @@ lstat(path...) = lstat(joinpath(path...))
 
 # some convenience functions
 
+const filemode_table = (
+    [
+        (S_IFLNK, "l"),
+        (S_IFSOCK, "s"),  # Must appear before IFREG and IFDIR as IFSOCK == IFREG | IFDIR
+        (S_IFREG, "-"),
+        (S_IFBLK, "b"),
+        (S_IFDIR, "d"),
+        (S_IFCHR, "c"),
+        (S_IFIFO, "p")
+    ],
+    [
+        (S_IRUSR, "r"),
+    ],
+    [
+        (S_IWUSR, "w"),
+    ],
+    [
+        (S_IXUSR|S_ISUID, "s"),
+        (S_ISUID, "S"),
+        (S_IXUSR, "x")
+    ],
+    [
+        (S_IRGRP, "r"),
+    ],
+    [
+        (S_IWGRP, "w"),
+    ],
+    [
+        (S_IXGRP|S_ISGID, "s"),
+        (S_ISGID, "S"),
+        (S_IXGRP, "x")
+    ],
+    [
+        (S_IROTH, "r"),
+    ],
+    [
+        (S_IWOTH, "w"),
+    ],
+    [
+        (S_IXOTH|S_ISVTX, "t"),
+        (S_ISVTX, "T"),
+        (S_IXOTH, "x")
+    ]
+)
+
 """
     filemode(file)
 
 Equivalent to `stat(file).mode`.
 """
 filemode(st::StatStruct) = st.mode
+filemode_string(st::StatStruct) = filemode_string(st.mode)
+function filemode_string(mode)
+    str = IOBuffer()
+    for table in filemode_table
+        complete = true
+        for (bit, char) in table
+            if mode & bit == bit
+                write(str, char)
+                complete = false
+                break
+            end
+        end
+        complete && write(str, "-")
+    end
+    return String(take!(str))
+end
 
 """
     filesize(path...)
@@ -187,7 +332,7 @@ julia> isdir("not/a/directory")
 false
 ```
 
-See also: [`isfile`](@ref) and [`ispath`](@ref).
+See also [`isfile`](@ref) and [`ispath`](@ref).
 """
 isdir(st::StatStruct) = filemode(st) & 0xf000 == 0x4000
 
@@ -216,7 +361,7 @@ true
 julia> close(f); rm("test_file.txt")
 ```
 
-See also: [`isdir`](@ref) and [`ispath`](@ref).
+See also [`isdir`](@ref) and [`ispath`](@ref).
 """
 isfile(st::StatStruct) = filemode(st) & 0xf000 == 0x8000
 
diff --git a/base/stream.jl b/base/stream.jl
index 9ce58744b53f17..948c12ad604b44 100644
--- a/base/stream.jl
+++ b/base/stream.jl
@@ -43,7 +43,7 @@ end
 
 An abstract type for IO streams handled by libuv.
 
-If`stream isa LibuvStream`, it must obey the following interface:
+If `stream isa LibuvStream`, it must obey the following interface:
 
 - `stream.handle`, if present, must be a `Ptr{Cvoid}`
 - `stream.status`, if present, must be an `Int`
@@ -109,7 +109,7 @@ function eof(s::LibuvStream)
     # and that we won't return true if there's a readerror pending (it'll instead get thrown).
     # This requires some careful ordering here (TODO: atomic loads)
     bytesavailable(s) > 0 && return false
-    open = isopen(s) # must precede readerror check
+    open = isreadable(s) # must precede readerror check
     s.readerror === nothing || throw(s.readerror)
     return !open
 end
@@ -270,6 +270,7 @@ show(io::IO, stream::LibuvStream) = print(io, typeof(stream), "(",
 function isreadable(io::LibuvStream)
     bytesavailable(io) > 0 && return true
     isopen(io) || return false
+    io.status == StatusEOF && return false
     return ccall(:uv_is_readable, Cint, (Ptr{Cvoid},), io.handle) != 0
 end
 
@@ -282,6 +283,7 @@ end
 lock(s::LibuvStream) = lock(s.lock)
 unlock(s::LibuvStream) = unlock(s.lock)
 
+setup_stdio(stream::LibuvStream, ::Bool) = (stream, false)
 rawhandle(stream::LibuvStream) = stream.handle
 unsafe_convert(::Type{Ptr{Cvoid}}, s::Union{LibuvStream, LibuvServer}) = s.handle
 
@@ -375,10 +377,10 @@ if OS_HANDLE != RawFD
 end
 
 function isopen(x::Union{LibuvStream, LibuvServer})
-    if x.status == StatusUninit || x.status == StatusInit
+    if x.status == StatusUninit || x.status == StatusInit || x.handle === C_NULL
         throw(ArgumentError("$x is not initialized"))
     end
-    return x.status != StatusClosed && x.status != StatusEOF
+    return x.status != StatusClosed
 end
 
 function check_open(x::Union{LibuvStream, LibuvServer})
@@ -390,13 +392,13 @@ end
 function wait_readnb(x::LibuvStream, nb::Int)
     # fast path before iolock acquire
     bytesavailable(x.buffer) >= nb && return
-    open = isopen(x) # must precede readerror check
+    open = isopen(x) && x.status != StatusEOF # must precede readerror check
     x.readerror === nothing || throw(x.readerror)
     open || return
     iolock_begin()
     # repeat fast path after iolock acquire, before other expensive work
     bytesavailable(x.buffer) >= nb && (iolock_end(); return)
-    open = isopen(x)
+    open = isopen(x) && x.status != StatusEOF
     x.readerror === nothing || throw(x.readerror)
     open || (iolock_end(); return)
     # now do the "real" work
@@ -407,6 +409,7 @@ function wait_readnb(x::LibuvStream, nb::Int)
         while bytesavailable(x.buffer) < nb
             x.readerror === nothing || throw(x.readerror)
             isopen(x) || break
+            x.status != StatusEOF || break
             x.throttle = max(nb, x.throttle)
             start_reading(x) # ensure we are reading
             iolock_end()
@@ -431,6 +434,52 @@ function wait_readnb(x::LibuvStream, nb::Int)
     nothing
 end
 
+function closewrite(s::LibuvStream)
+    iolock_begin()
+    check_open(s)
+    req = Libc.malloc(_sizeof_uv_shutdown)
+    uv_req_set_data(req, C_NULL) # in case we get interrupted before arriving at the wait call
+    err = ccall(:uv_shutdown, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}),
+                req, s, @cfunction(uv_shutdowncb_task, Cvoid, (Ptr{Cvoid}, Cint)))
+    if err < 0
+        Libc.free(req)
+        uv_error("shutdown", err)
+    end
+    ct = current_task()
+    preserve_handle(ct)
+    sigatomic_begin()
+    uv_req_set_data(req, ct)
+    iolock_end()
+    status = try
+        sigatomic_end()
+        wait()::Cint
+    finally
+        # try-finally unwinds the sigatomic level, so need to repeat sigatomic_end
+        sigatomic_end()
+        iolock_begin()
+        ct.queue === nothing || list_deletefirst!(ct.queue, ct)
+        if uv_req_data(req) != C_NULL
+            # req is still alive,
+            # so make sure we won't get spurious notifications later
+            uv_req_set_data(req, C_NULL)
+        else
+            # done with req
+            Libc.free(req)
+        end
+        iolock_end()
+        unpreserve_handle(ct)
+    end
+    if isopen(s)
+        if status < 0 || ccall(:uv_is_readable, Cint, (Ptr{Cvoid},), s.handle) == 0
+            close(s)
+        end
+    end
+    if status < 0
+        throw(_UVError("shutdown", status))
+    end
+    nothing
+end
+
 function wait_close(x::Union{LibuvStream, LibuvServer})
     preserve_handle(x)
     lock(x.cond)
@@ -447,34 +496,37 @@ end
 
 function close(stream::Union{LibuvStream, LibuvServer})
     iolock_begin()
-    should_wait = false
     if stream.status == StatusInit
         ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
         stream.status = StatusClosing
-    elseif isopen(stream) || stream.status == StatusEOF
-        should_wait = uv_handle_data(stream) != C_NULL
+    elseif isopen(stream)
         if stream.status != StatusClosing
             ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
             stream.status = StatusClosing
         end
     end
     iolock_end()
-    should_wait && wait_close(stream)
+    wait_close(stream)
     nothing
 end
 
 function uvfinalize(uv::Union{LibuvStream, LibuvServer})
-    uv.handle == C_NULL && return
     iolock_begin()
     if uv.handle != C_NULL
-        disassociate_julia_struct(uv.handle) # not going to call the usual close hooks
-        if uv.status != StatusUninit
-            close(uv)
-        else
+        disassociate_julia_struct(uv.handle) # not going to call the usual close hooks (so preserve_handle is not needed)
+        if uv.status == StatusUninit
+            Libc.free(uv.handle)
+        elseif uv.status == StatusInit
+            ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), uv.handle)
+        elseif isopen(uv)
+            if uv.status != StatusClosing
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), uv.handle)
+            end
+        elseif uv.status == StatusClosed
             Libc.free(uv.handle)
         end
-        uv.status = StatusClosed
         uv.handle = C_NULL
+        uv.status = StatusClosed
     end
     iolock_end()
     nothing
@@ -503,7 +555,7 @@ julia> withenv("LINES" => 30, "COLUMNS" => 100) do
 
 To get your TTY size,
 
-```julia
+```julia-repl
 julia> displaysize(stdout)
 (34, 147)
 ```
@@ -554,7 +606,7 @@ function alloc_request(buffer::IOBuffer, recommended_size::UInt)
     ensureroom(buffer, Int(recommended_size))
     ptr = buffer.append ? buffer.size + 1 : buffer.ptr
     nb = min(length(buffer.data), buffer.maxsize) - ptr + 1
-    return (pointer(buffer.data, ptr), nb)
+    return (Ptr{Cvoid}(pointer(buffer.data, ptr)), nb)
 end
 
 notify_filled(buffer::IOBuffer, nread::Int, base::Ptr{Cvoid}, len::UInt) = notify_filled(buffer, nread)
@@ -606,35 +658,33 @@ function uv_readcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid})
     nrequested = ccall(:jl_uv_buf_len, Csize_t, (Ptr{Cvoid},), buf)
     function readcb_specialized(stream::LibuvStream, nread::Int, nrequested::UInt)
         lock(stream.cond)
-        try
-            if nread < 0
-                if nread == UV_ENOBUFS && nrequested == 0
-                    # remind the client that stream.buffer is full
-                    notify(stream.cond)
-                elseif nread == UV_EOF
-                    if isa(stream, TTY)
-                        stream.status = StatusEOF # libuv called uv_stop_reading already
+        if nread < 0
+            if nread == UV_ENOBUFS && nrequested == 0
+                # remind the client that stream.buffer is full
+                notify(stream.cond)
+            elseif nread == UV_EOF # libuv called uv_stop_reading already
+                if stream.status != StatusClosing
+                    stream.status = StatusEOF
+                    if stream isa TTY # TODO: || ccall(:uv_is_writable, Cint, (Ptr{Cvoid},), stream.handle) != 0
+                        # stream can still be used either by reseteof # TODO: or write
                         notify(stream.cond)
-                    elseif stream.status != StatusClosing
-                        # begin shutdown of the stream
+                    else
+                        # underlying stream is no longer useful: begin finalization
                         ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
                         stream.status = StatusClosing
                     end
-                else
-                    stream.readerror = _UVError("read", nread)
-                    # This is a fatal connection error. Shutdown requests as per the usual
-                    # close function won't work and libuv will fail with an assertion failure
-                    ccall(:jl_forceclose_uv, Cvoid, (Ptr{Cvoid},), stream)
-                    stream.status = StatusClosing
-                    notify(stream.cond)
                 end
             else
-                notify_filled(stream.buffer, nread)
-                notify(stream.cond)
+                stream.readerror = _UVError("read", nread)
+                # This is a fatal connection error
+                ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), stream.handle)
+                stream.status = StatusClosing
             end
-        finally
-            unlock(stream.cond)
+        else
+            notify_filled(stream.buffer, nread)
+            notify(stream.cond)
         end
+        unlock(stream.cond)
 
         # Stop background reading when
         # 1) there's nobody paying attention to the data we are reading
@@ -651,6 +701,7 @@ function uv_readcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid})
         nothing
     end
     readcb_specialized(stream_unknown_type, Int(nread), UInt(nrequested))
+    nothing
 end
 
 function reseteof(x::TTY)
@@ -665,7 +716,6 @@ end
 function _uv_hook_close(uv::Union{LibuvStream, LibuvServer})
     lock(uv.cond)
     try
-        uv.handle = C_NULL
         uv.status = StatusClosed
         # notify any listeners that exist on this libuv stream type
         notify(uv.cond)
@@ -844,6 +894,7 @@ function readbytes!(s::LibuvStream, a::Vector{UInt8}, nb::Int)
         while bytesavailable(buf) < nb
             s.readerror === nothing || throw(s.readerror)
             isopen(s) || break
+            s.status != StatusEOF || break
             iolock_end()
             wait_readnb(s, nb)
             iolock_begin()
@@ -890,6 +941,7 @@ function unsafe_read(s::LibuvStream, p::Ptr{UInt8}, nb::UInt)
         while bytesavailable(buf) < nb
             s.readerror === nothing || throw(s.readerror)
             isopen(s) || throw(EOFError())
+            s.status != StatusEOF || throw(EOFError())
             iolock_end()
             wait_readnb(s, nb)
             iolock_begin()
@@ -946,13 +998,14 @@ function readuntil(x::LibuvStream, c::UInt8; keep::Bool=false)
     @assert buf.seekable == false
     if !occursin(c, buf) # fast path checks first
         x.readerror === nothing || throw(x.readerror)
-        if isopen(x)
+        if isopen(x) && x.status != StatusEOF
             preserve_handle(x)
             lock(x.cond)
             try
                 while !occursin(c, x.buffer)
                     x.readerror === nothing || throw(x.readerror)
                     isopen(x) || break
+                    x.status != StatusEOF || break
                     start_reading(x) # ensure we are reading
                     iolock_end()
                     wait(x.cond)
@@ -1115,128 +1168,273 @@ function uv_writecb_task(req::Ptr{Cvoid}, status::Cint)
     nothing
 end
 
+function uv_shutdowncb_task(req::Ptr{Cvoid}, status::Cint)
+    d = uv_req_data(req)
+    if d != C_NULL
+        uv_req_set_data(req, C_NULL) # let the Task know we got the shutdowncb
+        t = unsafe_pointer_to_objref(d)::Task
+        schedule(t, status)
+    else
+        # no owner for this req, safe to just free it
+        Libc.free(req)
+    end
+    nothing
+end
+
+
 _fd(x::IOStream) = RawFD(fd(x))
 _fd(x::Union{OS_HANDLE, RawFD}) = x
 
 function _fd(x::Union{LibuvStream, LibuvServer})
     fd = Ref{OS_HANDLE}(INVALID_OS_HANDLE)
-    if x.status != StatusUninit && x.status != StatusClosed
+    if x.status != StatusUninit && x.status != StatusClosed && x.handle != C_NULL
         err = ccall(:uv_fileno, Int32, (Ptr{Cvoid}, Ptr{OS_HANDLE}), x.handle, fd)
         # handle errors by returning INVALID_OS_HANDLE
     end
     return fd[]
 end
 
-for (x, writable, unix_fd, c_symbol) in
-        ((:stdin, false, 0, :jl_uv_stdin),
-         (:stdout, true, 1, :jl_uv_stdout),
-         (:stderr, true, 2, :jl_uv_stderr))
-    f = Symbol("redirect_", lowercase(string(x)))
-    _f = Symbol("_", f)
-    @eval begin
-        function ($_f)(stream)
-            global $x
-            posix_fd = _fd(stream)
-            @static if Sys.iswindows()
-                ccall(:SetStdHandle, stdcall, Int32, (Int32, OS_HANDLE),
-                    $(-10 - unix_fd), Libc._get_osfhandle(posix_fd))
-            end
-            dup(posix_fd, RawFD($unix_fd))
-            $x = stream
-            nothing
-        end
-        function ($f)(handle::Union{LibuvStream, IOStream})
-            $(_f)(handle)
-            unsafe_store!(cglobal($(Expr(:quote, c_symbol)), Ptr{Cvoid}),
-                handle.handle)
-            return handle
-        end
-        function ($f)()
-            p = link_pipe!(Pipe())
-            read, write = p.out, p.in
-            ($f)($(writable ? :write : :read))
-            return (read, write)
-        end
-        function ($f)(::DevNull)
-            global $x
-            nulldev = @static Sys.iswindows() ? "NUL" : "/dev/null"
-            handle = open(nulldev, write=$writable)
-            $(_f)(handle)
-            close(handle) # handle has been dup'ed in $(_f)
-            $x = devnull
-            return devnull
-        end
-        function ($f)(io::IOContext)
-            io2, _dict = unwrapcontext(io)
-            ($f)(io2)
-            global $x = io
-            return io
+struct RedirectStdStream <: Function
+    unix_fd::Int
+    writable::Bool
+end
+for (f, writable, unix_fd) in
+        ((:redirect_stdin, false, 0),
+         (:redirect_stdout, true, 1),
+         (:redirect_stderr, true, 2))
+    @eval const ($f) = RedirectStdStream($unix_fd, $writable)
+end
+function _redirect_io_libc(stream, unix_fd::Int)
+    posix_fd = _fd(stream)
+    @static if Sys.iswindows()
+        if 0 <= unix_fd <= 2
+            ccall(:SetStdHandle, stdcall, Int32, (Int32, OS_HANDLE),
+                -10 - unix_fd, Libc._get_osfhandle(posix_fd))
         end
     end
+    dup(posix_fd, RawFD(unix_fd))
+    nothing
+end
+function _redirect_io_global(io, unix_fd::Int)
+    unix_fd == 0 && (global stdin = io)
+    unix_fd == 1 && (global stdout = io)
+    unix_fd == 2 && (global stderr = io)
+    nothing
+end
+function (f::RedirectStdStream)(handle::Union{LibuvStream, IOStream})
+    _redirect_io_libc(handle, f.unix_fd)
+    c_sym = f.unix_fd == 0 ? cglobal(:jl_uv_stdin, Ptr{Cvoid}) :
+            f.unix_fd == 1 ? cglobal(:jl_uv_stdout, Ptr{Cvoid}) :
+            f.unix_fd == 2 ? cglobal(:jl_uv_stderr, Ptr{Cvoid}) :
+            C_NULL
+    c_sym == C_NULL || unsafe_store!(c_sym, handle.handle)
+    _redirect_io_global(handle, f.unix_fd)
+    return handle
+end
+function (f::RedirectStdStream)(::DevNull)
+    nulldev = @static Sys.iswindows() ? "NUL" : "/dev/null"
+    handle = open(nulldev, write=f.writable)
+    _redirect_io_libc(handle, f.unix_fd)
+    close(handle) # handle has been dup'ed in _redirect_io_libc
+    _redirect_io_global(devnull, f.unix_fd)
+    return devnull
+end
+function (f::RedirectStdStream)(io::AbstractPipe)
+    io2 = (f.writable ? pipe_writer : pipe_reader)(io)
+    f(io2)
+    _redirect_io_global(io, f.unix_fd)
+    return io
 end
+function (f::RedirectStdStream)(p::Pipe)
+    if p.in.status == StatusInit && p.out.status == StatusInit
+        link_pipe!(p)
+    end
+    io2 = getfield(p, f.writable ? :in : :out)
+    f(io2)
+    return p
+end
+(f::RedirectStdStream)() = f(Pipe())
+
+# Deprecate these in v2 (RedirectStdStream support)
+iterate(p::Pipe) = (p.out, 1)
+iterate(p::Pipe, i::Int) = i == 1 ? (p.in, 2) : nothing
+getindex(p::Pipe, key::Int) = key == 1 ? p.out : key == 2 ? p.in : throw(KeyError(key))
 
 """
-    redirect_stdout([stream]) -> (rd, wr)
+    redirect_stdout([stream]) -> stream
 
 Create a pipe to which all C and Julia level [`stdout`](@ref) output
-will be redirected.
-Returns a tuple `(rd, wr)` representing the pipe ends.
+will be redirected. Return a stream representing the pipe ends.
 Data written to [`stdout`](@ref) may now be read from the `rd` end of
-the pipe. The `wr` end is given for convenience in case the old
-[`stdout`](@ref) object was cached by the user and needs to be replaced
-elsewhere.
-
-If called with the optional `stream` argument, then returns `stream` itself.
+the pipe.
 
 !!! note
-    `stream` must be an `IOStream`, a `TTY`, a `Pipe`, a socket, or `devnull`.
+    `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
+    `Pipe`, socket, or `devnull`.
+
+See also [`redirect_stdio`](@ref).
 """
 redirect_stdout
 
 """
-    redirect_stderr([stream]) -> (rd, wr)
+    redirect_stderr([stream]) -> stream
 
 Like [`redirect_stdout`](@ref), but for [`stderr`](@ref).
 
 !!! note
-    `stream` must be an `IOStream`, a `TTY`, a `Pipe`, a socket, or `devnull`.
+    `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
+    `Pipe`, socket, or `devnull`.
+
+See also [`redirect_stdio`](@ref).
 """
 redirect_stderr
 
 """
-    redirect_stdin([stream]) -> (rd, wr)
+    redirect_stdin([stream]) -> stream
 
 Like [`redirect_stdout`](@ref), but for [`stdin`](@ref).
-Note that the order of the return tuple is still `(rd, wr)`,
-i.e. data to be read from [`stdin`](@ref) may be written to `wr`.
+Note that the direction of the stream is reversed.
 
 !!! note
-    `stream` must be an `IOStream`, a `TTY`, a `Pipe`, a socket, or `devnull`.
+    `stream` must be a compatible objects, such as an `IOStream`, `TTY`,
+    `Pipe`, socket, or `devnull`.
+
+See also [`redirect_stdio`](@ref).
 """
 redirect_stdin
 
-for (F,S) in ((:redirect_stdin, :stdin), (:redirect_stdout, :stdout), (:redirect_stderr, :stderr))
-    @eval function $F(f::Function, stream)
-        STDOLD = $S
-        local ret
-        $F(stream)
-        try
-            ret = f()
-        finally
-            $F(STDOLD)
-        end
-        ret
+"""
+    redirect_stdio(;stdin=stdin, stderr=stderr, stdout=stdout)
+
+Redirect a subset of the streams `stdin`, `stderr`, `stdout`.
+Each argument must be an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`.
+
+!!! compat "Julia 1.7"
+    `redirect_stdio` requires Julia 1.7 or later.
+"""
+function redirect_stdio(;stdin=nothing, stderr=nothing, stdout=nothing)
+    stdin  === nothing || redirect_stdin(stdin)
+    stderr === nothing || redirect_stderr(stderr)
+    stdout === nothing || redirect_stdout(stdout)
+end
+
+"""
+    redirect_stdio(f; stdin=nothing, stderr=nothing, stdout=nothing)
+
+Redirect a subset of the streams `stdin`, `stderr`, `stdout`,
+call `f()` and restore each stream.
+
+Possible values for each stream are:
+* `nothing` indicating the stream should not be redirected.
+* `path::AbstractString` redirecting the stream to the file at `path`.
+* `io` an `IOStream`, `TTY`, `Pipe`, socket, or `devnull`.
+
+# Examples
+```julia-repl
+julia> redirect_stdio(stdout="stdout.txt", stderr="stderr.txt") do
+           print("hello stdout")
+           print(stderr, "hello stderr")
+       end
+
+julia> read("stdout.txt", String)
+"hello stdout"
+
+julia> read("stderr.txt", String)
+"hello stderr"
+```
+
+# Edge cases
+
+It is possible to pass the same argument to `stdout` and `stderr`:
+```julia-repl
+julia> redirect_stdio(stdout="log.txt", stderr="log.txt", stdin=devnull) do
+    ...
+end
+```
+
+However it is not supported to pass two distinct descriptors of the same file.
+```julia-repl
+julia> io1 = open("same/path", "w")
+
+julia> io2 = open("same/path", "w")
+
+julia> redirect_stdio(f, stdout=io1, stderr=io2) # not suppored
+```
+Also the `stdin` argument may not be the same descriptor as `stdout` or `stderr`.
+```julia-repl
+julia> io = open(...)
+
+julia> redirect_stdio(f, stdout=io, stdin=io) # not supported
+```
+
+!!! compat "Julia 1.7"
+    `redirect_stdio` requires Julia 1.7 or later.
+"""
+function redirect_stdio(f; stdin=nothing, stderr=nothing, stdout=nothing)
+
+    function resolve(new::Nothing, oldstream, mode)
+        (new=nothing, close=false, old=nothing)
+    end
+    function resolve(path::AbstractString, oldstream,mode)
+        (new=open(path, mode), close=true, old=oldstream)
+    end
+    function resolve(new, oldstream, mode)
+        (new=new, close=false, old=oldstream)
+    end
+
+    same_path(x, y) = false
+    function same_path(x::AbstractString, y::AbstractString)
+        # if x = y = "does_not_yet_exist.txt" then samefile will return false
+        (abspath(x) == abspath(y)) || samefile(x,y)
+    end
+    if same_path(stderr, stdin)
+        throw(ArgumentError("stdin and stderr cannot be the same path"))
+    end
+    if same_path(stdout, stdin)
+        throw(ArgumentError("stdin and stdout cannot be the same path"))
+    end
+
+    new_in , close_in , old_in  = resolve(stdin , Base.stdin , "r")
+    new_out, close_out, old_out = resolve(stdout, Base.stdout, "w")
+    if same_path(stderr, stdout)
+        # make sure that in case stderr = stdout = "same/path"
+        # only a single io is used instead of opening the same file twice
+        new_err, close_err, old_err = new_out, false, Base.stderr
+    else
+        new_err, close_err, old_err = resolve(stderr, Base.stderr, "w")
+    end
+
+    redirect_stdio(; stderr=new_err, stdin=new_in, stdout=new_out)
+
+    try
+        return f()
+    finally
+        redirect_stdio(;stderr=old_err, stdin=old_in, stdout=old_out)
+        close_err && close(new_err)
+        close_in  && close(new_in )
+        close_out && close(new_out)
+    end
+end
+
+function (f::RedirectStdStream)(thunk::Function, stream)
+    stdold = f.unix_fd == 0 ? stdin :
+             f.unix_fd == 1 ? stdout :
+             f.unix_fd == 2 ? stderr :
+             throw(ArgumentError("Not implemented to get old handle of fd except for stdio"))
+    f(stream)
+    try
+        return thunk()
+    finally
+        f(stdold)
     end
 end
 
+
 """
     redirect_stdout(f::Function, stream)
 
 Run the function `f` while redirecting [`stdout`](@ref) to `stream`.
 Upon completion, [`stdout`](@ref) is restored to its prior setting.
-
-!!! note
-    `stream` must be a `TTY`, a `Pipe`, or a socket.
 """
 redirect_stdout(f::Function, stream)
 
@@ -1245,9 +1443,6 @@ redirect_stdout(f::Function, stream)
 
 Run the function `f` while redirecting [`stderr`](@ref) to `stream`.
 Upon completion, [`stderr`](@ref) is restored to its prior setting.
-
-!!! note
-    `stream` must be a `TTY`, a `Pipe`, or a socket.
 """
 redirect_stderr(f::Function, stream)
 
@@ -1256,9 +1451,6 @@ redirect_stderr(f::Function, stream)
 
 Run the function `f` while redirecting [`stdin`](@ref) to `stream`.
 Upon completion, [`stdin`](@ref) is restored to its prior setting.
-
-!!! note
-    `stream` must be a `TTY`, a `Pipe`, or a socket.
 """
 redirect_stdin(f::Function, stream)
 
@@ -1280,23 +1472,26 @@ mutable struct BufferStream <: LibuvStream
     buffer::IOBuffer
     cond::Threads.Condition
     readerror::Any
-    is_open::Bool
     buffer_writes::Bool
     lock::ReentrantLock # advisory lock
+    status::Int
 
-    BufferStream() = new(PipeBuffer(), Threads.Condition(), nothing, true, false, ReentrantLock())
+    BufferStream() = new(PipeBuffer(), Threads.Condition(), nothing, false, ReentrantLock(), StatusActive)
 end
 
-isopen(s::BufferStream) = s.is_open
+isopen(s::BufferStream) = s.status != StatusClosed
+
+closewrite(s::BufferStream) = close(s)
 
 function close(s::BufferStream)
     lock(s.cond) do
-        s.is_open = false
+        s.status = StatusClosed
         notify(s.cond)
         nothing
     end
 end
 uvfinalize(s::BufferStream) = nothing
+setup_stdio(stream::BufferStream, child_readable::Bool) = invoke(setup_stdio, Tuple{IO, Bool}, stream, child_readable)
 
 function read(s::BufferStream, ::Type{UInt8})
     nread = lock(s.cond) do
@@ -1314,8 +1509,8 @@ function unsafe_read(s::BufferStream, a::Ptr{UInt8}, nb::UInt)
 end
 bytesavailable(s::BufferStream) = bytesavailable(s.buffer)
 
-isreadable(s::BufferStream) = s.buffer.readable
-iswritable(s::BufferStream) = s.buffer.writable
+isreadable(s::BufferStream) = (isopen(s) || bytesavailable(s) > 0) && s.buffer.readable
+iswritable(s::BufferStream) = isopen(s) && s.buffer.writable
 
 function wait_readnb(s::BufferStream, nb::Int)
     lock(s.cond) do
@@ -1325,7 +1520,7 @@ function wait_readnb(s::BufferStream, nb::Int)
     end
 end
 
-show(io::IO, s::BufferStream) = print(io, "BufferStream() bytes waiting:", bytesavailable(s.buffer), ", isopen:", s.is_open)
+show(io::IO, s::BufferStream) = print(io, "BufferStream(bytes waiting=", bytesavailable(s.buffer), ", isopen=", isopen(s), ")")
 
 function readuntil(s::BufferStream, c::UInt8; keep::Bool=false)
     bytes = lock(s.cond) do
@@ -1375,3 +1570,5 @@ function flush(s::BufferStream)
         nothing
     end
 end
+
+skip(s::BufferStream, n) = skip(s.buffer, n)
diff --git a/base/strings/basic.jl b/base/strings/basic.jl
index 82038d78635ac5..306ecc5cc214a7 100644
--- a/base/strings/basic.jl
+++ b/base/strings/basic.jl
@@ -16,7 +16,7 @@ about strings:
   * Each `AbstractChar` in a string is encoded by one or more code units
   * Only the index of the first code unit of an `AbstractChar` is a valid index
   * The encoding of an `AbstractChar` is independent of what precedes or follows it
-  * String encodings are [self-synchronizing] – i.e. `isvalid(s, i)` is O(1)
+  * String encodings are [self-synchronizing] – i.e. `isvalid(s, i)` is O(1)
 
 [self-synchronizing]: https://en.wikipedia.org/wiki/Self-synchronizing_code
 
@@ -35,8 +35,8 @@ model allows index arithmetic to work with out-of- bounds indices as
 intermediate values so long as one never uses them to retrieve a character,
 which often helps avoid needing to code around edge cases.
 
-See also: [`codeunit`](@ref), [`ncodeunits`](@ref), [`thisind`](@ref),
-[`nextind`](@ref), [`prevind`](@ref)
+See also [`codeunit`](@ref), [`ncodeunits`](@ref), [`thisind`](@ref),
+[`nextind`](@ref), [`prevind`](@ref).
 """
 AbstractString
 
@@ -46,8 +46,8 @@ AbstractString
     ncodeunits(s::AbstractString) -> Int
 
 Return the number of code units in a string. Indices that are in bounds to
-access this string must satisfy `1 ≤ i ≤ ncodeunits(s)`. Not all such indices
-are valid – they may not be the start of a character, but they will return a
+access this string must satisfy `1 ≤ i ≤ ncodeunits(s)`. Not all such indices
+are valid – they may not be the start of a character, but they will return a
 code unit value when calling `codeunit(s,i)`.
 
 # Examples
@@ -62,8 +62,8 @@ julia> ncodeunits('∫'), ncodeunits('e'), ncodeunits('ˣ')
 (3, 1, 2)
 ```
 
-See also: [`codeunit`](@ref), [`checkbounds`](@ref), [`sizeof`](@ref),
-[`length`](@ref), [`lastindex`](@ref)
+See also [`codeunit`](@ref), [`checkbounds`](@ref), [`sizeof`](@ref),
+[`length`](@ref), [`lastindex`](@ref).
 """
 ncodeunits(s::AbstractString)
 
@@ -77,7 +77,7 @@ limited to these three types, but it's hard to think of widely used string
 encodings that don't use one of these units. `codeunit(s)` is the same as
 `typeof(codeunit(s,1))` when `s` is a non-empty string.
 
-See also: [`ncodeunits`](@ref)
+See also [`ncodeunits`](@ref).
 """
 codeunit(s::AbstractString)
 
@@ -102,9 +102,9 @@ julia> typeof(a)
 UInt8
 ```
 
-See also: [`ncodeunits`](@ref), [`checkbounds`](@ref)
+See also [`ncodeunits`](@ref), [`checkbounds`](@ref).
 """
-@propagate_inbounds codeunit(s::AbstractString, i::Integer) = typeof(i) === Int ?
+@propagate_inbounds codeunit(s::AbstractString, i::Integer) = i isa Int ?
     throw(MethodError(codeunit, (s, i))) : codeunit(s, Int(i))
 
 """
@@ -118,8 +118,8 @@ In order for `isvalid(s, i)` to be an O(1) function, the encoding of `s` must be
 [self-synchronizing](https://en.wikipedia.org/wiki/Self-synchronizing_code). This
 is a basic assumption of Julia's generic string support.
 
-See also: [`getindex`](@ref), [`iterate`](@ref), [`thisind`](@ref),
-[`nextind`](@ref), [`prevind`](@ref), [`length`](@ref)
+See also [`getindex`](@ref), [`iterate`](@ref), [`thisind`](@ref),
+[`nextind`](@ref), [`prevind`](@ref), [`length`](@ref).
 
 # Examples
 ```jldoctest
@@ -140,7 +140,7 @@ Stacktrace:
 [...]
 ```
 """
-@propagate_inbounds isvalid(s::AbstractString, i::Integer) = typeof(i) === Int ?
+@propagate_inbounds isvalid(s::AbstractString, i::Integer) = i isa Int ?
     throw(MethodError(isvalid, (s, i))) : isvalid(s, Int(i))
 
 """
@@ -152,9 +152,9 @@ be iterated, yielding a sequences of characters. If `i` is out of bounds in `s`
 then a bounds error is raised. The `iterate` function, as part of the iteration
 protocol may assume that `i` is the start of a character in `s`.
 
-See also: [`getindex`](@ref), [`checkbounds`](@ref)
+See also [`getindex`](@ref), [`checkbounds`](@ref).
 """
-@propagate_inbounds iterate(s::AbstractString, i::Integer) = typeof(i) === Int ?
+@propagate_inbounds iterate(s::AbstractString, i::Integer) = i isa Int ?
     throw(MethodError(iterate, (s, i))) : iterate(s, Int(i))
 
 ## basic generic definitions ##
@@ -183,7 +183,7 @@ isempty(s::AbstractString) = iszero(ncodeunits(s)::Int)
 
 function getindex(s::AbstractString, i::Integer)
     @boundscheck checkbounds(s, i)
-    @inbounds return isvalid(s, i) ? iterate(s, i)[1] : string_index_err(s, i)
+    @inbounds return isvalid(s, i) ? (iterate(s, i)::NTuple{2,Any})[1] : string_index_err(s, i)
 end
 
 getindex(s::AbstractString, i::Colon) = s
@@ -375,8 +375,8 @@ value `0`.
     the string because it counts the value on the fly. This is in contrast to
     the method for arrays, which is a constant-time operation.
 
-See also: [`isvalid`](@ref), [`ncodeunits`](@ref), [`lastindex`](@ref),
-[`thisind`](@ref), [`nextind`](@ref), [`prevind`](@ref)
+See also [`isvalid`](@ref), [`ncodeunits`](@ref), [`lastindex`](@ref),
+[`thisind`](@ref), [`nextind`](@ref), [`prevind`](@ref).
 
 # Examples
 ```jldoctest
@@ -389,7 +389,7 @@ length(s::AbstractString) = @inbounds return length(s, 1, ncodeunits(s)::Int)
 function length(s::AbstractString, i::Int, j::Int)
     @boundscheck begin
         0 < i ≤ ncodeunits(s)::Int+1 || throw(BoundsError(s, i))
-        0 ≤ j < ncodeunits(s)::Int+1 || throw(BoundsError(s, j))
+        0 ≤ j < ncodeunits(s)::Int+1 || throw(BoundsError(s, j))
     end
     n = 0
     for k = i:j
@@ -438,8 +438,8 @@ thisind(s::AbstractString, i::Integer) = thisind(s, Int(i))
 function thisind(s::AbstractString, i::Int)
     z = ncodeunits(s)::Int + 1
     i == z && return i
-    @boundscheck 0 ≤ i ≤ z || throw(BoundsError(s, i))
-    @inbounds while 1 < i && !(isvalid(s, i)::Bool)
+    @boundscheck 0 ≤ i ≤ z || throw(BoundsError(s, i))
+    @inbounds while 1 < i && !(isvalid(s, i)::Bool)
         i -= 1
     end
     return i
@@ -498,7 +498,7 @@ function prevind(s::AbstractString, i::Int, n::Int)
     z = ncodeunits(s) + 1
     @boundscheck 0 < i ≤ z || throw(BoundsError(s, i))
     n == 0 && return thisind(s, i) == i ? i : string_index_err(s, i)
-    while n > 0 && 1 < i
+    while n > 0 && 1 < i
         @inbounds n -= isvalid(s, i -= 1)
     end
     return i - n
@@ -557,7 +557,7 @@ function nextind(s::AbstractString, i::Int, n::Int)
     z = ncodeunits(s)
     @boundscheck 0 ≤ i ≤ z || throw(BoundsError(s, i))
     n == 0 && return thisind(s, i) == i ? i : string_index_err(s, i)
-    while n > 0 && i < z
+    while n > 0 && i < z
         @inbounds n -= isvalid(s, i += 1)
     end
     return i + n
@@ -596,6 +596,15 @@ true
 julia> isascii("αβγ")
 false
 ```
+For example, `isascii` can be used as a predicate function for [`filter`](@ref) or [`replace`](@ref)
+to remove or replace non-ASCII characters, respectively:
+```jldoctest
+julia> filter(isascii, "abcdeγfgh") # discard non-ASCII chars
+"abcdefgh"
+
+julia> replace("abcdeγfgh", !isascii=>' ') # replace non-ASCII chars with spaces
+"abcde fgh"
+```
 """
 isascii(c::Char) = bswap(reinterpret(UInt32, c)) < 0x80
 isascii(s::AbstractString) = all(isascii, s)
@@ -676,13 +685,16 @@ cases where `v` contains non-ASCII characters.)
 
 # Examples
 ```jldoctest
-julia> r = reverse("Julia")
-"ailuJ"
+julia> s = "Julia🚀"
+"Julia🚀"
 
-julia> for i in 1:length(r)
-           print(r[reverseind("Julia", i)])
+julia> r = reverse(s)
+"🚀ailuJ"
+
+julia> for i in eachindex(s)
+           print(r[reverseind(r, i)])
        end
-Julia
+Julia🚀
 ```
 """
 reverseind(s::AbstractString, i::Integer) = thisind(s, ncodeunits(s)-i+1)
@@ -692,7 +704,7 @@ reverseind(s::AbstractString, i::Integer) = thisind(s, ncodeunits(s)-i+1)
 
 Repeat a string `r` times. This can be written as `s^r`.
 
-See also: [`^`](@ref :^(::Union{AbstractString, AbstractChar}, ::Integer))
+See also [`^`](@ref :^(::Union{AbstractString, AbstractChar}, ::Integer)).
 
 # Examples
 ```jldoctest
@@ -707,7 +719,7 @@ repeat(s::AbstractString, r::Integer) = repeat(String(s), r)
 
 Repeat a string or character `n` times. This can also be written as `repeat(s, n)`.
 
-See also: [`repeat`](@ref)
+See also [`repeat`](@ref).
 
 # Examples
 ```jldoctest
@@ -737,7 +749,7 @@ end
 length(s::CodeUnits) = ncodeunits(s.s)
 sizeof(s::CodeUnits{T}) where {T} = ncodeunits(s.s) * sizeof(T)
 size(s::CodeUnits) = (length(s),)
-elsize(s::CodeUnits{T}) where {T} = sizeof(T)
+elsize(s::Type{<:CodeUnits{T}}) where {T} = sizeof(T)
 @propagate_inbounds getindex(s::CodeUnits, i::Int) = codeunit(s.s, i)
 IndexStyle(::Type{<:CodeUnits}) = IndexLinear()
 @inline iterate(s::CodeUnits, i=1) = (i % UInt) - 1 < length(s) ? (@inbounds s[i], i + 1) : nothing
@@ -768,3 +780,16 @@ julia> codeunits("Juλia")
 ```
 """
 codeunits(s::AbstractString) = CodeUnits(s)
+
+function _split_rest(s::AbstractString, n::Int)
+    lastind = lastindex(s)
+    i = try
+        prevind(s, lastind, n)
+    catch e
+        e isa BoundsError || rethrow()
+        _check_length_split_rest(length(s), n)
+    end
+    last_n = SubString(s, nextind(s, i), lastind)
+    front = s[begin:i]
+    return front, last_n
+end
diff --git a/base/strings/io.jl b/base/strings/io.jl
index f69834e63e24dc..d1bf7a763e93ce 100644
--- a/base/strings/io.jl
+++ b/base/strings/io.jl
@@ -15,7 +15,7 @@ avoid Julia-specific details.
 For example, `show` displays strings with quotes, and `print` displays strings
 without quotes.
 
-[`string`](@ref) returns the output of `print` as a string.
+See also [`println`](@ref), [`string`](@ref), [`printstyled`](@ref).
 
 # Examples
 ```jldoctest
@@ -54,8 +54,10 @@ end
 """
     println([io::IO], xs...)
 
-Print (using [`print`](@ref)) `xs` followed by a newline.
-If `io` is not supplied, prints to [`stdout`](@ref).
+Print (using [`print`](@ref)) `xs` to `io` followed by a newline.
+If `io` is not supplied, prints to the default output stream [`stdout`](@ref).
+
+See also [`printstyled`](@ref) to add colors etc.
 
 # Examples
 ```jldoctest
@@ -64,13 +66,13 @@ Hello, world
 
 julia> io = IOBuffer();
 
-julia> println(io, "Hello, world")
+julia> println(io, "Hello", ',', " world.")
 
 julia> String(take!(io))
-"Hello, world\\n"
+"Hello, world.\\n"
 ```
 """
-println(io::IO, xs...) = print(io, xs..., '\n')
+println(io::IO, xs...) = print(io, xs..., "\n")
 
 ## conversion of general objects to strings ##
 
@@ -79,14 +81,19 @@ println(io::IO, xs...) = print(io, xs..., '\n')
 
 Call the given function with an I/O stream and the supplied extra arguments.
 Everything written to this I/O stream is returned as a string.
-`context` can be either an [`IOContext`](@ref) whose properties will be used,
-or a `Pair` specifying a property and its value. `sizehint` suggests the capacity
-of the buffer (in bytes).
+`context` can be an [`IOContext`](@ref) whose properties will be used, a `Pair`
+specifying a property and its value, or a tuple of `Pair` specifying multiple
+properties and their values. `sizehint` suggests the capacity of the buffer (in
+bytes).
+
+The optional keyword argument `context` can be set to a `:key=>value` pair, a
+tuple of `:key=>value` pairs, or an `IO` or [`IOContext`](@ref) object whose
+attributes are used for the I/O stream passed to `f`.  The optional `sizehint`
+is a suggested size (in bytes) to allocate for the buffer used to write the
+string.
 
-The optional keyword argument `context` can be set to `:key=>value` pair
-or an `IO` or [`IOContext`](@ref) object whose attributes are used for the I/O
-stream passed to `f`.  The optional `sizehint` is a suggested size (in bytes)
-to allocate for the buffer used to write the string.
+!!! compat "Julia 1.7"
+    Passing a tuple to keyword `context` requires Julia 1.7 or later.
 
 # Examples
 ```jldoctest
@@ -99,7 +106,9 @@ julia> sprint(showerror, BoundsError([1], 100))
 """
 function sprint(f::Function, args...; context=nothing, sizehint::Integer=0)
     s = IOBuffer(sizehint=sizehint)
-    if context !== nothing
+    if context isa Tuple
+        f(IOContext(s, context...), args...)
+    elseif context !== nothing
         f(IOContext(s, context), args...)
     else
         f(s, args...)
@@ -165,6 +174,8 @@ highly efficient, then it may make sense to add a method to `string` and
 define `print(io::IO, x::MyType) = print(io, string(x))` to ensure the
 functions are consistent.
 
+See also: [`String`](@ref), [`repr`](@ref), [`sprint`](@ref), [`show`](@ref @show).
+
 # Examples
 ```jldoctest
 julia> string("a", 1, true)
@@ -181,35 +192,77 @@ print(io::IO, s::AbstractString) = for c in s; print(io, c); end
 write(io::IO, s::AbstractString) = (len = 0; for c in s; len += Int(write(io, c))::Int; end; len)
 show(io::IO, s::AbstractString) = print_quoted(io, s)
 
+# show elided string if more than `limit` characters
+function show(
+    io    :: IO,
+    mime  :: MIME"text/plain",
+    str   :: AbstractString;
+    limit :: Union{Int, Nothing} = nothing,
+)
+    # compute limit in default case
+    if limit === nothing
+        get(io, :limit, false) || return show(io, str)
+        limit = max(20, displaysize(io)[2])
+        # one line in collection, seven otherwise
+        get(io, :typeinfo, nothing) === nothing && (limit *= 7)
+    end
+
+    # early out for short strings
+    len = ncodeunits(str)
+    len ≤ limit - 2 && # quote chars
+        return show(io, str)
+
+    # these don't depend on string data
+    units = codeunit(str) == UInt8 ? "bytes" : "code units"
+    skip_text(skip) = " ⋯ $skip $units ⋯ "
+    short = length(skip_text("")) + 4 # quote chars
+    chars = max(limit, short + 1) - short # at least 1 digit
+
+    # figure out how many characters to print in elided case
+    chars -= d = ndigits(len - chars) # first adjustment
+    chars += d - ndigits(len - chars) # second if needed
+    chars = max(0, chars)
+
+    # find head & tail, avoiding O(length(str)) computation
+    head = nextind(str, 0, 1 + (chars + 1) ÷ 2)
+    tail = prevind(str, len + 1, chars ÷ 2)
+
+    # threshold: min chars skipped to make elision worthwhile
+    t = short + ndigits(len - chars) - 1
+    n = tail - head # skipped code units
+    if 4t ≤ n || t ≤ n && t ≤ length(str, head, tail-1)
+        skip = skip_text(n)
+        show(io, SubString(str, 1:prevind(str, head)))
+        print(io, skip) # TODO: bold styled
+        show(io, SubString(str, tail))
+    else
+        show(io, str)
+    end
+end
+
 # optimized methods to avoid iterating over chars
 write(io::IO, s::Union{String,SubString{String}}) =
     GC.@preserve s Int(unsafe_write(io, pointer(s), reinterpret(UInt, sizeof(s))))::Int
 print(io::IO, s::Union{String,SubString{String}}) = (write(io, s); nothing)
 
-## printing literal quoted string data ##
-
-# this is the inverse of print_unescaped_chars(io, s, "\\\")
-
-function print_quoted_literal(io, s::AbstractString)
-    print(io, '"')
-    for c = s; c == '"' ? print(io, "\\\"") : print(io, c); end
-    print(io, '"')
-end
-
 """
     repr(x; context=nothing)
 
 Create a string from any value using the [`show`](@ref) function.
 You should not add methods to `repr`; define a `show` method instead.
 
-The optional keyword argument `context` can be set to an `IO` or [`IOContext`](@ref)
-object whose attributes are used for the I/O stream passed to `show`.
+The optional keyword argument `context` can be set to a `:key=>value` pair, a
+tuple of `:key=>value` pairs, or an `IO` or [`IOContext`](@ref) object whose
+attributes are used for the I/O stream passed to `show`.
 
 Note that `repr(x)` is usually similar to how the value of `x` would
 be entered in Julia.  See also [`repr(MIME("text/plain"), x)`](@ref) to instead
 return a "pretty-printed" version of `x` designed more for human consumption,
 equivalent to the REPL display of `x`.
 
+!!! compat "Julia 1.7"
+    Passing a tuple to keyword `context` requires Julia 1.7 or later.
+
 # Examples
 ```jldoctest
 julia> repr(1)
@@ -254,15 +307,12 @@ IOBuffer(s::SubString{String}) = IOBuffer(view(unsafe_wrap(Vector{UInt8}, s.stri
 # join is implemented using IO
 
 """
-    join([io::IO,] strings [, delim [, last]])
+    join([io::IO,] iterator [, delim [, last]])
 
-Join an array of `strings` into a single string, inserting the given delimiter (if any) between
-adjacent strings. If `last` is given, it will be used instead of `delim` between the last
-two strings. If `io` is given, the result is written to `io` rather than returned
-as a `String`.
-
-`strings` can be any iterable over elements `x` which are convertible to strings
-via `print(io::IOBuffer, x)`. `strings` will be printed to `io`.
+Join any `iterator` into a single string, inserting the given delimiter (if any) between
+adjacent items.  If `last` is given, it will be used instead of `delim` between the last
+two items.  Each item of `iterator` is converted to a string via `print(io::IOBuffer, x)`.
+If `io` is given, the result is written to `io` rather than returned as a `String`.
 
 # Examples
 ```jldoctest
@@ -273,15 +323,15 @@ julia> join([1,2,3,4,5])
 "12345"
 ```
 """
-function join(io::IO, strings, delim, last)
+function join(io::IO, iterator, delim, last)
     first = true
     local prev
-    for str in strings
+    for item in iterator
         if @isdefined prev
             first ? (first = false) : print(io, delim)
             print(io, prev)
         end
-        prev = str
+        prev = item
     end
     if @isdefined prev
         first || print(io, last)
@@ -289,19 +339,19 @@ function join(io::IO, strings, delim, last)
     end
     nothing
 end
-function join(io::IO, strings, delim="")
+function join(io::IO, iterator, delim="")
     # Specialization of the above code when delim==last,
     # which lets us emit (compile) less code
     first = true
-    for str in strings
+    for item in iterator
         first ? (first = false) : print(io, delim)
-        print(io, str)
+        print(io, item)
     end
 end
 
-join(strings) = sprint(join, strings)
-join(strings, delim) = sprint(join, strings, delim)
-join(strings, delim, last) = sprint(join, strings, delim, last)
+join(iterator) = sprint(join, iterator)
+join(iterator, delim) = sprint(join, iterator, delim)
+join(iterator, delim, last) = sprint(join, iterator, delim, last)
 
 ## string escaping & unescaping ##
 
@@ -310,8 +360,8 @@ escape_nul(c::Union{Nothing, AbstractChar}) =
     (c !== nothing && '0' <= c <= '7') ? "\\x00" : "\\0"
 
 """
-    escape_string(str::AbstractString[, esc])::AbstractString
-    escape_string(io, str::AbstractString[, esc::])::Nothing
+    escape_string(str::AbstractString[, esc]; keep = ())::AbstractString
+    escape_string(io, str::AbstractString[, esc]; keep = ())::Nothing
 
 General escaping of traditional C and Unicode escape sequences. The first form returns the
 escaped string, the second prints the result to `io`.
@@ -323,11 +373,22 @@ unambiguous), unicode code point (`"\\u"` prefix) or hex (`"\\x"` prefix).
 The optional `esc` argument specifies any additional characters that should also be
 escaped by a prepending backslash (`\"` is also escaped by default in the first form).
 
+The argument `keep` specifies a collection of characters which are to be kept as
+they are. Notice that `esc` has precedence here.
+
+See also [`unescape_string`](@ref) for the reverse operation.
+
+!!! compat "Julia 1.7"
+    The `keep` argument is available as of Julia 1.7.
+
 # Examples
 ```jldoctest
 julia> escape_string("aaa\\nbbb")
 "aaa\\\\nbbb"
 
+julia> escape_string("aaa\\nbbb"; keep = '\\n')
+"aaa\\nbbb"
+
 julia> escape_string("\\xfe\\xff") # invalid utf-8
 "\\\\xfe\\\\xff"
 
@@ -337,15 +398,14 @@ julia> escape_string(string('\\u2135','\\0')) # unambiguous
 julia> escape_string(string('\\u2135','\\0','0')) # \\0 would be ambiguous
 "ℵ\\\\x000"
 ```
-
-## See also
-[`unescape_string`](@ref) for the reverse operation.
 """
-function escape_string(io::IO, s::AbstractString, esc="")
+function escape_string(io::IO, s::AbstractString, esc=""; keep = ())
     a = Iterators.Stateful(s)
     for c::AbstractChar in a
         if c in esc
             print(io, '\\', c)
+        elseif c in keep
+            print(io, c)
         elseif isascii(c)
             c == '\0'          ? print(io, escape_nul(peek(a)::Union{AbstractChar,Nothing})) :
             c == '\e'          ? print(io, "\\e") :
@@ -368,7 +428,8 @@ function escape_string(io::IO, s::AbstractString, esc="")
     end
 end
 
-escape_string(s::AbstractString, esc=('\"',)) = sprint(escape_string, s, esc, sizehint=lastindex(s))
+escape_string(s::AbstractString, esc=('\"',); keep = ()) =
+    sprint((io)->escape_string(io, s, esc; keep = keep), sizehint=lastindex(s))
 
 function print_quoted(io, s::AbstractString)
     print(io, '"')
@@ -397,6 +458,8 @@ The following escape sequences are recognised:
  - Hex bytes (`\\x` with 1-2 trailing hex digits)
  - Octal bytes (`\\` with 1-3 trailing octal digits)
 
+See also [`escape_string`](@ref).
+
 # Examples
 ```jldoctest
 julia> unescape_string("aaa\\\\nbbb") # C escape sequence
@@ -411,9 +474,6 @@ julia> unescape_string("\\\\101") # octal
 julia> unescape_string("aaa \\\\g \\\\n", ['g']) # using `keep` argument
 "aaa \\\\g \\n"
 ```
-
-## See also
-[`escape_string`](@ref).
 """
 function unescape_string(io::IO, s::AbstractString, keep = ())
     a = Iterators.Stateful(s)
@@ -427,7 +487,7 @@ function unescape_string(io::IO, s::AbstractString, keep = ())
                 m = c == 'x' ? 2 :
                     c == 'u' ? 4 : 8
                 while (k += 1) <= m && !isempty(a)
-                    nc = peek(a)
+                    nc = peek(a)::AbstractChar
                     n = '0' <= nc <= '9' ? n<<4 + (nc-'0') :
                         'a' <= nc <= 'f' ? n<<4 + (nc-'a'+10) :
                         'A' <= nc <= 'F' ? n<<4 + (nc-'A'+10) : break
@@ -447,7 +507,7 @@ function unescape_string(io::IO, s::AbstractString, keep = ())
                 k = 1
                 n = c-'0'
                 while (k += 1) <= 3 && !isempty(a)
-                    c = peek(a)
+                    c = peek(a)::AbstractChar
                     n = ('0' <= c <= '7') ? n<<3 + c-'0' : break
                     popfirst!(a)
                 end
@@ -531,7 +591,7 @@ macro raw_str(s); s; end
 
 Escape a string in the manner used for parsing raw string literals.
 For each double-quote (`"`) character in input string `s`, this
-function counts the number _n_ of preceeding backslash (`\\`) characters,
+function counts the number _n_ of preceding backslash (`\\`) characters,
 and then increases there the number of backslashes from _n_ to 2_n_+1
 (even for _n_ = 0). It also doubles a sequence of backslashes at the end
 of the string.
@@ -541,7 +601,7 @@ string literals. (It also happens to be the escaping convention
 expected by the Microsoft C/C++ compiler runtime when it parses a
 command-line string into the argv[] array.)
 
-See also: [`escape_string`](@ref)
+See also [`escape_string`](@ref).
 """
 function escape_raw_string(io, str::AbstractString)
     escapes = 0
@@ -613,6 +673,8 @@ end
 
 Remove leading indentation from string.
 
+See also `indent` from the [`MultilineStrings` package](https://github.com/invenia/MultilineStrings.jl).
+
 # Examples
 ```jldoctest
 julia> Base.unindent("   a\\n   b", 2)
diff --git a/base/strings/lazy.jl b/base/strings/lazy.jl
new file mode 100644
index 00000000000000..3510afc9b4f116
--- /dev/null
+++ b/base/strings/lazy.jl
@@ -0,0 +1,101 @@
+"""
+    LazyString <: AbstractString
+
+A lazy representation of string interpolation. This is useful when a string
+needs to be constructed in a context where performing the actual interpolation
+and string construction is unnecessary or undesirable (e.g. in error paths
+of functions).
+
+This type is designed to be cheap to construct at runtime, trying to offload
+as much work as possible to either the macro or later printing operations.
+
+# Examples
+
+```jldoctest
+julia> n = 5; str = LazyString("n is ", n)
+"n is 5"
+```
+
+See also [`@lazy_str`](@ref).
+
+!!! compat "Julia 1.8"
+    `LazyString` requires Julia 1.8 or later.
+
+# Extended help
+## Safety properties for concurrent programs
+
+A lazy string itself does not introduce any concurrency problems even if it is printed in
+multiple Julia tasks.  However, if `print` methods on a captured value can have a
+concurrency issue when invoked without synchronizations, printing the lazy string may cause
+an issue.  Furthermore, the `print` methods on the captured values may be invoked multiple
+times, though only exactly one result will be returned.
+
+!!! compat "Julia 1.9"
+    `LazyString` is safe in the above sense in Julia 1.9 and later.
+"""
+mutable struct LazyString <: AbstractString
+    const parts::Tuple
+    # Created on first access
+    @atomic str::Union{String,Nothing}
+    global _LazyString(parts, str) = new(parts, str)
+    LazyString(args...) = new(args, nothing)
+end
+
+"""
+    lazy"str"
+
+Create a [`LazyString`](@ref) using regular string interpolation syntax.
+Note that interpolations are *evaluated* at LazyString construction time,
+but *printing* is delayed until the first access to the string.
+
+See [`LazyString`](@ref) documentation for the safety properties for concurrent programs.
+
+# Examples
+
+```
+julia> n = 5; str = lazy"n is \$n"
+"n is 5"
+
+julia> typeof(str)
+LazyString
+```
+
+!!! compat "Julia 1.8"
+    `lazy"str"` requires Julia 1.8 or later.
+"""
+macro lazy_str(text)
+    parts = Any[]
+    lastidx = idx = 1
+    while (idx = findnext('$', text, idx)) !== nothing
+        lastidx < idx && push!(parts, text[lastidx:idx-1])
+        idx += 1
+        expr, idx = Meta.parseatom(text, idx; filename=string(__source__.file))
+        push!(parts, esc(expr))
+        lastidx = idx
+    end
+    lastidx <= lastindex(text) && push!(parts, text[lastidx:end])
+    :(LazyString($(parts...)))
+end
+
+function String(l::LazyString)
+    old = @atomic :acquire l.str
+    old === nothing || return old
+    str = sprint() do io
+        for p in l.parts
+            print(io, p)
+        end
+    end
+    old, ok = @atomicreplace :acquire_release :acquire l.str nothing => str
+    return ok ? str : (old::String)
+end
+
+hash(s::LazyString, h::UInt64) = hash(String(s), h)
+lastindex(s::LazyString) = lastindex(String(s))
+iterate(s::LazyString) = iterate(String(s))
+iterate(s::LazyString, i::Integer) = iterate(String(s), i)
+isequal(a::LazyString, b::LazyString) = isequal(String(a), String(b))
+==(a::LazyString, b::LazyString) = (String(a) == String(b))
+ncodeunits(s::LazyString) = ncodeunits(String(s))
+codeunit(s::LazyString) = codeunit(String(s))
+codeunit(s::LazyString, i::Integer) = codeunit(String(s), i)
+isvalid(s::LazyString, i::Integer) = isvalid(String(s), i)
diff --git a/base/strings/search.jl b/base/strings/search.jl
index b1908ac99c8600..938ed8d527d997 100644
--- a/base/strings/search.jl
+++ b/base/strings/search.jl
@@ -25,6 +25,9 @@ findfirst(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a
 findnext(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
     nothing_sentinel(_search(a, pred.x, i))
 
+findfirst(::typeof(iszero), a::ByteArray) = nothing_sentinel(_search(a, zero(UInt8)))
+findnext(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_search(a, zero(UInt8), i))
+
 function _search(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = 1)
     if i < 1
         throw(BoundsError(a, i))
@@ -65,6 +68,9 @@ findlast(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a:
 findprev(pred::Fix2{<:Union{typeof(isequal),typeof(==)},<:Union{Int8,UInt8}}, a::ByteArray, i::Integer) =
     nothing_sentinel(_rsearch(a, pred.x, i))
 
+findlast(::typeof(iszero), a::ByteArray) = nothing_sentinel(_rsearch(a, zero(UInt8)))
+findprev(::typeof(iszero), a::ByteArray, i::Integer) = nothing_sentinel(_rsearch(a, zero(UInt8), i))
+
 function _rsearch(a::Union{String,ByteArray}, b::Union{Int8,UInt8}, i::Integer = sizeof(a))
     if i < 1
         return i == 0 ? 0 : throw(BoundsError(a, i))
@@ -88,7 +94,7 @@ end
 
 """
     findfirst(pattern::AbstractString, string::AbstractString)
-    findfirst(pattern::Regex, string::String)
+    findfirst(pattern::AbstractPattern, string::String)
 
 Find the first occurrence of `pattern` in `string`. Equivalent to
 [`findnext(pattern, string, firstindex(s))`](@ref).
@@ -123,6 +129,25 @@ true
 """
 findfirst(ch::AbstractChar, string::AbstractString) = findfirst(==(ch), string)
 
+"""
+    findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}},
+              A::AbstractVector{<:Union{Int8,UInt8}})
+
+Find the first occurrence of sequence `pattern` in vector `A`.
+
+!!! compat "Julia 1.6"
+    This method requires at least Julia 1.6.
+
+# Examples
+```jldoctest
+julia> findfirst([0x52, 0x62], [0x40, 0x52, 0x62, 0x63])
+2:3
+```
+"""
+findfirst(pattern::AbstractVector{<:Union{Int8,UInt8}},
+          A::AbstractVector{<:Union{Int8,UInt8}}) =
+    _search(A, pattern, firstindex(A))
+
 # AbstractString implementation of the generic findnext interface
 function findnext(testf::Function, s::AbstractString, i::Integer)
     i = Int(i)
@@ -143,11 +168,12 @@ in(c::AbstractChar, s::AbstractString) = (findfirst(isequal(c),s)!==nothing)
 function _searchindex(s::Union{AbstractString,ByteArray},
                       t::Union{AbstractString,AbstractChar,Int8,UInt8},
                       i::Integer)
-    if isempty(t)
+    x = Iterators.peel(t)
+    if isnothing(x)
         return 1 <= i <= nextind(s,lastindex(s))::Int ? i :
                throw(BoundsError(s, i))
     end
-    t1, trest = Iterators.peel(t)
+    t1, trest = x
     while true
         i = findnext(isequal(t1),s,i)
         if i === nothing return 0 end
@@ -166,7 +192,7 @@ function _search_bloom_mask(c)
 end
 
 _nthbyte(s::String, i) = codeunit(s, i)
-_nthbyte(a::Union{AbstractVector{UInt8},AbstractVector{Int8}}, i) = a[i]
+_nthbyte(t::AbstractVector, index) = t[index + (firstindex(t)-1)]
 
 function _searchindex(s::String, t::String, i::Integer)
     # Check for fast case of a single byte
@@ -174,21 +200,26 @@ function _searchindex(s::String, t::String, i::Integer)
     _searchindex(unsafe_wrap(Vector{UInt8},s), unsafe_wrap(Vector{UInt8},t), i)
 end
 
-function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
-    n = sizeof(t)
-    m = sizeof(s)
+function _searchindex(s::AbstractVector{<:Union{Int8,UInt8}},
+                      t::AbstractVector{<:Union{Int8,UInt8}},
+                      _i::Integer)
+    sentinel = firstindex(s) - 1
+    n = length(t)
+    m = length(s)
+    i = Int(_i) - sentinel
+    (i < 1 || i > m+1) && throw(BoundsError(s, _i))
 
     if n == 0
-        return 1 <= i <= m+1 ? max(1, i) : 0
+        return 1 <= i <= m+1 ? max(1, i) : sentinel
     elseif m == 0
-        return 0
+        return sentinel
     elseif n == 1
-        return something(findnext(isequal(_nthbyte(t,1)), s, i), 0)
+        return something(findnext(isequal(_nthbyte(t,1)), s, i), sentinel)
     end
 
     w = m - n
     if w < 0 || i - 1 > w
-        return 0
+        return sentinel
     end
 
     bloom_mask = UInt64(0)
@@ -215,7 +246,8 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
 
             # match found
             if j == n - 1
-                return i+1
+                # restore in case `s` is an OffSetArray
+                return i+firstindex(s)
             end
 
             # no match, try to rule out the next character
@@ -232,16 +264,16 @@ function _searchindex(s::ByteArray, t::ByteArray, i::Integer)
         i += 1
     end
 
-    0
+    sentinel
 end
 
-function _search(s::Union{AbstractString,ByteArray},
-                 t::Union{AbstractString,AbstractChar,Int8,UInt8},
+function _search(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}},
+                 t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}},
                  i::Integer)
     idx = _searchindex(s,t,i)
     if isempty(t)
         idx:idx-1
-    elseif idx > 0
+    elseif idx >= firstindex(s)
         idx:(idx + lastindex(t) - 1)
     else
         nothing
@@ -250,7 +282,7 @@ end
 
 """
     findnext(pattern::AbstractString, string::AbstractString, start::Integer)
-    findnext(pattern::Regex, string::String, start::Integer)
+    findnext(pattern::AbstractPattern, string::String, start::Integer)
 
 Find the next occurrence of `pattern` in `string` starting at position `start`.
 `pattern` can be either a string, or a regular expression, in which case `string`
@@ -274,7 +306,7 @@ julia> findnext("Lang", "JuliaLang", 2)
 6:9
 ```
 """
-findnext(t::AbstractString, s::AbstractString, i::Integer) = _search(s, t, Int(i))
+findnext(t::AbstractString, s::AbstractString, start::Integer) = _search(s, t, Int(start))
 
 """
     findnext(ch::AbstractChar, string::AbstractString, start::Integer)
@@ -293,8 +325,32 @@ julia> findnext('o', "Hello to the world", 6)
 8
 ```
 """
-findnext(ch::AbstractChar, string::AbstractString, ind::Integer) =
-    findnext(==(ch), string, ind)
+findnext(ch::AbstractChar, string::AbstractString, start::Integer) =
+    findnext(==(ch), string, start)
+
+"""
+    findnext(pattern::AbstractVector{<:Union{Int8,UInt8}},
+             A::AbstractVector{<:Union{Int8,UInt8}},
+             start::Integer)
+
+Find the next occurrence of the sequence `pattern` in vector `A` starting at position `start`.
+
+!!! compat "Julia 1.6"
+    This method requires at least Julia 1.6.
+
+# Examples
+```jldoctest
+julia> findnext([0x52, 0x62], [0x52, 0x62, 0x72], 3) === nothing
+true
+
+julia> findnext([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3)
+4:5
+```
+"""
+findnext(pattern::AbstractVector{<:Union{Int8,UInt8}},
+         A::AbstractVector{<:Union{Int8,UInt8}},
+         start::Integer) =
+    _search(A, pattern, start)
 
 """
     findlast(pattern::AbstractString, string::AbstractString)
@@ -314,6 +370,23 @@ julia> findfirst("Julia", "JuliaLang")
 findlast(pattern::AbstractString, string::AbstractString) =
     findprev(pattern, string, lastindex(string))
 
+"""
+    findlast(pattern::AbstractVector{<:Union{Int8,UInt8}},
+             A::AbstractVector{<:Union{Int8,UInt8}})
+
+Find the last occurrence of `pattern` in array `A`. Equivalent to
+[`findprev(pattern, A, lastindex(A))`](@ref).
+
+# Examples
+```jldoctest
+julia> findlast([0x52, 0x62], [0x52, 0x62, 0x52, 0x62])
+3:4
+```
+"""
+findlast(pattern::AbstractVector{<:Union{Int8,UInt8}},
+         A::AbstractVector{<:Union{Int8,UInt8}}) =
+    findprev(pattern, A, lastindex(A))
+
 """
     findlast(ch::AbstractChar, string::AbstractString)
 
@@ -354,7 +427,7 @@ function _rsearchindex(s::AbstractString,
         return 1 <= i <= nextind(s, lastindex(s))::Int ? i :
                throw(BoundsError(s, i))
     end
-    t1, trest = Iterators.peel(Iterators.reverse(t))
+    t1, trest = Iterators.peel(Iterators.reverse(t))::NTuple{2,Any}
     while true
         i = findprev(isequal(t1), s, i)
         i === nothing && return 0
@@ -387,21 +460,24 @@ function _rsearchindex(s::String, t::String, i::Integer)
     end
 end
 
-function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
-    n = sizeof(t)
-    m = sizeof(s)
+function _rsearchindex(s::AbstractVector{<:Union{Int8,UInt8}}, t::AbstractVector{<:Union{Int8,UInt8}}, _k::Integer)
+    sentinel = firstindex(s) - 1
+    n = length(t)
+    m = length(s)
+    k = Int(_k) - sentinel
+    k < 0 && throw(BoundsError(s, _k))
 
     if n == 0
-        return 0 <= k <= m ? max(k, 1) : 0
+        return 0 <= k <= m ? max(k, 1) : sentinel
     elseif m == 0
-        return 0
+        return sentinel
     elseif n == 1
-        return something(findprev(isequal(_nthbyte(t,1)), s, k), 0)
+        return something(findprev(isequal(_nthbyte(t,1)), s, k), sentinel)
     end
 
     w = m - n
     if w < 0 || k <= 0
-        return 0
+        return sentinel
     end
 
     bloom_mask = UInt64(0)
@@ -426,9 +502,9 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
                 j += 1
             end
 
-            # match found
+            # match found, restore in case `s` is an OffsetArray
             if j == n
-                return i
+                return i + sentinel
             end
 
             # no match, try to rule out the next character
@@ -445,16 +521,16 @@ function _rsearchindex(s::ByteArray, t::ByteArray, k::Integer)
         i -= 1
     end
 
-    0
+    sentinel
 end
 
-function _rsearch(s::Union{AbstractString,ByteArray},
-                  t::Union{AbstractString,AbstractChar,Int8,UInt8},
+function _rsearch(s::Union{AbstractString,AbstractVector{<:Union{Int8,UInt8}}},
+                  t::Union{AbstractString,AbstractChar,AbstractVector{<:Union{Int8,UInt8}}},
                   i::Integer)
     idx = _rsearchindex(s,t,i)
     if isempty(t)
         idx:idx-1
-    elseif idx > 0
+    elseif idx > firstindex(s) - 1
         idx:(idx + lastindex(t) - 1)
     else
         nothing
@@ -503,11 +579,31 @@ julia> findprev('o', "Hello to the world", 18)
 15
 ```
 """
-findprev(ch::AbstractChar, string::AbstractString, ind::Integer) =
-    findprev(==(ch), string, ind)
+findprev(ch::AbstractChar, string::AbstractString, start::Integer) =
+    findprev(==(ch), string, start)
 
 """
-    occursin(needle::Union{AbstractString,Regex,AbstractChar}, haystack::AbstractString)
+    findprev(pattern::AbstractVector{<:Union{Int8,UInt8}},
+             A::AbstractVector{<:Union{Int8,UInt8}},
+             start::Integer)
+
+Find the previous occurrence of the sequence `pattern` in vector `A` starting at position `start`.
+
+!!! compat "Julia 1.6"
+    This method requires at least Julia 1.6.
+
+# Examples
+```jldoctest
+julia> findprev([0x52, 0x62], [0x40, 0x52, 0x62, 0x52, 0x62], 3)
+2:3
+```
+"""
+findprev(pattern::AbstractVector{<:Union{Int8,UInt8}},
+         A::AbstractVector{<:Union{Int8,UInt8}},
+         start::Integer) =
+    _rsearch(A, pattern, start)
+"""
+    occursin(needle::Union{AbstractString,AbstractPattern,AbstractChar}, haystack::AbstractString)
 
 Determine whether the first argument is a substring of the second. If `needle`
 is a regular expression, checks whether `haystack` contains a match.
@@ -527,9 +623,22 @@ julia> occursin(r"a.a", "abba")
 false
 ```
 
-See also: [`contains`](@ref).
+See also [`contains`](@ref).
 """
 occursin(needle::Union{AbstractString,AbstractChar}, haystack::AbstractString) =
     _searchindex(haystack, needle, firstindex(haystack)) != 0
 
-in(::AbstractString, ::AbstractString) = error("use occursin(x, y) for string containment")
+"""
+    occursin(haystack)
+
+Create a function that checks whether its argument occurs in `haystack`, i.e.
+a function equivalent to `needle -> occursin(needle, haystack)`.
+
+The returned function is of type `Base.Fix2{typeof(occursin)}`.
+
+!!! compat "Julia 1.6"
+    This method requires Julia 1.6 or later.
+"""
+occursin(haystack) = Base.Fix2(occursin, haystack)
+
+in(::AbstractString, ::AbstractString) = error("use occursin(needle, haystack) for string containment")
diff --git a/base/strings/string.jl b/base/strings/string.jl
index 1ebb85ff78dd22..e44746f9834d93 100644
--- a/base/strings/string.jl
+++ b/base/strings/string.jl
@@ -25,10 +25,22 @@ function Base.showerror(io::IO, exc::StringIndexError)
     end
 end
 
-const ByteArray = Union{Vector{UInt8},Vector{Int8}}
+const ByteArray = Union{CodeUnits{UInt8,String}, Vector{UInt8},Vector{Int8}, FastContiguousSubArray{UInt8,1,CodeUnits{UInt8,String}}, FastContiguousSubArray{UInt8,1,Vector{UInt8}}, FastContiguousSubArray{Int8,1,Vector{Int8}}}
 
 @inline between(b::T, lo::T, hi::T) where {T<:Integer} = (lo ≤ b) & (b ≤ hi)
 
+"""
+    String <: AbstractString
+
+The default string type in Julia, used by e.g. string literals.
+
+`String`s are immutable sequences of `Char`s. A `String` is stored internally as
+a contiguous byte array, and while they are interpreted as being UTF-8 encoded,
+they can be composed of any byte sequence. Use [`isvalid`](@ref) to validate
+that the underlying byte sequence is valid as UTF-8.
+"""
+String
+
 ## constructors and conversions ##
 
 # String constructor docstring from boot.jl, workaround for #16730
@@ -36,10 +48,11 @@ const ByteArray = Union{Vector{UInt8},Vector{Int8}}
 """
     String(v::AbstractVector{UInt8})
 
-Create a new `String` object from a byte vector `v` containing UTF-8 encoded
-characters. If `v` is `Vector{UInt8}` it will be truncated to zero length and
-future modification of `v` cannot affect the contents of the resulting string.
-To avoid truncation use `String(copy(v))`.
+Create a new `String` object using the data buffer from byte vector `v`.
+If `v` is a `Vector{UInt8}` it will be truncated to zero length and future
+modification of `v` cannot affect the contents of the resulting string.
+To avoid truncation of `Vector{UInt8}` data, use `String(copy(v))`; for other
+`AbstractVector` types, `String(v)` already makes a copy.
 
 When possible, the memory of `v` will be used without copying when the `String`
 object is created. This is guaranteed to be the case for byte vectors returned
@@ -70,16 +83,17 @@ function unsafe_string(p::Union{Ptr{UInt8},Ptr{Int8}})
     ccall(:jl_cstr_to_string, Ref{String}, (Ptr{UInt8},), p)
 end
 
-_string_n(n::Integer) = ccall(:jl_alloc_string, Ref{String}, (Csize_t,), n)
+# This is @assume_effects :effect_free :nothrow :terminates_globally @ccall jl_alloc_string(n::Csize_t)::Ref{String},
+# but the macro is not available at this time in bootstrap, so we write it manually.
+@eval _string_n(n::Integer) = $(Expr(:foreigncall, QuoteNode(:jl_alloc_string), Ref{String}, Expr(:call, Expr(:core, :svec), :Csize_t), 1, QuoteNode((:ccall,0xe)), :(convert(Csize_t, n))))
 
 """
     String(s::AbstractString)
 
-Convert a string to a contiguous byte array representation encoded as UTF-8 bytes.
-This representation is often appropriate for passing strings to C.
+Create a new `String` from an existing `AbstractString`.
 """
 String(s::AbstractString) = print_to_string(s)
-@pure String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))
+@assume_effects :total String(s::Symbol) = unsafe_string(unsafe_convert(Ptr{UInt8}, s))
 
 unsafe_wrap(::Type{Vector{UInt8}}, s::String) = ccall(:jl_string_to_array, Ref{Vector{UInt8}}, (Any,), s)
 
@@ -92,9 +106,9 @@ String(s::CodeUnits{UInt8,String}) = s.s
 ## low-level functions ##
 
 pointer(s::String) = unsafe_convert(Ptr{UInt8}, s)
-pointer(s::String, i::Integer) = pointer(s)+(i-1)
+pointer(s::String, i::Integer) = pointer(s) + Int(i)::Int - 1
 
-@pure ncodeunits(s::String) = Core.sizeof(s)
+ncodeunits(s::String) = Core.sizeof(s)
 codeunit(s::String) = UInt8
 
 @inline function codeunit(s::String, i::Integer)
@@ -233,7 +247,7 @@ function getindex_continued(s::String, i::Int, u::UInt32)
     end
     n = ncodeunits(s)
 
-    (i += 1) > n && @goto ret
+    (i += 1) > n && @goto ret
     @inbounds b = codeunit(s, i) # cont byte 1
     b & 0xc0 == 0x80 || @goto ret
     u |= UInt32(b) << 16
@@ -251,7 +265,7 @@ function getindex_continued(s::String, i::Int, u::UInt32)
     return reinterpret(Char, u)
 end
 
-getindex(s::String, r::UnitRange{<:Integer}) = s[Int(first(r)):Int(last(r))]
+getindex(s::String, r::AbstractUnitRange{<:Integer}) = s[Int(first(r)):Int(last(r))]
 
 @inline function getindex(s::String, r::UnitRange{Int})
     isempty(r) && return ""
@@ -273,7 +287,7 @@ length(s::String) = length_continued(s, 1, ncodeunits(s), ncodeunits(s))
 @inline function length(s::String, i::Int, j::Int)
     @boundscheck begin
         0 < i ≤ ncodeunits(s)+1 || throw(BoundsError(s, i))
-        0 ≤ j < ncodeunits(s)+1 || throw(BoundsError(s, j))
+        0 ≤ j < ncodeunits(s)+1 || throw(BoundsError(s, j))
     end
     j < i && return 0
     @inbounds i, k = thisind(s, i), i
@@ -286,8 +300,8 @@ end
     @inbounds b = codeunit(s, i)
     @inbounds while true
         while true
-            (i += 1) ≤ n || return c
-            0xc0 ≤ b ≤ 0xf7 && break
+            (i += 1) ≤ n || return c
+            0xc0 ≤ b ≤ 0xf7 && break
             b = codeunit(s, i)
         end
         l = b
@@ -295,12 +309,12 @@ end
         c -= (x = b & 0xc0 == 0x80)
         x & (l ≥ 0xe0) || continue
 
-        (i += 1) ≤ n || return c
+        (i += 1) ≤ n || return c
         b = codeunit(s, i) # cont byte 2
         c -= (x = b & 0xc0 == 0x80)
         x & (l ≥ 0xf0) || continue
 
-        (i += 1) ≤ n || return c
+        (i += 1) ≤ n || return c
         b = codeunit(s, i) # cont byte 3
         c -= (b & 0xc0 == 0x80)
     end
diff --git a/base/strings/substring.jl b/base/strings/substring.jl
index 3e99cc7477446d..b8a0de19483269 100644
--- a/base/strings/substring.jl
+++ b/base/strings/substring.jl
@@ -7,6 +7,9 @@
 Like [`getindex`](@ref), but returns a view into the parent string `s`
 within range `i:j` or `r` respectively instead of making a copy.
 
+The [`@views`](@ref) macro converts any string slices `s[i:j]` into
+substrings `SubString(s, i, j)` in a block of code.
+
 # Examples
 ```jldoctest
 julia> SubString("abc", 1, 2)
@@ -25,7 +28,7 @@ struct SubString{T<:AbstractString} <: AbstractString
     ncodeunits::Int
 
     function SubString{T}(s::T, i::Int, j::Int) where T<:AbstractString
-        i ≤ j || return new(s, 0, 0)
+        i ≤ j || return new(s, 0, 0)
         @boundscheck begin
             checkbounds(s, i:j)
             @inbounds isvalid(s, i) || string_index_err(s, i)
@@ -55,6 +58,11 @@ convert(::Type{SubString{S}}, s::AbstractString) where {S<:AbstractString} =
     SubString(convert(S, s))
 convert(::Type{T}, s::T) where {T<:SubString} = s
 
+# Regex match allows only Union{String, SubString{String}} so define conversion to this type
+convert(::Type{Union{String, SubString{String}}}, s::String) = s
+convert(::Type{Union{String, SubString{String}}}, s::SubString{String}) = s
+convert(::Type{Union{String, SubString{String}}}, s::AbstractString) = convert(String, s)
+
 function String(s::SubString{String})
     parent = s.string
     copy = GC.@preserve parent unsafe_string(pointer(parent, s.offset+1), s.ncodeunits)
@@ -205,19 +213,37 @@ end
     return n
 end
 
-function string(a::Union{Char, String, SubString{String}}...)
+@inline function __unsafe_string!(out, s::Symbol, offs::Integer)
+    n = sizeof(s)
+    GC.@preserve s out unsafe_copyto!(pointer(out, offs), unsafe_convert(Ptr{UInt8},s), n)
+    return n
+end
+
+function string(a::Union{Char, String, SubString{String}, Symbol}...)
     n = 0
     for v in a
+        # 4 types is too many for automatic Union-splitting, so we split manually
+        # and allow one specializable call site per concrete type
         if v isa Char
             n += ncodeunits(v)
-        else
+        elseif v isa String
+            n += sizeof(v)
+        elseif v isa SubString{String}
             n += sizeof(v)
+        else
+            n += sizeof(v::Symbol)
         end
     end
     out = _string_n(n)
     offs = 1
     for v in a
-        offs += __unsafe_string!(out, v, offs)
+        if v isa Char
+            offs += __unsafe_string!(out, v, offs)
+        elseif v isa String || v isa SubString{String}
+            offs += __unsafe_string!(out, v, offs)
+        else
+            offs += __unsafe_string!(out, v::Symbol, offs)
+        end
     end
     return out
 end
@@ -252,4 +278,4 @@ function filter(f, s::Union{String, SubString{String}})
     return String(out)
 end
 
-getindex(s::AbstractString, r::UnitRange{<:Integer}) = SubString(s, r)
+getindex(s::AbstractString, r::AbstractUnitRange{<:Integer}) = SubString(s, r)
diff --git a/base/strings/unicode.jl b/base/strings/unicode.jl
index 235f85184d43e5..902a27b942d4e8 100644
--- a/base/strings/unicode.jl
+++ b/base/strings/unicode.jl
@@ -145,20 +145,43 @@ const UTF8PROC_STRIPMARK = (1<<13)
 
 utf8proc_error(result) = error(unsafe_string(ccall(:utf8proc_errmsg, Cstring, (Cssize_t,), result)))
 
-function utf8proc_map(str::String, options::Integer)
-    nwords = ccall(:utf8proc_decompose, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint),
-                   str, sizeof(str), C_NULL, 0, options)
-    nwords < 0 && utf8proc_error(nwords)
+# static wrapper around user callback function
+utf8proc_custom_func(codepoint::UInt32, callback::Any) =
+    UInt32(callback(codepoint))::UInt32
+
+function utf8proc_decompose(str, options, buffer, nwords, chartransform::typeof(identity))
+    ret = ccall(:utf8proc_decompose, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint),
+                str, sizeof(str), buffer, nwords, options)
+    ret < 0 && utf8proc_error(ret)
+    return ret
+end
+function utf8proc_decompose(str, options, buffer, nwords, chartransform::T) where T
+    ret = ccall(:utf8proc_decompose_custom, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint, Ptr{Cvoid}, Ref{T}),
+                str, sizeof(str), buffer, nwords, options,
+                @cfunction(utf8proc_custom_func, UInt32, (UInt32, Ref{T})), chartransform)
+    ret < 0 && utf8proc_error(ret)
+    return ret
+end
+
+function utf8proc_map(str::Union{String,SubString{String}}, options::Integer, chartransform=identity)
+    nwords = utf8proc_decompose(str, options, C_NULL, 0, chartransform)
     buffer = Base.StringVector(nwords*4)
-    nwords = ccall(:utf8proc_decompose, Int, (Ptr{UInt8}, Int, Ptr{UInt8}, Int, Cint),
-                   str, sizeof(str), buffer, nwords, options)
-    nwords < 0 && utf8proc_error(nwords)
+    nwords = utf8proc_decompose(str, options, buffer, nwords, chartransform)
     nbytes = ccall(:utf8proc_reencode, Int, (Ptr{UInt8}, Int, Cint), buffer, nwords, options)
     nbytes < 0 && utf8proc_error(nbytes)
     return String(resize!(buffer, nbytes))
 end
 
-utf8proc_map(s::AbstractString, flags::Integer) = utf8proc_map(String(s), flags)
+# from julia_charmap.h, used by julia_chartransform in the Unicode stdlib
+const _julia_charmap = Dict{UInt32,UInt32}(
+    0x025B => 0x03B5,
+    0x00B5 => 0x03BC,
+    0x00B7 => 0x22C5,
+    0x0387 => 0x22C5,
+    0x2212 => 0x002D,
+)
+
+utf8proc_map(s::AbstractString, flags::Integer, chartransform=identity) = utf8proc_map(String(s), flags, chartransform)
 
 # Documented in Unicode module
 function normalize(
@@ -176,6 +199,7 @@ function normalize(
     casefold::Bool=false,
     lump::Bool=false,
     stripmark::Bool=false,
+    chartransform=identity,
 )
     flags = 0
     stable && (flags = flags | UTF8PROC_STABLE)
@@ -198,7 +222,7 @@ function normalize(
     casefold && (flags = flags | UTF8PROC_CASEFOLD)
     lump && (flags = flags | UTF8PROC_LUMP)
     stripmark && (flags = flags | UTF8PROC_STRIPMARK)
-    utf8proc_map(s, flags)
+    utf8proc_map(s, flags, chartransform)
 end
 
 function normalize(s::AbstractString, nf::Symbol)
@@ -246,10 +270,64 @@ julia> textwidth("March")
 """
 textwidth(s::AbstractString) = mapreduce(textwidth, +, s; init=0)
 
+"""
+    lowercase(c::AbstractChar)
+
+Convert `c` to lowercase.
+
+See also [`uppercase`](@ref), [`titlecase`](@ref).
+
+# Examples
+```jldoctest
+julia> lowercase('A')
+'a': ASCII/Unicode U+0061 (category Ll: Letter, lowercase)
+
+julia> lowercase('Ö')
+'ö': Unicode U+00F6 (category Ll: Letter, lowercase)
+```
+"""
 lowercase(c::T) where {T<:AbstractChar} = isascii(c) ? ('A' <= c <= 'Z' ? c + 0x20 : c) :
     T(ccall(:utf8proc_tolower, UInt32, (UInt32,), c))
+
+"""
+    uppercase(c::AbstractChar)
+
+Convert `c` to uppercase.
+
+See also [`lowercase`](@ref), [`titlecase`](@ref).
+
+# Examples
+```jldoctest
+julia> uppercase('a')
+'A': ASCII/Unicode U+0041 (category Lu: Letter, uppercase)
+
+julia> uppercase('ê')
+'Ê': Unicode U+00CA (category Lu: Letter, uppercase)
+```
+"""
 uppercase(c::T) where {T<:AbstractChar} = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) :
     T(ccall(:utf8proc_toupper, UInt32, (UInt32,), c))
+
+"""
+    titlecase(c::AbstractChar)
+
+Convert `c` to titlecase. This may differ from uppercase for digraphs,
+compare the example below.
+
+See also [`uppercase`](@ref), [`lowercase`](@ref).
+
+# Examples
+```jldoctest
+julia> titlecase('a')
+'A': ASCII/Unicode U+0041 (category Lu: Letter, uppercase)
+
+julia> titlecase('ǆ')
+'ǅ': Unicode U+01C5 (category Lt: Letter, titlecase)
+
+julia> uppercase('ǆ')
+'Ǆ': Unicode U+01C4 (category Lu: Letter, uppercase)
+```
+"""
 titlecase(c::T) where {T<:AbstractChar} = isascii(c) ? ('a' <= c <= 'z' ? c - 0x20 : c) :
     T(ccall(:utf8proc_totitle, UInt32, (UInt32,), c))
 
@@ -280,11 +358,10 @@ isassigned(c) = UTF8PROC_CATEGORY_CN < category_code(c) <= UTF8PROC_CATEGORY_CO
 """
     islowercase(c::AbstractChar) -> Bool
 
-Tests whether a character is a lowercase letter.
-A character is classified as lowercase if it belongs to Unicode category Ll,
-Letter: Lowercase.
+Tests whether a character is a lowercase letter (according to the Unicode
+standard's `Lowercase` derived property).
 
-See also: [`isuppercase`](@ref).
+See also [`isuppercase`](@ref).
 
 # Examples
 ```jldoctest
@@ -298,18 +375,17 @@ julia> islowercase('❤')
 false
 ```
 """
-islowercase(c::AbstractChar) = category_code(c) == UTF8PROC_CATEGORY_LL
+islowercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_islower, Cint, (UInt32,), UInt32(c)))
 
 # true for Unicode upper and mixed case
 
 """
     isuppercase(c::AbstractChar) -> Bool
 
-Tests whether a character is an uppercase letter.
-A character is classified as uppercase if it belongs to Unicode category Lu,
-Letter: Uppercase, or Lt, Letter: Titlecase.
+Tests whether a character is an uppercase letter (according to the Unicode
+standard's `Uppercase` derived property).
 
-See also: [`islowercase`](@ref).
+See also [`islowercase`](@ref).
 
 # Examples
 ```jldoctest
@@ -323,17 +399,14 @@ julia> isuppercase('❤')
 false
 ```
 """
-function isuppercase(c::AbstractChar)
-    cat = category_code(c)
-    cat == UTF8PROC_CATEGORY_LU || cat == UTF8PROC_CATEGORY_LT
-end
+isuppercase(c::AbstractChar) = ismalformed(c) ? false : Bool(ccall(:utf8proc_isupper, Cint, (UInt32,), UInt32(c)))
 
 """
     iscased(c::AbstractChar) -> Bool
 
 Tests whether a character is cased, i.e. is lower-, upper- or title-cased.
 
-See also: [`islowercase`](@ref), [`isuppercase`](@ref).
+See also [`islowercase`](@ref), [`isuppercase`](@ref).
 """
 function iscased(c::AbstractChar)
     cat = category_code(c)
@@ -519,7 +592,7 @@ isxdigit(c::AbstractChar) = '0'<=c<='9' || 'a'<=c<='f' || 'A'<=c<='F'
 
 Return `s` with all characters converted to uppercase.
 
-See also: [`lowercase`](@ref), [`titlecase`](@ref), [`uppercasefirst`](@ref).
+See also [`lowercase`](@ref), [`titlecase`](@ref), [`uppercasefirst`](@ref).
 
 # Examples
 ```jldoctest
@@ -534,7 +607,7 @@ uppercase(s::AbstractString) = map(uppercase, s)
 
 Return `s` with all characters converted to lowercase.
 
-See also: [`uppercase`](@ref), [`titlecase`](@ref), [`lowercasefirst`](@ref).
+See also [`uppercase`](@ref), [`titlecase`](@ref), [`lowercasefirst`](@ref).
 
 # Examples
 ```jldoctest
@@ -550,13 +623,13 @@ lowercase(s::AbstractString) = map(lowercase, s)
 Capitalize the first character of each word in `s`;
 if `strict` is true, every other character is
 converted to lowercase, otherwise they are left unchanged.
-By default, all non-letters are considered as word separators;
+By default, all non-letters beginning a new grapheme are considered as word separators;
 a predicate can be passed as the `wordsep` keyword to determine
 which characters should be considered as word separators.
 See also [`uppercasefirst`](@ref) to capitalize only the first
 character in `s`.
 
-See also: [`uppercase`](@ref), [`lowercase`](@ref), [`uppercasefirst`](@ref).
+See also [`uppercase`](@ref), [`lowercase`](@ref), [`uppercasefirst`](@ref).
 
 # Examples
 ```jldoctest
@@ -570,17 +643,23 @@ julia> titlecase("a-a b-b", wordsep = c->c==' ')
 "A-a B-b"
 ```
 """
-function titlecase(s::AbstractString; wordsep::Function = !iscased, strict::Bool=true)
+function titlecase(s::AbstractString; wordsep::Function = !isletter, strict::Bool=true)
     startword = true
+    state = Ref{Int32}(0)
+    c0 = eltype(s)(0x00000000)
     b = IOBuffer()
     for c in s
-        if wordsep(c)
+        # Note: It would be better to have a word iterator following UAX#29,
+        # similar to our grapheme iterator, but utf8proc does not yet have
+        # this information.  At the very least we shouldn't break inside graphemes.
+        if isgraphemebreak!(state, c0, c) && wordsep(c)
             print(b, c)
             startword = true
         else
             print(b, startword ? titlecase(c) : strict ? lowercase(c) : c)
             startword = false
         end
+        c0 = c
     end
     return String(take!(b))
 end
@@ -592,8 +671,8 @@ Return `s` with the first character converted to uppercase (technically "title
 case" for Unicode). See also [`titlecase`](@ref) to capitalize the first
 character of every word in `s`.
 
-See also: [`lowercasefirst`](@ref), [`uppercase`](@ref), [`lowercase`](@ref),
-[`titlecase`](@ref)
+See also [`lowercasefirst`](@ref), [`uppercase`](@ref), [`lowercase`](@ref),
+[`titlecase`](@ref).
 
 # Examples
 ```jldoctest
@@ -614,8 +693,8 @@ end
 
 Return `s` with the first character converted to lowercase.
 
-See also: [`uppercasefirst`](@ref), [`uppercase`](@ref), [`lowercase`](@ref),
-[`titlecase`](@ref)
+See also [`uppercasefirst`](@ref), [`uppercase`](@ref), [`lowercase`](@ref),
+[`titlecase`](@ref).
 
 # Examples
 ```jldoctest
@@ -680,7 +759,7 @@ function iterate(g::GraphemeIterator, i_=(Int32(0),firstindex(g.s)))
     y === nothing && return nothing
     c0, k = y
     while k <= ncodeunits(s) # loop until next grapheme is s[i:j]
-        c, ℓ = iterate(s, k)
+        c, ℓ = iterate(s, k)::NTuple{2,Any}
         isgraphemebreak!(state, c0, c) && break
         j = k
         k = ℓ
diff --git a/base/strings/util.jl b/base/strings/util.jl
index 24dfaa72e9767c..fb89303e557e48 100644
--- a/base/strings/util.jl
+++ b/base/strings/util.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-const Chars = Union{AbstractChar,Tuple{Vararg{<:AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}
+const Chars = Union{AbstractChar,Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}
 
 # starts with and ends with predicates
 
@@ -10,7 +10,7 @@ const Chars = Union{AbstractChar,Tuple{Vararg{<:AbstractChar}},AbstractVector{<:
 Return `true` if `s` starts with `prefix`. If `prefix` is a vector or set
 of characters, test whether the first character of `s` belongs to that set.
 
-See also [`endswith`](@ref).
+See also [`endswith`](@ref), [`contains`](@ref).
 
 # Examples
 ```jldoctest
@@ -19,10 +19,15 @@ true
 ```
 """
 function startswith(a::AbstractString, b::AbstractString)
-    a, b = Iterators.Stateful(a), Iterators.Stateful(b)
-    all(splat(==), zip(a, b)) && isempty(b)
+    i, j = iterate(a), iterate(b)
+    while true
+        j === nothing && return true # ran out of prefix: success!
+        i === nothing && return false # ran out of source: failure
+        i[1] == j[1] || return false # mismatch: failure
+        i, j = iterate(a, i[2]), iterate(b, j[2])
+    end
 end
-startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str) in chars
+startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str)::AbstractChar in chars
 
 """
     endswith(s::AbstractString, suffix::AbstractString)
@@ -30,7 +35,7 @@ startswith(str::AbstractString, chars::Chars) = !isempty(str) && first(str) in c
 Return `true` if `s` ends with `suffix`. If `suffix` is a vector or set of
 characters, test whether the last character of `s` belongs to that set.
 
-See also [`startswith`](@ref).
+See also [`startswith`](@ref), [`contains`](@ref).
 
 # Examples
 ```jldoctest
@@ -39,9 +44,14 @@ true
 ```
 """
 function endswith(a::AbstractString, b::AbstractString)
-    a = Iterators.Stateful(Iterators.reverse(a))
-    b = Iterators.Stateful(Iterators.reverse(b))
-    all(splat(==), zip(a, b)) && isempty(b)
+    a, b = Iterators.Reverse(a), Iterators.Reverse(b)
+    i, j = iterate(a), iterate(b)
+    while true
+        j === nothing && return true # ran out of suffix: success!
+        i === nothing && return false # ran out of source: failure
+        i[1] == j[1] || return false # mismatch: failure
+        i, j = iterate(a, i[2]), iterate(b, j[2])
+    end
 end
 endswith(str::AbstractString, chars::Chars) = !isempty(str) && last(str) in chars
 
@@ -51,7 +61,7 @@ function startswith(a::Union{String, SubString{String}},
     if ncodeunits(a) < cub
         false
     elseif _memcmp(a, b, sizeof(b)) == 0
-        nextind(a, cub) == cub + 1
+        nextind(a, cub) == cub + 1 # check that end of `b` doesn't match a partial character in `a`
     else
         false
     end
@@ -64,7 +74,7 @@ function endswith(a::Union{String, SubString{String}},
     if astart < 1
         false
     elseif GC.@preserve(a, _memcmp(pointer(a, astart), b, sizeof(b))) == 0
-        thisind(a, astart) == astart
+        thisind(a, astart) == astart # check that end of `b` doesn't match a partial character in `a`
     else
         false
     end
@@ -77,6 +87,8 @@ Return `true` if `haystack` contains `needle`.
 This is the same as `occursin(needle, haystack)`, but is provided for consistency with
 `startswith(haystack, needle)` and `endswith(haystack, needle)`.
 
+See also [`occursin`](@ref), [`in`](@ref), [`issubset`](@ref).
+
 # Examples
 ```jldoctest
 julia> contains("JuliaLang is pretty cool!", "Julia")
@@ -109,6 +121,14 @@ used to implement specialized methods.
 !!! compat "Julia 1.5"
     The single argument `endswith(suffix)` requires at least Julia 1.5.
 
+# Examples
+```jldoctest
+julia> endswith("Julia")("Ends with Julia")
+true
+
+julia> endswith("Julia")("JuliaLang")
+false
+```
 """
 endswith(s) = Base.Fix2(endswith, s)
 
@@ -124,6 +144,14 @@ used to implement specialized methods.
 !!! compat "Julia 1.5"
     The single argument `startswith(prefix)` requires at least Julia 1.5.
 
+# Examples
+```jldoctest
+julia> startswith("Julia")("JuliaLang")
+true
+
+julia> startswith("Julia")("Ends with Julia")
+false
+```
 """
 startswith(s) = Base.Fix2(startswith, s)
 
@@ -146,6 +174,8 @@ The call `chop(s)` removes the last character from `s`.
 If it is requested to remove more characters than `length(s)`
 then an empty string is returned.
 
+See also [`chomp`](@ref), [`startswith`](@ref), [`first`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = "March"
@@ -171,11 +201,98 @@ end
 # TODO: optimization for the default case based on
 # chop(s::AbstractString) = SubString(s, firstindex(s), prevind(s, lastindex(s)))
 
+"""
+    chopprefix(s::AbstractString, prefix::Union{AbstractString,Regex}) -> SubString
+
+Remove the prefix `prefix` from `s`. If `s` does not start with `prefix`, a string equal to `s` is returned.
+
+See also [`chopsuffix`](@ref).
+
+!!! compat "Julia 1.8"
+    This function is available as of Julia 1.8.
+
+# Examples
+```jldoctest
+julia> chopprefix("Hamburger", "Ham")
+"burger"
+
+julia> chopprefix("Hamburger", "hotdog")
+"Hamburger"
+```
+"""
+function chopprefix(s::AbstractString, prefix::AbstractString)
+    k = firstindex(s)
+    i, j = iterate(s), iterate(prefix)
+    while true
+        j === nothing && i === nothing && return SubString(s, 1, 0) # s == prefix: empty result
+        j === nothing && return @inbounds SubString(s, k) # ran out of prefix: success!
+        i === nothing && return SubString(s) # ran out of source: failure
+        i[1] == j[1] || return SubString(s) # mismatch: failure
+        k = i[2]
+        i, j = iterate(s, k), iterate(prefix, j[2])
+    end
+end
+
+function chopprefix(s::Union{String, SubString{String}},
+                    prefix::Union{String, SubString{String}})
+    if startswith(s, prefix)
+        SubString(s, 1 + ncodeunits(prefix))
+    else
+        SubString(s)
+    end
+end
+
+"""
+    chopsuffix(s::AbstractString, suffix::Union{AbstractString,Regex}) -> SubString
+
+Remove the suffix `suffix` from `s`. If `s` does not end with `suffix`, a string equal to `s` is returned.
+
+See also [`chopprefix`](@ref).
+
+!!! compat "Julia 1.8"
+    This function is available as of Julia 1.8.
+
+# Examples
+```jldoctest
+julia> chopsuffix("Hamburger", "er")
+"Hamburg"
+
+julia> chopsuffix("Hamburger", "hotdog")
+"Hamburger"
+```
+"""
+function chopsuffix(s::AbstractString, suffix::AbstractString)
+    a, b = Iterators.Reverse(s), Iterators.Reverse(suffix)
+    k = lastindex(s)
+    i, j = iterate(a), iterate(b)
+    while true
+        j === nothing && i === nothing && return SubString(s, 1, 0) # s == suffix: empty result
+        j === nothing && return @inbounds SubString(s, firstindex(s), k) # ran out of suffix: success!
+        i === nothing && return SubString(s) # ran out of source: failure
+        i[1] == j[1] || return SubString(s) # mismatch: failure
+        k = i[2]
+        i, j = iterate(a, k), iterate(b, j[2])
+    end
+end
+
+function chopsuffix(s::Union{String, SubString{String}},
+                    suffix::Union{String, SubString{String}})
+    if !isempty(suffix) && endswith(s, suffix)
+        astart = ncodeunits(s) - ncodeunits(suffix) + 1
+        @inbounds SubString(s, firstindex(s), prevind(s, astart))
+    else
+        SubString(s)
+    end
+end
+
+
 """
     chomp(s::AbstractString) -> SubString
 
 Remove a single trailing newline from a string.
 
+See also [`chop`](@ref).
+
 # Examples
 ```jldoctest
 julia> chomp("Hello\\n")
@@ -213,6 +330,8 @@ The default behaviour is to remove leading whitespace and delimiters: see
 The optional `chars` argument specifies which characters to remove: it can be a single
 character, or a vector or set of characters.
 
+See also [`strip`](@ref) and [`rstrip`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = lpad("March", 20)
@@ -245,6 +364,8 @@ The default behaviour is to remove trailing whitespace and delimiters: see
 The optional `chars` argument specifies which characters to remove: it can be a single
 character, or a vector or set of characters.
 
+See also [`strip`](@ref) and [`lstrip`](@ref).
+
 # Examples
 ```jldoctest
 julia> a = rpad("March", 20)
@@ -270,12 +391,14 @@ rstrip(s::AbstractString, chars::Chars) = rstrip(in(chars), s)
 Remove leading and trailing characters from `str`, either those specified by `chars` or
 those for which the function `pred` returns `true`.
 
-The default behaviour is to remove leading whitespace and delimiters: see
+The default behaviour is to remove leading and trailing whitespace and delimiters: see
 [`isspace`](@ref) for precise details.
 
 The optional `chars` argument specifies which characters to remove: it can be a single
 character, vector or set of characters.
 
+See also [`lstrip`](@ref) and [`rstrip`](@ref).
+
 !!! compat "Julia 1.2"
     The method which accepts a predicate function requires Julia 1.2 or later.
 
@@ -295,7 +418,7 @@ strip(f, s::AbstractString) = lstrip(f, rstrip(f, s))
     lpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') -> String
 
 Stringify `s` and pad the resulting string on the left with `p` to make it `n`
-characters (code points) long. If `s` is already `n` characters long, an equal
+characters (in [`textwidth`](@ref)) long. If `s` is already `n` characters long, an equal
 string is returned. Pad with spaces by default.
 
 # Examples
@@ -303,6 +426,8 @@ string is returned. Pad with spaces by default.
 julia> lpad("March", 10)
 "     March"
 ```
+!!! compat "Julia 1.7"
+    In Julia 1.7, this function was changed to use `textwidth` rather than a raw character (codepoint) count.
 """
 lpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') = lpad(string(s)::AbstractString, n, string(p))
 
@@ -312,9 +437,9 @@ function lpad(
     p::Union{AbstractChar,AbstractString}=' ',
 ) :: String
     n = Int(n)::Int
-    m = signed(n) - Int(length(s))::Int
+    m = signed(n) - Int(textwidth(s))::Int
     m ≤ 0 && return string(s)
-    l = length(p)
+    l = textwidth(p)
     q, r = divrem(m, l)
     r == 0 ? string(p^q, s) : string(p^q, first(p, r), s)
 end
@@ -323,7 +448,7 @@ end
     rpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') -> String
 
 Stringify `s` and pad the resulting string on the right with `p` to make it `n`
-characters (code points) long. If `s` is already `n` characters long, an equal
+characters (in [`textwidth`](@ref)) long. If `s` is already `n` characters long, an equal
 string is returned. Pad with spaces by default.
 
 # Examples
@@ -331,6 +456,8 @@ string is returned. Pad with spaces by default.
 julia> rpad("March", 20)
 "March               "
 ```
+!!! compat "Julia 1.7"
+    In Julia 1.7, this function was changed to use `textwidth` rather than a raw character (codepoint) count.
 """
 rpad(s, n::Integer, p::Union{AbstractChar,AbstractString}=' ') = rpad(string(s)::AbstractString, n, string(p))
 
@@ -340,13 +467,95 @@ function rpad(
     p::Union{AbstractChar,AbstractString}=' ',
 ) :: String
     n = Int(n)::Int
-    m = signed(n) - Int(length(s))::Int
+    m = signed(n) - Int(textwidth(s))::Int
     m ≤ 0 && return string(s)
-    l = length(p)
+    l = textwidth(p)
     q, r = divrem(m, l)
     r == 0 ? string(s, p^q) : string(s, p^q, first(p, r))
 end
 
+"""
+    eachsplit(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
+    eachsplit(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
+
+Split `str` on occurrences of the delimiter(s) `dlm` and return an iterator over the
+substrings.  `dlm` can be any of the formats allowed by [`findnext`](@ref)'s first argument
+(i.e. as a string, regular expression or a function), or as a single character or collection
+of characters.
+
+If `dlm` is omitted, it defaults to [`isspace`](@ref).
+
+The optional keyword arguments are:
+ - `limit`: the maximum size of the result. `limit=0` implies no maximum (default)
+ - `keepempty`: whether empty fields should be kept in the result. Default is `false` without
+   a `dlm` argument, `true` with a `dlm` argument.
+
+See also [`split`](@ref).
+
+!!! compat "Julia 1.8"
+    The `eachsplit` function requires at least Julia 1.8.
+
+# Examples
+```jldoctest
+julia> a = "Ma.rch"
+"Ma.rch"
+
+julia> collect(eachsplit(a, "."))
+2-element Vector{SubString}:
+ "Ma"
+ "rch"
+```
+"""
+function eachsplit end
+
+# Forcing specialization on `splitter` improves performance (roughly 30% decrease in runtime)
+# and prevents a major invalidation risk (1550 MethodInstances)
+struct SplitIterator{S<:AbstractString,F}
+    str::S
+    splitter::F
+    limit::Int
+    keepempty::Bool
+end
+
+eltype(::Type{<:SplitIterator}) = SubString
+
+IteratorSize(::Type{<:SplitIterator}) = SizeUnknown()
+
+# i: the starting index of the substring to be extracted
+# k: the starting index of the next substring to be extracted
+# n: the number of splits returned so far; always less than iter.limit - 1 (1 for the rest)
+function iterate(iter::SplitIterator, (i, k, n)=(firstindex(iter.str), firstindex(iter.str), 0))
+    i - 1 > ncodeunits(iter.str)::Int && return nothing
+    r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
+    while r !== nothing && n != iter.limit - 1 && first(r) <= ncodeunits(iter.str)
+        j, k = first(r), nextind(iter.str, last(r))::Int
+        k_ = k <= j ? nextind(iter.str, j) : k
+        if i < k
+            substr = @inbounds SubString(iter.str, i, prevind(iter.str, j)::Int)
+            (iter.keepempty || i < j) && return (substr, (k, k_, n + 1))
+            i = k
+        end
+        k = k_
+        r = findnext(iter.splitter, iter.str, k)::Union{Nothing,Int,UnitRange{Int}}
+    end
+    iter.keepempty || i <= ncodeunits(iter.str) || return nothing
+    @inbounds SubString(iter.str, i), (ncodeunits(iter.str) + 2, k, n + 1)
+end
+
+eachsplit(str::T, splitter; limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString} =
+    SplitIterator(str, splitter, limit, keepempty)
+
+eachsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
+          limit::Integer=0, keepempty=true) where {T<:AbstractString} =
+    eachsplit(str, in(splitter); limit, keepempty)
+
+eachsplit(str::T, splitter::AbstractChar; limit::Integer=0, keepempty=true) where {T<:AbstractString} =
+    eachsplit(str, isequal(splitter); limit, keepempty)
+
+# a bit oddball, but standard behavior in Perl, Ruby & Python:
+eachsplit(str::AbstractString; limit::Integer=0, keepempty=false) =
+    eachsplit(str, isspace; limit, keepempty)
+
 """
     split(str::AbstractString, dlm; limit::Integer=0, keepempty::Bool=true)
     split(str::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -363,7 +572,7 @@ The optional keyword arguments are:
  - `keepempty`: whether empty fields should be kept in the result. Default is `false` without
    a `dlm` argument, `true` with a `dlm` argument.
 
-See also [`rsplit`](@ref).
+See also [`rsplit`](@ref), [`eachsplit`](@ref).
 
 # Examples
 ```jldoctest
@@ -376,50 +585,16 @@ julia> split(a, ".")
  "rch"
 ```
 """
-function split end
-
 function split(str::T, splitter;
                limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _split(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-function split(str::T, splitter::Union{Tuple{Vararg{<:AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
-               limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _split(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-function split(str::T, splitter::AbstractChar;
-               limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
-    _split(str, isequal(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
-end
-
-function _split(str::AbstractString, splitter, limit::Integer, keepempty::Bool, strs::Vector)
-    i = 1 # firstindex(str)
-    n = lastindex(str)::Int
-    r = findfirst(splitter,str)::Union{Nothing,Int,UnitRange{Int}}
-    if !isnothing(r)
-        j, k = first(r), nextind(str,last(r))::Int
-        while 0 < j <= n && length(strs) != limit-1
-            if i < k
-                if keepempty || i < j
-                    push!(strs, @inbounds SubString(str,i,prevind(str,j)::Int))
-                end
-                i = k
-            end
-            (k <= j) && (k = nextind(str,j)::Int)
-            r = findnext(splitter,str,k)::Union{Nothing,Int,UnitRange{Int}}
-            isnothing(r) && break
-            j, k = first(r), nextind(str,last(r))::Int
-        end
-    end
-    if keepempty || i <= ncodeunits(str)::Int
-        push!(strs, @inbounds SubString(str,i))
-    end
-    return strs
+    itr = eachsplit(str, splitter; limit, keepempty)
+    collect(T <: SubString ? T : SubString{T}, itr)
 end
 
 # a bit oddball, but standard behavior in Perl, Ruby & Python:
 split(str::AbstractString;
       limit::Integer=0, keepempty::Bool=false) =
-    split(str, isspace; limit=limit, keepempty=keepempty)
+    split(str, isspace; limit, keepempty)
 
 """
     rsplit(s::AbstractString; limit::Integer=0, keepempty::Bool=false)
@@ -456,7 +631,7 @@ function rsplit(str::T, splitter;
                 limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
     _rsplit(str, splitter, limit, keepempty, T <: SubString ? T[] : SubString{T}[])
 end
-function rsplit(str::T, splitter::Union{Tuple{Vararg{<:AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
+function rsplit(str::T, splitter::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}};
                 limit::Integer=0, keepempty::Bool=true) where {T<:AbstractString}
     _rsplit(str, in(splitter), limit, keepempty, T <: SubString ? T[] : SubString{T}[])
 end
@@ -488,68 +663,93 @@ _replace(io, repl::Function, str, r, pattern) =
 _replace(io, repl::Function, str, r, pattern::Function) =
     print(io, repl(str[first(r)]))
 
-replace(str::String, pat_repl::Pair{<:AbstractChar}; count::Integer=typemax(Int)) =
-    replace(str, isequal(first(pat_repl)) => last(pat_repl); count=count)
-
-replace(str::String, pat_repl::Pair{<:Union{Tuple{Vararg{<:AbstractChar}},
-                                            AbstractVector{<:AbstractChar},Set{<:AbstractChar}}};
-        count::Integer=typemax(Int)) =
-    replace(str, in(first(pat_repl)) => last(pat_repl), count=count)
-
 _pat_replacer(x) = x
 _free_pat_replacer(x) = nothing
 
-function replace(str::String, pat_repl::Pair; count::Integer=typemax(Int))
-    pattern, repl = pat_repl
+_pat_replacer(x::AbstractChar) = isequal(x)
+_pat_replacer(x::Union{Tuple{Vararg{AbstractChar}},AbstractVector{<:AbstractChar},Set{<:AbstractChar}}) = in(x)
+
+function replace(str::String, pat_repl::Vararg{Pair,N}; count::Integer=typemax(Int)) where N
     count == 0 && return str
     count < 0 && throw(DomainError(count, "`count` must be non-negative."))
     n = 1
-    e = lastindex(str)
+    e1 = nextind(str, lastindex(str)) # sizeof(str)
     i = a = firstindex(str)
-    pattern = _pat_replacer(pattern)
-    r = something(findnext(pattern,str,i), 0)
-    j, k = first(r), last(r)
-    if j == 0
-        _free_pat_replacer(pattern)
+    patterns = map(p -> _pat_replacer(first(p)), pat_repl)
+    replaces = map(last, pat_repl)
+    rs = map(patterns) do p
+        r = findnext(p, str, a)
+        if r === nothing || first(r) == 0
+            return e1+1:0
+        end
+        r isa Int && (r = r:r) # findnext / performance fix
+        return r
+    end
+    if all(>(e1), map(first, rs))
+        foreach(_free_pat_replacer, patterns)
         return str
     end
     out = IOBuffer(sizehint=floor(Int, 1.2sizeof(str)))
-    while j != 0
+    while true
+        p = argmin(map(first, rs)) # TODO: or argmin(rs), to pick the shortest first match ?
+        r = rs[p]
+        j, k = first(r), last(r)
+        j > e1 && break
         if i == a || i <= k
+            # copy out preserved portion
             GC.@preserve str unsafe_write(out, pointer(str, i), UInt(j-i))
-            _replace(out, repl, str, r, pattern)
+            # copy out replacement string
+            _replace(out, replaces[p], str, r, patterns[p])
         end
         if k < j
             i = j
-            j > e && break
+            j == e1 && break
             k = nextind(str, j)
         else
             i = k = nextind(str, k)
         end
-        r = something(findnext(pattern,str,k), 0)
-        r === 0:-1 || n == count && break
-        j, k = first(r), last(r)
+        n == count && break
+        let k = k
+            rs = map(patterns, rs) do p, r
+                if first(r) < k
+                    r = findnext(p, str, k)
+                    if r === nothing || first(r) == 0
+                        return e1+1:0
+                    end
+                    r isa Int && (r = r:r) # findnext / performance fix
+                end
+                return r
+            end
+        end
         n += 1
     end
-    _free_pat_replacer(pattern)
-    write(out, SubString(str,i))
-    String(take!(out))
+    foreach(_free_pat_replacer, patterns)
+    write(out, SubString(str, i))
+    return String(take!(out))
 end
 
+
 """
-    replace(s::AbstractString, pat=>r; [count::Integer])
+    replace(s::AbstractString, pat=>r, [pat2=>r2, ...]; [count::Integer])
 
 Search for the given pattern `pat` in `s`, and replace each occurrence with `r`.
 If `count` is provided, replace at most `count` occurrences.
 `pat` may be a single character, a vector or a set of characters, a string,
 or a regular expression.
 If `r` is a function, each occurrence is replaced with `r(s)`
-where `s` is the matched substring (when `pat` is a `Regex` or `AbstractString`) or
+where `s` is the matched substring (when `pat` is a `AbstractPattern` or `AbstractString`) or
 character (when `pat` is an `AbstractChar` or a collection of `AbstractChar`).
 If `pat` is a regular expression and `r` is a [`SubstitutionString`](@ref), then capture group
 references in `r` are replaced with the corresponding matched text.
 To remove instances of `pat` from `string`, set `r` to the empty `String` (`""`).
 
+Multiple patterns can be specified, and they will be applied left-to-right
+simultaneously, so only one pattern will be applied to any character, and the
+patterns will only be applied to the input text, not the replacements.
+
+!!! compat "Julia 1.7"
+    Support for multiple patterns requires version 1.7.
+
 # Examples
 ```jldoctest
 julia> replace("Python is a programming language.", "Python" => "Julia")
@@ -563,25 +763,33 @@ julia> replace("The quick foxes run quickly.", "quick" => "", count=1)
 
 julia> replace("The quick foxes run quickly.", r"fox(es)?" => s"bus\\1")
 "The quick buses run quickly."
+
+julia> replace("abcabc", "a" => "b", "b" => "c", r".+" => "a")
+"bca"
 ```
 """
-replace(s::AbstractString, pat_f::Pair; count=typemax(Int)) =
-    replace(String(s), pat_f, count=count)
+replace(s::AbstractString, pat_f::Pair...; count=typemax(Int)) =
+    replace(String(s), pat_f..., count=count)
 
 # TODO: allow transform as the first argument to replace?
 
 # hex <-> bytes conversion
 
 """
-    hex2bytes(s::Union{AbstractString,AbstractVector{UInt8}})
+    hex2bytes(itr)
 
-Given a string or array `s` of ASCII codes for a sequence of hexadecimal digits, returns a
+Given an iterable `itr` of ASCII codes for a sequence of hexadecimal digits, returns a
 `Vector{UInt8}` of bytes  corresponding to the binary representation: each successive pair
-of hexadecimal digits in `s` gives the value of one byte in the return vector.
+of hexadecimal digits in `itr` gives the value of one byte in the return vector.
 
-The length of `s` must be even, and the returned array has half of the length of `s`.
+The length of `itr` must be even, and the returned array has half of the length of `itr`.
 See also [`hex2bytes!`](@ref) for an in-place version, and [`bytes2hex`](@ref) for the inverse.
 
+!!! compat "Julia 1.7"
+    Calling `hex2bytes` with iterators producing `UInt8` values requires
+    Julia 1.7 or later. In earlier versions, you can `collect` the iterator
+    before calling `hex2bytes`.
+
 # Examples
 ```jldoctest
 julia> s = string(12345, base = 16)
@@ -610,46 +818,64 @@ julia> hex2bytes(a)
 """
 function hex2bytes end
 
-hex2bytes(s::AbstractString) = hex2bytes(String(s))
-hex2bytes(s::Union{String,AbstractVector{UInt8}}) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s)
+hex2bytes(s) = hex2bytes!(Vector{UInt8}(undef, length(s) >> 1), s)
 
-_firstbyteidx(s::String) = 1
-_firstbyteidx(s::AbstractVector{UInt8}) = first(eachindex(s))
-_lastbyteidx(s::String) = sizeof(s)
-_lastbyteidx(s::AbstractVector{UInt8}) = lastindex(s)
+# special case - valid bytes are checked in the generic implementation
+function hex2bytes!(dest::AbstractArray{UInt8}, s::String)
+    sizeof(s) != length(s) && throw(ArgumentError("input string must consist of hexadecimal characters only"))
+
+    hex2bytes!(dest, transcode(UInt8, s))
+end
 
 """
-    hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
+    hex2bytes!(dest::AbstractVector{UInt8}, itr)
 
-Convert an array `s` of bytes representing a hexadecimal string to its binary
+Convert an iterable `itr` of bytes representing a hexadecimal string to its binary
 representation, similar to [`hex2bytes`](@ref) except that the output is written in-place
-in `d`.   The length of `s` must be exactly twice the length of `d`.
-"""
-function hex2bytes!(d::AbstractVector{UInt8}, s::Union{String,AbstractVector{UInt8}})
-    if 2length(d) != sizeof(s)
-        isodd(sizeof(s)) && throw(ArgumentError("input hex array must have even length"))
-        throw(ArgumentError("output array must be half length of input array"))
-    end
-    j = first(eachindex(d)) - 1
-    for i = _firstbyteidx(s):2:_lastbyteidx(s)
-        @inbounds d[j += 1] = number_from_hex(_nthbyte(s,i)) << 4 + number_from_hex(_nthbyte(s,i+1))
+to `dest`. The length of `dest` must be half the length of `itr`.
+
+!!! compat "Julia 1.7"
+    Calling hex2bytes! with iterators producing UInt8 requires
+    version 1.7. In earlier versions, you can collect the iterable
+    before calling instead.
+"""
+function hex2bytes!(dest::AbstractArray{UInt8}, itr)
+    isodd(length(itr)) && throw(ArgumentError("length of iterable must be even"))
+    @boundscheck 2*length(dest) != length(itr) && throw(ArgumentError("length of output array must be half of the length of input iterable"))
+    iszero(length(itr)) && return dest
+
+    next = iterate(itr)
+    @inbounds for i in eachindex(dest)
+        x,state = next::NTuple{2,Any}
+        y,state = iterate(itr, state)::NTuple{2,Any}
+        next = iterate(itr, state)
+        dest[i] = number_from_hex(x) << 4 + number_from_hex(y)
     end
-    return d
+
+    return dest
 end
 
-@inline number_from_hex(c) =
-    (UInt8('0') <= c <= UInt8('9')) ? c - UInt8('0') :
-    (UInt8('A') <= c <= UInt8('F')) ? c - (UInt8('A') - 0x0a) :
-    (UInt8('a') <= c <= UInt8('f')) ? c - (UInt8('a') - 0x0a) :
+@inline number_from_hex(c::AbstractChar) = number_from_hex(Char(c))
+@inline number_from_hex(c::Char) = number_from_hex(UInt8(c))
+@inline function number_from_hex(c::UInt8)
+    UInt8('0') <= c <= UInt8('9') && return c - UInt8('0')
+    c |= 0b0100000
+    UInt8('a') <= c <= UInt8('f') && return c - UInt8('a') + 0x0a
     throw(ArgumentError("byte is not an ASCII hexadecimal digit"))
+end
 
 """
-    bytes2hex(a::AbstractArray{UInt8}) -> String
-    bytes2hex(io::IO, a::AbstractArray{UInt8})
+    bytes2hex(itr) -> String
+    bytes2hex(io::IO, itr)
 
-Convert an array `a` of bytes to its hexadecimal string representation, either
-returning a `String` via `bytes2hex(a)` or writing the string to an `io` stream
-via `bytes2hex(io, a)`.  The hexadecimal characters are all lowercase.
+Convert an iterator `itr` of bytes to its hexadecimal string representation, either
+returning a `String` via `bytes2hex(itr)` or writing the string to an `io` stream
+via `bytes2hex(io, itr)`.  The hexadecimal characters are all lowercase.
+
+!!! compat "Julia 1.7"
+    Calling `bytes2hex` with arbitrary iterators producing `UInt8` values requires
+    Julia 1.7 or later. In earlier versions, you can `collect` the iterator
+    before calling `bytes2hex`.
 
 # Examples
 ```jldoctest
@@ -667,19 +893,22 @@ julia> bytes2hex(b)
 """
 function bytes2hex end
 
-function bytes2hex(a::AbstractArray{UInt8})
-    b = Base.StringVector(2*length(a))
-    @inbounds for (i, x) in enumerate(a)
+function bytes2hex(itr)
+    eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8"))
+    b = Base.StringVector(2*length(itr))
+    @inbounds for (i, x) in enumerate(itr)
         b[2i - 1] = hex_chars[1 + x >> 4]
         b[2i    ] = hex_chars[1 + x & 0xf]
     end
     return String(b)
 end
 
-bytes2hex(io::IO, a::AbstractArray{UInt8}) =
-    for x in a
+function bytes2hex(io::IO, itr)
+    eltype(itr) === UInt8 || throw(ArgumentError("eltype of iterator not UInt8"))
+    for x in itr
         print(io, Char(hex_chars[1 + x >> 4]), Char(hex_chars[1 + x & 0xf]))
     end
+end
 
 # check for pure ASCII-ness
 function ascii(s::String)
@@ -696,6 +925,8 @@ end
 Convert a string to `String` type and check that it contains only ASCII data, otherwise
 throwing an `ArgumentError` indicating the position of the first non-ASCII byte.
 
+See also the [`isascii`](@ref) predicate to filter or replace non-ASCII characters.
+
 # Examples
 ```jldoctest
 julia> ascii("abcdeγfgh")
@@ -708,3 +939,12 @@ julia> ascii("abcdefgh")
 ```
 """
 ascii(x::AbstractString) = ascii(String(x))
+
+Base.rest(s::Union{String,SubString{String}}, i=1) = SubString(s, i)
+function Base.rest(s::AbstractString, st...)
+    io = IOBuffer()
+    for c in Iterators.rest(s, st...)
+        print(io, c)
+    end
+    return String(take!(io))
+end
diff --git a/base/subarray.jl b/base/subarray.jl
index de99ba48f275d8..ff2408bb48534a 100644
--- a/base/subarray.jl
+++ b/base/subarray.jl
@@ -17,22 +17,22 @@ struct SubArray{T,N,P,I,L} <: AbstractArray{T,N}
     offset1::Int       # for linear indexing and pointer, only valid when L==true
     stride1::Int       # used only for linear indexing
     function SubArray{T,N,P,I,L}(parent, indices, offset1, stride1) where {T,N,P,I,L}
-        @_inline_meta
+        @inline
         check_parent_index_match(parent, indices)
         new(parent, indices, offset1, stride1)
     end
 end
 # Compute the linear indexability of the indices, and combine it with the linear indexing of the parent
 function SubArray(parent::AbstractArray, indices::Tuple)
-    @_inline_meta
+    @inline
     SubArray(IndexStyle(viewindexing(indices), IndexStyle(parent)), parent, ensure_indexable(indices), index_dimsum(indices...))
 end
 function SubArray(::IndexCartesian, parent::P, indices::I, ::NTuple{N,Any}) where {P,I,N}
-    @_inline_meta
+    @inline
     SubArray{eltype(P), N, P, I, false}(parent, indices, 0, 0)
 end
 function SubArray(::IndexLinear, parent::P, indices::I, ::NTuple{N,Any}) where {P,I,N}
-    @_inline_meta
+    @inline
     # Compute the stride and offset
     stride1 = compute_stride1(parent, indices)
     SubArray{eltype(P), N, P, I, true}(parent, indices, compute_offset1(parent, stride1, indices), stride1)
@@ -46,9 +46,9 @@ check_parent_index_match(parent, ::NTuple{N, Bool}) where {N} =
 # This computes the linear indexing compatibility for a given tuple of indices
 viewindexing(I::Tuple{}) = IndexLinear()
 # Leading scalar indices simply increase the stride
-viewindexing(I::Tuple{ScalarIndex, Vararg{Any}}) = (@_inline_meta; viewindexing(tail(I)))
+viewindexing(I::Tuple{ScalarIndex, Vararg{Any}}) = (@inline; viewindexing(tail(I)))
 # Slices may begin a section which may be followed by any number of Slices
-viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@_inline_meta; viewindexing(tail(I)))
+viewindexing(I::Tuple{Slice, Slice, Vararg{Any}}) = (@inline; viewindexing(tail(I)))
 # A UnitRange can follow Slices, but only if all other indices are scalar
 viewindexing(I::Tuple{Slice, AbstractUnitRange, Vararg{ScalarIndex}}) = IndexLinear()
 viewindexing(I::Tuple{Slice, Slice, Vararg{ScalarIndex}}) = IndexLinear() # disambiguate
@@ -60,16 +60,20 @@ viewindexing(I::Tuple{Vararg{Any}}) = IndexCartesian()
 viewindexing(I::Tuple{AbstractArray, Vararg{Any}}) = IndexCartesian()
 
 # Simple utilities
-size(V::SubArray) = (@_inline_meta; map(n->Int(unsafe_length(n)), axes(V)))
+size(V::SubArray) = (@inline; map(length, axes(V)))
 
 similar(V::SubArray, T::Type, dims::Dims) = similar(V.parent, T, dims)
 
 sizeof(V::SubArray) = length(V) * sizeof(eltype(V))
 sizeof(V::SubArray{<:Any,<:Any,<:Array}) = length(V) * elsize(V.parent)
 
-elsize(::Type{<:SubArray{<:Any,<:Any,P}}) where {P<:Array} = elsize(P)
-
-copy(V::SubArray) = V.parent[V.indices...]
+function Base.copy(V::SubArray)
+    v = V.parent[V.indices...]
+    ndims(V) == 0 || return v
+    x = similar(V) # ensure proper type of x
+    x[] = v
+    return x
+end
 
 parent(V::SubArray) = V.parent
 parentindices(V::SubArray) = V.indices
@@ -92,7 +96,7 @@ julia> parentindices(V)
 (1, Base.Slice(Base.OneTo(2)))
 ```
 """
-parentindices(a::AbstractArray) = map(OneTo, size(a))
+parentindices(a::AbstractArray) = map(oneto, size(a))
 
 ## Aliasing detection
 dataids(A::SubArray) = (dataids(A.parent)..., _splatmap(dataids, A.indices)...)
@@ -109,7 +113,7 @@ function unaliascopy(V::SubArray{T,N,A,I,LD}) where {T,N,A<:Array,I<:Tuple{Varar
 end
 # Transform indices to be "dense"
 _trimmedindex(i::Real) = oftype(i, 1)
-_trimmedindex(i::AbstractUnitRange) = oftype(i, OneTo(length(i)))
+_trimmedindex(i::AbstractUnitRange) = oftype(i, oneto(length(i)))
 _trimmedindex(i::AbstractArray) = oftype(i, reshape(eachindex(IndexLinear(), i), axes(i)))
 
 ## SubArray creation
@@ -124,10 +128,22 @@ _maybe_reshape_parent(A::AbstractArray, ::NTuple{N, Bool}) where {N} = reshape(A
 """
     view(A, inds...)
 
-Like [`getindex`](@ref), but returns a view into the parent array `A` with the
-given indices instead of making a copy.  Calling [`getindex`](@ref) or
-[`setindex!`](@ref) on the returned `SubArray` computes the
-indices to the parent array on the fly without checking bounds.
+Like [`getindex`](@ref), but returns a lightweight array that lazily references
+(or is effectively a _view_ into) the parent array `A` at the given index or indices
+`inds` instead of eagerly extracting elements or constructing a copied subset.
+Calling [`getindex`](@ref) or [`setindex!`](@ref) on the returned value
+(often a [`SubArray`](@ref)) computes the indices to access or modify the
+parent array on the fly.  The behavior is undefined if the shape of the parent array is
+changed after `view` is called because there is no bound check for the parent array; e.g.,
+it may cause a segmentation fault.
+
+Some immutable parent arrays (like ranges) may choose to simply
+recompute a new array in some circumstances instead of returning
+a `SubArray` if doing so is efficient and provides compatible semantics.
+
+!!! compat "Julia 1.6"
+    In Julia 1.6 or later, `view` can be called on an `AbstractString`, returning a
+    `SubString`.
 
 # Examples
 ```jldoctest
@@ -150,10 +166,13 @@ julia> A # Note A has changed even though we modified b
 2×2 Matrix{Int64}:
  0  2
  0  4
+
+julia> view(2:5, 2:3) # returns a range as type is immutable
+3:4
 ```
 """
 function view(A::AbstractArray, I::Vararg{Any,N}) where {N}
-    @_inline_meta
+    @inline
     J = map(i->unalias(A,i), to_indices(A, I))
     @boundscheck checkbounds(A, J...)
     unsafe_view(_maybe_reshape_parent(A, index_ndims(J...)), J...)
@@ -185,8 +204,14 @@ function view(r1::LinRange, r2::OrdinalRange{<:Integer})
     getindex(r1, r2)
 end
 
+# getindex(r::AbstractRange, ::Colon) returns a copy of the range, and we may do the same for a view
+function view(r1::AbstractRange, c::Colon)
+    @_propagate_inbounds_meta
+    getindex(r1, c)
+end
+
 function unsafe_view(A::AbstractArray, I::Vararg{ViewIndex,N}) where {N}
-    @_inline_meta
+    @inline
     SubArray(A, I)
 end
 # When we take the view of a view, it's often possible to "reindex" the parent
@@ -196,16 +221,16 @@ end
 # So we use _maybe_reindex to figure out if there are any arrays of
 # `CartesianIndex`, and if so, we punt and keep two layers of indirection.
 unsafe_view(V::SubArray, I::Vararg{ViewIndex,N}) where {N} =
-    (@_inline_meta; _maybe_reindex(V, I))
-_maybe_reindex(V, I) = (@_inline_meta; _maybe_reindex(V, I, I))
+    (@inline; _maybe_reindex(V, I))
+_maybe_reindex(V, I) = (@inline; _maybe_reindex(V, I, I))
 _maybe_reindex(V, I, ::Tuple{AbstractArray{<:AbstractCartesianIndex}, Vararg{Any}}) =
-    (@_inline_meta; SubArray(V, I))
+    (@inline; SubArray(V, I))
 # But allow arrays of CartesianIndex{1}; they behave just like arrays of Ints
 _maybe_reindex(V, I, A::Tuple{AbstractArray{<:AbstractCartesianIndex{1}}, Vararg{Any}}) =
-    (@_inline_meta; _maybe_reindex(V, I, tail(A)))
-_maybe_reindex(V, I, A::Tuple{Any, Vararg{Any}}) = (@_inline_meta; _maybe_reindex(V, I, tail(A)))
+    (@inline; _maybe_reindex(V, I, tail(A)))
+_maybe_reindex(V, I, A::Tuple{Any, Vararg{Any}}) = (@inline; _maybe_reindex(V, I, tail(A)))
 function _maybe_reindex(V, I, ::Tuple{})
-    @_inline_meta
+    @inline
     @inbounds idxs = to_indices(V.parent, reindex(V.indices, I))
     SubArray(V.parent, idxs)
 end
@@ -252,7 +277,7 @@ end
 # In general, we simply re-index the parent indices by the provided ones
 SlowSubArray{T,N,P,I} = SubArray{T,N,P,I,false}
 function getindex(V::SubArray{T,N}, I::Vararg{Int,N}) where {T,N}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, I...)
     @inbounds r = V.parent[reindex(V.indices, I)...]
     r
@@ -261,7 +286,7 @@ end
 # But SubArrays with fast linear indexing pre-compute a stride and offset
 FastSubArray{T,N,P,I} = SubArray{T,N,P,I,true}
 function getindex(V::FastSubArray, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + V.stride1*i]
     r
@@ -271,7 +296,7 @@ end
 FastContiguousSubArray{T,N,P,I<:Union{Tuple{Union{Slice, AbstractUnitRange}, Vararg{Any}},
                                       Tuple{Vararg{ScalarIndex}}}} = SubArray{T,N,P,I,true}
 function getindex(V::FastContiguousSubArray, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + i]
     r
@@ -279,13 +304,13 @@ end
 # For vector views with linear indexing, we disambiguate to favor the stride/offset
 # computation as that'll generally be faster than (or just as fast as) re-indexing into a range.
 function getindex(V::FastSubArray{<:Any, 1}, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + V.stride1*i]
     r
 end
 function getindex(V::FastContiguousSubArray{<:Any, 1}, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds r = V.parent[V.offset1 + i]
     r
@@ -293,31 +318,31 @@ end
 
 # Indexed assignment follows the same pattern as `getindex` above
 function setindex!(V::SubArray{T,N}, x, I::Vararg{Int,N}) where {T,N}
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, I...)
     @inbounds V.parent[reindex(V.indices, I)...] = x
     V
 end
 function setindex!(V::FastSubArray, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + V.stride1*i] = x
     V
 end
 function setindex!(V::FastContiguousSubArray, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + i] = x
     V
 end
 function setindex!(V::FastSubArray{<:Any, 1}, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + V.stride1*i] = x
     V
 end
 function setindex!(V::FastContiguousSubArray{<:Any, 1}, x, i::Int)
-    @_inline_meta
+    @inline
     @boundscheck checkbounds(V, i)
     @inbounds V.parent[V.offset1 + i] = x
     V
@@ -339,11 +364,11 @@ substrides(strds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("strides is
 stride(V::SubArray, d::Integer) = d <= ndims(V) ? strides(V)[d] : strides(V)[end] * size(V)[end]
 
 compute_stride1(parent::AbstractArray, I::NTuple{N,Any}) where {N} =
-    (@_inline_meta; compute_stride1(1, fill_to_length(axes(parent), OneTo(1), Val(N)), I))
+    (@inline; compute_stride1(1, fill_to_length(axes(parent), OneTo(1), Val(N)), I))
 compute_stride1(s, inds, I::Tuple{}) = s
 compute_stride1(s, inds, I::Tuple{Vararg{ScalarIndex}}) = s
 compute_stride1(s, inds, I::Tuple{ScalarIndex, Vararg{Any}}) =
-    (@_inline_meta; compute_stride1(s*unsafe_length(inds[1]), tail(inds), tail(I)))
+    (@inline; compute_stride1(s*length(inds[1]), tail(inds), tail(I)))
 compute_stride1(s, inds, I::Tuple{AbstractRange, Vararg{Any}}) = s*step(I[1])
 compute_stride1(s, inds, I::Tuple{Slice, Vararg{Any}}) = s
 compute_stride1(s, inds, I::Tuple{Any, Vararg{Any}}) = throw(ArgumentError("invalid strided index type $(typeof(I[1]))"))
@@ -366,42 +391,42 @@ end
 # The running sum is `f`; the cumulative stride product is `s`.
 # If the parent is a vector, then we offset the parent's own indices with parameters of I
 compute_offset1(parent::AbstractVector, stride1::Integer, I::Tuple{AbstractRange}) =
-    (@_inline_meta; first(I[1]) - stride1*first(axes1(I[1])))
+    (@inline; first(I[1]) - stride1*first(axes1(I[1])))
 # If the result is one-dimensional and it's a Colon, then linear
 # indexing uses the indices along the given dimension.
 # If the result is one-dimensional and it's a range, then linear
 # indexing might be offset if the index itself is offset
-# Otherwise linear indexing always starts with 1.
+# Otherwise linear indexing always matches the parent.
 compute_offset1(parent, stride1::Integer, I::Tuple) =
-    (@_inline_meta; compute_offset1(parent, stride1, find_extended_dims(1, I...), find_extended_inds(I...), I))
+    (@inline; compute_offset1(parent, stride1, find_extended_dims(1, I...), find_extended_inds(I...), I))
 compute_offset1(parent, stride1::Integer, dims::Tuple{Int}, inds::Tuple{Slice}, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1*first(axes(parent, dims[1])))  # index-preserving case
+    (@inline; compute_linindex(parent, I) - stride1*first(axes(parent, dims[1])))  # index-preserving case
 compute_offset1(parent, stride1::Integer, dims, inds::Tuple{AbstractRange}, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1*first(axes1(inds[1]))) # potentially index-offsetting case
+    (@inline; compute_linindex(parent, I) - stride1*first(axes1(inds[1]))) # potentially index-offsetting case
 compute_offset1(parent, stride1::Integer, dims, inds, I::Tuple) =
-    (@_inline_meta; compute_linindex(parent, I) - stride1)  # linear indexing starts with 1
+    (@inline; compute_linindex(parent, I) - stride1)
 function compute_linindex(parent, I::NTuple{N,Any}) where N
-    @_inline_meta
+    @inline
     IP = fill_to_length(axes(parent), OneTo(1), Val(N))
-    compute_linindex(1, 1, IP, I)
+    compute_linindex(first(LinearIndices(parent)), 1, IP, I)
 end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{ScalarIndex, Vararg{Any}})
-    @_inline_meta
+    @inline
     Δi = I[1]-first(IP[1])
-    compute_linindex(f + Δi*s, s*unsafe_length(IP[1]), tail(IP), tail(I))
+    compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
 end
 function compute_linindex(f, s, IP::Tuple, I::Tuple{Any, Vararg{Any}})
-    @_inline_meta
+    @inline
     Δi = first(I[1])-first(IP[1])
-    compute_linindex(f + Δi*s, s*unsafe_length(IP[1]), tail(IP), tail(I))
+    compute_linindex(f + Δi*s, s*length(IP[1]), tail(IP), tail(I))
 end
 compute_linindex(f, s, IP::Tuple, I::Tuple{}) = f
 
-find_extended_dims(dim, ::ScalarIndex, I...) = (@_inline_meta; find_extended_dims(dim + 1, I...))
-find_extended_dims(dim, i1, I...) = (@_inline_meta; (dim, find_extended_dims(dim + 1, I...)...))
+find_extended_dims(dim, ::ScalarIndex, I...) = (@inline; find_extended_dims(dim + 1, I...))
+find_extended_dims(dim, i1, I...) = (@inline; (dim, find_extended_dims(dim + 1, I...)...))
 find_extended_dims(dim) = ()
-find_extended_inds(::ScalarIndex, I...) = (@_inline_meta; find_extended_inds(I...))
-find_extended_inds(i1, I...) = (@_inline_meta; (i1, find_extended_inds(I...)...))
+find_extended_inds(::ScalarIndex, I...) = (@inline; find_extended_inds(I...))
+find_extended_inds(i1, I...) = (@inline; (i1, find_extended_inds(I...)...))
 find_extended_inds() = ()
 
 function unsafe_convert(::Type{Ptr{T}}, V::SubArray{T,N,P,<:Tuple{Vararg{RangeIndex}}}) where {T,N,P}
@@ -423,10 +448,10 @@ end
 # indices are taken from the range/vector
 # Since bounds-checking is performance-critical and uses
 # indices, it's worth optimizing these implementations thoroughly
-axes(S::SubArray) = (@_inline_meta; _indices_sub(S.indices...))
-_indices_sub(::Real, I...) = (@_inline_meta; _indices_sub(I...))
+axes(S::SubArray) = (@inline; _indices_sub(S.indices...))
+_indices_sub(::Real, I...) = (@inline; _indices_sub(I...))
 _indices_sub() = ()
 function _indices_sub(i1::AbstractArray, I...)
-    @_inline_meta
-    (unsafe_indices(i1)..., _indices_sub(I...)...)
+    @inline
+    (axes(i1)..., _indices_sub(I...)...)
 end
diff --git a/base/summarysize.jl b/base/summarysize.jl
index 1c83089c325bac..849edee2064541 100644
--- a/base/summarysize.jl
+++ b/base/summarysize.jl
@@ -17,6 +17,20 @@ Compute the amount of memory, in bytes, used by all unique objects reachable fro
 - `exclude`: specifies the types of objects to exclude from the traversal.
 - `chargeall`: specifies the types of objects to always charge the size of all of their
   fields, even if those fields would normally be excluded.
+
+See also [`sizeof`](@ref).
+
+# Examples
+```jldoctest
+julia> Base.summarysize(1.0)
+8
+
+julia> Base.summarysize(Ref(rand(100)))
+848
+
+julia> sizeof(Ref(rand(100)))
+8
+```
 """
 function summarysize(obj;
                      exclude = Union{DataType, Core.TypeName, Core.MethodInstance},
@@ -120,12 +134,13 @@ function (ss::SummarySize)(obj::Array)
     if !haskey(ss.seen, datakey)
         ss.seen[datakey] = true
         dsize = Core.sizeof(obj)
-        if isbitsunion(eltype(obj))
+        T = eltype(obj)
+        if isbitsunion(T)
             # add 1 union selector byte for each element
             dsize += length(obj)
         end
         size += dsize
-        if !isempty(obj) && !Base.allocatedinline(eltype(obj))
+        if !isempty(obj) && T !== Symbol && (!Base.allocatedinline(T) || (T isa DataType && !Base.datatype_pointerfree(T)))
             push!(ss.frontier_x, obj)
             push!(ss.frontier_i, 1)
         end
@@ -173,8 +188,9 @@ function (ss::SummarySize)(obj::Task)
     end
     size += ss(obj.storage)::Int
     size += ss(obj.donenotify)::Int
-    size += ss(obj.exception)::Int
     size += ss(obj.result)::Int
     # TODO: add stack size, and possibly traverse stack roots
     return size
 end
+
+(ss::SummarySize)(obj::BigInt) = _summarysize(ss, obj) + obj.alloc*sizeof(Base.GMP.Limb)
diff --git a/base/sysimg.jl b/base/sysimg.jl
index 893af845771771..36c40e4ae748a2 100644
--- a/base/sysimg.jl
+++ b/base/sysimg.jl
@@ -19,45 +19,62 @@ Base.init_load_path()
 if Base.is_primary_base_module
 # load some stdlib packages but don't put their names in Main
 let
-    # Stdlibs manually sorted in top down order
+    # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl
+    # Run with the `--exclude-jlls` option to filter out all JLL packages
     stdlibs = [
-            # No deps
-            :Base64,
-            :CRC32c,
-            :SHA,
-            :FileWatching,
-            :Unicode,
-            :Mmap,
-            :Serialization,
-            :Libdl,
-            :Printf,
-            :Markdown,
-            :LibGit2,
-            :Logging,
-            :Sockets,
-            :Profile,
-            :Dates,
-            :DelimitedFiles,
-            :Random,
-            :UUIDs,
-            :Future,
-            :LinearAlgebra,
-            :SparseArrays,
-            :SuiteSparse,
-            :Distributed,
-            :SharedArrays,
-            :TOML,
-            :Artifacts,
-            :Pkg,
-            :Test,
-            :REPL,
-            :Statistics,
-            :MozillaCACerts_jll,
-            :LibCURL_jll,
-            :LibCURL,
-            :Downloads,
-        ]
-
+        # No dependencies
+        :ArgTools,
+        :Artifacts,
+        :Base64,
+        :CRC32c,
+        :FileWatching,
+        :Libdl,
+        :Logging,
+        :Mmap,
+        :NetworkOptions,
+        :SHA,
+        :Serialization,
+        :Sockets,
+        :Unicode,
+
+        # 1-depth packages
+        :DelimitedFiles,
+        :LinearAlgebra,
+        :Markdown,
+        :Printf,
+        :Random,
+        :Tar,
+
+        # 2-depth packages
+        :Dates,
+        :Distributed,
+        :Future,
+        :InteractiveUtils,
+        :LibGit2,
+        :Profile,
+        :SparseArrays,
+        :UUIDs,
+
+        # 3-depth packages
+        :REPL,
+        :SharedArrays,
+        :Statistics,
+        :SuiteSparse,
+        :TOML,
+        :Test,
+
+        # 4-depth packages
+        :LibCURL,
+
+        # 5-depth packages
+        :Downloads,
+
+        # 6-depth packages
+        :Pkg,
+
+        # 7-depth packages
+        :LazyArtifacts,
+    ]
     maxlen = reduce(max, textwidth.(string.(stdlibs)); init=0)
 
     tot_time_stdlib = 0.0
@@ -108,6 +125,8 @@ let
     empty!(DEPOT_PATH)
 end
 
+empty!(Base.TOML_CACHE.d)
+Base.TOML.reinit!(Base.TOML_CACHE.p, "")
 @eval Sys begin
     BINDIR = ""
     STDLIB = ""
diff --git a/base/sysinfo.jl b/base/sysinfo.jl
index 4a16d4b17bf078..f0852f32fc17df 100644
--- a/base/sysinfo.jl
+++ b/base/sysinfo.jl
@@ -35,20 +35,19 @@ export BINDIR,
 
 import ..Base: show
 
-global BINDIR = ccall(:jl_get_julia_bindir, Any, ())::String
 """
-    Sys.BINDIR
+    Sys.BINDIR::String
 
 A string containing the full path to the directory containing the `julia` executable.
 """
-:BINDIR
+global BINDIR::String = ccall(:jl_get_julia_bindir, Any, ())::String
 
 """
-    Sys.STDLIB
+    Sys.STDLIB::String
 
 A string containing the full path to the directory containing the `stdlib` packages.
 """
-STDLIB = "$BINDIR/../share/julia/stdlib/v$(VERSION.major).$(VERSION.minor)" # for bootstrap
+global STDLIB::String = "$BINDIR/../share/julia/stdlib/v$(VERSION.major).$(VERSION.minor)" # for bootstrap
 # In case STDLIB change after julia is built, the variable below can be used
 # to update cached method locations to updated ones.
 const BUILD_STDLIB_PATH = STDLIB
@@ -56,7 +55,7 @@ const BUILD_STDLIB_PATH = STDLIB
 # helper to avoid triggering precompile warnings
 
 """
-    Sys.CPU_THREADS
+    Sys.CPU_THREADS::Int
 
 The number of logical CPU cores available in the system, i.e. the number of threads
 that the CPU can run concurrently. Note that this is not necessarily the number of
@@ -65,37 +64,39 @@ CPU cores, for example, in the presence of
 
 See Hwloc.jl or CpuId.jl for extended information, including number of physical cores.
 """
-CPU_THREADS = 1 # for bootstrap, changed on startup
+global CPU_THREADS::Int = 1 # for bootstrap, changed on startup
 
 """
-    Sys.ARCH
+    Sys.ARCH::Symbol
 
 A symbol representing the architecture of the build configuration.
 """
-const ARCH = ccall(:jl_get_ARCH, Any, ())
+const ARCH = ccall(:jl_get_ARCH, Any, ())::Symbol
 
 
 """
-    Sys.KERNEL
+    Sys.KERNEL::Symbol
 
 A symbol representing the name of the operating system, as returned by `uname` of the build configuration.
 """
-const KERNEL = ccall(:jl_get_UNAME, Any, ())
+const KERNEL = ccall(:jl_get_UNAME, Any, ())::Symbol
 
 """
-    Sys.MACHINE
+    Sys.MACHINE::String
 
 A string containing the build triple.
 """
-const MACHINE = Base.MACHINE
+const MACHINE = Base.MACHINE::String
 
 """
-    Sys.WORD_SIZE
+    Sys.WORD_SIZE::Int
 
 Standard word size on the current machine, in bits.
 """
 const WORD_SIZE = Core.sizeof(Int) * 8
 
+global SC_CLK_TCK::Clong, CPU_NAME::String, JIT::String
+
 function __init__()
     env_threads = nothing
     if haskey(ENV, "JULIA_CPU_THREADS")
@@ -286,8 +287,8 @@ end
 
 Get the maximum resident set size utilized in bytes.
 See also:
-    - man page of getrusage(2) on Linux and FreeBSD.
-    - windows api `GetProcessMemoryInfo`
+    - man page of `getrusage`(2) on Linux and FreeBSD.
+    - Windows API `GetProcessMemoryInfo`.
 """
 maxrss() = ccall(:jl_maxrss, Csize_t, ())
 
@@ -499,7 +500,7 @@ function which(program_name::String)
         # If we have been given just a program name (not a relative or absolute
         # path) then we should search `PATH` for it here:
         pathsep = iswindows() ? ';' : ':'
-        path_dirs = abspath.(split(get(ENV, "PATH", ""), pathsep))
+        path_dirs = map(abspath, eachsplit(get(ENV, "PATH", ""), pathsep))
 
         # On windows we always check the current directory as well
         if iswindows()
@@ -516,7 +517,7 @@ function which(program_name::String)
             program_path = joinpath(path_dir, pname)
             # If we find something that matches our name and we can execute
             if isfile(program_path) && isexecutable(program_path)
-                return realpath(program_path)
+                return program_path
             end
         end
     end
diff --git a/base/task.jl b/base/task.jl
index 7d74e2988fc788..6b7f5747f12740 100644
--- a/base/task.jl
+++ b/base/task.jl
@@ -25,6 +25,16 @@ function showerror(io::IO, ce::CapturedException)
     showerror(io, ce.ex, ce.processed_bt, backtrace=true)
 end
 
+"""
+    capture_exception(ex, bt) -> Exception
+
+Returns an exception, possibly incorporating information from a backtrace `bt`. Defaults to returning [`CapturedException(ex, bt)`](@ref).
+
+Used in [`asyncmap`](@ref) and [`asyncmap!`](@ref) to capture exceptions thrown during
+the user-supplied function call.
+"""
+capture_exception(ex, bt) = CapturedException(ex, bt)
+
 """
     CompositeException
 
@@ -40,6 +50,7 @@ struct CompositeException <: Exception
 end
 length(c::CompositeException) = length(c.exceptions)
 push!(c::CompositeException, ex) = push!(c.exceptions, ex)
+pushfirst!(c::CompositeException, ex) = pushfirst!(c.exceptions, ex)
 isempty(c::CompositeException) = isempty(c.exceptions)
 iterate(c::CompositeException, state...) = iterate(c.exceptions, state...)
 eltype(::Type{CompositeException}) = Any
@@ -49,7 +60,7 @@ function showerror(io::IO, ex::CompositeException)
         showerror(io, ex.exceptions[1])
         remaining = length(ex) - 1
         if remaining > 0
-            print(io, string("\n\n...and ", remaining, " more exception(s).\n"))
+            print(io, "\n\n...and ", remaining, " more exception", remaining > 1 ? "s" : "", ".\n")
         end
     else
         print(io, "CompositeException()\n")
@@ -77,9 +88,14 @@ function showerror(io::IO, ex::TaskFailedException, bt = nothing; backtrace=true
 end
 
 function show_task_exception(io::IO, t::Task; indent = true)
-    stack = catch_stack(t)
+    stack = current_exceptions(t)
     b = IOBuffer()
-    show_exception_stack(IOContext(b, io), stack)
+    if isempty(stack)
+        # exception stack buffer not available; probably a serialized task
+        showerror(IOContext(b, io), t.result)
+    else
+        show_exception_stack(IOContext(b, io), stack)
+    end
     str = String(take!(b))
     if indent
         str = replace(str, "\n" => "\n    ")
@@ -131,10 +147,21 @@ const task_state_runnable = UInt8(0)
 const task_state_done     = UInt8(1)
 const task_state_failed   = UInt8(2)
 
+const _state_index = findfirst(==(:_state), fieldnames(Task))
+@eval function load_state_acquire(t)
+    # TODO: Replace this by proper atomic operations when available
+    @GC.preserve t llvmcall($("""
+        %ptr = inttoptr i$(Sys.WORD_SIZE) %0 to i8*
+        %rv = load atomic i8, i8* %ptr acquire, align 8
+        ret i8 %rv
+        """), UInt8, Tuple{Ptr{UInt8}},
+        Ptr{UInt8}(pointer_from_objref(t) + fieldoffset(Task, _state_index)))
+end
+
 @inline function getproperty(t::Task, field::Symbol)
     if field === :state
         # TODO: this field name should be deprecated in 2.0
-        st = getfield(t, :_state)
+        st = load_state_acquire(t)
         if st === task_state_runnable
             return :runnable
         elseif st === task_state_done
@@ -146,7 +173,10 @@ const task_state_failed   = UInt8(2)
         end
     elseif field === :backtrace
         # TODO: this field name should be deprecated in 2.0
-        return catch_stack(t)[end][2]
+        return current_exceptions(t)[end][2]
+    elseif field === :exception
+        # TODO: this field name should be deprecated in 2.0
+        return t._isexception ? t.result : nothing
     else
         return getfield(t, field)
     end
@@ -174,7 +204,7 @@ julia> istaskdone(b)
 true
 ```
 """
-istaskdone(t::Task) = t._state !== task_state_runnable
+istaskdone(t::Task) = load_state_acquire(t) !== task_state_runnable
 
 """
     istaskstarted(t::Task) -> Bool
@@ -218,9 +248,13 @@ true
 !!! compat "Julia 1.3"
     This function requires at least Julia 1.3.
 """
-istaskfailed(t::Task) = (t._state === task_state_failed)
+istaskfailed(t::Task) = (load_state_acquire(t) === task_state_failed)
 
 Threads.threadid(t::Task) = Int(ccall(:jl_get_task_tid, Int16, (Any,), t)+1)
+function Threads.threadpool(t::Task)
+    tpid = ccall(:jl_get_task_threadpoolid, Int8, (Any,), t)
+    return tpid == 0 ? :default : :interactive
+end
 
 task_result(t::Task) = t.result
 
@@ -287,6 +321,18 @@ function _wait2(t::Task, waiter::Task)
         if !istaskdone(t)
             push!(t.donenotify.waitq, waiter)
             unlock(t.donenotify)
+            # since _wait2 is similar to schedule, we should observe the sticky
+            # bit, even if we aren't calling `schedule` due to this early-return
+            if waiter.sticky && Threads.threadid(waiter) == 0
+                # Issue #41324
+                # t.sticky && tid == 0 is a task that needs to be co-scheduled with
+                # the parent task. If the parent (current_task) is not sticky we must
+                # set it to be sticky.
+                # XXX: Ideally we would be able to unset this
+                current_task().sticky = true
+                tid = Threads.threadid()
+                ccall(:jl_set_task_tid, Cint, (Any, Cint), waiter, tid-1)
+            end
             return nothing
         else
             unlock(t.donenotify)
@@ -322,6 +368,29 @@ end
 
 ## lexically-scoped waiting for multiple items
 
+struct ScheduledAfterSyncException <: Exception
+    values::Vector{Any}
+end
+
+function showerror(io::IO, ex::ScheduledAfterSyncException)
+    print(io, "ScheduledAfterSyncException: ")
+    if isempty(ex.values)
+        print(io, "(no values)")
+        return
+    end
+    show(io, ex.values[1])
+    if length(ex.values) == 1
+        print(io, " is")
+    elseif length(ex.values) == 2
+        print(io, " and one more ")
+        print(io, nameof(typeof(ex.values[2])))
+        print(io, " are")
+    else
+        print(io, " and ", length(ex.values) - 1, " more objects are")
+    end
+    print(io, " registered after the end of a `@sync` block")
+end
+
 function sync_end(c::Channel{Any})
     local c_ex
     while isready(c)
@@ -346,6 +415,25 @@ function sync_end(c::Channel{Any})
         end
     end
     close(c)
+
+    # Capture all waitable objects scheduled after the end of `@sync` and
+    # include them in the exception. This way, the user can check what was
+    # scheduled by examining at the exception object.
+    local racy
+    for r in c
+        if !@isdefined(racy)
+            racy = []
+        end
+        push!(racy, r)
+    end
+    if @isdefined(racy)
+        if !@isdefined(c_ex)
+            c_ex = CompositeException()
+        end
+        # Since this is a clear programming error, show this exception first:
+        pushfirst!(c_ex, ScheduledAfterSyncException(racy))
+    end
+
     if @isdefined(c_ex)
         throw(c_ex)
     end
@@ -383,10 +471,24 @@ Values can be interpolated into `@async` via `\$`, which copies the value direct
 constructed underlying closure. This allows you to insert the _value_ of a variable,
 isolating the asynchronous code from changes to the variable's value in the current task.
 
+!!! warning
+    It is strongly encouraged to favor `Threads.@spawn` over `@async` always **even when no
+    parallelism is required** especially in publicly distributed libraries.  This is
+    because a use of `@async` disables the migration of the *parent* task across worker
+    threads in the current implementation of Julia.  Thus, seemingly innocent use of
+    `@async` in a library function can have a large impact on the performance of very
+    different parts of user applications.
+
 !!! compat "Julia 1.4"
     Interpolating values via `\$` is available as of Julia 1.4.
 """
 macro async(expr)
+    do_async_macro(expr)
+end
+
+# generate the code for @async, possibly wrapping the task in something before
+# pushing it to the wait queue.
+function do_async_macro(expr; wrap=identity)
     letargs = Base._lift_one_interp!(expr)
 
     thunk = esc(:(()->($expr)))
@@ -395,7 +497,7 @@ macro async(expr)
         let $(letargs...)
             local task = Task($thunk)
             if $(Expr(:islocal, var))
-                put!($var, task)
+                put!($var, $(wrap(:task)))
             end
             schedule(task)
             task
@@ -403,6 +505,73 @@ macro async(expr)
     end
 end
 
+# task wrapper that doesn't create exceptions wrapped in TaskFailedException
+struct UnwrapTaskFailedException
+    task::Task
+end
+
+# common code for wait&fetch for UnwrapTaskFailedException
+function unwrap_task_failed(f::Function, t::UnwrapTaskFailedException)
+    try
+        f(t.task)
+    catch ex
+        if ex isa TaskFailedException
+            throw(ex.task.exception)
+        else
+            rethrow()
+        end
+    end
+end
+
+# the unwrapping for above task wrapper (gets triggered in sync_end())
+wait(t::UnwrapTaskFailedException) = unwrap_task_failed(wait, t)
+
+# same for fetching the tasks, for convenience
+fetch(t::UnwrapTaskFailedException) = unwrap_task_failed(fetch, t)
+
+# macro for running async code that doesn't throw wrapped exceptions
+macro async_unwrap(expr)
+    do_async_macro(expr, wrap=task->:(Base.UnwrapTaskFailedException($task)))
+end
+
+"""
+    errormonitor(t::Task)
+
+Print an error log to `stderr` if task `t` fails.
+"""
+function errormonitor(t::Task)
+    t2 = Task() do
+        if istaskfailed(t)
+            local errs = stderr
+            try # try to display the failure atomically
+                errio = IOContext(PipeBuffer(), errs::IO)
+                emphasize(errio, "Unhandled Task ")
+                display_error(errio, scrub_repl_backtrace(current_exceptions(t)))
+                write(errs, errio)
+            catch
+                try # try to display the secondary error atomically
+                    errio = IOContext(PipeBuffer(), errs::IO)
+                    print(errio, "\nSYSTEM: caught exception while trying to print a failed Task notice: ")
+                    display_error(errio, scrub_repl_backtrace(current_exceptions()))
+                    write(errs, errio)
+                    flush(errs)
+                    # and then the actual error, as best we can
+                    Core.print(Core.stderr, "while handling: ")
+                    Core.println(Core.stderr, current_exceptions(t)[end][1])
+                catch e
+                    # give up
+                    Core.print(Core.stderr, "\nSYSTEM: caught exception of type ", typeof(e).name.name,
+                            " while trying to print a failed Task notice; giving up\n")
+                end
+            end
+        end
+        nothing
+    end
+    t2.sticky = false
+    _wait2(t, t2)
+    return t
+end
+
 # Capture interpolated variables in $() and move them to let-block
 function _lift_one_interp!(e)
     letargs = Any[]  # store the new gensymed arguments
@@ -491,14 +660,14 @@ end
 
 ## scheduler and work queue
 
-struct InvasiveLinkedListSynchronized{T}
-    queue::InvasiveLinkedList{T}
+struct IntrusiveLinkedListSynchronized{T}
+    queue::IntrusiveLinkedList{T}
     lock::Threads.SpinLock
-    InvasiveLinkedListSynchronized{T}() where {T} = new(InvasiveLinkedList{T}(), Threads.SpinLock())
+    IntrusiveLinkedListSynchronized{T}() where {T} = new(IntrusiveLinkedList{T}(), Threads.SpinLock())
 end
-isempty(W::InvasiveLinkedListSynchronized) = isempty(W.queue)
-length(W::InvasiveLinkedListSynchronized) = length(W.queue)
-function push!(W::InvasiveLinkedListSynchronized{T}, t::T) where T
+isempty(W::IntrusiveLinkedListSynchronized) = isempty(W.queue)
+length(W::IntrusiveLinkedListSynchronized) = length(W.queue)
+function push!(W::IntrusiveLinkedListSynchronized{T}, t::T) where T
     lock(W.lock)
     try
         push!(W.queue, t)
@@ -507,7 +676,7 @@ function push!(W::InvasiveLinkedListSynchronized{T}, t::T) where T
     end
     return W
 end
-function pushfirst!(W::InvasiveLinkedListSynchronized{T}, t::T) where T
+function pushfirst!(W::IntrusiveLinkedListSynchronized{T}, t::T) where T
     lock(W.lock)
     try
         pushfirst!(W.queue, t)
@@ -516,7 +685,7 @@ function pushfirst!(W::InvasiveLinkedListSynchronized{T}, t::T) where T
     end
     return W
 end
-function pop!(W::InvasiveLinkedListSynchronized)
+function pop!(W::IntrusiveLinkedListSynchronized)
     lock(W.lock)
     try
         return pop!(W.queue)
@@ -524,7 +693,7 @@ function pop!(W::InvasiveLinkedListSynchronized)
         unlock(W.lock)
     end
 end
-function popfirst!(W::InvasiveLinkedListSynchronized)
+function popfirst!(W::IntrusiveLinkedListSynchronized)
     lock(W.lock)
     try
         return popfirst!(W.queue)
@@ -532,7 +701,7 @@ function popfirst!(W::InvasiveLinkedListSynchronized)
         unlock(W.lock)
     end
 end
-function list_deletefirst!(W::InvasiveLinkedListSynchronized{T}, t::T) where T
+function list_deletefirst!(W::IntrusiveLinkedListSynchronized{T}, t::T) where T
     lock(W.lock)
     try
         list_deletefirst!(W.queue, t)
@@ -542,41 +711,50 @@ function list_deletefirst!(W::InvasiveLinkedListSynchronized{T}, t::T) where T
     return W
 end
 
-const StickyWorkqueue = InvasiveLinkedListSynchronized{Task}
-global const Workqueues = [StickyWorkqueue()]
-global const Workqueue = Workqueues[1] # default work queue is thread 1
-function __preinit_threads__()
-    if length(Workqueues) < Threads.nthreads()
-        resize!(Workqueues, Threads.nthreads())
-        for i = 2:length(Workqueues)
-            Workqueues[i] = StickyWorkqueue()
+const StickyWorkqueue = IntrusiveLinkedListSynchronized{Task}
+global Workqueues::Vector{StickyWorkqueue} = [StickyWorkqueue()]
+const Workqueues_lock = Threads.SpinLock()
+const Workqueue = Workqueues[1] # default work queue is thread 1 // TODO: deprecate this variable
+
+function workqueue_for(tid::Int)
+    qs = Workqueues
+    if length(qs) >= tid && isassigned(qs, tid)
+        return @inbounds qs[tid]
+    end
+    # slow path to allocate it
+    l = Workqueues_lock
+    @lock l begin
+        qs = Workqueues
+        if length(qs) < tid
+            nt = Threads.nthreads()
+            @assert tid <= nt
+            global Workqueues = qs = copyto!(typeof(qs)(undef, length(qs) + nt - 1), qs)
         end
+        if !isassigned(qs, tid)
+            @inbounds qs[tid] = StickyWorkqueue()
+        end
+        return @inbounds qs[tid]
     end
-    nothing
 end
 
 function enq_work(t::Task)
     (t._state === task_state_runnable && t.queue === nothing) || error("schedule: Task not runnable")
-    tid = Threads.threadid(t)
-    # Note there are three reasons a Task might be put into a sticky queue
-    # even if t.sticky == false:
-    # 1. The Task's stack is currently being used by the scheduler for a certain thread.
-    # 2. There is only 1 thread.
-    # 3. The multiq is full (can be fixed by making it growable).
-    if t.sticky || tid != 0 || Threads.nthreads() == 1
+    if t.sticky || Threads.nthreads() == 1
+        tid = Threads.threadid(t)
         if tid == 0
+            # Issue #41324
+            # t.sticky && tid == 0 is a task that needs to be co-scheduled with
+            # the parent task. If the parent (current_task) is not sticky we must
+            # set it to be sticky.
+            # XXX: Ideally we would be able to unset this
+            current_task().sticky = true
             tid = Threads.threadid()
-            ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid-1)
+            ccall(:jl_set_task_tid, Cint, (Any, Cint), t, tid-1)
         end
-        push!(Workqueues[tid], t)
+        push!(workqueue_for(tid), t)
     else
+        Partr.multiq_insert(t, t.priority)
         tid = 0
-        if ccall(:jl_enqueue_task, Cint, (Any,), t) != 0
-            # if multiq is full, give to a random thread (TODO fix)
-            tid = mod(time_ns() % Int, Threads.nthreads()) + 1
-            ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, tid-1)
-            push!(Workqueues[tid], t)
-        end
     end
     ccall(:jl_wakeup_thread, Cvoid, (Int16,), (tid - 1) % Int16)
     return t
@@ -594,6 +772,10 @@ If a second argument `val` is provided, it will be passed to the task (via the r
 [`yieldto`](@ref)) when it runs again. If `error` is `true`, the value is raised as an exception in
 the woken task.
 
+!!! warning
+    It is incorrect to use `schedule` on an arbitrary `Task` that has already been started.
+    See [the API reference](@ref low-level-schedule-wait) for more information.
+
 # Examples
 ```jldoctest
 julia> a5() = sum(i for i in 1:1000);
@@ -619,7 +801,8 @@ function schedule(t::Task, @nospecialize(arg); error=false)
     t._state === task_state_runnable || Base.error("schedule: Task not runnable")
     if error
         t.queue === nothing || Base.list_deletefirst!(t.queue, t)
-        setfield!(t, :exception, arg)
+        setfield!(t, :result, arg)
+        setfield!(t, :_isexception, true)
     else
         t.queue === nothing || Base.error("schedule: Task not runnable")
         setfield!(t, :result, arg)
@@ -655,6 +838,7 @@ A fast, unfair-scheduling version of `schedule(t, arg); yield()` which
 immediately yields to `t` before calling the scheduler.
 """
 function yield(t::Task, @nospecialize(x=nothing))
+    (t._state === task_state_runnable && t.queue === nothing) || error("yield: Task not runnable")
     t.result = x
     enq_work(current_task())
     set_next_task(t)
@@ -670,6 +854,13 @@ call to `yieldto`. This is a low-level call that only switches tasks, not consid
 or scheduling in any way. Its use is discouraged.
 """
 function yieldto(t::Task, @nospecialize(x=nothing))
+    # TODO: these are legacy behaviors; these should perhaps be a scheduler
+    # state error instead.
+    if t._state === task_state_done
+        return x
+    elseif t._state === task_state_failed
+        throw(t.result)
+    end
     t.result = x
     set_next_task(t)
     return try_yieldto(identity)
@@ -683,9 +874,10 @@ function try_yieldto(undo)
         rethrow()
     end
     ct = current_task()
-    exc = ct.exception
-    if exc !== nothing
-        ct.exception = nothing
+    if ct._isexception
+        exc = ct.result
+        ct.result = nothing
+        ct._isexception = false
         throw(exc)
     end
     result = ct.result
@@ -695,18 +887,20 @@ end
 
 # yield to a task, throwing an exception in it
 function throwto(t::Task, @nospecialize exc)
-    t.exception = exc
-    return yieldto(t)
+    t.result = exc
+    t._isexception = true
+    set_next_task(t)
+    return try_yieldto(identity)
 end
 
 function ensure_rescheduled(othertask::Task)
     ct = current_task()
-    W = Workqueues[Threads.threadid()]
+    W = workqueue_for(Threads.threadid())
     if ct !== othertask && othertask._state === task_state_runnable
         # we failed to yield to othertask
         # return it to the head of a queue to be retried later
         tid = Threads.threadid(othertask)
-        Wother = tid == 0 ? W : Workqueues[tid]
+        Wother = tid == 0 ? W : workqueue_for(tid)
         pushfirst!(Wother, othertask)
     end
     # if the current task was queued,
@@ -717,31 +911,36 @@ function ensure_rescheduled(othertask::Task)
 end
 
 function trypoptask(W::StickyWorkqueue)
-    isempty(W) && return
-    t = popfirst!(W)
-    if t._state !== task_state_runnable
-        # assume this somehow got queued twice,
-        # probably broken now, but try discarding this switch and keep going
-        # can't throw here, because it's probably not the fault of the caller to wait
-        # and don't want to use print() here, because that may try to incur a task switch
-        ccall(:jl_safe_printf, Cvoid, (Ptr{UInt8}, Int32...),
-            "\nWARNING: Workqueue inconsistency detected: popfirst!(Workqueue).state != :runnable\n")
-        return
+    while !isempty(W)
+        t = popfirst!(W)
+        if t._state !== task_state_runnable
+            # assume this somehow got queued twice,
+            # probably broken now, but try discarding this switch and keep going
+            # can't throw here, because it's probably not the fault of the caller to wait
+            # and don't want to use print() here, because that may try to incur a task switch
+            ccall(:jl_safe_printf, Cvoid, (Ptr{UInt8}, Int32...),
+                "\nWARNING: Workqueue inconsistency detected: popfirst!(Workqueue).state != :runnable\n")
+            continue
+        end
+        return t
     end
-    return t
+    return Partr.multiq_deletemin()
 end
 
+checktaskempty = Partr.multiq_check_empty
+
 @noinline function poptask(W::StickyWorkqueue)
     task = trypoptask(W)
     if !(task isa Task)
-        task = ccall(:jl_task_get_next, Ref{Task}, (Any, Any), trypoptask, W)
+        task = ccall(:jl_task_get_next, Ref{Task}, (Any, Any, Any), trypoptask, W, checktaskempty)
     end
     set_next_task(task)
     nothing
 end
 
 function wait()
-    W = Workqueues[Threads.threadid()]
+    GC.safepoint()
+    W = workqueue_for(Threads.threadid())
     poptask(W)
     result = try_yieldto(ensure_rescheduled)
     process_events()
diff --git a/base/threadcall.jl b/base/threadcall.jl
index 2267e4ea2228c3..45965fdbc6c651 100644
--- a/base/threadcall.jl
+++ b/base/threadcall.jl
@@ -9,7 +9,7 @@ const threadcall_restrictor = Semaphore(max_ccall_threads)
 
 The `@threadcall` macro is called in the same way as [`ccall`](@ref) but does the work
 in a different thread. This is useful when you want to call a blocking C
-function without causing the main `julia` thread to become blocked. Concurrency
+function without causing the current `julia` thread to become blocked. Concurrency
 is limited by size of the libuv thread pool, which defaults to 4 threads but
 can be increased by setting the `UV_THREADPOOL_SIZE` environment variable and
 restarting the `julia` process.
@@ -30,7 +30,7 @@ macro threadcall(f, rettype, argtypes, argvals...)
     argvals = map(esc, argvals)
 
     # construct non-allocating wrapper to call C function
-    wrapper = :(function (args_ptr::Ptr{Cvoid}, retval_ptr::Ptr{Cvoid})
+    wrapper = :(function (fptr::Ptr{Cvoid}, args_ptr::Ptr{Cvoid}, retval_ptr::Ptr{Cvoid})
         p = args_ptr
         # the rest of the body is created below
     end)
@@ -42,18 +42,19 @@ macro threadcall(f, rettype, argtypes, argvals...)
         push!(body, :(p += Core.sizeof($T)))
         push!(args, arg)
     end
-    push!(body, :(ret = ccall($f, $rettype, ($(argtypes...),), $(args...))))
+    push!(body, :(ret = ccall(fptr, $rettype, ($(argtypes...),), $(args...))))
     push!(body, :(unsafe_store!(convert(Ptr{$rettype}, retval_ptr), ret)))
     push!(body, :(return Int(Core.sizeof($rettype))))
 
     # return code to generate wrapper function and send work request thread queue
     wrapper = Expr(Symbol("hygienic-scope"), wrapper, @__MODULE__)
-    return :(let fun_ptr = @cfunction($wrapper, Int, (Ptr{Cvoid}, Ptr{Cvoid}))
-        do_threadcall(fun_ptr, $rettype, Any[$(argtypes...)], Any[$(argvals...)])
+    return :(let fun_ptr = @cfunction($wrapper, Int, (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{Cvoid}))
+        # use cglobal to look up the function on the calling thread
+        do_threadcall(fun_ptr, cglobal($f), $rettype, Any[$(argtypes...)], Any[$(argvals...)])
     end)
 end
 
-function do_threadcall(fun_ptr::Ptr{Cvoid}, rettype::Type, argtypes::Vector, argvals::Vector)
+function do_threadcall(fun_ptr::Ptr{Cvoid}, cfptr::Ptr{Cvoid}, rettype::Type, argtypes::Vector, argvals::Vector)
     # generate function pointer
     c_notify_fun = @cfunction(
         function notify_fun(idx)
@@ -86,8 +87,8 @@ function do_threadcall(fun_ptr::Ptr{Cvoid}, rettype::Type, argtypes::Vector, arg
     GC.@preserve args_arr ret_arr roots begin
         # queue up the work to be done
         ccall(:jl_queue_work, Cvoid,
-            (Ptr{Cvoid}, Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid}, Cint),
-            fun_ptr, args_arr, ret_arr, c_notify_fun, idx)
+            (Ptr{Cvoid}, Ptr{Cvoid}, Ptr{UInt8}, Ptr{UInt8}, Ptr{Cvoid}, Cint),
+            fun_ptr, cfptr, args_arr, ret_arr, c_notify_fun, idx)
 
         # wait for a result & return it
         wait(thread_notifiers[idx])
diff --git a/base/threadingconstructs.jl b/base/threadingconstructs.jl
index 27096e1ba8be66..2f0d40f3d980e8 100644
--- a/base/threadingconstructs.jl
+++ b/base/threadingconstructs.jl
@@ -1,31 +1,71 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-export threadid, nthreads, @threads
+export threadid, nthreads, @threads, @spawn,
+       threadpool, nthreadpools
 
 """
-    Threads.threadid()
+    Threads.threadid() -> Int
 
-Get the ID number of the current thread of execution. The master thread has ID `1`.
+Get the ID number of the current thread of execution. The master thread has
+ID `1`.
 """
 threadid() = Int(ccall(:jl_threadid, Int16, ())+1)
 
-# Inclusive upper bound on threadid()
 """
-    Threads.nthreads()
+    Threads.nthreads([:default|:interactive]) -> Int
 
-Get the number of threads available to the Julia process. This is the inclusive upper bound
-on [`threadid()`](@ref).
+Get the number of threads (across all thread pools or within the specified
+thread pool) available to Julia. The number of threads across all thread
+pools is the inclusive upper bound on [`threadid()`](@ref).
+
+See also: `BLAS.get_num_threads` and `BLAS.set_num_threads` in the
+[`LinearAlgebra`](@ref man-linalg) standard library, and `nprocs()` in the
+[`Distributed`](@ref man-distributed) standard library.
 """
+function nthreads end
+
 nthreads() = Int(unsafe_load(cglobal(:jl_n_threads, Cint)))
+function nthreads(pool::Symbol)
+    if pool == :default
+        tpid = Int8(0)
+    elseif pool == :interactive
+        tpid = Int8(1)
+    else
+        error("invalid threadpool specified")
+    end
+    return _nthreads_in_pool(tpid)
+end
+function _nthreads_in_pool(tpid::Int8)
+    p = unsafe_load(cglobal(:jl_n_threads_per_pool, Ptr{Cint}))
+    return Int(unsafe_load(p, tpid + 1))
+end
+
+"""
+    Threads.threadpool(tid = threadid()) -> Symbol
+
+Returns the specified thread's threadpool; either `:default` or `:interactive`.
+"""
+function threadpool(tid = threadid())
+    tpid = ccall(:jl_threadpoolid, Int8, (Int16,), tid-1)
+    return tpid == 0 ? :default : :interactive
+end
 
-function threading_run(func)
+"""
+    Threads.nthreadpools() -> Int
+
+Returns the number of threadpools currently configured.
+"""
+nthreadpools() = Int(unsafe_load(cglobal(:jl_n_threadpools, Cint)))
+
+
+function threading_run(fun, static)
     ccall(:jl_enter_threaded_region, Cvoid, ())
     n = nthreads()
     tasks = Vector{Task}(undef, n)
     for i = 1:n
-        t = Task(func)
-        t.sticky = true
-        ccall(:jl_set_task_tid, Cvoid, (Any, Cint), t, i-1)
+        t = Task(() -> fun(i)) # pass in tid
+        t.sticky = static
+        static && ccall(:jl_set_task_tid, Cint, (Any, Cint), t, i-1)
         tasks[i] = t
         schedule(t)
     end
@@ -44,7 +84,7 @@ function _threadsfor(iter, lbody, schedule)
     quote
         local threadsfor_fun
         let range = $(esc(range))
-        function threadsfor_fun(onethread=false)
+        function threadsfor_fun(tid = 1; onethread = false)
             r = range # Load into local variable
             lenr = length(r)
             # divide loop iterations among threads
@@ -52,7 +92,6 @@ function _threadsfor(iter, lbody, schedule)
                 tid = 1
                 len, rem = lenr, 0
             else
-                tid = threadid()
                 len, rem = divrem(lenr, nthreads())
             end
             # not enough iterations for all the threads?
@@ -82,15 +121,12 @@ function _threadsfor(iter, lbody, schedule)
             end
         end
         end
-        if threadid() != 1 || ccall(:jl_in_threaded_region, Cint, ()) != 0
-            $(if schedule === :static
-              :(error("`@threads :static` can only be used from thread 1 and not nested"))
-              else
-              # only use threads when called from thread 1, outside @threads
-              :(Base.invokelatest(threadsfor_fun, true))
-              end)
-        else
-            threading_run(threadsfor_fun)
+        if $(schedule === :dynamic || schedule === :default)
+            threading_run(threadsfor_fun, false)
+        elseif ccall(:jl_in_threaded_region, Cint, ()) != 0 # :static
+            error("`@threads :static` cannot be used concurrently or nested")
+        else # :static
+            threading_run(threadsfor_fun, true)
         end
         nothing
     end
@@ -99,21 +135,109 @@ end
 """
     Threads.@threads [schedule] for ... end
 
-A macro to parallelize a `for` loop to run with multiple threads. Splits the iteration
-space among multiple tasks and runs those tasks on threads according to a scheduling
-policy.
-A barrier is placed at the end of the loop which waits for all tasks to finish
-execution.
+A macro to execute a `for` loop in parallel. The iteration space is distributed to
+coarse-grained tasks. This policy can be specified by the `schedule` argument. The
+execution of the loop waits for the evaluation of all iterations.
+
+See also: [`@spawn`](@ref Threads.@spawn) and
+`pmap` in [`Distributed`](@ref man-distributed).
+
+# Extended help
+
+## Semantics
+
+Unless stronger guarantees are specified by the scheduling option, the loop executed by
+`@threads` macro have the following semantics.
+
+The `@threads` macro executes the loop body in an unspecified order and potentially
+concurrently. It does not specify the exact assignments of the tasks and the worker threads.
+The assignments can be different for each execution. The loop body code (including any code
+transitively called from it) must not make any assumptions about the distribution of
+iterations to tasks or the worker thread in which they are executed. The loop body for each
+iteration must be able to make forward progress independent of other iterations and be free
+from data races. As such, invalid synchronizations across iterations may deadlock while
+unsynchronized memory accesses may result in undefined behavior.
 
-The `schedule` argument can be used to request a particular scheduling policy.
-The only currently supported value is `:static`, which creates one task per thread
-and divides the iterations equally among them. Specifying `:static` is an error
-if used from inside another `@threads` loop or from a thread other than 1.
+For example, the above conditions imply that:
 
-The default schedule (used when no `schedule` argument is present) is subject to change.
+- The lock taken in an iteration *must* be released within the same iteration.
+- Communicating between iterations using blocking primitives like `Channel`s is incorrect.
+- Write only to locations not shared across iterations (unless a lock or atomic operation is
+  used).
+- The value of [`threadid()`](@ref Threads.threadid) may change even within a single
+  iteration.
+
+## Schedulers
+
+Without the scheduler argument, the exact scheduling is unspecified and varies across Julia
+releases. Currently, `:dynamic` is used when the scheduler is not specified.
 
 !!! compat "Julia 1.5"
     The `schedule` argument is available as of Julia 1.5.
+
+### `:dynamic` (default)
+
+`:dynamic` scheduler executes iterations dynamically to available worker threads. Current
+implementation assumes that the workload for each iteration is uniform. However, this
+assumption may be removed in the future.
+
+This scheduling option is merely a hint to the underlying execution mechanism. However, a
+few properties can be expected. The number of `Task`s used by `:dynamic` scheduler is
+bounded by a small constant multiple of the number of available worker threads
+([`nthreads()`](@ref Threads.nthreads)). Each task processes contiguous regions of the
+iteration space. Thus, `@threads :dynamic for x in xs; f(x); end` is typically more
+efficient than `@sync for x in xs; @spawn f(x); end` if `length(xs)` is significantly
+larger than the number of the worker threads and the run-time of `f(x)` is relatively
+smaller than the cost of spawning and synchronizing a task (typically less than 10
+microseconds).
+
+!!! compat "Julia 1.8"
+    The `:dynamic` option for the `schedule` argument is available and the default as of Julia 1.8.
+
+### `:static`
+
+`:static` scheduler creates one task per thread and divides the iterations equally among
+them, assigning each task specifically to each thread. In particular, the value of
+[`threadid()`](@ref Threads.threadid) is guranteed to be constant within one iteration.
+Specifying `:static` is an error if used from inside another `@threads` loop or from a
+thread other than 1.
+
+!!! note
+    `:static` scheduling exists for supporting transition of code written before Julia 1.3.
+    In newly written library functions, `:static` scheduling is discouraged because the
+    functions using this option cannot be called from arbitrary worker threads.
+
+## Example
+
+To illustrate of the different scheduling strategies, consider the following function
+`busywait` containing a non-yielding timed loop that runs for a given number of seconds.
+
+```julia-repl
+julia> function busywait(seconds)
+            tstart = time_ns()
+            while (time_ns() - tstart) / 1e9 < seconds
+            end
+        end
+
+julia> @time begin
+            Threads.@spawn busywait(5)
+            Threads.@threads :static for i in 1:Threads.nthreads()
+                busywait(1)
+            end
+        end
+6.003001 seconds (16.33 k allocations: 899.255 KiB, 0.25% compilation time)
+
+julia> @time begin
+            Threads.@spawn busywait(5)
+            Threads.@threads :dynamic for i in 1:Threads.nthreads()
+                busywait(1)
+            end
+        end
+2.012056 seconds (16.05 k allocations: 883.919 KiB, 0.66% compilation time)
+```
+
+The `:dynamic` example takes 2 seconds since one of the non-occupied threads is able
+to run two of the 1-second iterations to complete the for loop.
 """
 macro threads(args...)
     na = length(args)
@@ -125,7 +249,7 @@ macro threads(args...)
             # for now only allow quoted symbols
             sched = nothing
         end
-        if sched !== :static
+        if sched !== :static && sched !== :dynamic
             throw(ArgumentError("unsupported schedule argument in @threads"))
         end
     elseif na == 1
@@ -144,34 +268,63 @@ macro threads(args...)
 end
 
 """
-    Threads.@spawn expr
+    Threads.@spawn [:default|:interactive] expr
 
-Create and run a [`Task`](@ref) on any available thread. To wait for the task to
-finish, call [`wait`](@ref) on the result of this macro, or call [`fetch`](@ref)
-to wait and then obtain its return value.
+Create a [`Task`](@ref) and [`schedule`](@ref) it to run on any available
+thread in the specified threadpool (`:default` if unspecified). The task is
+allocated to a thread once one becomes available. To wait for the task to
+finish, call [`wait`](@ref) on the result of this macro, or call
+[`fetch`](@ref) to wait and then obtain its return value.
 
-Values can be interpolated into `@spawn` via `\$`, which copies the value directly into the
-constructed underlying closure. This allows you to insert the _value_ of a variable,
-isolating the asynchronous code from changes to the variable's value in the current task.
+Values can be interpolated into `@spawn` via `\$`, which copies the value
+directly into the constructed underlying closure. This allows you to insert
+the _value_ of a variable, isolating the asynchronous code from changes to
+the variable's value in the current task.
 
 !!! note
-    See the manual chapter on threading for important caveats.
+    See the manual chapter on [multi-threading](@ref man-multithreading)
+    for important caveats. See also the chapter on [threadpools](@ref man-threadpools).
 
 !!! compat "Julia 1.3"
     This macro is available as of Julia 1.3.
 
 !!! compat "Julia 1.4"
     Interpolating values via `\$` is available as of Julia 1.4.
+
+!!! compat "Julia 1.9"
+    A threadpool may be specified as of Julia 1.9.
 """
-macro spawn(expr)
-    letargs = Base._lift_one_interp!(expr)
+macro spawn(args...)
+    tpid = Int8(0)
+    na = length(args)
+    if na == 2
+        ttype, ex = args
+        if ttype isa QuoteNode
+            ttype = ttype.value
+        elseif ttype isa Symbol
+            # TODO: allow unquoted symbols
+            ttype = nothing
+        end
+        if ttype === :interactive
+            tpid = Int8(1)
+        elseif ttype !== :default
+            throw(ArgumentError("unsupported threadpool in @spawn: $ttype"))
+        end
+    elseif na == 1
+        ex = args[1]
+    else
+        throw(ArgumentError("wrong number of arguments in @spawn"))
+    end
+
+    letargs = Base._lift_one_interp!(ex)
 
-    thunk = esc(:(()->($expr)))
+    thunk = esc(:(()->($ex)))
     var = esc(Base.sync_varname)
     quote
         let $(letargs...)
             local task = Task($thunk)
             task.sticky = false
+            ccall(:jl_set_task_threadpoolid, Cint, (Any, Int8), task, $tpid)
             if $(Expr(:islocal, var))
                 put!($var, task)
             end
diff --git a/base/threads_overloads.jl b/base/threads_overloads.jl
index 3e6ad06760747e..a0d4bbeda22888 100644
--- a/base/threads_overloads.jl
+++ b/base/threads_overloads.jl
@@ -35,7 +35,7 @@ function Threads.foreach(f, channel::Channel;
                 # do `stop[] && break` after `f(item)` to avoid losing `item`.
                 # this isn't super comprehensive since a task could still get
                 # stuck on `take!` at `for item in channel`. We should think
-                # about a more robust mechanism to avoid dropping items. See also:
+                # about a more robust mechanism to avoid dropping items. See also
                 # https://github.com/JuliaLang/julia/pull/34543#discussion_r422695217
                 stop[] && break
             end
diff --git a/base/timing.jl b/base/timing.jl
index ac19e9a2e2ba8b..539e08e885a167 100644
--- a/base/timing.jl
+++ b/base/timing.jl
@@ -16,6 +16,8 @@ struct GC_Num
     collect         ::Csize_t # GC internal
     pause           ::Cint
     full_sweep      ::Cint
+    max_pause       ::Int64
+    max_memory      ::Int64
 end
 
 gc_num() = ccall(:jl_gc_num, GC_Num, ())
@@ -40,7 +42,7 @@ function GC_Diff(new::GC_Num, old::GC_Num)
     # logic from `src/gc.c:jl_gc_total_bytes`
     old_allocd = gc_total_bytes(old)
     new_allocd = gc_total_bytes(new)
-    return GC_Diff(new_allocd - old_allocd,
+    return GC_Diff(new_allocd       - old_allocd,
                    new.malloc       - old.malloc,
                    new.realloc      - old.realloc,
                    new.poolalloc    - old.poolalloc,
@@ -55,6 +57,21 @@ function gc_alloc_count(diff::GC_Diff)
     diff.malloc + diff.realloc + diff.poolalloc + diff.bigalloc
 end
 
+# cumulative total time spent on compilation and recompilation, in nanoseconds
+function cumulative_compile_time_ns()
+    comp = ccall(:jl_cumulative_compile_time_ns, UInt64, ())
+    recomp = ccall(:jl_cumulative_recompile_time_ns, UInt64, ())
+    return comp, recomp
+end
+
+function cumulative_compile_timing(b::Bool)
+    if b
+        ccall(:jl_cumulative_compile_timing_enable, Cvoid, ())
+    else
+        ccall(:jl_cumulative_compile_timing_disable, Cvoid, ())
+    end
+    return
+end
 
 # total time spend in garbage collection, in nanoseconds
 gc_time_ns() = ccall(:jl_gc_total_hrtime, UInt64, ())
@@ -72,6 +89,16 @@ function gc_live_bytes()
     Int(ccall(:jl_gc_live_bytes, Int64, ())) + num.allocd + num.deferred_alloc
 end
 
+"""
+    Base.jit_total_bytes()
+
+Return the total amount (in bytes) allocated by the just-in-time compiler
+for e.g. native code and data.
+"""
+function jit_total_bytes()
+    return Int(ccall(:jl_jit_total_bytes, Csize_t, ()))
+end
+
 # print elapsed time, return expression value
 const _mem_units = ["byte", "KiB", "MiB", "GiB", "TiB", "PiB"]
 const _cnt_units = ["", " k", " M", " G", " T", " P"]
@@ -85,14 +112,13 @@ function prettyprint_getunits(value, numunits, factor)
     return number, unit
 end
 
-function padded_nonzero_print(value, str)
-    if value != 0
-        blanks = "                "[1:(18 - length(str))]
+function padded_nonzero_print(value, str, always_print = true)
+    if always_print || value != 0
+        blanks = "                "[1:(19 - length(str))]
         println(str, ":", blanks, value)
     end
 end
 
-
 function format_bytes(bytes) # also used by InteractiveUtils
     bytes, mb = prettyprint_getunits(bytes, length(_mem_units), Int64(1024))
     if mb == 1
@@ -102,51 +128,90 @@ function format_bytes(bytes) # also used by InteractiveUtils
     end
 end
 
-function time_print(elapsedtime, bytes=0, gctime=0, allocs=0)
+function time_print(elapsedtime, bytes=0, gctime=0, allocs=0, compile_time=0, recompile_time=0, newline=false, _lpad=true)
     timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6)
-    length(timestr) < 10 && print(" "^(10 - length(timestr)))
-    print(timestr, " seconds")
-    if bytes != 0 || allocs != 0
-        allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
-        if ma == 1
-            print(" (", Int(allocs), _cnt_units[ma], allocs==1 ? " allocation: " : " allocations: ")
-        else
-            print(" (", Ryu.writefixed(Float64(allocs), 2), _cnt_units[ma], " allocations: ")
+    str = sprint() do io
+        _lpad && print(io, length(timestr) < 10 ? (" "^(10 - length(timestr))) : "")
+        print(io, timestr, " seconds")
+        parens = bytes != 0 || allocs != 0 || gctime > 0 || compile_time > 0
+        parens && print(io, " (")
+        if bytes != 0 || allocs != 0
+            allocs, ma = prettyprint_getunits(allocs, length(_cnt_units), Int64(1000))
+            if ma == 1
+                print(io, Int(allocs), _cnt_units[ma], allocs==1 ? " allocation: " : " allocations: ")
+            else
+                print(io, Ryu.writefixed(Float64(allocs), 2), _cnt_units[ma], " allocations: ")
+            end
+            print(io, format_bytes(bytes))
         end
-        print(format_bytes(bytes))
-    end
-    if gctime > 0
-        print(", ", Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
-    end
-    if bytes != 0 || allocs != 0
-        print(")")
+        if gctime > 0
+            if bytes != 0 || allocs != 0
+                print(io, ", ")
+            end
+            print(io, Ryu.writefixed(Float64(100*gctime/elapsedtime), 2), "% gc time")
+        end
+        if compile_time > 0
+            if bytes != 0 || allocs != 0 || gctime > 0
+                print(io, ", ")
+            end
+            print(io, Ryu.writefixed(Float64(100*compile_time/elapsedtime), 2), "% compilation time")
+        end
+        if recompile_time > 0
+            perc = Float64(100 * recompile_time / compile_time)
+            # use "<1" to avoid the confusing UX of reporting 0% when it's >0%
+            print(io, ": ", perc < 1 ? "<1" : Ryu.writefixed(perc, 0), "% of which was recompilation")
+        end
+        parens && print(io, ")")
     end
+    newline ? println(str) : print(str)
     nothing
 end
 
-function timev_print(elapsedtime, diff::GC_Diff)
+function timev_print(elapsedtime, diff::GC_Diff, compile_times, _lpad)
     allocs = gc_alloc_count(diff)
-    time_print(elapsedtime, diff.allocd, diff.total_time, allocs)
-    print("\nelapsed time (ns): $elapsedtime\n")
+    compile_time = first(compile_times)
+    recompile_time = last(compile_times)
+    time_print(elapsedtime, diff.allocd, diff.total_time, allocs, compile_time, recompile_time, true, _lpad)
+    padded_nonzero_print(elapsedtime,       "elapsed time (ns)")
     padded_nonzero_print(diff.total_time,   "gc time (ns)")
     padded_nonzero_print(diff.allocd,       "bytes allocated")
     padded_nonzero_print(diff.poolalloc,    "pool allocs")
     padded_nonzero_print(diff.bigalloc,     "non-pool GC allocs")
-    padded_nonzero_print(diff.malloc,       "malloc() calls")
-    padded_nonzero_print(diff.realloc,      "realloc() calls")
-    padded_nonzero_print(diff.freecall,     "free() calls")
-    padded_nonzero_print(diff.pause,        "GC pauses")
+    padded_nonzero_print(diff.malloc,       "malloc() calls", false)
+    padded_nonzero_print(diff.realloc,      "realloc() calls", false)
+    # always print number of frees if there are mallocs
+    padded_nonzero_print(diff.freecall,     "free() calls", diff.malloc > 0)
+    minor_collects = diff.pause - diff.full_sweep
+    padded_nonzero_print(minor_collects,    "minor collections")
     padded_nonzero_print(diff.full_sweep,   "full collections")
 end
 
+# Like a try-finally block, except without introducing the try scope
+# NOTE: This is deprecated and should not be used from user logic. A proper solution to
+# this problem will be introduced in https://github.com/JuliaLang/julia/pull/39217
+macro __tryfinally(ex, fin)
+    Expr(:tryfinally,
+       :($(esc(ex))),
+       :($(esc(fin)))
+       )
+end
+
 """
-    @time
+    @time expr
+    @time "description" expr
 
 A macro to execute an expression, printing the time it took to execute, the number of
 allocations, and the total number of bytes its execution caused to be allocated, before
-returning the value of the expression.
+returning the value of the expression. Any time spent garbage collecting (gc), compiling
+new code, or recompiling invalidated code is shown as a percentage.
 
-See also [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
+Optionally provide a description string to print before the time report.
+
+In some cases the system will look inside the `@time` expression and compile some of the
+called code before execution of the top-level expression begins. When that happens, some
+compilation time will not be counted. To include this time you can run `@time @eval ...`.
+
+See also [`@showtime`](@ref), [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
 [`@allocated`](@ref).
 
 !!! note
@@ -154,9 +219,20 @@ See also [`@timev`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
     package which among other things evaluates the function multiple times in order to
     reduce noise.
 
+!!! compat "Julia 1.8"
+    The option to add a description was introduced in Julia 1.8.
+
+!!! compat "Julia 1.9"
+    Recompilation time being shown separately from compilation time was introduced in Julia 1.9
+
 ```julia-repl
-julia> @time rand(10^6);
-  0.001525 seconds (7 allocations: 7.630 MiB)
+julia> x = rand(10,10);
+
+julia> @time x * x;
+  0.606588 seconds (2.19 M allocations: 116.555 MiB, 3.75% gc time, 99.94% compilation time)
+
+julia> @time x * x;
+  0.000009 seconds (1 allocation: 896 bytes)
 
 julia> @time begin
            sleep(0.3)
@@ -164,50 +240,122 @@ julia> @time begin
        end
   0.301395 seconds (8 allocations: 336 bytes)
 2
+
+julia> @time "A one second sleep" sleep(1)
+A one second sleep: 1.005750 seconds (5 allocations: 144 bytes)
+
+julia> for loop in 1:3
+            @time loop sleep(1)
+        end
+1: 1.006760 seconds (5 allocations: 144 bytes)
+2: 1.001263 seconds (5 allocations: 144 bytes)
+3: 1.003676 seconds (5 allocations: 144 bytes)
 ```
 """
 macro time(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        @time nothing $(esc(ex))
+    end
+end
+macro time(msg, ex)
+    quote
+        Experimental.@force_compile
         local stats = gc_num()
         local elapsedtime = time_ns()
-        local val = $(esc(ex))
-        elapsedtime = time_ns() - elapsedtime
+        cumulative_compile_timing(true)
+        local compile_elapsedtimes = cumulative_compile_time_ns()
+        local val = @__tryfinally($(esc(ex)),
+            (elapsedtime = time_ns() - elapsedtime;
+            cumulative_compile_timing(false);
+            compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes)
+        )
         local diff = GC_Diff(gc_num(), stats)
-        time_print(elapsedtime, diff.allocd, diff.total_time,
-                   gc_alloc_count(diff))
-        println()
+        local _msg = $(esc(msg))
+        local has_msg = !isnothing(_msg)
+        has_msg && print(_msg, ": ")
+        time_print(elapsedtime, diff.allocd, diff.total_time, gc_alloc_count(diff), first(compile_elapsedtimes), last(compile_elapsedtimes), true, !has_msg)
         val
     end
 end
 
 """
-    @timev
+    @showtime expr
+
+Like `@time` but also prints the expression being evaluated for reference.
+
+!!! compat "Julia 1.8"
+    This macro was added in Julia 1.8.
+
+See also [`@time`](@ref).
+
+```julia-repl
+julia> @showtime sleep(1)
+sleep(1): 1.002164 seconds (4 allocations: 128 bytes)
+```
+"""
+macro showtime(ex)
+    quote
+        @time $(sprint(show_unquoted,ex)) $(esc(ex))
+    end
+end
+
+"""
+    @timev expr
+    @timev "description" expr
 
 This is a verbose version of the `@time` macro. It first prints the same information as
 `@time`, then any non-zero memory allocation counters, and then returns the value of the
 expression.
 
+Optionally provide a description string to print before the time report.
+
+!!! compat "Julia 1.8"
+    The option to add a description was introduced in Julia 1.8.
+
 See also [`@time`](@ref), [`@timed`](@ref), [`@elapsed`](@ref), and
 [`@allocated`](@ref).
 
 ```julia-repl
-julia> @timev rand(10^6);
-  0.001006 seconds (7 allocations: 7.630 MiB)
-elapsed time (ns): 1005567
-bytes allocated:   8000256
-pool allocs:       6
-malloc() calls:    1
+julia> x = rand(10,10);
+
+julia> @timev x * x;
+  0.546770 seconds (2.20 M allocations: 116.632 MiB, 4.23% gc time, 99.94% compilation time)
+elapsed time (ns): 546769547
+gc time (ns):      23115606
+bytes allocated:   122297811
+pool allocs:       2197930
+non-pool GC allocs:1327
+malloc() calls:    36
+realloc() calls:   5
+GC pauses:         3
+
+julia> @timev x * x;
+  0.000010 seconds (1 allocation: 896 bytes)
+elapsed time (ns): 9848
+bytes allocated:   896
+pool allocs:       1
 ```
 """
 macro timev(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        @timev nothing $(esc(ex))
+    end
+end
+macro timev(msg, ex)
+    quote
+        Experimental.@force_compile
         local stats = gc_num()
         local elapsedtime = time_ns()
-        local val = $(esc(ex))
-        elapsedtime = time_ns() - elapsedtime
-        timev_print(elapsedtime, GC_Diff(gc_num(), stats))
+        local compile_elapsedtimes = cumulative_compile_time_ns()
+        local val = @__tryfinally($(esc(ex)),
+            (elapsedtime = time_ns() - elapsedtime;
+            compile_elapsedtimes = cumulative_compile_time_ns() .- compile_elapsedtimes)
+        )
+        local diff = GC_Diff(gc_num(), stats)
+        local _msg = $(esc(msg))
+        local has_msg = !isnothing(_msg)
+        has_msg && print(_msg, ": ")
+        timev_print(elapsedtime, diff, compile_elapsedtimes, !has_msg)
         val
     end
 end
@@ -218,6 +366,10 @@ end
 A macro to evaluate an expression, discarding the resulting value, instead returning the
 number of seconds it took to execute as a floating-point number.
 
+In some cases the system will look inside the `@elapsed` expression and compile some of the
+called code before execution of the top-level expression begins. When that happens, some
+compilation time will not be counted. To include this time you can run `@elapsed @eval ...`.
+
 See also [`@time`](@ref), [`@timev`](@ref), [`@timed`](@ref),
 and [`@allocated`](@ref).
 
@@ -228,7 +380,7 @@ julia> @elapsed sleep(0.3)
 """
 macro elapsed(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        Experimental.@force_compile
         local t0 = time_ns()
         $(esc(ex))
         (time_ns() - t0) / 1e9
@@ -260,7 +412,7 @@ julia> @allocated rand(10^6)
 """
 macro allocated(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        Experimental.@force_compile
         local b0 = Ref{Int64}(0)
         local b1 = Ref{Int64}(0)
         gc_bytes(b0)
@@ -277,6 +429,10 @@ A macro to execute an expression, and return the value of the expression, elapse
 total bytes allocated, garbage collection time, and an object with various memory allocation
 counters.
 
+In some cases the system will look inside the `@timed` expression and compile some of the
+called code before execution of the top-level expression begins. When that happens, some
+compilation time will not be counted. To include this time you can run `@timed @eval ...`.
+
 See also [`@time`](@ref), [`@timev`](@ref), [`@elapsed`](@ref), and
 [`@allocated`](@ref).
 
@@ -304,7 +460,7 @@ julia> stats.gcstats.total_time
 """
 macro timed(ex)
     quote
-        while false; end # compiler heuristic: compile this block (alter this if the heuristic changes)
+        Experimental.@force_compile
         local stats = gc_num()
         local elapsedtime = time_ns()
         local val = $(esc(ex))
diff --git a/base/toml_parser.jl b/base/toml_parser.jl
index d3a0df082c3a7a..7f4662bddc4dda 100644
--- a/base/toml_parser.jl
+++ b/base/toml_parser.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 module TOML
 
 using Base: IdSet
@@ -102,7 +104,7 @@ function Parser(str::String; filepath=nothing)
             IdSet{TOMLDict}(),    # defined_tables
             root,
             filepath,
-            get(Base.loaded_modules, DATES_PKGID, nothing),
+            isdefined(Base, :maybe_root_module) ? Base.maybe_root_module(DATES_PKGID) : nothing,
         )
     startup(l)
     return l
@@ -240,7 +242,7 @@ const err_message = Dict(
     ErrExpectedEqualAfterKey                => "expected equal sign after key",
     ErrNoTrailingDigitAfterDot              => "expected digit after dot",
     ErrOverflowError                        => "overflowed when parsing integer",
-    ErrInvalidUnicodeScalar                 => "invalid uncidode scalar",
+    ErrInvalidUnicodeScalar                 => "invalid unicode scalar",
     ErrInvalidEscapeCharacter               => "invalid escape character",
     ErrUnexpectedEofExpectedValue           => "unexpected end of file, expected a value"
 )
@@ -278,7 +280,7 @@ const Err{T} = Union{T, ParserError}
 function format_error_message_for_err_type(error::ParserError)
     msg = err_message[error.type]
     if error.type == ErrInvalidBareKeyCharacter
-        c_escaped = escape_string(string(error.data))
+        c_escaped = escape_string(string(error.data)::String)
         msg *= ": '$c_escaped'"
     end
     return msg
@@ -319,9 +321,9 @@ function Base.showerror(io::IO, err::ParserError)
     printstyled(io, " error: "; color=Base.error_color())
     println(io, format_error_message_for_err_type(err))
     # In this case we want the arrow to point one character
-    pos = err.pos
+    pos = err.pos::Int
     err.type == ErrUnexpectedEofExpectedValue && (pos += 1)
-    str1, err1 = point_to_line(err.str, pos, pos, io)
+    str1, err1 = point_to_line(err.str::String, pos, pos, io)
     @static if VERSION <= v"1.6.0-DEV.121"
         # See https://github.com/JuliaLang/julia/issues/36015
         format_fixer = get(io, :color, false) == true ? "\e[0m" : ""
@@ -482,6 +484,7 @@ end
 
 function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String}, check=true)::Err{TOMLDict}
     for i in 1:length(dotted_keys)
+        d = d::TOMLDict
         key = dotted_keys[i]
         d = get!(TOMLDict, d, key)
         if d isa Vector
@@ -489,7 +492,7 @@ function recurse_dict!(l::Parser, d::Dict, dotted_keys::AbstractVector{String},
         end
         check && @try check_allowed_add_key(l, d, i == length(dotted_keys))
     end
-    return d
+    return d::TOMLDict
 end
 
 function check_allowed_add_key(l::Parser, d, check_defined=true)::Err{Nothing}
@@ -654,12 +657,22 @@ end
 #########
 
 function push!!(v::Vector, el)
+    # Since these types are typically non-inferrable, they are a big invalidation risk,
+    # and since it's used by the package-loading infrastructure the cost of invalidation
+    # is high. Therefore, this is written to reduce the "exposed surface area": e.g., rather
+    # than writing `T[el]` we write it as `push!(Vector{T}(undef, 1), el)` so that there
+    # is no ambiguity about what types of objects will be created.
     T = eltype(v)
-    if el isa T || typeof(el) === T
+    t = typeof(el)
+    if el isa T || t === T
         push!(v, el::T)
         return v
+    elseif T === Union{}
+        out = Vector{t}(undef, 1)
+        out[1] = el
+        return out
     else
-        if typeof(T) === Union
+        if T isa Union
             newT = Any
         else
             newT = Union{T, typeof(el)}
@@ -672,7 +685,7 @@ end
 
 function parse_array(l::Parser)::Err{Vector}
     skip_ws_nl(l)
-    array = Union{}[]
+    array = Vector{Union{}}()
     empty_array = accept(l, ']')
     while !empty_array
         v = @try parse_value(l)
@@ -738,7 +751,7 @@ isvalid_binary(c::Char) = '0' <= c <= '1'
 
 const ValidSigs = Union{typeof.([isvalid_hex, isvalid_oct, isvalid_binary, isdigit])...}
 # This function eats things accepted by `f` but also allows eating `_` in between
-# digits. Retruns if it ate at lest one character and if it ate an underscore
+# digits. Returns if it ate at lest one character and if it ate an underscore
 function accept_batch_underscore(l::Parser, f::ValidSigs, fail_if_underscore=true)::Err{Tuple{Bool, Bool}}
     contains_underscore = false
     at_least_one = false
@@ -808,8 +821,6 @@ function parse_number_or_date_start(l::Parser)
             ate && return parse_int(l, contains_underscore)
         elseif accept(l, isdigit)
             return parse_local_time(l)
-        elseif peek(l) !== '.'
-            return ParserError(ErrLeadingZeroNotAllowedInteger)
         end
     end
 
@@ -922,21 +933,21 @@ ok_end_value(c::Char) = iswhitespace(c) || c == '#' || c == EOF_CHAR || c == ']'
 accept_two(l, f::F) where {F} = accept_n(l, 2, f) || return(ParserError(ErrParsingDateTime))
 function parse_datetime(l)
     # Year has already been eaten when we reach here
-    year = parse_int(l, false)::Int64
+    year = @try parse_int(l, false)
     year in 0:9999 || return ParserError(ErrParsingDateTime)
 
     # Month
     accept(l, '-') || return ParserError(ErrParsingDateTime)
     set_marker!(l)
     @try accept_two(l, isdigit)
-    month = parse_int(l, false)
+    month = @try parse_int(l, false)
     month in 1:12 || return ParserError(ErrParsingDateTime)
     accept(l, '-') || return ParserError(ErrParsingDateTime)
 
     # Day
     set_marker!(l)
     @try accept_two(l, isdigit)
-    day = parse_int(l, false)
+    day = @try parse_int(l, false)
     # Verify the real range in the constructor below
     day in 1:31 || return ParserError(ErrParsingDateTime)
 
@@ -973,9 +984,10 @@ function parse_datetime(l)
 end
 
 function try_return_datetime(p, year, month, day, h, m, s, ms)
-    if p.Dates !== nothing
+    Dates = p.Dates
+    if Dates !== nothing
         try
-            return p.Dates.DateTime(year, month, day, h, m, s, ms)
+            return Dates.DateTime(year, month, day, h, m, s, ms)
         catch
             return ParserError(ErrParsingDateTime)
         end
@@ -985,9 +997,10 @@ function try_return_datetime(p, year, month, day, h, m, s, ms)
 end
 
 function try_return_date(p, year, month, day)
-    if p.Dates !== nothing
+    Dates = p.Dates
+    if Dates !== nothing
         try
-            return p.Dates.Date(year, month, day)
+            return Dates.Date(year, month, day)
         catch
             return ParserError(ErrParsingDateTime)
         end
@@ -997,7 +1010,7 @@ function try_return_date(p, year, month, day)
 end
 
 function parse_local_time(l::Parser)
-    h = parse_int(l, false)
+    h = @try parse_int(l, false)
     h in 0:23 || return ParserError(ErrParsingDateTime)
     _, m, s, ms = @try _parse_local_time(l, true)
     # TODO: Could potentially parse greater accuracy for the
@@ -1006,9 +1019,10 @@ function parse_local_time(l::Parser)
 end
 
 function try_return_time(p, h, m, s, ms)
-    if p.Dates !== nothing
+    Dates = p.Dates
+    if Dates !== nothing
         try
-            return p.Dates.Time(h, m, s, ms)
+            return Dates.Time(h, m, s, ms)
         catch
             return ParserError(ErrParsingDateTime)
         end
@@ -1130,7 +1144,7 @@ function parse_string_continue(l::Parser, multiline::Bool, quoted::Bool)::Err{St
                     if !accept_n(l, n, isvalid_hex)
                         return ParserError(ErrInvalidUnicodeScalar)
                     end
-                    codepoint = parse_int(l, false, 16)
+                    codepoint = parse_int(l, false, 16)::Int64
                     #=
                     Unicode Scalar Value
                     ---------------------
@@ -1151,7 +1165,7 @@ function parse_string_continue(l::Parser, multiline::Bool, quoted::Bool)::Err{St
 end
 
 function take_chunks(l::Parser, unescape::Bool)::String
-    nbytes = sum(length, l.chunks)
+    nbytes = sum(length, l.chunks; init=0)
     str = Base._string_n(nbytes)
     offset = 1
     for chunk in l.chunks
diff --git a/base/ttyhascolor.jl b/base/ttyhascolor.jl
index c852bd5feb62f8..5984dba6d592ef 100644
--- a/base/ttyhascolor.jl
+++ b/base/ttyhascolor.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 if Sys.iswindows()
     ttyhascolor(term_type = nothing) = true
 else
@@ -22,4 +24,4 @@ end
 in(key_value::Pair{Symbol,Bool}, ::TTY) = key_value.first === :color && key_value.second === get_have_color()
 haskey(::TTY, key::Symbol) = key === :color
 getindex(::TTY, key::Symbol) = key === :color ? get_have_color() : throw(KeyError(key))
-get(::TTY, key::Symbol, default) = key === :color ? get_have_color() : default
\ No newline at end of file
+get(::TTY, key::Symbol, default) = key === :color ? get_have_color() : default
diff --git a/base/tuple.jl b/base/tuple.jl
index 691b7fb475d8e7..484a5d24e67df0 100644
--- a/base/tuple.jl
+++ b/base/tuple.jl
@@ -27,12 +27,15 @@ lastindex(@nospecialize t::Tuple) = length(t)
 size(@nospecialize(t::Tuple), d::Integer) = (d == 1) ? length(t) : throw(ArgumentError("invalid tuple dimension $d"))
 axes(@nospecialize t::Tuple) = (OneTo(length(t)),)
 @eval getindex(@nospecialize(t::Tuple), i::Int) = getfield(t, i, $(Expr(:boundscheck)))
-@eval getindex(@nospecialize(t::Tuple), i::Real) = getfield(t, convert(Int, i), $(Expr(:boundscheck)))
+@eval getindex(@nospecialize(t::Tuple), i::Integer) = getfield(t, convert(Int, i), $(Expr(:boundscheck)))
 getindex(t::Tuple, r::AbstractArray{<:Any,1}) = (eltype(t)[t[ri] for ri in r]...,)
 getindex(t::Tuple, b::AbstractArray{Bool,1}) = length(b) == length(t) ? getindex(t, findall(b)) : throw(BoundsError(t, b))
 getindex(t::Tuple, c::Colon) = t
 
-# returns new tuple; N.B.: becomes no-op if i is out-of-bounds
+get(t::Tuple, i::Integer, default) = i in 1:length(t) ? getindex(t, i) : default
+get(f::Callable, t::Tuple, i::Integer) = i in 1:length(t) ? getindex(t, i) : f()
+
+# returns new tuple; N.B.: becomes no-op if `i` is out-of-bounds
 
 """
     setindex(c::Tuple, v, i::Integer)
@@ -48,21 +51,20 @@ true
 """
 function setindex(x::Tuple, v, i::Integer)
     @boundscheck 1 <= i <= length(x) || throw(BoundsError(x, i))
-    @_inline_meta
+    @inline
     _setindex(v, i, x...)
 end
 
-function _setindex(v, i::Integer, first, tail...)
-    @_inline_meta
-    return (ifelse(i == 1, v, first), _setindex(v, i - 1, tail...)...)
+function _setindex(v, i::Integer, args...)
+    @inline
+    return ntuple(j -> ifelse(j == i, v, args[j]), length(args))
 end
-_setindex(v, i::Integer) = ()
 
 
 ## iterating ##
 
 function iterate(@nospecialize(t::Tuple), i::Int=1)
-    @_inline_meta
+    @inline
     return (1 <= i <= length(t)) ? (@inbounds t[i], i + 1) : nothing
 end
 
@@ -72,19 +74,19 @@ prevind(@nospecialize(t::Tuple), i::Integer) = Int(i)-1
 nextind(@nospecialize(t::Tuple), i::Integer) = Int(i)+1
 
 function keys(t::Tuple, t2::Tuple...)
-    @_inline_meta
+    @inline
     OneTo(_maxlength(t, t2...))
 end
 _maxlength(t::Tuple) = length(t)
 function _maxlength(t::Tuple, t2::Tuple, t3::Tuple...)
-    @_inline_meta
+    @inline
     max(length(t), _maxlength(t2, t3...))
 end
 
 # this allows partial evaluation of bounded sequences of next() calls on tuples,
 # while reducing to plain next() for arbitrary iterables.
-indexed_iterate(t::Tuple, i::Int, state=1) = (@_inline_meta; (getfield(t, i), i+1))
-indexed_iterate(a::Array, i::Int, state=1) = (@_inline_meta; (a[i], i+1))
+indexed_iterate(t::Tuple, i::Int, state=1) = (@inline; (getfield(t, i), i+1))
+indexed_iterate(a::Array, i::Int, state=1) = (@inline; (a[i], i+1))
 function indexed_iterate(I, i)
     x = iterate(I)
     x === nothing && throw(BoundsError(I, i))
@@ -96,6 +98,96 @@ function indexed_iterate(I, i, state)
     x
 end
 
+"""
+    Base.rest(collection[, itr_state])
+
+Generic function for taking the tail of `collection`, starting from a specific iteration
+state `itr_state`. Return a `Tuple`, if `collection` itself is a `Tuple`, a subtype of
+`AbstractVector`, if `collection` is an `AbstractArray`, a subtype of `AbstractString`
+if `collection` is an `AbstractString`, and an arbitrary iterator, falling back to
+`Iterators.rest(collection[, itr_state])`, otherwise.
+
+Can be overloaded for user-defined collection types to customize the behavior of [slurping
+in assignments](@ref destructuring-assignment) in final position, like `a, b... = collection`.
+
+!!! compat "Julia 1.6"
+    `Base.rest` requires at least Julia 1.6.
+
+See also: [`first`](@ref first), [`Iterators.rest`](@ref), [`Base.split_rest`](@ref).
+
+# Examples
+```jldoctest
+julia> a = [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> first, state = iterate(a)
+(1, 2)
+
+julia> first, Base.rest(a, state)
+(1, [3, 2, 4])
+```
+"""
+function rest end
+rest(t::Tuple) = t
+rest(t::Tuple, i::Int) = ntuple(x -> getfield(t, x+i-1), length(t)-i+1)
+rest(a::Array, i::Int=1) = a[i:end]
+rest(a::Core.SimpleVector, i::Int=1) = a[i:end]
+rest(itr, state...) = Iterators.rest(itr, state...)
+
+"""
+    Base.split_rest(collection, n::Int[, itr_state]) -> (rest_but_n, last_n)
+
+Generic function for splitting the tail of `collection`, starting from a specific iteration
+state `itr_state`. Returns a tuple of two new collections. The first one contains all
+elements of the tail but the `n` last ones, which make up the second collection.
+
+The type of the first collection generally follows that of [`Base.rest`](@ref), except that
+the fallback case is not lazy, but is collected eagerly into a vector.
+
+Can be overloaded for user-defined collection types to customize the behavior of [slurping
+in assignments](@ref destructuring-assignment) in non-final position, like `a, b..., c = collection`.
+
+!!! compat "Julia 1.9"
+    `Base.split_rest` requires at least Julia 1.9.
+
+See also: [`Base.rest`](@ref).
+
+# Examples
+```jldoctest
+julia> a = [1 2; 3 4]
+2×2 Matrix{Int64}:
+ 1  2
+ 3  4
+
+julia> first, state = iterate(a)
+(1, 2)
+
+julia> first, Base.split_rest(a, 1, state)
+(1, ([3, 2], [4]))
+```
+"""
+function split_rest end
+function split_rest(itr, n::Int, state...)
+    if IteratorSize(itr) == IsInfinite()
+        throw(ArgumentError("Cannot split an infinite iterator in the middle."))
+    end
+    return _split_rest(rest(itr, state...), n)
+end
+_split_rest(itr, n::Int) = _split_rest(collect(itr), n)
+function _check_length_split_rest(len, n)
+    len < n && throw(ArgumentError(
+        "The iterator only contains $len elements, but at least $n were requested."
+    ))
+end
+function _split_rest(a::Union{AbstractArray, Core.SimpleVector}, n::Int)
+    _check_length_split_rest(length(a), n)
+    return a[begin:end-n], a[end-n+1:end]
+end
+
+split_rest(t::Tuple, n::Int, i=1) = t[i:end-n], t[end-n+1:end]
+
 # Use dispatch to avoid a branch in first
 first(::Tuple{}) = throw(ArgumentError("tuple must be non-empty"))
 first(t::Tuple) = t[1]
@@ -114,7 +206,7 @@ function eltype(t::Type{<:Tuple{Vararg{E}}}) where {E}
 end
 eltype(t::Type{<:Tuple}) = _compute_eltype(t)
 function _tuple_unique_fieldtypes(@nospecialize t)
-    @_pure_meta
+    @_total_meta
     types = IdSet()
     t´ = unwrap_unionall(t)
     # Given t = Tuple{Vararg{S}} where S<:Real, the various
@@ -131,7 +223,7 @@ function _tuple_unique_fieldtypes(@nospecialize t)
     return Core.svec(types...)
 end
 function _compute_eltype(@nospecialize t)
-    @_pure_meta # TODO: the compiler shouldn't need this
+    @_total_meta # TODO: the compiler shouldn't need this
     types = _tuple_unique_fieldtypes(t)
     return afoldl(types...) do a, b
         # if we've already reached Any, it can't widen any more
@@ -152,6 +244,8 @@ safe_tail(t::Tuple{}) = ()
 
 Return a `Tuple` consisting of all but the last component of `x`.
 
+See also: [`first`](@ref), [`tail`](@ref Base.tail).
+
 # Examples
 ```jldoctest
 julia> Base.front((1,2,3))
@@ -162,13 +256,13 @@ ERROR: ArgumentError: Cannot call front on an empty tuple.
 ```
 """
 function front(t::Tuple)
-    @_inline_meta
+    @inline
     _front(t...)
 end
 _front() = throw(ArgumentError("Cannot call front on an empty tuple."))
 _front(v) = ()
 function _front(v, t...)
-    @_inline_meta
+    @inline
     (v, _front(t...)...)
 end
 
@@ -176,16 +270,22 @@ end
 
 # 1 argument function
 map(f, t::Tuple{})              = ()
-map(f, t::Tuple{Any,})          = (f(t[1]),)
-map(f, t::Tuple{Any, Any})      = (f(t[1]), f(t[2]))
-map(f, t::Tuple{Any, Any, Any}) = (f(t[1]), f(t[2]), f(t[3]))
-map(f, t::Tuple)                = (@_inline_meta; (f(t[1]), map(f,tail(t))...))
+map(f, t::Tuple{Any,})          = (@inline; (f(t[1]),))
+map(f, t::Tuple{Any, Any})      = (@inline; (f(t[1]), f(t[2])))
+map(f, t::Tuple{Any, Any, Any}) = (@inline; (f(t[1]), f(t[2]), f(t[3])))
+map(f, t::Tuple)                = (@inline; (f(t[1]), map(f,tail(t))...))
 # stop inlining after some number of arguments to avoid code blowup
-const Any16{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
-                       Any,Any,Any,Any,Any,Any,Any,Any,Vararg{Any,N}}
-const All16{T,N} = Tuple{T,T,T,T,T,T,T,T,
-                         T,T,T,T,T,T,T,T,Vararg{T,N}}
-function map(f, t::Any16)
+const Any32{N} = Tuple{Any,Any,Any,Any,Any,Any,Any,Any,
+                       Any,Any,Any,Any,Any,Any,Any,Any,
+                       Any,Any,Any,Any,Any,Any,Any,Any,
+                       Any,Any,Any,Any,Any,Any,Any,Any,
+                       Vararg{Any,N}}
+const All32{T,N} = Tuple{T,T,T,T,T,T,T,T,
+                         T,T,T,T,T,T,T,T,
+                         T,T,T,T,T,T,T,T,
+                         T,T,T,T,T,T,T,T,
+                         Vararg{T,N}}
+function map(f, t::Any32)
     n = length(t)
     A = Vector{Any}(undef, n)
     for i=1:n
@@ -195,13 +295,13 @@ function map(f, t::Any16)
 end
 # 2 argument function
 map(f, t::Tuple{},        s::Tuple{})        = ()
-map(f, t::Tuple{Any,},    s::Tuple{Any,})    = (f(t[1],s[1]),)
-map(f, t::Tuple{Any,Any}, s::Tuple{Any,Any}) = (f(t[1],s[1]), f(t[2],s[2]))
+map(f, t::Tuple{Any,},    s::Tuple{Any,})    = (@inline; (f(t[1],s[1]),))
+map(f, t::Tuple{Any,Any}, s::Tuple{Any,Any}) = (@inline; (f(t[1],s[1]), f(t[2],s[2])))
 function map(f, t::Tuple, s::Tuple)
-    @_inline_meta
+    @inline
     (f(t[1],s[1]), map(f, tail(t), tail(s))...)
 end
-function map(f, t::Any16, s::Any16)
+function map(f, t::Any32, s::Any32)
     n = length(t)
     A = Vector{Any}(undef, n)
     for i = 1:n
@@ -214,10 +314,10 @@ heads(ts::Tuple...) = map(t -> t[1], ts)
 tails(ts::Tuple...) = map(tail, ts)
 map(f, ::Tuple{}...) = ()
 function map(f, t1::Tuple, t2::Tuple, ts::Tuple...)
-    @_inline_meta
+    @inline
     (f(heads(t1, t2, ts...)...), map(f, tails(t1, t2, ts...)...)...)
 end
-function map(f, t1::Any16, t2::Any16, ts::Any16...)
+function map(f, t1::Any32, t2::Any32, ts::Any32...)
     n = length(t1)
     A = Vector{Any}(undef, n)
     for i = 1:n
@@ -234,7 +334,7 @@ fill_to_length(t::Tuple{}, val, ::Val{1}) = (val,)
 fill_to_length(t::Tuple{Any}, val, ::Val{2}) = (t..., val)
 fill_to_length(t::Tuple{}, val, ::Val{2}) = (val, val)
 #function fill_to_length(t::Tuple, val, ::Val{N}) where {N}
-#    @_inline_meta
+#    @inline
 #    return (t..., ntuple(i -> val, N - length(t))...)
 #end
 
@@ -245,7 +345,7 @@ fill_to_length(t::Tuple{}, val, ::Val{2}) = (val, val)
 if nameof(@__MODULE__) === :Base
 
 function tuple_type_tail(T::Type)
-    @_pure_meta # TODO: this method is wrong (and not @pure)
+    @_total_may_throw_meta # TODO: this method is wrong (and not :total_may_throw)
     if isa(T, UnionAll)
         return UnionAll(T.var, tuple_type_tail(T.body))
     elseif isa(T, Union)
@@ -253,9 +353,9 @@ function tuple_type_tail(T::Type)
     else
         T.name === Tuple.name || throw(MethodError(tuple_type_tail, (T,)))
         if isvatuple(T) && length(T.parameters) == 1
-            va = T.parameters[1]
-            (isa(va, DataType) && isa(va.parameters[2], Int)) || return T
-            return Tuple{Vararg{va.parameters[1], va.parameters[2]-1}}
+            va = unwrap_unionall(T.parameters[1])::Core.TypeofVararg
+            (isdefined(va, :N) && isa(va.N, Int)) || return T
+            return Tuple{Vararg{va.T, va.N-1}}
         end
         return Tuple{argtail(T.parameters...)...}
     end
@@ -271,20 +371,24 @@ Tuple(x::Array{T,0}) where {T} = tuple(getindex(x))
 _totuple(::Type{Tuple{}}, itr, s...) = ()
 
 function _totuple_err(@nospecialize T)
-    @_noinline_meta
+    @noinline
     throw(ArgumentError("too few elements for tuple type $T"))
 end
 
-function _totuple(T, itr, s...)
-    @_inline_meta
+function _totuple(::Type{T}, itr, s::Vararg{Any,N}) where {T,N}
+    @inline
     y = iterate(itr, s...)
     y === nothing && _totuple_err(T)
-    return (convert(fieldtype(T, 1), y[1]), _totuple(tuple_type_tail(T), itr, y[2])...)
+    t1 = convert(fieldtype(T, 1), y[1])
+    # inference may give up in recursive calls, so annotate here to force accurate return type to be propagated
+    rT = tuple_type_tail(T)
+    ts = _totuple(rT, itr, y[2])::rT
+    return (t1, ts...)
 end
 
 # use iterative algorithm for long tuples
-function _totuple(T::Type{All16{E,N}}, itr) where {E,N}
-    len = N+16
+function _totuple(T::Type{All32{E,N}}, itr) where {E,N}
+    len = N+32
     elts = collect(E, Iterators.take(itr,len))
     if length(elts) != len
         _totuple_err(T)
@@ -303,20 +407,38 @@ _totuple(::Type{Tuple}, itr::NamedTuple) = (itr...,)
 
 end
 
+## find ##
+
+_findfirst_rec(f, i::Int, ::Tuple{}) = nothing
+_findfirst_rec(f, i::Int, t::Tuple) = (@inline; f(first(t)) ? i : _findfirst_rec(f, i+1, tail(t)))
+function _findfirst_loop(f::Function, t)
+    for i in 1:length(t)
+        f(t[i]) && return i
+    end
+    return nothing
+end
+findfirst(f::Function, t::Tuple) = length(t) < 32 ? _findfirst_rec(f, 1, t) : _findfirst_loop(f, t)
+
+function findlast(f::Function, x::Tuple)
+    r = findfirst(f, reverse(x))
+    return isnothing(r) ? r : length(x) - r + 1
+end
+
 ## filter ##
 
-filter(f, xs::Tuple) = afoldl((ys, x) -> f(x) ? (ys..., x) : ys, (), xs...)
+filter_rec(f, xs::Tuple) = afoldl((ys, x) -> f(x) ? (ys..., x) : ys, (), xs...)
 
 # use Array for long tuples
-filter(f, t::Any16) = Tuple(filter(f, collect(t)))
+filter(f, t::Tuple) = length(t) < 32 ? filter_rec(f, t) : Tuple(filter(f, collect(t)))
 
 ## comparison ##
 
-isequal(t1::Tuple, t2::Tuple) = (length(t1) == length(t2)) && _isequal(t1, t2)
-_isequal(t1::Tuple{}, t2::Tuple{}) = true
-_isequal(t1::Tuple{Any}, t2::Tuple{Any}) = isequal(t1[1], t2[1])
-_isequal(t1::Tuple, t2::Tuple) = isequal(t1[1], t2[1]) && _isequal(tail(t1), tail(t2))
-function _isequal(t1::Any16, t2::Any16)
+isequal(t1::Tuple, t2::Tuple) = length(t1) == length(t2) && _isequal(t1, t2)
+_isequal(::Tuple{}, ::Tuple{}) = true
+function _isequal(t1::Tuple{Any,Vararg{Any}}, t2::Tuple{Any,Vararg{Any}})
+    return isequal(t1[1], t2[1]) && _isequal(tail(t1), tail(t2))
+end
+function _isequal(t1::Any32, t2::Any32)
     for i = 1:length(t1)
         if !isequal(t1[i], t2[i])
             return false
@@ -346,7 +468,7 @@ function _eq_missing(t1::Tuple, t2::Tuple)
         return _eq_missing(tail(t1), tail(t2))
     end
 end
-function _eq(t1::Any16, t2::Any16)
+function _eq(t1::Any32, t2::Any32)
     anymissing = false
     for i = 1:length(t1)
         eq = (t1[i] == t2[i])
@@ -362,7 +484,7 @@ end
 const tuplehash_seed = UInt === UInt64 ? 0x77cfa1eef01bca90 : 0xf01bca90
 hash(::Tuple{}, h::UInt) = h + tuplehash_seed
 hash(t::Tuple, h::UInt) = hash(t[1], hash(tail(t), h))
-function hash(t::Any16, h::UInt)
+function hash(t::Any32, h::UInt)
     out = h + tuplehash_seed
     for i = length(t):-1:1
         out = hash(t[i], out)
@@ -383,7 +505,7 @@ function <(t1::Tuple, t2::Tuple)
     end
     return tail(t1) < tail(t2)
 end
-function <(t1::Any16, t2::Any16)
+function <(t1::Any32, t2::Any32)
     n1, n2 = length(t1), length(t2)
     for i = 1:min(n1, n2)
         a, b = t1[i], t2[i]
@@ -410,7 +532,7 @@ function isless(t1::Tuple, t2::Tuple)
     a, b = t1[1], t2[1]
     isless(a, b) || (isequal(a, b) && isless(tail(t1), tail(t2)))
 end
-function isless(t1::Any16, t2::Any16)
+function isless(t1::Any32, t2::Any32)
     n1, n2 = length(t1), length(t2)
     for i = 1:min(n1, n2)
         a, b = t1[i], t2[i]
@@ -433,17 +555,12 @@ reverse(t::Tuple) = revargs(t...)
 
 ## specialized reduction ##
 
-# TODO: these definitions cannot yet be combined, since +(x...)
-# where x might be any tuple matches too many methods.
-# TODO: this is inconsistent with the regular sum in cases where the arguments
-# require size promotion to system size.
-sum(x::Tuple{Any, Vararg{Any}}) = +(x...)
-
-# NOTE: should remove, but often used on array sizes
-# TODO: this is inconsistent with the regular prod in cases where the arguments
-# require size promotion to system size.
 prod(x::Tuple{}) = 1
-prod(x::Tuple{Any, Vararg{Any}}) = *(x...)
+# This is consistent with the regular prod because there is no need for size promotion
+# if all elements in the tuple are of system size.
+# It is defined here separately in order to support bootstrap, because it's needed earlier
+# than the general prod definition is available.
+prod(x::Tuple{Int, Vararg{Int}}) = *(x...)
 
 all(x::Tuple{}) = true
 all(x::Tuple{Bool}) = x[1]
@@ -459,25 +576,21 @@ any(x::Tuple{Bool, Bool, Bool}) = x[1]|x[2]|x[3]
 # equivalent to any(f, t), to be used only in bootstrap
 _tuple_any(f::Function, t::Tuple) = _tuple_any(f, false, t...)
 function _tuple_any(f::Function, tf::Bool, a, b...)
-    @_inline_meta
+    @inline
     _tuple_any(f, tf | f(a), b...)
 end
 _tuple_any(f::Function, tf::Bool) = tf
 
 
 # a version of `in` esp. for NamedTuple, to make it pure, and not compiled for each tuple length
-function sym_in(x::Symbol, itr::Tuple{Vararg{Symbol}})
-    @nospecialize itr
-    @_pure_meta
+function sym_in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}})
+    @_total_meta
     for y in itr
         y === x && return true
     end
     return false
 end
-function in(x::Symbol, itr::Tuple{Vararg{Symbol}})
-    @nospecialize itr
-    return sym_in(x, itr)
-end
+in(x::Symbol, @nospecialize itr::Tuple{Vararg{Symbol}}) = sym_in(x, itr)
 
 
 """
@@ -486,3 +599,6 @@ end
 Returns an empty tuple, `()`.
 """
 empty(@nospecialize x::Tuple) = ()
+
+foreach(f, itr::Tuple) = foldl((_, x) -> (f(x); nothing), itr, init=nothing)
+foreach(f, itrs::Tuple...) = foldl((_, xs) -> (f(xs...); nothing), zip(itrs...), init=nothing)
diff --git a/base/twiceprecision.jl b/base/twiceprecision.jl
index 1490a0624c7d60..860f2d23185cc6 100644
--- a/base/twiceprecision.jl
+++ b/base/twiceprecision.jl
@@ -63,7 +63,7 @@ representation, even though it is exact from the standpoint of binary
 representation.
 
 Example:
-```julia
+```julia-repl
 julia> 1.0 + 1.0001e-15
 1.000000000000001
 
@@ -94,7 +94,7 @@ numbers. Mathematically, `zhi + zlo = x * y`, where `zhi` contains the
 most significant bits and `zlo` the least significant.
 
 Example:
-```julia
+```julia-repl
 julia> x = Float32(π)
 3.1415927f0
 
@@ -126,7 +126,7 @@ numbers. Mathematically, `zhi + zlo ≈ x / y`, where `zhi` contains the
 most significant bits and `zlo` the least significant.
 
 Example:
-```julia
+```julia-repl
 julia> x, y = Float32(π), 3.1f0
 (3.1415927f0, 3.1f0)
 
@@ -162,7 +162,18 @@ div12(x, y) = div12(promote(x, y)...)
     TwicePrecision{T}((num, denom))
 
 A number with twice the precision of `T`, e.g., quad-precision if `T =
-Float64`. `hi` represents the high bits (most significant bits) and
+Float64`.
+
+!!! warn
+    `TwicePrecision` is an internal type used to increase the
+    precision of floating-point ranges, and not intended for external use.
+    If you encounter them in real code, the most likely explanation is
+    that you are directly accessing the fields of a range. Use
+    the function interface instead, `step(r)` rather than `r.step`
+
+# Extended help
+
+`hi` represents the high bits (most significant bits) and
 `lo` the low bits (least significant bits). Rational values
 `num//denom` can be approximated conveniently using the syntax
 `TwicePrecision{T}((num, denom))`.
@@ -194,6 +205,10 @@ function TwicePrecision{T}(x) where {T}
     TwicePrecision{T}(xT, T(Δx))
 end
 
+function TwicePrecision{T}(x::TwicePrecision) where {T}
+    TwicePrecision{T}(x.hi, x.lo)
+end
+
 TwicePrecision{T}(i::Integer) where {T<:AbstractFloat} =
     TwicePrecision{T}(canonicalize2(splitprec(T, i)...)...)
 
@@ -207,13 +222,21 @@ end
 
 function TwicePrecision{T}(nd::Tuple{Any,Any}) where {T}
     n, d = nd
-    TwicePrecision{T}(n) / d
+    TwicePrecision{T}(TwicePrecision{T}(n) / d)
 end
 
 function TwicePrecision{T}(nd::Tuple{I,I}, nb::Integer) where {T,I}
     twiceprecision(TwicePrecision{T}(nd), nb)
 end
 
+# Fix #39798
+# See steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer},
+#                         step::Tuple{Integer,Integer}, nb::Integer,
+#                         len::Integer, offset::Integer)
+function TwicePrecision{T}(nd::Tuple{Integer,Integer}, nb::Integer) where T
+    twiceprecision(TwicePrecision{T}(nd), nb)
+end
+
 # Truncating constructors. Useful for generating values that can be
 # exactly multiplied by small integers.
 function twiceprecision(val::T, nb::Integer) where {T<:IEEEFloat}
@@ -321,13 +344,13 @@ function steprangelen_hp(::Type{Float64}, ref::Tuple{Integer,Integer},
                          step::Tuple{Integer,Integer}, nb::Integer,
                          len::Integer, offset::Integer)
     StepRangeLen(TwicePrecision{Float64}(ref),
-                 TwicePrecision{Float64}(step, nb), Int(len), offset)
+                 TwicePrecision{Float64}(step, nb), len, offset)
 end
 
 function steprangelen_hp(::Type{T}, ref::Tuple{Integer,Integer},
                          step::Tuple{Integer,Integer}, nb::Integer,
                          len::Integer, offset::Integer) where {T<:IEEEFloat}
-    StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], Int(len), offset)
+    StepRangeLen{T}(ref[1]/ref[2], step[1]/step[2], len, offset)
 end
 
 # AbstractFloat constructors (can supply a single number or a 2-tuple
@@ -339,14 +362,13 @@ function steprangelen_hp(::Type{Float64}, ref::F_or_FF,
                          step::F_or_FF, nb::Integer,
                          len::Integer, offset::Integer)
     StepRangeLen(TwicePrecision{Float64}(ref...),
-                 twiceprecision(TwicePrecision{Float64}(step...), nb), Int(len), offset)
+                 twiceprecision(TwicePrecision{Float64}(step...), nb), len, offset)
 end
 
 function steprangelen_hp(::Type{T}, ref::F_or_FF,
                          step::F_or_FF, nb::Integer,
                          len::Integer, offset::Integer) where {T<:IEEEFloat}
-    StepRangeLen{T}(asF64(ref),
-                    asF64(step), Int(len), offset)
+    StepRangeLen{T}(asF64(ref), asF64(step), len, offset)
 end
 
 
@@ -357,33 +379,36 @@ StepRangeLen(ref::TwicePrecision{T}, step::TwicePrecision{T},
 
 # Construct range for rational start=start_n/den, step=step_n/den
 function floatrange(::Type{T}, start_n::Integer, step_n::Integer, len::Integer, den::Integer) where T
+    len = len + 0 # promote with Int
     if len < 2 || step_n == 0
-        return steprangelen_hp(T, (start_n, den), (step_n, den), 0, Int(len), 1)
+        return steprangelen_hp(T, (start_n, den), (step_n, den), 0, len, oneunit(len))
     end
     # index of smallest-magnitude value
-    imin = clamp(round(Int, -start_n/step_n+1), 1, Int(len))
+    L = typeof(len)
+    imin = clamp(round(typeof(len), -start_n/step_n+1), oneunit(L), len)
     # Compute smallest-magnitude element to 2x precision
     ref_n = start_n+(imin-1)*step_n  # this shouldn't overflow, so don't check
     nb = nbitslen(T, len, imin)
-    steprangelen_hp(T, (ref_n, den), (step_n, den), nb, Int(len), imin)
+    steprangelen_hp(T, (ref_n, den), (step_n, den), nb, len, imin)
 end
 
 function floatrange(a::AbstractFloat, st::AbstractFloat, len::Real, divisor::AbstractFloat)
+    len = len + 0 # promote with Int
     T = promote_type(typeof(a), typeof(st), typeof(divisor))
     m = maxintfloat(T, Int)
     if abs(a) <= m && abs(st) <= m && abs(divisor) <= m
         ia, ist, idivisor = round(Int, a), round(Int, st), round(Int, divisor)
         if ia == a && ist == st && idivisor == divisor
             # We can return the high-precision range
-            return floatrange(T, ia, ist, Int(len), idivisor)
+            return floatrange(T, ia, ist, len, idivisor)
         end
     end
     # Fallback (misses the opportunity to set offset different from 1,
     # but otherwise this is still high-precision)
-    steprangelen_hp(T, (a,divisor), (st,divisor), nbitslen(T, len, 1), Int(len), 1)
+    steprangelen_hp(T, (a,divisor), (st,divisor), nbitslen(T, len, 1), len, oneunit(len))
 end
 
-function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
+function (:)(start::T, step::T, stop::T) where T<:IEEEFloat
     step == 0 && throw(ArgumentError("range step cannot be zero"))
     # see if the inputs have exact rational approximations (and if so,
     # perform all computations in terms of the rationals)
@@ -399,7 +424,7 @@ function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
                     rem(den, start_d) == 0 && rem(den, step_d) == 0      # check lcm overflow
                 start_n = round(Int, start*den)
                 step_n = round(Int, step*den)
-                len = max(0, div(den*stop_n - stop_d*start_n + step_n*stop_d, step_n*stop_d))
+                len = max(0, Int(div(den*stop_n - stop_d*start_n + step_n*stop_d, step_n*stop_d)))
                 # Integer ops could overflow, so check that this makes sense
                 if isbetween(start, start + (len-1)*step, stop + step/2) &&
                         !isbetween(start, start + len*step, stop)
@@ -410,6 +435,7 @@ function (:)(start::T, step::T, stop::T) where T<:Union{Float16,Float32,Float64}
         end
     end
     # Fallback, taking start and step literally
+    # n.b. we use Int as the default length type for IEEEFloats
     lf = (stop-start)/step
     if lf < 0
         len = 0
@@ -427,7 +453,17 @@ end
 step(r::StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}) where {T<:AbstractFloat} = T(r.step)
 step(r::StepRangeLen{T,TwicePrecision{T},TwicePrecision{T}}) where {T} = T(r.step)
 
-function _range(a::T, st::T, ::Nothing, len::Integer) where T<:Union{Float16,Float32,Float64}
+range_start_step_length(a::Real, st::IEEEFloat, len::Integer) =
+    range_start_step_length(promote(a, st)..., len)
+
+range_start_step_length(a::IEEEFloat, st::Real, len::Integer) =
+    range_start_step_length(promote(a, st)..., len)
+
+range_start_step_length(a::IEEEFloat, st::IEEEFloat, len::Integer) =
+    range_start_step_length(promote(a, st)..., len)
+
+function range_start_step_length(a::T, st::T, len::Integer) where T<:IEEEFloat
+    len = len + 0 # promote with Int
     start_n, start_d = rat(a)
     step_n, step_d = rat(st)
     if start_d != 0 && step_d != 0 &&
@@ -444,10 +480,22 @@ function _range(a::T, st::T, ::Nothing, len::Integer) where T<:Union{Float16,Flo
     steprangelen_hp(T, a, st, 0, len, 1)
 end
 
+range_step_stop_length(step::Real, stop::IEEEFloat, len::Integer) =
+    range_step_stop_length(promote(step, stop)..., len)
+
+range_step_stop_length(step::IEEEFloat, stop::Real, len::Integer) =
+    range_step_stop_length(promote(step, stop)..., len)
+
+function range_step_stop_length(step::IEEEFloat, stop::IEEEFloat, len::Integer)
+    r = range_start_step_length(stop, negate(step), len)
+    reverse(r)
+end
+
 # This assumes that r.step has already been split so that (0:len-1)*r.step.hi is exact
 function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i::Integer) where T
     # Very similar to _getindex_hiprec, but optimized to avoid a 2nd call to add12
-    @_inline_meta
+    @inline
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     u = i - r.offset
     shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
     x_hi, x_lo = add12(r.ref.hi, shift_hi)
@@ -455,6 +503,7 @@ function unsafe_getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, i
 end
 
 function _getindex_hiprec(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecision}, i::Integer)
+    i isa Bool && throw(ArgumentError("invalid index: $i of type Bool"))
     u = i - r.offset
     shift_hi, shift_lo = u*r.step.hi, u*r.step.lo
     x_hi, x_lo = add12(r.ref.hi, shift_hi)
@@ -462,20 +511,41 @@ function _getindex_hiprec(r::StepRangeLen{<:Any,<:TwicePrecision,<:TwicePrecisio
     TwicePrecision(x_hi, x_lo)
 end
 
-function getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, s::OrdinalRange{<:Integer}) where T
+function getindex(r::StepRangeLen{T,<:TwicePrecision,<:TwicePrecision}, s::OrdinalRange{S}) where {T, S<:Integer}
     @boundscheck checkbounds(r, s)
-    soffset = 1 + round(Int, (r.offset - first(s))/step(s))
-    soffset = clamp(soffset, 1, length(s))
-    ioffset = first(s) + (soffset-1)*step(s)
-    if step(s) == 1 || length(s) < 2
-        newstep = r.step
-    else
-        newstep = twiceprecision(r.step*step(s), nbitslen(T, length(s), soffset))
-    end
-    if ioffset == r.offset
-        StepRangeLen(r.ref, newstep, length(s), max(1,soffset))
+    len = length(s)
+    L = typeof(len)
+    sstep = step_hp(s)
+    rstep = step_hp(r)
+    if S === Bool
+        #rstep *= one(sstep)
+        if len == 0
+            return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
+        elseif len == 1
+            if first(s)
+                return StepRangeLen{T}(first(r), rstep, oneunit(L), oneunit(L))
+            else
+                return StepRangeLen{T}(first(r), rstep, zero(L), oneunit(L))
+            end
+        else # len == 2
+            return StepRangeLen{T}(last(r), step(r), oneunit(L), oneunit(L))
+        end
     else
-        StepRangeLen(r.ref + (ioffset-r.offset)*r.step, newstep, length(s), max(1,soffset))
+        soffset = round(L, (r.offset - first(s))/sstep + 1)
+        soffset = clamp(soffset, oneunit(L), len)
+        ioffset = L(first(s) + (soffset - oneunit(L)) * sstep)
+        if sstep == 1 || len < 2
+            newstep = rstep #* one(sstep)
+        else
+            newstep = rstep * sstep
+            newstep = twiceprecision(newstep, nbitslen(T, len, soffset))
+        end
+        soffset = max(oneunit(L), soffset)
+        if ioffset == r.offset
+            return StepRangeLen{T}(r.ref, newstep, len, soffset)
+        else
+            return StepRangeLen{T}(r.ref + (ioffset-r.offset)*rstep, newstep, len, soffset)
+        end
     end
 end
 
@@ -485,30 +555,30 @@ end
 /(r::StepRangeLen{<:Real,<:TwicePrecision}, x::Real) =
     StepRangeLen(r.ref/x, twiceprecision(r.step/x, nbitslen(r)), length(r), r.offset)
 
-StepRangeLen{T,R,S}(r::StepRangeLen{T,R,S}) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision} = r
+StepRangeLen{T,R,S,L}(r::StepRangeLen{T,R,S,L}) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision,L} = r
 
-StepRangeLen{T,R,S}(r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision} =
-    _convertSRL(StepRangeLen{T,R,S}, r)
+StepRangeLen{T,R,S,L}(r::StepRangeLen) where {T<:AbstractFloat,R<:TwicePrecision,S<:TwicePrecision,L} =
+    _convertSRL(StepRangeLen{T,R,S,L}, r)
 
 StepRangeLen{Float64}(r::StepRangeLen) =
-    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r)
+    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64},Int}, r)
 StepRangeLen{T}(r::StepRangeLen) where {T<:IEEEFloat} =
-    _convertSRL(StepRangeLen{T,Float64,Float64}, r)
+    _convertSRL(StepRangeLen{T,Float64,Float64,Int}, r)
 
 StepRangeLen{Float64}(r::AbstractRange) =
-    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64}}, r)
+    _convertSRL(StepRangeLen{Float64,TwicePrecision{Float64},TwicePrecision{Float64},Int}, r)
 StepRangeLen{T}(r::AbstractRange) where {T<:IEEEFloat} =
-    _convertSRL(StepRangeLen{T,Float64,Float64}, r)
+    _convertSRL(StepRangeLen{T,Float64,Float64,Int}, r)
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{<:Integer}) where {T,R,S}
-    StepRangeLen{T,R,S}(R(r.ref), S(r.step), length(r), r.offset)
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::StepRangeLen{<:Integer}) where {T,R,S,L}
+    StepRangeLen{T,R,S,L}(R(r.ref), S(r.step), L(length(r)), L(r.offset))
 end
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{<:Integer}) where {T,R,S}
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{<:Integer}) where {T,R,S,L}
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), L(length(r)))
 end
 
-function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,R,S,U}
+function _convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{U}) where {T,R,S,L,U}
     # if start and step have a rational approximation in the old type,
     # then we transfer that rational approximation to the new type
     f, s = first(r), step(r)
@@ -522,17 +592,17 @@ function _convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,
                 rem(den, start_d) == 0 && rem(den, step_d) == 0
             start_n = round(Int, f*den)
             step_n = round(Int, s*den)
-            return floatrange(T, start_n, step_n, length(r), den)
+            return floatrange(T, start_n, step_n, L(length(r)), den)
         end
     end
-    __convertSRL(StepRangeLen{T,R,S}, r)
+    return __convertSRL(StepRangeLen{T,R,S,L}, r)
 end
 
-function __convertSRL(::Type{StepRangeLen{T,R,S}}, r::StepRangeLen{U}) where {T,R,S,U}
-    StepRangeLen{T,R,S}(R(r.ref), S(r.step), length(r), r.offset)
+function __convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::StepRangeLen{U}) where {T,R,S,L,U}
+    StepRangeLen{T,R,S,L}(R(r.ref), S(r.step), L(length(r)), L(r.offset))
 end
-function __convertSRL(::Type{StepRangeLen{T,R,S}}, r::AbstractRange{U}) where {T,R,S,U}
-    StepRangeLen{T,R,S}(R(first(r)), S(step(r)), length(r))
+function __convertSRL(::Type{StepRangeLen{T,R,S,L}}, r::AbstractRange{U}) where {T,R,S,L,U}
+    StepRangeLen{T,R,S,L}(R(first(r)), S(step(r)), L(length(r)))
 end
 
 function sum(r::StepRangeLen)
@@ -543,7 +613,7 @@ function sum(r::StepRangeLen)
     np, nn = l - r.offset, r.offset - 1  # positive, negative
     # To prevent overflow in sum(1:n), multiply its factors by the step
     sp, sn = sumpair(np), sumpair(nn)
-    W = widen(Int)
+    W = widen(typeof(l))
     Δn = W(sp[1]) * W(sp[2]) - W(sn[1]) * W(sn[2])
     s = r.step * Δn
     # Add in contributions of ref
@@ -579,19 +649,20 @@ function +(r1::StepRangeLen{T,R}, r2::StepRangeLen{T,R}) where T where R<:TwiceP
         imid = r1.offset
         ref = r1.ref + r2.ref
     else
-        imid = round(Int, (r1.offset+r2.offset)/2)
+        imid = round(typeof(len), (r1.offset+r2.offset)/2)
         ref1mid = _getindex_hiprec(r1, imid)
         ref2mid = _getindex_hiprec(r2, imid)
         ref = ref1mid + ref2mid
     end
     step = twiceprecision(r1.step + r2.step, nbitslen(T, len, imid))
-    StepRangeLen{T,typeof(ref),typeof(step)}(ref, step, len, imid)
+    StepRangeLen{T,typeof(ref),typeof(step),typeof(len)}(ref, step, len, imid)
 end
 
 ## LinRange
 
 # For Float16, Float32, and Float64, this returns a StepRangeLen
-function _range(start::T, ::Nothing, stop::T, len::Integer) where {T<:IEEEFloat}
+function range_start_stop_length(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
+    len = len + 0 # promote with Int
     len < 2 && return _linspace1(T, start, stop, len)
     if start == stop
         return steprangelen_hp(T, start, zero(T), 0, len, 1)
@@ -614,32 +685,35 @@ function _range(start::T, ::Nothing, stop::T, len::Integer) where {T<:IEEEFloat}
 end
 
 function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
+    len = len + 0 # promote with Int
     (isfinite(start) && isfinite(stop)) || throw(ArgumentError("start and stop must be finite, got $start and $stop"))
     # Find the index that returns the smallest-magnitude element
     Δ, Δfac = stop-start, 1
     if !isfinite(Δ)   # handle overflow for large endpoints
-        Δ, Δfac = stop/len - start/len, Int(len)
+        Δ, Δfac = stop/len - start/len, len
     end
     tmin = -(start/Δ)/Δfac            # t such that (1-t)*start + t*stop == 0
-    imin = round(Int, tmin*(len-1)+1) # index approximately corresponding to t
+    L = typeof(len)
+    lenn1 = len - oneunit(L)
+    imin = round(L, tmin*lenn1 + 1) # index approximately corresponding to t
     if 1 < imin < len
         # The smallest-magnitude element is in the interior
-        t = (imin-1)/(len-1)
+        t = (imin - 1)/lenn1
         ref = T((1-t)*start + t*stop)
         step = imin-1 < len-imin ? (ref-start)/(imin-1) : (stop-ref)/(len-imin)
     elseif imin <= 1
-        imin = 1
+        imin = oneunit(L)
         ref = start
-        step = (Δ/(len-1))*Δfac
+        step = (Δ/(lenn1))*Δfac
     else
-        imin = Int(len)
+        imin = len
         ref = stop
-        step = (Δ/(len-1))*Δfac
+        step = (Δ/(lenn1))*Δfac
     end
     if len == 2 && !isfinite(step)
         # For very large endpoints where step overflows, exploit the
         # split-representation to handle the overflow
-        return steprangelen_hp(T, start, (-start, stop), 0, 2, 1)
+        return steprangelen_hp(T, start, (-start, stop), 0, len, oneunit(L))
     end
     # 2x calculations to get high precision endpoint matching while also
     # preventing overflow in ref_hi+(i-offset)*step_hi
@@ -652,23 +726,28 @@ function _linspace(start::T, stop::T, len::Integer) where {T<:IEEEFloat}
     a, b = (start - x1_hi) - x1_lo, (stop - x2_hi) - x2_lo
     step_lo = (b - a)/(len - 1)
     ref_lo = a - (1 - imin)*step_lo
-    steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, Int(len), imin)
+    steprangelen_hp(T, (ref, ref_lo), (step_hi, step_lo), 0, len, imin)
 end
 
 # range for rational numbers, start = start_n/den, stop = stop_n/den
 # Note this returns a StepRangeLen
-_linspace(::Type{T}, start::Integer, stop::Integer, len::Integer) where {T<:IEEEFloat} = _linspace(T, start, stop, len, 1)
+_linspace(::Type{T}, start::Integer, stop::Integer, len::Integer) where {T<:IEEEFloat} = _linspace(T, start, stop, len, one(start))
 function _linspace(::Type{T}, start_n::Integer, stop_n::Integer, len::Integer, den::Integer) where T<:IEEEFloat
+    len = len + 0 # promote with Int
     len < 2 && return _linspace1(T, start_n/den, stop_n/den, len)
-    start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len, 1)
+    L = typeof(len)
+    start_n == stop_n && return steprangelen_hp(T, (start_n, den), (zero(start_n), den), 0, len, oneunit(L))
     tmin = -start_n/(Float64(stop_n) - Float64(start_n))
-    imin = round(Int, tmin*(len-1)+1)
-    imin = clamp(imin, 1, Int(len))
-    ref_num = Int128(len-imin) * start_n + Int128(imin-1) * stop_n
-    ref_denom = Int128(len-1) * den
+    imin = round(typeof(len), tmin*(len-1)+1)
+    imin = clamp(imin, oneunit(L), len)
+    W = widen(L)
+    start_n = W(start_n)
+    stop_n = W(stop_n)
+    ref_num = W(len-imin) * start_n + W(imin-1) * stop_n
+    ref_denom = W(len-1) * den
     ref = (ref_num, ref_denom)
-    step_full = (Int128(stop_n) - Int128(start_n), ref_denom)
-    steprangelen_hp(T, ref, step_full,  nbitslen(T, len, imin), Int(len), imin)
+    step_full = (stop_n - start_n, ref_denom)
+    steprangelen_hp(T, ref, step_full, nbitslen(T, len, imin), len, imin)
 end
 
 # For len < 2
@@ -680,7 +759,7 @@ function _linspace1(::Type{T}, start, stop, len::Integer) where T<:IEEEFloat
         # The output type must be consistent with steprangelen_hp
         if T<:Union{Float32,Float16}
             return StepRangeLen{T}(Float64(start), Float64(start) - Float64(stop), len, 1)
-        else
+        else # T == Float64
             return StepRangeLen(TwicePrecision(start, zero(T)), TwicePrecision(start, -stop), len, 1)
         end
     end
@@ -689,8 +768,8 @@ end
 
 ### Numeric utilities
 
-# Approximate x with a rational representation. Guaranteed to return,
-# but not guaranteed to return a precise answer.
+# Approximate x with a rational representation as a pair of Int values.
+# Guaranteed to return, but not guaranteed to return a precise answer.
 # https://en.wikipedia.org/wiki/Continued_fraction#Best_rational_approximations
 function rat(x)
     y = x
@@ -698,7 +777,7 @@ function rat(x)
     b = c = 0
     m = maxintfloat(narrow(typeof(x)), Int)
     while abs(y) <= m
-        f = trunc(Int,y)
+        f = trunc(Int, y)
         y -= f
         a, c = f*a + c, a
         b, d = f*b + d, b
@@ -718,7 +797,7 @@ narrow(::Type{Float32}) = Float16
 narrow(::Type{Float16}) = Float16
 
 function _tp_prod(t::TwicePrecision, x, y...)
-    @_inline_meta
+    @inline
     _tp_prod(t * x, y...)
 end
 _tp_prod(t::TwicePrecision) = t
diff --git a/base/util.jl b/base/util.jl
index e9db6af3150b72..df9e29790deb66 100644
--- a/base/util.jl
+++ b/base/util.jl
@@ -18,6 +18,7 @@ const text_colors = Dict{Union{Symbol,Int},String}(
     :light_blue    => "\033[94m",
     :light_magenta => "\033[95m",
     :light_cyan    => "\033[96m",
+    :light_white   => "\033[97m",
     :normal        => "\033[0m",
     :default       => "\033[39m",
     :bold          => "\033[1m",
@@ -67,7 +68,9 @@ Printing with the color `:nothing` will print the string without modifications.
 """
 text_colors
 
-function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}, io::IO, args...; bold::Bool = false)
+function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}, io::IO, args...;
+        bold::Bool = false, underline::Bool = false, blink::Bool = false,
+        reverse::Bool = false, hidden::Bool = false)
     buf = IOBuffer()
     iscolor = get(io, :color, false)::Bool
     try f(IOContext(buf, io), args...)
@@ -77,12 +80,25 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
             print(io, str)
         else
             bold && color === :bold && (color = :nothing)
+            underline && color === :underline && (color = :nothing)
+            blink && color === :blink && (color = :nothing)
+            reverse && color === :reverse && (color = :nothing)
+            hidden && color === :hidden && (color = :nothing)
             enable_ansi  = get(text_colors, color, text_colors[:default]) *
-                               (bold ? text_colors[:bold] : "")
-            disable_ansi = (bold ? disable_text_style[:bold] : "") *
+                               (bold ? text_colors[:bold] : "") *
+                               (underline ? text_colors[:underline] : "") *
+                               (blink ? text_colors[:blink] : "") *
+                               (reverse ? text_colors[:reverse] : "") *
+                               (hidden ? text_colors[:hidden] : "")
+
+            disable_ansi = (hidden ? disable_text_style[:hidden] : "") *
+                           (reverse ? disable_text_style[:reverse] : "") *
+                           (blink ? disable_text_style[:blink] : "") *
+                           (underline ? disable_text_style[:underline] : "") *
+                           (bold ? disable_text_style[:bold] : "") *
                                get(disable_text_style, color, text_colors[:default])
             first = true
-            for line in split(str, '\n')
+            for line in eachsplit(str, '\n')
                 first || print(buf, '\n')
                 first = false
                 isempty(line) && continue
@@ -94,21 +110,30 @@ function with_output_color(@nospecialize(f::Function), color::Union{Int, Symbol}
 end
 
 """
-    printstyled([io], xs...; bold::Bool=false, color::Union{Symbol,Int}=:normal)
+    printstyled([io], xs...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Symbol,Int}=:normal)
 
 Print `xs` in a color specified as a symbol or integer, optionally in bold.
 
-`color` may take any of the values $(Base.available_text_colors_docstring)
+Keyword `color` may take any of the values $(Base.available_text_colors_docstring)
 or an integer between 0 and 255 inclusive. Note that not all terminals support 256 colors.
-If the keyword `bold` is given as `true`, the result will be printed in bold.
+
+Keywords `bold=true`, `underline=true`, `blink=true` are self-explanatory.
+Keyword `reverse=true` prints with foreground and background colors exchanged,
+and `hidden=true` should be invisibe in the terminal but can still be copied.
+These properties can be used in any combination.
+
+See also [`print`](@ref), [`println`](@ref), [`show`](@ref).
+
+!!! compat "Julia 1.7"
+    Keywords except `color` and `bold` were added in Julia 1.7.
 """
-printstyled(io::IO, msg...; bold::Bool=false, color::Union{Int,Symbol}=:normal) =
-    with_output_color(print, color, io, msg...; bold=bold)
-printstyled(msg...; bold::Bool=false, color::Union{Int,Symbol}=:normal) =
-    printstyled(stdout, msg...; bold=bold, color=color)
+@constprop :none printstyled(io::IO, msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
+    with_output_color(print, color, io, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden)
+@constprop :none printstyled(msg...; bold::Bool=false, underline::Bool=false, blink::Bool=false, reverse::Bool=false, hidden::Bool=false, color::Union{Int,Symbol}=:normal) =
+    printstyled(stdout, msg...; bold=bold, underline=underline, blink=blink, reverse=reverse, hidden=hidden, color=color)
 
 """
-    Base.julia_cmd(juliapath=joinpath(Sys.BINDIR::String, julia_exename()))
+    Base.julia_cmd(juliapath=joinpath(Sys.BINDIR, julia_exename()))
 
 Return a julia command similar to the one of the running process.
 Propagates any of the `--cpu-target`, `--sysimage`, `--compile`, `--sysimage-native-code`,
@@ -124,7 +149,7 @@ Among others, `--math-mode`, `--warn-overwrite`, and `--trace-compile` are notab
 !!! compat "Julia 1.5"
     The flags `--color` and `--startup-file` were added in Julia 1.5.
 """
-function julia_cmd(julia=joinpath(Sys.BINDIR::String, julia_exename()))
+function julia_cmd(julia=joinpath(Sys.BINDIR, julia_exename()))
     opts = JLOptions()
     cpu_target = unsafe_string(opts.cpu_target)
     image_file = unsafe_string(opts.image_file)
@@ -154,13 +179,14 @@ function julia_cmd(julia=joinpath(Sys.BINDIR::String, julia_exename()))
                   elseif opts.check_bounds == 2
                       "no" # off
                   else
-                      "" # "default"
+                      "" # default = "auto"
                   end
         isempty(check_bounds) || push!(addflags, "--check-bounds=$check_bounds")
     end
     opts.can_inline == 0 && push!(addflags, "--inline=no")
     opts.use_compiled_modules == 0 && push!(addflags, "--compiled-modules=no")
     opts.opt_level == 2 || push!(addflags, "-O$(opts.opt_level)")
+    opts.opt_level_min == 0 || push!(addflags, "--min-optlevel=$(opts.opt_level_min)")
     push!(addflags, "-g$(opts.debug_level)")
     if opts.code_coverage != 0
         # Forward the code-coverage flag only if applicable (if the filename is pid-dependent)
@@ -170,6 +196,8 @@ function julia_cmd(julia=joinpath(Sys.BINDIR::String, julia_exename()))
                 push!(addflags, "--code-coverage=user")
             elseif opts.code_coverage == 2
                 push!(addflags, "--code-coverage=all")
+            elseif opts.code_coverage == 3
+                push!(addflags, "--code-coverage=@$(unsafe_string(opts.tracked_path))")
             end
             isempty(coverage_file) || push!(addflags, "--code-coverage=$coverage_file")
         end
@@ -178,6 +206,8 @@ function julia_cmd(julia=joinpath(Sys.BINDIR::String, julia_exename()))
         push!(addflags, "--track-allocation=user")
     elseif opts.malloc_log == 2
         push!(addflags, "--track-allocation=all")
+    elseif opts.malloc_log == 3
+        push!(addflags, "--track-allocation=@$(unsafe_string(opts.tracked_path))")
     end
     if opts.color == 1
         push!(addflags, "--color=yes")
@@ -187,6 +217,9 @@ function julia_cmd(julia=joinpath(Sys.BINDIR::String, julia_exename()))
     if opts.startupfile == 2
         push!(addflags, "--startup-file=no")
     end
+    if opts.use_sysimage_native_code == 0
+        push!(addflags, "--sysimage-native-code=no")
+    end
     return `$julia -C$cpu_target -J$image_file $addflags`
 end
 
@@ -265,6 +298,16 @@ is encountered or EOF (^D) character is entered on a blank line. If a `default`
 then the user can enter just a newline character to select the `default`.
 
 See also `Base.getpass` and `Base.winprompt` for secure entry of passwords.
+
+# Example
+
+```julia-repl
+julia> your_name = Base.prompt("Enter your name");
+Enter your name: Logan
+
+julia> your_name
+"Logan"
+```
 """
 function prompt(input::IO, output::IO, message::AbstractString; default::AbstractString="")
     msg = !isempty(default) ? "$message [$default]: " : "$message: "
@@ -306,7 +349,13 @@ if Sys.iswindows()
         succeeded = ccall((:CredPackAuthenticationBufferW, "credui.dll"), stdcall, Bool,
             (UInt32, Cwstring, Cwstring, Ptr{UInt8}, Ptr{UInt32}),
              CRED_PACK_GENERIC_CREDENTIALS, default_username, "", credbuf, credbufsize)
-        @assert succeeded
+        if !succeeded
+            credbuf = resize!(credbuf, credbufsize[])
+            succeeded = ccall((:CredPackAuthenticationBufferW, "credui.dll"), stdcall, Bool,
+                (UInt32, Cwstring, Cwstring, Ptr{UInt8}, Ptr{UInt32}),
+                 CRED_PACK_GENERIC_CREDENTIALS, default_username, "", credbuf, credbufsize)
+            @assert succeeded
+        end
 
         # Step 2: Create the actual dialog
         #      2.1: Set up the window
@@ -364,6 +413,8 @@ end
 
 unsafe_crc32c(a, n, crc) = ccall(:jl_crc32c, UInt32, (UInt32, Ptr{UInt8}, Csize_t), crc, a, n)
 
+_crc32c(a::NTuple{<:Any, UInt8}, crc::UInt32=0x00000000) =
+    unsafe_crc32c(Ref(a), length(a) % Csize_t, crc)
 _crc32c(a::Union{Array{UInt8},FastContiguousSubArray{UInt8,N,<:Array{UInt8}} where N}, crc::UInt32=0x00000000) =
     unsafe_crc32c(a, length(a) % Csize_t, crc)
 
@@ -385,6 +436,8 @@ _crc32c(io::IO, crc::UInt32=0x00000000) = _crc32c(io, typemax(Int64), crc)
 _crc32c(io::IOStream, crc::UInt32=0x00000000) = _crc32c(io, filesize(io)-position(io), crc)
 _crc32c(uuid::UUID, crc::UInt32=0x00000000) =
     ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt128}, Csize_t), crc, uuid.value, 16)
+_crc32c(x::UInt64, crc::UInt32=0x00000000) =
+    ccall(:jl_crc32c, UInt32, (UInt32, Ref{UInt64}, Csize_t), crc, x, 8)
 
 """
     @kwdef typedef
@@ -520,7 +573,7 @@ to the standard libraries before running the tests.
 If a seed is provided via the keyword argument, it is used to seed the
 global RNG in the context where the tests are run; otherwise the seed is chosen randomly.
 """
-function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS::Int / 2),
+function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS / 2),
                   exit_on_error::Bool=false,
                   revise::Bool=false,
                   seed::Union{BitInteger,Nothing}=nothing)
@@ -532,13 +585,18 @@ function runtests(tests = ["all"]; ncores::Int = ceil(Int, Sys.CPU_THREADS::Int
     seed !== nothing && push!(tests, "--seed=0x$(string(seed % UInt128, base=16))") # cast to UInt128 to avoid a minus sign
     ENV2 = copy(ENV)
     ENV2["JULIA_CPU_THREADS"] = "$ncores"
+    ENV2["JULIA_DEPOT_PATH"] = mktempdir(; cleanup = true)
+    delete!(ENV2, "JULIA_LOAD_PATH")
+    delete!(ENV2, "JULIA_PROJECT")
     try
-        run(setenv(`$(julia_cmd()) $(joinpath(Sys.BINDIR::String,
+        run(setenv(`$(julia_cmd()) $(joinpath(Sys.BINDIR,
             Base.DATAROOTDIR, "julia", "test", "runtests.jl")) $tests`, ENV2))
         nothing
     catch
         buf = PipeBuffer()
+        original_load_path = copy(Base.LOAD_PATH); empty!(Base.LOAD_PATH); pushfirst!(Base.LOAD_PATH, "@stdlib")
         Base.require(Base, :InteractiveUtils).versioninfo(buf)
+        empty!(Base.LOAD_PATH); append!(Base.LOAD_PATH, original_load_path)
         error("A test has failed. Please submit a bug report (https://github.com/JuliaLang/julia/issues)\n" *
               "including error messages above and the output of versioninfo():\n$(read(buf, String))")
     end
diff --git a/base/uuid.jl b/base/uuid.jl
index ce46a047fe7b93..ff4df68ddb7c8c 100644
--- a/base/uuid.jl
+++ b/base/uuid.jl
@@ -1,9 +1,9 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-    Represents a Universally Unique Identifier (UUID).
-    Can be built from one `UInt128` (all byte values), two `UInt64`, or four `UInt32`.
-    Conversion from a string will check the UUID validity.
+Represents a Universally Unique Identifier (UUID).
+Can be built from one `UInt128` (all byte values), two `UInt64`, or four `UInt32`.
+Conversion from a string will check the UUID validity.
 """
 struct UUID
     value::UInt128
@@ -32,56 +32,60 @@ end
 UInt128(u::UUID) = u.value
 
 let
-@noinline throw_malformed_uuid(s) = throw(ArgumentError("Malformed UUID string: $(repr(s))"))
+    uuid_hash_seed = UInt === UInt64 ? 0xd06fa04f86f11b53 : 0x96a1f36d
+    Base.hash(uuid::UUID, h::UInt) = hash(uuid_hash_seed, hash(convert(NTuple{2, UInt64}, uuid), h))
+end
+
+let
 @inline function uuid_kernel(s, i, u)
     _c = UInt32(@inbounds codeunit(s, i))
     d = __convert_digit(_c, UInt32(16))
-    d >= 16 && throw_malformed_uuid(s)
+    d >= 16 && return nothing
     u <<= 4
-    u | d
+    return u | d
 end
 
-global UUID
-function UUID(s::AbstractString)
+function Base.tryparse(::Type{UUID}, s::AbstractString)
     u = UInt128(0)
-    ncodeunits(s) != 36 && throw_malformed_uuid(s)
+    ncodeunits(s) != 36 && return nothing
     for i in 1:8
         u = uuid_kernel(s, i, u)
+        u === nothing && return nothing
     end
-    @inbounds codeunit(s, 9) == UInt8('-') || @goto error
+    @inbounds codeunit(s, 9) == UInt8('-') || return nothing
     for i in 10:13
         u = uuid_kernel(s, i, u)
+        u === nothing && return nothing
     end
-    @inbounds codeunit(s, 14) == UInt8('-') || @goto error
+    @inbounds codeunit(s, 14) == UInt8('-') || return nothing
     for i in 15:18
         u = uuid_kernel(s, i, u)
+        u === nothing && return nothing
     end
-    @inbounds codeunit(s, 19) == UInt8('-') || @goto error
+    @inbounds codeunit(s, 19) == UInt8('-') || return nothing
     for i in 20:23
         u = uuid_kernel(s, i, u)
+        u === nothing && return nothing
     end
-    @inbounds codeunit(s, 24) == UInt8('-') || @goto error
+    @inbounds codeunit(s, 24) == UInt8('-') || return nothing
     for i in 25:36
         u = uuid_kernel(s, i, u)
+        u === nothing && return nothing
     end
     return Base.UUID(u)
-    @label error
-    throw_malformed_uuid(s)
 end
 end
 
-parse(::Type{UUID}, s::AbstractString) = UUID(s)
-function tryparse(::Type{UUID}, s::AbstractString)
-    try
-        return parse(UUID, s)
-    catch e
-        if isa(e, ArgumentError)
-            return nothing
-        end
-        rethrow(e)
+let
+    @noinline throw_malformed_uuid(s) = throw(ArgumentError("Malformed UUID string: $(repr(s))"))
+    function Base.parse(::Type{UUID}, s::AbstractString)
+        uuid = tryparse(UUID, s)
+        return uuid === nothing ? throw_malformed_uuid(s) : uuid
     end
 end
 
+UUID(s::AbstractString) = parse(UUID, s)
+
 let groupings = [36:-1:25; 23:-1:20; 18:-1:15; 13:-1:10; 8:-1:1]
     global string
     function string(u::UUID)
diff --git a/base/version.jl b/base/version.jl
index 3a57d40fda752f..978abbba1a8aab 100644
--- a/base/version.jl
+++ b/base/version.jl
@@ -8,18 +8,32 @@ const VInt = UInt32
 """
     VersionNumber
 
-Version number type which follow the specifications of
-[semantic versioning](https://semver.org/), composed of major, minor
+Version number type which follows the specifications of
+[semantic versioning (semver)](https://semver.org/), composed of major, minor
 and patch numeric values, followed by pre-release and build
-alpha-numeric annotations. See also [`@v_str`](@ref).
+alpha-numeric annotations.
+
+`VersionNumber` objects can be compared with all of the standard comparison
+operators (`==`, `<`, `<=`, etc.), with the result following semver rules.
+
+See also [`@v_str`](@ref) to efficiently construct `VersionNumber` objects
+from semver-format literal strings, [`VERSION`](@ref) for the `VersionNumber`
+of Julia itself, and [Version Number Literals](@ref man-version-number-literals)
+in the manual.
 
 # Examples
 ```jldoctest
-julia> VersionNumber("1.2.3")
+julia> a = VersionNumber(1, 2, 3)
 v"1.2.3"
 
-julia> VersionNumber("2.0.1-rc1")
+julia> a >= v"1.2"
+true
+
+julia> b = VersionNumber("2.0.1-rc1")
 v"2.0.1-rc1"
+
+julia> b >= v"2.0.1"
+false
 ```
 """
 struct VersionNumber
@@ -66,6 +80,7 @@ VersionNumber(major::Integer, minor::Integer = 0, patch::Integer = 0,
         map(x->x isa Integer ? UInt64(x) : String(x), bld))
 
 VersionNumber(v::Tuple) = VersionNumber(v...)
+VersionNumber(v::VersionNumber) = v
 
 function print(io::IO, v::VersionNumber)
     v == typemax(VersionNumber) && return print(io, "∞")
@@ -100,17 +115,17 @@ const VERSION_REGEX = r"^
 $"ix
 
 function split_idents(s::AbstractString)
-    idents = split(s, '.')
+    idents = eachsplit(s, '.')
     pidents = Union{UInt64,String}[occursin(r"^\d+$", ident) ? parse(UInt64, ident) : String(ident) for ident in idents]
     return tuple(pidents...)::VerTuple
 end
 
-function VersionNumber(v::AbstractString)
+function tryparse(::Type{VersionNumber}, v::AbstractString)
     v == "∞" && return typemax(VersionNumber)
-    m = match(VERSION_REGEX, v)
-    m === nothing && throw(ArgumentError("invalid version string: $v"))
+    m = match(VERSION_REGEX, String(v)::String)
+    m === nothing && return nothing
     major, minor, patch, minus, prerl, plus, build = m.captures
-    major = parse(VInt, major)
+    major = parse(VInt, major::AbstractString)
     minor = minor !== nothing ? parse(VInt, minor) : VInt(0)
     patch = patch !== nothing ? parse(VInt, patch) : VInt(0)
     if prerl !== nothing && !isempty(prerl) && prerl[1] == '-'
@@ -121,18 +136,14 @@ function VersionNumber(v::AbstractString)
     return VersionNumber(major, minor, patch, prerl::VerTuple, build::VerTuple)
 end
 
-parse(::Type{VersionNumber}, v::AbstractString) = VersionNumber(v)
-function tryparse(::Type{VersionNumber}, v::AbstractString)
-    try
-        return VersionNumber(v)
-    catch e
-        if isa(e, InterruptException)
-            rethrow(e)
-        end
-        return nothing
-    end
+function parse(::Type{VersionNumber}, v::AbstractString)
+    ver = tryparse(VersionNumber, v)
+    ver === nothing && throw(ArgumentError("invalid version string: $v"))
+    return ver
 end
 
+VersionNumber(v::AbstractString) = parse(VersionNumber, v)
+
 """
     @v_str
 
@@ -225,7 +236,7 @@ nextmajor(v::VersionNumber) = v < thismajor(v) ? thismajor(v) : VersionNumber(v.
 """
     VERSION
 
-A `VersionNumber` object describing which version of Julia is in use. For details see
+A [`VersionNumber`](@ref) object describing which version of Julia is in use. See also
 [Version Number Literals](@ref man-version-number-literals).
 """
 const VERSION = try
@@ -254,6 +265,8 @@ else
     VersionNumber(libllvm_version_string)
 end
 
+libllvm_path() = ccall(:jl_get_libllvm, Any, ())
+
 function banner(io::IO = stdout)
     if GIT_VERSION_INFO.tagged_commit
         commit_string = TAGGED_RELEASE_BANNER
diff --git a/base/version_git.sh b/base/version_git.sh
index d2ac9cb6058a70..2a3352d1066efd 100644
--- a/base/version_git.sh
+++ b/base/version_git.sh
@@ -5,14 +5,16 @@
 
 echo "# This file was autogenerated in base/version_git.sh"
 echo "struct GitVersionInfo"
-echo "    commit::AbstractString"
-echo "    commit_short::AbstractString"
-echo "    branch::AbstractString"
+echo "    commit::String"
+echo "    commit_short::String"
+echo "    branch::String"
 echo "    build_number::Int"
-echo "    date_string::AbstractString"
+echo "    date_string::String"
 echo "    tagged_commit::Bool"
 echo "    fork_master_distance::Int"
 echo "    fork_master_timestamp::Float64"
+echo "    build_system_commit::String"
+echo "    build_system_commit_short::String"
 echo "end"
 echo ""
 
@@ -22,7 +24,7 @@ cd $1
 if [  "$#" = "2"  -a "$2" = "NO_GIT" ]; then
     # this comment is used in base/Makefile to distinguish boilerplate
     echo "# Default output if git is not available."
-    echo "const GIT_VERSION_INFO = GitVersionInfo(\"\" ,\"\" ,\"\" ,0 ,\"\" ,true ,0 ,0.)"
+    echo 'const GIT_VERSION_INFO = GitVersionInfo("", "", "", 0, "", true, 0, 0.0, "", "")'
     exit 0
 fi
 # Collect temporary variables
@@ -82,6 +84,15 @@ if [ -z "$fork_master_timestamp" ]; then
     fork_master_timestamp="0"
 fi
 
+build_system_directory="../.buildkite"
+if [ -d "${build_system_directory}/.git" ]; then
+    build_system_commit=$(git -C "${build_system_directory}" rev-parse HEAD)
+    build_system_commit_short=$(git -C "${build_system_directory}" rev-parse --short HEAD)
+else
+    build_system_commit=""
+    build_system_commit_short=""
+fi
+
 echo "const GIT_VERSION_INFO = GitVersionInfo("
 echo "    \"$commit\","
 echo "    \"$commit_short\","
@@ -90,5 +101,7 @@ echo "    $build_number,"
 echo "    \"$date_string\","
 echo "    $tagged_commit,"
 echo "    $fork_master_distance,"
-echo "    $fork_master_timestamp."
+echo "    $fork_master_timestamp.0,"
+echo "    \"$build_system_commit\","
+echo "    \"$build_system_commit_short\","
 echo ")"
diff --git a/base/views.jl b/base/views.jl
index ccf24d4cdea3a7..8553695868d6c5 100644
--- a/base/views.jl
+++ b/base/views.jl
@@ -42,7 +42,7 @@ function replace_ref_begin_end_!(ex, withex)
                 n = 1
                 J = lastindex(ex.args)
                 for j = 2:J
-                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S)),:($lastindex($S,$n))))
+                    exj, used = replace_ref_begin_end_!(ex.args[j], (:($firstindex($S,$n)),:($lastindex($S,$n))))
                     used_S |= used
                     ex.args[j] = exj
                     if isa(exj,Expr) && exj.head === :...
@@ -77,10 +77,23 @@ end
 """
     @view A[inds...]
 
-Creates a `SubArray` from an indexing expression. This can only be applied directly to a
-reference expression (e.g. `@view A[1,2:end]`), and should *not* be used as the target of
-an assignment (e.g. `@view(A[1,2:end]) = ...`).  See also [`@views`](@ref)
-to switch an entire block of code to use views for slicing.
+Transform the indexing expression `A[inds...]` into the equivalent [`view`](@ref) call.
+
+This can only be applied directly to a single indexing expression and is particularly
+helpful for expressions that include the special `begin` or `end` indexing syntaxes
+like `A[begin, 2:end-1]` (as those are not supported by the normal [`view`](@ref)
+function).
+
+Note that `@view` cannot be used as the target of a regular assignment (e.g.,
+`@view(A[1, 2:end]) = ...`), nor would the un-decorated
+[indexed assignment](@ref man-indexed-assignment) (`A[1, 2:end] = ...`)
+or broadcasted indexed assignment (`A[1, 2:end] .= ...`) make a copy.  It can be useful,
+however, for _updating_ broadcasted assignments like `@view(A[1, 2:end]) .+= 1`
+because this is a simple syntax for `@view(A[1, 2:end]) .= @view(A[1, 2:end]) + 1`,
+and the indexing expression on the right-hand side would otherwise make a
+copy without the `@view`.
+
+See also [`@views`](@ref) to switch an entire block of code to use views for non-scalar indexing.
 
 !!! compat "Julia 1.5"
     Using `begin` in an indexing expression to refer to the first index requires at least
@@ -201,6 +214,8 @@ to return a view. Scalar indices, non-array types, and
 explicit [`getindex`](@ref) calls (as opposed to `array[...]`) are
 unaffected.
 
+Similarly, `@views` converts string slices into [`SubString`](@ref) views.
+
 !!! note
     The `@views` macro only affects `array[...]` expressions
     that appear explicitly in the given `expression`, not array slicing that
diff --git a/base/weakkeydict.jl b/base/weakkeydict.jl
index f21097ddfb3ef0..0a9987671ea9b0 100644
--- a/base/weakkeydict.jl
+++ b/base/weakkeydict.jl
@@ -10,24 +10,19 @@ references to objects which may be garbage collected even when
 referenced in a hash table.
 
 See [`Dict`](@ref) for further help.  Note, unlike [`Dict`](@ref),
-`WeakKeyDict` does not convert keys on insertion.
+`WeakKeyDict` does not convert keys on insertion, as this would imply the key
+object was unreferenced anywhere before insertion.
 """
 mutable struct WeakKeyDict{K,V} <: AbstractDict{K,V}
     ht::Dict{WeakRef,V}
     lock::ReentrantLock
     finalizer::Function
+    dirty::Bool
 
     # Constructors mirror Dict's
     function WeakKeyDict{K,V}() where V where K
-        t = new(Dict{Any,V}(), ReentrantLock(), identity)
-        t.finalizer = function (k)
-            # when a weak key is finalized, remove from dictionary if it is still there
-            if islocked(t)
-                finalizer(t.finalizer, k)
-                return nothing
-            end
-            delete!(t, k)
-        end
+        t = new(Dict{Any,V}(), ReentrantLock(), identity, 0)
+        t.finalizer = k -> t.dirty = true
         return t
     end
 end
@@ -69,56 +64,151 @@ function WeakKeyDict(kv)
     end
 end
 
+function _cleanup_locked(h::WeakKeyDict)
+    if h.dirty
+        h.dirty = false
+        idx = skip_deleted_floor!(h.ht)
+        while idx != 0
+            if h.ht.keys[idx].value === nothing
+                _delete!(h.ht, idx)
+            end
+            idx = skip_deleted(h.ht, idx + 1)
+        end
+    end
+    return h
+end
+
 sizehint!(d::WeakKeyDict, newsz) = sizehint!(d.ht, newsz)
 empty(d::WeakKeyDict, ::Type{K}, ::Type{V}) where {K, V} = WeakKeyDict{K, V}()
 
+IteratorSize(::Type{<:WeakKeyDict}) = SizeUnknown()
+
 islocked(wkh::WeakKeyDict) = islocked(wkh.lock)
+lock(wkh::WeakKeyDict) = lock(wkh.lock)
+unlock(wkh::WeakKeyDict) = unlock(wkh.lock)
 lock(f, wkh::WeakKeyDict) = lock(f, wkh.lock)
 trylock(f, wkh::WeakKeyDict) = trylock(f, wkh.lock)
 
 function setindex!(wkh::WeakKeyDict{K}, v, key) where K
     !isa(key, K) && throw(ArgumentError("$(limitrepr(key)) is not a valid key for type $K"))
-    finalizer(wkh.finalizer, key)
+    # 'nothing' is not valid both because 'finalizer' will reject it,
+    # and because we therefore use it as a sentinel value
+    key === nothing && throw(ArgumentError("`nothing` is not a valid WeakKeyDict key"))
     lock(wkh) do
-        wkh.ht[WeakRef(key)] = v
+        _cleanup_locked(wkh)
+        k = getkey(wkh.ht, key, nothing)
+        if k === nothing
+            finalizer(wkh.finalizer, key)
+            k = WeakRef(key)
+        else
+            k.value = key
+        end
+        wkh.ht[k] = v
     end
     return wkh
 end
+function get!(wkh::WeakKeyDict{K}, key, default) where {K}
+    v = lock(wkh) do
+        if key !== nothing && haskey(wkh.ht, key)
+            wkh.ht[key]
+        else
+            wkh[key] = default
+        end
+    end
+    return v
+end
+function get!(default::Callable, wkh::WeakKeyDict{K}, key) where {K}
+    v = lock(wkh) do
+        if key !== nothing && haskey(wkh.ht, key)
+            wkh.ht[key]
+        else
+            wkh[key] = default()
+        end
+    end
+    return v
+end
 
 function getkey(wkh::WeakKeyDict{K}, kk, default) where K
-    return lock(wkh) do
-        k = getkey(wkh.ht, kk, secret_table_token)
-        k === secret_table_token && return default
-        return k.value::K
+    k = lock(wkh) do
+        local k = getkey(wkh.ht, kk, nothing)
+        k === nothing && return nothing
+        return k.value
     end
+    return k === nothing ? default : k::K
 end
 
-map!(f,iter::ValueIterator{<:WeakKeyDict})= map!(f, values(iter.dict.ht))
-get(wkh::WeakKeyDict{K}, key, default) where {K} = lock(() -> get(wkh.ht, key, default), wkh)
-get(default::Callable, wkh::WeakKeyDict{K}, key) where {K} = lock(() -> get(default, wkh.ht, key), wkh)
-function get!(wkh::WeakKeyDict{K}, key, default) where {K}
-    !isa(key, K) && throw(ArgumentError("$(limitrepr(key)) is not a valid key for type $K"))
-    lock(() -> get!(wkh.ht, WeakRef(key), default), wkh)
+map!(f, iter::ValueIterator{<:WeakKeyDict})= map!(f, values(iter.dict.ht))
+
+function get(wkh::WeakKeyDict{K}, key, default) where {K}
+    key === nothing && throw(KeyError(nothing))
+    lock(wkh) do
+        return get(wkh.ht, key, default)
+    end
 end
-function get!(default::Callable, wkh::WeakKeyDict{K}, key) where {K}
-    !isa(key, K) && throw(ArgumentError("$(limitrepr(key)) is not a valid key for type $K"))
-    lock(() -> get!(default, wkh.ht, WeakRef(key)), wkh)
+function get(default::Callable, wkh::WeakKeyDict{K}, key) where {K}
+    key === nothing && throw(KeyError(nothing))
+    lock(wkh) do
+        return get(default, wkh.ht, key)
+    end
+end
+function pop!(wkh::WeakKeyDict{K}, key) where {K}
+    key === nothing && throw(KeyError(nothing))
+    lock(wkh) do
+        return pop!(wkh.ht, key)
+    end
+end
+function pop!(wkh::WeakKeyDict{K}, key, default) where {K}
+    key === nothing && return default
+    lock(wkh) do
+        return pop!(wkh.ht, key, default)
+    end
+end
+function delete!(wkh::WeakKeyDict, key)
+    key === nothing && return wkh
+    lock(wkh) do
+        delete!(wkh.ht, key)
+    end
+    return wkh
+end
+function empty!(wkh::WeakKeyDict)
+    lock(wkh) do
+        empty!(wkh.ht)
+    end
+    return wkh
+end
+function haskey(wkh::WeakKeyDict{K}, key) where {K}
+    key === nothing && return false
+    lock(wkh) do
+        return haskey(wkh.ht, key)
+    end
+end
+function getindex(wkh::WeakKeyDict{K}, key) where {K}
+    key === nothing && throw(KeyError(nothing))
+    lock(wkh) do
+        return getindex(wkh.ht, key)
+    end
+end
+isempty(wkh::WeakKeyDict) = length(wkh) == 0
+function length(t::WeakKeyDict)
+    lock(t) do
+        _cleanup_locked(t)
+        return length(t.ht)
+    end
 end
-pop!(wkh::WeakKeyDict{K}, key) where {K} = lock(() -> pop!(wkh.ht, key), wkh)
-pop!(wkh::WeakKeyDict{K}, key, default) where {K} = lock(() -> pop!(wkh.ht, key, default), wkh)
-delete!(wkh::WeakKeyDict, key) = (lock(() -> delete!(wkh.ht, key), wkh); wkh)
-empty!(wkh::WeakKeyDict) = (lock(() -> empty!(wkh.ht), wkh); wkh)
-haskey(wkh::WeakKeyDict{K}, key) where {K} = lock(() -> haskey(wkh.ht, key), wkh)
-getindex(wkh::WeakKeyDict{K}, key) where {K} = lock(() -> getindex(wkh.ht, key), wkh)
-isempty(wkh::WeakKeyDict) = isempty(wkh.ht)
-length(t::WeakKeyDict) = length(t.ht)
 
 function iterate(t::WeakKeyDict{K,V}, state...) where {K, V}
-    y = lock(() -> iterate(t.ht, state...), t)
-    y === nothing && return nothing
-    wkv, newstate = y
-    kv = Pair{K,V}(wkv[1].value::K, wkv[2])
-    return (kv, newstate)
+    return lock(t) do
+        while true
+            y = iterate(t.ht, state...)
+            y === nothing && return nothing
+            wkv, state = y
+            k = wkv[1].value
+            GC.safepoint() # ensure `k` is now gc-rooted
+            k === nothing && continue # indicates `k` is scheduled for deletion
+            kv = Pair{K,V}(k::K, wkv[2])
+            return (kv, state)
+        end
+    end
 end
 
 filter!(f, d::WeakKeyDict) = filter_in_one_pass!(f, d)
diff --git a/ui/.gitignore b/cli/.gitignore
similarity index 100%
rename from ui/.gitignore
rename to cli/.gitignore
diff --git a/cli/Makefile b/cli/Makefile
new file mode 100644
index 00000000000000..11855ee6244dc0
--- /dev/null
+++ b/cli/Makefile
@@ -0,0 +1,141 @@
+SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+JULIAHOME := $(abspath $(SRCDIR)/..)
+BUILDDIR ?= .
+include $(JULIAHOME)/deps/Versions.make
+include $(JULIAHOME)/Make.inc
+include $(JULIAHOME)/deps/llvm-ver.make
+
+
+HEADERS := $(addprefix $(SRCDIR)/,jl_exports.h loader.h) $(addprefix $(JULIAHOME)/src/,julia_fasttls.h support/platform.h support/dirpath.h jl_exported_data.inc jl_exported_funcs.inc)
+
+LOADER_CFLAGS = $(JCFLAGS) -I$(BUILDROOT)/src -I$(JULIAHOME)/src -I$(JULIAHOME)/src/support -I$(build_includedir) -ffreestanding
+LOADER_LDFLAGS = $(JLDFLAGS) -ffreestanding -L$(build_shlibdir) -L$(build_libdir)
+
+ifeq ($(OS),WINNT)
+LOADER_CFLAGS += -municode -mconsole -nostdlib -fno-stack-check -fno-stack-protector -mno-stack-arg-probe
+endif
+
+ifeq ($(OS),WINNT)
+LOADER_LDFLAGS += -municode -mconsole -nostdlib --disable-auto-import \
+                  --disable-runtime-pseudo-reloc -lntdll -lkernel32 -lpsapi
+else ifeq ($(OS),Linux)
+LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed
+else ifeq ($(OS),FreeBSD)
+LOADER_LDFLAGS += -Wl,--no-as-needed -ldl -lpthread -rdynamic -lc -Wl,--as-needed
+else ifeq ($(OS),Darwin)
+LOADER_LDFLAGS += -lSystem
+endif
+
+# Build list of dependent libraries that must be opened
+SHIPFLAGS  += -DDEP_LIBS="\"$(LOADER_BUILD_DEP_LIBS)\""
+DEBUGFLAGS += -DDEP_LIBS="\"$(LOADER_DEBUG_BUILD_DEP_LIBS)\""
+
+EXE_OBJS := $(BUILDDIR)/loader_exe.o
+EXE_DOBJS := $(BUILDDIR)/loader_exe.dbg.obj
+LIB_OBJS := $(BUILDDIR)/loader_lib.o
+LIB_DOBJS := $(BUILDDIR)/loader_lib.dbg.obj
+
+# If this is an architecture that supports dynamic linking, link in a trampoline definition
+ifneq (,$(wildcard $(SRCDIR)/trampolines/trampolines_$(ARCH).S))
+LIB_OBJS += $(BUILDDIR)/loader_trampolines.o
+LIB_DOBJS += $(BUILDDIR)/loader_trampolines.o
+endif
+
+default: release
+all: release debug
+release debug :  % : julia-% libjulia-%
+
+$(BUILDDIR)/loader_lib.o : $(SRCDIR)/loader_lib.c $(HEADERS) $(JULIAHOME)/VERSION
+	@$(call PRINT_CC, $(CC) -DLIBRARY_EXPORTS $(SHIPFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
+$(BUILDDIR)/loader_lib.dbg.obj : $(SRCDIR)/loader_lib.c $(HEADERS) $(JULIAHOME)/VERSION
+	@$(call PRINT_CC, $(CC) -DLIBRARY_EXPORTS $(DEBUGFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
+$(BUILDDIR)/loader_exe.o : $(SRCDIR)/loader_exe.c $(HEADERS) $(JULIAHOME)/VERSION
+	@$(call PRINT_CC, $(CC) $(SHIPFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
+$(BUILDDIR)/loader_exe.dbg.obj : $(SRCDIR)/loader_exe.c $(HEADERS) $(JULIAHOME)/VERSION
+	@$(call PRINT_CC, $(CC) $(DEBUGFLAGS) $(LOADER_CFLAGS) -c $< -o $@)
+$(BUILDDIR)/loader_trampolines.o : $(SRCDIR)/trampolines/trampolines_$(ARCH).S $(HEADERS) $(SRCDIR)/trampolines/common.h
+	@$(call PRINT_CC, $(CC) $(SHIPFLAGS) $(LOADER_CFLAGS) $< -c -o $@)
+
+# Debugging target to help us see what kind of code is being generated for our trampolines
+dump-trampolines: $(SRCDIR)/trampolines/trampolines_$(ARCH).S
+	$(CC) $(SHIPFLAGS) $(LOADER_CFLAGS) $< -S | sed -E 's/ ((%%)|;) /\n/g' | sed -E 's/.global/\n.global/g'
+
+DIRS = $(build_bindir) $(build_libdir)
+$(DIRS):
+	@mkdir -p $@
+
+ifeq ($(OS),WINNT)
+$(BUILDDIR)/julia_res.o: $(JULIAHOME)/contrib/windows/julia.rc $(JULIAHOME)/VERSION
+	JLVER=`cat $(JULIAHOME)/VERSION` && \
+	JLVERi=`echo $$JLVER | perl -nle \
+		'/^(\d+)\.?(\d*)\.?(\d*)/ && \
+		print int $$1,",",int $$2,",",int $$3,",0"'` && \
+	$(CROSS_COMPILE)windres $< -O coff -o $@ -DJLVER=$$JLVERi -DJLVER_STR=\\\"$$JLVER\\\"
+EXE_OBJS += $(BUILDDIR)/julia_res.o
+EXE_DOBJS += $(BUILDDIR)/julia_res.o
+endif
+
+# Embed an Info.plist in the julia executable
+# Create an intermediate target Info.plist for Darwin code signing.
+ifeq ($(DARWIN_FRAMEWORK),1)
+$(BUILDDIR)/Info.plist: $(JULIAHOME)/VERSION
+	/usr/libexec/PlistBuddy -x -c "Clear dict" $@
+	/usr/libexec/PlistBuddy -x -c "Add :CFBundleName string julia" $@
+	/usr/libexec/PlistBuddy -x -c "Add :CFBundleIdentifier string $(darwin_codesign_id_julia_ui)" $@
+	/usr/libexec/PlistBuddy -x -c "Add :CFBundleInfoDictionaryVersion string 6.0" $@
+	/usr/libexec/PlistBuddy -x -c "Add :CFBundleVersion string $(JULIA_COMMIT)" $@
+	/usr/libexec/PlistBuddy -x -c "Add :CFBundleShortVersionString string $(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION).$(JULIA_PATCH_VERSION)" $@
+.INTERMEDIATE: $(BUILDDIR)/Info.plist # cleanup this file after we are done using it
+JLDFLAGS += -Wl,-sectcreate,__TEXT,__info_plist,Info.plist
+$(build_bindir)/julia$(EXE): $(BUILDDIR)/Info.plist
+$(build_bindir)/julia-debug$(EXE): $(BUILDDIR)/Info.plist
+endif
+
+julia-release: $(build_bindir)/julia$(EXE)
+julia-debug: $(build_bindir)/julia-debug$(EXE)
+libjulia-release: $(build_shlibdir)/libjulia.$(SHLIB_EXT)
+libjulia-debug: $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT)
+
+ifeq ($(OS),WINNT)
+# On Windows we need to strip out exported functions from the generated import library.
+STRIP_EXPORTED_FUNCS := $(shell $(CPP_STDOUT) -I$(JULIAHOME)/src $(SRCDIR)/list_strip_symbols.h)
+endif
+
+$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_OBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir)
+	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(SHIPFLAGS) $(LIB_OBJS) -o $@ \
+		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(RPATH_LIB) $(call SONAME_FLAGS,libjulia.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia.$(SHLIB_EXT) $@
+ifeq ($(OS), WINNT)
+	@# Note that if the objcopy command starts getting too long, we can use `@file` to read
+	@# command-line options from `file` instead.
+	@$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a)
+endif
+
+$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(LIB_DOBJS) $(SRCDIR)/list_strip_symbols.h | $(build_shlibdir) $(build_libdir)
+	@$(call PRINT_LINK, $(CC) $(call IMPLIB_FLAGS,$@.tmp) $(LOADER_CFLAGS) -DLIBRARY_EXPORTS -shared $(DEBUGFLAGS) $(LIB_DOBJS) -o $@ \
+		$(JLIBLDFLAGS) $(LOADER_LDFLAGS) $(RPATH_LIB) $(call SONAME_FLAGS,libjulia-debug.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-debug.$(SHLIB_EXT) $@
+ifeq ($(OS), WINNT)
+	@$(call PRINT_ANALYZE, $(OBJCOPY) $(build_libdir)/$(notdir $@).tmp.a $(STRIP_EXPORTED_FUNCS) $(build_libdir)/$(notdir $@).a && rm $(build_libdir)/$(notdir $@).tmp.a)
+endif
+
+ifneq ($(OS), WINNT)
+$(build_shlibdir)/libjulia.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia%.$(JL_MAJOR_SHLIB_EXT): \
+		$(build_shlibdir)/libjulia%.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+$(build_shlibdir)/libjulia.$(SHLIB_EXT) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT): $(build_shlibdir)/libjulia%.$(SHLIB_EXT): \
+		$(build_shlibdir)/libjulia%.$(JL_MAJOR_MINOR_SHLIB_EXT) $(build_shlibdir)/libjulia%.$(JL_MAJOR_SHLIB_EXT)
+	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+endif
+
+$(build_bindir)/julia$(EXE): $(EXE_OBJS) $(build_shlibdir)/libjulia.$(SHLIB_EXT) | $(build_bindir)
+	@$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) $(SHIPFLAGS) $(EXE_OBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia)
+
+$(build_bindir)/julia-debug$(EXE): $(EXE_DOBJS) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT) | $(build_bindir)
+	@$(call PRINT_LINK, $(CC) $(LOADER_CFLAGS) $(DEBUGFLAGS) $(EXE_DOBJS) -o $@ $(LOADER_LDFLAGS) $(RPATH) -ljulia-debug)
+
+clean: | $(CLEAN_TARGETS)
+	rm -f $(BUILDDIR)/*.o $(BUILDDIR)/*.dbg.obj
+	rm -f $(build_bindir)/julia*
+
+.PHONY: clean release debug julia-release julia-debug
diff --git a/cli/README.md b/cli/README.md
new file mode 100644
index 00000000000000..4021aceb7d8398
--- /dev/null
+++ b/cli/README.md
@@ -0,0 +1,31 @@
+# cli and loader
+
+This directory contains the code used by the Julia loader, implementing the pieces necessary to isolate ourselves from the native dynamic loader enough to reimplement useful features such as RPATH across all platforms.
+This loader comprises the `julia` executable and the `libjulia` library, which are responsible for setting things up such that `libjulia-internal` and any other internal dependencies can be reliably loaded.
+The code is organized in three pieces:
+
+* `loader_exe.c` gets built into the main `julia` executable.  It immediately loads `libjulia`.
+* `loader_lib.c` gets built into the main `libjulia` shared library.  This is the main entrypoint for the Julia runtime loading process, which occurs within `jl_load_repl()`.
+* `trampolines/*.S`, which contains assembly definitions for symbol forwarding trampolines.  These are used to allow `libjulia` to re-export symbols such that a C linker can use `libjulia` directly for embedding usecases.
+
+The main requirements of the loader are as follows:
+
+- **Isolation**: We need to be able to load our own copy of `libgcc_s.so`, etc...
+  On Linux/macOS, proper application of `RPATH` can influence the linker's decisions, however errant `LD_LIBRARY_PATH` entries or system libraries inserted into the build process can still interfere, not to mention Windows' lack of `RPATH`-like capabilities.
+  To address this, the loader is built as a stand-alone binary that does not depend on the large set of dependencies that `libjulia-internal` itself does, and manually `dlopen()`'s a list of dependencies using logic similar to that of an `RPATH`.
+- **Compatibility**: We need to support embedding usecases without forcing embedders to care about all of these things.
+  For linking against the Julia runtime by simply providing `-ljulia` on the link line, we must ensure that all public interfaces, whether function symbols or data symbols, must be exported from `libjulia`.
+  This motivates our usage of function trampolines to re-export functions from `libjulia-internal`, and the reason why all public data symbols are defined within `libjulia`, then imported into `libjulia-internal` for initialization.
+- **Flexibility**: We need to be able to make use of system libraries when requested to do so by the user at build time.
+  Currently, we embed the list of libraries to be `dlopen()`'ed within `libjulia` as a string (See the definition of `DEP_LIBS` in `Make.inc` and its usage in `loader_lib.c`).
+  This is flexible enough as we do not support changing this configuration at runtime, however in the future, we may need to add some simple parsing logic in `loader_lib.c` to inspect a `LocalPreferences.toml` and construct the list of libraries to load from that.
+- **Speed**: This whole process should be fast, especially function trampolines.
+  To this end, we write everything in low-overhead assembly, borrowing inspiration from the PLT trampolines that the linker already generates when using dynamic libraries.
+
+## Public interface definition
+
+The public interface exported by `libjulia` is contained within `.inc` files stored in `src`; one for exported data symbols, [`src/jl_exported_data.inc`](../src/jl_exported_data.inc) and one for exported functions, [`src/jl_exported_funcs.inc`](../src/jl_exported_funcs.inc).
+Adding entries to the data list will cause `libjulia` to generate a placeholder variable declaration.
+Most symbols are declared to be of type `void *`, however for symbols that are of a different size, they are declared along with their type.
+Adding entries to the function list will cause `libjulia` to generate a trampoline definition (using a trampoline according to the architecture of the target processor) and then at runtime, when `libjulia` has successfully loaded `libjulia-internal`, it will `dlsym()` that symbol from within `libjulia-internal` and set it as the target of the trampoline.
+All initialization will occur automatically upon successful load of `libjulia`, so there is no need for user code to call an initialization before invoking typical `libjulia-internal` functions (although initialization of the runtime itself is still necessary, e.g. calling `jl_init()`).
diff --git a/cli/jl_exports.h b/cli/jl_exports.h
new file mode 100644
index 00000000000000..e9be7c6f2f819b
--- /dev/null
+++ b/cli/jl_exports.h
@@ -0,0 +1,75 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Bring in the curated lists of exported data and function symbols, then
+// perform C preprocessor magic upon them to generate lists of declarations and
+// functions to re-export our function symbols from libjulia-internal to libjulia.
+#include "../src/jl_exported_data.inc"
+#include "../src/jl_exported_funcs.inc"
+
+// Define pointer data as `const void * $(name);`
+#define XX(name)    JL_DLLEXPORT const void * name;
+JL_EXPORTED_DATA_POINTERS(XX)
+#undef XX
+
+// Define symbol data as `$(type) $(name);`
+#define XX(name, type)    JL_DLLEXPORT type name;
+JL_EXPORTED_DATA_SYMBOLS(XX)
+#undef XX
+
+// Declare list of exported functions (sans type)
+#define XX(name)    JL_DLLEXPORT void name(void);
+typedef void (anonfunc)(void);
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
+#undef XX
+
+// Define holder locations for function addresses as `const void * $(name)_addr = NULL;
+#define XX(name)    JL_HIDDEN anonfunc * name##_addr = NULL;
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
+#undef XX
+
+// Generate lists of function names and addresses
+#define XX(name)    "i" #name,
+static const char *const jl_runtime_exported_func_names[] = {
+    JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+    JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+    NULL
+};
+#undef XX
+
+#define XX(name)    #name"_impl",
+static const char *const jl_codegen_exported_func_names[] = {
+    JL_CODEGEN_EXPORTED_FUNCS(XX)
+    NULL
+};
+#undef XX
+
+#define XX(name)    #name"_fallback",
+static const char *const jl_codegen_fallback_func_names[] = {
+    JL_CODEGEN_EXPORTED_FUNCS(XX)
+    NULL
+};
+#undef XX
+
+#define XX(name)    &name##_addr,
+static anonfunc **const jl_runtime_exported_func_addrs[] = {
+    JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+    JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+    NULL
+};
+static anonfunc **const jl_codegen_exported_func_addrs[] = {
+    JL_CODEGEN_EXPORTED_FUNCS(XX)
+    NULL
+};
+#undef XX
diff --git a/cli/list_strip_symbols.h b/cli/list_strip_symbols.h
new file mode 100644
index 00000000000000..5d534616e132be
--- /dev/null
+++ b/cli/list_strip_symbols.h
@@ -0,0 +1,10 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "jl_exported_funcs.inc"
+#include "trampolines/common.h"
+#define XX(x) --strip-symbol=CNAME(x)
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+#undef XX
diff --git a/cli/loader.h b/cli/loader.h
new file mode 100644
index 00000000000000..2d0b977f7142f8
--- /dev/null
+++ b/cli/loader.h
@@ -0,0 +1,100 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+/* Bring in definitions for `_OS_X_`, `JL_PATH_MAX` and `PATHSEPSTRING`, `jl_ptls_t`, etc... */
+#include "../src/support/platform.h"
+#include "../src/support/dirpath.h"
+#include "../src/julia_fasttls.h"
+
+#ifdef _OS_WINDOWS_
+/* We need to reimplement a bunch of standard library stuff on windows,
+ * but we want to make sure that it doesn't conflict with the actual implementations
+ * once those get linked into this process. */
+#define fwrite loader_fwrite
+#define fputs loader_fputs
+#define exit loader_exit
+#define strlen loader_strlen
+#define wcslen loader_wcslen
+#define strncat loader_strncat
+#define memcpy loader_memcpy
+#define dirname loader_dirname
+#define strchr loader_strchr
+#define malloc loader_malloc
+#define realloc loader_realloc
+#endif
+
+#include <stdint.h>
+
+#ifdef _OS_WINDOWS_
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#else
+
+#ifdef _OS_DARWIN_
+#include <mach-o/dyld.h>
+#endif
+#ifdef _OS_FREEBSD_
+#include <stddef.h>
+#include <sys/sysctl.h>
+#endif
+#define _GNU_SOURCE // Need this for `dladdr()`
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <dlfcn.h>
+
+#endif
+
+// Borrow definition from `support/dtypes.h`
+#ifdef _OS_WINDOWS_
+# ifdef LIBRARY_EXPORTS
+#  define JL_DLLEXPORT __declspec(dllexport)
+# else
+#  define JL_DLLEXPORT __declspec(dllimport)
+# endif
+#define JL_HIDDEN
+#else
+# if defined(LIBRARY_EXPORTS) && defined(_OS_LINUX_)
+#  define JL_DLLEXPORT __attribute__ ((visibility("protected")))
+# else
+#  define JL_DLLEXPORT __attribute__ ((visibility("default")))
+# endif
+#define JL_HIDDEN    __attribute__ ((visibility("hidden")))
+#endif
+/*
+ * DEP_LIBS is our list of dependent libraries that must be loaded before `libjulia`.
+ * Note that order matters, as each entry will be opened in-order.  We define here a
+ * dummy value just so this file compiles on its own, and also so that developers can
+ * see what this value should look like.  Note that the last entry must always be
+ * `libjulia`, and that all paths should be relative to this loader library path.
+ */
+#if !defined(DEP_LIBS)
+#define DEP_LIBS "../lib/example.so:../lib/libjulia.so"
+#endif
+
+// We need to dlopen() ourselves in order to introspect the libdir.
+#if defined(JL_DEBUG_BUILD)
+#define LIBJULIA_NAME "libjulia-debug"
+#else
+#define LIBJULIA_NAME "libjulia"
+#endif
+
+
+// Declarations from `loader_lib.c` and `loader_win_utils.c`
+JL_DLLEXPORT extern int jl_load_repl(int, char **);
+JL_DLLEXPORT void jl_loader_print_stderr(const char * msg);
+void jl_loader_print_stderr3(const char * msg1, const char * msg2, const char * msg3);
+static void * lookup_symbol(const void * lib_handle, const char * symbol_name);
+
+#ifdef _OS_WINDOWS_
+LPWSTR *CommandLineToArgv(LPWSTR lpCmdLine, int *pNumArgs);
+int wchar_to_utf8(const wchar_t * wstr, char *str, size_t maxlen);
+int utf8_to_wchar(const char * str, wchar_t *wstr, size_t maxlen);
+void setup_stdio(void);
+#endif
+
+#include "../src/jloptions.h"
diff --git a/cli/loader_exe.c b/cli/loader_exe.c
new file mode 100644
index 00000000000000..07a0bddcd4b87f
--- /dev/null
+++ b/cli/loader_exe.c
@@ -0,0 +1,77 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// This defines a bare-bones loader that opens `libjulia` and immediately invokes its `load_repl()` function.
+#include "loader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Bring in helper functions for windows without libgcc. */
+#ifdef _OS_WINDOWS_
+#include "loader_win_utils.c"
+#endif
+
+JULIA_DEFINE_FAST_TLS
+
+#ifdef _COMPILER_ASAN_ENABLED_
+JL_DLLEXPORT const char* __asan_default_options()
+{
+    return "allow_user_segv_handler=1:detect_leaks=0";
+    // FIXME: enable LSAN after fixing leaks & defining __lsan_default_suppressions(),
+    //        or defining __lsan_default_options = exitcode=0 once publicly available
+    //        (here and in flisp/flmain.c)
+}
+#endif
+
+#ifdef _OS_WINDOWS_
+int mainCRTStartup(void)
+{
+    int argc;
+    LPWSTR * wargv = CommandLineToArgv(GetCommandLine(), &argc);
+    char ** argv = (char **)malloc(sizeof(char*) * (argc + 1));
+    setup_stdio();
+#else
+int main(int argc, char * argv[])
+{
+#endif
+
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_)
+    // ASAN/TSAN do not support RTLD_DEEPBIND
+    // https://github.com/google/sanitizers/issues/611
+    putenv("LBT_USE_RTLD_DEEPBIND=0");
+#endif
+
+    // Convert Windows wchar_t values to UTF8
+#ifdef _OS_WINDOWS_
+    for (int i = 0; i < argc; i++) {
+        size_t max_arg_len = 4*wcslen(wargv[i]);
+        argv[i] = (char *)malloc(max_arg_len);
+        if (!wchar_to_utf8(wargv[i], argv[i], max_arg_len)) {
+            jl_loader_print_stderr("Unable to convert all arguments to UTF-8!\n");
+            return 1;
+        }
+    }
+    argv[argc] = NULL;
+#endif
+
+    // Call load_repl with our initialization arguments:
+    int ret = jl_load_repl(argc, argv);
+
+    // On Windows we're running without the CRT that would do this for us
+    exit(ret);
+    return ret;
+}
+
+#if defined(__GLIBC__) && (defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_))
+// fork is generally bad news, but it is better if we prevent applications from
+// making it worse as openblas threadpools cause it to hang
+int __register_atfork232(void (*prepare)(void), void (*parent)(void), void (*child)(void), void *dso_handle) {
+    return 0;
+}
+__asm__ (".symver __register_atfork232, __register_atfork@@GLIBC_2.3.2");
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/cli/loader_lib.c b/cli/loader_lib.c
new file mode 100644
index 00000000000000..74241510ffd257
--- /dev/null
+++ b/cli/loader_lib.c
@@ -0,0 +1,292 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// This file defines an RPATH-style relative path loader for all platforms
+#include "loader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Bring in definitions of symbols exported from libjulia. */
+#include "jl_exports.h"
+
+/* Bring in helper functions for windows without libgcc. */
+#ifdef _OS_WINDOWS_
+#include "loader_win_utils.c"
+#endif
+
+// Save DEP_LIBS to a variable that is explicitly sized for expansion
+static char dep_libs[1024] = DEP_LIBS;
+
+JL_DLLEXPORT void jl_loader_print_stderr(const char * msg)
+{
+    fputs(msg, stderr);
+}
+// I use three arguments a lot.
+void jl_loader_print_stderr3(const char * msg1, const char * msg2, const char * msg3)
+{
+    jl_loader_print_stderr(msg1);
+    jl_loader_print_stderr(msg2);
+    jl_loader_print_stderr(msg3);
+}
+
+/* Wrapper around dlopen(), with extra relative pathing thrown in*/
+static void * load_library(const char * rel_path, const char * src_dir, int err) {
+    void * handle = NULL;
+
+    // See if a handle is already open to the basename
+    const char *basename = rel_path + strlen(rel_path);
+    while (basename-- > rel_path)
+        if (*basename == PATHSEPSTRING[0] || *basename == '/')
+            break;
+    basename++;
+#if defined(_OS_WINDOWS_)
+    if ((handle = GetModuleHandleA(basename)))
+        return handle;
+#else
+    // if err == 0 the library is optional, so don't allow global lookups to see it
+    if ((handle = dlopen(basename, RTLD_NOLOAD | RTLD_NOW | (err ? RTLD_GLOBAL : RTLD_LOCAL))))
+        return handle;
+#endif
+
+    char path[2*JL_PATH_MAX + 1] = {0};
+    strncat(path, src_dir, sizeof(path) - 1);
+    strncat(path, PATHSEPSTRING, sizeof(path) - 1);
+    strncat(path, rel_path, sizeof(path) - 1);
+
+#if defined(_OS_WINDOWS_)
+    wchar_t wpath[2*JL_PATH_MAX + 1] = {0};
+    if (!utf8_to_wchar(path, wpath, 2*JL_PATH_MAX)) {
+        jl_loader_print_stderr3("ERROR: Unable to convert path ", path, " to wide string!\n");
+        exit(1);
+    }
+    handle = (void *)LoadLibraryExW(wpath, NULL, LOAD_WITH_ALTERED_SEARCH_PATH);
+#else
+    handle = dlopen(path, RTLD_NOW | (err ? RTLD_GLOBAL : RTLD_LOCAL));
+#endif
+
+    if (handle == NULL) {
+        if (!err)
+            return NULL;
+        jl_loader_print_stderr3("ERROR: Unable to load dependent library ", path, "\n");
+#if defined(_OS_WINDOWS_)
+        LPWSTR wmsg = TEXT("");
+        FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER |
+                       FORMAT_MESSAGE_FROM_SYSTEM |
+                       FORMAT_MESSAGE_IGNORE_INSERTS |
+                       FORMAT_MESSAGE_MAX_WIDTH_MASK,
+                       NULL, GetLastError(),
+                       MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US),
+                       (LPWSTR)&wmsg, 0, NULL);
+        char err[256] = {0};
+        wchar_to_utf8(wmsg, err, 255);
+        jl_loader_print_stderr3("Message:", err, "\n");
+#else
+        char *dlerr = dlerror();
+        if (dlerr != NULL) {
+            jl_loader_print_stderr3("Message:", dlerr, "\n");
+        }
+#endif
+        exit(1);
+    }
+    return handle;
+}
+
+static void * lookup_symbol(const void * lib_handle, const char * symbol_name) {
+#ifdef _OS_WINDOWS_
+    return GetProcAddress((HMODULE) lib_handle, symbol_name);
+#else
+    return dlsym((void *)lib_handle, symbol_name);
+#endif
+}
+
+// Find the location of libjulia.
+char lib_dir[JL_PATH_MAX];
+JL_DLLEXPORT const char * jl_get_libdir()
+{
+    // Reuse the path if this is not the first call.
+    if (lib_dir[0] != 0) {
+        return lib_dir;
+    }
+#if defined(_OS_WINDOWS_)
+    // On Windows, we use GetModuleFileNameW
+    wchar_t libjulia_path[JL_PATH_MAX];
+    HMODULE libjulia = NULL;
+
+    // Get a handle to libjulia.
+    if (!utf8_to_wchar(LIBJULIA_NAME, libjulia_path, JL_PATH_MAX)) {
+        jl_loader_print_stderr3("ERROR: Unable to convert path ", LIBJULIA_NAME, " to wide string!\n");
+        exit(1);
+    }
+    libjulia = LoadLibraryW(libjulia_path);
+    if (libjulia == NULL) {
+        jl_loader_print_stderr3("ERROR: Unable to load ", LIBJULIA_NAME, "!\n");
+        exit(1);
+    }
+    if (!GetModuleFileNameW(libjulia, libjulia_path, JL_PATH_MAX)) {
+        jl_loader_print_stderr("ERROR: GetModuleFileName() failed\n");
+        exit(1);
+    }
+    if (!wchar_to_utf8(libjulia_path, lib_dir, JL_PATH_MAX)) {
+        jl_loader_print_stderr("ERROR: Unable to convert julia path to UTF-8\n");
+        exit(1);
+    }
+#else
+    // On all other platforms, use dladdr()
+    Dl_info info;
+    if (!dladdr(&jl_get_libdir, &info)) {
+        jl_loader_print_stderr("ERROR: Unable to dladdr(&jl_get_libdir)!\n");
+        char *dlerr = dlerror();
+        if (dlerr != NULL) {
+            jl_loader_print_stderr3("Message:", dlerr, "\n");
+        }
+        exit(1);
+    }
+    strcpy(lib_dir, info.dli_fname);
+#endif
+    // Finally, convert to dirname
+    const char * new_dir = dirname(lib_dir);
+    if (new_dir != lib_dir) {
+        // On some platforms, dirname() mutates.  On others, it does not.
+        memcpy(lib_dir, new_dir, strlen(new_dir)+1);
+    }
+    return lib_dir;
+}
+
+void * libjulia_internal = NULL;
+__attribute__((constructor)) void jl_load_libjulia_internal(void) {
+    // Only initialize this once
+    if (libjulia_internal != NULL) {
+        return;
+    }
+
+    // Introspect to find our own path
+    const char * lib_dir = jl_get_libdir();
+
+    // Pre-load libraries that libjulia-internal needs.
+    int deps_len = strlen(dep_libs);
+    char * curr_dep = &dep_libs[0];
+
+    // We keep track of "special" libraries names (ones whose name is prefixed with `@`)
+    // which are libraries that we want to load in some special, custom way, such as
+    // `libjulia-internal` or `libjulia-codegen`.
+    int special_idx = 0;
+    char * special_library_names[2] = {NULL};
+    while (1) {
+        // try to find next colon character; if we can't, break out
+        char * colon = strchr(curr_dep, ':');
+        if (colon == NULL)
+            break;
+
+        // Chop the string at the colon so it's a valid-ending-string
+        *colon = '\0';
+
+        // If this library name starts with `@`, don't open it here (but mark it as special)
+        if (curr_dep[0] == '@') {
+            if (special_idx > sizeof(special_library_names)/sizeof(char *)) {
+                jl_loader_print_stderr("ERROR: Too many special library names specified, check LOADER_BUILD_DEP_LIBS and friends!\n");
+                exit(1);
+            }
+            special_library_names[special_idx] = curr_dep + 1;
+            special_idx += 1;
+        } else {
+            load_library(curr_dep, lib_dir, 1);
+        }
+
+        // Skip ahead to next dependency
+        curr_dep = colon + 1;
+    }
+
+    if (special_idx != sizeof(special_library_names)/sizeof(char *)) {
+        jl_loader_print_stderr("ERROR: Too few special library names specified, check LOADER_BUILD_DEP_LIBS and friends!\n");
+        exit(1);
+    }
+
+    // Unpack our special library names.  This is why ordering of library names matters.
+    libjulia_internal = load_library(special_library_names[0], lib_dir, 1);
+    void *libjulia_codegen = load_library(special_library_names[1], lib_dir, 0);
+    const char * const * codegen_func_names;
+    const char *codegen_liberr;
+    if (libjulia_codegen == NULL) {
+        // if codegen is not available, use fallback implementation in libjulia-internal
+        libjulia_codegen = libjulia_internal;
+        codegen_func_names = jl_codegen_fallback_func_names;
+        codegen_liberr = " from libjulia-internal\n";
+    }
+    else {
+        codegen_func_names = jl_codegen_exported_func_names;
+        codegen_liberr = " from libjulia-codegen\n";
+    }
+
+    // Once we have libjulia-internal loaded, re-export its symbols:
+    for (unsigned int symbol_idx=0; jl_runtime_exported_func_names[symbol_idx] != NULL; ++symbol_idx) {
+        void *addr = lookup_symbol(libjulia_internal, jl_runtime_exported_func_names[symbol_idx]);
+        if (addr == NULL) {
+            jl_loader_print_stderr3("ERROR: Unable to load ", jl_runtime_exported_func_names[symbol_idx], " from libjulia-internal\n");
+            exit(1);
+        }
+        (*jl_runtime_exported_func_addrs[symbol_idx]) = addr;
+    }
+    // jl_options must be initialized very early, in case an embedder sets some
+    // values there before calling jl_init
+    ((void (*)(void))jl_init_options_addr)();
+
+    for (unsigned int symbol_idx=0; codegen_func_names[symbol_idx] != NULL; ++symbol_idx) {
+        void *addr = lookup_symbol(libjulia_codegen, codegen_func_names[symbol_idx]);
+        if (addr == NULL) {
+            jl_loader_print_stderr3("ERROR: Unable to load ", codegen_func_names[symbol_idx], codegen_liberr);
+            exit(1);
+        }
+        (*jl_codegen_exported_func_addrs[symbol_idx]) = addr;
+    }
+    // Next, if we're on Linux/FreeBSD, set up fast TLS.
+#if !defined(_OS_WINDOWS_) && !defined(_OS_DARWIN_)
+    void (*jl_pgcstack_setkey)(void*, void*(*)(void)) = lookup_symbol(libjulia_internal, "jl_pgcstack_setkey");
+    if (jl_pgcstack_setkey == NULL) {
+        jl_loader_print_stderr("ERROR: Cannot find jl_pgcstack_setkey() function within libjulia-internal!\n");
+        exit(1);
+    }
+    void *fptr = lookup_symbol(RTLD_DEFAULT, "jl_get_pgcstack_static");
+    void *(*key)(void) = lookup_symbol(RTLD_DEFAULT, "jl_pgcstack_addr_static");
+    if (fptr != NULL && key != NULL)
+        jl_pgcstack_setkey(fptr, key);
+#endif
+
+    // jl_options must be initialized very early, in case an embedder sets some
+    // values there before calling jl_init
+    ((void (*)(void))jl_init_options_addr)();
+}
+
+// Load libjulia and run the REPL with the given arguments (in UTF-8 format)
+JL_DLLEXPORT int jl_load_repl(int argc, char * argv[]) {
+    // Some compilers/platforms are known to have `__attribute__((constructor))` issues,
+    // so we have a fallback call of `jl_load_libjulia_internal()` here.
+    if (libjulia_internal == NULL) {
+        jl_load_libjulia_internal();
+        if (libjulia_internal == NULL) {
+            jl_loader_print_stderr("ERROR: libjulia-internal could not be loaded!\n");
+            exit(1);
+        }
+    }
+    // Load the repl entrypoint symbol and jump into it!
+    int (*entrypoint)(int, char **) = (int (*)(int, char **))lookup_symbol(libjulia_internal, "jl_repl_entrypoint");
+    if (entrypoint == NULL) {
+        jl_loader_print_stderr("ERROR: Unable to find `jl_repl_entrypoint()` within libjulia-internal!\n");
+        exit(1);
+    }
+    return entrypoint(argc, (char **)argv);
+}
+
+#ifdef _OS_WINDOWS_
+int __stdcall DllMainCRTStartup(void* instance, unsigned reason, void* reserved) {
+    setup_stdio();
+
+    // Because we override DllMainCRTStartup, we have to manually call our constructor methods
+    jl_load_libjulia_internal();
+    return 1;
+}
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
diff --git a/cli/loader_win_utils.c b/cli/loader_win_utils.c
new file mode 100644
index 00000000000000..621834a030c52d
--- /dev/null
+++ b/cli/loader_win_utils.c
@@ -0,0 +1,200 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Workarounds for compiling via mingw without using libgcc_s
+typedef struct {
+    HANDLE fd;
+    BOOL isconsole;
+} FILE;
+
+static FILE _stdout = { INVALID_HANDLE_VALUE };
+static FILE _stderr = { INVALID_HANDLE_VALUE };
+
+FILE *stdout = &_stdout;
+FILE *stderr = &_stderr;
+
+int loader_fwrite(const WCHAR *str, size_t nchars, FILE *out) {
+    DWORD written;
+    if (out->isconsole) {
+        if (WriteConsole(out->fd, str, nchars, &written, NULL))
+            return written;
+    } else {
+        if (WriteFile(out->fd, str, sizeof(WCHAR) * nchars, &written, NULL))
+            return written;
+    }
+    return -1;
+}
+
+int loader_fputs(const char *str, FILE *out) {
+    wchar_t wstr[1024];
+    utf8_to_wchar(str, wstr, 1024);
+    return fwrite(wstr, wcslen(wstr), out);
+}
+
+void * loader_malloc(const size_t size) {
+    return HeapAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, size);
+}
+
+void * loader_realloc(void * mem, const size_t size) {
+    return HeapReAlloc(GetProcessHeap(), HEAP_GENERATE_EXCEPTIONS, mem, size);
+}
+
+LPWSTR *CommandLineToArgv(LPWSTR lpCmdLine, int *pNumArgs) {
+    LPWSTR out = lpCmdLine;
+    LPWSTR cmd = out;
+    unsigned MaxEntries = 4;
+    unsigned backslashes = 0;
+    int in_quotes = 0;
+    int empty = 1;
+    LPWSTR *cmds;
+    *pNumArgs = 0;
+    cmds = (LPWSTR*)malloc(sizeof(LPWSTR) * MaxEntries);
+    while (1) {
+        WCHAR c = *lpCmdLine++;
+        switch (c) {
+        case 0:
+            if (!empty) {
+                *out++ = '\0';
+                cmds[(*pNumArgs)++] = cmd;
+            }
+            cmds[*pNumArgs] = NULL;
+            return cmds;
+        default:
+            *out++ = c;
+            empty = 0;
+            break;
+        case '"':
+            out -= backslashes / 2; // remove half of the backslashes
+            if (backslashes % 2)
+                *(out - 1) = '"'; // replace \ with "
+            else
+                in_quotes = !in_quotes; // treat as quote delimater
+            empty = 0;
+            break;
+        case '\t':
+        case ' ':
+            if (in_quotes) {
+                *out++ = c;
+            } else if (!empty) {
+                *out++ = '\0';
+                cmds[(*pNumArgs)++] = cmd;
+                cmd = out;
+                empty = 1;
+                if (*pNumArgs >= MaxEntries - 1) {
+                    MaxEntries *= 2;
+                    cmds = (LPWSTR*)realloc(cmds, sizeof(LPWSTR) * MaxEntries);
+                }
+            }
+        }
+        if (c == '\\')
+            backslashes++;
+        else
+            backslashes = 0;
+    }
+}
+
+void setup_stdio() {
+    DWORD mode = 0;
+    _stdout.fd = GetStdHandle(STD_OUTPUT_HANDLE);
+    _stdout.isconsole = GetConsoleMode(_stdout.fd, &mode);
+    _stderr.fd = GetStdHandle(STD_ERROR_HANDLE);
+    _stderr.isconsole = GetConsoleMode(_stderr.fd, &mode);
+}
+
+void loader_exit(int code) {
+    ExitProcess(code);
+}
+
+
+/* Utilities to convert from Windows' wchar_t stuff to UTF-8 */
+int wchar_to_utf8(const wchar_t * wstr, char *str, size_t maxlen) {
+    /* Fast-path empty strings, as WideCharToMultiByte() returns zero for them. */
+    if (wstr[0] == L'\0') {
+        str[0] = '\0';
+        return 1;
+    }
+    size_t len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL);
+    if (!len)
+        return 0;
+    if (len > maxlen)
+        return 0;
+    if (!WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, len, NULL, NULL))
+        return 0;
+    return 1;
+}
+
+int utf8_to_wchar(const char * str, wchar_t * wstr, size_t maxlen) {
+    /* Fast-path empty strings, as WideCharToMultiByte() returns zero for them. */
+    if (str[0] == '\0') {
+        wstr[0] = L'\0';
+        return 1;
+    }
+    size_t len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
+    if (!len)
+        return 0;
+    if (len > maxlen)
+        return 0;
+    if (!MultiByteToWideChar(CP_UTF8, 0, str, -1, wstr, len))
+        return 0;
+    return 1;
+}
+
+size_t loader_strlen(const char * x) {
+    int idx = 0;
+    while (x[idx] != 0)
+        idx++;
+    return idx;
+}
+
+size_t loader_wcslen(const wchar_t * x) {
+    int idx = 0;
+    while (x[idx] != 0)
+        idx++;
+    return idx;
+}
+
+char * loader_strncat(char * base, const char * tail, size_t maxlen) {
+    int base_len = strlen(base);
+    int tail_len = strlen(tail);
+    for (int idx=base_len; idx<min(maxlen, base_len + tail_len); ++idx) {
+        base[idx] = tail[idx - base_len];
+    }
+    return base;
+}
+
+void * loader_memcpy(void * dest, const void * src, size_t len) {
+    for (int idx=0; idx<len; ++idx) {
+        ((char *)dest)[idx] = ((const char *)src)[idx];
+    }
+    return dest;
+}
+
+char * loader_dirname(char * x) {
+    int idx = strlen(x);
+    while (idx > 0 && x[idx] != PATHSEPSTRING[0]) {
+        idx -= 1;
+    }
+    if (x[idx] == PATHSEPSTRING[0]) {
+        // Special-case x == "/"
+        if (idx == 0) {
+            x[1] = '\0';
+            return x;
+        } else {
+            x[idx] = '\0';
+            return x;
+        }
+    }
+    x[0] = '.';
+    x[1] = '\0';
+    return x;
+}
+
+char * loader_strchr(const char * haystack, int needle) {
+    int idx=0;
+    while (haystack[idx] != needle) {
+        if (haystack[idx] == 0) {
+            return NULL;
+        }
+        idx++;
+    }
+    return (char *)haystack + idx;
+}
diff --git a/cli/trampolines/common.h b/cli/trampolines/common.h
new file mode 100644
index 00000000000000..00d703c341515c
--- /dev/null
+++ b/cli/trampolines/common.h
@@ -0,0 +1,77 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "../../src/support/platform.h"
+
+// Preprocessor annoyances
+#define CONCAT_(x,y)    x##y
+#define CONCAT(x,y)     CONCAT_(x, y)
+#define CNAMEADDR(name) CONCAT(CNAME(name),_addr)
+#define STR_(x)         #x
+#define STR(x)          STR_(x)
+#define I(x)            x
+
+// On macOS and 32-bit windows, we need to prepend underscores on symbols to match the C ABI
+#if defined(__APPLE__) || (defined(_WIN32) && !defined(_WIN64))
+#define UNDERSCORE(x) _##x
+#else
+#define UNDERSCORE(x)    x
+#endif
+
+// Windows requires some help with the linker when it comes to debuginfo/exporting
+#if defined(_WIN32) || defined(_WIN64)
+#define DEBUGINFO(name)     .def name; \
+                            .scl 2; \
+                            .type 32; \
+                            .endef
+#define EXPORT(name)        .section .drectve,"r"; \
+                            .ascii STR(-export:##I(name)); \
+                            .ascii " "; \
+                            .section .text
+#elif defined(__ELF__)
+#define DEBUGINFO(name)     .type CNAME(name),@function
+#define EXPORT(name)        .size CNAME(name), . - CNAME(name)
+#else
+#define DEBUGINFO(name)
+#define EXPORT(name)
+#endif
+
+// Windows 64-bit uses SEH
+#if defined(_WIN64)
+#define SEH_START1(name)    .seh_proc CNAME(name)
+#define SEH_START2()        .seh_endprologue
+#define SEH_END()           .seh_endproc
+#else
+#define SEH_START1(name)
+#define SEH_START2()
+#define SEH_END()
+#endif
+
+// If we're compiling with control-flow branch protection, mark the trampoline entry
+// points with `endbr{32,64}`, as appropriate on this arch
+#if defined(__CET__) && __CET__ & 1 != 0
+#if defined(__x86_64__)
+#define CET_START()     endbr64
+#else
+#define CET_START()     endbr32
+#endif
+#else
+#define CET_START()
+#endif
+
+// aarch64 on mac requires some special assembler syntax for both calculating memory
+// offsets and even just the assembler statement separator token
+#if defined(__aarch64__)
+#if defined(__APPLE__)
+#define PAGE(x)     x##@PAGE
+#define PAGEOFF(x)  x##@PAGEOFF
+#define SEP         %%
+#else
+#define PAGE(x)     x
+#define PAGEOFF(x)  :lo12:##x
+#define SEP         ;
+#endif
+#endif
+
+// If someday we need to mangle everything, we do so by defining this `CNAME()`
+// to do something more complex than just `UNDERSCORE(x)`.
+#define CNAME(x)    UNDERSCORE(x)
diff --git a/cli/trampolines/trampolines_aarch64.S b/cli/trampolines/trampolines_aarch64.S
new file mode 100644
index 00000000000000..2d87ae6dcdb1cf
--- /dev/null
+++ b/cli/trampolines/trampolines_aarch64.S
@@ -0,0 +1,21 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "common.h"
+#include "../../src/jl_exported_funcs.inc"
+
+#define XX(name) \
+.global CNAME(name) SEP \
+.cfi_startproc SEP \
+.p2align    2 SEP \
+CNAME(name)##: SEP \
+    adrp x16, PAGE(CNAME(name##_addr)) SEP \
+    ldr x16, [x16, PAGEOFF(CNAME(name##_addr))] SEP \
+    br x16 SEP \
+.cfi_endproc SEP \
+
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
+#undef XX
diff --git a/cli/trampolines/trampolines_arm.S b/cli/trampolines/trampolines_arm.S
new file mode 100644
index 00000000000000..5ce6617f3f04e6
--- /dev/null
+++ b/cli/trampolines/trampolines_arm.S
@@ -0,0 +1,24 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "common.h"
+#include "../../src/jl_exported_funcs.inc"
+
+#define XX(name) \
+.global CNAME(name); \
+.cfi_startproc; \
+CNAME(name)##:; \
+    ldr ip, CONCAT(.L,CNAMEADDR(name)); \
+CONCAT(.L,CNAME(name)): ;\
+    add ip, pc, ip; \
+    ldr pc, [ip]; \
+    .align 2; \
+CONCAT(.L,CNAMEADDR(name))##: ; \
+    .word CNAMEADDR(name)##-(CONCAT(.L,CNAME(name)) + 8); \
+.cfi_endproc; \
+
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
+#undef XX
diff --git a/cli/trampolines/trampolines_i686.S b/cli/trampolines/trampolines_i686.S
new file mode 100644
index 00000000000000..3d9cacf0ce652c
--- /dev/null
+++ b/cli/trampolines/trampolines_i686.S
@@ -0,0 +1,22 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "common.h"
+#include "../../src/jl_exported_funcs.inc"
+
+#define XX(name) \
+DEBUGINFO(CNAME(name)); \
+.global CNAME(name); \
+.cfi_startproc; \
+CNAME(name)##:; \
+    CET_START(); \
+    jmpl *(CNAMEADDR(name)); \
+    ud2; \
+.cfi_endproc; \
+EXPORT(name); \
+
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
+#undef XX
diff --git a/cli/trampolines/trampolines_powerpc64le.S b/cli/trampolines/trampolines_powerpc64le.S
new file mode 100644
index 00000000000000..8b32ef91d2464f
--- /dev/null
+++ b/cli/trampolines/trampolines_powerpc64le.S
@@ -0,0 +1,29 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "common.h"
+#include "../../src/jl_exported_funcs.inc"
+
+// Notes:
+// bctr: branch to CTR without LR update (tail-call)
+// localentry: On PPC functions have a localentry that assumes r2 contains
+//             the TOC pointer, and a global entry point that sets r2.
+// See 64-Bit ELF V2 ABI Specification: Power Architecture v1.4
+
+#define XX(name) \
+.global CNAME(name); \
+.type CNAME(name)##, @function; \
+.cfi_startproc; \
+CNAME(name)##: ; \
+    addis 2, 12, .TOC.-CNAME(name)##@ha; \
+    addi 2, 2, .TOC.-CNAME(name)##@l; \
+    .localentry CNAME(name)##,.-CNAME(name)##; \
+    addis 12,2,CNAMEADDR(name)##@toc@ha; \
+    ld 12,CNAMEADDR(name)##@toc@l(12); \
+    mtctr 12; \
+    bctr; \
+.cfi_endproc; \
+.size CNAME(name)##,.-CNAME(name)##; \
+
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+JL_CODEGEN_EXPORTED_FUNCS(XX)
+#undef XX
diff --git a/cli/trampolines/trampolines_x86_64.S b/cli/trampolines/trampolines_x86_64.S
new file mode 100644
index 00000000000000..3b800da56eee17
--- /dev/null
+++ b/cli/trampolines/trampolines_x86_64.S
@@ -0,0 +1,26 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "common.h"
+#include "../../src/jl_exported_funcs.inc"
+
+#define XX(name) \
+DEBUGINFO(name); \
+.global CNAME(name); \
+.cfi_startproc; \
+SEH_START1(name); \
+CNAME(name)##:; \
+SEH_START2(); \
+    CET_START(); \
+    mov CNAMEADDR(name)(%rip),%r11; \
+    jmpq *%r11; \
+    ud2; \
+SEH_END(); \
+.cfi_endproc; \
+EXPORT(name); \
+
+JL_RUNTIME_EXPORTED_FUNCS(XX)
+#ifdef _OS_WINDOWS_
+JL_RUNTIME_EXPORTED_FUNCS_WIN(XX)
+#endif
+JL_CODEGEN_EXPORTED_FUNCS(XX)
+#undef XX
diff --git a/contrib/README.md b/contrib/README.md
index d1b2485dabe558..f75dc4488fb0bd 100644
--- a/contrib/README.md
+++ b/contrib/README.md
@@ -6,9 +6,8 @@ Installation
 |[ mac/ ](https://github.com/JuliaLang/julia/blob/master/contrib/mac/) | Mac install files |
 |[ windows/ ](https://github.com/JuliaLang/julia/blob/master/contrib/windows/) | Windows install files |
 |[ add_license_to_files.jl ](https://github.com/JuliaLang/julia/blob/master/contrib/add_license_to_files.jl ) | Add the Julia license to files in the Julia Project |
-|[ check-whitespace.sh ](https://github.com/JuliaLang/julia/blob/master/contrib/check-whitespace.sh) | Check for trailing white space |
+|[ check-whitespace.jl ](https://github.com/JuliaLang/julia/blob/master/contrib/check-whitespace.jl) | Check for white space issues |
 |[ commit-name.sh ](https://github.com/JuliaLang/julia/blob/master/contrib/commit-name.sh) | Computes a version name for a commit |
-|[ filterArgs.sh ](https://github.com/JuliaLang/julia/blob/master/contrib/filterArgs.sh) | Update library search code to use only tokens that start with -L |
 |[ fixup-libgfortran.sh ](https://github.com/JuliaLang/julia/blob/master/contrib/fixup-libgfortran.sh) | Include libgfortran  and libquadmath for installations |
 |[ fixup-libstdc++.sh ](https://github.com/JuliaLang/julia/blob/master/contrib/fixup-libstdc++.sh) | Include libstdc++ for    installations |
 |[ install.sh ](https://github.com/JuliaLang/julia/blob/master/contrib/install.sh) | Installation script with different permissions |
@@ -17,7 +16,6 @@ Installation
 |[ julia.desktop ](https://github.com/JuliaLang/julia/blob/master/contrib/julia.desktop) | GNOME desktop config file |
 |[ relative_path.py ](https://github.com/JuliaLang/julia/blob/master/contrib/relative_path.py) | Convert absolute paths into   relative paths |
 |[ stringreplace.c ](https://github.com/JuliaLang/julia/blob/master/contrib/stringreplace.c) | Replace strings to hardcoded paths in binaries during `make install` |
-|[ travis_fastfail.sh ](https://github.com/JuliaLang/julia/blob/master/contrib/travis_fastfail.sh ) |  Checks for queued build tests in Travis |
 
 Debugging
 =========
@@ -25,4 +23,4 @@ Debugging
 | Name                           |  Description                                                |
 | ------------------------------ | ----------------------------------------------------------- |
 |[ debug_bootstrap.gdb ](https://github.com/JuliaLang/julia/blob/master/contrib/debug_bootstrap.gdb) | Bootstrap process using the debug build |
-|[ valgrind-julia.supp ](https://github.com/JuliaLang/julia/blob/master/contrib/valgrind-julia.supp) | Suppressions  for Valgrind debugging tool |
+|[ valgrind-julia.supp ](https://github.com/JuliaLang/julia/blob/master/contrib/valgrind-julia.supp) | Suppressions for Valgrind debugging tool |
diff --git a/contrib/add_license_to_files.jl b/contrib/add_license_to_files.jl
index ce52881e2a031f..1d301a54553948 100644
--- a/contrib/add_license_to_files.jl
+++ b/contrib/add_license_to_files.jl
@@ -15,22 +15,24 @@ const print_result = true  # prints files which where not processed.
 
 const rootdirs = [
     "../base",
+    "../cli",
     "../contrib",
     "../src",
     "../stdlib",
-    "../test",
 ]
 
-# to exculde whole sub directories
+# to exclude whole sub directories
 const excludedirs = [
     # see: https://github.com/JuliaLang/julia/pull/11073#issuecomment-98090053
-    "../base/grisu",
     "../base/ryu",
     "../src/flisp",
+    "../stdlib/TOML/test/testfiles",
+    "../test/testhelpers/allocation_file.jl",
 ]
 
 const skipfiles = [
     "../contrib/add_license_to_files.jl",
+    "../contrib/asan/check.jl",
     # files to check - already copyright
     # see: https://github.com/JuliaLang/julia/pull/11073#issuecomment-98099389
     "../base/special/trig.jl",
@@ -44,11 +46,10 @@ const skipfiles = [
     "../src/abi_x86.cpp",
     "../src/abi_x86_64.cpp",
     "../src/disasm.cpp",
-    "../src/getopt.c",
-    "../src/getopt.h",
     "../src/support/END.h",
     "../src/support/ENTRY.amd64.h",
     "../src/support/ENTRY.i387.h",
+    "../src/support/_setjmp.win32.S",
     "../src/support/MurmurHash3.c",
     "../src/support/MurmurHash3.h",
     "../src/support/asprintf.c",
@@ -58,6 +59,7 @@ const skipfiles = [
     "../src/support/tzfile.h",
     "../src/support/utf8.c",
     "../src/crc32c.c",
+    "../src/mach_excUser.c",
 ]
 
 const ext_prefix = Dict([
@@ -66,6 +68,7 @@ const ext_prefix = Dict([
     (".h", "// "),
     (".c", "// "),
     (".cpp", "// "),
+    (".S", "// "),
 ])
 
 const new_license = "This file is a part of Julia. License is MIT: https://julialang.org/license"
@@ -104,6 +107,7 @@ function getfilespaths!(filepaths::Vector, rootdir::AbstractString)
     abs_rootdir = abspath(rootdir)
     for name in readdir(abs_rootdir)
         path = joinpath(abs_rootdir, name)
+        islink(path) && continue
         if isdir(path)
             getfilespaths!(filepaths, path)
         else
@@ -118,6 +122,7 @@ function add_license_line!(unprocessed::Vector, src::AbstractString, new_license
 
     for name in readdir(src)
         path = normpath(joinpath(src, name))
+        islink(path) && continue
         if isdir(path)
             if path in abs_excludedirs
                 getfilespaths!(unprocessed, path)
diff --git a/contrib/asan/Make.user.asan b/contrib/asan/Make.user.asan
new file mode 100644
index 00000000000000..96ed13b54e0f97
--- /dev/null
+++ b/contrib/asan/Make.user.asan
@@ -0,0 +1,27 @@
+TOOLCHAIN=$(BUILDROOT)/../toolchain
+BINDIR=$(TOOLCHAIN)/usr/bin
+TOOLDIR=$(TOOLCHAIN)/usr/tools
+
+# use our new toolchain
+USECLANG=1
+override CC=$(TOOLDIR)/clang
+override CXX=$(TOOLDIR)/clang++
+export ASAN_SYMBOLIZER_PATH=$(TOOLDIR)/llvm-symbolizer
+
+USE_BINARYBUILDER_LLVM=1
+
+override SANITIZE=1
+override SANITIZE_ADDRESS=1
+
+# make the GC use regular malloc/frees, which are hooked by ASAN
+override WITH_GC_DEBUG_ENV=1
+
+# default to a debug build for better line number reporting
+override JULIA_BUILD_MODE=debug
+
+# Enable Julia assertions and LLVM assertions
+FORCE_ASSERTIONS=1
+LLVM_ASSERTIONS=1
+
+# Build a minimal system image
+JULIA_PRECOMPILE=0
diff --git a/contrib/asan/Make.user.tools b/contrib/asan/Make.user.tools
new file mode 100644
index 00000000000000..1bd6f97e39111d
--- /dev/null
+++ b/contrib/asan/Make.user.tools
@@ -0,0 +1,2 @@
+USE_BINARYBUILDER_LLVM=1
+BUILD_LLVM_CLANG=1
diff --git a/contrib/asan/build.sh b/contrib/asan/build.sh
new file mode 100755
index 00000000000000..77f3078b35c42e
--- /dev/null
+++ b/contrib/asan/build.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#
+# Usage:
+#     contrib/asan/build.sh <path> [<make_targets>...]
+#
+# Build ASAN-enabled julia.  Given a workspace directory <path>, build
+# ASAN-enabled julia in <path>/asan.  Required toolss are install under
+# <path>/toolchain.  This scripts also takes optional <make_targets> arguments
+# which are passed to `make`.  The default make target is `debug`.
+
+set -ue
+
+# `$WORKSPACE` is a directory in which we create `toolchain` and `asan`
+# sub-directories.
+WORKSPACE="$1"
+shift
+if [ "$WORKSPACE" = "" ]; then
+    echo "Workspace directory must be specified as the first argument" >&2
+    exit 2
+fi
+
+mkdir -pv "$WORKSPACE"
+WORKSPACE="$(cd "$WORKSPACE" && pwd)"
+if [ "$WORKSPACE" = "" ]; then
+    echo "Failed to create the workspace directory." >&2
+    exit 2
+fi
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+JULIA_HOME="$HERE/../../"
+
+echo
+echo "Installing toolchain..."
+
+TOOLCHAIN="$WORKSPACE/toolchain"
+if [ ! -d "$TOOLCHAIN" ]; then
+    make -C "$JULIA_HOME" configure O=$TOOLCHAIN
+    cp "$HERE/Make.user.tools"  "$TOOLCHAIN/Make.user"
+fi
+
+make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools
+
+echo
+echo "Building Julia..."
+
+BUILD="$WORKSPACE/asan"
+if [ ! -d "$BUILD" ]; then
+    make -C "$JULIA_HOME" configure O="$BUILD"
+    cp "$HERE/Make.user.asan"  "$BUILD/Make.user"
+fi
+
+make -C "$BUILD" "$@"
diff --git a/contrib/asan/check.jl b/contrib/asan/check.jl
new file mode 100755
index 00000000000000..2933aaf3fb4e31
--- /dev/null
+++ b/contrib/asan/check.jl
@@ -0,0 +1,87 @@
+#!/bin/bash
+# -*- mode: julia -*-
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+#
+# Usage:
+#     contrib/asan/check.jl <julia>
+#
+# Check that <julia> is built with ASAN.
+#
+#=
+JULIA="${JULIA:-julia}"
+exec "$JULIA" --startup-file=no --compile=min "${BASH_SOURCE[0]}" "$@"
+=#
+
+function main(args = ARGS)::Int
+    if length(args) != 1
+        @error "Expect a single argument" args
+        return 2
+    end
+    julia, = args
+
+    # It looks like double-free is easy to robustly trigger.
+    code = """
+    @info "Testing a pattern that would trigger ASAN"
+    write(ARGS[1], "started")
+
+    ptr = ccall(:malloc, Ptr{UInt}, (Csize_t,), 256)
+    ccall(:free, Cvoid, (Ptr{UInt},), ptr)
+    ccall(:free, Cvoid, (Ptr{UInt},), ptr)
+
+    @error "Failed to trigger ASAN"
+    """
+
+    local proc
+    timeout = Threads.Atomic{Bool}(false)
+    isstarted = false
+    mktemp() do tmppath, tmpio
+        cmd = `$julia -e $code $tmppath`
+        # Note: Ideally, we set ASAN_SYMBOLIZER_PATH here. But there is no easy
+        # way to find out the path from just a Julia binary.
+
+        @debug "Starting a process" cmd
+        proc = run(pipeline(cmd; stdout, stderr); wait = false)
+        timer = Timer(10)
+        @sync try
+            @async begin
+                try
+                    wait(timer)
+                    true
+                catch err
+                    err isa EOFError || rethrow()
+                    false
+                end && begin
+                    timeout[] = true
+                    kill(proc)
+                end
+            end
+            wait(proc)
+        finally
+            close(timer)
+        end
+
+        # At the very beginning of the process, the `julia` subprocess put a
+        # marker that it is successfully started. This is to avoid mixing
+        # non-functional `julia` binary (or even non-`julia` command) and
+        # correctly working `julia` with ASAN:
+        isstarted = read(tmpio, String) == "started"
+    end
+
+    if timeout[]
+        @error "Timeout waiting for the subprocess"
+        return 1
+    elseif success(proc)
+        @error "ASAN was not triggered"
+        return 1
+    elseif !isstarted
+        @error "Failed to start the process"
+        return 1
+    else
+        @info "ASAN is functional in the Julia binary `$julia`"
+        return 0
+    end
+end
+
+if abspath(PROGRAM_FILE) == @__FILE__
+    exit(main())
+end
diff --git a/contrib/bpftrace/gc_all.bt b/contrib/bpftrace/gc_all.bt
new file mode 100755
index 00000000000000..f78e8f3aa607d8
--- /dev/null
+++ b/contrib/bpftrace/gc_all.bt
@@ -0,0 +1,44 @@
+#!/usr/bin/env bpftrace
+
+BEGIN
+{
+    printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    $now = nsecs;
+    @time[pid] = $now;
+    @start[pid] = $now;
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
+/@start[pid]/
+{
+    $now = nsecs;
+    @stop_the_world_usecs[pid] = hist(($now - @time[pid]) / 1000);
+    @time[pid] = $now;
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__end
+/@start[pid]/
+{
+    $now = nsecs;
+    @gc_total_usecs[pid] = hist(($now - @start[pid]) / 1000);
+    @gc_phase_usecs[pid] = hist(($now - @time[pid]) / 1000);
+    @time[pid] = $now;
+    delete(@start[pid]);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__finalizer
+/@time[pid]/
+{
+    @finalizer[pid] = hist((nsecs - @time[pid]) / 1000);
+    delete(@time[pid]);
+}
+
+END
+{
+    clear(@start);
+    clear(@time);
+}
diff --git a/contrib/bpftrace/gc_simple.bt b/contrib/bpftrace/gc_simple.bt
new file mode 100755
index 00000000000000..559f41c41cf72c
--- /dev/null
+++ b/contrib/bpftrace/gc_simple.bt
@@ -0,0 +1,23 @@
+#!/usr/bin/env bpftrace
+
+BEGIN
+{
+    printf("Tracing Julia GC Times... Hit Ctrl-C to end.\n");
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    @start[pid] = nsecs;
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__end
+/@start[pid]/
+{
+    @usecs[pid] = hist((nsecs - @start[pid]) / 1000);
+    delete(@start[pid]);
+}
+
+END
+{
+    clear(@start);
+}
diff --git a/contrib/bpftrace/gc_stop_the_world_latency.bt b/contrib/bpftrace/gc_stop_the_world_latency.bt
new file mode 100755
index 00000000000000..8e541bcb421e2d
--- /dev/null
+++ b/contrib/bpftrace/gc_stop_the_world_latency.bt
@@ -0,0 +1,23 @@
+#!/usr/bin/env bpftrace
+
+BEGIN
+{
+    printf("Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.\n");
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+    @start[pid] = nsecs;
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:gc__stop_the_world
+/@start[pid]/
+{
+    @usecs[pid] = hist((nsecs - @start[pid]) / 1000);
+    delete(@start[pid]);
+}
+
+END
+{
+    clear(@start);
+}
diff --git a/contrib/bpftrace/rt_all.bt b/contrib/bpftrace/rt_all.bt
new file mode 100755
index 00000000000000..d4de28e354a508
--- /dev/null
+++ b/contrib/bpftrace/rt_all.bt
@@ -0,0 +1,81 @@
+#!/usr/bin/env bpftrace
+
+BEGIN
+{
+    printf("Tracing Julia Task events... Hit Ctrl-C to end.\n");
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__run__task
+{
+    printf("Task running: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__pause__task
+{
+    printf("Task pausing: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__new__task
+{
+    printf("Task created: %x (Parent %x)\n", arg1, arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__start__task
+{
+    printf("Task started: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__finish__task
+{
+    printf("Task finished: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__start__process__events
+{
+    printf("Task processing libuv events: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__finish__process__events
+{
+    printf("Task processed libuv events: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__taskq__insert
+{
+    printf("Thread %x inserting task to multiq: %x\n", arg0, arg1);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__taskq__get
+{
+    printf("Thread %x popped task from multiq: %x\n", arg0, arg1);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__sleep__check__wake
+{
+    printf("Thread waking: %x (was sleeping?: %d)\n", arg0, arg1);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__sleep__check__wakeup
+{
+    printf("Thread wakeup: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__sleep__check__sleep
+{
+    printf("Thread trying to sleep: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__sleep__check__taskq__wake
+{
+    printf("Thread waking due to non-empty task queue: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__sleep__check__task__wake
+{
+    printf("Thread waking due to popped task: %x\n", arg0);
+}
+
+usdt:usr/lib/libjulia-internal.so:julia:rt__sleep__check__uv__wake
+{
+    printf("Thread waking due to libuv: %x\n", arg0);
+}
diff --git a/contrib/check-whitespace.jl b/contrib/check-whitespace.jl
new file mode 100755
index 00000000000000..4d078d400daea4
--- /dev/null
+++ b/contrib/check-whitespace.jl
@@ -0,0 +1,55 @@
+#!/usr/bin/env julia
+
+const patterns = split("""
+    *.1
+    *.c
+    *.cpp
+    *.h
+    *.inc
+    *.jl
+    *.lsp
+    *.make
+    *.md
+    *.mk
+    *.rst
+    *.scm
+    *.sh
+    *.yml
+    *Makefile
+""")
+
+const errors = Set{Tuple{String,Int,String}}()
+
+for path in eachline(`git ls-files -- $patterns`)
+    lineno = 0
+    non_blank = 0
+
+    file_err(msg) = push!(errors, (path, 0, msg))
+    line_err(msg) = push!(errors, (path, lineno, msg))
+
+    for line in eachline(path, keep=true)
+        lineno += 1
+        contains(line, '\r')   && file_err("non-UNIX line endings")
+        contains(line, '\ua0') && line_err("non-breaking space")
+        endswith(line, '\n')   || line_err("no trailing newline")
+        line = chomp(line)
+        endswith(line, r"\s")  && line_err("trailing whitespace")
+        contains(line, r"\S")  && (non_blank = lineno)
+    end
+    non_blank < lineno         && line_err("trailing blank lines")
+end
+
+if isempty(errors)
+    println(stderr, "Whitespace check found no issues.")
+    exit(0)
+else
+    println(stderr, "Whitespace check found $(length(errors)) issues:")
+    for (path, lineno, msg) in sort!(collect(errors))
+        if lineno == 0
+            println(stderr, "$path -- $msg")
+        else
+            println(stderr, "$path:$lineno -- $msg")
+        end
+    end
+    exit(1)
+end
diff --git a/contrib/check-whitespace.sh b/contrib/check-whitespace.sh
deleted file mode 100755
index c380d7bdd29691..00000000000000
--- a/contrib/check-whitespace.sh
+++ /dev/null
@@ -1,37 +0,0 @@
-#!/bin/sh
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Check for trailing white space in source files;
-# report an error if so
-
-# Files to check:
-set -f # disable glob expansion in this script
-file_patterns='
-*.1
-*.c
-*.cpp
-*.h
-*.jl
-*.lsp
-*.scm
-*.inc
-*.make
-*.mk
-*.md
-*.rst
-*.sh
-*.yml
-*Makefile
-'
-
-# TODO: Look also for trailing empty lines, and missing '\n' after the last line
-if git --no-pager grep --color -n --full-name -e ' $' -- $file_patterns; then
-    echo "Error: trailing whitespace found in source file(s)"
-    echo ""
-    echo "This can often be fixed with:"
-    echo "    git rebase --whitespace=fix HEAD~1"
-    echo "or"
-    echo "    git rebase --whitespace=fix master"
-    echo "and then a forced push of the correct branch"
-    exit 1
-fi
diff --git a/contrib/codesign.sh b/contrib/codesign.sh
new file mode 100755
index 00000000000000..03866c4bb1ac1b
--- /dev/null
+++ b/contrib/codesign.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Codesign binary files for macOS.
+
+usage() {
+    echo "Usage: ${0} MACOS_CODESIGN_IDENTITY FILE-OR-DIRECTORY"
+    exit 0
+}
+
+# Default codesign identity to `-` if not provided
+if [ -z "${1}" ]; then
+    MACOS_CODESIGN_IDENTITY="-"
+    ENTITLEMENTS=""
+else
+    MACOS_CODESIGN_IDENTITY="${1}"
+    ENTITLEMENTS="--entitlements $(dirname "${0}")/mac/app/Entitlements.plist"
+fi
+
+if [ "${#}" -eq 2 ]; then
+    if [ -f "${2}" ]; then
+        # Codesign only the given file
+        MACHO_FILES="${2}"
+    elif [ -d "${2}" ]; then
+        # Find all files in the given directory
+        MACHO_FILES=$(find "${2}" -type f -perm -0111 | cut -d: -f1)
+    else
+        usage
+    fi
+else
+    usage
+fi
+
+echo "Codesigning with identity ${MACOS_CODESIGN_IDENTITY}"
+for f in ${MACHO_FILES}; do
+    echo "Codesigning ${f}..."
+    codesign -s "${MACOS_CODESIGN_IDENTITY}" --option=runtime ${ENTITLEMENTS} -vvv --timestamp --deep --force "${f}"
+done
diff --git a/contrib/download_cmake.sh b/contrib/download_cmake.sh
index d122e1e0f07d45..1deeb08ddded24 100755
--- a/contrib/download_cmake.sh
+++ b/contrib/download_cmake.sh
@@ -8,31 +8,38 @@ mkdir -p "$(dirname "$0")"/../deps/scratch
 cd "$(dirname "$0")"/../deps/scratch
 
 CMAKE_VERSION_MAJOR=3
-CMAKE_VERSION_MINOR=7
-CMAKE_VERSION_PATCH=1
+CMAKE_VERSION_MINOR=19
+CMAKE_VERSION_PATCH=3
 CMAKE_VERSION_MAJMIN=$CMAKE_VERSION_MAJOR.$CMAKE_VERSION_MINOR
 CMAKE_VERSION=$CMAKE_VERSION_MAJMIN.$CMAKE_VERSION_PATCH
 
 # listed at https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/cmake-$CMAKE_VERSION-SHA-256.txt
-# for the files cmake-$CMAKE_VERSION-Darwin-x86_64.tar.gz
-# and cmake-$CMAKE_VERSION-Linux-x86_64.tar.gz
-CMAKE_SHA256_DARWIN=1851d1448964893fdc5a8c05863326119f397a3790e0c84c40b83499c7960267
-CMAKE_SHA256_LINUX=7b4b7a1d9f314f45722899c0521c261e4bfab4a6b532609e37fef391da6bade2
+# for the files cmake-$CMAKE_VERSION-macos-universal.tar.gz
+# cmake-$CMAKE_VERSION-Linux-x86_64.tar.gz and cmake-$CMAKE_VERSION-Linux-aarch64.tar.gz
+CMAKE_SHA256_DARWIN=a6b79ad05f89241a05797510e650354d74ff72cc988981cdd1eb2b3b2bda66ac
+CMAKE_SHA256_LINUX_X86_64=c18b65697e9679e5c88dccede08c323cd3d3730648e59048047bba82097e0ffc
+CMAKE_SHA256_LINUX_AARCH64=66e507c97ffb586d7ca6567890808b792c8eb004b645706df6fbf27826a395a2
 
 PLATFORM="$(uname)-$(uname -m)"
-FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
 case $PLATFORM in
-  Darwin-x86_64)
+  Darwin-*)
+    FULLNAME=cmake-$CMAKE_VERSION-macos-universal
     ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
     echo "$CMAKE_SHA256_DARWIN  $FULLNAME.tar.gz" | shasum -a 256 -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/CMake.app/Contents/bin/cmake;;
   Linux-x86_64)
+    FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
     ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
-    echo "$CMAKE_SHA256_LINUX  $FULLNAME.tar.gz" | sha256sum -c -
+    echo "$CMAKE_SHA256_LINUX_X86_64  $FULLNAME.tar.gz" | sha256sum -c -
+    CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;;
+  Linux-aarch64)
+    FULLNAME=cmake-$CMAKE_VERSION-$PLATFORM
+    ../tools/jldownload https://cmake.org/files/v$CMAKE_VERSION_MAJMIN/$FULLNAME.tar.gz
+    echo "$CMAKE_SHA256_LINUX_AARCH64  $FULLNAME.tar.gz" | sha256sum -c -
     CMAKE_EXTRACTED_PATH=$FULLNAME/bin/cmake;;
   *)
-    echo "This script only supports x86_64 Mac and Linux. For other platforms," >&2
-    echo "get cmake from your package manager or compile it from source." >&2
+    echo "This script only supports Mac and Linux, both for x86_64 and aarch64." >&2
+    echo "For other platforms, get cmake from your package manager or compile it from source." >&2
     exit 1;;
 esac
 
diff --git a/contrib/filterArgs.sh b/contrib/filterArgs.sh
deleted file mode 100755
index 823745e004e6ea..00000000000000
--- a/contrib/filterArgs.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-#!/bin/sh
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Loop over all command line arguments
-for i in "$@"; do
-    # If an argument starts with -L, echo it out sans -L!
-    case $i in
-    -L*) printf '"%s"\n' "${i#-L}" ;;
-    esac
-done
diff --git a/contrib/fixup-libgfortran.sh b/contrib/fixup-libgfortran.sh
index 897a1a0119e94d..6121665fb5a869 100755
--- a/contrib/fixup-libgfortran.sh
+++ b/contrib/fixup-libgfortran.sh
@@ -14,7 +14,7 @@ debug() { :; }
 fi
 
 if [ -z "$1" ]; then
-    echo "Usage: $0 <private_libdir>"
+    echo "Usage: $0 [--verbose] <private_libdir>"
     exit 1
 fi
 
@@ -160,4 +160,3 @@ for lib in libopenblas libcholmod liblapack $SONAMES; do
         done
     done
 done
-
diff --git a/contrib/fixup-libstdc++.sh b/contrib/fixup-libstdc++.sh
index ee84094169b61c..1c19d98a54b1e4 100755
--- a/contrib/fixup-libstdc++.sh
+++ b/contrib/fixup-libstdc++.sh
@@ -3,7 +3,7 @@
 
 # Run as: fixup-libstdc++.sh <libdir> <private_libdir>
 
-if [ -z "$1" ]; then
+if [ "$#" -ne 2 ]; then
     echo "Usage: $0 <libdir> <private_libdir>"
     exit 1
 fi
@@ -11,8 +11,8 @@ fi
 libdir="$1"
 private_libdir="$2"
 
-if [ ! -f "$libdir/libjulia.so" ]; then
-    echo "ERROR: Could not open $libdir/libjulia.so" >&2
+if [ ! -f "$private_libdir/libjulia-internal.so" ]; then
+    echo "ERROR: Could not open $private_libdir/libjulia-internal.so" >&2
     exit 2
 fi
 
@@ -24,7 +24,7 @@ find_shlib ()
 }
 
 # Discover libstdc++ location and name
-LIBSTD=$(find_shlib "$libdir/libjulia.so" "libstdc++.so")
+LIBSTD=$(find_shlib "$private_libdir/libjulia-internal.so" "libstdc++.so")
 LIBSTD_NAME=$(basename $LIBSTD)
 LIBSTD_DIR=$(dirname $LIBSTD)
 
diff --git a/contrib/generate_precompile.jl b/contrib/generate_precompile.jl
index d52e16036801e6..a10d195229cabf 100644
--- a/contrib/generate_precompile.jl
+++ b/contrib/generate_precompile.jl
@@ -1,6 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-if isempty(ARGS) || ARGS[1] !== "0"
+if Threads.nthreads() != 1
+    @warn "Running this file with multiple Julia threads may lead to a build error" Threads.nthreads()
+end
+
+if Base.isempty(Base.ARGS) || Base.ARGS[1] !== "0"
 Sys.__init_build()
 # Prevent this from being put into the Main namespace
 @eval Module() begin
@@ -9,6 +13,7 @@ if !isdefined(Base, :uv_eventloop)
 end
 Base.include(@__MODULE__, joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testhelpers", "FakePTYs.jl"))
 import .FakePTYs: open_fake_pty
+using Base.Meta
 
 CTRL_C = '\x03'
 UP_ARROW = "\e[A"
@@ -16,24 +21,43 @@ DOWN_ARROW = "\e[B"
 
 hardcoded_precompile_statements = """
 # used by Revise.jl
-@assert precompile(Tuple{typeof(Base.parse_cache_header), String})
-@assert precompile(Tuple{typeof(pushfirst!), Vector{Any}, Function})
+precompile(Tuple{typeof(Base.parse_cache_header), String})
+precompile(Base.read_dependency_src, (String, String))
+
 # used by Requires.jl
-@assert precompile(Tuple{typeof(get!), Type{Vector{Function}}, Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
-@assert precompile(Tuple{typeof(haskey), Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
-@assert precompile(Tuple{typeof(delete!), Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
-@assert precompile(Tuple{typeof(push!), Vector{Function}, Function})
+precompile(Tuple{typeof(get!), Type{Vector{Function}}, Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
+precompile(Tuple{typeof(haskey), Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
+precompile(Tuple{typeof(delete!), Dict{Base.PkgId,Vector{Function}}, Base.PkgId})
+precompile(Tuple{typeof(push!), Vector{Function}, Function})
+
 # miscellaneous
-@assert precompile(Tuple{typeof(Base.require), Base.PkgId})
-@assert precompile(Tuple{typeof(isassigned), Core.SimpleVector, Int})
-@assert precompile(Tuple{typeof(Base.Experimental.register_error_hint), Any, Type})
+precompile(Tuple{typeof(Base.require), Base.PkgId})
+precompile(Tuple{typeof(Base.recursive_prefs_merge), Base.Dict{String, Any}})
+precompile(Tuple{typeof(isassigned), Core.SimpleVector, Int})
+precompile(Tuple{typeof(getindex), Core.SimpleVector, Int})
+precompile(Tuple{typeof(Base.Experimental.register_error_hint), Any, Type})
+precompile(Tuple{typeof(Base.display_error), Base.ExceptionStack})
+precompile(Tuple{Core.kwftype(typeof(Type)), NamedTuple{(:sizehint,), Tuple{Int}}, Type{IOBuffer}})
+precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel, String, Module))
+precompile(Base.CoreLogging.current_logger_for_env, (Base.CoreLogging.LogLevel, Symbol, Module))
+precompile(Base.CoreLogging.env_override_minlevel, (Symbol, Module))
+precompile(Base.StackTraces.lookup, (Ptr{Nothing},))
 """
 
-precompile_script = """
+for T in (Float16, Float32, Float64), IO in (IOBuffer, IOContext{IOBuffer}, Base.TTY, IOContext{Base.TTY})
+    global hardcoded_precompile_statements
+    hardcoded_precompile_statements *= "precompile(Tuple{typeof(show), $IO, $T})\n"
+end
+
+repl_script = """
 2+2
 print("")
+printstyled("a", "b")
+display([1])
+display([1 2; 3 4])
 @time 1+1
 ; pwd
+$CTRL_C
 ? reinterpret
 using Ra\t$CTRL_C
 \\alpha\t$CTRL_C
@@ -45,13 +69,41 @@ f(x) = x03
 f(1,2)
 [][1]
 cd("complet_path\t\t$CTRL_C
-# Used by JuliaInterpreter
-push!(Set{Module}(), Main)
-push!(Set{Method}(), first(methods(collect)))
-# Used by Revise
-(setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
-(setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
-get(Base.pkgorigins, Base.PkgId(Base), nothing)
+"""
+
+precompile_script = """
+# NOTE: these were moved to the end of Base.jl. TODO: move back here.
+# # Used by Revise & its dependencies
+# while true  # force inference
+# delete!(push!(Set{Module}(), Base), Main)
+# m = first(methods(+))
+# delete!(push!(Set{Method}(), m), m)
+# empty!(Set())
+# push!(push!(Set{Union{GlobalRef,Symbol}}(), :two), GlobalRef(Base, :two))
+# (setindex!(Dict{String,Base.PkgId}(), Base.PkgId(Base), "file.jl"))["file.jl"]
+# (setindex!(Dict{Symbol,Vector{Int}}(), [1], :two))[:two]
+# (setindex!(Dict{Base.PkgId,String}(), "file.jl", Base.PkgId(Base)))[Base.PkgId(Base)]
+# (setindex!(Dict{Union{GlobalRef,Symbol}, Vector{Int}}(), [1], :two))[:two]
+# (setindex!(IdDict{Type, Union{Missing, Vector{Tuple{LineNumberNode, Expr}}}}(), missing, Int))[Int]
+# Dict{Symbol, Union{Nothing, Bool, Symbol}}(:one => false)[:one]
+# Dict(Base => [:(1+1)])[Base]
+# Dict(:one => [1])[:one]
+# Dict("abc" => Set())["abc"]
+# pushfirst!([], sum)
+# get(Base.pkgorigins, Base.PkgId(Base), nothing)
+# sort!([1,2,3])
+# unique!([1,2,3])
+# cumsum([1,2,3])
+# append!(Int[], BitSet())
+# isempty(BitSet())
+# delete!(BitSet([1,2]), 3)
+# deleteat!(Int32[1,2,3], [1,3])
+# deleteat!(Any[1,2,3], [1,3])
+# Core.svec(1, 2) == Core.svec(3, 4)
+# # copy(Core.Compiler.retrieve_code_info(Core.Compiler.specialize_method(which(+, (Int, Int)), [Int, Int], Core.svec())))
+# any(t->t[1].line > 1, [(LineNumberNode(2,:none),:(1+1))])
+# break   # end force inference
+# end
 """
 
 julia_exepath() = joinpath(Sys.BINDIR, Base.julia_exename())
@@ -60,7 +112,7 @@ have_repl =  haskey(Base.loaded_modules,
                     Base.PkgId(Base.UUID("3fa0cd96-eef1-5676-8a61-b3b8758bbffb"), "REPL"))
 if have_repl
     hardcoded_precompile_statements *= """
-    @assert precompile(Tuple{typeof(getproperty), REPL.REPLBackend, Symbol})
+    precompile(Tuple{typeof(getproperty), REPL.REPLBackend, Symbol})
     """
 end
 
@@ -68,25 +120,39 @@ Distributed = get(Base.loaded_modules,
           Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed"),
           nothing)
 if Distributed !== nothing
+    hardcoded_precompile_statements *= """
+    precompile(Tuple{typeof(Distributed.remotecall),Function,Int,Module,Vararg{Any, 100}})
+    precompile(Tuple{typeof(Distributed.procs)})
+    precompile(Tuple{typeof(Distributed.finalize_ref), Distributed.Future})
+    """
+# This is disabled because it doesn't give much benefit
+# and the code in Distributed is poorly typed causing many invalidations
+#=
     precompile_script *= """
     using Distributed
     addprocs(2)
     pmap(x->iseven(x) ? 1 : 0, 1:4)
     @distributed (+) for i = 1:100 Int(rand(Bool)) end
     """
+=#
 end
 
+
 Artifacts = get(Base.loaded_modules,
           Base.PkgId(Base.UUID("56f22d72-fd6d-98f1-02f0-08ddc0907c33"), "Artifacts"),
           nothing)
 if Artifacts !== nothing
     precompile_script *= """
-    using Artifacts, Base.BinaryPlatforms
-    artifacts_toml = abspath($(repr(joinpath(Sys.STDLIB, "Artifacts", "test", "Artifacts.toml"))))
-    cd(() -> @artifact_str("c_simple"), dirname(artifacts_toml))
+    using Artifacts, Base.BinaryPlatforms, Libdl
+    artifacts_toml = abspath(joinpath(Sys.STDLIB, "Artifacts", "test", "Artifacts.toml"))
+    artifact_hash("HelloWorldC", artifacts_toml)
+    oldpwd = pwd(); cd(dirname(artifacts_toml))
+    macroexpand(Main, :(@artifact_str("HelloWorldC")))
+    cd(oldpwd)
     artifacts = Artifacts.load_artifacts_toml(artifacts_toml)
-    platforms = [Artifacts.unpack_platform(e, "c_simple", artifacts_toml) for e in artifacts["c_simple"]]
+    platforms = [Artifacts.unpack_platform(e, "HelloWorldC", artifacts_toml) for e in artifacts["HelloWorldC"]]
     best_platform = select_platform(Dict(p => triplet(p) for p in platforms))
+    dlopen("libjulia$(ccall(:jl_is_debugbuild, Cint, ()) != 0 ? "-debug" : "")", RTLD_LAZY | RTLD_DEEPBIND)
     """
 end
 
@@ -96,7 +162,8 @@ Pkg = get(Base.loaded_modules,
           nothing)
 
 if Pkg !== nothing
-    precompile_script *= Pkg.precompile_script
+    # TODO: Split Pkg precompile script into REPL and script part
+    repl_script *= Pkg.precompile_script
 end
 
 FileWatching = get(Base.loaded_modules,
@@ -104,8 +171,9 @@ FileWatching = get(Base.loaded_modules,
           nothing)
 if FileWatching !== nothing
     hardcoded_precompile_statements *= """
-    @assert precompile(Tuple{typeof(FileWatching.watch_file), String, Float64})
-    @assert precompile(Tuple{typeof(FileWatching.watch_file), String, Int})
+    precompile(Tuple{typeof(FileWatching.watch_file), String, Float64})
+    precompile(Tuple{typeof(FileWatching.watch_file), String, Int})
+    precompile(Tuple{typeof(FileWatching._uv_hook_close), FileWatching.FileMonitor})
     """
 end
 
@@ -118,28 +186,103 @@ if Libdl !== nothing
     """
 end
 
+Test = get(Base.loaded_modules,
+          Base.PkgId(Base.UUID("8dfed614-e22c-5e08-85e1-65c5234f0b40"), "Test"),
+          nothing)
+if Test !== nothing
+    hardcoded_precompile_statements *= """
+    precompile(Tuple{typeof(Test.do_test), Test.ExecutionResult, Any})
+    precompile(Tuple{typeof(Test.testset_beginend_call), Tuple{String, Expr}, Expr, LineNumberNode})
+    precompile(Tuple{Type{Test.DefaultTestSet}, String})
+    precompile(Tuple{Type{Test.DefaultTestSet}, AbstractString})
+    precompile(Tuple{Core.kwftype(Type{Test.DefaultTestSet}), Any, Type{Test.DefaultTestSet}, AbstractString})
+    precompile(Tuple{typeof(Test.finish), Test.DefaultTestSet})
+    precompile(Tuple{typeof(Test.eval_test), Expr, Expr, LineNumberNode, Bool})
+    precompile(Tuple{typeof(Test._inferred), Expr, Module})
+    precompile(Tuple{typeof(Test.push_testset), Test.DefaultTestSet})
+    precompile(Tuple{typeof(Test.get_alignment), Test.DefaultTestSet, Int})
+    precompile(Tuple{typeof(Test.get_test_result), Any, Any})
+    precompile(Tuple{typeof(Test.do_test_throws), Test.ExecutionResult, Any, Any})
+    precompile(Tuple{typeof(Test.print_counts), Test.DefaultTestSet, Int, Int, Int, Int, Int, Int, Int})
+    precompile(Tuple{typeof(Test._check_testset), Type, Expr})
+    precompile(Tuple{typeof(Test.test_expr!), Any, Any})
+    precompile(Tuple{typeof(Test.test_expr!), Any, Any, Vararg{Any, 100}})
+    precompile(Tuple{typeof(Test.pop_testset)})
+    precompile(Tuple{typeof(Test.match_logs), Function, Tuple{Symbol, Regex}})
+    precompile(Tuple{typeof(Test.match_logs), Function, Tuple{String, Regex}})
+    precompile(Tuple{typeof(Base.CoreLogging.shouldlog), Test.TestLogger, Base.CoreLogging.LogLevel, Module, Symbol, Symbol})
+    precompile(Tuple{typeof(Base.CoreLogging.handle_message), Test.TestLogger, Base.CoreLogging.LogLevel, String, Module, Symbol, Symbol, String, Int})
+    precompile(Tuple{typeof(Core.kwfunc(Base.CoreLogging.handle_message)), typeof((exception=nothing,)), typeof(Base.CoreLogging.handle_message), Test.TestLogger, Base.CoreLogging.LogLevel, String, Module, Symbol, Symbol, String, Int})
+    precompile(Tuple{typeof(Test.detect_ambiguities), Any})
+    precompile(Tuple{typeof(Test.collect_test_logs), Function})
+    precompile(Tuple{typeof(Test.do_broken_test), Test.ExecutionResult, Any})
+    precompile(Tuple{typeof(Test.record), Test.DefaultTestSet, Union{Test.Error, Test.Fail}})
+    precompile(Tuple{typeof(Test.filter_errors), Test.DefaultTestSet})
+    """
+end
+
+Profile = get(Base.loaded_modules,
+          Base.PkgId(Base.UUID("9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"), "Profile"),
+          nothing)
+if Profile !== nothing
+    repl_script *= Profile.precompile_script
+    hardcoded_precompile_statements *= """
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UInt})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Int, UnitRange{UInt}})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UInt})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, UnitRange{Int}, UnitRange{UInt}})
+    precompile(Tuple{typeof(Profile.tree!), Profile.StackFrameTree{UInt64}, Vector{UInt64}, Dict{UInt64, Vector{Base.StackTraces.StackFrame}}, Bool, Symbol, Vector{Int}, Vector{UInt}})
+    """
+end
+
+const JULIA_PROMPT = "julia> "
+const PKG_PROMPT = "pkg> "
+const SHELL_PROMPT = "shell> "
+const HELP_PROMPT = "help?> "
+
 function generate_precompile_statements()
     start_time = time_ns()
     debug_output = devnull # or stdout
+    sysimg = Base.unsafe_string(Base.JLOptions().image_file)
 
-    # Precompile a package
+    # Extract the precompile statements from the precompile file
+    statements = Set{String}()
+
+    # From hardcoded statements
+    for statement in split(hardcoded_precompile_statements::String, '\n')
+        push!(statements, statement)
+    end
+
+    # Collect statements from running the script
     mktempdir() do prec_path
-        push!(DEPOT_PATH, prec_path)
-        push!(LOAD_PATH, prec_path)
+        # Also precompile a package here
         pkgname = "__PackagePrecompilationStatementModule"
         mkpath(joinpath(prec_path, pkgname, "src"))
-        write(joinpath(prec_path, pkgname, "src", "$pkgname.jl"),
+        path = joinpath(prec_path, pkgname, "src", "$pkgname.jl")
+        write(path,
               """
               module $pkgname
               end
               """)
-        @eval using __PackagePrecompilationStatementModule
-        empty!(LOAD_PATH)
-        empty!(DEPOT_PATH)
+        tmp_prec = tempname(prec_path)
+        tmp_proc = tempname(prec_path)
+        s = """
+            pushfirst!(DEPOT_PATH, $(repr(prec_path)));
+            Base.PRECOMPILE_TRACE_COMPILE[] = $(repr(tmp_prec));
+            Base.compilecache(Base.PkgId($(repr(pkgname))), $(repr(path)))
+            $precompile_script
+            """
+        run(`$(julia_exepath()) -O0 --sysimage $sysimg --trace-compile=$tmp_proc --startup-file=no -Cnative -e $s`)
+        for f in (tmp_prec, tmp_proc)
+            for statement in split(read(f, String), '\n')
+                occursin("Main.", statement) && continue
+                push!(statements, statement)
+            end
+        end
     end
 
     mktemp() do precompile_file, precompile_file_h
-        # Run a repl process and replay our script
+        # Collect statements from running a REPL process and replaying our REPL script
         pts, ptm = open_fake_pty()
         blackhole = Sys.isunix() ? "/dev/null" : "nul"
         if have_repl
@@ -152,12 +295,10 @@ function generate_precompile_statements()
         p = withenv("JULIA_HISTORY" => blackhole,
                     "JULIA_PROJECT" => nothing, # remove from environment
                     "JULIA_LOAD_PATH" => Sys.iswindows() ? "@;@stdlib" : "@:@stdlib",
+                    "JULIA_PKG_PRECOMPILE_AUTO" => "0",
                     "TERM" => "") do
-            sysimg = Base.unsafe_string(Base.JLOptions().image_file)
             run(```$(julia_exepath()) -O0 --trace-compile=$precompile_file --sysimage $sysimg
-                   --cpu-target=native --startup-file=no --color=yes
-                   -e 'import REPL; REPL.Terminals.is_precompiling[] = true'
-                   -i $cmdargs```,
+                   --cpu-target=native --startup-file=no -i $cmdargs```,
                    pts, pts, pts; wait=false)
         end
         Base.close_stdio(pts)
@@ -170,27 +311,26 @@ function generate_precompile_statements()
                 Sys.iswindows() && (sleep(0.1); yield(); yield()) # workaround hang - probably a libuv issue?
                 write(output_copy, l)
             end
-            close(output_copy)
-            close(ptm)
         catch ex
-            close(output_copy)
-            close(ptm)
             if !(ex isa Base.IOError && ex.code == Base.UV_EIO)
                 rethrow() # ignore EIO on ptm after pts dies
             end
+        finally
+            close(output_copy)
+            close(ptm)
         end
         # wait for the definitive prompt before start writing to the TTY
-        readuntil(output_copy, "julia>")
+        readuntil(output_copy, JULIA_PROMPT)
         sleep(0.1)
         readavailable(output_copy)
         # Input our script
         if have_repl
-            precompile_lines = split(precompile_script, '\n'; keepempty=false)
+            precompile_lines = split(repl_script::String, '\n'; keepempty=false)
             curr = 0
             for l in precompile_lines
                 sleep(0.1)
                 curr += 1
-                print("\rGenerating precompile statements... $curr/$(length(precompile_lines))")
+                print("\rGenerating REPL precompile statements... $curr/$(length(precompile_lines))")
                 # consume any other output
                 bytesavailable(output_copy) > 0 && readavailable(output_copy)
                 # push our input
@@ -198,9 +338,16 @@ function generate_precompile_statements()
                 write(ptm, l, "\n")
                 readuntil(output_copy, "\n")
                 # wait for the next prompt-like to appear
-                # NOTE: this is rather inaccurate because the Pkg REPL mode is a special flower
                 readuntil(output_copy, "\n")
-                readuntil(output_copy, "> ")
+                strbuf = ""
+                while true
+                    strbuf *= String(readavailable(output_copy))
+                    occursin(JULIA_PROMPT, strbuf) && break
+                    occursin(PKG_PROMPT, strbuf) && break
+                    occursin(SHELL_PROMPT, strbuf) && break
+                    occursin(HELP_PROMPT, strbuf) && break
+                    sleep(0.1)
+                end
             end
             println()
         end
@@ -210,54 +357,77 @@ function generate_precompile_statements()
         close(ptm)
         write(debug_output, "\n#### FINISHED ####\n")
 
-        # Extract the precompile statements from the precompile file
-        statements = Set{String}()
-        for statement in eachline(precompile_file_h)
+        for statement in split(read(precompile_file, String), '\n')
             # Main should be completely clean
             occursin("Main.", statement) && continue
             push!(statements, statement)
         end
+    end
 
-        for statement in split(hardcoded_precompile_statements, '\n')
-            push!(statements, statement)
-        end
-
-        # Create a staging area where all the loaded packages are available
-        PrecompileStagingArea = Module()
-        for (_pkgid, _mod) in Base.loaded_modules
-            if !(_pkgid.name in ("Main", "Core", "Base"))
-                eval(PrecompileStagingArea, :(const $(Symbol(_mod)) = $_mod))
-            end
+    # Create a staging area where all the loaded packages are available
+    PrecompileStagingArea = Module()
+    for (_pkgid, _mod) in Base.loaded_modules
+        if !(_pkgid.name in ("Main", "Core", "Base"))
+            eval(PrecompileStagingArea, :(const $(Symbol(_mod)) = $_mod))
         end
+    end
 
-        # Execute the collected precompile statements
-        n_succeeded = 0
-        include_time = @elapsed for statement in sort(collect(statements))
-            # println(statement)
-            try
-                Base.include_string(PrecompileStagingArea, statement)
-                n_succeeded += 1
-                print("\rExecuting precompile statements... $n_succeeded/$(length(statements))")
-            catch
-                # See #28808
-                # @error "Failed to precompile $statement"
+    # Execute the collected precompile statements
+    n_succeeded = 0
+    include_time = @elapsed for statement in sort!(collect(statements))
+        # println(statement)
+        # XXX: skip some that are broken. these are caused by issue #39902
+        occursin("Tuple{Artifacts.var\"#@artifact_str\", LineNumberNode, Module, Any, Any}", statement) && continue
+        occursin("Tuple{Base.Cartesian.var\"#@ncall\", LineNumberNode, Module, Int64, Any, Vararg{Any}}", statement) && continue
+        occursin("Tuple{Base.Cartesian.var\"#@ncall\", LineNumberNode, Module, Int32, Any, Vararg{Any}}", statement) && continue
+        occursin("Tuple{Base.Cartesian.var\"#@nloops\", LineNumberNode, Module, Any, Any, Any, Vararg{Any}}", statement) && continue
+        occursin("Tuple{Core.var\"#@doc\", LineNumberNode, Module, Vararg{Any}}", statement) && continue
+        # XXX: this is strange, as this isn't the correct representation of this
+        occursin("typeof(Core.IntrinsicFunction)", statement) && continue
+        # XXX: this is strange, as this method should not be getting compiled
+        occursin(", Core.Compiler.AbstractInterpreter, ", statement) && continue
+        try
+            ps = Meta.parse(statement)
+            isexpr(ps, :call) || continue
+            popfirst!(ps.args) # precompile(...)
+            ps.head = :tuple
+            l = ps.args[end]
+            if (isexpr(l, :tuple) || isexpr(l, :curly)) && length(l.args) > 0 # Tuple{...} or (...)
+                # XXX: precompile doesn't currently handle overloaded Vararg arguments very well.
+                # Replacing N with a large number works around it.
+                l = l.args[end]
+                if isexpr(l, :curly) && length(l.args) == 2 && l.args[1] === :Vararg # Vararg{T}
+                    push!(l.args, 100) # form Vararg{T, 100} instead
+                end
             end
+            # println(ps)
+            ps = Core.eval(PrecompileStagingArea, ps)
+            # XXX: precompile doesn't currently handle overloaded nospecialize arguments very well.
+            # Skipping them avoids the warning.
+            ms = length(ps) == 1 ? Base._methods_by_ftype(ps[1], 1, Base.get_world_counter()) : Base.methods(ps...)
+            ms isa Vector || continue
+            precompile(ps...)
+            n_succeeded += 1
+            print("\rExecuting precompile statements... $n_succeeded/$(length(statements))")
+        catch ex
+            # See #28808
+            @warn "Failed to precompile expression" form=statement exception=ex _module=nothing _file=nothing _line=0
         end
-        println()
-        if have_repl
-            # Seems like a reasonable number right now, adjust as needed
-            # comment out if debugging script
-            @assert n_succeeded > 1200
-        end
-
-        tot_time = time_ns() - start_time
-        include_time *= 1e9
-        gen_time = tot_time - include_time
-        println("Precompilation complete. Summary:")
-        print("Total ─────── "); Base.time_print(tot_time); println()
-        print("Generation ── "); Base.time_print(gen_time);     print(" "); show(IOContext(stdout, :compact=>true), gen_time / tot_time * 100); println("%")
-        print("Execution ─── "); Base.time_print(include_time); print(" "); show(IOContext(stdout, :compact=>true), include_time / tot_time * 100); println("%")
     end
+    println()
+    if have_repl
+        # Seems like a reasonable number right now, adjust as needed
+        # comment out if debugging script
+        n_succeeded > 1200 || @warn "Only $n_succeeded precompile statements"
+    end
+
+    tot_time = time_ns() - start_time
+    include_time *= 1e9
+    gen_time = tot_time - include_time
+    println("Precompilation complete. Summary:")
+    print("Total ─────── "); Base.time_print(tot_time); println()
+    print("Generation ── "); Base.time_print(gen_time);     print(" "); show(IOContext(stdout, :compact=>true), gen_time / tot_time * 100); println("%")
+    print("Execution ─── "); Base.time_print(include_time); print(" "); show(IOContext(stdout, :compact=>true), include_time / tot_time * 100); println("%")
 
     return
 end
@@ -266,6 +436,7 @@ generate_precompile_statements()
 
 # As a last step in system image generation,
 # remove some references to build time environment for a more reproducible build.
+Base.Filesystem.temp_cleanup_purge(force=true)
 @eval Base PROGRAM_FILE = ""
 @eval Sys begin
     BINDIR = ""
diff --git a/contrib/julia-config.jl b/contrib/julia-config.jl
index d69e09aba05810..9c6e39216d8173 100755
--- a/contrib/julia-config.jl
+++ b/contrib/julia-config.jl
@@ -11,8 +11,6 @@ const options = [
     "--framework"
 ];
 
-threadingOn() = ccall(:jl_threading_enabled, Cint, ()) != 0
-
 function shell_escape(str)
     str = replace(str, "'" => "'\''")
     return "'$str'"
@@ -79,7 +77,7 @@ end
 
 function cflags(doframework)
     flags = IOBuffer()
-    print(flags, "-std=gnu99")
+    print(flags, "-std=gnu11")
     if doframework
         include = shell_escape(frameworkDir())
         print(flags, " -F", include)
diff --git a/contrib/mac/app/Entitlements.plist b/contrib/mac/app/Entitlements.plist
index b84dccb00f95cb..95c1a02d589585 100644
--- a/contrib/mac/app/Entitlements.plist
+++ b/contrib/mac/app/Entitlements.plist
@@ -4,7 +4,7 @@
 <dict>
 	<key>com.apple.security.automation.apple-events</key>
 	<true/>
-	<key>com.apple.security.cs.get-task-allow</key>
+	<key>com.apple.security.get-task-allow</key>
 	<true/>
 	<key>com.apple.security.cs.allow-dyld-environment-variables</key>
 	<true/>
diff --git a/contrib/mac/app/Makefile b/contrib/mac/app/Makefile
index 665b4f1566b6f4..81b7e47cdf2cfa 100644
--- a/contrib/mac/app/Makefile
+++ b/contrib/mac/app/Makefile
@@ -49,9 +49,13 @@ dmg/$(APP_NAME): startup.applescript julia.icns
 	-mkdir -p $@/Contents/Resources/julia
 	make -C $(JULIAHOME) binary-dist
 	tar zxf $(JULIAHOME)/$(JULIA_BINARYDIST_FILENAME).tar.gz -C $@/Contents/Resources/julia --strip-components 1
+	find $@/Contents/Resources/julia -type f -exec chmod -w {} \;
+	# Even though the tarball may already be signed, we re-sign here to make it easier to add
+	# unsigned executables (like the app launcher) and whatnot, without needing to maintain lists
+	# of what is or is not signed.  Codesigning is cheap, so might as well do it early and often.
 	if [ -n "$$MACOS_CODESIGN_IDENTITY" ]; then \
 	    echo "Codesigning with identity $$MACOS_CODESIGN_IDENTITY"; \
-		MACHO_FILES=$$(find "$@" -type f -perm -755 | cut -d: -f1); \
+		MACHO_FILES=$$(find "$@" -type f -perm -0111 | cut -d: -f1); \
 		for f in $${MACHO_FILES}; do \
 			echo "Codesigning $${f}..."; \
 			codesign -s "$$MACOS_CODESIGN_IDENTITY" --option=runtime --entitlements Entitlements.plist -vvv --timestamp --deep --force "$${f}"; \
diff --git a/contrib/mac/app/notarize_check.sh b/contrib/mac/app/notarize_check.sh
index ccb46844abec3c..3cf347e8e84fc9 100755
--- a/contrib/mac/app/notarize_check.sh
+++ b/contrib/mac/app/notarize_check.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # Note that you need to have exported `APPLEID` and `APPLEID_PASSWORD` for this to work.
 
diff --git a/contrib/mac/app/renotarize_dmg.sh b/contrib/mac/app/renotarize_dmg.sh
index 82d7f7872e6708..f0d6d0a197e5f5 100755
--- a/contrib/mac/app/renotarize_dmg.sh
+++ b/contrib/mac/app/renotarize_dmg.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+# This file is a part of Julia. License is MIT: https://julialang.org/license
 
 # We need a URL
 if [[ -z "$1" ]]; then
@@ -12,28 +13,28 @@ if [[ -z "${APPLEID}" ]] || [[ -z "${APPLEID_PASSWORD}" ]]; then
     exit 1
 fi
 
-# Translate from `s3://` URL to `https://` url:
+# Use `aws` to download an `s3://` URL, otherwise use `curl`
 URL="$1"
 if [[ "$URL" == s3://* ]]; then
-    # Chop off `s3://`
-    URL="${URL:5}"
-    # Split into bucket.s3.aws.com/path
-    URL="https://${URL%%/*}.s3.amazonaws.com/${URL#*/}"
+    aws s3 cp "${URL}" .
+elif [[ "${URL}" == http* ]]; then
+    # Download .dmg
+    curl -L "${URL}" -O
+else
+    echo "Unknown URL format: '${URL}'" >&2
+    exit 1
 fi
 
-# Download .dmg
-curl -L "${URL}" -O
-
 # Unpack dmg into our `dmg` folder
 rm -rf dmg
+DMG_NAME=$(basename "${URL}")
 
 # Copy app over to our `dmg` folder
 for j in /Volumes/Julia-*; do hdiutil detach "${j}"; done
-hdiutil mount "$(basename "$1")"
+hdiutil mount "${DMG_NAME}"
 cp -Ra /Volumes/Julia-* dmg
 
-# Override some important Makefile variables
-DMG_NAME=$(basename "$1")
+# Autodetect APP_NAME and VOL_NAME
 APP_NAME=$(basename dmg/*.app)
 VOL_NAME=$(basename /Volumes/Julia-*)
 
@@ -46,3 +47,8 @@ for j in /Volumes/Julia-*; do hdiutil detach "${j}"; done
 
 # Run notarization
 make notarize "DMG_NAME=${DMG_NAME}" "APP_NAME=${APP_NAME}" "VOL_NAME=${VOL_NAME}"
+
+# If it was an s3 bucket, auto-upload it
+if [[ "${URL}" == s3://* ]]; then
+    aws s3 cp --acl public-read "${DMG_NAME}" "${URL}"
+fi
diff --git a/contrib/mac/app/startup.applescript b/contrib/mac/app/startup.applescript
index f02830a3902dce..9964049f34ed6c 100644
--- a/contrib/mac/app/startup.applescript
+++ b/contrib/mac/app/startup.applescript
@@ -1,5 +1,4 @@
-set RootPath to POSIX path of (path to me)
-tell application id "com.apple.terminal"
-  do script ("exec '" & RootPath & "Contents/Resources/julia/bin/julia'")
-  activate
-end tell
+set RootPath to (path to me)
+set JuliaPath to POSIX path of ((RootPath as text) & "Contents:Resources:julia:bin:julia")
+set JuliaFile to POSIX file JuliaPath
+tell application id "com.apple.finder" to open JuliaFile
diff --git a/contrib/mac/framework/Makefile b/contrib/mac/framework/Makefile
index 4449436ac6fd2e..1f8a55b26188ad 100644
--- a/contrib/mac/framework/Makefile
+++ b/contrib/mac/framework/Makefile
@@ -143,7 +143,7 @@ endif
 	# cleanup unnecessary install outputs
 	rm $(DESTDIR)$(datarootdir)/julia/startup.jl
 	rm -rf $(DESTDIR)$(datarootdir)/icons $(DESTDIR)$(datarootdir)/applications $(DESTDIR)$(datarootdir)/appdata
-	find $(DESTDIR)$(prefix)/$(framework_directory) \( -name '.DS_Store' -o -name '.gitignore' -o -name Makefile -o -name .travis.yml -o -name .codecov.yml \) -delete
+	find $(DESTDIR)$(prefix)/$(framework_directory) \( -name '.DS_Store' -o -name '.gitignore' -o -name Makefile -o -name .codecov.yml \) -delete
 
 	# Include Julia's license info
 	$(INSTALL_F) $(JULIAHOME)/LICENSE.md $(DESTDIR)$(prefix)/$(framework_resources)
@@ -159,7 +159,7 @@ endif
 	#NB: must be the last lines of the recipe, else signature may be invalidated.
 
 	# Codesign should look at the embedded Info.plist to get the signing identifier.
-	# See JLDFLAGS in Make.inc for Darwin platform and Info.plist target in ui/Makefile.
+	# See JLDFLAGS in Make.inc for Darwin platform and Info.plist target in cli/Makefile.
 	codesign -s "$(DARWIN_CODESIGN_KEYCHAIN_IDENTITY)" -v $(darwin_codesign_options) $(darwin_codesign_julia_options) $(DESTDIR)$(prefix)/$(framework_helpers)/julia
 ifeq ($(BUNDLE_DEBUG_LIBS),1)
 	codesign -s "$(DARWIN_CODESIGN_KEYCHAIN_IDENTITY)" -v $(darwin_codesign_options) $(darwin_codesign_julia_options) $(DESTDIR)$(prefix)/$(framework_helpers)/julia-debug
diff --git a/contrib/mac/frameworkapp/JuliaLauncher/Assets.xcassets/AppIcon.appiconset/Contents.json b/contrib/mac/frameworkapp/JuliaLauncher/Assets.xcassets/AppIcon.appiconset/Contents.json
index 2fe2dbc16b987d..5071eb935ab9b2 100644
--- a/contrib/mac/frameworkapp/JuliaLauncher/Assets.xcassets/AppIcon.appiconset/Contents.json
+++ b/contrib/mac/frameworkapp/JuliaLauncher/Assets.xcassets/AppIcon.appiconset/Contents.json
@@ -65,4 +65,4 @@
     "version" : 1,
     "author" : "xcode"
   }
-}
\ No newline at end of file
+}
diff --git a/contrib/mac/frameworkapp/JuliaLauncher/Assets.xcassets/Contents.json b/contrib/mac/frameworkapp/JuliaLauncher/Assets.xcassets/Contents.json
index da4a164c918651..2d92bd53fdb222 100644
--- a/contrib/mac/frameworkapp/JuliaLauncher/Assets.xcassets/Contents.json
+++ b/contrib/mac/frameworkapp/JuliaLauncher/Assets.xcassets/Contents.json
@@ -3,4 +3,4 @@
     "version" : 1,
     "author" : "xcode"
   }
-}
\ No newline at end of file
+}
diff --git a/contrib/mac/frameworkapp/Makefile b/contrib/mac/frameworkapp/Makefile
index 93392cd4ec3d02..c94a5be145db9a 100644
--- a/contrib/mac/frameworkapp/Makefile
+++ b/contrib/mac/frameworkapp/Makefile
@@ -76,7 +76,7 @@ $(BUILDROOT)/framework-component.plist: $(JULIAHOME)/contrib/mac/frameworkapp/fr
 # important properties.  Together, the properties allow one "Julia.framework"
 # to exist at a location with multiple versions of Julia within.
 #
-# 1. The component's identifer is versioned to match the bundled framework.
+# 1. The component's identifier is versioned to match the bundled framework.
 # This allows multiple versions of the component to be installed with the
 # Julia.framework/Versions directory.
 # 2. The component-plist identifies the Versions/x.y directory as an upgradable
@@ -116,8 +116,8 @@ signedproductarchive: $(PRODUCTARCHIVE)
 	mv $<.signed $<
 
 clean:
-	-rm -rf $(XCARCHIVE) $(XCDERIVEDDATA) $(XCEXPORT)
-	-rm -rf $(FRAMEWORK_DESTDIR)
+	rm -rf $(XCARCHIVE) $(XCDERIVEDDATA) $(XCEXPORT)
+	rm -rf $(FRAMEWORK_DESTDIR)
 	-rm -f $(PRODUCTARCHIVE)
 
 .PHONY: appexport clean productarchive signedproductarchive
diff --git a/contrib/mac/frameworkapp/README.md b/contrib/mac/frameworkapp/README.md
index 43cee0ee3716a6..94c344d16564fc 100644
--- a/contrib/mac/frameworkapp/README.md
+++ b/contrib/mac/frameworkapp/README.md
@@ -12,7 +12,7 @@ the top of the `Makefile` to set appropriate code signing parameters.
 
 The framework is installed in `/Library/Frameworks` and the app in
 `/Applications`.  Installation may be system-wide (i.e., relative to `/`) or
-local to the user's home directory (i.e., `$Home/Appliations/Julia.app`).
+local to the user's home directory (i.e., `$Home/Applications/Julia.app`).
 
 The `julia` binary is embedded in the framework at
 `Julia.framework/Helpers/julia`.
diff --git a/contrib/mac/frameworkapp/installresources/conclusion.rtf b/contrib/mac/frameworkapp/installresources/conclusion.rtf
index 8d794ae31c04b0..1f3e60f5f52773 100644
--- a/contrib/mac/frameworkapp/installresources/conclusion.rtf
+++ b/contrib/mac/frameworkapp/installresources/conclusion.rtf
@@ -77,4 +77,4 @@ Conclusion\
 \f1 \cb1 \
 \pard\pardeftab720\partightenfactor0
 
-\f2 \cf0 \cb2 ln -s INSTALL_LOCATION/Julia.framework/Helpers/julia DIR_IN_PATH/julia}
\ No newline at end of file
+\f2 \cf0 \cb2 ln -s INSTALL_LOCATION/Julia.framework/Helpers/julia DIR_IN_PATH/julia}
diff --git a/contrib/mac/frameworkapp/installresources/readme.rtf b/contrib/mac/frameworkapp/installresources/readme.rtf
index d555047dd5c1c2..935d9a5f6a5760 100644
--- a/contrib/mac/frameworkapp/installresources/readme.rtf
+++ b/contrib/mac/frameworkapp/installresources/readme.rtf
@@ -28,4 +28,4 @@ Readme\
 \f2 \cb2 $HOME
 \f1 \cb1  usually expands to 
 \f2 \cb2 /Users/username
-\f1 \cb1 ).}
\ No newline at end of file
+\f1 \cb1 ).}
diff --git a/contrib/new-stdlib.sh b/contrib/new-stdlib.sh
new file mode 100755
index 00000000000000..15f82cffb1c46b
--- /dev/null
+++ b/contrib/new-stdlib.sh
@@ -0,0 +1,74 @@
+#!/bin/sh
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+set -eu # stop on failure
+
+printf -- "Julia Stdlib Creator Helper Wizard\n"
+printf -- "----------------------------------\n"
+
+ROOT=$(dirname "$0")/../stdlib
+read -p "Name: " NAME
+read -p "Github User account (empty for local): " USER
+
+if [ -z "$USER" ]; then
+
+UUID=$(uuidgen | tr [A-Z] [a-z])
+
+sed -e "/^STDLIBS =/,/^\$/s!^\$!\\
+STDLIBS += $NAME\\
+!" "$ROOT/Makefile" >"$ROOT/Makefile.tmp"
+mv "$ROOT/Makefile.tmp" "$ROOT/Makefile"
+
+mkdir "$ROOT/$NAME"
+mkdir "$ROOT/$NAME/src"
+mkdir "$ROOT/$NAME/test"
+
+cat >"$ROOT/$NAME/Project.toml" <<EOF
+name = "$NAME"
+uuid = "$UUID"
+EOF
+
+cat >"$ROOT/$NAME/src/$NAME.jl" <<EOF
+module $NAME
+end
+EOF
+
+cat >"$ROOT/$NAME/test/runtests.jl" <<EOF
+using $NAME
+using Test
+@test "your tests here"
+EOF
+
+git add "$ROOT/$NAME"
+git add -p "$ROOT/Makefile"
+
+else
+
+read -p "Git SHA1 hash of commit: " SHA1
+
+UNAME=$(echo "$NAME" | tr [a-z] [A-Z])
+
+sed -e "/^STDLIBS_EXT =/,/^\$/s!^\$!\\
+STDLIBS_EXT += $NAME\\
+!" "$ROOT/Makefile" >"$ROOT/Makefile.tmp"
+mv "$ROOT/Makefile.tmp" "$ROOT/Makefile"
+
+cat >"$ROOT/$NAME.version" <<EOF
+${UNAME}_BRANCH = master
+${UNAME}_SHA1 = $SHA1
+${UNAME}_GIT_URL := https://github.com/$USER/$NAME.jl.git
+${UNAME}_TAR_URL = https://api.github.com/repos/$USER/$NAME.jl/tarball/\$1
+EOF
+
+git add "$ROOT/$NAME.version"
+git add -p "$ROOT/Makefile"
+
+fi
+
+printf -- "\n-------------------------------------------------------------------------------\n"
+printf -- "\n\
+Manually add this now to test/precompile.jl (Base.cache_dependencies),
+test/choosetests.jl (net_required_for), and base/sysimg.jl (stdlibs), sorted
+by top-down dependency order.\n"
+printf -- "\n-------------------------------------------------------------------------------\n"
+printf -- "Wizard finished.\n"
diff --git a/contrib/normalize_triplet.py b/contrib/normalize_triplet.py
index 19dd74e3e28032..43c9d492a4b2ed 100755
--- a/contrib/normalize_triplet.py
+++ b/contrib/normalize_triplet.py
@@ -93,9 +93,9 @@ def p(x):
     # These contain characters that can't be easily represented as
     # capture group names, unfortunately:
     os_remapping = {
-        'darwin': 'apple-darwin14',
+        'darwin': 'apple-darwin',
         'windows': 'w64-mingw32',
-        'freebsd': 'unknown-freebsd11.1',
+        'freebsd': 'unknown-freebsd',
     }
     x = r(x)
     if x:
@@ -106,19 +106,22 @@ def p(x):
 
 # If the user passes in a GCC version (like 8.2.0) use that to force a
 # "-libgfortran5" tag at the end of the triplet, but only if it has otherwise
-# not been specified
+# not been specified.
 if libgfortran_version == "blank_libgfortran":
     if len(sys.argv) >= 3:
-        libgfortran_version = {
-            "4":  "libgfortran3",
-            "5":  "libgfortran3",
-            "6":  "libgfortran3",
-            "7":  "libgfortran4",
-            "8":  "libgfortran5",
-            "9":  "libgfortran5",
-            "10": "libgfortran5",
-            "11": "libgfortran5",
-        }[list(filter(lambda x: re.match("\d+\.\d+(\.\d+)?", x), sys.argv[2].split()))[-1].split('.')[0]]
+        # If there was no gfortran/gcc version passed in, default to the latest libgfortran version
+        if not sys.argv[2]:
+            libgfortran_version = "libgfortran5"
+        else:
+            # Take the last thing that looks like a version number, and extract its major component
+            version_numbers = list(filter(lambda x: re.match("\d+\.\d+(\.\d+)?", x), sys.argv[2].split()))
+            major_ver = int(version_numbers[-1].split('.')[0])
+            if major_ver <= 6:
+                libgfortran_version = "libgfortran3"
+            elif major_ver <= 7:
+                libgfortran_version = "libgfortran4"
+            else:
+                libgfortran_version = "libgfortran5"
 
 if cxx_abi == "blank_cxx_abi":
     if len(sys.argv) == 4:
diff --git a/contrib/print_sorted_stdlibs.jl b/contrib/print_sorted_stdlibs.jl
new file mode 100644
index 00000000000000..bbf890328cb4ef
--- /dev/null
+++ b/contrib/print_sorted_stdlibs.jl
@@ -0,0 +1,99 @@
+#!/usr/bin/env julia
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using TOML
+
+function check_flag(flag)
+    idxs = findall(flag .== ARGS)
+    for idx in reverse(idxs)
+        popat!(ARGS, idx)
+    end
+    return !isempty(idxs)
+end
+
+if check_flag("--help") || check_flag("-h")
+    println("Usage: julia print_sorted_stdlibs.jl [stdlib_dir] [--exclude-jlls]")
+end
+
+# Allow users to ask for JLL or no JLLs
+exclude_jlls = check_flag("--exclude-jlls")
+
+# Default to the `stdlib/vX.Y` directory
+STDLIB_DIR = get(ARGS, 1, joinpath(@__DIR__, "..", "usr", "share", "julia", "stdlib"))
+vXYdirs = readdir(STDLIB_DIR)
+if length(vXYdirs) == 1 && match(r"v\d\.\d", vXYdirs[1]) !== nothing
+    STDLIB_DIR = joinpath(STDLIB_DIR, vXYdirs[1])
+end
+
+project_deps = Dict{String,Set{String}}()
+for project_dir in readdir(STDLIB_DIR, join=true)
+    files = readdir(project_dir)
+    if "Project.toml" in files
+        project = TOML.parsefile(joinpath(project_dir, "Project.toml"))
+
+        if !haskey(project, "name")
+            continue
+        end
+        name = project["name"]
+        deps = Set(collect(keys(get(project, "deps", Dict{String,String}()))))
+        project_deps[name] = deps
+    end
+end
+
+#println("Found $(length(keys(project_deps))) stdlib projects")
+
+function project_depth(project)
+    deps = project_deps[project]
+    if isempty(deps)
+        return 0
+    end
+
+    depth = 1
+    while !all(isempty(project_deps[d]) for d in deps)
+        depth += 1
+
+        if depth > 100
+            error("Failed to converge while finding project depth for $(project)!")
+        end
+
+        new_deps = Set{String}()
+        for d in deps
+            union!(new_deps, project_deps[d])
+        end
+        deps = new_deps
+    end
+    return depth
+end
+
+project_depths = Dict(p => project_depth(p) for p in keys(project_deps))
+
+function project_isless(p1, p2)
+    if project_depths[p1] != project_depths[p2]
+        return isless(project_depths[p1], project_depths[p2])
+    end
+    return isless(p1, p2)
+end
+
+sorted_projects = sort(collect(keys(project_depths)), lt=project_isless)
+
+if exclude_jlls
+    filter!(p -> !endswith(p, "_jll"), sorted_projects)
+end
+
+# Print out sorted projects, ready to be pasted into `sysimg.jl`
+last_depth = 0
+println("    # Stdlibs sorted in dependency, then alphabetical, order by contrib/print_sorted_stdlibs.jl")
+if exclude_jlls
+    println("    # Run with the `--exclude-jlls` option to filter out all JLL packages")
+end
+println("    stdlibs = [")
+println("        # No dependencies")
+for p in sorted_projects
+    if project_depths[p] != last_depth
+        global last_depth = project_depths[p]
+        println()
+        println("        # $(last_depth)-depth packages")
+    end
+    println("        :$(p),")
+end
+println("    ]")
diff --git a/contrib/refresh_bb_tarballs.sh b/contrib/refresh_bb_tarballs.sh
deleted file mode 100755
index e3c44b954d0b8b..00000000000000
--- a/contrib/refresh_bb_tarballs.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/sh
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Invoke this with no arguments to refresh all tarballs, or with a project name to refresh only that project.
-#
-# Example:
-#   ./refresh_bb_tarballs.sh gmp
-
-# Get this list via:
-#    using BinaryBuilder
-#    print("TRIPLETS=\"$(join(triplet.(BinaryBuilder.supported_platforms()), " "))\"")
-TRIPLETS="i686-linux-gnu x86_64-linux-gnu aarch64-linux-gnu armv7l-linux-gnueabihf powerpc64le-linux-gnu i686-linux-musl x86_64-linux-musl aarch64-linux-musl armv7l-linux-musleabihf x86_64-apple-darwin14 x86_64-unknown-freebsd11.1 i686-w64-mingw32 x86_64-w64-mingw32"
-
-# These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded:
-BB_PROJECTS="mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind osxunwind dsfmt objconv p7zip zlib suitesparse openlibm"
-BB_GCC_EXPANDED_PROJECTS="openblas"
-BB_CXX_EXPANDED_PROJECTS="gmp llvm"
-
-# If we've been given a project name, filter down to that one:
-if [ -n "${1}" ]; then
-    case "${BB_PROJECTS}" in
-        *${1}*) BB_PROJECTS="${1}" ;;
-        *) BB_PROJECTS="" ;;
-    esac
-    case "${BB_GCC_EXPANDED_PROJECTS}" in
-        *${1}*) BB_GCC_EXPANDED_PROJECTS="${1}" ;;
-        *) BB_GCC_EXPANDED_PROJECTS="" ;;
-    esac
-    case "${BB_CXX_EXPANDED_PROJECTS}" in
-        *${1}*) BB_CXX_EXPANDED_PROJECTS="${1}" ;;
-        *) BB_CXX_EXPANDED_PROJECTS="" ;;
-    esac
-fi
-
-# Get "contrib/" directory path
-CONTRIB_DIR=$(CDPATH= cd -- "$(dirname -- "$0")" && pwd)
-
-# Get the source hash for each project
-for proj in ${BB_PROJECTS}; do
-    PROJ="$(echo ${proj} | tr [a-z] [A-Z])"
-    make -C "${CONTRIB_DIR}/../deps" USE_BINARYBUILDER_${PROJ}=0 DEPS_GIT=0 extract-${proj}
-done
-
-# For each triplet and each project, download the BB tarball and save its hash:
-for triplet in ${TRIPLETS}; do
-	for proj in ${BB_PROJECTS}; do
-		PROJ="$(echo ${proj} | tr [a-z] [A-Z])"
-        make -C "${CONTRIB_DIR}/../deps" USE_BINARYBUILDER_${PROJ}=1 ${PROJ}_BB_TRIPLET=${triplet} distclean-${proj}
-		make -C "${CONTRIB_DIR}/../deps" USE_BINARYBUILDER_${PROJ}=1 ${PROJ}_BB_TRIPLET=${triplet} install-${proj}
-	done
-
-    for proj in ${BB_GCC_EXPANDED_PROJECTS}; do
-		PROJ="$(echo ${proj} | tr [a-z] [A-Z])"
-        for libgfortran in libgfortran3 libgfortran4 libgfortran5; do
-		    make -C "${CONTRIB_DIR}/../deps" USE_BINARYBUILDER_${PROJ}=1 ${PROJ}_BB_TRIPLET=${triplet}-${libgfortran} BB_TRIPLET_CXXABI=${triplet} distclean-${proj}
-		    make -C "${CONTRIB_DIR}/../deps" USE_BINARYBUILDER_${PROJ}=1 ${PROJ}_BB_TRIPLET=${triplet}-${libgfortran} BB_TRIPLET_CXXABI=${triplet} install-${proj}
-        done
-    done
-
-    for proj in ${BB_CXX_EXPANDED_PROJECTS}; do
-		PROJ="$(echo ${proj} | tr [a-z] [A-Z])"
-        for cxx in cxx03 cxx11; do
-		    make -C "${CONTRIB_DIR}/../deps" USE_BINARYBUILDER_${PROJ}=1 ${PROJ}_BB_TRIPLET=${triplet}-${cxx} BB_TRIPLET_CXXABI=${triplet} distclean-${proj}
-		    make -C "${CONTRIB_DIR}/../deps" USE_BINARYBUILDER_${PROJ}=1 ${PROJ}_BB_TRIPLET=${triplet}-${cxx} BB_TRIPLET_CXXABI=${triplet} install-${proj}
-        done
-    done
-done
diff --git a/contrib/refresh_checksums.mk b/contrib/refresh_checksums.mk
new file mode 100644
index 00000000000000..898bd5841ee82c
--- /dev/null
+++ b/contrib/refresh_checksums.mk
@@ -0,0 +1,142 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Invoke this with no arguments to refresh all tarballs, or with a project name to refresh only that project.
+#
+# Example:
+#   make -f contrib/refresh_checksums.mk gmp
+
+SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
+JULIAHOME := $(abspath $(SRCDIR)/..)
+
+# force a sane / stable configuration
+export LC_ALL=C
+export LANG=C
+.SUFFIXES:
+
+# Default target that will have everything else added to it as a dependency
+all: checksum pack-checksum
+
+# Get this list via:
+#    using BinaryBuilder
+#    print("TRIPLETS=\"$(join(sort(triplet.(BinaryBuilder.supported_platforms(;experimental=true))), " "))\"")
+TRIPLETS=aarch64-apple-darwin aarch64-linux-gnu aarch64-linux-musl armv6l-linux-gnueabihf armv6l-linux-musleabihf armv7l-linux-gnueabihf armv7l-linux-musleabihf i686-linux-gnu i686-linux-musl i686-w64-mingw32 powerpc64le-linux-gnu x86_64-apple-darwin x86_64-linux-gnu x86_64-linux-musl x86_64-unknown-freebsd x86_64-w64-mingw32
+CLANG_TRIPLETS=$(filter %-darwin %-freebsd,$(TRIPLETS))
+NON_CLANG_TRIPLETS=$(filter-out %-darwin %-freebsd,$(TRIPLETS))
+
+# These are the projects currently using BinaryBuilder; both GCC-expanded and non-GCC-expanded:
+BB_PROJECTS=mbedtls libssh2 nghttp2 mpfr curl libgit2 pcre libuv unwind llvmunwind dsfmt objconv p7zip zlib libsuitesparse openlibm blastrampoline
+BB_GCC_EXPANDED_PROJECTS=openblas csl
+BB_CXX_EXPANDED_PROJECTS=gmp llvm clang llvm-tools
+# These are non-BB source-only deps
+NON_BB_PROJECTS=patchelf mozillacert lapack libwhich utf8proc
+
+ifneq ($(VERBOSE),1)
+QUIET_MAKE := -s
+else
+QUIET_MAKE :=
+endif
+
+# Convert `llvm-tools` to `LLVM_TOOLS`
+define makevar
+$(shell echo $(1) | tr 'a-z-' 'A-Z_')
+endef
+
+# If $(2) == `src`, this will generate a `USE_BINARYBUILDER_FOO=0` make flag
+# It will also generate a `FOO_BB_TRIPLET=$(2)` make flag.
+define make_flags
+USE_BINARYBUILDER=$(if $(filter src,$(2)),0,1) $(if $(filter src,$(2)),FC_VERSION=7.0.0,) $(call makevar,$(1))_BB_TRIPLET=$(if $(filter src,$(2)),,$(2)) LLVM_ASSERTIONS=$(if $(filter assert,$(3)),1,0) DEPS_GIT=0
+endef
+
+# checksum_bb_dep takes in (name, triplet), and generates a `checksum-$(1)-$(2)` target.
+# note that `"src"` is a special triplet value.
+# if $(3) is "assert", we set BINARYBUILDER_LLVM_ASSERTS=1
+define checksum_dep
+checksum-$(1)-$(2)-$(3): clean-$(1)
+	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/deps" $(call make_flags,$(1),$(2),$(3)) checksum-$(1)
+.PHONY: checksum-$(1)-$(2)-$(3)
+
+# Add this guy to his project target
+checksum-$(1): checksum-$(1)-$(2)-$(3)
+
+# Add a dependency to the pack target
+# TODO: can we make this so it only adds an ordering but not a dependency?
+pack-checksum-$(1): | checksum-$(1)
+
+# Add this guy to the `checksum` and `pack-checksum` default targets (e.g. `make -f contrib/refresh_checksums.mk openblas`)
+checksum: checksum-$1
+$1 pack-checksum: pack-checksum-$1
+endef
+
+# Generate targets for source hashes for all our projects
+$(foreach project,$(BB_PROJECTS) $(BB_GCC_EXPANDED_PROJECTS) $(BB_CXX_EXPANDED_PROJECTS) $(NON_BB_PROJECTS),$(eval $(call checksum_dep,$(project),src)))
+
+# Generate targets for triplet-specific hashes for all our BB projects
+$(foreach project,$(BB_PROJECTS),$(foreach triplet,$(TRIPLETS),$(eval $(call checksum_dep,$(project),$(triplet)))))
+$(foreach project,$(BB_GCC_EXPANDED_PROJECTS),$(foreach triplet,$(TRIPLETS),$(foreach libgfortran_version,libgfortran3 libgfortran4 libgfortran5,$(eval $(call checksum_dep,$(project),$(triplet)-$(libgfortran_version))))))
+
+# Because MacOS and FreeBSD use clang, they don't actually use cxxstring_abi expansion:
+$(foreach project,$(BB_CXX_EXPANDED_PROJECTS),$(foreach triplet,$(NON_CLANG_TRIPLETS),$(foreach cxxstring_abi,cxx11 cxx03,$(eval $(call checksum_dep,$(project),$(triplet)-$(cxxstring_abi))))))
+$(foreach project,$(BB_CXX_EXPANDED_PROJECTS),$(foreach triplet,$(CLANG_TRIPLETS),$(eval $(call checksum_dep,$(project),$(triplet)))))
+
+# Special libLLVM_asserts_jll/LLVM_assert_jll targets
+$(foreach triplet,$(NON_CLANG_TRIPLETS),$(foreach cxxstring_abi,cxx11 cxx03,$(eval $(call checksum_dep,llvm,$(triplet)-$(cxxstring_abi),assert))))
+$(foreach triplet,$(NON_CLANG_TRIPLETS),$(foreach cxxstring_abi,cxx11 cxx03,$(eval $(call checksum_dep,llvm-tools,$(triplet)-$(cxxstring_abi),assert))))
+$(foreach triplet,$(CLANG_TRIPLETS),$(eval $(call checksum_dep,llvm,$(triplet),assert)))
+$(foreach triplet,$(CLANG_TRIPLETS),$(eval $(call checksum_dep,llvm-tools,$(triplet),assert)))
+
+# External stdlibs
+checksum-stdlibs:
+	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/stdlib" checksumall
+all: checksum-stdlibs
+.PHONY: checksum-stdlibs
+
+# doc unicode data
+checksum-doc-unicodedata:
+	-+$(MAKE) $(QUIET_MAKE) -C "$(JULIAHOME)/doc" checksum-unicodedata
+all: checksum-doc-unicodedata
+.PHONY: checksum-doc-unicodedata
+
+# merge substring project names to avoid races
+pack-checksum-llvm-tools: | pack-checksum-llvm
+	@# nothing to do but disable the prefix rule
+pack-checksum-llvm: | checksum-llvm-tools
+pack-checksum-csl: | pack-checksum-compilersupportlibraries
+	@# nothing to do but disable the prefix rule
+pack-checksum-compilersupportlibraries: | checksum-csl
+pack-checksum-libsuitesparse: | pack-checksum-suitesparse
+	@# nothing to do but disable the prefix rule
+pack-checksum-suitesparse: | checksum-libsuitesparse
+# This is a bit tricky: we want llvmunwind to be separate from unwind and llvm,
+# so we add a rule to process those first
+pack-checksum-llvm pack-checksum-unwind: | pack-checksum-llvmunwind
+# and the name for LLVMLibUnwind is awkward, so handle that with a regex
+pack-checksum-llvmunwind: | pack-checksum-llvm.*unwind
+	cd "$(JULIAHOME)/deps/checksums" && mv 'llvm.*unwind' llvmunwind
+
+clean-%: FORCE
+	-rm "$(JULIAHOME)/deps/checksums"/'$*'
+
+# define how to pack parallel checksums into a single file format
+pack-checksum-%: FORCE
+	@echo making "$(JULIAHOME)/deps/checksums/"'$*'
+	@cd "$(JULIAHOME)/deps/checksums" && \
+		for each in $$(ls | grep -i '$*'); do \
+			if [ -d "$$each" ]; then \
+				for type in $$(ls "$$each"); do \
+					echo "$$each"/"$$type"/$$(cat "$$each"/"$$type"); \
+					rm "$$each"/"$$type"; \
+				done; \
+				rmdir "$$each"; \
+			fi; \
+		done >> '$*'
+	@cd "$(JULIAHOME)/deps/checksums" && \
+		sort '$*' > '$*.tmp' && \
+		mv '$*.tmp' '$*'
+
+# This file is completely phony
+FORCE:
+.PHONY: FORCE
+
+# Debugging helper
+print-%:
+	@echo '$*=$(subst ','\'',$($*))'
diff --git a/contrib/relative_path.py b/contrib/relative_path.py
index b9d3d1e5bca7ec..9a60607d64d9b3 100755
--- a/contrib/relative_path.py
+++ b/contrib/relative_path.py
@@ -7,4 +7,4 @@
 # shells and whatnot during the build are all POSIX shells/cygwin.  We rely on the build
 # system itself to canonicalize to `\` when it needs to, and deal with the shell escaping
 # and whatnot at the latest possible moment.
-sys.stdout.write(os.path.relpath(sys.argv[2], sys.argv[1]).replace(os.path.sep, '/'))
\ No newline at end of file
+sys.stdout.write(os.path.relpath(sys.argv[2], sys.argv[1]).replace(os.path.sep, '/'))
diff --git a/contrib/travis_fastfail.sh b/contrib/travis_fastfail.sh
deleted file mode 100755
index 410cbe2bccafc6..00000000000000
--- a/contrib/travis_fastfail.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/sh
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-curlhdr="Accept: application/vnd.travis-ci.2+json"
-endpoint="https://api.travis-ci.org/repos/$TRAVIS_REPO_SLUG"
-
-# Fail fast for superseded builds to PR's
-if [ "$TRAVIS_PULL_REQUEST" != "false" ]; then
-  newestbuildforthisPR=$(curl -H "$curlhdr" $endpoint/builds?event_type=pull_request | \
-      jq ".builds | map(select(.pull_request_number == $TRAVIS_PULL_REQUEST))[0].number")
-  if [ $newestbuildforthisPR != null -a $newestbuildforthisPR != \"$TRAVIS_BUILD_NUMBER\" ]; then
-    echo "There are newer queued builds for this pull request, failing early."
-    exit 1
-  fi
-else
-  # And for non-latest push builds in branches other than master or release*
-  case $TRAVIS_BRANCH in
-    master | release*)
-      ;;
-    *)
-      if [ \"$TRAVIS_BUILD_NUMBER\" != $(curl -H "$curlhdr" \
-          $endpoint/branches/$TRAVIS_BRANCH | jq ".branch.number") ]; then
-        echo "There are newer queued builds for this branch, failing early."
-        exit 1
-      fi
-      ;;
-  esac
-fi
diff --git a/contrib/tsan/Make.user.tsan b/contrib/tsan/Make.user.tsan
new file mode 100644
index 00000000000000..01c9874a85182b
--- /dev/null
+++ b/contrib/tsan/Make.user.tsan
@@ -0,0 +1,16 @@
+TOOLCHAIN=$(BUILDROOT)/../toolchain
+BINDIR=$(TOOLCHAIN)/usr/bin
+TOOLDIR=$(TOOLCHAIN)/usr/tools
+
+# use our new toolchain
+USECLANG=1
+override CC=$(TOOLDIR)/clang
+override CXX=$(TOOLDIR)/clang++
+
+USE_BINARYBUILDER_LLVM=1
+
+override SANITIZE=1
+override SANITIZE_THREAD=1
+
+# default to a debug build for better line number reporting
+override JULIA_BUILD_MODE=debug
diff --git a/contrib/tsan/build.sh b/contrib/tsan/build.sh
new file mode 100755
index 00000000000000..2c4ba3b1bde95d
--- /dev/null
+++ b/contrib/tsan/build.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#
+# Usage:
+#     contrib/tsan/build.sh <path> [<make_targets>...]
+#
+# Build TSAN-enabled julia.  Given a workspace directory <path>, build
+# TSAN-enabled julia in <path>/tsan.  Required toolss are install under
+# <path>/toolchain.  Note that the same <path> passed to `contrib/asan/build.sh`
+# can be used to share the toolchain used for ASAN.  This scripts also takes
+# optional <make_targets> arguments which are passed to `make`.  The default
+# make target is `debug`.
+
+set -ue
+
+# `$WORKSPACE` is a directory in which we create `toolchain` and `tsan`
+# sub-directories.
+WORKSPACE="$1"
+shift
+if [ "$WORKSPACE" = "" ]; then
+    echo "Workspace directory must be specified as the first argument" >&2
+    exit 2
+fi
+
+mkdir -pv "$WORKSPACE"
+WORKSPACE="$(cd "$WORKSPACE" && pwd)"
+if [ "$WORKSPACE" = "" ]; then
+    echo "Failed to create the workspace directory." >&2
+    exit 2
+fi
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+JULIA_HOME="$HERE/../../"
+
+echo
+echo "Installing toolchain..."
+
+TOOLCHAIN="$WORKSPACE/toolchain"
+if [ ! -d "$TOOLCHAIN" ]; then
+    make -C "$JULIA_HOME" configure O=$TOOLCHAIN
+    cp "$HERE/../asan/Make.user.tools"  "$TOOLCHAIN/Make.user"
+fi
+
+make -C "$TOOLCHAIN/deps" install-clang install-llvm-tools
+
+echo
+echo "Building Julia..."
+
+BUILD="$WORKSPACE/tsan"
+if [ ! -d "$BUILD" ]; then
+    make -C "$JULIA_HOME" configure O="$BUILD"
+    cp "$HERE/Make.user.tsan"  "$BUILD/Make.user"
+fi
+
+cd "$BUILD"  # so that we can pass `-C src` to `make`
+make "$@"
diff --git a/contrib/updateSPDX.jl b/contrib/updateSPDX.jl
new file mode 100644
index 00000000000000..94b428ac70748a
--- /dev/null
+++ b/contrib/updateSPDX.jl
@@ -0,0 +1,31 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# SPDX-License-Identifier: MIT
+# Run this script with each new Julia release to update "../julia.spdx.json"
+
+using UUIDs
+using Dates
+using JSON
+using TimeZones
+using DataStructures
+
+spdxDocument= "../julia.spdx.json"
+spdxData= JSON.parsefile(spdxDocument; dicttype=OrderedDict{String, Any})
+
+# At the moment we can only update a few items automatically with each release.
+# These are the crucial elements to make a new version of the SPDX file.
+# Any other changes (ex. Adding or removing of external dependencies, updating copyright text, etc.) must be performed manually
+spdxData["documentNamespace"]= "https://julialang.org/spdxdocs/julia-spdx-" * string(uuid4())
+spdxData["creationInfo"]["created"]=  Dates.format(now(tz"UTC"), "yyyy-mm-ddTHH:MM:SS") * "Z"
+
+for pkg in spdxData["packages"]
+    if pkg["SPDXID"] == "SPDXRef-JuliaMain"
+        pkg["versionInfo"]= readline("../VERSION")
+        pkg["downloadLocation"]= "git+https://github.com/JuliaLang/julia.git@v" * pkg["versionInfo"]
+        break
+    end
+end
+
+open(spdxDocument, "w") do f
+    JSON.print(f, spdxData, 4)
+end
diff --git a/contrib/valgrind-julia.supp b/contrib/valgrind-julia.supp
index 86f843f3f4376e..408a48a2893cc4 100644
--- a/contrib/valgrind-julia.supp
+++ b/contrib/valgrind-julia.supp
@@ -2,9 +2,9 @@
 {
    msync unwind
    Memcheck:Param
-   msync(start)
+   write(buf)
    ...
-   obj:*/libpthread*.so
+   fun:validate_mem
    ...
-   fun:rec_backtrace_ctx
+   fun:rec_backtrace
 }
diff --git a/contrib/windows/Vagrantfile b/contrib/windows/Vagrantfile
deleted file mode 100644
index dbd8aa0e3fb979..00000000000000
--- a/contrib/windows/Vagrantfile
+++ /dev/null
@@ -1,76 +0,0 @@
-# Vagrantfile for building Windows Julia via MSYS2 or Cygwin
-
-$script_cygwin = <<SCRIPT
-# change the following to 32 for 32-bit Julia
-$bits = "64"
-$arch = "x86_$bits".Replace("x86_32", "i686")
-$setup = "setup-$arch.exe".Replace("i686", "x86")
-$cyginstall = "C:\\cygwin$bits"
-
-mkdir -Force $cyginstall | Out-Null
-(new-object net.webclient).DownloadFile(
-  "https://cygwin.com/$setup", "$cyginstall\\$setup")
-foreach ($pkg in @("git,make,curl,patch,python,gcc-g++,m4,cmake,p7zip",
-    "mingw64-$arch-gcc-g++,mingw64-$arch-gcc-fortran")) {
-  & "$cyginstall\\$setup" -q -n -R $cyginstall -l $cyginstall\\packages `
-    -s https://mirrors.kernel.org/sourceware/cygwin -g -P $pkg | Where-Object `
-    -FilterScript {$_ -notlike "Installing file *"} | Write-Output
-}
-& "$cyginstall\\bin\\sh" -lc "if ! [ -e julia$bits ]; then git clone \\
-  git://github.com/JuliaLang/julia.git julia$bits; fi && cd julia$bits && git pull && \\
-  echo 'XC_HOST = $arch-w64-mingw32' > Make.user && make cleanall && \\
-  make -j2 testall && make win-extras binary-dist"
-SCRIPT
-
-$script_msys2 = <<SCRIPT
-# change the following to 32 for 32-bit Julia
-$bits = "64"
-$arch = "x86_$bits".Replace("x86_32", "i686")
-# change the date in the following for future msys2 releases
-$msys2tarball = "msys2-base-$arch-20150916.tar"
-$msys2install = "C:\\msys$bits"
-
-# install chocolatey, cmake, and python2
-iex ((new-object net.webclient).DownloadString("https://chocolatey.org/install.ps1"))
-choco install -y cmake
-choco install -y python2
-
-# pacman is picky, reinstall msys2 from scratch
-foreach ($dir in @("etc", "usr", "var")) {
-  if (Test-Path "$msys2install\\$dir") {
-    rm -Recurse -Force $msys2install\\$dir
-  }
-}
-mkdir -Force $msys2install | Out-Null
-(new-object net.webclient).DownloadFile(
-  "https://chocolatey.org/7za.exe",
-  "$msys2install\\7za.exe")
-(new-object net.webclient).DownloadFile(
-  "https://sourceforge.net/projects/msys2/files/Base/$arch/$msys2tarball.xz",
-  "$msys2install\\$msys2tarball.xz")
-cd C:\\
-& "msys$bits\\7za.exe" x -y msys$bits\\$msys2tarball.xz
-& "msys$bits\\7za.exe" x -y $msys2tarball | Out-Null
-rm $msys2tarball, msys$bits\\$msys2tarball.xz, msys$bits\\7za.exe
-
-& "$msys2install\\usr\\bin\\sh" -lc "pacman --noconfirm --force --needed -Sy \\
-  bash pacman pacman-mirrors msys2-runtime"
-& "$msys2install\\usr\\bin\\sh" -lc "pacman --noconfirm -Syu && \\
-  pacman --noconfirm -S diffutils git m4 make patch tar p7zip msys/openssh"
-& "$msys2install\\usr\\bin\\sh" -lc "if ! [ -e julia$bits ]; then
-  git clone git://github.com/JuliaLang/julia.git julia$bits; fi && cd julia$bits && git pull && \\
-  if ! [ -e usr/$arch-w64-mingw32 ]; then contrib/windows/get_toolchain.sh $bits; fi && \\
-  export PATH=`$PWD/usr/$arch-w64-mingw32/sys-root/mingw/bin:`$PATH:/c/tools/python2 && \\
-  echo 'override CMAKE=/c/Program\\ Files\\ \\(x86\\)/CMake/bin/cmake' > Make.user && \\
-  make cleanall && make -j2 testall && make win-extras binary-dist"
-SCRIPT
-
-Vagrant.configure("2") do |config|
-  config.vm.box = "kensykora/windows_2012_r2_standard"
-  config.vm.provider :virtualbox do |vb|
-    # Use VBoxManage to customize the VM. For example to change memory:
-    vb.memory = 2048
-  end
-  # change the following to $script_msys2 to build with MSYS2 instead of Cygwin
-  config.vm.provision :shell, privileged: false, :inline => $script_cygwin
-end
diff --git a/contrib/windows/appveyor_build.sh b/contrib/windows/appveyor_build.sh
deleted file mode 100755
index e16342e28b4c9b..00000000000000
--- a/contrib/windows/appveyor_build.sh
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/bin/sh
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Script to compile Windows Julia on Appveyor using a Cygwin host
-# make sure you cd to the main julia directory beforing running
-# uses environment variables defined in .appveyor.yml: MINGW_ARCH
-
-# Stop on error
-set -e
-
-# Make sure stdin exists (apparently sometimes needed for mysys2?)
-exec < /dev/null
-
-export TERM=ansi  # make sure escape sequences print out properly on appveyor?
-
-# set MARCH for consistency with how binaries get built
-if [ "$MINGW_ARCH" = x86_64 ]; then
-  echo "override MARCH = x86-64" >> Make.user
-elif [ "$MINGW_ARCH" = i686 ]; then
-  echo "override MARCH = pentium4" >> Make.user
-fi
-echo "override XC_HOST = $MINGW_ARCH-w64-mingw32" >> Make.user
-echo "override JULIA_CPU_TARGET = generic;native" >> Make.user
-
-echo 'USE_BINARYBUILDER = 1' >> Make.user
-# This is pending the binaries actually being available
-#echo 'BINARYBUILDER_LLVM_ASSERTS = 1' >> Make.user
-echo 'FORCE_ASSERTIONS = 1' >> Make.user
-echo 'USECCACHE = 1' >> Make.user
-echo 'VERBOSE = 1' >> Make.user
-
-cat Make.user
-make check-whitespace
-make -j3 release
-make -j3 install
-make JULIA=../../usr/bin/julia.exe BIN=. "$(make print-CC)" -C test/embedding release
-make build-stats
-ccache -s
diff --git a/contrib/windows/build-installer.iss b/contrib/windows/build-installer.iss
index 71450d482c3cd9..4f5f0259d2f2cb 100644
--- a/contrib/windows/build-installer.iss
+++ b/contrib/windows/build-installer.iss
@@ -1,8 +1,18 @@
+#ifndef AppName
 #define AppName "Julia"
+#endif
+
+#ifndef DirName
+#define DirName AppName + "-" + AppVersion
+#endif
+
 #define AppNameLong AppName + " " + AppVersion
 #define AppMainExeName "bin\julia.exe"
 #define CurrentYear GetDateTimeString('yyyy', '', '')
-#define DirName AppName + "-" + AppVersion
+
+#ifndef AppId
+#define AppId DirName
+#endif
 
 
 [LangOptions]
@@ -45,12 +55,12 @@ SelectTasksDesc=
 
 
 [Setup]
-AppId={{054B4BC6-BD30-45C8-A623-8F5BA6EBD55D}
+AppId={#AppId}
 AppName={#AppName}
 AppVersion={#AppVersion}
 AppPublisher=Julia Language
 AppPublisherURL=https://julialang.org
-AppCopyright=Copyright 2009-{#CurrentYear}; Julia Langage
+AppCopyright=Copyright 2009-{#CurrentYear}; Julia Language
 VersionInfoDescription=Julia Installer
 PrivilegesRequiredOverridesAllowed=commandline
 WizardStyle=modern
@@ -93,7 +103,8 @@ Name: "addtopath"; Description: "Add {#AppName} to PATH"; GroupDescription: "{cm
 
 
 [Files]
-Source: "{#SourceDir}\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs
+Source: "{#SourceDir}\*"; Excludes: "{#AppMainExeName}"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs;
+Source: "{#SourceDir}\{#AppMainExeName}"; DestDir: "{app}\bin"; Flags: ignoreversion sign;
 
 
 [Icons]
diff --git a/contrib/windows/get_toolchain.sh b/contrib/windows/get_toolchain.sh
deleted file mode 100755
index 01e9ad973b2138..00000000000000
--- a/contrib/windows/get_toolchain.sh
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/sh
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# download mingw-w64 compilers from opensuse build service, usage:
-# ./get_toolchain.sh 64
-# (or ./get_toolchain.sh 32)
-# depends on curl, xmllint, gunzip, sort -V, sha256sum, and p7zip
-
-# Run in top-level Julia directory
-cd `dirname "$0"`/../..
-# Stop on error
-set -e
-bits=$1
-
-case $bits in
-  32)
-    host=i686-w64-mingw32
-    exc=sjlj
-    ;;
-  64)
-    host=x86_64-w64-mingw32
-    exc=seh
-    ;;
-  *)
-    echo 'error: run script either as `./get_toolchain.sh 32` or `./get_toolchain.sh 64`' >&2
-    exit 1
-    ;;
-esac
-echo "Downloading $host toolchain, check $PWD/get_toolchain.log for full output"
-contrib/windows/winrpm.sh http://download.opensuse.org/repositories/windows:/mingw:/win$bits/openSUSE_Leap_42.2 \
-  "mingw$bits-gcc mingw$bits-gcc-c++ mingw$bits-gcc-fortran \
-   mingw$bits-libssp0 mingw$bits-libstdc++6 mingw$bits-libgfortran3" > get_toolchain.log
-
-mingwdir=usr/$host/sys-root/mingw
-chmod +x $mingwdir/bin/* $mingwdir/$host/bin/* $mingwdir/libexec/gcc/$host/*/*
-mkdir -p usr/bin
-for i in gcc_s_$exc-1 ssp-0 stdc++-6 gfortran-3 quadmath-0; do
-  cp $mingwdir/bin/lib$i.dll usr/bin
-done
-$mingwdir/bin/g++ --version
-# copy around binutils and make a junction for includes
-cp $mingwdir/$host/bin/* $mingwdir/bin
-case $(uname) in
-  CYGWIN*)
-    mklink="cmd /C mklink /J"
-    # treat these like cross-compilers if we're running from cygwin
-    for i in gcc g++ gfortran; do
-      mv $mingwdir/bin/$i.exe $mingwdir/bin/$host-$i.exe
-    done
-    ;;
-  *)
-    mklink="cmd //C mklink //J"
-    ;;
-esac
-if ! [ -e $mingwdir/$host/include ]; then
-  $mklink $(cygpath -w $mingwdir/$host/include) $(cygpath -w $mingwdir/include)
-fi
-echo "Toolchain successfully downloaded to $PWD/$mingwdir"
-echo "Add toolchain to your path by running \`export PATH=$PWD/$mingwdir/bin:\$PATH\`"
diff --git a/contrib/windows/julia-manifest.xml b/contrib/windows/julia-manifest.xml
index 5e76b7b8e76f8b..91ca19e09fe991 100644
--- a/contrib/windows/julia-manifest.xml
+++ b/contrib/windows/julia-manifest.xml
@@ -1,5 +1,5 @@
 <?xml version="1.0" encoding="UTF-8" standalone="yes"?>
-<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0">
+<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0" xmlns:asmv3="urn:schemas-microsoft-com:asm.v3" >
   <trustInfo xmlns="urn:schemas-microsoft-com:asm.v3">
     <security>
       <requestedPrivileges>
@@ -7,6 +7,11 @@
       </requestedPrivileges>
     </security>
   </trustInfo>
+  <asmv3:application>
+    <asmv3:windowsSettings xmlns="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
+      <longPathAware>true</longPathAware>
+    </asmv3:windowsSettings>
+  </asmv3:application>
   <compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
     <application>
       <!--The ID below indicates application support for Windows Vista -->
@@ -21,4 +26,16 @@
       <supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
     </application>
   </compatibility>
+  <dependency>
+    <dependentAssembly>
+      <assemblyIdentity
+          type="win32"
+          name="Microsoft.Windows.Common-Controls"
+          version="6.0.0.0"
+          processorArchitecture="*"
+          publicKeyToken="6595b64144ccf1df"
+          language="*"
+        />
+    </dependentAssembly>
+  </dependency>
 </assembly>
diff --git a/contrib/windows/julia.rc b/contrib/windows/julia.rc
index fb2b179401cd64..9a82ee1083ad6e 100644
--- a/contrib/windows/julia.rc
+++ b/contrib/windows/julia.rc
@@ -1,13 +1,13 @@
 #include <winver.h>
 #include <winuser.h>
-1 VERSIONINFO
+VS_VERSION_INFO VERSIONINFO
 FILEVERSION     JLVER
 PRODUCTVERSION  JLVER
 /*
-FILEFLAGSMASK  	VS_FF_PRERELEASE
-FILEFLAGS      	VS_FF_PRERELEASE
+FILEFLAGSMASK  	VS_FFI_FILEFLAGSMASK
+FILEFLAGS      	VER_PRIVATEBUILD
 */
-FILEOS         	VOS_DOS_WINDOWS32
+FILEOS         	VOS__WINDOWS32
 FILETYPE       	VFT_APP
 BEGIN
   BLOCK "StringFileInfo"
@@ -18,7 +18,7 @@ BEGIN
       VALUE "FileDescription", "Julia Programming Language"
       VALUE "FileVersion", JLVER_STR
       VALUE "InternalName", "julia"
-      VALUE "LegalCopyright", "(c) 2009-2019 Julia Language"
+      VALUE "LegalCopyright", "(c) 2009-2021 Julia Language"
       VALUE "OriginalFilename", "julia.exe"
       VALUE "ProductName", "Julia"
       VALUE "ProductVersion", JLVER_STR
diff --git a/contrib/windows/winrpm.sh b/contrib/windows/winrpm.sh
deleted file mode 100755
index f1768b2761ff7b..00000000000000
--- a/contrib/windows/winrpm.sh
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/bin/sh
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# build-time mini version of WinRPM, usage:
-# ./winrpm.sh http://download.opensuse.org/repositories/windows:/mingw:/win64/openSUSE_13.2/ mingw64-zlib1
-# depends on wget/curl, xmllint, gunzip, sort -V, sha256sum, and p7zip
-
-set -e
-url=$1
-toinstall=$2
-
-for i in xmllint gunzip sort sha256sum 7z; do
-  if [ -z "$(which $i 2>/dev/null)" ]; then
-    echo "error: this script requires having $i installed" >&2
-    exit 1
-  fi
-done
-
-jldownload=$(dirname "$0")/../../deps/tools/jldownload
-
-retry_curl() {
-  echo "fetching \"$1\"" >&2
-  "$jldownload" - "$1" && return
-  echo "error: failed to download $1" >&2
-  exit 1
-}
-
-xp="xmllint --xpath"
-# the local-name() complication here is due to xml namespaces
-loc="local-name()="
-eval $(retry_curl $url/repodata/repomd.xml | $xp "/*[$loc'repomd'] \
-  /*[$loc'data'][@type='primary']/*[$loc'location']/@href" -)
-
-case $href in
-  *.gz)
-    primary=$(retry_curl $url/$href | gunzip);;
-  *)
-    primary=$(retry_curl $url/$href);;
-esac
-
-# outputs <package> xml string for newest version
-# don't include arch=src packages, those will list build-time dependencies
-rpm_select() {
-  candidates="<c>$(echo $primary | $xp "//*[$loc'package'] \
-    [./*[$loc'name' and .='$1']][./*[$loc'arch' and .='noarch']]" - \
-    2>/dev/null | sed -e 's|<rpm:|<|g' -e 's|</rpm:|</|g')</c>"
-  # remove rpm namespacing so output can be parsed by xmllint later
-  if [ "$candidates" = "<c></c>" ]; then
-    echo "error: no package candidates found for $1" >&2
-    exit 1
-  fi
-  epochs=""
-  for i in $(echo $candidates | $xp "/c/package/version/@epoch" -); do
-    eval $i
-    epochs="$epochs $epoch"
-  done
-  maxepoch=$(echo $epochs | sed 's/ /\n/g' | sort -V -u | tail -n 1)
-  vers=""
-  for i in $(echo $candidates | $xp "/c/package/version \
-      [@epoch='$maxepoch']/@ver" -); do
-    eval $i
-    vers="$vers $ver"
-  done
-  maxver=$(echo $vers | sed 's/ /\n/g' | sort -V -u | tail -n 1)
-  rels=""
-  for i in $(echo $candidates | $xp "/c/package/version \
-      [@epoch='$maxepoch'][@ver='$maxver']/@rel" -); do
-    eval $i
-    rels="$rels $rel"
-  done
-  maxrel=$(echo $rels | sed 's/ /\n/g' | sort -V -u | tail -n 1)
-  repeats=$(echo $rels | sed 's/ /\n/g' | sort -V | uniq -d | tail -n 1)
-  if [ "$repeats" = "$maxrel" ]; then
-    echo "warning: multiple candidates found for $1 with same version:" >&2
-    echo "epoch $maxepoch, ver $maxver, rel $maxrel, picking at random" >&2
-  fi
-  echo $candidates | $xp "/c/package[version[@epoch='$maxepoch'] \
-    [@ver='$maxver'][@rel='$maxrel']][1]" -
-}
-
-for i in $toinstall; do
-  # fail if no available candidates for requested packages
-  if [ -z "$(rpm_select $i)" ]; then
-    exit 1
-  fi
-done
-
-# outputs package and dll names, e.g. mingw64(zlib1.dll)
-rpm_requires() {
-  for i in $(rpm_select $1 | \
-      $xp "/package/format/requires/entry/@name" - 2>/dev/null); do
-    eval $i
-    echo $name
-  done
-}
-
-# outputs package name, warns if multiple providers with different names
-rpm_provides() {
-  providers=$(echo $primary | $xp "//*[$loc'package'][./*[$loc'format'] \
-    /*[$loc'provides']/*[$loc'entry'][@name='$1']]/*[$loc'name']" - | \
-    sed -e 's|<name>||g' -e 's|</name>|\n|g' | sort -u)
-  if [ $(echo $providers | wc -w) -gt 1 ]; then
-    echo "warning: found multiple providers $providers for $1, adding all" >&2
-  fi
-  echo $providers
-}
-
-newpkgs=$toinstall
-allrequires=""
-while [ -n "$newpkgs" ]; do
-  newrequires=""
-  for i in $newpkgs; do
-    for j in $(rpm_requires $i); do
-      # leading and trailing spaces to ensure word match
-      case " $allrequires $newrequires " in
-        *" $j "*) # already on list
-          ;;
-        *)
-          newrequires="$newrequires $j";;
-      esac
-    done
-  done
-  allrequires="$allrequires $newrequires"
-  newpkgs=""
-  for i in $newrequires; do
-    provides="$(rpm_provides $i)"
-    case " $toinstall $newpkgs " in
-      *" $provides "*) # already on list
-        ;;
-      *)
-        newpkgs="$newpkgs $provides";;
-    esac
-  done
-  toinstall="$toinstall $newpkgs"
-done
-
-mkdir -p noarch
-for i in $toinstall; do
-  pkgi=$(rpm_select $i)
-  checksum=$(echo $pkgi | $xp "/package/checksum/text()" -)
-  eval $(echo $pkgi | $xp "/package/location/@href" -)
-  echo "downloading $href"
-  $jldownload $href $url/$href
-  echo "$checksum *$href" | sha256sum -c
-  7z x -y $href
-  cpiofile=$(basename $href | sed 's/.rpm$/.cpio/')
-  rm $href
-  7z x -y $cpiofile
-  rm $cpiofile
-done
-rmdir --ignore-fail-on-non-empty noarch
diff --git a/deps/Makefile b/deps/Makefile
index 174758d3afe92a..ac0dbe7afcb1a7 100644
--- a/deps/Makefile
+++ b/deps/Makefile
@@ -24,8 +24,8 @@ BUILDDIR := $(BUILDDIR)$(MAYBE_HOST)
 # if you are adding a new target, it can help to copy an similar, existing target
 #
 # autoconf configure-driven scripts: pcre unwind gmp mpfr patchelf libuv curl
-# custom Makefile rules: openlibm dsfmt suitesparse-wrapper suitesparse lapack openblas utf8proc objconv osxunwind libwhich
-# CMake libs: llvm libgit2 libssh2 mbedtls
+# custom Makefile rules: openlibm dsfmt libsuitesparse lapack blastrampoline openblas utf8proc objconv libwhich
+# CMake libs: llvm llvmunwind libgit2 libssh2 mbedtls
 #
 # downloadable via git: llvm-svn, libuv, libopenlibm, utf8proc, libgit2, libssh2
 #
@@ -39,8 +39,12 @@ unexport CONFIG_SITE
 
 DEP_LIBS :=
 
-ifeq ($(USE_GPL_LIBS), 1)
-DEP_LIBS += suitesparse-wrapper
+ifeq ($(USE_SYSTEM_LIBBLASTRAMPOLINE), 0)
+DEP_LIBS += blastrampoline
+endif
+
+ifeq ($(USE_SYSTEM_CSL), 0)
+DEP_LIBS += csl
 endif
 
 ifeq ($(USE_SYSTEM_LIBUV), 0)
@@ -54,7 +58,7 @@ DEP_LIBS += unwind
 else ifeq ($(OS), FreeBSD)
 DEP_LIBS += unwind
 else ifeq ($(OS), Darwin)
-DEP_LIBS += osxunwind
+DEP_LIBS += llvmunwind
 endif
 endif
 endif
@@ -127,8 +131,8 @@ DEP_LIBS += mpfr
 endif
 
 ifeq ($(USE_GPL_LIBS), 1)
-ifeq ($(USE_SYSTEM_SUITESPARSE), 0)
-DEP_LIBS += suitesparse
+ifeq ($(USE_SYSTEM_LIBSUITESPARSE), 0)
+DEP_LIBS += libsuitesparse
 endif
 endif
 
@@ -155,16 +159,30 @@ DEP_LIBS += lapack
 endif
 endif
 
+ifeq ($(USE_SYSTEM_LIBWHICH), 0)
 ifneq ($(OS), WINNT)
 DEP_LIBS += libwhich
 endif
+endif
+
+# list all targets
+DEP_LIBS_STAGED_ALL := llvm llvm-tools clang llvmunwind unwind libuv pcre \
+	openlibm dsfmt blastrampoline openblas lapack gmp mpfr patchelf utf8proc \
+	objconv mbedtls libssh2 nghttp2 curl libgit2 libwhich zlib p7zip csl \
+	libsuitesparse
+DEP_LIBS_ALL := $(DEP_LIBS_STAGED_ALL)
 
-# unlist targets that have not been converted to use the staged-install
-DEP_LIBS_STAGED := $(filter-out suitesparse-wrapper,$(DEP_LIBS))
-ifneq ($(USE_BINARYBUILDER_LIBUNWIND),1)
-DEP_LIBS_STAGED := $(filter-out osxunwind,$(DEP_LIBS_STAGED))
+ifneq ($(USE_BINARYBUILDER_OPENBLAS),0)
+DEP_LIBS_ALL := $(filter-out lapack,$(DEP_LIBS_ALL))
 endif
 
+ifeq ($(USE_BINARYBUILDER_LLVM),0)
+DEP_LIBS_ALL := $(filter-out clang llvm-tools,$(DEP_LIBS_ALL))
+endif
+
+ifeq ($(USE_BINARYBUILDER_LIBSUITESPARSE),0)
+DEP_LIBS_STAGED := $(filter-out libsuitesparse,$(DEP_LIBS_STAGED))
+endif
 
 ## Common build target prefixes
 
@@ -176,21 +194,27 @@ compile: $(addprefix compile-, $(DEP_LIBS))
 check: $(addprefix check-, $(DEP_LIBS))
 fastcheck: $(addprefix fastcheck-, $(DEP_LIBS))
 stage: $(addprefix stage-, $(DEP_LIBS_STAGED))
-install: $(addprefix install-, $(DEP_LIBS))
-cleanall: $(addprefix clean-, $(DEP_LIBS))
-distcleanall: $(addprefix distclean-, $(DEP_LIBS))
+install: version-check $(addprefix install-, $(DEP_LIBS))
+version-check: $(addprefix version-check-, $(DEP_LIBS_STAGED))
+
+uninstall: $(addprefix uninstall-, $(DEP_LIBS_STAGED_ALL))
+cleanall: $(addprefix clean-, $(DEP_LIBS_ALL))
+distcleanall: $(addprefix distclean-, $(DEP_LIBS_ALL))
 	rm -rf $(build_prefix)
-getall: get-llvm get-libuv get-pcre get-openlibm get-dsfmt get-openblas get-lapack get-suitesparse get-unwind get-osxunwind get-gmp get-mpfr get-patchelf get-utf8proc get-objconv get-mbedtls get-libssh2 get-nghttp2 get-curl get-libgit2 get-libwhich
+getall: $(addprefix get-, $(DEP_LIBS_ALL))
 
+include $(SRCDIR)/csl.mk
 include $(SRCDIR)/llvm.mk
 include $(SRCDIR)/libuv.mk
 include $(SRCDIR)/pcre.mk
 include $(SRCDIR)/openlibm.mk
 include $(SRCDIR)/dsfmt.mk
 include $(SRCDIR)/objconv.mk
-include $(SRCDIR)/blas.mk
+include $(SRCDIR)/blastrampoline.mk
+include $(SRCDIR)/openblas.mk
 include $(SRCDIR)/utf8proc.mk
-include $(SRCDIR)/suitesparse.mk
+include $(SRCDIR)/libsuitesparse.mk
+include $(SRCDIR)/zlib.mk
 include $(SRCDIR)/unwind.mk
 include $(SRCDIR)/gmp.mk
 include $(SRCDIR)/mpfr.mk
@@ -201,7 +225,6 @@ include $(SRCDIR)/nghttp2.mk
 include $(SRCDIR)/curl.mk
 include $(SRCDIR)/libgit2.mk
 include $(SRCDIR)/libwhich.mk
-include $(SRCDIR)/zlib.mk
 include $(SRCDIR)/p7zip.mk
 
 include $(SRCDIR)/tools/uninstallers.mk
diff --git a/deps/NATIVE.cmake b/deps/NATIVE.cmake
deleted file mode 100644
index 026ee4de669340..00000000000000
--- a/deps/NATIVE.cmake
+++ /dev/null
@@ -1,4 +0,0 @@
-# native toolchain file to fix llvm cross-compilation finickiness
-# ref http://lists.llvm.org/pipermail/llvm-dev/2016-February/095366.html
-set(CMAKE_C_COMPILER cc)
-set(CMAKE_CXX_COMPILER c++)
diff --git a/deps/SuiteSparse_wrapper.c b/deps/SuiteSparse_wrapper.c
deleted file mode 100644
index fc8b612d7671bb..00000000000000
--- a/deps/SuiteSparse_wrapper.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
-  SuiteSparse_wrapper.c: Changes made to this file in the Julia repo
-  in deps/SuiteSparse_wrapper.c should be also made in
-  Yggdrasil/S/SuiteSparse and vice versa.
-*/
-
-#include <string.h>
-#include <cholmod.h>
-
-extern size_t jl_cholmod_common_size(void) {
-    return sizeof(cholmod_common);
-}
-
-extern size_t jl_cholmod_sizeof_long(void) {
-    return sizeof(SuiteSparse_long);
-}
-
-extern int jl_cholmod_version(int *ver) {
-    if (ver != (int*) NULL) {
-        ver[0] = CHOLMOD_MAIN_VERSION;
-        ver[1] = CHOLMOD_SUB_VERSION;
-        ver[2] = CHOLMOD_SUBSUB_VERSION;
-    }
-    return CHOLMOD_VERSION;
-}
-
-extern void jl_cholmod_common_offsets(size_t *vv) {
-    vv[0] = offsetof(cholmod_common, dbound);
-    vv[1] = offsetof(cholmod_common, maxrank);
-    vv[2] = offsetof(cholmod_common, supernodal_switch);
-    vv[3] = offsetof(cholmod_common, supernodal);
-    vv[4] = offsetof(cholmod_common, final_asis);
-    vv[5] = offsetof(cholmod_common, final_super);
-    vv[6] = offsetof(cholmod_common, final_ll);
-    vv[7] = offsetof(cholmod_common, final_pack);
-    vv[8] = offsetof(cholmod_common, final_monotonic);
-    vv[9] = offsetof(cholmod_common, final_resymbol);
-    vv[10] = offsetof(cholmod_common, prefer_zomplex);
-    vv[11] = offsetof(cholmod_common, prefer_upper);
-    vv[12] = offsetof(cholmod_common, print);
-    vv[13] = offsetof(cholmod_common, precise);
-    vv[14] = offsetof(cholmod_common, nmethods);
-    vv[15] = offsetof(cholmod_common, selected);
-    vv[16] = offsetof(cholmod_common, postorder);
-    vv[17] = offsetof(cholmod_common, itype);
-    vv[18] = offsetof(cholmod_common, dtype);
-}
diff --git a/deps/Versions.make b/deps/Versions.make
index 6ed63798bccd26..77d568ee7c6b51 100644
--- a/deps/Versions.make
+++ b/deps/Versions.make
@@ -1,45 +1,117 @@
-LLVM_VER = 9.0.1
-LLVM_BB_REL = 8
-PCRE_VER = 10.31
-PCRE_BB_REL = 0
-DSFMT_VER = 2.2.3
-DSFMT_BB_REL = 0
-OPENBLAS_VER = 0.3.10
-OPENBLAS_BB_REL = 0
-LAPACK_VER = 3.9.0
-SUITESPARSE_VER = 5.4.0
-SUITESPARSE_BB_REL = 6
-OPENLIBM_VER = 0.7.0
-OPENLIBM_BB_REL = 0
-UNWIND_VER = 1.3.1
-UNWIND_BB_REL = 4
-OSXUNWIND_VER = 0.0.6
-OSXUNWIND_BB_REL = 0
-GMP_VER = 6.2.0
-GMP_BB_REL = 1
-MPFR_VER = 4.1.0
-MPFR_BB_REL = 1
-PATCHELF_VER = 0.9
-MBEDTLS_VER = 2.16.8
-MBEDTLS_BB_REL = 0
-LIBSSH2_VER = 1.9.0
-LIBSSH2_BB_REL = 1
-CURL_VER = 7.71.1
-CURL_BB_REL = 0
-NGHTTP2_VER = 1.40.0
-NGHTTP2_BB_REL = 2
-LIBGIT2_VER = 1.0.1
-LIBGIT2_BB_REL = 0
-LIBUV_VER = 1.29.1
-LIBUV_BB_REL = 9
-OBJCONV_VER = 2.49.0
-OBJCONV_BB_REL = 0
-ZLIB_VER = 1.2.11
-ZLIB_BB_REL = 10
-P7ZIP_VER = 16.2.0
-P7ZIP_BB_REL = 1
+## Dependencies and where to find them, listed in alphabetical order
+
+# To define a new dependency, you need to know the following pieces of information:
+#
+#  * The Makefile variable stem; for LibCURL this is just "CURL".
+#  * The JLL name; for GMP this is "GMP", while for LLVM it could be "LLVM_full" or "LLVM_full_assert"
+#  * The upstream source version; for dSFMT this is currently "2.2.3"
+#
+# Everything else will be auto-generated.  In particular, the version listed here
+# represents the upstream source version; the JLL binary version that gets downloaded is
+# controlled by the `Project.toml` files in `stdlib/XXX_jll/`.
+
+# Compiler Support Libraries
+CSL_JLL_NAME := CompilerSupportLibraries
+
+# Clang (paired with LLVM, only here as a JLL download)
+CLANG_JLL_NAME := Clang
+CLANG_JLL_VER  := 13.0.1+0
+
+# DSFMT
+DSFMT_VER := 2.2.4
+DSFMT_JLL_NAME := dSFMT
+
+# GMP
+GMP_VER := 6.2.1
+GMP_JLL_NAME := GMP
+
+# LibCURL
+CURL_VER := 7.81.0
+CURL_JLL_NAME := LibCURL
+
+# LAPACK, source-only
+LAPACK_VER := 3.9.0
+
+# LibGit2
+LIBGIT2_JLL_NAME := LibGit2
+
+# LibSSH2
+LIBSSH2_VER := 1.10.2
+LIBSSH2_JLL_NAME := LibSSH2
+
+# LibUV
+LIBUV_VER := 2
+LIBUV_JLL_NAME := LibUV
+
+# LLVM
+LLVM_VER := 13.0.1
+LLVM_ASSERT_JLL_VER := 13.0.1+0
+LLVM_JLL_NAME := libLLVM
+
+# LLVM_tools (downloads LLVM_jll to get things like `lit` and `opt`)
+LLVM_TOOLS_JLL_NAME := LLVM
+LLVM_TOOLS_JLL_VER := 13.0.1+0
+LLVM_TOOLS_ASSERT_JLL_VER := 13.0.1+0
+
+# LLVM libunwind
+LLVMUNWIND_VER := 12.0.1
+LLVMUNWIND_JLL_NAME := LLVMLibUnwind
+
+# MbedTLS
+MBEDTLS_VER := 2.28.0
+MBEDTLS_JLL_NAME := MbedTLS
+
+# MPFR
+MPFR_VER := 4.1.0
+MPFR_JLL_NAME := MPFR
+
+# nghttp2
+NGHTTP2_VER := 1.41.0
+NGHTTP2_JLL_NAME := nghttp2
+
+# Objconv (we don't ship this, so no need for a fake JLL; therefore we specify the JLL_VER here)
+OBJCONV_VER := 2.49.1
+OBJCONV_JLL_NAME := Objconv
+OBJCONV_JLL_VER  := 2.49.1+0
+
+# blastrampoline
+BLASTRAMPOLINE_VER := 5.1.0
+BLASTRAMPOLINE_JLL_NAME := libblastrampoline
+
+# OpenBLAS
+OPENBLAS_VER := 0.3.17
+OPENBLAS_JLL_NAME := OpenBLAS
+
+# OpenLibm
+OPENLIBM_VER := 0.8.1
+OPENLIBM_JLL_NAME := OpenLibm
+
+# Patchelf (we don't ship this or even use a JLL, we just always build it)
+PATCHELF_VER := 0.13
+
+# p7zip
+P7ZIP_VER := 16.2.0
+P7ZIP_JLL_NAME := p7zip
+
+# PCRE
+PCRE_VER := 10.36
+PCRE_JLL_NAME := PCRE2
+
+# SuiteSparse
+LIBSUITESPARSE_VER := 5.10.1
+LIBSUITESPARSE_JLL_NAME := SuiteSparse
+
+# unwind
+UNWIND_VER := 1.5.0
+UNWIND_VER_TAG := 1.5
+UNWIND_JLL_NAME := LibUnwind
+UNWIND_JLL_VER  := 1.5.0+1
+
+# zlib
+ZLIB_VER := 1.2.11
+ZLIB_JLL_NAME := Zlib
 
 # Specify the version of the Mozilla CA Certificate Store to obtain.
 # The versions of cacert.pem are identified by the date (YYYY-MM-DD) of their changes.
 # See https://curl.haxx.se/docs/caextract.html for more details.
-MOZILLA_CACERT_VERSION := 2020-07-22
+MOZILLA_CACERT_VERSION := 2022-02-01
diff --git a/deps/blas.mk b/deps/blas.mk
deleted file mode 100644
index 68271f32e1cf3b..00000000000000
--- a/deps/blas.mk
+++ /dev/null
@@ -1,225 +0,0 @@
-## OpenBLAS ##
-# LAPACK is built into OpenBLAS by default
-OPENBLAS_GIT_URL := git://github.com/xianyi/OpenBLAS.git
-OPENBLAS_TAR_URL = https://api.github.com/repos/xianyi/OpenBLAS/tarball/$1
-$(eval $(call git-external,openblas,OPENBLAS,,,$(BUILDDIR)))
-
-OPENBLAS_BUILD_OPTS := CC="$(CC)" FC="$(FC)" LD="$(LD)" RANLIB="$(RANLIB)" TARGET=$(OPENBLAS_TARGET_ARCH) BINARY=$(BINARY)
-
-# Thread support
-ifeq ($(OPENBLAS_USE_THREAD), 1)
-OPENBLAS_BUILD_OPTS += USE_THREAD=1
-OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=50
-# Maximum number of threads for parallelism
-ifneq ($(ARCH),x86_64)
-# Assume we can't address much memory to spawn many threads
-# It is also unlikely that 32-bit architectures have too many cores
-OPENBLAS_BUILD_OPTS += NUM_THREADS=8
-else ifeq ($(OS),WINNT)
-# Windows seems unable to handle very many
-OPENBLAS_BUILD_OPTS += NUM_THREADS=16
-else ifeq ($(OS),Darwin)
-# This should suffice for the largest macs
-OPENBLAS_BUILD_OPTS += NUM_THREADS=16
-else
-# On linux, try to provision for the largest possible machine currently
-OPENBLAS_BUILD_OPTS += NUM_THREADS=16
-endif
-else
-OPENBLAS_BUILD_OPTS += USE_THREAD=0
-endif
-
-# don't touch scheduler affinity since we manage this ourselves
-OPENBLAS_BUILD_OPTS += NO_AFFINITY=1
-
-# Build for all architectures - required for distribution
-ifeq ($(OPENBLAS_DYNAMIC_ARCH), 1)
-OPENBLAS_BUILD_OPTS += DYNAMIC_ARCH=1
-endif
-
-# 64-bit BLAS interface
-ifeq ($(USE_BLAS64), 1)
-OPENBLAS_BUILD_OPTS += INTERFACE64=1 SYMBOLSUFFIX="$(OPENBLAS_SYMBOLSUFFIX)" LIBPREFIX="libopenblas$(OPENBLAS_LIBNAMESUFFIX)"
-ifeq ($(OS), Darwin)
-OPENBLAS_BUILD_OPTS += OBJCONV=$(abspath $(BUILDDIR)/objconv/objconv)
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: | $(BUILDDIR)/objconv/build-compiled
-endif
-endif
-
-OPENBLAS_FFLAGS := $(JFFLAGS) $(USE_BLAS_FFLAGS)
-OPENBLAS_CFLAGS := -O2
-
-# Decide whether to build for 32-bit or 64-bit arch
-ifneq ($(BUILD_OS),$(OS))
-OPENBLAS_BUILD_OPTS += OSNAME=$(OS) CROSS=1 HOSTCC=$(HOSTCC) CROSS_SUFFIX=$(CROSS_COMPILE)
-endif
-ifeq ($(OS),WINNT)
-ifneq ($(ARCH),x86_64)
-ifneq ($(USECLANG),1)
-OPENBLAS_CFLAGS += -mincoming-stack-boundary=2
-endif
-OPENBLAS_FFLAGS += -mincoming-stack-boundary=2
-endif
-endif
-
-# Work around invalid register errors on 64-bit Windows
-# See discussion in https://github.com/xianyi/OpenBLAS/issues/1708
-# TODO: Remove this once we use a version of OpenBLAS where this is set automatically
-ifeq ($(OS),WINNT)
-ifeq ($(ARCH),x86_64)
-OPENBLAS_CFLAGS += -fno-asynchronous-unwind-tables
-endif
-endif
-
-OPENBLAS_BUILD_OPTS += CFLAGS="$(CFLAGS) $(OPENBLAS_CFLAGS)"
-OPENBLAS_BUILD_OPTS += FFLAGS="$(FFLAGS) $(OPENBLAS_FFLAGS)"
-OPENBLAS_BUILD_OPTS += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN)"
-
-# Debug OpenBLAS
-ifeq ($(OPENBLAS_DEBUG), 1)
-OPENBLAS_BUILD_OPTS += DEBUG=1
-endif
-
-# Allow disabling AVX for older binutils
-ifeq ($(OPENBLAS_NO_AVX), 1)
-OPENBLAS_BUILD_OPTS += NO_AVX=1 NO_AVX2=1 NO_AVX512=1
-else ifeq ($(OPENBLAS_NO_AVX2), 1)
-OPENBLAS_BUILD_OPTS += NO_AVX2=1 NO_AVX512=1
-else ifeq ($(OPENBLAS_NO_AVX512), 1)
-OPENBLAS_BUILD_OPTS += NO_AVX512=1
-endif
-
-# Do not overwrite the "-j" flag
-OPENBLAS_BUILD_OPTS += MAKE_NB_JOBS=0
-
-ifneq ($(USE_BINARYBUILDER_OPENBLAS), 1)
-
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/source-extracted
-	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-winexit.patch
-	echo 1 > $@
-
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied
-	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
-		patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
-	echo 1 > $@
-
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
-	echo 1 > $@
-
-$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
-	echo $(MAKE) -C $(dir $<) $(OPENBLAS_BUILD_OPTS) # echo first, so we only print the error message below in a failure case
-	@$(MAKE) -C $(dir $<) $(OPENBLAS_BUILD_OPTS) || (echo $(WARNCOLOR)"*** Clean the OpenBLAS build with 'make -C deps clean-openblas'. Rebuild with 'make OPENBLAS_USE_THREAD=0' if OpenBLAS had trouble linking libpthread.so, and with 'make OPENBLAS_TARGET_ARCH=NEHALEM' if there were errors building SandyBridge support. Both these options can also be used simultaneously. ***"$(ENDCOLOR) && false)
-	echo 1 > $@
-
-define OPENBLAS_INSTALL
-	$(call SHLIBFILE_INSTALL,$1,$2,$3)
-ifeq ($$(OS), Linux)
-	ln -sf libopenblas$$(OPENBLAS_LIBNAMESUFFIX).$$(SHLIB_EXT) $2/$$(build_libdir)/libopenblas$$(OPENBLAS_LIBNAMESUFFIX).$$(SHLIB_EXT).0
-endif
-endef
-$(eval $(call staged-install, \
-	openblas,$(OPENBLAS_SRC_DIR), \
-	OPENBLAS_INSTALL,$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/$(LIBBLASNAME).$(SHLIB_EXT),, \
-	$$(INSTALL_NAME_CMD)libopenblas$$(OPENBLAS_LIBNAMESUFFIX).$$(SHLIB_EXT) $$(build_shlibdir)/libopenblas$$(OPENBLAS_LIBNAMESUFFIX).$$(SHLIB_EXT)))
-
-clean-openblas:
-	-rm $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled
-	-$(MAKE) -C $(BUILDDIR)/$(OPENBLAS_SRC_DIR) clean
-
-
-get-openblas: $(OPENBLAS_SRC_FILE)
-extract-openblas: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/source-extracted
-configure-openblas: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
-compile-openblas: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled
-fastcheck-openblas: check-openblas
-check-openblas: compile-openblas
-
-
-## Mac gfortran BLAS wrapper ##
-ifeq ($(OS),Darwin)
-$(BUILDDIR)/libgfortblas.$(SHLIB_EXT): $(SRCDIR)/gfortblas.c $(SRCDIR)/gfortblas.alias
-	$(CC) -Wall -O3 $(CPPFLAGS) $(CFLAGS) $(fPIC) -shared $< -o $@ -pipe \
-				-Wl,-reexport_framework,Accelerate -Wl,-alias_list,$(SRCDIR)/gfortblas.alias
-
-$(build_shlibdir)/libgfortblas.$(SHLIB_EXT): $(BUILDDIR)/libgfortblas.$(SHLIB_EXT)
-	cp -f $< $@
-	$(INSTALL_NAME_CMD)libgfortblas.$(SHLIB_EXT) $@
-endif
-
-
-## LAPACK ##
-
-LAPACK_MFLAGS := NOOPT="$(FFLAGS) $(JFFLAGS) $(USE_BLAS_FFLAGS) -O0" \
-    OPTS="$(FFLAGS) $(JFFLAGS) $(USE_BLAS_FFLAGS)" FORTRAN="$(FC)" \
-    LOADER="$(FC)" BLASLIB="$(RPATH_ESCAPED_ORIGIN) $(LIBBLAS)"
-
-$(SRCCACHE)/lapack-$(LAPACK_VER).tgz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ http://www.netlib.org/lapack/$(notdir $@)
-
-$(BUILDDIR)/lapack-$(LAPACK_VER)/source-extracted: $(SRCCACHE)/lapack-$(LAPACK_VER).tgz
-	$(JLCHECKSUM) $<
-	mkdir -p $(BUILDDIR)
-	cd $(BUILDDIR) && $(TAR) -zxf $<
-	cp $(dir $@)INSTALL/make.inc.gfortran $(dir $@)make.inc
-	echo 1 > $@
-
-ifeq ($(USE_SYSTEM_BLAS), 0)
-$(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0: | $(build_prefix)/manifest/openblas
-else ifeq ($(OS),Darwin)
-$(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0: | $(build_shlibdir)/libgfortblas.$(SHLIB_EXT)
-endif
-$(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0: $(BUILDDIR)/lapack-$(LAPACK_VER)/source-extracted
-	$(MAKE) -C $(dir $@) lapacklib $(LAPACK_MFLAGS)
-	echo 1 > $@
-
-$(BUILDDIR)/lapack-$(LAPACK_VER)/build-checked: $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0
-ifeq ($(BUILD_OS),$(OS))
-	$(MAKE) -C $(dir $@) lapack_testing $(LAPACK_MFLAGS) -k
-endif
-	echo 1 > $@
-
-$(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled: $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0 | $(build_prefix)/manifest
-	$(FC) -shared $(FFLAGS) $(JFFLAGS) $(dir $<)/SRC/*.o \
-		$(dir $<)/INSTALL/dlamch.o $(dir $<)/INSTALL/dsecnd_INT_ETIME.o \
-		$(dir $<)/INSTALL/ilaver.o $(dir $<)/INSTALL/slamch.o $(LIBBLAS) \
-		-o $(dir $<)/liblapack.$(SHLIB_EXT)
-	echo 1 > $@
-
-$(eval $(call staged-install, \
-	lapack,lapack-$(LAPACK_VER), \
-	SHLIBFILE_INSTALL,$(BUILDDIR)/lapack-$(LAPACK_VER)/liblapack.$(SHLIB_EXT),, \
-	$$(INSTALL_NAME_CMD)liblapack.$$(SHLIB_EXT) $$(build_shlibdir)/liblapack.$$(SHLIB_EXT)))
-
-clean-lapack:
-	-rm $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0 $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled
-	-$(MAKE) -C $(BUILDDIR)/lapack-$(LAPACK_VER) clean
-
-distclean-lapack:
-	-rm -rf $(SRCCACHE)/lapack-$(LAPACK_VER).tgz $(BUILDDIR)/lapack-$(LAPACK_VER)
-
-
-get-lapack: $(SRCCACHE)/lapack-$(LAPACK_VER).tgz
-extract-lapack: $(BUILDDIR)/lapack-$(LAPACK_VER)/source-extracted
-configure-lapack: extract-lapack
-compile-lapack: $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled
-fastcheck-lapack: check-lapack
-check-lapack: $(BUILDDIR)/lapack-$(LAPACK_VER)/build-checked
-
-else # USE_BINARYBUILDER_OPENBLAS
-
-
-OPENBLAS_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/OpenBLAS_jll.jl/releases/download/OpenBLAS-v$(OPENBLAS_VER)+$(OPENBLAS_BB_REL)
-OPENBLAS_BB_NAME := OpenBLAS.v$(OPENBLAS_VER)
-
-$(eval $(call bb-install,openblas,OPENBLAS,true))
-get-lapack: get-openblas
-extract-lapack: extract-openblas
-configure-lapack: configure-openblas
-compile-lapack: compile-openblas
-fastcheck-lapack: fastcheck-openblas
-check-lapack: check-openblas
-clean-lapack: clean-openblas
-distclean-lapack: distclean-openblas
-install-lapack: install-openblas
-endif
diff --git a/deps/blastrampoline.mk b/deps/blastrampoline.mk
new file mode 100644
index 00000000000000..a29b9b19e0eaa1
--- /dev/null
+++ b/deps/blastrampoline.mk
@@ -0,0 +1,37 @@
+## blastrampoline ##
+
+ifneq ($(USE_BINARYBUILDER_BLASTRAMPOLINE),1)
+
+BLASTRAMPOLINE_GIT_URL := https://github.com/JuliaLinearAlgebra/libblastrampoline.git
+BLASTRAMPOLINE_TAR_URL = https://api.github.com/repos/JuliaLinearAlgebra/libblastrampoline/tarball/$1
+$(eval $(call git-external,blastrampoline,BLASTRAMPOLINE,,,$(BUILDDIR)))
+
+$(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/source-extracted
+	mkdir -p $(dir $@)
+	echo 1 > $@
+
+$(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-compiled: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-configured
+	cd $(dir $@)/src && $(MAKE) $(MAKE_COMMON)
+	echo 1 > $@
+
+define BLASTRAMPOLINE_INSTALL
+	$(MAKE) -C $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/src $(MAKE_COMMON) install
+endef
+$(eval $(call staged-install, \
+	blastrampoline,$(BLASTRAMPOLINE_SRC_DIR), \
+	BLASTRAMPOLINE_INSTALL,, \
+	$$(BLASTRAMPOLINE_OBJ_TARGET), \
+	$$(INSTALL_NAME_CMD)libblastrampoline.$$(SHLIB_EXT) $$(build_shlibdir)/libblastrampoline.$$(SHLIB_EXT)))
+
+get-blastrampoline: $(BLASTRAMPOLINE_SRC_FILE)
+extract-blastrampoline: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/source-extracted
+configure-blastrampoline: extract-blastrampoline
+compile-blastrampoline: $(BUILDDIR)/$(BLASTRAMPOLINE_SRC_DIR)/build-compiled
+fastcheck-blastrampoline: check-blastrampoline
+check-blastrampoline: compile-blastrampoline
+
+else
+
+$(eval $(call bb-install,blastrampoline,BLASTRAMPOLINE,false))
+
+endif # USE_BINARYBUILDER_BLASTRAMPOLINE
diff --git a/deps/blastrampoline.version b/deps/blastrampoline.version
new file mode 100644
index 00000000000000..23074f70854dc9
--- /dev/null
+++ b/deps/blastrampoline.version
@@ -0,0 +1,2 @@
+BLASTRAMPOLINE_BRANCH=v5.0.1
+BLASTRAMPOLINE_SHA1=d32042273719672c6669f6442a0be5605d434b70
diff --git a/deps/checksums/7z1900-x64.exe/md5 b/deps/checksums/7z1900-x64.exe/md5
deleted file mode 100644
index 588587addaa62a..00000000000000
--- a/deps/checksums/7z1900-x64.exe/md5
+++ /dev/null
@@ -1 +0,0 @@
-d7b20f933be6cdae41efbe75548eba5f
diff --git a/deps/checksums/7z1900-x64.exe/sha512 b/deps/checksums/7z1900-x64.exe/sha512
deleted file mode 100644
index c9555b530aa4b4..00000000000000
--- a/deps/checksums/7z1900-x64.exe/sha512
+++ /dev/null
@@ -1 +0,0 @@
-af8f38679e16c996ffac152cac49369cf4b609abbd2cad07f49a114a82c6b5e564be29630c0fd2418110cf1a3d0ef3c9cc12f9164a69a575c91d9b98ce0df1a9
diff --git a/deps/checksums/7z1900.exe/md5 b/deps/checksums/7z1900.exe/md5
deleted file mode 100755
index 3ce2f9785b1113..00000000000000
--- a/deps/checksums/7z1900.exe/md5
+++ /dev/null
@@ -1 +0,0 @@
-fabe184f6721e640474e1497c69ffc98
diff --git a/deps/checksums/7z1900.exe/sha512 b/deps/checksums/7z1900.exe/sha512
deleted file mode 100755
index aff2a9e437ba1f..00000000000000
--- a/deps/checksums/7z1900.exe/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2924fd60f5dd636f643b68d402b65c2bfab5536122aa688ebba5ae142c7d04ce8b1c8e078f54db8adadce9d5c6fa74c0794604ecc16a4c5489f9ca70a6d9e1c4
diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5
new file mode 100644
index 00000000000000..915ee5c4bb6bfb
--- /dev/null
+++ b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/md5
@@ -0,0 +1 @@
+22c097ca7784442f1f10733db7961cc3
diff --git a/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512 b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512
new file mode 100644
index 00000000000000..b824dbcb73a08f
--- /dev/null
+++ b/deps/checksums/ArgTools-08b11b2707593d4d7f92e5f1b9dba7668285ff82.tar.gz/sha512
@@ -0,0 +1 @@
+915791ab9837f09db428060bd128e182dda38c8dc10e13f32f059eb8e8b477548e8ae2cd691522f98c88c510b78b2693018264b62d9cc76d5005ea8104d1539a
diff --git a/deps/checksums/Downloads-1a1d2e0a10209512f5b29e585bfd78e7a47f8f61.tar.gz/md5 b/deps/checksums/Downloads-1a1d2e0a10209512f5b29e585bfd78e7a47f8f61.tar.gz/md5
deleted file mode 100644
index f908a2236edb13..00000000000000
--- a/deps/checksums/Downloads-1a1d2e0a10209512f5b29e585bfd78e7a47f8f61.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-24a8b8fc2398d20c24a13ce73482a3d7
diff --git a/deps/checksums/Downloads-1a1d2e0a10209512f5b29e585bfd78e7a47f8f61.tar.gz/sha512 b/deps/checksums/Downloads-1a1d2e0a10209512f5b29e585bfd78e7a47f8f61.tar.gz/sha512
deleted file mode 100644
index 810280cef043b2..00000000000000
--- a/deps/checksums/Downloads-1a1d2e0a10209512f5b29e585bfd78e7a47f8f61.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4b652be535dce6a5cf36e546a31d3221f4c2534d1a3ac710f31f9ed2dfbeabd21b36c0186e4abe12cfee992ff557317aefe2bdeb9cb8b1c44da085b030719329
diff --git a/deps/checksums/Downloads-9f738d30e1256a4c122dff9f38536cfc1feeca8e.tar.gz/md5 b/deps/checksums/Downloads-9f738d30e1256a4c122dff9f38536cfc1feeca8e.tar.gz/md5
new file mode 100644
index 00000000000000..dcfba06cb5a0c5
--- /dev/null
+++ b/deps/checksums/Downloads-9f738d30e1256a4c122dff9f38536cfc1feeca8e.tar.gz/md5
@@ -0,0 +1 @@
+33c94b6f17cb9468a96eea927126c642
diff --git a/deps/checksums/Downloads-9f738d30e1256a4c122dff9f38536cfc1feeca8e.tar.gz/sha512 b/deps/checksums/Downloads-9f738d30e1256a4c122dff9f38536cfc1feeca8e.tar.gz/sha512
new file mode 100644
index 00000000000000..94e3f10d4f7d11
--- /dev/null
+++ b/deps/checksums/Downloads-9f738d30e1256a4c122dff9f38536cfc1feeca8e.tar.gz/sha512
@@ -0,0 +1 @@
+f7d94582df5afbc4ed68eb4ba47403971b1f173420b76d9f0e538cfc1190c71e2657bff892900c2bc565eacc54815c75a31c441f254dfb805ed10cc9b78402eb
diff --git a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx03.tar.gz/md5
deleted file mode 100644
index 08beaa9196c017..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-fe868923f93979e476119550525c2911
diff --git a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx03.tar.gz/sha512
deleted file mode 100644
index a86842597ff6c4..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c3a6cf6404599c5ebc4ba2528003d2c70f5cecf8b6e690b9632f86cd6da607cd770db0925ff0d798d06f2a7cf9e32b5bcbc449adc565a25b60d6c931d9d08f4b
diff --git a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx11.tar.gz/md5
deleted file mode 100644
index 9aa9da630dc82a..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-609b33d8fff60108196e49f2a3de2735
diff --git a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx11.tar.gz/sha512
deleted file mode 100644
index 52d57f2c1d0b2c..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-gnu-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-348d76a0f41b426f8eb82f506cab389d767aa84654f4a075dc28acae2c3b7e359655697ce2b9df16129c91f70ea87eb874195595b02baed60f0bd32e1d4e44a0
diff --git a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx03.tar.gz/md5
deleted file mode 100644
index e5cdea3bae69e4..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2e6bcf2a15e27f6e6a7ad61e99fa23c9
diff --git a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx03.tar.gz/sha512
deleted file mode 100644
index 5100ea9cf46ef0..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e47b4b183e09fd6045e8336878ef2922d48de7b6c0241a77e858ed477adf0d07e9aea840fa41ed19d61754c4a3e78715a343f6f25b14eb1b2ada23e3b05846f2
diff --git a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx11.tar.gz/md5
deleted file mode 100644
index 6642dc8671a3ff..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-635000ec5425c8e2b83f3d3b2b51e140
diff --git a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx11.tar.gz/sha512
deleted file mode 100644
index 4c787c37361686..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.aarch64-linux-musl-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d103a6bf9e653446d3ce0e68f8b255d4a8c0a22719314b2a3a311452beba557bfc91d51ad44b3679e0dfa24d4cde7dec28441cb08a748b4394cae0cb0a2d6241
diff --git a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5
deleted file mode 100644
index 84ca78a0abceab..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2ba5a3a0488fabf3fa383b0eff8fc070
diff --git a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512
deleted file mode 100644
index b1726553aa09d7..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4e2ab35a4432d0bfa4a484262540e2d78011ed1facff4aaf8d3af8e602809031291df6eb2912a1835d25b16b4777b5ff80ad943ba186766c8e038f30ac0e7c23
diff --git a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5
deleted file mode 100644
index 1ce40c32b1a45b..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-22c11db0b11734919bd29d6f0a188c3c
diff --git a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512
deleted file mode 100644
index b3bd6246fafd47..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2eaf9572741c3537d89ef51e38fc1898d051dd60e30fb27a4c2b91700872a079ef0c432d6e781caa678b57a7bedb57e1f8efbfcc10291dd1bd4154ad0f8f81fb
diff --git a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx03.tar.gz/md5
deleted file mode 100644
index 5a289cc527c722..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3cd478ea53f584167425ef22c54a3932
diff --git a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512
deleted file mode 100644
index 52de44cc4e2579..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-21cd917d2de42abc06d81bf0a80714b1561156d1397d069d0f932366d159ed6a716746974d3c204ab8fa06d7ed4747c34e5c7e94dbd6efa2b5186ed3457632e2
diff --git a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx11.tar.gz/md5
deleted file mode 100644
index 7b1f4aff5c6c98..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7c0b8c0b0625e8a4b7841ffb31e6623a
diff --git a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512
deleted file mode 100644
index 5a238c48c84ef1..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-5fc976fc29a3b91f11d44f09f8620d815b2c2ab86400ad2c41b2ce88ca24e357e6c30b15df22aef47187184e4b5fa9133c0c5baacf164309c8efa3cda458be81
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx03.tar.gz/md5
deleted file mode 100644
index 0652c454b03c84..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2ab6efd0360e0c61b57f942469fc42c7
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx03.tar.gz/sha512
deleted file mode 100644
index f1605abf6bc2b5..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-83d029ce5b5348490929eb93f6c41b2ce5e6e42eb124e3b96f5d45cce2688cb500fc031f1225c9eabbe966a53177b908efcbddd1be4a4b14ed06532e02578250
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx11.tar.gz/md5
deleted file mode 100644
index d387609c2554cb..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-57d1b9c8d468b79bfb606a927018a8e6
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx11.tar.gz/sha512
deleted file mode 100644
index b17c8f940ae363..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-linux-gnu-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-966aad06c5168c70e06c511a71117f03bcf12e041262ce1f4a26cd4c3db2dd24543db36733e1ccb2c87b8f45b1c7ef9bbeb1912215a70b4a6bdcfa73cab61a8c
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx03.tar.gz/md5
deleted file mode 100644
index 8fa9b87734c1b7..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d90a9f2036bc78eeca2a3ccfed237c86
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx03.tar.gz/sha512
deleted file mode 100644
index f153d03acb8a1b..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f8e0f9313e76b14c3c8dbf1012780da7ebf16a07c356e63a0182d92360e53dd27480f596c7f1b70f55c55df51650998a84d7ad7d0a42eb85ea72850a5f099e90
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx11.tar.gz/md5
deleted file mode 100644
index fa6729739aded1..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-362af1ef3ff4b58eb39a53eaf8c76b66
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx11.tar.gz/sha512
deleted file mode 100644
index 70dd93bb5ce4c9..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-linux-musl-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-07c87d21644e4e02b48dbac064a1a3d7849f5f8d9e94ade30a1c513072a7ce9e66e1c13fbb5651907dfb65b29bd88183621500326fdb7d0957a8ad976fb0889e
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx03.tar.gz/md5
deleted file mode 100644
index a85fdb97864d05..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-078b5fb610e8d2449003f27f6462bf6c
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx03.tar.gz/sha512
deleted file mode 100644
index 6f2b12aa339fdc..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f6876b2762ed416d0c705bf491b132552977408d2425ba0dd0a728362af7dd82086047417c71f982576a4476bebb35c571eb621c6aea381e0015c3a68b2d7e23
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx11.tar.gz/md5
deleted file mode 100644
index 81b5a734387cad..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2e23f94f52941f7ee61974cdfb83ec41
diff --git a/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx11.tar.gz/sha512
deleted file mode 100644
index 5d6712b4b938be..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.i686-w64-mingw32-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4cdeb4c9dc30ed9f8121d9b675c51cd5efe427e49f27952e51db166dc94ee18384a2ba906ac34eb72172edb063d001bffafbb7f98336d68b408dccbf8bc7ddcc
diff --git a/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx03.tar.gz/md5
deleted file mode 100644
index 7c80b5547518aa..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-39c5f1fd25e156a9cd0684ecfe3ce12a
diff --git a/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512
deleted file mode 100644
index 0467df21b32d37..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8a74bf5a694279b965e4963f365570127706ea605583319f5418081a8e75c505ce4524ac087b04cab3d02cb931a2ffb308e166e192d8ca69937735c369f68a48
diff --git a/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx11.tar.gz/md5
deleted file mode 100644
index a2203147c2b233..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5ac1a267e8162ad1bf5ca31043bbb805
diff --git a/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512
deleted file mode 100644
index 6c71eb27fd5eed..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e2f526d5d150b8672877fcb5d7347c1293213fd79bdbadb49012819af1e51969677b9abd360f6dd9c5f6a3431b8c18ff7a2cfec009c3d05882bdab716e366106
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx03.tar.gz/md5
deleted file mode 100644
index 9ec13bbec8cbc6..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9e1e0397b545d1842bad8b61a5209223
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx03.tar.gz/sha512
deleted file mode 100644
index ff182bd691e600..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-90ddf97e3c7f879ddbf25523ad981621a5b7d46e7f087f4c3cafb79590afbb6de7e181b353a01c06b8a24b12a153ef2bdae7aa1417987e6bc5c45a7e3647033d
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx11.tar.gz/md5
deleted file mode 100644
index f29a2f725b1e46..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c92f5a49f9e17e15f14c722d068c6be1
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx11.tar.gz/sha512
deleted file mode 100644
index f43ce69b0884ca..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-apple-darwin14-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-5846b75741895f11818dfdbd9dfe8352ecf1d8a794ab3d2d7c2cef36a64d3e0636c3c8d1a15c30b2d344943c23b84b7781abe811ccc09e39e98e92465f56ab5d
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx03.tar.gz/md5
deleted file mode 100644
index 4ab844292c6043..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-332a3fa5aacd4d1d524b136887ec965a
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx03.tar.gz/sha512
deleted file mode 100644
index ef9bd59be46141..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f62fa0dfd541bd02d545547e3962e437607272bb8e0a2064357e0d9136957f1deecbe1752cbfcd033ba474c25d7fd0c39a97f761f0719d77168300348c924ec4
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx11.tar.gz/md5
deleted file mode 100644
index 91f8aa315640ff..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-03fadd39ad1121c5843fbea90b445667
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx11.tar.gz/sha512
deleted file mode 100644
index 5c396065db74ae..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-gnu-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-98443121e137a5965d6cf3dee9a1279d8889795dfcde0129957e78ee71225a5bd8055e76ce5ee5cc3235f30e7bf716a78438ccee8c202d5da3f9f3c12a5c3e72
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx03.tar.gz/md5
deleted file mode 100644
index 8ef98b0fcabec4..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b5eb0b23fb1284b9b9629cd44ee0e00c
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx03.tar.gz/sha512
deleted file mode 100644
index ca616aff117f86..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d2a471e9d1d6d86f60a51ae9be4b00f03ba7845f3f902f5ff03338619f6a9fee6417f06b7890940a5f2defc8d745f1322ec093b1eec63bf9cb03e792f42530b8
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx11.tar.gz/md5
deleted file mode 100644
index 96f3bbf3130a46..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a61ba5da78cdfce5ea71188ee9d5237d
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx11.tar.gz/sha512
deleted file mode 100644
index e62bdce138177d..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-linux-musl-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b6bde58c41e478a856db59f296e0453c70a4ac2e15d4d67a89d57c0c9c85a3bec53134a0d54d8c5fd952556a557c9aecb57db42783ce4c44e442c3a10ba589a8
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx03.tar.gz/md5
deleted file mode 100644
index 344447f0471c7e..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8762b66893a5b4d489afdc909194bc46
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx03.tar.gz/sha512
deleted file mode 100644
index bdbdee4e7391bb..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-bca104c1943206b55e7650d9fefa00e12dc0e87402dcd045a811c85cb1d48f61a2b19f5f23988f27ff0b02d415383d0939a11c27ed6ba9c985ce17fc667c0a78
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx11.tar.gz/md5
deleted file mode 100644
index a26a87278c40c9..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-de22d235d3209ba4c2960c8df9f270c1
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx11.tar.gz/sha512
deleted file mode 100644
index 261bf236dbe3ef..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-unknown-freebsd11.1-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4dde41c1ff15e21a04e7c6e91c82f03a643afe1f79a6f74e6bd3f3d47155a2e6ca5897008d9f4439bd923aaf59326f7fbf47c45b0bf32a5b41ae6607ce0b64fb
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx03.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx03.tar.gz/md5
deleted file mode 100644
index ea008cbefa0bab..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-45da832fa92998fb9fd44ec1cd05abe9
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx03.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx03.tar.gz/sha512
deleted file mode 100644
index 302bb9d49bd486..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-5e51fb5689c05272dd4a81edbcdc41e0ed603ea86213f466be2beee201f00cc3763dad54e51c0336440017cb11a307f5ffd1389e920e9a21dd679bca3eeb475f
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx11.tar.gz/md5 b/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx11.tar.gz/md5
deleted file mode 100644
index c132b4402c33ad..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-65399d37829ca8dc576786b07dabf54f
diff --git a/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx11.tar.gz/sha512 b/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx11.tar.gz/sha512
deleted file mode 100644
index 88f940a1dd3fa2..00000000000000
--- a/deps/checksums/GMP.v6.2.0-1.x86_64-w64-mingw32-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-073c6b07f4f419e18dd1e315d1d2f2bec22d9954be5be2ff6fcd27e6f41054c66fabbed650528d652f72659f9d353b3cea3a50230df3f077a286c40438fa7916
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx03.tar.gz/md5
deleted file mode 100644
index 0216ba0d5c5758..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e8024bf7b5a2bf8d60ed833c48d64c30
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx03.tar.gz/sha512
deleted file mode 100644
index dc0eec015603a8..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-85bee115865a9f015ad30e8c4a24092b3910c22e10e9cf0e4fa7592fc35a102d3d1b864dfd112e0c1c4a83ade9196c7480704fa0893de3dd03e9f7a96427911f
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx11.tar.gz/md5
deleted file mode 100644
index b3f46c5702c5df..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d3a606fe6e14db4a84dca5681b10cfeb
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx11.tar.gz/sha512
deleted file mode 100644
index ce33af253c4daf..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-gnu-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4d4a0680327b475e3c6fc8f9b8bb48e89c3a4e5b3584e8681a82b0849539645e820181af4eb2317b15c424eaccd7f51a75fd62486bee3121e7f3918593f4f948
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx03.tar.gz/md5
deleted file mode 100644
index db9207676cf308..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0947966926ca83089aad09d86815b6fa
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx03.tar.gz/sha512
deleted file mode 100644
index ae668694fb0326..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-73120f8d846905394d2d9f3a29208afcc8880c8984df88f7afefd1cf14825c721656162bab90332b319a2dad82c7a9f32ce648a458cd5e815bde6d7ffda04d39
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx11.tar.gz/md5
deleted file mode 100644
index 9f8704ffd6b591..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-06469513d1fab1038cc24d2b009eeb97
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx11.tar.gz/sha512
deleted file mode 100644
index 60fba0620c86db..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.aarch64-linux-musl-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d3fa4ff76895ae15abbf6359a045b7942a0d92051b97ac30f38038cd25e9092f745fe75df07cf73015308ba72da85047096264bc26e7f7ebf2c5fe6044b2f5d5
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx03.tar.gz/md5
deleted file mode 100644
index 0a9e93e2030d5f..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7a94bfae0f955b820b860d8da494f109
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512
deleted file mode 100644
index 360cd16b743260..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9a119958026e60b269e820ad852801b977a43c82026e21bc10f0c38bd0873d8a0ca4260aa4352696103414b8f44f64f8ab89ab4dc350f3c6b3a1ebd55e3b4a1f
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx11.tar.gz/md5
deleted file mode 100644
index 0b6e2f5854b99b..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-39a99af902efcf6df48b1921fe205794
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512
deleted file mode 100644
index 74ca802197dfcc..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6181f9d053c739984e25e91015cefcc26b2bbb82ef5379cd79744c708b5ea2cd5cd4ac2bed0b35db72a49b86635c9c5057523421a1194541b5391f16c6a56f1c
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx03.tar.gz/md5
deleted file mode 100644
index 5e1a722692522b..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-61dc0291bbf721935c63ae575d1245e7
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx03.tar.gz/sha512
deleted file mode 100644
index 644457c8f1340f..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b3c80406388987d401dd924d5b294a6976108f05df00beb856784abe7e4be4b610bf1e0cc4ddee252fe992861bc1aaaf41a466f8ae0b94d9ea7354a45d6418d8
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx11.tar.gz/md5
deleted file mode 100644
index 17b6f9cdb8bd74..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5d67b7e54d0851534841a3968568eb07
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx11.tar.gz/sha512
deleted file mode 100644
index 289b2dfad602a9..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.armv7l-linux-musleabihf-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d5d6cac18ac68f2122eb2624f6b3288b5341c2f9a18fca94dcb0d87fc028c468b553c0c14ab4e139fe313c29b15b48bed601aa896e2408a144d844803730e381
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx03.tar.gz/md5
deleted file mode 100644
index 7a22e290eb16da..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-622df46f071bd3459c530c6dc00243e9
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx03.tar.gz/sha512
deleted file mode 100644
index ddb1f65aebbb95..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1d873a4f344c70d84d7b31273c3e9dbcbe54edf338817ed4f5799712f4569ba3cb965d98d15f684ea1718ee3718d0e7b0acc4bb48bbcd2964fce4e1468a92ec2
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx11.tar.gz/md5
deleted file mode 100644
index 56b61e67d62386..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-732d2cf660dad8c2166f337275eff3dd
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx11.tar.gz/sha512
deleted file mode 100644
index 960ecaaaab8a0e..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-gnu-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a2808a77f3aee834c1aed4bbcb102037bc3f5c4dbaa50b5c0aec8e1627b281cea6d6225e92a60c94c722873e8520a7c926f34fc3a11e4689dc82b2b7cc2a2b43
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx03.tar.gz/md5
deleted file mode 100644
index c220b19cf32acb..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2253fa3009616c7a30f1dd758a3d1427
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx03.tar.gz/sha512
deleted file mode 100644
index 878ac94ba79d08..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1c1b15443d809a6dd08fceb5b8cafbbfa2bcb1a105196d3ca92b43f3fcb454db1e7f31920bef67b52059f5d5ed2d7673f7c609b9dbdca7c5cbd1c1a7ded6c7b7
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx11.tar.gz/md5
deleted file mode 100644
index f5b13224ea6110..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2cc4df8d06d65b24ece512e3d5e51372
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx11.tar.gz/sha512
deleted file mode 100644
index 1f93d74e58b9a6..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-linux-musl-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e12d86b21234c840718592eb3325c97ea7194fe14ffd149249b53d90914064e0749b27359d772475d032becf74047172bc598674bf4d20bc4bd14aad60d033d0
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx03.tar.gz/md5
deleted file mode 100644
index b4cccecfcba99c..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6723ced2c9ba27ca863a2b7f20c27b54
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx03.tar.gz/sha512
deleted file mode 100644
index e6ea04f587f732..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6c9004560ffd7f1bc56d86e32fa19aec5856117409f0bdc1dc56f2902fb388df98947b58c84c35e806a3776dd33b2e7cb9c740cc6a73605caa68affe1864368b
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx11.tar.gz/md5
deleted file mode 100644
index 1784fb323add15..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-078ff5d48e7363352c1cdec44d2af350
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx11.tar.gz/sha512
deleted file mode 100644
index dfbaaf8d98bb4d..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.i686-w64-mingw32-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a1b5d27c9b7f20e74d321f405343ed91987990881d7f5eeb438fc506c3a594eb4cb5d4e694e3d221bb60b71c3a482303c56c69e30be36916d82ea6432819ee49
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx03.tar.gz/md5
deleted file mode 100644
index bc9c3eb978cf91..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-532e6a15b1340ea72093162222c1deca
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx03.tar.gz/sha512
deleted file mode 100644
index 95712ce01ad97f..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2953903586486f36c867f42bddecb30d4356511955ec3d90a63697e2ae3e198db839bcb221c4fb963177e1a235091d135f7e61e8ab2dc0c08581b4c194651942
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx11.tar.gz/md5
deleted file mode 100644
index b3cb1f0dc1a1d0..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2d2255c86b2e38d1eb480d9681358f93
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx11.tar.gz/sha512
deleted file mode 100644
index a946fcd6084ef7..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.powerpc64le-linux-gnu-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f24b5e85fcf674a8dce06d0d2642035bf5104dda1650ee6483cdaccb4a8b388176ee30195fc418bc874f843a1f474392b0928c22d12eeb9ce74dcf91136e6bcd
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx03.tar.gz/md5
deleted file mode 100644
index 09c62b460401a9..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e77cb41890b4a7608fc3ef900f979a80
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx03.tar.gz/sha512
deleted file mode 100644
index 3c908cd0a3d39c..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-15d5cff7bd8933f88f161c4ae28f797ba0651ada853c3022acae20a36fc836664a474f2a22590608f041023eb72ea7ae32f81de788dd91924273dc4229128e1f
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx11.tar.gz/md5
deleted file mode 100644
index bea4fb29bcd0db..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-32977c46c5f29845758e2829de64a2c7
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx11.tar.gz/sha512
deleted file mode 100644
index c6ff99c4481e89..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-apple-darwin14-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d9cbff3772727e02f6f6dc09bda0bf89b53098b5dedcab06cae9a8f9dac5e589a11da097de27ac505e4935e966552b2dbc021683b569957bbb68dd9401b50ab6
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx03.tar.gz/md5
deleted file mode 100644
index a8558ff662de14..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-538ee8f39b2b18fdb7d0da0f9356d362
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx03.tar.gz/sha512
deleted file mode 100644
index 34bbb43d35e47a..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-07b38bb921043d7d16f9e137c888a0d91b9176e9f45f14960c694a158297548309f081ead6683cf653fd2535ef59c7fab1aef962dee9a5e4c78131ffa8013113
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx11.tar.gz/md5
deleted file mode 100644
index b49b19ec1786a4..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-1e3ee9112e0bf0e8509727536d089b1c
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx11.tar.gz/sha512
deleted file mode 100644
index 7e42eae8461a20..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-gnu-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a015719599b9978ce9440d9525d991499e37ac1033259cc63e51fddce722846ba93b860e31f0eede74f18cecd28a64f5ae9aea97d6d26f07094934f0ff780ec5
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx03.tar.gz/md5
deleted file mode 100644
index 67c38db2d1084b..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e6571440b2392c3b45c13be986bc305b
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx03.tar.gz/sha512
deleted file mode 100644
index e9949b4f4f4aef..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-df7d77f8f2791639152fb0775e6bc9e2fdbc4007084c1d490afefc05c264721d4dcbb147b35bf8a1332b4d026fc8838ea7ea2f60e09cfa574e299cbc41b6d8d9
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx11.tar.gz/md5
deleted file mode 100644
index 8905c154c4c29f..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-50a36c5b0a5629b9423ee1a69ede412e
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx11.tar.gz/sha512
deleted file mode 100644
index 6507e29fe9a65d..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-linux-musl-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a4f2394286bc5bce720eccb5f3142b5b40afae4fb0bc24bacafa635e7bf37624dc65cb5d611379af60b1fc8838fe4a14bc52009b8eb45dd6897a2f72b66daa95
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx03.tar.gz/md5
deleted file mode 100644
index 84565cb2609898..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c67f938994e61ebd3798d4570b8c1e93
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx03.tar.gz/sha512
deleted file mode 100644
index 6949a372a8fe06..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c0b29c8162c3b87b04a7d1446e1fef14064bf4f93f14dbf80ffe0554972ea80a37089cca5a0306aaca87427f6638a3f131b1021df5aa1e91e7be729839d01738
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx11.tar.gz/md5
deleted file mode 100644
index e1594be1e4c1d0..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-214ca9e0c6c57e018bdd85a97911f15d
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx11.tar.gz/sha512
deleted file mode 100644
index 68eda3a1ae7573..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-unknown-freebsd11.1-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-974a2df704522d8dac788ed63116170b6b88e559a9f05d780c33b5288d915566f83a10172b501ffb320c395d1a7bae0b98c6efb381aab9deb34944d697553cb2
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx03.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx03.tar.gz/md5
deleted file mode 100644
index 74b4fad94c7c4e..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx03.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c8e42bc6c633e2647b3f994168fc54d0
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx03.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx03.tar.gz/sha512
deleted file mode 100644
index a11b718fa69522..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx03.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c3b4562355075abff7f7e4a3a8ab1ba391fa9843097634478253992bac3e041228d1d1344498051e2feb9ca8474ffa9668e8bd975417d4632aebbbf7510bfafa
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx11.tar.gz/md5 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx11.tar.gz/md5
deleted file mode 100644
index c91ea97154e6ee..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx11.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-234b60aca5887e6fdca6529ca5464aa0
diff --git a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx11.tar.gz/sha512 b/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx11.tar.gz/sha512
deleted file mode 100644
index 3858ee18908841..00000000000000
--- a/deps/checksums/LLVM_full.v9.0.1-8.x86_64-w64-mingw32-cxx11.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-762767343f63baae33ae838e78e6096ac045ef152d79f961a73c906a8ea7bfc1eab823a1109db09f2e4e90c90b7b402752fc165cd7b78d2d9516a0df4f18740e
diff --git a/deps/checksums/LibCURL-70aa2cc3f8fa4488897136a0a700b5425bfffdfa.tar.gz/md5 b/deps/checksums/LibCURL-70aa2cc3f8fa4488897136a0a700b5425bfffdfa.tar.gz/md5
deleted file mode 100644
index e869b0362ddc1a..00000000000000
--- a/deps/checksums/LibCURL-70aa2cc3f8fa4488897136a0a700b5425bfffdfa.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-829dd514bd1e0d2f9d8cc6c8d5085bba
diff --git a/deps/checksums/LibCURL-70aa2cc3f8fa4488897136a0a700b5425bfffdfa.tar.gz/sha512 b/deps/checksums/LibCURL-70aa2cc3f8fa4488897136a0a700b5425bfffdfa.tar.gz/sha512
deleted file mode 100644
index 83022f272ed48e..00000000000000
--- a/deps/checksums/LibCURL-70aa2cc3f8fa4488897136a0a700b5425bfffdfa.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-080fa389e9a924484aeb50d4bccd314da7d264613d0951f3b8dd53f28f4080abe126cb3960dcb69434a9c6d915866de01977625960117bc5e9d77928efead1cb
diff --git a/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index ea179a1f21ccd4..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9429a6debbee10b7013ae6741658c2f8
diff --git a/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 7a2082ae0ea815..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b301b37c0d71d13d6d08ea5888f9f62d83c3c9f0be080e5852954e418814c34aa6024c338dd074b51343de5fc4026caf4310c10fb7243da25f2de3554064fb41
diff --git a/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index e216eca5c9e916..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ce8f2be6ca2d9b355888b059087c4910
diff --git a/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 2c8e92aec4b615..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a672d2aa069a90e941eac69d0aac55e84be479c9f18f03884bb586e558df2a77c4272e926f1e808b8e17bbb2b1eab670d9e1a8057571b776d30fdcc69af88895
diff --git a/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 8c8467db74afdd..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8679fcc7fc6ccfc9689184d8ec3f99a1
diff --git a/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 0eb602b8a88302..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6257d64d749c1fa518d64f243b65290152026f7f28a3aca2218d72fddf0289e2410d4936cecd9577721bb9c4f664042caf11e5672695cd0eeb1d692517eac05b
diff --git a/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index c0c0209e589db7..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-99505f07a8be8097535cd11261c3c115
diff --git a/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index e1fb44ff9c4a15..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-dbf099ce8a1930fba08ba3369934d6377ef9665794ffe2ae6e89cccd2f58b9983b7070d1a86fc4f85c8b0667e209a498d429f392ee85a23344ba9e2e561ebb4f
diff --git a/deps/checksums/LibCURL.v7.71.1-0.i686-linux-gnu.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index d320451914b485..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2efc802e9e6d84f280d3e10f76564082
diff --git a/deps/checksums/LibCURL.v7.71.1-0.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 528cd73a407565..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-0099f74b49a5dd53479dce05677219fb31cbdbf3a509234fba7f6f19ccaf1bee6be8e894f849f0130f1ed41b3f00bbcc3a4c8888d934da11774461f5d9799633
diff --git a/deps/checksums/LibCURL.v7.71.1-0.i686-linux-musl.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 3ab9de2ee7c1b8..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5fc430d5b4dda597f9f739ee33eeb18c
diff --git a/deps/checksums/LibCURL.v7.71.1-0.i686-linux-musl.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index a9b64c54641b9d..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-bdff1b186e4f6615da45dd4f9ee99ef0868f5a1d6e305fa4c3a57059f0aec101916b97de14cb62eb7e8ba95a3ddb587b452edf63c99994f93bcdd079217d0f3c
diff --git a/deps/checksums/LibCURL.v7.71.1-0.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 2a77dcf0fbf94a..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5be87ee4e9675d31c3b9fd58cc0789d5
diff --git a/deps/checksums/LibCURL.v7.71.1-0.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 778c7c119c9777..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-fce0707d0c42744d4b93fc9ff2ff7ea329d2d89ad597d832e8c4c2b942755f63443d99667040c42659b283070bd69d0572eced0a3f90ac054d9bfd51f42ed611
diff --git a/deps/checksums/LibCURL.v7.71.1-0.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index d090fbc9df9f57..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-19c2d84f551de1154a2b7021bb045198
diff --git a/deps/checksums/LibCURL.v7.71.1-0.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 41e796d0d755bd..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-77bc7838aedd69ea836420585a5b1ad51dd99daa2b22e130ea4f217ca210d8d524511d1b86bc4ff18325c7417fed13c8d05d2e2ffeca81ebc05e24bc66fe62a5
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index b837eeac80c954..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-756ae7b6f45d27ca91f0d3f0b1b912ed
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index c8d445c4c76c4a..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-ec0442d72fa08d589097521b82360f154d54c51f37702b887ee9a8d9e36c189d66b146f7a7d981c0483def3a5b0cb2f5f1363b56f1cf911a53b62e33c776a2c3
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index f76343e302008b..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7fa98a535ebaeac2632befb6d1fbbaae
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index b91ff30eded311..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6711f413d3d1f73f388b7605fe4620d420a777b0f450ca1c0c153ed9d2033f6fa15229ce113a6a3da6f5454930603cb4d80ae35c90e8b1eb08fa4e69069acd2a
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 240556d09b1238..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-bc4829f55dca10f7a7f690448d7c21e2
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 39e66fc96744e2..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e5e0c585a61566053238c78e6c049df34d6001cec6a1d1b79609f28c91bd2bc03eb852bd99540bd475d8fa603cafd113ab8ef8ae0d78910c1e800136de40ad77
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index abc1ca2465f5b5..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d20046a11ae0e75b23a44425f5d55437
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 492421e8c23b11..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3478d6259bc7846662a49e95b2c37c452e635e47717ed4132a1a44bc2bb292603b49c895f75e807d09ed3d4f946694f10c9108dd313deb8ede8b3ea2f1f9779e
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 50aadfc9352807..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4e15d303c24763383bb6ac6f9f5241e2
diff --git a/deps/checksums/LibCURL.v7.71.1-0.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/LibCURL.v7.71.1-0.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 9a4474fffc77b1..00000000000000
--- a/deps/checksums/LibCURL.v7.71.1-0.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-658119a4a842672d451dc167d01d7629a0726e8be0520d288977141c35a4e8d40b91398d0d7abebb63d547d4c1db8c6c1343f85dbf3a0bd8d6d3d51435f1922e
diff --git a/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 2e23fe9cc98c7f..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a39e6aded55f15bc3ece658671c99315
diff --git a/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index d9312fc10e2878..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-229bee6d28cd561af7e9d7f96cec430e85308cedc992d6c0ecf0da2ca7f3da117102a0d465da9348c87d99968f28d6fccb0c4dc8c63d8df60480d85b757fd420
diff --git a/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index e1732849330b02..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-23069d3105f557b3d549dd33fa2f695e
diff --git a/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index fe8f5c41679983..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-fc129a3671512f5f3f1758f015080770d1dbfe9e5f835267e8a473e935a3b68caee10967b190936899b436670a1989983b27b6c3b7b400dfa9a8ab6a204a502c
diff --git a/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 77e4a73e6b0025..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ffde5b5d4c9edcd8f09dccd2a69efd65
diff --git a/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index b08e3c2fab3d42..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1be452350b0cb1da8729a29c9a9f76c238c6675ba327e73e7ba85ee3d5f6171518dbf232bc6983c10b41c1505ad9d3960e76215249824dc8538f6ffbd5652d02
diff --git a/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index d72224cb4a28e0..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d6a676ba79525923294dfa98f6e6d2da
diff --git a/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index 9681b41947fd1e..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-78f1adc43b35848de695fd1a3ccf63028e38e997817c66a86113e66850ec05a39dfa9c994a7ae3296f85d59f50d759c3ff1082ef5e3b08433cd175bfd9f1749a
diff --git a/deps/checksums/LibGit2.v1.0.1-0.i686-linux-gnu.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index feb6a2b6aafd51..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c35f04d41788f750415e7679fc5396c6
diff --git a/deps/checksums/LibGit2.v1.0.1-0.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 59e8e503971aa0..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8b320e57220df522fce8fc74d5e0cd1f88f8e1c380b19ef1bb1ee6aa7dc137d1e19702569169d6cf435605743158cc95fc1a621f80a8f7283f6da305ff302ce2
diff --git a/deps/checksums/LibGit2.v1.0.1-0.i686-linux-musl.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 32668e9e8a9c3d..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-051b147a287c99cc9f07251f0be287f8
diff --git a/deps/checksums/LibGit2.v1.0.1-0.i686-linux-musl.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 88a6caa681003a..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-aaa4651605a0df3459bdd28ae733eb0798498f94493505d20c1dd55fe12e45a026dbd91fa02c1fcc27eddf4128d0c62ca042d3571463add685ad5ec6516a313e
diff --git a/deps/checksums/LibGit2.v1.0.1-0.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index ebff2e035f5a91..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8b9eb7667b2d28d00f32a7febdd24871
diff --git a/deps/checksums/LibGit2.v1.0.1-0.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index e2bf4cade22867..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-63cc2316291104baf852e318ea6d19718b3c44de8129bf516f6b3ae1ab45916e12800722382d1f9fa066def1643cc72282c2b0249636653fae766aafb265cb20
diff --git a/deps/checksums/LibGit2.v1.0.1-0.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 0fc42239057aea..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5be82b37f2b2ab4efb352ae614a13078
diff --git a/deps/checksums/LibGit2.v1.0.1-0.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index d744895dc4c5b2..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-80107205348fce11d66a073a18e1ba68c96f1e340c0eee4a56d60c24ac3841baca9fe9e258946914ef75444c023d75256af3e9f9f2566d784809f4ca18cd878f
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 0d69b2e98c9669..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-96ad7b7dc24971cdff6ff1a01310834a
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 7ee94132aeb240..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b629ac9e1167d13c8010f23427022cb94b5be0baaacbe216b3145d9fd7f67af3b8f817a7c58a67cdc30101c83bf82b9a3369df61e98fdf373035d17e38991b77
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index be776a073217a5..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9c20406f23f08356e8d217b16a327164
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index c706c03498642a..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-85c953bff1539907ecef5201306b3bba152b64c814c26cd23e78e33f7f202cba67c6d162f63aa188ca6f024fdae47a61b7002676fdf53e2f02db78e057a2a86b
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 9da60d600c3407..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-26654010e9216b11f5ded2a062b5b709
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index eb2832318dd9e9..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-0aa89f698b6140f54f27791ed6f750322e1c996e0b613f68d83251bcc1cc2f95d68431024fbfc45bbaec530a8b5f1c07be1b4adb31ca5a56afcaa64bd80f6ec6
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 629171df686c35..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-75b4d5260b09527df92daa4ce26cba05
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index a8b09eef785240..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b1e40c54afa69383f9e4ad6dfc71eba8b77bd48069c81f510be0f722376fa70c78e00011d6507c39973662f9b9338e151715e2ca903257d33761e7f0a191df31
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index fe72c995fa787e..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-76c12cc5794525bd5686ed9c9dcdd93a
diff --git a/deps/checksums/LibGit2.v1.0.1-0.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/LibGit2.v1.0.1-0.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 81a4d52c9c3393..00000000000000
--- a/deps/checksums/LibGit2.v1.0.1-0.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d378b89e8a3ebae357582b9ddd723cc25c6ada0b0b480bf6904fd0785d14773dbf9e15390763fb67e5d7265704d6b9bc5af9aa15298ec7802f544b32d43e0e75
diff --git a/deps/checksums/LibOSXUnwind.v0.0.5-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/LibOSXUnwind.v0.0.5-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index cc1cd671359a2c..00000000000000
--- a/deps/checksums/LibOSXUnwind.v0.0.5-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3e092d738b24f7a121065b46af327f1a
diff --git a/deps/checksums/LibOSXUnwind.v0.0.5-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/LibOSXUnwind.v0.0.5-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 6bc69f952730eb..00000000000000
--- a/deps/checksums/LibOSXUnwind.v0.0.5-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6c484cba8b5151814764d73a8bc4e56292831ae48eb172c03135c8d83e49571380abe275209fdeb8855d0e6a78d1a4f12e15ef4ab09eebee73056cd406fe017b
diff --git a/deps/checksums/LibOSXUnwind.v0.0.5.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/LibOSXUnwind.v0.0.5.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index cc1cd671359a2c..00000000000000
--- a/deps/checksums/LibOSXUnwind.v0.0.5.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3e092d738b24f7a121065b46af327f1a
diff --git a/deps/checksums/LibOSXUnwind.v0.0.5.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/LibOSXUnwind.v0.0.5.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 6bc69f952730eb..00000000000000
--- a/deps/checksums/LibOSXUnwind.v0.0.5.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6c484cba8b5151814764d73a8bc4e56292831ae48eb172c03135c8d83e49571380abe275209fdeb8855d0e6a78d1a4f12e15ef4ab09eebee73056cd406fe017b
diff --git a/deps/checksums/LibOSXUnwind.v0.0.6-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/LibOSXUnwind.v0.0.6-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 3fcf3732601f9b..00000000000000
--- a/deps/checksums/LibOSXUnwind.v0.0.6-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-57ee184943cd407bcb0d19d0f8616565
diff --git a/deps/checksums/LibOSXUnwind.v0.0.6-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/LibOSXUnwind.v0.0.6-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 435a62b21af0c9..00000000000000
--- a/deps/checksums/LibOSXUnwind.v0.0.6-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b46997d83216da569a2337e4ff7da60e3332c9323b60ce2d9352ea0f84aff1b5d7cc849e20c5de387c422d9cb07b4d064b73a278f11d9dadfa6d2f9b28c5fde2
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 926e3e8dbb4955..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-36ef163789d9123a0af39bd96e8ff79f
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index b59259c5c8b0ec..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e4a64003193d9e25a4e1a23e67b8f1a52c478984cab43ed6c75a5e8fcebb4050b5b574cc553ed18c39a26b06327ebd4d73bec0dd680cfbd8f9ac6e431503a05f
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index f2d931de49fbe5..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-620dd756343f22afe53b6b00a25462ef
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 6cc9027a2855b7..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-7ac0e9a28647b4d411dd4e0ab76b2592f488ba76ba5c2651f42acc7e647f62c9804bd271225b2a652c697e6c4767a74ec70d7ba6e2682babd59a92dc9f0b089c
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index ea289d096e975b..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4a271ec41134206741057b1d073aa0e2
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index ca3a3cc7d75768..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a33d5e96070310b4f7c1a0dc6d17e5ff665deede4264526817b298bb32cd3554c34ca2b42fefeb1ca6f4077c9b42c62330cd60fdc9b101ca0063712f957a7723
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index 87a020324765a0..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-052b885ac693514f3065bfa7768b70a5
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index a6171dd843e308..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1af323ce0ed7d8d3cad594dd2e23bdc88ad2e1f0bd30d4ae6fadb19de4966398ec021203619873bce668d7b8afab0e16c1584b0c95faa4b8eb3535985fe300e6
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-gnu.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index 796cdc2160bbf9..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0e7ebeec5b7dc73f9ce2d2f3f81ee87a
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 554c62318a2d39..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c8b34527d47d5a2c91a3eabe36513d7e1c14998d14ca5e8561e0dcd219cf8f0c0e998229eb2755bda7d891f78d0d3326aa38efd58334fb4508dd10df12e5398b
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-musl.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 5f53b3c4306b7b..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-be9ecaa94737cdb551f2c1c8b880f2a6
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-musl.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 1f5e8f90ec9216..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-ff23a8437ae12a7f0f1f844d14f1853a54260cdcd0d3be4f0bc6032e3cb561290e25aac20f7532aa5561d3af66081fb7cd3cf5ed480b1799ffe86950383870a9
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 887c0038b2bd2b..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b323bb6d450bd5d6e2943dfbf66de570
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 56f7954e003966..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-044c673057552f18009510c012b90b98e50f9a2ac086b18e3faeef4cc7ca28aa1d894f4083c96f014ba4d0d38cfcb2e5c704c21de2101b834efb60c8940a05b0
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index cc11596bd29abf..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-64fa75fec57f25a758e42cab73b4dde5
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 7bd284fac2b4b9..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1d911876ee40d02c7ce81298652e6d62e7133ce6a408dc7ac9b93e8bf73cc89e1836ebccf9c946c3cf908fe5cc56648eed992351a1856ed7085d5af42683338a
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 7e28ceeaf344aa..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5e88e48a97ab8a5cfefb7538d4f1e070
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 6140cf46b94ac3..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-20cf8fe1c2e5d7735bb11f3f464ef9fac1f6f5178dc40a42abdb640a39b6d08724c6ab76f5c2ddf9ed29c9ff6898acf204702fc576a8cf2985e8c511075f8adf
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 88264b0e104b8c..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e1413e4b8017b830f722ec41e12b1d20
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 670f0b147486ab..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-ad2a951e581c3ef46bce3a79ba6e19c35e2ea403b2cd6417b539d7f8520488f57dc63e19ad516a1dcfd3b0f1cf68686d7ac9e050c07f5c16ca92b4322eda22d3
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 1008e4c4085a66..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4d89b52c247a366985b37ba94998898e
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 74c1f86bdf03dd..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-81e8e9731c5ff5b2349bf2c389a7da9eb57ac252ee5328e315b7562ef39a14b3cb274abebf5bb30215faff126569dd963802c1156fecfe19314a7bddff670939
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 2619c08c82a374..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ce0354563496eabe705db1716cbb4aa2
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 70c1e09d69723d..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9b29b82aa53d44e580959465132ce39acbbf785c24fe0d141429f925509ce81dc3cb134a2793425c143064db9ba556418d941b5d7f59a1d5e641df9e7ce9a5a8
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 8dcf66fafcf8d1..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-49460ec6b30eeb99a786f75d42facfda
diff --git a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/LibSSH2.v1.9.0-1.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index da3018cb8edfaa..00000000000000
--- a/deps/checksums/LibSSH2.v1.9.0-1.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-467d0ebff3af6d3d8f47db82543fd2bbf38fdc4b26a2694c24bb468f765e3b5f23abefdfe41c5d609f55aac6a41b24a7aa7d0ba0d72971f9fe192f257ef515f6
diff --git a/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 2955de37e027fa..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2bd32030f78a45b6195999407f510aa5
diff --git a/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index bef5e4d5d86a63..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1ccc3ffa4f5473e74f4ce342423bddc268a6f70d9743aa2f11124163db27f3e56711e85555895e2ff506a025f9dfbfec686ff457e949aa45d86ffef5ec6c2037
diff --git a/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index d7a9dce808eac4..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c3b6f24843c4be53fa72d995a9296450
diff --git a/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 713eb1f2f78126..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1659c0fa80c162411c284bcf85dfac2d6a699bf2b09bbfe7868ebade0c5e424e3bda416b345cc101f946dbbbd7fea677318d733ad12bf3bb6ba5555accbe8d45
diff --git a/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index de164951f7fa3f..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-71b09d3e80394d283756348854a65017
diff --git a/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 71ca326cc31967..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b360975c81b574a9ed673c106ab0a520eac42e594b48933a9e0526a1e00462da524b86a516d860945e04fe395ddba31acebe1ed8609aa65ccc7eea784ef0fb68
diff --git a/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index 4c68299162b865..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-40bf3062a91d1c24122e71be0479ed94
diff --git a/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index 3da89c2a0c7ee8..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-aa66897bd05f13daf682104b16329093f1234922c07d97e8134db0a1f499d17c8716704e4c8b21a00d148afa920f4fa1d2fd884a545c973b8a6734d7aada5fc6
diff --git a/deps/checksums/LibUV.v2.0.0-9.i686-linux-gnu.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index b176f993aa66f3..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-11477f56cffc636d48245ad623eea1c7
diff --git a/deps/checksums/LibUV.v2.0.0-9.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 54542c43f6e91e..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-da82d92db2ba2bf0c0b52928390bbf0ee5a8212883fd23a772c7735c7f427ca229229b75e8d7ff8964d42a2ab0614f1adfd156e0ef067b682931e0236fe7ef7b
diff --git a/deps/checksums/LibUV.v2.0.0-9.i686-linux-musl.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 609ecb4e59ebd9..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-739be10627a93a419f0a9f193232931c
diff --git a/deps/checksums/LibUV.v2.0.0-9.i686-linux-musl.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index c49cb60652102e..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8abf48cf61ef7dd1a7d619bb062ba919d97d9d68f2e3d04dd7c4fdfacf01e5bcc864ecd6fe5e8782ba72822764883583d60c5164c96fe030a979d3436278f2bd
diff --git a/deps/checksums/LibUV.v2.0.0-9.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index d0603d4804576d..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3e27de72945d3eda858798b8faaaa2f9
diff --git a/deps/checksums/LibUV.v2.0.0-9.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 47c2c429b9634b..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c0dd0c3dea0f9e2b7319f46733685a5ac1ecb6423886e669aeb14e1bc8aae4017ad994b8802dcc9f57cceaebf138f3f4328b97d2f0a64c317243ce3fe282dd52
diff --git a/deps/checksums/LibUV.v2.0.0-9.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index e6a0484d738869..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4231a4b2c1adb333f2ed603ad22b92c5
diff --git a/deps/checksums/LibUV.v2.0.0-9.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 47c6e2b165c24f..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-aef5f19dc6b1061f1ebd205e4663779498e0094435dc875e4ebca3dda67b0c11d5cb8eb88a20f9baaf82d59a1e83a5fedcdb7a6be9255d738aca1981656523bf
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 949a55c685a58f..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ac719bf85a3619e9d238d0efb1291ef0
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 59b8e810d7aca0..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-753e640aeca665b6e35deed35860d8e3f3076b9c1cfae382d029827e138bc3ff65a90bbee2f828a8371ac15bca29469c1720b3c1b9c29f56d8b402790c7608e8
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index a86d696b183380..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-446e2ffac64e64b24c47d17d03cdc3a3
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index aebd451c8e4d4e..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-185ec913e17aa00b866c1c7f2b4645c80ab8ff067ee1c176cc33c75e6161450994a795feeb46ca0bcbb80bbc727de8a4532ddee5b658fdc9bbf10b1676a54026
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 2c40c1f9253838..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0f7913302512ad1002ce77b2c619a4c8
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 95213b7850a179..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c8cc6d5a27bc21f28a1cb7e75b40947e9870ace136e50040fefe067c30398f3a1655ce28fa43b28e09ff743d5f1bbce061dff2f47ab67ca81cd3d1e6f360317a
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 20a55c8a252c3c..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2fd4a5b814194d588186485b8cc5b73d
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 900f3d55128b79..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4b72488b5afa6dcc64763155489a4aad0f438266bdc039c03ed8835cf14f6159c5151751cdbc61cdfb54253bf458b2af4a7d3d8cb6ad70ffb154df0082c2149b
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/LibUV.v2.0.0-9.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index aec56e6f816324..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-bab85c5eae8370f4058e06b6779c7dc7
diff --git a/deps/checksums/LibUV.v2.0.0-9.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/LibUV.v2.0.0-9.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 9ba2115db44b03..00000000000000
--- a/deps/checksums/LibUV.v2.0.0-9.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-78f42e12f99a9dddfec221c03e43fb76ac0ab664b6aec7f3f1a5f55fbc72c9cf03e2e7f99a4bb22b9a0e568cd448d52f5b91f22cf4aff6a3a3e5ddc6e5c963b7
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index e9c2f0fe1177cb..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e35f5fb8a303efee449c70d370f01925
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 0b8a627a875ede..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-da9ad44d45474af8ee0bde497956b6bb6df4b2144c40c562cb8bd20ea113bc48d0c1fbd32d714bced325ea534d74ad6029dda6c2f7e3269a15a1926ecb481dd3
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index 7b4a7aec151df1..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-25f0c6b4dd4633725545aaec4b988152
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 19fe2268a514a6..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f71f3540c311774f266e52ae463b91be817095a182920adc5b40d93384fa7661ec7f729542e2a6a3e2b9803babcfe13d57781cfb2f1465ffdc3a7dfabe3929ce
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 555b12be7a1908..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-cfba31485165d109f899060ff5c8d86f
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index e0872a17ac6964..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1eee02d647b51ab4703c6fe75ca69250c0d70920230d1705307124281f883a18e3485d439dce8376dab46d8d56347bc61e3f11355e0a988a742f8ea527062267
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index bf4e9906287359..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-43e681f53a3886c97fd8496f50310a5c
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index fff0858d6d97b1..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c70abd45910319e19507ae11d7abad202fe73f6e502cbd6b19c3d8d6d6bd05bd954a10cbee65eb16d9cfcbe644e3e7f9aa970dc7fdd0bf800f31a567bd6124b1
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-gnu.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index b187b1d67fd76c..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b1a3fdc79784f8af6efa7f8b180c6271
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 97771c8e3f8d41..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9f7bf582b1b5c3a2fd93ce80996ca6f1a7b5680f07fe6e4ddba90a8782c8b817fdafc742c8dd22936f677075619b1c5ec16f094ae4f577a1be9fcd398d88fcaf
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-musl.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 31fa1f2d9fc6d5..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c5aeb572d6fac24a91e71cf4be3a22ca
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-musl.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 42a24ca6b3466e..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9fe0592b9360cc742b1155a45de19e9aee30f6a88b928d53eaea8cd02126b8ce712f71228d69299370d83d8864ab13ba417c55046bcf0d1632c1032b7996a9c0
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index b30536ad1e3068..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-37cd841d90af1bb438e9a23141abf121
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index cb81efecaad46d..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1fedacfb681e41142ba2814aaf2455980e9993259cbc0aa1604c3490c905c841332de55a24453e6be9c30398c797ec633c73c7d0f3330fd16b6a11b8e303535c
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index a433ffe5d9eb28..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a63ab5e4a513f5fdb136d3485df2cbc4
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index a2d558d3269382..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-48368371903254799fb38c9a86f722334814fa7e3456466b904dba214607257510dca91ca21e7b6ef81640b031363d2c2414fd364ce878c6f12bd06046560165
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index c9baf752fcbdd9..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-cb3ae0b0367a72ff38180cbbbb82d05c
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 4e81e161559896..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2018321394c5babf02cb984214faf0510df74814e95fe95e9b28078016ea0b9d2e5d55f6626c27a1532f5f2405a988c3d6c5922bb7ec00f7fe0a316ef39cf7d7
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/LibUnwind.v1.3.1-4.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index d8a6eade0907ae..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2d8ebbc041661ad46fca761fb7860962
diff --git a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/LibUnwind.v1.3.1-4.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index a0aa5651c215cf..00000000000000
--- a/deps/checksums/LibUnwind.v1.3.1-4.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-982f8b7c2d649284883bdb5174aea36b6b3bea94b0e561cbfdbf476d9ceb0c00986ca0d6f68e2ab12e3ccd2504605278d6846c3767ed83f2be119b80a2a9042e
diff --git a/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 045bf4b4cde684..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-480455278ac8186c988be4fd13ab08ed
diff --git a/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 898fb2f98615ff..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6dd5bd83b8ae7d839201760e245787c5c3caf19c209b0098bcd786e67186529c8fb2901c507aaae3fe26d01863b1ded8b30aa547312a238c4de68f64f90cd550
diff --git a/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index b08e224cc3ad65..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5bd187d72846c25104009678d761e6b5
diff --git a/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 8d1b2a1ae0f093..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-86482e272578c72c3007d31103fd50ec449e2f92929f5ed991b947dd847e45f4362519e04c5407bae1845839c6aa11c6a5a817c5ef1418fa4ba67a03e395bf96
diff --git a/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index cedc60f37a6787..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9ee1860a6ab8e30d24e788d50e43d847
diff --git a/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index d8583f924db450..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-37fc21794e9afe69c0a1e7efcc550f511e7bc41e446df66ff9e9497a7ee81b9e4a72eac31b02305f0f22722ae15224897fad85abb7f03fa5cbb2b78d83fa1523
diff --git a/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index d2599f6597065e..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8478d8f44b7765f0efebfe816613fc77
diff --git a/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index cd63900c5ad1d5..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9681a299a4b71c723b5b1c48bfad7b0586f636e83c662233d142db426c146c4e228191480c6a626b030423fbe34e8a32338b3f0bb8c50dbe9f74ad0a0a57d6fd
diff --git a/deps/checksums/MPFR.v4.1.0-1.i686-linux-gnu.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index aca72fb9d07837..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-66d7d381aa1b706777bfca2f127384d6
diff --git a/deps/checksums/MPFR.v4.1.0-1.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 04308e666ff349..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c0d2d4c9be047739ccbf5417dd85b3407eb86bf56a9a7ca33ab62b4f3976037f9bfd079bb758054623c40ce405d98dc3db44a1619353da9c71e91dc0bc281c6e
diff --git a/deps/checksums/MPFR.v4.1.0-1.i686-linux-musl.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 4017b15a54fb2c..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5588d1db9f15911d9c329d35d982edc1
diff --git a/deps/checksums/MPFR.v4.1.0-1.i686-linux-musl.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 50ba7a584f045d..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-27f3cdadf1ffc91dd4f6d95e970d342f48194251b8d0e7729af413e6cd5e57169cc755460e1a05ded6a2fde44b55fe41d55125d6dfe8aaf2a2df944e396b26fe
diff --git a/deps/checksums/MPFR.v4.1.0-1.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 1372049ac9c388..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5dd7e3fe4ad870901175acada852d155
diff --git a/deps/checksums/MPFR.v4.1.0-1.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index fa9b3d96f9ddca..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-0d11f6f85ccefbfacf2df83fe7de10db31ca0420c2c2a8be9879b903b46c5339a988ead03b7ec76010a4f7d0f1c00f97ebeb40afc5e5ed663fec80bf8849d9a5
diff --git a/deps/checksums/MPFR.v4.1.0-1.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 27028a31566a42..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-be9158b8c2b8959a48f2894ee46c0a05
diff --git a/deps/checksums/MPFR.v4.1.0-1.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 816a77c4fb0e7e..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-aeb2d9475ef5e87609b4f9a8b5b60735268af7757162ad405a02955c6eb501f69024f4748d76355c11b9585191a8d3441b1c26e6c4d3bf0967453237292226ff
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 0ec714fb8a8339..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d5bca8dd3d1d5b928fb280a2e69ec459
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 7dd82c9bd5e825..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2a4035ccac6c30319d0b767748f332ae2748189a26e0d7cb61081a177819f08a5627e6de81dce6cb22fe5a1c34250d91b300befbf013a23dfeeb4579e628e383
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 30a571f3c6ef47..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6b00c09e1a54db66c215bf591c36ea67
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 4da6f4284c4592..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e9f00f761cb146ef314b0ed60df6d95a203e7cfbe4678fe3b54c3bf592e9ddaba79c406dd9e1f369ddb2d3f8d42ee37dd6e891b73b973300b470a3bb97425a22
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index b1d84728e732f2..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a359b259f20accd58f6056dae52ddf38
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index bef979e7cf026f..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-7e0e14fd0bd66c600e862c4e053eb12ba7b69f526cdd9a6e0a1a91de6e54d51b86ed9b6c38badec81e43485f97dbc8be3c9ae3d5431b5c8a671d295ff8fea3b6
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index a3bdcbf891b782..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3fb5ea2f380e675cc951853016cb5fe3
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 590a91a235ed34..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4a9666c2d5e6357ad4ca8a4913097f76090925bf0319705065d37f8115fdc5cd8feffdb36a6f9a0400af6c551531b57708a834b73ed508ca1dde1897154382ca
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/MPFR.v4.1.0-1.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index e3de1b2449d59b..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-653bbd918e4a340ace302d20ce096e35
diff --git a/deps/checksums/MPFR.v4.1.0-1.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/MPFR.v4.1.0-1.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index ed4f945a6d6e24..00000000000000
--- a/deps/checksums/MPFR.v4.1.0-1.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-77a91ba9c41049fb9511248344bac4ab9fbb1feb965dd0e78b09ce07c2d4d7df60c74528e4d1966e1a7000587b7766c904070fa3da9746e9341033f54df8b9c6
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 3d8a8c984e9c94..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-dba394c4e81d1a67260ac05a781d5510
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index e457efdff517ba..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-39001de9dcfcb284a9b25e15c9468c147502d64e42ac27cd712ebfc9876587ae57b0b6138e73eac4b9fc6c8d55a70bd48b7e52675324138e42b29378f5c967dc
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index f8076d490ebe81..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d2a23811af7249527ea9cfb42288ebcb
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 5c5db1da2ed807..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4eef704efbba6c3e788317a7f5a394a838bc6c456f9dced242137f4f3de288b9fd41bc7527c275d35724b6128ac69a2f172b0a2e58cbb72fda18ea76e7707698
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 1fdf020e1d3ed6..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4476a9ac3874dc80d52a5f81afe1b9d0
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index a996cd45398ed6..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-0b67462f606e652b7ac735ee963c1487eea2854bcdc13dee88cdf38bf62ae315895c22ccd649185b26c23140ae352f6827ba982f74f5926326dc08444bbac85c
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index ae66f86ad3ddaf..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-f149f22255ad5791b73cc47c30575100
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index bc5287e8e26ece..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-183000a8b90dd7a9e30c0a3a50e00a6bda60fb9763ab85897a8a91bea5b1b7f1cec9aa5076f9023fe101ea28777c6969eb45853611d444234c348507a54b650a
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-gnu.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index 2f0eb7b014f743..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e52b147d3b1770183cab9262a6adcd0a
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 14ddfe85e75c59..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e8b1fa6e4a67b6949c8a5cee98b1a574c0807944a252e7558d74a7880406f04f59d1e5c7409fa8479f15f20ebbee61968d1b7c9b3091298a9f70d7a1dda567e4
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-musl.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 3b2edc88bcbeaf..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7aa5d533f2bea70e6d70c344d543a634
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-musl.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 25757056003857..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b9e8c1eefbb42bd2b9b2ad72f1500871b3f7056e954d40d40ca1299bd54a8b32af95dd3f06707a9336bdb8cc3df985a6024bba1f98e2732ba923362ab8b91d9b
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 27fe0845fdd6f5..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-292afbb23e457df3eb363d193bc10258
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 0db840b170507b..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d0f390ebcf199874837b3ca117965403d7f733564106a02719a2a539572e73083fb3737a711101c1d4c374e7015063744dcbb710842127b85b8a3ca77dce948f
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 8721d8f75359d8..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-86d0bf0e5cd3c1c91ce1ff9c898864aa
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 049dd1fa538020..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-60bdcb911a92564cf883c50bdc3364c092157c79d7196e58418b8b540965819871737bee6278fe08330240200bea5d43fadceab77d008e474b9c0dbb4b676911
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index fecf7d400ae2e0..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2e2027f20798fb0ac1d6a40031024285
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 0d1bdaa54a9df3..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-41c2f309064367f95080f2d1826e84716645e763490c50c7e61eb1616a00dd73f36e203eb56f3814fc99dec0b3adf2fc5f73c5daf63a2ebff522dac39327e5d4
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index d14cde5bcf1d9c..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-15377a2563d635b2b58193306fd0c529
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 3d7afc59d1d7e3..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f051b79fdc7c0b02ba714e5e36d52dcd4783cb0cd1f275f2db4ab9960f9b113a2d218fb46097b6f2bbef7e3afe4f273350a2fc39bfca8e8a2ef0e506a36d76ac
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 967784e09aafac..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-540e4c642b6754e0c0e8f8db36fbaa09
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index ed3a84d52f94b6..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4f17b738011c570043e5edd068e8725490e4671ecaa94101da1b480adb048acd8f7a08d7f2c5680aebdc7a1220c4240d762f0bf0331f6b316b979a6b5604d326
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 76df2aca299101..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a2410ea01e510a913d406243fe561bf9
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 9b260efcf0711b..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d1e04d02f21f9103922c956b82c47b18ac317a884a5db3fc702f33a4f7829299900cb8dc88790baadb99a6f3acfcc86edea327cfc95db43f2e98cc463a637f16
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 47ba4a92b344af..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-df9eb1b35d44069af860b714d64f7bea
diff --git a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/MbedTLS.v2.16.8-0.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 2e5dcb34b60a16..00000000000000
--- a/deps/checksums/MbedTLS.v2.16.8-0.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a1f0ffa1b0f29e54fd1ac595b3702bc89284c09f4765048fe5dfd92bceef4979d93e70092041a87475cca3a21ea1825224138c20ab6406a9a07be02ab07a054f
diff --git a/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/md5 b/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/md5
new file mode 100644
index 00000000000000..f1a62f3d387605
--- /dev/null
+++ b/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/md5
@@ -0,0 +1 @@
+47be3a2c46e5279714bcb7837127c08a
diff --git a/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/sha512 b/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/sha512
new file mode 100644
index 00000000000000..27b5e2397013ce
--- /dev/null
+++ b/deps/checksums/NetworkOptions-4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a.tar.gz/sha512
@@ -0,0 +1 @@
+df45c5fa22619da686481b78de76b60573c798c14a9bbf3a9dd449f52ffca1f2b8dd3b247e2a7679d3dd55ba115787f3734cf03d29a10eba8fecdd78890891b5
diff --git a/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 7087f1ac36c083..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-96565650d0743983f9a17d3e6a932be6
diff --git a/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index d19d14783a4f9a..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6d1295e2c48b9e59f11ef935e7ccbc69ea234cbcfde1aaea9cbbb08ee6d5dcb6c3426ed8ac66ff95f8d2c8cec19767c67b1d9ba7e4698eacbe8d8b5d33cbf178
diff --git a/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index acc1937805af0b..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-364900012019f8fd8e33834b43f5de58
diff --git a/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index d4d7c666ef8bb7..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a5ac81da5e3d1a0d54a951b444ae5a2a49c00eee3482be1fb835efe1a068ca825bc8d9de6540e5909f04bb5b9c52459487afb6fc549f72aacacd683f82497354
diff --git a/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index c98ef35d5e91e3..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5f87140ee2b7d2b5c4d7d5e666edb793
diff --git a/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index bc4bdd9d0d8ad8..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-cb2b3665d3edcd3b3386101d0ff22002383b25c3e9b58b03eb5387888b5cae2782f984e6a39542d395f2c100c8fd81c89a0aed79e5d7581647bf024eefa5f6c3
diff --git a/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index 5286ffd613488d..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8dc7f75d66ece64f490d803751ef2315
diff --git a/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index 46aa7a65222365..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-45f194f2937329fc424fe5abef485e542362b38b17645cdc528f3ce7571445bf4b1f29733d8eef3bdd7b417ea11c8bf45879cc96c4865eaef7fe0baf51d862cd
diff --git a/deps/checksums/Objconv.v2.49.0-0.i686-linux-gnu.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index e2b3b2cf365814..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-85149305b9dd209c1e12f368e9c023aa
diff --git a/deps/checksums/Objconv.v2.49.0-0.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 079df7fea33866..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b909504ac1727ddc4311713320b553e483451123c0706d33a5dea3b2c0454040651ba23654585087ef20f7aa303120362008115e9b52d9a234889624373cb800
diff --git a/deps/checksums/Objconv.v2.49.0-0.i686-linux-musl.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 58812bcb96c72d..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ec5e26c8a2879e991c384b9a36758e60
diff --git a/deps/checksums/Objconv.v2.49.0-0.i686-linux-musl.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 104c1f6b8d7990..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-43440866002686b9eca5998da7d81e3875da366934b72217fdc26915722b4d446f06a02e7e8b8f5a88ce26faac650be67f1f88b90b520e845a61ca640dd6b1d0
diff --git a/deps/checksums/Objconv.v2.49.0-0.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 6f8e529da504fd..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-84055d1c86579824d4a634adae1cdfd2
diff --git a/deps/checksums/Objconv.v2.49.0-0.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 959c460c94164b..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-fc980fa01e50f42f01e42fb5fbdb96a89500f757991c07a888f9cb22dee97f1a19e33f7bed43d9372a3ba500d95d0420e59783446ee28230f3f30ac878fa4e36
diff --git a/deps/checksums/Objconv.v2.49.0-0.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 5a671716ec9e86..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-69b3d436965c139bfcb0e0389d25d73f
diff --git a/deps/checksums/Objconv.v2.49.0-0.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index ff113ae735a85f..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f5f849ed48ce4a4a12b59d0ef0bd749d07e323ddacec55039241202b56a3035e546073e5596aa4598f06cd78a8eb9a8d97af3ada448ffab86e6a1d8700956458
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index e25f21435cc4de..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3c17020656896f95e3f402cb29429112
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 05add1a0ef1094..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2cd495304eb4c8ca3f49be8e39b62d835fafd4d8f2bb9ab55fe90b77a1c34cbee240e6bfb1bb47a04eef77029731532eae27e9a1bf530ba2117fad01c6c9cbb7
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 04dbde15497688..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-907e188a3ae14dd9f25b298dffc6d3d3
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index ec491e884d52d5..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-01ef0aeb8dcddaefe2432d1c9d618f647a87bb4bf06ec64207630ad531982c47b7470b6b46b76c3358bf9eb2c28a30509fb2a1197643f936c5034247f7c3cac0
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 44ebf4baabe620..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ff5ce7c16a46a718133578818032c6b8
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 0b004cc11ead3b..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b5e2a9fb1ebc3ad2442ca4b636a126924bd9d567261cc8d1d816969454d11dcaeb4f9f1848183c309a63c8a6ad7da8d5bdc111765d8cc1f4c6aabea9eb0fd30a
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 6ade7811ab12b4..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6c86252ee6d60fc171030fd602e16ac7
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 0bca4c30583364..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a178fae5a07561092b2f7d392894e52678314f923579cfa23b45ac6501efbd11cccd23a3dc9493ae935dec7b4dcce44b34ddebe0f472575101d03c37e4ee3ce3
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/Objconv.v2.49.0-0.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 266627f67782fb..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b7e049750328ea767d2da84477327cf5
diff --git a/deps/checksums/Objconv.v2.49.0-0.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/Objconv.v2.49.0-0.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 4595424907d649..00000000000000
--- a/deps/checksums/Objconv.v2.49.0-0.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-caa18e7101840462ec160e0735481f63dae4294508a1e4010610771310d0194eb284a10ab0937f3c6da41f99055886408910d46892e1e8f4db4090cf88872e9c
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran3.tar.gz/md5
deleted file mode 100644
index 3141a92c4fbc04..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-41674a91b0440630da0576920300712f
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 3473c7dcb97044..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6fc2afefd5763f06a088ded1d6626307e0f694e498e03ace523099b7e7f3800964b8f04070b790bb78715430d1468af3334e765bc1ae21a3edf7cd0932c1ca87
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran4.tar.gz/md5
deleted file mode 100644
index 6168c4d7621f05..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-382d15b5d422b68f13d7f404de97f5c6
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512
deleted file mode 100644
index 63c6035e33b855..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-aae74efc8b33d7b3671fa4cf8b87273301e31e00baf26ae1a36e4fe631fdb203f70c0666eefd52f7467aec786f0e9b051116a3489041bcdbfbfa389852d5f39b
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran5.tar.gz/md5
deleted file mode 100644
index 3f32bee1e2d087..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-1ece6872b228d9b7283a2e0ec891e8df
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512
deleted file mode 100644
index a5024dc8b07619..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8ec7d25fd92aa0c0dcfd88adcc854b0b0bbda280445652812b9d521ad7cded1bc13a91f491d8ded80ab3c6a398cf1ffbba2d8d5fc31a7a3125ad5480bbe97986
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran3.tar.gz/md5
deleted file mode 100644
index c1a69c00765d1e..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-19901e7117e8d1c923ca05b315a5e53d
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 9cfdc1c76cb56d..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4001a6f09ba906d8ea410d1cd7b0dd6c88b476dbd307a3fcc48a0a53d3c0e1309cfdab287803e5004899ff87ed86f4067bd14d58b1fe749205ff488b2be6a6f7
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran4.tar.gz/md5
deleted file mode 100644
index 80ae36875a5b7d..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d8256feee9630bec53150c52d6b8e3e9
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran4.tar.gz/sha512
deleted file mode 100644
index 45e2c60de1f159..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e43c4bf459d5e132c38da2a00fceecf476c6a99f21232a720069a0aca2a07772e7a6add6eb774b96c73f6e5fdc2addc44a0447d958af5800589d7fcd43663f2a
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran5.tar.gz/md5
deleted file mode 100644
index 72b8f8a5595560..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a1817199921c9ae89e5b0fe8eb894d87
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran5.tar.gz/sha512
deleted file mode 100644
index ffd9473fba66e2..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.aarch64-linux-musl-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-46330cdd4702042a572c7684d7894782c53e54c22b74f5dd20292c597bbf9a6674fedc546328f49ffe1f157e361c2f68fea5f7e7ef8c2c4e8ca296bba2cc7fb5
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5
deleted file mode 100644
index ac322bb108f469..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-72a105fccd68396801a2eb1495eab3f7
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 74b2e1ede762ba..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-53ee75330be713e447d1ea07c714056a2393b0d13c244d79a217fd072c08424b205e161fd619a60a9f9f27f6abb49ac7210da3ac374b0a5fbcf6795d49c4c57f
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5
deleted file mode 100644
index c4983f6db9ad70..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7285890f9383c6932551cb618a5e1b2a
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512
deleted file mode 100644
index 6e7a2564902afc..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c0dbdbb236a0674ca0445c74f98aad024677a546a356d064d76bbcfacc3170fe95513413857c6b9ecfc098f0166a0787ae85db28bce25362566aa954e698e4c6
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5
deleted file mode 100644
index 617bcbc2f3a441..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-1c843aff41a566f9386fd92d8ccbbd5a
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512
deleted file mode 100644
index 30c9cb6a4a29e7..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-0e430e807ac36886bb601366bbb75f35f967b3d618039d23a1b87c41bc4940289696f77e45144e0d861814dd187cf2d98bcc4cce7c042c8001142eeb85e47314
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5
deleted file mode 100644
index 7a7dc927f7e94d..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-86612c3fda7ab7a35369fbe758c250e3
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 261a6d90d15378..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-5855816ed751723cdea882ce8fd220237082fa3c8033eb614f278060dfac738540d7777309769a72059f15cb5fe62f6b19dbd222fb7706042d5dba3b91c7b163
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5
deleted file mode 100644
index 28af4b5c862eea..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-927024b4e32ba91a03eb7311af9b25cd
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512
deleted file mode 100644
index dc310beb14063a..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-55d58d0ce3935ff016c5c00a4fab32198d34b0233544d98cb95f70597451fa2f487736180e04cb4bde856c422955fbe28fe82e3dcbda7037c8e11ee5a6d1ac6e
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5
deleted file mode 100644
index 67aeb7d08964b3..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-665322a13764692b970a7283416cbe43
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512
deleted file mode 100644
index 14b02fa90ae21d..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8b68e7a8e412a49f95a37701af39201f67c438a81a0ed49f8523773f5110fa3d0cb09c49564352776903dc60ab52d88f38aa28fe5d3412c4ccd4a690166955db
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran3.tar.gz/md5
deleted file mode 100644
index 44a411dfd50da5..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-acbabff40371cd134fbe8f598385e3bd
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 89c016cbc9e691..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1527b063fed3deb268c61ff022519535cb0df7a8046156a8238c85f3a6de965e3b54c211ec7395a4bbed9df0fae070a4ba451d26a3dd11601a05b2844b217faa
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran4.tar.gz/md5
deleted file mode 100644
index f9cbd3f16ea5a7..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-08e80400d8dbde9c0cb60d960543cfe4
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran4.tar.gz/sha512
deleted file mode 100644
index 7b3c243eecb823..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1a17c9f20bde85dfc9e858822c38124c1ccdab2b9311c3dd3e88cc5372ad7459632a4a1a81dccf60a0464cc0cead02867b0ec2924377471135ba9f9f2881c001
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran5.tar.gz/md5
deleted file mode 100644
index a27361917f640e..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-68c23f8b6c3f3f4e53a7418ba5c40a5a
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran5.tar.gz/sha512
deleted file mode 100644
index ed259fa2e246f4..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-gnu-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a35cbf03e74234a3e3285966c602cdde0a29f2dfd8757b3446f6589f4b692d3a10e069283da0588923a4e9e10c2521a1b9fe7beec8f30f52e298160dc5a1f574
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran3.tar.gz/md5
deleted file mode 100644
index 2665cb14d797d9..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6f7450e960b3fe9170462ed545e50da6
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran3.tar.gz/sha512
deleted file mode 100644
index aacc292261fe55..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-29b599dab452abc65b0afef695bcaefa43fe47292810f6b2a6c4de5e1a66a43ab3d16e6478aab1aa581777f851598fdaf5e10974b1ec497b55c3d5de89edb37a
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran4.tar.gz/md5
deleted file mode 100644
index 08c261e7c0a982..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-bbea22ab5a3d465d50fe700a359ae8fa
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran4.tar.gz/sha512
deleted file mode 100644
index fa6c100c0aa3af..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-860bba3dbb73b6f67b345fe5f91bdda50a9bbf7cb5a9a1f711f05b7723639434591096ec12677cae1fe04e58880fa7b9ecefed694e01f328d1c078a6826a52d0
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran5.tar.gz/md5
deleted file mode 100644
index 552fab2e64638f..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-87ef372d70f621923d5e394bdf2c21a8
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran5.tar.gz/sha512
deleted file mode 100644
index 38720406ef1fba..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-linux-musl-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-899730e57b7f62325c8616c292a31dce8265ba0d9c6390282003412d67da26219ab0d3c93a0b4e15eb0462e9d32d28e9507c758c2d192c8f182561c8ca0c01be
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran3.tar.gz/md5
deleted file mode 100644
index d443b80fa55c6c..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-41a5b6ca26e13abb73e237229c51ecd0
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 19117479e2541b..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-cd2aba2d15f8d8f0ed605bd0f9ce7b4daf9da40b2b2e8a90f804f6394bb6ebd07f59c5645229a35a63927bde2ef7126bbc4c38fc2396a653b63cc81cd40ba494
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran4.tar.gz/md5
deleted file mode 100644
index 9e687c2485d384..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0d9fc1a78318d24a5796a23361d4ad6f
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran4.tar.gz/sha512
deleted file mode 100644
index e384c2c8d7b1eb..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-df3b9c1ae6d053554621712c2a654ca23c2deb4f6a1864ac1402fd8a2fd903b9713e84bbf20755b6aed5a83ffe27934f12abe70c7da8e6232f09b56820b3b7c7
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran5.tar.gz/md5
deleted file mode 100644
index 9a96bf7093f300..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6e28a918776ebc4dcf614063508e4429
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran5.tar.gz/sha512
deleted file mode 100644
index 6f447080ba6588..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.i686-w64-mingw32-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-463f99ede45b35df5b65929b5d224e6acf97b60b300f6c9ec86702d3be1dc4388b344ed8e5f05e4b7e1233f205b1178113489082c7d9f78a5a0d4448a733d0b6
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5
deleted file mode 100644
index c2678d88147334..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-496d4a64e692b152d238bfdd9641dc9f
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512
deleted file mode 100644
index c5b9b14adc3896..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-cf0a6aa0b7ff1e69e2df7354dff5e289c3dc005784a9a106a84929216495afa503a5cd5da815b6484755755a2cb20ff8bfb52bc93e2986af13978989965671db
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5
deleted file mode 100644
index fce8718a8e6b8c..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-f5286436d9d44b6ec8b9292bd3e2285b
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512
deleted file mode 100644
index f2e6b014f0f744..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a1cc56838abede6a1701461a2a4f0c16b43b753fb093866daaa0c04bc772d4725537bbc857dfecb09cd43b5edea4b78b77d889da775b7c2bbd3b8a577b3d854e
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5
deleted file mode 100644
index c37bf0b9eb14df..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-f072ffe83923379617363b86802e6460
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512
deleted file mode 100644
index e610291ba42199..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a0e89167972c3c00e0caa1f7b999e7dfd0822484c6de201c8362e43aa8f23c64f47da957b68ca01614dcd761f6ab19abba5f53a0b9fdf517b919852a57b15b30
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran3.tar.gz/md5
deleted file mode 100644
index f43e7bbb4ae8b7..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5e547b8c6fa570957c01ea6850d3b35f
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 522318a3509c98..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-12091c5c8f5ee4faf2962adbb768391535aaf133bd79fbf768fc704fef81d3b3ad43a2ed34ccfd40e71605d965ba389cdb50517e019f71d3e97db76dc9d614e3
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran4.tar.gz/md5
deleted file mode 100644
index 7de6f19a6b0d5c..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-71ba66f503c019f586f14dc7a3e58f58
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran4.tar.gz/sha512
deleted file mode 100644
index f2ea8f6685ccb0..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-ec7427255ee58a321bbd1addf0b1ac4cf100109fc3ea61299a4f4e111de9f3f18f8958ce00feabfe3593a7c234965ba21d36923d5f9370e45dd1256983398769
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran5.tar.gz/md5
deleted file mode 100644
index 7adb79d69f41ab..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-29cc8a0a42291636c67db928c7bc30bc
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran5.tar.gz/sha512
deleted file mode 100644
index 3646cff93612e8..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-apple-darwin14-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f3f790b018e39227e8e1563be6a45b40090f234662d4e2ba5968fe9291f647f557cd5e842db8c21f0221e3c052b044166fdecc193fcd83a1ce8724e29f0825c9
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran3.tar.gz/md5
deleted file mode 100644
index 21029d0a0fb6bc..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-000d8afa59bc5caea9064cd308f12719
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 57693123a222dd..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-effa16a8cf1bb98be6a67073245b72ec979cfad70e52c26faf47019854ca2a8eeddb884bcbc8b036c7c89bbae2ad89d4f7ba6f7ca29495c16aa9e37e20969318
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran4.tar.gz/md5
deleted file mode 100644
index eb82f028eb7059..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a2efde90857a2601c0527f970af47584
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512
deleted file mode 100644
index 042e6154e34a4c..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-28a8e31cc4c05199da5d01b6940828d5cace26a1d575e9f16a23241ffde5eb3b7369bb10d2322c8e4865c72588cca3fd8263c9f9fb2ce88a50a089ad5429df21
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran5.tar.gz/md5
deleted file mode 100644
index 316d40907d345a..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-097c69102d957a0080ded88d1cf15bb7
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512
deleted file mode 100644
index 0bed9da0b34ce7..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-ed7d877b9e0421147a898dbdc20b01790938ca176f8a288334d85f40590b1157fe098153406e178e76042340a4c0da73b9b52a0b2584c7c2fb6a86b72517f958
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran3.tar.gz/md5
deleted file mode 100644
index 76e14063f7a69d..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9718ff8dc563e66a83b6bc4b447c8a56
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 9e473b1ffb483c..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4a007fa76f7b7e7c6b4c20607a5532ec180c3903221e8f483498de74a36b2da851d653422f0507d7e0241e8b2f434abfed34feb3508667b9a2703eaf0218ec8b
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran4.tar.gz/md5
deleted file mode 100644
index 5c6758335e6a4b..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e49195a51d66bc6cc9a7bb9d1e1d9846
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran4.tar.gz/sha512
deleted file mode 100644
index 9ba5963574ec25..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3f8a7d44e88a588b7a433ff72c0f841e3ad075850091f10ab008ad1d04203c09cd4819ddf9aaf3cdb9f6917c56fc96f24693bca960bac955143dc1493a4438a7
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran5.tar.gz/md5
deleted file mode 100644
index 8cf701cff43848..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-966775df42bca55f0b479005708b8426
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran5.tar.gz/sha512
deleted file mode 100644
index f4c7da99349987..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-linux-musl-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-69409eacb7da997219e5ff23e9e8039eda543e1184c30edce482bd005866cd16891ca9fbdcea312ed772299ff78f769234a6d3cbd05d1b3b818f36ecd3ce44f6
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran3.tar.gz/md5
deleted file mode 100644
index 2d00a734315c7a..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6038eb3e75f23fd115cb459bf707ab93
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran3.tar.gz/sha512
deleted file mode 100644
index 144839e7785cb6..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8211d84af69b7ab3245bc024dd72b3f34b2acd8f8f021558346c400e45f04289542c2b0d0d26e3eaa08363cd2961a03d631f1021ffb44212c2d5a36750cde577
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran4.tar.gz/md5
deleted file mode 100644
index bab898d4566f37..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8288745acad526512cf25605112e67bf
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran4.tar.gz/sha512
deleted file mode 100644
index 1b49590172a147..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-36b629191d730719fffb2b2eb4fa3ff10212c4b1257268ba8f6ae78758765138c140a423ba8e4d01bffda3d573e1ac443e5cf6fec78445731757ecca19518a04
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran5.tar.gz/md5
deleted file mode 100644
index 19ca0b0a67ee5e..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ba036e41a73cd5f5428f284728d96096
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran5.tar.gz/sha512
deleted file mode 100644
index ebd28719d0813f..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-unknown-freebsd11.1-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-35675af2e1f6f822b5ccd0a443884fb22d2b7ccdc23adc22e9e4ee2a2791a981e8e16622b439826527181e1c404e8a9a1773ca1abd71894fd315778d839c25c2
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5
deleted file mode 100644
index cc9c9afb2a3c41..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0084a4d6c169c9253bbac4eafcb79ae3
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512
deleted file mode 100644
index b25c8c36ad1060..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3759fda6a246e1b949080706971a82b1832d21916f4e161a29bc98ddcde02b02536fde0e8cf111e77a82a8a7753b9de48200e833ebfd4a74f04d1d671dd236a0
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5
deleted file mode 100644
index 9a773f82fe319c..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-1c5599d3beaeba4cf55284b8a0deac75
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512
deleted file mode 100644
index c3dda4cc060f50..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-df9d465048fd229496da82fe2a969ac7201a2afa45f044613d0ff6b9905c59cfeb0f89054bf05271e17782937b4c28a1b0cbcdf09d8fa46c47da3f3382cc4292
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5
deleted file mode 100644
index 96713a94535e0b..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ebacc2016b0f28b5bc2364150f802b58
diff --git a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512 b/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512
deleted file mode 100644
index dddb8a7f6b2783..00000000000000
--- a/deps/checksums/OpenBLAS.v0.3.10-0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-008d9b6915cda1d19a23083e84ae17c7eef5c5c62771af079c7ebd04d28639b10d13b92102e0acbd92f5d60436c930e7a866f8dd9da23d8db982bddc8893e835
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 10c01865c80d76..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3f445fab4bbc703837fce894c31483f3
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index c9d5e8a4831600..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-7d3dcd96ab672e2c4323d7ba4e7b44f1cf0e8283fe3df13d0add40f0a5ed31176bd5cf0f222720e08f1fb9368027527bd495d72d54195330fb0671e004b30096
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index b482e64c91e26c..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b73fc13eb967d8f7502ffd5b314e80b2
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 3ae7e129256b18..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-bd46a21fc6308a1bbc626b5002c4ff029f50d738fac4d58391afc3301ec13feb4e781bfe6d664c962d02a84e970221aa2e7e17a9328d4cf5e3db7b4b6fcbbe7b
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 8ac7ccced578a9..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-78dc354ee31636c3bdc6bd59bd0254cc
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 2bdc2f57a7cbb6..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-161cfad57e4adb911895c548e38a722d7b8245741b0cc147d10a4656c0bbe1c5ed2b7b1450d88dbef666dc9882bc9f32897b584e306f96e1ced71cc8d8365699
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index 6a28da64c4ab40..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-926a9897fc997ef5b316e49d7ff64f2a
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index b69e57a9461640..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e47af6f94def69faf48fd356057643a197b48c19b9f76ee46894abb53a48c93a4bc32d3ce85d68e271e387b9ad1478d31a2f0eb9bfa13d4e9f02885f3ae610b4
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-gnu.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index 5b3039151c618d..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-bd5f5add68aff856744ffcfe2f4546fe
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index bb84b8cfa27ae4..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8b1364665b0a2d18fe9c9fef3865790bc96679b665aecf7852decab7ca3ef1ebdbffb464b7c32806bedd74f770f1116aebf4c135dd35483bfdf4cef0fab32f40
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-musl.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 8a2f87ceed1055..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-13d9c4562fc22b840c2e6f5756dc91ab
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-musl.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index a47a57a3a95a18..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-20ab2007fac054a9231b06634fe2a368ffee6a5a5c6db80c8e44b033abefbcd5058008b694d3a0c5bdb360bd4ff1ee7be564da3c783084599c602077da966e36
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 171610b8781c73..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-76b2cf1608c3fd4f41383ccc9e921925
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 05a0b9644d62b6..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-cfc878c19d27f75d6b2d5c659d2b9acd527f6eca1aa1a8dc8528eac49da87785730807fe30955ee313d53fd074ff261e630935d882c93922f80612eb20dd07a2
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 3c999649a9f833..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7c9aaaf1b6cd7e5254fc04db07b7335c
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 32d485d54b6fa4..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e29d8c43768d8673c7491ad566aef4571b0540bac948ec414ca54269453e5c702afb74ac942f755be0a9adf33120a439b591926ba0ebab46ba2b325fa77c25cf
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index e814d7abd0117d..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-02a808e8940dfe0233a32ea10629a2df
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 238f6cf72b57a8..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c6ecbc8667995859cc16e89551cb826627af70cb019729afd7277003150eb61e673eda812ebbb4fd592a3f2c9da960d833b69ff5faa148674779446cf277c31c
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 7b52ca7e5a6e6e..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2526f73a38ef407ec118ae3ea4e5f8d8
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index f0fe38b143cb09..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-79f495b9a5332eabae4742cd6f5233403f69ffb30e6fff30d8dee5cc10e4a278e9e00c82fc32936a4b42e3446ecad208a96fbfe2b2e9d91db15b52195f5e1fe8
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 9ad3a577b10768..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-08b36c9c40568e64f76a87484ff4736c
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 4ae29337145f7c..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8ed05913066a39338e487833574b895e3d88ae79eaa9038972e33f94ec2afab93a84a8833ecc90bacc0940f5bb6f22af8b9117bcf7e943fe1f9114ba2576f698
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 30ae0420d9d84d..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-f037929da10ae741953fcc8ffcf3802c
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 55c1b9024d2eb9..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-bf802cc44e8638fcd8d263070e56c3bdea37899405ffe8a67d8bc16a70dba46985d6e32e5651b5596e47608db272d1e0b53a23b23633b84d6dd3a51a5b52eb88
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index a89d30876186ad..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6e8c1ee1b5733301aacd19165d8376c1
diff --git a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/OpenLibm.v0.7.0-0.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 9e971b9c319eed..00000000000000
--- a/deps/checksums/OpenLibm.v0.7.0-0.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d24463200b065bd0137c5c87c78cc36bf8c157fca7c6b07e44f0763eb580a2fecb5aed80433a942f2ed528d0807ad045c05d73516f8f6e84e9d5cb8255deefb9
diff --git a/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index fd805c335789ad..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-54e4d21dddc9165ebe7fd736fc9ebe0d
diff --git a/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 3ff166e96dc145..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3845c21cd009303533f0c9ca9eda1bb80d2273e2547a3c8fed95f65902b185a06e5da51452d52b1c595e451acf2ef91284e31eb2dddf87b95649ffccea3b2ece
diff --git a/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index 0fe889aaac8a26..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ab4ae6dfe099b8c718c24a18056daef8
diff --git a/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 7d4828afb8ce49..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-359ac7fac9eb6e3a72d40c548602c5f3e0e88f866282b8487ff113f3347d71171e75547675ac214f35ff8e9d76891d9eea762c30d96e66f2ad2ee3804c7d255b
diff --git a/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index c940a87d20952e..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8a138c21e69e2ef27e97df2393a24f62
diff --git a/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 4691d6d76f8019..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-aae6e19f3d8464cf195612e6c5bb1d79ea14d2c2f03207ee1796aa8e5374197ff1693e8f170620f5f5769aa2ed6f0d4b84220475376d85d111f5f9d7c33e135a
diff --git a/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index 1a590e33677ad0..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a2d5325168a333f60dac0dd083022224
diff --git a/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index f9b9209778cfc8..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-49cbb7ad7a1efeb61aaa79906b846f69e7c98eb8b65857a6fcd30f4995398f0589a8bc6e4cb21816f0089d5904a743a6b3af2265c2bbd4b0cb25788a74b82bc5
diff --git a/deps/checksums/PCRE2.v10.31.0-0.i686-linux-gnu.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index 85ded7367a7631..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-442b052ac108a31c33a89f9727a3b49b
diff --git a/deps/checksums/PCRE2.v10.31.0-0.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 49723e328d888a..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8776e35682738eb5b958954573f79dfbcbace923b00a9d70cddee6728454dc389f54d91e7a00beff759a6247a5a7fdfbe2f8a66321a2fd642a379910ca391cbf
diff --git a/deps/checksums/PCRE2.v10.31.0-0.i686-linux-musl.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 7647c989675cd7..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-24101edc95ebcf77a22fe5e4d7d8799d
diff --git a/deps/checksums/PCRE2.v10.31.0-0.i686-linux-musl.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 79a20ccd71d144..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8561f6a775e5e552d77aabaa4b4f7f6098cfcfaab06051385896214eb1b8a4d5a0cc2f3cd0d6e9c2f5fbb64928a3328a61db699d5108d553574bdcb52353da3f
diff --git a/deps/checksums/PCRE2.v10.31.0-0.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 4cbe1bc82b38d0..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c6b7b3402e7126d9d9aef281c09d58a6
diff --git a/deps/checksums/PCRE2.v10.31.0-0.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index adcd04c9d93ad2..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f46f56756d1d71fa0b9b767a18d47d9cd5966d63ac7806d90289311007717ec8580ff5989bf9f3fc8192bea581f2598f1083d182f1e8474a7e29977be5751f90
diff --git a/deps/checksums/PCRE2.v10.31.0-0.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index c2ff7c7b9a579b..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-31cef7ec68cabd292aa63873f517f3ee
diff --git a/deps/checksums/PCRE2.v10.31.0-0.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index f39d42689f435f..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-de165bc120d90553435f8955b7afd467163c66d83b6661593682bf7ae7c160622e6a98a459bcf862ba1eb14b224912cfaca6971a4f1c125a0d75170d1e13d5e5
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 94b7033edeac58..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-10bdb7281fe9899a65e4d058ce3b62a0
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 40cf63cd7239d2..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-74f603c84f48a8fd24b80146be215bd30356e1fc1b46c33a0b9df105f3afd5cebcfaac4a6f018e06d0279bdfc369ea0ec22cab38a1cdcf97ce49f56a0fc86509
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 59d8bb3b7fde0f..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7b5a398e37554f764fc33b384f2588ab
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 751eb515d2e4fe..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6d4e42bdc851d905a7c4ff626dc6da58bfcc6361d578123c54f616d571ef0056dfc27789ea3520b816ed80b1501307461cb6fd6a7eecb032da4f42e70b555128
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index ee1f0e87110360..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b4a0541fcb70459046492ea61e784af5
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 79c011bd75847c..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-71e2485144b6d5d6527777e96b8677ce49eac6ee8bddba2aa792d7d153d073b401e590c947f30fbdb14e34bbf1f339ed9da4d28c32e906fb2156b9c21e5b3928
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index bfa6d25e9141b8..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-12c462d17c6133a43224e7aa0dbf6ab8
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 71f7ebadf55b66..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-558bd5705ecf2621d6a3da0aa3ad76abd750a64566bd4e08f251fdb09057a245b2ad97fa50160baa62eb92a52118fac592ebfd562bec23de1eb45e3de9fcd02c
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index bcca7abd07d462..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ccb3b55a39f06475cdc20e243b64c822
diff --git a/deps/checksums/PCRE2.v10.31.0-0.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/PCRE2.v10.31.0-0.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index d809ff9843718a..00000000000000
--- a/deps/checksums/PCRE2.v10.31.0-0.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8fed96378ce5567dd40a29831c92c21c6627dce5ab468d43d8bdf6447bd43b01201360e8b89b3ec0380248eb2ef46df6be40109d683a3af16d264a2e4300eab6
diff --git a/deps/checksums/Pkg-54d5c9e5175e94a05d6c9c9e54ad5b42d068eb17.tar.gz/md5 b/deps/checksums/Pkg-54d5c9e5175e94a05d6c9c9e54ad5b42d068eb17.tar.gz/md5
new file mode 100644
index 00000000000000..fa7ad439f14b0b
--- /dev/null
+++ b/deps/checksums/Pkg-54d5c9e5175e94a05d6c9c9e54ad5b42d068eb17.tar.gz/md5
@@ -0,0 +1 @@
+6cd9a8d83b45b88b2ba5c43ccd929d00
diff --git a/deps/checksums/Pkg-54d5c9e5175e94a05d6c9c9e54ad5b42d068eb17.tar.gz/sha512 b/deps/checksums/Pkg-54d5c9e5175e94a05d6c9c9e54ad5b42d068eb17.tar.gz/sha512
new file mode 100644
index 00000000000000..4e2ce708ba22fd
--- /dev/null
+++ b/deps/checksums/Pkg-54d5c9e5175e94a05d6c9c9e54ad5b42d068eb17.tar.gz/sha512
@@ -0,0 +1 @@
+2e2c626103a8653c5e3f29cc2460c2e703ef2277c597d835fb58ee0d1ddb1ef535b82e7e949e7a9d83bfa5adc534d2a6cc92d38a444c91d3df094bd9258fb3e6
diff --git a/deps/checksums/Pkg-6a235eb813be335b54c97c5c7d631bdbd1059115.tar.gz/md5 b/deps/checksums/Pkg-6a235eb813be335b54c97c5c7d631bdbd1059115.tar.gz/md5
deleted file mode 100644
index 28088ecbdbe91d..00000000000000
--- a/deps/checksums/Pkg-6a235eb813be335b54c97c5c7d631bdbd1059115.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-5e4308adce421ca3faf6a2b073d0e189
diff --git a/deps/checksums/Pkg-6a235eb813be335b54c97c5c7d631bdbd1059115.tar.gz/sha512 b/deps/checksums/Pkg-6a235eb813be335b54c97c5c7d631bdbd1059115.tar.gz/sha512
deleted file mode 100644
index 778a6a9ed78d2c..00000000000000
--- a/deps/checksums/Pkg-6a235eb813be335b54c97c5c7d631bdbd1059115.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b58f1d0db83127b82d8f0ca86b45cf72bce6ee8c40f506076decaba2c1ae3262639c231f8f5dac077003041da015f653991729ea7c2695f477dd8942937dc3e9
diff --git a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5 b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5
new file mode 100644
index 00000000000000..f682cf35186582
--- /dev/null
+++ b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/md5
@@ -0,0 +1 @@
+de53629eb0b1ce98ac6b245bdbf14e9d
diff --git a/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512 b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512
new file mode 100644
index 00000000000000..870098ef7aada2
--- /dev/null
+++ b/deps/checksums/SHA-2d1f84e6f8417a1a368de48318640d948b023e7a.tar.gz/sha512
@@ -0,0 +1 @@
+71cdc58b03cc4f42f8c4b9c2353d6f94d77b4ac5c9d374387d435c57ba85e966f3be4e8c8447b34e184cb8e665c42b3cd2c9d9742c86f7fb5c71a85df5087966
diff --git a/deps/checksums/SparseArrays-96820d3aba22dad0fbd2b4877e6a1f0f7af76721.tar.gz/md5 b/deps/checksums/SparseArrays-96820d3aba22dad0fbd2b4877e6a1f0f7af76721.tar.gz/md5
new file mode 100644
index 00000000000000..d247208595159f
--- /dev/null
+++ b/deps/checksums/SparseArrays-96820d3aba22dad0fbd2b4877e6a1f0f7af76721.tar.gz/md5
@@ -0,0 +1 @@
+a6f48b4fbfecc10d6340536957d094a0
diff --git a/deps/checksums/SparseArrays-96820d3aba22dad0fbd2b4877e6a1f0f7af76721.tar.gz/sha512 b/deps/checksums/SparseArrays-96820d3aba22dad0fbd2b4877e6a1f0f7af76721.tar.gz/sha512
new file mode 100644
index 00000000000000..8699e2cb530aad
--- /dev/null
+++ b/deps/checksums/SparseArrays-96820d3aba22dad0fbd2b4877e6a1f0f7af76721.tar.gz/sha512
@@ -0,0 +1 @@
+5d74fada2c2748606683f5ffa457d185790ec68a6019717bf587e302b4a42ea6a18041bd786be8f0938a7919d486039cabcc4fbb736bcb4ef9d1aaf9eb697856
diff --git a/deps/checksums/Statistics-61a021bcb330e6c52f2435f2abaffc77875ab6f2.tar.gz/md5 b/deps/checksums/Statistics-61a021bcb330e6c52f2435f2abaffc77875ab6f2.tar.gz/md5
new file mode 100644
index 00000000000000..ad05c56de3970c
--- /dev/null
+++ b/deps/checksums/Statistics-61a021bcb330e6c52f2435f2abaffc77875ab6f2.tar.gz/md5
@@ -0,0 +1 @@
+028a653f3b33540ca9d95f119bc62a06
diff --git a/deps/checksums/Statistics-61a021bcb330e6c52f2435f2abaffc77875ab6f2.tar.gz/sha512 b/deps/checksums/Statistics-61a021bcb330e6c52f2435f2abaffc77875ab6f2.tar.gz/sha512
new file mode 100644
index 00000000000000..62ba9972c20292
--- /dev/null
+++ b/deps/checksums/Statistics-61a021bcb330e6c52f2435f2abaffc77875ab6f2.tar.gz/sha512
@@ -0,0 +1 @@
+27e6f153f119638c4ed8e29127db10c1aff4fe5c14217a86a65d2bcb7ffbd3ed8e22613ed26fe0b9ffbb525ba00fc673be989d9da50e10fa12fd9a460ceeddcf
diff --git a/deps/checksums/Statistics-b384104d35ff0e7cf311485607b177223ed72b9a.tar.gz/md5 b/deps/checksums/Statistics-b384104d35ff0e7cf311485607b177223ed72b9a.tar.gz/md5
deleted file mode 100644
index 155beaca17093a..00000000000000
--- a/deps/checksums/Statistics-b384104d35ff0e7cf311485607b177223ed72b9a.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ce7ee5ff49633d79c00ece733028ad94
diff --git a/deps/checksums/Statistics-b384104d35ff0e7cf311485607b177223ed72b9a.tar.gz/sha512 b/deps/checksums/Statistics-b384104d35ff0e7cf311485607b177223ed72b9a.tar.gz/sha512
deleted file mode 100644
index bcda4799c19fec..00000000000000
--- a/deps/checksums/Statistics-b384104d35ff0e7cf311485607b177223ed72b9a.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-77ee31f7a683140fa6d3301a90c1e11849228a1fcc4eabbb9c4f26ab0a5a8089b06924f1c5ab745d1ef667438b11c6bfedf5b2ec049f28ab636e7dae9157cb83
diff --git a/deps/checksums/SuiteSparse-5.4.0.tar.gz/md5 b/deps/checksums/SuiteSparse-5.4.0.tar.gz/md5
deleted file mode 100644
index d111708b32fcf3..00000000000000
--- a/deps/checksums/SuiteSparse-5.4.0.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d73dc3527c66804ecf41b09d057142ff
diff --git a/deps/checksums/SuiteSparse-5.4.0.tar.gz/sha512 b/deps/checksums/SuiteSparse-5.4.0.tar.gz/sha512
deleted file mode 100644
index 7e097a1e26e687..00000000000000
--- a/deps/checksums/SuiteSparse-5.4.0.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-65065fff86ff31e2ecc706525c3a2240cec7fff67f1bdf5371df7c55b661bd797e2fdbde8952f9ca7b7d8aa39bfcd6b6e0355c3acb59f66069cb0e14cb76701d
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index f1e62bb1cee4e4..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a32df8ce15d204a993d00b5fdc8681a9
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 1c2d2dd71affe3..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b10c14909f2996da524098f8a2804335510decd119026a65027443280e8ee1661a13568a4eeabc2e4157a018e68b030953fe713aef9e4562f1cea784223591fb
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index 23ced5947568d7..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-15884c89e8491a6b147c3d36ea4f0f5c
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 7090c0dd44afa0..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-7b2e823452fe181a6bd295c4b70dd6e0d0fcbd282151ed40da4288a480674786ee6307ca2502986f6cf9b482e2198e9fdee9c6c6818686666f9157a5096905cb
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 9785a02a504489..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2da0587fd842c14504d2cd60e809f2ea
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 58a7f9fa3f52e7..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-85cb01448c0f33e21f478af57f25ae12b9ae83b66daf94952f89871852ed125b9de8e595613d8dd438d9eaea6a352bbfd8526fd98851d2a197e52d142c187294
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index af23bd7be43528..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-bbdf9391f98ef320ec33cc7faaaa5bc2
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index 949e11d13d5ff0..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-03fe617178ecbb14f64f3f06c059db3732090e4bff08f33fa5096847e3d0aecb28917644ecf90d31f61edd670c58b46155a1d8ca1d409c4b378647864898ff59
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-gnu.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index 3df549d906cb01..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-1fce85a472ac3ed816cc09ac7d023161
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 0dec762adcf87f..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3bac1f8d6e6cef1323f19988f5e8b23ddc7eae2e7a1e814ac22f318bb79e2f0cc28517ae97e409148de101b8ea27e0c738731e63b6567fda9d947f0bc057fa8c
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-musl.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index d9c582d0d00b24..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-935a8082e5b2616e8e6d20c153c27373
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-musl.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index dea3dea97521f1..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-30cc703b2bfd8397adc113ef8d88d5444182b3ce3be8583efa26105d428d659a7560a99748099c757a11b5de75c3f18e4ae33dbd5ea3de3601f51bdc6d1bb444
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 7bab2029423e1a..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8826cb6cdd778a279a63e58f18c008bb
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index c04bd144376ed0..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-19fe4b5949462e703e11d32596411ee41403bbfa303dc33420d6ae02123e6008efa741daeae4e2242d643c90df1c836fc7235c5398e585f9b316a68788ec0b44
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 043c601254ed03..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7cd85be4fbb1190b56e5806e5a048ac0
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 85d9325d547cc1..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a0dbde6e328266c3210687d2bc4094a8bd878f6db3fc0a65331823b1c5bf79afff34bcd60e589820cd55866a0ad9c774cfbd48923a58f22ddb8e198bf0690dcb
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 4c78dac25f0119..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-bf73957713ddc7ddec4b300cc4e2294b
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 0ae62a2b253e5d..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-584eacdd1a5a1624fb478165216de10d97d125f516aace1dc993d2a1e13defc9e1270cd9b6e023b266d3776350d8dd72cb9cd509ebcf0f33392c997be744c2d4
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 6269d2a6a3aeb8..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2a353f6c7917ba5a9749147ca43786e8
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index db6c8542a0785b..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-75d9ea6e46325f8e249dce356e79512801afc9b67e7e4a2ebe56c2a92fd8d14de2b1b135a4c02cf36c2080ee00d8cdcb6e572b4990971dd806f8b66ab5656fbc
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 53770b20702804..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-fa443a09ca62fb25bdedc1887e04559f
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index d79eb7eb1567c3..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6cce5d984e1d9f51189b3442bc6fdf0d7f173c6224cbca3be4a1a144abea76a0a454e6ce8564df7c1e677df7aee653e81dfcae1a352288d3704f6ce087c2e985
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 1befded197aab4..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-df8e6168f2ed3f1792e44d1f0bfd8ecc
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index f2388c330ca020..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-838081c9b172158b8d59c8491770a9665f3441d49cc2ce7bae0b656c81791210af6d30585f0e4f4b2a08b11be8913ac709dc488384f43085869a192754a2f211
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index f135db1e175f40..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d0354a7a698e39fc76702faab6ee283e
diff --git a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 54129a07b52c74..00000000000000
--- a/deps/checksums/SuiteSparse.v5.4.0-6.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-880eaed5b817d09413a6f5c18c502fdd1c61b47b0c9d436717b4308c7649ef1d01b2a2e63e452e7b3ddcb92990b6ebc85c8fb5b697f143e8e1aa84e3d3b98a7f
diff --git a/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/md5 b/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/md5
new file mode 100644
index 00000000000000..3be44f2d907182
--- /dev/null
+++ b/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/md5
@@ -0,0 +1 @@
+2e6f1656df70500842c4de4d0f941f89
diff --git a/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/sha512 b/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/sha512
new file mode 100644
index 00000000000000..7c1626b841ee03
--- /dev/null
+++ b/deps/checksums/Tar-56062695b92920c8b75e997fb0c8c3b015d04b78.tar.gz/sha512
@@ -0,0 +1 @@
+2d2ed113bb9e9469b29a680172d1ab06b2ad8e0574788c3eb16467e621b9127c6159afbddd1014694d46bf9945cfcecbe8cbc315448e0a06fe14b45a4b10ae83
diff --git a/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 5cfb66f75df193..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-51358c64e42d3b8d923451278fc7aa18
diff --git a/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 54ce95ddbe8e00..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4812a79c1abf7b6301f7d1b4d65116e1add5edcaee574c6797d5c6c75b97fc9557d4e321d69afcf2dc1cd7c86b5f23f26a5cffd4541235e43c3d929498793554
diff --git a/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index 7190114e8c8a23..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d967ee51bc241699462b50466208b7a1
diff --git a/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index cd3df5f660438e..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b534781b3aa10ed2d5b426c2e4773ebf02aa09dd3b99326b3d74361260459ca4aff8e0638f5bafc2bc7b1c5e9eb27d37815194667ba09bf75360489caac2dc07
diff --git a/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index c0c8feee7312a4..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4324accedf22fe4733323f91ec408eca
diff --git a/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 08588f645c5aed..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e7bc261ef2ff5e1dad9135dd6278365db0865b99b75d4086a27793e8c45276a609556c8145188fdfa9e617f583ac1c8992d02854862453d40849e49c39ae4a2e
diff --git a/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index f0c5f60add8fe3..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6544bd96203abd28b5a9ab4de809cfb3
diff --git a/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index f2430d4cd58b61..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-cec06a2b7a1bd44f69c5281aa4db0f032506598b5d9b2f81716587f07321528aefc8f52c3ad686c16b923715fa13b0d7a0f182d7dacafafe55059346f1ea24cb
diff --git a/deps/checksums/Zlib.v1.2.11-10.i686-linux-gnu.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index 3c4eb40c8cde4b..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-760ea40b76d53f6de170f09a17fc3aec
diff --git a/deps/checksums/Zlib.v1.2.11-10.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index eb388256010e07..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-550e950ddf02598eefa013c47f9c13ded8468fdf871602de85156f2c4654f515a56ba3b8881441144a78453ebed99b9ea4b580ad940b9d587208309b20da385c
diff --git a/deps/checksums/Zlib.v1.2.11-10.i686-linux-musl.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index b42db353b33e4a..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-390f2326a32f1c5ee44585a49a60ecf9
diff --git a/deps/checksums/Zlib.v1.2.11-10.i686-linux-musl.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 65c0a0b6a155fe..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-357beb491a420dbbd6640df3ff8bb3e32be9fe0308d4b44c956b1c50f40f08dd6d77118c7ae46a4149e48772e6b545987542e7709d545cc486374710a6bc8cb3
diff --git a/deps/checksums/Zlib.v1.2.11-10.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 42c3fcf5122a17..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0b4e00c3d2ef8a4aaf7154c18ada2348
diff --git a/deps/checksums/Zlib.v1.2.11-10.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 8949fa694c244a..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9bb1f68607b07c8f20f18d839d7b13ca87e45ac57a6c3bffcf27db26c11d99433e08af391939b61c13f49e5df5feddce2ff5d9d962ca0d7e89d850288f563172
diff --git a/deps/checksums/Zlib.v1.2.11-10.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 746718fc13e8a6..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-becb958e9f99b08a152824c8a44454d5
diff --git a/deps/checksums/Zlib.v1.2.11-10.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index d0fb756d83e58c..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-857225765e3af15463128b7d4aca87c358c34c802fbf7863cdeed06f958b2c3976750c8583bfcf0ef12c106519563248f80a676337d340fddfc005f7cab09b7c
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 36752a8ff77b44..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-b073327b80d77fcb321aa90ac054f3bc
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 46257881f45d02..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3e568123255eb2e3fd6b59395ebc72407743f596ead1cc2efaf1f2cbdb88f554ea53cf7caa3f062a8a6695eeb591a6f6ff1304bd0915b148fce4849c36e915e2
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 0223a5fba03458..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9b387576651cf42d2cba762509603aa0
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 62af5484db9ae8..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-31020590c9f122954eb545515adeb3b93894caa7098e6a12b70cbbd71a9e1b71f4d3185d0960f4f2e192fbe74a0d2c944d1cd897c715aa3e7182ea3cfc82a63f
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 7a9589bf3255db..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-8eb752c5fb3f1f013a91ba84db33f268
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 1b6ab531d9ebb0..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9c4b198cceffc5e3c87e91fa4f9223b973b090f5a343b1ea84425cbb445a7d9a34926ca55e4febae8783fbba3390b3a1aa2bd59b3177335a544c862bd55b8ec5
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 7a2282753a6f82..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-bffe25dd8fb78bbf881785eef64202d5
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 07ff8b9147ade5..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2215ee8ebd3415f35a4f488537b2bb525bc8fef74419b2182bb8bc5b2e236fa7fb637c6aaa234e840c94c3940a71441e5a2dc9e7fb5d674372175a30dbb5fa19
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/Zlib.v1.2.11-10.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 957c62c98ebf24..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9efba92de9aa49b43822309fcdce046a
diff --git a/deps/checksums/Zlib.v1.2.11-10.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/Zlib.v1.2.11-10.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 6e8d8f638743e5..00000000000000
--- a/deps/checksums/Zlib.v1.2.11-10.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-c76553c7b79c8f2ed20dcbfbb55ebe06636e238c3ba7028beef84547cf1ee82c2a2a293712c9241e7b60cb5cf6c907eec7046b272a0ef26aed805eae4947a9c6
diff --git a/deps/checksums/arpack-ng-3.3.0-testA.mtx/md5 b/deps/checksums/arpack-ng-3.3.0-testA.mtx/md5
deleted file mode 100644
index 8e47827354ee35..00000000000000
--- a/deps/checksums/arpack-ng-3.3.0-testA.mtx/md5
+++ /dev/null
@@ -1 +0,0 @@
-2826846e98bcb009d339fb69973951d3
diff --git a/deps/checksums/arpack-ng-3.3.0-testA.mtx/sha512 b/deps/checksums/arpack-ng-3.3.0-testA.mtx/sha512
deleted file mode 100644
index 70eec3327a7196..00000000000000
--- a/deps/checksums/arpack-ng-3.3.0-testA.mtx/sha512
+++ /dev/null
@@ -1 +0,0 @@
-00af7f2353441c4197c52d105d3670fe250a312b8e67ae2794246f2ce8cd0b63585e5c5ab764921d357efd9ad685fcc0ee5b8b8ee7ab9af2bea26ccbb97c50ba
diff --git a/deps/checksums/arpack-ng-3.3.0.tar.gz/md5 b/deps/checksums/arpack-ng-3.3.0.tar.gz/md5
deleted file mode 100644
index 81b2c9e09d9a24..00000000000000
--- a/deps/checksums/arpack-ng-3.3.0.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-343db685ebedbb9690e4ea5f9d62c19a
diff --git a/deps/checksums/arpack-ng-3.3.0.tar.gz/sha512 b/deps/checksums/arpack-ng-3.3.0.tar.gz/sha512
deleted file mode 100644
index fd2a13c4a18f58..00000000000000
--- a/deps/checksums/arpack-ng-3.3.0.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8d0e526831d62200af9d939e77e41548875caacdf39543c3f33a00321bb18fc04ffa625d9c4103f84c1f6a03a3c362dc1df8571db9a525d46487e5d1ae682474
diff --git a/deps/checksums/blastrampoline b/deps/checksums/blastrampoline
new file mode 100644
index 00000000000000..3b5e4359e43ec2
--- /dev/null
+++ b/deps/checksums/blastrampoline
@@ -0,0 +1,34 @@
+blastrampoline-d32042273719672c6669f6442a0be5605d434b70.tar.gz/md5/f380e4238a2dec186ecfe9598f75b824
+blastrampoline-d32042273719672c6669f6442a0be5605d434b70.tar.gz/sha512/00437a96b57d99cef946257480e38e1dfdf325c46bc4a1619f5067565dfb7d9f668b0c8415badb0879b933cb1972f3c4e6be4c9e63a8a85728033e2183373819
+libblastrampoline.v5.1.0+0.aarch64-apple-darwin.tar.gz/md5/edf090a17d862c33d611875058438757
+libblastrampoline.v5.1.0+0.aarch64-apple-darwin.tar.gz/sha512/a3413c7d46c04318a5bebf10d6f930d04b5997d4be6be4e2748a7b60f968d2f2be7de140eee6c699962a12e8439f68f144e5323dea17d91587e82f97aaaaaa24
+libblastrampoline.v5.1.0+0.aarch64-linux-gnu.tar.gz/md5/fe88a410d795f805756488915679edbd
+libblastrampoline.v5.1.0+0.aarch64-linux-gnu.tar.gz/sha512/cbd31304278ea67ddc0f766c4647275c87829cf5377c3851153b7568015f4f016fd0f3e095f479c33d23a50f4af8c38bae4555b02dcbf45a04b6e5a0dd3504a8
+libblastrampoline.v5.1.0+0.aarch64-linux-musl.tar.gz/md5/d4d8c393eb28953297b37a7bae79ed2e
+libblastrampoline.v5.1.0+0.aarch64-linux-musl.tar.gz/sha512/3b5dca87e089ac10486f75663b4cf7d404c71b040231b04e1ec5110d13f30ac620b4cb880040106273866d465da9bdda5643887534de8e35668a7ab545422216
+libblastrampoline.v5.1.0+0.armv6l-linux-gnueabihf.tar.gz/md5/8b5f2fbd5442bf31bd10836ffd177968
+libblastrampoline.v5.1.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/f1d6314c785afc0aaa3ebcf8a532312e676ca41d427b9c2abdea88c700df4d6a7cb5cfa54d65493e5c3d711a64062a20a5de7e3b75feee0653115cee7de05446
+libblastrampoline.v5.1.0+0.armv6l-linux-musleabihf.tar.gz/md5/8ed3013c644ab3be5dce013fb23fd413
+libblastrampoline.v5.1.0+0.armv6l-linux-musleabihf.tar.gz/sha512/da40cbb0114d46a66ae41284d36dc855aa52dcd3993643858308f18c5d8eedbf92fc8ee57d3e3cc2153f29670b40bc03a8dd01d5b49dde210c8a7a2d471a59b7
+libblastrampoline.v5.1.0+0.armv7l-linux-gnueabihf.tar.gz/md5/23b8ef9ea92a8d474646d814c0c91577
+libblastrampoline.v5.1.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/97789adc18a54b953ce8696b484a4314e734a8092a27f81f43c1ae269b592b18ba7c67082396220a1906ffb075895c34462be976e0059aded9f6a6948abb1672
+libblastrampoline.v5.1.0+0.armv7l-linux-musleabihf.tar.gz/md5/d5a47ebe37a4a234ee6a4f3cf830e8c5
+libblastrampoline.v5.1.0+0.armv7l-linux-musleabihf.tar.gz/sha512/65366692c074576733e3b3f15d011e326d6a1e2357055a1a0159db31cdd7d5ff0e9aba9a33c1f2a949e128ac10b72776a3f76907df4cadcf7e67ace934cf4ef0
+libblastrampoline.v5.1.0+0.i686-linux-gnu.tar.gz/md5/14a342ab1bd16ef61d747e99acc97e6a
+libblastrampoline.v5.1.0+0.i686-linux-gnu.tar.gz/sha512/8eca984912e69af769f06cd2b38d1df9d724e4e42d6d5b2fcb77a8e74b2aa9f9c31beb36d634e5da28d4d2f0838957f5c5cd336db616768d8ffb60217fe92edc
+libblastrampoline.v5.1.0+0.i686-linux-musl.tar.gz/md5/201e6c737df0c0e2f4327c395133969f
+libblastrampoline.v5.1.0+0.i686-linux-musl.tar.gz/sha512/778daa7a0d3a6fb8d6480a14123e874009f0fdc5f1d3411518f8d9975c45ca418e88d71db72af8465d4064f4c177d0abb70bc568df3a4c765eed7c5aeddca428
+libblastrampoline.v5.1.0+0.i686-w64-mingw32.tar.gz/md5/8ddf4dec49fac4888f94f90143126e5f
+libblastrampoline.v5.1.0+0.i686-w64-mingw32.tar.gz/sha512/388b797f4c86f0ea090058acaff0eed34c42d45092c001410d11a4a4da93668c1729453290872cd44615ee517d62546f4dc42005240a6c36e40e7152f5c9cf5c
+libblastrampoline.v5.1.0+0.powerpc64le-linux-gnu.tar.gz/md5/db626123ab94b489ac8b4d395b2f5cf4
+libblastrampoline.v5.1.0+0.powerpc64le-linux-gnu.tar.gz/sha512/8c96f518dea82057fe85bdb2ee867cc7abc33e9c53fe94dd84d097a16268630c22082db7fc003dadfc4749400f3465564088e05cabd6844c31b870319432c433
+libblastrampoline.v5.1.0+0.x86_64-apple-darwin.tar.gz/md5/65b9aae2f749ec608b61412aa1921d65
+libblastrampoline.v5.1.0+0.x86_64-apple-darwin.tar.gz/sha512/38e974c9260614d855b0b13f78e72bbd65aa889e88101d25441dd4e78ce37baf81bab7de1950d71d8e35b32d62fb88ac9c3f39ab5a4aff11d00619441bc003f8
+libblastrampoline.v5.1.0+0.x86_64-linux-gnu.tar.gz/md5/0ab01f256277b4ea96f6d83c50891b99
+libblastrampoline.v5.1.0+0.x86_64-linux-gnu.tar.gz/sha512/2b2178d74beb1c12e348f6469777d31116f26229c243d5e08a6ac36a74c3eb38854c1d82429d0e7cabee259d0d5220c47c334a561ea5caac6f61d91aa6b34f52
+libblastrampoline.v5.1.0+0.x86_64-linux-musl.tar.gz/md5/52a9da4586daa6572b8fe2c13db6268a
+libblastrampoline.v5.1.0+0.x86_64-linux-musl.tar.gz/sha512/04abc5a0b6f80f10d1fccceee8a0e1c58aba76a45e3f6662ce4115d9d39d20dd05b3859434037d21bf6c5088a5a428565cd86e1cf6d1676666ce7e3eb1921b80
+libblastrampoline.v5.1.0+0.x86_64-unknown-freebsd.tar.gz/md5/f2b66517937a7647086ba96acc81c6a6
+libblastrampoline.v5.1.0+0.x86_64-unknown-freebsd.tar.gz/sha512/c19654b97928bdba36ccf3dbecf8ca994a46929c29c5c120d2d17062128a3df8927230fe7c418d6f780557abb8ce94b6a6a023bddcd3aeb91c8302cdbfe2b39e
+libblastrampoline.v5.1.0+0.x86_64-w64-mingw32.tar.gz/md5/4b50ad8399c733ee5d60ce1ad00e1e5e
+libblastrampoline.v5.1.0+0.x86_64-w64-mingw32.tar.gz/sha512/6a0f1d061350d53dd2a030ba11a0ac02c5ae598cd2c21dda39f95d81a2b0f43a454d60cf32c2fc0546df074181100e2d247d229d62c4a6b94bc7b697b02f0e0e
diff --git a/deps/checksums/cacert-2020-07-22.pem/md5 b/deps/checksums/cacert-2020-07-22.pem/md5
deleted file mode 100644
index ec3b25e2397088..00000000000000
--- a/deps/checksums/cacert-2020-07-22.pem/md5
+++ /dev/null
@@ -1 +0,0 @@
-18c68c9898be980227f33c213a2464aa
diff --git a/deps/checksums/cacert-2020-07-22.pem/sha512 b/deps/checksums/cacert-2020-07-22.pem/sha512
deleted file mode 100644
index a37695226f9475..00000000000000
--- a/deps/checksums/cacert-2020-07-22.pem/sha512
+++ /dev/null
@@ -1 +0,0 @@
-0d49bd1435a25b113a34ac38b337a9c904b6ac720824fd55d410ff6d8f6d0f637b54fd92cdff31d1c632b6a77f35fe55de9c756f35365387cea94f0fd93631b1
diff --git a/deps/checksums/cacert-2022-02-01.pem/md5 b/deps/checksums/cacert-2022-02-01.pem/md5
new file mode 100644
index 00000000000000..e287f024b8e189
--- /dev/null
+++ b/deps/checksums/cacert-2022-02-01.pem/md5
@@ -0,0 +1 @@
+3b89462e00eba6769fae30eebfb9997f
diff --git a/deps/checksums/cacert-2022-02-01.pem/sha512 b/deps/checksums/cacert-2022-02-01.pem/sha512
new file mode 100644
index 00000000000000..a5d8840598343c
--- /dev/null
+++ b/deps/checksums/cacert-2022-02-01.pem/sha512
@@ -0,0 +1 @@
+75f5222c23d14d194856d3fa58eb605a6400cbf0068e208e1bc75a4821f841c39a95dde161b904db54ce922efa384796ad5f2e2b6ef75327475f711e72652388
diff --git a/deps/checksums/cfe-6.0.0.src.tar.xz/md5 b/deps/checksums/cfe-6.0.0.src.tar.xz/md5
deleted file mode 100644
index 27d1ed065a0936..00000000000000
--- a/deps/checksums/cfe-6.0.0.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-121b3896cb0c7765d690acc5d9495d24
diff --git a/deps/checksums/cfe-6.0.0.src.tar.xz/sha512 b/deps/checksums/cfe-6.0.0.src.tar.xz/sha512
deleted file mode 100644
index 25505f252639ba..00000000000000
--- a/deps/checksums/cfe-6.0.0.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e886dd27448503bbfc7fd4f68eb089c19b2f2be4f0e5b26d3df253833f60b91d70b472a6b530063386e2252075b110ce9f5942800feddf6c34b94a75cf7bd5c6
diff --git a/deps/checksums/clang b/deps/checksums/clang
new file mode 100644
index 00000000000000..68f28d9640b216
--- /dev/null
+++ b/deps/checksums/clang
@@ -0,0 +1,58 @@
+Clang.v13.0.1+0.aarch64-apple-darwin.tar.gz/md5/e94db5924ccf13ba54642df7c93c69a9
+Clang.v13.0.1+0.aarch64-apple-darwin.tar.gz/sha512/1f77b8ea9f67e46a6fc65f58ba5cf5c451d97e8f94c3842e228886fb7571a07e544de78872e5d7f201e03a6b43ab0d94b9bfd538a3f73d7b6b53f442871c61df
+Clang.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/ed984baafbcd36c4627a45dc0edf9a11
+Clang.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/13ca14c74e4544bbc069ac562f296a73bfa347cb5cd015638f1bffc047f9395aaf49947040a61ceab360a50cea928d002752b1b01210662c286981832844c584
+Clang.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/1f1207b0522351e57a55f0e05c98d6ce
+Clang.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/7fa39fe15b3aaeec37cba5563a46423990b48bfc8a1f185797050de0bce9293ef0893603aec578c3aadbebab53d07caf33198eda7507876a49be9ec15cdbb1fd
+Clang.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/37b49d0d02a5911b74523cb8f8a1abf1
+Clang.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/1a5307498c9a1eec6e80bc1641fbd5819847ce504ee0c53c07cd09a5b15976649750364755b3ff5f851ffa197eaf6d69a74c4a96cc3b3e6d44c6ca66afd3cff9
+Clang.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/ea5974f42ceea627ba96fac88e0f0ed9
+Clang.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/15d2c0526accb8610e64f9a4bf9cd9d72c3c903727fa4af129fbdce0af350295546c8a5e58c3a59196d511e30e57d7b0c448a087fadb60806cc0ac2fc5dba2f9
+Clang.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/3db46a89eb9323734fc4a4f6dcdb558e
+Clang.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/bdd974cdc6ce4974fd1a0e594535efc66ffd14d9cc4f6421046b836337e950d983d67f23e7af12b59c62d0254df05b5a8dd19a5503e67b00d5d9442d85a789ef
+Clang.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/fa0f8ba9ed675da78f19b7212a3f8a89
+Clang.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/b96b4121bd327fe004dc335382e2aa5193acdee411ec5b5a5fc449c209bf94d2645d40f43f15e9ddd92d5848a1f87c792e2852dccba2d469de2e1a9ea95f5ef6
+Clang.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/33e2cc2bc2883ee2d34c19b89927f736
+Clang.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/a35f10aa8412b008ec181d71dd575284ecdc103cf41f0e1c52c1e856cc26e77f566cfc3a581394b52b87d4fcb11616b7824631c389ee711c5786d43dc5ff52de
+Clang.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/8990c4b777810f1335bfd2d2ace2cf3e
+Clang.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/e92999e8112316b7806756967cbb1424a68c9415e03c7f9c1203a0450485f4f1d48d6e8341439ce3d63a9e88c4b6db46ce4f886db353e31dbcf3111f8e5744fd
+Clang.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/91a4810d844aea695f7114bf1ac80207
+Clang.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/310ce9579c637de268e18c4f5cc31f5023784be36f3073273927c9ade7299326fb801759f0f5828cdf04580104502651e9b532d4a6b2934aa8d39acbad118956
+Clang.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/73c0c2c6533af4964892dba587c8b5fe
+Clang.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/b0b311acc95a731fc791d578b6b1fc65834c98e1b551d91f0a4ac03f79c27af16427f0397a1f6f380ad4b77c9aa38465a207cf472f39e0651b39e54695150481
+Clang.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/e6b6bb1aa23fbbf60ac52bad871e9dbf
+Clang.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/66e90be4aed8a5cf9becb929915156b3c2fb0bb8b2ee8c3a8f06c3e7c24fa84b69b37493843d0609020b6a7263b0df7ab2793dd0f6ce01b79d7f5a350cde2ac1
+Clang.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/9dcd26df744a47a1cefea19f17935b29
+Clang.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/a72d97d581f99be56cf8a6853662c77cabb3001eec4fcb802ec3278ab84517e96726373414f67c87c0926e25ce170f22c930b2bf804b0067b1511d6cfc61b00f
+Clang.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/9c1094a09da852d4bb48f7a60e0c83cb
+Clang.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/6f62fb75f64c8b8adbae1ca8db44c4a4795ad6eae0673982aa18122282fb784c796107cc3a9a54e435694b4a898c63c86797317d7e37a0d8f1110f4fcbe4ef58
+Clang.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/5d22a3bedc62200471878a42001fc39d
+Clang.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/7fb2041030245c2e997f51cb3406ed5307def6dd5c23b1a32fff19b3dc03b59de1a0f2d6d530abb89ab0a2514110dfdffb53bb0178337f29f28d3fcaf00f8ce1
+Clang.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/fcc97104506c26f5161fd94b973dbb46
+Clang.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/99a42e5d583442432175357546811c7fede695f4d3d6026eb9d02585539d7c21ccf1adb449de47bb248d602a5297ae1923766fadd52487806729f95381ebcfd5
+Clang.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/1a712b6fa8672da1db6528dd655a8bf7
+Clang.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/eafc025c261f79dc646766aced9962b1901c820a2691e230f2610f499687905b34feffe65a241b885187f79dd83688dc796cd5adcd3af304effe75190098d6d4
+Clang.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/7d9f36bc0be2b02443adafb6e57a180f
+Clang.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/0642c87e349ae10c7ea8f48388a600ff97a276b23b7936ca35ac6d9a1f686c70d1ec4cc7e4a893aca13f8109b5368d2ca52113021d18ba33912c375007ac1051
+Clang.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/034d5fb31a4b749f7fcf13742d5d211c
+Clang.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/9313dcf2a807d349be44b827d34f44f9780f14a93e7b432ff99346c7e352c42e3938fc6fee508f9b1896853823f524410ce7fb85a7b3e542e474df3c20d810d3
+Clang.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/7b7286c7ce9e383a6180442ada1b21c2
+Clang.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/c9a10e970a93c2d0fe7cd1952f4c152a51c51648376ab0ebf41a736d89a20121c2f9744104290ca4377a397ee612d6af85f117817aea0c49a2ac8d4a861664e8
+Clang.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/53f47082122cd88d411af8ad98adf344
+Clang.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/8672668843e4aed4fa0c8acfc28066a2acfaffa47f46c3a4f6bfeeec4824269fc063860c848c737b76e009b15e8c0132ed6b63b2904b96bb1d0df5cf7d835022
+Clang.v13.0.1+0.x86_64-apple-darwin.tar.gz/md5/deb4584aa670642d499454aafe32b809
+Clang.v13.0.1+0.x86_64-apple-darwin.tar.gz/sha512/e4de906392344ba21a7ebee11a8bbce0e422f8460d39de31980a9637a52e88d49db6ea22b094d3ea1c27283062d7abc6d45fc570aeddc067d1e28f573c00c8fd
+Clang.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/8c999db749701fd4a4df7486f740c89f
+Clang.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/ea9661825f40a31ae238b5644693767106710a9e157e1f7d715dab5faf63ff8433117e2507eeb863f0a25deed669cc0bfee750af961f6d167db27d7cf8b75819
+Clang.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/7f09aa135ce9ae07586d075414a44e87
+Clang.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/93f75720fd620ca46997c7fd6f401cb45063afc8f860eb3c361f285d85ab5c4e902a13ca3abefae48cfe1e8fb902adde4341f2aabf72c3b188573054b81c6b9e
+Clang.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/fd701653e03d835e67b5c0930c281034
+Clang.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/7cf9180caa5d4b333842a41f3f451cd389457aee9ea83fa2405f655804f3c74d9be2d9e887bd6a787fe817afbde36ad658d4ae49b63ec1ebce0ed77c62326442
+Clang.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/15fb3d47ee056a15d8f14799ff5fe45a
+Clang.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/3cc641ebe266d959e0c5699c59d655095a5b596340e991cc9d4462a5674fa50d89d7cc1937582011464c8568306babe21cef0c4bd1d99430687fd17f3a6f479e
+Clang.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/b4f855841995f513a632905184e6271c
+Clang.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/d3390ea1ee311b49d355f9a6c41669575fbd3b66ddbc9791cfcb47673e19796d3cdd210469fecf351a57060d7447d9678980f022bbae1b4cda5799e8ece6aecf
+Clang.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/323038a69d2760ac4c4cb6f3f712231b
+Clang.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/51073b2862447c184c54b47a02d27d20733024f1d11d4d2f15938c47bb47f94002b56dc60994165cf416079b74d1850445d521811356070bccec0e32f09071fc
+Clang.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/a7e7405baa541ca5bcf44468274c179d
+Clang.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/07590b6f3ea2456f5bbf7aa87248b8462e60b8ca0f8c4c4ea419bf093efec232057551aee9e93114bff2cd7ee9a76ccec9515be632b94f4e6c17af4aae3478d6
diff --git a/deps/checksums/compiler-rt-6.0.0.src.tar.xz/md5 b/deps/checksums/compiler-rt-6.0.0.src.tar.xz/md5
deleted file mode 100644
index 54ea354573ba13..00000000000000
--- a/deps/checksums/compiler-rt-6.0.0.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ba6368e894b5528e527d86a69d8533c6
diff --git a/deps/checksums/compiler-rt-6.0.0.src.tar.xz/sha512 b/deps/checksums/compiler-rt-6.0.0.src.tar.xz/sha512
deleted file mode 100644
index 2b5443dd79d964..00000000000000
--- a/deps/checksums/compiler-rt-6.0.0.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-717bed116ef43ebb2e18daf6fb737472edf57564947f53fe6368d3bbb080f63e986c0d1b94dbd087be998196ad7be54f4361854f8eb5214600b82449ba02c9c1
diff --git a/deps/checksums/compilersupportlibraries b/deps/checksums/compilersupportlibraries
new file mode 100644
index 00000000000000..86250fdc633908
--- /dev/null
+++ b/deps/checksums/compilersupportlibraries
@@ -0,0 +1,92 @@
+CompilerSupportLibraries.v0.5.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/e0651fbefd39d405ec97d7530f2887d7
+CompilerSupportLibraries.v0.5.2+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/0a067b7e37d98a4c96dd1400b8c1a07c82cc223d11a93a0ee2455c3b55b394eee0cb251e26206495453f2cf8866822fb586ffe105f44e3380fa949adffe8b83c
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/1f4a5e98cd88a08029326ca5e9d47e9c
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/696f359746de592d4e30dc9ad19d5e07ebc1e6635e1f082e249747c42338ef04ce885fee5ad5915ec39fa2866af4265bb6ef580c75874c091a15b64d02626123
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/8285fd34164fac0410fcec6bb9d8b8e4
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/df0869d357326c803d8ff33c9734f01457d877e80c4af33745d4ca016144eb0c52fba7aad7e1098eecde3fc4cf41ed971638b4b6f901c7306a2072e8c14c3513
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/82add6093bda667442236c04d84b6934
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/81538d75950cdf931f9aaa932d1f9cf40998bc256924c3231e984179f6a5c3eca0f7e1ba315b21f2add3bf9376e3a45ee59ccd8d9f6d765105e05da25bf65cfc
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/ee0d6a9f0a1372e36a02a95b6c07aefc
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/f248e57249af88520f9c7ac32dba45ca03e5904606b4edb682ea514c31a9a775198d02f0892e79124326e184d7906b7a13b0e4f3e7721352b8105cdfa72f89ed
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/dddc8f7a9be9f07e9738e2a027fe8a0c
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/36f9b94f470d451b9c3c2429026292463434427625563240467f50374624a69fbca7ddcb0678937a58d22d32a8157571d3e201c47cc9a2484d1d75d4c0f77ebc
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/12b7eb088023eaf9583ffa6f9f0e18ac
+CompilerSupportLibraries.v0.5.2+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/a5f5a6053e63ea1fb0185a0c3a7752a938373da847dffb872c1227ed3a0a80f2de1e4394baaaeeb8e0d8f2a4da123433896742cfdca6f94343bd4d0ab3578c65
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/e5e6918571981e4cfa5a2951e59f2df7
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/5d7b0f4f55b6726ae7317edb170cafb6a2c4563b0f4a90c619da95c120edd8fdce118bbd1e7168110f75cc899b857472fd524a396deb6d9f2552f53c861faeb7
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/7ae11706e9c6c043ad771f2700d06591
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/4f2f01aed00a58f4393cfd4608df1a6df6c9bff6e352a02a2b9af13f14a4436611769d64d082d3b151ba23d3d905ae2700bf469b9858249757ad7b5aae716d6a
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/e922dad7dad1d5f80cc154a6ddb6de35
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/3fabbcedbbc4abfe1e0c01c387bbe2537105937674877122b5b66d6015944a58f547106da1e185c1434de0c1883d356f8dc52968f075a00c6a8a52edaaf88957
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/53741f61d806efe045a5abe0e748aa36
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/b975a8fdfb736ef2b1aede2c89e390df261bfe8aaf8ffdb37887add09263d95f46642c3898ac19ec6098cdfdfc7f0726436dc273e9f70f10fe1abf4ea945277a
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/9687cf768c6c2879261e385c44ba490c
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/02f9accf8273597f6889677de64255e4e399d67377b5363ed31dea7e2118cc24d3b7fad7c0632aea79dee44250b1ff74bf2fa22e4f3e7755de65871854112c14
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/b62a81b9f43903b3de6fa1c78c03b89f
+CompilerSupportLibraries.v0.5.2+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/d44eecb30ccf19bc8dca41c738dbedd2bd2cb6e379a3ab181c955cb9cdf9bae8efeaf7a90c85dc7434520ead7e910d38e92b448cff7aecaef0902684e9b06c9f
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/e31780333339ac64f54ad434578d6294
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/9c3b91ed90f3393dfc72e7e2feefa60afe6ad457971950b163ffbecafa41cea43a15cdfadd8f402fd8fb61652c224f5b1a04c432fb0f43593749f51ed1340116
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/0f7bdfb908aa3d721428a1ee8412b594
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/3199da41c3df3d702a557c8b5e9fdde3a47c12d4c45fb9094fd194cbbe667663334b6cc0a5169fcc755790c4b5fada71c5094dc8d9a7f8b6c836d3f4c4c6e509
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/f455758e436750092ba2df65adcfd380
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/b5d0dbdff19b5ce076b8ae7b907da25fdbe05eabd47e46987f9987690a3a670d14bd3d2c2343d366ca1ee861b85fcbaccc1460ba3a73571686ef9e4330427b65
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/4cf3790d881b829b4b8da882987d5a40
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/ef5810243af32135da0cb7d08ae35ff8a2cce50c05200450154aa860c181719844466b787faae551aa71bd94e721f2d7d17ab14a049d0558666037862aff2f6a
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/a49e1fa6e040ac86ddd85a3188f83a76
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/cb0292651392a14f952181eb7a4a0ea6359632e96b017169cf4f1792f44f2846b5d6b2b5d334dee490262dd1c2d421de49d1f4a919402392f77fdaf60c1d19a3
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/3f64969e0e70dc8644fe09637dd1cbe7
+CompilerSupportLibraries.v0.5.2+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/0a71f8b731911019666bdc82f42e306ff1801321362ce6fe58988c9a1b110cd032a01c11fd0f9a6a3fbf6c6545f3287e363f5b3c40ef2eab0659638c38687196
+CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran3.tar.gz/md5/28f58931f66a3405fc4c99ce40724ece
+CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/d5290079264cfc6f716dcc9171f8412369e685c7ba0b9e82ae3d764de41671fbb4a24fdf7ebae9a9b913393837c2e41951326dbf3e870340fba7121709ebba8b
+CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran4.tar.gz/md5/f98763aae801cc7d88124bea422f13ca
+CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/da2095a462637ffcd0825949f4bcc86be9484c9e009648dc3c2e22e2fa19c65124e5e45f2694e85616df49b1181e2f4d2b886d3b83401c09ca58207db461ea23
+CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran5.tar.gz/md5/1bfee57db4f2bdd788e59e34d0bb4506
+CompilerSupportLibraries.v0.5.2+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/8f4814d97d6cd6c1f0c1d23fce875c40b6df7de7a8dc66e66681ba3c533120cb14d9d018808ff4e33dec53bb8958fbcedc9be6ac70817839ff89a0db5c0d18a8
+CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran3.tar.gz/md5/5da7af0483ffde929c58f3ae411f6489
+CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran3.tar.gz/sha512/97e56fe4fe0e10fa0d57ec10882a62d290829940049ffce7a8d81a843b91c7844e53d737bcdbc7a5e8206ca9820a7066fcdd7d0eed1e831d7af96222ccca1224
+CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran4.tar.gz/md5/a0b5cf513f2f02107c8887ea5e30cdda
+CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran4.tar.gz/sha512/aeeacfb58094751fe5cec87825ebb02a22c58d3e7300b6ca6066eb717e28ebecff230838c32935ac11376a6efdd5a0c44fe0c8e7d5b9a1f0165171c2b67a2d8b
+CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran5.tar.gz/md5/569ef42292d8cfd157026b434e93fe4d
+CompilerSupportLibraries.v0.5.2+0.i686-linux-musl-libgfortran5.tar.gz/sha512/daf543fbe7e80fd63220f7c08e0d6b51d45ce9e0af592a591eecadcaac9b859ce596df2bf8fcb3fb72fb799f869d0caac28acb5d26b3c3aed6dc80245b90dcce
+CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/f4e0f3d40f7f77d32f26424dedff850f
+CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/57e35c39c4c93919cdbbe33891b5938918d33840ad33ed51a010f9deab791d60fa2d030d3e14df6e445e0607dc9280b07ca287a3273630bf7e245d6ab8069cbd
+CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/d366731c11314cb908fca2032e7fefca
+CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/a7e087e718f9d8cb4957b8bf3a4554faae97510b25d88a3e9ae4241cb69efa5b520bd9424a0072e7d712c9435e6900690c56004a716a716838367e91fe20e11d
+CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/eff855bb45f038c9d74c67ae2eed5641
+CompilerSupportLibraries.v0.5.2+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/e674d60247086bb8029270406d246a4857e668442a77299a431ec837446387bd1ed2de5e0f9f6985cc6e5d15b6692f40b18e0016e7c9d4e95a3770dffc19b44d
+CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/0bfe78d226b3d89a83b54c6ff39239e1
+CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/fed14514c9603a1e4772d2fd5f4a48da751c10e34b6fba5e0c35ff40b8ed165af6daebc051fa86751bdffb8f820ac779215dc3b38c4ff5c1624214b61d7ad1b0
+CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/d5219b60117555a3ccd41ab406d485f4
+CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/9268d7c2c6ef649dc753757f9afc7ac1382e521d02c58a91eead9873f2a80f215f3b67f9a33abad53c8bca18c19ae3e63804e01e3109c939d33555c7ec8c5b1a
+CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/1f620c9a049e00b8b11c3970a23f2761
+CompilerSupportLibraries.v0.5.2+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/6ac900dfac9268334c9b54badbfbec323151353e8d87d3199f875a505febf863766ded0c52bce2939e5975fa6e35a28cc16c88e7c1cce37d65725fe275813606
+CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/c21c35b00ed7ad0171d63006f1a4170d
+CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/f993a616a75b1f5ee140ed47b6e4aa981cffbbffd795fc0cf9df9397a6366a4507a158530e961c398bab656e7d51a27be026088678e0c19485ef0bad136bb69a
+CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/f0cd5c8631256f3b903e95ad3623d702
+CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/81de3f699169254fa83a3ab8b6063ddfd300065edf90f15239b0a304f3feea9534acba7d982058a7712ce94dcdb1ae036502f276813a96f8254e323787556d63
+CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/6030c114c1250e99958a0727da9d6daf
+CompilerSupportLibraries.v0.5.2+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/1d4be1c0718aeab056368653b7f34bd5ac3c85edb9fbdc2752b8c4877fcf5d080774506519cf285954485d806bccc18323f6c45f069db8bd314d064a2cc1ed66
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/b45ac0c04357de9d013df598dd13f3bf
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/42174d05c7165f87693efa09facc9405c9d6eab490c4b5fc74ba02e1e2e871799a24dcb7496e0693f30f9c3fd7e81020b77a3dd946832288769063f6d2a31aba
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/761998b08e4b460cec95468adb850c31
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/32853dcb3202e735325e1e0e3d88e2e446d7c88d45bc462d4e91f7d57dfd78b0f3381302e72163fafdb1c2cef53d4822e1c52289081e06b7b74d67e2ed0d34c2
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/dfd50d071702f903213ea0c6a42ad81b
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/3d6ecca7689bcb1925801d26a328790228c564bb731f6fa25d88763eeb22cccc4409dd6376c7b574ec242fbf85e41fd82d038a2650f8d33bb850b9a9a9f9a722
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/0b374bc55dd0d5f4cf34a12d4901c022
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/10db23cc1d1367f40fed6c6cfc232fdc49f55e666d3623faa1af40dd781ea7a5d37b6b5a39524f0fc57d6d49947f429389bbf7075f10163090d7ea48903e688a
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/1e28cdc7937a500b081a1f4d340190f2
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/0b635b8f594739453033fd1dc5496976a8fff314dd078e2d8248d3c2136abaaa610ebc45252a81d16db9d91a0ec20a552f1bcb65ed3b50a627e40168e7f100e0
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/f6fcf32044f69d8305a718eeb7651614
+CompilerSupportLibraries.v0.5.2+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/5940a145a3203d5a4a9b7cd9aab45b8bcff08a43a69a8fea67a9e18535625c8ecc051ba344421253b2f96eaa1a007d42555897a8f8aa0e8bd5dbf1ddbd38f197
+CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/eb46728ef7d3ce955d5a497a556138c2
+CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/922d3a85059e7cedc6e0e52687cd6f22cb708677a65fcab86f7571737d8f17455f15b3f1af7442ee5fd04a437f226d4eee374d0f353a10f8f7a87160d7a2351d
+CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/fc1f4fc44c08f0c3040b976558a35e3e
+CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/5406251fe1d1d1901ac4e6af3b8e9394fcaee2fa6a4f3d2817161a1626bc6b45d7b184f9bdd3d2e6571640f40b4e06c61f321358ad8fe484871ab9b878801a95
+CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/e1b52fdb233c9667610867e278e7719a
+CompilerSupportLibraries.v0.5.2+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/efadc4efc419808cb289c8c8f52664a72f2646bad2e8e02533456cf9afd613d4cbacd121da786316206df8f65b5264498f25adb04f7673121b2a58a20c4a75b9
+CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/a449351de41a3140534d278aacedc54e
+CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/db5bfbd161eba076598465cfee277418c6e9f4f0f7c4672a437c68ceff374f600917fdcaaa9dfdb945103d2b5c9786663e8e9403f6fdc796cda7c529dadf28ba
+CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/facd6a008270b85d08ca835556921127
+CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/236438e05eb3f50063aea90522e61f10a03c474f3c26117c071bf94d4ca24fae56e09a565cbf00dc5d1eabefec804fa5503ecbcc324b5da00a65b5471fccfadf
+CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/cd294be65ddd327d6c0feeca8b13f922
+CompilerSupportLibraries.v0.5.2+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/73dc99009d25fa0ebafa77d7c5747d21a6e0778a6266a2408df885d9553e4b8029c104e1fe174526d9261252bb564128ae7cf9058268475d168c79d19ee4f0c0
diff --git a/deps/checksums/curl b/deps/checksums/curl
new file mode 100644
index 00000000000000..4b6e8da990d69d
--- /dev/null
+++ b/deps/checksums/curl
@@ -0,0 +1,36 @@
+LibCURL-fd8af649b38ae20c3ff7f5dca53753512ca00376.tar.gz/md5/f082283e6a35fcba5b63c9a6219d8003
+LibCURL-fd8af649b38ae20c3ff7f5dca53753512ca00376.tar.gz/sha512/3bea5fa3fb6d29651daa923ae6bcb8eeb356ab9f2a1f3e005a6b746b617b0cf609aed4cadda4181783959840873c04b18e34e45ab973549169d19775a05ea01e
+LibCURL.v7.81.0+0.aarch64-apple-darwin.tar.gz/md5/16d584cdac9f1756de1935c844f2095c
+LibCURL.v7.81.0+0.aarch64-apple-darwin.tar.gz/sha512/38f800e309fddb2cd103ef5c65ad1ef2f7ec0dd7711e9afdb716b96b802c7fe089b04ea8d2bd2e675d62adc3b8aca3c7a243780f097b3466a496dbb25d2f7807
+LibCURL.v7.81.0+0.aarch64-linux-gnu.tar.gz/md5/6f70f7df6325bf6b62531d52ad313ae6
+LibCURL.v7.81.0+0.aarch64-linux-gnu.tar.gz/sha512/303fb30e2859c9d11fe64e964405ec2d4bcff4bafaaa5815a5548fdb0b42fca91fdfdf85473737b03399817f0ca6e23d870f56c354b0e53dd6ec142f2c69b182
+LibCURL.v7.81.0+0.aarch64-linux-musl.tar.gz/md5/b7aedf4bcbadf952c600d30643a2e284
+LibCURL.v7.81.0+0.aarch64-linux-musl.tar.gz/sha512/8bedf575e4eb2d4844b97b13b00f3d2c1fffccf10c1adbe11392053f7f956bd7592ac32a1eada474c57cc8d77999e214945ad6cf5242e577fa9ada29b35eaebd
+LibCURL.v7.81.0+0.armv6l-linux-gnueabihf.tar.gz/md5/ed25c1478101dce0e37c18c68bfc2287
+LibCURL.v7.81.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/6bc00770fea95aa01e8144738833990fb9080807efc3bed31b8ebaa45c04fe2817d8bcb0179f0322d60b77e4dd59514032978a680320fcc20287a0ba549e9972
+LibCURL.v7.81.0+0.armv6l-linux-musleabihf.tar.gz/md5/ce3591ab3e9b5c1da0b7f44ac3c03ff5
+LibCURL.v7.81.0+0.armv6l-linux-musleabihf.tar.gz/sha512/355c9f5d278d49329dbc56219df64f5d2b37581e1ee6cf2100deb52102f90ae7c9fdc047b9a341489985062d2461c058c1c8feb557776e7cf1563d4f49cb0a08
+LibCURL.v7.81.0+0.armv7l-linux-gnueabihf.tar.gz/md5/1e86f1abdc9ba03f26155f46db952150
+LibCURL.v7.81.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/cc305e36e7427cbfeed7d5ddb10d34eb6f7475e1e9695f829fcb6400498ed5307051ebd31a28193b99cf11e87f79cb4f8a66e589f10b76b9ed6898a11e917b09
+LibCURL.v7.81.0+0.armv7l-linux-musleabihf.tar.gz/md5/dfaf544cdcf189cd09951aaaa26fbdc2
+LibCURL.v7.81.0+0.armv7l-linux-musleabihf.tar.gz/sha512/a412fef9e80f956f10092996b29c86f3fd673421339a0c502b2230bbca97065877ef379b18380197d071234abcd818edea797c739410c78170244c7eeaa141f4
+LibCURL.v7.81.0+0.i686-linux-gnu.tar.gz/md5/b8561fde02ddfcb64f724cd037cb59e9
+LibCURL.v7.81.0+0.i686-linux-gnu.tar.gz/sha512/904c043db84bef78f1bbb7b7ae1ba177942ad316ec39cdd7f28f9b2d3c578b8a835eb86d8ee91b604ed14e10b9200ae60ed8312e8a1ab7684e20d75536242e60
+LibCURL.v7.81.0+0.i686-linux-musl.tar.gz/md5/5fc2e3fbe3ccc362488e79fbd5eab20b
+LibCURL.v7.81.0+0.i686-linux-musl.tar.gz/sha512/495be4a6ae0526c5ac6983e96b342226cfb2fa5c203135f0a402bbf3e8486d820454b8964c1a9fac4695df1619e5555a61a8cb4a3174c99cf0e8a3546a7f8749
+LibCURL.v7.81.0+0.i686-w64-mingw32.tar.gz/md5/24aa660ea3f5c019fb81f609bda7c44c
+LibCURL.v7.81.0+0.i686-w64-mingw32.tar.gz/sha512/64f75cde988dedc0abbabb912b90850b07c54b24f8544125d6ceac5989337266cf3ea78b0758b58e3a490c7335090b8ac45d1282a2fe15dfb4fa93f55d4a46ab
+LibCURL.v7.81.0+0.powerpc64le-linux-gnu.tar.gz/md5/26568c1b5e75fe00189cb6ebe6fa9ec2
+LibCURL.v7.81.0+0.powerpc64le-linux-gnu.tar.gz/sha512/ca7b2bba5190500275236966b7014935285b22ff551698a532681b970e461feb507fbe682ea95833ef453bdb5bf0516948fd9ca8971e10349252d286593a4792
+LibCURL.v7.81.0+0.x86_64-apple-darwin.tar.gz/md5/07850295b3ab6bb6cd63fcd9d4a35e6d
+LibCURL.v7.81.0+0.x86_64-apple-darwin.tar.gz/sha512/cfc9fdf3f0891ce26d077696a4059a9fe0d95793dd391fc530b94367d074ce96bbb9f8a3af4cb5dcbbcc8c4ae160fe17146011bf805263ae4fefc36f320402e2
+LibCURL.v7.81.0+0.x86_64-linux-gnu.tar.gz/md5/39dc13a4ed2492a9ce9675737e8b5b10
+LibCURL.v7.81.0+0.x86_64-linux-gnu.tar.gz/sha512/f6e1c439620717be028a28fc9878d1618329aefe92561a2d4d95026bbe88c91526bf98a3b2e4643f47ad3ac047986c4461c5ace67412386f2ed53084826e5523
+LibCURL.v7.81.0+0.x86_64-linux-musl.tar.gz/md5/c7dfa116097f19421bba42728567a543
+LibCURL.v7.81.0+0.x86_64-linux-musl.tar.gz/sha512/91d3d99d67243bf6eac3aca09bb59d6b41bb5dbc4d7ecd6e81f84a9f7bb9a619ba5317ba06bdbc59ba372b0a9c5ef26d6d9654e8661ec6c890ef8bb189fb44ff
+LibCURL.v7.81.0+0.x86_64-unknown-freebsd.tar.gz/md5/a19342f14c554d1a4a8355c17ee9e662
+LibCURL.v7.81.0+0.x86_64-unknown-freebsd.tar.gz/sha512/45ef0edb6a850ed0a45e7094fb5766b59ad325c29612a269a3e3a89cbc5fe62b06f9967bee5bae1239d4884e12af751e8c5054eb124a4ecdd06993b04aa6ea05
+LibCURL.v7.81.0+0.x86_64-w64-mingw32.tar.gz/md5/cffc213693c62d651f9cee6ed726eb81
+LibCURL.v7.81.0+0.x86_64-w64-mingw32.tar.gz/sha512/4b15a3240152aec816e16a25778aa5f5c26e8d3fc6e1db326ff20bafe1dc1e84f665dbedbca3a12a9486768d6128c2d1f18d07f812c5b74878bfe3173f130229
+curl-7.81.0.tar.bz2/md5/f42ab772edb85374fc985ae65810439e
+curl-7.81.0.tar.bz2/sha512/4889e94998cb9da3f05a70e61e7a0599a0fd3529455f5b3664ede255a834276f1d7898bd370e9b0fb21b0c0ffe4ce50c0757bb8bf896943726c538f8ead0cc41
diff --git a/deps/checksums/curl-7.71.1.tar.bz2/md5 b/deps/checksums/curl-7.71.1.tar.bz2/md5
deleted file mode 100644
index 020454dd04f8a0..00000000000000
--- a/deps/checksums/curl-7.71.1.tar.bz2/md5
+++ /dev/null
@@ -1 +0,0 @@
-7c681ac816491ded4a11814ebc717734
diff --git a/deps/checksums/curl-7.71.1.tar.bz2/sha512 b/deps/checksums/curl-7.71.1.tar.bz2/sha512
deleted file mode 100644
index 8420530596f463..00000000000000
--- a/deps/checksums/curl-7.71.1.tar.bz2/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9c1737a85c2bc60d0b07bf9f85134496e23a8511b036bcc8d53085a129f3f8387154c24eb83bcd20f587de8158dfdcd4c54982ce1f0a513715b8457e783bceca
diff --git a/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 6c4c474d18e589..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-f6a70363ea796a63ed1aa2e0c68cca9f
diff --git a/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index efa055e66181a0..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-f4980b557041922dadb2dc89732a332dfac360471194d37e30c85518dff5072412d28213f802ed0f5d791717e3bdebba7ca0bfe11a52b2ef16e633adfe13a4a3
diff --git a/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index a0637956100d39..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-d349f16c392747610de2b1e30f5dc844
diff --git a/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index fe63670a71aaf4..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2694a265d05acb82e1be3dbe8f1ad533c83af038c3a8f184cf0a0c8088a09ebfeffae02c1821cbcacf2a34a2fff091d21a094a900ee1ab8fead8a78bd3e0a70e
diff --git a/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 09335169e63745..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-10b22af1aafd1d06efcfb9bad249d99d
diff --git a/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 287054be6b15b0..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-545a8801d8e176448306ed77b482393f62f97a0e7a3e169ee08a868d5a405b5e87f4698e2915740d56587173117f0894686b59d0f81d29f3c1668a40d0526ad8
diff --git a/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index 0a401a007e586a..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-eb01530f3e4be7d2a0b7b3480e71dadd
diff --git a/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index 7a5874ea4d773a..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-28cdc41f3e30efcfb1327b2ea5ce2405a0cb72238ee8722b2da925bbec4d683ded161621001f64d028268abd969d1ed655a9ec9758c9a6c1bacd96e4d9c25f13
diff --git a/deps/checksums/dSFMT.v2.2.3-0.i686-linux-gnu.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index 388ef52c51b213..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-1b78e8985d28b42ad083a35668d27ee8
diff --git a/deps/checksums/dSFMT.v2.2.3-0.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 9b7802f659ca78..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-757a74dbab1c483e524085fbe12d5b961282a412b86a6c3ef4be9d5a88bd3165d761eae7c39f49b3d3069253bb3a35704f9400b0377c16b360041e7929355b87
diff --git a/deps/checksums/dSFMT.v2.2.3-0.i686-linux-musl.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 80b917fbb40b38..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-52c2f2399832fbc68e1a3585b0fd2ee8
diff --git a/deps/checksums/dSFMT.v2.2.3-0.i686-linux-musl.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 631a9bcdadd414..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3132e3a17cc6dc9cd47c1f602ec7ab3620d526c09c0fda1c77bcf49e42671132853b6acf4e917bd327afcbbdf19ed0e8590ba5b3d560c72d7b9be465152dd4a0
diff --git a/deps/checksums/dSFMT.v2.2.3-0.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 5c14478b98b63e..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-23b2ca964fcb40f2eff7140cb185d3ee
diff --git a/deps/checksums/dSFMT.v2.2.3-0.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index b3abe0252e20bb..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d1061dbe44ec777cf9303295d92eae31d80f13a5e2a79710928ae96ff6899ed4aa155037af93c369d1880e13725294276556c32c174f4e94566b7ad898c3636b
diff --git a/deps/checksums/dSFMT.v2.2.3-0.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index d23749c542dc7a..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-69ed63fb5d330b216107670224c9d751
diff --git a/deps/checksums/dSFMT.v2.2.3-0.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 753e61d5caa91a..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-e7668fc13899a727b54ab034383686c05c7903a90e5eef257928218f6114e2cfabb1bbcef230e4f4f36b131976fc6e36193192666ea781330de115dacd00fa29
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index fe759a7ed2f614..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9a978d0d59b85fffdf8edec751c4ada4
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index 527bc5afd7004e..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1517dca3c35d96a3dae563cbf06325a8ea68689b59ed74eaf7775e021a4e853df2f023559cb730ced48691a8688cb8214dbbb18397c5944ea838ac42a2e42984
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index 6f2b1f3056cd90..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-1eb95aa2ae1aa96207964f0106a76355
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 9e03fa40203b52..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-b4230d8b5b505de81f7a44406df70ad1a7318caac461bca656fcf06251f4af5a7d063b7f139c083858d674c22bfb02c8069a752d21acf0e99d0f0e97fac6f287
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index bed0a5a4aa6d70..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-33aae580ccada0715cd462299221f94b
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 5db27fede643f8..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2888eb510f7fb89110357d8590b751c3a80f9ee659f202d25e233fb03531676b7c40bd754e4a57e592f74323d41265b24768621f1900908cd3825e95b7e61db8
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 2c60431b012609..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6e388e1e437cfeb020b67f47201b8f66
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 6d682ebac19623..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-15e9ddc274e582775fafb4ccb0b824dab44afad2c950481d4cac71fa94f335fb91f1cf94967e321a2f2ded3f10a02470ef79108f598a0948e6903dbbd5be082d
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 7adfb502520e7f..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-731892f1ec7857ed0d199aac541c9f8a
diff --git a/deps/checksums/dSFMT.v2.2.3-0.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/dSFMT.v2.2.3-0.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index e04a600b158953..00000000000000
--- a/deps/checksums/dSFMT.v2.2.3-0.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-014a86aee1c76ac818cc16d2bb494b6d028321d701c238ac656ed3309b68d8105216e7574b2984cda79b293345a493fef5051da943fcf5ce5c7eeadb58c72772
diff --git a/deps/checksums/dsfmt b/deps/checksums/dsfmt
new file mode 100644
index 00000000000000..edadf5c01b1d7f
--- /dev/null
+++ b/deps/checksums/dsfmt
@@ -0,0 +1,34 @@
+dSFMT.v2.2.4+1.aarch64-apple-darwin.tar.gz/md5/0299af20dae6bed519635900687f4aeb
+dSFMT.v2.2.4+1.aarch64-apple-darwin.tar.gz/sha512/5f20bd7602f09dcb23299d979372453db9a0e76a66129d69cc93c4b45a65ad377486f3cecb7093ff65307f515358420dc318b19eaf5945ff2fbfbe6886e95efa
+dSFMT.v2.2.4+1.aarch64-linux-gnu.tar.gz/md5/78a0fa53ad3db17f2849c744246a6bc6
+dSFMT.v2.2.4+1.aarch64-linux-gnu.tar.gz/sha512/b855bf3349f1ee33978d2c35999fe24a91ee17c5af345881e26351099cea05477528da9df43773d82e20917b4b1dd8c5590eb5ebb99cb5c9c425d03e38192e32
+dSFMT.v2.2.4+1.aarch64-linux-musl.tar.gz/md5/45829fa624e98f806e184cfdbb918a7a
+dSFMT.v2.2.4+1.aarch64-linux-musl.tar.gz/sha512/28823838bba7c1bb40f636835b1f7b15c49c5395a72a1261f3d5eb22c54b487b98bbcd7cd79f12297ffb37c62bfc3b69f0ce9b7036b5c2662d6305497d2e09dc
+dSFMT.v2.2.4+1.armv6l-linux-gnueabihf.tar.gz/md5/fdf55ed8c59b6cc0409fc8154777d57e
+dSFMT.v2.2.4+1.armv6l-linux-gnueabihf.tar.gz/sha512/8249e8a74ea958ebdd1e4e967d1020158c49643bc33855a5f0043b77026c250371d60b95e8370e0f67dbbaa380a7c02fc1c4eff7d49933a5f471393abc2a266f
+dSFMT.v2.2.4+1.armv6l-linux-musleabihf.tar.gz/md5/4814dcf836033065745f3532ceabeb33
+dSFMT.v2.2.4+1.armv6l-linux-musleabihf.tar.gz/sha512/75b8df698762e421cbef208cf7fda556f812f7e9c0481f83ddf38e468459ffa6fbdde86b5942f28f47225c73901c9863246f77eed93abd73052b6d0918717444
+dSFMT.v2.2.4+1.armv7l-linux-gnueabihf.tar.gz/md5/bff0a088b3bdf557dcebc48c2b260bb5
+dSFMT.v2.2.4+1.armv7l-linux-gnueabihf.tar.gz/sha512/609b45b7330bbf6e093fe6277dd14c9e23fd8c8c5e4db6a7275d29c6436f7602cd5069a0912a6ae3fd02d492a25c56400166b25c02b7379eb152eba3aa019dcb
+dSFMT.v2.2.4+1.armv7l-linux-musleabihf.tar.gz/md5/5193c1f1c2d056b3ffd265f4ef18965b
+dSFMT.v2.2.4+1.armv7l-linux-musleabihf.tar.gz/sha512/3e1b0af492a83c076087923d317291fd473670626599d3d03ed86a4515362a24610f3a9b2bd4b71c15bf86b03e44a11fd973f9f16d8b01bfdabbf7ee1ea7f4bb
+dSFMT.v2.2.4+1.i686-linux-gnu.tar.gz/md5/69b959d409030f86eefbe1a0d4196787
+dSFMT.v2.2.4+1.i686-linux-gnu.tar.gz/sha512/0ff871b96031c5f11e5c5fbb4fd35c8bf5e3b1fa5c43dcece275bc847a82b89f0f60db5b273bef2dd31572e89c98694fd1cbc2b442ee3a5fdf3b44e8707ef338
+dSFMT.v2.2.4+1.i686-linux-musl.tar.gz/md5/1fd3b4d5169be306b86cca9dfa6f014c
+dSFMT.v2.2.4+1.i686-linux-musl.tar.gz/sha512/d5e129abf6ff8a1077bb9de27fdc17c131f26d9c3707c189c02649290b50699f26e39230ef875fd172b54b1e28b1b595cbf835c6a8c36e1101951444e174f02a
+dSFMT.v2.2.4+1.i686-w64-mingw32.tar.gz/md5/2a6ea60fe134309ecafb0efd5364b186
+dSFMT.v2.2.4+1.i686-w64-mingw32.tar.gz/sha512/50ffad9c0071746acff16532b71d84d53c6f11039aa10167f49ac9293f4819a905f63f521c93a45daed5068df0fea1699a15a1a1d6c100dce0932cce4165442d
+dSFMT.v2.2.4+1.powerpc64le-linux-gnu.tar.gz/md5/060a4ed22e6e36a661b08c804a7272bd
+dSFMT.v2.2.4+1.powerpc64le-linux-gnu.tar.gz/sha512/f1367b910512b996c4e6bfcb4a99afc3640a4ad5ec8e6a2fc092d677c7eb68527800c4d248188a2cd7a2d427cab472a2fdb48978aeab39805a62f774dc58bb50
+dSFMT.v2.2.4+1.x86_64-apple-darwin.tar.gz/md5/d768332dd9902c4e3869a160fb002af3
+dSFMT.v2.2.4+1.x86_64-apple-darwin.tar.gz/sha512/db3e43ea9b884fb2ddc9585a224d85835ead169f5996ffb20930a8970893f9cbbd8b54832a4fc78745c7dcd7991f973e929965ffded32ae8289c0be68316e60d
+dSFMT.v2.2.4+1.x86_64-linux-gnu.tar.gz/md5/671e5a06c68d23854051c78268bfb9ed
+dSFMT.v2.2.4+1.x86_64-linux-gnu.tar.gz/sha512/0b8eb9e527cea444fdc33a3089684f9b85a8889370fe0b240718d32332523f1175e38a9b51fdabf4a38bad4a820e956baceac253001213b1fc3e7a5eabf8664a
+dSFMT.v2.2.4+1.x86_64-linux-musl.tar.gz/md5/65929d7a40fea8e8783cdeb77205ee06
+dSFMT.v2.2.4+1.x86_64-linux-musl.tar.gz/sha512/ce1b49365b764cf67ef4757f91078ea11afc6e07c4a776258a09f58c9ff84ece440d80714a491c1a21da06ea6a67bd27d2933b862dbfecf3c357f3c32ebb4fc1
+dSFMT.v2.2.4+1.x86_64-unknown-freebsd.tar.gz/md5/e27869ac4f1ea6774ade7d3b53cd301b
+dSFMT.v2.2.4+1.x86_64-unknown-freebsd.tar.gz/sha512/762571a5d5773c2d9780586603859272f48ed67d6c8b09cd95c92fd62dc9bb03c274b12c2c04e05f426c9a42edbbc8e33beba3c79865f2c49459eca2d588b14c
+dSFMT.v2.2.4+1.x86_64-w64-mingw32.tar.gz/md5/74e5c27ba9eb654b4e998ce73719e724
+dSFMT.v2.2.4+1.x86_64-w64-mingw32.tar.gz/sha512/59badcef14b06f14f8f5bce1c72de6750c8310ae18581e24b5d663edefe1bed3d120b4cebb87b53dc664411b62d9802f75aefde4e5236ada1dec740e6ef2445d
+dsfmt-2.2.4.tar.gz/md5/ed30e63552d62df48d709dde4f755660
+dsfmt-2.2.4.tar.gz/sha512/fe84e986cbf198172340adfac0436b08f087643eca3f1ceccacde146cbfd8c41e3eb0dfbb062f7ca5f462db13c386abd7c269bc0cbefc9a0ecf97a8a8870a2e4
diff --git a/deps/checksums/dsfmt-2.2.3.tar.gz/md5 b/deps/checksums/dsfmt-2.2.3.tar.gz/md5
deleted file mode 100644
index bb1c6f12a243bc..00000000000000
--- a/deps/checksums/dsfmt-2.2.3.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-057c5a11d28296825fba584f561a4369
diff --git a/deps/checksums/dsfmt-2.2.3.tar.gz/sha512 b/deps/checksums/dsfmt-2.2.3.tar.gz/sha512
deleted file mode 100644
index 6914bfe18301ee..00000000000000
--- a/deps/checksums/dsfmt-2.2.3.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6d8ddcd01aab3f9039f4f0288a8af53e290cc2f293ed8c4a8c9f3b3f12398e7eedfce8016117b425766ba89c61c86c13b3e420035b5f9bf91315b8b814c03662
diff --git a/deps/checksums/gmp b/deps/checksums/gmp
new file mode 100644
index 00000000000000..47cee2e34a42f5
--- /dev/null
+++ b/deps/checksums/gmp
@@ -0,0 +1,60 @@
+GMP.v6.2.1+1.aarch64-apple-darwin.tar.gz/md5/03cb14ac16daabb4a77fe1c78e8e48a9
+GMP.v6.2.1+1.aarch64-apple-darwin.tar.gz/sha512/5b8f974a07f579272981f5ebe44191385a4ce95f58d434a3565ffa827a6d65824cbe4173736b7328630bbccfe6af4242195aec24de3f0aa687e2e32a18a97a5c
+GMP.v6.2.1+1.aarch64-linux-gnu-cxx03.tar.gz/md5/0ce7d419a49f2f90033618bdda2588e7
+GMP.v6.2.1+1.aarch64-linux-gnu-cxx03.tar.gz/sha512/16363dedaae116fa0d493182aeadb2ffa7f990f1813e4b47cae3cd61ca71f23b65267ea4e2c698d52bd78d61e12feaa73179d7b86ab6d6df01eeb7b6a9b27958
+GMP.v6.2.1+1.aarch64-linux-gnu-cxx11.tar.gz/md5/011f1cdc39b9e529b4b6ea80f4c33108
+GMP.v6.2.1+1.aarch64-linux-gnu-cxx11.tar.gz/sha512/1ed2139580c5c78578f350ee83dbf9cd0120d9d36e1951438d757f5734cda7931600b3f83bfe0d0d806926636d6aea8048c6b64aa42a22e59310282c2428f417
+GMP.v6.2.1+1.aarch64-linux-musl-cxx03.tar.gz/md5/34f17083a1f142c284b707cc82407b00
+GMP.v6.2.1+1.aarch64-linux-musl-cxx03.tar.gz/sha512/dd32912c31a8422734c2e5d5a37001ac18f0e9de151982583d9dc185e5cc3e45076d737729345cca8e8eaf42993d4102353261a2de245e26a8a9cd86960a2fbf
+GMP.v6.2.1+1.aarch64-linux-musl-cxx11.tar.gz/md5/9ba1b822f20f88a1e4c6e81dc8c4fdc1
+GMP.v6.2.1+1.aarch64-linux-musl-cxx11.tar.gz/sha512/d8a4ecd5c35022b9c912c3b4fabe3a4c31258d6a1bd38e4fea13a3da53206a29bfd90f4d602f6e3ee3ee271d84289d1ecdf45534adfabf7e657daef5b5cb0b21
+GMP.v6.2.1+1.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/23e28efa2579d636cb4c80036da5d4ea
+GMP.v6.2.1+1.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/02c8023958fa616c1f944898e686510d449b743d053cfd42f526e9c4fe3ff3dd9de7309694b8537b4bb6dc978085339eb787983ec4ba32dc041448c912a8b982
+GMP.v6.2.1+1.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/bf2a2c4f81f6d04746cc528438f62639
+GMP.v6.2.1+1.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/1c152abeed24761c775e78a64835f8e61b28b16cbc29a6fde88fa4fdbf2a5782cd62697bd03a552d873995bda58b7bdc081c11ecd5e4badde2dea426e5218116
+GMP.v6.2.1+1.armv6l-linux-musleabihf-cxx03.tar.gz/md5/25cbceed2cf1bb12601fe285c342d6b0
+GMP.v6.2.1+1.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/37d8b21bf59c0c555f2b59d6dca4f486bf1725ae18a7fea9a2f31533c54ebb818b5ddb88ec8aa2b618e0ecad78973659abd1a9f095f64ef65067ab8ed08d7801
+GMP.v6.2.1+1.armv6l-linux-musleabihf-cxx11.tar.gz/md5/8ec72c769625a218c6951abed32b3684
+GMP.v6.2.1+1.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/4cb9ccb97859b0918002b649e1b5e74e1fc89a2daeec6f32d5a06ce0d84217f54d1ee788f472cebeefc73ef52284a3d5607efbed47058b438d2dcbcf9f384ed0
+GMP.v6.2.1+1.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/6f799d6516cc46af28eacf8409634825
+GMP.v6.2.1+1.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/541c1e03726584ddb672a83becdc9a99c68f5da9a7415750d582753b47774910bf25cee7fe21f5b5c2a80ff8ce87fc327abd45bf54d6cfe821cb202c81b67e43
+GMP.v6.2.1+1.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/17dba9ebcc1bf4637095a98a876375a8
+GMP.v6.2.1+1.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/648220e632618d23e8611e10fa4bb2e581ed4432e3fff77d0d7349a7585bffa65ae57bf1ce64c550bf6d2acc016f499c0bbbfed8088281445b9d4ecbbf9a64bc
+GMP.v6.2.1+1.armv7l-linux-musleabihf-cxx03.tar.gz/md5/79c77b81cc16fd22ad4cef75af7aa220
+GMP.v6.2.1+1.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/0059ba54806ef0ca6621ddcd309a18922c4c7d9d9e214bc6870b6338a9449a472934cc27569402741d41a18dd53a896aae2f68b788f853fd4ea3db63035c8153
+GMP.v6.2.1+1.armv7l-linux-musleabihf-cxx11.tar.gz/md5/87b79bfc5c780e214863d0f0c1944da9
+GMP.v6.2.1+1.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/88dcabcf96d8f2dcc7968333a94adcb8e8a91615b67ca23edf75c3368a89ef60a8deff8e8532d0cd4d5dd5356343b753b0ae0bf88ce7e190639468bf8170939a
+GMP.v6.2.1+1.i686-linux-gnu-cxx03.tar.gz/md5/61d39e42ab6fd5844e938605e357b336
+GMP.v6.2.1+1.i686-linux-gnu-cxx03.tar.gz/sha512/8e0d382adf6b45cbf613092cee524551a04096b0bc6fb8893701edae9c1928bda67b5522cae3ef954a882ff73b735190881ade37495d9d1a6db88ed6fbcdc6b1
+GMP.v6.2.1+1.i686-linux-gnu-cxx11.tar.gz/md5/b66b49054426adf3e1d3454a80010d97
+GMP.v6.2.1+1.i686-linux-gnu-cxx11.tar.gz/sha512/b28f22bbfbf796c4e959b1fa3433d46b4cf0dbd402c0497a6d4893c8030aa12fd038da4846d8bce02199f1da9b0158d78f2b4ff2636799ba139602775725ff6d
+GMP.v6.2.1+1.i686-linux-musl-cxx03.tar.gz/md5/69ea3b3348813777a1682e41a117d7c3
+GMP.v6.2.1+1.i686-linux-musl-cxx03.tar.gz/sha512/048dd08b5891864e69504baf6328ef5423e0f8e31c5c6cfac552eb51b3ef943af83b7ac654c33e1a0cf061c5832e08eebb9c03dbda6532fbc24e160e99c2aae6
+GMP.v6.2.1+1.i686-linux-musl-cxx11.tar.gz/md5/e7c82091d29a3e5958442c9ec631ad78
+GMP.v6.2.1+1.i686-linux-musl-cxx11.tar.gz/sha512/8574f2e42e181a7bd1cf8aa8056a14d13efe555ee74b14e14aef1bdce7f26ce2afe41b4f85ee20de6823045d5ff38e4dbcebcc7042fff4288af1b7d296202d43
+GMP.v6.2.1+1.i686-w64-mingw32-cxx03.tar.gz/md5/dcef59aa056dcd56e6e36ad49174389f
+GMP.v6.2.1+1.i686-w64-mingw32-cxx03.tar.gz/sha512/3cf3096c325ae2baea8b3c3aed4a26d649dc2bb3cf0d979809d9962521422ada3fdcdddbcfc52b27d43b473a1d3ed4a40368cdeb16cac4d32718c604dbc9f388
+GMP.v6.2.1+1.i686-w64-mingw32-cxx11.tar.gz/md5/b772a602b016e73dfc9a93908f51622b
+GMP.v6.2.1+1.i686-w64-mingw32-cxx11.tar.gz/sha512/00e06591e2cc44100dca1a8897c72933bf4bd8c3c732daea99a9efa4d0a67f6a8820bf3e5d27583dfddc50d4cda656fa7462a2c453035d03657948f0051dc2fe
+GMP.v6.2.1+1.powerpc64le-linux-gnu-cxx03.tar.gz/md5/b31c423855c4c5633b41301e3b424312
+GMP.v6.2.1+1.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/2565176e2bbcb9deab25a91736e8b6de01e7dca619ed1fcc98cebcaaa144eb03f89f4f6d5989aa8454b0d1c7266d1ace690e6deef67c0cf5c3fc1c2ab4d41b43
+GMP.v6.2.1+1.powerpc64le-linux-gnu-cxx11.tar.gz/md5/1ed2494342b5713308f6ffed5fe3863d
+GMP.v6.2.1+1.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/c600802c81c77247a24a50ec0695f742177c8c9f090b4c345f9b0cd065b35183f49592a764cdb7b1b6d5ee3722e7dd26672d85db963d1e490731545a36d1e581
+GMP.v6.2.1+1.x86_64-apple-darwin.tar.gz/md5/51e00a2b55e9f81eb62abe23bb5f6fd9
+GMP.v6.2.1+1.x86_64-apple-darwin.tar.gz/sha512/91731427afd8df54b54d87b93006190a8b959438dc591eb5fa44724056911b8bd5588b2b1e70e9da3d8d6e9ce5aaa6fea66b0706f636cb56b3c860e8f3c0550a
+GMP.v6.2.1+1.x86_64-linux-gnu-cxx03.tar.gz/md5/3f3a6f15e4e8499470bbe69a9ea885c1
+GMP.v6.2.1+1.x86_64-linux-gnu-cxx03.tar.gz/sha512/2659344ab097cd9542a5946c127a43af6fad05aa1445d69a4978d1a6d9a9f0e0502a5a60c6ca88acccb86d038dd10f2a72a7c2d4dd7ad5383c7d687e9720cc88
+GMP.v6.2.1+1.x86_64-linux-gnu-cxx11.tar.gz/md5/15ee858d8e1f07f18df8a893634d859e
+GMP.v6.2.1+1.x86_64-linux-gnu-cxx11.tar.gz/sha512/9d8ffa570eb22a5a908679e06af4dd0ce8c06cf97ff9fd766baeca352a99bcc54b4b71b9c52829ba80043a688f2ed6a33b0302072518f2b16416235d5295ea00
+GMP.v6.2.1+1.x86_64-linux-musl-cxx03.tar.gz/md5/79078a236575994696e7328e34326243
+GMP.v6.2.1+1.x86_64-linux-musl-cxx03.tar.gz/sha512/d4b77a4056a2b0dcb6f789381fff720ab7481cc7edb4672756cb2057ed6475abeb6ea414e6cec3e2450ef7302b647d7d2fc2d9f7de52feddd7767548392e84bb
+GMP.v6.2.1+1.x86_64-linux-musl-cxx11.tar.gz/md5/94f822c7521f83652d87fd5f1ad8bb19
+GMP.v6.2.1+1.x86_64-linux-musl-cxx11.tar.gz/sha512/fa4f70f81524d47b65d5cf3ff5abe38a691f09e3297c62f0db2512483702b9af33bc4a3c15f6f1465d6dce4eeb19f665f29872e6dd7caea0806f4c7fd32c2c5a
+GMP.v6.2.1+1.x86_64-unknown-freebsd.tar.gz/md5/cdb93a733763e8a4fc29652fda8c8b13
+GMP.v6.2.1+1.x86_64-unknown-freebsd.tar.gz/sha512/ec529f57eb167bfcb367310b375a3cded007cbc386cab9b09faa9fe8f37a443302c674814ada6c82125ad0ce4aebecb75bb61633a21e7a3a00fc928fbe05cb4f
+GMP.v6.2.1+1.x86_64-w64-mingw32-cxx03.tar.gz/md5/8b5be9da6a0a293e14ab1d589a622b98
+GMP.v6.2.1+1.x86_64-w64-mingw32-cxx03.tar.gz/sha512/73287b8390cac2ce8afc4565c5218ac739ed8a23c56754f4667570039f022b777284aee25d7857a94ff46fd502ac0fabe46f509a5f870b1aa074f6ed1278dcf1
+GMP.v6.2.1+1.x86_64-w64-mingw32-cxx11.tar.gz/md5/11bcbfc3b65b19d73c3abf92ec46cb6a
+GMP.v6.2.1+1.x86_64-w64-mingw32-cxx11.tar.gz/sha512/1dd9a6fe5c4991483a2d46420cd892271d37d9d23c409ed782b7736ab1942cd6c42360efbc308b5684bd5f991c7a96e8d375f3e855dc537bb3089e3402eed110
+gmp-6.2.1.tar.bz2/md5/28971fc21cf028042d4897f02fd355ea
+gmp-6.2.1.tar.bz2/sha512/8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9
diff --git a/deps/checksums/gmp-6.1.2.tar.bz2/md5 b/deps/checksums/gmp-6.1.2.tar.bz2/md5
deleted file mode 100644
index afce61b8a27375..00000000000000
--- a/deps/checksums/gmp-6.1.2.tar.bz2/md5
+++ /dev/null
@@ -1 +0,0 @@
-8ddbb26dc3bd4e2302984debba1406a5
diff --git a/deps/checksums/gmp-6.1.2.tar.bz2/sha512 b/deps/checksums/gmp-6.1.2.tar.bz2/sha512
deleted file mode 100644
index 82e0e2bb2de1fa..00000000000000
--- a/deps/checksums/gmp-6.1.2.tar.bz2/sha512
+++ /dev/null
@@ -1 +0,0 @@
-268db88447174617f5746d9a6ba2b105940cc1a5e73155eb23b6eedf55f8e7724eda05d161b2de19aca9e794956d226ba9ed6f23124c7c82f7e1872e32b003cf
diff --git a/deps/checksums/gmp-6.2.0.tar.bz2/md5 b/deps/checksums/gmp-6.2.0.tar.bz2/md5
deleted file mode 100644
index 2ced18fce24d39..00000000000000
--- a/deps/checksums/gmp-6.2.0.tar.bz2/md5
+++ /dev/null
@@ -1 +0,0 @@
-c24161e0dd44cae78cd5f67193492a21
diff --git a/deps/checksums/gmp-6.2.0.tar.bz2/sha512 b/deps/checksums/gmp-6.2.0.tar.bz2/sha512
deleted file mode 100644
index fbcc90f9c873ea..00000000000000
--- a/deps/checksums/gmp-6.2.0.tar.bz2/sha512
+++ /dev/null
@@ -1 +0,0 @@
-ff22ed47fff176ed56301ecab0213316150a3abb370fed031635804f829c878296d7c65597b1f687f394479eef04fae6eba771162f7d363dc4c94c7334fc1fc0
diff --git a/deps/checksums/i686-4.9.2-release-win32-sjlj-rt_v4-rev3.7z/md5 b/deps/checksums/i686-4.9.2-release-win32-sjlj-rt_v4-rev3.7z/md5
deleted file mode 100644
index e297faa029d090..00000000000000
--- a/deps/checksums/i686-4.9.2-release-win32-sjlj-rt_v4-rev3.7z/md5
+++ /dev/null
@@ -1 +0,0 @@
-6217dae4a1016b37b12aeed1cc950187
diff --git a/deps/checksums/i686-4.9.2-release-win32-sjlj-rt_v4-rev3.7z/sha512 b/deps/checksums/i686-4.9.2-release-win32-sjlj-rt_v4-rev3.7z/sha512
deleted file mode 100644
index 2cc5d1523bc613..00000000000000
--- a/deps/checksums/i686-4.9.2-release-win32-sjlj-rt_v4-rev3.7z/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1f38cc758725f124552285940cde6d9e072a594298a09f7a542a6cc28cb25d119d80d73918e4e9207694819825787abb6eba4a83cee5892e5acaf6350221b97f
diff --git a/deps/checksums/lapack b/deps/checksums/lapack
new file mode 100644
index 00000000000000..997b522f7ef84c
--- /dev/null
+++ b/deps/checksums/lapack
@@ -0,0 +1,2 @@
+lapack-3.9.0.tgz/md5/0d39aa430ac2716d88b45224f4de2c8c
+lapack-3.9.0.tgz/sha512/90c2c8372c2567b15c25653ed64e7c86a2cb0f1dda116f33716c6877490415210217af5badb67cb50e9d428f56ff83d33a3dad60b6ed7d31919164f6e7e98e0f
diff --git a/deps/checksums/lapack-3.9.0.tgz/md5 b/deps/checksums/lapack-3.9.0.tgz/md5
deleted file mode 100644
index 62a352c3d126ed..00000000000000
--- a/deps/checksums/lapack-3.9.0.tgz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0b251e2a8d5f949f99b50dd5e2200ee2
diff --git a/deps/checksums/lapack-3.9.0.tgz/sha512 b/deps/checksums/lapack-3.9.0.tgz/sha512
deleted file mode 100644
index 8133dd1a8ad47b..00000000000000
--- a/deps/checksums/lapack-3.9.0.tgz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-424956ad941a60a4b71e0d451ad48db12a692f8a71a90f3ca7f71d6ecc1922f392746ea84df1c47a46577ed2db32e9e47ec44ad248207c5ac7da179becb712ef
diff --git a/deps/checksums/libgit2 b/deps/checksums/libgit2
new file mode 100644
index 00000000000000..9b360b711ceb5c
--- /dev/null
+++ b/deps/checksums/libgit2
@@ -0,0 +1,34 @@
+LibGit2.v1.3.0+0.aarch64-apple-darwin.tar.gz/md5/af98f6fafe5678873b7f974c481c4238
+LibGit2.v1.3.0+0.aarch64-apple-darwin.tar.gz/sha512/1c3509d6b2e00ddfb282e4081a3994302b86cef2ff54c74e63000ec5319cf9f37b7685a14cad85f48a90e37afa507efa97881c27a4f4926fae1b74e96a4aed5a
+LibGit2.v1.3.0+0.aarch64-linux-gnu.tar.gz/md5/4884296753929a70f6f01b36bfec1f61
+LibGit2.v1.3.0+0.aarch64-linux-gnu.tar.gz/sha512/42babda48f23b672ac382780b450e314ee16929c523125246f7d66e11fd27208354fd4d4c7e663e2a6091de78612be0e825f8d1cd4eba595a056838df12fd213
+LibGit2.v1.3.0+0.aarch64-linux-musl.tar.gz/md5/c0b53bfdfa9d4b9e653a5470eccb40c6
+LibGit2.v1.3.0+0.aarch64-linux-musl.tar.gz/sha512/b3d79bbaeb26869066d6b7e228bc2712b67c5dc45badd317c3023eda86d82ac2b712a2126d4049f1074d0ed86ec9f80f9a2e7d6458d47c1d3c953a37a4b3ac0e
+LibGit2.v1.3.0+0.armv6l-linux-gnueabihf.tar.gz/md5/ac91abf4ce2ef1f25729d352c2bd3630
+LibGit2.v1.3.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/c8297da990ead579e285e4347a99a824a81c147965a8394e034690b63e3e84f55f21d37f2754725dacc7836812698a06fb6101fe05b222cbe11d558742986e91
+LibGit2.v1.3.0+0.armv6l-linux-musleabihf.tar.gz/md5/eaf893aabde1ec021bbeda5150df6212
+LibGit2.v1.3.0+0.armv6l-linux-musleabihf.tar.gz/sha512/3bca29dd9bb724db74eae72a9bbeae777e315a616938d76144c2183c9bb9b1656c83de3e7c743c3cba8f2492f74a1cf4254c8942859211f1d6beb514e3532b18
+LibGit2.v1.3.0+0.armv7l-linux-gnueabihf.tar.gz/md5/fd6b13ae9129c6f082ab194782e33c01
+LibGit2.v1.3.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/e236667bc0e3a929b625a410c55fffa0be3f71aef090c3c18c9806ff3c866a0f2dfc1afdcba34d7f0b81b0fd38fa3441b56f4343fedf66c5eead64f059c095d3
+LibGit2.v1.3.0+0.armv7l-linux-musleabihf.tar.gz/md5/9370e574abf25984dda0521b9b3d2df9
+LibGit2.v1.3.0+0.armv7l-linux-musleabihf.tar.gz/sha512/281c7f04d8560f5ef4c9902ace66f41b255e6868d83bbe37e61923371b0752a076f93f249abbd64e6d3849460c2c36dee9207303f0e0eb0ef5d37990aa7337df
+LibGit2.v1.3.0+0.i686-linux-gnu.tar.gz/md5/e0ecd37c7dd9709ddedf3eab8a4f2d47
+LibGit2.v1.3.0+0.i686-linux-gnu.tar.gz/sha512/04ff40e26df0f66413564f4189a031abf538dbea2cd41192164ab5e5361527b18d2a448ca7dacd9754a419d198dd816dd25bfecde4b2cfb1f497aa437a1784a3
+LibGit2.v1.3.0+0.i686-linux-musl.tar.gz/md5/4d98934e46f16eaa3afb597474639de6
+LibGit2.v1.3.0+0.i686-linux-musl.tar.gz/sha512/6d2fea7fe42d12642004f41d31d8ecf5213d0ce7cef3489c96583c9096d8b044b70dd3466bfc2ad901a7302fb8d320713a2ee4bb03702dd7487b0eb0b34966dc
+LibGit2.v1.3.0+0.i686-w64-mingw32.tar.gz/md5/29fc6f7d785aebaed7a916106efaf690
+LibGit2.v1.3.0+0.i686-w64-mingw32.tar.gz/sha512/ac29ef9b8d2d2b44a8d37bba4a40103fb174a55c3210b2b5c6d02baf7cf7f0bbea5acefc5c465ce931fc21275045ad0b39ada32b96795a74e4a46fb405c76398
+LibGit2.v1.3.0+0.powerpc64le-linux-gnu.tar.gz/md5/8b412e41808ff41418b35feab033c445
+LibGit2.v1.3.0+0.powerpc64le-linux-gnu.tar.gz/sha512/fecbcbe322e53a311aa28a6734ca6331438f14e030342efcab7d3f9f2b582c1c23b9d40ce63e1decb92d4d8620685d2e9ae7fadc55bf5db9169590f83327e2de
+LibGit2.v1.3.0+0.x86_64-apple-darwin.tar.gz/md5/033cb0d768a322dc6fc8f1fc58963f62
+LibGit2.v1.3.0+0.x86_64-apple-darwin.tar.gz/sha512/69d6c9e4b38257c89311cdf0a219a8497482c85a7a39c2c6d5819c5d9443d93f4978fbe08291313492f673e02ff4ae709fa4047c3ce015a806b4f22284c1c2cf
+LibGit2.v1.3.0+0.x86_64-linux-gnu.tar.gz/md5/bd7a36813ec371d2a39a98eced238294
+LibGit2.v1.3.0+0.x86_64-linux-gnu.tar.gz/sha512/b3a605a9cb7057f4c9652cd551922b4e11f65d74383a9c5b7a0234b7cf599e0a23cf79695e1db6c760dea194f76b59a347e38e14729b0bcc9b120812b63bb2c3
+LibGit2.v1.3.0+0.x86_64-linux-musl.tar.gz/md5/856ac3655c979881c6c0537f49907d18
+LibGit2.v1.3.0+0.x86_64-linux-musl.tar.gz/sha512/8cae2271bfa4a9dc6168fdd1d6133997756f56c60cd94525357f907109b41cb7d6594f7916557de03f1450b4bc694705f2f21d8426cb909c5678cee7f1477b88
+LibGit2.v1.3.0+0.x86_64-unknown-freebsd.tar.gz/md5/5c864ad058dd9c30340bb1c196d97b66
+LibGit2.v1.3.0+0.x86_64-unknown-freebsd.tar.gz/sha512/e2d3953ebe99743b0f6e62901b953e7c57030cfd5b1cc40d2bda85ed26573212c2e4748f2fdf46d88b75f584471a8b0b287c0a65ef83f502164e92624aaa091e
+LibGit2.v1.3.0+0.x86_64-w64-mingw32.tar.gz/md5/45d92f092eb1a319396dd7ebb36a2273
+LibGit2.v1.3.0+0.x86_64-w64-mingw32.tar.gz/sha512/0f219513972259f131fbc509eb035103a986bf1483f7cf9d0bfbca2802f5bbf23296a36a9f9d72e4d89a70f86acf781aec3d49df6aaf408da4f025f65d559cfa
+libgit2-b7bad55e4bb0a285b073ba5e02b01d3f522fc95d.tar.gz/md5/02582c680d006890def088ffaccea7d8
+libgit2-b7bad55e4bb0a285b073ba5e02b01d3f522fc95d.tar.gz/sha512/ee51c06c012503d66ba28d9c2fc9ad42af69f22fd1ae1be54642820ccd80c74e24d78eeec7fe5222daf2432930bcce163800502db1224571da852238c1970e36
diff --git a/deps/checksums/libgit2-0ced29612dacb67eefe0c562a5c1d3aab21cce96.tar.gz/md5 b/deps/checksums/libgit2-0ced29612dacb67eefe0c562a5c1d3aab21cce96.tar.gz/md5
deleted file mode 100644
index af2b7907f077ea..00000000000000
--- a/deps/checksums/libgit2-0ced29612dacb67eefe0c562a5c1d3aab21cce96.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0bd38ce98f6ba7b92b4afbd0ae7e3c96
diff --git a/deps/checksums/libgit2-0ced29612dacb67eefe0c562a5c1d3aab21cce96.tar.gz/sha512 b/deps/checksums/libgit2-0ced29612dacb67eefe0c562a5c1d3aab21cce96.tar.gz/sha512
deleted file mode 100644
index afb5398771fe00..00000000000000
--- a/deps/checksums/libgit2-0ced29612dacb67eefe0c562a5c1d3aab21cce96.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-4bd43d67356f14c9d9a7043e1f3c61c799e844dbbaeaf5250105723c774328b8aaba0fa8ecd2d7bac3ba1a50c2ef9009463e9e159011c75de97a6550b3ba097e
diff --git a/deps/checksums/libosxunwind-0.0.6.tar.gz/md5 b/deps/checksums/libosxunwind-0.0.6.tar.gz/md5
deleted file mode 100644
index 07aca332ca994c..00000000000000
--- a/deps/checksums/libosxunwind-0.0.6.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-62ce12eb88867fe3974904dbf06fb8e9
diff --git a/deps/checksums/libosxunwind-0.0.6.tar.gz/sha512 b/deps/checksums/libosxunwind-0.0.6.tar.gz/sha512
deleted file mode 100644
index 62b3d80faa9547..00000000000000
--- a/deps/checksums/libosxunwind-0.0.6.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-901d1d0e9826b9e691991932897dac3185cd72e668658a319ba71a7f4ab6ac7ae328aa7e67d4c5cbce1a1b7a306d98a754544e8a7530a82d00406e9e42761425
diff --git a/deps/checksums/libssh2 b/deps/checksums/libssh2
new file mode 100644
index 00000000000000..f8c1fc5da8d37c
--- /dev/null
+++ b/deps/checksums/libssh2
@@ -0,0 +1,34 @@
+LibSSH2.v1.10.2+0.aarch64-apple-darwin.tar.gz/md5/b692a93b8f7e45edd5c5d397cd1d3725
+LibSSH2.v1.10.2+0.aarch64-apple-darwin.tar.gz/sha512/8863fb372e3bccb9d6ff7f33494754b1391f0081426d1a42a3f3da69ced9d1b6246b7aa84269b7ec2844c27991d5998a6c58561b277f86daa96b577dec57b514
+LibSSH2.v1.10.2+0.aarch64-linux-gnu.tar.gz/md5/ea2e202282947c4900d656c83ba30953
+LibSSH2.v1.10.2+0.aarch64-linux-gnu.tar.gz/sha512/1c3a035a2e711ad013acb460293e929d18b58d345f84f4a7cda93510dca5e46d466f08b2b96e5742c16c509dc6ed6b6e2b13399bbd1c48340326e3e6d73f9322
+LibSSH2.v1.10.2+0.aarch64-linux-musl.tar.gz/md5/9dd3b1813cd1cc6246b31c5bd2df538b
+LibSSH2.v1.10.2+0.aarch64-linux-musl.tar.gz/sha512/075d681235961838e2bb14f2034daa65909a40972cf638b44646464f66973d139de9af9d653073c24510cd40e5068d3a41c09f6ff12835a8278259d8530a6720
+LibSSH2.v1.10.2+0.armv6l-linux-gnueabihf.tar.gz/md5/17a33524ad9e6dfcf239b076803e3c84
+LibSSH2.v1.10.2+0.armv6l-linux-gnueabihf.tar.gz/sha512/09ef31e3a6ee1e6055c6cf5c0f45fc2704701bdeb565b82a1896e8c83cc34c9dcf1b2e761d1c78b5d513291802a02ef3567a8a6d06d9d7ad946f4233e973c5c9
+LibSSH2.v1.10.2+0.armv6l-linux-musleabihf.tar.gz/md5/18db6a0698da98ec8247ab0f86e9a2e9
+LibSSH2.v1.10.2+0.armv6l-linux-musleabihf.tar.gz/sha512/11657f0b411ee81d84d3c02befd34d53d25da0485214f82e6ac9601fd065127e01ac74b592d328481a0ed7d04c231b37f4fec773e06d1c1f5186f6eb7eae57ce
+LibSSH2.v1.10.2+0.armv7l-linux-gnueabihf.tar.gz/md5/0a49a14c15176f32867f49a6e487de77
+LibSSH2.v1.10.2+0.armv7l-linux-gnueabihf.tar.gz/sha512/aa81a8504529b90e6e212f1495e8732118f5d0eececd4a809cecdeb88b97f0ca4a1081e669660ea8513b15f71299854da9eb5f8352f099796df4fde33f89072d
+LibSSH2.v1.10.2+0.armv7l-linux-musleabihf.tar.gz/md5/0bb9d7c5c40d88e19a2d9f3178c4de64
+LibSSH2.v1.10.2+0.armv7l-linux-musleabihf.tar.gz/sha512/cf361ac498daa4c3a0b044171165756e54402f70c75fd5d877e6a24db9b6930c678c3f45c16850b9138e8f97cab9f1cb4ba82262e48fad269a36fc556215899d
+LibSSH2.v1.10.2+0.i686-linux-gnu.tar.gz/md5/651955e1c129d88d9dd0e9b048505db7
+LibSSH2.v1.10.2+0.i686-linux-gnu.tar.gz/sha512/b48d3e5eb36d4e0ef36bc512f1fe65a85e0ddf16dab8da8190f642d2460b6ab94677838f263ad280f37a8bd838c4c8283a3cc706247d4241d8760fde797fc163
+LibSSH2.v1.10.2+0.i686-linux-musl.tar.gz/md5/74b4741009fea8bb10ab3f6a44937fb1
+LibSSH2.v1.10.2+0.i686-linux-musl.tar.gz/sha512/13b64fd49d6a6b80dede0c049db871c5b22fec2f093adbe6699f2e467f7458684cd38093230c5d2fc837e500c8d1e86cc2966b9805a2ed7a705d50108a95803f
+LibSSH2.v1.10.2+0.i686-w64-mingw32.tar.gz/md5/20c1c7d0825cba67a0bbfa822348a245
+LibSSH2.v1.10.2+0.i686-w64-mingw32.tar.gz/sha512/2ac02ff310911998c07861493b699837168e43e40172372c33fc7769ff6eae2f2f2c65b10319c2f10316d34c519ec2fc5308b327b62caeb415ac7c5c692fa81d
+LibSSH2.v1.10.2+0.powerpc64le-linux-gnu.tar.gz/md5/6d180f087e415adbafa516c471315ce2
+LibSSH2.v1.10.2+0.powerpc64le-linux-gnu.tar.gz/sha512/23ea211b5d1219454c14316e59cb94195195abebd9a4e7a6812c4d824abcac7c5b896c460c2dae3511abaae7e0afb5ead40a5836e5d94ec0c3a2b8076dd29e3e
+LibSSH2.v1.10.2+0.x86_64-apple-darwin.tar.gz/md5/dff956043faefa3396fc31bddbf83b1e
+LibSSH2.v1.10.2+0.x86_64-apple-darwin.tar.gz/sha512/00aa279251a04684d968e413bd9f652c6740bf4a6e860ba9b999c8584561499f1f589ca2eb3f06a01c539a952fffb41787a37a6e514d689b97693a5a7bf4c18f
+LibSSH2.v1.10.2+0.x86_64-linux-gnu.tar.gz/md5/a50d390c1a8ea77d7d78c07a759fa79e
+LibSSH2.v1.10.2+0.x86_64-linux-gnu.tar.gz/sha512/c985018dbc79c9d41ca3df940fcda15d7f57f0a4e869268ab7c255b4fbc7aa9bd5088281258de22523c777bc9210ce3c9e1f0f76693c0575506ea840619c0306
+LibSSH2.v1.10.2+0.x86_64-linux-musl.tar.gz/md5/a8b647a12439a7ec9b2d5111a4fd605c
+LibSSH2.v1.10.2+0.x86_64-linux-musl.tar.gz/sha512/7790bf3b88513a026f3b58090c5b39b0544d873f7bee4f4c06fb23f513954f580ff2d5d552f15f8b725fd3264585390c33283906f1844cf20ce6d2eee54495a7
+LibSSH2.v1.10.2+0.x86_64-unknown-freebsd.tar.gz/md5/c4f76951ed337bc87d21993d1997dac8
+LibSSH2.v1.10.2+0.x86_64-unknown-freebsd.tar.gz/sha512/3db3c44e2100d00a537c87590dcd5493dc9ec74913ce903ce7bca697ab590417bd55ec6475a0a790ab49e9444d79ece539733ac25b0b82eaab735c8c96c0e992
+LibSSH2.v1.10.2+0.x86_64-w64-mingw32.tar.gz/md5/b58d05eb572149dbfec7b53a75dc4d6f
+LibSSH2.v1.10.2+0.x86_64-w64-mingw32.tar.gz/sha512/422fb36c6d7d3f7153b52547fb98d70268da1506a4957e2772184ba52bf06455f869f1c491d82852494459189830c68569219fbb2c795ddb49d7e8a8e95d6988
+libssh2-635caa90787220ac3773c1d5ba11f1236c22eae8.tar.gz/md5/d0b060310da22a245fc488a300288198
+libssh2-635caa90787220ac3773c1d5ba11f1236c22eae8.tar.gz/sha512/17770f8de4f081840e765d6f7842d562e20f46972fb53a15e3c9e10421f3654a559c5dd1dfbafd7b4a0e5205d800e848b9c9c26ec1d8fc0d229d5070b6d19463
diff --git a/deps/checksums/libssh2-42d37aa63129a1b2644bf6495198923534322d64.tar.gz/md5 b/deps/checksums/libssh2-42d37aa63129a1b2644bf6495198923534322d64.tar.gz/md5
deleted file mode 100644
index f50f0018358359..00000000000000
--- a/deps/checksums/libssh2-42d37aa63129a1b2644bf6495198923534322d64.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-253c2c3eef1f5c9c54be3e677512ea28
diff --git a/deps/checksums/libssh2-42d37aa63129a1b2644bf6495198923534322d64.tar.gz/sha512 b/deps/checksums/libssh2-42d37aa63129a1b2644bf6495198923534322d64.tar.gz/sha512
deleted file mode 100644
index ad3f1906406bb1..00000000000000
--- a/deps/checksums/libssh2-42d37aa63129a1b2644bf6495198923534322d64.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1fb3202ab793024dc83a419bc7138c4bce9aa37f1bd6aa199388df9a7d78f3838c1fba38b5a82fe2b912f2cf742f476f0c8f2e30a253d1313317d844ac315606
diff --git a/deps/checksums/libunwind-1.3.1.tar.gz/md5 b/deps/checksums/libunwind-1.3.1.tar.gz/md5
deleted file mode 100644
index c42ea472ddb92e..00000000000000
--- a/deps/checksums/libunwind-1.3.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a04f69d66d8e16f8bf3ab72a69112cd6
diff --git a/deps/checksums/libunwind-1.3.1.tar.gz/sha512 b/deps/checksums/libunwind-1.3.1.tar.gz/sha512
deleted file mode 100644
index a168f6515108e3..00000000000000
--- a/deps/checksums/libunwind-1.3.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3110d0aed4f5c781ef1ff72c9337e59793c02c42066209a4ac44f50eff1c0b0e02a5ff9f66891e62016de14af065a47975763970b839b700c0ff2e9f415c8def
diff --git a/deps/checksums/libuv b/deps/checksums/libuv
new file mode 100644
index 00000000000000..03f7aca15a802d
--- /dev/null
+++ b/deps/checksums/libuv
@@ -0,0 +1,34 @@
+LibUV.v2.0.1+6.aarch64-apple-darwin.tar.gz/md5/bff12bc642215646c8c03f2003a3c5ef
+LibUV.v2.0.1+6.aarch64-apple-darwin.tar.gz/sha512/9c0bb5e648d1e967caec07c700e4657c97ea9db8b48625887eb4e91af286be62380f5c85bc51bc51c87ed6104ffc26bbd498f501e3892ca1d41eb96bab88d955
+LibUV.v2.0.1+6.aarch64-linux-gnu.tar.gz/md5/af5b11ff1354c591990285e29840d83d
+LibUV.v2.0.1+6.aarch64-linux-gnu.tar.gz/sha512/67f6c6a7c780b15b9e4b317c44450a325f6966fd2948d28e113f7d4b0c2893b8b5f9b1eb6da73cce683fa7176b5587e1c73b5b1faaf09d2ad378d8b085a75392
+LibUV.v2.0.1+6.aarch64-linux-musl.tar.gz/md5/2bda667ab6f9b7f8962ec675272be6b2
+LibUV.v2.0.1+6.aarch64-linux-musl.tar.gz/sha512/271772a7acff9d2cce1ab36a46f0807bf2f30a00227d0cfbcbb8eac4c583e0bd406c6406a7e9b5afa720e844b1b2bcc01ec60cae3d907d0d004a7a40ed182397
+LibUV.v2.0.1+6.armv6l-linux-gnueabihf.tar.gz/md5/5765a268e960ebbff2e7f6a386435b06
+LibUV.v2.0.1+6.armv6l-linux-gnueabihf.tar.gz/sha512/31d1a223b57dfd859f6a6633c75b53507b99a3eeccbef9d47f12e0dbf1e4b5a77e489348bda625f0cb6ecf5450edcb751d4fc4603beebb01fde73aceb7ae6d2b
+LibUV.v2.0.1+6.armv6l-linux-musleabihf.tar.gz/md5/be91036ac0626c1b5a9b28a15026e942
+LibUV.v2.0.1+6.armv6l-linux-musleabihf.tar.gz/sha512/0e8a338f84ce24ba99357110aa6982956a9970715202005ac4a748d3a78cb75816a9063b3ad5a96569261966792f87fe698777d33b6fa428068ec07ceb944fdf
+LibUV.v2.0.1+6.armv7l-linux-gnueabihf.tar.gz/md5/921038ac4396791a555e1c2a8f5af558
+LibUV.v2.0.1+6.armv7l-linux-gnueabihf.tar.gz/sha512/45519d49d857721f025bdb08522e3c08262f264b8a00bc36d9ca4bd05d6a32ce0b1b40ba7c9cfc98bbd1201e6b4592632aa8852652abb61604bcd324abc17c76
+LibUV.v2.0.1+6.armv7l-linux-musleabihf.tar.gz/md5/06b404efd3d62d107f9331ab85deb893
+LibUV.v2.0.1+6.armv7l-linux-musleabihf.tar.gz/sha512/3e73341346060df832fcc591bc447f713a8188c06f22961ae03cba4620d524edae7b84e63ac8fd5b675abb62bf0e12f176468f09e7014fbb8df6cc763dda12b6
+LibUV.v2.0.1+6.i686-linux-gnu.tar.gz/md5/e6b31595a27a91bf34b7a5aeae48d459
+LibUV.v2.0.1+6.i686-linux-gnu.tar.gz/sha512/b59516d2340ed469be8d86dc903e3497867b522082dc6096683b23fec4b03bdc5e0c643bc2cf36ca49c2dfa11689946bd5f7e92bd68978ff2a409935203ba533
+LibUV.v2.0.1+6.i686-linux-musl.tar.gz/md5/49a84d0c90ec136b933fcd939f371716
+LibUV.v2.0.1+6.i686-linux-musl.tar.gz/sha512/1abff45b3a0894b78d20e31c4dcda8673a3e3b6d3e8fa89e8f57da115ae8feff58bcb16cd3107b4c768e9c6bfb777864056fab47de5b2babead3eaa508b2e748
+LibUV.v2.0.1+6.i686-w64-mingw32.tar.gz/md5/6ef4d726e171dc8f2aaa5603180b154b
+LibUV.v2.0.1+6.i686-w64-mingw32.tar.gz/sha512/0699afa096208829d7b3795ee150a94e2e0446a17e77c204a7e013f63f51791df0f8c8416c0549809cb0d0c3b1f52fb525310153a68f80652e6c8def9bf17903
+LibUV.v2.0.1+6.powerpc64le-linux-gnu.tar.gz/md5/72cc19fa36b7803a4973c3913c720d46
+LibUV.v2.0.1+6.powerpc64le-linux-gnu.tar.gz/sha512/694d96e8127e4a206496388db4f09d0af0673818f5168fc3ffaa9bd15da132d5af843f068c89f057a0c62404f1e3171725b86e1cdade3e27a3f0e8b6be8e9b2c
+LibUV.v2.0.1+6.x86_64-apple-darwin.tar.gz/md5/e3c076ab2aaf47f423f9de96bcd50faa
+LibUV.v2.0.1+6.x86_64-apple-darwin.tar.gz/sha512/3a3e31ccb0e2a1c1aec1b2ac52ff33f7116ef84452d70bb0f680a276411a5a9ff4aad5e5533bb7d3d981f168974a94f1ea90d41b4ddc6dab1a334f16000bf812
+LibUV.v2.0.1+6.x86_64-linux-gnu.tar.gz/md5/752545518774845ee93933fce9c9516c
+LibUV.v2.0.1+6.x86_64-linux-gnu.tar.gz/sha512/458494e07a096793552ee4f9e0bd302d160186e20d702e7c0691b50984692c5725042faa49df0b1595b3d6f2459bd6d73225af1385e4ff5a9d7e4dd5baaa4dae
+LibUV.v2.0.1+6.x86_64-linux-musl.tar.gz/md5/6988efa401aaf11e82a916632b26141e
+LibUV.v2.0.1+6.x86_64-linux-musl.tar.gz/sha512/95abfa548c8581be9f512041c1b904532ab8e62610e70b2e184d6638d1bb2552883d946565e3071e6c8f3127a524313d432df370d6d6361a5f0ce5d3c60649ec
+LibUV.v2.0.1+6.x86_64-unknown-freebsd.tar.gz/md5/5e35a7220027cd6a8ded93611fed1a57
+LibUV.v2.0.1+6.x86_64-unknown-freebsd.tar.gz/sha512/218b2f40bc1c49d91c9457b9014d536b6fd6b1f6c3704a6aeec2739bcf2ecbadda1bfd36a9ef84ffb2aebd1cb6b1903276658259d4a2d873cd61780a9762934d
+LibUV.v2.0.1+6.x86_64-w64-mingw32.tar.gz/md5/1aa9e7ff08da10c79984ac470b31a701
+LibUV.v2.0.1+6.x86_64-w64-mingw32.tar.gz/sha512/675adf9330de80fee97e9ebf7a6de7763a3cafad20b6aa9e009832a590a1a20272578861bb357e3ca41961a247e2be178e4455ad107951d88ce8d3467504c235
+libuv-f872be3b6438b90b4690fe3ee7692c50bfd9c7c7.tar.gz/md5/ec0ca23d8bbf091580db363e0216599e
+libuv-f872be3b6438b90b4690fe3ee7692c50bfd9c7c7.tar.gz/sha512/4dfb0759f88f9892210c7feec52920f00fdfafc3bdf045d09f393f2f0a7edd0701fc889e589f9c8fdacb51a43225d7c4d60073015eb6e182f30c5be2b3dda4f9
diff --git a/deps/checksums/libuv-1fcc6d66f9df74189c74d3d390f02202bb7db953.tar.gz/md5 b/deps/checksums/libuv-1fcc6d66f9df74189c74d3d390f02202bb7db953.tar.gz/md5
deleted file mode 100644
index ce4beff6c0bc91..00000000000000
--- a/deps/checksums/libuv-1fcc6d66f9df74189c74d3d390f02202bb7db953.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9388aa4d36915724f1de4b20b205ed35
diff --git a/deps/checksums/libuv-1fcc6d66f9df74189c74d3d390f02202bb7db953.tar.gz/sha512 b/deps/checksums/libuv-1fcc6d66f9df74189c74d3d390f02202bb7db953.tar.gz/sha512
deleted file mode 100644
index 863943d5d26bb5..00000000000000
--- a/deps/checksums/libuv-1fcc6d66f9df74189c74d3d390f02202bb7db953.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-70b0c738a60b4e476750b7de8d120e735359e2c36fcb3a8a38628a0ae326210ed3b15d793dfd5443d7aa5603e83e7d99f567aa4c1696846d950df9f83648669c
diff --git a/deps/checksums/libwhich b/deps/checksums/libwhich
new file mode 100644
index 00000000000000..d4a0119625663f
--- /dev/null
+++ b/deps/checksums/libwhich
@@ -0,0 +1,2 @@
+libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/md5/22fd8368c7b40209dada50e3205c1294
+libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/sha512/6fb77b715d70d9bc95a8546c3bf97bd3677c7ea344b88bb5bc3bbfac9dceabe8a8cde7a0f64dec884cde802e4a3000e30837d3f824b5a9242348c4fe061526a3
diff --git a/deps/checksums/libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/md5 b/deps/checksums/libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/md5
deleted file mode 100644
index 817a55b8078a8c..00000000000000
--- a/deps/checksums/libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-22fd8368c7b40209dada50e3205c1294
diff --git a/deps/checksums/libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/sha512 b/deps/checksums/libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/sha512
deleted file mode 100644
index 9b9ae68aa2de38..00000000000000
--- a/deps/checksums/libwhich-81e9723c0273d78493dc8c8ed570f68d9ce7e89e.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6fb77b715d70d9bc95a8546c3bf97bd3677c7ea344b88bb5bc3bbfac9dceabe8a8cde7a0f64dec884cde802e4a3000e30837d3f824b5a9242348c4fe061526a3
diff --git a/deps/checksums/llvm b/deps/checksums/llvm
new file mode 100644
index 00000000000000..6cb85ecdc0d3b7
--- /dev/null
+++ b/deps/checksums/llvm
@@ -0,0 +1,234 @@
+LLVM.v13.0.1+0.aarch64-apple-darwin.tar.gz/md5/de198200e72a0176aeb383bdc916b472
+LLVM.v13.0.1+0.aarch64-apple-darwin.tar.gz/sha512/84e5472df5a89821baa7c7f5f787d576a4fb312738da194af3d79dda916c5f69bcff05e693d76f15e00af6c6832a26e01933fb0c33b57225dca5a048869c9ea8
+LLVM.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/ad3571e776e2fdc16d7ea54b236929b4
+LLVM.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/f9ceb4c1389301fd8d85bebf966f9482fcea31a5767fd2dc89c460f4404549ae9df68ac1d52e0948c75910665b857090d62ca53e84a09cc191ca265f460f2975
+LLVM.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/27ce9c71e0c41e1f72e54b7a4c6f4826
+LLVM.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/941de4e99e24ea33944a3e93fc4c486b9adb9c721a641656803996785138eff9dff929ee4b3261dd57916086da3ee2dc7489a255c44ed8d2f0a1d2a915bf875c
+LLVM.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/e4a26e2ffd866a29d276f20565a0e76d
+LLVM.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/0c5c7b8641a02c53ce24d40183638986651e644e423fe43b58f3657a6dd21f294c43dcca588dd04c044d65745f8d493f1353cfd168be0cb4f5b68f63df921468
+LLVM.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/ff6fe3eb7392178db4fe8fa65a61dd7b
+LLVM.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/1e69c89cb616d9ea9b2f6a863f44d0fa83e2e181f8de66dc478faf3881a06d8b6a81a032607064a952b37b1ee5d25df06105ba4d2758e2da3698e7394ab69a7d
+LLVM.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/a0498659a1f2e896762421cb4f6d2a9f
+LLVM.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/8811f7ad799f0a31191eb7d8dc3e13fae3b47b1372aef99e02b3477e3e75de87da6d7dc3a8f7972ffa5ebbef4c58846d57981021b944ef8a7b303083322559d9
+LLVM.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/2f5ecc129ff7d58eaf224c703973c157
+LLVM.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/562d16c8b9de1489d655c1a3faf58b44a69b195b5d97fbbb3b60baf886a357ffff232c0ed1daded6b5aa1b635615aa3d9de497c7e87b081ba83d2c408507acf9
+LLVM.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/9308ce36b9b3f9f23719b8ec4c7eed0d
+LLVM.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/01330f93f15fa56b5485c0444e2c6aad82df61170579499b0a1b586871ab05a783651cd903043c39bdd955c8036e8511fd33fd541358210bd3d801b21d31750a
+LLVM.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/9e60c460dcc29228d137f13d3c04798f
+LLVM.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/0bbac99fcd2b7e6fb958c1966ecd135898666b313938b8fec13154fb16069ec2dac06f19626a6cbad973a967ea99bcfe7c21930486715af0a666cb850ccc7ec4
+LLVM.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/b4aacd37b274cd86f0d74150a6481e80
+LLVM.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/fd7cc8368fdf27805155e25c94f099b65e01d0b3edddfc3934e81da84e480801967960bdef4ef68e5cfa325f5445cda6f3e1ab9d60729e86f4aaa39c20729af8
+LLVM.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/ed180a5375b1198dfd58bb1de07db4fa
+LLVM.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/09077792ea1eb299bc5215ecc7904098467dec48f1f3cab532ec673bfcd9711120e77744440d5a28a1496b50490d3f551b4d8e14958396964d40991adaf8252c
+LLVM.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/53503aca7737a92abff745a3ad23f270
+LLVM.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/12d388a6b5dfd45f8c0fe29453f49cc17bd1ea54ba281b92cf84d8698b03c9204feefab79245e7d9e8063a311b96679f849456366064b021f86c284417c43d71
+LLVM.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/f9f002f64d325fade65076f5912377ab
+LLVM.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/c87ce1742babd909ed4faa66aef71301d9da48c01fe772e8775af7b5b41f49ba3f24b0f8e26694ba93a8c2f14fdda698a157bdb3d95bd114e2bc90dd85acb340
+LLVM.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/39e654c42cf3b5a4a752e46566b8b9fa
+LLVM.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/4fc6e48cae0e33843b875dcc39fc2b860380cd6ad6f9214367827049b29e2db85593544866107bc8950ea844be09671092ef133aa764dab48119105332b932bd
+LLVM.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/a5928523eff8a9fd2ef66012eb3ab556
+LLVM.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/2595eb66b23fb9013f866578a829e07c4144996ae660a7448c196255aec43e6959caef2bd074db0690d91e0a39275b09c935d634855eb69613ae834426844f7c
+LLVM.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/47d3b87788b3269da6aea81069ea13dc
+LLVM.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/0721c1440daaeecc95beec69e7493dca098d619ad27125df51429704f3d463fa8ab86685f9f486378a028a99b445705dd052d9cfa9c1e729ff80fc2e1b46d508
+LLVM.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/0604eae4ea2d2dc715924976d006b026
+LLVM.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/6ba0acc9f08d1308c07ceb587e9bcc3de3d167a133d053326eb24d0660d18b52c789a8dd86612b85c894c9faa5d4fe6b9dc65bba1c8ffe649999b8458348dd19
+LLVM.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/7879e8a03f4db12585ad2f8545fe5e06
+LLVM.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/e0d23395b0962870df1c13edf4aa67bb2ac9372ede4160e7347fb94a47d90e76e738a2224b82a604926a8fd4a3f685935be0d9c0e4697b4c5ed53183ae5e9bf6
+LLVM.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/fac7f70937406d1c06d84cee96f61a61
+LLVM.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/5b987b3a3b4ae677dfc11f9dad75a5db0f4affd6447061f0996fe81d978760f9553c9f7a89a1a229ecacb6a159b9e7728da2c7bcdb49c8a2fdd4b1498d117e6a
+LLVM.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/8852de922ee08484018d8b8f4a4459f7
+LLVM.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/17412ebd9e63f370eee499e883fa0da0fa05a3ccb6ee3149648b4e55241166d2f5b34d759b23d654ff58b0167ace2cbe10329bcf984cc84b7c7690b6528063b9
+LLVM.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/c172ee499e60fe6e22dcb135854d9f39
+LLVM.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/79773c87795f5251095473d5797a0fbc7a4a4e7eeea45eadccbe01f62eacbba0b6159370675088907297b91e020be2bf1339c211682f7525c03c1033b91178c9
+LLVM.v13.0.1+0.x86_64-apple-darwin.tar.gz/md5/730d568f05aad99f6eb596d623c18763
+LLVM.v13.0.1+0.x86_64-apple-darwin.tar.gz/sha512/fecde3420de9051f32a1753c30d83436f9ebe2e5805d2dcbddbcb10eed6d84f0b5af81d33ff05d1c34996fa3d1198f20db56d8fec302e64d85e1322893acce2a
+LLVM.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/2dcc7db78138d81c6548c59e9ad2625f
+LLVM.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/48e18a31f149c0101f80d34e8e293078c5332194821a33c290aebd0701249a8130876752938b6af4346b1985f8c16dea575248f4e862d019c3290dd1c2570e6a
+LLVM.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/f101a354d0b9b777f4754505a0d7f677
+LLVM.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/f77a338d4e0c379e5958457ce5b3d1cf323c3869616a4ab6f40be3753493966a893699de9c09946f4712c6684cdf08e235cb2d33b724e87dc8a2970f677ca952
+LLVM.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/155c5015da0e2ffd94fcdf9496e855df
+LLVM.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/a1b2e1f5f8aaba0d74efb0819e39ad5ddb1740ad7955ad41c44b0a3483ee5d17db2b32f5d548200493c390cadd08dfae3f277833dd774c95c90ff989c6bf5969
+LLVM.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/d3f804be18541fa1102af46da18a743d
+LLVM.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/bb0ab78b3c03081f352bca252f2ebab3e5a47a83ee4c2dd0504543457c6f32dbe1449de97a2b5d8f970980497a77f78bebae3dcdb7d0c1c346e9df46721eb32f
+LLVM.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/7f7de7e59d22411068a35977a6fef75d
+LLVM.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/29c9531e6ed6d0b5d85d58bb5122531212c39ecd10f4a78ea1eb42311f3328813fcc4d2ad2311eb5cc3030778492a6b8bc5c9b12653f1ba36f16e0a50c4e0272
+LLVM.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/1823541a9a6c9e9134ac7645501399f5
+LLVM.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/2dbee2c1f01e5cc4f0b70c0147352ad95f0b91f5cb1efcde7ed61b54b2baa1b0bcea0b97e0c0ff6c55526e6b037f25808cf995f861ce46da56195bfe0b0e48e3
+LLVM.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/454453a2afb04e3c4d6cdffb37591a3d
+LLVM.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/21bda5f9ceb9d4030121eb9c563233bcdab5b9d1d5b0b9b0fd22cfba3d507ec59ab4c98211d0d5c2cc5ac0b0695d1fbe4707a0264fde423833cd7a461193b556
+LLVM_assert.v13.0.1+0.aarch64-apple-darwin.tar.gz/md5/edbc793469fb7c14af3c33f8584d22df
+LLVM_assert.v13.0.1+0.aarch64-apple-darwin.tar.gz/sha512/a3137f2d2d4847e6db1acfc834e686379cdd80712feb3d36d616f73af473599356ade48c98a865d3c233a59d395d40114083fbd78617001b95ebe363fe12cde5
+LLVM_assert.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/00176b5cd73dea5f9265155574c08dd5
+LLVM_assert.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/a911c597ebfdd66bc5e20af38e2456cd1e2be051642abf939d6290017ea4426ad6c68dd17b8f59b9e5e942dff62bc2627a7d66df0c628c100d4bc948251afc58
+LLVM_assert.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/b494be6cdca661a43cb07e55a185cdd9
+LLVM_assert.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/3338abf24c2dd710d0d356e785f30d72c6a83eff5ff91a7e0113f66a213bc39f241e9886f09d41b3e5ccd56f19cc431565d391a4ae88d590a47fc5ce35b57bcb
+LLVM_assert.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/8bdd207d78547f38d599010272b7beca
+LLVM_assert.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/f349ef36df2dfa76f915353f3e3e1f0a336614c89e33fd9516a604e6d72b541fd83e0862576c3d0864b518e6fa038749a9c510788f1c07148fa5924fda357e25
+LLVM_assert.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/b7659747556ff940eb0093153ad01dd6
+LLVM_assert.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/6e0f04738beb2533cb83891c45d9f3bfc701ec1f83ed1c1e06e885c5b5bb4b51c1b6cffbc0a2cae648df1c65b01a8af378d35cd743e72ae3fdb8047774e8d54a
+LLVM_assert.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/79d6bca4a7660422a43185066350f9d2
+LLVM_assert.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/094a750a1e4f98a39e0e8a30a3a3e55e55317cab5084115ff33714db82c6645d9fa3ce0599f773930e47ef9261805a7e1bde51c1d067d07e2e844147ce180c4b
+LLVM_assert.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/7790a193f05283eb60f2668ddd6e4a47
+LLVM_assert.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/a41689262509178100866588964d5da99736c87e47f23fccaedc53128484e8f24e693858bd82ca63eecdd5af2ef627e3a37ca83df27d103affb015c93c3d2372
+LLVM_assert.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/67a56a20625adfec51210d86cca998eb
+LLVM_assert.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/00a950e6fc1b9447dc63fa0905088d6b8f441fd48e4a234018aa0b9fabdc3c173174fa3a22a6707bafd1f4476b3da436bf6f3a5d388095502e07ae9df4de2373
+LLVM_assert.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/77377f6eed3c5393ed2af8205eef67d1
+LLVM_assert.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/edf79f368c23501883ae850fc5a293dbed4fa4b22da322af43233e55799a34887fc090f7ed3a865c73692be60484c770f754af54edffad800da35e17a9a4bf39
+LLVM_assert.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/f3df2582d0c31fa17ec40a20aab9b684
+LLVM_assert.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/99905914383be921e9279a8f304daec4f3155bd88cf33c716f4a7967441f8ad4c544ded404c946b1f8270172a797cf17598bb8a05118da455e1ee5c24b7d7bda
+LLVM_assert.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/4ff964f982c57cfd279ff101e923fdbb
+LLVM_assert.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/d13eb4378e014d6370b5dc9929c0247ce73dadcac17be446f6aa3db227c466193fa3034252f26ebe06069a6da87120ea6d41ed2087ad3f8a9d64d4c54c8c28d8
+LLVM_assert.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/1324fd002337d2b69abd203bda0d9b6a
+LLVM_assert.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/d0f69d9ff0f997f9c72f70060040825a11b377a3518f2060bcd4a85253b46ed2e8eec773732547dab436f1cffde5883b24e52f75d295cbf3f7096dd0d9c90173
+LLVM_assert.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/313006aa96874279764a7b7c4666ea23
+LLVM_assert.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/59c4a59a7e0184643077a45b5da6c5693123d3678e010fd3ccce88761a4434c1321082e056bf16beb88131bc6a98f40515338e2faa8bf5353e448926d80213b9
+LLVM_assert.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/3333f1d17d5a8fd8ad07b1ef42c50f12
+LLVM_assert.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/cc244bc19588ce041159f6b251141565b31190fd8da44bccb2bc8fb7dab4cdfb6c3aaad166e4e2ffb1796cf28296bb53f94715eeeb110f4dda0852f328fd8db5
+LLVM_assert.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/8aaf25616a93aa95819b2d95de9a11b7
+LLVM_assert.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/cd0c65cf2cac76cb813eee1e87dcdfea0735a01a296a9d9483c75dd1268b1b48d8ecbbb2bb7321954503686754b78c0c0cd07c428a5722e5e3781d6323046fab
+LLVM_assert.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/c13905bd6d398ac5369161a177687508
+LLVM_assert.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/40719ed2c074a3b18b8811c0c0d204bb4c38e007daf3eb09844fd2fe59737fe850e448f4c650412ff611370f767b04b44fd02c4550ec2d120828c5577451ed7d
+LLVM_assert.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/95944a48b2360c17e0a40cef17fee9ab
+LLVM_assert.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/5554935d3932744fb15feb0cba3e86aa98059e037d8c71d3413f2c986e88ec1a58b454d884ac0e0583fa612c546009a27a7287dd240058e79bdbc41f445cfb7d
+LLVM_assert.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/391138eb01ed8be350669e6e22ae9fb9
+LLVM_assert.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/5e25e8b941e60950c5889e1f51c05bc70ea3ca75ab7bc950b674cd1f93a44a7621d1dee89b6f6be6fd0d5982b6618c36e0b4b4ec443d19856fbc8f4832fee6c6
+LLVM_assert.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/22dd78fd71f93c062f090afb96529912
+LLVM_assert.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/21f3008287015ef9d3bbbb76f6b7a320a6a4ec96ba49a126cee97648e6ce48f4dbd4df46f05c551187f3f681ed622aa2392b7c08ac060deea27f7f74ddb2d0cf
+LLVM_assert.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/ee9b9db47c5745d12620c6e52e7fcc6a
+LLVM_assert.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/e15d831588352e6404ea766852d9479dc0d5b78f88eb4108694c4fed8b123a17cd9f4211cef31ff45f4f18274622b43f54c5928c17eddfb2f195ecd59646f5bf
+LLVM_assert.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/c9e22ebe1f7c7e046d142b699b0649d8
+LLVM_assert.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/72e59f38647daafa323f55f6259c9091b39df90b6736f09244e48f2cef8230b03eae689aa8a83c2f0031a9225bafa33bccb5f1badf8fb71d5a4d22efd6de9410
+LLVM_assert.v13.0.1+0.x86_64-apple-darwin.tar.gz/md5/9c5db337206c28fb567e96a0b2f18533
+LLVM_assert.v13.0.1+0.x86_64-apple-darwin.tar.gz/sha512/cc67489ec1c086640c9969eca1d8a0868840cff375775d1c60fdcfbbb84714d960549a5ec314077dec9409eb5fab5bdaecd9e6f4605c7c654a0b52f7b791ffeb
+LLVM_assert.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/a188fad8f09c3080618b6861476b9252
+LLVM_assert.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/2c5f95a1386b5a7f122e2af6d754173512eef72b637c9e3d1250b1bd1b1ad993a9cdadc9e71947c15e09cea308b1f30a84a2ff937fad3693b8b3c84145deeec9
+LLVM_assert.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/d27c6edc49622f79d61face403301f13
+LLVM_assert.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/9b778434293bc2da965ecfa314dca1190677372a61553dc25bc6146ae1dcf553b3b71f473df9c1ff661f17fd56e75ff6715233859a5de1a91e2d1663abaaa71a
+LLVM_assert.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/5c6f3e570a3c3d6af0ebcaed3139c27d
+LLVM_assert.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/1754a7dcf4a4fb9f88e9d5e451b5185ca5d72cf51a6675abe87cd059df1cd8b10388a3f90335e2a5f12864aa3baa7504299b90924439609e66eed24dc60c0965
+LLVM_assert.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/8fc7c0e358d2c98bce2dfce7f3c2f507
+LLVM_assert.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/81f7032f5e7ed45e3d84619c18b4f588a570a3cb36f8ce9792fd41a9442ac73cccb64b4243128a07445f6b412b20048aef98a6501efdd9b526ea0e6a1c803f57
+LLVM_assert.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/f8c750975059dfed1633735f9dbecdf6
+LLVM_assert.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/d01efc6da3de4172aa4c085a6c90d8410ca33d1dc470f1b908b5836a7873c68963fa2fcfbbe24a4a7c6ad016f869084d430e113e71e6c94a8078c46a860b3f80
+LLVM_assert.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/70e2d1e2e84e7f8b19be1f518949d753
+LLVM_assert.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/df5caf19b914f194266dd27d05218bbf11c5d0bfc2cdc589391bb40ebacf7384f9dc691a9d882dec873c8db594c1b8c158e80c1cec60965daacbf42b6486add2
+LLVM_assert.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/f5c5d3f2a55d6c5bf89fd9bfe1166969
+LLVM_assert.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/f97aa158391b35f4f62ba7bc2398382f16f33161384478ddb10c5d64d24ee4d64c6ce9439fa05a997521f2f1d391f8a13f4d5a8b29d14eb22c7bca121d4a10c8
+libLLVM.v13.0.1+0.aarch64-apple-darwin.tar.gz/md5/90c59343fc5a9ad5ffd6258467e6603c
+libLLVM.v13.0.1+0.aarch64-apple-darwin.tar.gz/sha512/97a49af9f0e68f76a10e13813900c2ad0d4575ed31ee703ce86bc19490f6dcc282d47b5b641499fff0b949f5330e1e0e58559f84987e9230b1c5f3f33a4caf7b
+libLLVM.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/ab3c2b357634a2660820012df34414f5
+libLLVM.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/6038edbe7aa305dd35660592dd37fe0ad207e074126766623573be8d7b3b8a06056a626b6da210957264e74192e40bdfc0f396dc9961757dfe6dc8d85a0ad0bc
+libLLVM.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/3f1572194c43db046610d4043b7eadaf
+libLLVM.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/d8be84d5627aa37d65bd81c2c3e0248eb053cc88ce13c38189f53e785d1df7858669045271cea40f1ea6b0516a99b8d4e01d747fe23384c4b39e69c8e509b32e
+libLLVM.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/bb96b1a1ade79e3970759b137d83f350
+libLLVM.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/80f586b763a32ed2efeec2b30c931477fea6f707388180dddbf9147129ab8e3a765ae921642fcc0b75319a5de5af80b358926604d16ab5b162453faa73521db2
+libLLVM.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/7bbc79416781ae9de6983879ba7b6566
+libLLVM.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/db1f5ac2d3e0a44f69a19284fe91b4d06ec438f295db7564160257e10c0de010ba7d2f346277060ec93126ccf9cd2194a87a73a7ddc4141f9dfc0a6a16fd1ae0
+libLLVM.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/cd2cedf55992338a3a72d65fd317a6f2
+libLLVM.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/979069f43f8215adc0c4d527e7341e3cb42faa287b697d4fae781bb9f321c513fcada965796033d01ffd2b8169d8e4936bff6c953a860f758f5eceaad46c8162
+libLLVM.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/5ca3a104123a63acbc05aa5c9a372db9
+libLLVM.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/8fd77092ea76499efd78898f1179e6c37a08c6c161558986459491863344edf6a7baac7c4c8cca45c8d82269ba073b8fecc259e5bfde99f2abd5c56e87344502
+libLLVM.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/4e56e434d66a5bdb3e5a34a99972270c
+libLLVM.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/41f32d057c2be5f771be3ae96c4642401285a1024ce4aabf8ae3255b4557635adec1485c4afa5d57f672c1b5de57cb723f488361e54eedf65a8a43161552d5c9
+libLLVM.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/037399603a44f4ffd2ff98e6b9456236
+libLLVM.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/0e01a8b286f99b98382b35905653c573776c9858465cf21d70e0d5842871aac27fd1b3da759644894e0bdc29351891edff159246cbc523e7ff0a8bcec67e852e
+libLLVM.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/60e8fbacfa5c23f90ddfc4b13917c9f9
+libLLVM.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/7125b3dbeeadb0513ea12bf8bc04f44de98da11a60dd1a1886fd5210416408cc6484ef814f5176e19338e7ba7c8a4a8aef085ebd00f2853056e549d2c6bff55a
+libLLVM.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/3decd9bef6de6b3e5a306fee9f6af2a9
+libLLVM.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/622a60f4f256a802aa9413aed830f57546f28ef7c5a4ff09c3c66736ed958a1b8fa0169de002de26ddef3ce1151fc1352235668f4da51640615339e6d7bb271a
+libLLVM.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/5c8370e3462987d15d0edc21c6e8af9c
+libLLVM.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/eb961730e622074e0f2c05b7729a33d088cf084d2162e8a428d3f763d39b782bc5d341a60823d1b3f4fee9a03a995c0ff8251e2cfcd0fe13f8e09b60c3fe231d
+libLLVM.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/6e659916b90b66cec5fb1f1d424eb177
+libLLVM.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/2489c0d76d46a10479eb2197324dae1556f330848f8efbcd545e155d871652ea0692fae2063665f3bfe02ab165567ae5d7dbeabf287fd38e180141ed9714f29f
+libLLVM.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/569dbeb437cb438636244ffa0248f2f9
+libLLVM.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/6dc44b2458dcbd59d695f20d4786a39a92d7affd2cfd8e25536f0fcf46489930c7315887e2f611d0b9f27ac04ea1bfc1ffc9b770dcb8328cfcccc8f419705466
+libLLVM.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/2e9e078ca524ecf96a801f3361e47798
+libLLVM.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/5833103547bea7614447ad27e7bfae7f7fa4e3bf6bfe49301d57974f50de26c8c43747aff60504cf923958b53189030b4016b8d381244f92be8a3cde82147a42
+libLLVM.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/babec2df18c459f4bd068c711e4f3fcf
+libLLVM.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/c3660a02a8215a0becb17d6e2ec2317e65d3c312172048ab6d867de11b3c618f4d31e8f215b349a049130fcfbe7b59f018e12c89138a1965704a84a403b3995c
+libLLVM.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/3aa2b9f877a34a8ba83fd03f9aff59ea
+libLLVM.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/1e02a817fef96628ee4ab2ed62bcd49156d7df5a61463420e0e8d9c208d242994d09d6999d6ff223b46de516b8b3bc3448d2807dee422128d729f44594dbaf91
+libLLVM.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/767865e3ed6fdc200ac9b6ae569d7fc4
+libLLVM.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/034904561e6715b8ee1b5d9f5d3669f3765cec05357e21de0e1b875346b8dfc199e545d87747f1676cf16329f4122b4e574eaf4bb91573b9893ff72dc7a0b33b
+libLLVM.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/be8fcb1eceeb0b0b1064bfd1459c440c
+libLLVM.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/1b8011e432fd570a34a90bb449082ca086a311159b3b699a9a176e9f7dfa916bfb58e06f82a4f1e40c7896d1781acfed40eff77d447070186f193f2605a2521a
+libLLVM.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/bd14e02f94880856d9cbdc531bbc2d9a
+libLLVM.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/4fd86b2562e96ccf8327c4791be34a1c03be7f96382626201076104e3cf04226e76fcb628f36e977487f8c4a717f4e25626713f8e2967b42a335a4cfa8836909
+libLLVM.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/2da035de66d4e2af430b21c5ff04c8f9
+libLLVM.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/d86ed30cc3e3a42058436059f8aaa74b910ebe8ed8df65add637214e21118173f7863e834c7fc87f71b9d7014643fc129363f97e5e8e4e9694da6b31e9e21970
+libLLVM.v13.0.1+0.x86_64-apple-darwin.tar.gz/md5/513383b4044ac84dcde32afee478c1a7
+libLLVM.v13.0.1+0.x86_64-apple-darwin.tar.gz/sha512/552b09934c77bc5d44057c6a47fc5af413a5ce636a6f79308a8a304a4f5ef6d9714147d7babb9c0fe207d7526086834583cd77cb2ed3cdbce07978d4e1f2be3a
+libLLVM.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/732f0349aa40bb2b81ea78bfe0c41f96
+libLLVM.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/8ae7d1c7b38dee47a9e8758a11c27da897cac1ba0766a300018b72dd5263299bce61fd93ed58f95b6d3afcb70be091503d78613a346e6e1bfda2261af35da895
+libLLVM.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/07ef28642d4d8e1fb0557937f55e2106
+libLLVM.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/aeae745dccdc86d3af6c2332d26f152683f2b9bcca4942de880096e6d4e55457bb5bf75d51095db57dbf44e222876bd88292d9aeb06f5037c4d2752593a30c79
+libLLVM.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/db6f67a674141e999fc113a3a016fcac
+libLLVM.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/f64558e48b04f36386c1a908ed08d8975f385e4449a98b3fad3068fab760956a15c77af0f1bfe9443781779b3856c87aa537062abe608b2b33eea8a26f8a0d72
+libLLVM.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/d0ab18c49c5bac39ba7e42f034d73ed7
+libLLVM.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/8b012d61d7040a14feffc81346fae3034905f45f04ecf67ad63f589097a2f66f15bce573627145a4c20e9b96fb742773c31ae628c5ff9ac0b80b212d4180973d
+libLLVM.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/ea4034d5e3168a88b2ec93ce19ef4368
+libLLVM.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/c88d998522b35159589dd153fbdd4d0fe318af5b7bd21ccb76993315e7cb88237b86c0b1d3926112b82de6c1a01a568db3e4e7ab782b377169a9b4ce16362859
+libLLVM.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/3abb0ab78813dde21bdac01c2abe0f56
+libLLVM.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/f0e9f8f5b51bd88a3bc44a31cfd17ee5fee5693e58335e15e75a02edb633eccb20b4b550272f62fb94accf0601c0ffeda90b651386d5f4533f53efcaa737b62a
+libLLVM.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/6cd7c931f078cd4e7fdaa7100f849fdc
+libLLVM.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/5d1627125bc08887a6115d90e9fc82b489e1181508b949dae5d4bae556cae6de21d2db7a70f72f28af79db9b3e24e410f36edf7e1b8e6bbeb58f88c579739f12
+libLLVM_assert.v13.0.1+0.aarch64-apple-darwin.tar.gz/md5/106b3e9243647066dea672db53433830
+libLLVM_assert.v13.0.1+0.aarch64-apple-darwin.tar.gz/sha512/443fcf037bf415e8fc80ba54549d7562cdcff4a8b9f3904f7f9340dbca2c2f696812205d65dcd243a0272858e33ff5990eac25b67dfafd4bb43432cbe7894c8e
+libLLVM_assert.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/md5/96a08126d18c388cbf465823180e50d0
+libLLVM_assert.v13.0.1+0.aarch64-linux-gnu-cxx03.tar.gz/sha512/764cd65e04e3366eaa8b37464e446494d7da51fefbdb036ce1694d8e2ac690464a12c4f02e8e0001f513fd96df3387bf947d786309faa3c2ca105f2a962cc703
+libLLVM_assert.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/md5/f0cd12f061e008b0fffc8f5a0e59f694
+libLLVM_assert.v13.0.1+0.aarch64-linux-gnu-cxx11.tar.gz/sha512/e16a9ed2da79448297f89a0e1d85f9c482aa9f181b5b1e10b00f8f8411f46fde85b0ff6c1b5fded0c1ca05f22d578b9f1fc3b57d2f2e51adbfbabf0bc36eeca2
+libLLVM_assert.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/md5/2cb2998d7da32b8b0ca5086c1b1c65fb
+libLLVM_assert.v13.0.1+0.aarch64-linux-musl-cxx03.tar.gz/sha512/cec31970c67541ff979bd94780f5369c72a63576eeaa2803598ad453e72c273f238eff492410b38c372a616e992ab02b229232e5e23eba0d15a0a61a23f179ff
+libLLVM_assert.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/md5/3541fd14098d5d673a75b39d1171842a
+libLLVM_assert.v13.0.1+0.aarch64-linux-musl-cxx11.tar.gz/sha512/6320d5e3b8b3b4839e90ae66c0d5639816de9bb74e463125ad05566ca394733bc83fea9a4bc49366a0ee6e31c83acbd5408d388cfd957b6918b4986d639f104c
+libLLVM_assert.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/md5/11b71aa8a64a8580dd297a72c6b44303
+libLLVM_assert.v13.0.1+0.armv6l-linux-gnueabihf-cxx03.tar.gz/sha512/4468015d50d8cae071b7abcae525e2e2c05eb6cbaa138ab59c9c2092b4cd3c9616a0b22a222accb0c9d0564e975587e638afa892d1cd480a2f5db7295bf510ea
+libLLVM_assert.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/md5/106a99c91928b5dcf7f214bf9f0a0b9f
+libLLVM_assert.v13.0.1+0.armv6l-linux-gnueabihf-cxx11.tar.gz/sha512/94da8219ad4cf7c1155bea4878d6b6306487e9bcd7e3cd4a5f88f0106dd60fe8a5b89edf62f6db6fafdaca728b0195bc0032c3a404119930c7b5e0c7443d20c9
+libLLVM_assert.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/md5/f9a037108728810c78636e9ca5bdfd7f
+libLLVM_assert.v13.0.1+0.armv6l-linux-musleabihf-cxx03.tar.gz/sha512/2d04f17e72f505ad908433d3ee9781480bb90ea78a405c892c02f4af899a0bcaec9b8c6e8e1554aaf4241912532db59cb1719edd328edf6a75f65393a1db32b6
+libLLVM_assert.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/md5/6e0d147ccab5f63b61b330d6e4e261f2
+libLLVM_assert.v13.0.1+0.armv6l-linux-musleabihf-cxx11.tar.gz/sha512/43aece34e5be174628e7e645d163a442e379f10bca6988f768d3f45e2f449b0262e3a789cb71dde5431c7fea4305bffed591009c467a902bd5e079c9e0797035
+libLLVM_assert.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/md5/ffff6ccd75cb9e9cc59e0fef9133efd7
+libLLVM_assert.v13.0.1+0.armv7l-linux-gnueabihf-cxx03.tar.gz/sha512/8d7201223badb90ac932e31f63b46af7bf004af32f1316e2552d7646ebd65fc69bf3d267ede2502f743f0d41e567d1448a1550c942d223e218678bbaba3d39da
+libLLVM_assert.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/md5/ec045bb81ffd9d9a4fa34990018e4c8d
+libLLVM_assert.v13.0.1+0.armv7l-linux-gnueabihf-cxx11.tar.gz/sha512/bcdfb4bca9088bb6d02755fb50e6531a4c7414123810e75d13ed1f71a85aef709a8164110e5d21769578ff6a43b659476bcf274d3df721f9c49183f7e3683169
+libLLVM_assert.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/md5/92d538e671e3bce0619181499198d6bf
+libLLVM_assert.v13.0.1+0.armv7l-linux-musleabihf-cxx03.tar.gz/sha512/8ef2004e7cf30327ea6ab91cf89e5bde22a378439870811969b79199ca9ddfa5825b92241cfc8e606b893c17da2a6dda665ed6dc09c34ccb95e8e3a843bcf059
+libLLVM_assert.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/md5/988828fe05b1564f43218978438b6395
+libLLVM_assert.v13.0.1+0.armv7l-linux-musleabihf-cxx11.tar.gz/sha512/97aa19516ada176a689118f27c6be1423316bc4f047812e1b8c0a4037b227fa20b0398e63ff764de0b75174d6fc41c656854de201121845ea66917551003526f
+libLLVM_assert.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/md5/38434f9d60b437c3ca3216696f194e8f
+libLLVM_assert.v13.0.1+0.i686-linux-gnu-cxx03.tar.gz/sha512/dcc7f39f46268883a6890d70bcab0efb5c9b752ed724d0a1ec0379da0d090599db47d82d0ddd9e8acae0a351df4caee2cd0f7283e84439b702788e2d4f3a9588
+libLLVM_assert.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/md5/7fbe5817d732c50a59736d4c048effd5
+libLLVM_assert.v13.0.1+0.i686-linux-gnu-cxx11.tar.gz/sha512/aeb7090365053c653273e0d592485c7bfba1e63f758ecf57545261540ee045df9fb2b58b91658cd087e78d15f3fb8ecfd280b64ab8af8f04dd7589085d8e1ddb
+libLLVM_assert.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/md5/7cbb0d59fec17b98b633f47b7eeb80e6
+libLLVM_assert.v13.0.1+0.i686-linux-musl-cxx03.tar.gz/sha512/2579ebd9b9b50fdbf9f3b38c0c2ca22312bdf6712a0d3c6c51058691107cb05dba9e5f4d5b27acd165f74258eb493d1680a320ed4c821943efcd2f600f68e44f
+libLLVM_assert.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/md5/354dc055ea15b8e4c866fbe439b3ec83
+libLLVM_assert.v13.0.1+0.i686-linux-musl-cxx11.tar.gz/sha512/2ef407435ad00d605c28b255eafc0b748d26a868e58a4508431a427b4aedb5c4182268d95dafda000f3ee190ce0b2d32a488641a627834b6b3ce22c171b039bc
+libLLVM_assert.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/md5/27f88f260b1175132be84d00834ec825
+libLLVM_assert.v13.0.1+0.i686-w64-mingw32-cxx03.tar.gz/sha512/b904c91bca86286db662b4889dd4815a87482aeb20c49ac0e59f6adda4524a8f6385277f9aee24197aa1539096baa7445ff3caa6110432b0861966872234868c
+libLLVM_assert.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/md5/0e1e5267c63088088065a69846fac5f3
+libLLVM_assert.v13.0.1+0.i686-w64-mingw32-cxx11.tar.gz/sha512/ecce393ce899991f7eec3ca07887306bb002bd54270f0ccf3f8e93318024b9ea8024c8151e639c71d719c956bfbd3ed5c38c0b52f1cec40ea893d2da7b6172d3
+libLLVM_assert.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/md5/090a448043257587a7b9001162b0d982
+libLLVM_assert.v13.0.1+0.powerpc64le-linux-gnu-cxx03.tar.gz/sha512/67e5bdaa89ad657f98bbe9012b06e89a6ee30306afcd09ab46e518d7b552bcef47fc37cf166259bffdf98cfa4d7b1cd7e04851de1fe3a16507f7b354067c1393
+libLLVM_assert.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/md5/5eaa7afa170aa19b9f31183c47d82354
+libLLVM_assert.v13.0.1+0.powerpc64le-linux-gnu-cxx11.tar.gz/sha512/147f5a6ab233b42277e42ebab197616a6a0b0a265128fbd619b20bdf1b2af6e0ad524c990e31a5836dcdb2c0c500657021f974d91de7e8b02a761ffd29bec624
+libLLVM_assert.v13.0.1+0.x86_64-apple-darwin.tar.gz/md5/90f43cb235a3525ade4e250be1a0a7f6
+libLLVM_assert.v13.0.1+0.x86_64-apple-darwin.tar.gz/sha512/9ea0b79a16b4697276915c7dac9dc4a426213f48e4c1e1db2705c5810aa3b17ecbd9dde2ca562b472be65f7063d85e239d4948b9743407c095c910e97ae24bf6
+libLLVM_assert.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/md5/12d3dde26ccf6aa21fc13a2dd9aa3768
+libLLVM_assert.v13.0.1+0.x86_64-linux-gnu-cxx03.tar.gz/sha512/b8b362345fb550b8af61d851d9918413ff23f1f7b78b7817f103384af110dca3383d4c8067a56748cb97fca7d1f75957b0dd2ce323d61a56cb9a266a378361b9
+libLLVM_assert.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/md5/d1673dae2652f131c6ebee2ba257f629
+libLLVM_assert.v13.0.1+0.x86_64-linux-gnu-cxx11.tar.gz/sha512/47a7f634256a3df1f7ff56875ce969a550b217cfc897e9796b60fc4c45d7c4b1a22ba56a33cb7932ec40c0e987d407678234716447ef51123c5060c713a61948
+libLLVM_assert.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/md5/6454e1cf23e77ced847cd623995a234c
+libLLVM_assert.v13.0.1+0.x86_64-linux-musl-cxx03.tar.gz/sha512/30ce182636afcdccf265ffec468c9954434d3f0a135878cb55698799cb829c138e828a28b0493d8226d80a36d00250be0c0dae083efcd63b0e939f5fb75b1f6e
+libLLVM_assert.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/md5/cd24ac0e5a37b5db288b265a90f5fe9f
+libLLVM_assert.v13.0.1+0.x86_64-linux-musl-cxx11.tar.gz/sha512/d90aa1a0e4edb57e2a940d63ae28e198c1e515e7892008f1b04289828be466662aa38596c02884dd787798c04d00ff6314f884be5a859287f840d18f79ac8c3c
+libLLVM_assert.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/7164700b24a94828b17abf8aa2e44477
+libLLVM_assert.v13.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/5ba54ec75cde0df60253efe694963b7a2eadff5f23028b2cb8ba612530acfc148cfe738d2d2e65bf9dcc419aa9998bd8544e7852167300ffdcebecfd0ac6821e
+libLLVM_assert.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/md5/a17f42d502120079943a1695128ae7f8
+libLLVM_assert.v13.0.1+0.x86_64-w64-mingw32-cxx03.tar.gz/sha512/e4f6a370c96c29ba6bc5e979fd3660becdcb95d5c26299e4f7f31d1ca089d4acf6915371e1452dc538551aed2db4beaa2903dddb35e72a131f4a5262cd266334
+libLLVM_assert.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/md5/a458b0572d77d3d79b66a53e94a6436c
+libLLVM_assert.v13.0.1+0.x86_64-w64-mingw32-cxx11.tar.gz/sha512/43b6ab2becd9b3179f91f2f856854d4795e53c4078dda26607e5b6a8dfde37cdc28f9fec6c0ca9e0d0d8de5f2304d5775d5c6b7a03c0f6feb2b93e43053997c4
+llvm-julia-13.0.1-0.tar.gz/md5/34edc9f707d86fe8c5758b0ae8c35206
+llvm-julia-13.0.1-0.tar.gz/sha512/0d55c1bf3c581551faa077aab7046d1f020e8775ed16f1fbd8ccee65bc8f43173504f5ce1215227fa5e565f2804f8772e2cda039bc333bb23677067a4a3f9f87
diff --git a/deps/checksums/llvm-10.0.0.src.tar.xz/md5 b/deps/checksums/llvm-10.0.0.src.tar.xz/md5
deleted file mode 100644
index 603ca6a971ddb7..00000000000000
--- a/deps/checksums/llvm-10.0.0.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-693cefdc49d618f828144486a18b473f
diff --git a/deps/checksums/llvm-10.0.0.src.tar.xz/sha512 b/deps/checksums/llvm-10.0.0.src.tar.xz/sha512
deleted file mode 100644
index 29cefdd9c264fc..00000000000000
--- a/deps/checksums/llvm-10.0.0.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-7dc961aacee3a01ecc002ff2b688a2ef50661856d2abd5ecc90566ffcad7566e4976736cd339ea96592e452cd5a17aaceba9712b2effec805661cca8ff020ee7
diff --git a/deps/checksums/llvm-6.0.1.src.tar.xz/md5 b/deps/checksums/llvm-6.0.1.src.tar.xz/md5
deleted file mode 100644
index 0c7151e84b867e..00000000000000
--- a/deps/checksums/llvm-6.0.1.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-c88c98709300ce2c285391f387fecce0
diff --git a/deps/checksums/llvm-6.0.1.src.tar.xz/sha512 b/deps/checksums/llvm-6.0.1.src.tar.xz/sha512
deleted file mode 100644
index 2e0ff6e648c08e..00000000000000
--- a/deps/checksums/llvm-6.0.1.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-cbbb00eb99cfeb4aff623ee1a5ba075e7b5a76fc00c5f9f539ff28c108598f5708a0369d5bd92683def5a20c2fe60cab7827b42d628dbfcc79b57e0e91b84dd9
diff --git a/deps/checksums/llvm-7.0.1.src.tar.xz/md5 b/deps/checksums/llvm-7.0.1.src.tar.xz/md5
deleted file mode 100644
index e934f0ce64c992..00000000000000
--- a/deps/checksums/llvm-7.0.1.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-79f1256f97d52a054da8660706deb5f6
diff --git a/deps/checksums/llvm-7.0.1.src.tar.xz/sha512 b/deps/checksums/llvm-7.0.1.src.tar.xz/sha512
deleted file mode 100644
index f6f980b724b696..00000000000000
--- a/deps/checksums/llvm-7.0.1.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-ac43a3cb71a53deb55e3693653847cf20bf6f5d9056f224e6956c96d63bc59ebee9404f088eec9cabe65337b4607a905ef931354b373cf64e0004c6905a6b5df
diff --git a/deps/checksums/llvm-8.0.0.src.tar.xz/md5 b/deps/checksums/llvm-8.0.0.src.tar.xz/md5
deleted file mode 100644
index 61146bb93d68d5..00000000000000
--- a/deps/checksums/llvm-8.0.0.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-74818f431563603515a62be1ee69a142
diff --git a/deps/checksums/llvm-8.0.0.src.tar.xz/sha512 b/deps/checksums/llvm-8.0.0.src.tar.xz/sha512
deleted file mode 100644
index bda5127b6a0c1b..00000000000000
--- a/deps/checksums/llvm-8.0.0.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1602343b451b964f5d8c2d6b0654d89384c80d45883498c5f0e2f4196168dd4a1ed2a4dadb752076020243df42ffe46cb31d82ffc145d8e5874163cbb9686a1f
diff --git a/deps/checksums/llvm-8.0.1.src.tar.xz/md5 b/deps/checksums/llvm-8.0.1.src.tar.xz/md5
deleted file mode 100644
index b76ea8119cb5ef..00000000000000
--- a/deps/checksums/llvm-8.0.1.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9a3b63df01c52556f7afb5617934e79e
diff --git a/deps/checksums/llvm-8.0.1.src.tar.xz/sha512 b/deps/checksums/llvm-8.0.1.src.tar.xz/sha512
deleted file mode 100644
index bdc9a5e10793eb..00000000000000
--- a/deps/checksums/llvm-8.0.1.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-82e120be5cabdfd5111aebbea68a663fe229c8861d73802d6ab09a3bf48f60de333e07e61f8fb61beaa14ac2bea24fcd74fa6f761acaf62469f536b79fcb1e16
diff --git a/deps/checksums/llvm-9.0.0.src.tar.xz/md5 b/deps/checksums/llvm-9.0.0.src.tar.xz/md5
deleted file mode 100644
index 92e662edb11fb2..00000000000000
--- a/deps/checksums/llvm-9.0.0.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-0fd4283ff485dffb71a4f1cc8fd3fc72
diff --git a/deps/checksums/llvm-9.0.0.src.tar.xz/sha512 b/deps/checksums/llvm-9.0.0.src.tar.xz/sha512
deleted file mode 100644
index 4fc50263cb1d3e..00000000000000
--- a/deps/checksums/llvm-9.0.0.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-1bb3341e1d231559b948f1505b33c2e2e03989f9b8bbfef0e0cdaff5ac43f85574c9ec5ac53399b914f497d6899425d861411024e8d7e1d1a338c1c6951ac658
diff --git a/deps/checksums/llvm-9.0.1.src.tar.xz/md5 b/deps/checksums/llvm-9.0.1.src.tar.xz/md5
deleted file mode 100644
index 8d8f5fa3a54dfc..00000000000000
--- a/deps/checksums/llvm-9.0.1.src.tar.xz/md5
+++ /dev/null
@@ -1 +0,0 @@
-31eb9ce73dd2a0f8dcab8319fb03f8fc
diff --git a/deps/checksums/llvm-9.0.1.src.tar.xz/sha512 b/deps/checksums/llvm-9.0.1.src.tar.xz/sha512
deleted file mode 100644
index 8e64aa8257aa7b..00000000000000
--- a/deps/checksums/llvm-9.0.1.src.tar.xz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-bfb6960a4dd1e18f4005f324f478a781c69e8ec7c20569d9b243fcb9372dc7733b254f26c683373537990cc9c109c78eaf0f65449629ee17caca1bce9b9ccccd
diff --git a/deps/checksums/llvmunwind b/deps/checksums/llvmunwind
new file mode 100644
index 00000000000000..678ae7b0c3fc4e
--- /dev/null
+++ b/deps/checksums/llvmunwind
@@ -0,0 +1,34 @@
+LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/md5/b95ad4844e649bf46db43683b55b9f4f
+LLVMLibUnwind.v12.0.1+0.aarch64-apple-darwin.tar.gz/sha512/15e0996aebe6db91fe58121001aa7ea4b23685ead3c26b5d89afae34b535e34b4e801a971f4854d8e1a1fbc805cece06272470622eef863e225358113a127913
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/md5/6d8783dc9b86c9884e0877f0d8ac4167
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-gnu.tar.gz/sha512/d3b0c81498220d77e4f3cc684fb2cc0653792c381207390e695ac30bc74249f96a333a406b2cebdaca14e0b0a27b188cba6209bb5c1cbbb5c184d5626dbdc7a0
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/md5/052a35e879d52244e4b0804be875a38f
+LLVMLibUnwind.v12.0.1+0.aarch64-linux-musl.tar.gz/sha512/d1b34fb97f9928e046d3131a050454710a93d38e60287b7e3c92f179f436586d3230cf90b0ca0eb8a3f9ef89fef7b1ffd7d52871645dfa233a8b07ca87ea2ee4
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/md5/1ad96a03a5dde506b5c05773b1849ec4
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/82306fb7b920fa7c71bd53b23d6915e7f256e8da9679cc926a53bb0d879f1f4469f43efe556ca32c9ef59e27b435572c7b39859090652635db4eeefdec0d1685
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/md5/6a24fcd3a4dc3b1a98bb7963b1bb4930
+LLVMLibUnwind.v12.0.1+0.armv6l-linux-musleabihf.tar.gz/sha512/9ba6b83ccec061a1e5260c807dc8afd6e18799431b25a7e65b97662cc4db02509d02ea07fe12025d80914cec7383624b1c8fc9add46511c668e184ede263ac52
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/md5/09f1bfcf58a4124561553ab5005f9538
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/b0907cb857131183ffc338780c6c6dd1d48bf0ba61c3da1b8f20cf9a943373173b621cf9b2e8f1fbc657059a896b84aa025e6d4f0f1d1e8b623fac3e96541765
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/md5/19158bcfae716b26f924d67c4e719342
+LLVMLibUnwind.v12.0.1+0.armv7l-linux-musleabihf.tar.gz/sha512/a90be57990b6699cb737ba96904e94e1f082601ca9d01e670f025b5500f526980741921c9cf672accab78cb5327714ab6ecdbb875174088f0773ebb627a98819
+LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/md5/ba75556eb96b2bcdaf73ff68386d3bc3
+LLVMLibUnwind.v12.0.1+0.i686-linux-gnu.tar.gz/sha512/612fb765695b7aae11ef29608eedf8b959f60c021287a67b03a2a0f57a5814001ffa9b261c9d60d5f3d0582c06c2b41f75fd3afb66a045a248bd43d29e304c97
+LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/md5/2fcbceeb1bfde29be0cbca8bb6718bfe
+LLVMLibUnwind.v12.0.1+0.i686-linux-musl.tar.gz/sha512/58f281cfc70b3f8a59cf4faa7732824637c811ddc5ea6a058f294f4c3ed4fa6c8ddab5c007567b439f2854635cf4fd146284059bfbc73e7006000ced9383f705
+LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/md5/153c028d97dceb6924414a7a9a137e1e
+LLVMLibUnwind.v12.0.1+0.i686-w64-mingw32.tar.gz/sha512/7ae1f197600eabde9036ae58623de34a6d25636d7861777e324eb97902f65e26c6f3775e757178f8914b0cb6c2e925413f5ffc6abc9b6138470dc9e67a17f212
+LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/md5/c08a6cf3e1baf156eb05003ed4e9ebe9
+LLVMLibUnwind.v12.0.1+0.powerpc64le-linux-gnu.tar.gz/sha512/f74e44986622329990842cb3ff549ff9254c81863d8bee468b0e58b7621067e7e7f7f18e4cbeafad6a05e0c107323de6828a78dc7afbcd7cd1892383ff417968
+LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/md5/caf151150e56827be09acca6964d2b18
+LLVMLibUnwind.v12.0.1+0.x86_64-apple-darwin.tar.gz/sha512/cb3e7aa71367ec4a115bccc2e8ac6bd5d9f22b3935b3889eee1fbf7303c5f553d7d3108977bc1f6c9b6917a6ed9e10bff211fd56b8169233ceae287b112894c2
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/md5/d95874cbf6f8b55bc314c3968a6a4563
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-gnu.tar.gz/sha512/4986a8d9cc9d8761a99a4f02d017b424484233d4cbe2d4f49ccd371591384b1b8d1c4d31cb908505b86b00f2b164568e57751dd949d91af203ee4a582971798a
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/md5/89077d871e15425b1f4c2451fb19a1b2
+LLVMLibUnwind.v12.0.1+0.x86_64-linux-musl.tar.gz/sha512/b65a218b05ade2e2d1582188897b036a4596d09cf65558f178c49c1a1a62b7d992b1d99fbe86a027dc83b614f178e6061f3dfb695b18a8e2b6bf76779b741d96
+LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/md5/54ac594b4c8e7f261034a8829dad5e34
+LLVMLibUnwind.v12.0.1+0.x86_64-unknown-freebsd.tar.gz/sha512/a43756afd92081e6dd7244d162862fc318b41ca110a5e8be6e4ee2d8fdfd8fb0f79961ae55e48913e055779791bd1c0ecd34fd59281fb66b3c4f24a1f44128f0
+LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/md5/83cf8fc2a085a73b8af4245a82b7d32f
+LLVMLibUnwind.v12.0.1+0.x86_64-w64-mingw32.tar.gz/sha512/297a5c7b33bd3f57878871eccb3b9879ea5549639523a1b9db356b710cafb232906a74d668315340d60ba0c5087d3400f14ab92c3704e32e062e6b546abf7df6
+llvmunwind-12.0.1.tar.xz/md5/4ec327cee517fdb1f6a20e83748e2c7b
+llvmunwind-12.0.1.tar.xz/sha512/847b6ba03010a43f4fdbfdc49bf16d18fd18474d01584712e651b11191814bf7c1cf53475021d9ee447ed78413202b4ed97973d7bdd851d3e49f8d06f55a7af4
diff --git a/deps/checksums/mbedtls b/deps/checksums/mbedtls
new file mode 100644
index 00000000000000..723b9012bfe003
--- /dev/null
+++ b/deps/checksums/mbedtls
@@ -0,0 +1,34 @@
+MbedTLS.v2.28.0+0.aarch64-apple-darwin.tar.gz/md5/ba33f960c7bcc3fda818c84f5e716df7
+MbedTLS.v2.28.0+0.aarch64-apple-darwin.tar.gz/sha512/3878531424317954417d09090b0a7618c6c0a6907bb04db34aef37d55a033972371455fcffca548ac03be41c0b0d1f8e51a9fe6e8f8fb4d8ef4fcbf91f15b3ea
+MbedTLS.v2.28.0+0.aarch64-linux-gnu.tar.gz/md5/9e7c78fc7c39fd19dcb170d57c8c0ec6
+MbedTLS.v2.28.0+0.aarch64-linux-gnu.tar.gz/sha512/59eaeec1a772265e62fa4049e0bc8c96cd7403d954213ac6098921acf6e128b624d6bc1ba5c6062c88ecb92aa8bf9d0a06e365eee241b6516ef0bfe2b4c47188
+MbedTLS.v2.28.0+0.aarch64-linux-musl.tar.gz/md5/44f939956834d5d8130ccb3bd5962b0c
+MbedTLS.v2.28.0+0.aarch64-linux-musl.tar.gz/sha512/f9797a44851222c005fd4068df6e0bcee68133c9a48e19e16d188b8a6927be56c620fec83264398d682eb5c89b7f01683e5898d3cbcb7aecf53e5ce678464db6
+MbedTLS.v2.28.0+0.armv6l-linux-gnueabihf.tar.gz/md5/fc07035dddd51e9c57e62edfc3fc5691
+MbedTLS.v2.28.0+0.armv6l-linux-gnueabihf.tar.gz/sha512/ffb707ba7439050862654316b4388f52e8bd09bbeb7076cf6cdc924cb60c61f871c01ccfe14e1ae1e62a5733490487324ba60e8545d60902f3317039264db83b
+MbedTLS.v2.28.0+0.armv6l-linux-musleabihf.tar.gz/md5/fc54575519130bd468ee4dbe23da0ea9
+MbedTLS.v2.28.0+0.armv6l-linux-musleabihf.tar.gz/sha512/d4b9e1bd8877f7d93d1b4e0d1c4c3d4e5d2af6920e39222667e689ec84cf9817988c91a826755a734a60ce05fed913e5421b8aa9980f257450da7f51c5e9342a
+MbedTLS.v2.28.0+0.armv7l-linux-gnueabihf.tar.gz/md5/0753a99f4645ba7e1ceb27a03c65a107
+MbedTLS.v2.28.0+0.armv7l-linux-gnueabihf.tar.gz/sha512/a7a65338ee6f93117d44975651d77c351f0c919a3ae2eea6e220719dd084f71617946adf04a08a82d55c22af0275d21fce3c692becf87ccf2d932c8aa32af7af
+MbedTLS.v2.28.0+0.armv7l-linux-musleabihf.tar.gz/md5/ff335caa1cec22366cfa2c2bf87f61f7
+MbedTLS.v2.28.0+0.armv7l-linux-musleabihf.tar.gz/sha512/a3ff7d53b45134165347dec209bc27f48be984b4fb58ddd54286a146b837d038ab21e22033f1e0713d359c72adc0b97e979532ebaa734495eb88bfceaf3c2155
+MbedTLS.v2.28.0+0.i686-linux-gnu.tar.gz/md5/c4c9728ee9d875685765eb4c9c3bf731
+MbedTLS.v2.28.0+0.i686-linux-gnu.tar.gz/sha512/214142ee7ca3a5b447a97928ffcbe0389fbb8c1fa68de387656e5c0e4406f02411e4183fb051b2107600b222bd5279b9fd3a5aec43a9d97a9556b08c5338cb7b
+MbedTLS.v2.28.0+0.i686-linux-musl.tar.gz/md5/2684f2bc8a04234ae67603150e6d0917
+MbedTLS.v2.28.0+0.i686-linux-musl.tar.gz/sha512/a533afd26893464bee62dbfa9babf6e4e1119a4be31ecb242e2ff28f5f6e3a3969057e2ce653c98c1b8d2a19e340df7a17dac8693fce270399df92cfbf3a32ca
+MbedTLS.v2.28.0+0.i686-w64-mingw32.tar.gz/md5/f205fd351e94f42cd38d34d3eff6e69a
+MbedTLS.v2.28.0+0.i686-w64-mingw32.tar.gz/sha512/cfdb819d3e6fa9ce3985e29ac733c2af6c988230ae49bbdc13f0fc234e82444d17ce5da4d3b6d8cc6ac45ea4a999f0ce03ac42533223c87bea066a371487ef1e
+MbedTLS.v2.28.0+0.powerpc64le-linux-gnu.tar.gz/md5/41b1f61ebda30a8e8f02dcd955ae0d40
+MbedTLS.v2.28.0+0.powerpc64le-linux-gnu.tar.gz/sha512/25b62106404cb3b9be3e0f778ed953bdcf9d18cb289be823f97f7a1759012c84cfe7240fc936f2e6e858273ce2022d75ecc2554d5696cea110eda6d059362416
+MbedTLS.v2.28.0+0.x86_64-apple-darwin.tar.gz/md5/e7b286dac94bef06915930180b2d3bac
+MbedTLS.v2.28.0+0.x86_64-apple-darwin.tar.gz/sha512/a2acaacb77ca6e2704144d8d99e51df49b1fc69c8751e43973e0c41219d023676d35ae05bd4ff7a3680dc0edf5438e51b67baa76f5b78947560dcc420623a3da
+MbedTLS.v2.28.0+0.x86_64-linux-gnu.tar.gz/md5/39662265088efadb142fdc7255a0b7a3
+MbedTLS.v2.28.0+0.x86_64-linux-gnu.tar.gz/sha512/a3648c78bebf4c024ddf491965cb7707df887ce10dec6f9e42eb6493bc7d1220e5b23c53f5e4e73dfe94e8d8dcf35ffc6860d1992deb9b63a0c4691d4167e59f
+MbedTLS.v2.28.0+0.x86_64-linux-musl.tar.gz/md5/1fbe9f2593bc11af031075b58a108bc8
+MbedTLS.v2.28.0+0.x86_64-linux-musl.tar.gz/sha512/d185ced64d471fba9ae1aa495b2eba0e60738e8e5ef918670b1c40cc8981389ecd48e4f17506229bafab4a11f7a257d3d544cfe87ad198482778931c2a7a8aa9
+MbedTLS.v2.28.0+0.x86_64-unknown-freebsd.tar.gz/md5/26beed62ee2abe8c6e52c1dbddbe0b1a
+MbedTLS.v2.28.0+0.x86_64-unknown-freebsd.tar.gz/sha512/f04a417d99e3b908383d3c14cf8512b2f13e4b226d07235e2334090aadb6aecce40a23ae8f8df9c0ed9618707e839aaac6de64d5fee6d7e3955b290bc564d3a2
+MbedTLS.v2.28.0+0.x86_64-w64-mingw32.tar.gz/md5/cc55fe5537719aa8bf3bbee981c01413
+MbedTLS.v2.28.0+0.x86_64-w64-mingw32.tar.gz/sha512/3436647e81fdb9db138063229f20f47e2c8405e6379ca3e7cf38fb9fde84d2b6618a5f29b8df19cbffe75af7f99e00e9583d67be7b53dcce27bff453b96dcf13
+mbedtls-2.28.0.tar.gz/md5/d64054513df877458493dbb28e2935fa
+mbedtls-2.28.0.tar.gz/sha512/907867edf532ba3b099f4fb7ce31f5773ceceb072a8d067b1d830e879d541f92f401d64f13bbe6b4eb0845e58bb765d7d28896be414bb0fc7ac5b3876066be5f
diff --git a/deps/checksums/mbedtls-2.16.8.tar.gz/md5 b/deps/checksums/mbedtls-2.16.8.tar.gz/md5
deleted file mode 100644
index 7d68d4c3ce06f2..00000000000000
--- a/deps/checksums/mbedtls-2.16.8.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-20867c94a00c2bc311274babea49b9b0
diff --git a/deps/checksums/mbedtls-2.16.8.tar.gz/sha512 b/deps/checksums/mbedtls-2.16.8.tar.gz/sha512
deleted file mode 100644
index fcd22b860b4cc0..00000000000000
--- a/deps/checksums/mbedtls-2.16.8.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-7bae402e88acc4940b070b99fd6f4fbbbc059779efdd6bb5bc4eeedada42e1bd76bcee53f982aa58488c897b808daeb560e5fc1e508eecaa580f780b318cb765
diff --git a/deps/checksums/mozillacert b/deps/checksums/mozillacert
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/deps/checksums/mpfr b/deps/checksums/mpfr
new file mode 100644
index 00000000000000..0eb73ceb693a22
--- /dev/null
+++ b/deps/checksums/mpfr
@@ -0,0 +1,34 @@
+MPFR.v4.1.1+1.aarch64-apple-darwin.tar.gz/md5/157265257536980394e0a025b9d28de1
+MPFR.v4.1.1+1.aarch64-apple-darwin.tar.gz/sha512/44064eb67f087c2c38857273b069eacec9ebc199dd908f975895ab28bcdeb761adaec1a20cb5c3a98788090eb9ec31678ab1c5802896b22738d120e379f1f6ad
+MPFR.v4.1.1+1.aarch64-linux-gnu.tar.gz/md5/ed45c58b6f9ee6993f34012570ffa6bd
+MPFR.v4.1.1+1.aarch64-linux-gnu.tar.gz/sha512/d90cc0826df50f359c49a5ad7a48639137d7f58649d480a50f1a8cd9b77ca09a2678b320aef29dbe0f07f65e40c1994f46ec6adec6047d345d7ed1cf100d0724
+MPFR.v4.1.1+1.aarch64-linux-musl.tar.gz/md5/9634a53796d208acb1353ed500685644
+MPFR.v4.1.1+1.aarch64-linux-musl.tar.gz/sha512/9fa2af227851bc9db79b8c4c381c07be12ce526a7e72e01bef76353b3488fe92cca17978d8df7ae38cbe610e1406b5a8d825b18b43932ced36809dca5ba81f46
+MPFR.v4.1.1+1.armv6l-linux-gnueabihf.tar.gz/md5/865fb6701c5b42b959c104387f8aaf08
+MPFR.v4.1.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/726b07c8dc7b0f67416df2b86edbec8577187b1e6285e53b54c55c613493e3e2987037e29b83f861ff9f64b5700d8815985cc564813f55399d91c1e33e8fac6e
+MPFR.v4.1.1+1.armv6l-linux-musleabihf.tar.gz/md5/29e12f8ee50b1060fe9ebfa0ee4e18fe
+MPFR.v4.1.1+1.armv6l-linux-musleabihf.tar.gz/sha512/871f834e1336782e51aa42fbf3a06165de91e5d469d69bd3acffe743bdb63ca55d7fef9f6e064ed91512d733bd82dfd7b68a2351f9b9f38f1d853e74f6713b31
+MPFR.v4.1.1+1.armv7l-linux-gnueabihf.tar.gz/md5/23d59ed4fd3e8923b1db11bde9c77e5e
+MPFR.v4.1.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/0093a048c0f56036c6a27830c7999a6da396acf58da93bc11c31b638d10e0fa2dd8518e6eac02f9f041b8b83b5c74bfbcc532f43e05c7662b2e6ad5b16943415
+MPFR.v4.1.1+1.armv7l-linux-musleabihf.tar.gz/md5/e7be267d931c33e1a5a97af9ee3d13f0
+MPFR.v4.1.1+1.armv7l-linux-musleabihf.tar.gz/sha512/da6d7ed8fbf01dfb8624f4aef27f095cd4ea88002f9587a51f877b05b9308ab2af277bb452ec9442cb71a82e322ec03fc30a90d17f43f3b9fabbcd5eca64c68c
+MPFR.v4.1.1+1.i686-linux-gnu.tar.gz/md5/26db9d697d5e40b3364cf3a52893b64c
+MPFR.v4.1.1+1.i686-linux-gnu.tar.gz/sha512/0ac65c66e669cd7bd9d951f61b06249c19579d280cc6146b8b2fb324482f1191c9fe1bba6187f5f67ba219506db2bfe2c71a00e6179b5a8995d4c91cc903b8fe
+MPFR.v4.1.1+1.i686-linux-musl.tar.gz/md5/8a012b83532aff4022784a4b85f5974b
+MPFR.v4.1.1+1.i686-linux-musl.tar.gz/sha512/182eb18ee7f4766d5f45adaa1eab703acd99e801a02812e8772a50fd59e7fcff3dedd9a008c85ae816c257ef106ca8d809315f95f38b34548307a9ea0e4fe692
+MPFR.v4.1.1+1.i686-w64-mingw32.tar.gz/md5/d59ad915170aa5dbb230a64a44e1ace8
+MPFR.v4.1.1+1.i686-w64-mingw32.tar.gz/sha512/96adfca120ae691e28741f8a2dadbba0df605fcae622fe4c83c17985ee66b3092761104e0cefb68315237900f413fa3790b60306c8aa82a0e4d7bf32311c684d
+MPFR.v4.1.1+1.powerpc64le-linux-gnu.tar.gz/md5/4796379b5d91ee63f5c37687b6962ac5
+MPFR.v4.1.1+1.powerpc64le-linux-gnu.tar.gz/sha512/b3567a40c1b105a24305d48ecf65aaba70ab2f44d1c7d9e7ac37a53393fedd56e6aa7f5c4395226eb7dd3c02f8aa9403485dd85e327f5d7c61e8fee5caf85d00
+MPFR.v4.1.1+1.x86_64-apple-darwin.tar.gz/md5/54e27d8dd1807dac1c0e77699c3e6180
+MPFR.v4.1.1+1.x86_64-apple-darwin.tar.gz/sha512/8e54dc8b24031ba66c53b45e537b7709dafa2736c2811ead8ca9062f03c22e78095579091dc8a4e7f69b666399c48906dfd22986657ce5f81a1f20043a80f504
+MPFR.v4.1.1+1.x86_64-linux-gnu.tar.gz/md5/7060b44302ca6544d372ec71b3b76aa8
+MPFR.v4.1.1+1.x86_64-linux-gnu.tar.gz/sha512/4f4e4f762106becf8c17790addada3a0a5f33444fde858359e4634041d877ee65a45b6d90f91f3126dc08e7bdad4506bcfdf3bcbda5994ed592267566393582a
+MPFR.v4.1.1+1.x86_64-linux-musl.tar.gz/md5/0c8110f6699a2ea27f2eeeb3949ce781
+MPFR.v4.1.1+1.x86_64-linux-musl.tar.gz/sha512/40c91daf959a9b78af513b054e4e8d0cd1c121a5f3e0e6cdf22446e97d28d3f056f79978092907ba08645c3f6e29b5134ef344ccc79a9c2bbaaeb2233140cc25
+MPFR.v4.1.1+1.x86_64-unknown-freebsd.tar.gz/md5/9dc9d9bb0662700510b89e6da4f44f2d
+MPFR.v4.1.1+1.x86_64-unknown-freebsd.tar.gz/sha512/14208fb683233d44eb2263e7674b9c5cf4f7f7151f025b2b00fb482e6609b78b2189eb25edd7c45b8634bca07e1aca746a6094af50d1449248847529ff58bcaa
+MPFR.v4.1.1+1.x86_64-w64-mingw32.tar.gz/md5/6159f631081b32b7df88e090af417f4c
+MPFR.v4.1.1+1.x86_64-w64-mingw32.tar.gz/sha512/5086da1de24b1f9431ea7dbe6407ae9c81df7a10b04845e8fe4a476a6a5dcb78d3e4b06ca81c85d1a8cf2d081948d20bb77672a4c9f6d20e194f384a323a1f71
+mpfr-4.1.0.tar.bz2/md5/44b892bc5a45bafb4294d134e13aad1d
+mpfr-4.1.0.tar.bz2/sha512/410208ee0d48474c1c10d3d4a59decd2dfa187064183b09358ec4c4666e34d74383128436b404123b831e585d81a9176b24c7ced9d913967c5fce35d4040a0b4
diff --git a/deps/checksums/mpfr-4.1.0.tar.bz2/md5 b/deps/checksums/mpfr-4.1.0.tar.bz2/md5
deleted file mode 100644
index c0d910b6130bf2..00000000000000
--- a/deps/checksums/mpfr-4.1.0.tar.bz2/md5
+++ /dev/null
@@ -1 +0,0 @@
-44b892bc5a45bafb4294d134e13aad1d
diff --git a/deps/checksums/mpfr-4.1.0.tar.bz2/sha512 b/deps/checksums/mpfr-4.1.0.tar.bz2/sha512
deleted file mode 100644
index 3ccc3031e1fee9..00000000000000
--- a/deps/checksums/mpfr-4.1.0.tar.bz2/sha512
+++ /dev/null
@@ -1 +0,0 @@
-410208ee0d48474c1c10d3d4a59decd2dfa187064183b09358ec4c4666e34d74383128436b404123b831e585d81a9176b24c7ced9d913967c5fce35d4040a0b4
diff --git a/deps/checksums/nghttp2 b/deps/checksums/nghttp2
new file mode 100644
index 00000000000000..27f120546f34bd
--- /dev/null
+++ b/deps/checksums/nghttp2
@@ -0,0 +1,34 @@
+nghttp2-1.41.0.tar.bz2/md5/523d330f62560a2fe4268beb84920890
+nghttp2-1.41.0.tar.bz2/sha512/61de1bbbe91230ebe9f7a3ef4d3874391f8180d93c8ff1e94a58035e4061d2f9057e5ba2b90f6fe86f6aefc7244795385d176a862019c47a3aad974b60caa143
+nghttp2.v1.41.0+1.aarch64-apple-darwin.tar.gz/md5/727cf8227b41aff95c91b9db31fbe303
+nghttp2.v1.41.0+1.aarch64-apple-darwin.tar.gz/sha512/86067f3d04bc1b1cbaafd044ee6eb6c306ab27ba1828290412e62bbd2637d1be90cba437c8c39b853df0b11776d90a6525c9d0d9750abd9462003cb319e38cb6
+nghttp2.v1.41.0+1.aarch64-linux-gnu.tar.gz/md5/9a1c71ba214a363072002ea4856c9f93
+nghttp2.v1.41.0+1.aarch64-linux-gnu.tar.gz/sha512/16a18a1d2e3486243cc419362f195e29e0bd64344a2b7167da6b10fe6efff1a6c7bd07806915358aa3ec5dd2590e9bd02024df1e3723432ba3dc833ee52f292f
+nghttp2.v1.41.0+1.aarch64-linux-musl.tar.gz/md5/1ed0166911be5703d0d6377b666e63c6
+nghttp2.v1.41.0+1.aarch64-linux-musl.tar.gz/sha512/632b6ea76e89b60d2c4f0379a88b0a33468dafcaa3605b69b3a5f57546d5036e7003341ea14060ecc417e659b74130597278c71cc34052ff7188472e23bf7092
+nghttp2.v1.41.0+1.armv6l-linux-gnueabihf.tar.gz/md5/288b56ea29c570c01cd52683085d1ff4
+nghttp2.v1.41.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/91bd764317b72df51289e67616771d5299ee32ad6222993555abc9bf4af5ce1920bed6a9bb2f03145c41bec557460a357d79f7716774f6b730629b225ec5b2df
+nghttp2.v1.41.0+1.armv6l-linux-musleabihf.tar.gz/md5/acba1517897a8e43aa3707d9d02c03a2
+nghttp2.v1.41.0+1.armv6l-linux-musleabihf.tar.gz/sha512/908db4e29550c277acffe2043b97a0e7946b45f9324b93b2c63fbe83bbd9edd5e899a6665925d1bb85c5662f301de7612829cc36e8edccc3de57ac54911d7357
+nghttp2.v1.41.0+1.armv7l-linux-gnueabihf.tar.gz/md5/2b0fb368ffecdd5f527eebca88925092
+nghttp2.v1.41.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/8ddd95df9896856f77be0af35fa9cb3c276a6cab2528553c08a2ba68014a5320d6ba7bd370bde362ba48a2fe097f6d5c348f5d8b20e1762010a9bb63c7bec502
+nghttp2.v1.41.0+1.armv7l-linux-musleabihf.tar.gz/md5/30baa03195b0560adf729f1aefd0d961
+nghttp2.v1.41.0+1.armv7l-linux-musleabihf.tar.gz/sha512/c93d9db188ccacb3da80a51c84799c91d686563cb055e2528094600be2565f608ceb57db9570f36a1933754a45e8f007e3c2a8de13de332effe0e5879814e5ee
+nghttp2.v1.41.0+1.i686-linux-gnu.tar.gz/md5/5bc7fbde702b477d28c1843c6ff053cc
+nghttp2.v1.41.0+1.i686-linux-gnu.tar.gz/sha512/2f9317172eb8489fab668cdef156462e1f0b71d365731b358c0f246c873e0ad7dc94b2b27d165f54c57ce6caba2c3fe89b9ce2555e235aaab7b05adbcf6a33ea
+nghttp2.v1.41.0+1.i686-linux-musl.tar.gz/md5/75a8870627e4db65706df7af92163492
+nghttp2.v1.41.0+1.i686-linux-musl.tar.gz/sha512/f1ea9b7e66d01de30a92da34fcc72cbd9c988ed768f8d8099d68f031ccc33d9f965460289a183ae1f9f507580d25020366a312b61fbbcd3b2f06ee697f8cd133
+nghttp2.v1.41.0+1.i686-w64-mingw32.tar.gz/md5/258224cfa14b53e7af79caa1dea2eb2c
+nghttp2.v1.41.0+1.i686-w64-mingw32.tar.gz/sha512/69a783d09e9258df81ad6121906f8102ad2959c623efca9fff7adf062c947d6e9a44fdab27cdd49bb08519b4a4d636bc529715771d276a69c6e3604237712056
+nghttp2.v1.41.0+1.powerpc64le-linux-gnu.tar.gz/md5/a358200b27f3229fc888d3e1763cca0a
+nghttp2.v1.41.0+1.powerpc64le-linux-gnu.tar.gz/sha512/cc02237e70e1cafea3e59e9929a4bd86a9b4dbd2928c812b3e654dcc91b87077d91284e7006799186b324ca5f8822d09e3bce1d4a69cea18f42d4e08f6aa5f3b
+nghttp2.v1.41.0+1.x86_64-apple-darwin.tar.gz/md5/fe76513593627368edcb95783b830ed1
+nghttp2.v1.41.0+1.x86_64-apple-darwin.tar.gz/sha512/fbabf532cc32277394b66cbd1c011c4d7a65380c2759b1cf57e57599c3c8752fbd2d86b42677159005d6430025b4fde5f623d814629215449fd934d1328589dc
+nghttp2.v1.41.0+1.x86_64-linux-gnu.tar.gz/md5/ca57b30aa01b0a8f69babed6398bad9a
+nghttp2.v1.41.0+1.x86_64-linux-gnu.tar.gz/sha512/3fa5a6806bf6eeb4305038b2c6c381e815988b1a37bcacb4510c229f9186e280aa98fd023495b3fd77c4f16314f81b7233778cd81cc3e3d64f503dac3e1ad70f
+nghttp2.v1.41.0+1.x86_64-linux-musl.tar.gz/md5/043dbfb6df2778a1b0ea57c70b982974
+nghttp2.v1.41.0+1.x86_64-linux-musl.tar.gz/sha512/70d7ad982fe3b9de49cc37758588c02e78e306ab3ac84682e130e2ab4541814790ec3e201daa96cdd900ebc8fc0a7c1ff6e02e796a65f092c6c24b56c1214b3b
+nghttp2.v1.41.0+1.x86_64-unknown-freebsd.tar.gz/md5/13a4e74ffd31852b885e44f2f0fed516
+nghttp2.v1.41.0+1.x86_64-unknown-freebsd.tar.gz/sha512/c81b76e9cb2920a0cc554fb9ff23f0cf20f3f9601276fcc07d1ffe0df608cbcc2513b31273f839eed3798cd2183fe9ad0580c833707526a4b8cfddbbc7b53006
+nghttp2.v1.41.0+1.x86_64-w64-mingw32.tar.gz/md5/635cc7c17cbe5de2d7e320c0d920e61c
+nghttp2.v1.41.0+1.x86_64-w64-mingw32.tar.gz/sha512/e6e0543b2835eab2f4774e027e921acfd1d2a9229876d6acf5c64dc61f7dc73c078d6489910b3179aee4ccb95aa7281b1502170aa6256e41ab1516982d0da230
diff --git a/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index f3e42c16a80a90..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-908db697cf9edf8c4102695d2b8cbd30
diff --git a/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index cef73d2edce4e5..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9c473c1a3e7632fb605cd1cd4d954e4a65438c86484a21ecd4fc89ce2f3018a99cd1bd24af9b3d7357bab2b7a248f8ddf3613d7fb1e8fd98d65f6b51c94186f6
diff --git a/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index a034f5a5f7c861..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-ab0dfc67266609b9b6cedbb1aee92ed8
diff --git a/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 19b4c96caafcd4..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-18f6b889ca043de8c8a73d779fe09d04f75ba81eaf87cbc0522606db700646acad0cd7cc8869ada4c22426b2516f92d6c18b08a4b2ae7efdcb5c4359d04346d7
diff --git a/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 19bf17e984b7c1..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-71429f3552df12e6ab7ce9aa9b87437e
diff --git a/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 11f77e519f9678..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-91adf56c03bdd1c30ae314169ac522ebc4c2b8e8b81a5b086e313774a7ac1cb50371b042e4f58fb9ce4a2b3d702865aa7b1dc12c525f7d74dc2f9054e153f3e2
diff --git a/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index 9df7abe9361d09..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-04c325da2958b0348e5ccd3d90c34a1d
diff --git a/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index a86fb0adbb1b01..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-035a89869ebef2b3a0e8368284e932fc7965057e90d79bc37289b6d4f22cc70233182f80e65bda78c009b90d9a6c6fc6cc5238840517718a83c24747732139f3
diff --git a/deps/checksums/nghttp2.v1.40.0-2.i686-linux-gnu.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index f62a24c4c8f527..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a8f5559ad5c7dba2a3a180a46428697c
diff --git a/deps/checksums/nghttp2.v1.40.0-2.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index efc002e8edecc6..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-40b77b6a9598aabe0451999a07e8f1c1a762073a438e9d62d00d38723961369fee1f101e3405477c743f1c2ec5623ddcf68dcfc54a7ed8a6649b47d8cd0c6971
diff --git a/deps/checksums/nghttp2.v1.40.0-2.i686-linux-musl.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 6199d74e47ff02..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e5e545c00156d198c2f6075e8081958b
diff --git a/deps/checksums/nghttp2.v1.40.0-2.i686-linux-musl.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 409978527f310c..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a9e27e7087deb532c7f9b44041fee4d4d7e49634b2a841e3a9126d2f2a1fd0c5eea02aca493327a5f177c356c403f9cd55b2d516f7cc4cac2594f723b8f024c2
diff --git a/deps/checksums/nghttp2.v1.40.0-2.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 4ae12712999264..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-07ec7a683f7fb73ab97bd75257e512d1
diff --git a/deps/checksums/nghttp2.v1.40.0-2.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 910402310d4ac7..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3c7cae852ab97f1db9563467651c6c053009726be184eece92d82e1e806225a8caab4f4c772de83e6d970bca8d3410820e0fa4963f52c0945947ac75c2ea1b9e
diff --git a/deps/checksums/nghttp2.v1.40.0-2.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 0c93fbaad8f4e5..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-9e01e51638262ef419b99ccc738cf782
diff --git a/deps/checksums/nghttp2.v1.40.0-2.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index ec235c6468a7b7..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-3af1baf9127332c18e7ecae3a92dd085c46b5de85c05b84db2ec6d44b7db4ed276319ada6031c06463e5f3af899999d8f7b0cebbca43b219a70c9236b0b42f9f
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index da0b7b9cfe59eb..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4f2f8a58e8f9fd650493b500bc4b7303
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index c55809450b9a3f..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-581a34923b00cefa52c5abb20f672b399094ac984a53794484b431095497319942fab4e8ca87049c9ed11292f60d4eb996ba5fe3ac0d9aa769c73e07022f50da
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index f21bee30b6de28..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-19d387203a622981bd0486c4c7d497f4
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 3d06a260605c7f..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8fbd3c94cb5e83c8597b45ecd385570d1ac5707dcf9ce880381c231679ee69c1cce6c8de13ce2c78aaf25e9b3fde1f6e9ad303347ccefe4d7426219b3a6034ee
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 41200a2034b730..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a90d499417c7f34a57eb60a53fd84b7a
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index f2321446567ec2..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-5534a43ebdd625f0cffde46e4865b1a1f99f0eb6ef5d8e6514528c04767d276cbf348ce9dc69ec31ba7cff0650b4ac9a7a8755e922053c22fb9be9cc384c07b4
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index fce02a1bed8e8f..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-492bc0b6caf63aa0003240f5191e4944
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 8959753260517c..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-34226daac3ef30483e065d42d9797dc6c0536d90abb01f7a6dd74ca5bf211c5bf973fd44f8780625e8a73fcdc9b836d19e9154c0fe7ae10cf6b2679de2bff1e9
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 0b9f636d2107d9..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-fb9dd5698a00475cc1fbf1220a438a96
diff --git a/deps/checksums/nghttp2.v1.40.0-2.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/nghttp2.v1.40.0-2.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index e08be929d89ab3..00000000000000
--- a/deps/checksums/nghttp2.v1.40.0-2.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9b2fe5b053216721c61e8e40ef35242d5f03bba390198c34687c3f1af702b3d199f61bb064db770ec3da93ee81e8356de26e1bf5d2196064a81c0fc3d4e316df
diff --git a/deps/checksums/nsis-3.04-setup.exe/md5 b/deps/checksums/nsis-3.04-setup.exe/md5
deleted file mode 100644
index dd2ee5545ece64..00000000000000
--- a/deps/checksums/nsis-3.04-setup.exe/md5
+++ /dev/null
@@ -1 +0,0 @@
-3c93427c56714478bb5a7a4bbaab934f
diff --git a/deps/checksums/nsis-3.04-setup.exe/sha512 b/deps/checksums/nsis-3.04-setup.exe/sha512
deleted file mode 100644
index 4216a8ccb295e3..00000000000000
--- a/deps/checksums/nsis-3.04-setup.exe/sha512
+++ /dev/null
@@ -1 +0,0 @@
-55142f5eeada65f18f6de950ead5342d78e1a4a01b7620d02dae2fbd3f0d963db482f5a63939131e81a78b458c5306f600578ab96891b0661cb0b74be1f7636d
diff --git a/deps/checksums/objconv b/deps/checksums/objconv
new file mode 100644
index 00000000000000..f3dfb0de2ffab2
--- /dev/null
+++ b/deps/checksums/objconv
@@ -0,0 +1,32 @@
+Objconv.v2.49.1+0.aarch64-apple-darwin.tar.gz/md5/bdf95a776cfe782d30f48a41575e1414
+Objconv.v2.49.1+0.aarch64-apple-darwin.tar.gz/sha512/188b5e25d238a5e2f704c3ba8d2d57d6fe452f9d5c0e26b710ff225086581f906b8de6388c6240bbaa9d395cb58c0a73a67e65cbb8df6be7b98473101db467e0
+Objconv.v2.49.1+0.aarch64-linux-gnu.tar.gz/md5/2c2b88856921c38294a30671d4794dac
+Objconv.v2.49.1+0.aarch64-linux-gnu.tar.gz/sha512/2c0e6cf6da02e25386e89c51f5f2d39833b49653b20537c73f7938a4045805d07b0f520661d07332aa1372231d34a3a979ad490bf5eb91fc00fcc20da3e7a9bf
+Objconv.v2.49.1+0.aarch64-linux-musl.tar.gz/md5/11f6c06ee0d98b553781367d5404c76e
+Objconv.v2.49.1+0.aarch64-linux-musl.tar.gz/sha512/d93a742a08f873f9336f57a28af8a1eeff624d5d9dbcbceba0d58c17a2ee3791b363661af293d08997d701fc22177192e5b5154b827974163c189ad6511ea13a
+Objconv.v2.49.1+0.armv6l-linux-gnueabihf.tar.gz/md5/0151be530a0d54376590065cef28666a
+Objconv.v2.49.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/cb7cf5b00f211be4194b5b6acf11cc491b5f140d990fd8babc6590649e9864cf07a421e8a87ccdbe0b8720bc6473166837e384202bcbac6cedb2a9bd9c46711b
+Objconv.v2.49.1+0.armv6l-linux-musleabihf.tar.gz/md5/390251e8245a3d8d110a1786336663cc
+Objconv.v2.49.1+0.armv6l-linux-musleabihf.tar.gz/sha512/b7eb9e4a983e69ca970ce86bf306b7df11bfa8aefdd26cc02841c563ad0b5dddcb47f106fe7a0a420b20ae1d4890e6a8011c0db5a26e3493c80e63eeaadf86b0
+Objconv.v2.49.1+0.armv7l-linux-gnueabihf.tar.gz/md5/5f924d5bc16bac6447e9f2deb943e60f
+Objconv.v2.49.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/edaf3881754bc949ef3c60b058cc0cfff7e828d6486ca968940d216852baed5b06544dd48619cf045f3ef87df5ea00389ac3b298a1d4e9489995351e1e1ca952
+Objconv.v2.49.1+0.armv7l-linux-musleabihf.tar.gz/md5/c967a9ed1764d9692e905d879a03f45f
+Objconv.v2.49.1+0.armv7l-linux-musleabihf.tar.gz/sha512/08e9397bbd34734c3e9137f64a94a86ec11bc70eaf54811301e7bf782b581ffdcfa041936e29aa0a8ee46203591f8213d6170a7c6ea141a0ab625ac1156dcfbc
+Objconv.v2.49.1+0.i686-linux-gnu.tar.gz/md5/a59fd92a1ed62048edb1a1297d615aa7
+Objconv.v2.49.1+0.i686-linux-gnu.tar.gz/sha512/581fa0f5ea37e1802c9845bbc9df0b826fdad5900e712eed8767922e155026011570b4a4b8714430c038fb3c0d47965168a4c33b21bd28cd9080cb036fc9f033
+Objconv.v2.49.1+0.i686-linux-musl.tar.gz/md5/05524b26d550ad8fd045976f395cdf6a
+Objconv.v2.49.1+0.i686-linux-musl.tar.gz/sha512/5e6d3b27b80f96a4f1c278f2f8fe0ff1f9bdc2f1df223a7c4d1c235c18dd8eac0b8b74d37defda656142fb2882c1b590bb3e730cfed77e316012eb69b9580b53
+Objconv.v2.49.1+0.i686-w64-mingw32.tar.gz/md5/10e82481a5396b00f568eac690c47e0a
+Objconv.v2.49.1+0.i686-w64-mingw32.tar.gz/sha512/27d606acad2cf6789c9888c72887bb6a277c07f7b528fd8fe333f9738caae73e293df76ba9a0af5dceb00b8289bbc523ce235cb0eff0f0031bcf20300b4168cb
+Objconv.v2.49.1+0.powerpc64le-linux-gnu.tar.gz/md5/8755aecaacc983e2a9a948eff5c485d9
+Objconv.v2.49.1+0.powerpc64le-linux-gnu.tar.gz/sha512/8b2bf010ff7da164b59df7147cb4904ae6f2913a3095c649e20f4263f77fb92cf8513d9130a345576da2cca4caa30828cc43b9c8ae1870268e3140e0016ad557
+Objconv.v2.49.1+0.x86_64-apple-darwin.tar.gz/md5/0657a0ef9f278718c741da4d72c0952c
+Objconv.v2.49.1+0.x86_64-apple-darwin.tar.gz/sha512/ffd9247b02f72830d3b12e075124239ca472503701eef005b7457e21cd10103aaa13520206787818f11e9dcf35a156979e01cf5839dd554bab935ce757f032e0
+Objconv.v2.49.1+0.x86_64-linux-gnu.tar.gz/md5/0e029960584d00dbf8673ec4fcd9eb83
+Objconv.v2.49.1+0.x86_64-linux-gnu.tar.gz/sha512/ae747a84edccbc804239499c10d863c62bd5846b4ab87abab42c832c9fd446065024034d76ddc167d358821f90f8d2241c341232d9dd86cf31601e951e050a6e
+Objconv.v2.49.1+0.x86_64-linux-musl.tar.gz/md5/39fc1ec3602dcb6eb2f80448269588fa
+Objconv.v2.49.1+0.x86_64-linux-musl.tar.gz/sha512/e86114bf0b0da7297721c56b1cf246f52b9331083e4e73b53a30a1ff031f260a5d6bd97b455669c38af915689a363b99a30ea7ed743ebf49271b95e34bcfd85e
+Objconv.v2.49.1+0.x86_64-unknown-freebsd.tar.gz/md5/9d331f32792c394c0d11dc4e6c24ffb0
+Objconv.v2.49.1+0.x86_64-unknown-freebsd.tar.gz/sha512/0c9b7c2f58110b3c8df52d83cbadd5349fb81732acae6786617e72a3150aa9ae8da7afa1e9eb08639f4dd4e7e69f29b882f98e99a8a4404b569c545c904f5523
+Objconv.v2.49.1+0.x86_64-w64-mingw32.tar.gz/md5/c8ef7dd7742e2c9bf2d05d2b0310bb50
+Objconv.v2.49.1+0.x86_64-w64-mingw32.tar.gz/sha512/b47ac1f3a10ee4f958dcda72ac45f35b94fd436498d008642ce53b93ff517c0d4158a72cbb849336dc9d4a16e26021af13e7b6976f83610380cd78cce6a7deb1
diff --git a/deps/checksums/openblas b/deps/checksums/openblas
new file mode 100644
index 00000000000000..1523372d709fa1
--- /dev/null
+++ b/deps/checksums/openblas
@@ -0,0 +1,94 @@
+OpenBLAS.v0.3.20+0.aarch64-apple-darwin-libgfortran5.tar.gz/md5/036acd7c7b68432f01f2a980bc4958be
+OpenBLAS.v0.3.20+0.aarch64-apple-darwin-libgfortran5.tar.gz/sha512/db2c995b09b5ab046491257b44a8806fd5e254bbf4b4df6e9281ffc8d199745a3d6fea912da2fdd657447e194c73db52cf7acb348b49fd37758b6fbbbdfd3a93
+OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran3.tar.gz/md5/7c5de800082f39fea05d1fdf9cdf2e79
+OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran3.tar.gz/sha512/78775b01c1f24848da6111d9f4746f0b44f5966aa202af00182c4da649e4b4cf630cd1bb90e8ed32f54dfdbee0f6d03b87c171f03fee9b37886634a20546d627
+OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran4.tar.gz/md5/eefc198718aa837a04e0f8e6dbdc8b0f
+OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran4.tar.gz/sha512/cdc351d992b795732e02698df8f5f31c301dbcd6d995d2a35790461b08f3c942d70e8f7c031a943873eead4fcbd1e73649aafdfdb7450b955f4848be2e9a43de
+OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran5.tar.gz/md5/8d9ced4a8e441713ceb0d79b72b43ca5
+OpenBLAS.v0.3.20+0.aarch64-linux-gnu-libgfortran5.tar.gz/sha512/b1dfc3f4a539d01266e3e5d400864cd445c4bc561de464e2f6c9eb5704541aa436944f6bfc89be1948e9675f1a83098d77fe52f70886dc90d54206c81f350277
+OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran3.tar.gz/md5/fa63d8009ac2605208ceea9f6183acdd
+OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran3.tar.gz/sha512/92b8e2fd2bc45c60aaf8d79c59f96b721d969cd3320c0b04989a5a48099cae213fd4a6aa9dca45910d881e495d87863513b23ee7c433c894655cf72c7b009323
+OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran4.tar.gz/md5/68672f9cbcd9bee92c89b19599897034
+OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran4.tar.gz/sha512/4c19f3cb7afb52cd54c3852fef3815a23e57b5c2ebd9b647ad43ee62191b74474c787b22d6213555f38b8233b96d479631881d522c7bdd544954a9f04b51c509
+OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran5.tar.gz/md5/7fd9458e1482d46f761d6a519999a648
+OpenBLAS.v0.3.20+0.aarch64-linux-musl-libgfortran5.tar.gz/sha512/2e20c845deb5c87c6e02a3512728a27204193a764f8ead1a66ce053b66d03bb853bbf40289727b1b635b17423416a7a69c633242c12f98d3ec1eae5e82a88613
+OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/md5/0868668b73c84e14edb634482d59eddc
+OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran3.tar.gz/sha512/c87f91120db8d3b32cc12077b1e36110f89253fde22aae9de88945fc731ee74271acf31cabac9971635725f586b65cf6b1b9badebcbba5408b0ff4c68b580ccf
+OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/md5/9e84b7585acf2bb71781002b2238d888
+OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran4.tar.gz/sha512/14b57f9d5691997cf01bc6187a1a1d58d07d162ab8eb2a480e7c42f0cff1583161c8b1a059c9eeb83e7ed276c8ffe2e193db001a3b51724e5af24c72f5e33572
+OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/md5/a4768ea555e68fc755da169f1c7eb21c
+OpenBLAS.v0.3.20+0.armv6l-linux-gnueabihf-libgfortran5.tar.gz/sha512/490ce2b60cda0b5ed40df103e79b83ab75dd03779ea88b0ae5d3b76acadcf4810b35f69566e396b438d881130e43fd0dbff1672d0383dc7fe275f44574d8830b
+OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/md5/1a4e7e7cfdefcd878c18bab39b9c80cc
+OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran3.tar.gz/sha512/15b512728b49782717770f044958ed3afcd54d6cc70b362a7c96dbadf7599bdcdd157ee021287a70e45957d0a856417540e64e2399cc392b9de55036d607fa29
+OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/md5/63ce4aa67d1d56f2cf456285546d3eeb
+OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran4.tar.gz/sha512/ac0bd761ef574d3533fa7f6110b9ecf992edf7a68c20fff4faf4b7372d3de4c5ed558119dcdb669296aab5c0da5ce0f51f54abfe998958e1924cfa0eb958305e
+OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/md5/581bcbd14328d82258511f8b91d8bf84
+OpenBLAS.v0.3.20+0.armv6l-linux-musleabihf-libgfortran5.tar.gz/sha512/be66567c762f70885b187dc8912f83003c69dd5000387b5b82162ba9f47acb17d855f8f5bda2f31d3fc7e01d2aae3cd6b2392632d70ec34f2d648010a8b11f38
+OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/md5/30dfd96f7f3d35df95e70d506f35c9f2
+OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran3.tar.gz/sha512/84213bbff84899882ab43599f3aeab1c6e3ee8f7158a3873ec2d6a3166e69036c16d742d25c476468f64b6644a2f798485e50427139880f1ae933ad507a2952c
+OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/md5/4b82a4e68a43d29538a318763004aa94
+OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran4.tar.gz/sha512/94d35902c34e6fa68a0648cab65db49650c73ed21d69ee667350cbbb81028413b92fc30e16504648a6b42039f483d327264a3ff39d546cd30241f4672f9300a2
+OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/md5/7e290717c23a468383bd66b46eb58fac
+OpenBLAS.v0.3.20+0.armv7l-linux-gnueabihf-libgfortran5.tar.gz/sha512/432cf42a320a265b9259d743eaca75b884663877858149b0feb83948436a941940955c0c89c6de9ca114f0bbf153127a046813195f4669a81cab1ce244cc5a6b
+OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/md5/f72bf36862607c57fc9cee5dc3f94dac
+OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran3.tar.gz/sha512/caecc044e25d2939eec45924d69e64d3854fc54626a56126454fb3855ae2dabf36fc248d7ef9d240f15e8883787a43539e2a0d8dc68fc5c93a094ded94f3b976
+OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/md5/66bfd606fc80e02999ad44243d3b686a
+OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran4.tar.gz/sha512/b3d76ccf40af1de018e829f5dd696c6d18ad1fd96657a06d190a9d4e939cad5062a3a2ffaeca2ce7f75e822694ae0b817568dd8f115e089a59590bb34af264f8
+OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/md5/ef7aca842a623246b4e2876ff28c53ef
+OpenBLAS.v0.3.20+0.armv7l-linux-musleabihf-libgfortran5.tar.gz/sha512/a59feb34806d651a2a3614bcc5203407db626e96dabeb6bb12b8d73915cfd87dc02b0e54704c5d0f1b8ab984d85ee64509a934884640d2522fc4a9835989aed8
+OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran3.tar.gz/md5/f2ba9ed0f68447aeddfcf3ac883cf83b
+OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran3.tar.gz/sha512/1b6f300febf5ceeb0045c46cc3d6e9f2481cba2ceb97dcafff1667f06b8b96a2ad4975853e6bc2e3e6715ade28be5fb569fdae005f4fca2140a5557d4a0845ca
+OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran4.tar.gz/md5/b39347f487b46996de98d9a453ae804e
+OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran4.tar.gz/sha512/a923a92467b4582f69ec9d96556c8f2ef55a3f99dacecf0491da9740912d14d09a9ba86bdb5fcfbaab87250c57a0c077c2f6ccc08bf3236ba5c7d98822e9c32a
+OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran5.tar.gz/md5/6d9b4adf3fa54151c45b832b5869409e
+OpenBLAS.v0.3.20+0.i686-linux-gnu-libgfortran5.tar.gz/sha512/f15583c15fb4e4b6a38353fbbce2aa57c8f46d58e6c5464a685e5fb0afd76f1bf9b3986c1d34af643a8c9b3a8a24ef63389982c2e8ffbf91a63e8f1ccca2cce5
+OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran3.tar.gz/md5/fa46f28f624e8c0752bb76abc04a41d5
+OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran3.tar.gz/sha512/76018ed804f25212760f1128f7d3823a1c8ba72b8cf5d83aa5be5c5f6e3de8076b04be9d5b659af75e3c2fd5cb9a0654dba59651f010534faf174a6c7d836cd3
+OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran4.tar.gz/md5/48411109935a2ada9d2e336515f36b6f
+OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran4.tar.gz/sha512/9be06c11fb248d6da47dab21f60d1eec6b486a137048f79f2138b5fe6818846ac198da7d73ab93ec161e8861d7e670b587b6eeb846c571497e96023934127903
+OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran5.tar.gz/md5/b0a81e44dd4a216c60b6ff139512d7b5
+OpenBLAS.v0.3.20+0.i686-linux-musl-libgfortran5.tar.gz/sha512/1b1c3cc5e62af6af8e106c60c59d7ff685d567e93dce19643ba8c0547200000bae96a3473573619ab235c34ff8e65745266001cdc868e948ff3ecaa9ba93389f
+OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran3.tar.gz/md5/18988c19ea5bdb81d97f8ce4456319f6
+OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran3.tar.gz/sha512/466d6b05dcf00b6f09c1a8b8fda97a0035838d73d77954f6cd499358e8160af6cf3e8aac97d0f7ba7ced144db1362a9ba126fb113a4469c232a6b9706dc3dc32
+OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran4.tar.gz/md5/d0aa399c07712e9a520a6cb8067bda63
+OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran4.tar.gz/sha512/7c3e0b1c18812719be4d86a641d25d927c9c8cbc6e1571c7a46ca27672ada00cbe3879faf0b5aeaaa0454907551953a20a56be0bc24b651df117532ace2f9067
+OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran5.tar.gz/md5/90d51a2f41c11fc8d1896597dd106cd6
+OpenBLAS.v0.3.20+0.i686-w64-mingw32-libgfortran5.tar.gz/sha512/683c40193ec7a4612c4a36e9d9f6d9443bfb72dbfed7fa10b200305c94589fd75362670d9b4d7646f24b4f7933cfc55a2496030907e2d3fd30b0eed8b6a2d10b
+OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/md5/30d5022d6f52adccfaf6b3dd837b6151
+OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran3.tar.gz/sha512/433a520458d6804eccf69c74fe357e6d819223b0398007f17420a6aa77a466177d9dcd4f467821b4d99f4397f5e0c1dc0864512a7f69c43f23bc40b6414449b6
+OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/md5/2848232be1646333d6d413a588519d99
+OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran4.tar.gz/sha512/edb51d55f602d2a271109dbc12e59e23c232e58833bcc34dd857858d10d318eac99ba300fe4c6480b995e152ff036ff175218a2f4b29910a27f1861543d1e978
+OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/md5/8bd4f4d571dc382eaf0084000596276e
+OpenBLAS.v0.3.20+0.powerpc64le-linux-gnu-libgfortran5.tar.gz/sha512/f9507f6dc53c632e0f26de074bcd312956b2fb492e9f1d32e3cdf1a6099d6f2b17eea09ae825b2414a28dfbd6958813cffa289fde0a15cf7cba4e6b3653d2a28
+OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran3.tar.gz/md5/c644f00642c69946d12b8f1f96a8e766
+OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran3.tar.gz/sha512/2bd51e9adda3e0955ab26c5c178e9a75a8d9c1b4cd2fd221bbb7b9eb72337cd5034f42b53aaddcf97a807e01f2b9836f9be95a5c6517c831374a3b5148b6e380
+OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran4.tar.gz/md5/cea0d5ad3528298e4512c900a13f21ec
+OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran4.tar.gz/sha512/379ad13b723acde1d2239335c2611a9ebd2abe1432931d4c2395fce9f50bbd5d830a23fd5ea5afc1fc251704e4ed880468abde42bb0ea75b6bb0abb9a7753c5b
+OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran5.tar.gz/md5/bc726288a19a8bdcef3205de12b5f172
+OpenBLAS.v0.3.20+0.x86_64-apple-darwin-libgfortran5.tar.gz/sha512/3e26b8a2075f997ded8750d84e3257b895e7e05adac77d836e66fa7478b43368b7d4b7a458c6991cb642ce0d135b1b507dade7302c4f5a44aabe637849bc1acb
+OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran3.tar.gz/md5/d162add49c7ee74dfc23b820bbd363b6
+OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran3.tar.gz/sha512/70bcc15f37e4cd822c2f95d8fd23e912829450825399d31c29c00a4ea219ca37f8831d3132ae4b5972fe9ec95c304bd1274a12ec8a8b289b1830cfb7ca0392d7
+OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran4.tar.gz/md5/f036c51e0954b8b76e3023280144b5ff
+OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran4.tar.gz/sha512/2101747ec254f51fe5c2cfc49ce9599aeacf0d3e7bcb14c9ccaa59d8b0f7e9dcda98ab3ff38973817b736a33ddf654e17748d8a9c3b40e5352a198278484a2f0
+OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran5.tar.gz/md5/143d8e7cf2fb615ccab6617bffa4acf7
+OpenBLAS.v0.3.20+0.x86_64-linux-gnu-libgfortran5.tar.gz/sha512/6e72144f83cb329301feedea02581a100d137f3b209af4983500c432b6d23cc7473c85a7b1ba90e24965508e74a191b49cea8820b5899793440c3ce067acbe06
+OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran3.tar.gz/md5/871863002d0053784a81409b4581c8cd
+OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran3.tar.gz/sha512/908936494c981e14bcd7818043efe979d9522ae1c9ebcd69feb853c46a2249da1cb5292844d0de7276762a21ad8680a1117229f3ad53332b536233d8722c4d85
+OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran4.tar.gz/md5/ce4897980b12374801095fadfad11196
+OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran4.tar.gz/sha512/ba551942563a58fd22d182a29cee83ce5f51db10e52bc8cb27d979dc71632484e1acb713d4304d773c3111d5dba532bd65651374e91a364f8125295acacfffd4
+OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran5.tar.gz/md5/301ae23724b44c1d10e4febdc6738df3
+OpenBLAS.v0.3.20+0.x86_64-linux-musl-libgfortran5.tar.gz/sha512/2f1479b1f1d10682751b025493bc38cd5eb9854620024b1f0ac45ba0f7a7621b4795c4c2f89eece5c80b671387d095b118d58d8ba201214f45bcea1ac64fca91
+OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/md5/51088d57d2a9e9e50259128a0ac48727
+OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran3.tar.gz/sha512/c88b1eb662c16b75c46a25959f6fff22de2cfb2a97ff1c0cd482528e83d54a4d8bbf33c3e7d6a79ad75998d0c6d46ef6f245e8ad406d1a072907138d7ca4a34c
+OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/md5/06167501fc4cc7b6587ead3696ef72af
+OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran4.tar.gz/sha512/a853a4c5163e0bc0266e75df0b208794e8439a008b625b520b51e7891825a355960f62fe2275e4f849c345862fabf0339d0d22d4bdcd87acfb17ffd65627f74d
+OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/md5/73a43356c9bf374765a2bc8910e2eb49
+OpenBLAS.v0.3.20+0.x86_64-unknown-freebsd-libgfortran5.tar.gz/sha512/0c2092789f4eeab1725cdfd7d308a2ede054b993d6d1a83f671c5c8e9f651565c282af7371c958c61a57679a233d3f62a287afb44225498dc31249f6821ddf98
+OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran3.tar.gz/md5/46bd5ef0708671aeb2a533476a04591b
+OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran3.tar.gz/sha512/1b0a3f9e61101cbf455da70056dea75637f3008df727072a22150072e7bfc773294378fc42a492b2351f9af2d6b7866503c0039f8addeab07d4f4b5d0f42b5fb
+OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran4.tar.gz/md5/7e412c9961e4762c40cca9c27e5c9aa2
+OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran4.tar.gz/sha512/6a275bd153bb0ba227f39ffbfe95ee1f84f42f79361f7d3a7b1a5c29ca253b8d8b2427ce389f10cf2b95fb87d91dcdf1144f24c82d11320a0aad7dfb8d3c0498
+OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran5.tar.gz/md5/2a24ea7c7a9bdf8069d7f62c55d09bb5
+OpenBLAS.v0.3.20+0.x86_64-w64-mingw32-libgfortran5.tar.gz/sha512/7f9134df42be432199119b2a5ef5df2552247cca8647546fb755901d5903030fd5cb565c711248f173c71409cd3b30609a2adadf0213c9a096a9b70298b29a87
+openblas-0b678b19dc03f2a999d6e038814c4c50b9640a4e.tar.gz/md5/4586a405791fb16775eb9aecdd7daa59
+openblas-0b678b19dc03f2a999d6e038814c4c50b9640a4e.tar.gz/sha512/c34a498f2f1ecf65c5174a198022558bf6626eb6da0c4191762a35fd9d335c67dd17246cee3ef503301738a202650aaefe5e0073d8abefd3d1b8ba19cc953304
diff --git a/deps/checksums/openblas-63b03efc2af332c88b86d4fd8079d00f4b439adf.tar.gz/md5 b/deps/checksums/openblas-63b03efc2af332c88b86d4fd8079d00f4b439adf.tar.gz/md5
deleted file mode 100644
index a9f2bfd1d7061c..00000000000000
--- a/deps/checksums/openblas-63b03efc2af332c88b86d4fd8079d00f4b439adf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3d692acc6927454f620a4c493bdb159d
diff --git a/deps/checksums/openblas-63b03efc2af332c88b86d4fd8079d00f4b439adf.tar.gz/sha512 b/deps/checksums/openblas-63b03efc2af332c88b86d4fd8079d00f4b439adf.tar.gz/sha512
deleted file mode 100644
index 14fbd0e32f8a4a..00000000000000
--- a/deps/checksums/openblas-63b03efc2af332c88b86d4fd8079d00f4b439adf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-cf89f6db1b6366833d29a1dc718ea0b8f61d162f70695c33fc94afbaba232605630a7a7cc3d3b9bed7493ec85402b65180ca99c3101de7141d6f2919318f55c1
diff --git a/deps/checksums/openlibm b/deps/checksums/openlibm
new file mode 100644
index 00000000000000..4c8ad913fc58db
--- /dev/null
+++ b/deps/checksums/openlibm
@@ -0,0 +1,34 @@
+OpenLibm.v0.8.1+0.aarch64-apple-darwin.tar.gz/md5/cbb86fb881fce48d837e40017272f7f5
+OpenLibm.v0.8.1+0.aarch64-apple-darwin.tar.gz/sha512/3fc7753ce12e0012b42e469084351ec97e20c1761b50add48af4b4d92ba61b9db4a015243f31f7ec9bf322a9a4e098cffad31cd4501424568bb49fe973b63a35
+OpenLibm.v0.8.1+0.aarch64-linux-gnu.tar.gz/md5/416b30b38b91a76d1613753ec85ac4a9
+OpenLibm.v0.8.1+0.aarch64-linux-gnu.tar.gz/sha512/9ee5821ee83cd5e29c4f338e0076fe3e3705925e8556abe95f0356151ae93f23be0bbcde42cbf6b382e8c29a201959cb31be349ef3d1447e2d19a414a476fc55
+OpenLibm.v0.8.1+0.aarch64-linux-musl.tar.gz/md5/b170fc0058803377d7c4d7d0c9e4b954
+OpenLibm.v0.8.1+0.aarch64-linux-musl.tar.gz/sha512/be311888953887745a2d64eb3d1d5755e2f37e9f46558c6f6722520c503ee23d3470e97d7bf28f0a03915a398c31080e789d6e1287a2b743b6fd3f37b3a2911a
+OpenLibm.v0.8.1+0.armv6l-linux-gnueabihf.tar.gz/md5/63f60d2f13acc6fd2ba3854a8ecf2b0b
+OpenLibm.v0.8.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/1853a8346f460cf7e26efefb27c36440976e40f000aefc22a81bb69bb25d07675a19f4b64c4dea8fedaaae76925467739cee8cd5743689ae55512e57dab54286
+OpenLibm.v0.8.1+0.armv6l-linux-musleabihf.tar.gz/md5/5b410e9611b188f34fcc5314c45d2297
+OpenLibm.v0.8.1+0.armv6l-linux-musleabihf.tar.gz/sha512/ecb2fd14728c40c7e3d2cf7c4f1dc06568f1dacc60677218ec59011cd913cab173c33db1c402a8b27b8f0556ca66667ebc033130222617cb4f5d9d8cfe7431ed
+OpenLibm.v0.8.1+0.armv7l-linux-gnueabihf.tar.gz/md5/0fc3732640b6bfd52759e74be75e2290
+OpenLibm.v0.8.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/81214ec825d33e37c9e889bea054f3aa72466e5fa585356247ef0ec4653b548f7836219092a8c7f0bc3c694e97582012cd026325e0b1c1a6fc113c461dfe49f7
+OpenLibm.v0.8.1+0.armv7l-linux-musleabihf.tar.gz/md5/7ba7a7f9461b43b8ac622b9fa9c0489a
+OpenLibm.v0.8.1+0.armv7l-linux-musleabihf.tar.gz/sha512/e088f806f9fad70b2e6ea28a39ffeb083b4c1c215b1cac73e848a06cb19efcf3ff100e4d401ec2a0ed225ecba6dad115f5d245a612a39c90334a753bc79947e6
+OpenLibm.v0.8.1+0.i686-linux-gnu.tar.gz/md5/4a2eb776682551a25bf1d27e9d8b645e
+OpenLibm.v0.8.1+0.i686-linux-gnu.tar.gz/sha512/716808c4a2a8c06439072a39db1f4b93a171a2f42e9677cb7f3eba04f204bc181f96c915ad8c74141952eb783cd82ecf3804ea60d830e3f5d9b88bfb4924223d
+OpenLibm.v0.8.1+0.i686-linux-musl.tar.gz/md5/1acd5b790b7b8d1c40c6b094b99fcdb6
+OpenLibm.v0.8.1+0.i686-linux-musl.tar.gz/sha512/22c28a5c5e9542ddfb23297523b56e0a964bc322d252d5681e763c28d4c876dd683d3456297e385f560ab4cf378e5848047aec6cc934850fd0a4df0ea6967b44
+OpenLibm.v0.8.1+0.i686-w64-mingw32.tar.gz/md5/8e974b3bafa9dfe1cdba1d31049d7e85
+OpenLibm.v0.8.1+0.i686-w64-mingw32.tar.gz/sha512/df9e5250dea575341ec1a40d94e3485701690542bc7dfede0504c04fdb7f3fd272d88debdd6546d8644563fb244373b5f4e214da1d0b0133db5b66cdafbf441f
+OpenLibm.v0.8.1+0.powerpc64le-linux-gnu.tar.gz/md5/83f68736e911b7c700bf7a8c79cc48a8
+OpenLibm.v0.8.1+0.powerpc64le-linux-gnu.tar.gz/sha512/b879f81d0c02f457310efc08269a7012fe6ed479d33bf79830e48dafce03976425566c5a210ed4833e106848eda038ae531f0c956971b3139c60002a5b1c1325
+OpenLibm.v0.8.1+0.x86_64-apple-darwin.tar.gz/md5/2cd7845dc3d5558cf77e8b6faac4a659
+OpenLibm.v0.8.1+0.x86_64-apple-darwin.tar.gz/sha512/f894c5b7541ebd4f652cb0028b2d367db6af13258d5a42722f19e0ac3a6bc438453e5c2bd17d8b491288c93796ba638c4a3a247f9d33abbac392a6db9169cbcb
+OpenLibm.v0.8.1+0.x86_64-linux-gnu.tar.gz/md5/eda96ecab33dfb9a015375813c41d14a
+OpenLibm.v0.8.1+0.x86_64-linux-gnu.tar.gz/sha512/641d35dfde15b0868c4ede7d2210ac136e4ca7bf7a969623cbecd792eb09e60824601a6178dbc2c8e4d49523aa105956b5f63566b789003ec736164a8fe5df4b
+OpenLibm.v0.8.1+0.x86_64-linux-musl.tar.gz/md5/8d8e6ffa1ad9574bd8ead8b99754e122
+OpenLibm.v0.8.1+0.x86_64-linux-musl.tar.gz/sha512/a66102e69688fdda1c1a51ab07697db0e183b6def21dfed411cd6a92e6c23b22eacd6cccab16e43b86e318d967f81d43dc26bc778746186965bc68c65c7e87a0
+OpenLibm.v0.8.1+0.x86_64-unknown-freebsd.tar.gz/md5/7634eb65eea08ab48a9224295071f058
+OpenLibm.v0.8.1+0.x86_64-unknown-freebsd.tar.gz/sha512/f2c0a6d5113de911aeb516308b1d0ea71a1d61f2ce54c367670a6567444e0030babd66546489e288891678aa6613f40fd0182261de3244f1aed1085c2a32e81c
+OpenLibm.v0.8.1+0.x86_64-w64-mingw32.tar.gz/md5/9de4a420caab2cb53ddf86eb0241885a
+OpenLibm.v0.8.1+0.x86_64-w64-mingw32.tar.gz/sha512/928675df75d6c6f1e1acc46a2bb554ef120d74671d23e8682adbe05732f668401eaebd98d534e54f7f34e1657d2e1b1d19e18b3822faa891cdf06afd0c3ee56b
+openlibm-ae2d91698508701c83cab83714d42a1146dccf85.tar.gz/md5/19408d70bf042a109e1c267a53740089
+openlibm-ae2d91698508701c83cab83714d42a1146dccf85.tar.gz/sha512/9597fdcbc4af8369e6eecc3f8e86f251661cc64d236578f3ee8a6b39e77a47951446e1a0fe1151513da153e7ed17bf39aa5a36c32153d0d0400232bed2839e22
diff --git a/deps/checksums/openlibm-5efed306d509905714e3c43fc3a43fb26f3df743.tar.gz/md5 b/deps/checksums/openlibm-5efed306d509905714e3c43fc3a43fb26f3df743.tar.gz/md5
deleted file mode 100644
index 8adcba9e40fde9..00000000000000
--- a/deps/checksums/openlibm-5efed306d509905714e3c43fc3a43fb26f3df743.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-7c2e3d84b66d95b43b8a34ccde2b18a2
diff --git a/deps/checksums/openlibm-5efed306d509905714e3c43fc3a43fb26f3df743.tar.gz/sha512 b/deps/checksums/openlibm-5efed306d509905714e3c43fc3a43fb26f3df743.tar.gz/sha512
deleted file mode 100644
index e65ffb172fd24d..00000000000000
--- a/deps/checksums/openlibm-5efed306d509905714e3c43fc3a43fb26f3df743.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-59464f33fd8b507c6d1a76a29beff71c6e242e1c1609feb6ca3011e0e19fded7ef53d617f2c54c70b9d0ab02d963f2c50f21cfba3988bf8846c3c9ddf41a6cbf
diff --git a/deps/checksums/openspecfun-39699a1c1824bf88410cabb8a7438af91ea98f4c.tar.gz/md5 b/deps/checksums/openspecfun-39699a1c1824bf88410cabb8a7438af91ea98f4c.tar.gz/md5
deleted file mode 100644
index c667bb83eeef04..00000000000000
--- a/deps/checksums/openspecfun-39699a1c1824bf88410cabb8a7438af91ea98f4c.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-00130a0879f691240df5f3ea25e6f0ba
diff --git a/deps/checksums/openspecfun-39699a1c1824bf88410cabb8a7438af91ea98f4c.tar.gz/sha512 b/deps/checksums/openspecfun-39699a1c1824bf88410cabb8a7438af91ea98f4c.tar.gz/sha512
deleted file mode 100644
index 6848380958cdb5..00000000000000
--- a/deps/checksums/openspecfun-39699a1c1824bf88410cabb8a7438af91ea98f4c.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8681fc2f31696de2a6850cb4318c14ad39d3514742b066cf16058da87966d08e0d197fbb5104229fa19b61819a12ae5c76ea0279749f4338923cd4741ba31132
diff --git a/deps/checksums/p7zip b/deps/checksums/p7zip
new file mode 100644
index 00000000000000..0df5ed96067b13
--- /dev/null
+++ b/deps/checksums/p7zip
@@ -0,0 +1,34 @@
+p7zip-16.2.0.tar.bz2/md5/a0128d661cfe7cc8c121e73519c54fbf
+p7zip-16.2.0.tar.bz2/sha512/d2c4d53817f96bb4c7683f42045198d4cd509cfc9c3e2cb85c8d9dc4ab6dfa7496449edeac4e300ecf986a9cbbc90bd8f8feef8156895d94617c04e507add55f
+p7zip.v16.2.1+1.aarch64-apple-darwin.tar.gz/md5/12485086522a08b62dfef503b61af007
+p7zip.v16.2.1+1.aarch64-apple-darwin.tar.gz/sha512/dc9d92b294a65c55d8742b33df0d905a8cd1e80500647b33537fd404167aaa43a01280bb19035a9e4da94dd49c6ee712a0fbf455b9661af487e1c35a9a09eda7
+p7zip.v16.2.1+1.aarch64-linux-gnu.tar.gz/md5/35a760ced992c9cd4c6085e40394299b
+p7zip.v16.2.1+1.aarch64-linux-gnu.tar.gz/sha512/da3123601db48cead255240e048e33de401de52cbddddbc1e109dd7b3b36645251008108c7545abaf09e0b2803198ac4067b00a3f0ff7fe31f65a5de4ce49710
+p7zip.v16.2.1+1.aarch64-linux-musl.tar.gz/md5/f795313bc73c9f635a63861126c838eb
+p7zip.v16.2.1+1.aarch64-linux-musl.tar.gz/sha512/886b0e9e2476915be2c7106a8fb4547e9326d50fad93d8002ca97f4e35a856fee43a2350b48404f112938da6fc19255cb2dfb31e3112c74551d4a3ccb44a7fbf
+p7zip.v16.2.1+1.armv6l-linux-gnueabihf.tar.gz/md5/644ed1b6a5d7bb16407cea5264ef45ce
+p7zip.v16.2.1+1.armv6l-linux-gnueabihf.tar.gz/sha512/3cbdb56faca44ac2a3ea4cba35b8913811a2d3602a689496228968fb17c23b191ab3e01b43f619526cd8ea0f33c5a4453d2b5cca7437026e54b2c164acb1e8ee
+p7zip.v16.2.1+1.armv6l-linux-musleabihf.tar.gz/md5/219fdda71c08848844b4630e613bf35d
+p7zip.v16.2.1+1.armv6l-linux-musleabihf.tar.gz/sha512/419297b14aa820f8f49d6add367fe3a7153be18546e41e9f8bf6bbddada7535301dd3ea524089981046fc739b8094cff9113fb2aeca2947e796a8e6b74414245
+p7zip.v16.2.1+1.armv7l-linux-gnueabihf.tar.gz/md5/919e6508e4b2adb82fa2493a805875e9
+p7zip.v16.2.1+1.armv7l-linux-gnueabihf.tar.gz/sha512/cf8f58ee590e23aa6fe348b639f2b052fbc0ed52ecf7ce1e370f7dc3255e47727ef65a109b14cd045d59201ef8a5b426eb05b167967ce95581a35df7a6b67400
+p7zip.v16.2.1+1.armv7l-linux-musleabihf.tar.gz/md5/8bfb81a9a4d31ac9f05b59c19490461e
+p7zip.v16.2.1+1.armv7l-linux-musleabihf.tar.gz/sha512/6b13c1971e7049613aefd4a2bad64d534ffc7293efb037b2da92e23754462fc3872169399f3a9fe34bc337b900ecc4fccc878e3e54067238b3f890c09f8e05f0
+p7zip.v16.2.1+1.i686-linux-gnu.tar.gz/md5/f62eefb6fb2724082933e95d706b232f
+p7zip.v16.2.1+1.i686-linux-gnu.tar.gz/sha512/43a669bb64e0318c16feade75ade6e4ac73e056fb33479268e217310fa469a8f535ace13b8ade45495d96d8a540e1c247dcdb8fd7044c8096693f3766f00224f
+p7zip.v16.2.1+1.i686-linux-musl.tar.gz/md5/8a80bbfcb8c4a05d6c56539640a7bfaf
+p7zip.v16.2.1+1.i686-linux-musl.tar.gz/sha512/38ce14788fbfd964fa446c98c89ecd3854c732f5529406d6d650d8f0ac4a657caeea8ae2985370f5cee129d974a4bafa8cd164fd1c11ae0cad5191e9640534f0
+p7zip.v16.2.1+1.i686-w64-mingw32.tar.gz/md5/d55077826cdfe69747efd4fd53b81e18
+p7zip.v16.2.1+1.i686-w64-mingw32.tar.gz/sha512/71ee03bbb9916eff2e7807ff25d1c1992c209506c4602f570095ee0cd12355ed4590d77dfd090085a109604c4cbad221154bfd55d5fd79bf35c76b3b43c67a25
+p7zip.v16.2.1+1.powerpc64le-linux-gnu.tar.gz/md5/16682edc596bc1f7d6311339644070fb
+p7zip.v16.2.1+1.powerpc64le-linux-gnu.tar.gz/sha512/09c3bfbae7c4ab2757fdee0dac4baf71f6fa7b99aab48c5260ed9481c5e7b05317f7a6d466c543ffe46318281011b61c5652fef33466c02a5b24b3c39d92137d
+p7zip.v16.2.1+1.x86_64-apple-darwin.tar.gz/md5/6d7873510fca444740ab2f4ae701ae3a
+p7zip.v16.2.1+1.x86_64-apple-darwin.tar.gz/sha512/e6fc0c669b62eb2e6f11d07e840ce44beb6c8981750ac4fb5d7401cf00916465f97f8b3a49c73777d893752a7df9bed8bf40068fe7339df88942a21aff4e9d2a
+p7zip.v16.2.1+1.x86_64-linux-gnu.tar.gz/md5/2cd2efe4d51967ac8acf24a6f2c80893
+p7zip.v16.2.1+1.x86_64-linux-gnu.tar.gz/sha512/a0fdf061b5d7da97134eee7fc9afb468d8bee01108843814432d318d2b5c6217772e62700a015d5be41010ecf7b613218ed9e8ea6e2da2a24d1e5c15a1734a59
+p7zip.v16.2.1+1.x86_64-linux-musl.tar.gz/md5/f5a312e21abd7f24100e91eefa875c7f
+p7zip.v16.2.1+1.x86_64-linux-musl.tar.gz/sha512/034b00d0685da5456b91f45c0b4196e0aa21436e67ecd7a09318a578a814491774ca5c2ce2c49f6b17e1665d9c8a896a0f2f6fca6d3260208ad8be44c1dce656
+p7zip.v16.2.1+1.x86_64-unknown-freebsd.tar.gz/md5/1e647ff7fd8bf2dfdcdd569c743e9c8c
+p7zip.v16.2.1+1.x86_64-unknown-freebsd.tar.gz/sha512/e868eb1bab65ff383177ed0e929ff0db084df1f4b144430098f25cb8df788696113fe466ecf756c4ca61439fa8eed8c8a3fc396aec2972bea6ec7b3b0be51baa
+p7zip.v16.2.1+1.x86_64-w64-mingw32.tar.gz/md5/70d58fe372550313b18437f58cd249e1
+p7zip.v16.2.1+1.x86_64-w64-mingw32.tar.gz/sha512/1908d3dfd218e33c8e85366e02d920e237111b5fdb8bf028d8f7a2029ec7292c465d4d0ee50f58ef186fa8c83bfe33ea98d0bacdbcbb9c345b71eeb038cbda89
diff --git a/deps/checksums/p7zip-16.2.0.tar.bz2/md5 b/deps/checksums/p7zip-16.2.0.tar.bz2/md5
deleted file mode 100644
index 145a4871ed5bef..00000000000000
--- a/deps/checksums/p7zip-16.2.0.tar.bz2/md5
+++ /dev/null
@@ -1 +0,0 @@
-a0128d661cfe7cc8c121e73519c54fbf
diff --git a/deps/checksums/p7zip-16.2.0.tar.bz2/sha512 b/deps/checksums/p7zip-16.2.0.tar.bz2/sha512
deleted file mode 100644
index 45e46036ed2881..00000000000000
--- a/deps/checksums/p7zip-16.2.0.tar.bz2/sha512
+++ /dev/null
@@ -1 +0,0 @@
-d2c4d53817f96bb4c7683f42045198d4cd509cfc9c3e2cb85c8d9dc4ab6dfa7496449edeac4e300ecf986a9cbbc90bd8f8feef8156895d94617c04e507add55f
diff --git a/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-gnu.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-gnu.tar.gz/md5
deleted file mode 100644
index c1eef0e9a580a0..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-97011a65a2d0fafd74e81bc3445654dc
diff --git a/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-gnu.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index a35cb6ac0aebaa..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9de77aab797e8c9a1785e121a1a33d0f007f58529b2107b181a22f1032f836e374a937214c0cabf72bb1f8ca32b70b71e3e1a12d895b301cdfe35ac3ba1fcb10
diff --git a/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-musl.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-musl.tar.gz/md5
deleted file mode 100644
index 4d22d470841920..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-90d4ebc185099fd1425607bc18118df2
diff --git a/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-musl.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-musl.tar.gz/sha512
deleted file mode 100644
index 0181b5880eeaab..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.aarch64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-aa2d6c285076de80e1f0b91860bdee7dfba7074f8abd5a95b8a9d66a8419b2a0ed5f835daeb3dc2a23c5ba33edb451710cc4aa8998005fc40536a5950c65d635
diff --git a/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-gnueabihf.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-gnueabihf.tar.gz/md5
deleted file mode 100644
index 13277062a01984..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-gnueabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3e170698971fbe88c72933f1b5040588
diff --git a/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-gnueabihf.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-gnueabihf.tar.gz/sha512
deleted file mode 100644
index 3e5eda93448b4f..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-gnueabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-5a1026a5540a40ec6b5b7cbf1660cca7b6b8b50693b7737ced333e137e7a881c7eb0f1d0f4b3f82d92c07b7fec754375caabd1399196bde731bc9505cd1c5ada
diff --git a/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-musleabihf.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-musleabihf.tar.gz/md5
deleted file mode 100644
index 3067103e020b32..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-musleabihf.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-da71ac0e0a16b11fca6953b8dbaf921c
diff --git a/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-musleabihf.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-musleabihf.tar.gz/sha512
deleted file mode 100644
index 10e3faa088db4d..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.armv7l-linux-musleabihf.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-7655a1501ea4fea55165945da394f8aad366b453d2da00f585eeebfb9cc2e421545f3bfabbc5eb7b9859d6470a9d652a0503f2bdbfcd72f94553e149b11ff92d
diff --git a/deps/checksums/p7zip.v16.2.0-1.i686-linux-gnu.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.i686-linux-gnu.tar.gz/md5
deleted file mode 100644
index a182ecf7e12afd..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.i686-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-951385b34aba01cdcef4e07e66942951
diff --git a/deps/checksums/p7zip.v16.2.0-1.i686-linux-gnu.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.i686-linux-gnu.tar.gz/sha512
deleted file mode 100644
index e5c1e743b7772f..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.i686-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2d655e6d3d5d26bed81817bfbcece8084bfcbf4fc5e491c6ebec72490ccca4db72cc1f962a3ef5ceab5c5abe2cc4e9b9e9524d25c158ed8af32b6aff4b722d00
diff --git a/deps/checksums/p7zip.v16.2.0-1.i686-linux-musl.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.i686-linux-musl.tar.gz/md5
deleted file mode 100644
index 0c593877e7c2e9..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.i686-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-4e005a19cef586bc407953a0d03caf6d
diff --git a/deps/checksums/p7zip.v16.2.0-1.i686-linux-musl.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.i686-linux-musl.tar.gz/sha512
deleted file mode 100644
index 187349851307fc..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.i686-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-6c55576e1f3907435fea1aa6db0c124dee155ab1dac6cdf72ebe1ad4380bcd585d0a8d755b86621a498c63a95523dbf56053076c3f7398e6c0411db0fc1099be
diff --git a/deps/checksums/p7zip.v16.2.0-1.i686-w64-mingw32.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.i686-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 6a2ac1958b6575..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.i686-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-a193ac18bf72645e8132c0303d285aab
diff --git a/deps/checksums/p7zip.v16.2.0-1.i686-w64-mingw32.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.i686-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index f0a4fa0367bd07..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.i686-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-eb82dd1b1cbacd76b2440a73b3c2a3167d5c8e88afddd27e0912f4d43d21040112a130f32efbe8af978dda5f698310e5253091c2b144b50384117e362d711256
diff --git a/deps/checksums/p7zip.v16.2.0-1.powerpc64le-linux-gnu.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.powerpc64le-linux-gnu.tar.gz/md5
deleted file mode 100644
index 000e1fd07de4ca..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.powerpc64le-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-33ee9c368767a7dc4ecc176b107a1344
diff --git a/deps/checksums/p7zip.v16.2.0-1.powerpc64le-linux-gnu.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.powerpc64le-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 6f90953f131c58..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.powerpc64le-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-2f801688be550705f10061e1be92c53a4388c90138159763ab63835730fb794cea54928dd3da72d203fcfc047ecf4c6b10644f914c849436058013e24381126d
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-apple-darwin14.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.x86_64-apple-darwin14.tar.gz/md5
deleted file mode 100644
index 043f55b0d8a2ce..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-apple-darwin14.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-dbcfff4fd3ee79040fcee45ad70e8115
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-apple-darwin14.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.x86_64-apple-darwin14.tar.gz/sha512
deleted file mode 100644
index e6c2ee6c7f69e2..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-apple-darwin14.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-66ca063b9b7ffb1e36db3fb71de7ef0a0b22782dabafa1b7f6ba89dd7931bcb4491920cf77970c3ebc2f14741ce21c3c28e7c8ae57ad14d6c2dc54676f853177
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-gnu.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-gnu.tar.gz/md5
deleted file mode 100644
index a94b25b50d1513..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-gnu.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-e4ba040e655fc4a82a16e41620171233
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-gnu.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-gnu.tar.gz/sha512
deleted file mode 100644
index 0bc4c5bda7af75..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-gnu.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-fdc322926de636060699daf87dd69003e092a10cc7125a0963a94fa588f0af9562dd5da2adcf6aa4fdf74e1c01ab46c25ff5e36c6f5665a1781a8b8240ac5624
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-musl.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-musl.tar.gz/md5
deleted file mode 100644
index 08266da1e0fe38..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-musl.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-14875b7aa0e3d6a6c8773dce64abdcb2
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-musl.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-musl.tar.gz/sha512
deleted file mode 100644
index cddd9823ff3054..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-linux-musl.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-9cbadcc3b52ec36c9885c04156ec6521d4959f5de78afc0ed867c308ad33994da4fa39ec63f455403587e8b21c6269f1c88c98980744628c4a37470693012e81
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5
deleted file mode 100644
index 28c34c3cccf35f..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-unknown-freebsd11.1.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-6fff391800713934b3f008906349ddb2
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512
deleted file mode 100644
index 42bf9716fa05dc..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-unknown-freebsd11.1.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-887feb7e197c7cdf3ec65a8fde1809c2c82500b6ddf8a114400ace9bbbd9cb22fc947d3397ae189b6c5d76b55fc4b98c39e02aec298edcdeca997115828af080
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-w64-mingw32.tar.gz/md5 b/deps/checksums/p7zip.v16.2.0-1.x86_64-w64-mingw32.tar.gz/md5
deleted file mode 100644
index 1e28dd2fb74778..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-w64-mingw32.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-1732e69ed7e9d5201637421c7dad93cd
diff --git a/deps/checksums/p7zip.v16.2.0-1.x86_64-w64-mingw32.tar.gz/sha512 b/deps/checksums/p7zip.v16.2.0-1.x86_64-w64-mingw32.tar.gz/sha512
deleted file mode 100644
index 7a98c0a979931c..00000000000000
--- a/deps/checksums/p7zip.v16.2.0-1.x86_64-w64-mingw32.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-8354d68e07e77d0f9679f5daa27a373013b6edefaafff070629ca260a001f1160850637563f16419f37a59fb397b3b273ca4293bc38a72371feff1ecd17c2fc1
diff --git a/deps/checksums/patchelf b/deps/checksums/patchelf
new file mode 100644
index 00000000000000..a7122c400749a2
--- /dev/null
+++ b/deps/checksums/patchelf
@@ -0,0 +1,2 @@
+patchelf-0.13.tar.bz2/md5/d387eee9325414be0b1a80c8fbd2745f
+patchelf-0.13.tar.bz2/sha512/43c3f99fe922e2f34d860389165bcc2b0f3f3317e124eb8443017f71b1f223d96a7c815dc81f51b14958b7dc316f75c4ab367ccc287cd99c82abe890b09a478d
diff --git a/deps/checksums/patchelf-0.9.tar.gz/md5 b/deps/checksums/patchelf-0.9.tar.gz/md5
deleted file mode 100644
index 085d1fff063f8c..00000000000000
--- a/deps/checksums/patchelf-0.9.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-3c265508526760f233620f35d79c79fc
diff --git a/deps/checksums/patchelf-0.9.tar.gz/sha512 b/deps/checksums/patchelf-0.9.tar.gz/sha512
deleted file mode 100644
index 87e57940d1f385..00000000000000
--- a/deps/checksums/patchelf-0.9.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-715db21156e6bd91cfa626f5201b32a6619e51532f5635ef52396da8193738ba66113485b61cc1e218b16737e66f72cc2e4bb3a7a33e73061ac2ef2c6330a299
diff --git a/deps/checksums/pcre b/deps/checksums/pcre
new file mode 100644
index 00000000000000..05a06f9844ddfc
--- /dev/null
+++ b/deps/checksums/pcre
@@ -0,0 +1,34 @@
+PCRE2.v10.36.0+2.aarch64-apple-darwin.tar.gz/md5/12ac3bee39df3a79f868f6463964953b
+PCRE2.v10.36.0+2.aarch64-apple-darwin.tar.gz/sha512/a1a1312931deb7f742f80886188babcf9c179ed3f156626fb23d92633fde896d1ee9b2d72cd99ae4a1f8048971b6d939e9b0b10c455d4eeec24b265968593486
+PCRE2.v10.36.0+2.aarch64-linux-gnu.tar.gz/md5/32240ccddee3040aeedcbe69ea52fcad
+PCRE2.v10.36.0+2.aarch64-linux-gnu.tar.gz/sha512/86fb9febd186fcaeec83d2ed336fb060d7e49c7b7efe1bd8a6d2d74023ddbcce04eed5cf0e5d15348313eb2b51cd6b27763c08f7b9cf4eaf9df22d88f9405ef8
+PCRE2.v10.36.0+2.aarch64-linux-musl.tar.gz/md5/06abf8210e597a8669fb371da73865ce
+PCRE2.v10.36.0+2.aarch64-linux-musl.tar.gz/sha512/063edaa92e36468a8cf70ca9e25d9004586400a5304c0e91b067788825cbf5354e0190cad951f163e318b65d0f3f915f1944d03de61a5627ead2ead2674d3279
+PCRE2.v10.36.0+2.armv6l-linux-gnueabihf.tar.gz/md5/70ca2acdd5b1524141f15d02d26c3b1c
+PCRE2.v10.36.0+2.armv6l-linux-gnueabihf.tar.gz/sha512/377fdc5fd8b771027ffe8c0871e1688f8d991caf930b26b397eae01504af2fad5bdfbe2b3af33f25cf4b5c7bfd73dc77b16b65882a7846803a00edc0968ccef2
+PCRE2.v10.36.0+2.armv6l-linux-musleabihf.tar.gz/md5/860180f0a15ad38fac20590fab177718
+PCRE2.v10.36.0+2.armv6l-linux-musleabihf.tar.gz/sha512/412e7b0355a7bcdecca4ff5f85a1c6af1eeb094a9f07c2e90de105a0e0e6acedcbca146b5c136509ef8b38666f645b0c06fc68676dd8b1b70e2c7af4b070eb3d
+PCRE2.v10.36.0+2.armv7l-linux-gnueabihf.tar.gz/md5/12fd561c00fc7fca14e577ed54525740
+PCRE2.v10.36.0+2.armv7l-linux-gnueabihf.tar.gz/sha512/e5655e5c3f96a3a95699be534acbd399bc29873fa1064f50c2d78c43ad8e85a1fbf9039bcb674a88ecdb9bf5b468f9ecdf9a79f0dce5d95996f99d6c700da79a
+PCRE2.v10.36.0+2.armv7l-linux-musleabihf.tar.gz/md5/97d5eab8806a1920e6fd30f82db1b754
+PCRE2.v10.36.0+2.armv7l-linux-musleabihf.tar.gz/sha512/827fc45049a4b3adb6de2ab0569e45dd5e8749c09c42e57c579d3d6350f0953f6ad4fae1ba71af7347c9271ffff805a0200b5c9418e7f1894a6bc17a4fe0071c
+PCRE2.v10.36.0+2.i686-linux-gnu.tar.gz/md5/d7c9fdbcf3055c4745ea93a9274e16d1
+PCRE2.v10.36.0+2.i686-linux-gnu.tar.gz/sha512/ac0edd5d5910e7948a65c2a5c9fb05d2a6beb3f9bd875ea87433b910444bcba617ac5bc215fa0f101cbd7c5556966de7593080674cfaf28fdc8784e2485cf71b
+PCRE2.v10.36.0+2.i686-linux-musl.tar.gz/md5/05ef7559eba68cecbad0f2c75c017640
+PCRE2.v10.36.0+2.i686-linux-musl.tar.gz/sha512/91603d596a1b70bc4a933f9151fc791e09a167e4ad2de442a7ff9c355a329353cc9fb3148cf75639eaef0de3cf4f71212525f1040b0eff63c5d884892814b7af
+PCRE2.v10.36.0+2.i686-w64-mingw32.tar.gz/md5/8015e6633bf0f4c359f85445d4a98a9a
+PCRE2.v10.36.0+2.i686-w64-mingw32.tar.gz/sha512/527183fcc473c8e3f04622701cf73a55c5df132713e8230cd0bfd484023da594a9e29f5745d384f1e1015b8efac96e88bd985b06af5901b0d3052f90af8d89d6
+PCRE2.v10.36.0+2.powerpc64le-linux-gnu.tar.gz/md5/2ece20fa11fdbae393fb85a41ee1e17d
+PCRE2.v10.36.0+2.powerpc64le-linux-gnu.tar.gz/sha512/e6fbc03efed53da43b3b15b31cc0fbd85aaf5cc65564392b8c7bc02695d3a32fe832880d547c37b3a508197a4d4023be0aef910cd36da69a54ee184880cc0438
+PCRE2.v10.36.0+2.x86_64-apple-darwin.tar.gz/md5/26c560dd16b460a1ac7c81807edbacc6
+PCRE2.v10.36.0+2.x86_64-apple-darwin.tar.gz/sha512/ce56bc399e204e4b437d3f398b4e68c33d9c55ec990126523f3be0b14571603eea3b3104e1909deb22eab3f5302da72fcc690d1a279cb85ef598c42a5ef9a8a9
+PCRE2.v10.36.0+2.x86_64-linux-gnu.tar.gz/md5/474dec882abefcb56febddc309ed4682
+PCRE2.v10.36.0+2.x86_64-linux-gnu.tar.gz/sha512/882898c2d6cab8cd5ecf1027388bd08ddd1fec2339b45388786f98c53518bf7ca56f9e2cccb4a5ede953cc85e6c1cc54a5a00f80ece4cbfdc17e5f6116a9976a
+PCRE2.v10.36.0+2.x86_64-linux-musl.tar.gz/md5/af6d90c071437c5529306a5bafe6f6aa
+PCRE2.v10.36.0+2.x86_64-linux-musl.tar.gz/sha512/92a16960d7514c829a5f372a40472c87c717d49e9694030ae0cb39106d6530f5bb169155a74a416bf340139f9dea231ddc2b7ae6e54fcb935f6a9bf672b5e0c1
+PCRE2.v10.36.0+2.x86_64-unknown-freebsd.tar.gz/md5/97410029c0b6ed5f7fb0d14e1f1215ea
+PCRE2.v10.36.0+2.x86_64-unknown-freebsd.tar.gz/sha512/229e910759da2959ddef83ca89e05a050c266b8e755c85dfce6a786658be541911c3b78a0fca7dfdee1b41fbbdccf57da75cf9fe45fd2821dba8d2aaeabfd538
+PCRE2.v10.36.0+2.x86_64-w64-mingw32.tar.gz/md5/39827564bca329768e0380bd79b869fe
+PCRE2.v10.36.0+2.x86_64-w64-mingw32.tar.gz/sha512/4579049b99fca3334d726b0ca1f07524d1643a758e375b5b02b8f294ba7d9c2a4130da1a1523de29033233a8848105b3cb660e15bb4a759593405d805ee99883
+pcre2-10.36.tar.bz2/md5/bd7e7421ff3fa2e2d5429229ecfad095
+pcre2-10.36.tar.bz2/sha512/fc2a920562c80c3d31cedd94028fab55314ae0fb168cac7178f286c344a11fc514939edc3b83b8e0b57c872db4e595fd5530fd1d4b8c779be629553e9ec965a3
diff --git a/deps/checksums/pcre2-10.31.tar.bz2/md5 b/deps/checksums/pcre2-10.31.tar.bz2/md5
deleted file mode 100644
index 83b99432858e6d..00000000000000
--- a/deps/checksums/pcre2-10.31.tar.bz2/md5
+++ /dev/null
@@ -1 +0,0 @@
-e0b91c891a3c49050f7fd15de33d0ba4
diff --git a/deps/checksums/pcre2-10.31.tar.bz2/sha512 b/deps/checksums/pcre2-10.31.tar.bz2/sha512
deleted file mode 100644
index a51f234afc360a..00000000000000
--- a/deps/checksums/pcre2-10.31.tar.bz2/sha512
+++ /dev/null
@@ -1 +0,0 @@
-44d7db2513d9415dcdf6541366fea585e016f572f3e4379f6e959a38114b2337851092049ab4a1576ae8f19b9de413edbcfa62f434c77fc8470747ee5413e967
diff --git a/deps/checksums/suitesparse b/deps/checksums/suitesparse
new file mode 100644
index 00000000000000..c4d7a7bd7b70c8
--- /dev/null
+++ b/deps/checksums/suitesparse
@@ -0,0 +1,36 @@
+SuiteSparse-5.10.1.tar.gz/md5/68bb912f3cf3d2b01f30ebafef690302
+SuiteSparse-5.10.1.tar.gz/sha512/8f85c6d63b76cba95707dfa732c51200df7794cb4c2599dbd92100475747b8d02b05089a47096e85c60b89bc852a8e768e0670f24902a82d29494a80ccf2bb5f
+SuiteSparse-f63732c1c6adecb277d8f2981cc8c1883c321bcc.tar.gz/md5/baeb73b8ac38dd04174ed04fa1ea8cef
+SuiteSparse-f63732c1c6adecb277d8f2981cc8c1883c321bcc.tar.gz/sha512/a95e6ebafe948f419a65a9630b01cda380f3ce19499afe57e212a75dd43aa7a09ddd038e90d1215ae55566a676e392e696565d2d7a96853ec4fca7f73762b268
+SuiteSparse.v5.10.1+0.aarch64-apple-darwin.tar.gz/md5/b9392f8e71c0c40d37489e7b2071c5ad
+SuiteSparse.v5.10.1+0.aarch64-apple-darwin.tar.gz/sha512/109d67cb009e3b2931b94d63cbdaaee29d60dc190b731ebe3737181cd48d913b8a1333043c67be8179c73e4d3ae32ed1361ab4e34312c0f42e4b29f8a7afda3e
+SuiteSparse.v5.10.1+0.aarch64-linux-gnu.tar.gz/md5/1b2651ede4a74cd57f65505a65093314
+SuiteSparse.v5.10.1+0.aarch64-linux-gnu.tar.gz/sha512/753f986a749d139f9a6baedac059d8ed8efdd716ed28eacdbf00e6ebe863b4e17467f01a9693dcb39571d38b4b5c4c1375dbb790b88a7e704116e3fe83f7ff3e
+SuiteSparse.v5.10.1+0.aarch64-linux-musl.tar.gz/md5/051ff9bbbc95c57d58563df8a2c8eedd
+SuiteSparse.v5.10.1+0.aarch64-linux-musl.tar.gz/sha512/855979ed8d6290c529d9c9e82944fb15c88f9d9d8da7db1fa2fc34efb0ed985fc6554312882107f26956f2a18ae985918909cd834e068b874906c21a0f53b6c9
+SuiteSparse.v5.10.1+0.armv6l-linux-gnueabihf.tar.gz/md5/dbc5fb4844077084663612af26e180ce
+SuiteSparse.v5.10.1+0.armv6l-linux-gnueabihf.tar.gz/sha512/b906f7275ab58006acd52927e7e04c79eec59b5f28e9d7e5d5b8556c0eedd54cfff87e494373702c205afa2384ee6b0f2bb5e811fd440b1b50d5c9eee1b47b99
+SuiteSparse.v5.10.1+0.armv6l-linux-musleabihf.tar.gz/md5/7770d256e76d5ce1484c3781508cc3ed
+SuiteSparse.v5.10.1+0.armv6l-linux-musleabihf.tar.gz/sha512/4f1d46cc8da5a7eff665b4bb96f9e21319f39231f98a6164d8c3d654d5b6f93c3e4477f55a39a80b7f8125a78d690cc5a1cc58f29143ba4c109a4182d7fa2110
+SuiteSparse.v5.10.1+0.armv7l-linux-gnueabihf.tar.gz/md5/ee1fa978bcfb264842749f915bbefd77
+SuiteSparse.v5.10.1+0.armv7l-linux-gnueabihf.tar.gz/sha512/9592a42f6474fd89eea1144f62ecc2a23796ad251173a9c36ccbc9bc18dd88687ce49f51528974f56b5652e2ab15f0aa41634513f2cc0b3c54259de3b68350bd
+SuiteSparse.v5.10.1+0.armv7l-linux-musleabihf.tar.gz/md5/30f708421b92158c7741c82576e9047b
+SuiteSparse.v5.10.1+0.armv7l-linux-musleabihf.tar.gz/sha512/d8793d48757dbb62aa7a21c215b6d6e63a26ce4ba740f1f7f42a3e485ad3d9628744f021ad9cc96e29c8c88bfb2f02ea92865c26b971ca739d3c05c7f28875d9
+SuiteSparse.v5.10.1+0.i686-linux-gnu.tar.gz/md5/9018b6168b9a687bab0c9a9cbf45afba
+SuiteSparse.v5.10.1+0.i686-linux-gnu.tar.gz/sha512/308a92f441af6855517c40c6871b4935251677c05cc082c21fd1249e0137b635fa524f60cad61c7524026301a6de7ffea0ad1f4b9a4d9d6e3ced3f332a6719d4
+SuiteSparse.v5.10.1+0.i686-linux-musl.tar.gz/md5/99143f8d6de4f071ffa19942252b6dec
+SuiteSparse.v5.10.1+0.i686-linux-musl.tar.gz/sha512/9fb719fffea03296dfac8bc221bafc3ed8f7791749eca6c4b00265994de1be5d242e7e5184693603c745b39c4538feb11ab283204e0e33df2745f904cf0c7252
+SuiteSparse.v5.10.1+0.i686-w64-mingw32.tar.gz/md5/d049c943fbda2c8380dea33e16569275
+SuiteSparse.v5.10.1+0.i686-w64-mingw32.tar.gz/sha512/174768464432b991ecff88d5e5126caca83672fb5173115de59bc2387ef8aa75a56d3e84957fce625fabaf50ba462549f2ea828aea7258be7513835b7fea2e31
+SuiteSparse.v5.10.1+0.powerpc64le-linux-gnu.tar.gz/md5/f01f7e134f8ee77849f3a46e773c1ff2
+SuiteSparse.v5.10.1+0.powerpc64le-linux-gnu.tar.gz/sha512/dc0339f2b35f05d49fbd1dcf1822c774a07af122fabc8e00eb7435dc53fcf82b3c1ec24e2bb41b1a58d3f8ab8903830eb7ece19dc6fce3f5e73d90a3dc3c4194
+SuiteSparse.v5.10.1+0.x86_64-apple-darwin.tar.gz/md5/02975a8670660c5e79eab0a70b051a0b
+SuiteSparse.v5.10.1+0.x86_64-apple-darwin.tar.gz/sha512/e55685ed7a63318c5baa326795503f13f031e0a617c045c972d5c89252ab51e7325e2b0425ca10dfbd59e79c5b4200545f5a4944fddd376e7610b6ebf74ded14
+SuiteSparse.v5.10.1+0.x86_64-linux-gnu.tar.gz/md5/6c111d315fb25c529710722bd5ae6af0
+SuiteSparse.v5.10.1+0.x86_64-linux-gnu.tar.gz/sha512/c971aed91bd695a0f7f735f58ddcb075d32b9522a8a50a30ad383ba5ce2c8e572fec97644e6cb85745206f4e5da72d7865d9a9724eb63ce3c04e90a4eedc90c9
+SuiteSparse.v5.10.1+0.x86_64-linux-musl.tar.gz/md5/7c98daf0edfad31764c3078e6351b521
+SuiteSparse.v5.10.1+0.x86_64-linux-musl.tar.gz/sha512/2c4b3cae1bd8d1ce62dae6aeca3ffbf90c26a1b01c0da4fb7761d6fe4293b8fad0b6fbfd5f930cefe6ccaef7546a482022ff2f50dc59ecf17c5c0dfc6a5961f5
+SuiteSparse.v5.10.1+0.x86_64-unknown-freebsd.tar.gz/md5/aeca88a7bc3f9d239c61084996ce9182
+SuiteSparse.v5.10.1+0.x86_64-unknown-freebsd.tar.gz/sha512/0bee1ee07c3883fe28dd322c40195be9adb757d6dab3eb1730d7b0ff65dd4517520047696ccdda4ca618e671d898cdb45b787094594e142cb4b176549a74200b
+SuiteSparse.v5.10.1+0.x86_64-w64-mingw32.tar.gz/md5/63e449554eee134757e3d50ca8b5f47d
+SuiteSparse.v5.10.1+0.x86_64-w64-mingw32.tar.gz/sha512/95b58df4fe7520e2b526f9e3b199253909992789cd24ecca814ddb9a0c0bb37ff93c1de40239e5295a8503613cdb2431a87f0a70a3d657d94d4661f1778797f2
diff --git a/deps/checksums/unwind b/deps/checksums/unwind
new file mode 100644
index 00000000000000..6703b0c2648fc1
--- /dev/null
+++ b/deps/checksums/unwind
@@ -0,0 +1,26 @@
+LibUnwind.v1.5.0+1.aarch64-linux-gnu.tar.gz/md5/4f27fbe5a0e0897d75e3690e2f24c10b
+LibUnwind.v1.5.0+1.aarch64-linux-gnu.tar.gz/sha512/fad4ac07121823859bf6af322c0d3b52b353b1e7b2a12611dc14bfc18663fc4278a4eab61653b306bf07abfff8dc92ef07d55b24117d8ccc5a8662139b5301a1
+LibUnwind.v1.5.0+1.aarch64-linux-musl.tar.gz/md5/730455d3e334b61e9232f978a5ba9841
+LibUnwind.v1.5.0+1.aarch64-linux-musl.tar.gz/sha512/64a04b6d362774c5dc9534a49b58ea676cb514fc10ce0747cd79d5319841e9b0695701c631a2598499d2fa888d36a89f0560f7910d01fa4c7e2fc223a2143a8d
+LibUnwind.v1.5.0+1.armv6l-linux-gnueabihf.tar.gz/md5/e335c0eb5fd97e870400f472ed163722
+LibUnwind.v1.5.0+1.armv6l-linux-gnueabihf.tar.gz/sha512/4211fa7a4a08631a335a327cda511272527ff590507819dc9ee89ec1db7a7603c1ce9dcfcb22950acb4246fef297493eccd5260fb76d9929c84cc9c755381849
+LibUnwind.v1.5.0+1.armv6l-linux-musleabihf.tar.gz/md5/d8b094c08d496b45cf5e4f6f964caa43
+LibUnwind.v1.5.0+1.armv6l-linux-musleabihf.tar.gz/sha512/0fe159785b8f35ae5b8301754ed244f687156a9565fe2d6d6c72e8b4e0c04c26183079b69093ab4fec9de4bd8b69d248a1569d8fc539ef04787ed09d36e41bdd
+LibUnwind.v1.5.0+1.armv7l-linux-gnueabihf.tar.gz/md5/55289aa21e11f8fa3867dd650f863b2d
+LibUnwind.v1.5.0+1.armv7l-linux-gnueabihf.tar.gz/sha512/e669616bc1be3b34552b9927279e0b43df1e5ab0e72659a9e128d894df6b46d10f7d72b30eb5ad3355f01776a5f5250195633b846747fdbaefd2ac69915157fc
+LibUnwind.v1.5.0+1.armv7l-linux-musleabihf.tar.gz/md5/5a99cca618b56961b7108c618f8704c4
+LibUnwind.v1.5.0+1.armv7l-linux-musleabihf.tar.gz/sha512/7465262f611ff347fa57a910019e4a243451d801edf1fd9bb19a5f2ab9e9b84885da26781af18c1405347918e74b21b4f0a308d938cd3198b6260e0df8b5bc6b
+LibUnwind.v1.5.0+1.i686-linux-gnu.tar.gz/md5/bd52e05f513c8b86d8b71a9f833bde57
+LibUnwind.v1.5.0+1.i686-linux-gnu.tar.gz/sha512/750a082730c35b11cc1745a05e140bd83fd7511560ac90e15b472383b60b4641285b959a1a0897e2c6cb6bbf3857497ef7164642bfc79660c9c681cda189f530
+LibUnwind.v1.5.0+1.i686-linux-musl.tar.gz/md5/41482181efe7218a6aae785b76ad4d62
+LibUnwind.v1.5.0+1.i686-linux-musl.tar.gz/sha512/e1212ecc9efe16fc7401b1abafd3ab55c435a868e9a5408a4d9ca0039c4c422fb635314ae8d69913d4699a52ae070dc12db9cbb95d18f6e4fa41dd5047b22049
+LibUnwind.v1.5.0+1.powerpc64le-linux-gnu.tar.gz/md5/fa9dafea4ad00266188a310294b9342e
+LibUnwind.v1.5.0+1.powerpc64le-linux-gnu.tar.gz/sha512/d90370601804b477f588fbeba549a197150fc080ccee112b0e93a00393d03b7908a5f8ceed1a33c6ea8860c0f88c0d1e05a676336c948897f518d6130c480f5e
+LibUnwind.v1.5.0+1.x86_64-linux-gnu.tar.gz/md5/57b35a4b10407daf0e06c32acb942c0f
+LibUnwind.v1.5.0+1.x86_64-linux-gnu.tar.gz/sha512/0ab66f46a0b947c29d9ac76b1b10591435a9098e1a93b99eb64444c9fabd97439764a0fd1483ee5e26c2f617ca97d3929184bcbb9f1f2778ad109a9fb07d2daa
+LibUnwind.v1.5.0+1.x86_64-linux-musl.tar.gz/md5/7116a5228632a6f96bfc979d72ad530d
+LibUnwind.v1.5.0+1.x86_64-linux-musl.tar.gz/sha512/20c18da77864985ba8bb0a5857c23b807730de354d2fd0504b627e53a2d8763a72c2ebb5ac8bb7615f4a66a27937c12e89619489893b4ee17c3fea8acd12c704
+LibUnwind.v1.5.0+1.x86_64-unknown-freebsd.tar.gz/md5/79bb8128f52068faf1cb8e82f39272dc
+LibUnwind.v1.5.0+1.x86_64-unknown-freebsd.tar.gz/sha512/c60a09be8d945d7309e219e830f7cebcb11b7924b9d98ea4b0a93730ed4c2eda846bd0f043cdb225c86cc576d5708fd4f4fbe1324c7b71e47a02ae1d42c7e47f
+libunwind-1.5.0.tar.gz/md5/c6923dda0675f6a4ef21426164dc8b6a
+libunwind-1.5.0.tar.gz/sha512/1df20ca7a8cee2f2e61294fa9b677e88fec52e9d5a329f88d05c2671c69fa462f6c18808c97ca9ff664ef57292537a844f00b18d142b1938c9da701ca95a4bab
diff --git a/deps/checksums/utf8proc b/deps/checksums/utf8proc
new file mode 100644
index 00000000000000..6c2b22983ec3db
--- /dev/null
+++ b/deps/checksums/utf8proc
@@ -0,0 +1,2 @@
+utf8proc-8ca6144c85c165987cb1c5d8395c7314e13d4cd7.tar.gz/md5/af7d2e685f46ff6317fc4ab276bfade7
+utf8proc-8ca6144c85c165987cb1c5d8395c7314e13d4cd7.tar.gz/sha512/0b1c839457755db6679057c99a7872e72e3f17d8535e1e173749e139050bcf10f2e9a9b9fadccabde644ffcc865cfb9396429fc31e5a5a383f95856a01ea98a2
diff --git a/deps/checksums/utf8proc-0890a538bf8238cded9be0c81171f57e43f2c755.tar.gz/md5 b/deps/checksums/utf8proc-0890a538bf8238cded9be0c81171f57e43f2c755.tar.gz/md5
deleted file mode 100644
index d72cedb24a2ea4..00000000000000
--- a/deps/checksums/utf8proc-0890a538bf8238cded9be0c81171f57e43f2c755.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-2e1fd70d1580d6a950bd30118c3cd1df
diff --git a/deps/checksums/utf8proc-0890a538bf8238cded9be0c81171f57e43f2c755.tar.gz/sha512 b/deps/checksums/utf8proc-0890a538bf8238cded9be0c81171f57e43f2c755.tar.gz/sha512
deleted file mode 100644
index 07849bb64c5fe8..00000000000000
--- a/deps/checksums/utf8proc-0890a538bf8238cded9be0c81171f57e43f2c755.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-95d6dba2bbdfc5003f6d21fd02c926c7db9661895010041aacee55fa11dc2524bcffaa90965e2a9a8f2a9e49e93ec7572d6a4de6d5d9f9e075b787537864ea20
diff --git a/deps/checksums/x86_64-4.9.2-release-win32-seh-rt_v4-rev3.7z/md5 b/deps/checksums/x86_64-4.9.2-release-win32-seh-rt_v4-rev3.7z/md5
deleted file mode 100644
index c574a25b871124..00000000000000
--- a/deps/checksums/x86_64-4.9.2-release-win32-seh-rt_v4-rev3.7z/md5
+++ /dev/null
@@ -1 +0,0 @@
-b138576c92c425e8fa171a9d98e13b86
diff --git a/deps/checksums/x86_64-4.9.2-release-win32-seh-rt_v4-rev3.7z/sha512 b/deps/checksums/x86_64-4.9.2-release-win32-seh-rt_v4-rev3.7z/sha512
deleted file mode 100644
index ed37b9bd96e517..00000000000000
--- a/deps/checksums/x86_64-4.9.2-release-win32-seh-rt_v4-rev3.7z/sha512
+++ /dev/null
@@ -1 +0,0 @@
-50dcb3aaab3e53d5ff4561079272daf473bacc8d7a2a3720e7c594ae6e402123cb3cc6f3eabaf67df8d94fa5c01bb3e45b5bb1bc7cbb87cd2beb6b5664e02072
diff --git a/deps/checksums/zlib b/deps/checksums/zlib
new file mode 100644
index 00000000000000..d524a3f588a184
--- /dev/null
+++ b/deps/checksums/zlib
@@ -0,0 +1,34 @@
+Zlib.v1.2.12+3.aarch64-apple-darwin.tar.gz/md5/2258883a6412fbdac0b807afd133834f
+Zlib.v1.2.12+3.aarch64-apple-darwin.tar.gz/sha512/6e82b57646dfe2b86978d51cb4401d565d00d6bdcfabe09ceb888ad8979bd1398fd9ea7652542f149d88c120110f6c3baa919616f01410e9238a5199f50f5dda
+Zlib.v1.2.12+3.aarch64-linux-gnu.tar.gz/md5/663aa0d0791b92464e4822a130ac7fa9
+Zlib.v1.2.12+3.aarch64-linux-gnu.tar.gz/sha512/e50f00d92600a78b2f540e0e8e1dce435d0d0499ea80ce3c3cd0e11c8e3b5b1a97eadca9ac863f597cee369e80bcd50ec1c0a0e0f1a87bb0ff94bbaf453dea2d
+Zlib.v1.2.12+3.aarch64-linux-musl.tar.gz/md5/471179a2364d59abb6426b378ea4e195
+Zlib.v1.2.12+3.aarch64-linux-musl.tar.gz/sha512/35208e4be5966343ecb2b78471a3e1a947489f83c828b562db3508506dd0493eae3318c7eb3a6b599e911416795023193df862fbb6fcc7389d44710dc30f16a8
+Zlib.v1.2.12+3.armv6l-linux-gnueabihf.tar.gz/md5/53601c0201dadc8c9ff038167d5c4277
+Zlib.v1.2.12+3.armv6l-linux-gnueabihf.tar.gz/sha512/19744283bb412a656b934347cb7a1d121fbaf7e5f9b1aac373ddf2466567b731817a2e72e3a4d993ca7e5b5eb1fd9bb9c24d0126778367b28bdb94721649298b
+Zlib.v1.2.12+3.armv6l-linux-musleabihf.tar.gz/md5/f7c923955fc600785aae455807e63c8b
+Zlib.v1.2.12+3.armv6l-linux-musleabihf.tar.gz/sha512/623cd1758465c9e40b0dad93981ae93097a03f4aa67487b7e1c7240be2d780d86f35f8db96743c35bbb329d572741b58e73735a2b1cfb9e18e77f4dbcc714063
+Zlib.v1.2.12+3.armv7l-linux-gnueabihf.tar.gz/md5/5ce0fe42f67e09de047626424d61bc82
+Zlib.v1.2.12+3.armv7l-linux-gnueabihf.tar.gz/sha512/322e32d6fe6cd7a3334f5146f8980d4f1fc85b9a1c60271659ba8b4bbfdec314f8d9e8c6c0719248f5dd18e3daefd946811a3dcc74fa3ae5505d6dd653e65309
+Zlib.v1.2.12+3.armv7l-linux-musleabihf.tar.gz/md5/5115c374df90393cb895dd45c77275c4
+Zlib.v1.2.12+3.armv7l-linux-musleabihf.tar.gz/sha512/b04b4f42220833b99923a3ff349e4a05ad9f67c2b62d4848de37c833b287420b1dbec8a039c09d2a95ab6b68a62c6dcbacb4ba7cc069a4e90a11f8592719d2b8
+Zlib.v1.2.12+3.i686-linux-gnu.tar.gz/md5/37e0186f765fada0d76b9cd6f28c8d5d
+Zlib.v1.2.12+3.i686-linux-gnu.tar.gz/sha512/1239675bbf46c6243131585283b0fc23baa32e68226fbb2f0b7a833c8979e2df33590947daade533e37bafe21838a10198e9f9de99e094c21fba6b218b2fceab
+Zlib.v1.2.12+3.i686-linux-musl.tar.gz/md5/a0d92af6481929eed3a9fec3dbb2e622
+Zlib.v1.2.12+3.i686-linux-musl.tar.gz/sha512/b448590129ef251083b675c3d7494a90151a03297fd9883efb70bde032d106f16f2ec7c28508d9b4a0d0e5a0be0bdb4bcf0d1a9e4b2ade034a6d6cfc4916536e
+Zlib.v1.2.12+3.i686-w64-mingw32.tar.gz/md5/cc38d9ec5430e2ed7fed4792c7ac9551
+Zlib.v1.2.12+3.i686-w64-mingw32.tar.gz/sha512/85ad3babb42682d7b2b69513a30fd5e992a56436dcd7e2a44800bf1bc30d60d09aff5769cfaeefd4f5668e7973a0c2d4ad4d28559ea5f28c1c5419ed595eae57
+Zlib.v1.2.12+3.powerpc64le-linux-gnu.tar.gz/md5/8f57d8c31d2355c64a05db0412462d58
+Zlib.v1.2.12+3.powerpc64le-linux-gnu.tar.gz/sha512/9a0208c7a4dbf71b6f7e1ccaf05e3f3a422507cf0431b6482aab1a7b1bea41bd135320567f7dba6666f37c26f48cb3a627f1a1ebd39bf5c2d61148aadf62a986
+Zlib.v1.2.12+3.x86_64-apple-darwin.tar.gz/md5/5d15bb591d26d24aa9d6c9c8cf3df097
+Zlib.v1.2.12+3.x86_64-apple-darwin.tar.gz/sha512/7d8b0ec5a46a85cef3c5de451823c5cfa73b5b7c5ac98699065bbc5692af556195664908cd5c35184b7a9586fc0adab41fc0f76ee8599ca09a740cf49b9be113
+Zlib.v1.2.12+3.x86_64-linux-gnu.tar.gz/md5/25df63b9e6cbef14b0f0bf2a9eec5d14
+Zlib.v1.2.12+3.x86_64-linux-gnu.tar.gz/sha512/2660b762d816491e6b877020d8dd4a1cf1b171d6232dd5e0f47c6ee7b15504b006cc8f051434df778e0910130ef7456e30d531464470d3c4a2502e8f9fd19e76
+Zlib.v1.2.12+3.x86_64-linux-musl.tar.gz/md5/3f0c85d248711608141046d15b2da339
+Zlib.v1.2.12+3.x86_64-linux-musl.tar.gz/sha512/e4256b1b9520d5b0d97fa7e7ca6f6b9aa2583c6e5f14967392d54e48f27e242461f77e522743b229ab9b333eec5fd51f6d7b1559b566bd68ca0741b05b96df3c
+Zlib.v1.2.12+3.x86_64-unknown-freebsd.tar.gz/md5/e67dae1456645930c9e2b2fef6f805c8
+Zlib.v1.2.12+3.x86_64-unknown-freebsd.tar.gz/sha512/5915ec48ae80be829c36a71e2ce580d2d14b7a9824c8f279ad5c69fea62d9a03345b665f224b9dde0bc4b808af246f89ec4f932d47a14236bc3b7db7651e5bec
+Zlib.v1.2.12+3.x86_64-w64-mingw32.tar.gz/md5/89b152b3de0068c7c2580b87ad529ed3
+Zlib.v1.2.12+3.x86_64-w64-mingw32.tar.gz/sha512/df4b585f6501f45bc85e8d00c1b03c482d70d3491081246f9e9f9560f90c5f6057b1174a81e653f725209323cd743cf05d3e1aba1385afd26cb6f8c50186f818
+zlib-21767c654d31d2dccdde4330529775c6c5fd5389.tar.gz/md5/1fb2320f871561306bc87b3894727b45
+zlib-21767c654d31d2dccdde4330529775c6c5fd5389.tar.gz/sha512/2ad1e728f97a81b65d24fe5bef66658c94222d717a3486a0d11682b61563d7eaaa578f7457078881e8ed8c91b87aec11634d4a64021546e23a3ecabb3285197a
diff --git a/deps/checksums/zlib-cacf7f1d4e3d44d871b605da3b647f07d718623f.tar.gz/md5 b/deps/checksums/zlib-cacf7f1d4e3d44d871b605da3b647f07d718623f.tar.gz/md5
deleted file mode 100644
index 5cd6baea7f6272..00000000000000
--- a/deps/checksums/zlib-cacf7f1d4e3d44d871b605da3b647f07d718623f.tar.gz/md5
+++ /dev/null
@@ -1 +0,0 @@
-93d10d4dd040f14ae63417070d1346e8
diff --git a/deps/checksums/zlib-cacf7f1d4e3d44d871b605da3b647f07d718623f.tar.gz/sha512 b/deps/checksums/zlib-cacf7f1d4e3d44d871b605da3b647f07d718623f.tar.gz/sha512
deleted file mode 100644
index 365e160e96dfa6..00000000000000
--- a/deps/checksums/zlib-cacf7f1d4e3d44d871b605da3b647f07d718623f.tar.gz/sha512
+++ /dev/null
@@ -1 +0,0 @@
-a1e9c5a2963266a582192d0fe88c179f5239245f11c4df4427dda755ad77d31e1fcf045d7d3fe49141090f4ff8da13d9a2e8d8d317fe6460a5f3e9bdea29b883
diff --git a/deps/csl.mk b/deps/csl.mk
new file mode 100644
index 00000000000000..1940984fdc1999
--- /dev/null
+++ b/deps/csl.mk
@@ -0,0 +1,104 @@
+# Interrogate the fortran compiler (which is always GCC based) on where it is keeping its libraries
+STD_LIB_PATH := $(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^programs: =' | sed -e "s/^programs: =//")
+STD_LIB_PATH += :$(shell LANG=C $(FC) -print-search-dirs 2>/dev/null | grep '^libraries: =' | sed -e "s/^libraries: =//")
+ifneq (,$(findstring CYGWIN,$(BUILD_OS))) # the cygwin-mingw32 compiler lies about it search directory paths
+STD_LIB_PATH := $(shell echo '$(STD_LIB_PATH)' | sed -e "s!/lib/!/bin/!g")
+endif
+
+# Given a colon-separated list of paths in $(2), find the location of the library given in $(1)
+define pathsearch
+$(firstword $(wildcard $(addsuffix /$(1),$(subst :, ,$(2)))))
+endef
+
+# CSL bundles lots of system compiler libraries, and while it is quite bleeding-edge
+# as compared to what most distros ship, if someone tries to build an older branch,
+# the version of CSL that ships with that branch may become relatively old.  This is
+# not a problem for code that is built in BB, but when we build Julia with the system
+# compiler, that compiler uses the version of `libstdc++` that it is bundled with,
+# and we can get linker errors when trying to run that `julia` executable with the
+# `libstdc++` that comes from the (now old) BB-built CSL.
+#
+# To fix this, we take note when the system `libstdc++.so` is newer than whatever we
+# would get from CSL (by searching for a `GLIBCXX_3.4.X` symbol that does not exist
+# in our CSL, but would in a newer one), and default to `USE_BINARYBUILDER_CSL=0` in
+# this case.
+CSL_NEXT_GLIBCXX_VERSION=GLIBCXX_3\.4\.30|GLIBCXX_3\.5\.|GLIBCXX_4\.
+
+# First, check to see if BB is disabled on a global setting
+ifeq ($(USE_BINARYBUILDER),0)
+USE_BINARYBUILDER_CSL ?= 0
+else
+# If it's not, check to see if it's disabled by a USE_SYSTEM_xxx flag
+ifeq ($(USE_SYSTEM_CSL),1)
+USE_BINARYBUILDER_CSL ?= 0
+else
+# If it's not, see if we should disable it due to `libstdc++` being newer:
+LIBSTDCXX_PATH := $(eval $(call pathsearch,libstdc++,$(STD_LIB_PATH)))
+ifneq (,$(and $(LIBSTDCXX_PATH),$(shell objdump -p $(LIBSTDCXX_PATH) | grep $(CSL_NEXT_GLIBCXX_VERSION))))
+# Found `libstdc++`, grepped it for strings and found a `GLIBCXX` symbol
+# that is newer that whatever we have in CSL.  Default to not using BB.
+USE_BINARYBUILDER_CSL ?= 0
+else
+# Either we didn't find `libstdc++` (e.g. we're using `clang`), or we
+# found it and couldn't find the new symbol in it (it's older than what
+# BB provides, so let's use BB instead)
+USE_BINARYBUILDER_CSL ?= 1
+endif
+endif
+endif
+
+ifeq ($(USE_BINARYBUILDER_CSL),0)
+define copy_csl
+install-csl: | $$(build_shlibdir) $$(build_shlibdir)/$(1)
+$$(build_shlibdir)/$(1): | $$(build_shlibdir)
+	-@SRC_LIB=$$(call pathsearch,$(1),$$(STD_LIB_PATH)); \
+	[ -n "$$$${SRC_LIB}" ] && cp $$$${SRC_LIB} $$(build_shlibdir)
+endef
+
+# libgfortran has multiple names; we're just going to copy any version we can find
+# Since we're only looking in the location given by `$(FC)` this should only succeed for one.
+$(eval $(call copy_csl,$(call versioned_libname,libgfortran,3)))
+$(eval $(call copy_csl,$(call versioned_libname,libgfortran,4)))
+$(eval $(call copy_csl,$(call versioned_libname,libgfortran,5)))
+
+# These are all libraries that we should always have
+$(eval $(call copy_csl,$(call versioned_libname,libquadmath,0)))
+$(eval $(call copy_csl,$(call versioned_libname,libstdc++,6)))
+$(eval $(call copy_csl,$(call versioned_libname,libssp,0)))
+$(eval $(call copy_csl,$(call versioned_libname,libatomic,1)))
+$(eval $(call copy_csl,$(call versioned_libname,libgomp,1)))
+
+ifeq ($(OS),WINNT)
+# Windwos has special gcc_s names
+ifeq ($(ARCH),i686)
+$(eval $(call copy_csl,$(call versioned_libname,libgcc_s_sjlj,1)))
+else
+$(eval $(call copy_csl,$(call versioned_libname,libgcc_s_seh,1)))
+endif
+else
+$(eval $(call copy_csl,$(call versioned_libname,libgcc_s,1)))
+endif
+# winpthread is only Windows, pthread is only others
+ifeq ($(OS),WINNT)
+$(eval $(call copy_csl,$(call versioned_libname,libwinpthread,1)))
+else
+$(eval $(call copy_csl,$(call versioned_libname,libpthread,0)))
+endif
+
+get-csl:
+clean-csl:
+	-rm -f $(build_shlibdir)/libgfortran*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libquadmath*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libstdc++*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libc++*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libgcc_s*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libssp*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libpthread*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libwinpthread*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libatomic*$(SHLIB_EXT)*
+	-rm -f $(build_shlibdir)/libgomp*$(SHLIB_EXT)*
+distclean-csl: clean-csl
+
+else
+$(eval $(call bb-install,csl,CSL,true))
+endif
diff --git a/deps/curl.mk b/deps/curl.mk
index c7d8d617cb062e..12b331c57606fc 100644
--- a/deps/curl.mk
+++ b/deps/curl.mk
@@ -4,8 +4,12 @@ ifeq ($(USE_SYSTEM_LIBSSH2), 0)
 $(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/libssh2
 endif
 
-ifeq ($(USE_SYSTEM_MBEDTLS), 0)
-$(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/mbedtls
+ifeq ($(USE_SYSTEM_ZLIB), 0)
+$(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/zlib
+endif
+
+ifeq ($(USE_SYSTEM_NGHTTP2), 0)
+$(BUILDDIR)/curl-$(CURL_VER)/build-configured: | $(build_prefix)/manifest/nghttp2
 endif
 
 ifneq ($(USE_BINARYBUILDER_CURL),1)
@@ -23,19 +27,35 @@ $(SRCCACHE)/curl-$(CURL_VER).tar.bz2: | $(SRCCACHE)
 $(SRCCACHE)/curl-$(CURL_VER)/source-extracted: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
 	$(JLCHECKSUM) $<
 	cd $(dir $<) && $(TAR) jxf $(notdir $<)
+	cp $(SRCDIR)/patches/config.sub $(SRCCACHE)/curl-$(CURL_VER)/config.sub
 	touch -c $(SRCCACHE)/curl-$(CURL_VER)/configure # old target
 	echo 1 > $@
 
+checksum-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
+	$(JLCHECKSUM) $<
+
+# We use different TLS libraries on different platforms.
+#   On Windows, we use schannel
+#   On MacOS, we use SecureTransport
+#   On Linux, we use mbedTLS
+ifeq ($(OS), WINNT)
+CURL_TLS_CONFIGURE_FLAGS := --with-schannel
+else ifeq ($(OS), Darwin)
+CURL_TLS_CONFIGURE_FLAGS := --with-secure-transport
+else
+CURL_TLS_CONFIGURE_FLAGS := --with-mbedtls=$(build_prefix)
+endif
+
 $(BUILDDIR)/curl-$(CURL_VER)/build-configured: $(SRCCACHE)/curl-$(CURL_VER)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
 	$(dir $<)/configure $(CONFIGURE_COMMON) --includedir=$(build_includedir) \
-		--without-ssl --without-gnutls --without-gssapi --without-zlib \
-		--without-libidn --without-libidn2 --without-libmetalink --without-librtmp \
-		--without-nghttp2 --without-nss --without-polarssl \
-		--without-spnego --without-libpsl --disable-ares \
-		--disable-ldap --disable-ldaps --without-zsh-functions-dir \
-		--with-libssh2=$(build_prefix) --with-mbedtls=$(build_prefix) \
+		--without-ssl --without-gnutls --without-gssapi --disable-ares \
+		--without-libidn --without-libidn2 --without-librtmp \
+		--without-nss --without-polarssl --without-spnego --without-libpsl \
+		--disable-ldap --disable-ldaps --without-zsh-functions-dir --disable-static \
+		--with-libssh2=$(build_prefix) --with-zlib=$(build_prefix) --with-nghttp2=$(build_prefix) \
+		$(CURL_TLS_CONFIGURE_FLAGS) \
 		CFLAGS="$(CFLAGS) $(CURL_CFLAGS)" LDFLAGS="$(LDFLAGS) $(CURL_LDFLAGS)"
 	echo 1 > $@
 
@@ -55,11 +75,11 @@ $(eval $(call staged-install, \
 	$$(INSTALL_NAME_CMD)libcurl.$$(SHLIB_EXT) $$(build_shlibdir)/libcurl.$$(SHLIB_EXT)))
 
 clean-curl:
-	-rm $(BUILDDIR)/curl-$(CURL_VER)/build-configured $(BUILDDIR)/curl-$(CURL_VER)/build-compiled
+	-rm -f $(BUILDDIR)/curl-$(CURL_VER)/build-configured $(BUILDDIR)/curl-$(CURL_VER)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/curl-$(CURL_VER) clean
 
 distclean-curl:
-	-rm -rf $(SRCCACHE)/curl-$(CURL_VER).tar.bz2 $(SRCCACHE)/curl-$(CURL_VER) $(BUILDDIR)/curl-$(CURL_VER)
+	rm -rf $(SRCCACHE)/curl-$(CURL_VER).tar.bz2 $(SRCCACHE)/curl-$(CURL_VER) $(BUILDDIR)/curl-$(CURL_VER)
 
 get-curl: $(SRCCACHE)/curl-$(CURL_VER).tar.bz2
 extract-curl: $(SRCCACHE)/curl-$(CURL_VER)/source-extracted
@@ -69,9 +89,5 @@ fastcheck-curl: #none
 check-curl: $(BUILDDIR)/curl-$(CURL_VER)/build-checked
 
 else # USE_BINARYBUILDER_CURL
-
-CURL_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/LibCURL_jll.jl/releases/download/LibCURL-v$(CURL_VER)+$(CURL_BB_REL)
-CURL_BB_NAME := LibCURL.v$(CURL_VER)
-
 $(eval $(call bb-install,curl,CURL,false))
 endif
diff --git a/deps/dsfmt.mk b/deps/dsfmt.mk
index c3e4a605cda614..2300d0d5929f45 100644
--- a/deps/dsfmt.mk
+++ b/deps/dsfmt.mk
@@ -2,30 +2,27 @@
 
 ifneq ($(USE_BINARYBUILDER_DSFMT),1)
 
-DSFMT_CFLAGS := $(CFLAGS) -DNDEBUG -DDSFMT_MEXP=19937 $(fPIC) -DDSFMT_DO_NOT_USE_OLD_NAMES
-ifneq ($(USEMSVC), 1)
+DSFMT_CFLAGS := $(CFLAGS) -DNDEBUG -DDSFMT_MEXP=19937 $(fPIC) -DDSFMT_DO_NOT_USE_OLD_NAMES -DDSFMT_SHLIB
 DSFMT_CFLAGS += -O3 -finline-functions -fomit-frame-pointer -fno-strict-aliasing \
-		--param max-inline-insns-single=1800 -Wmissing-prototypes -Wall  -std=c99 -shared
-else
-DSFMT_CFLAGS += -Wl,-dll,-def:../../libdSFMT.def
-endif
+		--param max-inline-insns-single=1800 -Wall  -std=c99 -shared
 ifeq ($(ARCH), x86_64)
 DSFMT_CFLAGS += -msse2 -DHAVE_SSE2
 endif
 
 $(SRCCACHE)/dsfmt-$(DSFMT_VER).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/dSFMT-src-$(DSFMT_VER).tar.gz
+	$(JLDOWNLOAD) $@ https://github.com/MersenneTwister-Lab/dSFMT/archive/v$(DSFMT_VER).tar.gz
 	touch -c $@
 
 $(BUILDDIR)/dsfmt-$(DSFMT_VER)/source-extracted: $(SRCCACHE)/dsfmt-$(DSFMT_VER).tar.gz
 	$(JLCHECKSUM) $<
-	-rm -r $(dir $@)
+	rm -rf $(dir $@)
 	mkdir -p $(dir $@)
 	$(TAR) -C $(dir $@) --strip-components 1 -xf $<
-	cd $(dir $@) && patch < $(SRCDIR)/patches/dSFMT.h.patch
-	cd $(dir $@) && patch < $(SRCDIR)/patches/dSFMT.c.patch
 	echo 1 > $@
 
+checksum-dsfmt: $(SRCCACHE)/dsfmt-$(DSFMT_VER).tar.gz
+	$(JLCHECKSUM) $<
+
 $(BUILDDIR)/dsfmt-$(DSFMT_VER)/build-compiled: $(BUILDDIR)/dsfmt-$(DSFMT_VER)/source-extracted
 	cd $(dir $<) && \
 	$(CC) $(CPPFLAGS) $(DSFMT_CFLAGS) $(LDFLAGS) dSFMT.c -o libdSFMT.$(SHLIB_EXT)
@@ -50,11 +47,11 @@ $(eval $(call staged-install, \
 	$$(INSTALL_NAME_CMD)libdSFMT.$$(SHLIB_EXT) $$(build_shlibdir)/libdSFMT.$$(SHLIB_EXT)))
 
 clean-dsfmt:
-	-rm $(BUILDDIR)/dsfmt-$(DSFMT_VER)/build-compiled
-	-rm $(BUILDDIR)/dsfmt-$(DSFMT_VER)/libdSFMT.$(SHLIB_EXT)
+	-rm -f $(BUILDDIR)/dsfmt-$(DSFMT_VER)/build-compiled
+	-rm -f $(BUILDDIR)/dsfmt-$(DSFMT_VER)/libdSFMT.$(SHLIB_EXT)
 
 distclean-dsfmt:
-	-rm -rf $(SRCCACHE)/dsfmt*.tar.gz $(SRCCACHE)/dsfmt-$(DSFMT_VER) $(BUILDDIR)/dsfmt-$(DSFMT_VER)
+	rm -rf $(SRCCACHE)/dsfmt*.tar.gz $(SRCCACHE)/dsfmt-$(DSFMT_VER) $(BUILDDIR)/dsfmt-$(DSFMT_VER)
 
 get-dsfmt: $(SRCCACHE)/dsfmt-$(DSFMT_VER).tar.gz
 extract-dsfmt: $(BUILDDIR)/dsfmt-$(DSFMT_VER)/source-extracted
@@ -65,8 +62,6 @@ check-dsfmt: $(BUILDDIR)/dsfmt-$(DSFMT_VER)/build-checked
 
 else
 
-DSFMT_BB_URL_BASE := https://github.com/JuliaPackaging/Yggdrasil/releases/download/dSFMT-v$(DSFMT_VER)-$(DSFMT_BB_REL)
-DSFMT_BB_NAME := dSFMT.v$(DSFMT_VER)
 $(eval $(call bb-install,dsfmt,DSFMT,false))
 
 endif # USE_BINARYBUILDER_DSFMT
diff --git a/deps/gfortblas.c b/deps/gfortblas.c
index 4133a975373997..321fe124d7e878 100644
--- a/deps/gfortblas.c
+++ b/deps/gfortblas.c
@@ -119,4 +119,3 @@ __attribute__((destructor))
 static void fini(void) {
     SetBLASParamErrorProc(NULL); /* restore default handler */
 }
-
diff --git a/deps/gmp.mk b/deps/gmp.mk
index f4830d3b8fa8b4..b09b1bddf3c186 100644
--- a/deps/gmp.mk
+++ b/deps/gmp.mk
@@ -19,16 +19,36 @@ $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2
 	touch -c $(SRCCACHE)/gmp-$(GMP_VER)/configure # old target
 	echo 1 > $@
 
-$(SRCCACHE)/gmp-$(GMP_VER)/build-patched: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted
-	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/gmp-exception.patch
-	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/gmp_alloc_overflow_func.patch
-	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/gmp-apple-arm64.patch
+checksum-gmp: $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2
+	$(JLCHECKSUM) $<
+
+# Apply fix to avoid using Apple ARM reserved register X18
+# Necessary for version 6.2.1, remove after next gmp release
+$(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted
+	cd $(dir $@) && \
+		patch -p1 < $(SRCDIR)/patches/gmp-HG-changeset.patch
+	echo 1 > $@
+
+$(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied
+	cd $(dir $@) && \
+		patch -p1 < $(SRCDIR)/patches/gmp-exception.patch
+	echo 1 > $@
+
+$(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied: $(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied
+	cd $(dir $@) && \
+		patch -p1 < $(SRCDIR)/patches/gmp_alloc_overflow_func.patch
+	echo 1 > $@
+
+$(SRCCACHE)/gmp-$(GMP_VER)/source-patched: \
+	$(SRCCACHE)/gmp-$(GMP_VER)/gmp-HG-changeset.patch-applied \
+	$(SRCCACHE)/gmp-$(GMP_VER)/gmp-exception.patch-applied \
+	$(SRCCACHE)/gmp-$(GMP_VER)/gmp_alloc_overflow_func.patch-applied
 	echo 1 > $@
 
-$(BUILDDIR)/gmp-$(GMP_VER)/build-configured: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted
+$(BUILDDIR)/gmp-$(GMP_VER)/build-configured: $(SRCCACHE)/gmp-$(GMP_VER)/source-extracted $(SRCCACHE)/gmp-$(GMP_VER)/source-patched
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) F77= --enable-shared --disable-static $(GMP_CONFIGURE_OPTS)
+	$(dir $<)/configure $(CONFIGURE_COMMON) F77= --enable-cxx --enable-shared --disable-static $(GMP_CONFIGURE_OPTS)
 	echo 1 > $@
 
 $(BUILDDIR)/gmp-$(GMP_VER)/build-compiled: $(BUILDDIR)/gmp-$(GMP_VER)/build-configured
@@ -43,10 +63,7 @@ endif
 
 define GMP_INSTALL
 	mkdir -p $2/$(build_shlibdir) $2/$(build_includedir)
-ifeq ($(BUILD_OS),WINNT)
-	-mv $1/.libs/gmp.dll $1/.libs/libgmp.dll
-endif
-	$(INSTALL_M) $1/.libs/libgmp.*$(SHLIB_EXT)* $2/$(build_shlibdir)
+	$(INSTALL_M) $1/.libs/libgmp*$(SHLIB_EXT)* $2/$(build_shlibdir)
 	$(INSTALL_F) $1/gmp.h $2/$(build_includedir)
 endef
 $(eval $(call staged-install, \
@@ -55,11 +72,11 @@ $(eval $(call staged-install, \
 	$$(INSTALL_NAME_CMD)libgmp.$$(SHLIB_EXT) $$(build_shlibdir)/libgmp.$$(SHLIB_EXT)))
 
 clean-gmp:
-	-rm $(BUILDDIR)/gmp-$(GMP_VER)/build-configured $(BUILDDIR)/gmp-$(GMP_VER)/build-compiled
+	-rm -f $(BUILDDIR)/gmp-$(GMP_VER)/build-configured $(BUILDDIR)/gmp-$(GMP_VER)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/gmp-$(GMP_VER) clean
 
 distclean-gmp:
-	-rm -rf $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2 \
+	rm -rf $(SRCCACHE)/gmp-$(GMP_VER).tar.bz2 \
 		$(SRCCACHE)/gmp-$(GMP_VER) \
 		$(BUILDDIR)/gmp-$(GMP_VER)
 
@@ -72,8 +89,5 @@ check-gmp: $(BUILDDIR)/gmp-$(GMP_VER)/build-checked
 
 else # USE_BINARYBUILDER_GMP
 
-GMP_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/GMP_jll.jl/releases/download/GMP-v$(GMP_VER)+$(GMP_BB_REL)
-GMP_BB_NAME := GMP.v$(GMP_VER)
-
 $(eval $(call bb-install,gmp,GMP,false,true))
 endif
diff --git a/deps/libgit2.mk b/deps/libgit2.mk
index 241b82cb3232a8..991703da0ea286 100644
--- a/deps/libgit2.mk
+++ b/deps/libgit2.mk
@@ -1,6 +1,7 @@
 ## libgit2
+ifneq ($(USE_BINARYBUILDER_LIBGIT2),1)
 
-LIBGIT2_GIT_URL := git://github.com/libgit2/libgit2.git
+LIBGIT2_GIT_URL := https://github.com/libgit2/libgit2.git
 LIBGIT2_TAR_URL = https://api.github.com/repos/libgit2/libgit2/tarball/$1
 $(eval $(call git-external,libgit2,LIBGIT2,CMakeLists.txt,,$(SRCCACHE)))
 
@@ -12,8 +13,6 @@ ifeq ($(USE_SYSTEM_MBEDTLS), 0)
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/mbedtls
 endif
 
-ifneq ($(USE_BINARYBUILDER_LIBGIT2),1)
-
 LIBGIT2_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=Release -DTHREADSAFE=ON -DUSE_BUNDLED_ZLIB=ON
 ifeq ($(OS),WINNT)
 LIBGIT2_OPTS += -DWIN32=ON -DMINGW=ON
@@ -41,8 +40,14 @@ $(LIBGIT2_SRC_PATH)/libgit2-agent-nonfatal.patch-applied: $(LIBGIT2_SRC_PATH)/so
 		patch -p1 -f < $(SRCDIR)/patches/libgit2-agent-nonfatal.patch
 	echo 1 > $@
 
+$(LIBGIT2_SRC_PATH)/libgit2-hostkey.patch-applied: $(LIBGIT2_SRC_PATH)/libgit2-agent-nonfatal.patch-applied
+	cd $(LIBGIT2_SRC_PATH) && \
+		patch -p1 -f < $(SRCDIR)/patches/libgit2-hostkey.patch
+	echo 1 > $@
+
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: \
-	$(LIBGIT2_SRC_PATH)/libgit2-agent-nonfatal.patch-applied
+	$(LIBGIT2_SRC_PATH)/libgit2-agent-nonfatal.patch-applied \
+	$(LIBGIT2_SRC_PATH)/libgit2-hostkey.patch-applied
 
 $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured: $(LIBGIT2_SRC_PATH)/source-extracted
 	mkdir -p $(dir $@)
@@ -75,8 +80,8 @@ $(eval $(call staged-install, \
 	$$(INSTALL_NAME_CMD)libgit2.$$(SHLIB_EXT) $$(build_shlibdir)/libgit2.$$(SHLIB_EXT)))
 
 clean-libgit2:
-	-rm $(build_datarootdir)/julia/cert.pem
-	-rm $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-compiled
+	-rm -f $(build_datarootdir)/julia/cert.pem
+	-rm -f $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-configured $(BUILDDIR)/$(LIBGIT2_SRC_DIR)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/$(LIBGIT2_SRC_DIR) clean
 
 get-libgit2: $(LIBGIT2_SRC_FILE)
@@ -89,8 +94,6 @@ $(build_prefix)/manifest/libgit2: $(build_datarootdir)/julia/cert.pem # use libg
 
 else # USE_BINARYBUILDER_LIBGIT2
 
-LIBGIT2_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/LibGit2_jll.jl/releases/download/LibGit2-v$(LIBGIT2_VER)+$(LIBGIT2_BB_REL)
-LIBGIT2_BB_NAME := LibGit2.v$(LIBGIT2_VER)
 $(eval $(call bb-install,libgit2,LIBGIT2,false))
 
 # BB tarball doesn't create a manifest, so directly depend the `install` target
@@ -107,6 +110,8 @@ $(build_datarootdir)/julia/cert.pem: $(SRCCACHE)/cacert-$(MOZILLA_CACERT_VERSION
 	mkdir -p $(build_datarootdir)/julia
 	cp $< $@
 
+checksum-mozillacert: $(SRCCACHE)/cacert-$(MOZILLA_CACERT_VERSION).pem
+	$(JLCHECKSUM) $<
+
 # When "get"'ing libgit2, download the .pem
 get-libgit2: $(SRCCACHE)/cacert-$(MOZILLA_CACERT_VERSION).pem
-
diff --git a/deps/libgit2.version b/deps/libgit2.version
index a2ab705d4567eb..042f76bba673e0 100644
--- a/deps/libgit2.version
+++ b/deps/libgit2.version
@@ -1,2 +1,2 @@
-LIBGIT2_BRANCH=v1.0.1
-LIBGIT2_SHA1=0ced29612dacb67eefe0c562a5c1d3aab21cce96
+LIBGIT2_BRANCH=v1.3.0
+LIBGIT2_SHA1=b7bad55e4bb0a285b073ba5e02b01d3f522fc95d
diff --git a/deps/libssh2.mk b/deps/libssh2.mk
index 9913df48689ac9..e27a57a4078d1e 100644
--- a/deps/libssh2.mk
+++ b/deps/libssh2.mk
@@ -1,6 +1,6 @@
 ## libssh2
-
-LIBSSH2_GIT_URL := git://github.com/libssh2/libssh2.git
+ifneq ($(USE_BINARYBUILDER_LIBSSH2), 1)
+LIBSSH2_GIT_URL := https://github.com/libssh2/libssh2.git
 LIBSSH2_TAR_URL = https://api.github.com/repos/libssh2/libssh2/tarball/$1
 $(eval $(call git-external,libssh2,LIBSSH2,CMakeLists.txt,,$(SRCCACHE)))
 
@@ -8,7 +8,6 @@ ifeq ($(USE_SYSTEM_MBEDTLS), 0)
 $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: | $(build_prefix)/manifest/mbedtls
 endif
 
-ifneq ($(USE_BINARYBUILDER_LIBSSH2), 1)
 LIBSSH2_OPTS := $(CMAKE_COMMON) -DBUILD_SHARED_LIBS=ON -DBUILD_EXAMPLES=OFF \
 		-DCMAKE_BUILD_TYPE=Release
 
@@ -29,7 +28,18 @@ ifeq ($(LIBSSH2_ENABLE_TESTS), 0)
 LIBSSH2_OPTS += -DBUILD_TESTING=OFF
 endif
 
-$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: $(SRCCACHE)/$(LIBSSH2_SRC_DIR)/source-extracted
+LIBSSH2_SRC_PATH := $(SRCCACHE)/$(LIBSSH2_SRC_DIR)
+
+ # Apply patch to fix v1.10.0 CVE (https://github.com/libssh2/libssh2/issues/649), drop with v1.11
+$(LIBSSH2_SRC_PATH)/libssh2-userauth-check.patch-applied: $(LIBSSH2_SRC_PATH)/source-extracted
+	cd $(LIBSSH2_SRC_PATH) && \
+		patch -p1 -f < $(SRCDIR)/patches/libssh2-userauth-check.patch
+	echo 1 > $@
+
+$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: \
+	$(LIBSSH2_SRC_PATH)/libssh2-userauth-check.patch-applied
+
+$(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured: $(LIBSSH2_SRC_PATH)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
 	$(CMAKE) $(dir $<) $(LIBSSH2_OPTS)
@@ -51,7 +61,7 @@ $(eval $(call staged-install, \
 	$$(INSTALL_NAME_CMD)libssh2.$$(SHLIB_EXT) $$(build_shlibdir)/libssh2.$$(SHLIB_EXT)))
 
 clean-libssh2:
-	-rm $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled
+	-rm -f $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-configured $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/$(LIBSSH2_SRC_DIR) clean
 
 
@@ -64,8 +74,6 @@ check-libssh2: $(BUILDDIR)/$(LIBSSH2_SRC_DIR)/build-checked
 
 else # USE_BINARYBUILDER_LIBSSH2
 
-LIBSSH2_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/LibSSH2_jll.jl/releases/download/LibSSH2-v$(LIBSSH2_VER)+$(LIBSSH2_BB_REL)
-LIBSSH2_BB_NAME := LibSSH2.v$(LIBSSH2_VER)
-
 $(eval $(call bb-install,libssh2,LIBSSH2,false))
+
 endif
diff --git a/deps/libssh2.version b/deps/libssh2.version
index 09023514a2a517..1c4d5412c0c09c 100644
--- a/deps/libssh2.version
+++ b/deps/libssh2.version
@@ -1,2 +1,2 @@
-LIBSSH2_BRANCH=libssh2-1.9.0
-LIBSSH2_SHA1=42d37aa63129a1b2644bf6495198923534322d64
+LIBSSH2_BRANCH=libssh2-1.10.0
+LIBSSH2_SHA1=635caa90787220ac3773c1d5ba11f1236c22eae8
diff --git a/deps/libsuitesparse.mk b/deps/libsuitesparse.mk
new file mode 100644
index 00000000000000..1316a99a5b5106
--- /dev/null
+++ b/deps/libsuitesparse.mk
@@ -0,0 +1,112 @@
+## LIBSUITESPARSE ##
+
+ifeq ($(USE_BLAS64), 1)
+UMFPACK_CONFIG := -DLONGBLAS='long long'
+CHOLMOD_CONFIG := -DLONGBLAS='long long'
+SPQR_CONFIG := -DLONGBLAS='long long'
+UMFPACK_CONFIG += -DSUN64
+CHOLMOD_CONFIG += -DSUN64
+SPQR_CONFIG += -DSUN64
+endif
+
+# Disable linking to libmetis
+CHOLMOD_CONFIG += -DNPARTITION
+
+ifneq ($(USE_BINARYBUILDER_LIBSUITESPARSE), 1)
+
+LIBSUITESPARSE_PROJECTS := AMD BTF CAMD CCOLAMD COLAMD CHOLMOD LDL KLU UMFPACK RBio SPQR
+LIBSUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig amd btf camd ccolamd colamd cholmod klu ldl umfpack rbio spqr)
+
+SUITE_SPARSE_LIB := $(LDFLAGS) -L"$(abspath $(BUILDDIR))/SuiteSparse-$(LIBSUITESPARSE_VER)/lib"
+ifeq ($(OS), Darwin)
+SUITE_SPARSE_LIB += $(RPATH_ESCAPED_ORIGIN)
+endif
+LIBSUITESPARSE_MFLAGS := CC="$(CC)" CXX="$(CXX)" F77="$(FC)" AR="$(AR)" RANLIB="$(RANLIB)" BLAS="-L$(build_shlibdir) -lblastrampoline" LAPACK="-L$(build_shlibdir) -lblastrampoline" \
+	  LDFLAGS="$(SUITE_SPARSE_LIB)" CFOPENMP="" CUDA=no CUDA_PATH="" \
+	  UMFPACK_CONFIG="$(UMFPACK_CONFIG)" CHOLMOD_CONFIG="$(CHOLMOD_CONFIG)" SPQR_CONFIG="$(SPQR_CONFIG)"
+ifeq ($(OS),WINNT)
+LIBSUITESPARSE_MFLAGS += UNAME=Windows
+else
+LIBSUITESPARSE_MFLAGS += UNAME=$(OS)
+endif
+
+$(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/DrTimothyAldenDavis/SuiteSparse/archive/v$(LIBSUITESPARSE_VER).tar.gz
+
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz
+	$(JLCHECKSUM) $<
+	mkdir -p $(dir $@)
+	$(TAR) -C $(dir $@) --strip-components 1 -zxf $<
+	echo 1 > $@
+
+checksum-libsuitesparse: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz
+	$(JLCHECKSUM) $<
+
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
+	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/SuiteSparse-shlib.patch
+	echo 1 > $@
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/SuiteSparse-shlib.patch-applied
+
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: | $(build_prefix)/manifest/blastrampoline
+
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
+	$(MAKE) -C $(dir $<)SuiteSparse_config library config $(LIBSUITESPARSE_MFLAGS)
+	$(INSTALL_NAME_CMD)libsuitesparseconfig.$(SHLIB_EXT) $(dir $<)lib/libsuitesparseconfig.$(SHLIB_EXT)
+	for PROJ in $(LIBSUITESPARSE_PROJECTS); do \
+		$(MAKE) -C $(dir $<)$${PROJ} library $(LIBSUITESPARSE_MFLAGS) || exit 1; \
+		$(INSTALL_NAME_CMD)lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) $(dir $<)lib/lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) || exit 1; \
+	done
+	echo 1 > $@
+
+ifeq ($(OS),WINNT)
+LIBSUITESPARSE_SHLIB_ENV:=PATH="$(abspath $(dir $<))lib:$(build_bindir):$(PATH)"
+else
+LIBSUITESPARSE_SHLIB_ENV:=LD_LIBRARY_PATH="$(build_shlibdir)"
+endif
+$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
+	for PROJ in $(LIBSUITESPARSE_PROJECTS); do \
+		$(LIBSUITESPARSE_SHLIB_ENV) $(MAKE) -C $(dir $<)$${PROJ} default $(LIBSUITESPARSE_MFLAGS) || exit 1; \
+	done
+	echo 1 > $@
+
+UNINSTALL_suitesparse := $(LIBSUITESPARSE_VER) manual_suitesparse $(LIBSUITESPARSE_LIBS)
+
+$(build_prefix)/manifest/libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled | $(build_prefix)/manifest $(build_shlibdir)
+	for lib in $(LIBSUITESPARSE_LIBS); do \
+		cp -a $(dir $<)lib/lib$${lib} $(build_shlibdir) || exit 1; \
+	done
+	#cp -a $(dir $<)lib/* $(build_shlibdir)
+	#cp -a $(dir $<)include/* $(build_includedir)
+	echo $(UNINSTALL_libsuitesparse) > $@
+
+clean-libsuitesparse: uninstall-libsuitesparse
+	-rm -f $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
+	-rm -fr $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/lib
+	-rm -fr $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/include
+	-$(MAKE) -C $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER) clean
+
+distclean-libsuitesparse:
+	rm -rf $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz \
+		$(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)
+
+get-libsuitesparse: $(SRCCACHE)/SuiteSparse-$(LIBSUITESPARSE_VER).tar.gz
+extract-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/source-extracted
+configure-libsuitesparse: extract-libsuitesparse
+compile-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-compiled
+fastcheck-libsuitesparse: #none
+check-libsuitesparse: $(BUILDDIR)/SuiteSparse-$(LIBSUITESPARSE_VER)/build-checked
+install-libsuitesparse: $(build_prefix)/manifest/libsuitesparse
+
+else # USE_BINARYBUILDER_LIBSUITESPARSE
+
+$(eval $(call bb-install,libsuitesparse,LIBSUITESPARSE,false))
+
+# libsuitesparse depends on blastrampoline
+compile-libsuitesparse: | $(build_prefix)/manifest/blastrampoline
+endif
+
+define manual_libsuitesparse
+uninstall-libsuitesparse:
+	-rm -f $(build_prefix)/manifest/libsuitesparse
+	-rm -f $(addprefix $(build_shlibdir)/lib,$3)
+endef
diff --git a/deps/libuv.mk b/deps/libuv.mk
index 09523d034d9fdc..cdcd12d8db4fa0 100644
--- a/deps/libuv.mk
+++ b/deps/libuv.mk
@@ -1,25 +1,13 @@
 ## LIBUV ##
-LIBUV_GIT_URL:=git://github.com/JuliaLang/libuv.git
+ifneq ($(USE_BINARYBUILDER_LIBUV),1)
+LIBUV_GIT_URL:=https://github.com/JuliaLang/libuv.git
 LIBUV_TAR_URL=https://api.github.com/repos/JuliaLang/libuv/tarball/$1
 $(eval $(call git-external,libuv,LIBUV,configure,,$(SRCCACHE)))
 
-ifneq ($(USE_BINARYBUILDER_LIBUV),1)
-
 UV_CFLAGS := -O2
-ifeq ($(USEMSVC), 1)
-UV_CFLAGS += -DBUILDING_UV_SHARED
-endif
-ifeq ($(USEICC), 1)
-UV_CFLAGS += -static-intel
-endif
 
 UV_FLAGS := LDFLAGS="$(LDFLAGS) $(CLDFLAGS) -v"
-ifneq ($(UV_CFLAGS),)
-UV_FLAGS += CFLAGS="$(CFLAGS) $(UV_CFLAGS)"
-endif
-ifeq ($(USEMSVC), 1)
-UV_FLAGS += --disable-shared
-endif
+UV_FLAGS += CFLAGS="$(CFLAGS) $(UV_CFLAGS) $(SANITIZE_OPTS)"
 
 ifneq ($(VERBOSE), 0)
 UV_MFLAGS += V=1
@@ -27,6 +15,9 @@ endif
 
 LIBUV_BUILDDIR := $(BUILDDIR)/$(LIBUV_SRC_DIR)
 
+ifneq ($(CLDFLAGS)$(SANITIZE_LDFLAGS),)
+$(LIBUV_BUILDDIR)/build-configured: LDFLAGS:=$(LDFLAGS) $(CLDFLAGS) $(SANITIZE_LDFLAGS)
+endif
 $(LIBUV_BUILDDIR)/build-configured: $(SRCCACHE)/$(LIBUV_SRC_DIR)/source-extracted
 	touch -c $(SRCCACHE)/$(LIBUV_SRC_DIR)/aclocal.m4 # touch a few files to prevent autogen from getting called
 	touch -c $(SRCCACHE)/$(LIBUV_SRC_DIR)/Makefile.in
@@ -52,7 +43,7 @@ $(eval $(call staged-install, \
 	$$(INSTALL_NAME_CMD)libuv.$$(SHLIB_EXT) $$(build_shlibdir)/libuv.$$(SHLIB_EXT)))
 
 clean-libuv:
-	-rm -rf $(LIBUV_BUILDDIR)/build-configured $(LIBUV_BUILDDIR)/build-compiled
+	rm -rf $(LIBUV_BUILDDIR)/build-configured $(LIBUV_BUILDDIR)/build-compiled
 	-$(MAKE) -C $(LIBUV_BUILDDIR) clean
 
 
@@ -64,8 +55,7 @@ fastcheck-libuv: #none
 check-libuv: $(LIBUV_BUILDDIR)/build-checked
 
 else # USE_BINARYBUILDER_LIBUV
-LIBUV_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/LibUV_jll.jl/releases/download/LibUV-v2.0.0+$(LIBUV_BB_REL)
-LIBUV_BB_NAME := LibUV.v2.0.0
 
 $(eval $(call bb-install,libuv,LIBUV,false))
+
 endif
diff --git a/deps/libuv.version b/deps/libuv.version
index b410875f69ac95..9d7012fde55aae 100644
--- a/deps/libuv.version
+++ b/deps/libuv.version
@@ -1,2 +1,2 @@
-LIBUV_BRANCH=julia-uv2-1.29.1
-LIBUV_SHA1=1fcc6d66f9df74189c74d3d390f02202bb7db953
+LIBUV_BRANCH=julia-uv2-1.44.1
+LIBUV_SHA1=f872be3b6438b90b4690fe3ee7692c50bfd9c7c7
diff --git a/deps/libwhich.mk b/deps/libwhich.mk
index 5f4a50c4bf07e2..79017838193d2b 100644
--- a/deps/libwhich.mk
+++ b/deps/libwhich.mk
@@ -1,10 +1,10 @@
 ## LIBWHICH ##
-LIBWHICH_GIT_URL := git://github.com/vtjnash/libwhich.git
+LIBWHICH_GIT_URL := https://github.com/vtjnash/libwhich.git
 LIBWHICH_TAR_URL = https://api.github.com/repos/vtjnash/libwhich/tarball/$1
 $(eval $(call git-external,libwhich,LIBWHICH,,,$(BUILDDIR)))
 
 LIBWHICH_OBJ_LIB := $(build_depsbindir)/libwhich
-LIBWHICH_MFLAGS := CC="$(CC)"
+LIBWHICH_MFLAGS := CC="$(HOSTCC)"
 
 $(BUILDDIR)/$(LIBWHICH_SRC_DIR)/build-compiled: $(BUILDDIR)/$(LIBWHICH_SRC_DIR)/source-extracted
 	$(MAKE) -C $(dir $<) $(LIBWHICH_MFLAGS) libwhich
@@ -25,7 +25,7 @@ $(eval $(call staged-install, \
 	LIBWHICH_INSTALL,,,))
 
 clean-libwhich:
-	-rm $(BUILDDIR)/$(LIBWHICH_SRC_DIR)/build-compiled
+	-rm -f $(BUILDDIR)/$(LIBWHICH_SRC_DIR)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/$(LIBWHICH_SRC_DIR) clean
 
 get-libwhich: $(LIBWHICH_SRC_FILE)
diff --git a/deps/llvm-options.mk b/deps/llvm-options.mk
index 20e3114818b0e3..723a4bb8e5d39f 100644
--- a/deps/llvm-options.mk
+++ b/deps/llvm-options.mk
@@ -15,20 +15,11 @@ LLVM_FLAVOR := $(LLVM_BUILDTYPE)
 ifeq ($(LLVM_SANITIZE),1)
 ifeq ($(SANITIZE_MEMORY),1)
 LLVM_BUILDTYPE := $(LLVM_BUILDTYPE)+MSAN
-else
+endif
+ifeq ($(SANITIZE_ADDRESS),1)
 LLVM_BUILDTYPE := $(LLVM_BUILDTYPE)+ASAN
 endif
+ifeq ($(SANITIZE_THREAD),1)
+LLVM_BUILDTYPE := $(LLVM_BUILDTYPE)+TSAN
 endif
-
-
-ifeq ($(LLVM_VER),svn)
-LLVM_MONOSRC_DIR:=$(SRCCACHE)/llvm-project-$(LLVM_VER)
-LLVM_SRC_DIR:=$(LLVM_MONOSRC_DIR)/llvm
-LIBCXX_ROOT_DIR:=$(LLVM_MONOSRC_DIR)
-else
-LLVM_MONOSRC_DIR:=
-LLVM_SRC_DIR:=$(SRCCACHE)/llvm-$(LLVM_VER)
-LIBCXX_ROOT_DIR:=$(LLVM_SRC_DIR)/projects
 endif
-LLVM_BUILD_DIR:=$(BUILDDIR)/llvm-$(LLVM_VER)
-LLVM_BUILDDIR_withtype := $(LLVM_BUILD_DIR)/build_$(LLVM_BUILDTYPE)
diff --git a/deps/llvm.mk b/deps/llvm.mk
index 4a6cfe1c782c91..e0512137da9240 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -1,35 +1,44 @@
 ## LLVM ##
 include $(SRCDIR)/llvm-ver.make
+include $(SRCDIR)/llvm-options.mk
 
 ifneq ($(USE_BINARYBUILDER_LLVM), 1)
-LLVM_GIT_URL ?= https://github.com/llvm/llvm-project.git
+LLVM_GIT_URL:=https://github.com/JuliaLang/llvm-project.git
+LLVM_TAR_URL=https://api.github.com/repos/JuliaLang/llvm-project/tarball/$1
+$(eval $(call git-external,llvm,LLVM,CMakeLists.txt,,$(SRCCACHE)))
+
+LLVM_BUILDDIR := $(BUILDDIR)/$(LLVM_SRC_DIR)
+LLVM_BUILDDIR_withtype := $(LLVM_BUILDDIR)/build_$(LLVM_BUILDTYPE)
 
 ifeq ($(BUILD_LLDB), 1)
 BUILD_LLVM_CLANG := 1
 # because it's a build requirement
 endif
 
-ifeq ($(USE_POLLY),1)
-ifeq ($(USE_SYSTEM_LLVM),0)
-ifneq ($(LLVM_VER),svn)
-$(error USE_POLLY=1 requires LLVM_VER=svn)
-endif
-endif
+ifeq ($(BUILD_LIBCXX), 1)
+BUILD_LLVM_CLANG := 1
+# because it's a build requirement
 endif
 
-ifeq ($(USE_MLIR),1)
-ifeq ($(USE_SYSTEM_LLVM),0)
-ifneq ($(LLVM_VER),svn)
-$(error USE_MLIR=1 requires LLVM_VER=svn)
-endif
-endif
+ifeq ($(USE_RV),1)
+BUILD_LLVM_CLANG := 1
+# because it's a build requirement
 endif
 
+# TODO: Add RV support back in
+# ifneq ($(USE_RV),)
+# LLVM_RV_GIT_URL ?= https://github.com/cdl-saarland/rv
+# LLVM_RV_GIT_VER ?= release_90
+# endif
+
 
 # for Monorepo
 LLVM_ENABLE_PROJECTS :=
+LLVM_EXTERNAL_PROJECTS :=
+LLVM_ENABLE_RUNTIMES :=
 ifeq ($(BUILD_LLVM_CLANG), 1)
-LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);clang;compiler-rt
+LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);clang
+LLVM_ENABLE_RUNTIMES := $(LLVM_ENABLE_RUNTIMES);compiler-rt
 endif
 ifeq ($(USE_POLLY), 1)
 LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);polly
@@ -40,35 +49,19 @@ endif
 ifeq ($(USE_MLIR), 1)
 LLVM_ENABLE_PROJECTS := $(LLVM_ENABLE_PROJECTS);mlir
 endif
+ifeq ($(USE_RV), 1)
+LLVM_EXTERNAL_PROJECTS := $(LLVM_EXTERNAL_PROJECTS);rv
+endif
+ifeq ($(BUILD_LIBCXX), 1)
+LLVM_ENABLE_RUNTIMES := $(LLVM_ENABLE_RUNTIMES);libcxx;libcxxabi
+endif
 
-include $(SRCDIR)/llvm-options.mk
-LLVM_LIB_FILE := libLLVMCodeGen.a
-
-LLVM_TAR_EXT:=$(LLVM_VER).src.tar.xz
-
-ifneq ($(LLVM_VER),svn)
-LLVM_TAR:=$(SRCCACHE)/llvm-$(LLVM_TAR_EXT)
-
-ifeq ($(BUILD_LLDB),1)
-LLVM_LLDB_TAR:=$(SRCCACHE)/lldb-$(LLVM_TAR_EXT)
-endif # BUILD_LLDB
 
-ifeq ($(BUILD_LLVM_CLANG),1)
-LLVM_CLANG_TAR:=$(SRCCACHE)/cfe-$(LLVM_TAR_EXT)
-LLVM_COMPILER_RT_TAR:=$(SRCCACHE)/compiler-rt-$(LLVM_TAR_EXT)
-else
-LLVM_CLANG_TAR:=
-LLVM_COMPILER_RT_TAR:=
-LLVM_LIBCXX_TAR:=
-endif # BUILD_LLVM_CLANG
-
-ifeq ($(BUILD_CUSTOM_LIBCXX),1)
-LLVM_LIBCXX_TAR:=$(SRCCACHE)/libcxx-$(LLVM_TAR_EXT)
-endif
-endif # LLVM_VER != svn
+LLVM_LIB_FILE := libLLVMCodeGen.a
 
 # Figure out which targets to build
-LLVM_TARGETS := host;NVPTX;AMDGPU;WebAssembly
+LLVM_TARGETS := host;NVPTX;AMDGPU;WebAssembly;BPF
+LLVM_EXPERIMENTAL_TARGETS :=
 
 LLVM_CFLAGS :=
 LLVM_CXXFLAGS :=
@@ -76,9 +69,13 @@ LLVM_CPPFLAGS :=
 LLVM_LDFLAGS :=
 LLVM_CMAKE :=
 
-# MONOREPO
-ifeq ($(LLVM_VER),svn)
 LLVM_CMAKE += -DLLVM_ENABLE_PROJECTS="$(LLVM_ENABLE_PROJECTS)"
+LLVM_CMAKE += -DLLVM_EXTERNAL_PROJECTS="$(LLVM_EXTERNAL_PROJECTS)"
+LLVM_CMAKE += -DLLVM_ENABLE_RUNTIMES="$(LLVM_ENABLE_RUNTIMES)"
+
+ifeq ($(USE_RV),1)
+LLVM_CMAKE += -DLLVM_EXTERNAL_RV_SOURCE_DIR=$(LLVM_MONOSRC_DIR)/rv
+LLVM_CMAKE += -DLLVM_CXX_STD=c++14
 endif
 
 # Allow adding LLVM specific flags
@@ -87,7 +84,9 @@ LLVM_CXXFLAGS += $(CXXFLAGS)
 LLVM_CPPFLAGS += $(CPPFLAGS)
 LLVM_LDFLAGS += $(LDFLAGS)
 LLVM_CMAKE += -DLLVM_TARGETS_TO_BUILD:STRING="$(LLVM_TARGETS)" -DCMAKE_BUILD_TYPE="$(LLVM_CMAKE_BUILDTYPE)"
-LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=OFF -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
+LLVM_CMAKE += -DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD:STRING="$(LLVM_EXPERIMENTAL_TARGETS)"
+LLVM_CMAKE += -DLLVM_ENABLE_LIBXML2=OFF -DLLVM_HOST_TRIPLE="$(or $(XC_HOST),$(BUILD_MACHINE))"
+LLVM_CMAKE += -DLLVM_ENABLE_ZLIB=ON -DZLIB_LIBRARY="$(build_prefix)/lib"
 LLVM_CMAKE += -DCOMPILER_RT_ENABLE_IOS=OFF -DCOMPILER_RT_ENABLE_WATCHOS=OFF -DCOMPILER_RT_ENABLE_TVOS=OFF
 ifeq ($(USE_POLLY_ACC),1)
 LLVM_CMAKE += -DPOLLY_ENABLE_GPGPU_CODEGEN=ON
@@ -99,19 +98,14 @@ LLVM_CMAKE += -DLLVM_BINDINGS_LIST="" -DLLVM_INCLUDE_DOCS=Off -DLLVM_ENABLE_TERM
 ifeq ($(LLVM_ASSERTIONS), 1)
 LLVM_CMAKE += -DLLVM_ENABLE_ASSERTIONS:BOOL=ON
 endif # LLVM_ASSERTIONS
-ifeq ($(LLVM_DEBUG), 1)
-ifeq ($(OS), WINNT)
-LLVM_CXXFLAGS += -Wa,-mbig-obj
-endif # OS == WINNT
-endif # LLVM_DEBUG
 ifeq ($(OS), WINNT)
 LLVM_CPPFLAGS += -D__USING_SJLJ_EXCEPTIONS__ -D__CRT__NO_INLINE
-ifneq ($(BUILD_OS),WINNT)
-LLVM_CMAKE += -DCROSS_TOOLCHAIN_FLAGS_NATIVE=-DCMAKE_TOOLCHAIN_FILE=$(SRCDIR)/NATIVE.cmake
-endif # BUILD_OS != WINNT
 endif # OS == WINNT
+ifneq ($(HOSTCC),$(CC))
+LLVM_CMAKE += -DCROSS_TOOLCHAIN_FLAGS_NATIVE="-DCMAKE_C_COMPILER=$$(which $(HOSTCC));-DCMAKE_CXX_COMPILER=$$(which $(HOSTCXX))"
+endif
 ifeq ($(OS), emscripten)
-LLVM_CMAKE += -DCMAKE_TOOLCHAIN_FILE=$(EMSCRIPTEN)/cmake/Modules/Platform/Emscripten.cmake -DCROSS_TOOLCHAIN_FLAGS_NATIVE=-DCMAKE_TOOLCHAIN_FILE=$(SRCDIR)/NATIVE.cmake -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_ENABLE_THREADS=OFF -DLLVM_BUILD_UTILS=OFF
+LLVM_CMAKE += -DCMAKE_TOOLCHAIN_FILE=$(EMSCRIPTEN)/cmake/Modules/Platform/Emscripten.cmake -DLLVM_INCLUDE_TOOLS=OFF -DLLVM_BUILD_TOOLS=OFF -DLLVM_INCLUDE_TESTS=OFF -DLLVM_ENABLE_THREADS=OFF -DLLVM_BUILD_UTILS=OFF
 endif # OS == emscripten
 ifeq ($(USE_LLVM_SHLIB),1)
 # NOTE: we could also --disable-static here (on the condition we link tools
@@ -152,12 +146,19 @@ LLVM_CFLAGS += -fsanitize=memory -fsanitize-memory-track-origins
 LLVM_LDFLAGS += -fsanitize=memory -fsanitize-memory-track-origins
 LLVM_CXXFLAGS += -fsanitize=memory -fsanitize-memory-track-origins
 LLVM_CMAKE += -DLLVM_USE_SANITIZER="MemoryWithOrigins"
-else
+endif
+ifeq ($(SANITIZE_ADDRESS),1)
 LLVM_CFLAGS += -fsanitize=address
 LLVM_LDFLAGS += -fsanitize=address
 LLVM_CXXFLAGS += -fsanitize=address
 LLVM_CMAKE += -DLLVM_USE_SANITIZER="Address"
 endif
+ifeq ($(SANITIZE_THREAD),1)
+LLVM_CFLAGS += -fsanitize=thread
+LLVM_LDFLAGS += -fsanitize=thread
+LLVM_CXXFLAGS += -fsanitize=thread
+LLVM_CMAKE += -DLLVM_USE_SANITIZER="Thread"
+endif
 endif # LLVM_SANITIZE
 
 ifeq ($(LLVM_LTO),1)
@@ -165,27 +166,17 @@ LLVM_CPPFLAGS += -flto
 LLVM_LDFLAGS += -flto
 endif # LLVM_LTO
 
+ifeq ($(USE_LLVM_SHLIB),1)
+ifeq ($(USECLANG),0)
+# https://bugs.llvm.org/show_bug.cgi?id=48221
+LLVM_CXXFLAGS += -fno-gnu-unique
+endif
+endif
+
 ifeq ($(fPIC),)
 LLVM_CMAKE += -DLLVM_ENABLE_PIC=OFF
 endif
 
-# disable ABI breaking checks: by default only enabled for asserts build, in which case
-# it is then impossible to call non-asserts LLVM libraries (like out-of-tree backends)
-LLVM_CMAKE += -DLLVM_ABI_BREAKING_CHECKS=FORCE_OFF
-
-ifeq ($(BUILD_CUSTOM_LIBCXX),1)
-LLVM_LDFLAGS += -Wl,-rpath,$(build_libdir)
-LLVM_CPPFLAGS += -I$(build_includedir)
-# We don't want to link to libc++ while trying to build it, so we define these
-# flags separately so that we can still pass them to the main LLVM build
-LLVM_LIBCXX_LDFLAGS := -lc++ -lc++abi
-ifeq ($(USEICC),1)
-LLVM_LDFLAGS += -no_cpprt
-endif # USEICC
-else
-LLVM_LIBCXX_LDFLAGS :=
-endif # BUILD_CUSTOM_LIBCXX
-
 LLVM_CMAKE += -DCMAKE_C_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CFLAGS)" \
 	-DCMAKE_CXX_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CXXFLAGS)"
 ifeq ($(OS),Darwin)
@@ -203,364 +194,107 @@ ifeq ($(BUILD_LLDB),0)
 LLVM_CMAKE += -DLLVM_TOOL_LLDB_BUILD=OFF
 endif
 
-ifneq ($(LLVM_VER),svn)
-ifeq (,$(findstring rc,$(LLVM_VER)))
-LLVM_SRC_URL := https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVM_VER)
-else
-LLVM_VER_SPLIT := $(subst rc, ,$(LLVM_VER))
-LLVM_SRC_URL := https://prereleases.llvm.org/$(word 1,$(LLVM_VER_SPLIT))/rc$(word 2,$(LLVM_VER_SPLIT))
-endif
-
-ifneq ($(LLVM_CLANG_TAR),)
-$(LLVM_CLANG_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-ifneq ($(LLVM_COMPILER_RT_TAR),)
-$(LLVM_COMPILER_RT_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-
-ifneq ($(LLVM_LIBCXX_TAR),)
-$(LLVM_LIBCXX_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-ifneq ($(LLVM_VER),svn)
-$(LLVM_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-
-ifneq ($(LLVM_LLDB_TAR),)
-$(LLVM_LLDB_TAR): | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ $(LLVM_SRC_URL)/$(notdir $@)
-endif
-ifeq ($(BUILD_LLDB),1)
-$(LLVM_SRC_DIR)/tools/lldb:
-$(LLVM_SRC_DIR)/source-extracted: $(LLVM_SRC_DIR)/tools/lldb
-endif
-endif
-
-# LLDB still relies on plenty of python 2.x infrastructure, without checking
-llvm_python_location=$(shell /usr/bin/env python2 -c 'import sys; print(sys.executable)')
-llvm_python_workaround=$(SRCCACHE)/python2_path
-$(llvm_python_workaround):
-	mkdir -p $@
-	-python -c 'import sys; sys.exit(not sys.version_info > (3, 0))' && \
-	/usr/bin/env python2 -c 'import sys; sys.exit(not sys.version_info < (3, 0))' && \
-	ln -sf $(llvm_python_location) "$@/python" && \
-	ln -sf $(llvm_python_location)-config "$@/python-config"
-
-ifeq ($(BUILD_CUSTOM_LIBCXX),1)
-
-# Take a snapshot of the CMake flags before linking to -lc++ and -lc++abi
-# These are added to the LLVM CMake flags further down
-LLVM_CMAKE_LIBCXX := $(LLVM_CMAKE) \
-	-DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \
+LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS)" \
 	-DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS)"
 
-ifeq ($(USEICC),1)
-LIBCXX_EXTRA_FLAGS := -Bstatic -lirc -Bdynamic
-endif
-
-# These libraries require unwind.h from the libunwind dependency
-ifeq ($(USE_SYSTEM_LIBUNWIND),0)
-ifeq ($(OS),Darwin)
-BUILT_UNWIND := $(build_prefix)/manifest/osxunwind
-else
-BUILT_UNWIND := $(build_prefix)/manifest/unwind
-endif # Darwin
-else
-BUILT_UNWIND :=
-endif # Building libunwind
-
-$(LIBCXX_ROOT_DIR)/libcxx: $(LLVM_LIBCXX_TAR) | $(LLVM_SRC_DIR)/source-extracted
-$(LIBCXX_ROOT_DIR)/libcxxabi: $(LLVM_LIBCXXABI_TAR) | $(LLVM_SRC_DIR)/source-extracted
-$(LLVM_BUILD_DIR)/libcxx-build/Makefile: | $(LIBCXX_ROOT_DIR)/libcxx $(LIBCXX_ROOT_DIR)/libcxxabi $(BUILT_UNWIND)
-	mkdir -p $(dir $@)
-	cd $(dir $@) && \
-		$(CMAKE) -G "Unix Makefiles" $(CMAKE_COMMON) $(LLVM_CMAKE_LIBCXX) -DLIBCXX_CXX_ABI=libcxxabi -DLIBCXX_CXX_ABI_INCLUDE_PATHS="$(LIBCXX_ROOT_DIR)/libcxxabi/include" $(LIBCXX_ROOT_DIR)/libcxx -DCMAKE_SHARED_LINKER_FLAGS="$(LDFLAGS) -L$(build_libdir) $(LIBCXX_EXTRA_FLAGS)"
-$(LLVM_BUILD_DIR)/libcxxabi-build/Makefile: | $(LIBCXX_ROOT_DIR)/libcxxabi $(LIBCXX_ROOT_DIR)/libcxx $(BUILT_UNWIND)
-	mkdir -p $(dir $@)
-	cd $(dir $@) && \
-		$(CMAKE) -G "Unix Makefiles" $(CMAKE_COMMON) $(LLVM_CMAKE_LIBCXX) -DLLVM_ABI_BREAKING_CHECKS="WITH_ASSERTS" -DLLVM_PATH="$(LLVM_SRC_DIR)" $(LIBCXX_ROOT_DIR)/libcxxabi -DLIBCXXABI_CXX_ABI_LIBRARIES="$(LIBCXX_EXTRA_FLAGS)" -DCMAKE_CXX_FLAGS="$(LLVM_CPPFLAGS) $(LLVM_CXXFLAGS) -std=c++11"
-$(LLVM_BUILD_DIR)/libcxxabi-build/lib/libc++abi.so.1.0: $(LLVM_BUILD_DIR)/libcxxabi-build/Makefile $(LIBCXX_ROOT_DIR)/libcxxabi/.git/HEAD
-	$(MAKE) -C $(LLVM_BUILD_DIR)/libcxxabi-build
-	touch -c $@
-$(build_libdir)/libc++abi.so.1.0: $(LLVM_BUILD_DIR)/libcxxabi-build/lib/libc++abi.so.1.0
-	$(MAKE) -C $(LLVM_BUILD_DIR)/libcxxabi-build install
-	touch -c $@
-	# Building this library installs these headers, which breaks other dependencies
-	-rm -rf $(build_includedir)/c++
-$(LLVM_BUILD_DIR)/libcxx-build/lib/libc++.so.1.0: $(build_libdir)/libc++abi.so.1.0 $(LLVM_BUILD_DIR)/libcxx-build/Makefile $(LIBCXX_ROOT_DIR)/libcxx/.git/HEAD
-	$(MAKE) -C $(LLVM_BUILD_DIR)/libcxx-build
-$(build_libdir)/libc++.so.1.0: $(LLVM_BUILD_DIR)/libcxx-build/lib/libc++.so.1.0
-	$(MAKE) -C $(LLVM_BUILD_DIR)/libcxx-build install
-	touch -c $@
-	# Building this library installs these headers, which breaks other dependencies
-	-rm -rf $(build_includedir)/c++
-get-libcxx: $(LIBCXX_ROOT_DIR)/libcxx
-get-libcxxabi: $(LIBCXX_ROOT_DIR)/libcxxabi
-install-libcxxabi: $(build_libdir)/libc++abi.so.1.0
-install-libcxx: $(build_libdir)/libc++.so.1.0
-endif # BUILD_CUSTOM_LIBCXX
-
-# We want to be able to clean without having to pass BUILD_CUSTOM_LIBCXX=1, so define these
-# outside of the conditional above, can't use `LIBCXX_ROOT_DIR` since that might come from
-# the monorepo.
-clean-libcxx:
-	-$(MAKE) -C $(LLVM_BUILD_DIR)/libcxx-build clean
-clean-libcxxabi:
-	-$(MAKE) -C $(LLVM_BUILD_DIR)/libcxxabi-build clean
-distclean-libcxx:
-	-rm -rf $(LLVM_LIBCXX_TAR) $(LLVM_SRC_DIR)/projects/libcxx $(LLVM_BUILD_DIR)/libcxx-build
-distclean-libcxxabi:
-	-rm -rf $(LLVM_LIBCXXABI_TAR) $(LLVM_SRC_DIR)/projects/libcxxabi $(LLVM_BUILD_DIR)/libcxxabi-build
-
-# We want to ensure that the libcxx linking flags don't get passed to the libcxx build, since it will
-# error on a fresh build
-LLVM_CMAKE += -DCMAKE_EXE_LINKER_FLAGS="$(LLVM_LDFLAGS) $(LLVM_LIBCXX_LDFLAGS)" \
-	-DCMAKE_SHARED_LINKER_FLAGS="$(LLVM_LDFLAGS) $(LLVM_LIBCXX_LDFLAGS)"
-
 # change the SONAME of Julia's private LLVM
 # i.e. libLLVM-6.0jl.so
 # see #32462
 LLVM_CMAKE += -DLLVM_VERSION_SUFFIX:STRING="jl"
-
-ifeq ($(BUILD_CUSTOM_LIBCXX),1)
-LIBCXX_DEPENDENCY := $(build_libdir)/libc++abi.so.1.0 $(build_libdir)/libc++.so.1.0
-get-llvm: get-libcxx get-libcxxabi
-endif
-
-$(LLVM_SRC_DIR)/source-extracted: | $(LLVM_TAR) $(LLVM_CLANG_TAR) $(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR)
-ifneq ($(LLVM_CLANG_TAR),)
-	$(JLCHECKSUM) $(LLVM_CLANG_TAR)
-endif
-ifneq ($(LLVM_COMPILER_RT_TAR),)
-	$(JLCHECKSUM) $(LLVM_COMPILER_RT_TAR)
-endif
-ifneq ($(LLVM_LIBCXX_TAR),)
-	$(JLCHECKSUM) $(LLVM_LIBCXX_TAR)
-endif
-ifneq ($(LLVM_VER),svn)
-	$(JLCHECKSUM) $(LLVM_TAR)
-endif
-ifneq ($(LLVM_LLDB_TAR),)
-	$(JLCHECKSUM) $(LLVM_LLDB_TAR)
-endif
-	-rm -rf $(LLVM_SRC_DIR)
-ifneq ($(LLVM_VER),svn)
-	mkdir -p $(LLVM_SRC_DIR)
-	$(TAR) -C $(LLVM_SRC_DIR) --strip-components 1 -xf $(LLVM_TAR)
-else
-	([ ! -d $(LLVM_MONOSRC_DIR) ] && \
-		git clone $(LLVM_GIT_URL) $(LLVM_MONOSRC_DIR) ) || \
-		(cd $(LLVM_MONOSRC_DIR) && \
-		git pull --ff-only)
-ifneq ($(LLVM_GIT_VER),)
-	(cd $(LLVM_MONOSRC_DIR) && \
-		git checkout $(LLVM_GIT_VER))
-endif # LLVM_GIT_VER
-	# Debug output only. Disable pager and ignore error.
-	(cd $(LLVM_SRC_DIR) && \
-		git show HEAD --stat | cat) || true
-endif # LLVM_VER
-ifneq ($(LLVM_VER),svn)
-ifneq ($(LLVM_CLANG_TAR),)
-	mkdir -p $(LLVM_SRC_DIR)/tools/clang
-	$(TAR) -C $(LLVM_SRC_DIR)/tools/clang --strip-components 1 -xf $(LLVM_CLANG_TAR)
-endif # LLVM_CLANG_TAR
-ifneq ($(LLVM_COMPILER_RT_TAR),)
-	mkdir -p $(LLVM_SRC_DIR)/projects/compiler-rt
-	$(TAR) -C $(LLVM_SRC_DIR)/projects/compiler-rt --strip-components 1 -xf $(LLVM_COMPILER_RT_TAR)
-endif # LLVM_COMPILER_RT_TAR
-ifneq ($(LLVM_LLDB_TAR),)
-	mkdir -p $(LLVM_SRC_DIR)/tools/lldb
-	$(TAR) -C $(LLVM_SRC_DIR)/tools/lldb --strip-components 1 -xf $(LLVM_LLDB_TAR)
-endif # LLVM_LLDB_TAR
-endif # LLVM_VER
-	# touch some extra files to ensure bisect works pretty well
-	touch -c $(LLVM_SRC_DIR).extracted
-	touch -c $(LLVM_SRC_DIR)/configure
-	touch -c $(LLVM_SRC_DIR)/CMakeLists.txt
-	echo 1 > $@
+LLVM_CMAKE += -DLLVM_SHLIB_SYMBOL_VERSION:STRING="JL_LLVM_$(LLVM_VER_SHORT)"
 
 # Apply version-specific LLVM patches sequentially
 LLVM_PATCH_PREV :=
 define LLVM_PATCH
-$$(LLVM_SRC_DIR)/$1.patch-applied: $$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
-	cd $$(LLVM_SRC_DIR) && patch -p1 < $$(SRCDIR)/patches/$1.patch
+$$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
+	cd $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm && patch -p1 < $$(SRCDIR)/patches/$1.patch
 	echo 1 > $$@
 # declare that applying any patch must re-run the compile step
-$$(LLVM_BUILDDIR_withtype)/build-compiled: $$(LLVM_SRC_DIR)/$1.patch-applied
-LLVM_PATCH_PREV := $$(LLVM_SRC_DIR)/$1.patch-applied
+$$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
+LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
 endef
 
-ifeq ($(LLVM_VER_SHORT),8.0)
-$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1))
-$(eval $(call LLVM_PATCH,llvm8-D34078-vectorize-fdiv))
-$(eval $(call LLVM_PATCH,llvm-7.0-D44650)) # mingw32 build fix
-$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm7-D50010-VNCoercion-ni))
-$(eval $(call LLVM_PATCH,llvm-8.0-D50167-scev-umin))
-$(eval $(call LLVM_PATCH,llvm7-windows-race))
-$(eval $(call LLVM_PATCH,llvm-D57118-powerpc)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-exegesis-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm-test-plugin-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm-8.0-D66401-mingw-reloc)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm7-revert-D44485))
-$(eval $(call LLVM_PATCH,llvm-8.0-D63688-wasm-isLocal)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D55758-tablegen-cond)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D59389-refactor-wmma)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D59393-mma-ptx63-fix)) # remove for 9.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D66657-codegen-degenerate)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D71495-vectorize-freduce)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D75072-SCEV-add-type))
-$(eval $(call LLVM_PATCH,llvm-8.0-D65174-limit-merge-stores)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
-endif # LLVM_VER 8.0
-
-ifeq ($(LLVM_VER_SHORT),9.0)
-$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1))
-$(eval $(call LLVM_PATCH,llvm8-D34078-vectorize-fdiv))
-$(eval $(call LLVM_PATCH,llvm-7.0-D44650)) # mingw32 build fix
-$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm9-D50010-VNCoercion-ni))
-$(eval $(call LLVM_PATCH,llvm-exegesis-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm-test-plugin-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm7-revert-D44485))
-$(eval $(call LLVM_PATCH,llvm-8.0-D66657-codegen-degenerate)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-8.0-D71495-vectorize-freduce)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-D75072-SCEV-add-type))
-$(eval $(call LLVM_PATCH,llvm-9.0-D65174-limit-merge-stores)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm9-D71443-PPC-MC-redef-symbol)) # remove for 10.0
-$(eval $(call LLVM_PATCH,llvm-9.0-D78196)) # remove for 11.0
-$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
-$(eval $(call LLVM_PATCH,llvm-9.0-D85499)) # landed as D85553
-$(eval $(call LLVM_PATCH,llvm-D80101)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-D84031)) # remove for LLVM 12
-endif # LLVM_VER 9.0
-
-ifeq ($(LLVM_VER_SHORT),10.0)
-$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1))
-$(eval $(call LLVM_PATCH,llvm8-D34078-vectorize-fdiv))
-$(eval $(call LLVM_PATCH,llvm-7.0-D44650)) # mingw32 build fix
-$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm9-D50010-VNCoercion-ni))
-$(eval $(call LLVM_PATCH,llvm-exegesis-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm-test-plugin-mingw)) # mingw build
-$(eval $(call LLVM_PATCH,llvm7-revert-D44485))
-$(eval $(call LLVM_PATCH,llvm-D75072-SCEV-add-type))
-$(eval $(call LLVM_PATCH,llvm-10.0-PPC_SELECT_CC)) # delete for LLVM 11
-$(eval $(call LLVM_PATCH,llvm-10.0-PPC-LI-Elimination)) # delete for LLVM 11
-$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
-$(eval $(call LLVM_PATCH,llvm-D80101)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-D84031)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-10-D85553)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-10-r_aarch64_prel32)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-10-r_ppc_rel)) # remove for LLVM 12
-endif # LLVM_VER 10.0
-
-ifeq ($(LLVM_VER_SHORT),11.0)
-$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1))
-$(eval $(call LLVM_PATCH,llvm8-D34078-vectorize-fdiv))
-$(eval $(call LLVM_PATCH,llvm-7.0-D44650)) # mingw32 build fix
-$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm9-D50010-VNCoercion-ni))
-$(eval $(call LLVM_PATCH,llvm7-revert-D44485))
-#$(eval $(call LLVM_PATCH,llvm-D75072-SCEV-add-type))
-$(eval $(call LLVM_PATCH,llvm-julia-tsan-custom-as))
-$(eval $(call LLVM_PATCH,llvm-D80101)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-D84031)) # remove for LLVM 12
-$(eval $(call LLVM_PATCH,llvm-10-D85553)) # remove for LLVM 12
-endif # LLVM_VER 11.0
-
-
-# Add a JL prefix to the version map. DO NOT REMOVE
-ifneq ($(LLVM_VER), svn)
-$(eval $(call LLVM_PATCH,llvm7-symver-jlprefix))
+define LLVM_PROJ_PATCH
+$$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/source-extracted | $$(SRCDIR)/patches/$1.patch $$(LLVM_PATCH_PREV)
+	cd $$(SRCCACHE)/$$(LLVM_SRC_DIR) && patch -p1 < $$(SRCDIR)/patches/$1.patch
+	echo 1 > $$@
+# declare that applying any patch must re-run the compile step
+$$(LLVM_BUILDDIR_withtype)/build-compiled: $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
+LLVM_PATCH_PREV := $$(SRCCACHE)/$$(LLVM_SRC_DIR)/$1.patch-applied
+endef
+
+ifeq ($(USE_SYSTEM_ZLIB), 0)
+$(LLVM_BUILDDIR_withtype)/build-configured: | $(build_prefix)/manifest/zlib
 endif
 
+# NOTE: LLVM 12 and 13 have their patches applied to JuliaLang/llvm-project
+
 # declare that all patches must be applied before running ./configure
 $(LLVM_BUILDDIR_withtype)/build-configured: | $(LLVM_PATCH_PREV)
 
-$(LLVM_BUILDDIR_withtype)/build-configured: $(LLVM_SRC_DIR)/source-extracted | $(llvm_python_workaround) $(LIBCXX_DEPENDENCY)
+$(LLVM_BUILDDIR_withtype)/build-configured: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-		export PATH=$(llvm_python_workaround):"$$PATH" && \
-		$(CMAKE) $(LLVM_SRC_DIR) $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LLVM_CMAKE) \
+		$(CMAKE) $(SRCCACHE)/$(LLVM_SRC_DIR)/llvm $(CMAKE_GENERATOR_COMMAND) $(CMAKE_COMMON) $(LLVM_CMAKE) \
 		|| { echo '*** To install a newer version of cmake, run contrib/download_cmake.sh ***' && false; }
 	echo 1 > $@
 
-$(LLVM_BUILDDIR_withtype)/build-compiled: $(LLVM_BUILDDIR_withtype)/build-configured | $(llvm_python_workaround)
+$(LLVM_BUILDDIR_withtype)/build-compiled: $(LLVM_BUILDDIR_withtype)/build-configured
 	cd $(LLVM_BUILDDIR_withtype) && \
-		export PATH=$(llvm_python_workaround):"$$PATH" && \
 		$(if $(filter $(CMAKE_GENERATOR),make), \
 		  $(MAKE), \
 		  $(CMAKE) --build .)
 	echo 1 > $@
 
-$(LLVM_BUILDDIR_withtype)/build-checked: $(LLVM_BUILDDIR_withtype)/build-compiled | $(llvm_python_workaround)
+$(LLVM_BUILDDIR_withtype)/build-checked: $(LLVM_BUILDDIR_withtype)/build-compiled
 ifeq ($(OS),$(BUILD_OS))
 	cd $(LLVM_BUILDDIR_withtype) && \
-		export PATH=$(llvm_python_workaround):"$$PATH" && \
 		  $(CMAKE) --build . --target check
 endif
 	echo 1 > $@
 
-$(build_prefix)/manifest/llvm: | $(llvm_python_workaround)
-
 LLVM_INSTALL = \
 	cd $1 && mkdir -p $2$$(build_depsbindir) && \
-    cp -r $$(LLVM_SRC_DIR)/utils/lit $2$$(build_depsbindir)/ && \
+    cp -r $$(SRCCACHE)/$$(LLVM_SRC_DIR)/llvm/utils/lit $2$$(build_depsbindir)/ && \
     $$(CMAKE) -DCMAKE_INSTALL_PREFIX="$2$$(build_prefix)" -P cmake_install.cmake
 ifeq ($(OS), WINNT)
-LLVM_INSTALL += && cp $2$$(build_shlibdir)/LLVM.dll $2$$(build_depsbindir)
+LLVM_INSTALL += && cp $2$$(build_shlibdir)/libLLVM.dll $2$$(build_depsbindir)
 endif
 ifeq ($(OS),Darwin)
 # https://github.com/JuliaLang/julia/issues/29981
 LLVM_INSTALL += && ln -s libLLVM.dylib $2$$(build_shlibdir)/libLLVM-$$(LLVM_VER_SHORT).dylib
 endif
 
-$(eval $(call staged-install,llvm,llvm-$$(LLVM_VER)/build_$$(LLVM_BUILDTYPE), \
+$(eval $(call staged-install, \
+	llvm,$$(LLVM_SRC_DIR)/build_$$(LLVM_BUILDTYPE), \
 	LLVM_INSTALL,,,))
 
-clean-llvm: clean-libcxx clean-libcxxabi
-	-rm $(LLVM_BUILDDIR_withtype)/build-configured $(LLVM_BUILDDIR_withtype)/build-compiled
+clean-llvm:
+	-rm -f $(LLVM_BUILDDIR_withtype)/build-configured $(LLVM_BUILDDIR_withtype)/build-compiled
 	-$(MAKE) -C $(LLVM_BUILDDIR_withtype) clean
 
-distclean-llvm: distclean-libcxx distclean-libcxxabi
-	-rm -rf $(LLVM_TAR) $(LLVM_CLANG_TAR) \
-		$(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR) \
-		$(LLVM_SRC_DIR) $(LLVM_BUILDDIR_withtype)
-
-
-ifneq ($(LLVM_VER),svn)
-get-llvm: $(LLVM_TAR) $(LLVM_CLANG_TAR) $(LLVM_COMPILER_RT_TAR) $(LLVM_LIBCXX_TAR) $(LLVM_LLDB_TAR)
-else
-get-llvm: $(LLVM_SRC_DIR)/source-extracted
-endif
-extract-llvm: $(LLVM_SRC_DIR)/source-extracted
+get-llvm: $(LLVM_SRC_FILE)
+extract-llvm: $(SRCCACHE)/$(LLVM_SRC_DIR)/source-extracted
 configure-llvm: $(LLVM_BUILDDIR_withtype)/build-configured
 compile-llvm: $(LLVM_BUILDDIR_withtype)/build-compiled
 fastcheck-llvm: #none
 check-llvm: $(LLVM_BUILDDIR_withtype)/build-checked
 #todo: LLVM make check target is broken on julia.mit.edu (and really slow elsewhere)
 
-
-ifeq ($(LLVM_VER),svn)
-update-llvm:
-	cd $(LLVM_MONOSRC_DIR) && \
-		git pull --ff-only
-endif
 else # USE_BINARYBUILDER_LLVM
-ifneq ($(BINARYBUILDER_LLVM_ASSERTS), 1)
-LLVM_BB_REPO_NAME := LLVM_full
-else
-LLVM_BB_REPO_NAME := LLVM_full_assert
-LLVM_BB_NAME := LLVM.asserts.v$(LLVM_VER)
+
+# We provide a way to subversively swap out which LLVM JLL we pull artifacts from
+ifeq ($(LLVM_ASSERTIONS), 1)
+LLVM_JLL_DOWNLOAD_NAME := libLLVM_assert
+LLVM_JLL_VER := $(LLVM_ASSERT_JLL_VER)
+LLVM_TOOLS_JLL_DOWNLOAD_NAME := LLVM_assert
+LLVM_TOOLS_JLL_VER := $(LLVM_TOOLS_ASSERT_JLL_VER)
 endif
-LLVM_BB_NAME := $(LLVM_BB_REPO_NAME).v$(LLVM_VER)
-LLVM_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/$(LLVM_BB_REPO_NAME)_jll.jl/releases/download/$(LLVM_BB_REPO_NAME)-v$(LLVM_VER)+$(LLVM_BB_REL)
 
 $(eval $(call bb-install,llvm,LLVM,false,true))
+$(eval $(call bb-install,clang,CLANG,false,true))
+$(eval $(call bb-install,llvm-tools,LLVM_TOOLS,false,true))
+
+install-clang install-llvm-tools: install-llvm
 
 endif # USE_BINARYBUILDER_LLVM
diff --git a/deps/llvm.version b/deps/llvm.version
new file mode 100644
index 00000000000000..ed9cfbcfc7a258
--- /dev/null
+++ b/deps/llvm.version
@@ -0,0 +1,2 @@
+LLVM_BRANCH=julia-13.0.1-0
+LLVM_SHA1=julia-13.0.1-0
diff --git a/deps/mbedtls.mk b/deps/mbedtls.mk
index 8cdd2bb2a5790e..5ddfd08327bb40 100644
--- a/deps/mbedtls.mk
+++ b/deps/mbedtls.mk
@@ -23,9 +23,14 @@ $(SRCCACHE)/$(MBEDTLS_SRC)/source-extracted: $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz
 	$(JLCHECKSUM) $<
 	mkdir -p $(dir $@) && \
 	$(TAR) -C $(dir $@) --strip-components 1 -xf $<
+	# Force-enable MD4
+	sed -i.org "s|//#define MBEDTLS_MD4_C|#define MBEDTLS_MD4_C|" $(SRCCACHE)/$(MBEDTLS_SRC)/include/mbedtls/config.h
 	touch -c $(SRCCACHE)/$(MBEDTLS_SRC)/CMakeLists.txt # old target
 	echo 1 > $@
 
+checksum-mbedtls: $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz
+	$(JLCHECKSUM) $<
+
 $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured: $(SRCCACHE)/$(MBEDTLS_SRC)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
@@ -59,19 +64,19 @@ $(eval $(call staged-install, \
 	MBEDTLS_INSTALL,,, \
 	$$(INSTALL_NAME_CMD)libmbedx509.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedx509.$$(SHLIB_EXT) && \
 	$$(INSTALL_NAME_CMD)libmbedtls.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \
-	$$(INSTALL_NAME_CHANGE_CMD) libmbedx509.0.dylib @rpath/libmbedx509.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \
-	$$(INSTALL_NAME_CHANGE_CMD) libmbedcrypto.3.dylib @rpath/libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \
-	$$(INSTALL_NAME_CHANGE_CMD) libmbedcrypto.3.dylib @rpath/libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedx509.$$(SHLIB_EXT) && \
+	$$(INSTALL_NAME_CHANGE_CMD) libmbedx509.1.dylib @rpath/libmbedx509.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \
+	$$(INSTALL_NAME_CHANGE_CMD) libmbedcrypto.7.dylib @rpath/libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedtls.$$(SHLIB_EXT) && \
+	$$(INSTALL_NAME_CHANGE_CMD) libmbedcrypto.7.dylib @rpath/libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedx509.$$(SHLIB_EXT) && \
 	$$(INSTALL_NAME_CMD)libmbedcrypto.$$(SHLIB_EXT) $$(build_shlibdir)/libmbedcrypto.$$(SHLIB_EXT)))
 
 
 clean-mbedtls:
-	-rm $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured \
+	-rm -f $(BUILDDIR)/$(MBEDTLS_SRC)/build-configured \
 		$(BUILDDIR)/$(MBEDTLS_SRC)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/$(MBEDTLS_SRC) clean
 
 distclean-mbedtls:
-	-rm -rf $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz \
+	rm -rf $(SRCCACHE)/$(MBEDTLS_SRC).tar.gz \
 		$(SRCCACHE)/$(MBEDTLS_SRC) \
 		$(BUILDDIR)/$(MBEDTLS_SRC)
 
@@ -86,9 +91,6 @@ check-mbedtls: $(BUILDDIR)/$(MBEDTLS_SRC)/build-checked
 
 else # USE_BINARYBUILDER_MBEDTLS
 
-MBEDTLS_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/MbedTLS_jll.jl/releases/download/MbedTLS-v$(MBEDTLS_VER)+$(MBEDTLS_BB_REL)
-MBEDTLS_BB_NAME := MbedTLS.v$(MBEDTLS_VER)
-
 $(eval $(call bb-install,mbedtls,MBEDTLS,false))
 
 endif
diff --git a/deps/mpfr.mk b/deps/mpfr.mk
index f05dc60cb0e292..4598a319df6d50 100644
--- a/deps/mpfr.mk
+++ b/deps/mpfr.mk
@@ -27,7 +27,7 @@ MPFR_OPTS += --host=none-unknown-linux
 endif
 
 $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ http://www.mpfr.org/mpfr-$(MPFR_VER)/$(notdir $@)
+	$(JLDOWNLOAD) $@ https://www.mpfr.org/mpfr-$(MPFR_VER)/$(notdir $@)
 $(SRCCACHE)/mpfr-$(MPFR_VER)/source-extracted: $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2
 	$(JLCHECKSUM) $<
 	cd $(dir $<) && $(TAR) -jxf $<
@@ -35,6 +35,9 @@ $(SRCCACHE)/mpfr-$(MPFR_VER)/source-extracted: $(SRCCACHE)/mpfr-$(MPFR_VER).tar.
 	touch -c $(SRCCACHE)/mpfr-$(MPFR_VER)/configure # old target
 	echo 1 > $@
 
+checksum-mpfr: $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2
+	$(JLCHECKSUM) $<
+
 $(BUILDDIR)/mpfr-$(MPFR_VER)/build-configured: $(SRCCACHE)/mpfr-$(MPFR_VER)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
@@ -57,11 +60,11 @@ $(eval $(call staged-install, \
 	$$(INSTALL_NAME_CMD)libmpfr.$$(SHLIB_EXT) $$(build_shlibdir)/libmpfr.$$(SHLIB_EXT)))
 
 clean-mpfr:
-	-rm $(BUILDDIR)/mpfr-$(MPFR_VER)/build-configured $(BUILDDIR)/mpfr-$(MPFR_VER)/build-compiled
+	-rm -f $(BUILDDIR)/mpfr-$(MPFR_VER)/build-configured $(BUILDDIR)/mpfr-$(MPFR_VER)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/mpfr-$(MPFR_VER) clean
 
 distclean-mpfr:
-	-rm -rf $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2 \
+	rm -rf $(SRCCACHE)/mpfr-$(MPFR_VER).tar.bz2 \
 		$(SRCCACHE)/mpfr-$(MPFR_VER) \
 		$(BUILDDIR)/mpfr-$(MPFR_VER)
 
@@ -74,8 +77,6 @@ check-mpfr: $(BUILDDIR)/mpfr-$(MPFR_VER)/build-checked
 
 else # USE_BINARYBUILDER_MPFR
 
-MPFR_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/MPFR_jll.jl/releases/download/MPFR-v$(MPFR_VER)+$(MPFR_BB_REL)
-MPFR_BB_NAME := MPFR.v$(MPFR_VER)
-
 $(eval $(call bb-install,mpfr,MPFR,false))
+
 endif
diff --git a/deps/nghttp2.mk b/deps/nghttp2.mk
index a3c96789ce9c94..54fd6a241eaba2 100644
--- a/deps/nghttp2.mk
+++ b/deps/nghttp2.mk
@@ -1,14 +1,58 @@
 ## nghttp2
 
 ifneq ($(USE_BINARYBUILDER_NGHTTP2), 1)
-$(error "Non-BinaryBuilder nghttp2 is not supported.")
-endif
 
-ifneq ($(USE_SYSTEM_NGHTTP2), 0)
-$(error "Non-BinaryBuilder nghttp2 is not supported.")
+$(SRCCACHE)/nghttp2-$(NGHTTP2_VER).tar.bz2: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/nghttp2/nghttp2/releases/download/v$(NGHTTP2_VER)/$(notdir $@)
+
+$(SRCCACHE)/nghttp2-$(NGHTTP2_VER)/source-extracted: $(SRCCACHE)/nghttp2-$(NGHTTP2_VER).tar.bz2
+	$(JLCHECKSUM) $<
+	cd $(dir $<) && $(TAR) -jxf $<
+	touch -c $(SRCCACHE)/nghttp2-$(NGHTTP2_VER)/configure # old target
+	echo 1 > $@
+
+checksum-nghttp2: $(SRCCACHE)/nghttp2-$(NGHTTP2_VER).tar.bz2
+	$(JLCHECKSUM) $<
+
+$(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-configured: $(SRCCACHE)/nghttp2-$(NGHTTP2_VER)/source-extracted
+	mkdir -p $(dir $@)
+	cd $(dir $@) && \
+	$(dir $<)/configure $(CONFIGURE_COMMON) --enable-lib-only
+	echo 1 > $@
+
+$(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-compiled: $(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-configured
+	$(MAKE) -C $(dir $<)
+	echo 1 > $@
+
+$(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-checked: $(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-compiled
+ifeq ($(OS),$(BUILD_OS))
+	$(MAKE) -C $(dir $@) check $(NGHTTP2_CHECK_MFLAGS)
 endif
+	echo 1 > $@
 
-NGHTTP2_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/nghttp2_jll.jl/releases/download/nghttp2-v$(NGHTTP2_VER)+$(NGHTTP2_BB_REL)
-NGHTTP2_BB_NAME := nghttp2.v$(NGHTTP2_VER)
+$(eval $(call staged-install, \
+	nghttp2,nghttp2-$(NGHTTP2_VER), \
+	MAKE_INSTALL,,, \
+	$$(INSTALL_NAME_CMD)libnghttp2.$$(SHLIB_EXT) $$(build_shlibdir)/libnghttp2.$$(SHLIB_EXT)))
+
+clean-nghttp2:
+	-rm -f $(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-configured $(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-compiled
+	-$(MAKE) -C $(BUILDDIR)/nghttp2-$(NGHTTP2_VER) clean
+
+distclean-nghttp2:
+	rm -rf $(SRCCACHE)/nghttp2-$(NGHTTP2_VER).tar.bz2 \
+		$(SRCCACHE)/nghttp2-$(NGHTTP2_VER) \
+		$(BUILDDIR)/nghttp2-$(NGHTTP2_VER)
+
+get-nghttp2: $(SRCCACHE)/nghttp2-$(NGHTTP2_VER).tar.bz2
+extract-nghttp2: $(SRCCACHE)/nghttp2-$(NGHTTP2_VER)/source-extracted
+configure-nghttp2: $(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-configured
+compile-nghttp2: $(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-compiled
+fastcheck-nghttp2: check-nghttp2
+check-nghttp2: $(BUILDDIR)/nghttp2-$(NGHTTP2_VER)/build-checked
+
+else
 
 $(eval $(call bb-install,nghttp2,NGHTTP2,false))
+
+endif
diff --git a/deps/objconv.mk b/deps/objconv.mk
index 355cf39cc16941..8423e476d37c6b 100644
--- a/deps/objconv.mk
+++ b/deps/objconv.mk
@@ -3,10 +3,10 @@
 ifneq ($(USE_BINARYBUILDER_OBJCONV),1)
 
 $(SRCCACHE)/objconv.zip: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ http://www.agner.org/optimize/objconv.zip
+	$(JLDOWNLOAD) $@ https://www.agner.org/optimize/objconv.zip
 
 $(BUILDDIR)/objconv/source-extracted: $(SRCCACHE)/objconv.zip
-	-rm -r $(dir $@)
+	rm -rf $(dir $@)
 	mkdir -p $(BUILDDIR)
 	unzip -d $(dir $@) $<
 	cd $(dir $@) && unzip source.zip
@@ -21,10 +21,10 @@ $(eval $(call staged-install, \
 	BINFILE_INSTALL,$$(BUILDDIR)/objconv/objconv,,))
 
 clean-objconv:
-	-rm $(BUILDDIR)/objconv/build-compiled $(build_depsbindir)/objconv
+	-rm -f $(BUILDDIR)/objconv/build-compiled $(build_depsbindir)/objconv
 
 distclean-objconv:
-	-rm -rf $(SRCCACHE)/objconv.zip $(BUILDDIR)/objconv
+	rm -rf $(SRCCACHE)/objconv.zip $(BUILDDIR)/objconv
 
 
 get-objconv: $(SRCCACHE)/objconv.zip
@@ -36,8 +36,6 @@ check-objconv: compile-objconv
 
 else
 
-OBJCONV_BB_URL_BASE := https://github.com/JuliaPackaging/Yggdrasil/releases/download/Objconv-v$(OBJCONV_VER)-$(OBJCONV_BB_REL)
-OBJCONV_BB_NAME := Objconv.v$(OBJCONV_VER)
 $(eval $(call bb-install,objconv,OBJCONV,false))
 
 endif
diff --git a/deps/openblas.mk b/deps/openblas.mk
new file mode 100644
index 00000000000000..a025580bcc9237
--- /dev/null
+++ b/deps/openblas.mk
@@ -0,0 +1,215 @@
+## OpenBLAS ##
+ifneq ($(USE_BINARYBUILDER_OPENBLAS), 1)
+# LAPACK is built into OpenBLAS by default
+OPENBLAS_GIT_URL := https://github.com/xianyi/OpenBLAS.git
+OPENBLAS_TAR_URL = https://api.github.com/repos/xianyi/OpenBLAS/tarball/$1
+$(eval $(call git-external,openblas,OPENBLAS,,,$(BUILDDIR)))
+
+OPENBLAS_BUILD_OPTS := CC="$(CC)" FC="$(FC)" LD="$(LD)" RANLIB="$(RANLIB)" TARGET=$(OPENBLAS_TARGET_ARCH) BINARY=$(BINARY)
+
+# Thread support
+ifeq ($(OPENBLAS_USE_THREAD), 1)
+OPENBLAS_BUILD_OPTS += USE_THREAD=1
+OPENBLAS_BUILD_OPTS += GEMM_MULTITHREADING_THRESHOLD=50
+# Maximum number of threads for parallelism
+OPENBLAS_BUILD_OPTS += NUM_THREADS=512
+else
+OPENBLAS_BUILD_OPTS += USE_THREAD=0
+endif
+
+# don't touch scheduler affinity since we manage this ourselves
+OPENBLAS_BUILD_OPTS += NO_AFFINITY=1
+
+# Build for all architectures - required for distribution
+ifeq ($(OPENBLAS_DYNAMIC_ARCH), 1)
+OPENBLAS_BUILD_OPTS += DYNAMIC_ARCH=1
+endif
+
+# 64-bit BLAS interface
+ifeq ($(USE_BLAS64), 1)
+OPENBLAS_BUILD_OPTS += INTERFACE64=1 SYMBOLSUFFIX="$(OPENBLAS_SYMBOLSUFFIX)" LIBPREFIX="libopenblas$(OPENBLAS_LIBNAMESUFFIX)"
+ifeq ($(OS), Darwin)
+OPENBLAS_BUILD_OPTS += OBJCONV=$(abspath $(build_bindir)/objconv)
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: | $(build_prefix)/manifest/objconv
+endif
+endif
+
+OPENBLAS_FFLAGS := $(JFFLAGS) $(USE_BLAS_FFLAGS)
+OPENBLAS_CFLAGS := -O2
+
+# Decide whether to build for 32-bit or 64-bit arch
+ifneq ($(BUILD_OS),$(OS))
+OPENBLAS_BUILD_OPTS += OSNAME=$(OS) CROSS=1 HOSTCC=$(HOSTCC) CROSS_SUFFIX=$(CROSS_COMPILE)
+endif
+ifeq ($(OS),WINNT)
+ifneq ($(ARCH),x86_64)
+ifneq ($(USECLANG),1)
+OPENBLAS_CFLAGS += -mincoming-stack-boundary=2
+endif
+OPENBLAS_FFLAGS += -mincoming-stack-boundary=2
+endif
+endif
+
+# Work around invalid register errors on 64-bit Windows
+# See discussion in https://github.com/xianyi/OpenBLAS/issues/1708
+# TODO: Remove this once we use a version of OpenBLAS where this is set automatically
+ifeq ($(OS),WINNT)
+ifeq ($(ARCH),x86_64)
+OPENBLAS_CFLAGS += -fno-asynchronous-unwind-tables
+endif
+endif
+
+OPENBLAS_BUILD_OPTS += CFLAGS="$(CFLAGS) $(OPENBLAS_CFLAGS)"
+OPENBLAS_BUILD_OPTS += FFLAGS="$(FFLAGS) $(OPENBLAS_FFLAGS)"
+OPENBLAS_BUILD_OPTS += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN)"
+
+# Debug OpenBLAS
+ifeq ($(OPENBLAS_DEBUG), 1)
+OPENBLAS_BUILD_OPTS += DEBUG=1
+endif
+
+# Allow disabling AVX for older binutils
+ifeq ($(OPENBLAS_NO_AVX), 1)
+OPENBLAS_BUILD_OPTS += NO_AVX=1 NO_AVX2=1 NO_AVX512=1
+else ifeq ($(OPENBLAS_NO_AVX2), 1)
+OPENBLAS_BUILD_OPTS += NO_AVX2=1 NO_AVX512=1
+else ifeq ($(OPENBLAS_NO_AVX512), 1)
+OPENBLAS_BUILD_OPTS += NO_AVX512=1
+endif
+
+# Do not overwrite the "-j" flag
+OPENBLAS_BUILD_OPTS += MAKE_NB_JOBS=0
+
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/source-extracted
+	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
+		patch -p1 -f < $(SRCDIR)/patches/openblas-winexit.patch
+	echo 1 > $@
+
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-winexit.patch-applied
+	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
+		patch -p1 -f < $(SRCDIR)/patches/openblas-ofast-power.patch
+	echo 1 > $@
+
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/openblas-ofast-power.patch-applied
+	cd $(BUILDDIR)/$(OPENBLAS_SRC_DIR) && \
+		patch -p1 -f < $(SRCDIR)/patches/neoverse-generic-kernels.patch
+	echo 1 > $@
+
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/neoverse-generic-kernels.patch-applied
+	echo 1 > $@
+
+$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
+	echo $(MAKE) -C $(dir $<) $(OPENBLAS_BUILD_OPTS) # echo first, so we only print the error message below in a failure case
+	@$(MAKE) -C $(dir $<) $(OPENBLAS_BUILD_OPTS) || (echo $(WARNCOLOR)"*** Clean the OpenBLAS build with 'make -C deps clean-openblas'. Rebuild with 'make OPENBLAS_USE_THREAD=0' if OpenBLAS had trouble linking libpthread.so, and with 'make OPENBLAS_TARGET_ARCH=NEHALEM' if there were errors building SandyBridge support. Both these options can also be used simultaneously. ***"$(ENDCOLOR) && false)
+	echo 1 > $@
+
+define OPENBLAS_INSTALL
+	$(call SHLIBFILE_INSTALL,$1,$2,$3)
+ifeq ($$(OS), Linux)
+	ln -sf libopenblas$$(OPENBLAS_LIBNAMESUFFIX).$$(SHLIB_EXT) $2/$$(build_libdir)/libopenblas$$(OPENBLAS_LIBNAMESUFFIX).$$(SHLIB_EXT).0
+endif
+endef
+$(eval $(call staged-install, \
+	openblas,$(OPENBLAS_SRC_DIR), \
+	OPENBLAS_INSTALL,$(BUILDDIR)/$(OPENBLAS_SRC_DIR)/$(LIBBLASNAME).$(SHLIB_EXT),, \
+	$$(INSTALL_NAME_CMD)libopenblas$$(OPENBLAS_LIBNAMESUFFIX).$$(SHLIB_EXT) $$(build_shlibdir)/libopenblas$$(OPENBLAS_LIBNAMESUFFIX).$$(SHLIB_EXT)))
+
+clean-openblas:
+	-rm -f $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled
+	-$(MAKE) -C $(BUILDDIR)/$(OPENBLAS_SRC_DIR) clean
+
+
+get-openblas: $(OPENBLAS_SRC_FILE)
+extract-openblas: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/source-extracted
+configure-openblas: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-configured
+compile-openblas: $(BUILDDIR)/$(OPENBLAS_SRC_DIR)/build-compiled
+fastcheck-openblas: check-openblas
+check-openblas: compile-openblas
+
+
+## Mac gfortran BLAS wrapper ##
+ifeq ($(OS),Darwin)
+$(BUILDDIR)/libgfortblas.$(SHLIB_EXT): $(SRCDIR)/gfortblas.c $(SRCDIR)/gfortblas.alias
+	$(CC) -Wall -O3 $(CPPFLAGS) $(CFLAGS) $(fPIC) -shared $< -o $@ -pipe \
+				-Wl,-reexport_framework,Accelerate -Wl,-alias_list,$(SRCDIR)/gfortblas.alias
+
+$(build_shlibdir)/libgfortblas.$(SHLIB_EXT): $(BUILDDIR)/libgfortblas.$(SHLIB_EXT)
+	cp -f $< $@
+	$(INSTALL_NAME_CMD)libgfortblas.$(SHLIB_EXT) $@
+endif
+
+
+## LAPACK ##
+
+LAPACK_MFLAGS := NOOPT="$(FFLAGS) $(JFFLAGS) $(USE_BLAS_FFLAGS) -O0" \
+    OPTS="$(FFLAGS) $(JFFLAGS) $(USE_BLAS_FFLAGS)" FORTRAN="$(FC)" \
+    LOADER="$(FC)" BLASLIB="$(RPATH_ESCAPED_ORIGIN) $(LIBBLAS)"
+
+$(SRCCACHE)/lapack-$(LAPACK_VER).tgz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://www.netlib.org/lapack/$(notdir $@)
+
+$(BUILDDIR)/lapack-$(LAPACK_VER)/source-extracted: $(SRCCACHE)/lapack-$(LAPACK_VER).tgz
+	$(JLCHECKSUM) $<
+	mkdir -p $(BUILDDIR)
+	cd $(BUILDDIR) && $(TAR) -zxf $<
+	cp $(dir $@)INSTALL/make.inc.gfortran $(dir $@)make.inc
+	echo 1 > $@
+
+checksum-lapack: $(SRCCACHE)/lapack-$(LAPACK_VER).tgz
+	$(JLCHECKSUM) $<
+
+ifeq ($(USE_SYSTEM_BLAS), 0)
+$(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0: | $(build_prefix)/manifest/openblas
+else ifeq ($(OS),Darwin)
+$(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0: | $(build_shlibdir)/libgfortblas.$(SHLIB_EXT)
+endif
+$(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0: $(BUILDDIR)/lapack-$(LAPACK_VER)/source-extracted
+	$(MAKE) -C $(dir $@) lapacklib $(LAPACK_MFLAGS)
+	echo 1 > $@
+
+$(BUILDDIR)/lapack-$(LAPACK_VER)/build-checked: $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0
+ifeq ($(BUILD_OS),$(OS))
+	$(MAKE) -C $(dir $@) lapack_testing $(LAPACK_MFLAGS) -k
+endif
+	echo 1 > $@
+
+$(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled: $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0 | $(build_prefix)/manifest
+	$(FC) -shared $(FFLAGS) $(JFFLAGS) $(dir $<)/SRC/*.o \
+		$(dir $<)/INSTALL/dlamch.o $(dir $<)/INSTALL/dsecnd_INT_ETIME.o \
+		$(dir $<)/INSTALL/ilaver.o $(dir $<)/INSTALL/slamch.o $(LIBBLAS) \
+		-o $(dir $<)/liblapack.$(SHLIB_EXT)
+	echo 1 > $@
+
+$(eval $(call staged-install, \
+	lapack,lapack-$(LAPACK_VER), \
+	SHLIBFILE_INSTALL,$(BUILDDIR)/lapack-$(LAPACK_VER)/liblapack.$(SHLIB_EXT),, \
+	$$(INSTALL_NAME_CMD)liblapack.$$(SHLIB_EXT) $$(build_shlibdir)/liblapack.$$(SHLIB_EXT)))
+
+clean-lapack:
+	-rm -f $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled0 $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled
+	-$(MAKE) -C $(BUILDDIR)/lapack-$(LAPACK_VER) clean
+
+distclean-lapack:
+	rm -rf $(SRCCACHE)/lapack-$(LAPACK_VER).tgz $(BUILDDIR)/lapack-$(LAPACK_VER)
+
+
+get-lapack: $(SRCCACHE)/lapack-$(LAPACK_VER).tgz
+extract-lapack: $(BUILDDIR)/lapack-$(LAPACK_VER)/source-extracted
+configure-lapack: extract-lapack
+compile-lapack: $(BUILDDIR)/lapack-$(LAPACK_VER)/build-compiled
+fastcheck-lapack: check-lapack
+check-lapack: $(BUILDDIR)/lapack-$(LAPACK_VER)/build-checked
+
+else # USE_BINARYBUILDER_OPENBLAS
+
+$(eval $(call bb-install,openblas,OPENBLAS,true))
+get-lapack: get-openblas
+extract-lapack: extract-openblas
+configure-lapack: configure-openblas
+compile-lapack: compile-openblas
+fastcheck-lapack: fastcheck-openblas
+check-lapack: check-openblas
+clean-lapack: clean-openblas
+distclean-lapack: distclean-openblas
+install-lapack: install-openblas
+endif
diff --git a/deps/openblas.version b/deps/openblas.version
index 74660fc0c1a212..ceb01600b0ea77 100644
--- a/deps/openblas.version
+++ b/deps/openblas.version
@@ -1,2 +1,2 @@
-OPENBLAS_BRANCH=v0.3.10
-OPENBLAS_SHA1=63b03efc2af332c88b86d4fd8079d00f4b439adf
+OPENBLAS_BRANCH=v0.3.20
+OPENBLAS_SHA1=0b678b19dc03f2a999d6e038814c4c50b9640a4e
diff --git a/deps/openlibm.mk b/deps/openlibm.mk
index 7af66c6f66deec..f99cdade47b915 100644
--- a/deps/openlibm.mk
+++ b/deps/openlibm.mk
@@ -1,13 +1,11 @@
 ## openlibm ##
-
-
-OPENLIBM_GIT_URL := git://github.com/JuliaMath/openlibm.git
+ifneq ($(USE_BINARYBUILDER_OPENLIBM), 1)
+OPENLIBM_GIT_URL := https://github.com/JuliaMath/openlibm.git
 OPENLIBM_TAR_URL = https://api.github.com/repos/JuliaMath/openlibm/tarball/$1
 $(eval $(call git-external,openlibm,OPENLIBM,,,$(BUILDDIR)))
 
 OPENLIBM_FLAGS := ARCH="$(ARCH)" REAL_ARCH="$(MARCH)" CC="$(CC)" FC="$(FC)" AR="$(AR)" OS="$(OS)" USECLANG=$(USECLANG) USEGCC=$(USEGCC)
 
-ifneq ($(USE_BINARYBUILDER_OPENLIBM), 1)
 $(BUILDDIR)/$(OPENLIBM_SRC_DIR)/build-compiled: $(BUILDDIR)/$(OPENLIBM_SRC_DIR)/source-extracted
 	$(MAKE) -C $(dir $<) $(OPENLIBM_FLAGS) $(MAKE_COMMON)
 	echo 1 > $@
@@ -18,7 +16,7 @@ $(eval $(call staged-install, \
 	$(INSTALL_NAME_CMD)libopenlibm.$(SHLIB_EXT) $(build_shlibdir)/libopenlibm.$(SHLIB_EXT)))
 
 clean-openlibm:
-	-rm $(BUILDDIR)/$(OPENLIBM_SRC_DIR)/build-compiled $(build_libdir)/libopenlibm.a
+	-rm -f $(BUILDDIR)/$(OPENLIBM_SRC_DIR)/build-compiled $(build_libdir)/libopenlibm.a
 	-$(MAKE) -C $(BUILDDIR)/$(OPENLIBM_SRC_DIR) distclean $(OPENLIBM_FLAGS)
 
 
@@ -31,8 +29,6 @@ check-openlibm: compile-openlibm
 
 else # USE_BINARYBUILDER_OPENLIBM
 
-OPENLIBM_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/OpenLibm_jll.jl/releases/download/OpenLibm-v$(OPENLIBM_VER)+$(OPENLIBM_BB_REL)
-OPENLIBM_BB_NAME := OpenLibm.v$(OPENLIBM_VER)
-
 $(eval $(call bb-install,openlibm,OPENLIBM,false))
+
 endif
diff --git a/deps/openlibm.version b/deps/openlibm.version
index 06eed6d965c52b..9edba0c1f257b7 100644
--- a/deps/openlibm.version
+++ b/deps/openlibm.version
@@ -1,2 +1,2 @@
-OPENLIBM_BRANCH=v0.7.0
-OPENLIBM_SHA1=5efed306d509905714e3c43fc3a43fb26f3df743
+OPENLIBM_BRANCH=v0.8.1
+OPENLIBM_SHA1=ae2d91698508701c83cab83714d42a1146dccf85
diff --git a/deps/p7zip.mk b/deps/p7zip.mk
index 8ad38b2e8d5226..fe3f5d6fa98d9f 100644
--- a/deps/p7zip.mk
+++ b/deps/p7zip.mk
@@ -9,7 +9,10 @@ $(BUILDDIR)/p7zip-$(P7ZIP_VER)/source-extracted: $(SRCCACHE)/p7zip-$(P7ZIP_VER).
 	$(JLCHECKSUM) $<
 	mkdir -p $(dir $@)
 	cd $(dir $@) && $(TAR) --strip-components 1 -jxf $<
-	echo $1 > $@
+	echo 1 > $@
+
+checksum-p7zip: $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.bz2
+	$(JLCHECKSUM) $<
 
 $(BUILDDIR)/p7zip-$(P7ZIP_VER)/p7zip-12-CVE-2016-9296.patch-applied: $(BUILDDIR)/p7zip-$(P7ZIP_VER)/source-extracted
 	cd $(dir $@) && patch -p1 -f < $(SRCDIR)/patches/p7zip-12-CVE-2016-9296.patch
@@ -41,12 +44,12 @@ $(eval $(call staged-install, \
 	P7ZIP_INSTALL,,,))
 
 clean-p7zip:
-	-rm $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-configured $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-compiled
-	-rm $(build_bindir)/7za
+	-rm -f $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-configured $(BUILDDIR)/p7zip-$(P7ZIP_VER)/build-compiled
+	-rm -f $(build_bindir)/7za
 	-$(MAKE) -C $(BUILDDIR)/p7zip-$(P7ZIP_VER) clean
 
 distclean-p7zip:
-	-rm -rf $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.bz2 $(SRCCACHE)/p7zip-$(P7ZIP_VER) $(BUILDDIR)/p7zip-$(P7ZIP_VER)
+	rm -rf $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.bz2 $(SRCCACHE)/p7zip-$(P7ZIP_VER) $(BUILDDIR)/p7zip-$(P7ZIP_VER)
 
 
 get-p7zip: $(SRCCACHE)/p7zip-$(P7ZIP_VER).tar.bz2
@@ -58,8 +61,7 @@ check-p7zip: compile-p7zip
 
 
 else # USE_BINARYBUILDER_P7ZIP
-P7ZIP_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/p7zip_jll.jl/releases/download/p7zip-v$(P7ZIP_VER)+$(P7ZIP_BB_REL)
-P7ZIP_BB_NAME := p7zip.v$(P7ZIP_VER)
+
 $(eval $(call bb-install,p7zip,P7ZIP,false))
 
 endif
diff --git a/deps/patchelf.mk b/deps/patchelf.mk
index 89ad1daf3b4cbc..4d1a281ed2331d 100644
--- a/deps/patchelf.mk
+++ b/deps/patchelf.mk
@@ -1,15 +1,22 @@
 ## patchelf ##
 
-$(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://nixos.org/releases/patchelf/patchelf-$(PATCHELF_VER)/patchelf-$(PATCHELF_VER).tar.gz
+$(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/NixOS/patchelf/releases/download/$(PATCHELF_VER)/patchelf-$(PATCHELF_VER).tar.bz2
 
-$(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz
+$(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2
 	$(JLCHECKSUM) $<
-	cd $(dir $<) && $(TAR) zxf $<
+	mkdir $(dir $@)
+	cd $(dir $@) && $(TAR) jxf $< --strip-components=1
 	touch -c $(SRCCACHE)/patchelf-$(PATCHELF_VER)/configure # old target
 	echo 1 > $@
 
-$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted | $(LIBCXX_DEPENDENCY)
+checksum-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2
+	$(JLCHECKSUM) $<
+
+$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: CC:=$(HOSTCC)
+$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: CXX:=$(HOSTCXX)
+$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: XC_HOST:=$(BUILD_MACHINE)
+$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
 	$(dir $<)/configure $(CONFIGURE_COMMON) LDFLAGS="$(CXXLDFLAGS)" CPPFLAGS="$(CPPFLAGS)"
@@ -31,17 +38,17 @@ $(eval $(call staged-install, \
 	MAKE_INSTALL,$$(LIBTOOL_CCLD),,))
 
 clean-patchelf:
-	-rm $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured \
+	-rm -f $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured \
 		$(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/patchelf-$(PATCHELF_VER) clean
 
 distclean-patchelf:
-	-rm -rf $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz \
+	rm -rf $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2 \
 		$(SRCCACHE)/patchelf-$(PATCHELF_VER) \
 		$(BUILDDIR)/patchelf-$(PATCHELF_VER)
 
 
-get-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.gz
+get-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER).tar.bz2
 extract-patchelf: $(SRCCACHE)/patchelf-$(PATCHELF_VER)/source-extracted
 configure-patchelf: $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-configured
 compile-patchelf: $(BUILDDIR)/patchelf-$(PATCHELF_VER)/build-compiled
diff --git a/deps/patches/SuiteSparse-winclang.patch b/deps/patches/SuiteSparse-winclang.patch
deleted file mode 100644
index 8bfb64dc278393..00000000000000
--- a/deps/patches/SuiteSparse-winclang.patch
+++ /dev/null
@@ -1,14 +0,0 @@
---- SuiteSparse_config/SuiteSparse_config.h	2015-07-15 03:26:41.000000000 +0000
-+++ SuiteSparse_config/SuiteSparse_config.h	2016-07-01 00:55:57.157465600 +0000
-@@ -54,7 +54,11 @@
- #ifdef _WIN64
- 
- #define SuiteSparse_long __int64
-+#ifdef _MSVC_VER
- #define SuiteSparse_long_max _I64_MAX
-+#else
-+#define SuiteSparse_long_max LLONG_MAX
-+#endif
- #define SuiteSparse_long_idd "I64d"
- 
- #else
diff --git a/deps/patches/dSFMT.c.patch b/deps/patches/dSFMT.c.patch
deleted file mode 100644
index 055483135a7893..00000000000000
--- a/deps/patches/dSFMT.c.patch
+++ /dev/null
@@ -1,100 +0,0 @@
---- dsfmt-2.2/dSFMT.c	2012-06-29 03:24:27.000000000 -0400
-+++ dSFMT-patched.c	2012-12-20 12:45:45.000000000 -0500
-@@ -32,13 +32,13 @@
- inline static uint32_t ini_func1(uint32_t x);
- inline static uint32_t ini_func2(uint32_t x);
- inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
--				       int size);
-+				       ptrdiff_t size);
- inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
--				       int size);
-+				       ptrdiff_t size);
- inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
--				       int size);
-+				       ptrdiff_t size);
- inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
--				       int size);
-+				       ptrdiff_t size);
- inline static int idxof(int i);
- static void initial_mask(dsfmt_t *dsfmt);
- static void period_certification(dsfmt_t *dsfmt);
-@@ -142,8 +142,8 @@
-  * @param size number of 128-bit pseudorandom numbers to be generated.
-  */
- inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
--				       int size) {
--    int i, j;
-+				       ptrdiff_t size) {
-+    ptrdiff_t i, j;
-     w128_t lung;
- 
-     lung = dsfmt->status[DSFMT_N];
-@@ -180,8 +180,8 @@
-  * @param size number of 128-bit pseudorandom numbers to be generated.
-  */
- inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
--				       int size) {
--    int i, j;
-+				       ptrdiff_t size) {
-+    ptrdiff_t i, j;
-     w128_t lung;
- 
-     lung = dsfmt->status[DSFMT_N];
-@@ -223,8 +223,8 @@
-  * @param size number of 128-bit pseudorandom numbers to be generated.
-  */
- inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
--				       int size) {
--    int i, j;
-+				       ptrdiff_t size) {
-+    ptrdiff_t i, j;
-     w128_t lung;
- 
-     lung = dsfmt->status[DSFMT_N];
-@@ -266,8 +266,8 @@
-  * @param size number of 128-bit pseudorandom numbers to be generated.
-  */
- inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
--				       int size) {
--    int i, j;
-+				       ptrdiff_t size) {
-+    ptrdiff_t i, j;
-     w128_t lung;
- 
-     lung = dsfmt->status[DSFMT_N];
-@@ -453,7 +453,7 @@
-  * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
-  * returns the pointer to the aligned memory block.
-  */
--void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) {
-+void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], ptrdiff_t size) {
-     assert(size % 2 == 0);
-     assert(size >= DSFMT_N64);
-     gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2);
-@@ -471,7 +471,7 @@
-  * @param size the number of pseudorandom numbers to be generated.
-  * see also \sa fill_array_close1_open2()
-  */
--void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) {
-+void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], ptrdiff_t size) {
-     assert(size % 2 == 0);
-     assert(size >= DSFMT_N64);
-     gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2);
-@@ -489,7 +489,7 @@
-  * @param size the number of pseudorandom numbers to be generated.
-  * see also \sa fill_array_close1_open2()
-  */
--void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) {
-+void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], ptrdiff_t size) {
-     assert(size % 2 == 0);
-     assert(size >= DSFMT_N64);
-     gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2);
-@@ -507,7 +507,7 @@
-  * @param size the number of pseudorandom numbers to be generated.
-  * see also \sa fill_array_close1_open2()
-  */
--void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) {
-+void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], ptrdiff_t size) {
-     assert(size % 2 == 0);
-     assert(size >= DSFMT_N64);
-     gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2);
diff --git a/deps/patches/dSFMT.h.patch b/deps/patches/dSFMT.h.patch
deleted file mode 100644
index b35a7fe3364d51..00000000000000
--- a/deps/patches/dSFMT.h.patch
+++ /dev/null
@@ -1,362 +0,0 @@
---- dsfmt-2.2/dSFMT.h	2012-06-29 03:24:27.000000000 -0400
-+++ dSFMT-patched.h	2012-12-20 12:35:44.000000000 -0500
-@@ -40,6 +40,7 @@
- 
- #include <stdio.h>
- #include <assert.h>
-+#include <stddef.h>
- 
- #if !defined(DSFMT_MEXP)
- #ifdef __GNUC__
-@@ -178,16 +179,17 @@
- extern const int dsfmt_global_mexp;
- 
- void dsfmt_gen_rand_all(dsfmt_t *dsfmt);
--void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size);
--void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size);
--void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size);
--void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size);
-+void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], ptrdiff_t size);
-+void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], ptrdiff_t size);
-+void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], ptrdiff_t size);
-+void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], ptrdiff_t size);
- void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp);
- void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[],
-                              int key_length, int mexp);
- const char *dsfmt_get_idstring(void);
- int dsfmt_get_min_array_size(void);
- 
-+/*
- #if defined(__GNUC__)
- #  define DSFMT_PRE_INLINE inline static
- #  define DSFMT_PST_INLINE __attribute__((always_inline))
-@@ -198,6 +200,10 @@
- #  define DSFMT_PRE_INLINE inline static
- #  define DSFMT_PST_INLINE
- #endif
-+*/
-+#define DSFMT_PRE_INLINE
-+#define DSFMT_PST_INLINE
-+
- DSFMT_PRE_INLINE uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) DSFMT_PST_INLINE;
- DSFMT_PRE_INLINE double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt)
-     DSFMT_PST_INLINE;
-@@ -212,13 +218,13 @@
- DSFMT_PRE_INLINE double dsfmt_gv_genrand_close_open(void) DSFMT_PST_INLINE;
- DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_close(void) DSFMT_PST_INLINE;
- DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_open(void) DSFMT_PST_INLINE;
--DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_close(double array[], int size)
-+DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_close(double array[], ptrdiff_t size)
-     DSFMT_PST_INLINE;
--DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close_open(double array[], int size)
-+DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close_open(double array[], ptrdiff_t size)
-     DSFMT_PST_INLINE;
--DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_open(double array[], int size)
-+DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_open(double array[], ptrdiff_t size)
-     DSFMT_PST_INLINE;
--DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close1_open2(double array[], int size)
-+DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close1_open2(double array[], ptrdiff_t size)
-     DSFMT_PST_INLINE;
- DSFMT_PRE_INLINE void dsfmt_gv_init_gen_rand(uint32_t seed) DSFMT_PST_INLINE;
- DSFMT_PRE_INLINE void dsfmt_gv_init_by_array(uint32_t init_key[],
-@@ -236,7 +242,7 @@
-  * @param dsfmt dsfmt internal state date
-  * @return double precision floating point pseudorandom number
-  */
--inline static uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) {
-+uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) {
-     uint32_t r;
-     uint64_t *psfmt64 = &dsfmt->status[0].u[0];
- 
-@@ -257,7 +263,7 @@
-  * @param dsfmt dsfmt internal state date
-  * @return double precision floating point pseudorandom number
-  */
--inline static double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) {
-+double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) {
-     double r;
-     double *psfmt64 = &dsfmt->status[0].d[0];
- 
-@@ -276,7 +282,7 @@
-  * before this function.  This function uses \b global variables.
-  * @return double precision floating point pseudorandom number
-  */
--inline static uint32_t dsfmt_gv_genrand_uint32(void) {
-+uint32_t dsfmt_gv_genrand_uint32(void) {
-     return dsfmt_genrand_uint32(&dsfmt_global_data);
- }
- 
-@@ -287,7 +293,7 @@
-  * before this function. This function uses \b global variables.
-  * @return double precision floating point pseudorandom number
-  */
--inline static double dsfmt_gv_genrand_close1_open2(void) {
-+double dsfmt_gv_genrand_close1_open2(void) {
-     return dsfmt_genrand_close1_open2(&dsfmt_global_data);
- }
- 
-@@ -299,7 +305,7 @@
-  * @param dsfmt dsfmt internal state date
-  * @return double precision floating point pseudorandom number
-  */
--inline static double dsfmt_genrand_close_open(dsfmt_t *dsfmt) {
-+double dsfmt_genrand_close_open(dsfmt_t *dsfmt) {
-     return dsfmt_genrand_close1_open2(dsfmt) - 1.0;
- }
- 
-@@ -310,7 +316,7 @@
-  * before this function. This function uses \b global variables.
-  * @return double precision floating point pseudorandom number
-  */
--inline static double dsfmt_gv_genrand_close_open(void) {
-+double dsfmt_gv_genrand_close_open(void) {
-     return dsfmt_gv_genrand_close1_open2() - 1.0;
- }
- 
-@@ -322,7 +328,7 @@
-  * @param dsfmt dsfmt internal state date
-  * @return double precision floating point pseudorandom number
-  */
--inline static double dsfmt_genrand_open_close(dsfmt_t *dsfmt) {
-+double dsfmt_genrand_open_close(dsfmt_t *dsfmt) {
-     return 2.0 - dsfmt_genrand_close1_open2(dsfmt);
- }
- 
-@@ -333,7 +339,7 @@
-  * before this function. This function uses \b global variables.
-  * @return double precision floating point pseudorandom number
-  */
--inline static double dsfmt_gv_genrand_open_close(void) {
-+double dsfmt_gv_genrand_open_close(void) {
-     return 2.0 - dsfmt_gv_genrand_close1_open2();
- }
- 
-@@ -345,7 +351,7 @@
-  * @param dsfmt dsfmt internal state date
-  * @return double precision floating point pseudorandom number
-  */
--inline static double dsfmt_genrand_open_open(dsfmt_t *dsfmt) {
-+double dsfmt_genrand_open_open(dsfmt_t *dsfmt) {
-     double *dsfmt64 = &dsfmt->status[0].d[0];
-     union {
-         double d;
-@@ -368,7 +374,7 @@
-  * before this function. This function uses \b global variables.
-  * @return double precision floating point pseudorandom number
-  */
--inline static double dsfmt_gv_genrand_open_open(void) {
-+double dsfmt_gv_genrand_open_open(void) {
-     return dsfmt_genrand_open_open(&dsfmt_global_data);
- }
- 
-@@ -383,7 +389,7 @@
-  * @param size the number of pseudorandom numbers to be generated.
-  * see also \sa dsfmt_fill_array_close1_open2()
-  */
--inline static void dsfmt_gv_fill_array_close1_open2(double array[], int size) {
-+void dsfmt_gv_fill_array_close1_open2(double array[], ptrdiff_t size) {
-     dsfmt_fill_array_close1_open2(&dsfmt_global_data, array, size);
- }
- 
-@@ -399,7 +405,7 @@
-  * see also \sa dsfmt_fill_array_close1_open2() and \sa
-  * dsfmt_gv_fill_array_close1_open2()
-  */
--inline static void dsfmt_gv_fill_array_open_close(double array[], int size) {
-+void dsfmt_gv_fill_array_open_close(double array[], ptrdiff_t size) {
-     dsfmt_fill_array_open_close(&dsfmt_global_data, array, size);
- }
- 
-@@ -415,7 +421,7 @@
-  * see also \sa dsfmt_fill_array_close1_open2() \sa
-  * dsfmt_gv_fill_array_close1_open2()
-  */
--inline static void dsfmt_gv_fill_array_close_open(double array[], int size) {
-+void dsfmt_gv_fill_array_close_open(double array[], ptrdiff_t size) {
-     dsfmt_fill_array_close_open(&dsfmt_global_data, array, size);
- }
- 
-@@ -431,7 +437,7 @@
-  * see also \sa dsfmt_fill_array_close1_open2() \sa
-  * dsfmt_gv_fill_array_close1_open2()
-  */
--inline static void dsfmt_gv_fill_array_open_open(double array[], int size) {
-+void dsfmt_gv_fill_array_open_open(double array[], ptrdiff_t size) {
-     dsfmt_fill_array_open_open(&dsfmt_global_data, array, size);
- }
- 
-@@ -441,7 +447,7 @@
-  * @param dsfmt dsfmt state vector.
-  * @param seed a 32-bit integer used as the seed.
-  */
--inline static void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) {
-+void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) {
-     dsfmt_chk_init_gen_rand(dsfmt, seed, DSFMT_MEXP);
- }
- 
-@@ -451,7 +457,7 @@
-  * @param seed a 32-bit integer used as the seed.
-  * see also \sa dsfmt_init_gen_rand()
-  */
--inline static void dsfmt_gv_init_gen_rand(uint32_t seed) {
-+void dsfmt_gv_init_gen_rand(uint32_t seed) {
-     dsfmt_init_gen_rand(&dsfmt_global_data, seed);
- }
- 
-@@ -462,7 +468,7 @@
-  * @param init_key the array of 32-bit integers, used as a seed.
-  * @param key_length the length of init_key.
-  */
--inline static void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[],
-+void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[],
-                                        int key_length) {
-     dsfmt_chk_init_by_array(dsfmt, init_key, key_length, DSFMT_MEXP);
- }
-@@ -475,7 +481,7 @@
-  * @param key_length the length of init_key.
-  * see also \sa dsfmt_init_by_array()
-  */
--inline static void dsfmt_gv_init_by_array(uint32_t init_key[], int key_length) {
-+void dsfmt_gv_init_by_array(uint32_t init_key[], int key_length) {
-     dsfmt_init_by_array(&dsfmt_global_data, init_key, key_length);
- }
- 
-@@ -489,13 +495,13 @@
- DSFMT_PRE_INLINE double genrand_close_open(void) DSFMT_PST_INLINE;
- DSFMT_PRE_INLINE double genrand_open_close(void) DSFMT_PST_INLINE;
- DSFMT_PRE_INLINE double genrand_open_open(void) DSFMT_PST_INLINE;
--DSFMT_PRE_INLINE void fill_array_open_close(double array[], int size)
-+DSFMT_PRE_INLINE void fill_array_open_close(double array[], ptrdiff_t size)
-     DSFMT_PST_INLINE;
--DSFMT_PRE_INLINE void fill_array_close_open(double array[], int size)
-+DSFMT_PRE_INLINE void fill_array_close_open(double array[], ptrdiff_t size)
-     DSFMT_PST_INLINE;
--DSFMT_PRE_INLINE void fill_array_open_open(double array[], int size)
-+DSFMT_PRE_INLINE void fill_array_open_open(double array[], ptrdiff_t size)
-     DSFMT_PST_INLINE;
--DSFMT_PRE_INLINE void fill_array_close1_open2(double array[], int size)
-+DSFMT_PRE_INLINE void fill_array_close1_open2(double array[], ptrdiff_t size)
-     DSFMT_PST_INLINE;
- 
- /**
-@@ -503,7 +509,7 @@
-  * @return id string.
-  * see also \sa dsfmt_get_idstring()
-  */
--inline static const char *get_idstring(void) {
-+const char *get_idstring(void) {
-     return dsfmt_get_idstring();
- }
- 
-@@ -512,7 +518,7 @@
-  * @return minimum size of array used for fill_array functions.
-  * see also \sa dsfmt_get_min_array_size()
-  */
--inline static int get_min_array_size(void) {
-+int get_min_array_size(void) {
-     return dsfmt_get_min_array_size();
- }
- 
-@@ -521,7 +527,7 @@
-  * @param seed a 32-bit integer used as the seed.
-  * see also \sa dsfmt_gv_init_gen_rand(), \sa dsfmt_init_gen_rand().
-  */
--inline static void init_gen_rand(uint32_t seed) {
-+void init_gen_rand(uint32_t seed) {
-     dsfmt_gv_init_gen_rand(seed);
- }
- 
-@@ -531,7 +537,7 @@
-  * @param key_length the length of init_key.
-  * see also \sa dsfmt_gv_init_by_array(), \sa dsfmt_init_by_array().
-  */
--inline static void init_by_array(uint32_t init_key[], int key_length) {
-+void init_by_array(uint32_t init_key[], int key_length) {
-     dsfmt_gv_init_by_array(init_key, key_length);
- }
- 
-@@ -541,7 +547,7 @@
-  * see also \sa dsfmt_genrand_close1_open2() \sa
-  * dsfmt_gv_genrand_close1_open2()
-  */
--inline static double genrand_close1_open2(void) {
-+double genrand_close1_open2(void) {
-     return dsfmt_gv_genrand_close1_open2();
- }
- 
-@@ -551,7 +557,7 @@
-  * see also \sa dsfmt_genrand_close_open() \sa
-  * dsfmt_gv_genrand_close_open()
-  */
--inline static double genrand_close_open(void) {
-+double genrand_close_open(void) {
-     return dsfmt_gv_genrand_close_open();
- }
- 
-@@ -561,7 +567,7 @@
-  * see also \sa dsfmt_genrand_open_close() \sa
-  * dsfmt_gv_genrand_open_close()
-  */
--inline static double genrand_open_close(void) {
-+double genrand_open_close(void) {
-     return dsfmt_gv_genrand_open_close();
- }
- 
-@@ -571,7 +577,7 @@
-  * see also \sa dsfmt_genrand_open_open() \sa
-  * dsfmt_gv_genrand_open_open()
-  */
--inline static double genrand_open_open(void) {
-+double genrand_open_open(void) {
-     return dsfmt_gv_genrand_open_open();
- }
- 
-@@ -584,7 +590,7 @@
-  * dsfmt_fill_array_close1_open2(), \sa
-  * dsfmt_gv_fill_array_close1_open2()
-  */
--inline static void fill_array_open_close(double array[], int size) {
-+void fill_array_open_close(double array[], ptrdiff_t size) {
-     dsfmt_gv_fill_array_open_close(array, size);
- }
- 
-@@ -597,7 +603,7 @@
-  * dsfmt_fill_array_close1_open2(), \sa
-  * dsfmt_gv_fill_array_close1_open2()
-  */
--inline static void fill_array_close_open(double array[], int size) {
-+void fill_array_close_open(double array[], ptrdiff_t size) {
-     dsfmt_gv_fill_array_close_open(array, size);
- }
- 
-@@ -610,7 +616,7 @@
-  * dsfmt_fill_array_close1_open2(), \sa
-  * dsfmt_gv_fill_array_close1_open2()
-  */
--inline static void fill_array_open_open(double array[], int size) {
-+void fill_array_open_open(double array[], ptrdiff_t size) {
-     dsfmt_gv_fill_array_open_open(array, size);
- }
- 
-@@ -622,7 +628,7 @@
-  * see also \sa dsfmt_fill_array_close1_open2(), \sa
-  * dsfmt_gv_fill_array_close1_open2()
-  */
--inline static void fill_array_close1_open2(double array[], int size) {
-+void fill_array_close1_open2(double array[], ptrdiff_t size) {
-     dsfmt_gv_fill_array_close1_open2(array, size);
- }
- #endif /* DSFMT_DO_NOT_USE_OLD_NAMES */
-diff --git a/test.c b/test.c
-index 82d55db..d65db9a 100644
---- a/test.c
-+++ b/test.c
-@@ -4,7 +4,7 @@
- #include <time.h>
- #include <string.h>
- #define DSFMT_DO_NOT_USE_OLD_NAMES
--#include "dSFMT.h"
-+#include "dSFMT.h.orig"
- 
- #define NUM_RANDS 50000
- #define TIC_MAG 1
diff --git a/deps/patches/gmp-HG-changeset.patch b/deps/patches/gmp-HG-changeset.patch
new file mode 100644
index 00000000000000..7437fb6f2f748e
--- /dev/null
+++ b/deps/patches/gmp-HG-changeset.patch
@@ -0,0 +1,520 @@
+
+# HG changeset patch
+# User Torbjorn Granlund <tg@gmplib.org>
+# Date 1606685500 -3600
+# Node ID 5f32dbc41afc1f8cd77af1614f0caeb24deb7d7b
+# Parent  94c84d919f83ba963ed1809f8e80c7bef32db55c
+Avoid the x18 register since it is reserved on Darwin.
+
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aors_n.asm
+--- a/mpn/arm64/aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -68,7 +68,7 @@
+ EPILOGUE()
+ PROLOGUE(func_n)
+ 	CLRCY
+-L(ent):	lsr	x18, n, #2
++L(ent):	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x7, [up]
+@@ -77,7 +77,7 @@
+ 	str	x13, [rp],#8
+ 	tbnz	n, #1, L(b11)
+ 
+-L(b01):	cbz	x18, L(ret)
++L(b01):	cbz	x17, L(ret)
+ 	ldp	x4, x5, [up,#8]
+ 	ldp	x8, x9, [vp,#8]
+ 	sub	up, up, #8
+@@ -88,7 +88,7 @@
+ 	ldp	x10, x11, [vp,#8]
+ 	add	up, up, #8
+ 	add	vp, vp, #8
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 	b	L(top)
+ 
+ L(bx0):	tbnz	n, #1, L(b10)
+@@ -101,7 +101,7 @@
+ 
+ L(b10):	ldp	x6, x7, [up]
+ 	ldp	x10, x11, [vp]
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	ldp	x4, x5, [up,#16]
+@@ -114,8 +114,8 @@
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	stp	x12, x13, [rp],#16
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x17, x17, #1
++	cbnz	x17, L(top)
+ 
+ L(end):	ADDSUBC	x12, x6, x10
+ 	ADDSUBC	x13, x7, x11
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aorsmul_1.asm
+--- a/mpn/arm64/aorsmul_1.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/aorsmul_1.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -32,10 +32,15 @@
+ 
+ include(`../config.m4')
+ 
+-C	     cycles/limb
+-C Cortex-A53	9.3-9.8
+-C Cortex-A57	 7.0
+-C X-Gene	 5.0
++C	       addmul_1        submul_1
++C	     cycles/limb     cycles/limb
++C Cortex-A53	9.3-9.8		9.3-9.8
++C Cortex-A55    9.0-9.5		9.3-9.8
++C Cortex-A57	 7		 7
++C Cortex-A72
++C Cortex-A73	 6		 6
++C X-Gene	 5		 5
++C Apple M1	 1.75		 1.75
+ 
+ C NOTES
+ C  * It is possible to keep the carry chain alive between the addition blocks
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/aorsorrlshC_n.asm
+--- a/mpn/arm64/aorsorrlshC_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/aorsorrlshC_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -65,14 +65,14 @@
+ 
+ ASM_START()
+ PROLOGUE(func_n)
+-	lsr	x18, n, #2
++	lsr	x6, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x5, [up]
+ 	tbnz	n, #1, L(b11)
+ 
+ L(b01):	ldr	x11, [vp]
+-	cbz	x18, L(1)
++	cbz	x6, L(1)
+ 	ldp	x8, x9, [vp,#8]
+ 	lsl	x13, x11, #LSH
+ 	ADDSUB(	x15, x13, x5)
+@@ -94,7 +94,7 @@
+ 	ADDSUB(	x17, x13, x5)
+ 	str	x17, [rp],#8
+ 	sub	up, up, #8
+-	cbz	x18, L(end)
++	cbz	x6, L(end)
+ 	b	L(top)
+ 
+ L(bx0):	tbnz	n, #1, L(b10)
+@@ -107,7 +107,7 @@
+ L(b10):	CLRRCY(	x9)
+ 	ldp	x10, x11, [vp]
+ 	sub	up, up, #16
+-	cbz	x18, L(end)
++	cbz	x6, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	ldp	x4, x5, [up,#16]
+@@ -124,8 +124,8 @@
+ 	ADDSUBC(x16, x12, x4)
+ 	ADDSUBC(x17, x13, x5)
+ 	stp	x16, x17, [rp],#16
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x6, x6, #1
++	cbnz	x6, L(top)
+ 
+ L(end):	ldp	x4, x5, [up,#16]
+ 	extr	x12, x10, x9, #RSH
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/cnd_aors_n.asm
+--- a/mpn/arm64/cnd_aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/cnd_aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -65,7 +65,7 @@
+ 
+ 	CLRCY
+ 
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x13, [vp]
+@@ -75,7 +75,7 @@
+ 	str	x9, [rp]
+ 	tbnz	n, #1, L(b11)
+ 
+-L(b01):	cbz	x18, L(rt)
++L(b01):	cbz	x17, L(rt)
+ 	ldp	x12, x13, [vp,#8]
+ 	ldp	x10, x11, [up,#8]
+ 	sub	up, up, #8
+@@ -86,7 +86,7 @@
+ L(b11):	ldp	x12, x13, [vp,#8]!
+ 	ldp	x10, x11, [up,#8]!
+ 	sub	rp, rp, #8
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 	b	L(top)
+ 
+ L(bx0):	ldp	x12, x13, [vp]
+@@ -99,7 +99,7 @@
+ 	b	L(mid)
+ 
+ L(b10):	sub	rp, rp, #16
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	bic	x6, x12, cnd
+@@ -116,8 +116,8 @@
+ 	ADDSUBC	x9, x11, x7
+ 	ldp	x10, x11, [up,#32]!
+ 	stp	x8, x9, [rp,#32]!
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x17, x17, #1
++	cbnz	x17, L(top)
+ 
+ L(end):	bic	x6, x12, cnd
+ 	bic	x7, x13, cnd
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/logops_n.asm
+--- a/mpn/arm64/logops_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/logops_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -78,7 +78,7 @@
+ 
+ ASM_START()
+ PROLOGUE(func)
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x7, [up]
+@@ -88,7 +88,7 @@
+ 	str	x15, [rp],#8
+ 	tbnz	n, #1, L(b11)
+ 
+-L(b01):	cbz	x18, L(ret)
++L(b01):	cbz	x17, L(ret)
+ 	ldp	x4, x5, [up,#8]
+ 	ldp	x8, x9, [vp,#8]
+ 	sub	up, up, #8
+@@ -99,7 +99,7 @@
+ 	ldp	x10, x11, [vp,#8]
+ 	add	up, up, #8
+ 	add	vp, vp, #8
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 	b	L(top)
+ 
+ L(bx0):	tbnz	n, #1, L(b10)
+@@ -110,7 +110,7 @@
+ 
+ L(b10):	ldp	x6, x7, [up]
+ 	ldp	x10, x11, [vp]
+-	cbz	x18, L(end)
++	cbz	x17, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	ldp	x4, x5, [up,#16]
+@@ -127,8 +127,8 @@
+ 	POSTOP(	x12)
+ 	POSTOP(	x13)
+ 	stp	x12, x13, [rp],#16
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x17, x17, #1
++	cbnz	x17, L(top)
+ 
+ L(end):	LOGOP(	x12, x6, x10)
+ 	LOGOP(	x13, x7, x11)
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/lshift.asm
+--- a/mpn/arm64/lshift.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/lshift.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -61,7 +61,7 @@
+ 	add	rp, rp_arg, n, lsl #3
+ 	add	up, up, n, lsl #3
+ 	sub	tnc, xzr, cnt
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x4, [up,#-8]
+@@ -69,7 +69,7 @@
+ 
+ L(b01):	NSHIFT	x0, x4, tnc
+ 	PSHIFT	x2, x4, cnt
+-	cbnz	x18, L(gt1)
++	cbnz	x17, L(gt1)
+ 	str	x2, [rp,#-8]
+ 	ret
+ L(gt1):	ldp	x4, x5, [up,#-24]
+@@ -89,7 +89,7 @@
+ 	PSHIFT	x13, x5, cnt
+ 	NSHIFT	x10, x4, tnc
+ 	PSHIFT	x2, x4, cnt
+-	cbnz	x18, L(gt2)
++	cbnz	x17, L(gt2)
+ 	orr	x10, x10, x13
+ 	stp	x2, x10, [rp,#-16]
+ 	ret
+@@ -123,11 +123,11 @@
+ 	orr	x11, x12, x2
+ 	stp	x10, x11, [rp,#-32]!
+ 	PSHIFT	x2, x4, cnt
+-L(lo0):	sub	x18, x18, #1
++L(lo0):	sub	x17, x17, #1
+ L(lo3):	NSHIFT	x10, x6, tnc
+ 	PSHIFT	x13, x7, cnt
+ 	NSHIFT	x12, x7, tnc
+-	cbnz	x18, L(top)
++	cbnz	x17, L(top)
+ 
+ L(end):	orr	x10, x10, x13
+ 	orr	x11, x12, x2
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/lshiftc.asm
+--- a/mpn/arm64/lshiftc.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/lshiftc.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -61,7 +61,7 @@
+ 	add	rp, rp_arg, n, lsl #3
+ 	add	up, up, n, lsl #3
+ 	sub	tnc, xzr, cnt
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x4, [up,#-8]
+@@ -69,7 +69,7 @@
+ 
+ L(b01):	NSHIFT	x0, x4, tnc
+ 	PSHIFT	x2, x4, cnt
+-	cbnz	x18, L(gt1)
++	cbnz	x17, L(gt1)
+ 	mvn	x2, x2
+ 	str	x2, [rp,#-8]
+ 	ret
+@@ -90,7 +90,7 @@
+ 	PSHIFT	x13, x5, cnt
+ 	NSHIFT	x10, x4, tnc
+ 	PSHIFT	x2, x4, cnt
+-	cbnz	x18, L(gt2)
++	cbnz	x17, L(gt2)
+ 	eon	x10, x10, x13
+ 	mvn	x2, x2
+ 	stp	x2, x10, [rp,#-16]
+@@ -125,11 +125,11 @@
+ 	eon	x11, x12, x2
+ 	stp	x10, x11, [rp,#-32]!
+ 	PSHIFT	x2, x4, cnt
+-L(lo0):	sub	x18, x18, #1
++L(lo0):	sub	x17, x17, #1
+ L(lo3):	NSHIFT	x10, x6, tnc
+ 	PSHIFT	x13, x7, cnt
+ 	NSHIFT	x12, x7, tnc
+-	cbnz	x18, L(top)
++	cbnz	x17, L(top)
+ 
+ L(end):	eon	x10, x10, x13
+ 	eon	x11, x12, x2
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/mul_1.asm
+--- a/mpn/arm64/mul_1.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/mul_1.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -56,7 +56,7 @@
+ 
+ PROLOGUE(mpn_mul_1)
+ 	adds	x4, xzr, xzr		C clear register and cy flag
+-L(com):	lsr	x18, n, #2
++L(com):	lsr	x17, n, #2
+ 	tbnz	n, #0, L(bx1)
+ 
+ L(bx0):	mov	x11, x4
+@@ -65,7 +65,7 @@
+ L(b10):	ldp	x4, x5, [up]
+ 	mul	x8, x4, v0
+ 	umulh	x10, x4, v0
+-	cbz	x18, L(2)
++	cbz	x17, L(2)
+ 	ldp	x6, x7, [up,#16]!
+ 	mul	x9, x5, v0
+ 	b	L(mid)-8
+@@ -80,7 +80,7 @@
+ 	str	x9, [rp],#8
+ 	tbnz	n, #1, L(b10)
+ 
+-L(b01):	cbz	x18, L(1)
++L(b01):	cbz	x17, L(1)
+ 
+ L(b00):	ldp	x6, x7, [up]
+ 	mul	x8, x6, v0
+@@ -90,8 +90,8 @@
+ 	adcs	x12, x8, x11
+ 	umulh	x11, x7, v0
+ 	add	rp, rp, #16
+-	sub	x18, x18, #1
+-	cbz	x18, L(end)
++	sub	x17, x17, #1
++	cbz	x17, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	mul	x8, x4, v0
+@@ -110,8 +110,8 @@
+ 	stp	x12, x13, [rp],#32
+ 	adcs	x12, x8, x11
+ 	umulh	x11, x7, v0
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x17, x17, #1
++	cbnz	x17, L(top)
+ 
+ L(end):	mul	x8, x4, v0
+ 	adcs	x13, x9, x10
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/rsh1aors_n.asm
+--- a/mpn/arm64/rsh1aors_n.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/rsh1aors_n.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -59,7 +59,7 @@
+ 
+ ASM_START()
+ PROLOGUE(func_n)
+-	lsr	x18, n, #2
++	lsr	x6, n, #2
+ 
+ 	tbz	n, #0, L(bx0)
+ 
+@@ -69,7 +69,7 @@
+ 
+ L(b01):	ADDSUB	x13, x5, x9
+ 	and	x10, x13, #1
+-	cbz	x18, L(1)
++	cbz	x6, L(1)
+ 	ldp	x4, x5, [up],#48
+ 	ldp	x8, x9, [vp],#48
+ 	ADDSUBC	x14, x4, x8
+@@ -80,8 +80,8 @@
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	str	x17, [rp], #24
+-	sub	x18, x18, #1
+-	cbz	x18, L(end)
++	sub	x6, x6, #1
++	cbz	x6, L(end)
+ 	b	L(top)
+ 
+ L(1):	cset	x14, COND
+@@ -97,7 +97,7 @@
+ 	ldp	x8, x9, [vp],#32
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+-	cbz	x18, L(3)
++	cbz	x6, L(3)
+ 	ldp	x4, x5, [up,#-16]
+ 	ldp	x8, x9, [vp,#-16]
+ 	extr	x17, x12, x15, #1
+@@ -117,7 +117,7 @@
+ 	ADDSUB	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	and	x10, x12, #1
+-	cbz	x18, L(2)
++	cbz	x6, L(2)
+ 	ldp	x4, x5, [up,#-16]
+ 	ldp	x8, x9, [vp,#-16]
+ 	ADDSUBC	x14, x4, x8
+@@ -134,8 +134,8 @@
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	add	rp, rp, #16
+-	sub	x18, x18, #1
+-	cbz	x18, L(end)
++	sub	x6, x6, #1
++	cbz	x6, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	ldp	x4, x5, [up,#-16]
+@@ -152,8 +152,8 @@
+ 	ADDSUBC	x12, x4, x8
+ 	ADDSUBC	x13, x5, x9
+ 	stp	x16, x17, [rp],#32
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x6, x6, #1
++	cbnz	x6, L(top)
+ 
+ L(end):	extr	x16, x15, x14, #1
+ 	extr	x17, x12, x15, #1
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/rshift.asm
+--- a/mpn/arm64/rshift.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/rshift.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -60,7 +60,7 @@
+ PROLOGUE(mpn_rshift)
+ 	mov	rp, rp_arg
+ 	sub	tnc, xzr, cnt
+-	lsr	x18, n, #2
++	lsr	x17, n, #2
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	ldr	x5, [up]
+@@ -68,7 +68,7 @@
+ 
+ L(b01):	NSHIFT	x0, x5, tnc
+ 	PSHIFT	x2, x5, cnt
+-	cbnz	x18, L(gt1)
++	cbnz	x17, L(gt1)
+ 	str	x2, [rp]
+ 	ret
+ L(gt1):	ldp	x4, x5, [up,#8]
+@@ -89,7 +89,7 @@
+ 	PSHIFT	x13, x4, cnt
+ 	NSHIFT	x10, x5, tnc
+ 	PSHIFT	x2, x5, cnt
+-	cbnz	x18, L(gt2)
++	cbnz	x17, L(gt2)
+ 	orr	x10, x10, x13
+ 	stp	x10, x2, [rp]
+ 	ret
+@@ -121,11 +121,11 @@
+ 	orr	x11, x12, x2
+ 	stp	x11, x10, [rp,#32]!
+ 	PSHIFT	x2, x5, cnt
+-L(lo0):	sub	x18, x18, #1
++L(lo0):	sub	x17, x17, #1
+ L(lo3):	NSHIFT	x10, x7, tnc
+ 	NSHIFT	x12, x6, tnc
+ 	PSHIFT	x13, x6, cnt
+-	cbnz	x18, L(top)
++	cbnz	x17, L(top)
+ 
+ L(end):	orr	x10, x10, x13
+ 	orr	x11, x12, x2
+diff -r 94c84d919f83 -r 5f32dbc41afc mpn/arm64/sqr_diag_addlsh1.asm
+--- a/mpn/arm64/sqr_diag_addlsh1.asm	Sat Nov 28 23:38:32 2020 +0100
++++ b/mpn/arm64/sqr_diag_addlsh1.asm	Sun Nov 29 22:31:40 2020 +0100
+@@ -47,7 +47,7 @@
+ ASM_START()
+ PROLOGUE(mpn_sqr_diag_addlsh1)
+ 	ldr	x15, [up],#8
+-	lsr	x18, n, #1
++	lsr	x14, n, #1
+ 	tbz	n, #0, L(bx0)
+ 
+ L(bx1):	adds	x7, xzr, xzr
+@@ -62,8 +62,8 @@
+ 	ldr	x17, [up],#16
+ 	ldp	x6, x7, [tp],#32
+ 	umulh	x11, x15, x15
+-	sub	x18, x18, #1
+-	cbz	x18, L(end)
++	sub	x14, x14, #1
++	cbz	x14, L(end)
+ 
+ 	ALIGN(16)
+ L(top):	extr	x9, x6, x5, #63
+@@ -84,8 +84,8 @@
+ 	extr	x8, x5, x4, #63
+ 	stp	x12, x13, [rp],#16
+ 	adcs	x12, x8, x10
+-	sub	x18, x18, #1
+-	cbnz	x18, L(top)
++	sub	x14, x14, #1
++	cbnz	x14, L(top)
+ 
+ L(end):	extr	x9, x6, x5, #63
+ 	mul	x10, x17, x17
diff --git a/deps/patches/gmp-apple-arm64.patch b/deps/patches/gmp-apple-arm64.patch
deleted file mode 100644
index 241c0216de004a..00000000000000
--- a/deps/patches/gmp-apple-arm64.patch
+++ /dev/null
@@ -1,183 +0,0 @@
-
-# HG changeset patch
-# User Torbjorn Granlund <tg@gmplib.org>
-# Date 1593897341 -7200
-# Node ID c5d0fcb069696e02aeff5b64108cd3ba299bf181
-# Parent  9240f425c5853b8f76cf91646301ee39bac434d9
-Initial support for arm64-darwin.
-
-diff -r 9240f425c585 -r c5d0fcb06969 configure.ac
---- a/configure.ac	Thu Jun 18 18:39:48 2020 +0200
-+++ b/configure.ac	Sat Jul 04 23:15:41 2020 +0200
-@@ -3699,6 +3699,14 @@
-       case $ABI in
-         32)
- 	  GMP_INCLUDE_MPN(arm/arm-defs.m4) ;;
-+        64)
-+	  case $host in
-+	    *-*-darwin*)
-+	      GMP_INCLUDE_MPN(arm64/darwin.m4) ;;
-+	    *)
-+	      GMP_INCLUDE_MPN(arm64/arm64-defs.m4) ;;
-+          esac
-+	  ;;
-       esac
-       ;;
-     hppa*-*-*)
-diff -r 9240f425c585 -r c5d0fcb06969 mpn/arm64/arm64-defs.m4
---- /dev/null	Thu Jan 01 00:00:00 1970 +0000
-+++ b/mpn/arm64/arm64-defs.m4	Sat Jul 04 23:15:41 2020 +0200
-@@ -0,0 +1,53 @@
-+divert(-1)
-+
-+dnl  m4 macros for ARM64 ELF assembler.
-+
-+dnl  Copyright 2020 Free Software Foundation, Inc.
-+
-+dnl  This file is part of the GNU MP Library.
-+dnl
-+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-+dnl  it under the terms of either:
-+dnl
-+dnl    * the GNU Lesser General Public License as published by the Free
-+dnl      Software Foundation; either version 3 of the License, or (at your
-+dnl      option) any later version.
-+dnl
-+dnl  or
-+dnl
-+dnl    * the GNU General Public License as published by the Free Software
-+dnl      Foundation; either version 2 of the License, or (at your option) any
-+dnl      later version.
-+dnl
-+dnl  or both in parallel, as here.
-+dnl
-+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-+dnl  for more details.
-+dnl
-+dnl  You should have received copies of the GNU General Public License and the
-+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-+dnl  see https://www.gnu.org/licenses/.
-+
-+
-+dnl  Standard commenting is with @, the default m4 # is for constants and we
-+dnl  don't want to disable macro expansions in or after them.
-+
-+changecom
-+
-+
-+dnl  LEA_HI(reg,gmp_symbol), LEA_LO(reg,gmp_symbol)
-+dnl
-+dnl  Load the address of gmp_symbol into a register. We split this into two
-+dnl  parts to allow separation for manual insn scheduling.
-+
-+ifdef(`PIC',`dnl
-+define(`LEA_HI', `adrp	$1, :got:$2')dnl
-+define(`LEA_LO', `ldr	$1, [$1, #:got_lo12:$2]')dnl
-+',`dnl
-+define(`LEA_HI', `adrp	$1, $2')dnl
-+define(`LEA_LO', `add	$1, $1, :lo12:$2')dnl
-+')dnl
-+
-+divert`'dnl
-diff -r 9240f425c585 -r c5d0fcb06969 mpn/arm64/bdiv_q_1.asm
---- a/mpn/arm64/bdiv_q_1.asm	Thu Jun 18 18:39:48 2020 +0200
-+++ b/mpn/arm64/bdiv_q_1.asm	Sat Jul 04 23:15:41 2020 +0200
-@@ -61,15 +61,9 @@
- 	clz	cnt, x6
- 	lsr	d, d, cnt
- 
--ifdef(`PIC',`
--	adrp	x7, :got:__gmp_binvert_limb_table
-+	LEA_HI(	x7, binvert_limb_table)
- 	ubfx	x6, d, 1, 7
--	ldr	x7, [x7, #:got_lo12:__gmp_binvert_limb_table]
--',`
--	adrp	x7, __gmp_binvert_limb_table
--	ubfx	x6, d, 1, 7
--	add	x7, x7, :lo12:__gmp_binvert_limb_table
--')
-+	LEA_LO(	x7, binvert_limb_table)
- 	ldrb	w6, [x7, x6]
- 	ubfiz	x7, x6, 1, 8
- 	umull	x6, w6, w6
-@@ -81,7 +75,7 @@
- 	mul	x6, x6, x6
- 	msub	di, x6, d, x7
- 
--	b	mpn_pi1_bdiv_q_1
-+	b	GSYM_PREFIX`'mpn_pi1_bdiv_q_1
- EPILOGUE()
- 
- PROLOGUE(mpn_pi1_bdiv_q_1)
-diff -r 9240f425c585 -r c5d0fcb06969 mpn/arm64/darwin.m4
---- /dev/null	Thu Jan 01 00:00:00 1970 +0000
-+++ b/mpn/arm64/darwin.m4	Sat Jul 04 23:15:41 2020 +0200
-@@ -0,0 +1,50 @@
-+divert(-1)
-+
-+dnl  m4 macros for ARM64 Darwin assembler.
-+
-+dnl  Copyright 2020 Free Software Foundation, Inc.
-+
-+dnl  This file is part of the GNU MP Library.
-+dnl
-+dnl  The GNU MP Library is free software; you can redistribute it and/or modify
-+dnl  it under the terms of either:
-+dnl
-+dnl    * the GNU Lesser General Public License as published by the Free
-+dnl      Software Foundation; either version 3 of the License, or (at your
-+dnl      option) any later version.
-+dnl
-+dnl  or
-+dnl
-+dnl    * the GNU General Public License as published by the Free Software
-+dnl      Foundation; either version 2 of the License, or (at your option) any
-+dnl      later version.
-+dnl
-+dnl  or both in parallel, as here.
-+dnl
-+dnl  The GNU MP Library is distributed in the hope that it will be useful, but
-+dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
-+dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
-+dnl  for more details.
-+dnl
-+dnl  You should have received copies of the GNU General Public License and the
-+dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
-+dnl  see https://www.gnu.org/licenses/.
-+
-+
-+dnl  Standard commenting is with @, the default m4 # is for constants and we
-+dnl  don't want to disable macro expansions in or after them.
-+
-+changecom
-+
-+
-+dnl  LEA_HI(reg,gmp_symbol), LEA_LO(reg,gmp_symbol)
-+dnl
-+dnl  Load the address of gmp_symbol into a register. We split this into two
-+dnl  parts to allow separation for manual insn scheduling.  TODO: Darwin allows
-+dnl  for relaxing these two insns into an adr and a nop, but that requires the
-+dnl  .loh pseudo for connecting them.
-+
-+define(`LEA_HI',`adrp	$1, $2@GOTPAGE')dnl
-+define(`LEA_LO',`ldr	$1, [$1, $2@GOTPAGEOFF]')dnl
-+
-+divert`'dnl
-diff -r 9240f425c585 -r c5d0fcb06969 mpn/arm64/invert_limb.asm
---- a/mpn/arm64/invert_limb.asm	Thu Jun 18 18:39:48 2020 +0200
-+++ b/mpn/arm64/invert_limb.asm	Sat Jul 04 23:15:41 2020 +0200
-@@ -41,9 +41,9 @@
- ASM_START()
- PROLOGUE(mpn_invert_limb)
- 	lsr	x2, x0, #54
--	adrp	x1, approx_tab
-+	LEA_HI(	x1, approx_tab)
- 	and	x2, x2, #0x1fe
--	add	x1, x1, :lo12:approx_tab
-+	LEA_LO(	x1, approx_tab)
- 	ldrh	w3, [x1,x2]
- 	lsr	x4, x0, #24
- 	add	x4, x4, #1
-
diff --git a/deps/patches/libgit2-hostkey.patch b/deps/patches/libgit2-hostkey.patch
new file mode 100644
index 00000000000000..8be5e5cc92e5ee
--- /dev/null
+++ b/deps/patches/libgit2-hostkey.patch
@@ -0,0 +1,29 @@
+diff --git a/src/transports/ssh.c b/src/transports/ssh.c
+index 471c3273ed..32189d0979 100644
+--- a/src/transports/ssh.c
++++ b/src/transports/ssh.c
+@@ -525,6 +525,7 @@ static int _git_ssh_setup_conn(
+ 	git_credential *cred = NULL;
+ 	LIBSSH2_SESSION *session=NULL;
+ 	LIBSSH2_CHANNEL *channel=NULL;
++	char *host_and_port;
+ 
+ 	t->current_stream = NULL;
+ 
+@@ -636,7 +637,15 @@ post_extract:
+ 
+ 		cert_ptr = &cert;
+ 
+-		error = t->owner->certificate_check_cb((git_cert *) cert_ptr, 0, urldata.host, t->owner->message_cb_payload);
++		if (atoi(urldata.port) == SSH_DEFAULT_PORT) {
++			host_and_port = urldata.host;
++		} else {
++			size_t n = strlen(urldata.host) + strlen(urldata.port) + 2;
++			host_and_port = alloca(n);
++			sprintf(host_and_port, "%s:%s", urldata.host, urldata.port);
++		}
++
++		error = t->owner->certificate_check_cb((git_cert *) cert_ptr, 0, host_and_port, t->owner->message_cb_payload);
+ 
+ 		if (error < 0 && error != GIT_PASSTHROUGH) {
+ 			if (!git_error_last())
diff --git a/deps/patches/libssh2-userauth-check.patch b/deps/patches/libssh2-userauth-check.patch
new file mode 100644
index 00000000000000..1dc6108ebece79
--- /dev/null
+++ b/deps/patches/libssh2-userauth-check.patch
@@ -0,0 +1,30 @@
+From 37ee0aa214655b63e7869d1d74ff1ec9f9818a5e Mon Sep 17 00:00:00 2001
+From: Daniel Stenberg <daniel@haxx.se>
+Date: Fri, 17 Dec 2021 17:46:29 +0100
+Subject: [PATCH] userauth: check for too large userauth_kybd_auth_name_len
+ (#650)
+
+... before using it.
+
+Reported-by: MarcoPoloPie
+Fixes #649
+---
+ src/userauth.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/src/userauth.c b/src/userauth.c
+index 40ef915..caa5635 100644
+--- a/src/userauth.c
++++ b/src/userauth.c
+@@ -1769,6 +1769,11 @@ userauth_keyboard_interactive(LIBSSH2_SESSION * session,
+             if(session->userauth_kybd_data_len >= 5) {
+                 /* string    name (ISO-10646 UTF-8) */
+                 session->userauth_kybd_auth_name_len = _libssh2_ntohu32(s);
++                if(session->userauth_kybd_auth_name_len >
++                   session->userauth_kybd_data_len - 5)
++                    return _libssh2_error(session,
++                                          LIBSSH2_ERROR_OUT_OF_BOUNDARY,
++                                          "Bad keyboard auth name");
+                 s += 4;
+             }
+             else {
diff --git a/deps/patches/libunwind-cfa-rsp.patch b/deps/patches/libunwind-cfa-rsp.patch
new file mode 100644
index 00000000000000..6b2080c10c2cf1
--- /dev/null
+++ b/deps/patches/libunwind-cfa-rsp.patch
@@ -0,0 +1,368 @@
+From 8c8c78e2db09c5dc66ad0188a088b1664483a13f Mon Sep 17 00:00:00 2001
+From: Keno Fischer <keno@juliacomputing.com>
+Date: Sun, 29 Aug 2021 11:07:54 -0700
+Subject: [PATCH] x86_64: Stop aliasing RSP and CFA
+
+RSP and CFA are different concepts. RSP refers to the physical
+register, CFA is a virtual register that serves as the base
+address for various other saved registers. It is true that
+in many frames these are set to alias, however this is not
+a requirement. For example, a function that performs a stack
+switch would likely change the rsp in the middle of the function,
+but would keep the CFA at the original RSP such that saved registers
+may be appropriately recovered.
+
+We are seeing incorrect unwinds in the Julia runtime when running
+julia under rr. This is because injects code (with correct CFI)
+that performs just such a stack switch [1]. GDB manages to unwind
+this correctly, but libunwind incorrectly sets the rsp to the CFA
+address, causing a misunwind.
+
+Tested on x86_64, patches for other architectures are ported, but
+not tested.
+
+[1] https://github.com/rr-debugger/rr/blob/469c22059a4a1798d33a8a224457faf22b2c178c/src/preload/syscall_hook.S#L454
+---
+ include/dwarf.h                 |  3 +-
+ include/libunwind_i.h           |  4 ++
+ include/tdep-x86/dwarf-config.h |  2 -
+ include/tdep-x86/libunwind_i.h  | 73 ++++++++++++---------------------
+ src/dwarf/Gparser.c             | 15 +++++--
+ src/x86/Gos-freebsd.c           |  1 +
+ src/x86/Gregs.c                 |  2 +-
+ src/x86/Gstep.c                 |  4 +-
+ src/x86_64/Gos-freebsd.c        |  1 +
+ src/x86_64/Gregs.c              |  2 +-
+ src/x86_64/Gstep.c              |  2 +-
+ 11 files changed, 52 insertions(+), 57 deletions(-)
+
+diff --git a/include/dwarf.h b/include/dwarf.h
+index 175c419bb..23ff4c4f6 100644
+--- a/include/dwarf.h
++++ b/include/dwarf.h
+@@ -231,6 +231,7 @@ typedef enum
+     DWARF_WHERE_REG,            /* register saved in another register */
+     DWARF_WHERE_EXPR,           /* register saved */
+     DWARF_WHERE_VAL_EXPR,       /* register has computed value */
++    DWARF_WHERE_CFA,            /* register is set to the computed cfa value */
+   }
+ dwarf_where_t;
+ 
+@@ -313,7 +314,7 @@ typedef struct dwarf_cursor
+     void *as_arg;               /* argument to address-space callbacks */
+     unw_addr_space_t as;        /* reference to per-address-space info */
+ 
+-    unw_word_t cfa;     /* canonical frame address; aka frame-/stack-pointer */
++    unw_word_t cfa;     /* canonical frame address; aka frame-pointer */
+     unw_word_t ip;              /* instruction pointer */
+     unw_word_t args_size;       /* size of arguments */
+     unw_word_t eh_args[UNW_TDEP_NUM_EH_REGS];
+diff --git a/include/libunwind_i.h b/include/libunwind_i.h
+index fea5c2607..6c7dda9a8 100644
+--- a/include/libunwind_i.h
++++ b/include/libunwind_i.h
+@@ -346,6 +346,10 @@ static inline void invalidate_edi (struct elf_dyn_info *edi)
+ 
+ #include "tdep/libunwind_i.h"
+ 
++#ifndef TDEP_DWARF_SP
++#define TDEP_DWARF_SP UNW_TDEP_SP
++#endif
++
+ #ifndef tdep_get_func_addr
+ # define tdep_get_func_addr(as,addr,v)          (*(v) = addr, 0)
+ #endif
+diff --git a/include/tdep-x86/dwarf-config.h b/include/tdep-x86/dwarf-config.h
+index f76f9c1c4..11398e4e6 100644
+--- a/include/tdep-x86/dwarf-config.h
++++ b/include/tdep-x86/dwarf-config.h
+@@ -43,9 +43,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+ typedef struct dwarf_loc
+   {
+     unw_word_t val;
+-#ifndef UNW_LOCAL_ONLY
+     unw_word_t type;            /* see X86_LOC_TYPE_* macros.  */
+-#endif
+   }
+ dwarf_loc_t;
+ 
+diff --git a/include/tdep-x86/libunwind_i.h b/include/tdep-x86/libunwind_i.h
+index d4c5ccdb1..ad4edc2f5 100644
+--- a/include/tdep-x86/libunwind_i.h
++++ b/include/tdep-x86/libunwind_i.h
+@@ -84,15 +84,26 @@ dwarf_get_uc(const struct dwarf_cursor *cursor)
+ }
+ 
+ #define DWARF_GET_LOC(l)        ((l).val)
++# define DWARF_LOC_TYPE_MEM     (0 << 0)
++# define DWARF_LOC_TYPE_FP      (1 << 0)
++# define DWARF_LOC_TYPE_REG     (1 << 1)
++# define DWARF_LOC_TYPE_VAL     (1 << 2)
+ 
+-#ifdef UNW_LOCAL_ONLY
++# define DWARF_IS_REG_LOC(l)    (((l).type & DWARF_LOC_TYPE_REG) != 0)
++# define DWARF_IS_FP_LOC(l)     (((l).type & DWARF_LOC_TYPE_FP) != 0)
++# define DWARF_IS_MEM_LOC(l)    ((l).type == DWARF_LOC_TYPE_MEM)
++# define DWARF_IS_VAL_LOC(l)    (((l).type & DWARF_LOC_TYPE_VAL) != 0)
++
++# define DWARF_LOC(r, t)        ((dwarf_loc_t) { .val = (r), .type = (t) })
+ # define DWARF_NULL_LOC         DWARF_LOC (0, 0)
+-# define DWARF_IS_NULL_LOC(l)   (DWARF_GET_LOC (l) == 0)
+-# define DWARF_LOC(r, t)        ((dwarf_loc_t) { .val = (r) })
+-# define DWARF_IS_REG_LOC(l)    0
++# define DWARF_IS_NULL_LOC(l)                                           \
++                ({ dwarf_loc_t _l = (l); _l.val == 0 && _l.type == 0; })
++# define DWARF_VAL_LOC(c,v)     DWARF_LOC ((v), DWARF_LOC_TYPE_VAL)
++# define DWARF_MEM_LOC(c,m)     DWARF_LOC ((m), DWARF_LOC_TYPE_MEM)
++
++#ifdef UNW_LOCAL_ONLY
+ # define DWARF_REG_LOC(c,r)     (DWARF_LOC((unw_word_t)                      \
+                                  tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
+-# define DWARF_MEM_LOC(c,m)     DWARF_LOC ((m), 0)
+ # define DWARF_FPREG_LOC(c,r)   (DWARF_LOC((unw_word_t)                      \
+                                  tdep_uc_addr(dwarf_get_uc(c), (r)), 0))
+ 
+@@ -114,35 +125,8 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
+   return 0;
+ }
+ 
+-static inline int
+-dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
+-{
+-  if (!DWARF_GET_LOC (loc))
+-    return -1;
+-  return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+-                                   0, c->as_arg);
+-}
+-
+-static inline int
+-dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
+-{
+-  if (!DWARF_GET_LOC (loc))
+-    return -1;
+-  return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), &val,
+-                                   1, c->as_arg);
+-}
+-
+ #else /* !UNW_LOCAL_ONLY */
+-# define DWARF_LOC_TYPE_FP      (1 << 0)
+-# define DWARF_LOC_TYPE_REG     (1 << 1)
+-# define DWARF_NULL_LOC         DWARF_LOC (0, 0)
+-# define DWARF_IS_NULL_LOC(l)                                           \
+-                ({ dwarf_loc_t _l = (l); _l.val == 0 && _l.type == 0; })
+-# define DWARF_LOC(r, t)        ((dwarf_loc_t) { .val = (r), .type = (t) })
+-# define DWARF_IS_REG_LOC(l)    (((l).type & DWARF_LOC_TYPE_REG) != 0)
+-# define DWARF_IS_FP_LOC(l)     (((l).type & DWARF_LOC_TYPE_FP) != 0)
+ # define DWARF_REG_LOC(c,r)     DWARF_LOC((r), DWARF_LOC_TYPE_REG)
+-# define DWARF_MEM_LOC(c,m)     DWARF_LOC ((m), 0)
+ # define DWARF_FPREG_LOC(c,r)   DWARF_LOC((r), (DWARF_LOC_TYPE_REG      \
+                                                 | DWARF_LOC_TYPE_FP))
+ 
+@@ -192,38 +176,33 @@ dwarf_putfp (struct dwarf_cursor *c, dwarf_loc_t loc, unw_fpreg_t val)
+                                    1, c->as_arg);
+ }
+ 
++#endif /* !UNW_LOCAL_ONLY */
++
+ static inline int
+ dwarf_get (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t *val)
+ {
+   if (DWARF_IS_NULL_LOC (loc))
+     return -UNW_EBADREG;
+ 
+-  /* If a code-generator were to save a value of type unw_word_t in a
+-     floating-point register, we would have to support this case.  I
+-     suppose it could happen with MMX registers, but does it really
+-     happen?  */
+-  assert (!DWARF_IS_FP_LOC (loc));
+-
+   if (DWARF_IS_REG_LOC (loc))
+     return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), val,
+                                      0, c->as_arg);
+-  else
++  if (DWARF_IS_MEM_LOC (loc))
+     return (*c->as->acc.access_mem) (c->as, DWARF_GET_LOC (loc), val,
+                                      0, c->as_arg);
++  assert(DWARF_IS_VAL_LOC (loc));
++  *val = DWARF_GET_LOC (loc);
++  return 0;
+ }
+ 
+ static inline int
+ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
+ {
++  assert(!DWARF_IS_VAL_LOC (loc));
++
+   if (DWARF_IS_NULL_LOC (loc))
+     return -UNW_EBADREG;
+ 
+-  /* If a code-generator were to save a value of type unw_word_t in a
+-     floating-point register, we would have to support this case.  I
+-     suppose it could happen with MMX registers, but does it really
+-     happen?  */
+-  assert (!DWARF_IS_FP_LOC (loc));
+-
+   if (DWARF_IS_REG_LOC (loc))
+     return (*c->as->acc.access_reg) (c->as, DWARF_GET_LOC (loc), &val,
+                                      1, c->as_arg);
+@@ -232,7 +211,9 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
+                                      1, c->as_arg);
+ }
+ 
+-#endif /* !UNW_LOCAL_ONLY */
++// For historical reasons, the DWARF numbering does not match the libunwind
++// numbering, necessitating this override
++#define TDEP_DWARF_SP 4
+ 
+ #define tdep_getcontext_trace           unw_getcontext
+ #define tdep_init_done                  UNW_OBJ(init_done)
+diff --git a/src/dwarf/Gparser.c b/src/dwarf/Gparser.c
+index da170d4b3..70a62c505 100644
+--- a/src/dwarf/Gparser.c
++++ b/src/dwarf/Gparser.c
+@@ -508,6 +508,9 @@ setup_fde (struct dwarf_cursor *c, dwarf_state_record_t *sr)
+   for (i = 0; i < DWARF_NUM_PRESERVED_REGS + 2; ++i)
+     set_reg (sr, i, DWARF_WHERE_SAME, 0);
+ 
++  // SP defaults to CFA (but is overridable)
++  set_reg (sr, TDEP_DWARF_SP, DWARF_WHERE_CFA, 0);
++
+   struct dwarf_cie_info *dci = c->pi.unwind_info;
+   sr->rs_current.ret_addr_column  = dci->ret_addr_column;
+   unw_word_t addr = dci->cie_instr_start;
+@@ -792,14 +795,14 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs)
+       /* As a special-case, if the stack-pointer is the CFA and the
+          stack-pointer wasn't saved, popping the CFA implicitly pops
+          the stack-pointer as well.  */
+-      if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == UNW_TDEP_SP)
+-          && (UNW_TDEP_SP < ARRAY_SIZE(rs->reg.val))
+-          && (rs->reg.where[UNW_TDEP_SP] == DWARF_WHERE_SAME))
++      if ((rs->reg.val[DWARF_CFA_REG_COLUMN] == TDEP_DWARF_SP)
++          && (TDEP_DWARF_SP < ARRAY_SIZE(rs->reg.val))
++          && (DWARF_IS_NULL_LOC(c->loc[TDEP_DWARF_SP])))
+           cfa = c->cfa;
+       else
+         {
+           regnum = dwarf_to_unw_regnum (rs->reg.val[DWARF_CFA_REG_COLUMN]);
+-          if ((ret = unw_get_reg ((unw_cursor_t *) c, regnum, &cfa)) < 0)
++          if ((ret = unw_get_reg (dwarf_to_cursor(c), regnum, &cfa)) < 0)
+             return ret;
+         }
+       cfa += rs->reg.val[DWARF_CFA_OFF_COLUMN];
+@@ -836,6 +839,10 @@ apply_reg_state (struct dwarf_cursor *c, struct dwarf_reg_state *rs)
+         case DWARF_WHERE_SAME:
+           break;
+ 
++        case DWARF_WHERE_CFA:
++          new_loc[i] = DWARF_VAL_LOC (c, cfa);
++          break;
++
+         case DWARF_WHERE_CFAREL:
+           new_loc[i] = DWARF_MEM_LOC (c, cfa + rs->reg.val[i]);
+           break;
+diff --git a/src/x86/Gos-freebsd.c b/src/x86/Gos-freebsd.c
+index 7dd014046..1b251d027 100644
+--- a/src/x86/Gos-freebsd.c
++++ b/src/x86/Gos-freebsd.c
+@@ -138,6 +138,7 @@ x86_handle_signal_frame (unw_cursor_t *cursor)
+     c->dwarf.loc[ST0] = DWARF_NULL_LOC;
+   } else if (c->sigcontext_format == X86_SCF_FREEBSD_SYSCALL) {
+     c->dwarf.loc[EIP] = DWARF_LOC (c->dwarf.cfa, 0);
++    c->dwarf.loc[ESP] = DWARF_VAL_LOC (c, c->dwarf.cfa + 4);
+     c->dwarf.loc[EAX] = DWARF_NULL_LOC;
+     c->dwarf.cfa += 4;
+     c->dwarf.use_prev_instr = 1;
+diff --git a/src/x86/Gregs.c b/src/x86/Gregs.c
+index 4a9592617..9446d6c62 100644
+--- a/src/x86/Gregs.c
++++ b/src/x86/Gregs.c
+@@ -53,7 +53,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp,
+       break;
+ 
+     case UNW_X86_CFA:
+-    case UNW_X86_ESP:
+       if (write)
+         return -UNW_EREADONLYREG;
+       *valp = c->dwarf.cfa;
+@@ -81,6 +80,7 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp,
+     case UNW_X86_ECX: loc = c->dwarf.loc[ECX]; break;
+     case UNW_X86_EBX: loc = c->dwarf.loc[EBX]; break;
+ 
++    case UNW_X86_ESP: loc = c->dwarf.loc[ESP]; break;
+     case UNW_X86_EBP: loc = c->dwarf.loc[EBP]; break;
+     case UNW_X86_ESI: loc = c->dwarf.loc[ESI]; break;
+     case UNW_X86_EDI: loc = c->dwarf.loc[EDI]; break;
+diff --git a/src/x86/Gstep.c b/src/x86/Gstep.c
+index 129b739a3..061dcbaaa 100644
+--- a/src/x86/Gstep.c
++++ b/src/x86/Gstep.c
+@@ -47,7 +47,7 @@ unw_step (unw_cursor_t *cursor)
+     {
+       /* DWARF failed, let's see if we can follow the frame-chain
+          or skip over the signal trampoline.  */
+-      struct dwarf_loc ebp_loc, eip_loc;
++      struct dwarf_loc ebp_loc, eip_loc, esp_loc;
+ 
+       /* We could get here because of missing/bad unwind information.
+          Validate all addresses before dereferencing. */
+@@ -77,6 +77,7 @@ unw_step (unw_cursor_t *cursor)
+                  c->dwarf.cfa);
+ 
+           ebp_loc = DWARF_LOC (c->dwarf.cfa, 0);
++          esp_loc = DWARF_VAL_LOC (c, c->dwarf.cfa + 8);
+           eip_loc = DWARF_LOC (c->dwarf.cfa + 4, 0);
+           c->dwarf.cfa += 8;
+ 
+@@ -87,6 +88,7 @@ unw_step (unw_cursor_t *cursor)
+             c->dwarf.loc[i] = DWARF_NULL_LOC;
+ 
+           c->dwarf.loc[EBP] = ebp_loc;
++          c->dwarf.loc[ESP] = esp_loc;
+           c->dwarf.loc[EIP] = eip_loc;
+           c->dwarf.use_prev_instr = 1;
+         }
+diff --git a/src/x86_64/Gos-freebsd.c b/src/x86_64/Gos-freebsd.c
+index 8f28d1d8c..0c5a17940 100644
+--- a/src/x86_64/Gos-freebsd.c
++++ b/src/x86_64/Gos-freebsd.c
+@@ -133,6 +133,7 @@ x86_64_handle_signal_frame (unw_cursor_t *cursor)
+     c->dwarf.loc[RCX] = c->dwarf.loc[R10];
+     /*  rsp_loc = DWARF_LOC(c->dwarf.cfa - 8, 0);       */
+     /*  rbp_loc = c->dwarf.loc[RBP];                    */
++    c->dwarf.loc[RSP] = DWARF_VAL_LOC (c, c->dwarf.cfa + 8);
+     c->dwarf.loc[RIP] = DWARF_LOC (c->dwarf.cfa, 0);
+     ret = dwarf_get (&c->dwarf, c->dwarf.loc[RIP], &c->dwarf.ip);
+     Debug (1, "Frame Chain [RIP=0x%Lx] = 0x%Lx\n",
+diff --git a/src/x86_64/Gregs.c b/src/x86_64/Gregs.c
+index baf8a24f0..dff5bcbe7 100644
+--- a/src/x86_64/Gregs.c
++++ b/src/x86_64/Gregs.c
+@@ -79,7 +79,6 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp,
+       break;
+ 
+     case UNW_X86_64_CFA:
+-    case UNW_X86_64_RSP:
+       if (write)
+         return -UNW_EREADONLYREG;
+       *valp = c->dwarf.cfa;
+@@ -107,6 +106,7 @@ tdep_access_reg (struct cursor *c, unw_regnum_t reg, unw_word_t *valp,
+     case UNW_X86_64_RCX: loc = c->dwarf.loc[RCX]; break;
+     case UNW_X86_64_RBX: loc = c->dwarf.loc[RBX]; break;
+ 
++    case UNW_X86_64_RSP: loc = c->dwarf.loc[RSP]; break;
+     case UNW_X86_64_RBP: loc = c->dwarf.loc[RBP]; break;
+     case UNW_X86_64_RSI: loc = c->dwarf.loc[RSI]; break;
+     case UNW_X86_64_RDI: loc = c->dwarf.loc[RDI]; break;
+diff --git a/src/x86_64/Gstep.c b/src/x86_64/Gstep.c
+index 3c5c3830f..fdad298c7 100644
+--- a/src/x86_64/Gstep.c
++++ b/src/x86_64/Gstep.c
+@@ -223,7 +223,7 @@ unw_step (unw_cursor_t *cursor)
+                   Debug (2, "RIP fixup didn't work, falling back\n");
+                   unw_word_t rbp1 = 0;
+                   rbp_loc = DWARF_LOC(rbp, 0);
+-                  rsp_loc = DWARF_NULL_LOC;
++                  rsp_loc = DWARF_VAL_LOC(c, rbp + 16);
+                   rip_loc = DWARF_LOC (rbp + 8, 0);
+                   ret = dwarf_get (&c->dwarf, rbp_loc, &rbp1);
+                   Debug (1, "[RBP=0x%lx] = 0x%lx (cfa = 0x%lx) -> 0x%lx\n",
diff --git a/deps/patches/libunwind-dwarf-table.patch b/deps/patches/libunwind-dwarf-table.patch
new file mode 100644
index 00000000000000..5905982f9a3497
--- /dev/null
+++ b/deps/patches/libunwind-dwarf-table.patch
@@ -0,0 +1,36 @@
+From a5b5fd28ed03cb1ab524d24dc534c1fa167bf5a1 Mon Sep 17 00:00:00 2001
+From: Alex Arslan <ararslan@comcast.net>
+Date: Fri, 5 Nov 2021 16:58:41 -0700
+Subject: [PATCH] Fix table indexing in `dwarf_search_unwind_table`
+
+`table_len` is used as an index into `table`, assuming it represents the
+number of entries. However, it is defined as the number of entries
+multiplied by `sizeof(unw_word_t)`. This is accounted for in other
+places that use `table_len`, e.g. in `lookup`, which divides out the
+size of `unw_word_t`, but the indexing expression uses `table_len`
+directly. So when `table` has say 2 entries, we're actually looking at
+index 15 rather than 1 in the comparison. This can cause the conditional
+to erroneously evaluate to true, allowing the following line to
+segfault.
+
+This was observed with JIT compiled code from Julia with LLVM on
+FreeBSD.
+
+Co-Authored-By: Jameson Nash <vtjnash@gmail.com>
+---
+ src/dwarf/Gfind_proc_info-lsb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/dwarf/Gfind_proc_info-lsb.c b/src/dwarf/Gfind_proc_info-lsb.c
+index 5e27a501..af4cbce8 100644
+--- a/src/dwarf/Gfind_proc_info-lsb.c
++++ b/src/dwarf/Gfind_proc_info-lsb.c
+@@ -866,7 +866,7 @@ dwarf_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
+   if (as == unw_local_addr_space)
+     {
+       e = lookup (table, table_len, ip - ip_base);
+-      if (e && &e[1] < &table[table_len])
++      if (e && &e[1] < &table[table_len / sizeof (unw_word_t)])
+ 	last_ip = e[1].start_ip_offset + ip_base;
+       else
+ 	last_ip = di->end_ip;
diff --git a/deps/patches/libunwind-prefer-extbl.patch b/deps/patches/libunwind-prefer-extbl.patch
index 8d93605e337af7..07b172604d6236 100644
--- a/deps/patches/libunwind-prefer-extbl.patch
+++ b/deps/patches/libunwind-prefer-extbl.patch
@@ -1,6 +1,6 @@
-From bc7b50355cb37cfa56f6131b2f9174b499053188 Mon Sep 17 00:00:00 2001
+From 2d6a50435bb743be1e4d88eee002372344348349 Mon Sep 17 00:00:00 2001
 From: Yichao Yu <yyc1992@gmail.com>
-Date: Sat, 1 Oct 2016 16:55:40 +0000
+Date: Sun, 29 Aug 2021 13:43:01 -0700
 Subject: [PATCH] Prefer EXTBL unwinding on ARM
 
 It is part of the C++ ABI so a EXTBL unwind info that's not `CANT_UNWIND`
@@ -8,16 +8,16 @@ should always be reliable/correct.
 Ignore `ESTOPUNWIND` so that a `CANT_UNWIND` info can fallback to unwinding
 using the debug info instead.
 ---
- include/tdep-arm/libunwind_i.h |  4 ++++
- src/arm/Gex_tables.c           | 18 ++++++++++++++----
- src/arm/Gstep.c                | 35 +++++++++++++++++++++--------------
- 3 files changed, 39 insertions(+), 18 deletions(-)
+ include/tdep-arm/libunwind_i.h |  4 +++
+ src/arm/Gex_tables.c           | 18 ++++++++---
+ src/arm/Gstep.c                | 55 ++++++++++++++++++++--------------
+ 3 files changed, 51 insertions(+), 26 deletions(-)
 
 diff --git a/include/tdep-arm/libunwind_i.h b/include/tdep-arm/libunwind_i.h
-index 2602f41c..074fc8cb 100644
+index 88ebfb069..5bd28c953 100644
 --- a/include/tdep-arm/libunwind_i.h
 +++ b/include/tdep-arm/libunwind_i.h
-@@ -253,6 +253,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
+@@ -256,6 +256,7 @@ dwarf_put (struct dwarf_cursor *c, dwarf_loc_t loc, unw_word_t val)
  #define tdep_init_done                  UNW_OBJ(init_done)
  #define tdep_init                       UNW_OBJ(init)
  #define arm_find_proc_info              UNW_OBJ(find_proc_info)
@@ -25,7 +25,7 @@ index 2602f41c..074fc8cb 100644
  #define arm_put_unwind_info             UNW_OBJ(put_unwind_info)
  /* Platforms that support UNW_INFO_FORMAT_TABLE need to define
     tdep_search_unwind_table.  */
-@@ -294,6 +295,9 @@ extern void tdep_init (void);
+@@ -297,6 +298,9 @@ extern void tdep_init (void);
  extern int arm_find_proc_info (unw_addr_space_t as, unw_word_t ip,
                                 unw_proc_info_t *pi, int need_unwind_info,
                                 void *arg);
@@ -36,7 +36,7 @@ index 2602f41c..074fc8cb 100644
                                    unw_proc_info_t *pi, void *arg);
  extern int tdep_search_unwind_table (unw_addr_space_t as, unw_word_t ip,
 diff --git a/src/arm/Gex_tables.c b/src/arm/Gex_tables.c
-index d6573a65..a895e0cc 100644
+index efdcf2978..083d2b2f7 100644
 --- a/src/arm/Gex_tables.c
 +++ b/src/arm/Gex_tables.c
 @@ -506,18 +506,20 @@ arm_phdr_cb (struct dl_phdr_info *info, size_t size, void *data)
@@ -80,17 +80,17 @@ index d6573a65..a895e0cc 100644
  arm_put_unwind_info (unw_addr_space_t as, unw_proc_info_t *proc_info, void *arg)
  {
 diff --git a/src/arm/Gstep.c b/src/arm/Gstep.c
-index adec02e0..c43daa1c 100644
+index 895e8a892..e4ada651b 100644
 --- a/src/arm/Gstep.c
 +++ b/src/arm/Gstep.c
-@@ -53,8 +53,15 @@ arm_exidx_step (struct cursor *c)
+@@ -54,17 +54,22 @@ arm_exidx_step (struct cursor *c)
                                       c->dwarf.as_arg);
    if (ret == -UNW_ENOINFO)
      {
 +#ifdef UNW_LOCAL_ONLY
-+        if ((ret = arm_find_proc_info2 (c->dwarf.as, ip, &c->dwarf.pi,
-+                                        1, c->dwarf.as_arg,
-+                                        UNW_ARM_METHOD_EXIDX)) < 0)
++      if ((ret = arm_find_proc_info2 (c->dwarf.as, ip, &c->dwarf.pi,
++                                      1, c->dwarf.as_arg,
++                                      UNW_ARM_METHOD_EXIDX)) < 0)
 +        return ret;
 +#else
        if ((ret = tdep_find_proc_info (&c->dwarf, ip, 1)) < 0)
@@ -99,21 +99,39 @@ index adec02e0..c43daa1c 100644
      }
  
    if (c->dwarf.pi.format != UNW_INFO_FORMAT_ARM_EXIDX)
-@@ -94,8 +101,21 @@ unw_step (unw_cursor_t *cursor)
+     return -UNW_ENOINFO;
+ 
+   ret = arm_exidx_extract (&c->dwarf, buf);
+-  if (ret == -UNW_ESTOPUNWIND)
+-    return 0;
+-  else if (ret < 0)
++  if (ret < 0)
+     return ret;
+ 
+   ret = arm_exidx_decode (buf, ret, &c->dwarf);
+@@ -88,6 +93,7 @@ unw_step (unw_cursor_t *cursor)
+ {
+   struct cursor *c = (struct cursor *) cursor;
+   int ret = -UNW_EUNSPEC;
++  int has_stopunwind = 0;
+ 
+   Debug (1, "(cursor=%p)\n", c);
+ 
+@@ -95,17 +101,31 @@ unw_step (unw_cursor_t *cursor)
    if (unw_is_signal_frame (cursor) > 0)
       return arm_handle_signal_frame (cursor);
  
 +  /* First, try extbl-based unwinding. */
 +  if (UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX))
 +    {
-+      Debug (13, "%s(ret=%d), trying extbl\n",
-+             UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() failed " : "",
-+             ret);
 +      ret = arm_exidx_step (c);
++      Debug(1, "arm_exidx_step()=%d\n", ret);
 +      if (ret > 0)
 +        return 1;
 +      if (ret == 0)
 +        return ret;
++      if (ret == -UNW_ESTOPUNWIND)
++        has_stopunwind = 1;
 +    }
 +
  #ifdef CONFIG_DEBUG_FRAME
@@ -121,24 +139,56 @@ index adec02e0..c43daa1c 100644
 +  /* Second, try DWARF-based unwinding. */
    if (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF))
      {
++      Debug (13, "%s(ret=%d), trying extbl\n",
++             UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) ? "arm_exidx_step() failed " : "",
++             ret);
        ret = dwarf_step (&c->dwarf);
-@@ -114,16 +129,6 @@ unw_step (unw_cursor_t *cursor)
+       Debug(1, "dwarf_step()=%d\n", ret);
+ 
+       if (likely (ret > 0))
+         return 1;
+-      else if (unlikely (ret == -UNW_ESTOPUNWIND))
+-        return ret;
+ 
+       if (ret < 0 && ret != -UNW_ENOINFO)
+         {
+@@ -115,18 +135,9 @@ unw_step (unw_cursor_t *cursor)
      }
  #endif /* CONFIG_DEBUG_FRAME */
  
 -  /* Next, try extbl-based unwinding. */
 -  if (UNW_TRY_METHOD (UNW_ARM_METHOD_EXIDX))
 -    {
+-      Debug (13, "%s(ret=%d), trying extbl\n",
+-             UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() failed " : "",
+-             ret);
 -      ret = arm_exidx_step (c);
 -      if (ret > 0)
 -        return 1;
 -      if (ret == -UNW_ESTOPUNWIND || ret == 0)
 -        return ret;
 -    }
--
++  // Before trying the fallback, if any unwind info tell us to stop, do that.
++  if (has_stopunwind)
++    return -UNW_ESTOPUNWIND;
+ 
    /* Fall back on APCS frame parsing.
       Note: This won't work in case the ARM EABI is used. */
- #ifdef __FreeBSD__
--- 
-2.16.1
-
+@@ -139,13 +150,13 @@ unw_step (unw_cursor_t *cursor)
+       if (UNW_TRY_METHOD(UNW_ARM_METHOD_FRAME))
+         {
+           Debug (13, "%s%s%s%s(ret=%d), trying frame-chain\n",
+-                 UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() " : "",
+-                 (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) && UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX)) ? "and " : "",
+                  UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) ? "arm_exidx_step() " : "",
+-                 (UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) || UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX)) ? "failed " : "",
++                 (UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) && UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) ? "and " : "",
++                 UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF) ? "dwarf_step() " : "",
++                 (UNW_TRY_METHOD(UNW_ARM_METHOD_EXIDX) || UNW_TRY_METHOD(UNW_ARM_METHOD_DWARF)) ? "failed " : "",
+                  ret);
+           ret = UNW_ESUCCESS;
+-          /* DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */
++          /* EXIDX and/or DWARF unwinding failed, try to follow APCS/optimized APCS frame chain */
+           unw_word_t instr, i;
+           dwarf_loc_t ip_loc, fp_loc;
+           unw_word_t frame;
diff --git a/deps/patches/llvm-10-D85553.patch b/deps/patches/llvm-10-D85553.patch
deleted file mode 100644
index cb48edba1c0ee0..00000000000000
--- a/deps/patches/llvm-10-D85553.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 4d30f46738d417c305c0e748a49020d4513ac4ee Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@juliacomputing.com>
-Date: Fri, 7 Aug 2020 16:38:15 -0400
-Subject: [PATCH] [X86] Don't produce bad x86andp nodes for i1 vectors
-
-In D85499, I attempted to fix this same issue by canonicalizing
-andnp for i1 vectors, but since there was some opposition to such
-a change, this commit just fixes the bug by using two different
-forms depending on which kind of vector type is in use. We can
-then always decide to switch the canonical forms later.
-
-Description of the original bug:
-We have a DAG combine that tries to fold (vselect cond, 0000..., X) -> (andnp cond, x).
-However, it does so by attempting to create an i64 vector with the number
-of elements obtained by truncating division by 64 from the bitwidth. This is
-bad for mask vectors like v8i1, since that division is just zero. Besides,
-we don't want i64 vectors anyway. For i1 vectors, switch the pattern
-to (andnp (not cond), x), which is the canonical form for `kandn`
-on mask registers.
-
-Fixes https://github.com/JuliaLang/julia/issues/36955.
-
-Differential Revision: https://reviews.llvm.org/D85553
----
- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++----
- 1 file changed, 8 insertions(+), 4 deletions(-)
-
-diff --git llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86ISelLowering.cpp
-index c8720d9ae3a..17eaa49c83f 100644
---- llvm/lib/Target/X86/X86ISelLowering.cpp
-+++ llvm/lib/Target/X86/X86ISelLowering.cpp
-@@ -37630,10 +37630,14 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
- 
-   // vselect Cond, 000..., X -> andn Cond, X
-   if (TValIsAllZeros) {
--    MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64);
--    SDValue CastCond = DAG.getBitcast(AndNVT, Cond);
--    SDValue CastRHS = DAG.getBitcast(AndNVT, RHS);
--    SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS);
-+    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
-+    SDValue AndN;
-+    // The canonical form differs for i1 vectors - x86andnp is not used
-+    if (CondVT.getScalarType() == MVT::i1)
-+      AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
-+                         CastRHS);
-+    else
-+      AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
-     return DAG.getBitcast(VT, AndN);
-   }
- 
--- 
-2.28.0
-
diff --git a/deps/patches/llvm-10-r_aarch64_prel32.patch b/deps/patches/llvm-10-r_aarch64_prel32.patch
deleted file mode 100644
index 7236574b3311d6..00000000000000
--- a/deps/patches/llvm-10-r_aarch64_prel32.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From c530dd687328d4208f91e62b600d25ec6e7f0f39 Mon Sep 17 00:00:00 2001
-From: Fangrui Song <i@maskray.me>
-Date: Fri, 17 Jul 2020 23:49:15 -0700
-Subject: [PATCH 2/2] [RelocationResolver] Support R_AARCH64_PREL32
-
-Code from D83800 by Yichao Yu
----
- llvm/lib/Object/RelocationResolver.cpp  |  6 ++++++
- llvm/test/DebugInfo/AArch64/eh-frame.ll | 21 +++++++++++++++++++++
- 2 files changed, 27 insertions(+)
- create mode 100644 llvm/test/DebugInfo/AArch64/eh-frame.ll
-
-diff --git llvm/lib/Object/RelocationResolver.cpp llvm/lib/Object/RelocationResolver.cpp
-index eedb236f83d..80339ebf7b9 100644
---- llvm/lib/Object/RelocationResolver.cpp
-+++ llvm/lib/Object/RelocationResolver.cpp
-@@ -62,6 +62,8 @@ static bool supportsAArch64(uint64_t Type) {
-   switch (Type) {
-   case ELF::R_AARCH64_ABS32:
-   case ELF::R_AARCH64_ABS64:
-+  case ELF::R_AARCH64_PREL32:
-+  case ELF::R_AARCH64_PREL64:
-     return true;
-   default:
-     return false;
-@@ -74,6 +76,10 @@ static uint64_t resolveAArch64(RelocationRef R, uint64_t S, uint64_t A) {
-     return (S + getELFAddend(R)) & 0xFFFFFFFF;
-   case ELF::R_AARCH64_ABS64:
-     return S + getELFAddend(R);
-+  case ELF::R_AARCH64_PREL32:
-+    return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
-+  case ELF::R_AARCH64_PREL64:
-+    return S + getELFAddend(R) - R.getOffset();
-   default:
-     llvm_unreachable("Invalid relocation type");
-   }
-diff --git llvm/test/DebugInfo/AArch64/eh-frame.ll llvm/test/DebugInfo/AArch64/eh-frame.ll
-new file mode 100644
-index 00000000000..9651159271e
---- /dev/null
-+++ llvm/test/DebugInfo/AArch64/eh-frame.ll
-@@ -0,0 +1,21 @@
-+; RUN: llc -filetype=obj -mtriple=aarch64 %s -o %t.o
-+; RUN: llvm-readobj -r %t.o | FileCheck %s --check-prefix=REL32
-+; RUN: llvm-dwarfdump --eh-frame %t.o 2>&1 | FileCheck %s
-+
-+; REL32:      R_AARCH64_PREL32 .text 0x0
-+; REL32-NEXT: R_AARCH64_PREL32 .text 0x4
-+
-+; CHECK-NOT:  warning:
-+; CHECK: FDE cie=00000000 pc=00000000...00000004
-+;; TODO Take relocation into consideration
-+; CHECK: FDE cie=00000000 pc=00000000...00000004
-+
-+define void @foo() {
-+entry:
-+  ret void
-+}
-+
-+define void @bar() {
-+entry:
-+  ret void
-+}
--- 
-2.28.0
-
diff --git a/deps/patches/llvm-10-r_ppc_rel.patch b/deps/patches/llvm-10-r_ppc_rel.patch
deleted file mode 100644
index f86ee75bb845ea..00000000000000
--- a/deps/patches/llvm-10-r_ppc_rel.patch
+++ /dev/null
@@ -1,116 +0,0 @@
-From b7f604447cbd76c803ccff5c0ff1b699b9c1504e Mon Sep 17 00:00:00 2001
-From: Fangrui Song <i@maskray.me>
-Date: Fri, 17 Jul 2020 23:29:50 -0700
-Subject: [PATCH 1/2] [RelocationResolver] Support R_PPC_REL32 &
- R_PPC64_REL{32,64}
-
-This suppresses `failed to compute relocation: R_PPC_REL32, Invalid data was encountered while parsing the file`
-and its 64-bit variants when running llvm-dwarfdump on a PowerPC object file with .eh_frame
-
-Unfortunately it is difficult to test the computation:
-DWARFDataExtractor::getEncodedPointer does not use the relocated value
-and even if it does, we need to teach llvm-dwarfdump --eh-frame to do
-some linker job to report a reasonable address.
----
- llvm/lib/Object/RelocationResolver.cpp  | 20 +++++++++++--
- llvm/test/DebugInfo/PowerPC/eh-frame.ll | 39 +++++++++++++++++++++++++
- 2 files changed, 57 insertions(+), 2 deletions(-)
- create mode 100644 llvm/test/DebugInfo/PowerPC/eh-frame.ll
-
-diff --git llvm/lib/Object/RelocationResolver.cpp llvm/lib/Object/RelocationResolver.cpp
-index 31478be7899..eedb236f83d 100644
---- llvm/lib/Object/RelocationResolver.cpp
-+++ llvm/lib/Object/RelocationResolver.cpp
-@@ -131,6 +131,8 @@ static bool supportsPPC64(uint64_t Type) {
-   switch (Type) {
-   case ELF::R_PPC64_ADDR32:
-   case ELF::R_PPC64_ADDR64:
-+  case ELF::R_PPC64_REL32:
-+  case ELF::R_PPC64_REL64:
-     return true;
-   default:
-     return false;
-@@ -143,6 +145,10 @@ static uint64_t resolvePPC64(RelocationRef R, uint64_t S, uint64_t A) {
-     return (S + getELFAddend(R)) & 0xFFFFFFFF;
-   case ELF::R_PPC64_ADDR64:
-     return S + getELFAddend(R);
-+  case ELF::R_PPC64_REL32:
-+    return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
-+  case ELF::R_PPC64_REL64:
-+    return S + getELFAddend(R) - R.getOffset();
-   default:
-     llvm_unreachable("Invalid relocation type");
-   }
-@@ -238,12 +244,22 @@ static uint64_t resolveX86(RelocationRef R, uint64_t S, uint64_t A) {
- }
- 
- static bool supportsPPC32(uint64_t Type) {
--  return Type == ELF::R_PPC_ADDR32;
-+  switch (Type) {
-+  case ELF::R_PPC_ADDR32:
-+  case ELF::R_PPC_REL32:
-+    return true;
-+  default:
-+    return false;
-+  }
- }
- 
- static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) {
--  if (R.getType() == ELF::R_PPC_ADDR32)
-+  switch (R.getType()) {
-+  case ELF::R_PPC_ADDR32:
-     return (S + getELFAddend(R)) & 0xFFFFFFFF;
-+  case ELF::R_PPC_REL32:
-+    return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF;
-+  }
-   llvm_unreachable("Invalid relocation type");
- }
- 
-diff --git llvm/test/DebugInfo/PowerPC/eh-frame.ll llvm/test/DebugInfo/PowerPC/eh-frame.ll
-new file mode 100644
-index 00000000000..3a8f7df6b61
---- /dev/null
-+++ llvm/test/DebugInfo/PowerPC/eh-frame.ll
-@@ -0,0 +1,39 @@
-+; RUN: llc -filetype=obj -mtriple=powerpc %s -o %t32.o
-+; RUN: llvm-readobj -r %t32.o | FileCheck %s --check-prefix=PPC_REL
-+; RUN: llvm-dwarfdump --eh-frame %t32.o 2>&1 | FileCheck %s --check-prefix=PPC
-+
-+; PPC_REL:      R_PPC_REL32 .text 0x0
-+; PPC_REL-NEXT: R_PPC_REL32 .text 0x4
-+
-+; PPC-NOT: warning:
-+; PPC: FDE cie=00000000 pc=00000000...00000004
-+;; TODO Take relocation into consideration
-+; PPC: FDE cie=00000000 pc=00000000...00000004
-+
-+; RUN: llc -filetype=obj -mtriple=ppc64 %s -o %t64.o
-+; RUN: llvm-readobj -r %t64.o | FileCheck %s --check-prefix=PPC64_REL
-+; RUN: llvm-dwarfdump --eh-frame %t64.o 2>&1 | FileCheck %s --check-prefix=PPC64
-+
-+; PPC64_REL:      R_PPC64_REL32 .text 0x0
-+; PPC64_REL-NEXT: R_PPC64_REL32 .text 0x10
-+
-+; PPC64-NOT: warning:
-+; PPC64: FDE cie=00000000 pc=00000000...00000010
-+; PPC64: FDE cie=00000000 pc=00000000...00000010
-+
-+; RUN: llc -filetype=obj -mtriple=ppc64le -code-model=large %s -o %t64l.o
-+; RUN: llvm-readobj -r %t64l.o | FileCheck %s --check-prefix=PPC64L_REL
-+; RUN: llvm-dwarfdump --eh-frame %t64l.o 2>&1 | FileCheck %s --check-prefix=PPC64
-+
-+; PPC64L_REL:      R_PPC64_REL64 .text 0x0
-+; PPC64L_REL-NEXT: R_PPC64_REL64 .text 0x10
-+
-+define void @foo() {
-+entry:
-+  ret void
-+}
-+
-+define void @bar() {
-+entry:
-+  ret void
-+}
--- 
-2.28.0
-
diff --git a/deps/patches/llvm-10.0-PPC-LI-Elimination.patch b/deps/patches/llvm-10.0-PPC-LI-Elimination.patch
deleted file mode 100644
index f47a2c97f0e372..00000000000000
--- a/deps/patches/llvm-10.0-PPC-LI-Elimination.patch
+++ /dev/null
@@ -1,161 +0,0 @@
-From 5423496713e84dea2650e1703821de620f934573 Mon Sep 17 00:00:00 2001
-From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
-Date: Thu, 9 Apr 2020 21:34:30 -0500
-Subject: [PATCH] [PowerPC] Bail out of redundant LI elimination on an implicit
- kill
-
-The transformation currently does not differentiate between explicit
-and implicit kills. However, it is not valid to later simply clear
-an implicit kill flag since the kill could be due to a call or return.
-
-Fixes: https://bugs.llvm.org/show_bug.cgi?id=45374
----
- .../lib/Target/PowerPC/PPCPreEmitPeephole.cpp |  10 ++
- .../remove-redundant-li-skip-imp-kill.mir     | 114 ++++++++++++++++++
- 2 files changed, 124 insertions(+)
- create mode 100644 llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir
-
-diff --git llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
-index a4b4bf2973d..4ea714ff15f 100644
---- llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
-+++ llvm/lib/Target/PowerPC/PPCPreEmitPeephole.cpp
-@@ -109,6 +109,16 @@ namespace {
-           // Track the operand that kill Reg. We would unset the kill flag of
-           // the operand if there is a following redundant load immediate.
-           int KillIdx = AfterBBI->findRegisterUseOperandIdx(Reg, true, TRI);
-+
-+          // We can't just clear implicit kills, so if we encounter one, stop
-+          // looking further.
-+          if (KillIdx != -1 && AfterBBI->getOperand(KillIdx).isImplicit()) {
-+            LLVM_DEBUG(dbgs()
-+                       << "Encountered an implicit kill, cannot proceed: ");
-+            LLVM_DEBUG(AfterBBI->dump());
-+            break;
-+          }
-+
-           if (KillIdx != -1) {
-             assert(!DeadOrKillToUnset && "Shouldn't kill same register twice");
-             DeadOrKillToUnset = &AfterBBI->getOperand(KillIdx);
-diff --git llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir
-new file mode 100644
-index 00000000000..78091d027ce
---- /dev/null
-+++ llvm/test/CodeGen/PowerPC/remove-redundant-li-skip-imp-kill.mir
-@@ -0,0 +1,114 @@
-+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-+# RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -run-pass \
-+# RUN:   ppc-pre-emit-peephole %s -o - | FileCheck %s
-+--- |
-+  ; ModuleID = 'a.ll'
-+  source_filename = "a.ll"
-+  target datalayout = "e-m:e-i64:64-n32:64"
-+
-+  ; Function Attrs: nounwind
-+  define dso_local signext i32 @b(i32 signext %a, i32* nocapture %b) local_unnamed_addr #0 {
-+  entry:
-+    %call = tail call signext i32 @g(i32 signext %a)
-+    store i32 %call, i32* %b, align 4
-+    %call1 = tail call signext i32 @g(i32 signext %a)
-+    ret i32 %call1
-+  }
-+
-+  ; Function Attrs: nounwind
-+  declare signext i32 @g(i32 signext) local_unnamed_addr #0
-+
-+  ; Function Attrs: nounwind
-+  declare void @llvm.stackprotector(i8*, i8**) #0
-+
-+  attributes #0 = { nounwind }
-+
-+...
-+---
-+name:            b
-+alignment:       16
-+exposesReturnsTwice: false
-+legalized:       false
-+regBankSelected: false
-+selected:        false
-+failedISel:      false
-+tracksRegLiveness: true
-+hasWinCFI:       false
-+registers:       []
-+liveins:
-+  - { reg: '$x3', virtual-reg: '' }
-+  - { reg: '$x4', virtual-reg: '' }
-+frameInfo:
-+  isFrameAddressTaken: false
-+  isReturnAddressTaken: false
-+  hasStackMap:     false
-+  hasPatchPoint:   false
-+  stackSize:       64
-+  offsetAdjustment: 0
-+  maxAlignment:    1
-+  adjustsStack:    true
-+  hasCalls:        true
-+  stackProtector:  ''
-+  maxCallFrameSize: 32
-+  cvBytesOfCalleeSavedRegisters: 0
-+  hasOpaqueSPAdjustment: false
-+  hasVAStart:      false
-+  hasMustTailInVarArgFunc: false
-+  localFrameSize:  0
-+  savePoint:       ''
-+  restorePoint:    ''
-+fixedStack:
-+  - { id: 0, type: spill-slot, offset: -80, size: 8, alignment: 16, stack-id: default,
-+      callee-saved-register: '$x30', callee-saved-restored: true, debug-info-variable: '',
-+      debug-info-expression: '', debug-info-location: '' }
-+  - { id: 1, type: spill-slot, offset: -88, size: 8, alignment: 8, stack-id: default,
-+      callee-saved-register: '$x29', callee-saved-restored: true, debug-info-variable: '',
-+      debug-info-expression: '', debug-info-location: '' }
-+stack:           []
-+callSites:       []
-+constants:       []
-+machineFunctionInfo: {}
-+body:             |
-+  bb.0.entry:
-+    liveins: $x3, $x4, $x29, $x30
-+
-+    ; CHECK-LABEL: name: b
-+    ; CHECK: liveins: $x3, $x4, $x29, $x30
-+    ; CHECK: $x0 = MFLR8 implicit $lr8
-+    ; CHECK: STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.0)
-+    ; CHECK: STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.1, align 16)
-+    ; CHECK: STD killed $x0, 16, $x1
-+    ; CHECK: $x1 = STDU $x1, -64, $x1
-+    ; CHECK: $x30 = OR8 killed $x4, $x4
-+    ; CHECK: dead $r4 = LI 10, implicit-def $x4
-+    ; CHECK: $x29 = OR8 $x3, $x3
-+    ; CHECK: BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit $x2, implicit-def $r1, implicit-def $x3
-+    ; CHECK: STW8 killed renamable $x3, 0, killed renamable $x30 :: (store 4 into %ir.b)
-+    ; CHECK: $x3 = OR8 killed $x29, $x29
-+    ; CHECK: BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
-+    ; CHECK: $x1 = ADDI8 $x1, 64
-+    ; CHECK: $x0 = LD 16, $x1
-+    ; CHECK: $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.1, align 16)
-+    ; CHECK: $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.0)
-+    ; CHECK: MTLR8 killed $x0, implicit-def $lr8
-+    ; CHECK: BLR8 implicit $lr8, implicit $rm, implicit killed $x3
-+    $x0 = MFLR8 implicit $lr8
-+    STD killed $x29, -24, $x1 :: (store 8 into %fixed-stack.1)
-+    STD killed $x30, -16, $x1 :: (store 8 into %fixed-stack.0, align 16)
-+    STD killed $x0, 16, $x1
-+    $x1 = STDU $x1, -64, $x1
-+    $x30 = OR8 killed $x4, $x4
-+    dead $r4 = LI 10, implicit-def $x4
-+    $x29 = OR8 $x3, $x3
-+    BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit killed $x4, implicit $x2, implicit-def $r1, implicit-def $x3
-+    STW8 killed renamable $x3, 0, killed renamable $x30 :: (store 4 into %ir.b)
-+    $x3 = OR8 killed $x29, $x29
-+    BL8_NOP @g, csr_ppc64_r2_altivec, implicit-def dead $lr8, implicit $rm, implicit killed $x3, implicit $x2, implicit-def $r1, implicit-def $x3
-+    $x1 = ADDI8 $x1, 64
-+    $x0 = LD 16, $x1
-+    $x30 = LD -16, $x1 :: (load 8 from %fixed-stack.0, align 16)
-+    $x29 = LD -24, $x1 :: (load 8 from %fixed-stack.1)
-+    MTLR8 killed $x0, implicit-def $lr8
-+    BLR8 implicit $lr8, implicit $rm, implicit killed $x3
-+
-+...
--- 
-2.26.0
-
diff --git a/deps/patches/llvm-10.0-PPC_SELECT_CC.patch b/deps/patches/llvm-10.0-PPC_SELECT_CC.patch
deleted file mode 100644
index a56b20eb064f04..00000000000000
--- a/deps/patches/llvm-10.0-PPC_SELECT_CC.patch
+++ /dev/null
@@ -1,135 +0,0 @@
-From 4765db99fa35257a4209e2976903d81087a3f0d7 Mon Sep 17 00:00:00 2001
-From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
-Date: Thu, 9 Apr 2020 13:53:02 -0500
-Subject: [PATCH] [PowerPC] Don't assert on SELECT_CC with i1 type
-
-When we try to select a SELECT_CC on Power9, we check if it can be matched to a
-SETB instruction. In that function, we assert that the output type is i32/i64.
-This is unnecessary as it is perfectly reasonable to have an i1 SELECT_CC.
-Change that from an assert to an early exit condition.
-Fixes: https://bugs.llvm.org/show_bug.cgi?id=45448
----
- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp |  7 +-
- llvm/test/CodeGen/PowerPC/pr45448.ll        | 90 +++++++++++++++++++++
- 2 files changed, 92 insertions(+), 5 deletions(-)
- create mode 100644 llvm/test/CodeGen/PowerPC/pr45448.ll
-
-diff --git llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
-index 776ec52e260..9230ce4118b 100644
---- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
-+++ llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
-@@ -4241,13 +4241,10 @@ static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
-   SDValue TrueRes = N->getOperand(2);
-   SDValue FalseRes = N->getOperand(3);
-   ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
--  if (!TrueConst)
-+  if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
-+                     N->getSimpleValueType(0) != MVT::i32))
-     return false;
- 
--  assert((N->getSimpleValueType(0) == MVT::i64 ||
--          N->getSimpleValueType(0) == MVT::i32) &&
--         "Expecting either i64 or i32 here.");
--
-   // We are looking for any of:
-   // (select_cc lhs, rhs,  1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
-   // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
-diff --git llvm/test/CodeGen/PowerPC/pr45448.ll llvm/test/CodeGen/PowerPC/pr45448.ll
-new file mode 100644
-index 00000000000..eb0a61cb075
---- /dev/null
-+++ llvm/test/CodeGen/PowerPC/pr45448.ll
-@@ -0,0 +1,90 @@
-+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-+; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-+; RUN: FileCheck %s
-+define hidden void @julia_tryparse_internal_45896() #0 {
-+; CHECK-LABEL: julia_tryparse_internal_45896:
-+; CHECK:       # %bb.0: # %top
-+; CHECK-NEXT:    ld r3, 0(r3)
-+; CHECK-NEXT:    cmpldi r3, 0
-+; CHECK-NEXT:    beq cr0, .LBB0_3
-+; CHECK-NEXT:  # %bb.1: # %top
-+; CHECK-NEXT:    cmpldi r3, 10
-+; CHECK-NEXT:    beq cr0, .LBB0_4
-+; CHECK-NEXT:  # %bb.2: # %top
-+; CHECK-NEXT:  .LBB0_3: # %fail194
-+; CHECK-NEXT:  .LBB0_4: # %L294
-+; CHECK-NEXT:    bc 12, 4*cr5+lt, .LBB0_6
-+; CHECK-NEXT:  # %bb.5: # %L294
-+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_7
-+; CHECK-NEXT:  .LBB0_6: # %L1057.preheader
-+; CHECK-NEXT:  .LBB0_7: # %L670
-+; CHECK-NEXT:    lis r5, 4095
-+; CHECK-NEXT:    ori r5, r5, 65533
-+; CHECK-NEXT:    sldi r5, r5, 4
-+; CHECK-NEXT:    cmpdi r3, 0
-+; CHECK-NEXT:    sradi r4, r3, 63
-+; CHECK-NEXT:    mulhdu r3, r3, r5
-+; CHECK-NEXT:    maddld r6, r4, r5, r3
-+; CHECK-NEXT:    crnor 4*cr5+gt, eq, eq
-+; CHECK-NEXT:    cmpld r6, r3
-+; CHECK-NEXT:    mulld r3, r4, r5
-+; CHECK-NEXT:    cmpldi cr1, r3, 0
-+; CHECK-NEXT:    crandc 4*cr5+lt, lt, 4*cr1+eq
-+; CHECK-NEXT:    mulhdu. r3, r4, r5
-+; CHECK-NEXT:    bc 4, 4*cr5+gt, .LBB0_10
-+; CHECK-NEXT:  # %bb.8: # %L670
-+; CHECK-NEXT:    crorc 4*cr5+lt, 4*cr5+lt, eq
-+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_10
-+; CHECK-NEXT:  # %bb.9: # %L917
-+; CHECK-NEXT:  .LBB0_10: # %L994
-+top:
-+  %0 = load i64, i64* undef, align 8
-+  %1 = icmp ne i64 %0, 0
-+  %2 = sext i64 %0 to i128
-+  switch i64 %0, label %pass195 [
-+    i64 10, label %L294
-+    i64 16, label %L294.fold.split
-+    i64 0, label %fail194
-+  ]
-+
-+L294.fold.split:                                  ; preds = %top
-+  unreachable
-+
-+L294:                                             ; preds = %top
-+  %3 = add nsw i32 0, -48
-+  %4 = zext i32 %3 to i128
-+  %5 = add i128 %4, 0
-+  switch i32 undef, label %L670 [
-+    i32 -1031471104, label %L1057.preheader
-+    i32 536870912, label %L1057.preheader
-+  ]
-+
-+L670:                                             ; preds = %L294
-+  br label %L898
-+
-+L1057.preheader:                                  ; preds = %L294, %L294
-+  unreachable
-+
-+L898:                                             ; preds = %L670
-+  %umul = call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %2, i128 %5)
-+  %umul.ov = extractvalue { i128, i1 } %umul, 1
-+  %value_phi102 = and i1 %1, %umul.ov
-+  %6 = or i1 %value_phi102, false
-+  br i1 %6, label %L917, label %L994
-+
-+L917:                                             ; preds = %L898
-+  unreachable
-+
-+L994:                                             ; preds = %L898
-+  unreachable
-+
-+fail194:                                          ; preds = %top
-+  unreachable
-+
-+pass195:                                          ; preds = %top
-+  unreachable
-+}
-+
-+; Function Attrs: nounwind readnone speculatable willreturn
-+declare { i128, i1 } @llvm.umul.with.overflow.i128(i128, i128) #1
--- 
-2.26.0
-
diff --git a/deps/patches/llvm-6.0-DISABLE_ABI_CHECKS.patch b/deps/patches/llvm-6.0-DISABLE_ABI_CHECKS.patch
deleted file mode 100644
index d537c2579166f8..00000000000000
--- a/deps/patches/llvm-6.0-DISABLE_ABI_CHECKS.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From d793ba4bacae51ae25be19c1636fcf38707938fd Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Fri, 1 Jun 2018 17:43:55 -0400
-Subject: [PATCH] fix LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
-
----
- cmake/modules/HandleLLVMOptions.cmake    | 2 +-
- include/llvm/Config/abi-breaking.h.cmake | 2 +-
- 2 files changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
-index 3d2dd48018c..b67ee6a896e 100644
---- a/cmake/modules/HandleLLVMOptions.cmake
-+++ b/cmake/modules/HandleLLVMOptions.cmake
-@@ -572,7 +572,7 @@ if (LLVM_ENABLE_WARNINGS AND (LLVM_COMPILER_IS_GCC_COMPATIBLE OR CLANG_CL))
- 
-   if (LLVM_ENABLE_PEDANTIC AND LLVM_COMPILER_IS_GCC_COMPATIBLE)
-     append("-pedantic" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
--    append("-Wno-long-long" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
-+    append("-Wno-long-long -Wundef" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
-   endif()
- 
-   add_flag_if_supported("-Wcovered-switch-default" COVERED_SWITCH_DEFAULT_FLAG)
-diff --git a/include/llvm/Config/abi-breaking.h.cmake b/include/llvm/Config/abi-breaking.h.cmake
-index 7ae401e5b8a..d52c4609101 100644
---- a/include/llvm/Config/abi-breaking.h.cmake
-+++ b/include/llvm/Config/abi-breaking.h.cmake
-@@ -20,7 +20,7 @@
- 
- /* Allow selectively disabling link-time mismatch checking so that header-only
-    ADT content from LLVM can be used without linking libSupport. */
--#if !LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
-+#ifndef LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
- 
- // ABI_BREAKING_CHECKS protection: provides link-time failure when clients build
- // mismatch with LLVM
--- 
-2.17.0
-
diff --git a/deps/patches/llvm-7.0-D44650.patch b/deps/patches/llvm-7.0-D44650.patch
deleted file mode 100644
index 09b5b27149aa87..00000000000000
--- a/deps/patches/llvm-7.0-D44650.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/tools/llvm-cfi-verify/CMakeLists.txt b/tools/llvm-cfi-verify/CMakeLists.txt
-index ae12bec5e80..9ffbe4e070d 100644
---- a/tools/llvm-cfi-verify/CMakeLists.txt
-+++ b/tools/llvm-cfi-verify/CMakeLists.txt
-@@ -11,7 +11,7 @@ set(LLVM_LINK_COMPONENTS
-   Symbolize
-   )
- 
--add_llvm_tool(llvm-cfi-verify
-+add_llvm_tool(llvm-cfi-verify DISABLE_LLVM_LINK_LLVM_DYLIB
-   llvm-cfi-verify.cpp
-   )
- 
diff --git a/deps/patches/llvm-8.0-D50167-scev-umin.patch b/deps/patches/llvm-8.0-D50167-scev-umin.patch
deleted file mode 100644
index f11fd546bb72c7..00000000000000
--- a/deps/patches/llvm-8.0-D50167-scev-umin.patch
+++ /dev/null
@@ -1,1870 +0,0 @@
-commit 18e563f695dd561c32393512fbdb8ce8771d7e5f
-Author: Keno Fischer <keno@juliacomputing.com>
-Date:   Thu May 2 08:35:22 2019 -0400
-
-    [SCEV] Add explicit representations of umin/smin
-    
-    Summary:
-    Currently we express umin as `~umax(~x, ~y)`. However, this becomes
-    a problem for operands in non-integral pointer spaces, because `~x`
-    is not something we can compute for `x` non-integral. However, since
-    comparisons are generally still allowed, we are actually able to
-    express `umin(x, y)` directly as long as we don't try to express is
-    as a umax. Support this by adding an explicit umin/smin representation
-    to SCEV. We do this by factoring the existing getUMax/getSMax functions
-    into a new function that does all four. The previous two functions were
-    largely identical.
-    
-    Reviewers: reames, sanjoy, mkazantsev
-    
-    Reviewed By: sanjoy
-    
-    Subscribers: tvvikram, dmgreen, vchuravy, javed.absar, llvm-commits
-    
-    Tags: #llvm
-    
-    Differential Revision: https://reviews.llvm.org/D50167
-
-diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
-index 8f4200b07e5..6b76a16a2b4 100644
---- a/include/llvm/Analysis/ScalarEvolution.h
-+++ b/include/llvm/Analysis/ScalarEvolution.h
-@@ -582,6 +582,8 @@ public:
-   /// \p IndexExprs The expressions for the indices.
-   const SCEV *getGEPExpr(GEPOperator *GEP,
-                          const SmallVectorImpl<const SCEV *> &IndexExprs);
-+  const SCEV *getMinMaxExpr(unsigned Kind,
-+                            SmallVectorImpl<const SCEV *> &Operands);
-   const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
-   const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
-   const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
-diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
-index 58d42680d6b..57d658b157d 100644
---- a/include/llvm/Analysis/ScalarEvolutionExpander.h
-+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
-@@ -368,6 +368,10 @@ namespace llvm {
- 
-     Value *visitUMaxExpr(const SCEVUMaxExpr *S);
- 
-+    Value *visitSMinExpr(const SCEVSMinExpr *S);
-+
-+    Value *visitUMinExpr(const SCEVUMinExpr *S);
-+
-     Value *visitUnknown(const SCEVUnknown *S) {
-       return S->getValue();
-     }
-diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
-index 42e76094eb2..99e39d484c5 100644
---- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
-+++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
-@@ -40,7 +40,7 @@ class Type;
-     // These should be ordered in terms of increasing complexity to make the
-     // folders simpler.
-     scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
--    scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr,
-+    scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr,
-     scUnknown, scCouldNotCompute
-   };
- 
-@@ -183,10 +183,9 @@ class Type;
- 
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
--      return S->getSCEVType() == scAddExpr ||
--             S->getSCEVType() == scMulExpr ||
--             S->getSCEVType() == scSMaxExpr ||
--             S->getSCEVType() == scUMaxExpr ||
-+      return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-+             S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
-+             S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr ||
-              S->getSCEVType() == scAddRecExpr;
-     }
-   };
-@@ -201,10 +200,9 @@ class Type;
-   public:
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
--      return S->getSCEVType() == scAddExpr ||
--             S->getSCEVType() == scMulExpr ||
--             S->getSCEVType() == scSMaxExpr ||
--             S->getSCEVType() == scUMaxExpr;
-+      return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-+             S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
-+             S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr;
-     }
- 
-     /// Set flags for a non-recurrence without clearing previously set flags.
-@@ -358,17 +356,53 @@ class Type;
-     }
-   };
- 
--  /// This class represents a signed maximum selection.
--  class SCEVSMaxExpr : public SCEVCommutativeExpr {
-+  /// This node is the base class min/max selections.
-+  class SCEVMinMaxExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
- 
--    SCEVSMaxExpr(const FoldingSetNodeIDRef ID,
--                 const SCEV *const *O, size_t N)
--      : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) {
--      // Max never overflows.
-+    static bool isMinMaxType(enum SCEVTypes T) {
-+      return T == scSMaxExpr || T == scUMaxExpr || T == scSMinExpr ||
-+             T == scUMinExpr;
-+    }
-+
-+  protected:
-+    /// Note: Constructing subclasses via this constructor is allowed
-+    SCEVMinMaxExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T,
-+                   const SCEV *const *O, size_t N)
-+        : SCEVCommutativeExpr(ID, T, O, N) {
-+      assert(isMinMaxType(T));
-+      // Min and max nenver overflow
-       setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
-     }
- 
-+  public:
-+    static bool classof(const SCEV *S) {
-+      return isMinMaxType(static_cast<SCEVTypes>(S->getSCEVType()));
-+    }
-+
-+    static enum SCEVTypes negate(enum SCEVTypes T) {
-+      switch (T) {
-+      case scSMaxExpr:
-+        return scSMinExpr;
-+      case scSMinExpr:
-+        return scSMaxExpr;
-+      case scUMaxExpr:
-+        return scUMaxExpr;
-+      case scUMinExpr:
-+        return scUMinExpr;
-+      default:
-+        llvm_unreachable("Not a min or max SCEV type!");
-+      }
-+    }
-+  };
-+
-+  /// This class represents a signed maximum selection.
-+  class SCEVSMaxExpr : public SCEVMinMaxExpr {
-+    friend class ScalarEvolution;
-+
-+    SCEVSMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-+        : SCEVMinMaxExpr(ID, scSMaxExpr, O, N) {}
-+
-   public:
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-@@ -377,15 +411,11 @@ class Type;
-   };
- 
-   /// This class represents an unsigned maximum selection.
--  class SCEVUMaxExpr : public SCEVCommutativeExpr {
-+  class SCEVUMaxExpr : public SCEVMinMaxExpr {
-     friend class ScalarEvolution;
- 
--    SCEVUMaxExpr(const FoldingSetNodeIDRef ID,
--                 const SCEV *const *O, size_t N)
--      : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) {
--      // Max never overflows.
--      setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
--    }
-+    SCEVUMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-+        : SCEVMinMaxExpr(ID, scUMaxExpr, O, N) {}
- 
-   public:
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-@@ -394,6 +424,34 @@ class Type;
-     }
-   };
- 
-+  /// This class represents a signed minimum selection.
-+  class SCEVSMinExpr : public SCEVMinMaxExpr {
-+    friend class ScalarEvolution;
-+
-+    SCEVSMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-+        : SCEVMinMaxExpr(ID, scSMinExpr, O, N) {}
-+
-+  public:
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) {
-+      return S->getSCEVType() == scSMinExpr;
-+    }
-+  };
-+
-+  /// This class represents an unsigned minimum selection.
-+  class SCEVUMinExpr : public SCEVMinMaxExpr {
-+    friend class ScalarEvolution;
-+
-+    SCEVUMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
-+        : SCEVMinMaxExpr(ID, scUMinExpr, O, N) {}
-+
-+  public:
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) {
-+      return S->getSCEVType() == scUMinExpr;
-+    }
-+  };
-+
-   /// This means that we are dealing with an entirely unknown SCEV
-   /// value, and only represent it as its LLVM Value.  This is the
-   /// "bottom" value for the analysis.
-@@ -466,6 +524,10 @@ class Type;
-         return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
-       case scUMaxExpr:
-         return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S);
-+      case scSMinExpr:
-+        return ((SC *)this)->visitSMinExpr((const SCEVSMinExpr *)S);
-+      case scUMinExpr:
-+        return ((SC *)this)->visitUMinExpr((const SCEVUMinExpr *)S);
-       case scUnknown:
-         return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
-       case scCouldNotCompute:
-@@ -519,6 +581,8 @@ class Type;
-         case scMulExpr:
-         case scSMaxExpr:
-         case scUMaxExpr:
-+        case scSMinExpr:
-+        case scUMinExpr:
-         case scAddRecExpr:
-           for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
-             push(Op);
-@@ -681,6 +745,26 @@ class Type;
-       return !Changed ? Expr : SE.getUMaxExpr(Operands);
-     }
- 
-+    const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) {
-+      SmallVector<const SCEV *, 2> Operands;
-+      bool Changed = false;
-+      for (auto *Op : Expr->operands()) {
-+        Operands.push_back(((SC *)this)->visit(Op));
-+        Changed |= Op != Operands.back();
-+      }
-+      return !Changed ? Expr : SE.getSMinExpr(Operands);
-+    }
-+
-+    const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) {
-+      SmallVector<const SCEV *, 2> Operands;
-+      bool Changed = false;
-+      for (auto *Op : Expr->operands()) {
-+        Operands.push_back(((SC *)this)->visit(Op));
-+        Changed |= Op != Operands.back();
-+      }
-+      return !Changed ? Expr : SE.getUMinExpr(Operands);
-+    }
-+
-     const SCEV *visitUnknown(const SCEVUnknown *Expr) {
-       return Expr;
-     }
-diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
-index e5134f2eeda..f2553de0af1 100644
---- a/lib/Analysis/ScalarEvolution.cpp
-+++ b/lib/Analysis/ScalarEvolution.cpp
-@@ -273,7 +273,9 @@ void SCEV::print(raw_ostream &OS) const {
-   case scAddExpr:
-   case scMulExpr:
-   case scUMaxExpr:
--  case scSMaxExpr: {
-+  case scSMaxExpr:
-+  case scUMinExpr:
-+  case scSMinExpr: {
-     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
-     const char *OpStr = nullptr;
-     switch (NAry->getSCEVType()) {
-@@ -281,6 +283,12 @@ void SCEV::print(raw_ostream &OS) const {
-     case scMulExpr: OpStr = " * "; break;
-     case scUMaxExpr: OpStr = " umax "; break;
-     case scSMaxExpr: OpStr = " smax "; break;
-+    case scUMinExpr:
-+      OpStr = " umin ";
-+      break;
-+    case scSMinExpr:
-+      OpStr = " smin ";
-+      break;
-     }
-     OS << "(";
-     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
-@@ -349,6 +357,8 @@ Type *SCEV::getType() const {
-   case scMulExpr:
-   case scUMaxExpr:
-   case scSMaxExpr:
-+  case scUMinExpr:
-+  case scSMinExpr:
-     return cast<SCEVNAryExpr>(this)->getType();
-   case scAddExpr:
-     return cast<SCEVAddExpr>(this)->getType();
-@@ -713,7 +723,9 @@ static int CompareSCEVComplexity(
-   case scAddExpr:
-   case scMulExpr:
-   case scSMaxExpr:
--  case scUMaxExpr: {
-+  case scUMaxExpr:
-+  case scSMinExpr:
-+  case scUMinExpr: {
-     const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
-     const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
- 
-@@ -913,6 +925,8 @@ public:
-   void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
-   void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
-   void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
-+  void visitSMinExpr(const SCEVSMinExpr *Numerator) {}
-+  void visitUMinExpr(const SCEVUMinExpr *Numerator) {}
-   void visitUnknown(const SCEVUnknown *Numerator) {}
-   void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
- 
-@@ -3493,209 +3507,153 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
-   return getAddExpr(BaseExpr, TotalOffset, Wrap);
- }
- 
--const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
--                                         const SCEV *RHS) {
--  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
--  return getSMaxExpr(Ops);
--}
--
--const SCEV *
--ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
--  assert(!Ops.empty() && "Cannot get empty smax!");
-+const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
-+                                           SmallVectorImpl<const SCEV *> &Ops) {
-+  assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
-   if (Ops.size() == 1) return Ops[0];
- #ifndef NDEBUG
-   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
-   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
-     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
--           "SCEVSMaxExpr operand types don't match!");
-+           "Operand types don't match!");
- #endif
- 
-+  bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
-+  bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
-+
-   // Sort by complexity, this groups all similar expression types together.
-   GroupByComplexity(Ops, &LI, DT);
- 
-+
-+
-+
-+
-+
-   // If there are any constants, fold them together.
-   unsigned Idx = 0;
-   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
-     ++Idx;
-     assert(Idx < Ops.size());
-+    auto FoldOp = [&](const APInt &LHS, const APInt &RHS) {
-+      if (Kind == scSMaxExpr)
-+        return APIntOps::smax(LHS, RHS);
-+      else if (Kind == scSMinExpr)
-+        return APIntOps::smin(LHS, RHS);
-+      else if (Kind == scUMaxExpr)
-+        return APIntOps::umax(LHS, RHS);
-+      else if (Kind == scUMinExpr)
-+        return APIntOps::umin(LHS, RHS);
-+      llvm_unreachable("Unknown SCEV min/max opcode");
-+    };
-+
-     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
-       // We found two constants, fold them together!
-       ConstantInt *Fold = ConstantInt::get(
--          getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
-+          getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
-       Ops[0] = getConstant(Fold);
-       Ops.erase(Ops.begin()+1);  // Erase the folded element
-       if (Ops.size() == 1) return Ops[0];
-       LHSC = cast<SCEVConstant>(Ops[0]);
-     }
- 
--    // If we are left with a constant minimum-int, strip it off.
--    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
-+    bool IsMinV = LHSC->getValue()->isMinValue(IsSigned);
-+    bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned);
-+
-+    if (IsMax ? IsMinV : IsMaxV) {
-+      // If we are left with a constant minimum(/maximum)-int, strip it off.
-       Ops.erase(Ops.begin());
-       --Idx;
--    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
--      // If we have an smax with a constant maximum-int, it will always be
--      // maximum-int.
--      return Ops[0];
-+    } else if (IsMax ? IsMaxV : IsMinV) {
-+      // If we have a max(/min) with a constant maximum(/minimum)-int,
-+      // it will always be the extremum.
-+      return LHSC;
-     }
- 
-     if (Ops.size() == 1) return Ops[0];
-   }
- 
--  // Find the first SMax
--  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
-+  // Find the first operation of the same kind
-+  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind)
-     ++Idx;
- 
--  // Check to see if one of the operands is an SMax. If so, expand its operands
--  // onto our operand list, and recurse to simplify.
-+  // Check to see if one of the operands is of the same kind. If so, expand its
-+  // operands onto our operand list, and recurse to simplify.
-   if (Idx < Ops.size()) {
--    bool DeletedSMax = false;
--    while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
-+    bool DeletedAny = false;
-+    while (Ops[Idx]->getSCEVType() == Kind) {
-+      const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]);
-       Ops.erase(Ops.begin()+Idx);
--      Ops.append(SMax->op_begin(), SMax->op_end());
--      DeletedSMax = true;
-+      Ops.append(SMME->op_begin(), SMME->op_end());
-+      DeletedAny = true;
-     }
- 
--    if (DeletedSMax)
--      return getSMaxExpr(Ops);
-+    if (DeletedAny)
-+      return getMinMaxExpr(Kind, Ops);
-   }
- 
-   // Okay, check to see if the same value occurs in the operand list twice.  If
-   // so, delete one.  Since we sorted the list, these values are required to
-   // be adjacent.
--  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
--    //  X smax Y smax Y  -->  X smax Y
--    //  X smax Y         -->  X, if X is always greater than Y
--    if (Ops[i] == Ops[i+1] ||
--        isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
--      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
--      --i; --e;
--    } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
--      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
--      --i; --e;
-+  llvm::CmpInst::Predicate GEPred =
-+      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
-+  llvm::CmpInst::Predicate LEPred =
-+      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
-+  llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
-+  llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
-+  for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
-+    if (Ops[i] == Ops[i + 1] ||
-+        isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
-+      //  X op Y op Y  -->  X op Y
-+      //  X op Y       -->  X, if we know X, Y are ordered appropriately
-+      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
-+      --i;
-+      --e;
-+    } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
-+                                               Ops[i + 1])) {
-+      //  X op Y       -->  Y, if we know X, Y are ordered appropriately
-+      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
-+      --i;
-+      --e;
-     }
-+  }
- 
-   if (Ops.size() == 1) return Ops[0];
- 
-   assert(!Ops.empty() && "Reduced smax down to nothing!");
- 
--  // Okay, it looks like we really DO need an smax expr.  Check to see if we
-+  // Okay, it looks like we really DO need an expr.  Check to see if we
-   // already have one, otherwise create a new one.
-   FoldingSetNodeID ID;
--  ID.AddInteger(scSMaxExpr);
-+  ID.AddInteger(Kind);
-   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-     ID.AddPointer(Ops[i]);
-   void *IP = nullptr;
-   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
--                                             O, Ops.size());
-+  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
-+      ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
-   UniqueSCEVs.InsertNode(S, IP);
-   addToLoopUseLists(S);
-   return S;
- }
- 
--const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
--                                         const SCEV *RHS) {
-+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
-   SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
--  return getUMaxExpr(Ops);
-+  return getSMaxExpr(Ops);
- }
- 
--const SCEV *
--ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
--  assert(!Ops.empty() && "Cannot get empty umax!");
--  if (Ops.size() == 1) return Ops[0];
--#ifndef NDEBUG
--  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
--  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
--    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
--           "SCEVUMaxExpr operand types don't match!");
--#endif
--
--  // Sort by complexity, this groups all similar expression types together.
--  GroupByComplexity(Ops, &LI, DT);
--
--  // If there are any constants, fold them together.
--  unsigned Idx = 0;
--  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
--    ++Idx;
--    assert(Idx < Ops.size());
--    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
--      // We found two constants, fold them together!
--      ConstantInt *Fold = ConstantInt::get(
--          getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
--      Ops[0] = getConstant(Fold);
--      Ops.erase(Ops.begin()+1);  // Erase the folded element
--      if (Ops.size() == 1) return Ops[0];
--      LHSC = cast<SCEVConstant>(Ops[0]);
--    }
--
--    // If we are left with a constant minimum-int, strip it off.
--    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
--      Ops.erase(Ops.begin());
--      --Idx;
--    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
--      // If we have an umax with a constant maximum-int, it will always be
--      // maximum-int.
--      return Ops[0];
--    }
--
--    if (Ops.size() == 1) return Ops[0];
--  }
--
--  // Find the first UMax
--  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
--    ++Idx;
--
--  // Check to see if one of the operands is a UMax. If so, expand its operands
--  // onto our operand list, and recurse to simplify.
--  if (Idx < Ops.size()) {
--    bool DeletedUMax = false;
--    while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
--      Ops.erase(Ops.begin()+Idx);
--      Ops.append(UMax->op_begin(), UMax->op_end());
--      DeletedUMax = true;
--    }
--
--    if (DeletedUMax)
--      return getUMaxExpr(Ops);
--  }
--
--  // Okay, check to see if the same value occurs in the operand list twice.  If
--  // so, delete one.  Since we sorted the list, these values are required to
--  // be adjacent.
--  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
--    //  X umax Y umax Y  -->  X umax Y
--    //  X umax Y         -->  X, if X is always greater than Y
--    if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning(
--                                    ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) {
--      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
--      --i; --e;
--    } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i],
--                                               Ops[i + 1])) {
--      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
--      --i; --e;
--    }
--
--  if (Ops.size() == 1) return Ops[0];
-+const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
-+  return getMinMaxExpr(scSMaxExpr, Ops);
-+}
- 
--  assert(!Ops.empty() && "Reduced umax down to nothing!");
-+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
-+  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
-+  return getUMaxExpr(Ops);
-+}
- 
--  // Okay, it looks like we really DO need a umax expr.  Check to see if we
--  // already have one, otherwise create a new one.
--  FoldingSetNodeID ID;
--  ID.AddInteger(scUMaxExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
--    ID.AddPointer(Ops[i]);
--  void *IP = nullptr;
--  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
--  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
--  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
--                                             O, Ops.size());
--  UniqueSCEVs.InsertNode(S, IP);
--  addToLoopUseLists(S);
--  return S;
-+const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
-+  return getMinMaxExpr(scUMaxExpr, Ops);
- }
- 
- const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
-@@ -3705,11 +3663,7 @@ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
- }
- 
- const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
--  // ~smax(~x, ~y, ~z) == smin(x, y, z).
--  SmallVector<const SCEV *, 2> NotOps;
--  for (auto *S : Ops)
--    NotOps.push_back(getNotSCEV(S));
--  return getNotSCEV(getSMaxExpr(NotOps));
-+  return getMinMaxExpr(scSMinExpr, Ops);
- }
- 
- const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
-@@ -3719,16 +3673,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
- }
- 
- const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
--  assert(!Ops.empty() && "At least one operand must be!");
--  // Trivial case.
--  if (Ops.size() == 1)
--    return Ops[0];
--
--  // ~umax(~x, ~y, ~z) == umin(x, y, z).
--  SmallVector<const SCEV *, 2> NotOps;
--  for (auto *S : Ops)
--    NotOps.push_back(getNotSCEV(S));
--  return getNotSCEV(getUMaxExpr(NotOps));
-+  return getMinMaxExpr(scUMinExpr, Ops);
- }
- 
- const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
-@@ -3970,12 +3915,45 @@ const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V,
-       V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);
- }
- 
-+/// If Expr computes ~A, return A else return nullptr
-+static const SCEV *MatchNotExpr(const SCEV *Expr) {
-+  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
-+  if (!Add || Add->getNumOperands() != 2 ||
-+      !Add->getOperand(0)->isAllOnesValue())
-+    return nullptr;
-+
-+  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
-+  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
-+      !AddRHS->getOperand(0)->isAllOnesValue())
-+    return nullptr;
-+
-+  return AddRHS->getOperand(1);
-+}
-+
- /// Return a SCEV corresponding to ~V = -1-V
- const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {
-   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
-     return getConstant(
-                 cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
- 
-+  // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y)
-+  if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) {
-+    auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) {
-+      SmallVector<const SCEV *, 2> MatchedOperands;
-+      for (const SCEV *Operand : MME->operands()) {
-+        const SCEV *Matched = MatchNotExpr(Operand);
-+        if (!Matched)
-+          return (const SCEV *)nullptr;
-+        MatchedOperands.push_back(Matched);
-+      }
-+      return getMinMaxExpr(
-+          SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())),
-+          MatchedOperands);
-+    };
-+    if (const SCEV *Replaced = MatchMinMaxNegation(MME))
-+      return Replaced;
-+  }
-+
-   Type *Ty = V->getType();
-   Ty = getEffectiveSCEVType(Ty);
-   const SCEV *AllOnes =
-@@ -5196,6 +5174,8 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
-       switch (S->getSCEVType()) {
-       case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
-       case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
-+      case scUMinExpr:
-+      case scSMinExpr:
-         // These expressions are available if their operand(s) is/are.
-         return true;
- 
-@@ -8075,7 +8055,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
-     }
-     case scSMaxExpr:
-     case scUMaxExpr:
--      break; // TODO: smax, umax.
-+    case scSMinExpr:
-+    case scUMinExpr:
-+      break; // TODO: smax, umax, smin, umax.
-   }
-   return nullptr;
- }
-@@ -8201,10 +8183,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
-           return getAddExpr(NewOps);
-         if (isa<SCEVMulExpr>(Comm))
-           return getMulExpr(NewOps);
--        if (isa<SCEVSMaxExpr>(Comm))
--          return getSMaxExpr(NewOps);
--        if (isa<SCEVUMaxExpr>(Comm))
--          return getUMaxExpr(NewOps);
-+        if (isa<SCEVMinMaxExpr>(Comm))
-+          return getMinMaxExpr(Comm->getSCEVType(), NewOps);
-         llvm_unreachable("Unknown commutative SCEV type!");
-       }
-     }
-@@ -10045,41 +10025,15 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
-                                      getNotSCEV(FoundLHS));
- }
- 
--/// If Expr computes ~A, return A else return nullptr
--static const SCEV *MatchNotExpr(const SCEV *Expr) {
--  const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr);
--  if (!Add || Add->getNumOperands() != 2 ||
--      !Add->getOperand(0)->isAllOnesValue())
--    return nullptr;
--
--  const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1));
--  if (!AddRHS || AddRHS->getNumOperands() != 2 ||
--      !AddRHS->getOperand(0)->isAllOnesValue())
--    return nullptr;
--
--  return AddRHS->getOperand(1);
--}
--
--/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
--template<typename MaxExprType>
--static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
--                              const SCEV *Candidate) {
--  const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
--  if (!MaxExpr) return false;
--
--  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
--}
--
--/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
--template<typename MaxExprType>
--static bool IsMinConsistingOf(ScalarEvolution &SE,
--                              const SCEV *MaybeMinExpr,
--                              const SCEV *Candidate) {
--  const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
--  if (!MaybeMaxExpr)
-+/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
-+template <typename MinMaxExprType>
-+static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
-+                                 const SCEV *Candidate) {
-+  const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
-+  if (!MinMaxExpr)
-     return false;
- 
--  return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
-+  return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end();
- }
- 
- static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
-@@ -10128,20 +10082,20 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
-     LLVM_FALLTHROUGH;
-   case ICmpInst::ICMP_SLE:
-     return
--      // min(A, ...) <= A
--      IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
--      // A <= max(A, ...)
--      IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
-+        // min(A, ...) <= A
-+        IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
-+        // A <= max(A, ...)
-+        IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
- 
-   case ICmpInst::ICMP_UGE:
-     std::swap(LHS, RHS);
-     LLVM_FALLTHROUGH;
-   case ICmpInst::ICMP_ULE:
-     return
--      // min(A, ...) <= A
--      IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
--      // A <= max(A, ...)
--      IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
-+        // min(A, ...) <= A
-+        IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
-+        // A <= max(A, ...)
-+        IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
-   }
- 
-   llvm_unreachable("covered switch fell through?!");
-@@ -11611,7 +11565,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
-   case scAddExpr:
-   case scMulExpr:
-   case scUMaxExpr:
--  case scSMaxExpr: {
-+  case scSMaxExpr:
-+  case scUMinExpr:
-+  case scSMinExpr: {
-     bool HasVarying = false;
-     for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
-       LoopDisposition D = getLoopDisposition(Op, L);
-@@ -11698,7 +11654,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-   case scAddExpr:
-   case scMulExpr:
-   case scUMaxExpr:
--  case scSMaxExpr: {
-+  case scSMaxExpr:
-+  case scUMinExpr:
-+  case scSMinExpr: {
-     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
-     bool Proper = true;
-     for (const SCEV *NAryOp : NAry->operands()) {
-diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
-index ca5cf1663b8..b56ec40ab75 100644
---- a/lib/Analysis/ScalarEvolutionExpander.cpp
-+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
-@@ -1634,7 +1634,8 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
-   for (int i = S->getNumOperands()-2; i >= 0; --i) {
-     // In the case of mixed integer and pointer types, do the
-     // rest of the comparisons as integer.
--    if (S->getOperand(i)->getType() != Ty) {
-+    Type *OpTy = S->getOperand(i)->getType();
-+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
-       Ty = SE.getEffectiveSCEVType(Ty);
-       LHS = InsertNoopCastOfTo(LHS, Ty);
-     }
-@@ -1658,7 +1659,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
-   for (int i = S->getNumOperands()-2; i >= 0; --i) {
-     // In the case of mixed integer and pointer types, do the
-     // rest of the comparisons as integer.
--    if (S->getOperand(i)->getType() != Ty) {
-+    Type *OpTy = S->getOperand(i)->getType();
-+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
-       Ty = SE.getEffectiveSCEVType(Ty);
-       LHS = InsertNoopCastOfTo(LHS, Ty);
-     }
-@@ -1676,6 +1678,56 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
-   return LHS;
- }
- 
-+Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
-+  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
-+  Type *Ty = LHS->getType();
-+  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
-+    // In the case of mixed integer and pointer types, do the
-+    // rest of the comparisons as integer.
-+    Type *OpTy = S->getOperand(i)->getType();
-+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
-+      Ty = SE.getEffectiveSCEVType(Ty);
-+      LHS = InsertNoopCastOfTo(LHS, Ty);
-+    }
-+    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
-+    Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
-+    rememberInstruction(ICmp);
-+    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
-+    rememberInstruction(Sel);
-+    LHS = Sel;
-+  }
-+  // In the case of mixed integer and pointer types, cast the
-+  // final result back to the pointer type.
-+  if (LHS->getType() != S->getType())
-+    LHS = InsertNoopCastOfTo(LHS, S->getType());
-+  return LHS;
-+}
-+
-+Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
-+  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
-+  Type *Ty = LHS->getType();
-+  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
-+    // In the case of mixed integer and pointer types, do the
-+    // rest of the comparisons as integer.
-+    Type *OpTy = S->getOperand(i)->getType();
-+    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
-+      Ty = SE.getEffectiveSCEVType(Ty);
-+      LHS = InsertNoopCastOfTo(LHS, Ty);
-+    }
-+    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
-+    Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
-+    rememberInstruction(ICmp);
-+    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
-+    rememberInstruction(Sel);
-+    LHS = Sel;
-+  }
-+  // In the case of mixed integer and pointer types, cast the
-+  // final result back to the pointer type.
-+  if (LHS->getType() != S->getType())
-+    LHS = InsertNoopCastOfTo(LHS, S->getType());
-+  return LHS;
-+}
-+
- Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
-                                    Instruction *IP) {
-   setInsertPoint(IP);
-@@ -2102,7 +2154,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
- 
-   // HowManyLessThans uses a Max expression whenever the loop is not guarded by
-   // the exit condition.
--  if (isa<SCEVSMaxExpr>(S) || isa<SCEVUMaxExpr>(S))
-+  if (isa<SCEVMinMaxExpr>(S))
-     return true;
- 
-   // Recurse past nary expressions, which commonly occur in the
-diff --git a/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll
-new file mode 100644
-index 00000000000..a08632f38d1
---- /dev/null
-+++ b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll
-@@ -0,0 +1,50 @@
-+; RUN: opt -loop-versioning -S < %s | FileCheck %s
-+
-+; NB: addrspaces 10-13 are non-integral
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
-+
-+%jl_value_t = type opaque
-+%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
-+
-+define void @"japi1_permutedims!_33509"(%jl_value_t addrspace(10)**) {
-+; CHECK: [[CMP:%[^ ]*]] = icmp ult double addrspace(13)* [[A:%[^ ]*]], [[B:%[^ ]*]]
-+; CHECK: [[SELECT:%[^ ]*]] = select i1 %18, double addrspace(13)* [[A]], double addrspace(13)* [[B]]
-+top:
-+  %1 = alloca [3 x i64], align 8 
-+  %2 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8
-+  %3 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, i64 1
-+  %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %3, align 8
-+  %5 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 0
-+  store i64 1, i64* %5, align 8
-+  %6 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 1
-+  %7 = load i64, i64* inttoptr (i64 24 to i64*), align 8
-+  %8 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)*
-+  %9 = bitcast %jl_value_t addrspace(11)* %8 to double addrspace(13)* addrspace(11)*
-+  %10 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %9, align 8
-+  %11 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)*
-+  %12 = bitcast %jl_value_t addrspace(11)* %11 to double addrspace(13)* addrspace(11)*
-+  %13 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %12, align 8
-+  %14 = load i64, i64* %6, align 8
-+  br label %L74
-+
-+L74:
-+  %value_phi20 = phi i64 [ 1, %top ], [ %22, %L74 ]
-+  %value_phi21 = phi i64 [ 1, %top ], [ %23, %L74 ]
-+  %value_phi22 = phi i64 [ 1, %top ], [ %25, %L74 ]
-+  %15 = add i64 %value_phi21, -1
-+  %16 = getelementptr inbounds double, double addrspace(13)* %10, i64 %15
-+  %17 = bitcast double addrspace(13)* %16 to i64 addrspace(13)*
-+  %18 = load i64, i64 addrspace(13)* %17, align 8
-+  %19 = add i64 %value_phi20, -1
-+  %20 = getelementptr inbounds double, double addrspace(13)* %13, i64 %19
-+  %21 = bitcast double addrspace(13)* %20 to i64 addrspace(13)*
-+  store i64 %18, i64 addrspace(13)* %21, align 8
-+  %22 = add i64 %value_phi20, 1
-+  %23 = add i64 %14, %value_phi21
-+  %24 = icmp eq i64 %value_phi22, %7
-+  %25 = add i64 %value_phi22, 1
-+  br i1 %24, label %L94, label %L74
-+
-+L94:
-+  ret void 
-+}
-diff --git a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
-index 405a47554e4..4285ef0f117 100644
---- a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
-+++ b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
-@@ -58,7 +58,7 @@ for.end:                                          ; preds = %for.body
- 
- ; Here it is not obvious what the limits are, since 'step' could be negative.
- 
--; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
-+; CHECK: Low: ((60000 + %a)<nsw> umin (60000 + (-40000 * %step) + %a)) 
- ; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
- 
- define void @g(i64 %step) {
-diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
-index 3542ad2a41e..d930706d7d2 100644
---- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
-+++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
-@@ -22,5 +22,5 @@ afterfor:		; preds = %forinc, %entry
- 	ret i32 %j.0.lcssa
- }
- 
--; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}}))))
-+; CHECK: backedge-taken count is (-2147483633 + (-1 * (%{{[xy]}} smin %{{[xy]}})))
- 
-diff --git a/test/Analysis/ScalarEvolution/min-max-exprs.ll b/test/Analysis/ScalarEvolution/min-max-exprs.ll
-index e8c1e33e095..51f72c643cc 100644
---- a/test/Analysis/ScalarEvolution/min-max-exprs.ll
-+++ b/test/Analysis/ScalarEvolution/min-max-exprs.ll
-@@ -33,7 +33,7 @@ bb2:                                              ; preds = %bb1
-   %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6
- ;                  min(N, i+3)
- ; CHECK:           select i1 %tmp4, i64 %tmp5, i64 %tmp6
--; CHECK-NEXT:  --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nuw><%bb1> to i64))<nsw>)<nsw> smax (-1 + (-1 * (sext i32 %N to i64))<nsw>)<nsw>))<nsw>)<nsw>
-+; CHECK-NEXT:  --> ((sext i32 {3,+,1}<nuw><%bb1> to i64) smin (sext i32 %N to i64))
-   %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
-   %tmp12 = load i32, i32* %tmp11, align 4
-   %tmp13 = shl nsw i32 %tmp12, 1
-diff --git a/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/test/Analysis/ScalarEvolution/predicated-trip-count.ll
-index a0afcf457d2..b07662ed95f 100644
---- a/test/Analysis/ScalarEvolution/predicated-trip-count.ll
-+++ b/test/Analysis/ScalarEvolution/predicated-trip-count.ll
-@@ -80,7 +80,7 @@ return:         ; preds = %bb5
- ; CHECK-NEXT:    -->  (sext i16 {%Start,+,-1}<%bb3> to i32)
- ; CHECK:       Loop %bb3: Unpredictable backedge-taken count.
- ; CHECK-NEXT:  Loop %bb3: Unpredictable max backedge-taken count.
--; CHECK-NEXT:  Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))<nsw>) smax (-1 + (-1 * %M))))
-+; CHECK-NEXT:  Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32))<nsw> smin %M)))
- ; CHECK-NEXT:  Predicates:
- ; CHECK-NEXT:    {%Start,+,-1}<%bb3> Added Flags: <nssw>
- 
-diff --git a/test/Analysis/ScalarEvolution/trip-count14.ll b/test/Analysis/ScalarEvolution/trip-count14.ll
-index 5e6cfe85101..15080613881 100644
---- a/test/Analysis/ScalarEvolution/trip-count14.ll
-+++ b/test/Analysis/ScalarEvolution/trip-count14.ll
-@@ -81,7 +81,7 @@ if.end:
-   br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
- 
- ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit
--; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) smax %n)) + %n) umax (-1 + (-1 * %x) + %n))))
-+; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (((-1 * %n) + ((2 + %n) smax %n)) umin ((-1 * %n) + %x))
- ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
- 
- do.end:
-@@ -169,7 +169,7 @@ if.end:
-   br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
- 
- ; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit
--; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) umax %n)) + %n) umax (-1 + (-1 * %x) + %n))))
-+; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (((-1 * %n) + ((2 + %n) umax %n)) umin ((-1 * %n) + %x))
- ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
- 
- do.end:
-diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
-index df6637a4ced..e10012c0c32 100644
---- a/test/Analysis/ScalarEvolution/trip-count3.ll
-+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
-@@ -4,7 +4,7 @@
- ; dividing by the stride will have a remainder. This could theoretically
- ; be teaching it how to use a more elaborate trip count computation.
- 
--; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64)
-+; CHECK: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64)
- ; CHECK: Loop %bb3.i: max backedge-taken count is 33554431
- 
- %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
-diff --git a/test/Transforms/IRCE/conjunctive-checks.ll b/test/Transforms/IRCE/conjunctive-checks.ll
-index 60a0af83174..8711c1b00e8 100644
---- a/test/Transforms/IRCE/conjunctive-checks.ll
-+++ b/test/Transforms/IRCE/conjunctive-checks.ll
-@@ -5,17 +5,15 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) {
- ; CHECK-LABEL: @f_0(
- 
- ; CHECK: loop.preheader:
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
--; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
--; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
--; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
-+; CHECK: [[len_sub:[^ ]+]] = add i32 %len, -4
-+; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[len_sub]]
-+; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[len_sub]]
- ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
- ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
- ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
--; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit
-+; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader2:[^ ,]+]], label %main.pseudo.exit
- 
--; CHECK: loop.preheader2:
-+; CHECK: [[loop_preheader2]]:
- ; CHECK: br label %loop
- 
-  entry:
-@@ -35,9 +33,9 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) {
- ; CHECK: loop:
- ; CHECK:  %cond = load volatile i1, i1* %cond_buf
- ; CHECK:  %abc = and i1 %cond, true
--; CHECK:  br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit3, !prof !1
-+; CHECK:  br i1 %abc, label %in.bounds, label %[[loop_exit:[^ ,]+]], !prof !1
- 
--; CHECK: out.of.bounds.loopexit:
-+; CHECK: [[loop_exit]]:
- ; CHECK:  br label %out.of.bounds
- 
-  in.bounds:
-@@ -58,14 +56,10 @@ define void @f_1(
- ; CHECK-LABEL: @f_1(
- 
- ; CHECK: loop.preheader:
--; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b
--; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a
--; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
--; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
--; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
--; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
-+; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a
-+; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a
-+; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n 
-+; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n
- ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
- ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0
- 
-@@ -85,9 +79,9 @@ define void @f_1(
- 
- ; CHECK: loop:
- ; CHECK:   %abc = and i1 true, true
--; CHECK:   br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit4, !prof !1
-+; CHECK:   br i1 %abc, label %in.bounds, label %[[oob_loopexit:[^ ,]+]], !prof !1
- 
--; CHECK: out.of.bounds.loopexit:
-+; CHECK: [[oob_loopexit]]:
- ; CHECK-NEXT:  br label %out.of.bounds
- 
- 
-diff --git a/test/Transforms/IRCE/decrementing-loop.ll b/test/Transforms/IRCE/decrementing-loop.ll
-index 4c82cd3e341..2994a432a71 100644
---- a/test/Transforms/IRCE/decrementing-loop.ll
-+++ b/test/Transforms/IRCE/decrementing-loop.ll
-@@ -29,11 +29,8 @@ define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
-   ret void
- 
- ; CHECK: loop.preheader:
--; CHECK:   [[not_len:[^ ]+]] = sub i32 -1, %len
--; CHECK:   [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK:   [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
--; CHECK:   [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]]
--; CHECK:   [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]]
-+; CHECK:   [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n
-+; CHECK:   [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n
- ; CHECK:   [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0
- ; CHECK:   [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0
- ; CHECK:   %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1
-diff --git a/test/Transforms/IRCE/multiple-access-no-preloop.ll b/test/Transforms/IRCE/multiple-access-no-preloop.ll
-index 000d1ab36f2..3bde9bd8668 100644
---- a/test/Transforms/IRCE/multiple-access-no-preloop.ll
-+++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll
-@@ -38,14 +38,10 @@ define void @multiple_access_no_preloop(
- ; CHECK-LABEL: @multiple_access_no_preloop(
- 
- ; CHECK: loop.preheader:
--; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b
--; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a
--; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
--; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
--; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
--; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
-+; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a
-+; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a
-+; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n
-+; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n
- ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
- ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0
- 
-diff --git a/test/Transforms/IRCE/ranges_of_different_types.ll b/test/Transforms/IRCE/ranges_of_different_types.ll
-index 5c8161369f2..46bd94ce687 100644
---- a/test/Transforms/IRCE/ranges_of_different_types.ll
-+++ b/test/Transforms/IRCE/ranges_of_different_types.ll
-@@ -23,12 +23,11 @@ define void @test_01(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NOT:     preloop
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 12, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
--; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, -13
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
-+; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0
- ; CHECK-NEXT:      [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
- ; CHECK-NEXT:      br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
- ; CHECK:         loop
-@@ -83,13 +82,11 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NEXT:      [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
- ; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
- ; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
--; CHECK-NEXT:      [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
--; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
--; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]]
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
-+; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101
-+; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0
- ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
- ; CHECK:         loop.preloop:
- ; CHECK-NEXT:      %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ]
-@@ -151,14 +148,11 @@ define void @test_03(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NOT:     preloop
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -2, %len
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
--; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
--; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
--; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
--; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13
-+; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13
-+; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]]
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101
- ; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
- ; CHECK-NEXT:      br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
- ; CHECK:         postloop:
-@@ -208,10 +202,9 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-LABEL: test_04(
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -14, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, 13
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101
- ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
- ; CHECK:         in.bounds.preloop:
- ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
-@@ -252,12 +245,11 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NOT:     preloop
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 12, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
--; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, -13
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
-+; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0
- ; CHECK-NEXT:      [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
- ; CHECK-NEXT:      br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
- ; CHECK:         loop
-@@ -297,13 +289,11 @@ define void @test_06(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NEXT:      [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
- ; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
- ; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
--; CHECK-NEXT:      [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
--; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
--; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]]
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
-+; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101
-+; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0
- ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
- ; CHECK:         in.bounds.preloop:
- ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
-@@ -344,14 +334,11 @@ define void @test_07(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-NOT:     preloop
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -2, %len
--; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
--; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
--; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
--; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
--; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
--; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13
-+; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13
-+; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]]
-+; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101
- ; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
- ; CHECK-NEXT:      br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
- ; CHECK:         loop
-@@ -388,10 +375,9 @@ define void @test_08(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK-LABEL: test_08(
- ; CHECK:         entry:
- ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
--; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -14, %len
--; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
--; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
--; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
-+; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, 13
-+; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101
-+; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101
- ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
- ; CHECK:         in.bounds.preloop:
- ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
-diff --git a/test/Transforms/IRCE/rc-negative-bound.ll b/test/Transforms/IRCE/rc-negative-bound.ll
-index bfc0cd14778..d226bffeaae 100644
---- a/test/Transforms/IRCE/rc-negative-bound.ll
-+++ b/test/Transforms/IRCE/rc-negative-bound.ll
-@@ -114,49 +114,44 @@ define void @test_03(i32 *%arr, i32 %n, i32 %bound) {
- ; CHECK:       loop.preheader:
- ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
- ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
--; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
--; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]]
--; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[BOUND]]
--; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1
--; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1
--; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 -1, [[SMAX1]]
--; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1
--; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1
--; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SMAX2]], 1
--; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]]
--; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[TMP8]]
--; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 -1, [[N]]
--; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
--; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
--; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[SMAX3]]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0
--; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0
--; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
--; CHECK:       loop.preheader5:
-+; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
-+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]]
-+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0
-+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0
-+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1
-+; CHECK-NEXT:    [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1
-+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[SMIN1]], 1
-+; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]]
-+; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]]
-+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0
-+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0
-+; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
-+; CHECK:       loop.preheader4:
- ; CHECK-NEXT:    br label [[LOOP:%.*]]
- ; CHECK:       loop:
--; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ]
-+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ]
- ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
- ; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]]
--; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0
-+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0
- ; CHECK:       in.bounds:
- ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
- ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
- ; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
--; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
-+; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
- ; CHECK:       main.exit.selector:
- ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
--; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
--; CHECK-NEXT:    br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
-+; CHECK-NEXT:    [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
-+; CHECK-NEXT:    br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
- ; CHECK:       main.pseudo.exit:
- ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
- ; CHECK:       out.of.bounds.loopexit:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
--; CHECK:       out.of.bounds.loopexit6:
-+; CHECK:       out.of.bounds.loopexit5:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
- ; CHECK:       out.of.bounds:
- ; CHECK-NEXT:    ret void
-@@ -211,47 +206,41 @@ define void @test_04(i32 *%arr, i32 %n, i32 %bound) {
- ; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
- ; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
- ; CHECK:       loop.preheader:
--; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]]
--; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1
--; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
--; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]]
--; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
--; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 -1, [[SMAX]]
--; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1
--; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1
--; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SMAX1]], 1
--; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]]
--; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 -1, [[TMP7]]
--; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[N]]
--; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
--; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
--; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]]
--; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
--; CHECK:       loop.preheader2:
-+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0
-+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0
-+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]]
-+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1
-+; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1
-+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SMIN]], 1
-+; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]]
-+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]]
-+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]]
-+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
-+; CHECK:       loop.preheader1:
- ; CHECK-NEXT:    br label [[LOOP:%.*]]
- ; CHECK:       loop:
--; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ]
-+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ]
- ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
- ; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]]
--; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0
-+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0
- ; CHECK:       in.bounds:
- ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
- ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
- ; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
--; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
- ; CHECK:       main.exit.selector:
- ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
--; CHECK-NEXT:    br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
-+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
-+; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
- ; CHECK:       main.pseudo.exit:
- ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
- ; CHECK:       out.of.bounds.loopexit:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
--; CHECK:       out.of.bounds.loopexit3:
-+; CHECK:       out.of.bounds.loopexit2:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
- ; CHECK:       out.of.bounds:
- ; CHECK-NEXT:    ret void
-@@ -413,49 +402,44 @@ define void @test_07(i32 *%arr, i32 %n, i32 %bound) {
- ; CHECK:       loop.preheader:
- ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
- ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
--; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
--; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]]
--; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[BOUND]]
--; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1
--; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1
--; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 -1, [[SMAX1]]
--; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1
--; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1
--; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SMAX2]], 1
--; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]]
--; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[TMP8]]
--; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 -1, [[N]]
--; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
--; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
--; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[SMAX3]]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0
--; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0
--; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
--; CHECK:       loop.preheader5:
-+; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
-+; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]]
-+; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0
-+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0
-+; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1
-+; CHECK-NEXT:    [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1
-+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[SMIN1]], 1
-+; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]]
-+; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]]
-+; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0
-+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0
-+; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
-+; CHECK:       loop.preheader4:
- ; CHECK-NEXT:    br label [[LOOP:%.*]]
- ; CHECK:       loop:
--; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ]
-+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ]
- ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
- ; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]]
--; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0
-+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0
- ; CHECK:       in.bounds:
- ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
- ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
- ; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
--; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
-+; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
- ; CHECK:       main.exit.selector:
- ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
--; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
--; CHECK-NEXT:    br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
-+; CHECK-NEXT:    [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
-+; CHECK-NEXT:    br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
- ; CHECK:       main.pseudo.exit:
- ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
- ; CHECK:       out.of.bounds.loopexit:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
--; CHECK:       out.of.bounds.loopexit6:
-+; CHECK:       out.of.bounds.loopexit5:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
- ; CHECK:       out.of.bounds:
- ; CHECK-NEXT:    ret void
-@@ -512,47 +496,41 @@ define void @test_08(i32 *%arr, i32 %n, i32 %bound) {
- ; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
- ; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
- ; CHECK:       loop.preheader:
--; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]]
--; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1
--; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
--; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]]
--; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
--; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 -1, [[SMAX]]
--; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1
--; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1
--; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SMAX1]], 1
--; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]]
--; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 -1, [[TMP7]]
--; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[N]]
--; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
--; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
--; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]]
--; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
--; CHECK:       loop.preheader2:
-+; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0
-+; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0
-+; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]]
-+; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1
-+; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1
-+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SMIN]], 1
-+; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]]
-+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]]
-+; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]]
-+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
-+; CHECK:       loop.preheader1:
- ; CHECK-NEXT:    br label [[LOOP:%.*]]
- ; CHECK:       loop:
--; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ]
-+; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ]
- ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
- ; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]]
--; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0
-+; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0
- ; CHECK:       in.bounds:
- ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
- ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
- ; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
--; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
--; CHECK-NEXT:    br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
-+; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
- ; CHECK:       main.exit.selector:
- ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
--; CHECK-NEXT:    br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
-+; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
-+; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
- ; CHECK:       main.pseudo.exit:
- ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
- ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
- ; CHECK:       out.of.bounds.loopexit:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
--; CHECK:       out.of.bounds.loopexit3:
-+; CHECK:       out.of.bounds.loopexit2:
- ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
- ; CHECK:       out.of.bounds:
- ; CHECK-NEXT:    ret void
-diff --git a/test/Transforms/IRCE/single-access-no-preloop.ll b/test/Transforms/IRCE/single-access-no-preloop.ll
-index fb643139c6d..7bf36f7c254 100644
---- a/test/Transforms/IRCE/single-access-no-preloop.ll
-+++ b/test/Transforms/IRCE/single-access-no-preloop.ll
-@@ -86,15 +86,13 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3
- ; CHECK-LABEL: @single_access_no_preloop_with_offset(
- 
- ; CHECK: loop.preheader:
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
--; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
--; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
--; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
-+; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4
-+; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]]
-+; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]]
- ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
- ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
- ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
--; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit
-+; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader:[^ ,]+]], label %main.pseudo.exit
- 
- ; CHECK: loop:
- ; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
-diff --git a/test/Transforms/IRCE/single-access-with-preloop.ll b/test/Transforms/IRCE/single-access-with-preloop.ll
-index 6f3b0324e39..bd235aa4a73 100644
---- a/test/Transforms/IRCE/single-access-with-preloop.ll
-+++ b/test/Transforms/IRCE/single-access-with-preloop.ll
-@@ -34,11 +34,9 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
- ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647
- ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647
- ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
--; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1
--; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
--; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]]
--; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]]
--; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]]
-+; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]]
-+; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]]
-+; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]]
- ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0
- ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0
- 
-@@ -46,17 +44,15 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
- ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647
- ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]]
- ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]]
--; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1
- ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
--; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len
--; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]]
--; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]]
-+; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]]
-+; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]]
-+; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]]
- ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0
- ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0
--; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648
--; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]]
--; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]]
--; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]]
-+; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]]
-+; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]]
-+; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]]
- ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0
- ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0
- 
-@@ -67,7 +63,7 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
- ; CHECK: %abc.high = icmp slt i32 %array.idx, %len
- ; CHECK: %abc.low = icmp sge i32 %array.idx, 0
- ; CHECK: %abc = and i1 true, true
--; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit11
-+; CHECK: br i1 %abc, label %in.bounds, label %[[loopexit:[^ ,]+]]
- 
- ; CHECK: in.bounds:
- ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]
-diff --git a/test/Transforms/IRCE/unsigned_comparisons_ugt.ll b/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
-index 8f00c733569..3451d65c7bb 100644
---- a/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
-+++ b/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
-@@ -58,8 +58,8 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK:        entry:
- ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
- ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
--; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
--; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
-+; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
-+; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
- ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at
- ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
- ; CHECK:        mainloop:
-@@ -149,8 +149,8 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK:        entry:
- ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
- ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
--; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
--; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
-+; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
-+; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
- ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at
- ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
- ; CHECK:        mainloop:
-diff --git a/test/Transforms/IRCE/unsigned_comparisons_ult.ll b/test/Transforms/IRCE/unsigned_comparisons_ult.ll
-index dc59c11df1b..aca3c3d192e 100644
---- a/test/Transforms/IRCE/unsigned_comparisons_ult.ll
-+++ b/test/Transforms/IRCE/unsigned_comparisons_ult.ll
-@@ -61,8 +61,8 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK:        entry:
- ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
- ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
--; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
--; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
-+; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
-+; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
- ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at
- ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
- ; CHECK:        mainloop:
-@@ -194,8 +194,8 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
- ; CHECK:        entry:
- ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
- ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
--; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
--; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
-+; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
-+; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
- ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at
- ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
- ; CHECK:        mainloop:
-diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
-index ea3f6077231..d5232e1874c 100644
---- a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
-+++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
-@@ -14,8 +14,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
- ; current LSR cost model.
- ; CHECK-NOT: = ptrtoint i8* undef to i64
- ; CHECK: .lr.ph
--; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1
--; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}}
- ; CHECK: ret void
- define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
- bb:
-diff --git a/test/Transforms/LoopVectorize/X86/pr35432.ll b/test/Transforms/LoopVectorize/X86/pr35432.ll
-index 1f2a2061586..6aaa13c183a 100644
---- a/test/Transforms/LoopVectorize/X86/pr35432.ll
-+++ b/test/Transforms/LoopVectorize/X86/pr35432.ll
-@@ -27,7 +27,6 @@ define i32 @main() local_unnamed_addr #0 {
- ; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[CONV17]], 0
- ; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END12:%.*]]
- ; CHECK:       for.body.lr.ph:
--; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[TMP2]]
- ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
- ; CHECK:       for.body:
- ; CHECK-NEXT:    [[STOREMERGE_IN9:%.*]] = phi i32 [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_INC9:%.*]] ]
-@@ -37,77 +36,74 @@ define i32 @main() local_unnamed_addr #0 {
- ; CHECK:       for.body8.lr.ph:
- ; CHECK-NEXT:    [[CONV3:%.*]] = trunc i32 [[STOREMERGE_IN9]] to i8
- ; CHECK-NEXT:    [[DOTPROMOTED:%.*]] = load i32, i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16
--; CHECK-NEXT:    [[TMP4:%.*]] = add i8 [[CONV3]], -1
--; CHECK-NEXT:    [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
--; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 -1, [[TMP5]]
--; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP3]], [[TMP6]]
--; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP3]], i32 [[TMP6]]
--; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[UMAX]], 2
--; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP5]]
--; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 8
-+; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[CONV3]], -1
-+; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
-+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 1
-+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]]
-+; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]]
-+; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMAX]]
-+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8
- ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
- ; CHECK:       vector.scevcheck:
--; CHECK-NEXT:    [[TMP10:%.*]] = add i8 [[CONV3]], -1
--; CHECK-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
--; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[TMP11]]
--; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i32 [[TMP3]], [[TMP12]]
--; CHECK-NEXT:    [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP3]], i32 [[TMP12]]
--; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[UMAX1]], 1
--; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP11]]
--; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i8
--; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP16]])
-+; CHECK-NEXT:    [[TMP8:%.*]] = add i8 [[CONV3]], -1
-+; CHECK-NEXT:    [[TMP9:%.*]] = zext i8 [[TMP8]] to i32
-+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]]
-+; CHECK-NEXT:    [[UMAX1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]]
-+; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMAX1]]
-+; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8
-+; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]])
- ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
- ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
--; CHECK-NEXT:    [[TMP17:%.*]] = add i8 [[TMP10]], [[MUL_RESULT]]
--; CHECK-NEXT:    [[TMP18:%.*]] = sub i8 [[TMP10]], [[MUL_RESULT]]
--; CHECK-NEXT:    [[TMP19:%.*]] = icmp ugt i8 [[TMP18]], [[TMP10]]
--; CHECK-NEXT:    [[TMP20:%.*]] = icmp ult i8 [[TMP17]], [[TMP10]]
--; CHECK-NEXT:    [[TMP21:%.*]] = select i1 true, i1 [[TMP19]], i1 [[TMP20]]
--; CHECK-NEXT:    [[TMP22:%.*]] = icmp ugt i32 [[TMP15]], 255
--; CHECK-NEXT:    [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]]
--; CHECK-NEXT:    [[TMP24:%.*]] = or i1 [[TMP23]], [[MUL_OVERFLOW]]
--; CHECK-NEXT:    [[TMP25:%.*]] = or i1 false, [[TMP24]]
--; CHECK-NEXT:    br i1 [[TMP25]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
-+; CHECK-NEXT:    [[TMP13:%.*]] = add i8 [[TMP8]], [[MUL_RESULT]]
-+; CHECK-NEXT:    [[TMP14:%.*]] = sub i8 [[TMP8]], [[MUL_RESULT]]
-+; CHECK-NEXT:    [[TMP15:%.*]] = icmp ugt i8 [[TMP14]], [[TMP8]]
-+; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i8 [[TMP13]], [[TMP8]]
-+; CHECK-NEXT:    [[TMP17:%.*]] = select i1 true, i1 [[TMP15]], i1 [[TMP16]]
-+; CHECK-NEXT:    [[TMP18:%.*]] = icmp ugt i32 [[TMP11]], 255
-+; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
-+; CHECK-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]]
-+; CHECK-NEXT:    [[TMP21:%.*]] = or i1 false, [[TMP20]]
-+; CHECK-NEXT:    br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
- ; CHECK:       vector.ph:
--; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP9]], 8
--; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP9]], [[N_MOD_VF]]
-+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8
-+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]]
- ; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
- ; CHECK-NEXT:    [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]]
--; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
-+; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
- ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
- ; CHECK:       vector.body:
- ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
--; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP26]], [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
--; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
--; CHECK-NEXT:    [[TMP27:%.*]] = trunc i32 [[INDEX]] to i8
--; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP27]]
-+; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
-+; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
-+; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8
-+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]]
- ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[OFFSET_IDX]], i32 0
- ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer
- ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 -1, i8 -2, i8 -3>
- ; CHECK-NEXT:    [[INDUCTION3:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 -4, i8 -5, i8 -6, i8 -7>
--; CHECK-NEXT:    [[TMP28:%.*]] = add i8 [[OFFSET_IDX]], 0
--; CHECK-NEXT:    [[TMP29:%.*]] = add i8 [[OFFSET_IDX]], -4
--; CHECK-NEXT:    [[TMP30]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
--; CHECK-NEXT:    [[TMP31]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
--; CHECK-NEXT:    [[TMP32:%.*]] = add i8 [[TMP28]], -1
--; CHECK-NEXT:    [[TMP33:%.*]] = add i8 [[TMP29]], -1
--; CHECK-NEXT:    [[TMP34:%.*]] = zext i8 [[TMP32]] to i32
--; CHECK-NEXT:    [[TMP35:%.*]] = zext i8 [[TMP33]] to i32
-+; CHECK-NEXT:    [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0
-+; CHECK-NEXT:    [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4
-+; CHECK-NEXT:    [[TMP26]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
-+; CHECK-NEXT:    [[TMP27]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
-+; CHECK-NEXT:    [[TMP28:%.*]] = add i8 [[TMP24]], -1
-+; CHECK-NEXT:    [[TMP29:%.*]] = add i8 [[TMP25]], -1
-+; CHECK-NEXT:    [[TMP30:%.*]] = zext i8 [[TMP28]] to i32
-+; CHECK-NEXT:    [[TMP31:%.*]] = zext i8 [[TMP29]] to i32
- ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
--; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
--; CHECK-NEXT:    br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
-+; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-+; CHECK-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
- ; CHECK:       middle.block:
--; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP31]], [[TMP30]]
-+; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]]
- ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
- ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF]]
- ; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX4]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
- ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = add <4 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
--; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
--; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP9]], [[N_VEC]]
-+; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
-+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]]
- ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
- ; CHECK:       scalar.ph:
- ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ]
--; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ]
-+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
- ; CHECK-NEXT:    br label [[FOR_BODY8:%.*]]
- ; CHECK:       for.body8:
- ; CHECK-NEXT:    [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ]
-@@ -118,7 +114,7 @@ define i32 @main() local_unnamed_addr #0 {
- ; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]]
- ; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop !2
- ; CHECK:       for.cond4.for.inc9_crit_edge:
--; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ]
-+; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
- ; CHECK-NEXT:    store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16
- ; CHECK-NEXT:    br label [[FOR_INC9]]
- ; CHECK:       for.inc9:
diff --git a/deps/patches/llvm-8.0-D55758-tablegen-cond.patch b/deps/patches/llvm-8.0-D55758-tablegen-cond.patch
deleted file mode 100644
index ae9e610883d7c6..00000000000000
--- a/deps/patches/llvm-8.0-D55758-tablegen-cond.patch
+++ /dev/null
@@ -1,794 +0,0 @@
-From 95135c5a18ee14ca091d3513cc7801521d4eb204 Mon Sep 17 00:00:00 2001
-From: Javed Absar <javed.absar@arm.com>
-Date: Fri, 25 Jan 2019 10:25:25 +0000
-Subject: [PATCH] [TblGen] Extend !if semantics through new feature !cond
-
-This patch extends TableGen language with !cond operator.
-Instead of embedding !if inside !if which can get cumbersome,
-one can now use !cond.
-Below is an example to convert an integer 'x' into a string:
-
-    !cond(!lt(x,0) : "Negative",
-          !eq(x,0) : "Zero",
-          !eq(x,1) : "One,
-          1        : "MoreThanOne")
-
-Reviewed By: hfinkel, simon_tatham, greened
-Differential Revision: https://reviews.llvm.org/D55758
-
-llvm-svn: 352185
----
- docs/TableGen/LangIntro.rst          |  14 +++
- docs/TableGen/LangRef.rst            |  10 +-
- include/llvm/TableGen/Record.h       |  78 ++++++++++++++++
- lib/TableGen/Record.cpp              | 131 +++++++++++++++++++++++++++
- lib/TableGen/TGLexer.cpp             |   1 +
- lib/TableGen/TGLexer.h               |   2 +-
- lib/TableGen/TGParser.cpp            |  90 ++++++++++++++++++
- lib/TableGen/TGParser.h              |   1 +
- test/TableGen/cond-bitlist.td        |  27 ++++++
- test/TableGen/cond-default.td        |  11 +++
- test/TableGen/cond-empty-list-arg.td |   8 ++
- test/TableGen/cond-inheritance.td    |  22 +++++
- test/TableGen/cond-let.td            |  36 ++++++++
- test/TableGen/cond-list.td           |  38 ++++++++
- test/TableGen/cond-subclass.td       |  27 ++++++
- test/TableGen/cond-type.td           |  11 +++
- test/TableGen/cond-usage.td          |  29 ++++++
- test/TableGen/condsbit.td            |  15 +++
- 18 files changed, 549 insertions(+), 2 deletions(-)
- create mode 100644 llvm/test/TableGen/cond-bitlist.td
- create mode 100644 llvm/test/TableGen/cond-default.td
- create mode 100644 llvm/test/TableGen/cond-empty-list-arg.td
- create mode 100644 llvm/test/TableGen/cond-inheritance.td
- create mode 100644 llvm/test/TableGen/cond-let.td
- create mode 100644 llvm/test/TableGen/cond-list.td
- create mode 100644 llvm/test/TableGen/cond-subclass.td
- create mode 100644 llvm/test/TableGen/cond-type.td
- create mode 100644 llvm/test/TableGen/cond-usage.td
- create mode 100644 llvm/test/TableGen/condsbit.td
-
-diff --git a/docs/TableGen/LangIntro.rst b/docs/TableGen/LangIntro.rst
-index ea46550ffc0..390f941f0ca 100644
---- a/docs/TableGen/LangIntro.rst
-+++ b/docs/TableGen/LangIntro.rst
-@@ -258,6 +258,20 @@ supported include:
- ``!if(a,b,c)``
-   'b' if the result of 'int' or 'bit' operator 'a' is nonzero, 'c' otherwise.
- 
-+``!cond(condition_1 : val1, condition_2 : val2, ..., condition_n : valn)``
-+    Instead of embedding !if inside !if which can get cumbersome,
-+    one can use !cond. !cond returns 'val1' if the result of 'int' or 'bit'
-+    operator 'condition1' is nonzero. Otherwise, it checks 'condition2'.
-+    If 'condition2' is nonzero, returns 'val2', and so on.
-+    If all conditions are zero, it reports an error.
-+
-+    Below is an example to convert an integer 'x' into a string:
-+
-+    !cond(!lt(x,0) : "Negative",
-+          !eq(x,0) : "Zero",
-+          !eq(x,1) : "One,
-+          1        : "MoreThanOne")
-+
- ``!eq(a,b)``
-     'bit 1' if string a is equal to string b, 0 otherwise.  This only operates
-     on string, int and bit objects.  Use !cast<string> to compare other types of
-diff --git a/docs/TableGen/LangRef.rst b/docs/TableGen/LangRef.rst
-index 2efee12ec9d..a3dbf363151 100644
---- a/docs/TableGen/LangRef.rst
-+++ b/docs/TableGen/LangRef.rst
-@@ -102,6 +102,12 @@ wide variety of meanings:
-                :!isa    !dag     !le      !lt        !ge
-                :!gt     !ne
- 
-+TableGen also has !cond operator that needs a slightly different
-+syntax compared to other "bang operators":
-+
-+.. productionlist::
-+   CondOperator: !cond
-+
- 
- Syntax
- ======
-@@ -140,7 +146,7 @@ considered to define the class if any of the following is true:
- #. The :token:`Body` in the :token:`ObjectBody` is present and is not empty.
- #. The :token:`BaseClassList` in the :token:`ObjectBody` is present.
- 
--You can declare an empty class by giving and empty :token:`TemplateArgList`
-+You can declare an empty class by giving an empty :token:`TemplateArgList`
- and an empty :token:`ObjectBody`. This can serve as a restricted form of
- forward declaration: note that records deriving from the forward-declared
- class will inherit no fields from it since the record expansion is done
-@@ -315,6 +321,8 @@ The initial :token:`DagArg` is called the "operator" of the dag.
- 
- .. productionlist::
-    SimpleValue: `BangOperator` ["<" `Type` ">"] "(" `ValueListNE` ")"
-+              :| `CondOperator` "(" `CondVal` ("," `CondVal`)* ")"
-+   CondVal: `Value` ":" `Value`
- 
- Bodies
- ------
-diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
-index e022bc82b4e..3ca67ec72bd 100644
---- a/include/llvm/TableGen/Record.h
-+++ b/include/llvm/TableGen/Record.h
-@@ -316,6 +316,7 @@ protected:
-     IK_TernOpInit,
-     IK_UnOpInit,
-     IK_LastOpInit,
-+    IK_CondOpInit,
-     IK_FoldOpInit,
-     IK_IsAOpInit,
-     IK_StringInit,
-@@ -912,6 +913,83 @@ public:
-   std::string getAsString() const override;
- };
- 
-+/// !cond(condition_1: value1, ... , condition_n: value)
-+/// Selects the first value for which condition is true.
-+/// Otherwise reports an error.
-+class CondOpInit final : public TypedInit, public FoldingSetNode,
-+                      public TrailingObjects<CondOpInit, Init *> {
-+  unsigned NumConds;
-+  RecTy *ValType;
-+
-+  CondOpInit(unsigned NC, RecTy *Type)
-+    : TypedInit(IK_CondOpInit, Type),
-+      NumConds(NC), ValType(Type) {}
-+
-+  size_t numTrailingObjects(OverloadToken<Init *>) const {
-+    return 2*NumConds;
-+  }
-+
-+public:
-+  CondOpInit(const CondOpInit &) = delete;
-+  CondOpInit &operator=(const CondOpInit &) = delete;
-+
-+  static bool classof(const Init *I) {
-+    return I->getKind() == IK_CondOpInit;
-+  }
-+
-+  static CondOpInit *get(ArrayRef<Init*> C, ArrayRef<Init*> V,
-+                        RecTy *Type);
-+
-+  void Profile(FoldingSetNodeID &ID) const;
-+
-+  RecTy *getValType() const { return ValType; }
-+
-+  unsigned getNumConds() const { return NumConds; }
-+
-+  Init *getCond(unsigned Num) const {
-+    assert(Num < NumConds && "Condition number out of range!");
-+    return getTrailingObjects<Init *>()[Num];
-+  }
-+
-+  Init *getVal(unsigned Num) const {
-+    assert(Num < NumConds && "Val number out of range!");
-+    return getTrailingObjects<Init *>()[Num+NumConds];
-+  }
-+
-+  ArrayRef<Init *> getConds() const {
-+    return makeArrayRef(getTrailingObjects<Init *>(), NumConds);
-+  }
-+
-+  ArrayRef<Init *> getVals() const {
-+    return makeArrayRef(getTrailingObjects<Init *>()+NumConds, NumConds);
-+  }
-+
-+  Init *Fold(Record *CurRec) const;
-+
-+  Init *resolveReferences(Resolver &R) const override;
-+
-+  bool isConcrete() const override;
-+  bool isComplete() const override;
-+  std::string getAsString() const override;
-+
-+  using const_case_iterator = SmallVectorImpl<Init*>::const_iterator;
-+  using const_val_iterator = SmallVectorImpl<Init*>::const_iterator;
-+
-+  inline const_case_iterator  arg_begin() const { return getConds().begin(); }
-+  inline const_case_iterator  arg_end  () const { return getConds().end(); }
-+
-+  inline size_t              case_size () const { return NumConds; }
-+  inline bool                case_empty() const { return NumConds == 0; }
-+
-+  inline const_val_iterator name_begin() const { return getVals().begin();}
-+  inline const_val_iterator name_end  () const { return getVals().end(); }
-+
-+  inline size_t              val_size () const { return NumConds; }
-+  inline bool                val_empty() const { return NumConds == 0; }
-+
-+  Init *getBit(unsigned Bit) const override;
-+};
-+
- /// !foldl (a, b, expr, start, lst) - Fold over a list.
- class FoldOpInit : public TypedInit, public FoldingSetNode {
- private:
-diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp
-index cf1685a2e8c..26ffe761b66 100644
---- a/lib/TableGen/Record.cpp
-+++ b/lib/TableGen/Record.cpp
-@@ -1694,6 +1694,137 @@ Init *FieldInit::Fold(Record *CurRec) const {
-   return const_cast<FieldInit *>(this);
- }
- 
-+static void ProfileCondOpInit(FoldingSetNodeID &ID,
-+                             ArrayRef<Init *> CondRange,
-+                             ArrayRef<Init *> ValRange,
-+                             const RecTy *ValType) {
-+  assert(CondRange.size() == ValRange.size() &&
-+         "Number of conditions and values must match!");
-+  ID.AddPointer(ValType);
-+  ArrayRef<Init *>::iterator Case = CondRange.begin();
-+  ArrayRef<Init *>::iterator Val = ValRange.begin();
-+
-+  while (Case != CondRange.end()) {
-+    ID.AddPointer(*Case++);
-+    ID.AddPointer(*Val++);
-+  }
-+}
-+
-+void CondOpInit::Profile(FoldingSetNodeID &ID) const {
-+  ProfileCondOpInit(ID,
-+      makeArrayRef(getTrailingObjects<Init *>(), NumConds),
-+      makeArrayRef(getTrailingObjects<Init *>() + NumConds, NumConds),
-+      ValType);
-+}
-+
-+CondOpInit *
-+CondOpInit::get(ArrayRef<Init *> CondRange,
-+                ArrayRef<Init *> ValRange, RecTy *Ty) {
-+  assert(CondRange.size() == ValRange.size() &&
-+         "Number of conditions and values must match!");
-+
-+  static FoldingSet<CondOpInit> ThePool;
-+  FoldingSetNodeID ID;
-+  ProfileCondOpInit(ID, CondRange, ValRange, Ty);
-+
-+  void *IP = nullptr;
-+  if (CondOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
-+    return I;
-+
-+  void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(2*CondRange.size()),
-+                                 alignof(BitsInit));
-+  CondOpInit *I = new(Mem) CondOpInit(CondRange.size(), Ty);
-+
-+  std::uninitialized_copy(CondRange.begin(), CondRange.end(),
-+                          I->getTrailingObjects<Init *>());
-+  std::uninitialized_copy(ValRange.begin(), ValRange.end(),
-+                          I->getTrailingObjects<Init *>()+CondRange.size());
-+  ThePool.InsertNode(I, IP);
-+  return I;
-+}
-+
-+Init *CondOpInit::resolveReferences(Resolver &R) const {
-+  SmallVector<Init*, 4> NewConds;
-+  bool Changed = false;
-+  for (const Init *Case : getConds()) {
-+    Init *NewCase = Case->resolveReferences(R);
-+    NewConds.push_back(NewCase);
-+    Changed |= NewCase != Case;
-+  }
-+
-+  SmallVector<Init*, 4> NewVals;
-+  for (const Init *Val : getVals()) {
-+    Init *NewVal = Val->resolveReferences(R);
-+    NewVals.push_back(NewVal);
-+    Changed |= NewVal != Val;
-+  }
-+
-+  if (Changed)
-+    return (CondOpInit::get(NewConds, NewVals,
-+            getValType()))->Fold(R.getCurrentRecord());
-+
-+  return const_cast<CondOpInit *>(this);
-+}
-+
-+Init *CondOpInit::Fold(Record *CurRec) const {
-+  for ( unsigned i = 0; i < NumConds; ++i) {
-+    Init *Cond = getCond(i);
-+    Init *Val = getVal(i);
-+
-+    if (IntInit *CondI = dyn_cast_or_null<IntInit>(
-+            Cond->convertInitializerTo(IntRecTy::get()))) {
-+      if (CondI->getValue())
-+        return Val->convertInitializerTo(getValType());
-+    } else
-+     return const_cast<CondOpInit *>(this);
-+  }
-+
-+  PrintFatalError(CurRec->getLoc(),
-+                  CurRec->getName() +
-+                  " does not have any true condition in:" +
-+                  this->getAsString());
-+  return nullptr;
-+}
-+
-+bool CondOpInit::isConcrete() const {
-+  for (const Init *Case : getConds())
-+    if (!Case->isConcrete())
-+      return false;
-+
-+  for (const Init *Val : getVals())
-+    if (!Val->isConcrete())
-+      return false;
-+
-+  return true;
-+}
-+
-+bool CondOpInit::isComplete() const {
-+  for (const Init *Case : getConds())
-+    if (!Case->isComplete())
-+      return false;
-+
-+  for (const Init *Val : getVals())
-+    if (!Val->isConcrete())
-+      return false;
-+
-+  return true;
-+}
-+
-+std::string CondOpInit::getAsString() const {
-+  std::string Result = "!cond(";
-+  for (unsigned i = 0; i < getNumConds(); i++) {
-+    Result += getCond(i)->getAsString() + ": ";
-+    Result += getVal(i)->getAsString();
-+    if (i != getNumConds()-1)
-+      Result += ", ";
-+  }
-+  return Result + ")";
-+}
-+
-+Init *CondOpInit::getBit(unsigned Bit) const {
-+  return VarBitInit::get(const_cast<CondOpInit *>(this), Bit);
-+}
-+
- static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, StringInit *VN,
-                            ArrayRef<Init *> ArgRange,
-                            ArrayRef<StringInit *> NameRange) {
-diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
-index 16aeee56107..f733cc3c134 100644
---- a/lib/TableGen/TGLexer.cpp
-+++ b/lib/TableGen/TGLexer.cpp
-@@ -545,6 +545,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
-     .Case("ge", tgtok::XGe)
-     .Case("gt", tgtok::XGt)
-     .Case("if", tgtok::XIf)
-+    .Case("cond", tgtok::XCond)
-     .Case("isa", tgtok::XIsA)
-     .Case("head", tgtok::XHead)
-     .Case("tail", tgtok::XTail)
-diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h
-index e9980b36b97..9bdb01cf3dd 100644
---- a/lib/TableGen/TGLexer.h
-+++ b/lib/TableGen/TGLexer.h
-@@ -51,7 +51,7 @@ namespace tgtok {
- 
-     // !keywords.
-     XConcat, XADD, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast,
--    XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XEq, XIsA, XDag,
-+    XSubst, XForEach, XFoldl, XHead, XTail, XSize, XEmpty, XIf, XCond, XEq, XIsA, XDag,
-     XNe, XLe, XLt, XGe, XGt,
- 
-     // Integer value.
-diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp
-index 1d1f3603c83..200190acd59 100644
---- a/lib/TableGen/TGParser.cpp
-+++ b/lib/TableGen/TGParser.cpp
-@@ -1445,6 +1445,9 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
-     return (TernOpInit::get(Code, LHS, MHS, RHS, Type))->Fold(CurRec);
-   }
- 
-+  case tgtok::XCond:
-+    return ParseOperationCond(CurRec, ItemType);
-+
-   case tgtok::XFoldl: {
-     // Value ::= !foldl '(' Id ',' Id ',' Value ',' Value ',' Value ')'
-     Lex.Lex(); // eat the operation
-@@ -1603,6 +1606,91 @@ RecTy *TGParser::ParseOperatorType() {
-   return Type;
- }
- 
-+Init *TGParser::ParseOperationCond(Record *CurRec, RecTy *ItemType) {
-+  Lex.Lex();  // eat the operation 'cond'
-+
-+  if (Lex.getCode() != tgtok::l_paren) {
-+     TokError("expected '(' after !cond operator");
-+     return nullptr;
-+  }
-+  Lex.Lex();  // eat the '('
-+
-+  // Parse through '[Case: Val,]+'
-+  SmallVector<Init *, 4> Case;
-+  SmallVector<Init *, 4> Val;
-+  while (true) {
-+    if (Lex.getCode() == tgtok::r_paren) {
-+      Lex.Lex(); // eat the ')'
-+      break;
-+    }
-+
-+    Init *V = ParseValue(CurRec);
-+    if (!V)
-+      return nullptr;
-+    Case.push_back(V);
-+
-+    if (Lex.getCode() != tgtok::colon) {
-+      TokError("expected ':'  following a condition in !cond operator");
-+      return nullptr;
-+    }
-+    Lex.Lex(); // eat the ':'
-+
-+    V = ParseValue(CurRec, ItemType);
-+    if (!V)
-+      return nullptr;
-+    Val.push_back(V);
-+
-+    if (Lex.getCode() == tgtok::r_paren) {
-+      Lex.Lex(); // eat the ')'
-+      break;
-+    }
-+
-+    if (Lex.getCode() != tgtok::comma) {
-+      TokError("expected ',' or ')' following a value in !cond operator");
-+      return nullptr;
-+    }
-+    Lex.Lex();  // eat the ','
-+  }
-+
-+  if (Case.size() < 1) {
-+    TokError("there should be at least 1 'condition : value' in the !cond operator");
-+    return nullptr;
-+  }
-+
-+  // resolve type
-+  RecTy *Type = nullptr;
-+  for (Init *V : Val) {
-+    RecTy *VTy = nullptr;
-+    if (TypedInit *Vt = dyn_cast<TypedInit>(V))
-+      VTy = Vt->getType();
-+    if (BitsInit *Vbits = dyn_cast<BitsInit>(V))
-+      VTy = BitsRecTy::get(Vbits->getNumBits());
-+    if (isa<BitInit>(V))
-+      VTy = BitRecTy::get();
-+
-+    if (Type == nullptr) {
-+      if (!isa<UnsetInit>(V))
-+        Type = VTy;
-+    } else {
-+      if (!isa<UnsetInit>(V)) {
-+        RecTy *RType = resolveTypes(Type, VTy);
-+        if (!RType) {
-+          TokError(Twine("inconsistent types '") + Type->getAsString() +
-+                         "' and '" + VTy->getAsString() + "' for !cond");
-+          return nullptr;
-+        }
-+        Type = RType;
-+      }
-+    }
-+  }
-+
-+  if (!Type) {
-+    TokError("could not determine type for !cond from its arguments");
-+    return nullptr;
-+  }
-+  return CondOpInit::get(Case, Val, Type)->Fold(CurRec);
-+}
-+
- /// ParseSimpleValue - Parse a tblgen value.  This returns null on error.
- ///
- ///   SimpleValue ::= IDValue
-@@ -1621,6 +1709,7 @@ RecTy *TGParser::ParseOperatorType() {
- ///   SimpleValue ::= SRLTOK '(' Value ',' Value ')'
- ///   SimpleValue ::= LISTCONCATTOK '(' Value ',' Value ')'
- ///   SimpleValue ::= STRCONCATTOK '(' Value ',' Value ')'
-+///   SimpleValue ::= COND '(' [Value ':' Value,]+ ')'
- ///
- Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
-                                  IDParseMode Mode) {
-@@ -1933,6 +2022,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
-   case tgtok::XListConcat:
-   case tgtok::XStrConcat:   // Value ::= !binop '(' Value ',' Value ')'
-   case tgtok::XIf:
-+  case tgtok::XCond:
-   case tgtok::XFoldl:
-   case tgtok::XForEach:
-   case tgtok::XSubst: {  // Value ::= !ternop '(' Value ',' Value ',' Value ')'
-diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h
-index e3849043513..215b9dad770 100644
---- a/lib/TableGen/TGParser.h
-+++ b/lib/TableGen/TGParser.h
-@@ -194,6 +194,7 @@ private:  // Parser methods.
-   bool ParseRangePiece(SmallVectorImpl<unsigned> &Ranges);
-   RecTy *ParseType();
-   Init *ParseOperation(Record *CurRec, RecTy *ItemType);
-+  Init *ParseOperationCond(Record *CurRec, RecTy *ItemType);
-   RecTy *ParseOperatorType();
-   Init *ParseObjectName(MultiClass *CurMultiClass);
-   Record *ParseClassID();
-diff --git a/test/TableGen/cond-bitlist.td b/test/TableGen/cond-bitlist.td
-new file mode 100644
-index 00000000000..bce615838df
---- /dev/null
-+++ b/test/TableGen/cond-bitlist.td
-@@ -0,0 +1,27 @@
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class S<int s> {
-+  bits<2> val = !cond(!eq(s, 8):  {0, 0},
-+                      !eq(s, 16): 0b01,
-+                      !eq(s, 32): 2,
-+                      !eq(s, 64): {1, 1},
-+                              1 : ?);
-+}
-+
-+def D8  : S<8>;
-+def D16 : S<16>;
-+def D32 : S<32>;
-+def D64 : S<64>;
-+def D128: S<128>;
-+// CHECK: def D128
-+// CHECK-NEXT: bits<2> val = { ?, ? };
-+// CHECK: def D16
-+// CHECK-NEXT: bits<2> val = { 0, 1 };
-+// CHECK: def D32
-+// CHECK-NEXT: bits<2> val = { 1, 0 };
-+// CHECK: def D64
-+// CHECK-NEXT: bits<2> val = { 1, 1 };
-+// CHECK: def D8
-+// CHECK-NEXT: bits<2> val = { 0, 0 };
-+
-diff --git a/test/TableGen/cond-default.td b/test/TableGen/cond-default.td
-new file mode 100644
-index 00000000000..816bf10676f
---- /dev/null
-+++ b/test/TableGen/cond-default.td
-@@ -0,0 +1,11 @@
-+// Check that not specifying a valid condition results in error
-+
-+// RUN: not llvm-tblgen %s 2>&1 | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class C<int x> {
-+  string s  = !cond(!lt(x,0) : "negative", !gt(x,0) : "positive");
-+}
-+
-+def Zero : C<0>;
-+//CHECK: error: Zero does not have any true condition in:!cond(0: "negative", 0: "positive")
-diff --git a/test/TableGen/cond-empty-list-arg.td b/test/TableGen/cond-empty-list-arg.td
-new file mode 100644
-index 00000000000..5f4ccade169
---- /dev/null
-+++ b/test/TableGen/cond-empty-list-arg.td
-@@ -0,0 +1,8 @@
-+// RUN: llvm-tblgen %s
-+// XFAIL: vg_leak
-+
-+class C<bit cond> {
-+  bit true = 1;
-+  list<int> X = !cond(cond: [1, 2, 3], true : []);
-+  list<int> Y = !cond(cond: [], true : [4, 5, 6]);
-+}
-diff --git a/test/TableGen/cond-inheritance.td b/test/TableGen/cond-inheritance.td
-new file mode 100644
-index 00000000000..4b4abdf72f3
---- /dev/null
-+++ b/test/TableGen/cond-inheritance.td
-@@ -0,0 +1,22 @@
-+// Make sure !cond gets propagated across multiple layers of inheritance.
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class getInt<int c> {
-+  int ret = !cond(c: 0, 1 : 1);
-+}
-+
-+class I1<int c> {
-+  int i = getInt<c>.ret;
-+}
-+
-+class I2<int c> : I1<c>;
-+
-+def DI1: I1<1>;
-+// CHECK: def DI1 {     // I1
-+// CHECK-NEXT: int i = 0;
-+
-+// CHECK: def DI2 {     // I1 I2
-+// CHECK-NEXT: int i = 0;
-+def DI2: I2<1>;
-+
-diff --git a/test/TableGen/cond-let.td b/test/TableGen/cond-let.td
-new file mode 100644
-index 00000000000..044878f2ab8
---- /dev/null
-+++ b/test/TableGen/cond-let.td
-@@ -0,0 +1,36 @@
-+// Check support for `!cond' operator as part of a `let' statement.
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+
-+class C<bits<3> x, bits<4> y, bit z> {
-+  bits<16> n;
-+
-+  let n{11}  = !cond(y{3}: 1,
-+                     y{2}: x{0},
-+                     y{1}: x{1},
-+                     y{0}: x{2},
-+                     {1} :?);
-+  let n{10-9}= !cond(x{2}: y{3-2},
-+                     x{1}: y{2-1},
-+                     x{1}: y{1-0},
-+                     {1} : ?);
-+  let n{8-6} = !cond(x{2}: 0b010,  1 : 0b110);
-+  let n{5-4} = !cond(x{1}: y{3-2}, 1 :  {0, 1});
-+  let n{3-0} = !cond(x{0}: y{3-0}, 1 : {z, y{2}, y{1}, y{0}});
-+}
-+
-+
-+def C1 : C<{1, 0, 1}, {0, 1, 0, 1}, 0>;
-+def C2 : C<{0, 1, 0}, {1, 0, 1, 0}, 1>;
-+def C3 : C<{0, 0, 0}, {1, 0, 1, 0}, 0>;
-+def C4 : C<{0, 0, 0}, {0, 0, 0, 0}, 0>;
-+
-+// CHECK: def C1
-+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1 };
-+// CHECK: def C2
-+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0 };
-+// CHECK: def C3
-+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, 1, ?, ?, 1, 1, 0, 0, 1, 0, 0, 1, 0 };
-+// CHECK: def C4
-+// CHECK-NEXT: bits<16> n = { ?, ?, ?, ?, ?, ?, ?, 1, 1, 0, 0, 1, 0, 0, 0, 0 };
-diff --git a/test/TableGen/cond-list.td b/test/TableGen/cond-list.td
-new file mode 100644
-index 00000000000..aa013cea4e1
---- /dev/null
-+++ b/test/TableGen/cond-list.td
-@@ -0,0 +1,38 @@
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+
-+class A<list<list<int>> vals> {
-+  list<int> first = vals[0];
-+  list<int> rest  = !cond(!empty(!tail(vals)): vals[0],
-+                          1                 : vals[1]);
-+}
-+
-+def A_OneEl : A<[[1,2,3]]>;
-+// CHECK:      def A_OneEl {  // A
-+// CHECK-NEXT: list<int> first = [1, 2, 3];
-+// CHECK-NEXT: list<int> rest = [1, 2, 3];
-+// CHECK-NEXT: }
-+
-+def A_TwoEl : A<[[1,2,3], [4,5,6]]>;
-+// CHECK:      def A_TwoEl { // A
-+// CHECK-NEXT: list<int> first = [1, 2, 3];
-+// CHECK-NEXT: list<int> rest = [4, 5, 6];
-+// CHECK-NEXT: }
-+
-+
-+class B<list<int> v> {
-+  list<int> vals = v;
-+}
-+class BB<list<list<int>> vals> : B<!cond(!empty(!tail(vals)): vals[0],  1 : vals[1])>;
-+class BBB<list<list<int>> vals> : BB<vals>;
-+
-+def B_OneEl : BBB<[[1,2,3]]>;
-+// CHECK:      def B_OneEl { //  B BB BBB
-+// CHECK-NEXT: list<int> vals = [1, 2, 3];
-+// CHECK-NEXT: }
-+
-+def B_TwoEl : BBB<[[1,2,3],[4,5,6]]>;
-+// CHECK:      def B_TwoEl { // B BB BBB
-+// CHECK-NEXT: list<int> vals = [4, 5, 6];
-+// CHECK-NEXT: }
-diff --git a/test/TableGen/cond-subclass.td b/test/TableGen/cond-subclass.td
-new file mode 100644
-index 00000000000..9f6f6e2cb8c
---- /dev/null
-+++ b/test/TableGen/cond-subclass.td
-@@ -0,0 +1,27 @@
-+// Check that !cond with operands of different subtypes can
-+// initialize a supertype variable.
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class E<int dummy> {}
-+class E1<int dummy> : E<dummy> {}
-+class E2<int dummy> : E<dummy> {}
-+
-+class EX<int cc, E1 b, E2 c> {
-+  E x = !cond(cc: b, 1 : c);
-+}
-+
-+def E1d : E1<0>;
-+def E2d : E2<0>;
-+
-+def EXd1 : EX<1, E1d, E2d>;
-+def EXd2 : EX<0, E1d, E2d>;
-+
-+// CHECK: def EXd1 {
-+// CHECK:   E x = E1d;
-+// CHECK: }
-+//
-+// CHECK: def EXd2 {
-+// CHECK:   E x = E2d;
-+// CHECK: }
-+
-diff --git a/test/TableGen/cond-type.td b/test/TableGen/cond-type.td
-new file mode 100644
-index 00000000000..fd2a3cc52b7
---- /dev/null
-+++ b/test/TableGen/cond-type.td
-@@ -0,0 +1,11 @@
-+// RUN: not llvm-tblgen %s 2>&1 | FileCheck %s
-+// XFAIL: vg_leak
-+
-+class A<int dummy> {}
-+class B<int dummy> : A<dummy> {}
-+class C<int dummy> : A<dummy> {}
-+
-+// CHECK: Value 'x' of type 'C' is incompatible with initializer '{{.*}}' of type 'A'
-+class X<int cc, B b, C c> {
-+  C x = !cond(cc: b, 1 : c);
-+}
-diff --git a/test/TableGen/cond-usage.td b/test/TableGen/cond-usage.td
-new file mode 100644
-index 00000000000..055fd6d7c69
---- /dev/null
-+++ b/test/TableGen/cond-usage.td
-@@ -0,0 +1,29 @@
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+
-+// Check that !cond picks the first true value
-+// CHECK:       class A
-+// CHECK-NEXT:  string S = !cond(!eq(A:x, 10): "ten", !eq(A:x, 11): "eleven", !eq(A:x, 10): "TEN", !gt(A:x, 9): "MoreThanNine", 1: "unknown"); 
-+// CHECK: B1
-+// CHECK-NEXT: string S = "unknown"
-+// CHECK: B10
-+// CHECK-NEXT: string S = "ten";
-+// CHECK: def B11
-+// CHECK-NEXT: string S = "eleven";
-+// CHECK: def B12
-+// CHECK-NEXT:  string S = "MoreThanNine";
-+// CHECK: def B9
-+// CHECK-NEXT: string S = "unknown"
-+
-+class A<int x> {
-+  string S = !cond(!eq(x,10) : "ten",
-+                   !eq(x,11) : "eleven",
-+                   !eq(x,10) : "TEN",
-+                   !gt(x,9) : "MoreThanNine",
-+                   !eq(1,1) : "unknown");
-+}
-+def B1  : A<1>;
-+def B9  : A<9>;
-+def B10 : A<10>;
-+def B11 : A<11>;
-+def B12 : A<12>;
-diff --git a/test/TableGen/condsbit.td b/test/TableGen/condsbit.td
-new file mode 100644
-index 00000000000..e08ac97f68b
---- /dev/null
-+++ b/test/TableGen/condsbit.td
-@@ -0,0 +1,15 @@
-+// check that !cond works well with bit conditional values
-+// RUN: llvm-tblgen %s | FileCheck %s
-+// XFAIL: vg_leak
-+// CHECK: a = 6
-+// CHECK: a = 5
-+
-+class A<bit b = 1> {
-+  bit true = 1;
-+  int a = !cond(b: 5, true : 6);
-+  bit c = !cond(b: 0, true : 1);
-+  bits<1> d = !cond(b: 0, true : 1);
-+}
-+
-+def X : A<0>;
-+def Y : A;
--- 
-2.17.1
-
diff --git a/deps/patches/llvm-8.0-D59389-refactor-wmma.patch b/deps/patches/llvm-8.0-D59389-refactor-wmma.patch
deleted file mode 100644
index 31af5246715a16..00000000000000
--- a/deps/patches/llvm-8.0-D59389-refactor-wmma.patch
+++ /dev/null
@@ -1,899 +0,0 @@
-From e9737bf498597707d084398b9485676dc7421644 Mon Sep 17 00:00:00 2001
-From: Artem Belevich <tra@google.com>
-Date: Thu, 25 Apr 2019 22:27:35 +0000
-Subject: [PATCH] [NVPTX] Refactor generation of MMA intrinsics and
- instructions. NFC.
-
-Generalized constructions of 'fragments' of MMA operations to provide
-common primitives for construction of the ops. This will make it easier
-to add new variants of the instructions that operate on integer types.
-
-Use nested foreach loops which makes it possible to better control
-naming of the intrinsics.
-
-This patch does not affect LLVM's output, so there are no test changes.
-
-Differential Revision: https://reviews.llvm.org/D59389
-
-llvm-svn: 359245
----
- include/llvm/IR/IntrinsicsNVVM.td   | 258 ++++++--------
- lib/Target/NVPTX/NVPTXIntrinsics.td | 512 ++++++++++------------------
- 2 files changed, 295 insertions(+), 475 deletions(-)
-
-diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
-index 7f694f68969..e30a27613a6 100644
---- a/include/llvm/IR/IntrinsicsNVVM.td
-+++ b/include/llvm/IR/IntrinsicsNVVM.td
-@@ -38,6 +38,69 @@ def llvm_anyi64ptr_ty     : LLVMAnyPointerType<llvm_i64_ty>;     // (space)i64*
- // MISC
- //
- 
-+// Helper class for construction of n-element list<LLVMtype> [t,t,...,t]
-+class RepLLVMType<int N, LLVMType T> {
-+  list<LLVMType> ret = !if(N, !listconcat(RepLLVMType<!add(N,-1), T>.ret, [T]), []);
-+}
-+
-+// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
-+// Geom: m<M>n<N>k<K>. E.g. m8n32k16
-+// Frag: [abcd]
-+// PtxEltType: PTX type for the element.
-+class WMMA_REGS<string Geom, string Frag, string PtxEltType> {
-+  string geom = Geom;
-+  string frag = Frag;
-+  string ptx_elt_type = PtxEltType;
-+  string ft = frag#":"#ptx_elt_type;
-+  list<LLVMType> regs = !cond(
-+    // fp16 -> fp16/fp32 @  m16n16k16/m8n32k16/m32n8k16
-+    // All currently supported geometries use the same fragment format,
-+    // so we only need to consider {fragment, type}.
-+    !eq(ft,"a:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
-+    !eq(ft,"b:f16") : RepLLVMType<8, llvm_v2f16_ty>.ret,
-+    !eq(ft,"c:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
-+    !eq(ft,"d:f16") : RepLLVMType<4, llvm_v2f16_ty>.ret,
-+    !eq(ft,"c:f32") : RepLLVMType<8, llvm_float_ty>.ret,
-+    !eq(ft,"d:f32") : RepLLVMType<8, llvm_float_ty>.ret);
-+}
-+
-+class WMMA_NAME_LDST<string Op, WMMA_REGS Frag, string Layout, int WithStride> {
-+  string intr = "llvm.nvvm.wmma."
-+                # Frag.geom
-+                # "." # Op
-+                # "." # Frag.frag
-+                # "." # Layout
-+                # !if(WithStride, ".stride", "")
-+                # "." # Frag.ptx_elt_type
-+                ;
-+  // TODO(tra): record name should ideally use the same field order as the intrinsic.
-+  // E.g. string record = !subst("llvm", "int",
-+  //                      !subst(".", "_", llvm));
-+  string record = "int_nvvm_wmma_"
-+                # Frag.geom
-+                # "_" # Op
-+                # "_" # Frag.frag
-+                # "_" # Frag.ptx_elt_type
-+                # "_" # Layout
-+                # !if(WithStride, "_stride", "");
-+}
-+
-+class WMMA_NAME_MMA<string ALayout, string BLayout,
-+                    WMMA_REGS C, WMMA_REGS D,
-+                    int Satfinite> {
-+  string llvm = "llvm.nvvm.wmma."
-+                # C.geom
-+                # ".mma"
-+                # "." # ALayout
-+                # "." # BLayout
-+                # "." # D.ptx_elt_type  // Intrinsic encodes 'd' first.
-+                # "." # C.ptx_elt_type
-+                # !if(Satfinite, ".satfinite", "");
-+
-+  string record = !subst(".", "_",
-+                  !subst("llvm.", "int_", llvm));
-+}
-+
- let TargetPrefix = "nvvm" in {
-   def int_nvvm_prmt : GCCBuiltin<"__nvvm_prmt">,
-       Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
-@@ -3882,166 +3945,69 @@ def int_nvvm_match_all_sync_i64p :
- //
- // WMMA instructions
- //
--
- // WMMA.LOAD
--class NVVM_WMMA_LD_GALSTS<string Geometry, string Abc, string Layout,
--                          string Type, LLVMType regty, int WithStride>
--  : Intrinsic<!if(!eq(Abc#Type,"cf16"),
--                  [regty, regty, regty, regty],
--                  [regty, regty, regty, regty,
--                   regty, regty, regty, regty]),
-+class NVVM_WMMA_LD<WMMA_REGS Frag, string Layout, int WithStride>
-+  : Intrinsic<Frag.regs,
-               !if(WithStride, [llvm_anyptr_ty, llvm_i32_ty], [llvm_anyptr_ty]),
-               [IntrReadMem, IntrArgMemOnly, ReadOnly<0>, NoCapture<0>],
--              "llvm.nvvm.wmma."
--                # Geometry
--                # ".load"
--                # "." # Abc
--                # "." # Layout
--                # !if(WithStride, ".stride", "")
--                # "." # Type>;
--
--multiclass NVVM_WMMA_LD_GALT<string Geometry, string Abc, string Layout,
--                             string Type, LLVMType regty> {
--  def _stride: NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 1>;
--  def NAME   : NVVM_WMMA_LD_GALSTS<Geometry, Abc, Layout, Type, regty, 0>;
--}
--
--multiclass NVVM_WMMA_LD_GAT<string Geometry, string Abc,
--                           string Type, LLVMType regty> {
--  defm _row: NVVM_WMMA_LD_GALT<Geometry, Abc, "row", Type, regty>;
--  defm _col: NVVM_WMMA_LD_GALT<Geometry, Abc, "col", Type, regty>;
--}
--
--multiclass NVVM_WMMA_LD_G<string Geometry> {
--  defm _a_f16: NVVM_WMMA_LD_GAT<Geometry, "a", "f16", llvm_v2f16_ty>;
--  defm _b_f16: NVVM_WMMA_LD_GAT<Geometry, "b", "f16", llvm_v2f16_ty>;
--  defm _c_f16: NVVM_WMMA_LD_GAT<Geometry, "c", "f16", llvm_v2f16_ty>;
--  defm _c_f32: NVVM_WMMA_LD_GAT<Geometry, "c", "f32", llvm_float_ty>;
--}
--
--multiclass NVVM_WMMA_LD {
--  defm _m32n8k16_load: NVVM_WMMA_LD_G<"m32n8k16">;
--  defm _m16n16k16_load: NVVM_WMMA_LD_G<"m16n16k16">;
--  defm _m8n32k16_load: NVVM_WMMA_LD_G<"m8n32k16">;
--}
--
--defm int_nvvm_wmma: NVVM_WMMA_LD;
-+              WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.intr>;
- 
- // WMMA.STORE.D
--class NVVM_WMMA_STD_GLSTS<string Geometry, string Layout,
--                          string Type, LLVMType regty, int WithStride,
--                          // This is only used to create a typed empty array we
--                          // need to pass to !if below.
--                          list<LLVMType>Empty=[]>
-+class NVVM_WMMA_ST<WMMA_REGS Frag, string Layout, int WithStride>
-   : Intrinsic<[],
-               !listconcat(
-                 [llvm_anyptr_ty],
--                !if(!eq(Type,"f16"),
--                    [regty, regty, regty, regty],
--                    [regty, regty, regty, regty,
--                     regty, regty, regty, regty]),
--                !if(WithStride, [llvm_i32_ty], Empty)),
-+                Frag.regs,
-+                !if(WithStride, [llvm_i32_ty], [])),
-               [IntrWriteMem, IntrArgMemOnly, WriteOnly<0>, NoCapture<0>],
--              "llvm.nvvm.wmma."
--                   # Geometry
--                   # ".store.d"
--                   # "." # Layout
--                   # !if(WithStride, ".stride", "")
--                   # "." # Type>;
--
--multiclass NVVM_WMMA_STD_GLT<string Geometry, string Layout,
--                             string Type, LLVMType regty> {
--  def _stride: NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 1>;
--  def NAME:    NVVM_WMMA_STD_GLSTS<Geometry, Layout, Type, regty, 0>;
--}
--
--multiclass NVVM_WMMA_STD_GT<string Geometry, string Type, LLVMType regty> {
--  defm _row: NVVM_WMMA_STD_GLT<Geometry, "row", Type, regty>;
--  defm _col: NVVM_WMMA_STD_GLT<Geometry, "col", Type, regty>;
--}
--multiclass NVVM_WMMA_STD_G<string Geometry> {
--  defm _d_f16: NVVM_WMMA_STD_GT<Geometry, "f16", llvm_v2f16_ty>;
--  defm _d_f32: NVVM_WMMA_STD_GT<Geometry, "f32", llvm_float_ty>;
--}
--
--multiclass NVVM_WMMA_STD {
--  defm _m32n8k16_store:  NVVM_WMMA_STD_G<"m32n8k16">;
--  defm _m16n16k16_store: NVVM_WMMA_STD_G<"m16n16k16">;
--  defm _m8n32k16_store:  NVVM_WMMA_STD_G<"m8n32k16">;
-+              WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.intr>;
-+
-+// Create all load/store variants 
-+foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+  foreach layout = ["row", "col"] in {
-+    foreach stride = [0, 1] in {
-+      foreach frag = [WMMA_REGS<geom, "a", "f16">,
-+                      WMMA_REGS<geom, "b", "f16">,
-+                      WMMA_REGS<geom, "c", "f16">,
-+                      WMMA_REGS<geom, "c", "f32">] in {
-+          def WMMA_NAME_LDST<"load", frag, layout, stride>.record
-+             : NVVM_WMMA_LD<frag, layout, stride>;
-+      }
-+      foreach frag = [WMMA_REGS<geom, "d", "f16">,
-+                      WMMA_REGS<geom, "d", "f32">] in {
-+          def WMMA_NAME_LDST<"store", frag, layout, stride>.record
-+             : NVVM_WMMA_ST<frag, layout, stride>;
-+      }
-+    }
-+  }
- }
- 
--defm int_nvvm_wmma: NVVM_WMMA_STD;
--
- // WMMA.MMA
--class NVVM_WMMA_MMA_GABDCS<string Geometry,
--                           string ALayout, string BLayout,
--                           string DType, LLVMType d_regty,
--                           string CType, LLVMType c_regty,
--                           string Satfinite = "">
--  : Intrinsic<!if(!eq(DType,"f16"),
--                      [d_regty, d_regty, d_regty, d_regty],
--                      [d_regty, d_regty, d_regty, d_regty,
--                       d_regty, d_regty, d_regty, d_regty]),
-+class NVVM_WMMA_MMA<string ALayout, string BLayout,
-+                    WMMA_REGS C, WMMA_REGS D, int Satfinite>
-+  : Intrinsic<D.regs,
-               !listconcat(
--                [// A
--                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
--                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
--                // B
--                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty,
--                llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
--                !if(!eq(CType,"f16"),
--                      [c_regty, c_regty, c_regty, c_regty],
--                      [c_regty, c_regty, c_regty, c_regty,
--                       c_regty, c_regty, c_regty, c_regty])),
-+                WMMA_REGS<C.geom, "a", "f16">.regs,
-+                WMMA_REGS<C.geom, "b", "f16">.regs,
-+                C.regs),
-               [IntrNoMem],
--              "llvm.nvvm.wmma."
--                # Geometry
--                # ".mma"
--                # "." # ALayout
--                # "." # BLayout
--                # "." # DType
--                # "." # CType
--                # Satfinite> {
--}
--
--multiclass NVVM_WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
--                               string DType, LLVMType d_regty,
--                               string CType, LLVMType c_regty> {
--  def NAME : NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
--                                  DType, d_regty, CType, c_regty>;
--  def _satfinite: NVVM_WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
--                                       DType, d_regty, CType, c_regty,".satfinite">;
--}
--
--multiclass NVVM_WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
--                              string DType, LLVMType d_regty> {
--  defm _f16: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
--                                "f16", llvm_v2f16_ty>;
--  defm _f32: NVVM_WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_regty,
--                                "f32", llvm_float_ty>;
--}
--
--multiclass NVVM_WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
--  defm _f16: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", llvm_v2f16_ty>;
--  defm _f32: NVVM_WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", llvm_float_ty>;
--}
--
--multiclass NVVM_WMMA_MMA_GA<string Geometry, string ALayout> {
--  defm _col: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "col">;
--  defm _row: NVVM_WMMA_MMA_GAB<Geometry, ALayout, "row">;
--}
--
--multiclass NVVM_WMMA_MMA_G<string Geometry> {
--  defm _col: NVVM_WMMA_MMA_GA<Geometry, "col">;
--  defm _row: NVVM_WMMA_MMA_GA<Geometry, "row">;
--}
--
--multiclass NVVM_WMMA_MMA {
--  defm _m32n8k16_mma : NVVM_WMMA_MMA_G<"m32n8k16">;
--  defm _m16n16k16_mma : NVVM_WMMA_MMA_G<"m16n16k16">;
--  defm _m8n32k16_mma : NVVM_WMMA_MMA_G<"m8n32k16">;
-+              WMMA_NAME_MMA<ALayout, BLayout, C, D, Satfinite>.llvm>;
-+
-+foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+  foreach layout_a = ["row", "col"] in {
-+    foreach layout_b = ["row", "col"] in {
-+      foreach frag_c = [WMMA_REGS<geom, "c", "f16">,
-+                        WMMA_REGS<geom, "c", "f32">] in {
-+        foreach frag_d = [WMMA_REGS<geom, "d", "f16">,
-+                          WMMA_REGS<geom, "d", "f32">] in {
-+          foreach satf = [0, 1] in {
-+            def WMMA_NAME_MMA<layout_a, layout_b, frag_c, frag_d, satf>.record
-+             : NVVM_WMMA_MMA<layout_a, layout_b, frag_c, frag_d, satf>;
-+          }
-+        }
-+      }
-+    }
-+  }
- }
- 
--defm int_nvvm_wmma : NVVM_WMMA_MMA;
--
- } // let TargetPrefix = "nvvm"
-diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
-index 47dcdcf6e0b..b9a67ba5ed3 100644
---- a/lib/Target/NVPTX/NVPTXIntrinsics.td
-+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
-@@ -27,7 +27,17 @@ def immDouble1 : PatLeaf<(fpimm), [{
-     return (d==1.0);
- }]>;
- 
--
-+def AS_match {
-+  code generic = [{
-+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
-+  }];
-+  code shared = [{
-+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
-+  }];
-+  code global = [{
-+   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
-+  }];
-+}
- 
- //-----------------------------------
- // Synchronization and shuffle functions
-@@ -1007,17 +1017,11 @@ def INT_FNS_iii : INT_FNS_MBO<(ins    i32imm:$mask,    i32imm:$base,    i32imm:$
- //-----------------------------------
- 
- class ATOMIC_GLOBAL_CHK <dag ops, dag frag>
-- : PatFrag<ops, frag, [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
--}]>;
-+ : PatFrag<ops, frag, AS_match.global>;
- class ATOMIC_SHARED_CHK <dag ops, dag frag>
-- : PatFrag<ops, frag, [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
--}]>;
-+ : PatFrag<ops, frag, AS_match.shared>;
- class ATOMIC_GENERIC_CHK <dag ops, dag frag>
-- : PatFrag<ops, frag, [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
--}]>;
-+ : PatFrag<ops, frag, AS_match.generic>;
- 
- multiclass F_ATOMIC_2_imp<NVPTXRegClass ptrclass, NVPTXRegClass regclass,
-   string SpaceStr, string TypeStr, string OpcStr, PatFrag IntOp,
-@@ -7381,36 +7385,60 @@ def INT_PTX_SREG_WARPSIZE :
-     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
-               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
- 
--//
--// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
--//
--
- class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
-+// Generates list of n sequential register names.
-+class RegSeq<int n, string prefix> {
-+  list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
-+                                        [prefix # !add(n, -1)]),
-+                            []);
-+}
- 
--class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
--                        string Space, string Type, NVPTXRegClass regclass,
--                        DAGOperand SrcOp, bit WithStride>
--  : EmptyNVPTXInst,
--    Requires<[!if(!eq(Geometry, "m16n16k16"),
--                  hasPTX60,
--                  hasPTX61),
--              hasSM70]> {
--  // Pattern (created by WMMA_LOAD_INTR_HELPER below) that matches the intrinsic
--  // for this function.
--  PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA_"
--                                       # Geometry # "_load_"
--                                       # !subst("c", "c_" # Type, Abc)
--                                       # "_" # Layout
--                                       # !subst(".", "_", Space)
--                                       # !if(WithStride,"_stride", "")
--                                       # "_Intr");
--  dag OutsR03 = (outs regclass:$r0, regclass:$r1, regclass:$r2, regclass:$r3);
--  dag OutsR47 = (outs regclass:$r4, regclass:$r5, regclass:$r6, regclass:$r7);
--  dag Outs = !if(!eq(Abc#Type,"cf16"), OutsR03, !con(OutsR03, OutsR47));
--
--  dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
--  dag Ins = !con((ins SrcOp:$src), StrideArg);
-+// Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
-+// In addition to target-independent fields provided by WMMA_REGS, it adds
-+// the fields commonly used to implement specific PTX instruction -- register
-+// types and names, constraints, parts of assembly, etc.
-+class WMMA_REGINFO<string Geom, string Frag, string PtxEltType>
-+      : WMMA_REGS<Geom, Frag, PtxEltType> {
-+  // NVPTX register types used to carry fragment data.
-+  NVPTXRegClass regclass = !cond(
-+    !eq(PtxEltType, "f16") : Float16x2Regs,
-+    !eq(PtxEltType, "f32") : Float32Regs);
-+
-+  // Instruction input/output arguments for the fragment.
-+  list<NVPTXRegClass> ptx_regs = !foreach(tmp, regs, regclass);
-+
-+  // List of register names for the fragment -- ["ra0", "ra1",...]
-+  list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
-+  // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
-+  string regstring = "{{$" # !head(reg_names)
-+                           # !foldl("", !tail(reg_names), a, b,
-+                                    !strconcat(a, ", $", b))
-+                     # "}}";
-+
-+  // Predicates for particular fragment variant. Technically those are
-+  // per-instruction predicates, but currently all fragments that can be used in
-+  // a given instruction are subject to the same constraints, so an instruction
-+  // can use predicates from any of its fragments. If/when this is no
-+  // longer the case, we can concat all per-fragment predicates to enforce that
-+  // all fragments of the instruction are viable.
-+  list<Predicate> Predicates = !cond(
-+    // fp16 -> fp16/fp32 @ m16n16k16
-+    !and(!eq(Geom, "m16n16k16"),
-+         !or(!eq(PtxEltType, "f16"),
-+             !eq(PtxEltType, "f32"))) : [hasSM70, hasPTX60],
-+
-+    // fp16 -> fp16/fp32 @ m8n32k16/m32n8k16
-+    !and(!or(!eq(Geom, "m8n32k16"),
-+             !eq(Geom, "m32n8k16")),
-+         !or(!eq(PtxEltType, "f16"),
-+             !eq(PtxEltType, "f32"))) : [hasSM70, hasPTX61]);
-+
-+  // template DAGs for instruction inputs/output.
-+  dag Outs = !dag(outs, ptx_regs, reg_names);
-+  dag Ins = !dag(ins, ptx_regs, reg_names);
-+}
- 
-+class BuildPattern<dag Outs, PatFrag IntrMatcher, dag Ins> {
-   // Build a dag pattern that matches the intrinsic call.
-   // We want a dag that looks like this:
-   // (set <output args>, (intrinsic <input arguments>)) where input and
-@@ -7431,277 +7459,127 @@ class WMMA_LOAD_GALSTOS<string Geometry, string Abc, string Layout,
-                               !subst(ins, IntrMatcher, tmp)))));
-   // Finally, consatenate both parts together. !con() requires both dags to have
-   // the same operator, so we wrap PatArgs in a (set ...) dag.
--  let Pattern = [!con(PatOuts, (set PatArgs))];
--  let OutOperandList = Outs;
--  let InOperandList = Ins;
--  let AsmString = "wmma.load."
--                  # Abc
--                  # ".sync"
--                  # "." # Layout
--                  # "." # Geometry
--                  # Space
--                  # "." # Type # " \t"
--                  # !if(!eq(Abc#Type, "cf16"),
--                        "{{$r0, $r1, $r2, $r3}}",
--                        "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
--                  # ", [$src]"
--                  # !if(WithStride, ", $ldm", "")
--                  # ";";
-+  dag ret = !con(PatOuts, (set PatArgs));
- }
- 
--class WMMA_LOAD_INTR_HELPER<string Geometry, string Abc, string Layout,
--                            string Space, string Type, bit WithStride>
-+//
-+// wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
-+//
-+
-+class WMMA_LOAD_INTR_HELPER<WMMA_REGINFO Frag, string Layout, string Space,
-+                            bit WithStride>
-                            : PatFrag <(ops),(ops)> {
-   // Intrinsic that matches this instruction.
--  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma"
--                                    # "_" # Geometry # "_load_"
--                                    # Abc # "_" # Type # "_" # Layout
--                                    # !if(WithStride,"_stride", ""));
--  code match_generic = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
--  }];
--  code match_shared = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
--  }];
--  code match_global = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
--  }];
--
-+  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_LDST<"load", Frag, Layout,
-+                                                   WithStride>.record);
-   let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
-   let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
--  let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
--                      !if(!eq(Space, ".global"), match_global, match_generic));
--}
--
--multiclass WMMA_LOAD_GALSTS<string Geometry, string Abc, string Layout,
--                            string Space, string Type, NVPTXRegClass regclass,
--                            bit WithStride> {
--  def _avar:  WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                imem, WithStride>;
--  def _areg: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                Int32Regs, WithStride>;
--  def _areg64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                Int64Regs, WithStride>;
--  def _ari: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                MEMri, WithStride>;
--  def _ari64: WMMA_LOAD_GALSTOS<Geometry, Abc, Layout, Space, Type, regclass,
--                                MEMri64, WithStride>;
-+  let PredicateCode = !cond(!eq(Space, ".shared"): AS_match.shared,
-+                            !eq(Space, ".global"): AS_match.global,
-+                            1: AS_match.generic);
- }
- 
--multiclass WMMA_LOAD_GALSTSh<string Geometry, string Abc, string Layout,
--                             string Space, string Type, NVPTXRegClass regclass,
--                             bit WithStride> {
--  // Define a PatFrag that matches appropriate intrinsic that loads from the
--  // given address space.
--  def _Intr:  WMMA_LOAD_INTR_HELPER<Geometry, Abc, Layout, Space, Type,
--                                    WithStride>;
--  defm NAME:  WMMA_LOAD_GALSTS<Geometry, Abc, Layout, Space, Type, regclass,
--                               WithStride>;
--}
--
--multiclass WMMA_LOAD_GALST<string Geometry, string Abc, string Layout,
--                           string Space, string Type, NVPTXRegClass regclass> {
--  defm _stride: WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 1>;
--  defm NAME:    WMMA_LOAD_GALSTSh<Geometry, Abc, Layout, Space, Type, regclass, 0>;
--}
--
--multiclass WMMA_LOAD_GALT<string Geometry, string Abc, string Layout,
--                          string Type, NVPTXRegClass regclass> {
--  defm _global: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".global",
--                                Type, regclass>;
--  defm _shared: WMMA_LOAD_GALST<Geometry, Abc, Layout, ".shared",
--                                Type, regclass>;
--  defm NAME:    WMMA_LOAD_GALST<Geometry, Abc, Layout,        "",
--                                Type, regclass>;
--}
--
--multiclass WMMA_LOAD_GAT<string Geometry, string Abc,
--                         string Type, NVPTXRegClass regclass> {
--  defm _row: WMMA_LOAD_GALT<Geometry, Abc, "row", Type, regclass>;
--  defm _col: WMMA_LOAD_GALT<Geometry, Abc, "col", Type, regclass>;
--}
-+class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
-+                DAGOperand SrcOp>
-+  : EmptyNVPTXInst,
-+    Requires<Frag.Predicates> {
-+  // Pattern that matches the intrinsic for this instruction variant.
-+  PatFrag IntrMatcher = WMMA_LOAD_INTR_HELPER<Frag, Layout, Space, WithStride>;
-+  dag Ins = !con((ins SrcOp:$src), !if(WithStride, (ins Int32Regs:$ldm), (ins)));
- 
--multiclass WMMA_LOAD_G<string Geometry> {
--  defm _load_a: WMMA_LOAD_GAT<Geometry, "a", "f16", Float16x2Regs>;
--  defm _load_b: WMMA_LOAD_GAT<Geometry, "b", "f16", Float16x2Regs>;
--  defm _load_c_f16: WMMA_LOAD_GAT<Geometry, "c", "f16", Float16x2Regs>;
--  defm _load_c_f32: WMMA_LOAD_GAT<Geometry, "c", "f32", Float32Regs>;
-+  let Pattern = [BuildPattern<Frag.Outs, IntrMatcher, Ins>.ret];
-+  let OutOperandList = Frag.Outs;
-+  let InOperandList = Ins;
-+  let AsmString = "wmma.load."
-+                  # Frag.frag
-+                  # ".sync"
-+                  # "." # Layout
-+                  # "." # Frag.geom
-+                  # Space
-+                  # "." # Frag.ptx_elt_type # " \t"
-+                  # Frag.regstring
-+                  # ", [$src]"
-+                  # !if(WithStride, ", $ldm", "")
-+                  # ";";
- }
- 
--defm INT_WMMA_m32n8k16: WMMA_LOAD_G<"m32n8k16">;
--defm INT_WMMA_m16n16k16: WMMA_LOAD_G<"m16n16k16">;
--defm INT_WMMA_m8n32k16: WMMA_LOAD_G<"m8n32k16">;
--
- //
- // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
- //
--class WMMA_STORE_D_GLSTSO<string Geometry, string Layout, string Space,
--                          string Type, NVPTXRegClass regclass,
--                          bit WithStride, DAGOperand DstOp>
-+class WMMA_STORE_INTR_HELPER<WMMA_REGINFO Frag, string Layout, string Space,
-+                             bit WithStride>
-+                            : PatFrag <(ops),(ops)> {
-+  // Intrinsic that matches this instruction.
-+  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_LDST<"store", Frag, Layout,
-+                                                   WithStride>.record);
-+  let Operands = !con((ops node:$dst),
-+                      !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
-+                      !if(WithStride, (ops node:$ldm), (ops)));
-+  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
-+  let PredicateCode = !cond(!eq(Space, ".shared"): AS_match.shared,
-+                            !eq(Space, ".global"): AS_match.global,
-+                            1: AS_match.generic);
-+}
-+
-+class WMMA_STORE<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
-+                 DAGOperand DstOp>
-   : EmptyNVPTXInst,
--    Requires<[!if(!eq(Geometry, "m16n16k16"),
--                  hasPTX60,
--                  hasPTX61),
--              hasSM70]> {
--  PatFrag IntrMatcher = !cast<PatFrag>("INT_WMMA"
--                                       # "_" # Geometry # "_store_d"
--                                       # "_" # Type
--                                       # "_" # Layout
--                                       # !subst(".", "_", Space)
--                                       # !if(WithStride,"_stride", "")
--                                       # "_Intr");
--  dag InsR03 = (ins DstOp:$src, regclass:$r0, regclass:$r1,
--                                regclass:$r2, regclass:$r3);
--  dag InsR47 = (ins regclass:$r4, regclass:$r5,
--                    regclass:$r6, regclass:$r7);
--  dag InsR = !if(!eq(Type,"f16"), InsR03, !con(InsR03, InsR47));
--  dag StrideArg = !if(WithStride, (ins Int32Regs:$ldm), (ins));
--  dag Ins = !con(InsR, StrideArg);
--
--  // Construct the pattern to match corresponding intrinsic call. See the
--  // details in the comments in WMMA_LOAD_ALSTOS.
--  dag PatArgs = !foreach(tmp, Ins,
--                              !subst(imem, ADDRvar,
--                              !subst(MEMri64, ADDRri64,
--                              !subst(MEMri, ADDRri,
--                              !subst(ins, IntrMatcher, tmp)))));
--  let Pattern = [PatArgs];
-+    Requires<Frag.Predicates> {
-+  PatFrag IntrMatcher = WMMA_STORE_INTR_HELPER<Frag, Layout, Space, WithStride>;
-+  dag Ins = !con((ins DstOp:$src),
-+                 Frag.Ins,
-+                 !if(WithStride, (ins Int32Regs:$ldm), (ins)));
-+  let Pattern = [BuildPattern<(set), IntrMatcher, Ins>.ret];
-   let OutOperandList = (outs);
-   let InOperandList = Ins;
-   let AsmString = "wmma.store.d.sync."
-                   # Layout
--                  # "." # Geometry
-+                  # "." # Frag.geom
-                   # Space
--                  # "." # Type
-+                  # "." # Frag.ptx_elt_type
-                   # " \t[$src],"
--                  # !if(!eq(Type,"f16"),
--                        "{{$r0, $r1, $r2, $r3}}",
--                        "{{$r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7}}")
-+                  # Frag.regstring
-                   # !if(WithStride, ", $ldm", "")
-                   # ";";
--
- }
- 
--class WMMA_STORE_INTR_HELPER<string Geometry, string Layout, string Space,
--                             string Type, bit WithStride>
--                            : PatFrag <(ops),(ops)> {
--  // Intrinsic that matches this instruction.
--  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
--                                    # Geometry
--                                    # "_store_d"
--                                    # "_" # Type
--                                    # "_" # Layout
--                                    # !if(WithStride, "_stride", ""));
--  code match_generic = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GENERIC);
--  }];
--  code match_shared = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_SHARED);
--  }];
--  code match_global = [{
--   return ChkMemSDNodeAddressSpace(N, llvm::ADDRESS_SPACE_GLOBAL);
--  }];
--
--  dag Args = !if(!eq(Type,"f16"),
--                 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3),
--                 (ops node:$dst, node:$r0, node:$r1, node:$r2, node:$r3,
--                                 node:$r4, node:$r5, node:$r6, node:$r7));
--  dag StrideArg = !if(WithStride, (ops node:$ldm), (ops));
--  let Operands = !con(Args, StrideArg);
--  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
--  let PredicateCode = !if(!eq(Space, ".shared"), match_shared,
--                      !if(!eq(Space, ".global"), match_global, match_generic));
--}
--
--multiclass WMMA_STORE_D_GLSTS<string Geometry, string Layout, string Space,
--                              string Type, NVPTXRegClass regclass,
--                              bit WithStride> {
--  def _avar:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, imem>;
--  def _areg:   WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, Int32Regs>;
--  def _areg64: WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, Int64Regs>;
--  def _ari:    WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, MEMri>;
--  def _ari64:  WMMA_STORE_D_GLSTSO<Geometry, Layout, Space, Type, regclass,
--                                   WithStride, MEMri64>;
--}
--
--multiclass WMMA_STORE_D_GLSTSh<string Geometry, string Layout, string Space,
--                               string Type, NVPTXRegClass regclass,
--                               bit WithStride> {
--  // Define a PatFrag that matches appropriate intrinsic that loads from the
--  // given address space.
--  def _Intr:    WMMA_STORE_INTR_HELPER<Geometry, Layout, Space, Type,
--                                       WithStride>;
--  defm NAME:    WMMA_STORE_D_GLSTS<Geometry, Layout, Space, Type, regclass,
--                                   WithStride>;
--}
--
--multiclass WMMA_STORE_D_GLST<string Geometry, string Layout, string Space,
--                             string Type, NVPTXRegClass regclass > {
--  defm _stride: WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 1>;
--  defm NAME:    WMMA_STORE_D_GLSTSh<Geometry, Layout, Space, Type, regclass, 0>;
--}
--
--multiclass WMMA_STORE_D_GLT<string Geometry, string Layout,
--                           string Type, NVPTXRegClass regclass> {
--  defm _global: WMMA_STORE_D_GLST<Geometry, Layout, ".global", Type, regclass>;
--  defm _shared: WMMA_STORE_D_GLST<Geometry, Layout, ".shared", Type, regclass>;
--  defm NAME:    WMMA_STORE_D_GLST<Geometry, Layout,        "", Type, regclass>;
--}
--
--multiclass WMMA_STORE_D_GT<string Geometry, string Type,
--                           NVPTXRegClass regclass> {
--  defm _row:    WMMA_STORE_D_GLT<Geometry, "row", Type, regclass>;
--  defm _col:    WMMA_STORE_D_GLT<Geometry, "col", Type, regclass>;
--}
--
--multiclass WMMA_STORE_D_G<string Geometry> {
--  defm _store_d_f16: WMMA_STORE_D_GT<Geometry, "f16", Float16x2Regs>;
--  defm _store_d_f32: WMMA_STORE_D_GT<Geometry, "f32", Float32Regs>;
--}
--
--defm INT_WMMA_m32n8k16: WMMA_STORE_D_G<"m32n8k16">;
--defm INT_WMMA_m16n16k16: WMMA_STORE_D_G<"m16n16k16">;
--defm INT_WMMA_m8n32k16: WMMA_STORE_D_G<"m8n32k16">;
-+// Create all load/store variants
-+foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+  foreach layout = ["row", "col"] in {
-+    foreach stride = [0, 1] in {
-+      foreach space = [".global", ".shared", ""] in {
-+        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
-+          foreach frag = [WMMA_REGINFO<geom, "a", "f16">,
-+                          WMMA_REGINFO<geom, "b", "f16">,
-+                          WMMA_REGINFO<geom, "c", "f16">,
-+                          WMMA_REGINFO<geom, "c", "f32">] in {
-+              def : WMMA_LOAD<frag, layout, space, stride, addr>;
-+          }
-+          foreach frag = [WMMA_REGINFO<geom, "d", "f16">,
-+                          WMMA_REGINFO<geom, "d", "f32">] in {
-+              def : WMMA_STORE<frag, layout, space, stride, addr>;
-+          }
-+        } // addr
-+      } // space
-+    } // stride
-+  } // layout
-+} // geom
- 
- // WMMA.MMA
--class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
--                     string DType, NVPTXRegClass d_reg,
--                     string CType, NVPTXRegClass c_reg,
--                     NVPTXRegClass ab_reg,
--                     string Satfinite = "">
-+class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
-+               WMMA_REGINFO FragC, WMMA_REGINFO FragD,
-+               string ALayout, string BLayout, int Satfinite>
-   : EmptyNVPTXInst,
--    Requires<[!if(!eq(Geometry, "m16n16k16"),
--                  hasPTX60,
--                  hasPTX61),
--              hasSM70]> {
--  Intrinsic Intr = !cast<Intrinsic>("int_nvvm_wmma_"
--                                    # Geometry
--                                    # "_mma"
--                                    # "_" # ALayout
--                                    # "_" # BLayout
--                                    # "_" # DType
--                                    # "_" # CType
--                                    # !subst(".", "_", Satfinite));
--  dag Outs = !if(!eq(DType,"f16"),
--                 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3),
--                 (outs d_reg:$d0, d_reg:$d1, d_reg:$d2, d_reg:$d3,
--                       d_reg:$d4, d_reg:$d5, d_reg:$d6, d_reg:$d7));
--  dag InsExtraCArgs = !if(!eq(CType,"f16"),
--                          (ins),
--                          (ins c_reg:$c4,  c_reg:$c5,  c_reg:$c6,  c_reg:$c7));
--  dag Ins = !con((ins ab_reg:$a0, ab_reg:$a1, ab_reg:$a2, ab_reg:$a3,
--                      ab_reg:$a4, ab_reg:$a5, ab_reg:$a6, ab_reg:$a7,
--                      ab_reg:$b0, ab_reg:$b1, ab_reg:$b2, ab_reg:$b3,
--                      ab_reg:$b4, ab_reg:$b5, ab_reg:$b6, ab_reg:$b7,
--                      c_reg:$c0,  c_reg:$c1,  c_reg:$c2,  c_reg:$c3),
--                  InsExtraCArgs);
--
--  // Construct the pattern to match corresponding intrinsic call. See the
--  // details in the comments in WMMA_LOAD_ALSTOS.
-+    Requires<FragC.Predicates> {
-+  //Intrinsic Intr = int_nvvm_suld_1d_v4i32_zero;
-+  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_MMA<ALayout, BLayout, FragC, FragD, Satfinite>.record);
-+  dag Outs = FragD.Outs;
-+  dag Ins = !con(FragA.Ins,
-+                 FragB.Ins,
-+                 FragC.Ins);
-+
-+  // Construct the pattern to match corresponding intrinsic call.
-+  // mma does not load/store anything, so we don't need complex operand matching here.
-   dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
-   dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
-   let Pattern = [!con(PatOuts, (set PatArgs))];
-@@ -7710,54 +7588,30 @@ class WMMA_MMA_GABDCS<string Geometry, string ALayout, string BLayout,
-   let AsmString = "wmma.mma.sync."
-                   # ALayout
-                   # "." # BLayout
--                  # "." # Geometry
--                  # "." # DType
--                  # "." # CType
--                  # Satfinite # "\n\t\t"
--                  # !if(!eq(DType,"f16"),
--                        "{{$d0, $d1, $d2, $d3}}, \n\t\t",
--                        "{{$d0, $d1, $d2, $d3, $d4, $d5, $d6, $d7}},\n\t\t")
--                  # "{{$a0, $a1, $a2, $a3, $a4, $a5, $a6, $a7}},\n\t\t"
--                  # "{{$b0, $b1, $b2, $b3, $b4, $b5, $b6, $b7}},\n\t\t"
--                  # !if(!eq(CType,"f16"),
--                        "{{$c0, $c1, $c2, $c3}};",
--                        "{{$c0, $c1, $c2, $c3, $c4, $c5, $c6, $c7}};");
--}
--
--multiclass WMMA_MMA_GABDC<string Geometry, string ALayout, string BLayout,
--                         string DType, NVPTXRegClass d_reg,
--                         string CType, NVPTXRegClass c_reg> {
--  def _satfinite: WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
--                                 DType, d_reg, CType, c_reg,
--                                 Float16x2Regs, ".satfinite">;
--  def NAME:       WMMA_MMA_GABDCS<Geometry, ALayout, BLayout,
--                                 DType, d_reg, CType, c_reg,
--                                 Float16x2Regs>;
--}
--
--multiclass WMMA_MMA_GABD<string Geometry, string ALayout, string BLayout,
--                        string DType, NVPTXRegClass d_reg> {
--  defm _f16: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
--                            "f16", Float16x2Regs>;
--  defm _f32: WMMA_MMA_GABDC<Geometry, ALayout, BLayout, DType, d_reg,
--                            "f32", Float32Regs>;
--}
--
--multiclass WMMA_MMA_GAB<string Geometry, string ALayout, string BLayout> {
--  defm _f16: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f16", Float16x2Regs>;
--  defm _f32: WMMA_MMA_GABD<Geometry, ALayout, BLayout, "f32", Float32Regs>;
--}
--
--multiclass WMMA_MMA_GA<string Geometry, string ALayout> {
--  defm _col: WMMA_MMA_GAB<Geometry, ALayout, "col">;
--  defm _row: WMMA_MMA_GAB<Geometry, ALayout, "row">;
--}
--
--multiclass WMMA_MMA_G<string Geometry> {
--  defm _col: WMMA_MMA_GA<Geometry, "col">;
--  defm _row: WMMA_MMA_GA<Geometry, "row">;
-+                  # "." # FragA.geom
-+                  # "." # FragD.ptx_elt_type
-+                  # "." # FragC.ptx_elt_type
-+                  # !if(Satfinite, ".satfinite", "") # "\n\t\t"
-+                  # FragD.regstring # ",\n\t\t"
-+                  # FragA.regstring # ",\n\t\t"
-+                  # FragB.regstring # ",\n\t\t"
-+                  # FragC.regstring # ";";
- }
- 
--defm INT_WMMA_MMA_m32n8k16 : WMMA_MMA_G<"m32n8k16">;
--defm INT_WMMA_MMA_m16n16k16 : WMMA_MMA_G<"m16n16k16">;
--defm INT_WMMA_MMA_m8n32k16 : WMMA_MMA_G<"m8n32k16">;
-+foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+  foreach layout_a = ["row", "col"] in {
-+    foreach layout_b = ["row", "col"] in {
-+      foreach frag_c = [WMMA_REGINFO<geom, "c", "f16">,
-+                        WMMA_REGINFO<geom, "c", "f32">] in {
-+        foreach frag_d = [WMMA_REGINFO<geom, "d", "f16">,
-+                          WMMA_REGINFO<geom, "d", "f32">] in {
-+          foreach satf = [0, 1] in {
-+            def : WMMA_MMA<WMMA_REGINFO<geom, "a", "f16">,
-+                           WMMA_REGINFO<geom, "b", "f16">,
-+                           frag_c, frag_d, layout_a, layout_b, satf>;
-+          } // satf
-+        } // frag_d
-+      } // frag_c
-+    } // layout_b
-+  } // layout_a
-+} // geom
--- 
-2.17.1
-
diff --git a/deps/patches/llvm-8.0-D59393-mma-ptx63-fix.patch b/deps/patches/llvm-8.0-D59393-mma-ptx63-fix.patch
deleted file mode 100644
index 10e6cd7b6f98db..00000000000000
--- a/deps/patches/llvm-8.0-D59393-mma-ptx63-fix.patch
+++ /dev/null
@@ -1,510 +0,0 @@
-From be924be7f9e699775fe7690d4b421bebfed73aa9 Mon Sep 17 00:00:00 2001
-From: Artem Belevich <tra@google.com>
-Date: Thu, 25 Apr 2019 22:27:46 +0000
-Subject: [PATCH] [NVPTX] generate correct MMA instruction mnemonics with
- PTX63+.
-
-PTX 6.3 requires using ".aligned" in the MMA instruction names.
-In order to generate correct name, now we pass current
-PTX version to each instruction as an extra constant operand
-and InstPrinter adjusts its output accordingly.
-
-Differential Revision: https://reviews.llvm.org/D59393
-
-llvm-svn: 359246
----
- .../NVPTX/InstPrinter/NVPTXInstPrinter.cpp    |  14 +
- .../NVPTX/InstPrinter/NVPTXInstPrinter.h      |   2 +
- lib/Target/NVPTX/NVPTXInstrInfo.td            |   4 +
- lib/Target/NVPTX/NVPTXIntrinsics.td           | 279 ++++++++++--------
- test/CodeGen/NVPTX/wmma.py                    |  17 +-
- 5 files changed, 184 insertions(+), 132 deletions(-)
-
-diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
-index b774fe169d7..6fb577d5499 100644
---- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
-+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp
-@@ -270,6 +270,20 @@ void NVPTXInstPrinter::printLdStCode(const MCInst *MI, int OpNum,
-     llvm_unreachable("Empty Modifier");
- }
- 
-+void NVPTXInstPrinter::printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
-+                                    const char *Modifier) {
-+  const MCOperand &MO = MI->getOperand(OpNum);
-+  int Imm = (int)MO.getImm();
-+  if (Modifier == nullptr || strcmp(Modifier, "version") == 0) {
-+    O << Imm; // Just print out PTX version
-+  } else if (strcmp(Modifier, "aligned") == 0) {
-+    // PTX63 requires '.aligned' in the name of the instruction.
-+    if (Imm >= 63)
-+      O << ".aligned";
-+  } else
-+    llvm_unreachable("Unknown Modifier");
-+}
-+
- void NVPTXInstPrinter::printMemOperand(const MCInst *MI, int OpNum,
-                                        raw_ostream &O, const char *Modifier) {
-   printOperand(MI, OpNum, O);
-diff --git a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
-index f0f223aa057..588439137f9 100644
---- a/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
-+++ b/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
-@@ -41,6 +41,8 @@ public:
-                     const char *Modifier = nullptr);
-   void printLdStCode(const MCInst *MI, int OpNum,
-                      raw_ostream &O, const char *Modifier = nullptr);
-+  void printMmaCode(const MCInst *MI, int OpNum, raw_ostream &O,
-+                    const char *Modifier = nullptr);
-   void printMemOperand(const MCInst *MI, int OpNum,
-                        raw_ostream &O, const char *Modifier = nullptr);
-   void printProtoIdent(const MCInst *MI, int OpNum,
-diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
-index 02a40b9f526..603d3212395 100644
---- a/lib/Target/NVPTX/NVPTXInstrInfo.td
-+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
-@@ -1549,6 +1549,10 @@ def LdStCode : Operand<i32> {
-   let PrintMethod = "printLdStCode";
- }
- 
-+def MmaCode : Operand<i32> {
-+  let PrintMethod = "printMmaCode";
-+}
-+
- def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
- def Wrapper    : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
- 
-diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
-index b9a67ba5ed3..5cd534914f7 100644
---- a/lib/Target/NVPTX/NVPTXIntrinsics.td
-+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
-@@ -39,6 +39,24 @@ def AS_match {
-   }];
- }
- 
-+// A node that will be replaced with the current PTX version.
-+class PTX {
-+  SDNodeXForm PTXVerXform = SDNodeXForm<imm, [{
-+    return getI32Imm(Subtarget->getPTXVersion(), SDLoc(N));
-+  }]>;
-+  // (i32 0) will be XForm'ed to the currently used PTX version.
-+  dag version = (PTXVerXform (i32 0));
-+}
-+def ptx : PTX;
-+
-+// Generates list of n sequential register names.
-+// E.g. RegNames<3,"r">.ret -> ["r0", "r1", "r2" ]
-+class RegSeq<int n, string prefix> {
-+  list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
-+                                        [prefix # !add(n, -1)]),
-+                            []);
-+}
-+
- //-----------------------------------
- // Synchronization and shuffle functions
- //-----------------------------------
-@@ -7385,14 +7403,6 @@ def INT_PTX_SREG_WARPSIZE :
-     NVPTXInst<(outs Int32Regs:$dst), (ins), "mov.u32 \t$dst, WARP_SZ;",
-               [(set Int32Regs:$dst, (int_nvvm_read_ptx_sreg_warpsize))]>;
- 
--class EmptyNVPTXInst : NVPTXInst<(outs), (ins), "?", []>;
--// Generates list of n sequential register names.
--class RegSeq<int n, string prefix> {
--  list<string> ret = !if(n, !listconcat(RegSeq<!add(n,-1), prefix>.ret,
--                                        [prefix # !add(n, -1)]),
--                            []);
--}
--
- // Helper class that represents a 'fragment' of an NVPTX *MMA instruction.
- // In addition to target-independent fields provided by WMMA_REGS, it adds
- // the fields commonly used to implement specific PTX instruction -- register
-@@ -7409,6 +7419,7 @@ class WMMA_REGINFO<string Geom, string Frag, string PtxEltType>
- 
-   // List of register names for the fragment -- ["ra0", "ra1",...]
-   list<string> reg_names = RegSeq<!size(ptx_regs), "r"#frag>.ret;
-+
-   // Generates "{{$r0, $r1,.... $rN-1}}" for use in asm string construction.
-   string regstring = "{{$" # !head(reg_names)
-                            # !foldl("", !tail(reg_names), a, b,
-@@ -7438,61 +7449,65 @@ class WMMA_REGINFO<string Geom, string Frag, string PtxEltType>
-   dag Ins = !dag(ins, ptx_regs, reg_names);
- }
- 
--class BuildPattern<dag Outs, PatFrag IntrMatcher, dag Ins> {
-+// Convert dag of arguments into a dag to match given intrinsic.
-+class BuildPatternI<Intrinsic Intr, dag Ins> {
-+  // Build a dag pattern that matches the intrinsic call.
-+  dag ret = !foreach(tmp, Ins,
-+                          !subst(imem, ADDRvar,
-+                          !subst(MEMri64, ADDRri64,
-+                          !subst(MEMri, ADDRri,
-+                          !subst(ins, Intr, tmp)))));
-+}
-+
-+// Same as above, but uses PatFrag instead of an Intrinsic.
-+class BuildPatternPF<PatFrag Intr, dag Ins> {
-   // Build a dag pattern that matches the intrinsic call.
--  // We want a dag that looks like this:
--  // (set <output args>, (intrinsic <input arguments>)) where input and
--  // output arguments are named patterns that would match corresponding
--  // input/output arguments of the instruction.
--  //
--  // First we construct (set <output arguments>) from instruction's outs dag by
--  // replacing dag operator 'outs' with 'set'.
--  dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
--  // Similarly, construct (intrinsic <input arguments>) sub-dag from
--  // instruction's input arguments, only now we also need to replace operands
--  // with patterns that would match them and the operator 'ins' with the
--  // intrinsic.
--  dag PatArgs = !foreach(tmp, Ins,
--                              !subst(imem, ADDRvar,
--                              !subst(MEMri64, ADDRri64,
--                              !subst(MEMri, ADDRri,
--                              !subst(ins, IntrMatcher, tmp)))));
--  // Finally, consatenate both parts together. !con() requires both dags to have
--  // the same operator, so we wrap PatArgs in a (set ...) dag.
--  dag ret = !con(PatOuts, (set PatArgs));
-+  dag ret = !foreach(tmp, Ins,
-+                          !subst(imem, ADDRvar,
-+                          !subst(MEMri64, ADDRri64,
-+                          !subst(MEMri, ADDRri,
-+                          !subst(ins, Intr, tmp)))));
-+}
-+
-+// Common WMMA-related fields used for building patterns for all MMA instructions.
-+class WMMA_INSTR<string _Intr, list<dag> _Args>
-+  : NVPTXInst<(outs), (ins), "?", []> {
-+  Intrinsic Intr = !cast<Intrinsic>(_Intr);
-+  // Concatenate all arguments into a single dag.
-+  dag Args = !foldl((ins), _Args, a, b, !con(a,b));
-+  // Pre-build the pattern to match (intrinsic arg0, arg1, ...).
-+  dag IntrinsicPattern = BuildPatternI<!cast<Intrinsic>(Intr), Args>.ret;
- }
- 
- //
- // wmma.load.[a|b|c].sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
- //
- 
--class WMMA_LOAD_INTR_HELPER<WMMA_REGINFO Frag, string Layout, string Space,
--                            bit WithStride>
--                           : PatFrag <(ops),(ops)> {
--  // Intrinsic that matches this instruction.
--  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_LDST<"load", Frag, Layout,
--                                                   WithStride>.record);
--  let Operands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
--  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
--  let PredicateCode = !cond(!eq(Space, ".shared"): AS_match.shared,
--                            !eq(Space, ".global"): AS_match.global,
--                            1: AS_match.generic);
--}
--
- class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
-                 DAGOperand SrcOp>
--  : EmptyNVPTXInst,
-+  : WMMA_INSTR<WMMA_NAME_LDST<"load", Frag, Layout, WithStride>.record,
-+                              [!con((ins SrcOp:$src),
-+                                    !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
-     Requires<Frag.Predicates> {
--  // Pattern that matches the intrinsic for this instruction variant.
--  PatFrag IntrMatcher = WMMA_LOAD_INTR_HELPER<Frag, Layout, Space, WithStride>;
--  dag Ins = !con((ins SrcOp:$src), !if(WithStride, (ins Int32Regs:$ldm), (ins)));
-+  // Load/store intrinsics are overloaded on pointer's address space.
-+  // To match the right intrinsic, we need to build AS-constrained PatFrag.
-+  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
-+  dag PFOperands = !if(WithStride, (ops node:$src, node:$ldm), (ops node:$src));
-+  // Build PatFrag that only matches particular address space.
-+  PatFrag IntrFrag = PatFrag<PFOperands,
-+                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
-+                             !cond(!eq(Space, ".shared"): AS_match.shared,
-+                                   !eq(Space, ".global"): AS_match.global,
-+                                   1: AS_match.generic)>;
-+  // Build AS-constrained pattern.
-+  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
- 
--  let Pattern = [BuildPattern<Frag.Outs, IntrMatcher, Ins>.ret];
-   let OutOperandList = Frag.Outs;
--  let InOperandList = Ins;
-+  let InOperandList = !con(Args, (ins MmaCode:$ptx));
-   let AsmString = "wmma.load."
-                   # Frag.frag
-                   # ".sync"
-+                  # "${ptx:aligned}"
-                   # "." # Layout
-                   # "." # Frag.geom
-                   # Space
-@@ -7506,87 +7521,79 @@ class WMMA_LOAD<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
- //
- // wmma.store.d.sync.[row|col].m16n16k16[|.global|.shared].[f16|f32]
- //
--class WMMA_STORE_INTR_HELPER<WMMA_REGINFO Frag, string Layout, string Space,
--                             bit WithStride>
--                            : PatFrag <(ops),(ops)> {
--  // Intrinsic that matches this instruction.
--  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_LDST<"store", Frag, Layout,
--                                                   WithStride>.record);
--  let Operands = !con((ops node:$dst),
--                      !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
--                      !if(WithStride, (ops node:$ldm), (ops)));
--  let Fragments = [!foreach(tmp, Operands, !subst(ops, Intr, tmp))];
--  let PredicateCode = !cond(!eq(Space, ".shared"): AS_match.shared,
--                            !eq(Space, ".global"): AS_match.global,
--                            1: AS_match.generic);
--}
--
--class WMMA_STORE<WMMA_REGINFO Frag, string Layout, string Space, bit WithStride,
--                 DAGOperand DstOp>
--  : EmptyNVPTXInst,
-+class WMMA_STORE_D<WMMA_REGINFO Frag, string Layout, string Space,
-+                   bit WithStride, DAGOperand DstOp>
-+  : WMMA_INSTR<WMMA_NAME_LDST<"store", Frag, Layout, WithStride>.record,
-+               [!con((ins DstOp:$dst),
-+                     Frag.Ins,
-+                     !if(WithStride, (ins Int32Regs:$ldm), (ins)))]>,
-     Requires<Frag.Predicates> {
--  PatFrag IntrMatcher = WMMA_STORE_INTR_HELPER<Frag, Layout, Space, WithStride>;
--  dag Ins = !con((ins DstOp:$src),
--                 Frag.Ins,
--                 !if(WithStride, (ins Int32Regs:$ldm), (ins)));
--  let Pattern = [BuildPattern<(set), IntrMatcher, Ins>.ret];
-+
-+  // Load/store intrinsics are overloaded on pointer's address space.
-+  // To match the right intrinsic, we need to build AS-constrained PatFrag.
-+  // Operands is a dag equivalent in shape to Args, but using (ops node:$name, .....).
-+  dag PFOperands = !con((ops node:$dst),
-+                        !dag(ops, !foreach(tmp, Frag.regs, node), Frag.reg_names),
-+                        !if(WithStride, (ops node:$ldm), (ops)));
-+  // Build PatFrag that only matches particular address space.
-+  PatFrag IntrFrag = PatFrag<PFOperands,
-+                             !foreach(tmp, PFOperands, !subst(ops, Intr, tmp)),
-+                             !cond(!eq(Space, ".shared"): AS_match.shared,
-+                                   !eq(Space, ".global"): AS_match.global,
-+                                   1: AS_match.generic)>;
-+  // Build AS-constrained pattern.
-+  let IntrinsicPattern = BuildPatternPF<IntrFrag, Args>.ret;
-+
-+  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
-   let OutOperandList = (outs);
--  let InOperandList = Ins;
--  let AsmString = "wmma.store.d.sync."
--                  # Layout
-+  let AsmString = "wmma.store.d.sync"
-+                  # "${ptx:aligned}"
-+                  # "." # Layout
-                   # "." # Frag.geom
-                   # Space
-                   # "." # Frag.ptx_elt_type
--                  # " \t[$src],"
-+                  # " \t[$dst],"
-                   # Frag.regstring
-                   # !if(WithStride, ", $ldm", "")
-                   # ";";
- }
- 
- // Create all load/store variants
--foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
--  foreach layout = ["row", "col"] in {
--    foreach stride = [0, 1] in {
--      foreach space = [".global", ".shared", ""] in {
--        foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
--          foreach frag = [WMMA_REGINFO<geom, "a", "f16">,
--                          WMMA_REGINFO<geom, "b", "f16">,
--                          WMMA_REGINFO<geom, "c", "f16">,
--                          WMMA_REGINFO<geom, "c", "f32">] in {
--              def : WMMA_LOAD<frag, layout, space, stride, addr>;
--          }
--          foreach frag = [WMMA_REGINFO<geom, "d", "f16">,
--                          WMMA_REGINFO<geom, "d", "f32">] in {
--              def : WMMA_STORE<frag, layout, space, stride, addr>;
--          }
--        } // addr
--      } // space
--    } // stride
--  } // layout
--} // geom
-+defset list<WMMA_INSTR> MMA_LDSTs  = {
-+  foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+    foreach layout = ["row", "col"] in {
-+      foreach stride = [0, 1] in {
-+        foreach space = [".global", ".shared", ""] in {
-+          foreach addr = [imem, Int32Regs, Int64Regs, MEMri, MEMri64] in {
-+            foreach frag = [WMMA_REGINFO<geom, "a", "f16">,
-+                            WMMA_REGINFO<geom, "b", "f16">,
-+                            WMMA_REGINFO<geom, "c", "f16">,
-+                            WMMA_REGINFO<geom, "c", "f32">] in {
-+                def : WMMA_LOAD<frag, layout, space, stride, addr>;
-+            }
-+            foreach frag = [WMMA_REGINFO<geom, "d", "f16">,
-+                            WMMA_REGINFO<geom, "d", "f32">] in {
-+                def : WMMA_STORE_D<frag, layout, space, stride, addr>;
-+            }
-+          } // addr
-+        } // space
-+      } // stride
-+    } // layout
-+  } // geom
-+} // defset
- 
- // WMMA.MMA
- class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
-                WMMA_REGINFO FragC, WMMA_REGINFO FragD,
-                string ALayout, string BLayout, int Satfinite>
--  : EmptyNVPTXInst,
-+  : WMMA_INSTR<WMMA_NAME_MMA<ALayout, BLayout, FragC, FragD, Satfinite>.record,
-+                             [FragA.Ins, FragB.Ins, FragC.Ins]>,
-     Requires<FragC.Predicates> {
--  //Intrinsic Intr = int_nvvm_suld_1d_v4i32_zero;
--  Intrinsic Intr = !cast<Intrinsic>(WMMA_NAME_MMA<ALayout, BLayout, FragC, FragD, Satfinite>.record);
--  dag Outs = FragD.Outs;
--  dag Ins = !con(FragA.Ins,
--                 FragB.Ins,
--                 FragC.Ins);
--
--  // Construct the pattern to match corresponding intrinsic call.
--  // mma does not load/store anything, so we don't need complex operand matching here.
--  dag PatOuts = !foreach(tmp, Outs, !subst(outs, set, tmp));
--  dag PatArgs = !foreach(tmp, Ins, !subst(ins, Intr, tmp));
--  let Pattern = [!con(PatOuts, (set PatArgs))];
--  let OutOperandList = Outs;
--  let InOperandList  = Ins;
--  let AsmString = "wmma.mma.sync."
--                  # ALayout
-+  let OutOperandList = FragD.Outs;
-+  let InOperandList  = !con(Args, (ins MmaCode:$ptx));
-+  let AsmString = "wmma.mma.sync"
-+                  # "${ptx:aligned}"
-+                  # "." # ALayout
-                   # "." # BLayout
-                   # "." # FragA.geom
-                   # "." # FragD.ptx_elt_type
-@@ -7598,20 +7605,34 @@ class WMMA_MMA<WMMA_REGINFO FragA, WMMA_REGINFO FragB,
-                   # FragC.regstring # ";";
- }
- 
--foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
--  foreach layout_a = ["row", "col"] in {
--    foreach layout_b = ["row", "col"] in {
--      foreach frag_c = [WMMA_REGINFO<geom, "c", "f16">,
--                        WMMA_REGINFO<geom, "c", "f32">] in {
--        foreach frag_d = [WMMA_REGINFO<geom, "d", "f16">,
--                          WMMA_REGINFO<geom, "d", "f32">] in {
--          foreach satf = [0, 1] in {
--            def : WMMA_MMA<WMMA_REGINFO<geom, "a", "f16">,
--                           WMMA_REGINFO<geom, "b", "f16">,
--                           frag_c, frag_d, layout_a, layout_b, satf>;
--          } // satf
--        } // frag_d
--      } // frag_c
--    } // layout_b
--  } // layout_a
--} // geom
-+defset list<WMMA_INSTR> MMAs  = {
-+  foreach geom = ["m16n16k16", "m32n8k16", "m8n32k16" ] in {
-+    foreach layout_a = ["row", "col"] in {
-+      foreach layout_b = ["row", "col"] in {
-+        foreach frag_c = [WMMA_REGINFO<geom, "c", "f16">,
-+                          WMMA_REGINFO<geom, "c", "f32">] in {
-+          foreach frag_d = [WMMA_REGINFO<geom, "d", "f16">,
-+                            WMMA_REGINFO<geom, "d", "f32">] in {
-+            foreach satf = [0, 1] in {
-+              def : WMMA_MMA<WMMA_REGINFO<geom, "a", "f16">,
-+                             WMMA_REGINFO<geom, "b", "f16">,
-+                             frag_c, frag_d, layout_a, layout_b, satf>;
-+            } // satf
-+          } // frag_d
-+        } // frag_c
-+      } // layout_b
-+    } // layout_a
-+  } // geom
-+} // defset
-+
-+// Constructing non-flat DAGs is still a pain. I can't !subst a dag node with a
-+// dag, so the ptx.version must be appended *after* foreach replaces 'ins' with
-+// the instruction record.
-+class WMMA_PAT<WMMA_INSTR wi>
-+      : Pat<wi.IntrinsicPattern,
-+            !con(!foreach(tmp, wi.Args, !subst(ins, wi, tmp)),
-+                 (wi ptx.version))>;
-+
-+// Build intrinsic->instruction patterns for all MMA instructions.
-+foreach mma = !listconcat(MMAs, MMA_LDSTs) in
-+  def : WMMA_PAT<mma>;
-diff --git a/test/CodeGen/NVPTX/wmma.py b/test/CodeGen/NVPTX/wmma.py
-index 14bbfd7df09..72d189ca050 100644
---- a/test/CodeGen/NVPTX/wmma.py
-+++ b/test/CodeGen/NVPTX/wmma.py
-@@ -3,9 +3,12 @@
- 
- # RUN: python %s > %t.ll
- # RUN: llc < %t.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx61 | FileCheck %t.ll
-+# RUN: python %s --ptx=63 > %t-ptx63.ll
-+# RUN: llc < %t-ptx63.ll -march=nvptx64 -mcpu=sm_70 -mattr=+ptx63 | FileCheck %t-ptx63.ll
- 
- from __future__ import print_function
- 
-+import argparse
- from itertools import product
- from string import Template
- 
-@@ -64,7 +67,7 @@ define ${ret_ty} @test_${function}_o(i8 ${as}* %src ${extra_args}) {
- }
- """
-   intrinsic_template = "llvm.nvvm.wmma.${geom}.load.${abc}.${layout}${stride}.${itype}.${pspace}"
--  instruction_template = "wmma.load.${abc}.sync.${layout}.${geom}${space}.${itype}"
-+  instruction_template = "wmma.load.${abc}.sync${aligned}.${layout}.${geom}${space}.${itype}"
- 
-   for geom, abc, layout, space, stride, itype in product(
-       known_geoms,
-@@ -76,6 +79,7 @@ define ${ret_ty} @test_${function}_o(i8 ${as}* %src ${extra_args}) {
- 
-     params = {
-         "abc" : abc,
-+        "aligned" : ".aligned" if ptx_version >= 63 else "",
-         "layout" : layout,
-         "space" : space,
-         "stride" : stride,
-@@ -135,7 +139,7 @@ define void @test_${function}_o(i8 ${as}* %src, ${args}${extra_args}) {
- }
- """
-   intrinsic_template = "llvm.nvvm.wmma.${geom}.store.${abc}.${layout}${stride}.${itype}.${pspace}"
--  instruction_template = "wmma.store.${abc}.sync.${layout}.${geom}${space}.${itype}"
-+  instruction_template = "wmma.store.${abc}.sync${aligned}.${layout}.${geom}${space}.${itype}"
- 
-   for geom, abc, layout, space, stride, itype in product(
-       known_geoms,
-@@ -147,6 +151,7 @@ define void @test_${function}_o(i8 ${as}* %src, ${args}${extra_args}) {
- 
-     params = {
-         "abc" : abc,
-+        "aligned" : ".aligned" if ptx_version >= 63 else "",
-         "layout" : layout,
-         "space" : space,
-         "stride" : stride,
-@@ -191,7 +196,7 @@ define ${ret_ty} @test_${function}(
- }
- """
-   intrinsic_template = "llvm.nvvm.wmma.${geom}.mma.${alayout}.${blayout}.${dtype}.${ctype}${satf}"
--  instruction_template = "wmma.mma.sync.${alayout}.${blayout}.${geom}.${dtype}.${ctype}${satf}"
-+  instruction_template = "wmma.mma.sync${aligned}.${alayout}.${blayout}.${geom}.${dtype}.${ctype}${satf}"
- 
-   for geom, alayout, blayout, ctype, dtype, satf in product(
-       known_geoms,
-@@ -202,6 +207,7 @@ define ${ret_ty} @test_${function}(
-       [".satfinite", ""]):
- 
-     params = {
-+        "aligned" : ".aligned" if ptx_version >= 63 else "",
-         "alayout" : alayout,
-         "blayout" : blayout,
-         "ctype" : ctype,
-@@ -230,4 +236,9 @@ def main():
-   gen_wmma_store_tests()
-   gen_wmma_mma_tests()
- 
-+parser = argparse.ArgumentParser()
-+parser.add_argument('--ptx', type=int, default=60)
-+args = parser.parse_args()
-+ptx_version = args.ptx
-+
- main()
--- 
-2.17.1
-
diff --git a/deps/patches/llvm-8.0-D63688-wasm-isLocal.patch b/deps/patches/llvm-8.0-D63688-wasm-isLocal.patch
deleted file mode 100644
index 820363d3054246..00000000000000
--- a/deps/patches/llvm-8.0-D63688-wasm-isLocal.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 83d5085a7fcbb4596d964dbe037c5ebf4de02b69 Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@alumni.harvard.edu>
-Date: Sun, 23 Jun 2019 00:29:59 +0000
-Subject: [PATCH] [Support] Fix build under Emscripten
-
-Summary:
-Emscripten's libc doesn't define MNT_LOCAL, thus causing a build
-failure in the fallback path. However, to the best of my knowledge,
-it also doesn't support remote file system mounts, so we may simply
-return `true` here (as we do for e.g. Fuchsia). With this fix, the
-core LLVM libraries build correctly under emscripten (though some
-of the tools and utils do not).
-
-Reviewers: kripken
-Differential Revision: https://reviews.llvm.org/D63688
-
-llvm-svn: 364143
-(cherry picked from commit 5f4ae7c45718618c4c571495e7d910d5722f70ad)
----
- llvm/lib/Support/Unix/Path.inc | 3 +++
- 1 file changed, 3 insertions(+)
-
-diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
-index d7cc0d627d0..eb38a71fffb 100644
---- a/lib/Support/Unix/Path.inc
-+++ b/lib/Support/Unix/Path.inc
-@@ -398,6 +398,9 @@ static bool is_local_impl(struct STATVFS &Vfs) {
- #elif defined(__Fuchsia__)
-   // Fuchsia doesn't yet support remote filesystem mounts.
-   return true;
-+#elif defined(__EMSCRIPTEN__)
-+  // Emscripten doesn't currently support remote filesystem mounts.
-+  return true;
- #elif defined(__HAIKU__)
-   // Haiku doesn't expose this information.
-   return false;
--- 
-2.24.0
-
diff --git a/deps/patches/llvm-8.0-D65174-limit-merge-stores.patch b/deps/patches/llvm-8.0-D65174-limit-merge-stores.patch
deleted file mode 100644
index 646c44f454641e..00000000000000
--- a/deps/patches/llvm-8.0-D65174-limit-merge-stores.patch
+++ /dev/null
@@ -1,119 +0,0 @@
-From 19992a8c7f2df2000ea7fd4a284ec7b407400fb0 Mon Sep 17 00:00:00 2001
-From: Wei Mi <wmi@google.com>
-Date: Sun, 29 Mar 2020 17:14:12 -0400
-Subject: [PATCH] [DAGCombine] Limit the number of times for the same store and
- root nodes to bail out in store merging dependence check.
-
-We run into a case where dependence check in store merging bail out many times
-for the same store and root nodes in a huge basicblock. That increases compile
-time by almost 100x. The patch add a map to track how many times the bailing
-out happen for the same store and root, and if it is over a limit, stop
-considering the store with the same root as a merging candidate.
-
-Differential Revision: https://reviews.llvm.org/D65174
----
- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 45 +++++++++++++++++--
- 1 file changed, 42 insertions(+), 3 deletions(-)
-
-diff --git llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-index 6af01423ca1..9c7e37d6945 100644
---- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-@@ -112,6 +112,11 @@ static cl::opt<bool>
-   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
-                     cl::desc("DAG combiner may split indexing from loads"));
- 
-+static cl::opt<unsigned> StoreMergeDependenceLimit(
-+    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
-+    cl::desc("Limit the number of times for the same StoreNode and RootNode "
-+             "to bail out in store merging dependence check"));
-+
- namespace {
- 
-   class DAGCombiner {
-@@ -145,6 +150,14 @@ namespace {
-     /// which have not yet been combined to the worklist.
-     SmallPtrSet<SDNode *, 32> CombinedNodes;
- 
-+    /// Map from candidate StoreNode to the pair of RootNode and count.
-+    /// The count is used to track how many times we have seen the StoreNode
-+    /// with the same RootNode bail out in dependence check. If we have seen
-+    /// the bail out for the same pair many times over a limit, we won't
-+    /// consider the StoreNode with the same RootNode as store merging
-+    /// candidate again.
-+    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
-+
-     // AA - Used for DAG load/store alias analysis.
-     AliasAnalysis *AA;
- 
-@@ -190,6 +203,7 @@ namespace {
-     /// Remove all instances of N from the worklist.
-     void removeFromWorklist(SDNode *N) {
-       CombinedNodes.erase(N);
-+      StoreRootCountMap.erase(N);
- 
-       auto It = WorklistMap.find(N);
-       if (It == WorklistMap.end())
-@@ -14423,6 +14437,18 @@ void DAGCombiner::getStoreMergeCandidates(
-     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
-   };
- 
-+  // Check if the pair of StoreNode and the RootNode already bail out many
-+  // times which is over the limit in dependence check.
-+  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
-+                                        SDNode *RootNode) -> bool {
-+    auto RootCount = StoreRootCountMap.find(StoreNode);
-+    if (RootCount != StoreRootCountMap.end() &&
-+        RootCount->second.first == RootNode &&
-+        RootCount->second.second > StoreMergeDependenceLimit)
-+      return true;
-+    return false;
-+  };
-+
-   // We looking for a root node which is an ancestor to all mergable
-   // stores. We search up through a load, to our root and then down
-   // through all children. For instance we will find Store{1,2,3} if
-@@ -14450,7 +14476,8 @@ void DAGCombiner::getStoreMergeCandidates(
-             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
-               BaseIndexOffset Ptr;
-               int64_t PtrDiff;
--              if (CandidateMatch(OtherST, Ptr, PtrDiff))
-+              if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
-+                  !OverLimitInDependenceCheck(OtherST, RootNode))
-                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
-             }
-   } else
-@@ -14459,7 +14486,8 @@ void DAGCombiner::getStoreMergeCandidates(
-         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
-           BaseIndexOffset Ptr;
-           int64_t PtrDiff;
--          if (CandidateMatch(OtherST, Ptr, PtrDiff))
-+          if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
-+              !OverLimitInDependenceCheck(OtherST, RootNode))
-             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
-         }
- }
-@@ -14517,8 +14545,19 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
-   // Search through DAG. We can stop early if we find a store node.
-   for (unsigned i = 0; i < NumStores; ++i)
-     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
--                                     Max))
-+                                     Max)) {
-+      // If the searching bail out, record the StoreNode and RootNode in the
-+      // StoreRootCountMap. If we have seen the pair many times over a limit,
-+      // we won't add the StoreNode into StoreNodes set again.
-+      if (Visited.size() >= Max) {
-+        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
-+        if (RootCount.first == RootNode)
-+          RootCount.second++;
-+        else
-+          RootCount = {RootNode, 1};
-+      }
-       return false;
-+    }
-   return true;
- }
- 
--- 
-2.25.2
-
diff --git a/deps/patches/llvm-8.0-D66401-mingw-reloc.patch b/deps/patches/llvm-8.0-D66401-mingw-reloc.patch
deleted file mode 100644
index 384399f2162f27..00000000000000
--- a/deps/patches/llvm-8.0-D66401-mingw-reloc.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-diff --git a/test/CodeGen/X86/mingw-refptr.ll b/test/CodeGen/X86/mingw-refptr.ll
---- a/test/CodeGen/X86/mingw-refptr.ll
-+++ b/test/CodeGen/X86/mingw-refptr.ll
-@@ -1,5 +1,6 @@
- ; RUN: llc < %s -mtriple=x86_64-w64-mingw32 | FileCheck %s -check-prefix=CHECK-X64
- ; RUN: llc < %s -mtriple=i686-w64-mingw32 | FileCheck %s -check-prefix=CHECK-X86
-+; RUN: llc < %s -mtriple=i686-w64-mingw32-none-elf | FileCheck %s -check-prefix=CHECK-X86-ELF
- 
- @var = external local_unnamed_addr global i32, align 4
- @dsolocalvar = external dso_local local_unnamed_addr global i32, align 4
-@@ -16,6 +17,9 @@
- ; CHECK-X86:    movl .refptr._var, %eax
- ; CHECK-X86:    movl (%eax), %eax
- ; CHECK-X86:    retl
-+; CHECK-X86-ELF-LABEL: getVar:
-+; CHECK-X86-ELF:    movl var, %eax
-+; CHECK-X86-ELF:    retl
- entry:
-   %0 = load i32, i32* @var, align 4
-   ret i32 %0
-@@ -66,6 +70,9 @@
- ; CHECK-X86:    movl __imp__extvar, %eax
- ; CHECK-X86:    movl (%eax), %eax
- ; CHECK-X86:    retl
-+; CHECK-X86-ELF-LABEL: getExtVar:
-+; CHECK-X86-ELF:    movl extvar, %eax
-+; CHECK-X86-ELF:    retl
- entry:
-   %0 = load i32, i32* @extvar, align 4
-   ret i32 %0
-diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
---- a/lib/Target/X86/X86Subtarget.cpp
-+++ b/lib/Target/X86/X86Subtarget.cpp
-@@ -146,6 +146,9 @@
-       return X86II::MO_DLLIMPORT;
-     return X86II::MO_COFFSTUB;
-   }
-+  // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables.
-+  if (isOSWindows())
-+    return X86II::MO_NO_FLAG;
- 
-   if (is64Bit()) {
-     // ELF supports a large, truly PIC code model with non-PC relative GOT
-diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
---- a/lib/Target/TargetMachine.cpp
-+++ b/lib/Target/TargetMachine.cpp
-@@ -128,8 +128,8 @@
-   // don't assume the variables to be DSO local unless we actually know
-   // that for sure. This only has to be done for variables; for functions
-   // the linker can insert thunks for calling functions from another DLL.
--  if (TT.isWindowsGNUEnvironment() && GV && GV->isDeclarationForLinker() &&
--      isa<GlobalVariable>(GV))
-+  if (TT.isWindowsGNUEnvironment() && TT.isOSBinFormatCOFF() && GV &&
-+      GV->isDeclarationForLinker() && isa<GlobalVariable>(GV))
-     return false;
- 
-   // On COFF, don't mark 'extern_weak' symbols as DSO local. If these symbols
-@@ -142,7 +142,9 @@
-   // Make an exception for windows OS in the triple: Some firmware builds use
-   // *-win32-macho triples. This (accidentally?) produced windows relocations
-   // without GOT tables in older clang versions; Keep this behaviour.
--  if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
-+  // Some JIT users use *-win32-elf triples; these shouldn't use GOT tables
-+  // either.
-+  if (TT.isOSBinFormatCOFF() || TT.isOSWindows())
-     return true;
- 
-   // Most PIC code sequences that assume that a symbol is local cannot
-
diff --git a/deps/patches/llvm-8.0-D66657-codegen-degenerate.patch b/deps/patches/llvm-8.0-D66657-codegen-degenerate.patch
deleted file mode 100644
index ddcf4dc7ddaa66..00000000000000
--- a/deps/patches/llvm-8.0-D66657-codegen-degenerate.patch
+++ /dev/null
@@ -1,65 +0,0 @@
-From 4c7e1defbddafcfcfe1211b041d43a36114a8f48 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Sat, 14 Dec 2019 10:33:30 -0500
-Subject: [PATCH 2/2] [CodegenPrepare] Guard against degenerate branches
-
-Summary:
-Guard against a potential crash observed in https://github.com/JuliaLang/julia/issues/32994#issuecomment-524249628
-If two branches are collapsed we can encounter a degenerate conditional branch `TBB==FBB`.
-The subsequent code assumes that they differ, so we exit out early.
-
-Reviewers: ributzka, spatel
-
-Subscribers: loladiro, dexonsmith, hiraditya, llvm-commits
-
-Tags: #llvm
-
-Differential Revision: https://reviews.llvm.org/D66657
----
- llvm/lib/CodeGen/CodeGenPrepare.cpp            |  4 ++++
- .../CodeGen/X86/codegen-prepare-collapse.ll    | 18 ++++++++++++++++++
- 2 files changed, 22 insertions(+)
- create mode 100644 llvm/test/CodeGen/X86/codegen-prepare-collapse.ll
-
-diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
-index c35f8666fa3..3647641c594 100644
---- a/lib/CodeGen/CodeGenPrepare.cpp
-+++ b/lib/CodeGen/CodeGenPrepare.cpp
-@@ -6929,6 +6929,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
-     if (Br1->getMetadata(LLVMContext::MD_unpredictable))
-       continue;
- 
-+    // The merging of mostly empty BB can cause a degenerate branch.
-+    if (TBB == FBB)
-+      continue;
-+
-     unsigned Opc;
-     Value *Cond1, *Cond2;
-     if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
-diff --git a/test/CodeGen/X86/codegen-prepare-collapse.ll b/test/CodeGen/X86/codegen-prepare-collapse.ll
-new file mode 100644
-index 00000000000..18e3ef7afbd
---- /dev/null
-+++ b/test/CodeGen/X86/codegen-prepare-collapse.ll
-@@ -0,0 +1,18 @@
-+; RUN: llc -fast-isel=true -O1 -mtriple=x86_64-unkown-linux-gnu -start-before=codegenprepare -stop-after=codegenprepare -o - < %s | FileCheck %s
-+
-+; CHECK-LABEL: @foo
-+define void @foo() {
-+top:
-+; CHECK: br label %L34
-+  br label %L34
-+
-+L34:                                              ; preds = %L34, %L34, %top
-+  %.sroa.075.0 = phi i64 [ undef, %top ], [ undef, %L34 ], [ undef, %L34 ]
-+  %0 = icmp sgt i8 undef, -1
-+  %cond5896 = icmp eq i8 0, 2
-+  %cond58 = and i1 %cond5896, %0
-+; During codegenprepare such degenerate branches can occur and should not
-+; lead to crashes.
-+; CHECK: br label %L34
-+  br i1 %cond58, label %L34, label %L34
-+}
--- 
-2.24.1
-
diff --git a/deps/patches/llvm-8.0-D71495-vectorize-freduce.patch b/deps/patches/llvm-8.0-D71495-vectorize-freduce.patch
deleted file mode 100644
index fb461920c176fb..00000000000000
--- a/deps/patches/llvm-8.0-D71495-vectorize-freduce.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From 7c30e23f115ae285b497ef11af0153703111dff2 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Sun, 22 Dec 2019 14:25:50 -0500
-Subject: [PATCH 1/2] [SelectionDAG] Copy FP flags when visiting a binary
- instruction.
-
-Summary:
-We noticed in Julia that the sequence below no longer turned into
-a sequence of FMA instructions in LLVM 7+, but it did in LLVM 6.
-
-```
-    %29 = fmul contract <4 x double> %wide.load, %wide.load16
-    %30 = fmul contract <4 x double> %wide.load13, %wide.load17
-    %31 = fmul contract <4 x double> %wide.load14, %wide.load18
-    %32 = fmul contract <4 x double> %wide.load15, %wide.load19
-    %33 = fadd fast <4 x double> %vec.phi, %29
-    %34 = fadd fast <4 x double> %vec.phi10, %30
-    %35 = fadd fast <4 x double> %vec.phi11, %31
-    %36 = fadd fast <4 x double> %vec.phi12, %32
-```
-
-Unlike Clang, Julia doesn't set the `unsafe-fp-math=true` function
-attribute, but rather emits more local instruction flags.
-
-This partially undoes https://reviews.llvm.org/D46854 and if required I can try to minimize the test further.
-
-Reviewers: spatel, mcberg2017
-
-Reviewed By: spatel
-
-Subscribers: chriselrod, merge_guards_bot, hiraditya, llvm-commits
-
-Tags: #llvm
-
-Differential Revision: https://reviews.llvm.org/D71495
----
- .../SelectionDAG/SelectionDAGBuilder.cpp      |  7 +++++
- llvm/test/CodeGen/X86/fmf-reduction.ll        | 26 +++++++++++++++++++
- 2 files changed, 33 insertions(+)
- create mode 100644 llvm/test/CodeGen/X86/fmf-reduction.ll
-
-diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-index bfeb3d1bc2b..e6362c19691 100644
---- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
-@@ -2833,6 +2833,13 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
-   if (isVectorReductionOp(&I)) {
-     Flags.setVectorReduction(true);
-     LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
-+
-+    // If no flags are set we will propagate the incoming flags, if any flags
-+    // are set, we will intersect them with the incoming flag and so we need to
-+    // copy the FMF flags here.
-+    if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) {
-+      Flags.copyFMF(*FPOp);
-+    }
-   }
- 
-   SDValue Op1 = getValue(I.getOperand(0));
-diff --git a/test/CodeGen/X86/fmf-reduction.ll b/test/CodeGen/X86/fmf-reduction.ll
-new file mode 100644
-index 00000000000..1d669d2a924
---- /dev/null
-+++ b/test/CodeGen/X86/fmf-reduction.ll
-@@ -0,0 +1,26 @@
-+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-+; RUN: llc < %s -mtriple=x86_64-- -mattr=fma | FileCheck %s
-+
-+; Propagation of IR FMF should not drop flags when adding the DAG reduction flag.
-+; This should include an FMA instruction, not separate FMUL/FADD.
-+
-+define double @julia_dotf(<4 x double> %x, <4 x double> %y, <4 x double> %z, i1 %t3) {
-+; CHECK-LABEL: julia_dotf:
-+; CHECK:       # %bb.0:
-+; CHECK-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
-+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm1
-+; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
-+; CHECK-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-+; CHECK-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
-+; CHECK-NEXT:    vzeroupper
-+; CHECK-NEXT:    retq
-+  %t1 = fmul contract <4 x double> %x, %y
-+  %t2 = fadd fast <4 x double> %z, %t1
-+  %rdx.shuf = shufflevector <4 x double> %t2, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
-+  %bin.rdx22 = fadd fast <4 x double> %t2, %rdx.shuf
-+  %rdx.shuf23 = shufflevector <4 x double> %bin.rdx22, <4 x double> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-+  %bin.rdx24 = fadd fast <4 x double> %bin.rdx22, %rdx.shuf23
-+  %t4 = extractelement <4 x double> %bin.rdx24, i32 0
-+  ret double %t4
-+}
-+
--- 
-2.24.1
-
diff --git a/deps/patches/llvm-8.0-D75072-SCEV-add-type.patch b/deps/patches/llvm-8.0-D75072-SCEV-add-type.patch
deleted file mode 100644
index 6418eca5d28b69..00000000000000
--- a/deps/patches/llvm-8.0-D75072-SCEV-add-type.patch
+++ /dev/null
@@ -1,415 +0,0 @@
-From f11f45a45ce8b90c798dd939d2782205e4291360 Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@juliacomputing.com>
-Date: Fri, 6 Mar 2020 10:29:20 -0500
-Subject: [PATCH] [SCEV] Record NI types in add exprs
-
-Summary:
-(Rebased to LLVM 8 from the original LLVM 9 patch)
-This fixes a case where loop-reduce introduces ptrtoint/inttoptr for
-non-integral address space pointers. Over the past several years, we
-have gradually improved the SCEVExpander to actually do something
-sensible for non-integral pointer types. However, that obviously
-relies on the expander knowing what the type of the SCEV expression is.
-That is usually the case, but there is one important case where it's
-not: The type of an add expression is just the type of the last operand,
-so if the non-integral pointer is not the last operand, later uses of
-that SCEV may not realize that the given add expression contains
-non-integral pointers and may try to expand it as integers.
-
-One interesting observation is that we do get away with this scheme in
-shockingly many cases. The reason for this is that SCEV expressions
-often have an `scUnknown` pointer base, which our sort order on the
-operands of add expressions sort behind basically everything else,
-so it usually ends up as the last operand.
-
-One situation where this fails is included as a test case. This test
-case was bugpoint-reduced from the issue reported at
-https://github.com/JuliaLang/julia/issues/31156. What happens here
-is that the pointer base is an scAddRec from an outer loop, plus an
-scUnknown integer offset. By our sort order, the scUnknown gets sorted
-after the scAddRec pointer base, thus making an add expression of these
-two operands have integer type. This then confuses the expander, into
-attempting to expand the whole thing as integers, which will obviously
-fail when reaching the non-integral pointer.
-
-I considered a few options to solve this, but here's what I ended up
-settling on: The AddExpr class gains a new subclass that explicitly
-stores the type of the expression. This subclass is used whenever one
-of the operands is a non-integral pointer. To reduce the impact for the
-regular case (where the SCEV expression contains no non-integral
-pointers), a bit flag is kept in each flag expression to indicate
-whether it is of non-integral pointer type (this should give the same
-answer as asking if getType() is non-integral, but performing that
-query may involve a pointer chase and requires the DataLayout). For
-add expressions that flag is also used to indicate whether we're using
-the subclass or not. This is slightly inefficient, because it uses
-the subclass even in the (not uncommon) case where the last operand
-does actually accurately reflect the non-integral pointer type. However,
-it didn't seem worth the extra flag bit and complexity to do this
-micro-optimization.
-
-I had hoped that we could additionally restrict mul exprs from
-containing any non-integral pointers, and also require add exprs to
-only have one operand containg such pointers (but not more), but this
-turned out not to work. The reason for this is that SCEV wants to
-form differences between pointers, which it represents as `A + B*-1`,
-so we need to allow both multiplication by `-1` and addition with
-multiple non-integral pointer arguments. I'm not super happy with
-that situation, but I think it exposes a more general problem with
-non-integral pointers in LLVM. We don't actually have a way to express
-the difference between two non-integral pointers at the IR level.
-In theory this is a problem for SCEV, because it means that we can't
-materialize such SCEV expression. However, in practice, these
-expressions generally have the same base pointer, so SCEV will
-appropriately simplify them to just the integer components.
-Nevertheless it is a bit unsatisfying. Perhaps we could have an
-intrinsic that takes the byte difference between two pointers to the
-same allocated object (in the same sense as is used in getelementptr),
-which should be a sensible operation even for non-integral pointers.
-However, given the practical considerations above, that's a project
-for another time. For now, simply allowing the existing pointer-diff
-pattern for non-integral pointers seems to work ok.
-
-Reviewers: sanjoy, reames, vtjnash, vchuravy
-
-Subscribers: hiraditya, javed.absar, llvm-commits
-
-Tags: #llvm, #julialang
-
-Differential Revision: https://reviews.llvm.org/D75072
----
- llvm/include/llvm/Analysis/ScalarEvolution.h  | 26 +++++--
- .../Analysis/ScalarEvolutionExpressions.h     | 70 ++++++++++++++++---
- llvm/lib/Analysis/ScalarEvolution.cpp         | 44 +++++++++---
- .../LoopStrengthReduce/nonintegral.ll         | 35 +++++++++-
- 4 files changed, 150 insertions(+), 25 deletions(-)
-
-diff --git llvm/include/llvm/Analysis/ScalarEvolution.h llvm/include/llvm/Analysis/ScalarEvolution.h
-index 5286f6a220e..f27fceb70d2 100644
---- llvm/include/llvm/Analysis/ScalarEvolution.h
-+++ llvm/include/llvm/Analysis/ScalarEvolution.h
-@@ -116,6 +116,19 @@ public:
-     NoWrapMask = (1 << 3) - 1
-   };
-
-+  /// HasNonIntegralPointerFlag are bitfield indices into SubclassData.
-+  ///
-+  /// When constructing SCEV expressions for LLVM expressions with non-integral
-+  /// pointer types, some additional processing is required to ensure that we
-+  /// don't introduce any illegal transformations. However, non-integral pointer
-+  /// types are a very rarely used feature, so we want to make sure to only do
-+  /// such processing if they are actually used. To ensure minimal performance
-+  /// impact, we memoize that fact in using these flags.
-+  enum HasNonIntegralPointerFlag {
-+    FlagNoNIPointers = 0,
-+    FlagHasNIPointers = (1 << 3)
-+  };
-+
-   explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy)
-       : FastID(ID), SCEVType(SCEVTy) {}
-   SCEV(const SCEV &) = delete;
-@@ -138,6 +138,10 @@ public:
-   /// Return true if the specified scev is negated, but not a constant.
-   bool isNonConstantNegative() const;
- 
-+  bool hasNonIntegralPointers() const {
-+    return SubclassData & FlagHasNIPointers;
-+  }
-+
-   /// Print out the internal representation of this scalar to the specified
-   /// stream.  This should really only be used for debugging purposes.
-   void print(raw_ostream &OS) const;
-diff --git llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-index 876d68438ef..b9ea23c0086 100644
---- llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-+++ llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-@@ -181,6 +184,13 @@ class Type;
-       return getNoWrapFlags(FlagNW) != FlagAnyWrap;
-     }
- 
-+    void setHasNIPtr(bool HasNIPtr) {
-+      if (HasNIPtr)
-+        SubclassData |= FlagHasNIPointers;
-+      else
-+        SubclassData &= ~FlagHasNIPointers;
-+    }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-       return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-@@ -215,24 +220,54 @@ class Type;
-   class SCEVAddExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
- 
-+  protected:
-     SCEVAddExpr(const FoldingSetNodeIDRef ID,
-                 const SCEV *const *O, size_t N)
-       : SCEVCommutativeExpr(ID, scAddExpr, O, N) {}
- 
-   public:
--    Type *getType() const {
--      // Use the type of the last operand, which is likely to be a pointer
--      // type, if there is one. This doesn't usually matter, but it can help
--      // reduce casts when the expressions are expanded.
--      return getOperand(getNumOperands() - 1)->getType();
-+    /// Returns the type of the add expression, by looking either at the last
-+    /// operand or deferring to the SCEVAddNIExpr subclass for non-integral
-+    /// pointers.
-+    Type *getType() const;
-+
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr; }
-+  };
-+
-+  /// This node represents an addition of some number of SCEVs, one which
-+  /// is a non-integral pointer type, requiring us to know the type exactly for
-+  /// correctness.
-+  class SCEVAddNIExpr : public SCEVAddExpr {
-+    friend class ScalarEvolution;
-+    PointerType *NIType;
-+
-+    SCEVAddNIExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N,
-+                  PointerType *NIType)
-+        : SCEVAddExpr(ID, O, N), NIType(NIType) {
-+      SubclassData |= FlagHasNIPointers;
-     }
- 
-+  public:
-+    Type *getType() const { return NIType; }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
--      return S->getSCEVType() == scAddExpr;
-+      return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers();
-     }
-   };
- 
-+  inline Type *SCEVAddExpr::getType() const {
-+    // In general, use the type of the last operand, which is likely to be a
-+    // pointer type, if there is one. This doesn't usually matter, but it can
-+    // help reduce casts when the expressions are expanded. In the (unusual)
-+    // case that we're working with non-integral pointers, we have a subclass
-+    // that stores that type explicitly.
-+    if (hasNonIntegralPointers())
-+      return cast<SCEVAddNIExpr>(this)->getType();
-+    return getOperand(getNumOperands() - 1)->getType();
-+  }
-+
-   /// This node represents multiplication of some number of SCEVs.
-   class SCEVMulExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
-@@ -242,6 +273,18 @@ class Type;
-       : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
- 
-   public:
-+    Type *getType() const {
-+      // In general, we can't form SCEVMulExprs with non-integral pointer types,
-+      // but for the moment we need to allow a special case: Multiplying by
-+      // -1 to be able express the difference between two pointers. In order
-+      // to maintain the invariant that SCEVs with the NI flag set should have
-+      // a type corresponding to the contained NI ptr, we need to return the
-+      // type of the pointer here.
-+      if (hasNonIntegralPointers())
-+        return getOperand(getNumOperands() - 1)->getType();
-+      return SCEVCommutativeExpr::getType();
-+    }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-       return S->getSCEVType() == scMulExpr;
-@@ -467,9 +690,12 @@ class Type;
-     /// instances owned by a ScalarEvolution.
-     SCEVUnknown *Next;
- 
--    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
--                ScalarEvolution *se, SCEVUnknown *next) :
--      SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {}
-+    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se,
-+                SCEVUnknown *next, bool ValueIsNIPtr)
-+        : SCEV(ID, scUnknown), CallbackVH(V), SE(se), Next(next) {
-+      if (ValueIsNIPtr)
-+        SubclassData |= FlagHasNIPointers;
-+    }
- 
-     // Implement CallbackVH.
-     void deleted() override;
-diff --git llvm/lib/Analysis/ScalarEvolution.cpp llvm/lib/Analysis/ScalarEvolution.cpp
-index cd74815a895..09e98345d0f 100644
---- llvm/lib/Analysis/ScalarEvolution.cpp
-+++ llvm/lib/Analysis/ScalarEvolution.cpp
-@@ -354,12 +354,13 @@ Type *SCEV::getType() const {
-   case scSignExtend:
-     return cast<SCEVCastExpr>(this)->getType();
-   case scAddRecExpr:
--  case scMulExpr:
-   case scUMaxExpr:
-   case scSMaxExpr:
-   case scUMinExpr:
-   case scSMinExpr:
-     return cast<SCEVNAryExpr>(this)->getType();
-+  case scMulExpr:
-+    return cast<SCEVMulExpr>(this)->getType();
-   case scAddExpr:
-     return cast<SCEVAddExpr>(this)->getType();
-   case scUDivExpr:
-@@ -2419,8 +2420,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-   }
- 
-   // Limit recursion calls depth.
--  if (Depth > MaxArithDepth)
-+  if (Depth > MaxArithDepth) {
-     return getOrCreateAddExpr(Ops, Flags);
-+  }
- 
-   // Okay, check to see if the same value occurs in the operand list more than
-   // once.  If so, merge them together into an multiply expression.  Since we
-@@ -2761,16 +2763,27 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
-                                     SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scAddExpr);
--  for (const SCEV *Op : Ops)
--    ID.AddPointer(Op);
-+  bool HasNIPtr = false;
-+  PointerType *NIPtrType = nullptr;
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    ID.AddPointer(Ops[i]);
-+    if (Ops[i]->hasNonIntegralPointers()) {
-+      HasNIPtr = true;
-+      NIPtrType = cast<PointerType>(Ops[i]->getType());
-+    }
-+  }
-   void *IP = nullptr;
-   SCEVAddExpr *S =
-       static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-   if (!S) {
-     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--    S = new (SCEVAllocator)
--        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
-+    if (HasNIPtr)
-+      S = new (SCEVAllocator)
-+          SCEVAddNIExpr(ID.Intern(SCEVAllocator), O, Ops.size(), NIPtrType);
-+    else
-+      S = new (SCEVAllocator)
-+          SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
-     UniqueSCEVs.InsertNode(S, IP);
-     addToLoopUseLists(S);
-   }
-@@ -2783,8 +2763,10 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
-                                        const Loop *L, SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scAddRecExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    assert(i == 0 || !Ops[i]->hasNonIntegralPointers());
-     ID.AddPointer(Ops[i]);
-+  }
-   ID.AddPointer(L);
-   void *IP = nullptr;
-   SCEVAddRecExpr *S =
-@@ -2798,6 +2813,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
-     addToLoopUseLists(S);
-   }
-   S->setNoWrapFlags(Flags);
-+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
-   return S;
- }
- 
-@@ -2806,8 +2822,11 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
-                                     SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scMulExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-+  bool HasNIPtr = false;
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    HasNIPtr |= Ops[i]->hasNonIntegralPointers();
-     ID.AddPointer(Ops[i]);
-+  }
-   void *IP = nullptr;
-   SCEVMulExpr *S =
-     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-@@ -2820,6 +2839,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
-     addToLoopUseLists(S);
-   }
-   S->setNoWrapFlags(Flags);
-+  S->setHasNIPtr(HasNIPtr);
-   return S;
- }
- 
-@@ -3631,8 +3591,11 @@ const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
-   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
-   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
-+  SCEVMinMaxExpr *S = new (SCEVAllocator) SCEVMinMaxExpr(
-       ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
-+  // For MinMaxExprs it's sufficient to see if the first Op has NI data, as the
-+  // operands all need to be of the same type.
-+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
-   UniqueSCEVs.InsertNode(S, IP);
-   addToLoopUseLists(S);
-   return S;
-@@ -3708,8 +3731,9 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
-            "Stale SCEVUnknown in uniquing map!");
-     return S;
-   }
-+  bool ValueIsNIPtr = getDataLayout().isNonIntegralPointerType(V->getType());
-   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
--                                            FirstUnknown);
-+                                            FirstUnknown, ValueIsNIPtr);
-   FirstUnknown = cast<SCEVUnknown>(S);
-   UniqueSCEVs.InsertNode(S, IP);
-   return S;
-diff --git llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-index 5648e3aa74a..6936521f3a6 100644
---- llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-+++ llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-@@ -2,7 +2,7 @@
- 
- ; Address Space 10 is non-integral. The optimizer is not allowed to use
- ; ptrtoint/inttoptr instructions. Make sure that this doesn't happen
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
- target triple = "x86_64-unknown-linux-gnu"
- 
- define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
-@@ -43,3 +43,36 @@ if38:                                             ; preds = %L119
- done:                                             ; preds = %if38
-   ret void
- }
-+
-+; This is a bugpoint-reduced regression test - It doesn't make too much sense by itself,
-+; but creates the correct SCEV expressions to reproduce the issue. See
-+; https://github.com/JuliaLang/julia/issues/31156 for the original bug report.
-+define void @"japi1_permutedims!_4259"(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i1 %g, i8 addrspace(13)* %base) #0 {
-+; CHECK-NOT: inttoptr
-+; CHECK-NOT: ptrtoint
-+; CHECK: getelementptr i8, i8 addrspace(13)* {{.*}}, i64 {{.*}}
-+top:
-+  br label %L42.L46_crit_edge.us
-+
-+L42.L46_crit_edge.us:                             ; preds = %L82.us.us.loopexit, %top
-+  %value_phi11.us = phi i64 [ %a, %top ], [ %2, %L82.us.us.loopexit ]
-+  %0 = sub i64 %value_phi11.us, %b
-+  %1 = add i64 %0, %c
-+  %spec.select = select i1 %g, i64 %d, i64 0
-+  br label %L62.us.us
-+
-+L82.us.us.loopexit:                               ; preds = %L62.us.us
-+  %2 = add i64 %e, %value_phi11.us
-+  br label %L42.L46_crit_edge.us
-+
-+L62.us.us:                                        ; preds = %L62.us.us, %L42.L46_crit_edge.us
-+  %value_phi21.us.us = phi i64 [ %6, %L62.us.us ], [ %spec.select, %L42.L46_crit_edge.us ]
-+  %3 = add i64 %1, %value_phi21.us.us
-+  %4 = getelementptr inbounds i8, i8 addrspace(13)* %base, i64 %3
-+  %5 = load i8, i8 addrspace(13)* %4, align 1
-+  %6 = add i64 %f, %value_phi21.us.us
-+  br i1 %g, label %L82.us.us.loopexit, label %L62.us.us, !llvm.loop !1
-+}
-+
-+!1 = distinct !{!1, !2}
-+!2 = !{!"llvm.loop.isvectorized", i32 1}
--- 
-2.25.1
-
diff --git a/deps/patches/llvm-9.0-D65174-limit-merge-stores.patch b/deps/patches/llvm-9.0-D65174-limit-merge-stores.patch
deleted file mode 100644
index 6d6cfb4acd8001..00000000000000
--- a/deps/patches/llvm-9.0-D65174-limit-merge-stores.patch
+++ /dev/null
@@ -1,116 +0,0 @@
-commit f49c107f06c6a98d11a09d758f08554c78b9b933
-Author: Wei Mi <wmi@google.com>
-Date:   Wed Jul 31 19:59:24 2019 +0000
-
-    [DAGCombine] Limit the number of times for the same store and root nodes
-    to bail out in store merging dependence check.
-    
-    We run into a case where dependence check in store merging bail out many times
-    for the same store and root nodes in a huge basicblock. That increases compile
-    time by almost 100x. The patch add a map to track how many times the bailing
-    out happen for the same store and root, and if it is over a limit, stop
-    considering the store with the same root as a merging candidate.
-    
-    Differential Revision: https://reviews.llvm.org/D65174
-    
-    llvm-svn: 367472
-
-diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-index bf62aa86509..2e5ba82af22 100644
---- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-@@ -120,6 +120,11 @@ static cl::opt<unsigned> TokenFactorInlineLimit(
-     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
-     cl::desc("Limit the number of operands to inline for Token Factors"));
- 
-+static cl::opt<unsigned> StoreMergeDependenceLimit(
-+    "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
-+    cl::desc("Limit the number of times for the same StoreNode and RootNode "
-+             "to bail out in store merging dependence check"));
-+
- namespace {
- 
-   class DAGCombiner {
-@@ -157,6 +162,14 @@ namespace {
-     /// which have not yet been combined to the worklist.
-     SmallPtrSet<SDNode *, 32> CombinedNodes;
- 
-+    /// Map from candidate StoreNode to the pair of RootNode and count.
-+    /// The count is used to track how many times we have seen the StoreNode
-+    /// with the same RootNode bail out in dependence check. If we have seen
-+    /// the bail out for the same pair many times over a limit, we won't
-+    /// consider the StoreNode with the same RootNode as store merging
-+    /// candidate again.
-+    DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
-+
-     // AA - Used for DAG load/store alias analysis.
-     AliasAnalysis *AA;
- 
-@@ -241,6 +254,7 @@ namespace {
-     void removeFromWorklist(SDNode *N) {
-       CombinedNodes.erase(N);
-       PruningList.remove(N);
-+      StoreRootCountMap.erase(N);
- 
-       auto It = WorklistMap.find(N);
-       if (It == WorklistMap.end())
-@@ -15423,6 +15437,18 @@ void DAGCombiner::getStoreMergeCandidates(
-     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
-   };
- 
-+  // Check if the pair of StoreNode and the RootNode already bail out many
-+  // times which is over the limit in dependence check.
-+  auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
-+                                        SDNode *RootNode) -> bool {
-+    auto RootCount = StoreRootCountMap.find(StoreNode);
-+    if (RootCount != StoreRootCountMap.end() &&
-+        RootCount->second.first == RootNode &&
-+        RootCount->second.second > StoreMergeDependenceLimit)
-+      return true;
-+    return false;
-+  };
-+
-   // We looking for a root node which is an ancestor to all mergable
-   // stores. We search up through a load, to our root and then down
-   // through all children. For instance we will find Store{1,2,3} if
-@@ -15452,7 +15478,8 @@ void DAGCombiner::getStoreMergeCandidates(
-             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
-               BaseIndexOffset Ptr;
-               int64_t PtrDiff;
--              if (CandidateMatch(OtherST, Ptr, PtrDiff))
-+              if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
-+                  !OverLimitInDependenceCheck(OtherST, RootNode))
-                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
-             }
-   } else
-@@ -15462,7 +15489,8 @@ void DAGCombiner::getStoreMergeCandidates(
-         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
-           BaseIndexOffset Ptr;
-           int64_t PtrDiff;
--          if (CandidateMatch(OtherST, Ptr, PtrDiff))
-+          if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
-+              !OverLimitInDependenceCheck(OtherST, RootNode))
-             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
-         }
- }
-@@ -15520,8 +15548,19 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
-   // Search through DAG. We can stop early if we find a store node.
-   for (unsigned i = 0; i < NumStores; ++i)
-     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
--                                     Max))
-+                                     Max)) {
-+      // If the searching bail out, record the StoreNode and RootNode in the
-+      // StoreRootCountMap. If we have seen the pair many times over a limit,
-+      // we won't add the StoreNode into StoreNodes set again.
-+      if (Visited.size() >= Max) {
-+        auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
-+        if (RootCount.first == RootNode)
-+          RootCount.second++;
-+        else
-+          RootCount = {RootNode, 1};
-+      }
-       return false;
-+    }
-   return true;
- }
- 
diff --git a/deps/patches/llvm-9.0-D78196.patch b/deps/patches/llvm-9.0-D78196.patch
deleted file mode 100644
index 6ae23fd7d8600d..00000000000000
--- a/deps/patches/llvm-9.0-D78196.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
---- a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-@@ -210,6 +210,10 @@
-     for (auto *Sym : UpdateOther)
-       if (Sym->isVariable())
-         copyLocalEntry(Sym, Sym->getVariableValue());
-+
-+    // Clear the set of symbols that needs to be updated so the streamer can
-+    // be reused without issues.
-+    UpdateOther.clear();
-   }
- 
- private:
-
diff --git a/deps/patches/llvm-9.0-D85499.patch b/deps/patches/llvm-9.0-D85499.patch
deleted file mode 100644
index 1be91fc4717f58..00000000000000
--- a/deps/patches/llvm-9.0-D85499.patch
+++ /dev/null
@@ -1,425 +0,0 @@
-commit ac8729e23232d0fd3933b76093a40b7c65332aff
-Author: Keno Fischer <keno@juliacomputing.com>
-Date:   Fri Aug 7 00:31:43 2020 -0400
-
-    [X86] Canonicalize andnp for bitmask arithmetic
-    
-    We have a DAG combine that tries to fold (vselect cond, 0000..., X) -> (andnp cond, x).
-    However, it does so by attempting to create an i64 vector with the number
-    of elements obtained by truncating division by 64 from the bitwidth. This is
-    bad for mask vectors like v8i1, since that division is just zero. Besides,
-    we don't want i64 vectors anyway. The easy change is just to avoid changing
-    the VT, but this is slightly problematic because the canonical pattern for
-    `kandn` is `(and (vnot a) b)` rather than `(x86andnp a b)`, so this fails
-    to select. Rather than playing games here with having the mask vectors
-    use a different canonical representation, the bulk of this commit switches
-    the canonical ISD representation for `kandn` to `(x86andnp a b)` such
-    that all vector types may be handled equally here. To avoid regressing
-    other tests, we need to extend a few other folds to handle `x86andnp` in
-    addition to plain `and`. However, that should be generally a good
-    improvement, since x86andnp is already canonical for non-i1 vectors
-    prior to this commit, and said folds were just missing.
-    
-    When all is said and done, fixes the issue reported in
-    https://github.com/JuliaLang/julia/issues/36955.
-    
-    Differential Revision: https://reviews.llvm.org/D85499
-
-diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
-index 34ad589d205..eb21b0de89d 100644
---- a/lib/Target/X86/X86ISelDAGToDAG.cpp
-+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
-@@ -503,7 +503,7 @@ namespace {
-     bool isMaskZeroExtended(SDNode *N) const;
-     bool tryShiftAmountMod(SDNode *N);
-     bool tryShrinkShlLogicImm(SDNode *N);
--    bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
-+    bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask, bool Invert);
- 
-     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
-                                 const SDLoc &dl, MVT VT, SDNode *Node);
-@@ -2998,7 +2998,7 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
-       bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
-       // ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
-       if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
--        unsigned NewOpc = 
-+        unsigned NewOpc =
-           ((Opc == X86ISD::ADD) == IsOne)
-               ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
-               : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
-@@ -3999,8 +3999,8 @@ static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad,
- 
- // Try to create VPTESTM instruction. If InMask is not null, it will be used
- // to form a masked operation.
--bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
--                                 SDValue InMask) {
-+bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue InMask,
-+                                 bool Invert) {
-   assert(Subtarget->hasAVX512() && "Expected AVX512!");
-   assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 &&
-          "Unexpected VT!");
-@@ -4140,6 +4140,9 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc,
-   }
- 
-   bool IsTestN = CC == ISD::SETEQ;
-+  if (Invert)
-+    IsTestN = !IsTestN;
-+
-   unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast,
-                                IsMasked);
- 
-@@ -4309,16 +4312,27 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
-       return;
-     break;
- 
-+  case X86ISD::ANDNP:
-+    if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
-+      SDValue N0 = Node->getOperand(0);
-+      SDValue N1 = Node->getOperand(1);
-+      // Try to form a masked VPTESTM
-+      if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
-+          tryVPTESTM(Node, N0, N1, true))
-+        return;
-+    }
-+    break;
-+
-   case ISD::AND:
-     if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) {
-       // Try to form a masked VPTESTM. Operands can be in either order.
-       SDValue N0 = Node->getOperand(0);
-       SDValue N1 = Node->getOperand(1);
-       if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() &&
--          tryVPTESTM(Node, N0, N1))
-+          tryVPTESTM(Node, N0, N1, false))
-         return;
-       if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() &&
--          tryVPTESTM(Node, N1, N0))
-+          tryVPTESTM(Node, N1, N0, false))
-         return;
-     }
- 
-@@ -5000,7 +5014,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
-   }
- 
-   case ISD::SETCC: {
--    if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue()))
-+    if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue(), false))
-       return;
- 
-     break;
-diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
-index 920cdd7e625..6b9738074c7 100644
---- a/lib/Target/X86/X86ISelLowering.cpp
-+++ b/lib/Target/X86/X86ISelLowering.cpp
-@@ -196,7 +196,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
-   // Integer absolute.
-   if (Subtarget.hasCMov()) {
-     setOperationAction(ISD::ABS            , MVT::i16  , Custom);
--    setOperationAction(ISD::ABS            , MVT::i32  , Custom); 
-+    setOperationAction(ISD::ABS            , MVT::i32  , Custom);
-   }
-   setOperationAction(ISD::ABS              , MVT::i64  , Custom);
- 
-@@ -26053,7 +26053,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
- 
-   // If this is a canonical idempotent atomicrmw w/no uses, we have a better
-   // lowering available in lowerAtomicArith.
--  // TODO: push more cases through this path. 
-+  // TODO: push more cases through this path.
-   if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
-     if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
-         AI->use_empty())
-@@ -26111,7 +26111,7 @@ X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
- /// Emit a locked operation on a stack location which does not change any
- /// memory location, but does involve a lock prefix.  Location is chosen to be
- /// a) very likely accessed only by a single thread to minimize cache traffic,
--/// and b) definitely dereferenceable.  Returns the new Chain result.  
-+/// and b) definitely dereferenceable.  Returns the new Chain result.
- static SDValue emitLockedStackOp(SelectionDAG &DAG,
-                                  const X86Subtarget &Subtarget,
-                                  SDValue Chain, SDLoc DL) {
-@@ -26120,22 +26120,22 @@ static SDValue emitLockedStackOp(SelectionDAG &DAG,
-   // operations issued by the current processor.  As such, the location
-   // referenced is not relevant for the ordering properties of the instruction.
-   // See: Intel® 64 and IA-32 ArchitecturesSoftware Developer’s Manual,
--  // 8.2.3.9  Loads and Stores Are Not Reordered with Locked Instructions 
-+  // 8.2.3.9  Loads and Stores Are Not Reordered with Locked Instructions
-   // 2) Using an immediate operand appears to be the best encoding choice
-   // here since it doesn't require an extra register.
-   // 3) OR appears to be very slightly faster than ADD. (Though, the difference
-   // is small enough it might just be measurement noise.)
-   // 4) When choosing offsets, there are several contributing factors:
-   //   a) If there's no redzone, we default to TOS.  (We could allocate a cache
--  //      line aligned stack object to improve this case.) 
-+  //      line aligned stack object to improve this case.)
-   //   b) To minimize our chances of introducing a false dependence, we prefer
--  //      to offset the stack usage from TOS slightly.  
-+  //      to offset the stack usage from TOS slightly.
-   //   c) To minimize concerns about cross thread stack usage - in particular,
-   //      the idiomatic MyThreadPool.run([&StackVars]() {...}) pattern which
-   //      captures state in the TOS frame and accesses it from many threads -
-   //      we want to use an offset such that the offset is in a distinct cache
-   //      line from the TOS frame.
--  // 
-+  //
-   // For a general discussion of the tradeoffs and benchmark results, see:
-   // https://shipilev.net/blog/2014/on-the-fence-with-dependencies/
- 
-@@ -26188,7 +26188,7 @@ static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget,
-     if (Subtarget.hasMFence())
-       return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0));
- 
--    SDValue Chain = Op.getOperand(0); 
-+    SDValue Chain = Op.getOperand(0);
-     return emitLockedStackOp(DAG, Subtarget, Chain, dl);
-   }
- 
-@@ -26677,12 +26677,12 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
-     // seq_cst which isn't SingleThread, everything just needs to be preserved
-     // during codegen and then dropped. Note that we expect (but don't assume),
-     // that orderings other than seq_cst and acq_rel have been canonicalized to
--    // a store or load. 
-+    // a store or load.
-     if (AN->getOrdering() == AtomicOrdering::SequentiallyConsistent &&
-         AN->getSyncScopeID() == SyncScope::System) {
-       // Prefer a locked operation against a stack location to minimize cache
-       // traffic.  This assumes that stack locations are very likely to be
--      // accessed only by the owning thread. 
-+      // accessed only by the owning thread.
-       SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
-       assert(!N->hasAnyUseOfValue(0));
-       // NOTE: The getUNDEF is needed to give something for the unused result 0.
-@@ -35620,7 +35620,7 @@ static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
-   }
- 
-   // TODO: This switch could include FNEG and the x86-specific FP logic ops
--  // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid 
-+  // (FAND, FANDN, FOR, FXOR). But that may require enhancements to avoid
-   // missed load folding and fma+fneg combining.
-   switch (Vec.getOpcode()) {
-   case ISD::FMA: // Begin 3 operands
-@@ -35935,10 +35935,8 @@ combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
- 
-   // vselect Cond, 000..., X -> andn Cond, X
-   if (TValIsAllZeros) {
--    MVT AndNVT = MVT::getVectorVT(MVT::i64, CondVT.getSizeInBits() / 64);
--    SDValue CastCond = DAG.getBitcast(AndNVT, Cond);
--    SDValue CastRHS = DAG.getBitcast(AndNVT, RHS);
--    SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, AndNVT, CastCond, CastRHS);
-+    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
-+    SDValue AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
-     return DAG.getBitcast(VT, AndN);
-   }
- 
-@@ -38147,12 +38145,17 @@ static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
-   return SDValue();
- }
- 
--/// Try to fold: (and (xor X, -1), Y) -> (andnp X, Y).
--static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
-+
-+/// Try to fold:
-+///   (and (not X), Y) -> (andnp X, Y)
-+///   (and (xor X, -1), Y) -> (andnp X, Y).
-+static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG,
-+                                                 const X86Subtarget &Subtarget) {
-   assert(N->getOpcode() == ISD::AND);
- 
-   MVT VT = N->getSimpleValueType(0);
--  if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector())
-+  if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector() &&
-+      !(VT.isVector() && VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()))
-     return SDValue();
- 
-   SDValue X, Y;
-@@ -38558,7 +38561,7 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
-   if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
-     return FPLogic;
- 
--  if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
-+  if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG, Subtarget))
-     return R;
- 
-   if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
-diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
-index 54eddeacaa1..91027fa903f 100644
---- a/lib/Target/X86/X86InstrAVX512.td
-+++ b/lib/Target/X86/X86InstrAVX512.td
-@@ -2978,7 +2978,6 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
- def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
- def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
- // These nodes use 'vnot' instead of 'not' to support vectors.
--def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
- def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
- 
- // TODO - do we need a X86SchedWriteWidths::KMASK type?
-@@ -2986,7 +2985,7 @@ defm KAND  : avx512_mask_binop_all<0x41, "kand",  and,     SchedWriteVecLogic.XM
- defm KOR   : avx512_mask_binop_all<0x45, "kor",   or,      SchedWriteVecLogic.XMM, 1>;
- defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor,   SchedWriteVecLogic.XMM, 1>;
- defm KXOR  : avx512_mask_binop_all<0x47, "kxor",  xor,     SchedWriteVecLogic.XMM, 1>;
--defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn,   SchedWriteVecLogic.XMM, 0>;
-+defm KANDN : avx512_mask_binop_all<0x42, "kandn", X86andnp,   SchedWriteVecLogic.XMM, 0>;
- defm KADD  : avx512_mask_binop_all<0x4A, "kadd",  X86kadd, SchedWriteVecLogic.XMM, 1, HasDQI>;
- 
- multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
-@@ -3015,7 +3014,7 @@ multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
- }
- 
- defm : avx512_binop_pat<and,   and,  KANDWrr>;
--defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
-+defm : avx512_binop_pat<X86andnp, X86andnp, KANDNWrr>;
- defm : avx512_binop_pat<or,    or,   KORWrr>;
- defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
- defm : avx512_binop_pat<xor,   xor,  KXORWrr>;
-@@ -11570,7 +11569,7 @@ multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr,
- }
- 
- multiclass avx512_fixupimm_packed_all<X86SchedWriteWidths sched,
--                                      AVX512VLVectorVTInfo _Vec, 
-+                                      AVX512VLVectorVTInfo _Vec,
-                                       AVX512VLVectorVTInfo _Tbl> {
-   let Predicates = [HasAVX512] in
-     defm Z    : avx512_fixupimm_packed_sae<0x54, "vfixupimm", sched.ZMM,
-@@ -11687,7 +11686,7 @@ multiclass AVX512_scalar_math_fp_patterns<SDNode Op, string OpcPrefix, SDNode Mo
-                             (Op (_.EltVT
-                                  (extractelt (_.VT VR128X:$src1), (iPTR 0))),
-                                 _.FRC:$src2), (_.EltVT ZeroFP)))),
--      (!cast<I>("V"#OpcPrefix#Zrr_Intkz) 
-+      (!cast<I>("V"#OpcPrefix#Zrr_Intkz)
-           VK1WM:$mask, _.VT:$src1,
-           (_.VT (COPY_TO_REGCLASS _.FRC:$src2, VR128X)))>;
-     def : Pat<(MoveNode (_.VT VR128X:$src1),
-diff --git a/test/CodeGen/X86/avx512-select.ll b/test/CodeGen/X86/avx512-select.ll
-index 1ed7b408baf..64320d63eac 100644
---- a/test/CodeGen/X86/avx512-select.ll
-+++ b/test/CodeGen/X86/avx512-select.ll
-@@ -595,3 +595,74 @@ define <16 x i64> @narrowExtractedVectorSelect_crash(<16 x i64> %arg, <16 x i16>
-   %tmp3 = zext <16 x i16> %tmp2 to <16 x i64>
-   ret <16 x i64> %tmp3
- }
-+
-+; Regression test from https://github.com/JuliaLang/julia/issues/36955
-+define i8 @julia_issue36955(<8 x i1> %mask, <8 x double> %a) {
-+; X86-AVX512F-LABEL: julia_issue36955:
-+; X86-AVX512F:       # %bb.0:
-+; X86-AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
-+; X86-AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
-+; X86-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
-+; X86-AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-+; X86-AVX512F-NEXT:    vcmpnlepd %zmm0, %zmm1, %k1
-+; X86-AVX512F-NEXT:    kandnw %k0, %k1, %k0
-+; X86-AVX512F-NEXT:    kandw %k1, %k0, %k0
-+; X86-AVX512F-NEXT:    knotw %k1, %k1
-+; X86-AVX512F-NEXT:    korw %k1, %k0, %k0
-+; X86-AVX512F-NEXT:    kmovw %k0, %eax
-+; X86-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
-+; X86-AVX512F-NEXT:    vzeroupper
-+; X86-AVX512F-NEXT:    retl
-+;
-+; X64-AVX512F-LABEL: julia_issue36955:
-+; X64-AVX512F:       # %bb.0:
-+; X64-AVX512F-NEXT:    vpmovsxwq %xmm0, %zmm0
-+; X64-AVX512F-NEXT:    vpsllq $63, %zmm0, %zmm0
-+; X64-AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
-+; X64-AVX512F-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-+; X64-AVX512F-NEXT:    vcmpnlepd %zmm0, %zmm1, %k1
-+; X64-AVX512F-NEXT:    kandnw %k0, %k1, %k0
-+; X64-AVX512F-NEXT:    kandw %k1, %k0, %k0
-+; X64-AVX512F-NEXT:    knotw %k1, %k1
-+; X64-AVX512F-NEXT:    korw %k1, %k0, %k0
-+; X64-AVX512F-NEXT:    kmovw %k0, %eax
-+; X64-AVX512F-NEXT:    # kill: def $al killed $al killed $eax
-+; X64-AVX512F-NEXT:    vzeroupper
-+; X64-AVX512F-NEXT:    retq
-+;
-+; X86-AVX512BW-LABEL: julia_issue36955:
-+; X86-AVX512BW:       # %bb.0:
-+; X86-AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
-+; X86-AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
-+; X86-AVX512BW-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-+; X86-AVX512BW-NEXT:    vcmpnlepd %zmm0, %zmm1, %k1
-+; X86-AVX512BW-NEXT:    kandnw %k0, %k1, %k0
-+; X86-AVX512BW-NEXT:    kandw %k1, %k0, %k0
-+; X86-AVX512BW-NEXT:    knotw %k1, %k1
-+; X86-AVX512BW-NEXT:    korw %k1, %k0, %k0
-+; X86-AVX512BW-NEXT:    kmovd %k0, %eax
-+; X86-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
-+; X86-AVX512BW-NEXT:    vzeroupper
-+; X86-AVX512BW-NEXT:    retl
-+;
-+; X64-AVX512BW-LABEL: julia_issue36955:
-+; X64-AVX512BW:       # %bb.0:
-+; X64-AVX512BW-NEXT:    vpsllw $15, %xmm0, %xmm0
-+; X64-AVX512BW-NEXT:    vpmovw2m %zmm0, %k0
-+; X64-AVX512BW-NEXT:    vpxor %xmm0, %xmm0, %xmm0
-+; X64-AVX512BW-NEXT:    vcmpnlepd %zmm0, %zmm1, %k1
-+; X64-AVX512BW-NEXT:    kandnw %k0, %k1, %k0
-+; X64-AVX512BW-NEXT:    kandw %k1, %k0, %k0
-+; X64-AVX512BW-NEXT:    knotw %k1, %k1
-+; X64-AVX512BW-NEXT:    korw %k1, %k0, %k0
-+; X64-AVX512BW-NEXT:    kmovd %k0, %eax
-+; X64-AVX512BW-NEXT:    # kill: def $al killed $al killed $eax
-+; X64-AVX512BW-NEXT:    vzeroupper
-+; X64-AVX512BW-NEXT:    retq
-+  %fcmp = fcmp ugt <8 x double> %a, zeroinitializer
-+  %xor = xor <8 x i1> %fcmp, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
-+  %select1 = select <8 x i1> %fcmp, <8 x i1> zeroinitializer, <8 x i1> %mask
-+  %select2 = select <8 x i1> %xor, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i1> %select1
-+  %ret = bitcast <8 x i1> %select2 to i8
-+  ret i8 %ret
-+}
-diff --git a/test/CodeGen/X86/combine-bitselect.ll b/test/CodeGen/X86/combine-bitselect.ll
-index 8cb6a4dca09..3c08a871c86 100644
---- a/test/CodeGen/X86/combine-bitselect.ll
-+++ b/test/CodeGen/X86/combine-bitselect.ll
-@@ -616,13 +616,13 @@ define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) {
- ; AVX512F:       # %bb.0: # %bb
- ; AVX512F-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
- ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
--; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12]
--; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm1, %k1
-+; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
-+; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm0 = [12,12,12,12]
- ; AVX512F-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
--; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm1, %k2
--; AVX512F-NEXT:    vptestnmd %zmm0, %zmm0, %k0 {%k2}
--; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1 {%k1}
--; AVX512F-NEXT:    korw %k0, %k1, %k1
-+; AVX512F-NEXT:    vpcmpeqd %zmm2, %zmm1, %k0
-+; AVX512F-NEXT:    vpcmpeqd %zmm0, %zmm1, %k2 {%k1}
-+; AVX512F-NEXT:    kandnw %k0, %k1, %k0
-+; AVX512F-NEXT:    korw %k0, %k2, %k1
- ; AVX512F-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
- ; AVX512F-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
- ; AVX512F-NEXT:    vzeroupper
-diff --git a/test/CodeGen/X86/vec_ssubo.ll b/test/CodeGen/X86/vec_ssubo.ll
-index 515dc5c5aa2..dfb1e7c4dee 100644
---- a/test/CodeGen/X86/vec_ssubo.ll
-+++ b/test/CodeGen/X86/vec_ssubo.ll
-@@ -1640,7 +1640,7 @@ define <4 x i32> @ssubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
- ; AVX512-NEXT:    vptestmd %xmm1, %xmm1, %k0
- ; AVX512-NEXT:    vpslld $31, %xmm0, %xmm0
- ; AVX512-NEXT:    vptestmd %xmm0, %xmm0, %k1
--; AVX512-NEXT:    vptestnmd %xmm1, %xmm1, %k2 {%k1}
-+; AVX512-NEXT:    kandnw %k1, %k0, %k2
- ; AVX512-NEXT:    kxorw %k0, %k1, %k0
- ; AVX512-NEXT:    kxorw %k2, %k0, %k1
- ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
-diff --git a/test/CodeGen/X86/vec_usubo.ll b/test/CodeGen/X86/vec_usubo.ll
-index c5a7b19cf14..367c491d25a 100644
---- a/test/CodeGen/X86/vec_usubo.ll
-+++ b/test/CodeGen/X86/vec_usubo.ll
-@@ -1244,10 +1244,10 @@ define <4 x i32> @usubo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind
- ; AVX512:       # %bb.0:
- ; AVX512-NEXT:    vpslld $31, %xmm0, %xmm0
- ; AVX512-NEXT:    vptestmd %xmm0, %xmm0, %k0
--; AVX512-NEXT:    vpslld $31, %xmm1, %xmm1
--; AVX512-NEXT:    vptestmd %xmm1, %xmm1, %k1
-+; AVX512-NEXT:    vpslld $31, %xmm1, %xmm0
-+; AVX512-NEXT:    vptestmd %xmm0, %xmm0, %k1
- ; AVX512-NEXT:    kxorw %k1, %k0, %k1
--; AVX512-NEXT:    vptestnmd %xmm0, %xmm0, %k2 {%k1}
-+; AVX512-NEXT:    kandnw %k1, %k0, %k2
- ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
- ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k2} {z}
- ; AVX512-NEXT:    kmovd %k1, %eax
diff --git a/deps/patches/llvm-D27629-AArch64-large_model_6.0.1.patch b/deps/patches/llvm-D27629-AArch64-large_model_6.0.1.patch
deleted file mode 100644
index 89beefdd157e5f..00000000000000
--- a/deps/patches/llvm-D27629-AArch64-large_model_6.0.1.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From f76abe65e6d07fea5e838c4f8c9a9421c16debb0 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Thu, 5 Jul 2018 12:37:50 -0400
-Subject: [PATCH] Fix unwind info relocation with large code model on AArch64
-
----
- lib/MC/MCObjectFileInfo.cpp                   |  2 ++
- .../AArch64/ELF_ARM64_large-relocations.s     | 20 +++++++++++++++++++
- 2 files changed, 22 insertions(+)
- create mode 100644 test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
-
-diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
-index 328f000f37c..938b35f20d1 100644
---- a/lib/MC/MCObjectFileInfo.cpp
-+++ b/lib/MC/MCObjectFileInfo.cpp
-@@ -291,6 +291,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T, bool Large) {
-     break;
-   case Triple::ppc64:
-   case Triple::ppc64le:
-+  case Triple::aarch64:
-+  case Triple::aarch64_be:
-   case Triple::x86_64:
-     FDECFIEncoding = dwarf::DW_EH_PE_pcrel |
-                      (Large ? dwarf::DW_EH_PE_sdata8 : dwarf::DW_EH_PE_sdata4);
-diff --git a/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
-new file mode 100644
-index 00000000000..66f28dabd79
---- /dev/null
-+++ b/test/ExecutionEngine/RuntimeDyld/AArch64/ELF_ARM64_large-relocations.s
-@@ -0,0 +1,20 @@
-+# RUN: llvm-mc -triple=arm64-none-linux-gnu -large-code-model -filetype=obj -o %T/large-reloc.o %s
-+# RUN: llvm-rtdyld -triple=arm64-none-linux-gnu -verify -map-section large-reloc.o,.eh_frame=0x10000 -map-section large-reloc.o,.text=0xffff000000000000 -check=%s %T/large-reloc.o
-+# RUN-BE: llvm-mc -triple=aarch64_be-none-linux-gnu -large-code-model -filetype=obj -o %T/be-large-reloc.o %s
-+# RUN-BE: llvm-rtdyld -triple=aarch64_be-none-linux-gnu -verify -map-section be-large-reloc.o,.eh_frame=0x10000 -map-section be-large-reloc.o,.text=0xffff000000000000 -check=%s %T/be-large-reloc.o
-+
-+        .text
-+        .globl  g
-+        .p2align        2
-+        .type   g,@function
-+g:
-+        .cfi_startproc
-+        mov      x0, xzr
-+        ret
-+        .Lfunc_end0:
-+        .size   g, .Lfunc_end0-g
-+        .cfi_endproc
-+
-+# Skip the CIE and load the 8 bytes PC begin pointer.
-+# Assuming the CIE and the FDE length are both 4 bytes.
-+# rtdyld-check: *{8}(section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc) = g - (section_addr(large-reloc.o, .eh_frame) + (*{4}(section_addr(large-reloc.o, .eh_frame))) + 0xc)
--- 
-2.18.0
-
diff --git a/deps/patches/llvm-D57118-powerpc.patch b/deps/patches/llvm-D57118-powerpc.patch
deleted file mode 100644
index 328fe205d1280e..00000000000000
--- a/deps/patches/llvm-D57118-powerpc.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-commit 812db527538f30ac77a19d755e24109a6db7e569
-Author: Keno Fischer <keno@juliacomputing.com>
-Date:   Wed Jan 23 16:46:59 2019 -0500
-
-    [CMake][PowerPC] Recognize LLVM_NATIVE_TARGET="ppc64le" as PowerPC
-    
-    Summary:
-    This value is derived from the host triple, which on the machine
-    I'm currently using is `ppc64le-linux-redhat`. This change makes
-    LLVM compile.
-    
-    Reviewers: hfinkel
-    
-    Subscribers: nemanjai, mgorny, jsji, llvm-commits
-    
-    Differential Revision: https://reviews.llvm.org/D57118
-
-diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
-index 900c35ee4f0..b9c9757a4f6 100644
---- a/cmake/config-ix.cmake
-+++ b/cmake/config-ix.cmake
-@@ -386,6 +386,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "sparc")
-   set(LLVM_NATIVE_ARCH Sparc)
- elseif (LLVM_NATIVE_ARCH MATCHES "powerpc")
-   set(LLVM_NATIVE_ARCH PowerPC)
-+elseif (LLVM_NATIVE_ARCH MATCHES "ppc64le")
-+  set(LLVM_NATIVE_ARCH PowerPC)
- elseif (LLVM_NATIVE_ARCH MATCHES "aarch64")
-   set(LLVM_NATIVE_ARCH AArch64)
- elseif (LLVM_NATIVE_ARCH MATCHES "arm64")
diff --git a/deps/patches/llvm-D75072-SCEV-add-type.patch b/deps/patches/llvm-D75072-SCEV-add-type.patch
deleted file mode 100644
index 9a9e801e970a47..00000000000000
--- a/deps/patches/llvm-D75072-SCEV-add-type.patch
+++ /dev/null
@@ -1,415 +0,0 @@
-commit a55a3ab4dc5c66c153b2988fc4fa46b39bfc92fc
-Author: Keno Fischer <keno@juliacomputing.com>
-Date:   Mon Feb 24 14:18:22 2020 -0500
-
-    [SCEV] Record NI types in add exprs
-    
-    Summary:
-    This fixes a case where loop-reduce introduces ptrtoint/inttoptr for
-    non-integral address space pointers. Over the past several years, we
-    have gradually improved the SCEVExpander to actually do something
-    sensible for non-integral pointer types. However, that obviously
-    relies on the expander knowing what the type of the SCEV expression is.
-    That is usually the case, but there is one important case where it's
-    not: The type of an add expression is just the type of the last operand,
-    so if the non-integral pointer is not the last operand, later uses of
-    that SCEV may not realize that the given add expression contains
-    non-integral pointers and may try to expand it as integers.
-    
-    One interesting observation is that we do get away with this scheme in
-    shockingly many cases. The reason for this is that SCEV expressions
-    often have an `scUnknown` pointer base, which our sort order on the
-    operands of add expressions sort behind basically everything else,
-    so it usually ends up as the last operand.
-    
-    One situation where this fails is included as a test case. This test
-    case was bugpoint-reduced from the issue reported at
-    https://github.com/JuliaLang/julia/issues/31156. What happens here
-    is that the pointer base is an scAddRec from an outer loop, plus an
-    scUnknown integer offset. By our sort order, the scUnknown gets sorted
-    after the scAddRec pointer base, thus making an add expression of these
-    two operands have integer type. This then confuses the expander, into
-    attempting to expand the whole thing as integers, which will obviously
-    fail when reaching the non-integral pointer.
-    
-    I considered a few options to solve this, but here's what I ended up
-    settling on: The AddExpr class gains a new subclass that explicitly
-    stores the type of the expression. This subclass is used whenever one
-    of the operands is a non-integral pointer. To reduce the impact for the
-    regular case (where the SCEV expression contains no non-integral
-    pointers), a bit flag is kept in each flag expression to indicate
-    whether it is of non-integral pointer type (this should give the same
-    answer as asking if getType() is non-integral, but performing that
-    query may involve a pointer chase and requires the DataLayout). For
-    add expressions that flag is also used to indicate whether we're using
-    the subclass or not. This is slightly inefficient, because it uses
-    the subclass even in the (not uncommon) case where the last operand
-    does actually accurately reflect the non-integral pointer type. However,
-    it didn't seem worth the extra flag bit and complexity to do this
-    micro-optimization.
-    
-    I had hoped that we could additionally restrict mul exprs from
-    containing any non-integral pointers, and also require add exprs to
-    only have one operand containg such pointers (but not more), but this
-    turned out not to work. The reason for this is that SCEV wants to
-    form differences between pointers, which it represents as `A + B*-1`,
-    so we need to allow both multiplication by `-1` and addition with
-    multiple non-integral pointer arguments. I'm not super happy with
-    that situation, but I think it exposes a more general problem with
-    non-integral pointers in LLVM. We don't actually have a way to express
-    the difference between two non-integral pointers at the IR level.
-    In theory this is a problem for SCEV, because it means that we can't
-    materialize such SCEV expression. However, in practice, these
-    expressions generally have the same base pointer, so SCEV will
-    appropriately simplify them to just the integer components.
-    Nevertheless it is a bit unsatisfying. Perhaps we could have an
-    intrinsic that takes the byte difference between two pointers to the
-    same allocated object (in the same sense as is used in getelementptr),
-    which should be a sensible operation even for non-integral pointers.
-    However, given the practical considerations above, that's a project
-    for another time. For now, simply allowing the existing pointer-diff
-    pattern for non-integral pointers seems to work ok.
-    
-    Reviewers: sanjoy, reames, vtjnash, vchuravy
-    
-    Subscribers: hiraditya, javed.absar, llvm-commits
-    
-    Tags: #llvm
-    
-    Differential Revision: https://reviews.llvm.org/D75072
-
-diff --git llvm/include/llvm/Analysis/ScalarEvolution.h llvm/include/llvm/Analysis/ScalarEvolution.h
-index 0bd98ef37e7..317bdeac3f0 100644
---- llvm/include/llvm/Analysis/ScalarEvolution.h
-+++ llvm/include/llvm/Analysis/ScalarEvolution.h
-@@ -118,6 +118,19 @@ public:
-     NoWrapMask = (1 << 3) - 1
-   };
- 
-+  /// HasNonIntegralPointerFlag are bitfield indices into SubclassData.
-+  ///
-+  /// When constructing SCEV expressions for LLVM expressions with non-integral
-+  /// pointer types, some additional processing is required to ensure that we
-+  /// don't introduce any illegal transformations. However, non-integral pointer
-+  /// types are a very rarely used feature, so we want to make sure to only do
-+  /// such processing if they are actually used. To ensure minimal performance
-+  /// impact, we memoize that fact in using these flags.
-+  enum HasNonIntegralPointerFlag {
-+    FlagNoNIPointers = 0,
-+    FlagHasNIPointers = (1 << 3)
-+  };
-+
-   explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy,
-                 unsigned short ExpressionSize)
-       : FastID(ID), SCEVType(SCEVTy), ExpressionSize(ExpressionSize) {}
-@@ -154,6 +167,10 @@ public:
-     return ExpressionSize;
-   }
- 
-+  bool hasNonIntegralPointers() const {
-+    return SubclassData & FlagHasNIPointers;
-+  }
-+
-   /// Print out the internal representation of this scalar to the specified
-   /// stream.  This should really only be used for debugging purposes.
-   void print(raw_ostream &OS) const;
-@@ -747,7 +764,7 @@ public:
-                                         BasicBlock *ExitingBlock);
- 
-   /// Return the number of times the backedge executes before the given exit
--  /// would be taken; if not exactly computable, return SCEVCouldNotCompute. 
-+  /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
-   /// For a single exit loop, this value is equivelent to the result of
-   /// getBackedgeTakenCount.  The loop is guaranteed to exit (via *some* exit)
-   /// before the backedge is executed (ExitCount + 1) times.  Note that there
-diff --git llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-index d008af7b7e6..39ab35a8b8c 100644
---- llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-+++ llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
-@@ -188,6 +188,13 @@ class Type;
-       return getNoWrapFlags(FlagNW) != FlagAnyWrap;
-     }
- 
-+    void setHasNIPtr(bool HasNIPtr) {
-+      if (HasNIPtr)
-+        SubclassData |= FlagHasNIPointers;
-+      else
-+        SubclassData &= ~FlagHasNIPointers;
-+    }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-       return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
-@@ -222,24 +229,54 @@ class Type;
-   class SCEVAddExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
- 
-+  protected:
-     SCEVAddExpr(const FoldingSetNodeIDRef ID,
-                 const SCEV *const *O, size_t N)
-       : SCEVCommutativeExpr(ID, scAddExpr, O, N) {}
- 
-   public:
--    Type *getType() const {
--      // Use the type of the last operand, which is likely to be a pointer
--      // type, if there is one. This doesn't usually matter, but it can help
--      // reduce casts when the expressions are expanded.
--      return getOperand(getNumOperands() - 1)->getType();
-+    /// Returns the type of the add expression, by looking either at the last
-+    /// operand or deferring to the SCEVAddNIExpr subclass for non-integral
-+    /// pointers.
-+    Type *getType() const;
-+
-+    /// Methods for support type inquiry through isa, cast, and dyn_cast:
-+    static bool classof(const SCEV *S) { return S->getSCEVType() == scAddExpr; }
-+  };
-+
-+  /// This node represents an addition of some number of SCEVs, one which
-+  /// is a non-integral pointer type, requiring us to know the type exactly for
-+  /// correctness.
-+  class SCEVAddNIExpr : public SCEVAddExpr {
-+    friend class ScalarEvolution;
-+    PointerType *NIType;
-+
-+    SCEVAddNIExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N,
-+                  PointerType *NIType)
-+        : SCEVAddExpr(ID, O, N), NIType(NIType) {
-+      SubclassData |= FlagHasNIPointers;
-     }
- 
-+  public:
-+    Type *getType() const { return NIType; }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
--      return S->getSCEVType() == scAddExpr;
-+      return S->getSCEVType() == scAddExpr && S->hasNonIntegralPointers();
-     }
-   };
- 
-+  inline Type *SCEVAddExpr::getType() const {
-+    // In general, use the type of the last operand, which is likely to be a
-+    // pointer type, if there is one. This doesn't usually matter, but it can
-+    // help reduce casts when the expressions are expanded. In the (unusual)
-+    // case that we're working with non-integral pointers, we have a subclass
-+    // that stores that type explicitly.
-+    if (hasNonIntegralPointers())
-+      return cast<SCEVAddNIExpr>(this)->getType();
-+    return getOperand(getNumOperands() - 1)->getType();
-+  }
-+
-   /// This node represents multiplication of some number of SCEVs.
-   class SCEVMulExpr : public SCEVCommutativeExpr {
-     friend class ScalarEvolution;
-@@ -249,6 +286,18 @@ class Type;
-       : SCEVCommutativeExpr(ID, scMulExpr, O, N) {}
- 
-   public:
-+    Type *getType() const {
-+      // In general, we can't form SCEVMulExprs with non-integral pointer types,
-+      // but for the moment we need to allow a special case: Multiplying by
-+      // -1 to be able express the difference between two pointers. In order
-+      // to maintain the invariant that SCEVs with the NI flag set should have
-+      // a type corresponding to the contained NI ptr, we need to return the
-+      // type of the pointer here.
-+      if (hasNonIntegralPointers())
-+        return getOperand(getNumOperands() - 1)->getType();
-+      return SCEVCommutativeExpr::getType();
-+    }
-+
-     /// Methods for support type inquiry through isa, cast, and dyn_cast:
-     static bool classof(const SCEV *S) {
-       return S->getSCEVType() == scMulExpr;
-@@ -475,9 +524,12 @@ class Type;
-     /// instances owned by a ScalarEvolution.
-     SCEVUnknown *Next;
- 
--    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V,
--                ScalarEvolution *se, SCEVUnknown *next) :
--      SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {}
-+    SCEVUnknown(const FoldingSetNodeIDRef ID, Value *V, ScalarEvolution *se,
-+                SCEVUnknown *next, bool ValueIsNIPtr)
-+        : SCEV(ID, scUnknown, 1), CallbackVH(V), SE(se), Next(next) {
-+      if (ValueIsNIPtr)
-+        SubclassData |= FlagHasNIPointers;
-+    }
- 
-     // Implement CallbackVH.
-     void deleted() override;
-diff --git llvm/lib/Analysis/ScalarEvolution.cpp llvm/lib/Analysis/ScalarEvolution.cpp
-index bc2cfd6fcc4..2f8eb665c5d 100644
---- llvm/lib/Analysis/ScalarEvolution.cpp
-+++ llvm/lib/Analysis/ScalarEvolution.cpp
-@@ -358,12 +358,13 @@ Type *SCEV::getType() const {
-   case scSignExtend:
-     return cast<SCEVCastExpr>(this)->getType();
-   case scAddRecExpr:
--  case scMulExpr:
-   case scUMaxExpr:
-   case scSMaxExpr:
-   case scUMinExpr:
-   case scSMinExpr:
-     return cast<SCEVNAryExpr>(this)->getType();
-+  case scMulExpr:
-+    return cast<SCEVMulExpr>(this)->getType();
-   case scAddExpr:
-     return cast<SCEVAddExpr>(this)->getType();
-   case scUDivExpr:
-@@ -2441,8 +2442,9 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-   }
- 
-   // Limit recursion calls depth.
--  if (Depth > MaxArithDepth || hasHugeExpression(Ops))
-+  if (Depth > MaxArithDepth || hasHugeExpression(Ops)) {
-     return getOrCreateAddExpr(Ops, Flags);
-+  }
- 
-   // Okay, check to see if the same value occurs in the operand list more than
-   // once.  If so, merge them together into an multiply expression.  Since we
-@@ -2783,16 +2785,27 @@ ScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops,
-                                     SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scAddExpr);
--  for (const SCEV *Op : Ops)
--    ID.AddPointer(Op);
-+  bool HasNIPtr = false;
-+  PointerType *NIPtrType = nullptr;
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    ID.AddPointer(Ops[i]);
-+    if (Ops[i]->hasNonIntegralPointers()) {
-+      HasNIPtr = true;
-+      NIPtrType = cast<PointerType>(Ops[i]->getType());
-+    }
-+  }
-   void *IP = nullptr;
-   SCEVAddExpr *S =
-       static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-   if (!S) {
-     const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-     std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--    S = new (SCEVAllocator)
--        SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
-+    if (HasNIPtr)
-+      S = new (SCEVAllocator)
-+          SCEVAddNIExpr(ID.Intern(SCEVAllocator), O, Ops.size(), NIPtrType);
-+    else
-+      S = new (SCEVAllocator)
-+          SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size());
-     UniqueSCEVs.InsertNode(S, IP);
-     addToLoopUseLists(S);
-   }
-@@ -2805,8 +2818,10 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
-                                        const Loop *L, SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scAddRecExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    assert(i == 0 || !Ops[i]->hasNonIntegralPointers());
-     ID.AddPointer(Ops[i]);
-+  }
-   ID.AddPointer(L);
-   void *IP = nullptr;
-   SCEVAddRecExpr *S =
-@@ -2820,6 +2835,7 @@ ScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops,
-     addToLoopUseLists(S);
-   }
-   S->setNoWrapFlags(Flags);
-+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
-   return S;
- }
- 
-@@ -2828,8 +2844,11 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
-                                     SCEV::NoWrapFlags Flags) {
-   FoldingSetNodeID ID;
-   ID.AddInteger(scMulExpr);
--  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-+  bool HasNIPtr = false;
-+  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-+    HasNIPtr |= Ops[i]->hasNonIntegralPointers();
-     ID.AddPointer(Ops[i]);
-+  }
-   void *IP = nullptr;
-   SCEVMulExpr *S =
-     static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));
-@@ -2842,6 +2861,7 @@ ScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops,
-     addToLoopUseLists(S);
-   }
-   S->setNoWrapFlags(Flags);
-+  S->setHasNIPtr(HasNIPtr);
-   return S;
- }
- 
-@@ -3666,8 +3686,11 @@ const SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind,
-     return ExistingSCEV;
-   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
-   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
--  SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr(
-+  SCEVMinMaxExpr *S = new (SCEVAllocator) SCEVMinMaxExpr(
-       ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size());
-+  // For MinMaxExprs it's sufficient to see if the first Op has NI data, as the
-+  // operands all need to be of the same type.
-+  S->setHasNIPtr(Ops[0]->hasNonIntegralPointers());
- 
-   UniqueSCEVs.InsertNode(S, IP);
-   addToLoopUseLists(S);
-@@ -3744,8 +3767,9 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
-            "Stale SCEVUnknown in uniquing map!");
-     return S;
-   }
-+  bool ValueIsNIPtr = getDataLayout().isNonIntegralPointerType(V->getType());
-   SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this,
--                                            FirstUnknown);
-+                                            FirstUnknown, ValueIsNIPtr);
-   FirstUnknown = cast<SCEVUnknown>(S);
-   UniqueSCEVs.InsertNode(S, IP);
-   return S;
-diff --git llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-index 5648e3aa74a..6936521f3a6 100644
---- llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-+++ llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll
-@@ -2,7 +2,7 @@
- 
- ; Address Space 10 is non-integral. The optimizer is not allowed to use
- ; ptrtoint/inttoptr instructions. Make sure that this doesn't happen
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12"
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
- target triple = "x86_64-unknown-linux-gnu"
- 
- define void @japi1__unsafe_getindex_65028(i64 addrspace(10)* %arg) {
-@@ -43,3 +43,36 @@ if38:                                             ; preds = %L119
- done:                                             ; preds = %if38
-   ret void
- }
-+
-+; This is a bugpoint-reduced regression test - It doesn't make too much sense by itself,
-+; but creates the correct SCEV expressions to reproduce the issue. See
-+; https://github.com/JuliaLang/julia/issues/31156 for the original bug report.
-+define void @"japi1_permutedims!_4259"(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i1 %g, i8 addrspace(13)* %base) #0 {
-+; CHECK-NOT: inttoptr
-+; CHECK-NOT: ptrtoint
-+; CHECK: getelementptr i8, i8 addrspace(13)* {{.*}}, i64 {{.*}}
-+top:
-+  br label %L42.L46_crit_edge.us
-+
-+L42.L46_crit_edge.us:                             ; preds = %L82.us.us.loopexit, %top
-+  %value_phi11.us = phi i64 [ %a, %top ], [ %2, %L82.us.us.loopexit ]
-+  %0 = sub i64 %value_phi11.us, %b
-+  %1 = add i64 %0, %c
-+  %spec.select = select i1 %g, i64 %d, i64 0
-+  br label %L62.us.us
-+
-+L82.us.us.loopexit:                               ; preds = %L62.us.us
-+  %2 = add i64 %e, %value_phi11.us
-+  br label %L42.L46_crit_edge.us
-+
-+L62.us.us:                                        ; preds = %L62.us.us, %L42.L46_crit_edge.us
-+  %value_phi21.us.us = phi i64 [ %6, %L62.us.us ], [ %spec.select, %L42.L46_crit_edge.us ]
-+  %3 = add i64 %1, %value_phi21.us.us
-+  %4 = getelementptr inbounds i8, i8 addrspace(13)* %base, i64 %3
-+  %5 = load i8, i8 addrspace(13)* %4, align 1
-+  %6 = add i64 %f, %value_phi21.us.us
-+  br i1 %g, label %L82.us.us.loopexit, label %L62.us.us, !llvm.loop !1
-+}
-+
-+!1 = distinct !{!1, !2}
-+!2 = !{!"llvm.loop.isvectorized", i32 1}
diff --git a/deps/patches/llvm-D80101.patch b/deps/patches/llvm-D80101.patch
deleted file mode 100644
index f8b3ff7c643ada..00000000000000
--- a/deps/patches/llvm-D80101.patch
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
-index 9d4fdc6b624..14b52e0ca33 100644
---- a/lib/CodeGen/StackColoring.cpp
-+++ b/lib/CodeGen/StackColoring.cpp
-@@ -913,6 +913,8 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
-     assert(To && From && "Invalid allocation object");
-     Allocas[From] = To;
-
-+    const_cast<AllocaInst*>(To)->moveBefore(const_cast<Instruction*>(&*To->getParent()->getFirstInsertionPt()));
-+
-     // AA might be used later for instruction scheduling, and we need it to be
-     // able to deduce the correct aliasing releationships between pointers
-     // derived from the alloca being remapped and the target of that remapping.
diff --git a/deps/patches/llvm-D84031.patch b/deps/patches/llvm-D84031.patch
deleted file mode 100644
index 60fea4e41b9596..00000000000000
--- a/deps/patches/llvm-D84031.patch
+++ /dev/null
@@ -1,10 +0,0 @@
---- a/lib/CodeGen/CodeGenPrepare.cpp
-+++ b/lib/CodeGen/CodeGenPrepare.cpp
-@@ -540,6 +540,7 @@
-     LargeOffsetGEPID.clear();
-   }
- 
-+  NewGEPBases.clear();
-   SunkAddrs.clear();
- 
-   if (!DisableBranchOpts) {
diff --git a/deps/patches/llvm-exegesis-mingw.patch b/deps/patches/llvm-exegesis-mingw.patch
deleted file mode 100644
index ff11f4da8231e9..00000000000000
--- a/deps/patches/llvm-exegesis-mingw.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 9ba86352649a39b03adce98670714c4c8eb5341d Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Wed, 24 Jul 2019 21:19:20 -0400
-Subject: [PATCH] Fix build of llvm-exegis on mingw32
-
----
- llvm/tools/llvm-exegesis/CMakeLists.txt | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/llvm-exegesis/CMakeLists.txt b/tools/llvm-exegesis/CMakeLists.txt
-index a59e1b74024..7a30e0ea98f 100644
---- a/tools/llvm-exegesis/CMakeLists.txt
-+++ b/tools/llvm-exegesis/CMakeLists.txt
-@@ -4,7 +4,7 @@ set(LLVM_LINK_COMPONENTS
-   native
-   )
- 
--add_llvm_tool(llvm-exegesis
-+add_llvm_tool(llvm-exegesis DISABLE_LLVM_LINK_LLVM_DYLIB
-   llvm-exegesis.cpp
-   )
- 
--- 
-2.22.0
diff --git a/deps/patches/llvm-julia-tsan-custom-as.patch b/deps/patches/llvm-julia-tsan-custom-as.patch
deleted file mode 100644
index a6f8a42ad2e32d..00000000000000
--- a/deps/patches/llvm-julia-tsan-custom-as.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From bd41be423127b8946daea805290ad2eb19e66be4 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Sat, 19 May 2018 11:56:55 -0400
-Subject: [PATCH] [TSAN] Allow for custom address spaces
-
-Julia uses addressspaces for GC and we want these to be sanitized as well.
----
- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
-index ec6904486e1..9d673353f43 100644
---- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
-+++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
-@@ -296,7 +296,9 @@ static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) {
-   // with them.
-   if (Addr) {
-     Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
--    if (PtrTy->getPointerAddressSpace() != 0)
-+    auto AS = PtrTy->getPointerAddressSpace();
-+    // Allow for custom addresspaces
-+    if (AS != 0 && AS < 10)
-       return false;
-   }
- 
--- 
-2.17.0
-
diff --git a/deps/patches/llvm-symver-jlprefix.patch b/deps/patches/llvm-symver-jlprefix.patch
deleted file mode 100644
index 59872380ad0737..00000000000000
--- a/deps/patches/llvm-symver-jlprefix.patch
+++ /dev/null
@@ -1,18 +0,0 @@
-From f23277bb91a4925ba8763337137a3123a7600557 Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Tue, 16 Jan 2018 17:29:05 -0500
-Subject: [PATCH] add JL prefix to all LLVM version suffixes
-
----
- tools/llvm-shlib/simple_version_script.map.in | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/tools/llvm-shlib/simple_version_script.map.in b/tools/llvm-shlib/simple_version_script.map.in
-index e9515fe7862..af082581627 100644
---- a/tools/llvm-shlib/simple_version_script.map.in
-+++ b/tools/llvm-shlib/simple_version_script.map.in
-@@ -1 +1 @@
--LLVM_@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@ { global: *; };
-+JL_LLVM_@LLVM_VERSION_MAJOR@.@LLVM_VERSION_MINOR@ { global: *; };
---
-2.15.1
diff --git a/deps/patches/llvm-test-plugin-mingw.patch b/deps/patches/llvm-test-plugin-mingw.patch
deleted file mode 100644
index 14cf07166d8610..00000000000000
--- a/deps/patches/llvm-test-plugin-mingw.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 9bd3774db73533c8df475639805ff1516aea274c Mon Sep 17 00:00:00 2001
-From: Valentin Churavy <v.churavy@gmail.com>
-Date: Wed, 24 Jul 2019 21:45:33 -0400
-Subject: [PATCH] add missing components to TestPlugin
-
----
- llvm/unittests/Passes/CMakeLists.txt | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/unittests/Passes/CMakeLists.txt b/unittests/Passes/CMakeLists.txt
-index 3e83b527958..4b09f47c234 100644
---- a/unittests/Passes/CMakeLists.txt
-+++ b/unittests/Passes/CMakeLists.txt
-@@ -14,7 +14,7 @@ add_llvm_unittest(PluginsTests
- export_executable_symbols(PluginsTests)
- target_link_libraries(PluginsTests PRIVATE LLVMTestingSupport)
- 
--set(LLVM_LINK_COMPONENTS)
-+set(LLVM_LINK_COMPONENTS Support Passes Core)
- add_llvm_library(TestPlugin MODULE BUILDTREE_ONLY
-   TestPlugin.cpp
-   )
--- 
-2.22.0
diff --git a/deps/patches/llvm7-D50010-VNCoercion-ni.patch b/deps/patches/llvm7-D50010-VNCoercion-ni.patch
deleted file mode 100644
index 729c4185128c9e..00000000000000
--- a/deps/patches/llvm7-D50010-VNCoercion-ni.patch
+++ /dev/null
@@ -1,67 +0,0 @@
-diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
-index 948d9bd5baa..fbd5b9bb3be 100644
---- a/lib/Transforms/Utils/VNCoercion.cpp
-+++ b/lib/Transforms/Utils/VNCoercion.cpp
-@@ -20,7 +20,8 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
-       StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
-     return false;
- 
--  uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType());
-+  Type *StoredValTy = StoredVal->getType();
-+  uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy);
- 
-   // The store size must be byte-aligned to support future type casts.
-   if (llvm::alignTo(StoreSize, 8) != StoreSize)
-@@ -30,10 +31,15 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
-   if (StoreSize < DL.getTypeSizeInBits(LoadTy))
-     return false;
- 
--  // Don't coerce non-integral pointers to integers or vice versa.
--  if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
--      DL.isNonIntegralPointerType(LoadTy))
-+  bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
-+  bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
-+  if (StoredNI != LoadNI) {
-+    return false;
-+  } else if (StoredNI && LoadNI &&
-+             cast<PointerType>(StoredValTy)->getAddressSpace() !=
-+                 cast<PointerType>(LoadTy)->getAddressSpace()) {
-     return false;
-+  }
- 
-   return true;
- }
-diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
-index 9ae4132231d..5217fc1a06a 100644
---- a/test/Transforms/GVN/non-integral-pointers.ll
-+++ b/test/Transforms/GVN/non-integral-pointers.ll
-@@ -1,6 +1,6 @@
- ; RUN: opt -gvn -S < %s | FileCheck %s
- 
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
- target triple = "x86_64-unknown-linux-gnu"
- 
- define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
-@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
-  alwaysTaken:
-   ret i64 42
- }
-+
-+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
-+ ; CHECK-LABEL: @multini(
-+ ; CHECK-NOT: inttoptr
-+ ; CHECK-NOT: ptrtoint
-+ ; CHECK-NOT: addrspacecast
-+  entry:
-+   store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
-+   br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
-+
-+  neverTaken:
-+   %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
-+   %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
-+   ret i8 addrspace(5)* %differentas
-+
-+  alwaysTaken:
-+   ret i8 addrspace(5)* null
-+ }
diff --git a/deps/patches/llvm7-revert-D44485.patch b/deps/patches/llvm7-revert-D44485.patch
deleted file mode 100644
index 121e1974f5f4f6..00000000000000
--- a/deps/patches/llvm7-revert-D44485.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From 4370214628487ac8495f963ae05960b5ecc31103 Mon Sep 17 00:00:00 2001
-From: Jameson Nash <vtjnash@gmail.com>
-Date: Thu, 12 Sep 2019 11:45:07 -0400
-Subject: [PATCH] Revert "[MC] Always emit relocations for same-section
- function references"
-
-This reverts commit 9232972575cafac29c3e4817c8714c9aca0e8585.
----
- lib/MC/WinCOFFObjectWriter.cpp | 12 +++++-------
- test/MC/COFF/diff.s            | 25 ++++++++-----------------
- 2 files changed, 13 insertions(+), 24 deletions(-)
-
-diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
-index 9ffecd99df6..0214161e03c 100644
---- a/lib/MC/WinCOFFObjectWriter.cpp
-+++ b/lib/MC/WinCOFFObjectWriter.cpp
-@@ -690,14 +690,12 @@ void WinCOFFObjectWriter::executePostLayoutBinding(MCAssembler &Asm,
- bool WinCOFFObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(
-     const MCAssembler &Asm, const MCSymbol &SymA, const MCFragment &FB,
-     bool InSet, bool IsPCRel) const {
--  // Don't drop relocations between functions, even if they are in the same text
--  // section. Multiple Visual C++ linker features depend on having the
--  // relocations present. The /INCREMENTAL flag will cause these relocations to
--  // point to thunks, and the /GUARD:CF flag assumes that it can use relocations
--  // to approximate the set of all address taken functions. LLD's implementation
--  // of /GUARD:CF also relies on the existance of these relocations.
-+  // MS LINK expects to be able to replace all references to a function with a
-+  // thunk to implement their /INCREMENTAL feature.  Make sure we don't optimize
-+  // away any relocations to functions.
-   uint16_t Type = cast<MCSymbolCOFF>(SymA).getType();
--  if ((Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
-+  if (Asm.isIncrementalLinkerCompatible() &&
-+      (Type >> COFF::SCT_COMPLEX_TYPE_SHIFT) == COFF::IMAGE_SYM_DTYPE_FUNCTION)
-     return false;
-   return MCObjectWriter::isSymbolRefDifferenceFullyResolvedImpl(Asm, SymA, FB,
-                                                                 InSet, IsPCRel);
-diff --git a/test/MC/COFF/diff.s b/test/MC/COFF/diff.s
-index f89e4ed8901..d68e628577b 100644
---- a/test/MC/COFF/diff.s
-+++ b/test/MC/COFF/diff.s
-@@ -1,14 +1,19 @@
- // RUN: llvm-mc -filetype=obj -triple i686-pc-mingw32 %s | llvm-readobj -s -sr -sd | FileCheck %s
- 
--// COFF resolves differences between labels in the same section, unless that
--// label is declared with function type.
--
- .section baz, "xr"
-+	.def	X
-+	.scl	2;
-+	.type	32;
-+	.endef
- 	.globl	X
- X:
- 	mov	Y-X+42,	%eax
- 	retl
- 
-+	.def	Y
-+	.scl	2;
-+	.type	32;
-+	.endef
- 	.globl	Y
- Y:
- 	retl
-@@ -25,11 +30,6 @@ _foobar:                                # @foobar
- # %bb.0:
- 	ret
- 
--	.globl	_baz
--_baz:
--	calll	_foobar
--	retl
--
- 	.data
- 	.globl	_rust_crate             # @rust_crate
- 	.align	4
-@@ -39,15 +39,6 @@ _rust_crate:
- 	.long	_foobar-_rust_crate
- 	.long	_foobar-_rust_crate
- 
--// Even though _baz and _foobar are in the same .text section, we keep the
--// relocation for compatibility with the VC linker's /guard:cf and /incremental
--// flags, even on mingw.
--
--// CHECK:        Name: .text
--// CHECK:        Relocations [
--// CHECK-NEXT:     0x12 IMAGE_REL_I386_REL32 _foobar
--// CHECK-NEXT:   ]
--
- // CHECK:        Name: .data
- // CHECK:        Relocations [
- // CHECK-NEXT:     0x4 IMAGE_REL_I386_DIR32 _foobar
--- 
-2.17.1
-
diff --git a/deps/patches/llvm7-windows-race.patch b/deps/patches/llvm7-windows-race.patch
deleted file mode 100644
index b6ae6bae43ba42..00000000000000
--- a/deps/patches/llvm7-windows-race.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-diff --git a/tools/llvm-config/CMakeLists.txt b/tools/llvm-config/CMakeLists.txt
-index f59402ac4b0..5de4c6febe7 100644
---- a/tools/llvm-config/CMakeLists.txt
-+++ b/tools/llvm-config/CMakeLists.txt
-@@ -77,5 +77,10 @@ if(CMAKE_CROSSCOMPILING AND NOT LLVM_CONFIG_PATH)
-   add_custom_target(NativeLLVMConfig DEPENDS ${LLVM_CONFIG_PATH})
-   add_dependencies(NativeLLVMConfig CONFIGURE_LLVM_NATIVE)
- 
-+  # Add a dependency on the host tblgen, which uses the same working
-+  # directory and with which we're otherwise racing to build some
-+  # of the utility libraries.
-+  add_dependencies(NativeLLVMConfig LLVM-tablegen-host)
-+
-   add_dependencies(llvm-config NativeLLVMConfig)
- endif()
diff --git a/deps/patches/llvm8-D34078-vectorize-fdiv.patch b/deps/patches/llvm8-D34078-vectorize-fdiv.patch
deleted file mode 100644
index c386d04b853761..00000000000000
--- a/deps/patches/llvm8-D34078-vectorize-fdiv.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp
-index aaebc4a481e..91fe4c0003c 100644
---- a/lib/Analysis/IVDescriptors.cpp
-+++ b/lib/Analysis/IVDescriptors.cpp
-@@ -571,6 +571,7 @@ RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
-     return InstDesc(Kind == RK_IntegerOr, I);
-   case Instruction::Xor:
-     return InstDesc(Kind == RK_IntegerXor, I);
-+  case Instruction::FDiv:
-   case Instruction::FMul:
-     return InstDesc(Kind == RK_FloatMult, I, UAI);
-   case Instruction::FSub:
-diff --git a/test/Transforms/LoopVectorize/float-reduction.ll b/test/Transforms/LoopVectorize/float-reduction.ll
-index f3b95d0ead7..669c54d55a2 100644
---- a/test/Transforms/LoopVectorize/float-reduction.ll
-+++ b/test/Transforms/LoopVectorize/float-reduction.ll
-@@ -44,3 +44,25 @@ for.body:                                         ; preds = %for.body, %entry
- for.end:                                          ; preds = %for.body
-   ret float %sub
- }
-+
-+;CHECK-LABEL: @foodiv(
-+;CHECK: fdiv fast <4 x float>
-+;CHECK: ret
-+define float @foodiv(float* nocapture %A, i32* nocapture %n) nounwind uwtable readonly ssp {
-+entry:
-+  br label %for.body
-+
-+for.body:                                         ; preds = %for.body, %entry
-+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
-+  %sum.04 = phi float [ 1.000000e+00, %entry ], [ %sub, %for.body ]
-+  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
-+  %0 = load float, float* %arrayidx, align 4
-+  %sub = fdiv fast float %sum.04, %0
-+  %indvars.iv.next = add i64 %indvars.iv, 1
-+  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-+  %exitcond = icmp eq i32 %lftr.wideiv, 200
-+  br i1 %exitcond, label %for.end, label %for.body
-+
-+for.end:                                          ; preds = %for.body
-+  ret float %sub
-+}
diff --git a/deps/patches/llvm9-D50010-VNCoercion-ni.patch b/deps/patches/llvm9-D50010-VNCoercion-ni.patch
deleted file mode 100644
index 988d669fe08fc4..00000000000000
--- a/deps/patches/llvm9-D50010-VNCoercion-ni.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
---- a/lib/Transforms/Utils/VNCoercion.cpp
-+++ b/lib/Transforms/Utils/VNCoercion.cpp
-@@ -34,17 +34,22 @@
-   if (StoreSize < DL.getTypeSizeInBits(LoadTy))
-     return false;
- 
-+  bool StoredNI = DL.isNonIntegralPointerType(StoredTy->getScalarType());
-+  bool LoadNI = DL.isNonIntegralPointerType(LoadTy->getScalarType());
-   // Don't coerce non-integral pointers to integers or vice versa.
--  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
--      DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
-+  if (StoredNI != LoadNI) {
-     // As a special case, allow coercion of memset used to initialize
-     // an array w/null.  Despite non-integral pointers not generally having a
-     // specific bit pattern, we do assume null is zero.
-     if (auto *CI = dyn_cast<Constant>(StoredVal))
-       return CI->isNullValue();
-     return false;
-+  } else if (StoredNI && LoadNI &&
-+             cast<PointerType>(StoredTy)->getAddressSpace() !=
-+                 cast<PointerType>(LoadTy)->getAddressSpace()) {
-+    return false;
-   }
--  
-+
-   return true;
- }
- 
-diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
---- a/test/Transforms/GVN/non-integral-pointers.ll
-+++ b/test/Transforms/GVN/non-integral-pointers.ll
-@@ -1,7 +1,7 @@
- ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
- ; RUN: opt -gvn -S < %s | FileCheck %s
- 
--target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
-+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
- target triple = "x86_64-unknown-linux-gnu"
- 
- define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
-@@ -285,3 +285,21 @@
-   %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off
-   ret i8 addrspace(4)* %ref
- }
-+
-+ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
-+ ; CHECK-LABEL: @multini(
-+ ; CHECK-NOT: inttoptr
-+ ; CHECK-NOT: ptrtoint
-+ ; CHECK-NOT: addrspacecast
-+  entry:
-+   store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
-+   br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
-+
-+  neverTaken:
-+   %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
-+   %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
-+   ret i8 addrspace(5)* %differentas
-+
-+  alwaysTaken:
-+   ret i8 addrspace(5)* null
-+ }
-
diff --git a/deps/patches/llvm9-D71443-PPC-MC-redef-symbol.patch b/deps/patches/llvm9-D71443-PPC-MC-redef-symbol.patch
deleted file mode 100644
index 904514a60f83f3..00000000000000
--- a/deps/patches/llvm9-D71443-PPC-MC-redef-symbol.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-From 5cd52dbfa9c60cfd12676924bed97701ee9bc4ef Mon Sep 17 00:00:00 2001
-From: Fangrui Song <maskray@google.com>
-Date: Thu, 12 Dec 2019 16:18:57 -0800
-Subject: [PATCH] [MC][PowerPC] Fix a crash when redefining a symbol after .set
-
-Fix PR44284. This is probably not valid assembly but we should not crash.
-
-Reviewed By: luporl, #powerpc, steven.zhang
-
-Differential Revision: https://reviews.llvm.org/D71443
-
-(cherry picked from commit f99eedeb72644671cd584f48e4c136d47f6b0020)
----
- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 3 ++-
- llvm/test/MC/PowerPC/ppc64-localentry-symbols.s          | 5 +++++
- 2 files changed, 7 insertions(+), 1 deletion(-)
-
-diff --git llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-index 90c3c8d20ed..71f926c265e 100644
---- llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-+++ llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
-@@ -196,7 +196,8 @@ public:
- 
-   void finish() override {
-     for (auto *Sym : UpdateOther)
--      copyLocalEntry(Sym, Sym->getVariableValue());
-+      if (Sym->isVariable())
-+        copyLocalEntry(Sym, Sym->getVariableValue());
-   }
- 
- private:
-diff --git llvm/test/MC/PowerPC/ppc64-localentry-symbols.s llvm/test/MC/PowerPC/ppc64-localentry-symbols.s
-index f1d5c5d0ab1..a663af57ad4 100644
---- llvm/test/MC/PowerPC/ppc64-localentry-symbols.s
-+++ llvm/test/MC/PowerPC/ppc64-localentry-symbols.s
-@@ -32,3 +32,8 @@ func:
-   nop
-   nop
-   .localentry func, 8
-+
-+## PR44284 Don't crash if err is redefined after .set
-+.set err, _err
-+.globl err
-+err:
--- 
-2.26.0
-
diff --git a/deps/patches/neoverse-generic-kernels.patch b/deps/patches/neoverse-generic-kernels.patch
new file mode 100644
index 00000000000000..ab37e3783bf3e5
--- /dev/null
+++ b/deps/patches/neoverse-generic-kernels.patch
@@ -0,0 +1,19 @@
+diff --git a/kernel/arm64/KERNEL.NEOVERSEN1 b/kernel/arm64/KERNEL.NEOVERSEN1
+index ea010db4..074d7215 100644
+--- a/kernel/arm64/KERNEL.NEOVERSEN1
++++ b/kernel/arm64/KERNEL.NEOVERSEN1
+@@ -91,10 +91,10 @@ IDAMAXKERNEL   = iamax_thunderx2t99.c
+ ICAMAXKERNEL   = izamax_thunderx2t99.c
+ IZAMAXKERNEL   = izamax_thunderx2t99.c
+ 
+-SNRM2KERNEL    = scnrm2_thunderx2t99.c
+-DNRM2KERNEL    = dznrm2_thunderx2t99.c
+-CNRM2KERNEL    = scnrm2_thunderx2t99.c
+-ZNRM2KERNEL    = dznrm2_thunderx2t99.c
++SNRM2KERNEL    = nrm2.S
++DNRM2KERNEL    = nrm2.S
++CNRM2KERNEL    = znrm2.S
++ZNRM2KERNEL    = znrm2.S
+ 
+ DDOTKERNEL     = dot_thunderx2t99.c
+ SDOTKERNEL     = dot_thunderx2t99.c
diff --git a/deps/patches/openblas-ofast-power.patch b/deps/patches/openblas-ofast-power.patch
index 3d777eb2c8f7a3..405e3f7581331a 100644
--- a/deps/patches/openblas-ofast-power.patch
+++ b/deps/patches/openblas-ofast-power.patch
@@ -1,33 +1,31 @@
- Makefile.power | 8 ++++----
- 1 file changed, 4 insertions(+), 4 deletions(-)
-
 diff --git a/Makefile.power b/Makefile.power
-index 24d8aa8a..e53a243a 100644
+index 28a0bae0..b4869fbd 100644
 --- a/Makefile.power
 +++ b/Makefile.power
-@@ -11,20 +11,20 @@ endif
+@@ -11,7 +11,7 @@ endif
  
- ifeq ($(CORE), POWER9)
- ifeq ($(USE_OPENMP), 1)
--COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
-+COMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
- FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
+ ifeq ($(CORE), POWER10)
+ ifneq ($(C_COMPILER), PGI)
+-CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
++CCOMMON_OPT += -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
+ ifeq ($(F_COMPILER), IBM)
+ FCOMMON_OPT += -O2 -qrecur -qnosave
  else
--COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
-+COMMON_OPT += -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
- FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
- endif
- endif
+@@ -22,7 +22,7 @@ endif
+ 
+ ifeq ($(CORE), POWER9)
+ ifneq ($(C_COMPILER), PGI)
+-CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
++CCOMMON_OPT += -mvsx -fno-fast-math
+ ifeq ($(C_COMPILER), GCC)
+ ifneq ($(GCCVERSIONGT4), 1)
+ $(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
+@@ -59,7 +59,7 @@ endif
  
  ifeq ($(CORE), POWER8)
- ifeq ($(USE_OPENMP), 1)
--COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
-+COMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
- FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
+ ifneq ($(C_COMPILER), PGI)
+-CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx  -fno-fast-math
++CCOMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx  -fno-fast-math
  else
--COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
-+COMMON_OPT += -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
- FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
- endif
+ CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
  endif
-
diff --git a/deps/patches/openblas-winexit.patch b/deps/patches/openblas-winexit.patch
index 8db2fbf0cb09b8..33389f34a40742 100644
--- a/deps/patches/openblas-winexit.patch
+++ b/deps/patches/openblas-winexit.patch
@@ -1,16 +1,8 @@
-From f919c3301fabbaa5d965dcc7b1c3d6892a8c730a Mon Sep 17 00:00:00 2001
-From: Keno Fischer <keno@juliacomputing.com>
-Date: Sat, 14 Mar 2020 12:05:19 +0100
-
----
- driver/others/memory.c | 131 +----------------------------------------
- 1 file changed, 2 insertions(+), 129 deletions(-)
-
 diff --git a/driver/others/memory.c b/driver/others/memory.c
-index 62a5a021..23f8fe65 100644
+index 6e654ccf..1d2f9f12 100644
 --- a/driver/others/memory.c
 +++ b/driver/others/memory.c
-@@ -1510,7 +1510,7 @@ void CONSTRUCTOR gotoblas_init(void) {
+@@ -1534,7 +1534,7 @@ void CONSTRUCTOR gotoblas_init(void) {
  
  }
  
@@ -19,11 +11,10 @@ index 62a5a021..23f8fe65 100644
  
    if (gotoblas_initialized == 0) return;
  
-@@ -1547,74 +1547,12 @@ void DESTRUCTOR gotoblas_quit(void) {
- #endif
+@@ -1572,75 +1572,11 @@ void DESTRUCTOR gotoblas_quit(void) {
  }
  
--#if defined(_MSC_VER) && !defined(__clang__)
+ #if defined(_MSC_VER) && !defined(__clang__)
 -BOOL APIENTRY DllMain(HMODULE hModule, DWORD  ul_reason_for_call, LPVOID lpReserved)
 -{
 -  switch (ul_reason_for_call)
@@ -57,24 +48,26 @@ index 62a5a021..23f8fe65 100644
 -*/
 -static int on_process_term(void)
 -{
--	gotoblas_quit();
--	return 0;
+-  gotoblas_quit();
+-  return 0;
 -}
  #ifdef _WIN64
  #pragma comment(linker, "/INCLUDE:_tls_used")
  #else
  #pragma comment(linker, "/INCLUDE:__tls_used")
  #endif
- 
+-
 -#ifdef _WIN64
 -#pragma const_seg(".CRT$XLB")
 -#else
 -#pragma data_seg(".CRT$XLB")
 -#endif
--static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
+-
 -#ifdef _WIN64
+-static const PIMAGE_TLS_CALLBACK dll_callback(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
 -#pragma const_seg()
 -#else
+-static void (APIENTRY *dll_callback)(HINSTANCE h, DWORD ul_reason_for_call, PVOID pv) = DllMain;
 -#pragma data_seg()
 -#endif
 -
@@ -83,18 +76,18 @@ index 62a5a021..23f8fe65 100644
 -#else
 -#pragma data_seg(".CRT$XTU")
 -#endif
--static int(*p_process_term)(void) = on_process_term;
+-
 -#ifdef _WIN64
+-static const int(*p_process_term)(void) = on_process_term;
 -#pragma const_seg()
 -#else
+-static int(*p_process_term)(void) = on_process_term;
 -#pragma data_seg()
 -#endif
--#endif
--
+ #endif
+ 
  #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
- /* Don't call me; this is just work around for PGI / Sun bug */
- void gotoblas_dummy_for_PGI(void) {
-@@ -3104,7 +3042,7 @@ void CONSTRUCTOR gotoblas_init(void) {
+@@ -3146,7 +3082,7 @@ void CONSTRUCTOR gotoblas_init(void) {
  
  }
  
@@ -103,7 +96,7 @@ index 62a5a021..23f8fe65 100644
  
    if (gotoblas_initialized == 0) return;
  
-@@ -3133,71 +3071,6 @@ void DESTRUCTOR gotoblas_quit(void) {
+@@ -3175,71 +3111,6 @@ void DESTRUCTOR gotoblas_quit(void) {
  #endif
  }
  
@@ -138,8 +131,8 @@ index 62a5a021..23f8fe65 100644
 -*/
 -static int on_process_term(void)
 -{
--	gotoblas_quit();
--	return 0;
+-  gotoblas_quit();
+-  return 0;
 -}
 -#ifdef _WIN64
 -#pragma comment(linker, "/INCLUDE:_tls_used")
@@ -175,4 +168,3 @@ index 62a5a021..23f8fe65 100644
  #if (defined(C_PGI) || (!defined(C_SUN) && defined(F_INTERFACE_SUN))) && (defined(ARCH_X86) || defined(ARCH_X86_64))
  /* Don't call me; this is just work around for PGI / Sun bug */
  void gotoblas_dummy_for_PGI(void) {
-
diff --git a/deps/patches/pcre2-sljit-apple-silicon-support.patch b/deps/patches/pcre2-sljit-apple-silicon-support.patch
new file mode 100644
index 00000000000000..3aff832ca08fd8
--- /dev/null
+++ b/deps/patches/pcre2-sljit-apple-silicon-support.patch
@@ -0,0 +1,244 @@
+From e87e1ccf93768238db3d6e28d0272980dba707fa Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
+Date: Mon, 30 Nov 2020 01:35:13 -0800
+Subject: [PATCH] macos: add BigSur support to execalloc (#90)
+
+Apple Silicon requires that pages that will hold JIT code are
+marked with MAP_JIT (even if not using the hardened runtime)
+and that a call be made to a pthread function before writing
+to them, so a special exception could be made to the current
+thread[1]; add support for both.
+
+since the allocator keeps the metadata about chunk/block in the
+executable pages, all functions that modify that metadata will
+also need to be updated.
+
+note that since there is no need for an accurate pointer range
+with the apple implementation, NULL is passed for the pointers.
+
+historically, adding MAP_JIT was only recommended when the hardened
+runtime was being used as it adds several undocumented restrictions
+(like not being able to use JIT pages accross fork()) so the
+new codepath won't be used if running in Intel.
+
+Tested-by: @Keno
+Fixes: #51
+
+[1] https://developer.apple.com/documentation/apple_silicon/porting_just-in-time_compilers_to_apple_silicon?language=objc
+---
+ sljit_src/sljitExecAllocator.c | 113 ++++++++++++++++++---------------
+ 1 file changed, 63 insertions(+), 50 deletions(-)
+
+diff --git a/sljit_src/sljitExecAllocator.c b/sljit_src/sljitExecAllocator.c
+index 61a32f2..2e1c138 100644
+--- a/sljit_src/sljitExecAllocator.c
++++ b/sljit_src/sljitExecAllocator.c
+@@ -79,6 +79,7 @@
+ */
+ 
+ #ifdef _WIN32
++#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
+ 
+ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+ {
+@@ -91,65 +92,76 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+ 	VirtualFree(chunk, 0, MEM_RELEASE);
+ }
+ 
+-#else
+-
+-#ifdef __APPLE__
+-#ifdef MAP_ANON
+-/* Configures TARGET_OS_OSX when appropriate */
+-#include <TargetConditionals.h>
+-
+-#if TARGET_OS_OSX && defined(MAP_JIT)
+-#include <sys/utsname.h>
+-#endif /* TARGET_OS_OSX && MAP_JIT */
+-
+-#ifdef MAP_JIT
++#else /* POSIX */
+ 
++#if defined(__APPLE__) && defined(MAP_JIT)
+ /*
+    On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a
+-   version where it's OK to have more than one JIT block.
++   version where it's OK to have more than one JIT block or where MAP_JIT is
++   required.
+    On non-macOS systems, returns MAP_JIT if it is defined.
+ */
++#include <TargetConditionals.h>
++#if TARGET_OS_OSX
++#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86
++#ifdef MAP_ANON
++#include <sys/utsname.h>
++#include <stdlib.h>
++
++#define SLJIT_MAP_JIT	(get_map_jit_flag())
++
+ static SLJIT_INLINE int get_map_jit_flag()
+ {
+-#if TARGET_OS_OSX
+-	sljit_sw page_size = get_page_alignment() + 1;
++	sljit_sw page_size;
+ 	void *ptr;
++	struct utsname name;
+ 	static int map_jit_flag = -1;
+ 
+-	/*
+-	  The following code is thread safe because multiple initialization
+-	  sets map_jit_flag to the same value and the code has no side-effects.
+-	  Changing the kernel version witout system restart is (very) unlikely.
+-	*/
+-	if (map_jit_flag == -1) {
+-		struct utsname name;
+-
++	if (map_jit_flag < 0) {
+ 		map_jit_flag = 0;
+ 		uname(&name);
+ 
+-		/* Kernel version for 10.14.0 (Mojave) */
++		/* Kernel version for 10.14.0 (Mojave) or later */
+ 		if (atoi(name.release) >= 18) {
++			page_size = get_page_alignment() + 1;
+ 			/* Only use MAP_JIT if a hardened runtime is used */
++			ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC,
++					MAP_PRIVATE | MAP_ANON, -1, 0);
+ 
+-			ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
+-
+-			if (ptr == MAP_FAILED) {
+-				map_jit_flag = MAP_JIT;
+-			} else {
++			if (ptr != MAP_FAILED)
+ 				munmap(ptr, page_size);
+-			}
++			else
++				map_jit_flag = MAP_JIT;
+ 		}
+ 	}
+-
+ 	return map_jit_flag;
+-#else /* !TARGET_OS_OSX */
+-	return MAP_JIT;
+-#endif /* TARGET_OS_OSX */
+ }
+-
+-#endif /* MAP_JIT */
+ #endif /* MAP_ANON */
+-#endif /* __APPLE__ */
++#else /* !SLJIT_CONFIG_X86 */
++#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM)
++#error Unsupported architecture
++#endif /* SLJIT_CONFIG_ARM */
++#include <pthread.h>
++
++#define SLJIT_MAP_JIT	(MAP_JIT)
++#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \
++                        apple_update_wx_flags(enable_exec)
++
++static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec)
++{
++	pthread_jit_write_protect_np(enable_exec);
++}
++#endif /* SLJIT_CONFIG_X86 */
++#else /* !TARGET_OS_OSX */
++#define SLJIT_MAP_JIT	(MAP_JIT)
++#endif /* TARGET_OS_OSX */
++#endif /* __APPLE__ && MAP_JIT */
++#ifndef SLJIT_UPDATE_WX_FLAGS
++#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec)
++#endif /* !SLJIT_UPDATE_WX_FLAGS */
++#ifndef SLJIT_MAP_JIT
++#define SLJIT_MAP_JIT	(0)
++#endif /* !SLJIT_MAP_JIT */
+ 
+ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+ {
+@@ -157,12 +169,7 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+ 	const int prot = PROT_READ | PROT_WRITE | PROT_EXEC;
+ 
+ #ifdef MAP_ANON
+-
+-	int flags = MAP_PRIVATE | MAP_ANON;
+-
+-#ifdef MAP_JIT
+-	flags |= get_map_jit_flag();
+-#endif
++	int flags = MAP_PRIVATE | MAP_ANON | SLJIT_MAP_JIT;
+ 
+ 	retval = mmap(NULL, size, prot, flags, -1, 0);
+ #else /* !MAP_ANON */
+@@ -173,14 +180,15 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+ #endif /* MAP_ANON */
+ 
+ 	if (retval == MAP_FAILED)
+-		retval = NULL;
+-	else {
+-		if (mprotect(retval, size, prot) < 0) {
+-			munmap(retval, size);
+-			retval = NULL;
+-		}
++		return NULL;
++
++	if (mprotect(retval, size, prot) < 0) {
++		munmap(retval, size);
++		return NULL;
+ 	}
+ 
++	SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0);
++
+ 	return retval;
+ }
+ 
+@@ -189,7 +197,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size)
+ 	munmap(chunk, size);
+ }
+ 
+-#endif
++#endif /* windows */
+ 
+ /* --------------------------------------------------------------------- */
+ /*  Common functions                                                     */
+@@ -261,6 +269,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size)
+ 	while (free_block) {
+ 		if (free_block->size >= size) {
+ 			chunk_size = free_block->size;
++			SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+ 			if (chunk_size > size + 64) {
+ 				/* We just cut a block from the end of the free block. */
+ 				chunk_size -= size;
+@@ -326,6 +335,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
+ 	allocated_size -= header->size;
+ 
+ 	/* Connecting free blocks together if possible. */
++	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+ 
+ 	/* If header->prev_size == 0, free_block will equal to header.
+ 	   In this case, free_block->header.size will be > 0. */
+@@ -358,6 +368,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr)
+ 		}
+ 	}
+ 
++	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
+ 	SLJIT_ALLOCATOR_UNLOCK();
+ }
+ 
+@@ -367,6 +378,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+ 	struct free_block* next_free_block;
+ 
+ 	SLJIT_ALLOCATOR_LOCK();
++	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0);
+ 
+ 	free_block = free_blocks;
+ 	while (free_block) {
+@@ -381,5 +393,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void)
+ 	}
+ 
+ 	SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks));
++	SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1);
+ 	SLJIT_ALLOCATOR_UNLOCK();
+ }
+-- 
+2.30.0
+
diff --git a/deps/patches/pcre2-sljit-nomprotect.patch b/deps/patches/pcre2-sljit-nomprotect.patch
new file mode 100644
index 00000000000000..3c2df1808630b9
--- /dev/null
+++ b/deps/patches/pcre2-sljit-nomprotect.patch
@@ -0,0 +1,17 @@
+diff --git a/sljit_src/sljitExecAllocator.c b/sljit_src/sljitExecAllocator.c
+index 2e1c138..bae8cd6 100644
+--- a/sljit_src/sljitExecAllocator.c
++++ b/sljit_src/sljitExecAllocator.c
+@@ -182,10 +182,12 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
+ 	if (retval == MAP_FAILED)
+ 		return NULL;
+ 
++#ifdef SLIJT_WX_OS_NEEDSCHEK
+ 	if (mprotect(retval, size, prot) < 0) {
+ 		munmap(retval, size);
+ 		return NULL;
+ 	}
++#endif
+ 
+ 	SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0);
+ 
diff --git a/deps/pcre.mk b/deps/pcre.mk
index d7832adeef5cf3..71b69b318f695f 100644
--- a/deps/pcre.mk
+++ b/deps/pcre.mk
@@ -6,19 +6,29 @@ PCRE_CFLAGS := -O3
 PCRE_LDFLAGS := $(RPATH_ESCAPED_ORIGIN)
 
 $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://ftp.pcre.org/pub/pcre/pcre2-$(PCRE_VER).tar.bz2
+	$(JLDOWNLOAD) $@ https://github.com/PhilipHazel/pcre2/releases/download/pcre2-$(PCRE_VER)/pcre2-$(PCRE_VER).tar.bz2
 
 $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2
 	$(JLCHECKSUM) $<
 	cd $(dir $<) && $(TAR) jxf $(notdir $<)
 	cp $(SRCDIR)/patches/config.sub $(SRCCACHE)/pcre2-$(PCRE_VER)/config.sub
-	touch -c $(SRCCACHE)/pcre2-$(PCRE_VER)/configure # old target
-	echo $1 > $@
+	echo 1 > $@
+
+checksum-pcre: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2
+	$(JLCHECKSUM) $<
+
+$(SRCCACHE)/pcre2-$(PCRE_VER)/pcre2-sljit-apple-silicon-support.patch-applied: $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted
+	cd $(SRCCACHE)/pcre2-$(PCRE_VER) && patch -d src/sljit -p2 -f < $(SRCDIR)/patches/pcre2-sljit-apple-silicon-support.patch
+	echo 1 > $@
+
+$(SRCCACHE)/pcre2-$(PCRE_VER)/pcre2-sljit-nomprotect.patch-applied: $(SRCCACHE)/pcre2-$(PCRE_VER)/pcre2-sljit-apple-silicon-support.patch-applied
+	cd $(SRCCACHE)/pcre2-$(PCRE_VER) && patch -d src/sljit -p2 -f < $(SRCDIR)/patches/pcre2-sljit-nomprotect.patch
+	echo 1 > $@
 
-$(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured: $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted
+$(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured: $(SRCCACHE)/pcre2-$(PCRE_VER)/source-extracted $(SRCCACHE)/pcre2-$(PCRE_VER)/pcre2-sljit-apple-silicon-support.patch-applied $(SRCCACHE)/pcre2-$(PCRE_VER)/pcre2-sljit-nomprotect.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) --enable-jit --includedir=$(build_includedir) CFLAGS="$(CFLAGS) $(PCRE_CFLAGS)" LDFLAGS="$(LDFLAGS) $(PCRE_LDFLAGS)"
+	$(dir $<)/configure $(CONFIGURE_COMMON) --enable-jit --includedir=$(build_includedir) CFLAGS="$(CFLAGS) $(PCRE_CFLAGS) -g -O0" LDFLAGS="$(LDFLAGS) $(PCRE_LDFLAGS)"
 	echo 1 > $@
 
 $(BUILDDIR)/pcre2-$(PCRE_VER)/build-compiled: $(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured
@@ -36,15 +46,15 @@ endif
 $(eval $(call staged-install, \
 	pcre,pcre2-$$(PCRE_VER), \
 	MAKE_INSTALL,$$(LIBTOOL_CCLD),, \
-	rm $$(build_shlibdir)/libpcre2-posix.* && \
+	rm -f $$(build_shlibdir)/libpcre2-posix.* && \
 	$$(INSTALL_NAME_CMD)libpcre2-8.$$(SHLIB_EXT) $$(build_shlibdir)/libpcre2-8.$$(SHLIB_EXT)))
 
 clean-pcre:
-	-rm $(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured $(BUILDDIR)/pcre2-$(PCRE_VER)/build-compiled
+	-rm -f $(BUILDDIR)/pcre2-$(PCRE_VER)/build-configured $(BUILDDIR)/pcre2-$(PCRE_VER)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/pcre2-$(PCRE_VER) clean
 
 distclean-pcre:
-	-rm -rf $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2 $(SRCCACHE)/pcre2-$(PCRE_VER) $(BUILDDIR)/pcre2-$(PCRE_VER)
+	rm -rf $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2 $(SRCCACHE)/pcre2-$(PCRE_VER) $(BUILDDIR)/pcre2-$(PCRE_VER)
 
 
 get-pcre: $(SRCCACHE)/pcre2-$(PCRE_VER).tar.bz2
@@ -55,8 +65,6 @@ fastcheck-pcre: check-pcre
 check-pcre: $(BUILDDIR)/pcre2-$(PCRE_VER)/build-checked
 
 else # USE_BINARYBUILDER_PCRE
-PCRE_BB_URL_BASE := https://github.com/JuliaPackaging/Yggdrasil/releases/download/PCRE2-v$(PCRE_VER)-$(PCRE_BB_REL)
-PCRE_BB_NAME := PCRE2.v$(PCRE_VER).0
 
 $(eval $(call bb-install,pcre,PCRE,false))
 
diff --git a/deps/suitesparse.mk b/deps/suitesparse.mk
deleted file mode 100644
index b6fe277543c4db..00000000000000
--- a/deps/suitesparse.mk
+++ /dev/null
@@ -1,157 +0,0 @@
-## SUITESPARSE ##
-
-ifeq ($(USE_BLAS64), 1)
-UMFPACK_CONFIG := -DLONGBLAS='long long'
-CHOLMOD_CONFIG := -DLONGBLAS='long long'
-SPQR_CONFIG := -DLONGBLAS='long long'
-ifeq ($(OPENBLAS_SYMBOLSUFFIX), 64_)
-UMFPACK_CONFIG += -DSUN64
-CHOLMOD_CONFIG += -DSUN64
-SPQR_CONFIG += -DSUN64
-endif
-endif
-
-# Disable trying to link against libmetis
-CHOLMOD_CONFIG += -DNPARTITION
-
-ifneq ($(USE_BINARYBUILDER_SUITESPARSE), 1)
-
-SUITESPARSE_PROJECTS := AMD BTF CAMD CCOLAMD COLAMD CHOLMOD LDL KLU UMFPACK RBio SPQR
-SUITESPARSE_LIBS := $(addsuffix .*$(SHLIB_EXT)*,suitesparseconfig amd btf camd ccolamd colamd cholmod klu umfpack rbio spqr)
-
-SUITE_SPARSE_LIB := $(LDFLAGS) -L"$(abspath $(BUILDDIR))/SuiteSparse-$(SUITESPARSE_VER)/lib"
-ifeq ($(OS), Darwin)
-SUITE_SPARSE_LIB += $(RPATH_ESCAPED_ORIGIN)
-endif
-SUITESPARSE_MFLAGS := CC="$(CC)" CXX="$(CXX)" F77="$(FC)" AR="$(AR)" RANLIB="$(RANLIB)" BLAS="$(LIBBLAS)" LAPACK="$(LIBLAPACK)" \
-	  LDFLAGS="$(SUITE_SPARSE_LIB)" CFOPENMP="" CUDA=no CUDA_PATH="" \
-	  UMFPACK_CONFIG="$(UMFPACK_CONFIG)" CHOLMOD_CONFIG="$(CHOLMOD_CONFIG)" SPQR_CONFIG="$(SPQR_CONFIG)"
-ifeq ($(OS),WINNT)
-SUITESPARSE_MFLAGS += UNAME=Windows
-else
-SUITESPARSE_MFLAGS += UNAME=$(OS)
-endif
-
-$(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://github.com/DrTimothyAldenDavis/SuiteSparse/archive/v$(SUITESPARSE_VER).tar.gz
-
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted: $(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz
-	$(JLCHECKSUM) $<
-	mkdir -p $(dir $@)
-	$(TAR) -C $(dir $@) --strip-components 1 -zxf $<
-	echo 1 > $@
-
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse-winclang.patch-applied: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted
-	cd $(dir $@) && patch -p0 < $(SRCDIR)/patches/SuiteSparse-winclang.patch
-	echo 1 > $@
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse-shlib.patch-applied: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted
-	cd $(dir $@) && patch -p1 < $(SRCDIR)/patches/SuiteSparse-shlib.patch
-	echo 1 > $@
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse-winclang.patch-applied
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse-shlib.patch-applied
-
-ifeq ($(USE_SYSTEM_BLAS), 0)
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: | $(build_prefix)/manifest/openblas
-else ifeq ($(USE_SYSTEM_LAPACK), 0)
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: | $(build_prefix)/manifest/lapack
-endif
-
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted
-	$(MAKE) -C $(dir $<)SuiteSparse_config library config $(SUITESPARSE_MFLAGS)
-	$(INSTALL_NAME_CMD)libsuitesparseconfig.$(SHLIB_EXT) $(dir $<)lib/libsuitesparseconfig.$(SHLIB_EXT)
-	for PROJ in $(SUITESPARSE_PROJECTS); do \
-		$(MAKE) -C $(dir $<)$${PROJ} library $(SUITESPARSE_MFLAGS) || exit 1; \
-		$(INSTALL_NAME_CMD)lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) $(dir $<)lib/lib`echo $${PROJ} | tr A-Z a-z`.$(SHLIB_EXT) || exit 1; \
-	done
-	echo 1 > $@
-
-ifeq ($(OS),WINNT)
-SUITESPARSE_SHLIB_ENV:=PATH="$(abspath $(dir $<))lib:$(build_bindir):$(PATH)"
-else
-SUITESPARSE_SHLIB_ENV:=LD_LIBRARY_PATH="$(build_shlibdir)"
-endif
-$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-checked: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled
-	for PROJ in $(SUITESPARSE_PROJECTS); do \
-		$(SUITESPARSE_SHLIB_ENV) $(MAKE) -C $(dir $<)$${PROJ} default $(SUITESPARSE_MFLAGS) || exit 1; \
-	done
-	echo 1 > $@
-
-$(build_prefix)/manifest/suitesparse: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled | $(build_prefix)/manifest $(build_shlibdir)
-	for lib in $(SUITESPARSE_LIBS); do \
-		cp -a $(dir $<)lib/lib$${lib} $(build_shlibdir) || exit 1; \
-	done
-	#cp -a $(dir $<)lib/* $(build_shlibdir)
-	#cp -a $(dir $<)include/* $(build_includedir)
-	echo $(SUITESPARSE_VER) > $@
-
-uninstall-suitesparse:
-	-rm $(build_prefix)/manifest/suitesparse
-	-rm $(addprefix $(build_shlibdir)/lib, $(SUITESPARSE_LIBS))
-
-clean-suitesparse: clean-suitesparse-wrapper uninstall-suitesparse
-	-rm $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled
-	-rm -fr $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/lib
-	-rm -fr $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/include
-	-$(MAKE) -C $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER) clean
-
-distclean-suitesparse: clean-suitesparse-wrapper
-	-rm -rf $(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz \
-		$(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)
-
-get-suitesparse: $(SRCCACHE)/SuiteSparse-$(SUITESPARSE_VER).tar.gz
-extract-suitesparse: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/source-extracted
-configure-suitesparse: extract-suitesparse
-compile-suitesparse: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-compiled
-fastcheck-suitesparse: #none
-check-suitesparse: $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/build-checked
-install-suitesparse: $(build_prefix)/manifest/suitesparse install-suitesparse-wrapper
-
-# SUITESPARSE WRAPPER
-
-ifeq ($(USE_SYSTEM_SUITESPARSE), 1)
-SUITESPARSE_INC := -I $(LOCALBASE)/include/suitesparse
-SUITESPARSE_LIB := -lumfpack -lcholmod -lamd -lcamd -lcolamd -lspqr
-else
-SUITESPARSE_INC := -I $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/CHOLMOD/Include -I $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SuiteSparse_config -I $(BUILDDIR)/SuiteSparse-$(SUITESPARSE_VER)/SPQR/Include
-SUITESPARSE_LIB := -L$(build_shlibdir) -lcholmod -lumfpack -lspqr $(RPATH_ORIGIN)
-$(build_shlibdir)/libsuitesparse_wrapper.$(SHLIB_EXT): $(build_prefix)/manifest/suitesparse
-endif
-
-$(build_shlibdir)/libsuitesparse_wrapper.$(SHLIB_EXT): $(SRCDIR)/SuiteSparse_wrapper.c
-	mkdir -p $(build_shlibdir)
-	$(CC) $(CPPFLAGS) $(CFLAGS) $(LDFLAGS) -O2 -shared $(fPIC) $(SUITESPARSE_INC) $< -o $@ $(SUITESPARSE_LIB)
-	$(INSTALL_NAME_CMD)libsuitesparse_wrapper.$(SHLIB_EXT) $@
-	touch -c $@
-
-clean-suitesparse-wrapper:
-	-rm -f $(build_shlibdir)/libsuitesparse_wrapper.$(SHLIB_EXT)
-
-distclean-suitesparse-wrapper: clean-suitesparse-wrapper
-
-get-suitesparse-wrapper:
-extract-suitesparse-wrapper:
-configure-suitesparse-wrapper:
-compile-suitesparse-wrapper:
-fastcheck-suitesparse-wrapper: #none
-check-suitesparse-wrapper:
-install-suitesparse-wrapper: $(build_shlibdir)/libsuitesparse_wrapper.$(SHLIB_EXT)
-
-else # USE_BINARYBUILDER_SUITESPARSE
-
-SUITESPARSE_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/SuiteSparse_jll.jl/releases/download/SuiteSparse-v$(SUITESPARSE_VER)+$(SUITESPARSE_BB_REL)
-SUITESPARSE_BB_NAME := SuiteSparse.v$(SUITESPARSE_VER)
-
-$(eval $(call bb-install,suitesparse,SUITESPARSE,false))
-get-suitesparse-wrapper: get-suitesparse
-extract-suitesparse-wrapper: extract-suitesparse
-configure-suitesparse-wrapper: configure-suitesparse
-compile-suitesparse-wrapper: compile-suitesparse
-fastcheck-suitesparse-wrapper: fastcheck-suitesparse
-check-suitesparse-wrapper: check-suitesparse
-clean-suitesparse-wrapper: clean-suitesparse
-distclean-suitesparse-wrapper: distclean-suitesparse
-install-suitesparse-wrapper: install-suitesparse
-
-# suitesparse depends on OpenBLAS
-compile-suitesparse: | $(build_prefix)/manifest/openblas
-endif
diff --git a/deps/tools/bb-install.mk b/deps/tools/bb-install.mk
index 67bead578d4bcd..3b6ef327f944fb 100644
--- a/deps/tools/bb-install.mk
+++ b/deps/tools/bb-install.mk
@@ -4,31 +4,37 @@
 #    3 gfortran, \             # signifies a GCC ABI (e.g. libgfortran version) dependency
 #    4 cxx11)                  # signifies a cxx11 ABI dependency
 
-# Auto-detect triplet once, create different versions that we use as defaults below for each BB install target
-BB_TRIPLET_LIBGFORTRAN_CXXABI := $(shell $(call invoke_python,$(JULIAHOME)/contrib/normalize_triplet.py) $(or $(XC_HOST),$(XC_HOST),$(BUILD_MACHINE)) "$(shell $(FC) --version | head -1)" "$(or $(shell echo '\#include <string>' | $(CXX) $(CXXFLAGS) -x c++ -dM -E - | grep _GLIBCXX_USE_CXX11_ABI | awk '{ print $$3 }' ),1)")
-BB_TRIPLET_LIBGFORTRAN := $(subst $(SPACE),-,$(filter-out cxx%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI))))
-BB_TRIPLET_CXXABI := $(subst $(SPACE),-,$(filter-out libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI))))
-BB_TRIPLET := $(subst $(SPACE),-,$(filter-out cxx%,$(filter-out libgfortran%,$(subst -,$(SPACE),$(BB_TRIPLET_LIBGFORTRAN_CXXABI)))))
-
 define bb-install
 TRIPLET_VAR := BB_TRIPLET
 ifeq ($(3),true)
 TRIPLET_VAR := $$(TRIPLET_VAR)_LIBGFORTRAN
 endif
 ifeq ($(4),true)
+# Darwin and FreeBSD use `clang` instead of `gcc`, so they don't have a cxx11 string ABI break
+ifeq (,$(filter $(OS),Darwin FreeBSD))
 TRIPLET_VAR := $$(TRIPLET_VAR)_CXXABI
 endif
-$(2)_BB_TRIPLET := $$($$(TRIPLET_VAR))
-$(2)_BB_URL := $$($(2)_BB_URL_BASE)/$$($(2)_BB_NAME).$$($(2)_BB_TRIPLET).tar.gz
-$(2)_BB_BASENAME := $$($(2)_BB_NAME)-$$($(2)_BB_REL).$$($(2)_BB_TRIPLET).tar.gz
+endif
+
+# Look for JLL version within Project.toml in stdlib/
+$(2)_STDLIB_PATH := $(JULIAHOME)/stdlib/$$($(2)_JLL_NAME)_jll
+
+# If the file doesn't exist (e.g. we're downloading a JLL release for something
+# that we don't actually ship) silently continue despite the Project.toml file missing.
+$(2)_JLL_VER ?= $$(shell [ -f $$($(2)_STDLIB_PATH)/Project.toml ] && grep "^version" $$($(2)_STDLIB_PATH)/Project.toml | sed -E 's/version[[:space:]]*=[[:space:]]*"?([^"]+)"?/\1/')
 
-$$(BUILDDIR)/$$($(2)_BB_NAME):
-	mkdir -p $$@
+# Allow things to override which JLL we pull from, e.g. libLLVM_jll vs. libLLVM_assert_jll
+$(2)_JLL_DOWNLOAD_NAME ?= $$($(2)_JLL_NAME)
 
-$$(SRCCACHE)/$$($(2)_BB_BASENAME): | $$(SRCCACHE)
+$(2)_BB_TRIPLET := $$($$(TRIPLET_VAR))
+$(2)_JLL_VER_NOPLUS := $$(firstword $$(subst +,$(SPACE),$$($(2)_JLL_VER)))
+$(2)_JLL_BASENAME := $$($(2)_JLL_DOWNLOAD_NAME).v$$($(2)_JLL_VER).$$($(2)_BB_TRIPLET).tar.gz
+$(2)_BB_URL := https://github.com/JuliaBinaryWrappers/$$($(2)_JLL_DOWNLOAD_NAME)_jll.jl/releases/download/$$($(2)_JLL_DOWNLOAD_NAME)-v$$($(2)_JLL_VER)/$$($(2)_JLL_DOWNLOAD_NAME).v$$($(2)_JLL_VER_NOPLUS).$$($(2)_BB_TRIPLET).tar.gz
+
+$$(SRCCACHE)/$$($(2)_JLL_BASENAME): | $$(SRCCACHE)
 	$$(JLDOWNLOAD) $$@ $$($(2)_BB_URL)
 
-stage-$(strip $1): $$(SRCCACHE)/$$($(2)_BB_BASENAME)
+stage-$(strip $1): $$(SRCCACHE)/$$($(2)_JLL_BASENAME)
 install-$(strip $1): $$(build_prefix)/manifest/$(strip $1)
 
 reinstall-$(strip $1):
@@ -36,21 +42,30 @@ reinstall-$(strip $1):
 	+$$(MAKE) stage-$(strip $1)
 	+$$(MAKE) install-$(strip $1)
 
-UNINSTALL_$(strip $1) := $$($(2)_BB_BASENAME:.tar.gz=) bb-uninstaller
+UNINSTALL_$(strip $1) := $$($(2)_JLL_BASENAME:.tar.gz=) bb-uninstaller
 
-$$(build_prefix)/manifest/$(strip $1): $$(SRCCACHE)/$$($(2)_BB_BASENAME) | $(build_prefix)/manifest
+$$(build_prefix)/manifest/$(strip $1): $$(SRCCACHE)/$$($(2)_JLL_BASENAME) | $(build_prefix)/manifest
 	-+[ ! -e $$@ ] || $$(MAKE) uninstall-$(strip $1)
 	$$(JLCHECKSUM) $$<
-	mkdir -p $$(build_prefix)
+ifneq (bsdtar,$(findstring bsdtar,$(TAR_TEST)))
+	@# work-around a gtar bug: they do some complicated work to avoid the mkdir
+	@# syscall, which is buggy when working with Tar.jl files so we manually do
+	@# the mkdir calls first in a pre-pass
+	$(TAR) -tzf $$< | xargs -n 1 dirname | sort -u | (cd $$(build_prefix) && xargs -t mkdir -p)
+endif
 	$(UNTAR) $$< -C $$(build_prefix)
 	echo '$$(UNINSTALL_$(strip $1))' > $$@
 
+# Special "checksum-foo" target to speed up `contrib/refresh_checksums.sh`
+checksum-$(1): $$(SRCCACHE)/$$($(2)_JLL_BASENAME)
+	$$(JLCHECKSUM) $$<
+
 clean-bb-download-$(1):
-	rm -f $$(SRCCACHE)/$$($(2)_BB_BASENAME)
+	rm -f $$(SRCCACHE)/$$($(2)_JLL_BASENAME)
 
 clean-$(1):
 distclean-$(1): clean-bb-download-$(1)
-get-$(1): $$(SRCCACHE)/$$($(2)_BB_BASENAME)
+get-$(1): $$(SRCCACHE)/$$($(2)_JLL_BASENAME)
 extract-$(1):
 configure-$(1):
 compile-$(1): get-$(1)
@@ -63,6 +78,6 @@ endef
 
 define bb-uninstaller
 uninstall-$(strip $1):
-	-cd $$(build_prefix) && rm -fdv -- $$$$($$(TAR) -tzf $$(SRCCACHE)/$2.tar.gz --exclude './$$$$')
-	-rm $$(build_prefix)/manifest/$(strip $1)
+	-cd $$(build_prefix) && rm -fv -- $$$$($$(TAR) -tzf $$(SRCCACHE)/$2.tar.gz | grep -v '/$$$$')
+	-rm -f $$(build_prefix)/manifest/$(strip $1)
 endef
diff --git a/deps/tools/common.mk b/deps/tools/common.mk
index 9af4966733df2f..006d3486fcc370 100644
--- a/deps/tools/common.mk
+++ b/deps/tools/common.mk
@@ -4,14 +4,12 @@
 # it will make its way into the LLVM build flags, and LLVM is picky about RPATH (though
 # apparently not on FreeBSD). Ref PR #22352
 
-CONFIGURE_COMMON := --prefix=$(abspath $(build_prefix)) --build=$(BUILD_MACHINE) --libdir=$(abspath $(build_libdir)) --bindir=$(abspath $(build_depsbindir)) $(CUSTOM_LD_LIBRARY_PATH)
+CONFIGURE_COMMON = --prefix=$(abspath $(build_prefix)) --build=$(BUILD_MACHINE) --libdir=$(abspath $(build_libdir)) --bindir=$(abspath $(build_depsbindir)) $(CUSTOM_LD_LIBRARY_PATH)
 ifneq ($(XC_HOST),)
 CONFIGURE_COMMON += --host=$(XC_HOST)
 endif
 ifeq ($(OS),WINNT)
-ifneq ($(USEMSVC), 1)
 CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) -Wl,--stack,8388608"
-endif
 else
 CONFIGURE_COMMON += LDFLAGS="$(LDFLAGS) $(RPATH_ESCAPED_ORIGIN)"
 endif
@@ -23,6 +21,10 @@ CMAKE_CXX_ARG := $(CXX_ARG)
 CMAKE_COMMON := -DCMAKE_INSTALL_PREFIX:PATH=$(build_prefix) -DCMAKE_PREFIX_PATH=$(build_prefix)
 CMAKE_COMMON += -DCMAKE_INSTALL_LIBDIR=$(build_libdir) -DCMAKE_INSTALL_BINDIR=$(build_bindir)
 CMAKE_COMMON += -DLIB_INSTALL_DIR=$(build_shlibdir)
+ifeq ($(OS), Darwin)
+CMAKE_COMMON += -DCMAKE_MACOSX_RPATH=1
+endif
+
 ifneq ($(VERBOSE), 0)
 CMAKE_COMMON += -DCMAKE_VERBOSE_MAKEFILE=ON
 endif
@@ -35,14 +37,12 @@ CMAKE_COMMON += -DCMAKE_CXX_COMPILER="$(CXX_BASE)"
 ifneq ($(strip $(CMAKE_CXX_ARG)),)
 CMAKE_COMMON += -DCMAKE_CXX_COMPILER_ARG1="$(CMAKE_CXX_ARG)"
 endif
-CMAKE_COMMON += -DCMAKE_LINKER="$(LD)" -DCMAKE_AR="$(shell which $(AR))" -DCMAKE_RANLIB="$(shell which $(RANLIB))"
+CMAKE_COMMON += -DCMAKE_LINKER="$$(which $(LD))" -DCMAKE_AR="$$(which $(AR))" -DCMAKE_RANLIB="$$(which $(RANLIB))"
 
 ifeq ($(OS),WINNT)
 CMAKE_COMMON += -DCMAKE_SYSTEM_NAME=Windows
-ifneq ($(BUILD_OS),WINNT)
 CMAKE_COMMON += -DCMAKE_RC_COMPILER="$$(which $(CROSS_COMPILE)windres)"
 endif
-endif
 
 # For now this is LLVM specific, but I expect it won't be in the future
 ifeq ($(CMAKE_GENERATOR),Ninja)
@@ -156,7 +156,7 @@ endif
 
 reinstall-$(strip $1):
 	+$$(MAKE) uninstall-$(strip $1)
-	-rm $$(build_staging)/$2.tgz
+	-rm -f $$(build_staging)/$2.tgz
 	+$$(MAKE) stage-$(strip $1)
 	+$$(MAKE) install-$(strip $1)
 
@@ -164,7 +164,7 @@ $$(build_staging)/$2.tgz: $$(BUILDDIR)/$2/build-compiled
 	rm -rf $$(build_staging)/$2
 	mkdir -p $$(build_staging)/$2$$(build_prefix)
 	$(call $3,$$(BUILDDIR)/$2,$$(build_staging)/$2,$4)
-	cd $$(build_staging)/$2$$(build_prefix) && tar -czf $$@.tmp .
+	cd $$(build_staging)/$2$$(build_prefix) && $$(TAR) -czf $$@.tmp .
 	rm -rf $$(build_staging)/$2
 	mv $$@.tmp $$@
 
@@ -172,7 +172,6 @@ UNINSTALL_$(strip $1) := $2 staged-uninstaller
 
 $$(build_prefix)/manifest/$(strip $1): $$(build_staging)/$2.tgz | $(build_prefix)/manifest
 	-+[ ! -e $$@ ] || $$(MAKE) uninstall-$(strip $1)
-	mkdir -p $$(build_prefix)
 	$(UNTAR) $$< -C $$(build_prefix)
 	$6
 	echo '$$(UNINSTALL_$(strip $1))' > $$@
@@ -180,8 +179,8 @@ endef
 
 define staged-uninstaller
 uninstall-$(strip $1):
-	-cd $$(build_prefix) && rm -fdv -- $$$$($$(TAR) -tzf $$(build_staging)/$2.tgz --exclude './$$$$')
-	-rm $$(build_prefix)/manifest/$(strip $1)
+	-cd $$(build_prefix) && rm -fv -- $$$$($$(TAR) -tzf $$(build_staging)/$2.tgz | grep -v '/$$$$')
+	-rm -f $$(build_prefix)/manifest/$(strip $1)
 endef
 
 
@@ -217,9 +216,9 @@ uninstall-$1:
 ifeq ($$(BUILD_OS), WINNT)
 	-cmd //C rmdir $$(call mingw_to_dos,$3/$1,cd $3 &&)
 else
-	-rm -r $3/$1
+	rm -rf $3/$1
 endif
-	-rm $$(build_prefix)/manifest/$1
+	-rm -f $$(build_prefix)/manifest/$1
 endef
 
 
@@ -234,6 +233,6 @@ endif
 
 ## phony targets ##
 
-.PHONY: default get extract configure compile fastcheck check install uninstall reinstall cleanall distcleanall \
+.PHONY: default get extract configure compile fastcheck check install uninstall reinstall cleanall distcleanall version-check \
 	get-* extract-* configure-* compile-* fastcheck-* check-* install-* uninstall-* reinstall-* clean-* distclean-* \
 	update-llvm
diff --git a/deps/tools/git-external.mk b/deps/tools/git-external.mk
index 7a88f1a2d78b73..65b40b87ee9376 100644
--- a/deps/tools/git-external.mk
+++ b/deps/tools/git-external.mk
@@ -63,13 +63,16 @@ $$($2_SRC_FILE): | $$(SRCCACHE)
 	$$(JLDOWNLOAD) $$@ $$(call $2_TAR_URL,$$($2_SHA1))
 $5/$$($2_SRC_DIR)/source-extracted: $$($2_SRC_FILE)
 	$$(JLCHECKSUM) $$<
-	-[ ! \( -e $$(dir $$@) -o -h $$(dir $$@) \) ] || rm -r $$(dir $$@)
+	-[ ! \( -e $$(dir $$@) -o -h $$(dir $$@) \) ] || rm -rf $$(dir $$@)
 	mkdir -p $$(dir $$@)
 	$(TAR) -C $$(dir $$@) --strip-components 1 -xf $$<
 	echo 1 > $$@
+
+checksum-$(1): $$($2_SRC_FILE)
+	$$(JLCHECKSUM) $$<
 endif # DEPS_GIT
 
 $$(build_prefix)/manifest/$1: $$(SRCDIR)/$1.version # make the manifest stale if the version file is touched (causing re-install for compliant targets)
 distclean-$1:
-	-rm -rf $5/$$($2_SRC_DIR) $$($2_SRC_FILE) $$(BUILDDIR)/$$($2_SRC_DIR)
+	rm -rf $5/$$($2_SRC_DIR) $$($2_SRC_FILE) $$(BUILDDIR)/$$($2_SRC_DIR)
 endef
diff --git a/deps/tools/jlchecksum b/deps/tools/jlchecksum
index 93ef872cce6bc6..87db805dbfab35 100755
--- a/deps/tools/jlchecksum
+++ b/deps/tools/jlchecksum
@@ -8,6 +8,8 @@ if [ -z "$1" ]; then
     exit 1
 fi
 
+set -eu
+
 # Get the deps directory, one level up from this script
 DEPSDIR="$( cd "$( dirname "$0" )"/.. && pwd )"
 
@@ -21,7 +23,7 @@ print_hash()
     if [ ${#1} -gt 64 ]; then
         NUM_LINES=$(( (${#1} + 63) / 64))
         for i in `seq 0 1 $((NUM_LINES - 1))`; do
-            str_piece=$(echo "$1" | awk "{ string=substr(\$0, $((i*64 + 1)), $(((i+1)*64))); print string; }")
+            str_piece=$(echo "$1" | awk "{ string=substr(\$0, $((i*64 + 1)), 64); print string; }")
             echo "      $str_piece"
         done
     else
@@ -29,8 +31,14 @@ print_hash()
     fi
 }
 
+abspath()
+{
+    echo $(cd $(dirname "$1"); pwd)/$BASENAME;
+}
+
 checksum_error()
 {
+    ARG1=$(realpath "$ARG1" 2>/dev/null || echo $(abspath "$ARG1"))
     echo "===============================================================================" >&2
     echo "  ERROR: $CHECKSUM_TYPE checksum failure on $BASENAME, should be:" >&2
     print_hash "$TRUE_CHECKSUM"
@@ -38,7 +46,7 @@ checksum_error()
     print_hash "$CURR_CHECKSUM"
     echo "  This can happen due to bad downloads or network proxies, please check your" >&2
     echo "  network proxy/firewall settings and delete" >&2
-    echo "  $(realpath $ARG1)" >&2
+    echo "  $ARG1" >&2
     echo "  to force a redownload when you are ready" >&2
     echo "===============================================================================" >&2
     exit 2
@@ -46,7 +54,19 @@ checksum_error()
 
 find_checksum()
 {
+    for pack in "$DEPSDIR"/checksums/*; do
+        if [ -f "$pack" ]; then
+            TRUE_CHECKSUM=$(awk -F / "{ if (\$1 == \"$BASENAME\" && \$2 == \"$CHECKSUM_TYPE\") print \$3 }" "$pack")
+            if [ $TRUE_CHECKSUM ]; then
+                return
+            fi
+        fi
+    done
     if [ ! -f "$DEPSDIR/checksums/$BASENAME/$CHECKSUM_TYPE" ]; then
+        if [ ${TAGGED_RELEASE_BANNER:-} ]; then
+            echo "WARNING: $CHECKSUM_TYPE checksum for $BASENAME not found in deps/checksums/, failing release build." >&2
+            exit 3
+        fi
         echo "WARNING: $CHECKSUM_TYPE checksum for $BASENAME not found in deps/checksums/, autogenerating..." >&2
 
         # Generate as many checksum types as we can
diff --git a/deps/tools/stdlib-external.mk b/deps/tools/stdlib-external.mk
index 043a53341193a6..60f50b56ee2e0f 100644
--- a/deps/tools/stdlib-external.mk
+++ b/deps/tools/stdlib-external.mk
@@ -18,7 +18,7 @@ $$(BUILDDIR)/$$($2_SRC_DIR)/build-compiled: $$(BUILDDIR)/$$($2_SRC_DIR)/source-e
 	echo 1 > $$@
 $$(eval $$(call symlink_install,$1,$$$$($2_SRC_DIR),$$$$(build_datarootdir)/julia/stdlib/$$$$(VERSDIR)))
 clean-$1:
-	-rm $$(BUILDDIR)/$$($2_SRC_DIR)/build-compiled
+	-rm -f $$(BUILDDIR)/$$($2_SRC_DIR)/build-compiled
 get-$1: $$($2_SRC_FILE)
 extract-$1: $$(BUILDDIR)/$$($2_SRC_DIR)/source-extracted
 configure-$1: extract-$1
diff --git a/deps/tools/uninstallers.mk b/deps/tools/uninstallers.mk
index e6e37292ab1c4f..48387914643db0 100644
--- a/deps/tools/uninstallers.mk
+++ b/deps/tools/uninstallers.mk
@@ -2,10 +2,6 @@
 # defines uninstallers and version-checks
 # based on the contents of the UNINSTALL_* variables and the manifest files
 
-install: version-check
-version-check: $(addprefix version-check-, $(DEP_LIBS_STAGED))
-uninstall: $(addprefix uninstall-, $(DEP_LIBS_STAGED))
-
 ## read 'uninstall-*' definition from either the manifest or the current session
 define define-uninstaller
 MANIFEST_$1 := $$(shell [ -e $$(build_prefix)/manifest/$1 ] && cat $$(build_prefix)/manifest/$1)
@@ -17,12 +13,15 @@ ifneq ($$(UNINST_HOW_$1),)
 UNINST_WHO_$1 := $$(firstword $$(MANIFEST_$1))
 UNINST_WHERE_$1 := $$(wordlist 3,99,$$(MANIFEST_$1))
 $$(eval $$(call $$(UNINST_HOW_$1),$1,$$(UNINST_WHO_$1),$$(UNINST_WHERE_$1)))
+else
+uninstall-$1:
+	@echo "skipping uninstall: $1 not installed"
 endif
 endef
-$(foreach dep,$(DEP_LIBS_STAGED),$(eval $(call define-uninstaller,$(dep))))
+$(foreach dep,$(DEP_LIBS_STAGED_ALL),$(eval $(call define-uninstaller,$(dep))))
 
 # for each subproject with a manifest, keep the user aware if something is not the expected version
-$(addprefix version-check-,$(DEP_LIBS_STAGED)) : version-check-% : install-%
+$(addprefix version-check-,$(DEP_LIBS_STAGED_ALL)) : version-check-% : install-%
 	@if [ ! -e $(build_prefix)/manifest/$* ] || ( \
 			[ "1" != "`wc -w $(build_prefix)/manifest/$* | cut -f 1 -d ' '`" ] && \
 			[ "$(UNINSTALL_$*)" != "`cat $(build_prefix)/manifest/$*`" ]) ; then \
diff --git a/deps/unwind.mk b/deps/unwind.mk
index 08d8990a720e8f..ad7a91f4dff4a1 100644
--- a/deps/unwind.mk
+++ b/deps/unwind.mk
@@ -1,11 +1,15 @@
 ## UNWIND ##
 
 ifneq ($(USE_BINARYBUILDER_LIBUNWIND),1)
-LIBUNWIND_CFLAGS := -U_FORTIFY_SOURCE $(fPIC)
+LIBUNWIND_CFLAGS := -U_FORTIFY_SOURCE $(fPIC) -lz
 LIBUNWIND_CPPFLAGS :=
 
+ifeq ($(USE_SYSTEM_ZLIB),0)
+$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: | $(build_prefix)/manifest/zlib
+endif
+
 $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://github.com/libunwind/libunwind/releases/download/v$(UNWIND_VER)/libunwind-$(UNWIND_VER).tar.gz
+	$(JLDOWNLOAD) $@ https://github.com/libunwind/libunwind/releases/download/v$(UNWIND_VER_TAG)/libunwind-$(UNWIND_VER).tar.gz
 
 $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz
 	$(JLCHECKSUM) $<
@@ -13,6 +17,9 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted: $(SRCCACHE)/libunwind-$(UN
 	touch -c $(SRCCACHE)/libunwind-$(UNWIND_VER)/configure # old target
 	echo 1 > $@
 
+checksum-unwind: $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz
+	$(JLCHECKSUM) $<
+
 $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-prefer-extbl.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted
 	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f < $(SRCDIR)/patches/libunwind-prefer-extbl.patch
 	echo 1 > $@
@@ -21,10 +28,18 @@ $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied: $(SRCCAC
 	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f < $(SRCDIR)/patches/libunwind-static-arm.patch
 	echo 1 > $@
 
-$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-static-arm.patch-applied
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u < $(SRCDIR)/patches/libunwind-cfa-rsp.patch
+	echo 1 > $@
+
+$(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied: $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-cfa-rsp.patch-applied
+	cd $(SRCCACHE)/libunwind-$(UNWIND_VER) && patch -p1 -f -u -l < $(SRCDIR)/patches/libunwind-dwarf-table.patch
+	echo 1 > $@
+
+$(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured: $(SRCCACHE)/libunwind-$(UNWIND_VER)/source-extracted $(SRCCACHE)/libunwind-$(UNWIND_VER)/libunwind-dwarf-table.patch-applied
 	mkdir -p $(dir $@)
 	cd $(dir $@) && \
-	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --disable-shared --disable-minidebuginfo --disable-tests
+	$(dir $<)/configure $(CONFIGURE_COMMON) CPPFLAGS="$(CPPFLAGS) $(LIBUNWIND_CPPFLAGS)" CFLAGS="$(CFLAGS) $(LIBUNWIND_CFLAGS)" --enable-shared --disable-minidebuginfo --disable-tests --enable-zlibdebuginfo
 	echo 1 > $@
 
 $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured
@@ -42,11 +57,11 @@ $(eval $(call staged-install, \
 	MAKE_INSTALL,,,))
 
 clean-unwind:
-	-rm $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled
+	-rm -f $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-configured $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/libunwind-$(UNWIND_VER) clean
 
 distclean-unwind:
-	-rm -rf $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz \
+	rm -rf $(SRCCACHE)/libunwind-$(UNWIND_VER).tar.gz \
 		$(SRCCACHE)/libunwind-$(UNWIND_VER) \
 		$(BUILDDIR)/libunwind-$(UNWIND_VER)
 
@@ -59,59 +74,66 @@ fastcheck-unwind: #none
 check-unwind: $(BUILDDIR)/libunwind-$(UNWIND_VER)/build-checked
 
 
-## OS X Unwind ##
+## LLVM libunwind ##
 
-OSXUNWIND_FLAGS := ARCH="$(ARCH)" CC="$(CC)" FC="$(FC)" AR="$(AR)" OS="$(OS)" USECLANG=$(USECLANG) USEGCC=$(USEGCC) CFLAGS="$(CFLAGS) -ggdb3 -O0" CXXFLAGS="$(CXXFLAGS) -ggdb3 -O0" SFLAGS="-ggdb3" LDFLAGS="$(LDFLAGS) -Wl,-macosx_version_min,10.7"
+LLVMUNWIND_OPTS := $(CMAKE_COMMON) -DCMAKE_BUILD_TYPE=MinSizeRel -DLIBUNWIND_ENABLE_PEDANTIC=OFF -DLLVM_CONFIG_PATH=$(build_depsbindir)/llvm-config
 
-$(SRCCACHE)/libosxunwind-$(OSXUNWIND_VER).tar.gz: | $(SRCCACHE)
-	$(JLDOWNLOAD) $@ https://github.com/JuliaLang/libosxunwind/archive/v$(OSXUNWIND_VER).tar.gz
+$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz: | $(SRCCACHE)
+	$(JLDOWNLOAD) $@ https://github.com/llvm/llvm-project/releases/download/llvmorg-$(LLVMUNWIND_VER)/libunwind-$(LLVMUNWIND_VER).src.tar.xz
 
-$(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER)/source-extracted: $(SRCCACHE)/libosxunwind-$(OSXUNWIND_VER).tar.gz
+$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
 	$(JLCHECKSUM) $<
-	mkdir -p $(BUILDDIR)
-	cd $(BUILDDIR) && $(TAR) xfz $<
+	cd $(dir $<) && $(TAR) xf $<
+	mv $(SRCCACHE)/libunwind-$(LLVMUNWIND_VER).src $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)
 	echo 1 > $@
 
-$(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER)/build-compiled: $(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER)/source-extracted
-	$(MAKE) -C $(dir $<) $(OSXUNWIND_FLAGS)
+$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted
+	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-prologue-epilogue.patch
 	echo 1 > $@
 
-$(build_prefix)/manifest/osxunwind: $(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER)/build-compiled | $(build_libdir) $(build_shlibdir) $(build_includedir) $(build_prefix)/manifest
-	cp $(dir $<)/libosxunwind.a $(build_libdir)/libosxunwind.a
-	cp $(dir $<)/libosxunwind.$(SHLIB_EXT) $(build_shlibdir)/libosxunwind.$(SHLIB_EXT)
-	cp -R $(dir $<)/include/* $(build_includedir)
-	$(INSTALL_NAME_CMD)libosxunwind.$(SHLIB_EXT) $(build_shlibdir)/libosxunwind.$(SHLIB_EXT)
-	echo $(OSXUNWIND_VER) > $(build_prefix)/manifest/osxunwind
+$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-prologue-epilogue.patch-applied
+	cd $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) && patch -p2 -f < $(SRCDIR)/patches/llvm-libunwind-force-dwarf.patch
+	echo 1 > $@
 
-clean-osxunwind:
-	-rm $(build_prefix)/manifest/osxunwind $(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER)/build-compiled
-	-rm -r $(build_libdir)/libosxunwind.a $(build_shlibdir)/libosxunwind.$(SHLIB_EXT) \
-		$(build_includedir)/mach-o/ $(build_includedir)/unwind.h $(build_includedir)/libunwind.h
-	-$(MAKE) -C $(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER) clean $(OSXUNWIND_FLAGS)
+checksum-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
+	$(JLCHECKSUM) $<
 
-distclean-osxunwind:
-	-rm -rf $(SRCCACHE)/libosxunwind-$(OSXUNWIND_VER).tar.gz \
-		$(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER)
+$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/llvm-libunwind-force-dwarf.patch-applied
+	mkdir -p $(dir $@)
+	cd $(dir $@) && \
+	$(CMAKE) $(dir $<) $(LLVMUNWIND_OPTS)
+	echo 1 > $@
 
+$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured
+	$(MAKE) -C $(dir $<)
+	echo 1 > $@
 
-get-osxunwind: $(SRCCACHE)/libosxunwind-$(OSXUNWIND_VER).tar.gz
-extract-osxunwind: $(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER)/source-extracted
-configure-osxunwind: extract-osxunwind
-compile-osxunwind: $(BUILDDIR)/libosxunwind-$(OSXUNWIND_VER)/build-compiled
-fastcheck-osxunwind: check-osxunwind
-check-osxunwind: compile-osxunwind
-install-osxunwind: $(build_prefix)/manifest/osxunwind
+$(eval $(call staged-install, \
+	llvmunwind,llvmunwind-$(LLVMUNWIND_VER), \
+	MAKE_INSTALL,,, \
+	cp -fR $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/include/* $(build_includedir)))
+
+clean-llvmunwind:
+	-rm -f $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled
+	rm -rf $(build_includedir)/mach-o/ $(build_includedir)/unwind.h $(build_includedir)/libunwind.h
+	-$(MAKE) -C $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER) clean
+
+distclean-llvmunwind:
+	rm -rf $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz \
+		$(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER) \
+		$(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)
+
+get-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER).tar.xz
+extract-llvmunwind: $(SRCCACHE)/llvmunwind-$(LLVMUNWIND_VER)/source-extracted
+configure-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-configured
+compile-llvmunwind: $(BUILDDIR)/llvmunwind-$(LLVMUNWIND_VER)/build-compiled
+fastcheck-llvmunwind: check-llvmunwind
+check-llvmunwind: # no test/check provided by Makefile
 
 else # USE_BINARYBUILDER_LIBUNWIND
 
-UNWIND_BB_URL_BASE := https://github.com/JuliaPackaging/Yggdrasil/releases/download/LibUnwind-v$(UNWIND_VER)+$(UNWIND_BB_REL)
-UNWIND_BB_NAME := LibUnwind.v$(UNWIND_VER)
-
 $(eval $(call bb-install,unwind,UNWIND,false))
 
-OSXUNWIND_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/LibOSXUnwind_jll.jl/releases/download/LibOSXUnwind-v$(OSXUNWIND_VER)+$(OSXUNWIND_BB_REL)
-OSXUNWIND_BB_NAME := LibOSXUnwind.v$(OSXUNWIND_VER)
-
-$(eval $(call bb-install,osxunwind,OSXUNWIND,false))
+$(eval $(call bb-install,llvmunwind,LLVMUNWIND,false))
 
 endif
diff --git a/deps/utf8proc.mk b/deps/utf8proc.mk
index b368e1c12b0dae..70cf4e396ff651 100644
--- a/deps/utf8proc.mk
+++ b/deps/utf8proc.mk
@@ -1,5 +1,5 @@
 ## UTF8PROC ##
-UTF8PROC_GIT_URL := git://github.com/JuliaLang/utf8proc.git
+UTF8PROC_GIT_URL := https://github.com/JuliaLang/utf8proc.git
 UTF8PROC_TAR_URL = https://api.github.com/repos/JuliaLang/utf8proc/tarball/$1
 $(eval $(call git-external,utf8proc,UTF8PROC,,,$(BUILDDIR)))
 
@@ -29,7 +29,7 @@ $(eval $(call staged-install, \
 	UTF8PROC_INSTALL,,,))
 
 clean-utf8proc:
-	-rm $(BUILDDIR)/$(UTF8PROC_SRC_DIR)/build-compiled
+	-rm -f $(BUILDDIR)/$(UTF8PROC_SRC_DIR)/build-compiled
 	-$(MAKE) -C $(BUILDDIR)/$(UTF8PROC_SRC_DIR) clean
 
 get-utf8proc: $(UTF8PROC_SRC_FILE)
diff --git a/deps/utf8proc.version b/deps/utf8proc.version
index 32a970d0a12d17..246a38de00bae8 100644
--- a/deps/utf8proc.version
+++ b/deps/utf8proc.version
@@ -1,2 +1,2 @@
-UTF8PROC_BRANCH=v2.5.0
-UTF8PROC_SHA1=0890a538bf8238cded9be0c81171f57e43f2c755
+UTF8PROC_BRANCH=v2.7.0
+UTF8PROC_SHA1=8ca6144c85c165987cb1c5d8395c7314e13d4cd7
diff --git a/deps/zlib.mk b/deps/zlib.mk
index cdefe1b81ab827..d43f829c131112 100644
--- a/deps/zlib.mk
+++ b/deps/zlib.mk
@@ -1,8 +1,9 @@
-ZLIB_GIT_URL := git://github.com/madler/zlib.git
+## Zlib ##
+ifneq ($(USE_BINARYBUILDER_ZLIB), 1)
+ZLIB_GIT_URL := https://github.com/madler/zlib.git
 ZLIB_TAR_URL = https://api.github.com/repos/madler/zlib/tarball/$1
 $(eval $(call git-external,zlib,ZLIB,,,$(SRCCACHE)))
 
-ifneq ($(USE_BINARYBUILDER_ZLIB), 1)
 $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-configured: $(SRCCACHE)/$(ZLIB_SRC_DIR)/source-extracted
 	mkdir -p $(dir $@)
 	cd $(dir $@) && $(CMAKE) -DCMAKE_INSTALL_PREFIX=$(abspath $(build_prefix)) -DCMAKE_BUILD_TYPE=Release -DUNIX=true $(dir $<)
@@ -18,7 +19,7 @@ $(eval $(call staged-install, \
 	$(INSTALL_NAME_CMD)libz.$(SHLIB_EXT) $(build_shlibdir)/libz.$(SHLIB_EXT)))
 
 clean-zlib:
-	-rm $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-compiled $(build_libdir)/libz.a* $(build_libdir)/libz.so* $(build_includedir)/zlib.h $(build_includedir)/zconf.h
+	-rm -f $(BUILDDIR)/$(ZLIB_SRC_DIR)/build-compiled $(build_libdir)/libz.a* $(build_libdir)/libz.so* $(build_includedir)/zlib.h $(build_includedir)/zconf.h
 	-$(MAKE) -C $(BUILDDIR)/$(ZLIB_SRC_DIR) distclean $(ZLIB_FLAGS)
 
 get-zlib: $(ZLIB_SRC_FILE)
@@ -30,8 +31,6 @@ check-zlib: compile-zlib
 
 else # USE_BINARYBUILDER_ZLIB
 
-ZLIB_BB_URL_BASE := https://github.com/JuliaBinaryWrappers/Zlib_jll.jl/releases/download/Zlib-v$(ZLIB_VER)+$(ZLIB_BB_REL)
-ZLIB_BB_NAME := Zlib.v$(ZLIB_VER)
 $(eval $(call bb-install,zlib,ZLIB,false))
 
 endif # USE_BINARYBUILDER_ZLIB
diff --git a/deps/zlib.version b/deps/zlib.version
index e3631693150511..0b16a7f662dd17 100644
--- a/deps/zlib.version
+++ b/deps/zlib.version
@@ -1,2 +1,2 @@
-ZLIB_BRANCH=v1.2.11
-ZLIB_SHA1=cacf7f1d4e3d44d871b605da3b647f07d718623f
+ZLIB_BRANCH=v1.2.12
+ZLIB_SHA1=21767c654d31d2dccdde4330529775c6c5fd5389
diff --git a/doc/Makefile b/doc/Makefile
index 3bf710c6a4a480..246d5c3f4b5139 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -19,10 +19,11 @@ help:
 	@echo "To run linkcheck, use 'make <target> linkcheck=true'"
 	@echo "To run doctests, use 'make <target> doctest=true'"
 	@echo "To fix outdated doctests, use 'make <target> doctest=fix'"
+	@echo "To run doctests using Revise (to test changes without rebuilding the sysimage), use 'make <target> doctest=true revise=true'"
 
 
 DOCUMENTER_OPTIONS := linkcheck=$(linkcheck) doctest=$(doctest) buildroot=$(call cygpath_w,$(BUILDROOT)) \
-    texplatform=$(texplatform)
+    texplatform=$(texplatform) revise=$(revise)
 
 UNICODE_DATA_VERSION=13.0.0
 $(SRCCACHE)/UnicodeData-$(UNICODE_DATA_VERSION).txt:
@@ -33,8 +34,11 @@ deps: $(SRCCACHE)/UnicodeData-$(UNICODE_DATA_VERSION).txt
 	$(JLCHECKSUM) "$<"
 	cp "$<" UnicodeData.txt
 
+checksum-unicodedata: $(SRCCACHE)/UnicodeData-$(UNICODE_DATA_VERSION).txt
+	$(JLCHECKSUM) "$<"
+
 clean:
-	-rm -rf _build/* deps/* docbuild.log UnicodeData.txt
+	rm -rf _build/* deps/* docbuild.log UnicodeData.txt
 
 cleanall: clean
 
@@ -48,7 +52,7 @@ pdf: deps
 	$(JULIA_EXECUTABLE) --color=yes $(call cygpath_w,$(SRCDIR)/make.jl) -- pdf $(DOCUMENTER_OPTIONS)
 	@echo "Build finished."
 
-# The deploy target should only be called in Travis builds
+# The deploy target should only be called in CI builds
 deploy: deps
 	@echo "Deploying HTML documentation."
 	$(JULIA_EXECUTABLE) --color=yes $(call cygpath_w,$(SRCDIR)/make.jl) -- deploy $(DOCUMENTER_OPTIONS)
diff --git a/doc/Manifest.toml b/doc/Manifest.toml
index cf28a009d70613..b34ea115c26e91 100644
--- a/doc/Manifest.toml
+++ b/doc/Manifest.toml
@@ -1,99 +1,98 @@
 # This file is machine-generated - editing it directly is not advised
 
-[[Base64]]
+julia_version = "1.9.0-DEV"
+manifest_format = "2.0"
+project_hash = "e0c77beb18dc1f6cce661ebd60658c0c1a77390f"
+
+[[deps.ANSIColoredPrinters]]
+git-tree-sha1 = "574baf8110975760d391c710b6341da1afa48d8c"
+uuid = "a4c015fc-c6ff-483c-b24f-f7ea428134e9"
+version = "0.0.1"
+
+[[deps.Base64]]
 uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
 
-[[Dates]]
+[[deps.Dates]]
 deps = ["Printf"]
 uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
 
-[[Distributed]]
-deps = ["Random", "Serialization", "Sockets"]
-uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
-
-[[DocStringExtensions]]
-deps = ["LibGit2", "Markdown", "Pkg", "Test"]
-git-tree-sha1 = "c5714d9bcdba66389612dc4c47ed827c64112997"
+[[deps.DocStringExtensions]]
+deps = ["LibGit2"]
+git-tree-sha1 = "b19534d1895d702889b219c382a6e18010797f0b"
 uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
-version = "0.8.2"
+version = "0.8.6"
 
-[[Documenter]]
-deps = ["Base64", "Dates", "DocStringExtensions", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"]
-git-tree-sha1 = "fb1ff838470573adc15c71ba79f8d31328f035da"
+[[deps.Documenter]]
+deps = ["ANSIColoredPrinters", "Base64", "Dates", "DocStringExtensions", "IOCapture", "InteractiveUtils", "JSON", "LibGit2", "Logging", "Markdown", "REPL", "Test", "Unicode"]
+git-tree-sha1 = "6edbf28671b4df4f692e54ae72f1e35851cfbf38"
 uuid = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-version = "0.25.2"
+version = "0.27.16"
 
-[[DocumenterLaTeX]]
-deps = ["Documenter", "Test"]
-git-tree-sha1 = "653299370be20ff580bccd707dc9f360c0852d7f"
-uuid = "cd674d7a-5f81-5cf3-af33-235ef1834b99"
-version = "0.2.0"
+[[deps.IOCapture]]
+deps = ["Logging", "Random"]
+git-tree-sha1 = "f7be53659ab06ddc986428d3a9dcc95f6fa6705a"
+uuid = "b5f81e59-6552-4d32-b1f0-c071b021bf89"
+version = "0.2.2"
 
-[[InteractiveUtils]]
+[[deps.InteractiveUtils]]
 deps = ["Markdown"]
 uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 
-[[JSON]]
+[[deps.JSON]]
 deps = ["Dates", "Mmap", "Parsers", "Unicode"]
-git-tree-sha1 = "b34d7cef7b337321e97d22242c3c2b91f476748e"
+git-tree-sha1 = "3c837543ddb02250ef42f4738347454f95079d4e"
 uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
-version = "0.21.0"
+version = "0.21.3"
 
-[[LibGit2]]
-deps = ["Printf"]
+[[deps.LibGit2]]
+deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
 uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
 
-[[Libdl]]
-uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-
-[[Logging]]
+[[deps.Logging]]
 uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
 
-[[Markdown]]
+[[deps.Markdown]]
 deps = ["Base64"]
 uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
 
-[[Mmap]]
+[[deps.Mmap]]
 uuid = "a63ad114-7e13-5084-954f-fe012c677804"
 
-[[Parsers]]
-deps = ["Dates", "Test"]
-git-tree-sha1 = "8077624b3c450b15c087944363606a6ba12f925e"
-uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
-version = "1.0.10"
+[[deps.NetworkOptions]]
+uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
+version = "1.2.0"
 
-[[Pkg]]
-deps = ["Dates", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
-uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+[[deps.Parsers]]
+deps = ["Dates"]
+git-tree-sha1 = "621f4f3b4977325b9128d5fae7a8b4829a0c2222"
+uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0"
+version = "2.2.4"
 
-[[Printf]]
+[[deps.Printf]]
 deps = ["Unicode"]
 uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 
-[[REPL]]
+[[deps.REPL]]
 deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
 uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
 
-[[Random]]
-deps = ["Serialization"]
+[[deps.Random]]
+deps = ["SHA", "Serialization"]
 uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
-[[SHA]]
+[[deps.SHA]]
 uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
+version = "0.7.0"
 
-[[Serialization]]
+[[deps.Serialization]]
 uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 
-[[Sockets]]
+[[deps.Sockets]]
 uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
 
-[[Test]]
-deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
+[[deps.Test]]
+deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
-[[UUIDs]]
-deps = ["Random", "SHA"]
-uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
-
-[[Unicode]]
+[[deps.Unicode]]
 uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
diff --git a/doc/Project.toml b/doc/Project.toml
index c09e74d6533a49..dfa65cd107d069 100644
--- a/doc/Project.toml
+++ b/doc/Project.toml
@@ -1,3 +1,2 @@
 [deps]
 Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
-DocumenterLaTeX = "cd674d7a-5f81-5cf3-af33-235ef1834b99"
diff --git a/doc/build/README.md b/doc/build/README.md
deleted file mode 100644
index 83c822c5da3f20..00000000000000
--- a/doc/build/README.md
+++ /dev/null
@@ -1,25 +0,0 @@
-<a name="logo"/>
-<div align="center">
-<a href="https://julialang.org/" target="_blank">
-<img src="https://julialang.org/images/logo_hires.png" alt="Julia Logo" width="210" height="142"></img>
-</a>
-</div>
-
-This directory contains various details related to building Julia:
-
-* [Detailed build instructions](build.md)
-
-Notes for various OSes:
-
-* [Linux](linux.md)
-* [macOS](macos.md)
-* [Windows](windows.md)
-* [FreeBSD](freebsd.md)
-
-Notes for various architectures:
-
-* [ARM](arm.md)
-
-Notes for building Julia for distribution:
-
-* [Distribution Notes](distributing.md)
diff --git a/doc/build/windows.md b/doc/build/windows.md
deleted file mode 100644
index b90295d9f42e82..00000000000000
--- a/doc/build/windows.md
+++ /dev/null
@@ -1,320 +0,0 @@
-# Julia on Windows
-
-This file describes how to install, or build, and use Julia on Windows.
-
-For more general information about Julia, please see the
-[main README](https://github.com/JuliaLang/julia/blob/master/README.md)
-or the [documentation](https://docs.julialang.org).
-
-
-## General Information for Windows
-
-
-### Unicode font support
-
-The built-in Windows fonts have rather poor coverage of the Unicode character
-space.  The free [`DejaVu Sans Mono`](https://dejavu-fonts.github.io/) font can be used
-as a replacement font in the Windows console.  Since Windows 2000, simply
-downloading the font and installing it is insufficient, since Windows keeps a
-list of approved fonts in the registry.
-
-Instructions for adding fonts to the terminal are available at
-[this answer on superuser.com](https://superuser.com/a/5079)
-
-Additionally, rather than sticking with the default command prompt, you may want
-to use a different terminal emulator program, such as
-[Conemu](https://conemu.github.io/) or [Mintty](
-https://github.com/mintty/mintty) (note that running Julia on Mintty needs a
-copy of `stty.exe` in your `%PATH%` to work properly).  Alternatively, you may
-prefer the features of a more full-function IDE, such as [Juno](http://junolab.org),
-[Sublime-IJulia](https://github.com/quinnj/Sublime-IJulia), or
-[IJulia](https://github.com/JuliaLang/IJulia.jl).
-
-
-### Line endings
-
-Julia uses binary-mode files exclusively.  Unlike many other Windows programs,
-if you write `\n` to a file, you get a `\n` in the file, not some other bit
-pattern.  This matches the behavior exhibited by other operating systems.  If
-you have installed Git for Windows, it is suggested, but not required, that you
-configure your system Git to use the same convention:
-```sh
-git config --global core.eol lf
-git config --global core.autocrlf input
-```
-or edit `%USERPROFILE%\.gitconfig` and add/edit the lines:
-```
-[core]
-    eol = lf
-    autocrlf = input
-```
-
-## Binary distribution
-
-Julia runs on Windows 7 and later.
-Both the 32-bit and 64-bit versions are supported.
-The 32-bit (i686) binary will run on either a 32-bit and 64-bit operating system.
-The 64-bit (x86_64) binary will only run on 64-bit Windows and will otherwise refuse to launch.
-
- 1. [Download](https://julialang.org/downloads) the latest version of Julia.
-    Extract the binary to a reasonable destination folder, e.g. `C:\julia`.
-    By default, Julia will be installed into
-    `C:\Users\YOURNAME\AppData\Local\Julia-1.0.0\bin`,
-    where `YOURNAME` is your Windows user name.
-
- 2. Double-click the `julia` shortcut to launch Julia.
-
- 3. Julia's home directory is the location pointed to by the Windows environment
-    variable `%HOME%`: this directory is for instance where the startup file
-    `.julia/config/startup.jl` resides. `%HOMEDRIVE%\%HOMEPATH%` is used as a fallback if
-    `%HOME%` is not defined.
-
-## Source distribution
-
-### Supported build platforms
-
- -  Windows 10: supported (32 and 64 bits)
- -  Windows 8: supported (32 and 64 bits)
- -  Windows 7: supported (32 and 64 bits)
-
-### Cygwin-to-MinGW cross-compiling
-
-The recommended way of compiling Julia from source on Windows is by cross
-compiling from [Cygwin](https://www.cygwin.com), using versions of the
-MinGW-w64 compilers available through Cygwin's package manager.
-
- 1. Download and run Cygwin setup for [32 bit](https://cygwin.com/setup-x86.exe)
-    or [64 bit](https://cygwin.com/setup-x86_64.exe). Note, that you can compile
-    either 32 or 64 bit Julia from either 32 or 64 bit Cygwin. 64 bit Cygwin
-    has a slightly smaller but often more up-to-date selection of packages.
-
-    Advanced: you may skip steps 2-4 by running:
-
-        setup-x86_64.exe -s <url> -q -P cmake,gcc-g++,git,make,patch,curl,m4,python,p7zip,mingw64-i686-gcc-g++,mingw64-i686-gcc-fortran,mingw64-x86_64-gcc-g++,mingw64-x86_64-gcc-fortran
-        :: replace <url> with a site from https://cygwin.com/mirrors.html
-        :: or run setup manually first and select a mirror
-
- 2. Select installation location and download mirror.
-
- 3. At the '*Select Packages'* step, select the following:
-
-    1.  From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch`
-    2.  From the *Net* category: `curl`
-    3.  From *Interpreters* (or *Python*) category: `m4`, `python`
-    4.  From the *Archive* category: `p7zip`
-    5.  For 32 bit Julia, and also from the *Devel* category:
-        `mingw64-i686-gcc-g++` and `mingw64-i686-gcc-fortran`
-    6.  For 64 bit Julia, and also from the *Devel* category:
-        `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran`
-
- 4. At the *'Resolving Dependencies'* step, be sure to leave *'Select required
-    packages (RECOMMENDED)'* enabled.
-
- 5. Allow Cygwin installation to finish, then start from the installed shortcut
-    a *'Cygwin Terminal'*, or *'Cygwin64 Terminal'*, respectively.
-
- 6. Build Julia and its dependencies from source:
-
-    1. Get the Julia sources
-       ```sh
-       git clone https://github.com/JuliaLang/julia.git
-       cd julia
-       ```
-       Tip: If you get an `error: cannot fork() for fetch-pack: Resource
-       temporarily unavailable` from git, add `alias git="env PATH=/usr/bin git"`
-       to `~/.bashrc` and restart Cygwin.
-
-    2. Set the `XC_HOST` variable in `Make.user` to indicate MinGW-w64 cross
-       compilation
-       ```sh
-       echo 'XC_HOST = i686-w64-mingw32' > Make.user     # for 32 bit Julia
-       # or
-       echo 'XC_HOST = x86_64-w64-mingw32' > Make.user   # for 64 bit Julia
-       ```
-
-    3. Start the build
-       ```sh
-       make -j 4   # Adjust the number of threads (4) to match your build environment.
-       ```
-
-
-    > Protip: build both!
-    > ```sh
-    > make O=julia-win32 configure
-    > make O=julia-win64 configure
-    > echo 'XC_HOST = i686-w64-mingw32' > julia-win32/Make.user
-    > echo 'XC_HOST = x86_64-w64-mingw32' > julia-win64/Make.user
-    > echo 'ifeq ($(BUILDROOT),$(JULIAHOME))
-    >         $(error "in-tree build disabled")
-    >       endif' >> Make.user
-    > make -C julia-win32  # build for Windows x86 in julia-win32 folder
-    > make -C julia-win64  # build for Windows x86-64 in julia-win64 folder
-    > ```
-
- 7. Run Julia using the Julia executables directly
-    ```sh
-    usr/bin/julia.exe
-    usr/bin/julia-debug.exe
-    ```
-
-### Compiling with MinGW/MSYS2
-
-Compiling Julia from source using [MSYS2](https://msys2.github.io) has worked
-in the past but is not actively supported. Pull requests to restore support
-would be welcome. See a [past version of this file](
-https://github.com/JuliaLang/julia/blob/v0.6.0/README.windows.md)
-for the former instructions for compiling using MSYS2.
-
-
-### Cross-compiling from Unix
-
-You can also use MinGW-w64 cross compilers to build a Windows version of Julia from
-Linux, Mac, or the Windows Subsystem for Linux (WSL).
-
-For maximum compatibility with packages that use [WinRPM.jl](
-https://github.com/JuliaLang/WinRPM.jl) for binary dependencies on Windows, it
-is recommended that you use OpenSUSE 42.2 for cross-compiling a Windows build
-of Julia.  If you use a different Linux distribution or OS X, install
-[Vagrant](https://www.vagrantup.com/downloads.html) and use the following `Vagrantfile`:
-
-```
-# Vagrantfile for MinGW-w64 cross-compilation of Julia
-
-$script = <<SCRIPT
-# Change the following to i686-w64-mingw32 for 32 bit Julia:
-export XC_HOST=x86_64-w64-mingw32
-# Change the following to 32 for 32 bit Julia:
-export BITS=64
-zypper addrepo https://download.opensuse.org/repositories/windows:mingw:win$BITS/openSUSE_Leap_42.2/windows:mingw:win$BITS.repo
-zypper --gpg-auto-import-keys refresh
-zypper -n install --no-recommends git make cmake tar wine which curl \
-    python python-xml patch gcc-c++ m4 p7zip.i586 libxml2-tools winbind
-zypper -n install mingw$BITS-cross-gcc-c++ mingw$BITS-cross-gcc-fortran \
-    mingw$BITS-libstdc++6 mingw$BITS-libgfortran3 mingw$BITS-libssp0
-# opensuse packages the mingw runtime dlls under sys-root/mingw/bin, not /usr/lib64/gcc
-cp /usr/$XC_HOST/sys-root/mingw/bin/*.dll /usr/lib*/gcc/$XC_HOST/*/
-git clone git://github.com/JuliaLang/julia.git julia
-cd julia
-make -j4 win-extras julia-ui-release
-export WINEDEBUG=-all # suppress wine fixme's
-# this last step may need to be run interactively
-make -j4 binary-dist
-make -j4 exe
-SCRIPT
-
-Vagrant.configure("2") do |config|
-  config.vm.box = "bento/opensuse-leap-42.2"
-  config.vm.provider :virtualbox do |vb|
-    # Use VBoxManage to customize the VM. For example to change memory:
-    vb.memory = 2048
-  end
-  config.vm.provision :shell, :inline => $script
-end
-```
-
-
-### Cross-building Julia without Vagrant
-
-Alternatively, if you want to build it on the local system,
-use the following steps to cross-compile julia:
-
-First, you will need to ensure your system has the required dependencies.  We
-need wine (>=1.7.5), a system compiler, and some downloaders.
-
-**On Ubuntu** (on other linux systems, the dependency names are likely to be similar):
-```sh
-apt-get install wine-stable gcc wget p7zip-full winbind mingw-w64 gfortran-mingw-w64
-# switch all of the following to their "-posix" variants (interactively):
-for pkg in i686-w64-mingw32-g++ i686-w64-mingw32-gcc i686-w64-mingw32-gfortran x86_64-w64-mingw32-g++ x86_64-w64-mingw32-gcc x86_64-w64-mingw32-gfortran; do sudo update-alternatives --config $pkg; done
-```
-
-**On Mac**: Install XCode, XCode command line tools, X11 (now [XQuartz](
-https://www.xquartz.org/)), and [MacPorts](https://www.macports.org/install.php)
-or [Homebrew](https://brew.sh/).  Then run `port install wine wget mingw-w64`,
-or `brew install wine wget mingw-w64`, as appropriate.
-
-Then run the build:
-
- 1. `git clone https://github.com/JuliaLang/julia.git julia-win32`
- 2. `echo override XC_HOST = i686-w64-mingw32 >> Make.user`
- 3. `make`
- 4. `make win-extras` (Necessary before running `make binary-dist`)
- 5. `make binary-dist` then `make exe` to create the Windows installer.
- 6. move the `julia-*.exe` installer to the target machine
-
-If you are building for 64-bit windows, the steps are essentially the same.
-Just replace `i686` in `XC_HOST` with `x86_64`. (note: on Mac, wine only runs
-in 32-bit mode).
-
-
-## Debugging a cross-compiled build under wine
-
-The most effective way to debug a cross-compiled version of julia on the
-cross-compilation host is to install a windows version of gdb and run it under wine
-as usual. The pre-built packages available [as part of the MSYS2 project](
-https://sourceforge.net/projects/msys2/files/REPOS/MINGW/) are known to work.
-Apart from the GDB package you may also need the python and termcap packages.
-Finally, GDB's prompt may not work when launch from the command line. This can
-be worked around by prepending `wineconsole` to the regular GDB invocation.
-
-
-## Using a Windows VM
-
-[Vagrant](https://www.vagrantup.com/downloads.html) can also be used with a Windows
-guest VM via the `Vagrantfile` in [contrib/windows](
-https://github.com/JuliaLang/julia/blob/master/contrib/windows/Vagrantfile),
-just run `vagrant up` from that folder.
-
-
-## After compiling
-
-Compiling using one of the options above creates a basic Julia build, but not some
-extra components that are included if you run the full Julia binary installer.
-If you need these components, the easiest way to get them is to build the installer
-yourself using ```make win-extras``` followed by ```make binary-dist``` and ```make exe```. Then running the resulting installer.
-
-
-## Windows Build Debugging
-
-
-### GDB hangs with cygwin mintty
-
-- Run gdb under the windows console (cmd) instead. gdb [may not function properly](
-  https://www.cygwin.com/ml/cygwin/2009-02/msg00531.html) under mintty with non-
-  cygwin applications. You can use `cmd /c start` to start the windows console
-  from mintty if necessary.
-
-### GDB not attaching to the right process
-
- - Use the PID from the windows task manager or `WINPID` from the `ps` command
-   instead of the PID from unix style command line tools (e.g. `pgrep`).  You
-   may need to add the PID column if it is not shown by default in the windows
-   task manager.
-
-### GDB not showing the right backtrace
-
- - When attaching to the julia process, GDB may not be attaching to the right
-   thread.  Use `info threads` command to show all the threads and
-   `thread <threadno>` to switch threads.
- - Be sure to use a 32 bit version of GDB to debug a 32 bit build of Julia, or
-   a 64 bit version of GDB to debug a 64 bit build of Julia.
-
-### Build process is slow/eats memory/hangs my computer
-
- - Disable the Windows [Superfetch](https://en.wikipedia.org/wiki/Windows_Vista_I/O_technologies#SuperFetch)
-   and [Program Compatibility Assistant](
-   https://blogs.msdn.com/b/cjacks/archive/2011/11/22/managing-the-windows-7-program-compatibility-assistant-pca.aspx)
-   services, as they are known to have [spurious interactions](
-   https://cygwin.com/ml/cygwin/2011-12/msg00058.html) with MinGW/Cygwin.
-
-   As mentioned in the link above: excessive memory use by `svchost` specifically
-   may be investigated in the Task Manager by clicking on the high-memory
-   `svchost.exe` process and selecting `Go to Services`. Disable child services
-   one-by-one until a culprit is found.
-
- - Beware of [BLODA](https://cygwin.com/faq/faq.html#faq.using.bloda).
-   The [vmmap](https://technet.microsoft.com/en-us/sysinternals/dd535533.aspx)
-   tool is indispensable for identifying such software conflicts. Use vmmap to
-   inspect the list of loaded DLLs for bash, mintty, or another persistent
-   process used to drive the build. Essentially *any* DLL outside of the Windows
-   System directory is potential BLODA.
diff --git a/doc/make.jl b/doc/make.jl
index c3dd67cffe427e..972da3d7e38919 100644
--- a/doc/make.jl
+++ b/doc/make.jl
@@ -7,7 +7,7 @@ pushfirst!(DEPOT_PATH, joinpath(@__DIR__, "deps"))
 using Pkg
 Pkg.instantiate()
 
-using Documenter, DocumenterLaTeX
+using Documenter
 
 baremodule GenStdLib end
 
@@ -46,11 +46,20 @@ end
 const render_pdf = "pdf" in ARGS
 
 # Generate a suitable markdown file from NEWS.md and put it in src
-str = read(joinpath(@__DIR__, "..", "NEWS.md"), String)
-splitted = split(str, "<!--- generated by NEWS-update.jl: -->")
-@assert length(splitted) == 2
-replaced_links = replace(splitted[1], r"\[\#([0-9]*?)\]" => s"[#\g<1>](https://github.com/JuliaLang/julia/issues/\g<1>)")
-write(joinpath(@__DIR__, "src", "NEWS.md"), replaced_links)
+function generate_markdown(basename)
+    str = read(joinpath(@__DIR__, "..", "$basename.md"), String)
+    splitted = split(str, "<!--- generated by $basename-update.jl: -->")
+    @assert length(splitted) == 2
+    replaced_links = replace(splitted[1], r"\[\#([0-9]*?)\]" => s"[#\g<1>](https://github.com/JuliaLang/julia/issues/\g<1>)")
+    write(
+        joinpath(@__DIR__, "src", "$basename.md"),
+        """
+        ```@meta
+        EditURL = "https://github.com/JuliaLang/julia/blob/master/$basename.md"
+        ```
+        """ * replaced_links)
+end
+generate_markdown("NEWS")
 
 Manual = [
     "manual/getting-started.md",
@@ -91,6 +100,7 @@ Manual = [
     "manual/faq.md",
     "manual/noteworthy-differences.md",
     "manual/unicode-input.md",
+    "manual/command-line-options.md",
 ]
 
 BaseDocs = [
@@ -140,6 +150,7 @@ DevDocs = [
         "devdocs/require.md",
         "devdocs/inference.md",
         "devdocs/ssair.md",
+        "devdocs/EscapeAnalysis.md",
         "devdocs/gc-sa.md",
     ],
     "Developing/debugging Julia's C code" => [
@@ -147,6 +158,16 @@ DevDocs = [
         "devdocs/debuggingtips.md",
         "devdocs/valgrind.md",
         "devdocs/sanitizers.md",
+        "devdocs/probes.md",
+    ],
+    "Building Julia" => [
+        "devdocs/build/build.md",
+        "devdocs/build/linux.md",
+        "devdocs/build/macos.md",
+        "devdocs/build/windows.md",
+        "devdocs/build/freebsd.md",
+        "devdocs/build/arm.md",
+        "devdocs/build/distributing.md",
     ]
 ]
 
@@ -156,8 +177,8 @@ const PAGES = [
     "Manual" => ["index.md", Manual...],
     "Base" => BaseDocs,
     "Standard Library" => StdlibDocs,
-    "Developer Documentation" => DevDocs,
-    hide("NEWS.md"),
+    # Add "Release Notes" to devdocs
+    "Developer Documentation" => [DevDocs..., hide("NEWS.md")],
 ]
 else
 const PAGES = [
@@ -170,26 +191,96 @@ const PAGES = [
 ]
 end
 
+const use_revise = "revise=true" in ARGS
+if use_revise
+    let revise_env = joinpath(@__DIR__, "deps", "revise")
+        Pkg.activate(revise_env)
+        Pkg.add("Revise"; preserve=Pkg.PRESERVE_NONE)
+        Base.ACTIVE_PROJECT[] = nothing
+        pushfirst!(LOAD_PATH, revise_env)
+    end
+end
+function maybe_revise(ex)
+    use_revise || return ex
+    STDLIB_DIR = Sys.STDLIB
+    STDLIBS = filter!(x -> isfile(joinpath(STDLIB_DIR, x, "src", "$(x).jl")), readdir(STDLIB_DIR))
+    return quote
+        $ex
+        using Revise
+        const STDLIBS = $STDLIBS
+        union!(Revise.stdlib_names, Symbol.(STDLIBS))
+        Revise.track(Core.Compiler)
+        Revise.track(Base)
+        for (id, mod) in Base.loaded_modules
+            if id.name in STDLIBS
+                Revise.track(mod)
+            end
+        end
+        Revise.revise()
+    end
+end
+
 for stdlib in STDLIB_DOCS
     @eval using $(stdlib.stdlib)
     # All standard library modules get `using $STDLIB` as their global
-    DocMeta.setdocmeta!(Base.root_module(Base, stdlib.stdlib), :DocTestSetup, :(using $(stdlib.stdlib)), recursive=true)
+    DocMeta.setdocmeta!(
+        Base.root_module(Base, stdlib.stdlib),
+        :DocTestSetup,
+        maybe_revise(:(using $(stdlib.stdlib)));
+        recursive=true,
+    )
 end
 # A few standard libraries need more than just the module itself in the DocTestSetup.
 # This overwrites the existing ones from above though, hence the warn=false.
-DocMeta.setdocmeta!(SparseArrays, :DocTestSetup, :(using SparseArrays, LinearAlgebra), recursive=true, warn=false)
-DocMeta.setdocmeta!(SuiteSparse, :DocTestSetup, :(using SparseArrays, LinearAlgebra, SuiteSparse), recursive=true, warn=false)
-DocMeta.setdocmeta!(UUIDs, :DocTestSetup, :(using UUIDs, Random), recursive=true, warn=false)
-DocMeta.setdocmeta!(Pkg, :DocTestSetup, :(using Pkg, Pkg.Artifacts), recursive=true, warn=false)
-DocMeta.setdocmeta!(Pkg.BinaryPlatforms, :DocTestSetup, :(using Pkg, Pkg.BinaryPlatforms), recursive=true, warn=false)
-DocMeta.setdocmeta!(Base.BinaryPlatforms, :DocTestSetup, :(using Base.BinaryPlatforms), recursive=true, warn=false)
+DocMeta.setdocmeta!(
+    SparseArrays,
+    :DocTestSetup,
+    maybe_revise(:(using SparseArrays, LinearAlgebra));
+    recursive=true, warn=false,
+)
+DocMeta.setdocmeta!(
+    SuiteSparse,
+    :DocTestSetup,
+    maybe_revise(:(using SparseArrays, LinearAlgebra, SuiteSparse));
+    recursive=true, warn=false,
+)
+DocMeta.setdocmeta!(
+    UUIDs,
+    :DocTestSetup,
+    maybe_revise(:(using UUIDs, Random));
+    recursive=true, warn=false,
+)
+DocMeta.setdocmeta!(
+    Pkg,
+    :DocTestSetup,
+    maybe_revise(:(using Pkg, Pkg.Artifacts));
+    recursive=true, warn=false,
+)
+DocMeta.setdocmeta!(
+    Base,
+    :DocTestSetup,
+    maybe_revise(:(;;));
+    recursive=true,
+)
+DocMeta.setdocmeta!(
+    Base.BinaryPlatforms,
+    :DocTestSetup,
+    maybe_revise(:(using Base.BinaryPlatforms));
+    recursive=true, warn=false,
+)
+DocMeta.setdocmeta!(
+    Pkg.LazilyInitializedFields,
+    :DocTestSetup,
+    maybe_revise(:(using Pkg.LazilyInitializedFields));
+    recursive=true, warn=false,
+)
 
 let r = r"buildroot=(.+)", i = findfirst(x -> occursin(r, x), ARGS)
     global const buildroot = i === nothing ? (@__DIR__) : first(match(r, ARGS[i]).captures)
 end
 
 const format = if render_pdf
-    LaTeX(
+    Documenter.LaTeX(
         platform = "texplatform=docker" in ARGS ? "docker" : "native"
     )
 else
@@ -203,11 +294,13 @@ else
         analytics = "UA-28835595-6",
         collapselevel = 1,
         sidebar_sitename = false,
+        ansicolor = true,
     )
 end
 
+const output_path = joinpath(buildroot, "doc", "_build", (render_pdf ? "pdf" : "html"), "en")
 makedocs(
-    build     = joinpath(buildroot, "doc", "_build", (render_pdf ? "pdf" : "html"), "en"),
+    build     = output_path,
     modules   = [Main, Base, Core, [Base.root_module(Base, stdlib.stdlib) for stdlib in STDLIB_DOCS]...],
     clean     = true,
     doctest   = ("doctest=fix" in ARGS) ? (:fix) : ("doctest=only" in ARGS) ? (:only) : ("doctest=true" in ARGS) ? true : false,
@@ -221,6 +314,32 @@ makedocs(
     pages     = PAGES,
 )
 
+# Update URLs to external stdlibs (JuliaLang/julia#43199)
+for (root, _, files) in walkdir(output_path), file in joinpath.(root, files)
+    endswith(file, ".html") || continue
+    local str
+    str = read(file, String)
+    # Index page links, update
+    #   https://github.com/JuliaLang/julia/blob/master/stdlib/${STDLIB_NAME}-${STDLIB_COMMIT}/path/to.md
+    # to
+    #   https://github.com/JuliaLang/${STDLIB_NAME}.jl/blob/master/docs/src/index.md
+    str = replace(str, r"https://github.com/JuliaLang/julia/blob/master/stdlib/(.*)-\w{40}/(.*\.md)" =>
+                       s"https://github.com/JuliaLang/\1.jl/blob/master/\2")
+    # Link to source links, update
+    #   https://github.com/JuliaLang/julia/blob/${JULIA_COMMIT}/stdlib/${STDLIB_NAME}-${STDLIB_COMMIT}/path/to.jl#${LINES}
+    # to
+    #   https://github.com/JuliaLang/${STDLIB_NAME}.jl/blob/${STDLIB_COMMIT}/path/to.jl#${LINES}
+    str = replace(str, r"https://github\.com/JuliaLang/julia/blob/\w{40}/stdlib/(.*)-(\w{40})/(.*\.jl#L\d+(?:-L\d+)?)" =>
+                       s"https://github.com/JuliaLang/\1.jl/blob/\2/\3")
+    # Some stdlibs are not hosted by JuliaLang
+    str = replace(str, r"(https://github\.com)/JuliaLang/(ArgTools\.jl/blob)" => s"\1/JuliaIO/\2")
+    str = replace(str, r"(https://github\.com)/JuliaLang/(LibCURL\.jl/blob)" => s"\1/JuliaWeb/\2")
+    str = replace(str, r"(https://github\.com)/JuliaLang/(SHA\.jl/blob)" => s"\1/JuliaCrypto/\2")
+    str = replace(str, r"(https://github\.com)/JuliaLang/(Tar\.jl/blob)" => s"\1/JuliaIO/\2")
+    # Write back to the file
+    write(file, str)
+end
+
 # Define our own DeployConfig
 struct BuildBotConfig <: Documenter.DeployConfig end
 function Documenter.deploy_folder(::BuildBotConfig; devurl, repo, branch, kwargs...)
diff --git a/doc/man/julia.1 b/doc/man/julia.1
index 49ee30e0af90e2..9423cffd45cd44 100644
--- a/doc/man/julia.1
+++ b/doc/man/julia.1
@@ -21,16 +21,18 @@
 .\" - diagnostics
 .\" - notes
 
-.TH JULIA 1 2013-12-10 Julia "Julia Programmers' Reference Guide"
+.TH JULIA 1 2022-02-17 JULIA
 
 .\" from the front page of https://julialang.org/
 .SH NAME
-julia - high-level, high-performance dynamic programming language for technical computing
+julia - a high-level, high-performance dynamic programming language for technical computing
 
 .SH SYNOPSIS
-julia [option] [program] [args..]
+\fBjulia\fR [OPTIONS...] \fB--\fR [PROGRAMMFILE] [ARGS...]
+
+If a Julia source file is given as a \fIPROGRAMFILE\fP (optionally followed by
+arguments in \fIARGS\fP) Julia will execute the program and exit.
 
-.\" Taken almost verbatim from the front page of https://julialang.org/
 .SH DESCRIPTION
 Julia is a high-level, high-performance dynamic programming language
 for technical computing, with syntax that is familiar to users
@@ -49,10 +51,6 @@ For a more in-depth discussion of the rationale and advantages of Julia
 over other systems, please see the online manual:
 https://docs.julialang.org
 
-If a Julia source file is given as a \fIprogram\fP (optionally followed by
- arguments in \fIargs\fP) Julia will execute the program and exit.
-
-.\" This section was taken nearly verbatim from the output of `julia --help`
 .SH "COMMAND-LINE OPTIONS"
 
 .TP
@@ -63,6 +61,10 @@ Display version information
 -h, --help
 Print help message
 
+.TP
+--help-hidden
+Print uncommon options not shown by `-h`
+
 .TP
 --project[=<dir>/@.]
 Set <dir> as the home project/environment. The default @. option will search
@@ -73,22 +75,27 @@ found.
 -J, --sysimage <file>
 Start up with the given system image file
 
-.TP
---sysimage-native-code={yes|no}
-Use precompiled code from system image if available
-
 .TP
 -H, --home <dir>
 Set location of julia executable
 
 .TP
---startup-file={yes|no}
-Load ~/.julia/config/startup.jl
+--startup-file={yes*|no}
+Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH`
+environment variable is unset, load `~/.julia/config/startup.jl`
 
 .TP
---handle-signals={yes|no}
+--handle-signals={yes*|no}
 Enable or disable Julia's default signal handlers
 
+.TP
+--sysimage-native-code={yes*|no}
+Use native code from system image if available
+
+.TP
+--compiled-modules={yes*|no}
+Enable or disable incremental precompilation of modules
+
 .TP
 -e, --eval <expr>
 Evaluate <expr>
@@ -103,11 +110,18 @@ Load <file> immediately on all processors
 
 .TP
 -t, --threads <n>
-Enable n threads
+Enable n threads; "auto" tries to infer a useful default number
+of threads to use but the exact behavior might change in the future.
+Currently, "auto" uses the number of CPUs assigned to this julia
+process based on the OS-specific affinity assignment interface, if
+supported (Linux and Windows). If this is not supported (macOS) or
+process affinity is not configured, it uses the number of CPU
+threads.
 
 .TP
--p, --procs <n>
-Run n local processes
+-p, --procs {N|auto}
+Integer value N launches N additional local worker processes `auto` launches as many workers
+as the number of local CPU threads (logical cores)
 
 .TP
 --machine-file <file>
@@ -115,64 +129,96 @@ Run processes on hosts listed in <file>
 
 .TP
 -i
-Interactive mode; REPL runs and isinteractive() is true
+Interactive mode; REPL runs and `isinteractive()` is true
 
 .TP
---banner={yes|no|auto}
+-q, --quiet
+Quiet startup: no banner, suppress REPL warnings
+
+.TP
+--banner={yes|no|auto*}
 Enable or disable startup banner
 
 .TP
---color={yes|no|auto}
+--color={yes|no|auto*}
 Enable or disable color text
 
 .TP
---history-file={yes|no}
+--history-file={yes*|no}
 Load or save history
 
 .TP
---compile={yes|no|all|min}
-Enable or disable compiler, or request exhaustive or minimal compilation
+--depwarn={yes|no*|error}
+Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)
 
 .TP
--C, --cpu-target=<target>
-Limit usage of cpu features up to <target>
+--warn-overwrite={yes|no*}
+Enable or disable method overwrite warnings
 
 .TP
--O, --optimize
-Run time-intensive code optimizations
+--warn-scope={yes*|no}
+Enable or disable warning for ambiguous top-level scope
 
 .TP
--O <n>, --optimize=<n>
-Set the optimization level to <n>
+-C, --cpu-target=<target>
+Limit usage of CPU features up to <target>; set to `help` to see the available options
 
 .TP
--g
-Enable generation of full debug info
+-O, --optimize={0,1,2*,3}
+Set the optimization level (level 3 if `-O` is used without a level)
 
 .TP
--g <n>
-Set the level of debug info generation to <n>
+--min-optlevel={0*,1,2,3}
+Set a lower bound on the optimization level
 
 .TP
---inline={yes|no}
-Control whether inlining is permitted (overrides functions declared as @inline)
+-g {0,1*,2}
+Set the level of debug info generation (level 2 if `-g` is used without a level)
 
 .TP
---check-bounds={yes|no}
-Emit bounds checks always or never (ignoring declarations)
+--inline={yes*|no}
+Control whether inlining is permitted, including overriding @inline declarations
+
+.TP
+--check-bounds={yes|no|auto*}
+Emit bounds checks always, never, or respect @inbounds declarations
 
 .TP
 --math-mode={ieee|user}
-Always use IEEE semantics for math (ignoring declarations),
-or adhere to declarations in source code
+Disallow or enable unsafe floating point optimizations (overrides @fastmath declaration)
 
 .TP
---depwarn={yes|no|error}
-Enable or disable syntax and method deprecation warnings ('error' turns warnings into errors)
+--code-coverage[={none*|user|all}]
+Count executions of source lines (omitting setting is equivalent to `user`)
 
 .TP
---warn-overwrite={yes|no}
-Enable or disable method overwrite warnings
+--code-coverage=@<path>
+Count executions of source lines in a file or files under a given directory. A `@` must
+be placed before the path to indicate this option. A `@` with no path will track the current directory.
+
+.TP
+ --code-coverage=tracefile.info
+ Append coverage information to the LCOV tracefile (filename supports format tokens)
+
+.TP
+--track-allocation[={none*|user|all}]
+Count bytes allocated by each source line (omitting setting is equivalent to `user`)
+
+.TP
+--track-allocation=@<path>
+Count bytes allocated by each source line in a file or files under a given directory. A `@`
+must be placed before the path to indicate this option. A `@` with no path will track the current directory.
+
+.TP
+--bug-report=KIND
+Launch a bug report session. It can be used to start a REPL, run a script, or evaluate
+expressions. It first tries to use BugReporting.jl installed in current environment and
+fallbacks to the latest compatible BugReporting.jl if not. For more information, see
+--bug-report=help.
+
+.TP
+--compile={yes*|no|all|min}
+Enable or disable JIT compiler, or request exhaustive or minimal compilation
 
 .TP
 --output-o <name>
@@ -182,36 +228,45 @@ Generate an object file (including system image data)
 --output-ji <name>
 Generate a system image data file (.ji)
 
+.TP
+--strip-metadata
+Remove docstrings and source location info from system image
+
+.TP
+--strip-ir
+Remove IR (intermediate representation) of compiled functions
+
+.TP
+--output-unopt-bc <name>
+Generate unoptimized LLVM bitcode (.bc)
+
 .TP
 --output-bc <name>
 Generate LLVM bitcode (.bc)
 
 .TP
---output-incremental={yes|no}
-Generate an incremental output file (rather than complete)
+--output-asm <name>
+Generate an assembly file (.s)
 
 .TP
---code-coverage={none|user|all}, --code-coverage
-Count executions of source lines (omitting setting is equivalent to 'user')
+--output-incremental={yes|no*}
+Generate an incremental output file (rather than complete)
 
 .TP
---track-allocation={none|user|all}, --track-allocation
-Count bytes allocated by each source line
+--trace-compile={stderr,name}
+Print precompile statements for methods compiled during execution or save to a path
 
-.SH FILES
-.I ~/.julia/config/startup.jl
-.RS
-Per user startup file.
-.RE
+.TP
+-image-codegen
+Force generate code in imaging mode
 
-.I /etc/julia/startup.jl
-.RS
-System-wide startup file.
-.RE
+.SH FILES AND ENVIRONMENT
+See https://docs.julialang.org/en/v1/manual/environment-variables/
 
 .SH BUGS
 Please report any bugs using the GitHub issue tracker:
 https://github.com/julialang/julia/issues?state=open
 
+
 .SH AUTHORS
 Contributors: https://github.com/JuliaLang/julia/graphs/contributors
diff --git a/doc/src/assets/cover-splash.tex b/doc/src/assets/cover-splash.tex
new file mode 100644
index 00000000000000..10409a14d5742b
--- /dev/null
+++ b/doc/src/assets/cover-splash.tex
@@ -0,0 +1,353 @@
+%% Direct translation of the backsplash image in "JuliaLang/www.julialang.org"
+%% And cropping of the image
+%%  https://github.com/JuliaLang/www.julialang.org/blob/main/_assets/infra/backsplash-min-0.5.svg
+
+
+\newcommand{\splashScaleFactor}{0.6}
+\newcommand{\whiteMaskTransparency}{0.5}
+\newcommand{\triangleTransparency}{0.6}
+\begin{tikzpicture}[x=1,y=1,yscale=-\splashScaleFactor,xscale=\splashScaleFactor,draw=white]
+% Clipping
+\clip (510,15) rectangle (1570,350);
+% Cropping
+\useasboundingbox(510,0) rectangle (1570,350.0);
+% gary background
+% \draw[fill=splash_gary,opacity=0] (510.0,0.0)--++(1057.5,0.0)--++(0.0,350.0)--++(-1057.5,0.0)--cycle;
+
+%% Draw triangles
+\draw[fill=julia_red,opacity=\triangleTransparency] 
+  (991.9,11.4)--++(51.5,19.7)--++(-56.2,25.3)
+    ++(56.8,56.9)--++(-47.4,-27.5)--++(48.4,-52.2)
+  (990.9,9.0)--++(-40.6,1.4)--++(125.4,-21.0)
+  (969.5,111.5)--++(35.3,20.8)--++(-45.2,3.6)
+  (952.4,205.8)--++(14.0,55.9)--++(-44.7,-29.8)
+    ++(33.8,-160.3)--++(12.1,37.4)--++(-63.5,-1.9)
+  (946.8,335.7)--++(-6.5,-33.3)--++(58.2,17.6)
+  (920.0,235.8)--++(17.8,64.6)--++(-39.3,15.0)
+    ++(-0.7,3.0)--++(45.6,18.6)--++(-51.2,22.3)
+  (910.2,17.7)--++(43.6,51.2)--++(-72.1,-0.9)
+  (901.8,109.5)--++(53.8,27.4)--++(-62.3,24.5)
+  (885.4,204.6)--++(32.3,26.8)--++(-47.1,9.2)
+  (879.5,70.3)--++(19.3,35.8)--++(-54.1,-22.0)
+  (870.0,317.0)--++(0.0,-69.4)--++(25.6,68.5)
+  (868.7,319.7)--++(20.5,40.0)--++(-53.1,-8.4)
+  (867.4,190.9)--++(-12.9,-43.1)--++(35.8,15.6)
+  (866.0,193.8)--++(1.9,45.8)--++(-63.6,-57.1)
+  (815.2,79.7)--++(-16.2,-17.1)--++(65.6,-10.8)
+    ++(3.4,-40.0)--++(0.9,35.9)--++(-50.7,-41.5)
+  (801.8,362.7)--++(-7.3,-40.3)--++(37.5,29.3)
+    ++(-16.2,-267.4)--++(35.8,61.3)--++(-41.5,22.6)
+  (796.4,60.5)--++(-49.1,-26.4)--++(66.1,-27.4)
+  (765.0,352.3)--++(-0.9,-61.7)--++(27.6,29.5)
+  (758.0,131.6)--++(40.8,48.4)--++(-80.7,-0.9)
+    ++(66.2,78.4)--++(-38.6,-22.6)--++(53.7,-50.9)
+  (741.8,98.9)--++(14.7,30.4)--++(-45.1,7.4)
+  (720.7,321.9)--++(38.2,30.3)--++(-40.8,-13.9)
+  (716.2,182.1)--++(19.5,30.7)--++(-50.2,38.1)
+  (708.3,140.2)--++(6.5,39.0)--++(-52.0,9.3)
+    ++(4.7,130.8)--++(46.6,19.6)--++(-64.3,20.5)
+    ++(33.5,-102.7)--++(34.9,62.2)--++(-50.0,-1.9)
+  (697.1,1.3)--++(3.7,57.5)--++(-37.1,-47.3)
+    ++(20.3,86.2)--++(-30.6,-59.4)--++(47.4,24.1)
+  (660.1,193.2)--++(21.3,59.2)--++(-35.1,-12.0)
+  (650.6,34.4)--++(-49.2,-41.9)--++(59.2,19.1)
+  (641.9,103.0)--++(40.0,-1.9)--++(-45.6,59.5)
+  (640.4,99.6)--++(-46.4,-58.7)--++(55.9,-3.8)
+  (611.6,316.3)--++(34.5,43.8)--++(-43.8,2.8)
+  (600.4,137.3)--++(-7.1,-17.7)--++(43.4,-15.0)
+  (596.6,210.6)--++(44.0,28.8)--++(-49.4,-7.2)
+  (591.0,41.9)--++(0.0,71.1)--++(-19.8,-57.6)
+  (551.4,303.2)--++(-11.2,-49.5)--++(47.7,-18.7)
+  (541.9,104.3)--++(47.4,14.9)--++(-51.2,25.1)
+  (540.0,214.9)--++(-1.9,-66.3)--++(55.9,59.6)
+    ++(-2.7,-170.7)--++(-44.8,-53.2)--++(49.5,6.5)
+  (512.0,192.8)--++(-28.2,-58.3)--++(51.7,12.2)
+    ++(-23.0,50.0)--++(24.9,19.4)--++(-44.3,38.8)
+  (503.7,105.9)--++(-15.7,-57.3)--++(50.9,53.6)
+  (487.0,42.5)--(487.0,6.7)--++(53.2,-21.1)
+  (1602.3,36.6)--++(12.3,58.5)--++(-57.5,-32.1)
+  (1545.2,19.0)--++(-49.1,-29.1)--++(117.4,19.1)
+  (1538.8,89.3)--++(67.7,71.6)--++(-73.5,-3.8)
+  (1537.9,366.1)--++(-2.9,-80.5)--++(70.9,33.6)
+  (1532.0,159.8)--++(18.4,27.6)--++(-47.0,12.9)
+    ++(34.3,-114.6)--++(-48.3,-30.1)--++(63.8,8.2)
+  (1501.7,203.7)--++(63.0,41.4)--++(-71.5,0.9)
+  (1499.9,137.4)--++(-43.1,-47.7)--++(59.6,25.7)
+  (1493.0,315.9)--++(0.0,-66.7)--++(39.5,34.8)
+  (1491.7,319.6)--++(15.7,27.7)--++(-60.1,12.0)
+  (1469.9,265.9)--++(20.1,49.8)--++(-21.0,-12.2)
+  (1445.7,193.2)--++(21.8,66.4)--++(-57.8,-16.1)
+    ++(44.0,-158.6)--++(-7.4,-46.2)--++(37.0,15.7)
+  (1433.7,139.9)--++(-33.8,-45.1)--++(51.7,-6.6)
+    ++(-7.4,-52.1)--++(-30.1,1.8)--++(38.3,-51.9)
+  (1418.7,179.9)--++(-19.1,-27.3)--++(32.7,-9.1)
+  (1407.3,246.3)--++(7.5,63.2)--++(-49.1,-17.0)
+  (1384.6,-17.4)--++(24.9,53.5)--++(-49.9,-34.2)
+  (1367.2,191.1)--++(-28.2,-58.4)--++(57.4,19.8)
+  (1364.4,296.2)--++(23.2,49.3)--++(-35.3,17.7)
+  (1301.7,135.9)--++(-4.3,-16.4)--++(35.3,11.2)
+  (1297.0,116.5)--++(-1.9,-78.9)--++(52.3,57.0)
+    ++(-49.6,-80.8)--++(48.0,38.2)--++(-50.7,-17.8)
+  (1296.9,251.9)--++(-8.5,-57.6)--++(69.0,52.0)
+  (1295.1,254.1)--++(7.4,59.0)--++(-37.8,-54.3)
+    ++(23.2,74.8)--++(-32.1,-28.5)--++(46.3,12.5)
+  (1288.0,336.1)--++(55.2,26.3)--++(-77.0,-15.8)
+  (1258.6,34.5)--++(-14.0,-47.5)--++(50.3,25.2)
+    ++(-8.4,177.8)--++(-32.7,-39.3)--++(45.8,-12.2)
+  (1255.1,194.9)--++(6.6,60.1)--++(-60.1,-69.5)
+    ++(49.7,116.1)--++(-66.9,-32.0)--++(77.2,-10.4)
+    ++(-10.2,45.7)--++(10.3,41.3)--++(-66.6,13.1)
+  (1249.1,118.0)--++(1.8,31.1)--++(-43.9,1.8)
+  (1187.6,-8.5)--++(17.0,65.3)--++(-53.0,-5.7)
+  (1182.3,316.6)--++(8.3,42.4)--++(-40.6,-24.0)
+  (1180.3,267.6)--++(-31.7,-12.1)--++(48.5,-67.1)
+  (1180.1,271.2)--++(1.8,42.2)--++(-31.2,-14.7)
+  (1150.7,162.1)--++(-7.6,-81.0)--++(57.2,7.6)
+  (1146.0,256.9)--++(1.9,39.9)--++(-56.7,-44.6)
+    ++(55.8,81.3)--++(-43.2,-14.7)--++(44.1,-18.4)
+  (1141.6,77.9)--++(-32.5,-38.8)--++(39.7,13.5)
+    ++(-40.4,87.8)--++(39.0,24.2)--++(-60.4,13.9)
+  (1108.9,-14.3)--++(21.7,26.3)--++(-23.5,22.6)
+  (1108.3,136.4)--++(-14.1,-53.4)--++(45.9,-1.9)
+  (1066.9,169.8)--++(16.8,10.6)--++(-19.4,35.3)
+    ++(-15.7,97.5)--++(-10.4,-69.9)--++(48.2,8.5)
+  (1066.5,166.5)--++(-19.7,-49.7)--++(59.1,22.5)
+  (1058.4,4.5)--++(43.9,31.1)--++(-54.9,-5.5)
+  (1048.8,317.1)--++(48.4,2.7)--++(-55.7,27.4)
+  (1037.4,240.4)--++(-15.5,-33.7)--++(39.1,13.6)
+  (1007.4,135.4)--++(11.4,68.6)--++(-63.8,-0.9)
+  (1005.9,249.3)--++(40.4,65.8)--++(-43.3,3.8);
+
+\draw[fill=julia_purple,opacity=\triangleTransparency]
+  (995.4,84.1)--++(-8.2,-25.5)--++(54.7,-24.6)
+  (969.5,108.2)--++(-11.9,-36.5)--++(35.6,14.6)
+  (967.9,265.8)--++(31.9,52.5)--++(-59.1,-17.8)
+    ++(28.0,-37.9)--++(-14.0,-55.9)--++(48.5,39.1)
+  (952.1,202.2)--++(-58.0,-39.0)--++(62.7,-24.7)
+  (891.2,161.6)--++(-36.3,-15.8)--++(44.7,-35.4)
+    ++(-19.6,-43.5)--++(-8.8,-16.7)--++(35.1,-29.8)
+  (884.3,201.4)--++(-15.7,-8.7)--++(21.8,-26.2)
+  (841.1,84.9)--++(9.9,55.8)--++(-34.2,-58.4)
+    ++(-7.5,89.0)--++(47.9,19.0)--++(-54.5,-9.9)
+  (815.2,4.0)--++(-37.7,-10.6)--++(69.6,14.2)
+    ++(-45.4,175.3)--++(65.2,58.5)--++(-80.6,16.3)
+  (784.3,261.5)--++(7.3,55.5)--++(-27.3,-29.1)
+    ++(-18.4,-252.3)--++(49.1,26.4)--++(-51.9,33.1)
+  (762.0,290.0)--++(0.9,62.9)--++(-41.3,-32.9)
+  (758.3,128.4)--++(-14.9,-30.7)--++(67.9,-14.9)
+  (744.5,233.2)--++(-6.3,-19.7)--++(57.2,-28.6)
+  (744.2,32.2)--++(-43.9,-31.7)--++(49.5,-12.1)
+  (709.1,136.3)--++(-23.3,-35.4)--++(54.0,-2.8)
+  (635.0,165.5)--++(23.1,24.0)--++(-59.1,17.6)
+  (611.3,311.1)--++(-19.9,-76.8)--++(52.2,7.6)
+  (591.5,120.4)--++(7.1,17.8)--++(-56.0,6.2)
+  (551.3,307.9)--++(47.1,54.6)--++(-64.0,-12.2)
+  (540.0,251.5)--++(0.0,-34.1)--++(47.1,15.7)
+    ++(-16.6,-179.6)--++(-23.7,-65.5)--++(43.7,51.9)
+  (538.7,256.1)--++(11.1,49.2)--++(-38.0,8.4)
+    ++(29.1,-212.2)--++(-52.2,-55.0)--++(79.7,9.5)
+  (1608.2,159.7)--++(-67.6,-71.4)--++(74.2,9.5)
+  (1567.8,247.7)--++(37.8,69.1)--++(-70.0,-33.1)
+  (1555.7,61.5)--++(-9.3,-40.1)--++(54.0,14.0)
+  (1518.1,113.1)--++(-30.4,-56.3)--++(48.9,30.4)
+  (1517.6,117.1)--++(12.6,38.6)--++(-28.8,-17.1)
+    ++(7.4,207.0)--++(-15.5,-27.4)--++(38.4,-31.1)
+  (1484.1,52.5)--++(-37.4,-15.9)--++(43.0,-45.8)
+    ++(-20.2,268.7)--++(-20.9,-63.7)--++(41.9,51.0)
+  (1446.6,189.1)--++(-11.3,-46.1)--++(62.1,-2.8)
+  (1443.9,358.7)--++(-26.3,-46.9)--++(48.8,-6.6)
+  (1419.8,183.3)--++(24.6,8.2)--++(-34.6,48.3)
+  (1412.5,41.1)--++(38.5,45.1)--++(-51.7,6.6)
+  (1367.7,195.4)--++(38.3,47.7)--++(-44.9,2.8)
+    ++(-2.4,-242.2)--++(50.9,34.8)--++(-57.4,15.1)
+  (1359.1,248.1)--++(3.7,43.1)--++(-63.7,-37.4)
+    ++(37.6,-120.9)--++(28.6,59.1)--++(-76.3,-1.0)
+  (1351.9,57.0)--++(43.4,36.0)--++(-45.2,0.9)
+  (1336.4,129.8)--++(-37.5,-11.9)--++(48.5,-21.0)
+  (1304.0,318.5)--++(42.3,43.2)--++(-56.7,-27.0)
+  (1293.0,36.0)--++(1.9,77.2)--++(-35.3,-76.3)
+  (1256.9,192.9)--++(-3.6,-39.8)--++(31.7,38.0)
+  (1252.9,148.1)--++(-1.8,-31.1)--++(42.0,1.8)
+  (1204.9,153.0)--++(43.5,-1.8)--++(-48.0,30.8)
+  (1201.2,86.2)--++(-47.4,-32.8)--++(51.1,5.5)
+    ++(1.4,-3.7)--++(-17.0,-65.3)--++(52.0,-3.8)
+  (1201.1,91.1)--++(1.9,60.1)--++(-50.7,12.2)
+  (1184.0,313.8)--++(-1.9,-43.2)--++(66.7,31.9)
+  (1155.9,186.0)--++(41.1,-0.9)--++(-48.6,67.3)
+  (1149.3,166.2)--++(4.4,17.7)--++(-62.0,-4.4)
+  (1132.3,10.8)--++(-22.0,-26.6)--++(73.4,5.5)
+  (1131.7,13.7)--++(16.5,36.6)--++(-40.3,-13.7)
+  (1101.8,317.6)--++(-12.3,-64.2)--++(57.6,45.3)
+  (1092.5,80.0)--++(-44.0,-47.8)--++(56.2,5.6)
+  (1092.4,83.6)--++(14.1,53.7)--++(-59.4,-22.6)
+  (1062.9,221.4)--++(22.7,28.1)--++(-46.3,-8.2)
+  (1057.8,2.0)--++(-50.7,5.3)--++(88.5,-20.4)
+  (1036.2,243.1)--++(10.1,68.2)--++(-39.6,-64.5)
+    ++(32.7,100.5)--++(-34.7,-26.5)--++(42.0,-3.7)
+  (1020.7,203.2)--++(-11.4,-68.2)--++(54.9,33.1)
+  (1008.6,131.6)--++(-12.0,-43.5)--++(46.3,26.8)
+  (1001.4,321.2)--++(14.9,40.1)--++(-67.1,-24.2);
+
+\draw[fill=julia_green,opacity=\triangleTransparency]
+  (994.5,87.8)--++(11.9,43.0)--++(-35.7,-21.1)
+    ++(-11.7,28.1)--++(46.9,-3.8)--++(-51.6,66.6)
+    ++(-8.2,137.3)--++(66.0,23.8)--++(-116.4,-1.8)
+  (985.4,59.3)--++(8.0,24.9)--++(-34.7,-14.2)
+  (985.2,55.9)--++(-47.7,-43.0)--++(52.4,-1.9)
+  (957.5,135.6)--++(-52.1,-26.5)--++(62.1,1.8)
+  (935.0,11.0)--++(-170.3,-23.1)--++(330.5,-3.7)
+  (934.3,13.1)--++(18.8,52.0)--++(-41.3,-48.4)
+  (901.4,106.5)--++(-19.7,-36.5)--++(71.2,0.9)
+  (896.9,313.7)--++(-26.5,-71.0)--++(48.3,-9.5)
+  (892.7,164.7)--++(56.9,38.3)--++(-63.5,-0.9)
+  (890.4,357.7)--++(-19.8,-38.8)--++(25.2,-0.9)
+  (870.9,47.9)--++(-0.9,-35.8)--++(37.6,4.6)
+  (869.8,51.8)--++(8.8,16.7)--++(-33.4,13.2)
+    ++(24.6,155.7)--++(-1.7,-42.6)--++(15.7,8.7)
+  (868.0,243.2)--++(0.0,72.9)--++(-80.5,-56.6)
+    ++(28.0,-252.8)--++(52.1,42.7)--++(-69.2,11.4)
+  (852.4,147.5)--++(13.1,43.8)--++(-54.1,-21.5)
+  (833.9,350.7)--++(-38.1,-29.8)--++(70.7,-1.9)
+  (800.9,179.4)--++(-35.9,-42.6)--++(42.6,33.4)
+    ++(-55.8,-181.9)--++(60.1,16.9)--++(-65.7,27.2)
+  (796.8,63.3)--++(16.2,17.1)--++(-65.8,14.4)
+  (763.3,285.9)--++(-17.4,-48.7)--++(37.7,22.0)
+  (736.5,214.7)--++(6.3,19.7)--++(-54.6,17.0)
+  (720.3,318.5)--++(-35.0,-62.5)--++(75.7,32.2)
+  (717.0,340.1)--++(41.8,14.2)--++(-103.1,5.3)
+  (716.6,177.7)--++(-6.5,-38.9)--++(45.3,-7.4)
+  (702.9,60.2)--++(-3.8,-58.2)--++(44.1,31.9)
+  (702.3,63.6)--++(37.1,32.5)--++(-53.9,2.8)
+  (683.9,101.7)--++(23.5,35.8)--++(-69.6,24.5)
+    ++(45.3,89.4)--++(-21.8,-60.5)--++(53.0,-9.5)
+  (666.9,314.1)--++(-20.4,-71.6)--++(35.3,12.1)
+  (662.2,12.9)--++(36.2,46.1)--++(-46.1,-23.5)
+  (651.5,39.2)--++(30.9,59.9)--++(-40.2,1.9)
+  (647.8,359.1)--++(-34.6,-43.9)--++(52.3,3.7)
+  (644.4,239.4)--++(-46.1,-30.1)--++(60.2,-17.9)
+  (634.2,162.2)--++(-32.6,-23.3)--++(38.2,-34.4)
+  (595.1,206.6)--++(-56.0,-59.8)--++(59.8,-6.6)
+  (589.7,235.7)--++(20.0,77.1)--++(-57.1,-7.6)
+  (569.8,57.3)--++(20.7,60.1)--++(-47.9,-15.0)
+  (538.0,218.2)--++(0.0,33.9)--++(-44.0,4.6)
+    ++(42.2,-113.1)--++(-31.2,-35.8)--++(34.9,-3.7)
+  (532.8,348.8)--++(-21.2,-33.2)--++(37.8,-8.3)
+  (496.9,336.0)--++(30.8,13.7)--++(-86.5,-4.3)
+  (486.2,4.8)--++(-20.6,-17.9)--++(72.7,-2.7)
+  (1607.2,320.8)--++(4.7,48.2)--++(-71.8,-1.9)
+    ++(15.2,-302.8)--++(55.7,31.0)--++(-71.2,-9.1)
+  (1602.6,33.9)--++(-52.5,-13.6)--++(67.9,-10.0)
+  (1588.5,189.0)--++(-32.4,-1.8)--++(49.9,-22.8)
+  (1551.3,189.2)--++(15.0,54.5)--++(-62.9,-41.3)
+  (1544.3,21.5)--++(9.4,40.3)--++(-65.6,-8.4)
+  (1533.2,288.9)--++(2.8,77.1)--++(-25.7,-18.4)
+  (1531.4,153.0)--++(-12.3,-37.8)--++(17.6,-24.6)
+  (1502.0,198.9)--++(-0.9,-58.1)--++(29.5,17.5)
+  (1491.0,248.8)--++(0.0,64.1)--++(-20.8,-51.4)
+  (1484.8,55.7)--++(31.0,57.3)--++(-61.0,-26.3)
+  (1454.8,-16.9)--++(34.2,5.5)--++(-42.5,45.2)
+  (1453.4,88.9)--++(44.4,49.2)--++(-62.4,2.8)
+    ++(56.0,103.8)--++(-42.9,-52.2)--++(51.3,10.3)
+  (1444.2,38.1)--++(7.3,45.6)--++(-37.4,-43.8)
+  (1433.8,145.4)--++(10.8,44.1)--++(-24.3,-8.1)
+  (1416.8,308.9)--++(-7.6,-63.5)--++(57.9,16.1)
+  (1412.2,37.0)--++(-25.6,-55.0)--++(65.5,0.9)
+  (1399.0,150.7)--(1399,97.0)--++(33.3,44.4)
+  (1398.0,153.7)--++(19.3,27.6)--++(-47.9,10.1)
+  (1397.4,92.2)--++(-44.3,-36.7)--++(57.5,-15.1)
+    ++(-21.4,303.7)--++(-23.3,-49.4)--++(48.4,16.8)
+  (1389.105,347.066)--++(48.442,13.211)--++(-81.91,3.524)
+  (1364.8,290.6)--++(-3.7,-42.7)--++(44.5,-2.8)
+  (1359.2,245.2)--++(-69.2,-52.1)--++(75.8,0.9)
+  (1350.4,362.9)--++(-44.6,-45.5)--++(56.9,-22.8)
+  (1350.2,53.1)--++(-50.9,-40.5)--++(57.4,-9.4)
+  (1349.4,97.0)--++(45.4,52.9)--++(-56.6,-19.5)
+  (1301.8,137.9)--++(32.7,-5.5)--++(-45.5,55.5)
+  (1297.1,10.9)--++(-50.1,-25.0)--++(131.6,-3.7)
+  (1295.4,119.7)--++(4.4,16.6)--++(-42.9,11.4)
+    ++(6.5,112.7)--++(38.2,54.9)--++(-48.4,-13.0)
+  (1286.6,196.1)--++(8.3,56.1)--++(-30.4,4.6)
+    ++(28.6,-222.7)--++(-31.4,0.9)--++(34.0,-20.9)
+  (1263.0,348.0)--++(76.7,15.7)--++(-138.7,-3.5)
+  (1251.2,151.7)--++(3.7,41.1)--++(-53.2,-9.3)
+  (1249.3,113.4)--++(-41.7,-54.9)--++(49.3,-20.8)
+    ++(-57.3,148.6)--++(61.5,71.1)--++(-78.8,10.6)
+  (1204.9,150.0)--++(-1.9,-60.2)--++(45.2,26.3)
+  (1192.6,359.0)--++(-8.5,-43.2)--++(64.8,-11.3)
+  (1155.8,184.0)--++(-4.3,-17.4)--++(42.5,16.5)
+    ++(-43.4,-129.5)--++(47.4,32.8)--++(-54.7,-7.3)
+  (1150.1,49.9)--++(-16.8,-37.3)--++(52.2,-21.5)
+    ++(-37.5,345.1)--++(40.4,23.9)--++(-72.5,1.8)
+  (1149.9,296.8)--++(-1.8,-39.3)--++(31.1,11.9)
+    ++(-29.2,31.2)--++(30.9,14.5)--++(-31.8,18.2)
+  (1141.3,83.2)--++(7.5,79.9)--++(-39.5,-24.4)
+    ++(36.8,116.3)--++(-55.9,-4.7)--++(63.4,-62.5)
+  (1106.4,39.1)--++(33.5,40.0)--++(-45.6,1.9)
+  (1105.1,35.1)--++(-45.0,-31.9)--++(46.9,-18.7)
+  (1100.4,320.4)--++(11.3,41.3)--++(-68.5,-13.1)
+  (1087.5,253.7)--++(12.3,64.2)--++(-50.0,-2.8)
+  (1084.2,183.6)--++(2.7,64.4)--++(-22.7,-28.1)
+    ++(-17.2,-186.4)--++(44.5,48.3)--++(-45.5,31.3)
+  (1045.5,29.7)--++(-50.2,-19.2)--++(61.1,-6.4)
+  (1044.5,116.4)--++(19.5,49.2)--++(-53.9,-32.5)
+  (1020.1,207.6)--++(15.4,33.5)--++(-29.0,3.6)
+    ++(12.0,116.7)--++(-14.4,-38.6)--++(34.1,26.0)
+  (1001.1,316.6)--++(-31.7,-52.2)--++(34.5,-16.8);
+
+\draw[fill=julia_blue,opacity=\triangleTransparency] 
+  (956.6,68.7)--++(-19.3,-53.3)--++(46.8,42.2)
+  (939.4,298.8)--++(-17.8,-64.5)--++(44.9,29.9)
+  (938.2,302.4)--++(6.4,33.0)--++(-44.9,-18.3)
+  (920.0,230.7)--++(-32.2,-26.7)--++(62.5,0.9)
+  (869.1,10.0)--++(-88.1,-17.9)--++(137.3,18.7)
+  (853.7,144.2)--++(-10.4,-58.6)--++(55.7,22.7)
+  (841.6,82.9)--++(-23.4,-2.6)--++(47.6,-26.8)
+  (808.1,168.1)--++(-48.6,-38.1)--++(54.3,-46.7)
+  (793.9,319.0)--++(-7.6,-57.9)--++(79.7,56.0)
+  (792.4,322.3)--++(7.3,40.4)--++(-34.0,-8.3)
+  (743.4,236.2)--++(17.9,49.9)--++(-75.4,-32.0)
+  (741.1,94.9)--++(-37.5,-32.8)--++(40.3,-26.2)
+  (737.4,211.7)--++(-19.5,-30.7)--++(79.0,0.9)
+  (716.2,337.6)--++(-43.8,-18.4)--++(46.4,1.8)
+  (662.0,10.0)--++(-55.8,-18.0)--++(86.8,8.6)
+  (660.0,188.6)--++(-23.2,-24.1)--++(68.7,-24.1)
+  (644.8,243.8)--++(20.9,73.0)--++(-53.1,-3.8)
+  (600.4,361.8)--++(-47.0,-54.5)--++(56.3,7.5)
+    ++(-8.9,-174.0)--++(32.6,23.3)--++(-36.4,42.0)
+  (597.8,-8.0)--++(50.7,43.2)--++(-55.4,3.8)
+  (593.0,117.6)--++(0.0,-74.7)--++(46.3,58.6)
+  (589.3,231.7)--++(-45.8,-15.3)--++(51.2,-6.3)
+  (543.6,-15.1)--++(24.9,68.9)--++(-80.4,-9.6)
+  (536.1,149.9)--++(1.8,64.0)--++(-24.7,-19.2)
+    ++(-3.1,117.6)--++(-17.8,-53.4)--++(45.0,-4.7)
+  (510.0,316.9)--++(20.4,31.9)--++(-31.9,-14.2)
+    ++(4.5,-226.0)--++(31.2,35.8)--++(-50.4,-11.9)
+  (1552.4,186.7)--++(-18.4,-27.6)--++(70.9,3.7)
+  (1533.9,282.6)--++(-39.3,-34.6)--++(71.1,-0.9)
+    ++(2.4,-4.3)--++(-14.8,-53.7)--++(34.3,1.9)
+  (1508.8,349.1)--++(24.7,17.7)--++(-82.2,-6.2)
+  (1499.0,141.3)--++(0.9,59.5)--++(-51.9,-10.4)
+  (1491.9,-10.3)--++(51.2,30.3)--++(-56.8,32.2)
+  (1468.5,305.4)--++(21.8,12.7)--++(-43.7,39.1)
+  (1467.0,303.1)--++(-48.1,6.5)--++(49.0,-46.2)
+  (1415.9,312.8)--++(26.1,46.6)--++(-51.3,-14.0)
+  (1407.4,241.7)--++(-38.6,-48.0)--++(49.0,-10.4)
+  (1397.0,95.0)--++(0.0,54.3)--++(-45.9,-53.3)
+  (1357.6,1.1)--++(-49.6,8.1)--++(71.5,-25.2)
+  (1348.1,92.6)--++(-50.7,-55.3)--++(52.5,18.4)
+  (1304.8,315.6)--++(-7.6,-60.7)--++(64.5,37.9)
+  (1263.5,253.6)--++(-6.4,-58.7)--++(28.4,-1.8)
+    ++(-21.9,152.5)--++(-9.9,-39.7)--++(32.5,28.9)
+  (1258.6,39.7)--++(35.8,77.2)--++(-43.3,-1.9)
+  (1242.8,-12.3)--++(14.0,47.7)--++(-48.7,20.6)
+  (1206.7,60.5)--++(39.5,52.1)--++(-43.1,-25.1)
+  (1198.2,182.6)--++(-44.9,-17.4)--++(49.4,-11.9)
+  (1113.6,361.3)--++(-11.1,-40.7)--++(43.5,14.8)
+  (1088.9,248.7)--++(-2.9,-67.6)--++(66.7,4.8)
+  (1084.6,178.6)--++(-16.8,-10.6)--++(37.2,-25.7)
+  (1062.1,218.6)--++(-40.0,-14.0)--++(42.8,-34.5)
+  (1004.5,244.3)--++(-48.7,-39.3)--++(62.7,0.9);
+
+% White Mask
+\draw[fill=white,opacity=\whiteMaskTransparency] (500,0)--(1560,0)--(1560,360)--(500,360)--cycle;
+\end{tikzpicture}
diff --git a/doc/src/assets/cover.tex b/doc/src/assets/cover.tex
new file mode 100644
index 00000000000000..67b77e520acd3f
--- /dev/null
+++ b/doc/src/assets/cover.tex
@@ -0,0 +1,46 @@
+%% ============================================================================
+%% Custom tex styles, including this file, add a custom cover to the document.
+%%
+%% These custom styles include:
+%%  - `cover.tex`: This file, The main definition of the cover, 
+%%      used to replace the default `\maketitle` command.
+%%  - `custom.sty`: Load the macro package required for the cover, 
+%%      define the background image style, etc.
+%%  - `preamble.tex`: Replace the default preamble for inserting a custom cover.
+%%  - `logo.tex`: logo of julia.
+%%  - `cover-splash.tex`: Background image of the cover title, 
+%%      from julia's homepage.
+%% ============================================================================
+
+%% ---- reset page geometry for cover page
+\newgeometry{left=2cm,right=2cm,bottom=3cm}
+% ref: memman@v3.7q, P65, "4.1. Styling the titling"
+%   http://mirrors.ctan.org/macros/latex/contrib/memoir/memman.pdf
+\begin{titlingpage}
+    % set background image
+    \BgThispage
+    \vspace*{2.2cm}
+
+    %% Centering content
+    \begin{center}
+        %% Main Heading
+        \textcolor{black}{ \MainHeading  \DocMainTitle } 
+        \vfill
+        
+        %% logo
+        % logo scale factor
+        \newcommand{\scaleFactor}{0.5}
+        \input{./assets/logo} 
+        \\[1.5cm]
+        % git tag or doc version
+        { \SecondaryHeading V\JuliaVersion\ } 
+        \vfill
+        
+        { \HUGE \DocAuthors }
+        \\[0.5cm]
+        % build time
+        { \huge \today }
+    \end{center} 
+\end{titlingpage}
+\restoregeometry
+%% ---- restore geometry
diff --git a/doc/src/assets/custom.sty b/doc/src/assets/custom.sty
new file mode 100644
index 00000000000000..f257d2d3d21744
--- /dev/null
+++ b/doc/src/assets/custom.sty
@@ -0,0 +1,45 @@
+%% Load the macro package required for the cover.
+
+
+%% pkg for make cover page BEGIN ----------------------------------------------
+% Load `geometry' to modify margins later
+\usepackage{geometry}
+% "some": use \BgThispage to change background
+% ref: background@v2.1,# 2.1 Options, "pages="
+%   http://mirrors.ctan.org/macros/latex/contrib/background/background.pdf
+\usepackage[pages=some]{background}
+
+%% Color definitions for Julia
+%%  https://github.com/JuliaLang/julia-logo-graphics#color-definitions
+\definecolor{julia_blue}  {HTML}{4063D8}
+\definecolor{julia_green} {HTML}{389826}
+\definecolor{julia_purple}{HTML}{9558B2}
+\definecolor{julia_red}   {HTML}{CB3C33}
+\definecolor{splash_gary} {HTML}{1A1A33}
+
+% ---- define heading background
+% ref: background.pdf, #2.1 Options
+\backgroundsetup{
+scale=1,    % scaling factor
+angle=0,    % counterclockwise angle
+opacity=1,  % transparency
+contents={
+%% Place the background image `title-bg' in the right place via `tikz'.
+% tikz option "remember picture", "overlay"
+% ref: pgfmanual@3.1.9a, #17.13.1 Referencing a Node in a Different Picture\
+%   http://mirrors.ctan.org/graphics/pgf/base/doc/pgfmanual.pdf
+\begin{tikzpicture}[remember picture,overlay,draw=white]
+  \draw [path picture={
+    % ref: pgfmanual, 15.6, "Predefined node path picture bounding box"
+    \node at (path picture bounding box.center){
+      \input{assets/cover-splash}
+  };}] (-0.5\paperwidth,4cm) rectangle (0.5\paperwidth,11cm);
+  % Put picture to right place
+  %   ref: pgfmanual, #2.6 Rectangle Path Construction
+\end{tikzpicture}
+}}%
+
+% ---- Heading font style
+\DeclareFixedFont{\MainHeading}{T1}{phv}{b}{n}{1.5cm}
+\DeclareFixedFont{\SecondaryHeading}{T1}{phv}{b}{n}{0.8cm}
+%% cover page END -------------------------------------------------------------
diff --git a/doc/src/assets/julia-manual.css b/doc/src/assets/julia-manual.css
index f303c677497978..309398dd3ccb53 100644
--- a/doc/src/assets/julia-manual.css
+++ b/doc/src/assets/julia-manual.css
@@ -1,3 +1,7 @@
+pre, code {
+    font-variant-ligatures: no-contextual;
+}
+
 nav.toc h1 {
     display: none;
 }
diff --git a/doc/src/assets/logo-dark.svg b/doc/src/assets/logo-dark.svg
index 0c90d2f7713c20..e578fd9f9a0351 100644
--- a/doc/src/assets/logo-dark.svg
+++ b/doc/src/assets/logo-dark.svg
@@ -1 +1 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="320pt" height="200pt" version="1.1" viewBox="0 0 320 200"><g id="surface1"><path fill="#FFF" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 67.871094 164.3125 C 67.871094 171.847656 67.023438 177.933594 65.328125 182.566406 C 63.632812 187.203125 61.222656 190.800781 58.09375 193.363281 C 54.96875 195.925781 51.21875 197.640625 46.847656 198.507812 C 42.476562 199.371094 37.613281 199.804688 32.265625 199.804688 C 25.027344 199.804688 19.488281 198.675781 15.648438 196.414062 C 11.804688 194.152344 9.882812 191.441406 9.882812 188.273438 C 9.882812 185.636719 10.953125 183.414062 13.101562 181.605469 C 15.25 179.796875 18.132812 178.894531 21.75 178.894531 C 24.464844 178.894531 26.632812 179.628906 28.25 181.097656 C 29.871094 182.566406 31.210938 184.019531 32.265625 185.449219 C 33.46875 187.03125 34.488281 188.085938 35.316406 188.613281 C 36.144531 189.140625 36.898438 189.40625 37.578125 189.40625 C 39.007812 189.40625 40.101562 188.558594 40.855469 186.863281 C 41.609375 185.167969 41.984375 181.871094 41.984375 176.972656 L 41.984375 84.050781 L 67.871094 76.929688 L 67.871094 164.3125 M 104.738281 79.414062 L 104.738281 139.214844 C 104.738281 140.875 105.058594 142.4375 105.699219 143.90625 C 106.339844 145.375 107.226562 146.640625 108.355469 147.695312 C 109.488281 148.75 110.804688 149.597656 112.3125 150.238281 C 113.820312 150.878906 115.441406 151.199219 117.175781 151.199219 C 119.132812 151.199219 121.359375 150.101562 124.070312 148.203125 C 128.363281 145.195312 130.964844 143.128906 130.964844 140.683594 C 130.964844 140.097656 130.964844 79.414062 130.964844 79.414062 L 156.738281 79.414062 L 156.738281 164.3125 L 130.964844 164.3125 L 130.964844 156.398438 C 127.574219 159.261719 123.957031 161.558594 120.113281 163.292969 C 116.269531 165.027344 112.539062 165.894531 108.921875 165.894531 C 104.703125 165.894531 100.78125 165.195312 97.164062 163.800781 C 93.546875 162.40625 90.382812 160.503906 87.671875 158.09375 C 84.957031 155.683594 82.828125 152.855469 81.28125 149.613281 C 79.738281 146.375 78.964844 142.90625 78.964844 139.214844 L 78.964844 79.414062 L 104.738281 79.414062 M 192.882812 164.3125 L 167.222656 164.3125 L 167.222656 45.277344 L 192.882812 38.15625 L 192.882812 164.3125 M 203.601562 84.050781 L 229.375 76.929688 L 229.375 164.3125 L 203.601562 164.3125 L 203.601562 84.050781 M 283.226562 120.449219 C 280.738281 121.507812 278.230469 122.730469 275.707031 124.125 C 273.183594 125.519531 270.882812 127.046875 268.8125 128.703125 C 266.738281 130.359375 265.0625 132.132812 263.78125 134.015625 C 262.5 135.898438 261.859375 137.859375 261.859375 139.894531 C 261.859375 141.476562 262.066406 143.003906 262.480469 144.472656 C 262.894531 145.941406 263.480469 147.203125 264.234375 148.257812 C 264.988281 149.3125 265.816406 150.160156 266.722656 150.800781 C 267.625 151.441406 268.605469 151.761719 269.660156 151.761719 C 271.769531 151.761719 273.898438 151.121094 276.046875 149.839844 C 278.195312 148.558594 280.585938 146.941406 283.226562 144.980469 L 283.226562 120.449219 M 309.109375 164.3125 L 283.226562 164.3125 L 283.226562 157.527344 C 281.792969 158.734375 280.398438 159.847656 279.042969 160.863281 C 277.6875 161.878906 276.160156 162.765625 274.464844 163.519531 C 272.769531 164.273438 270.867188 164.855469 268.753906 165.273438 C 266.644531 165.6875 264.15625 165.894531 261.296875 165.894531 C 257.375 165.894531 253.851562 165.328125 250.726562 164.199219 C 247.597656 163.066406 244.941406 161.523438 242.757812 159.5625 C 240.570312 157.605469 238.894531 155.285156 237.726562 152.609375 C 236.558594 149.9375 235.972656 147.015625 235.972656 143.851562 C 235.972656 140.609375 236.59375 137.671875 237.839844 135.03125 C 239.082031 132.394531 240.777344 130.023438 242.925781 127.910156 C 245.074219 125.800781 247.578125 123.917969 250.441406 122.257812 C 253.304688 120.601562 256.378906 119.074219 259.65625 117.679688 C 262.933594 116.285156 266.34375 115.007812 269.886719 113.839844 C 273.425781 112.671875 276.933594 111.558594 280.398438 110.503906 L 283.226562 109.824219 L 283.226562 101.460938 C 283.226562 96.035156 282.1875 92.191406 280.117188 89.929688 C 278.042969 87.667969 275.273438 86.539062 271.808594 86.539062 C 267.738281 86.539062 264.910156 87.519531 263.328125 89.476562 C 261.746094 91.4375 260.953125 93.808594 260.953125 96.597656 C 260.953125 98.179688 260.785156 99.726562 260.445312 101.234375 C 260.109375 102.742188 259.523438 104.058594 258.695312 105.191406 C 257.867188 106.320312 256.679688 107.226562 255.132812 107.902344 C 253.589844 108.582031 251.648438 108.921875 249.3125 108.921875 C 245.695312 108.921875 242.757812 107.882812 240.496094 105.8125 C 238.234375 103.738281 237.105469 101.121094 237.105469 97.953125 C 237.105469 95.015625 238.101562 92.285156 240.097656 89.761719 C 242.097656 87.234375 244.789062 85.066406 248.183594 83.261719 C 251.574219 81.449219 255.492188 80.019531 259.9375 78.964844 C 264.382812 77.910156 269.09375 77.382812 274.066406 77.382812 C 280.171875 77.382812 285.429688 77.929688 289.839844 79.019531 C 294.246094 80.113281 297.882812 81.675781 300.746094 83.710938 C 303.609375 85.746094 305.71875 88.195312 307.074219 91.058594 C 308.433594 93.921875 309.109375 97.128906 309.109375 100.667969 L 309.109375 164.3125"/><path fill="#CB3C33" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 235.273438 55.089844 C 235.273438 64.757812 227.4375 72.589844 217.773438 72.589844 C 208.105469 72.589844 200.273438 64.757812 200.273438 55.089844 C 200.273438 45.425781 208.105469 37.589844 217.773438 37.589844 C 227.4375 37.589844 235.273438 45.425781 235.273438 55.089844"/><path fill="#4063D8" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 72.953125 55.089844 C 72.953125 64.757812 65.117188 72.589844 55.453125 72.589844 C 45.789062 72.589844 37.953125 64.757812 37.953125 55.089844 C 37.953125 45.425781 45.789062 37.589844 55.453125 37.589844 C 65.117188 37.589844 72.953125 45.425781 72.953125 55.089844"/><path fill="#9558B2" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 277.320312 55.089844 C 277.320312 64.757812 269.484375 72.589844 259.820312 72.589844 C 250.15625 72.589844 242.320312 64.757812 242.320312 55.089844 C 242.320312 45.425781 250.15625 37.589844 259.820312 37.589844 C 269.484375 37.589844 277.320312 45.425781 277.320312 55.089844"/><path fill="#389826" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 256.300781 18.671875 C 256.300781 28.335938 248.464844 36.171875 238.800781 36.171875 C 229.132812 36.171875 221.300781 28.335938 221.300781 18.671875 C 221.300781 9.007812 229.132812 1.171875 238.800781 1.171875 C 248.464844 1.171875 256.300781 9.007812 256.300781 18.671875"/></g></svg>
\ No newline at end of file
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="320pt" height="200pt" version="1.1" viewBox="0 0 320 200"><g id="surface1"><path fill="#FFF" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 67.871094 164.3125 C 67.871094 171.847656 67.023438 177.933594 65.328125 182.566406 C 63.632812 187.203125 61.222656 190.800781 58.09375 193.363281 C 54.96875 195.925781 51.21875 197.640625 46.847656 198.507812 C 42.476562 199.371094 37.613281 199.804688 32.265625 199.804688 C 25.027344 199.804688 19.488281 198.675781 15.648438 196.414062 C 11.804688 194.152344 9.882812 191.441406 9.882812 188.273438 C 9.882812 185.636719 10.953125 183.414062 13.101562 181.605469 C 15.25 179.796875 18.132812 178.894531 21.75 178.894531 C 24.464844 178.894531 26.632812 179.628906 28.25 181.097656 C 29.871094 182.566406 31.210938 184.019531 32.265625 185.449219 C 33.46875 187.03125 34.488281 188.085938 35.316406 188.613281 C 36.144531 189.140625 36.898438 189.40625 37.578125 189.40625 C 39.007812 189.40625 40.101562 188.558594 40.855469 186.863281 C 41.609375 185.167969 41.984375 181.871094 41.984375 176.972656 L 41.984375 84.050781 L 67.871094 76.929688 L 67.871094 164.3125 M 104.738281 79.414062 L 104.738281 139.214844 C 104.738281 140.875 105.058594 142.4375 105.699219 143.90625 C 106.339844 145.375 107.226562 146.640625 108.355469 147.695312 C 109.488281 148.75 110.804688 149.597656 112.3125 150.238281 C 113.820312 150.878906 115.441406 151.199219 117.175781 151.199219 C 119.132812 151.199219 121.359375 150.101562 124.070312 148.203125 C 128.363281 145.195312 130.964844 143.128906 130.964844 140.683594 C 130.964844 140.097656 130.964844 79.414062 130.964844 79.414062 L 156.738281 79.414062 L 156.738281 164.3125 L 130.964844 164.3125 L 130.964844 156.398438 C 127.574219 159.261719 123.957031 161.558594 120.113281 163.292969 C 116.269531 165.027344 112.539062 165.894531 108.921875 165.894531 C 104.703125 165.894531 100.78125 165.195312 97.164062 163.800781 C 93.546875 162.40625 90.382812 160.503906 87.671875 158.09375 C 84.957031 155.683594 82.828125 152.855469 81.28125 149.613281 C 79.738281 146.375 78.964844 142.90625 78.964844 139.214844 L 78.964844 79.414062 L 104.738281 79.414062 M 192.882812 164.3125 L 167.222656 164.3125 L 167.222656 45.277344 L 192.882812 38.15625 L 192.882812 164.3125 M 203.601562 84.050781 L 229.375 76.929688 L 229.375 164.3125 L 203.601562 164.3125 L 203.601562 84.050781 M 283.226562 120.449219 C 280.738281 121.507812 278.230469 122.730469 275.707031 124.125 C 273.183594 125.519531 270.882812 127.046875 268.8125 128.703125 C 266.738281 130.359375 265.0625 132.132812 263.78125 134.015625 C 262.5 135.898438 261.859375 137.859375 261.859375 139.894531 C 261.859375 141.476562 262.066406 143.003906 262.480469 144.472656 C 262.894531 145.941406 263.480469 147.203125 264.234375 148.257812 C 264.988281 149.3125 265.816406 150.160156 266.722656 150.800781 C 267.625 151.441406 268.605469 151.761719 269.660156 151.761719 C 271.769531 151.761719 273.898438 151.121094 276.046875 149.839844 C 278.195312 148.558594 280.585938 146.941406 283.226562 144.980469 L 283.226562 120.449219 M 309.109375 164.3125 L 283.226562 164.3125 L 283.226562 157.527344 C 281.792969 158.734375 280.398438 159.847656 279.042969 160.863281 C 277.6875 161.878906 276.160156 162.765625 274.464844 163.519531 C 272.769531 164.273438 270.867188 164.855469 268.753906 165.273438 C 266.644531 165.6875 264.15625 165.894531 261.296875 165.894531 C 257.375 165.894531 253.851562 165.328125 250.726562 164.199219 C 247.597656 163.066406 244.941406 161.523438 242.757812 159.5625 C 240.570312 157.605469 238.894531 155.285156 237.726562 152.609375 C 236.558594 149.9375 235.972656 147.015625 235.972656 143.851562 C 235.972656 140.609375 236.59375 137.671875 237.839844 135.03125 C 239.082031 132.394531 240.777344 130.023438 242.925781 127.910156 C 245.074219 125.800781 247.578125 123.917969 250.441406 122.257812 C 253.304688 120.601562 256.378906 119.074219 259.65625 117.679688 C 262.933594 116.285156 266.34375 115.007812 269.886719 113.839844 C 273.425781 112.671875 276.933594 111.558594 280.398438 110.503906 L 283.226562 109.824219 L 283.226562 101.460938 C 283.226562 96.035156 282.1875 92.191406 280.117188 89.929688 C 278.042969 87.667969 275.273438 86.539062 271.808594 86.539062 C 267.738281 86.539062 264.910156 87.519531 263.328125 89.476562 C 261.746094 91.4375 260.953125 93.808594 260.953125 96.597656 C 260.953125 98.179688 260.785156 99.726562 260.445312 101.234375 C 260.109375 102.742188 259.523438 104.058594 258.695312 105.191406 C 257.867188 106.320312 256.679688 107.226562 255.132812 107.902344 C 253.589844 108.582031 251.648438 108.921875 249.3125 108.921875 C 245.695312 108.921875 242.757812 107.882812 240.496094 105.8125 C 238.234375 103.738281 237.105469 101.121094 237.105469 97.953125 C 237.105469 95.015625 238.101562 92.285156 240.097656 89.761719 C 242.097656 87.234375 244.789062 85.066406 248.183594 83.261719 C 251.574219 81.449219 255.492188 80.019531 259.9375 78.964844 C 264.382812 77.910156 269.09375 77.382812 274.066406 77.382812 C 280.171875 77.382812 285.429688 77.929688 289.839844 79.019531 C 294.246094 80.113281 297.882812 81.675781 300.746094 83.710938 C 303.609375 85.746094 305.71875 88.195312 307.074219 91.058594 C 308.433594 93.921875 309.109375 97.128906 309.109375 100.667969 L 309.109375 164.3125"/><path fill="#CB3C33" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 235.273438 55.089844 C 235.273438 64.757812 227.4375 72.589844 217.773438 72.589844 C 208.105469 72.589844 200.273438 64.757812 200.273438 55.089844 C 200.273438 45.425781 208.105469 37.589844 217.773438 37.589844 C 227.4375 37.589844 235.273438 45.425781 235.273438 55.089844"/><path fill="#4063D8" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 72.953125 55.089844 C 72.953125 64.757812 65.117188 72.589844 55.453125 72.589844 C 45.789062 72.589844 37.953125 64.757812 37.953125 55.089844 C 37.953125 45.425781 45.789062 37.589844 55.453125 37.589844 C 65.117188 37.589844 72.953125 45.425781 72.953125 55.089844"/><path fill="#9558B2" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 277.320312 55.089844 C 277.320312 64.757812 269.484375 72.589844 259.820312 72.589844 C 250.15625 72.589844 242.320312 64.757812 242.320312 55.089844 C 242.320312 45.425781 250.15625 37.589844 259.820312 37.589844 C 269.484375 37.589844 277.320312 45.425781 277.320312 55.089844"/><path fill="#389826" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 256.300781 18.671875 C 256.300781 28.335938 248.464844 36.171875 238.800781 36.171875 C 229.132812 36.171875 221.300781 28.335938 221.300781 18.671875 C 221.300781 9.007812 229.132812 1.171875 238.800781 1.171875 C 248.464844 1.171875 256.300781 9.007812 256.300781 18.671875"/></g></svg>
diff --git a/doc/src/assets/logo.svg b/doc/src/assets/logo.svg
index 3e2f41fccfae00..052ba0c5942158 100644
--- a/doc/src/assets/logo.svg
+++ b/doc/src/assets/logo.svg
@@ -1 +1,9 @@
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="320pt" height="200pt" version="1.1" viewBox="0 0 320 200"><g id="surface61"><path fill="#000" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 67.871094 164.3125 C 67.871094 171.847656 67.023438 177.933594 65.328125 182.566406 C 63.632812 187.203125 61.222656 190.800781 58.09375 193.363281 C 54.96875 195.925781 51.21875 197.640625 46.847656 198.507812 C 42.476562 199.371094 37.613281 199.804688 32.265625 199.804688 C 25.027344 199.804688 19.488281 198.675781 15.648438 196.414062 C 11.804688 194.152344 9.882812 191.441406 9.882812 188.273438 C 9.882812 185.636719 10.953125 183.414062 13.101562 181.605469 C 15.25 179.796875 18.132812 178.894531 21.75 178.894531 C 24.464844 178.894531 26.632812 179.628906 28.25 181.097656 C 29.871094 182.566406 31.210938 184.019531 32.265625 185.449219 C 33.46875 187.03125 34.488281 188.085938 35.316406 188.613281 C 36.144531 189.140625 36.898438 189.40625 37.578125 189.40625 C 39.007812 189.40625 40.101562 188.558594 40.855469 186.863281 C 41.609375 185.167969 41.984375 181.871094 41.984375 176.972656 L 41.984375 84.050781 L 67.871094 76.929688 L 67.871094 164.3125 M 104.738281 79.414062 L 104.738281 139.214844 C 104.738281 140.875 105.058594 142.4375 105.699219 143.90625 C 106.339844 145.375 107.226562 146.640625 108.355469 147.695312 C 109.488281 148.75 110.804688 149.597656 112.3125 150.238281 C 113.820312 150.878906 115.441406 151.199219 117.175781 151.199219 C 119.132812 151.199219 121.359375 150.101562 124.070312 148.203125 C 128.363281 145.195312 130.964844 143.128906 130.964844 140.683594 C 130.964844 140.097656 130.964844 79.414062 130.964844 79.414062 L 156.738281 79.414062 L 156.738281 164.3125 L 130.964844 164.3125 L 130.964844 156.398438 C 127.574219 159.261719 123.957031 161.558594 120.113281 163.292969 C 116.269531 165.027344 112.539062 165.894531 108.921875 165.894531 C 104.703125 165.894531 100.78125 165.195312 97.164062 163.800781 C 93.546875 162.40625 90.382812 160.503906 87.671875 158.09375 C 84.957031 155.683594 82.828125 152.855469 81.28125 149.613281 C 79.738281 146.375 78.964844 142.90625 78.964844 139.214844 L 78.964844 79.414062 L 104.738281 79.414062 M 192.882812 164.3125 L 167.222656 164.3125 L 167.222656 45.277344 L 192.882812 38.15625 L 192.882812 164.3125 M 203.601562 84.050781 L 229.375 76.929688 L 229.375 164.3125 L 203.601562 164.3125 L 203.601562 84.050781 M 283.226562 120.449219 C 280.738281 121.507812 278.230469 122.730469 275.707031 124.125 C 273.183594 125.519531 270.882812 127.046875 268.8125 128.703125 C 266.738281 130.359375 265.0625 132.132812 263.78125 134.015625 C 262.5 135.898438 261.859375 137.859375 261.859375 139.894531 C 261.859375 141.476562 262.066406 143.003906 262.480469 144.472656 C 262.894531 145.941406 263.480469 147.203125 264.234375 148.257812 C 264.988281 149.3125 265.816406 150.160156 266.722656 150.800781 C 267.625 151.441406 268.605469 151.761719 269.660156 151.761719 C 271.769531 151.761719 273.898438 151.121094 276.046875 149.839844 C 278.195312 148.558594 280.585938 146.941406 283.226562 144.980469 L 283.226562 120.449219 M 309.109375 164.3125 L 283.226562 164.3125 L 283.226562 157.527344 C 281.792969 158.734375 280.398438 159.847656 279.042969 160.863281 C 277.6875 161.878906 276.160156 162.765625 274.464844 163.519531 C 272.769531 164.273438 270.867188 164.855469 268.753906 165.273438 C 266.644531 165.6875 264.15625 165.894531 261.296875 165.894531 C 257.375 165.894531 253.851562 165.328125 250.726562 164.199219 C 247.597656 163.066406 244.941406 161.523438 242.757812 159.5625 C 240.570312 157.605469 238.894531 155.285156 237.726562 152.609375 C 236.558594 149.9375 235.972656 147.015625 235.972656 143.851562 C 235.972656 140.609375 236.59375 137.671875 237.839844 135.03125 C 239.082031 132.394531 240.777344 130.023438 242.925781 127.910156 C 245.074219 125.800781 247.578125 123.917969 250.441406 122.257812 C 253.304688 120.601562 256.378906 119.074219 259.65625 117.679688 C 262.933594 116.285156 266.34375 115.007812 269.886719 113.839844 C 273.425781 112.671875 276.933594 111.558594 280.398438 110.503906 L 283.226562 109.824219 L 283.226562 101.460938 C 283.226562 96.035156 282.1875 92.191406 280.117188 89.929688 C 278.042969 87.667969 275.273438 86.539062 271.808594 86.539062 C 267.738281 86.539062 264.910156 87.519531 263.328125 89.476562 C 261.746094 91.4375 260.953125 93.808594 260.953125 96.597656 C 260.953125 98.179688 260.785156 99.726562 260.445312 101.234375 C 260.109375 102.742188 259.523438 104.058594 258.695312 105.191406 C 257.867188 106.320312 256.679688 107.226562 255.132812 107.902344 C 253.589844 108.582031 251.648438 108.921875 249.3125 108.921875 C 245.695312 108.921875 242.757812 107.882812 240.496094 105.8125 C 238.234375 103.738281 237.105469 101.121094 237.105469 97.953125 C 237.105469 95.015625 238.101562 92.285156 240.097656 89.761719 C 242.097656 87.234375 244.789062 85.066406 248.183594 83.261719 C 251.574219 81.449219 255.492188 80.019531 259.9375 78.964844 C 264.382812 77.910156 269.09375 77.382812 274.066406 77.382812 C 280.171875 77.382812 285.429688 77.929688 289.839844 79.019531 C 294.246094 80.113281 297.882812 81.675781 300.746094 83.710938 C 303.609375 85.746094 305.71875 88.195312 307.074219 91.058594 C 308.433594 93.921875 309.109375 97.128906 309.109375 100.667969 L 309.109375 164.3125"/><path fill="#CB3C33" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 235.273438 55.089844 C 235.273438 64.757812 227.4375 72.589844 217.773438 72.589844 C 208.105469 72.589844 200.273438 64.757812 200.273438 55.089844 C 200.273438 45.425781 208.105469 37.589844 217.773438 37.589844 C 227.4375 37.589844 235.273438 45.425781 235.273438 55.089844"/><path fill="#4063D8" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 72.953125 55.089844 C 72.953125 64.757812 65.117188 72.589844 55.453125 72.589844 C 45.789062 72.589844 37.953125 64.757812 37.953125 55.089844 C 37.953125 45.425781 45.789062 37.589844 55.453125 37.589844 C 65.117188 37.589844 72.953125 45.425781 72.953125 55.089844"/><path fill="#9558B2" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 277.320312 55.089844 C 277.320312 64.757812 269.484375 72.589844 259.820312 72.589844 C 250.15625 72.589844 242.320312 64.757812 242.320312 55.089844 C 242.320312 45.425781 250.15625 37.589844 259.820312 37.589844 C 269.484375 37.589844 277.320312 45.425781 277.320312 55.089844"/><path fill="#389826" fill-opacity="1" fill-rule="nonzero" stroke="none" d="M 256.300781 18.671875 C 256.300781 28.335938 248.464844 36.171875 238.800781 36.171875 C 229.132812 36.171875 221.300781 28.335938 221.300781 18.671875 C 221.300781 9.007812 229.132812 1.171875 238.800781 1.171875 C 248.464844 1.171875 256.300781 9.007812 256.300781 18.671875"/></g></svg>
\ No newline at end of file
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="320pt" height="200pt" version="1.1" viewBox="0 0 320 200">
+<g id="surface61">
+<path fill="#000" fill-opacity="1" fill-rule="nonzero"  stroke-width="0.5" stroke="#eee" d="M 67.871094 164.3125 C 67.871094 171.847656 67.023438 177.933594 65.328125 182.566406 C 63.632812 187.203125 61.222656 190.800781 58.09375 193.363281 C 54.96875 195.925781 51.21875 197.640625 46.847656 198.507812 C 42.476562 199.371094 37.613281 199.804688 32.265625 199.804688 C 25.027344 199.804688 19.488281 198.675781 15.648438 196.414062 C 11.804688 194.152344 9.882812 191.441406 9.882812 188.273438 C 9.882812 185.636719 10.953125 183.414062 13.101562 181.605469 C 15.25 179.796875 18.132812 178.894531 21.75 178.894531 C 24.464844 178.894531 26.632812 179.628906 28.25 181.097656 C 29.871094 182.566406 31.210938 184.019531 32.265625 185.449219 C 33.46875 187.03125 34.488281 188.085938 35.316406 188.613281 C 36.144531 189.140625 36.898438 189.40625 37.578125 189.40625 C 39.007812 189.40625 40.101562 188.558594 40.855469 186.863281 C 41.609375 185.167969 41.984375 181.871094 41.984375 176.972656 L 41.984375 84.050781 L 67.871094 76.929688 L 67.871094 164.3125 M 104.738281 79.414062 L 104.738281 139.214844 C 104.738281 140.875 105.058594 142.4375 105.699219 143.90625 C 106.339844 145.375 107.226562 146.640625 108.355469 147.695312 C 109.488281 148.75 110.804688 149.597656 112.3125 150.238281 C 113.820312 150.878906 115.441406 151.199219 117.175781 151.199219 C 119.132812 151.199219 121.359375 150.101562 124.070312 148.203125 C 128.363281 145.195312 130.964844 143.128906 130.964844 140.683594 C 130.964844 140.097656 130.964844 79.414062 130.964844 79.414062 L 156.738281 79.414062 L 156.738281 164.3125 L 130.964844 164.3125 L 130.964844 156.398438 C 127.574219 159.261719 123.957031 161.558594 120.113281 163.292969 C 116.269531 165.027344 112.539062 165.894531 108.921875 165.894531 C 104.703125 165.894531 100.78125 165.195312 97.164062 163.800781 C 93.546875 162.40625 90.382812 160.503906 87.671875 158.09375 C 84.957031 155.683594 82.828125 152.855469 81.28125 149.613281 C 79.738281 146.375 78.964844 142.90625 78.964844 139.214844 L 78.964844 79.414062 L 104.738281 79.414062 M 192.882812 164.3125 L 167.222656 164.3125 L 167.222656 45.277344 L 192.882812 38.15625 L 192.882812 164.3125 M 203.601562 84.050781 L 229.375 76.929688 L 229.375 164.3125 L 203.601562 164.3125 L 203.601562 84.050781 M 283.226562 120.449219 C 280.738281 121.507812 278.230469 122.730469 275.707031 124.125 C 273.183594 125.519531 270.882812 127.046875 268.8125 128.703125 C 266.738281 130.359375 265.0625 132.132812 263.78125 134.015625 C 262.5 135.898438 261.859375 137.859375 261.859375 139.894531 C 261.859375 141.476562 262.066406 143.003906 262.480469 144.472656 C 262.894531 145.941406 263.480469 147.203125 264.234375 148.257812 C 264.988281 149.3125 265.816406 150.160156 266.722656 150.800781 C 267.625 151.441406 268.605469 151.761719 269.660156 151.761719 C 271.769531 151.761719 273.898438 151.121094 276.046875 149.839844 C 278.195312 148.558594 280.585938 146.941406 283.226562 144.980469 L 283.226562 120.449219 M 309.109375 164.3125 L 283.226562 164.3125 L 283.226562 157.527344 C 281.792969 158.734375 280.398438 159.847656 279.042969 160.863281 C 277.6875 161.878906 276.160156 162.765625 274.464844 163.519531 C 272.769531 164.273438 270.867188 164.855469 268.753906 165.273438 C 266.644531 165.6875 264.15625 165.894531 261.296875 165.894531 C 257.375 165.894531 253.851562 165.328125 250.726562 164.199219 C 247.597656 163.066406 244.941406 161.523438 242.757812 159.5625 C 240.570312 157.605469 238.894531 155.285156 237.726562 152.609375 C 236.558594 149.9375 235.972656 147.015625 235.972656 143.851562 C 235.972656 140.609375 236.59375 137.671875 237.839844 135.03125 C 239.082031 132.394531 240.777344 130.023438 242.925781 127.910156 C 245.074219 125.800781 247.578125 123.917969 250.441406 122.257812 C 253.304688 120.601562 256.378906 119.074219 259.65625 117.679688 C 262.933594 116.285156 266.34375 115.007812 269.886719 113.839844 C 273.425781 112.671875 276.933594 111.558594 280.398438 110.503906 L 283.226562 109.824219 L 283.226562 101.460938 C 283.226562 96.035156 282.1875 92.191406 280.117188 89.929688 C 278.042969 87.667969 275.273438 86.539062 271.808594 86.539062 C 267.738281 86.539062 264.910156 87.519531 263.328125 89.476562 C 261.746094 91.4375 260.953125 93.808594 260.953125 96.597656 C 260.953125 98.179688 260.785156 99.726562 260.445312 101.234375 C 260.109375 102.742188 259.523438 104.058594 258.695312 105.191406 C 257.867188 106.320312 256.679688 107.226562 255.132812 107.902344 C 253.589844 108.582031 251.648438 108.921875 249.3125 108.921875 C 245.695312 108.921875 242.757812 107.882812 240.496094 105.8125 C 238.234375 103.738281 237.105469 101.121094 237.105469 97.953125 C 237.105469 95.015625 238.101562 92.285156 240.097656 89.761719 C 242.097656 87.234375 244.789062 85.066406 248.183594 83.261719 C 251.574219 81.449219 255.492188 80.019531 259.9375 78.964844 C 264.382812 77.910156 269.09375 77.382812 274.066406 77.382812 C 280.171875 77.382812 285.429688 77.929688 289.839844 79.019531 C 294.246094 80.113281 297.882812 81.675781 300.746094 83.710938 C 303.609375 85.746094 305.71875 88.195312 307.074219 91.058594 C 308.433594 93.921875 309.109375 97.128906 309.109375 100.667969 L 309.109375 164.3125"/>
+<path fill="#CB3C33" fill-opacity="1" fill-rule="nonzero" stroke-width="0.5" stroke="#eee" d="M 235.273438 55.089844 C 235.273438 64.757812 227.4375 72.589844 217.773438 72.589844 C 208.105469 72.589844 200.273438 64.757812 200.273438 55.089844 C 200.273438 45.425781 208.105469 37.589844 217.773438 37.589844 C 227.4375 37.589844 235.273438 45.425781 235.273438 55.089844"/>
+<path fill="#4063D8" fill-opacity="1" fill-rule="nonzero" stroke-width="0.5" stroke="#eee" d="M 72.953125 55.089844 C 72.953125 64.757812 65.117188 72.589844 55.453125 72.589844 C 45.789062 72.589844 37.953125 64.757812 37.953125 55.089844 C 37.953125 45.425781 45.789062 37.589844 55.453125 37.589844 C 65.117188 37.589844 72.953125 45.425781 72.953125 55.089844"/>
+<path fill="#9558B2" fill-opacity="1" fill-rule="nonzero" stroke-width="0.5" stroke="#eee" d="M 277.320312 55.089844 C 277.320312 64.757812 269.484375 72.589844 259.820312 72.589844 C 250.15625 72.589844 242.320312 64.757812 242.320312 55.089844 C 242.320312 45.425781 250.15625 37.589844 259.820312 37.589844 C 269.484375 37.589844 277.320312 45.425781 277.320312 55.089844"/>
+<path fill="#389826" fill-opacity="1" fill-rule="nonzero" stroke-width="0.5" stroke="#eee" d="M 256.300781 18.671875 C 256.300781 28.335938 248.464844 36.171875 238.800781 36.171875 C 229.132812 36.171875 221.300781 28.335938 221.300781 18.671875 C 221.300781 9.007812 229.132812 1.171875 238.800781 1.171875 C 248.464844 1.171875 256.300781 9.007812 256.300781 18.671875"/>
+</g>
+</svg>
diff --git a/doc/src/assets/logo.tex b/doc/src/assets/logo.tex
new file mode 100644
index 00000000000000..a19022140d17fe
--- /dev/null
+++ b/doc/src/assets/logo.tex
@@ -0,0 +1,142 @@
+
+%% Direct translation of the Julia logo definition code in Luxor.jl
+%%  https://github.com/JuliaGraphics/Luxor.jl/blob/master/src/juliagraphics.jl#L62
+\begin{tikzpicture}[x=1,y=1,yscale=-\scaleFactor,xscale=\scaleFactor]
+% Blue circle in "j"
+\path[fill=julia_blue] (77.953125, 68.08984375) ..
+  controls (77.953125,  77.7578125)  and (70.1171875, 85.58984375) .. (60.453125, 85.58984375) ..
+  controls (50.7890625, 85.58984375) and (42.953125,  77.7578125)  .. (42.953125, 68.08984375) ..
+  controls (42.953125,  58.42578125) and (50.7890625, 50.58984375) .. (60.453125, 50.58984375) ..
+  controls (70.1171875, 50.58984375) and (77.953125,  58.42578125) .. (77.953125, 68.08984375);
+
+% Letter "j"
+\path[fill=black] (72.87109375, 177.3125) ..
+  controls (72.87109375, 184.84765625) and (72.0234375, 190.93359375) .. (70.328125, 195.56640625) ..
+  controls (68.6328125, 200.203125) and (66.22265625, 203.80078125) .. (63.09375, 206.36328125) ..
+  controls (59.96875, 208.92578125) and (56.21875, 210.640625) .. (51.84765625, 211.5078125) ..
+  controls (47.4765625, 212.37109375) and (42.61328125, 212.8046875) .. (37.265625, 212.8046875) ..
+  controls (30.02734375, 212.8046875) and (24.48828125, 211.67578125) .. (20.6484375, 209.4140625) ..
+  controls (16.8046875, 207.15234375) and (14.8828125, 204.44140625) .. (14.8828125, 201.2734375) ..
+  controls (14.8828125, 198.63671875) and (15.953125, 196.4140625) .. (18.1015625, 194.60546875) ..
+  controls (20.25, 192.796875) and (23.1328125, 191.89453125) .. (26.75, 191.89453125) ..
+  controls (29.46484375, 191.89453125) and (31.6328125, 192.62890625) .. (33.25, 194.09765625) ..
+  controls (34.87109375, 195.56640625) and (36.2109375, 197.01953125) .. (37.265625, 198.44921875) ..
+  controls (38.46875, 200.03125) and (39.48828125, 201.0859375) .. (40.31640625, 201.61328125) ..
+  controls (41.14453125, 202.140625) and (41.8984375, 202.40625) .. (42.578125, 202.40625) ..
+  controls (44.0078125, 202.40625) and (45.1015625, 201.55859375) .. (45.85546875, 199.86328125) ..
+  controls (46.609375, 198.16796875) and (46.984375, 194.87109375) .. (46.984375, 189.97265625) --
+  (46.984375, 97.05078125) --
+  (72.87109375, 89.9296875) --
+  cycle;
+
+% Letter "u"
+\path[fill=black] (109.73828125, 92.4140625) --
+  (109.73828125, 152.21484375) ..
+  controls (109.73828125, 153.875) and (110.05859375, 155.4375) .. (110.69921875, 156.90625) ..
+  controls (111.33984375, 158.375) and (112.2265625, 159.640625) .. (113.35546875, 160.6953125) ..
+  controls (114.48828125, 161.75) and (115.8046875, 162.59765625) .. (117.3125, 163.23828125) ..
+  controls (118.8203125, 163.87890625) and (120.44140625, 164.19921875) .. (122.17578125, 164.19921875) ..
+  controls (124.1328125, 164.19921875) and (126.359375, 163.1015625) .. (129.0703125, 161.203125) ..
+  controls (133.36328125, 158.1953125) and (135.96484375, 156.12890625) .. (135.96484375, 153.68359375) ..
+  controls (135.96484375, 153.09765625) and (135.96484375, 92.4140625) .. (135.96484375, 92.4140625) --
+  (161.73828125, 92.4140625) --
+  (161.73828125, 177.3125) --
+  (135.96484375, 177.3125) --
+  (135.96484375, 169.3984375) ..
+  controls (132.57421875, 172.26171875) and (128.95703125, 174.55859375) .. (125.11328125, 176.29296875) ..
+  controls (121.26953125, 178.02734375) and (117.5390625, 178.89453125) .. (113.921875, 178.89453125) ..
+  controls (109.703125, 178.89453125) and (105.78125, 178.1953125) .. (102.1640625, 176.80078125) ..
+  controls (98.546875, 175.40625) and (95.3828125, 173.50390625) .. (92.671875, 171.09375) ..
+  controls (89.95703125, 168.68359375) and (87.828125, 165.85546875) .. (86.28125, 162.61328125) ..
+  controls (84.73828125, 159.375) and (83.96484375, 155.90625) .. (83.96484375, 152.21484375) --
+  (83.96484375, 92.4140625) --
+  cycle;
+
+% Letter "l"
+\path[fill=black] (197.8828125, 177.3125) --
+  (172.22265625, 177.3125) --
+  (172.22265625, 58.27734375) --
+  (197.8828125, 51.15625) --
+  cycle;
+
+%% "i" + 3 circles
+% Green circle
+\path[fill=julia_green] (261.30078125, 31.671875) ..
+  controls (261.30078125, 41.3359375) and (253.46484375, 49.171875)  .. (243.80078125, 49.171875) ..
+  controls (234.1328125,  49.171875)  and (226.30078125, 41.3359375) .. (226.30078125, 31.671875) ..
+  controls (226.30078125, 22.0078125) and (234.1328125,  14.171875)  .. (243.80078125, 14.171875) ..
+  controls (253.46484375, 14.171875)  and (261.30078125, 22.0078125) .. (261.30078125, 31.671875);
+
+% Purple circle
+\path[fill=julia_purple] (282.3203125, 68.08984375) ..
+  controls (282.3203125, 77.7578125)  and (274.484375,  85.58984375) .. (264.8203125, 85.58984375) ..
+  controls (255.15625,   85.58984375) and (247.3203125, 77.7578125)  .. (247.3203125, 68.08984375) ..
+  controls (247.3203125, 58.42578125) and (255.15625,   50.58984375) .. (264.8203125, 50.58984375) ..
+  controls (274.484375,  50.58984375) and (282.3203125, 58.42578125) .. (282.3203125, 68.08984375);
+
+% Red circle in "i"
+\path[fill=julia_red] (240.2734375, 68.08984375) ..
+  controls (240.2734375,  77.7578125)  and (232.4375,     85.58984375) .. (222.7734375, 85.58984375) ..
+  controls (213.10546875, 85.58984375) and (205.2734375,  77.7578125)  .. (205.2734375, 68.08984375) ..
+  controls (205.2734375,  58.42578125) and (213.10546875, 50.58984375) .. (222.7734375, 50.58984375) ..
+  controls (232.4375,     50.58984375) and (240.2734375,  58.42578125) .. (240.2734375, 68.08984375);
+
+% Letter "i"
+\path[fill=black] (208.6015625, 97.05078125) --
+  (234.375, 89.9296875) --
+  (234.375, 177.3125) --
+  (208.6015625, 177.3125) --
+  cycle;
+
+% Letter "a"
+\path[fill=black,nonzero rule] (288.2265625, 133.44921875) ..
+  controls (285.73828125, 134.5078125) and (283.23046875, 135.73046875) .. (280.70703125, 137.125) ..
+  controls (278.18359375, 138.51953125) and (275.8828125, 140.046875) .. (273.8125, 141.703125) ..
+  controls (271.73828125, 143.359375) and (270.0625, 145.1328125) .. (268.78125, 147.015625) ..
+  controls (267.5, 148.8984375) and (266.859375, 150.859375) .. (266.859375, 152.89453125) ..
+  controls (266.859375, 154.4765625) and (267.06640625, 156.00390625) .. (267.48046875, 157.47265625) ..
+  controls (267.89453125, 158.94140625) and (268.48046875, 160.203125) .. (269.234375, 161.2578125) ..
+  controls (269.98828125, 162.3125) and (270.81640625, 163.16015625) .. (271.72265625, 163.80078125) ..
+  controls (272.625, 164.44140625) and (273.60546875, 164.76171875) .. (274.66015625, 164.76171875) ..
+  controls (276.76953125, 164.76171875) and (278.8984375, 164.12109375) .. (281.046875, 162.83984375) ..
+  controls (283.1953125, 161.55859375) and (285.5859375, 159.94140625) .. (288.2265625, 157.98046875) --
+  cycle
+  (314.109375, 177.3125) --
+  (288.2265625, 177.3125) --
+  (288.2265625, 170.52734375) ..
+  controls (286.79296875, 171.734375) and (285.3984375, 172.84765625) .. (284.04296875, 173.86328125) ..
+  controls (282.6875, 174.87890625) and (281.16015625, 175.765625) .. (279.46484375, 176.51953125) ..
+  controls (277.76953125, 177.2734375) and (275.8671875, 177.85546875) .. (273.75390625, 178.2734375) ..
+  controls (271.64453125, 178.6875) and (269.15625, 178.89453125) .. (266.296875, 178.89453125) ..
+  controls (262.375, 178.89453125) and (258.8515625, 178.328125) .. (255.7265625, 177.19921875) ..
+  controls (252.59765625, 176.06640625) and (249.94140625, 174.5234375) .. (247.7578125, 172.5625) ..
+  controls (245.5703125, 170.60546875) and (243.89453125, 168.28515625) .. (242.7265625, 165.609375) ..
+  controls (241.55859375, 162.9375) and (240.97265625, 160.015625) .. (240.97265625, 156.8515625) ..
+  controls (240.97265625, 153.609375) and (241.59375, 150.671875) .. (242.83984375, 148.03125) ..
+  controls (244.08203125, 145.39453125) and (245.77734375, 143.0234375) .. (247.92578125, 140.91015625) ..
+  controls (250.07421875, 138.80078125) and (252.578125, 136.91796875) .. (255.44140625, 135.2578125) ..
+  controls (258.3046875, 133.6015625) and (261.37890625, 132.07421875) .. (264.65625, 130.6796875) ..
+  controls (267.93359375, 129.28515625) and (271.34375, 128.0078125) .. (274.88671875, 126.83984375) ..
+  controls (278.42578125, 125.671875) and (281.93359375, 124.55859375) .. (285.3984375, 123.50390625) --
+  (288.2265625, 122.82421875) --
+  (288.2265625, 114.4609375) ..
+  controls (288.2265625, 109.03515625) and (287.1875, 105.19140625) .. (285.1171875, 102.9296875) ..
+  controls (283.04296875, 100.66796875) and (280.2734375, 99.5390625) .. (276.80859375, 99.5390625) ..
+  controls (272.73828125, 99.5390625) and (269.91015625, 100.51953125) .. (268.328125, 102.4765625) ..
+  controls (266.74609375, 104.4375) and (265.953125, 106.80859375) .. (265.953125, 109.59765625) ..
+  controls (265.953125, 111.1796875) and (265.78515625, 112.7265625) .. (265.4453125, 114.234375) ..
+  controls (265.109375, 115.7421875) and (264.5234375, 117.05859375) .. (263.6953125, 118.19140625) ..
+  controls (262.8671875, 119.3203125) and (261.6796875, 120.2265625) .. (260.1328125, 120.90234375) ..
+  controls (258.58984375, 121.58203125) and (256.6484375, 121.921875) .. (254.3125, 121.921875) ..
+  controls (250.6953125, 121.921875) and (247.7578125, 120.8828125) .. (245.49609375, 118.8125) ..
+  controls (243.234375, 116.73828125) and (242.10546875, 114.12109375) .. (242.10546875, 110.953125) ..
+  controls (242.10546875, 108.015625) and (243.1015625, 105.28515625) .. (245.09765625, 102.76171875) ..
+  controls (247.09765625, 100.234375) and (249.7890625, 98.06640625) .. (253.18359375, 96.26171875) ..
+  controls (256.57421875, 94.44921875) and (260.4921875, 93.01953125) .. (264.9375, 91.96484375) ..
+  controls (269.3828125, 90.91015625) and (274.09375, 90.3828125) .. (279.06640625, 90.3828125) ..
+  controls (285.171875, 90.3828125) and (290.4296875, 90.9296875) .. (294.83984375, 92.01953125) ..
+  controls (299.24609375, 93.11328125) and (302.8828125, 94.67578125) .. (305.74609375, 96.7109375) ..
+  controls (308.609375, 98.74609375) and (310.71875, 101.1953125) .. (312.07421875, 104.05859375) ..
+  controls (313.43359375, 106.921875) and (314.109375, 110.12890625) .. (314.109375, 113.66796875) --
+  cycle;
+\end{tikzpicture}
diff --git a/doc/src/assets/preamble.tex b/doc/src/assets/preamble.tex
new file mode 100644
index 00000000000000..fe26add788c5b3
--- /dev/null
+++ b/doc/src/assets/preamble.tex
@@ -0,0 +1,48 @@
+%% Copied from the default preamble of `Documenter.jl`.
+%%
+%%  With patch:
+%%  - inserting a custom cover
+
+
+%% Default preamble BEGIN
+\documentclass[oneside, a4paper]{memoir}
+
+\usepackage{./documenter}
+\usepackage{./custom}
+
+
+%% TOC settings
+% -- TOC depth
+%   value: [part, chapter, section, subsection,
+%           subsubsection, paragraph, subparagraph]
+\settocdepth{section}  % show "part+chapter+section" in TOC
+% -- TOC spacing
+%   ref: https://tex.stackexchange.com/questions/60317/toc-spacing-in-memoir
+%   doc: memoir/memman.pdf
+%       - Figure 9.2: Layout of a ToC
+%       - Table 9.3: Value of K in macros for styling entries
+\makeatletter
+% {part} to {chaper}
+\setlength{\cftbeforepartskip}{1.5em \@plus \p@}
+% {chaper} to {chaper}
+\setlength{\cftbeforechapterskip}{0.0em \@plus \p@}
+% Chapter num to chapter title spacing (Figure 9.2@memman)
+\setlength{\cftchapternumwidth}{2.5em \@plus \p@}
+% indent before section number
+\setlength{\cftsectionindent}{2.5em \@plus \p@}
+% Section num to section title spacing (Figure 9.2@memman)
+\setlength{\cftsectionnumwidth}{4.0em \@plus \p@}
+\makeatother
+
+%% Main document begin
+\begin{document}
+
+\frontmatter
+%% ---- Custom cover page
+% \maketitle
+\input{assets/cover.tex}  % insert cover page
+%% ---- Custom cover page
+\cleardoublepage % makes the next page a odd-numbered page
+\tableofcontents
+\mainmatter
+%% preamble END
diff --git a/doc/src/base/arrays.md b/doc/src/base/arrays.md
index 50c2c09caae9c3..1dc2d8ed926afd 100644
--- a/doc/src/base/arrays.md
+++ b/doc/src/base/arrays.md
@@ -40,6 +40,7 @@ Base.trues
 Base.falses
 Base.fill
 Base.fill!
+Base.empty
 Base.similar
 ```
 
@@ -51,6 +52,7 @@ Base.size
 Base.axes(::Any)
 Base.axes(::AbstractArray, ::Any)
 Base.length(::AbstractArray)
+Base.keys(::AbstractArray)
 Base.eachindex
 Base.IndexStyle
 Base.IndexLinear
@@ -91,6 +93,7 @@ Base.Broadcast.result_style
 Base.getindex(::AbstractArray, ::Any...)
 Base.setindex!(::AbstractArray, ::Any, ::Any...)
 Base.copyto!(::AbstractArray, ::CartesianIndices, ::AbstractArray, ::CartesianIndices)
+Base.copy!
 Base.isassigned
 Base.Colon
 Base.CartesianIndex
@@ -100,6 +103,7 @@ Base.LinearIndices
 Base.to_indices
 Base.checkbounds
 Base.checkindex
+Base.elsize
 ```
 
 ## Views (SubArrays and other view types)
@@ -127,6 +131,7 @@ Base.reinterpret
 Base.reshape
 Base.dropdims
 Base.vec
+Base.SubArray
 ```
 
 ## Concatenation and permutation
@@ -136,6 +141,7 @@ Base.cat
 Base.vcat
 Base.hcat
 Base.hvcat
+Base.hvncat
 Base.vect
 Base.circshift
 Base.circshift!
diff --git a/doc/src/base/base.md b/doc/src/base/base.md
index 180c49f1315bf5..93d05470987067 100644
--- a/doc/src/base/base.md
+++ b/doc/src/base/base.md
@@ -15,6 +15,11 @@ Some general notes:
   * By convention, function names ending with an exclamation point (`!`) modify their arguments.
     Some functions have both modifying (e.g., `sort!`) and non-modifying (`sort`) versions.
 
+The behaviors of `Base` and standard libraries are stable as defined in
+[SemVer](https://semver.org/) only if they are documented; i.e., included in the
+[Julia documentation](https://docs.julialang.org/) and not marked as unstable.
+See [API FAQ](@ref man-api) for more information.
+
 ## Getting Around
 
 ```@docs
@@ -33,9 +38,11 @@ Base.which(::Any, ::Any)
 Base.methods
 Base.@show
 ans
+Base.active_project
+Base.set_active_project
 ```
 
-## Keywords
+## [Keywords](@id Keywords)
 
 This is the list of reserved keywords in Julia:
 `baremodule`, `begin`, `break`, `catch`, `const`, `continue`, `do`,
@@ -49,9 +56,11 @@ The following two-word sequences are reserved:
 However, you can create variables with names:
 `abstract`, `mutable`, `primitive` and `type`.
 
-Finally, `where` is parsed as an infix operator for writing parametric method
-and type definitions. Also `in` and `isa` are parsed as infix operators.
-Creation of a variable named `where`, `in` or `isa` is allowed though.
+Finally:
+`where` is parsed as an infix operator for writing parametric method and type definitions;
+`in` and `isa` are parsed as infix operators;
+and `outer` is parsed as a keyword when used to modify the scope of a variable in an iteration specification of a `for` loop or `generator` expression.
+Creation of variables named `where`, `in`, `isa` or `outer` is allowed though.
 
 ```@docs
 module
@@ -155,6 +164,7 @@ Base.typejoin
 Base.typeintersect
 Base.promote_type
 Base.promote_rule
+Base.promote_typejoin
 Base.isdispatchtuple
 ```
 
@@ -170,7 +180,12 @@ Base.isstructtype
 Base.nameof(::DataType)
 Base.fieldnames
 Base.fieldname
+Core.fieldtype
+Base.fieldtypes
+Base.fieldcount
 Base.hasfield
+Core.nfields
+Base.isconst
 ```
 
 ### Memory layout
@@ -180,9 +195,6 @@ Base.sizeof(::Type)
 Base.isconcretetype
 Base.isbits
 Base.isbitstype
-Core.fieldtype
-Base.fieldtypes
-Base.fieldcount
 Base.fieldoffset
 Base.datatype_alignment
 Base.datatype_haspadding
@@ -210,14 +222,17 @@ Core.Union
 Union{}
 Core.UnionAll
 Core.Tuple
+Core.NTuple
 Core.NamedTuple
 Base.@NamedTuple
 Base.Val
 Core.Vararg
 Core.Nothing
 Base.isnothing
+Base.notnothing
 Base.Some
 Base.something
+Base.@something
 Base.Enums.Enum
 Base.Enums.@enum
 Core.Expr
@@ -232,11 +247,18 @@ Core.Module
 Core.Function
 Base.hasmethod
 Core.applicable
+Base.isambiguous
 Core.invoke
+Base.@invoke
 Base.invokelatest
+Base.@invokelatest
 new
 Base.:(|>)
 Base.:(∘)
+Base.ComposedFunction
+Base.splat
+Base.Fix1
+Base.Fix2
 ```
 
 ## Syntax
@@ -263,6 +285,7 @@ Base.@simd
 Base.@polly
 Base.@generated
 Base.@pure
+Base.@assume_effects
 Base.@deprecate
 ```
 
@@ -271,6 +294,7 @@ Base.@deprecate
 Base.Missing
 Base.missing
 Base.coalesce
+Base.@coalesce
 Base.ismissing
 Base.skipmissing
 Base.nonmissingtype
@@ -291,7 +315,9 @@ Base.ignorestatus
 Base.detach
 Base.Cmd
 Base.setenv
+Base.addenv
 Base.withenv
+Base.setcpuaffinity
 Base.pipeline(::Any, ::Any, ::Any, ::Any...)
 Base.pipeline(::Base.AbstractCmd)
 Base.Libc.gethostname
@@ -299,6 +325,7 @@ Base.Libc.getpid
 Base.Libc.time()
 Base.time_ns
 Base.@time
+Base.@showtime
 Base.@timev
 Base.@timed
 Base.@elapsed
@@ -335,7 +362,7 @@ Core.throw
 Base.rethrow
 Base.backtrace
 Base.catch_backtrace
-Base.catch_stack
+Base.current_exceptions
 Base.@assert
 Base.Experimental.register_error_hint
 Base.Experimental.show_error_hints
@@ -385,15 +412,20 @@ Base.AsyncCondition(::Function)
 Base.nameof(::Module)
 Base.parentmodule
 Base.pathof(::Module)
+Base.pkgdir(::Module)
 Base.moduleroot
+__module__
+__source__
 Base.@__MODULE__
+Base.@__FILE__
+Base.@__DIR__
+Base.@__LINE__
 Base.fullname
 Base.names
-Core.nfields
-Base.isconst
 Base.nameof(::Function)
 Base.functionloc(::Any, ::Any)
 Base.functionloc(::Method)
+Base.@locals
 ```
 
 ## Internals
@@ -403,6 +435,7 @@ Base.GC.gc
 Base.GC.enable
 Base.GC.@preserve
 Base.GC.safepoint
+Base.GC.enable_logging
 Meta.lower
 Meta.@lower
 Meta.parse(::AbstractString, ::Int)
@@ -415,11 +448,16 @@ Base.@macroexpand1
 Base.code_lowered
 Base.code_typed
 Base.precompile
+Base.jit_total_bytes
 ```
 
 ## Meta
 ```@docs
 Meta.quot
 Meta.isexpr
+Meta.isidentifier
+Meta.isoperator
+Meta.isunaryoperator
+Meta.isbinaryoperator
 Meta.show_sexpr
 ```
diff --git a/doc/src/base/collections.md b/doc/src/base/collections.md
index 383dbcda4f93ef..d096bf08e13ad4 100644
--- a/doc/src/base/collections.md
+++ b/doc/src/base/collections.md
@@ -66,6 +66,7 @@ Base.LinRange
 Base.isempty
 Base.empty!
 Base.length
+Base.checked_length
 ```
 
 Fully implemented by:
@@ -93,6 +94,7 @@ Base.indexin
 Base.unique
 Base.unique!
 Base.allunique
+Base.allequal
 Base.reduce(::Any, ::Any)
 Base.foldl(::Any, ::Any)
 Base.foldr(::Any, ::Any)
@@ -101,6 +103,7 @@ Base.maximum!
 Base.minimum
 Base.minimum!
 Base.extrema
+Base.extrema!
 Base.argmax
 Base.argmin
 Base.findmax
@@ -118,8 +121,6 @@ Base.all(::Any)
 Base.all(::AbstractArray, ::Any)
 Base.all!
 Base.count
-Base.any(::Any, ::Any)
-Base.all(::Any, ::Any)
 Base.foreach
 Base.map
 Base.map!
@@ -138,6 +139,8 @@ Base.filter!
 Base.replace(::Any, ::Pair...)
 Base.replace(::Base.Callable, ::Any)
 Base.replace!
+Base.rest
+Base.split_rest
 ```
 
 ## Indexable Collections
@@ -199,10 +202,8 @@ Base.IdDict
 Base.WeakKeyDict
 Base.ImmutableDict
 Base.haskey
-Base.get(::Any, ::Any, ::Any)
 Base.get
-Base.get!(::Any, ::Any, ::Any)
-Base.get!(::Function, ::Any, ::Any)
+Base.get!
 Base.getkey
 Base.delete!
 Base.pop!(::Any, ::Any, ::Any)
@@ -274,6 +275,7 @@ Base.pushfirst!
 Base.popfirst!
 Base.insert!
 Base.deleteat!
+Base.keepat!
 Base.splice!
 Base.resize!
 Base.append!
diff --git a/doc/src/base/file.md b/doc/src/base/file.md
index da44a64d4cf147..86a1f2bab5dcd8 100644
--- a/doc/src/base/file.md
+++ b/doc/src/base/file.md
@@ -8,12 +8,14 @@ Base.Filesystem.readdir
 Base.Filesystem.walkdir
 Base.Filesystem.mkdir
 Base.Filesystem.mkpath
+Base.Filesystem.hardlink
 Base.Filesystem.symlink
 Base.Filesystem.readlink
 Base.Filesystem.chmod
 Base.Filesystem.chown
 Base.RawFD
 Base.stat
+Base.Filesystem.diskstat
 Base.Filesystem.lstat
 Base.Filesystem.ctime
 Base.Filesystem.mtime
@@ -48,9 +50,6 @@ Base.Filesystem.issticky
 Base.Filesystem.homedir
 Base.Filesystem.dirname
 Base.Filesystem.basename
-Base.@__FILE__
-Base.@__DIR__
-Base.@__LINE__
 Base.Filesystem.isabspath
 Base.Filesystem.isdirpath
 Base.Filesystem.joinpath
diff --git a/doc/src/base/io-network.md b/doc/src/base/io-network.md
index b798a708f22b21..4e371039f1a9b6 100644
--- a/doc/src/base/io-network.md
+++ b/doc/src/base/io-network.md
@@ -13,6 +13,7 @@ Base.take!(::Base.GenericIOBuffer)
 Base.fdio
 Base.flush
 Base.close
+Base.closewrite
 Base.write
 Base.read
 Base.read!
@@ -28,7 +29,7 @@ Base.seekend
 Base.skip
 Base.mark
 Base.unmark
-Base.reset
+Base.reset(::IO)
 Base.ismarked
 Base.eof
 Base.isreadonly
@@ -36,6 +37,7 @@ Base.iswritable
 Base.isreadable
 Base.isopen
 Base.fd
+Base.redirect_stdio
 Base.redirect_stdout
 Base.redirect_stdout(::Function, ::Any)
 Base.redirect_stderr
@@ -72,7 +74,7 @@ Base.eachline
 Base.displaysize
 ```
 
-## Multimedia I/O
+## [Multimedia I/O](@id Multimedia-I/O)
 
 Just as text output is performed by [`print`](@ref) and user-defined types can indicate their textual
 representation by overloading [`show`](@ref), Julia provides a standardized mechanism for rich multimedia
diff --git a/doc/src/base/iterators.md b/doc/src/base/iterators.md
index 8afc54b3bd11bb..1c4831e52bc141 100644
--- a/doc/src/base/iterators.md
+++ b/doc/src/base/iterators.md
@@ -14,6 +14,7 @@ Base.Iterators.cycle
 Base.Iterators.repeated
 Base.Iterators.product
 Base.Iterators.flatten
+Base.Iterators.flatmap
 Base.Iterators.partition
 Base.Iterators.map
 Base.Iterators.filter
diff --git a/doc/src/base/math.md b/doc/src/base/math.md
index 600b88efb5546a..bdf91c991183fb 100644
--- a/doc/src/base/math.md
+++ b/doc/src/base/math.md
@@ -49,6 +49,8 @@ Base.:(~)
 Base.:(&)
 Base.:(|)
 Base.xor
+Base.nand
+Base.nor
 Base.:(!)
 &&
 ||
@@ -65,6 +67,7 @@ Base.tan(::Number)
 Base.Math.sind
 Base.Math.cosd
 Base.Math.tand
+Base.Math.sincosd
 Base.Math.sinpi
 Base.Math.cospi
 Base.Math.sincospi
@@ -153,15 +156,16 @@ Base.copysign
 Base.sign
 Base.signbit
 Base.flipsign
-Base.sqrt(::Real)
+Base.sqrt(::Number)
 Base.isqrt
 Base.Math.cbrt
-Base.real(::Complex)
+Base.real
 Base.imag
 Base.reim
 Base.conj
 Base.angle
 Base.cis
+Base.cispi
 Base.binomial
 Base.factorial
 Base.gcd
@@ -174,6 +178,7 @@ Base.nextprod
 Base.invmod
 Base.powermod
 Base.ndigits
+Base.add_sum
 Base.widemul
 Base.Math.evalpoly
 Base.Math.@evalpoly
diff --git a/doc/src/base/multi-threading.md b/doc/src/base/multi-threading.md
index 4f3e4e53634a96..293857c1c6c650 100644
--- a/doc/src/base/multi-threading.md
+++ b/doc/src/base/multi-threading.md
@@ -6,22 +6,35 @@ Base.Threads.foreach
 Base.Threads.@spawn
 Base.Threads.threadid
 Base.Threads.nthreads
+Base.Threads.threadpool
+Base.Threads.nthreadpools
 ```
 
-## Synchronization
+See also [Multi-Threading](@ref man-multithreading).
+
+## Atomic operations
 
 ```@docs
-Base.Threads.Condition
-Base.Threads.Event
+Base.@atomic
+Base.@atomicswap
+Base.@atomicreplace
 ```
 
-See also [Synchronization](@ref lib-task-sync).
+!!! note
 
-## Atomic operations
+    The following APIs are fairly primitive, and will likely be exposed through an `unsafe_*`-like wrapper.
+
+```
+Core.Intrinsics.atomic_pointerref(pointer::Ptr{T}, order::Symbol) --> T
+Core.Intrinsics.atomic_pointerset(pointer::Ptr{T}, new::T, order::Symbol) --> pointer
+Core.Intrinsics.atomic_pointerswap(pointer::Ptr{T}, new::T, order::Symbol) --> old
+Core.Intrinsics.atomic_pointermodify(pointer::Ptr{T}, function::(old::T,arg::S)->T, arg::S, order::Symbol) --> old
+Core.Intrinsics.atomic_pointerreplace(pointer::Ptr{T}, expected::Any, new::T, success_order::Symbol, failure_order::Symbol) --> (old, cmp)
+```
 
 !!! warning
 
-    The API for atomic operations has not yet been finalized and is likely to change.
+    The following APIs are deprecated, though support for them is likely to remain for several releases.
 
 ```@docs
 Base.Threads.Atomic
@@ -38,7 +51,7 @@ Base.Threads.atomic_min!
 Base.Threads.atomic_fence
 ```
 
-## ccall using a threadpool (Experimental)
+## ccall using a libuv threadpool (Experimental)
 
 ```@docs
 Base.@threadcall
diff --git a/doc/src/base/numbers.md b/doc/src/base/numbers.md
index 47a33b44472643..b92bf6a1d87684 100644
--- a/doc/src/base/numbers.md
+++ b/doc/src/base/numbers.md
@@ -111,7 +111,7 @@ Base.@int128_str
 Base.@uint128_str
 ```
 
-## BigFloats and BigInts
+## [BigFloats and BigInts](@id BigFloats-and-BigInts)
 
 The [`BigFloat`](@ref) and [`BigInt`](@ref) types implements
 arbitrary-precision floating point and integer arithmetic, respectively. For
@@ -122,7 +122,6 @@ and for [`BigInt`](@ref) the [GNU Multiple Precision Arithmetic Library (GMP)]
 ```@docs
 Base.MPFR.BigFloat(::Any, rounding::RoundingMode)
 Base.precision
-Base.MPFR.precision(::Type{BigFloat})
 Base.MPFR.setprecision
 Base.GMP.BigInt(::Any)
 Base.@big_str
diff --git a/doc/src/base/parallel.md b/doc/src/base/parallel.md
index ce8e25107ab51c..47f18d77f654c2 100644
--- a/doc/src/base/parallel.md
+++ b/doc/src/base/parallel.md
@@ -26,14 +26,20 @@ Base.schedule
 
 ## [Synchronization](@id lib-task-sync)
 
+## Synchronization
+
 ```@docs
+Base.errormonitor
 Base.@sync
 Base.wait
 Base.fetch(t::Task)
 Base.timedwait
 
 Base.Condition
+Base.Threads.Condition
+Base.Threads.Event
 Base.notify
+Base.reset(::Base.Threads.Event)
 
 Base.Semaphore
 Base.acquire
@@ -59,3 +65,92 @@ Base.fetch(::Channel)
 Base.close(::Channel)
 Base.bind(c::Channel, task::Task)
 ```
+
+## [Low-level synchronization using `schedule` and `wait`](@id low-level-schedule-wait)
+
+The easiest correct use of [`schedule`](@ref) is on a `Task` that is not started (scheduled)
+yet.  However, it is possible to use [`schedule`](@ref) and [`wait`](@ref) as a very
+low-level building block for constructing synchronization interfaces.  A crucial
+pre-condition of calling `schedule(task)` is that the caller must "own" the `task`; i.e., it
+must know that the call to `wait` in the given `task` is happening at the locations known to
+the code calling `schedule(task)`.  One strategy for ensuring such pre-condition is to use
+atomics, as demonstrated in the following example:
+
+```jldoctest
+@enum OWEState begin
+    OWE_EMPTY
+    OWE_WAITING
+    OWE_NOTIFYING
+end
+
+mutable struct OneWayEvent
+    @atomic state::OWEState
+    task::Task
+    OneWayEvent() = new(OWE_EMPTY)
+end
+
+function Base.notify(ev::OneWayEvent)
+    state = @atomic ev.state
+    while state !== OWE_NOTIFYING
+        # Spin until we successfully update the state to OWE_NOTIFYING:
+        state, ok = @atomicreplace(ev.state, state => OWE_NOTIFYING)
+        if ok
+            if state == OWE_WAITING
+                # OWE_WAITING -> OWE_NOTIFYING transition means that the waiter task is
+                # already waiting or about to call `wait`. The notifier task must wake up
+                # the waiter task.
+                schedule(ev.task)
+            else
+                @assert state == OWE_EMPTY
+                # Since we are assuming that there is only one notifier task (for
+                # simplicity), we know that the other possible case here is OWE_EMPTY.
+                # We do not need to do anything because we know that the waiter task has
+                # not called `wait(ev::OneWayEvent)` yet.
+            end
+            break
+        end
+    end
+    return
+end
+
+function Base.wait(ev::OneWayEvent)
+    ev.task = current_task()
+    state, ok = @atomicreplace(ev.state, OWE_EMPTY => OWE_WAITING)
+    if ok
+        # OWE_EMPTY -> OWE_WAITING transition means that the notifier task is guaranteed to
+        # invoke OWE_WAITING -> OWE_NOTIFYING transition.  The waiter task must call
+        # `wait()` immediately.  In particular, it MUST NOT invoke any function that may
+        # yield to the scheduler at this point in code.
+        wait()
+    else
+        @assert state == OWE_NOTIFYING
+        # Otherwise, the `state` must have already been moved to OWE_NOTIFYING by the
+        # notifier task.
+    end
+    return
+end
+
+ev = OneWayEvent()
+@sync begin
+    @async begin
+        wait(ev)
+        println("done")
+    end
+    println("notifying...")
+    notify(ev)
+end
+
+# output
+notifying...
+done
+```
+
+`OneWayEvent` lets one task to `wait` for another task's `notify`.  It is a limited
+communication interface since `wait` can only be used once from a single task (note the
+non-atomic assignment of `ev.task`)
+
+In this example, `notify(ev::OneWayEvent)` is allowed to call `schedule(ev.task)` if and
+only if *it* modifies the state from `OWE_WAITING` to `OWE_NOTIFYING`.  This lets us know that
+the task executing `wait(ev::OneWayEvent)` is now in the `ok` branch and that there cannot be
+other tasks that tries to `schedule(ev.task)` since their
+`@atomicreplace(ev.state, state => OWE_NOTIFYING)` will fail.
diff --git a/doc/src/base/punctuation.md b/doc/src/base/punctuation.md
index c91df5e3f2ea6a..9cb9f1eb1dfbfc 100644
--- a/doc/src/base/punctuation.md
+++ b/doc/src/base/punctuation.md
@@ -2,17 +2,17 @@
 
 Extended documentation for mathematical symbols & functions is [here](@ref math-ops).
 
-| symbol      | meaning                                                                                                                                         |
-|:----------- |:----------------------------------------------------------------------------------------------------------------------------------------------- |
-| `@m`        | the at-symbol invokes [macro](@ref man-macros) `m`; followed by space-separated expressions or a function-call-like argument list |
+| symbol      | meaning                                                                                     |
+|:----------- |:--------------------------------------------------------------------------------------------|
+| `@`         | the at-sign marks a [macro](@ref man-macros) invocation; optionally followed by an argument list |
 | [`!`](@ref) | an exclamation mark is a prefix operator for logical negation ("not")                       |
 | `a!`        | function names that end with an exclamation mark modify one or more of their arguments by convention |
 | `#`         | the number sign (or hash or pound) character begins single line comments                    |
-| `#=`        | when followed by an equals sign, it begins a multi-line comment (these are nestable)          |
+| `#=`        | when followed by an equals sign, it begins a multi-line comment (these are nestable)        |
 | `=#`        | end a multi-line comment by immediately preceding the number sign with an equals sign       |
 | `$`         | the dollar sign is used for [string](@ref string-interpolation) and [expression](@ref man-expression-interpolation) interpolation |
 | [`%`](@ref rem) | the percent symbol is the remainder operator                                            |
-| [`^`](@ref) | the caret is the exponentiation operator                                                  |
+| [`^`](@ref) | the caret is the exponentiation operator                                                    |
 | [`&`](@ref) | single ampersand is bitwise and                                                             |
 | [`&&`](@ref)| double ampersands is short-circuiting boolean and                                           |
 | [`\|`](@ref)| single pipe character is bitwise or                                                         |
@@ -30,8 +30,8 @@ Extended documentation for mathematical symbols & functions is [here](@ref math-
 | `a[]`       | [array indexing](@ref man-array-indexing) (calling [`getindex`](@ref) or [`setindex!`](@ref)) |
 | `[,]`       | [vector literal constructor](@ref man-array-literals) (calling [`vect`](@ref Base.vect))    |
 | `[;]`       | [vertical concatenation](@ref man-array-concatenation) (calling [`vcat`](@ref) or [`hvcat`](@ref)) |
-| `[    ]`    | with space-separated expressions, [horizontal concatenation](@ref man-concatenation) (calling [`hcat`](@ref) or [`hvcat`](@ref)) |
-| `T{ }`      | curly braces following a type list that type's [parameters](@ref Parametric-Types)      |
+| `[    ]`    | with space-separated expressions, [horizontal concatenation](@ref man-concatenation) (calling [`hcat`](@ref) or [`hvcat`](@ref)) |
+| `T{ }`      | curly braces following a type list that type's [parameters](@ref Parametric-Types)          |
 | `{}`        | curly braces can also be used to group multiple [`where`](@ref) expressions in function declarations |
 | `;`         | semicolons separate statements, begin a list of keyword arguments in function declarations or calls, or are used to separate array literals for vertical concatenation |
 | `,`         | commas separate function arguments or tuple or array components                             |
@@ -55,4 +55,7 @@ Extended documentation for mathematical symbols & functions is [here](@ref math-
 | [`==`](@ref)| double equals sign is value equality comparison                                             |
 | [`===`](@ref) | triple equals sign is programmatically identical equality comparison                      |
 | [`=>`](@ref Pair) | right arrow using an equals sign defines a [`Pair`](@ref) typically used to populate [dictionaries](@ref Dictionaries) |
-| `->` | right arrow using a hyphen defines an [anonymous function](@ref man-anonymous-functions) on a single line         |
+| `->`        | right arrow using a hyphen defines an [anonymous function](@ref man-anonymous-functions) on a single line |
+| [`\|>`](@ref)       | pipe operator passes output from the left argument to input of the right argument, usually a [function](@ref Function-composition-and-piping) |
+| `∘`         | function composition operator (typed with \circ{tab}) combines two functions as though they are a single larger [function](@ref Function-composition-and-piping) |
+| `_`         | underscores may be assigned values which will not be saved, often used to ignore [multiple return values](@ref destructuring-assignment) or create repetitive [comprehensions](@ref man-comprehensions) |
diff --git a/doc/src/base/sort.md b/doc/src/base/sort.md
index f4c07b1e138e80..9f00381ab892c8 100644
--- a/doc/src/base/sort.md
+++ b/doc/src/base/sort.md
@@ -125,6 +125,7 @@ Base.issorted
 Base.Sort.searchsorted
 Base.Sort.searchsortedfirst
 Base.Sort.searchsortedlast
+Base.Sort.insorted
 Base.Sort.partialsort!
 Base.Sort.partialsort
 Base.Sort.partialsortperm
@@ -187,3 +188,32 @@ defalg(v::AbstractArray{<:Number}) = QuickSort
 As for numeric arrays, choosing a non-stable default algorithm for array types for which the notion
 of a stable sort is meaningless (i.e. when two values comparing equal can not be distinguished)
 may make sense.
+
+## Alternate orderings
+
+By default, `sort` and related functions use [`isless`](@ref) to compare two
+elements in order to determine which should come first. The
+[`Base.Order.Ordering`](@ref) abstract type provides a mechanism for defining
+alternate orderings on the same set of elements. Instances of `Ordering` define
+a [total order](https://en.wikipedia.org/wiki/Total_order) on a set of elements,
+so that for any elements `a`, `b`, `c` the following hold:
+
+* Exactly one of the following is true: `a` is less than `b`, `b` is less than
+  `a`, or `a` and `b` are equal (according to [`isequal`](@ref)).
+* The relation is transitive - if `a` is less than `b` and `b` is less than `c`
+  then `a` is less than `c`.
+
+The [`Base.Order.lt`](@ref) function works as a generalization of `isless` to
+test whether `a` is less than `b` according to a given order.
+
+```@docs
+Base.Order.Ordering
+Base.Order.lt
+Base.Order.ord
+Base.Order.Forward
+Base.Order.ReverseOrdering
+Base.Order.Reverse
+Base.Order.By
+Base.Order.Lt
+Base.Order.Perm
+```
diff --git a/doc/src/base/strings.md b/doc/src/base/strings.md
index 3fe9c592828505..263c0019788c33 100644
--- a/doc/src/base/strings.md
+++ b/doc/src/base/strings.md
@@ -1,6 +1,7 @@
 # [Strings](@id lib-strings)
 
 ```@docs
+Core.AbstractString
 Core.AbstractChar
 Core.Char
 Base.codepoint
@@ -14,12 +15,15 @@ Base.repeat(::AbstractChar, ::Integer)
 Base.repr(::Any)
 Core.String(::AbstractString)
 Base.SubString
+Base.LazyString
+Base.@lazy_str
 Base.transcode
 Base.unsafe_string
 Base.ncodeunits(::AbstractString)
 Base.codeunit
 Base.codeunits
 Base.ascii
+Base.Regex
 Base.@r_str
 Base.SubstitutionString
 Base.@s_str
@@ -32,6 +36,8 @@ Base.isvalid(::Any, ::Any)
 Base.isvalid(::AbstractString, ::Integer)
 Base.match
 Base.eachmatch
+Base.RegexMatch
+Base.keys(::RegexMatch)
 Base.isless(::AbstractString, ::AbstractString)
 Base.:(==)(::AbstractString, ::AbstractString)
 Base.cmp(::AbstractString, ::AbstractString)
@@ -45,7 +51,8 @@ Base.findlast(::AbstractChar, ::AbstractString)
 Base.findprev(::AbstractString, ::AbstractString, ::Integer)
 Base.occursin
 Base.reverse(::Union{String,SubString{String}})
-Base.replace(s::AbstractString, ::Pair)
+Base.replace(s::AbstractString, ::Pair...)
+Base.eachsplit
 Base.split
 Base.rsplit
 Base.strip
@@ -63,6 +70,8 @@ Base.uppercasefirst
 Base.lowercasefirst
 Base.join
 Base.chop
+Base.chopprefix
+Base.chopsuffix
 Base.chomp
 Base.thisind
 Base.nextind
diff --git a/doc/src/devdocs/EscapeAnalysis.md b/doc/src/devdocs/EscapeAnalysis.md
new file mode 100644
index 00000000000000..983a6782ccc79f
--- /dev/null
+++ b/doc/src/devdocs/EscapeAnalysis.md
@@ -0,0 +1,399 @@
+# `EscapeAnalysis`
+
+`Core.Compiler.EscapeAnalysis` is a compiler utility module that aims to analyze
+escape information of [Julia's SSA-form IR](@ref Julia-SSA-form-IR) a.k.a. `IRCode`.
+
+This escape analysis aims to:
+- leverage Julia's high-level semantics, especially reason about escapes and aliasing via
+  inter-procedural calls
+- be versatile enough to be used for various optimizations including
+  [alias-aware SROA](https://github.com/JuliaLang/julia/pull/43888),
+  [early `finalize` insertion](https://github.com/JuliaLang/julia/pull/44056),
+  [copy-free `ImmutableArray` construction](https://github.com/JuliaLang/julia/pull/42465),
+  stack allocation of mutable objects,
+  and so on.
+- achieve a simple implementation based on a fully backward data-flow analysis implementation
+  as well as a new lattice design that combines orthogonal lattice properties
+
+## Try it out!
+
+You can give a try to the escape analysis by loading the `EAUtils.jl` utility script that
+define the convenience entries `code_escapes` and `@code_escapes` for testing and debugging purposes:
+```@repl EAUtils
+include(normpath(Sys.BINDIR, "..", "share", "julia", "test", "compiler", "EscapeAnalysis", "EAUtils.jl")); using .EAUtils
+
+mutable struct SafeRef{T}
+    x::T
+end
+Base.getindex(x::SafeRef) = x.x;
+Base.setindex!(x::SafeRef, v) = x.x = v;
+Base.isassigned(x::SafeRef) = true;
+get′(x) = isassigned(x) ? x[] : throw(x);
+
+result = code_escapes((String,String,String,String)) do s1, s2, s3, s4
+    r1 = Ref(s1)
+    r2 = Ref(s2)
+    r3 = SafeRef(s3)
+    try
+        s1 = get′(r1)
+        ret = sizeof(s1)
+    catch err
+        global GV = err # will definitely escape `r1`
+    end
+    s2 = get′(r2)       # still `r2` doesn't escape fully
+    s3 = get′(r3)       # still `r3` doesn't escape fully
+    s4 = sizeof(s4)     # the argument `s4` doesn't escape here
+    return s2, s3, s4
+end
+```
+
+The symbols in the side of each call argument and SSA statements represents the following meaning:
+- `◌` (plain): this value is not analyzed because escape information of it won't be used anyway (when the object is `isbitstype` for example)
+- `✓` (green or cyan): this value never escapes (`has_no_escape(result.state[x])` holds), colored blue if it has arg escape also (`has_arg_escape(result.state[x])` holds)
+- `↑` (blue or yellow): this value can escape to the caller via return (`has_return_escape(result.state[x])` holds), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds)
+- `X` (red): this value can escape to somewhere the escape analysis can't reason about like escapes to a global memory (`has_all_escape(result.state[x])` holds)
+- `*` (bold): this value's escape state is between the `ReturnEscape` and `AllEscape` in the partial order of [`EscapeInfo`](@ref Core.Compiler.EscapeAnalysis.EscapeInfo), colored yellow if it has unhandled thrown escape also (`has_thrown_escape(result.state[x])` holds)
+- `′`: this value has additional object field / array element information in its `AliasInfo` property
+
+Escape information of each call argument and SSA value can be inspected programmatically as like:
+```@repl EAUtils
+result.state[Core.Argument(3)] # get EscapeInfo of `s2`
+
+result.state[Core.SSAValue(3)] # get EscapeInfo of `r3`
+```
+
+## Analysis Design
+
+### Lattice Design
+
+`EscapeAnalysis` is implemented as a [data-flow analysis](https://en.wikipedia.org/wiki/Data-flow_analysis)
+that works on a lattice of [`x::EscapeInfo`](@ref Core.Compiler.EscapeAnalysis.EscapeInfo),
+which is composed of the following properties:
+- `x.Analyzed::Bool`: not formally part of the lattice, only indicates `x` has not been analyzed or not
+- `x.ReturnEscape::BitSet`: records SSA statements where `x` can escape to the caller via return
+- `x.ThrownEscape::BitSet`: records SSA statements where `x` can be thrown as exception
+  (used for the [exception handling](@ref EA-Exception-Handling) described below)
+- `x.AliasInfo`: maintains all possible values that can be aliased to fields or array elements of `x`
+  (used for the [alias analysis](@ref EA-Alias-Analysis) described below)
+- `x.ArgEscape::Int` (not implemented yet): indicates it will escape to the caller through
+  `setfield!` on argument(s)
+
+These attributes can be combined to create a partial lattice that has a finite height, given
+the invariant that an input program has a finite number of statements, which is assured by Julia's semantics.
+The clever part of this lattice design is that it enables a simpler implementation of
+lattice operations by allowing them to handle each lattice property separately[^LatticeDesign].
+
+### Backward Escape Propagation
+
+This escape analysis implementation is based on the data-flow algorithm described in the paper[^MM02].
+The analysis works on the lattice of `EscapeInfo` and transitions lattice elements from the
+bottom to the top until every lattice element gets converged to a fixed point by maintaining
+a (conceptual) working set that contains program counters corresponding to remaining SSA
+statements to be analyzed. The analysis manages a single global state that tracks
+`EscapeInfo` of each argument and SSA statement, but also note that some flow-sensitivity
+is encoded as program counters recorded in `EscapeInfo`'s `ReturnEscape` property,
+which can be combined with domination analysis later to reason about flow-sensitivity if necessary.
+
+One distinctive design of this escape analysis is that it is fully _backward_,
+i.e. escape information flows _from usages to definitions_.
+For example, in the code snippet below, EA first analyzes the statement `return %1` and
+imposes `ReturnEscape` on `%1` (corresponding to `obj`), and then it analyzes
+`%1 = %new(Base.RefValue{String, _2}))` and propagates the `ReturnEscape` imposed on `%1`
+to the call argument `_2` (corresponding to `s`):
+```@repl EAUtils
+code_escapes((String,)) do s
+    obj = Ref(s)
+    return obj
+end
+```
+
+The key observation here is that this backward analysis allows escape information to flow
+naturally along the use-def chain rather than control-flow[^BackandForth].
+As a result this scheme enables a simple implementation of escape analysis,
+e.g. `PhiNode` for example can be handled simply by propagating escape information
+imposed on a `PhiNode` to its predecessor values:
+```@repl EAUtils
+code_escapes((Bool, String, String)) do cnd, s, t
+    if cnd
+        obj = Ref(s)
+    else
+        obj = Ref(t)
+    end
+    return obj
+end
+```
+
+### [Alias Analysis](@id EA-Alias-Analysis)
+
+`EscapeAnalysis` implements a backward field analysis in order to reason about escapes
+imposed on object fields with certain accuracy,
+and `x::EscapeInfo`'s `x.AliasInfo` property exists for this purpose.
+It records all possible values that can be aliased to fields of `x` at "usage" sites,
+and then the escape information of that recorded values are propagated to the actual field values later at "definition" sites.
+More specifically, the analysis records a value that may be aliased to a field of object by analyzing `getfield` call,
+and then it propagates its escape information to the field when analyzing `%new(...)` expression or `setfield!` call[^Dynamism].
+```@repl EAUtils
+code_escapes((String,)) do s
+    obj = SafeRef("init")
+    obj[] = s
+    v = obj[]
+    return v
+end
+```
+In the example above, `ReturnEscape` imposed on `%3` (corresponding to `v`) is _not_ directly
+propagated to `%1` (corresponding to `obj`) but rather that `ReturnEscape` is only propagated
+to `_2` (corresponding to `s`). Here `%3` is recorded in `%1`'s `AliasInfo` property as
+it can be aliased to the first field of `%1`, and then when analyzing `Base.setfield!(%1, :x, _2)::String`,
+that escape information is propagated to `_2` but not to `%1`.
+
+So `EscapeAnalysis` tracks which IR elements can be aliased across a `getfield`-`%new`/`setfield!` chain
+in order to analyze escapes of object fields, but actually this alias analysis needs to be
+generalized to handle other IR elements as well. This is because in Julia IR the same
+object is sometimes represented by different IR elements and so we should make sure that those
+different IR elements that actually can represent the same object share the same escape information.
+IR elements that return the same object as their operand(s), such as `PiNode` and `typeassert`,
+can cause that IR-level aliasing and thus requires escape information imposed on any of such
+aliased values to be shared between them.
+More interestingly, it is also needed for correctly reasoning about mutations on `PhiNode`.
+Let's consider the following example:
+```@repl EAUtils
+code_escapes((Bool, String,)) do cond, x
+    if cond
+        ϕ2 = ϕ1 = SafeRef("foo")
+    else
+        ϕ2 = ϕ1 = SafeRef("bar")
+    end
+    ϕ2[] = x
+    y = ϕ1[]
+    return y
+end
+```
+`ϕ1 = %5` and `ϕ2 = %6` are aliased and thus `ReturnEscape` imposed on `%8 = Base.getfield(%6, :x)::String` (corresponding to `y = ϕ1[]`)
+needs to be propagated to `Base.setfield!(%5, :x, _3)::String` (corresponding to `ϕ2[] = x`).
+In order for such escape information to be propagated correctly, the analysis should recognize that
+the _predecessors_ of `ϕ1` and `ϕ2` can be aliased as well and equalize their escape information.
+
+One interesting property of such aliasing information is that it is not known at "usage" site
+but can only be derived at "definition" site (as aliasing is conceptually equivalent to assignment),
+and thus it doesn't naturally fit in a backward analysis. In order to efficiently propagate escape
+information between related values, EscapeAnalysis.jl uses an approach inspired by the escape
+analysis algorithm explained in an old JVM paper[^JVM05]. That is, in addition to managing
+escape lattice elements, the analysis also maintains an "equi"-alias set, a disjoint set of
+aliased arguments and SSA statements. The alias set manages values that can be aliased to
+each other and allows escape information imposed on any of such aliased values to be equalized
+between them.
+
+### [Array Analysis](@id EA-Array-Analysis)
+
+The alias analysis for object fields described above can also be generalized to analyze array operations.
+`EscapeAnalysis` implements handlings for various primitive array operations so that it can propagate
+escapes via `arrayref`-`arrayset` use-def chain and does not escape allocated arrays too conservatively:
+```@repl EAUtils
+code_escapes((String,)) do s
+    ary = Any[]
+    push!(ary, SafeRef(s))
+    return ary[1], length(ary)
+end
+```
+In the above example `EscapeAnalysis` understands that `%20` and `%2` (corresponding to the allocated object `SafeRef(s)`)
+are aliased via the `arrayset`-`arrayref` chain and imposes `ReturnEscape` on them,
+but not impose it on the allocated array `%1` (corresponding to `ary`).
+`EscapeAnalysis` still imposes `ThrownEscape` on `ary` since it also needs to account for
+potential escapes via `BoundsError`, but also note that such unhandled `ThrownEscape` can
+often be ignored when optimizing the `ary` allocation.
+
+Furthermore, in cases when array index information as well as array dimensions can be known _precisely_,
+`EscapeAnalysis` is able to even reason about "per-element" aliasing via `arrayref`-`arrayset` chain,
+as `EscapeAnalysis` does "per-field" alias analysis for objects:
+```@repl EAUtils
+code_escapes((String,String)) do s, t
+    ary = Vector{Any}(undef, 2)
+    ary[1] = SafeRef(s)
+    ary[2] = SafeRef(t)
+    return ary[1], length(ary)
+end
+```
+Note that `ReturnEscape` is only imposed on `%2` (corresponding to `SafeRef(s)`) but not on `%4` (corresponding to `SafeRef(t)`).
+This is because the allocated array's dimension and indices involved with all `arrayref`/`arrayset`
+operations are available as constant information and `EscapeAnalysis` can understand that
+`%6` is aliased to `%2` but never be aliased to `%4`.
+In this kind of case, the succeeding optimization passes will be able to
+replace `Base.arrayref(true, %1, 1)::Any` with `%2` (a.k.a. "load-forwarding") and
+eventually eliminate the allocation of array `%1` entirely (a.k.a. "scalar-replacement").
+
+When compared to object field analysis, where an access to object field can be analyzed trivially
+using type information derived by inference, array dimension isn't encoded as type information
+and so we need an additional analysis to derive that information. `EscapeAnalysis` at this moment
+first does an additional simple linear scan to analyze dimensions of allocated arrays before
+firing up the main analysis routine so that the succeeding escape analysis can precisely
+analyze operations on those arrays.
+
+However, such precise "per-element" alias analysis is often hard.
+Essentially, the main difficulty inherit to array is that array dimension and index are often non-constant:
+- loop often produces loop-variant, non-constant array indices
+- (specific to vectors) array resizing changes array dimension and invalidates its constant-ness
+
+Let's discuss those difficulties with concrete examples.
+
+In the following example, `EscapeAnalysis` fails the precise alias analysis since the index
+at the `Base.arrayset(false, %4, %8, %6)::Vector{Any}` is not (trivially) constant.
+Especially `Any[nothing, nothing]` forms a loop and calls that `arrayset` operation in a loop,
+where `%6` is represented as a ϕ-node value (whose value is control-flow dependent).
+As a result, `ReturnEscape` ends up imposed on both `%23` (corresponding to `SafeRef(s)`) and
+`%25` (corresponding to `SafeRef(t)`), although ideally we want it to be imposed only on `%23` but not on `%25`:
+```@repl EAUtils
+code_escapes((String,String)) do s, t
+    ary = Any[nothing, nothing]
+    ary[1] = SafeRef(s)
+    ary[2] = SafeRef(t)
+    return ary[1], length(ary)
+end
+```
+
+The next example illustrates how vector resizing makes precise alias analysis hard.
+The essential difficulty is that the dimension of allocated array `%1` is first initialized as `0`,
+but it changes by the two `:jl_array_grow_end` calls afterwards.
+`EscapeAnalysis` currently simply gives up precise alias analysis whenever it encounters any
+array resizing operations and so `ReturnEscape` is imposed on both `%2` (corresponding to `SafeRef(s)`)
+and `%20` (corresponding to `SafeRef(t)`):
+```@repl EAUtils
+code_escapes((String,String)) do s, t
+    ary = Any[]
+    push!(ary, SafeRef(s))
+    push!(ary, SafeRef(t))
+    ary[1], length(ary)
+end
+```
+
+In order to address these difficulties, we need inference to be aware of array dimensions
+and propagate array dimensions in a flow-sensitive way[^ArrayDimension], as well as come
+up with nice representation of loop-variant values.
+
+`EscapeAnalysis` at this moment quickly switches to the more imprecise analysis that doesn't
+track precise index information in cases when array dimensions or indices are trivially non
+constant. The switch can naturally be implemented as a lattice join operation of
+`EscapeInfo.AliasInfo` property in the data-flow analysis framework.
+
+### [Exception Handling](@id EA-Exception-Handling)
+
+It would be also worth noting how `EscapeAnalysis` handles possible escapes via exceptions.
+Naively it seems enough to propagate escape information imposed on `:the_exception` object to
+all values that may be thrown in a corresponding `try` block.
+But there are actually several other ways to access to the exception object in Julia,
+such as `Base.current_exceptions` and `rethrow`.
+For example, escape analysis needs to account for potential escape of `r` in the example below:
+```@repl EAUtils
+const GR = Ref{Any}();
+@noinline function rethrow_escape!()
+    try
+        rethrow()
+    catch err
+        GR[] = err
+    end
+end;
+get′(x) = isassigned(x) ? x[] : throw(x);
+
+code_escapes() do
+    r = Ref{String}()
+    local t
+    try
+        t = get′(r)
+    catch err
+        t = typeof(err)   # `err` (which `r` aliases to) doesn't escape here
+        rethrow_escape!() # but `r` escapes here
+    end
+    return t
+end
+```
+
+It requires a global analysis in order to correctly reason about all possible escapes via
+existing exception interfaces. For now we always propagate the topmost escape information to
+all potentially thrown objects conservatively, since such an additional analysis might not be
+worthwhile to do given that exception handling and error path usually don't need to be
+very performance sensitive, and also optimizations of error paths might be very ineffective anyway
+since they are often even "unoptimized" intentionally for latency reasons.
+
+`x::EscapeInfo`'s `x.ThrownEscape` property records SSA statements where `x` can be thrown as an exception.
+Using this information `EscapeAnalysis` can propagate possible escapes via exceptions limitedly
+to only those may be thrown in each `try` region:
+```@repl EAUtils
+result = code_escapes((String,String)) do s1, s2
+    r1 = Ref(s1)
+    r2 = Ref(s2)
+    local ret
+    try
+        s1 = get′(r1)
+        ret = sizeof(s1)
+    catch err
+        global GV = err # will definitely escape `r1`
+    end
+    s2 = get′(r2)       # still `r2` doesn't escape fully
+    return s2
+end
+```
+
+## Analysis Usage
+
+`analyze_escapes` is the entry point to analyze escape information of SSA-IR elements.
+
+Most optimizations like SROA (`sroa_pass!`) are more effective when applied to
+an optimized source that the inlining pass (`ssa_inlining_pass!`) has simplified
+by resolving inter-procedural calls and expanding callee sources.
+Accordingly, `analyze_escapes` is also able to analyze post-inlining IR and collect
+escape information that is useful for certain memory-related optimizations.
+
+However, since certain optimization passes like inlining can change control flows and eliminate dead code,
+they can break the inter-procedural validity of escape information. In particularity,
+in order to collect inter-procedurally valid escape information, we need to analyze a pre-inlining IR.
+
+Because of this reason, `analyze_escapes` can analyze `IRCode` at any Julia-level optimization stage,
+and especially, it is supposed to be used at the following two stages:
+- `IPO EA`: analyze pre-inlining IR to generate IPO-valid escape information cache
+- `Local EA`: analyze post-inlining IR to collect locally-valid escape information
+
+Escape information derived by `IPO EA` is transformed to the `ArgEscapeCache` data structure and cached globally.
+By passing an appropriate `get_escape_cache` callback to `analyze_escapes`,
+the escape analysis can improve analysis accuracy by utilizing cached inter-procedural information of
+non-inlined callees that has been derived by previous `IPO EA`.
+More interestingly, it is also valid to use `IPO EA` escape information for type inference,
+e.g., inference accuracy can be improved by forming `Const`/`PartialStruct`/`MustAlias` of mutable object.
+
+Since the computational cost of `analyze_escapes` is not that cheap,
+both `IPO EA` and `Local EA` are better to run only when there is any profitability.
+Currently `EscapeAnalysis` provides the `is_ipo_profitable` heuristic to check a profitability of `IPO EA`.
+```@docs
+Core.Compiler.EscapeAnalysis.analyze_escapes
+Core.Compiler.EscapeAnalysis.EscapeState
+Core.Compiler.EscapeAnalysis.EscapeInfo
+Core.Compiler.EscapeAnalysis.is_ipo_profitable
+```
+
+--------------------------------------------------------------------------------------------
+
+[^LatticeDesign]: Our type inference implementation takes the alternative approach,
+    where each lattice property is represented by a special lattice element type object.
+    It turns out that it started to complicate implementations of the lattice operations
+    mainly because it often requires conversion rules between each lattice element type object.
+    And we are working on [overhauling our type inference lattice implementation](https://github.com/JuliaLang/julia/pull/42596)
+    with `EscapeInfo`-like lattice design.
+
+[^MM02]: _A Graph-Free approach to Data-Flow Analysis_.
+         Markas Mohnen, 2002, April.
+         <https://api.semanticscholar.org/CorpusID:28519618>.
+
+[^BackandForth]: Our type inference algorithm in contrast is implemented as a forward analysis,
+    because type information usually flows from "definition" to "usage" and it is more
+    natural and effective to propagate such information in a forward way.
+
+[^Dynamism]: In some cases, however, object fields can't be analyzed precisely.
+    For example, object may escape to somewhere `EscapeAnalysis` can't account for possible memory effects on it,
+    or fields of the objects simply can't be known because of the lack of type information.
+    In such cases `AliasInfo` property is raised to the topmost element within its own lattice order,
+    and it causes succeeding field analysis to be conservative and escape information imposed on
+    fields of an unanalyzable object to be propagated to the object itself.
+
+[^JVM05]: _Escape Analysis in the Context of Dynamic Compilation and Deoptimization_.
+          Thomas Kotzmann and Hanspeter Mössenböck, 2005, June.
+          <https://dl.acm.org/doi/10.1145/1064979.1064996>.
+
+[^ArrayDimension]: Otherwise we will need yet another forward data-flow analysis on top of the escape analysis.
diff --git a/doc/src/devdocs/ast.md b/doc/src/devdocs/ast.md
index df003265e30aca..83f8c1cb2b6958 100644
--- a/doc/src/devdocs/ast.md
+++ b/doc/src/devdocs/ast.md
@@ -53,8 +53,6 @@ call. Finally, chains of comparisons have their own special expression structure
 | `a&&b`      | `(&& a b)`                |
 | `x += 1`    | `(+= x 1)`                |
 | `a ? 1 : 2` | `(if a 1 2)`              |
-| `a:b`       | `(: a b)`                 |
-| `a:b:c`     | `(: a b c)`               |
 | `a,b`       | `(tuple a b)`             |
 | `a==b`      | `(call == a b)`           |
 | `1<i<=n`    | `(comparison 1 < i <= n)` |
@@ -63,23 +61,25 @@ call. Finally, chains of comparisons have their own special expression structure
 
 ### Bracketed forms
 
-| Input                    | AST                                  |
-|:------------------------ |:------------------------------------ |
-| `a[i]`                   | `(ref a i)`                          |
-| `t[i;j]`                 | `(typed_vcat t i j)`                 |
-| `t[i j]`                 | `(typed_hcat t i j)`                 |
-| `t[a b; c d]`            | `(typed_vcat t (row a b) (row c d))` |
-| `a{b}`                   | `(curly a b)`                        |
-| `a{b;c}`                 | `(curly a (parameters c) b)`         |
-| `[x]`                    | `(vect x)`                           |
-| `[x,y]`                  | `(vect x y)`                         |
-| `[x;y]`                  | `(vcat x y)`                         |
-| `[x y]`                  | `(hcat x y)`                         |
-| `[x y; z t]`             | `(vcat (row x y) (row z t))`         |
-| `[x for y in z, a in b]` | `(comprehension x (= y z) (= a b))`  |
-| `T[x for y in z]`        | `(typed_comprehension T x (= y z))`  |
-| `(a, b, c)`              | `(tuple a b c)`                      |
-| `(a; b; c)`              | `(block a (block b c))`              |
+| Input                    | AST                                               |
+|:------------------------ |:------------------------------------------------- |
+| `a[i]`                   | `(ref a i)`                                       |
+| `t[i;j]`                 | `(typed_vcat t i j)`                              |
+| `t[i j]`                 | `(typed_hcat t i j)`                              |
+| `t[a b; c d]`            | `(typed_vcat t (row a b) (row c d))`              |
+| `t[a b;;; c d]`          | `(typed_ncat t 3 (row a b) (row c d))`            |
+| `a{b}`                   | `(curly a b)`                                     |
+| `a{b;c}`                 | `(curly a (parameters c) b)`                      |
+| `[x]`                    | `(vect x)`                                        |
+| `[x,y]`                  | `(vect x y)`                                      |
+| `[x;y]`                  | `(vcat x y)`                                      |
+| `[x y]`                  | `(hcat x y)`                                      |
+| `[x y; z t]`             | `(vcat (row x y) (row z t))`                      |
+| `[x;y;; z;t;;;]`         | `(ncat 3 (nrow 2 (nrow 1 x y) (nrow 1 z t)))`     |
+| `[x for y in z, a in b]` | `(comprehension (generator x (= y z) (= a b)))`   |
+| `T[x for y in z]`        | `(typed_comprehension T (generator x (= y z)))`   |
+| `(a, b, c)`              | `(tuple a b c)`                                   |
+| `(a; b; c)`              | `(block a b c)`                                   |
 
 ### Macros
 
@@ -128,11 +128,11 @@ instead of `:import`.
 Julia supports more number types than many scheme implementations, so not all numbers are represented
 directly as scheme numbers in the AST.
 
-| Input                   | AST                                                     |
-|:----------------------- |:------------------------------------------------------- |
-| `11111111111111111111`  | `(macrocall @int128_str (null) "11111111111111111111")` |
-| `0xfffffffffffffffff`   | `(macrocall @uint128_str (null) "0xfffffffffffffffff")` |
-| `1111...many digits...` | `(macrocall @big_str (null) "1111....")`                |
+| Input                   | AST                                                      |
+|:----------------------- |:-------------------------------------------------------- |
+| `11111111111111111111`  | `(macrocall @int128_str nothing "11111111111111111111")` |
+| `0xfffffffffffffffff`   | `(macrocall @uint128_str nothing "0xfffffffffffffffff")` |
+| `1111...many digits...` | `(macrocall @big_str nothing "1111....")`                |
 
 ### Block forms
 
@@ -155,7 +155,7 @@ parses as:
 ```
 (if a (block (line 2) b)
     (elseif (block (line 3) c) (block (line 4) d)
-            (block (line 5 e))))
+            (block (line 6 e))))
 ```
 
 A `while` loop parses as `(while condition body)`.
@@ -254,7 +254,7 @@ types exist in lowered form:
     Identifies arguments and local variables by consecutive numbering. `Slot` is an abstract type
     with subtypes `SlotNumber` and `TypedSlot`. Both types have an integer-valued `id` field giving
     the slot index. Most slots have the same type at all uses, and so are represented with `SlotNumber`.
-    The types of these slots are found in the `slottypes` field of their `MethodInstance` object.
+    The types of these slots are found in the `slottypes` field of their `CodeInfo` object.
     Slots that require per-use type annotations are represented with `TypedSlot`, which has a `typ`
     field.
 
@@ -341,9 +341,10 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
       * `args[1]`
 
-        A function name, or `false` if unknown. If a symbol, then the expression first
-        behaves like the 1-argument form above. This argument is ignored from then on. When
-        this is `false`, it means a method is being added strictly by type, `(::T)(x) = x`.
+        A function name, or `nothing` if unknown or unneeded. If a symbol, then the expression
+        first behaves like the 1-argument form above. This argument is ignored from then on.
+        It can be `nothing` when methods are added strictly by type, `(::T)(x) = x`,
+        or when a method is being added to an existing function, `MyModule.f(x) = x`.
 
       * `args[2]`
 
@@ -435,6 +436,10 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
     Yields the caught exception inside a `catch` block, as returned by `jl_current_exception()`.
 
+  * `undefcheck`
+
+    Temporary node inserted by the compiler and will be processed in `type_lift_pass!`.
+
   * `enter`
 
     Enters an exception handler (`setjmp`). `args[1]` is the label of the catch block to jump to on
@@ -504,15 +509,46 @@ These symbols appear in the `head` field of [`Expr`](@ref)s in lowered form.
 
         The calling convention for the call.
 
-      * `args[6:length(args[3])]` : arguments
+      * `args[6:5+length(args[3])]` : arguments
 
         The values for all the arguments (with types of each given in args[3]).
 
-      * `args[(length(args[3]) + 1):end]` : gc-roots
+      * `args[6+length(args[3])+1:end]` : gc-roots
 
         The additional objects that may need to be gc-rooted for the duration of the call.
         See [Working with LLVM](@ref Working-with-LLVM) for where these are derived from and how they get handled.
 
+  * `new_opaque_closure`
+
+    Constructs a new opaque closure. The fields are:
+
+      * `args[1]` : signature
+
+        The function signature of the opaque closure. Opaque closures don't participate in dispatch, but the input types can be restricted.
+
+      * `args[2]` : isva
+
+        Indicates whether the closure accepts varargs.
+
+      * `args[3]` : lb
+
+        Lower bound on the output type. (Defaults to `Union{}`)
+
+      * `args[4]` : ub
+
+        Upper bound on the output type. (Defaults to `Any`)
+
+      * `args[5]` : method
+
+        The actual method as an `opaque_closure_method` expression.
+
+      * `args[6:end]` : captures
+
+        The values captured by the opaque closure.
+
+    !!! compat "Julia 1.7"
+        Opaque closures were added in Julia 1.7
+
 
 ### [Method](@id ast-lowered-method)
 
@@ -556,7 +592,8 @@ A unique'd container describing the shared metadata for a single method.
 
 ### MethodInstance
 
-A unique'd container describing a single callable signature for a Method. See especially [Proper maintenance and care of multi-threading locks](@ref)
+A unique'd container describing a single callable signature for a Method.
+See especially [Proper maintenance and care of multi-threading locks](@ref Proper-maintenance-and-care-of-multi-threading-locks)
 for important details on how to modify these fields safely.
 
   * `specTypes`
diff --git a/doc/src/devdocs/backtraces.md b/doc/src/devdocs/backtraces.md
index a65a494bbc3a4f..4ed3ea47efbb5d 100644
--- a/doc/src/devdocs/backtraces.md
+++ b/doc/src/devdocs/backtraces.md
@@ -11,6 +11,7 @@ and follow the instructions to generate the debugging information requested.  Ta
   * [Segfaults during bootstrap (`sysimg.jl`)](@ref)
   * [Segfaults when running a script](@ref)
   * [Errors during Julia startup](@ref)
+  * [Other generic segfaults or unreachables reached](@ref)
 
 ## [Version/Environment info](@id dev-version-info)
 
@@ -106,6 +107,22 @@ Create a [gist](https://gist.github.com) with the `strace`/ `dtruss` output, the
 and any other pertinent information and open a new [issue](https://github.com/JuliaLang/julia/issues?q=is%3Aopen)
 on Github with a link to the gist.
 
+## Other generic segfaults or unreachables reached
+
+As mentioned elsewhere, `julia` has good integration with `rr` for generating traces; this includes, on Linux, the ability to automatically run `julia` under `rr` and share the trace after a crash. This can be immensely helpful when debugging such crashes and is strongly encouraged when reporting crash issues to the JuliaLang/julia repo. To run `julia` under `rr` automatically, do:
+
+```julia
+julia --bug-report=rr
+```
+
+To generate the `rr` trace locally, but not share, you can do:
+
+```julia
+julia --bug-report=rr-local
+```
+
+Note that this is only works on Linux. The blog post on [Time Travelling Bug Reporting](https://julialang.org/blog/2020/05/rr/) has many more details.
+
 ## Glossary
 
 A few terms have been used as shorthand in this guide:
diff --git a/doc/src/devdocs/boundscheck.md b/doc/src/devdocs/boundscheck.md
index a9fb3baaae4476..258528dbd5960b 100644
--- a/doc/src/devdocs/boundscheck.md
+++ b/doc/src/devdocs/boundscheck.md
@@ -18,7 +18,7 @@ For example, you might write the method `sum` as:
 ```julia
 function sum(A::AbstractArray)
     r = zero(eltype(A))
-    for i = 1:length(A)
+    for i in eachindex(A)
         @inbounds r += A[i]
     end
     return r
@@ -36,6 +36,37 @@ your function contains multiple layers of inlining, only `@boundscheck` blocks a
 of inlining deeper are eliminated. The rule prevents unintended changes in program behavior from
 code further up the stack.
 
+### Caution!
+
+It is easy to accidentally expose unsafe operations with `@inbounds`. You might be tempted
+to write the above example as
+
+```julia
+function sum(A::AbstractArray)
+    r = zero(eltype(A))
+    for i in 1:length(A)
+        @inbounds r += A[i]
+    end
+	return r
+end
+```
+
+Which quietly assumes 1-based indexing and therefore exposes unsafe memory access when used
+with [`OffsetArrays`](@ref man-custom-indices):
+
+```julia-repl
+julia> using OffsetArrays
+
+julia> sum(OffsetArray([1,2,3], -10))
+9164911648 # inconsistent results or segfault
+```
+
+While the original source of the error here is `1:length(A)`, the use of `@inbounds`
+increases the consequences from a bounds error to a less easily caught and debugged unsafe
+memory access. It is often difficult or impossible to prove that a method which uses
+`@inbounds` is safe, so one must weigh the benefits of performance improvements against the
+risk of segfaults and silent misbehavior, especially in public facing APIs.
+
 ## Propagating inbounds
 
 There may be certain scenarios where for code-organization reasons you want more than one layer
@@ -89,3 +120,7 @@ Note this hierarchy has been designed to reduce the likelihood of method ambigui
 to make `checkbounds` the place to specialize on array type, and try to avoid specializations
 on index types; conversely, `checkindex` is intended to be specialized only on index type (especially,
 the last argument).
+
+## Emit bounds checks
+
+Julia can be launched with `--check-bounds={yes|no|auto}` to emit bounds checks always, never, or respect @inbounds declarations.
diff --git a/doc/build/arm.md b/doc/src/devdocs/build/arm.md
similarity index 100%
rename from doc/build/arm.md
rename to doc/src/devdocs/build/arm.md
diff --git a/doc/build/build.md b/doc/src/devdocs/build/build.md
similarity index 76%
rename from doc/build/build.md
rename to doc/src/devdocs/build/build.md
index 412606d5b14998..091a15d8925139 100644
--- a/doc/build/build.md
+++ b/doc/src/devdocs/build/build.md
@@ -17,8 +17,8 @@ variables.
 When compiled the first time, the build will automatically download
 pre-built [external
 dependencies](#required-build-tools-and-external-libraries). If you
-prefer to build all the dependencies on your own, add the following in
-`Make.user`
+prefer to build all the dependencies on your own, or are building on a system that cannot
+access the network during the build process, add the following in `Make.user`:
 
 ```
 USE_BINARYBUILDER=0
@@ -66,7 +66,7 @@ Now you should be able to run Julia like this:
 
 If you are building a Julia package for distribution on Linux, macOS,
 or Windows, take a look at the detailed notes in
-[distributing.md](https://github.com/JuliaLang/julia/blob/master/doc/build/distributing.md).
+[distributing.md](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/distributing.md).
 
 ## Updating an existing source tree
 
@@ -130,16 +130,16 @@ latest version.
 
 Notes for various operating systems:
 
-* [Linux](https://github.com/JuliaLang/julia/blob/master/doc/build/linux.md)
-* [macOS](https://github.com/JuliaLang/julia/blob/master/doc/build/macos.md)
-* [Windows](https://github.com/JuliaLang/julia/blob/master/doc/build/windows.md)
-* [FreeBSD](https://github.com/JuliaLang/julia/blob/master/doc/build/freebsd.md)
+* [Linux](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/linux.md)
+* [macOS](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/macos.md)
+* [Windows](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/windows.md)
+* [FreeBSD](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/freebsd.md)
 
 Notes for various architectures:
 
-* [ARM](https://github.com/JuliaLang/julia/blob/master/doc/build/arm.md)
+* [ARM](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/arm.md)
 
-## Required Build Tools and External Libraries
+## [Required Build Tools and External Libraries](@id build-tools)
 
 Building Julia requires that the following software be installed:
 
@@ -160,7 +160,7 @@ Building Julia requires that the following software be installed:
 
 On Debian-based distributions (e.g. Ubuntu), you can easily install them with `apt-get`:
 ```
-sudo apt-get install build-essential libatomic1 python gfortran perl wget m4 cmake pkg-config
+sudo apt-get install build-essential libatomic1 python gfortran perl wget m4 cmake pkg-config curl
 ```
 
 Julia uses the following external libraries, which are automatically
@@ -186,7 +186,7 @@ uses are listed in [`deps/Versions.make`](https://github.com/JuliaLang/julia/blo
 - **[libssh2]**              — library for SSH transport, used by libgit2 for packages with SSH remotes.
 - **[mbedtls]**              — library used for cryptography and transport layer security, used by libssh2
 - **[utf8proc]**             — a library for processing UTF-8 encoded Unicode strings.
-- **[libosxunwind]**         — fork of [libunwind], a library that determines the call-chain of a program.
+- **[LLVM libunwind]**       — LLVM's fork of [libunwind], a library that determines the call-chain of a program.
 
 [GNU make]:     https://www.gnu.org/software/make
 [patch]:        https://www.gnu.org/software/patch
@@ -202,20 +202,20 @@ uses are listed in [`deps/Versions.make`](https://github.com/JuliaLang/julia/blo
 [perl]:         https://www.perl.org
 [cmake]:        https://www.cmake.org
 [OpenLibm]:     https://github.com/JuliaLang/openlibm
-[DSFMT]:        http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/#dSFMT
+[DSFMT]:        https://github.com/MersenneTwister-Lab/dSFMT
 [OpenBLAS]:     https://github.com/xianyi/OpenBLAS
 [LAPACK]:       https://www.netlib.org/lapack
 [MKL]:          https://software.intel.com/en-us/articles/intel-mkl
-[SuiteSparse]:  http://faculty.cse.tamu.edu/davis/suitesparse.html
+[SuiteSparse]:  https://people.engr.tamu.edu/davis/suitesparse.html
 [PCRE]:         https://www.pcre.org
 [LLVM]:         https://www.llvm.org
+[LLVM libunwind]: https://github.com/llvm/llvm-project/tree/main/libunwind
 [FemtoLisp]:    https://github.com/JeffBezanson/femtolisp
 [GMP]:          https://gmplib.org
 [MPFR]:         https://www.mpfr.org
 [libuv]:        https://github.com/JuliaLang/libuv
 [libgit2]:      https://libgit2.org/
 [utf8proc]:     https://julialang.org/utf8proc/
-[libosxunwind]: https://github.com/JuliaLang/libosxunwind
 [libunwind]:    https://www.nongnu.org/libunwind
 [libssh2]:      https://www.libssh2.org
 [mbedtls]:      https://tls.mbed.org/
@@ -250,21 +250,6 @@ Julia uses a custom fork of libuv. It is a small dependency, and can be safely b
 
 As a high-performance numerical language, Julia should be linked to a multi-threaded BLAS and LAPACK, such as OpenBLAS or ATLAS, which will provide much better performance than the reference `libblas` implementations which may be default on some systems.
 
-### Intel MKL
-
-For a 64-bit architecture, the environment should be set up as follows:
-```sh
-# bash
-source /path/to/intel/bin/compilervars.sh intel64
-```
-Add the following to the `Make.user` file:
-
-    USE_INTEL_MKL = 1
-
-It is highly recommended to start with a fresh clone of the Julia repository.
-
-If you are building Julia for the sole purpose of incorporating Intel MKL, it may be beneficial to first try [MKL.jl](https://github.com/JuliaComputing/MKL.jl). This package will automatically download MKL and rebuild Julia's system image against it, sidestepping the need to set up a working build environment just to add MKL functionality.
-
 ## Source distributions of releases
 
 Each pre-release and release of Julia has a "full" source distribution and a "light" source
@@ -276,3 +261,45 @@ distribution does not include the source code of dependencies.
 
 For example, `julia-1.0.0.tar.gz` is the light source distribution for the `v1.0.0` release
 of Julia, while `julia-1.0.0-full.tar.gz` is the full source distribution.
+
+## Building Julia from source with a Git checkout of a stdlib
+
+If you need to build Julia from source with a Git checkout of a stdlib, then use `make DEPS_GIT=NAME_OF_STDLIB` when building Julia.
+
+For example, if you need to build Julia from source with a Git checkout of Pkg, then use `make DEPS_GIT=Pkg` when building Julia. The `Pkg` repo is in `stdlib/Pkg`, and created initially with a detached `HEAD`. If you're doing this from a pre-existing Julia repository, you may need to `make clean` beforehand.
+
+If you need to build Julia from source with Git checkouts of more than one stdlib, then `DEPS_GIT` should be a space-separated list of the stdlib names. For example, if you need to build Julia from source with a Git checkout of Pkg, Tar, and Downloads, then use `make DEPS_GIT='Pkg Tar Downloads'` when building Julia.
+
+## Building an "assert build" of Julia
+
+An "assert build" of Julia is a build that was built with both `FORCE_ASSERTIONS=1` and
+`LLVM_ASSERTIONS=1`. To build an assert build, define both of the following variables
+in your `Make.user` file:
+
+```
+FORCE_ASSERTIONS=1
+LLVM_ASSERTIONS=1
+```
+
+Please note that assert builds of Julia will be slower than regular (non-assert) builds.
+
+## Building 32-bit Julia on a 64-bit machine
+
+Occasionally, bugs specific to 32-bit architectures may arise, and when this happens it is useful to be able to debug the problem on your local machine.  Since most modern 64-bit systems support running programs built for 32-bit ones, if you don't have to recompile Julia from source (e.g. you mainly need to inspect the behavior of a 32-bit Julia without having to touch the C code), you can likely use a 32-bit build of Julia for your system that you can obtain from the [official downloads page](https://julialang.org/downloads/).
+However, if you do need to recompile Julia from source one option is to use a Docker container of a 32-bit system.  At least for now, building a 32-bit version of Julia is relatively straightforward using [ubuntu 32-bit docker images](https://hub.docker.com/r/i386/ubuntu). In brief, after setting up `docker` here are the required steps:
+
+```sh
+$ docker pull i386/ubuntu
+$ docker run --platform i386 -i -t i386/ubuntu /bin/bash
+```
+
+At this point you should be in a 32-bit machine console (note that `uname` reports the host architecture, so will still say 64-bit, but this will not affect the Julia build). You can add packages and compile code; when you `exit`, all the changes will be lost, so be sure to finish your analysis in a single session or set up a copy/pastable script you can use to set up your environment.
+
+From this point, you should
+
+```sh
+# apt update
+```
+(Note that `sudo` isn't installed, but neither is it necessary since you are running as `root`, so you can omit `sudo` from all commands.)
+
+Then add all the [build dependencies](@ref build-tools), a console-based editor of your choice, `git`, and anything else you'll need (e.g., `gdb`, `rr`, etc). Pick a directory to work in and `git clone` Julia, check out the branch you wish to debug, and build Julia as usual.
diff --git a/doc/build/distributing.md b/doc/src/devdocs/build/distributing.md
similarity index 95%
rename from doc/build/distributing.md
rename to doc/src/devdocs/build/distributing.md
index dcbf0d3b606e2a..c49f6f071224c4 100644
--- a/doc/build/distributing.md
+++ b/doc/src/devdocs/build/distributing.md
@@ -1,4 +1,4 @@
-Notes for building binary distributions
+Binary distributions
 =======================================
 
 These notes are for those wishing to compile a binary distribution of Julia
@@ -71,7 +71,7 @@ package such as a `.deb`, or `.rpm`, some extra effort is needed. See the
 [julia-debian](https://github.com/staticfloat/julia-debian) repository
 for an example of what metadata is needed for creating `.deb` packages
 for Debian and Ubuntu-based systems. See the
-[Fedora package](https://admin.fedoraproject.org/pkgdb/package/julia/)
+[Fedora package](https://src.fedoraproject.org/rpms/julia)
 for RPM-based distributions. Although we have not yet experimented
 with it, [Alien](https://wiki.debian.org/Alien) could be used to
 generate Julia packages for various Linux distributions.
@@ -108,14 +108,8 @@ Alternatively, Julia may be built as a framework by invoking `make` with the
 Windows
 -------
 
-The best supported method of creating a Julia distribution on Windows
-is to cross-compile from a Linux distribution such as Ubuntu. In-depth
-compilation instructions [are
-available](https://github.com/JuliaLang/julia/blob/master/README.windows.md).
-However the important steps for redistribution are to ensure to `make
-win-extras` in between `make` and `make binary-dist`.  After that process is
-completed, the `.zip` file created in the head Julia directory will
-hold a completely self-contained Julia.
+Instructions for reating a Julia distribution on Windows are described in the
+[build devdocs for Windows](https://github.com/JuliaLang/julia/blob/master/doc/src/devdocs/build/windows.md).
 
 Notes on BLAS and LAPACK
 ------------------------
@@ -157,15 +151,8 @@ set `USE_SYSTEM_BLAS=1` and `USE_SYSTEM_LAPACK=1`, you should also set
 `LIBLAPACK=-l$(YOURBLAS)` and `LIBLAPACKNAME=lib$(YOURBLAS)`. Else, the
 reference LAPACK will be used and performance will typically be much lower.
 
-Compilation scripts
-===================
-
-The [julia-nightly-packaging](https://github.com/staticfloat/julia-nightly-packaging)
-repository contains multiple example scripts to ease the creation of
-binary packages. It also includes miscellaneous tools to do things such as
-fetching the last good commit that passed the
-[Travis](https://travis-ci.org/JuliaLang/julia/builds) tests.
-
+Starting with Julia 1.7, Julia uses [libblastrampoline](https://github.com/JuliaLinearAlgebra/libblastrampoline)
+to pick a different BLAS at runtime.
 
 # Point releasing 101
 
diff --git a/doc/build/freebsd.md b/doc/src/devdocs/build/freebsd.md
similarity index 99%
rename from doc/build/freebsd.md
rename to doc/src/devdocs/build/freebsd.md
index 51b16d58b29598..12012cd7454280 100644
--- a/doc/build/freebsd.md
+++ b/doc/src/devdocs/build/freebsd.md
@@ -1,4 +1,4 @@
-## FreeBSD
+# FreeBSD
 
 Clang is the default compiler on FreeBSD 11.0-RELEASE and above.
 The remaining build tools are available from the Ports Collection, and can be installed using
diff --git a/doc/build/linux.md b/doc/src/devdocs/build/linux.md
similarity index 94%
rename from doc/build/linux.md
rename to doc/src/devdocs/build/linux.md
index 4aa85c432d96d0..4e596ef73341b8 100644
--- a/doc/build/linux.md
+++ b/doc/src/devdocs/build/linux.md
@@ -1,12 +1,12 @@
-## Linux
+# Linux
 
 * GCC version 4.7 or later is required to build Julia.
 * To use external shared libraries not in the system library search path, set `USE_SYSTEM_XXX=1` and `LDFLAGS=-Wl,-rpath,/path/to/dir/contains/libXXX.so` in `Make.user`.
 * Instead of setting `LDFLAGS`, putting the library directory into the environment variable `LD_LIBRARY_PATH` (at both compile and run time) also works.
 * The `USE_SYSTEM_*` flags should be used with caution. These are meant only for troubleshooting, porting, and packaging, where package maintainers work closely with the Julia developers to make sure that Julia is built correctly. Production use cases should use the officially provided binaries. Issues arising from the use of these flags will generally not be accepted.
-* See also the [external dependencies](#required-build-tools-and-external-libraries).
+* See also the [external dependencies](build.md#required-build-tools-and-external-libraries).
 
-### Architecture Customization
+## Architecture Customization
 
 Julia can be built for a non-generic architecture by configuring the `ARCH` Makefile variable in a `Make.user` file. See the appropriate section of `Make.inc` for additional customization options, such as `MARCH` and `JULIA_CPU_TARGET`.
 
@@ -14,7 +14,7 @@ For example, to build for Pentium 4, set `MARCH=pentium4` and install the necess
 
 You can also set `MARCH=native` in `Make.user` for a maximum-performance build customized for the current machine CPU.
 
-### Linux Build Troubleshooting
+## Linux Build Troubleshooting
 
  Problem              | Possible Solution
 ------------------------|---------------------
diff --git a/doc/build/macos.md b/doc/src/devdocs/build/macos.md
similarity index 98%
rename from doc/build/macos.md
rename to doc/src/devdocs/build/macos.md
index bbe36494756687..d78c5f938fc9c1 100644
--- a/doc/build/macos.md
+++ b/doc/src/devdocs/build/macos.md
@@ -1,4 +1,4 @@
-## macOS
+# macOS
 
 You need to have the current Xcode command line utilities installed: run `xcode-select --install` in the terminal. You will need to rerun this terminal command after each macOS update, otherwise you may run into errors involving missing libraries or headers.
 
diff --git a/doc/src/devdocs/build/windows.md b/doc/src/devdocs/build/windows.md
new file mode 100644
index 00000000000000..fef4413db7d1aa
--- /dev/null
+++ b/doc/src/devdocs/build/windows.md
@@ -0,0 +1,228 @@
+# Windows
+
+This file describes how to install, or build, and use Julia on Windows.
+
+For more general information about Julia, please see the
+[main README](https://github.com/JuliaLang/julia/blob/master/README.md)
+or the [documentation](https://docs.julialang.org).
+
+
+## General Information for Windows
+
+We highly recommend running Julia using a modern terminal application, in particular Windows Terminal, which can be installed from the [Microsoft Store](https://aka.ms/terminal).
+
+### Line endings
+
+Julia uses binary-mode files exclusively. Unlike many other Windows programs,
+if you write `\n` to a file, you get a `\n` in the file, not some other bit
+pattern. This matches the behavior exhibited by other operating systems. If
+you have installed Git for Windows, it is suggested, but not required, that you
+configure your system Git to use the same convention:
+```sh
+git config --global core.eol lf
+git config --global core.autocrlf input
+```
+or edit `%USERPROFILE%\.gitconfig` and add/edit the lines:
+```
+[core]
+    eol = lf
+    autocrlf = input
+```
+
+## Binary distribution
+
+For the binary distribution installation notes on Windows please see the instructions at
+[https://julialang.org/downloads/platform/#windows](https://julialang.org/downloads/platform/#windows).
+
+## Source distribution
+
+### Cygwin-to-MinGW cross-compiling
+
+The recommended way of compiling Julia from source on Windows is by cross
+compiling from [Cygwin](https://www.cygwin.com), using versions of the
+MinGW-w64 compilers available through Cygwin's package manager.
+
+ 1. Download and run Cygwin setup for [32 bit](https://cygwin.com/setup-x86.exe)
+    or [64 bit](https://cygwin.com/setup-x86_64.exe). Note, that you can compile
+    either 32 or 64 bit Julia from either 32 or 64 bit Cygwin. 64 bit Cygwin
+    has a slightly smaller but often more up-to-date selection of packages.
+
+    Advanced: you may skip steps 2-4 by running:
+
+        setup-x86_64.exe -s <url> -q -P cmake,gcc-g++,git,make,patch,curl,m4,python3,p7zip,mingw64-i686-gcc-g++,mingw64-i686-gcc-fortran,mingw64-x86_64-gcc-g++,mingw64-x86_64-gcc-fortran
+        :: replace <url> with a site from https://cygwin.com/mirrors.html
+        :: or run setup manually first and select a mirror
+
+ 2. Select installation location and download mirror.
+
+ 3. At the '*Select Packages'* step, select the following:
+
+    1.  From the *Devel* category: `cmake`, `gcc-g++`, `git`, `make`, `patch`
+    2.  From the *Net* category: `curl`
+    3.  From *Interpreters* (or *Python*) category: `m4`, `python3`
+    4.  From the *Archive* category: `p7zip`
+    5.  For 32 bit Julia, and also from the *Devel* category:
+        `mingw64-i686-gcc-g++` and `mingw64-i686-gcc-fortran`
+    6.  For 64 bit Julia, and also from the *Devel* category:
+        `mingw64-x86_64-gcc-g++` and `mingw64-x86_64-gcc-fortran`
+
+ 4. At the *'Resolving Dependencies'* step, be sure to leave *'Select required
+    packages (RECOMMENDED)'* enabled.
+
+ 5. Allow Cygwin installation to finish, then start from the installed shortcut
+    a *'Cygwin Terminal'*, or *'Cygwin64 Terminal'*, respectively.
+
+ 6. Build Julia and its dependencies from source:
+
+    1. Get the Julia sources
+       ```sh
+       git clone https://github.com/JuliaLang/julia.git
+       cd julia
+       ```
+       Tip: If you get an `error: cannot fork() for fetch-pack: Resource
+       temporarily unavailable` from git, add `alias git="env PATH=/usr/bin git"`
+       to `~/.bashrc` and restart Cygwin.
+
+    2. Set the `XC_HOST` variable in `Make.user` to indicate MinGW-w64 cross
+       compilation
+       ```sh
+       echo 'XC_HOST = i686-w64-mingw32' > Make.user     # for 32 bit Julia
+       # or
+       echo 'XC_HOST = x86_64-w64-mingw32' > Make.user   # for 64 bit Julia
+       ```
+
+    3. Start the build
+       ```sh
+       make -j 4   # Adjust the number of threads (4) to match your build environment.
+       ```
+
+
+    > Protip: build both!
+    > ```sh
+    > make O=julia-win32 configure
+    > make O=julia-win64 configure
+    > echo 'XC_HOST = i686-w64-mingw32' > julia-win32/Make.user
+    > echo 'XC_HOST = x86_64-w64-mingw32' > julia-win64/Make.user
+    > echo 'ifeq ($(BUILDROOT),$(JULIAHOME))
+    >         $(error "in-tree build disabled")
+    >       endif' >> Make.user
+    > make -C julia-win32  # build for Windows x86 in julia-win32 folder
+    > make -C julia-win64  # build for Windows x86-64 in julia-win64 folder
+    > ```
+
+ 7. Run Julia using the Julia executables directly
+    ```sh
+    usr/bin/julia.exe
+    usr/bin/julia-debug.exe
+    ```
+
+### Compiling with MinGW/MSYS2
+
+Compiling Julia from source using [MSYS2](https://msys2.github.io) has worked in the past
+but is not actively supported. Pull requests to restore support would be welcome. See a
+[past version of this
+file](https://github.com/JuliaLang/julia/blob/v0.6.0/README.windows.md) for the former
+instructions for compiling using MSYS2.
+
+
+### Cross-compiling from Unix (Linux/Mac/WSL)
+
+You can also use MinGW-w64 cross compilers to build a Windows version of Julia from
+Linux, Mac, or the Windows Subsystem for Linux (WSL).
+
+First, you will need to ensure your system has the required dependencies. We
+need wine (>=1.7.5), a system compiler, and some downloaders. Note: a cygwin install might
+interfere with this method if using WSL.
+
+**On Ubuntu** (on other Linux systems the dependency names are likely to be similar):
+```sh
+apt-get install wine-stable gcc wget p7zip-full winbind mingw-w64 gfortran-mingw-w64
+dpkg --add-architecture i386 && apt-get update && apt-get install wine32 # add sudo to each if needed
+# switch all of the following to their "-posix" variants (interactively):
+for pkg in i686-w64-mingw32-g++ i686-w64-mingw32-gcc i686-w64-mingw32-gfortran x86_64-w64-mingw32-g++ x86_64-w64-mingw32-gcc x86_64-w64-mingw32-gfortran; do sudo update-alternatives --config $pkg; done
+```
+
+**On Mac**: Install XCode, XCode command line tools, X11 (now
+[XQuartz](https://www.xquartz.org/)), and [MacPorts](https://www.macports.org/install.php)
+or [Homebrew](https://brew.sh/).  Then run `port install wine wget mingw-w64`, or `brew
+install wine wget mingw-w64`, as appropriate.
+
+**Then run the build:**
+
+ 1. `git clone https://github.com/JuliaLang/julia.git julia-win32`
+ 2. `cd julia-win32`
+ 3. `echo override XC_HOST = i686-w64-mingw32 >> Make.user`
+ 4. `make`
+ 5. `make win-extras` (Necessary before running `make binary-dist`)
+ 6. `make binary-dist` then `make exe` to create the Windows installer.
+ 7. move the `julia-*.exe` installer to the target machine
+
+If you are building for 64-bit windows, the steps are essentially the same.
+Just replace `i686` in `XC_HOST` with `x86_64`. (note: on Mac, wine only runs
+in 32-bit mode).
+
+
+## Debugging a cross-compiled build under wine
+
+The most effective way to debug a cross-compiled version of Julia on the cross-compilation
+host is to install a windows version of gdb and run it under wine as usual. The pre-built
+packages available [as part of the MSYS2
+project](https://sourceforge.net/projects/msys2/files/REPOS/MINGW/) are known to work. Apart
+from the GDB package you may also need the python and termcap packages. Finally, GDB's
+prompt may not work when launch from the command line. This can be worked around by
+prepending `wineconsole` to the regular GDB invocation.
+
+
+## After compiling
+
+Compiling using one of the options above creates a basic Julia build, but not some
+extra components that are included if you run the full Julia binary installer.
+If you need these components, the easiest way to get them is to build the installer
+yourself using ```make win-extras``` followed by ```make binary-dist``` and ```make exe```. Then running the resulting installer.
+
+
+## Windows Build Debugging
+
+
+### GDB hangs with cygwin mintty
+
+- Run gdb under the windows console (cmd) instead. gdb [may not function
+  properly](https://www.cygwin.com/ml/cygwin/2009-02/msg00531.html) under mintty with non-
+  cygwin applications. You can use `cmd /c start` to start the windows console from mintty
+  if necessary.
+
+### GDB not attaching to the right process
+
+ - Use the PID from the windows task manager or `WINPID` from the `ps` command
+   instead of the PID from unix style command line tools (e.g. `pgrep`).  You
+   may need to add the PID column if it is not shown by default in the windows
+   task manager.
+
+### GDB not showing the right backtrace
+
+ - When attaching to the julia process, GDB may not be attaching to the right
+   thread.  Use `info threads` command to show all the threads and
+   `thread <threadno>` to switch threads.
+ - Be sure to use a 32 bit version of GDB to debug a 32 bit build of Julia, or
+   a 64 bit version of GDB to debug a 64 bit build of Julia.
+
+### Build process is slow/eats memory/hangs my computer
+
+ - Disable the Windows
+   [Superfetch](https://en.wikipedia.org/wiki/Windows_Vista_I/O_technologies#SuperFetch) and
+   [Program Compatibility
+   Assistant](https://blogs.msdn.com/b/cjacks/archive/2011/11/22/managing-the-windows-7-program-compatibility-assistant-pca.aspx)
+   services, as they are known to have [spurious
+   interactions](https://cygwin.com/ml/cygwin/2011-12/msg00058.html) with MinGW/Cygwin.
+
+   As mentioned in the link above: excessive memory use by `svchost` specifically
+   may be investigated in the Task Manager by clicking on the high-memory
+   `svchost.exe` process and selecting `Go to Services`. Disable child services
+   one-by-one until a culprit is found.
+
+ - Beware of [BLODA](https://cygwin.com/faq/faq.html#faq.using.bloda).
+   The [vmmap](https://technet.microsoft.com/en-us/sysinternals/dd535533.aspx)
+   tool is indispensable for identifying such software conflicts. Use vmmap to
+   inspect the list of loaded DLLs for bash, mintty, or another persistent
+   process used to drive the build. Essentially *any* DLL outside of the Windows
+   System directory is potential BLODA.
diff --git a/doc/src/devdocs/debuggingtips.md b/doc/src/devdocs/debuggingtips.md
index f145931df3ecce..7639e8be2ef965 100644
--- a/doc/src/devdocs/debuggingtips.md
+++ b/doc/src/devdocs/debuggingtips.md
@@ -1,4 +1,4 @@
-# gdb debugging tips
+# [gdb debugging tips](@id gdb-debugging-tips)
 
 ## Displaying Julia variables
 
@@ -107,11 +107,11 @@ Since this function is used for every call, you will make everything 1000x slowe
 
 ## Dealing with signals
 
-Julia requires a few signal to function property. The profiler uses `SIGUSR2` for sampling and
+Julia requires a few signals to function properly. The profiler uses `SIGUSR2` for sampling and
 the garbage collector uses `SIGSEGV` for threads synchronization. If you are debugging some code
 that uses the profiler or multiple threads, you may want to let the debugger ignore these signals
 since they can be triggered very often during normal operations. The command to do this in GDB
-is (replace `SIGSEGV` with `SIGUSRS` or other signals you want to ignore):
+is (replace `SIGSEGV` with `SIGUSR2` or other signals you want to ignore):
 
 ```
 (gdb) handle SIGSEGV noprint nostop pass
@@ -234,7 +234,7 @@ process)
 
 ## Mozilla's Record and Replay Framework (rr)
 
-Julia now works out of the box with [rr](http://rr-project.org/), the lightweight recording and
+Julia now works out of the box with [rr](https://rr-project.org/), the lightweight recording and
 deterministic debugging framework from Mozilla. This allows you to replay the trace of an execution
 deterministically.  The replayed execution's address spaces, register contents, syscall data etc
 are exactly the same in every run.
diff --git a/doc/src/devdocs/eval.md b/doc/src/devdocs/eval.md
index e04035a3d49e60..1aea5161ad23a6 100644
--- a/doc/src/devdocs/eval.md
+++ b/doc/src/devdocs/eval.md
@@ -23,7 +23,7 @@ function, and primitive function, before turning into the desired result (hopefu
 The 10,000 foot view of the whole process is as follows:
 
 1. The user starts `julia`.
-2. The C function `main()` from `ui/repl.c` gets called. This function processes the command line
+2. The C function `main()` from `cli/loader_exe.c` gets called. This function processes the command line
    arguments, filling in the `jl_options` struct and setting the variable `ARGS`. It then initializes
    Julia (by calling [`julia_init` in `task.c`](https://github.com/JuliaLang/julia/blob/master/src/task.c),
    which may load a previously compiled [sysimg](@ref dev-sysimg)). Finally, it passes off control to Julia
@@ -43,7 +43,7 @@ The 10,000 foot view of the whole process is as follows:
    interpreter.
 9. `jl_toplevel_eval_flex()` then [expands](@ref dev-macro-expansion) the code to eliminate any macros and to "lower"
    the AST to make it simpler to execute.
-10. `jl_toplevel_eval_flex()` then uses some simple heuristics to decide whether to JIT compiler the
+10. `jl_toplevel_eval_flex()` then uses some simple heuristics to decide whether to JIT compile the
     AST or to interpret it directly.
 11. The bulk of the work to interpret code is handled by [`eval` in `interpreter.c`](https://github.com/JuliaLang/julia/blob/master/src/interpreter.c).
 12. If instead, the code is compiled, the bulk of the work is handled by `codegen.cpp`. Whenever a
diff --git a/doc/src/devdocs/functions.md b/doc/src/devdocs/functions.md
index 48b0081f9b2b3b..13f863cd26d81c 100644
--- a/doc/src/devdocs/functions.md
+++ b/doc/src/devdocs/functions.md
@@ -13,7 +13,7 @@ share the same `Complex` type name object.
 All objects in Julia are potentially callable, because every object has a type, which in turn
 has a `TypeName`.
 
-## Function calls
+## [Function calls](@id Function-calls)
 
 Given the call `f(x,y)`, the following steps are performed: first, the method table to use is
 accessed as `typeof(f).name.mt`. Second, an argument tuple type is formed, `Tuple{typeof(f), typeof(x), typeof(y)}`.
@@ -101,10 +101,30 @@ currently share a method table via special arrangement.
 
 The "builtin" functions, defined in the `Core` module, are:
 
-```
-=== typeof sizeof <: isa typeassert throw tuple getfield setfield! fieldtype
-nfields isdefined arrayref arrayset arraysize applicable invoke apply_type _apply
-_expr svec
+```@eval
+function lines(words)
+    io = IOBuffer()
+    n = 0
+    for w in words
+        if n+length(w) > 80
+            print(io, '\n', w)
+            n = length(w)
+        elseif n == 0
+            print(io, w);
+            n += length(w)
+        else
+            print(io, ' ', w);
+            n += length(w)+1
+        end
+    end
+    String(take!(io))
+end
+import Markdown
+[string(n) for n in names(Core;all=true)
+    if getfield(Core,n) isa Core.Builtin && nameof(getfield(Core,n)) === n] |>
+    lines |>
+    s ->  "```\n$s\n```" |>
+    Markdown.parse
 ```
 
 These are all singleton objects whose types are subtypes of `Builtin`, which is a subtype of
diff --git a/doc/src/devdocs/gc-sa.md b/doc/src/devdocs/gc-sa.md
index 85d16c1e4e1950..ffbb7451fce5f2 100644
--- a/doc/src/devdocs/gc-sa.md
+++ b/doc/src/devdocs/gc-sa.md
@@ -2,13 +2,23 @@
 
 ## Running the analysis
 
-The analyzer plugin that drives the anlysis ships with julia. Its
+The analyzer plugin that drives the analysis ships with julia. Its
 source code can be found in `src/clangsa`. Running it requires
 the clang dependency to be build. Set the `BUILD_LLVM_CLANG` variable
 in your Make.user in order to build an appropriate version of clang.
 You may also want to use the prebuilt binaries using the
-`USE_BINARYBUILDER_LLVM` options. Afterwards, running the analysis
-over the source tree is as simple as running `make -C src analyzegc`.
+`USE_BINARYBUILDER_LLVM` options.
+
+Alternatively (or if these do not suffice), try
+
+```sh
+make -C src install-analysis-deps
+```
+
+from Julia's toplevel directory.
+
+
+Afterwards, running the analysis over the source tree is as simple as running `make -C src analyzegc`.
 
 ## General Overview
 
diff --git a/doc/src/devdocs/inference.md b/doc/src/devdocs/inference.md
index c8fea74995595b..68d63600f1bb1e 100644
--- a/doc/src/devdocs/inference.md
+++ b/doc/src/devdocs/inference.md
@@ -6,8 +6,8 @@
 to the process of deducing the types of later values from the types of
 input values. Julia's approach to inference has been described in blog
 posts
-([1](https://juliacomputing.com/blog/2016/04/04/inference-convergence.html),
-[2](https://juliacomputing.com/blog/2017/05/15/inference-converage2.html)).
+([1](https://juliacomputing.com/blog/2016/04/inference-convergence/),
+[2](https://juliacomputing.com/blog/2017/05/inference-converage2/)).
 
 ## Debugging compiler.jl
 
@@ -37,9 +37,8 @@ m = first(mths)
 interp = Core.Compiler.NativeInterpreter()
 sparams = Core.svec()      # this particular method doesn't have type-parameters
 optimize = true            # run all inference optimizations
-cached = false             # force inference to happen (do not use cached results)
 types = Tuple{typeof(convert), atypes.parameters...} # Tuple{typeof(convert), Type{Int}, UInt}
-Core.Compiler.typeinf_code(interp, types, sparams, optimize, cached)
+Core.Compiler.typeinf_code(interp, m, types, sparams, optimize)
 ```
 
 If your debugging adventures require a `MethodInstance`, you can look it up by
@@ -50,16 +49,12 @@ A `CodeInfo` object may be obtained with
 ci = (@code_typed convert(Int, UInt(1)))[1]
 ```
 
-## The inlining algorithm (inline_worthy)
+## The inlining algorithm (`inline_worthy`)
 
-Much of the hardest work for inlining runs in
-`inlining_pass`. However, if your question is "why didn't my function
-inline?" then you will most likely be interested in `isinlineable` and
-its primary callee, `inline_worthy`. `isinlineable` handles a number
-of special cases (e.g., critical functions like `next` and `done`,
-incorporating a bonus for functions that return tuples, etc.). The
-main decision-making happens in `inline_worthy`, which returns `true`
-if the function should be inlined.
+Much of the hardest work for inlining runs in `ssa_inlining_pass!`.
+However, if your question is "why didn't my function inline?"
+then you will most likely be interested in `inline_worthy`,
+which makes a decision to inline the function call or not.
 
 `inline_worthy` implements a cost-model, where "cheap" functions get
 inlined; more specifically, we inline functions if their anticipated
@@ -91,7 +86,7 @@ input and output types were inferred in advance) is assigned a fixed
 cost (currently 20 cycles). In contrast, a `:call` expression, for
 functions other than intrinsics/builtins, indicates that the call will
 require dynamic dispatch, in which case we assign a cost set by
-`Params.inline_nonleaf_penalty` (currently set at 1000). Note
+`Params.inline_nonleaf_penalty` (currently set at `1000`). Note
 that this is not a "first-principles" estimate of the raw cost of
 dynamic dispatch, but a mere heuristic indicating that dynamic
 dispatch is extremely expensive.
diff --git a/doc/src/devdocs/init.md b/doc/src/devdocs/init.md
index 03d4433c4be293..348e69f673f806 100644
--- a/doc/src/devdocs/init.md
+++ b/doc/src/devdocs/init.md
@@ -4,15 +4,17 @@ How does the Julia runtime execute `julia -e 'println("Hello World!")'` ?
 
 ## `main()`
 
-Execution starts at [`main()` in `ui/repl.c`](https://github.com/JuliaLang/julia/blob/master/ui/repl.c).
+Execution starts at [`main()` in `cli/loader_exe.c`](https://github.com/JuliaLang/julia/blob/master/cli/loader_exe.c),
+which calls `jl_load_repl()` in [`cli/loader_lib.c`](https://github.com/JuliaLang/julia/blob/master/cli/loader_lib.c)
+which loads a few libraries, eventually calling [`repl_entrypoint()` in `src/jlapi.c`](https://github.com/JuliaLang/julia/blob/master/src/jlapi.c).
 
-`main()` calls [`libsupport_init()`](https://github.com/JuliaLang/julia/blob/master/src/support/libsupportinit.c)
+`repl_entrypoint()` calls [`libsupport_init()`](https://github.com/JuliaLang/julia/blob/master/src/support/libsupportinit.c)
 to set the C library locale and to initialize the "ios" library (see [`ios_init_stdstreams()`](https://github.com/JuliaLang/julia/blob/master/src/support/ios.c)
-and [Legacy `ios.c` library](@ref)).
+and [Legacy `ios.c` library](@ref Legacy-ios.c-library)).
 
 Next [`jl_parse_opts()`](https://github.com/JuliaLang/julia/blob/master/src/jloptions.c) is called to process
 command line options. Note that `jl_parse_opts()` only deals with options that affect code generation
-or early initialization. Other options are handled later by [`process_options()` in `base/client.jl`](https://github.com/JuliaLang/julia/blob/master/base/client.jl).
+or early initialization. Other options are handled later by [`exec_options()` in `base/client.jl`](https://github.com/JuliaLang/julia/blob/master/base/client.jl).
 
 `jl_parse_opts()` stores command line options in the [global `jl_options` struct](https://github.com/JuliaLang/julia/blob/master/src/julia.h).
 
@@ -27,7 +29,7 @@ by `main()` and calls [`_julia_init()` in `init.c`](https://github.com/JuliaLang
 to zero the signal handler mask.
 
 [`jl_resolve_sysimg_location()`](https://github.com/JuliaLang/julia/blob/master/src/init.c) searches
-configured paths for the base system image. See [Building the Julia system image](@ref).
+configured paths for the base system image. See [Building the Julia system image](@ref Building-the-Julia-system-image).
 
 [`jl_gc_init()`](https://github.com/JuliaLang/julia/blob/master/src/gc.c) sets up allocation pools
 and lists for weak refs, preserved values and finalization.
@@ -53,7 +55,7 @@ jl_int32_type = jl_new_primitivetype(jl_symbol("Int32"), core,
 object; initializes the global `jl_root_task` struct; and sets `jl_current_task` to the root task.
 
 [`jl_init_codegen()`](https://github.com/JuliaLang/julia/blob/master/src/codegen.cpp) initializes
-the [LLVM library](http://llvm.org).
+the [LLVM library](https://llvm.org).
 
 [`jl_init_serializer()`](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c) initializes
 8-bit serialization tags for builtin `jl_value_t` values.
@@ -123,26 +125,26 @@ each deserialized module to run the `__init__()` function.
 Finally [`sigint_handler()`](https://github.com/JuliaLang/julia/blob/master/src/signals-unix.c)
 is hooked up to `SIGINT` and calls `jl_throw(jl_interrupt_exception)`.
 
-`_julia_init()` then returns [back to `main()` in `ui/repl.c`](https://github.com/JuliaLang/julia/blob/master/ui/repl.c)
-and `main()` calls `true_main(argc, (char**)argv)`.
+`_julia_init()` then returns [back to `main()` in `cli/loader_exe.c`](https://github.com/JuliaLang/julia/blob/master/cli/loader_exe.c)
+and `main()` calls `repl_entrypoint(argc, (char**)argv)`.
 
 !!! sidebar "sysimg"
     If there is a sysimg file, it contains a pre-cooked image of the `Core` and `Main` modules (and
-    whatever else is created by `boot.jl`). See [Building the Julia system image](@ref).
+    whatever else is created by `boot.jl`). See [Building the Julia system image](@ref Building-the-Julia-system-image).
 
     [`jl_restore_system_image()`](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c) deserializes
     the saved sysimg into the current Julia runtime environment and initialization continues after
     `jl_init_box_caches()` below...
 
     Note: [`jl_restore_system_image()` (and `staticdata.c` in general)](https://github.com/JuliaLang/julia/blob/master/src/staticdata.c)
-    uses the [Legacy `ios.c` library](@ref).
+    uses the [Legacy `ios.c` library](@ref Legacy-ios.c-library).
 
-## `true_main()`
+## `repl_entrypoint()`
 
-[`true_main()`](https://github.com/JuliaLang/julia/blob/master/ui/repl.c) loads the contents of
+[`repl_entrypoint()`](https://github.com/JuliaLang/julia/blob/master/src/jlapi.c) loads the contents of
 `argv[]` into [`Base.ARGS`](@ref).
 
-If a `.jl` "program" file was supplied on the command line, then [`exec_program()`](https://github.com/JuliaLang/julia/blob/master/ui/repl.c)
+If a `.jl` "program" file was supplied on the command line, then [`exec_program()`](https://github.com/JuliaLang/julia/blob/master/src/jlapi.c)
 calls [`jl_load(program,len)`](https://github.com/JuliaLang/julia/blob/master/src/toplevel.c) which
 calls [`jl_parse_eval_all`](https://github.com/JuliaLang/julia/blob/master/src/ast.c) which repeatedly
 calls [`jl_toplevel_eval_flex()`](https://github.com/JuliaLang/julia/blob/master/src/toplevel.c)
@@ -154,28 +156,25 @@ executes it.
 
 ## `Base._start`
 
-[`Base._start`](https://github.com/JuliaLang/julia/blob/master/base/client.jl) calls [`Base.process_options`](https://github.com/JuliaLang/julia/blob/master/base/client.jl)
+[`Base._start`](https://github.com/JuliaLang/julia/blob/master/base/client.jl) calls [`Base.exec_options`](https://github.com/JuliaLang/julia/blob/master/base/client.jl)
 which calls [`jl_parse_input_line("println("Hello World!")")`](https://github.com/JuliaLang/julia/blob/master/src/ast.c)
-to create an expression object and [`Base.eval()`](@ref eval) to execute it.
+to create an expression object and [`Core.eval(Main, ex)`](@ref Core.eval) to execute the parsed expression `ex` in the module context of `Main`.
 
-## `Base.eval`
+## `Core.eval`
 
-[`Base.eval()`](@ref eval) was [mapped to `jl_f_top_eval`](https://github.com/JuliaLang/julia/blob/master/src/builtins.c)
-by `jl_init_primitives()`.
-
-[`jl_f_top_eval()`](https://github.com/JuliaLang/julia/blob/master/src/builtins.c) calls [`jl_toplevel_eval_in(jl_main_module, ex)`](https://github.com/JuliaLang/julia/blob/master/src/builtins.c),
-where `ex` is the parsed expression `println("Hello World!")`.
-
-[`jl_toplevel_eval_in()`](https://github.com/JuliaLang/julia/blob/master/src/builtins.c) calls
-[`jl_toplevel_eval_flex()`](https://github.com/JuliaLang/julia/blob/master/src/toplevel.c) which
-calls [`eval()` in `interpreter.c`](https://github.com/JuliaLang/julia/blob/master/src/interpreter.c).
+[`Core.eval(Main, ex)`](@ref Core.eval) calls [`jl_toplevel_eval_in(m, ex)`](https://github.com/JuliaLang/julia/blob/master/src/toplevel.c),
+which calls [`jl_toplevel_eval_flex`](https://github.com/JuliaLang/julia/blob/master/src/toplevel.c).
+`jl_toplevel_eval_flex` implements a simple heuristic to decide whether to compile a given code thunk or run it by interpreter.
+When given `println("Hello World!")`, it would usually decide to run the code by interpreter, in which case it calls
+[`jl_interpret_toplevel_thunk`](https://github.com/JuliaLang/julia/blob/master/src/interpreter.c), which then calls
+[`eval_body`](https://github.com/JuliaLang/julia/blob/master/src/interpreter.c).
 
 The stack dump below shows how the interpreter works its way through various methods of [`Base.println()`](@ref)
 and [`Base.print()`](@ref) before arriving at [`write(s::IO, a::Array{T}) where T`](https://github.com/JuliaLang/julia/blob/master/base/stream.jl)
  which does `ccall(jl_uv_write())`.
 
 [`jl_uv_write()`](https://github.com/JuliaLang/julia/blob/master/src/jl_uv.c) calls `uv_write()`
-to write "Hello World!" to `JL_STDOUT`. See [Libuv wrappers for stdio](@ref).:
+to write "Hello World!" to `JL_STDOUT`. See [Libuv wrappers for stdio](@ref Libuv-wrappers-for-stdio).:
 
 ```
 Hello World!
@@ -186,33 +185,32 @@ Hello World!
 | `jl_uv_write()`                | `jl_uv.c`       | called though [`ccall`](@ref)                        |
 | `julia_write_282942`           | `stream.jl`     | function `write!(s::IO, a::Array{T}) where T`        |
 | `julia_print_284639`           | `ascii.jl`      | `print(io::IO, s::String) = (write(io, s); nothing)` |
-| `jlcall_print_284639`          |                 |                                                      |
-| `jl_apply()`                   | `julia.h`       |                                                      |
-| `jl_trampoline()`              | `builtins.c`    |                                                      |
-| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jlcall_print_284639`          |                 |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jl_trampoline()`              | `builtins.c`    |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
 | `jl_apply_generic()`           | `gf.c`          | `Base.print(Base.TTY, String)`                       |
-| `jl_apply()`                   | `julia.h`       |                                                      |
-| `jl_trampoline()`              | `builtins.c`    |                                                      |
-| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jl_trampoline()`              | `builtins.c`    |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
 | `jl_apply_generic()`           | `gf.c`          | `Base.print(Base.TTY, String, Char, Char...)`        |
-| `jl_apply()`                   | `julia.h`       |                                                      |
-| `jl_f_apply()`                 | `builtins.c`    |                                                      |
-| `jl_apply()`                   | `julia.h`       |                                                      |
-| `jl_trampoline()`              | `builtins.c`    |                                                      |
-| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jl_f_apply()`                 | `builtins.c`    |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jl_trampoline()`              | `builtins.c`    |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
 | `jl_apply_generic()`           | `gf.c`          | `Base.println(Base.TTY, String, String...)`          |
-| `jl_apply()`                   | `julia.h`       |                                                      |
-| `jl_trampoline()`              | `builtins.c`    |                                                      |
-| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
+| `jl_trampoline()`              | `builtins.c`    |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
 | `jl_apply_generic()`           | `gf.c`          | `Base.println(String,)`                              |
-| `jl_apply()`                   | `julia.h`       |                                                      |
-| `do_call()`                    | `interpreter.c` |                                                      |
-| `eval()`                       | `interpreter.c` |                                                      |
-| `jl_interpret_toplevel_expr()` | `interpreter.c` |                                                      |
-| `jl_toplevel_eval_flex()`      | `toplevel.c`    |                                                      |
-| `jl_toplevel_eval()`           | `toplevel.c`    |                                                      |
-| `jl_toplevel_eval_in()`        | `builtins.c`    |                                                      |
-| `jl_f_top_eval()`              | `builtins.c`    |                                                      |
+| `jl_apply()`                   | `julia.h`       |                                                      |
+| `do_call()`                    | `interpreter.c` |                                                      |
+| `eval_body()`                  | `interpreter.c` |                                                      |
+| `jl_interpret_toplevel_thunk`  | `interpreter.c` |                                                      |
+| `jl_toplevel_eval_flex`        | `toplevel.c`    |                                                      |
+| `jl_toplevel_eval_in`          | `toplevel.c`    |                                                      |
+| `Core.eval`                    | `boot.jl`       |                                                      |
 
 Since our example has just one function call, which has done its job of printing "Hello World!",
 the stack now rapidly unwinds back to `main()`.
diff --git a/doc/src/devdocs/llvm.md b/doc/src/devdocs/llvm.md
index 45ecb918917640..840822f1360045 100644
--- a/doc/src/devdocs/llvm.md
+++ b/doc/src/devdocs/llvm.md
@@ -28,13 +28,13 @@ The difference between an intrinsic and a builtin is that a builtin is a first c
 that can be used like any other Julia function.  An intrinsic can operate only on unboxed data,
 and therefore its arguments must be statically typed.
 
-### Alias Analysis
+### [Alias Analysis](@id LLVM-Alias-Analysis)
 
-Julia currently uses LLVM's [Type Based Alias Analysis](http://llvm.org/docs/LangRef.html#tbaa-metadata).
+Julia currently uses LLVM's [Type Based Alias Analysis](https://llvm.org/docs/LangRef.html#tbaa-metadata).
 To find the comments that document the inclusion relationships, look for `static MDNode*` in
 `src/codegen.cpp`.
 
-The `-O` option enables LLVM's [Basic Alias Analysis](http://llvm.org/docs/AliasAnalysis.html#the-basicaa-pass).
+The `-O` option enables LLVM's [Basic Alias Analysis](https://llvm.org/docs/AliasAnalysis.html#the-basic-aa-pass).
 
 ## Building Julia with a different version of LLVM
 
@@ -42,11 +42,11 @@ The default version of LLVM is specified in `deps/Versions.make`. You can overri
 a file called `Make.user` in the top-level directory and adding a line to it such as:
 
 ```
-LLVM_VER = 6.0.1
+LLVM_VER = 13.0.0
 ```
 
-Besides the LLVM release numerals, you can also use `LLVM_VER = svn` to build against the latest
-development version of LLVM.
+Besides the LLVM release numerals, you can also use `DEPS_GIT = llvm` in combination with
+`USE_BINARYBUILDER_LLVM = 0` to build against the latest development version of LLVM.
 
 You can also specify to build a debug version of LLVM, by setting either `LLVM_DEBUG = 1` or
 `LLVM_DEBUG = Release` in your `Make.user` file. The former will be a fully unoptimized build
@@ -60,8 +60,8 @@ implies that option by default.
 You can pass options to LLVM via the environment variable `JULIA_LLVM_ARGS`.
 Here are example settings using `bash` syntax:
 
-  * `export JULIA_LLVM_ARGS = -print-after-all` dumps IR after each pass.
-  * `export JULIA_LLVM_ARGS = -debug-only=loop-vectorize` dumps LLVM `DEBUG(...)` diagnostics for
+  * `export JULIA_LLVM_ARGS=-print-after-all` dumps IR after each pass.
+  * `export JULIA_LLVM_ARGS=-debug-only=loop-vectorize` dumps LLVM `DEBUG(...)` diagnostics for
     loop vectorizer. If you get warnings about "Unknown command line argument", rebuild LLVM with
     `LLVM_ASSERTIONS = 1`.
 
@@ -79,18 +79,12 @@ environment. In addition, it exposes the `-julia` meta-pass, which runs the
 entire Julia pass-pipeline over the IR. As an example, to generate a system
 image, one could do:
 ```
-opt -load libjulia.so -julia -o opt.bc unopt.bc
+opt -enable-new-pm=0 -load libjulia-codegen.so -julia -o opt.bc unopt.bc
 llc -o sys.o opt.bc
 cc -shared -o sys.so sys.o
 ```
 This system image can then be loaded by `julia` as usual.
 
-Alternatively, you can
-use `--output-jit-bc jit.bc` to obtain a trace of all IR passed to the JIT.
-This is useful for code that cannot be run as part of the sysimg generation
-process (e.g. because it creates unserializable state). However, the resulting
-`jit.bc` does not include sysimage data, and can thus not be used as such.
-
 It is also possible to dump an LLVM IR module for just one Julia function,
 using:
 ```julia
@@ -108,12 +102,12 @@ above.
 Improving LLVM code generation usually involves either changing Julia lowering to be more friendly
 to LLVM's passes, or improving a pass.
 
-If you are planning to improve a pass, be sure to read the [LLVM developer policy](http://llvm.org/docs/DeveloperPolicy.html).
+If you are planning to improve a pass, be sure to read the [LLVM developer policy](https://llvm.org/docs/DeveloperPolicy.html).
 The best strategy is to create a code example in a form where you can use LLVM's `opt` tool to
 study it and the pass of interest in isolation.
 
 1. Create an example Julia code of interest.
-2. Use `JULIA_LLVM_ARGS = -print-after-all` to dump the IR.
+2. Use `JULIA_LLVM_ARGS=-print-after-all` to dump the IR.
 3. Pick out the IR at the point just before the pass of interest runs.
 4. Strip the debug metadata and fix up the TBAA metadata by hand.
 
diff --git a/doc/src/devdocs/locks.md b/doc/src/devdocs/locks.md
index 7591f0df2d3566..59dac6ad794983 100644
--- a/doc/src/devdocs/locks.md
+++ b/doc/src/devdocs/locks.md
@@ -1,4 +1,4 @@
-# Proper maintenance and care of multi-threading locks
+# [Proper maintenance and care of multi-threading locks](@id Proper-maintenance-and-care-of-multi-threading-locks)
 
 The following strategies are used to ensure that the code is dead-lock free (generally by addressing
 the 4th Coffman condition: circular wait).
@@ -28,8 +28,12 @@ The following are definitely leaf locks (level 1), and must not try to acquire a
 >   * gc_perm_lock
 >   * flisp
 >   * jl_in_stackwalk (Win32)
+>   * ResourcePool<?>::mutex
+>   * RLST_mutex
+>   * jl_locked_stream::mutex
 >
 >     > flisp itself is already threadsafe, this lock only protects the `jl_ast_context_list_t` pool
+>     > likewise, the ResourcePool<?>::mutexes just protect the associated resource pool
 
 The following is a leaf lock (level 2), and only acquires level 1 locks (safepoint) internally:
 
@@ -46,6 +50,11 @@ The following is a level 4 lock, which can only recurse to acquire level 1, 2, o
 
 No Julia code may be called while holding a lock above this point.
 
+orc::ThreadSafeContext locks occupy a special spot in the locking diagram. They are used to protect
+LLVM's global non-threadsafe state, but there may be an arbitrary number of them. For now, there is
+only one global context, and thus acquiring it is a level 5 lock. However, acquiring such a lock
+should only be done at the same time that the codegen lock is acquired.
+
 The following are a level 6 lock, which can only recurse to acquire locks at lower levels:
 
 >   * codegen
diff --git a/doc/src/devdocs/object.md b/doc/src/devdocs/object.md
index 8cba7c8ba45008..cf377c052bf154 100644
--- a/doc/src/devdocs/object.md
+++ b/doc/src/devdocs/object.md
@@ -199,4 +199,3 @@ objects.
     0 bytes, and consist only of their metadata. e.g. `nothing::Nothing`.
 
     See [Singleton Types](@ref man-singleton-types) and [Nothingness and missing values](@ref)
-
diff --git a/doc/src/devdocs/offset-arrays.md b/doc/src/devdocs/offset-arrays.md
index 2a562049a263c0..cc647eb1bd4641 100644
--- a/doc/src/devdocs/offset-arrays.md
+++ b/doc/src/devdocs/offset-arrays.md
@@ -56,7 +56,7 @@ the cause try running julia with the option `--check-bounds=yes`.)
 
 ### Using `axes` for bounds checks and loop iteration
 
-`axes(A)` (reminiscent of `size(A)`) returns a tuple of `AbstractUnitRange` objects, specifying
+`axes(A)` (reminiscent of `size(A)`) returns a tuple of `AbstractUnitRange{<:Integer}` objects, specifying
 the range of valid indices along each dimension of `A`.  When `A` has unconventional indexing,
 the ranges may not start at 1.  If you just want the range for a particular dimension `d`, there
 is `axes(A, d)`.
diff --git a/doc/src/devdocs/probes.md b/doc/src/devdocs/probes.md
new file mode 100644
index 00000000000000..5cfd9f6a762f84
--- /dev/null
+++ b/doc/src/devdocs/probes.md
@@ -0,0 +1,326 @@
+# Instrumenting Julia with DTrace, and bpftrace
+
+DTrace and bpftrace are tools that enable lightweight instrumentation of processes.
+You can turn the instrumentation on and off while the process is running,
+and with instrumentation off the overhead is minimal.
+
+!!! compat "Julia 1.8"
+    Support for probes was added in Julia 1.8
+
+!!! note
+    This documentation has been written from a Linux perspective, most of this
+    should hold on Mac OS/Darwin and FreeBSD.
+
+## Enabling support
+
+On Linux install the `systemtap` package that has a version of `dtrace` and create a `Make.user` file containing
+
+```
+WITH_DTRACE=1
+```
+
+to enable USDT probes.
+
+### Verifying
+
+```
+> readelf -n usr/lib/libjulia-internal.so.1
+
+Displaying notes found in: .note.gnu.build-id
+  Owner                Data size 	Description
+  GNU                  0x00000014	NT_GNU_BUILD_ID (unique build ID bitstring)
+    Build ID: 57161002f35548772a87418d2385c284ceb3ead8
+
+Displaying notes found in: .note.stapsdt
+  Owner                Data size 	Description
+  stapsdt              0x00000029	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__begin
+    Location: 0x000000000013213e, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cac
+    Arguments:
+  stapsdt              0x00000032	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__stop_the_world
+    Location: 0x0000000000132144, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cae
+    Arguments:
+  stapsdt              0x00000027	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__end
+    Location: 0x000000000013214a, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb0
+    Arguments:
+  stapsdt              0x0000002d	NT_STAPSDT (SystemTap probe descriptors)
+    Provider: julia
+    Name: gc__finalizer
+    Location: 0x0000000000132150, Base: 0x00000000002bb4da, Semaphore: 0x0000000000346cb2
+    Arguments:
+```
+
+## Adding probes in libjulia
+
+Probes are declared in dtraces format in the file `src/uprobes.d`. The generated
+header file is included in `src/julia_internal.h` and if you add probes you should
+provide a noop implementation there.
+
+The header will contain a semaphore `*_ENABLED` and the actual call to the probe.
+If the probe arguments are expensive to compute you should first check if the
+probe is enabled and then compute the arguments and call the probe.
+
+```c
+  if (JL_PROBE_{PROBE}_ENABLED())
+    auto expensive_arg = ...;
+    JL_PROBE_{PROBE}(expensive_arg);
+```
+
+If your probe has no arguments it is preferred to not include the semaphore check.
+With USDT probes enabled the cost of a semaphore is a memory load, irrespective of
+the fact that the probe is enabled or not.
+
+```c
+#define JL_PROBE_GC_BEGIN_ENABLED() __builtin_expect (julia_gc__begin_semaphore, 0)
+__extension__ extern unsigned short julia_gc__begin_semaphore __attribute__ ((unused)) __attribute__ ((section (".probes")));
+```
+
+Whereas the probe itself is a noop sled that will be patched to a trampoline to
+the probe handler.
+
+## Available probes
+
+### GC probes
+
+1. `julia:gc__begin`: GC begins running on one thread and triggers stop-the-world.
+2. `julia:gc__stop_the_world`: All threads have reached a safepoint and GC runs.
+3. `julia:gc__mark__begin`: Beginning the mark phase
+4. `julia:gc__mark_end(scanned_bytes, perm_scanned)`: Mark phase ended
+5. `julia:gc__sweep_begin(full)`: Starting sweep
+6. `julia:gc__sweep_end`: Sweep phase finished
+7. `julia:gc__end`: GC is finished, other threads continue work
+8. `julia:gc__finalizer`: Initial GC thread has finished running finalizers
+
+### Task runtime probes
+
+1. `julia:rt__run__task(task)`: Switching to task `task` on current thread.
+2. `julia:rt__pause__task(task)`: Switching from task `task` on current thread.
+3. `julia:rt__new__task(parent, child)`: Task `parent` created task `child` on current thread.
+4. `julia:rt__start__task(task)`: Task `task` started for the first time with a new stack.
+5. `julia:rt__finish__task(task)`: Task `task` finished and will no longer execute.
+6. `julia:rt__start__process__events(task)`: Task `task` started processing libuv events.
+7. `julia:rt__finish__process__events(task)`: Task `task` finished processing libuv events.
+
+### Task queue probes
+
+1. `julia:rt__taskq__insert(ptls, task)`: Thread `ptls` attempted to insert `task` into a PARTR multiq.
+2. `julia:rt__taskq__get(ptls, task)`: Thread `ptls` popped `task` from a PARTR multiq.
+
+### Thread sleep/wake probes
+
+1. `julia:rt__sleep__check__wake(ptls, old_state)`: Thread (PTLS `ptls`) waking up, previously in state `old_state`.
+2. `julia:rt__sleep__check__wakeup(ptls)`: Thread (PTLS `ptls`) woke itself up.
+3. `julia:rt__sleep__check__sleep(ptls)`: Thread (PTLS `ptls`) is attempting to sleep.
+4. `julia:rt__sleep__check__taskq__wake(ptls)`: Thread (PTLS `ptls`) fails to sleep due to tasks in PARTR multiq.
+5. `julia:rt__sleep__check__task__wake(ptls)`: Thread (PTLS `ptls`) fails to sleep due to tasks in Base workqueue.
+6. `julia:rt__sleep__check__uv__wake(ptls)`: Thread (PTLS `ptls`) fails to sleep due to libuv wakeup.
+
+## Probe usage examples
+
+### GC stop-the-world latency
+
+An example `bpftrace` script is given in `contrib/gc_stop_the_world_latency.bt`
+and it creates a histogram of the latency for all threads to reach a safepoint.
+
+Running this Julia code, with `julia -t 2`
+
+```
+using Base.Threads
+
+fib(x) = x <= 1 ? 1 : fib(x-1) + fib(x-2)
+
+beaver = @spawn begin
+    while true
+        fib(30)
+        # This safepoint is necessary until #41616, since otherwise this
+        # loop will never yield to GC.
+        GC.safepoint()
+    end
+end
+
+allocator = @spawn begin
+    while true
+        zeros(1024)
+    end
+end
+
+wait(allocator)
+```
+
+and in a second terminal
+
+```
+> sudo contrib/bpftrace/gc_stop_the_world_latency.bt
+Attaching 4 probes...
+Tracing Julia GC Stop-The-World Latency... Hit Ctrl-C to end.
+^C
+
+
+@usecs[1743412]:
+[4, 8)               971 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@|
+[8, 16)              837 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@        |
+[16, 32)             129 |@@@@@@                                              |
+[32, 64)              10 |                                                    |
+[64, 128)              1 |                                                    |
+```
+
+We can see the latency distribution of the stop-the-world phase in the executed Julia process.
+
+### Task spawn monitor
+
+It's sometimes useful to know when a task is spawning other tasks. This is very
+easy to see with `rt__new__task`. The first argument to the probe, `parent`, is
+the existing task which is creating a new task. This means that if you know the
+address of the task you want to monitor, you can easily just look at the tasks
+that that specific task spawned. Let's see how to do this; first let's start a
+Julia session and get the PID and REPL's task address:
+
+```
+> julia
+               _
+   _       _ _(_)_     |  Documentation: https://docs.julialang.org
+  (_)     | (_) (_)    |
+   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
+  | | | | | | |/ _` |  |
+  | | |_| | | | (_| |  |  Version 1.6.2 (2021-07-14)
+ _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release
+|__/                   |
+
+1> getpid()
+997825
+
+2> current_task()
+Task (runnable) @0x00007f524d088010
+```
+
+Now we can start `bpftrace` and have it monitor `rt__new__task` for *only* this parent:
+
+`sudo bpftrace -p 997825 -e 'usdt:usr/lib/libjulia-internal.so:julia:rt__new__task /arg0==0x00007f524d088010/{ printf("Task: %x\n", arg0); }'`
+
+(Note that in the above, `arg0` is the first argument, `parent`).
+
+And if we spawn a single task:
+
+`@async 1+1`
+
+we see this task being created:
+
+`Task: 4d088010`
+
+However, if we spawn a bunch of tasks from that newly-spawned task:
+
+```julia
+@async for i in 1:10
+   @async 1+1
+end
+```
+
+we still only see one task from `bpftrace`:
+
+`Task: 4d088010`
+
+and it's still the same task we were monitoring! Of course, we can remove this
+filter to see *all* newly-created tasks just as easily:
+
+`sudo bpftrace -p 997825 -e 'usdt:usr/lib/libjulia-internal.so:julia:rt__new__task { printf("Task: %x\n", arg0); }'`
+
+```
+Task: 4d088010
+Task: 4dc4e290
+Task: 4dc4e290
+Task: 4dc4e290
+Task: 4dc4e290
+Task: 4dc4e290
+Task: 4dc4e290
+Task: 4dc4e290
+Task: 4dc4e290
+Task: 4dc4e290
+Task: 4dc4e290
+```
+
+We can see our root task, and the newly-spawned task as the parent of the ten
+even newer tasks.
+
+### Thundering herd detection
+
+Task runtimes can often suffer from the "thundering herd" problem: when some
+work is added to a quiet task runtime, all threads may be woken up from their
+slumber, even if there isn't enough work for each thread to process. This can
+cause extra latency and CPU cycles while all threads awaken (and simultaneously
+go back to sleep, not finding any work to execute).
+
+We can see this problem illustrated with `bpftrace` quite easily. First, in one terminal we start Julia with multiple threads (6 in this example), and get the PID of that process:
+
+```
+> julia -t 6
+               _
+   _       _ _(_)_     |  Documentation: https://docs.julialang.org
+  (_)     | (_) (_)    |
+   _ _   _| |_  __ _   |  Type "?" for help, "]?" for Pkg help.
+  | | | | | | |/ _` |  |
+  | | |_| | | | (_| |  |  Version 1.6.2 (2021-07-14)
+ _/ |\__'_|_|_|\__'_|  |  Official https://julialang.org/ release
+|__/                   |
+
+1> getpid()
+997825
+```
+
+And in another terminal we start `bpftrace` monitoring our process,
+specifically probing the `rt__sleep__check__wake` hook:
+
+`sudo bpftrace -p 997825 -e 'usdt:usr/lib/libjulia-internal.so:julia:rt__sleep__check__wake { printf("Thread wake up! %x\n", arg0); }'`
+
+Now, we create and execute a single task in Julia:
+
+`Threads.@spawn 1+1`
+
+And in `bpftrace` we see printed out something like:
+
+```
+Thread wake up! 3f926100
+Thread wake up! 3ebd5140
+Thread wake up! 3f876130
+Thread wake up! 3e2711a0
+Thread wake up! 3e312190
+```
+
+Even though we only spawned a single task (which only one thread could process
+at a time), we woke up all of our other threads! In the future, a smarter task
+runtime might only wake up a single thread (or none at all; the spawning thread
+could execute this task!), and we should see this behavior go away.
+
+### Task Monitor with BPFnative.jl
+
+BPFnative.jl is able to attach to USDT probe points just like `bpftrace`. There
+is a demo available for monitoring the task runtime, GC, and thread sleep/wake
+transitions [here](https://github.com/jpsamaroo/BPFnative.jl/blob/master/examples/task-runtime.jl).
+
+## Notes on using `bpftrace`
+
+An example probe in the bpftrace format looks like:
+
+```
+usdt:usr/lib/libjulia-internal.so:julia:gc__begin
+{
+	@start[pid] = nsecs;
+}
+```
+
+The probe declaration takes the kind `usdt`, then either the
+path to the library or the PID, the provider name `julia`
+and the probe name `gc__begin`. Note that I am using a
+relative path to the `libjulia-internal.so`, but this might
+need to be an absolute path on a production system.
+
+## Useful references:
+
+- [Julia Evans blog on Linux tracing systems](https://jvns.ca/blog/2017/07/05/linux-tracing-systems)
+- [LWN article on USDT and BPF](https://lwn.net/Articles/753601/)
+- [GDB support for probes](https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html)
+- [Brendan Gregg -- Linux Performance](https://www.brendangregg.com/linuxperf.html)
diff --git a/doc/src/devdocs/reflection.md b/doc/src/devdocs/reflection.md
index fbf0fd58d86a44..8ffe305a0d7240 100644
--- a/doc/src/devdocs/reflection.md
+++ b/doc/src/devdocs/reflection.md
@@ -65,7 +65,7 @@ recursive application of [`subtypes`](@ref) may be used to inspect the full type
 ## DataType layout
 
 The internal representation of a `DataType` is critically important when interfacing with C code
-and several functions are available to inspect these details. [`isbits(T::DataType)`](@ref) returns
+and several functions are available to inspect these details. [`isbitstype(T::DataType)`](@ref) returns
 true if `T` is stored with C-compatible alignment. [`fieldoffset(T::DataType, i::Integer)`](@ref)
 returns the (byte) offset for field *i* relative to the start of the type.
 
@@ -96,7 +96,7 @@ as assignments, branches, and calls:
 ```jldoctest
 julia> Meta.lower(@__MODULE__, :( [1+2, sin(0.5)] ))
 :($(Expr(:thunk, CodeInfo(
-    @ none within `top-level scope'
+    @ none within `top-level scope`
 1 ─ %1 = 1 + 2
 │   %2 = sin(0.5)
 │   %3 = Base.vect(%1, %2)
@@ -122,11 +122,12 @@ calls and expand argument types automatically:
 
 ```julia-repl
 julia> @code_llvm +(1,1)
-
-define i64 @"julia_+_130862"(i64, i64) {
+;  @ int.jl:87 within `+`
+; Function Attrs: sspstrong uwtable
+define i64 @"julia_+_476"(i64 signext %0, i64 signext %1) #0 {
 top:
-    %2 = add i64 %1, %0
-    ret i64 %2
+  %2 = add i64 %1, %0
+  ret i64 %2
 }
 ```
 
@@ -138,7 +139,7 @@ For more informations see [`@code_lowered`](@ref), [`@code_typed`](@ref), [`@cod
 The aforementioned functions and macros take the keyword argument `debuginfo` that controls the level
 debug information printed.
 
-```
+```julia-repl
 julia> @code_typed debuginfo=:source +(1,1)
 CodeInfo(
     @ int.jl:53 within `+'
@@ -147,7 +148,6 @@ CodeInfo(
 ) => Int64
 ```
 
-Possible values for `debuginfo` are: `:none`, `:source`, and`:default`.
+Possible values for `debuginfo` are: `:none`, `:source`, and `:default`.
 Per default debug information is not printed, but that can be changed
 by setting `Base.IRShow.default_debuginfo[] = :source`.
-
diff --git a/doc/src/devdocs/sanitizers.md b/doc/src/devdocs/sanitizers.md
index 4bffa61e2a5dcf..5eaf4b45d9f57f 100644
--- a/doc/src/devdocs/sanitizers.md
+++ b/doc/src/devdocs/sanitizers.md
@@ -1,16 +1,41 @@
 # Sanitizer support
 
+[Sanitizers](https://github.com/google/sanitizers) can be used in custom Julia builds to make it
+easier to detect certain kinds of errors in Julia's internal C/C++ code.
+
+## Address Sanitizer: easy build
+
+From a source-checkout of Julia, you should be able to build a version
+supporting address sanitization in Julia and LLVM as follows:
+
+```sh
+$ mkdir /tmp/julia
+$ contrib/asan/build.sh /tmp/julia/
+```
+
+Here we've chosen `/tmp/julia` as a build directory, but you can
+choose whatever you wish. Once built, run the workload you wish to
+test with `/tmp/julia/julia`. Memory bugs will result in errors.
+
+If you require customization or further detail, see the documentation below.
+
 ## General considerations
 
-Using Clang's sanitizers obviously require you to use Clang (`USECLANG=1`), but there's another
+Using Clang's sanitizers obviously requires you to use Clang (`USECLANG=1`), but there's another
 catch: most sanitizers require a run-time library, provided by the host compiler, while the instrumented
 code generated by Julia's JIT relies on functionality from that library. This implies that the
-LLVM version of your host compiler matches that of the LLVM library used within Julia.
+LLVM version of your host compiler must match that of the LLVM library used within Julia.
 
-An easy solution is to have an dedicated build folder for providing a matching toolchain, by building
+An easy solution is to have a dedicated build folder for providing a matching toolchain, by building
 with `BUILD_LLVM_CLANG=1`. You can then refer to this toolchain from another build
 folder by specifying `USECLANG=1` while overriding the `CC` and `CXX` variables.
 
+The sanitizers error out when they detect a shared library being opened using `RTLD_DEEPBIND`
+(ref: [google/sanitizers#611](https://github.com/google/sanitizers/issues/611)).
+Since [libblastrampoline](https://github.com/staticfloat/libblastrampoline) by default
+uses `RTLD_DEEPBIND`, we need to set the environment variable `LBT_USE_RTLD_DEEPBIND=0`
+when using a sanitizer.
+
 To use one of of the sanitizers set `SANITIZE=1` and then the appropriate flag for the sanitizer you
 want to use.
 
@@ -29,11 +54,11 @@ look like this, plus one or more of the `SANITIZE_*` flags listed below:
 
 ## Address Sanitizer (ASAN)
 
-For detecting or debugging memory bugs, you can use Clang's [address sanitizer (ASAN)](http://clang.llvm.org/docs/AddressSanitizer.html).
+For detecting or debugging memory bugs, you can use Clang's [address sanitizer (ASAN)](https://clang.llvm.org/docs/AddressSanitizer.html).
 By compiling with `SANITIZE_ADDRESS=1` you enable ASAN for the Julia compiler and its generated code.
 In addition, you can specify `LLVM_SANITIZE=1` to sanitize the LLVM library as well. Note that
 these options incur a high performance and memory cost. For example, using ASAN for Julia and
-LLVM makes `testall1` takes 8-10 times as long while using 20 times as much memory (this can be
+LLVM makes `testall1` take 8-10 times as long while using 20 times as much memory (this can be
 reduced to respectively a factor of 3 and 4 by using the options described below).
 
 By default, Julia sets the `allow_user_segv_handler=1` ASAN flag, which is required for signal
@@ -43,9 +68,74 @@ can be reduced by specifying `fast_unwind_on_malloc=0` and `malloc_context_size=
 of backtrace accuracy. For now, Julia also sets `detect_leaks=0`, but this should be removed in
 the future.
 
+### Example setup
+
+#### Step 1: Install toolchain
+
+Checkout a Git worktree (or create out-of-tree build directory) at
+`$TOOLCHAIN_WORKTREE` and create a config file `$TOOLCHAIN_WORKTREE/Make.user`
+with
+
+```
+USE_BINARYBUILDER_LLVM=1
+BUILD_LLVM_CLANG=1
+```
+
+Run:
+
+```sh
+cd $TOOLCHAIN_WORKTREE
+make -C deps install-llvm install-clang install-llvm-tools
+```
+
+to install toolchain binaries in `$TOOLCHAIN_WORKTREE/usr/tools`
+
+#### Step 2: Build Julia with ASAN
+
+Checkout a Git worktree (or create out-of-tree build directory) at
+`$BUILD_WORKTREE` and create a config file `$BUILD_WORKTREE/Make.user` with
+
+```
+TOOLCHAIN=$(TOOLCHAIN_WORKTREE)/usr/tools
+
+# use our new toolchain
+USECLANG=1
+override CC=$(TOOLCHAIN)/clang
+override CXX=$(TOOLCHAIN)/clang++
+export ASAN_SYMBOLIZER_PATH=$(TOOLCHAIN)/llvm-symbolizer
+
+USE_BINARYBUILDER_LLVM=1
+
+override SANITIZE=1
+override SANITIZE_ADDRESS=1
+
+# make the GC use regular malloc/frees, which are hooked by ASAN
+override WITH_GC_DEBUG_ENV=1
+
+# default to a debug build for better line number reporting
+override JULIA_BUILD_MODE=debug
+
+# make ASAN consume less memory
+export ASAN_OPTIONS=detect_leaks=0:fast_unwind_on_malloc=0:allow_user_segv_handler=1:malloc_context_size=2
+
+JULIA_PRECOMPILE=1
+
+# tell libblastrampoline to not use RTLD_DEEPBIND
+export LBT_USE_RTLD_DEEPBIND=0
+```
+
+Run:
+
+```sh
+cd $BUILD_WORKTREE
+make debug
+```
+
+to build `julia-debug` with ASAN.
+
 ## Memory Sanitizer (MSAN)
 
-For detecting use of uninitialized memory, you can use Clang's [memory sanitizer (MSAN)](http://clang.llvm.org/docs/MemorySanitizer.html)
+For detecting use of uninitialized memory, you can use Clang's [memory sanitizer (MSAN)](https://clang.llvm.org/docs/MemorySanitizer.html)
 by compiling with `SANITIZE_MEMORY=1`.
 
 ## Thread Sanitizer (TSAN)
diff --git a/doc/src/devdocs/ssair.md b/doc/src/devdocs/ssair.md
index d0ad27b833301f..84f5b8c0838d9a 100644
--- a/doc/src/devdocs/ssair.md
+++ b/doc/src/devdocs/ssair.md
@@ -3,12 +3,12 @@
 ## Background
 
 Beginning in Julia 0.7, parts of the compiler use a new [SSA-form](https://en.wikipedia.org/wiki/Static_single_assignment_form)
-intermediate representation. Historically, the compiler used to directly generate LLVM IR, from a lowered form of the Julia
+intermediate representation. Historically, the compiler would directly generate LLVM IR from a lowered form of the Julia
 AST. This form had most syntactic abstractions removed, but still looked a lot like an abstract syntax tree.
 Over time, in order to facilitate optimizations, SSA values were introduced to this IR and the IR was
-linearized (i.e. a form where function arguments may only be SSA values or constants). However, non-SSA values
+linearized (i.e. turned into a form where function arguments could only be SSA values or constants). However, non-SSA values
 (slots) remained in the IR due to the lack of Phi nodes in the IR (necessary for back-edges and re-merging of
-conditional control flow), negating much of the usefulness of the SSA form representation to perform
+conditional control flow). This negated much of the usefulness of SSA form representation when performing
 middle end optimizations. Some heroic effort was put into making these optimizations work without a complete SSA
 form representation, but the lack of such a representation ultimately proved prohibitive.
 
@@ -23,7 +23,7 @@ Phi nodes are part of generic SSA abstraction (see the link above if you're not
 the concept). In the Julia IR, these nodes are represented as:
 ```
 struct PhiNode
-    edges::Vector{Int}
+    edges::Vector{Int32}
     values::Vector{Any}
 end
 ```
@@ -74,7 +74,7 @@ that is generally done for most optimizations that care about these conditions a
 
 Exception handling complicates the SSA story moderately, because exception handling
 introduces additional control flow edges into the IR across which values must be tracked.
-One approach to do so, which is followed by LLVM is to make calls which may throw exceptions
+One approach to do so, which is followed by LLVM, is to make calls which may throw exceptions
 into basic block terminators and add an explicit control flow edge to the catch handler:
 
 ```
@@ -87,16 +87,16 @@ catch:
 # Exceptions go here
 ```
 
-However, this is problematic in a language like julia where at the start of the optimization
+However, this is problematic in a language like Julia, where at the start of the optimization
 pipeline, we do not know which calls throw. We would have to conservatively assume that every
-call (which in julia is every statement) throws. This would have several negative effects.
+call (which in Julia is every statement) throws. This would have several negative effects.
 On the one hand, it would essentially reduce the scope of every basic block to a single call,
 defeating the purpose of having operations be performed at the basic block level. On the other
 hand, every catch basic block would have `n*m` phi node arguments (`n`, the number of statements
-in the critical region, `m` the number of live values through the catch block). To work around
-this, we use a combination of `Upsilon` and `PhiC` (the C standing for `catch`,
-written `φᶜ` in the IR pretty printer, because
-unicode subscript c is not available) nodes. There are several ways to think of these nodes, but
+in the critical region, `m` the number of live values through the catch block).
+
+To work around this, we use a combination of `Upsilon` and `PhiC` nodes (the C standing for `catch`,
+written `φᶜ` in the IR pretty printer, because unicode subscript c is not available). There are several ways to think of these nodes, but
 perhaps the easiest is to think of each `PhiC` as a load from a unique store-many, read-once slot,
 with `Upsilon` being the corresponding store operation. The `PhiC` has an operand list of all the
 upsilon nodes that store to its implicit slot. The `Upsilon` nodes however, do not record which `PhiC`
diff --git a/doc/src/devdocs/stdio.md b/doc/src/devdocs/stdio.md
index d17f7abc06bb9f..5ee4f0206ee0b0 100644
--- a/doc/src/devdocs/stdio.md
+++ b/doc/src/devdocs/stdio.md
@@ -1,8 +1,8 @@
 # printf() and stdio in the Julia runtime
 
-## Libuv wrappers for stdio
+## [Libuv wrappers for stdio](@id Libuv-wrappers-for-stdio)
 
-`julia.h` defines [libuv](http://docs.libuv.org) wrappers for the `stdio.h` streams:
+`julia.h` defines [libuv](https://docs.libuv.org) wrappers for the `stdio.h` streams:
 
 ```c
 uv_stream_t *JL_STDIN;
@@ -17,7 +17,7 @@ int jl_printf(uv_stream_t *s, const char *format, ...);
 int jl_vprintf(uv_stream_t *s, const char *format, va_list args);
 ```
 
-These `printf` functions are used by the `.c` files in the `src/` and `ui/` directories wherever stdio is
+These `printf` functions are used by the `.c` files in the `src/` and `cli/` directories wherever stdio is
 needed to ensure that output buffering is handled in a unified way.
 
 In special cases, like signal handlers, where the full libuv infrastructure is too heavy, `jl_safe_printf()`
@@ -74,7 +74,7 @@ In `jl_uv.c` the `jl_uv_puts()` function checks its `uv_stream_t* stream` argume
 This allows for uniform use of `jl_printf()` throughout the runtime regardless of whether or not
 any particular piece of code is reachable before initialization is complete.
 
-## Legacy `ios.c` library
+## [Legacy `ios.c` library](@id Legacy-ios.c-library)
 
 The `src/support/ios.c` library is inherited from [femtolisp](https://github.com/JeffBezanson/femtolisp).
 It provides cross-platform buffered file IO and in-memory temporary buffers.
diff --git a/doc/src/devdocs/subarrays.md b/doc/src/devdocs/subarrays.md
index 8ebc773812131c..cec7a64a652452 100644
--- a/doc/src/devdocs/subarrays.md
+++ b/doc/src/devdocs/subarrays.md
@@ -19,14 +19,14 @@ julia> A = rand(2,3,4);
 
 julia> S1 = view(A, :, 1, 2:3)
 2×2 view(::Array{Float64, 3}, :, 1, 2:3) with eltype Float64:
- 0.200586  0.066423
- 0.298614  0.956753
+ 0.839622  0.711389
+ 0.967143  0.103929
 
 julia> S2 = view(A, 1, :, 2:3)
 3×2 view(::Array{Float64, 3}, 1, :, 2:3) with eltype Float64:
- 0.200586  0.066423
- 0.246837  0.646691
- 0.648882  0.276021
+ 0.839622  0.711389
+ 0.789764  0.806704
+ 0.566704  0.962715
 ```
 ```@meta
 DocTestSetup = nothing
diff --git a/doc/src/devdocs/sysimg.md b/doc/src/devdocs/sysimg.md
index 734c8a5ca4659a..5c976875846d3a 100644
--- a/doc/src/devdocs/sysimg.md
+++ b/doc/src/devdocs/sysimg.md
@@ -1,6 +1,6 @@
 # System Image Building
 
-## Building the Julia system image
+## [Building the Julia system image](@id Building-the-Julia-system-image)
 
 Julia ships with a preparsed system image containing the contents of the `Base` module, named
 `sys.ji`.  This file is also precompiled into a shared library called `sys.{so,dll,dylib}` on
diff --git a/doc/src/devdocs/types.md b/doc/src/devdocs/types.md
index fa968a2767cedb..003574f99c1826 100644
--- a/doc/src/devdocs/types.md
+++ b/doc/src/devdocs/types.md
@@ -82,7 +82,7 @@ f3(A::Array{T}) where {T<:Any} = 3
 f4(A::Array{Any}) = 4
 ```
 
-The signature - as described in [Function calls](@ref) - of `f3` is a `UnionAll` type wrapping a tuple type: `Tuple{typeof(f3), Array{T}} where T`.
+The signature - as described in [Function calls](@ref Function-calls) - of `f3` is a `UnionAll` type wrapping a tuple type: `Tuple{typeof(f3), Array{T}} where T`.
 All but `f4` can be called with `a = [1,2]`; all but `f2` can be called with `b = Any[1,2]`.
 
 Let's look at these types a little more closely:
@@ -252,7 +252,7 @@ julia> Tuple
 Tuple
 
 julia> Tuple.parameters
-svec(Vararg{Any, N} where N)
+svec(Vararg{Any})
 ```
 
 Unlike other types, tuple types are covariant in their parameters, so this definition permits
@@ -367,7 +367,7 @@ Therefore in this case the diagonal rule is not really necessary, since
 the array determines `T` and we can then allow `x` and `y` to be of
 any subtypes of `T`.
 So variables that occur in invariant position are never considered diagonal.
-This choice of behavior is slightly controversial --- some feel this definition
+This choice of behavior is slightly controversial -- some feel this definition
 should be written as
 
 ```julia
@@ -418,15 +418,19 @@ whether that appearance of the variable is used or not.
 Otherwise types can behave differently depending on which other types
 they are compared to, making subtyping not transitive. For example, consider
 
+```julia
 Tuple{Int,Int8,Vector{Integer}} <: Tuple{T,T,Vector{Union{Integer,T}}} where T
+```
 
-If the `T` inside the Union is ignored, then `T` is concrete and the answer is "false"
+If the `T` inside the `Union` is ignored, then `T` is concrete and the answer is "false"
 since the first two types aren't the same.
 But consider instead
 
+```julia
 Tuple{Int,Int8,Vector{Any}} <: Tuple{T,T,Vector{Union{Integer,T}}} where T
+```
 
-Now we cannot ignore the `T` in the Union (we must have T == Any), so `T` is not
+Now we cannot ignore the `T` in the `Union` (we must have `T == Any`), so `T` is not
 concrete and the answer is "true".
 That would make the concreteness of `T` depend on the other type, which is not
 acceptable since a type must have a clear meaning on its own.
@@ -474,10 +478,10 @@ We have not yet worked out a complete algorithm for this.
 Most operations for dealing with types are found in the files `jltypes.c` and `subtype.c`.
 A good way to start is to watch subtyping in action.
 Build Julia with `make debug` and fire up Julia within a debugger.
-[gdb debugging tips](@ref) has some tips which may be useful.
+[gdb debugging tips](@ref gdb-debugging-tips) has some tips which may be useful.
 
-Because the subtyping code is used heavily in the REPL itself--and hence breakpoints in this
-code get triggered often--it will be easiest if you make the following definition:
+Because the subtyping code is used heavily in the REPL itself -- and hence breakpoints in this
+code get triggered often -- it will be easiest if you make the following definition:
 
 ```julia-repl
 julia> function mysubtype(a,b)
diff --git a/doc/src/devdocs/valgrind.md b/doc/src/devdocs/valgrind.md
index 5b25762133b072..8a11cb411a6fd6 100644
--- a/doc/src/devdocs/valgrind.md
+++ b/doc/src/devdocs/valgrind.md
@@ -1,6 +1,6 @@
 # Using Valgrind with Julia
 
-[Valgrind](http://valgrind.org/) is a tool for memory debugging, memory leak detection, and profiling.
+[Valgrind](https://valgrind.org/) is a tool for memory debugging, memory leak detection, and profiling.
  This section describes things to keep in mind when using Valgrind to debug memory issues with
 Julia.
 
@@ -24,10 +24,13 @@ Another thing to note: if your program uses multiple workers processes, it is li
 want all such worker processes to run under Valgrind, not just the parent process.  To do this,
 pass `--trace-children=yes` to `valgrind`.
 
+Yet another thing to note: if using `valgrind` errors with `Unable to find compatible target in system image`,
+try rebuilding the sysimage with target `generic` or julia with `JULIA_CPU_TARGET=generic`.
+
 ## Suppressions
 
 Valgrind will typically display spurious warnings as it runs.  To reduce the number of such warnings,
-it helps to provide a [suppressions file](http://valgrind.org/docs/manual/manual-core.html#manual-core.suppress)
+it helps to provide a [suppressions file](https://valgrind.org/docs/manual/manual-core.html#manual-core.suppress)
 to Valgrind.  A sample suppressions file is included in the Julia source distribution at `contrib/valgrind-julia.supp`.
 
 The suppressions file can be used from the `julia/` source directory as follows:
diff --git a/doc/src/index.md b/doc/src/index.md
index e9319b18a90410..a1915395151bc7 100644
--- a/doc/src/index.md
+++ b/doc/src/index.md
@@ -49,7 +49,8 @@ slow, we highly recommend reading through the [Performance Tips](@ref man-perfor
 else. Once you understand how Julia works, it's easy to write code that's nearly as fast as C.
 
 Julia features optional typing, multiple dispatch, and good performance, achieved using type inference
-and [just-in-time (JIT) compilation](https://en.wikipedia.org/wiki/Just-in-time_compilation),
+and [just-in-time (JIT) compilation](https://en.wikipedia.org/wiki/Just-in-time_compilation) (and
+[optional ahead-of-time compilation](https://github.com/JuliaLang/PackageCompiler.jl)),
 implemented using [LLVM](https://en.wikipedia.org/wiki/Low_Level_Virtual_Machine). It is multi-paradigm,
 combining features of imperative, functional, and object-oriented programming. Julia provides
 ease and expressiveness for high-level numerical computing, in the same way as languages such
diff --git a/doc/src/manual/arrays.md b/doc/src/manual/arrays.md
index 1db77f107aabdb..f6e43507262697 100644
--- a/doc/src/manual/arrays.md
+++ b/doc/src/manual/arrays.md
@@ -5,10 +5,11 @@ technical computing languages pay a lot of attention to their array implementati
 of other containers. Julia does not treat arrays in any special way. The array library is implemented
 almost completely in Julia itself, and derives its performance from the compiler, just like any
 other code written in Julia. As such, it's also possible to define custom array types by inheriting
-from [`AbstractArray`](@ref). See the [manual section on the AbstractArray interface](@ref man-interface-array) for more details
-on implementing a custom array type.
+from [`AbstractArray`](@ref). See the [manual section on the AbstractArray interface](@ref man-interface-array)
+for more details on implementing a custom array type.
 
-An array is a collection of objects stored in a multi-dimensional grid. In the most general case,
+An array is a collection of objects stored in a multi-dimensional grid. Zero-dimensional arrays
+are allowed, see [this FAQ entry](@ref faq-array-0dim). In the most general case,
 an array may contain objects of type [`Any`](@ref). For most computational purposes, arrays should contain
 objects of a more specific type, such as [`Float64`](@ref) or [`Int32`](@ref).
 
@@ -67,9 +68,9 @@ omitted it will default to [`Float64`](@ref).
 | [`rand(T, dims...)`](@ref)                     | an `Array` with random, iid [^1] and uniformly distributed values in the half-open interval ``[0, 1)``                                                                                                                                       |
 | [`randn(T, dims...)`](@ref)                    | an `Array` with random, iid and standard normally distributed values                                                                                                                                                                         |
 | [`Matrix{T}(I, m, n)`](@ref)                   | `m`-by-`n` identity matrix. Requires `using LinearAlgebra` for [`I`](@ref).                                                                                                                                                                                                                   |
-| [`range(start, stop=stop, length=n)`](@ref)    | range of `n` linearly spaced elements from `start` to `stop`                                                                                                                                                                                 |
+| [`range(start, stop, n)`](@ref)                | a range of `n` linearly spaced elements from `start` to `stop` |
 | [`fill!(A, x)`](@ref)                          | fill the array `A` with the value `x`                                                                                                                                                                                                        |
-| [`fill(x, dims...)`](@ref)                     | an `Array` filled with the value `x`                                                                                                                                                                                                         |
+| [`fill(x, dims...)`](@ref)                     | an `Array` filled with the value `x`. In particular, `fill(x)` constructs a zero-dimensional `Array` containing `x`. |
 
 [^1]: *iid*, independently and identically distributed.
 
@@ -95,7 +96,7 @@ Here, `(2, 3)` is a [`Tuple`](@ref) and the first argument — the element type
 ## [Array literals](@id man-array-literals)
 
 Arrays can also be directly constructed with square braces; the syntax `[A, B, C, ...]`
-creates a one dimensional array (i.e., a vector) containing the comma-separated arguments as
+creates a one-dimensional array (i.e., a vector) containing the comma-separated arguments as
 its elements. The element type ([`eltype`](@ref)) of the resulting array is automatically
 determined by the types of the arguments inside the braces. If all the arguments are the
 same type, then that is its `eltype`. If they all have a common
@@ -126,7 +127,7 @@ Any[]
 
 ### [Concatenation](@id man-array-concatenation)
 
-If the arguments inside the square brackets are separated by semicolons (`;`) or newlines
+If the arguments inside the square brackets are separated by single semicolons (`;`) or newlines
 instead of commas, then their contents are _vertically concatenated_ together instead of
 the arguments being used as elements themselves.
 
@@ -154,7 +155,7 @@ julia> [1:2
  6
 ```
 
-Similarly, if the arguments are separated by tabs or spaces, then their contents are
+Similarly, if the arguments are separated by tabs or spaces or double semicolons, then their contents are
 _horizontally concatenated_ together.
 
 ```jldoctest
@@ -171,9 +172,13 @@ julia> [[1,2]  [4,5]  [7,8]]
 julia> [1 2 3] # Numbers can also be horizontally concatenated
 1×3 Matrix{Int64}:
  1  2  3
+
+julia> [1;; 2;; 3;; 4]
+1×4 Matrix{Int64}:
+ 1  2  3  4
 ```
 
-Using semicolons (or newlines) and spaces (or tabs) can be combined to concatenate
+Single semicolons (or newlines) and spaces (or tabs) can be combined to concatenate
 both horizontally and vertically at the same time.
 
 ```jldoctest
@@ -189,17 +194,135 @@ julia> [zeros(Int, 2, 2) [1; 2]
  0  0  1
  0  0  2
  3  4  5
+
+julia> [[1 1]; 2 3; [4 4]]
+3×2 Matrix{Int64}:
+ 1  1
+ 2  3
+ 4  4
+```
+
+Spaces (and tabs) have a higher precedence than semicolons, performing any horizontal
+concatenations first and then concatenating the result. Using double semicolons for the
+horizontal concatenation, on the other hand, performs any vertical concatenations before
+horizontally concatenating the result.
+
+```jldoctest
+julia> [zeros(Int, 2, 2) ; [3 4] ;; [1; 2] ; 5]
+3×3 Matrix{Int64}:
+ 0  0  1
+ 0  0  2
+ 3  4  5
+
+julia> [1:2; 4;; 1; 3:4]
+3×2 Matrix{Int64}:
+ 1  1
+ 2  3
+ 4  4
+```
+
+Just as `;` and `;;` concatenate in the first and second dimension, using more semicolons
+extends this same general scheme. The number of semicolons in the separator specifies the
+particular dimension, so `;;;` concatenates in the third dimension, `;;;;` in the 4th, and
+so on. Fewer semicolons take precedence, so the lower dimensions are generally concatenated
+first.
+
+```jldoctest
+julia> [1; 2;; 3; 4;; 5; 6;;;
+        7; 8;; 9; 10;; 11; 12]
+2×3×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  3  5
+ 2  4  6
+
+[:, :, 2] =
+ 7   9  11
+ 8  10  12
+```
+
+Like before, spaces (and tabs) for horizontal concatenation have a higher precedence than
+any number of semicolons. Thus, higher-dimensional arrays can also be written by specifying
+their rows first, with their elements textually arranged in a manner similar to their layout:
+
+```jldoctest
+julia> [1 3 5
+        2 4 6;;;
+        7 9 11
+        8 10 12]
+2×3×2 Array{Int64, 3}:
+[:, :, 1] =
+ 1  3  5
+ 2  4  6
+
+[:, :, 2] =
+ 7   9  11
+ 8  10  12
+
+julia> [1 2;;; 3 4;;;; 5 6;;; 7 8]
+1×2×2×2 Array{Int64, 4}:
+[:, :, 1, 1] =
+ 1  2
+
+[:, :, 2, 1] =
+ 3  4
+
+[:, :, 1, 2] =
+ 5  6
+
+[:, :, 2, 2] =
+ 7  8
+
+julia> [[1 2;;; 3 4];;;; [5 6];;; [7 8]]
+1×2×2×2 Array{Int64, 4}:
+[:, :, 1, 1] =
+ 1  2
+
+[:, :, 2, 1] =
+ 3  4
+
+[:, :, 1, 2] =
+ 5  6
+
+[:, :, 2, 2] =
+ 7  8
+```
+
+Although they both mean concatenation in the second dimension, spaces (or tabs) and `;;`
+cannot appear in the same array expression unless the double semicolon is simply serving as
+a "line continuation" character. This allows a single horizontal concatenation to span
+multiple lines (without the line break being interpreted as a vertical concatenation).
+
+```jldoctest
+julia> [1 2 ;;
+       3 4]
+1×4 Matrix{Int64}:
+ 1  2  3  4
+```
+
+Terminating semicolons may also be used to add trailing length 1 dimensions.
+
+```jldoctest
+julia> [1;;]
+1×1 Matrix{Int64}:
+ 1
+
+julia> [2; 3;;;]
+2×1×1 Array{Int64, 3}:
+[:, :, 1] =
+ 2
+ 3
 ```
 
 More generally, concatenation can be accomplished through the [`cat`](@ref) function.
 These syntaxes are shorthands for function calls that themselves are convenience functions:
 
-| Syntax            | Function        | Description                                        |
-|:----------------- |:--------------- |:-------------------------------------------------- |
-|                   | [`cat`](@ref)   | concatenate input arrays along dimension(s) `k`    |
-| `[A; B; C; ...]`  | [`vcat`](@ref)  | shorthand for `cat(A...; dims=1)                   |
-| `[A B C ...]`     | [`hcat`](@ref)  | shorthand for `cat(A...; dims=2)                   |
-| `[A B; C D; ...]` | [`hvcat`](@ref) | simultaneous vertical and horizontal concatenation |
+| Syntax                 | Function         | Description                                                                                                |
+|:---------------------- |:---------------- |:---------------------------------------------------------------------------------------------------------- |
+|                        | [`cat`](@ref)    | concatenate input arrays along dimension(s) `k`                                                            |
+| `[A; B; C; ...]`       | [`vcat`](@ref)   | shorthand for `cat(A...; dims=1)                                                                           |
+| `[A B C ...]`          | [`hcat`](@ref)   | shorthand for `cat(A...; dims=2)                                                                           |
+| `[A B; C D; ...]`      | [`hvcat`](@ref)  | simultaneous vertical and horizontal concatenation                                                         |
+| `[A; C;; B; D;;; ...]` | [`hvncat`](@ref) | simultaneous n-dimensional concatenation, where number of semicolons indicate the dimension to concatenate |
 
 ### Typed array literals
 
@@ -470,7 +593,7 @@ overwritten with the value of `X`, [`convert`](@ref)ing to the
 [`eltype`](@ref) of `A` if necessary.
 
 
-If any index `I_k` selects more than one location, then the right hand side `X` must be an
+If any index `I_k` is itself an array, then the right hand side `X` must also be an
 array with the same shape as the result of indexing `A[I_1, I_2, ..., I_n]` or a vector with
 the same number of elements. The value in location `I_1[i_1], I_2[i_2], ..., I_n[i_n]` of
 `A` is overwritten with the value `X[I_1, I_2, ..., I_n]`, converting if necessary. The
@@ -573,6 +696,12 @@ julia> A[:, 3]
  13
  15
  17
+
+julia> A[:, 3:3]
+3×1 Matrix{Int64}:
+ 13
+ 15
+ 17
 ```
 
 ### Cartesian indices
@@ -735,7 +864,7 @@ julia> LinearIndices(A)[2, 2]
 5
 ```
 
-It's important to note that there's a very large assymmetry in the performance
+It's important to note that there's a very large asymmetry in the performance
 of these conversions. Converting a linear index to a set of cartesian indices
 requires dividing and taking the remainder, whereas going the other way is just
 multiplies and adds. In modern processors, integer division can be 10-50 times
@@ -746,7 +875,7 @@ full set of cartesian indices to do their lookup (see [`IndexStyle`](@ref) to
 introspect which is which). As such, when iterating over an entire array, it's
 much better to iterate over [`eachindex(A)`](@ref) instead of `1:length(A)`.
 Not only will the former be much faster in cases where `A` is `IndexCartesian`,
-but it will also support OffsetArrays, too.
+but it will also support [OffsetArrays](https://github.com/JuliaArrays/OffsetArrays.jl), too.
 
 #### Omitted and extra indices
 
diff --git a/doc/src/manual/asynchronous-programming.md b/doc/src/manual/asynchronous-programming.md
index 1791d4b0e40f70..4eee0fccf7da2a 100644
--- a/doc/src/manual/asynchronous-programming.md
+++ b/doc/src/manual/asynchronous-programming.md
@@ -186,7 +186,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
 
     # we can schedule `n` instances of `foo` to be active concurrently.
     for _ in 1:n
-        @async foo()
+        errormonitor(@async foo())
     end
     ```
   * Channels are created via the `Channel{T}(sz)` constructor. The channel will only hold objects
@@ -211,7 +211,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
     julia> close(c);
 
     julia> put!(c, 2) # `put!` on a closed channel throws an exception.
-    ERROR: InvalidStateException("Channel is closed.",:closed)
+    ERROR: InvalidStateException: Channel is closed.
     Stacktrace:
     [...]
     ```
@@ -230,7 +230,7 @@ A channel can be visualized as a pipe, i.e., it has a write end and a read end :
     1
 
     julia> take!(c) # No more data available on a closed channel.
-    ERROR: InvalidStateException("Channel is closed.",:closed)
+    ERROR: InvalidStateException: Channel is closed.
     Stacktrace:
     [...]
     ```
@@ -263,10 +263,10 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> @async make_jobs(n); # feed the jobs channel with "n" jobs
+julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for i in 1:4 # start 4 tasks to process requests in parallel
-           @async do_work()
+           errormonitor(@async do_work())
        end
 
 julia> @elapsed while n > 0 # print out results
@@ -289,6 +289,10 @@ julia> @elapsed while n > 0 # print out results
 0.029772311
 ```
 
+Instead of `errormonitor(t)`, a more robust solution may be use use `bind(results, t)`, as that will
+not only log any unexpected failures, but also force the associated resources to close and propagate
+the exception everywhere.
+
 ## More task operations
 
 Task operations are built on a low-level primitive called [`yieldto`](@ref).
diff --git a/doc/src/manual/calling-c-and-fortran-code.md b/doc/src/manual/calling-c-and-fortran-code.md
index 2b0242cd5eabf2..5529018217c1a3 100644
--- a/doc/src/manual/calling-c-and-fortran-code.md
+++ b/doc/src/manual/calling-c-and-fortran-code.md
@@ -157,7 +157,7 @@ This is why we don't use the `Cstring` type here: as the array is uninitialized,
 NUL bytes. Converting to a `Cstring` as part of the [`ccall`](@ref) checks for contained NUL bytes
 and could therefore throw a conversion error.
 
-Deferencing `pointer(hostname)` with `unsafe_string` is an unsafe operation as it requires access to
+Dereferencing `pointer(hostname)` with `unsafe_string` is an unsafe operation as it requires access to
 the memory allocated for `hostname` that may have been in the meanwhile garbage collected. The macro
 [`GC.@preserve`](@ref) prevents this from happening and therefore accessing an invalid memory location.
 
@@ -186,6 +186,10 @@ Julia function. The arguments to [`@cfunction`](@ref) are:
     function on 32-bit Windows, but can be used on WIN64 (where `stdcall` is unified with the
     C calling convention).
 
+!!! note
+    Callback functions exposed via `@cfunction` should not throw errors, as that will
+    return control to the Julia runtime unexpectedly and may leave the program in an undefined state.
+
 A classic example is the standard C library `qsort` function, declared as:
 
 ```c
@@ -365,30 +369,31 @@ an `Int` in Julia).
 | `unsigned char`                                         | `CHARACTER`              | `Cuchar`             | `UInt8`                                                                                                        |
 | `bool` (_Bool in C99+)                                  |                          | `Cuchar`             | `UInt8`                                                                                                        |
 | `short`                                                 | `INTEGER*2`, `LOGICAL*2` | `Cshort`             | `Int16`                                                                                                        |
-| `unsigned short`                                        |                          | `Cushort`            | `UInt16`                                                                                                       |
+| `unsigned short`                                        |                          | `Cushort`            | `UInt16`                                                                                                       |
 | `int`, `BOOL` (C, typical)                              | `INTEGER*4`, `LOGICAL*4` | `Cint`               | `Int32`                                                                                                        |
-| `unsigned int`                                          |                          | `Cuint`              | `UInt32`                                                                                                       |
+| `unsigned int`                                          |                          | `Cuint`              | `UInt32`                                                                                                       |
 | `long long`                                             | `INTEGER*8`, `LOGICAL*8` | `Clonglong`          | `Int64`                                                                                                        |
-| `unsigned long long`                                    |                          | `Culonglong`         | `UInt64`                                                                                                       |
-| `intmax_t`                                              |                          | `Cintmax_t`          | `Int64`                                                                                                        |
-| `uintmax_t`                                             |                          | `Cuintmax_t`         | `UInt64`                                                                                                       |
+| `unsigned long long`                                    |                          | `Culonglong`         | `UInt64`                                                                                                       |
+| `intmax_t`                                              |                          | `Cintmax_t`          | `Int64`                                                                                                        |
+| `uintmax_t`                                             |                          | `Cuintmax_t`         | `UInt64`                                                                                                       |
 | `float`                                                 | `REAL*4i`                | `Cfloat`             | `Float32`                                                                                                      |
 | `double`                                                | `REAL*8`                 | `Cdouble`            | `Float64`                                                                                                      |
-| `complex float`                                         | `COMPLEX*8`              | `ComplexF32`          | `Complex{Float32}`                                                                                             |
+| `complex float`                                         | `COMPLEX*8`              | `ComplexF32`         | `Complex{Float32}`                                                                                             |
 | `complex double`                                        | `COMPLEX*16`             | `ComplexF64`         | `Complex{Float64}`                                                                                             |
-| `ptrdiff_t`                                             |                          | `Cptrdiff_t`         | `Int`                                                                                                          |
-| `ssize_t`                                               |                          | `Cssize_t`           | `Int`                                                                                                          |
-| `size_t`                                                |                          | `Csize_t`            | `UInt`                                                                                                         |
-| `void`                                                  |                          |                      | `Cvoid`                                                                                                         |
-| `void` and `[[noreturn]]` or `_Noreturn`                |                          |                      | `Union{}`                                                                                                      |
-| `void*`                                                 |                          |                      | `Ptr{Cvoid}`                                                                                                    |
-| `T*` (where T represents an appropriately defined type) |                          |                      | `Ref{T}`                                                                                                       |
-| `char*` (or `char[]`, e.g. a string)                    | `CHARACTER*N`            |                      | `Cstring` if NUL-terminated, or `Ptr{UInt8}` if not                                                            |
-| `char**` (or `*char[]`)                                 |                          |                      | `Ptr{Ptr{UInt8}}`                                                                                              |
-| `jl_value_t*` (any Julia Type)                          |                          |                      | `Any`                                                                                                          |
-| `jl_value_t**` (a reference to a Julia Type)            |                          |                      | `Ref{Any}`                                                                                                     |
-| `va_arg`                                                |                          |                      | Not supported                                                                                                  |
-| `...` (variadic function specification)                 |                          |                      | `T...` (where `T` is one of the above types, variadic functions of different argument types are not supported) |
+| `ptrdiff_t`                                             |                          | `Cptrdiff_t`         | `Int`                                                                                                          |
+| `ssize_t`                                               |                          | `Cssize_t`           | `Int`                                                                                                          |
+| `size_t`                                                |                          | `Csize_t`            | `UInt`                                                                                                         |
+| `void`                                                  |                          |                      | `Cvoid`                                                                                                        |
+| `void` and `[[noreturn]]` or `_Noreturn`                |                          |                      | `Union{}`                                                                                                      |
+| `void*`                                                 |                          |                      | `Ptr{Cvoid}` (or similarly `Ref{Cvoid}`)                                                                       |
+| `T*` (where T represents an appropriately defined type) |                          |                      | `Ref{T}` (T may be safely mutated only if T is an isbits type)                                                 |
+| `char*` (or `char[]`, e.g. a string)                    | `CHARACTER*N`            |                      | `Cstring` if NUL-terminated, or `Ptr{UInt8}` if not                                                            |
+| `char**` (or `*char[]`)                                 |                          |                      | `Ptr{Ptr{UInt8}}`                                                                                              |
+| `jl_value_t*` (any Julia Type)                          |                          |                      | `Any`                                                                                                          |
+| `jl_value_t* const*` (a reference to a Julia value)     |                          |                      | `Ref{Any}` (const, since mutation would require a write barrier, which is not possible to insert correctly)    |
+| `va_arg`                                                |                          |                      | Not supported                                                                                                  |
+| `...` (variadic function specification)                 |                          |                      | `T...` (where `T` is one of the above types, when using the `ccall` function)                                  |
+| `...` (variadic function specification)                 |                          |                      | `; va_arg1::T, va_arg2::S, etc.` (only supported with `@ccall` macro)                                          |
 
 The [`Cstring`](@ref) type is essentially a synonym for `Ptr{UInt8}`, except the conversion to `Cstring`
 throws an error if the Julia string contains any embedded NUL characters (which would cause the
@@ -651,21 +656,28 @@ For translating a C argument list to Julia:
 
       * `Any`
       * argument value must be a valid Julia object
-  * `jl_value_t**`
+  * `jl_value_t* const*`
 
       * `Ref{Any}`
-      * argument value must be a valid Julia object (or `C_NULL`)
+      * argument list must be a valid Julia object (or `C_NULL`)
+      * cannot be used for an output parameter, unless the user is able to
+        separately arrange for the object to be GC-preserved
   * `T*`
 
       * `Ref{T}`, where `T` is the Julia type corresponding to `T`
-      * argument value will be copied if it is an `isbits` type otherwise, the value must be a valid Julia
-        object
+      * argument value will be copied if it is an `inlinealloc` type (which
+        includes `isbits` otherwise, the value must be a valid Julia object
   * `T (*)(...)` (e.g. a pointer to a function)
 
-      * `Ptr{Cvoid}` (you may need to use [`@cfunction`](@ref) explicitly to create this pointer)
+      * `Ptr{Cvoid}` (you may need to use [`@cfunction`](@ref) explicitly to
+        create this pointer)
   * `...` (e.g. a vararg)
 
-      * `T...`, where `T` is the Julia type
+      * [for `ccall`]: `T...`, where `T` is the single Julia type of all
+        remaining arguments
+      * [for `@ccall`]: `; va_arg1::T, va_arg2::S, etc`, where `T` and `S` are
+        the Julia type (i.e. separate the regular arguments from varargs with
+        a `;`)
       * currently unsupported by `@cfunction`
   * `va_arg`
 
@@ -700,7 +712,6 @@ For translating a C return type to Julia:
   * `jl_value_t**`
 
       * `Ptr{Any}` (`Ref{Any}` is invalid as a return type)
-      * argument value must be a valid Julia object (or `C_NULL`)
   * `T*`
 
       * If the memory is already owned by Julia, or is an `isbits` type, and is known to be non-null:
@@ -714,7 +725,8 @@ For translating a C return type to Julia:
           * `Ptr{T}`, where `T` is the Julia type corresponding to `T`
   * `T (*)(...)` (e.g. a pointer to a function)
 
-      * `Ptr{Cvoid}` (you may need to use [`@cfunction`](@ref) explicitly to create this pointer)
+      * `Ptr{Cvoid}` to call this directly from Julia you will need to pass this as the first argument to [`ccall`](@ref).
+        See [Indirect Calls](@ref).
 
 ### Passing Pointers for Modifying Inputs
 
@@ -833,7 +845,7 @@ the Julia pointer to a Julia array data structure into a form understandable by
 
 ## Fortran Wrapper Example
 
-The following example utilizes ccall to call a function in a common Fortran library (libBLAS) to
+The following example utilizes `ccall` to call a function in a common Fortran library (libBLAS) to
 computes a dot product. Notice that the argument mapping is a bit different here than above, as
 we need to map from Julia to Fortran.  On every argument type, we specify `Ref` or `Ptr`. This
 mangling convention may be specific to your fortran compiler and operating system, and is likely
@@ -921,7 +933,7 @@ macro dlsym(func, lib)
         let zlocal = $z[]
             if zlocal == C_NULL
                 zlocal = dlsym($(esc(lib))::Ptr{Cvoid}, $(esc(func)))::Ptr{Cvoid}
-                $z[] = $zlocal
+                $z[] = zlocal
             end
             zlocal
         end
@@ -996,12 +1008,12 @@ hn = Vector{UInt8}(undef, 256)
 err = ccall(:gethostname, stdcall, Int32, (Ptr{UInt8}, UInt32), hn, length(hn))
 ```
 
-For more information, please see the [LLVM Language Reference](http://llvm.org/docs/LangRef.html#calling-conventions).
+For more information, please see the [LLVM Language Reference](https://llvm.org/docs/LangRef.html#calling-conventions).
 
 There is one additional special calling convention [`llvmcall`](@ref Base.llvmcall),
 which allows inserting calls to LLVM intrinsics directly.
 This can be especially useful when targeting unusual platforms such as GPGPUs.
-For example, for [CUDA](http://llvm.org/docs/NVPTXUsage.html), we need to be able to read the thread index:
+For example, for [CUDA](https://llvm.org/docs/NVPTXUsage.html), we need to be able to read the thread index:
 
 ```julia
 ccall("llvm.nvvm.read.ptx.sreg.tid.x", llvmcall, Int32, ())
diff --git a/doc/src/manual/code-loading.md b/doc/src/manual/code-loading.md
index 342f9e4c9a3364..d6f359f83d5cb4 100644
--- a/doc/src/manual/code-loading.md
+++ b/doc/src/manual/code-loading.md
@@ -1,4 +1,4 @@
-# Code Loading
+# [Code Loading](@id code-loading)
 
 !!! note
     This chapter covers the technical details of package loading. To install packages, use [`Pkg`](@ref Pkg), Julia's built-in package manager, to add packages to your active environment. To use packages already in your active environment, write `import X` or `using X`, as described in the [Modules documentation](@ref modules).
@@ -160,11 +160,11 @@ What happens if `import Zebra` is evaluated in the main `App` code base? Since `
 **The paths map** of a project environment is extracted from the manifest file. The path of a package `uuid` named `X` is determined by these rules (in order):
 
 1. If the project file in the directory matches `uuid` and name `X`, then either:
-  - It has a toplevel `path` entry, then `uuid` will be mapped to that path, interpreted relative to the directory containing the project file.
-  - Otherwise, `uuid` is mapped to  `src/X.jl` relative to the directory containing the project file.
+   - It has a toplevel `path` entry, then `uuid` will be mapped to that path, interpreted relative to the directory containing the project file.
+   - Otherwise, `uuid` is mapped to  `src/X.jl` relative to the directory containing the project file.
 2. If the above is not the case and the project file has a corresponding manifest file and the manifest contains a stanza matching `uuid` then:
-  - If it has a `path` entry, use that path (relative to the directory containing the manifest file).
-  - If it has a `git-tree-sha1` entry, compute a deterministic hash function of `uuid` and `git-tree-sha1`—call it `slug`—and look for a directory named `packages/X/$slug` in each directory in the Julia `DEPOT_PATH` global array. Use the first such directory that exists.
+   - If it has a `path` entry, use that path (relative to the directory containing the manifest file).
+   - If it has a `git-tree-sha1` entry, compute a deterministic hash function of `uuid` and `git-tree-sha1`—call it `slug`—and look for a directory named `packages/X/$slug` in each directory in the Julia `DEPOT_PATH` global array. Use the first such directory that exists.
 
 If any of these result in success, the path to the source code entry point will be either that result, the relative path from that result plus `src/X.jl`; otherwise, there is no path mapping for `uuid`. When loading `X`, if no source code path is found, the lookup will fail, and the user may be prompted to install the appropriate package version or to take other corrective action (e.g. declaring `X` as a dependency).
 
@@ -349,6 +349,23 @@ The subscripted `rootsᵢ`, `graphᵢ` and `pathsᵢ` variables correspond to th
 
 Since the primary environment is typically the environment of a project you're working on, while environments later in the stack contain additional tools, this is the right trade-off: it's better to break your development tools but keep the project working. When such incompatibilities occur, you'll typically want to upgrade your dev tools to versions that are compatible with the main project.
 
+### Package/Environment Preferences
+
+Preferences are dictionaries of metadata that influence package behavior within an environment.
+The preferences system supports reading preferences at compile-time, which means that at code-loading time, we must ensure that a particular `.ji` file was built with the same preferences as the current environment before loading it.
+The public API for modifying Preferences is contained within the [Preferences.jl](https://github.com/JuliaPackaging/Preferences.jl) package.
+Preferences are stored as TOML dictionaries within a `(Julia)LocalPreferences.toml` file next to the currently-active project.
+If a preference is "exported", it is instead stored within the `(Julia)Project.toml` instead.
+The intention is to allow shared projects to contain shared preferences, while allowing for users themselves to override those preferences with their own settings in the LocalPreferences.toml file, which should be .gitignored as the name implies.
+
+Preferences that are accessed during compilation are automatically marked as compile-time preferences, and any change recorded to these preferences will cause the Julia compiler to recompile any cached precompilation `.ji` files for that module.
+This is done by serializing the hash of all compile-time preferences during compilation, then checking that hash against the current environment when searching for the proper `.ji` file to load.
+
+Preferences can be set with depot-wide defaults; if package Foo is installed within your global environment and it has preferences set, these preferences will apply as long as your global environment is part of your `LOAD_PATH`.
+Preferences in environments higher up in the environment stack get overridden by the more proximal entries in the load path, ending with the currently active project.
+This allows depot-wide preference defaults to exist, with active projects able to merge or even completely overwrite these inherited preferences.
+See the docstring for `Preferences.set_preferences!()` for the full details of how to set preferences to allow or disallow merging.
+
 ## Conclusion
 
 Federated package management and precise software reproducibility are difficult but worthy goals in a package system. In combination, these goals lead to a more complex package loading mechanism than most dynamic languages have, but it also yields scalability and reproducibility that is more commonly associated with static languages. Typically, Julia users should be able to use the built-in package manager to manage their projects without needing a precise understanding of these interactions. A call to `Pkg.add("X")` will add to the appropriate project and manifest files, selected via `Pkg.activate("Y")`, so that a future call to `import X` will load `X` without further thought.
diff --git a/doc/src/manual/command-line-options.md b/doc/src/manual/command-line-options.md
new file mode 100644
index 00000000000000..3839e503ab4bb4
--- /dev/null
+++ b/doc/src/manual/command-line-options.md
@@ -0,0 +1,130 @@
+# [Command-line Options](@id command-line-options)
+
+## Using arguments inside scripts
+
+When running a script using `julia`, you can pass additional arguments to your script:
+
+```
+$ julia script.jl arg1 arg2...
+```
+
+These additional command-line arguments are passed in the global constant `ARGS`. The
+name of the script itself is passed in as the global `PROGRAM_FILE`. Note that `ARGS` is
+also set when a Julia expression is given using the `-e` option on the command line (see the
+`julia` help output below) but `PROGRAM_FILE` will be empty. For example, to just print the
+arguments given to a script, you could do this:
+
+```
+$ julia -e 'println(PROGRAM_FILE); for x in ARGS; println(x); end' foo bar
+
+foo
+bar
+```
+
+Or you could put that code into a script and run it:
+
+```
+$ echo 'println(PROGRAM_FILE); for x in ARGS; println(x); end' > script.jl
+$ julia script.jl foo bar
+script.jl
+foo
+bar
+```
+
+The `--` delimiter can be used to separate command-line arguments intended for the script file from arguments intended for Julia:
+
+```
+$ julia --color=yes -O -- script.jl arg1 arg2..
+```
+
+See also [Scripting](@ref man-scripting) for more information on writing Julia scripts.
+
+Julia can be started in parallel mode with either the `-p` or the `--machine-file` options. `-p n`
+will launch an additional `n` worker processes, while `--machine-file file` will launch a worker
+for each line in file `file`. The machines defined in `file` must be accessible via a password-less
+`ssh` login, with Julia installed at the same location as the current host. Each machine definition
+takes the form `[count*][user@]host[:port] [bind_addr[:port]]`. `user` defaults to current user,
+`port` to the standard ssh port. `count` is the number of workers to spawn on the node, and defaults
+to 1. The optional `bind-to bind_addr[:port]` specifies the IP address and port that other workers
+should use to connect to this worker.
+
+If you have code that you want executed whenever Julia is run, you can put it in
+`~/.julia/config/startup.jl`:
+
+```
+$ echo 'println("Greetings! 你好! 안녕하세요?")' > ~/.julia/config/startup.jl
+$ julia
+Greetings! 你好! 안녕하세요?
+
+...
+```
+
+Note that although you should have a `~/.julia` directory once you've run Julia for the
+first time, you may need to create the `~/.julia/config` folder and the
+`~/.julia/config/startup.jl` file if you use it.
+
+## Command-line switches for Julia
+
+There are various ways to run Julia code and provide options, similar to those available for the
+`perl` and `ruby` programs:
+
+```
+julia [switches] -- [programfile] [args...]
+```
+
+The following is a complete list of command-line switches available when launching julia (a '*' marks the default value, if applicable):
+
+|Switch                                 |Description|
+|:---                                   |:---|
+|`-v`, `--version`                      |Display version information|
+|`-h`, `--help`                         |Print command-line options (this message).|
+|`--help-hidden`                        |Uncommon options not shown by `-h`|
+|`--project[={<dir>\|@.}]`              |Set `<dir>` as the home project/environment. The default `@.` option will search through parent directories until a `Project.toml` or `JuliaProject.toml` file is found.|
+|`-J`, `--sysimage <file>`              |Start up with the given system image file|
+|`-H`, `--home <dir>`                   |Set location of `julia` executable|
+|`--startup-file={yes*\|no}`            |Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH` environment variable is unset, load `~/.julia/config/startup.jl`|
+|`--handle-signals={yes*\|no}`          |Enable or disable Julia's default signal handlers|
+|`--sysimage-native-code={yes*\|no}`    |Use native code from system image if available|
+|`--compiled-modules={yes*\|no}`        |Enable or disable incremental precompilation of modules|
+|`-e`, `--eval <expr>`                  |Evaluate `<expr>`|
+|`-E`, `--print <expr>`                 |Evaluate `<expr>` and display the result|
+|`-L`, `--load <file>`                  |Load `<file>` immediately on all processors|
+|`-t`, `--threads {N\|auto`}            |Enable N threads; `auto` tries to infer a useful default number of threads to use but the exact behavior might change in the future.  Currently, `auto` uses the number of CPUs assigned to this julia process based on the OS-specific affinity assignment interface, if supported (Linux and Windows). If this is not supported (macOS) or process affinity is not configured, it uses the number of CPU threads.|
+|`-p`, `--procs {N\|auto`}              |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)|
+|`--machine-file <file>`                |Run processes on hosts listed in `<file>`|
+|`-i`                                   |Interactive mode; REPL runs and `isinteractive()` is true|
+|`-q`, `--quiet`                        |Quiet startup: no banner, suppress REPL warnings|
+|`--banner={yes\|no\|auto*}`            |Enable or disable startup banner|
+|`--color={yes\|no\|auto*}`             |Enable or disable color text|
+|`--history-file={yes*\|no}`            |Load or save history|
+|`--depwarn={yes\|no*\|error}`          |Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)|
+|`--warn-overwrite={yes\|no*}`          |Enable or disable method overwrite warnings|
+|`--warn-scope={yes*\|no}`              |Enable or disable warning for ambiguous top-level scope|
+|`-C`, `--cpu-target <target>`          |Limit usage of CPU features up to `<target>`; set to `help` to see the available options|
+|`-O`, `--optimize={0,1,2*,3}`          |Set the optimization level (level is 3 if `-O` is used without a level)|
+|`--min-optlevel={0*,1,2,3}`            |Set the lower bound on per-module optimization|
+|`-g {0,1*,2}`                          |Set the level of debug info generation (level is 2 if `-g` is used without a level)|
+|`--inline={yes\|no}`                   |Control whether inlining is permitted, including overriding `@inline` declarations|
+|`--check-bounds={yes\|no\|auto*}`      |Emit bounds checks always, never, or respect `@inbounds` declarations|
+|`--math-mode={ieee,fast}`              |Disallow or enable unsafe floating point optimizations (overrides `@fastmath` declaration)|
+|`--code-coverage[={none*\|user\|all}]` |Count executions of source lines (omitting setting is equivalent to `user`)|
+|`--code-coverage=tracefile.info`       |Append coverage information to the LCOV tracefile (filename supports format tokens).|
+|`--track-allocation[={none*\|user\|all}]` |Count bytes allocated by each source line (omitting setting is equivalent to "user")|
+|`--bug-report=KIND`                    |Launch a bug report session. It can be used to start a REPL, run a script, or evaluate expressions. It first tries to use BugReporting.jl installed in current environment and falls back to the latest compatible BugReporting.jl if not. For more information, see `--bug-report=help`.|
+|`--compile={yes*\|no\|all\|min}`       |Enable or disable JIT compiler, or request exhaustive or minimal compilation|
+|`--output-o <name>`                    |Generate an object file (including system image data)|
+|`--output-ji <name>`                   |Generate a system image data file (.ji)|
+|`--strip-metadata`                     |Remove docstrings and source location info from system image|
+|`--strip-ir`                           |Remove IR (intermediate representation) of compiled functions|
+|`--output-unopt-bc <name>`             |Generate unoptimized LLVM bitcode (.bc)|
+|`--output-bc <name>`                   |Generate LLVM bitcode (.bc)|
+|`--output-asm <name>`                  |Generate an assembly file (.s)|
+|`--output-incremental={yes\|no*}`      |Generate an incremental output file (rather than complete)|
+|`--trace-compile={stderr,name}`        |Print precompile statements for methods compiled during execution or save to a path|
+|`--image-codegen`                      |Force generate code in imaging mode|
+
+
+!!! compat "Julia 1.1"
+    In Julia 1.0, the default `--project=@.` option did not search up from the root
+    directory of a Git repository for the `Project.toml` file. From Julia 1.1 forward, it
+    does.
diff --git a/doc/src/manual/complex-and-rational-numbers.md b/doc/src/manual/complex-and-rational-numbers.md
index faf8ffcf8c198d..ac48e5b420f5e4 100644
--- a/doc/src/manual/complex-and-rational-numbers.md
+++ b/doc/src/manual/complex-and-rational-numbers.md
@@ -8,7 +8,7 @@ behave as expected.
 ## Complex Numbers
 
 The global constant [`im`](@ref) is bound to the complex number *i*, representing the principal
-square root of -1. (Using mathematicians' `i` or engineers' `j` for this global constant were rejected since they are such popular index variable names.) Since Julia allows numeric literals to be [juxtaposed with identifiers as coefficients](@ref man-numeric-literal-coefficients),
+square root of -1. (Using mathematicians' `i` or engineers' `j` for this global constant was rejected since they are such popular index variable names.) Since Julia allows numeric literals to be [juxtaposed with identifiers as coefficients](@ref man-numeric-literal-coefficients),
 this binding suffices to provide convenient syntax for complex numbers, similar to the traditional
 mathematical notation:
 
@@ -124,7 +124,7 @@ julia> sqrt(1 + 2im)
 1.272019649514069 + 0.7861513777574233im
 
 julia> cos(1 + 2im)
-2.0327230070196656 - 3.0518977991518im
+2.0327230070196656 - 3.0518977991517997im
 
 julia> exp(1 + 2im)
 -1.1312043837568135 + 2.4717266720048188im
diff --git a/doc/src/manual/constructors.md b/doc/src/manual/constructors.md
index 555f2066018c66..39d6d6bcaf0f51 100644
--- a/doc/src/manual/constructors.md
+++ b/doc/src/manual/constructors.md
@@ -420,7 +420,9 @@ julia> struct OurRational{T<:Integer} <: Real
                if num == 0 && den == 0
                     error("invalid rational: 0//0")
                end
-               g = gcd(den, num)
+               num = flipsign(num, den)
+               den = flipsign(den, den)
+               g = gcd(num, den)
                num = div(num, g)
                den = div(den, g)
                new(num, den)
@@ -466,10 +468,9 @@ and `den::T` indicate that the data held in a `OurRational{T}` object are a pair
 
 Now things get interesting. `OurRational` has a single inner constructor method which checks that
 `num` and `den` aren't both zero and ensures that every rational is constructed in "lowest
-terms" with a non-negative denominator. This is accomplished by dividing the given numerator and
-denominator values by their greatest common divisor, computed using the `gcd` function. Since
-`gcd` returns the greatest common divisor of its arguments with sign matching the first argument
-(`den` here), after this division the new value of `den` is guaranteed to be non-negative. Because
+terms" with a non-negative denominator. This is accomplished by first flipping the signs of numerator
+and denominator if the denominator is negative. Then, both are divided by their greatest common
+divisor (`gcd` always returns a non-negative number, regardless of the sign of its arguments). Because
 this is the only inner constructor for `OurRational`, we can be certain that `OurRational` objects are
 always constructed in this normalized form.
 
@@ -490,7 +491,7 @@ The first and most basic definition just makes `a ⊘ b` construct a `OurRationa
 is already a rational number, we construct a new rational for the resulting ratio slightly differently;
 this behavior is actually identical to division of a rational with an integer.
 Finally, applying
-`⊘` to complex integral values creates an instance of `Complex{OurRational}` -- a complex
+`⊘` to complex integral values creates an instance of `Complex{<:OurRational}` -- a complex
 number whose real and imaginary parts are rationals:
 
 ```jldoctest rational
@@ -499,12 +500,12 @@ julia> z = (1 + 2im) ⊘ (1 - 2im);
 julia> typeof(z)
 Complex{OurRational{Int64}}
 
-julia> typeof(z) <: Complex{OurRational}
-false
+julia> typeof(z) <: Complex{<:OurRational}
+true
 ```
 
 Thus, although the `⊘` operator usually returns an instance of `OurRational`, if either
-of its arguments are complex integers, it will return an instance of `Complex{OurRational}` instead.
+of its arguments are complex integers, it will return an instance of `Complex{<:OurRational}` instead.
 The interested reader should consider perusing the rest of [`rational.jl`](https://github.com/JuliaLang/julia/blob/master/base/rational.jl):
 it is short, self-contained, and implements an entire basic Julia type.
 
diff --git a/doc/src/manual/control-flow.md b/doc/src/manual/control-flow.md
index b9a5a9f60e2988..63832cc4c90c96 100644
--- a/doc/src/manual/control-flow.md
+++ b/doc/src/manual/control-flow.md
@@ -256,8 +256,9 @@ short-circuit behavior, but beware that `&` and `|` have higher precedence than
 Short-circuit evaluation is quite similar to conditional evaluation. The behavior is found in
 most imperative programming languages having the `&&` and `||` boolean operators: in a series
 of boolean expressions connected by these operators, only the minimum number of expressions are
-evaluated as are necessary to determine the final boolean value of the entire chain. Explicitly,
-this means that:
+evaluated as are necessary to determine the final boolean value of the entire chain. Some
+languages (like Python) refer to them as `and` (`&&`) and `or` (`||`). Explicitly, this means
+that:
 
   * In the expression `a && b`, the subexpression `b` is only evaluated if `a` evaluates to `true`.
   * In the expression `a || b`, the subexpression `b` is only evaluated if `a` evaluates to `false`.
@@ -553,6 +554,21 @@ julia> for i = 1:2, j = 3:4
 If this example were rewritten to use a `for` keyword for each variable, then the output would
 be different: the second and fourth values would contain `0`.
 
+Multiple containers can be iterated over at the same time in a single `for` loop using [`zip`](@ref):
+
+```jldoctest
+julia> for (j, k) in zip([1 2 3], [4 5 6 7])
+           println((j,k))
+       end
+(1, 4)
+(2, 5)
+(3, 6)
+```
+
+Using [`zip`](@ref) will create an iterator that is a tuple containing the subiterators for the containers passed to it.
+The `zip` iterator will iterate over all subiterators in order, choosing the ``i``th element of each subiterator in the
+``i``th iteration of the `for` loop. Once any of the subiterators run out, the `for` loop will stop.
+
 ## Exception Handling
 
 When an unexpected condition occurs, a function may be unable to return a reasonable value to
@@ -801,7 +817,7 @@ The power of the `try/catch` construct lies in the ability to unwind a deeply ne
 immediately to a much higher level in the stack of calling functions. There are situations where
 no error has occurred, but the ability to unwind the stack and pass a value to a higher level
 is desirable. Julia provides the [`rethrow`](@ref), [`backtrace`](@ref), [`catch_backtrace`](@ref)
-and [`Base.catch_stack`](@ref) functions for more advanced error handling.
+and [`current_exceptions`](@ref) functions for more advanced error handling.
 
 ### `finally` Clauses
 
diff --git a/doc/src/manual/conversion-and-promotion.md b/doc/src/manual/conversion-and-promotion.md
index c33a765d215d97..63ae37660cff41 100644
--- a/doc/src/manual/conversion-and-promotion.md
+++ b/doc/src/manual/conversion-and-promotion.md
@@ -319,9 +319,15 @@ julia> promote_type(Int8, Int64)
 Int64
 ```
 
+Note that we do **not** overload `promote_type` directly: we overload `promote_rule` instead.
+`promote_type` uses `promote_rule`, and adds the symmetry.
+Overloading it directly can cause ambiguity errors.
+We overload `promote_rule` to define how things should be promoted, and we use `promote_type`
+to query that.
+
 Internally, `promote_type` is used inside of `promote` to determine what type argument values
-should be converted to for promotion. It can, however, be useful in its own right. The curious
-reader can read the code in [`promotion.jl`](https://github.com/JuliaLang/julia/blob/master/base/promotion.jl),
+should be converted to for promotion. The curious reader can read the code in
+[`promotion.jl`](https://github.com/JuliaLang/julia/blob/master/base/promotion.jl),
 which defines the complete promotion mechanism in about 35 lines.
 
 ### Case Study: Rational Promotions
diff --git a/doc/src/manual/distributed-computing.md b/doc/src/manual/distributed-computing.md
index e5b6e78cae9815..73c7bd8b1ee008 100644
--- a/doc/src/manual/distributed-computing.md
+++ b/doc/src/manual/distributed-computing.md
@@ -1,6 +1,6 @@
 # Multi-processing and Distributed Computing
 
-An implementation of distributed memory parallel computing is provided by module `Distributed`
+An implementation of distributed memory parallel computing is provided by module [`Distributed`](@ref man-distributed)
 as part of the standard library shipped with Julia.
 
 Most modern computers possess more than one CPU, and several computers can be combined together
@@ -45,11 +45,11 @@ computation is running on the worker.
 
 Let's try this out. Starting with `julia -p n` provides `n` worker processes on the local machine.
 Generally it makes sense for `n` to equal the number of CPU threads (logical cores) on the machine. Note that the `-p`
-argument implicitly loads module `Distributed`.
+argument implicitly loads module [`Distributed`](@ref man-distributed).
 
 
 ```julia
-$ ./julia -p 2
+$ julia -p 2
 
 julia> r = remotecall(rand, 2, 2, 2)
 Future(2, 1, 4, nothing)
@@ -80,10 +80,18 @@ you read from a remote object to obtain data needed by the next local operation.
 but is more efficient.
 
 ```julia-repl
-julia> remotecall_fetch(getindex, 2, r, 1, 1)
+julia> remotecall_fetch(r-> fetch(r)[1, 1], 2, r)
 0.18526337335308085
 ```
 
+This fetches the array on worker 2 and returns the first value. Note, that `fetch` doesn't move any data in
+this case, since it's executed on the worker that owns the array. One can also write:
+
+```julia-repl
+julia> remotecall_fetch(getindex, 2, r, 1, 1)
+0.10824216411304866
+```
+
 Remember that [`getindex(r,1,1)`](@ref) is [equivalent](@ref man-array-indexing) to `r[1,1]`, so this call fetches
 the first element of the future `r`.
 
@@ -190,7 +198,7 @@ loaded
 ```
 
 As usual, this does not bring `DummyModule` into scope on any of the process, which requires
-`using` or `import`.  Moreover, when `DummyModule` is brought into scope on one process, it
+[`using`](@ref) or [`import`](@ref).  Moreover, when `DummyModule` is brought into scope on one process, it
 is not on any other:
 
 ```julia-repl
@@ -228,7 +236,7 @@ like a process providing an interactive prompt.
 
 Finally, if `DummyModule.jl` is not a standalone file but a package, then `using
 DummyModule` will _load_ `DummyModule.jl` on all processes, but only bring it into scope on
-the process where `using` was called.
+the process where [`using`](@ref) was called.
 
 ## Starting and managing worker processes
 
@@ -254,7 +262,7 @@ julia> addprocs(2)
  3
 ```
 
-Module `Distributed` must be explicitly loaded on the master process before invoking [`addprocs`](@ref).
+Module [`Distributed`](@ref man-distributed) must be explicitly loaded on the master process before invoking [`addprocs`](@ref).
 It is automatically made available on the worker processes.
 
 Note that workers do not run a `~/.julia/config/startup.jl` startup script, nor do they synchronize
@@ -314,8 +322,8 @@ is replaced with a more expensive operation. Then it might make sense to add ano
 statement just for this step.
 
 ## Global variables
-Expressions executed remotely via `@spawnat`, or closures specified for remote execution using
-`remotecall` may refer to global variables. Global bindings under module `Main` are treated
+Expressions executed remotely via [`@spawnat`](@ref), or closures specified for remote execution using
+[`remotecall`](@ref) may refer to global variables. Global bindings under module `Main` are treated
 a little differently compared to global bindings in other modules. Consider the following code
 snippet:
 
@@ -327,7 +335,7 @@ remotecall_fetch(()->sum(A), 2)
 In this case [`sum`](@ref) MUST be defined in the remote process.
 Note that `A` is a global variable defined in the local workspace. Worker 2 does not have a variable called
 `A` under `Main`. The act of shipping the closure `()->sum(A)` to worker 2 results in `Main.A` being defined
-on 2. `Main.A` continues to exist on worker 2 even after the call `remotecall_fetch` returns. Remote calls
+on 2. `Main.A` continues to exist on worker 2 even after the call [`remotecall_fetch`](@ref) returns. Remote calls
 with embedded global references (under `Main` module only) manage globals as follows:
 
 - New global bindings are created on destination workers if they are referenced as part of a remote call.
@@ -580,7 +588,7 @@ julia> function make_jobs(n)
 
 julia> n = 12;
 
-julia> @async make_jobs(n); # feed the jobs channel with "n" jobs
+julia> errormonitor(@async make_jobs(n)); # feed the jobs channel with "n" jobs
 
 julia> for p in workers() # start tasks on the workers to process requests in parallel
            remote_do(do_work, p, jobs, results)
@@ -648,7 +656,7 @@ Once finalized, a reference becomes invalid and cannot be used in any further ca
 ## Local invocations
 
 Data is necessarily copied over to the remote node for execution. This is the case for both
-remotecalls and when data is stored to a[`RemoteChannel`](@ref) / [`Future`](@ref Distributed.Future) on
+remotecalls and when data is stored to a [`RemoteChannel`](@ref) / [`Future`](@ref Distributed.Future) on
 a different node. As expected, this results in a copy of the serialized objects
 on the remote node. However, when the destination node is the local node, i.e.
 the calling process id is the same as the remote node id, it is executed
@@ -697,11 +705,11 @@ Num Unique objects : 3
 ```
 
 As can be seen, [`put!`](@ref) on a locally owned [`RemoteChannel`](@ref) with the same
-object `v` modifed between calls results in the same single object instance stored. As
+object `v` modified between calls results in the same single object instance stored. As
 opposed to copies of `v` being created when the node owning `rc` is a different node.
 
 It is to be noted that this is generally not an issue. It is something to be factored in only
-if the object is both being stored locally and modifed post the call. In such cases it may be
+if the object is both being stored locally and modified post the call. In such cases it may be
 appropriate to store a `deepcopy` of the object.
 
 This is also true for remotecalls on the local node as seen in the following example:
@@ -1197,12 +1205,12 @@ requirements for the inbuilt `LocalManager` and `SSHManager`:
     Securing and encrypting all worker-worker traffic (via SSH) or encrypting individual messages
     can be done via a custom `ClusterManager`.
 
-  * If you specify `multiplex=true` as an option to `addprocs`, SSH multiplexing is used to create
+  * If you specify `multiplex=true` as an option to [`addprocs`](@ref), SSH multiplexing is used to create
     a tunnel between the master and workers. If you have configured SSH multiplexing on your own and
     the connection has already been established, SSH multiplexing is used regardless of `multiplex`
     option. If multiplexing is enabled, forwarding is set by using the existing connection
     (`-O forward` option in ssh). This is beneficial if your servers require password authentication;
-    you can avoid authentication in Julia by logging in to the server ahead of `addprocs`. The control
+    you can avoid authentication in Julia by logging in to the server ahead of [`addprocs`](@ref). The control
     socket will be located at `~/.ssh/julia-%r@%h:%p` during the session unless the existing multiplexing
     connection is used. Note that bandwidth may be limited if you create multiple processes on a node
     and enable multiplexing, because in that case processes share a single multiplexing TCP connection.
@@ -1228,7 +1236,7 @@ For example, cookies can be pre-shared and hence not specified as a startup argu
 
 ## Specifying Network Topology (Experimental)
 
-The keyword argument `topology` passed to `addprocs` is used to specify how the workers must be
+The keyword argument `topology` passed to [`addprocs`](@ref) is used to specify how the workers must be
 connected to each other:
 
   * `:all_to_all`, the default: all workers are connected to each other.
@@ -1250,20 +1258,21 @@ in future releases.
 ## Noteworthy external packages
 
 Outside of Julia parallelism there are plenty of external packages that should be mentioned.
-For example [MPI.jl](https://github.com/JuliaParallel/MPI.jl) is a Julia wrapper for the `MPI` protocol, or
-[DistributedArrays.jl](https://github.com/JuliaParallel/Distributedarrays.jl), as presented in [Shared Arrays](@ref).
+For example [MPI.jl](https://github.com/JuliaParallel/MPI.jl) is a Julia wrapper for the `MPI` protocol, [Dagger.jl](https://github.com/JuliaParallel/Dagger.jl) provides functionality similar to Python's [Dask](https://dask.org/), and
+[DistributedArrays.jl](https://github.com/JuliaParallel/Distributedarrays.jl) provides array operations distributed across workers, as presented in [Shared Arrays](@ref).
+
 A mention must be made of Julia's GPU programming ecosystem, which includes:
 
-1. Low-level (C kernel) based operations [OpenCL.jl](https://github.com/JuliaGPU/OpenCL.jl) and [CUDAdrv.jl](https://github.com/JuliaGPU/CUDAdrv.jl) which are respectively an OpenCL interface and a CUDA wrapper.
+1. [CUDA.jl](https://github.com/JuliaGPU/CUDA.jl) wraps the various CUDA libraries and supports compiling Julia kernels for Nvidia GPUs.
 
-2. Low-level (Julia Kernel) interfaces like [CUDAnative.jl](https://github.com/JuliaGPU/CUDAnative.jl) which is a Julia native CUDA implementation.
+2. [oneAPI.jl](https://github.com/JuliaGPU/oneAPI.jl) wraps the oneAPI unified programming model, and supports executing Julia kernels on supported accelerators. Currently only Linux is supported.
 
-3. High-level vendor-specific abstractions like [CuArrays.jl](https://github.com/JuliaGPU/CuArrays.jl) and [CLArrays.jl](https://github.com/JuliaGPU/CLArrays.jl)
+3. [AMDGPU.jl](https://github.com/JuliaGPU/AMDGPU.jl) wraps the AMD ROCm libraries and supports compiling Julia kernels for AMD GPUs. Currently only Linux is supported.
 
-4. High-level libraries like [ArrayFire.jl](https://github.com/JuliaComputing/ArrayFire.jl) and [GPUArrays.jl](https://github.com/JuliaGPU/GPUArrays.jl)
+4. High-level libraries like [KernelAbstractions.jl](https://github.com/JuliaGPU/KernelAbstractions.jl), [Tullio.jl](https://github.com/mcabbott/Tullio.jl) and [ArrayFire.jl](https://github.com/JuliaComputing/ArrayFire.jl).
 
 
-In the following example we will use both `DistributedArrays.jl` and `CuArrays.jl` to distribute an array across multiple
+In the following example we will use both `DistributedArrays.jl` and `CUDA.jl` to distribute an array across multiple
 processes by first casting it through `distribute()` and `CuArray()`.
 
 Remember when importing `DistributedArrays.jl` to import it across all processes using [`@everywhere`](@ref)
@@ -1276,7 +1285,7 @@ julia> addprocs()
 
 julia> @everywhere using DistributedArrays
 
-julia> using CuArrays
+julia> using CUDA
 
 julia> B = ones(10_000) ./ 2;
 
@@ -1314,9 +1323,8 @@ true
 julia> typeof(cuC)
 CuArray{Float64,1}
 ```
-Keep in mind that some Julia features are not currently supported by CUDAnative.jl[^2] , especially some functions like `sin` will need to be replaced with `CUDAnative.sin`(cc: @maleadt).
 
-In the following example we will use both `DistributedArrays.jl` and `CuArrays.jl` to distribute an array across multiple
+In the following example we will use both `DistributedArrays.jl` and `CUDA.jl` to distribute an array across multiple
 processes and call a generic function on it.
 
 ```julia
@@ -1399,6 +1407,3 @@ mpirun -np 4 ./julia example.jl
     introduced a new set of communication mechanisms, collectively referred to as Remote Memory Access
     (RMA). The motivation for adding rma to the MPI standard was to facilitate one-sided communication
     patterns. For additional information on the latest MPI standard, see <https://mpi-forum.org/docs>.
-
-[^2]:
-    [Julia GPU man pages](http://juliagpu.github.io/CUDAnative.jl/stable/man/usage.html#Julia-support-1)
diff --git a/doc/src/manual/documentation.md b/doc/src/manual/documentation.md
index d9f23bf8e264dd..99d46e364b3eb9 100644
--- a/doc/src/manual/documentation.md
+++ b/doc/src/manual/documentation.md
@@ -1,9 +1,30 @@
 # [Documentation](@id man-documentation)
 
+## Accessing Documentation
+
+Documentation can be accessed at the REPL or in [IJulia](https://github.com/JuliaLang/IJulia.jl)
+by typing `?` followed by the name of a function or macro, and pressing `Enter`. For example,
+
+```julia
+?cos
+?@time
+?r""
+```
+
+will show documentation for the relevant function, macro or string macro respectively. Most Julia
+environments provide a way to access documentation directly:
+- [VS Code](https://www.julia-vscode.org/) shows documentation when you hover over a function name.
+  You can also use the Julia panel in the sidebar to search for documentation.
+- In [Pluto](https://github.com/fonsp/Pluto.jl), open the "Live Docs" panel on the bottom right.
+- In [Juno](https://junolab.org) using `Ctrl-J, Ctrl-D` will show the documentation for the object
+under the cursor.
+
+## Writing Documentation
+
 Julia enables package developers and users to document functions, types and other objects easily
 via a built-in documentation system.
 
-The basic syntax is simple: any string appearing at the toplevel right before an object
+The basic syntax is simple: any string appearing just before an object
 (function, macro, type or instance) will be interpreted as documenting it (these are called
 *docstrings*). Note that no blank lines or comments may intervene between a docstring and
 the documented object. Here is a basic example:
@@ -91,10 +112,10 @@ As in the example above, we recommend following some simple conventions when wri
 5. Provide hints to related functions.
 
    Sometimes there are functions of related functionality. To increase discoverability please provide
-   a short list of these in a `See also:` paragraph.
+   a short list of these in a `See also` paragraph.
 
    ```
-   See also: [`bar!`](@ref), [`baz`](@ref), [`baaz`](@ref)
+   See also [`bar!`](@ref), [`baz`](@ref), [`baaz`](@ref).
    ```
 6. Include any code examples in an `# Examples` section.
 
@@ -128,8 +149,7 @@ As in the example above, we recommend following some simple conventions when wri
        Calling `rand` and other RNG-related functions should be avoided in doctests since they will not
        produce consistent outputs during different Julia sessions. If you would like to show some random
        number generation related functionality, one option is to explicitly construct and seed your own
-       [`MersenneTwister`](@ref) (or other pseudorandom number generator) and pass it to the functions you are
-       doctesting.
+       RNG object (see [`Random`](@ref Random-Numbers)) and pass it to the functions you are doctesting.
 
        Operating system word size ([`Int32`](@ref) or [`Int64`](@ref)) as well as path separator differences
        (`/` or `\`) will also affect the reproducibility of some doctests.
@@ -203,21 +223,6 @@ As in the example above, we recommend following some simple conventions when wri
    material above the header; you can access the full help by adding a '?'
    at the beginning of the expression (i.e., "??foo" rather than "?foo").
 
-## Accessing Documentation
-
-Documentation can be accessed at the REPL or in [IJulia](https://github.com/JuliaLang/IJulia.jl)
-by typing `?` followed by the name of a function or macro, and pressing `Enter`. For example,
-
-```julia
-?cos
-?@time
-?r""
-```
-
-will show documentation for the relevant function, macro or string macro respectively. In
-[Juno](http://junolab.org) using `Ctrl-J, Ctrl-D` will show the documentation for the object
-under the cursor.
-
 ## Functions & Methods
 
 Functions in Julia may have multiple implementations, known as methods. While it's good practice
@@ -343,17 +348,17 @@ for your custom type that returns the documentation on a per-instance basis. For
 
 ```julia
 struct MyType
-    value::String
+    value::Int
 end
 
 Docs.getdoc(t::MyType) = "Documentation for MyType with value $(t.value)"
 
-x = MyType("x")
-y = MyType("y")
+x = MyType(1)
+y = MyType(2)
 ```
 
-`?x` will display "Documentation for MyType with value x" while `?y` will display
-"Documentation for MyType with value y".
+`?x` will display "Documentation for MyType with value 1" while `?y` will display
+"Documentation for MyType with value 2".
 
 ## Syntax Guide
 
@@ -496,7 +501,7 @@ end
 
 Documenting a `baremodule` by placing a docstring above the expression automatically imports
 `@doc` into the module. These imports must be done manually when the module expression is not
-documented. Empty `baremodule`s cannot be documented.
+documented.
 
 ### Global Variables
 
diff --git a/doc/src/manual/embedding.md b/doc/src/manual/embedding.md
index 1eb42a693f321d..58490a039b5edc 100644
--- a/doc/src/manual/embedding.md
+++ b/doc/src/manual/embedding.md
@@ -16,7 +16,7 @@ We start with a simple C program that initializes Julia and calls some Julia cod
 
 ```c
 #include <julia.h>
-JULIA_DEFINE_FAST_TLS() // only define this once, in an executable (not in a shared library) if you want fast code.
+JULIA_DEFINE_FAST_TLS // only define this once, in an executable (not in a shared library) if you want fast code.
 
 int main(int argc, char *argv[])
 {
@@ -45,7 +45,7 @@ gcc -o test -fPIC -I$JULIA_DIR/include/julia -L$JULIA_DIR/lib -Wl,-rpath,$JULIA_
 ```
 
 Alternatively, look at the `embedding.c` program in the Julia source tree in the `test/embedding/` folder.
-The file `ui/repl.c` program is another simple example of how to set `jl_options` options while
+The file `cli/loader_exe.c` program is another simple example of how to set `jl_options` options while
 linking against `libjulia`.
 
 The first thing that has to be done before calling any other Julia C function is to initialize
@@ -243,16 +243,18 @@ arguments.
 
 ## Memory Management
 
-As we have seen, Julia objects are represented in C as pointers. This raises the question of who
+As we have seen, Julia objects are represented in C as pointers of type `jl_value_t*`. This raises the question of who
 is responsible for freeing these objects.
 
-Typically, Julia objects are freed by a garbage collector (GC), but the GC does not automatically
+Typically, Julia objects are freed by the garbage collector (GC), but the GC does not automatically
 know that we are holding a reference to a Julia value from C. This means the GC can free objects
 out from under you, rendering pointers invalid.
 
-The GC can only run when Julia objects are allocated. Calls like `jl_box_float64` perform allocation,
-and allocation might also happen at any point in running Julia code. However, it is generally
-safe to use pointers in between `jl_...` calls. But in order to make sure that values can survive
+The GC will only run when new Julia objects are being allocated. Calls like `jl_box_float64` perform allocation,
+but allocation might also happen at any point in running Julia code.
+
+When writing code that embeds Julia, it is generally safe to use `jl_value_t*` values in between `jl_...` calls
+(as GC will only get triggered by those calls). But in order to make sure that values can survive
 `jl_...` calls, we have to tell Julia that we still hold a reference to Julia
 [root](https://www.cs.purdue.edu/homes/hosking/690M/p611-fenichel.pdf) values, a process
 called "GC rooting". Rooting a value will ensure that the garbage collector does not accidentally
@@ -271,9 +273,14 @@ The `JL_GC_POP` call releases the references established by the previous `JL_GC_
 before the scope is exited. That is, before the function returns, or control flow otherwise
 leaves the block in which the `JL_GC_PUSH` was invoked.
 
-Several Julia values can be pushed at once using the `JL_GC_PUSH2` , `JL_GC_PUSH3` , `JL_GC_PUSH4` ,
-`JL_GC_PUSH5` , and `JL_GC_PUSH6` macros. To push an array of Julia values one can use the
-`JL_GC_PUSHARGS` macro, which can be used as follows:
+Several Julia values can be pushed at once using the `JL_GC_PUSH2` to `JL_GC_PUSH6` macros:
+```
+JL_GC_PUSH2(&ret1, &ret2);
+// ...
+JL_GC_PUSH6(&ret1, &ret2, &ret3, &ret4, &ret5, &ret6);
+```
+
+To push an array of Julia values one can use the `JL_GC_PUSHARGS` macro, which can be used as follows:
 
 ```c
 jl_value_t **args;
@@ -284,8 +291,8 @@ args[1] = some_other_value;
 JL_GC_POP();
 ```
 
-Each scope must have only one call to `JL_GC_PUSH*`. Hence, if all variables cannot be pushed once by
-a single call to `JL_GC_PUSH*`, or if there are more than 6 variables to be pushed and using an array
+Each scope must have only one call to `JL_GC_PUSH*`, and should be paired with only a single `JL_GC_POP` call.
+If all necessary variables you want to root cannot be pushed by a one single call to `JL_GC_PUSH*`, or if there are more than 6 variables to be pushed and using an array
 of arguments is not an option, then one can use inner blocks:
 
 ```c
@@ -302,6 +309,19 @@ jl_value_t *ret2 = 0;
 JL_GC_POP();    // This pops ret1.
 ```
 
+Note that it is not necessary to have valid `jl_value_t*` values before calling
+`JL_GC_PUSH*`. It is fine to have a number of them initialized to `NULL`, pass those
+to `JL_GC_PUSH*` and then create the actual Julia values. For example:
+
+```
+jl_value_t *ret1 = NULL, *ret2 = NULL;
+JL_GC_PUSH2(&ret1, &ret2);
+ret1 = jl_eval_string("sqrt(2.0)");
+ret2 = jl_eval_string("sqrt(3.0)");
+// Use ret1 and ret2
+JL_GC_POP();
+```
+
 If it is required to hold the pointer to a variable between functions (or block scopes), then it is
 not possible to use `JL_GC_PUSH*`. In this case, it is necessary to create and keep a reference to the
 variable in the Julia global scope. One simple way to accomplish this is to use a global `IdDict` that
@@ -371,7 +391,8 @@ As an alternative for very simple cases, it is possible to just create a global
 per pointer using
 
 ```c
-jl_set_global(jl_main_module, jl_symbol("var"), var);
+jl_binding_t *bp = jl_get_binding_wr(jl_main_module, jl_symbol("var"), 1);
+jl_checked_assignment(bp, val);
 ```
 
 ### Updating fields of GC-managed objects
@@ -486,7 +507,7 @@ that creates a 2D array and accesses its properties:
 
 ```c
 // Create 2D array of float64 type
-jl_value_t *array_type = jl_apply_array_type(jl_float64_type, 2);
+jl_value_t *array_type = jl_apply_array_type((jl_value_t*)jl_float64_type, 2);
 jl_array_t *x  = jl_alloc_array_2d(array_type, 10, 5);
 
 // Get array pointer
@@ -551,3 +572,110 @@ jl_errorf("argument x = %d is too large", x);
 ```
 
 where in this example `x` is assumed to be an integer.
+
+### Thread-safety
+
+In general, the Julia C API is not fully thread-safe. When embedding Julia in a multi-threaded application care needs to be taken not to violate
+the following restrictions:
+
+* `jl_init()` may only be called once in the application life-time. The same applies to `jl_atexit_hook()`, and it may only be called after `jl_init()`.
+* `jl_...()` API functions may only be called from the thread in which `jl_init()` was called, *or from threads started by the Julia runtime*. Calling Julia API functions from user-started threads is not supported, and may lead to undefined behaviour and crashes.
+
+The second condition above implies that you can not safely call `jl_...()` functions from threads that were not started by Julia (the thread calling `jl_init()` being the exception). For example, the following is not supported and will most likely segfault:
+
+```c
+void *func(void*)
+{
+    // Wrong, jl_eval_string() called from thread that was not started by Julia
+    jl_eval_string("println(Threads.nthreads())");
+    return NULL;
+}
+
+int main()
+{
+    pthread_t t;
+
+    jl_init();
+
+    // Start a new thread
+    pthread_create(&t, NULL, func, NULL);
+    pthread_join(t, NULL);
+
+    jl_atexit_hook(0);
+}
+```
+
+Instead, performing all Julia calls from the same user-created thread will work:
+
+```c
+void *func(void*)
+{
+    // Okay, all jl_...() calls from the same thread,
+    // even though it is not the main application thread
+    jl_init();
+    jl_eval_string("println(Threads.nthreads())");
+    jl_atexit_hook(0);
+    return NULL;
+}
+
+int main()
+{
+    pthread_t t;
+    // Create a new thread, which runs func()
+    pthread_create(&t, NULL, func, NULL);
+    pthread_join(t, NULL);
+}
+```
+
+An example of calling the Julia C API from a thread started by Julia itself:
+
+```c
+#include <julia/julia.h>
+JULIA_DEFINE_FAST_TLS
+
+double c_func(int i)
+{
+    printf("[C %08x] i = %d\n", pthread_self(), i);
+
+    // Call the Julia sqrt() function to compute the square root of i, and return it
+    jl_function_t *sqrt = jl_get_function(jl_base_module, "sqrt");
+    jl_value_t* arg = jl_box_int32(i);
+    double ret = jl_unbox_float64(jl_call1(sqrt, arg));
+
+    return ret;
+}
+
+int main()
+{
+    jl_init();
+
+    // Define a Julia function func() that calls our c_func() defined in C above
+    jl_eval_string("func(i) = ccall(:c_func, Float64, (Int32,), i)");
+
+    // Call func() multiple times, using multiple threads to do so
+    jl_eval_string("println(Threads.nthreads())");
+    jl_eval_string("use(i) = println(\"[J $(Threads.threadid())] i = $(i) -> $(func(i))\")");
+    jl_eval_string("Threads.@threads for i in 1:5 use(i) end");
+
+    jl_atexit_hook(0);
+}
+```
+
+If we run this code with 2 Julia threads we get the following output (note: the output will vary per run and system):
+
+```sh
+$ JULIA_NUM_THREADS=2 ./thread_example
+2
+[C 3bfd9c00] i = 1
+[C 23938640] i = 4
+[J 1] i = 1 -> 1.0
+[C 3bfd9c00] i = 2
+[J 1] i = 2 -> 1.4142135623730951
+[C 3bfd9c00] i = 3
+[J 2] i = 4 -> 2.0
+[C 23938640] i = 5
+[J 1] i = 3 -> 1.7320508075688772
+[J 2] i = 5 -> 2.23606797749979
+```
+
+As can be seen, Julia thread 1 corresponds to pthread ID 3bfd9c00, and Julia thread 2 corresponds to ID 23938640, showing that indeed multiple threads are used at the C level, and that we can safely call Julia C API routines from those threads.
diff --git a/doc/src/manual/environment-variables.md b/doc/src/manual/environment-variables.md
index 75079107ea6daf..bc4a742365d692 100644
--- a/doc/src/manual/environment-variables.md
+++ b/doc/src/manual/environment-variables.md
@@ -80,7 +80,7 @@ Setting this environment variable has the same effect as specifying the `--proje
 start-up option, but `--project` has higher precedence. If the variable is set to `@.`
 then Julia tries to find a project directory that contains `Project.toml` or
 `JuliaProject.toml` file from the current directory and its parents. See also
-the chapter on [Code Loading](@ref).
+the chapter on [Code Loading](@ref code-loading).
 
 !!! note
 
@@ -91,7 +91,7 @@ the chapter on [Code Loading](@ref).
 
 The `JULIA_LOAD_PATH` environment variable is used to populate the global Julia
 [`LOAD_PATH`](@ref) variable, which determines which packages can be loaded via
-`import` and `using` (see [Code Loading](@ref)).
+`import` and `using` (see [Code Loading](@ref code-loading)).
 
 Unlike the shell `PATH` variable, empty entries in `JULIA_LOAD_PATH` are expanded to
 the default value of `LOAD_PATH`, `["@", "@v#.#", "@stdlib"]` when populating
@@ -111,6 +111,11 @@ This behavior was chosen so that it would be possible to set an empty load path
 the environment variable. If you want the default load path, either unset the
 environment variable or if it must have a value, set it to the string `:`.
 
+!!! note
+
+    On Windows, path elements are separated by the `;` character, as is the case with
+    most path lists on Windows. Replace `:` with `;` in the above paragraph.
+
 ### `JULIA_DEPOT_PATH`
 
 The `JULIA_DEPOT_PATH` environment variable is used to populate the global Julia
@@ -137,6 +142,17 @@ chosen so that it would be possible to set an empty depot path via the environme
 variable. If you want the default depot path, either unset the environment variable
 or if it must have a value, set it to the string `:`.
 
+!!! note
+
+    On Windows, path elements are separated by the `;` character, as is the case with
+    most path lists on Windows. Replace `:` with `;` in the above paragraph.
+
+!!! note
+    `JULIA_DEPOT_PATH` must be defined before starting julia; defining it in
+    `startup.jl` is too late in the startup process; at that point you can instead
+    directly modify the `DEPOT_PATH` array, which is populated from the environment
+    variable.
+
 ### `JULIA_HISTORY`
 
 The absolute path `REPL.find_hist_file()` of the REPL's history file. If
@@ -146,6 +162,89 @@ The absolute path `REPL.find_hist_file()` of the REPL's history file. If
 $(DEPOT_PATH[1])/logs/repl_history.jl
 ```
 
+### `JULIA_MAX_NUM_PRECOMPILE_FILES`
+
+Sets the maximum number of different instances of a single package that are to be stored in the precompile cache (default = 10).
+
+## Pkg.jl
+
+### `JULIA_CI`
+
+If set to `true`, this indicates to the package server that any package operations are part of a continuous integration (CI) system for the purposes of gathering package usage statistics.
+
+### `JULIA_NUM_PRECOMPILE_TASKS`
+
+The number of parallel tasks to use when precompiling packages. See [`Pkg.precompile`](https://pkgdocs.julialang.org/v1/api/#Pkg.precompile).
+
+### `JULIA_PKG_DEVDIR`
+
+The default directory used by [`Pkg.develop`](https://pkgdocs.julialang.org/v1/api/#Pkg.develop) for downloading packages.
+
+### `JULIA_PKG_IGNORE_HASHES`
+
+If set to `1`, this will ignore incorrect hashes in artifacts. This should be used carefully, as it disables verification of downloads, but can resolve issues when moving files across different types of file systems. See [Pkg.jl issue #2317](https://github.com/JuliaLang/Pkg.jl/issues/2317) for more details.
+
+!!! compat "Julia 1.6"
+    This is only supported in Julia 1.6 and above.
+
+### `JULIA_PKG_OFFLINE`
+
+If set to `true`, this will enable offline mode: see [`Pkg.offline`](https://pkgdocs.julialang.org/v1/api/#Pkg.offline).
+
+!!! compat "Julia 1.5"
+    Pkg's offline mode requires Julia 1.5 or later.
+
+### `JULIA_PKG_PRECOMPILE_AUTO`
+
+If set to `0`, this will disable automatic precompilation by package actions which change the manifest. See [`Pkg.precompile`](https://pkgdocs.julialang.org/v1/api/#Pkg.precompile).
+
+### `JULIA_PKG_SERVER`
+
+Specifies the URL of the package registry to use. By default, `Pkg` uses
+`https://pkg.julialang.org` to fetch Julia packages. In addition, you can disable the use of the PkgServer
+protocol, and instead access the packages directly from their hosts (GitHub, GitLab, etc.)
+by setting: ``` export JULIA_PKG_SERVER="" ```
+
+### `JULIA_PKG_SERVER_REGISTRY_PREFERENCE`
+
+Specifies the preferred registry flavor. Currently supported values are `conservative`
+(the default), which will only publish resources that have been processed by the storage
+server (and thereby have a higher probability of being available from the PkgServers),
+whereas `eager` will publish registries whose resources have not necessarily been
+processed by the storage servers.  Users behind restrictive firewalls that do not allow
+downloading from arbitrary servers should not use the `eager` flavor.
+
+!!! compat "Julia 1.7"
+    This only affects Julia 1.7 and above.
+
+### `JULIA_PKG_UNPACK_REGISTRY`
+
+If set to `true`, this will unpack the registry instead of storing it as a compressed tarball.
+
+!!! compat "Julia 1.7"
+    This only affects Julia 1.7 and above. Earlier versions will always unpack the registry.
+
+### `JULIA_PKG_USE_CLI_GIT`
+
+If set to `true`, Pkg operations which use the git protocol will use an external `git` executable instead of the default libgit2 library.
+
+!!! compat "Julia 1.7"
+    Use of the `git` executable is only supported on Julia 1.7 and above.
+
+### `JULIA_PKGRESOLVE_ACCURACY`
+
+The accuracy of the package resolver. This should be a positive integer, the default is `1`.
+
+## Network transport
+
+### `JULIA_NO_VERIFY_HOSTS` / `JULIA_SSL_NO_VERIFY_HOSTS` / `JULIA_SSH_NO_VERIFY_HOSTS` / `JULIA_ALWAYS_VERIFY_HOSTS`
+
+Specify hosts whose identity should or should not be verified for specific transport layers. See [`NetworkOptions.verify_host`](https://github.com/JuliaLang/NetworkOptions.jl#verify_host)
+
+### `JULIA_SSL_CA_ROOTS_PATH`
+
+Specify the file or directory containing the certificate authority roots. See [`NetworkOptions.ca_roots`](https://github.com/JuliaLang/NetworkOptions.jl#ca_roots)
+
 ## External applications
 
 ### `JULIA_SHELL`
@@ -169,6 +268,8 @@ over `$EDITOR`. If none of these environment variables is set, then the editor
 is taken to be `open` on Windows and OS X, or `/etc/alternatives/editor` if it
 exists, or `emacs` otherwise.
 
+To use Visual Studio Code on Windows, set `$JULIA_EDITOR` to `code.cmd`.
+
 ## Parallelization
 
 ### `JULIA_CPU_THREADS`
@@ -185,20 +286,24 @@ a master process to establish a connection before dying.
 ### [`JULIA_NUM_THREADS`](@id JULIA_NUM_THREADS)
 
 An unsigned 64-bit integer (`uint64_t`) that sets the maximum number of threads
-available to Julia. If `$JULIA_NUM_THREADS` exceeds the number of available
-CPU threads (logical cores), then the number of threads is set to the number of CPU threads. If
-`$JULIA_NUM_THREADS` is not positive or is not set, or if the number of CPU
-threads cannot be determined through system calls, then the number of threads is
-set to `1`.
+available to Julia.  If `$JULIA_NUM_THREADS` is not positive or is not set, or
+if the number of CPU threads cannot be determined through system calls, then the
+number of threads is set to `1`.
 
-!!! note
+If `$JULIA_NUM_THREADS` is set to `auto`, then the number of threads will be set
+to the number of CPU threads.
 
-    `JULIA_NUM_THREADS` must be defined before starting julia; defining it in `startup.jl` is too late in the startup process.
+!!! note
+    `JULIA_NUM_THREADS` must be defined before starting julia; defining it in
+    `startup.jl` is too late in the startup process.
 
 !!! compat "Julia 1.5"
     In Julia 1.5 and above the number of threads can also be specified on startup
     using the `-t`/`--threads` command line argument.
 
+!!! compat "Julia 1.7"
+    The `auto` value for `$JULIA_NUM_THREADS` requires Julia 1.7 or above.
+
 ### `JULIA_THREAD_SLEEP_THRESHOLD`
 
 If set to a string that starts with the case-insensitive substring `"infinite"`,
@@ -216,7 +321,7 @@ affinitized. Otherwise, Julia lets the operating system handle thread policy.
 
 Environment variables that determine how REPL output should be formatted at the
 terminal. Generally, these variables should be set to [ANSI terminal escape
-sequences](http://ascii-table.com/ansi-escape-sequences.php). Julia provides
+sequences](https://en.wikipedia.org/wiki/ANSI_escape_code). Julia provides
 a high-level interface with much of the same functionality; see the section on
 [The Julia REPL](@ref).
 
@@ -249,7 +354,7 @@ should have at the terminal.
 
 ### `JULIA_DEBUG`
 
-Enable debug logging for a file or module, see [`Logging`](@ref Logging) for more information.
+Enable debug logging for a file or module, see [`Logging`](@ref man-logging) for more information.
 
 ### `JULIA_GC_ALLOC_POOL`, `JULIA_GC_ALLOC_OTHER`, `JULIA_GC_ALLOC_PRINT`
 
@@ -311,11 +416,17 @@ event listener for just-in-time (JIT) profiling.
     profiling support, using either
     * Intel's [VTune™ Amplifier](https://software.intel.com/en-us/vtune)
       (`USE_INTEL_JITEVENTS` set to `1` in the build configuration), or
-    * [OProfile](http://oprofile.sourceforge.net/news/) (`USE_OPROFILE_JITEVENTS` set to `1`
+    * [OProfile](https://oprofile.sourceforge.io/news/) (`USE_OPROFILE_JITEVENTS` set to `1`
       in the build configuration).
+    * [Perf](https://perf.wiki.kernel.org) (`USE_PERF_JITEVENTS` set to `1`
+      in the build configuration). This integration is enabled by default.
 
-### `JULIA_LLVM_ARGS`
+### `ENABLE_GDBLISTENER`
 
-Arguments to be passed to the LLVM backend.
+If set to anything besides `0` enables GDB registration of Julia code on release builds.
+On debug builds of Julia this is always enabled. Recommended to use with `-g 2`.
 
 
+### `JULIA_LLVM_ARGS`
+
+Arguments to be passed to the LLVM backend.
diff --git a/doc/src/manual/faq.md b/doc/src/manual/faq.md
index 03a0be4a64acd9..ac7e6e81e5d700 100644
--- a/doc/src/manual/faq.md
+++ b/doc/src/manual/faq.md
@@ -18,6 +18,33 @@ For similar reasons, automated translation to Julia would also typically generat
 
 On the other hand, language *interoperability* is extremely useful: we want to exploit existing high-quality code in other languages from Julia (and vice versa)!  The best way to enable this is not a transpiler, but rather via easy inter-language calling facilities.  We have worked hard on this, from the built-in `ccall` intrinsic (to call C and Fortran libraries) to [JuliaInterop](https://github.com/JuliaInterop) packages that connect Julia to Python, Matlab, C++, and more.
 
+## [Public API](@id man-api)
+
+### How does Julia define its public API?
+
+The only interfaces that are stable with respect to [SemVer](https://semver.org/) of `julia`
+version are the Julia `Base` and standard libraries interfaces described in
+[the documentation](https://docs.julialang.org/) and not marked as unstable (e.g.,
+experimental and internal).  Functions, types, and constants are not part of the public
+API if they are not included in the documentation, _even if they have docstrings_.
+
+### There is a useful undocumented function/type/constant. Can I use it?
+
+Updating Julia may break your code if you use non-public API.  If the code is
+self-contained, it may be a good idea to copy it into your project.  If you want to rely on
+a complex non-public API, especially when using it from a stable package, it is a good idea
+to open an [issue](https://github.com/JuliaLang/julia/issues) or
+[pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning it
+into a public API.  However, we do not discourage the attempt to create packages that expose
+stable public interfaces while relying on non-public implementation details of `julia` and
+buffering the differences across different `julia` versions.
+
+### The documentation is not accurate enough. Can I rely on the existing behavior?
+
+Please open an [issue](https://github.com/JuliaLang/julia/issues) or
+[pull request](https://github.com/JuliaLang/julia/pulls) to start a discussion for turning the
+existing behavior into a public API.
+
 ## Sessions and the REPL
 
 ### How do I delete an object in memory?
@@ -28,7 +55,7 @@ session (technically, in module `Main`), it is always present.
 If memory usage is your concern, you can always replace objects with ones that consume less memory.
  For example, if `A` is a gigabyte-sized array that you no longer need, you can free the memory
 with `A = nothing`.  The memory will be released the next time the garbage collector runs; you can force
-this to happen with [`gc()`](@ref Base.GC.gc). Moreover, an attempt to use `A` will likely result in an error, because most methods are not defined on type `Nothing`.
+this to happen with [`GC.gc()`](@ref Base.GC.gc). Moreover, an attempt to use `A` will likely result in an error, because most methods are not defined on type `Nothing`.
 
 ### How can I modify the declaration of a type in my session?
 
@@ -114,6 +141,55 @@ parsing the file once it reaches to the `exec` statement.
     ```
     instead. Note that with this strategy [`PROGRAM_FILE`](@ref) will not be set.
 
+### Why doesn't `run` support `*` or pipes for scripting external programs?
+
+Julia's [`run`](@ref) function launches external programs *directly*, without
+invoking an [operating-system shell](https://en.wikipedia.org/wiki/Shell_(computing))
+(unlike the `system("...")` function in other languages like Python, R, or C).
+That means that `run` does not perform wildcard expansion of `*` (["globbing"](https://en.wikipedia.org/wiki/Glob_(programming))),
+nor does it interpret [shell pipelines](https://en.wikipedia.org/wiki/Pipeline_(Unix)) like `|` or `>`.
+
+You can still do globbing and pipelines using Julia features, however.  For example, the built-in
+[`pipeline`](@ref) function allows you to chain external programs and files, similar to shell pipes, and
+the [Glob.jl package](https://github.com/vtjnash/Glob.jl) implements POSIX-compatible globbing.
+
+You can, of course, run programs through the shell by explicitly passing a shell and a command string to `run`,
+e.g. ```run(`sh -c "ls > files.txt"`)``` to use the Unix [Bourne shell](https://en.wikipedia.org/wiki/Bourne_shell),
+but you should generally prefer pure-Julia scripting like ```run(pipeline(`ls`, "files.txt"))```.
+The reason why we avoid the shell by default is that [shelling out sucks](https://julialang.org/blog/2012/03/shelling-out-sucks/):
+launching processes via the shell is slow, fragile to quoting of special characters,  has poor error handling, and is
+problematic for portability.  (The Python developers came to a [similar conclusion](https://www.python.org/dev/peps/pep-0324/#motivation).)
+
+## Variables and Assignments
+
+### Why am I getting `UndefVarError` from a simple loop?
+
+You might have something like:
+```
+x = 0
+while x < 10
+    x += 1
+end
+```
+and notice that it works fine in an interactive environment (like the Julia REPL),
+but gives `UndefVarError: x not defined` when you try to run it in script or other
+file.   What is going on is that Julia generally requires you to **be explicit about assigning to global variables in a local scope**.
+
+Here, `x` is a global variable, `while` defines a [local scope](@ref scope-of-variables), and `x += 1` is
+an assignment to a global in that local scope.
+
+As mentioned above, Julia (version 1.5 or later) allows you to omit the `global`
+keyword for code in the REPL (and many other interactive environments), to simplify
+exploration (e.g. copy-pasting code from a function to run interactively).
+However, once you move to code in files, Julia requires a more disciplined approach
+to global variables.  You have least three options:
+
+1. Put the code into a function (so that `x` is a *local* variable in a function). In general, it is good software engineering to use functions rather than global scripts (search online for "why global variables bad" to see many explanations). In Julia, global variables are also [slow](@ref man-performance-tips).
+2. Wrap the code in a [`let`](@ref) block.  (This makes `x` a local variable within the `let ... end` statement, again eliminating the need for `global`).
+3. Explicitly mark `x` as `global` inside the local scope before assigning to it, e.g. write `global x += 1`.
+
+More explanation can be found in the manual section [on soft scope](@ref on-soft-scope).
+
 ## Functions
 
 ### I passed an argument `x` to a function, modified it inside that function, but on the outside, the variable `x` is still unchanged. Why?
@@ -207,12 +283,12 @@ have two options:
 
 ### What does the `...` operator do?
 
-### The two uses of the `...` operator: slurping and splatting
+#### The two uses of the `...` operator: slurping and splatting
 
 Many newcomers to Julia find the use of `...` operator confusing. Part of what makes the `...`
 operator confusing is that it means two different things depending on context.
 
-### `...` combines many arguments into one argument in function definitions
+#### `...` combines many arguments into one argument in function definitions
 
 In the context of function definitions, the `...` operator is used to combine many different arguments
 into a single argument. This use of `...` for combining many different arguments into a single
@@ -237,7 +313,7 @@ Arg #3 = 3
 If Julia were a language that made more liberal use of ASCII characters, the slurping operator
 might have been written as `<-...` instead of `...`.
 
-### `...` splits one argument into many different arguments in function calls
+#### `...` splits one argument into many different arguments in function calls
 
 In contrast to the use of the `...` operator to denote slurping many different arguments into
 one argument when defining a function, the `...` operator is also used to cause a single function
@@ -293,23 +369,23 @@ julia> threefloat()
 and similarly:
 
 ```jldoctest
-julia> function threetup()
-           x, y = [3, 3]
+julia> function twothreetup()
+           x, y = [2, 3] # assigns 2 to x and 3 to y
            x, y # returns a tuple
        end
-threetup (generic function with 1 method)
+twothreetup (generic function with 1 method)
 
-julia> function threearr()
-           x, y = [3, 3] # returns an array
+julia> function twothreearr()
+           x, y = [2, 3] # returns an array
        end
-threearr (generic function with 1 method)
+twothreearr (generic function with 1 method)
 
-julia> threetup()
-(3, 3)
+julia> twothreetup()
+(2, 3)
 
-julia> threearr()
+julia> twothreearr()
 2-element Vector{Int64}:
- 3
+ 2
  3
 ```
 
@@ -367,7 +443,7 @@ julia> sqrt(-2.0+0im)
 ### How can I constrain or compute type parameters?
 
 The parameters of a [parametric type](@ref Parametric-Types) can hold either
-types or bits values, and the type itself chooses how it makes use of these parameters.
+types or bits values, and the type itself chooses how it makes use of these parameters.
 For example, `Array{Float64, 2}` is parameterized by the type `Float64` to express its
 element type and the integer value `2` to express its number of dimensions.  When
 defining your own parametric type, you can use subtype constraints to declare that a
@@ -621,7 +697,7 @@ the loop, but it cannot algebraically reduce multiple operations into fewer equi
 
 The most reasonable alternative to having integer arithmetic silently overflow is to do checked
 arithmetic everywhere, raising errors when adds, subtracts, and multiplies overflow, producing
-values that are not value-correct. In this [blog post](http://danluu.com/integer-overflow/), Dan
+values that are not value-correct. In this [blog post](https://danluu.com/integer-overflow/), Dan
 Luu analyzes this and finds that rather than the trivial cost that this approach should in theory
 have, it ends up having a substantial cost due to compilers (LLVM and GCC) not gracefully optimizing
 around the added overflow checks. If this improves in the future, we could consider defaulting
@@ -714,6 +790,32 @@ julia> remotecall_fetch(anon_bar, 2)
 1
 ```
 
+## Troubleshooting "method not matched": parametric type invariance and `MethodError`s
+
+### Why doesn't it work to declare `foo(bar::Vector{Real}) = 42` and then call `foo([1])`?
+
+As you'll see if you try this, the result is a `MethodError`:
+
+```jldoctest
+julia> foo(x::Vector{Real}) = 42
+foo (generic function with 1 method)
+
+julia> foo([1])
+ERROR: MethodError: no method matching foo(::Vector{Int64})
+Closest candidates are:
+  foo(!Matched::Vector{Real}) at none:1
+```
+
+This is because `Vector{Real}` is not a supertype of `Vector{Int}`! You can solve this problem with something
+like `foo(bar::Vector{T}) where {T<:Real}` (or the short form `foo(bar::Vector{<:Real})` if the static parameter `T`
+is not needed in the body of the function). The `T` is a wild card: you first specify that it must be a
+subtype of Real, then specify the function takes a Vector of with elements of that type.
+
+This same issue goes for any composite type `Comp`, not just `Vector`. If `Comp` has a parameter declared of
+type `Y`, then another type `Comp2` with a parameter of type `X<:Y` is not a subtype of `Comp`. This is
+type-invariance (by contrast, Tuple is type-covariant in its parameters). See [Parametric Composite
+Types](@ref man-parametric-composite-types) for more explanation of these.
+
 ### Why does Julia use `*` for string concatenation? Why not `+` or something else?
 
 The [main argument](@ref man-concatenation) against `+` is that string concatenation is not
@@ -781,9 +883,10 @@ no values and no subtypes (except itself). You will generally not need to use th
 
 ### Why does `x += y` allocate memory when `x` and `y` are arrays?
 
-In Julia, `x += y` gets replaced during parsing by `x = x + y`. For arrays, this has the consequence
+In Julia, `x += y` gets replaced during lowering by `x = x + y`. For arrays, this has the consequence
 that, rather than storing the result in the same location in memory as `x`, it allocates a new
-array to store the result.
+array to store the result. If you prefer to mutate `x`, use `x .+= y` to update each element
+individually.
 
 While this behavior might surprise some, the choice is deliberate. The main reason is the presence
 of immutable objects within Julia, which cannot change their value once created.  Indeed, a
@@ -816,8 +919,8 @@ After a call like `x = 5; y = power_by_squaring(x, 4)`, you would get the expect
     `x`, after the call you'd have (in general) `y != x`, but for mutable `x` you'd have `y == x`.
 
 Because supporting generic programming is deemed more important than potential performance optimizations
-that can be achieved by other means (e.g., using explicit loops), operators like `+=` and `*=`
-work by rebinding new values.
+that can be achieved by other means (e.g., using broadcasting or explicit loops), operators like `+=` and
+`*=` work by rebinding new values.
 
 ## [Asynchronous IO and concurrent synchronous writes](@id faq-async-io)
 
@@ -869,7 +972,7 @@ julia> @sync for i in 1:3
 
 ## Arrays
 
-### What are the differences between zero-dimensional arrays and scalars?
+### [What are the differences between zero-dimensional arrays and scalars?](@id faq-array-0dim)
 
 Zero-dimensional arrays are arrays of the form `Array{T,0}`. They behave similar
 to scalars, but there are important differences. They deserve a special mention
@@ -959,15 +1062,15 @@ The Stable version of Julia is the latest released version of Julia, this is the
 It has the latest features, including improved performance.
 The Stable version of Julia is versioned according to [SemVer](https://semver.org/) as v1.x.y.
 A new minor release of Julia corresponding to a new Stable version is made approximately every 4-5 months after a few weeks of testing as a release candidate.
-Unlike the LTS version the a Stable version will not normally recieve bugfixes after another Stable version of Julia has been released.
+Unlike the LTS version the a Stable version will not normally receive bugfixes after another Stable version of Julia has been released.
 However, upgrading to the next Stable release will always be possible as each release of Julia v1.x will continue to run code written for earlier versions.
 
 You may prefer the LTS (Long Term Support) version of Julia if you are looking for a very stable code base.
 The current LTS version of Julia is versioned according to SemVer as v1.0.x;
-this branch will continue to recieve bugfixes until a new LTS branch is chosen, at which point the v1.0.x series will no longer recieved regular bug fixes and all but the most conservative users will be advised to upgrade to the new LTS version series.
+this branch will continue to receive bugfixes until a new LTS branch is chosen, at which point the v1.0.x series will no longer received regular bug fixes and all but the most conservative users will be advised to upgrade to the new LTS version series.
 As a package developer, you may prefer to develop for the LTS version, to maximize the number of users who can use your package.
 As per SemVer, code written for v1.0 will continue to work for all future LTS and Stable versions.
-In general, even if targetting the LTS, one can develop and run code in the latest Stable version, to take advantage of the improved performance; so long as one avoids using new features (such as added library functions or new methods).
+In general, even if targeting the LTS, one can develop and run code in the latest Stable version, to take advantage of the improved performance; so long as one avoids using new features (such as added library functions or new methods).
 
 You may prefer the nightly version of Julia if you want to take advantage of the latest updates to the language, and don't mind if the version available today occasionally doesn't actually work.
 As the name implies, releases to the nightly version are made roughly every night (depending on build infrastructure stability).
diff --git a/doc/src/manual/functions.md b/doc/src/manual/functions.md
index 6dcec615d29948..2724fa32ec3821 100644
--- a/doc/src/manual/functions.md
+++ b/doc/src/manual/functions.md
@@ -63,6 +63,23 @@ are identical to the passed values. Modifications to mutable values (such as `Ar
 a function will be visible to the caller. This is the same behavior found in Scheme, most Lisps,
 Python, Ruby and Perl, among other dynamic languages.
 
+## Argument-type declarations
+
+You can declare the types of function arguments by appending `::TypeName` to the argument name, as usual for [Type Declarations](@ref) in Julia.
+For example, the following function computes [Fibonacci numbers](https://en.wikipedia.org/wiki/Fibonacci_number) recursively:
+```
+fib(n::Integer) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)
+```
+and the `::Integer` specification means that it will only be callable when `n` is a subtype of the [abstract](@ref man-abstract-types) `Integer` type.
+
+Argument-type declarations **normally have no impact on performance**: regardless of what argument types (if any) are declared, Julia compiles a specialized version of the function for the actual argument types passed by the caller.   For example, calling `fib(1)` will trigger the compilation of specialized version of `fib` optimized specifically for `Int` arguments, which is then re-used if `fib(7)` or `fib(15)` are called.  (There are rare exceptions when an argument-type declaration can trigger additional compiler specializations; see: [Be aware of when Julia avoids specializing](@ref).)  The most common reasons to declare argument types in Julia are, instead:
+
+* **Dispatch:** As explained in [Methods](@ref), you can have different versions ("methods") of a function for different argument types, in which case the argument types are used to determine which implementation is called for which arguments.  For example, you might implement a completely different algorithm `fib(x::Number) = ...` that works for any `Number` type by using [Binet's formula](https://en.wikipedia.org/wiki/Fibonacci_number#Binet%27s_formula) to extend it to non-integer values.
+* **Correctness:** Type declarations can be useful if your function only returns correct results for certain argument types.  For example, if we omitted argument types and wrote `fib(n) = n ≤ 2 ? one(n) : fib(n-1) + fib(n-2)`, then `fib(1.5)` would silently give us the nonsensical answer `1.0`.
+* **Clarity:** Type declarations can serve as a form of documentation about the expected arguments.
+
+However, it is a **common mistake to overly restrict the argument types**, which can unnecessarily limit the applicability of the function and prevent it from being re-used in circumstances you did not anticipate.    For example, the `fib(n::Integer)` function above works equally well for `Int` arguments (machine integers) and `BigInt` arbitrary-precision integers (see [BigFloats and BigInts](@ref BigFloats-and-BigInts)), which is especially useful because Fibonacci numbers grow exponentially rapidly and will quickly overflow any fixed-precision type like `Int` (see [Overflow behavior](@ref)).  If we had declared our function as `fib(n::Int)`, however, the application to `BigInt` would have been prevented for no reason.   In general, you should use the most general applicable abstract types for arguments, and **when in doubt, omit the argument types**.  You can always add argument-type specifications later if they become necessary, and you don't sacrifice performance or functionality by omitting them.
+
 ## The `return` Keyword
 
 The value returned by a function is the value of the last expression evaluated, which, by default,
@@ -146,6 +163,10 @@ Int8
 This function will always return an `Int8` regardless of the types of `x` and `y`.
 See [Type Declarations](@ref) for more on return types.
 
+Return type declarations are **rarely used** in Julia: in general, you should
+instead write "type-stable" functions in which Julia's compiler can automatically
+infer the return type.  For more information, see the [Performance Tips](@ref man-performance-tips) chapter.
+
 ### Returning nothing
 
 For functions that do not need to return a value (functions used only for some side effects),
@@ -159,7 +180,7 @@ end
 ```
 
 This is a *convention* in the sense that `nothing` is not a Julia keyword
-but a only singleton object of type `Nothing`.
+but only a singleton object of type `Nothing`.
 Also, you may notice that the `printx` function example above is contrived,
 because `println` already returns `nothing`, so that the `return` line is redundant.
 
@@ -331,12 +352,26 @@ Named tuples are very similar to tuples, except that fields can additionally be
 using dot syntax (`x.a`) in addition to the regular indexing syntax
 (`x[1]`).
 
-## Multiple Return Values
+## [Destructuring Assignment and Multiple Return Values](@id destructuring-assignment)
+
+A comma-separated list of variables (optionally wrapped in parentheses) can appear on the
+left side of an assignment: the value on the right side is _destructured_ by iterating
+over and assigning to each variable in turn:
+
+```jldoctest
+julia> (a,b,c) = 1:3
+1:3
+
+julia> b
+2
+```
+
+The value on the right should be an iterator (see [Iteration interface](@ref man-interface-iteration))
+at least as long as the number of variables on the left (any excess elements of the
+iterator are ignored).
 
-In Julia, one returns a tuple of values to simulate returning multiple values. However, tuples
-can be created and destructured without needing parentheses, thereby providing an illusion that
-multiple values are being returned, rather than a single tuple value. For example, the following
-function returns a pair of values:
+This can be used to return multiple values from functions by returning a tuple or
+other iterable value. For example, the following function returns two values:
 
 ```jldoctest foofunc
 julia> function foo(a,b)
@@ -353,8 +388,7 @@ julia> foo(2,3)
 (5, 6)
 ```
 
-A typical usage of such a pair of return values, however, extracts each value into a variable.
-Julia supports simple tuple "destructuring" that facilitates this:
+Destructuring assignment extracts each value into a variable:
 
 ```jldoctest foofunc
 julia> x, y = foo(2,3)
@@ -367,23 +401,155 @@ julia> y
 6
 ```
 
-You can also return multiple values using the `return` keyword:
+Another common use is for swapping variables:
+```jldoctest foofunc
+julia> y, x = x, y
+(5, 6)
 
-```julia
-function foo(a,b)
-    return a+b, a*b
-end
+julia> x
+6
+
+julia> y
+5
+```
+
+If only a subset of the elements of the iterator are required, a common convention is to assign ignored elements to a variable
+consisting of only underscores `_` (which is an otherwise invalid variable name, see
+[Allowed Variable Names](@ref man-allowed-variable-names)):
+
+```jldoctest
+julia> _, _, _, d = 1:10
+1:10
+
+julia> d
+4
+```
+
+Other valid left-hand side expressions can be used as elements of the assignment list, which will call [`setindex!`](@ref) or [`setproperty!`](@ref), or recursively destructure individual elements of the iterator:
+
+```jldoctest
+julia> X = zeros(3);
+
+julia> X[1], (a,b) = (1, (2, 3))
+(1, (2, 3))
+
+julia> X
+3-element Vector{Float64}:
+ 1.0
+ 0.0
+ 0.0
+
+julia> a
+2
+
+julia> b
+3
+```
+
+!!! compat "Julia 1.6"
+    `...` with assignment requires Julia 1.6
+
+If the last symbol in the assignment list is suffixed by `...` (known as _slurping_), then
+it will be assigned a collection or lazy iterator of the remaining elements of the
+right-hand side iterator:
+
+```jldoctest
+julia> a, b... = "hello"
+"hello"
+
+julia> a
+'h': ASCII/Unicode U+0068 (category Ll: Letter, lowercase)
+
+julia> b
+"ello"
+
+julia> a, b... = Iterators.map(abs2, 1:4)
+Base.Generator{UnitRange{Int64}, typeof(abs2)}(abs2, 1:4)
+
+julia> a
+1
+
+julia> b
+Base.Iterators.Rest{Base.Generator{UnitRange{Int64}, typeof(abs2)}, Int64}(Base.Generator{UnitRange{Int64}, typeof(abs2)}(abs2, 1:4), 1)
 ```
 
-This has the exact same effect as the previous definition of `foo`.
+See [`Base.rest`](@ref) for details on the precise handling and customization for specific iterators.
+
+!!! compat "Julia 1.9"
+    `...` in non-final position of an assignment requires Julia 1.9
+
+Slurping in assignments can also occur in any other position. As opposed to slurping the end
+of a collection however, this will always be eager.
+
+```jldoctest
+julia> a, b..., c = 1:5
+1:5
 
-## Argument destructuring
+julia> a
+1
+
+julia> b
+3-element Vector{Int64}:
+ 2
+ 3
+ 4
+
+julia> c
+5
+
+julia> front..., tail = "Hi!"
+"Hi!"
+
+julia> front
+"Hi"
+
+julia> tail
+'!': ASCII/Unicode U+0021 (category Po: Punctuation, other)
+```
+
+This is implemented in terms of the function [`Base.split_rest`](@ref).
+
+Note that for variadic function definitions, slurping is still only allowed in final position.
+This does not apply to [single argument destructuring](@ref man-argument-destructuring) though,
+as that does not affect method dispatch:
+
+```jldoctest
+julia> f(x..., y) = x
+ERROR: syntax: invalid "..." on non-final argument
+Stacktrace:
+[...]
+
+julia> f((x..., y)) = x
+f (generic function with 1 method)
+
+julia> f((1, 2, 3))
+(1, 2)
+```
+
+## Property destructuring
+
+Instead of destructuring based on iteration, the right side of assignments can also be destructured using property names.
+This follows the syntax for NamedTuples, and works by assigning to each variable on the left a
+property of the right side of the assignment with the same name using `getproperty`:
+
+```jldoctest
+julia> (; b, a) = (a=1, b=2, c=3)
+(a = 1, b = 2, c = 3)
+
+julia> a
+1
+
+julia> b
+2
+```
+
+## [Argument destructuring](@id man-argument-destructuring)
 
 The destructuring feature can also be used within a function argument.
 If a function argument name is written as a tuple (e.g. `(x, y)`) instead of just
 a symbol, then an assignment `(x, y) = argument` will be inserted for you:
 
-```julia
+```julia-repl
 julia> minmax(x, y) = (y < x) ? (y, x) : (x, y)
 
 julia> gap((min, max)) = max - min
@@ -395,6 +561,33 @@ julia> gap(minmax(10, 2))
 Notice the extra set of parentheses in the definition of `gap`. Without those, `gap`
 would be a two-argument function, and this example would not work.
 
+Similarly, property destructuring can also be used for function arguments:
+
+```julia-repl
+julia> foo((; x, y)) = x + y
+foo (generic function with 1 method)
+
+julia> foo((x=1, y=2))
+3
+
+julia> struct A
+           x
+           y
+       end
+
+julia> foo(A(3, 4))
+7
+```
+
+For anonymous functions, destructuring a single argument requires an extra comma:
+
+```
+julia> map(((x,y),) -> x + y, [(1,2), (3,4)])
+2-element Array{Int64,1}:
+ 3
+ 7
+```
+
 ## Varargs Functions
 
 It is often convenient to be able to write functions taking an arbitrary number of arguments.
@@ -681,8 +874,8 @@ end
 ```
 
 The `do x` syntax creates an anonymous function with argument `x` and passes it as the first argument
-to [`map`](@ref). Similarly, `do a,b` would create a two-argument anonymous function, and a
-plain `do` would declare that what follows is an anonymous function of the form `() -> ...`.
+to [`map`](@ref). Similarly, `do a,b` would create a two-argument anonymous function. Note that `do (a,b)` would create a one-argument anonymous function,
+whose argument is a tuple to be deconstructed. A plain `do` would declare that what follows is an anonymous function of the form `() -> ...`.
 
 How these arguments are initialized depends on the "outer" function; here, [`map`](@ref) will
 sequentially set `x` to `A`, `B`, `C`, calling the anonymous function on each, just as would happen
@@ -770,7 +963,7 @@ julia> (sqrt ∘ sum)(1:10)
 7.416198487095663
 ```
 
-The pipe operator can also be used with broadcasting, as `.|>`, to provide a useful combination of the chaining/piping and dot vectorization syntax (described next).
+The pipe operator can also be used with broadcasting, as `.|>`, to provide a useful combination of the chaining/piping and dot vectorization syntax (described below).
 
 ```jldoctest
 julia> ["a", "list", "of", "strings"] .|> [uppercase, reverse, titlecase, length]
@@ -781,6 +974,19 @@ julia> ["a", "list", "of", "strings"] .|> [uppercase, reverse, titlecase, length
  7
 ```
 
+When combining pipes with anonymous functions, parentheses must be used if subsequent pipes are not to parsed as part of the anonymous function's body. Compare:
+
+```jldoctest
+julia> 1:3 .|> (x -> x^2) |> sum |> sqrt
+3.7416573867739413
+
+julia> 1:3 .|> x -> x^2 |> sum |> sqrt
+3-element Vector{Float64}:
+ 1.0
+ 2.0
+ 3.0
+```
+
 ## [Dot Syntax for Vectorizing Functions](@id man-vectorized)
 
 In technical-computing languages, it is common to have "vectorized" versions of functions, which
@@ -838,6 +1044,9 @@ julia> f.(A, B)
  33.0
 ```
 
+Keyword arguments are not broadcasted over, but are simply passed through to each call of
+the function.  For example, `round.(x, digits=3)` is equivalent to `broadcast(x -> round(x, digits=3), x)`.
+
 Moreover, *nested* `f.(args...)` calls are *fused* into a single `broadcast` loop. For example,
 `sin.(cos.(X))` is equivalent to `broadcast(x -> sin(cos(x)), X)`, similar to `[sin(cos(x)) for x in X]`:
 there is only a single loop over `X`, and a single array is allocated for the result. [In contrast,
diff --git a/doc/src/manual/getting-started.md b/doc/src/manual/getting-started.md
index 7c493c2b594986..a3a92c6d7c93c3 100644
--- a/doc/src/manual/getting-started.md
+++ b/doc/src/manual/getting-started.md
@@ -31,112 +31,29 @@ To run code in a file non-interactively, you can give it as the first argument t
 command:
 
 ```
-$ julia script.jl arg1 arg2...
+$ julia script.jl
 ```
 
-As the example implies, the following command-line arguments to `julia` are interpreted as
-command-line arguments to the program `script.jl`, passed in the global constant `ARGS`. The
-name of the script itself is passed in as the global `PROGRAM_FILE`. Note that `ARGS` is
-also set when a Julia expression is given using the `-e` option on the command line (see the
-`julia` help output below) but `PROGRAM_FILE` will be empty. For example, to just print the
-arguments given to a script, you could do this:
+You can pass additional arguments to Julia, and to your program `script.jl`. A detailed list of all the available switches can be found at [Command-line Options](@ref
+command-line-options).
 
-```
-$ julia -e 'println(PROGRAM_FILE); for x in ARGS; println(x); end' foo bar
-
-foo
-bar
-```
-
-Or you could put that code into a script and run it:
-
-```
-$ echo 'println(PROGRAM_FILE); for x in ARGS; println(x); end' > script.jl
-$ julia script.jl foo bar
-script.jl
-foo
-bar
-```
-
-The `--` delimiter can be used to separate command-line arguments intended for the script file from arguments intended for Julia:
-
-```
-$ julia --color=yes -O -- foo.jl arg1 arg2..
-```
-
-See also [Scripting](@ref man-scripting) for more information on writing Julia scripts.
-
-Julia can be started in parallel mode with either the `-p` or the `--machine-file` options. `-p n`
-will launch an additional `n` worker processes, while `--machine-file file` will launch a worker
-for each line in file `file`. The machines defined in `file` must be accessible via a password-less
-`ssh` login, with Julia installed at the same location as the current host. Each machine definition
-takes the form `[count*][user@]host[:port] [bind_addr[:port]]`. `user` defaults to current user,
-`port` to the standard ssh port. `count` is the number of workers to spawn on the node, and defaults
-to 1. The optional `bind-to bind_addr[:port]` specifies the IP address and port that other workers
-should use to connect to this worker.
+## Resources
 
-If you have code that you want executed whenever Julia is run, you can put it in
-`~/.julia/config/startup.jl`:
+A curated list of useful learning resources to help new users get started can be found on the [learning](https://julialang.org/learning/) page of the main Julia website.
 
-```
-$ echo 'println("Greetings! 你好! 안녕하세요?")' > ~/.julia/config/startup.jl
-$ julia
-Greetings! 你好! 안녕하세요?
+You can use the REPL as a learning resource by switching into the help mode.
+Switch to help mode by pressing `?` at an empty `julia> ` prompt, before typing
+anything else. Typing a keyword in help mode will fetch the documentation for
+it, along with examples. Similarly for most functions or other objects you
+might encounter!
 
-...
 ```
+help?> begin
+search: begin disable_sigint reenable_sigint
 
-Note that although you should have a `~/.julia` directory once you've run Julia for the
-first time, you may need to create the `~/.julia/config` folder and the
-`~/.julia/config/startup.jl` file if you use it.
+  begin
 
-There are various ways to run Julia code and provide options, similar to those available for the
-`perl` and `ruby` programs:
-
-```
-julia [switches] -- [programfile] [args...]
+  begin...end denotes a block of code.
 ```
 
-|Switch                                 |Description|
-|:---                                   |:---|
-|`-v`, `--version`                      |Display version information|
-|`-h`, `--help`                         |Print command-line options (this message).|
-|`--project[={<dir>\|@.}]`              |Set <dir> as the home project/environment. The default @. option will search through parent directories until a Project.toml or JuliaProject.toml file is found.|
-|`-J`, `--sysimage <file>`              |Start up with the given system image file|
-|`-H`, `--home <dir>`                   |Set location of `julia` executable|
-|`--startup-file={yes\|no}`             |Load `~/.julia/config/startup.jl`|
-|`--handle-signals={yes\|no}`           |Enable or disable Julia's default signal handlers|
-|`--sysimage-native-code={yes\|no}`     |Use native code from system image if available|
-|`--compiled-modules={yes\|no}`         |Enable or disable incremental precompilation of modules|
-|`-e`, `--eval <expr>`                  |Evaluate `<expr>`|
-|`-E`, `--print <expr>`                 |Evaluate `<expr>` and display the result|
-|`-L`, `--load <file>`                  |Load `<file>` immediately on all processors|
-|`-t`, `--threads {N\|auto`}            |Enable N threads; `auto` currently sets N to the number of local CPU threads but this might change in the future|
-|`-p`, `--procs {N\|auto`}              |Integer value N launches N additional local worker processes; `auto` launches as many workers as the number of local CPU threads (logical cores)|
-|`--machine-file <file>`                |Run processes on hosts listed in `<file>`|
-|`-i`                                   |Interactive mode; REPL runs and `isinteractive()` is true|
-|`-q`, `--quiet`                        |Quiet startup: no banner, suppress REPL warnings|
-|`--banner={yes\|no\|auto}`             |Enable or disable startup banner|
-|`--color={yes\|no\|auto}`              |Enable or disable color text|
-|`--history-file={yes\|no}`             |Load or save history|
-|`--depwarn={yes\|no\|error}`           |Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)|
-|`--warn-overwrite={yes\|no}`           |Enable or disable method overwrite warnings|
-|`-C`, `--cpu-target <target>`          |Limit usage of CPU features up to `<target>`; set to `help` to see the available options|
-|`-O`, `--optimize={0,1,2,3}`           |Set the optimization level (default level is 2 if unspecified or 3 if used without a level)|
-|`-g`, `-g <level>`                     |Enable / Set the level of debug info generation (default level is 1 if unspecified or 2 if used without a level)|
-|`--inline={yes\|no}`                   |Control whether inlining is permitted, including overriding `@inline` declarations|
-|`--check-bounds={yes\|no}`             |Emit bounds checks always or never (ignoring declarations)|
-|`--math-mode={ieee,fast}`              |Disallow or enable unsafe floating point optimizations (overrides @fastmath declaration)|
-|`--code-coverage={none\|user\|all}`    |Count executions of source lines|
-|`--code-coverage`                      |equivalent to `--code-coverage=user`|
-|`--track-allocation={none\|user\|all}` |Count bytes allocated by each source line|
-|`--track-allocation`                   |equivalent to `--track-allocation=user`|
-
-!!! compat "Julia 1.1"
-    In Julia 1.0, the default `--project=@.` option did not search up from the root
-    directory of a Git repository for the `Project.toml` file. From Julia 1.1 forward, it
-    does.
-
-## Resources
-
-A curated list of useful learning resources to help new users get started can be found on the [learning](https://julialang.org/learning/) page of the main Julia web site.
+If you already know Julia a bit, you might want to peek ahead at [Performance Tips](@ref man-performance-tips) and [Workflow Tips](@ref man-workflow-tips).
diff --git a/doc/src/manual/handling-operating-system-variation.md b/doc/src/manual/handling-operating-system-variation.md
index 026d7df26cedde..26583b1379e456 100644
--- a/doc/src/manual/handling-operating-system-variation.md
+++ b/doc/src/manual/handling-operating-system-variation.md
@@ -27,15 +27,16 @@ Complex blocks:
 ```julia
 @static if Sys.islinux()
     linux_specific_thing(a)
+elseif Sys.isapple()
+    apple_specific_thing(a)
 else
     generic_thing(a)
 end
 ```
 
-When chaining conditionals (including `if`/`elseif`/`end`), the `@static` must be repeated for
-each level (parentheses optional, but recommended for readability):
+When nesting conditionals, the `@static` must be repeated for each level
+(parentheses optional, but recommended for readability):
 
 ```julia
 @static Sys.iswindows() ? :a : (@static Sys.isapple() ? :b : :c)
 ```
-
diff --git a/doc/src/manual/integers-and-floating-point-numbers.md b/doc/src/manual/integers-and-floating-point-numbers.md
index 8469c1b04dec07..2d073b83aec0a0 100644
--- a/doc/src/manual/integers-and-floating-point-numbers.md
+++ b/doc/src/manual/integers-and-floating-point-numbers.md
@@ -21,15 +21,15 @@ The following are Julia's primitive numeric types:
 | Type              | Signed? | Number of bits | Smallest value | Largest value |
 |:----------------- |:------- |:-------------- |:-------------- |:------------- |
 | [`Int8`](@ref)    | ✓       | 8              | -2^7           | 2^7 - 1       |
-| [`UInt8`](@ref)   |         | 8              | 0              | 2^8 - 1       |
+| [`UInt8`](@ref)   |         | 8              | 0              | 2^8 - 1       |
 | [`Int16`](@ref)   | ✓       | 16             | -2^15          | 2^15 - 1      |
-| [`UInt16`](@ref)  |         | 16             | 0              | 2^16 - 1      |
+| [`UInt16`](@ref)  |         | 16             | 0              | 2^16 - 1      |
 | [`Int32`](@ref)   | ✓       | 32             | -2^31          | 2^31 - 1      |
-| [`UInt32`](@ref)  |         | 32             | 0              | 2^32 - 1      |
+| [`UInt32`](@ref)  |         | 32             | 0              | 2^32 - 1      |
 | [`Int64`](@ref)   | ✓       | 64             | -2^63          | 2^63 - 1      |
-| [`UInt64`](@ref)  |         | 64             | 0              | 2^64 - 1      |
+| [`UInt64`](@ref)  |         | 64             | 0              | 2^64 - 1      |
 | [`Int128`](@ref)  | ✓       | 128            | -2^127         | 2^127 - 1     |
-| [`UInt128`](@ref) |         | 128            | 0              | 2^128 - 1     |
+| [`UInt128`](@ref) |         | 128            | 0              | 2^128 - 1     |
 | [`Bool`](@ref)    | N/A     | 8              | `false` (0)    | `true` (1)    |
 
   * **Floating-point types:**
@@ -173,8 +173,18 @@ UInt128
 As for hexadecimal literals, binary and octal literals produce unsigned integer types. The size
 of the binary data item is the minimal needed size, if the leading digit of the literal is not
 `0`. In the case of leading zeros, the size is determined by the minimal needed size for a
-literal, which has the same length but leading digit `1`. That allows the user to control
-the size.
+literal, which has the same length but leading digit `1`. It means that:
+
+- `0x1` and `0x12` are `UInt8` literals,
+- `0x123` and `0x1234` are `UInt16` literals,
+- `0x12345` and `0x12345678` are `UInt32` literals,
+- `0x123456789` and `0x1234567890adcdef` are `UInt64` literals, etc.
+
+Even if there are leading zero digits which don’t contribute to the value, they count for
+determining storage size of a literal. So `0x01` is a `UInt8` while `0x0001` is a `UInt16`.
+
+That allows the user to control the size.
+
 Values which cannot be stored in `UInt128` cannot be written as such literals.
 
 Binary, octal, and hexadecimal literals may be signed by a `-` immediately preceding the
@@ -257,7 +267,7 @@ second argument is zero.
 ## Floating-Point Numbers
 
 Literal floating-point numbers are represented in the standard formats, using
-[E-notation](https://en.wikipedia.org/wiki/Scientific_notation#E-notation) when necessary:
+[E-notation](https://en.wikipedia.org/wiki/Scientific_notation#E_notation) when necessary:
 
 ```jldoctest
 julia> 1.0
@@ -369,6 +379,7 @@ the real number line:
 | `-Inf16`  | `-Inf32`  | `-Inf`    | negative infinity | a value less than all finite floating-point values              |
 | `NaN16`   | `NaN32`   | `NaN`     | not a number      | a value not `==` to any floating-point value (including itself) |
 
+
 For further discussion of how these non-finite floating-point values are ordered with respect
 to each other and other floats, see [Numeric Comparisons](@ref). By the [IEEE 754 standard](https://en.wikipedia.org/wiki/IEEE_754-2008),
 these floating-point values are the results of certain arithmetic operations:
@@ -409,6 +420,18 @@ NaN
 
 julia> 0 * Inf
 NaN
+
+julia> NaN == NaN
+false
+
+julia> NaN != NaN
+true
+
+julia> NaN < NaN
+false
+
+julia> NaN > NaN
+false
 ```
 
 The [`typemin`](@ref) and [`typemax`](@ref) functions also apply to floating-point types:
@@ -521,7 +544,7 @@ most books on scientific computation, and also in the following references:
     abstraction of real numbers.
   * Also recommended is Bruce Dawson's [series of blog posts on floating-point numbers](https://randomascii.wordpress.com/2012/05/20/thats-not-normalthe-performance-of-odd-floats/).
   * For an excellent, in-depth discussion of floating-point numbers and issues of numerical accuracy
-    encountered when computing with them, see David Goldberg's paper [What Every Computer Scientist Should Know About Floating-Point Arithmetic](http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.22.6768&rep=rep1&type=pdf).
+    encountered when computing with them, see David Goldberg's paper [What Every Computer Scientist Should Know About Floating-Point Arithmetic](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.22.6768&rep=rep1&type=pdf).
   * For even more extensive documentation of the history of, rationale for, and issues with floating-point
     numbers, as well as discussion of many other topics in numerical computing, see the [collected writings](https://people.eecs.berkeley.edu/~wkahan/)
     of [William Kahan](https://en.wikipedia.org/wiki/William_Kahan), commonly known as the "Father
@@ -534,9 +557,18 @@ the [GNU Multiple Precision Arithmetic Library (GMP)](https://gmplib.org) and th
 respectively. The [`BigInt`](@ref) and [`BigFloat`](@ref) types are available in Julia for arbitrary
 precision integer and floating point numbers respectively.
 
-Constructors exist to create these types from primitive numerical types, and the [string literal](@ref non-standard-string-literals) [`@big_str`](@ref) or [`parse`](@ref)
-can be used to construct them from `AbstractString`s.  Once created, they participate in arithmetic
-with all other numeric types thanks to Julia's [type promotion and conversion mechanism](@ref conversion-and-promotion):
+Constructors exist to create these types from primitive numerical types, and the
+[string literal](@ref non-standard-string-literals) [`@big_str`](@ref) or [`parse`](@ref)
+can be used to construct them from `AbstractString`s.
+`BigInt`s can also be input as integer literals when
+they are too big for other built-in integer types. Note that as there
+is no unsigned arbitrary-precision integer type in `Base` (`BigInt` is
+sufficient in most cases), hexadecimal, octal and binary literals can
+be used (in addition to decimal literals).
+
+Once created, they participate in arithmetic
+with all other numeric types thanks to Julia's
+[type promotion and conversion mechanism](@ref conversion-and-promotion):
 
 ```jldoctest
 julia> BigInt(typemax(Int64)) + 1
@@ -548,6 +580,18 @@ julia> big"123456789012345678901234567890" + 1
 julia> parse(BigInt, "123456789012345678901234567890") + 1
 123456789012345678901234567891
 
+julia> string(big"2"^200, base=16)
+"100000000000000000000000000000000000000000000000000"
+
+julia> 0x100000000000000000000000000000000-1 == typemax(UInt128)
+true
+
+julia> 0x000000000000000000000000000000000
+0
+
+julia> typeof(ans)
+BigInt
+
 julia> big"1.23456789012345678901"
 1.234567890123456789010000000000000000000000000000000000000000000000000000000004
 
diff --git a/doc/src/manual/interfaces.md b/doc/src/manual/interfaces.md
index 6349e1555cbc22..8c29bd70ca1415 100644
--- a/doc/src/manual/interfaces.md
+++ b/doc/src/manual/interfaces.md
@@ -7,28 +7,29 @@ to generically build upon those behaviors.
 
 ## [Iteration](@id man-interface-iteration)
 
-| Required methods               |                        | Brief description                                                                     |
+| Required methods               |                        | Brief description                                                                     |
 |:------------------------------ |:---------------------- |:------------------------------------------------------------------------------------- |
-| `iterate(iter)`                |                        | Returns either a tuple of the first item and initial state or [`nothing`](@ref) if empty        |
-| `iterate(iter, state)`         |                        | Returns either a tuple of the next item and next state or `nothing` if no items remain  |
+| `iterate(iter)`                |                        | Returns either a tuple of the first item and initial state or [`nothing`](@ref) if empty        |
+| `iterate(iter, state)`         |                        | Returns either a tuple of the next item and next state or `nothing` if no items remain  |
 | **Important optional methods** | **Default definition** | **Brief description**                                                                 |
-| `IteratorSize(IterType)`       | `HasLength()`          | One of `HasLength()`, `HasShape{N}()`, `IsInfinite()`, or `SizeUnknown()` as appropriate |
-| `IteratorEltype(IterType)`     | `HasEltype()`          | Either `EltypeUnknown()` or `HasEltype()` as appropriate                              |
+| `Base.IteratorSize(IterType)`  | `Base.HasLength()`     | One of `Base.HasLength()`, `Base.HasShape{N}()`, `Base.IsInfinite()`, or `Base.SizeUnknown()` as appropriate |
+| `Base.IteratorEltype(IterType)`| `Base.HasEltype()`     | Either `Base.EltypeUnknown()` or `Base.HasEltype()` as appropriate                    |
 | `eltype(IterType)`             | `Any`                  | The type of the first entry of the tuple returned by `iterate()`                      |
 | `length(iter)`                 | (*undefined*)          | The number of items, if known                                                         |
 | `size(iter, [dim])`            | (*undefined*)          | The number of items in each dimension, if known                                       |
+| `Base.isdone(iter[, state])`   | `missing`              | Fast-path hint for iterator completion. Should be defined for stateful iterators, or else `isempty(iter)` may call `iterate(iter[, state])` and mutate the iterator. |
 
 | Value returned by `IteratorSize(IterType)` | Required Methods                           |
 |:------------------------------------------ |:------------------------------------------ |
-| `HasLength()`                              | [`length(iter)`](@ref)                     |
-| `HasShape{N}()`                            | `length(iter)`  and `size(iter, [dim])`    |
-| `IsInfinite()`                             | (*none*)                                   |
-| `SizeUnknown()`                            | (*none*)                                   |
+| `Base.HasLength()`                         | [`length(iter)`](@ref)                     |
+| `Base.HasShape{N}()`                       | `length(iter)`  and `size(iter, [dim])`    |
+| `Base.IsInfinite()`                        | (*none*)                                   |
+| `Base.SizeUnknown()`                       | (*none*)                                   |
 
 | Value returned by `IteratorEltype(IterType)` | Required Methods   |
 |:-------------------------------------------- |:------------------ |
-| `HasEltype()`                                | `eltype(IterType)` |
-| `EltypeUnknown()`                            | (*none*)           |
+| `Base.HasEltype()`                           | `eltype(IterType)` |
+| `Base.EltypeUnknown()`                       | (*none*)           |
 
 Sequential iteration is implemented by the [`iterate`](@ref) function. Instead
 of mutating objects as they are iterated over, Julia iterators may keep track
@@ -41,7 +42,7 @@ Any object that defines this function is iterable and can be used in the [many f
 It can also be used directly in a [`for`](@ref) loop since the syntax:
 
 ```julia
-for i in iter   # or  "for i = iter"
+for item in iter   # or  "for item = iter"
     # body
 end
 ```
@@ -51,7 +52,7 @@ is translated into:
 ```julia
 next = iterate(iter)
 while next !== nothing
-    (i, state) = next
+    (item, state) = next
     # body
     next = iterate(iter, state)
 end
@@ -71,8 +72,8 @@ With only [`iterate`](@ref) definition, the `Squares` type is already pretty pow
 We can iterate over all the elements:
 
 ```jldoctest squaretype
-julia> for i in Squares(7)
-           println(i)
+julia> for item in Squares(7)
+           println(item)
        end
 1
 4
@@ -193,6 +194,10 @@ julia> Squares(23)[end]
 529
 ```
 
+For multi-dimensional `begin`/`end` indexing as in `a[3, begin, 7]`, for example,
+you should define `firstindex(a, dim)` and `lastindex(a, dim)`
+(which default to calling `first` and `last` on `axes(a, dim)`, respectively).
+
 Note, though, that the above *only* defines [`getindex`](@ref) with one integer index. Indexing with
 anything other than an `Int` will throw a [`MethodError`](@ref) saying that there was no matching method.
 In order to support indexing with ranges or vectors of `Int`s, separate methods must be written:
@@ -216,13 +221,13 @@ ourselves, we can officially define it as a subtype of an [`AbstractArray`](@ref
 
 ## [Abstract Arrays](@id man-interface-array)
 
-| Methods to implement                            |                                        | Brief description                                                                     |
+| Methods to implement                            |                                        | Brief description                                                                     |
 |:----------------------------------------------- |:-------------------------------------- |:------------------------------------------------------------------------------------- |
-| `size(A)`                                       |                                        | Returns a tuple containing the dimensions of `A`                                      |
-| `getindex(A, i::Int)`                           |                                        | (if `IndexLinear`) Linear scalar indexing                                             |
-| `getindex(A, I::Vararg{Int, N})`                |                                        | (if `IndexCartesian`, where `N = ndims(A)`) N-dimensional scalar indexing             |
-| `setindex!(A, v, i::Int)`                       |                                        | (if `IndexLinear`) Scalar indexed assignment                                          |
-| `setindex!(A, v, I::Vararg{Int, N})`            |                                        | (if `IndexCartesian`, where `N = ndims(A)`) N-dimensional scalar indexed assignment   |
+| `size(A)`                                       |                                        | Returns a tuple containing the dimensions of `A`                                      |
+| `getindex(A, i::Int)`                           |                                        | (if `IndexLinear`) Linear scalar indexing                                             |
+| `getindex(A, I::Vararg{Int, N})`                |                                        | (if `IndexCartesian`, where `N = ndims(A)`) N-dimensional scalar indexing             |
+| `setindex!(A, v, i::Int)`                       |                                        | (if `IndexLinear`) Scalar indexed assignment                                          |
+| `setindex!(A, v, I::Vararg{Int, N})`            |                                        | (if `IndexCartesian`, where `N = ndims(A)`) N-dimensional scalar indexed assignment   |
 | **Optional methods**                            | **Default definition**                 | **Brief description**                                                                 |
 | `IndexStyle(::Type)`                            | `IndexCartesian()`                     | Returns either `IndexLinear()` or `IndexCartesian()`. See the description below.      |
 | `getindex(A, I...)`                             | defined in terms of scalar `getindex`  | [Multidimensional and nonscalar indexing](@ref man-array-indexing)                    |
@@ -234,7 +239,7 @@ ourselves, we can officially define it as a subtype of an [`AbstractArray`](@ref
 | `similar(A, dims::Dims)`                        | `similar(A, eltype(A), dims)`          | Return a mutable array with the same element type and size *dims*                     |
 | `similar(A, ::Type{S}, dims::Dims)`             | `Array{S}(undef, dims)`                | Return a mutable array with the specified element type and size                       |
 | **Non-traditional indices**                     | **Default definition**                 | **Brief description**                                                                 |
-| `axes(A)`                                    | `map(OneTo, size(A))`                  | Return the `AbstractUnitRange` of valid indices                                       |
+| `axes(A)`                                    | `map(OneTo, size(A))`                  | Return a tuple of `AbstractUnitRange{<:Integer}` of valid indices                    |
 | `similar(A, ::Type{S}, inds)`              | `similar(A, S, Base.to_shape(inds))`   | Return a mutable array with the specified indices `inds` (see below)                  |
 | `similar(T::Union{Type,Function}, inds)`   | `T(Base.to_shape(inds))`               | Return an array similar to `T` with the specified indices `inds` (see below)          |
 
@@ -253,7 +258,7 @@ provides a traits-based mechanism to enable efficient generic code for all array
 
 This distinction determines which scalar indexing methods the type must define. `IndexLinear()`
 arrays are simple: just define `getindex(A::ArrayType, i::Int)`.  When the array is subsequently
-indexed with a multidimensional set of indices, the fallback `getindex(A::AbstractArray, I...)()`
+indexed with a multidimensional set of indices, the fallback `getindex(A::AbstractArray, I...)`
 efficiently converts the indices into one linear index and then calls the above method. `IndexCartesian()`
 arrays, on the other hand, require methods to be defined for each supported dimensionality with
 `ndims(A)` `Int` indices. For example, [`SparseMatrixCSC`](@ref) from the `SparseArrays` standard
@@ -367,7 +372,7 @@ julia> A[1:2,:]
  2.0  5.0  8.0
 ```
 
-In this example it is accomplished by defining `Base.similar{T}(A::SparseArray, ::Type{T}, dims::Dims)`
+In this example it is accomplished by defining `Base.similar(A::SparseArray, ::Type{T}, dims::Dims) where T`
 to create the appropriate wrapped array. (Note that while `similar` supports 1- and 2-argument
 forms, in most case you only need to specialize the 3-argument form.) For this to work it's important
 that `SparseArray` is mutable (supports `setindex!`). Defining `similar`, `getindex` and
@@ -403,12 +408,13 @@ perhaps range-types `Ind` of your own design. For more information, see
 
 ## [Strided Arrays](@id man-interface-strided-arrays)
 
-| Methods to implement                            |                                        | Brief description                                                                     |
+| Methods to implement                            |                                        | Brief description                                                                     |
 |:----------------------------------------------- |:-------------------------------------- |:------------------------------------------------------------------------------------- |
-| `strides(A)`                             |                                        | Return the distance in memory (in number of elements) between adjacent elements in each dimension as a tuple. If `A` is an `AbstractArray{T,0}`, this should return an empty tuple.    |
-| `Base.unsafe_convert(::Type{Ptr{T}}, A)`        |                                        | Return the native address of an array.                                     |
-| **Optional methods**                            | **Default definition**                 | **Brief description**                                                                 |
-| `stride(A, i::Int)`                             |     `strides(A)[i]`                                   | Return the distance in memory (in number of elements) between adjacent elements in dimension k.    |
+| `strides(A)`                                    |                                        | Return the distance in memory (in number of elements) between adjacent elements in each dimension as a tuple. If `A` is an `AbstractArray{T,0}`, this should return an empty tuple.    |
+| `Base.unsafe_convert(::Type{Ptr{T}}, A)`        |                                        | Return the native address of an array.                                                             |
+| `Base.elsize(::Type{<:A})`                      |                                        | Return the stride between consecutive elements in the array.                                       |
+| **Optional methods**                            | **Default definition**                 | **Brief description**                                                                              |
+| `stride(A, i::Int)`                             |     `strides(A)[i]`                    | Return the distance in memory (in number of elements) between adjacent elements in dimension k.    |
 
 A strided array is a subtype of `AbstractArray` whose entries are stored in memory with fixed strides.
 Provided the element type of the array is compatible with BLAS, a strided array can utilize BLAS and LAPACK routines
@@ -474,7 +480,7 @@ they are iterable collections of their characters (see [Strings](@ref) for more)
 The next two steps (selecting the output array and implementation) are dependent upon
 determining a single answer for a given set of arguments. Broadcast must take all the varied
 types of its arguments and collapse them down to just one output array and one
-implementation. Broadcast calls this single answer a "style." Every broadcastable object
+implementation. Broadcast calls this single answer a "style". Every broadcastable object
 each has its own preferred style, and a promotion-like system is used to combine these
 styles into a single answer — the "destination style".
 
@@ -544,7 +550,7 @@ Base.showarg(io::IO, A::ArrayAndChar, toplevel) = print(io, typeof(A), " with ch
 
 ```
 
-You might want broadcasting to preserve the `char` "metadata." First we define
+You might want broadcasting to preserve the `char` "metadata". First we define
 
 ```jldoctest ArrayAndChar; output = false
 Base.BroadcastStyle(::Type{<:ArrayAndChar}) = Broadcast.ArrayStyle{ArrayAndChar}()
@@ -701,7 +707,7 @@ array types that have fixed dimensionality requirements.
 BroadcastStyle(a::AbstractArrayStyle{Any}, ::DefaultArrayStyle) = a
 BroadcastStyle(a::AbstractArrayStyle{N}, ::DefaultArrayStyle{N}) where N = a
 BroadcastStyle(a::AbstractArrayStyle{M}, ::DefaultArrayStyle{N}) where {M,N} =
-    typeof(a)(_max(Val(M),Val(N)))
+    typeof(a)(Val(max(M, N)))
 ```
 
 You do not need to write binary `BroadcastStyle`
diff --git a/doc/src/manual/mathematical-operations.md b/doc/src/manual/mathematical-operations.md
index ce34e7a1fa9be8..21722a5e80684e 100644
--- a/doc/src/manual/mathematical-operations.md
+++ b/doc/src/manual/mathematical-operations.md
@@ -9,18 +9,18 @@ collection of standard mathematical functions.
 The following [arithmetic operators](https://en.wikipedia.org/wiki/Arithmetic#Arithmetic_operations)
 are supported on all primitive numeric types:
 
-| Expression | Name           | Description                            |
-|:---------- |:-------------- |:-------------------------------------- |
-| `+x`       | unary plus     | the identity operation                 |
-| `-x`       | unary minus    | maps values to their additive inverses |
-| `x + y`    | binary plus    | performs addition                      |
-| `x - y`    | binary minus   | performs subtraction                   |
-| `x * y`    | times          | performs multiplication                |
-| `x / y`    | divide         | performs division                      |
-| `x ÷ y`    | integer divide | x / y, truncated to an integer         |
-| `x \ y`    | inverse divide | equivalent to `y / x`                  |
-| `x ^ y`    | power          | raises `x` to the `y`th power          |
-| `x % y`    | remainder      | equivalent to `rem(x,y)`               |
+| Expression | Name           | Description                             |
+|:---------- |:-------------- |:----------------------------------------|
+| `+x`       | unary plus     | the identity operation                  |
+| `-x`       | unary minus    | maps values to their additive inverses  |
+| `x + y`    | binary plus    | performs addition                       |
+| `x - y`    | binary minus   | performs subtraction                    |
+| `x * y`    | times          | performs multiplication                 |
+| `x / y`    | divide         | performs division                       |
+| `x ÷ y`    | integer divide | x / y, truncated to an integer          |
+| `x \ y`    | inverse divide | equivalent to `y / x`                   |
+| `x ^ y`    | power          | raises `x` to the `y`th power           |
+| `x % y`    | remainder      | equivalent to `rem(x,y)`                |
 
 A numeric literal placed directly before an identifier or parentheses, e.g. `2x` or `2(x+y)`, is treated as a multiplication, except with higher precedence than other binary operations.  See [Numeric Literal Coefficients](@ref man-numeric-literal-coefficients) for details.
 
@@ -28,6 +28,8 @@ Julia's promotion system makes arithmetic operations on mixtures of argument typ
 naturally and automatically. See [Conversion and Promotion](@ref conversion-and-promotion) for details of the promotion
 system.
 
+The ÷ sign can be conveniently typed by writing `\div<tab>` to the REPL or Julia IDE. See the [manual section on Unicode input](@ref Unicode-Input) for more information.
+
 Here are some simple examples using arithmetic operators:
 
 ```jldoctest
@@ -67,7 +69,7 @@ The following [Boolean operators](https://en.wikipedia.org/wiki/Boolean_algebra#
 | `x && y`   | [short-circuiting and](@ref man-conditional-evaluation) |
 | `x \|\| y` | [short-circuiting or](@ref man-conditional-evaluation)  |
 
-Negation changes `true` to `false` and vice versa. The short-circuiting opeations are explained on the linked page.
+Negation changes `true` to `false` and vice versa. The short-circuiting operations are explained on the linked page.
 
 Note that `Bool` is an integer type and all the usual promotion rules and numeric operators are also defined on it.
 
@@ -82,6 +84,8 @@ are supported on all primitive integer types:
 | `x & y`    | bitwise and                                                              |
 | `x \| y`   | bitwise or                                                               |
 | `x ⊻ y`    | bitwise xor (exclusive or)                                               |
+| `x ⊼ y`    | bitwise nand (not and)                                                   |
+| `x ⊽ y`    | bitwise nor (not or)                                                     |
 | `x >>> y`  | [logical shift](https://en.wikipedia.org/wiki/Logical_shift) right       |
 | `x >> y`   | [arithmetic shift](https://en.wikipedia.org/wiki/Arithmetic_shift) right |
 | `x << y`   | logical/arithmetic shift left                                            |
@@ -104,6 +108,18 @@ julia> 123 ⊻ 234
 julia> xor(123, 234)
 145
 
+julia> nand(123, 123)
+-124
+
+julia> 123 ⊼ 123
+-124
+
+julia> nor(123, 124)
+-128
+
+julia> 123 ⊽ 124
+-128
+
 julia> ~UInt32(123)
 0xffffff84
 
@@ -179,7 +195,7 @@ all vectorized "dot calls," these "dot operators" are
 *fusing*. For example, if you compute `2 .* A.^2 .+ sin.(A)` (or
 equivalently `@. 2A^2 + sin(A)`, using the [`@.`](@ref @__dot__) macro) for
 an array `A`, it performs a *single* loop over `A`, computing `2a^2 + sin(a)`
-for each element of `A`. In particular, nested dot calls like `f.(g.(x))`
+for each element `a` of `A`. In particular, nested dot calls like `f.(g.(x))`
 are fused, and "adjacent" binary operators like `x .+ 3 .* x.^2` are
 equivalent to nested dot calls `(+).(x, (*).(3, (^).(x, 2)))`.
 
@@ -401,8 +417,6 @@ For a complete list of *every* Julia operator's precedence, see the top of this
 [`src/julia-parser.scm`](https://github.com/JuliaLang/julia/blob/master/src/julia-parser.scm). Note that some of the operators there are not defined
 in the `Base` module but may be given definitions by standard libraries, packages or user code.
 
-[Numeric literal coefficients](@ref man-numeric-literal-coefficients), e.g. `2x`, are treated as multiplications with higher precedence than any other binary operation, and also have higher precedence than `^`.
-
 You can also find the numerical precedence for any given operator via the built-in function `Base.operator_precedence`, where higher numbers take precedence:
 
 ```jldoctest
@@ -426,6 +440,18 @@ julia> Base.operator_associativity(:⊗), Base.operator_associativity(:sin), Bas
 Note that symbols such as `:sin` return precedence `0`. This value represents invalid operators and not
 operators of lowest precedence. Similarly, such operators are assigned associativity `:none`.
 
+[Numeric literal coefficients](@ref man-numeric-literal-coefficients), e.g. `2x`, are treated as multiplications with higher precedence than any other binary operation, with the exception of `^` where they have higher precedence only as the exponent.
+
+```jldoctest
+julia> x = 3; 2x^2
+18
+
+julia> x = 3; 2^2x
+64
+```
+
+Juxtaposition parses like a unary operator, which has the same natural asymmetry around exponents: `-x^y` and `2x^y` parse as `-(x^y)` and `2(x^y)` whereas `x^-y` and `x^2y` parse as `x^(-y)` and `x^(2y)`.
+
 ## Numerical Conversions
 
 Julia supports three forms of numerical conversion, which differ in their handling of inexact
@@ -503,7 +529,7 @@ See [Conversion and Promotion](@ref conversion-and-promotion) for how to define
 | [`div(x,y)`](@ref), `x÷y` | truncated division; quotient rounded towards zero                                                         |
 | [`fld(x,y)`](@ref)        | floored division; quotient rounded towards `-Inf`                                                         |
 | [`cld(x,y)`](@ref)        | ceiling division; quotient rounded towards `+Inf`                                                         |
-| [`rem(x,y)`](@ref)        | remainder; satisfies `x == div(x,y)*y + rem(x,y)`; sign matches `x`                                       |
+| [`rem(x,y)`](@ref), `x%y` | remainder; satisfies `x == div(x,y)*y + rem(x,y)`; sign matches `x`                                       |
 | [`mod(x,y)`](@ref)        | modulus; satisfies `x == fld(x,y)*y + mod(x,y)`; sign matches `y`                                         |
 | [`mod1(x,y)`](@ref)       | `mod` with offset 1; returns `r∈(0,y]` for `y>0` or `r∈[y,0)` for `y<0`, where `mod(r, y) == mod(x, y)`   |
 | [`mod2pi(x)`](@ref)       | modulus with respect to 2pi;  `0 <= mod2pi(x) < 2pi`                                                      |
diff --git a/doc/src/manual/metaprogramming.md b/doc/src/manual/metaprogramming.md
index eac6605ff3771a..a374b9c879e6af 100644
--- a/doc/src/manual/metaprogramming.md
+++ b/doc/src/manual/metaprogramming.md
@@ -149,7 +149,7 @@ julia> :(::)
 The second syntactic purpose of the `:` character is to create expression objects without using
 the explicit [`Expr`](@ref) constructor. This is referred to as *quoting*. The `:` character, followed
 by paired parentheses around a single statement of Julia code, produces an `Expr` object based
-on the enclosed code. Here is example of the short form used to quote an arithmetic expression:
+on the enclosed code. Here is an example of the short form used to quote an arithmetic expression:
 
 ```jldoctest
 julia> ex = :(a+b*c+1)
@@ -466,7 +466,7 @@ julia> eval(ex)
 
 ## [Macros](@id man-macros)
 
-Macros provide a method to include generated code in the final body of a program. A macro maps
+Macros provide a mechanism to include generated code in the final body of a program. A macro maps
 a tuple of arguments to a returned *expression*, and the resulting expression is compiled directly
 rather than requiring a runtime [`eval`](@ref) call. Macro arguments may include expressions,
 literal values, and symbols.
@@ -981,13 +981,13 @@ block:
 end
 ```
 
-## Non-Standard String Literals
+## [Non-Standard String Literals](@id meta-non-standard-string-literals)
 
 Recall from [Strings](@ref non-standard-string-literals) that string literals prefixed by an identifier are called non-standard
 string literals, and can have different semantics than un-prefixed string literals. For example:
 
-  * `r"^\s*(?:#|$)"` produces a regular expression object rather than a string
-  * `b"DATA\xff\u2200"` is a byte array literal for `[68,65,84,65,255,226,136,128]`.
+  * `r"^\s*(?:#|$)"` produces a [regular expression object](@ref man-regex-literals) rather than a string
+  * `b"DATA\xff\u2200"` is a [byte array literal](@ref man-byte-array-literals) for `[68,65,84,65,255,226,136,128]`.
 
 Perhaps surprisingly, these behaviors are not hard-coded into the Julia parser or compiler. Instead,
 they are custom behaviors provided by a general mechanism that anyone can use: prefixed string
@@ -1051,20 +1051,9 @@ constructed on each iteration. In the vast majority of use cases, however, regul
 are not constructed based on run-time data. In this majority of cases, the ability to write regular
 expressions as compile-time values is invaluable.
 
-Like non-standard string literals, non-standard command literals exist using a prefixed variant
-of the command literal syntax. The command literal ```custom`literal` ``` is parsed as `@custom_cmd "literal"`.
-Julia itself does not contain any non-standard command literals, but packages can make use of
-this syntax. Aside from the different syntax and the `_cmd` suffix instead of the `_str` suffix,
-non-standard command literals behave exactly like non-standard string literals.
-
-In the event that two modules provide non-standard string or command literals with the same name,
-it is possible to qualify the string or command literal with a module name. For instance, if both
-`Foo` and `Bar` provide non-standard string literal `@x_str`, then one can write `Foo.x"literal"`
-or `Bar.x"literal"` to disambiguate between the two.
-
 The mechanism for user-defined string literals is deeply, profoundly powerful. Not only are Julia's
-non-standard literals implemented using it, but also the command literal syntax (``` `echo "Hello, $person"` ```)
-is implemented with the following innocuous-looking macro:
+non-standard literals implemented using it, but the command literal syntax (``` `echo "Hello, $person"` ```)
+is also implemented using the following innocuous-looking macro:
 
 ```julia
 macro cmd(str)
@@ -1077,6 +1066,20 @@ but they are just functions, written entirely in Julia. You can read their sourc
 what they do -- and all they do is construct expression objects to be inserted into your program's
 syntax tree.
 
+Like string literals, command literals can also be prefixed by an identifier
+to form what are called non-standard command literals. These command literals are parsed
+as calls to specially-named macros. For example, the syntax ```custom`literal` ``` is parsed
+as `@custom_cmd "literal"`.
+Julia itself does not contain any non-standard command literals, but packages can make use of
+this syntax. Aside from the different syntax and the `_cmd` suffix instead of the `_str` suffix,
+non-standard command literals behave exactly like non-standard string literals.
+
+In the event that two modules provide non-standard string or command literals with the same name,
+it is possible to qualify the string or command literal with a module name. For instance, if both
+`Foo` and `Bar` provide non-standard string literal `@x_str`, then one can write `Foo.x"literal"`
+or `Bar.x"literal"` to disambiguate between the two.
+
+
 Another way to define a macro would be like this:
 
 ```julia
diff --git a/doc/src/manual/methods.md b/doc/src/manual/methods.md
index eb03c6190828f3..5839256212923b 100644
--- a/doc/src/manual/methods.md
+++ b/doc/src/manual/methods.md
@@ -40,6 +40,11 @@ for structuring and organizing programs.
     an explicit method argument. When the current `this` object is the receiver of a method call,
     it can be omitted altogether, writing just `meth(arg1,arg2)`, with `this` implied as the receiving
     object.
+!!! note
+    All the examples in this chapter assume that you are defining methods for a function in the *same*
+    module. If you want to add methods to a function in *another* module, you have to `import` it or
+    use the name qualified with module names. See the section on [namespace management](@ref
+    namespace-management).
 
 ## Defining Methods
 
@@ -252,8 +257,8 @@ julia> g(2, 3.0)
 
 julia> g(2.0, 3.0)
 ERROR: MethodError: g(::Float64, ::Float64) is ambiguous. Candidates:
-  g(x, y::Float64) in Main at none:1
   g(x::Float64, y) in Main at none:1
+  g(x, y::Float64) in Main at none:1
 Possible fix, define
   g(::Float64, ::Float64)
 ```
@@ -541,38 +546,19 @@ Here are a few common design patterns that come up sometimes when using dispatch
 ### Extracting the type parameter from a super-type
 
 
-Here is the correct code template for returning the element-type `T`
-of any arbitrary subtype of `AbstractArray`:
+Here is a correct code template for returning the element-type `T`
+of any arbitrary subtype of `AbstractArray` that has well-defined
+element type:
 
 ```julia
 abstract type AbstractArray{T, N} end
 eltype(::Type{<:AbstractArray{T}}) where {T} = T
 ```
-using so-called triangular dispatch.  Note that if `T` is a `UnionAll`
-type, as e.g. `eltype(Array{T} where T <: Integer)`, then `Any` is
-returned (as does the version of `eltype` in `Base`).
-
-Another way, which used to be the only correct way before the advent of
-triangular dispatch in Julia v0.6, is:
 
-```julia
-abstract type AbstractArray{T, N} end
-eltype(::Type{AbstractArray}) = Any
-eltype(::Type{AbstractArray{T}}) where {T} = T
-eltype(::Type{AbstractArray{T, N}}) where {T, N} = T
-eltype(::Type{A}) where {A<:AbstractArray} = eltype(supertype(A))
-```
-
-Another possibility is the following, which could be useful to adapt
-to cases where the parameter `T` would need to be matched more
-narrowly:
-```julia
-eltype(::Type{AbstractArray{T, N} where {T<:S, N<:M}}) where {M, S} = Any
-eltype(::Type{AbstractArray{T, N} where {T<:S}}) where {N, S} = Any
-eltype(::Type{AbstractArray{T, N} where {N<:M}}) where {M, T} = T
-eltype(::Type{AbstractArray{T, N}}) where {T, N} = T
-eltype(::Type{A}) where {A <: AbstractArray} = eltype(supertype(A))
-```
+using so-called triangular dispatch.  Note that `UnionAll` types, for
+example `eltype(AbstractArray{T} where T <: Integer)`, do not match the
+above method. The implementation of `eltype` in `Base` adds a fallback
+method to `Any` for such cases.
 
 
 One common mistake is to try and get the element-type by using introspection:
@@ -591,6 +577,25 @@ Here we have created a type `BitVector` which has no parameters,
 but where the element-type is still fully specified, with `T` equal to `Bool`!
 
 
+Another mistake is to try to walk up the type hierarchy using
+`supertype`:
+```julia
+eltype_wrong(::Type{AbstractArray{T}}) where {T} = T
+eltype_wrong(::Type{AbstractArray{T, N}}) where {T, N} = T
+eltype_wrong(::Type{A}) where {A<:AbstractArray} = eltype_wrong(supertype(A))
+```
+
+While this works for declared types, it fails for types without
+supertypes:
+
+```julia-repl
+julia> eltype_wrong(Union{AbstractArray{Int}, AbstractArray{Float64}})
+ERROR: MethodError: no method matching supertype(::Type{Union{AbstractArray{Float64,N} where N, AbstractArray{Int64,N} where N}})
+Closest candidates are:
+  supertype(::DataType) at operators.jl:43
+  supertype(::UnionAll) at operators.jl:48
+```
+
 ### Building a similar type with a different type parameter
 
 When building generic code, there is often a need for constructing a similar
@@ -600,7 +605,6 @@ For instance, you might have some sort of abstract array with an arbitrary eleme
 and want to write your computation on it with a specific element type.
 We must implement a method for each `AbstractArray{T}` subtype that describes how to compute this type transform.
 There is no general transform of one subtype into another subtype with a different parameter.
-(Quick review: do you see why this is?)
 
 The subtypes of `AbstractArray` typically implement two methods to
 achieve this:
@@ -675,7 +679,7 @@ other functions such as `map` can dispatch on this information to pick
 the best algorithm (see [Abstract Array Interface](@ref man-interface-array)).
 This means that each subtype does not need to implement a custom version of `map`,
 since the generic definitions + trait classes will enable the system to select the fastest version.
-Here a toy implementation of `map` illustrating the trait-based dispatch:
+Here is a toy implementation of `map` illustrating the trait-based dispatch:
 
 ```julia
 map(f, a::AbstractArray, b::AbstractArray) = map(Base.IndexStyle(a, b), f, a, b)
diff --git a/doc/src/manual/missing.md b/doc/src/manual/missing.md
index 57ebb37952a624..4c6d36c7381b2c 100644
--- a/doc/src/manual/missing.md
+++ b/doc/src/manual/missing.md
@@ -1,7 +1,7 @@
 # [Missing Values](@id missing)
 
-Julia provides support for representing missing values in the statistical sense,
-that is for situations where no value is available for a variable in an observation,
+Julia provides support for representing missing values in the statistical sense.
+This is for situations where no value is available for a variable in an observation,
 but a valid value theoretically exists.
 Missing values are represented via the [`missing`](@ref) object, which is the
 singleton instance of the type [`Missing`](@ref). `missing` is equivalent to
@@ -15,7 +15,7 @@ and behaves like them in most situations.
 operators and functions.
 For these functions, uncertainty about the value of one of the operands
 induces uncertainty about the result. In practice, this means a math operation
-involving a `missing` value generally returns `missing`
+involving a `missing` value generally returns `missing`:
 ```jldoctest
 julia> missing + 1
 missing
@@ -27,14 +27,16 @@ julia> abs(missing)
 missing
 ```
 
-As `missing` is a normal Julia object, this propagation rule only works
+Since `missing` is a normal Julia object, this propagation rule only works
 for functions which have opted in to implement this behavior. This can be
-achieved either via a specific method defined for arguments of type `Missing`,
-or simply by accepting arguments of this type, and passing them to functions
-which propagate them (like standard math operators). Packages should consider
+achieved by:
+ - adding a specific method defined for arguments of type `Missing`,
+ - accepting arguments of this type, and passing them to functions
+   which propagate them (like standard math operators).
+Packages should consider
 whether it makes sense to propagate missing values when defining new functions,
-and define methods appropriately if that is the case. Passing a `missing` value
-to a function for which no method accepting arguments of type `Missing` is defined
+and define methods appropriately if this is the case. Passing a `missing` value
+to a function which does not have a method accepting arguments of type `Missing`
 throws a [`MethodError`](@ref), just like for any other type.
 
 Functions that do not propagate `missing` values can be made to do so by wrapping
@@ -46,7 +48,8 @@ For example, `f(x)` becomes `passmissing(f)(x)`.
 
 Standard equality and comparison operators follow the propagation rule presented
 above: if any of the operands is `missing`, the result is `missing`.
-Here are a few examples
+Here are a few examples:
+
 ```jldoctest
 julia> missing == 1
 missing
@@ -66,9 +69,10 @@ be used to test whether a value is missing. To test whether `x` is `missing`,
 use [`ismissing(x)`](@ref).
 
 Special comparison operators [`isequal`](@ref) and [`===`](@ref) are exceptions
-to the propagation rule: they always return a `Bool` value, even in the presence
+to the propagation rule. They will always return a `Bool` value, even in the presence
 of `missing` values, considering `missing` as equal to `missing` and as different
-from any other value. They can therefore be used to test whether a value is `missing`
+from any other value. They can therefore be used to test whether a value is `missing`:
+
 ```jldoctest
 julia> missing === 1
 false
@@ -85,7 +89,8 @@ true
 
 The [`isless`](@ref) operator is another exception: `missing` is considered
 as greater than any other value. This operator is used by [`sort`](@ref),
-which therefore places `missing` values after all other values.
+which therefore places `missing` values after all other values:
+
 ```jldoctest
 julia> isless(1, missing)
 true
@@ -100,18 +105,19 @@ false
 ## Logical operators
 
 Logical (or boolean) operators [`|`](@ref), [`&`](@ref) and [`xor`](@ref) are
-another special case, as they only propagate `missing` values when it is logically
-required. For these operators, whether or not the result is uncertain depends
-on the particular operation, following the well-established rules of
+another special case since they only propagate `missing` values when it is logically
+required. For these operators, whether or not the result is uncertain, depends
+on the particular operation. This follows the well-established rules of
 [*three-valued logic*](https://en.wikipedia.org/wiki/Three-valued_logic) which are
-also implemented by `NULL` in SQL and `NA` in R. This abstract definition actually
+implemented by e.g. `NULL` in SQL and `NA` in R. This abstract definition
 corresponds to a relatively natural behavior which is best explained
 via concrete examples.
 
 Let us illustrate this principle with the logical "or" operator [`|`](@ref).
 Following the rules of boolean logic, if one of the operands is `true`,
 the value of the other operand does not have an influence on the result,
-which will always be `true`
+which will always be `true`:
+
 ```jldoctest
 julia> true | true
 true
@@ -123,12 +129,13 @@ julia> false | true
 true
 ```
 
-Based on this observation, we can conclude that if one of the operands is `true`
+Based on this observation, we can conclude if one of the operands is `true`
 and the other `missing`, we know that the result is `true` in spite of the
 uncertainty about the actual value of one of the operands. If we had
 been able to observe the actual value of the second operand, it could only be
 `true` or `false`, and in both cases the result would be `true`. Therefore,
-in this particular case, missingness does *not* propagate
+in this particular case, missingness does *not* propagate:
+
 ```jldoctest
 julia> true | missing
 true
@@ -139,7 +146,8 @@ true
 
 On the contrary, if one of the operands is `false`, the result could be either
 `true` or `false` depending on the value of the other operand. Therefore,
-if that operand is `missing`, the result has to be `missing` too
+if that operand is `missing`, the result has to be `missing` too:
+
 ```jldoctest
 julia> false | true
 true
@@ -160,7 +168,8 @@ missing
 The behavior of the logical "and" operator [`&`](@ref) is similar to that of the
 `|` operator, with the difference that missingness does not propagate when
 one of the operands is `false`. For example, when that is the case of the first
-operand
+operand:
+
 ```jldoctest
 julia> false & false
 false
@@ -173,7 +182,8 @@ false
 ```
 
 On the other hand, missingness propagates when one of the operands is `true`,
-for example the first one
+for example the first one:
+
 ```jldoctest
 julia> true & true
 true
@@ -188,16 +198,17 @@ missing
 Finally, the "exclusive or" logical operator [`xor`](@ref) always propagates
 `missing` values, since both operands always have an effect on the result.
 Also note that the negation operator [`!`](@ref) returns `missing` when the
-operand is `missing` just like other unary operators.
+operand is `missing`, just like other unary operators.
 
 ## Control Flow and Short-Circuiting Operators
 
 Control flow operators including [`if`](@ref), [`while`](@ref) and the
 [ternary operator](@ref man-conditional-evaluation) `x ? y : z`
 do not allow for missing values. This is because of the uncertainty about whether
-the actual value would be `true` or `false` if we could observe it,
-which implies that we do not know how the program should behave. A [`TypeError`](@ref)
-is thrown as soon as a `missing` value is encountered in this context
+the actual value would be `true` or `false` if we could observe it.
+This implies we do not know how the program should behave. In this case, a
+[`TypeError`](@ref) is thrown as soon as a `missing` value is encountered in this context:
+
 ```jldoctest
 julia> if missing
            println("here")
@@ -208,7 +219,8 @@ ERROR: TypeError: non-boolean (Missing) used in boolean context
 For the same reason, contrary to logical operators presented above,
 the short-circuiting boolean operators [`&&`](@ref) and [`||`](@ref) do not
 allow for `missing` values in situations where the value of the operand
-determines whether the next operand is evaluated or not. For example
+determines whether the next operand is evaluated or not. For example:
+
 ```jldoctest
 julia> missing || false
 ERROR: TypeError: non-boolean (Missing) used in boolean context
@@ -220,10 +232,11 @@ julia> true && missing && false
 ERROR: TypeError: non-boolean (Missing) used in boolean context
 ```
 
-On the other hand, no error is thrown when the result can be determined without
+In contrast, there is no error thrown when the result can be determined without
 the `missing` values. This is the case when the code short-circuits
 before evaluating the `missing` operand, and when the `missing` operand is the
-last one
+last one:
+
 ```jldoctest
 julia> true && missing
 missing
@@ -234,7 +247,8 @@ false
 
 ## Arrays With Missing Values
 
-Arrays containing missing values can be created like other arrays
+Arrays containing missing values can be created like other arrays:
+
 ```jldoctest
 julia> [1, missing]
 2-element Vector{Union{Missing, Int64}}:
@@ -243,7 +257,7 @@ julia> [1, missing]
 ```
 
 As this example shows, the element type of such arrays is `Union{Missing, T}`,
-with `T` the type of the non-missing values. This simply reflects the fact that
+with `T` the type of the non-missing values. This reflects the fact that
 array entries can be either of type `T` (here, `Int64`) or of type `Missing`.
 This kind of array uses an efficient memory storage equivalent to an `Array{T}`
 holding the actual values combined with an `Array{UInt8}` indicating the type
@@ -252,6 +266,7 @@ of the entry (i.e. whether it is `Missing` or `T`).
 Arrays allowing for missing values can be constructed with the standard syntax.
 Use `Array{Union{Missing, T}}(missing, dims)` to create arrays filled with
 missing values:
+
 ```jldoctest
 julia> Array{Union{Missing, String}}(missing, 2, 3)
 2×3 Matrix{Union{Missing, String}}:
@@ -259,10 +274,17 @@ julia> Array{Union{Missing, String}}(missing, 2, 3)
  missing  missing  missing
 ```
 
-An array allowing for `missing` values but which does not contain any such value
-can be converted back to an array which does not allow for missing values using
+!!! note
+    Using `undef` or `similar` may currently give an array filled with
+    `missing`, but this is not the correct way to obtain such an array.
+    Use a `missing` constructor as shown above instead.
+
+An array with element type allowing `missing` entries (e.g. `Vector{Union{Missing, T}}`)
+which does not contain any `missing` entries can be converted to an array type that does
+not allow for `missing` entries (e.g. `Vector{T}`) using
 [`convert`](@ref). If the array contains `missing` values, a `MethodError` is thrown
-during conversion
+during conversion:
+
 ```jldoctest
 julia> x = Union{Missing, String}["a", "b"]
 2-element Vector{Union{Missing, String}}:
@@ -282,23 +304,27 @@ julia> y = Union{Missing, String}[missing, "b"]
 julia> convert(Array{String}, y)
 ERROR: MethodError: Cannot `convert` an object of type Missing to an object of type String
 ```
+
 ## Skipping Missing Values
 
 Since `missing` values propagate with standard mathematical operators, reduction
-functions return `missing` when called on arrays which contain missing values
+functions return `missing` when called on arrays which contain missing values:
+
 ```jldoctest
 julia> sum([1, missing])
 missing
 ```
 
-In this situation, use the [`skipmissing`](@ref) function to skip missing values
+In this situation, use the [`skipmissing`](@ref) function to skip missing values:
+
 ```jldoctest
 julia> sum(skipmissing([1, missing]))
 1
 ```
 
 This convenience function returns an iterator which filters out `missing` values
-efficiently. It can therefore be used with any function which supports iterators
+efficiently. It can therefore be used with any function which supports iterators:
+
 ```jldoctest skipmissing; setup = :(using Statistics)
 julia> x = skipmissing([3, missing, 2, 1])
 skipmissing(Union{Missing, Int64}[3, missing, 2, 1])
@@ -315,8 +341,9 @@ julia> mapreduce(sqrt, +, x)
 
 Objects created by calling `skipmissing` on an array can be indexed using indices
 from the parent array. Indices corresponding to missing values are not valid for
-these objects and an error is thrown when trying to use them (they are also skipped
-by `keys` and `eachindex`)
+these objects, and an error is thrown when trying to use them (they are also skipped
+by `keys` and `eachindex`):
+
 ```jldoctest skipmissing
 julia> x[1]
 3
@@ -327,9 +354,10 @@ ERROR: MissingException: the value at index (2,) is missing
 ```
 
 This allows functions which operate on indices to work in combination with `skipmissing`.
-This is notably the case for search and find functions, which return indices
-valid for the object returned by `skipmissing` which are also the indices of the
-matching entries *in the parent array*
+This is notably the case for search and find functions. These functions return indices
+valid for the object returned by `skipmissing`, and are also the indices of the
+matching entries *in the parent array*:
+
 ```jldoctest skipmissing
 julia> findall(==(1), x)
 1-element Vector{Int64}:
@@ -342,7 +370,8 @@ julia> argmax(x)
 1
 ```
 
-Use [`collect`](@ref) to extract non-`missing` values and store them in an array
+Use [`collect`](@ref) to extract non-`missing` values and store them in an array:
+
 ```jldoctest skipmissing
 julia> collect(x)
 3-element Vector{Int64}:
@@ -357,9 +386,10 @@ The three-valued logic described above for logical operators is also used
 by logical functions applied to arrays. Thus, array equality tests using
 the [`==`](@ref) operator return `missing` whenever the result cannot be
 determined without knowing the actual value of the `missing` entry. In practice,
-this means that `missing` is returned if all non-missing values of the compared
+this means `missing` is returned if all non-missing values of the compared
 arrays are equal, but one or both arrays contain missing values (possibly at
-different positions)
+different positions):
+
 ```jldoctest
 julia> [1, missing] == [2, missing]
 false
@@ -372,7 +402,8 @@ missing
 ```
 
 As for single values, use [`isequal`](@ref) to treat `missing` values as equal
-to other `missing` values but different from non-missing values
+to other `missing` values, but different from non-missing values:
+
 ```jldoctest
 julia> isequal([1, missing], [1, missing])
 true
@@ -382,7 +413,8 @@ false
 ```
 
 Functions [`any`](@ref) and [`all`](@ref) also follow the rules of
-three-valued logic, returning `missing` when the result cannot be determined
+three-valued logic. Thus, returning `missing` when the result cannot be determined:
+
 ```jldoctest
 julia> all([true, missing])
 missing
diff --git a/doc/src/manual/modules.md b/doc/src/manual/modules.md
index 803000d151c5b6..c6009594bea2da 100644
--- a/doc/src/manual/modules.md
+++ b/doc/src/manual/modules.md
@@ -1,120 +1,332 @@
 # [Modules](@id modules)
 
-Modules in Julia are separate variable workspaces, i.e. they introduce a new global scope. They
-are delimited syntactically, inside `module Name ... end`. Modules allow you to create top-level
-definitions (aka global variables) without worrying about name conflicts when your code is used
-together with somebody else's. Within a module, you can control which names from other modules
-are visible (via importing), and specify which of your names are intended to be public (via exporting).
+Modules in Julia help organize code into coherent units. They are delimited syntactically inside
+`module NameOfModule ... end`, and have the following features:
 
-The following example demonstrates the major features of modules. It is not meant to be run, but
-is shown for illustrative purposes:
+1. Modules are separate namespaces, each introducing a new global scope. This is useful, because it
+   allows the same name to be used for different functions or global variables without conflict, as long as they are in separate modules.
 
-```julia
-module MyModule
-using Lib
+2. Modules have facilities for detailed namespace management: each defines a set of names it
+   `export`s, and can import names from other modules with `using` and `import` (we explain these below).
+
+3. Modules can be precompiled for faster loading, and contain code for runtime initialization.
 
-using BigLib: thing1, thing2
+Typically, in larger Julia packages you will see module code organized into files, eg
 
-import Base.show
+```julia
+module SomeModule
 
-export MyType, foo
+# export, using, import statements are usually here; we discuss these below
+
+include("file1.jl")
+include("file2.jl")
 
-struct MyType
-    x
 end
+```
+
+Files and file names are mostly unrelated to modules; modules are associated only with module
+expressions. One can have multiple files per module, and multiple modules per file. `include`
+behaves as if the contents of the source file were evaluated in the global scope of the
+including module. In this chapter, we use short and simplified examples, so we won't use `include`.
 
-bar(x) = 2x
-foo(a::MyType) = bar(a.x) + 1
+The recommended style is not to indent the body of the module, since that would typically lead to
+whole files being indented. Also, it is common to use `UpperCamelCase` for module names (just like
+types), and use the plural form if applicable, especially if the module contains a similarly named
+identifier, to avoid name clashes. For example,
+
+```julia
+module FastThings
+
+struct FastThing
+    ...
+end
 
-show(io::IO, a::MyType) = print(io, "MyType $(a.x)")
 end
 ```
 
-Note that the style is not to indent the body of the module, since that would typically lead to
-whole files being indented.
+## [Namespace management](@id namespace-management)
+
+Namespace management refers to the facilities the language offers for making names in a module
+available in other modules. We discuss the related concepts and functionality below in detail.
+
+### Qualified names
+
+Names for functions, variables and types in the global scope like `sin`, `ARGS`, and
+`UnitRange` always belong to a module, called the *parent module*, which can be found
+interactively with [`parentmodule`](@ref), for example
+
+```jldoctest
+julia> parentmodule(UnitRange)
+Base
+```
+
+One can also refer to these names outside their parent module by prefixing them with their module,
+eg `Base.UnitRange`. This is called a *qualified name*. The parent module may be accessible using a
+chain of submodules like `Base.Math.sin`, where `Base.Math` is called the *module path*.
+Due to syntactic ambiguities, qualifying a name that contains only symbols, such as an operator,
+requires inserting a colon, e.g. `Base.:+`. A small number of operators additionally require
+parentheses, e.g. `Base.:(==)`.
+
+If a name is qualified, then it is always *accessible*, and in case of a function, it can also have
+methods added to it by using the qualified name as the function name.
+
+Within a module, a variable name can be “reserved” without assigning to it by declaring it as
+`global x`. This prevents name conflicts for globals initialized after load time. The syntax
+`M.x = y` does not work to assign a global in another module; global assignment is always
+module-local.
+
+### Export lists
+
+Names (referring to functions, types, global variables, and constants) can be added to the
+*export list* of a module with `export`: these are the symbols that are imported when `using` the module. Typically, they are at or near the top of the module definition
+so that readers of the source code can find them easily, as in
+
+```jldoctest module_manual
+julia> module NiceStuff
+       export nice, DOG
+       struct Dog end      # singleton type, not exported
+       const DOG = Dog()   # named instance, exported
+       nice(x) = "nice $x" # function, exported
+       end;
+
+```
 
-This module defines a type `MyType`, and two functions. Function `foo` and type `MyType` are exported,
-and so will be available for importing into other modules.  Function `bar` is private to `MyModule`.
+but this is just a style suggestion — a module can have multiple `export` statements in arbitrary
+locations.
 
-The statement `using Lib` means that a module called `Lib` will be available for resolving names
-as needed. When a global variable is encountered that has no definition in the current module,
-the system will search for it among variables exported by `Lib` and import it if it is found there.
-This means that all uses of that global within the current module will resolve to the definition
-of that variable in `Lib`.
+It is common to export names which form part of the API (application programming interface). In
+the above code, the export list suggests that users should use `nice` and `DOG`. However, since
+qualified names always make identifiers accessible, this is just an option for organizing APIs:
+unlike other languages, Julia has no facilities for truly hiding module internals.
 
-The statement `using BigLib: thing1, thing2` brings just the identifiers `thing1` and `thing2`
-into scope from module `BigLib`. If these names refer to functions, adding methods to them
-will not be allowed (you may only "use" them, not extend them).
+Also, some modules don't export names at all. This is usually done if they use common
+words, such as `derivative`, in their API, which could easily clash with the export lists of other
+modules. We will see how to manage name clashes below.
 
-The [`import`](@ref) keyword supports the same syntax as [`using`](@ref).
-It does not add modules to be searched the way `using` does. `import` also differs
-from `using` in that functions imported using `import` can be extended with new methods.
+### Standalone `using` and `import`
+
+Possibly the most common way of loading a module is `using ModuleName`. This [loads](@ref
+code-loading) the code associated with `ModuleName`, and brings
+
+1. the module name
+
+2. and the elements of the export list into the surrounding global namespace.
+
+Technically, the statement `using ModuleName` means that a module called `ModuleName` will be
+available for resolving names as needed. When a global variable is encountered that has no
+definition in the current module, the system will search for it among variables exported by `ModuleName`
+and use it if it is found there. This means that all uses of that global within the current
+module will resolve to the definition of that variable in `ModuleName`.
+
+To load a module from a package, the statement `using ModuleName` can be used.
+To load a module from a locally defined module, a dot needs to be added before the module name like `using .ModuleName`.
+
+To continue with our example,
+
+```jldoctest module_manual
+julia> using .NiceStuff
+```
 
-In `MyModule` above we wanted to add a method to the standard [`show`](@ref) function, so we had to write
-`import Base.show`. Functions whose names are only visible via `using` cannot be extended.
+would load the above code, making `NiceStuff` (the module name), `DOG` and `nice` available. `Dog` is not on the export list, but it can be accessed if the name is qualified with the module path (which here is just the module name) as `NiceStuff.Dog`.
+
+Importantly, **`using ModuleName` is the only form for which export lists matter at all**.
+
+In contrast,
+
+```jldoctest module_manual
+julia> import .NiceStuff
+```
+
+brings *only* the module name into scope. Users would need to use `NiceStuff.DOG`, `NiceStuff.Dog`, and `NiceStuff.nice` to access its contents. Usually, `import ModuleName` is used in contexts when the user wants to keep the namespace clean.
+As we will see in the next section `import .NiceStuff` is equivalent to `using .NiceStuff: NiceStuff`.
+
+You can combine multiple `using` and `import` statements of the same kind in a comma-separated expression, e.g.
+
+```jldoctest module_manual
+julia> using LinearAlgebra, Statistics
+```
+
+### `using` and `import` with specific identifiers, and adding methods
+
+When `using ModuleName:` or `import ModuleName:` is followed by a comma-separated list of names, the module is loaded, but *only those specific names are brought into the namespace* by the statement. For example,
+
+```jldoctest module_manual
+julia> using .NiceStuff: nice, DOG
+```
+
+will import the names `nice` and `DOG`.
+
+Importantly, the module name `NiceStuff` will *not* be in the namespace. If you want to make it accessible, you have to list it explicitly, as
+```jldoctest module_manual
+julia> using .NiceStuff: nice, DOG, NiceStuff
+```
+
+Julia has two forms for seemingly the same thing because only `import ModuleName: f` allows adding methods to `f`
+*without a module path*.
+That is to say, the following example will give an error:
+
+```jldoctest module_manual
+julia> using .NiceStuff: nice
+
+julia> struct Cat end
+
+julia> nice(::Cat) = "nice 😸"
+ERROR: error in method definition: function NiceStuff.nice must be explicitly imported to be extended
+Stacktrace:
+ [1] top-level scope
+   @ none:0
+ [2] top-level scope
+   @ none:1
+
+```
+
+This error prevents accidentally adding methods to functions in other modules that you only intended to use.
+
+There are two ways to deal with this. You can always qualify function names with a module path:
+```jldoctest module_manual
+julia> using .NiceStuff
+
+julia> struct Cat end
+
+julia> NiceStuff.nice(::Cat) = "nice 😸"
+
+```
+
+Alternatively, you can `import` the specific function name:
+```jldoctest module_manual
+julia> import .NiceStuff: nice
+
+julia> struct Cat end
+
+julia> nice(::Cat) = "nice 😸"
+nice (generic function with 2 methods)
+```
+
+Which one you choose is a matter of style. The first form makes it clear that you are adding a
+method to a function in another module (remember, that the imports and the method definition may be
+in separate files), while the second one is shorter, which is especially convenient if you are
+defining multiple methods.
 
 Once a variable is made visible via `using` or `import`, a module may not create its own variable
 with the same name. Imported variables are read-only; assigning to a global variable always affects
 a variable owned by the current module, or else raises an error.
 
-## Summary of module usage
+### Renaming with `as`
+
+An identifier brought into scope by `import` or `using` can be renamed with the keyword `as`.
+This is useful for working around name conflicts as well as for shortening names.
+For example, `Base` exports the function name `read`, but the CSV.jl package also provides `CSV.read`.
+If we are going to invoke CSV reading many times, it would be convenient to drop the `CSV.` qualifier.
+But then it is ambiguous whether we are referring to `Base.read` or `CSV.read`:
 
-To load a module, two main keywords can be used: `using` and `import`. To understand their differences,
-consider the following example:
+```julia-repl
+julia> read;
+
+julia> import CSV: read
+WARNING: ignoring conflicting import of CSV.read into Main
+```
+
+Renaming provides a solution:
+
+```julia-repl
+julia> import CSV: read as rd
+```
+
+Imported packages themselves can also be renamed:
 
 ```julia
-module MyModule
+import BenchmarkTools as BT
+```
 
-export x, y
+`as` works with `using` only when a single identifier is brought into scope.
+For example `using CSV: read as rd` works, but `using CSV as C` does not, since it operates
+on all of the exported names in `CSV`.
 
-x() = "x"
-y() = "y"
-p() = "p"
+### Mixing multiple `using` and `import` statements
+
+When multiple `using` or `import` statements of any of the forms above are used, their effect is combined in the order they appear.
+For example,
+
+```jldoctest module_manual
+julia> using .NiceStuff         # exported names and the module name
+
+julia> import .NiceStuff: nice  # allows adding methods to unqualified functions
 
-end
 ```
 
-In this module we export the `x` and `y` functions (with the keyword [`export`](@ref)), and also have
-the non-exported function `p`. There are several different ways to load the Module and its inner
-functions into the current workspace:
+would bring all the exported names of `NiceStuff` and the module name itself into scope, and also
+allow adding methods to `nice` without prefixing it with a module name.
 
-| Import Command                  | What is brought into scope                                                      | Available for method extension              |
-|:------------------------------- |:------------------------------------------------------------------------------- |:------------------------------------------- |
-| `using MyModule`                | All `export`ed names (`x` and `y`), `MyModule.x`, `MyModule.y` and `MyModule.p` | `MyModule.x`, `MyModule.y` and `MyModule.p` |
-| `using MyModule: x, p`          | `x` and `p`                                                                     |                                             |
-| `import MyModule`               | `MyModule.x`, `MyModule.y` and `MyModule.p`                                     | `MyModule.x`, `MyModule.y` and `MyModule.p` |
-| `import MyModule.x, MyModule.p` | `x` and `p`                                                                     | `x` and `p`                                 |
-| `import MyModule: x, p`         | `x` and `p`                                                                     | `x` and `p`                                 |
+### Handling name conflicts
 
-### Modules and files
+Consider the situation where two (or more) packages export the same name, as in
 
-Files and file names are mostly unrelated to modules; modules are associated only with module
-expressions. One can have multiple files per module, and multiple modules per file:
+```jldoctest module_manual
+julia> module A
+       export f
+       f() = 1
+       end
+A
+julia> module B
+       export f
+       f() = 2
+       end
+B
+```
 
-```julia
-module Foo
+The statement `using .A, .B` works, but when you try to call `f`, you get a warning
 
-include("file1.jl")
-include("file2.jl")
+```jldoctest module_manual
+julia> using .A, .B
 
-end
+julia> f
+WARNING: both B and A export "f"; uses of it in module Main must be qualified
+ERROR: UndefVarError: f not defined
 ```
 
-Including the same code in different modules provides mixin-like behavior. One could use this
-to run the same code with different base definitions, for example testing code by running it with
-"safe" versions of some operators:
+Here, Julia cannot decide which `f` you are referring to, so you have to make a choice. The following solutions are commonly used:
 
-```julia
-module Normal
-include("mycode.jl")
-end
+1. Simply proceed with qualified names like `A.f` and `B.f`. This makes the context clear to the reader of your code, especially if `f` just happens to coincide but has different meaning in various packages. For example, `degree` has various uses in mathematics, the natural sciences, and in everyday life, and these meanings should be kept separate.
+
+2. Use the `as` keyword above to rename one or both identifiers, eg
+
+   ```jldoctest module_manual
+   julia> using .A: f as f
+
+   julia> using .B: f as g
+
+   ```
+
+   would make `B.f` available as `g`. Here, we are assuming that you did not use `using A` before,
+   which would have brought `f` into the namespace.
+
+3. When the names in question *do* share a meaning, it is common for one module to import it from another, or have a lightweight “base” package with the sole function of defining an interface like this, which can be used by other packages. It is conventional to have such package names end in `...Base` (which has nothing to do with Julia's `Base` module).
+
+### Default top-level definitions and bare modules
+
+Modules automatically contain `using Core`, `using Base`, and definitions of the [`eval`](@ref)
+and [`include`](@ref) functions, which evaluate expressions/files within the global scope of that
+module.
+
+If these default definitions are not wanted, modules can be defined using the keyword
+[`baremodule`](@ref) instead (note: `Core` is still imported). In terms of
+`baremodule`, a standard `module` looks like this:
+
+```
+baremodule Mod
+
+using Base
+
+eval(x) = Core.eval(Mod, x)
+include(p) = Base.include(Mod, p)
+
+...
 
-module Testing
-include("safe_operators.jl")
-include("mycode.jl")
 end
 ```
 
+If even `Core` is not wanted, a module that imports nothing and defines no names at all can be defined with `Module(:YourNameHere, false, false)` and code can be evaluated into it with [`@eval`](@ref) or [`Core.eval`](@ref).
+
 ### Standard modules
 
 There are three important standard modules:
@@ -122,7 +334,6 @@ There are three important standard modules:
 * [`Base`](@ref) contains basic functionality that is useful in almost all cases.
 * [`Main`](@ref) is the top-level module and the current module, when Julia is started.
 
-
 !!! note "Standard library modules"
     By default Julia ships with some standard library modules. These behave like regular
     Julia packages except that you don't need to install them explicitly. For example,
@@ -132,79 +343,77 @@ There are three important standard modules:
     using Test
     ```
 
-### Default top-level definitions and bare modules
+## Submodules and relative paths
 
-In addition to `using Base`, modules also automatically contain
-definitions of the [`eval`](@ref) and [`include`](@ref) functions,
-which evaluate expressions/files within the global scope of that module.
+Modules can contain *submodules*, nesting the same syntax `module ... end`. They can be used to introduce separate namespaces, which can be helpful for organizing complex codebases. Note that each `module` introduces its own [scope](@ref scope-of-variables), so submodules do not automatically “inherit” names from their parent.
 
-If these default definitions are not wanted, modules can be defined using the keyword [`baremodule`](@ref)
-instead (note: `Core` is still imported, as per above). In terms of `baremodule`, a standard
-`module` looks like this:
+It is recommended that submodules refer to other modules within the enclosing parent module (including the latter) using *relative module qualifiers* in `using` and `import` statements. A relative module qualifier starts with a period (`.`), which corresponds to the current module, and each successive `.` leads to the parent of the current module. This should be followed by modules if necessary, and eventually the actual name to access, all separated by `.`s.
 
-```
-baremodule Mod
+Consider the following example, where the submodule `SubA` defines a function, which is then extended in its “sibling” module:
 
-using Base
+```jldoctest module_manual
+julia> module ParentModule
+       module SubA
+       export add_D  # exported interface
+       const D = 3
+       add_D(x) = x + D
+       end
+       using .SubA  # brings `add_D` into the namespace
+       export add_D # export it from ParentModule too
+       module SubB
+       import ..SubA: add_D # relative path for a “sibling” module
+       struct Infinity end
+       add_D(x::Infinity) = x
+       end
+       end;
 
-eval(x) = Core.eval(Mod, x)
-include(p) = Base.include(Mod, p)
+```
 
-...
+You may see code in packages, which, in a similar situation, uses
+```jldoctest module_manual
+julia> import .ParentModule.SubA: add_D
 
-end
 ```
+However, this operates through [code loading](@ref code-loading), and thus only works if `ParentModule` is in a package. It is better to use relative paths.
 
-### Relative and absolute module paths
+Note that the order of definitions also matters if you are evaluating values. Consider
 
-Given the statement `using Foo`, the system consults an internal table of top-level modules
-to look for one named `Foo`. If the module does not exist, the system attempts to `require(:Foo)`,
-which typically results in loading code from an installed package.
+```julia
+module TestPackage
 
-However, some modules contain submodules, which means you sometimes need to access a non-top-level
-module. There are two ways to do this. The first is to use an absolute path, for example
-`using Base.Sort`. The second is to use a relative path, which makes it easier to import submodules
-of the current module or any of its enclosing modules:
+export x, y
 
-```
-module Parent
+x = 0
 
-module Utils
-...
+module Sub
+using ..TestPackage
+z = y # ERROR: UndefVarError: y not defined
 end
 
-using .Utils
+y = 1
 
-...
 end
 ```
 
-Here module `Parent` contains a submodule `Utils`, and code in `Parent` wants the contents of
-`Utils` to be visible. This is done by starting the `using` path with a period. Adding more leading
-periods moves up additional levels in the module hierarchy. For example `using ..Utils` would
-look for `Utils` in `Parent`'s enclosing module rather than in `Parent` itself.
+where `Sub` is trying to use `TestPackage.y` before it was defined, so it does not have a value.
 
-Note that relative-import qualifiers are only valid in `using` and `import` statements.
+For similar reasons, you cannot use a cyclic ordering:
 
-### Namespace miscellanea
-
-If a name is qualified (e.g. `Base.sin`), then it can be accessed even if it is not exported.
-This is often useful when debugging. It can also have methods added to it by using the qualified
-name as the function name. However, due to syntactic ambiguities that arise, if you wish to add
-methods to a function in a different module whose name contains only symbols, such as an operator,
-`Base.+` for example, you must use `Base.:+` to refer to it. If the operator is more than one
-character in length you must surround it in brackets, such as: `Base.:(==)`.
+```julia
+module A
 
-Macro names are written with `@` in import and export statements, e.g. `import Mod.@mac`. Macros
-in other modules can be invoked as `Mod.@mac` or `@Mod.mac`.
+module B
+using ..C # ERROR: UndefVarError: C not defined
+end
 
-The syntax `M.x = y` does not work to assign a global in another module; global assignment is
-always module-local.
+module C
+using ..B
+end
 
-A variable name can be "reserved" without assigning to it by declaring it as `global x`.
-This prevents name conflicts for globals initialized after load time.
+end
+```
 
-### Module initialization and precompilation
+## Module initialization and precompilation
 
 Large modules can take several seconds to load because executing all of the statements in a module
 often involves compiling a large amount of code.
@@ -212,7 +421,7 @@ Julia creates precompiled caches of the module to reduce this time.
 
 The incremental precompiled module file are created and used automatically when using `import`
 or `using` to load a module.  This will cause it to be automatically compiled the first time
-it is imported. Alternatively, you can manually call [`Base.compilecache(modulename)`](@ref). The resulting
+it is imported. Alternatively, you can manually call [`Base.compilecache(Base.identify_package("modulename"))`](@ref). The resulting
 cache files will be stored in `DEPOT_PATH[1]/compiled/`. Subsequently, the module is automatically
 recompiled upon `using` or `import` whenever any of its dependencies change; dependencies are modules it
 imports, the Julia build, files it includes, or explicit dependencies declared by [`include_dependency(path)`](@ref)
@@ -227,7 +436,7 @@ into account the set of dependencies already loaded into the current process and
 modules, even if their files change or disappear, in order to avoid creating incompatibilities between
 the running system and the precompile cache.
 
-If you know that a module is *not* safe to precompile your module
+If you know that a module is *not* safe to precompile
 (for example, for one of the reasons described below), you should
 put `__precompile__(false)` in the module file (typically placed at the top).
 This will cause `Base.compilecache` to throw an error, and will cause `using` / `import` to load it
diff --git a/doc/src/manual/multi-threading.md b/doc/src/manual/multi-threading.md
index 952e7acea35abd..b20d0e54f1087d 100644
--- a/doc/src/manual/multi-threading.md
+++ b/doc/src/manual/multi-threading.md
@@ -8,7 +8,7 @@ of Julia multi-threading features.
 By default, Julia starts up with a single thread of execution. This can be verified by using the
 command [`Threads.nthreads()`](@ref):
 
-```julia-repl
+```jldoctest
 julia> Threads.nthreads()
 1
 ```
@@ -18,10 +18,17 @@ The number of execution threads is controlled either by using the
 [`JULIA_NUM_THREADS`](@ref JULIA_NUM_THREADS) environment variable. When both are
 specified, then `-t`/`--threads` takes precedence.
 
+The number of threads can either be specified as an integer (`--threads=4`) or as `auto`
+(`--threads=auto`), where `auto` tries to infer a useful default number of threads to use
+(see [Command-line Options](@ref command-line-options) for more details).
+
 !!! compat "Julia 1.5"
     The `-t`/`--threads` command line argument requires at least Julia 1.5.
     In older versions you must use the environment variable instead.
 
+!!! compat "Julia 1.7"
+    Using `auto` as value of the environment variable `JULIA_NUM_THREADS` requires at least Julia 1.7.
+    In older versions, this value is ignored.
 Lets start Julia with 4 threads:
 
 ```bash
@@ -37,7 +44,7 @@ julia> Threads.nthreads()
 
 But we are currently on the master thread. To check, we use the function [`Threads.threadid`](@ref)
 
-```julia-repl
+```jldoctest
 julia> Threads.threadid()
 1
 ```
@@ -65,7 +72,61 @@ julia> Threads.threadid()
     three processes have 2 threads enabled. For more fine grained control over worker
     threads use [`addprocs`](@ref) and pass `-t`/`--threads` as `exeflags`.
 
-## Data-race freedom
+## [Threadpools](@id man-threadpools)
+
+When a program's threads are busy with many tasks to run, tasks may experience
+delays which may negatively affect the responsiveness and interactivity of the
+program. To address this, you can specify that a task is interactive when you
+[`Threads.@spawn`](@ref) it:
+
+```julia
+using Base.Threads
+@spawn :interactive f()
+```
+
+Interactive tasks should avoid performing high latency operations, and if they
+are long duration tasks, should yield frequently.
+
+Julia may be started with one or more threads reserved to run interactive tasks:
+
+```bash
+$ julia --threads 3,1
+```
+
+The environment variable `JULIA_NUM_THREADS` can also be used similarly:
+```bash
+export JULIA_NUM_THREADS=3,1
+```
+
+This starts Julia with 3 threads in the `:default` threadpool and 1 thread in
+the `:interactive` threadpool:
+
+```julia-repl
+julia> using Base.Threads
+
+julia> nthreads()
+4
+
+julia> nthreadpools()
+2
+
+julia> threadpool()
+:default
+
+julia> nthreads(:interactive)
+1
+```
+
+Either or both numbers can be replaced with the word `auto`, which causes
+Julia to choose a reasonable default.
+
+## Communication and synchronization
+
+Although Julia's threads can communicate through shared memory, it is notoriously
+difficult to write correct and data-race free multi-threaded code. Julia's
+[`Channel`](@ref)s are thread-safe and may be used to communicate safely.
+
+### Data-race freedom
 
 You are entirely responsible for ensuring that your program is data-race free,
 and nothing promised here can be assumed if you do not observe that
@@ -147,7 +208,7 @@ to its assigned locations:
 
 ```julia-repl
 julia> a
-10-element Array{Float64,1}:
+10-element Vector{Float64}:
  1.0
  1.0
  1.0
@@ -182,14 +243,17 @@ julia> Threads.@threads for id in 1:4
        end
 
 julia> old_is
-4-element Array{Float64,1}:
+4-element Vector{Float64}:
  0.0
  1.0
  7.0
  3.0
 
+julia> i[]
+ 10
+
 julia> ids
-4-element Array{Float64,1}:
+4-element Vector{Float64}:
  1.0
  2.0
  3.0
@@ -227,11 +291,28 @@ julia> acc[]
 1000
 ```
 
-!!! note
-    Not *all* primitive types can be wrapped in an `Atomic` tag. Supported types
-    are `Int8`, `Int16`, `Int32`, `Int64`, `Int128`, `UInt8`, `UInt16`, `UInt32`,
-    `UInt64`, `UInt128`, `Float16`, `Float32`, and `Float64`. Additionally,
-    `Int128` and `UInt128` are not supported on AAarch32 and ppc64le.
+
+## [Per-field atomics](@id man-atomics)
+
+We can also use atomics on a more granular level using the [`@atomic`](@ref
+Base.@atomic), [`@atomicswap`](@ref Base.@atomicswap), and
+[`@atomicreplace`](@ref Base.@atomicreplace) macros.
+
+Specific details of the memory model and other details of the design are written
+in the [Julia Atomics
+Manifesto](https://gist.github.com/vtjnash/11b0031f2e2a66c9c24d33e810b34ec0),
+which will later be published formally.
+
+Any field in a struct declaration can be decorated with `@atomic`, and then any
+write must be marked with `@atomic` also, and must use one of the defined atomic
+orderings (`:monotonic`, `:acquire`, `:release`, `:acquire_release`, or
+`:sequentially_consistent`). Any read of an atomic field can also be annotated
+with an atomic ordering constraint, or will be done with monotonic (relaxed)
+ordering if unspecified.
+
+!!! compat "Julia 1.7"
+    Per-field atomics requires at least Julia 1.7.
+
 
 ## Side effects and mutable function arguments
 
@@ -241,6 +322,7 @@ For instance functions that have a
 [name ending with `!`](@ref bang-convention)
 by convention modify their arguments and thus are not pure.
 
+
 ## @threadcall
 
 External libraries, such as those called via [`ccall`](@ref), pose a problem for
@@ -279,9 +361,6 @@ threads in Julia:
     multiple threads where at least one thread modifies the collection
     (common examples include `push!` on arrays, or inserting
     items into a `Dict`).
-  * After a task starts running on a certain thread (e.g. via `@spawn`), it
-    will always be restarted on the same thread after blocking. In the future
-    this limitation will be removed, and tasks will migrate between threads.
   * `@threads` currently uses a static schedule, using all threads and assigning
     equal iteration counts to each. In the future the default schedule is likely
     to change to be dynamic.
@@ -339,7 +418,7 @@ There are a few approaches to dealing with this problem:
 
 3. A related third strategy is to use a yield-free queue. We don't currently
    have a lock-free queue implemented in Base, but
-   `Base.InvasiveLinkedListSynchronized{T}` is suitable. This can frequently be a
+   `Base.IntrusiveLinkedListSynchronized{T}` is suitable. This can frequently be a
    good strategy to use for code with event loops. For example, this strategy is
    employed by `Gtk.jl` to manage lifetime ref-counting. In this approach, we
    don't do any explicit work inside the `finalizer`, and instead add it to a queue
diff --git a/doc/src/manual/networking-and-streams.md b/doc/src/manual/networking-and-streams.md
index 163716c5838040..fc62632433850c 100644
--- a/doc/src/manual/networking-and-streams.md
+++ b/doc/src/manual/networking-and-streams.md
@@ -193,13 +193,13 @@ Let's first create a simple server:
 ```julia-repl
 julia> using Sockets
 
-julia> @async begin
+julia> errormonitor(@async begin
            server = listen(2000)
            while true
                sock = accept(server)
                println("Hello World\n")
            end
-       end
+       end)
 Task (runnable) @0x00007fd31dc11ae0
 ```
 
@@ -265,7 +265,7 @@ printed the message and waited for the next client. Reading and writing works in
 To see this, consider the following simple echo server:
 
 ```julia-repl
-julia> @async begin
+julia> errormonitor(@async begin
            server = listen(2001)
            while true
                sock = accept(server)
@@ -273,15 +273,15 @@ julia> @async begin
                    write(sock, readline(sock, keep=true))
                end
            end
-       end
+       end)
 Task (runnable) @0x00007fd31dc12e60
 
 julia> clientside = connect(2001)
 TCPSocket(RawFD(28) open, 0 bytes waiting)
 
-julia> @async while isopen(clientside)
+julia> errormonitor(@async while isopen(clientside)
            write(stdout, readline(clientside, keep=true))
-       end
+       end)
 Task (runnable) @0x00007fd31dc11870
 
 julia> println(clientside,"Hello World from the Echo Server")
@@ -351,3 +351,68 @@ Finished connection to google.com
 Finished connection to julialang.org
 Finished connection to github.com
 ```
+
+## Multicast
+
+Julia supports [multicast](https://datatracker.ietf.org/doc/html/rfc1112) over IPv4 and IPv6 using the User Datagram Protocol ([UDP](https://datatracker.ietf.org/doc/html/rfc768)) as transport.
+
+Unlike the Transmission Control Protocol ([TCP](https://datatracker.ietf.org/doc/html/rfc793)), UDP makes almost no assumptions about the needs of the application.
+TCP provides flow control (it accelerates and decelerates to maximize throughput), reliability (lost or corrupt packets are automatically retransmitted), sequencing (packets are ordered by the operating system before they are given to the application), segment size, and session setup and teardown.
+UDP provides no such features.
+
+A common use for UDP is in multicast applications.
+TCP is a stateful protocol for communication between exactly two devices.
+UDP can use special multicast addresses to allow simultaneous communication between many devices.
+
+### Receiving IP Multicast Packets
+
+To transmit data over UDP multicast, simply `recv` on the socket, and the first packet received will be returned. Note that it may not be the first packet that you sent however!
+
+```
+using Sockets
+group = ip"228.5.6.7"
+socket = Sockets.UDPSocket()
+bind(socket, ip"0.0.0.0", 6789)
+join_multicast_group(socket, group)
+println(String(recv(socket)))
+leave_multicast_group(socket, group)
+close(socket)
+```
+
+### Sending IP Multicast Packets
+
+To transmit data over UDP multicast, simply `send` to the socket.
+Notice that it is not necessary for a sender to join the multicast group.
+
+```
+using Sockets
+group = ip"228.5.6.7"
+socket = Sockets.UDPSocket()
+send(socket, group, 6789, "Hello over IPv4")
+close(socket)
+```
+
+### IPv6 Example
+
+This example gives the same functionality as the previous program, but uses IPv6 as the network-layer protocol.
+
+Listener:
+```
+using Sockets
+group = Sockets.IPv6("ff05::5:6:7")
+socket = Sockets.UDPSocket()
+bind(socket, Sockets.IPv6("::"), 6789)
+join_multicast_group(socket, group)
+println(String(recv(socket)))
+leave_multicast_group(socket, group)
+close(socket)
+```
+
+Sender:
+```
+using Sockets
+group = Sockets.IPv6("ff05::5:6:7")
+socket = Sockets.UDPSocket()
+send(socket, group, 6789, "Hello over IPv6")
+close(socket)
+```
diff --git a/doc/src/manual/noteworthy-differences.md b/doc/src/manual/noteworthy-differences.md
index 5ca2bfe6f6bada..dc3093ad8db6b4 100644
--- a/doc/src/manual/noteworthy-differences.md
+++ b/doc/src/manual/noteworthy-differences.md
@@ -7,8 +7,8 @@ major syntactic and functional differences. The following are some noteworthy di
 may trip up Julia users accustomed to MATLAB:
 
   * Julia arrays are indexed with square brackets, `A[i,j]`.
-  * Julia arrays are not copied when assigned to another variable. After `A = B`, changing elements of `B` will modify `A`
-    as well.
+  * Julia arrays are not copied when assigned to another variable. After `A = B`, changing elements of `B`
+    will modify `A` as well. To avoid this, use `A = copy(B)`.
   * Julia values are not copied when passed to a function. If a function modifies an array, the changes
     will be visible in the caller.
   * Julia does not automatically grow arrays in an assignment statement. Whereas in MATLAB `a(4) = 3.2`
@@ -18,8 +18,8 @@ may trip up Julia users accustomed to MATLAB:
     which grow `Vector`s much more efficiently than MATLAB's `a(end+1) = val`.
   * The imaginary unit `sqrt(-1)` is represented in Julia as [`im`](@ref), not `i` or `j` as in MATLAB.
   * In Julia, literal numbers without a decimal point (such as `42`) create integers instead of floating
-    point numbers. As a result, some operations can throw
-    a domain error if they expect a float; for example, `julia> a = -1; 2^a` throws a domain error, as the
+    point numbers. As a result, some operations can throw a domain error if they expect a float; for example,
+    `julia> a = -1; 2^a` throws a domain error, as the
     result is not an integer (see [the FAQ entry on domain errors](@ref faq-domain-errors) for details).
   * In Julia, multiple values are returned and assigned as tuples, e.g. `(a, b) = (1, 2)` or `a, b = 1, 2`.
     MATLAB's `nargout`, which is often used in MATLAB to do optional work based on the number of returned
@@ -35,10 +35,10 @@ may trip up Julia users accustomed to MATLAB:
     - To construct block matrices (concatenating in the first two dimensions), use either [`hvcat`](@ref)
       or combine spaces and semicolons (`[a b; c d]`).
   * In Julia, `a:b` and `a:b:c` construct `AbstractRange` objects. To construct a full vector like in MATLAB,
-    use [`collect(a:b)`](@ref). Generally, there is no need to call `collect` though. An `AbstractRange` object will
-    act like a normal array in most cases but is more efficient because it lazily computes its values.
-    This pattern of creating specialized objects instead of full arrays is used frequently, and is
-    also seen in functions such as [`range`](@ref), or with iterators such as `enumerate`, and
+    use [`collect(a:b)`](@ref). Generally, there is no need to call `collect` though. An `AbstractRange`
+    object will act like a normal array in most cases but is more efficient because it lazily computes
+    its values. This pattern of creating specialized objects instead of full arrays is used frequently,
+    and is also seen in functions such as [`range`](@ref), or with iterators such as `enumerate`, and
     `zip`. The special objects can mostly be used as if they were normal arrays.
   * Functions in Julia return values from their last expression or the `return` keyword instead of
     listing the names of variables to return in the function definition (see [The return Keyword](@ref)
@@ -70,7 +70,7 @@ may trip up Julia users accustomed to MATLAB:
     in an interactive session. In Julia, unlike MATLAB, `ans` is not set when Julia code is run in
     non-interactive mode.
   * Julia's `struct`s do not support dynamically adding fields at runtime, unlike MATLAB's `class`es.
-    Instead, use a [`Dict`](@ref).
+    Instead, use a [`Dict`](@ref). Dict in Julia isn't ordered.
   * In Julia each module has its own global scope/namespace, whereas in MATLAB there is just one global
     scope.
   * In MATLAB, an idiomatic way to remove unwanted values is to use logical indexing, like in the
@@ -80,8 +80,10 @@ may trip up Julia users accustomed to MATLAB:
     `x[x.>3]` and `x = x[x.>3]`. Using [`filter!`](@ref) reduces the use of temporary arrays.
   * The analogue of extracting (or "dereferencing") all elements of a cell array, e.g. in `vertcat(A{:})`
     in MATLAB, is written using the splat operator in Julia, e.g. as `vcat(A...)`.
-  * In Julia, the `adjoint` function performs conjugate transposition; in MATLAB, `adjoint` provides the "adjugate" or
-    classical adjoint, which is the transpose of the matrix of cofactors.
+  * In Julia, the `adjoint` function performs conjugate transposition; in MATLAB, `adjoint` provides the
+    "adjugate" or classical adjoint, which is the transpose of the matrix of cofactors.
+  * In Julia, a^b^c is evaluated a^(b^c) while in MATLAB it's (a^b)^c.
+
 ## Noteworthy differences from R
 
 One of Julia's goals is to provide an effective language for data analysis and statistical programming.
@@ -95,6 +97,7 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, varargs are specified using the splat operator `...`, which always follows the name
     of a specific variable, unlike R, for which `...` can occur in isolation.
   * In Julia, modulus is `mod(a, b)`, not `a %% b`. `%` in Julia is the remainder operator.
+  * Julia constructs vectors using brackets. Julia's `[1, 2, 3]` is the equivalent of R's `c(1, 2, 3)`.
   * In Julia, not all data structures support logical indexing. Furthermore, logical indexing in Julia
     is supported only with vectors of length equal to the object being indexed. For example:
 
@@ -120,7 +123,6 @@ For users coming to Julia from R, these are some noteworthy differences:
     statements in the latter two syntaxes must be explicitly wrapped in parentheses, e.g. `cond && (x = value)`.
   * In Julia, `<-`, `<<-` and `->` are not assignment operators.
   * Julia's `->` creates an anonymous function.
-  * Julia constructs vectors using brackets. Julia's `[1, 2, 3]` is the equivalent of R's `c(1, 2, 3)`.
   * Julia's [`*`](@ref) operator can perform matrix multiplication, unlike in R. If `A` and `B` are
     matrices, then `A * B` denotes a matrix multiplication in Julia, equivalent to R's `A %*% B`.
     In R, this same notation would perform an element-wise (Hadamard) product. To get the element-wise
@@ -159,8 +161,11 @@ For users coming to Julia from R, these are some noteworthy differences:
   * In Julia, vectors and matrices are concatenated using [`hcat`](@ref), [`vcat`](@ref) and
     [`hvcat`](@ref), not `c`, `rbind` and `cbind` like in R.
   * In Julia, a range like `a:b` is not shorthand for a vector like in R, but is a specialized `AbstractRange`
-    object that is used for iteration without high memory overhead. To convert a range into a vector, use
+    object that is used for iteration. To convert a range into a vector, use
     [`collect(a:b)`](@ref).
+  * The `:` operator has a different precedence in R and Julia. In particular, in Julia arithmetic operators
+    have higher precedence than the `:` operator, whereas the reverse is true in R. For example, `1:n-1` in
+    Julia is equivalent to `1:(n-1)` in R.
   * Julia's [`max`](@ref) and [`min`](@ref) are the equivalent of `pmax` and `pmin` respectively
     in R, but both arguments need to have the same dimensions.  While [`maximum`](@ref) and [`minimum`](@ref)
     replace `max` and `min` in R, there are important differences.
@@ -201,19 +206,22 @@ For users coming to Julia from R, these are some noteworthy differences:
     is not significant as it is in Python. Unlike Python, Julia has no `pass` keyword.
   * Strings are denoted by double quotation marks (`"text"`) in Julia (with three double quotation marks for multi-line strings), whereas in Python they can be denoted either by single (`'text'`) or double quotation marks (`"text"`). Single quotation marks are used for characters in Julia (`'c'`).
   * String concatenation is done with `*` in Julia, not `+` like in Python. Analogously, string repetition is done with `^`, not `*`. Implicit string concatenation of string literals like in Python (e.g. `'ab' 'cd' == 'abcd'`) is not done in Julia.
-  * Python Lists—flexible but slow—correspond to the Julia `Vector{Any}` type or more generally `Vector{T}` where `T` is some non-concrete element type. "Fast" arrays like Numpy arrays that store elements in-place (i.e., `dtype` is `np.float64`, `[('f1', np.uint64), ('f2', np.int32)]`, etc.) can be represented by `Array{T}` where `T` is a concrete, immutable element type. This includes built-in types like `Float64`, `Int32`, `Int64` but also more complex types like `Tuple{UInt64,Float64}` and many user-defined types as well.
+  * Python Lists—flexible but slow—correspond to the Julia `Vector{Any}` type or more generally `Vector{T}` where `T` is some non-concrete element type. "Fast" arrays like NumPy arrays that store elements in-place (i.e., `dtype` is `np.float64`, `[('f1', np.uint64), ('f2', np.int32)]`, etc.) can be represented by `Array{T}` where `T` is a concrete, immutable element type. This includes built-in types like `Float64`, `Int32`, `Int64` but also more complex types like `Tuple{UInt64,Float64}` and many user-defined types as well.
   * In Julia, indexing of arrays, strings, etc. is 1-based not 0-based.
   * Julia's slice indexing includes the last element, unlike in Python. `a[2:3]` in Julia is `a[1:3]`
     in Python.
-  * Julia does not support negative indices. In particular, the last element of a list or array is
-    indexed with `end` in Julia, not `-1` as in Python.
+  * Unlike Python, Julia allows [AbstractArrays with arbitrary indexes](https://julialang.org/blog/2017/04/offset-arrays/).
+    Python's special interpretation of negative indexing, `a[-1]` and `a[-2]`, should be written
+    `a[end]` and `a[end-1]` in Julia.
   * Julia requires `end` for indexing until the last element. `x[1:]` in Python is equivalent to `x[2:end]` in Julia.
   * Julia's range indexing has the format of `x[start:step:stop]`, whereas Python's format is `x[start:(stop+1):step]`. Hence, `x[0:10:2]` in Python is equivalent to `x[1:2:10]` in Julia. Similarly, `x[::-1]` in Python, which refers to the reversed array, is equivalent to `x[end:-1:1]` in Julia.
+  * In Julia, ranges can be constructed independently as `start:step:stop`, the same syntax it uses
+    in array-indexing.  The `range` function is also supported.
   * In Julia, indexing a matrix with arrays like `X[[1,2], [1,3]]` refers to a sub-matrix that contains the intersections of the first and second rows with the first and third columns. In Python, `X[[1,2], [1,3]]` refers to a vector that contains the values of cell `[1,1]` and `[2,3]` in the matrix. `X[[1,2], [1,3]]` in Julia is equivalent with `X[np.ix_([0,1],[0,2])]` in Python. `X[[0,1], [0,2]]` in Python is equivalent with `X[[CartesianIndex(1,1), CartesianIndex(2,3)]]` in Julia.
   * Julia has no line continuation syntax: if, at the end of a line, the input so far is a complete
     expression, it is considered done; otherwise the input continues. One way to force an expression
     to continue is to wrap it in parentheses.
-  * Julia arrays are column major (Fortran ordered) whereas NumPy arrays are row major (C-ordered)
+  * Julia arrays are column-major (Fortran-ordered) whereas NumPy arrays are row-major (C-ordered)
     by default. To get optimal performance when looping over arrays, the order of the loops should
     be reversed in Julia relative to NumPy (see [relevant section of Performance Tips](@ref man-performance-column-major)).
   * Julia's updating operators (e.g. `+=`, `-=`, ...) are *not in-place* whereas NumPy's are. This
@@ -237,17 +245,19 @@ For users coming to Julia from R, these are some noteworthy differences:
   * Julia uses `nothing` of type `Nothing` to represent a null value, whereas Python uses `None` of type `NoneType`.
   * In Julia, the standard operators over a matrix type are matrix operations, whereas, in Python, the standard operators are element-wise operations. When both `A` and `B` are matrices, `A * B` in Julia performs matrix multiplication, not element-wise multiplication as in Python. `A * B` in Julia is equivalent with `A @ B` in Python, whereas `A * B` in Python is equivalent with `A .* B` in Julia.
   * The adjoint operator `'` in Julia returns an adjoint of a vector (a lazy representation of row vector), whereas the transpose operator `.T` over a vector in Python returns the original vector (non-op).
-  * In Julia, a function may contain multiple concrete implementations (called *Methods*), selected via multiple dispatch, whereas functions in Python have a single implementation (no polymorphism).
-  * There are no classes in Julia. Instead they are structures (mutable or immutable), containing data but no methods.
-  * Calling a method of a class in Python (`a = MyClass(x), x.func(y)`) corresponds to a function call in Julia, e.g. `a = MyStruct(x), func(x::MyStruct, y)`. In general, multiple dispatch is more flexible and powerful than the Python class system.
+  * In Julia, a function may contain multiple concrete implementations (called *methods*), which are selected via multiple dispatch based on the types of all arguments to the call, as compared to functions in Python, which have a single implementation and no polymorphism (as opposed to Python method calls which use a different syntax and allows dispatch on the receiver of the method).
+  * There are no classes in Julia. Instead there are structures (mutable or immutable), containing data but no methods.
+  * Calling a method of a class instance in Python (`x = MyClass(*args); x.f(y)`) corresponds to a function call in Julia, e.g. `x = MyType(args...); f(x, y)`. In general, multiple dispatch is more flexible and powerful than the Python class system.
   * Julia structures may have exactly one abstract supertype, whereas Python classes can inherit from one or more (abstract or concrete) superclasses.
-  * The logical Julia program structure (Packages and Modules) is independent of the file strucutre (`include` for additional files), whereas the Python code structure is defined by directories (Packages) and files (Modules).
-  * The ternary operator `x > 0 ? 1 : -1` in Julia corresponds to conditional expression in Python `1 if x > 0 else -1`.
+  * The logical Julia program structure (Packages and Modules) is independent of the file structure (`include` for additional files), whereas the Python code structure is defined by directories (Packages) and files (Modules).
+  * The ternary operator `x > 0 ? 1 : -1` in Julia corresponds to a conditional expression in Python `1 if x > 0 else -1`.
   * In Julia the `@` symbol refers to a macro, whereas in Python it refers to a decorator.
-  * Exception handling in Julia is done using `try` — `catch` — `finally`, instead of `try` — `except` — `finally`. In contrast to Python, it is not recommended to use exception handling as part of the normal workflow in Julia due to performance reasons.
+  * Exception handling in Julia is done using `try` — `catch` — `finally`, instead of `try` — `except` — `finally`. In contrast to Python, it is not recommended to use exception handling as part of the normal workflow in Julia (compared with Python, Julia is faster at ordinary control flow but slower at exception-catching).
   * In Julia loops are fast, there is no need to write "vectorized" code for performance reasons.
   * Be careful with non-constant global variables in Julia, especially in tight loops. Since you can write close-to-metal code in Julia (unlike Python), the effect of globals can be drastic (see [Performance Tips](@ref man-performance-tips)).
-  * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`.
+  * In Julia, rounding and truncation are explicit. Python's `int(3.7)` should be `floor(Int, 3.7)` or `Int(floor(3.7))` and is distinguished from `round(Int, 3.7)`. `floor(x)` and `round(x)` on their own return an integer value of the same type as `x` rather than always returning `Int`.
+  * In Julia, parsing is explicit. Python's `float("3.7")` would be `parse(Float64, "3.7")` in Julia.
+  * In Python, the majority of values can be used in logical contexts (e.g. `if "a":` means the following block is executed, and `if "":` means it is not). In Julia, you need explicit conversion to `Bool` (e.g. `if "a"` throws an exception). If you want to test for a non-empty string in Julia, you would explicitly write `if !isempty("")`.  Perhaps surprisingly, in Python `if "False"` and `bool("False")` both evaluate to `True` (because `"False"` is a non-empty string); in Julia, `parse(Bool, "false")` returns `false`.
   * In Julia, a new local scope is introduced by most code blocks, including loops and `try` — `catch` — `finally`. Note that comprehensions (list, generator, etc.) introduce a new local scope both in Python and Julia, whereas `if` blocks do not introduce a new local scope in both languages.
 
 ## Noteworthy differences from C/C++
@@ -271,16 +281,22 @@ For users coming to Julia from R, these are some noteworthy differences:
     a larger size type, such as `Int64` (if `Int` is `Int32`), `Int128`, or the arbitrarily large
     `BigInt` type. There are no numeric literal suffixes, such as `L`, `LL`, `U`, `UL`, `ULL` to indicate
     unsigned and/or signed vs. unsigned. Decimal literals are always signed, and hexadecimal literals
-    (which start with `0x` like C/C++), are unsigned. Hexadecimal literals also, unlike C/C++/Java
+    (which start with `0x` like C/C++), are unsigned, unless when they encode more than 128 bits,
+    in which case they are of type `BigInt`. Hexadecimal literals also, unlike C/C++/Java
     and unlike decimal literals in Julia, have a type based on the *length* of the literal, including
     leading 0s. For example, `0x0` and `0x00` have type [`UInt8`](@ref), `0x000` and `0x0000` have type
     [`UInt16`](@ref), then literals with 5 to 8 hex digits have type `UInt32`, 9 to 16 hex digits type
-    `UInt64` and 17 to 32 hex digits type `UInt128`. This needs to be taken into account when defining
+    `UInt64`, 17 to 32 hex digits type `UInt128`, and more that 32 hex digits type `BigInt`.
+    This needs to be taken into account when defining
     hexadecimal masks, for example `~0xf == 0xf0` is very different from `~0x000f == 0xfff0`. 64 bit `Float64`
     and 32 bit [`Float32`](@ref) bit literals are expressed as `1.0` and `1.0f0` respectively. Floating point
     literals are rounded (and not promoted to the `BigFloat` type) if they can not be exactly represented.
      Floating point literals are closer in behavior to C/C++. Octal (prefixed with `0o`) and binary
-    (prefixed with `0b`) literals are also treated as unsigned.
+    (prefixed with `0b`) literals are also treated as unsigned (or `BigInt` for more than 128 bits).
+  * In Julia, the division operator [`/`](@ref) returns a floating point number when both operands
+    are of integer type.  To perform integer division, use [`div`](@ref) or [`÷`](@ref div).
+  * Indexing an `Array` with floating point types is generally an error in Julia. The Julia
+    equivalent of the C expression `a[i / 2]` is `a[i ÷ 2 + 1]`, where `i` is of integer type.
   * String literals can be delimited with either `"`  or `"""`, `"""` delimited literals can contain
     `"` characters without quoting it like `"\""`. String literals can have values of other variables
     or expressions interpolated into them, indicated by `$variablename` or `$(expression)`, which
@@ -298,11 +314,11 @@ For users coming to Julia from R, these are some noteworthy differences:
     meaning within `[ ]`, something to watch out for. `;` can be used to separate expressions on a
     single line, but are not strictly necessary in many cases, and are more an aid to readability.
   * In Julia, the operator [`⊻`](@ref xor) ([`xor`](@ref)) performs the bitwise XOR operation, i.e.
-    [`^`](@ref) in C/C++.  Also, the bitwise operators do not have the same precedence as C/++, so
+    [`^`](@ref) in C/C++.  Also, the bitwise operators do not have the same precedence as C/C++, so
     parenthesis may be required.
   * Julia's [`^`](@ref) is exponentiation (pow), not bitwise XOR as in C/C++ (use [`⊻`](@ref xor), or
     [`xor`](@ref), in Julia)
-  * Julia has two right-shift operators, `>>` and `>>>`.  `>>>` performs an arithmetic shift, `>>`
+  * Julia has two right-shift operators, `>>` and `>>>`.  `>>` performs an arithmetic shift, `>>>`
     always performs a logical shift, unlike C/C++, where the meaning of `>>` depends on the type of
     the value being shifted.
   * Julia's `->` creates an anonymous function, it does not access a member via a pointer.
@@ -345,7 +361,13 @@ For users coming to Julia from R, these are some noteworthy differences:
 
 - The typical Julia workflow for prototyping also uses continuous manipulation of the image, implemented with the [Revise.jl](https://github.com/timholy/Revise.jl) package.
 
-- Bignums are supported, but conversion is not automatic; ordinary integers [overflow](@ref faq-integer-arithmetic).
+- For performance, Julia prefers that operations have [type stability](@ref man-type-stability). Where Common Lisp abstracts away from the underlying machine operations, Julia cleaves closer to them. For example:
+  - Integer division using `/` always returns a floating-point result, even if the computation is exact.
+    - `//` always returns a rational result
+    - `÷` always returns a (truncated) integer result
+  - Bignums are supported, but conversion is not automatic; ordinary integers [overflow](@ref faq-integer-arithmetic).
+  - Complex numbers are supported, but to get complex results, [you need complex inputs](@ref faq-domain-errors).
+  - There are multiple Complex and Rational types, with different component types.
 
 - Modules (namespaces) can be hierarchical. [`import`](@ref) and [`using`](@ref) have a dual role: they load the code and make it available in the namespace. `import` for only the module name is possible (roughly equivalent to `ASDF:LOAD-OP`). Slot names don't need to be exported separately. Global variables can't be assigned to from outside the module (except with `eval(mod, :(var = val))` as an escape hatch).
 
diff --git a/doc/src/manual/parallel-computing.md b/doc/src/manual/parallel-computing.md
index 71c3ba5354c1fc..80df333a8ab04f 100644
--- a/doc/src/manual/parallel-computing.md
+++ b/doc/src/manual/parallel-computing.md
@@ -8,7 +8,7 @@ Julia supports these four categories of concurrent and parallel programming:
     for I/O, event handling, producer-consumer processes, and similar patterns.
     Tasks can synchronize through operations like [`wait`](@ref) and [`fetch`](@ref), and
     communicate via [`Channel`](@ref)s. While strictly not parallel computing by themselves,
-    Julia lets you schedule `Task`s on several threads.
+    Julia lets you schedule [`Task`](@ref)s on several threads.
 
 2. **Multi-threading**:
 
@@ -21,7 +21,7 @@ Julia supports these four categories of concurrent and parallel programming:
 3. **Distributed computing**:
 
     Distributed computing runs multiple Julia processes with separate memory spaces. These can be on the same
-    computer or multiple computers. The `Distributed` standard library provides the capability for remote execution
+    computer or multiple computers. The [`Distributed`](@ref man-distributed) standard library provides the capability for remote execution
     of a Julia function. With this basic building block, it is possible to build many different kinds of
     distributed computing abstractions. Packages like [`DistributedArrays.jl`](https://github.com/JuliaParallel/DistributedArrays.jl)
     are an example of such an abstraction. On the other hand, packages like [`MPI.jl`](https://github.com/JuliaParallel/MPI.jl) and
diff --git a/doc/src/manual/performance-tips.md b/doc/src/manual/performance-tips.md
index 7039daec30a50f..8403b71b524a43 100644
--- a/doc/src/manual/performance-tips.md
+++ b/doc/src/manual/performance-tips.md
@@ -3,13 +3,19 @@
 In the following sections, we briefly go through a few techniques that can help make your Julia
 code run as fast as possible.
 
-## Avoid global variables
+## Performance critical code should be inside a function
 
-A global variable might have its value, and therefore its type, change at any point. This makes
-it difficult for the compiler to optimize code using global variables. Variables should be local,
-or passed as arguments to functions, whenever possible.
+Any code that is performance critical should be inside a function. Code inside functions tends to run much faster than top level code, due to how Julia's compiler works.
 
-Any code that is performance critical or being benchmarked should be inside a function.
+The use of functions is not only important for performance: functions are more reusable and testable, and clarify what steps are being done and what their inputs and outputs are, [Write functions, not just scripts](@ref) is also a recommendation of Julia's Styleguide.
+
+The functions should take arguments, instead of operating directly on global variables, see the next point.
+
+## Avoid untyped global variables
+
+The value of an untyped global variable might change at any point, possibly leading to a change of its type. This makes
+it difficult for the compiler to optimize code using global variables. This also applies to type-valued variables,
+i.e. type aliases on the global level. Variables should be local, or passed as arguments to functions, whenever possible.
 
 We find that global names are frequently constants, and declaring them as such greatly improves
 performance:
@@ -18,7 +24,9 @@ performance:
 const DEFAULT_VAL = 0
 ```
 
-Uses of non-constant globals can be optimized by annotating their types at the point of use:
+If a global is known to always be of the same type, [the type should be annotated](@ref man-typed-globals).
+
+Uses of untyped globals can be optimized by annotating their types at the point of use:
 
 ```julia
 global x = rand(1000)
@@ -70,12 +78,12 @@ julia> function sum_global()
        end;
 
 julia> @time sum_global()
-  0.017705 seconds (15.28 k allocations: 694.484 KiB)
-496.84883432553846
+  0.011539 seconds (9.08 k allocations: 373.386 KiB, 98.69% compilation time)
+523.0007221951678
 
 julia> @time sum_global()
-  0.000140 seconds (3.49 k allocations: 70.313 KiB)
-496.84883432553846
+  0.000091 seconds (3.49 k allocations: 70.156 KiB)
+523.0007221951678
 ```
 
 On the first call (`@time sum_global()`) the function gets compiled. (If you've not yet used [`@time`](@ref)
@@ -106,23 +114,23 @@ julia> function sum_arg(x)
        end;
 
 julia> @time sum_arg(x)
-  0.007701 seconds (821 allocations: 43.059 KiB)
-496.84883432553846
+  0.007551 seconds (3.98 k allocations: 200.548 KiB, 99.77% compilation time)
+523.0007221951678
 
 julia> @time sum_arg(x)
-  0.000006 seconds (5 allocations: 176 bytes)
-496.84883432553846
+  0.000006 seconds (1 allocation: 16 bytes)
+523.0007221951678
 ```
 
-The 5 allocations seen are from running the `@time` macro itself in global scope. If we instead run
+The 1 allocation seen is from running the `@time` macro itself in global scope. If we instead run
 the timing in a function, we can see that indeed no allocations are performed:
 
 ```jldoctest sumarg; filter = r"[0-9\.]+ seconds"
 julia> time_sum(x) = @time sum_arg(x);
 
 julia> time_sum(x)
-  0.000001 seconds
-496.84883432553846
+  0.000002 seconds
+523.0007221951678
 ```
 
 In some situations, your function may need to allocate memory as part of its operation, and this
@@ -318,7 +326,7 @@ Float32
 
 For all practical purposes, such objects behave identically to those of `MyStillAmbiguousType`.
 
-It's quite instructive to compare the sheer amount code generated for a simple function
+It's quite instructive to compare the sheer amount of code generated for a simple function
 
 ```julia
 func(m::MyType) = m.a+1
@@ -335,6 +343,14 @@ For reasons of length the results are not shown here, but you may wish to try th
 the type is fully-specified in the first case, the compiler doesn't need to generate any code
 to resolve the type at run-time. This results in shorter and faster code.
 
+One should also keep in mind that not-fully-parameterized types behave like abstract types. For example, even though a fully specified `Array{T,n}` is concrete, `Array` itself with no parameters given is not concrete:
+
+```jldoctest myambig3
+julia> !isconcretetype(Array), !isabstracttype(Array), isstructtype(Array), !isconcretetype(Array{Int}), isconcretetype(Array{Int,1})
+(true, true, true, true, true)
+```
+In this case, it would be better to avoid declaring `MyType` with a field `a::Array` and instead declare the field as `a::Array{T,N}` or as `a::A`, where `{T,N}` or `A` are parameters of `MyType`.
+
 ### Avoid fields with abstract containers
 
 The same best practices also work for container types:
@@ -347,6 +363,10 @@ julia> struct MySimpleContainer{A<:AbstractVector}
 julia> struct MyAmbiguousContainer{T}
            a::AbstractVector{T}
        end
+
+julia> struct MyAlsoAmbiguousContainer
+           a::Array
+       end
 ```
 
 For example:
@@ -371,6 +391,17 @@ julia> b = MyAmbiguousContainer([1:3;]);
 
 julia> typeof(b)
 MyAmbiguousContainer{Int64}
+
+julia> d = MyAlsoAmbiguousContainer(1:3);
+
+julia> typeof(d), typeof(d.a)
+(MyAlsoAmbiguousContainer, Vector{Int64})
+
+julia> d = MyAlsoAmbiguousContainer(1:1.0:3);
+
+julia> typeof(d), typeof(d.a)
+(MyAlsoAmbiguousContainer, Vector{Float64})
+
 ```
 
 For `MySimpleContainer`, the object is fully-specified by its type and parameters, so the compiler
@@ -641,10 +672,10 @@ julia> function strange_twos(n)
        end;
 
 julia> strange_twos(3)
-3-element Vector{Float64}:
- 2.0
- 2.0
- 2.0
+3-element Vector{Int64}:
+ 2
+ 2
+ 2
 ```
 
 This should be written as:
@@ -663,10 +694,10 @@ julia> function strange_twos(n)
        end;
 
 julia> strange_twos(3)
-3-element Vector{Float64}:
- 2.0
- 2.0
- 2.0
+3-element Vector{Int64}:
+ 2
+ 2
+ 2
 ```
 
 Julia's compiler specializes code for argument types at function boundaries, so in the original
@@ -995,7 +1026,7 @@ consider the two functions:
 ```jldoctest dotfuse
 julia> f(x) = 3x.^2 + 4x + 7x.^3;
 
-julia> fdot(x) = @. 3x^2 + 4x + 7x^3 # equivalent to 3 .* x.^2 .+ 4 .* x .+ 7 .* x.^3;
+julia> fdot(x) = @. 3x^2 + 4x + 7x^3; # equivalent to 3 .* x.^2 .+ 4 .* x .+ 7 .* x.^3
 ```
 
 Both `f` and `fdot` compute the same thing. However, `fdot`
@@ -1507,7 +1538,7 @@ The following examples may help you interpret expressions marked as containing n
         element accesses
 
   * `Base.getfield(%%x, :(:data))::ARRAY{FLOAT64,N} WHERE N`
-      * Interpretation: getting a field that is of non-leaf type. In this case, `ArrayContainer` had a
+      * Interpretation: getting a field that is of non-leaf type. In this case, the type of `x`, say `ArrayContainer`, had a
         field `data::Array{T}`. But `Array` needs the dimension `N`, too, to be a concrete type.
       * Suggestion: use concrete types like `Array{T,3}` or `Array{T,N}`, where `N` is now a parameter
         of `ArrayContainer`
@@ -1593,11 +1624,3 @@ will not require this degree of programmer annotation to attain performance.
 In the mean time, some user-contributed packages like
 [FastClosures](https://github.com/c42f/FastClosures.jl) automate the
 insertion of `let` statements as in `abmult3`.
-
-## Checking for equality with a singleton
-
-When checking if a value is equal to some singleton it can be
-better for performance to check for identicality (`===`) instead of
-equality (`==`). The same advice applies to using `!==` over `!=`.
-These type of checks frequently occur e.g. when implementing the iteration
-protocol and checking if `nothing` is returned from [`iterate`](@ref).
diff --git a/doc/src/manual/profile.md b/doc/src/manual/profile.md
index e9bf79016887a7..c3dc1ca090a46f 100644
--- a/doc/src/manual/profile.md
+++ b/doc/src/manual/profile.md
@@ -297,12 +297,18 @@ on the author's laptop).
 
 ## Memory allocation analysis
 
-One of the most common techniques to improve performance is to reduce memory allocation. The
-total amount of allocation can be measured with [`@time`](@ref) and [`@allocated`](@ref), and
+One of the most common techniques to improve performance is to reduce memory allocation. Julia
+provides several tools measure this:
+
+### `@time`
+
+The total amount of allocation can be measured with [`@time`](@ref) and [`@allocated`](@ref), and
 specific lines triggering allocation can often be inferred from profiling via the cost of garbage
 collection that these lines incur. However, sometimes it is more efficient to directly measure
 the amount of memory allocated by each line of code.
 
+### Line-by-Line Allocation Tracking
+
 To measure allocation line-by-line, start Julia with the `--track-allocation=<setting>` command-line
 option, for which you can choose `none` (the default, do not measure allocation), `user` (measure
 memory allocation everywhere except Julia's core code), or `all` (measure memory allocation at
@@ -321,6 +327,42 @@ you want to analyze, then call [`Profile.clear_malloc_data()`](@ref) to reset al
  Finally, execute the desired commands and quit Julia to trigger the generation of the `.mem`
 files.
 
+### GC Logging
+
+While [`@time`](@ref) logs high-level stats about memory usage and garbage collection over the course
+of evaluating an expression, it can be useful to log each garbage collection event, to get an
+intuitive sense of how often the garbage collector is running, how long it's running each time,
+and how much garbage it collects each time. This can be enabled with
+[`GC.enable_logging(true)`](@ref), which causes Julia to log to stderr every time
+a garbage collection happens.
+
+### Allocation Profiler
+
+The allocation profiler records the stack trace, type, and size of each
+allocation while it is running. It can be invoked with
+[`Profile.Allocs.@profile`](@ref).
+
+This information about the allocations is returned as an array of `Alloc`
+objects, wrapped in an `AllocResults` object. The best way to visualize
+these is currently with the [PProf.jl](https://github.com/JuliaPerf/PProf.jl)
+library, which can visualize the call stacks which are making the most
+allocations.
+
+The allocation profiler does have significant overhead, so a `sample_rate`
+argument can be passed to speed it up by making it skip some allocations.
+Passing `sample_rate=1.0` will make it record everything (which is slow);
+`sample_rate=0.1` will record only 10% of the allocations (faster), etc.
+
+!!! note
+
+    The current implementation of the Allocations Profiler _does not
+    capture types for all allocations._ Allocations for which the profiler
+    could not capture the type are represented as having type
+    `Profile.Allocs.UnknownType`.
+
+    You can read more about the missing types and the plan to improve this, here:
+    https://github.com/JuliaLang/julia/issues/43688.
+
 ## External Profiling
 
 Currently Julia supports `Intel VTune`, `OProfile` and `perf` as external profiling tools.
@@ -338,15 +380,16 @@ For example with `OProfile` you can try a simple recording :
 >opreport -l `which ./julia`
 ```
 
-Or similary with `perf` :
+Or similarly with `perf` :
 
 ```
-$ ENABLE_JITPROFILING=1 perf record -o /tmp/perf.data --call-graph dwarf ./julia /test/fastmath.jl
-$ perf report --call-graph -G
+$ ENABLE_JITPROFILING=1 perf record -o /tmp/perf.data --call-graph dwarf -k 1 ./julia /test/fastmath.jl
+$ perf inject --jit --input /tmp/perf.data --output /tmp/perf-jit.data
+$ perf report --call-graph -G -i /tmp/perf-jit.data
 ```
 
 There are many more interesting things that you can measure about your program, to get a comprehensive list
-please read the [Linux perf examples page](http://www.brendangregg.com/perf.html).
+please read the [Linux perf examples page](https://www.brendangregg.com/perf.html).
 
 Remember that perf saves for each execution a `perf.data` file that, even for small programs, can get
 quite large. Also the perf LLVM module saves temporarily debug objects in `~/.debug/jit`, remember
diff --git a/doc/src/manual/running-external-programs.md b/doc/src/manual/running-external-programs.md
index 67d2f1d7aae94e..e643ffff3ee61d 100644
--- a/doc/src/manual/running-external-programs.md
+++ b/doc/src/manual/running-external-programs.md
@@ -20,6 +20,13 @@ differs in several aspects from the behavior in various shells, Perl, or Ruby:
     interpolating variables and splitting on words as the shell would, respecting shell quoting syntax.
     The command is run as `julia`'s immediate child process, using `fork` and `exec` calls.
 
+
+!!! note
+    The following assumes a Posix environment as on Linux or MacOS.
+    On Windows, many similar commands, such as `echo` and `dir`, are not external programs and instead are built into the shell `cmd.exe` itself.
+    One option to run these commands is to invoke `cmd.exe`, for example `cmd /C echo hello`.
+    Alternatively Julia can be run inside a Posix environment such as Cygwin.
+
 Here's a simple example of running an external program:
 
 ```jldoctest
@@ -33,18 +40,18 @@ julia> run(mycommand);
 hello
 ```
 
-The `hello` is the output of the `echo` command, sent to [`stdout`](@ref). The run method itself
-returns `nothing`, and throws an [`ErrorException`](@ref) if the external command fails to run
-successfully.
+The `hello` is the output of the `echo` command, sent to [`stdout`](@ref). If the external command fails to run
+successfully, the run method throws an [`ErrorException`](@ref).
 
-If you want to read the output of the external command, [`read`](@ref) can be used instead:
+If you want to read the output of the external command, [`read`](@ref) or [`readchomp`](@ref)
+can be used instead:
 
 ```jldoctest
-julia> a = read(`echo hello`, String)
+julia> read(`echo hello`, String)
 "hello\n"
 
-julia> chomp(a) == "hello"
-true
+julia> readchomp(`echo hello`)
+"hello"
 ```
 
 More generally, you can use [`open`](@ref) to read from or write to an external command.
@@ -319,6 +326,8 @@ wait(writer)
 fetch(reader)
 ```
 
+(commonly also, reader is not a separate task, since we immediately `fetch` it anyways).
+
 ### Complex Example
 
 The combination of a high-level programming language, a first-class command abstraction, and automatic
@@ -365,3 +374,38 @@ stages have different latency so they use a different number of parallel workers
 saturated throughput.
 
 We strongly encourage you to try all these examples to see how they work.
+
+## `Cmd` Objects
+The backtick syntax create an object of type [`Cmd`](@ref). Such object may also be constructed directly from
+an existing `Cmd` or list of arguments:
+
+```julia
+run(Cmd(`pwd`, dir=".."))
+run(Cmd(["pwd"], detach=true, ignorestatus=true))
+```
+
+This allows you to specify several aspects of the `Cmd`'s execution environment via keyword arguments. For
+example, the `dir` keyword provides control over the `Cmd`'s working directory:
+
+```jldoctest
+julia> run(Cmd(`pwd`, dir="/"));
+/
+```
+
+And the `env` keyword allows you to set execution environment variables:
+
+```jldoctest
+julia> run(Cmd(`sh -c "echo foo \$HOWLONG"`, env=("HOWLONG" => "ever!",)));
+foo ever!
+```
+
+See [`Cmd`](@ref) for additional keyword arguments. The [`setenv`](@ref) and [`addenv`](@ref) commands
+provide another means for replacing or adding to the `Cmd` execution environment variables, respectively:
+
+```jldoctest
+julia> run(setenv(`sh -c "echo foo \$HOWLONG"`, ("HOWLONG" => "ever!",)));
+foo ever!
+
+julia> run(addenv(`sh -c "echo foo \$HOWLONG"`, "HOWLONG" => "ever!"));
+foo ever!
+```
diff --git a/doc/src/manual/stacktraces.md b/doc/src/manual/stacktraces.md
index fe5dadcd107197..40130d9e7dd445 100644
--- a/doc/src/manual/stacktraces.md
+++ b/doc/src/manual/stacktraces.md
@@ -185,7 +185,7 @@ ERROR: Whoops!
 [...]
 ```
 
-## Exception stacks and `catch_stack`
+## Exception stacks and [`current_exceptions`](@ref)
 
 !!! compat "Julia 1.1"
     Exception stacks requires at least Julia 1.1.
@@ -195,7 +195,7 @@ identify the root cause of a problem. The julia runtime supports this by pushing
 *exception stack* as it occurs. When the code exits a `catch` normally, any exceptions which were pushed onto the stack
 in the associated `try` are considered to be successfully handled and are removed from the stack.
 
-The stack of current exceptions can be accessed using the experimental [`Base.catch_stack`](@ref) function. For example,
+The stack of current exceptions can be accessed using the [`current_exceptions`](@ref) function. For example,
 
 ```julia-repl
 julia> try
@@ -204,9 +204,9 @@ julia> try
            try
                error("(B) An exception while handling the exception")
            catch
-               for (exc, bt) in Base.catch_stack()
+               for (exc, bt) in current_exceptions()
                    showerror(stdout, exc, bt)
-                   println()
+                   println(stdout)
                end
            end
        end
@@ -233,7 +233,7 @@ exiting both catch blocks normally (i.e., without throwing a further exception)
 and are no longer accessible.
 
 The exception stack is stored on the `Task` where the exceptions occurred. When a task fails with uncaught exceptions,
-`catch_stack(task)` may be used to inspect the exception stack for that task.
+`current_exceptions(task)` may be used to inspect the exception stack for that task.
 
 ## Comparison with [`backtrace`](@ref)
 
diff --git a/doc/src/manual/strings.md b/doc/src/manual/strings.md
index 2d4898bbcf2598..be3f76bb99683c 100644
--- a/doc/src/manual/strings.md
+++ b/doc/src/manual/strings.md
@@ -166,6 +166,14 @@ julia> """Contains "quote" characters"""
 "Contains \"quote\" characters"
 ```
 
+Long lines in strings can be broken up by preceding the newline with a backslash (`\`):
+
+```jldoctest
+julia> "This is a long \
+       line"
+"This is a long line"
+```
+
 If you want to extract a character from a string, you index into it:
 
 ```jldoctest helloworldstring
@@ -234,8 +242,9 @@ The former is a single character value of type `Char`, while the latter is a str
 happens to contain only a single character. In Julia these are very different things.
 
 Range indexing makes a copy of the selected part of the original string.
-Alternatively, it is possible to create a view into a string using the type [`SubString`](@ref),
-for example:
+Alternatively, it is possible to create a view into a string using the type [`SubString`](@ref).
+More simply, using the [`@views`](@ref) macro on a block of code converts all string slices
+into substrings.  For example:
 
 ```jldoctest
 julia> str = "long string"
@@ -246,6 +255,9 @@ julia> substr = SubString(str, 1, 4)
 
 julia> typeof(substr)
 SubString{String}
+
+julia> @views typeof(str[1:4]) # @views converts slices to SubStrings
+SubString{String}
 ```
 
 Several standard functions like [`chop`](@ref), [`chomp`](@ref) or [`strip`](@ref)
@@ -335,7 +347,7 @@ julia> s[1:4]
 Because of variable-length encodings, the number of characters in a string (given by [`length(s)`](@ref))
 is not always the same as the last index. If you iterate through the indices 1 through [`lastindex(s)`](@ref)
 and index into `s`, the sequence of characters returned when errors aren't thrown is the sequence
-of characters comprising the string `s`. Thus we have the identity that `length(s) <= lastindex(s)`,
+of characters comprising the string `s`. Thus `length(s) <= lastindex(s)`,
 since each character in a string must have its own index. The following is an inefficient and
 verbose way to iterate through the characters of `s`:
 
@@ -474,17 +486,17 @@ of the concatenated strings, e.g.:
 julia> a, b = "\xe2\x88", "\x80"
 ("\xe2\x88", "\x80")
 
-julia> c = a*b
+julia> c = string(a, b)
 "∀"
 
 julia> collect.([a, b, c])
-3-element Array{Array{Char,1},1}:
+3-element Vector{Vector{Char}}:
  ['\xe2\x88']
  ['\x80']
  ['∀']
 
 julia> length.([a, b, c])
-3-element Array{Int64,1}:
+3-element Vector{Int64}:
  1
  1
  1
@@ -639,6 +651,15 @@ julia> """
 "Hello,\nworld."
 ```
 
+If the newline is removed using a backslash, dedentation will be respected as well:
+
+```jldoctest
+julia> """
+         Averylong\
+         word"""
+"Averylongword"
+```
+
 Trailing whitespace is left unaltered.
 
 Triple-quoted string literals can contain `"` characters without escaping.
@@ -670,29 +691,29 @@ You can search for the index of a particular character using the
 [`findfirst`](@ref) and [`findlast`](@ref) functions:
 
 ```jldoctest
-julia> findfirst(isequal('o'), "xylophone")
+julia> findfirst('o', "xylophone")
 4
 
-julia> findlast(isequal('o'), "xylophone")
+julia> findlast('o', "xylophone")
 7
 
-julia> findfirst(isequal('z'), "xylophone")
+julia> findfirst('z', "xylophone")
 ```
 
 You can start the search for a character at a given offset by using
 the functions [`findnext`](@ref) and [`findprev`](@ref):
 
 ```jldoctest
-julia> findnext(isequal('o'), "xylophone", 1)
+julia> findnext('o', "xylophone", 1)
 4
 
-julia> findnext(isequal('o'), "xylophone", 5)
+julia> findnext('o', "xylophone", 5)
 7
 
-julia> findprev(isequal('o'), "xylophone", 5)
+julia> findprev('o', "xylophone", 5)
 4
 
-julia> findnext(isequal('o'), "xylophone", 8)
+julia> findnext('o', "xylophone", 8)
 ```
 
 You can use the [`occursin`](@ref) function to check if a substring is found within a string:
@@ -739,16 +760,19 @@ Some other useful functions include:
 
 There are situations when you want to construct a string or use string semantics, but the behavior
 of the standard string construct is not quite what is needed. For these kinds of situations, Julia
-provides [non-standard string literals](@ref). A non-standard string literal looks like a regular
-double-quoted string literal, but is immediately prefixed by an identifier, and doesn't behave
-quite like a normal string literal.  Regular expressions, byte array literals and version number
-literals, as described below, are some examples of non-standard string literals. Other examples
-are given in the [Metaprogramming](@ref) section.
+provides non-standard string literals. A non-standard string literal looks like a regular
+double-quoted string literal,
+but is immediately prefixed by an identifier, and may behave differently from a normal string literal.
+
+[Regular expressions](@ref man-regex-literals), [byte array literals](@ref man-byte-array-literals),
+and [version number literals](@ref man-version-number-literals), as described below,
+are some examples of non-standard string literals. Users and packages may also define new non-standard string literals.
+Further documentation is given in the [Metaprogramming](@ref meta-non-standard-string-literals) section.
 
-## Regular Expressions
+## [Regular Expressions](@id man-regex-literals)
 
-Julia has Perl-compatible regular expressions (regexes), as provided by the [PCRE](http://www.pcre.org/)
-library (a description of the syntax can be found [here](http://www.pcre.org/current/doc/html/pcre2syntax.html)). Regular expressions are related to strings in two ways: the obvious connection is that
+Julia has Perl-compatible regular expressions (regexes), as provided by the [PCRE](https://www.pcre.org/)
+library (a description of the syntax can be found [here](https://www.pcre.org/current/doc/html/pcre2syntax.html)). Regular expressions are related to strings in two ways: the obvious connection is that
 regular expressions are used to find regular patterns in strings; the other connection is that
 regular expressions are themselves input as strings, which are parsed into a state machine that
 can be used to efficiently search for patterns in strings. In Julia, regular expressions are input
@@ -798,7 +822,7 @@ else
 end
 ```
 
-If a regular expression does match, the value returned by [`match`](@ref) is a `RegexMatch`
+If a regular expression does match, the value returned by [`match`](@ref) is a [`RegexMatch`](@ref)
 object. These objects record how the expression matches, including the substring that the pattern
 matches and any captured substrings, if there are any. This example only captures the portion
 of the substring that matches, but perhaps we want to capture any non-blank text after the comment
@@ -879,10 +903,10 @@ julia> m.offsets
 ```
 
 It is convenient to have captures returned as an array so that one can use destructuring syntax
-to bind them to local variables:
+to bind them to local variables. As a convenience, the `RegexMatch` object implements iterator methods that pass through to the `captures` field, so you can destructure the match object directly:
 
 ```jldoctest acdmatch
-julia> first, second, third = m.captures; first
+julia> first, second, third = m; first
 "a"
 ```
 
@@ -919,7 +943,7 @@ julia> replace("a", r"." => s"\g<0>1")
 
 You can modify the behavior of regular expressions by some combination of the flags `i`, `m`,
 `s`, and `x` after the closing double quote mark. These flags have the same meaning as they do
-in Perl, as explained in this excerpt from the [perlre manpage](http://perldoc.perl.org/perlre.html#Modifiers):
+in Perl, as explained in this excerpt from the [perlre manpage](https://perldoc.perl.org/perlre#Modifiers):
 
 ```
 i   Do case-insensitive pattern matching.
@@ -1002,16 +1026,20 @@ RegexMatch("Day 10")
 julia> name = "Jon"
 "Jon"
 
-julia> regex_name = Regex("[\"( ]$name[\") ]")  # interpolate value of name
-r"[\"( ]Jon[\") ]"
+julia> regex_name = Regex("[\"( ]\\Q$name\\E[\") ]")  # interpolate value of name
+r"[\"( ]\QJon\E[\") ]"
 
-julia> match(regex_name," Jon ")
+julia> match(regex_name, " Jon ")
 RegexMatch(" Jon ")
 
-julia> match(regex_name,"[Jon]") === nothing
+julia> match(regex_name, "[Jon]") === nothing
 true
 ```
 
+Note the use of the `\Q...\E` escape sequence. All characters between the `\Q` and the `\E`
+are interpreted as literal characters (after string interpolation). This escape sequence can
+be useful when interpolating, possibly malicious, user input.
+
 ## [Byte Array Literals](@id man-byte-array-literals)
 
 Another useful non-standard string literal is the byte-array string literal: `b"..."`. This
@@ -1064,7 +1092,7 @@ julia> x[1]
 0x31
 
 julia> x[1] = 0x32
-ERROR: setindex! not defined for Base.CodeUnits{UInt8, String}
+ERROR: CanonicalIndexError: setindex! not defined for Base.CodeUnits{UInt8, String}
 [...]
 
 julia> Vector{UInt8}(x)
diff --git a/doc/src/manual/style-guide.md b/doc/src/manual/style-guide.md
index 4719f259c8d0e6..cbe7e9b94eefc7 100644
--- a/doc/src/manual/style-guide.md
+++ b/doc/src/manual/style-guide.md
@@ -4,6 +4,10 @@ The following sections explain a few aspects of idiomatic Julia coding style. No
 are absolute; they are only suggestions to help familiarize you with the language and to help
 you choose among alternative designs.
 
+## Indentation
+
+Use 4 spaces per indentation level.
+
 ## Write functions, not just scripts
 
 Writing code as a series of steps at the top level is a quick way to get started solving a problem,
@@ -130,6 +134,32 @@ a = Vector{Union{Int,AbstractString,Tuple,Array}}(undef, n)
 In this case `Vector{Any}(undef, n)` is better. It is also more helpful to the compiler to annotate specific
 uses (e.g. `a[i]::Int`) than to try to pack many alternatives into one type.
 
+## Prefer exported methods over direct field access
+
+Idiomatic Julia code should generally treat a module's exported methods as the
+interface to its types. An object's fields are generally considered
+implementation details and user code should only access them directly if this
+is stated to be the API. This has several benefits:
+
+- Package developers are freer to change the implementation without breaking
+  user code.
+- Methods can be passed to higher-order constructs like [`map`](@ref) (e.g.
+  `map(imag, zs)`) rather than `[z.im for z in zs]`).
+- Methods can be defined on abstract types.
+- Methods can describe a conceptual operation that can be shared across
+  disparate types (e.g. `real(z)` works on Complex numbers or Quaternions).
+
+Julia's dispatch system encourages this style because `play(x::MyType)` only
+defines the `play` method on that particular type, leaving other types to
+have their own implementation.
+
+Similarly, non-exported functions are typically internal and subject to change,
+unless the documentations states otherwise. Names sometimes are given a `_` prefix
+(or suffix) to further suggest that something is "internal" or an
+implementation-detail, but it is not a rule.
+
+Counter-examples to this rule include [`NamedTuple`](@ref), [`RegexMatch`](@ref match), [`StatStruct`](@ref stat).
+
 ## Use naming conventions consistent with Julia `base/`
 
   * modules and type names use capitalization and camel case: `module SparseArrays`, `struct UnitRange`.
@@ -137,6 +167,7 @@ uses (e.g. `a[i]::Int`) than to try to pack many alternatives into one type.
     words squashed together ([`isequal`](@ref), [`haskey`](@ref)). When necessary, use underscores
     as word separators. Underscores are also used to indicate a combination of concepts ([`remotecall_fetch`](@ref)
     as a more efficient implementation of `fetch(remotecall(...))`) or as modifiers.
+  * functions mutating at least one of their arguments end in `!`.
   * conciseness is valued, but avoid abbreviation ([`indexin`](@ref) rather than `indxin`) as
     it becomes difficult to remember whether and how particular words are abbreviated.
 
@@ -314,8 +345,7 @@ to behave in a certain way, and overly customizing its behavior can make it hard
 ## Avoid type piracy
 
 "Type piracy" refers to the practice of extending or redefining methods in Base
-or other packages on types that you have not defined. In some cases, you can get away with
-type piracy with little ill effect. In extreme cases, however, you can even crash Julia
+or other packages on types that you have not defined. In extreme cases, you can crash Julia
 (e.g. if your method extension or redefinition causes invalid input to be passed to a
 `ccall`). Type piracy can complicate reasoning about code, and may introduce
 incompatibilities that are hard to predict and diagnose.
diff --git a/doc/src/manual/types.md b/doc/src/manual/types.md
index bb02de735e5f9f..2a4d7a4e05b6cb 100644
--- a/doc/src/manual/types.md
+++ b/doc/src/manual/types.md
@@ -171,11 +171,11 @@ Let's consider some of the abstract types that make up Julia's numerical hierarc
 
 ```julia
 abstract type Number end
-abstract type Real     <: Number end
+abstract type Real          <: Number end
 abstract type AbstractFloat <: Real end
-abstract type Integer  <: Real end
-abstract type Signed   <: Integer end
-abstract type Unsigned <: Integer end
+abstract type Integer       <: Real end
+abstract type Signed        <: Integer end
+abstract type Unsigned      <: Integer end
 ```
 
 The [`Number`](@ref) type is a direct child type of `Any`, and [`Real`](@ref) is its child.
@@ -459,6 +459,30 @@ To recap, two essential properties define immutability in Julia:
       functions as pointers to heap-allocated values except in cases where the compiler
       is sure that there's no way to tell that this is not what is happening.
 
+In cases where one or more fields of an otherwise mutable struct is known to be immutable,
+one can declare these fields as such using `const` as shown below. This enables some,
+but not all of the optimizations of immutable structs, and can be used to enforce invariants
+on the particular fields marked as `const`.
+
+!!! compat "Julia 1.8"
+    `const` annotating fields of mutable structs requires at least Julia 1.8.
+
+```jldoctest baztype
+julia> mutable struct Baz
+           a::Int
+           const b::Float64
+       end
+
+julia> baz = Baz(1, 1.5);
+
+julia> baz.a = 2
+2
+
+julia> baz.b = 2.0
+ERROR: setfield!: const field .b of type Baz cannot be changed
+[...]
+```
+
 ## [Declared Types](@id man-declared-types)
 
 The three kinds of types (abstract, primitive, composite) discussed in the previous
@@ -896,12 +920,12 @@ signature (when the signature matches).
 
 ### Vararg Tuple Types
 
-The last parameter of a tuple type can be the special type [`Vararg`](@ref), which denotes any number
+The last parameter of a tuple type can be the special value [`Vararg`](@ref), which denotes any number
 of trailing elements:
 
 ```jldoctest
 julia> mytupletype = Tuple{AbstractString,Vararg{Int}}
-Tuple{AbstractString, Vararg{Int64, N} where N}
+Tuple{AbstractString, Vararg{Int64}}
 
 julia> isa(("1",), mytupletype)
 true
@@ -916,10 +940,11 @@ julia> isa(("1",1,2,3.0), mytupletype)
 false
 ```
 
-Notice that `Vararg{T}` corresponds to zero or more elements of type `T`. Vararg tuple types are
+Moreover `Vararg{T}` corresponds to zero or more elements of type `T`. Vararg tuple types are
 used to represent the arguments accepted by varargs methods (see [Varargs Functions](@ref)).
 
-The type `Vararg{T,N}` corresponds to exactly `N` elements of type `T`.  `NTuple{N,T}` is a convenient
+The special value `Vararg{T,N}` (when used as the last parameter of a tuple type)
+corresponds to exactly `N` elements of type `T`.  `NTuple{N,T}` is a convenient
 alias for `Tuple{Vararg{T,N}}`, i.e. a tuple type containing exactly `N` elements of type `T`.
 
 ### Named Tuple Types
@@ -1032,8 +1057,8 @@ The `where` keyword itself can be nested inside a more complex declaration. For
 consider the two types created by the following declarations:
 
 ```jldoctest
-julia> const T1 = Array{Array{T,1} where T, 1}
-Vector{Vector{T} where T} (alias for Array{Array{T, 1} where T, 1})
+julia> const T1 = Array{Array{T, 1} where T, 1}
+Vector{Vector} (alias for Array{Array{T, 1} where T, 1})
 
 julia> const T2 = Array{Array{T, 1}, 1} where T
 Array{Vector{T}, 1} where T
@@ -1107,6 +1132,50 @@ julia> NoFieldsParam{Int}() === NoFieldsParam{Int}()
 true
 ```
 
+## Types of functions
+
+Each function has its own type, which is a subtype of `Function`.
+
+```jldoctest foo41
+julia> foo41(x) = x + 1
+foo41 (generic function with 1 method)
+
+julia> typeof(foo41)
+typeof(foo41) (singleton type of function foo41, subtype of Function)
+```
+
+Note how `typeof(foo41)` prints as itself. This is merely a convention for printing, as it is a first-class object that can be used like any other value:
+
+```jldoctest foo41
+julia> T = typeof(foo41)
+typeof(foo41) (singleton type of function foo41, subtype of Function)
+
+julia> T <: Function
+true
+```
+
+Types of functions defined at top-level are singletons. When necessary, you can compare them with [`===`](@ref).
+
+[Closures](@ref man-anonymous-functions) also have their own type, which is usually printed with names that end in `#<number>`. Names and types for functions defined at different locations are distinct, but not guaranteed to be printed the same way across sessions.
+
+```jldoctest; filter = r"[0-9\.]+"
+julia> typeof(x -> x + 1)
+var"#9#10"
+```
+
+Types of closures are not necessarily singletons.
+
+```jldoctest
+julia> addy(y) = x -> x + y
+addy (generic function with 1 method)
+
+julia> Base.issingletontype(addy(1))
+false
+
+julia> addy(1) === addy(2)
+false
+```
+
 ## [`Type{T}` type selectors](@id man-typet-type)
 
 For each type `T`, `Type{T}` is an abstract parametric type whose only instance is the
@@ -1181,7 +1250,7 @@ While `Type` is part of Julia's type hierarchy like any other abstract parametri
 is not commonly used outside method signatures except in some special cases. Another
 important use case for `Type` is sharpening field types which would otherwise be captured
 less precisely, e.g. as [`DataType`](@ref man-declared-types) in the example below where the
-default constuctor could lead to performance problems in code relying on the precise wrapped
+default constructor could lead to performance problems in code relying on the precise wrapped
 type (similarly to [abstract type parameters](@ref man-performance-abstract-container)).
 
 ```jldoctest
@@ -1341,7 +1410,7 @@ REPL and other interactive environments, and also a more compact single-line for
 [`print`](@ref) or for displaying the object as part of another object (e.g. in an array). Although
 by default the `show(io, z)` function is called in both cases, you can define a *different* multi-line
 format for displaying an object by overloading a three-argument form of `show` that takes the
-`text/plain` MIME type as its second argument (see [Multimedia I/O](@ref)), for example:
+`text/plain` MIME type as its second argument (see [Multimedia I/O](@ref Multimedia-I/O)), for example:
 
 ```jldoctest polartype
 julia> Base.show(io::IO, ::MIME"text/plain", z::Polar{T}) where{T} =
@@ -1364,7 +1433,7 @@ julia> [Polar(3, 4.0), Polar(4.0,5.3)]
 where the single-line `show(io, z)` form is still used for an array of `Polar` values.   Technically,
 the REPL calls `display(z)` to display the result of executing a line, which defaults to `show(stdout, MIME("text/plain"), z)`,
 which in turn defaults to `show(stdout, z)`, but you should *not* define new [`display`](@ref)
-methods unless you are defining a new multimedia display handler (see [Multimedia I/O](@ref)).
+methods unless you are defining a new multimedia display handler (see [Multimedia I/O](@ref Multimedia-I/O)).
 
 Moreover, you can also define `show` methods for other MIME types in order to enable richer display
 (HTML, images, etcetera) of objects in environments that support this (e.g. IJulia).   For example,
diff --git a/doc/src/manual/unicode-input.md b/doc/src/manual/unicode-input.md
index 489b256cbdea25..7539e75bb4f244 100644
--- a/doc/src/manual/unicode-input.md
+++ b/doc/src/manual/unicode-input.md
@@ -46,7 +46,7 @@ end
 
 # Surround combining characters with no-break spaces (i.e '\u00A0'). Follows the same format
 # for how unicode is displayed on the unicode.org website:
-# http://unicode.org/cldr/utility/character.jsp?a=0300
+# https://util.unicode.org/UnicodeJsps/character.jsp?a=0300
 function fix_combining_chars(char)
     cat = Base.Unicode.category_code(char)
     return cat == 6 || cat == 8 ? "$NBSP$char$NBSP" : "$char"
diff --git a/doc/src/manual/variables-and-scoping.md b/doc/src/manual/variables-and-scoping.md
index ba20aaf0321a7a..ca6ebc2157b71c 100644
--- a/doc/src/manual/variables-and-scoping.md
+++ b/doc/src/manual/variables-and-scoping.md
@@ -1,7 +1,7 @@
 # [Scope of Variables](@id scope-of-variables)
 
-The *scope* of a variable is the region of code within which a variable is visible. Variable scoping
-helps avoid variable naming conflicts. The concept is intuitive: two functions can both have
+The *scope* of a variable is the region of code within which a variable is accessible. Variable
+scoping helps avoid variable naming conflicts. The concept is intuitive: two functions can both have
 arguments called `x` without the two `x`'s referring to the same thing. Similarly, there are many
 other cases where different blocks of code can use the same name without referring to the same
 thing. The rules for when the same variable name does or doesn't refer to the same thing are called
@@ -91,26 +91,59 @@ julia> module D
            b = a # errors as D's global scope is separate from A's
        end;
 ERROR: UndefVarError: a not defined
+```
 
-julia> module E
-           import ..A # make module A available
-           A.a = 2    # throws below error
-       end;
-ERROR: cannot assign variables in other modules
+If a top-level expression contains a variable declaration with keyword `local`,
+then that variable is not accessible outside that expression.
+The variable inside the expression does not affect global variables of the same name.
+An example is to declare `local x` in a `begin` or `if` block at the top-level:
+
+```jldoctest
+julia> x = 1
+       begin
+           local x = 0
+           @show x
+       end
+       @show x;
+x = 0
+x = 1
 ```
 
 Note that the interactive prompt (aka REPL) is in the global scope of the module `Main`.
 
 ## Local Scope
 
-A new local scope is introduced by most code blocks (see above [table](@ref man-scope-table) for a
-complete list). Some programming languages require explicitly declaring new variables before using
-them. Explicit declaration works in Julia too: in any local scope, writing `local x` declares a new
-local variable in that scope, regardless of whether there is already a variable named `x` in an
-outer scope or not. Declaring each new local like this is somewhat verbose and tedious, however, so
-Julia, like many other languages, considers assignment to a new variable in a local scope to
-implicitly declare that variable as a new local. Mostly this is pretty intuitive, but as with many
-things that behave intuitively, the details are more subtle than one might naïvely imagine.
+A new local scope is introduced by most code blocks (see above [table](@ref
+man-scope-table) for a complete list). If such a block is syntactically nested
+inside of another local scope, the scope it creates is nested inside of all the
+local scopes that it appears within, which are all ultimately nested inside of
+the global scope of the module in which the code is evaluated. Variables in
+outer scopes are visible from any scope they contain — meaning that they can be
+read and written in inner scopes — unless there is a local variable with the
+same name that "shadows" the outer variable of the same name. This is true even
+if the outer local is declared after (in the sense of textually below) an inner
+block. When we say that a variable "exists" in a given scope, this means that a
+variable by that name exists in any of the scopes that the current scope is
+nested inside of, including the current one.
+
+Some programming languages require explicitly declaring new variables before
+using them. Explicit declaration works in Julia too: in any local scope, writing
+`local x` declares a new local variable in that scope, regardless of whether
+there is already a variable named `x` in an outer scope or not. Declaring each
+new variable like this is somewhat verbose and tedious, however, so Julia, like
+many other languages, considers assignment to a variable name that doesn't
+already exist to implicitly declare that variable. If the current scope is
+global, the new variable is global; if the current scope is local, the new
+variable is local to the innermost local scope and will be visible inside of
+that scope but not outside of it. If you assign to an existing local, it
+_always_ updates that existing local: you can only shadow a local by explicitly
+declaring a new local in a nested scope with the `local` keyword. In particular,
+this applies to variables assigned in inner functions, which may surprise users
+coming from Python where assignment in an inner function creates a new local
+unless the variable is explicitly declared to be non-local.
+
+Mostly this is pretty intuitive, but as with many things that behave
+intuitively, the details are more subtle than one might naïvely imagine.
 
 When `x = <value>` occurs in a local scope, Julia applies the following rules to decide what the
 expression means based on where the assignment expression occurs and what `x` already refers to at
@@ -119,7 +152,7 @@ that location:
 1. **Existing local:** If `x` is *already a local variable*, then the existing local `x` is
    assigned;
 2. **Hard scope:** If `x` is *not already a local variable* and assignment occurs inside of any
-   hard scope construct (i.e. within a let block, function or macro body, comprehension, or
+   hard scope construct (i.e. within a `let` block, function or macro body, comprehension, or
    generator), a new local named `x` is created in the scope of the assignment;
 3. **Soft scope:** If `x` is *not already a local variable* and all of the scope constructs
    containing the assignment are soft scopes (loops, `try`/`catch` blocks, or `struct` blocks), the
@@ -183,9 +216,15 @@ Since the `x` in `greet` is local, the value (or lack thereof) of the global `x`
 calling `greet`. The hard scope rule doesn't care whether a global named `x` exists or not:
 assignment to `x` in a hard scope is local (unless `x` is declared global).
 
-The next clear cut situation we'll consider is when there is already a local variable named `x`, in
-which case `x = <value>` always assigns to this existing local `x`.  The function `sum_to` computes
-the sum of the numbers from one up to `n`:
+The next clear cut situation we'll consider is when there is already a local
+variable named `x`, in which case `x = <value>` always assigns to this existing
+local `x`. This is true whether the assignment occurs in the same local scope,
+an inner local scope in the same function body, or in the body of a function
+nested inside of another function, also known as a
+[closure](https://en.wikipedia.org/wiki/Closure_(computer_programming)).
+
+We'll use the `sum_to` function, which computes the sum of integers from one up
+to `n`, as an example:
 
 ```julia
 function sum_to(n)
@@ -252,6 +291,44 @@ introduces a hard scope, the assignment causes `t` to become a new local variabl
 where it appears, i.e. inside of the loop body. Even if there were a global named `t`, it would make
 no difference—the hard scope rule isn't affected by anything in global scope.
 
+Note that the local scope of a for loop body is no different from the local
+scope of an inner function. This means that we could rewrite this example so
+that the loop body is implemented as a call to an inner helper function and it
+behaves the same way:
+
+```jldoctest
+julia> function sum_to_def_closure(n)
+           function loop_body(i)
+               t = s + i # new local `t`
+               s = t # assign same local `s` as below
+           end
+           s = 0 # new local
+           for i = 1:n
+               loop_body(i)
+           end
+           return s, @isdefined(t)
+       end
+sum_to_def_closure (generic function with 1 method)
+
+julia> sum_to_def_closure(10)
+(55, false)
+```
+
+This example illustrates a couple of key points:
+
+1. Inner function scopes are just like any other nested local scope. In
+   particular, if a variable is already a local outside of an inner function and
+   you assign to it in the inner function, the outer local variable is updated.
+
+2. It doesn't matter if the definition of an outer local happens below where it
+   is updated, the rule remains the same. The entire enclosing local scope is
+   parsed and its locals determined before inner local meanings are resolved.
+
+This design means that you can generally move code in or out of an inner
+function without changing its meaning, which facilitates a number of common
+idioms in the language using closures (see [do blocks](@ref
+Do-Block-Syntax-for-Function-Arguments)).
+
 Let's move onto some more ambiguous cases covered by the soft scope rule. We'll explore this by
 extracting the bodies of the `greet` and `sum_to_def` functions into soft scope contexts. First, let's put the
 body of `greet` in a `for` loop—which is soft, rather than hard—and evaluate it in the REPL:
@@ -356,7 +433,7 @@ evaluated first. One might imagine that the `s` on the first line of the loop co
 the `s` on the second line of the loop is local, but that's not possible since the two lines are in
 the same scope block and each variable can only mean one thing in a given scope.
 
-#### On Soft Scope
+#### [On Soft Scope](@id on-soft-scope)
 
 We have now covered all the local scope rules, but before wrapping up this section, perhaps a few
 words should be said about why the ambiguous soft scope case is handled differently in interactive
@@ -449,7 +526,7 @@ prints this very direct warning:
 This addresses both issues while preserving the "programming at scale" benefits of the 1.0 behavior:
 global variables have no spooky effect on the meaning of code that may be far away; in the REPL
 copy-and-paste debugging works and beginners don't have any issues; any time someone either forgets
-a `global` annotation or accidentally shadows an existing global with a local in a soft scope,
+a `global` annotation or accidentally shadows an existing global with a local in a soft scope,
 which would be confusing anyway, they get a nice clear warning.
 
 An important property of this design is that any code that executes in a file without a warning will
@@ -458,11 +535,21 @@ file, if it behaves differently than it did in the REPL, then you will get a war
 
 ### Let Blocks
 
-Unlike assignments to local variables, `let` statements allocate new variable bindings each time
-they run. An assignment modifies an existing value location, and `let` creates new locations.
-This difference is usually not important, and is only detectable in the case of variables that
-outlive their scope via closures. The `let` syntax accepts a comma-separated series of assignments
-and variable names:
+`let` statements create a new *hard scope* block (see above) and introduce new variable
+bindings each time they run. The variable need not be immediately assigned:
+```jldoctest
+julia> var1 = let x
+           for i in 1:5
+               (i == 4) && (x = i; break)
+           end
+           x
+       end
+4
+```
+Whereas assignments might reassign a new value to an existing value location, `let` always creates a
+new location. This difference is usually not important, and is only detectable in the case of
+variables that outlive their scope via closures. The `let` syntax accepts a comma-separated series of
+assignments and variable names:
 
 ```jldoctest
 julia> x, y, z = -1, -1, -1;
@@ -517,7 +604,7 @@ julia> Fs[2]()
 ```
 
 Since the `begin` construct does not introduce a new scope, it can be useful to use a zero-argument
-`let` to just introduce a new scope block without creating any new bindings:
+`let` to just introduce a new scope block without creating any new bindings immediately:
 
 ```jldoctest
 julia> let
@@ -531,7 +618,16 @@ julia> let
 ```
 
 Since `let` introduces a new scope block, the inner local `x` is a different variable than the
-outer local `x`.
+outer local `x`. This particular example is equivalent to:
+
+```jldoctest
+julia> let x = 1
+           let x = 2
+           end
+           x
+       end
+1
+```
 
 ### Loops and Comprehensions
 
@@ -697,3 +793,58 @@ WARNING: redefinition of constant x. This may fail, cause incorrect answers, or
 julia> f()
 1
 ```
+
+## [Typed Globals](@id man-typed-globals)
+
+!!! compat "Julia 1.8"
+    Support for typed globals was added in Julia 1.8
+
+Similar to being declared as constants, global bindings can also be declared to always be of a
+constant type. This can either be done without assigning an actual value using the syntax
+`global x::T` or upon assignment as `x::T = 123`.
+
+```jldoctest
+julia> x::Float64 = 2.718
+2.718
+
+julia> f() = x
+f (generic function with 1 method)
+
+julia> Base.return_types(f)
+1-element Vector{Any}:
+ Float64
+```
+
+For any assignment to a global, Julia will first try to convert it to the appropriate type using
+[`convert`](@ref):
+
+```jldoctest
+julia> global y::Int
+
+julia> y = 1.0
+1.0
+
+julia> y
+1
+
+julia> y = 3.14
+ERROR: InexactError: Int64(3.14)
+Stacktrace:
+[...]
+```
+
+The type does not need to be concrete, but annotations with abstract types typically have little
+performance benefit.
+
+Once a global has either been assigned to or its type has been set, the binding type is not allowed
+to change:
+
+```jldoctest
+julia> x = 1
+1
+
+julia> global x::Int
+ERROR: cannot set type for global x. It already has a value or is already set to a different type.
+Stacktrace:
+[...]
+```
diff --git a/doc/src/manual/variables.md b/doc/src/manual/variables.md
index e7b2899dd931bc..0dfc4f508577f0 100644
--- a/doc/src/manual/variables.md
+++ b/doc/src/manual/variables.md
@@ -54,8 +54,8 @@ julia> 안녕하세요 = "Hello"
 
 In the Julia REPL and several other Julia editing environments, you can type many Unicode math
 symbols by typing the backslashed LaTeX symbol name followed by tab. For example, the variable
-name `δ` can be entered by typing `\delta`-*tab*, or even `α̂₂` by `\alpha`-*tab*-`\hat`-
-*tab*-`\_2`-*tab*. (If you find a symbol somewhere, e.g. in someone else's code,
+name `δ` can be entered by typing `\delta`-*tab*, or even `α̂⁽²⁾` by `\alpha`-*tab*-`\hat`-
+*tab*-`\^(2)`-*tab*. (If you find a symbol somewhere, e.g. in someone else's code,
 that you don't know how to type, the REPL help will tell you: just type `?` and
 then paste the symbol.)
 
@@ -81,19 +81,19 @@ julia> pi
 π = 3.1415926535897...
 
 julia> pi = 3
-ERROR: cannot assign a value to variable MathConstants.pi from module Main
+ERROR: cannot assign a value to imported variable MathConstants.pi from module Main
 
 julia> sqrt(100)
 10.0
 
 julia> sqrt = 4
-ERROR: cannot assign a value to variable Base.sqrt from module Main
+ERROR: cannot assign a value to imported variable Base.sqrt from module Main
 ```
 
-## Allowed Variable Names
+## [Allowed Variable Names](@id man-allowed-variable-names)
 
 Variable names must begin with a letter (A-Z or a-z), underscore, or a subset of Unicode code
-points greater than 00A0; in particular, [Unicode character categories](http://www.fileformat.info/info/unicode/category/index.htm)
+points greater than 00A0; in particular, [Unicode character categories](https://www.fileformat.info/info/unicode/category/index.htm)
 Lu/Ll/Lt/Lm/Lo/Nl (letters), Sc/So (currency and other symbols), and a few other letter-like characters
 (e.g. a subset of the Sm math symbols) are allowed. Subsequent characters may also include ! and
 digits (0-9 and other characters in categories Nd/No), as well as other Unicode code points: diacritics
@@ -110,7 +110,20 @@ A space is required between an operator that ends with a subscript/superscript l
 variable name. For example, if `+ᵃ` is an operator, then `+ᵃx` must be written as `+ᵃ x` to distinguish
 it from `+ ᵃx` where `ᵃx` is the variable name.
 
-The only explicitly disallowed names for variables are the names of the built-in [Keywords](@ref):
+
+A particular class of variable names is one that contains only underscores. These identifiers can only be assigned values but cannot be used to assign values to other variables.
+More technically, they can only be used as an [L-value](https://en.wikipedia.org/wiki/Value_(computer_science)#lrvalue), but not as an
+ [R-value](https://en.wikipedia.org/wiki/R-value):
+
+```julia-repl
+julia> x, ___ = size([2 2; 1 1])
+(2, 2)
+
+julia> y = ___
+ERROR: syntax: all-underscore identifier used as rvalue
+```
+
+The only explicitly disallowed names for variables are the names of the built-in [Keywords](@ref Keywords):
 
 ```julia-repl
 julia> else = false
@@ -123,9 +136,14 @@ ERROR: syntax: unexpected "="
 Some Unicode characters are considered to be equivalent in identifiers.
 Different ways of entering Unicode combining characters (e.g., accents)
 are treated as equivalent (specifically, Julia identifiers are [NFC](http://www.macchiato.com/unicode/nfc-faq)-normalized).
-The Unicode characters `ɛ` (U+025B: Latin small letter open e)
-and `µ` (U+00B5: micro sign) are treated as equivalent to the corresponding
-Greek letters, because the former are easily accessible via some input methods.
+Julia also includes a few non-standard equivalences for characters that are
+visually similar and are easily entered by some input methods. The Unicode
+characters `ɛ` (U+025B: Latin small letter open e) and `µ` (U+00B5: micro sign)
+are treated as equivalent to the corresponding Greek letters. The middle dot
+`·` (U+00B7) and the Greek
+[interpunct](https://en.wikipedia.org/wiki/Interpunct) `·` (U+0387) are both
+treated as the mathematical dot operator `⋅` (U+22C5).
+The minus sign `−` (U+2212) is treated as equivalent to the hyphen-minus sign `-` (U+002D).
 
 ## Stylistic Conventions
 
diff --git a/doc/src/manual/workflow-tips.md b/doc/src/manual/workflow-tips.md
index e340b19987c0bd..4085a51ff91312 100644
--- a/doc/src/manual/workflow-tips.md
+++ b/doc/src/manual/workflow-tips.md
@@ -64,8 +64,9 @@ line. A common pattern includes the following elements:
 
 ## Browser-based workflow
 
-It is also possible to interact with a Julia REPL in the browser via [IJulia](https://github.com/JuliaLang/IJulia.jl).
-See the package home for details.
+There are a few ways to interact with Julia in a browser:
+- Using Pluto notebooks through [Pluto.jl](https://github.com/fonsp/Pluto.jl)
+- Using Jupyter notebooks through [IJulia.jl](https://github.com/JuliaLang/IJulia.jl)
 
 ## Revise-based workflows
 
@@ -104,7 +105,7 @@ the following modifications:
 
      Navigate to your temporary directory and launch Julia, then do the following:
 
-     ```julia
+     ```julia-repl
      pkg> generate MyPkg            # type ] to enter pkg mode
      julia> push!(LOAD_PATH, pwd())   # hit backspace to exit pkg mode
      ```
@@ -123,7 +124,7 @@ the following modifications:
    Then navigate to the directory containing your test file (here
    assumed to be `"runtests.jl"`) and do the following:
 
-   ```julia
+   ```julia-repl
    julia> using MyPkg
 
    julia> include("runtests.jl")
@@ -131,5 +132,4 @@ the following modifications:
 
    You can iteratively modify the code in MyPkg in your editor and re-run the
    tests with `include("runtests.jl")`.  You generally should not need to restart
-   your Julia session to see the changes take effect (subject to a few limitations,
-   see https://timholy.github.io/Revise.jl/stable/limitations/).
+   your Julia session to see the changes take effect (subject to a few [limitations](https://timholy.github.io/Revise.jl/stable/limitations/)).
diff --git a/julia.spdx.json b/julia.spdx.json
new file mode 100644
index 00000000000000..2d047efdd78db9
--- /dev/null
+++ b/julia.spdx.json
@@ -0,0 +1,613 @@
+{
+    "spdxVersion": "SPDX-2.2",
+    "dataLicense": "CC0-1.0",
+    "SPDXID": "SPDXRef-DOCUMENT",
+    "name": "julia-spdx",
+    "documentNamespace": "https://julialang.org/spdxdocs/julia-spdx-dfcfa3b6-fcb6-4ad1-99e0-deb7bec44bee",
+    "creationInfo": {
+        "creators": [
+            "Organization: julialang.org ()",
+            "Person: Simon Avery ()"
+        ],
+        "created": "2022-02-16T11:46:38Z"
+    },
+    "documentDescribes": [
+        "SPDXRef-JuliaMain"
+    ],
+    "packages": [
+        {
+            "name": "Julia",
+            "SPDXID": "SPDXRef-JuliaMain",
+            "versionInfo": "1.8.0-DEV",
+            "packageFileName": "./",
+            "downloadLocation": "git+https://github.com/JuliaLang/julia.git@v1.8.0-DEV",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors",
+            "summary": "Julia is a high-level, high-performance dynamic language for technical computing.",
+            "comment": "In addition to the source code described by this package, Julia pulls in code from many other respositories, which are also described in this document. See relationships for details."
+        },
+        {
+            "name": "Pkg.jl",
+            "SPDXID": "SPDXRef-JuliaPkg",
+            "downloadLocation": "git+https://github.com/JuliaLang/Pkg.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/Pkg.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2017-2021: Stefan Karpinski, Kristoffer Carlsson, Fredrik Ekre, David Varela, Ian Butterworth, and contributors: https://github.com/JuliaLang/Pkg.jl/graphs/contributors",
+            "summary": "Julia's package manager, shipped with Julia v1.0 and above"
+        },
+        {
+            "name": "Statistics.jl",
+            "SPDXID": "SPDXRef-JuliaStatistics",
+            "downloadLocation": "git+https://github.com/JuliaLang/Statistics.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/Statistics.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2012-2016: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, Dahua Lin, Simon Byrne, Andreas Noack, Douglas Bates, John Myles White, Simon Kornblith, and other contributors.",
+            "summary": "Development repository for the Statistics standard library (stdlib) that ships with Julia."
+        },
+        {
+            "name": "libCURL.jl",
+            "SPDXID": "SPDXRef-JuliaCurl",
+            "downloadLocation": "git+https://github.com/JuliaWeb/LibCURL.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/libCURL.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2013: JuliaWeb contributors",
+            "summary": "Julia wrapper for libCURL"
+        },
+        {
+            "name": "Downloads.jl",
+            "SPDXID": "SPDXRef-JuliaDownloads",
+            "downloadLocation": "git+https://github.com/JuliaLang/Downloads.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/Downloads.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2020 Stefan Karpinski <stefan@karpinski.org> and contributors",
+            "summary": "The Downloads package provides a single function, download, which provides cross-platform, multi-protocol, in-process download functionality implemented with libcurl."
+        },
+        {
+            "name": "ArgTools.jl",
+            "SPDXID": "SPDXRef-JuliaArgTools",
+            "downloadLocation": "git+https://github.com/JuliaIO/ArgTools.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/ArgTools.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2020 Stefan Karpinski <stefan@karpinski.org> and contributors",
+            "summary": "ArgTools provides tools for creating consistent, flexible APIs that work with various kinds of function arguments."
+        },
+        {
+            "name": "Tar.jl",
+            "SPDXID": "SPDXRef-JuliaTar",
+            "downloadLocation": "git+https://github.com/JuliaIO/Tar.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/Tar.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2019 Stefan Karpinski <stefan@karpinski.org> and contributors",
+            "summary": "The Tar package can list, extract and create POSIX TAR archives (tarballs) as specified in POSIX 1003.1-2001."
+        },
+        {
+            "name": "NetworkOptions.jl",
+            "SPDXID": "SPDXRef-JuliaNetworkOptions",
+            "downloadLocation": "git+https://github.com/JuliaLang/NetworkOptions.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/NetworkOptions.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2020 Stefan Karpinski <stefan@karpinski.org> and contributors",
+            "summary": "The NetworkOptions package acts as a mediator between ways of configuring network transport mechanisms (SSL/TLS, SSH, proxies, etc.) and Julia packages that provide access to transport mechanisms."
+        },
+        {
+            "name": "SuiteSparse.jl",
+            "SPDXID": "SPDXRef-JuliaSuiteSparse",
+            "downloadLocation": "git+https://github.com/JuliaSparse/SuiteSparse.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/SuiteSparse.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors",
+            "summary": "SuiteSparse.jl provides Julia wrappers for the SuiteSparse library, and provides Julia's sparse linear algebra capabilities - specifically the solvers."
+        },
+        {
+            "name": "SparseArrays.jl",
+            "SPDXID": "SPDXRef-JuliaSparseArrays",
+            "downloadLocation": "git+https://github.com/JuliaSparse/SparseArrays.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/SparseArrays.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2009-2021: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, and other contributors: https://github.com/JuliaLang/julia/contributors",
+            "summary": "SparseArrays.jl provides Julia's sparse linear algebra capabilities."
+        },
+        {
+            "name": "SHA.jl",
+            "SPDXID": "SPDXRef-JuliaSHA",
+            "downloadLocation": "git+https://github.com/JuliaCrypto/SHA.jl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/SHA.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2014: Elliot Saba",
+            "summary": "A performant, 100% native-julia SHA1, SHA2, and SHA3 implementation"
+        },
+        {
+            "name": "dSFMT",
+            "SPDXID": "SPDXRef-dSFMT",
+            "downloadLocation": "git+https://github.com/MersenneTwister-Lab/dSFMT.git",
+            "filesAnalyzed": false,
+            "homepage": "https://github.com/MersenneTwister-Lab/dSFMT",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "BSD-3-Clause",
+            "licenseDeclared": "BSD-3-Clause",
+            "copyrightText": "Copyright (c) 2007, 2008, 2009 Mutsuo Saito, Makoto Matsumoto and Hiroshima University. Copyright (c) 2011, 2002 Mutsuo Saito, Makoto Matsumoto, Hiroshima University and The University of Tokyo.",
+            "summary": "Double precision SIMD-oriented Fast Mersenne Twister"
+        },
+        {
+            "name": "OpenLibm",
+            "SPDXID": "SPDXRef-OpenLibm",
+            "downloadLocation": "git+https://github.com/JuliaMath/openlibm.git",
+            "filesAnalyzed": false,
+            "homepage": "https://julialang.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/openlibm.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT AND BSD-2-Clause-FreeBSD AND ISC",
+            "copyrightText": "Copyright (c) 2011-14 The Julia Project.  Copyright (c) 2008 Stephen L. Moshier steve@moshier.net  Copyright 1992-2011 The FreeBSD Project. All rights reserved.  Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.",
+            "summary": "High quality system independent, portable, open source libm implementation"
+        },
+        {
+            "name": "GMP",
+            "SPDXID": "SPDXRef-GMP",
+            "downloadLocation": "https://gmplib.org/download/gmp/",
+            "filesAnalyzed": false,
+            "homepage": "https://gmplib.org/",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "LGPL-3.0-or-later",
+            "licenseDeclared": "LGPL-3.0-or-later OR GPL-2.0-or-later",
+            "copyrightText": "Copyright 1991, 1996, 1999, 2000, 2007 Free Software Foundation, Inc.",
+            "summary": "GNU MP is a portable library written in C for arbitrary precision arithmetic on integers, rational numbers, and floating-point numbers."
+        },
+        {
+            "name": "libgit2",
+            "SPDXID": "SPDXRef-libgit2",
+            "downloadLocation": "git+https://github.com/libgit2/libgit2.git",
+            "filesAnalyzed": false,
+            "homepage": "https://libgit2.org",
+            "sourceInfo": "The version in use can be found in the file deps/libgit2.version",
+            "licenseConcluded": "LicenseRef-GPL-2.0-only-with-libgit2-exception",
+            "licenseDeclared": "LicenseRef-GPL-2.0-only-with-libgit2-exception",
+            "copyrightText": "libgit2 is Copyright (C) the libgit2 contributors, unless otherwise stated. See the AUTHORS file for details.",
+            "summary": "A cross-platform, linkable library implementation of Git that you can use in your application."
+        },
+        {
+            "name": "curl",
+            "SPDXID": "SPDXRef-curl",
+            "downloadLocation": "git+https://github.com/curl/curl.git",
+            "filesAnalyzed": false,
+            "homepage": "https://curl.se",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "curl",
+            "licenseDeclared": "curl",
+            "copyrightText": "Copyright (c) 1996 - 2021, Daniel Stenberg, daniel@haxx.se, and many contributors, see the THANKS file.",
+            "summary": "A command line tool and library for transferring data with URL syntax, supporting DICT, FILE, FTP, FTPS, GOPHER, GOPHERS, HTTP, HTTPS, IMAP, IMAPS, LDAP, LDAPS, MQTT, POP3, POP3S, RTMP, RTMPS, RTSP, SCP, SFTP, SMB, SMBS, SMTP, SMTPS, TELNET and TFTP. libcurl offers a myriad of powerful features"
+        },
+        {
+            "name": "libssh2",
+            "SPDXID": "SPDXRef-libssh2",
+            "downloadLocation": "git+https://github.com/libssh2/libssh2.git",
+            "filesAnalyzed": false,
+            "homepage": "https://www.libssh2.org",
+            "sourceInfo": "The version in use can be found in the file deps/libssh2.version",
+            "licenseConcluded": "BSD-3-Clause",
+            "licenseDeclared": "BSD-3-Clause",
+            "copyrightText": "Copyright (c) 2004-2007 Sara Golemon <sarag@libssh2.org>\nCopyright (c) 2005,2006 Mikhail Gusarov <dottedmag@dottedmag.net>\nCopyright (c) 2006-2007 The Written Word, Inc.\nCopyright (c) 2007 Eli Fant <elifantu@mail.ru>\nCopyright (c) 2009-2021 Daniel Stenberg\nCopyright (C) 2008, 2009 Simon Josefsson\nCopyright (c) 2000 Markus Friedl\nCopyright (c) 2015 Microsoft Corp.\nAll rights reserved.",
+            "summary": "libssh2 is a library implementing the SSH2 protocol, available under the revised BSD license."
+        },
+        {
+            "name": "mbedtls",
+            "SPDXID": "SPDXRef-mbedtls",
+            "downloadLocation": "git+https://github.com/ARMmbed/mbedtls.git",
+            "filesAnalyzed": false,
+            "homepage": "https://tls.mbed.org",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "Apache-2.0",
+            "licenseDeclared": "Apache-2.0",
+            "copyrightText": "NOASSERTION",
+            "summary": "An open source, portable, easy to use, readable and flexible SSL library."
+        },
+        {
+            "name": "mpfr",
+            "SPDXID": "SPDXRef-mpfr",
+            "downloadLocation": "https://www.mpfr.org/",
+            "filesAnalyzed": false,
+            "homepage": "https://www.mpfr.org/",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "LGPL-3.0-or-later",
+            "licenseDeclared": "LGPL-3.0-or-later",
+            "copyrightText": "Copyright 2000-2020 Free Software Foundation, Inc.",
+            "summary": "The MPFR library is a C library for multiple-precision floating-point computations with correct rounding."
+        },
+        {
+            "name": "OpenBLAS",
+            "SPDXID": "SPDXRef-OpenBLAS",
+            "downloadLocation": "git+https://github.com/xianyi/OpenBLAS.git",
+            "filesAnalyzed": false,
+            "homepage": "https://www.openblas.net",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/openblas.version",
+            "licenseConcluded": "BSD-3-Clause",
+            "licenseDeclared": "BSD-3-Clause",
+            "copyrightText": "Copyright (c) 2011-2014, The OpenBLAS Project",
+            "summary": "OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version."
+        },
+        {
+            "name": "LAPACK",
+            "SPDXID": "SPDXRef-LAPACK",
+            "downloadLocation": "https://www.netlib.org/lapack/",
+            "filesAnalyzed": false,
+            "homepage": "https://netlib.org/",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "BSD-3-Clause",
+            "licenseDeclared": "BSD-3-Clause",
+            "copyrightText": "Copyright (c) 1992-2013 The University of Tennessee and The University of Tennessee Research Foundation.  All rights reserved.\nCopyright (c) 2000-2013 The University of California Berkeley. All rights reserved.\nCopyright (c) 2006-2013 The University of Colorado Denver.  All rights reserved.",
+            "summary": "LAPACK is written in Fortran 90 and provides routines for solving systems of simultaneous linear equations, least-squares solutions of linear systems of equations, eigenvalue problems, and singular value problems."
+        },
+        {
+            "name": "PCRE",
+            "SPDXID": "SPDXRef-PCRE",
+            "downloadLocation": "git+https://github.com/PhilipHazel/pcre2.git",
+            "filesAnalyzed": false,
+            "homepage": "https://www.pcre.org",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "BSD-3-Clause",
+            "licenseDeclared": "BSD-3-Clause",
+            "copyrightText": "Copyright (c) 1997-2021 University of Cambridge All rights reserved.\nCopyright(c) 2009-2021 Zoltan Herczeg\n",
+            "summary": "PCRE2 is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language."
+        },
+        {
+            "name": "LibSuiteSparse",
+            "SPDXID": "SPDXRef-LibSuiteSparse",
+            "packageFileName": "./",
+            "downloadLocation": "git+https://github.com/DrTimothyAldenDavis/SuiteSparse.git",
+            "filesAnalyzed": false,
+            "homepage": "https://people.engr.tamu.edu/davis/suitesparse.html",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "GPL-2.0-or-later",
+            "licenseDeclared": "LGPL-2.0-or-later AND GPL-2.0-or-later AND BSD-3 AND Apache-2.0 ",
+            "licenseComments": "SuiteSparse consists of many modules, each of which is licensed separately.",
+            "copyrightText": "AMD, Copyright (c), 1996-2015, Timothy A. Davis,\nBTF, Copyright (C) 2004-2013, University of Florida\nCAMD, Copyright (c) by Timothy A. Davis, Yanqing Chen, Patrick R. Amestoy, and Iain S. Duff.  All Rights Reserved.\nCCOLAMD: Copyright (C) 2005-2016, Univ. of Florida.  Authors: Timothy A. Davis, Sivasankaran Rajamanickam, and Stefan Larimore.  Closely based on COLAMD by Davis, Stefan Larimore, in collaboration with Esmond Ng, and John Gilbert.\nCHOLMOD/Check Module.  Copyright (C) 2005-2006, Timothy A. Davis\nCHOLMOD/Cholesky module, Copyright (C) 2005-2006, Timothy A. Davis.\nCHOLMOD/Core Module.  Copyright (C) 2005-2006, Univ. of Florida.  Author: Timothy A. Davis.\nCHOLMOD/Demo Module.  Copyright (C) 2005-2006, Timothy A. Davis.\nCHOLMOD/Include/* files.  Copyright (C) 2005-2006, either Univ. of Florida or T. Davis, depending on the file\nCHOLMOD/MATLAB Module.  Copyright (C) 2005-2006, Timothy A. Davis.\nCHOLMOD/MatrixOps Module.  Copyright (C) 2005-2006, Timothy A. Davis.\nCHOLMOD/Modify Module.  Copyright (C) 2005-2006, Timothy A. Davis and William W. Hager.\nCHOLMOD/Partition Module.  Copyright (C) 2005-2006, Univ. of Florida.  Author: Timothy A. Davis\nCHOLMOD/Supernodal Module.  Copyright (C) 2005-2006, Timothy A. Davis\nCHOLMOD/Tcov Module.  Copyright (C) 2005-2006, Timothy A. Davis\nCHOLMOD/Valgrind Module.  Copyright (C) 2005-2006, Timothy A. Davis.\nCOLAMD, Copyright 1998-2016, Timothy A. Davis.\nCSparse, Copyright (c) 2006, Timothy A. Davis.\nCXSparse: Copyright (c) 2006, Timothy A. Davis.\nGPUQREngine, Copyright (c) 2013, Timothy A. Davis, Sencer Nuri Yeralan, and Sanjay Ranka.\nKLU, Copyright (C) 2004-2013, University of Florida by Timothy A. Davis and Ekanathan Palamadai.\nLDL, Copyright (c) 2005-2013 by Timothy A. Davis.\nThe MATLAB_Tools collection of packages is Copyright (c), Timothy A. Davis, All Rights Reserved, with the exception of the spqr_rank package, which is Copyright (c), Timothy A. Davis and Les Foster, All Rights Reserved\nMATLAB_Tools, SSMULT, Copyright (c) 2007-2011, Timothy A. Davis,\nMongoose Graph Partitioning Library  Copyright (C) 2017-2018, Scott P. Kolodziej, Nuri S. Yeralan, Timothy A. Davis, William W. Hager\nRBio toolbox.  Copyright (C) 2006-2009, Timothy A. Davis\nSLIP_LU: (c) 2019-2020, Chris Lourenco, Jinhao Chen, Erick Moreno-Centeno, Timothy A. Davis, Texas A&M University. \nSPQR, Copyright 2008-2016 by Timothy A. Davis.\nSuiteSparse_GPURuntime Copyright (c) 2013-2016, Timothy A. Davis, Sencer Nuri Yeralan, and Sanjay Ranka.\nUMFPACK, Copyright 1995-2009 by Timothy A. Davis.",
+            "summary": "The official SuiteSparse library: a suite of sparse matrix algorithms authored or co-authored by Tim Davis, Texas A&M University"
+        },
+        {
+            "name": "LibBlasTrampoline",
+            "SPDXID": "SPDXRef-LibBlasTrampoline",
+            "downloadLocation": "git+https://github.com/JuliaLinearAlgebra/libblastrampoline.git",
+            "filesAnalyzed": false,
+            "homepage": "https://github.com/JuliaLinearAlgebra",
+            "sourceInfo": "The version in use can be found in the file deps/blastrampoline.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2021: Elliot Saba, Viral B. Shah, Julia Computing.",
+            "summary": "Using PLT trampolines to provide a BLAS and LAPACK demuxing library."
+        },
+        {
+            "name": "NGHTTP2",
+            "SPDXID": "SPDXRef-NGHTTP2",
+            "downloadLocation": "git+https://github.com/nghttp2/nghttp2.git",
+            "filesAnalyzed": false,
+            "homepage": "https://nghttp2.org",
+            "sourceInfo": "The version in use can be found in the file deps/Version.make",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2012, 2014, 2015, 2016 Tatsuhiro Tsujikawa\nCopyright (c) 2012, 2014, 2015, 2016 nghttp2 contributors",
+            "summary": "nghttp2 is an implementation of HTTP/2 and its header compression algorithm HPACK in C."
+        },
+        {
+            "name": "libunwind",
+            "SPDXID": "SPDXRef-libunwind",
+            "downloadLocation": "git+https://github.com/libunwind/libunwind.git",
+            "filesAnalyzed": false,
+            "homepage": "http://www.nongnu.org/libunwind/",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2002 Hewlett-Packard Co.",
+            "summary": "The primary goal of this project is to define a portable and efficient C programming interface (API) to determine the call-chain of a program."
+        },
+        {
+            "name": "libuv",
+            "SPDXID": "SPDXRef-libuv",
+            "supplier": "Organization: julialang.org ()",
+            "originator": "Organization: libuv.org ()",
+            "downloadLocation": "git+https://github.com/JuliaLang/libuv.git",
+            "filesAnalyzed": false,
+            "homepage": "https://libuv.org",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/libuv.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2015-present libuv project contributors",
+            "summary": "libuv is a multi-platform support library with a focus on asynchronous I/O. It was primarily developed for use by Node.js, but it's also used by Luvit, Julia, pyuv, and others.",
+            "comment": "The Julia project has forked libuv and maintains their own repository of the code"
+        },
+        {
+            "name": "llvm",
+            "SPDXID": "SPDXRef-llvm",
+            "supplier": "Organization: julialang.org ()",
+            "originator": "Organization: llvm.org ()",
+            "downloadLocation": "git+https://github.com/JuliaLang/llvm-project.git",
+            "filesAnalyzed": false,
+            "homepage": "https://llvm.org",
+            "sourceInfo": "The version in use can be found in the file deps/llvm.version",
+            "licenseConcluded": "Apache-2.0 WITH LLVM-exception",
+            "licenseDeclared": "Apache-2.0 WITH LLVM-exception",
+            "copyrightText": "The LLVM project does not collect copyright assignments, which means that the copyright for the code in the project is held by the respective contributors",
+            "summary": "The LLVM Project is a collection of modular and reusable compiler and toolchain technologies.",
+            "comment": "The Julia project has forked llvm and maintains their own repository of the code"
+        },
+        {
+            "name": "utf8proc",
+            "SPDXID": "SPDXRef-utf8proc",
+            "downloadLocation": "git+https://github.com/JuliaLang/utf8proc.git",
+            "filesAnalyzed": false,
+            "homepage": "https://github.com/JuliaStrings/utf8proc",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/utf8proc.version",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright © 2014-2019 by Steven G. Johnson, Jiahao Chen, Tony Kelman, Jonas Fonseca, and other contributors listed in the git history.",
+            "summary": "utf8proc is a small, clean C library that provides Unicode normalization, case-folding, and other operations for data in the UTF-8 encoding."
+        },
+        {
+            "name": "7-Zip",
+            "SPDXID": "SPDXRef-7zip",
+            "downloadLocation": "https://sourceforge.net/projects/p7zip/files/p7zip",
+            "filesAnalyzed": false,
+            "homepage": "https://www.7-zip.org",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "LGPL-3.0-or-later",
+            "licenseDeclared": "LGPL-3.0-or-later AND BSD-3",
+            "copyrightText": "Copyright (C) 1999-2021 Igor Pavlov",
+            "summary": "7-Zip is a file archiver with a high compression ratio."
+        },
+        {
+            "name": "zlib",
+            "SPDXID": "SPDXRef-zlib",
+            "downloadLocation": "git+https://github.com/madler/zlib.git",
+            "filesAnalyzed": false,
+            "homepage": "https://zlib.net",
+            "sourceInfo": "The git hash of the version in use can be found in the file deps/zlib.version",
+            "licenseConcluded": "Zlib",
+            "licenseDeclared": "Zlib",
+            "copyrightText": "Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler",
+            "summary": "A massively spiffy yet delicately unobtrusive compression library."
+        },
+        {
+            "name": "patchelf",
+            "SPDXID": "SPDXRef-patchelf",
+            "downloadLocation": "git+https://github.com/NixOS/patchelf.git",
+            "filesAnalyzed": false,
+            "homepage": "https://nixos.org/patchelf.html",
+            "sourceInfo": "The version in use can be found in the file deps/Versions.make",
+            "licenseConcluded": "GPL-3.0-or-later",
+            "licenseDeclared": "GPL-3.0-or-later",
+            "copyrightText": "Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>",
+            "summary": "A small utility to modify the dynamic linker and RPATH of ELF executables.",
+            "comment": "PATCHELF is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. Julia chooses to build the tool from source during the build process as a convienence."
+        },
+        {
+            "name": "objconv",
+            "SPDXID": "SPDXRef-objconv",
+            "downloadLocation": "https://www.agner.org/optimize/objconv.zip",
+            "filesAnalyzed": false,
+            "homepage": "https://www.agner.org/optimize/#objconv",
+            "licenseConcluded": "GPL-3.0-or-later",
+            "licenseDeclared": "GPL-3.0-or-later",
+            "copyrightText": "By Agner Fog © 2018",
+            "summary": "A utility for cross-platform development of function libraries, for converting and modifying object files and for dumping and disassembling object and executable files for all x86 and x86-64 platforms.",
+            "comment": "OBJCONV is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. Julia chooses to build the tool from source during the build process as a convienence."
+        },
+        {
+            "name": "libwhich",
+            "SPDXID": "SPDXRef-libwhich",
+            "downloadLocation": "git+https://github.com/vtjnash/libwhich.git",
+            "sourceInfo": "The git hash of the version in use can be found in the file stdlib/libwhich.version",
+            "filesAnalyzed": false,
+            "homepage": "https://github.com/vtjnash/libwhich",
+            "licenseConcluded": "MIT",
+            "licenseDeclared": "MIT",
+            "copyrightText": "Copyright (c) 2017 Jameson Nash",
+            "summary": "Like `which`, for dynamic libraries",
+            "comment": "LIBWHICH is not part of the Julia binary. It is a tool used as part of building the binary, a bit like a compiler. Julia chooses to build the tool from source during the build process as a convienence."
+        }
+    ],
+    "relationships": [
+        {
+            "spdxElementId": "SPDXRef-DOCUMENT",
+            "relationshipType": "DESCRIBES",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaPkg",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaStatistics",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaCurl",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaDownloads",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaArgTools",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaTar",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaNetworkOptions",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaSuiteSparse",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-JuliaSHA",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-dSFMT",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-OpenLibm",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-GMP",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-libgit2",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-curl",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-libssh2",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-mbedtls",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-mpfr",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-OpenBLAS",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-LAPACK",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-PCRE",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-LibSuiteSparse",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-LibBlasTrampoline",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-NGHTTP2",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-libunwind",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-libuv",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-llvm",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-utf8proc",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-7zip",
+            "relationshipType": "RUNTIME_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-zlib",
+            "relationshipType": "BUILD_DEPENDENCY_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-patchelf",
+            "relationshipType": "BUILD_TOOL_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-objconv",
+            "relationshipType": "BUILD_TOOL_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        },
+        {
+            "spdxElementId": "SPDXRef-libwhich",
+            "relationshipType": "BUILD_TOOL_OF",
+            "relatedSpdxElement": "SPDXRef-JuliaMain"
+        }
+    ],
+    "hasExtractedLicensingInfos": [
+        {
+            "licenseId": "LicenseRef-GPL-2.0-only-with-libgit2-exception",
+            "extractedText": "Note that the only valid version of the GPL as far as this project is concerned is _this_ particular version of the license (ie v2, not v2.2 or v3.x or whatever), unless explicitly otherwise stated.\n----------------------------------------------------------------------\nIn addition to the permissions in the GNU General Public License, the authors give you unlimited permission to link the compiled version of this library into combinations with other programs, and to distribute those combinations without any restriction coming from the use of this file.  (The General Public License restrictions do apply in other respects; for example, they cover modification of the file, and distribution when not linked into a combined executable.)\n----------------------------------------------------------------------\nGNU GENERAL PUBLIC LICENSE\nVersion 2, June 1991\n\nCopyright (C) 1989, 1991 Free Software Foundation, Inc.\n59 Temple Place, Suite 330, Boston, MA  02111-1307  USA\nEveryone is permitted to copy and distribute verbatim copies\nof this license document, but changing it is not allowed.\n... [more text]",
+            "name": "GPL-2.0-only-with-libgit2-exception"
+        }
+    ]
+}
diff --git a/src/.gitignore b/src/.gitignore
index 3b845e647b02c9..388e971d4f12da 100644
--- a/src/.gitignore
+++ b/src/.gitignore
@@ -10,6 +10,7 @@
 /julia_flisp.boot
 /julia_flisp.boot.inc
 /flisp.boot.inc
+/jl_internal_funcs.inc
 
 /libjulia-debug.a
 /libjulia-debug.so
diff --git a/src/APInt-C.cpp b/src/APInt-C.cpp
index 0e0ffbfa73713e..bc0a62e21dd3ef 100644
--- a/src/APInt-C.cpp
+++ b/src/APInt-C.cpp
@@ -9,6 +9,7 @@
 #include "APInt-C.h"
 #include "julia.h"
 #include "julia_assert.h"
+#include "julia_internal.h"
 
 using namespace llvm;
 
@@ -312,14 +313,16 @@ void LLVMByteSwap(unsigned numbits, integerPart *pa, integerPart *pr) {
     ASSIGN(r, a)
 }
 
-void LLVMFPtoInt(unsigned numbits, integerPart *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) {
+void LLVMFPtoInt(unsigned numbits, void *pa, unsigned onumbits, integerPart *pr, bool isSigned, bool *isExact) {
     double Val;
-    if (numbits == 32)
+    if (numbits == 16)
+        Val = __gnu_h2f_ieee(*(uint16_t*)pa);
+    else if (numbits == 32)
         Val = *(float*)pa;
     else if (numbits == 64)
         Val = *(double*)pa;
     else
-        jl_error("FPtoSI: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
+        jl_error("FPtoSI: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64");
     unsigned onumbytes = RoundUpToAlignment(onumbits, host_char_bit) / host_char_bit;
     if (onumbits <= 64) { // fast-path, if possible
         if (isSigned) {
@@ -387,12 +390,14 @@ void LLVMSItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar
         CREATE(a)
         val = a.roundToDouble(true);
     }
-    if (onumbits == 32)
+    if (onumbits == 16)
+        *(uint16_t*)pr = __gnu_f2h_ieee(val);
+    else if (onumbits == 32)
         *(float*)pr = val;
     else if (onumbits == 64)
         *(double*)pr = val;
     else
-        jl_error("SItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
+        jl_error("SItoFP: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64");
 }
 
 extern "C" JL_DLLEXPORT
@@ -402,7 +407,9 @@ void LLVMUItoFP(unsigned numbits, integerPart *pa, unsigned onumbits, integerPar
         CREATE(a)
         val = a.roundToDouble(false);
     }
-    if (onumbits == 32)
+    if (onumbits == 16)
+        *(uint16_t*)pr = __gnu_f2h_ieee(val);
+    else if (onumbits == 32)
         *(float*)pr = val;
     else if (onumbits == 64)
         *(double*)pr = val;
diff --git a/src/Makefile b/src/Makefile
index 578677b3e1b9b1..e6d83b1e1f4e95 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -17,15 +17,17 @@ JLDFLAGS += $(LDFLAGS)
 FLAGS := \
 	-D_GNU_SOURCE -I$(BUILDDIR) -I$(SRCDIR) \
 	-I$(SRCDIR)/flisp -I$(SRCDIR)/support \
-	-I$(LIBUV_INC) -I$(build_includedir) -DLIBRARY_EXPORTS \
+	-I$(LIBUV_INC) -I$(build_includedir) \
 	-I$(JULIAHOME)/deps/valgrind
-ifneq ($(USEMSVC), 1)
 FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -fvisibility=hidden -fno-common \
 		 -Wno-comment -Wpointer-arith -Wundef
 ifeq ($(USEGCC),1) # GCC bug #25509 (void)__attribute__((warn_unused_result))
 FLAGS += -Wno-unused-result
 endif
 JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
+
+ifeq ($(USECLANG),1)
+FLAGS += -Wno-return-type-c-linkage
 endif
 
 FLAGS += -DJL_BUILD_ARCH='"$(ARCH)"'
@@ -39,28 +41,27 @@ ifeq ($(OS),FreeBSD)
 FLAGS += -I$(LOCALBASE)/include
 endif
 
-RUNTIME_SRCS := \
+SRCS := \
 	jltypes gf typemap smallintset ast builtins module interpreter symbol \
 	dlload sys init task array dump staticdata toplevel jl_uv datatype \
-	simplevector runtime_intrinsics precompile \
-	threading partr stackwalk gc gc-debug gc-pages gc-stacks method \
-	jlapi signal-handling safepoint timing subtype \
-	crc32c APInt-C processor ircode
-SRCS := jloptions runtime_ccall rtutils
+	simplevector runtime_intrinsics precompile jloptions \
+	threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \
+	jlapi signal-handling safepoint timing subtype rtutils \
+	crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage runtime_ccall
 
-LLVMLINK :=
+RT_LLVMLINK :=
+CG_LLVMLINK :=
 
 ifeq ($(JULIACODEGEN),LLVM)
-SRCS += codegen llvm-ptls
-RUNTIME_SRCS += jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
-	llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering \
+CODEGEN_SRCS := codegen jitlayers aotcompile debuginfo disasm llvm-simdloop llvm-muladd \
+	llvm-final-gc-lowering llvm-pass-helpers llvm-late-gc-lowering llvm-ptls \
 	llvm-lower-handlers llvm-gc-invariant-verifier llvm-propagate-addrspaces \
-	llvm-multiversioning llvm-alloc-opt cgmemmgr llvm-api llvm-remove-addrspaces \
-	llvm-remove-ni llvm-julia-licm
+	llvm-multiversioning llvm-alloc-opt llvm-alloc-helpers cgmemmgr llvm-remove-addrspaces \
+	llvm-remove-ni llvm-julia-licm llvm-demote-float16 llvm-cpufeatures
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --includedir)
-LLVM_LIBS := all
+CG_LLVM_LIBS := all
 ifeq ($(USE_POLLY),1)
-LLVMLINK += -lPolly -lPollyISL
+CG_LLVMLINK += -lPolly -lPollyISL
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --src-root)/tools/polly/include
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --obj-root)/tools/polly/include
 FLAGS += -DUSE_POLLY
@@ -68,31 +69,42 @@ ifeq ($(USE_POLLY_OPENMP),1)
 FLAGS += -fopenmp
 endif
 ifeq ($(USE_POLLY_ACC),1)
-LLVMLINK += -lPollyPPCG -lGPURuntime
+CG_LLVMLINK += -lPollyPPCG -lGPURuntime
 FLAGS += -DUSE_POLLY_ACC
 FLAGS += -I$(shell $(LLVM_CONFIG_HOST) --src-root)/tools/polly/tools # Required to find GPURuntime/GPUJIT.h
 endif
 endif
 else
-RUNTIME_SRCS += anticodegen
-LLVM_LIBS := support
+# JULIACODEGEN != LLVM
 endif
 
-ifeq ($(USEMSVC), 1)
-SRCS += getopt
-endif
+RT_LLVM_LIBS := support
 
-SRCS += $(RUNTIME_SRCS)
+ifeq ($(OS),WINNT)
+SRCS += win32_ucontext
+endif
 
+ifeq ($(WITH_DTRACE),1)
+DTRACE_HEADERS := uprobes.h.gen
+ifneq ($(OS),Darwin)
+SRCS += uprobes
+endif
+else
+DTRACE_HEADERS :=
+endif
 
 # headers are used for dependency tracking, while public headers will be part of the dist
 UV_HEADERS :=
-HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h tls.h locks.h atomics.h julia_internal.h options.h timing.h)
-PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h tls.h locks.h atomics.h julia_gcext.h)
 ifeq ($(USE_SYSTEM_LIBUV),0)
 UV_HEADERS += uv.h
 UV_HEADERS += uv/*.h
 endif
+PUBLIC_HEADERS := $(BUILDDIR)/julia_version.h $(wildcard $(SRCDIR)/support/*.h) $(addprefix $(SRCDIR)/,julia.h julia_assert.h julia_threads.h julia_fasttls.h julia_locks.h julia_atomics.h jloptions.h)
+ifeq ($(OS),WINNT)
+PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,win32_ucontext.h)
+endif
+HEADERS := $(PUBLIC_HEADERS) $(addprefix $(SRCDIR)/,julia_internal.h options.h timing.h passes.h) $(addprefix $(BUILDDIR)/,$(DTRACE_HEADERS) jl_internal_funcs.inc)
+PUBLIC_HEADERS += $(addprefix $(SRCDIR)/,julia_gcext.h)
 PUBLIC_HEADER_TARGETS := $(addprefix $(build_includedir)/julia/,$(notdir $(PUBLIC_HEADERS)) $(UV_HEADERS))
 
 LLVM_LDFLAGS := $(shell $(LLVM_CONFIG_HOST) --ldflags)
@@ -100,14 +112,18 @@ LLVM_CXXFLAGS := $(shell $(LLVM_CONFIG_HOST) --cxxflags)
 
 ifeq ($(JULIACODEGEN),LLVM)
 ifneq ($(USE_SYSTEM_LLVM),0)
-LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs)
+CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs --system-libs)
 # HACK: llvm-config doesn't correctly point to shared libs on all platforms
 #       https://github.com/JuliaLang/julia/issues/29981
 else
 ifneq ($(USE_LLVM_SHLIB),1)
-LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs $(LLVM_LIBS)) $($(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --system-libs 2> /dev/null)
+CG_LLVMLINK += $(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --libs $(CG_LLVM_LIBS) --link-static) $($(LLVM_LDFLAGS) $(shell $(LLVM_CONFIG_HOST) --system-libs 2> /dev/null)
 else
-LLVMLINK += $(LLVM_LDFLAGS) -lLLVM
+ifeq ($(OS), Darwin)
+CG_LLVMLINK += $(LLVM_LDFLAGS) -lLLVM
+else
+CG_LLVMLINK += $(LLVM_LDFLAGS) -lLLVM-13jl
+endif
 endif
 endif
 ifeq ($(USE_LLVM_SHLIB),1)
@@ -115,32 +131,52 @@ FLAGS += -DLLVM_SHLIB
 endif # USE_LLVM_SHLIB == 1
 endif
 
+RT_LLVM_LINK_ARGS := $(shell $(LLVM_CONFIG_HOST) --libs $(RT_LLVM_LIBS) --system-libs --link-static)
+RT_LLVMLINK += $(LLVM_LDFLAGS) $(RT_LLVM_LINK_ARGS)
+ifeq ($(OS), WINNT)
+RT_LLVMLINK += -luuid -lole32
+endif
+
 CLANG_LDFLAGS := $(LLVM_LDFLAGS)
 ifeq ($(OS), Darwin)
 CLANG_LDFLAGS += -Wl,-undefined,dynamic_lookup
+OSLIBS += $(SRCDIR)/mach_dyld_atfork.tbd
 endif
 
-
-COMMON_LIBS := -L$(build_shlibdir) -L$(build_libdir) $(LIBUV) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(LLVMLINK) $(OSLIBS)
-DEBUG_LIBS := $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a $(COMMON_LIBS)
-RELEASE_LIBS := $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a $(COMMON_LIBS)
+COMMON_LIBPATHS := -L$(build_libdir) -L$(build_shlibdir)
+RT_LIBS := $(LIBUV) $(LIBUTF8PROC) $(NO_WHOLE_ARCHIVE) $(LIBUNWIND) $(RT_LLVMLINK) $(OSLIBS)
+CG_LIBS := $(NO_WHOLE_ARCHIVE) $(LIBUV) $(LIBUNWIND) $(CG_LLVMLINK) $(OSLIBS)
+RT_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp-debug.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport-debug.a -ljulia-debug $(RT_LIBS)
+CG_DEBUG_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(WHOLE_ARCHIVE) $(CG_LIBS) -ljulia-debug -ljulia-internal-debug
+RT_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(BUILDDIR)/flisp/libflisp.a $(WHOLE_ARCHIVE) $(BUILDDIR)/support/libsupport.a -ljulia $(RT_LIBS)
+CG_RELEASE_LIBS := $(COMMON_LIBPATHS) $(WHOLE_ARCHIVE) $(WHOLE_ARCHIVE) $(CG_LIBS) -ljulia -ljulia-internal
 
 OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
-DEBUGFLAGS += $(FLAGS)
-SHIPFLAGS += $(FLAGS)
+
+CODEGEN_OBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.o)
+CODEGEN_DOBJS := $(CODEGEN_SRCS:%=$(BUILDDIR)/%.dbg.obj)
+
+DEBUGFLAGS += $(FLAGS) -DLIBRARY_EXPORTS
+SHIPFLAGS += $(FLAGS) -DLIBRARY_EXPORTS
 
 # if not absolute, then relative to the directory of the julia executable
-SHIPFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\""
+SHIPFLAGS  += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys.$(SHLIB_EXT)\""
 DEBUGFLAGS += "-DJL_SYSTEM_IMAGE_PATH=\"$(build_private_libdir_rel)/sys-debug.$(SHLIB_EXT)\""
 
+# Add SONAME defines so we can embed proper `dlopen()` calls.
+SHIPFLAGS  += "-DJL_LIBJULIA_SONAME=\"libjulia.$(JL_MAJOR_SHLIB_EXT)\""       "-DJL_LIBJULIA_INTERNAL_SONAME=\"libjulia-internal.$(JL_MAJOR_SHLIB_EXT)\""
+DEBUGFLAGS += "-DJL_LIBJULIA_SONAME=\"libjulia-debug.$(JL_MAJOR_SHLIB_EXT)\"" "-DJL_LIBJULIA_INTERNAL_SONAME=\"libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)\""
+
 ifeq ($(USE_CROSS_FLISP), 1)
 FLISPDIR := $(BUILDDIR)/flisp/host
+FLISP_EXECUTABLE_debug := $(FLISPDIR)/flisp-debug$(BUILD_EXE)
+FLISP_EXECUTABLE_release := $(FLISPDIR)/flisp$(BUILD_EXE)
 else
 FLISPDIR := $(BUILDDIR)/flisp
-endif
 FLISP_EXECUTABLE_debug := $(FLISPDIR)/flisp-debug$(EXE)
 FLISP_EXECUTABLE_release := $(FLISPDIR)/flisp$(EXE)
+endif
 ifeq ($(OS),WINNT)
 FLISP_EXECUTABLE := $(FLISP_EXECUTABLE_release)
 else
@@ -150,22 +186,40 @@ endif
 default: $(JULIA_BUILD_MODE) # contains either "debug" or "release"
 all: debug release
 
-release debug: %: libjulia-%
+release debug: %: libjulia-internal-% libjulia-codegen-%
 
 $(BUILDDIR):
 	mkdir -p $(BUILDDIR)
 
 LLVM_CONFIG_ABSOLUTE := $(shell which $(LLVM_CONFIG))
 
+# Generate the DTrace header file, while also renaming the macros from
+# JULIA_ to JL_PROBE to clearly delinate them.
+$(BUILDDIR)/%.h.gen : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -h -s $< -o $@)
+	sed 's/JULIA_/JL_PROBE_/' $@ > $@.tmp
+	mv $@.tmp $@
+
+$(BUILDDIR)/jl_internal_funcs.inc: $(SRCDIR)/jl_exported_funcs.inc
+	# Generate `.inc` file that contains a list of `#define` macros to rename functions defined in `libjulia-internal`
+	# to have a `ijl_` prefix instead of `jl_`, to denote that they are coming from `libjulia-internal`.  This avoids
+	# potential confusion with debugging tools, when inspecting a process that has both `libjulia` and `libjulia-internal`
+	# loaded at the same time.
+	grep 'XX(.\+)' $< | sed -E 's/.*XX\((.+)\).*/#define \1 i\1/g' >$@
+
 # source file rules
 $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
+	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -c $< -o $@)
 $(BUILDDIR)/%.o: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONFIG_ABSOLUTE) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(SHIPFLAGS) $(CXX_DISABLE_ASSERTION) -c $< -o $@)
+	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(SHIPFLAGS) $(CXX_DISABLE_ASSERTION) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.cpp $(SRCDIR)/llvm-version.h $(HEADERS) $(LLVM_CONFIG_ABSOLUTE) | $(BUILDDIR)
-	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -c $< -o $@)
+$(BUILDDIR)/%.o : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
+$(BUILDDIR)/%.dbg.obj : $(SRCDIR)/%.d
+	@$(call PRINT_DTRACE, $(DTRACE) -G -s $< -o $@)
 
 # public header rules
 $(eval $(call dir_target,$(build_includedir)/julia))
@@ -188,7 +242,7 @@ else
 JULIA_SPLITDEBUG := 0
 endif
 $(build_shlibdir)/libccalltest.$(SHLIB_EXT): $(SRCDIR)/ccalltest.c
-	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JCPPFLAGS) $(DEBUGFLAGS) -O3 $< $(fPIC) -shared -o $@.tmp $(JLDFLAGS))
+	@$(call PRINT_CC, $(CC) $(JCFLAGS) $(JL_CFLAGS) $(JCPPFLAGS) $(FLAGS) -O3 $< $(fPIC) -shared -o $@.tmp $(LDFLAGS))
 	$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@.tmp
 ifeq ($(JULIA_SPLITDEBUG),1)
 	@# Create split debug info file for libccalltest stacktraces test
@@ -200,12 +254,12 @@ endif
 	@## clang should have made the dSYM split-debug directory,
 	@## but we are intentionally not going to give it the correct name
 	@## because we want to test the non-default debug configuration
-	@#rm -r $@.dSYM && mv $@.tmp.dSYM $@.dSYM
+	@#rm -rf $@.dSYM && mv $@.tmp.dSYM $@.dSYM
 	mv $@.tmp $@
 	$(INSTALL_NAME_CMD)libccalltest.$(SHLIB_EXT) $@
 
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/llvmcalltest.cpp $(LLVM_CONFIG_ABSOLUTE)
-	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(JCXXFLAGS) $(JCPPFLAGS) $(DEBUGFLAGS) -O3 $< $(fPIC) -shared -o $@ $(JLDFLAGS) -L$(build_shlibdir) -L$(build_libdir) $(NO_WHOLE_ARCHIVE) $(LLVMLINK))
+	@$(call PRINT_CC, $(CXX) $(LLVM_CXXFLAGS) $(FLAGS) $(CPPFLAGS) $(CXXFLAGS) -O3 $< $(fPIC) -shared -o $@ $(LDFLAGS) $(COMMON_LIBPATHS) $(NO_WHOLE_ARCHIVE) $(CG_LLVMLINK)) -lpthread
 
 julia_flisp.boot.inc.phony: $(BUILDDIR)/julia_flisp.boot.inc
 
@@ -219,29 +273,35 @@ $(BUILDDIR)/julia_flisp.boot: $(addprefix $(SRCDIR)/,jlfrontend.scm flisp/aliase
 		$(call cygpath_w,$(SRCDIR)/mk_julia_flisp_boot.scm) $(call cygpath_w,$(dir $<)) $(notdir $<) $(call cygpath_w,$@))
 
 # additional dependency links
-$(BUILDDIR)/anticodegen.o $(BUILDDIR)/anticodegen.dbg.obj: $(SRCDIR)/intrinsics.h
-$(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/codegen-stubs.o $(BUILDDIR)/codegen-stubs.dbg.obj: $(SRCDIR)/intrinsics.h
+$(BUILDDIR)/aotcompile.o $(BUILDDIR)/aotcompile.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h $(SRCDIR)/debug-registry.h
 $(BUILDDIR)/ast.o $(BUILDDIR)/ast.dbg.obj: $(BUILDDIR)/julia_flisp.boot.inc $(SRCDIR)/flisp/*.h
 $(BUILDDIR)/builtins.o $(BUILDDIR)/builtins.dbg.obj: $(SRCDIR)/iddict.c $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/codegen.o $(BUILDDIR)/codegen.dbg.obj: $(addprefix $(SRCDIR)/,\
-	intrinsics.cpp jitlayers.h intrinsics.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h)
-$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h)
+	intrinsics.cpp jitlayers.h debug-registry.h intrinsics.h codegen_shared.h cgutils.cpp ccall.cpp abi_*.cpp processor.h builtin_proto.h)
+$(BUILDDIR)/debuginfo.o $(BUILDDIR)/debuginfo.dbg.obj: $(addprefix $(SRCDIR)/,debuginfo.h processor.h jitlayers.h debug-registry.h)
 $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR)/processor.h
 $(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h)
 $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h
 $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h
-$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h
+$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h
 $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h
-$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h $(SRCDIR)/debug-registry.h
 $(BUILDDIR)/jltypes.o $(BUILDDIR)/jltypes.dbg.obj: $(SRCDIR)/builtin_proto.h
 $(build_shlibdir)/libllvmcalltest.$(SHLIB_EXT): $(SRCDIR)/codegen_shared.h $(BUILDDIR)/julia_version.h
-$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h
-$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
+$(BUILDDIR)/llvm-alloc-helpers.o $(BUILDDIR)/llvm-alloc-helpers.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
+$(BUILDDIR)/llvm-alloc-opt.o $(BUILDDIR)/llvm-alloc-opt.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/llvm-alloc-helpers.h
+$(BUILDDIR)/llvm-cpufeatures.o $(BUILDDIR)/llvm-cpufeatures.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/debug-registry.h
+$(BUILDDIR)/llvm-final-gc-lowering.o $(BUILDDIR)/llvm-final-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/llvm-gc-invariant-verifier.o $(BUILDDIR)/llvm-gc-invariant-verifier.dbg.obj: $(SRCDIR)/codegen_shared.h
-$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h
-$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-julia-licm.o $(BUILDDIR)/llvm-julia-licm.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/llvm-alloc-helpers.h $(SRCDIR)/llvm-pass-helpers.h
+$(BUILDDIR)/llvm-late-gc-lowering.o $(BUILDDIR)/llvm-late-gc-lowering.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-lower-handlers.o $(BUILDDIR)/llvm-lower-handlers.dbg.obj: $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-multiversioning.o $(BUILDDIR)/llvm-multiversioning.dbg.obj: $(SRCDIR)/codegen_shared.h $(SRCDIR)/processor.h
 $(BUILDDIR)/llvm-pass-helpers.o $(BUILDDIR)/llvm-pass-helpers.dbg.obj: $(SRCDIR)/llvm-pass-helpers.h $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-propagate-addrspaces.o $(BUILDDIR)/llvm-propagate-addrspaces.dbg.obj: $(SRCDIR)/codegen_shared.h
+$(BUILDDIR)/llvm-remove-addrspaces.o $(BUILDDIR)/llvm-remove-addrspaces.dbg.obj: $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/llvm-ptls.o $(BUILDDIR)/llvm-ptls.dbg.obj: $(SRCDIR)/codegen_shared.h
 $(BUILDDIR)/processor.o $(BUILDDIR)/processor.dbg.obj: $(addprefix $(SRCDIR)/,processor_*.cpp processor.h features_*.h)
 $(BUILDDIR)/signal-handling.o $(BUILDDIR)/signal-handling.dbg.obj: $(addprefix $(SRCDIR)/,signals-*.c)
@@ -253,10 +313,10 @@ $(addprefix $(BUILDDIR)/,threading.o threading.dbg.obj gc.o gc.dbg.obj init.c in
 $(addprefix $(BUILDDIR)/,APInt-C.o APInt-C.dbg.obj runtime_intrinsics.o runtime_intrinsics.dbg.obj): $(SRCDIR)/APInt-C.h
 
 # archive library file rules
-$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
+$(BUILDDIR)/support/libsupport.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S *.inc) $(SRCDIR)/support/*.c
 	$(MAKE) -C $(SRCDIR)/support BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
-$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S) $(SRCDIR)/support/*.c
+$(BUILDDIR)/support/libsupport-debug.a: $(addprefix $(SRCDIR)/support/,*.h *.c *.S *.inc) $(SRCDIR)/support/*.c
 	$(MAKE) -C $(SRCDIR)/support debug BUILDDIR='$(abspath $(BUILDDIR)/support)'
 
 $(FLISP_EXECUTABLE_release): $(BUILDDIR)/flisp/libflisp.a
@@ -284,70 +344,64 @@ $(BUILDDIR)/julia_version.h: $(JULIAHOME)/VERSION
 	@echo "#endif" >> $@.$(JULIA_BUILD_MODE).tmp
 	mv $@.$(JULIA_BUILD_MODE).tmp $@
 
-ifneq ($(USEMSVC), 1)
 CXXLD = $(CXX) -shared
-ifeq ($(OS),WINNT)
-CXXLD += -Wl,--out-implib,$(build_libdir)/$(notdir $@).a
-endif
-else
-CXXLD = $(LD) -dll -export:jl_setjmp -export:jl_longjmp
-endif
 
-# If we're on windows, don't do versioned shared libraries.  If we're on OSX,
-# put the version number before the .dylib.  Otherwise, put it after.
-ifeq ($(OS), WINNT)
-JL_MAJOR_MINOR_SHLIB_EXT := $(SHLIB_EXT)
-else
-ifeq ($(OS), Darwin)
-JL_MAJOR_MINOR_SHLIB_EXT := $(SOMAJOR).$(SOMINOR).$(SHLIB_EXT)
-JL_MAJOR_SHLIB_EXT := $(SOMAJOR).$(SHLIB_EXT)
-else
-JL_MAJOR_MINOR_SHLIB_EXT := $(SHLIB_EXT).$(SOMAJOR).$(SOMINOR)
-JL_MAJOR_SHLIB_EXT := $(SHLIB_EXT).$(SOMAJOR)
-endif
-endif
+$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-internal.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-internal.$(SHLIB_EXT) $@
+	$(DSYMUTIL) $@
 
-ifeq ($(SHLIB_EXT), so)
-  SONAME       := -Wl,-soname=libjulia.$(JL_MAJOR_SHLIB_EXT)
-  SONAME_DEBUG := -Wl,-soname=libjulia-debug.$(JL_MAJOR_SHLIB_EXT)
-else
-  SONAME       :=
-  SONAME_DEBUG :=
-endif
+$(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV)
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(RT_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-internal-debug.$(SHLIB_EXT) $@
+	$(DSYMUTIL) $@
 
-$(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a $(LIBUV)
-	@$(call PRINT_LINK, $(CXXLD) $(JCXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(DOBJS) $(RPATH_LIB) -o $@ $(JLDFLAGS) $(JLIBLDFLAGS) $(DEBUG_LIBS) $(SONAME_DEBUG))
-	$(INSTALL_NAME_CMD)libjulia-debug.$(SHLIB_EXT) $@
 ifneq ($(OS), WINNT)
-	@ln -sf libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_SHLIB_EXT)
-	@ln -sf libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) $(build_shlibdir)/libjulia-debug.$(SHLIB_EXT)
+$(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_SHLIB_EXT): \
+		$(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+$(build_shlibdir)/libjulia-internal.$(SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(SHLIB_EXT): $(build_shlibdir)/libjulia-internal%.$(SHLIB_EXT): \
+		$(build_shlibdir)/libjulia-internal%.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+libjulia-internal-release: $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal.$(SHLIB_EXT)
+libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-internal-debug.$(SHLIB_EXT)
 endif
+libjulia-internal-release: $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT)
+libjulia-internal-debug: $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
+libjulia-internal-debug libjulia-internal-release: $(PUBLIC_HEADER_TARGETS)
+
+$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_OBJS) $(BUILDDIR)/support/libsupport.a $(build_shlibdir)/libjulia-internal.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(CODEGEN_OBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(CG_RELEASE_LIBS) $(call SONAME_FLAGS,libjulia-codegen.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-codegen.$(SHLIB_EXT) $@
 	$(DSYMUTIL) $@
 
-$(BUILDDIR)/libjulia-debug.a: $(SRCDIR)/julia.expmap $(DOBJS) $(BUILDDIR)/flisp/libflisp-debug.a $(BUILDDIR)/support/libsupport-debug.a
-	rm -f $@
-	@$(call PRINT_LINK, ar -rcs $@ $(DOBJS))
-
-libjulia-debug: $(build_shlibdir)/libjulia-debug.$(JL_MAJOR_MINOR_SHLIB_EXT) $(PUBLIC_HEADER_TARGETS)
+$(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(CODEGEN_DOBJS) $(BUILDDIR)/support/libsupport-debug.a $(build_shlibdir)/libjulia-internal-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, $(CXXLD) $(call IMPLIB_FLAGS,$@) $(JCXXFLAGS) $(JL_CXXFLAGS) $(CXXLDFLAGS) $(DEBUGFLAGS) $(CODEGEN_DOBJS) $(RPATH_LIB) -o $@ \
+		$(JLDFLAGS) $(JLIBLDFLAGS) $(CG_DEBUG_LIBS) $(call SONAME_FLAGS,libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT)))
+	@$(INSTALL_NAME_CMD)libjulia-codegen-debug.$(SHLIB_EXT) $@
+	$(DSYMUTIL) $@
 
-$(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT): $(SRCDIR)/julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a $(LIBUV)
-	@$(call PRINT_LINK, $(CXXLD) $(JCXXFLAGS) $(CXXLDFLAGS) $(SHIPFLAGS) $(OBJS) $(RPATH_LIB) -o $@ $(JLDFLAGS) $(JLIBLDFLAGS) $(RELEASE_LIBS) $(SONAME))
-	$(INSTALL_NAME_CMD)libjulia.$(SHLIB_EXT) $@
 ifneq ($(OS), WINNT)
-	@ln -sf libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT) $(build_shlibdir)/libjulia.$(JL_MAJOR_SHLIB_EXT)
-	@ln -sf libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT) $(build_shlibdir)/libjulia.$(SHLIB_EXT)
+$(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT): $(build_shlibdir)/libjulia-codegen%.$(JL_MAJOR_SHLIB_EXT): \
+		$(build_shlibdir)/libjulia-codegen%.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+$(build_shlibdir)/libjulia-codegen.$(SHLIB_EXT) $(build_shlibdir)/libjulia-codegen-debug.$(SHLIB_EXT): $(build_shlibdir)/libjulia-codegen%.$(SHLIB_EXT): \
+		$(build_shlibdir)/libjulia-codegen%.$(JL_MAJOR_MINOR_SHLIB_EXT)
+	@$(call PRINT_LINK, ln -sf $(notdir $<) $@)
+libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-codegen.$(SHLIB_EXT)
+libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_SHLIB_EXT) $(build_shlibdir)/libjulia-codegen-debug.$(SHLIB_EXT)
 endif
-	$(DSYMUTIL) $@
-
-$(BUILDDIR)/libjulia.a: julia.expmap $(OBJS) $(BUILDDIR)/flisp/libflisp.a $(BUILDDIR)/support/libsupport.a
-	rm -f $@
-	@$(call PRINT_LINK, ar -rcs $@ $(OBJS))
-libjulia-release: $(build_shlibdir)/libjulia.$(JL_MAJOR_MINOR_SHLIB_EXT) $(PUBLIC_HEADER_TARGETS)
+libjulia-codegen-release: $(build_shlibdir)/libjulia-codegen.$(JL_MAJOR_MINOR_SHLIB_EXT)
+libjulia-codegen-debug: $(build_shlibdir)/libjulia-codegen-debug.$(JL_MAJOR_MINOR_SHLIB_EXT)
+libjulia-codegen-debug libjulia-codegen-release: $(PUBLIC_HEADER_TARGETS)
 
 clean:
-	-rm -fr $(build_shlibdir)/libjulia* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
-	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc
-	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a
+	-rm -fr $(build_shlibdir)/libjulia-internal* $(build_shlibdir)/libjulia-codegen* $(build_shlibdir)/libccalltest* $(build_shlibdir)/libllvmcalltest*
+	-rm -f $(BUILDDIR)/julia_flisp.boot $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
+	-rm -f $(BUILDDIR)/*.dbg.obj $(BUILDDIR)/*.o $(BUILDDIR)/*.dwo $(BUILDDIR)/*.$(SHLIB_EXT) $(BUILDDIR)/*.a $(BUILDDIR)/*.h.gen
 	-rm -f $(BUILDDIR)/julia_version.h
 
 clean-flisp:
@@ -358,7 +412,7 @@ clean-support:
 
 cleanall: clean clean-flisp clean-support clean-analyzegc
 
-$(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/GCChecker.cpp $(LLVM_CONFIG_ABSOLUTE)
+$(build_shlibdir)/lib%Plugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/%.cpp $(LLVM_CONFIG_ABSOLUTE)
 	@$(call PRINT_CC, $(CXX) -g $(fPIC) -shared -o $@ -DCLANG_PLUGIN -I$(build_includedir) -L$(build_libdir) \
 		$(LLVM_CXXFLAGS) $(CLANG_LDFLAGS) $(CPPFLAGS) $(CXXFLAGS) $(LDFLAGS) $(CXXLDFLAGS) $<)
 
@@ -366,8 +420,11 @@ $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT): $(SRCDIR)/clangsa/GCChecker.c
 # Note that for a default install, you will need to have run the following
 # before attempting this static analysis, so that all necessary headers
 # and dependencies are properly installed:
-#   make -c deps install-llvm install-libuv install-utf8proc install-unwind
-analyzegc-deps-check: $(BUILDDIR)/julia_version.h $(BUILDDIR)/julia_flisp.boot.inc
+#   make -C src install-analysis-deps
+install-analysis-deps:
+	$(MAKE) -C $(JULIAHOME)/deps install-llvm install-clang install-llvm-tools install-libuv install-utf8proc install-unwind
+
+analyzegc-deps-check: $(BUILDDIR)/julia_version.h $(BUILDDIR)/julia_flisp.boot.inc $(BUILDDIR)/jl_internal_funcs.inc
 ifeq ($(USE_BINARYBUILDER_LLVM),0)
 ifneq ($(BUILD_LLVM_CLANG),1)
 	$(error Clang must be available to use the clang analyzer. Either build it (BUILD_LLVM_CLANG=1) or use BinaryBuilder)
@@ -375,16 +432,60 @@ endif
 endif
 
 clangsa: $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT)
-
-clang-sa-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) | analyzegc-deps-check
-	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS)  -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker --analyzer-no-default-checks -fcolor-diagnostics -Werror -x c $<)
-clang-sa-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) | analyzegc-deps-check
-	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text -Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(DEBUGFLAGS) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker --analyzer-no-default-checks -fcolor-diagnostics -Werror -x c++ $<)
-
-# Add C files as a target of `analyzegc`
-analyzegc: $(addprefix clang-sa-,$(RUNTIME_SRCS))
+clangsa: $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT)
+
+# optarg is a required_argument for these
+SA_EXCEPTIONS-jloptions.c                   := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.NonNullParamChecker;unix.cstring.NullArg"
+ # clang doesn't understand that e->vars has the same value in save_env (NULL) and restore_env (assumed non-NULL)
+SA_EXCEPTIONS-subtype.c                     := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core.uninitialized.Assign;core.UndefinedBinaryOperatorResult"
+SA_EXCEPTIONS-codegen.c                     := -Xanalyzer -analyzer-config -Xanalyzer silence-checkers="core"
+ # these need to be annotated (and possibly fixed)
+SKIP_IMPLICIT_ATOMICS := dump.c module.c staticdata.c codegen.cpp
+ # these need to be annotated (and possibly fixed)
+SKIP_GC_CHECK := codegen.cpp rtutils.c
+
+clang-sagc-%: $(SRCDIR)/%.c $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \
+		-Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \
+		$(SA_EXCEPTIONS-$(notdir $<)) \
+		$(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c $<)
+clang-sagc-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text --analyzer-no-default-checks \
+		-Xclang -load -Xclang $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT) -Xclang -analyzer-checker=core$(COMMA)julia.GCChecker \
+		$(SA_EXCEPTIONS-$(notdir $<)) \
+		$(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -x c++ $<)
+
+clang-sa-%: $(SRCDIR)/%.c .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \
+		-Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \
+		$(SA_EXCEPTIONS-$(notdir $<)) \
+		$(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c $<)
+clang-sa-%: $(SRCDIR)/%.cpp .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang --analyze -Xanalyzer -analyzer-werror -Xanalyzer -analyzer-output=text \
+		-Xanalyzer -analyzer-disable-checker=deadcode.DeadStores \
+		$(SA_EXCEPTIONS-$(notdir $<)) \
+		$(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -Werror -x c++ $<)
+
+clang-tidy-%: $(SRCDIR)/%.c $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \
+		-load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \
+		-- $(CLANGSA_FLAGS) $(JCPPFLAGS) $(JCFLAGS) $(JL_CFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics -fno-caret-diagnostics -x c)
+clang-tidy-%: $(SRCDIR)/%.cpp $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) .FORCE | analyzegc-deps-check
+	@$(call PRINT_ANALYZE, $(build_depsbindir)/clang-tidy $< -header-filter='.*' --quiet \
+		-load $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT) --checks='-clang-analyzer-*$(COMMA)-clang-diagnostic-*$(COMMA)concurrency-implicit-atomics' --warnings-as-errors='*' \
+		-- $(CLANGSA_FLAGS) $(CLANGSA_CXXFLAGS) $(LLVM_CXXFLAGS) $(JCPPFLAGS) $(JCXXFLAGS) $(JL_CXXFLAGS) $(DEBUGFLAGS) -fcolor-diagnostics --system-header-prefix=llvm -Wno-deprecated-declarations -fno-caret-diagnostics -x c++)
+
+
+# Add C files as a target of `analyzesrc` and `analyzegc` and `tidysrc`
+tidysrc: $(addprefix clang-tidy-,$(filter-out $(basename $(SKIP_IMPLICIT_ATOMICS)),$(CODEGEN_SRCS) $(SRCS)))
+analyzesrc: $(addprefix clang-sa-,$(CODEGEN_SRCS) $(SRCS))
+analyzegc: $(addprefix clang-sagc-,$(filter-out $(basename $(SKIP_GC_CHECK)),$(CODEGEN_SRCS) $(SRCS)))
+analyzegc: analyzesrc tidysrc # TODO: remove me (depended on by CI currently)
+analyze: analyzesrc analyzegc tidysrc
 
 clean-analyzegc:
 	rm -f $(build_shlibdir)/libGCCheckerPlugin.$(SHLIB_EXT)
+	rm -f $(build_shlibdir)/libImplicitAtomicsPlugin.$(SHLIB_EXT)
 
-.PHONY: default all debug release clean cleanall clean-* libccalltest libllvmcalltest julia_flisp.boot.inc.phony analyzegc clang-sa-*
+.FORCE:
+.PHONY: default all debug release clean cleanall clean-* libccalltest libllvmcalltest julia_flisp.boot.inc.phony analyzegc analyzesrc .FORCE
diff --git a/src/abi_aarch64.cpp b/src/abi_aarch64.cpp
index 475fe3ba4596c8..1a3f160329c6cf 100644
--- a/src/abi_aarch64.cpp
+++ b/src/abi_aarch64.cpp
@@ -13,28 +13,26 @@
 
 struct ABI_AArch64Layout : AbiLayout {
 
-Type *get_llvm_vectype(jl_datatype_t *dt) const
+Type *get_llvm_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
-    if (dt->layout == NULL)
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields > 0`
+    if (dt->layout == NULL || jl_is_layout_opaque(dt->layout))
         return nullptr;
     size_t nfields = dt->layout->nfields;
     assert(nfields > 0);
     if (nfields < 2)
         return nullptr;
-    static Type *T_vec64 = VectorType::get(T_int32, 2);
-    static Type *T_vec128 = VectorType::get(T_int32, 4);
     Type *lltype;
     // Short vector should be either 8 bytes or 16 bytes.
     // Note that there are only two distinct fundamental types for
     // short vectors so we normalize them to <2 x i32> and <4 x i32>
     switch (jl_datatype_size(dt)) {
     case 8:
-        lltype = T_vec64;
+        lltype = FixedVectorType::get(Type::getInt32Ty(ctx), 2);
         break;
     case 16:
-        lltype = T_vec128;
+        lltype = FixedVectorType::get(Type::getInt32Ty(ctx), 4);
         break;
     default:
         return nullptr;
@@ -59,24 +57,24 @@ Type *get_llvm_vectype(jl_datatype_t *dt) const
 }
 
 #define jl_is_floattype(v)   jl_subtype(v,(jl_value_t*)jl_floatingpoint_type)
-Type *get_llvm_fptype(jl_datatype_t *dt) const
+Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    // `!dt->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
+    // `!dt->name->mutabl && dt->pointerfree && !dt->haspadding && dt->nfields == 0`
     Type *lltype;
     // Check size first since it's cheaper.
     switch (jl_datatype_size(dt)) {
     case 2:
-        lltype = T_float16;
+        lltype = Type::getHalfTy(ctx);
         break;
     case 4:
-        lltype = T_float32;
+        lltype = Type::getFloatTy(ctx);
         break;
     case 8:
-        lltype = T_float64;
+        lltype = Type::getDoubleTy(ctx);
         break;
     case 16:
-        lltype = T_float128;
+        lltype = Type::getFP128Ty(ctx);
         break;
     default:
         return nullptr;
@@ -85,12 +83,12 @@ Type *get_llvm_fptype(jl_datatype_t *dt) const
             lltype : nullptr);
 }
 
-Type *get_llvm_fp_or_vectype(jl_datatype_t *dt) const
+Type *get_llvm_fp_or_vectype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    if (dt->mutabl || dt->layout->npointers || dt->layout->haspadding)
+    if (dt->name->mutabl || dt->layout->npointers || dt->layout->haspadding)
         return nullptr;
-    return dt->layout->nfields ? get_llvm_vectype(dt) : get_llvm_fptype(dt);
+    return dt->layout->nfields ? get_llvm_vectype(dt, ctx) : get_llvm_fptype(dt, ctx);
 }
 
 struct ElementType {
@@ -105,7 +103,7 @@ struct ElementType {
 // Data Types of the members that compose the type are the same.
 // Note that it is the fundamental types that are important and not the member
 // types.
-bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) const
+bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele, LLVMContext &ctx) const
 {
     // Assume:
     //     dt is a pointerfree type, (all members are isbits)
@@ -133,7 +131,7 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) c
             dt = (jl_datatype_t*)jl_field_type(dt, i);
             continue;
         }
-        if (Type *vectype = get_llvm_vectype(dt)) {
+        if (Type *vectype = get_llvm_vectype(dt, ctx)) {
             if ((ele.sz && dsz != ele.sz) || (ele.type && ele.type != vectype))
                 return false;
             ele.type = vectype;
@@ -149,7 +147,7 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) c
             jl_datatype_t *fieldtype = (jl_datatype_t*)jl_field_type(dt, i);
             // Check element count.
             // This needs to be done after the zero size member check
-            if (nele > 3 || !isHFAorHVA(fieldtype, fieldsz, nele, ele)) {
+            if (nele > 3 || !isHFAorHVA(fieldtype, fieldsz, nele, ele, ctx)) {
                 return false;
             }
         }
@@ -158,7 +156,7 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) c
     // For bitstypes
     if (ele.sz && dsz != ele.sz)
         return false;
-    Type *new_type = get_llvm_fptype(dt);
+    Type *new_type = get_llvm_fptype(dt, ctx);
     if (new_type && (!ele.type || ele.type == new_type)) {
         ele.type = new_type;
         ele.sz = dsz;
@@ -168,7 +166,7 @@ bool isHFAorHVA(jl_datatype_t *dt, size_t dsz, size_t &nele, ElementType &ele) c
     return false;
 }
 
-Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele) const
+Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
 
@@ -184,18 +182,18 @@ Type *isHFAorHVA(jl_datatype_t *dt, size_t &nele) const
         return NULL;
     nele = 0;
     ElementType eltype;
-    if (isHFAorHVA(dt, dsz, nele, eltype))
+    if (isHFAorHVA(dt, dsz, nele, eltype, ctx))
         return eltype.type;
     return NULL;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
 {
     // B.2
     //   If the argument type is an HFA or an HVA, then the argument is used
     //   unmodified.
     size_t size;
-    if (isHFAorHVA(dt, size))
+    if (isHFAorHVA(dt, size, ctx))
         return false;
     // B.3
     //   If the argument type is a Composite Type that is larger than 16 bytes,
@@ -222,7 +220,7 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
 //
 // All the out parameters should be default to `false`.
 Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
-                   size_t *rewrite_len) const
+                   size_t *rewrite_len, LLVMContext &ctx) const
 {
     // Based on section 5.4 C of the Procedure Call Standard
     // C.1
@@ -231,7 +229,7 @@ Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
     //   the argument is allocated to the least significant bits of register
     //   v[NSRN]. The NSRN is incremented by one. The argument has now been
     //   allocated.
-    if (get_llvm_fp_or_vectype(dt)) {
+    if (get_llvm_fp_or_vectype(dt, ctx)) {
         *fpreg = true;
         return NULL;
     }
@@ -243,7 +241,7 @@ Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
     //   Floating-point Registers (with one register per member of the HFA
     //   or HVA). The NSRN is incremented by the number of registers used.
     //   The argument has now been allocated.
-    if (Type *eltype = isHFAorHVA(dt, *rewrite_len)) {
+    if (Type *eltype = isHFAorHVA(dt, *rewrite_len, ctx)) {
         assert(*rewrite_len > 0 && *rewrite_len <= 4);
         // HFA and HVA have <= 4 members
         *fpreg = true;
@@ -322,7 +320,7 @@ Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
     assert(jl_datatype_size(dt) <= 16); // Should be pass by reference otherwise
     *rewrite_len = (jl_datatype_size(dt) + 7) >> 3;
     // Rewrite to [n x Int64] where n is the **size in dword**
-    return jl_datatype_size(dt) ? T_int64 : NULL;
+    return jl_datatype_size(dt) ? Type::getInt64Ty(ctx) : NULL;
 
     // C.11
     //   The NGRN is set to 8.
@@ -346,7 +344,7 @@ Type *classify_arg(jl_datatype_t *dt, bool *fpreg, bool *onstack,
     // <handled by C.10 above>
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     // Section 5.5
     // If the type, T, of the result of a function is such that
@@ -360,18 +358,18 @@ bool use_sret(jl_datatype_t *dt) override
     bool fpreg = false;
     bool onstack = false;
     size_t rewrite_len = 0;
-    classify_arg(dt, &fpreg, &onstack, &rewrite_len);
+    classify_arg(dt, &fpreg, &onstack, &rewrite_len, ctx);
     return onstack;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
-    if (Type *fptype = get_llvm_fp_or_vectype(dt))
+    if (Type *fptype = get_llvm_fp_or_vectype(dt, ctx))
         return fptype;
     bool fpreg = false;
     bool onstack = false;
     size_t rewrite_len = 0;
-    if (Type *rewrite_ty = classify_arg(dt, &fpreg, &onstack, &rewrite_len))
+    if (Type *rewrite_ty = classify_arg(dt, &fpreg, &onstack, &rewrite_len, ctx))
         return ArrayType::get(rewrite_ty, rewrite_len);
     return NULL;
 }
diff --git a/src/abi_arm.cpp b/src/abi_arm.cpp
index 1a5d3d06513689..4987d07657ae6f 100644
--- a/src/abi_arm.cpp
+++ b/src/abi_arm.cpp
@@ -23,29 +23,29 @@
 
 struct ABI_ARMLayout : AbiLayout {
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &abi, LLVMContext &ctx, Type *Ty) override
 {
     return false;
 }
 
 #define jl_is_floattype(v)   jl_subtype(v,(jl_value_t*)jl_floatingpoint_type)
 
-Type *get_llvm_fptype(jl_datatype_t *dt) const
+Type *get_llvm_fptype(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // Assume jl_is_datatype(dt) && !jl_is_abstracttype(dt)
-    if (dt->mutabl || jl_datatype_nfields(dt) != 0)
+    if (dt->name->mutabl || jl_datatype_nfields(dt) != 0)
         return NULL;
     Type *lltype;
     // Check size first since it's cheaper.
     switch (jl_datatype_size(dt)) {
     case 2:
-        lltype = T_float16;
+        lltype = Type::getHalfTy(ctx);
         break;
     case 4:
-        lltype = T_float32;
+        lltype = Type::getFloatTy(ctx);
         break;
     case 8:
-        lltype = T_float64;
+        lltype = Type::getDoubleTy(ctx);
         break;
     default:
         return NULL;
@@ -58,10 +58,10 @@ Type *get_llvm_fptype(jl_datatype_t *dt) const
 // fundamental type.
 //
 // Returns the corresponding LLVM type.
-Type *isLegalHAType(jl_datatype_t *dt) const
+Type *isLegalHAType(jl_datatype_t *dt, LLVMContext &ctx) const
 {
     // single- or double-precision floating-point type
-    if (Type *fp = get_llvm_fptype(dt))
+    if (Type *fp = get_llvm_fptype(dt, ctx))
         return fp;
 
     // NOT SUPPORTED: 64- or 128-bit containerized vectors
@@ -74,7 +74,7 @@ Type *isLegalHAType(jl_datatype_t *dt) const
 //
 // Legality of the HA is determined by a nonzero return value.
 // In case of a non-legal HA, the value of 'base' is undefined.
-size_t isLegalHA(jl_datatype_t *dt, Type *&base) const
+size_t isLegalHA(jl_datatype_t *dt, Type *&base, LLVMContext &ctx) const
 {
     // Homogeneous aggregates are only used for VFP registers,
     // so use that definition of legality (section 6.1.2.1)
@@ -92,10 +92,10 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base) const
         for (size_t i = 0; i < parent_members; ++i) {
             jl_datatype_t *fdt = (jl_datatype_t*)jl_field_type(dt,i);
 
-            Type *T = isLegalHAType(fdt);
+            Type *T = isLegalHAType(fdt, ctx);
             if (T)
                 total_members++;
-            else if (size_t field_members = isLegalHA(fdt, T))
+            else if (size_t field_members = isLegalHA(fdt, T, ctx))
                 // recursive application (expanding nested composite types)
                 total_members += field_members;
             else
@@ -120,7 +120,7 @@ size_t isLegalHA(jl_datatype_t *dt, Type *&base) const
 // Determine if an argument can be passed through a coprocessor register.
 //
 // All the out parameters should be default to `false`.
-void classify_cprc(jl_datatype_t *dt, bool *vfp) const
+void classify_cprc(jl_datatype_t *dt, bool *vfp, LLVMContext &ctx) const
 {
     // Based on section 6.1 of the Procedure Call Standard
 
@@ -128,7 +128,7 @@ void classify_cprc(jl_datatype_t *dt, bool *vfp) const
     // - A half-precision floating-point type.
     // - A single-precision floating-point type.
     // - A double-precision floating-point type.
-    if (get_llvm_fptype(dt)) {
+    if (get_llvm_fptype(dt, ctx)) {
         *vfp = true;
         return;
     }
@@ -137,14 +137,14 @@ void classify_cprc(jl_datatype_t *dt, bool *vfp) const
 
     // - A Homogeneous Aggregate
     Type *base = NULL;
-    if (isLegalHA(dt, base)) {
+    if (isLegalHA(dt, base, ctx)) {
         *vfp = true;
         return;
     }
 }
 
-void classify_return_arg(jl_datatype_t *dt, bool *reg,
-                         bool *onstack, bool *need_rewrite) const
+void classify_return_arg(jl_datatype_t *dt, bool *reg, bool *onstack,
+                         bool *need_rewrite, LLVMContext &ctx) const
 {
     // Based on section 5.4 of the Procedure Call Standard
 
@@ -152,7 +152,7 @@ void classify_return_arg(jl_datatype_t *dt, bool *reg,
     //   Any result whose type would satisfy the conditions for a VFP CPRC is
     //   returned in the appropriate number of consecutive VFP registers
     //   starting with the lowest numbered register (s0, d0, q0).
-    classify_cprc(dt, reg);
+    classify_cprc(dt, reg, ctx);
     if (*reg)
         return;
 
@@ -196,12 +196,12 @@ void classify_return_arg(jl_datatype_t *dt, bool *reg,
         *onstack = true;
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     bool reg = false;
     bool onstack = false;
     bool need_rewrite = false;
-    classify_return_arg(dt, &reg, &onstack, &need_rewrite);
+    classify_return_arg(dt, &reg, &onstack, &need_rewrite, ctx);
 
     return onstack;
 }
@@ -218,7 +218,7 @@ bool use_sret(jl_datatype_t *dt) override
 //
 // All the out parameters should be default to `false`.
 void classify_arg(jl_datatype_t *dt, bool *reg,
-                  bool *onstack, bool *need_rewrite) const
+                  bool *onstack, bool *need_rewrite, LLVMContext &ctx) const
 {
     // Based on section 5.5 of the Procedure Call Standard
 
@@ -226,7 +226,7 @@ void classify_arg(jl_datatype_t *dt, bool *reg,
     //   If the argument is a CPRC and there are sufficient unallocated
     //   co-processor registers of the appropriate class, the argument is
     //   allocated to co-processor registers.
-    classify_cprc(dt, reg);
+    classify_cprc(dt, reg, ctx);
     if (*reg)
         return;
 
@@ -239,18 +239,18 @@ void classify_arg(jl_datatype_t *dt, bool *reg,
     *need_rewrite = true;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
-    if (Type *fptype = get_llvm_fptype(dt))
+    if (Type *fptype = get_llvm_fptype(dt, ctx))
         return fptype;
 
     bool reg = false;
     bool onstack = false;
     bool need_rewrite = false;
     if (isret)
-        classify_return_arg(dt, &reg, &onstack, &need_rewrite);
+        classify_return_arg(dt, &reg, &onstack, &need_rewrite, ctx);
     else
-        classify_arg(dt, &reg, &onstack, &need_rewrite);
+        classify_arg(dt, &reg, &onstack, &need_rewrite, ctx);
 
     if (!need_rewrite)
         return NULL;
@@ -276,7 +276,7 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
     if (align > 8)
         align = 8;
 
-    Type *T = Type::getIntNTy(jl_LLVMContext, align*8);
+    Type *T = Type::getIntNTy(ctx, align*8);
     return ArrayType::get(T, (jl_datatype_size(dt) + align - 1) / align);
 }
 
diff --git a/src/abi_llvm.cpp b/src/abi_llvm.cpp
index 1ab30da1b2f75b..181b05ef799790 100644
--- a/src/abi_llvm.cpp
+++ b/src/abi_llvm.cpp
@@ -40,17 +40,17 @@
 
 struct ABI_LLVMLayout : AbiLayout {
 
-bool use_sret(jl_datatype_t *ty) override
+bool use_sret(jl_datatype_t *ty, LLVMContext &ctx) override
 {
     return false;
 }
 
-bool needPassByRef(jl_datatype_t *ty, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *ty, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
 {
     return false;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *ty, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *ty, bool isret, LLVMContext &ctx) const override
 {
     return NULL;
 }
diff --git a/src/abi_ppc64le.cpp b/src/abi_ppc64le.cpp
index 3c13d8b47aa4d5..016eebd4555258 100644
--- a/src/abi_ppc64le.cpp
+++ b/src/abi_ppc64le.cpp
@@ -83,7 +83,7 @@ unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const
     int n = 0;
     for (i = 0; i < l; i++) {
         jl_datatype_t *fld = (jl_datatype_t*)jl_field_type(ty, i);
-        if (!jl_is_datatype(fld) || ((jl_datatype_t*)fld)->layout == NULL)
+        if (!jl_is_datatype(fld) || ((jl_datatype_t*)fld)->layout == NULL || jl_is_layout_opaque(((jl_datatype_t*)fld)->layout))
             return 9;
         n += isHFA((jl_datatype_t*)fld, ty0, hva);
         if (n > 8)
@@ -92,7 +92,7 @@ unsigned isHFA(jl_datatype_t *ty, jl_datatype_t **ty0, bool *hva) const
     return n;
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     jl_datatype_t *ty0 = NULL;
     bool hva = false;
@@ -101,18 +101,18 @@ bool use_sret(jl_datatype_t *dt) override
     return false;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
 {
     jl_datatype_t *ty0 = NULL;
     bool hva = false;
     if (jl_datatype_size(dt) > 64 && isHFA(dt, &ty0, &hva) > 8) {
-        ab.addAttribute(Attribute::ByVal);
+        ab.addByValAttr(Ty);
         return true;
     }
     return false;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     // Arguments are either scalar or passed by value
     size_t size = jl_datatype_size(dt);
@@ -125,16 +125,16 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
     int hfa = isHFA(dt, &ty0, &hva);
     if (hfa <= 8) {
         if (ty0 == jl_float32_type) {
-            return ArrayType::get(T_float32, hfa);
+            return ArrayType::get(llvm::Type::getFloatTy(ctx), hfa);
         }
         else if (ty0 == jl_float64_type) {
-            return ArrayType::get(T_float64, hfa);
+            return ArrayType::get(llvm::Type::getDoubleTy(ctx), hfa);
         }
         else {
             jl_datatype_t *vecty = (jl_datatype_t*)jl_field_type(ty0, 0);
             assert(jl_is_datatype(vecty) && vecty->name == jl_vecelement_typename);
-            Type *ety = bitstype_to_llvm(jl_tparam0(vecty));
-            Type *vty = VectorType::get(ety, jl_datatype_nfields(ty0));
+            Type *ety = bitstype_to_llvm(jl_tparam0(vecty), ctx);
+            Type *vty = FixedVectorType::get(ety, jl_datatype_nfields(ty0));
             return ArrayType::get(vty, hfa);
         }
     }
@@ -142,14 +142,15 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
     // the bitsize of the integer gives the desired alignment
     if (size > 8) {
         if (jl_datatype_align(dt) <= 8) {
+            Type  *T_int64 = Type::getInt64Ty(ctx);
             return ArrayType::get(T_int64, (size + 7) / 8);
         }
         else {
-            Type *T_int128 = Type::getIntNTy(jl_LLVMContext, 128);
+            Type *T_int128 = Type::getIntNTy(ctx, 128);
             return ArrayType::get(T_int128, (size + 15) / 16);
         }
     }
-    return Type::getIntNTy(jl_LLVMContext, size * 8);
+    return Type::getIntNTy(ctx, size * 8);
 }
 
 };
diff --git a/src/abi_win32.cpp b/src/abi_win32.cpp
index af16a0310b1248..078d9b6df4e446 100644
--- a/src/abi_win32.cpp
+++ b/src/abi_win32.cpp
@@ -39,7 +39,7 @@
 
 struct ABI_Win32Layout : AbiLayout {
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     // Use sret if the size of the argument is not one of 1, 2, 4, 8 bytes
     // This covers the special case of ComplexF32
@@ -49,23 +49,23 @@ bool use_sret(jl_datatype_t *dt) override
     return true;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
 {
     // Use pass by reference for all structs
     if (dt->layout->nfields > 0) {
-        ab.addAttribute(Attribute::ByVal);
+        ab.addByValAttr(Ty);
         return true;
     }
     return false;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     // Arguments are either scalar or passed by value
     // rewrite integer sized (non-sret) struct to the corresponding integer
     if (!dt->layout->nfields)
         return NULL;
-    return Type::getIntNTy(jl_LLVMContext, jl_datatype_nbits(dt));
+    return Type::getIntNTy(ctx, jl_datatype_nbits(dt));
 }
 
 };
diff --git a/src/abi_win64.cpp b/src/abi_win64.cpp
index 16e46a9703f6ad..ec97203eee5ff9 100644
--- a/src/abi_win64.cpp
+++ b/src/abi_win64.cpp
@@ -47,7 +47,7 @@ struct ABI_Win64Layout : AbiLayout {
 int nargs;
 ABI_Win64Layout() : nargs(0) { }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     size_t size = jl_datatype_size(dt);
     if (win64_reg_size(size) || is_native_simd_type(dt))
@@ -56,22 +56,23 @@ bool use_sret(jl_datatype_t *dt) override
     return true;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
 {
     nargs++;
     size_t size = jl_datatype_size(dt);
     if (win64_reg_size(size))
         return false;
-    if (nargs <= 4)
-        ab.addAttribute(Attribute::ByVal);
+    if (nargs <= 4) {
+        ab.addByValAttr(Ty);
+    }
     return true;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     size_t size = jl_datatype_size(dt);
     if (size > 0 && win64_reg_size(size) && !jl_is_primitivetype(dt))
-        return Type::getIntNTy(jl_LLVMContext, jl_datatype_nbits(dt));
+        return Type::getIntNTy(ctx, jl_datatype_nbits(dt));
     return NULL;
 }
 
diff --git a/src/abi_x86.cpp b/src/abi_x86.cpp
index 7a65de028e083e..23815993d01293 100644
--- a/src/abi_x86.cpp
+++ b/src/abi_x86.cpp
@@ -57,7 +57,7 @@ inline bool is_complex128(jl_datatype_t *dt) const
     return is_complex_type(dt) && jl_tparam0(dt) == (jl_value_t*)jl_float64_type;
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     size_t size = jl_datatype_size(dt);
     if (size == 0)
@@ -67,22 +67,22 @@ bool use_sret(jl_datatype_t *dt) override
     return true;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
 {
     size_t size = jl_datatype_size(dt);
     if (is_complex64(dt) || is_complex128(dt) || (jl_is_primitivetype(dt) && size <= 8))
         return false;
-    ab.addAttribute(Attribute::ByVal);
+    ab.addByValAttr(Ty);
     return true;
 }
 
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     if (!isret)
         return NULL;
     // special case Complex{Float32} as a return type
     if (is_complex64(dt))
-        return T_int64;
+        return llvm::Type::getInt64Ty(ctx);
     return NULL;
 }
 
diff --git a/src/abi_x86_64.cpp b/src/abi_x86_64.cpp
index 605cd468d7ca43..43e539b8386cef 100644
--- a/src/abi_x86_64.cpp
+++ b/src/abi_x86_64.cpp
@@ -147,11 +147,11 @@ void classifyType(Classification& accum, jl_datatype_t *dt, uint64_t offset) con
         accum.addField(offset, Sse);
     }
     // Other struct types
-    else if (jl_datatype_size(dt) <= 16) {
+    else if (jl_datatype_size(dt) <= 16 && dt->layout) {
         size_t i;
         for (i = 0; i < jl_datatype_nfields(dt); ++i) {
             jl_value_t *ty = jl_field_type(dt, i);
-            if (!jl_is_datatype(ty) || ((jl_datatype_t*)ty)->layout == NULL || jl_is_array_type(ty))
+            if (jl_field_isptr(dt, i))
                 ty = (jl_value_t*)jl_voidpointer_type;
             classifyType(accum, (jl_datatype_t*)ty, offset + jl_field_offset(dt, i));
         }
@@ -168,7 +168,7 @@ Classification classify(jl_datatype_t *dt) const
     return cl;
 }
 
-bool use_sret(jl_datatype_t *dt) override
+bool use_sret(jl_datatype_t *dt, LLVMContext &ctx) override
 {
     int sret = classify(dt).isMemory;
     if (sret) {
@@ -178,11 +178,11 @@ bool use_sret(jl_datatype_t *dt) override
     return sret;
 }
 
-bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
+bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab, LLVMContext &ctx, Type *Ty) override
 {
     Classification cl = classify(dt);
     if (cl.isMemory) {
-        ab.addAttribute(Attribute::ByVal);
+        ab.addByValAttr(Ty);
         return true;
     }
 
@@ -202,7 +202,7 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
     else if (jl_is_structtype(dt)) {
         // spill to memory even though we would ordinarily pass
         // it in registers
-        ab.addAttribute(Attribute::ByVal);
+        ab.addByValAttr(Ty);
         return true;
     }
     return false;
@@ -210,7 +210,7 @@ bool needPassByRef(jl_datatype_t *dt, AttrBuilder &ab) override
 
 // Called on behalf of ccall to determine preferred LLVM representation
 // for an argument or return value.
-Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
+Type *preferred_llvm_type(jl_datatype_t *dt, bool isret, LLVMContext &ctx) const override
 {
     (void) isret;
     // no need to rewrite these types (they are returned as pointers anyways)
@@ -230,15 +230,15 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
     switch (cl.classes[0]) {
         case Integer:
             if (size >= 8)
-                types[0] = T_int64;
+                types[0] = Type::getInt64Ty(ctx);
             else
-                types[0] = Type::getIntNTy(jl_LLVMContext, nbits);
+                types[0] = Type::getIntNTy(ctx, nbits);
             break;
         case Sse:
             if (size <= 4)
-                types[0] = T_float32;
+                types[0] = Type::getFloatTy(ctx);
             else
-                types[0] = T_float64;
+                types[0] = Type::getDoubleTy(ctx);
             break;
         default:
             assert(0 && "Unexpected cl.classes[0]");
@@ -248,14 +248,14 @@ Type *preferred_llvm_type(jl_datatype_t *dt, bool isret) const override
             return types[0];
         case Integer:
             assert(size > 8);
-            types[1] = Type::getIntNTy(jl_LLVMContext, (nbits-64));
-            return StructType::get(jl_LLVMContext,ArrayRef<Type*>(&types[0],2));
+            types[1] = Type::getIntNTy(ctx, (nbits-64));
+            return StructType::get(ctx,ArrayRef<Type*>(&types[0],2));
         case Sse:
             if (size <= 12)
-                types[1] = T_float32;
+                types[1] = Type::getFloatTy(ctx);
             else
-                types[1] = T_float64;
-            return StructType::get(jl_LLVMContext,ArrayRef<Type*>(&types[0],2));
+                types[1] = Type::getDoubleTy(ctx);
+            return StructType::get(ctx,ArrayRef<Type*>(&types[0],2));
         default:
             assert(0 && "Unexpected cl.classes[0]");
     }
diff --git a/src/anticodegen.c b/src/anticodegen.c
deleted file mode 100644
index ff65f9bcdf96f6..00000000000000
--- a/src/anticodegen.c
+++ /dev/null
@@ -1,65 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#include "julia.h"
-#include "julia_internal.h"
-
-#include "intrinsics.h"
-
-#define UNAVAILABLE { jl_errorf("%s: not available in this build of Julia", __func__); }
-
-void jl_dump_native(const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname, const char *sysimg_data, size_t sysimg_len) UNAVAILABLE
-int32_t jl_get_llvm_gv(jl_value_t *p) UNAVAILABLE
-void jl_write_malloc_log(void) UNAVAILABLE
-void jl_write_coverage_data(void) UNAVAILABLE
-
-JL_DLLEXPORT void jl_clear_malloc_data(void) UNAVAILABLE
-JL_DLLEXPORT void jl_extern_c(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE
-JL_DLLEXPORT void *jl_function_ptr(jl_function_t *f, jl_value_t *rt, jl_value_t *argt) UNAVAILABLE
-JL_DLLEXPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world, int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo) UNAVAILABLE
-JL_DLLEXPORT const jl_value_t *jl_dump_function_ir(void *f, uint8_t strip_ir_metadata, uint8_t dump_module, const char *debuginfo) UNAVAILABLE
-JL_DLLEXPORT void *jl_get_llvmf_defn(jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE
-
-JL_DLLEXPORT void *jl_LLVMCreateDisasm(const char *TripleName, void *DisInfo, int TagType, void *GetOpInfo, void *SymbolLookUp) UNAVAILABLE
-JL_DLLEXPORT size_t jl_LLVMDisasmInstruction(void *DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize) UNAVAILABLE
-
-int32_t jl_assign_functionID(const char *fname) UNAVAILABLE
-
-void jl_init_codegen(void) { }
-
-int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline)
-{
-    return 0;
-}
-
-void jl_register_fptrs(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs,
-                       jl_method_instance_t **linfos, size_t n)
-{
-    (void)sysimage_base; (void)fptrs; (void)linfos; (void)n;
-}
-
-jl_llvm_functions_t jl_compile_linfo(jl_method_instance_t **pli, jl_code_info_t *src, size_t world, const jl_cgparams_t *params)
-{
-    jl_method_instance_t *li = *pli;
-    jl_llvm_functions_t decls = {};
-
-    if (jl_is_method(li->def.method)) {
-        jl_printf(JL_STDERR, "code missing for ");
-        jl_static_show(JL_STDERR, (jl_value_t*)li);
-        jl_printf(JL_STDERR, " : sysimg may not have been built with --compile=all\n");
-    }
-    else {
-        jl_printf(JL_STDERR, "top level expression cannot be compiled in this build of Julia");
-    }
-    return decls;
-}
-
-jl_value_t *jl_fptr_interpret_call(jl_method_instance_t *lam, jl_value_t **args, uint32_t nargs);
-jl_callptr_t jl_generate_fptr(jl_method_instance_t **pli, jl_llvm_functions_t decls, size_t world)
-{
-    return (jl_callptr_t)&jl_fptr_interpret_call;
-}
-
-JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void)
-{
-    return 0;
-}
diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp
index 7fa1b03404de6a..0df8b9047e14e2 100644
--- a/src/aotcompile.cpp
+++ b/src/aotcompile.cpp
@@ -5,17 +5,22 @@
 
 // target support
 #include <llvm/ADT/Triple.h>
+#include <llvm/Analysis/TargetLibraryInfo.h>
+#include <llvm/Analysis/TargetTransformInfo.h>
+#include <llvm/IR/DataLayout.h>
+#if JL_LLVM_VERSION >= 140000
+#include <llvm/MC/TargetRegistry.h>
+#else
 #include <llvm/Support/TargetRegistry.h>
+#endif
 #include <llvm/Target/TargetMachine.h>
-#include <llvm/IR/DataLayout.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
 
 // analysis passes
 #include <llvm/Analysis/Passes.h>
 #include <llvm/Analysis/BasicAliasAnalysis.h>
 #include <llvm/Analysis/TypeBasedAliasAnalysis.h>
 #include <llvm/Analysis/ScopedNoAliasAA.h>
+#include <llvm/IR/PassManager.h>
 #include <llvm/IR/Verifier.h>
 #include <llvm/Transforms/IPO.h>
 #include <llvm/Transforms/Scalar.h>
@@ -26,6 +31,9 @@
 #include <llvm/Transforms/IPO/AlwaysInliner.h>
 #include <llvm/Transforms/InstCombine/InstCombine.h>
 #include <llvm/Transforms/Scalar/InstSimplifyPass.h>
+#include <llvm/Transforms/Utils/SimplifyCFGOptions.h>
+#include <llvm/Passes/PassBuilder.h>
+#include <llvm/Passes/PassPlugin.h>
 #if defined(USE_POLLY)
 #include <polly/RegisterPasses.h>
 #include <polly/LinkAllPasses.h>
@@ -35,21 +43,11 @@
 #endif
 #endif
 
-// for outputting assembly
+// for outputting code
 #include <llvm/Bitcode/BitcodeWriter.h>
 #include <llvm/Bitcode/BitcodeWriterPass.h>
 #include "llvm/Object/ArchiveWriter.h"
 #include <llvm/IR/IRPrintingPasses.h>
-#include <llvm/CodeGen/AsmPrinter.h>
-#include <llvm/CodeGen/MachineModuleInfo.h>
-#include <llvm/CodeGen/TargetPassConfig.h>
-#include <llvm/MC/MCAsmInfo.h>
-#include <llvm/MC/MCStreamer.h>
-#include <llvm/MC/MCAsmBackend.h>
-#include <llvm/MC/MCCodeEmitter.h>
-#if JL_LLVM_VERSION >= 100000
-#include <llvm/Support/CodeGen.h>
-#endif
 
 #include <llvm/IR/LegacyPassManagers.h>
 #include <llvm/Transforms/Utils/Cloning.h>
@@ -57,40 +55,16 @@
 
 using namespace llvm;
 
-// our passes
-namespace llvm {
-    extern Pass *createLowerSimdLoopPass();
-}
-
-#if JL_LLVM_VERSION < 100000
-static const TargetMachine::CodeGenFileType CGFT_ObjectFile = TargetMachine::CGFT_ObjectFile;
-static const TargetMachine::CodeGenFileType CGFT_AssemblyFile = TargetMachine::CGFT_AssemblyFile;
-#endif
-
-
 #include "julia.h"
 #include "julia_internal.h"
 #include "jitlayers.h"
 #include "julia_assert.h"
 
-// MSVC's link.exe requires each function declaration to have a Comdat section
-// So rather than litter the code with conditionals,
-// all global values that get emitted call this function
-// and it decides whether the definition needs a Comdat section and adds the appropriate declaration
 template<class T> // for GlobalObject's
 static T *addComdat(T *G)
 {
 #if defined(_OS_WINDOWS_)
     if (!G->isDeclaration()) {
-        // Add comdat information to make MSVC link.exe happy
-        // it's valid to emit this for ld.exe too,
-        // but makes it very slow to link for no benefit
-#if defined(_COMPILER_MICROSOFT_)
-        Comdat *jl_Comdat = G->getParent()->getOrInsertComdat(G->getName());
-        // ELF only supports Comdat::Any
-        jl_Comdat->setSelectionKind(Comdat::NoDuplicates);
-        G->setComdat(jl_Comdat);
-#endif
         // add __declspec(dllexport) to everything marked for export
         if (G->getLinkage() == GlobalValue::ExternalLinkage)
             G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
@@ -103,7 +77,7 @@ static T *addComdat(T *G)
 
 
 typedef struct {
-    std::unique_ptr<Module> M;
+    orc::ThreadSafeModule M;
     std::vector<GlobalValue*> jl_sysimg_fvars;
     std::vector<GlobalValue*> jl_sysimg_gvars;
     std::map<jl_code_instance_t*, std::tuple<uint32_t, uint32_t>> jl_fvar_map;
@@ -111,7 +85,7 @@ typedef struct {
 } jl_native_code_desc_t;
 
 extern "C" JL_DLLEXPORT
-void jl_get_function_id(void *native_code, jl_code_instance_t *codeinst,
+void jl_get_function_id_impl(void *native_code, jl_code_instance_t *codeinst,
         int32_t *func_idx, int32_t *specfunc_idx)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
@@ -124,8 +98,8 @@ void jl_get_function_id(void *native_code, jl_code_instance_t *codeinst,
     }
 }
 
-extern "C"
-int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p)
+extern "C" JL_DLLEXPORT
+int32_t jl_get_llvm_gv_impl(void *native_code, jl_value_t *p)
 {
     // map a jl_value_t memory location to a GlobalVariable
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
@@ -139,17 +113,17 @@ int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p)
 }
 
 extern "C" JL_DLLEXPORT
-Module* jl_get_llvm_module(void *native_code)
+LLVMOrcThreadSafeModuleRef jl_get_llvm_module_impl(void *native_code)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
     if (data)
-        return data->M.get();
+        return reinterpret_cast<LLVMOrcThreadSafeModuleRef>(&data->M);
     else
         return NULL;
 }
 
 extern "C" JL_DLLEXPORT
-GlobalValue* jl_get_llvm_function(void *native_code, uint32_t idx)
+GlobalValue* jl_get_llvm_function_impl(void *native_code, uint32_t idx)
 {
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
     if (data)
@@ -158,16 +132,6 @@ GlobalValue* jl_get_llvm_function(void *native_code, uint32_t idx)
         return NULL;
 }
 
-extern "C" JL_DLLEXPORT
-LLVMContext* jl_get_llvm_context(void *native_code)
-{
-    jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
-    if (data)
-        return &data->M->getContext();
-    else
-        return NULL;
-}
-
 
 static void emit_offset_table(Module &mod, const std::vector<GlobalValue*> &vars, StringRef name, Type *T_psize)
 {
@@ -279,31 +243,46 @@ static void jl_ci_cache_lookup(const jl_cgparams_t &cgparams, jl_method_instance
 // takes the running content that has collected in the shadow module and dump it to disk
 // this builds the object file portion of the sysimage files for fast startup, and can
 // also be used be extern consumers like GPUCompiler.jl to obtain a module containing
-// all reachable & inferrrable functions. The `policy` flag switches between the defaul
-// mode `0` and the extern mode `1`.
+// all reachable & inferrrable functions. The `policy` flag switches between the default
+// mode `0`, the extern mode `1`, and imaging mode `2`.
 extern "C" JL_DLLEXPORT
-void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _policy)
+void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int _policy)
 {
+    if (cgparams == NULL)
+        cgparams = &jl_default_cgparams;
     jl_native_code_desc_t *data = new jl_native_code_desc_t;
-    jl_codegen_params_t params;
-    params.params = &cgparams;
-    std::map<jl_code_instance_t*, jl_compile_result_t> emitted;
+    CompilationPolicy policy = (CompilationPolicy) _policy;
+    bool imaging = imaging_default() || policy == CompilationPolicy::ImagingMode;
+    jl_workqueue_t emitted;
     jl_method_instance_t *mi = NULL;
     jl_code_info_t *src = NULL;
     JL_GC_PUSH1(&src);
-    JL_LOCK(&codegen_lock);
+    JL_LOCK(&jl_codegen_lock);
+    orc::ThreadSafeContext ctx;
+    orc::ThreadSafeModule backing;
+    if (!llvmmod) {
+        ctx = jl_ExecutionEngine->acquireContext();
+        backing = jl_create_llvm_module("text", ctx, imaging);
+    }
+    orc::ThreadSafeModule &clone = llvmmod ? *reinterpret_cast<orc::ThreadSafeModule*>(llvmmod) : backing;
+    auto ctxt = clone.getContext();
+    jl_codegen_params_t params(ctxt);
+    params.params = cgparams;
+    uint64_t compiler_start_time = 0;
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
+        compiler_start_time = jl_hrtime();
 
-    CompilationPolicy policy = (CompilationPolicy) _policy;
-    std::unique_ptr<Module> clone(jl_create_llvm_module("text"));
+    params.imaging = imaging;
 
     // compile all methods for the current world and type-inference world
-    size_t compile_for[] = { jl_typeinf_world, jl_world_counter };
+    size_t compile_for[] = { jl_typeinf_world, jl_atomic_load_acquire(&jl_world_counter) };
     for (int worlds = 0; worlds < 2; worlds++) {
         params.world = compile_for[worlds];
         if (!params.world)
             continue;
         // Don't emit methods for the typeinf_world with extern policy
-        if (policy == CompilationPolicy::Extern && params.world == jl_typeinf_world)
+        if (policy != CompilationPolicy::Default && params.world == jl_typeinf_world)
             continue;
         size_t i, l;
         for (i = 0, l = jl_array_len(methods); i < l; i++) {
@@ -312,7 +291,7 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
             jl_value_t *item = jl_array_ptr_ref(methods, i);
             if (jl_is_simplevector(item)) {
                 if (worlds == 1)
-                    jl_compile_extern_c(clone.get(), &params, NULL, jl_svecref(item, 0), jl_svecref(item, 1));
+                    jl_compile_extern_c(reinterpret_cast<LLVMOrcThreadSafeModuleRef>(&clone), &params, NULL, jl_svecref(item, 0), jl_svecref(item, 1));
                 continue;
             }
             mi = (jl_method_instance_t*)item;
@@ -323,19 +302,23 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
             if (mi->def.method->primary_world <= params.world && params.world <= mi->def.method->deleted_world) {
                 // find and prepare the source code to compile
                 jl_code_instance_t *codeinst = NULL;
-                jl_ci_cache_lookup(cgparams, mi, params.world, &codeinst, &src);
+                jl_ci_cache_lookup(*cgparams, mi, params.world, &codeinst, &src);
                 if (src && !emitted.count(codeinst)) {
                     // now add it to our compilation results
                     JL_GC_PROMISE_ROOTED(codeinst->rettype);
-                    jl_compile_result_t result = jl_emit_code(mi, src, codeinst->rettype, params);
-                    if (std::get<0>(result))
-                        emitted[codeinst] = std::move(result);
+                    orc::ThreadSafeModule result_m = jl_create_llvm_module(name_from_method_instance(codeinst->def),
+                            params.tsctx, params.imaging,
+                            clone.getModuleUnlocked()->getDataLayout(),
+                            Triple(clone.getModuleUnlocked()->getTargetTriple()));
+                    jl_llvm_functions_t decls = jl_emit_code(result_m, mi, src, codeinst->rettype, params);
+                    if (result_m)
+                        emitted[codeinst] = {std::move(result_m), std::move(decls)};
                 }
             }
         }
 
         // finally, make sure all referenced methods also get compiled or fixed up
-        jl_compile_workqueue(emitted, params, policy);
+        jl_compile_workqueue(emitted, *clone.getModuleUnlocked(), params, policy);
     }
     JL_GC_POP();
 
@@ -350,7 +333,7 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
     // clones the contents of the module `m` to the shadow_output collector
     // while examining and recording what kind of function pointer we have
     for (auto &def : emitted) {
-        jl_merge_module(clone.get(), std::move(std::get<0>(def.second)));
+        jl_merge_module(clone, std::move(std::get<0>(def.second)));
         jl_code_instance_t *this_code = def.first;
         jl_llvm_functions_t decls = std::get<1>(def.second);
         StringRef func = decls.functionObject;
@@ -364,58 +347,67 @@ void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int _p
             func_id = -2;
         }
         else {
-            data->jl_sysimg_fvars.push_back(cast<Function>(clone->getNamedValue(func)));
+            //Safe b/c context is locked by params
+            data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(func)));
             func_id = data->jl_sysimg_fvars.size();
         }
         if (!cfunc.empty()) {
-            data->jl_sysimg_fvars.push_back(cast<Function>(clone->getNamedValue(cfunc)));
+            //Safe b/c context is locked by params
+            data->jl_sysimg_fvars.push_back(cast<Function>(clone.getModuleUnlocked()->getNamedValue(cfunc)));
             cfunc_id = data->jl_sysimg_fvars.size();
         }
         data->jl_fvar_map[this_code] = std::make_tuple(func_id, cfunc_id);
     }
     if (params._shared_module) {
-        std::unique_ptr<Module> shared(params._shared_module);
-        params._shared_module = NULL;
-        jl_merge_module(clone.get(), std::move(shared));
+        jl_merge_module(clone, std::move(params._shared_module));
     }
 
     // now get references to the globals in the merged module
     // and set them to be internalized and initialized at startup
     for (auto &global : gvars) {
-        GlobalVariable *G = cast<GlobalVariable>(clone->getNamedValue(global));
+        //Safe b/c context is locked by params
+        GlobalVariable *G = cast<GlobalVariable>(clone.getModuleUnlocked()->getNamedValue(global));
         G->setInitializer(ConstantPointerNull::get(cast<PointerType>(G->getValueType())));
         G->setLinkage(GlobalVariable::InternalLinkage);
         data->jl_sysimg_gvars.push_back(G);
     }
 
+    //Safe b/c context is locked by params
 #if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
     // setting the function personality enables stack unwinding and catching exceptions
     // so make sure everything has something set
-    Type *T_int32 = Type::getInt32Ty(clone->getContext());
+    Type *T_int32 = Type::getInt32Ty(clone.getModuleUnlocked()->getContext());
     Function *juliapersonality_func =
        Function::Create(FunctionType::get(T_int32, true),
-           Function::ExternalLinkage, "__julia_personality", clone.get());
+           Function::ExternalLinkage, "__julia_personality", clone.getModuleUnlocked());
     juliapersonality_func->setDLLStorageClass(GlobalValue::DLLImportStorageClass);
 #endif
 
     // move everything inside, now that we've merged everything
     // (before adding the exported headers)
-    for (GlobalObject &G : clone->global_objects()) {
-        if (!G.isDeclaration()) {
-            G.setLinkage(Function::InternalLinkage);
-            makeSafeName(G);
-            addComdat(&G);
+    if (policy == CompilationPolicy::Default) {
+        //Safe b/c context is locked by params
+        for (GlobalObject &G : clone.getModuleUnlocked()->global_objects()) {
+            if (!G.isDeclaration()) {
+                G.setLinkage(Function::InternalLinkage);
+                makeSafeName(G);
+                addComdat(&G);
 #if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-            // Add unwind exception personalities to functions to handle async exceptions
-            if (Function *F = dyn_cast<Function>(&G))
-                F->setPersonalityFn(juliapersonality_func);
+                // Add unwind exception personalities to functions to handle async exceptions
+                if (Function *F = dyn_cast<Function>(&G))
+                    F->setPersonalityFn(juliapersonality_func);
 #endif
+            }
         }
     }
 
     data->M = std::move(clone);
-
-    JL_UNLOCK(&codegen_lock); // Might GC
+    if (measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+    if (ctx.getContext()) {
+        jl_ExecutionEngine->releaseContext(std::move(ctx));
+    }
+    JL_UNLOCK(&jl_codegen_lock); // Might GC
     return (void*)data;
 }
 
@@ -445,19 +437,21 @@ static void reportWriterError(const ErrorInfoBase &E)
 
 // takes the running content that has collected in the shadow module and dump it to disk
 // this builds the object file portion of the sysimage files for fast startup
-extern "C"
-void jl_dump_native(void *native_code,
+extern "C" JL_DLLEXPORT
+void jl_dump_native_impl(void *native_code,
         const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname,
         const char *asm_fname,
         const char *sysimg_data, size_t sysimg_len)
 {
     JL_TIMING(NATIVE_DUMP);
     jl_native_code_desc_t *data = (jl_native_code_desc_t*)native_code;
-    LLVMContext &Context = data->M->getContext();
+    auto TSCtx = data->M.getContext();
+    auto lock = TSCtx.getLock();
+    LLVMContext &Context = *TSCtx.getContext();
     // We don't want to use MCJIT's target machine because
     // it uses the large code model and we may potentially
     // want less optimizations there.
-    Triple TheTriple = Triple(jl_TargetMachine->getTargetTriple());
+    Triple TheTriple = Triple(jl_ExecutionEngine->getTargetTriple());
     // make sure to emit the native object format, even if FORCE_ELF was set in codegen
 #if defined(_OS_WINDOWS_)
     TheTriple.setObjectFormat(Triple::COFF);
@@ -466,11 +460,11 @@ void jl_dump_native(void *native_code,
     TheTriple.setOS(llvm::Triple::MacOSX);
 #endif
     std::unique_ptr<TargetMachine> TM(
-        jl_TargetMachine->getTarget().createTargetMachine(
+        jl_ExecutionEngine->getTarget().createTargetMachine(
             TheTriple.getTriple(),
-            jl_TargetMachine->getTargetCPU(),
-            jl_TargetMachine->getTargetFeatureString(),
-            jl_TargetMachine->Options,
+            jl_ExecutionEngine->getTargetCPU(),
+            jl_ExecutionEngine->getTargetFeatureString(),
+            jl_ExecutionEngine->getTargetOptions(),
 #if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
             Reloc::PIC_,
 #else
@@ -487,13 +481,13 @@ void jl_dump_native(void *native_code,
             ));
 
     legacy::PassManager PM;
-    addTargetPasses(&PM, TM.get());
+    addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
 
     // set up optimization passes
-    SmallVector<char, 128> bc_Buffer;
-    SmallVector<char, 128> obj_Buffer;
-    SmallVector<char, 128> asm_Buffer;
-    SmallVector<char, 128> unopt_bc_Buffer;
+    SmallVector<char, 0> bc_Buffer;
+    SmallVector<char, 0> obj_Buffer;
+    SmallVector<char, 0> asm_Buffer;
+    SmallVector<char, 0> unopt_bc_Buffer;
     raw_svector_ostream bc_OS(bc_Buffer);
     raw_svector_ostream obj_OS(obj_Buffer);
     raw_svector_ostream asm_OS(asm_Buffer);
@@ -506,8 +500,10 @@ void jl_dump_native(void *native_code,
 
     if (unopt_bc_fname)
         PM.add(createBitcodeWriterPass(unopt_bc_OS));
-    if (bc_fname || obj_fname || asm_fname)
+    if (bc_fname || obj_fname || asm_fname) {
         addOptimizationPasses(&PM, jl_options.opt_level, true, true);
+        addMachinePasses(&PM, jl_options.opt_level);
+    }
     if (bc_fname)
         PM.add(createBitcodeWriterPass(bc_OS));
     if (obj_fname)
@@ -518,10 +514,9 @@ void jl_dump_native(void *native_code,
             jl_safe_printf("ERROR: target does not support generation of object files\n");
 
     // Reset the target triple to make sure it matches the new target machine
-    data->M->setTargetTriple(TM->getTargetTriple().str());
-    DataLayout DL = TM->createDataLayout();
-    DL.reset(DL.getStringRepresentation() + "-ni:10:11:12:13");
-    data->M->setDataLayout(DL);
+    auto dataM = data->M.getModuleUnlocked();
+    dataM->setTargetTriple(TM->getTargetTriple().str());
+    dataM->setDataLayout(jl_create_datalayout(*TM));
     Type *T_size;
     if (sizeof(size_t) == 8)
         T_size = Type::getInt64Ty(Context);
@@ -530,14 +525,14 @@ void jl_dump_native(void *native_code,
     Type *T_psize = T_size->getPointerTo();
 
     // add metadata information
-    if (imaging_mode) {
-        emit_offset_table(*data->M, data->jl_sysimg_gvars, "jl_sysimg_gvars", T_psize);
-        emit_offset_table(*data->M, data->jl_sysimg_fvars, "jl_sysimg_fvars", T_psize);
+    if (imaging_default()) {
+        emit_offset_table(*dataM, data->jl_sysimg_gvars, "jl_sysimg_gvars", T_psize);
+        emit_offset_table(*dataM, data->jl_sysimg_fvars, "jl_sysimg_fvars", T_psize);
 
         // reflect the address of the jl_RTLD_DEFAULT_handle variable
         // back to the caller, so that we can check for consistency issues
-        GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(data->M.get());
-        addComdat(new GlobalVariable(*data->M,
+        GlobalValue *jlRTLD_DEFAULT_var = jl_emit_RTLD_DEFAULT_var(dataM);
+        addComdat(new GlobalVariable(*dataM,
                                      jlRTLD_DEFAULT_var->getType(),
                                      true,
                                      GlobalVariable::ExternalLinkage,
@@ -558,25 +553,30 @@ void jl_dump_native(void *native_code,
             emit_result(asm_Archive, asm_Buffer, asm_Name, outputs);
     };
 
-    add_output(*data->M, "unopt.bc", "text.bc", "text.o", "text.s");
+    add_output(*dataM, "unopt.bc", "text.bc", "text.o", "text.s");
 
-    std::unique_ptr<Module> sysimage(new Module("sysimage", Context));
-    sysimage->setTargetTriple(data->M->getTargetTriple());
-    sysimage->setDataLayout(data->M->getDataLayout());
-    data->M.reset(); // free memory for data->M
+    orc::ThreadSafeModule sysimage(std::make_unique<Module>("sysimage", Context), TSCtx);
+    auto sysimageM = sysimage.getModuleUnlocked();
+    sysimageM->setTargetTriple(dataM->getTargetTriple());
+    sysimageM->setDataLayout(dataM->getDataLayout());
+#if JL_LLVM_VERSION >= 130000
+    sysimageM->setStackProtectorGuard(dataM->getStackProtectorGuard());
+    sysimageM->setOverrideStackAlignment(dataM->getOverrideStackAlignment());
+#endif
+    data->M = orc::ThreadSafeModule(); // free memory for data->M
 
     if (sysimg_data) {
         Constant *data = ConstantDataArray::get(Context,
             ArrayRef<uint8_t>((const unsigned char*)sysimg_data, sysimg_len));
-        addComdat(new GlobalVariable(*sysimage, data->getType(), false,
+        addComdat(new GlobalVariable(*sysimageM, data->getType(), false,
                                      GlobalVariable::ExternalLinkage,
                                      data, "jl_system_image_data"))->setAlignment(Align(64));
         Constant *len = ConstantInt::get(T_size, sysimg_len);
-        addComdat(new GlobalVariable(*sysimage, len->getType(), true,
+        addComdat(new GlobalVariable(*sysimageM, len->getType(), true,
                                      GlobalVariable::ExternalLinkage,
                                      len, "jl_system_image_size"));
     }
-    add_output(*sysimage, "data.bc", "data.bc", "data.o", "data.s");
+    add_output(*sysimageM, "data.bc", "data.bc", "data.o", "data.s");
 
     object::Archive::Kind Kind = getDefaultForHost(TheTriple);
     if (unopt_bc_fname)
@@ -595,18 +595,35 @@ void jl_dump_native(void *native_code,
     delete data;
 }
 
+void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis)
+{
+    PM->add(new TargetLibraryInfoWrapperPass(triple));
+    PM->add(createTargetTransformInfoWrapperPass(std::move(analysis)));
+}
 
-void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM)
+
+void addMachinePasses(legacy::PassManagerBase *PM, int optlevel)
 {
-    PM->add(new TargetLibraryInfoWrapperPass(Triple(TM->getTargetTriple())));
-    PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+    // TODO: don't do this on CPUs that natively support Float16
+    PM->add(createDemoteFloat16Pass());
+    if (optlevel > 1)
+        PM->add(createGVNPass());
 }
 
 // this defines the set of optimization passes defined for Julia at various optimization levels.
 // it assumes that the TLI and TTI wrapper passes have already been added.
 void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
-                           bool lower_intrinsics, bool dump_native)
+                           bool lower_intrinsics, bool dump_native,
+                           bool external_use)
 {
+    // Note: LLVM 12 disabled the hoisting of common instruction
+    //       before loop vectorization (https://reviews.llvm.org/D84108).
+    //
+    // TODO: CommonInstruction hoisting/sinking enables AllocOpt
+    //       to merge allocations and sometimes eliminate them,
+    //       since AllocOpt does not handle PhiNodes.
+    //       Enable this instruction hoisting because of this and Union benchmarks.
+    auto simplifyCFGOptions = SimplifyCFGOptions().hoistCommonInsts(true);
 #ifdef JL_DEBUG_BUILD
     PM->add(createGCInvariantVerifierPass(true));
     PM->add(createVerifierPass());
@@ -614,7 +631,14 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
 
     PM->add(createConstantMergePass());
     if (opt_level < 2) {
-        PM->add(createCFGSimplificationPass());
+        if (!dump_native) {
+            // we won't be multiversioning, so lower CPU feature checks early on
+            // so that we can avoid an additional CFG simplification pass at the end.
+            PM->add(createCPUFeaturesPass());
+            if (opt_level == 1)
+                PM->add(createInstSimplifyLegacyPass());
+        }
+        PM->add(createCFGSimplificationPass(simplifyCFGOptions));
         if (opt_level == 1) {
             PM->add(createSROAPass());
             PM->add(createInstructionCombiningPass());
@@ -638,15 +662,22 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
             PM->add(createRemoveNIPass());
         }
         PM->add(createLowerSimdLoopPass()); // Annotate loop marked with "loopinfo" as LLVM parallel loop
-        if (dump_native)
-            PM->add(createMultiVersioningPass());
-#if defined(JL_ASAN_ENABLED)
+        if (dump_native) {
+            PM->add(createMultiVersioningPass(external_use));
+            PM->add(createCPUFeaturesPass());
+            // minimal clean-up to get rid of CPU feature checks
+            if (opt_level == 1) {
+                PM->add(createInstSimplifyLegacyPass());
+                PM->add(createCFGSimplificationPass(simplifyCFGOptions));
+            }
+        }
+#if defined(_COMPILER_ASAN_ENABLED_)
         PM->add(createAddressSanitizerFunctionPass());
 #endif
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
         PM->add(createMemorySanitizerPass(true));
 #endif
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
         PM->add(createThreadSanitizerLegacyPassPass());
 #endif
         return;
@@ -658,7 +689,7 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
         PM->add(createBasicAAWrapperPass());
     }
 
-    PM->add(createCFGSimplificationPass());
+    PM->add(createCFGSimplificationPass(simplifyCFGOptions));
     PM->add(createDeadCodeEliminationPass());
     PM->add(createSROAPass());
 
@@ -672,12 +703,14 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     PM->add(createAllocOptPass());
     // consider AggressiveInstCombinePass at optlevel > 2
     PM->add(createInstructionCombiningPass());
-    PM->add(createCFGSimplificationPass());
+    PM->add(createCFGSimplificationPass(simplifyCFGOptions));
     if (dump_native)
-        PM->add(createMultiVersioningPass());
+        PM->add(createMultiVersioningPass(external_use));
+    PM->add(createCPUFeaturesPass());
     PM->add(createSROAPass());
     PM->add(createInstSimplifyLegacyPass());
     PM->add(createJumpThreadingPass());
+    PM->add(createCorrelatedValuePropagationPass());
 
     PM->add(createReassociatePass());
 
@@ -688,7 +721,6 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     PM->add(createAllocOptPass());
     PM->add(createLoopRotatePass());
     // moving IndVarSimplify here prevented removing the loop in perf_sumcartesian(10:-1:1)
-    PM->add(createLoopIdiomPass());
 #ifdef USE_POLLY
     // LCSSA (which has already run at this point due to the dependencies of the
     // above passes) introduces redundant phis that hinder Polly. Therefore we
@@ -705,8 +737,10 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     PM->add(createLoopUnswitchPass());
     PM->add(createLICMPass());
     PM->add(createJuliaLICMPass());
+    PM->add(createInductiveRangeCheckEliminationPass()); // Must come before indvars
     // Subsequent passes not stripping metadata from terminator
     PM->add(createInstSimplifyLegacyPass());
+    PM->add(createLoopIdiomPass());
     PM->add(createIndVarSimplifyPass());
     PM->add(createLoopDeletionPass());
     PM->add(createSimpleLoopUnrollPass());
@@ -723,12 +757,21 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     PM->add(createMemCpyOptPass());
     PM->add(createSCCPPass());
 
+    //These next two passes must come before IRCE to eliminate the bounds check in #43308
+    PM->add(createCorrelatedValuePropagationPass());
+    PM->add(createDeadCodeEliminationPass());
+
+    PM->add(createInductiveRangeCheckEliminationPass()); // Must come between the two GVN passes
+
     // Run instcombine after redundancy elimination to exploit opportunities
     // opened up by them.
     // This needs to be InstCombine instead of InstSimplify to allow
     // loops over Union-typed arrays to vectorize.
     PM->add(createInstructionCombiningPass());
     PM->add(createJumpThreadingPass());
+    if (opt_level >= 3) {
+        PM->add(createGVNPass()); // Must come after JumpThreading and before LoopVectorize
+    }
     PM->add(createDeadStoreEliminationPass());
 
     // More dead allocation (store) deletion before loop optimization
@@ -737,12 +780,21 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     // see if all of the constant folding has exposed more loops
     // to simplification and deletion
     // this helps significantly with cleaning up iteration
-    PM->add(createCFGSimplificationPass());
+    PM->add(createCFGSimplificationPass()); // See note above, don't hoist instructions before LV
     PM->add(createLoopDeletionPass());
     PM->add(createInstructionCombiningPass());
     PM->add(createLoopVectorizePass());
     PM->add(createLoopLoadEliminationPass());
-    PM->add(createCFGSimplificationPass());
+    // Cleanup after LV pass
+    PM->add(createInstructionCombiningPass());
+    PM->add(createCFGSimplificationPass( // Aggressive CFG simplification
+        SimplifyCFGOptions()
+            .forwardSwitchCondToPhi(true)
+            .convertSwitchToLookupTable(true)
+            .needCanonicalLoops(false)
+            .hoistCommonInsts(true)
+            // .sinkCommonInsts(true) // FIXME: Causes assertion in llvm-late-lowering
+    ));
     PM->add(createSLPVectorizerPass());
     // might need this after LLVM 11:
     //PM->add(createVectorCombinePass());
@@ -778,20 +830,20 @@ void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level,
     }
     PM->add(createCombineMulAddPass());
     PM->add(createDivRemPairsPass());
-#if defined(JL_ASAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_)
     PM->add(createAddressSanitizerFunctionPass());
 #endif
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
     PM->add(createMemorySanitizerPass(true));
 #endif
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
     PM->add(createThreadSanitizerLegacyPassPass());
 #endif
 }
 
 // An LLVM module pass that just runs all julia passes in order. Useful for
 // debugging
-template <int OptLevel>
+template <int OptLevel, bool dump_native>
 class JuliaPipeline : public Pass {
 public:
     static char ID;
@@ -805,33 +857,132 @@ class JuliaPipeline : public Pass {
         (void)jl_init_llvm();
         PMTopLevelManager *TPM = Stack.top()->getTopLevelManager();
         TPMAdapter Adapter(TPM);
-        addTargetPasses(&Adapter, jl_TargetMachine);
-        addOptimizationPasses(&Adapter, OptLevel);
+        addTargetPasses(&Adapter, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis());
+        addOptimizationPasses(&Adapter, OptLevel, true, dump_native, true);
+        addMachinePasses(&Adapter, OptLevel);
     }
     JuliaPipeline() : Pass(PT_PassManager, ID) {}
     Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const override {
         return createPrintModulePass(O, Banner);
     }
 };
-template<> char JuliaPipeline<0>::ID = 0;
-template<> char JuliaPipeline<2>::ID = 0;
-template<> char JuliaPipeline<3>::ID = 0;
-static RegisterPass<JuliaPipeline<0>> X("juliaO0", "Runs the entire julia pipeline (at -O0)", false, false);
-static RegisterPass<JuliaPipeline<2>> Y("julia", "Runs the entire julia pipeline (at -O2)", false, false);
-static RegisterPass<JuliaPipeline<3>> Z("juliaO3", "Runs the entire julia pipeline (at -O3)", false, false);
+template<> char JuliaPipeline<0,false>::ID = 0;
+template<> char JuliaPipeline<2,false>::ID = 0;
+template<> char JuliaPipeline<3,false>::ID = 0;
+template<> char JuliaPipeline<0,true>::ID = 0;
+template<> char JuliaPipeline<2,true>::ID = 0;
+template<> char JuliaPipeline<3,true>::ID = 0;
+static RegisterPass<JuliaPipeline<0,false>> X("juliaO0", "Runs the entire julia pipeline (at -O0)", false, false);
+static RegisterPass<JuliaPipeline<2,false>> Y("julia", "Runs the entire julia pipeline (at -O2)", false, false);
+static RegisterPass<JuliaPipeline<3,false>> Z("juliaO3", "Runs the entire julia pipeline (at -O3)", false, false);
+
+static RegisterPass<JuliaPipeline<0,true>> XS("juliaO0-sysimg", "Runs the entire julia pipeline (at -O0/sysimg mode)", false, false);
+static RegisterPass<JuliaPipeline<2,true>> YS("julia-sysimg", "Runs the entire julia pipeline (at -O2/sysimg mode)", false, false);
+static RegisterPass<JuliaPipeline<3,true>> ZS("juliaO3-sysimg", "Runs the entire julia pipeline (at -O3/sysimg mode)", false, false);
 
 extern "C" JL_DLLEXPORT
-void jl_add_optimization_passes(LLVMPassManagerRef PM, int opt_level, int lower_intrinsics) {
+void jl_add_optimization_passes_impl(LLVMPassManagerRef PM, int opt_level, int lower_intrinsics) {
     addOptimizationPasses(unwrap(PM), opt_level, lower_intrinsics);
 }
 
+// new pass manager plugin
+
+// NOTE: Instead of exporting all the constructors in passes.h we could
+// forward the callbacks to the respective passes. LLVM seems to prefer this,
+// and when we add the full pass builder having them directly will be helpful.
+static void registerCallbacks(PassBuilder &PB) {
+    PB.registerPipelineParsingCallback(
+        [](StringRef Name, FunctionPassManager &PM,
+           ArrayRef<PassBuilder::PipelineElement> InnerPipeline) {
+            if (Name == "DemoteFloat16") {
+                PM.addPass(DemoteFloat16());
+                return true;
+            }
+            if (Name == "CombineMulAdd") {
+              PM.addPass(CombineMulAdd());
+              return true;
+            }
+            if (Name == "LateLowerGCFrame") {
+                PM.addPass(LateLowerGC());
+                return true;
+            }
+            if (Name == "AllocOpt") {
+                PM.addPass(AllocOptPass());
+                return true;
+            }
+            if (Name == "PropagateJuliaAddrspaces") {
+                PM.addPass(PropagateJuliaAddrspacesPass());
+                return true;
+            }
+            if (Name == "LowerExcHandlers") {
+                PM.addPass(LowerExcHandlers());
+                return true;
+            }
+            if (Name == "GCInvariantVerifier") {
+                // TODO: Parse option and allow users to set `Strong`
+                PM.addPass(GCInvariantVerifierPass());
+                return true;
+            }
+            return false;
+        });
+
+    PB.registerPipelineParsingCallback(
+        [](StringRef Name, ModulePassManager &PM,
+           ArrayRef<PassBuilder::PipelineElement> InnerPipeline) {
+            if (Name == "CPUFeatures") {
+              PM.addPass(CPUFeatures());
+              return true;
+            }
+            if (Name == "RemoveNI") {
+              PM.addPass(RemoveNI());
+              return true;
+            }
+            if (Name == "LowerSIMDLoop") {
+              PM.addPass(LowerSIMDLoop());
+              return true;
+            }
+            if (Name == "FinalLowerGC") {
+                PM.addPass(FinalLowerGCPass());
+                return true;
+            }
+            if (Name == "RemoveJuliaAddrspaces") {
+                PM.addPass(RemoveJuliaAddrspacesPass());
+                return true;
+            }
+            if (Name == "MultiVersioning") {
+                PM.addPass(MultiVersioning());
+                return true;
+            }
+            if (Name == "LowerPTLS") {
+                PM.addPass(LowerPTLSPass());
+                return true;
+            }
+            return false;
+        });
+
+    PB.registerPipelineParsingCallback(
+        [](StringRef Name, LoopPassManager &PM,
+           ArrayRef<PassBuilder::PipelineElement> InnerPipeline) {
+            if (Name == "JuliaLICM") {
+                PM.addPass(JuliaLICMPass());
+                return true;
+            }
+            return false;
+        });
+}
+
+extern "C" JL_DLLEXPORT ::llvm::PassPluginLibraryInfo
+llvmGetPassPluginInfo() {
+      return {LLVM_PLUGIN_API_VERSION, "Julia", "1", registerCallbacks};
+}
+
 // --- native code info, and dump function to IR and ASM ---
 // Get pointer to llvm::Function instance, compiling if necessary
 // for use in reflection from Julia.
-// this is paired with jl_dump_function_ir, jl_dump_method_asm, jl_dump_llvm_asm in particular ways:
+// this is paired with jl_dump_function_ir, jl_dump_function_asm, jl_dump_method_asm in particular ways:
 // misuse will leak memory or cause read-after-free
 extern "C" JL_DLLEXPORT
-void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params)
+void *jl_get_llvmf_defn_impl(jl_method_instance_t *mi, size_t world, char getwrapper, char optimize, const jl_cgparams_t params)
 {
     if (jl_is_method(mi->def.method) && mi->def.method->source == NULL &&
             mi->def.method->generator == NULL) {
@@ -842,43 +993,54 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
     static legacy::PassManager *PM;
     if (!PM) {
         PM = new legacy::PassManager();
-        addTargetPasses(PM, jl_TargetMachine);
+        addTargetPasses(PM, jl_ExecutionEngine->getTargetTriple(), jl_ExecutionEngine->getTargetIRAnalysis());
         addOptimizationPasses(PM, jl_options.opt_level);
+        addMachinePasses(PM, jl_options.opt_level);
     }
 
     // get the source code for this function
     jl_value_t *jlrettype = (jl_value_t*)jl_any_type;
     jl_code_info_t *src = NULL;
     JL_GC_PUSH2(&src, &jlrettype);
-    jl_value_t *ci = jl_rettype_inferred(mi, world, world);
-    if (ci != jl_nothing) {
-        jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
-        src = (jl_code_info_t*)codeinst->inferred;
-        if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method))
-            src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
-        jlrettype = codeinst->rettype;
-    }
-    if (!src || (jl_value_t*)src == jl_nothing) {
-        src = jl_type_infer(mi, world, 0);
-        if (src)
-            jlrettype = src->rettype;
-        else if (jl_is_method(mi->def.method)) {
-            src = mi->def.method->generator ? jl_code_for_staged(mi) : (jl_code_info_t*)mi->def.method->source;
-            if (src && !jl_is_code_info(src) && jl_is_method(mi->def.method))
-                src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src);
+    if (jl_is_method(mi->def.method) && mi->def.method->source != NULL && jl_ir_flag_inferred((jl_array_t*)mi->def.method->source)) {
+        src = (jl_code_info_t*)mi->def.method->source;
+        if (src && !jl_is_code_info(src))
+            src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src);
+    } else {
+        jl_value_t *ci = jl_rettype_inferred(mi, world, world);
+        if (ci != jl_nothing) {
+            jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
+            src = (jl_code_info_t*)codeinst->inferred;
+            if ((jl_value_t*)src != jl_nothing && !jl_is_code_info(src) && jl_is_method(mi->def.method))
+                src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
+            jlrettype = codeinst->rettype;
+        }
+        if (!src || (jl_value_t*)src == jl_nothing) {
+            src = jl_type_infer(mi, world, 0);
+            if (src)
+                jlrettype = src->rettype;
+            else if (jl_is_method(mi->def.method)) {
+                src = mi->def.method->generator ? jl_code_for_staged(mi) : (jl_code_info_t*)mi->def.method->source;
+                if (src && !jl_is_code_info(src) && jl_is_method(mi->def.method))
+                    src = jl_uncompress_ir(mi->def.method, NULL, (jl_array_t*)src);
+            }
+            // TODO: use mi->uninferred
         }
-        // TODO: use mi->uninferred
     }
 
     // emit this function into a new llvm module
     if (src && jl_is_code_info(src)) {
-        jl_codegen_params_t output;
+        JL_LOCK(&jl_codegen_lock);
+        auto ctx = jl_ExecutionEngine->getContext();
+        jl_codegen_params_t output(*ctx);
         output.world = world;
         output.params = &params;
-        std::unique_ptr<Module> m;
-        jl_llvm_functions_t decls;
-        JL_LOCK(&codegen_lock);
-        std::tie(m, decls) = jl_emit_code(mi, src, jlrettype, output);
+        orc::ThreadSafeModule m = jl_create_llvm_module(name_from_method_instance(mi), output.tsctx, output.imaging);
+        uint64_t compiler_start_time = 0;
+        uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+        if (measure_compile_time_enabled)
+            compiler_start_time = jl_hrtime();
+        auto decls = jl_emit_code(m, mi, src, jlrettype, output);
 
         Function *F = NULL;
         if (m) {
@@ -889,7 +1051,8 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
             for (auto &global : output.globals)
                 global.second->setLinkage(GlobalValue::ExternalLinkage);
             if (optimize)
-                PM->run(*m.get());
+                //Safe b/c context lock is held by output
+                PM->run(*m.getModuleUnlocked());
             const std::string *fname;
             if (decls.functionObject == "jl_fptr_args" || decls.functionObject == "jl_fptr_sparam")
                 getwrapper = false;
@@ -897,99 +1060,16 @@ void *jl_get_llvmf_defn(jl_method_instance_t *mi, size_t world, char getwrapper,
                 fname = &decls.specFunctionObject;
             else
                 fname = &decls.functionObject;
-            F = cast<Function>(m->getNamedValue(*fname));
-            m.release(); // the return object `llvmf` will be the owning pointer
+            F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
         }
         JL_GC_POP();
-        JL_UNLOCK(&codegen_lock); // Might GC
+        if (measure_compile_time_enabled)
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+        JL_UNLOCK(&jl_codegen_lock); // Might GC
         if (F)
-            return F;
+            return new jl_llvmf_dump_t{std::move(m), F};
     }
 
     const char *mname = name_from_method_instance(mi);
     jl_errorf("unable to compile source for function %s", mname);
 }
-
-/// addPassesToX helper drives creation and initialization of TargetPassConfig.
-static MCContext *
-addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM) {
-    TargetPassConfig *PassConfig = TM->createPassConfig(PM);
-    PassConfig->setDisableVerify(false);
-    PM.add(PassConfig);
-#if JL_LLVM_VERSION >= 100000
-    MachineModuleInfoWrapperPass *MMIWP =
-        new MachineModuleInfoWrapperPass(TM);
-    PM.add(MMIWP);
-#else
-    MachineModuleInfo *MMI = new MachineModuleInfo(TM);
-    PM.add(MMI);
-#endif
-    if (PassConfig->addISelPasses())
-        return NULL;
-    PassConfig->addMachinePasses();
-    PassConfig->setInitialized();
-#if JL_LLVM_VERSION >= 100000
-    return &MMIWP->getMMI().getContext();
-#else
-    return &MMI->getContext();
-#endif
-}
-
-void jl_strip_llvm_debug(Module *m);
-
-
-// get a native assembly for llvm::Function
-// TODO: implement debuginfo handling
-extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_llvm_asm(void *F, const char* asm_variant, const char *debuginfo)
-{
-    // precise printing via IR assembler
-    SmallVector<char, 4096> ObjBufferSV;
-    { // scope block
-        Function *f = (Function*)F;
-        llvm::raw_svector_ostream asmfile(ObjBufferSV);
-        assert(!f->isDeclaration());
-        std::unique_ptr<Module> m(f->getParent());
-        for (auto &f2 : m->functions()) {
-            if (f != &f2 && !f->isDeclaration())
-                f2.deleteBody();
-        }
-        jl_strip_llvm_debug(m.get());
-        legacy::PassManager PM;
-        LLVMTargetMachine *TM = static_cast<LLVMTargetMachine*>(jl_TargetMachine);
-        MCContext *Context = addPassesToGenerateCode(TM, PM);
-        if (Context) {
-            const MCSubtargetInfo &STI = *TM->getMCSubtargetInfo();
-            const MCAsmInfo &MAI = *TM->getMCAsmInfo();
-            const MCRegisterInfo &MRI = *TM->getMCRegisterInfo();
-            const MCInstrInfo &MII = *TM->getMCInstrInfo();
-            unsigned OutputAsmDialect = MAI.getAssemblerDialect();
-            if (!strcmp(asm_variant, "att"))
-                OutputAsmDialect = 0;
-            if (!strcmp(asm_variant, "intel"))
-                OutputAsmDialect = 1;
-            MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter(
-                TM->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI);
-             std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
-                STI, MRI, TM->Options.MCOptions));
-            std::unique_ptr<MCCodeEmitter> MCE;
-#if JL_LLVM_VERSION >= 100000
-            auto FOut = std::make_unique<formatted_raw_ostream>(asmfile);
-#else
-            auto FOut = llvm::make_unique<formatted_raw_ostream>(asmfile);
-#endif
-            std::unique_ptr<MCStreamer> S(TM->getTarget().createAsmStreamer(
-                *Context, std::move(FOut), true,
-                true, InstPrinter,
-                std::move(MCE), std::move(MAB),
-                false));
-            std::unique_ptr<AsmPrinter> Printer(
-                TM->getTarget().createAsmPrinter(*TM, std::move(S)));
-            if (Printer) {
-                PM.add(Printer.release());
-                PM.run(*m);
-            }
-        }
-    }
-    return jl_pchar_to_string(ObjBufferSV.data(), ObjBufferSV.size());
-}
diff --git a/src/array.c b/src/array.c
index b38b38188b9f38..728dbf40bd4e1c 100644
--- a/src/array.c
+++ b/src/array.c
@@ -20,27 +20,31 @@ extern "C" {
 
 #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
 
-// this is a version of memcpy that preserves atomic memory ordering
-// which makes it safe to use for objects that can contain memory references
-// without risk of creating pointers out of thin air
-// TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32
-//       aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit)
-static void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
+static inline void arrayassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT
 {
-    size_t i;
-    if (dstp < srcp || dstp > srcp + n) {
-        for (i = 0; i < n; i++) {
-            jl_atomic_store_relaxed(dstp + i, jl_atomic_load_relaxed(srcp + i));
-        }
+    // array can assume more alignment than a field would normally have
+    assert(nb >= jl_datatype_size(jl_typeof(src))); // nb might move some undefined bits, but we should be okay with that
+    if (hasptr) {
+        size_t nptr = nb / sizeof(void*);
+        memmove_refs((void**)dst, (void* const*)src, nptr);
+        jl_gc_multi_wb(parent, src);
     }
     else {
-        for (i = 0; i < n; i++) {
-            jl_atomic_store_relaxed(dstp + n - i - 1, jl_atomic_load_relaxed(srcp + n - i - 1));
+        switch (nb) {
+        case  0: break;
+        case  1: *(uint8_t*)dst  = *(uint8_t*)src;  break;
+        case  2: *(uint16_t*)dst = *(uint16_t*)src; break;
+        case  4: *(uint32_t*)dst = *(uint32_t*)src; break;
+        case  8: *(uint64_t*)dst = *(uint64_t*)src; break;
+        case 16:
+            memcpy(jl_assume_aligned(dst, 16), jl_assume_aligned(src, 16), 16);
+            break;
+        default: memcpy(dst, src, nb);
         }
     }
 }
 
-static void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT
+static inline void memmove_safe(int hasptr, char *dst, const char *src, size_t nb) JL_NOTSAFEPOINT
 {
     if (hasptr)
         memmove_refs((void**)dst, (void**)src, nb / sizeof(void*));
@@ -70,31 +74,42 @@ typedef __uint128_t wideint_t;
 typedef uint64_t wideint_t;
 #endif
 
-size_t jl_arr_xtralloc_limit = 0;
-
 #define MAXINTVAL (((size_t)-1)>>1)
 
+JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, size_t *tot, uint32_t ndims, size_t *dims, size_t elsz)
+{
+    size_t i;
+    size_t _nel = 1;
+    for(i=0; i < ndims; i++) {
+        size_t di = dims[i];
+        wideint_t prod = (wideint_t)_nel * (wideint_t)di;
+        if (prod >= (wideint_t) MAXINTVAL || di >= MAXINTVAL)
+            return 1;
+        _nel = prod;
+    }
+    wideint_t prod = (wideint_t)elsz * (wideint_t)_nel;
+    if (prod >= (wideint_t) MAXINTVAL)
+        return 2;
+    *nel = _nel;
+    *tot = (size_t)prod;
+    return 0;
+}
+
 static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
-                               int isunboxed, int hasptr, int isunion, int elsz)
+                               int8_t isunboxed, int8_t hasptr, int8_t isunion, int8_t zeroinit, size_t elsz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t i, tot, nel=1;
+    jl_task_t *ct = jl_current_task;
+    size_t i, tot, nel;
     void *data;
     jl_array_t *a;
-
-    for(i=0; i < ndims; i++) {
-        size_t di = dims[i];
-        wideint_t prod = (wideint_t)nel * (wideint_t)di;
-        if (prod > (wideint_t) MAXINTVAL || di > MAXINTVAL)
-            jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
-        nel = prod;
-    }
+    assert(isunboxed || elsz == sizeof(void*));
     assert(atype == NULL || isunion == jl_is_uniontype(jl_tparam0(atype)));
+    int validated = jl_array_validate_dims(&nel, &tot, ndims, dims, elsz);
+    if (validated == 1)
+        jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
+    else if (validated == 2)
+        jl_error("invalid Array size");
     if (isunboxed) {
-        wideint_t prod = (wideint_t)elsz * (wideint_t)nel;
-        if (prod > (wideint_t) MAXINTVAL)
-            jl_error("invalid Array size");
-        tot = prod;
         if (elsz == 1 && !isunion) {
             // extra byte for all julia allocated byte arrays
             tot++;
@@ -104,49 +119,41 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
             tot += nel;
         }
     }
-    else {
-        wideint_t prod = (wideint_t)sizeof(void*) * (wideint_t)nel;
-        if (prod > (wideint_t) MAXINTVAL)
-            jl_error("invalid Array size");
-        tot = prod;
-    }
 
     int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
+    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
     if (tot <= ARRAY_INLINE_NBYTES) {
-        if (isunboxed && elsz >= 4)
-            tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align data area
+        // align data area
+        if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
+            tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT);
+        else if (isunboxed && elsz >= 4)
+            tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT);
         size_t doffs = tsz;
         tsz += tot;
-        tsz = JL_ARRAY_ALIGN(tsz, JL_SMALL_BYTE_ALIGNMENT); // align whole object
-        a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+        // jl_array_t is large enough that objects will always be aligned 16
+        a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
+        assert(((size_t)a & 15) == 0);
         // No allocation or safepoint allowed after this
         a->flags.how = 0;
         data = (char*)a + doffs;
-        if (tot > 0 && (!isunboxed || hasptr || isunion)) // TODO: check for zeroinit
-            memset(data, 0, tot);
     }
     else {
-        tsz = JL_ARRAY_ALIGN(tsz, JL_CACHE_BYTE_ALIGNMENT); // align whole object
         data = jl_gc_managed_malloc(tot);
         // Allocate the Array **after** allocating the data
         // to make sure the array is still young
-        a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+        a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
         // No allocation or safepoint allowed after this
         a->flags.how = 2;
-        jl_gc_track_malloced_array(ptls, a);
-        if (tot > 0 && (!isunboxed || hasptr || isunion)) // TODO: check for zeroinit
-            // need to zero out isbits union array selector bytes to ensure a valid type index
-            memset(data, 0, tot);
+        jl_gc_track_malloced_array(ct->ptls, a);
     }
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
 
+    if (zeroinit)
+        memset(data, 0, tot);
     a->data = data;
     if (JL_ARRAY_IMPL_NUL && elsz == 1)
         ((char*)data)[tot - 1] = '\0';
-#ifdef STORE_ARRAY_LEN
     a->length = nel;
-#endif
     a->flags.ndims = ndims;
     a->flags.ptrarray = !isunboxed;
     a->flags.hasptr = hasptr;
@@ -186,14 +193,15 @@ static inline jl_array_t *_new_array(jl_value_t *atype, uint32_t ndims, size_t *
     else {
         elsz = LLT_ALIGN(elsz, al);
     }
+    int zi = !isunboxed || hasptr || isunion || (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->zeroinit);
 
-    return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz);
+    return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, zi, elsz);
 }
 
 jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims,
                                              int isunboxed, int hasptr, int isunion, int elsz)
 {
-    return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, elsz);
+    return _new_array_(atype, ndims, dims, isunboxed, hasptr, isunion, 0, (size_t)elsz);
 }
 
 #ifndef JL_NDEBUG
@@ -215,52 +223,29 @@ static inline int is_ntuple_long(jl_value_t *v)
 JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
                                           jl_value_t *_dims)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_array_t *a;
+    jl_task_t *ct = jl_current_task;
+    assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));
+
     size_t ndims = jl_nfields(_dims);
     assert(is_ntuple_long(_dims));
     size_t *dims = (size_t*)_dims;
-    assert(jl_types_equal(jl_tparam0(jl_typeof(data)), jl_tparam0(atype)));
-
     int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
-    a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+    int tsz = sizeof(jl_array_t) + ndimwords * sizeof(size_t) + sizeof(void*);
+    jl_array_t *a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
     // No allocation or safepoint allowed after this
+    // copy data (except dims) from the old object
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->flags.ndims = ndims;
     a->offset = 0;
     a->data = NULL;
     a->flags.isaligned = data->flags.isaligned;
-    jl_array_t *owner = (jl_array_t*)jl_array_owner(data);
-    jl_value_t *eltype = jl_tparam0(atype);
-    size_t elsz = 0, align = 0;
-    int isboxed = !jl_islayout_inline(eltype, &elsz, &align);
-    assert(isboxed == data->flags.ptrarray);
-    if (!isboxed) {
-        a->elsize = LLT_ALIGN(elsz, align);
-        jl_value_t *ownerty = jl_typeof(owner);
-        size_t oldelsz = 0, oldalign = 0;
-        if (ownerty == (jl_value_t*)jl_string_type) {
-            oldalign = 1;
-        }
-        else {
-            jl_islayout_inline(jl_tparam0(ownerty), &oldelsz, &oldalign);
-        }
-        if (oldalign < align)
-            jl_exceptionf(jl_argumenterror_type,
-                          "reinterpret from alignment %d bytes to alignment %d bytes not allowed",
-                          (int) oldalign, (int) align);
-        a->flags.ptrarray = 0;
-        a->flags.hasptr = data->flags.hasptr;
-    }
-    else {
-        a->elsize = sizeof(void*);
-        a->flags.ptrarray = 1;
-        a->flags.hasptr = 0;
-    }
+    a->elsize = data->elsize;
+    a->flags.ptrarray = data->flags.ptrarray;
+    a->flags.hasptr = data->flags.hasptr;
 
     // if data is itself a shared wrapper,
     // owner should point back to the original array
+    jl_array_t *owner = (jl_array_t*)jl_array_owner(data);
     jl_array_data_owner(a) = (jl_value_t*)owner;
 
     a->flags.how = 3;
@@ -270,9 +255,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
 
     if (ndims == 1) {
         size_t l = dims[0];
-#ifdef STORE_ARRAY_LEN
         a->length = l;
-#endif
         a->nrows = l;
         a->maxsize = l;
     }
@@ -290,9 +273,7 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
                 jl_exceptionf(jl_argumenterror_type, "invalid Array dimensions");
             l = prod;
         }
-#ifdef STORE_ARRAY_LEN
         a->length = l;
-#endif
     }
 
     return a;
@@ -300,12 +281,12 @@ JL_DLLEXPORT jl_array_t *jl_reshape_array(jl_value_t *atype, jl_array_t *data,
 
 JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_array_t *a;
 
     int ndimwords = jl_array_ndimwords(1);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*), JL_SMALL_BYTE_ALIGNMENT);
-    a = (jl_array_t*)jl_gc_alloc(ptls, tsz, jl_array_uint8_type);
+    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t) + sizeof(void*);
+    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, jl_array_uint8_type);
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->flags.ndims = 1;
     a->offset = 0;
@@ -318,9 +299,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
     a->flags.how = 3;
     a->flags.isshared = 1;
     size_t l = jl_string_len(str);
-#ifdef STORE_ARRAY_LEN
     a->length = l;
-#endif
     a->nrows = a->maxsize = l;
     return a;
 }
@@ -329,7 +308,7 @@ JL_DLLEXPORT jl_array_t *jl_string_to_array(jl_value_t *str)
 JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
                                             size_t nel, int own_buffer)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_array_t *a;
     jl_value_t *eltype = jl_tparam0(atype);
 
@@ -351,14 +330,12 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
                       "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
 
     int ndimwords = jl_array_ndimwords(1);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
-    a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
+    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
     // No allocation or safepoint allowed after this
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->data = data;
-#ifdef STORE_ARRAY_LEN
     a->length = nel;
-#endif
     a->elsize = LLT_ALIGN(elsz, align);
     a->flags.ptrarray = !isunboxed;
     a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0);
@@ -367,7 +344,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
     a->flags.isaligned = 0;  // TODO: allow passing memalign'd buffers
     if (own_buffer) {
         a->flags.how = 2;
-        jl_gc_track_malloced_array(ptls, a);
+        jl_gc_track_malloced_array(ct->ptls, a);
         jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
     }
     else {
@@ -383,7 +360,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
 JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
                                          jl_value_t *_dims, int own_buffer)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     size_t nel = 1;
     jl_array_t *a;
     size_t ndims = jl_nfields(_dims);
@@ -418,14 +395,12 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
                       "unsafe_wrap: pointer %p is not properly aligned to %u bytes", data, align);
 
     int ndimwords = jl_array_ndimwords(ndims);
-    int tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords*sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
-    a = (jl_array_t*)jl_gc_alloc(ptls, tsz, atype);
+    int tsz = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
+    a = (jl_array_t*)jl_gc_alloc(ct->ptls, tsz, atype);
     // No allocation or safepoint allowed after this
     a->flags.pooled = tsz <= GC_MAX_SZCLASS;
     a->data = data;
-#ifdef STORE_ARRAY_LEN
     a->length = nel;
-#endif
     a->elsize = LLT_ALIGN(elsz, align);
     a->flags.ptrarray = !isunboxed;
     a->flags.hasptr = isunboxed && (jl_is_datatype(eltype) && ((jl_datatype_t*)eltype)->layout->npointers > 0);
@@ -435,7 +410,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
     a->flags.isaligned = 0;
     if (own_buffer) {
         a->flags.how = 2;
-        jl_gc_track_malloced_array(ptls, a);
+        jl_gc_track_malloced_array(ct->ptls, a);
         jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
     }
     else {
@@ -485,6 +460,12 @@ JL_DLLEXPORT jl_array_t *jl_pchar_to_array(const char *str, size_t len)
 JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
 {
     size_t len = jl_array_len(a);
+    if (len == 0) {
+        // this may seem like purely an optimization (which it also is), but it
+        // also ensures that calling `String(a)` doesn't corrupt a previous
+        // string also created the same way, where `a = StringVector(_)`.
+        return jl_an_empty_string;
+    }
     if (a->flags.how == 3 && a->offset == 0 && a->elsize == 1 &&
         (jl_array_ndims(a) != 1 ||
          ((a->maxsize + sizeof(void*) + 1 <= GC_MAX_SZCLASS) == (len + sizeof(void*) + 1 <= GC_MAX_SZCLASS)))) {
@@ -493,45 +474,53 @@ JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
             a->flags.isshared = 1;
             *(size_t*)o = len;
             a->nrows = 0;
-#ifdef STORE_ARRAY_LEN
             a->length = 0;
-#endif
             a->maxsize = 0;
             return o;
         }
     }
     a->nrows = 0;
-#ifdef STORE_ARRAY_LEN
     a->length = 0;
-#endif
     a->maxsize = 0;
     return jl_pchar_to_string((const char*)jl_array_data(a), len);
 }
 
-JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
+JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
 {
+    if (len == 0)
+        return jl_an_empty_string;
     size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size
     if (sz < len) // overflow
         jl_throw(jl_memory_exception);
-    if (len == 0)
-        return jl_an_empty_string;
-    jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *s;
+    jl_ptls_t ptls = ct->ptls;
+    const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
+    if (sz <= GC_MAX_SZCLASS) {
+        int pool_id = jl_gc_szclass_align8(allocsz);
+        jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
+        int osize = jl_gc_sizeclasses[pool_id];
+        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
+        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+        s = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
+    }
+    else {
+        if (allocsz < sz) // overflow in adding offs, size was "negative"
+            jl_throw(jl_memory_exception);
+        s = jl_gc_big_alloc_noinline(ptls, allocsz);
+    }
+    jl_set_typeof(s, jl_string_type);
+    maybe_record_alloc_to_profile(s, len, jl_string_type);
     *(size_t*)s = len;
-    memcpy((char*)s + sizeof(size_t), str, len);
-    ((char*)s + sizeof(size_t))[len] = 0;
+    jl_string_data(s)[len] = 0;
     return s;
 }
 
-JL_DLLEXPORT jl_value_t *jl_alloc_string(size_t len)
+JL_DLLEXPORT jl_value_t *jl_pchar_to_string(const char *str, size_t len)
 {
-    size_t sz = sizeof(size_t) + len + 1; // add space for trailing \nul protector and size
-    if (sz < len) // overflow
-        jl_throw(jl_memory_exception);
-    if (len == 0)
-        return jl_an_empty_string;
-    jl_value_t *s = jl_gc_alloc_(jl_get_ptls_states(), sz, jl_string_type); // force inlining
-    *(size_t*)s = len;
-    ((char*)s + sizeof(size_t))[len] = 0;
+    jl_value_t *s = jl_alloc_string(len);
+    if (len > 0)
+        memcpy(jl_string_data(s), str, len);
     return s;
 }
 
@@ -556,21 +545,11 @@ JL_DLLEXPORT jl_value_t *jl_apply_array_type(jl_value_t *type, size_t dim)
 
 // array primitives -----------------------------------------------------------
 
-#ifndef STORE_ARRAY_LEN
-JL_DLLEXPORT size_t jl_array_len_(jl_array_t *a)
-{
-    size_t l = 1;
-    for(size_t i=0; i < jl_array_ndims(a); i++)
-        l *= jl_array_dim(a, i);
-    return l;
-}
-#endif
-
 JL_DLLEXPORT jl_value_t *jl_ptrarrayref(jl_array_t *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT
 {
     assert(i < jl_array_len(a));
     assert(a->flags.ptrarray);
-    jl_value_t *elt = jl_atomic_load_relaxed(((jl_value_t**)a->data) + i);
+    jl_value_t *elt = jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)a->data) + i);
     if (elt == NULL)
         jl_throw(jl_undefref_exception);
     return elt;
@@ -590,13 +569,16 @@ JL_DLLEXPORT jl_value_t *jl_arrayref(jl_array_t *a, size_t i)
         if (jl_is_datatype_singleton((jl_datatype_t*)eltype))
             return ((jl_datatype_t*)eltype)->instance;
     }
-    return undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, &((char*)a->data)[i * a->elsize]));
+    jl_value_t *r = undefref_check((jl_datatype_t*)eltype, jl_new_bits(eltype, &((char*)a->data)[i * a->elsize]));
+    if (__unlikely(r == NULL))
+        jl_throw(jl_undefref_exception);
+    return r;
 }
 
 JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i)
 {
     if (a->flags.ptrarray) {
-        return jl_atomic_load_relaxed(((jl_value_t**)jl_array_data(a)) + i) != NULL;
+        return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)jl_array_data(a)) + i) != NULL;
     }
     else if (a->flags.hasptr) {
          jl_datatype_t *eltype = (jl_datatype_t*)jl_tparam0(jl_typeof(a));
@@ -618,6 +600,7 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs
         JL_GC_POP();
     }
     if (!a->flags.ptrarray) {
+        int hasptr;
         if (jl_is_uniontype(eltype)) {
             uint8_t *psel = &((uint8_t*)jl_array_typetagdata(a))[i];
             unsigned nth = 0;
@@ -626,18 +609,15 @@ JL_DLLEXPORT void jl_arrayset(jl_array_t *a JL_ROOTING_ARGUMENT, jl_value_t *rhs
             *psel = nth;
             if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs)))
                 return;
-        }
-        if (a->flags.hasptr) {
-            memmove_refs((void**)&((char*)a->data)[i * a->elsize], (void**)rhs, a->elsize / sizeof(void*));
+            hasptr = 0;
         }
         else {
-            jl_assign_bits(&((char*)a->data)[i * a->elsize], rhs);
+            hasptr = a->flags.hasptr;
         }
-        if (a->flags.hasptr)
-            jl_gc_multi_wb(jl_array_owner(a), rhs);
+        arrayassign_safe(hasptr, jl_array_owner(a), &((char*)a->data)[i * a->elsize], rhs, a->elsize);
     }
     else {
-        jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, rhs);
+        jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)a->data) + i, rhs);
         jl_gc_wb(jl_array_owner(a), rhs);
     }
 }
@@ -647,7 +627,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
     if (i >= jl_array_len(a))
         jl_bounds_error_int((jl_value_t*)a, i + 1);
     if (a->flags.ptrarray)
-        jl_atomic_store_relaxed(((jl_value_t**)a->data) + i, NULL);
+        jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)a->data) + i, NULL);
     else if (a->flags.hasptr) {
         size_t elsize = a->elsize;
         jl_assume(elsize >= sizeof(void*) && elsize % sizeof(void*) == 0);
@@ -668,7 +648,7 @@ JL_DLLEXPORT void jl_arrayunset(jl_array_t *a, size_t i)
 // the **beginning** of the new buffer.
 static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(!a->flags.isshared || a->flags.how == 3);
     size_t elsz = a->elsize;
     size_t nbytes = newlen * elsz;
@@ -710,12 +690,12 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
         newbuf = 1;
         if (nbytes >= MALLOC_THRESH) {
             a->data = jl_gc_managed_malloc(nbytes);
-            jl_gc_track_malloced_array(ptls, a);
+            jl_gc_track_malloced_array(ct->ptls, a);
             a->flags.how = 2;
             a->flags.isaligned = 1;
         }
         else {
-            a->data = jl_gc_alloc_buf(ptls, nbytes);
+            a->data = jl_gc_alloc_buf(ct->ptls, nbytes);
             a->flags.how = 1;
             jl_gc_wb_buf(a, a->data, nbytes);
         }
@@ -752,16 +732,23 @@ static void NOINLINE array_try_unshare(jl_array_t *a)
     }
 }
 
-static size_t limit_overallocation(jl_array_t *a, size_t alen, size_t newlen, size_t inc)
+size_t overallocation(size_t maxsize)
 {
-    // Limit overallocation to jl_arr_xtralloc_limit
-    size_t es = a->elsize;
-    size_t xtra_elems_mem = (newlen - a->offset - alen - inc) * es;
-    if (xtra_elems_mem > jl_arr_xtralloc_limit) {
-        // prune down
-        return alen + inc + a->offset + (jl_arr_xtralloc_limit / es);
-    }
-    return newlen;
+    if (maxsize < 8)
+        return 8;
+    // compute maxsize = maxsize + 4*maxsize^(7/8) + maxsize/8
+    // for small n, we grow faster than O(n)
+    // for large n, we grow at O(n/8)
+    // and as we reach O(memory) for memory>>1MB,
+    // this means we end by adding about 10% of memory each time
+    int exp2 = sizeof(maxsize) * 8 -
+#ifdef _P64
+        __builtin_clzll(maxsize);
+#else
+        __builtin_clz(maxsize);
+#endif
+    maxsize += ((size_t)1 << (exp2 * 7 / 8)) * 4 + maxsize / 8;
+    return maxsize;
 }
 
 STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
@@ -784,7 +771,7 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
     char *data = (char*)a->data;
     char *newdata;
     char *typetagdata;
-    char *newtypetagdata;
+    char *newtypetagdata = NULL;
     int isbitsunion = jl_array_isbitsunion(a);
     if (isbitsunion) typetagdata = jl_array_typetagdata(a);
     if (a->offset >= inc) {
@@ -809,10 +796,12 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
         size_t nb1 = idx * elsz;
         if (inc > (a->maxsize - n) / 2 - (a->maxsize - n) / 20) {
             // not enough room for requested growth from end of array
-            size_t newlen = a->maxsize == 0 ? inc * 2 : a->maxsize * 2;
+            size_t newlen = inc * 2;
             while (n + 2 * inc > newlen - a->offset)
                 newlen *= 2;
-            newlen = limit_overallocation(a, n, newlen, 2 * inc);
+            size_t newmaxsize = overallocation(a->maxsize);
+            if (newlen < newmaxsize)
+                newlen = newmaxsize;
             size_t newoffset = (newlen - newnrows) / 2;
             if (!array_resize_buffer(a, newlen)) {
                 data = (char*)a->data + oldoffsnb;
@@ -859,15 +848,13 @@ STATIC_INLINE void jl_array_grow_at_beg(jl_array_t *a, size_t idx, size_t inc,
             }
         }
     }
-#ifdef STORE_ARRAY_LEN
     a->length = newnrows;
-#endif
     a->nrows = newnrows;
     a->data = newdata;
-    if (a->flags.ptrarray || a->flags.hasptr) { // TODO: check for zeroinit
+    if (jl_is_array_zeroinit(a)) {
         memset(newdata + idx * elsz, 0, nbinc);
     }
-    else if (isbitsunion) {
+    if (newtypetagdata) {
         memset(newtypetagdata + idx, 0, inc);
     }
 }
@@ -897,12 +884,11 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
     if (__unlikely(reqmaxsize > a->maxsize)) {
         size_t nb1 = idx * elsz;
         size_t nbinc = inc * elsz;
-        // if the requested size is more than 2x current maxsize, grow exactly
-        // otherwise double the maxsize
-        size_t newmaxsize = reqmaxsize >= a->maxsize * 2
-                          ? (reqmaxsize < 4 ? 4 : reqmaxsize)
-                          : a->maxsize * 2;
-        newmaxsize = limit_overallocation(a, n, newmaxsize, inc);
+        // grow either by our computed overallocation factor or exactly the requested size,
+        // whichever is larger
+        size_t newmaxsize = overallocation(a->maxsize);
+        if (newmaxsize < reqmaxsize)
+            newmaxsize = reqmaxsize;
         size_t oldmaxsize = a->maxsize;
         int newbuf = array_resize_buffer(a, newmaxsize);
         char *newdata = (char*)a->data + a->offset * elsz;
@@ -941,11 +927,9 @@ STATIC_INLINE void jl_array_grow_at_end(jl_array_t *a, size_t idx,
             memset(typetagdata + idx, 0, inc);
     }
     size_t newnrows = n + inc;
-#ifdef STORE_ARRAY_LEN
     a->length = newnrows;
-#endif
     a->nrows = newnrows;
-    if (a->flags.ptrarray || a->flags.hasptr) { // TODO: check for zeroinit
+    if (jl_is_array_zeroinit(a)) {
         memset(data + idx * elsz, 0, inc * elsz);
     }
 }
@@ -998,6 +982,24 @@ STATIC_INLINE void jl_array_shrink(jl_array_t *a, size_t dec)
     char *originalptr = ((char*) a->data) - a->offset * a->elsize;
     if (a->flags.how == 1) {
         //this is a julia-allocated buffer that needs to be marked
+        char *typetagdata;
+        char *newtypetagdata;
+        if (isbitsunion) {
+            typetagdata = (char*)malloc_s(a->nrows);
+            memcpy(typetagdata, jl_array_typetagdata(a), a->nrows);
+        }
+        jl_task_t *ct = jl_current_task;
+        char *originaldata = (char*) a->data - a->offset * a->elsize;
+        char *newdata = (char*)jl_gc_alloc_buf(ct->ptls, newbytes);
+        jl_gc_wb_buf(a, newdata, newbytes);
+        a->maxsize -= dec;
+        if (isbitsunion) {
+            newtypetagdata = jl_array_typetagdata(a);
+            memcpy(newtypetagdata, typetagdata, a->nrows);
+            free(typetagdata);
+        }
+        memcpy(newdata, originaldata, newbytes);
+        a->data = newdata + a->offset * elsz;
     }
     else if (a->flags.how == 2) {
         //malloc-allocated pointer this array object manages
@@ -1045,9 +1047,7 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
     size_t offset = a->offset;
     int isbitsunion = jl_array_isbitsunion(a);
     offset += dec;
-#ifdef STORE_ARRAY_LEN
     a->length = n - dec;
-#endif
     a->nrows = n - dec;
     size_t newoffs = jl_array_limit_offset(a, offset);
     assert(newoffs <= offset);
@@ -1074,7 +1074,7 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
         // Move the rest of the data if the offset changed
         if (newoffs != offset) {
             memmove_safe(a->flags.hasptr, newdata + nb1, olddata + nb1 + nbdec, nbtotal - nb1);
-            if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, n - idx);
+            if (isbitsunion) memmove(newtypetagdata + idx, typetagdata + idx + dec, a->nrows - idx);
         }
         a->data = newdata;
     }
@@ -1105,9 +1105,7 @@ STATIC_INLINE void jl_array_del_at_end(jl_array_t *a, size_t idx, size_t dec,
     if (elsz == 1 && !isbitsunion)
         data[n] = 0;
     a->nrows = n;
-#ifdef STORE_ARRAY_LEN
     a->length = n;
-#endif
 }
 
 JL_DLLEXPORT void jl_array_del_at(jl_array_t *a, ssize_t idx, size_t dec)
@@ -1171,9 +1169,7 @@ JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz)
         jl_array_grow_end(a, inc);
 
         a->nrows = n;
-#ifdef STORE_ARRAY_LEN
         a->length = n;
-#endif
     }
 }
 
@@ -1184,7 +1180,7 @@ JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary)
     int isunion = jl_is_uniontype(jl_tparam0(jl_typeof(ary)));
     jl_array_t *new_ary = _new_array_(jl_typeof(ary), jl_array_ndims(ary),
                                       &ary->nrows, !ary->flags.ptrarray,
-                                      ary->flags.hasptr, isunion, elsz);
+                                      ary->flags.hasptr, isunion, 0, elsz);
     memcpy(new_ary->data, ary->data, len * elsz);
     // ensure isbits union arrays copy their selector bytes correctly
     if (jl_array_isbitsunion(ary))
@@ -1198,9 +1194,11 @@ static NOINLINE ssize_t jl_array_ptr_copy_forward(jl_value_t *owner,
                                                   void **src_p, void **dest_p,
                                                   ssize_t n) JL_NOTSAFEPOINT
 {
+    _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p;
+    _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p;
     for (ssize_t i = 0; i < n; i++) {
-        void *val = jl_atomic_load_relaxed(src_p + i);
-        jl_atomic_store_relaxed(dest_p + i, val);
+        void *val = jl_atomic_load_relaxed(src_pa + i);
+        jl_atomic_store_relaxed(dest_pa + i, val);
         // `val` is young or old-unmarked
         if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
             jl_gc_queue_root(owner);
@@ -1214,9 +1212,11 @@ static NOINLINE ssize_t jl_array_ptr_copy_backward(jl_value_t *owner,
                                                    void **src_p, void **dest_p,
                                                    ssize_t n) JL_NOTSAFEPOINT
 {
+    _Atomic(void*) *src_pa = (_Atomic(void*)*)src_p;
+    _Atomic(void*) *dest_pa = (_Atomic(void*)*)dest_p;
     for (ssize_t i = 0; i < n; i++) {
-        void *val = jl_atomic_load_relaxed(src_p + n - i - 1);
-        jl_atomic_store_relaxed(dest_p + n - i - 1, val);
+        void *val = jl_atomic_load_relaxed(src_pa + n - i - 1);
+        jl_atomic_store_relaxed(dest_pa + n - i - 1, val);
         // `val` is young or old-unmarked
         if (val && !(jl_astaggedvalue(val)->bits.gc & GC_MARKED)) {
             jl_gc_queue_root(owner);
diff --git a/src/ast.c b/src/ast.c
index fc7cdbbf5e7879..14a6e21e54bbe1 100644
--- a/src/ast.c
+++ b/src/ast.c
@@ -19,80 +19,105 @@
 extern "C" {
 #endif
 
-// MSVC complains about "julia_flisp.boot.inc : error C4335: Mac file format
-// detected: please convert the source file to either DOS or UNIX format"
-#ifdef _MSC_VER
-#pragma warning(disable:4335)
-#endif
-
-
 // head symbols for each expression type
-jl_sym_t *call_sym;    jl_sym_t *invoke_sym;
-jl_sym_t *empty_sym;   jl_sym_t *top_sym;
-jl_sym_t *module_sym;  jl_sym_t *slot_sym;
-jl_sym_t *export_sym;  jl_sym_t *import_sym;
-jl_sym_t *toplevel_sym; jl_sym_t *quote_sym;
-jl_sym_t *line_sym;    jl_sym_t *jl_incomplete_sym;
-jl_sym_t *goto_sym;    jl_sym_t *goto_ifnot_sym;
-jl_sym_t *return_sym;
-jl_sym_t *lambda_sym;  jl_sym_t *assign_sym;
-jl_sym_t *globalref_sym; jl_sym_t *do_sym;
-jl_sym_t *method_sym;  jl_sym_t *core_sym;
-jl_sym_t *enter_sym;   jl_sym_t *leave_sym;
-jl_sym_t *pop_exception_sym;
-jl_sym_t *exc_sym;     jl_sym_t *error_sym;
-jl_sym_t *new_sym;     jl_sym_t *using_sym;
-jl_sym_t *splatnew_sym;
-jl_sym_t *const_sym;   jl_sym_t *thunk_sym;
-jl_sym_t *foreigncall_sym;
-jl_sym_t *global_sym; jl_sym_t *list_sym;
-jl_sym_t *dot_sym;    jl_sym_t *newvar_sym;
-jl_sym_t *boundscheck_sym; jl_sym_t *inbounds_sym;
-jl_sym_t *copyast_sym; jl_sym_t *cfunction_sym;
-jl_sym_t *pure_sym; jl_sym_t *loopinfo_sym;
-jl_sym_t *meta_sym; jl_sym_t *inert_sym;
-jl_sym_t *polly_sym; jl_sym_t *unused_sym;
-jl_sym_t *static_parameter_sym; jl_sym_t *inline_sym;
-jl_sym_t *noinline_sym; jl_sym_t *generated_sym;
-jl_sym_t *generated_only_sym; jl_sym_t *isdefined_sym;
-jl_sym_t *propagate_inbounds_sym; jl_sym_t *specialize_sym;
-jl_sym_t *nospecialize_sym; jl_sym_t *macrocall_sym;
-jl_sym_t *colon_sym; jl_sym_t *hygienicscope_sym;
-jl_sym_t *throw_undef_if_not_sym; jl_sym_t *getfield_undefref_sym;
-jl_sym_t *gc_preserve_begin_sym; jl_sym_t *gc_preserve_end_sym;
-jl_sym_t *coverageeffect_sym; jl_sym_t *escape_sym;
-jl_sym_t *aliasscope_sym; jl_sym_t *popaliasscope_sym;
-jl_sym_t *optlevel_sym; jl_sym_t *thismodule_sym;
-jl_sym_t *atom_sym; jl_sym_t *statement_sym; jl_sym_t *all_sym;
-jl_sym_t *compile_sym; jl_sym_t *infer_sym;
-
-static uint8_t flisp_system_image[] = {
+JL_DLLEXPORT jl_sym_t *jl_call_sym;
+JL_DLLEXPORT jl_sym_t *jl_invoke_sym;
+JL_DLLEXPORT jl_sym_t *jl_invoke_modify_sym;
+JL_DLLEXPORT jl_sym_t *jl_empty_sym;
+JL_DLLEXPORT jl_sym_t *jl_top_sym;
+JL_DLLEXPORT jl_sym_t *jl_module_sym;
+JL_DLLEXPORT jl_sym_t *jl_slot_sym;
+JL_DLLEXPORT jl_sym_t *jl_export_sym;
+JL_DLLEXPORT jl_sym_t *jl_import_sym;
+JL_DLLEXPORT jl_sym_t *jl_toplevel_sym;
+JL_DLLEXPORT jl_sym_t *jl_quote_sym;
+JL_DLLEXPORT jl_sym_t *jl_line_sym;
+JL_DLLEXPORT jl_sym_t *jl_incomplete_sym;
+JL_DLLEXPORT jl_sym_t *jl_goto_sym;
+JL_DLLEXPORT jl_sym_t *jl_goto_ifnot_sym;
+JL_DLLEXPORT jl_sym_t *jl_return_sym;
+JL_DLLEXPORT jl_sym_t *jl_lineinfo_sym;
+JL_DLLEXPORT jl_sym_t *jl_lambda_sym;
+JL_DLLEXPORT jl_sym_t *jl_assign_sym;
+JL_DLLEXPORT jl_sym_t *jl_globalref_sym;
+JL_DLLEXPORT jl_sym_t *jl_do_sym;
+JL_DLLEXPORT jl_sym_t *jl_method_sym;
+JL_DLLEXPORT jl_sym_t *jl_core_sym;
+JL_DLLEXPORT jl_sym_t *jl_enter_sym;
+JL_DLLEXPORT jl_sym_t *jl_leave_sym;
+JL_DLLEXPORT jl_sym_t *jl_pop_exception_sym;
+JL_DLLEXPORT jl_sym_t *jl_exc_sym;
+JL_DLLEXPORT jl_sym_t *jl_error_sym;
+JL_DLLEXPORT jl_sym_t *jl_new_sym;
+JL_DLLEXPORT jl_sym_t *jl_using_sym;
+JL_DLLEXPORT jl_sym_t *jl_splatnew_sym;
+JL_DLLEXPORT jl_sym_t *jl_block_sym;
+JL_DLLEXPORT jl_sym_t *jl_new_opaque_closure_sym;
+JL_DLLEXPORT jl_sym_t *jl_opaque_closure_method_sym;
+JL_DLLEXPORT jl_sym_t *jl_const_sym;
+JL_DLLEXPORT jl_sym_t *jl_thunk_sym;
+JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym;
+JL_DLLEXPORT jl_sym_t *jl_as_sym;
+JL_DLLEXPORT jl_sym_t *jl_global_sym;
+JL_DLLEXPORT jl_sym_t *jl_list_sym;
+JL_DLLEXPORT jl_sym_t *jl_dot_sym;
+JL_DLLEXPORT jl_sym_t *jl_newvar_sym;
+JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym;
+JL_DLLEXPORT jl_sym_t *jl_inbounds_sym;
+JL_DLLEXPORT jl_sym_t *jl_copyast_sym;
+JL_DLLEXPORT jl_sym_t *jl_cfunction_sym;
+JL_DLLEXPORT jl_sym_t *jl_pure_sym;
+JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym;
+JL_DLLEXPORT jl_sym_t *jl_meta_sym;
+JL_DLLEXPORT jl_sym_t *jl_inert_sym;
+JL_DLLEXPORT jl_sym_t *jl_polly_sym;
+JL_DLLEXPORT jl_sym_t *jl_unused_sym;
+JL_DLLEXPORT jl_sym_t *jl_static_parameter_sym;
+JL_DLLEXPORT jl_sym_t *jl_inline_sym;
+JL_DLLEXPORT jl_sym_t *jl_noinline_sym;
+JL_DLLEXPORT jl_sym_t *jl_generated_sym;
+JL_DLLEXPORT jl_sym_t *jl_generated_only_sym;
+JL_DLLEXPORT jl_sym_t *jl_isdefined_sym;
+JL_DLLEXPORT jl_sym_t *jl_propagate_inbounds_sym;
+JL_DLLEXPORT jl_sym_t *jl_specialize_sym;
+JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym;
+JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym;
+JL_DLLEXPORT jl_sym_t *jl_purity_sym;
+JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym;
+JL_DLLEXPORT jl_sym_t *jl_macrocall_sym;
+JL_DLLEXPORT jl_sym_t *jl_colon_sym;
+JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym;
+JL_DLLEXPORT jl_sym_t *jl_throw_undef_if_not_sym;
+JL_DLLEXPORT jl_sym_t *jl_getfield_undefref_sym;
+JL_DLLEXPORT jl_sym_t *jl_gc_preserve_begin_sym;
+JL_DLLEXPORT jl_sym_t *jl_gc_preserve_end_sym;
+JL_DLLEXPORT jl_sym_t *jl_coverageeffect_sym;
+JL_DLLEXPORT jl_sym_t *jl_escape_sym;
+JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym;
+JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym;
+JL_DLLEXPORT jl_sym_t *jl_optlevel_sym;
+JL_DLLEXPORT jl_sym_t *jl_thismodule_sym;
+JL_DLLEXPORT jl_sym_t *jl_atom_sym;
+JL_DLLEXPORT jl_sym_t *jl_statement_sym;
+JL_DLLEXPORT jl_sym_t *jl_all_sym;
+JL_DLLEXPORT jl_sym_t *jl_compile_sym;
+JL_DLLEXPORT jl_sym_t *jl_force_compile_sym;
+JL_DLLEXPORT jl_sym_t *jl_infer_sym;
+JL_DLLEXPORT jl_sym_t *jl_max_methods_sym;
+JL_DLLEXPORT jl_sym_t *jl_atomic_sym;
+JL_DLLEXPORT jl_sym_t *jl_not_atomic_sym;
+JL_DLLEXPORT jl_sym_t *jl_unordered_sym;
+JL_DLLEXPORT jl_sym_t *jl_monotonic_sym;
+JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
+JL_DLLEXPORT jl_sym_t *jl_release_sym;
+JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
+JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
+
+
+static const uint8_t flisp_system_image[] = {
 #include <julia_flisp.boot.inc>
 };
 
-typedef struct _jl_ast_context_list_t {
-    struct _jl_ast_context_list_t *next;
-    struct _jl_ast_context_list_t **prev;
-} jl_ast_context_list_t;
-
-STATIC_INLINE void jl_ast_context_list_insert(jl_ast_context_list_t **head,
-                                              jl_ast_context_list_t *node) JL_NOTSAFEPOINT
-{
-    jl_ast_context_list_t *next = *head;
-    if (next)
-        next->prev = &node->next;
-    node->next = next;
-    node->prev = head;
-    *head = node;
-}
-
-STATIC_INLINE void jl_ast_context_list_delete(jl_ast_context_list_t *node) JL_NOTSAFEPOINT
-{
-    if (node->next)
-        node->next->prev = node->prev;
-    *node->prev = node->next;
-}
-
 typedef struct _jl_ast_context_t {
     fl_context_t fl;
     fltype_t *jvtype;
@@ -103,36 +128,26 @@ typedef struct _jl_ast_context_t {
     value_t null_sym;
     value_t ssavalue_sym;
     value_t slot_sym;
-    jl_ast_context_list_t list;
-    int ref;
-    jl_task_t *task; // the current owner (user) of this jl_ast_context_t
     jl_module_t *module; // context module for `current-julia-module-counter`
+    struct _jl_ast_context_t *next; // invasive list pointer for getting free contexts
 } jl_ast_context_t;
 
 static jl_ast_context_t jl_ast_main_ctx;
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 jl_ast_context_t *jl_ast_ctx(fl_context_t *fl) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
 #else
 #define jl_ast_ctx(fl_ctx) container_of(fl_ctx, jl_ast_context_t, fl)
 #endif
-#define jl_ast_context_list_item(node)          \
-    container_of(node, jl_ast_context_t, list)
 
 struct macroctx_stack {
     jl_module_t *m;
     struct macroctx_stack *parent;
 };
 
-#define JL_AST_PRESERVE_PUSH(ctx, old, inmodule)  \
-    jl_module_t *(old) = ctx->module;           \
-    ctx->module = (inmodule)
-#define JL_AST_PRESERVE_POP(ctx, old)           \
-    ctx->module = (old)
-
 static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mod);
 static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v);
-static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel);
+static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error);
 
 static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
@@ -145,23 +160,32 @@ static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint
     return (b != NULL && b->owner == ctx->module) ? fl_ctx->T : fl_ctx->F;
 }
 
-static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+static value_t fl_current_module_counter(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
 {
     jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
     assert(ctx->module);
     return fixnum(jl_module_next_counter(ctx->module));
 }
 
-static value_t fl_julia_current_file(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+static value_t fl_julia_current_file(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
 {
     return symbol(fl_ctx, jl_filename);
 }
 
-static value_t fl_julia_current_line(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+static value_t fl_julia_current_line(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) JL_NOTSAFEPOINT
 {
     return fixnum(jl_lineno);
 }
 
+static int jl_is_number(jl_value_t *v)
+{
+    jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
+    for (; t->super != t; t = t->super)
+        if (t == jl_number_type)
+            return 1;
+    return 0;
+}
+
 // Check whether v is a scalar for purposes of inlining fused-broadcast
 // arguments when lowering; should agree with broadcast.jl on what is a
 // scalar.  When in doubt, return false, since this is only an optimization.
@@ -172,7 +196,7 @@ static value_t fl_julia_scalar(fl_context_t *fl_ctx, value_t *args, uint32_t nar
         return fl_ctx->T;
     else if (iscvalue(args[0]) && fl_ctx->jl_sym == cv_type((cvalue_t*)ptr(args[0]))) {
         jl_value_t *v = *(jl_value_t**)cptr(args[0]);
-        if (jl_isa(v,(jl_value_t*)jl_number_type) || jl_is_string(v))
+        if (jl_is_number(v) || jl_is_string(v))
             return fl_ctx->T;
     }
     return fl_ctx->F;
@@ -180,59 +204,18 @@ static value_t fl_julia_scalar(fl_context_t *fl_ctx, value_t *args, uint32_t nar
 
 static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *mod);
 
-static value_t fl_julia_logmsg(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
-{
-    int kwargs_len = (int)nargs - 6;
-    if (nargs < 6 || kwargs_len % 2 != 0) {
-        lerror(fl_ctx, fl_ctx->ArgError, "julia-logmsg: bad argument list - expected "
-               "level (symbol) group (symbol) id file line msg . kwargs");
-    }
-    value_t arg_level = args[0];
-    value_t arg_group = args[1];
-    value_t arg_id    = args[2];
-    value_t arg_file  = args[3];
-    value_t arg_line  = args[4];
-    value_t arg_msg   = args[5];
-    value_t *arg_kwargs = args + 6;
-    if (!isfixnum(arg_level) || !issymbol(arg_group) || !issymbol(arg_id) ||
-        !issymbol(arg_file) || !isfixnum(arg_line) || !fl_isstring(fl_ctx, arg_msg)) {
-        lerror(fl_ctx, fl_ctx->ArgError,
-               "julia-logmsg: Unexpected type in argument list");
-    }
-
-    // Abuse scm_to_julia here to convert arguments.  This is meant for `Expr`s
-    // but should be good enough provided we're only passing simple numbers,
-    // symbols and strings.
-    jl_value_t *group=NULL, *id=NULL, *file=NULL, *line=NULL, *msg=NULL;
-    jl_array_t *kwargs=NULL;
-    JL_GC_PUSH6(&group, &id, &file, &line, &msg, &kwargs);
-    group = scm_to_julia(fl_ctx, arg_group, NULL);
-    id    = scm_to_julia(fl_ctx, arg_id, NULL);
-    file  = scm_to_julia(fl_ctx, arg_file, NULL);
-    line  = scm_to_julia(fl_ctx, arg_line, NULL);
-    msg   = scm_to_julia(fl_ctx, arg_msg, NULL);
-    kwargs = jl_alloc_vec_any(kwargs_len);
-    for (int i = 0; i < kwargs_len; ++i) {
-        jl_array_ptr_set(kwargs, i, scm_to_julia(fl_ctx, arg_kwargs[i], NULL));
-    }
-    jl_log(numval(arg_level), NULL, group, id, file, line, (jl_value_t*)kwargs, msg);
-    JL_GC_POP();
-    return fl_ctx->T;
-}
-
 static const builtinspec_t julia_flisp_ast_ext[] = {
-    { "defined-julia-global", fl_defined_julia_global },
+    { "defined-julia-global", fl_defined_julia_global }, // TODO: can we kill this safepoint
     { "current-julia-module-counter", fl_current_module_counter },
     { "julia-scalar?", fl_julia_scalar },
-    { "julia-logmsg", fl_julia_logmsg },
     { "julia-current-file", fl_julia_current_file },
     { "julia-current-line", fl_julia_current_line },
     { NULL, NULL }
 };
 
-static void jl_init_ast_ctx(jl_ast_context_t *ast_ctx) JL_NOTSAFEPOINT
+static void jl_init_ast_ctx(jl_ast_context_t *ctx) JL_NOTSAFEPOINT
 {
-    fl_context_t *fl_ctx = &ast_ctx->fl;
+    fl_context_t *fl_ctx = &ctx->fl;
     fl_init(fl_ctx, 4*1024*1024);
 
     if (fl_load_system_image_str(fl_ctx, (char*)flisp_system_image,
@@ -242,7 +225,6 @@ static void jl_init_ast_ctx(jl_ast_context_t *ast_ctx) JL_NOTSAFEPOINT
 
     fl_applyn(fl_ctx, 0, symbol_value(symbol(fl_ctx, "__init_globals")));
 
-    jl_ast_context_t *ctx = jl_ast_ctx(fl_ctx);
     ctx->jvtype = define_opaque_type(fl_ctx->jl_sym, sizeof(void*), NULL, NULL);
     assign_global_builtins(fl_ctx, julia_flisp_ast_ext);
     ctx->true_sym = symbol(fl_ctx, "true");
@@ -251,76 +233,48 @@ static void jl_init_ast_ctx(jl_ast_context_t *ast_ctx) JL_NOTSAFEPOINT
     ctx->null_sym = symbol(fl_ctx, "null");
     ctx->ssavalue_sym = symbol(fl_ctx, "ssavalue");
     ctx->slot_sym = symbol(fl_ctx, "slot");
-    ctx->task = NULL;
     ctx->module = NULL;
-    set(symbol(fl_ctx, "*depwarn-opt*"), fixnum(jl_options.depwarn));
     set(symbol(fl_ctx, "*scopewarn-opt*"), fixnum(jl_options.warn_scope));
 }
 
 // There should be no GC allocation while holding this lock
-static jl_mutex_t flisp_lock;
-static jl_ast_context_list_t *jl_ast_ctx_using = NULL;
-static jl_ast_context_list_t *jl_ast_ctx_freed = NULL;
+static uv_mutex_t flisp_lock;
+static jl_ast_context_t *jl_ast_ctx_freed = NULL;
 
-static jl_ast_context_t *jl_ast_ctx_enter(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
+static jl_ast_context_t *jl_ast_ctx_enter(jl_module_t *m) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
     JL_SIGATOMIC_BEGIN();
-    JL_LOCK_NOGC(&flisp_lock);
-    jl_ast_context_list_t *node;
-    jl_ast_context_t *ctx;
-    // First check if the current task is using one of the contexts
-    for (node = jl_ast_ctx_using;node;(node = node->next)) {
-        ctx = jl_ast_context_list_item(node);
-        if (ctx->task == ptls->current_task) {
-            ctx->ref++;
-            JL_UNLOCK_NOGC(&flisp_lock);
-            return ctx;
-        }
+    uv_mutex_lock(&flisp_lock);
+    jl_ast_context_t *ctx = jl_ast_ctx_freed;
+    if (ctx != NULL) {
+        jl_ast_ctx_freed = ctx->next;
+        ctx->next = NULL;
     }
-    // If not, grab one from the free list
-    if ((node = jl_ast_ctx_freed)) {
-        jl_ast_context_list_delete(node);
-        jl_ast_context_list_insert(&jl_ast_ctx_using, node);
-        ctx = jl_ast_context_list_item(node);
-        ctx->ref = 1;
-        ctx->task = ptls->current_task;
-        ctx->module = NULL;
-        JL_UNLOCK_NOGC(&flisp_lock);
-        return ctx;
+    uv_mutex_unlock(&flisp_lock);
+    if (ctx == NULL) {
+        // Construct a new one if we can't find any
+        ctx = (jl_ast_context_t*)calloc(1, sizeof(jl_ast_context_t));
+        jl_init_ast_ctx(ctx);
     }
-    // Construct a new one if we can't find any
-    ctx = (jl_ast_context_t*)calloc(1, sizeof(jl_ast_context_t));
-    ctx->ref = 1;
-    ctx->task = ptls->current_task;
-    node = &ctx->list;
-    jl_ast_context_list_insert(&jl_ast_ctx_using, node);
-    JL_UNLOCK_NOGC(&flisp_lock);
-    jl_init_ast_ctx(ctx);
+    ctx->module = m;
     return ctx;
 }
 
 static void jl_ast_ctx_leave(jl_ast_context_t *ctx)
 {
+    uv_mutex_lock(&flisp_lock);
+    ctx->module = NULL;
+    ctx->next = jl_ast_ctx_freed;
+    jl_ast_ctx_freed = ctx;
+    uv_mutex_unlock(&flisp_lock);
     JL_SIGATOMIC_END();
-    if (--ctx->ref)
-        return;
-    JL_LOCK_NOGC(&flisp_lock);
-    ctx->task = NULL;
-    jl_ast_context_list_t *node = &ctx->list;
-    jl_ast_context_list_delete(node);
-    jl_ast_context_list_insert(&jl_ast_ctx_freed, node);
-    JL_UNLOCK_NOGC(&flisp_lock);
 }
 
 void jl_init_flisp(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (jl_ast_ctx_using || jl_ast_ctx_freed)
+    if (jl_ast_ctx_freed)
         return;
-    jl_ast_main_ctx.ref = 1;
-    jl_ast_main_ctx.task = ptls->current_task;
-    jl_ast_context_list_insert(&jl_ast_ctx_using, &jl_ast_main_ctx.list);
+    uv_mutex_init(&flisp_lock);
     jl_init_ast_ctx(&jl_ast_main_ctx);
     // To match the one in jl_ast_ctx_leave
     JL_SIGATOMIC_BEGIN();
@@ -329,79 +283,98 @@ void jl_init_flisp(void)
 
 void jl_init_common_symbols(void)
 {
-    empty_sym = jl_symbol("");
-    call_sym = jl_symbol("call");
-    invoke_sym = jl_symbol("invoke");
-    foreigncall_sym = jl_symbol("foreigncall");
-    cfunction_sym = jl_symbol("cfunction");
-    quote_sym = jl_symbol("quote");
-    inert_sym = jl_symbol("inert");
-    top_sym = jl_symbol("top");
-    core_sym = jl_symbol("core");
-    globalref_sym = jl_symbol("globalref");
-    line_sym = jl_symbol("line");
+    jl_empty_sym = jl_symbol("");
+    jl_call_sym = jl_symbol("call");
+    jl_invoke_sym = jl_symbol("invoke");
+    jl_invoke_modify_sym = jl_symbol("invoke_modify");
+    jl_foreigncall_sym = jl_symbol("foreigncall");
+    jl_cfunction_sym = jl_symbol("cfunction");
+    jl_quote_sym = jl_symbol("quote");
+    jl_inert_sym = jl_symbol("inert");
+    jl_top_sym = jl_symbol("top");
+    jl_core_sym = jl_symbol("core");
+    jl_globalref_sym = jl_symbol("globalref");
+    jl_line_sym = jl_symbol("line");
+    jl_lineinfo_sym = jl_symbol("lineinfo");
     jl_incomplete_sym = jl_symbol("incomplete");
-    error_sym = jl_symbol("error");
-    goto_sym = jl_symbol("goto");
-    goto_ifnot_sym = jl_symbol("gotoifnot");
-    return_sym = jl_symbol("return");
-    lambda_sym = jl_symbol("lambda");
-    module_sym = jl_symbol("module");
-    export_sym = jl_symbol("export");
-    import_sym = jl_symbol("import");
-    using_sym = jl_symbol("using");
-    assign_sym = jl_symbol("=");
-    method_sym = jl_symbol("method");
-    exc_sym = jl_symbol("the_exception");
-    enter_sym = jl_symbol("enter");
-    leave_sym = jl_symbol("leave");
-    pop_exception_sym = jl_symbol("pop_exception");
-    new_sym = jl_symbol("new");
-    splatnew_sym = jl_symbol("splatnew");
-    const_sym = jl_symbol("const");
-    global_sym = jl_symbol("global");
-    thunk_sym = jl_symbol("thunk");
-    toplevel_sym = jl_symbol("toplevel");
-    dot_sym = jl_symbol(".");
-    colon_sym = jl_symbol(":");
-    boundscheck_sym = jl_symbol("boundscheck");
-    inbounds_sym = jl_symbol("inbounds");
-    newvar_sym = jl_symbol("newvar");
-    copyast_sym = jl_symbol("copyast");
-    loopinfo_sym = jl_symbol("loopinfo");
-    pure_sym = jl_symbol("pure");
-    meta_sym = jl_symbol("meta");
-    list_sym = jl_symbol("list");
-    unused_sym = jl_symbol("#unused#");
-    slot_sym = jl_symbol("slot");
-    static_parameter_sym = jl_symbol("static_parameter");
-    inline_sym = jl_symbol("inline");
-    noinline_sym = jl_symbol("noinline");
-    polly_sym = jl_symbol("polly");
-    propagate_inbounds_sym = jl_symbol("propagate_inbounds");
-    isdefined_sym = jl_symbol("isdefined");
-    nospecialize_sym = jl_symbol("nospecialize");
-    specialize_sym = jl_symbol("specialize");
-    optlevel_sym = jl_symbol("optlevel");
-    compile_sym = jl_symbol("compile");
-    infer_sym = jl_symbol("infer");
-    macrocall_sym = jl_symbol("macrocall");
-    escape_sym = jl_symbol("escape");
-    hygienicscope_sym = jl_symbol("hygienic-scope");
-    gc_preserve_begin_sym = jl_symbol("gc_preserve_begin");
-    gc_preserve_end_sym = jl_symbol("gc_preserve_end");
-    generated_sym = jl_symbol("generated");
-    generated_only_sym = jl_symbol("generated_only");
-    throw_undef_if_not_sym = jl_symbol("throw_undef_if_not");
-    getfield_undefref_sym = jl_symbol("##getfield##");
-    do_sym = jl_symbol("do");
-    coverageeffect_sym = jl_symbol("code_coverage_effect");
-    aliasscope_sym = jl_symbol("aliasscope");
-    popaliasscope_sym = jl_symbol("popaliasscope");
-    thismodule_sym = jl_symbol("thismodule");
-    atom_sym = jl_symbol("atom");
-    statement_sym = jl_symbol("statement");
-    all_sym = jl_symbol("all");
+    jl_error_sym = jl_symbol("error");
+    jl_goto_sym = jl_symbol("goto");
+    jl_goto_ifnot_sym = jl_symbol("gotoifnot");
+    jl_return_sym = jl_symbol("return");
+    jl_lambda_sym = jl_symbol("lambda");
+    jl_module_sym = jl_symbol("module");
+    jl_export_sym = jl_symbol("export");
+    jl_import_sym = jl_symbol("import");
+    jl_using_sym = jl_symbol("using");
+    jl_assign_sym = jl_symbol("=");
+    jl_method_sym = jl_symbol("method");
+    jl_exc_sym = jl_symbol("the_exception");
+    jl_enter_sym = jl_symbol("enter");
+    jl_leave_sym = jl_symbol("leave");
+    jl_pop_exception_sym = jl_symbol("pop_exception");
+    jl_new_sym = jl_symbol("new");
+    jl_splatnew_sym = jl_symbol("splatnew");
+    jl_new_opaque_closure_sym = jl_symbol("new_opaque_closure");
+    jl_opaque_closure_method_sym = jl_symbol("opaque_closure_method");
+    jl_const_sym = jl_symbol("const");
+    jl_global_sym = jl_symbol("global");
+    jl_thunk_sym = jl_symbol("thunk");
+    jl_toplevel_sym = jl_symbol("toplevel");
+    jl_dot_sym = jl_symbol(".");
+    jl_as_sym = jl_symbol("as");
+    jl_colon_sym = jl_symbol(":");
+    jl_boundscheck_sym = jl_symbol("boundscheck");
+    jl_inbounds_sym = jl_symbol("inbounds");
+    jl_newvar_sym = jl_symbol("newvar");
+    jl_copyast_sym = jl_symbol("copyast");
+    jl_loopinfo_sym = jl_symbol("loopinfo");
+    jl_pure_sym = jl_symbol("pure");
+    jl_meta_sym = jl_symbol("meta");
+    jl_list_sym = jl_symbol("list");
+    jl_unused_sym = jl_symbol("#unused#");
+    jl_slot_sym = jl_symbol("slot");
+    jl_static_parameter_sym = jl_symbol("static_parameter");
+    jl_inline_sym = jl_symbol("inline");
+    jl_noinline_sym = jl_symbol("noinline");
+    jl_polly_sym = jl_symbol("polly");
+    jl_propagate_inbounds_sym = jl_symbol("propagate_inbounds");
+    jl_aggressive_constprop_sym = jl_symbol("aggressive_constprop");
+    jl_no_constprop_sym = jl_symbol("no_constprop");
+    jl_purity_sym = jl_symbol("purity");
+    jl_isdefined_sym = jl_symbol("isdefined");
+    jl_nospecialize_sym = jl_symbol("nospecialize");
+    jl_specialize_sym = jl_symbol("specialize");
+    jl_optlevel_sym = jl_symbol("optlevel");
+    jl_compile_sym = jl_symbol("compile");
+    jl_force_compile_sym = jl_symbol("force_compile");
+    jl_infer_sym = jl_symbol("infer");
+    jl_max_methods_sym = jl_symbol("max_methods");
+    jl_macrocall_sym = jl_symbol("macrocall");
+    jl_escape_sym = jl_symbol("escape");
+    jl_hygienicscope_sym = jl_symbol("hygienic-scope");
+    jl_gc_preserve_begin_sym = jl_symbol("gc_preserve_begin");
+    jl_gc_preserve_end_sym = jl_symbol("gc_preserve_end");
+    jl_generated_sym = jl_symbol("generated");
+    jl_generated_only_sym = jl_symbol("generated_only");
+    jl_throw_undef_if_not_sym = jl_symbol("throw_undef_if_not");
+    jl_getfield_undefref_sym = jl_symbol("##getfield##");
+    jl_do_sym = jl_symbol("do");
+    jl_coverageeffect_sym = jl_symbol("code_coverage_effect");
+    jl_aliasscope_sym = jl_symbol("aliasscope");
+    jl_popaliasscope_sym = jl_symbol("popaliasscope");
+    jl_thismodule_sym = jl_symbol("thismodule");
+    jl_block_sym = jl_symbol("block");
+    jl_atom_sym = jl_symbol("atom");
+    jl_statement_sym = jl_symbol("statement");
+    jl_all_sym = jl_symbol("all");
+    jl_atomic_sym = jl_symbol("atomic");
+    jl_not_atomic_sym = jl_symbol("not_atomic");
+    jl_unordered_sym = jl_symbol("unordered");
+    jl_monotonic_sym = jl_symbol("monotonic");
+    jl_acquire_sym = jl_symbol("acquire");
+    jl_release_sym = jl_symbol("release");
+    jl_acquire_release_sym = jl_symbol("acquire_release");
+    jl_sequentially_consistent_sym = jl_symbol("sequentially_consistent");
 }
 
 JL_DLLEXPORT void jl_lisp_prompt(void)
@@ -410,17 +383,15 @@ JL_DLLEXPORT void jl_lisp_prompt(void)
     // We don't have our signal handler registered in that case anyway...
     JL_SIGATOMIC_BEGIN();
     jl_init_flisp();
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
-    JL_AST_PRESERVE_PUSH(ctx, old_roots, jl_main_module);
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(jl_main_module);
     fl_context_t *fl_ctx = &ctx->fl;
     fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "__start")), fl_cons(fl_ctx, fl_ctx->NIL,fl_ctx->NIL));
-    JL_AST_PRESERVE_POP(ctx, old_roots);
     jl_ast_ctx_leave(ctx);
 }
 
 JL_DLLEXPORT void fl_show_profile(void)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     fl_applyn(fl_ctx, 0, symbol_value(symbol(fl_ctx, "show-profiles")));
     jl_ast_ctx_leave(ctx);
@@ -428,7 +399,7 @@ JL_DLLEXPORT void fl_show_profile(void)
 
 JL_DLLEXPORT void fl_clear_profile(void)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     fl_applyn(fl_ctx, 0, symbol_value(symbol(fl_ctx, "clear-profiles")));
     jl_ast_ctx_leave(ctx);
@@ -436,7 +407,7 @@ JL_DLLEXPORT void fl_clear_profile(void)
 
 JL_DLLEXPORT void fl_profile(const char *fname)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "profile-e")), symbol(fl_ctx, fname));
     jl_ast_ctx_leave(ctx);
@@ -465,7 +436,7 @@ static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mo
     }
     JL_CATCH {
         // if expression cannot be converted, replace with error expr
-        jl_expr_t *ex = jl_exprn(error_sym, 1);
+        jl_expr_t *ex = jl_exprn(jl_error_sym, 1);
         v = (jl_value_t*)ex;
         jl_array_ptr_set(ex->args, 0, jl_cstr_to_string("invalid AST"));
     }
@@ -539,7 +510,7 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
         if (issymbol(hd))
             sym = scmsym_to_julia(fl_ctx, hd);
         else
-            sym = list_sym;
+            sym = jl_list_sym;
         size_t n = llength(e)-1;
         if (issymbol(hd))
             e = cdr_(e);
@@ -547,7 +518,7 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             n++;
         // nodes with special representations
         jl_value_t *ex = NULL, *temp = NULL;
-        if (sym == line_sym && (n == 1 || n == 2)) {
+        if (sym == jl_line_sym && (n == 1 || n == 2)) {
             jl_value_t *linenum = scm_to_julia_(fl_ctx, car_(e), mod);
             jl_value_t *file = jl_nothing;
             JL_GC_PUSH2(&linenum, &file);
@@ -557,52 +528,62 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             JL_GC_POP();
             return temp;
         }
+        else if (sym == jl_lineinfo_sym && n == 5) {
+            jl_value_t *modu=NULL, *name=NULL, *file=NULL, *linenum=NULL, *inlinedat=NULL;
+            JL_GC_PUSH5(&modu, &name, &file, &linenum, &inlinedat);
+            value_t lst = e;
+            modu = scm_to_julia_(fl_ctx, car_(lst), mod);
+            lst = cdr_(lst);
+            name = scm_to_julia_(fl_ctx, car_(lst), mod);
+            lst = cdr_(lst);
+            file = scm_to_julia_(fl_ctx, car_(lst), mod);
+            lst = cdr_(lst);
+            linenum = scm_to_julia_(fl_ctx, car_(lst), mod);
+            lst = cdr_(lst);
+            inlinedat = scm_to_julia_(fl_ctx, car_(lst), mod);
+            temp = jl_new_struct(jl_lineinfonode_type, modu, name, file, linenum, inlinedat);
+            JL_GC_POP();
+            return temp;
+        }
         JL_GC_PUSH2(&ex, &temp);
-        if (sym == goto_sym) {
+        if (sym == jl_goto_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = jl_new_struct(jl_gotonode_type, ex);
         }
-        else if (sym == goto_ifnot_sym) {
+        else if (sym == jl_goto_ifnot_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = scm_to_julia(fl_ctx, car_(cdr_(e)), mod);
             temp = jl_new_struct(jl_gotoifnot_type, ex, temp);
         }
-        else if (sym == newvar_sym) {
+        else if (sym == jl_newvar_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = jl_new_struct(jl_newvarnode_type, ex);
         }
-        else if (sym == globalref_sym) {
+        else if (sym == jl_globalref_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = scm_to_julia_(fl_ctx, car_(cdr_(e)), mod);
             assert(jl_is_module(ex));
             assert(jl_is_symbol(temp));
             temp = jl_module_globalref((jl_module_t*)ex, (jl_sym_t*)temp);
         }
-        else if (sym == top_sym) {
+        else if (sym == jl_top_sym) {
             assert(mod && "top should not be generated by the parser");
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             assert(jl_is_symbol(ex));
             temp = jl_module_globalref(jl_base_relative_to(mod), (jl_sym_t*)ex);
         }
-        else if (sym == core_sym) {
+        else if (sym == jl_core_sym) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             assert(jl_is_symbol(ex));
             temp = jl_module_globalref(jl_core_module, (jl_sym_t*)ex);
         }
-        else if (sym == thismodule_sym) {
+        else if (sym == jl_thismodule_sym) {
             temp = (jl_value_t*)mod;
         }
-        else if (iscons(e) && (sym == inert_sym || (sym == quote_sym && (!iscons(car_(e)))))) {
+        else if (iscons(e) && (sym == jl_inert_sym || (sym == jl_quote_sym && (!iscons(car_(e)))))) {
             ex = scm_to_julia_(fl_ctx, car_(e), mod);
             temp = jl_new_struct(jl_quotenode_type, ex);
         }
-        else if (sym == thunk_sym) {
-            ex = scm_to_julia_(fl_ctx, car_(e), mod);
-            assert(jl_is_code_info(ex));
-            jl_linenumber_to_lineinfo((jl_code_info_t*)ex, mod, (jl_value_t*)jl_symbol("top-level scope"));
-            temp = (jl_value_t*)jl_exprn(sym, 1);
-            jl_exprargset(temp, 0, ex);
-        }
         if (temp) {
             JL_GC_POP();
             return temp;
@@ -614,10 +595,10 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
             jl_array_ptr_set(((jl_expr_t*)ex)->args, i, scm_to_julia_(fl_ctx, car_(e), mod));
             e = cdr_(e);
         }
-        if (sym == lambda_sym)
+        if (sym == jl_lambda_sym)
             ex = (jl_value_t*)jl_new_code_info_from_ir((jl_expr_t*)ex);
         JL_GC_POP();
-        if (sym == list_sym)
+        if (sym == jl_list_sym)
             return (jl_value_t*)((jl_expr_t*)ex)->args;
         return (jl_value_t*)ex;
     }
@@ -640,14 +621,14 @@ static jl_value_t *scm_to_julia_(fl_context_t *fl_ctx, value_t e, jl_module_t *m
     jl_error("malformed tree");
 }
 
-static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v);
+static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_valid);
 
 static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v)
 {
     value_t temp;
     // need try/catch to reset GC handle stack in case of error
     FL_TRY_EXTERN(fl_ctx) {
-        temp = julia_to_scm_(fl_ctx, v);
+        temp = julia_to_scm_(fl_ctx, v, 1);
     }
     FL_CATCH_EXTERN(fl_ctx) {
         temp = fl_ctx->lasterror;
@@ -655,24 +636,22 @@ static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v)
     return temp;
 }
 
-static void array_to_list(fl_context_t *fl_ctx, jl_array_t *a, value_t *pv)
+static void array_to_list(fl_context_t *fl_ctx, jl_array_t *a, value_t *pv, int check_valid)
 {
-    if (jl_array_len(a) > 650000)
-        lerror(fl_ctx, symbol(fl_ctx, "error"), "expression too large");
     value_t temp;
     for(long i=jl_array_len(a)-1; i >= 0; i--) {
         *pv = fl_cons(fl_ctx, fl_ctx->NIL, *pv);
-        temp = julia_to_scm_(fl_ctx, jl_array_ptr_ref(a,i));
+        temp = julia_to_scm_(fl_ctx, jl_array_ptr_ref(a,i), check_valid);
         // note: must be separate statement
         car_(*pv) = temp;
     }
 }
 
-static value_t julia_to_list2(fl_context_t *fl_ctx, jl_value_t *a, jl_value_t *b)
+static value_t julia_to_list2(fl_context_t *fl_ctx, jl_value_t *a, jl_value_t *b, int check_valid)
 {
-    value_t sa = julia_to_scm_(fl_ctx, a);
+    value_t sa = julia_to_scm_(fl_ctx, a, check_valid);
     fl_gc_handle(fl_ctx, &sa);
-    value_t sb = julia_to_scm_(fl_ctx, b);
+    value_t sb = julia_to_scm_(fl_ctx, b, check_valid);
     value_t l = fl_list2(fl_ctx, sa, sb);
     fl_free_gc_handles(fl_ctx, 1);
     return l;
@@ -695,20 +674,22 @@ static int julia_to_scm_noalloc1(fl_context_t *fl_ctx, jl_value_t *v, value_t *r
     return 1;
 }
 
-static value_t julia_to_scm_noalloc2(fl_context_t *fl_ctx, jl_value_t *v) JL_NOTSAFEPOINT
+static value_t julia_to_scm_noalloc2(fl_context_t *fl_ctx, jl_value_t *v, int check_valid) JL_NOTSAFEPOINT
 {
     if (jl_is_long(v) && fits_fixnum(jl_unbox_long(v)))
         return fixnum(jl_unbox_long(v));
-    if (jl_is_ssavalue(v))
-        lerror(fl_ctx, symbol(fl_ctx, "error"), "SSAValue objects should not occur in an AST");
-    if (jl_is_slot(v))
-        lerror(fl_ctx, symbol(fl_ctx, "error"), "Slot objects should not occur in an AST");
+    if (check_valid) {
+        if (jl_is_ssavalue(v))
+            lerror(fl_ctx, symbol(fl_ctx, "error"), "SSAValue objects should not occur in an AST");
+        if (jl_is_slot(v))
+            lerror(fl_ctx, symbol(fl_ctx, "error"), "Slot objects should not occur in an AST");
+    }
     value_t opaque = cvalue(fl_ctx, jl_ast_ctx(fl_ctx)->jvtype, sizeof(void*));
     *(jl_value_t**)cv_data((cvalue_t*)ptr(opaque)) = v;
     return opaque;
 }
 
-static value_t julia_to_scm_noalloc(fl_context_t *fl_ctx, jl_value_t *v) JL_NOTSAFEPOINT
+static value_t julia_to_scm_noalloc(fl_context_t *fl_ctx, jl_value_t *v, int check_valid) JL_NOTSAFEPOINT
 {
     value_t retval;
     if (julia_to_scm_noalloc1(fl_ctx, v, &retval))
@@ -719,20 +700,20 @@ static value_t julia_to_scm_noalloc(fl_context_t *fl_ctx, jl_value_t *v) JL_NOTS
            !jl_typeis(v, jl_quotenode_type) &&
            !jl_typeis(v, jl_newvarnode_type) &&
            !jl_typeis(v, jl_globalref_type));
-    return julia_to_scm_noalloc2(fl_ctx, v);
+    return julia_to_scm_noalloc2(fl_ctx, v, check_valid);
 }
 
-static value_t julia_to_list2_noalloc(fl_context_t *fl_ctx, jl_value_t *a, jl_value_t *b) JL_NOTSAFEPOINT
+static value_t julia_to_list2_noalloc(fl_context_t *fl_ctx, jl_value_t *a, jl_value_t *b, int check_valid) JL_NOTSAFEPOINT
 {
-    value_t sa = julia_to_scm_noalloc(fl_ctx, a);
+    value_t sa = julia_to_scm_noalloc(fl_ctx, a, check_valid);
     fl_gc_handle(fl_ctx, &sa);
-    value_t sb = julia_to_scm_noalloc(fl_ctx, b);
+    value_t sb = julia_to_scm_noalloc(fl_ctx, b, check_valid);
     value_t l = fl_list2(fl_ctx, sa, sb);
     fl_free_gc_handles(fl_ctx, 1);
     return l;
 }
 
-static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v)
+static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_valid)
 {
     value_t retval;
     if (julia_to_scm_noalloc1(fl_ctx, v, &retval))
@@ -741,12 +722,14 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v)
         jl_expr_t *ex = (jl_expr_t*)v;
         value_t args = fl_ctx->NIL;
         fl_gc_handle(fl_ctx, &args);
-        array_to_list(fl_ctx, ex->args, &args);
-        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)ex->head);
-        if (ex->head == lambda_sym && jl_expr_nargs(ex)>0 && jl_is_array(jl_exprarg(ex,0))) {
+        if (jl_expr_nargs(ex) > 520000 && ex->head != jl_block_sym)
+            lerror(fl_ctx, symbol(fl_ctx, "error"), "expression too large");
+        array_to_list(fl_ctx, ex->args, &args, check_valid);
+        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)ex->head, check_valid);
+        if (ex->head == jl_lambda_sym && jl_expr_nargs(ex)>0 && jl_is_array(jl_exprarg(ex,0))) {
             value_t llist = fl_ctx->NIL;
             fl_gc_handle(fl_ctx, &llist);
-            array_to_list(fl_ctx, (jl_array_t*)jl_exprarg(ex,0), &llist);
+            array_to_list(fl_ctx, (jl_array_t*)jl_exprarg(ex,0), &llist, check_valid);
             car_(args) = llist;
             fl_free_gc_handles(fl_ctx, 1);
         }
@@ -760,40 +743,40 @@ static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v)
     if (jl_typeis(v, jl_linenumbernode_type)) {
         jl_value_t *file = jl_fieldref_noalloc(v,1);
         jl_value_t *line = jl_fieldref(v,0);
-        value_t args = julia_to_list2_noalloc(fl_ctx, line, file);
+        value_t args = julia_to_list2_noalloc(fl_ctx, line, file, check_valid);
         fl_gc_handle(fl_ctx, &args);
-        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)line_sym);
+        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)jl_line_sym, check_valid);
         value_t scmv = fl_cons(fl_ctx, hd, args);
         fl_free_gc_handles(fl_ctx, 1);
         return scmv;
     }
     if (jl_typeis(v, jl_gotonode_type))
-        return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)goto_sym, jl_fieldref(v,0));
+        return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)jl_goto_sym, jl_fieldref(v,0), check_valid);
     if (jl_typeis(v, jl_quotenode_type))
-        return julia_to_list2(fl_ctx, (jl_value_t*)inert_sym, jl_fieldref_noalloc(v,0));
+        return julia_to_list2(fl_ctx, (jl_value_t*)jl_inert_sym, jl_fieldref_noalloc(v,0), 0);
     if (jl_typeis(v, jl_newvarnode_type))
-        return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)newvar_sym, jl_fieldref(v,0));
+        return julia_to_list2_noalloc(fl_ctx, (jl_value_t*)jl_newvar_sym, jl_fieldref(v,0), check_valid);
     if (jl_typeis(v, jl_globalref_type)) {
         jl_module_t *m = jl_globalref_mod(v);
         jl_sym_t *sym = jl_globalref_name(v);
         if (m == jl_core_module)
-            return julia_to_list2(fl_ctx, (jl_value_t*)core_sym,
-                                  (jl_value_t*)sym);
-        value_t args = julia_to_list2(fl_ctx, (jl_value_t*)m, (jl_value_t*)sym);
+            return julia_to_list2(fl_ctx, (jl_value_t*)jl_core_sym,
+                                  (jl_value_t*)sym, check_valid);
+        value_t args = julia_to_list2(fl_ctx, (jl_value_t*)m, (jl_value_t*)sym, check_valid);
         fl_gc_handle(fl_ctx, &args);
-        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)globalref_sym);
+        value_t hd = julia_to_scm_(fl_ctx, (jl_value_t*)jl_globalref_sym, check_valid);
         value_t scmv = fl_cons(fl_ctx, hd, args);
         fl_free_gc_handles(fl_ctx, 1);
         return scmv;
     }
-    return julia_to_scm_noalloc2(fl_ctx, v);
+    return julia_to_scm_noalloc2(fl_ctx, v, check_valid);
 }
 
 // Parse `text` starting at 0-based `offset` and attributing the content to
 // `filename`. Return an svec of (parsed_expr, final_offset)
 JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
-                                     jl_value_t *filename, size_t offset,
-                                     jl_value_t *options)
+                                     jl_value_t *filename, size_t lineno,
+                                     size_t offset, jl_value_t *options)
 {
     JL_TIMING(PARSING);
     if (offset > text_len) {
@@ -802,14 +785,14 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
         jl_bounds_error(textstr, jl_box_long(offset+1));
     }
     jl_sym_t *rule = (jl_sym_t*)options;
-    if (rule != atom_sym && rule != statement_sym && rule != all_sym) {
+    if (rule != jl_atom_sym && rule != jl_statement_sym && rule != jl_all_sym) {
         jl_error("jl_fl_parse: unrecognized parse options");
     }
-    if (offset != 0 && rule == all_sym) {
+    if (offset != 0 && rule == jl_all_sym) {
         jl_error("Parse `all`: offset not supported");
     }
 
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     value_t fl_text = cvalue_static_cstrn(fl_ctx, text, text_len);
     fl_gc_handle(fl_ctx, &fl_text);
@@ -818,16 +801,16 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
     fl_gc_handle(fl_ctx, &fl_filename);
     value_t fl_expr;
     size_t offset1 = 0;
-    if (rule == all_sym) {
-        value_t e = fl_applyn(fl_ctx, 2, symbol_value(symbol(fl_ctx, "jl-parse-all")),
-                              fl_text, fl_filename);
+    if (rule == jl_all_sym) {
+        value_t e = fl_applyn(fl_ctx, 3, symbol_value(symbol(fl_ctx, "jl-parse-all")),
+                              fl_text, fl_filename, fixnum(lineno));
         fl_expr = e;
         offset1 = e == fl_ctx->FL_EOF ? text_len : 0;
     }
     else {
-        value_t greedy = rule == statement_sym ? fl_ctx->T : fl_ctx->F;
-        value_t p = fl_applyn(fl_ctx, 4, symbol_value(symbol(fl_ctx, "jl-parse-one")),
-                              fl_text, fl_filename, fixnum(offset), greedy);
+        value_t greedy = rule == jl_statement_sym ? fl_ctx->T : fl_ctx->F;
+        value_t p = fl_applyn(fl_ctx, 5, symbol_value(symbol(fl_ctx, "jl-parse-one")),
+                              fl_text, fl_filename, fixnum(offset), greedy, fixnum(lineno));
         fl_expr = car_(p);
         offset1 = tosize(fl_ctx, cdr_(p), "parse");
     }
@@ -847,14 +830,12 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
 // returns either an expression or a thunk
 jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule);
     fl_context_t *fl_ctx = &ctx->fl;
-    JL_AST_PRESERVE_PUSH(ctx, old_roots, inmodule);
     value_t arg = julia_to_scm(fl_ctx, expr);
     value_t e = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, funcname)), arg);
     jl_value_t *result = scm_to_julia(fl_ctx, e, inmodule);
     JL_GC_PUSH1(&result);
-    JL_AST_PRESERVE_POP(ctx, old_roots);
     jl_ast_ctx_leave(ctx);
     JL_GC_POP();
     return result;
@@ -863,15 +844,13 @@ jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module
 static jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *expr,
                                               jl_module_t *inmodule, const char *file, int line)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule);
     fl_context_t *fl_ctx = &ctx->fl;
-    JL_AST_PRESERVE_PUSH(ctx, old_roots, inmodule);
     value_t arg = julia_to_scm(fl_ctx, expr);
     value_t e = fl_applyn(fl_ctx, 3, symbol_value(symbol(fl_ctx, funcname)), arg,
                           symbol(fl_ctx, file), fixnum(line));
     jl_value_t *result = scm_to_julia(fl_ctx, e, inmodule);
     JL_GC_PUSH1(&result);
-    JL_AST_PRESERVE_POP(ctx, old_roots);
     jl_ast_ctx_leave(ctx);
     JL_GC_POP();
     return result;
@@ -881,7 +860,46 @@ static jl_value_t *jl_call_scm_on_ast_and_loc(const char *funcname, jl_value_t *
 
 JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
 {
-    if (expr && jl_is_expr(expr)) {
+    if (!expr)
+        return NULL;
+    if (jl_is_code_info(expr)) {
+        jl_code_info_t *new_ci = (jl_code_info_t *)expr;
+        jl_array_t *new_code = NULL;
+        JL_GC_PUSH2(&new_ci, &new_code);
+        new_ci = jl_copy_code_info(new_ci);
+        new_code = jl_array_copy(new_ci->code);
+        size_t clen = jl_array_len(new_code);
+        for (int i = 0; i < clen; ++i) {
+            jl_array_ptr_set(new_code, i, jl_copy_ast(
+                jl_array_ptr_ref(new_code, i)
+            ));
+        }
+        new_ci->code = new_code;
+        jl_gc_wb(new_ci, new_code);
+        new_ci->slotnames = jl_array_copy(new_ci->slotnames);
+        jl_gc_wb(new_ci, new_ci->slotnames);
+        new_ci->slotflags = jl_array_copy(new_ci->slotflags);
+        jl_gc_wb(new_ci, new_ci->slotflags);
+        new_ci->codelocs = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->codelocs);
+        jl_gc_wb(new_ci, new_ci->codelocs);
+        new_ci->linetable = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->linetable);
+        jl_gc_wb(new_ci, new_ci->linetable);
+        new_ci->ssaflags = jl_array_copy(new_ci->ssaflags);
+        jl_gc_wb(new_ci, new_ci->ssaflags);
+
+        if (new_ci->edges != jl_nothing) {
+            new_ci->edges = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->edges);
+            jl_gc_wb(new_ci, new_ci->edges);
+        }
+
+        if (jl_is_array(new_ci->ssavaluetypes)) {
+            new_ci->ssavaluetypes = (jl_value_t*)jl_array_copy((jl_array_t*)new_ci->ssavaluetypes);
+            jl_gc_wb(new_ci, new_ci->ssavaluetypes);
+        }
+        JL_GC_POP();
+        return (jl_value_t*)new_ci;
+    }
+    if (jl_is_expr(expr)) {
         jl_expr_t *e = (jl_expr_t*)expr;
         size_t i, l = jl_array_len(e->args);
         jl_expr_t *ne = jl_exprn(e->head, l);
@@ -893,12 +911,30 @@ JL_DLLEXPORT jl_value_t *jl_copy_ast(jl_value_t *expr)
         JL_GC_POP();
         return (jl_value_t*)ne;
     }
+    if (jl_is_phinode(expr)) {
+        jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(expr, 0);
+        jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 1);
+        JL_GC_PUSH2(&edges, &values);
+        edges = jl_array_copy(edges);
+        values = jl_array_copy(values);
+        jl_value_t *ret = jl_new_struct(jl_phinode_type, edges, values);
+        JL_GC_POP();
+        return ret;
+    }
+    if (jl_is_phicnode(expr)) {
+        jl_array_t *values = (jl_array_t*)jl_fieldref_noalloc(expr, 0);
+        JL_GC_PUSH1(&values);
+        values = jl_array_copy(values);
+        jl_value_t *ret = jl_new_struct(jl_phinode_type, values);
+        JL_GC_POP();
+        return ret;
+    }
     return expr;
 }
 
 JL_DLLEXPORT int jl_is_operator(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "operator?")), symbol(fl_ctx, sym)) == fl_ctx->T;
     jl_ast_ctx_leave(ctx);
@@ -907,7 +943,7 @@ JL_DLLEXPORT int jl_is_operator(char *sym)
 
 JL_DLLEXPORT int jl_is_unary_operator(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "unary-op?")), symbol(fl_ctx, sym)) == fl_ctx->T;
     jl_ast_ctx_leave(ctx);
@@ -916,16 +952,25 @@ JL_DLLEXPORT int jl_is_unary_operator(char *sym)
 
 JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "unary-and-binary-op?")), symbol(fl_ctx, sym)) == fl_ctx->T;
     jl_ast_ctx_leave(ctx);
     return res;
 }
 
+JL_DLLEXPORT int jl_is_syntactic_operator(char *sym)
+{
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
+    fl_context_t *fl_ctx = &ctx->fl;
+    int res = fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "syntactic-op?")), symbol(fl_ctx, sym)) == fl_ctx->T;
+    jl_ast_ctx_leave(ctx);
+    return res;
+}
+
 JL_DLLEXPORT int jl_operator_precedence(char *sym)
 {
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(NULL);
     fl_context_t *fl_ctx = &ctx->fl;
     int res = numval(fl_applyn(fl_ctx, 1, symbol_value(symbol(fl_ctx, "operator-precedence")), symbol(fl_ctx, sym)));
     jl_ast_ctx_leave(ctx);
@@ -937,7 +982,7 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT
     size_t i, l = jl_array_len(body);
     for (i = 0; i < l; i++) {
         jl_expr_t *stmt = (jl_expr_t*)jl_array_ptr_ref(body, i);
-        if (jl_is_expr((jl_value_t*)stmt) && stmt->head == meta_sym) {
+        if (jl_is_expr((jl_value_t*)stmt) && stmt->head == jl_meta_sym) {
             size_t i, l = jl_array_len(stmt->args);
             for (i = 0; i < l; i++)
                 if (jl_array_ptr_ref(stmt->args, i) == (jl_value_t*)sym)
@@ -947,9 +992,9 @@ int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT
     return 0;
 }
 
-static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx)
+static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule, jl_module_t **ctx, size_t world, int throw_load_error)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     JL_TIMING(MACRO_INVOCATION);
     size_t nargs = jl_array_len(args) + 1;
     JL_NARGSV("macrocall", 3); // macro name, location, and module
@@ -967,9 +1012,10 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
     for (i = 3; i < nargs; i++)
         margs[i] = jl_array_ptr_ref(args, i - 1);
 
-    size_t last_age = ptls->world_age;
-    size_t world = jl_world_counter;
-    ptls->world_age = world;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+    if (ct->world_age > world)
+        ct->world_age = world;
     jl_value_t *result;
     JL_TRY {
         margs[0] = jl_toplevel_eval(*ctx, margs[0]);
@@ -983,7 +1029,7 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
         result = jl_invoke(margs[0], &margs[1], nargs - 1, mfunc);
     }
     JL_CATCH {
-        if (jl_loaderror_type == NULL) {
+        if ((jl_loaderror_type == NULL) || !throw_load_error) {
             jl_rethrow();
         }
         else {
@@ -998,61 +1044,55 @@ static jl_value_t *jl_invoke_julia_macro(jl_array_t *args, jl_module_t *inmodule
                                            jl_current_exception()));
         }
     }
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     JL_GC_POP();
     return result;
 }
 
-static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel)
+static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error)
 {
     if (!expr || !jl_is_expr(expr))
         return expr;
     jl_expr_t *e = (jl_expr_t*)expr;
-    if (e->head == inert_sym ||
-        e->head == module_sym ||
-        //e->head == toplevel_sym || // TODO: enable this once julia-expand-macroscope is fixed / removed
-        e->head == meta_sym) {
+    if (e->head == jl_inert_sym ||
+        e->head == jl_module_sym ||
+        //e->head == jl_toplevel_sym || // TODO: enable this once julia-expand-macroscope is fixed / removed
+        e->head == jl_meta_sym) {
         return expr;
     }
-    if (e->head == quote_sym && jl_expr_nargs(e) == 1) {
+    if (e->head == jl_quote_sym && jl_expr_nargs(e) == 1) {
         expr = jl_call_scm_on_ast("julia-bq-macro", jl_exprarg(e, 0), inmodule);
         JL_GC_PUSH1(&expr);
-        if (macroctx) {
-            // in a macro, `quote` also implies `escape`
-            jl_expr_t *e2 = jl_exprn(escape_sym, 1);
-            jl_array_ptr_set(e2->args, 0, expr);
-            expr = (jl_value_t*)e2;
-        }
-        expr = jl_expand_macros(expr, inmodule, macroctx, onelevel);
+        expr = jl_expand_macros(expr, inmodule, macroctx, onelevel, world, throw_load_error);
         JL_GC_POP();
         return expr;
     }
-    if (e->head == hygienicscope_sym && jl_expr_nargs(e) == 2) {
+    if (e->head == jl_hygienicscope_sym && jl_expr_nargs(e) == 2) {
         struct macroctx_stack newctx;
         newctx.m = (jl_module_t*)jl_exprarg(e, 1);
         JL_TYPECHK(hygienic-scope, module, (jl_value_t*)newctx.m);
         newctx.parent = macroctx;
         jl_value_t *a = jl_exprarg(e, 0);
-        jl_value_t *a2 = jl_expand_macros(a, inmodule, &newctx, onelevel);
+        jl_value_t *a2 = jl_expand_macros(a, inmodule, &newctx, onelevel, world, throw_load_error);
         if (a != a2)
             jl_array_ptr_set(e->args, 0, a2);
         return expr;
     }
-    if (e->head == macrocall_sym) {
+    if (e->head == jl_macrocall_sym) {
         struct macroctx_stack newctx;
         newctx.m = macroctx ? macroctx->m : inmodule;
         newctx.parent = macroctx;
-        jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m);
+        jl_value_t *result = jl_invoke_julia_macro(e->args, inmodule, &newctx.m, world, throw_load_error);
         jl_value_t *wrap = NULL;
         JL_GC_PUSH3(&result, &wrap, &newctx.m);
         // copy and wrap the result in `(hygienic-scope ,result ,newctx)
-        if (jl_is_expr(result) && ((jl_expr_t*)result)->head == escape_sym)
+        if (jl_is_expr(result) && ((jl_expr_t*)result)->head == jl_escape_sym)
             result = jl_exprarg(result, 0);
         else
-            wrap = (jl_value_t*)jl_exprn(hygienicscope_sym, 2);
+            wrap = (jl_value_t*)jl_exprn(jl_hygienicscope_sym, 2);
         result = jl_copy_ast(result);
         if (!onelevel)
-            result = jl_expand_macros(result, inmodule, wrap ? &newctx : macroctx, onelevel);
+            result = jl_expand_macros(result, inmodule, wrap ? &newctx : macroctx, onelevel, world, throw_load_error);
         if (wrap) {
             jl_exprargset(wrap, 0, result);
             jl_exprargset(wrap, 1, newctx.m);
@@ -1061,11 +1101,11 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
         JL_GC_POP();
         return result;
     }
-    if (e->head == do_sym && jl_expr_nargs(e) == 2 && jl_is_expr(jl_exprarg(e, 0)) &&
-        ((jl_expr_t*)jl_exprarg(e, 0))->head == macrocall_sym) {
+    if (e->head == jl_do_sym && jl_expr_nargs(e) == 2 && jl_is_expr(jl_exprarg(e, 0)) &&
+        ((jl_expr_t*)jl_exprarg(e, 0))->head == jl_macrocall_sym) {
         jl_expr_t *mc = (jl_expr_t*)jl_exprarg(e, 0);
         size_t nm = jl_expr_nargs(mc);
-        jl_expr_t *mc2 = jl_exprn(macrocall_sym, nm+1);
+        jl_expr_t *mc2 = jl_exprn(jl_macrocall_sym, nm+1);
         JL_GC_PUSH1(&mc2);
         jl_exprargset(mc2, 0, jl_exprarg(mc, 0));  // macro name
         jl_exprargset(mc2, 1, jl_exprarg(mc, 1));  // location
@@ -1074,18 +1114,18 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str
         for (j = 2; j < nm; j++) {
             jl_exprargset(mc2, j+1, jl_exprarg(mc, j));
         }
-        jl_value_t *ret = jl_expand_macros((jl_value_t*)mc2, inmodule, macroctx, onelevel);
+        jl_value_t *ret = jl_expand_macros((jl_value_t*)mc2, inmodule, macroctx, onelevel, world, throw_load_error);
         JL_GC_POP();
         return ret;
     }
-    if (e->head == escape_sym && macroctx) {
+    if (e->head == jl_escape_sym && macroctx) {
         macroctx = macroctx->parent;
     }
 
     size_t i;
     for (i = 0; i < jl_array_len(e->args); i++) {
         jl_value_t *a = jl_array_ptr_ref(e->args, i);
-        jl_value_t *a2 = jl_expand_macros(a, inmodule, macroctx, onelevel);
+        jl_value_t *a2 = jl_expand_macros(a, inmodule, macroctx, onelevel, world, throw_load_error);
         if (a != a2)
             jl_array_ptr_set(e->args, i, a2);
     }
@@ -1097,7 +1137,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand(jl_value_t *expr, jl_module_t *inmodule)
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, jl_atomic_load_acquire(&jl_world_counter), 0);
     expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
     JL_GC_POP();
     return expr;
@@ -1108,7 +1148,7 @@ JL_DLLEXPORT jl_value_t *jl_macroexpand1(jl_value_t *expr, jl_module_t *inmodule
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 1);
+    expr = jl_expand_macros(expr, inmodule, NULL, 1, jl_atomic_load_acquire(&jl_world_counter), 0);
     expr = jl_call_scm_on_ast("jl-expand-macroscope", expr, inmodule);
     JL_GC_POP();
     return expr;
@@ -1123,11 +1163,18 @@ JL_DLLEXPORT jl_value_t *jl_expand(jl_value_t *expr, jl_module_t *inmodule)
 // Lowering, with starting program location specified
 JL_DLLEXPORT jl_value_t *jl_expand_with_loc(jl_value_t *expr, jl_module_t *inmodule,
                                             const char *file, int line)
+{
+    return jl_expand_in_world(expr, inmodule, file, line, ~(size_t)0);
+}
+
+// Lowering, with starting program location and worldage specified
+JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmodule,
+                                            const char *file, int line, size_t world)
 {
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, world, 1);
     expr = jl_call_scm_on_ast_and_loc("jl-expand-to-thunk", expr, inmodule, file, line);
     JL_GC_POP();
     return expr;
@@ -1138,18 +1185,45 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *
                                                  const char *file, int line)
 {
     JL_TIMING(LOWERING);
-    JL_GC_PUSH1(&expr);
+    jl_array_t *kwargs = NULL;
+    JL_GC_PUSH2(&expr, &kwargs);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0);
-    jl_ast_context_t *ctx = jl_ast_ctx_enter();
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
+    jl_ast_context_t *ctx = jl_ast_ctx_enter(inmodule);
     fl_context_t *fl_ctx = &ctx->fl;
-    JL_AST_PRESERVE_PUSH(ctx, old_roots, inmodule);
     value_t arg = julia_to_scm(fl_ctx, expr);
     value_t e = fl_applyn(fl_ctx, 4, symbol_value(symbol(fl_ctx, "jl-expand-to-thunk-warn")), arg,
                           symbol(fl_ctx, file), fixnum(line), fl_ctx->F);
     expr = scm_to_julia(fl_ctx, e, inmodule);
-    JL_AST_PRESERVE_POP(ctx, old_roots);
     jl_ast_ctx_leave(ctx);
+    jl_sym_t *warn_sym = jl_symbol("warn");
+    if (jl_is_expr(expr) && ((jl_expr_t*)expr)->head == warn_sym) {
+        size_t nargs = jl_expr_nargs(expr);
+        for (int i = 0; i < nargs - 1; i++) {
+            jl_value_t *warning = jl_exprarg(expr, i);
+            size_t nargs = 0;
+            if (jl_is_expr(warning) && ((jl_expr_t*)warning)->head == warn_sym)
+                 nargs = jl_expr_nargs(warning);
+            int kwargs_len = (int)nargs - 6;
+            if (nargs < 6 || kwargs_len % 2 != 0) {
+                jl_error("julia-logmsg: bad argument list - expected "
+                         ":warn level (symbol) group (symbol) id file line msg . kwargs");
+            }
+            jl_value_t *level = jl_exprarg(warning, 0);
+            jl_value_t *group = jl_exprarg(warning, 1);
+            jl_value_t *id = jl_exprarg(warning, 2);
+            jl_value_t *file = jl_exprarg(warning, 3);
+            jl_value_t *line = jl_exprarg(warning, 4);
+            jl_value_t *msg = jl_exprarg(warning, 5);
+            kwargs = jl_alloc_vec_any(kwargs_len);
+            for (int i = 0; i < kwargs_len; ++i) {
+                jl_array_ptr_set(kwargs, i, jl_exprarg(warning, i + 6));
+            }
+            JL_TYPECHK(logmsg, long, level);
+            jl_log(jl_unbox_long(level), NULL, group, id, file, line, (jl_value_t*)kwargs, msg);
+        }
+        expr = jl_exprarg(expr, nargs - 1);
+    }
     JL_GC_POP();
     return expr;
 }
@@ -1161,7 +1235,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *
     JL_TIMING(LOWERING);
     JL_GC_PUSH1(&expr);
     expr = jl_copy_ast(expr);
-    expr = jl_expand_macros(expr, inmodule, NULL, 0);
+    expr = jl_expand_macros(expr, inmodule, NULL, 0, ~(size_t)0, 1);
     expr = jl_call_scm_on_ast_and_loc("jl-expand-to-thunk-stmt", expr, inmodule, file, line);
     JL_GC_POP();
     return expr;
@@ -1180,7 +1254,7 @@ JL_DLLEXPORT jl_value_t *jl_expand_stmt(jl_value_t *expr, jl_module_t *inmodule)
 // `text` is passed as a pointer to allow raw non-String buffers to be used
 // without copying.
 JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
-                                  size_t offset, jl_value_t *options)
+                                  size_t lineno, size_t offset, jl_value_t *options)
 {
     jl_value_t *core_parse = NULL;
     if (jl_core_module) {
@@ -1188,23 +1262,24 @@ JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t
     }
     if (!core_parse || core_parse == jl_nothing) {
         // In bootstrap, directly call the builtin parser.
-        jl_value_t *result = jl_fl_parse(text, text_len, filename, offset, options);
+        jl_value_t *result = jl_fl_parse(text, text_len, filename, lineno, offset, options);
         return result;
     }
     jl_value_t **args;
-    JL_GC_PUSHARGS(args, 5);
+    JL_GC_PUSHARGS(args, 6);
     args[0] = core_parse;
     args[1] = (jl_value_t*)jl_alloc_svec(2);
     jl_svecset(args[1], 0, jl_box_uint8pointer((uint8_t*)text));
     jl_svecset(args[1], 1, jl_box_long(text_len));
     args[2] = filename;
-    args[3] = jl_box_ulong(offset);
-    args[4] = options;
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
-    ptls->world_age = jl_world_counter;
-    jl_value_t *result = jl_apply(args, 5);
-    ptls->world_age = last_age;
+    args[3] = jl_box_ulong(lineno);
+    args[4] = jl_box_ulong(offset);
+    args[5] = options;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+    jl_value_t *result = jl_apply(args, 6);
+    ct->world_age = last_age;
     args[0] = result; // root during error checks below
     JL_TYPECHK(parse, simplevector, result);
     if (jl_svec_len(result) != 2)
@@ -1217,11 +1292,11 @@ JL_DLLEXPORT jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t
 
 // parse an entire string as a file, reading multiple expressions
 JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len,
-                                      const char *filename, size_t filename_len)
+                                      const char *filename, size_t filename_len, size_t lineno)
 {
     jl_value_t *fname = jl_pchar_to_string(filename, filename_len);
     JL_GC_PUSH1(&fname);
-    jl_value_t *p = jl_parse(text, text_len, fname, 0, (jl_value_t*)all_sym);
+    jl_value_t *p = jl_parse(text, text_len, fname, lineno, 0, (jl_value_t*)jl_all_sym);
     JL_GC_POP();
     return jl_svecref(p, 0);
 }
@@ -1233,8 +1308,8 @@ JL_DLLEXPORT jl_value_t *jl_parse_string(const char *text, size_t text_len,
 {
     jl_value_t *fname = jl_cstr_to_string("none");
     JL_GC_PUSH1(&fname);
-    jl_value_t *result = jl_parse(text, text_len, fname, offset,
-                                  (jl_value_t*)(greedy ? statement_sym : atom_sym));
+    jl_value_t *result = jl_parse(text, text_len, fname, 1, offset,
+                                  (jl_value_t*)(greedy ? jl_statement_sym : jl_atom_sym));
     JL_GC_POP();
     return result;
 }
@@ -1243,7 +1318,7 @@ JL_DLLEXPORT jl_value_t *jl_parse_string(const char *text, size_t text_len,
 JL_DLLEXPORT jl_value_t *jl_parse_input_line(const char *text, size_t text_len,
                                              const char *filename, size_t filename_len)
 {
-    return jl_parse_all(text, text_len, filename, filename_len);
+    return jl_parse_all(text, text_len, filename, filename_len, 1);
 }
 
 #ifdef __cplusplus
diff --git a/src/ast.scm b/src/ast.scm
index e94e56c56de61a..0f69638fdb52e0 100644
--- a/src/ast.scm
+++ b/src/ast.scm
@@ -46,6 +46,27 @@
       (string ":" (deparse e))
       (deparse e)))
 
+(define (deparse-import-path e)
+  (cond ((and (pair? e) (eq? (car e) '|.|))
+         (let loop ((lst   (cdr e))
+                    (ndots 0))
+           (if (or (null? lst)
+                   (not (eq? (car lst) '|.|)))
+               (string (string.rep "." ndots)
+                       (string.join (map deparse lst) "."))
+               (loop (cdr lst) (+ ndots 1)))))
+        ((and (pair? e) (eq? (car e) ':))
+         (string (deparse-import-path (cadr e)) ": "
+                 (string.join (map deparse-import-path (cddr e)) ", ")))
+        (else
+         (string e))))
+
+(define (deparse-semicolons n)
+  ; concatenate n semicolons
+  (if (<= n 0)
+      ""
+      (string ";" (deparse-semicolons (1- n)))))
+
 (define (deparse e (ilvl 0))
   (cond ((or (symbol? e) (number? e)) (string e))
         ((string? e) (print-to-string e))
@@ -58,19 +79,23 @@
         ((char? e) (string "'" e "'"))
         ((atom? e) (string e))
         ((eq? (car e) '|.|)
-         (string (deparse (cadr e)) '|.|
-                 (cond ((and (pair? (caddr e)) (memq (caaddr e) '(quote inert)))
-                        (deparse-colon-dot (cadr (caddr e))))
-                       ((and (pair? (caddr e)) (eq? (caaddr e) 'copyast))
-                        (deparse-colon-dot (cadr (cadr (caddr e)))))
-                       (else
-                        (string #\( (deparse (caddr e)) #\))))))
+         (if (length= e 2)
+             (string "(." (deparse (cadr e)) ")")
+             (string (deparse (cadr e)) '|.|
+                     (cond ((and (pair? (caddr e)) (memq (caaddr e) '(quote inert)))
+                            (deparse-colon-dot (cadr (caddr e))))
+                           ((and (pair? (caddr e)) (eq? (caaddr e) 'copyast))
+                            (deparse-colon-dot (cadr (cadr (caddr e)))))
+                           (else
+                            (string #\( (deparse (caddr e)) #\)))))))
         ((memq (car e) '(... |'|))
          (string (deparse (cadr e)) (car e)))
         ((or (syntactic-op? (car e)) (eq? (car e) '|<:|) (eq? (car e) '|>:|) (eq? (car e) '-->))
          (if (length= e 2)
              (string (car e) (deparse (cadr e)))
              (string (deparse (cadr e)) " " (car e) " " (deparse (caddr e)))))
+        ((eq? (car e) 'as)
+         (string (deparse-import-path (cadr e)) " as " (deparse (caddr e))))
         (else
          (case (car e)
            ((null)  "nothing")
@@ -117,7 +142,14 @@
            ((hcat)        (string #\[ (deparse-arglist (cdr e) " ") #\]))
            ((typed_hcat)  (string (deparse (cadr e))
                                   (deparse (cons 'hcat (cddr e)))))
+           ((ncat)        (string #\[ (deparse-arglist (cddr e) (string (deparse-semicolons (cadr e)) " "))
+                                      (if (<= (length (cddr e)) 1)
+                                          (deparse-semicolons (cadr e))
+                                          "") #\]))
+           ((typed_ncat)  (string (deparse (cadr e))
+                                  (deparse (cons 'ncat (cddr e)))))
            ((row)        (deparse-arglist (cdr e) " "))
+           ((nrow)       (deparse-arglist (cddr e) (string (deparse-semicolons (cadr e)) " ")))
            ((braces)     (string #\{ (deparse-arglist (cdr e) ", ") #\}))
            ((bracescat)  (string #\{ (deparse-arglist (cdr e) "; ") #\}))
            ((string)
@@ -147,7 +179,7 @@
                   (cdr e)
                   (list e)))
             (deparse-block (string (car e) " " (deparse (cadr e)))
-                           (block-stmts (caddr e))
+                           (if (null? (cddr e)) '() (block-stmts (caddr e)))
                            ilvl))
            ((return)         (string "return " (deparse (cadr e))))
            ((break continue) (string (car e)))
@@ -179,6 +211,13 @@
                                 "\n"
                                 (indented-block (cdr (cadddr e)) ilvl))
                         "")
+                    (if (length> e 5)
+                        (let ((els (cadddddr e)))
+                          (if (and (pair? els) (eq? (car els) 'block))
+                              (string (string.rep "    " ilvl) "else\n"
+                                      (indented-block (cdr els) ilvl))
+                              ""))
+                        "")
                     (if (length> e 4)
                         (let ((fin (caddddr e)))
                           (if (and (pair? fin) (eq? (car fin) 'block))
@@ -209,21 +248,7 @@
                     "end"))
            ;; misc syntax forms
            ((import using)
-            (define (deparse-path e)
-              (cond ((and (pair? e) (eq? (car e) '|.|))
-                     (let loop ((lst   (cdr e))
-                                (ndots 0))
-                       (if (or (null? lst)
-                               (not (eq? (car lst) '|.|)))
-                           (string (string.rep "." ndots)
-                                   (string.join (map deparse lst) "."))
-                           (loop (cdr lst) (+ ndots 1)))))
-                    ((and (pair? e) (eq? (car e) ':))
-                     (string (deparse-path (cadr e)) ": "
-                             (string.join (map deparse-path (cddr e)) ", ")))
-                    (else
-                     (string e))))
-            (string (car e) " " (string.join (map deparse-path (cdr e)) ", ")))
+            (string (car e) " " (string.join (map deparse-import-path (cdr e)) ", ")))
            ((global local export) (string (car e) " " (string.join (map deparse (cdr e)) ", ")))
            ((const)        (string "const " (deparse (cadr e))))
            ((top)          (deparse (cadr e)))
@@ -264,9 +289,6 @@
 (define (reset-gensyms)
   (set! *current-gensyms* *gensyms*))
 
-(define (some-gensym? x)
-  (or (gensym? x) (memq x *gensyms*)))
-
 (define make-ssavalue
   (let ((ssavalue-counter 0))
     (lambda ()
@@ -276,7 +298,7 @@
 ;; predicates and accessors
 
 (define (quoted? e)
-  (memq (car e) '(quote top core globalref outerref line break inert meta inbounds loopinfo)))
+  (memq (car e) '(quote top core globalref outerref line break inert meta inbounds inline noinline loopinfo)))
 (define (quotify e) `',e)
 (define (unquote e)
   (if (and (pair? e) (memq (car e) '(quote inert)))
@@ -306,7 +328,7 @@
          (bad-formal-argument v))
         (else
          (case (car v)
-           ((... kw)
+           ((...)
 	    (arg-name (cadr v)) ;; to check for errors
 	    (decl-var (cadr v)))
            ((|::|)
@@ -317,6 +339,8 @@
             (if (nospecialize-meta? v #t)
                 (arg-name (caddr v))
                 (bad-formal-argument v)))
+           ((kw)
+            (arg-name (cadr v)))
            (else (bad-formal-argument v))))))
 
 (define (arg-type v)
@@ -336,6 +360,8 @@
             (if (nospecialize-meta? v #t)
                 (arg-type (caddr v))
                 (bad-formal-argument v)))
+           ((kw)
+            (arg-type (cadr v)))
            (else (bad-formal-argument v))))))
 
 ;; convert a lambda list into a list of just symbols
@@ -350,6 +376,12 @@
 (define (decl? e)
   (and (pair? e) (eq? (car e) '|::|)))
 
+(define (symdecl? e)
+  (or (symbol? e) (decl? e)))
+
+(define (eventually-decl? e)
+  (or (symbol? e) (and (pair? e) (memq (car e) '(|::| atomic const)) (eventually-decl? (cadr e)))))
+
 (define (make-decl n t) `(|::| ,n ,t))
 
 (define (ssavalue? e)
@@ -415,7 +447,7 @@
   (if (dotop-named? e)
       (error (string "invalid function name \"" (deparse e) "\""))
       (if (pair? e)
-          (if (eq? (car e) '|.|)
+          (if (and (eq? (car e) '|.|) (length= e 3))
               (check-dotop (caddr e))
               (if (quoted? e)
                   (check-dotop (cadr e))))))
@@ -438,9 +470,6 @@
 (define (make-assignment l r) `(= ,l ,r))
 (define (assignment? e) (and (pair? e) (eq? (car e) '=)))
 (define (return? e) (and (pair? e) (eq? (car e) 'return)))
-(define (complex-return? e) (and (return? e)
-                                 (let ((x (cadr e)))
-                                   (not (simple-atom? x)))))
 
 (define (tuple-call? e)
   (and (length> e 1)
diff --git a/src/atomics.h b/src/atomics.h
deleted file mode 100644
index d3aa9d8ba8b3be..00000000000000
--- a/src/atomics.h
+++ /dev/null
@@ -1,343 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#ifndef JL_ATOMICS_H
-#define JL_ATOMICS_H
-
-// Low-level atomic operations
-
-#if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
-#  error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
-#endif
-#ifdef _COMPILER_MICROSOFT_
-#  include <intrin.h>
-#  include <type_traits>
-#endif
-#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
-#  include <immintrin.h>
-#endif
-#ifndef _OS_WINDOWS_
-#  include <pthread.h>
-#endif
-#include <signal.h>
-
-/**
- * Thread synchronization primitives:
- *
- * These roughly follows the c11/c++11 memory model and the act as memory
- * barriers at both the compiler level and the hardware level.
- * The only exception is the GC safepoint and GC state transitions for which
- * we use only a compiler (signal) barrier and use the signal handler to do the
- * synchronization in order to lower the mutator overhead as much as possible.
- *
- * We use the compiler intrinsics to implement a similar API to the c11/c++11
- * one instead of using it directly because,
- *
- *     1. We support GCC 4.7 and GCC add support for c11 atomics in 4.9.
- *        Luckily, the __atomic intrinsics were added in GCC 4.7.
- *     2. (most importantly) we need interoperability between code written
- *        in different languages.
- *        The current c++ standard (c++14) does not allow using c11 atomic
- *        functions or types and there's currently no guarantee that the two
- *        types are compatible (although most of them probably are).
- *        We also need to access these atomic variables from the LLVM JIT code
- *        which is very hard unless the layout of the object is fully
- *        specified.
- */
-#if defined(__GNUC__)
-#  define jl_fence() __atomic_thread_fence(__ATOMIC_SEQ_CST)
-#  define jl_fence_release() __atomic_thread_fence(__ATOMIC_RELEASE)
-#  define jl_signal_fence() __atomic_signal_fence(__ATOMIC_SEQ_CST)
-#  define jl_atomic_fetch_add_relaxed(obj, arg)         \
-    __atomic_fetch_add(obj, arg, __ATOMIC_RELAXED)
-#  define jl_atomic_fetch_add(obj, arg)                 \
-    __atomic_fetch_add(obj, arg, __ATOMIC_SEQ_CST)
-#  define jl_atomic_add_fetch(obj, arg)                 \
-    __atomic_add_fetch(obj, arg, __ATOMIC_SEQ_CST)
-#  define jl_atomic_fetch_and_relaxed(obj, arg)         \
-    __atomic_fetch_and(obj, arg, __ATOMIC_RELAXED)
-#  define jl_atomic_fetch_and(obj, arg)                 \
-    __atomic_fetch_and(obj, arg, __ATOMIC_SEQ_CST)
-#  define jl_atomic_fetch_or_relaxed(obj, arg)          \
-    __atomic_fetch_or(obj, arg, __ATOMIC_RELAXED)
-#  define jl_atomic_fetch_or(obj, arg)                  \
-    __atomic_fetch_or(obj, arg, __ATOMIC_SEQ_CST)
-// Returns the original value of `obj`
-// Use the legacy __sync builtins for now, this can also be written using
-// the __atomic builtins or c11 atomics with GNU extension or c11 _Generic
-#  define jl_atomic_compare_exchange(obj, expected, desired)    \
-    __sync_val_compare_and_swap(obj, expected, desired)
-#  define jl_atomic_bool_compare_exchange(obj, expected, desired)          \
-    __sync_bool_compare_and_swap(obj, expected, desired)
-#  define jl_atomic_exchange(obj, desired)              \
-    __atomic_exchange_n(obj, desired, __ATOMIC_SEQ_CST)
-#  define jl_atomic_exchange_relaxed(obj, desired)      \
-    __atomic_exchange_n(obj, desired, __ATOMIC_RELAXED)
-// TODO: Maybe add jl_atomic_compare_exchange_weak for spin lock
-#  define jl_atomic_store(obj, val)                     \
-    __atomic_store_n(obj, val, __ATOMIC_SEQ_CST)
-#  define jl_atomic_store_relaxed(obj, val)             \
-    __atomic_store_n(obj, val, __ATOMIC_RELAXED)
-#  if defined(__clang__) || defined(__ICC) || defined(__INTEL_COMPILER) || \
-    !(defined(_CPU_X86_) || defined(_CPU_X86_64_))
-// ICC and Clang doesn't have this bug...
-#    define jl_atomic_store_release(obj, val)           \
-    __atomic_store_n(obj, val, __ATOMIC_RELEASE)
-#  else
-// Workaround a GCC bug when using store with release order by using the
-// stronger version instead.
-// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
-#    define jl_atomic_store_release(obj, val) do {      \
-        jl_signal_fence();                              \
-        __atomic_store_n(obj, val, __ATOMIC_RELEASE);   \
-    } while (0)
-#  endif
-#  define jl_atomic_load(obj)                   \
-    __atomic_load_n(obj, __ATOMIC_SEQ_CST)
-#  define jl_atomic_load_acquire(obj)           \
-    __atomic_load_n(obj, __ATOMIC_ACQUIRE)
-#ifdef JL_TSAN_ENABLED
-// For the sake of tsan, call these loads consume ordering since they will act
-// as such on the processors we support while normally, the compiler would
-// upgrade this to acquire ordering, which is strong (and slower) than we want.
-#  define jl_atomic_load_relaxed(obj)           \
-    __atomic_load_n(obj, __ATOMIC_CONSUME)
-#else
-#  define jl_atomic_load_relaxed(obj)           \
-    __atomic_load_n(obj, __ATOMIC_RELAXED)
-#endif
-#elif defined(_COMPILER_MICROSOFT_)
-// TODO: these only define compiler barriers, and aren't correct outside of x86
-#  define jl_fence() _ReadWriteBarrier()
-#  define jl_fence_release() _WriteBarrier()
-#  define jl_signal_fence() _ReadWriteBarrier()
-
-// add
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_fetch_add(T *obj, T2 arg)
-{
-    return (T)_InterlockedExchangeAdd8((volatile char*)obj, (char)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_fetch_add(T *obj, T2 arg)
-{
-    return (T)_InterlockedExchangeAdd16((volatile short*)obj, (short)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_fetch_add(T *obj, T2 arg)
-{
-    return (T)_InterlockedExchangeAdd((volatile LONG*)obj, (LONG)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_fetch_add(T *obj, T2 arg)
-{
-    return (T)_InterlockedExchangeAdd64((volatile __int64*)obj, (__int64)arg);
-}
-#define jl_atomic_fetch_add_relaxed(obj, arg) jl_atomic_fetch_add(obj, arg)
-
-// and
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_fetch_and(T *obj, T2 arg)
-{
-    return (T)_InterlockedAnd8((volatile char*)obj, (char)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_fetch_and(T *obj, T2 arg)
-{
-    return (T)_InterlockedAnd16((volatile short*)obj, (short)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_fetch_and(T *obj, T2 arg)
-{
-    return (T)_InterlockedAnd((volatile LONG*)obj, (LONG)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_fetch_and(T *obj, T2 arg)
-{
-    return (T)_InterlockedAnd64((volatile __int64*)obj, (__int64)arg);
-}
-#define jl_atomic_fetch_and_relaxed(obj, arg) jl_atomic_fetch_and(obj, arg)
-
-// or
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_fetch_or(T *obj, T2 arg)
-{
-    return (T)_InterlockedOr8((volatile char*)obj, (char)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_fetch_or(T *obj, T2 arg)
-{
-    return (T)_InterlockedOr16((volatile short*)obj, (short)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_fetch_or(T *obj, T2 arg)
-{
-    return (T)_InterlockedOr((volatile LONG*)obj, (LONG)arg);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_fetch_or(T *obj, T2 arg)
-{
-    return (T)_InterlockedOr64((volatile __int64*)obj, (__int64)arg);
-}
-#define jl_atomic_fetch_or_relaxed(obj, arg) jl_atomic_fetch_or(obj, arg)
-
-// Returns the original value of `obj`
-template<typename T, typename T2, typename T3>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
-{
-    return (T)_InterlockedCompareExchange8((volatile char*)obj,
-                                           (char)desired, (char)expected);
-}
-template<typename T, typename T2, typename T3>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
-{
-    return (T)_InterlockedCompareExchange16((volatile short*)obj,
-                                            (short)desired, (short)expected);
-}
-template<typename T, typename T2, typename T3>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
-{
-    return (T)_InterlockedCompareExchange((volatile LONG*)obj,
-                                          (LONG)desired, (LONG)expected);
-}
-template<typename T, typename T2, typename T3>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_compare_exchange(volatile T *obj, T2 expected, T3 desired)
-{
-    return (T)_InterlockedCompareExchange64((volatile __int64*)obj,
-                                            (__int64)desired, (__int64)expected);
-}
-// TODO: jl_atomic_bool_compare_exchange
-// atomic exchange
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1, T>::type
-jl_atomic_exchange(volatile T *obj, T2 val)
-{
-    return _InterlockedExchange8((volatile char*)obj, (char)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2, T>::type
-jl_atomic_exchange(volatile T *obj, T2 val)
-{
-    return _InterlockedExchange16((volatile short*)obj, (short)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4, T>::type
-jl_atomic_exchange(volatile T *obj, T2 val)
-{
-    return _InterlockedExchange((volatile LONG*)obj, (LONG)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8, T>::type
-jl_atomic_exchange(volatile T *obj, T2 val)
-{
-    return _InterlockedExchange64((volatile __int64*)obj, (__int64)val);
-}
-#define jl_atomic_exchange_relaxed(obj, val) jl_atomic_exchange(obj, val)
-// atomic stores
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 1>::type
-jl_atomic_store(volatile T *obj, T2 val)
-{
-    _InterlockedExchange8((volatile char*)obj, (char)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 2>::type
-jl_atomic_store(volatile T *obj, T2 val)
-{
-    _InterlockedExchange16((volatile short*)obj, (short)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 4>::type
-jl_atomic_store(volatile T *obj, T2 val)
-{
-    _InterlockedExchange((volatile LONG*)obj, (LONG)val);
-}
-template<typename T, typename T2>
-static inline typename std::enable_if<sizeof(T) == 8>::type
-jl_atomic_store(volatile T *obj, T2 val)
-{
-    _InterlockedExchange64((volatile __int64*)obj, (__int64)val);
-}
-template<typename T, typename T2>
-static inline void jl_atomic_store_release(volatile T *obj, T2 val)
-{
-    jl_signal_fence();
-    *obj = (T)val;
-}
-template<typename T, typename T2>
-static inline void jl_atomic_store_relaxed(volatile T *obj, T2 val)
-{
-    *obj = (T)val;
-}
-// atomic loads
-template<typename T>
-static inline T jl_atomic_load(volatile T *obj)
-{
-    // Trick to generate cheaper instructions compare to `_InterlockedOr`
-    // Note that we don't care whether the exchange succeeded or not...
-    return jl_atomic_compare_exchange(obj, T(0), T(0));
-}
-template<typename T>
-static inline T jl_atomic_load_acquire(volatile T *obj)
-{
-    T val = *obj;
-    jl_signal_fence();
-    return val;
-}
-#else
-#  error "No atomic operations supported."
-#endif
-
-#ifdef __clang_analyzer__
-// for the purposes of the analyzer, we can turn these into non-atomic expressions with similar properties
-
-#undef jl_atomic_exchange
-#undef jl_atomic_exchange_relaxed
-#define jl_atomic_exchange(obj, desired) \
-    (__extension__({ \
-            __typeof__((obj)) p = (obj); \
-            __typeof__(*p) temp = *p; \
-            *p = desired; \
-            temp; \
-        }))
-#define jl_atomic_exchange_relaxed jl_atomic_exchange
-
-#undef jl_atomic_compare_exchange
-#define jl_atomic_compare_exchange(obj, expected, desired) ((expected), jl_atomic_exchange((obj), (desired)))
-
-#undef jl_atomic_bool_compare_exchange
-#define jl_atomic_bool_compare_exchange(obj, expected, desired) ((expected) == jl_atomic_exchange((obj), (desired)))
-
-#undef jl_atomic_store
-#undef jl_atomic_store_release
-#undef jl_atomic_store_relaxed
-#define jl_atomic_store(obj, val)         (*(obj) = (val))
-#define jl_atomic_store_release(obj, val) (*(obj) = (val))
-#define jl_atomic_store_relaxed(obj, val) (*(obj) = (val))
-
-#undef jl_atomic_load
-#undef jl_atomic_load_acquire
-#undef jl_atomic_load_relaxed
-#define jl_atomic_load(obj)         (*(obj))
-#define jl_atomic_load_acquire(obj) (*(obj))
-#define jl_atomic_load_relaxed(obj) (*(obj))
-
-#endif
-
-
-#endif // JL_ATOMICS_H
diff --git a/src/builtin_proto.h b/src/builtin_proto.h
index 8021c404bd5e73..c820751ab56e23 100644
--- a/src/builtin_proto.h
+++ b/src/builtin_proto.h
@@ -12,36 +12,67 @@ extern "C" {
 #ifdef DEFINE_BUILTIN_GLOBALS
 #define DECLARE_BUILTIN(name) \
     JL_CALLABLE(jl_f_##name); \
-    jl_value_t *jl_builtin_##name
+    JL_DLLEXPORT jl_value_t *jl_builtin_##name; \
+    JL_DLLEXPORT jl_fptr_args_t jl_f_##name##_addr = &jl_f_##name
 #else
 #define DECLARE_BUILTIN(name) \
     JL_CALLABLE(jl_f_##name); \
-    extern jl_value_t *jl_builtin_##name
+    JL_DLLEXPORT extern jl_value_t *jl_builtin_##name; \
+    JL_DLLEXPORT extern jl_fptr_args_t jl_f_##name##_addr
 #endif
 
-DECLARE_BUILTIN(throw);      DECLARE_BUILTIN(is);
-DECLARE_BUILTIN(typeof);     DECLARE_BUILTIN(sizeof);
-DECLARE_BUILTIN(issubtype);  DECLARE_BUILTIN(isa);
-DECLARE_BUILTIN(_apply);     DECLARE_BUILTIN(_apply_pure);
-DECLARE_BUILTIN(_apply_latest); DECLARE_BUILTIN(_apply_iterate);
-DECLARE_BUILTIN(_apply_in_world);
-DECLARE_BUILTIN(isdefined);  DECLARE_BUILTIN(nfields);
-DECLARE_BUILTIN(tuple);      DECLARE_BUILTIN(svec);
-DECLARE_BUILTIN(getfield);   DECLARE_BUILTIN(setfield);
-DECLARE_BUILTIN(fieldtype);  DECLARE_BUILTIN(arrayref);
+DECLARE_BUILTIN(applicable);
+DECLARE_BUILTIN(_apply_iterate);
+DECLARE_BUILTIN(_apply_pure);
+DECLARE_BUILTIN(apply_type);
+DECLARE_BUILTIN(arrayref);
+DECLARE_BUILTIN(arrayset);
+DECLARE_BUILTIN(arraysize);
+DECLARE_BUILTIN(_call_in_world);
+DECLARE_BUILTIN(_call_in_world_total);
+DECLARE_BUILTIN(_call_latest);
+DECLARE_BUILTIN(replacefield);
 DECLARE_BUILTIN(const_arrayref);
-DECLARE_BUILTIN(arrayset);   DECLARE_BUILTIN(arraysize);
-DECLARE_BUILTIN(apply_type); DECLARE_BUILTIN(applicable);
-DECLARE_BUILTIN(invoke);     DECLARE_BUILTIN(_expr);
-DECLARE_BUILTIN(typeassert); DECLARE_BUILTIN(ifelse);
-DECLARE_BUILTIN(_typevar);   DECLARE_BUILTIN(_typebody);
+DECLARE_BUILTIN(_expr);
+DECLARE_BUILTIN(fieldtype);
+DECLARE_BUILTIN(getfield);
+DECLARE_BUILTIN(ifelse);
+DECLARE_BUILTIN(invoke);
+DECLARE_BUILTIN(is);
+DECLARE_BUILTIN(isa);
+DECLARE_BUILTIN(isdefined);
+DECLARE_BUILTIN(issubtype);
+DECLARE_BUILTIN(modifyfield);
+DECLARE_BUILTIN(nfields);
+DECLARE_BUILTIN(setfield);
+DECLARE_BUILTIN(sizeof);
+DECLARE_BUILTIN(svec);
+DECLARE_BUILTIN(swapfield);
+DECLARE_BUILTIN(throw);
+DECLARE_BUILTIN(tuple);
+DECLARE_BUILTIN(typeassert);
+DECLARE_BUILTIN(_typebody);
+DECLARE_BUILTIN(typeof);
+DECLARE_BUILTIN(_typevar);
+DECLARE_BUILTIN(donotdelete);
+DECLARE_BUILTIN(getglobal);
+DECLARE_BUILTIN(setglobal);
 
 JL_CALLABLE(jl_f_invoke_kwsorter);
+#ifdef DEFINE_BUILTIN_GLOBALS
+JL_DLLEXPORT jl_fptr_args_t jl_f_invoke_kwsorter_addr = &jl_f_invoke_kwsorter;
+#else
+JL_DLLEXPORT extern jl_fptr_args_t jl_f_invoke_kwsorter_addr;
+#endif
 JL_CALLABLE(jl_f__structtype);
 JL_CALLABLE(jl_f__abstracttype);
 JL_CALLABLE(jl_f__primitivetype);
 JL_CALLABLE(jl_f__setsuper);
 JL_CALLABLE(jl_f__equiv_typedef);
+JL_CALLABLE(jl_f_get_binding_type);
+JL_CALLABLE(jl_f_set_binding_type);
+JL_CALLABLE(jl_f_donotdelete);
+JL_CALLABLE(jl_f_setglobal);
 
 #ifdef __cplusplus
 }
diff --git a/src/builtins.c b/src/builtins.c
index 6d5f3f2779a126..90dc0ec6a0e5c4 100644
--- a/src/builtins.c
+++ b/src/builtins.c
@@ -32,7 +32,7 @@ extern "C" {
 
 // egal and object_id ---------------------------------------------------------
 
-static int bits_equal(void *a, void *b, int sz) JL_NOTSAFEPOINT
+static int bits_equal(const void *a, const void *b, int sz) JL_NOTSAFEPOINT
 {
     switch (sz) {
     case 1:  return *(int8_t*)a == *(int8_t*)b;
@@ -65,51 +65,54 @@ static int bits_equal(void *a, void *b, int sz) JL_NOTSAFEPOINT
 // NOINLINE.
 static int NOINLINE compare_svec(jl_svec_t *a, jl_svec_t *b) JL_NOTSAFEPOINT
 {
-    size_t l = jl_svec_len(a);
+    size_t i, l = jl_svec_len(a);
     if (l != jl_svec_len(b))
         return 0;
-    for(size_t i=0; i < l; i++) {
-        if (!jl_egal(jl_svecref(a,i),jl_svecref(b,i)))
+    for (i = 0; i < l; i++) {
+        if (!jl_egal(jl_svecref(a, i), jl_svecref(b, i)))
             return 0;
     }
     return 1;
 }
 
 // See comment above for an explanation of NOINLINE.
-static int NOINLINE compare_fields(jl_value_t *a, jl_value_t *b, jl_datatype_t *dt) JL_NOTSAFEPOINT
+static int NOINLINE compare_fields(const jl_value_t *a, const jl_value_t *b, jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
-    size_t f, nf = jl_datatype_nfields(dt);
-    for (f = 0; f < nf; f++) {
+    size_t nf = jl_datatype_nfields(dt);
+    // npointers is used at end, but fetched here for locality with nfields.
+    int npointers = ((jl_datatype_t*)dt)->layout->npointers;
+    for (size_t f = 0; f < nf; f++) {
         size_t offs = jl_field_offset(dt, f);
         char *ao = (char*)a + offs;
         char *bo = (char*)b + offs;
         if (jl_field_isptr(dt, f)) {
-            jl_value_t *af = *(jl_value_t**)ao;
-            jl_value_t *bf = *(jl_value_t**)bo;
-            if (af != bf) {
-                if (af == NULL || bf == NULL)
-                    return 0;
-                if (!jl_egal(af, bf))
-                    return 0;
-            }
+            // Save ptr recursion until the end -- only recurse if otherwise equal
+            // Note that we also skip comparing the pointers for null here, because
+            // null fields are rare so it can save CPU to delay this read too.
+            continue;
         }
         else {
             jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(dt, f);
             if (jl_is_uniontype(ft)) {
-                uint8_t asel = ((uint8_t*)ao)[jl_field_size(dt, f) - 1];
-                uint8_t bsel = ((uint8_t*)bo)[jl_field_size(dt, f) - 1];
+                size_t idx = jl_field_size(dt, f) - 1;
+                uint8_t asel = ((uint8_t*)ao)[idx];
+                uint8_t bsel = ((uint8_t*)bo)[idx];
                 if (asel != bsel)
                     return 0;
                 ft = (jl_datatype_t*)jl_nth_union_component((jl_value_t*)ft, asel);
             }
             else if (ft->layout->first_ptr >= 0) {
-                // If the field is a inline immutable that can be can be undef
-                // we need to check to check for undef first since undef struct
+                // If the field is a inline immutable that can be undef
+                // we need to check for undef first since undef struct
                 // may have fields that are different but should still be treated as equal.
-                jl_value_t *ptra = ((jl_value_t**)ao)[ft->layout->first_ptr];
-                jl_value_t *ptrb = ((jl_value_t**)bo)[ft->layout->first_ptr];
-                if (ptra == NULL && ptrb == NULL) {
-                    return 1;
+                int32_t idx = ft->layout->first_ptr;
+                jl_value_t *ptra = ((jl_value_t**)ao)[idx];
+                jl_value_t *ptrb = ((jl_value_t**)bo)[idx];
+                if ((ptra == NULL) != (ptrb == NULL)) {
+                    return 0;
+                }
+                else if (ptra == NULL) { // implies ptrb == NULL
+                    continue; // skip this field (it is #undef)
                 }
             }
             if (!ft->layout->haspadding) {
@@ -123,16 +126,44 @@ static int NOINLINE compare_fields(jl_value_t *a, jl_value_t *b, jl_datatype_t *
             }
         }
     }
+    // If we've gotten here, the objects are bitwise equal, besides their pointer fields.
+    // Now, we will recurse into jl_egal for the pointed-to elements, which might be
+    // arbitrarily expensive.
+    for (size_t p = 0; p < npointers; p++) {
+        size_t offs = jl_ptr_offset(dt, p);
+        jl_value_t *af = ((jl_value_t**)a)[offs];
+        jl_value_t *bf = ((jl_value_t**)b)[offs];
+        if (af != bf) {
+            if (af == NULL || bf == NULL)
+                return 0;
+            if (!jl_egal(af, bf))
+                return 0;
+        }
+    }
     return 1;
 }
 
-static int egal_types(jl_value_t *a, jl_value_t *b, jl_typeenv_t *env) JL_NOTSAFEPOINT
+static int egal_types(const jl_value_t *a, const jl_value_t *b, jl_typeenv_t *env, int tvar_names) JL_NOTSAFEPOINT
 {
     if (a == b)
         return 1;
     jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a);
     if (dt != (jl_datatype_t*)jl_typeof(b))
         return 0;
+    if (dt == jl_datatype_type) {
+        jl_datatype_t *dta = (jl_datatype_t*)a;
+        jl_datatype_t *dtb = (jl_datatype_t*)b;
+        if (dta->name != dtb->name)
+            return 0;
+        size_t i, l = jl_nparams(dta);
+        if (jl_nparams(dtb) != l)
+            return 0;
+        for (i = 0; i < l; i++) {
+            if (!egal_types(jl_tparam(dta, i), jl_tparam(dtb, i), env, tvar_names))
+                return 0;
+        }
+        return 1;
+    }
     if (dt == jl_tvar_type) {
         jl_typeenv_t *pe = env;
         while (pe != NULL) {
@@ -142,45 +173,56 @@ static int egal_types(jl_value_t *a, jl_value_t *b, jl_typeenv_t *env) JL_NOTSAF
         }
         return 0;
     }
-    if (dt == jl_uniontype_type) {
-        return egal_types(((jl_uniontype_t*)a)->a, ((jl_uniontype_t*)b)->a, env) &&
-            egal_types(((jl_uniontype_t*)a)->b, ((jl_uniontype_t*)b)->b, env);
-    }
     if (dt == jl_unionall_type) {
         jl_unionall_t *ua = (jl_unionall_t*)a;
         jl_unionall_t *ub = (jl_unionall_t*)b;
-        if (ua->var->name != ub->var->name)
+        if (tvar_names && ua->var->name != ub->var->name)
             return 0;
-        if (!(egal_types(ua->var->lb, ub->var->lb, env) && egal_types(ua->var->ub, ub->var->ub, env)))
+        if (!(egal_types(ua->var->lb, ub->var->lb, env, tvar_names) && egal_types(ua->var->ub, ub->var->ub, env, tvar_names)))
             return 0;
         jl_typeenv_t e = { ua->var, (jl_value_t*)ub->var, env };
-        return egal_types(ua->body, ub->body, &e);
+        return egal_types(ua->body, ub->body, &e, tvar_names);
     }
-    if (dt == jl_datatype_type) {
-        jl_datatype_t *dta = (jl_datatype_t*)a;
-        jl_datatype_t *dtb = (jl_datatype_t*)b;
-        if (dta->name != dtb->name)
-            return 0;
-        size_t i, l = jl_nparams(dta);
-        if (jl_nparams(dtb) != l)
+    if (dt == jl_uniontype_type) {
+        return egal_types(((jl_uniontype_t*)a)->a, ((jl_uniontype_t*)b)->a, env, tvar_names) &&
+            egal_types(((jl_uniontype_t*)a)->b, ((jl_uniontype_t*)b)->b, env, tvar_names);
+    }
+    if (dt == jl_vararg_type) {
+        jl_vararg_t *vma = (jl_vararg_t*)a;
+        jl_vararg_t *vmb = (jl_vararg_t*)b;
+        jl_value_t *vmaT = vma->T ? vma->T : (jl_value_t*)jl_any_type;
+        jl_value_t *vmbT = vmb->T ? vmb->T : (jl_value_t*)jl_any_type;
+        if (!egal_types(vmaT, vmbT, env, tvar_names))
             return 0;
-        for (i = 0; i < l; i++) {
-            if (!egal_types(jl_tparam(dta, i), jl_tparam(dtb, i), env))
-                return 0;
-        }
-        return 1;
+        if (vma->N && vmb->N)
+            return egal_types(vma->N, vmb->N, env, tvar_names);
+        return !vma->N && !vmb->N;
     }
+    if (dt == jl_symbol_type)
+        return 0;
+    assert(!dt->name->mutabl);
+    return jl_egal__bits(a, b, dt);
+}
+
+JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b)
+{
+    return egal_types(a, b, NULL, 0);
+}
+
+JL_DLLEXPORT int (jl_egal)(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
+{
+    // warning: a,b may NOT have been gc-rooted by the caller
     return jl_egal(a, b);
 }
 
-JL_DLLEXPORT int jl_egal(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
 {
     // warning: a,b may NOT have been gc-rooted by the caller
-    if (a == b)
-        return 1;
-    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a);
-    if (dt != (jl_datatype_t*)jl_typeof(b))
-        return 0;
+    return jl_egal__unboxed_(a, b, dt);
+}
+
+int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
     if (dt == jl_simplevector_type)
         return compare_svec((jl_svec_t*)a, (jl_svec_t*)b);
     if (dt == jl_datatype_type) {
@@ -198,8 +240,12 @@ JL_DLLEXPORT int jl_egal(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE
             return 0;
         return !memcmp(jl_string_data(a), jl_string_data(b), l);
     }
-    if (dt->mutabl)
-        return 0;
+    assert(0 && "unreachable");
+    return 0;
+}
+
+int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
     size_t sz = jl_datatype_size(dt);
     if (sz == 0)
         return 1;
@@ -207,7 +253,7 @@ JL_DLLEXPORT int jl_egal(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE
     if (nf == 0 || !dt->layout->haspadding)
         return bits_equal(a, b, sz);
     if (dt == jl_unionall_type)
-        return egal_types(a, b, NULL);
+        return egal_types(a, b, NULL, 1);
     return compare_fields(a, b, dt);
 }
 
@@ -245,6 +291,8 @@ static uintptr_t NOINLINE hash_svec(jl_svec_t *v) JL_NOTSAFEPOINT
     return h;
 }
 
+static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOTSAFEPOINT;
+
 typedef struct _varidx {
     jl_tvar_t *var;
     struct _varidx *prev;
@@ -290,7 +338,17 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN
         }
         return h;
     }
-    return jl_object_id_((jl_value_t*)tv, v);
+    if (tv == jl_vararg_type) {
+        jl_vararg_t *vm = (jl_vararg_t*)v;
+        jl_value_t *t = vm->T ? vm->T : (jl_value_t*)jl_any_type;
+        jl_value_t *n = vm->N ? vm->N : jl_nothing;
+        return bitmix(type_object_id_(t, env),
+            type_object_id_(n, env));
+    }
+    if (tv == jl_symbol_type)
+        return ((jl_sym_t*)v)->hash;
+    assert(!tv->name->mutabl);
+    return immut_id_(tv, v, tv->hash);
 }
 
 static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOTSAFEPOINT
@@ -323,7 +381,7 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT
                 uint8_t sel = ((uint8_t*)vo)[jl_field_size(dt, f) - 1];
                 fieldtype = (jl_datatype_t*)jl_nth_union_component((jl_value_t*)fieldtype, sel);
             }
-            assert(jl_is_datatype(fieldtype) && !fieldtype->abstract && !fieldtype->mutabl);
+            assert(jl_is_datatype(fieldtype) && !fieldtype->name->abstract && !fieldtype->name->mutabl);
             int32_t first_ptr = fieldtype->layout->first_ptr;
             if (first_ptr >= 0 && ((jl_value_t**)vo)[first_ptr] == NULL) {
                 // If the field is a inline immutable that can be can be undef
@@ -340,22 +398,15 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT
     return h;
 }
 
-JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT
+static uintptr_t NOINLINE jl_object_id__cold(jl_datatype_t *dt, jl_value_t *v) JL_NOTSAFEPOINT
 {
-    if (tv == (jl_value_t*)jl_symbol_type)
-        return ((jl_sym_t*)v)->hash;
-    if (tv == (jl_value_t*)jl_simplevector_type)
+    if (dt == jl_simplevector_type)
         return hash_svec((jl_svec_t*)v);
-    jl_datatype_t *dt = (jl_datatype_t*)tv;
     if (dt == jl_datatype_type) {
         jl_datatype_t *dtv = (jl_datatype_t*)v;
-        if (dtv->isconcretetype)
-            return dtv->hash;
         uintptr_t h = ~dtv->name->hash;
         return bitmix(h, hash_svec(dtv->parameters));
     }
-    if (dt == jl_typename_type)
-        return ((jl_typename_t*)v)->hash;
     if (dt == jl_string_type) {
 #ifdef _P64
         return memhash_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
@@ -363,11 +414,27 @@ JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPO
         return memhash32_seed(jl_string_data(v), jl_string_len(v), 0xedc3b677);
 #endif
     }
-    if (dt->mutabl)
+    if (dt->name->mutabl)
         return inthash((uintptr_t)v);
-    return immut_id_(dt, v, ((jl_datatype_t*)tv)->hash);
+    return immut_id_(dt, v, dt->hash);
 }
 
+JL_DLLEXPORT inline uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT
+{
+    jl_datatype_t *dt = (jl_datatype_t*)tv;
+    if (dt == jl_symbol_type)
+        return ((jl_sym_t*)v)->hash;
+    if (dt == jl_typename_type)
+        return ((jl_typename_t*)v)->hash;
+    if (dt == jl_datatype_type) {
+        jl_datatype_t *dtv = (jl_datatype_t*)v;
+        if (dtv->isconcretetype)
+            return dtv->hash;
+    }
+    return jl_object_id__cold(dt, v);
+}
+
+
 JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT
 {
     return jl_object_id_(jl_typeof(v), v);
@@ -382,8 +449,6 @@ JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT
 JL_CALLABLE(jl_f_is)
 {
     JL_NARGS(===, 2, 2);
-    if (args[0] == args[1])
-        return jl_true;
     return jl_egal(args[0], args[1]) ? jl_true : jl_false;
 }
 
@@ -399,8 +464,8 @@ JL_CALLABLE(jl_f_sizeof)
     jl_value_t *x = args[0];
     if (jl_is_unionall(x) || jl_is_uniontype(x)) {
         x = jl_unwrap_unionall(x);
-        size_t elsize = 0, al = 0;
-        int isinline = jl_islayout_inline(x, &elsize, &al);
+        size_t elsize = 0;
+        int isinline = jl_uniontype_size(x, &elsize);
         if (isinline)
             return jl_box_long(elsize);
         if (!jl_is_datatype(x))
@@ -409,7 +474,7 @@ JL_CALLABLE(jl_f_sizeof)
     if (jl_is_datatype(x)) {
         jl_datatype_t *dx = (jl_datatype_t*)x;
         if (dx->layout == NULL) {
-            if (dx->abstract)
+            if (dx->name->abstract)
                 jl_errorf("Abstract type %s does not have a definite size.", jl_symbol_name(dx->name->name));
             else
                 jl_errorf("Argument is an incomplete %s type and does not have a definite size.", jl_symbol_name(dx->name->name));
@@ -431,7 +496,7 @@ JL_CALLABLE(jl_f_sizeof)
         return jl_box_long((1+jl_svec_len(x))*sizeof(void*));
     jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(x);
     assert(jl_is_datatype(dt));
-    assert(!dt->abstract);
+    assert(!dt->name->abstract);
     return jl_box_long(jl_datatype_size(dt));
 }
 
@@ -504,9 +569,7 @@ STATIC_INLINE void _grow_to(jl_value_t **root, jl_value_t ***oldargs, jl_svec_t
     *n_alloc = newalloc;
 }
 
-static jl_function_t *jl_iterate_func JL_GLOBALLY_ROOTED;
-
-static jl_value_t *do_apply(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_value_t *iterate)
+static jl_value_t *do_apply( jl_value_t **args, uint32_t nargs, jl_value_t *iterate)
 {
     jl_function_t *f = args[0];
     if (nargs == 2) {
@@ -548,12 +611,7 @@ static jl_value_t *do_apply(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl
         }
     }
     if (extra && iterate == NULL) {
-        if (jl_iterate_func == NULL) {
-            jl_iterate_func = jl_get_function(jl_top_module, "iterate");
-            if (jl_iterate_func == NULL)
-                jl_undefined_var_error(jl_symbol("iterate"));
-        }
-        iterate = jl_iterate_func;
+        jl_undefined_var_error(jl_symbol("iterate"));
     }
     // allocate space for the argument array and gc roots for it
     // based on our previous estimates
@@ -665,7 +723,7 @@ static jl_value_t *do_apply(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl
     }
     if (arg_heap) {
         // optimization: keep only the first root, free the others
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
         ((void**)roots)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(1);
 #endif
     }
@@ -677,68 +735,88 @@ static jl_value_t *do_apply(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl
 JL_CALLABLE(jl_f__apply_iterate)
 {
     JL_NARGSV(_apply_iterate, 2);
-    return do_apply(F, args+1, nargs-1, args[0]);
-}
-
-JL_CALLABLE(jl_f__apply)
-{
-    JL_NARGSV(_apply, 1);
-    return do_apply(F, args, nargs, NULL);
+    return do_apply(args + 1, nargs - 1, args[0]);
 }
 
 // this is like `_apply`, but with quasi-exact checks to make sure it is pure
 JL_CALLABLE(jl_f__apply_pure)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    int last_in = ptls->in_pure_callback;
+    jl_task_t *ct = jl_current_task;
+    int last_in = ct->ptls->in_pure_callback;
     jl_value_t *ret = NULL;
     JL_TRY {
-        ptls->in_pure_callback = 1;
+        ct->ptls->in_pure_callback = 1;
         // because this function was declared pure,
         // we should be allowed to run it in any world
         // so we run it in the newest world;
         // because, why not :)
         // and `promote` works better this way
-        size_t last_age = ptls->world_age;
-        ptls->world_age = jl_world_counter;
-        ret = jl_f__apply(NULL, args, nargs);
-        ptls->world_age = last_age;
-        ptls->in_pure_callback = last_in;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        ret = do_apply(args, nargs, NULL);
+        ct->world_age = last_age;
+        ct->ptls->in_pure_callback = last_in;
     }
     JL_CATCH {
-        ptls->in_pure_callback = last_in;
+        ct->ptls->in_pure_callback = last_in;
         jl_rethrow();
     }
     return ret;
 }
 
-// this is like `_apply`, but always runs in the newest world
-JL_CALLABLE(jl_f__apply_latest)
+// this is like a regular call, but always runs in the newest world
+JL_CALLABLE(jl_f__call_latest)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
-    if (!ptls->in_pure_callback)
-        ptls->world_age = jl_world_counter;
-    jl_value_t *ret = jl_f__apply(NULL, args, nargs);
-    ptls->world_age = last_age;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    if (!ct->ptls->in_pure_callback)
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+    jl_value_t *ret = jl_apply(args, nargs);
+    ct->world_age = last_age;
     return ret;
 }
 
-// Like `_apply`, but runs in the specified world.
-// If world > jl_world_counter, run in the latest world.
-JL_CALLABLE(jl_f__apply_in_world)
+// Like call_in_world, but runs in the specified world.
+// If world > jl_atomic_load_acquire(&jl_world_counter), run in the latest world.
+JL_CALLABLE(jl_f__call_in_world)
 {
     JL_NARGSV(_apply_in_world, 2);
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
     JL_TYPECHK(_apply_in_world, ulong, args[0]);
     size_t world = jl_unbox_ulong(args[0]);
-    world = world <= jl_world_counter ? world : jl_world_counter;
-    if (!ptls->in_pure_callback) {
-        ptls->world_age = world;
+    if (!ct->ptls->in_pure_callback) {
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        if (ct->world_age > world)
+            ct->world_age = world;
+    }
+    jl_value_t *ret = jl_apply(&args[1], nargs - 1);
+    ct->world_age = last_age;
+    return ret;
+}
+
+JL_CALLABLE(jl_f__call_in_world_total)
+{
+    JL_NARGSV(_call_in_world_total, 2);
+    JL_TYPECHK(_apply_in_world, ulong, args[0]);
+    jl_task_t *ct = jl_current_task;
+    int last_in = ct->ptls->in_pure_callback;
+    jl_value_t *ret = NULL;
+    size_t last_age = ct->world_age;
+    JL_TRY {
+        ct->ptls->in_pure_callback = 1;
+        size_t world = jl_unbox_ulong(args[0]);
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+        if (ct->world_age > world)
+            ct->world_age = world;
+        ret = jl_apply(&args[1], nargs - 1);
+        ct->world_age = last_age;
+        ct->ptls->in_pure_callback = last_in;
+    }
+    JL_CATCH {
+        ct->ptls->in_pure_callback = last_in;
+        jl_rethrow();
     }
-    jl_value_t *ret = do_apply(NULL, args+1, nargs-1, NULL);
-    ptls->world_age = last_age;
     return ret;
 }
 
@@ -753,10 +831,10 @@ JL_CALLABLE(jl_f_tuple)
     JL_GC_PROMISE_ROOTED(tt); // it is a concrete type
     if (tt->instance != NULL)
         return tt->instance;
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(tt), tt);
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(tt), tt);
     for (i = 0; i < nargs; i++)
-        set_nth_field(tt, (void*)jv, i, args[i]);
+        set_nth_field(tt, jv, i, args[i], 0);
     return jv;
 }
 
@@ -774,64 +852,183 @@ JL_CALLABLE(jl_f_svec)
 
 // struct operations ------------------------------------------------------------
 
-JL_CALLABLE(jl_f_getfield)
+enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing)
 {
-    if (nargs == 3) {
-        JL_TYPECHK(getfield, bool, args[2]);
-        nargs -= 1;
-    }
-    JL_NARGS(getfield, 2, 2);
-    jl_value_t *v = args[0];
-    jl_value_t *vt = (jl_value_t*)jl_typeof(v);
-    if (vt == (jl_value_t*)jl_module_type) {
-        JL_TYPECHK(getfield, symbol, args[1]);
-        return jl_eval_global_var((jl_module_t*)v, (jl_sym_t*)args[1]);
+    if (order == jl_not_atomic_sym)
+        return jl_memory_order_notatomic;
+    if (order == jl_unordered_sym && (loading ^ storing))
+        return jl_memory_order_unordered;
+    if (order == jl_monotonic_sym && (loading || storing))
+        return jl_memory_order_monotonic;
+    if (order == jl_acquire_sym && loading)
+        return jl_memory_order_acquire;
+    if (order == jl_release_sym && storing)
+        return jl_memory_order_release;
+    if (order == jl_acquire_release_sym && loading && storing)
+        return jl_memory_order_acq_rel;
+    if (order == jl_sequentially_consistent_sym)
+        return jl_memory_order_seq_cst;
+    return jl_memory_order_invalid;
+}
+
+enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing)
+{
+    enum jl_memory_order mo = jl_get_atomic_order(order, loading, storing);
+    if (mo < 0) // invalid
+        jl_atomic_error("invalid atomic ordering");
+    return mo;
+}
+
+static inline size_t get_checked_fieldindex(const char *name, jl_datatype_t *st, jl_value_t *v, jl_value_t *arg, int mutabl)
+{
+    if (mutabl) {
+        if (st == jl_module_type)
+            jl_error("cannot assign variables in other modules");
+        if (!st->name->mutabl)
+            jl_errorf("%s: immutable struct of type %s cannot be changed", name, jl_symbol_name(st->name->name));
     }
-    if (!jl_is_datatype(vt))
-        jl_type_error("getfield", (jl_value_t*)jl_datatype_type, v);
-    jl_datatype_t *st = (jl_datatype_t*)vt;
     size_t idx;
-    if (jl_is_long(args[1])) {
-        idx = jl_unbox_long(args[1])-1;
+    if (jl_is_long(arg)) {
+        idx = jl_unbox_long(arg) - 1;
         if (idx >= jl_datatype_nfields(st))
-            jl_bounds_error(args[0], args[1]);
+            jl_bounds_error(v, arg);
     }
     else {
-        JL_TYPECHK(getfield, symbol, args[1]);
-        jl_sym_t *fld = (jl_sym_t*)args[1];
-        idx = jl_field_index(st, fld, 1);
+        JL_TYPECHKS(name, symbol, arg);
+        idx = jl_field_index(st, (jl_sym_t*)arg, 1);
+    }
+    if (mutabl && jl_field_isconst(st, idx)) {
+        jl_errorf("%s: const field .%s of type %s cannot be changed", name,
+                jl_symbol_name((jl_sym_t*)jl_svec_ref(jl_field_names(st), idx)), jl_symbol_name(st->name->name));
+    }
+    return idx;
+}
+
+JL_CALLABLE(jl_f_getfield)
+{
+    enum jl_memory_order order = jl_memory_order_unspecified;
+    JL_NARGS(getfield, 2, 4);
+    if (nargs == 4) {
+        JL_TYPECHK(getfield, symbol, args[2]);
+        JL_TYPECHK(getfield, bool, args[3]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0);
+    }
+    else if (nargs == 3) {
+        if (!jl_is_bool(args[2])) {
+            JL_TYPECHK(getfield, symbol, args[2]);
+            order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0);
+        }
     }
-    return jl_get_nth_field_checked(v, idx);
+    jl_value_t *v = args[0];
+    jl_value_t *vt = jl_typeof(v);
+    if (vt == (jl_value_t*)jl_module_type)
+        return jl_f_getglobal(NULL, args, 2); // we just ignore the atomic order and boundschecks
+    jl_datatype_t *st = (jl_datatype_t*)vt;
+    size_t idx = get_checked_fieldindex("getfield", st, v, args[1], 0);
+    int isatomic = jl_field_isatomic(st, idx);
+    if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified)
+        jl_atomic_error("getfield: non-atomic field cannot be accessed atomically");
+    if (isatomic && order == jl_memory_order_notatomic)
+        jl_atomic_error("getfield: atomic field cannot be accessed non-atomically");
+    v = jl_get_nth_field_checked(v, idx);
+    if (order >= jl_memory_order_acq_rel || order == jl_memory_order_acquire)
+        jl_fence(); // `v` already had at least consume ordering
+    return v;
 }
 
 JL_CALLABLE(jl_f_setfield)
 {
-    JL_NARGS(setfield!, 3, 3);
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    JL_NARGS(setfield!, 3, 4);
+    if (nargs == 4) {
+        JL_TYPECHK(setfield!, symbol, args[3]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 0, 1);
+    }
     jl_value_t *v = args[0];
     jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
-    assert(jl_is_datatype(st));
-    if (st == jl_module_type)
-        jl_error("cannot assign variables in other modules");
-    if (!st->mutabl)
-        jl_errorf("setfield! immutable struct of type %s cannot be changed", jl_symbol_name(st->name->name));
-    size_t idx;
-    if (jl_is_long(args[1])) {
-        idx = jl_unbox_long(args[1]) - 1;
-        if (idx >= jl_datatype_nfields(st))
-            jl_bounds_error(args[0], args[1]);
-    }
-    else {
-        JL_TYPECHK(setfield!, symbol, args[1]);
-        idx = jl_field_index(st, (jl_sym_t*)args[1], 1);
-    }
+    size_t idx = get_checked_fieldindex("setfield!", st, v, args[1], 1);
+    int isatomic = !!jl_field_isatomic(st, idx);
+    if (isatomic == (order == jl_memory_order_notatomic))
+        jl_atomic_error(isatomic ? "setfield!: atomic field cannot be written non-atomically"
+                                 : "setfield!: non-atomic field cannot be written atomically");
     jl_value_t *ft = jl_field_type_concrete(st, idx);
-    if (!jl_isa(args[2], ft)) {
+    if (!jl_isa(args[2], ft))
         jl_type_error("setfield!", ft, args[2]);
-    }
-    set_nth_field(st, (void*)v, idx, args[2]);
+    if (order >= jl_memory_order_acq_rel || order == jl_memory_order_release)
+        jl_fence(); // `st->[idx]` will have at least relaxed ordering
+    set_nth_field(st, v, idx, args[2], isatomic);
     return args[2];
 }
 
+JL_CALLABLE(jl_f_swapfield)
+{
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    JL_NARGS(swapfield!, 3, 4);
+    if (nargs == 4) {
+        JL_TYPECHK(swapfield!, symbol, args[3]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 1, 1);
+    }
+    jl_value_t *v = args[0];
+    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
+    size_t idx = get_checked_fieldindex("swapfield!", st, v, args[1], 1);
+    int isatomic = !!jl_field_isatomic(st, idx);
+    if (isatomic == (order == jl_memory_order_notatomic))
+        jl_atomic_error(isatomic ? "swapfield!: atomic field cannot be written non-atomically"
+                                 : "swapfield!: non-atomic field cannot be written atomically");
+    v = swap_nth_field(st, v, idx, args[2], isatomic); // always seq_cst, if isatomic needed at all
+    return v;
+}
+
+JL_CALLABLE(jl_f_modifyfield)
+{
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    JL_NARGS(modifyfield!, 4, 5);
+    if (nargs == 5) {
+        JL_TYPECHK(modifyfield!, symbol, args[4]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 1);
+    }
+    jl_value_t *v = args[0];
+    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
+    size_t idx = get_checked_fieldindex("modifyfield!", st, v, args[1], 1);
+    int isatomic = !!jl_field_isatomic(st, idx);
+    if (isatomic == (order == jl_memory_order_notatomic))
+        jl_atomic_error(isatomic ? "modifyfield!: atomic field cannot be written non-atomically"
+                                 : "modifyfield!: non-atomic field cannot be written atomically");
+    v = modify_nth_field(st, v, idx, args[2], args[3], isatomic); // always seq_cst, if isatomic needed at all
+    return v;
+}
+
+JL_CALLABLE(jl_f_replacefield)
+{
+    enum jl_memory_order success_order = jl_memory_order_notatomic;
+    JL_NARGS(replacefield!, 4, 6);
+    if (nargs >= 5) {
+        JL_TYPECHK(replacefield!, symbol, args[4]);
+        success_order = jl_get_atomic_order_checked((jl_sym_t*)args[4], 1, 1);
+    }
+    enum jl_memory_order failure_order = success_order;
+    if (nargs == 6) {
+        JL_TYPECHK(replacefield!, symbol, args[5]);
+        failure_order = jl_get_atomic_order_checked((jl_sym_t*)args[5], 1, 0);
+    }
+    if (failure_order > success_order)
+        jl_atomic_error("invalid atomic ordering");
+    // TODO: filter more invalid ordering combinations?
+    jl_value_t *v = args[0];
+    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
+    size_t idx = get_checked_fieldindex("replacefield!", st, v, args[1], 1);
+    int isatomic = !!jl_field_isatomic(st, idx);
+    if (isatomic == (success_order == jl_memory_order_notatomic))
+        jl_atomic_error(isatomic ? "replacefield!: atomic field cannot be written non-atomically"
+                                 : "replacefield!: non-atomic field cannot be written atomically");
+    if (isatomic == (failure_order == jl_memory_order_notatomic))
+        jl_atomic_error(isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
+                                 : "replacefield!: non-atomic field cannot be accessed atomically");
+    v = replace_nth_field(st, v, idx, args[2], args[3], isatomic); // always seq_cst, if isatomic needed at all
+    return v;
+}
+
+
 static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow)
 {
     if (jl_is_unionall(t)) {
@@ -897,7 +1094,7 @@ static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow)
     int nf = jl_svec_len(types);
     if (nf > 0 && field_index >= nf-1 && st->name == jl_tuple_typename) {
         jl_value_t *ft = jl_field_type(st, nf-1);
-        if (jl_is_vararg_type(ft))
+        if (jl_is_vararg(ft))
             return jl_unwrap_vararg(ft);
     }
     if (field_index < 0 || field_index >= nf) {
@@ -911,11 +1108,10 @@ static jl_value_t *get_fieldtype(jl_value_t *t, jl_value_t *f, int dothrow)
 
 JL_CALLABLE(jl_f_fieldtype)
 {
+    JL_NARGS(fieldtype, 2, 3);
     if (nargs == 3) {
         JL_TYPECHK(fieldtype, bool, args[2]);
-        nargs -= 1;
     }
-    JL_NARGS(fieldtype, 2, 2);
     return get_fieldtype(args[0], args[1], 1);
 }
 
@@ -930,35 +1126,135 @@ JL_CALLABLE(jl_f_isdefined)
 {
     jl_module_t *m = NULL;
     jl_sym_t *s = NULL;
-    JL_NARGS(isdefined, 2, 2);
-    if (!jl_is_module(args[0])) {
-        jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(args[0]);
-        assert(jl_is_datatype(vt));
-        size_t idx;
-        if (jl_is_long(args[1])) {
-            idx = jl_unbox_long(args[1]) - 1;
-            if (idx >= jl_datatype_nfields(vt))
-                return jl_false;
+    JL_NARGS(isdefined, 2, 3);
+    enum jl_memory_order order = jl_memory_order_unspecified;
+    if (nargs == 3) {
+        JL_TYPECHK(isdefined, symbol, args[2]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0);
+    }
+    if (jl_is_module(args[0])) {
+        JL_TYPECHK(isdefined, symbol, args[1]);
+        m = (jl_module_t*)args[0];
+        s = (jl_sym_t*)args[1];
+        return jl_boundp(m, s) ? jl_true : jl_false; // is seq_cst already
+    }
+    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(args[0]);
+    assert(jl_is_datatype(vt));
+    size_t idx;
+    if (jl_is_long(args[1])) {
+        idx = jl_unbox_long(args[1]) - 1;
+        if (idx >= jl_datatype_nfields(vt)) {
+            if (order != jl_memory_order_unspecified)
+                jl_atomic_error("isdefined: atomic ordering cannot be specified for nonexistent field");
+            return jl_false;
         }
-        else {
-            JL_TYPECHK(isdefined, symbol, args[1]);
-            idx = jl_field_index(vt, (jl_sym_t*)args[1], 0);
-            if ((int)idx == -1)
-                return jl_false;
+    }
+    else {
+        JL_TYPECHK(isdefined, symbol, args[1]);
+        idx = jl_field_index(vt, (jl_sym_t*)args[1], 0);
+        if ((int)idx == -1) {
+            if (order != jl_memory_order_unspecified)
+                jl_atomic_error("isdefined: atomic ordering cannot be specified for nonexistent field");
+            return jl_false;
         }
-        return jl_field_isdefined(args[0], idx) ? jl_true : jl_false;
     }
-    JL_TYPECHK(isdefined, module, args[0]);
-    JL_TYPECHK(isdefined, symbol, args[1]);
-    m = (jl_module_t*)args[0];
-    s = (jl_sym_t*)args[1];
-    return jl_boundp(m, s) ? jl_true : jl_false;
+    int isatomic = jl_field_isatomic(vt, idx);
+    if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified)
+        jl_atomic_error("isdefined: non-atomic field cannot be accessed atomically");
+    if (isatomic && order == jl_memory_order_notatomic)
+        jl_atomic_error("isdefined: atomic field cannot be accessed non-atomically");
+    int v = jl_field_isdefined(args[0], idx);
+    if (v == 2) {
+        if (order > jl_memory_order_notatomic)
+            jl_fence(); // isbits case has no ordering already
+    }
+    else {
+        if (order >= jl_memory_order_acq_rel || order == jl_memory_order_acquire)
+            jl_fence(); // `v` already gave at least consume ordering
+    }
+    return v ? jl_true : jl_false;
+}
+
+
+// module bindings
+
+JL_CALLABLE(jl_f_getglobal)
+{
+    enum jl_memory_order order = jl_memory_order_monotonic;
+    JL_NARGS(getglobal, 2, 3);
+    if (nargs == 3) {
+        JL_TYPECHK(getglobal, symbol, args[2]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[2], 1, 0);
+    }
+    JL_TYPECHK(getglobal, module, args[0]);
+    JL_TYPECHK(getglobal, symbol, args[1]);
+    if (order == jl_memory_order_notatomic)
+        jl_atomic_error("getglobal: module binding cannot be read non-atomically");
+    jl_value_t *v = jl_eval_global_var((jl_module_t*)args[0], (jl_sym_t*)args[1]);
+    // is seq_cst already, no fence needed
+    return v;
+}
+
+JL_CALLABLE(jl_f_setglobal)
+{
+    enum jl_memory_order order = jl_memory_order_monotonic;
+    JL_NARGS(setglobal!, 3, 4);
+    if (nargs == 4) {
+        JL_TYPECHK(setglobal!, symbol, args[3]);
+        order = jl_get_atomic_order_checked((jl_sym_t*)args[3], 0, 1);
+    }
+    JL_TYPECHK(setglobal!, module, args[0]);
+    JL_TYPECHK(setglobal!, symbol, args[1]);
+    if (order == jl_memory_order_notatomic)
+        jl_atomic_error("setglobal!: module binding cannot be written non-atomically");
+    // is seq_cst already, no fence needed
+    jl_binding_t *b = jl_get_binding_wr_or_error((jl_module_t*)args[0], (jl_sym_t*)args[1]);
+    jl_checked_assignment(b, args[2]);
+    return args[2];
+}
+
+JL_CALLABLE(jl_f_get_binding_type)
+{
+    JL_NARGS(get_binding_type, 2, 2);
+    JL_TYPECHK(get_binding_type, module, args[0]);
+    JL_TYPECHK(get_binding_type, symbol, args[1]);
+    jl_module_t *mod = (jl_module_t*)args[0];
+    jl_sym_t *sym = (jl_sym_t*)args[1];
+    jl_value_t *ty = jl_binding_type(mod, sym);
+    if (ty == (jl_value_t*)jl_nothing) {
+        jl_binding_t *b = jl_get_binding_wr(mod, sym, 0);
+        if (b && b->owner == mod) {
+            jl_value_t *old_ty = NULL;
+            jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
+            return jl_atomic_load_relaxed(&b->ty);
+        }
+        return (jl_value_t*)jl_any_type;
+    }
+    return ty;
+}
+
+JL_CALLABLE(jl_f_set_binding_type)
+{
+    JL_NARGS(set_binding_type!, 2, 3);
+    JL_TYPECHK(set_binding_type!, module, args[0]);
+    JL_TYPECHK(set_binding_type!, symbol, args[1]);
+    jl_value_t *ty = nargs == 2 ? (jl_value_t*)jl_any_type : args[2];
+    JL_TYPECHK(set_binding_type!, type, ty);
+    jl_binding_t *b = jl_get_binding_wr((jl_module_t*)args[0], (jl_sym_t*)args[1], 1);
+    jl_value_t *old_ty = NULL;
+    if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, ty) && ty != old_ty) {
+        if (nargs == 2)
+            return jl_nothing;
+        jl_errorf("cannot set type for global %s. It already has a value or is already set to a different type.",
+                  jl_symbol_name(b->name));
+    }
+    return jl_nothing;
 }
 
 
 // apply_type -----------------------------------------------------------------
 
-static int valid_type_param(jl_value_t *v)
+int jl_valid_type_param(jl_value_t *v)
 {
     if (jl_is_tuple(v)) {
         // NOTE: tuples of symbols are not currently bits types, but have been
@@ -972,7 +1268,7 @@ static int valid_type_param(jl_value_t *v)
         }
         return 1;
     }
-    if (jl_is_vararg_type(v))
+    if (jl_is_vararg(v))
         return 0;
     // TODO: maybe more things
     return jl_is_type(v) || jl_is_typevar(v) || jl_is_symbol(v) || jl_isbits(jl_typeof(v));
@@ -987,11 +1283,11 @@ JL_CALLABLE(jl_f_apply_type)
             jl_value_t *pi = args[i];
             // TODO: should possibly only allow Types and TypeVars, but see
             // https://github.com/JuliaLang/julia/commit/85f45974a581ab9af955bac600b90d9ab00f093b#commitcomment-13041922
-            if (jl_is_vararg_type(pi)) {
+            if (jl_is_vararg(pi)) {
                 if (i != nargs-1)
                     jl_type_error_rt("Tuple", "non-final parameter", (jl_value_t*)jl_type_type, pi);
             }
-            else if (!valid_type_param(pi)) {
+            else if (!jl_valid_type_param(pi)) {
                 jl_type_error_rt("Tuple", "parameter", (jl_value_t*)jl_type_type, pi);
             }
         }
@@ -1002,10 +1298,21 @@ JL_CALLABLE(jl_f_apply_type)
         // substituting typevars (a valid_type_param check here isn't sufficient).
         return (jl_value_t*)jl_type_union(&args[1], nargs-1);
     }
+    else if (jl_is_vararg(args[0])) {
+        jl_vararg_t *vm = (jl_vararg_t*)args[0];
+        if (!vm->T) {
+            JL_NARGS(apply_type, 2, 3);
+            return (jl_value_t*)jl_wrap_vararg(args[1], nargs == 3 ? args[2] : NULL);
+        }
+        else if (!vm->N) {
+            JL_NARGS(apply_type, 2, 2);
+            return (jl_value_t*)jl_wrap_vararg(vm->T, args[1]);
+        }
+    }
     else if (jl_is_unionall(args[0])) {
         for(i=1; i < nargs; i++) {
             jl_value_t *pi = args[i];
-            if (!valid_type_param(pi)) {
+            if (!jl_valid_type_param(pi)) {
                 jl_type_error_rt("Type", "parameter",
                                  jl_isa(pi, (jl_value_t*)jl_number_type) ?
                                  (jl_value_t*)jl_long_type : (jl_value_t*)jl_type_type,
@@ -1022,7 +1329,7 @@ JL_CALLABLE(jl_f_apply_type)
 JL_CALLABLE(jl_f_applicable)
 {
     JL_NARGSV(applicable, 1);
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     return jl_method_lookup(args, nargs, world) != NULL ? jl_true : jl_false;
 }
 
@@ -1056,7 +1363,7 @@ JL_CALLABLE(jl_f_invoke_kwsorter)
         if (nt < jl_page_size/sizeof(jl_value_t*)) {
             jl_value_t **types = (jl_value_t**)alloca(nt*sizeof(jl_value_t*));
             types[0] = (jl_value_t*)jl_namedtuple_type;
-            types[1] = jl_typeof(func);
+            types[1] = jl_is_type(func) ? (jl_value_t*)jl_wrap_Type(func) : jl_typeof(func);
             for (i = 2; i < nt; i++)
                 types[i] = jl_tparam(argtypes, i - 2);
             argtypes = (jl_value_t*)jl_apply_tuple_type_v(types, nt);
@@ -1065,7 +1372,7 @@ JL_CALLABLE(jl_f_invoke_kwsorter)
             jl_svec_t *types = jl_alloc_svec_uninit(nt);
             JL_GC_PUSH1(&types);
             jl_svecset(types, 0, jl_namedtuple_type);
-            jl_svecset(types, 1, jl_typeof(func));
+            jl_svecset(types, 1, jl_is_type(func) ? (jl_value_t*)jl_wrap_Type(func) : jl_typeof(func));
             for (i = 2; i < nt; i++)
                 jl_svecset(types, i, jl_tparam(argtypes, i - 2));
             argtypes = (jl_value_t*)jl_apply_tuple_type(types);
@@ -1088,10 +1395,10 @@ JL_CALLABLE(jl_f_invoke_kwsorter)
 
 jl_expr_t *jl_exprn(jl_sym_t *head, size_t n)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_array_t *ar = jl_alloc_vec_any(n);
     JL_GC_PUSH1(&ar);
-    jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t),
+    jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ct->ptls, sizeof(jl_expr_t),
                                             jl_expr_type);
     ex->head = head;
     ex->args = ar;
@@ -1101,14 +1408,14 @@ jl_expr_t *jl_exprn(jl_sym_t *head, size_t n)
 
 JL_CALLABLE(jl_f__expr)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     JL_NARGSV(Expr, 1);
     JL_TYPECHK(Expr, symbol, args[0]);
     jl_array_t *ar = jl_alloc_vec_any(nargs-1);
     JL_GC_PUSH1(&ar);
     for(size_t i=0; i < nargs-1; i++)
         jl_array_ptr_set(ar, i, args[i+1]);
-    jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ptls, sizeof(jl_expr_t),
+    jl_expr_t *ex = (jl_expr_t*)jl_gc_alloc(ct->ptls, sizeof(jl_expr_t),
                                             jl_expr_type);
     ex->head = (jl_sym_t*)args[0];
     ex->args = ar;
@@ -1119,12 +1426,12 @@ JL_CALLABLE(jl_f__expr)
 // Typevar constructor for internal use
 JL_DLLEXPORT jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_t *ub)
 {
-    if ((lb != jl_bottom_type && !jl_is_type(lb) && !jl_is_typevar(lb)) || jl_is_vararg_type(lb))
+    if (lb != jl_bottom_type && !jl_is_type(lb) && !jl_is_typevar(lb))
         jl_type_error_rt("TypeVar", "lower bound", (jl_value_t *)jl_type_type, lb);
-    if ((ub != (jl_value_t *)jl_any_type && !jl_is_type(ub) && !jl_is_typevar(ub)) || jl_is_vararg_type(ub))
+    if (ub != (jl_value_t *)jl_any_type && !jl_is_type(ub) && !jl_is_typevar(ub))
         jl_type_error_rt("TypeVar", "upper bound", (jl_value_t *)jl_type_type, ub);
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ptls, sizeof(jl_tvar_t), jl_tvar_type);
+    jl_task_t *ct = jl_current_task;
+    jl_tvar_t *tv = (jl_tvar_t *)jl_gc_alloc(ct->ptls, sizeof(jl_tvar_t), jl_tvar_type);
     tv->name = name;
     tv->lb = lb;
     tv->ub = ub;
@@ -1208,18 +1515,20 @@ JL_CALLABLE(jl_f_arrayset)
 
 JL_CALLABLE(jl_f__structtype)
 {
-    JL_NARGS(_structtype, 6, 6);
+    JL_NARGS(_structtype, 7, 7);
     JL_TYPECHK(_structtype, module, args[0]);
     JL_TYPECHK(_structtype, symbol, args[1]);
     JL_TYPECHK(_structtype, simplevector, args[2]);
     JL_TYPECHK(_structtype, simplevector, args[3]);
-    JL_TYPECHK(_structtype, bool, args[4]);
-    JL_TYPECHK(_structtype, long, args[5]);
+    JL_TYPECHK(_structtype, simplevector, args[4]);
+    JL_TYPECHK(_structtype, bool, args[5]);
+    JL_TYPECHK(_structtype, long, args[6]);
     jl_value_t *fieldnames = args[3];
+    jl_value_t *fieldattrs = args[4];
     jl_datatype_t *dt = NULL;
     dt = jl_new_datatype((jl_sym_t*)args[1], (jl_module_t*)args[0], NULL, (jl_svec_t*)args[2],
-                         (jl_svec_t*)fieldnames, NULL,
-                         0, args[4]==jl_true ? 1 : 0, jl_unbox_long(args[5]));
+                         (jl_svec_t*)fieldnames, NULL, (jl_svec_t*)fieldattrs,
+                         0, args[5]==jl_true ? 1 : 0, jl_unbox_long(args[6]));
     return dt->name->wrapper;
 }
 
@@ -1257,7 +1566,6 @@ static void jl_set_datatype_super(jl_datatype_t *tt, jl_value_t *super)
     if (!jl_is_datatype(super) || !jl_is_abstracttype(super) ||
         tt->super != NULL ||
         tt->name == ((jl_datatype_t*)super)->name ||
-        jl_subtype(super, (jl_value_t*)jl_vararg_type) ||
         jl_is_tuple_type(super) ||
         jl_is_namedtuple_type(super) ||
         jl_subtype(super, (jl_value_t*)jl_type_type) ||
@@ -1278,6 +1586,11 @@ JL_CALLABLE(jl_f__setsuper)
     return jl_nothing;
 }
 
+JL_CALLABLE(jl_f_donotdelete)
+{
+    return jl_nothing;
+}
+
 static int equiv_field_types(jl_value_t *old, jl_value_t *ft)
 {
     size_t nf = jl_svec_len(ft);
@@ -1299,6 +1612,48 @@ static int equiv_field_types(jl_value_t *old, jl_value_t *ft)
     return 1;
 }
 
+// If a field can reference its enclosing type, then the inlining
+// recursive depth is not statically bounded for some layouts, so we cannot
+// inline it. The only way fields can reference this type (due to
+// syntax-enforced restrictions) is via being passed as a type parameter. Thus
+// we can conservatively check this by examining only the parameters of the
+// dependent types.
+// affects_layout is a hack introduced by #35275 to workaround a problem
+// introduced by #34223: it checks whether we will potentially need to
+// compute the layout of the object before we have fully computed the types of
+// the fields during recursion over the allocation of the parameters for the
+// field types (of the concrete subtypes)
+static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout) JL_NOTSAFEPOINT
+{
+    if (jl_is_uniontype(p))
+        return references_name(((jl_uniontype_t*)p)->a, name, affects_layout) ||
+               references_name(((jl_uniontype_t*)p)->b, name, affects_layout);
+    if (jl_is_unionall(p))
+        return references_name((jl_value_t*)((jl_unionall_t*)p)->var->lb, name, 0) ||
+               references_name((jl_value_t*)((jl_unionall_t*)p)->var->ub, name, 0) ||
+               references_name(((jl_unionall_t*)p)->body, name, affects_layout);
+    if (jl_is_typevar(p))
+        return 0; // already checked by unionall, if applicable
+    if (jl_is_datatype(p)) {
+        jl_datatype_t *dp = (jl_datatype_t*)p;
+        if (affects_layout && dp->name == name)
+            return 1;
+        // affects_layout checks whether we will need to attempt to layout this
+        // type (based on whether all copies of it have the same layout) in
+        // that case, we still need to check the recursive parameters for
+        // layout recursion happening also, but we know it won't itself cause
+        // problems for the layout computation
+        affects_layout = ((jl_datatype_t*)jl_unwrap_unionall(dp->name->wrapper))->layout == NULL;
+        size_t i, l = jl_nparams(p);
+        for (i = 0; i < l; i++) {
+            if (references_name(jl_tparam(p, i), name, affects_layout))
+                return 1;
+        }
+    }
+    return 0;
+}
+
+
 JL_CALLABLE(jl_f__typebody)
 {
     JL_NARGS(_typebody!, 1, 2);
@@ -1310,7 +1665,7 @@ JL_CALLABLE(jl_f__typebody)
         size_t nf = jl_svec_len(ft);
         for (size_t i = 0; i < nf; i++) {
             jl_value_t *elt = jl_svecref(ft, i);
-            if ((!jl_is_type(elt) && !jl_is_typevar(elt)) || jl_is_vararg_type(elt)) {
+            if (!jl_is_type(elt) && !jl_is_typevar(elt)) {
                 jl_type_error_rt(jl_symbol_name(dt->name->name),
                                  "type definition",
                                  (jl_value_t*)jl_type_type, elt);
@@ -1323,6 +1678,22 @@ JL_CALLABLE(jl_f__typebody)
         else {
             dt->types = (jl_svec_t*)ft;
             jl_gc_wb(dt, ft);
+            // If a supertype can reference the same type, then we may not be
+            // able to compute the layout of the object before needing to
+            // publish it, so we must assume it cannot be inlined, if that
+            // check passes, then we also still need to check the fields too.
+            if (!dt->name->mutabl && (nf == 0 || !references_name((jl_value_t*)dt->super, dt->name, 1))) {
+                int mayinlinealloc = 1;
+                size_t i;
+                for (i = 0; i < nf; i++) {
+                    jl_value_t *fld = jl_svecref(ft, i);
+                    if (references_name(fld, dt->name, 1)) {
+                        mayinlinealloc = 0;
+                        break;
+                    }
+                }
+                dt->name->mayinlinealloc = mayinlinealloc;
+            }
         }
     }
 
@@ -1348,10 +1719,18 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb)
     jl_datatype_t *dtb = (jl_datatype_t*)jl_unwrap_unionall(tb);
     if (!(jl_typeof(dta) == jl_typeof(dtb) &&
           dta->name->name == dtb->name->name &&
-          dta->abstract == dtb->abstract &&
-          dta->mutabl == dtb->mutabl &&
+          dta->name->abstract == dtb->name->abstract &&
+          dta->name->mutabl == dtb->name->mutabl &&
+          dta->name->n_uninitialized == dtb->name->n_uninitialized &&
           (jl_svec_len(jl_field_names(dta)) != 0 || dta->size == dtb->size) &&
-          dta->ninitialized == dtb->ninitialized &&
+          (dta->name->atomicfields == NULL
+           ? dtb->name->atomicfields == NULL
+           : (dtb->name->atomicfields != NULL &&
+              memcmp(dta->name->atomicfields, dtb->name->atomicfields, (jl_svec_len(dta->name->names) + 31) / 32 * sizeof(uint32_t)) == 0)) &&
+          (dta->name->constfields == NULL
+           ? dtb->name->constfields == NULL
+           : (dtb->name->constfields != NULL &&
+              memcmp(dta->name->constfields, dtb->name->constfields, (jl_svec_len(dta->name->names) + 31) / 32 * sizeof(uint32_t)) == 0)) &&
           jl_egal((jl_value_t*)jl_field_names(dta), (jl_value_t*)jl_field_names(dtb)) &&
           jl_nparams(dta) == jl_nparams(dtb)))
         return 0;
@@ -1376,7 +1755,7 @@ static int equiv_type(jl_value_t *ta, jl_value_t *tb)
     while (jl_is_unionall(a)) {
         jl_unionall_t *ua = (jl_unionall_t*)a;
         jl_unionall_t *ub = (jl_unionall_t*)b;
-        if (!jl_egal(ua->var->lb, ub->var->lb) || !jl_egal(ua->var->ub, ub->var->ub) ||
+        if (!jl_types_egal(ua->var->lb, ub->var->lb) || !jl_types_egal(ua->var->ub, ub->var->ub) ||
             ua->var->name != ub->var->name)
             goto no;
         a = jl_instantiate_unionall(ua, (jl_value_t*)ub->var);
@@ -1402,14 +1781,13 @@ static unsigned intrinsic_nargs[num_intrinsics];
 
 JL_CALLABLE(jl_f_intrinsic_call)
 {
-    JL_NARGSV(intrinsic_call, 1);
     JL_TYPECHK(intrinsic_call, intrinsic, F);
     enum intrinsic f = (enum intrinsic)*(uint32_t*)jl_data_ptr(F);
     if (f == cglobal && nargs == 1)
         f = cglobal_auto;
     unsigned fargs = intrinsic_nargs[f];
     if (!fargs)
-        jl_error("this intrinsic must be compiled to be called");
+        jl_errorf("`%s` must be compiled to be called", jl_intrinsic_name(f));
     JL_NARGS(intrinsic_call, fargs, fargs);
 
     union {
@@ -1418,6 +1796,7 @@ JL_CALLABLE(jl_f_intrinsic_call)
         jl_value_t *(*call2)(jl_value_t*, jl_value_t*);
         jl_value_t *(*call3)(jl_value_t*, jl_value_t*, jl_value_t*);
         jl_value_t *(*call4)(jl_value_t*, jl_value_t*, jl_value_t*, jl_value_t*);
+        jl_value_t *(*call5)(jl_value_t*, jl_value_t*, jl_value_t*, jl_value_t*, jl_value_t*);
     } fptr;
     fptr.fptr = runtime_fp[f];
     switch (fargs) {
@@ -1429,10 +1808,12 @@ JL_CALLABLE(jl_f_intrinsic_call)
             return fptr.call3(args[0], args[1], args[2]);
         case 4:
             return fptr.call4(args[0], args[1], args[2], args[3]);
+        case 5:
+            return fptr.call5(args[0], args[1], args[2], args[3], args[4]);
         default:
             assert(0 && "unexpected number of arguments to an intrinsic function");
     }
-    gc_debug_critical_error();
+    jl_gc_debug_critical_error();
     abort();
 }
 
@@ -1488,6 +1869,9 @@ void jl_init_intrinsic_functions(void) JL_GC_DISABLED
     inm->parent = jl_core_module;
     jl_set_const(jl_core_module, jl_symbol("Intrinsics"), (jl_value_t*)inm);
     jl_mk_builtin_func(jl_intrinsic_type, "IntrinsicFunction", jl_f_intrinsic_call);
+    jl_mk_builtin_func(
+        (jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type),
+        "OpaqueClosure", jl_f_opaque_closure_call);
 
 #define ADD_I(name, nargs) add_intrinsic(inm, #name, name);
 #define ADD_HIDDEN(name, nargs)
@@ -1506,7 +1890,10 @@ static void add_builtin(const char *name, jl_value_t *v)
 jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b)
 {
     assert(jl_isa(b, (jl_value_t*)jl_builtin_type));
-    return ((jl_typemap_entry_t*)jl_gf_mtable(b)->cache)->func.linfo->cache->specptr.fptr1;
+    jl_typemap_entry_t *entry = (jl_typemap_entry_t*)jl_atomic_load_relaxed(&jl_gf_mtable(b)->defs);
+    jl_method_instance_t *mi = jl_atomic_load_relaxed(&entry->func.method->unspecialized);
+    jl_code_instance_t *ci = jl_atomic_load_relaxed(&mi->cache);
+    return jl_atomic_load_relaxed(&ci->specptr.fptr1);
 }
 
 static jl_value_t *add_builtin_func(const char *name, jl_fptr_args_t fptr)
@@ -1529,10 +1916,19 @@ void jl_init_primitives(void) JL_GC_DISABLED
     // field access
     jl_builtin_getfield = add_builtin_func("getfield",  jl_f_getfield);
     jl_builtin_setfield = add_builtin_func("setfield!",  jl_f_setfield);
+    jl_builtin_swapfield = add_builtin_func("swapfield!",  jl_f_swapfield);
+    jl_builtin_modifyfield = add_builtin_func("modifyfield!",  jl_f_modifyfield);
+    jl_builtin_replacefield = add_builtin_func("replacefield!",  jl_f_replacefield);
     jl_builtin_fieldtype = add_builtin_func("fieldtype", jl_f_fieldtype);
     jl_builtin_nfields = add_builtin_func("nfields", jl_f_nfields);
     jl_builtin_isdefined = add_builtin_func("isdefined", jl_f_isdefined);
 
+    // module bindings
+    jl_builtin_getglobal = add_builtin_func("getglobal", jl_f_getglobal);
+    jl_builtin_setglobal = add_builtin_func("setglobal!", jl_f_setglobal);
+    add_builtin_func("get_binding_type", jl_f_get_binding_type);
+    add_builtin_func("set_binding_type!", jl_f_set_binding_type);
+
     // array primitives
     jl_builtin_arrayref = add_builtin_func("arrayref", jl_f_arrayref);
     jl_builtin_const_arrayref = add_builtin_func("const_arrayref", jl_f_arrayref);
@@ -1550,13 +1946,13 @@ void jl_init_primitives(void) JL_GC_DISABLED
 
     // internal functions
     jl_builtin_apply_type = add_builtin_func("apply_type", jl_f_apply_type);
-    jl_builtin__apply = add_builtin_func("_apply", jl_f__apply);
     jl_builtin__apply_iterate = add_builtin_func("_apply_iterate", jl_f__apply_iterate);
     jl_builtin__expr = add_builtin_func("_expr", jl_f__expr);
     jl_builtin_svec = add_builtin_func("svec", jl_f_svec);
     add_builtin_func("_apply_pure", jl_f__apply_pure);
-    add_builtin_func("_apply_latest", jl_f__apply_latest);
-    add_builtin_func("_apply_in_world", jl_f__apply_in_world);
+    add_builtin_func("_call_latest", jl_f__call_latest);
+    add_builtin_func("_call_in_world", jl_f__call_in_world);
+    add_builtin_func("_call_in_world_total", jl_f__call_in_world_total);
     add_builtin_func("_typevar", jl_f__typevar);
     add_builtin_func("_structtype", jl_f__structtype);
     add_builtin_func("_abstracttype", jl_f__abstracttype);
@@ -1564,6 +1960,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin_func("_setsuper!", jl_f__setsuper);
     jl_builtin__typebody = add_builtin_func("_typebody!", jl_f__typebody);
     add_builtin_func("_equiv_typedef", jl_f__equiv_typedef);
+    jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete);
 
     // builtin types
     add_builtin("Any", (jl_value_t*)jl_any_type);
@@ -1577,7 +1974,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("Union", (jl_value_t*)jl_uniontype_type);
     add_builtin("TypeofBottom", (jl_value_t*)jl_typeofbottom_type);
     add_builtin("Tuple", (jl_value_t*)jl_anytuple_type);
-    add_builtin("Vararg", (jl_value_t*)jl_vararg_type);
+    add_builtin("TypeofVararg", (jl_value_t*)jl_vararg_type);
     add_builtin("SimpleVector", (jl_value_t*)jl_simplevector_type);
 
     add_builtin("Module", (jl_value_t*)jl_module_type);
@@ -1594,6 +1991,8 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("Argument", (jl_value_t*)jl_argument_type);
     add_builtin("Const", (jl_value_t*)jl_const_type);
     add_builtin("PartialStruct", (jl_value_t*)jl_partial_struct_type);
+    add_builtin("PartialOpaque", (jl_value_t*)jl_partial_opaque_type);
+    add_builtin("InterConditional", (jl_value_t*)jl_interconditional_type);
     add_builtin("MethodMatch", (jl_value_t*)jl_method_match_type);
     add_builtin("IntrinsicFunction", (jl_value_t*)jl_intrinsic_type);
     add_builtin("Function", (jl_value_t*)jl_function_type);
@@ -1604,6 +2003,7 @@ void jl_init_primitives(void) JL_GC_DISABLED
     add_builtin("Ptr", (jl_value_t*)jl_pointer_type);
     add_builtin("LLVMPtr", (jl_value_t*)jl_llvmpointer_type);
     add_builtin("Task", (jl_value_t*)jl_task_type);
+    add_builtin("OpaqueClosure", (jl_value_t*)jl_opaque_closure_type);
 
     add_builtin("AbstractArray", (jl_value_t*)jl_abstractarray_type);
     add_builtin("DenseArray", (jl_value_t*)jl_densearray_type);
@@ -1626,10 +2026,11 @@ void jl_init_primitives(void) JL_GC_DISABLED
 
     add_builtin("Bool", (jl_value_t*)jl_bool_type);
     add_builtin("UInt8", (jl_value_t*)jl_uint8_type);
-    add_builtin("Int32", (jl_value_t*)jl_int32_type);
-    add_builtin("Int64", (jl_value_t*)jl_int64_type);
+    add_builtin("UInt16", (jl_value_t*)jl_uint16_type);
     add_builtin("UInt32", (jl_value_t*)jl_uint32_type);
     add_builtin("UInt64", (jl_value_t*)jl_uint64_type);
+    add_builtin("Int32", (jl_value_t*)jl_int32_type);
+    add_builtin("Int64", (jl_value_t*)jl_int64_type);
 #ifdef _P64
     add_builtin("Int", (jl_value_t*)jl_int64_type);
 #else
diff --git a/src/ccall.cpp b/src/ccall.cpp
index 576323924819bf..3e912b11021eb1 100644
--- a/src/ccall.cpp
+++ b/src/ccall.cpp
@@ -1,9 +1,49 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 // --- the ccall, cglobal, and llvm intrinsics ---
-#include "llvm/Support/Path.h" // for llvm::sys::path
-#include <llvm/Bitcode/BitcodeReader.h>
-#include <llvm/Linker/Linker.h>
+
+// Mark our stats as being from ccall
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "julia_irgen_ccall"
+
+STATISTIC(RuntimeSymLookups, "Number of runtime symbol lookups emitted");
+STATISTIC(PLTThunks, "Number of PLT Thunks emitted");
+STATISTIC(PLT, "Number of direct PLT entries emitted");
+STATISTIC(EmittedCGlobals, "Number of C globals emitted");
+STATISTIC(EmittedLLVMCalls, "Number of llvmcall intrinsics emitted");
+
+#define _CCALL_STAT(name) jl_transformed_ccall__##name
+#define CCALL_STAT(name) _CCALL_STAT(name)
+#define TRANSFORMED_CCALL_STAT(name) STATISTIC(_CCALL_STAT(name), "Number of " #name " ccalls intercepted")
+TRANSFORMED_CCALL_STAT(jl_array_ptr);
+TRANSFORMED_CCALL_STAT(jl_value_ptr);
+TRANSFORMED_CCALL_STAT(jl_cpu_pause);
+TRANSFORMED_CCALL_STAT(jl_cpu_wake);
+TRANSFORMED_CCALL_STAT(jl_gc_safepoint);
+TRANSFORMED_CCALL_STAT(jl_get_ptls_states);
+TRANSFORMED_CCALL_STAT(jl_threadid);
+TRANSFORMED_CCALL_STAT(jl_gc_enable_disable_finalizers_internal);
+TRANSFORMED_CCALL_STAT(jl_get_current_task);
+TRANSFORMED_CCALL_STAT(jl_set_next_task);
+TRANSFORMED_CCALL_STAT(jl_sigatomic_begin);
+TRANSFORMED_CCALL_STAT(jl_sigatomic_end);
+TRANSFORMED_CCALL_STAT(jl_svec_len);
+TRANSFORMED_CCALL_STAT(jl_svec_isassigned);
+TRANSFORMED_CCALL_STAT(jl_svec_ref);
+TRANSFORMED_CCALL_STAT(jl_array_isassigned);
+TRANSFORMED_CCALL_STAT(jl_string_ptr);
+TRANSFORMED_CCALL_STAT(jl_symbol_name);
+TRANSFORMED_CCALL_STAT(memcpy);
+TRANSFORMED_CCALL_STAT(memset);
+TRANSFORMED_CCALL_STAT(memmove);
+TRANSFORMED_CCALL_STAT(jl_object_id);
+#undef TRANSFORMED_CCALL_STAT
+
+STATISTIC(EmittedCCalls, "Number of ccalls emitted");
+STATISTIC(DeferredCCallLookups, "Number of ccalls looked up at runtime");
+STATISTIC(LiteralCCalls, "Number of ccalls directly emitted through a pointer");
+STATISTIC(RetBoxedCCalls, "Number of ccalls that were retboxed");
+STATISTIC(SRetCCalls, "Number of ccalls that were marked sret");
 
 // somewhat unusual variable, in that aotcompile wants to get the address of this for a sanity check
 GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M)
@@ -14,38 +54,44 @@ GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M)
 // Find or create the GVs for the library and symbol lookup.
 // Return `runtime_lib` (whether the library name is a string)
 // The `lib` and `sym` GV returned may not be in the current module.
-static bool runtime_sym_gvs(jl_codegen_params_t &emission_context, const char *f_lib, const char *f_name,
+static bool runtime_sym_gvs(jl_codectx_t &ctx, const char *f_lib, const char *f_name,
                             GlobalVariable *&lib, GlobalVariable *&sym)
 {
-    Module *M = emission_context.shared_module(jl_LLVMContext);
+    auto &TSM = ctx.emission_context.shared_module(*jl_Module);
+    //Safe b/c emission context holds context lock
+    auto M = TSM.getModuleUnlocked();
     bool runtime_lib = false;
     GlobalVariable *libptrgv;
     jl_codegen_params_t::SymMapGV *symMap;
 #ifdef _OS_WINDOWS_
-    if ((intptr_t)f_lib == 1) {
+    if ((intptr_t)f_lib == (intptr_t)JL_EXE_LIBNAME) {
         libptrgv = prepare_global_in(M, jlexe_var);
-        symMap = &emission_context.symMapExe;
+        symMap = &ctx.emission_context.symMapExe;
+    }
+    else if ((intptr_t)f_lib == (intptr_t)JL_LIBJULIA_INTERNAL_DL_LIBNAME) {
+        libptrgv = prepare_global_in(M, jldlli_var);
+        symMap = &ctx.emission_context.symMapDlli;
     }
-    else if ((intptr_t)f_lib == 2) {
+    else if ((intptr_t)f_lib == (intptr_t)JL_LIBJULIA_DL_LIBNAME) {
         libptrgv = prepare_global_in(M, jldll_var);
-        symMap = &emission_context.symMapDl;
+        symMap = &ctx.emission_context.symMapDll;
     }
     else
 #endif
     if (f_lib == NULL) {
         libptrgv = jl_emit_RTLD_DEFAULT_var(M);
-        symMap = &emission_context.symMapDefault;
+        symMap = &ctx.emission_context.symMapDefault;
     }
     else {
         std::string name = "ccalllib_";
         name += llvm::sys::path::filename(f_lib);
-        name += std::to_string(globalUnique++);
+        name += std::to_string(globalUniqueGeneratedNames++);
         runtime_lib = true;
-        auto &libgv = emission_context.libMapGV[f_lib];
+        auto &libgv = ctx.emission_context.libMapGV[f_lib];
         if (libgv.first == NULL) {
-            libptrgv = new GlobalVariable(*M, T_pint8, false,
+            libptrgv = new GlobalVariable(*M, getInt8PtrTy(M->getContext()), false,
                                           GlobalVariable::ExternalLinkage,
-                                          Constant::getNullValue(T_pint8), name);
+                                          Constant::getNullValue(getInt8PtrTy(M->getContext())), name);
             libgv.first = libptrgv;
         }
         else {
@@ -59,7 +105,8 @@ static bool runtime_sym_gvs(jl_codegen_params_t &emission_context, const char *f
         std::string name = "ccall_";
         name += f_name;
         name += "_";
-        name += std::to_string(globalUnique++);
+        name += std::to_string(globalUniqueGeneratedNames++);
+        auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext());
         llvmgv = new GlobalVariable(*M, T_pvoidfunc, false,
                                     GlobalVariable::ExternalLinkage,
                                     Constant::getNullValue(T_pvoidfunc), name);
@@ -79,6 +126,7 @@ static Value *runtime_sym_lookup(
         GlobalVariable *libptrgv,
         GlobalVariable *llvmgv, bool runtime_lib)
 {
+    ++RuntimeSymLookups;
     // in pseudo-code, this function emits the following:
     //   global HMODULE *libptrgv
     //   global void **llvmgv
@@ -86,10 +134,11 @@ static Value *runtime_sym_lookup(
     //       *llvmgv = jl_load_and_lookup(f_lib, f_name, libptrgv);
     //   }
     //   return (*llvmgv)
+    auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(irbuilder.getContext());
     BasicBlock *enter_bb = irbuilder.GetInsertBlock();
-    BasicBlock *dlsym_lookup = BasicBlock::Create(jl_LLVMContext, "dlsym");
-    BasicBlock *ccall_bb = BasicBlock::Create(jl_LLVMContext, "ccall");
-    Constant *initnul = ConstantPointerNull::get((PointerType*)T_pvoidfunc);
+    BasicBlock *dlsym_lookup = BasicBlock::Create(irbuilder.getContext(), "dlsym");
+    BasicBlock *ccall_bb = BasicBlock::Create(irbuilder.getContext(), "ccall");
+    Constant *initnul = ConstantPointerNull::get(T_pvoidfunc);
     LoadInst *llvmf_orig = irbuilder.CreateAlignedLoad(T_pvoidfunc, llvmgv, Align(sizeof(void*)));
     // This in principle needs a consume ordering so that load from
     // this pointer sees a valid value. However, this is not supported by
@@ -121,7 +170,7 @@ static Value *runtime_sym_lookup(
         }
         else {
             // f_lib is actually one of the special sentinel values
-            libname = ConstantExpr::getIntToPtr(ConstantInt::get(T_size, (uintptr_t)f_lib), T_pint8);
+            libname = ConstantExpr::getIntToPtr(ConstantInt::get(getSizeTy(irbuilder.getContext()), (uintptr_t)f_lib), getInt8PtrTy(irbuilder.getContext()));
         }
         llvmf = irbuilder.CreateCall(prepare_call_in(jl_builderModule(irbuilder), jldlsym_func),
                     { libname, nameval, libptrgv });
@@ -154,6 +203,7 @@ static Value *runtime_sym_lookup(
         PointerType *funcptype, const char *f_lib, jl_value_t *lib_expr,
         const char *f_name, Function *f)
 {
+    auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(ctx.builder.getContext());
     GlobalVariable *libptrgv;
     GlobalVariable *llvmgv;
     bool runtime_lib;
@@ -165,13 +215,13 @@ static Value *runtime_sym_lookup(
         std::string gvname = "libname_";
         gvname += f_name;
         gvname += "_";
-        gvname += std::to_string(globalUnique++);
+        gvname += std::to_string(globalUniqueGeneratedNames++);
         llvmgv = new GlobalVariable(*jl_Module, T_pvoidfunc, false,
                                     GlobalVariable::ExternalLinkage,
                                     Constant::getNullValue(T_pvoidfunc), gvname);
     }
     else {
-        runtime_lib = runtime_sym_gvs(ctx.emission_context, f_lib, f_name, libptrgv, llvmgv);
+        runtime_lib = runtime_sym_gvs(ctx, f_lib, f_name, libptrgv, llvmgv);
         libptrgv = prepare_global_in(jl_Module, libptrgv);
     }
     llvmgv = prepare_global_in(jl_Module, llvmgv);
@@ -181,18 +231,20 @@ static Value *runtime_sym_lookup(
 // Emit a "PLT" entry that will be lazily initialized
 // when being called the first time.
 static GlobalVariable *emit_plt_thunk(
-        jl_codegen_params_t &emission_context,
+        jl_codectx_t &ctx,
         FunctionType *functype, const AttributeList &attrs,
         CallingConv::ID cc, const char *f_lib, const char *f_name,
         GlobalVariable *libptrgv, GlobalVariable *llvmgv,
         bool runtime_lib)
 {
-    Module *M = emission_context.shared_module(jl_LLVMContext);
+    ++PLTThunks;
+    auto &TSM = ctx.emission_context.shared_module(*jl_Module);
+    Module *M = TSM.getModuleUnlocked();
     PointerType *funcptype = PointerType::get(functype, 0);
     libptrgv = prepare_global_in(M, libptrgv);
     llvmgv = prepare_global_in(M, llvmgv);
     std::string fname;
-    raw_string_ostream(fname) << "jlplt_" << f_name << "_" << globalUnique++;
+    raw_string_ostream(fname) << "jlplt_" << f_name << "_" << globalUniqueGeneratedNames++;
     Function *plt = Function::Create(functype,
                                      GlobalVariable::ExternalLinkage,
                                      fname, M);
@@ -200,21 +252,23 @@ static GlobalVariable *emit_plt_thunk(
     if (cc != CallingConv::C)
         plt->setCallingConv(cc);
     fname += "_got";
+    auto T_pvoidfunc = JuliaType::get_pvoidfunc_ty(M->getContext());
     GlobalVariable *got = new GlobalVariable(*M, T_pvoidfunc, false,
                                              GlobalVariable::ExternalLinkage,
                                              ConstantExpr::getBitCast(plt, T_pvoidfunc),
                                              fname);
-    BasicBlock *b0 = BasicBlock::Create(jl_LLVMContext, "top", plt);
+    BasicBlock *b0 = BasicBlock::Create(M->getContext(), "top", plt);
     IRBuilder<> irbuilder(b0);
-    Value *ptr = runtime_sym_lookup(emission_context, irbuilder, NULL, funcptype, f_lib, NULL, f_name, plt, libptrgv,
+    Value *ptr = runtime_sym_lookup(ctx.emission_context, irbuilder, NULL, funcptype, f_lib, NULL, f_name, plt, libptrgv,
                                     llvmgv, runtime_lib);
     StoreInst *store = irbuilder.CreateAlignedStore(irbuilder.CreateBitCast(ptr, T_pvoidfunc), got, Align(sizeof(void*)));
     store->setAtomic(AtomicOrdering::Release);
     SmallVector<Value*, 16> args;
     for (Function::arg_iterator arg = plt->arg_begin(), arg_e = plt->arg_end(); arg != arg_e; ++arg)
         args.push_back(&*arg);
+    assert(cast<PointerType>(ptr->getType())->isOpaqueOrPointeeTypeMatches(functype));
     CallInst *ret = irbuilder.CreateCall(
-        cast<FunctionType>(ptr->getType()->getPointerElementType()),
+        functype,
         ptr, ArrayRef<Value*>(args));
     ret->setAttributes(attrs);
     if (cc != CallingConv::C)
@@ -222,21 +276,21 @@ static GlobalVariable *emit_plt_thunk(
     // NoReturn function can trigger LLVM verifier error when declared as
     // MustTail since other passes might replace the `ret` with
     // `unreachable` (LLVM should probably accept `unreachable`).
-    if (attrs.hasAttribute(AttributeList::FunctionIndex,
-                           Attribute::NoReturn)) {
+    if (hasFnAttr(attrs, Attribute::NoReturn)) {
         irbuilder.CreateUnreachable();
     }
     else {
         // musttail support is very bad on ARM, PPC, PPC64 (as of LLVM 3.9)
         // Known failures includes vararg (not needed here) and sret.
-#if (defined(_CPU_X86_) || defined(_CPU_X86_64_) || \
-                        defined(_CPU_AARCH64_))
+
+#if (defined(_CPU_X86_) || defined(_CPU_X86_64_) || (defined(_CPU_AARCH64_) && !defined(_OS_DARWIN_)))
         // Ref https://bugs.llvm.org/show_bug.cgi?id=47058
         // LLVM, as of 10.0.1 emits wrong/worse code when musttail is set
+        // Apple silicon macs give an LLVM ERROR if musttail is set here #44107.
         if (!attrs.hasAttrSomewhere(Attribute::ByVal))
             ret->setTailCallKind(CallInst::TCK_MustTail);
 #endif
-        if (functype->getReturnType() == T_void) {
+        if (functype->getReturnType() == getVoidTy(irbuilder.getContext())) {
             irbuilder.CreateRetVoid();
         }
         else {
@@ -254,24 +308,25 @@ static Value *emit_plt(
         const AttributeList &attrs,
         CallingConv::ID cc, const char *f_lib, const char *f_name)
 {
-    assert(imaging_mode);
+    ++PLT;
+    assert(ctx.emission_context.imaging);
     // Don't do this for vararg functions so that the `musttail` is only
     // an optimization and is not required to function correctly.
     assert(!functype->isVarArg());
     GlobalVariable *libptrgv;
     GlobalVariable *llvmgv;
-    bool runtime_lib = runtime_sym_gvs(ctx.emission_context, f_lib, f_name, libptrgv, llvmgv);
+    bool runtime_lib = runtime_sym_gvs(ctx, f_lib, f_name, libptrgv, llvmgv);
     PointerType *funcptype = PointerType::get(functype, 0);
 
     auto &pltMap = ctx.emission_context.allPltMap[attrs];
     auto key = std::make_tuple(llvmgv, functype, cc);
     GlobalVariable *&sharedgot = pltMap[key];
     if (!sharedgot) {
-        sharedgot = emit_plt_thunk(ctx.emission_context,
+        sharedgot = emit_plt_thunk(ctx,
                 functype, attrs, cc, f_lib, f_name, libptrgv, llvmgv, runtime_lib);
     }
     GlobalVariable *got = prepare_global_in(jl_Module, sharedgot);
-    LoadInst *got_val = ctx.builder.CreateAlignedLoad(got, Align(sizeof(void*)));
+    LoadInst *got_val = ctx.builder.CreateAlignedLoad(got->getValueType(), got, Align(sizeof(void*)));
     // See comment in `runtime_sym_lookup` above. This in principle needs a
     // consume ordering too. This is even less likely to cause issues though
     // since the only thing we do to this loaded pointer is to call it
@@ -286,9 +341,9 @@ static Value *emit_plt(
 class AbiLayout {
 public:
     virtual ~AbiLayout() {}
-    virtual bool use_sret(jl_datatype_t *ty) = 0;
-    virtual bool needPassByRef(jl_datatype_t *ty, AttrBuilder&) = 0;
-    virtual Type *preferred_llvm_type(jl_datatype_t *ty, bool isret) const = 0;
+    virtual bool use_sret(jl_datatype_t *ty, LLVMContext &ctx) = 0;
+    virtual bool needPassByRef(jl_datatype_t *ty, AttrBuilder&, LLVMContext &ctx, Type* llvm_t) = 0;
+    virtual Type *preferred_llvm_type(jl_datatype_t *ty, bool isret, LLVMContext &ctx) const = 0;
 };
 
 // Determine if object of bitstype ty maps to a native x86 SIMD type (__m128, __m256, or __m512) in C
@@ -341,7 +396,7 @@ static bool is_native_simd_type(jl_datatype_t *dt) {
 #elif defined _CPU_PPC64_
   typedef ABI_PPC64leLayout DefaultAbiState;
 #else
-#  warning "ccall is defaulting to llvm ABI, since no platform ABI has been defined for this CPU/OS combination"
+#  pragma message("ccall is defaulting to llvm ABI, since no platform ABI has been defined for this CPU/OS combination")
   typedef ABI_LLVMLayout DefaultAbiState;
 #endif
 
@@ -355,7 +410,7 @@ static Value *llvm_type_rewrite(
     if (target_type == from_type)
         return v;
 
-    if (from_type == T_void || isa<UndefValue>(v))
+    if (from_type == getVoidTy(ctx.builder.getContext()) || isa<UndefValue>(v))
         return UndefValue::get(target_type); // convert undef (unreachable) -> undef (target_type)
 
     assert(from_type->isPointerTy() == target_type->isPointerTy()); // expect that all ABIs consider all pointers to be equivalent
@@ -390,7 +445,7 @@ static Value *llvm_type_rewrite(
     // sizes.
     Value *from;
     Value *to;
-    const DataLayout &DL = jl_data_layout;
+    const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
     unsigned align = std::max(DL.getPrefTypeAlignment(target_type), DL.getPrefTypeAlignment(from_type));
     if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) {
         to = emit_static_alloca(ctx, target_type);
@@ -403,12 +458,12 @@ static Value *llvm_type_rewrite(
         to = emit_bitcast(ctx, from, target_type->getPointerTo());
     }
     ctx.builder.CreateAlignedStore(v, from, Align(align));
-    return ctx.builder.CreateAlignedLoad(to, Align(align));
+    return ctx.builder.CreateAlignedLoad(target_type, to, Align(align));
 }
 
 // --- argument passing and scratch space utilities ---
 
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static Value *runtime_apply_type_env(jl_codectx_t &ctx, jl_value_t *ty)
 {
     // box if concrete type was not statically known
@@ -416,13 +471,12 @@ static Value *runtime_apply_type_env(jl_codectx_t &ctx, jl_value_t *ty)
         literal_pointer_val(ctx, ty),
         literal_pointer_val(ctx, (jl_value_t*)ctx.linfo->def.method->sig),
         ctx.builder.CreateInBoundsGEP(
-                T_prjlvalue,
+                ctx.types().T_prjlvalue,
                 ctx.spvals_ptr,
-                ConstantInt::get(T_size, sizeof(jl_svec_t) / sizeof(jl_value_t*)))
+                ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(jl_svec_t) / sizeof(jl_value_t*)))
     };
     auto call = ctx.builder.CreateCall(prepare_call(jlapplytype_func), makeArrayRef(args));
-    call->addAttribute(AttributeList::ReturnIndex,
-                       Attribute::getWithAlignment(jl_LLVMContext, Align(16)));
+    addRetAttr(call, Attribute::getWithAlignment(ctx.builder.getContext(), Align(16)));
     return call;
 }
 
@@ -460,9 +514,9 @@ static void typeassert_input(jl_codectx_t &ctx, const jl_cgval_t &jvinfo, jl_val
                 Value *vx = boxed(ctx, jvinfo);
                 Value *istype = ctx.builder.CreateICmpNE(
                         ctx.builder.CreateCall(prepare_call(jlisa_func), { vx, boxed(ctx, jlto_runtime) }),
-                        ConstantInt::get(T_int32, 0));
-                BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext, "fail", ctx.f);
-                BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "pass", ctx.f);
+                        ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
+                BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f);
+                BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass", ctx.f);
                 ctx.builder.CreateCondBr(istype, passBB, failBB);
 
                 ctx.builder.SetInsertPoint(failBB);
@@ -489,7 +543,7 @@ static Value *julia_to_native(
         assert(!byRef); // don't expect any ABI to pass pointers by pointer
         return boxed(ctx, jvinfo);
     }
-    assert(jl_is_datatype(jlto) && julia_struct_has_layout((jl_datatype_t*)jlto, jlto_env));
+    assert(jl_is_datatype(jlto) && jl_struct_try_layout((jl_datatype_t*)jlto));
 
     typeassert_input(ctx, jvinfo, jlto, jlto_env, argn);
     if (!byRef)
@@ -526,7 +580,7 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
 
     jl_value_t *ptr = static_eval(ctx, arg);
     if (ptr == NULL) {
-        if (jl_is_expr(arg) && ((jl_expr_t*)arg)->head == call_sym && jl_expr_nargs(arg) == 3 &&
+        if (jl_is_expr(arg) && ((jl_expr_t*)arg)->head == jl_call_sym && jl_expr_nargs(arg) == 3 &&
             jl_is_globalref(jl_exprarg(arg,0)) && jl_globalref_mod(jl_exprarg(arg,0)) == jl_core_module &&
             jl_globalref_name(jl_exprarg(arg,0)) == jl_symbol("tuple")) {
             // attempt to interpret a non-constant 2-tuple expression as (func_name, lib_name()), where
@@ -553,7 +607,7 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
             emit_cpointercheck(ctx, arg1, errmsg);
         }
         arg1 = update_julia_type(ctx, arg1, (jl_value_t*)jl_voidpointer_type);
-        jl_ptr = emit_unbox(ctx, T_size, arg1, (jl_value_t*)jl_voidpointer_type);
+        jl_ptr = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), arg1, (jl_value_t*)jl_voidpointer_type);
     }
     else {
         out.gcroot = ptr;
@@ -569,10 +623,22 @@ static void interpret_symbol_arg(jl_codectx_t &ctx, native_sym_arg_t &out, jl_va
         if (f_name != NULL) {
             // just symbol, default to JuliaDLHandle
             // will look in process symbol table
+            if (!llvmcall) {
+                void *symaddr;
+                std::string iname("i");
+                iname += f_name;
+                if (jl_dlsym(jl_libjulia_internal_handle, iname.c_str(), &symaddr, 0)) {
 #ifdef _OS_WINDOWS_
-            if (!llvmcall)
-                f_lib = jl_dlfind_win32(f_name);
+                    f_lib = JL_LIBJULIA_INTERNAL_DL_LIBNAME;
+#endif
+                    f_name = jl_symbol_name(jl_symbol(iname.c_str()));
+                }
+#ifdef _OS_WINDOWS_
+                else {
+                    f_lib = jl_dlfind_win32(f_name);
+                }
 #endif
+            }
         }
         else if (jl_is_cpointer_type(jl_typeof(ptr))) {
             fptr = *(void(**)(void))jl_data_ptr(ptr);
@@ -606,6 +672,7 @@ static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const
 
 static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 {
+    ++EmittedCGlobals;
     JL_NARGS(cglobal, 1, 2);
     jl_value_t *rt = NULL;
     Value *res;
@@ -616,7 +683,7 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
         rt = static_eval(ctx, args[2]);
         if (rt == NULL) {
             JL_GC_POP();
-            jl_cgval_t argv[2];
+            jl_cgval_t argv[2] = {jl_cgval_t(ctx.builder.getContext()), jl_cgval_t(ctx.builder.getContext())};
             argv[0] = emit_expr(ctx, args[1]);
             argv[1] = emit_expr(ctx, args[2]);
             return emit_runtime_call(ctx, JL_I::cglobal, argv, nargs);
@@ -628,7 +695,7 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
     else {
         rt = (jl_value_t*)jl_voidpointer_type;
     }
-    Type *lrt = T_size;
+    Type *lrt = getSizeTy(ctx.builder.getContext());
     assert(lrt == julia_type_to_llvm(ctx, rt));
 
     interpret_symbol_arg(ctx, sym, args[1], "cglobal", false);
@@ -638,15 +705,15 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
     }
     else if (sym.fptr != NULL) {
         res = ConstantInt::get(lrt, (uint64_t)sym.fptr);
-        if (imaging_mode)
+        if (ctx.emission_context.imaging)
             jl_printf(JL_STDERR,"WARNING: literal address used in cglobal for %s; code cannot be statically compiled\n", sym.f_name);
     }
     else {
         if (sym.lib_expr) {
-            res = runtime_sym_lookup(ctx, cast<PointerType>(T_pint8), NULL, sym.lib_expr, sym.f_name, ctx.f);
+            res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), NULL, sym.lib_expr, sym.f_name, ctx.f);
         }
-        else if (imaging_mode) {
-            res = runtime_sym_lookup(ctx, cast<PointerType>(T_pint8), sym.f_lib, NULL, sym.f_name, ctx.f);
+        else if (ctx.emission_context.imaging) {
+            res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f);
             res = ctx.builder.CreatePtrToInt(res, lrt);
         }
         else {
@@ -656,7 +723,7 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
             if (!libsym || !jl_dlsym(libsym, sym.f_name, &symaddr, 0)) {
                 // Error mode, either the library or the symbol couldn't be find during compiletime.
                 // Fallback to a runtime symbol lookup.
-                res = runtime_sym_lookup(ctx, cast<PointerType>(T_pint8), sym.f_lib, NULL, sym.f_name, ctx.f);
+                res = runtime_sym_lookup(ctx, cast<PointerType>(getInt8PtrTy(ctx.builder.getContext())), sym.f_lib, NULL, sym.f_name, ctx.f);
                 res = ctx.builder.CreatePtrToInt(res, lrt);
             } else {
                 // since we aren't saving this code, there's no sense in
@@ -674,6 +741,7 @@ static jl_cgval_t emit_cglobal(jl_codectx_t &ctx, jl_value_t **args, size_t narg
 
 static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 {
+    ++EmittedLLVMCalls;
     // parse and validate arguments
     //
     // two forms of llvmcall are supported:
@@ -691,7 +759,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     ir = static_eval(ctx, ir_arg);
     if (!ir) {
         emit_error(ctx, "error statically evaluating llvm IR argument");
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
     if (jl_is_ssavalue(args[2]) && !jl_is_long(ctx.source->ssavaluetypes)) {
         jl_value_t *rtt = jl_arrayref((jl_array_t*)ctx.source->ssavaluetypes, ((jl_ssavalue_t*)args[2])->id - 1);
@@ -702,7 +770,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         rt = static_eval(ctx, args[2]);
         if (!rt) {
             emit_error(ctx, "error statically evaluating llvmcall return type");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
     }
     if (jl_is_ssavalue(args[3]) && !jl_is_long(ctx.source->ssavaluetypes)) {
@@ -714,31 +782,31 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         at = static_eval(ctx, args[3]);
         if (!at) {
             emit_error(ctx, "error statically evaluating llvmcall argument tuple");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
     }
     if (jl_is_tuple(ir)) {
         // if the IR is a tuple, we expect (mod, fn)
         if (jl_nfields(ir) != 2) {
             emit_error(ctx, "Tuple as first argument to llvmcall must have exactly two children");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         entry = jl_fieldref(ir, 1);
         if (!jl_is_string(entry)) {
             emit_error(ctx, "Function name passed to llvmcall must be a string");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         ir = jl_fieldref(ir, 0);
 
         if (!jl_is_string(ir) && !jl_typeis(ir, jl_array_uint8_type)) {
             emit_error(ctx, "Module IR passed to llvmcall must be a string or an array of bytes");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
     }
     else {
         if (!jl_is_string(ir)) {
             emit_error(ctx, "Function IR passed to llvmcall must be a string");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
     }
 
@@ -766,7 +834,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         argtypes.push_back(t);
         if (4 + i > nargs) {
             emit_error(ctx, "Missing arguments to llvmcall!");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         jl_value_t *argi = args[4 + i];
         jl_cgval_t arg = emit_expr(ctx, argi);
@@ -782,7 +850,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     // Make sure to find a unique name
     std::string ir_name;
     while (true) {
-        raw_string_ostream(ir_name) << (ctx.f->getName().str()) << "u" << globalUnique++;
+        raw_string_ostream(ir_name) << (ctx.f->getName().str()) << "u" << globalUniqueGeneratedNames++;
         if (jl_Module->getFunction(ir_name) == NULL)
             break;
     }
@@ -814,13 +882,13 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         << jl_string_data(ir) << "\n}";
 
         SMDiagnostic Err = SMDiagnostic();
-        Mod = parseAssemblyString(ir_stream.str(), Err, jl_LLVMContext);
+        Mod = parseAssemblyString(ir_stream.str(), Err, ctx.builder.getContext());
         if (!Mod) {
             std::string message = "Failed to parse LLVM assembly: \n";
             raw_string_ostream stream(message);
             Err.print("", stream, true);
             emit_error(ctx, stream.str());
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
 
         Function *f = Mod->getFunction(ir_name);
@@ -831,13 +899,13 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
 
         if (jl_is_string(ir)) {
             SMDiagnostic Err = SMDiagnostic();
-            Mod = parseAssemblyString(jl_string_data(ir), Err, jl_LLVMContext);
+            Mod = parseAssemblyString(jl_string_data(ir), Err, ctx.builder.getContext());
             if (!Mod) {
                 std::string message = "Failed to parse LLVM assembly: \n";
                 raw_string_ostream stream(message);
                 Err.print("", stream, true);
                 emit_error(ctx, stream.str());
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
         else {
@@ -845,7 +913,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
                 StringRef((char *)jl_array_data(ir), jl_array_len(ir)), "llvmcall",
                 /*RequiresNullTerminator*/ false);
             Expected<std::unique_ptr<Module>> ModuleOrErr =
-                parseBitcodeFile(*Buf, jl_LLVMContext);
+                parseBitcodeFile(*Buf, ctx.builder.getContext());
             if (Error Err = ModuleOrErr.takeError()) {
                 std::string Message;
                 handleAllErrors(std::move(Err),
@@ -854,7 +922,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
                 raw_string_ostream stream(message);
                 stream << Message;
                 emit_error(ctx, stream.str());
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
             Mod = std::move(ModuleOrErr.get());
         }
@@ -862,7 +930,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         Function *f = Mod->getFunction(jl_string_data(entry));
         if (!f) {
             emit_error(ctx, "Module IR does not contain specified entry function");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         f->setName(ir_name);
 
@@ -878,6 +946,10 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     // copy module properties that should always match
     Mod->setTargetTriple(jl_Module->getTargetTriple());
     Mod->setDataLayout(jl_Module->getDataLayout());
+#if JL_LLVM_VERSION >= 130000
+    Mod->setStackProtectorGuard(jl_Module->getStackProtectorGuard());
+    Mod->setOverrideStackAlignment(jl_Module->getOverrideStackAlignment());
+#endif
 
     // verify the definition
     Function *def = Mod->getFunction(ir_name);
@@ -886,7 +958,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     raw_string_ostream stream(message);
     if (verifyFunction(*def, &stream)) {
         emit_error(ctx, stream.str());
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
     def->setLinkage(GlobalVariable::LinkOnceODRLinkage);
 
@@ -900,7 +972,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
     // save the module to be linked later.
     // we cannot do this right now, because linking mutates the destination module,
     // which might invalidate LLVM values cached in cgval_t's (specifically constant arrays)
-    ctx.llvmcall_modules.push_back(std::move(Mod));
+    ctx.llvmcall_modules.push_back(orc::ThreadSafeModule(std::move(Mod), ctx.emission_context.tsctx));
 
     JL_GC_POP();
 
@@ -910,7 +982,7 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
         stream << "llvmcall return type " << *inst->getType()
                << " does not match declared return type" << *rettype;
         emit_error(ctx, stream.str());
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
 
     return mark_julia_type(ctx, inst, retboxed, rtt);
@@ -918,13 +990,13 @@ static jl_cgval_t emit_llvmcall(jl_codectx_t &ctx, jl_value_t **args, size_t nar
 
 // --- code generator for ccall itself ---
 
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static Value *box_ccall_result(jl_codectx_t &ctx, Value *result, Value *runtime_dt, jl_value_t *rt)
 {
     // XXX: need to handle parameterized zero-byte types (singleton)
-    const DataLayout &DL = jl_data_layout;
+    const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
     unsigned nb = DL.getTypeStoreSize(result->getType());
-    MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut;
+    MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
     Value *strct = emit_allocobj(ctx, nb, runtime_dt);
     init_bits_value(ctx, strct, result, tbaa);
     return strct;
@@ -974,12 +1046,12 @@ class function_sig_t {
         err_msg = generate_func_sig(fname);
     }
 
-    FunctionType *functype() const {
+    FunctionType *functype(LLVMContext &ctxt) const {
         assert(err_msg.empty());
         if (nreqargs > 0)
-            return FunctionType::get(sret ? T_void : prt, makeArrayRef(fargt_sig).slice(0, nreqargs), true);
+            return FunctionType::get(sret ? getVoidTy(ctxt) : prt, makeArrayRef(fargt_sig).slice(0, nreqargs), true);
         else
-            return FunctionType::get(sret ? T_void : prt, fargt_sig, false);
+            return FunctionType::get(sret ? getVoidTy(ctxt) : prt, fargt_sig, false);
     }
 
     jl_cgval_t emit_a_ccall(
@@ -1001,20 +1073,24 @@ std::string generate_func_sig(const char *fname)
     else
         abi.reset(new DefaultAbiState());
     sret = 0;
-
+    LLVMContext &LLVMCtx = lrt->getContext();
     if (type_is_ghost(lrt)) {
-        prt = lrt = T_void;
-        abi->use_sret(jl_nothing_type);
+        prt = lrt = getVoidTy(LLVMCtx);
+        abi->use_sret(jl_nothing_type, LLVMCtx);
     }
     else {
-        if (!jl_is_datatype(rt) || ((jl_datatype_t*)rt)->layout == NULL || jl_is_cpointer_type(rt) || jl_is_array_type(rt) || retboxed) {
+        if (retboxed || jl_is_cpointer_type(rt) || lrt->isPointerTy()) {
             prt = lrt; // passed as pointer
-            abi->use_sret(jl_voidpointer_type);
+            abi->use_sret(jl_voidpointer_type, LLVMCtx);
         }
-        else if (abi->use_sret((jl_datatype_t*)rt)) {
-            AttrBuilder retattrs = AttrBuilder();
+        else if (abi->use_sret((jl_datatype_t*)rt, LLVMCtx)) {
+#if JL_LLVM_VERSION >= 140000
+            AttrBuilder retattrs(LLVMCtx);
+#else
+            AttrBuilder retattrs;
+#endif
 #if !defined(_OS_WINDOWS_) // llvm used to use the old mingw ABI, skipping this marking works around that difference
-            retattrs.addAttribute(Attribute::StructRet);
+            retattrs.addStructRetAttr(lrt);
 #endif
             retattrs.addAttribute(Attribute::NoAlias);
             paramattrs.push_back(std::move(retattrs));
@@ -1023,24 +1099,28 @@ std::string generate_func_sig(const char *fname)
             prt = lrt;
         }
         else {
-            prt = abi->preferred_llvm_type((jl_datatype_t*)rt, true);
+            prt = abi->preferred_llvm_type((jl_datatype_t*)rt, true, LLVMCtx);
             if (prt == NULL)
                 prt = lrt;
         }
     }
 
     for (size_t i = 0; i < nccallargs; ++i) {
+#if JL_LLVM_VERSION >= 140000
+        AttrBuilder ab(LLVMCtx);
+#else
         AttrBuilder ab;
+#endif
         jl_value_t *tti = jl_svecref(at, i);
         Type *t = NULL;
         bool isboxed;
         if (jl_is_abstract_ref_type(tti)) {
             tti = (jl_value_t*)jl_voidpointer_type;
-            t = T_pint8;
+            t = getInt8PtrTy(LLVMCtx);
             isboxed = false;
         }
         else if (llvmcall && jl_is_llvmpointer_type(tti)) {
-            t = bitstype_to_llvm(tti, true);
+            t = bitstype_to_llvm(tti, LLVMCtx, true);
             tti = (jl_value_t*)jl_voidpointer_type;
             isboxed = false;
         }
@@ -1049,7 +1129,7 @@ std::string generate_func_sig(const char *fname)
                 // see pull req #978. need to annotate signext/zeroext for
                 // small integer arguments.
                 jl_datatype_t *bt = (jl_datatype_t*)tti;
-                if (jl_datatype_size(bt) < 4) {
+                if (jl_datatype_size(bt) < 4 && bt != jl_float16_type) {
                     if (jl_signed_type && jl_subtype(tti, (jl_value_t*)jl_signed_type))
                         ab.addAttribute(Attribute::SExt);
                     else
@@ -1057,19 +1137,20 @@ std::string generate_func_sig(const char *fname)
                 }
             }
 
-            t = _julia_struct_to_llvm(ctx, tti, unionall_env, &isboxed, llvmcall);
-            if (t == NULL || t == T_void) {
-                return make_errmsg(fname, i + 1, " doesn't correspond to a C type");
+            t = _julia_struct_to_llvm(ctx, LLVMCtx, tti, &isboxed, llvmcall);
+            if (t == getVoidTy(LLVMCtx)) {
+                return make_errmsg(fname, i + 1, " type doesn't correspond to a C type");
             }
         }
 
         Type *pat;
-        if (!jl_is_datatype(tti) || ((jl_datatype_t*)tti)->layout == NULL || jl_is_array_type(tti)) {
+        if (!jl_is_datatype(tti) || ((jl_datatype_t*)tti)->layout == NULL || jl_is_layout_opaque(((jl_datatype_t*)tti)->layout)) {
             tti = (jl_value_t*)jl_voidpointer_type; // passed as pointer
         }
 
         // Whether or not LLVM wants us to emit a pointer to the data
-        bool byRef = abi->needPassByRef((jl_datatype_t*)tti, ab);
+        assert(t && "LLVM type should not be null");
+        bool byRef = abi->needPassByRef((jl_datatype_t*)tti, ab, LLVMCtx, t);
 
         if (jl_is_cpointer_type(tti)) {
             pat = t;
@@ -1078,7 +1159,7 @@ std::string generate_func_sig(const char *fname)
             pat = PointerType::get(t, AddressSpace::Derived);
         }
         else {
-            pat = abi->preferred_llvm_type((jl_datatype_t*)tti, false);
+            pat = abi->preferred_llvm_type((jl_datatype_t*)tti, false, LLVMCtx);
             if (pat == NULL)
                 pat = t;
         }
@@ -1089,9 +1170,9 @@ std::string generate_func_sig(const char *fname)
             // so attempt to do that coercion here
             if (!llvmcall && cc == CallingConv::C) {
                 if (pat->isIntegerTy() && pat->getPrimitiveSizeInBits() < sizeof(int) * 8)
-                    pat = T_int32;
+                    pat = getInt32Ty(lrt->getContext());
                 if (pat->isFloatingPointTy() && pat->getPrimitiveSizeInBits() < sizeof(double) * 8)
-                    pat = T_float64;
+                    pat = getDoubleTy(lrt->getContext());
                 ab.removeAttribute(Attribute::SExt);
                 ab.removeAttribute(Attribute::ZExt);
             }
@@ -1101,23 +1182,24 @@ std::string generate_func_sig(const char *fname)
         fargt.push_back(t);
         fargt_isboxed.push_back(isboxed);
         fargt_sig.push_back(pat);
-        paramattrs.push_back(AttributeSet::get(jl_LLVMContext, ab));
+#if JL_LLVM_VERSION >= 140000
+        paramattrs.push_back(AttrBuilder(LLVMCtx, AttributeSet::get(LLVMCtx, ab)));
+#else
+        paramattrs.push_back(AttributeSet::get(LLVMCtx, ab));
+#endif
     }
 
     for (size_t i = 0; i < nccallargs + sret; ++i) {
         const auto &as = paramattrs.at(i);
         if (!as.hasAttributes())
             continue;
-        attributes = attributes.addAttributes(jl_LLVMContext, i + 1, as);
+        attributes = addAttributesAtIndex(attributes, LLVMCtx, i + 1, as);
     }
     // If return value is boxed it must be non-null.
     if (retboxed)
-        attributes = attributes.addAttribute(jl_LLVMContext, AttributeList::ReturnIndex,
-                                             Attribute::NonNull);
+        attributes = addRetAttribute(attributes, LLVMCtx, Attribute::NonNull);
     if (rt == jl_bottom_type) {
-        attributes = attributes.addAttribute(jl_LLVMContext,
-                                             AttributeList::FunctionIndex,
-                                             Attribute::NoReturn);
+        attributes = addFnAttribute(attributes, LLVMCtx, Attribute::NoReturn);
     }
     return "";
 }
@@ -1193,19 +1275,27 @@ static bool verify_ref_type(jl_codectx_t &ctx, jl_value_t* ref, jl_unionall_t *u
 static const std::string verify_ccall_sig(jl_value_t *&rt, jl_value_t *at,
                                           jl_unionall_t *unionall_env, jl_svec_t *sparam_vals,
                                           jl_codegen_params_t *ctx,
-                                          Type *&lrt, bool &retboxed, bool &static_rt, bool llvmcall=false)
+                                          Type *&lrt, LLVMContext &ctxt,
+                                          bool &retboxed, bool &static_rt, bool llvmcall=false)
 {
     JL_TYPECHK(ccall, type, rt);
     JL_TYPECHK(ccall, simplevector, at);
 
-    if (jl_is_array_type(rt)) {
-        // `Array` used as return type just returns a julia object reference
-        rt = (jl_value_t*)jl_any_type;
+    if (rt == (jl_value_t*)jl_any_type || jl_is_array_type(rt) ||
+            (jl_is_datatype(rt) && ((jl_datatype_t*)rt)->layout != NULL &&
+             jl_is_layout_opaque(((jl_datatype_t*)rt)->layout))) {
+        // n.b. `Array` used as return type just returns a julia object reference
+        lrt = JuliaType::get_prjlvalue_ty(ctxt);
+        retboxed = true;
+    }
+    else {
+        // jl_type_mappable_to_c should have already ensured that these are valid
+        assert(jl_is_structtype(rt) || jl_is_primitivetype(rt) || rt == (jl_value_t*)jl_bottom_type);
+        lrt = _julia_struct_to_llvm(ctx, ctxt, rt, &retboxed, llvmcall);
+        assert(!retboxed);
+        if (CountTrackedPointers(lrt).count != 0)
+            return "return type struct fields cannot contain a reference";
     }
-
-    lrt = _julia_struct_to_llvm(ctx, rt, unionall_env, &retboxed, llvmcall);
-    if (lrt == NULL)
-        return "return type doesn't correspond to a C type";
 
     // is return type fully statically known?
     if (unionall_env == NULL) {
@@ -1223,7 +1313,9 @@ static const std::string verify_ccall_sig(jl_value_t *&rt, jl_value_t *at,
     return "";
 }
 
-// Expr(:foreigncall, pointer, rettype, (argtypes...), nreq, cconv, args..., roots...)
+const int fc_args_start = 6;
+
+// Expr(:foreigncall, pointer, rettype, (argtypes...), nreq, [cconv | (cconv, effects)], args..., roots...)
 static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 {
     JL_NARGSV(ccall, 5);
@@ -1233,7 +1325,14 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     size_t nccallargs = jl_svec_len(at);
     size_t nreqargs = jl_unbox_long(args[4]); // if vararg
     assert(jl_is_quotenode(args[5]));
-    jl_sym_t *cc_sym = *(jl_sym_t**)args[5];
+    jl_value_t *jlcc = jl_quotenode_value(args[5]);
+    jl_sym_t *cc_sym = NULL;
+    if (jl_is_symbol(jlcc)) {
+        cc_sym = (jl_sym_t*)jlcc;
+    }
+    else if (jl_is_tuple(jlcc)) {
+        cc_sym = (jl_sym_t*)jl_get_nth_field_noalloc(jlcc, 0);
+    }
     assert(jl_is_symbol(cc_sym));
     native_sym_arg_t symarg = {};
     JL_GC_PUSH3(&rt, &at, &symarg.gcroot);
@@ -1251,36 +1350,34 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     if (f_name == NULL && fptr == NULL && jl_ptr == NULL) {
         emit_error(ctx, "ccall: null function pointer");
         JL_GC_POP();
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
 
     auto ccallarg = [=] (size_t i) {
-        assert(i < nccallargs && i + 6 <= nargs);
-        return args[6 + i];
+        assert(i < nccallargs && i + fc_args_start <= nargs);
+        return args[fc_args_start + i];
     };
 
-    auto _is_libjulia_func = [&] (uintptr_t ptr, const char *name) {
+    auto _is_libjulia_func = [&] (uintptr_t ptr, StringRef name) {
         if ((uintptr_t)fptr == ptr)
             return true;
-        return (!f_lib || f_lib == JL_DL_LIBNAME) && f_name && !strcmp(f_name, name);
-    };
-#define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), #name)
-
-    static jl_ptls_t (*ptls_getter)(void) = [] {
-    // directly accessing the address of an ifunc can cause compile-time linker issues
-    // on some configurations (e.g. AArch64 + -Bsymbolic-functions), so we guard the
-    // `&jl_get_ptls_states` within this `#ifdef` guard, and use a more roundabout
-    // method involving `jl_dlsym()` on Linux platforms instead.
-#ifdef _OS_LINUX_
-        jl_ptls_t (*p)(void);
-        void *handle = jl_dlopen(nullptr, 0);
-        jl_dlsym(handle, "jl_get_ptls_states", (void **)&p, 0);
-        jl_dlclose(handle);
-        return p;
+        if (f_lib) {
+#ifdef _OS_WINDOWS_
+            if ((f_lib == JL_EXE_LIBNAME) || // preventing invalid pointer access
+                (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME) ||
+                (f_lib == JL_LIBJULIA_DL_LIBNAME) ||
+                (!strcmp(f_lib, jl_crtdll_basename))) {
+                // libjulia-like
+            }
+            else
+                return false;
 #else
-        return &jl_get_ptls_states;
+            return false;
 #endif
-    }();
+        }
+        return f_name && f_name == name;
+    };
+#define is_libjulia_func(name) _is_libjulia_func((uintptr_t)&(name), StringRef(XSTR(name)))
 
     // emit arguments
     jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nccallargs);
@@ -1292,7 +1389,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
 
     // emit roots
     SmallVector<Value*, 16> gc_uses;
-    for (size_t i = nccallargs + 6; i <= nargs; i++) {
+    for (size_t i = nccallargs + fc_args_start; i <= nargs; i++) {
         // Julia (expression) value of current parameter gcroot
         jl_value_t *argi_root = args[i];
         if (jl_is_long(argi_root))
@@ -1310,7 +1407,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     if (jl_is_abstract_ref_type(rt)) {
         if (!verify_ref_type(ctx, jl_tparam0(rt), unionall, 0, "ccall")) {
             JL_GC_POP();
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         rt = (jl_value_t*)jl_any_type; // convert return type to jl_value_t*
     }
@@ -1325,7 +1422,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
       ctx.spvals_ptr == NULL ? ctx.linfo->sparam_vals : NULL,
       &ctx.emission_context,
       /* outputs: */
-      lrt, retboxed, static_rt,
+      lrt, ctx.builder.getContext(),
+      retboxed, static_rt,
       /* optional arguments */
       llvmcall);
     if (err.empty()) {
@@ -1343,7 +1441,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     if (!err.empty()) {
         emit_error(ctx, "ccall " + err);
         JL_GC_POP();
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
     if (rt != args[2] && rt != (jl_value_t*)jl_any_type)
         jl_add_method_root(ctx, rt);
@@ -1355,7 +1453,7 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
         if (jl_is_abstract_ref_type(tti)) {
             if (!verify_ref_type(ctx, jl_tparam0(tti), unionall, i + 1, "ccall")) {
                 JL_GC_POP();
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
     }
@@ -1364,7 +1462,8 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     bool isVa = nreqargs > 0;
     (void)isVa; // prevent compiler warning
     if (is_libjulia_func(jl_array_ptr)) {
-        assert(lrt == T_size);
+        ++CCALL_STAT(jl_array_ptr);
+        assert(lrt == getSizeTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 1);
         const jl_cgval_t &ary = argv[0];
         JL_GC_POP();
@@ -1372,192 +1471,213 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
                                         retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_value_ptr)) {
-        assert(retboxed ? lrt == T_prjlvalue : lrt == T_size);
+        ++CCALL_STAT(jl_value_ptr);
+        assert(retboxed ? lrt == ctx.types().T_prjlvalue : lrt == getSizeTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 1);
         jl_value_t *tti = jl_svecref(at, 0);
-        Value *ary;
         Type *largty;
         bool isboxed;
         if (jl_is_abstract_ref_type(tti)) {
             tti = (jl_value_t*)jl_voidpointer_type;
-            largty = T_size;
+            largty = getSizeTy(ctx.builder.getContext());
             isboxed = false;
         }
         else {
-            largty = _julia_struct_to_llvm(&ctx.emission_context, tti, unionall, &isboxed, llvmcall);
+            largty = _julia_struct_to_llvm(&ctx.emission_context, ctx.builder.getContext(), tti, &isboxed, llvmcall);
         }
+        Value *retval;
         if (isboxed) {
-            ary = boxed(ctx, argv[0]);
+            retval = boxed(ctx, argv[0]);
+            retval = emit_pointer_from_objref(ctx, emit_bitcast(ctx, retval, ctx.types().T_prjlvalue));
         }
         else {
-            ary = emit_unbox(ctx, largty, argv[0], tti);
+            retval = emit_unbox(ctx, largty, argv[0], tti);
+            retval = emit_inttoptr(ctx, retval, ctx.types().T_pjlvalue);
         }
+        // retval is now an untracked jl_value_t*
+        if (retboxed)
+            // WARNING: this addrspace cast necessarily implies that the value is rooted elsewhere!
+            retval = ctx.builder.CreateAddrSpaceCast(retval, ctx.types().T_prjlvalue);
         JL_GC_POP();
-        if (!retboxed) {
-            return mark_or_box_ccall_result(
-                    ctx,
-                    ctx.builder.CreatePtrToInt(
-                        emit_pointer_from_objref(ctx, emit_bitcast(ctx, ary, T_prjlvalue)),
-                        T_size),
-                    retboxed, rt, unionall, static_rt);
-        }
-        else {
-            return mark_or_box_ccall_result(
-                    ctx,
-                    ctx.builder.CreateAddrSpaceCast(
-                        emit_inttoptr(ctx, ary, T_pjlvalue),
-                        T_prjlvalue), // WARNING: this addrspace cast necessarily implies that the value is rooted elsewhere!
-                    retboxed, rt, unionall, static_rt);
-        }
+        return mark_or_box_ccall_result(ctx, retval, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_cpu_pause)) {
+        ++CCALL_STAT(jl_cpu_pause);
         // Keep in sync with the julia_threads.h version
-        assert(lrt == T_void);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
 #ifdef __MIC__
         // TODO
 #elif defined(_CPU_X86_64_) || defined(_CPU_X86_)  /* !__MIC__ */
-        static auto pauseinst = InlineAsm::get(FunctionType::get(T_void, false), "pause",
+        auto pauseinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "pause",
                                                "~{memory}", true);
         ctx.builder.CreateCall(pauseinst);
         JL_GC_POP();
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
 #elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7)
-        static auto wfeinst = InlineAsm::get(FunctionType::get(T_void, false), "wfe",
+        auto wfeinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "wfe",
                                              "~{memory}", true);
         ctx.builder.CreateCall(wfeinst);
         JL_GC_POP();
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
 #else
         JL_GC_POP();
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
 #endif
     }
     else if (is_libjulia_func(jl_cpu_wake)) {
+        ++CCALL_STAT(jl_cpu_wake);
         // Keep in sync with the julia_threads.h version
-        assert(lrt == T_void);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
 #if JL_CPU_WAKE_NOOP == 1
         JL_GC_POP();
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
 #elif defined(_CPU_AARCH64_) || (defined(_CPU_ARM_) && __ARM_ARCH >= 7)
-        static auto sevinst = InlineAsm::get(FunctionType::get(T_void, false), "sev",
+        auto sevinst = InlineAsm::get(FunctionType::get(getVoidTy(ctx.builder.getContext()), false), "sev",
                                              "~{memory}", true);
         ctx.builder.CreateCall(sevinst);
         JL_GC_POP();
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
 #endif
     }
     else if (is_libjulia_func(jl_gc_safepoint)) {
-        assert(lrt == T_void);
+        ++CCALL_STAT(jl_gc_safepoint);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
         ctx.builder.CreateCall(prepare_call(gcroot_flush_func));
         emit_signal_fence(ctx);
-        ctx.builder.CreateLoad(T_size, ctx.signalPage, true);
+        ctx.builder.CreateLoad(getSizeTy(ctx.builder.getContext()), get_current_signal_page(ctx), true);
         emit_signal_fence(ctx);
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
     }
-    else if (_is_libjulia_func((uintptr_t)ptls_getter, "jl_get_ptls_states")) {
-        assert(lrt == T_size);
+    else if (is_libjulia_func("jl_get_ptls_states")) {
+        ++CCALL_STAT(jl_get_ptls_states);
+        assert(lrt == getSizeTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx,
-            ctx.builder.CreatePtrToInt(ctx.ptlsStates, lrt),
+            ctx.builder.CreatePtrToInt(get_current_ptls(ctx), lrt),
             retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_threadid)) {
-        assert(lrt == T_int16);
+        ++CCALL_STAT(jl_threadid);
+        assert(lrt == getInt16Ty(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        Value *ptls_i16 = emit_bitcast(ctx, ctx.ptlsStates, T_pint16);
-        const int tid_offset = offsetof(jl_tls_states_t, tid);
-        Value *ptid = ctx.builder.CreateInBoundsGEP(ptls_i16, ConstantInt::get(T_size, tid_offset / 2));
-        LoadInst *tid = ctx.builder.CreateAlignedLoad(ptid, Align(sizeof(int16_t)));
-        tbaa_decorate(tbaa_const, tid);
+        Value *ptask_i16 = emit_bitcast(ctx, get_current_task(ctx), getInt16PtrTy(ctx.builder.getContext()));
+        const int tid_offset = offsetof(jl_task_t, tid);
+        Value *ptid = ctx.builder.CreateInBoundsGEP(getInt16Ty(ctx.builder.getContext()), ptask_i16, ConstantInt::get(getSizeTy(ctx.builder.getContext()), tid_offset / sizeof(int16_t)));
+        LoadInst *tid = ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), ptid, Align(sizeof(int16_t)));
+        tbaa_decorate(ctx.tbaa().tbaa_gcframe, tid);
         return mark_or_box_ccall_result(ctx, tid, retboxed, rt, unionall, static_rt);
     }
+    else if (is_libjulia_func(jl_gc_disable_finalizers_internal)
+#ifdef NDEBUG
+             || is_libjulia_func(jl_gc_enable_finalizers_internal)
+#endif
+             ) {
+        JL_GC_POP();
+        Value *ptls_i32 = emit_bitcast(ctx, get_current_ptls(ctx), getInt32PtrTy(ctx.builder.getContext()));
+        const int finh_offset = offsetof(jl_tls_states_t, finalizers_inhibited);
+        Value *pfinh = ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), ptls_i32, ConstantInt::get(getSizeTy(ctx.builder.getContext()), finh_offset / 4));
+        LoadInst *finh = ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), pfinh, Align(sizeof(int32_t)));
+        Value *newval;
+        if (is_libjulia_func(jl_gc_disable_finalizers_internal)) {
+            newval = ctx.builder.CreateAdd(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1));
+        }
+        else {
+            newval = ctx.builder.CreateSelect(ctx.builder.CreateICmpEQ(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)),
+                                              ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0),
+                                              ctx.builder.CreateSub(finh, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1)));
+        }
+        ctx.builder.CreateStore(newval, pfinh);
+        return ghostValue(ctx, jl_nothing_type);
+    }
     else if (is_libjulia_func(jl_get_current_task)) {
-        assert(lrt == T_prjlvalue);
+        ++CCALL_STAT(jl_get_current_task);
+        assert(lrt == ctx.types().T_prjlvalue);
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
-        Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_pprjlvalue);
-        const int ct_offset = offsetof(jl_tls_states_t, current_task);
-        Value *pct = ctx.builder.CreateInBoundsGEP(ptls_pv, ConstantInt::get(T_size, ct_offset / sizeof(void*)));
-        LoadInst *ct = ctx.builder.CreateAlignedLoad(pct, Align(sizeof(void*)));
-        tbaa_decorate(tbaa_const, ct);
+        auto ct = track_pjlvalue(ctx, emit_bitcast(ctx, get_current_task(ctx), ctx.types().T_pjlvalue));
         return mark_or_box_ccall_result(ctx, ct, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_set_next_task)) {
-        assert(lrt == T_void);
+        ++CCALL_STAT(jl_set_next_task);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 1);
         JL_GC_POP();
-        Value *ptls_pv = emit_bitcast(ctx, ctx.ptlsStates, T_ppjlvalue);
+        Value *ptls_pv = emit_bitcast(ctx, get_current_ptls(ctx), ctx.types().T_ppjlvalue);
         const int nt_offset = offsetof(jl_tls_states_t, next_task);
-        Value *pnt = ctx.builder.CreateInBoundsGEP(ptls_pv, ConstantInt::get(T_size, nt_offset / sizeof(void*)));
+        Value *pnt = ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, ptls_pv, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nt_offset / sizeof(void*)));
         ctx.builder.CreateStore(emit_pointer_from_objref(ctx, boxed(ctx, argv[0])), pnt);
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
     }
     else if (is_libjulia_func(jl_sigatomic_begin)) {
-        assert(lrt == T_void);
+        ++CCALL_STAT(jl_sigatomic_begin);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
         ctx.builder.CreateCall(prepare_call(gcroot_flush_func));
         Value *pdefer_sig = emit_defer_signal(ctx);
-        Value *defer_sig = ctx.builder.CreateLoad(pdefer_sig);
-        defer_sig = ctx.builder.CreateAdd(defer_sig, ConstantInt::get(T_sigatomic, 1));
+        Value *defer_sig = ctx.builder.CreateLoad(ctx.types().T_sigatomic, pdefer_sig);
+        defer_sig = ctx.builder.CreateAdd(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 1));
         ctx.builder.CreateStore(defer_sig, pdefer_sig);
         emit_signal_fence(ctx);
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
     }
     else if (is_libjulia_func(jl_sigatomic_end)) {
-        assert(lrt == T_void);
+        ++CCALL_STAT(jl_sigatomic_end);
+        assert(lrt == getVoidTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 0);
         JL_GC_POP();
         ctx.builder.CreateCall(prepare_call(gcroot_flush_func));
         Value *pdefer_sig = emit_defer_signal(ctx);
-        Value *defer_sig = ctx.builder.CreateLoad(pdefer_sig);
+        Value *defer_sig = ctx.builder.CreateLoad(ctx.types().T_sigatomic, pdefer_sig);
         emit_signal_fence(ctx);
         error_unless(ctx,
-                ctx.builder.CreateICmpNE(defer_sig, ConstantInt::get(T_sigatomic, 0)),
+                ctx.builder.CreateICmpNE(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)),
                 "sigatomic_end called in non-sigatomic region");
         defer_sig = ctx.builder.CreateSub(
                 defer_sig,
-                ConstantInt::get(T_sigatomic, 1));
+                ConstantInt::get(ctx.types().T_sigatomic, 1));
         ctx.builder.CreateStore(defer_sig, pdefer_sig);
-        BasicBlock *checkBB = BasicBlock::Create(jl_LLVMContext, "check",
+        BasicBlock *checkBB = BasicBlock::Create(ctx.builder.getContext(), "check",
                                                  ctx.f);
-        BasicBlock *contBB = BasicBlock::Create(jl_LLVMContext, "cont");
+        BasicBlock *contBB = BasicBlock::Create(ctx.builder.getContext(), "cont");
         ctx.builder.CreateCondBr(
-                ctx.builder.CreateICmpEQ(defer_sig, ConstantInt::get(T_sigatomic, 0)),
+                ctx.builder.CreateICmpEQ(defer_sig, ConstantInt::get(ctx.types().T_sigatomic, 0)),
                 checkBB, contBB);
         ctx.builder.SetInsertPoint(checkBB);
         ctx.builder.CreateLoad(
-                ctx.builder.CreateConstInBoundsGEP1_32(T_size, ctx.signalPage, -1),
+                getSizeTy(ctx.builder.getContext()),
+                ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), get_current_signal_page(ctx), -1),
                 true);
         ctx.builder.CreateBr(contBB);
         ctx.f->getBasicBlockList().push_back(contBB);
         ctx.builder.SetInsertPoint(contBB);
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
     }
     else if (is_libjulia_func(jl_svec_len)) {
+        ++CCALL_STAT(jl_svec_len);
         assert(!isVa && !llvmcall && nccallargs == 1);
         const jl_cgval_t &svecv = argv[0];
         Value *len;
         if (svecv.constant && svecv.typ == (jl_value_t*)jl_simplevector_type) {
             // Check the type as well before we call
-            len = ConstantInt::get(T_size, jl_svec_len(svecv.constant));
+            len = ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_svec_len(svecv.constant));
         }
         else {
-            auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), T_psize);
-            len = ctx.builder.CreateAlignedLoad(T_size, ptr, Align(sizeof(size_t)));
+            auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), getSizePtrTy(ctx.builder.getContext()));
+            len = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ptr, Align(sizeof(size_t)));
             // Only mark with TBAA if we are sure about the type.
             // This could otherwise be in a dead branch
             if (svecv.typ == (jl_value_t*)jl_simplevector_type)
-                tbaa_decorate(tbaa_const, cast<Instruction>(len));
-            MDBuilder MDB(jl_LLVMContext);
+                tbaa_decorate(ctx.tbaa().tbaa_const, cast<Instruction>(len));
+            MDBuilder MDB(ctx.builder.getContext());
             auto rng = MDB.createRange(
-                V_size0, ConstantInt::get(T_size, INTPTR_MAX / sizeof(void*) - 1));
+                Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX / sizeof(void*) - 1));
             cast<LoadInst>(len)->setMetadata(LLVMContext::MD_range, rng);
         }
         JL_GC_POP();
@@ -1565,48 +1685,51 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
     }
     else if (is_libjulia_func(jl_svec_isassigned) &&
              argv[1].typ == (jl_value_t*)jl_long_type) {
+        ++CCALL_STAT(jl_svec_isassigned);
         assert(!isVa && !llvmcall && nccallargs == 2);
         const jl_cgval_t &svecv = argv[0];
         const jl_cgval_t &idxv = argv[1];
-        Value *idx = emit_unbox(ctx, T_size, idxv, (jl_value_t*)jl_long_type);
-        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, 1));
-        auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), T_pprjlvalue);
-        Value *slot_addr = ctx.builder.CreateInBoundsGEP(T_prjlvalue,
+        Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idxv, (jl_value_t*)jl_long_type);
+        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+        auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue);
+        Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue,
                                                          decay_derived(ctx, ptr), idx);
-        LoadInst *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, slot_addr,
+        LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr,
                                                        Align(sizeof(void*)));
         load->setAtomic(AtomicOrdering::Unordered);
         // Only mark with TBAA if we are sure about the type.
         // This could otherwise be in a dead branch
         if (svecv.typ == (jl_value_t*)jl_simplevector_type)
-            tbaa_decorate(tbaa_const, load);
-        Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, V_rnull), T_int8);
+            tbaa_decorate(ctx.tbaa().tbaa_const, load);
+        Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(ctx.types().T_prjlvalue)), getInt8Ty(ctx.builder.getContext()));
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_svec_ref) && argv[1].typ == (jl_value_t*)jl_long_type) {
-        assert(lrt == T_prjlvalue);
+        ++CCALL_STAT(jl_svec_ref);
+        assert(lrt == ctx.types().T_prjlvalue);
         assert(!isVa && !llvmcall && nccallargs == 2);
         const jl_cgval_t &svecv = argv[0];
         const jl_cgval_t &idxv = argv[1];
-        Value *idx = emit_unbox(ctx, T_size, idxv, (jl_value_t*)jl_long_type);
-        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, 1));
-        auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), T_pprjlvalue);
-        Value *slot_addr = ctx.builder.CreateInBoundsGEP(T_prjlvalue,
+        Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idxv, (jl_value_t*)jl_long_type);
+        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
+        auto ptr = emit_bitcast(ctx, boxed(ctx, svecv), ctx.types().T_pprjlvalue);
+        Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue,
                                                          decay_derived(ctx, ptr), idx);
-        LoadInst *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, slot_addr,
+        LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr,
                                                        Align(sizeof(void*)));
         load->setAtomic(AtomicOrdering::Unordered);
         // Only mark with TBAA if we are sure about the type.
         // This could otherwise be in a dead branch
         if (svecv.typ == (jl_value_t*)jl_simplevector_type)
-            tbaa_decorate(tbaa_const, load);
+            tbaa_decorate(ctx.tbaa().tbaa_const, load);
         null_pointer_check(ctx, load);
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, load, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_array_isassigned) &&
              argv[1].typ == (jl_value_t*)jl_ulong_type) {
+        ++CCALL_STAT(jl_array_isassigned);
         assert(!isVa && !llvmcall && nccallargs == 2);
         jl_value_t *aryex = ccallarg(0);
         const jl_cgval_t &aryv = argv[0];
@@ -1617,89 +1740,132 @@ static jl_cgval_t emit_ccall(jl_codectx_t &ctx, jl_value_t **args, size_t nargs)
             bool ptrarray = !jl_stored_inline(ety);
             if (!ptrarray && !jl_type_hasptr(ety)) {
                 JL_GC_POP();
-                return mark_or_box_ccall_result(ctx, ConstantInt::get(T_int32, 1),
+                return mark_or_box_ccall_result(ctx, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 1),
                                                 false, rt, unionall, static_rt);
             }
             else if (!jl_has_free_typevars(ety)) {
-                Value *idx = emit_unbox(ctx, T_size, idxv, (jl_value_t*)jl_ulong_type);
-                Value *arrayptr = emit_bitcast(ctx, emit_arrayptr(ctx, aryv, aryex), T_pprjlvalue);
+                Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idxv, (jl_value_t*)jl_ulong_type);
+                Value *arrayptr = emit_bitcast(ctx, emit_arrayptr(ctx, aryv, aryex), ctx.types().T_pprjlvalue);
                 if (!ptrarray) {
                     size_t elsz = jl_datatype_size(ety);
                     unsigned align = jl_datatype_align(ety);
                     size_t stride = LLT_ALIGN(elsz, align) / sizeof(jl_value_t*);
                     if (stride != 1)
-                        idx = ctx.builder.CreateMul(idx, ConstantInt::get(T_size, stride));
-                    idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ((jl_datatype_t*)ety)->layout->first_ptr));
+                        idx = ctx.builder.CreateMul(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), stride));
+                    idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ((jl_datatype_t*)ety)->layout->first_ptr));
                 }
-                Value *slot_addr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, arrayptr, idx);
-                LoadInst *load = ctx.builder.CreateAlignedLoad(T_prjlvalue, slot_addr, Align(sizeof(void*)));
+                Value *slot_addr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, arrayptr, idx);
+                LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, slot_addr, Align(sizeof(void*)));
                 load->setAtomic(AtomicOrdering::Unordered);
-                tbaa_decorate(tbaa_ptrarraybuf, load);
-                Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, V_rnull), T_int32);
+                tbaa_decorate(ctx.tbaa().tbaa_ptrarraybuf, load);
+                Value *res = ctx.builder.CreateZExt(ctx.builder.CreateICmpNE(load, Constant::getNullValue(ctx.types().T_prjlvalue)), getInt32Ty(ctx.builder.getContext()));
                 JL_GC_POP();
                 return mark_or_box_ccall_result(ctx, res, retboxed, rt, unionall, static_rt);
             }
         }
     }
     else if (is_libjulia_func(jl_string_ptr)) {
-        assert(lrt == T_size);
+        ++CCALL_STAT(jl_string_ptr);
+        assert(lrt == getSizeTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 1);
         auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])),
-                                T_pprjlvalue);
+                                ctx.types().T_pprjlvalue);
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
-        auto strp = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, obj, 1);
-        strp = ctx.builder.CreatePtrToInt(strp, T_size);
+        auto strp = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, obj, 1);
+        strp = ctx.builder.CreatePtrToInt(strp, getSizeTy(ctx.builder.getContext()));
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_symbol_name)) {
-        assert(lrt == T_size);
+        ++CCALL_STAT(jl_symbol_name);
+        assert(lrt == getSizeTy(ctx.builder.getContext()));
         assert(!isVa && !llvmcall && nccallargs == 1);
         auto obj = emit_bitcast(ctx, emit_pointer_from_objref(ctx, boxed(ctx, argv[0])),
-                                T_pprjlvalue);
+                                ctx.types().T_pprjlvalue);
         // The inbounds gep makes it more clear to LLVM that the resulting value is not
         // a null pointer.
         auto strp = ctx.builder.CreateConstInBoundsGEP1_32(
-            T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*));
-        strp = ctx.builder.CreatePtrToInt(strp, T_size);
+            ctx.types().T_prjlvalue, obj, (sizeof(jl_sym_t) + sizeof(void*) - 1) / sizeof(void*));
+        strp = ctx.builder.CreatePtrToInt(strp, getSizeTy(ctx.builder.getContext()));
         JL_GC_POP();
         return mark_or_box_ccall_result(ctx, strp, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(memcpy) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) {
+        ++CCALL_STAT(memcpy);
         const jl_cgval_t &dst = argv[0];
         const jl_cgval_t &src = argv[1];
         const jl_cgval_t &n = argv[2];
-        Value *destp = emit_unbox(ctx, T_size, dst, (jl_value_t*)jl_voidpointer_type);
+        Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type);
 
         ctx.builder.CreateMemCpy(
-                emit_inttoptr(ctx, destp, T_pint8),
-#if JL_LLVM_VERSION >= 100000
+                emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
                 MaybeAlign(1),
-#else
-                1,
-#endif
                 emit_inttoptr(ctx,
-                    emit_unbox(ctx, T_size, src, (jl_value_t*)jl_voidpointer_type),
-                    T_pint8),
-#if JL_LLVM_VERSION >= 100000
+                    emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), src, (jl_value_t*)jl_voidpointer_type),
+                    getInt8PtrTy(ctx.builder.getContext())),
                 MaybeAlign(0),
-#else
-                0,
-#endif
-                emit_unbox(ctx, T_size, n, (jl_value_t*)jl_ulong_type),
+                emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type),
+                false);
+        JL_GC_POP();
+        return rt == (jl_value_t*)jl_nothing_type ? ghostValue(ctx, jl_nothing_type) :
+            mark_or_box_ccall_result(ctx, destp, retboxed, rt, unionall, static_rt);
+    }
+    else if (is_libjulia_func(memset) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) {
+        ++CCALL_STAT(memset);
+        const jl_cgval_t &dst = argv[0];
+        const jl_cgval_t &val = argv[1];
+        const jl_cgval_t &n = argv[2];
+        Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type);
+        Value *val32 = emit_unbox(ctx, getInt32Ty(ctx.builder.getContext()), val, (jl_value_t*)jl_uint32_type);
+        Value *val8 = ctx.builder.CreateTrunc(val32, getInt8Ty(ctx.builder.getContext()), "memset_val");
+        ctx.builder.CreateMemSet(
+            emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
+            val8,
+            emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type),
+            MaybeAlign(1)
+        );
+        JL_GC_POP();
+        return rt == (jl_value_t*)jl_nothing_type ? ghostValue(ctx, jl_nothing_type) :
+            mark_or_box_ccall_result(ctx, destp, retboxed, rt, unionall, static_rt);
+    }
+    else if (is_libjulia_func(memmove) && (rt == (jl_value_t*)jl_nothing_type || jl_is_cpointer_type(rt))) {
+        ++CCALL_STAT(memmove);
+        const jl_cgval_t &dst = argv[0];
+        const jl_cgval_t &src = argv[1];
+        const jl_cgval_t &n = argv[2];
+        Value *destp = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), dst, (jl_value_t*)jl_voidpointer_type);
+
+        ctx.builder.CreateMemMove(
+                emit_inttoptr(ctx, destp, getInt8PtrTy(ctx.builder.getContext())),
+                MaybeAlign(0),
+                emit_inttoptr(ctx,
+                    emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), src, (jl_value_t*)jl_voidpointer_type),
+                    getInt8PtrTy(ctx.builder.getContext())),
+                MaybeAlign(0),
+                emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), n, (jl_value_t*)jl_ulong_type),
                 false);
         JL_GC_POP();
-        return rt == (jl_value_t*)jl_nothing_type ? ghostValue(jl_nothing_type) :
+        return rt == (jl_value_t*)jl_nothing_type ? ghostValue(ctx, jl_nothing_type) :
             mark_or_box_ccall_result(ctx, destp, retboxed, rt, unionall, static_rt);
     }
     else if (is_libjulia_func(jl_object_id) && nccallargs == 1 &&
             rt == (jl_value_t*)jl_ulong_type) {
+        ++CCALL_STAT(jl_object_id);
         jl_cgval_t val = argv[0];
-        if (!val.isboxed) {
+        if (val.typ == (jl_value_t*)jl_symbol_type) {
+            JL_GC_POP();
+            const int hash_offset = offsetof(jl_sym_t, hash);
+            Value *ph1 = emit_bitcast(ctx, decay_derived(ctx, boxed(ctx, val)), getSizePtrTy(ctx.builder.getContext()));
+            Value *ph2 = ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), ph1, ConstantInt::get(getSizeTy(ctx.builder.getContext()), hash_offset / sizeof(size_t)));
+            LoadInst *hashval = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ph2, Align(sizeof(size_t)));
+            tbaa_decorate(ctx.tbaa().tbaa_const, hashval);
+            return mark_or_box_ccall_result(ctx, hashval, retboxed, rt, unionall, static_rt);
+        }
+        else if (!val.isboxed) {
             // If the value is not boxed, try to compute the object id without
             // reboxing it.
-            auto T_pint8_derived = PointerType::get(T_int8, AddressSpace::Derived);
+            auto T_pint8_derived = PointerType::get(getInt8Ty(ctx.builder.getContext()), AddressSpace::Derived);
             if (!val.isghost && !val.ispointer())
                 val = value_to_pointer(ctx, val);
             Value *args[] = {
@@ -1732,12 +1898,13 @@ jl_cgval_t function_sig_t::emit_a_ccall(
         SmallVector<Value*, 16> &gc_uses,
         bool static_rt) const
 {
+    ++EmittedCCalls;
     if (!err_msg.empty()) {
         emit_error(ctx, err_msg);
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
 
-    FunctionType *functype = this->functype();
+    FunctionType *functype = this->functype(ctx.builder.getContext());
 
     Value **argvals = (Value**) alloca((nccallargs + sret) * sizeof(Value*));
     for (size_t ai = 0; ai < nccallargs; ai++) {
@@ -1781,13 +1948,15 @@ jl_cgval_t function_sig_t::emit_a_ccall(
         }
 
         if (isa<UndefValue>(v)) {
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         assert(v->getType() == pargty);
         argvals[ai + sret] = v;
     }
 
     Value *result = NULL;
+    //This is only needed if !retboxed && srt && !jlretboxed
+    Type *sretty = nullptr;
     // First, if the ABI requires us to provide the space for the return
     // argument, allocate the box and store that as the first argument type
     bool sretboxed = false;
@@ -1795,6 +1964,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
         assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid");
         if (jl_is_pointerfree(rt)) {
             result = emit_static_alloca(ctx, lrt);
+            sretty = lrt;
             argvals[0] = ctx.builder.CreateBitCast(result, fargt_sig.at(0));
         }
         else {
@@ -1804,6 +1974,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
             assert(jl_datatype_size(rt) > 0 && "sret shouldn't be a singleton instance");
             result = emit_allocobj(ctx, jl_datatype_size(rt),
                                    literal_pointer_val(ctx, (jl_value_t*)rt));
+            sretty = ctx.types().T_jlvalue;
             sretboxed = true;
             gc_uses.push_back(result);
             argvals[0] = ctx.builder.CreateBitCast(emit_pointer_from_objref(ctx, result), fargt_sig.at(0));
@@ -1815,56 +1986,79 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     // optimize the global pointer load in the common case
     Value *llvmf;
     if (llvmcall) {
+        ++EmittedLLVMCalls;
         if (symarg.jl_ptr != NULL) {
             emit_error(ctx, "llvmcall doesn't support dynamic pointers");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         else if (symarg.fptr != NULL) {
             emit_error(ctx, "llvmcall doesn't support static pointers");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         else if (symarg.f_lib != NULL) {
             emit_error(ctx, "llvmcall doesn't support dynamic libraries");
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         else {
             assert(symarg.f_name != NULL);
-            const char* f_name = symarg.f_name;
-            bool f_extern = (strncmp(f_name, "extern ", 7) == 0);
-            if (f_extern)
-                f_name += 7;
-            llvmf = jl_Module->getOrInsertFunction(f_name, functype)
-#if JL_LLVM_VERSION >= 90000
-                .getCallee();
-#else
-                ;
-#endif
-            if (!f_extern && (!isa<Function>(llvmf) ||
-                              cast<Function>(llvmf)->getIntrinsicID() ==
-                                      Intrinsic::not_intrinsic)) {
+            StringRef f_name(symarg.f_name);
+            bool f_extern = f_name.consume_front("extern ");
+            llvmf = NULL;
+            if (f_extern) {
+                llvmf = jl_Module->getOrInsertFunction(f_name, functype).getCallee();
+                if (!isa<Function>(llvmf) || cast<Function>(llvmf)->isIntrinsic() || cast<Function>(llvmf)->getFunctionType() != functype)
+                    llvmf = NULL;
+            }
+            else if (f_name.startswith("llvm.")) {
+                // compute and verify auto-mangling for intrinsic name
+                auto ID = Function::lookupIntrinsicID(f_name);
+                if (ID != Intrinsic::not_intrinsic) {
+                    // Accumulate an array of overloaded types for the given intrinsic
+                    // and compute the new name mangling schema
+                    SmallVector<Type*, 4> overloadTys;
+                    SmallVector<Intrinsic::IITDescriptor, 8> Table;
+                    getIntrinsicInfoTableEntries(ID, Table);
+                    ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
+                    auto res = Intrinsic::matchIntrinsicSignature(functype, TableRef, overloadTys);
+                    if (res == Intrinsic::MatchIntrinsicTypes_Match) {
+                        bool matchvararg = !Intrinsic::matchIntrinsicVarArg(functype->isVarArg(), TableRef);
+                        if (matchvararg) {
+                            Function *intrinsic = Intrinsic::getDeclaration(jl_Module, ID, overloadTys);
+                            assert(intrinsic->getFunctionType() == functype);
+                            if (intrinsic->getName() == f_name || Intrinsic::getBaseName(ID) == f_name)
+                                llvmf = intrinsic;
+                        }
+                    }
+                }
+            }
+            if (llvmf == NULL) {
                 emit_error(ctx, "llvmcall only supports intrinsic calls");
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
     }
     else if (symarg.jl_ptr != NULL) {
+        ++LiteralCCalls;
         null_pointer_check(ctx, symarg.jl_ptr);
         Type *funcptype = PointerType::get(functype, 0);
         llvmf = emit_inttoptr(ctx, symarg.jl_ptr, funcptype);
     }
     else if (symarg.fptr != NULL) {
+        ++LiteralCCalls;
         Type *funcptype = PointerType::get(functype, 0);
         llvmf = literal_static_pointer_val((void*)(uintptr_t)symarg.fptr, funcptype);
-        if (imaging_mode)
+        if (ctx.emission_context.imaging)
             jl_printf(JL_STDERR,"WARNING: literal address used in ccall for %s; code cannot be statically compiled\n", symarg.f_name);
     }
     else {
         assert(symarg.f_name != NULL);
         PointerType *funcptype = PointerType::get(functype, 0);
         if (symarg.lib_expr) {
+            ++DeferredCCallLookups;
             llvmf = runtime_sym_lookup(ctx, funcptype, NULL, symarg.lib_expr, symarg.f_name, ctx.f);
         }
-        else if (imaging_mode) {
+        else if (ctx.emission_context.imaging) {
+            ++DeferredCCallLookups;
             // vararg requires musttail,
             // but musttail is incompatible with noreturn.
             if (functype->isVarArg())
@@ -1876,10 +2070,12 @@ jl_cgval_t function_sig_t::emit_a_ccall(
             void *symaddr;
             void *libsym = jl_get_library_(symarg.f_lib, 0);
             if (!libsym || !jl_dlsym(libsym, symarg.f_name, &symaddr, 0)) {
+                ++DeferredCCallLookups;
                 // either the library or the symbol could not be found, place a runtime
                 // lookup here instead.
                 llvmf = runtime_sym_lookup(ctx, funcptype, symarg.f_lib, NULL, symarg.f_name, ctx.f);
             } else {
+                ++LiteralCCalls;
                 // since we aren't saving this code, there's no sense in
                 // putting anything complicated here: just JIT the function address
                 llvmf = literal_static_pointer_val(symaddr, funcptype);
@@ -1897,14 +2093,14 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     if (cc != CallingConv::C)
         ((CallInst*)ret)->setCallingConv(cc);
     if (!sret)
-        result = ret;
+        result = ret; // no need to update sretty here because we know !sret
     if (0) { // Enable this to turn on SSPREQ (-fstack-protector) on the function containing this ccall
         ctx.f->addFnAttr(Attribute::StackProtectReq);
     }
 
     if (rt == jl_bottom_type) {
         CreateTrap(ctx.builder);
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
 
     // Finally we need to box the result into julia type
@@ -1915,23 +2111,25 @@ jl_cgval_t function_sig_t::emit_a_ccall(
     if (retboxed) {
         assert(!sret);
         jlretboxed = true;
+        ++RetBoxedCCalls;
     }
     else if (sret) {
         jlretboxed = sretboxed;
         if (!jlretboxed) {
             // something alloca'd above is SSA
             if (static_rt)
-                return mark_julia_slot(result, rt, NULL, tbaa_stack);
-            result = ctx.builder.CreateLoad(result);
+                return mark_julia_slot(result, rt, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
+            ++SRetCCalls;
+            result = ctx.builder.CreateLoad(sretty, result);
         }
     }
     else {
         Type *jlrt = julia_type_to_llvm(ctx, rt, &jlretboxed); // compute the real "julian" return type and compute whether it is boxed
         if (type_is_ghost(jlrt)) {
-            return ghostValue(rt);
+            return ghostValue(ctx, rt);
         }
         else if (jl_is_datatype(rt) && jl_is_datatype_singleton((jl_datatype_t*)rt)) {
-            return mark_julia_const(((jl_datatype_t*)rt)->instance);
+            return mark_julia_const(ctx, ((jl_datatype_t*)rt)->instance);
         }
         else if (jlretboxed && !retboxed) {
             assert(jl_is_datatype(rt));
@@ -1940,10 +2138,10 @@ jl_cgval_t function_sig_t::emit_a_ccall(
                 size_t rtsz = jl_datatype_size(rt);
                 assert(rtsz > 0);
                 Value *strct = emit_allocobj(ctx, rtsz, runtime_bt);
-                MDNode *tbaa = jl_is_mutable(rt) ? tbaa_mutab : tbaa_immut;
+                MDNode *tbaa = jl_is_mutable(rt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut;
                 int boxalign = julia_alignment(rt);
                 // copy the data from the return value to the new struct
-                const DataLayout &DL = jl_data_layout;
+                const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout();
                 auto resultTy = result->getType();
                 if (DL.getTypeStoreSize(resultTy) > rtsz) {
                     // ARM and AArch64 can use a LLVM type larger than the julia type.
@@ -1968,3 +2166,7 @@ jl_cgval_t function_sig_t::emit_a_ccall(
 
     return mark_or_box_ccall_result(ctx, result, jlretboxed, rt, unionall_env, static_rt);
 }
+
+// Reset us back to codegen debug type
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "julia_irgen_codegen"
diff --git a/src/ccalltest.c b/src/ccalltest.c
index 23137adf7b9a80..64a6a3aabfb0b9 100644
--- a/src/ccalltest.c
+++ b/src/ccalltest.c
@@ -9,6 +9,18 @@
 #include "../src/support/platform.h"
 #include "../src/support/dtypes.h"
 
+// Borrow definition from `support/dtypes.h`
+#ifdef _OS_WINDOWS_
+#  define DLLEXPORT __declspec(dllexport)
+#else
+# if defined(_OS_LINUX_)
+#  define DLLEXPORT __attribute__ ((visibility("protected")))
+# else
+#  define DLLEXPORT __attribute__ ((visibility("default")))
+# endif
+#endif
+
+
 #ifdef _P64
 #define jint int64_t
 #define PRIjint PRId64
@@ -28,12 +40,8 @@ int c_int = 0;
 int xs[300] = {0,0,0,1,0};
 
 //int testUcharX(unsigned char x);
-#ifdef _COMPILER_MICROSOFT_
-int __declspec(noinline)
-#else
 int __attribute__((noinline))
-#endif
-JL_DLLEXPORT testUcharX(unsigned char x) {
+DLLEXPORT testUcharX(unsigned char x) {
     return xs[x];
 }
 
@@ -47,41 +55,41 @@ typedef struct {
     jint imag;
 } complex_t;
 
-JL_DLLEXPORT complex_t ctest(complex_t a) {
+DLLEXPORT complex_t ctest(complex_t a) {
     a.real += 1;
     a.imag -= 2;
     return a;
 }
 
-JL_DLLEXPORT complex double cgtest(complex double a) {
+DLLEXPORT complex double cgtest(complex double a) {
     //Unpack a ComplexPair{Float64} struct
     if (verbose) fprintf(stderr,"%g + %g i\n", creal(a), cimag(a));
     a += 1 - (2.0*I);
     return a;
 }
 
-JL_DLLEXPORT complex double *cgptest(complex double *a) {
+DLLEXPORT complex double *cgptest(complex double *a) {
     //Unpack a ComplexPair{Float64} struct
     if (verbose) fprintf(stderr,"%g + %g i\n", creal(*a), cimag(*a));
     *a += 1 - (2.0*I);
     return a;
 }
 
-JL_DLLEXPORT complex float cftest(complex float a) {
+DLLEXPORT complex float cftest(complex float a) {
     //Unpack a ComplexPair{Float32} struct
     if (verbose) fprintf(stderr,"%g + %g i\n", creal(a), cimag(a));
     a += 1 - (2.0*I);
     return a;
 }
 
-JL_DLLEXPORT complex float *cfptest(complex float *a) {
+DLLEXPORT complex float *cfptest(complex float *a) {
     //Unpack a ComplexPair{Float64} struct
     if (verbose) fprintf(stderr,"%g + %g i\n", creal(*a), cimag(*a));
     *a += 1 - (2.0*I);
     return a;
 }
 
-JL_DLLEXPORT complex_t *cptest(complex_t *a) {
+DLLEXPORT complex_t *cptest(complex_t *a) {
     //Unpack a ComplexPair{Int} struct pointer
     if (verbose) fprintf(stderr,"%" PRIjint " + %" PRIjint " i\n", a->real, a->imag);
     a->real += 1;
@@ -89,7 +97,7 @@ JL_DLLEXPORT complex_t *cptest(complex_t *a) {
     return a;
 }
 
-JL_DLLEXPORT complex_t *cptest_static(complex_t *a) {
+DLLEXPORT complex_t *cptest_static(complex_t *a) {
     if (verbose) fprintf(stderr,"%" PRIjint " + %" PRIjint " i\n", a->real, a->imag);
     complex_t *b = (complex_t*)malloc_s(sizeof(complex_t));
     b->real = a->real;
@@ -331,7 +339,7 @@ typedef struct {
 #endif // _COMPILER_INTEL_
 
 
-JL_DLLEXPORT struct1 test_1(struct1 a, float b) {
+DLLEXPORT struct1 test_1(struct1 a, float b) {
     //Unpack a "small" struct { float, double }
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", a.x, a.y, b);
     a.x += b * 1;
@@ -339,7 +347,7 @@ JL_DLLEXPORT struct1 test_1(struct1 a, float b) {
     return a;
 }
 
-JL_DLLEXPORT struct1 test_1long_a(jint x1, jint x2, jint x3, struct1 a, float b) {
+DLLEXPORT struct1 test_1long_a(jint x1, jint x2, jint x3, struct1 a, float b) {
     //Unpack a "small" struct { float, double }
     if (verbose) fprintf(stderr,"(%" PRIjint ", %" PRIjint ", %" PRIjint ") & %g + %g i & %g\n", x1, x2, x3, a.x, a.y, b);
     a.x += b + x1 + x2 + x3;
@@ -347,7 +355,7 @@ JL_DLLEXPORT struct1 test_1long_a(jint x1, jint x2, jint x3, struct1 a, float b)
     return a;
 }
 
-JL_DLLEXPORT struct1 test_1long_b(jint x1, double x2, jint x3, struct1 a, float b) {
+DLLEXPORT struct1 test_1long_b(jint x1, double x2, jint x3, struct1 a, float b) {
     //Unpack a "small" struct { float, double }
     if (verbose) fprintf(stderr,"(%" PRIjint ", %g, %" PRIjint ") & %g + %g i & %g\n", x1, x2, x3, a.x, a.y, b);
     a.x += b + x1 + x2 + x3;
@@ -355,7 +363,7 @@ JL_DLLEXPORT struct1 test_1long_b(jint x1, double x2, jint x3, struct1 a, float
     return a;
 }
 
-JL_DLLEXPORT struct1 test_1long_c(jint x1, double x2, jint x3, jint x4, struct1 a, float b) {
+DLLEXPORT struct1 test_1long_c(jint x1, double x2, jint x3, jint x4, struct1 a, float b) {
     //Unpack a "small" struct { float, double }
     if (verbose) fprintf(stderr,"(%" PRIjint ", %g, %" PRIjint ", %" PRIjint ") & %g + %g i & %g\n", x1, x2, x3, x4, a.x, a.y, b);
     a.x += b + x1 + x2 + x3 + x4;
@@ -363,7 +371,7 @@ JL_DLLEXPORT struct1 test_1long_c(jint x1, double x2, jint x3, jint x4, struct1
     return a;
 }
 
-JL_DLLEXPORT struct2a test_2a(struct2a a, int32_t b) {
+DLLEXPORT struct2a test_2a(struct2a a, int32_t b) {
     //Unpack a ComplexPair{Int32} struct
     if (verbose) fprintf(stderr,"%" PRId32 " + %" PRId32 " i & %" PRId32 "\n", a.x.x, a.y.y, b);
     a.x.x += b*1;
@@ -371,7 +379,7 @@ JL_DLLEXPORT struct2a test_2a(struct2a a, int32_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct2b test_2b(struct2b a, int32_t b) {
+DLLEXPORT struct2b test_2b(struct2b a, int32_t b) {
     //Unpack a ComplexPair{Int32} struct
     if (verbose) fprintf(stderr,"%" PRId32 " + %" PRId32 " i & %" PRId32 "\n", a.x, a.y, b);
     a.x += b*1;
@@ -379,7 +387,7 @@ JL_DLLEXPORT struct2b test_2b(struct2b a, int32_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct3a test_3a(struct3a a, int64_t b) {
+DLLEXPORT struct3a test_3a(struct3a a, int64_t b) {
     //Unpack a ComplexPair{Int64} struct
     if (verbose) fprintf(stderr,"%" PRId64 " + %" PRId64 " i & %" PRId64 "\n", a.x.x, a.y.y, b);
     a.x.x += b*1;
@@ -387,7 +395,7 @@ JL_DLLEXPORT struct3a test_3a(struct3a a, int64_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct3b test_3b(struct3b a, int64_t b) {
+DLLEXPORT struct3b test_3b(struct3b a, int64_t b) {
     //Unpack a ComplexPair{Int64} struct
     if (verbose) fprintf(stderr,"%" PRId64 " + %" PRId64 " i & %" PRId64 "\n", a.x, a.y, b);
     a.x += b*1;
@@ -395,7 +403,7 @@ JL_DLLEXPORT struct3b test_3b(struct3b a, int64_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct4 test_4(struct4 a, int32_t b) {
+DLLEXPORT struct4 test_4(struct4 a, int32_t b) {
     if (verbose) fprintf(stderr,"%" PRId32 ",%" PRId32 ",%" PRId32 " & %" PRId32 "\n", a.x, a.y, a.z, b);
     a.x += b*1;
     a.y -= b*2;
@@ -403,7 +411,7 @@ JL_DLLEXPORT struct4 test_4(struct4 a, int32_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct5 test_5(struct5 a, int32_t b) {
+DLLEXPORT struct5 test_5(struct5 a, int32_t b) {
     if (verbose) fprintf(stderr,"%" PRId32 ",%" PRId32 ",%" PRId32 ",%" PRId32 " & %" PRId32 "\n", a.x, a.y, a.z, a.a, b);
     a.x += b*1;
     a.y -= b*2;
@@ -413,7 +421,7 @@ JL_DLLEXPORT struct5 test_5(struct5 a, int32_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct6 test_6(struct6 a, int64_t b) {
+DLLEXPORT struct6 test_6(struct6 a, int64_t b) {
     if (verbose) fprintf(stderr,"%" PRId64 ",%" PRId64 ",%" PRId64 " & %" PRId64 "\n", a.x, a.y, a.z, b);
     a.x += b*1;
     a.y -= b*2;
@@ -421,28 +429,28 @@ JL_DLLEXPORT struct6 test_6(struct6 a, int64_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct7 test_7(struct7 a, int8_t b) {
+DLLEXPORT struct7 test_7(struct7 a, int8_t b) {
     if (verbose) fprintf(stderr,"%" PRId64 ",%" PRId8 " & %" PRId8 "\n", a.x, a.y, b);
     a.x += b*1;
     a.y -= b*2;
     return a;
 }
 
-JL_DLLEXPORT struct8 test_8(struct8 a, int8_t b) {
+DLLEXPORT struct8 test_8(struct8 a, int8_t b) {
     if (verbose) fprintf(stderr,"%" PRId32 ",%" PRId8 " & %" PRId8 "\n", a.x, a.y, b);
     a.x += b*1;
     a.y -= b*2;
     return a;
 }
 
-JL_DLLEXPORT struct9 test_9(struct9 a, int16_t b) {
+DLLEXPORT struct9 test_9(struct9 a, int16_t b) {
     if (verbose) fprintf(stderr,"%" PRId32 ",%" PRId16 " & %" PRId16 "\n", a.x, a.y, b);
     a.x += b*1;
     a.y -= b*2;
     return a;
 }
 
-JL_DLLEXPORT struct10 test_10(struct10 a, int8_t b) {
+DLLEXPORT struct10 test_10(struct10 a, int8_t b) {
     if (verbose) fprintf(stderr,"%" PRId8 ",%" PRId8 ",%" PRId8 ",%" PRId8 " & %" PRId8 "\n", a.x, a.y, a.z, a.a, b);
     a.x += b*1;
     a.y -= b*2;
@@ -452,14 +460,14 @@ JL_DLLEXPORT struct10 test_10(struct10 a, int8_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct11 test_11(struct11 a, float b) {
+DLLEXPORT struct11 test_11(struct11 a, float b) {
     //Unpack a nested ComplexPair{Float32} struct
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", creal(a.x), cimag(a.x), b);
     a.x += b*1 - (b*2.0*I);
     return a;
 }
 
-JL_DLLEXPORT struct12 test_12(struct12 a, float b) {
+DLLEXPORT struct12 test_12(struct12 a, float b) {
     //Unpack two nested ComplexPair{Float32} structs
     if (verbose) fprintf(stderr,"%g + %g i & %g + %g i & %g\n",
                          creal(a.x), cimag(a.x), creal(a.y), cimag(a.y), b);
@@ -468,14 +476,14 @@ JL_DLLEXPORT struct12 test_12(struct12 a, float b) {
     return a;
 }
 
-JL_DLLEXPORT struct13 test_13(struct13 a, double b) {
+DLLEXPORT struct13 test_13(struct13 a, double b) {
     //Unpack a nested ComplexPair{Float64} struct
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", creal(a.x), cimag(a.x), b);
     a.x += b*1 - (b*2.0*I);
     return a;
 }
 
-JL_DLLEXPORT struct14 test_14(struct14 a, float b) {
+DLLEXPORT struct14 test_14(struct14 a, float b) {
     //The C equivalent of a  ComplexPair{Float32} struct (but without special complex ABI)
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", a.x, a.y, b);
     a.x += b*1;
@@ -483,7 +491,7 @@ JL_DLLEXPORT struct14 test_14(struct14 a, float b) {
     return a;
 }
 
-JL_DLLEXPORT struct15 test_15(struct15 a, double b) {
+DLLEXPORT struct15 test_15(struct15 a, double b) {
     //The C equivalent of a  ComplexPair{Float64} struct (but without special complex ABI)
     if (verbose) fprintf(stderr,"%g + %g i & %g\n", a.x, a.y, b);
     a.x += b*1;
@@ -491,7 +499,7 @@ JL_DLLEXPORT struct15 test_15(struct15 a, double b) {
     return a;
 }
 
-JL_DLLEXPORT struct16 test_16(struct16 a, float b) {
+DLLEXPORT struct16 test_16(struct16 a, float b) {
     //Unpack a struct with non-obvious packing requirements
     if (verbose) fprintf(stderr,"%g %g %g %g %g %g & %g\n", a.x, a.y, a.z, a.a, a.b, a.c, b);
     a.x += b*1;
@@ -503,7 +511,7 @@ JL_DLLEXPORT struct16 test_16(struct16 a, float b) {
     return a;
 }
 
-JL_DLLEXPORT struct17 test_17(struct17 a, int8_t b) {
+DLLEXPORT struct17 test_17(struct17 a, int8_t b) {
     //Unpack a struct with non-obvious packing requirements
     if (verbose) fprintf(stderr,"%d %d & %d\n", (int)a.a, (int)a.b, (int)b);
     a.a += b*1;
@@ -511,7 +519,7 @@ JL_DLLEXPORT struct17 test_17(struct17 a, int8_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct18 test_18(struct18 a, int8_t b) {
+DLLEXPORT struct18 test_18(struct18 a, int8_t b) {
     //Unpack a struct with non-obvious packing requirements
     if (verbose) fprintf(stderr,"%d %d %d & %d\n",
                          (int)a.a, (int)a.b, (int)a.c, (int)b);
@@ -526,7 +534,7 @@ JL_DLLEXPORT struct18 test_18(struct18 a, int8_t b) {
 // However, it happens to have the same calling convention with `[2 x i64]`
 // when used as first argument or return value.
 #define int128_t struct3b
-JL_DLLEXPORT int128_t test_128(int128_t a, int64_t b) {
+DLLEXPORT int128_t test_128(int128_t a, int64_t b) {
     //Unpack a Int128
     if (verbose) fprintf(stderr,"0x%016" PRIx64 "%016" PRIx64 " & %" PRId64 "\n", a.y, a.x, b);
     a.x += b*1;
@@ -535,7 +543,7 @@ JL_DLLEXPORT int128_t test_128(int128_t a, int64_t b) {
     return a;
 }
 
-JL_DLLEXPORT struct_big test_big(struct_big a) {
+DLLEXPORT struct_big test_big(struct_big a) {
     //Unpack a "big" struct { int, int, char }
     if (verbose) fprintf(stderr,"%" PRIjint " %" PRIjint " %c\n", a.x, a.y, a.z);
     a.x += 1;
@@ -544,7 +552,7 @@ JL_DLLEXPORT struct_big test_big(struct_big a) {
     return a;
 }
 
-JL_DLLEXPORT struct_big test_big_long(jint x1, jint x2, jint x3, struct_big a) {
+DLLEXPORT struct_big test_big_long(jint x1, jint x2, jint x3, struct_big a) {
     //Unpack a "big" struct { int, int, char }
     if (verbose) fprintf(stderr,"(%" PRIjint ", %" PRIjint ", %" PRIjint ") %" PRIjint " %" PRIjint " %c\n", x1, x2, x3, a.x, a.y, a.z);
     a.x += 1 + x1 + x2 + x3;
@@ -554,7 +562,7 @@ JL_DLLEXPORT struct_big test_big_long(jint x1, jint x2, jint x3, struct_big a) {
 }
 
 #define test_huge(suffix, reg) \
-JL_DLLEXPORT struct_huge##suffix test_huge##suffix(char a, struct_huge##suffix b, char c) { \
+DLLEXPORT struct_huge##suffix test_huge##suffix(char a, struct_huge##suffix b, char c) { \
     if (verbose) fprintf(stderr,"%c-%c\n", a, c); \
     b.reg *= 39; \
     return b; \
@@ -577,7 +585,7 @@ test_huge(5b, r1);
 
 // Enough arguments for architectures that uses registers for integer or
 // floating point arguments to spill.
-JL_DLLEXPORT int test_long_args_intp(int *a1, int *a2, int *a3, int *a4,
+DLLEXPORT int test_long_args_intp(int *a1, int *a2, int *a3, int *a4,
                                      int *a5, int *a6, int *a7, int *a8,
                                      int *a9, int *a10, int *a11, int *a12,
                                      int *a13, int *a14)
@@ -586,7 +594,7 @@ JL_DLLEXPORT int test_long_args_intp(int *a1, int *a2, int *a3, int *a4,
             *a11 + *a12 + *a13 + *a14);
 }
 
-JL_DLLEXPORT int test_long_args_int(int a1, int a2, int a3, int a4,
+DLLEXPORT int test_long_args_int(int a1, int a2, int a3, int a4,
                                     int a5, int a6, int a7, int a8,
                                     int a9, int a10, int a11, int a12,
                                     int a13, int a14)
@@ -595,7 +603,7 @@ JL_DLLEXPORT int test_long_args_int(int a1, int a2, int a3, int a4,
             a11 + a12 + a13 + a14);
 }
 
-JL_DLLEXPORT float test_long_args_float(float a1, float a2, float a3,
+DLLEXPORT float test_long_args_float(float a1, float a2, float a3,
                                         float a4, float a5, float a6,
                                         float a7, float a8, float a9,
                                         float a10, float a11, float a12,
@@ -605,7 +613,7 @@ JL_DLLEXPORT float test_long_args_float(float a1, float a2, float a3,
             a11 + a12 + a13 + a14);
 }
 
-JL_DLLEXPORT double test_long_args_double(double a1, double a2, double a3,
+DLLEXPORT double test_long_args_double(double a1, double a2, double a3,
                                           double a4, double a5, double a6,
                                           double a7, double a8, double a9,
                                           double a10, double a11, double a12,
@@ -620,59 +628,59 @@ typedef struct {
     int *b;
 } struct_spill_pint;
 
-JL_DLLEXPORT int test_spill_int1(int *v1, struct_spill_pint s)
+DLLEXPORT int test_spill_int1(int *v1, struct_spill_pint s)
 {
     return *v1 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int2(int *v1, int *v2, struct_spill_pint s)
+DLLEXPORT int test_spill_int2(int *v1, int *v2, struct_spill_pint s)
 {
     return *v1 + *v2 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int3(int *v1, int *v2, int *v3, struct_spill_pint s)
+DLLEXPORT int test_spill_int3(int *v1, int *v2, int *v3, struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int4(int *v1, int *v2, int *v3, int *v4,
+DLLEXPORT int test_spill_int4(int *v1, int *v2, int *v3, int *v4,
                                  struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int5(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int5(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int6(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int6(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  int *v6, struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *v6 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int7(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int7(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  int *v6, int *v7, struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *v6 + *v7 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int8(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int8(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  int *v6, int *v7, int *v8, struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *v6 + *v7 + *v8 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int9(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int9(int *v1, int *v2, int *v3, int *v4, int *v5,
                                  int *v6, int *v7, int *v8, int *v9,
                                  struct_spill_pint s)
 {
     return *v1 + *v2 + *v3 + *v4 + *v5 + *v6 + *v7 + *v8 + *v9 + *s.a + *s.b;
 }
 
-JL_DLLEXPORT int test_spill_int10(int *v1, int *v2, int *v3, int *v4, int *v5,
+DLLEXPORT int test_spill_int10(int *v1, int *v2, int *v3, int *v4, int *v5,
                                   int *v6, int *v7, int *v8, int *v9, int *v10,
                                   struct_spill_pint s)
 {
@@ -685,79 +693,79 @@ typedef struct {
     float b;
 } struct_spill_float;
 
-JL_DLLEXPORT float test_spill_float1(float v1, struct_spill_float s)
+DLLEXPORT float test_spill_float1(float v1, struct_spill_float s)
 {
     return v1 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float2(float v1, float v2, struct_spill_float s)
+DLLEXPORT float test_spill_float2(float v1, float v2, struct_spill_float s)
 {
     return v1 + v2 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float3(float v1, float v2, float v3,
+DLLEXPORT float test_spill_float3(float v1, float v2, float v3,
                                      struct_spill_float s)
 {
     return v1 + v2 + v3 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float4(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float4(float v1, float v2, float v3, float v4,
                                      struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float5(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float5(float v1, float v2, float v3, float v4,
                                      float v5, struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float6(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float6(float v1, float v2, float v3, float v4,
                                      float v5, float v6, struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + v6 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float7(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float7(float v1, float v2, float v3, float v4,
                                      float v5, float v6, float v7,
                                      struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + v6 + v7 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float8(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float8(float v1, float v2, float v3, float v4,
                                      float v5, float v6, float v7, float v8,
                                      struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float9(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float9(float v1, float v2, float v3, float v4,
                                      float v5, float v6, float v7, float v8,
                                      float v9, struct_spill_float s)
 {
     return v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9 + s.a + s.b;
 }
 
-JL_DLLEXPORT float test_spill_float10(float v1, float v2, float v3, float v4,
+DLLEXPORT float test_spill_float10(float v1, float v2, float v3, float v4,
                                       float v5, float v6, float v7, float v8,
                                       float v9, float v10, struct_spill_float s)
 {
     return (v1 + v2 + v3 + v4 + v5 + v6 + v7 + v8 + v9 + v10 + s.a + s.b);
 }
 
-JL_DLLEXPORT int get_c_int(void)
+DLLEXPORT int get_c_int(void)
 {
     return c_int;
 }
 
-JL_DLLEXPORT void set_c_int(int i)
+DLLEXPORT void set_c_int(int i)
 {
     c_int = i;
 }
 
-JL_DLLEXPORT void finalizer_cptr(void* v)
+DLLEXPORT void finalizer_cptr(void* v)
 {
     set_c_int(-1);
 }
@@ -766,7 +774,7 @@ JL_DLLEXPORT void finalizer_cptr(void* v)
 //////////////////////////////////
 // Turn off verbose for automated tests, leave on for debugging
 
-JL_DLLEXPORT void set_verbose(int level) {
+DLLEXPORT void set_verbose(int level) {
     verbose = level;
 }
 
@@ -774,7 +782,7 @@ JL_DLLEXPORT void set_verbose(int level) {
 //////////////////////////////////
 // Other tests
 
-JL_DLLEXPORT void *test_echo_p(void *p) {
+DLLEXPORT void *test_echo_p(void *p) {
     return p;
 }
 
@@ -782,7 +790,7 @@ JL_DLLEXPORT void *test_echo_p(void *p) {
 
 #include <xmmintrin.h>
 
-JL_DLLEXPORT __m128i test_m128i(__m128i a, __m128i b, __m128i c, __m128i d)
+DLLEXPORT __m128i test_m128i(__m128i a, __m128i b, __m128i c, __m128i d)
 {
     // 64-bit x86 has only level 2 SSE, which does not have a <4 x int32> multiplication,
     // so we use floating-point instead, and assume caller knows about the hack.
@@ -791,7 +799,7 @@ JL_DLLEXPORT __m128i test_m128i(__m128i a, __m128i b, __m128i c, __m128i d)
                                                     _mm_cvtepi32_ps(_mm_sub_epi32(c,d)))));
 }
 
-JL_DLLEXPORT __m128 test_m128(__m128 a, __m128 b, __m128 c, __m128 d)
+DLLEXPORT __m128 test_m128(__m128 a, __m128 b, __m128 c, __m128 d)
 {
     return _mm_add_ps(a, _mm_mul_ps(b, _mm_sub_ps(c, d)));
 }
@@ -800,7 +808,7 @@ JL_DLLEXPORT __m128 test_m128(__m128 a, __m128 b, __m128 c, __m128 d)
 
 #ifdef _CPU_AARCH64_
 
-JL_DLLEXPORT __int128 test_aa64_i128_1(int64_t v1, __int128 v2)
+DLLEXPORT __int128 test_aa64_i128_1(int64_t v1, __int128 v2)
 {
     return v1 * 2 - v2;
 }
@@ -810,7 +818,7 @@ typedef struct {
     __int128 v2;
 } struct_aa64_1;
 
-JL_DLLEXPORT struct_aa64_1 test_aa64_i128_2(int64_t v1, __int128 v2,
+DLLEXPORT struct_aa64_1 test_aa64_i128_2(int64_t v1, __int128 v2,
                                             struct_aa64_1 v3)
 {
     struct_aa64_1 x = {(int32_t)v1 / 2 + 1 - v3.v1, v2 * 2 - 1 - v3.v2};
@@ -822,12 +830,12 @@ typedef struct {
     double v2;
 } struct_aa64_2;
 
-JL_DLLEXPORT __fp16 test_aa64_fp16_1(int v1, float v2, double v3, __fp16 v4)
+DLLEXPORT __fp16 test_aa64_fp16_1(int v1, float v2, double v3, __fp16 v4)
 {
     return (__fp16)(v1 + v2 * 2 + v3 * 3 + v4 * 4);
 }
 
-JL_DLLEXPORT struct_aa64_2 test_aa64_fp16_2(int v1, float v2,
+DLLEXPORT struct_aa64_2 test_aa64_fp16_2(int v1, float v2,
                                             double v3, __fp16 v4)
 {
     struct_aa64_2 x = {v4 / 2 + 1, v1 * 2 + v2 * 4 - v3};
@@ -836,7 +844,7 @@ JL_DLLEXPORT struct_aa64_2 test_aa64_fp16_2(int v1, float v2,
 
 #include <arm_neon.h>
 
-JL_DLLEXPORT int64x2_t test_aa64_vec_1(int32x2_t v1, float _v2, int32x2_t v3)
+DLLEXPORT int64x2_t test_aa64_vec_1(int32x2_t v1, float _v2, int32x2_t v3)
 {
     int v2 = (int)_v2;
     return vmovl_s32(v1 * v2 + v3);
@@ -854,7 +862,7 @@ typedef struct {
     int16x8_t v1;
 } struct_aa64_4;
 
-JL_DLLEXPORT struct_aa64_3 test_aa64_vec_2(struct_aa64_3 v1, struct_aa64_4 v2)
+DLLEXPORT struct_aa64_3 test_aa64_vec_2(struct_aa64_3 v1, struct_aa64_4 v2)
 {
     // The cast below is to workaround GCC issue.
     // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=96990
@@ -945,21 +953,21 @@ test_huge(3_ppc64_hva, vf1[0]);
 test_huge(4_ppc64_hva, v1[0]);
 test_huge(5_ppc64_hva, v1[0]);
 
-JL_DLLEXPORT int64_t test_ppc64_vec1long(
+DLLEXPORT int64_t test_ppc64_vec1long(
         int64_t d1, int64_t d2, int64_t d3, int64_t d4, int64_t d5, int64_t d6,
         int64_t d7, int64_t d8, int64_t d9, struct_huge1_ppc64 vs)
 {
     return d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9 + vs.m + vs.v[0] + vs.v[1] + vs.v[2] + vs.v[3];
 }
 
-JL_DLLEXPORT int64_t test_ppc64_vec1long_vec(
+DLLEXPORT int64_t test_ppc64_vec1long_vec(
         int64_t d1, int64_t d2, int64_t d3, int64_t d4, int64_t d5, int64_t d6,
         int64_t d7, int64_t d8, int64_t d9, float32x4_t vs)
 {
     return d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9 + vs[0] + vs[1] + vs[2] + vs[3];
 }
 
-JL_DLLEXPORT float32x4_t test_ppc64_vec2(int64_t d1, float32x4_t a, float32x4_t b, float32x4_t c, float32x4_t d,
+DLLEXPORT float32x4_t test_ppc64_vec2(int64_t d1, float32x4_t a, float32x4_t b, float32x4_t c, float32x4_t d,
                                          float32x4_t e, float32x4_t f, float32x4_t g, float32x4_t h, float32x4_t i,
                                          float32x4_t j, float32x4_t k, float32x4_t l, float32x4_t m, float32x4_t n)
 {
@@ -973,13 +981,13 @@ JL_DLLEXPORT float32x4_t test_ppc64_vec2(int64_t d1, float32x4_t a, float32x4_t
 
 #endif
 
-JL_DLLEXPORT int threadcall_args(int a, int b) {
+DLLEXPORT int threadcall_args(int a, int b) {
     return a + b;
 }
 
-JL_DLLEXPORT void c_exit_finalizer(void* v) {
+DLLEXPORT void c_exit_finalizer(void* v) {
     printf("c_exit_finalizer: %d, %u", *(int*)v, (unsigned)((uintptr_t)v & (uintptr_t)1));
 }
 
 // global variable for cglobal testing
-JL_DLLEXPORT const int global_var = 1;
+DLLEXPORT const int global_var = 1;
diff --git a/src/cgmemmgr.cpp b/src/cgmemmgr.cpp
index d2d347a022a92a..37e02b0efccbbe 100644
--- a/src/cgmemmgr.cpp
+++ b/src/cgmemmgr.cpp
@@ -10,6 +10,7 @@
 #ifdef _OS_LINUX_
 #  include <sys/syscall.h>
 #  include <sys/utsname.h>
+#  include <sys/resource.h>
 #endif
 #ifndef _OS_WINDOWS_
 #  include <sys/mman.h>
@@ -22,6 +23,7 @@
 #endif
 #ifdef _OS_FREEBSD_
 #  include <sys/types.h>
+#  include <sys/resource.h>
 #endif
 #include "julia_assert.h"
 
@@ -62,7 +64,8 @@ static void unmap_page(void *ptr, size_t size)
 enum class Prot : int {
     RW = PAGE_READWRITE,
     RX = PAGE_EXECUTE,
-    RO = PAGE_READONLY
+    RO = PAGE_READONLY,
+    NO = PAGE_NOACCESS
 };
 
 static void protect_page(void *ptr, size_t size, Prot flags)
@@ -79,7 +82,8 @@ static void protect_page(void *ptr, size_t size, Prot flags)
 enum class Prot : int {
     RW = PROT_READ | PROT_WRITE,
     RX = PROT_READ | PROT_EXEC,
-    RO = PROT_READ
+    RO = PROT_READ,
+    NO = PROT_NONE
 };
 
 static void protect_page(void *ptr, size_t size, Prot flags)
@@ -171,7 +175,7 @@ static intptr_t get_anon_hdl(void)
     if (check_fd_or_close(fd))
         return fd;
 #  endif
-    char shm_name[] = "julia-codegen-0123456789-0123456789/tmp///";
+    char shm_name[JL_PATH_MAX] = "julia-codegen-0123456789-0123456789/tmp///";
     pid_t pid = getpid();
     // `shm_open` can't be mapped exec on mac
 #  ifndef _OS_DARWIN_
@@ -193,8 +197,14 @@ static intptr_t get_anon_hdl(void)
             return fd;
         }
     }
-    snprintf(shm_name, sizeof(shm_name),
-             "/tmp/julia-codegen-%d-XXXXXX", (int)pid);
+    size_t len = sizeof(shm_name);
+    if (uv_os_tmpdir(shm_name, &len) != 0) {
+        // Unknown error; default to `/tmp`
+        snprintf(shm_name, sizeof(shm_name), "/tmp");
+        len = 4;
+    }
+    snprintf(shm_name + len, sizeof(shm_name) - len,
+             "/julia-codegen-%d-XXXXXX", (int)pid);
     fd = mkstemp(shm_name);
     if (check_fd_or_close(fd)) {
         unlink(shm_name);
@@ -203,12 +213,26 @@ static intptr_t get_anon_hdl(void)
     return -1;
 }
 
-static size_t map_offset = 0;
+static _Atomic(size_t) map_offset{0};
 // Multiple of 128MB.
 // Hopefully no one will set a ulimit for this to be a problem...
-static constexpr size_t map_size_inc = 128 * 1024 * 1024;
+static constexpr size_t map_size_inc_default = 128 * 1024 * 1024;
 static size_t map_size = 0;
-static jl_mutex_t shared_map_lock;
+static uv_mutex_t shared_map_lock;
+
+static size_t get_map_size_inc()
+{
+    rlimit rl;
+    if (getrlimit(RLIMIT_FSIZE, &rl) != -1) {
+        if (rl.rlim_cur != RLIM_INFINITY) {
+            return std::min<size_t>(map_size_inc_default, rl.rlim_cur);
+        }
+        if (rl.rlim_max != RLIM_INFINITY) {
+            return std::min<size_t>(map_size_inc_default, rl.rlim_max);
+        }
+    }
+    return map_size_inc_default;
+}
 
 static void *create_shared_map(size_t size, size_t id)
 {
@@ -223,8 +247,8 @@ static intptr_t init_shared_map()
     anon_hdl = get_anon_hdl();
     if (anon_hdl == -1)
         return -1;
-    map_offset = 0;
-    map_size = map_size_inc;
+    jl_atomic_store_relaxed(&map_offset, 0);
+    map_size = get_map_size_inc();
     int ret = ftruncate(anon_hdl, map_size);
     if (ret != 0) {
         perror(__func__);
@@ -238,8 +262,9 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec)
     assert(size % jl_page_size == 0);
     size_t off = jl_atomic_fetch_add(&map_offset, size);
     *id = off;
+    size_t map_size_inc = get_map_size_inc();
     if (__unlikely(off + size > map_size)) {
-        JL_LOCK_NOGC(&shared_map_lock);
+        uv_mutex_lock(&shared_map_lock);
         size_t old_size = map_size;
         while (off + size > map_size)
             map_size += map_size_inc;
@@ -250,7 +275,7 @@ static void *alloc_shared_page(size_t size, size_t *id, bool exec)
                 abort();
             }
         }
-        JL_UNLOCK_NOGC(&shared_map_lock);
+        uv_mutex_unlock(&shared_map_lock);
     }
     return create_shared_map(size, off);
 }
@@ -288,6 +313,7 @@ ssize_t pwrite_addr(int fd, const void *buf, size_t nbyte, uintptr_t addr)
 // Use `get_self_mem_fd` which has a guard to call this only once.
 static int _init_self_mem()
 {
+    uv_mutex_init(&shared_map_lock);
     struct utsname kernel;
     uname(&kernel);
     int major, minor;
@@ -623,7 +649,7 @@ class DualMapAllocator : public ROAllocator<exec> {
                 unmap_page((void*)block.wr_ptr, block.total);
             }
             else {
-                protect_page((void*)block.wr_ptr, block.total, Prot::RO);
+                protect_page((void*)block.wr_ptr, block.total, Prot::NO);
                 block.state = SplitPtrBlock::WRInit;
             }
         }
@@ -738,6 +764,7 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
     std::unique_ptr<ROAllocator<false>> ro_alloc;
     std::unique_ptr<ROAllocator<true>> exe_alloc;
     bool code_allocated;
+    size_t total_allocated;
 
 public:
     RTDyldMemoryManagerJL()
@@ -746,7 +773,8 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
           rw_alloc(),
           ro_alloc(),
           exe_alloc(),
-          code_allocated(false)
+          code_allocated(false),
+          total_allocated(0)
     {
 #ifdef _OS_LINUX_
         if (!ro_alloc && get_self_mem_fd() != -1) {
@@ -762,6 +790,7 @@ class RTDyldMemoryManagerJL : public SectionMemoryManager {
     ~RTDyldMemoryManagerJL() override
     {
     }
+    size_t getTotalBytes() { return total_allocated; }
     void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
                           size_t Size) override;
 #if 0
@@ -829,6 +858,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateCodeSection(uintptr_t Size,
     // allocating more than one code section can confuse libunwind.
     assert(!code_allocated);
     code_allocated = true;
+    total_allocated += Size;
     if (exe_alloc)
         return (uint8_t*)exe_alloc->alloc(Size, Alignment);
     return SectionMemoryManager::allocateCodeSection(Size, Alignment, SectionID,
@@ -841,6 +871,7 @@ uint8_t *RTDyldMemoryManagerJL::allocateDataSection(uintptr_t Size,
                                                     StringRef SectionName,
                                                     bool isReadOnly)
 {
+    total_allocated += Size;
     if (!isReadOnly)
         return (uint8_t*)rw_alloc.alloc(Size, Alignment);
     if (ro_alloc)
@@ -913,3 +944,8 @@ RTDyldMemoryManager* createRTDyldMemoryManager()
 {
     return new RTDyldMemoryManagerJL();
 }
+
+size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm)
+{
+    return ((RTDyldMemoryManagerJL*)mm)->getTotalBytes();
+}
diff --git a/src/cgutils.cpp b/src/cgutils.cpp
index 2d61c49c5730c1..6f346b32728b32 100644
--- a/src/cgutils.cpp
+++ b/src/cgutils.cpp
@@ -2,26 +2,61 @@
 
 // utility procedures used in code generation
 
-static Instruction *tbaa_decorate(MDNode *md, Instruction *inst)
-{
-    inst->setMetadata(llvm::LLVMContext::MD_tbaa, md);
-    if (isa<LoadInst>(inst) && md == tbaa_const)
-        inst->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(md->getContext(), None));
-    return inst;
-}
+// Mark our stats as being from cgutils
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "julia_irgen_cgutils"
+
+STATISTIC(EmittedPointerFromObjref, "Number of emitted pointer_from_objref calls");
+STATISTIC(EmittedPointerBitcast, "Number of emitted pointer bitcasts");
+STATISTIC(EmittedNthPtrAddr, "Number of emitted nth pointer address instructions");
+STATISTIC(EmittedTypeof, "Number of emitted typeof instructions");
+STATISTIC(EmittedErrors, "Number of emitted errors");
+STATISTIC(EmittedConditionalErrors, "Number of emitted conditional errors");
+STATISTIC(EmittedExceptions, "Number of emitted exceptions");
+STATISTIC(EmittedConditionalExceptions, "Number of emitted conditional exceptions");
+STATISTIC(EmittedNullchecks, "Number of emitted nullchecks");
+STATISTIC(EmittedGuards, "Number of emitted guards");
+STATISTIC(EmittedIsaUnions, "Number of emitted isa-union checks");
+STATISTIC(EmittedIsa, "Number of emitted isa checks");
+STATISTIC(EmittedTypechecks, "Number of emitted typechecks");
+STATISTIC(EmittedConcretechecks, "Number of emitted concrete checks");
+STATISTIC(EmittedBoundschecks, "Number of emitted boundschecks");
+STATISTIC(EmittedLockstates, "Number of emitted lockstate value calls");
+STATISTIC(EmittedMemcpys, "Number of emitted memcpy instructions");
+STATISTIC(SkippedMemcpys, "Number of skipped memcpy instructions");
+STATISTIC(EmittedGetfieldUnknowns, "Number of unknown getfield calls emitted");
+STATISTIC(EmittedGetfieldKnowns, "Number of known getfield calls emitted");
+STATISTIC(EmittedSetfield, "Number of setfield calls emitted");
+STATISTIC(EmittedUnionLoads, "Number of union loads emitted");
+STATISTIC(EmittedVarargsLength, "Number of varargs length calls emitted");
+STATISTIC(EmittedArraysize, "Number of arraysize calls emitted");
+STATISTIC(EmittedArraylen, "Number of array length calls emitted");
+STATISTIC(EmittedArrayptr, "Number of array data pointer loads emitted");
+STATISTIC(EmittedArrayflags, "Number of arrayflags calls emitted");
+STATISTIC(EmittedArrayNDims, "Number of array ndims calls emitted");
+STATISTIC(EmittedArrayElsize, "Number of array elsize calls emitted");
+STATISTIC(EmittedArrayOffset, "Number of array offset calls emitted");
+STATISTIC(EmittedArrayNdIndex, "Number of array nd index calls emitted");
+STATISTIC(EmittedBoxes, "Number of box operations emitted");
+STATISTIC(EmittedCPointerChecks, "Number of C pointer checks emitted");
+STATISTIC(EmittedAllocObjs, "Number of object allocations emitted");
+STATISTIC(EmittedWriteBarriers, "Number of write barriers emitted");
+STATISTIC(EmittedNewStructs, "Number of new structs emitted");
+STATISTIC(EmittedSignalFences, "Number of signal fences emitted");
+STATISTIC(EmittedDeferSignal, "Number of deferred signals emitted");
 
 static Value *track_pjlvalue(jl_codectx_t &ctx, Value *V)
 {
-    assert(V->getType() == T_pjlvalue);
-    return ctx.builder.CreateAddrSpaceCast(V, T_prjlvalue);
+    assert(V->getType() == ctx.types().T_pjlvalue);
+    return ctx.builder.CreateAddrSpaceCast(V, ctx.types().T_prjlvalue);
 }
 
 // Take an arbitrary untracked value and make it gc-tracked
 static Value *maybe_decay_untracked(jl_codectx_t &ctx, Value *V)
 {
-    if (V->getType() == T_pjlvalue)
-        return ctx.builder.CreateAddrSpaceCast(V, T_prjlvalue);
-    assert(V->getType() == T_prjlvalue);
+    if (V->getType() == ctx.types().T_pjlvalue)
+        return ctx.builder.CreateAddrSpaceCast(V, ctx.types().T_prjlvalue);
+    assert(V->getType() == ctx.types().T_prjlvalue);
     return V;
 }
 
@@ -32,7 +67,7 @@ static Value *decay_derived(jl_codectx_t &ctx, Value *V)
     if (cast<PointerType>(T)->getAddressSpace() == AddressSpace::Derived)
         return V;
     // Once llvm deletes pointer element types, we won't need it here any more either.
-    Type *NewT = PointerType::get(cast<PointerType>(T)->getElementType(), AddressSpace::Derived);
+    Type *NewT = PointerType::getWithSamePointeeType(cast<PointerType>(T), AddressSpace::Derived);
     return ctx.builder.CreateAddrSpaceCast(V, NewT);
 }
 
@@ -42,15 +77,31 @@ static Value *maybe_decay_tracked(jl_codectx_t &ctx, Value *V)
     Type *T = V->getType();
     if (cast<PointerType>(T)->getAddressSpace() != AddressSpace::Tracked)
         return V;
-    Type *NewT = PointerType::get(cast<PointerType>(T)->getElementType(), AddressSpace::Derived);
+    Type *NewT = PointerType::getWithSamePointeeType(cast<PointerType>(T), AddressSpace::Derived);
     return ctx.builder.CreateAddrSpaceCast(V, NewT);
 }
 
 static Value *mark_callee_rooted(jl_codectx_t &ctx, Value *V)
 {
-    assert(V->getType() == T_pjlvalue || V->getType() == T_prjlvalue);
+    assert(V->getType() == ctx.types().T_pjlvalue || V->getType() == ctx.types().T_prjlvalue);
     return ctx.builder.CreateAddrSpaceCast(V,
-        PointerType::get(T_jlvalue, AddressSpace::CalleeRooted));
+        PointerType::get(ctx.types().T_jlvalue, AddressSpace::CalleeRooted));
+}
+
+AtomicOrdering get_llvm_atomic_order(enum jl_memory_order order)
+{
+    switch (order) {
+    case jl_memory_order_notatomic: return AtomicOrdering::NotAtomic;
+    case jl_memory_order_unordered: return AtomicOrdering::Unordered;
+    case jl_memory_order_monotonic: return AtomicOrdering::Monotonic;
+    case jl_memory_order_acquire:   return AtomicOrdering::Acquire;
+    case jl_memory_order_release:   return AtomicOrdering::Release;
+    case jl_memory_order_acq_rel:   return AtomicOrdering::AcquireRelease;
+    case jl_memory_order_seq_cst:   return AtomicOrdering::SequentiallyConsistent;
+    default:
+        assert("invalid atomic ordering");
+        abort();
+    }
 }
 
 // --- language feature checks ---
@@ -66,33 +117,33 @@ static Value *stringConstPtr(
 {
     Module *M = jl_builderModule(irbuilder);
     StringRef ctxt(txt.c_str(), txt.size() + 1);
-    Constant *Data = ConstantDataArray::get(jl_LLVMContext, arrayRefFromStringRef(ctxt));
+    Constant *Data = ConstantDataArray::get(irbuilder.getContext(), arrayRefFromStringRef(ctxt));
     GlobalVariable *gv = get_pointer_to_constant(emission_context, Data, "_j_str", *M);
-    Value *zero = ConstantInt::get(Type::getInt32Ty(jl_LLVMContext), 0);
+    Value *zero = ConstantInt::get(Type::getInt32Ty(irbuilder.getContext()), 0);
     Value *Args[] = { zero, zero };
     return irbuilder.CreateInBoundsGEP(gv->getValueType(), gv, Args);
 }
 
 
 // --- MDNode ---
-Metadata *to_md_tree(jl_value_t *val) {
+Metadata *to_md_tree(jl_value_t *val, LLVMContext &ctxt) {
     if (val == jl_nothing)
         return nullptr;
     Metadata *MD = nullptr;
     if (jl_is_symbol(val)) {
-        MD = MDString::get(jl_LLVMContext, jl_symbol_name((jl_sym_t*)val));
+        MD = MDString::get(ctxt, jl_symbol_name((jl_sym_t*)val));
     } else if (jl_is_bool(val)) {
-        MD = ConstantAsMetadata::get(ConstantInt::get(T_int1, jl_unbox_bool(val)));
+        MD = ConstantAsMetadata::get(ConstantInt::get(getInt1Ty(ctxt), jl_unbox_bool(val)));
     } else if (jl_is_long(val)) {
-        MD = ConstantAsMetadata::get(ConstantInt::get(T_int64, jl_unbox_long(val)));
+        MD = ConstantAsMetadata::get(ConstantInt::get(getInt64Ty(ctxt), jl_unbox_long(val)));
     } else if (jl_is_tuple(val)) {
         SmallVector<Metadata *, 8> MDs;
         for (int f = 0, nf = jl_nfields(val); f < nf; ++f) {
-            MD = to_md_tree(jl_fieldref(val, f));
+            MD = to_md_tree(jl_fieldref(val, f), ctxt);
             if (MD)
                 MDs.push_back(MD);
         }
-        MD = MDNode::get(jl_LLVMContext, MDs);
+        MD = MDNode::get(ctxt, MDs);
     } else {
         jl_error("LLVM metadata needs to Symbol/Bool/Int or Tuple thereof");
     }
@@ -101,11 +152,11 @@ Metadata *to_md_tree(jl_value_t *val) {
 
 // --- Debug info ---
 
-static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_value_t *jt, DIBuilder *dbuilder, bool isboxed)
+static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_debugcache_t &debuginfo, jl_value_t *jt, DIBuilder *dbuilder, bool isboxed)
 {
     jl_datatype_t *jdt = (jl_datatype_t*)jt;
     if (isboxed || !jl_is_datatype(jt) || !jdt->isconcretetype)
-        return jl_pvalue_dillvmt;
+        return debuginfo.jl_pvalue_dillvmt;
     assert(jdt->layout);
     DIType* _ditype = NULL;
     DIType* &ditype = (ctx ? ctx->ditypes[jdt] : _ditype);
@@ -120,12 +171,13 @@ static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_value_t *jt, DIBui
         size_t ntypes = jl_datatype_nfields(jdt);
         std::vector<llvm::Metadata*> Elements(ntypes);
         for (unsigned i = 0; i < ntypes; i++) {
-            jl_value_t *el = jl_svecref(jdt->types, i);
+            jl_value_t *el = jl_field_type_concrete(jdt, i);
             DIType *di;
             if (jl_field_isptr(jdt, i))
-                di = jl_pvalue_dillvmt;
+                di = debuginfo.jl_pvalue_dillvmt;
+            // TODO: elseif jl_islayout_inline
             else
-                di = _julia_type_to_di(ctx, el, dbuilder, false);
+                di = _julia_type_to_di(ctx, debuginfo, el, dbuilder, false);
             Elements[i] = di;
         }
         DINodeArray ElemArray = dbuilder->getOrCreateArray(Elements);
@@ -148,14 +200,56 @@ static DIType *_julia_type_to_di(jl_codegen_params_t *ctx, jl_value_t *jt, DIBui
     }
     else {
         // return a typealias for types with hidden content
-        ditype = dbuilder->createTypedef(jl_pvalue_dillvmt, tname, NULL, 0, NULL);
+        ditype = dbuilder->createTypedef(debuginfo.jl_pvalue_dillvmt, tname, NULL, 0, NULL);
     }
     return ditype;
 }
 
-static DIType *julia_type_to_di(jl_codectx_t &ctx, jl_value_t *jt, DIBuilder *dbuilder, bool isboxed)
+static DIType *julia_type_to_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *jt, DIBuilder *dbuilder, bool isboxed)
 {
-    return _julia_type_to_di(&ctx.emission_context, jt, dbuilder, isboxed);
+    return _julia_type_to_di(&ctx.emission_context, debuginfo, jt, dbuilder, isboxed);
+}
+
+void jl_debugcache_t::initialize(Module *m) {
+    if (initialized) {
+        return;
+    }
+    initialized = true;
+    // add needed base debugging definitions to our LLVM environment
+    DIBuilder dbuilder(*m);
+    DIFile *julia_h = dbuilder.createFile("julia.h", "");
+    DICompositeType *jl_value_dillvmt = dbuilder.createStructType(nullptr,
+        "jl_value_t",
+        julia_h,
+        71, // At the time of this writing. Not sure if it's worth it to keep this in sync
+        0 * 8, // sizeof(jl_value_t) * 8,
+        __alignof__(void*) * 8, // __alignof__(jl_value_t) * 8,
+        DINode::FlagZero, // Flags
+        nullptr,    // Derived from
+        nullptr);  // Elements - will be corrected later
+
+    jl_pvalue_dillvmt = dbuilder.createPointerType(jl_value_dillvmt, sizeof(jl_value_t*) * 8,
+                                                __alignof__(jl_value_t*) * 8);
+
+    SmallVector<llvm::Metadata *, 1> Elts;
+    std::vector<Metadata*> diargs(0);
+    Elts.push_back(jl_pvalue_dillvmt);
+    dbuilder.replaceArrays(jl_value_dillvmt,
+    dbuilder.getOrCreateArray(Elts));
+
+    jl_ppvalue_dillvmt = dbuilder.createPointerType(jl_pvalue_dillvmt, sizeof(jl_value_t**) * 8,
+                                                    __alignof__(jl_value_t**) * 8);
+
+    diargs.push_back(jl_pvalue_dillvmt);    // Return Type (ret value)
+    diargs.push_back(jl_pvalue_dillvmt);    // First Argument (function)
+    diargs.push_back(jl_ppvalue_dillvmt);   // Second Argument (argv)
+    // Third argument (length(argv))
+    diargs.push_back(_julia_type_to_di(NULL, *this, (jl_value_t*)jl_int32_type, &dbuilder, false));
+
+    jl_di_func_sig = dbuilder.createSubroutineType(
+        dbuilder.getOrCreateTypeArray(diargs));
+    jl_di_func_null_sig = dbuilder.createSubroutineType(
+        dbuilder.getOrCreateTypeArray(None));
 }
 
 static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V)
@@ -164,12 +258,13 @@ static Value *emit_pointer_from_objref(jl_codectx_t &ctx, Value *V)
     if (AS != AddressSpace::Tracked && AS != AddressSpace::Derived)
         return V;
     V = decay_derived(ctx, V);
-    Type *T = PointerType::get(T_jlvalue, AddressSpace::Derived);
+    Type *T = PointerType::get(ctx.types().T_jlvalue, AddressSpace::Derived);
     if (V->getType() != T)
         V = ctx.builder.CreateBitCast(V, T);
     Function *F = prepare_call(pointer_from_objref_func);
     CallInst *Call = ctx.builder.CreateCall(F, V);
     Call->setAttributes(F->getAttributes());
+    ++EmittedPointerFromObjref;
     return Call;
 }
 
@@ -192,7 +287,7 @@ static Value *get_gc_root_for(const jl_cgval_t &x)
 // --- emitting pointers directly into code ---
 
 
-static inline Constant *literal_static_pointer_val(const void *p, Type *T = T_pjlvalue);
+static inline Constant *literal_static_pointer_val(const void *p, Type *T);
 
 static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
 {
@@ -213,11 +308,11 @@ static Value *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr)
             gv = cast_or_null<GlobalVariable>(M->getNamedValue(localname));
     }
     if (gv == nullptr)
-        gv = new GlobalVariable(*M, T_pjlvalue,
+        gv = new GlobalVariable(*M, ctx.types().T_pjlvalue,
                                 false, GlobalVariable::PrivateLinkage,
                                 NULL, localname);
     // LLVM passes sometimes strip metadata when moving load around
-    // since the load at the new location satisfy the same condition as the origional one.
+    // since the load at the new location satisfy the same condition as the original one.
     // Mark the global as constant to LLVM code using our own metadata
     // which is much less likely to be striped.
     gv->setMetadata("julia.constgv", MDNode::get(gv->getContext(), None));
@@ -259,13 +354,13 @@ static Value *literal_pointer_val_slot(jl_codectx_t &ctx, jl_value_t *p)
 {
     // emit a pointer to a jl_value_t* which will allow it to be valid across reloading code
     // also, try to give it a nice name for gdb, for easy identification
-    if (!imaging_mode) {
+    if (!ctx.emission_context.imaging) {
         // TODO: this is an optimization, but is it useful or premature
         // (it'll block any attempt to cache these, but can be simply deleted)
         Module *M = jl_Module;
         GlobalVariable *gv = new GlobalVariable(
-                *M, T_pjlvalue, true, GlobalVariable::PrivateLinkage,
-                literal_static_pointer_val(p));
+                *M, ctx.types().T_pjlvalue, true, GlobalVariable::PrivateLinkage,
+                literal_static_pointer_val(p, ctx.types().T_pjlvalue));
         gv->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
         return gv;
     }
@@ -304,7 +399,7 @@ static size_t dereferenceable_size(jl_value_t *jt)
         // Array has at least this much data
         return sizeof(jl_array_t);
     }
-    else if (jl_is_datatype(jt) && ((jl_datatype_t*)jt)->layout) {
+    else if (jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt)) {
         return jl_datatype_size(jt);
     }
     return 0;
@@ -322,7 +417,7 @@ static unsigned julia_alignment(jl_value_t *jt)
         // and this is the guarantee we have for the GC bits
         return 16;
     }
-    assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->layout);
+    assert(jl_is_datatype(jt) && jl_struct_try_layout((jl_datatype_t*)jt));
     unsigned alignment = jl_datatype_align(jt);
     if (alignment > JL_HEAP_ALIGNMENT)
         return JL_HEAP_ALIGNMENT;
@@ -331,7 +426,11 @@ static unsigned julia_alignment(jl_value_t *jt)
 
 static inline void maybe_mark_argument_dereferenceable(Argument *A, jl_value_t *jt)
 {
+#if JL_LLVM_VERSION >= 140000
+    AttrBuilder B(A->getContext());
+#else
     AttrBuilder B;
+#endif
     B.addAttribute(Attribute::NonNull);
     // The `dereferencable` below does not imply `nonnull` for non addrspace(0) pointers.
     size_t size = dereferenceable_size(jt);
@@ -348,14 +447,14 @@ static inline Instruction *maybe_mark_load_dereferenceable(Instruction *LI, bool
     if (isa<PointerType>(LI->getType())) {
         if (!can_be_null)
             // The `dereferencable` below does not imply `nonnull` for non addrspace(0) pointers.
-            LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(jl_LLVMContext, None));
+            LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(LI->getContext(), None));
         if (size) {
-            Metadata *OP = ConstantAsMetadata::get(ConstantInt::get(T_int64, size));
+            Metadata *OP = ConstantAsMetadata::get(ConstantInt::get(getInt64Ty(LI->getContext()), size));
             LI->setMetadata(can_be_null ? LLVMContext::MD_dereferenceable_or_null : LLVMContext::MD_dereferenceable,
-                            MDNode::get(jl_LLVMContext, { OP }));
+                            MDNode::get(LI->getContext(), { OP }));
             if (align >= 1) {
-                Metadata *OP = ConstantAsMetadata::get(ConstantInt::get(T_int64, align));
-                LI->setMetadata(LLVMContext::MD_align, MDNode::get(jl_LLVMContext, { OP }));
+                Metadata *OP = ConstantAsMetadata::get(ConstantInt::get(getInt64Ty(LI->getContext()), align));
+                LI->setMetadata(LLVMContext::MD_align, MDNode::get(LI->getContext(), { OP }));
             }
         }
     }
@@ -371,31 +470,31 @@ static inline Instruction *maybe_mark_load_dereferenceable(Instruction *LI, bool
     return maybe_mark_load_dereferenceable(LI, can_be_null, size, alignment);
 }
 
-// Returns T_pjlvalue
+// Returns ctx.types().T_pjlvalue
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p)
 {
     if (p == NULL)
-        return V_null;
-    if (!imaging_mode)
-        return literal_static_pointer_val(p);
+        return Constant::getNullValue(ctx.types().T_pjlvalue);
+    if (!ctx.emission_context.imaging)
+        return literal_static_pointer_val(p, ctx.types().T_pjlvalue);
     Value *pgv = literal_pointer_val_slot(ctx, p);
-    return tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(
-            ctx.builder.CreateAlignedLoad(T_pjlvalue, pgv, Align(sizeof(void*))),
+    return tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable(
+            ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))),
             false, jl_typeof(p)));
 }
 
-// Returns T_pjlvalue
+// Returns ctx.types().T_pjlvalue
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_binding_t *p)
 {
     // emit a pointer to any jl_value_t which will be valid across reloading code
     if (p == NULL)
-        return V_null;
-    if (!imaging_mode)
-        return literal_static_pointer_val(p);
+        return Constant::getNullValue(ctx.types().T_pjlvalue);
+    if (!ctx.emission_context.imaging)
+        return literal_static_pointer_val(p, ctx.types().T_pjlvalue);
     // bindings are prefixed with jl_bnd#
     Value *pgv = julia_pgv(ctx, "jl_bnd#", p->name, p->owner, p);
-    return tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(
-            ctx.builder.CreateAlignedLoad(T_pjlvalue, pgv, Align(sizeof(void*))),
+    return tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable(
+            ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, pgv, Align(sizeof(void*))),
             false, sizeof(jl_binding_t), alignof(jl_binding_t)));
 }
 
@@ -405,9 +504,8 @@ static Value *emit_bitcast(jl_codectx_t &ctx, Value *v, Type *jl_value)
     if (isa<PointerType>(jl_value) &&
         v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) {
         // Cast to the proper address space
-        Type *jl_value_addr =
-                PointerType::get(cast<PointerType>(jl_value)->getElementType(),
-                                 v->getType()->getPointerAddressSpace());
+        Type *jl_value_addr = PointerType::getWithSamePointeeType(cast<PointerType>(jl_value), v->getType()->getPointerAddressSpace());
+        ++EmittedPointerBitcast;
         return ctx.builder.CreateBitCast(v, jl_value_addr);
     }
     else {
@@ -421,25 +519,22 @@ static Value *maybe_bitcast(jl_codectx_t &ctx, Value *V, Type *to) {
     return V;
 }
 
-static Value *julia_binding_gv(jl_codectx_t &ctx, Value *bv)
+static Value *julia_binding_pvalue(jl_codectx_t &ctx, Value *bv)
 {
-    Value *offset = ConstantInt::get(T_size, offsetof(jl_binding_t, value) / sizeof(size_t));
-    return ctx.builder.CreateInBoundsGEP(T_prjlvalue, bv, offset);
+    bv = emit_bitcast(ctx, bv, ctx.types().T_pprjlvalue);
+    Value *offset = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_binding_t, value) / sizeof(size_t));
+    return ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, bv, offset);
 }
 
 static Value *julia_binding_gv(jl_codectx_t &ctx, jl_binding_t *b)
 {
-    // emit a literal_pointer_val to the value field of a jl_binding_t
+    // emit a literal_pointer_val to a jl_binding_t
     // binding->value are prefixed with *
-    Value *bv;
-    if (imaging_mode)
-        bv = emit_bitcast(ctx,
-                tbaa_decorate(tbaa_const,
-                              ctx.builder.CreateAlignedLoad(T_pjlvalue, julia_pgv(ctx, "*", b->name, b->owner, b), Align(sizeof(void*)))),
-                T_pprjlvalue);
+    if (ctx.emission_context.imaging)
+        return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue,
+                    julia_pgv(ctx, "*", b->name, b->owner, b), Align(sizeof(void*))));
     else
-        bv = ConstantExpr::getBitCast(literal_static_pointer_val(b), T_pprjlvalue);
-    return julia_binding_gv(ctx, bv);
+        return literal_static_pointer_val(b, ctx.types().T_pjlvalue);
 }
 
 // --- mapping between julia and llvm types ---
@@ -455,9 +550,8 @@ static bool type_is_permalloc(jl_value_t *typ)
         typ == (jl_value_t*)jl_uint8_type;
 }
 
-static unsigned convert_struct_offset(Type *lty, unsigned byte_offset)
+static unsigned convert_struct_offset(const llvm::DataLayout &DL, Type *lty, unsigned byte_offset)
 {
-    const DataLayout &DL = jl_data_layout;
     const StructLayout *SL = DL.getStructLayout(cast<StructType>(lty));
     unsigned idx = SL->getElementContainingOffset(byte_offset);
     assert(SL->getElementOffset(idx) == byte_offset);
@@ -466,7 +560,7 @@ static unsigned convert_struct_offset(Type *lty, unsigned byte_offset)
 
 static unsigned convert_struct_offset(jl_codectx_t &ctx, Type *lty, unsigned byte_offset)
 {
-    return convert_struct_offset(lty, byte_offset);
+    return convert_struct_offset(ctx.builder.GetInsertBlock()->getModule()->getDataLayout(), lty, byte_offset);
 }
 
 static Value *emit_struct_gep(jl_codectx_t &ctx, Type *lty, Value *base, unsigned byte_offset)
@@ -475,53 +569,53 @@ static Value *emit_struct_gep(jl_codectx_t &ctx, Type *lty, Value *base, unsigne
     return ctx.builder.CreateConstInBoundsGEP2_32(lty, base, 0, idx);
 }
 
-static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_unionall_t *ua, bool *isboxed, bool llvmcall=false);
+static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall=false);
 
-static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, bool *isboxed)
+static Type *_julia_type_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed)
 {
     // this function converts a Julia Type into the equivalent LLVM type
     if (isboxed) *isboxed = false;
     if (jt == (jl_value_t*)jl_bottom_type)
-        return T_void;
+        return getVoidTy(ctxt);
     if (jl_is_concrete_immutable(jt)) {
         if (jl_datatype_nbits(jt) == 0)
-            return T_void;
-        Type *t = _julia_struct_to_llvm(ctx, jt, NULL, isboxed);
+            return getVoidTy(ctxt);
+        Type *t = _julia_struct_to_llvm(ctx, ctxt, jt, isboxed);
         assert(t != NULL);
         return t;
     }
     if (isboxed) *isboxed = true;
-    return T_prjlvalue;
+    return JuliaType::get_prjlvalue_ty(ctxt);
 }
 
 static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed)
 {
-    return _julia_type_to_llvm(&ctx.emission_context, jt, isboxed);
+    return _julia_type_to_llvm(&ctx.emission_context, ctx.builder.getContext(), jt, isboxed);
 }
 
 extern "C" JL_DLLEXPORT
-Type *jl_type_to_llvm(jl_value_t *jt, bool *isboxed)
+Type *jl_type_to_llvm_impl(jl_value_t *jt, LLVMContextRef ctxt, bool *isboxed)
 {
-    return _julia_type_to_llvm(NULL, jt, isboxed);
+    return _julia_type_to_llvm(NULL, *unwrap(ctxt), jt, isboxed);
 }
 
 
 // converts a julia bitstype into the equivalent LLVM bitstype
-static Type *bitstype_to_llvm(jl_value_t *bt, bool llvmcall = false)
+static Type *bitstype_to_llvm(jl_value_t *bt, LLVMContext &ctxt, bool llvmcall = false)
 {
     assert(jl_is_primitivetype(bt));
     if (bt == (jl_value_t*)jl_bool_type)
-        return T_int8;
+        return getInt8Ty(ctxt);
     if (bt == (jl_value_t*)jl_int32_type)
-        return T_int32;
+        return getInt32Ty(ctxt);
     if (bt == (jl_value_t*)jl_int64_type)
-        return T_int64;
-    if (llvmcall && (bt == (jl_value_t*)jl_float16_type))
-        return T_float16;
+        return getInt64Ty(ctxt);
+    if (bt == (jl_value_t*)jl_float16_type)
+        return getHalfTy(ctxt);
     if (bt == (jl_value_t*)jl_float32_type)
-        return T_float32;
+        return getFloatTy(ctxt);
     if (bt == (jl_value_t*)jl_float64_type)
-        return T_float64;
+        return getDoubleTy(ctxt);
     if (jl_is_llvmpointer_type(bt)) {
         jl_value_t *as_param = jl_tparam1(bt);
         int as;
@@ -531,36 +625,17 @@ static Type *bitstype_to_llvm(jl_value_t *bt, bool llvmcall = false)
             as = jl_unbox_int64(as_param);
         else
             jl_error("invalid pointer address space");
-        return PointerType::get(T_int8, as);
+        return PointerType::get(getInt8Ty(ctxt), as);
     }
     int nb = jl_datatype_size(bt);
-    return Type::getIntNTy(jl_LLVMContext, nb * 8);
+    return Type::getIntNTy(ctxt, nb * 8);
 }
 
 static bool jl_type_hasptr(jl_value_t* typ)
-{ // assumes that jl_stored_inline(typ) is true
+{ // assumes that jl_stored_inline(typ) is true (and therefore that layout is defined)
     return jl_is_datatype(typ) && ((jl_datatype_t*)typ)->layout->npointers > 0;
 }
 
-// compute whether all concrete subtypes of this type have the same layout
-// (which is conservatively approximated here by asking whether the types of any of the
-// fields depend on any of the parameters of the containing type)
-static bool julia_struct_has_layout(jl_datatype_t *dt, jl_unionall_t *ua)
-{
-    if (dt->layout)
-        return true;
-    if (ua) {
-        jl_svec_t *types = jl_get_fieldtypes(dt);
-        size_t i, ntypes = jl_svec_len(types);
-        for (i = 0; i < ntypes; i++) {
-            jl_value_t *ty = jl_svecref(types, i);
-            if (jl_has_typevar_from_unionall(ty, ua))
-                return false;
-        }
-    }
-    return true;
-}
-
 static unsigned jl_field_align(jl_datatype_t *dt, size_t i)
 {
     unsigned al = jl_field_offset(dt, i);
@@ -569,31 +644,33 @@ static unsigned jl_field_align(jl_datatype_t *dt, size_t i)
     return std::min({al, (unsigned)jl_datatype_align(dt), (unsigned)JL_HEAP_ALIGNMENT});
 }
 
-static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_unionall_t *ua_env, bool *isboxed, bool llvmcall)
+static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, LLVMContext &ctxt, jl_value_t *jt, bool *isboxed, bool llvmcall)
 {
     // this function converts a Julia Type into the equivalent LLVM struct
     // use this where C-compatible (unboxed) structs are desired
     // use julia_type_to_llvm directly when you want to preserve Julia's type semantics
     if (isboxed) *isboxed = false;
     if (jt == (jl_value_t*)jl_bottom_type)
-        return T_void;
+        return getVoidTy(ctxt);
     if (jl_is_primitivetype(jt))
-        return bitstype_to_llvm(jt, llvmcall);
+        return bitstype_to_llvm(jt, ctxt, llvmcall);
     jl_datatype_t *jst = (jl_datatype_t*)jt;
     if (jl_is_structtype(jt) && !(jst->layout && jl_is_layout_opaque(jst->layout))) {
         bool isTuple = jl_is_tuple_type(jt);
         jl_svec_t *ftypes = jl_get_fieldtypes(jst);
         size_t i, ntypes = jl_svec_len(ftypes);
-        if (ntypes == 0 || (jst->layout && jl_datatype_nbits(jst) == 0))
-            return T_void;
+        if (!jl_struct_try_layout(jst)) {
+            assert(0 && "caller should have checked jl_type_mappable_to_c already");
+            abort();
+        }
+        if (ntypes == 0 || jl_datatype_nbits(jst) == 0)
+            return getVoidTy(ctxt);
         Type *_struct_decl = NULL;
         // TODO: we should probably make a temporary root for `jst` somewhere
         // don't use pre-filled struct_decl for llvmcall (f16, etc. may be different)
         Type *&struct_decl = (ctx && !llvmcall ? ctx->llvmtypes[jst] : _struct_decl);
         if (struct_decl)
             return struct_decl;
-        if (!julia_struct_has_layout(jst, ua_env))
-            return NULL;
         std::vector<Type*> latypes(0);
         bool isarray = true;
         bool isvector = true;
@@ -605,46 +682,52 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
             if (jlasttype != NULL && ty != jlasttype)
                 isvector = false;
             jlasttype = ty;
-            size_t fsz = 0, al = 0;
-            bool isptr = !jl_islayout_inline(ty, &fsz, &al);
-            if (jst->layout) {
-                assert(isptr == jl_field_isptr(jst, i));
-                assert((isptr ? sizeof(void*) : fsz + jl_is_uniontype(ty)) == jl_field_size(jst, i));
+            if (jl_field_isatomic(jst, i)) {
+                // TODO: eventually support this?
+                // though it's a bit unclear how the implicit load should be interpreted
+                assert(0 && "caller should have checked jl_type_mappable_to_c already");
+                abort();
             }
             Type *lty;
-            if (isptr) {
-                lty = T_prjlvalue;
+            if (jl_field_isptr(jst, i)) {
+                lty = JuliaType::get_prjlvalue_ty(ctxt);
                 isvector = false;
             }
             else if (ty == (jl_value_t*)jl_bool_type) {
-                lty = T_int8;
+                lty = getInt8Ty(ctxt);
             }
             else if (jl_is_uniontype(ty)) {
                 // pick an Integer type size such that alignment will generally be correct,
                 // and always end with an Int8 (selector byte).
                 // We may need to insert padding first to get to the right offset
-                if (al > MAX_ALIGN) {
-                    Type *AlignmentType = ArrayType::get(VectorType::get(T_int8, al), 0);
-                    latypes.push_back(AlignmentType);
-                    al = MAX_ALIGN;
+                size_t fsz = 0, al = 0;
+                bool isptr = !jl_islayout_inline(ty, &fsz, &al);
+                assert(!isptr && fsz == jl_field_size(jst, i) - 1); (void)isptr;
+                if (fsz > 0) {
+                    if (al > MAX_ALIGN) {
+                        Type *AlignmentType;
+                        AlignmentType = ArrayType::get(FixedVectorType::get(getInt8Ty(ctxt), al), 0);
+                        latypes.push_back(AlignmentType);
+                        al = MAX_ALIGN;
+                    }
+                    Type *AlignmentType = IntegerType::get(ctxt, 8 * al);
+                    unsigned NumATy = fsz / al;
+                    unsigned remainder = fsz % al;
+                    assert(al == 1 || NumATy > 0);
+                    while (NumATy--)
+                        latypes.push_back(AlignmentType);
+                    while (remainder--)
+                        latypes.push_back(getInt8Ty(ctxt));
                 }
-                assert(al <= jl_field_align(jst, i));
-                Type *AlignmentType = IntegerType::get(jl_LLVMContext, 8 * al);
-                unsigned NumATy = fsz / al;
-                unsigned remainder = fsz % al;
-                assert(al == 1 || NumATy > 0);
-                while (NumATy--)
-                    latypes.push_back(AlignmentType);
-                while (remainder--)
-                    latypes.push_back(T_int8);
-                latypes.push_back(T_int8);
+                latypes.push_back(getInt8Ty(ctxt));
                 isarray = false;
                 allghost = false;
                 continue;
             }
             else {
-                lty = _julia_struct_to_llvm(ctx, ty, NULL, &isptr, llvmcall);
-                assert(!isptr);
+                bool isptr;
+                lty = _julia_struct_to_llvm(ctx, ctxt, ty, &isptr, llvmcall);
+                assert(lty && !isptr);
             }
             if (lasttype != NULL && lasttype != lty)
                 isarray = false;
@@ -656,7 +739,7 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
         }
         if (allghost) {
             assert(jst->layout == NULL); // otherwise should have been caught above
-            struct_decl = T_void;
+            struct_decl = getVoidTy(ctxt);
         }
         else if (jl_is_vecelement_type(jt) && !jl_is_uniontype(jl_svecref(ftypes, 0))) {
             // VecElement type is unwrapped in LLVM (when possible)
@@ -664,21 +747,21 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
         }
         else if (isarray && !type_is_ghost(lasttype)) {
             if (isTuple && isvector && jl_special_vector_alignment(ntypes, jlasttype) != 0)
-                struct_decl = VectorType::get(lasttype, ntypes);
+                struct_decl = FixedVectorType::get(lasttype, ntypes);
             else if (isTuple || !llvmcall)
                 struct_decl = ArrayType::get(lasttype, ntypes);
             else
-                struct_decl = StructType::get(jl_LLVMContext, latypes);
+                struct_decl = StructType::get(ctxt, latypes);
         }
         else {
 #if 0 // stress-test code that tries to assume julia-index == llvm-index
       // (also requires change to emit_new_struct to not assume 0 == 0)
             if (!isTuple && latypes.size() > 1) {
-                Type *NoopType = ArrayType::get(T_int1, 0);
+                Type *NoopType = ArrayType::get(getInt1Ty(ctxt), 0);
                 latypes.insert(latypes.begin(), NoopType);
             }
 #endif
-            struct_decl = StructType::get(jl_LLVMContext, latypes);
+            struct_decl = StructType::get(ctxt, latypes);
         }
         return struct_decl;
     }
@@ -686,29 +769,22 @@ static Type *_julia_struct_to_llvm(jl_codegen_params_t *ctx, jl_value_t *jt, jl_
     // if (jl_is_uniontype(ty)) {
     //  // pick an Integer type size such that alignment will be correct
     //  // and always end with an Int8 (selector byte)
-    //  lty = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), fsz / al);
+    //  lty = ArrayType::get(IntegerType::get(lty->getContext(), 8 * al), fsz / al);
     //  std::vector<Type*> Elements(2);
     //  Elements[0] = lty;
-    //  Elements[1] = T_int8;
+    //  Elements[1] = getInt8Ty(ctxt);
     //  unsigned remainder = fsz % al;
     //  while (remainder--)
-    //      Elements.push_back(T_int8);
-    //  lty = StructType::get(jl_LLVMContext, makeArrayRef(Elements));
+    //      Elements.push_back(getInt8Ty(ctxt));
+    //  lty = StructType::get(lty->getContext(), makeArrayRef(Elements));
     // }
     if (isboxed) *isboxed = true;
-    return T_prjlvalue;
-}
-
-static Type *julia_struct_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, jl_unionall_t *ua, bool *isboxed)
-{
-    return _julia_struct_to_llvm(&ctx.emission_context, jt, ua, isboxed);
+    return JuliaType::get_prjlvalue_ty(ctxt);
 }
 
-bool jl_type_mappable_to_c(jl_value_t *ty)
+static Type *julia_struct_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed)
 {
-    jl_codegen_params_t params;
-    bool toboxed;
-    return _julia_struct_to_llvm(&params, ty, NULL, &toboxed) != NULL;
+    return _julia_struct_to_llvm(&ctx.emission_context, ctx.builder.getContext(), jt, isboxed);
 }
 
 static bool is_datatype_all_pointers(jl_datatype_t *dt)
@@ -728,13 +804,13 @@ static bool is_tupletype_homogeneous(jl_svec_t *t, bool allow_va = false)
     if (l > 0) {
         jl_value_t *t0 = jl_svecref(t, 0);
         if (!jl_is_concrete_type(t0)) {
-            if (allow_va && jl_is_vararg_type(t0) &&
+            if (allow_va && jl_is_vararg(t0) &&
                   jl_is_concrete_type(jl_unwrap_vararg(t0)))
                 return true;
             return false;
         }
         for (i = 1; i < l; i++) {
-            if (allow_va && i == l - 1 && jl_is_vararg_type(jl_svecref(t, i))) {
+            if (allow_va && i == l - 1 && jl_is_vararg(jl_svecref(t, i))) {
                 if (t0 != jl_unwrap_vararg(jl_svecref(t, i)))
                     return false;
                 continue;
@@ -793,39 +869,44 @@ static unsigned get_box_tindex(jl_datatype_t *jt, jl_value_t *ut)
 
 static Value *emit_nthptr_addr(jl_codectx_t &ctx, Value *v, ssize_t n, bool gctracked = true)
 {
+    ++EmittedNthPtrAddr;
     return ctx.builder.CreateInBoundsGEP(
-            T_prjlvalue,
-            emit_bitcast(ctx, maybe_decay_tracked(ctx, v), T_pprjlvalue),
-            ConstantInt::get(T_size, n));
+            ctx.types().T_prjlvalue,
+            emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue),
+            ConstantInt::get(getSizeTy(ctx.builder.getContext()), n));
 }
 
 static Value *emit_nthptr_addr(jl_codectx_t &ctx, Value *v, Value *idx)
 {
+    ++EmittedNthPtrAddr;
     return ctx.builder.CreateInBoundsGEP(
-            T_prjlvalue,
-            emit_bitcast(ctx, maybe_decay_tracked(ctx, v), T_pprjlvalue),
+            ctx.types().T_prjlvalue,
+            emit_bitcast(ctx, maybe_decay_tracked(ctx, v), ctx.types().T_pprjlvalue),
             idx);
 }
 
-static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *ptype)
+static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, Value *idx, MDNode *tbaa, Type *type)
 {
-    // p = (jl_value_t**)v; *(ptype)&p[n]
+    // p = (jl_value_t**)v; *(type*)&p[n]
     Value *vptr = emit_nthptr_addr(ctx, v, idx);
-    return cast<LoadInst>(tbaa_decorate(tbaa, ctx.builder.CreateLoad(emit_bitcast(ctx, vptr, ptype))));
+    return cast<LoadInst>(tbaa_decorate(tbaa, ctx.builder.CreateLoad(type,
+        emit_bitcast(ctx, vptr, PointerType::get(type, 0)))));
 }
 
-static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, ssize_t n, MDNode *tbaa, Type *ptype)
+static LoadInst *emit_nthptr_recast(jl_codectx_t &ctx, Value *v, ssize_t n, MDNode *tbaa, Type *type)
 {
-    // p = (jl_value_t**)v; *(ptype)&p[n]
+    // p = (jl_value_t**)v; *(type*)&p[n]
     Value *vptr = emit_nthptr_addr(ctx, v, n);
-    return cast<LoadInst>(tbaa_decorate(tbaa, ctx.builder.CreateLoad(emit_bitcast(ctx, vptr, ptype))));
-}
+    return cast<LoadInst>(tbaa_decorate(tbaa, ctx.builder.CreateLoad(type,
+        emit_bitcast(ctx, vptr, PointerType::get(type, 0)))));
+ }
 
 static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &v);
 
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static Value *emit_typeof(jl_codectx_t &ctx, Value *tt)
 {
+    ++EmittedTypeof;
     assert(tt != NULL && !isa<AllocaInst>(tt) && "expected a conditionally boxed value");
     return ctx.builder.CreateCall(prepare_call(jl_typeof_func), {tt});
 }
@@ -834,27 +915,27 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p)
 {
     // given p, compute its type
     if (p.constant)
-        return mark_julia_const(jl_typeof(p.constant));
+        return mark_julia_const(ctx, jl_typeof(p.constant));
     if (p.isboxed && !jl_is_concrete_type(p.typ)) {
         if (jl_is_type_type(p.typ)) {
             jl_value_t *tp = jl_tparam0(p.typ);
             if (!jl_is_type(tp) || jl_is_concrete_type(tp)) {
                 // convert 1::Type{1} ==> typeof(1) ==> Int
-                return mark_julia_const(jl_typeof(tp));
+                return mark_julia_const(ctx, jl_typeof(tp));
             }
         }
         return mark_julia_type(ctx, emit_typeof(ctx, p.V), true, jl_datatype_type);
     }
     if (p.TIndex) {
-        Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(T_int8, 0x7f));
+        Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
         bool allunboxed = is_uniontype_allunboxed(p.typ);
-        Value *datatype_or_p = imaging_mode ? Constant::getNullValue(T_ppjlvalue) : V_rnull;
+        Value *datatype_or_p = ctx.emission_context.imaging ? Constant::getNullValue(ctx.types().T_ppjlvalue) : Constant::getNullValue(ctx.types().T_prjlvalue);
         unsigned counter = 0;
         for_each_uniontype_small(
             [&](unsigned idx, jl_datatype_t *jt) {
-                Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(T_int8, idx));
+                Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx));
                 Value *ptr;
-                if (imaging_mode) {
+                if (ctx.emission_context.imaging) {
                     ptr = literal_pointer_val_slot(ctx, (jl_value_t*)jt);
                 }
                 else {
@@ -865,17 +946,17 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p)
             p.typ,
             counter);
         auto emit_unboxty = [&] () -> Value* {
-            if (imaging_mode)
+            if (ctx.emission_context.imaging)
                 return track_pjlvalue(
-                    ctx, tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_pjlvalue, datatype_or_p, Align(sizeof(void*)))));
+                    ctx, tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, datatype_or_p, Align(sizeof(void*)))));
             return datatype_or_p;
         };
         Value *res;
         if (!allunboxed) {
             Value *isnull = ctx.builder.CreateIsNull(datatype_or_p);
-            BasicBlock *boxBB = BasicBlock::Create(jl_LLVMContext, "boxed", ctx.f);
-            BasicBlock *unboxBB = BasicBlock::Create(jl_LLVMContext, "unboxed", ctx.f);
-            BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge", ctx.f);
+            BasicBlock *boxBB = BasicBlock::Create(ctx.builder.getContext(), "boxed", ctx.f);
+            BasicBlock *unboxBB = BasicBlock::Create(ctx.builder.getContext(), "unboxed", ctx.f);
+            BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge", ctx.f);
             ctx.builder.CreateCondBr(isnull, boxBB, unboxBB);
             ctx.builder.SetInsertPoint(boxBB);
             auto boxTy = emit_typeof(ctx, p.Vboxed);
@@ -886,7 +967,7 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p)
             ctx.builder.CreateBr(mergeBB);
             unboxBB = ctx.builder.GetInsertBlock(); // could have changed
             ctx.builder.SetInsertPoint(mergeBB);
-            auto phi = ctx.builder.CreatePHI(T_prjlvalue, 2);
+            auto phi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
             phi->addIncoming(boxTy, boxBB);
             phi->addIncoming(unboxTy, unboxBB);
             res = phi;
@@ -896,10 +977,10 @@ static jl_cgval_t emit_typeof(jl_codectx_t &ctx, const jl_cgval_t &p)
         }
         return mark_julia_type(ctx, res, true, jl_datatype_type);
     }
-    return mark_julia_const(p.typ);
+    return mark_julia_const(ctx, p.typ);
 }
 
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p)
 {
     return boxed(ctx, emit_typeof(ctx, p));
@@ -907,46 +988,46 @@ static Value *emit_typeof_boxed(jl_codectx_t &ctx, const jl_cgval_t &p)
 
 static Value *emit_datatype_types(jl_codectx_t &ctx, Value *dt)
 {
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), T_ppjlvalue);
-    Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, types) / sizeof(void*));
-    return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(
-                T_pjlvalue, ctx.builder.CreateInBoundsGEP(T_pjlvalue, Ptr, Idx), Align(sizeof(void*))));
+    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppjlvalue);
+    Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, types) / sizeof(void*));
+    return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(
+                ctx.types().T_pjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_pjlvalue, Ptr, Idx), Align(sizeof(void*))));
 }
 
 static Value *emit_datatype_nfields(jl_codectx_t &ctx, Value *dt)
 {
-    Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), T_psize);
-    return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_size, type_svec, Align(sizeof(void*))));
+    Value *type_svec = emit_bitcast(ctx, emit_datatype_types(ctx, dt), getSizePtrTy(ctx.builder.getContext()));
+    return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), type_svec, Align(sizeof(void*))));
 }
 
 static Value *emit_datatype_size(jl_codectx_t &ctx, Value *dt)
 {
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), T_pint32);
-    Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, size) / sizeof(int));
-    return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_int32, ctx.builder.CreateInBoundsGEP(T_int32, Ptr, Idx), Align(sizeof(int32_t))));
+    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), getInt32PtrTy(ctx.builder.getContext()));
+    Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, size) / sizeof(int));
+    return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt32Ty(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int32_t))));
 }
 
 /* this is valid code, it's simply unused
 static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
 {
     if (p.TIndex) {
-        Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(T_int8, 0x7f));
-        Value *size = ConstantInt::get(T_int32, -1);
+        Value *tindex = ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
+        Value *size = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), -1);
         unsigned counter = 0;
         bool allunboxed = for_each_uniontype_small(
                 [&](unsigned idx, jl_datatype_t *jt) {
-                    Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(T_int8, idx));
-                    size = ctx.builder.CreateSelect(cmp, ConstantInt::get(T_int32, jl_datatype_size(jt)), size);
+                    Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx));
+                    size = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(jt)), size);
                 },
                 p.typ,
                 counter);
         if (!allunboxed && p.ispointer() && p.V && !isa<AllocaInst>(p.V)) {
             BasicBlock *currBB = ctx.builder.GetInsertBlock();
-            BasicBlock *dynloadBB = BasicBlock::Create(jl_LLVMContext, "dyn_sizeof", ctx.f);
-            BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_sizeof", ctx.f);
+            BasicBlock *dynloadBB = BasicBlock::Create(ctx.builder.getContext(), "dyn_sizeof", ctx.f);
+            BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_sizeof", ctx.f);
             Value *isboxed = ctx.builder.CreateICmpNE(
-                    ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(T_int8, 0x80)),
-                    ConstantInt::get(T_int8, 0));
+                    ctx.builder.CreateAnd(p.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                    ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             ctx.builder.CreateCondBr(isboxed, dynloadBB, postBB);
             ctx.builder.SetInsertPoint(dynloadBB);
             Value *datatype = emit_typeof(p.V);
@@ -954,19 +1035,19 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
             ctx.builder.CreateBr(postBB);
             dynloadBB = ctx.builder.GetInsertBlock(); // could have changed
             ctx.builder.SetInsertPoint(postBB);
-            PHINode *sizeof_merge = ctx.builder.CreatePHI(T_int32, 2);
+            PHINode *sizeof_merge = ctx.builder.CreatePHI(getInt32Ty(ctx.builder.getContext()), 2);
             sizeof_merge->addIncoming(dyn_size, dynloadBB);
             sizeof_merge->addIncoming(size, currBB);
             size = sizeof_merge;
         }
 #ifndef NDEBUG
         // try to catch codegen errors early, before it uses this to memcpy over the entire stack
-        CreateConditionalAbort(ctx.builder, ctx.builder.CreateICmpEQ(size, ConstantInt::get(T_int32, -1)));
+        CreateConditionalAbort(ctx.builder, ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), -1)));
 #endif
         return size;
     }
     else if (jl_is_concrete_type(p.typ)) {
-        return ConstantInt::get(T_int32, jl_datatype_size(p.typ));
+        return ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(p.typ));
     }
     else {
         Value *datatype = emit_typeof_boxed(ctx, p);
@@ -978,37 +1059,29 @@ static Value *emit_sizeof(jl_codectx_t &ctx, const jl_cgval_t &p)
 
 static Value *emit_datatype_mutabl(jl_codectx_t &ctx, Value *dt)
 {
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), T_pint8);
-    Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, mutabl));
-    Value *mutabl = tbaa_decorate(tbaa_const,
-            ctx.builder.CreateAlignedLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Ptr, Idx), Align(1)));
-    return ctx.builder.CreateTrunc(mutabl, T_int1);
+    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), ctx.types().T_ppint8);
+    Value *Idx = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_datatype_t, name));
+    Value *Nam = tbaa_decorate(ctx.tbaa().tbaa_const,
+            ctx.builder.CreateAlignedLoad(getInt8PtrTy(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8PtrTy(ctx.builder.getContext()), Ptr, Idx), Align(sizeof(int8_t*))));
+    Value *Idx2 = ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_typename_t, n_uninitialized) + sizeof(((jl_typename_t*)nullptr)->n_uninitialized));
+    Value *mutabl = tbaa_decorate(ctx.tbaa().tbaa_const,
+            ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), Nam, Idx2), Align(1)));
+    mutabl = ctx.builder.CreateLShr(mutabl, 1);
+    return ctx.builder.CreateTrunc(mutabl, getInt1Ty(ctx.builder.getContext()));
 }
 
-/* this is valid code, it's simply unused
-static Value *emit_datatype_abstract(jl_codectx_t &ctx, Value *dt)
-{
-    Value *Ptr = emit_bitcast(ctx, decay_derived(ctx, dt), T_pint8);
-    Value *Idx = ConstantInt::get(T_size, offsetof(jl_datatype_t, abstract));
-
-    Value *abstract = tbaa_decorate(tbaa_const,
-            ctx.builder.CreateAlignedLoad(T_int8, ctx.builder.CreateInBoundsGEP(T_int8, Ptr, Idx), Align(1)));
-    return ctx.builder.CreateTrunc(abstract, T_int1);
-}
-*/
-
 static Value *emit_datatype_isprimitivetype(jl_codectx_t &ctx, Value *dt)
 {
     Value *immut = ctx.builder.CreateNot(emit_datatype_mutabl(ctx, dt));
-    Value *nofields = ctx.builder.CreateICmpEQ(emit_datatype_nfields(ctx, dt), V_size0);
-    Value *sized = ctx.builder.CreateICmpSGT(emit_datatype_size(ctx, dt), ConstantInt::get(T_int32, 0));
+    Value *nofields = ctx.builder.CreateICmpEQ(emit_datatype_nfields(ctx, dt), Constant::getNullValue(getSizeTy(ctx.builder.getContext())));
+    Value *sized = ctx.builder.CreateICmpSGT(emit_datatype_size(ctx, dt), ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
     return ctx.builder.CreateAnd(immut, ctx.builder.CreateAnd(nofields, sized));
 }
 
 static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt)
 {
     Value *vptr = emit_nthptr_addr(ctx, dt, (ssize_t)(offsetof(jl_datatype_t, name) / sizeof(char*)));
-    return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, vptr, Align(sizeof(void*))));
+    return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, vptr, Align(sizeof(void*))));
 }
 
 // --- generating various error checks ---
@@ -1016,27 +1089,34 @@ static Value *emit_datatype_name(jl_codectx_t &ctx, Value *dt)
 // the error is always thrown. This may cause non dominated use
 // of SSA value error in the verifier.
 
-static void just_emit_error(jl_codectx_t &ctx, const std::string &txt)
+static void just_emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt)
 {
-    ctx.builder.CreateCall(prepare_call(jlerror_func), stringConstPtr(ctx.emission_context, ctx.builder, txt));
+    ++EmittedErrors;
+    ctx.builder.CreateCall(F, stringConstPtr(ctx.emission_context, ctx.builder, txt));
 }
 
-static void emit_error(jl_codectx_t &ctx, const std::string &txt)
+static void emit_error(jl_codectx_t &ctx, Function *F, const std::string &txt)
 {
-    just_emit_error(ctx, txt);
+    just_emit_error(ctx, F, txt);
     ctx.builder.CreateUnreachable();
-    BasicBlock *cont = BasicBlock::Create(jl_LLVMContext,"after_error",ctx.f);
+    BasicBlock *cont = BasicBlock::Create(ctx.builder.getContext(), "after_error", ctx.f);
     ctx.builder.SetInsertPoint(cont);
 }
 
+static void emit_error(jl_codectx_t &ctx, const std::string &txt)
+{
+    emit_error(ctx, prepare_call(jlerror_func), txt);
+}
+
 // DO NOT PASS IN A CONST CONDITION!
 static void error_unless(jl_codectx_t &ctx, Value *cond, const std::string &msg)
 {
-    BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext,"fail",ctx.f);
-    BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext,"pass");
+    ++EmittedConditionalErrors;
+    BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f);
+    BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass");
     ctx.builder.CreateCondBr(cond, passBB, failBB);
     ctx.builder.SetInsertPoint(failBB);
-    just_emit_error(ctx, msg);
+    just_emit_error(ctx, prepare_call(jlerror_func), msg);
     ctx.builder.CreateUnreachable();
     ctx.f->getBasicBlockList().push_back(passBB);
     ctx.builder.SetInsertPoint(passBB);
@@ -1045,10 +1125,11 @@ static void error_unless(jl_codectx_t &ctx, Value *cond, const std::string &msg)
 static void raise_exception(jl_codectx_t &ctx, Value *exc,
                             BasicBlock *contBB=nullptr)
 {
+    ++EmittedExceptions;
     ctx.builder.CreateCall(prepare_call(jlthrow_func), { mark_callee_rooted(ctx, exc) });
     ctx.builder.CreateUnreachable();
     if (!contBB) {
-        contBB = BasicBlock::Create(jl_LLVMContext, "after_throw", ctx.f);
+        contBB = BasicBlock::Create(ctx.builder.getContext(), "after_throw", ctx.f);
     }
     else {
         ctx.f->getBasicBlockList().push_back(contBB);
@@ -1059,8 +1140,9 @@ static void raise_exception(jl_codectx_t &ctx, Value *exc,
 // DO NOT PASS IN A CONST CONDITION!
 static void raise_exception_unless(jl_codectx_t &ctx, Value *cond, Value *exc)
 {
-    BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext,"fail",ctx.f);
-    BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext,"pass");
+    ++EmittedConditionalExceptions;
+    BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(),"fail",ctx.f);
+    BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(),"pass");
     ctx.builder.CreateCondBr(cond, passBB, failBB);
     ctx.builder.SetInsertPoint(failBB);
     raise_exception(ctx, exc, passBB);
@@ -1068,9 +1150,11 @@ static void raise_exception_unless(jl_codectx_t &ctx, Value *cond, Value *exc)
 
 static Value *null_pointer_cmp(jl_codectx_t &ctx, Value *v)
 {
+    ++EmittedNullchecks;
     return ctx.builder.CreateICmpNE(v, Constant::getNullValue(v->getType()));
 }
 
+
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
 static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck = nullptr)
@@ -1083,6 +1167,63 @@ static void null_pointer_check(jl_codectx_t &ctx, Value *v, Value **nullcheck =
             literal_pointer_val(ctx, jl_undefref_exception));
 }
 
+template<typename Func>
+static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, Value *defval, Func &&func)
+{
+    if (auto Cond = dyn_cast<ConstantInt>(ifnot)) {
+        if (Cond->isZero())
+            return defval;
+        return func();
+    }
+    ++EmittedGuards;
+    BasicBlock *currBB = ctx.builder.GetInsertBlock();
+    BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "guard_pass", ctx.f);
+    BasicBlock *exitBB = BasicBlock::Create(ctx.builder.getContext(), "guard_exit", ctx.f);
+    ctx.builder.CreateCondBr(ifnot, passBB, exitBB);
+    ctx.builder.SetInsertPoint(passBB);
+    auto res = func();
+    passBB = ctx.builder.GetInsertBlock();
+    ctx.builder.CreateBr(exitBB);
+    ctx.builder.SetInsertPoint(exitBB);
+    if (defval == nullptr)
+        return nullptr;
+    PHINode *phi = ctx.builder.CreatePHI(defval->getType(), 2);
+    phi->addIncoming(defval, currBB);
+    phi->addIncoming(res, passBB);
+    return phi;
+}
+
+template<typename Func>
+static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, bool defval, Func &&func)
+{
+    return emit_guarded_test(ctx, ifnot, ConstantInt::get(getInt1Ty(ctx.builder.getContext()), defval), func);
+}
+
+template<typename Func>
+static Value *emit_nullcheck_guard(jl_codectx_t &ctx, Value *nullcheck, Func &&func)
+{
+    if (!nullcheck)
+        return func();
+    return emit_guarded_test(ctx, null_pointer_cmp(ctx, nullcheck), false, func);
+}
+
+template<typename Func>
+static Value *emit_nullcheck_guard2(jl_codectx_t &ctx, Value *nullcheck1,
+                                    Value *nullcheck2, Func &&func)
+{
+    if (!nullcheck1)
+        return emit_nullcheck_guard(ctx, nullcheck2, func);
+    if (!nullcheck2)
+        return emit_nullcheck_guard(ctx, nullcheck1, func);
+    nullcheck1 = null_pointer_cmp(ctx, nullcheck1);
+    nullcheck2 = null_pointer_cmp(ctx, nullcheck2);
+    // If both are NULL, return true.
+    return emit_guarded_test(ctx, ctx.builder.CreateOr(nullcheck1, nullcheck2), true, [&] {
+        return emit_guarded_test(ctx, ctx.builder.CreateAnd(nullcheck1, nullcheck2),
+                                 false, func);
+    });
+}
+
 static void emit_type_error(jl_codectx_t &ctx, const jl_cgval_t &x, Value *type, const std::string &msg)
 {
     Value *msg_val = stringConstPtr(ctx.emission_context, ctx.builder, msg);
@@ -1107,7 +1248,7 @@ static bool _can_optimize_isa(jl_value_t *type, int &counter)
     if (jl_is_concrete_type(type))
         return true;
     jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(type);
-    if (jl_is_datatype(dt) && !dt->abstract && jl_subtype(dt->name->wrapper, type))
+    if (jl_is_datatype(dt) && !dt->name->abstract && jl_subtype(dt->name->wrapper, type))
         return true;
     return false;
 }
@@ -1118,25 +1259,39 @@ static bool can_optimize_isa_union(jl_uniontype_t *type)
     return (_can_optimize_isa(type->a, counter) && _can_optimize_isa(type->b, counter));
 }
 
+// a simple case of emit_isa that is obvious not to include a safe-point
+static Value *emit_exactly_isa(jl_codectx_t &ctx, const jl_cgval_t &arg, jl_value_t *dt)
+{
+    assert(jl_is_concrete_type(dt));
+    return ctx.builder.CreateICmpEQ(
+            emit_typeof_boxed(ctx, arg),
+            track_pjlvalue(ctx, literal_pointer_val(ctx, dt)));
+}
+
 static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
                                         jl_value_t *type, const std::string *msg);
 
 static void emit_isa_union(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type,
-                           SmallVectorImpl<std::pair<BasicBlock*,Value*>> &bbs)
+                           SmallVectorImpl<std::pair<std::pair<BasicBlock*,BasicBlock*>,Value*>> &bbs)
 {
+    ++EmittedIsaUnions;
     if (jl_is_uniontype(type)) {
         emit_isa_union(ctx, x, ((jl_uniontype_t*)type)->a, bbs);
         emit_isa_union(ctx, x, ((jl_uniontype_t*)type)->b, bbs);
         return;
     }
-    bbs.emplace_back(ctx.builder.GetInsertBlock(), emit_isa(ctx, x, type, nullptr).first);
-    BasicBlock *isaBB = BasicBlock::Create(jl_LLVMContext, "isa", ctx.f);
+    BasicBlock *enter = ctx.builder.GetInsertBlock();
+    Value *v = emit_isa(ctx, x, type, nullptr).first;
+    BasicBlock *exit = ctx.builder.GetInsertBlock();
+    bbs.emplace_back(std::make_pair(enter, exit), v);
+    BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f);
     ctx.builder.SetInsertPoint(isaBB);
 }
 
 // Should agree with `_can_optimize_isa` above
 static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string *msg)
 {
+    ++EmittedIsa;
     // TODO: The subtype check below suffers from incorrectness issues due to broken
     // subtyping for kind types (see https://github.com/JuliaLang/julia/issues/27078). For
     // actual `isa` calls, this optimization should already have been performed upstream
@@ -1157,10 +1312,10 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
         if (!*known_isa && msg) {
             emit_type_error(ctx, x, literal_pointer_val(ctx, type), *msg);
             ctx.builder.CreateUnreachable();
-            BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext, "fail", ctx.f);
+            BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f);
             ctx.builder.SetInsertPoint(failBB);
         }
-        return std::make_pair(ConstantInt::get(T_int1, *known_isa), true);
+        return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), *known_isa), true);
     }
 
     if (jl_is_type_type(intersected_type) && jl_pointer_egal(intersected_type)) {
@@ -1175,11 +1330,11 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
         Value *vtyp = track_pjlvalue(ctx, literal_pointer_val(ctx, type));
         if (msg && *msg == "typeassert") {
             ctx.builder.CreateCall(prepare_call(jltypeassert_func), { vx, vtyp });
-            return std::make_pair(ConstantInt::get(T_int1, 1), true);
+            return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1), true);
         }
         return std::make_pair(ctx.builder.CreateICmpNE(
                 ctx.builder.CreateCall(prepare_call(jlisa_func), { vx, vtyp }),
-                ConstantInt::get(T_int32, 0)), false);
+                ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)), false);
     }
     // tests for isa concretetype can be handled with pointer comparisons
     if (jl_is_concrete_type(intersected_type)) {
@@ -1187,15 +1342,15 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
             unsigned tindex = get_box_tindex((jl_datatype_t*)intersected_type, x.typ);
             if (tindex > 0) {
                 // optimize more when we know that this is a split union-type where tindex = 0 is invalid
-                Value *xtindex = ctx.builder.CreateAnd(x.TIndex, ConstantInt::get(T_int8, 0x7f));
-                return std::make_pair(ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(T_int8, tindex)), false);
+                Value *xtindex = ctx.builder.CreateAnd(x.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
+                return std::make_pair(ctx.builder.CreateICmpEQ(xtindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex)), false);
             }
             else if (x.Vboxed) {
                 // test for (x.TIndex == 0x80 && typeof(x.V) == type)
-                Value *isboxed = ctx.builder.CreateICmpEQ(x.TIndex, ConstantInt::get(T_int8, 0x80));
+                Value *isboxed = ctx.builder.CreateICmpEQ(x.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
                 BasicBlock *currBB = ctx.builder.GetInsertBlock();
-                BasicBlock *isaBB = BasicBlock::Create(jl_LLVMContext, "isa", ctx.f);
-                BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_isa", ctx.f);
+                BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "isa", ctx.f);
+                BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_isa", ctx.f);
                 ctx.builder.CreateCondBr(isboxed, isaBB, postBB);
                 ctx.builder.SetInsertPoint(isaBB);
                 Value *istype_boxed = ctx.builder.CreateICmpEQ(emit_typeof(ctx, x.Vboxed),
@@ -1203,24 +1358,19 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
                 ctx.builder.CreateBr(postBB);
                 isaBB = ctx.builder.GetInsertBlock(); // could have changed
                 ctx.builder.SetInsertPoint(postBB);
-                PHINode *istype = ctx.builder.CreatePHI(T_int1, 2);
-                istype->addIncoming(ConstantInt::get(T_int1, 0), currBB);
+                PHINode *istype = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2);
+                istype->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), currBB);
                 istype->addIncoming(istype_boxed, isaBB);
                 return std::make_pair(istype, false);
             } else {
                 // handle the case where we know that `x` is unboxed (but of unknown type), but that concrete type `type` cannot be unboxed
-                return std::make_pair(ConstantInt::get(T_int1, 0), false);
+                return std::make_pair(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), false);
             }
         }
-        if (auto val = ((jl_datatype_t*)intersected_type)->instance) {
-            auto ptr = track_pjlvalue(ctx, literal_pointer_val(ctx, val));
-            return {ctx.builder.CreateICmpEQ(boxed(ctx, x), ptr), false};
-        }
-        return std::make_pair(ctx.builder.CreateICmpEQ(emit_typeof_boxed(ctx, x),
-            track_pjlvalue(ctx, literal_pointer_val(ctx, intersected_type))), false);
+        return std::make_pair(emit_exactly_isa(ctx, x, intersected_type), false);
     }
     jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(intersected_type);
-    if (jl_is_datatype(dt) && !dt->abstract && jl_subtype(dt->name->wrapper, type)) {
+    if (jl_is_datatype(dt) && !dt->name->abstract && jl_subtype(dt->name->wrapper, type)) {
         // intersection is a supertype of all instances of its constructor,
         // so the isa test reduces to a comparison of the typename by pointer
         return std::make_pair(
@@ -1231,17 +1381,17 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
     }
     if (jl_is_uniontype(intersected_type) &&
         can_optimize_isa_union((jl_uniontype_t*)intersected_type)) {
-        SmallVector<std::pair<BasicBlock*,Value*>,4> bbs;
+        SmallVector<std::pair<std::pair<BasicBlock*,BasicBlock*>,Value*>,4> bbs;
         emit_isa_union(ctx, x, intersected_type, bbs);
         int nbbs = bbs.size();
         BasicBlock *currBB = ctx.builder.GetInsertBlock();
-        PHINode *res = ctx.builder.CreatePHI(T_int1, nbbs);
+        PHINode *res = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), nbbs);
         for (int i = 0; i < nbbs; i++) {
-            auto bb = bbs[i].first;
+            auto bb = bbs[i].first.second;
             ctx.builder.SetInsertPoint(bb);
             if (i + 1 < nbbs) {
-                ctx.builder.CreateCondBr(bbs[i].second, currBB, bbs[i + 1].first);
-                res->addIncoming(ConstantInt::get(T_int1, 1), bb);
+                ctx.builder.CreateCondBr(bbs[i].second, currBB, bbs[i + 1].first.first);
+                res->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1), bb);
             }
             else {
                 ctx.builder.CreateBr(currBB);
@@ -1256,7 +1406,7 @@ static std::pair<Value*, bool> emit_isa(jl_codectx_t &ctx, const jl_cgval_t &x,
             ctx.builder.CreateCall(prepare_call(jlsubtype_func),
               { emit_typeof_boxed(ctx, x),
                 track_pjlvalue(ctx, literal_pointer_val(ctx, type)) }),
-            ConstantInt::get(T_int32, 0)), false);
+            ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0)), false);
 }
 
 static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *type, const std::string &msg)
@@ -1265,8 +1415,9 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
     bool handled_msg;
     std::tie(istype, handled_msg) = emit_isa(ctx, x, type, &msg);
     if (!handled_msg) {
-        BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext, "fail", ctx.f);
-        BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "pass");
+        ++EmittedTypechecks;
+        BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f);
+        BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass");
         ctx.builder.CreateCondBr(istype, passBB, failBB);
         ctx.builder.SetInsertPoint(failBB);
 
@@ -1281,15 +1432,17 @@ static void emit_typecheck(jl_codectx_t &ctx, const jl_cgval_t &x, jl_value_t *t
 static Value *emit_isconcrete(jl_codectx_t &ctx, Value *typ)
 {
     Value *isconcrete;
-    isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, emit_bitcast(ctx, decay_derived(ctx, typ), T_pint8), offsetof(jl_datatype_t, isconcretetype));
-    isconcrete = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_int8, isconcrete, Align(1)));
-    isconcrete = ctx.builder.CreateTrunc(isconcrete, T_int1);
+    isconcrete = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, decay_derived(ctx, typ), getInt8PtrTy(ctx.builder.getContext())), offsetof(jl_datatype_t, hash) + sizeof(((jl_datatype_t*)nullptr)->hash));
+    isconcrete = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), isconcrete, Align(1)));
+    isconcrete = ctx.builder.CreateLShr(isconcrete, 1);
+    isconcrete = ctx.builder.CreateTrunc(isconcrete, getInt1Ty(ctx.builder.getContext()));
     return isconcrete;
 }
 
 static void emit_concretecheck(jl_codectx_t &ctx, Value *typ, const std::string &msg)
 {
-    assert(typ->getType() == T_prjlvalue);
+    ++EmittedConcretechecks;
+    assert(typ->getType() == ctx.types().T_prjlvalue);
     emit_typecheck(ctx, mark_julia_type(ctx, typ, true, jl_any_type), (jl_value_t*)jl_datatype_type, msg);
     error_unless(ctx, emit_isconcrete(ctx, typ), msg);
 }
@@ -1311,12 +1464,13 @@ static bool bounds_check_enabled(jl_codectx_t &ctx, jl_value_t *inbounds) {
 
 static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ty, Value *i, Value *len, jl_value_t *boundscheck)
 {
-    Value *im1 = ctx.builder.CreateSub(i, ConstantInt::get(T_size, 1));
+    Value *im1 = ctx.builder.CreateSub(i, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
 #if CHECK_BOUNDS==1
     if (bounds_check_enabled(ctx, boundscheck)) {
+        ++EmittedBoundschecks;
         Value *ok = ctx.builder.CreateICmpULT(im1, len);
-        BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext, "fail", ctx.f);
-        BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "pass");
+        BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(), "fail", ctx.f);
+        BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(), "pass");
         ctx.builder.CreateCondBr(ok, passBB, failBB);
         ctx.builder.SetInsertPoint(failBB);
         if (!ty) { // jl_value_t** tuple (e.g. the vararg)
@@ -1328,7 +1482,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
         else { // unboxed jl_value_t*
             Value *a = ainfo.V;
             if (ainfo.isghost) {
-                a = Constant::getNullValue(T_pint8);
+                a = Constant::getNullValue(getInt8PtrTy(ctx.builder.getContext()));
             }
             else if (!ainfo.ispointer()) {
                 // CreateAlloca is OK here since we are on an error branch
@@ -1337,7 +1491,7 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
                 a = tempSpace;
             }
             ctx.builder.CreateCall(prepare_call(jluboundserror_func), {
-                    emit_bitcast(ctx, decay_derived(ctx, a), T_pint8),
+                    emit_bitcast(ctx, decay_derived(ctx, a), getInt8PtrTy(ctx.builder.getContext())),
                     literal_pointer_val(ctx, ty),
                     i });
         }
@@ -1349,7 +1503,11 @@ static Value *emit_bounds_check(jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_v
     return im1;
 }
 
-static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest = NULL, MDNode *tbaa_dest = nullptr, bool isVolatile = false);
+static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value* dest, MDNode *tbaa_dest, bool isVolatile = false);
+static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt)
+{
+    return emit_unbox(ctx, to, x, jt, nullptr, nullptr, false);
+}
 static void emit_write_barrier(jl_codectx_t&, Value*, ArrayRef<Value*>);
 static void emit_write_barrier(jl_codectx_t&, Value*, Value*);
 static void emit_write_multibarrier(jl_codectx_t&, Value*, Value*, jl_value_t*);
@@ -1361,8 +1519,15 @@ std::vector<unsigned> first_ptr(Type *T)
             uint64_t num_elements;
             if (auto *AT = dyn_cast<ArrayType>(T))
                 num_elements = AT->getNumElements();
-            else
-                num_elements = cast<VectorType>(T)->getNumElements();
+            else {
+                VectorType *VT = cast<VectorType>(T);
+#if JL_LLVM_VERSION >= 120000
+                ElementCount EC = VT->getElementCount();
+                num_elements = EC.getKnownMinValue();
+#else
+                num_elements = VT->getNumElements();
+#endif
+            }
             if (num_elements == 0)
                 return {};
         }
@@ -1390,17 +1555,44 @@ Value *extract_first_ptr(jl_codectx_t &ctx, Value *V)
     return ctx.builder.CreateExtractValue(V, path);
 }
 
+
+static void emit_lockstate_value(jl_codectx_t &ctx, Value *strct, bool newstate)
+{
+    ++EmittedLockstates;
+    Value *v = mark_callee_rooted(ctx, strct);
+    ctx.builder.CreateCall(prepare_call(newstate ? jllockvalue_func : jlunlockvalue_func), v);
+}
+static void emit_lockstate_value(jl_codectx_t &ctx, const jl_cgval_t &strct, bool newstate)
+{
+    assert(strct.isboxed);
+    emit_lockstate_value(ctx, boxed(ctx, strct), newstate);
+}
+
+
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
 static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, jl_value_t *jltype,
-                             MDNode *tbaa, MDNode *aliasscope,
+                             MDNode *tbaa, MDNode *aliasscope, bool isboxed, AtomicOrdering Order,
                              bool maybe_null_if_boxed = true, unsigned alignment = 0,
                              Value **nullcheck = nullptr)
 {
-    bool isboxed;
-    Type *elty = julia_type_to_llvm(ctx, jltype, &isboxed);
+    Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype);
     if (type_is_ghost(elty))
-        return ghostValue(jltype);
+        return ghostValue(ctx, jltype);
+    AllocaInst *intcast = NULL;
+    if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
+        const DataLayout &DL = jl_Module->getDataLayout();
+        unsigned nb = DL.getTypeSizeInBits(elty);
+        intcast = ctx.builder.CreateAlloca(elty);
+        elty = Type::getIntNTy(ctx.builder.getContext(), nb);
+    }
+    Type *realelty = elty;
+    if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
+        unsigned nb = cast<IntegerType>(elty)->getBitWidth();
+        unsigned nb2 = PowerOf2Ceil(nb);
+        if (nb != nb2)
+            elty = Type::getIntNTy(ctx.builder.getContext(), nb2);
+    }
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     Value *data;
     if (ptr->getType() != ptrty)
@@ -1409,9 +1601,9 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
         data = ptr;
     if (idx_0based)
         data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based);
-    Instruction *load;
+    Value *instr;
     // TODO: can only lazy load if we can create a gc root for ptr for the lifetime of elt
-    //if (elty->isAggregateType() && tbaa == tbaa_immut && !alignment) { // can lazy load on demand, no copy needed
+    //if (elty->isAggregateType() && tbaa == ctx.tbaa().tbaa_immut && !alignment) { // can lazy load on demand, no copy needed
     //    elt = data;
     //}
     //else {
@@ -1419,46 +1611,110 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j
             alignment = sizeof(void*);
         else if (!alignment)
             alignment = julia_alignment(jltype);
-        load = ctx.builder.CreateAlignedLoad(data, Align(alignment), false);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(elty, data, Align(alignment), false);
+        load->setOrdering(Order);
         if (aliasscope)
             load->setMetadata("alias.scope", aliasscope);
-        if (isboxed) {
-            cast<LoadInst>(load)->setOrdering(AtomicOrdering::Unordered);
-            load = maybe_mark_load_dereferenceable(load, true, jltype);
-        }
+        if (isboxed)
+            maybe_mark_load_dereferenceable(load, true, jltype);
         if (tbaa)
-            load = tbaa_decorate(tbaa, load);
+            tbaa_decorate(tbaa, load);
+        instr = load;
+        if (elty != realelty)
+            instr = ctx.builder.CreateTrunc(instr, realelty);
+        if (intcast) {
+            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
+            instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+        }
         if (maybe_null_if_boxed) {
-            Value *first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
+            Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
             if (first_ptr)
                 null_pointer_check(ctx, first_ptr, nullcheck);
         }
     //}
     if (jltype == (jl_value_t*)jl_bool_type) { // "freeze" undef memory to a valid value
         // NOTE: if we zero-initialize arrays, this optimization should become valid
-        //load->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
+        //load->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), {
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
         //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) }));
-        load = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, load, T_int1));
+        instr = ctx.builder.CreateTrunc(instr, getInt1Ty(ctx.builder.getContext()));
     }
-    return mark_julia_type(ctx, load, isboxed, jltype);
+    return mark_julia_type(ctx, instr, isboxed, jltype);
 }
 
-static void typed_store(jl_codectx_t &ctx,
-        Value *ptr, Value *idx_0based, const jl_cgval_t &rhs,
+static jl_cgval_t typed_store(jl_codectx_t &ctx,
+        Value *ptr, Value *idx_0based, jl_cgval_t rhs, jl_cgval_t cmp,
         jl_value_t *jltype, MDNode *tbaa, MDNode *aliasscope,
         Value *parent,  // for the write barrier, NULL if no barrier needed
-        unsigned alignment = 0)
+        bool isboxed, AtomicOrdering Order, AtomicOrdering FailOrder, unsigned alignment,
+        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
+        bool maybe_null_if_boxed, const jl_cgval_t *modifyop, const std::string &fname)
 {
-    bool isboxed;
-    Type *elty = julia_type_to_llvm(ctx, jltype, &isboxed);
-    if (type_is_ghost(elty))
-        return;
-    Value *r;
-    if (!isboxed)
-        r = emit_unbox(ctx, elty, rhs, jltype);
-    else
-        r = boxed(ctx, rhs);
+    auto newval = [&](const jl_cgval_t &lhs) {
+        const jl_cgval_t argv[3] = { cmp, lhs, rhs };
+        jl_cgval_t ret(ctx.builder.getContext());
+        if (modifyop) {
+            ret = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+        }
+        else {
+            Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+            ret = mark_julia_type(ctx, callval, true, jl_any_type);
+        }
+        if (!jl_subtype(ret.typ, jltype)) {
+            emit_typecheck(ctx, ret, jltype, fname);
+            ret = update_julia_type(ctx, ret, jltype);
+        }
+        return ret;
+    };
+    assert(!needlock || parent != nullptr);
+    Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype);
+    if (type_is_ghost(elty)) {
+        if (isStrongerThanMonotonic(Order))
+            ctx.builder.CreateFence(Order);
+        if (issetfield) {
+            return rhs;
+        }
+        else if (isreplacefield) {
+            Value *Success = emit_f_is(ctx, cmp, ghostValue(ctx, jltype));
+            Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
+            const jl_cgval_t argv[2] = {ghostValue(ctx, jltype), mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+        else if (isswapfield) {
+            return ghostValue(ctx, jltype);
+        }
+        else { // modifyfield
+            jl_cgval_t oldval = ghostValue(ctx, jltype);
+            const jl_cgval_t argv[2] = { oldval, newval(oldval) };
+            jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+            return emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+    }
+    AllocaInst *intcast = nullptr;
+    if (!isboxed && Order != AtomicOrdering::NotAtomic && !elty->isIntOrPtrTy()) {
+        const DataLayout &DL = jl_Module->getDataLayout();
+        unsigned nb = DL.getTypeSizeInBits(elty);
+        if (!issetfield)
+            intcast = ctx.builder.CreateAlloca(elty);
+        elty = Type::getIntNTy(ctx.builder.getContext(), nb);
+    }
+    Type *realelty = elty;
+    if (Order != AtomicOrdering::NotAtomic && isa<IntegerType>(elty)) {
+        unsigned nb = cast<IntegerType>(elty)->getBitWidth();
+        unsigned nb2 = PowerOf2Ceil(nb);
+        if (nb != nb2)
+            elty = Type::getIntNTy(ctx.builder.getContext(), nb2);
+    }
+    Value *r = nullptr;
+    if (issetfield || isswapfield || isreplacefield)  {
+        if (!isboxed)
+            r = emit_unbox(ctx, realelty, rhs, jltype);
+        else
+            r = boxed(ctx, rhs);
+        if (realelty != elty)
+            r = ctx.builder.CreateZExt(r, elty);
+    }
     Type *ptrty = PointerType::get(elty, ptr->getType()->getPointerAddressSpace());
     if (ptr->getType() != ptrty)
         ptr = ctx.builder.CreateBitCast(ptr, ptrty);
@@ -1468,24 +1724,284 @@ static void typed_store(jl_codectx_t &ctx,
         alignment = sizeof(void*);
     else if (!alignment)
         alignment = julia_alignment(jltype);
-    StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
-    if (isboxed) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
-        store->setOrdering(AtomicOrdering::Unordered);
-    if (aliasscope)
-        store->setMetadata("noalias", aliasscope);
-    if (tbaa)
-        tbaa_decorate(tbaa, store);
+    Value *instr = nullptr;
+    Value *Compare = nullptr;
+    Value *Success = nullptr;
+    BasicBlock *DoneBB = nullptr;
+    if (needlock)
+        emit_lockstate_value(ctx, parent, true);
+    jl_cgval_t oldval = rhs;
+    if (issetfield || (Order == AtomicOrdering::NotAtomic && isswapfield)) {
+        if (isswapfield) {
+            auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+            if (aliasscope)
+                load->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, load);
+            assert(realelty == elty);
+            instr = load;
+        }
+        StoreInst *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
+        store->setOrdering(Order);
+        if (aliasscope)
+            store->setMetadata("noalias", aliasscope);
+        if (tbaa)
+            tbaa_decorate(tbaa, store);
+    }
+    else if (isswapfield && !isboxed) {
+        // we can't handle isboxed here as a workaround for really bad LLVM
+        // design issue: plain Xchg only works with integers
+#if JL_LLVM_VERSION >= 130000
+        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Align(alignment), Order);
+#else
+        auto *store = ctx.builder.CreateAtomicRMW(AtomicRMWInst::Xchg, ptr, r, Order);
+        store->setAlignment(Align(alignment));
+#endif
+        if (aliasscope)
+            store->setMetadata("noalias", aliasscope);
+        if (tbaa)
+            tbaa_decorate(tbaa, store);
+        instr = store;
+    }
+    else {
+        // replacefield, modifyfield, or swapfield (isboxed && atomic)
+        DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f);
+        bool needloop;
+        PHINode *Succ = nullptr, *Current = nullptr;
+        if (isreplacefield) {
+            if (Order == AtomicOrdering::NotAtomic) {
+                needloop = false;
+            }
+            else if (!isboxed) {
+                assert(jl_is_concrete_type(jltype));
+                needloop = ((jl_datatype_t*)jltype)->layout->haspadding;
+                Value *SameType = emit_isa(ctx, cmp, jltype, nullptr).first;
+                if (SameType != ConstantInt::getTrue(ctx.builder.getContext())) {
+                    BasicBlock *SkipBB = BasicBlock::Create(ctx.builder.getContext(), "skip_xchg", ctx.f);
+                    BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "ok_xchg", ctx.f);
+                    ctx.builder.CreateCondBr(SameType, BB, SkipBB);
+                    ctx.builder.SetInsertPoint(SkipBB);
+                    LoadInst *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+                    load->setOrdering(FailOrder);
+                    if (aliasscope)
+                        load->setMetadata("noalias", aliasscope);
+                    if (tbaa)
+                        tbaa_decorate(tbaa, load);
+                    instr = load;
+                    ctx.builder.CreateBr(DoneBB);
+                    ctx.builder.SetInsertPoint(DoneBB);
+                    Succ = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2);
+                    Succ->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), false), SkipBB);
+                    Current = ctx.builder.CreatePHI(instr->getType(), 2);
+                    Current->addIncoming(instr, SkipBB);
+                    ctx.builder.SetInsertPoint(BB);
+                }
+                Compare = emit_unbox(ctx, realelty, cmp, jltype);
+                if (realelty != elty)
+                    Compare = ctx.builder.CreateZExt(Compare, elty);
+            }
+            else if (cmp.isboxed || cmp.constant || jl_pointer_egal(jltype)) {
+                Compare = boxed(ctx, cmp);
+                needloop = !jl_pointer_egal(jltype) && !jl_pointer_egal(cmp.typ);
+                if (needloop && !cmp.isboxed) // try to use the same box in the compare now and later
+                    cmp = mark_julia_type(ctx, Compare, true, cmp.typ);
+            }
+            else {
+                Compare = Constant::getNullValue(ctx.types().T_prjlvalue); // TODO: does this need to be an invalid bit pattern?
+                needloop = true;
+            }
+        }
+        else { // swap or modify
+            LoadInst *Current = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+            Current->setOrdering(Order == AtomicOrdering::NotAtomic ? Order : AtomicOrdering::Monotonic);
+            if (aliasscope)
+                Current->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, Current);
+            Compare = Current;
+            needloop = !isswapfield || Order != AtomicOrdering::NotAtomic;
+        }
+        BasicBlock *BB = NULL;
+        PHINode *CmpPhi = NULL;
+        if (needloop) {
+            BasicBlock *From = ctx.builder.GetInsertBlock();
+            BB = BasicBlock::Create(ctx.builder.getContext(), "xchg", ctx.f);
+            ctx.builder.CreateBr(BB);
+            ctx.builder.SetInsertPoint(BB);
+            CmpPhi = ctx.builder.CreatePHI(elty, 2);
+            CmpPhi->addIncoming(Compare, From);
+            Compare = CmpPhi;
+        }
+        if (ismodifyfield) {
+            if (needlock)
+                emit_lockstate_value(ctx, parent, false);
+            Value *realCompare = Compare;
+            if (realelty != elty)
+                realCompare = ctx.builder.CreateTrunc(realCompare, realelty);
+            if (intcast) {
+                ctx.builder.CreateStore(realCompare, ctx.builder.CreateBitCast(intcast, realCompare->getType()->getPointerTo()));
+                if (maybe_null_if_boxed)
+                    realCompare = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+            }
+            if (maybe_null_if_boxed) {
+                Value *first_ptr = isboxed ? Compare : extract_first_ptr(ctx, Compare);
+                if (first_ptr)
+                    null_pointer_check(ctx, first_ptr, nullptr);
+            }
+            if (intcast)
+                oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
+            else
+                oldval = mark_julia_type(ctx, realCompare, isboxed, jltype);
+            rhs = newval(oldval);
+            if (!isboxed)
+                r = emit_unbox(ctx, realelty, rhs, jltype);
+            else
+                r = boxed(ctx, rhs);
+            if (realelty != elty)
+                r = ctx.builder.CreateZExt(r, elty);
+            if (needlock)
+                emit_lockstate_value(ctx, parent, true);
+            cmp = oldval;
+        }
+        Value *Done;
+        if (Order == AtomicOrdering::NotAtomic) {
+            // modifyfield or replacefield
+            assert(elty == realelty && !intcast);
+            auto *load = ctx.builder.CreateAlignedLoad(elty, ptr, Align(alignment));
+            if (aliasscope)
+                load->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, load);
+            Value *first_ptr = nullptr;
+            if (maybe_null_if_boxed && !ismodifyfield)
+                first_ptr = isboxed ? load : extract_first_ptr(ctx, load);
+            oldval = mark_julia_type(ctx, load, isboxed, jltype);
+            Success = emit_nullcheck_guard(ctx, first_ptr, [&] {
+                return emit_f_is(ctx, oldval, cmp);
+            });
+            if (needloop && ismodifyfield)
+                CmpPhi->addIncoming(load, ctx.builder.GetInsertBlock());
+            assert(Succ == nullptr);
+            BasicBlock *XchgBB = BasicBlock::Create(ctx.builder.getContext(), "xchg", ctx.f);
+            ctx.builder.CreateCondBr(Success, XchgBB, needloop && ismodifyfield ? BB : DoneBB);
+            ctx.builder.SetInsertPoint(XchgBB);
+            auto *store = ctx.builder.CreateAlignedStore(r, ptr, Align(alignment));
+            if (aliasscope)
+                store->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, store);
+            ctx.builder.CreateBr(DoneBB);
+            instr = load;
+        }
+        else {
+            if (Order == AtomicOrdering::Unordered)
+                Order = AtomicOrdering::Monotonic;
+            if (!isreplacefield)
+                FailOrder = AtomicOrdering::Monotonic;
+            else if (FailOrder == AtomicOrdering::Unordered)
+                FailOrder = AtomicOrdering::Monotonic;
+#if JL_LLVM_VERSION >= 130000
+            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Align(alignment), Order, FailOrder);
+#else
+            auto *store = ctx.builder.CreateAtomicCmpXchg(ptr, Compare, r, Order, FailOrder);
+            store->setAlignment(Align(alignment));
+#endif
+            if (aliasscope)
+                store->setMetadata("noalias", aliasscope);
+            if (tbaa)
+                tbaa_decorate(tbaa, store);
+            instr = ctx.builder.Insert(ExtractValueInst::Create(store, 0));
+            Success = ctx.builder.Insert(ExtractValueInst::Create(store, 1));
+            Done = Success;
+            if (isreplacefield && needloop) {
+                Value *realinstr = instr;
+                if (realelty != elty)
+                    realinstr = ctx.builder.CreateTrunc(realinstr, realelty);
+                if (intcast) {
+                    ctx.builder.CreateStore(realinstr, ctx.builder.CreateBitCast(intcast, realinstr->getType()->getPointerTo()));
+                    oldval = mark_julia_slot(intcast, jltype, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
+                    if (maybe_null_if_boxed)
+                        realinstr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+                }
+                else {
+                    oldval = mark_julia_type(ctx, realinstr, isboxed, jltype);
+                }
+                Done = emit_guarded_test(ctx, ctx.builder.CreateNot(Success), false, [&] {
+                    Value *first_ptr = nullptr;
+                    if (maybe_null_if_boxed)
+                        first_ptr = isboxed ? realinstr : extract_first_ptr(ctx, realinstr);
+                    return emit_nullcheck_guard(ctx, first_ptr, [&] {
+                        return emit_f_is(ctx, oldval, cmp);
+                    });
+                });
+                Done = ctx.builder.CreateNot(Done);
+            }
+            if (needloop)
+                ctx.builder.CreateCondBr(Done, DoneBB, BB);
+            else
+                ctx.builder.CreateBr(DoneBB);
+            if (needloop)
+                CmpPhi->addIncoming(instr, ctx.builder.GetInsertBlock());
+        }
+        if (Succ != nullptr) {
+            Current->addIncoming(instr, ctx.builder.GetInsertBlock());
+            instr = Current;
+            Succ->addIncoming(Success, ctx.builder.GetInsertBlock());
+            Success = Succ;
+        }
+    }
+    if (DoneBB)
+        ctx.builder.SetInsertPoint(DoneBB);
+    if (needlock)
+        emit_lockstate_value(ctx, parent, false);
     if (parent != NULL) {
+        if (isreplacefield) {
+            // TOOD: avoid this branch if we aren't making a write barrier
+            BasicBlock *BB = BasicBlock::Create(ctx.builder.getContext(), "xchg_wb", ctx.f);
+            DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg_wb", ctx.f);
+            ctx.builder.CreateCondBr(Success, BB, DoneBB);
+            ctx.builder.SetInsertPoint(BB);
+        }
         if (!isboxed)
             emit_write_multibarrier(ctx, parent, r, rhs.typ);
         else if (!type_is_permalloc(rhs.typ))
             emit_write_barrier(ctx, parent, r);
+        if (isreplacefield) {
+            ctx.builder.CreateBr(DoneBB);
+            ctx.builder.SetInsertPoint(DoneBB);
+        }
+    }
+    if (ismodifyfield) {
+        const jl_cgval_t argv[2] = { oldval, rhs };
+        jl_datatype_t *rettyp = jl_apply_modify_type(jltype);
+        oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+    }
+    else if (!issetfield) { // swapfield or replacefield
+        if (realelty != elty)
+            instr = ctx.builder.Insert(CastInst::Create(Instruction::Trunc, instr, realelty));
+        if (intcast) {
+            ctx.builder.CreateStore(instr, ctx.builder.CreateBitCast(intcast, instr->getType()->getPointerTo()));
+            instr = ctx.builder.CreateLoad(intcast->getAllocatedType(), intcast);
+        }
+        if (maybe_null_if_boxed) {
+            Value *first_ptr = isboxed ? instr : extract_first_ptr(ctx, instr);
+            if (first_ptr)
+                null_pointer_check(ctx, first_ptr, nullptr);
+        }
+        oldval = mark_julia_type(ctx, instr, isboxed, jltype);
+        if (isreplacefield) {
+            Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
+            const jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jltype);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
     }
+    return oldval;
 }
 
 // --- convert boolean value to julia ---
 
-// Returns T_pjlvalue
+// Returns ctx.types().T_pjlvalue
 static Value *julia_bool(jl_codectx_t &ctx, Value *cond)
 {
     return ctx.builder.CreateSelect(cond, literal_pointer_val(ctx, jl_true),
@@ -1519,14 +2035,18 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Va
     // If the types are small and simple, use load and store directly.
     // Going through memcpy can cause LLVM (e.g. SROA) to create bitcasts between float and int
     // that interferes with other optimizations.
+#ifndef JL_LLVM_OPAQUE_POINTERS
+    // TODO: Restore this for opaque pointers? Needs extra type information from the caller.
     if (sz <= 64) {
         // The size limit is arbitrary but since we mainly care about floating points and
         // machine size vectors this should be enough.
-        const DataLayout &DL = jl_data_layout;
+        const DataLayout &DL = jl_Module->getDataLayout();
         auto srcty = cast<PointerType>(src->getType());
-        auto srcel = srcty->getElementType();
+        //TODO unsafe nonopaque pointer
+        auto srcel = srcty->getPointerElementType();
         auto dstty = cast<PointerType>(dst->getType());
-        auto dstel = dstty->getElementType();
+        //TODO unsafe nonopaque pointer
+        auto dstel = dstty->getPointerElementType();
         if (srcel->isArrayTy() && srcel->getArrayNumElements() == 1) {
             src = ctx.builder.CreateConstInBoundsGEP2_32(srcel, src, 0, 0);
             srcel = srcel->getArrayElementType();
@@ -1538,31 +2058,30 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Va
             dstty = dstel->getPointerTo();
         }
 
-        bool direct = false;
+        llvm::Type *directel = nullptr;
         if (srcel->isSized() && srcel->isSingleValueType() && DL.getTypeStoreSize(srcel) == sz) {
-            direct = true;
+            directel = srcel;
             dst = emit_bitcast(ctx, dst, srcty);
         }
         else if (dstel->isSized() && dstel->isSingleValueType() &&
                  DL.getTypeStoreSize(dstel) == sz) {
-            direct = true;
+            directel = dstel;
             src = emit_bitcast(ctx, src, dstty);
         }
-        if (direct) {
-            auto val = tbaa_decorate(tbaa_src, ctx.builder.CreateAlignedLoad(src, Align(align), is_volatile));
+        if (directel) {
+            auto val = tbaa_decorate(tbaa_src, ctx.builder.CreateAlignedLoad(directel, src, Align(align), is_volatile));
             tbaa_decorate(tbaa_dst, ctx.builder.CreateAlignedStore(val, dst, Align(align), is_volatile));
+            ++SkippedMemcpys;
             return;
         }
     }
+#endif
     // the memcpy intrinsic does not allow to specify different alias tags
-    // for the load part (x.tbaa) and the store part (tbaa_stack).
+    // for the load part (x.tbaa) and the store part (ctx.tbaa().tbaa_stack).
     // since the tbaa lattice has to be a tree we have unfortunately
-    // x.tbaa ∪ tbaa_stack = tbaa_root if x.tbaa != tbaa_stack
-#if JL_LLVM_VERSION >= 100000
+    // x.tbaa ∪ ctx.tbaa().tbaa_stack = tbaa_root if x.tbaa != ctx.tbaa().tbaa_stack
+    ++EmittedMemcpys;
     ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
-#else
-    ctx.builder.CreateMemCpy(dst, align, src, 0, sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
-#endif
 }
 
 static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Value *src, MDNode *tbaa_src,
@@ -1572,11 +2091,8 @@ static void emit_memcpy_llvm(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, Va
         emit_memcpy_llvm(ctx, dst, tbaa_dst, src, tbaa_src, const_sz->getZExtValue(), align, is_volatile);
         return;
     }
-#if JL_LLVM_VERSION >= 100000
+    ++EmittedMemcpys;
     ctx.builder.CreateMemCpy(dst, MaybeAlign(align), src, MaybeAlign(0), sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
-#else
-    ctx.builder.CreateMemCpy(dst, align, src, 0, sz, is_volatile, MDNode::getMostGenericTBAA(tbaa_dst, tbaa_src));
-#endif
 }
 
 template<typename T1>
@@ -1594,28 +2110,37 @@ static void emit_memcpy(jl_codectx_t &ctx, Value *dst, MDNode *tbaa_dst, const j
 }
 
 
+static void emit_atomic_error(jl_codectx_t &ctx, const std::string &msg)
+{
+    emit_error(ctx, prepare_call(jlatomicerror_func), msg);
+}
 
 static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &strct,
                                          unsigned idx, jl_datatype_t *jt,
-                                         Value **nullcheck = nullptr);
+                                         enum jl_memory_order order, Value **nullcheck=nullptr);
 
 static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
         jl_cgval_t *ret, jl_cgval_t strct,
-        Value *idx, jl_datatype_t *stt, jl_value_t *inbounds)
+        Value *idx, jl_datatype_t *stt, jl_value_t *inbounds,
+        enum jl_memory_order order)
 {
+    ++EmittedGetfieldUnknowns;
     size_t nfields = jl_datatype_nfields(stt);
-    bool maybe_null = (unsigned)stt->ninitialized != nfields;
+    bool maybe_null = (unsigned)stt->name->n_uninitialized != 0;
     auto idx0 = [&]() {
-        return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(T_size, nfields), inbounds);
+        return emit_bounds_check(ctx, strct, (jl_value_t*)stt, idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nfields), inbounds);
     };
     if (nfields == 0) {
         (void)idx0();
-        *ret = jl_cgval_t();
+        *ret = jl_cgval_t(ctx.builder.getContext());
         return true;
     }
     if (nfields == 1) {
+        if (jl_has_free_typevars(jl_field_type(stt, 0))) {
+            return false;
+        }
         (void)idx0();
-        *ret = emit_getfield_knownidx(ctx, strct, 0, stt);
+        *ret = emit_getfield_knownidx(ctx, strct, 0, stt, order);
         return true;
     }
     assert(!jl_is_vecelement_type((jl_value_t*)stt));
@@ -1623,19 +2148,20 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     if (!strct.ispointer()) { // unboxed
         assert(jl_is_concrete_immutable((jl_value_t*)stt));
         bool isboxed = is_datatype_all_pointers(stt);
-        bool issame = is_tupletype_homogeneous(stt->types);
+        jl_svec_t *types = stt->types;
+        bool issame = is_tupletype_homogeneous(types);
         if (issame) {
-            jl_value_t *jft = jl_svecref(stt->types, 0);
+            jl_value_t *jft = jl_svecref(types, 0);
             if (strct.isghost) {
                 (void)idx0();
-                *ret = ghostValue(jft);
+                *ret = ghostValue(ctx, jft);
                 return true;
             }
             if (isa<VectorType>(strct.V->getType())) {
                 assert(stt->layout->npointers == 0); // we could, but don't emit this
                 idx = idx0();
                 if (sizeof(void*) != sizeof(int))
-                    idx = ctx.builder.CreateTrunc(idx, T_int32); // llvm3.3 requires this, harmless elsewhere
+                    idx = ctx.builder.CreateTrunc(idx, getInt32Ty(ctx.builder.getContext())); // llvm3.3 requires this, harmless elsewhere
                 Value *fld = ctx.builder.CreateExtractElement(strct.V, idx);
                 *ret = mark_julia_type(ctx, fld, isboxed, jft);
                 return true;
@@ -1655,7 +2181,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             }
         }
         if (isboxed || (issame && isa<ArrayType>(strct.V->getType()))) {
-            assert((cast<ArrayType>(strct.V->getType())->getElementType() == T_prjlvalue) == isboxed);
+            assert((cast<ArrayType>(strct.V->getType())->getElementType() == ctx.types().T_prjlvalue) == isboxed);
             Value *idx = idx0();
             unsigned i = 0;
             Value *fld = ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i));
@@ -1665,7 +2191,7 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
                         ctx.builder.CreateExtractValue(strct.V, makeArrayRef(i)),
                         fld);
             }
-            jl_value_t *jft = issame ? jl_svecref(stt->types, 0) : (jl_value_t*)jl_any_type;
+            jl_value_t *jft = issame ? jl_svecref(types, 0) : (jl_value_t*)jl_any_type;
             if (isboxed && maybe_null)
                 null_pointer_check(ctx, fld);
             *ret = mark_julia_type(ctx, fld, isboxed, jft);
@@ -1673,7 +2199,13 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
         }
     }
 
-    if (strct.ispointer()) { // boxed or stack
+    bool maybeatomic = stt->name->atomicfields != NULL;
+    if (strct.ispointer() && !maybeatomic) { // boxed or stack
+        if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+            emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
+            *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+            return true;
+        }
         if (is_datatype_all_pointers(stt)) {
             size_t minimum_field_size = std::numeric_limits<size_t>::max();
             size_t minimum_align = JL_HEAP_ALIGNMENT;
@@ -1689,10 +2221,10 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
                     (size_t)julia_alignment(ft));
             }
             Value *fldptr = ctx.builder.CreateInBoundsGEP(
-                    T_prjlvalue,
-                    maybe_decay_tracked(ctx, emit_bitcast(ctx, data_pointer(ctx, strct), T_pprjlvalue)),
+                    ctx.types().T_prjlvalue,
+                    maybe_decay_tracked(ctx, emit_bitcast(ctx, data_pointer(ctx, strct), ctx.types().T_pprjlvalue)),
                     idx0());
-            LoadInst *fld = ctx.builder.CreateAlignedLoad(T_prjlvalue, fldptr, Align(sizeof(void*)));
+            LoadInst *fld = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fldptr, Align(sizeof(void*)));
             fld->setOrdering(AtomicOrdering::Unordered);
             tbaa_decorate(strct.tbaa, fld);
             maybe_mark_load_dereferenceable(fld, maybe_null, minimum_field_size, minimum_align);
@@ -1701,25 +2233,25 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
             *ret = mark_julia_type(ctx, fld, true, jl_any_type);
             return true;
         }
-        else if (is_tupletype_homogeneous(stt->types)) {
+        else if (is_tupletype_homogeneous(jl_get_fieldtypes(stt))) {
             assert(nfields > 0); // nf == 0 trapped by all_pointers case
-            jl_value_t *jft = jl_svecref(stt->types, 0);
+            jl_value_t *jft = jl_svecref(stt->types, 0); // n.b. jl_get_fieldtypes assigned stt->types for here
             assert(jl_is_concrete_type(jft));
             idx = idx0();
             Value *ptr = maybe_decay_tracked(ctx, data_pointer(ctx, strct));
-            if (!stt->mutabl && !(maybe_null && (jft == (jl_value_t*)jl_bool_type ||
+            if (!stt->name->mutabl && !(maybe_null && (jft == (jl_value_t*)jl_bool_type ||
                                                  ((jl_datatype_t*)jft)->layout->npointers))) {
                 // just compute the pointer and let user load it when necessary
                 Type *fty = julia_type_to_llvm(ctx, jft);
                 Value *addr = ctx.builder.CreateInBoundsGEP(fty, emit_bitcast(ctx, ptr, PointerType::get(fty, 0)), idx);
-                *ret = mark_julia_slot(addr, jft, NULL, strct.tbaa);
+                *ret = mark_julia_slot(addr, jft, NULL, ctx.tbaa(), strct.tbaa);
                 return true;
             }
-            *ret = typed_load(ctx, ptr, idx, jft, strct.tbaa, nullptr, maybe_null);
+            *ret = typed_load(ctx, ptr, idx, jft, strct.tbaa, nullptr, false, AtomicOrdering::NotAtomic, maybe_null);
             return true;
         }
         else if (strct.isboxed) {
-            idx = ctx.builder.CreateSub(idx, ConstantInt::get(T_size, 1));
+            idx = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
             Value *fld = ctx.builder.CreateCall(prepare_call(jlgetnthfieldchecked_func), { boxed(ctx, strct), idx });
             *ret = mark_julia_type(ctx, fld, true, jl_any_type);
             return true;
@@ -1728,24 +2260,60 @@ static bool emit_getfield_unknownidx(jl_codectx_t &ctx,
     return false;
 }
 
+static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex,
+        jl_value_t *jfty, size_t fsz, size_t al, MDNode *tbaa, bool mutabl,
+        unsigned union_max, MDNode *tbaa_ptindex)
+{
+    ++EmittedUnionLoads;
+    Instruction *tindex0 = tbaa_decorate(tbaa_ptindex, ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), ptindex, Align(1)));
+    tindex0->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), {
+        ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
+        ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), union_max)) }));
+    Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0);
+    if (fsz > 0 && mutabl) {
+        // move value to an immutable stack slot (excluding tindex)
+        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al);
+        AllocaInst *lv = emit_static_alloca(ctx, AT);
+        if (al > 1)
+            lv->setAlignment(Align(al));
+        emit_memcpy(ctx, lv, tbaa, addr, tbaa, fsz, al);
+        addr = lv;
+    }
+    return mark_julia_slot(fsz > 0 ? addr : nullptr, jfty, tindex, ctx.tbaa(), tbaa);
+}
+
 // If `nullcheck` is not NULL and a pointer NULL check is necessary
 // store the pointer to be checked in `*nullcheck` instead of checking it
 static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &strct,
                                          unsigned idx, jl_datatype_t *jt,
-                                         Value **nullcheck)
+                                         enum jl_memory_order order, Value **nullcheck)
 {
     jl_value_t *jfty = jl_field_type(jt, idx);
+    bool isatomic = jl_field_isatomic(jt, idx);
+    bool needlock = isatomic && !jl_field_isptr(jt, idx) && jl_datatype_size(jfty) > MAX_ATOMIC_SIZE;
+    if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+        emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
+        return jl_cgval_t(ctx.builder.getContext()); // unreachable
+    }
+    if (isatomic && order == jl_memory_order_notatomic) {
+        emit_atomic_error(ctx, "getfield: atomic field cannot be accessed non-atomically");
+        return jl_cgval_t(ctx.builder.getContext()); // unreachable
+    }
+    if (order == jl_memory_order_unspecified) {
+        order = isatomic ? jl_memory_order_unordered : jl_memory_order_notatomic;
+    }
     if (jfty == jl_bottom_type) {
         raise_exception(ctx, literal_pointer_val(ctx, jl_undefref_exception));
-        return jl_cgval_t(); // unreachable
+        return jl_cgval_t(ctx.builder.getContext()); // unreachable
     }
     if (type_is_ghost(julia_type_to_llvm(ctx, jfty)))
-        return ghostValue(jfty);
-    bool maybe_null = idx >= (unsigned)jt->ninitialized;
+        return ghostValue(ctx, jfty);
+    size_t nfields = jl_datatype_nfields(jt);
+    bool maybe_null = idx >= nfields - (unsigned)jt->name->n_uninitialized;
     size_t byte_offset = jl_field_offset(jt, idx);
     auto tbaa = strct.tbaa;
-    if (tbaa == tbaa_datatype && byte_offset != offsetof(jl_datatype_t, types))
-        tbaa = tbaa_const;
+    if (tbaa == ctx.tbaa().tbaa_datatype && byte_offset != offsetof(jl_datatype_t, types))
+        tbaa = ctx.tbaa().tbaa_const;
     if (strct.ispointer()) {
         Value *staddr = maybe_decay_tracked(ctx, data_pointer(ctx, strct));
         bool isboxed;
@@ -1757,9 +2325,9 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
             // can pessimize mem2reg
             if (byte_offset > 0) {
                 addr = ctx.builder.CreateInBoundsGEP(
-                        T_int8,
-                        emit_bitcast(ctx, staddr, T_pint8),
-                        ConstantInt::get(T_size, byte_offset));
+                        getInt8Ty(ctx.builder.getContext()),
+                        emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())),
+                        ConstantInt::get(getSizeTy(ctx.builder.getContext()), byte_offset));
             }
             else {
                 addr = staddr;
@@ -1775,8 +2343,8 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 addr = ctx.builder.CreateConstInBoundsGEP2_32(lt, staddr, 0, idx);
         }
         if (jl_field_isptr(jt, idx)) {
-            LoadInst *Load = ctx.builder.CreateAlignedLoad(T_prjlvalue, maybe_bitcast(ctx, addr, T_pprjlvalue), Align(sizeof(void*)));
-            Load->setOrdering(AtomicOrdering::Unordered);
+            LoadInst *Load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, maybe_bitcast(ctx, addr, ctx.types().T_pprjlvalue), Align(sizeof(void*)));
+            Load->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
             maybe_mark_load_dereferenceable(Load, maybe_null, jl_field_type(jt, idx));
             Value *fldv = tbaa_decorate(tbaa, Load);
             if (maybe_null)
@@ -1785,42 +2353,37 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
         }
         else if (jl_is_uniontype(jfty)) {
             size_t fsz = 0, al = 0;
-            bool isptr = !jl_islayout_inline(jfty, &fsz, &al);
+            int union_max = jl_islayout_inline(jfty, &fsz, &al);
+            bool isptr = (union_max == 0);
             assert(!isptr && fsz == jl_field_size(jt, idx) - 1); (void)isptr;
             Value *ptindex;
             if (isboxed) {
                 ptindex = ctx.builder.CreateConstInBoundsGEP1_32(
-                    T_int8, emit_bitcast(ctx, staddr, T_pint8), byte_offset + fsz);
+                    getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, staddr, getInt8PtrTy(ctx.builder.getContext())), byte_offset + fsz);
             }
             else {
                 ptindex = emit_struct_gep(ctx, cast<StructType>(lt), staddr, byte_offset + fsz);
             }
-            Instruction *tindex0 = tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedLoad(T_int8, ptindex, Align(1)));
-            //tindex0->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
-            //    ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
-            //    ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
-            Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1), tindex0);
-            if (jt->mutabl) {
-                // move value to an immutable stack slot (excluding tindex)
-                Type *ET = IntegerType::get(jl_LLVMContext, 8 * al);
-                AllocaInst *lv = emit_static_alloca(ctx, ET);
-                lv->setOperand(0, ConstantInt::get(T_int32, (fsz + al - 1) / al));
-                emit_memcpy(ctx, lv, tbaa, addr, tbaa, fsz, al);
-                addr = lv;
-            }
-            return mark_julia_slot(addr, jfty, tindex, tbaa);
+            return emit_unionload(ctx, addr, ptindex, jfty, fsz, al, tbaa, !jl_field_isconst(jt, idx), union_max, ctx.tbaa().tbaa_unionselbyte);
         }
         assert(jl_is_concrete_type(jfty));
-        if (!jt->mutabl && !(maybe_null && (jfty == (jl_value_t*)jl_bool_type ||
+        if (jl_field_isconst(jt, idx) && !(maybe_null && (jfty == (jl_value_t*)jl_bool_type ||
                                             ((jl_datatype_t*)jfty)->layout->npointers))) {
             // just compute the pointer and let user load it when necessary
-            return mark_julia_slot(addr, jfty, NULL, tbaa);
+            return mark_julia_slot(addr, jfty, NULL, ctx.tbaa(), tbaa);
         }
         unsigned align = jl_field_align(jt, idx);
-        return typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, maybe_null, align, nullcheck);
+        if (needlock)
+            emit_lockstate_value(ctx, strct, true);
+        jl_cgval_t ret = typed_load(ctx, addr, NULL, jfty, tbaa, nullptr, false,
+                needlock ? AtomicOrdering::NotAtomic : get_llvm_atomic_order(order), // TODO: we should use unordered for anything with CountTrackedPointers(elty).count > 0
+                maybe_null, align, nullcheck);
+        if (needlock)
+            emit_lockstate_value(ctx, strct, false);
+        return ret;
     }
     else if (isa<UndefValue>(strct.V)) {
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
     else {
         Value *obj = strct.V; // aka emit_unbox
@@ -1831,7 +2394,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
             fldv = obj;
         }
         else if (isa<VectorType>(T)) {
-            fldv = ctx.builder.CreateExtractElement(obj, ConstantInt::get(T_int32, idx));
+            fldv = ctx.builder.CreateExtractElement(obj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), idx));
         }
         else if (!jl_field_isptr(jt, idx) && jl_is_uniontype(jfty)) {
             int fsz = jl_field_size(jt, idx) - 1;
@@ -1842,7 +2405,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 IntegerType *ET = cast<IntegerType>(T->getStructElementType(st_idx));
                 unsigned align = (ET->getBitWidth() + 7) / 8;
                 lv = emit_static_alloca(ctx, ET);
-                lv->setOperand(0, ConstantInt::get(T_int32, (fsz + align - 1) / align));
+                lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align));
                 // emit all of the align-sized words
                 unsigned i = 0;
                 for (; i < fsz / align; i++) {
@@ -1854,17 +2417,17 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
                 // emit remaining bytes up to tindex
                 if (i < ptindex - st_idx) {
                     Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
-                    staddr = ctx.builder.CreateBitCast(staddr, T_pint8);
+                    staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext()));
                     for (; i < ptindex - st_idx; i++) {
                         Value *fldv = ctx.builder.CreateExtractValue(obj, makeArrayRef(st_idx + i));
-                        Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, staddr, i);
+                        Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i);
                         ctx.builder.CreateAlignedStore(fldv, fldp, Align(1));
                     }
                 }
             }
             Value *tindex0 = ctx.builder.CreateExtractValue(obj, makeArrayRef(ptindex));
-            Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1), tindex0);
-            return mark_julia_slot(lv, jfty, tindex, tbaa_stack);
+            Value *tindex = ctx.builder.CreateNUWAdd(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1), tindex0);
+            return mark_julia_slot(lv, jfty, tindex, ctx.tbaa(), ctx.tbaa().tbaa_stack);
         }
         else {
             unsigned st_idx;
@@ -1888,17 +2451,18 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st
 // emit length of vararg tuple
 static Value *emit_n_varargs(jl_codectx_t &ctx)
 {
+    ++EmittedVarargsLength;
     Value *valen = NULL;
     if (ctx.nvargs != -1) {
-        valen = ConstantInt::get(T_int32, ctx.nvargs);
+        valen = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), ctx.nvargs);
     } else {
         assert(ctx.argCount);
         int nreq = ctx.nReqArgs;
         valen = ctx.builder.CreateSub((Value*)ctx.argCount,
-                                        ConstantInt::get(T_int32, nreq));
+                                        ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq));
     }
 #ifdef _P64
-    return ctx.builder.CreateSExt(valen, T_int64);
+    return ctx.builder.CreateSExt(valen, getInt64Ty(ctx.builder.getContext()));
 #else
     return valen;
 #endif
@@ -1953,36 +2517,46 @@ static intptr_t arraytype_maxsize(jl_value_t *ty)
     return INTPTR_MAX / elsz;
 }
 
+static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo);
+
 static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *dim)
 {
     size_t ndim;
-    MDNode *tbaa = tbaa_arraysize;
+    MDNode *tbaa = ctx.tbaa().tbaa_arraysize;
     if (arraytype_constdim(tinfo.typ, &ndim)) {
         if (ndim == 0)
-            return ConstantInt::get(T_size, 1);
+            return ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
+        if (ndim == 1) {
+            if (auto d = dyn_cast<ConstantInt>(dim)) {
+                if (d->getZExtValue() == 1) {
+                    return emit_arraylen(ctx, tinfo);
+                }
+            }
+        }
         if (ndim > 1) {
             if (tinfo.constant && isa<ConstantInt>(dim)) {
                 auto n = cast<ConstantInt>(dim)->getZExtValue() - 1;
-                return ConstantInt::get(T_size, jl_array_dim(tinfo.constant, n));
+                return ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_array_dim(tinfo.constant, n));
             }
-            tbaa = tbaa_const;
+            tbaa = ctx.tbaa().tbaa_const;
         }
     }
+    ++EmittedArraysize;
     Value *t = boxed(ctx, tinfo);
     int o = offsetof(jl_array_t, nrows) / sizeof(void*) - 1;
     auto load = emit_nthptr_recast(ctx,
             t,
             ctx.builder.CreateAdd(dim, ConstantInt::get(dim->getType(), o)),
-            tbaa, T_psize);
-    MDBuilder MDB(jl_LLVMContext);
-    auto rng = MDB.createRange(V_size0, ConstantInt::get(T_size, arraytype_maxsize(tinfo.typ)));
+            tbaa, getSizeTy(ctx.builder.getContext()));
+    MDBuilder MDB(ctx.builder.getContext());
+    auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), arraytype_maxsize(tinfo.typ)));
     load->setMetadata(LLVMContext::MD_range, rng);
     return load;
 }
 
 static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int dim)
 {
-    return emit_arraysize(ctx, tinfo, ConstantInt::get(T_int32, dim));
+    return emit_arraysize(ctx, tinfo, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), dim));
 }
 
 static Value *emit_vectormaxsize(jl_codectx_t &ctx, const jl_cgval_t &ary)
@@ -1994,46 +2568,27 @@ static Value *emit_arraylen_prim(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
 {
     size_t ndim;
     jl_value_t *ty = tinfo.typ;
-    MDNode *tbaa = tbaa_arraylen;
+    MDNode *tbaa = ctx.tbaa().tbaa_arraylen;
     if (arraytype_constdim(ty, &ndim)) {
         if (ndim == 0)
-            return ConstantInt::get(T_size, 1);
+            return ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
         if (ndim != 1) {
             if (tinfo.constant)
-                return ConstantInt::get(T_size, jl_array_len(tinfo.constant));
-            tbaa = tbaa_const;
+                return ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_array_len(tinfo.constant));
+            tbaa = ctx.tbaa().tbaa_const;
         }
     }
+    ++EmittedArraylen;
     Value *t = boxed(ctx, tinfo);
-#ifdef STORE_ARRAY_LEN
-    Value *addr = ctx.builder.CreateStructGEP(jl_array_llvmt,
-            emit_bitcast(ctx, decay_derived(ctx, t), jl_parray_llvmt),
-            1); //index (not offset) of length field in jl_parray_llvmt
-    LoadInst *len = ctx.builder.CreateAlignedLoad(addr, Align(sizeof(size_t)));
+    Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray,
+            emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
+            1); //index (not offset) of length field in ctx.types().T_pjlarray
+    LoadInst *len = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), addr, Align(sizeof(size_t)));
     len->setOrdering(AtomicOrdering::NotAtomic);
-    MDBuilder MDB(jl_LLVMContext);
-    auto rng = MDB.createRange(V_size0, ConstantInt::get(T_size, arraytype_maxsize(tinfo.typ)));
+    MDBuilder MDB(ctx.builder.getContext());
+    auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), arraytype_maxsize(tinfo.typ)));
     len->setMetadata(LLVMContext::MD_range, rng);
     return tbaa_decorate(tbaa, len);
-#else
-    (void)tbaa;
-    jl_value_t *p1 = jl_tparam1(ty); // FIXME: check that ty is an array type
-    if (jl_is_long(p1)) {
-        size_t nd = jl_unbox_long(p1);
-        Value *l = ConstantInt::get(T_size, 1);
-        for(size_t i=0; i < nd; i++) {
-            l = ctx.builder.CreateMul(l, emit_arraysize(ctx, t, (int)(i + 1)));
-        }
-        return l;
-    }
-    else {
-        std::vector<Type *> fargt(0);
-        fargt.push_back(T_pjlvalue);
-        FunctionType *ft = FunctionType::get(T_size, fargt, false);
-        Value *alen = jl_Module->getOrInsertFunction("jl_array_len_", ft); // TODO: move to codegen init block
-        return ctx.builder.CreateCall(prepare_call(alen), t);
-    }
-#endif
 }
 
 static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
@@ -2043,29 +2598,30 @@ static Value *emit_arraylen(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
 
 static Value *emit_arrayptr_internal(jl_codectx_t &ctx, const jl_cgval_t &tinfo, Value *t, unsigned AS, bool isboxed)
 {
-    Value *addr =
-        ctx.builder.CreateStructGEP(jl_array_llvmt,
-            emit_bitcast(ctx, t, jl_parray_llvmt),
-            0); // index (not offset) of data field in jl_parray_llvmt
-    // Normally allocated array of 0 dimention always have a inline pointer.
+    ++EmittedArrayptr;
+    Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray,
+                                              emit_bitcast(ctx, t, ctx.types().T_pjlarray), 0);
+    // Normally allocated array of 0 dimension always have a inline pointer.
     // However, we can't rely on that here since arrays can also be constructed from C pointers.
-    MDNode *tbaa = arraytype_constshape(tinfo.typ) ? tbaa_const : tbaa_arrayptr;
     PointerType *PT = cast<PointerType>(addr->getType());
-    PointerType *PPT = cast<PointerType>(PT->getElementType());
+    PointerType *PPT = cast<PointerType>(ctx.types().T_jlarray->getElementType(0));
+    PointerType *LoadT = PPT;
+
     if (isboxed) {
-        addr = ctx.builder.CreateBitCast(addr,
-            PointerType::get(PointerType::get(T_prjlvalue, AS),
-            PT->getAddressSpace()));
-    } else if (AS != PPT->getAddressSpace()) {
-        addr = ctx.builder.CreateBitCast(addr,
-            PointerType::get(
-                PointerType::get(PPT->getElementType(), AS),
-                PT->getAddressSpace()));
-    }
-    LoadInst *LI = ctx.builder.CreateAlignedLoad(addr, Align(sizeof(char*)));
+        LoadT = PointerType::get(ctx.types().T_prjlvalue, AS);
+    }
+    else if (AS != PPT->getAddressSpace()) {
+        LoadT = PointerType::getWithSamePointeeType(PPT, AS);
+    }
+    if (LoadT != PPT) {
+        const auto Ty = PointerType::get(LoadT, PT->getAddressSpace());
+        addr = ctx.builder.CreateBitCast(addr, Ty);
+    }
+
+    LoadInst *LI = ctx.builder.CreateAlignedLoad(LoadT, addr, Align(sizeof(char *)));
     LI->setOrdering(AtomicOrdering::NotAtomic);
-    LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(jl_LLVMContext, None));
-    tbaa_decorate(tbaa, LI);
+    LI->setMetadata(LLVMContext::MD_nonnull, MDNode::get(ctx.builder.getContext(), None));
+    tbaa_decorate(arraytype_constshape(tinfo.typ) ? ctx.tbaa().tbaa_const : ctx.tbaa().tbaa_arrayptr, LI);
     return LI;
 }
 
@@ -2094,23 +2650,21 @@ static Value *emit_arraysize(jl_codectx_t &ctx, const jl_cgval_t &tinfo, jl_valu
 
 static Value *emit_arrayflags(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
 {
+    ++EmittedArrayflags;
     Value *t = boxed(ctx, tinfo);
-#ifdef STORE_ARRAY_LEN
     int arrayflag_field = 2;
-#else
-    int arrayflag_field = 1;
-#endif
     Value *addr = ctx.builder.CreateStructGEP(
-            jl_array_llvmt,
-            emit_bitcast(ctx, decay_derived(ctx, t), jl_parray_llvmt),
+            ctx.types().T_jlarray,
+            emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
             arrayflag_field);
-    return tbaa_decorate(tbaa_arrayflags, ctx.builder.CreateAlignedLoad(T_int16, addr, Align(sizeof(int16_t))));
+    return tbaa_decorate(ctx.tbaa().tbaa_arrayflags, ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t))));
 }
 
 static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary)
 {
+    ++EmittedArrayNDims;
     Value *flags = emit_arrayflags(ctx, ary);
-    cast<LoadInst>(flags)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(jl_LLVMContext, None));
+    cast<LoadInst>(flags)->setMetadata(LLVMContext::MD_invariant_load, MDNode::get(ctx.builder.getContext(), None));
     flags = ctx.builder.CreateLShr(flags, 2);
     flags = ctx.builder.CreateAnd(flags, 0x1FF); // (1<<9) - 1
     return flags;
@@ -2118,34 +2672,28 @@ static Value *emit_arrayndims(jl_codectx_t &ctx, const jl_cgval_t &ary)
 
 static Value *emit_arrayelsize(jl_codectx_t &ctx, const jl_cgval_t &tinfo)
 {
+    ++EmittedArrayElsize;
     Value *t = boxed(ctx, tinfo);
-#ifdef STORE_ARRAY_LEN
     int elsize_field = 3;
-#else
-    int elsize_field = 2;
-#endif
-    Value *addr = ctx.builder.CreateStructGEP(jl_array_llvmt,
-            emit_bitcast(ctx, decay_derived(ctx, t), jl_parray_llvmt),
+    Value *addr = ctx.builder.CreateStructGEP(ctx.types().T_jlarray,
+            emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
             elsize_field);
-    return tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_int16, addr, Align(sizeof(int16_t))));
+    return tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getInt16Ty(ctx.builder.getContext()), addr, Align(sizeof(int16_t))));
 }
 
 static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int nd)
 {
+    ++EmittedArrayOffset;
     if (nd != -1 && nd != 1) // only Vector can have an offset
-        return ConstantInt::get(T_int32, 0);
+        return ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0);
     Value *t = boxed(ctx, tinfo);
-#ifdef STORE_ARRAY_LEN
     int offset_field = 4;
-#else
-    int offset_field = 3;
-#endif
 
     Value *addr = ctx.builder.CreateStructGEP(
-            jl_array_llvmt,
-            emit_bitcast(ctx, decay_derived(ctx, t), jl_parray_llvmt),
+            ctx.types().T_jlarray,
+            emit_bitcast(ctx, decay_derived(ctx, t), ctx.types().T_pjlarray),
             offset_field);
-    return tbaa_decorate(tbaa_arrayoffset, ctx.builder.CreateAlignedLoad(T_int32, addr, Align(sizeof(int32_t))));
+    return tbaa_decorate(ctx.tbaa().tbaa_arrayoffset, ctx.builder.CreateAlignedLoad(getInt32Ty(ctx.builder.getContext()), addr, Align(sizeof(int32_t))));
 }
 
 // Returns the size of the array represented by `tinfo` for the given dimension `dim` if
@@ -2153,7 +2701,7 @@ static Value *emit_arrayoffset(jl_codectx_t &ctx, const jl_cgval_t &tinfo, int n
 static Value *emit_arraysize_for_unsafe_dim(jl_codectx_t &ctx,
         const jl_cgval_t &tinfo, jl_value_t *ex, size_t dim, size_t nd)
 {
-    return dim > nd ? ConstantInt::get(T_size, 1) : emit_arraysize(ctx, tinfo, ex, dim);
+    return dim > nd ? ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1) : emit_arraysize(ctx, tinfo, ex, dim);
 }
 
 // `nd == -1` means the dimension is unknown.
@@ -2161,31 +2709,32 @@ static Value *emit_array_nd_index(
         jl_codectx_t &ctx, const jl_cgval_t &ainfo, jl_value_t *ex, ssize_t nd,
         const jl_cgval_t *argv, size_t nidxs, jl_value_t *inbounds)
 {
+    ++EmittedArrayNdIndex;
     Value *a = boxed(ctx, ainfo);
-    Value *i = V_size0;
-    Value *stride = ConstantInt::get(T_size, 1);
+    Value *i = Constant::getNullValue(getSizeTy(ctx.builder.getContext()));
+    Value *stride = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
 #if CHECK_BOUNDS==1
     bool bc = bounds_check_enabled(ctx, inbounds);
     BasicBlock *failBB = NULL, *endBB = NULL;
     if (bc) {
-        failBB = BasicBlock::Create(jl_LLVMContext, "oob");
-        endBB = BasicBlock::Create(jl_LLVMContext, "idxend");
+        failBB = BasicBlock::Create(ctx.builder.getContext(), "oob");
+        endBB = BasicBlock::Create(ctx.builder.getContext(), "idxend");
     }
 #endif
     Value **idxs = (Value**)alloca(sizeof(Value*) * nidxs);
     for (size_t k = 0; k < nidxs; k++) {
-        idxs[k] = emit_unbox(ctx, T_size, argv[k], (jl_value_t*)jl_long_type); // type asserted by caller
+        idxs[k] = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), argv[k], (jl_value_t*)jl_long_type); // type asserted by caller
     }
     Value *ii = NULL;
     for (size_t k = 0; k < nidxs; k++) {
-        ii = ctx.builder.CreateSub(idxs[k], ConstantInt::get(T_size, 1));
+        ii = ctx.builder.CreateSub(idxs[k], ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
         i = ctx.builder.CreateAdd(i, ctx.builder.CreateMul(ii, stride));
         if (k < nidxs - 1) {
             assert(nd >= 0);
             Value *d = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k + 1, nd);
 #if CHECK_BOUNDS==1
             if (bc) {
-                BasicBlock *okBB = BasicBlock::Create(jl_LLVMContext, "ib");
+                BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "ib");
                 // if !(i < d) goto error
                 ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(ii, d), okBB, failBB);
                 ctx.f->getBasicBlockList().push_back(okBB);
@@ -2211,7 +2760,7 @@ static Value *emit_array_nd_index(
             ctx.builder.CreateCondBr(ctx.builder.CreateICmpULT(last_index, last_dimension), endBB, failBB);
         } else {
             // There were fewer indices than dimensions; check the last remaining index
-            BasicBlock *checktrailingdimsBB = BasicBlock::Create(jl_LLVMContext, "dimsib");
+            BasicBlock *checktrailingdimsBB = BasicBlock::Create(ctx.builder.getContext(), "dimsib");
             assert(nd >= 0);
             Value *last_index = ii;
             Value *last_dimension = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nidxs, nd);
@@ -2221,25 +2770,25 @@ static Value *emit_array_nd_index(
             // And then also make sure that all dimensions that weren't explicitly
             // indexed into have size 1
             for (size_t k = nidxs+1; k < (size_t)nd; k++) {
-                BasicBlock *dimsokBB = BasicBlock::Create(jl_LLVMContext, "dimsok");
+                BasicBlock *dimsokBB = BasicBlock::Create(ctx.builder.getContext(), "dimsok");
                 Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, k, nd);
-                ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(T_size, 1)), dimsokBB, failBB);
+                ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)), dimsokBB, failBB);
                 ctx.f->getBasicBlockList().push_back(dimsokBB);
                 ctx.builder.SetInsertPoint(dimsokBB);
             }
             Value *dim = emit_arraysize_for_unsafe_dim(ctx, ainfo, ex, nd, nd);
-            ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(T_size, 1)), endBB, failBB);
+            ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(dim, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1)), endBB, failBB);
         }
 
         ctx.f->getBasicBlockList().push_back(failBB);
         ctx.builder.SetInsertPoint(failBB);
         // CreateAlloca is OK here since we are on an error branch
-        Value *tmp = ctx.builder.CreateAlloca(T_size, ConstantInt::get(T_size, nidxs));
+        Value *tmp = ctx.builder.CreateAlloca(getSizeTy(ctx.builder.getContext()), ConstantInt::get(getSizeTy(ctx.builder.getContext()), nidxs));
         for (size_t k = 0; k < nidxs; k++) {
-            ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(T_size, tmp, ConstantInt::get(T_size, k)), Align(sizeof(size_t)));
+            ctx.builder.CreateAlignedStore(idxs[k], ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), tmp, ConstantInt::get(getSizeTy(ctx.builder.getContext()), k)), Align(sizeof(size_t)));
         }
         ctx.builder.CreateCall(prepare_call(jlboundserrorv_func),
-            { mark_callee_rooted(ctx, a), tmp, ConstantInt::get(T_size, nidxs) });
+            { mark_callee_rooted(ctx, a), tmp, ConstantInt::get(getSizeTy(ctx.builder.getContext()), nidxs) });
         ctx.builder.CreateUnreachable();
 
         ctx.f->getBasicBlockList().push_back(endBB);
@@ -2273,7 +2822,7 @@ static void init_bits_cgval(jl_codectx_t &ctx, Value *newv, const jl_cgval_t& v,
     }
 }
 
-static jl_value_t *static_constant_instance(Constant *constant, jl_value_t *jt)
+static jl_value_t *static_constant_instance(const llvm::DataLayout &DL, Constant *constant, jl_value_t *jt)
 {
     assert(constant != NULL && jl_is_concrete_type(jt));
     jl_datatype_t *jst = (jl_datatype_t*)jt;
@@ -2302,7 +2851,7 @@ static jl_value_t *static_constant_instance(Constant *constant, jl_value_t *jt)
     if (ConstantExpr *ce = dyn_cast<ConstantExpr>(constant)) {
         unsigned OpCode = ce->getOpcode();
         if (OpCode == Instruction::BitCast || OpCode == Instruction::PtrToInt || OpCode == Instruction::IntToPtr) {
-            return static_constant_instance(ce->getOperand(0), jt);
+            return static_constant_instance(DL, ce->getOperand(0), jt);
         }
         return NULL;
     }
@@ -2313,8 +2862,14 @@ static jl_value_t *static_constant_instance(Constant *constant, jl_value_t *jt)
     size_t nargs;
     if (const auto *CC = dyn_cast<ConstantAggregate>(constant))
         nargs = CC->getNumOperands();
-    else if (const auto *CAZ = dyn_cast<ConstantAggregateZero>(constant))
+    else if (const auto *CAZ = dyn_cast<ConstantAggregateZero>(constant)) {
+#if JL_LLVM_VERSION >= 130000
+        // SVE: Elsewhere we use `getMinKownValue`
+        nargs = CAZ->getElementCount().getFixedValue();
+#else
         nargs = CAZ->getNumElements();
+#endif
+    }
     else if (const auto *CDS = dyn_cast<ConstantDataSequential>(constant))
         nargs = CDS->getNumElements();
     else
@@ -2333,9 +2888,9 @@ static jl_value_t *static_constant_instance(Constant *constant, jl_value_t *jt)
         }
         unsigned llvm_idx = i;
         if (i > 0 && isa<StructType>(constant->getType()))
-            llvm_idx = convert_struct_offset(constant->getType(), jl_field_offset(jst, i));
+            llvm_idx = convert_struct_offset(DL, constant->getType(), jl_field_offset(jst, i));
         Constant *fld = constant->getAggregateElement(llvm_idx);
-        flds[i] = static_constant_instance(fld, ft);
+        flds[i] = static_constant_instance(DL, fld, ft);
         if (flds[i] == NULL) {
             JL_GC_POP();
             return NULL; // must have been unreachable
@@ -2365,27 +2920,27 @@ static Value *as_value(jl_codectx_t &ctx, Type *to, const jl_cgval_t &v)
 static Value *load_i8box(jl_codectx_t &ctx, Value *v, jl_datatype_t *ty)
 {
     auto jvar = ty == jl_int8_type ? jlboxed_int8_cache : jlboxed_uint8_cache;
-    Constant *gv = prepare_global_in(jl_Module, jvar);
-    Value *idx[] = {ConstantInt::get(T_int32, 0), ctx.builder.CreateZExt(v, T_int32)};
-    auto slot = ctx.builder.CreateInBoundsGEP(gv, idx);
-    return tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(
-            ctx.builder.CreateAlignedLoad(T_pjlvalue, slot, Align(sizeof(void*))), false,
+    GlobalVariable *gv = prepare_global_in(jl_Module, jvar);
+    Value *idx[] = {ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0), ctx.builder.CreateZExt(v, getInt32Ty(ctx.builder.getContext()))};
+    auto slot = ctx.builder.CreateInBoundsGEP(gv->getValueType(), gv, idx);
+    return tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable(
+            ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, slot, Align(sizeof(void*))), false,
             (jl_value_t*)ty));
 }
 
 // some types have special boxing functions with small-value caches
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t)
 {
     jl_value_t *jt = vinfo.typ;
     if (jt == (jl_value_t*)jl_bool_type)
-        return track_pjlvalue(ctx, julia_bool(ctx, ctx.builder.CreateTrunc(as_value(ctx, t, vinfo), T_int1)));
-    if (t == T_int1)
+        return track_pjlvalue(ctx, julia_bool(ctx, ctx.builder.CreateTrunc(as_value(ctx, t, vinfo), getInt1Ty(ctx.builder.getContext()))));
+    if (t == getInt1Ty(ctx.builder.getContext()))
         return track_pjlvalue(ctx, julia_bool(ctx, as_value(ctx, t, vinfo)));
 
     if (ctx.linfo && jl_is_method(ctx.linfo->def.method) && !vinfo.ispointer()) { // don't bother codegen pre-boxing for toplevel
         if (Constant *c = dyn_cast<Constant>(vinfo.V)) {
-            jl_value_t *s = static_constant_instance(c, jt);
+            jl_value_t *s = static_constant_instance(jl_Module->getDataLayout(), c, jt);
             if (s) {
                 jl_add_method_root(ctx, s);
                 return track_pjlvalue(ctx, literal_pointer_val(ctx, s));
@@ -2426,7 +2981,7 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t
         v = ctx.builder.CreateExtractValue(v, makeArrayRef(&zero, 1));
         box = call_with_attrs(ctx, box_ssavalue_func, v);
     }
-    else if (!jb->abstract && jl_datatype_nbits(jb) == 0) {
+    else if (!jb->name->abstract && jl_datatype_nbits(jb) == 0) {
         // singleton
         assert(jb->instance != NULL);
         return track_pjlvalue(ctx, literal_pointer_val(ctx, jb->instance));
@@ -2436,13 +2991,13 @@ static Value *_boxed_special(jl_codectx_t &ctx, const jl_cgval_t &vinfo, Type *t
 
 static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype, jl_value_t *supertype, jl_value_t *ut)
 {
-    Value *tindex = ConstantInt::get(T_int8, 0);
+    Value *tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0);
     unsigned counter = 0;
     for_each_uniontype_small(
             [&](unsigned idx, jl_datatype_t *jt) {
                 if (jl_subtype((jl_value_t*)jt, supertype)) {
                     Value *cmp = ctx.builder.CreateICmpEQ(track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jt)), datatype);
-                    tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(T_int8, idx), tindex);
+                    tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx), tindex);
                 }
             },
             ut,
@@ -2450,19 +3005,43 @@ static Value *compute_box_tindex(jl_codectx_t &ctx, Value *datatype, jl_value_t
     return tindex;
 }
 
+// Returns typeof(v), or null if v is a null pointer at run time.
+// This is used when the value might have come from an undefined variable,
+// yet we try to read its type to compute a union index when moving the value.
+static Value *emit_typeof_or_null(jl_codectx_t &ctx, Value *v)
+{
+    BasicBlock *nonnull = BasicBlock::Create(ctx.builder.getContext(), "nonnull", ctx.f);
+    BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "postnull", ctx.f);
+    Value *isnull = ctx.builder.CreateICmpEQ(v, Constant::getNullValue(v->getType()));
+    ctx.builder.CreateCondBr(isnull, postBB, nonnull);
+    BasicBlock *entry = ctx.builder.GetInsertBlock();
+    ctx.builder.SetInsertPoint(nonnull);
+    Value *typof = emit_typeof(ctx, v);
+    ctx.builder.CreateBr(postBB);
+    nonnull = ctx.builder.GetInsertBlock(); // could have changed
+    ctx.builder.SetInsertPoint(postBB);
+    PHINode *ti = ctx.builder.CreatePHI(typof->getType(), 2);
+    ti->addIncoming(Constant::getNullValue(typof->getType()), entry);
+    ti->addIncoming(typof, nonnull);
+    return ti;
+}
+
 // get the runtime tindex value, assuming val is already converted to type typ if it has a TIndex
 static Value *compute_tindex_unboxed(jl_codectx_t &ctx, const jl_cgval_t &val, jl_value_t *typ)
 {
     if (val.typ == jl_bottom_type)
-        return UndefValue::get(T_int8);
+        return UndefValue::get(getInt8Ty(ctx.builder.getContext()));
     if (val.constant)
-        return ConstantInt::get(T_int8, get_box_tindex((jl_datatype_t*)jl_typeof(val.constant), typ));
+        return ConstantInt::get(getInt8Ty(ctx.builder.getContext()), get_box_tindex((jl_datatype_t*)jl_typeof(val.constant), typ));
 
     if (val.TIndex)
-        return ctx.builder.CreateAnd(val.TIndex, ConstantInt::get(T_int8, 0x7f));
-    if (val.isboxed)
-        return compute_box_tindex(ctx, emit_typeof_boxed(ctx, val), val.typ, typ);
-    return compute_box_tindex(ctx, emit_typeof_boxed(ctx, val), val.typ, typ);
+        return ctx.builder.CreateAnd(val.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
+    Value *typof;
+    if (val.isboxed && !jl_is_concrete_type(val.typ) && !jl_is_type_type(val.typ))
+        typof = emit_typeof_or_null(ctx, val.V);
+    else
+        typof = emit_typeof_boxed(ctx, val);
+    return compute_box_tindex(ctx, typof, val.typ, typ);
 }
 
 static void union_alloca_type(jl_uniontype_t *ut,
@@ -2497,7 +3076,7 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut,
     if (nbytes > 0) {
         // at least some of the values can live on the stack
         // try to pick an Integer type size such that SROA will emit reasonable code
-        Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * min_align), (nbytes + min_align - 1) / min_align);
+        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align);
         AllocaInst *lv = emit_static_alloca(ctx, AT);
         if (align > 1)
             lv->setAlignment(Align(align));
@@ -2508,12 +3087,12 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut,
 
 /*
  * Box unboxed values in a union. Optionally, skip certain unboxed values,
- * returning `V_null` in one of the skipped cases. If `skip` is not empty,
+ * returning `Constant::getNullValue(ctx.types().T_pjlvalue)` in one of the skipped cases. If `skip` is not empty,
  * skip[0] (corresponding to unknown boxed) must always be set. In that
  * case, the calling code must separately deal with the case where
  * `vinfo` is already an unknown boxed union (union tag 0x80).
  */
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallBitVector &skip)
 {
     // given vinfo::Union{T, S}, emit IR of the form:
@@ -2532,20 +3111,20 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
     //   box = phi [ box1, box_union_1 ], [ box2, box_union_2 ], [ vinfo, box_union_isboxed ]
     //   ...
     Value *tindex = vinfo.TIndex;
-    BasicBlock *defaultBB = BasicBlock::Create(jl_LLVMContext, "box_union_isboxed", ctx.f);
+    BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), "box_union_isboxed", ctx.f);
     SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB);
-    BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_box_union", ctx.f);
+    BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_box_union", ctx.f);
     ctx.builder.SetInsertPoint(postBB);
-    PHINode *box_merge = ctx.builder.CreatePHI(T_prjlvalue, 2);
+    PHINode *box_merge = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
     unsigned counter = 0;
     for_each_uniontype_small(
             [&](unsigned idx, jl_datatype_t *jt) {
                 if (idx < skip.size() && skip[idx])
                     return;
                 Type *t = julia_type_to_llvm(ctx, (jl_value_t*)jt);
-                BasicBlock *tempBB = BasicBlock::Create(jl_LLVMContext, "box_union", ctx.f);
+                BasicBlock *tempBB = BasicBlock::Create(ctx.builder.getContext(), "box_union", ctx.f);
                 ctx.builder.SetInsertPoint(tempBB);
-                switchInst->addCase(ConstantInt::get(T_int8, idx), tempBB);
+                switchInst->addCase(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx), tempBB);
                 Value *box;
                 if (type_is_ghost(t)) {
                     box = track_pjlvalue(ctx, literal_pointer_val(ctx, jt->instance));
@@ -2555,7 +3134,7 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
                     box = _boxed_special(ctx, vinfo_r, t);
                     if (!box) {
                         box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
-                        init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? tbaa_mutab : tbaa_immut);
+                        init_bits_cgval(ctx, box, vinfo_r, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
                     }
                 }
                 tempBB = ctx.builder.GetInsertBlock(); // could have changed
@@ -2567,7 +3146,7 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
     ctx.builder.SetInsertPoint(defaultBB);
     if (skip.size() > 0) {
         assert(skip[0]);
-        box_merge->addIncoming(V_rnull, defaultBB);
+        box_merge->addIncoming(Constant::getNullValue(ctx.types().T_prjlvalue), defaultBB);
         ctx.builder.CreateBr(postBB);
     }
     else if (!vinfo.Vboxed) {
@@ -2588,21 +3167,21 @@ static Value *box_union(jl_codectx_t &ctx, const jl_cgval_t &vinfo, const SmallB
 // this is used to wrap values for generic contexts, where a
 // dynamically-typed value is required (e.g. argument to unknown function).
 // if it's already a pointer it's left alone.
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo)
 {
     jl_value_t *jt = vinfo.typ;
     if (jt == jl_bottom_type || jt == NULL)
         // We have an undef value on a (hopefully) dead branch
-        return UndefValue::get(T_prjlvalue);
+        return UndefValue::get(ctx.types().T_prjlvalue);
     if (vinfo.constant)
         return track_pjlvalue(ctx, literal_pointer_val(ctx, vinfo.constant));
     // This can happen in early bootstrap for `gc_preserve_begin` return value.
     if (jt == (jl_value_t*)jl_nothing_type)
         return track_pjlvalue(ctx, literal_pointer_val(ctx, jl_nothing));
     if (vinfo.isboxed) {
-        assert(vinfo.V == vinfo.Vboxed);
-        assert(vinfo.V->getType() == T_prjlvalue);
+        assert(vinfo.V == vinfo.Vboxed && vinfo.V != nullptr);
+        assert(vinfo.V->getType() == ctx.types().T_prjlvalue);
         return vinfo.V;
     }
 
@@ -2619,7 +3198,7 @@ static Value *boxed(jl_codectx_t &ctx, const jl_cgval_t &vinfo)
         box = _boxed_special(ctx, vinfo, t);
         if (!box) {
             box = emit_allocobj(ctx, jl_datatype_size(jt), literal_pointer_val(ctx, (jl_value_t*)jt));
-            init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? tbaa_mutab : tbaa_immut);
+            init_bits_cgval(ctx, box, vinfo, jl_is_mutable(jt) ? ctx.tbaa().tbaa_mutab : ctx.tbaa().tbaa_immut);
         }
     }
     return box;
@@ -2630,13 +3209,7 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
 {
     if (AllocaInst *ai = dyn_cast<AllocaInst>(dest))
         // TODO: make this a lifetime_end & dereferencable annotation?
-        ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai,
-#if JL_LLVM_VERSION >= 110000
-                ai->getAlign()
-#else
-                ai->getAlignment()
-#endif
-                );
+        ctx.builder.CreateAlignedStore(UndefValue::get(ai->getAllocatedType()), ai, ai->getAlign());
     if (jl_is_concrete_type(src.typ) || src.constant) {
         jl_value_t *typ = src.constant ? jl_typeof(src.constant) : src.typ;
         Type *store_ty = julia_type_to_llvm(ctx, typ);
@@ -2649,35 +3222,35 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
                 Value *src_ptr = data_pointer(ctx, src);
                 unsigned nb = jl_datatype_size(typ);
                 unsigned alignment = julia_alignment(typ);
-                Value *nbytes = ConstantInt::get(T_size, nb);
+                Value *nbytes = ConstantInt::get(getSizeTy(ctx.builder.getContext()), nb);
                 if (skip) {
                     // TODO: this Select is very bad for performance, but is necessary to work around LLVM bugs with the undef option that we want to use:
                     //   select copy dest -> dest to simulate an undef value / conditional copy
                     // src_ptr = ctx.builder.CreateSelect(skip, dest, src_ptr);
-                    nbytes = ctx.builder.CreateSelect(skip, V_size0, nbytes);
+                    nbytes = ctx.builder.CreateSelect(skip, Constant::getNullValue(getSizeTy(ctx.builder.getContext())), nbytes);
                 }
                 emit_memcpy(ctx, dest, tbaa_dst, src_ptr, src.tbaa, nbytes, alignment, isVolatile);
             }
         }
     }
     else if (src.TIndex) {
-        Value *tindex = ctx.builder.CreateAnd(src.TIndex, ConstantInt::get(T_int8, 0x7f));
+        Value *tindex = ctx.builder.CreateAnd(src.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
         if (skip)
-            tindex = ctx.builder.CreateSelect(skip, ConstantInt::get(T_int8, 0), tindex);
+            tindex = ctx.builder.CreateSelect(skip, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), tindex);
         Value *src_ptr = data_pointer(ctx, src);
-        src_ptr = src_ptr ? maybe_bitcast(ctx, src_ptr, T_pint8) : src_ptr;
-        dest = maybe_bitcast(ctx, dest, T_pint8);
-        BasicBlock *defaultBB = BasicBlock::Create(jl_LLVMContext, "union_move_skip", ctx.f);
+        src_ptr = src_ptr ? maybe_bitcast(ctx, src_ptr, getInt8PtrTy(ctx.builder.getContext())) : src_ptr;
+        dest = maybe_bitcast(ctx, dest, getInt8PtrTy(ctx.builder.getContext()));
+        BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), "union_move_skip", ctx.f);
         SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB);
-        BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_union_move", ctx.f);
+        BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_move", ctx.f);
         unsigned counter = 0;
         bool allunboxed = for_each_uniontype_small(
                 [&](unsigned idx, jl_datatype_t *jt) {
                     unsigned nb = jl_datatype_size(jt);
                     unsigned alignment = julia_alignment((jl_value_t*)jt);
-                    BasicBlock *tempBB = BasicBlock::Create(jl_LLVMContext, "union_move", ctx.f);
+                    BasicBlock *tempBB = BasicBlock::Create(ctx.builder.getContext(), "union_move", ctx.f);
                     ctx.builder.SetInsertPoint(tempBB);
-                    switchInst->addCase(ConstantInt::get(T_int8, idx), tempBB);
+                    switchInst->addCase(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx), tempBB);
                     if (nb > 0) {
                         if (!src_ptr) {
                             Function *trap_func =
@@ -2721,14 +3294,15 @@ static void emit_unionmove(jl_codectx_t &ctx, Value *dest, MDNode *tbaa_dst, con
 
 static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std::string &msg)
 {
+    ++EmittedCPointerChecks;
     Value *t = emit_typeof_boxed(ctx, x);
     emit_typecheck(ctx, mark_julia_type(ctx, t, true, jl_any_type), (jl_value_t*)jl_datatype_type, msg);
 
     Value *istype =
         ctx.builder.CreateICmpEQ(mark_callee_rooted(ctx, emit_datatype_name(ctx, t)),
                                  mark_callee_rooted(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_pointer_typename)));
-    BasicBlock *failBB = BasicBlock::Create(jl_LLVMContext,"fail",ctx.f);
-    BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext,"pass");
+    BasicBlock *failBB = BasicBlock::Create(ctx.builder.getContext(),"fail",ctx.f);
+    BasicBlock *passBB = BasicBlock::Create(ctx.builder.getContext(),"pass");
     ctx.builder.CreateCondBr(istype, passBB, failBB);
     ctx.builder.SetInsertPoint(failBB);
 
@@ -2740,11 +3314,13 @@ static void emit_cpointercheck(jl_codectx_t &ctx, const jl_cgval_t &x, const std
 }
 
 // allocation for known size object
+// returns a prjlvalue
 static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
 {
-    Value *ptls_ptr = emit_bitcast(ctx, ctx.ptlsStates, T_pint8);
+    ++EmittedAllocObjs;
+    Value *current_task = get_current_task(ctx);
     Function *F = prepare_call(jl_alloc_obj_func);
-    auto call = ctx.builder.CreateCall(F, {ptls_ptr, ConstantInt::get(T_size, static_size), maybe_decay_untracked(ctx, jt)});
+    auto call = ctx.builder.CreateCall(F, {current_task, ConstantInt::get(getSizeTy(ctx.builder.getContext()), static_size), maybe_decay_untracked(ctx, jt)});
     call->setAttributes(F->getAttributes());
     return call;
 }
@@ -2752,7 +3328,7 @@ static Value *emit_allocobj(jl_codectx_t &ctx, size_t static_size, Value *jt)
 // allocation for unknown object from an untracked pointer
 static Value *emit_new_bits(jl_codectx_t &ctx, Value *jt, Value *pval)
 {
-    pval = ctx.builder.CreateBitCast(pval, T_pint8);
+    pval = ctx.builder.CreateBitCast(pval, getInt8PtrTy(ctx.builder.getContext()));
     Function *F = prepare_call(jl_newbits_func);
     auto call = ctx.builder.CreateCall(F, { jt, pval });
     call->setAttributes(F->getAttributes());
@@ -2767,22 +3343,35 @@ static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, Value *ptr)
 
 static void emit_write_barrier(jl_codectx_t &ctx, Value *parent, ArrayRef<Value*> ptrs)
 {
+    ++EmittedWriteBarriers;
+    // if there are no child objects we can skip emission
+    if (ptrs.empty())
+        return;
     SmallVector<Value*, 8> decay_ptrs;
-    decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, T_prjlvalue)));
+    decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, ctx.types().T_prjlvalue)));
     for (auto ptr : ptrs) {
-        decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, ptr, T_prjlvalue)));
+        decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, ptr, ctx.types().T_prjlvalue)));
     }
     ctx.builder.CreateCall(prepare_call(jl_write_barrier_func), decay_ptrs);
 }
 
+static void emit_write_barrier_binding(jl_codectx_t &ctx, Value *parent, Value *ptr)
+{
+    SmallVector<Value*, 8> decay_ptrs;
+    decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, parent, ctx.types().T_prjlvalue)));
+    decay_ptrs.push_back(maybe_decay_untracked(ctx, emit_bitcast(ctx, ptr, ctx.types().T_prjlvalue)));
+    ctx.builder.CreateCall(prepare_call(jl_write_barrier_binding_func), decay_ptrs);
+}
+
 static void find_perm_offsets(jl_datatype_t *typ, SmallVector<unsigned,4> &res, unsigned offset)
 {
     // This is a inlined field at `offset`.
     if (!typ->layout || typ->layout->npointers == 0)
         return;
-    size_t nf = jl_svec_len(typ->types);
+    jl_svec_t *types = jl_get_fieldtypes(typ);
+    size_t nf = jl_svec_len(types);
     for (size_t i = 0; i < nf; i++) {
-        jl_value_t *_fld = jl_svecref(typ->types, i);
+        jl_value_t *_fld = jl_svecref(types, i);
         if (!jl_is_datatype(_fld))
             continue;
         jl_datatype_t *fld = (jl_datatype_t*)_fld;
@@ -2807,68 +3396,123 @@ static void emit_write_multibarrier(jl_codectx_t &ctx, Value *parent, Value *agg
     emit_write_barrier(ctx, parent, ptrs);
 }
 
-
-static void emit_setfield(jl_codectx_t &ctx,
+static jl_cgval_t emit_setfield(jl_codectx_t &ctx,
         jl_datatype_t *sty, const jl_cgval_t &strct, size_t idx0,
-        const jl_cgval_t &rhs, bool checked, bool wb)
-{
-    if (sty->mutabl || !checked) {
-        assert(strct.ispointer());
-        size_t byte_offset = jl_field_offset(sty, idx0);
-        Value *addr = data_pointer(ctx, strct);
-        if (byte_offset > 0) {
-            addr = ctx.builder.CreateInBoundsGEP(
-                    T_int8,
-                    emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8),
-                    ConstantInt::get(T_size, byte_offset)); // TODO: use emit_struct_gep
-        }
-        jl_value_t *jfty = jl_svecref(sty->types, idx0);
-        if (jl_field_isptr(sty, idx0)) {
-            Value *r = boxed(ctx, rhs); // don't need a temporary gcroot since it'll be rooted by strct
-            cast<StoreInst>(tbaa_decorate(strct.tbaa, ctx.builder.CreateAlignedStore(r,
-                        emit_bitcast(ctx, addr, T_pprjlvalue),
-                        Align(sizeof(jl_value_t*)))))
-                    ->setOrdering(AtomicOrdering::Unordered);
-            if (wb && strct.isboxed && !type_is_permalloc(rhs.typ))
-                emit_write_barrier(ctx, boxed(ctx, strct), r);
-        }
-        else if (jl_is_uniontype(jfty)) {
-            int fsz = jl_field_size(sty, idx0) - 1;
-            // compute tindex from rhs
-            jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty);
-            if (rhs_union.typ == jl_bottom_type)
-                return;
-            Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
-            tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
-            Value *ptindex = ctx.builder.CreateInBoundsGEP(T_int8, emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), T_pint8), ConstantInt::get(T_size, fsz));
-            tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
-            // copy data
-            if (!rhs.isghost) {
-                emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr);
+        jl_cgval_t rhs, jl_cgval_t cmp,
+        bool wb, AtomicOrdering Order, AtomicOrdering FailOrder,
+        bool needlock, bool issetfield, bool isreplacefield, bool isswapfield, bool ismodifyfield,
+        const jl_cgval_t *modifyop, const std::string &fname)
+{
+    ++EmittedSetfield;
+    assert(strct.ispointer());
+    size_t byte_offset = jl_field_offset(sty, idx0);
+    Value *addr = data_pointer(ctx, strct);
+    if (byte_offset > 0) {
+        addr = ctx.builder.CreateInBoundsGEP(
+                getInt8Ty(ctx.builder.getContext()),
+                emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), getInt8PtrTy(ctx.builder.getContext())),
+                ConstantInt::get(getSizeTy(ctx.builder.getContext()), byte_offset)); // TODO: use emit_struct_gep
+    }
+    jl_value_t *jfty = jl_field_type(sty, idx0);
+    if (!jl_field_isptr(sty, idx0) && jl_is_uniontype(jfty)) {
+        size_t fsz = 0, al = 0;
+        int union_max = jl_islayout_inline(jfty, &fsz, &al);
+        bool isptr = (union_max == 0);
+        assert(!isptr && fsz == jl_field_size(sty, idx0) - 1); (void)isptr;
+        // compute tindex from rhs
+        jl_cgval_t rhs_union = convert_julia_type(ctx, rhs, jfty);
+        if (rhs_union.typ == jl_bottom_type)
+            return jl_cgval_t(ctx.builder.getContext());
+        Value *ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, maybe_decay_tracked(ctx, addr), getInt8PtrTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), fsz));
+        if (needlock)
+            emit_lockstate_value(ctx, strct, true);
+        BasicBlock *ModifyBB = NULL;
+        if (ismodifyfield) {
+            ModifyBB = BasicBlock::Create(ctx.builder.getContext(), "modify_xchg", ctx.f);
+            ctx.builder.CreateBr(ModifyBB);
+            ctx.builder.SetInsertPoint(ModifyBB);
+        }
+        jl_cgval_t oldval = rhs;
+        if (!issetfield)
+            oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte);
+        Value *Success = NULL;
+        BasicBlock *DoneBB = NULL;
+        if (isreplacefield || ismodifyfield) {
+            if (ismodifyfield) {
+                if (needlock)
+                    emit_lockstate_value(ctx, strct, false);
+                const jl_cgval_t argv[3] = { cmp, oldval, rhs };
+                if (modifyop) {
+                    rhs = emit_invoke(ctx, *modifyop, argv, 3, (jl_value_t*)jl_any_type);
+                }
+                else {
+                    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, 3, JLCALL_F_CC);
+                    rhs = mark_julia_type(ctx, callval, true, jl_any_type);
+                }
+                if (!jl_subtype(rhs.typ, jfty)) {
+                    emit_typecheck(ctx, rhs, jfty, fname);
+                    rhs = update_julia_type(ctx, rhs, jfty);
+                }
+                rhs_union = convert_julia_type(ctx, rhs, jfty);
+                if (rhs_union.typ == jl_bottom_type)
+                    return jl_cgval_t(ctx.builder.getContext());
+                if (needlock)
+                    emit_lockstate_value(ctx, strct, true);
+                cmp = oldval;
+                oldval = emit_unionload(ctx, addr, ptindex, jfty, fsz, al, strct.tbaa, true, union_max, ctx.tbaa().tbaa_unionselbyte);
             }
-        }
-        else {
-            unsigned align = jl_field_align(sty, idx0);
-            typed_store(ctx, addr, NULL, rhs, jfty,
-                strct.tbaa, nullptr, maybe_bitcast(ctx,
-                data_pointer(ctx, strct), T_pjlvalue), align);
-        }
+            BasicBlock *XchgBB = BasicBlock::Create(ctx.builder.getContext(), "xchg", ctx.f);
+            DoneBB = BasicBlock::Create(ctx.builder.getContext(), "done_xchg", ctx.f);
+            Success = emit_f_is(ctx, oldval, cmp);
+            ctx.builder.CreateCondBr(Success, XchgBB, ismodifyfield ? ModifyBB : DoneBB);
+            ctx.builder.SetInsertPoint(XchgBB);
+        }
+        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jfty);
+        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1));
+        tbaa_decorate(ctx.tbaa().tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
+        // copy data
+        if (!rhs.isghost) {
+            emit_unionmove(ctx, addr, strct.tbaa, rhs, nullptr);
+        }
+        if (isreplacefield || ismodifyfield) {
+            ctx.builder.CreateBr(DoneBB);
+            ctx.builder.SetInsertPoint(DoneBB);
+        }
+        if (needlock)
+            emit_lockstate_value(ctx, strct, false);
+        if (isreplacefield) {
+            Success = ctx.builder.CreateZExt(Success, getInt8Ty(ctx.builder.getContext()));
+            jl_cgval_t argv[2] = {oldval, mark_julia_type(ctx, Success, false, jl_bool_type)};
+            jl_datatype_t *rettyp = jl_apply_cmpswap_type(jfty);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+        else if (ismodifyfield) {
+            jl_cgval_t argv[2] = {oldval, rhs};
+            jl_datatype_t *rettyp = jl_apply_modify_type(jfty);
+            oldval = emit_new_struct(ctx, (jl_value_t*)rettyp, 2, argv);
+        }
+        return oldval;
     }
     else {
-        std::string msg = "setfield! immutable struct of type "
-            + std::string(jl_symbol_name(sty->name->name))
-            + " cannot be changed";
-        emit_error(ctx, msg);
+        unsigned align = jl_field_align(sty, idx0);
+        bool isboxed = jl_field_isptr(sty, idx0);
+        size_t nfields = jl_datatype_nfields(sty);
+        bool maybe_null = idx0 >= nfields - (unsigned)sty->name->n_uninitialized;
+        return typed_store(ctx, addr, NULL, rhs, cmp, jfty, strct.tbaa, nullptr,
+            wb ? maybe_bitcast(ctx, data_pointer(ctx, strct), ctx.types().T_pjlvalue) : nullptr,
+            isboxed, Order, FailOrder, align,
+            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield, maybe_null, modifyop, fname);
     }
 }
 
 static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv)
 {
+    ++EmittedNewStructs;
     assert(jl_is_datatype(ty));
     assert(jl_is_concrete_type(ty));
     jl_datatype_t *sty = (jl_datatype_t*)ty;
     size_t nf = jl_datatype_nfields(sty);
-    if (nf > 0 || sty->mutabl) {
+    if (nf > 0 || sty->name->mutabl) {
         if (deserves_stack(ty)) {
             Type *lt = julia_type_to_llvm(ctx, ty);
             unsigned na = nargs < nf ? nargs : nf;
@@ -2897,11 +3541,11 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
             else {
                 strct = emit_static_alloca(ctx, lt);
                 if (tracked.count)
-                    undef_derived_strct(ctx.builder, strct, sty, tbaa_stack);
+                    undef_derived_strct(ctx.builder, strct, sty, ctx.tbaa().tbaa_stack);
             }
 
             for (unsigned i = 0; i < na; i++) {
-                jl_value_t *jtype = jl_svecref(sty->types, i);
+                jl_value_t *jtype = jl_svecref(sty->types, i); // n.b. ty argument must be concrete
                 jl_cgval_t fval_info = argv[i];
                 emit_typecheck(ctx, fval_info, jtype, "new");
                 fval_info = update_julia_type(ctx, fval_info, jtype);
@@ -2922,7 +3566,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 if (jl_field_isptr(sty, i)) {
                     fval = boxed(ctx, fval_info);
                     if (!init_as_value)
-                        cast<StoreInst>(tbaa_decorate(tbaa_stack,
+                        cast<StoreInst>(tbaa_decorate(ctx.tbaa().tbaa_stack,
                                     ctx.builder.CreateAlignedStore(fval, dest, Align(jl_field_align(sty, i)))))
                                 ->setOrdering(AtomicOrdering::Unordered);
                 }
@@ -2930,9 +3574,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     // compute tindex from rhs
                     jl_cgval_t rhs_union = convert_julia_type(ctx, fval_info, jtype);
                     if (rhs_union.typ == jl_bottom_type)
-                        return jl_cgval_t();
+                        return jl_cgval_t(ctx.builder.getContext());
                     Value *tindex = compute_tindex_unboxed(ctx, rhs_union, jtype);
-                    tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
+                    tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1));
                     size_t fsz = 0, al = 0;
                     bool isptr = !jl_islayout_inline(jtype, &fsz, &al);
                     assert(!isptr && fsz == jl_field_size(sty, i) - 1); (void)isptr;
@@ -2943,25 +3587,25 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                         // But more efficient to just store it directly.
                         unsigned ptindex = convert_struct_offset(ctx, lt, offs + fsz);
                         if (fsz > 0 && !fval_info.isghost) {
-                            Type *ET = IntegerType::get(jl_LLVMContext, 8 * al);
+                            Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al);
                             assert(lt->getStructElementType(llvm_idx) == ET);
                             AllocaInst *lv = emit_static_alloca(ctx, ET);
-                            lv->setOperand(0, ConstantInt::get(T_int32, (fsz + al - 1) / al));
-                            emit_unionmove(ctx, lv, tbaa_stack, fval_info, nullptr);
+                            lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + al - 1) / al));
+                            emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr);
                             // emit all of the align-sized words
                             unsigned i = 0;
                             for (; i < fsz / al; i++) {
                                 Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
-                                Value *fldv = tbaa_decorate(tbaa_stack, ctx.builder.CreateAlignedLoad(ET, fldp, Align(al)));
+                                Value *fldv = tbaa_decorate(ctx.tbaa().tbaa_stack, ctx.builder.CreateAlignedLoad(ET, fldp, Align(al)));
                                 strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i));
                             }
                             // emit remaining bytes up to tindex
                             if (i < ptindex - llvm_idx) {
                                 Value *staddr = ctx.builder.CreateConstInBoundsGEP1_32(ET, lv, i);
-                                staddr = ctx.builder.CreateBitCast(staddr, T_pint8);
+                                staddr = ctx.builder.CreateBitCast(staddr, getInt8PtrTy(ctx.builder.getContext()));
                                 for (; i < ptindex - llvm_idx; i++) {
-                                    Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(T_int8, staddr, i);
-                                    Value *fldv = tbaa_decorate(tbaa_stack, ctx.builder.CreateAlignedLoad(T_int8, fldp, Align(1)));
+                                    Value *fldp = ctx.builder.CreateConstInBoundsGEP1_32(getInt8Ty(ctx.builder.getContext()), staddr, i);
+                                    Value *fldv = tbaa_decorate(ctx.tbaa().tbaa_stack, ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), fldp, Align(1)));
                                     strct = ctx.builder.CreateInsertValue(strct, fldv, makeArrayRef(llvm_idx + i));
                                 }
                             }
@@ -2973,20 +3617,20 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     }
                     else {
                         Value *ptindex = emit_struct_gep(ctx, lt, strct, offs + fsz);
-                        tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
+                        tbaa_decorate(ctx.tbaa().tbaa_unionselbyte, ctx.builder.CreateAlignedStore(tindex, ptindex, Align(1)));
                         if (!rhs_union.isghost)
-                            emit_unionmove(ctx, dest, tbaa_stack, fval_info, nullptr);
+                            emit_unionmove(ctx, dest, ctx.tbaa().tbaa_stack, fval_info, nullptr);
                     }
                 }
                 else {
-                    fval = emit_unbox(ctx, fty, fval_info, jtype, dest, tbaa_stack);
+                    fval = emit_unbox(ctx, fty, fval_info, jtype, dest, ctx.tbaa().tbaa_stack);
                 }
                 if (init_as_value) {
                     assert(fval);
                     if (jl_is_vecelement_type(ty))
                         strct = fval;  // VecElement type comes unwrapped in LLVM.
                     else if (lt->isVectorTy())
-                        strct = ctx.builder.CreateInsertElement(strct, fval, ConstantInt::get(T_int32, llvm_idx));
+                        strct = ctx.builder.CreateInsertElement(strct, fval, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), llvm_idx));
                     else if (lt->isAggregateType())
                         strct = ctx.builder.CreateInsertValue(strct, fval, makeArrayRef(llvm_idx));
                     else
@@ -2999,20 +3643,20 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                     int fsz = jl_field_size(sty, i) - 1;
                     unsigned llvm_idx = convert_struct_offset(ctx, cast<StructType>(lt), offs + fsz);
                     if (init_as_value)
-                        strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(T_int8, 0), makeArrayRef(llvm_idx));
+                        strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), makeArrayRef(llvm_idx));
                     else
-                        tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(
-                                ConstantInt::get(T_int8, 0),
+                        tbaa_decorate(ctx.tbaa().tbaa_unionselbyte, ctx.builder.CreateAlignedStore(
+                                ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
                                 ctx.builder.CreateConstInBoundsGEP2_32(lt, strct, 0, llvm_idx),
                                 Align(1)));
                 }
             }
             if (type_is_ghost(lt))
-                return mark_julia_const(sty->instance);
+                return mark_julia_const(ctx, sty->instance);
             else if (init_as_value)
                 return mark_julia_type(ctx, strct, false, ty);
             else
-                return mark_julia_slot(strct, ty, NULL, tbaa_stack);
+                return mark_julia_slot(strct, ty, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
         }
         Value *strct = emit_allocobj(ctx, jl_datatype_size(sty),
                                      literal_pointer_val(ctx, (jl_value_t*)ty));
@@ -3021,10 +3665,10 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
         undef_derived_strct(ctx.builder, strct, sty, strctinfo.tbaa);
         for (size_t i = nargs; i < nf; i++) {
             if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
-                tbaa_decorate(tbaa_unionselbyte, ctx.builder.CreateAlignedStore(
-                        ConstantInt::get(T_int8, 0),
-                        ctx.builder.CreateInBoundsGEP(T_int8, emit_bitcast(ctx, strct, T_pint8),
-                                ConstantInt::get(T_size, jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)),
+                tbaa_decorate(ctx.tbaa().tbaa_unionselbyte, ctx.builder.CreateAlignedStore(
+                        ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
+                        ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, strct, getInt8PtrTy(ctx.builder.getContext())),
+                                ConstantInt::get(getSizeTy(ctx.builder.getContext()), jl_field_offset(sty, i) + jl_field_size(sty, i) - 1)),
                         Align(1)));
             }
         }
@@ -3036,15 +3680,15 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
                 need_wb = !rhs.isboxed;
             else
                 need_wb = false;
-            emit_typecheck(ctx, rhs, jl_svecref(sty->types, i), "new");
-            emit_setfield(ctx, sty, strctinfo, i, rhs, false, need_wb);
+            emit_typecheck(ctx, rhs, jl_svecref(sty->types, i), "new"); // n.b. ty argument must be concrete
+            emit_setfield(ctx, sty, strctinfo, i, rhs, jl_cgval_t(ctx.builder.getContext()), need_wb, AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, false, true, false, false, false, nullptr, "");
         }
         return strctinfo;
     }
     else {
         // 0 fields, ghost or bitstype
         if (jl_datatype_nbits(sty) == 0)
-            return ghostValue(sty);
+            return ghostValue(ctx, sty);
         bool isboxed;
         Type *lt = julia_type_to_llvm(ctx, ty, &isboxed);
         assert(!isboxed);
@@ -3054,28 +3698,21 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
 
 static void emit_signal_fence(jl_codectx_t &ctx)
 {
-#if defined(_CPU_ARM_) || defined(_CPU_AARCH64_)
-    // LLVM generates very inefficient code (and might include function call)
-    // for signal fence. Fallback to the poor man signal fence with
-    // inline asm instead.
-    // https://llvm.org/bugs/show_bug.cgi?id=27545
-    ctx.builder.CreateCall(InlineAsm::get(FunctionType::get(T_void, false), "",
-                                      "~{memory}", true));
-#else
+    ++EmittedSignalFences;
     ctx.builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread);
-#endif
 }
 
 static Value *emit_defer_signal(jl_codectx_t &ctx)
 {
-    Value *ptls = emit_bitcast(ctx, ctx.ptlsStates,
-                                        PointerType::get(T_sigatomic, 0));
-    Constant *offset = ConstantInt::getSigned(T_int32,
+    ++EmittedDeferSignal;
+    Value *ptls = emit_bitcast(ctx, get_current_ptls(ctx),
+                                        PointerType::get(ctx.types().T_sigatomic, 0));
+    Constant *offset = ConstantInt::getSigned(getInt32Ty(ctx.builder.getContext()),
         offsetof(jl_tls_states_t, defer_signal) / sizeof(sig_atomic_t));
-    return ctx.builder.CreateInBoundsGEP(T_sigatomic, ptls, ArrayRef<Value*>(offset), "jl_defer_signal");
+    return ctx.builder.CreateInBoundsGEP(ctx.types().T_sigatomic, ptls, ArrayRef<Value*>(offset), "jl_defer_signal");
 }
 
-#ifndef NDEBUG
+#ifndef JL_NDEBUG
 static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b)
 {
     return
@@ -3088,3 +3725,7 @@ static int compare_cgparams(const jl_cgparams_t *a, const jl_cgparams_t *b)
            (a->generic_context == b->generic_context);
 }
 #endif
+
+// Reset us back to codegen debug type
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "julia_irgen_codegen"
diff --git a/src/clangsa/GCChecker.cpp b/src/clangsa/GCChecker.cpp
index 05974a0358e280..38bd012ff46fc0 100644
--- a/src/clangsa/GCChecker.cpp
+++ b/src/clangsa/GCChecker.cpp
@@ -19,19 +19,11 @@
 
 #if defined(__GNUC__)
 #define USED_FUNC __attribute__((used))
-#elif defined(_COMPILER_MICROSOFT_)
-// Does MSVC have this?
-#define USED_FUNC
 #else
 #define USED_FUNC
 #endif
 
-#if LLVM_VERSION_MAJOR >= 10
 using std::make_unique;
-#else
-using llvm::make_unique;
-#define PathSensitiveBugReport BugReport
-#endif
 
 namespace {
 using namespace clang;
@@ -42,11 +34,7 @@ using namespace ento;
 
 static const Stmt *getStmtForDiagnostics(const ExplodedNode *N)
 {
-#if LLVM_VERSION_MAJOR >= 10
     return N->getStmtForDiagnostics();
-#else
-    return PathDiagnosticLocation::getStmt(N);
-#endif
 }
 
 
@@ -235,11 +223,7 @@ class GCChecker
 public:
   void checkBeginFunction(CheckerContext &Ctx) const;
   void checkEndFunction(const clang::ReturnStmt *RS, CheckerContext &Ctx) const;
-#if LLVM_VERSION_MAJOR >= 9
   bool evalCall(const CallEvent &Call, CheckerContext &C) const;
-#else
-  bool evalCall(const CallExpr *CE, CheckerContext &C) const;
-#endif
   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
   void checkPostStmt(const CStyleCastExpr *CE, CheckerContext &C) const;
@@ -721,6 +705,10 @@ bool GCChecker::isFDAnnotatedNotSafepoint(const clang::FunctionDecl *FD) {
   return declHasAnnotation(FD, "julia_not_safepoint");
 }
 
+#if LLVM_VERSION_MAJOR >= 13
+#define endswith_lower endswith_insensitive
+#endif
+
 bool GCChecker::isGCTrackedType(QualType QT) {
   return isValueCollection(QT) ||
          isJuliaType(
@@ -741,7 +729,6 @@ bool GCChecker::isGCTrackedType(QualType QT) {
                    Name.endswith_lower("jl_module_t") ||
                    Name.endswith_lower("jl_tupletype_t") ||
                    Name.endswith_lower("jl_gc_tracked_buffer_t") ||
-                   Name.endswith_lower("jl_tls_states_t") ||
                    Name.endswith_lower("jl_binding_t") ||
                    Name.endswith_lower("jl_ordereddict_t") ||
                    Name.endswith_lower("jl_tvar_t") ||
@@ -756,8 +743,9 @@ bool GCChecker::isGCTrackedType(QualType QT) {
                    Name.endswith_lower("jl_task_t") ||
                    Name.endswith_lower("jl_uniontype_t") ||
                    Name.endswith_lower("jl_method_match_t") ||
-                   // Probably not technically true for these, but let's allow
-                   // it
+                   Name.endswith_lower("jl_vararg_t") ||
+                   Name.endswith_lower("jl_opaque_closure_t") ||
+                   // Probably not technically true for these, but let's allow it
                    Name.endswith_lower("typemap_intersection_env") ||
                    Name.endswith_lower("interpreter_state") ||
                    Name.endswith_lower("jl_typeenv_t") ||
@@ -787,7 +775,7 @@ bool GCChecker::isGCTracked(const Expr *E) {
 
 bool GCChecker::isGloballyRootedType(QualType QT) const {
   return isJuliaType(
-      [](StringRef Name) { return Name.endswith_lower("jl_sym_t"); }, QT);
+      [](StringRef Name) { return Name.endswith("jl_sym_t"); }, QT);
 }
 
 bool GCChecker::isSafepoint(const CallEvent &Call) const {
@@ -825,8 +813,8 @@ bool GCChecker::isSafepoint(const CallEvent &Call) const {
       if (FD->getBuiltinID() != 0 || FD->isTrivial())
         isCalleeSafepoint = false;
       else if (FD->getDeclName().isIdentifier() &&
-               (FD->getName().startswith_lower("uv_") ||
-                FD->getName().startswith_lower("unw_") ||
+               (FD->getName().startswith("uv_") ||
+                FD->getName().startswith("unw_") ||
                 FD->getName().startswith("_U")) &&
                FD->getName() != "uv_run")
         isCalleeSafepoint = false;
@@ -964,13 +952,13 @@ bool GCChecker::processAllocationOfResult(const CallEvent &Call,
         // global roots.
         StringRef FDName =
             FD->getDeclName().isIdentifier() ? FD->getName() : "";
-        if (FDName.startswith_lower("jl_box_")) {
+        if (FDName.startswith("jl_box_") || FDName.startswith("ijl_box_")) {
           SVal Arg = Call.getArgSVal(0);
           if (auto CI = Arg.getAs<nonloc::ConcreteInt>()) {
             const llvm::APSInt &Value = CI->getValue();
             bool GloballyRooted = false;
             const int64_t NBOX_C = 1024;
-            if (FDName.startswith_lower("jl_box_u")) {
+            if (FDName.startswith("jl_box_u") || FDName.startswith("ijl_box_u")) {
               if (Value < NBOX_C) {
                 GloballyRooted = true;
               }
@@ -1030,15 +1018,15 @@ SymbolRef GCChecker::getSymbolForResult(const Expr *Result,
                                         const ValueState *OldValS,
                                         ProgramStateRef &State,
                                         CheckerContext &C) const {
+  QualType QT = Result->getType();
+  if (!QT->isPointerType() || QT->getPointeeType()->isVoidType())
+    return nullptr;
   auto ValLoc = State->getSVal(Result, C.getLocationContext()).getAs<Loc>();
   if (!ValLoc) {
     return nullptr;
   }
   SVal Loaded = State->getSVal(*ValLoc);
   if (Loaded.isUnknown() || !Loaded.getAsSymbol()) {
-    QualType QT = Result->getType();
-    if (!QT->isPointerType())
-      return nullptr;
     if (OldValS || GCChecker::isGCTracked(Result)) {
       Loaded = C.getSValBuilder().conjureSymbolVal(
           nullptr, Result, C.getLocationContext(), Result->getType(),
@@ -1080,10 +1068,10 @@ void GCChecker::checkDerivingExpr(const Expr *Result, const Expr *Parent,
     // TODO: We may want to refine this. This is to track pointers through the
     // array list in jl_module_t.
     bool ParentIsModule = isJuliaType(
-        [](StringRef Name) { return Name.endswith_lower("jl_module_t"); },
+        [](StringRef Name) { return Name.endswith("jl_module_t"); },
         Parent->getType());
     bool ResultIsArrayList = isJuliaType(
-        [](StringRef Name) { return Name.endswith_lower("arraylist_t"); },
+        [](StringRef Name) { return Name.endswith("arraylist_t"); },
         Result->getType());
     if (!(ParentIsModule && ResultIsArrayList) && isGCTracked(Parent)) {
       ResultTracked = false;
@@ -1296,17 +1284,12 @@ void GCChecker::checkPreCall(const CallEvent &Call, CheckerContext &C) const {
   }
 }
 
-#if LLVM_VERSION_MAJOR >= 9
-bool GCChecker::evalCall(const CallEvent &Call,
-#else
-bool GCChecker::evalCall(const CallExpr *CE,
-#endif
-                         CheckerContext &C) const {
+bool GCChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
   // These checks should have no effect on the surrounding environment
   // (globals should not be invalidated, etc), hence the use of evalCall.
-#if LLVM_VERSION_MAJOR >= 9
   const CallExpr *CE = dyn_cast<CallExpr>(Call.getOriginExpr());
-#endif
+  if (!CE)
+    return false;
   unsigned CurrentDepth = C.getState()->get<GCDepth>();
   auto name = C.getCalleeName(CE);
   if (name == "JL_GC_POP") {
@@ -1340,7 +1323,8 @@ bool GCChecker::evalCall(const CallExpr *CE,
     return true;
   } else if (name == "JL_GC_PUSH1" || name == "JL_GC_PUSH2" ||
              name == "JL_GC_PUSH3" || name == "JL_GC_PUSH4" ||
-             name == "JL_GC_PUSH5" || name == "JL_GC_PUSH6") {
+             name == "JL_GC_PUSH5" || name == "JL_GC_PUSH6" ||
+             name == "JL_GC_PUSH7") {
     ProgramStateRef State = C.getState();
     // Transform slots to roots, transform values to rooted
     unsigned NumArgs = CE->getNumArgs();
@@ -1348,8 +1332,7 @@ bool GCChecker::evalCall(const CallExpr *CE,
       SVal V = C.getSVal(CE->getArg(i));
       auto MRV = V.getAs<loc::MemRegionVal>();
       if (!MRV) {
-        report_error(C,
-                     "JL_GC_PUSH with something other than a local variable");
+        report_error(C, "JL_GC_PUSH with something other than a local variable");
         return true;
       }
       const MemRegion *Region = MRV->getRegion();
@@ -1445,7 +1428,7 @@ bool GCChecker::evalCall(const CallExpr *CE,
     C.addTransition(
         State->set<GCValueMap>(Sym, ValueState::getRooted(nullptr, -1)));
     return true;
-  } else if (name == "jl_gc_enable") {
+  } else if (name == "jl_gc_enable" || name == "ijl_gc_enable") {
     ProgramStateRef State = C.getState();
     // Check for a literal argument
     SVal Arg = C.getSVal(CE->getArg(0));
@@ -1469,6 +1452,23 @@ bool GCChecker::evalCall(const CallExpr *CE,
     C.addTransition(State->BindExpr(CE, C.getLocationContext(), Result));
     return true;
   }
+  else if (name == "uv_mutex_lock") {
+    ProgramStateRef State = C.getState();
+    if (State->get<SafepointDisabledAt>() == (unsigned)-1) {
+      C.addTransition(State->set<SafepointDisabledAt>(C.getStackFrame()->getIndex()));
+      return true;
+    }
+  }
+  else if (name == "uv_mutex_unlock") {
+    ProgramStateRef State = C.getState();
+    const auto *LCtx = C.getLocationContext();
+    const auto *FD = dyn_cast<FunctionDecl>(LCtx->getDecl());
+    if (State->get<SafepointDisabledAt>() == (unsigned)C.getStackFrame()->getIndex() &&
+        !isFDAnnotatedNotSafepoint(FD)) {
+      C.addTransition(State->set<SafepointDisabledAt>(-1));
+      return true;
+    }
+  }
   return false;
 }
 
diff --git a/src/clangsa/ImplicitAtomics.cpp b/src/clangsa/ImplicitAtomics.cpp
new file mode 100644
index 00000000000000..b9ffc43bc22f84
--- /dev/null
+++ b/src/clangsa/ImplicitAtomics.cpp
@@ -0,0 +1,155 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "clang/AST/ASTContext.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang-tidy/ClangTidy.h"
+#include "clang-tidy/ClangTidyCheck.h"
+#include "clang-tidy/ClangTidyModule.h"
+#include "clang-tidy/ClangTidyModuleRegistry.h"
+
+using namespace clang;
+using namespace clang::tidy;
+using namespace clang::ast_matchers;
+
+class ImplicitAtomicsChecker : public ClangTidyCheck {
+  void reportBug(const Stmt *S, StringRef desc="");
+
+public:
+  ImplicitAtomicsChecker(StringRef Name, ClangTidyContext *Context);
+  void registerMatchers(ast_matchers::MatchFinder *Finder) override;
+  void check(const ast_matchers::MatchFinder::MatchResult &Result) override;
+
+private:
+};
+
+// Checks if RD has name in Names and is in std namespace
+static bool hasStdClassWithName(const CXXRecordDecl *RD,
+                                ArrayRef<llvm::StringLiteral> Names) {
+  // or could check ASTContext::getQualifiedTemplateName()->isDerivedFrom() ?
+  if (!RD || !RD->getDeclContext()->isStdNamespace())
+    return false;
+  if (RD->getDeclName().isIdentifier()) {
+    StringRef Name = RD->getName();
+    return llvm::any_of(Names, [&Name](StringRef GivenName) -> bool {
+      return Name == GivenName;
+    });
+  }
+  return false;
+}
+
+constexpr llvm::StringLiteral STD_PTR_NAMES[] = {"atomic", "atomic_ref"};
+
+static bool isStdAtomic(const CXXRecordDecl *RD) {
+  return hasStdClassWithName(RD, STD_PTR_NAMES);
+}
+
+static bool isStdAtomicCall(const Expr *E) {
+  return E && isStdAtomic(E->IgnoreImplicit()->getType()->getAsCXXRecordDecl());
+}
+
+static bool isStdAtomic(const Expr *E) {
+  return E->getType()->isAtomicType();
+}
+
+void ImplicitAtomicsChecker::reportBug(const Stmt *S, StringRef desc) {
+  // try to find the "best" node to attach this to, so we generate fewer duplicate reports
+  while (1) {
+    const auto *expr = dyn_cast<Expr>(S);
+    if (!expr)
+      break;
+    expr = expr->IgnoreParenCasts();
+    if (const auto *UO = dyn_cast<UnaryOperator>(expr))
+      S = UO->getSubExpr();
+    else if (const auto *BO = dyn_cast<BinaryOperator>(expr))
+      S = isStdAtomic(BO->getLHS()) ? BO->getLHS() :
+             isStdAtomic(BO->getRHS()) ? BO->getRHS() :
+             BO->getLHS();
+    else
+      break;
+  }
+  SmallString<100> buf;
+  llvm::raw_svector_ostream os(buf);
+  os << "Implicit Atomic seq_cst synchronization" << desc;
+  diag(S->getBeginLoc(), buf.str());
+}
+
+
+ImplicitAtomicsChecker::
+    ImplicitAtomicsChecker(StringRef Name, ClangTidyContext *Context)
+    : ClangTidyCheck(Name, Context) {
+}
+
+void ImplicitAtomicsChecker::registerMatchers(MatchFinder *Finder) {
+  Finder->addMatcher(castExpr(hasCastKind(CK_AtomicToNonAtomic))
+                         .bind("cast"),
+                     this);
+  Finder->addMatcher(unaryOperator(unless(hasAnyOperatorName("&")))
+                         .bind("unary-op"),
+                     this);
+  Finder->addMatcher(binaryOperator()
+                         .bind("binary-op"),
+                     this);
+  Finder->addMatcher(cxxOperatorCallExpr()
+                         .bind("cxxcall"),
+                     this);
+  Finder->addMatcher(cxxMemberCallExpr()
+                         .bind("cxxcall"),
+                     this);
+}
+
+void ImplicitAtomicsChecker::check(const MatchFinder::MatchResult &Result) {
+  if (const auto *UOp = Result.Nodes.getNodeAs<UnaryOperator>("unary-op")) {
+    const Expr *Sub = UOp->getSubExpr();
+    if (isStdAtomic(UOp) || isStdAtomic(Sub))
+      reportBug(UOp);
+  }
+  if (const auto *BOp = Result.Nodes.getNodeAs<BinaryOperator>("binary-op")) {
+    const Expr *Lhs = BOp->getLHS();
+    const Expr *Rhs = BOp->getRHS();
+    if (isStdAtomic(Lhs) || isStdAtomic(Rhs) || isStdAtomic(BOp))
+      reportBug(BOp);
+  }
+  if (const auto *CE = Result.Nodes.getNodeAs<CastExpr>("cast")) {
+    reportBug(CE);
+  }
+  if (const auto *Call = Result.Nodes.getNodeAs<CallExpr>("cxxcall")) {
+    if (const auto *OC = dyn_cast<CXXOperatorCallExpr>(Call)) {
+      const auto *CXXThisExpr = OC->getArg(0);
+      if (isStdAtomicCall(CXXThisExpr)) {
+        OverloadedOperatorKind OOK = OC->getOperator();
+        if (CXXOperatorCallExpr::isAssignmentOp(OOK) || OOK == OO_PlusPlus || OOK == OO_MinusMinus) {
+          reportBug(CXXThisExpr, " (std::atomic operator)");
+        }
+      }
+    }
+    else if (const auto *OC = dyn_cast<CXXMemberCallExpr>(Call)) {
+      const auto *CXXThisExpr = OC->getImplicitObjectArgument();
+      if (isStdAtomicCall(CXXThisExpr)) {
+        if (isa<CXXConversionDecl>(OC->getMethodDecl())) {
+          reportBug(CXXThisExpr, " (std::atomic cast)");
+        }
+      }
+    }
+  }
+}
+
+class ImplicitAtomicsCheckerModule : public ClangTidyModule {
+public:
+  void addCheckFactories(ClangTidyCheckFactories &CheckFactories) override {
+    CheckFactories.registerCheck<ImplicitAtomicsChecker>("concurrency-implicit-atomics");
+  }
+};
+
+namespace clang {
+namespace tidy {
+
+// Register the ImplicitAtomicsCheckerModule using this statically initialized variable.
+static ClangTidyModuleRegistry::Add<::ImplicitAtomicsCheckerModule>
+    X("concurrency-module", "Adds my concurrency checks.");
+
+// This anchor is used to force the linker to link in the generated object file
+// and thus register the ImplicitAtomicsCheckerModule.
+volatile int ImplicitAtomicsCheckerModuleAnchorSource = 0;
+
+} // namespace tidy
+} // namespace clang
diff --git a/src/codegen-stubs.c b/src/codegen-stubs.c
new file mode 100644
index 00000000000000..bf220a0456066d
--- /dev/null
+++ b/src/codegen-stubs.c
@@ -0,0 +1,145 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// This file provides a fallback implementation of the codegen plugin interface,
+// used when libjulia-codegen is not available.
+
+#include "julia.h"
+#include "julia_internal.h"
+
+#include "intrinsics.h"
+
+#define UNAVAILABLE { jl_errorf("%s: not available in this build of Julia", __func__); }
+
+JL_DLLEXPORT void jl_dump_native_fallback(void *native_code,
+        const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname,
+        const char *sysimg_data, size_t sysimg_len) UNAVAILABLE
+JL_DLLEXPORT int32_t jl_get_llvm_gv_fallback(void *native_code, jl_value_t *p) UNAVAILABLE
+
+JL_DLLEXPORT void jl_extern_c_fallback(jl_function_t *f, jl_value_t *rt, jl_value_t *argt, char *name) UNAVAILABLE
+JL_DLLEXPORT jl_value_t *jl_dump_method_asm_fallback(jl_method_instance_t *linfo, size_t world,
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
+JL_DLLEXPORT jl_value_t *jl_dump_function_ir_fallback(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo) UNAVAILABLE
+JL_DLLEXPORT void *jl_get_llvmf_defn_fallback(jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params) UNAVAILABLE
+
+JL_DLLEXPORT void *jl_LLVMCreateDisasm_fallback(const char *TripleName, void *DisInfo, int TagType, void *GetOpInfo, void *SymbolLookUp) UNAVAILABLE
+JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_fallback(void *DC, uint8_t *Bytes, uint64_t BytesSize, uint64_t PC, char *OutString, size_t OutStringSize) UNAVAILABLE
+
+JL_DLLEXPORT void jl_init_codegen_fallback(void) { }
+
+JL_DLLEXPORT int jl_getFunctionInfo_fallback(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline)
+{
+    return 0;
+}
+
+JL_DLLEXPORT void jl_register_fptrs_fallback(uint64_t sysimage_base, const struct _jl_sysimg_fptrs_t *fptrs,
+                       jl_method_instance_t **linfos, size_t n)
+{
+    (void)sysimage_base; (void)fptrs; (void)linfos; (void)n;
+}
+
+JL_DLLEXPORT jl_code_instance_t *jl_generate_fptr_fallback(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
+{
+    return NULL;
+}
+
+JL_DLLEXPORT void jl_generate_fptr_for_unspecialized_fallback(jl_code_instance_t *unspec)
+{
+    jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call);
+}
+
+JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_fallback(void)
+{
+    return 0;
+}
+
+JL_DLLEXPORT int jl_compile_extern_c_fallback(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
+{
+    return 0;
+}
+
+JL_DLLEXPORT void jl_teardown_codegen_fallback(void)
+{
+}
+
+JL_DLLEXPORT size_t jl_jit_total_bytes_fallback(void)
+{
+    return 0;
+}
+
+JL_DLLEXPORT void jl_lock_profile_fallback(void)
+{
+}
+
+JL_DLLEXPORT void jl_unlock_profile_fallback(void)
+{
+}
+
+JL_DLLEXPORT void *jl_create_native_fallback(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmctxt, const jl_cgparams_t *cgparams, int _policy) UNAVAILABLE
+
+JL_DLLEXPORT void jl_dump_compiles_fallback(void *s)
+{
+}
+
+JL_DLLEXPORT void jl_dump_emitted_mi_name_fallback(void *s)
+{
+}
+
+JL_DLLEXPORT void jl_dump_llvm_opt_fallback(void *s)
+{
+}
+
+JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm_fallback(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
+
+JL_DLLEXPORT jl_value_t *jl_dump_function_asm_fallback(void *F, char raw_mc, const char* asm_variant, const char *debuginfo, char binary) UNAVAILABLE
+
+JL_DLLEXPORT void jl_get_function_id_fallback(void *native_code, jl_code_instance_t *ncode,
+        int32_t *func_idx, int32_t *specfunc_idx) UNAVAILABLE
+
+
+JL_DLLEXPORT void *jl_get_llvm_function_fallback(void *native_code, uint32_t idx) UNAVAILABLE
+
+JL_DLLEXPORT LLVMOrcThreadSafeModuleRef jl_get_llvm_module_fallback(void *native_code) UNAVAILABLE
+
+JL_DLLEXPORT void *jl_type_to_llvm_fallback(jl_value_t *jt, LLVMContextRef llvmctxt, bool_t *isboxed) UNAVAILABLE
+
+JL_DLLEXPORT jl_value_t *jl_get_libllvm_fallback(void) JL_NOTSAFEPOINT
+{
+    return jl_nothing;
+}
+
+JL_DLLEXPORT uint64_t jl_getUnwindInfo_fallback(uint64_t dwAddr)
+{
+    return 0;
+}
+
+JL_DLLEXPORT void jl_add_optimization_passes_fallback(void *PM, int opt_level, int lower_intrinsics) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraJuliaLICMPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddAllocOptPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_fallback(void *PM, bool_t imaging_mode) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_fallback(void *PM, bool_t Strong) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_fallback(void *PM) UNAVAILABLE
+
+JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_fallback(void *PM) UNAVAILABLE
diff --git a/src/codegen.cpp b/src/codegen.cpp
index 01f4b0086e8bf7..fa7485a8448af1 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -1,12 +1,8 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#undef DEBUG
 #include "llvm-version.h"
 #include "platform.h"
-#if defined(_OS_WINDOWS_)
-// use ELF because RuntimeDyld COFF i686 support didn't exist
-// use ELF because RuntimeDyld COFF X86_64 doesn't seem to work (fails to generate function pointers)?
-#define FORCE_ELF
-#endif
 #if defined(_CPU_X86_)
 #define JL_NEED_FLOATTEMP_VAR 1
 #endif
@@ -30,7 +26,11 @@
 
 // target machine computation
 #include <llvm/CodeGen/TargetSubtargetInfo.h>
+#if JL_LLVM_VERSION >= 140000
+#include <llvm/MC/TargetRegistry.h>
+#else
 #include <llvm/Support/TargetRegistry.h>
+#endif
 #include <llvm/Target/TargetOptions.h>
 #include <llvm/Support/Host.h>
 #include <llvm/Support/TargetSelect.h>
@@ -44,6 +44,7 @@
 #include <llvm/IR/DIBuilder.h>
 #include <llvm/AsmParser/Parser.h>
 #include <llvm/DebugInfo/DIContext.h>
+#include "llvm/IR/DebugInfoMetadata.h"
 #include <llvm/IR/DerivedTypes.h>
 #include <llvm/IR/Intrinsics.h>
 #include <llvm/IR/Attributes.h>
@@ -69,6 +70,7 @@
 // for configuration options
 #include <llvm/Support/PrettyStackTrace.h>
 #include <llvm/Support/CommandLine.h>
+#include <llvm/Support/Process.h>
 
 #include <llvm/IR/InlineAsm.h>
 #if defined(_CPU_ARM_) || defined(_CPU_AARCH64_)
@@ -80,8 +82,97 @@
 #endif
 #include <llvm/Target/TargetMachine.h>
 
+#include "llvm/Support/Path.h" // for llvm::sys::path
+#include <llvm/Bitcode/BitcodeReader.h>
+#include <llvm/Linker/Linker.h>
+
+#define DEBUG_TYPE "julia_irgen_codegen"
+
 using namespace llvm;
 
+STATISTIC(EmittedAllocas, "Number of allocas emitted");
+STATISTIC(EmittedIntToPtrs, "Number of inttoptrs emitted");
+STATISTIC(ModulesCreated, "Number of LLVM Modules created");
+STATISTIC(EmittedBoxCompares, "Number of box compares emitted");
+STATISTIC(EmittedBitsUnionCompares, "Number of bitsunion compares emitted");
+STATISTIC(EmittedBitsCompares, "Number of bits compares emitted");
+STATISTIC(EmittedEgals, "Number of egals emitted");
+STATISTIC(EmittedOpfields, "Number of opfields emitted");
+STATISTIC(EmittedBuiltinCalls, "Number of builtin calls emitted");
+STATISTIC(EmittedJLCalls, "Number of jlcalls emitted");
+STATISTIC(EmittedSpecfunCalls, "Number of specialized calls emitted");
+STATISTIC(EmittedInvokes, "Number of invokes emitted");
+STATISTIC(EmittedCalls, "Number of calls emitted");
+STATISTIC(EmittedUndefVarErrors, "Number of undef var errors emitted");
+STATISTIC(EmittedOpaqueClosureFunctions, "Number of opaque closures emitted");
+STATISTIC(EmittedToJLInvokes, "Number of tojlinvoke calls emitted");
+STATISTIC(EmittedCFuncInvalidates, "Number of C function invalidates emitted");
+STATISTIC(GeneratedCFuncWrappers, "Number of C function wrappers generated");
+STATISTIC(GeneratedCCallables, "Number of C-callable functions generated");
+STATISTIC(GeneratedInvokeWrappers, "Number of invoke wrappers generated");
+STATISTIC(EmittedFunctions, "Number of functions emitted");
+
+//Drag some useful type functions into our namespace
+//to reduce verbosity of our code
+auto getInt1Ty(LLVMContext &ctxt) {
+    return Type::getInt1Ty(ctxt);
+}
+auto getInt8Ty(LLVMContext &ctxt) {
+    return Type::getInt8Ty(ctxt);
+}
+auto getInt16Ty(LLVMContext &ctxt) {
+    return Type::getInt16Ty(ctxt);
+}
+auto getInt32Ty(LLVMContext &ctxt) {
+    return Type::getInt32Ty(ctxt);
+}
+auto getInt64Ty(LLVMContext &ctxt) {
+    return Type::getInt64Ty(ctxt);
+}
+auto getHalfTy(LLVMContext &ctxt) {
+    return Type::getHalfTy(ctxt);
+}
+auto getFloatTy(LLVMContext &ctxt) {
+    return Type::getFloatTy(ctxt);
+}
+auto getDoubleTy(LLVMContext &ctxt) {
+    return Type::getDoubleTy(ctxt);
+}
+auto getFP128Ty(LLVMContext &ctxt) {
+    return Type::getFP128Ty(ctxt);
+}
+auto getVoidTy(LLVMContext &ctxt) {
+    return Type::getVoidTy(ctxt);
+}
+auto getCharTy(LLVMContext &ctxt) {
+    return getInt32Ty(ctxt);
+}
+auto getInt8PtrTy(LLVMContext &ctxt) {
+    return Type::getInt8PtrTy(ctxt);
+}
+auto getInt16PtrTy(LLVMContext &ctxt) {
+    return Type::getInt16PtrTy(ctxt);
+}
+auto getInt32PtrTy(LLVMContext &ctxt) {
+    return Type::getInt32PtrTy(ctxt);
+}
+auto getInt64PtrTy(LLVMContext &ctxt) {
+    return Type::getInt64PtrTy(ctxt);
+}
+auto getFloatPtrTy(LLVMContext &ctxt) {
+    return Type::getFloatPtrTy(ctxt);
+}
+auto getDoublePtrTy(LLVMContext &ctxt) {
+    return Type::getDoublePtrTy(ctxt);
+}
+auto getSizePtrTy(LLVMContext &ctxt) {
+    if (sizeof(size_t) > sizeof(uint32_t)) {
+        return getInt64PtrTy(ctxt);
+    } else {
+        return getInt32PtrTy(ctxt);
+    }
+}
+
 typedef Instruction TerminatorInst;
 
 #if defined(_OS_WINDOWS_) && !defined(NOMINMAX)
@@ -95,23 +186,17 @@ typedef Instruction TerminatorInst;
 #include "processor.h"
 #include "julia_assert.h"
 
+extern "C" JL_DLLEXPORT
+void jl_dump_emitted_mi_name_impl(void *s)
+{
+    **jl_ExecutionEngine->get_dump_emitted_mi_name_stream() = (JL_STREAM*)s;
+}
+
 extern "C" {
 
 #include "builtin_proto.h"
 
-#ifdef HAVE_SSP
-extern uintptr_t __stack_chk_guard;
 extern void __stack_chk_fail();
-#else
-JL_DLLEXPORT uintptr_t __stack_chk_guard = (uintptr_t)0xBAD57ACCBAD67ACC; // 0xBADSTACKBADSTACK
-JL_DLLEXPORT void __stack_chk_fail()
-{
-    /* put your panic function or similar in here */
-    fprintf(stderr, "fatal error: stack corruption detected\n");
-    gc_debug_critical_error();
-    abort(); // end with abort, since the compiler destroyed the stack upon entry to this function, there's no going back now
-}
-#endif
 
 #ifdef _OS_WINDOWS_
 #if defined(_CPU_X86_64_)
@@ -134,112 +219,164 @@ extern void _chkstk(void);
 #endif
 }
 
-#if defined(_COMPILER_MICROSOFT_) && !defined(__alignof__)
-#define __alignof__ __alignof
-#endif
-
-#define DISABLE_FLOAT16
-
-// llvm state
-extern JITEventListener *CreateJuliaJITEventListener();
-
-// for image reloading
-bool imaging_mode = false;
-
 // shared llvm state
-JL_DLLEXPORT LLVMContext &jl_LLVMContext = *(new LLVMContext());
-TargetMachine *jl_TargetMachine;
-static DataLayout &jl_data_layout = *(new DataLayout(""));
 #define jl_Module ctx.f->getParent()
 #define jl_builderModule(builder) (builder).GetInsertBlock()->getParent()->getParent()
 #define prepare_call(Callee) prepare_call_in(jl_Module, (Callee))
 
 // types
-static Type *T_jlvalue;
-static Type *T_pjlvalue;
-static Type *T_prjlvalue;
-static Type *T_ppjlvalue;
-static Type *T_pprjlvalue;
-static Type *jl_array_llvmt;
-static Type *jl_parray_llvmt;
-static FunctionType *jl_func_sig;
-static FunctionType *jl_func_sig_sparams;
-static Type *T_pvoidfunc;
-
-static IntegerType *T_int1;
-static IntegerType *T_int8;
-static IntegerType *T_int16;
-static IntegerType *T_int32;
-static IntegerType *T_int64;
-
-static IntegerType *T_uint8;
-static IntegerType *T_uint16;
-static IntegerType *T_uint32;
-static IntegerType *T_uint64;
-
-static IntegerType *T_char;
-static IntegerType *T_size;
-static IntegerType *T_sigatomic;
-
-static Type *T_float16;
-static Type *T_float32;
-static Type *T_float64;
-static Type *T_float128;
-
-static Type *T_pint8;
-static Type *T_pint16;
-static Type *T_pint32;
-static Type *T_pint64;
-static Type *T_psize;
-static Type *T_pfloat32;
-static Type *T_pfloat64;
-
-static Type *T_ppint8;
-static Type *T_pppint8;
-
-static Type *T_void;
-
-// type-based alias analysis nodes.  Indentation of comments indicates hierarchy.
-static MDNode *tbaa_root;     // Everything
-static MDNode *tbaa_gcframe;    // GC frame
-// LLVM should have enough info for alias analysis of non-gcframe stack slot
-// this is mainly a place holder for `jl_cgval_t::tbaa`
-static MDNode *tbaa_stack;      // stack slot
-static MDNode *tbaa_unionselbyte;   // a selector byte in isbits Union struct fields
-static MDNode *tbaa_data;       // Any user data that `pointerset/ref` are allowed to alias
-static MDNode *tbaa_binding;        // jl_binding_t::value
-static MDNode *tbaa_value;          // jl_value_t, that is not jl_array_t
-static MDNode *tbaa_mutab;              // mutable type
-static MDNode *tbaa_datatype;               // datatype
-static MDNode *tbaa_immut;              // immutable type
-static MDNode *tbaa_ptrarraybuf;    // Data in an array of boxed values
-static MDNode *tbaa_arraybuf;       // Data in an array of POD
-static MDNode *tbaa_array;      // jl_array_t
-static MDNode *tbaa_arrayptr;       // The pointer inside a jl_array_t
-static MDNode *tbaa_arraysize;      // A size in a jl_array_t
-static MDNode *tbaa_arraylen;       // The len in a jl_array_t
-static MDNode *tbaa_arrayflags;     // The flags in a jl_array_t
-static MDNode *tbaa_arrayoffset;     // The offset in a jl_array_t
-static MDNode *tbaa_arrayselbyte;   // a selector byte in a isbits Union jl_array_t
-static MDNode *tbaa_const;      // Memory that is immutable by the time LLVM can see it
-
-static Attribute Thunk;
-
-// Basic DITypes
-static DICompositeType *jl_value_dillvmt;
-static DIDerivedType *jl_pvalue_dillvmt;
-static DIDerivedType *jl_ppvalue_dillvmt;
-static DISubroutineType *jl_di_func_sig;
-static DISubroutineType *jl_di_func_null_sig;
+struct jl_typecache_t {
+    Type *T_jlvalue;
+    Type *T_pjlvalue;
+    Type *T_prjlvalue;
+    Type *T_ppjlvalue;
+    Type *T_pprjlvalue;
+    StructType *T_jlarray;
+    Type *T_pjlarray;
+    FunctionType *T_jlfunc;
+    FunctionType *T_jlfuncparams;
+
+    IntegerType *T_sigatomic;
+
+    Type *T_ppint8;
+
+    bool initialized;
+
+    jl_typecache_t() :
+        T_jlvalue(nullptr), T_pjlvalue(nullptr), T_prjlvalue(nullptr),
+        T_ppjlvalue(nullptr), T_pprjlvalue(nullptr), T_jlarray(nullptr),
+        T_pjlarray(nullptr), T_jlfunc(nullptr), T_jlfuncparams(nullptr),
+        T_sigatomic(nullptr), T_ppint8(nullptr), initialized(false) {}
+
+    void initialize(LLVMContext &context) {
+        if (initialized) {
+            return;
+        }
+        initialized = true;
+        T_ppint8 = PointerType::get(getInt8PtrTy(context), 0);
+        T_sigatomic = Type::getIntNTy(context, sizeof(sig_atomic_t) * 8);
+        T_jlvalue = JuliaType::get_jlvalue_ty(context);
+        T_pjlvalue = PointerType::get(T_jlvalue, 0);
+        T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
+        T_ppjlvalue = PointerType::get(T_pjlvalue, 0);
+        T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
+
+        T_jlfunc = JuliaType::get_jlfunc_ty(context);
+        assert(T_jlfunc != NULL);
+        T_jlfuncparams = JuliaType::get_jlfuncparams_ty(context);
+        assert(T_jlfuncparams != NULL);
+
+        Type *vaelts[] = {PointerType::get(getInt8Ty(context), AddressSpace::Loaded)
+                        , getSizeTy(context)
+                        , getInt16Ty(context)
+                        , getInt16Ty(context)
+                        , getInt32Ty(context)
+        };
+        static_assert(sizeof(jl_array_flags_t) == sizeof(int16_t),
+                    "Size of jl_array_flags_t is not the same as int16_t");
+        T_jlarray = StructType::get(context, makeArrayRef(vaelts));
+        T_pjlarray = PointerType::get(T_jlarray, 0);
+    }
+};
+
+struct jl_tbaacache_t {
+    // type-based alias analysis nodes.  Indentation of comments indicates hierarchy.
+    MDNode *tbaa_root;     // Everything
+    MDNode *tbaa_gcframe;    // GC frame
+    // LLVM should have enough info for alias analysis of non-gcframe stack slot
+    // this is mainly a place holder for `jl_cgval_t::tbaa`
+    MDNode *tbaa_stack;      // stack slot
+    MDNode *tbaa_unionselbyte;   // a selector byte in isbits Union struct fields
+    MDNode *tbaa_data;       // Any user data that `pointerset/ref` are allowed to alias
+    MDNode *tbaa_binding;        // jl_binding_t::value
+    MDNode *tbaa_value;          // jl_value_t, that is not jl_array_t
+    MDNode *tbaa_mutab;              // mutable type
+    MDNode *tbaa_datatype;               // datatype
+    MDNode *tbaa_immut;              // immutable type
+    MDNode *tbaa_ptrarraybuf;    // Data in an array of boxed values
+    MDNode *tbaa_arraybuf;       // Data in an array of POD
+    MDNode *tbaa_array;      // jl_array_t
+    MDNode *tbaa_arrayptr;       // The pointer inside a jl_array_t
+    MDNode *tbaa_arraysize;      // A size in a jl_array_t
+    MDNode *tbaa_arraylen;       // The len in a jl_array_t
+    MDNode *tbaa_arrayflags;     // The flags in a jl_array_t
+    MDNode *tbaa_arrayoffset;     // The offset in a jl_array_t
+    MDNode *tbaa_arrayselbyte;   // a selector byte in a isbits Union jl_array_t
+    MDNode *tbaa_const;      // Memory that is immutable by the time LLVM can see it
+    bool initialized;
+
+    jl_tbaacache_t(): tbaa_root(nullptr), tbaa_gcframe(nullptr), tbaa_stack(nullptr),
+                    tbaa_unionselbyte(nullptr), tbaa_data(nullptr), tbaa_binding(nullptr),
+                    tbaa_value(nullptr), tbaa_mutab(nullptr), tbaa_datatype(nullptr),
+                    tbaa_immut(nullptr), tbaa_ptrarraybuf(nullptr), tbaa_arraybuf(nullptr),
+                    tbaa_array(nullptr), tbaa_arrayptr(nullptr), tbaa_arraysize(nullptr),
+                    tbaa_arraylen(nullptr), tbaa_arrayflags(nullptr), tbaa_arrayoffset(nullptr),
+                    tbaa_arrayselbyte(nullptr), tbaa_const(nullptr), initialized(false) {}
+
+    auto tbaa_make_child(MDBuilder &mbuilder, const char *name, MDNode *parent = nullptr, bool isConstant = false) {
+        MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? parent : tbaa_root);
+        MDNode *n = mbuilder.createTBAAStructTagNode(scalar, scalar, 0, isConstant);
+        return std::make_pair(n, scalar);
+    }
+
+    void initialize(llvm::LLVMContext &context) {
+        if (initialized) {
+            assert(&tbaa_root->getContext() == &context);
+            return;
+        }
+        initialized = true;
+        MDBuilder mbuilder(context);
+        MDNode *jtbaa = mbuilder.createTBAARoot("jtbaa");
+        tbaa_root = mbuilder.createTBAAScalarTypeNode("jtbaa", jtbaa);
+        tbaa_gcframe = tbaa_make_child(mbuilder, "jtbaa_gcframe").first;
+        MDNode *tbaa_stack_scalar;
+        std::tie(tbaa_stack, tbaa_stack_scalar) = tbaa_make_child(mbuilder, "jtbaa_stack");
+        tbaa_unionselbyte = tbaa_make_child(mbuilder, "jtbaa_unionselbyte", tbaa_stack_scalar).first;
+        MDNode *tbaa_data_scalar;
+        std::tie(tbaa_data, tbaa_data_scalar) = tbaa_make_child(mbuilder, "jtbaa_data");
+        tbaa_binding = tbaa_make_child(mbuilder, "jtbaa_binding", tbaa_data_scalar).first;
+        MDNode *tbaa_value_scalar;
+        std::tie(tbaa_value, tbaa_value_scalar) =
+            tbaa_make_child(mbuilder, "jtbaa_value", tbaa_data_scalar);
+        MDNode *tbaa_mutab_scalar;
+        std::tie(tbaa_mutab, tbaa_mutab_scalar) =
+            tbaa_make_child(mbuilder, "jtbaa_mutab", tbaa_value_scalar);
+        tbaa_datatype = tbaa_make_child(mbuilder, "jtbaa_datatype", tbaa_mutab_scalar).first;
+        tbaa_immut = tbaa_make_child(mbuilder, "jtbaa_immut", tbaa_value_scalar).first;
+        tbaa_arraybuf = tbaa_make_child(mbuilder, "jtbaa_arraybuf", tbaa_data_scalar).first;
+        tbaa_ptrarraybuf = tbaa_make_child(mbuilder, "jtbaa_ptrarraybuf", tbaa_data_scalar).first;
+        MDNode *tbaa_array_scalar;
+        std::tie(tbaa_array, tbaa_array_scalar) = tbaa_make_child(mbuilder, "jtbaa_array");
+        tbaa_arrayptr = tbaa_make_child(mbuilder, "jtbaa_arrayptr", tbaa_array_scalar).first;
+        tbaa_arraysize = tbaa_make_child(mbuilder, "jtbaa_arraysize", tbaa_array_scalar).first;
+        tbaa_arraylen = tbaa_make_child(mbuilder, "jtbaa_arraylen", tbaa_array_scalar).first;
+        tbaa_arrayflags = tbaa_make_child(mbuilder, "jtbaa_arrayflags", tbaa_array_scalar).first;
+        tbaa_arrayoffset = tbaa_make_child(mbuilder, "jtbaa_arrayoffset", tbaa_array_scalar).first;
+        tbaa_const = tbaa_make_child(mbuilder, "jtbaa_const", nullptr, true).first;
+        tbaa_arrayselbyte = tbaa_make_child(mbuilder, "jtbaa_arrayselbyte", tbaa_array_scalar).first;
+    }
+};
+
+struct jl_debugcache_t {
+    // Basic DITypes
+    DIDerivedType *jl_pvalue_dillvmt;
+    DIDerivedType *jl_ppvalue_dillvmt;
+    DISubroutineType *jl_di_func_sig;
+    DISubroutineType *jl_di_func_null_sig;
+    bool initialized;
+
+    jl_debugcache_t()
+    : jl_pvalue_dillvmt(nullptr), jl_ppvalue_dillvmt(nullptr),
+    jl_di_func_sig(nullptr), jl_di_func_null_sig(nullptr), initialized(false) {}
+
+    void initialize(Module *m);
+};
 
 
 // constants
-static Constant *V_null;
-static Constant *V_rnull;
-static Constant *V_size0;
 static bool type_is_ghost(Type *ty)
 {
-    return (ty == T_void || ty->isEmptyTy());
+    return (ty == getVoidTy(ty->getContext()) || ty->isEmptyTy());
 }
 
 // should agree with `Core.Compiler.hasuniquerep`
@@ -257,7 +394,7 @@ static bool type_has_unique_rep(jl_value_t *t)
         return true;
     if (jl_is_datatype(t)) {
         jl_datatype_t *dt = (jl_datatype_t*)t;
-        if (dt->name != jl_tuple_typename && !jl_is_vararg_type(t)) {
+        if (dt->name != jl_tuple_typename) {
             for (size_t i = 0; i < jl_nparams(dt); i++)
                 if (!type_has_unique_rep(jl_tparam(dt, i)))
                     return false;
@@ -297,24 +434,24 @@ static inline void add_named_global(JuliaVariable *name, void *addr)
 
 struct JuliaFunction {
 public:
-    StringLiteral name;
-    FunctionType *(*_type)(LLVMContext &C);
-    AttributeList (*_attrs)(LLVMContext &C);
+    llvm::StringLiteral name;
+    llvm::FunctionType *(*_type)(llvm::LLVMContext &C);
+    llvm::AttributeList (*_attrs)(llvm::LLVMContext &C);
 
     JuliaFunction(const JuliaFunction&) = delete;
     JuliaFunction(const JuliaFunction&&) = delete;
-    Function *realize(Module *m) {
-        if (GlobalValue *V = m->getNamedValue(name))
-            return cast<Function>(V);
-        Function *F = Function::Create(_type(m->getContext()),
-                         Function::ExternalLinkage,
+    llvm::Function *realize(llvm::Module *m) {
+        if (llvm::GlobalValue *V = m->getNamedValue(name))
+            return llvm::cast<llvm::Function>(V);
+        llvm::Function *F = llvm::Function::Create(_type(m->getContext()),
+                         llvm::Function::ExternalLinkage,
                          name, m);
         if (_attrs)
             F->setAttributes(_attrs(m->getContext()));
         return F;
     }
-    Function *realize(jl_codectx_t &ctx);
 };
+
 template<typename T>
 static inline void add_named_global(JuliaFunction *name, T *addr)
 {
@@ -338,18 +475,34 @@ AttributeSet Attributes(LLVMContext &C, std::initializer_list<Attribute::AttrKin
     return AttributeSet::get(C, makeArrayRef(attrs));
 }
 
-static Type *get_pjlvalue(LLVMContext &C) { return T_pjlvalue; }
+static Type *get_pjlvalue(LLVMContext &C) { return JuliaType::get_pjlvalue_ty(C); }
+
+static FunctionType *get_func_sig(LLVMContext &C) { return JuliaType::get_jlfunc_ty(C); }
 
-static FunctionType *get_func_sig(LLVMContext &C) { return jl_func_sig; }
+static FunctionType *get_donotdelete_sig(LLVMContext &C) {
+    return FunctionType::get(getVoidTy(C), true);
+}
 
 static AttributeList get_func_attrs(LLVMContext &C)
 {
     return AttributeList::get(C,
-            AttributeSet::get(C, makeArrayRef({Thunk})),
+            AttributeSet::get(C, makeArrayRef({Attribute::get(C, "thunk")})),
             Attributes(C, {Attribute::NonNull}),
             None);
 }
 
+static AttributeList get_donotdelete_func_attrs(LLVMContext &C)
+{
+    AttributeSet FnAttrs = AttributeSet::get(C, makeArrayRef({Attribute::get(C, "thunk")}));
+    FnAttrs = FnAttrs.addAttribute(C, Attribute::InaccessibleMemOnly);
+    FnAttrs = FnAttrs.addAttribute(C, Attribute::WillReturn);
+    FnAttrs = FnAttrs.addAttribute(C, Attribute::NoUnwind);
+    return AttributeList::get(C,
+            FnAttrs,
+            Attributes(C, {}),
+            None);
+}
+
 static AttributeList get_attrs_noreturn(LLVMContext &C)
 {
     return AttributeList::get(C,
@@ -377,176 +530,215 @@ static AttributeList get_attrs_zext(LLVMContext &C)
 
 // global vars
 static const auto jlRTLD_DEFAULT_var = new JuliaVariable{
-    "jl_RTLD_DEFAULT_handle",
+    XSTR(jl_RTLD_DEFAULT_handle),
     true,
-    [](LLVMContext &C) { return T_pint8; },
+    [](LLVMContext &C) { return static_cast<llvm::Type*>(getInt8PtrTy(C)); },
 };
 #ifdef _OS_WINDOWS_
 static const auto jlexe_var = new JuliaVariable{
-    "jl_exe_handle",
+    XSTR(jl_exe_handle),
     true,
-    [](LLVMContext &C) { return T_pint8; },
+    [](LLVMContext &C) { return static_cast<llvm::Type*>(getInt8PtrTy(C)); },
 };
 static const auto jldll_var = new JuliaVariable{
-    "jl_dl_handle",
+    XSTR(jl_libjulia_handle),
     true,
-    [](LLVMContext &C) { return T_pint8; },
+    [](LLVMContext &C) { return static_cast<llvm::Type*>(getInt8PtrTy(C)); },
+};
+static const auto jldlli_var = new JuliaVariable{
+    XSTR(jl_libjulia_internal_handle),
+    true,
+    [](LLVMContext &C) { return static_cast<llvm::Type*>(getInt8PtrTy(C)); },
 };
 #endif //_OS_WINDOWS_
 
 static const auto jlstack_chk_guard_var = new JuliaVariable{
-    "__stack_chk_guard",
+    XSTR(__stack_chk_guard),
     true,
     get_pjlvalue,
 };
 
 static const auto jlgetworld_global = new JuliaVariable{
-    "jl_world_counter",
+    XSTR(jl_world_counter),
     false,
-    [](LLVMContext &C) { return (Type*)T_size; },
+    [](LLVMContext &C) { return (Type*)getSizeTy(C); },
 };
 
 static const auto jlboxed_int8_cache = new JuliaVariable{
-    "jl_boxed_int8_cache",
+    XSTR(jl_boxed_int8_cache),
     true,
-    [](LLVMContext &C) { return (Type*)ArrayType::get(T_pjlvalue, 256); },
+    [](LLVMContext &C) { return (Type*)ArrayType::get(get_pjlvalue(C), 256); },
 };
 
 static const auto jlboxed_uint8_cache = new JuliaVariable{
-    "jl_boxed_uint8_cache",
+    XSTR(jl_boxed_uint8_cache),
     true,
-    [](LLVMContext &C) { return (Type*)ArrayType::get(T_pjlvalue, 256); },
+    [](LLVMContext &C) { return (Type*)ArrayType::get(get_pjlvalue(C), 256); },
 };
 
-static const auto jltls_states_func = new JuliaFunction{
-    "julia.ptls_states",
-    [](LLVMContext &C) { return FunctionType::get(PointerType::get(T_ppjlvalue, 0), false); },
+static const auto jlpgcstack_func = new JuliaFunction{
+    "julia.get_pgcstack",
+    [](LLVMContext &C) { return FunctionType::get(PointerType::get(JuliaType::get_ppjlvalue_ty(C), 0), false); },
     nullptr,
 };
 
 
+
 // important functions
 // Symbols are not gc-tracked, but we'll treat them as callee rooted anyway,
 // because they may come from a gc-rooted location
 static const auto jlnew_func = new JuliaFunction{
-    "jl_new_structv",
+    XSTR(jl_new_structv),
     get_func_sig,
     get_func_attrs,
 };
 static const auto jlsplatnew_func = new JuliaFunction{
-    "jl_new_structt",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-            {T_prjlvalue, T_prjlvalue}, false); },
+    XSTR(jl_new_structt),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+            {T_prjlvalue, T_prjlvalue}, false);
+    },
     get_func_attrs,
 };
 static const auto jlthrow_func = new JuliaFunction{
-    "jl_throw",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
+    XSTR(jl_throw),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlerror_func = new JuliaFunction{
-    "jl_error",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_pint8}, false); },
+    XSTR(jl_error),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {getInt8PtrTy(C)}, false); },
+    get_attrs_noreturn,
+};
+static const auto jlatomicerror_func = new JuliaFunction{
+    XSTR(jl_atomic_error),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {getInt8PtrTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jltypeerror_func = new JuliaFunction{
-    "jl_type_error",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_pint8, T_prjlvalue, PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
+    XSTR(jl_type_error),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {getInt8PtrTy(C), JuliaType::get_prjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlundefvarerror_func = new JuliaFunction{
-    "jl_undefined_var_error",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
+    XSTR(jl_undefined_var_error),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlboundserrorv_func = new JuliaFunction{
-    "jl_bounds_error_ints",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted), T_psize, T_size}, false); },
+    XSTR(jl_bounds_error_ints),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), getSizePtrTy(C), getSizeTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlboundserror_func = new JuliaFunction{
-    "jl_bounds_error_int",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {PointerType::get(T_jlvalue, AddressSpace::CalleeRooted), T_size}, false); },
+    XSTR(jl_bounds_error_int),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted), getSizeTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlvboundserror_func = new JuliaFunction{
-    "jl_bounds_error_tuple_int",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_pprjlvalue, T_size, T_size}, false); },
+    XSTR(jl_bounds_error_tuple_int),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {JuliaType::get_pprjlvalue_ty(C), getSizeTy(C), getSizeTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jluboundserror_func = new JuliaFunction{
-    "jl_bounds_error_unboxed_int",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {PointerType::get(T_int8, AddressSpace::Derived), T_pjlvalue, T_size}, false); },
+    XSTR(jl_bounds_error_unboxed_int),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(getInt8Ty(C), AddressSpace::Derived), JuliaType::get_pjlvalue_ty(C), getSizeTy(C)}, false); },
     get_attrs_noreturn,
 };
 static const auto jlcheckassign_func = new JuliaFunction{
-    "jl_checked_assignment",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_pjlvalue, PointerType::get(T_jlvalue, AddressSpace::CalleeRooted)}, false); },
+    XSTR(jl_checked_assignment),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {JuliaType::get_pjlvalue_ty(C), PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
     nullptr,
 };
 static const auto jldeclareconst_func = new JuliaFunction{
-    "jl_declare_constant",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_pjlvalue}, false); },
+    XSTR(jl_declare_constant),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {JuliaType::get_pjlvalue_ty(C)}, false); },
     nullptr,
 };
 static const auto jlgetbindingorerror_func = new JuliaFunction{
-    "jl_get_binding_or_error",
-    [](LLVMContext &C) { return FunctionType::get(T_pjlvalue,
-                {T_pjlvalue, T_pjlvalue}, false); },
+    XSTR(jl_get_binding_or_error),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(T_pjlvalue,
+                {T_pjlvalue, T_pjlvalue}, false);
+    },
+    nullptr,
+};
+static const auto jlgetbindingwrorerror_func = new JuliaFunction{
+    XSTR(jl_get_binding_wr_or_error),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(T_pjlvalue,
+                {T_pjlvalue, T_pjlvalue}, false);
+    },
     nullptr,
 };
 static const auto jlboundp_func = new JuliaFunction{
-    "jl_boundp",
-    [](LLVMContext &C) { return FunctionType::get(T_int32,
-                {T_pjlvalue, T_pjlvalue}, false); },
+    XSTR(jl_boundp),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(getInt32Ty(C),
+                {T_pjlvalue, T_pjlvalue}, false);
+    },
     nullptr,
 };
 static const auto jltopeval_func = new JuliaFunction{
-    "jl_toplevel_eval",
-    [](LLVMContext &C) { return FunctionType::get(T_pjlvalue,
-                {T_pjlvalue, T_pjlvalue}, false); },
+    XSTR(jl_toplevel_eval),
+    [](LLVMContext &C) {
+        auto T_pjlvalue = JuliaType::get_pjlvalue_ty(C);
+        return FunctionType::get(T_pjlvalue,
+                {T_pjlvalue, T_pjlvalue}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
 static const auto jlcopyast_func = new JuliaFunction{
-    "jl_copy_ast",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-                {T_prjlvalue}, false); },
+    XSTR(jl_copy_ast),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+                {T_prjlvalue}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
 //static const auto jlnsvec_func = new JuliaFunction{
-//    "jl_svec",
+//    XSTR(jl_svec),
 //    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-//                {T_size}, true); },
+//                {getSizeTy(C)}, true); },
 //    [](LLVMContext &C) { return AttributeList::get(C,
 //            AttributeSet(),
 //            Attributes(C, {Attribute::NonNull}),
 //            None); },
 //};
 static const auto jlapplygeneric_func = new JuliaFunction{
-    "jl_apply_generic",
+    XSTR(jl_apply_generic),
     get_func_sig,
     get_func_attrs,
 };
 static const auto jlinvoke_func = new JuliaFunction{
-    "jl_invoke",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-                {T_prjlvalue, T_pprjlvalue, T_uint32, T_prjlvalue}, false); },
+    XSTR(jl_invoke),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+                {T_prjlvalue, PointerType::get(T_prjlvalue, 0), getInt32Ty(C), T_prjlvalue}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             Attributes(C, {Attribute::NonNull}),
@@ -554,65 +746,105 @@ static const auto jlinvoke_func = new JuliaFunction{
              Attributes(C, {Attribute::ReadOnly, Attribute::NoCapture})}); },
 };
 static const auto jlmethod_func = new JuliaFunction{
-    "jl_method_def",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-                {T_prjlvalue, T_prjlvalue, T_pjlvalue}, false); },
+    XSTR(jl_method_def),
+    [](LLVMContext &C) {
+        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
+        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
+        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
+        return FunctionType::get(T_prjlvalue,
+                {T_prjlvalue, T_prjlvalue, T_prjlvalue, T_pjlvalue}, false);
+    },
     nullptr,
 };
 static const auto jlgenericfunction_func = new JuliaFunction{
-    "jl_generic_function_def",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-                {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue, T_pjlvalue}, false); },
+    XSTR(jl_generic_function_def),
+    [](LLVMContext &C) {
+        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
+        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
+        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
+        auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
+        return FunctionType::get(T_prjlvalue,
+                {T_pjlvalue, T_pjlvalue, T_pprjlvalue, T_pjlvalue, T_pjlvalue}, false);
+    },
     nullptr,
 };
+static const auto jllockvalue_func = new JuliaFunction{
+    XSTR(jl_lock_value),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            AttributeSet(),
+            AttributeSet(),
+            {Attributes(C, {Attribute::NoCapture})}); },
+};
+static const auto jlunlockvalue_func = new JuliaFunction{
+    XSTR(jl_unlock_value),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::CalleeRooted)}, false); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            AttributeSet(),
+            AttributeSet(),
+            {Attributes(C, {Attribute::NoCapture})}); },
+};
 static const auto jlenter_func = new JuliaFunction{
-    "jl_enter_handler",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_pint8}, false); },
+    XSTR(jl_enter_handler),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {getInt8PtrTy(C)}, false); },
     nullptr,
 };
 static const auto jl_current_exception_func = new JuliaFunction{
-    "jl_current_exception",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue, false); },
+    XSTR(jl_current_exception),
+    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), false); },
     nullptr,
 };
 static const auto jlleave_func = new JuliaFunction{
-    "jl_pop_handler",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_int32}, false); },
+    XSTR(jl_pop_handler),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {getInt32Ty(C)}, false); },
     nullptr,
 };
 static const auto jl_restore_excstack_func = new JuliaFunction{
-    "jl_restore_excstack",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_size}, false); },
+    XSTR(jl_restore_excstack),
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {getSizeTy(C)}, false); },
     nullptr,
 };
 static const auto jl_excstack_state_func = new JuliaFunction{
-    "jl_excstack_state",
-    [](LLVMContext &C) { return FunctionType::get(T_size, false); },
+    XSTR(jl_excstack_state),
+    [](LLVMContext &C) { return FunctionType::get(getSizeTy(C), false); },
     nullptr,
 };
-static const auto jlegal_func = new JuliaFunction{
-    "jl_egal",
+static const auto jlegalx_func = new JuliaFunction{
+    XSTR(jl_egal__unboxed),
     [](LLVMContext &C) {
-        Type *T = PointerType::get(T_jlvalue, AddressSpace::CalleeRooted);
-        return FunctionType::get(T_int32, {T, T}, false); },
-    nullptr,
+        Type *T = PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived);
+        return FunctionType::get(getInt32Ty(C), {T, T, JuliaType::get_prjlvalue_ty(C)}, false); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}),
+            AttributeSet(),
+            None); },
 };
 static const auto jl_alloc_obj_func = new JuliaFunction{
     "julia.gc_alloc_obj",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-                {T_pint8, T_size, T_prjlvalue}, false); },
+    [](LLVMContext &C) {
+        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
+        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
+        auto T_ppjlvalue = PointerType::get(PointerType::get(T_jlvalue, 0), 0);
+        return FunctionType::get(T_prjlvalue,
+                {T_ppjlvalue, getSizeTy(C), T_prjlvalue}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet::get(C, makeArrayRef({Attribute::getWithAllocSizeArgs(C, 1, None)})), // returns %1 bytes
             Attributes(C, {Attribute::NoAlias, Attribute::NonNull}),
             None); },
 };
 static const auto jl_newbits_func = new JuliaFunction{
-    "jl_new_bits",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-                {T_prjlvalue, T_pint8}, false); },
+    XSTR(jl_new_bits),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+                {T_prjlvalue, getInt8PtrTy(C)}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             Attributes(C, {Attribute::NonNull}),
@@ -623,8 +855,11 @@ static const auto jl_newbits_func = new JuliaFunction{
 // `julia.gc_alloc_obj`.
 static const auto jl_typeof_func = new JuliaFunction{
     "julia.typeof",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-                {T_prjlvalue}, false); },
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+                {T_prjlvalue}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReadNone, Attribute::NoUnwind, Attribute::NoRecurse}),
             Attributes(C, {Attribute::NonNull}),
@@ -632,7 +867,7 @@ static const auto jl_typeof_func = new JuliaFunction{
 };
 static const auto jl_loopinfo_marker_func = new JuliaFunction{
     "julia.loopinfo_marker",
-    [](LLVMContext &C) { return FunctionType::get(T_void, false); },
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReadOnly, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
             AttributeSet(),
@@ -640,30 +875,51 @@ static const auto jl_loopinfo_marker_func = new JuliaFunction{
 };
 static const auto jl_write_barrier_func = new JuliaFunction{
     "julia.write_barrier",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_prjlvalue}, true); },
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {JuliaType::get_prjlvalue_ty(C)}, true); },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
             AttributeSet(),
-            None); },
+            {Attributes(C, {Attribute::ReadOnly})}); },
+};
+static const auto jl_write_barrier_binding_func = new JuliaFunction{
+    "julia.write_barrier_binding",
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C),
+            {JuliaType::get_prjlvalue_ty(C)}, true); },
+    [](LLVMContext &C) { return AttributeList::get(C,
+            Attributes(C, {Attribute::NoUnwind, Attribute::NoRecurse, Attribute::InaccessibleMemOnly}),
+            AttributeSet(),
+            {Attributes(C, {Attribute::ReadOnly})}); },
 };
 static const auto jlisa_func = new JuliaFunction{
-    "jl_isa",
-    [](LLVMContext &C) { return FunctionType::get(T_int32,
-            {T_prjlvalue, T_prjlvalue}, false); },
+    XSTR(jl_isa),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(getInt32Ty(C),
+            {T_prjlvalue, T_prjlvalue}, false);
+    },
     nullptr,
 };
 
 static const auto jlsubtype_func = new JuliaFunction{
-    "jl_subtype",
-    [](LLVMContext &C) { return FunctionType::get(T_int32,
-            {T_prjlvalue, T_prjlvalue}, false); },
+    XSTR(jl_subtype),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(getInt32Ty(C),
+            {T_prjlvalue, T_prjlvalue}, false);
+    },
     nullptr,
 };
 static const auto jlapplytype_func = new JuliaFunction{
-    "jl_instantiate_type_in_env",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-            {T_pjlvalue, T_pjlvalue, T_pprjlvalue}, false); },
+    XSTR(jl_instantiate_type_in_env),
+    [](LLVMContext &C) {
+        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
+        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
+        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
+        auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
+        return FunctionType::get(T_prjlvalue,
+            {T_pjlvalue, T_pjlvalue, T_pprjlvalue}, false);
+    },
     [](LLVMContext &C) {
         return AttributeList::get(C,
             AttributeSet(),
@@ -673,17 +929,17 @@ static const auto jlapplytype_func = new JuliaFunction{
     },
 };
 static const auto jl_object_id__func = new JuliaFunction{
-    "jl_object_id_",
-    [](LLVMContext &C) { return FunctionType::get(T_size,
-            {T_prjlvalue, PointerType::get(T_int8, AddressSpace::Derived)}, false); },
+    XSTR(jl_object_id_),
+    [](LLVMContext &C) { return FunctionType::get(getSizeTy(C),
+            {JuliaType::get_prjlvalue_ty(C), PointerType::get(getInt8Ty(C), AddressSpace::Derived)}, false); },
     nullptr,
 };
 static const auto setjmp_func = new JuliaFunction{
     jl_setjmp_name,
-    [](LLVMContext &C) { return FunctionType::get(T_int32,
-            {T_pint8,
+    [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C),
+            {getInt8PtrTy(C),
 #ifndef _OS_WINDOWS_
-            T_int32,
+            getInt32Ty(C),
 #endif
             }, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
@@ -692,9 +948,9 @@ static const auto setjmp_func = new JuliaFunction{
             None); },
 };
 static const auto memcmp_func = new JuliaFunction{
-    "memcmp",
-    [](LLVMContext &C) { return FunctionType::get(T_int32,
-            {T_pint8, T_pint8, T_size}, false); },
+    XSTR(memcmp),
+    [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C),
+            {getInt8PtrTy(C), getInt8PtrTy(C), getSizeTy(C)}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind, Attribute::ArgMemOnly}),
             AttributeSet(),
@@ -702,93 +958,109 @@ static const auto memcmp_func = new JuliaFunction{
     // TODO: inferLibFuncAttributes(*memcmp_func, TLI);
 };
 static const auto jldlsym_func = new JuliaFunction{
-    "jl_load_and_lookup",
-    [](LLVMContext &C) { return FunctionType::get(T_pvoidfunc,
-            {T_pint8, T_pint8, PointerType::get(T_pint8, 0)}, false); },
+    XSTR(jl_load_and_lookup),
+    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C),
+            {getInt8PtrTy(C), getInt8PtrTy(C), PointerType::get(getInt8PtrTy(C), 0)}, false); },
     nullptr,
 };
 static const auto jllazydlsym_func = new JuliaFunction{
-    "jl_lazy_load_and_lookup",
-    [](LLVMContext &C) { return FunctionType::get(T_pvoidfunc,
-            {T_prjlvalue, T_pint8}, false); },
+    XSTR(jl_lazy_load_and_lookup),
+    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pvoidfunc_ty(C),
+            {JuliaType::get_prjlvalue_ty(C), getInt8PtrTy(C)}, false); },
     nullptr,
 };
 static const auto jltypeassert_func = new JuliaFunction{
-    "jl_typeassert",
-    [](LLVMContext &C) { return FunctionType::get(T_void,
-            {T_prjlvalue, T_prjlvalue}, false); },
+    XSTR(jl_typeassert),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(getVoidTy(C),
+            {T_prjlvalue, T_prjlvalue}, false);
+    },
     nullptr,
 };
 static const auto jlgetnthfieldchecked_func = new JuliaFunction{
-    "jl_get_nth_field_checked",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-            {T_prjlvalue, T_size}, false); },
+    XSTR(jl_get_nth_field_checked),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+            {T_prjlvalue, getSizeTy(C)}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
 static const auto jlgetcfunctiontrampoline_func = new JuliaFunction{
-    "jl_get_cfunction_trampoline",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
+    XSTR(jl_get_cfunction_trampoline),
+    [](LLVMContext &C) {
+        auto T_jlvalue = JuliaType::get_jlvalue_ty(C);
+        auto T_pjlvalue = PointerType::get(T_jlvalue, 0);
+        auto T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
+        auto T_ppjlvalue = PointerType::get(T_pjlvalue, 0);
+        auto T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
+        return FunctionType::get(T_prjlvalue,
             {
                 T_prjlvalue, // f (object)
                 T_pjlvalue, // result
-                T_pint8, // cache
+                getInt8PtrTy(C), // cache
                 T_pjlvalue, // fill
-                FunctionType::get(T_pint8, { T_pint8, T_ppjlvalue }, false)->getPointerTo(), // trampoline
+                FunctionType::get(getInt8PtrTy(C), { getInt8PtrTy(C), T_ppjlvalue }, false)->getPointerTo(), // trampoline
                 T_pjlvalue, // env
                 T_pprjlvalue, // vals
-            }, false); },
+            }, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet(),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
 static const auto diff_gc_total_bytes_func = new JuliaFunction{
-    "jl_gc_diff_total_bytes",
-    [](LLVMContext &C) { return FunctionType::get(T_int64, false); },
+    XSTR(jl_gc_diff_total_bytes),
+    [](LLVMContext &C) { return FunctionType::get(getInt64Ty(C), false); },
     nullptr,
 };
 static const auto sync_gc_total_bytes_func = new JuliaFunction{
-    "jl_gc_sync_total_bytes",
-    [](LLVMContext &C) { return FunctionType::get(T_int64,
-            {T_int64}, false); },
+    XSTR(jl_gc_sync_total_bytes),
+    [](LLVMContext &C) { return FunctionType::get(getInt64Ty(C),
+            {getInt64Ty(C)}, false); },
     nullptr,
 };
 static const auto jlarray_data_owner_func = new JuliaFunction{
-    "jl_array_data_owner",
-    [](LLVMContext &C) { return FunctionType::get(T_prjlvalue,
-            {T_prjlvalue}, false); },
+    XSTR(jl_array_data_owner),
+    [](LLVMContext &C) {
+        auto T_prjlvalue = JuliaType::get_prjlvalue_ty(C);
+        return FunctionType::get(T_prjlvalue,
+            {T_prjlvalue}, false);
+    },
     [](LLVMContext &C) { return AttributeList::get(C,
             Attributes(C, {Attribute::ReadOnly, Attribute::NoUnwind}),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
-#define BOX_FUNC(ct,rt,at,attrs)                                              \
-static const auto box_##ct##_func = new JuliaFunction{                        \
-    "jl_box_"#ct,                                                             \
-    [](LLVMContext &C) { return FunctionType::get(rt,                         \
-            {at}, false); },                                                  \
-    attrs,                                                                    \
-}
-BOX_FUNC(int16, T_prjlvalue, T_int16, get_attrs_sext);
-BOX_FUNC(uint16, T_prjlvalue, T_int16, get_attrs_zext);
-BOX_FUNC(int32, T_prjlvalue, T_int32, get_attrs_sext);
-BOX_FUNC(uint32, T_prjlvalue, T_int32, get_attrs_zext);
-BOX_FUNC(int64, T_prjlvalue, T_int64, get_attrs_sext);
-BOX_FUNC(uint64, T_prjlvalue, T_int64, get_attrs_zext);
-BOX_FUNC(char, T_prjlvalue, T_char, get_attrs_zext);
-BOX_FUNC(float32, T_prjlvalue, T_float32, get_func_attrs);
-BOX_FUNC(float64, T_prjlvalue, T_float64, get_func_attrs);
-BOX_FUNC(ssavalue, T_prjlvalue, T_size, get_func_attrs);
+#define BOX_FUNC(ct,at,attrs)                                                    \
+static const auto box_##ct##_func = new JuliaFunction{                           \
+    XSTR(jl_box_##ct),                                                           \
+    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C),\
+            {at}, false); },                                                     \
+    attrs,                                                                       \
+}
+BOX_FUNC(int16, getInt16Ty(C), get_attrs_sext);
+BOX_FUNC(uint16, getInt16Ty(C), get_attrs_zext);
+BOX_FUNC(int32, getInt32Ty(C), get_attrs_sext);
+BOX_FUNC(uint32, getInt32Ty(C), get_attrs_zext);
+BOX_FUNC(int64, getInt64Ty(C), get_attrs_sext);
+BOX_FUNC(uint64, getInt64Ty(C), get_attrs_zext);
+BOX_FUNC(char, getCharTy(C), get_attrs_zext);
+BOX_FUNC(float32, getFloatTy(C), get_func_attrs);
+BOX_FUNC(float64, getDoubleTy(C), get_func_attrs);
+BOX_FUNC(ssavalue, getSizeTy(C), get_func_attrs);
 #undef BOX_FUNC
 
 
 // placeholder functions
 static const auto gcroot_flush_func = new JuliaFunction{
     "julia.gcroot_flush",
-    [](LLVMContext &C) { return FunctionType::get(T_void, false); },
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), false); },
     nullptr,
 };
 static const auto gc_preserve_begin_func = new JuliaFunction{
@@ -798,12 +1070,12 @@ static const auto gc_preserve_begin_func = new JuliaFunction{
 };
 static const auto gc_preserve_end_func = new JuliaFunction {
     "llvm.julia.gc_preserve_end",
-    [](LLVMContext &C) { return FunctionType::get(T_void, {Type::getTokenTy(C)}, false); },
+    [](LLVMContext &C) { return FunctionType::get(getVoidTy(C), {Type::getTokenTy(C)}, false); },
     nullptr,
 };
 static const auto except_enter_func = new JuliaFunction{
     "julia.except_enter",
-    [](LLVMContext &C) { return FunctionType::get(T_int32, false); },
+    [](LLVMContext &C) { return FunctionType::get(getInt32Ty(C), false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::ReturnsTwice)})),
             AttributeSet(),
@@ -811,94 +1083,94 @@ static const auto except_enter_func = new JuliaFunction{
 };
 static const auto pointer_from_objref_func = new JuliaFunction{
     "julia.pointer_from_objref",
-    [](LLVMContext &C) { return FunctionType::get(T_pjlvalue,
-            {PointerType::get(T_jlvalue, AddressSpace::Derived)}, false); },
+    [](LLVMContext &C) { return FunctionType::get(JuliaType::get_pjlvalue_ty(C),
+            {PointerType::get(JuliaType::get_jlvalue_ty(C), AddressSpace::Derived)}, false); },
     [](LLVMContext &C) { return AttributeList::get(C,
             AttributeSet::get(C, makeArrayRef({Attribute::get(C, Attribute::ReadNone), Attribute::get(C, Attribute::NoUnwind)})),
             Attributes(C, {Attribute::NonNull}),
             None); },
 };
 
-static const auto jltuple_func = new JuliaFunction{"jl_f_tuple", get_func_sig, get_func_attrs};
-static const auto jlgetfield_func = new JuliaFunction{"jl_f_getfield", get_func_sig, get_func_attrs};
-static const std::map<jl_fptr_args_t, JuliaFunction*> builtin_func_map = {
-    { &jl_f_is,                 new JuliaFunction{"jl_f_is", get_func_sig, get_func_attrs} },
-    { &jl_f_typeof,             new JuliaFunction{"jl_f_typeof", get_func_sig, get_func_attrs} },
-    { &jl_f_sizeof,             new JuliaFunction{"jl_f_sizeof", get_func_sig, get_func_attrs} },
-    { &jl_f_issubtype,          new JuliaFunction{"jl_f_issubtype", get_func_sig, get_func_attrs} },
-    { &jl_f_isa,                new JuliaFunction{"jl_f_isa", get_func_sig, get_func_attrs} },
-    { &jl_f_typeassert,         new JuliaFunction{"jl_f_typeassert", get_func_sig, get_func_attrs} },
-    { &jl_f_ifelse,             new JuliaFunction{"jl_f_ifelse", get_func_sig, get_func_attrs} },
-    { &jl_f__apply,             new JuliaFunction{"jl_f__apply", get_func_sig, get_func_attrs} },
-    { &jl_f__apply_iterate,     new JuliaFunction{"jl_f__apply_iterate", get_func_sig, get_func_attrs} },
-    { &jl_f__apply_pure,        new JuliaFunction{"jl_f__apply_pure", get_func_sig, get_func_attrs} },
-    { &jl_f__apply_latest,      new JuliaFunction{"jl_f__apply_latest", get_func_sig, get_func_attrs} },
-    { &jl_f__apply_in_world,    new JuliaFunction{"jl_f__apply_in_world", get_func_sig, get_func_attrs} },
-    { &jl_f_throw,              new JuliaFunction{"jl_f_throw", get_func_sig, get_func_attrs} },
-    { &jl_f_tuple,              jltuple_func },
-    { &jl_f_svec,               new JuliaFunction{"jl_f_svec", get_func_sig, get_func_attrs} },
-    { &jl_f_applicable,         new JuliaFunction{"jl_f_applicable", get_func_sig, get_func_attrs} },
-    { &jl_f_invoke,             new JuliaFunction{"jl_f_invoke", get_func_sig, get_func_attrs} },
-    { &jl_f_invoke_kwsorter,    new JuliaFunction{"jl_f_invoke_kwsorter", get_func_sig, get_func_attrs} },
-    { &jl_f_isdefined,          new JuliaFunction{"jl_f_isdefined", get_func_sig, get_func_attrs} },
-    { &jl_f_getfield,           jlgetfield_func },
-    { &jl_f_setfield,           new JuliaFunction{"jl_f_setfield", get_func_sig, get_func_attrs} },
-    { &jl_f_fieldtype,          new JuliaFunction{"jl_f_fieldtype", get_func_sig, get_func_attrs} },
-    { &jl_f_nfields,            new JuliaFunction{"jl_f_nfields", get_func_sig, get_func_attrs} },
-    { &jl_f__expr,              new JuliaFunction{"jl_f__expr", get_func_sig, get_func_attrs} },
-    { &jl_f__typevar,           new JuliaFunction{"jl_f__typevar", get_func_sig, get_func_attrs} },
-    { &jl_f_arrayref,           new JuliaFunction{"jl_f_arrayref", get_func_sig, get_func_attrs} },
-    { &jl_f_const_arrayref,     new JuliaFunction{"jl_f_const_arrayref", get_func_sig, get_func_attrs} },
-    { &jl_f_arrayset,           new JuliaFunction{"jl_f_arrayset", get_func_sig, get_func_attrs} },
-    { &jl_f_arraysize,          new JuliaFunction{"jl_f_arraysize", get_func_sig, get_func_attrs} },
-    { &jl_f_apply_type,         new JuliaFunction{"jl_f_apply_type", get_func_sig, get_func_attrs} },
-};
+static const auto jltuple_func = new JuliaFunction{XSTR(jl_f_tuple), get_func_sig, get_func_attrs};
+static const auto &builtin_func_map() {
+    static std::map<jl_fptr_args_t, JuliaFunction*> builtins = { { jl_f_is_addr,                 new JuliaFunction{XSTR(jl_f_is), get_func_sig, get_func_attrs} },
+          { jl_f_typeof_addr,             new JuliaFunction{XSTR(jl_f_typeof), get_func_sig, get_func_attrs} },
+          { jl_f_sizeof_addr,             new JuliaFunction{XSTR(jl_f_sizeof), get_func_sig, get_func_attrs} },
+          { jl_f_issubtype_addr,          new JuliaFunction{XSTR(jl_f_issubtype), get_func_sig, get_func_attrs} },
+          { jl_f_isa_addr,                new JuliaFunction{XSTR(jl_f_isa), get_func_sig, get_func_attrs} },
+          { jl_f_typeassert_addr,         new JuliaFunction{XSTR(jl_f_typeassert), get_func_sig, get_func_attrs} },
+          { jl_f_ifelse_addr,             new JuliaFunction{XSTR(jl_f_ifelse), get_func_sig, get_func_attrs} },
+          { jl_f__apply_iterate_addr,     new JuliaFunction{XSTR(jl_f__apply_iterate), get_func_sig, get_func_attrs} },
+          { jl_f__apply_pure_addr,        new JuliaFunction{XSTR(jl_f__apply_pure), get_func_sig, get_func_attrs} },
+          { jl_f__call_latest_addr,       new JuliaFunction{XSTR(jl_f__call_latest), get_func_sig, get_func_attrs} },
+          { jl_f__call_in_world_addr,     new JuliaFunction{XSTR(jl_f__call_in_world), get_func_sig, get_func_attrs} },
+          { jl_f__call_in_world_total_addr, new JuliaFunction{XSTR(jl_f__call_in_world_total), get_func_sig, get_func_attrs} },
+          { jl_f_throw_addr,              new JuliaFunction{XSTR(jl_f_throw), get_func_sig, get_func_attrs} },
+          { jl_f_tuple_addr,              jltuple_func },
+          { jl_f_svec_addr,               new JuliaFunction{XSTR(jl_f_svec), get_func_sig, get_func_attrs} },
+          { jl_f_applicable_addr,         new JuliaFunction{XSTR(jl_f_applicable), get_func_sig, get_func_attrs} },
+          { jl_f_invoke_addr,             new JuliaFunction{XSTR(jl_f_invoke), get_func_sig, get_func_attrs} },
+          { jl_f_invoke_kwsorter_addr,    new JuliaFunction{XSTR(jl_f_invoke_kwsorter), get_func_sig, get_func_attrs} },
+          { jl_f_isdefined_addr,          new JuliaFunction{XSTR(jl_f_isdefined), get_func_sig, get_func_attrs} },
+          { jl_f_getfield_addr,           new JuliaFunction{XSTR(jl_f_getfield), get_func_sig, get_func_attrs} },
+          { jl_f_setfield_addr,           new JuliaFunction{XSTR(jl_f_setfield), get_func_sig, get_func_attrs} },
+          { jl_f_swapfield_addr,          new JuliaFunction{XSTR(jl_f_swapfield), get_func_sig, get_func_attrs} },
+          { jl_f_modifyfield_addr,        new JuliaFunction{XSTR(jl_f_modifyfield), get_func_sig, get_func_attrs} },
+          { jl_f_fieldtype_addr,          new JuliaFunction{XSTR(jl_f_fieldtype), get_func_sig, get_func_attrs} },
+          { jl_f_nfields_addr,            new JuliaFunction{XSTR(jl_f_nfields), get_func_sig, get_func_attrs} },
+          { jl_f__expr_addr,              new JuliaFunction{XSTR(jl_f__expr), get_func_sig, get_func_attrs} },
+          { jl_f__typevar_addr,           new JuliaFunction{XSTR(jl_f__typevar), get_func_sig, get_func_attrs} },
+          { jl_f_arrayref_addr,           new JuliaFunction{XSTR(jl_f_arrayref), get_func_sig, get_func_attrs} },
+          { jl_f_const_arrayref_addr,     new JuliaFunction{XSTR(jl_f_const_arrayref), get_func_sig, get_func_attrs} },
+          { jl_f_arrayset_addr,           new JuliaFunction{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} },
+          { jl_f_arraysize_addr,          new JuliaFunction{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} },
+          { jl_f_apply_type_addr,         new JuliaFunction{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} },
+          { jl_f_donotdelete_addr,        new JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} }
+        };
+    return builtins;
+}
+
+static const auto jl_new_opaque_closure_jlcall_func = new JuliaFunction{XSTR(jl_new_opaque_closure_jlcall), get_func_sig, get_func_attrs};
 
-static int globalUnique = 0;
+static std::atomic<int> globalUniqueGeneratedNames{0};
 
 // --- code generation ---
 extern "C" {
-    int jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug;
     jl_cgparams_t jl_default_cgparams = {1, 1, 0,
 #ifdef _OS_WINDOWS_
         0,
 #else
         1,
 #endif
-        jl_default_debug_info_kind,
+        (int) DICompileUnit::DebugEmissionKind::FullDebug,
         jl_rettype_inferred, NULL };
 }
 
-template<typename T>
-static void add_return_attr(T *f, Attribute::AttrKind Kind)
-{
-    f->addAttribute(AttributeList::ReturnIndex, Kind);
-}
 
-static MDNode *best_tbaa(jl_value_t *jt) {
+static MDNode *best_tbaa(jl_tbaacache_t &tbaa_cache, jl_value_t *jt) {
     jt = jl_unwrap_unionall(jt);
     if (jt == (jl_value_t*)jl_datatype_type ||
         (jl_is_type_type(jt) && jl_is_datatype(jl_tparam0(jt))))
-        return tbaa_datatype;
+        return tbaa_cache.tbaa_datatype;
     if (!jl_is_datatype(jt))
-        return tbaa_value;
+        return tbaa_cache.tbaa_value;
     if (jl_is_abstracttype(jt))
-        return tbaa_value;
+        return tbaa_cache.tbaa_value;
     // If we're here, we know all subtypes are (im)mutable, even if we
     // don't know what the exact type is
-    return jl_is_mutable(jt) ? tbaa_mutab : tbaa_immut;
+    return jl_is_mutable(jt) ? tbaa_cache.tbaa_mutab : tbaa_cache.tbaa_immut;
 }
 
 // tracks whether codegen is currently able to simply stack-allocate this type
 // note that this includes jl_isbits, although codegen should work regardless
 static bool jl_is_concrete_immutable(jl_value_t* t)
 {
-    return jl_is_immutable_datatype(t) && ((jl_datatype_t*)t)->layout;
+    return jl_is_immutable_datatype(t) && ((jl_datatype_t*)t)->isconcretetype;
 }
 
 static bool jl_is_pointerfree(jl_value_t* t)
 {
-    if (!jl_is_immutable_datatype(t))
+    if (!jl_is_concrete_immutable(t))
         return 0;
     const jl_datatype_layout_t *layout = ((jl_datatype_t*)t)->layout;
     return layout && layout->npointers == 0;
@@ -906,11 +1178,13 @@ static bool jl_is_pointerfree(jl_value_t* t)
 
 // these queries are usually related, but we split them out here
 // for convenience and clarity (and because it changes the calling convention)
-static bool deserves_stack(jl_value_t* t, bool pointerfree=false)
+// n.b. this must include jl_is_datatype_singleton (ghostType) and primitive types
+static bool deserves_stack(jl_value_t* t)
 {
     if (!jl_is_concrete_immutable(t))
         return false;
-    return ((jl_datatype_t*)t)->isinlinealloc;
+    jl_datatype_t *dt = (jl_datatype_t*)t;
+    return jl_is_datatype_singleton(dt) || jl_datatype_isinlinealloc(dt, 0);
 }
 static bool deserves_argbox(jl_value_t* t)
 {
@@ -948,7 +1222,7 @@ struct jl_cgval_t {
         // whether this value is compatible with `data_pointer`
         return tbaa != nullptr;
     }
-    jl_cgval_t(Value *V, Value *gcroot, bool isboxed, jl_value_t *typ, Value *tindex) : // general constructor (with pointer type auto-detect)
+    jl_cgval_t(Value *V, Value *gcroot, bool isboxed, jl_value_t *typ, Value *tindex, jl_tbaacache_t &tbaa_cache) : // general constructor (with pointer type auto-detect)
         V(V), // V is allowed to be NULL in a jl_varinfo_t context, but not during codegen contexts
         Vboxed(isboxed ? V : nullptr),
         TIndex(tindex),
@@ -956,16 +1230,16 @@ struct jl_cgval_t {
         typ(typ),
         isboxed(isboxed),
         isghost(false),
-        tbaa(isboxed ? best_tbaa(typ) : nullptr)
+        tbaa(isboxed ? best_tbaa(tbaa_cache, typ) : nullptr)
     {
         if (Vboxed)
-            assert(Vboxed->getType() == T_prjlvalue);
+            assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext()));
         assert(gcroot == nullptr);
         assert(!(isboxed && TIndex != NULL));
-        assert(TIndex == NULL || TIndex->getType() == T_int8);
+        assert(TIndex == NULL || TIndex->getType() == getInt8Ty(TIndex->getContext()));
     }
     explicit jl_cgval_t(jl_value_t *typ) : // ghost value constructor
-        // mark explicit to avoid being used implicitly for conversion from NULL (use jl_cgval_t() instead)
+        // mark explicit to avoid being used implicitly for conversion from NULL (use jl_cgval_t(ctx.builder.getContext()) instead)
         V(NULL),
         Vboxed(NULL),
         TIndex(NULL),
@@ -989,7 +1263,7 @@ struct jl_cgval_t {
         tbaa(v.tbaa)
     {
         if (Vboxed)
-            assert(Vboxed->getType() == T_prjlvalue);
+            assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext()));
         // this constructor expects we had a badly or equivalently typed version
         // make sure we aren't discarding the actual type information
         if (v.TIndex) {
@@ -999,8 +1273,8 @@ struct jl_cgval_t {
             assert(isboxed || v.typ == typ || tindex);
         }
     }
-    jl_cgval_t() : // undef / unreachable / default constructor
-        V(UndefValue::get(T_void)),
+    explicit jl_cgval_t(LLVMContext &ctxt) : // undef / unreachable constructor
+        V(UndefValue::get(getVoidTy(ctxt))),
         Vboxed(NULL),
         TIndex(NULL),
         constant(NULL),
@@ -1014,7 +1288,7 @@ struct jl_cgval_t {
 
 // per-local-variable information
 struct jl_varinfo_t {
-    Instruction *boxroot; // an address, if the var might be in a jl_value_t** stack slot (marked tbaa_const, if appropriate)
+    Instruction *boxroot; // an address, if the var might be in a jl_value_t** stack slot (marked ctx.tbaa().tbaa_const, if appropriate)
     jl_cgval_t value; // a stack slot or constant value
     Value *pTIndex; // i8* stack slot for the value.TIndex tag describing `value.V`
     DILocalVariable *dinfo;
@@ -1027,8 +1301,8 @@ struct jl_varinfo_t {
     bool usedUndef;
     bool used;
 
-    jl_varinfo_t() : boxroot(NULL),
-                     value(jl_cgval_t()),
+    jl_varinfo_t(LLVMContext &ctxt) : boxroot(NULL),
+                     value(jl_cgval_t(ctxt)),
                      pTIndex(NULL),
                      dinfo(NULL),
                      defFlag(NULL),
@@ -1047,7 +1321,7 @@ class jl_codectx_t {
 public:
     IRBuilder<> builder;
     jl_codegen_params_t &emission_context;
-    jl_codegen_call_targets_t &call_targets;
+    llvm::MapVector<jl_code_instance_t*, jl_codegen_call_target_t> call_targets;
     std::map<void*, GlobalVariable*> &global_targets;
     Function *f = NULL;
     // local var info. globals are not in here.
@@ -1056,7 +1330,10 @@ class jl_codectx_t {
     std::vector<jl_cgval_t> SAvalues;
     std::vector<std::tuple<jl_cgval_t, BasicBlock *, AllocaInst *, PHINode *, jl_value_t *>> PhiNodes;
     std::vector<bool> ssavalue_assigned;
+    std::vector<orc::ThreadSafeModule> oc_modules;
     jl_module_t *module = NULL;
+    jl_typecache_t type_cache;
+    jl_tbaacache_t tbaa_cache;
     jl_method_instance_t *linfo = NULL;
     jl_value_t *rettype = NULL;
     jl_code_info_t *source = NULL;
@@ -1075,28 +1352,41 @@ class jl_codectx_t {
     int nReqArgs = 0;
     int nargs = 0;
     int nvargs = -1;
+    bool is_opaque_closure = false;
 
-    CallInst *ptlsStates = NULL;
-    Value *signalPage = NULL;
+    CallInst *pgcstack = NULL;
     Value *world_age_field = NULL;
 
     bool debug_enabled = false;
     bool use_cache = false;
     const jl_cgparams_t *params = NULL;
 
-    std::vector<std::unique_ptr<llvm::Module>> llvmcall_modules;
+    std::vector<orc::ThreadSafeModule> llvmcall_modules;
 
     jl_codectx_t(LLVMContext &llvmctx, jl_codegen_params_t &params)
       : builder(llvmctx),
         emission_context(params),
-        call_targets(params.workqueue),
+        call_targets(),
         global_targets(params.globals),
         world(params.world),
         use_cache(params.cache),
         params(params.params) { }
 
+    jl_typecache_t &types() {
+        type_cache.initialize(builder.getContext());
+        return type_cache;
+    }
+
+    jl_tbaacache_t &tbaa() {
+        tbaa_cache.initialize(builder.getContext());
+        return tbaa_cache;
+    }
+
     ~jl_codectx_t() {
         assert(this->roots == NULL);
+        // Transfer local delayed calls to the global queue
+        for (auto call_target : call_targets)
+            emission_context.workqueue.push_back(call_target);
     }
 };
 
@@ -1105,7 +1395,7 @@ GlobalVariable *JuliaVariable::realize(jl_codectx_t &ctx) {
 }
 
 static Type *julia_type_to_llvm(jl_codectx_t &ctx, jl_value_t *jt, bool *isboxed = NULL);
-static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype);
+static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure);
 static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval = -1);
 static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t *s,
                                      jl_binding_t **pbnd, bool assign);
@@ -1113,15 +1403,22 @@ static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name,
 static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i);
 static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const std::string &msg);
 static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0);
-static void CreateTrap(IRBuilder<> &irbuilder);
+static Value *get_current_task(jl_codectx_t &ctx);
+static Value *get_current_ptls(jl_codectx_t &ctx);
+static Value *get_current_signal_page(jl_codectx_t &ctx);
+static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block = true);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+                             const jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+                             const jl_cgval_t *args, size_t nargs, CallingConv::ID cc);
+static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
+                        Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
+static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t nargs, const jl_cgval_t *argv);
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt);
 
 static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p);
 static GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G);
-static Instruction *tbaa_decorate(MDNode *md, Instruction *inst);
+Instruction *tbaa_decorate(MDNode *md, Instruction *inst);
 
 static GlobalVariable *prepare_global_in(Module *M, JuliaVariable *G)
 {
@@ -1140,7 +1437,7 @@ static inline GlobalVariable *prepare_global_in(Module *M, GlobalVariable *G)
     GlobalValue *local = M->getNamedValue(G->getName());
     if (!local) {
         // Copy the GlobalVariable, but without the initializer, so it becomes a declaration
-        GlobalVariable *proto = new GlobalVariable(*M, G->getType()->getElementType(),
+        GlobalVariable *proto = new GlobalVariable(*M, G->getValueType(),
                 G->isConstant(), GlobalVariable::ExternalLinkage,
                 nullptr, G->getName(), nullptr, G->getThreadLocalMode());
         proto->copyAttributesFrom(G);
@@ -1186,53 +1483,64 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con
 
 static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty)
 {
-    return new AllocaInst(lty, 0, "", /*InsertBefore=*/ctx.ptlsStates);
+    ++EmittedAllocas;
+    return new AllocaInst(lty, 0, "", /*InsertBefore=*/ctx.pgcstack);
 }
 
 static void undef_derived_strct(IRBuilder<> &irbuilder, Value *ptr, jl_datatype_t *sty, MDNode *tbaa)
 {
     assert(ptr->getType()->getPointerAddressSpace() != AddressSpace::Tracked);
+    size_t first_offset = sty->layout->nfields ? jl_field_offset(sty, 0) : 0;
+    if (first_offset != 0)
+        irbuilder.CreateMemSet(ptr, ConstantInt::get(getInt8Ty(irbuilder.getContext()), 0), first_offset, MaybeAlign(0));
     size_t i, np = sty->layout->npointers;
     if (np == 0)
         return;
+    auto T_prjlvalue = JuliaType::get_prjlvalue_ty(irbuilder.getContext());
     ptr = irbuilder.CreateBitCast(ptr, T_prjlvalue->getPointerTo(ptr->getType()->getPointerAddressSpace()));
     for (i = 0; i < np; i++) {
         Value *fld = irbuilder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, jl_ptr_offset(sty, i));
-        tbaa_decorate(tbaa, irbuilder.CreateStore(V_rnull, fld));
+        tbaa_decorate(tbaa, irbuilder.CreateStore(Constant::getNullValue(T_prjlvalue), fld));
     }
 }
 
 static Value *emit_inttoptr(jl_codectx_t &ctx, Value *v, Type *ty)
 {
-    // Almost all of our inttoptr are generated due to representing `Ptr` with `T_size`
+    // Almost all of our inttoptr are generated due to representing `Ptr` with `getSizeTy(ctx.builder.getContext())`
     // in LLVM and most of these integers are generated from `ptrtoint` in the first place.
-    if (auto I = dyn_cast<PtrToIntInst>(v))
-        return ctx.builder.CreateBitCast(I->getOperand(0), ty);
+    if (auto I = dyn_cast<PtrToIntInst>(v)) {
+        auto ptr = I->getOperand(0);
+        if (ty->getPointerAddressSpace() == ptr->getType()->getPointerAddressSpace())
+            return ctx.builder.CreateBitCast(ptr, ty);
+        else if (cast<PointerType>(ty)->hasSameElementTypeAs(cast<PointerType>(ptr->getType())))
+            return ctx.builder.CreateAddrSpaceCast(ptr, ty);
+    }
+    ++EmittedIntToPtrs;
     return ctx.builder.CreateIntToPtr(v, ty);
 }
 
-static inline jl_cgval_t ghostValue(jl_value_t *typ)
+static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_value_t *typ)
 {
     if (typ == jl_bottom_type)
-        return jl_cgval_t(); // Undef{}
+        return jl_cgval_t(ctx.builder.getContext()); // Undef{}
     if (typ == (jl_value_t*)jl_typeofbottom_type) {
         // normalize TypeofBottom to Type{Union{}}
         typ = (jl_value_t*)jl_typeofbottom_type->super;
     }
     if (jl_is_type_type(typ)) {
         // replace T::Type{T} with T, by assuming that T must be a leaftype of some sort
-        jl_cgval_t constant(NULL, NULL, true, typ, NULL);
+        jl_cgval_t constant(NULL, NULL, true, typ, NULL, ctx.tbaa());
         constant.constant = jl_tparam0(typ);
         return constant;
     }
     return jl_cgval_t(typ);
 }
-static inline jl_cgval_t ghostValue(jl_datatype_t *typ)
+static inline jl_cgval_t ghostValue(jl_codectx_t &ctx, jl_datatype_t *typ)
 {
-    return ghostValue((jl_value_t*)typ);
+    return ghostValue(ctx, (jl_value_t*)typ);
 }
 
-static inline jl_cgval_t mark_julia_const(jl_value_t *jv)
+static inline jl_cgval_t mark_julia_const(jl_codectx_t &ctx, jl_value_t *jv)
 {
     jl_value_t *typ;
     if (jl_is_type(jv)) {
@@ -1241,19 +1549,19 @@ static inline jl_cgval_t mark_julia_const(jl_value_t *jv)
     else {
         typ = jl_typeof(jv);
         if (jl_is_datatype_singleton((jl_datatype_t*)typ))
-            return ghostValue(typ);
+            return ghostValue(ctx, typ);
     }
-    jl_cgval_t constant(NULL, NULL, true, typ, NULL);
+    jl_cgval_t constant(NULL, NULL, true, typ, NULL, ctx.tbaa());
     constant.constant = jv;
     return constant;
 }
 
 
-static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, MDNode *tbaa)
+static inline jl_cgval_t mark_julia_slot(Value *v, jl_value_t *typ, Value *tindex, jl_tbaacache_t &tbaa_cache, MDNode *tbaa)
 {
     // this enables lazy-copying of immutable values and stack or argument slots
     assert(tbaa);
-    jl_cgval_t tagval(v, NULL, false, typ, tindex);
+    jl_cgval_t tagval(v, NULL, false, typ, tindex, tbaa_cache);
     tagval.tbaa = tbaa;
     return tagval;
 }
@@ -1282,7 +1590,7 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_
         loc = emit_static_alloca(ctx, v->getType());
         ctx.builder.CreateStore(v, loc);
     }
-    return mark_julia_slot(loc, typ, tindex, tbaa_stack);
+    return mark_julia_slot(loc, typ, tindex, ctx.tbaa(), ctx.tbaa().tbaa_stack);
 }
 static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, const jl_cgval_t &v)
 {
@@ -1295,25 +1603,25 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox
 {
     if (jl_is_datatype(typ) && jl_is_datatype_singleton((jl_datatype_t*)typ)) {
         // no need to explicitly load/store a constant/ghost value
-        return ghostValue(typ);
+        return ghostValue(ctx, typ);
     }
     if (jl_is_type_type(typ)) {
         jl_value_t *tp0 = jl_tparam0(typ);
         if (jl_is_concrete_type(tp0) || tp0 == jl_bottom_type) {
             // replace T::Type{T} with T
-            return ghostValue(typ);
+            return ghostValue(ctx, typ);
         }
     }
     Type *T = julia_type_to_llvm(ctx, typ);
     if (type_is_ghost(T)) {
-        return ghostValue(typ);
+        return ghostValue(ctx, typ);
     }
     if (v && !isboxed && v->getType()->isAggregateType() && !jl_is_vecelement_type(typ) && CountTrackedPointers(v->getType()).count == 0) {
         // eagerly put this back onto the stack
         // llvm mem2reg pass will remove this if unneeded
         return value_to_pointer(ctx, v, typ, NULL);
     }
-    return jl_cgval_t(v, NULL, isboxed, typ, NULL);
+    return jl_cgval_t(v, NULL, isboxed, typ, NULL, ctx.tbaa());
 }
 
 static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isboxed, jl_datatype_t *typ)
@@ -1324,13 +1632,13 @@ static inline jl_cgval_t mark_julia_type(jl_codectx_t &ctx, Value *v, bool isbox
 // see if it might be profitable (and cheap) to change the type of v to typ
 static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ)
 {
-    if (v.typ == typ || v.typ == jl_bottom_type || v.constant || typ == (jl_value_t*)jl_any_type || jl_egal(v.typ, typ))
+    if (v.typ == jl_bottom_type || v.constant || typ == (jl_value_t*)jl_any_type || jl_egal(v.typ, typ))
         return v; // fast-path
     if (jl_is_concrete_type(v.typ) && !jl_is_kind(v.typ)) {
         if (jl_is_concrete_type(typ) && !jl_is_kind(typ)) {
             // type mismatch: changing from one leaftype to another
             CreateTrap(ctx.builder);
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         return v; // doesn't improve type info
     }
@@ -1341,16 +1649,16 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t &
             if (jl_is_concrete_type(utyp))
                 alwaysboxed = !jl_is_pointerfree(utyp);
             else
-                alwaysboxed = !((jl_datatype_t*)utyp)->abstract && ((jl_datatype_t*)utyp)->mutabl;
+                alwaysboxed = !((jl_datatype_t*)utyp)->name->abstract && ((jl_datatype_t*)utyp)->name->mutabl;
             if (alwaysboxed) {
                 // discovered that this union-split type must actually be isboxed
                 if (v.Vboxed) {
-                    return jl_cgval_t(v.Vboxed, nullptr, true, typ, NULL);
+                    return jl_cgval_t(v.Vboxed, nullptr, true, typ, NULL, ctx.tbaa());
                 }
                 else {
                     // type mismatch (there weren't any boxed values in the union)
                     CreateTrap(ctx.builder);
-                    return jl_cgval_t();
+                    return jl_cgval_t(ctx.builder.getContext());
                 }
             }
         }
@@ -1359,11 +1667,11 @@ static inline jl_cgval_t update_julia_type(jl_codectx_t &ctx, const jl_cgval_t &
     }
     Type *T = julia_type_to_llvm(ctx, typ);
     if (type_is_ghost(T))
-        return ghostValue(typ);
+        return ghostValue(ctx, typ);
     return jl_cgval_t(v, typ, NULL);
 }
 
-static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ);
+static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip=nullptr);
 
 // --- allocating local variables ---
 
@@ -1376,14 +1684,14 @@ static void store_def_flag(jl_codectx_t &ctx, const jl_varinfo_t &vi, bool val)
 {
     assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things");
     assert(vi.usedUndef && vi.defFlag && "undef flag codegen corrupted");
-    ctx.builder.CreateStore(ConstantInt::get(T_int1, val), vi.defFlag, vi.isVolatile);
+    ctx.builder.CreateStore(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), val), vi.defFlag, vi.isVolatile);
 }
 
 static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi)
 {
     assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things");
     if (vi.usedUndef) {
-        vi.defFlag = emit_static_alloca(ctx, T_int1);
+        vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()));
         store_def_flag(ctx, vi, false);
     }
 }
@@ -1391,7 +1699,18 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi)
 
 // --- utilities ---
 
-static void CreateTrap(IRBuilder<> &irbuilder)
+static Constant *undef_value_for_type(Type *T) {
+    auto tracked = CountTrackedPointers(T);
+    Constant *undef;
+    if (tracked.count)
+        // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
+        undef = Constant::getNullValue(T);
+    else
+        undef = UndefValue::get(T);
+    return undef;
+}
+
+static void CreateTrap(IRBuilder<> &irbuilder, bool create_new_block)
 {
     Function *f = irbuilder.GetInsertBlock()->getParent();
     Function *trap_func = Intrinsic::getDeclaration(
@@ -1399,8 +1718,13 @@ static void CreateTrap(IRBuilder<> &irbuilder)
             Intrinsic::trap);
     irbuilder.CreateCall(trap_func);
     irbuilder.CreateUnreachable();
-    BasicBlock *newBB = BasicBlock::Create(irbuilder.getContext(), "after_noret", f);
-    irbuilder.SetInsertPoint(newBB);
+    if (create_new_block) {
+        BasicBlock *newBB = BasicBlock::Create(irbuilder.getContext(), "after_noret", f);
+        irbuilder.SetInsertPoint(newBB);
+    }
+    else {
+        irbuilder.ClearInsertionPoint();
+    }
 }
 
 #if 0 // this code is likely useful, but currently unused
@@ -1408,8 +1732,8 @@ static void CreateTrap(IRBuilder<> &irbuilder)
 static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test)
 {
     Function *f = irbuilder.GetInsertBlock()->getParent();
-    BasicBlock *abortBB = BasicBlock::Create(jl_LLVMContext, "debug_abort", f);
-    BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_abort", f);
+    BasicBlock *abortBB = BasicBlock::Create(irbuilder.getContext(), "debug_abort", f);
+    BasicBlock *postBB = BasicBlock::Create(irbuilder.getContext(), "post_abort", f);
     irbuilder.CreateCondBr(test, abortBB, postBB);
     irbuilder.SetInsertPoint(abortBB);
     Function *trap_func = Intrinsic::getDeclaration(
@@ -1422,14 +1746,15 @@ static void CreateConditionalAbort(IRBuilder<> &irbuilder, Value *test)
 #endif
 #endif
 
+
 #include "cgutils.cpp"
 
-static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ)
+static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip)
 {
     // previous value was a split union, compute new index, or box
-    Value *new_tindex = ConstantInt::get(T_int8, 0x80);
+    Value *new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80);
     SmallBitVector skip_box(1, true);
-    Value *tindex = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(T_int8, 0x7f));
+    Value *tindex = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
     if (jl_is_uniontype(typ)) {
         // compute the TIndex mapping from v.typ -> typ
         unsigned counter = 0;
@@ -1441,14 +1766,18 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                 if (new_idx) {
                     // found a matching element,
                     // match it against either the unboxed index
-                    Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(T_int8, idx));
-                    new_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(T_int8, new_idx), new_tindex);
+                    Value *cmp = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx));
+                    new_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), new_idx), new_tindex);
                     t = true;
                 }
                 else if (!jl_subtype((jl_value_t*)jt, typ)) {
                     // new value doesn't need to be boxed
                     // since it isn't part of the new union
                     t = true;
+                    if (skip) {
+                        Value *skip1 = ctx.builder.CreateICmpEQ(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx));
+                        *skip = *skip ? ctx.builder.CreateOr(*skip, skip1) : skip1;
+                    }
                 }
                 else {
                     // will actually need to box this element
@@ -1472,9 +1801,9 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
         // value) against all the types that are now explicitly
         // selected and select the appropriate one as our new tindex.
         if (v.Vboxed) {
-            wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(T_int8, 0x80));
+            wasboxed = ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
             new_tindex = ctx.builder.CreateOr(wasboxed, new_tindex);
-            wasboxed = ctx.builder.CreateICmpNE(wasboxed, ConstantInt::get(T_int8, 0));
+            wasboxed = ctx.builder.CreateICmpNE(wasboxed, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
 
             BasicBlock *currBB = ctx.builder.GetInsertBlock();
 
@@ -1482,11 +1811,13 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
             // actually need it.
             Value *union_box_dt = NULL;
             BasicBlock *union_isaBB = NULL;
+            BasicBlock *post_union_isaBB = NULL;
             auto maybe_setup_union_isa = [&]() {
                 if (!union_isaBB) {
-                    union_isaBB = BasicBlock::Create(jl_LLVMContext, "union_isa", ctx.f);
+                    union_isaBB = BasicBlock::Create(ctx.builder.getContext(), "union_isa", ctx.f);
                     ctx.builder.SetInsertPoint(union_isaBB);
-                    union_box_dt = emit_typeof(ctx, v.Vboxed);
+                    union_box_dt = emit_typeof_or_null(ctx, v.Vboxed);
+                    post_union_isaBB = ctx.builder.GetInsertBlock();
                 }
             };
 
@@ -1494,7 +1825,7 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
             // (0x80). We could use `v.Tindex`, here, since we know
             // it has to be 0x80, but it seems likely the backend
             // will like the explicit constant better.
-            Value *union_box_tindex = ConstantInt::get(T_int8, 0x80);
+            Value *union_box_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80);
             unsigned counter = 0;
             for_each_uniontype_small(
                 // for each new union-split value
@@ -1504,21 +1835,21 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                         // didn't handle this item before, select its new union index
                         maybe_setup_union_isa();
                         Value *cmp = ctx.builder.CreateICmpEQ(track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jt)), union_box_dt);
-                        union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(T_int8, 0x80 | idx), union_box_tindex);
+                        union_box_tindex = ctx.builder.CreateSelect(cmp, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80 | idx), union_box_tindex);
                     }
                 },
                 typ,
                 counter);
             if (union_box_dt) {
-                BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_union_isa", ctx.f);
+                BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_union_isa", ctx.f);
                 ctx.builder.CreateBr(postBB);
                 ctx.builder.SetInsertPoint(currBB);
-                Value *wasunknown = ctx.builder.CreateICmpEQ(v.TIndex, ConstantInt::get(T_int8, 0x80));
+                Value *wasunknown = ctx.builder.CreateICmpEQ(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
                 ctx.builder.CreateCondBr(wasunknown, union_isaBB, postBB);
                 ctx.builder.SetInsertPoint(postBB);
-                PHINode *tindex_phi = ctx.builder.CreatePHI(T_int8, 2);
+                PHINode *tindex_phi = ctx.builder.CreatePHI(getInt8Ty(ctx.builder.getContext()), 2);
                 tindex_phi->addIncoming(new_tindex, currBB);
-                tindex_phi->addIncoming(union_box_tindex, union_isaBB);
+                tindex_phi->addIncoming(union_box_tindex, post_union_isaBB);
                 new_tindex = tindex_phi;
             }
         }
@@ -1531,18 +1862,20 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                 // to touch it at all. Otherwise we're either transitioning
                 // unboxed->boxed, or leaving an unboxed value in place.
                 Value *isboxed = ctx.builder.CreateICmpNE(
-                    ctx.builder.CreateAnd(new_tindex, ConstantInt::get(T_int8, 0x80)),
-                    ConstantInt::get(T_int8, 0));
+                    ctx.builder.CreateAnd(new_tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                    ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
                 boxv = ctx.builder.CreateSelect(
                     ctx.builder.CreateAnd(wasboxed, isboxed), v.Vboxed, boxv);
             }
+            Value *slotv;
+            MDNode *tbaa;
             if (v.V == NULL) {
                 // v.V might be NULL if it was all ghost objects before
-                return jl_cgval_t(boxv, NULL, false, typ, new_tindex);
-            } else {
+                slotv = NULL;
+                tbaa = ctx.tbaa().tbaa_const;
+            }
+            else {
                 Value *isboxv = ctx.builder.CreateIsNotNull(boxv);
-                Value *slotv;
-                MDNode *tbaa;
                 if (v.ispointer()) {
                     slotv = v.V;
                     tbaa = v.tbaa;
@@ -1550,84 +1883,90 @@ static jl_cgval_t convert_julia_type_union(jl_codectx_t &ctx, const jl_cgval_t &
                 else {
                     slotv = emit_static_alloca(ctx, v.V->getType());
                     ctx.builder.CreateStore(v.V, slotv);
-                    tbaa = tbaa_stack;
+                    tbaa = ctx.tbaa().tbaa_stack;
                 }
                 slotv = ctx.builder.CreateSelect(isboxv,
                             decay_derived(ctx, boxv),
                             decay_derived(ctx, emit_bitcast(ctx, slotv, boxv->getType())));
-                jl_cgval_t newv = jl_cgval_t(slotv, NULL, false, typ, new_tindex);
-                assert(boxv->getType() == T_prjlvalue);
-                newv.Vboxed = boxv;
-                newv.tbaa = tbaa;
-                return newv;
             }
+            jl_cgval_t newv = jl_cgval_t(slotv, NULL, false, typ, new_tindex, ctx.tbaa());
+            assert(boxv->getType() == ctx.types().T_prjlvalue);
+            newv.Vboxed = boxv;
+            newv.tbaa = tbaa;
+            return newv;
         }
     }
     else {
-        return jl_cgval_t(boxed(ctx, v), NULL, true, typ, NULL);
+        return jl_cgval_t(boxed(ctx, v), NULL, true, typ, NULL, ctx.tbaa());
     }
     return jl_cgval_t(v, typ, new_tindex);
 }
 
 // given a value marked with type `v.typ`, compute the mapping and/or boxing to return a value of type `typ`
 // TODO: should this set TIndex when trivial (such as 0x80 or concrete types) ?
-static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ)
+static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_value_t *typ, Value **skip)
 {
     if (typ == (jl_value_t*)jl_typeofbottom_type)
-        return ghostValue(typ); // normalize TypeofBottom to Type{Union{}}
-    if (v.typ == typ || v.typ == jl_bottom_type || jl_egal(v.typ, typ))
+        return ghostValue(ctx, typ); // normalize TypeofBottom to Type{Union{}}
+    if (v.typ == jl_bottom_type || jl_egal(v.typ, typ))
         return v; // fast-path
     Type *T = julia_type_to_llvm(ctx, typ);
     if (type_is_ghost(T))
-        return ghostValue(typ);
+        return ghostValue(ctx, typ);
     Value *new_tindex = NULL;
     if (jl_is_concrete_type(typ)) {
+        assert(skip == nullptr && "skip only valid for union type return");
         if (v.TIndex && !jl_is_pointerfree(typ)) {
             // discovered that this union-split type must actually be isboxed
             if (v.Vboxed) {
-                return jl_cgval_t(v.Vboxed, nullptr, true, typ, NULL);
+                return jl_cgval_t(v.Vboxed, nullptr, true, typ, NULL, ctx.tbaa());
             }
             else {
                 // type mismatch: there weren't any boxed values in the union
                 CreateTrap(ctx.builder);
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
         if (jl_is_concrete_type(v.typ) && !jl_is_kind(v.typ)) {
             if (jl_is_concrete_type(typ) && !jl_is_kind(typ)) {
                 // type mismatch: changing from one leaftype to another
                 CreateTrap(ctx.builder);
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
     }
     else {
         bool makeboxed = false;
         if (v.TIndex) {
-            return convert_julia_type_union(ctx, v, typ);
+            return convert_julia_type_union(ctx, v, typ, skip);
         }
         else if (!v.isboxed && jl_is_uniontype(typ)) {
             // previous value was unboxed (leaftype), statically compute union tindex
             assert(jl_is_concrete_type(v.typ));
             unsigned new_idx = get_box_tindex((jl_datatype_t*)v.typ, typ);
             if (new_idx) {
-                new_tindex = ConstantInt::get(T_int8, new_idx);
+                new_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), new_idx);
                 if (v.V && !v.ispointer()) {
                     // TODO: remove this branch once all consumers of v.TIndex understand how to handle a non-ispointer value
                     Value *slotv = emit_static_alloca(ctx, v.V->getType());
                     ctx.builder.CreateStore(v.V, slotv);
-                    jl_cgval_t newv = jl_cgval_t(slotv, NULL, false, typ, new_tindex);
-                    newv.tbaa = tbaa_stack;
+                    jl_cgval_t newv = jl_cgval_t(slotv, NULL, false, typ, new_tindex, ctx.tbaa());
+                    newv.tbaa = ctx.tbaa().tbaa_stack;
                     return newv;
                 }
             }
             else if (jl_subtype(v.typ, typ)) {
                 makeboxed = true;
             }
+            else if (skip) {
+                // undef
+                *skip = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
+                return jl_cgval_t(ctx.builder.getContext());
+            }
             else {
                 // unreachable
                 CreateTrap(ctx.builder);
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
         else if (!v.isboxed) {
@@ -1635,14 +1974,18 @@ static jl_cgval_t convert_julia_type(jl_codectx_t &ctx, const jl_cgval_t &v, jl_
         }
         if (makeboxed) {
             // convert to a simple isboxed value
-            return jl_cgval_t(boxed(ctx, v), NULL, true, typ, NULL);
+            return jl_cgval_t(boxed(ctx, v), NULL, true, typ, NULL, ctx.tbaa());
         }
     }
     return jl_cgval_t(v, typ, new_tindex);
 }
 
-static void jl_setup_module(Module *m, const jl_cgparams_t *params = &jl_default_cgparams)
+orc::ThreadSafeModule jl_create_llvm_module(StringRef name, orc::ThreadSafeContext context, bool imaging_mode, const DataLayout &DL, const Triple &triple)
 {
+    ++ModulesCreated;
+    auto lock = context.getLock();
+    Module *m = new Module(name, *context.getContext());
+    orc::ThreadSafeModule TSM(std::unique_ptr<Module>(m), std::move(context));
     // Some linkers (*cough* OS X) don't understand DWARF v4, so we use v2 in
     // imaging mode. The structure of v4 is slightly nicer for debugging JIT
     // code.
@@ -1655,17 +1998,21 @@ static void jl_setup_module(Module *m, const jl_cgparams_t *params = &jl_default
         m->addModuleFlag(llvm::Module::Warning, "Dwarf Version", dwarf_version);
     }
     if (!m->getModuleFlag("Debug Info Version"))
-        m->addModuleFlag(llvm::Module::Error, "Debug Info Version",
+        m->addModuleFlag(llvm::Module::Warning, "Debug Info Version",
             llvm::DEBUG_METADATA_VERSION);
-    m->setDataLayout(jl_data_layout);
-    m->setTargetTriple(jl_TargetMachine->getTargetTriple().str());
-}
+    m->setDataLayout(DL);
+    m->setTargetTriple(triple.str());
 
-Module *jl_create_llvm_module(StringRef name)
-{
-    Module *M = new Module(name, jl_LLVMContext);
-    jl_setup_module(M);
-    return M;
+#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) && JL_LLVM_VERSION >= 130000
+    // tell Win32 to assume the stack is always 16-byte aligned,
+    // and to ensure that it is 16-byte aligned for out-going calls,
+    // to ensure compatibility with GCC codes
+    m->setOverrideStackAlignment(16);
+#endif
+#if defined(JL_DEBUG_BUILD) && JL_LLVM_VERSION >= 130000
+    m->setStackProtectorGuard("global");
+#endif
+    return TSM;
 }
 
 static void jl_init_function(Function *F)
@@ -1676,7 +2023,11 @@ static void jl_init_function(Function *F)
     // upon entry to any function. This achieves compatibility
     // with both MinGW-GCC (which assumes an 16-byte-aligned stack) and
     // i686 Windows (which uses a 4-byte-aligned stack)
+#if JL_LLVM_VERSION >= 140000
+    AttrBuilder attr(F->getContext());
+#else
     AttrBuilder attr;
+#endif
     attr.addStackAlignmentAttr(16);
     F->addAttributes(AttributeList::FunctionIndex, attr);
 #endif
@@ -1684,11 +2035,15 @@ static void jl_init_function(Function *F)
     F->setHasUWTable(); // force NeedsWinEH
 #endif
 #ifdef JL_DISABLE_FPO
-#if LLVM_VERSION_MAJOR >= 8
     F->addFnAttr("frame-pointer", "all");
-#else
-    F->addFnAttr("no-frame-pointer-elim", "true");
 #endif
+#if !defined(_COMPILER_ASAN_ENABLED_) && !defined(_OS_WINDOWS_)
+    // ASAN won't like us accessing undefined memory causing spurious issues,
+    // and Windows has platform-specific handling which causes it to mishandle
+    // this annotation. Other platforms should just ignore this if they don't
+    // implement it.
+    F->addFnAttr("probe-stack", "inline-asm");
+    //F->addFnAttr("stack-probe-size", 4096); // can use this to change the default
 #endif
 }
 
@@ -1719,13 +2074,13 @@ static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t
     if (jl_nparams(sig) == 0)
         return std::make_pair(false, false);
     if (va) {
-        if (jl_is_vararg_type(jl_tparam(sig, jl_nparams(sig) - 1)))
+        if (jl_is_vararg(jl_tparam(sig, jl_nparams(sig) - 1)))
             return std::make_pair(false, false);
     }
     // not invalid, consider if specialized signature is worthwhile
     if (prefer_specsig)
         return std::make_pair(true, false);
-    if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype))
+    if (!deserves_retbox(rettype) && !jl_is_datatype_singleton((jl_datatype_t*)rettype) && rettype != (jl_value_t*)jl_bool_type)
         return std::make_pair(true, false);
     if (jl_is_uniontype(rettype)) {
         bool allunbox;
@@ -1734,6 +2089,8 @@ static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t
         if (nbytes > 0)
             return std::make_pair(true, false); // some elements of the union could be returned unboxed avoiding allocation
     }
+    if (jl_nparams(sig) <= 3) // few parameters == more efficient to pass directly
+        return std::make_pair(true, false);
     bool allSingleton = true;
     for (size_t i = 0; i < jl_nparams(sig); i++) {
         jl_value_t *sigt = jl_tparam(sig, i);
@@ -1751,32 +2108,16 @@ static std::pair<bool, bool> uses_specsig(jl_method_instance_t *lam, jl_value_t
 
 // Logging for code coverage and memory allocation
 
-const int logdata_blocksize = 32; // target getting nearby lines in the same general cache area and reducing calls to malloc by chunking
-typedef uint64_t logdata_block[logdata_blocksize];
-typedef StringMap< std::vector<logdata_block*> > logdata_t;
+JL_DLLEXPORT void jl_coverage_alloc_line(StringRef filename, int line);
+JL_DLLEXPORT uint64_t *jl_coverage_data_pointer(StringRef filename, int line);
+JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line);
 
-static uint64_t *allocLine(std::vector<logdata_block*> &vec, int line)
+static void visitLine(jl_codectx_t &ctx, uint64_t *ptr, Value *addend, const char *name)
 {
-    unsigned block = line / logdata_blocksize;
-    line = line % logdata_blocksize;
-    if (vec.size() <= block)
-        vec.resize(block + 1);
-    if (vec[block] == NULL) {
-        vec[block] = (logdata_block*)calloc(1, sizeof(logdata_block));
-    }
-    logdata_block &data = *vec[block];
-    if (data[line] == 0)
-        data[line] = 1;
-    return &data[line];
-}
-
-static void visitLine(jl_codectx_t &ctx, std::vector<logdata_block*> &vec, int line, Value *addend, const char* name)
-{
-    uint64_t *ptr = allocLine(vec, line);
     Value *pv = ConstantExpr::getIntToPtr(
-        ConstantInt::get(T_size, (uintptr_t)ptr),
-        T_pint64);
-    Value *v = ctx.builder.CreateLoad(pv, true, name);
+        ConstantInt::get(getSizeTy(ctx.builder.getContext()), (uintptr_t)ptr),
+        getInt64PtrTy(ctx.builder.getContext()));
+    Value *v = ctx.builder.CreateLoad(getInt64Ty(ctx.builder.getContext()), pv, true, name);
     v = ctx.builder.CreateAdd(v, addend);
     ctx.builder.CreateStore(v, pv, true); // volatile, not atomic, so this might be an underestimate,
                                           // but it's faster this way
@@ -1784,173 +2125,25 @@ static void visitLine(jl_codectx_t &ctx, std::vector<logdata_block*> &vec, int l
 
 // Code coverage
 
-static logdata_t coverageData;
-
 static void coverageVisitLine(jl_codectx_t &ctx, StringRef filename, int line)
 {
-    assert(!imaging_mode);
-    if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
-        return;
-    visitLine(ctx, coverageData[filename], line, ConstantInt::get(T_int64, 1), "lcnt");
-}
-
-static void coverageAllocLine(StringRef filename, int line)
-{
-    assert(!imaging_mode);
+    assert(!ctx.emission_context.imaging);
     if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
         return;
-    allocLine(coverageData[filename], line);
+    visitLine(ctx, jl_coverage_data_pointer(filename, line), ConstantInt::get(getInt64Ty(ctx.builder.getContext()), 1), "lcnt");
 }
 
 // Memory allocation log (malloc_log)
 
-static logdata_t mallocData;
-
 static void mallocVisitLine(jl_codectx_t &ctx, StringRef filename, int line, Value *sync)
 {
-    assert(!imaging_mode);
+    assert(!ctx.emission_context.imaging);
     if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
         return;
     Value *addend = sync
         ? ctx.builder.CreateCall(prepare_call(sync_gc_total_bytes_func), {sync})
         : ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {});
-    visitLine(ctx, mallocData[filename], line, addend, "bytecnt");
-}
-
-// Resets the malloc counts.
-extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
-{
-    logdata_t::iterator it = mallocData.begin();
-    for (; it != mallocData.end(); it++) {
-        std::vector<logdata_block*> &bytes = (*it).second;
-        std::vector<logdata_block*>::iterator itb;
-        for (itb = bytes.begin(); itb != bytes.end(); itb++) {
-            if (*itb) {
-                logdata_block &data = **itb;
-                for (int i = 0; i < logdata_blocksize; i++) {
-                    if (data[i] > 0)
-                        data[i] = 1;
-                }
-            }
-        }
-    }
-    jl_gc_sync_total_bytes(0);
-}
-
-static void write_log_data(logdata_t &logData, const char *extension)
-{
-    std::string base = std::string(jl_options.julia_bindir);
-    base = base + "/../share/julia/base/";
-    logdata_t::iterator it = logData.begin();
-    for (; it != logData.end(); it++) {
-        std::string filename(it->first());
-        std::vector<logdata_block*> &values = it->second;
-        if (!values.empty()) {
-            if (!isabspath(filename.c_str()))
-                filename = base + filename;
-            std::ifstream inf(filename.c_str());
-            if (!inf.is_open())
-                continue;
-            std::string outfile = filename + extension;
-            std::ofstream outf(outfile.c_str(), std::ofstream::trunc | std::ofstream::out | std::ofstream::binary);
-            if (outf.is_open()) {
-                inf.exceptions(std::ifstream::badbit);
-                outf.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-                char line[1024];
-                int l = 1;
-                unsigned block = 0;
-                while (!inf.eof()) {
-                    inf.getline(line, sizeof(line));
-                    if (inf.fail()) {
-                        if (inf.eof())
-                            break; // no content on trailing line
-                        // Read through lines longer than sizeof(line)
-                        inf.clear();
-                        inf.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
-                    }
-                    logdata_block *data = NULL;
-                    if (block < values.size()) {
-                        data = values[block];
-                    }
-                    uint64_t value = data ? (*data)[l] : 0;
-                    if (++l >= logdata_blocksize) {
-                        l = 0;
-                        block++;
-                    }
-                    outf.width(9);
-                    if (value == 0)
-                        outf << '-';
-                    else
-                        outf << (value - 1);
-                    outf.width(0);
-                    outf << " " << line << '\n';
-                }
-                outf.close();
-            }
-            inf.close();
-        }
-    }
-}
-
-static void write_lcov_data(logdata_t &logData, const std::string &outfile)
-{
-    std::ofstream outf(outfile.c_str(), std::ofstream::ate | std::ofstream::out | std::ofstream::binary);
-    //std::string base = std::string(jl_options.julia_bindir);
-    //base = base + "/../share/julia/base/";
-    logdata_t::iterator it = logData.begin();
-    for (; it != logData.end(); it++) {
-        StringRef filename = it->first();
-        const std::vector<logdata_block*> &values = it->second;
-        if (!values.empty()) {
-            outf << "SF:" << filename.str() << '\n';
-            size_t n_covered = 0;
-            size_t n_instrumented = 0;
-            size_t lno = 0;
-            for (auto &itv : values) {
-                if (itv) {
-                    logdata_block &data = *itv;
-                    for (int i = 0; i < logdata_blocksize; i++) {
-                        auto cov = data[i];
-                        if (cov > 0) {
-                            n_instrumented++;
-                            if (cov > 1)
-                                n_covered++;
-                            outf << "DA:" << lno << ',' << (cov - 1) << '\n';
-                        }
-                        lno++;
-                    }
-                }
-                else {
-                    lno += logdata_blocksize;
-                }
-            }
-            outf << "LH:" << n_covered << '\n';
-            outf << "LF:" << n_instrumented << '\n';
-            outf << "end_of_record\n";
-        }
-    }
-    outf.close();
-}
-
-extern "C" void jl_write_coverage_data(const char *output)
-{
-    if (output) {
-        StringRef output_pattern(output);
-        if (output_pattern.endswith(".info"))
-            write_lcov_data(coverageData, jl_format_filename(output_pattern));
-    }
-    else {
-        std::string stm;
-        raw_string_ostream(stm) << "." << jl_getpid() << ".cov";
-        write_log_data(coverageData, stm.c_str());
-    }
-}
-
-extern "C" void jl_write_malloc_log(void)
-{
-    std::string stm;
-    raw_string_ostream(stm) << "." << jl_getpid() << ".mem";
-    write_log_data(mallocData, stm.c_str());
+    visitLine(ctx, jl_malloc_data_pointer(filename, line), addend, "bytecnt");
 }
 
 // --- constant determination ---
@@ -1971,6 +2164,7 @@ static void cg_bdw(jl_codectx_t &ctx, jl_binding_t *b)
 
 static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args, size_t nargs)
 {
+    assert(nargs > 1);
     jl_value_t **v = (jl_value_t**)alloca(sizeof(jl_value_t*) * nargs);
     for (size_t i = 0; i < nargs; i++) {
         if (!args[i].constant)
@@ -1978,9 +2172,9 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args,
         v[i] = args[i].constant;
     }
     assert(v[0] == jl_builtin_apply_type);
-    size_t last_age = jl_get_ptls_states()->world_age;
+    size_t last_age = jl_current_task->world_age;
     // call apply_type, but ignore errors. we know that will work in world 1.
-    jl_get_ptls_states()->world_age = 1;
+    jl_current_task->world_age = 1;
     jl_value_t *result;
     JL_TRY {
         result = jl_apply(v, nargs);
@@ -1988,7 +2182,7 @@ static jl_value_t *static_apply_type(jl_codectx_t &ctx, const jl_cgval_t *args,
     JL_CATCH {
         result = NULL;
     }
-    jl_get_ptls_states()->world_age = last_age;
+    jl_current_task->world_age = last_age;
     return result;
 }
 
@@ -2030,10 +2224,10 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
     }
     if (jl_is_expr(ex)) {
         jl_expr_t *e = (jl_expr_t*)ex;
-        if (e->head == call_sym) {
+        if (e->head == jl_call_sym) {
             jl_value_t *f = static_eval(ctx, jl_exprarg(e, 0));
             if (f) {
-                if (jl_array_dim0(e->args) == 3 && f == jl_builtin_getfield) {
+                if (jl_array_dim0(e->args) == 3 && (f == jl_builtin_getfield || f == jl_builtin_getglobal)) {
                     m = (jl_module_t*)static_eval(ctx, jl_exprarg(e, 1));
                     // Check the tag before evaluating `s` so that a value of random
                     // type won't be corrupted.
@@ -2064,9 +2258,9 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
                             return NULL;
                         }
                     }
-                    size_t last_age = jl_get_ptls_states()->world_age;
+                    size_t last_age = jl_current_task->world_age;
                     // here we know we're calling specific builtin functions that work in world 1.
-                    jl_get_ptls_states()->world_age = 1;
+                    jl_current_task->world_age = 1;
                     jl_value_t *result;
                     JL_TRY {
                         result = jl_apply(v, n+1);
@@ -2074,13 +2268,13 @@ static jl_value_t *static_eval(jl_codectx_t &ctx, jl_value_t *ex)
                     JL_CATCH {
                         result = NULL;
                     }
-                    jl_get_ptls_states()->world_age = last_age;
+                    jl_current_task->world_age = last_age;
                     JL_GC_POP();
                     return result;
                 }
             }
         }
-        else if (e->head == static_parameter_sym) {
+        else if (e->head == jl_static_parameter_sym) {
             size_t idx = jl_unbox_long(jl_exprarg(e, 0));
             if (idx <= jl_svec_len(ctx.linfo->sparam_vals)) {
                 jl_value_t *e = jl_svecref(ctx.linfo->sparam_vals, idx - 1);
@@ -2137,7 +2331,7 @@ static std::set<int> assigned_in_try(jl_array_t *stmts, int s, long l)
     for(int i=s; i <= l; i++) {
         jl_value_t *st = jl_array_ptr_ref(stmts,i);
         if (jl_is_expr(st)) {
-            if (((jl_expr_t*)st)->head == assign_sym) {
+            if (((jl_expr_t*)st)->head == jl_assign_sym) {
                 jl_value_t *ar = jl_exprarg(st, 0);
                 if (jl_is_slot(ar)) {
                     av.insert(jl_slot_number(ar)-1);
@@ -2154,7 +2348,7 @@ static void mark_volatile_vars(jl_array_t *stmts, std::vector<jl_varinfo_t> &slo
     for (int i = 0; i < (int)slength; i++) {
         jl_value_t *st = jl_array_ptr_ref(stmts, i);
         if (jl_is_expr(st)) {
-            if (((jl_expr_t*)st)->head == enter_sym) {
+            if (((jl_expr_t*)st)->head == jl_enter_sym) {
                 int last = jl_unbox_long(jl_exprarg(st, 0));
                 std::set<int> as = assigned_in_try(stmts, i + 1, last);
                 for (int j = 0; j < (int)slength; j++) {
@@ -2185,14 +2379,14 @@ static void simple_use_analysis(jl_codectx_t &ctx, jl_value_t *expr)
     }
     else if (jl_is_expr(expr)) {
         jl_expr_t *e = (jl_expr_t*)expr;
-        if (e->head == method_sym) {
+        if (e->head == jl_method_sym) {
             simple_use_analysis(ctx, jl_exprarg(e, 0));
             if (jl_expr_nargs(e) > 1) {
                 simple_use_analysis(ctx, jl_exprarg(e, 1));
                 simple_use_analysis(ctx, jl_exprarg(e, 2));
             }
         }
-        else if (e->head == assign_sym) {
+        else if (e->head == jl_assign_sym) {
             // don't consider assignment LHS as a variable "use"
             simple_use_analysis(ctx, jl_exprarg(e, 1));
         }
@@ -2267,138 +2461,94 @@ static void jl_add_method_root(jl_codectx_t &ctx, jl_value_t *val)
 
 // --- generating function calls ---
 
-static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *name)
+static jl_cgval_t emit_globalref(jl_codectx_t &ctx, jl_module_t *mod, jl_sym_t *name, AtomicOrdering order)
 {
     jl_binding_t *bnd = NULL;
     Value *bp = global_binding_pointer(ctx, mod, name, &bnd, false);
+    if (bp == NULL)
+        return jl_cgval_t(ctx.builder.getContext());
+    bp = julia_binding_pvalue(ctx, bp);
     if (bnd && bnd->value != NULL) {
         if (bnd->constp) {
-            return mark_julia_const(bnd->value);
+            return mark_julia_const(ctx, bnd->value);
         }
-        LoadInst *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, Align(sizeof(void*)));
-        v->setOrdering(AtomicOrdering::Unordered);
-        tbaa_decorate(tbaa_binding, v);
-        return mark_julia_type(ctx, v, true, (jl_value_t*)jl_any_type);
+        LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
+        v->setOrdering(order);
+        tbaa_decorate(ctx.tbaa().tbaa_binding, v);
+        return mark_julia_type(ctx, v, true, bnd->ty);
     }
     // todo: use type info to avoid undef check
-    return emit_checked_var(ctx, bp, name, false, tbaa_binding);
+    return emit_checked_var(ctx, bp, name, false, ctx.tbaa().tbaa_binding);
 }
 
-static jl_cgval_t emit_getfield(jl_codectx_t &ctx, const jl_cgval_t &strct, jl_sym_t *name)
+static void emit_globalset(jl_codectx_t &ctx, jl_binding_t *bnd, Value *bp, const jl_cgval_t &rval_info, AtomicOrdering Order)
 {
-    if (strct.constant && jl_is_module(strct.constant))
-        return emit_globalref(ctx, (jl_module_t*)strct.constant, name);
-
-    jl_datatype_t *sty = (jl_datatype_t*)strct.typ;
-    if (jl_is_type_type((jl_value_t*)sty) && jl_is_concrete_type(jl_tparam0(sty)))
-        sty = (jl_datatype_t*)jl_typeof(jl_tparam0(sty));
-    sty = (jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)sty);
-    if (jl_is_structtype(sty) && sty != jl_module_type && sty->layout) {
-        unsigned idx = jl_field_index(sty, name, 0);
-        if (idx != (unsigned)-1) {
-            return emit_getfield_knownidx(ctx, strct, idx, sty);
-        }
+    Value *rval = boxed(ctx, rval_info);
+    if (bnd && !bnd->constp && bnd->ty && jl_subtype(rval_info.typ, bnd->ty)) {
+        StoreInst *v = ctx.builder.CreateAlignedStore(rval, julia_binding_pvalue(ctx, bp), Align(sizeof(void*)));
+        v->setOrdering(Order);
+        tbaa_decorate(ctx.tbaa().tbaa_binding, v);
+        emit_write_barrier_binding(ctx, bp, rval);
+    }
+    else {
+        ctx.builder.CreateCall(prepare_call(jlcheckassign_func), { bp, mark_callee_rooted(ctx, rval) });
     }
-    // TODO: attempt better codegen for approximate types, if the types
-    // and offsets of some fields are independent of parameters.
-
-    // TODO: generic getfield func with more efficient calling convention
-    jl_cgval_t myargs_array[2] = {
-        strct,
-        mark_julia_const((jl_value_t*)name)
-    };
-    Value *result = emit_jlcall(ctx, jlgetfield_func, V_rnull, myargs_array, 2, JLCALL_F_CC);
-    return mark_julia_type(ctx, result, true, jl_any_type);
-}
-
-template<typename Func>
-static Value *emit_guarded_test(jl_codectx_t &ctx, Value *ifnot, bool defval, Func &&func)
-{
-    BasicBlock *currBB = ctx.builder.GetInsertBlock();
-    BasicBlock *passBB = BasicBlock::Create(jl_LLVMContext, "guard_pass", ctx.f);
-    BasicBlock *exitBB = BasicBlock::Create(jl_LLVMContext, "guard_exit", ctx.f);
-    ctx.builder.CreateCondBr(ifnot, passBB, exitBB);
-    ctx.builder.SetInsertPoint(passBB);
-    auto res = func();
-    passBB = ctx.builder.GetInsertBlock();
-    ctx.builder.CreateBr(exitBB);
-    ctx.builder.SetInsertPoint(exitBB);
-    PHINode *phi = ctx.builder.CreatePHI(T_int1, 2);
-    phi->addIncoming(ConstantInt::get(T_int1, defval), currBB);
-    phi->addIncoming(res, passBB);
-    return phi;
-}
-
-template<typename Func>
-static Value *emit_nullcheck_guard(jl_codectx_t &ctx, Value *nullcheck, Func &&func)
-{
-    if (!nullcheck)
-        return func();
-    return emit_guarded_test(ctx, null_pointer_cmp(ctx, nullcheck), false, func);
-}
-
-template<typename Func>
-static Value *emit_nullcheck_guard2(jl_codectx_t &ctx, Value *nullcheck1,
-                                    Value *nullcheck2, Func &&func)
-{
-    if (!nullcheck1)
-        return emit_nullcheck_guard(ctx, nullcheck2, func);
-    if (!nullcheck2)
-        return emit_nullcheck_guard(ctx, nullcheck1, func);
-    nullcheck1 = null_pointer_cmp(ctx, nullcheck1);
-    nullcheck2 = null_pointer_cmp(ctx, nullcheck2);
-    // If both are NULL, return true.
-    return emit_guarded_test(ctx, ctx.builder.CreateOr(nullcheck1, nullcheck2), true, [&] {
-        return emit_guarded_test(ctx, ctx.builder.CreateAnd(nullcheck1, nullcheck2),
-                                 false, func);
-    });
 }
 
 static Value *emit_box_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                                Value *nullcheck1, Value *nullcheck2)
 {
+    ++EmittedBoxCompares;
     if (jl_pointer_egal(arg1.typ) || jl_pointer_egal(arg2.typ)) {
-        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : arg1.V;
-        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : arg2.V;
-        assert(varg1 && varg2 && (arg1.isboxed || arg1.TIndex) && (arg2.isboxed || arg2.TIndex) &&
-                "Only boxed types are valid for pointer comparison.");
-        varg1 = maybe_decay_tracked(ctx, varg1);
-        varg2 = maybe_decay_tracked(ctx, varg2);
-        if (cast<PointerType>(varg1->getType())->getAddressSpace() != cast<PointerType>(varg2->getType())->getAddressSpace()) {
-            varg1 = decay_derived(ctx, varg1);
-            varg2 = decay_derived(ctx, varg2);
-        }
-        return ctx.builder.CreateICmpEQ(emit_bitcast(ctx, varg1, T_pint8),
-                                        emit_bitcast(ctx, varg2, T_pint8));
+        // if we can be certain we won't try to load from the pointer (because
+        // we know boxed is trivial), we can skip the separate null checks
+        // and just do the ICmpEQ test
+        if (!arg1.TIndex && !arg2.TIndex)
+            nullcheck1 = nullcheck2 = nullptr;
     }
-
     return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] {
-        Value *varg1 = mark_callee_rooted(ctx, boxed(ctx, arg1));
-        Value *varg2 = mark_callee_rooted(ctx, boxed(ctx, arg2));
-        return ctx.builder.CreateTrunc(ctx.builder.CreateCall(prepare_call(jlegal_func),
-                                                              {varg1, varg2}), T_int1);
+        Value *varg1 = decay_derived(ctx, boxed(ctx, arg1));
+        Value *varg2 = decay_derived(ctx, boxed(ctx, arg2));
+        if (jl_pointer_egal(arg1.typ) || jl_pointer_egal(arg2.typ)) {
+            return ctx.builder.CreateICmpEQ(varg1, varg2);
+        }
+        Value *neq = ctx.builder.CreateICmpNE(varg1, varg2);
+        return emit_guarded_test(ctx, neq, true, [&] {
+            Value *dtarg = emit_typeof_boxed(ctx, arg1);
+            Value *dt_eq = ctx.builder.CreateICmpEQ(dtarg, emit_typeof_boxed(ctx, arg2));
+            return emit_guarded_test(ctx, dt_eq, false, [&] {
+                return ctx.builder.CreateTrunc(ctx.builder.CreateCall(prepare_call(jlegalx_func),
+                                                                      {varg1, varg2, dtarg}), getInt1Ty(ctx.builder.getContext()));
+            });
+        });
     });
 }
 
 static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2);
-static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
-                        Value *nullcheck1 = nullptr, Value *nullcheck2 = nullptr);
 
 static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2)
 {
+    ++EmittedBitsUnionCompares;
     assert(jl_egal(arg1.typ, arg2.typ) && arg1.TIndex && arg2.TIndex && jl_is_uniontype(arg1.typ) && "unimplemented");
     Value *tindex = arg1.TIndex;
-    BasicBlock *defaultBB = BasicBlock::Create(jl_LLVMContext, "unionbits_is_boxed", ctx.f);
+    tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
+    Value *tindex2 = arg2.TIndex;
+    tindex2 = ctx.builder.CreateAnd(tindex2, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
+    Value *typeeq = ctx.builder.CreateICmpEQ(tindex, tindex2);
+    tindex = ctx.builder.CreateSelect(typeeq, tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x00));
+    BasicBlock *defaultBB = BasicBlock::Create(ctx.builder.getContext(), "unionbits_is_boxed", ctx.f);
     SwitchInst *switchInst = ctx.builder.CreateSwitch(tindex, defaultBB);
-    BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_unionbits_is", ctx.f);
+    BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_unionbits_is", ctx.f);
     ctx.builder.SetInsertPoint(postBB);
-    PHINode *phi = ctx.builder.CreatePHI(T_int1, 2);
+    PHINode *phi = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2);
+    switchInst->addCase(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), postBB);
+    phi->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), switchInst->getParent());
     unsigned counter = 0;
     bool allunboxed = for_each_uniontype_small(
         [&](unsigned idx, jl_datatype_t *jt) {
-            BasicBlock *tempBB = BasicBlock::Create(jl_LLVMContext, "unionbits_is", ctx.f);
+            BasicBlock *tempBB = BasicBlock::Create(ctx.builder.getContext(), "unionbits_is", ctx.f);
             ctx.builder.SetInsertPoint(tempBB);
-            switchInst->addCase(ConstantInt::get(T_int8, idx), tempBB);
+            switchInst->addCase(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), idx), tempBB);
             jl_cgval_t sel_arg1(arg1, (jl_value_t*)jt, NULL);
             jl_cgval_t sel_arg2(arg2, (jl_value_t*)jt, NULL);
             Value *cmp = emit_bits_compare(ctx, sel_arg1, sel_arg2);
@@ -2416,17 +2566,18 @@ static Value *emit_bitsunion_compare(jl_codectx_t &ctx, const jl_cgval_t &arg1,
     ctx.builder.CreateCall(trap_func);
     ctx.builder.CreateUnreachable();
     ctx.builder.SetInsertPoint(postBB);
-    return ctx.builder.CreateAnd(phi, ctx.builder.CreateICmpEQ(arg1.TIndex, arg2.TIndex));
+    return phi;
 }
 
 static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t arg2)
 {
+    ++EmittedBitsCompares;
     bool isboxed;
     Type *at = julia_type_to_llvm(ctx, arg1.typ, &isboxed);
     assert(jl_is_datatype(arg1.typ) && arg1.typ == arg2.typ && !isboxed);
 
     if (type_is_ghost(at))
-        return ConstantInt::get(T_int1, 1);
+        return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
 
     if (at->isIntegerTy() || at->isPointerTy() || at->isFloatingPointTy()) {
         Type *at_int = INTT(at);
@@ -2437,14 +2588,14 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
 
     if (at->isVectorTy()) {
         jl_svec_t *types = ((jl_datatype_t*)arg1.typ)->types;
-        Value *answer = ConstantInt::get(T_int1, 1);
+        Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
         Value *varg1 = emit_unbox(ctx, at, arg1, arg1.typ);
         Value *varg2 = emit_unbox(ctx, at, arg2, arg2.typ);
         for (size_t i = 0, l = jl_svec_len(types); i < l; i++) {
             jl_value_t *fldty = jl_svecref(types, i);
             Value *subAns, *fld1, *fld2;
-            fld1 = ctx.builder.CreateExtractElement(varg1, ConstantInt::get(T_int32, i)),
-            fld2 = ctx.builder.CreateExtractElement(varg2, ConstantInt::get(T_int32, i)),
+            fld1 = ctx.builder.CreateExtractElement(varg1, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), i)),
+            fld2 = ctx.builder.CreateExtractElement(varg2, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), i)),
             subAns = emit_bits_compare(ctx,
                     mark_julia_type(ctx, fld1, false, fldty),
                     mark_julia_type(ctx, fld2, false, fldty));
@@ -2471,9 +2622,9 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
                 nroots++;
             OperandBundleDef OpBundle("jl_roots", makeArrayRef(gc_uses, nroots));
             auto answer = ctx.builder.CreateCall(prepare_call(memcmp_func), {
-                        ctx.builder.CreateBitCast(varg1, T_pint8),
-                        ctx.builder.CreateBitCast(varg2, T_pint8),
-                        ConstantInt::get(T_size, sz) },
+                        ctx.builder.CreateBitCast(varg1, getInt8PtrTy(ctx.builder.getContext())),
+                        ctx.builder.CreateBitCast(varg2, getInt8PtrTy(ctx.builder.getContext())),
+                        ConstantInt::get(getSizeTy(ctx.builder.getContext()), sz) },
                     ArrayRef<OperandBundleDef>(&OpBundle, nroots ? 1 : 0));
             MDNode *tbaa = nullptr;
             if (!arg1.tbaa) {
@@ -2487,21 +2638,29 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
             }
             if (tbaa)
                 tbaa_decorate(tbaa, answer);
-            return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(T_int32, 0));
+            return ctx.builder.CreateICmpEQ(answer, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
         }
         else {
             jl_svec_t *types = sty->types;
-            Value *answer = ConstantInt::get(T_int1, 1);
+            Value *answer = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1);
             for (size_t i = 0, l = jl_svec_len(types); i < l; i++) {
                 jl_value_t *fldty = jl_svecref(types, i);
                 if (type_is_ghost(julia_type_to_llvm(ctx, fldty)))
                     continue;
                 Value *nullcheck1 = nullptr;
                 Value *nullcheck2 = nullptr;
-                auto fld1 = emit_getfield_knownidx(ctx, arg1, i, sty, &nullcheck1);
-                auto fld2 = emit_getfield_knownidx(ctx, arg2, i, sty, &nullcheck2);
-                answer = ctx.builder.CreateAnd(answer, emit_f_is(ctx, fld1, fld2,
-                                                                 nullcheck1, nullcheck2));
+                auto fld1 = emit_getfield_knownidx(ctx, arg1, i, sty, jl_memory_order_notatomic, &nullcheck1);
+                auto fld2 = emit_getfield_knownidx(ctx, arg2, i, sty, jl_memory_order_notatomic, &nullcheck2);
+                Value *fld_answer;
+                if (jl_field_isptr(sty, i) && jl_is_concrete_immutable(fldty)) {
+                    // concrete immutables that are !isinlinealloc might be reference cycles
+                    // issue #37872
+                    fld_answer = emit_box_compare(ctx, fld1, fld2, nullcheck1, nullcheck2);
+                }
+                else {
+                    fld_answer = emit_f_is(ctx, fld1, fld2, nullcheck1, nullcheck2);
+                }
+                answer = ctx.builder.CreateAnd(answer, fld_answer);
             }
             return answer;
         }
@@ -2515,18 +2674,20 @@ static Value *emit_bits_compare(jl_codectx_t &ctx, jl_cgval_t arg1, jl_cgval_t a
 // representing the undef-ness of `arg1` and `arg2`.
 // This can only happen when comparing two fields of the same time and the result should be
 // true if both are NULL
+// Like the runtime counterpart, this is codegen guaranteed to be non-allocating and to exclude safepoints
 static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgval_t &arg2,
                         Value *nullcheck1, Value *nullcheck2)
 {
+    ++EmittedEgals;
     // handle simple static expressions with no side-effects
     if (arg1.constant && arg2.constant)
-        return ConstantInt::get(T_int1, jl_egal(arg1.constant, arg2.constant));
+        return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), jl_egal(arg1.constant, arg2.constant));
 
     jl_value_t *rt1 = arg1.typ;
     jl_value_t *rt2 = arg2.typ;
     if (jl_is_concrete_type(rt1) && jl_is_concrete_type(rt2) && !jl_is_kind(rt1) && !jl_is_kind(rt2) && rt1 != rt2) {
         // disjoint concrete leaf types are never equal (quick test)
-        return ConstantInt::get(T_int1, 0);
+        return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
     }
 
     if (arg1.isghost || arg2.isghost || arg1.constant == jl_bottom_type ||
@@ -2535,47 +2696,48 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
         // since it is normalized to `::Type{Union{}}` instead...
         if (arg1.TIndex)
             return emit_nullcheck_guard(ctx, nullcheck1, [&] {
-                return emit_isa(ctx, arg1, rt2, NULL).first; // rt2 is a singleton type
+                return emit_exactly_isa(ctx, arg1, rt2); // rt2 is a singleton type
             });
         if (arg2.TIndex)
             return emit_nullcheck_guard(ctx, nullcheck2, [&] {
-                return emit_isa(ctx, arg2, rt1, NULL).first; // rt1 is a singleton type
+                return emit_exactly_isa(ctx, arg2, rt1); // rt1 is a singleton type
             });
+        if (!(arg1.isboxed || arg1.constant) || !(arg2.isboxed || arg2.constant))
+            // not TIndex && not boxed implies it is an unboxed value of a different type from this singleton
+            // (which was probably caught above, but just to be safe, we repeat it here explicitly)
+            return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
+        Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue);
+        Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, ctx.types().T_pjlvalue);
         // rooting these values isn't needed since we won't load this pointer
         // and we know at least one of them is a unique Singleton
         // which is already enough to ensure pointer uniqueness for this test
         // even if the other pointer managed to get garbage collected
-        return ctx.builder.CreateICmpEQ(
-            mark_callee_rooted(ctx, boxed(ctx, arg1)),
-            mark_callee_rooted(ctx, boxed(ctx, arg2)));
+        // TODO: use emit_pointer_from_objref instead, per comment above
+        return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2));
     }
 
     if (jl_type_intersection(rt1, rt2) == (jl_value_t*)jl_bottom_type) // types are disjoint (exhaustive test)
-        return ConstantInt::get(T_int1, 0);
-
-    // If both sides are boxed or can be trivially boxed,
-    // we'll prefer to do a pointer check.
-    // At this point, we know that at least one of the arguments isn't a constant
-    // so a runtime content check will involve at least one load from the
-    // pointer (and likely a type check)
-    // so a pointer comparison should be no worse than that even in imaging mode
-    // when the constant pointer has to be loaded.
-    if ((arg1.V || arg1.constant) && (arg2.V || arg2.constant) &&
-        (jl_pointer_egal(rt1) || jl_pointer_egal(rt2)))
-        return ctx.builder.CreateICmpEQ(boxed(ctx, arg1), boxed(ctx, arg2));
+        return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
 
     bool justbits1 = jl_is_concrete_immutable(rt1);
     bool justbits2 = jl_is_concrete_immutable(rt2);
     if (justbits1 || justbits2) { // whether this type is unique'd by value
         return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] () -> Value* {
             jl_value_t *typ = justbits1 ? rt1 : rt2;
+            if (typ == (jl_value_t*)jl_bool_type) { // aka jl_pointer_egal
+                // some optimizations for bool, since pointer comparison may be better
+                if ((arg1.isboxed || arg1.constant) && (arg2.isboxed || arg2.constant)) { // aka have-fast-pointer
+                    Value *varg1 = arg1.constant ? literal_pointer_val(ctx, arg1.constant) : maybe_bitcast(ctx, arg1.Vboxed, ctx.types().T_pjlvalue);
+                    Value *varg2 = arg2.constant ? literal_pointer_val(ctx, arg2.constant) : maybe_bitcast(ctx, arg2.Vboxed, ctx.types().T_pjlvalue);
+                    return ctx.builder.CreateICmpEQ(decay_derived(ctx, varg1), decay_derived(ctx, varg2));
+                }
+            }
             if (rt1 == rt2)
                 return emit_bits_compare(ctx, arg1, arg2);
-            Value *same_type = (typ == rt2) ? emit_isa(ctx, arg1, typ, NULL).first :
-                emit_isa(ctx, arg2, typ, NULL).first;
+            Value *same_type = emit_exactly_isa(ctx, (typ == rt2 ? arg1 : arg2), typ);
             BasicBlock *currBB = ctx.builder.GetInsertBlock();
-            BasicBlock *isaBB = BasicBlock::Create(jl_LLVMContext, "is", ctx.f);
-            BasicBlock *postBB = BasicBlock::Create(jl_LLVMContext, "post_is", ctx.f);
+            BasicBlock *isaBB = BasicBlock::Create(ctx.builder.getContext(), "is", ctx.f);
+            BasicBlock *postBB = BasicBlock::Create(ctx.builder.getContext(), "post_is", ctx.f);
             ctx.builder.CreateCondBr(same_type, isaBB, postBB);
             ctx.builder.SetInsertPoint(isaBB);
             Value *bitcmp = emit_bits_compare(ctx, jl_cgval_t(arg1, typ, NULL),
@@ -2583,15 +2745,15 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
             isaBB = ctx.builder.GetInsertBlock(); // might have changed
             ctx.builder.CreateBr(postBB);
             ctx.builder.SetInsertPoint(postBB);
-            PHINode *cmp = ctx.builder.CreatePHI(T_int1, 2);
-            cmp->addIncoming(ConstantInt::get(T_int1, 0), currBB);
+            PHINode *cmp = ctx.builder.CreatePHI(getInt1Ty(ctx.builder.getContext()), 2);
+            cmp->addIncoming(ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0), currBB);
             cmp->addIncoming(bitcmp, isaBB);
             return cmp;
         });
     }
 
-    // TODO: handle the case where arg1.typ != arg2.typ, or when one of these isn't union,
-    //       or when the union can be pointer
+    // TODO: handle the case where arg1.typ is not exactly arg2.typ, or when
+    // one of these isn't union, or when the union can be pointer
     if (arg1.TIndex && arg2.TIndex && jl_egal(arg1.typ, arg2.typ) &&
         jl_is_uniontype(arg1.typ) && is_uniontype_allunboxed(arg1.typ))
         return emit_nullcheck_guard2(ctx, nullcheck1, nullcheck2, [&] {
@@ -2601,15 +2763,177 @@ static Value *emit_f_is(jl_codectx_t &ctx, const jl_cgval_t &arg1, const jl_cgva
     return emit_box_compare(ctx, arg1, arg2, nullcheck1, nullcheck2);
 }
 
+static bool emit_f_opglobal(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
+                            const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop)
+{
+    const jl_cgval_t &mod = argv[1];
+    const jl_cgval_t &sym = argv[2];
+    const jl_cgval_t &val = argv[3];
+    enum jl_memory_order order = jl_memory_order_unspecified;
+
+    if (nargs == 4) {
+        const jl_cgval_t &arg4 = argv[4];
+        if (arg4.constant && jl_is_symbol(arg4.constant))
+            order = jl_get_atomic_order((jl_sym_t*)arg4.constant, false, true);
+        else
+            return false;
+    }
+    else
+        order = jl_memory_order_monotonic;
+
+    if (order == jl_memory_order_invalid || order == jl_memory_order_notatomic) {
+        emit_atomic_error(ctx, order == jl_memory_order_invalid ? "invalid atomic ordering" : "setglobal!: module binding cannot be written non-atomically");
+        *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+        return true;
+    }
+
+    if (sym.constant && jl_is_symbol(sym.constant)) {
+        jl_sym_t *name = (jl_sym_t*)sym.constant;
+        if (mod.constant && jl_is_module(mod.constant)) {
+            jl_binding_t *bnd = NULL;
+            Value *bp = global_binding_pointer(ctx, (jl_module_t*)mod.constant, name, &bnd, true);
+            if (bp) {
+                emit_globalset(ctx, bnd, bp, val, get_llvm_atomic_order(order));
+                *ret = val;
+            }
+            else {
+                *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+            }
+            return true;
+        }
+    }
+
+    return false;
+}
+
+static bool emit_f_opfield(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
+                           const jl_cgval_t *argv, size_t nargs, const jl_cgval_t *modifyop)
+{
+    ++EmittedOpfields;
+    bool issetfield = f == jl_builtin_setfield;
+    bool isreplacefield = f == jl_builtin_replacefield;
+    bool isswapfield = f == jl_builtin_swapfield;
+    bool ismodifyfield = f == jl_builtin_modifyfield;
+    const jl_cgval_t undefval(ctx.builder.getContext());
+    const jl_cgval_t &obj = argv[1];
+    const jl_cgval_t &fld = argv[2];
+    jl_cgval_t val = argv[isreplacefield || ismodifyfield ? 4 : 3];
+    const jl_cgval_t &cmp = isreplacefield || ismodifyfield ? argv[3] : undefval;
+    enum jl_memory_order order = jl_memory_order_notatomic;
+    const std::string fname = issetfield ? "setfield!" : isreplacefield ? "replacefield!" : isswapfield ? "swapfield!" : "modifyfield!";
+    if (nargs >= (isreplacefield || ismodifyfield ? 5 : 4)) {
+        const jl_cgval_t &ord = argv[isreplacefield || ismodifyfield ? 5 : 4];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false;
+        order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
+    }
+    enum jl_memory_order fail_order = order;
+    if (isreplacefield && nargs == 6) {
+        const jl_cgval_t &ord = argv[6];
+        emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, fname);
+        if (!ord.constant)
+            return false;
+        fail_order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+    }
+    if (order == jl_memory_order_invalid || fail_order == jl_memory_order_invalid || fail_order > order) {
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+        return true;
+    }
+
+    jl_datatype_t *uty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
+    if (jl_is_datatype(uty) && jl_struct_try_layout(uty)) {
+        ssize_t idx = -1;
+        if (fld.constant && jl_is_symbol(fld.constant)) {
+            idx = jl_field_index(uty, (jl_sym_t*)fld.constant, 0);
+        }
+        else if (fld.constant && fld.typ == (jl_value_t*)jl_long_type) {
+            ssize_t i = jl_unbox_long(fld.constant);
+            if (i > 0 && i <= jl_datatype_nfields(uty))
+                idx = i - 1;
+        }
+        if (idx != -1) {
+            jl_value_t *ft = jl_field_type(uty, idx);
+            if (!jl_has_free_typevars(ft)) {
+                if (!ismodifyfield && !jl_subtype(val.typ, ft)) {
+                    emit_typecheck(ctx, val, ft, fname);
+                    val = update_julia_type(ctx, val, ft);
+                }
+                // TODO: attempt better codegen for approximate types
+                bool isboxed = jl_field_isptr(uty, idx);
+                bool isatomic = jl_field_isatomic(uty, idx);
+                bool needlock = isatomic && !isboxed && jl_datatype_size(jl_field_type(uty, idx)) > MAX_ATOMIC_SIZE;
+                *ret = jl_cgval_t(ctx.builder.getContext());
+                if (isatomic == (order == jl_memory_order_notatomic)) {
+                    emit_atomic_error(ctx,
+                            issetfield ?
+                            (isatomic ? "setfield!: atomic field cannot be written non-atomically"
+                                      : "setfield!: non-atomic field cannot be written atomically") :
+                            isreplacefield ?
+                            (isatomic ? "replacefield!: atomic field cannot be written non-atomically"
+                                      : "replacefield!: non-atomic field cannot be written atomically") :
+                            isswapfield ?
+                            (isatomic ? "swapfield!: atomic field cannot be written non-atomically"
+                                      : "swapfield!: non-atomic field cannot be written atomically") :
+                            (isatomic ? "modifyfield!: atomic field cannot be written non-atomically"
+                                      : "modifyfield!: non-atomic field cannot be written atomically"));
+                }
+                else if (isatomic == (fail_order == jl_memory_order_notatomic)) {
+                    emit_atomic_error(ctx,
+                            (isatomic ? "replacefield!: atomic field cannot be accessed non-atomically"
+                                      : "replacefield!: non-atomic field cannot be accessed atomically"));
+                }
+                else if (!uty->name->mutabl) {
+                    std::string msg = fname + ": immutable struct of type "
+                        + std::string(jl_symbol_name(uty->name->name))
+                        + " cannot be changed";
+                    emit_error(ctx, msg);
+                }
+                else if (jl_field_isconst(uty, idx)) {
+                    std::string msg = fname + ": const field ."
+                        + std::string(jl_symbol_name((jl_sym_t*)jl_svec_ref(jl_field_names(uty), idx)))
+                        + " of type "
+                        + std::string(jl_symbol_name(uty->name->name))
+                        + " cannot be changed";
+                    emit_error(ctx, msg);
+                }
+                else {
+                    *ret = emit_setfield(ctx, uty, obj, idx, val, cmp, true,
+                            (needlock || order <= jl_memory_order_notatomic)
+                            ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                            : get_llvm_atomic_order(order),
+                            (needlock || fail_order <= jl_memory_order_notatomic)
+                            ? (isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic) // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                            : get_llvm_atomic_order(fail_order),
+                            needlock, issetfield, isreplacefield, isswapfield, ismodifyfield,
+                            modifyop, fname);
+                }
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+static jl_llvm_functions_t
+    emit_function(
+        orc::ThreadSafeModule &TSM,
+        jl_method_instance_t *lam,
+        jl_code_info_t *src,
+        jl_value_t *jlrettype,
+        jl_codegen_params_t &params);
+
 static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                               const jl_cgval_t *argv, size_t nargs, jl_value_t *rt,
                               jl_expr_t *ex)
 // returns true if the call has been handled
 {
+    ++EmittedBuiltinCalls;
     if (f == jl_builtin_is && nargs == 2) {
         // emit comparison test
         Value *ans = emit_f_is(ctx, argv[1], argv[2]);
-        *ret = mark_julia_type(ctx, ctx.builder.CreateZExt(ans, T_int8), false, jl_bool_type);
+        *ret = mark_julia_type(ctx, ctx.builder.CreateZExt(ans, getInt8Ty(ctx.builder.getContext())), false, jl_bool_type);
         return true;
     }
 
@@ -2642,8 +2966,8 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         if (jl_is_type_type(ty.typ) && !jl_has_free_typevars(ty.typ)) {
             jl_value_t *tp0 = jl_tparam0(ty.typ);
             Value *isa_result = emit_isa(ctx, arg, tp0, NULL).first;
-            if (isa_result->getType() == T_int1)
-                isa_result = ctx.builder.CreateZExt(isa_result, T_int8);
+            if (isa_result->getType() == getInt1Ty(ctx.builder.getContext()))
+                isa_result = ctx.builder.CreateZExt(isa_result, getInt8Ty(ctx.builder.getContext()));
             *ret = mark_julia_type(ctx, isa_result, false, jl_bool_type);
             return true;
         }
@@ -2655,23 +2979,21 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         if (jl_is_type_type(ta.typ) && !jl_has_free_typevars(ta.typ) &&
             jl_is_type_type(tb.typ) && !jl_has_free_typevars(tb.typ)) {
             int issub = jl_subtype(jl_tparam0(ta.typ), jl_tparam0(tb.typ));
-            *ret = mark_julia_type(ctx, ConstantInt::get(T_int8, issub), false, jl_bool_type);
+            *ret = mark_julia_type(ctx, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), issub), false, jl_bool_type);
             return true;
         }
     }
 
-    else if (((f == jl_builtin__apply && nargs == 2) ||
-              (f == jl_builtin__apply_iterate && nargs == 3)) && ctx.vaSlot > 0) {
-        int arg_start = f == jl_builtin__apply ? 2 : 3;
-        // turn Core._apply(f, Tuple) ==> f(Tuple...) using the jlcall calling convention if Tuple is the va allocation
-        if (LoadInst *load = dyn_cast_or_null<LoadInst>(argv[arg_start].V)) {
+    else if ((f == jl_builtin__apply_iterate && nargs == 3) && ctx.vaSlot > 0) {
+        // turn Core._apply_iterate(iter, f, Tuple) ==> f(Tuple...) using the jlcall calling convention if Tuple is the va allocation
+        if (LoadInst *load = dyn_cast_or_null<LoadInst>(argv[3].V)) {
             if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
-                Value *theF = boxed(ctx, argv[arg_start-1]);
+                Value *theF = boxed(ctx, argv[2]);
                 Value *nva = emit_n_varargs(ctx);
 #ifdef _P64
-                nva = ctx.builder.CreateTrunc(nva, T_int32);
+                nva = ctx.builder.CreateTrunc(nva, getInt32Ty(ctx.builder.getContext()));
 #endif
-                Value *theArgs = ctx.builder.CreateInBoundsGEP(T_prjlvalue, ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs));
+                Value *theArgs = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs));
                 Value *r = ctx.builder.CreateCall(prepare_call(jlapplygeneric_func), { theF, theArgs, nva });
                 *ret = mark_julia_type(ctx, r, true, jl_any_type);
                 return true;
@@ -2681,7 +3003,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
     else if (f == jl_builtin_tuple) {
         if (nargs == 0) {
-            *ret = ghostValue(jl_emptytuple_type);
+            *ret = ghostValue(ctx, jl_emptytuple_type);
             return true;
         }
         if (jl_is_tuple_type(rt) && jl_is_concrete_type(rt) && nargs == jl_datatype_nfields(rt)) {
@@ -2693,7 +3015,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
     else if (f == jl_builtin_throw && nargs == 1) {
         Value *arg1 = boxed(ctx, argv[1]);
         raise_exception(ctx, arg1);
-        *ret = jl_cgval_t();
+        *ret = jl_cgval_t(ctx.builder.getContext());
         return true;
     }
 
@@ -2713,22 +3035,22 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                         return true;
                     }
                     else if (idx_const > ndims) {
-                        *ret = mark_julia_type(ctx, ConstantInt::get(T_size, 1), false, jl_long_type);
+                        *ret = mark_julia_type(ctx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1), false, jl_long_type);
                         return true;
                     }
                 }
                 else {
-                    Value *idx_dyn = emit_unbox(ctx, T_size, idx, (jl_value_t*)jl_long_type);
-                    error_unless(ctx, ctx.builder.CreateICmpSGT(idx_dyn, V_size0),
+                    Value *idx_dyn = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), idx, (jl_value_t*)jl_long_type);
+                    error_unless(ctx, ctx.builder.CreateICmpSGT(idx_dyn, Constant::getNullValue(getSizeTy(ctx.builder.getContext()))),
                                  "arraysize: dimension out of range");
-                    BasicBlock *outBB = BasicBlock::Create(jl_LLVMContext, "outofrange", ctx.f);
-                    BasicBlock *inBB = BasicBlock::Create(jl_LLVMContext, "inrange");
-                    BasicBlock *ansBB = BasicBlock::Create(jl_LLVMContext, "arraysize");
+                    BasicBlock *outBB = BasicBlock::Create(ctx.builder.getContext(), "outofrange", ctx.f);
+                    BasicBlock *inBB = BasicBlock::Create(ctx.builder.getContext(), "inrange");
+                    BasicBlock *ansBB = BasicBlock::Create(ctx.builder.getContext(), "arraysize");
                     ctx.builder.CreateCondBr(ctx.builder.CreateICmpSLE(idx_dyn,
-                                ConstantInt::get(T_size, ndims)),
+                                ConstantInt::get(getSizeTy(ctx.builder.getContext()), ndims)),
                             inBB, outBB);
                     ctx.builder.SetInsertPoint(outBB);
-                    Value *v_one = ConstantInt::get(T_size, 1);
+                    Value *v_one = ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1);
                     ctx.builder.CreateBr(ansBB);
                     ctx.f->getBasicBlockList().push_back(inBB);
                     ctx.builder.SetInsertPoint(inBB);
@@ -2737,7 +3059,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     inBB = ctx.builder.GetInsertBlock(); // could have changed
                     ctx.f->getBasicBlockList().push_back(ansBB);
                     ctx.builder.SetInsertPoint(ansBB);
-                    PHINode *result = ctx.builder.CreatePHI(T_size, 2);
+                    PHINode *result = ctx.builder.CreatePHI(getSizeTy(ctx.builder.getContext()), 2);
                     result->addIncoming(v_one, outBB);
                     result->addIncoming(v_sz, inBB);
                     *ret = mark_julia_type(ctx, result, false, jl_long_type);
@@ -2769,41 +3091,45 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                     ety = (jl_value_t*)jl_any_type;
                 ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
                 jl_value_t *boundscheck = argv[1].constant;
+                emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayref");
                 Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[3], nargs - 2, boundscheck);
                 if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
                     assert(((jl_datatype_t*)ety)->instance != NULL);
-                    *ret = ghostValue(ety);
+                    *ret = ghostValue(ctx, ety);
                 }
                 else if (!isboxed && jl_is_uniontype(ety)) {
-                    Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
-                    Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
-                    // isbits union selector bytes are stored after a->maxsize
-                    Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
-                    Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
+                    Value *data = emit_arrayptr(ctx, ary, ary_ex);
                     Value *offset = emit_arrayoffset(ctx, ary, nd);
-                    Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
-                    Value *selidx_m = emit_arraylen(ctx, ary);
-                    Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
-                    Value *ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
-                    ptindex = emit_bitcast(ctx, ptindex, T_pint8);
-                    ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
-                    ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
-                    Instruction *tindex = tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateAlignedLoad(T_int8, ptindex, Align(1)));
-                    tindex->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
-                        ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
-                        ConstantAsMetadata::get(ConstantInt::get(T_int8, union_max)) }));
-                    AllocaInst *lv = emit_static_alloca(ctx, AT);
-                    if (al > 1)
-                        lv->setAlignment(Align(al));
-                    emit_memcpy(ctx, lv, tbaa_arraybuf, ctx.builder.CreateInBoundsGEP(AT, data, idx), tbaa_arraybuf, elsz, al, false);
-                    *ret = mark_julia_slot(lv, ety, ctx.builder.CreateNUWAdd(ConstantInt::get(T_int8, 1), tindex), tbaa_arraybuf);
+                    Value *ptindex;
+                    if (elsz == 0) {
+                        ptindex = data;
+                    }
+                    else {
+                        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al);
+                        data = emit_bitcast(ctx, data, AT->getPointerTo());
+                        // isbits union selector bytes are stored after a->maxsize
+                        Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd));
+                        Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1));
+                        Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, getSizeTy(ctx.builder.getContext())));
+                        Value *selidx_m = emit_arraylen(ctx, ary);
+                        Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
+                        ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
+                        data = ctx.builder.CreateInBoundsGEP(AT, data, idx);
+                    }
+                    ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext()));
+                    ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset);
+                    ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx);
+                    *ret = emit_unionload(ctx, data, ptindex, ety, elsz, al, ctx.tbaa().tbaa_arraybuf, true, union_max, ctx.tbaa().tbaa_arrayselbyte);
                 }
                 else {
                     MDNode *aliasscope = (f == jl_builtin_const_arrayref) ? ctx.aliasscope : nullptr;
                     *ret = typed_load(ctx,
                             emit_arrayptr(ctx, ary, ary_ex),
                             idx, ety,
-                            !isboxed ? tbaa_arraybuf : tbaa_ptrarraybuf, aliasscope);
+                            isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf,
+                            aliasscope,
+                            isboxed,
+                            AtomicOrdering::NotAtomic);
                 }
                 return true;
             }
@@ -2812,7 +3138,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
 
     else if (f == jl_builtin_arrayset && nargs >= 4) {
         const jl_cgval_t &ary = argv[2];
-        const jl_cgval_t &val = argv[3];
+        jl_cgval_t val = argv[3];
         bool indices_ok = true;
         for (size_t i = 4; i <= nargs; i++) {
             if (argv[i].typ != (jl_value_t*)jl_long_type) {
@@ -2825,205 +3151,286 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             jl_value_t *ety = jl_tparam0(aty_dt);
             jl_value_t *ndp = jl_tparam1(aty_dt);
             if (!jl_has_free_typevars(ety) && (jl_is_long(ndp) || nargs == 4)) {
-                if (jl_subtype(val.typ, ety)) { // TODO: probably should just convert this to a type-assert
-                    size_t elsz = 0, al = 0;
-                    int union_max = jl_islayout_inline(ety, &elsz, &al);
-                    bool isboxed = (union_max == 0);
-                    if (isboxed)
-                        ety = (jl_value_t*)jl_any_type;
-                    jl_value_t *ary_ex = jl_exprarg(ex, 2);
-                    ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
-                    jl_value_t *boundscheck = argv[1].constant;
-                    Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
-                    if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
-                        // no-op
+                if (!jl_subtype(val.typ, ety)) {
+                    emit_typecheck(ctx, val, ety, "arrayset");
+                    val = update_julia_type(ctx, val, ety);
+                }
+                size_t elsz = 0, al = 0;
+                int union_max = jl_islayout_inline(ety, &elsz, &al);
+                bool isboxed = (union_max == 0);
+                if (isboxed)
+                    ety = (jl_value_t*)jl_any_type;
+                jl_value_t *ary_ex = jl_exprarg(ex, 2);
+                ssize_t nd = jl_is_long(ndp) ? jl_unbox_long(ndp) : -1;
+                jl_value_t *boundscheck = argv[1].constant;
+                emit_typecheck(ctx, argv[1], (jl_value_t*)jl_bool_type, "arrayset");
+                Value *idx = emit_array_nd_index(ctx, ary, ary_ex, nd, &argv[4], nargs - 3, boundscheck);
+                if (!isboxed && jl_is_datatype(ety) && jl_datatype_size(ety) == 0) {
+                    // no-op
+                }
+                else {
+                    PHINode *data_owner = NULL; // owner object against which the write barrier must check
+                    if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
+                        Value *aryv = boxed(ctx, ary);
+                        Value *flags = emit_arrayflags(ctx, ary);
+                        // the owner of the data is ary itself except if ary->how == 3
+                        flags = ctx.builder.CreateAnd(flags, 3);
+                        Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 3));
+                        BasicBlock *curBB = ctx.builder.GetInsertBlock();
+                        BasicBlock *ownedBB = BasicBlock::Create(ctx.builder.getContext(), "array_owned", ctx.f);
+                        BasicBlock *mergeBB = BasicBlock::Create(ctx.builder.getContext(), "merge_own", ctx.f);
+                        ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
+                        ctx.builder.SetInsertPoint(ownedBB);
+                        // load owner pointer
+                        Instruction *own_ptr;
+                        if (jl_is_long(ndp)) {
+                            own_ptr = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
+                                    ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue,
+                                        emit_bitcast(ctx, decay_derived(ctx, aryv), ctx.types().T_pprjlvalue),
+                                        jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
+                                    Align(sizeof(void*)));
+                            tbaa_decorate(ctx.tbaa().tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
+                        }
+                        else {
+                            own_ptr = ctx.builder.CreateCall(
+                                prepare_call(jlarray_data_owner_func),
+                                {aryv});
+                        }
+                        ctx.builder.CreateBr(mergeBB);
+                        ctx.builder.SetInsertPoint(mergeBB);
+                        data_owner = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
+                        data_owner->addIncoming(aryv, curBB);
+                        data_owner->addIncoming(own_ptr, ownedBB);
                     }
-                    else {
-                        PHINode *data_owner = NULL; // owner object against which the write barrier must check
-                        if (isboxed || (jl_is_datatype(ety) && ((jl_datatype_t*)ety)->layout->npointers > 0)) { // if elements are just bits, don't need a write barrier
-                            Value *aryv = boxed(ctx, ary);
-                            Value *flags = emit_arrayflags(ctx, ary);
-                            // the owner of the data is ary itself except if ary->how == 3
-                            flags = ctx.builder.CreateAnd(flags, 3);
-                            Value *is_owned = ctx.builder.CreateICmpEQ(flags, ConstantInt::get(T_int16, 3));
-                            BasicBlock *curBB = ctx.builder.GetInsertBlock();
-                            BasicBlock *ownedBB = BasicBlock::Create(jl_LLVMContext, "array_owned", ctx.f);
-                            BasicBlock *mergeBB = BasicBlock::Create(jl_LLVMContext, "merge_own", ctx.f);
-                            ctx.builder.CreateCondBr(is_owned, ownedBB, mergeBB);
-                            ctx.builder.SetInsertPoint(ownedBB);
-                            // load owner pointer
-                            Instruction *own_ptr;
-                            if (jl_is_long(ndp)) {
-                                own_ptr = ctx.builder.CreateAlignedLoad(T_prjlvalue,
-                                        ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue,
-                                            emit_bitcast(ctx, decay_derived(ctx, aryv), T_pprjlvalue),
-                                            jl_array_data_owner_offset(nd) / sizeof(jl_value_t*)),
-                                        Align(sizeof(void*)));
-                                tbaa_decorate(tbaa_const, maybe_mark_load_dereferenceable(own_ptr, false, (jl_value_t*)jl_array_any_type));
-                            }
-                            else {
-                                own_ptr = ctx.builder.CreateCall(
-                                    prepare_call(jlarray_data_owner_func),
-                                    {aryv});
-                            }
-                            ctx.builder.CreateBr(mergeBB);
-                            ctx.builder.SetInsertPoint(mergeBB);
-                            data_owner = ctx.builder.CreatePHI(T_prjlvalue, 2);
-                            data_owner->addIncoming(aryv, curBB);
-                            data_owner->addIncoming(own_ptr, ownedBB);
+                    if (!isboxed && jl_is_uniontype(ety)) {
+                        Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (elsz + al - 1) / al);
+                        Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
+                        Value *offset = emit_arrayoffset(ctx, ary, nd);
+                        // compute tindex from val
+                        jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
+                        Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
+                        tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1));
+                        Value *ptindex;
+                        if (elsz == 0) {
+                            ptindex = data;
                         }
-                        if (jl_is_uniontype(ety)) {
-                            Type *AT = ArrayType::get(IntegerType::get(jl_LLVMContext, 8 * al), (elsz + al - 1) / al);
-                            Value *data = emit_bitcast(ctx, emit_arrayptr(ctx, ary, ary_ex), AT->getPointerTo());
-                            // compute tindex from val
-                            jl_cgval_t rhs_union = convert_julia_type(ctx, val, ety);
-                            Value *tindex = compute_tindex_unboxed(ctx, rhs_union, ety);
-                            tindex = ctx.builder.CreateNUWSub(tindex, ConstantInt::get(T_int8, 1));
-                            Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(T_int16, nd));
-                            Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(T_int16, 1));
-                            Value *offset = emit_arrayoffset(ctx, ary, nd);
-                            Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, T_size));
+                        else {
+                            Value *ndims = (nd == -1 ? emit_arrayndims(ctx, ary) : ConstantInt::get(getInt16Ty(ctx.builder.getContext()), nd));
+                            Value *is_vector = ctx.builder.CreateICmpEQ(ndims, ConstantInt::get(getInt16Ty(ctx.builder.getContext()), 1));
+                            Value *selidx_v = ctx.builder.CreateSub(emit_vectormaxsize(ctx, ary), ctx.builder.CreateZExt(offset, getSizeTy(ctx.builder.getContext())));
                             Value *selidx_m = emit_arraylen(ctx, ary);
                             Value *selidx = ctx.builder.CreateSelect(is_vector, selidx_v, selidx_m);
-                            Value *ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
-                            ptindex = emit_bitcast(ctx, ptindex, T_pint8);
-                            ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, offset);
-                            ptindex = ctx.builder.CreateInBoundsGEP(T_int8, ptindex, idx);
-                            tbaa_decorate(tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex));
-                            if (jl_is_datatype(val.typ) && jl_datatype_size(val.typ) == 0) {
-                                // no-op
-                            }
-                            else {
-                                // copy data
-                                Value *addr = ctx.builder.CreateInBoundsGEP(AT, data, idx);
-                                emit_unionmove(ctx, addr, tbaa_arraybuf, val, nullptr);
-                            }
+                            ptindex = ctx.builder.CreateInBoundsGEP(AT, data, selidx);
+                            data = ctx.builder.CreateInBoundsGEP(AT, data, idx);
                         }
-                        else {
-                            typed_store(ctx,
-                                        emit_arrayptr(ctx, ary, ary_ex, isboxed),
-                                        idx, val, ety,
-                                        !isboxed ? tbaa_arraybuf : tbaa_ptrarraybuf,
-                                        ctx.aliasscope, data_owner, 0);
+                        ptindex = emit_bitcast(ctx, ptindex, getInt8PtrTy(ctx.builder.getContext()));
+                        ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, offset);
+                        ptindex = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), ptindex, idx);
+                        tbaa_decorate(ctx.tbaa().tbaa_arrayselbyte, ctx.builder.CreateStore(tindex, ptindex));
+                        if (elsz > 0 && (!jl_is_datatype(val.typ) || jl_datatype_size(val.typ) > 0)) {
+                            // copy data (if any)
+                            emit_unionmove(ctx, data, ctx.tbaa().tbaa_arraybuf, val, nullptr);
                         }
                     }
-                    *ret = ary;
-                    return true;
+                    else {
+                        typed_store(ctx,
+                                    emit_arrayptr(ctx, ary, ary_ex, isboxed),
+                                    idx, val, jl_cgval_t(ctx.builder.getContext()), ety,
+                                    isboxed ? ctx.tbaa().tbaa_ptrarraybuf : ctx.tbaa().tbaa_arraybuf,
+                                    ctx.aliasscope,
+                                    data_owner,
+                                    isboxed,
+                                    isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                                    isboxed ? AtomicOrdering::Unordered : AtomicOrdering::NotAtomic, // TODO: we should do this for anything with CountTrackedPointers(elty).count > 0
+                                    0,
+                                    false,
+                                    true,
+                                    false,
+                                    false,
+                                    false,
+                                    false,
+                                    nullptr,
+                                    "");
+                    }
                 }
+                *ret = ary;
+                return true;
             }
         }
     }
 
-    else if (f == jl_builtin_getfield && (nargs == 2 || nargs == 3)) {
+    else if (f == jl_builtin_getfield && (nargs == 2 || nargs == 3 || nargs == 4)) {
         const jl_cgval_t &obj = argv[1];
         const jl_cgval_t &fld = argv[2];
-        if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
-            *ret = emit_getfield(ctx, argv[1], (jl_sym_t*)fld.constant);
+        enum jl_memory_order order = jl_memory_order_unspecified;
+        jl_value_t *boundscheck = jl_true;
+
+        if (nargs == 4) {
+            const jl_cgval_t &ord = argv[3];
+            const jl_cgval_t &inb = argv[4];
+            emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, "getfield");
+            emit_typecheck(ctx, inb, (jl_value_t*)jl_bool_type, "getfield");
+            if (!ord.constant)
+                return false;
+            order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+            if (inb.constant == jl_false)
+                boundscheck = jl_false;
+        }
+        else if (nargs == 3) {
+            const jl_cgval_t &arg3 = argv[3];
+            if (arg3.constant && jl_is_symbol(arg3.constant))
+                order = jl_get_atomic_order((jl_sym_t*)arg3.constant, true, false);
+            else if (arg3.constant == jl_false)
+                boundscheck = jl_false;
+            else if (arg3.typ != (jl_value_t*)jl_bool_type)
+                return false;
+        }
+        if (order == jl_memory_order_invalid) {
+            emit_atomic_error(ctx, "invalid atomic ordering");
+            *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
             return true;
         }
 
-        if (fld.typ == (jl_value_t*)jl_long_type) {
+        jl_datatype_t *utt = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
+        if (jl_is_type_type((jl_value_t*)utt) && jl_is_concrete_type(jl_tparam0(utt)))
+            utt = (jl_datatype_t*)jl_typeof(jl_tparam0(utt));
+
+        if (fld.constant && jl_is_symbol(fld.constant)) {
+            jl_sym_t *name = (jl_sym_t*)fld.constant;
+            if (obj.constant && jl_is_module(obj.constant)) {
+                *ret = emit_globalref(ctx, (jl_module_t*)obj.constant, name, order == jl_memory_order_unspecified ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
+                return true;
+            }
+
+            if (jl_is_datatype(utt) && jl_struct_try_layout(utt)) {
+                ssize_t idx = jl_field_index(utt, name, 0);
+                if (idx != -1 && !jl_has_free_typevars(jl_field_type(utt, idx))) {
+                    *ret = emit_getfield_knownidx(ctx, obj, idx, utt, order);
+                    return true;
+                }
+            }
+        }
+        else if (fld.typ == (jl_value_t*)jl_long_type) {
             if (ctx.vaSlot > 0) {
                 // optimize VA tuple
                 if (LoadInst *load = dyn_cast_or_null<LoadInst>(obj.V)) {
                     if (load->getPointerOperand() == ctx.slots[ctx.vaSlot].boxroot && ctx.argArray) {
                         Value *valen = emit_n_varargs(ctx);
                         jl_cgval_t va_ary( // fake instantiation of a cgval, in order to call emit_bounds_check
-                                ctx.builder.CreateInBoundsGEP(T_prjlvalue, ctx.argArray, ConstantInt::get(T_size, ctx.nReqArgs)),
-                                NULL, false, NULL, NULL);
-                        Value *idx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type);
-                        jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
+                                ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs)),
+                                NULL, false, NULL, NULL, ctx.tbaa());
+                        Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
                         idx = emit_bounds_check(ctx, va_ary, NULL, idx, valen, boundscheck);
-                        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(T_size, ctx.nReqArgs));
-                        Instruction *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.argArray, idx), Align(sizeof(void*)));
+                        idx = ctx.builder.CreateAdd(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), ctx.nReqArgs));
+                        Instruction *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, ctx.argArray, idx), Align(sizeof(void*)));
                         // if we know the result type of this load, we will mark that information here too
-                        tbaa_decorate(tbaa_value, maybe_mark_load_dereferenceable(v, false, rt));
+                        tbaa_decorate(ctx.tbaa().tbaa_value, maybe_mark_load_dereferenceable(v, false, rt));
                         *ret = mark_julia_type(ctx, v, /*boxed*/ true, rt);
                         return true;
                     }
                 }
             }
 
-            jl_datatype_t *utt = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
-            if (jl_is_datatype(utt) && utt->layout) {
-                if ((jl_is_structtype(utt) || jl_is_tuple_type(utt)) && !jl_subtype((jl_value_t*)jl_module_type, obj.typ)) {
+            if (jl_is_datatype(utt)) {
+                if (jl_struct_try_layout(utt)) {
                     size_t nfields = jl_datatype_nfields(utt);
                     // integer index
                     size_t idx;
                     if (fld.constant && (idx = jl_unbox_long(fld.constant) - 1) < nfields) {
-                        // known index
-                        *ret = emit_getfield_knownidx(ctx, obj, idx, utt);
-                        return true;
+                        if (!jl_has_free_typevars(jl_field_type(utt, idx))) {
+                            // known index
+                            *ret = emit_getfield_knownidx(ctx, obj, idx, utt, order);
+                            return true;
+                        }
                     }
                     else {
                         // unknown index
-                        Value *vidx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type);
-                        jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
-                        if (emit_getfield_unknownidx(ctx, ret, obj, vidx, utt, boundscheck)) {
+                        Value *vidx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
+                        if (emit_getfield_unknownidx(ctx, ret, obj, vidx, utt, boundscheck, order)) {
                             return true;
                         }
                     }
                 }
-            }
-            else {
-                if (jl_is_tuple_type(utt) && is_tupletype_homogeneous(utt->types, true)) {
+                if (jl_is_tuple_type(utt) && is_tupletype_homogeneous(utt->parameters, true)) {
                     // For tuples, we can emit code even if we don't know the exact
                     // type (e.g. because we don't know the length). This is possible
                     // as long as we know that all elements are of the same (leaf) type.
                     if (obj.ispointer()) {
+                        if (order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+                            emit_atomic_error(ctx, "getfield: non-atomic field cannot be accessed atomically");
+                            *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+                            return true;
+                        }
                         // Determine which was the type that was homogenous
                         jl_value_t *jt = jl_tparam0(utt);
-                        if (jl_is_vararg_type(jt))
+                        if (jl_is_vararg(jt))
                             jt = jl_unwrap_vararg(jt);
-                        Value *vidx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type);
+                        assert(jl_is_datatype(jt));
+                        Value *vidx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
                         // This is not necessary for correctness, but allows to omit
                         // the extra code for getting the length of the tuple
-                        jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
                         if (!bounds_check_enabled(ctx, boundscheck)) {
-                            vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(T_size, 1));
+                            vidx = ctx.builder.CreateSub(vidx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
                         } else {
                             vidx = emit_bounds_check(ctx, obj, (jl_value_t*)obj.typ, vidx,
                                 emit_datatype_nfields(ctx, emit_typeof_boxed(ctx, obj)),
                                 jl_true);
                         }
-                        bool isboxed = !jl_datatype_isinlinealloc(jt);
+                        bool isboxed = !jl_datatype_isinlinealloc((jl_datatype_t*)jt, 0);
                         Value *ptr = maybe_decay_tracked(ctx, data_pointer(ctx, obj));
                         *ret = typed_load(ctx, ptr, vidx,
                                 isboxed ? (jl_value_t*)jl_any_type : jt,
-                                obj.tbaa, nullptr, false);
+                                obj.tbaa, nullptr, isboxed, AtomicOrdering::NotAtomic, false);
                         return true;
                     }
                 }
             }
         }
+        // TODO: generic getfield func with more efficient calling convention
+        return false;
     }
 
-    else if (f == jl_builtin_setfield && nargs == 3) {
-        const jl_cgval_t &obj = argv[1];
-        const jl_cgval_t &fld = argv[2];
-        const jl_cgval_t &val = argv[3];
-
-        jl_datatype_t *uty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
-        if (jl_is_structtype(uty) && uty != jl_module_type && uty->layout) {
-            size_t idx = (size_t)-1;
-            if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
-                idx = jl_field_index(uty, (jl_sym_t*)fld.constant, 0);
-            }
-            else if (fld.constant && fld.typ == (jl_value_t*)jl_long_type) {
-                ssize_t i = jl_unbox_long(fld.constant);
-                if (i > 0 && i <= jl_datatype_nfields(uty))
-                    idx = i - 1;
-            }
-            if (idx != (size_t)-1) {
-                jl_value_t *ft = jl_svecref(uty->types, idx);
-                if (jl_subtype(val.typ, ft)) {
-                    // TODO: attempt better codegen for approximate types
-                    emit_setfield(ctx, uty, obj, idx, val, true, true);
-                    *ret = val;
-                    return true;
-                }
+    else if (f == jl_builtin_getglobal && (nargs == 2 || nargs == 3)) {
+        const jl_cgval_t &mod = argv[1];
+        const jl_cgval_t &sym = argv[2];
+        enum jl_memory_order order = jl_memory_order_unspecified;
+
+        if (nargs == 3) {
+            const jl_cgval_t &arg3 = argv[3];
+            if (arg3.constant && jl_is_symbol(arg3.constant))
+                order = jl_get_atomic_order((jl_sym_t*)arg3.constant, true, false);
+            else
+                return false;
+        }
+        else
+            order = jl_memory_order_monotonic;
+
+        if (order == jl_memory_order_invalid || order == jl_memory_order_notatomic) {
+            emit_atomic_error(ctx, order == jl_memory_order_invalid ? "invalid atomic ordering" : "getglobal: module binding cannot be read non-atomically");
+            *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+            return true;
+        }
+
+        if (sym.constant && jl_is_symbol(sym.constant)) {
+            jl_sym_t *name = (jl_sym_t*)sym.constant;
+            if (mod.constant && jl_is_module(mod.constant)) {
+                *ret = emit_globalref(ctx, (jl_module_t*)mod.constant, name, get_llvm_atomic_order(order));
+                return true;
             }
         }
+
+        return false;
+    }
+
+    else if (f == jl_builtin_setglobal && (nargs == 3 || nargs == 4)) {
+        return emit_f_opglobal(ctx, ret, f, argv, nargs, nullptr);
+    }
+
+    else if ((f == jl_builtin_setfield && (nargs == 3 || nargs == 4)) ||
+             (f == jl_builtin_swapfield && (nargs == 3 || nargs == 4)) ||
+             (f == jl_builtin_replacefield && (nargs == 4 || nargs == 5 || nargs == 6)) ||
+             (f == jl_builtin_modifyfield && (nargs == 4 || nargs == 5))) {
+        return emit_f_opfield(ctx, ret, f, argv, nargs, nullptr);
     }
 
     else if (f == jl_builtin_nfields && nargs == 1) {
@@ -3044,14 +3451,14 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         else if (jl_is_type_type(obj.typ)) {
             jl_value_t *tp0 = jl_tparam0(obj.typ);
             if (jl_is_datatype(tp0) && jl_is_datatype_singleton((jl_datatype_t*)tp0))
-                nf = jl_datatype_nfields(jl_typeof(tp0));
+                nf = jl_datatype_nfields((jl_value_t*)jl_datatype_type);
         }
         else if (jl_is_concrete_type(obj.typ)) {
             nf = jl_datatype_nfields(obj.typ);
         }
         Value *sz;
         if (nf != -1)
-            sz = ConstantInt::get(T_size, nf);
+            sz = ConstantInt::get(getSizeTy(ctx.builder.getContext()), nf);
         else
             sz = emit_datatype_nfields(ctx, emit_typeof_boxed(ctx, obj));
         *ret = mark_julia_type(ctx, sz, false, jl_long_type);
@@ -3068,11 +3475,13 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 Value *tyv = boxed(ctx, typ);
                 Value *types_svec = emit_datatype_types(ctx, tyv);
                 Value *types_len = emit_datatype_nfields(ctx, tyv);
-                Value *idx = emit_unbox(ctx, T_size, fld, (jl_value_t*)jl_long_type);
+                Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), fld, (jl_value_t*)jl_long_type);
                 jl_value_t *boundscheck = (nargs == 3 ? argv[3].constant : jl_true);
+                if (nargs == 3)
+                    emit_typecheck(ctx, argv[3], (jl_value_t*)jl_bool_type, "fieldtype");
                 emit_bounds_check(ctx, typ, (jl_value_t*)jl_datatype_type, idx, types_len, boundscheck);
-                Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, T_pprjlvalue)), idx);
-                Value *fieldtyp = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, fieldtyp_p, Align(sizeof(void*))));
+                Value *fieldtyp_p = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, decay_derived(ctx, emit_bitcast(ctx, types_svec, ctx.types().T_pprjlvalue)), idx);
+                Value *fieldtyp = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, fieldtyp_p, Align(sizeof(void*))));
                 *ret = mark_julia_type(ctx, fieldtyp, true, (jl_value_t*)jl_type_type);
                 return true;
             }
@@ -3082,7 +3491,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
     else if (f == jl_builtin_sizeof && nargs == 1) {
         const jl_cgval_t &obj = argv[1];
         jl_datatype_t *sty = (jl_datatype_t*)jl_unwrap_unionall(obj.typ);
-        assert(jl_string_type->mutabl);
+        assert(jl_string_type->name->mutabl);
         if (sty == jl_string_type || sty == jl_simplevector_type) {
             if (obj.constant) {
                 size_t sz;
@@ -3092,22 +3501,22 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
                 else {
                     sz = (1 + jl_svec_len(obj.constant)) * sizeof(void*);
                 }
-                *ret = mark_julia_type(ctx, ConstantInt::get(T_size, sz), false, jl_long_type);
+                *ret = mark_julia_type(ctx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sz), false, jl_long_type);
                 return true;
             }
             // String and SimpleVector's length fields have the same layout
-            auto ptr = emit_bitcast(ctx, boxed(ctx, obj), T_psize);
-            Value *len = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_size, ptr, Align(sizeof(size_t))));
-            MDBuilder MDB(jl_LLVMContext);
+            auto ptr = emit_bitcast(ctx, boxed(ctx, obj), getSizePtrTy(ctx.builder.getContext()));
+            Value *len = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ptr, Align(sizeof(size_t))));
+            MDBuilder MDB(ctx.builder.getContext());
             if (sty == jl_simplevector_type) {
                 auto rng = MDB.createRange(
-                    V_size0, ConstantInt::get(T_size, INTPTR_MAX / sizeof(void*) - 1));
+                    Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX / sizeof(void*) - 1));
                 cast<LoadInst>(len)->setMetadata(LLVMContext::MD_range, rng);
-                len = ctx.builder.CreateMul(len, ConstantInt::get(T_size, sizeof(void*)));
-                len = ctx.builder.CreateAdd(len, ConstantInt::get(T_size, sizeof(void*)));
+                len = ctx.builder.CreateMul(len, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(void*)));
+                len = ctx.builder.CreateAdd(len, ConstantInt::get(getSizeTy(ctx.builder.getContext()), sizeof(void*)));
             }
             else {
-                auto rng = MDB.createRange(V_size0, ConstantInt::get(T_size, INTPTR_MAX));
+                auto rng = MDB.createRange(Constant::getNullValue(getSizeTy(ctx.builder.getContext())), ConstantInt::get(getSizeTy(ctx.builder.getContext()), INTPTR_MAX));
                 cast<LoadInst>(len)->setMetadata(LLVMContext::MD_range, rng);
             }
             *ret = mark_julia_type(ctx, len, false, jl_long_type);
@@ -3118,10 +3527,10 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             Value *elsize;
             size_t elsz;
             if (arraytype_constelsize(sty, &elsz)) {
-                elsize = ConstantInt::get(T_size, elsz);
+                elsize = ConstantInt::get(getSizeTy(ctx.builder.getContext()), elsz);
             }
             else {
-                elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), T_size);
+                elsize = ctx.builder.CreateZExt(emit_arrayelsize(ctx, obj), getSizeTy(ctx.builder.getContext()));
             }
             *ret = mark_julia_type(ctx, ctx.builder.CreateMul(len, elsize), false, jl_long_type);
             return true;
@@ -3134,13 +3543,13 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             jl_value_t *ty = static_apply_type(ctx, argv, nargs + 1);
             if (ty != NULL) {
                 jl_add_method_root(ctx, ty);
-                *ret = mark_julia_const(ty);
+                *ret = mark_julia_const(ctx, ty);
                 return true;
             }
         }
     }
 
-    else if (f == jl_builtin_isdefined && nargs == 2) {
+    else if (f == jl_builtin_isdefined && (nargs == 2 || nargs == 3)) {
         const jl_cgval_t &obj = argv[1];
         const jl_cgval_t &fld = argv[2];
         jl_datatype_t *stt = (jl_datatype_t*)obj.typ;
@@ -3160,7 +3569,7 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         assert(jl_is_datatype(stt));
 
         ssize_t fieldidx = -1;
-        if (fld.constant && fld.typ == (jl_value_t*)jl_symbol_type) {
+        if (fld.constant && jl_is_symbol(fld.constant)) {
             jl_sym_t *sym = (jl_sym_t*)fld.constant;
             fieldidx = jl_field_index(stt, sym, 0);
         }
@@ -3170,27 +3579,58 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
         else {
             return false;
         }
-        if (fieldidx < 0 || fieldidx >= jl_datatype_nfields(stt)) {
-            *ret = mark_julia_const(jl_false);
+        enum jl_memory_order order = jl_memory_order_unspecified;
+        if (nargs == 3) {
+            const jl_cgval_t &ord = argv[3];
+            emit_typecheck(ctx, ord, (jl_value_t*)jl_symbol_type, "isdefined");
+            if (!ord.constant)
+                return false;
+            order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+        }
+        if (order == jl_memory_order_invalid) {
+            emit_atomic_error(ctx, "invalid atomic ordering");
+            *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+            return true;
         }
-        else if (fieldidx < stt->ninitialized) {
-            *ret = mark_julia_const(jl_true);
+        ssize_t nf = jl_datatype_nfields(stt);
+        if (fieldidx < 0 || fieldidx >= nf) {
+            if (order != jl_memory_order_unspecified) {
+                emit_atomic_error(ctx, "isdefined: atomic ordering cannot be specified for nonexistent field");
+                *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+                return true;
+            }
+            *ret = mark_julia_const(ctx, jl_false);
+            return true;
+        }
+        bool isatomic = jl_field_isatomic(stt, fieldidx);
+        if (!isatomic && order != jl_memory_order_notatomic && order != jl_memory_order_unspecified) {
+            emit_atomic_error(ctx, "isdefined: non-atomic field cannot be accessed atomically");
+            *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+            return true;
+        }
+        if (isatomic && order == jl_memory_order_notatomic) {
+            emit_atomic_error(ctx, "isdefined: atomic field cannot be accessed non-atomically");
+            *ret = jl_cgval_t(ctx.builder.getContext()); // unreachable
+            return true;
+        }
+        else if (fieldidx < nf - stt->name->n_uninitialized) {
+            *ret = mark_julia_const(ctx, jl_true);
         }
         else if (jl_field_isptr(stt, fieldidx) || jl_type_hasptr(jl_field_type(stt, fieldidx))) {
             Value *fldv;
             size_t offs = jl_field_offset(stt, fieldidx) / sizeof(jl_value_t*);
             auto tbaa = obj.tbaa;
-            if (tbaa == tbaa_datatype && offs != offsetof(jl_datatype_t, types))
-                tbaa = tbaa_const;
+            if (tbaa == ctx.tbaa().tbaa_datatype && offs != offsetof(jl_datatype_t, types))
+                tbaa = ctx.tbaa().tbaa_const;
             if (obj.ispointer()) {
                 if (!jl_field_isptr(stt, fieldidx))
                     offs += ((jl_datatype_t*)jl_field_type(stt, fieldidx))->layout->first_ptr;
-                Value *ptr = emit_bitcast(ctx, maybe_decay_tracked(ctx, data_pointer(ctx, obj)), T_pprjlvalue);
-                Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, ptr, offs);
+                Value *ptr = emit_bitcast(ctx, maybe_decay_tracked(ctx, data_pointer(ctx, obj)), ctx.types().T_pprjlvalue);
+                Value *addr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, ptr, offs);
                 // emit this using the same type as emit_getfield_knownidx
                 // so that LLVM may be able to load-load forward them and fold the result
-                fldv = tbaa_decorate(tbaa, ctx.builder.CreateAlignedLoad(T_prjlvalue, addr, Align(sizeof(size_t))));
-                cast<LoadInst>(fldv)->setOrdering(AtomicOrdering::Unordered);
+                fldv = tbaa_decorate(tbaa, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, addr, Align(sizeof(size_t))));
+                cast<LoadInst>(fldv)->setOrdering(order <= jl_memory_order_notatomic ? AtomicOrdering::Unordered : get_llvm_atomic_order(order));
             }
             else {
                 fldv = ctx.builder.CreateExtractValue(obj.V, offs);
@@ -3203,50 +3643,88 @@ static bool emit_builtin_call(jl_codectx_t &ctx, jl_cgval_t *ret, jl_value_t *f,
             *ret = mark_julia_type(ctx, isdef, false, jl_bool_type);
         }
         else {
-            *ret = mark_julia_const(jl_true);
+            *ret = mark_julia_const(ctx, jl_true);
+        }
+        if (order > jl_memory_order_monotonic && ret->constant) {
+            // fence instructions may only have acquire, release, acq_rel, or seq_cst ordering.
+            ctx.builder.CreateFence(get_llvm_atomic_order(order));
         }
         return true;
     }
+
+    else if (f == jl_builtin_donotdelete) {
+        // For now we emit this as a vararg call to the builtin
+        // (which doesn't look at the arguments). In the future,
+        // this should be an LLVM builtin.
+        auto it = builtin_func_map().find(jl_f_donotdelete_addr);
+        if (it == builtin_func_map().end()) {
+            return false;
+        }
+
+        *ret = mark_julia_const(ctx, jl_nothing);
+        FunctionType *Fty = FunctionType::get(getVoidTy(ctx.builder.getContext()), true);
+        Function *dnd = prepare_call(it->second);
+        SmallVector<Value*, 1> call_args;
+
+        for (size_t i = 1; i <= nargs; ++i) {
+            const jl_cgval_t &obj = argv[i];
+            if (obj.V) {
+                // TODO is this strong enough to constitute a read of any contained
+                // pointers?
+                Value *V = obj.V;
+                if (obj.isboxed) {
+                    V = emit_pointer_from_objref(ctx, V);
+                }
+                call_args.push_back(V);
+            }
+        }
+        ctx.builder.CreateCall(Fty, dnd, call_args);
+        return true;
+    }
+
     return false;
 }
 
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, Function *theFptr, Value *theF,
-                             jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
+                             const jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
 {
+    ++EmittedJLCalls;
     // emit arguments
     SmallVector<Value*, 3> theArgs;
     SmallVector<Type*, 3> argsT;
     if (theF) {
         theArgs.push_back(theF);
-        argsT.push_back(T_prjlvalue);
+        argsT.push_back(ctx.types().T_prjlvalue);
     }
     for (size_t i = 0; i < nargs; i++) {
         Value *arg = boxed(ctx, argv[i]);
         theArgs.push_back(arg);
-        argsT.push_back(T_prjlvalue);
+        argsT.push_back(ctx.types().T_prjlvalue);
     }
-    FunctionType *FTy = FunctionType::get(T_prjlvalue, argsT, false);
+    FunctionType *FTy = FunctionType::get(ctx.types().T_prjlvalue, argsT, false);
     CallInst *result = ctx.builder.CreateCall(FTy,
         ctx.builder.CreateBitCast(theFptr, FTy->getPointerTo()),
         theArgs);
-    add_return_attr(result, Attribute::NonNull);
+    addRetAttr(result, Attribute::NonNull);
     result->setCallingConv(cc);
     return result;
 }
-// Returns T_prjlvalue
+// Returns ctx.types().T_prjlvalue
 static CallInst *emit_jlcall(jl_codectx_t &ctx, JuliaFunction *theFptr, Value *theF,
-                             jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
+                             const jl_cgval_t *argv, size_t nargs, CallingConv::ID cc)
 {
     return emit_jlcall(ctx, prepare_call(theFptr), theF, argv, nargs, cc);
 }
 
 
 static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_t *mi, jl_value_t *jlretty, StringRef specFunctionObject,
-                                          jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
+                                          const jl_cgval_t *argv, size_t nargs, jl_returninfo_t::CallingConv *cc, unsigned *return_roots, jl_value_t *inferred_retty)
 {
+    ++EmittedSpecfunCalls;
     // emit specialized call site
-    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, specFunctionObject, mi->specTypes, jlretty);
+    bool is_opaque_closure = jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure;
+    jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module, specFunctionObject, mi->specTypes, jlretty, is_opaque_closure);
     FunctionType *cft = returninfo.decl->getFunctionType();
     *cc = returninfo.cc;
     *return_roots = returninfo.return_roots;
@@ -3261,12 +3739,13 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
     case jl_returninfo_t::Ghosts:
         break;
     case jl_returninfo_t::SRet:
-        result = emit_static_alloca(ctx, cft->getParamType(0)->getPointerElementType());
+        result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType());
+        assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
         argvals[idx] = result;
         idx++;
         break;
     case jl_returninfo_t::Union:
-        result = emit_static_alloca(ctx, ArrayType::get(T_int8, returninfo.union_bytes));
+        result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes));
         if (returninfo.union_align > 1)
             result->setAlignment(Align(returninfo.union_align));
         argvals[idx] = result;
@@ -3275,30 +3754,29 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
     }
 
     if (returninfo.return_roots) {
-        AllocaInst *return_roots = emit_static_alloca(ctx, T_prjlvalue);
-        return_roots->setOperand(0, ConstantInt::get(T_int32, returninfo.return_roots));
+        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots));
         argvals[idx] = return_roots;
         idx++;
     }
 
     for (size_t i = 0; i < nargs; i++) {
-        jl_value_t *jt = jl_nth_slot_type(mi->specTypes, i);
+        jl_value_t *jt = (is_opaque_closure && i == 0) ? (jl_value_t*)jl_any_type :
+            jl_nth_slot_type(mi->specTypes, i);
         if (is_uniquerep_Type(jt))
             continue;
         bool isboxed = deserves_argbox(jt);
-        Type *et = isboxed ?  T_prjlvalue : julia_type_to_llvm(ctx, jt);
+        Type *et = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
         if (type_is_ghost(et))
             continue;
         assert(idx < nfargs);
         Type *at = cft->getParamType(idx);
         jl_cgval_t arg = argv[i];
         if (isboxed) {
-            assert(at == T_prjlvalue && et == T_prjlvalue);
+            assert(at == ctx.types().T_prjlvalue && et == ctx.types().T_prjlvalue);
             argvals[idx] = boxed(ctx, arg);
         }
         else if (et->isAggregateType()) {
-            if (!arg.ispointer())
-                arg = value_to_pointer(ctx, arg);
+            arg = value_to_pointer(ctx, arg);
             // can lazy load on demand, no copy needed
             assert(at == PointerType::get(et, AddressSpace::Derived));
             argvals[idx] = decay_derived(ctx, maybe_bitcast(ctx,
@@ -3310,7 +3788,7 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
             if (!val) {
                 // There was a type mismatch of some sort - exit early
                 CreateTrap(ctx.builder);
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
             argvals[idx] = val;
         }
@@ -3320,56 +3798,52 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, jl_method_instance_
     CallInst *call = ctx.builder.CreateCall(returninfo.decl, ArrayRef<Value*>(&argvals[0], nfargs));
     call->setAttributes(returninfo.decl->getAttributes());
 
-    jl_cgval_t retval;
+    jl_cgval_t retval(ctx.builder.getContext());
     switch (returninfo.cc) {
         case jl_returninfo_t::Boxed:
-            retval = mark_julia_type(ctx, call, true, inferred_retty);
+            retval = mark_julia_type(ctx, call, true, jlretty);
             break;
         case jl_returninfo_t::Register:
             retval = mark_julia_type(ctx, call, false, jlretty);
             break;
         case jl_returninfo_t::SRet:
-            retval = mark_julia_slot(result, jlretty, NULL, tbaa_stack);
+            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
             break;
         case jl_returninfo_t::Union: {
             Value *box = ctx.builder.CreateExtractValue(call, 0);
             Value *tindex = ctx.builder.CreateExtractValue(call, 1);
             Value *derived = ctx.builder.CreateSelect(
                 ctx.builder.CreateICmpEQ(
-                        ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)),
-                        ConstantInt::get(T_int8, 0)),
-                decay_derived(ctx, ctx.builder.CreateBitCast(argvals[0], T_pjlvalue)),
+                        ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                        ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
+                decay_derived(ctx, ctx.builder.CreateBitCast(argvals[0], ctx.types().T_pjlvalue)),
                 decay_derived(ctx, box)
             );
             retval = mark_julia_slot(derived,
                                      jlretty,
                                      tindex,
-                                     tbaa_stack);
+                                     ctx.tbaa(),
+                                     ctx.tbaa().tbaa_stack);
             retval.Vboxed = box;
             break;
         }
         case jl_returninfo_t::Ghosts:
-            retval = mark_julia_slot(NULL, jlretty, call, tbaa_stack);
+            retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa(), ctx.tbaa().tbaa_stack);
             break;
     }
     // see if inference has a different / better type for the call than the lambda
-    if (inferred_retty != retval.typ)
-        retval = update_julia_type(ctx, retval, inferred_retty);
-    return retval;
+    return update_julia_type(ctx, retval, inferred_retty);
 }
 
-static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, StringRef specFunctionObject,
-                                          jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
+static jl_cgval_t emit_call_specfun_boxed(jl_codectx_t &ctx, jl_value_t *jlretty, StringRef specFunctionObject,
+                                          const jl_cgval_t *argv, size_t nargs, jl_value_t *inferred_retty)
 {
-    auto theFptr = cast<Function>(jl_Module->getOrInsertFunction(specFunctionObject, jl_func_sig)
-#if JL_LLVM_VERSION >= 90000
-                .getCallee()
-#endif
-            );
-    add_return_attr(theFptr, Attribute::NonNull);
-    theFptr->addFnAttr(Thunk);
+    auto theFptr = cast<Function>(
+        jl_Module->getOrInsertFunction(specFunctionObject, ctx.types().T_jlfunc).getCallee());
+    addRetAttr(theFptr, Attribute::NonNull);
+    theFptr->addFnAttr(Attribute::get(ctx.builder.getContext(), "thunk"));
     Value *ret = emit_jlcall(ctx, theFptr, nullptr, argv, nargs, JLCALL_F_CC);
-    return mark_julia_type(ctx, ret, true, inferred_retty);
+    return update_julia_type(ctx, mark_julia_type(ctx, ret, true, jlretty), inferred_retty);
 }
 
 static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
@@ -3384,11 +3858,16 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
     for (size_t i = 0; i < nargs; ++i) {
         argv[i] = emit_expr(ctx, args[i + 1]);
         if (argv[i].typ == jl_bottom_type)
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
     }
+    return emit_invoke(ctx, lival, argv, nargs, rt);
+}
 
+static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const jl_cgval_t *argv, size_t nargs, jl_value_t *rt)
+{
+    ++EmittedInvokes;
     bool handled = false;
-    jl_cgval_t result;
+    jl_cgval_t result(ctx.builder.getContext());
     if (lival.constant) {
         jl_method_instance_t *mi = (jl_method_instance_t*)lival.constant;
         assert(jl_is_method_instance(mi));
@@ -3397,11 +3876,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
             jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
             FunctionType *ft = ctx.f->getFunctionType();
             StringRef protoname = ctx.f->getName();
-            if (ft == jl_func_sig) {
-                result = emit_call_specfun_boxed(ctx, protoname, argv, nargs, rt);
+            if (ft == ctx.types().T_jlfunc) {
+                result = emit_call_specfun_boxed(ctx, ctx.rettype, protoname, argv, nargs, rt);
                 handled = true;
             }
-            else if (ft != jl_func_sig_sparams) {
+            else if (ft != ctx.types().T_jlfuncparams) {
                 unsigned return_roots = 0;
                 result = emit_call_specfun_other(ctx, mi, ctx.rettype, protoname, argv, nargs, &cc, &return_roots, rt);
                 handled = true;
@@ -3410,12 +3889,14 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
         else {
             jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
             jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
-            if (ci != jl_nothing && codeinst->invoke != jl_fptr_sparam) { // check if we know we definitely can't handle this specptr
-                if (codeinst->invoke == jl_fptr_const_return) {
-                    result = mark_julia_const(codeinst->rettype_const);
+            if (ci != jl_nothing) {
+                auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                 // check if we know how to handle this specptr
+                if (invoke == jl_fptr_const_return_addr) {
+                    result = mark_julia_const(ctx, codeinst->rettype_const);
                     handled = true;
                 }
-                else {
+                else if (invoke != jl_fptr_sparam_addr) {
                     bool specsig, needsparams;
                     std::tie(specsig, needsparams) = uses_specsig(mi, codeinst->rettype, ctx.params->prefer_specsig);
                     std::string name;
@@ -3424,15 +3905,22 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
                     if (ctx.use_cache) {
                         // optimization: emit the correct name immediately, if we know it
                         // TODO: use `emitted` map here too to try to consolidate names?
-                        if (codeinst->specptr.fptr) {
-                            if (specsig ? codeinst->isspecsig : codeinst->invoke == jl_fptr_args) {
-                                protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
+                        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+                        auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+                        if (fptr) {
+                            if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args_addr) {
+                                protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
                                 need_to_emit = false;
                             }
                         }
                     }
+                    auto it = ctx.call_targets.find(codeinst);
+                    if (need_to_emit && it != ctx.call_targets.end()) {
+                        protoname = std::get<2>(it->second)->getName();
+                        need_to_emit = false;
+                    }
                     if (need_to_emit) {
-                        raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << globalUnique++;
+                        raw_string_ostream(name) << (specsig ? "j_" : "j1_") << name_from_method_instance(mi) << "_" << globalUniqueGeneratedNames++;
                         protoname = StringRef(name);
                     }
                     jl_returninfo_t::CallingConv cc = jl_returninfo_t::CallingConv::Boxed;
@@ -3440,11 +3928,11 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
                     if (specsig)
                         result = emit_call_specfun_other(ctx, mi, codeinst->rettype, protoname, argv, nargs, &cc, &return_roots, rt);
                     else
-                        result = emit_call_specfun_boxed(ctx, protoname, argv, nargs, rt);
+                        result = emit_call_specfun_boxed(ctx, codeinst->rettype, protoname, argv, nargs, rt);
                     handled = true;
                     if (need_to_emit) {
                         Function *trampoline_decl = cast<Function>(jl_Module->getNamedValue(protoname));
-                        ctx.call_targets.push_back(std::make_tuple(codeinst, cc, return_roots, trampoline_decl, specsig));
+                        ctx.call_targets[codeinst] = std::make_tuple(cc, return_roots, trampoline_decl, specsig);
                     }
                 }
             }
@@ -3459,8 +3947,44 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
     return result;
 }
 
+static jl_cgval_t emit_invoke_modify(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
+{
+    ++EmittedInvokes;
+    jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    size_t arglen = jl_array_dim0(ex->args);
+    size_t nargs = arglen - 1;
+    assert(arglen >= 2);
+    jl_cgval_t lival = emit_expr(ctx, args[0]);
+    jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
+    for (size_t i = 0; i < nargs; ++i) {
+        argv[i] = emit_expr(ctx, args[i + 1]);
+        if (argv[i].typ == jl_bottom_type)
+            return jl_cgval_t(ctx.builder.getContext());
+    }
+    const jl_cgval_t &f = argv[0];
+    jl_cgval_t ret(ctx.builder.getContext());
+    if (f.constant && f.constant == jl_builtin_modifyfield) {
+        if (emit_f_opfield(ctx, &ret, jl_builtin_modifyfield, argv, nargs - 1, &lival))
+            return ret;
+        auto it = builtin_func_map().find(jl_f_modifyfield_addr);
+        assert(it != builtin_func_map().end());
+        Value *oldnew = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, JLCALL_F_CC);
+        return mark_julia_type(ctx, oldnew, true, rt);
+    }
+    if (f.constant && jl_typeis(f.constant, jl_intrinsic_type)) {
+        JL_I::intrinsic fi = (intrinsic)*(uint32_t*)jl_data_ptr(f.constant);
+        if (fi == JL_I::atomic_pointermodify && jl_intrinsic_nargs((int)fi) == nargs - 1)
+            return emit_atomic_pointerop(ctx, fi, argv, nargs - 1, &lival);
+    }
+
+    // emit function and arguments
+    Value *callval = emit_jlcall(ctx, jlapplygeneric_func, nullptr, argv, nargs, JLCALL_F_CC);
+    return mark_julia_type(ctx, callval, true, rt);
+}
+
 static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
 {
+    ++EmittedCalls;
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
     size_t nargs = jl_array_dim0(ex->args);
     assert(nargs >= 1);
@@ -3477,29 +4001,29 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
     jl_cgval_t *generic_argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * n_generic_args);
     jl_cgval_t *argv = generic_argv;
     if (context) {
-        generic_argv[0] = mark_julia_const(context);
+        generic_argv[0] = mark_julia_const(ctx, context);
         argv = &generic_argv[1];
     }
     argv[0] = f;
     for (size_t i = 1; i < nargs; ++i) {
         argv[i] = emit_expr(ctx, args[i]);
         if (argv[i].typ == jl_bottom_type)
-            return jl_cgval_t(); // anything past here is unreachable
+            return jl_cgval_t(ctx.builder.getContext()); // anything past here is unreachable
     }
 
     if (f.constant && jl_isa(f.constant, (jl_value_t*)jl_builtin_type)) {
         if (f.constant == jl_builtin_ifelse && nargs == 4)
             return emit_ifelse(ctx, argv[1], argv[2], argv[3], rt);
-        jl_cgval_t result;
+        jl_cgval_t result(ctx.builder.getContext());
         bool handled = emit_builtin_call(ctx, &result, f.constant, argv, nargs - 1, rt, ex);
         if (handled) {
             return result;
         }
 
         // special case for known builtin not handled by emit_builtin_call
-        auto it = builtin_func_map.find(jl_get_builtin_fptr(f.constant));
-        if (it != builtin_func_map.end()) {
-            Value *ret = emit_jlcall(ctx, it->second, V_rnull, &argv[1], nargs - 1, JLCALL_F_CC);
+        auto it = builtin_func_map().find(jl_get_builtin_fptr(f.constant));
+        if (it != builtin_func_map().end()) {
+            Value *ret = emit_jlcall(ctx, it->second, Constant::getNullValue(ctx.types().T_prjlvalue), &argv[1], nargs - 1, JLCALL_F_CC);
             return mark_julia_type(ctx, ret, true, rt);
         }
     }
@@ -3513,8 +4037,9 @@ static jl_cgval_t emit_call(jl_codectx_t &ctx, jl_expr_t *ex, jl_value_t *rt)
 
 static void undef_var_error_ifnot(jl_codectx_t &ctx, Value *ok, jl_sym_t *name)
 {
-    BasicBlock *err = BasicBlock::Create(jl_LLVMContext, "err", ctx.f);
-    BasicBlock *ifok = BasicBlock::Create(jl_LLVMContext, "ok");
+    ++EmittedUndefVarErrors;
+    BasicBlock *err = BasicBlock::Create(ctx.builder.getContext(), "err", ctx.f);
+    BasicBlock *ifok = BasicBlock::Create(ctx.builder.getContext(), "ok");
     ctx.builder.CreateCondBr(ok, ifok, err);
     ctx.builder.SetInsertPoint(err);
     ctx.builder.CreateCall(prepare_call(jlundefvarerror_func),
@@ -3531,55 +4056,56 @@ static Value *global_binding_pointer(jl_codectx_t &ctx, jl_module_t *m, jl_sym_t
                                      jl_binding_t **pbnd, bool assign)
 {
     jl_binding_t *b = NULL;
-    if (assign) {
+    if (assign)
         b = jl_get_binding_wr(m, s, 0);
-        assert(b != NULL);
+    else
+        b = jl_get_binding(m, s);
+    if (b == NULL) {
+        // var not found. switch to delayed lookup.
+        Constant *initnul = Constant::getNullValue(ctx.types().T_pjlvalue);
+        GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), ctx.types().T_pjlvalue,
+                false, GlobalVariable::PrivateLinkage, initnul);
+        LoadInst *cachedval = ctx.builder.CreateAlignedLoad(ctx.types().T_pjlvalue, bindinggv, Align(sizeof(void*)));
+        cachedval->setOrdering(AtomicOrdering::Unordered);
+        BasicBlock *have_val = BasicBlock::Create(ctx.builder.getContext(), "found");
+        BasicBlock *not_found = BasicBlock::Create(ctx.builder.getContext(), "notfound");
+        BasicBlock *currentbb = ctx.builder.GetInsertBlock();
+        ctx.builder.CreateCondBr(ctx.builder.CreateICmpNE(cachedval, initnul), have_val, not_found);
+        ctx.f->getBasicBlockList().push_back(not_found);
+        ctx.builder.SetInsertPoint(not_found);
+        Value *bval = ctx.builder.CreateCall(prepare_call(assign ? jlgetbindingwrorerror_func : jlgetbindingorerror_func),
+                { literal_pointer_val(ctx, (jl_value_t*)m),
+                  literal_pointer_val(ctx, (jl_value_t*)s) });
+        ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
+        ctx.builder.CreateBr(have_val);
+        ctx.f->getBasicBlockList().push_back(have_val);
+        ctx.builder.SetInsertPoint(have_val);
+        PHINode *p = ctx.builder.CreatePHI(ctx.types().T_pjlvalue, 2);
+        p->addIncoming(cachedval, currentbb);
+        p->addIncoming(bval, not_found);
+        return p;
+    }
+    if (assign) {
         if (b->owner != m) {
             char *msg;
-            (void)asprintf(&msg, "cannot assign a value to variable %s.%s from module %s",
+            (void)asprintf(&msg, "cannot assign a value to imported variable %s.%s from module %s",
                     jl_symbol_name(b->owner->name), jl_symbol_name(s), jl_symbol_name(m->name));
             emit_error(ctx, msg);
             free(msg);
+            return NULL;
         }
     }
     else {
-        b = jl_get_binding(m, s);
-        if (b == NULL) {
-            // var not found. switch to delayed lookup.
-            Constant *initnul = V_null;
-            GlobalVariable *bindinggv = new GlobalVariable(*ctx.f->getParent(), T_pjlvalue,
-                    false, GlobalVariable::PrivateLinkage, initnul);
-            LoadInst *cachedval = ctx.builder.CreateAlignedLoad(T_pjlvalue, bindinggv, Align(sizeof(void*)));
-            cachedval->setOrdering(AtomicOrdering::Unordered);
-            BasicBlock *have_val = BasicBlock::Create(jl_LLVMContext, "found");
-            BasicBlock *not_found = BasicBlock::Create(jl_LLVMContext, "notfound");
-            BasicBlock *currentbb = ctx.builder.GetInsertBlock();
-            ctx.builder.CreateCondBr(ctx.builder.CreateICmpNE(cachedval, initnul), have_val, not_found);
-            ctx.f->getBasicBlockList().push_back(not_found);
-            ctx.builder.SetInsertPoint(not_found);
-            Value *bval = ctx.builder.CreateCall(prepare_call(jlgetbindingorerror_func),
-                    { literal_pointer_val(ctx, (jl_value_t*)m),
-                      literal_pointer_val(ctx, (jl_value_t*)s) });
-            ctx.builder.CreateAlignedStore(bval, bindinggv, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
-            ctx.builder.CreateBr(have_val);
-            ctx.f->getBasicBlockList().push_back(have_val);
-            ctx.builder.SetInsertPoint(have_val);
-            PHINode *p = ctx.builder.CreatePHI(T_pjlvalue, 2);
-            p->addIncoming(cachedval, currentbb);
-            p->addIncoming(bval, not_found);
-            return julia_binding_gv(ctx, emit_bitcast(ctx, p, T_pprjlvalue));
-        }
         if (b->deprecated)
             cg_bdw(ctx, b);
     }
-    if (pbnd)
-        *pbnd = b;
+    *pbnd = b;
     return julia_binding_gv(ctx, b);
 }
 
 static jl_cgval_t emit_checked_var(jl_codectx_t &ctx, Value *bp, jl_sym_t *name, bool isvol, MDNode *tbaa)
 {
-    LoadInst *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, Align(sizeof(void*)));
+    LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
     if (isvol)
         v->setVolatile(true);
     v->setOrdering(AtomicOrdering::Unordered);
@@ -3594,15 +4120,15 @@ static jl_cgval_t emit_sparam(jl_codectx_t &ctx, size_t i)
     if (jl_svec_len(ctx.linfo->sparam_vals) > 0) {
         jl_value_t *e = jl_svecref(ctx.linfo->sparam_vals, i);
         if (!jl_is_typevar(e)) {
-            return mark_julia_const(e);
+            return mark_julia_const(ctx, e);
         }
     }
     assert(ctx.spvals_ptr != NULL);
     Value *bp = ctx.builder.CreateConstInBoundsGEP1_32(
-            T_prjlvalue,
+            ctx.types().T_prjlvalue,
             ctx.spvals_ptr,
             i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
-    Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, Align(sizeof(void*))));
+    Value *sp = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
     Value *isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp),
             track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type)));
     jl_unionall_t *sparam = (jl_unionall_t*)ctx.linfo->def.method->sig;
@@ -3618,19 +4144,20 @@ static jl_cgval_t emit_global(jl_codectx_t &ctx, jl_sym_t *sym)
 {
     jl_binding_t *jbp = NULL;
     Value *bp = global_binding_pointer(ctx, ctx.module, sym, &jbp, false);
-    assert(bp != NULL);
+    if (bp == NULL)
+        return jl_cgval_t(ctx.builder.getContext());
     if (jbp && jbp->value != NULL) {
         if (jbp->constp)
-            return mark_julia_const(jbp->value);
+            return mark_julia_const(ctx, jbp->value);
         // double-check that a global variable is actually defined. this
         // can be a problem in parallel when a definition is missing on
         // one machine.
-        LoadInst *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, Align(sizeof(void*)));
+        LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
         v->setOrdering(AtomicOrdering::Unordered);
-        tbaa_decorate(tbaa_binding, v);
+        tbaa_decorate(ctx.tbaa().tbaa_binding, v);
         return mark_julia_type(ctx, v, true, jl_any_type);
     }
-    return emit_checked_var(ctx, bp, sym, false, tbaa_binding);
+    return emit_checked_var(ctx, bp, sym, false, ctx.tbaa().tbaa_binding);
 }
 
 static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
@@ -3640,21 +4167,21 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
         size_t sl = jl_slot_number(sym) - 1;
         jl_varinfo_t &vi = ctx.slots[sl];
         if (!vi.usedUndef)
-            return mark_julia_const(jl_true);
+            return mark_julia_const(ctx, jl_true);
         if (vi.boxroot == NULL || vi.pTIndex != NULL) {
             assert(vi.defFlag);
-            isnull = ctx.builder.CreateAlignedLoad(T_int1, vi.defFlag, Align(1), vi.isVolatile);
+            isnull = ctx.builder.CreateAlignedLoad(getInt1Ty(ctx.builder.getContext()), vi.defFlag, Align(1), vi.isVolatile);
         }
         if (vi.boxroot != NULL) {
-            Value *boxed = ctx.builder.CreateAlignedLoad(T_prjlvalue, vi.boxroot, Align(sizeof(void*)), vi.isVolatile);
-            Value *box_isnull = ctx.builder.CreateICmpNE(boxed, V_rnull);
+            Value *boxed = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, vi.boxroot, Align(sizeof(void*)), vi.isVolatile);
+            Value *box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue));
             if (vi.pTIndex) {
                 // value is either boxed in the stack slot, or unboxed in value
                 // as indicated by testing (pTIndex & 0x80)
-                Value *tindex = ctx.builder.CreateAlignedLoad(T_int8, vi.pTIndex, Align(sizeof(void*)), vi.isVolatile);
+                Value *tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(sizeof(void*)), vi.isVolatile);
                 Value *load_unbox = ctx.builder.CreateICmpEQ(
-                            ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)),
-                            ConstantInt::get(T_int8, 0));
+                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                            ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
                 isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull);
             }
             else {
@@ -3663,20 +4190,20 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
         }
     }
     else if (jl_is_expr(sym)) {
-        assert(((jl_expr_t*)sym)->head == static_parameter_sym && "malformed isdefined expression");
+        assert(((jl_expr_t*)sym)->head == jl_static_parameter_sym && "malformed isdefined expression");
         size_t i = jl_unbox_long(jl_exprarg(sym, 0)) - 1;
         if (jl_svec_len(ctx.linfo->sparam_vals) > 0) {
             jl_value_t *e = jl_svecref(ctx.linfo->sparam_vals, i);
             if (!jl_is_typevar(e)) {
-                return mark_julia_const(jl_true);
+                return mark_julia_const(ctx, jl_true);
             }
         }
         assert(ctx.spvals_ptr != NULL);
         Value *bp = ctx.builder.CreateConstInBoundsGEP1_32(
-                T_prjlvalue,
+                ctx.types().T_prjlvalue,
                 ctx.spvals_ptr,
                 i + sizeof(jl_svec_t) / sizeof(jl_value_t*));
-        Value *sp = tbaa_decorate(tbaa_const, ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, Align(sizeof(void*))));
+        Value *sp = tbaa_decorate(ctx.tbaa().tbaa_const, ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*))));
         isnull = ctx.builder.CreateICmpNE(emit_typeof(ctx, sp),
             track_pjlvalue(ctx, literal_pointer_val(ctx, (jl_value_t*)jl_tvar_type)));
     }
@@ -3695,19 +4222,20 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
         jl_binding_t *bnd = jl_get_binding(modu, name);
         if (bnd) {
             if (bnd->value != NULL)
-                return mark_julia_const(jl_true);
+                return mark_julia_const(ctx, jl_true);
             Value *bp = julia_binding_gv(ctx, bnd);
-            LoadInst *v = ctx.builder.CreateAlignedLoad(T_prjlvalue, bp, Align(sizeof(void*)));
-            tbaa_decorate(tbaa_binding, v);
+            bp = julia_binding_pvalue(ctx, bp);
+            LoadInst *v = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, bp, Align(sizeof(void*)));
+            tbaa_decorate(ctx.tbaa().tbaa_binding, v);
             v->setOrdering(AtomicOrdering::Unordered);
-            isnull = ctx.builder.CreateICmpNE(v, V_rnull);
+            isnull = ctx.builder.CreateICmpNE(v, Constant::getNullValue(ctx.types().T_prjlvalue));
         }
         else {
             Value *v = ctx.builder.CreateCall(prepare_call(jlboundp_func), {
                     literal_pointer_val(ctx, (jl_value_t*)modu),
                     literal_pointer_val(ctx, (jl_value_t*)name)
                 });
-            isnull = ctx.builder.CreateICmpNE(v, ConstantInt::get(T_int32, 0));
+            isnull = ctx.builder.CreateICmpNE(v, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
         }
     }
     return mark_julia_type(ctx, isnull, false, jl_bool_type);
@@ -3715,13 +4243,13 @@ static jl_cgval_t emit_isdefined(jl_codectx_t &ctx, jl_value_t *sym)
 
 static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *varname, jl_value_t *better_typ=NULL) {
     jl_value_t *typ = better_typ ? better_typ : vi.value.typ;
-    jl_cgval_t v;
+    jl_cgval_t v(ctx.builder.getContext());
     Value *isnull = NULL;
     if (vi.boxroot == NULL || vi.pTIndex != NULL) {
         if ((!vi.isVolatile && vi.isSA) || vi.isArgument || vi.value.constant || !vi.value.V) {
             v = vi.value;
             if (vi.pTIndex)
-                v.TIndex = ctx.builder.CreateAlignedLoad(T_int8, vi.pTIndex, Align(1));
+                v.TIndex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1));
         }
         else {
             // copy value to a non-mutable (non-volatile SSA) location
@@ -3732,55 +4260,39 @@ static jl_cgval_t emit_varinfo(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_sym_t *va
             ssaslot->insertAfter(varslot);
             if (vi.isVolatile) {
                 Value *unbox = ctx.builder.CreateAlignedLoad(ssaslot->getAllocatedType(), varslot,
-#if JL_LLVM_VERSION >= 110000
                         varslot->getAlign(),
-#else
-                        varslot->getAlignment(),
-#endif
                         true);
-                ctx.builder.CreateAlignedStore(unbox, ssaslot,
-#if JL_LLVM_VERSION >= 110000
-                        ssaslot->getAlign()
-#else
-                        ssaslot->getAlignment()
-#endif
-                        );
+                ctx.builder.CreateAlignedStore(unbox, ssaslot, ssaslot->getAlign());
             }
             else {
-                const DataLayout &DL = jl_data_layout;
+                const DataLayout &DL = jl_Module->getDataLayout();
                 uint64_t sz = DL.getTypeStoreSize(T);
-                emit_memcpy(ctx, ssaslot, tbaa_stack, vi.value, sz,
-#if JL_LLVM_VERSION >= 110000
-                        ssaslot->getAlign().value()
-#else
-                        ssaslot->getAlignment()
-#endif
-                        );
+                emit_memcpy(ctx, ssaslot, ctx.tbaa().tbaa_stack, vi.value, sz, ssaslot->getAlign().value());
             }
             Value *tindex = NULL;
             if (vi.pTIndex)
-                tindex = ctx.builder.CreateAlignedLoad(T_int8, vi.pTIndex, Align(1), vi.isVolatile);
-            v = mark_julia_slot(ssaslot, vi.value.typ, tindex, tbaa_stack);
+                tindex = ctx.builder.CreateAlignedLoad(getInt8Ty(ctx.builder.getContext()), vi.pTIndex, Align(1), vi.isVolatile);
+            v = mark_julia_slot(ssaslot, vi.value.typ, tindex, ctx.tbaa(), ctx.tbaa().tbaa_stack);
         }
         if (vi.boxroot == NULL)
             v = update_julia_type(ctx, v, typ);
         if (vi.usedUndef) {
             assert(vi.defFlag);
-            isnull = ctx.builder.CreateAlignedLoad(T_int1, vi.defFlag, Align(1), vi.isVolatile);
+            isnull = ctx.builder.CreateAlignedLoad(getInt1Ty(ctx.builder.getContext()), vi.defFlag, Align(1), vi.isVolatile);
         }
     }
     if (vi.boxroot != NULL) {
-        Instruction *boxed = ctx.builder.CreateAlignedLoad(T_prjlvalue, vi.boxroot, Align(sizeof(void*)), vi.isVolatile);
+        Instruction *boxed = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, vi.boxroot, Align(sizeof(void*)), vi.isVolatile);
         Value *box_isnull = NULL;
         if (vi.usedUndef)
-            box_isnull = ctx.builder.CreateICmpNE(boxed, V_rnull);
+            box_isnull = ctx.builder.CreateICmpNE(boxed, Constant::getNullValue(ctx.types().T_prjlvalue));
         maybe_mark_load_dereferenceable(boxed, vi.usedUndef || vi.pTIndex, typ);
         if (vi.pTIndex) {
             // value is either boxed in the stack slot, or unboxed in value
             // as indicated by testing (pTIndex & 0x80)
             Value *load_unbox = ctx.builder.CreateICmpEQ(
-                        ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(T_int8, 0x80)),
-                        ConstantInt::get(T_int8, 0));
+                        ctx.builder.CreateAnd(v.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                        ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             if (vi.usedUndef)
                 isnull = ctx.builder.CreateSelect(load_unbox, isnull, box_isnull);
             if (v.V) { // v.V will be null if it is a union of all ghost values
@@ -3845,7 +4357,7 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
                     Type *dest_ty = store_ty->getPointerTo();
                     if (dest_ty != dest->getType())
                         dest = emit_bitcast(ctx, dest, dest_ty);
-                    tbaa_decorate(tbaa_stack, ctx.builder.CreateStore(
+                    tbaa_decorate(ctx.tbaa().tbaa_stack, ctx.builder.CreateStore(
                                       emit_unbox(ctx, store_ty, rval_info, rval_info.typ),
                                       dest,
                                       vi.isVolatile));
@@ -3860,13 +4372,13 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
                 // due to LLVM bugs.
                 // This check should probably mostly catch the relevant situations.
                 if (vi.value.V != rval_info.V) {
-                    Value *copy_bytes = ConstantInt::get(T_int32, jl_datatype_size(vi.value.typ));
-                    emit_memcpy(ctx, vi.value.V, tbaa_stack, rval_info, copy_bytes,
+                    Value *copy_bytes = ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_datatype_size(vi.value.typ));
+                    emit_memcpy(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, copy_bytes,
                                 julia_alignment(rval_info.typ), vi.isVolatile);
                 }
             }
             else {
-                emit_unionmove(ctx, vi.value.V, tbaa_stack, rval_info, isboxed, vi.isVolatile);
+                emit_unionmove(ctx, vi.value.V, ctx.tbaa().tbaa_stack, rval_info, isboxed, vi.isVolatile);
             }
         }
     }
@@ -3878,9 +4390,13 @@ static void emit_vi_assignment_unboxed(jl_codectx_t &ctx, jl_varinfo_t &vi, Valu
 static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
 {
     jl_value_t *ssavalue_types = (jl_value_t*)ctx.source->ssavaluetypes;
-    assert(jl_is_array(ssavalue_types));
+    jl_value_t *phiType = NULL;
+    if (jl_is_array(ssavalue_types)) {
+        phiType = jl_array_ptr_ref(ssavalue_types, idx);
+    } else {
+        phiType = (jl_value_t*)jl_any_type;
+    }
     jl_array_t *edges = (jl_array_t*)jl_fieldref_noalloc(r, 0);
-    jl_value_t *phiType = jl_array_ptr_ref(ssavalue_types, idx);
     BasicBlock *BB = ctx.builder.GetInsertBlock();
     auto InsertPt = BB->getFirstInsertionPt();
     if (phiType == jl_bottom_type) {
@@ -3899,23 +4415,19 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
         if (dest) {
             Instruction *phi = dest->clone();
             phi->insertAfter(dest);
-            PHINode *Tindex_phi = PHINode::Create(T_int8, jl_array_len(edges), "tindex_phi");
+            PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_len(edges), "tindex_phi");
             BB->getInstList().insert(InsertPt, Tindex_phi);
-            PHINode *ptr_phi = PHINode::Create(T_prjlvalue, jl_array_len(edges), "ptr_phi");
+            PHINode *ptr_phi = PHINode::Create(ctx.types().T_prjlvalue, jl_array_len(edges), "ptr_phi");
             BB->getInstList().insert(InsertPt, ptr_phi);
             Value *isboxed = ctx.builder.CreateICmpNE(
-                    ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(T_int8, 0x80)),
-                    ConstantInt::get(T_int8, 0));
-#if JL_LLVM_VERSION >= 100000
+                    ctx.builder.CreateAnd(Tindex_phi, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                    ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
             ctx.builder.CreateMemCpy(phi, MaybeAlign(min_align), dest, MaybeAlign(0), nbytes, false);
-#else
-            ctx.builder.CreateMemCpy(phi, min_align, dest, 0, nbytes, false);
-#endif
             ctx.builder.CreateLifetimeEnd(dest);
             Value *ptr = ctx.builder.CreateSelect(isboxed,
-                maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), T_pint8),
-                maybe_bitcast(ctx, decay_derived(ctx, phi), T_pint8));
-            jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, tbaa_stack); // XXX: this TBAA is wrong for ptr_phi
+                maybe_bitcast(ctx, decay_derived(ctx, ptr_phi), getInt8PtrTy(ctx.builder.getContext())),
+                maybe_bitcast(ctx, decay_derived(ctx, phi), getInt8PtrTy(ctx.builder.getContext())));
+            jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, ctx.tbaa(), ctx.tbaa().tbaa_stack); // XXX: this TBAA is wrong for ptr_phi
             val.Vboxed = ptr_phi;
             ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, r));
             ctx.SAvalues.at(idx) = val;
@@ -3923,9 +4435,9 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
             return;
         }
         else if (allunbox) {
-            PHINode *Tindex_phi = PHINode::Create(T_int8, jl_array_len(edges), "tindex_phi");
+            PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_len(edges), "tindex_phi");
             BB->getInstList().insert(InsertPt, Tindex_phi);
-            jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, tbaa_stack);
+            jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, ctx.tbaa(), ctx.tbaa().tbaa_stack);
             ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)NULL, r));
             ctx.SAvalues.at(idx) = val;
             ctx.ssavalue_assigned.at(idx) = true;
@@ -3933,34 +4445,28 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
         }
     }
     bool isboxed = !deserves_stack(phiType);
-    Type *vtype = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, phiType);
+    Type *vtype = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, phiType);
     // The frontend should really not emit this, but we allow it
     // for convenience.
     if (type_is_ghost(vtype)) {
         assert(jl_is_datatype(phiType) && ((jl_datatype_t*)phiType)->instance);
         // Skip adding it to the PhiNodes list, since we didn't create one.
-        ctx.SAvalues.at(idx) = mark_julia_const(((jl_datatype_t*)phiType)->instance);
+        ctx.SAvalues.at(idx) = mark_julia_const(ctx, ((jl_datatype_t*)phiType)->instance);
         ctx.ssavalue_assigned.at(idx) = true;
         return;
     }
-    jl_cgval_t slot;
+    jl_cgval_t slot(ctx.builder.getContext());
     PHINode *value_phi = NULL;
     if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) {
         // the value will be moved into dest in the predecessor critical block.
         // here it's moved into phi in the successor (from dest)
         dest = emit_static_alloca(ctx, vtype);
         Value *phi = emit_static_alloca(ctx, vtype);
-#if JL_LLVM_VERSION >= 100000
         ctx.builder.CreateMemCpy(phi, MaybeAlign(julia_alignment(phiType)),
              dest, MaybeAlign(0),
              jl_datatype_size(phiType), false);
-#else
-        ctx.builder.CreateMemCpy(phi, julia_alignment(phiType),
-             dest, 0,
-             jl_datatype_size(phiType), false);
-#endif
         ctx.builder.CreateLifetimeEnd(dest);
-        slot = mark_julia_slot(phi, phiType, NULL, tbaa_stack);
+        slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
     }
     else {
         value_phi = PHINode::Create(vtype, jl_array_len(edges), "value_phi");
@@ -3980,10 +4486,13 @@ static void emit_ssaval_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r)
         return emit_phinode_assign(ctx, idx, r);
     }
 
-    jl_cgval_t slot;
+    jl_cgval_t slot(ctx.builder.getContext());
     if (jl_is_phicnode(r)) {
-        jl_varinfo_t &vi = ctx.phic_slots[idx];
-        slot = emit_varinfo(ctx, vi, jl_symbol("phic"));
+        auto it = ctx.phic_slots.find(idx);
+        if (it == ctx.phic_slots.end()) {
+            it = ctx.phic_slots.emplace(idx, jl_varinfo_t(ctx.builder.getContext())).first;
+        }
+        slot = emit_varinfo(ctx, it->second, jl_symbol("phic"));
     } else {
         slot = emit_expr(ctx, r, idx); // slot could be a jl_value_t (unboxed) or jl_value_t* (ispointer)
     }
@@ -4019,13 +4528,13 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t
         if (rval_info.TIndex) {
             tindex = rval_info.TIndex;
             if (!vi.boxroot)
-                tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x7f));
+                tindex = ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x7f));
         }
         else {
             assert(rval_info.isboxed || rval_info.constant);
             tindex = compute_tindex_unboxed(ctx, rval_info, vi.value.typ);
             if (vi.boxroot)
-                tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(T_int8, 0x80));
+                tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
             else
                 rval_info.TIndex = tindex;
         }
@@ -4039,10 +4548,10 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t
         if (vi.pTIndex && rval_info.TIndex) {
             ctx.builder.CreateStore(rval_info.TIndex, vi.pTIndex, vi.isVolatile);
             isboxed = ctx.builder.CreateICmpNE(
-                    ctx.builder.CreateAnd(rval_info.TIndex, ConstantInt::get(T_int8, 0x80)),
-                    ConstantInt::get(T_int8, 0));
-            rval = rval_info.Vboxed ? rval_info.Vboxed : V_rnull;
-            assert(rval->getType() == T_prjlvalue);
+                    ctx.builder.CreateAnd(rval_info.TIndex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                    ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
+            rval = rval_info.Vboxed ? rval_info.Vboxed : Constant::getNullValue(ctx.types().T_prjlvalue);
+            assert(rval->getType() == ctx.types().T_prjlvalue);
             assert(!vi.value.constant);
         }
         else {
@@ -4061,39 +4570,38 @@ static void emit_varinfo_assign(jl_codectx_t &ctx, jl_varinfo_t &vi, jl_cgval_t
 static void emit_assignment(jl_codectx_t &ctx, jl_value_t *l, jl_value_t *r, ssize_t ssaval)
 {
     assert(!jl_is_ssavalue(l));
+    jl_cgval_t rval_info = emit_expr(ctx, r, ssaval);
 
-    jl_sym_t *s = NULL;
     jl_binding_t *bnd = NULL;
     Value *bp = NULL;
     if (jl_is_symbol(l))
-        s = (jl_sym_t*)l;
+        bp = global_binding_pointer(ctx, ctx.module, (jl_sym_t*)l, &bnd, true); // now bp != NULL or bnd != NULL
     else if (jl_is_globalref(l))
-        bp = global_binding_pointer(ctx, jl_globalref_mod(l), jl_globalref_name(l), &bnd, true); // now bp != NULL
+        bp = global_binding_pointer(ctx, jl_globalref_mod(l), jl_globalref_name(l), &bnd, true); // now bp != NULL or bnd != NULL
     else
         assert(jl_is_slot(l));
-    if (bp == NULL && s != NULL)
-        bp = global_binding_pointer(ctx, ctx.module, s, &bnd, true);
-    if (bp != NULL) { // it's a global
-        assert(bnd);
-        Value *rval = mark_callee_rooted(ctx, boxed(ctx, emit_expr(ctx, r, ssaval)));
-        ctx.builder.CreateCall(prepare_call(jlcheckassign_func),
-                           { literal_pointer_val(ctx, bnd),
-                             rval });
-        // Global variable. Does not need debug info because the debugger knows about
-        // its memory location.
+    if (bp != NULL || bnd != NULL) { // it is a global
+        if (bp != NULL) {
+            emit_globalset(ctx, bnd, bp, rval_info, AtomicOrdering::Unordered);
+            // Global variable. Does not need debug info because the debugger knows about
+            // its memory location.
+        }
         return;
     }
 
     int sl = jl_slot_number(l) - 1;
     // it's a local variable
     jl_varinfo_t &vi = ctx.slots[sl];
-    jl_cgval_t rval_info = emit_expr(ctx, r, ssaval);
     emit_varinfo_assign(ctx, vi, rval_info, l);
 }
 
 static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val)
 {
-    jl_varinfo_t &vi = ctx.phic_slots[phic];
+    auto it = ctx.phic_slots.find(phic);
+    if (it == ctx.phic_slots.end()) {
+        it = ctx.phic_slots.emplace(phic, jl_varinfo_t(ctx.builder.getContext())).first;
+    }
+    jl_varinfo_t &vi = it->second;
     // If the val is null, we can ignore the store.
     // The middle end guarantees that the value from this
     // upsilon node is not dynamically observed.
@@ -4112,15 +4620,15 @@ static void emit_upsilonnode(jl_codectx_t &ctx, ssize_t phic, jl_value_t *val)
     if (!val) {
         if (vi.boxroot) {
             // memory optimization: eagerly clear this gc-root now
-            ctx.builder.CreateAlignedStore(V_rnull, vi.boxroot, Align(sizeof(void*)), true);
+            ctx.builder.CreateAlignedStore(Constant::getNullValue(ctx.types().T_prjlvalue), vi.boxroot, Align(sizeof(void*)), true);
         }
         if (vi.pTIndex) {
             // We don't care what the contents of the variable are, but it
             // does need to satisfy the union invariants (i.e. inbounds
             // tindex).
             ctx.builder.CreateAlignedStore(
-                vi.boxroot ? ConstantInt::get(T_int8, 0x80) :
-                             ConstantInt::get(T_int8, 0x01),
+                vi.boxroot ? ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80) :
+                             ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x01),
                 vi.pTIndex, Align(1), true);
         }
         else if (vi.value.V && !vi.value.constant && vi.value.typ != jl_bottom_type) {
@@ -4149,16 +4657,16 @@ static Value *emit_condition(jl_codectx_t &ctx, const jl_cgval_t &condV, const s
         emit_typecheck(ctx, condV, (jl_value_t*)jl_bool_type, msg);
     }
     if (isbool) {
-        Value *cond = emit_unbox(ctx, T_int8, condV, (jl_value_t*)jl_bool_type);
-        assert(cond->getType() == T_int8);
-        return ctx.builder.CreateXor(ctx.builder.CreateTrunc(cond, T_int1), ConstantInt::get(T_int1, 1));
+        Value *cond = emit_unbox(ctx, getInt8Ty(ctx.builder.getContext()), condV, (jl_value_t*)jl_bool_type);
+        assert(cond->getType() == getInt8Ty(ctx.builder.getContext()));
+        return ctx.builder.CreateXor(ctx.builder.CreateTrunc(cond, getInt1Ty(ctx.builder.getContext())), ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 1));
     }
     if (condV.isboxed) {
         return ctx.builder.CreateICmpEQ(boxed(ctx, condV),
             track_pjlvalue(ctx, literal_pointer_val(ctx, jl_false)));
     }
     // not a boolean
-    return ConstantInt::get(T_int1, 0); // TODO: replace with Undef
+    return ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0); // TODO: replace with Undef
 }
 
 static Value *emit_condition(jl_codectx_t &ctx, jl_value_t *cond, const std::string &msg)
@@ -4188,7 +4696,7 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
             // create a new uninitialized variable
             Value *lv = vi.boxroot;
             if (lv != NULL)
-                ctx.builder.CreateStore(V_rnull, lv);
+                ctx.builder.CreateStore(Constant::getNullValue(ctx.types().T_prjlvalue), lv);
             if (lv == NULL || vi.pTIndex != NULL)
                 store_def_flag(ctx, vi, false);
         }
@@ -4202,28 +4710,29 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
     jl_expr_t *ex = (jl_expr_t*)expr;
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
     jl_sym_t *head = ex->head;
-    if (head == meta_sym || head == inbounds_sym || head == coverageeffect_sym
-            || head == aliasscope_sym || head == popaliasscope_sym) {
+    if (head == jl_meta_sym || head == jl_inbounds_sym || head == jl_coverageeffect_sym
+            || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) {
         // some expression types are metadata and can be ignored
         // in statement position
         return;
     }
-    else if (head == leave_sym) {
+    else if (head == jl_leave_sym) {
         assert(jl_is_long(args[0]));
         ctx.builder.CreateCall(prepare_call(jlleave_func),
-                           ConstantInt::get(T_int32, jl_unbox_long(args[0])));
+                           ConstantInt::get(getInt32Ty(ctx.builder.getContext()), jl_unbox_long(args[0])));
     }
-    else if (head == pop_exception_sym) {
+    else if (head == jl_pop_exception_sym) {
         jl_cgval_t excstack_state = emit_expr(ctx, jl_exprarg(expr, 0));
-        assert(excstack_state.V && excstack_state.V->getType() == T_size);
+        assert(excstack_state.V && excstack_state.V->getType() == getSizeTy(ctx.builder.getContext()));
         ctx.builder.CreateCall(prepare_call(jl_restore_excstack_func), excstack_state.V);
         return;
     }
     else {
-        if (!jl_is_method(ctx.linfo->def.method)) {
+        if (!jl_is_method(ctx.linfo->def.method) && !ctx.is_opaque_closure) {
             // TODO: inference is invalid if this has any effect (which it often does)
-            Value *world = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
-            // TODO: world->setOrdering(AtomicOrdering::Monotonic);
+            LoadInst *world = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()),
+                prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
+            world->setOrdering(AtomicOrdering::Acquire);
             ctx.builder.CreateAlignedStore(world, ctx.world_age_field, Align(sizeof(size_t)));
         }
         assert(ssaval_result != -1);
@@ -4231,6 +4740,71 @@ static void emit_stmtpos(jl_codectx_t &ctx, jl_value_t *expr, int ssaval_result)
     }
 }
 
+static std::pair<Function*, Function*> get_oc_function(jl_codectx_t &ctx, jl_method_t *closure_method, jl_tupletype_t *env_t, jl_tupletype_t *argt_typ, jl_value_t *rettype)
+{
+    jl_svec_t *sig_args = NULL;
+    jl_value_t *sigtype = NULL;
+    jl_code_info_t *ir = NULL;
+    JL_GC_PUSH3(&sig_args, &sigtype, &ir);
+
+    size_t nsig = 1 + jl_svec_len(argt_typ->parameters);
+    sig_args = jl_alloc_svec_uninit(nsig);
+    jl_svecset(sig_args, 0, env_t);
+    for (size_t i = 0; i < jl_svec_len(argt_typ->parameters); ++i) {
+        jl_svecset(sig_args, 1+i, jl_svecref(argt_typ->parameters, i));
+    }
+    sigtype = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig);
+
+    jl_method_instance_t *mi = jl_specializations_get_linfo(closure_method, sigtype, jl_emptysvec);
+    jl_code_instance_t *ci = (jl_code_instance_t*)jl_rettype_inferred(mi, ctx.world, ctx.world);
+
+    if (ci == NULL || (jl_value_t*)ci == jl_nothing || ci->inferred == NULL || ci->inferred == jl_nothing) {
+        JL_GC_POP();
+        return std::make_pair((Function*)NULL, (Function*)NULL);
+    }
+    ++EmittedOpaqueClosureFunctions;
+
+    ir = jl_uncompress_ir(closure_method, ci, (jl_array_t*)ci->inferred);
+
+    // TODO: Emit this inline and outline it late using LLVM's coroutine support.
+    orc::ThreadSafeModule closure_m = jl_create_llvm_module(
+            name_from_method_instance(mi), ctx.emission_context.tsctx,
+            ctx.emission_context.imaging,
+            jl_Module->getDataLayout(), Triple(jl_Module->getTargetTriple()));
+    jl_llvm_functions_t closure_decls = emit_function(closure_m, mi, ir, rettype, ctx.emission_context);
+
+    assert(closure_decls.functionObject != "jl_fptr_sparam");
+    bool isspecsig = closure_decls.functionObject != "jl_fptr_args";
+
+    Function *F = NULL;
+    std::string fname = isspecsig ?
+        closure_decls.functionObject :
+        closure_decls.specFunctionObject;
+    if (GlobalValue *V = jl_Module->getNamedValue(fname)) {
+        F = cast<Function>(V);
+    } else {
+        F = Function::Create(get_func_sig(ctx.builder.getContext()),
+                             Function::ExternalLinkage,
+                             fname, jl_Module);
+        F->setAttributes(get_func_attrs(ctx.builder.getContext()));
+    }
+    Function *specF = NULL;
+    if (!isspecsig) {
+        specF = F;
+    } else {
+        //emission context holds context lock so can get module
+        specF = closure_m.getModuleUnlocked()->getFunction(closure_decls.specFunctionObject);
+        if (specF) {
+            jl_returninfo_t returninfo = get_specsig_function(ctx, jl_Module,
+                closure_decls.specFunctionObject, sigtype, rettype, true);
+            specF = returninfo.decl;
+        }
+    }
+    ctx.oc_modules.push_back(std::move(closure_m));
+    JL_GC_POP();
+    return std::make_pair(F, specF);
+}
+
 // `expr` is not clobbered in JL_TRY
 JL_GCC_IGNORE_START("-Wclobbered")
 static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
@@ -4247,14 +4821,14 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         assert(idx >= 0);
         if (!ctx.ssavalue_assigned.at(idx)) {
             ctx.ssavalue_assigned.at(idx) = true; // (assignment, not comparison test)
-            return jl_cgval_t(); // dead code branch
+            return jl_cgval_t(ctx.builder.getContext()); // dead code branch
         }
         else {
             return ctx.SAvalues.at(idx); // at this point, SAvalues[idx] actually contains the SAvalue
         }
     }
     if (jl_is_globalref(expr)) {
-        return emit_globalref(ctx, jl_globalref_mod(expr), jl_globalref_name(expr));
+        return emit_globalref(ctx, jl_globalref_mod(expr), jl_globalref_name(expr), AtomicOrdering::Unordered);
     }
     if (jl_is_linenode(expr)) {
         jl_error("LineNumberNode in value position");
@@ -4297,37 +4871,46 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         if (needroot && jl_is_method(ctx.linfo->def.method)) { // toplevel exprs and some integers are already rooted
             jl_add_method_root(ctx, expr);
         }
-        return mark_julia_const(expr);
+        return mark_julia_const(ctx, expr);
     }
 
     jl_expr_t *ex = (jl_expr_t*)expr;
     jl_value_t **args = (jl_value_t**)jl_array_data(ex->args);
+    size_t nargs = jl_array_len(ex->args);
     jl_sym_t *head = ex->head;
     // this is object-disoriented.
     // however, this is a good way to do it because it should *not* be easy
     // to add new node types.
-    if (head == isdefined_sym) {
+    if (head == jl_isdefined_sym) {
+        assert(nargs == 1);
         return emit_isdefined(ctx, args[0]);
     }
-    else if (head == throw_undef_if_not_sym) {
+    else if (head == jl_throw_undef_if_not_sym) {
+        assert(nargs == 2);
         jl_sym_t *var = (jl_sym_t*)args[0];
-        Value *cond = ctx.builder.CreateTrunc(emit_unbox(ctx, T_int8, emit_expr(ctx, args[1]), (jl_value_t*)jl_bool_type), T_int1);
-        if (var == getfield_undefref_sym) {
+        Value *cond = ctx.builder.CreateTrunc(emit_unbox(ctx, getInt8Ty(ctx.builder.getContext()), emit_expr(ctx, args[1]), (jl_value_t*)jl_bool_type), getInt1Ty(ctx.builder.getContext()));
+        if (var == jl_getfield_undefref_sym) {
             raise_exception_unless(ctx, cond,
                 literal_pointer_val(ctx, jl_undefref_exception));
         }
         else {
             undef_var_error_ifnot(ctx, cond, var);
         }
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
     }
-    else if (head == invoke_sym) {
+    else if (head == jl_invoke_sym) {
         assert(ssaval >= 0);
         jl_value_t *expr_t = jl_is_long(ctx.source->ssavaluetypes) ? (jl_value_t*)jl_any_type :
             jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaval);
         return emit_invoke(ctx, ex, expr_t);
     }
-    else if (head == call_sym) {
+    else if (head == jl_invoke_modify_sym) {
+        assert(ssaval >= 0);
+        jl_value_t *expr_t = jl_is_long(ctx.source->ssavaluetypes) ? (jl_value_t*)jl_any_type :
+            jl_array_ptr_ref(ctx.source->ssavaluetypes, ssaval);
+        return emit_invoke_modify(ctx, ex, expr_t);
+    }
+    else if (head == jl_call_sym) {
         jl_value_t *expr_t;
         if (ssaval < 0)
             // TODO: this case is needed for the call to emit_expr in emit_llvmcall
@@ -4343,80 +4926,94 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         }
         return res;
     }
-    else if (head == foreigncall_sym) {
+    else if (head == jl_foreigncall_sym) {
         return emit_ccall(ctx, args, jl_array_dim0(ex->args));
     }
-    else if (head == cfunction_sym) {
+    else if (head == jl_cfunction_sym) {
+        assert(nargs == 5);
         jl_cgval_t fexpr_rt = emit_expr(ctx, args[1]);
         return emit_cfunction(ctx, args[0], fexpr_rt, args[2], (jl_svec_t*)args[3]);
     }
-    else if (head == assign_sym) {
+    else if (head == jl_assign_sym) {
+        assert(nargs == 2);
         emit_assignment(ctx, args[0], args[1], ssaval);
-        return ghostValue(jl_nothing_type);
+        return ghostValue(ctx, jl_nothing_type);
     }
-    else if (head == static_parameter_sym) {
+    else if (head == jl_static_parameter_sym) {
+        assert(nargs == 1);
         return emit_sparam(ctx, jl_unbox_long(args[0]) - 1);
     }
-    else if (head == method_sym) {
-        jl_value_t *mn = args[0];
-        assert(jl_expr_nargs(ex) != 1 || jl_is_symbol(mn) || jl_is_slot(mn));
+    else if (head == jl_method_sym) {
+        if (nargs == 1) {
+            jl_value_t *mn = args[0];
+            assert(jl_is_symbol(mn) || jl_is_slot(mn));
 
-        Value *bp = NULL, *name, *bp_owner = V_null;
-        jl_binding_t *bnd = NULL;
-        bool issym = jl_is_symbol(mn);
-        bool isglobalref = !issym && jl_is_globalref(mn);
-        jl_module_t *mod = ctx.module;
-        if (issym || isglobalref) {
-            if (isglobalref) {
-                mod = jl_globalref_mod(mn);
-                mn = (jl_value_t*)jl_globalref_name(mn);
-            }
-            JL_TRY {
-                if (jl_symbol_name((jl_sym_t*)mn)[0] == '@')
-                    jl_errorf("macro definition not allowed inside a local scope");
-                name = literal_pointer_val(ctx, mn);
-                bnd = jl_get_binding_for_method_def(mod, (jl_sym_t*)mn);
-            }
-            JL_CATCH {
-                jl_value_t *e = jl_current_exception();
-                // errors. boo. root it somehow :(
-                bnd = jl_get_binding_wr(ctx.module, (jl_sym_t*)jl_gensym(), 1);
-                bnd->value = e;
-                bnd->constp = 1;
-                raise_exception(ctx, literal_pointer_val(ctx, e));
-                return ghostValue(jl_nothing_type);
-            }
-            bp = julia_binding_gv(ctx, bnd);
-            bp_owner = literal_pointer_val(ctx, (jl_value_t*)mod);
-        }
-        else if (jl_is_slot(mn) || jl_is_argument(mn)) {
-            int sl = jl_slot_number(mn)-1;
-            jl_varinfo_t &vi = ctx.slots[sl];
-            bp = vi.boxroot;
-            name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl));
-        }
-        if (bp) {
-            Value *mdargs[5] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp,
-                                 bp_owner, literal_pointer_val(ctx, bnd) };
-            jl_cgval_t gf = mark_julia_type(
-                    ctx,
-                    ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), makeArrayRef(mdargs)),
-                    true,
-                    jl_function_type);
-            if (jl_expr_nargs(ex) == 1)
+            Value *bp = NULL, *name, *bp_owner = Constant::getNullValue(ctx.types().T_pjlvalue);
+            jl_binding_t *bnd = NULL;
+            bool issym = jl_is_symbol(mn);
+            bool isglobalref = !issym && jl_is_globalref(mn);
+            jl_module_t *mod = ctx.module;
+            if (issym || isglobalref) {
+                if (isglobalref) {
+                    mod = jl_globalref_mod(mn);
+                    mn = (jl_value_t*)jl_globalref_name(mn);
+                }
+                JL_TRY {
+                    if (jl_symbol_name((jl_sym_t*)mn)[0] == '@')
+                        jl_errorf("macro definition not allowed inside a local scope");
+                    name = literal_pointer_val(ctx, mn);
+                    bnd = jl_get_binding_for_method_def(mod, (jl_sym_t*)mn);
+                }
+                JL_CATCH {
+                    jl_value_t *e = jl_current_exception();
+                    // errors. boo. root it somehow :(
+                    bnd = jl_get_binding_wr(ctx.module, (jl_sym_t*)jl_gensym(), 1);
+                    bnd->value = e;
+                    bnd->constp = 1;
+                    raise_exception(ctx, literal_pointer_val(ctx, e));
+                    return ghostValue(ctx, jl_nothing_type);
+                }
+                bp = julia_binding_gv(ctx, bnd);
+                bp = julia_binding_pvalue(ctx, bp);
+                bp_owner = literal_pointer_val(ctx, (jl_value_t*)mod);
+            }
+            else if (jl_is_slot(mn) || jl_is_argument(mn)) {
+                int sl = jl_slot_number(mn)-1;
+                jl_varinfo_t &vi = ctx.slots[sl];
+                bp = vi.boxroot;
+                name = literal_pointer_val(ctx, (jl_value_t*)slot_symbol(ctx, sl));
+            }
+            if (bp) {
+                Value *mdargs[5] = { name, literal_pointer_val(ctx, (jl_value_t*)mod), bp,
+                                    bp_owner, literal_pointer_val(ctx, bnd) };
+                jl_cgval_t gf = mark_julia_type(
+                        ctx,
+                        ctx.builder.CreateCall(prepare_call(jlgenericfunction_func), makeArrayRef(mdargs)),
+                        true,
+                        jl_function_type);
                 return gf;
+            }
+            emit_error(ctx, "method: invalid declaration");
+            return jl_cgval_t(ctx.builder.getContext());
         }
+        assert(nargs == 3);
         Value *a1 = boxed(ctx, emit_expr(ctx, args[1]));
         Value *a2 = boxed(ctx, emit_expr(ctx, args[2]));
-        Value *mdargs[3] = {
+        Value *mdargs[4] = {
             /*argdata*/a1,
+            ConstantPointerNull::get(cast<PointerType>(ctx.types().T_prjlvalue)),
             /*code*/a2,
             /*module*/literal_pointer_val(ctx, (jl_value_t*)ctx.module)
         };
-        ctx.builder.CreateCall(prepare_call(jlmethod_func), makeArrayRef(mdargs));
-        return ghostValue(jl_nothing_type);
-    }
-    else if (head == const_sym) {
+        jl_cgval_t meth = mark_julia_type(
+            ctx,
+            ctx.builder.CreateCall(prepare_call(jlmethod_func), makeArrayRef(mdargs)),
+            true,
+            jl_method_type);
+        return meth;
+    }
+    else if (head == jl_const_sym) {
+        assert(nargs == 1);
         jl_sym_t *sym = (jl_sym_t*)args[0];
         jl_module_t *mod = ctx.module;
         if (jl_is_globalref(sym)) {
@@ -4425,13 +5022,13 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         }
         if (jl_is_symbol(sym)) {
             jl_binding_t *bnd = NULL;
-            (void)global_binding_pointer(ctx, mod, sym, &bnd, true); assert(bnd);
-            ctx.builder.CreateCall(prepare_call(jldeclareconst_func),
-                               literal_pointer_val(ctx, bnd));
+            Value *bp = global_binding_pointer(ctx, mod, sym, &bnd, true);
+            if (bp)
+                ctx.builder.CreateCall(prepare_call(jldeclareconst_func), bp);
         }
     }
-    else if (head == new_sym) {
-        size_t nargs = jl_array_len(ex->args);
+    else if (head == jl_new_sym) {
+        assert(nargs > 0);
         jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
@@ -4448,8 +5045,9 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         // it to the inferred type.
         return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type);
     }
-    else if (head == splatnew_sym) {
-        jl_cgval_t argv[2];
+    else if (head == jl_splatnew_sym) {
+        jl_cgval_t argv[2] = {jl_cgval_t(ctx.builder.getContext()), jl_cgval_t(ctx.builder.getContext())};
+        assert(nargs == 2);
         argv[0] = emit_expr(ctx, args[0]);
         argv[1] = emit_expr(ctx, args[1]);
         Value *typ = boxed(ctx, argv[0]);
@@ -4459,12 +5057,90 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
         // it to the inferred type.
         return mark_julia_type(ctx, val, true, (jl_value_t*)jl_any_type);
     }
-    else if (head == exc_sym) {
+    else if (head == jl_new_opaque_closure_sym) {
+        assert(nargs >= 4 && "Not enough arguments in new_opaque_closure");
+        SmallVector<jl_cgval_t, 4> argv(nargs, jl_cgval_t(ctx.builder.getContext()));
+        for (size_t i = 0; i < nargs; ++i) {
+            argv[i] = emit_expr(ctx, args[i]);
+        }
+        const jl_cgval_t &argt = argv[0];
+        const jl_cgval_t &lb = argv[1];
+        const jl_cgval_t &ub = argv[2];
+        const jl_cgval_t &source = argv[3];
+        if (source.constant == NULL) {
+            // For now, we require non-constant source to be handled by using
+            // eval. This should probably be a verifier error and an abort here.
+            emit_error(ctx, "(internal error) invalid IR: opaque closure source be constant");
+            return jl_cgval_t(ctx.builder.getContext());
+        }
+        bool can_optimize = argt.constant != NULL && lb.constant != NULL && ub.constant != NULL &&
+            jl_is_tuple_type(argt.constant) &&
+            jl_is_type(lb.constant) && jl_is_type(ub.constant) && jl_is_method(source.constant) &&
+            ((jl_method_t*)source.constant)->nargs > 0 &&
+            jl_is_valid_oc_argtype((jl_tupletype_t*)argt.constant, (jl_method_t*)source.constant);
+
+        if (can_optimize) {
+            jl_value_t *closure_t = NULL;
+            jl_tupletype_t *env_t = NULL;
+            JL_GC_PUSH2(&closure_t, &env_t);
+
+            jl_value_t **env_component_ts = (jl_value_t**)alloca(sizeof(jl_value_t*) * (nargs-4));
+            for (size_t i = 0; i < nargs - 4; ++i) {
+                env_component_ts[i] = argv[4+i].typ;
+            }
+
+            env_t = jl_apply_tuple_type_v(env_component_ts, nargs-4);
+            // we need to know the full env type to look up the right specialization
+            if (jl_is_concrete_type((jl_value_t*)env_t)) {
+                jl_tupletype_t *argt_typ = (jl_tupletype_t*)argt.constant;
+                Function *F, *specF;
+                std::tie(F, specF) = get_oc_function(ctx, (jl_method_t*)source.constant, env_t, argt_typ, ub.constant);
+                if (F) {
+                    jl_cgval_t jlcall_ptr = mark_julia_type(ctx, F, false, jl_voidpointer_type);
+                    jl_cgval_t world_age = mark_julia_type(ctx,
+                                      tbaa_decorate(ctx.tbaa().tbaa_gcframe,
+                                      ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ctx.world_age_field, Align(sizeof(size_t)))),
+                        false,
+                        jl_long_type);
+                    jl_cgval_t fptr(ctx.builder.getContext());
+                    if (specF)
+                        fptr = mark_julia_type(ctx, specF, false, jl_voidpointer_type);
+                    else
+                        fptr = mark_julia_type(ctx, (llvm::Value*)Constant::getNullValue(getSizeTy(ctx.builder.getContext())), false, jl_voidpointer_type);
+
+                    // TODO: Inline the env at the end of the opaque closure and generate a descriptor for GC
+                    jl_cgval_t env = emit_new_struct(ctx, (jl_value_t*)env_t, nargs-4, &argv.data()[4]);
+
+                    jl_cgval_t closure_fields[5] = {
+                        env,
+                        world_age,
+                        source,
+                        jlcall_ptr,
+                        fptr
+                    };
+
+                    closure_t = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt_typ, ub.constant);
+                    jl_cgval_t ret = emit_new_struct(ctx, closure_t, 5, closure_fields);
+
+                    JL_GC_POP();
+                    return ret;
+                }
+            }
+            JL_GC_POP();
+        }
+
+        return mark_julia_type(ctx,
+                emit_jlcall(ctx, jl_new_opaque_closure_jlcall_func, Constant::getNullValue(ctx.types().T_prjlvalue), argv.data(), nargs, JLCALL_F_CC),
+                true, jl_any_type);
+    }
+    else if (head == jl_exc_sym) {
+        assert(nargs == 0);
         return mark_julia_type(ctx,
                 ctx.builder.CreateCall(prepare_call(jl_current_exception_func)),
                 true, jl_any_type);
     }
-    else if (head == copyast_sym) {
+    else if (head == jl_copyast_sym) {
+        assert(nargs == 1);
         jl_cgval_t ast = emit_expr(ctx, args[0]);
         if (ast.typ != (jl_value_t*)jl_expr_type && ast.typ != (jl_value_t*)jl_any_type) {
             // elide call to jl_copy_ast when possible
@@ -4474,30 +5150,29 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
                 ctx.builder.CreateCall(prepare_call(jlcopyast_func),
                                        boxed(ctx, ast)), true, jl_expr_type);
     }
-    else if (head == loopinfo_sym) {
+    else if (head == jl_loopinfo_sym) {
         // parse Expr(:loopinfo, "julia.simdloop", ("llvm.loop.vectorize.width", 4))
         SmallVector<Metadata *, 8> MDs;
-        for (int i = 0, ie = jl_expr_nargs(ex); i < ie; ++i) {
-            Metadata *MD = to_md_tree(args[i]);
+        for (int i = 0, ie = nargs; i < ie; ++i) {
+            Metadata *MD = to_md_tree(args[i], ctx.builder.getContext());
             if (MD)
                 MDs.push_back(MD);
         }
 
-        MDNode* MD = MDNode::get(jl_LLVMContext, MDs);
+        MDNode* MD = MDNode::get(ctx.builder.getContext(), MDs);
         CallInst *I = ctx.builder.CreateCall(prepare_call(jl_loopinfo_marker_func));
         I->setMetadata("julia.loopinfo", MD);
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
-    else if (head == leave_sym || head == coverageeffect_sym
-            || head == pop_exception_sym || head == enter_sym || head == inbounds_sym
-            || head == aliasscope_sym || head == popaliasscope_sym) {
+    else if (head == jl_leave_sym || head == jl_coverageeffect_sym
+            || head == jl_pop_exception_sym || head == jl_enter_sym || head == jl_inbounds_sym
+            || head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) {
         jl_errorf("Expr(:%s) in value position", jl_symbol_name(head));
     }
-    else if (head == boundscheck_sym) {
-        return mark_julia_const(bounds_check_enabled(ctx, jl_true) ? jl_true : jl_false);
+    else if (head == jl_boundscheck_sym) {
+        return mark_julia_const(ctx, bounds_check_enabled(ctx, jl_true) ? jl_true : jl_false);
     }
-    else if (head == gc_preserve_begin_sym) {
-        size_t nargs = jl_array_len(ex->args);
+    else if (head == jl_gc_preserve_begin_sym) {
         jl_cgval_t *argv = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * nargs);
         for (size_t i = 0; i < nargs; ++i) {
             argv[i] = emit_expr(ctx, args[i]);
@@ -4510,18 +5185,18 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
             if (ai.isboxed) {
                 vals.push_back(ai.Vboxed);
             }
-            else if (!jl_is_pointerfree(ai.typ)) {
+            else if (jl_is_concrete_immutable(ai.typ) && !jl_is_pointerfree(ai.typ)) {
                 Type *at = julia_type_to_llvm(ctx, ai.typ);
                 vals.push_back(emit_unbox(ctx, at, ai, ai.typ));
             }
         }
         Value *token = vals.empty()
-            ? (Value*)ConstantTokenNone::get(jl_LLVMContext)
+            ? (Value*)ConstantTokenNone::get(ctx.builder.getContext())
             : ctx.builder.CreateCall(prepare_call(gc_preserve_begin_func), vals);
-        jl_cgval_t tok(token, NULL, false, (jl_value_t*)jl_nothing_type, NULL);
+        jl_cgval_t tok(token, NULL, false, (jl_value_t*)jl_nothing_type, NULL, ctx.tbaa());
         return tok;
     }
-    else if (head == gc_preserve_end_sym) {
+    else if (head == jl_gc_preserve_end_sym) {
         // We only support ssa values as the argument. Everything else will
         // fall back to the default behavior of preserving the argument value
         // until the end of the scope, which is correct, but not optimal.
@@ -4544,12 +5219,12 @@ static jl_cgval_t emit_expr(jl_codectx_t &ctx, jl_value_t *expr, ssize_t ssaval)
                 literal_pointer_val(ctx, expr)
             };
             ctx.builder.CreateCall(prepare_call(jltopeval_func), args);
-            return ghostValue(jl_nothing_type);
+            return ghostValue(ctx, jl_nothing_type);
         }
         jl_errorf("unsupported or misplaced expression \"%s\" in function %s",
                   jl_symbol_name(head), ctx.name);
     }
-    return jl_cgval_t();
+    return jl_cgval_t(ctx.builder.getContext());
 }
 JL_GCC_IGNORE_STOP
 
@@ -4559,45 +5234,69 @@ JL_GCC_IGNORE_STOP
 static void allocate_gc_frame(jl_codectx_t &ctx, BasicBlock *b0)
 {
     // TODO: requires the runtime, but is generated unconditionally
-
     // allocate a placeholder gc instruction
-    ctx.ptlsStates = ctx.builder.CreateCall(prepare_call(jltls_states_func));
-    int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void*);
-    ctx.signalPage = emit_nthptr_recast(ctx, ctx.ptlsStates, nthfield, tbaa_const,
-                                        PointerType::get(T_psize, 0));
+    ctx.pgcstack = ctx.builder.CreateCall(prepare_call(jlpgcstack_func));
+}
+
+static Value *get_current_task(jl_codectx_t &ctx)
+{
+    const int ptls_offset = offsetof(jl_task_t, gcstack);
+    return ctx.builder.CreateInBoundsGEP(
+        ctx.types().T_pjlvalue, emit_bitcast(ctx, ctx.pgcstack, ctx.types().T_ppjlvalue),
+        ConstantInt::get(getSizeTy(ctx.builder.getContext()), -(ptls_offset / sizeof(void *))),
+        "current_task");
+}
+
+// Get PTLS through current task.
+static Value *get_current_ptls(jl_codectx_t &ctx)
+{
+    return get_current_ptls_from_task(ctx.builder, get_current_task(ctx), ctx.tbaa().tbaa_gcframe);
 }
 
+// Store world age at the entry block of the function. This function should be
+// called right after `allocate_gc_frame` and there should be no context switch.
 static void emit_last_age_field(jl_codectx_t &ctx)
 {
+    auto ptls = get_current_task(ctx);
+    assert(ctx.builder.GetInsertBlock() == ctx.pgcstack->getParent());
     ctx.world_age_field = ctx.builder.CreateInBoundsGEP(
-            T_size,
-            ctx.builder.CreateBitCast(ctx.ptlsStates, T_psize),
-            ConstantInt::get(T_size, offsetof(jl_tls_states_t, world_age) / sizeof(size_t)));
+            getSizeTy(ctx.builder.getContext()),
+            ctx.builder.CreateBitCast(ptls, getSizePtrTy(ctx.builder.getContext())),
+            ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_task_t, world_age) / sizeof(size_t)),
+            "world_age");
+}
+
+// Get signal page through current task.
+static Value *get_current_signal_page(jl_codectx_t &ctx)
+{
+    // return ctx.builder.CreateCall(prepare_call(reuse_signal_page_func));
+    auto ptls = get_current_ptls(ctx);
+    int nthfield = offsetof(jl_tls_states_t, safepoint) / sizeof(void *);
+    return emit_nthptr_recast(ctx, ptls, nthfield, ctx.tbaa().tbaa_const, getSizePtrTy(ctx.builder.getContext()));
 }
 
 static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_codegen_params_t &params)
 {
-    jl_codectx_t ctx(jl_LLVMContext, params);
+    ++EmittedToJLInvokes;
+    jl_codectx_t ctx(M->getContext(), params);
     std::string name;
-    raw_string_ostream(name) << "tojlinvoke" << globalUnique++;
-    Function *f = Function::Create(jl_func_sig,
-            GlobalVariable::PrivateLinkage,
+    raw_string_ostream(name) << "tojlinvoke" << globalUniqueGeneratedNames++;
+    Function *f = Function::Create(ctx.types().T_jlfunc,
+            GlobalVariable::InternalLinkage,
             name, M);
     jl_init_function(f);
-    f->addFnAttr(Thunk);
+    f->addFnAttr(Attribute::get(M->getContext(), "thunk"));
     //f->setAlwaysInline();
     ctx.f = f; // for jl_Module
-    BasicBlock *b0 = BasicBlock::Create(jl_LLVMContext, "top", f);
+    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", f);
     ctx.builder.SetInsertPoint(b0);
     Function *theFunc;
     Value *theFarg;
-    if (codeinst->invoke != NULL) {
-        StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->invoke, codeinst);
-        theFunc = cast<Function>(M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(jl_LLVMContext))
-#if JL_LLVM_VERSION >= 90000
-                .getCallee()
-#endif
-                );
+    auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+    if (params.cache && invoke != NULL) {
+        StringRef theFptrName = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)invoke, codeinst);
+        theFunc = cast<Function>(
+            M->getOrInsertFunction(theFptrName, jlinvoke_func->_type(ctx.builder.getContext())).getCallee());
         theFarg = literal_pointer_val(ctx, (jl_value_t*)codeinst);
     }
     else {
@@ -4612,6 +5311,14 @@ static Function *emit_tojlinvoke(jl_code_instance_t *codeinst, Module *M, jl_cod
     return f;
 }
 
+static Type *get_returnroots_type(jl_codectx_t &ctx, unsigned rootcount) {
+    return ArrayType::get(ctx.types().T_prjlvalue, rootcount);
+}
+
+static Type *get_unionbytes_type(LLVMContext &C, unsigned unionbytes) {
+    return ArrayType::get(getInt8Ty(C), unionbytes);
+}
+
 static void emit_cfunc_invalidate(
         Function *gf_thunk, jl_returninfo_t::CallingConv cc, unsigned return_roots,
         jl_value_t *calltype, jl_value_t *rettype,
@@ -4619,10 +5326,11 @@ static void emit_cfunc_invalidate(
         jl_codegen_params_t &params,
         Function *target)
 {
-    jl_codectx_t ctx(jl_LLVMContext, params);
+    ++EmittedCFuncInvalidates;
+    jl_codectx_t ctx(gf_thunk->getParent()->getContext(), params);
     ctx.f = gf_thunk;
 
-    BasicBlock *b0 = BasicBlock::Create(jl_LLVMContext, "top", gf_thunk);
+    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", gf_thunk);
     ctx.builder.SetInsertPoint(b0);
     DebugLoc noDbg;
     ctx.builder.SetCurrentDebugLocation(noDbg);
@@ -4637,20 +5345,20 @@ static void emit_cfunc_invalidate(
     for (size_t i = 0; i < nargs; i++) {
         jl_value_t *jt = jl_nth_slot_type(calltype, i);
         bool isboxed = deserves_argbox(jt);
-        Type *et = isboxed ?  T_prjlvalue : julia_type_to_llvm(ctx, jt);
+        Type *et = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
         if (is_uniquerep_Type(jt)) {
-            myargs[i] = mark_julia_const(jl_tparam0(jt));
+            myargs[i] = mark_julia_const(ctx, jl_tparam0(jt));
         }
         else if (type_is_ghost(et)) {
             assert(jl_is_datatype(jt) && ((jl_datatype_t*)jt)->instance);
-            myargs[i] = mark_julia_const(((jl_datatype_t*)jt)->instance);
+            myargs[i] = mark_julia_const(ctx, ((jl_datatype_t*)jt)->instance);
         }
         else {
             Value *arg_v = &*AI;
             ++AI;
             Type *at = arg_v->getType();
             if (!isboxed && et->isAggregateType()) {
-                myargs[i] = mark_julia_slot(arg_v, jt, NULL, tbaa_const);
+                myargs[i] = mark_julia_slot(arg_v, jt, NULL, ctx.tbaa(), ctx.tbaa().tbaa_const);
             }
             else {
                 assert(at == et);
@@ -4677,13 +5385,17 @@ static void emit_cfunc_invalidate(
         }
         else {
             gf_ret = emit_bitcast(ctx, gf_ret, gfrt->getPointerTo());
-            ctx.builder.CreateRet(ctx.builder.CreateAlignedLoad(gf_ret, Align(julia_alignment(rettype))));
+            ctx.builder.CreateRet(ctx.builder.CreateAlignedLoad(gfrt, gf_ret, Align(julia_alignment(rettype))));
         }
         break;
     }
     case jl_returninfo_t::SRet: {
-        if (return_roots)
-            ctx.builder.CreateStore(gf_ret, gf_thunk->arg_begin() + 1);
+        if (return_roots) {
+            Value *root1 = gf_thunk->arg_begin() + 1; // root1 has type [n x {}*]*
+            assert(cast<PointerType>(root1->getType())->isOpaqueOrPointeeTypeMatches(get_returnroots_type(ctx, return_roots)));
+            root1 = ctx.builder.CreateConstInBoundsGEP2_32(get_returnroots_type(ctx, return_roots), root1, 0, 0);
+            ctx.builder.CreateStore(gf_ret, root1);
+        }
         emit_memcpy(ctx, &*gf_thunk->arg_begin(), nullptr, gf_ret, nullptr, jl_datatype_size(rettype), julia_alignment(rettype));
         ctx.builder.CreateRetVoid();
         break;
@@ -4692,7 +5404,7 @@ static void emit_cfunc_invalidate(
         Type *retty = gf_thunk->getReturnType();
         Value *gf_retval = UndefValue::get(retty);
         Value *tindex = compute_box_tindex(ctx, emit_typeof_boxed(ctx, gf_retbox), (jl_value_t*)jl_any_type, rettype);
-        tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(T_int8, 0x80));
+        tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
         gf_retval = ctx.builder.CreateInsertValue(gf_retval, gf_ret, 0);
         gf_retval = ctx.builder.CreateInsertValue(gf_retval, tindex, 1);
         ctx.builder.CreateRet(gf_retval);
@@ -4722,11 +5434,12 @@ static Function* gen_cfun_wrapper(
     jl_value_t *declrt, jl_method_instance_t *lam,
     jl_unionall_t *unionall_env, jl_svec_t *sparam_vals, jl_array_t **closure_types)
 {
+    ++GeneratedCFuncWrappers;
     // Generate a c-callable wrapper
     assert(into);
     size_t nargs = sig.nccallargs;
     const char *name = "cfunction";
-    size_t world = jl_world_counter;
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     jl_code_instance_t *codeinst = NULL;
     bool nest = (!ff || unionall_env);
     jl_value_t *astrt = (jl_value_t*)jl_any_type;
@@ -4740,11 +5453,11 @@ static Function* gen_cfun_wrapper(
         // TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here
         codeinst = jl_compile_method_internal(lam, world);
         assert(codeinst->invoke);
-        if (codeinst->invoke == jl_fptr_args) {
+        if (codeinst->invoke == jl_fptr_args_addr) {
             callptr = codeinst->specptr.fptr;
             calltype = 1;
         }
-        else if (codeinst->invoke == jl_fptr_const_return) {
+        else if (codeinst->invoke == jl_fptr_const_return_addr) {
             // don't need the fptr
             callptr = (void*)codeinst->rettype_const;
             calltype = 2;
@@ -4764,21 +5477,72 @@ static Function* gen_cfun_wrapper(
     }
 
     std::string funcName;
-    raw_string_ostream(funcName) << "jlcapi_" << name << "_" << globalUnique++;
+    raw_string_ostream(funcName) << "jlcapi_" << name << "_" << globalUniqueGeneratedNames++;
 
-    Module *M = into;
+    Module *M = into; // Safe because ctx lock is held by params
     AttributeList attributes = sig.attributes;
     FunctionType *functype;
     if (nest) {
         // add nest parameter (pointer to jl_value_t* data array) after sret arg
         assert(closure_types);
         std::vector<Type*> fargt_sig(sig.fargt_sig);
-        fargt_sig.insert(fargt_sig.begin() + sig.sret, T_pprjlvalue);
-        functype = FunctionType::get(sig.sret ? T_void : sig.prt, fargt_sig, /*isVa*/false);
-        attributes = attributes.addAttribute(jl_LLVMContext, 1 + sig.sret, Attribute::Nest);
+
+        fargt_sig.insert(fargt_sig.begin() + sig.sret, JuliaType::get_pprjlvalue_ty(M->getContext()));
+
+        // Shift LLVM attributes for parameters one to the right, as
+        // we are adding the extra nest parameter after sret arg.
+        std::vector<std::pair<unsigned, AttributeSet>> newAttributes;
+        newAttributes.reserve(attributes.getNumAttrSets() + 1);
+#if JL_LLVM_VERSION >= 140000
+        auto it = *attributes.indexes().begin();
+        const auto it_end = *attributes.indexes().end();
+#else
+        auto it = attributes.index_begin();
+        const auto it_end = attributes.index_end();
+#endif
+
+        // Skip past FunctionIndex
+        if (it == AttributeList::AttrIndex::FunctionIndex) {
+            ++it;
+        }
+
+        // Move past ReturnValue and parameter return value
+        for (;it < AttributeList::AttrIndex::FirstArgIndex + sig.sret; ++it) {
+            if (hasAttributesAtIndex(attributes, it)) {
+                newAttributes.emplace_back(it, attributes.getAttributes(it));
+            }
+        }
+
+        // Add the new nest attribute
+#if JL_LLVM_VERSION >= 140000
+        AttrBuilder attrBuilder(M->getContext());
+#else
+        AttrBuilder attrBuilder;
+#endif
+        attrBuilder.addAttribute(Attribute::Nest);
+        newAttributes.emplace_back(it, AttributeSet::get(M->getContext(), attrBuilder));
+
+        // Shift forward the rest of the attributes
+        if (attributes.getNumAttrSets() > 0) { // without this check the loop range below is invalid
+            for(; it != it_end; ++it) {
+                if (hasAttributesAtIndex(attributes, it)) {
+                    newAttributes.emplace_back(it + 1, attributes.getAttributes(it));
+                }
+            }
+        }
+
+        // Remember to add back FunctionIndex
+        if (hasAttributesAtIndex(attributes, AttributeList::AttrIndex::FunctionIndex)) {
+            newAttributes.emplace_back(AttributeList::AttrIndex::FunctionIndex,
+                                       getFnAttrs(attributes));
+        }
+
+        // Create the new AttributeList
+        attributes = AttributeList::get(M->getContext(), newAttributes);
+        functype = FunctionType::get(sig.sret ? getVoidTy(M->getContext()) : sig.prt, fargt_sig, /*isVa*/false);
     }
     else {
-        functype = sig.functype();
+        functype = sig.functype(M->getContext());
     }
     Function *cw = Function::Create(functype,
             GlobalVariable::ExternalLinkage,
@@ -4786,38 +5550,36 @@ static Function* gen_cfun_wrapper(
     cw->setAttributes(attributes);
     jl_init_function(cw);
 
-    jl_codectx_t ctx(jl_LLVMContext, params);
+    jl_codectx_t ctx(M->getContext(), params);
     ctx.f = cw;
     ctx.world = world;
     ctx.name = name;
     ctx.funcName = name;
 
-    BasicBlock *b0 = BasicBlock::Create(jl_LLVMContext, "top", cw);
+    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", cw);
     ctx.builder.SetInsertPoint(b0);
     DebugLoc noDbg;
     ctx.builder.SetCurrentDebugLocation(noDbg);
     allocate_gc_frame(ctx, b0);
     emit_last_age_field(ctx);
 
-    Value *dummy_world = ctx.builder.CreateAlloca(T_size);
-    Value *have_tls = ctx.builder.CreateIsNotNull(ctx.ptlsStates);
+    Value *dummy_world = ctx.builder.CreateAlloca(getSizeTy(ctx.builder.getContext()));
+    Value *have_tls = ctx.builder.CreateIsNotNull(ctx.pgcstack);
     // TODO: in the future, try to initialize a full TLS context here
     // for now, just use a dummy field to avoid a branch in this function
     ctx.world_age_field = ctx.builder.CreateSelect(have_tls, ctx.world_age_field, dummy_world);
-    Value *last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateAlignedLoad(ctx.world_age_field, Align(sizeof(size_t))));
-    Value *valid_tls = ctx.builder.CreateIsNotNull(last_age);
-    have_tls = ctx.builder.CreateAnd(have_tls, valid_tls);
-    ctx.world_age_field = ctx.builder.CreateSelect(valid_tls, ctx.world_age_field, dummy_world);
-    Value *world_v = ctx.builder.CreateAlignedLoad(prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
-    // TODO: cast<LoadInst>(world_v)->setOrdering(AtomicOrdering::Monotonic);
+    Value *last_age = tbaa_decorate(ctx.tbaa().tbaa_gcframe, ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()), ctx.world_age_field, Align(sizeof(size_t))));
+    Value *world_v = ctx.builder.CreateAlignedLoad(getSizeTy(ctx.builder.getContext()),
+        prepare_global_in(jl_Module, jlgetworld_global), Align(sizeof(size_t)));
+    cast<LoadInst>(world_v)->setOrdering(AtomicOrdering::Acquire);
 
     Value *age_ok = NULL;
     if (calltype) {
         LoadInst *lam_max = ctx.builder.CreateAlignedLoad(
-                T_size,
+                getSizeTy(ctx.builder.getContext()),
                 ctx.builder.CreateConstInBoundsGEP1_32(
-                    T_size,
-                    emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), T_psize),
+                    getSizeTy(ctx.builder.getContext()),
+                    emit_bitcast(ctx, literal_pointer_val(ctx, (jl_value_t*)codeinst), getSizePtrTy(ctx.builder.getContext())),
                     offsetof(jl_code_instance_t, max_world) / sizeof(size_t)),
                 Align(sizeof(size_t)));
         // XXX: age is always OK if we don't have a TLS. This is a hack required due to `@threadcall` abuse.
@@ -4836,11 +5598,11 @@ static Function* gen_cfun_wrapper(
     jl_cgval_t *inputargs = (jl_cgval_t*)alloca(sizeof(jl_cgval_t) * (nargs + 1));
     if (ff) {
         // we need to pass the function object even if (even though) it is a singleton
-        inputargs[0] = mark_julia_const(ff);
+        inputargs[0] = mark_julia_const(ctx, ff);
     }
     else {
         assert(nest && nestPtr);
-        Value *ff = ctx.builder.CreateAlignedLoad(T_prjlvalue, nestPtr, Align(sizeof(void*)));
+        Value *ff = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, nestPtr, Align(sizeof(void*)));
         inputargs[0] = mark_julia_type(ctx, ff, true, jl_any_type);
     }
     // XXX: these values may need to be rooted until the end of the function
@@ -4876,7 +5638,7 @@ static Function* gen_cfun_wrapper(
         if (aref) {
             if (jargty == (jl_value_t*)jl_any_type) {
                 inputarg = mark_julia_type(ctx,
-                        ctx.builder.CreateAlignedLoad(T_prjlvalue, emit_bitcast(ctx, val, T_pprjlvalue), Align(sizeof(void*))),
+                        ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, emit_bitcast(ctx, val, ctx.types().T_pprjlvalue), Align(sizeof(void*))),
                         true, jl_any_type);
             }
             else if (static_at && jl_is_concrete_immutable(jargty)) { // anything that could be stored unboxed
@@ -4885,17 +5647,17 @@ static Function* gen_cfun_wrapper(
                 assert(!isboxed);
                 // a T* (of unknown origin)
                 if (type_is_ghost(T)) {
-                    inputarg = ghostValue(jargty);
+                    inputarg = ghostValue(ctx, jargty);
                 }
                 else {
                     val = emit_bitcast(ctx, val, T->getPointerTo());
-                    val = ctx.builder.CreateAlignedLoad(val, Align(1)); // make no alignment assumption about pointer from C
+                    val = ctx.builder.CreateAlignedLoad(T, val, Align(1)); // make no alignment assumption about pointer from C
                     inputarg = mark_julia_type(ctx, val, false, jargty);
                 }
             }
             else if (static_at || (!jl_is_typevar(jargty) && !jl_is_immutable_datatype(jargty))) {
                 // must be a jl_value_t* (because it's mutable or contains gc roots)
-                inputarg = mark_julia_type(ctx, maybe_decay_untracked(ctx, emit_bitcast(ctx, val, T_prjlvalue)), true, jargty_proper);
+                inputarg = mark_julia_type(ctx, maybe_decay_untracked(ctx, emit_bitcast(ctx, val, ctx.types().T_prjlvalue)), true, jargty_proper);
             }
             else {
                 // allocate val into a new box, if it might not be boxed
@@ -4904,34 +5666,34 @@ static Function* gen_cfun_wrapper(
                 if (!*closure_types)
                     *closure_types = jl_alloc_vec_any(0);
                 jl_array_ptr_1d_push(*closure_types, jargty);
-                Value *runtime_dt = ctx.builder.CreateAlignedLoad(T_prjlvalue,
-                        ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, nestPtr, jl_array_len(*closure_types)),
+                Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
+                        ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_len(*closure_types)),
                         Align(sizeof(void*)));
-                BasicBlock *boxedBB = BasicBlock::Create(jl_LLVMContext, "isboxed", cw);
-                BasicBlock *loadBB = BasicBlock::Create(jl_LLVMContext, "need-load", cw);
-                BasicBlock *unboxedBB = BasicBlock::Create(jl_LLVMContext, "maybe-unboxed", cw);
-                BasicBlock *isanyBB = BasicBlock::Create(jl_LLVMContext, "any", cw);
-                BasicBlock *afterBB = BasicBlock::Create(jl_LLVMContext, "after", cw);
+                BasicBlock *boxedBB = BasicBlock::Create(ctx.builder.getContext(), "isboxed", cw);
+                BasicBlock *loadBB = BasicBlock::Create(ctx.builder.getContext(), "need-load", cw);
+                BasicBlock *unboxedBB = BasicBlock::Create(ctx.builder.getContext(), "maybe-unboxed", cw);
+                BasicBlock *isanyBB = BasicBlock::Create(ctx.builder.getContext(), "any", cw);
+                BasicBlock *afterBB = BasicBlock::Create(ctx.builder.getContext(), "after", cw);
                 Value *isrtboxed = ctx.builder.CreateIsNull(val);
                 ctx.builder.CreateCondBr(isrtboxed, boxedBB, loadBB);
                 ctx.builder.SetInsertPoint(boxedBB);
-                Value *p1 = ctx.builder.CreateBitCast(val, T_pjlvalue);
+                Value *p1 = ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue);
                 p1 = track_pjlvalue(ctx, p1);
                 ctx.builder.CreateBr(afterBB);
                 ctx.builder.SetInsertPoint(loadBB);
                 Value *isrtany = ctx.builder.CreateICmpEQ(
                         literal_pointer_val(ctx, (jl_value_t*)jl_any_type),
-                        ctx.builder.CreateBitCast(val, T_pjlvalue));
+                        ctx.builder.CreateBitCast(val, ctx.types().T_pjlvalue));
                 ctx.builder.CreateCondBr(isrtany, isanyBB, unboxedBB);
                 ctx.builder.SetInsertPoint(isanyBB);
-                Value *p2 = ctx.builder.CreateAlignedLoad(T_prjlvalue, ctx.builder.CreateBitCast(val, T_pprjlvalue), Align(sizeof(void*)));
+                Value *p2 = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateBitCast(val, ctx.types().T_pprjlvalue), Align(sizeof(void*)));
                 ctx.builder.CreateBr(afterBB);
                 ctx.builder.SetInsertPoint(unboxedBB);
                 Value *p3 = emit_new_bits(ctx, runtime_dt, val);
                 unboxedBB = ctx.builder.GetInsertBlock(); // could have changed
                 ctx.builder.CreateBr(afterBB);
                 ctx.builder.SetInsertPoint(afterBB);
-                PHINode *p = ctx.builder.CreatePHI(T_prjlvalue, 3);
+                PHINode *p = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 3);
                 p->addIncoming(p1, boxedBB);
                 p->addIncoming(p2, isanyBB);
                 p->addIncoming(p3, unboxedBB);
@@ -4949,8 +5711,7 @@ static Function* gen_cfun_wrapper(
                 // undo whatever we might have done to this poor argument
                 assert(jl_is_datatype(jargty));
                 if (sig.byRefList.at(i)) {
-                    assert(cast<PointerType>(val->getType())->getElementType() == sig.fargt[i]);
-                    val = ctx.builder.CreateAlignedLoad(val, Align(1)); // unknown alignment from C
+                    val = ctx.builder.CreateAlignedLoad(sig.fargt[i], val, Align(1)); // unknown alignment from C
                 }
                 else {
                     bool issigned = jl_signed_type && jl_subtype(jargty_proper, (jl_value_t*)jl_signed_type);
@@ -4972,8 +5733,8 @@ static Function* gen_cfun_wrapper(
                     if (!*closure_types)
                         *closure_types = jl_alloc_vec_any(0);
                     jl_array_ptr_1d_push(*closure_types, jargty);
-                    Value *runtime_dt = ctx.builder.CreateAlignedLoad(T_prjlvalue,
-                            ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, nestPtr, jl_array_len(*closure_types)),
+                    Value *runtime_dt = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
+                            ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, nestPtr, jl_array_len(*closure_types)),
                             Align(sizeof(void*)));
                     Value *strct = box_ccall_result(ctx, val, runtime_dt, jargty);
                     inputarg = mark_julia_type(ctx, strct, true, jargty_proper);
@@ -4986,11 +5747,11 @@ static Function* gen_cfun_wrapper(
 
     // Create the call
     bool jlfunc_sret;
-    jl_cgval_t retval;
+    jl_cgval_t retval(ctx.builder.getContext());
     if (calltype == 2) {
         nargs = 0; // arguments not needed -- TODO: not really true, should emit an age_ok test and jlcall
         jlfunc_sret = false;
-        retval = mark_julia_const((jl_value_t*)callptr);
+        retval = mark_julia_const(ctx, (jl_value_t*)callptr);
     }
     else if (calltype == 0 || calltype == 1) {
         // emit a jlcall
@@ -5000,23 +5761,23 @@ static Function* gen_cfun_wrapper(
             StringRef fname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
             theFptr = cast_or_null<Function>(jl_Module->getNamedValue(fname));
             if (!theFptr) {
-                theFptr = Function::Create(jl_func_sig, GlobalVariable::ExternalLinkage,
+                theFptr = Function::Create(ctx.types().T_jlfunc, GlobalVariable::ExternalLinkage,
                                            fname, jl_Module);
                 jl_init_function(theFptr);
             }
             else {
-                assert(theFptr->getFunctionType() == jl_func_sig);
+                assert(theFptr->getFunctionType() == ctx.types().T_jlfunc);
             }
-            add_return_attr(theFptr, Attribute::NonNull);
-            theFptr->addFnAttr(Thunk);
+            addRetAttr(theFptr, Attribute::NonNull);
+            theFptr->addFnAttr(Attribute::get(ctx.builder.getContext(), "thunk"));
         }
         BasicBlock *b_generic, *b_jlcall, *b_after;
         Value *ret_jlcall;
         if (age_ok) {
             assert(theFptr);
-            b_generic = BasicBlock::Create(jl_LLVMContext, "generic", cw);
-            b_jlcall = BasicBlock::Create(jl_LLVMContext, "apply", cw);
-            b_after = BasicBlock::Create(jl_LLVMContext, "after", cw);
+            b_generic = BasicBlock::Create(ctx.builder.getContext(), "generic", cw);
+            b_jlcall = BasicBlock::Create(ctx.builder.getContext(), "apply", cw);
+            b_after = BasicBlock::Create(ctx.builder.getContext(), "after", cw);
             ctx.builder.CreateCondBr(age_ok, b_jlcall, b_generic);
             ctx.builder.SetInsertPoint(b_jlcall);
             // for jlcall, we need to pass the function object even if it is a ghost.
@@ -5030,7 +5791,7 @@ static Function* gen_cfun_wrapper(
         if (age_ok) {
             ctx.builder.CreateBr(b_after);
             ctx.builder.SetInsertPoint(b_after);
-            PHINode *retphi = ctx.builder.CreatePHI(T_prjlvalue, 2);
+            PHINode *retphi = ctx.builder.CreatePHI(ctx.types().T_prjlvalue, 2);
             retphi->addIncoming(ret_jlcall, b_jlcall);
             retphi->addIncoming(ret, b_generic);
             ret = retphi;
@@ -5038,10 +5799,11 @@ static Function* gen_cfun_wrapper(
         retval = mark_julia_type(ctx, ret, true, astrt);
     }
     else {
+        bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
         assert(calltype == 3);
         // emit a specsig call
         StringRef protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)callptr, codeinst);
-        jl_returninfo_t returninfo = get_specsig_function(ctx, M, protoname, lam->specTypes, astrt);
+        jl_returninfo_t returninfo = get_specsig_function(ctx, M, protoname, lam->specTypes, astrt, is_opaque_closure);
         FunctionType *cft = returninfo.decl->getFunctionType();
         jlfunc_sret = (returninfo.cc == jl_returninfo_t::SRet);
 
@@ -5054,22 +5816,28 @@ static Function* gen_cfun_wrapper(
                 result = emit_bitcast(ctx, sretPtr, cft->getParamType(0));
             }
             else {
-                result = emit_static_alloca(ctx, cft->getParamType(0)->getPointerElementType());
+                if (jlfunc_sret) {
+                    result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType());
+                    assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
+                } else {
+                    result = emit_static_alloca(ctx, get_unionbytes_type(ctx.builder.getContext(), returninfo.union_bytes));
+                    assert(cast<PointerType>(result->getType())->hasSameElementTypeAs(cast<PointerType>(cft->getParamType(0))));
+                }
             }
             args.push_back(result);
         }
         if (returninfo.return_roots) {
-            AllocaInst *return_roots = emit_static_alloca(ctx, T_prjlvalue);
-            return_roots->setOperand(0, ConstantInt::get(T_int32, returninfo.return_roots));
+            AllocaInst *return_roots = emit_static_alloca(ctx, get_returnroots_type(ctx, returninfo.return_roots));
             args.push_back(return_roots);
         }
         for (size_t i = 0; i < nargs + 1; i++) {
             // figure out how to repack the arguments
             jl_cgval_t &inputarg = inputargs[i];
             Value *arg;
-            jl_value_t *spect = jl_nth_slot_type(lam->specTypes, i);
+            jl_value_t *spect = (i == 0 && is_opaque_closure) ? (jl_value_t*)jl_any_type :
+                jl_nth_slot_type(lam->specTypes, i);
             bool isboxed = deserves_argbox(spect);
-            Type *T = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, spect);
+            Type *T = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, spect);
             if (is_uniquerep_Type(spect)) {
                 continue;
             }
@@ -5081,8 +5849,7 @@ static Function* gen_cfun_wrapper(
             }
             else if (T->isAggregateType()) {
                 // aggregate types are passed by pointer
-                if (!inputarg.ispointer())
-                    inputarg = value_to_pointer(ctx, inputarg);
+                inputarg = value_to_pointer(ctx, inputarg);
                 arg = maybe_bitcast(ctx, decay_derived(ctx, data_pointer(ctx, inputarg)),
                     T->getPointerTo());
             }
@@ -5108,8 +5875,9 @@ static Function* gen_cfun_wrapper(
             emit_cfunc_invalidate(gf_thunk, returninfo.cc, returninfo.return_roots, lam->specTypes, codeinst->rettype, nargs + 1, ctx.emission_context);
             theFptr = ctx.builder.CreateSelect(age_ok, theFptr, gf_thunk);
         }
+        assert(cast<PointerType>(theFptr->getType())->isOpaqueOrPointeeTypeMatches(returninfo.decl->getFunctionType()));
         CallInst *call = ctx.builder.CreateCall(
-            cast<FunctionType>(theFptr->getType()->getPointerElementType()),
+            cast<FunctionType>(returninfo.decl->getFunctionType()),
             theFptr, ArrayRef<Value*>(args));
         call->setAttributes(returninfo.decl->getAttributes());
         switch (returninfo.cc) {
@@ -5120,27 +5888,28 @@ static Function* gen_cfun_wrapper(
                 retval = mark_julia_type(ctx, call, false, astrt);
                 break;
             case jl_returninfo_t::SRet:
-                retval = mark_julia_slot(result, astrt, NULL, tbaa_stack);
+                retval = mark_julia_slot(result, astrt, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
                 break;
             case jl_returninfo_t::Union: {
                 Value *box = ctx.builder.CreateExtractValue(call, 0);
                 Value *tindex = ctx.builder.CreateExtractValue(call, 1);
                 Value *derived = ctx.builder.CreateSelect(
                     ctx.builder.CreateICmpEQ(
-                            ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)),
-                            ConstantInt::get(T_int8, 0)),
-                    decay_derived(ctx, ctx.builder.CreateBitCast(result, T_pjlvalue)),
+                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                            ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
+                    decay_derived(ctx, ctx.builder.CreateBitCast(result, ctx.types().T_pjlvalue)),
                     decay_derived(ctx, box));
                 retval = mark_julia_slot(derived,
                                          astrt,
                                          tindex,
-                                         tbaa_stack);
-                assert(box->getType() == T_prjlvalue);
+                                         ctx.tbaa(),
+                                         ctx.tbaa().tbaa_stack);
+                assert(box->getType() == ctx.types().T_prjlvalue);
                 retval.Vboxed = box;
                 break;
             }
             case jl_returninfo_t::Ghosts:
-                retval = mark_julia_slot(NULL, astrt, call, tbaa_stack);
+                retval = mark_julia_slot(NULL, astrt, call, ctx.tbaa(), ctx.tbaa().tbaa_stack);
                 break;
         }
     }
@@ -5183,18 +5952,18 @@ static Function* gen_cfun_wrapper(
     ctx.builder.ClearInsertionPoint();
 
     if (aliasname) {
-        GlobalAlias::create(cw->getType()->getElementType(), cw->getType()->getAddressSpace(),
+        GlobalAlias::create(cw->getValueType(), cw->getType()->getAddressSpace(),
                             GlobalValue::ExternalLinkage, aliasname, cw, M);
     }
 
     if (nest) {
         funcName += "make";
         Function *cw_make = Function::Create(
-                FunctionType::get(T_pint8, { T_pint8, T_ppjlvalue }, false),
+                FunctionType::get(getInt8PtrTy(ctx.builder.getContext()), { getInt8PtrTy(ctx.builder.getContext()), ctx.types().T_ppjlvalue }, false),
                 GlobalVariable::ExternalLinkage,
                 funcName, M);
         jl_init_function(cw_make);
-        BasicBlock *b0 = BasicBlock::Create(jl_LLVMContext, "top", cw_make);
+        BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", cw_make);
         IRBuilder<> cwbuilder(b0);
         Function::arg_iterator AI = cw_make->arg_begin();
         Argument *Tramp = &*AI; ++AI;
@@ -5203,8 +5972,8 @@ static Function* gen_cfun_wrapper(
         Function *adjust_trampoline = Intrinsic::getDeclaration(cw_make->getParent(), Intrinsic::adjust_trampoline);
         cwbuilder.CreateCall(init_trampoline, {
                 Tramp,
-                cwbuilder.CreateBitCast(cw, T_pint8),
-                cwbuilder.CreateBitCast(NVal, T_pint8)
+                cwbuilder.CreateBitCast(cw, getInt8PtrTy(ctx.builder.getContext())),
+                cwbuilder.CreateBitCast(NVal, getInt8PtrTy(ctx.builder.getContext()))
             });
         cwbuilder.CreateRet(cwbuilder.CreateCall(adjust_trampoline, { Tramp }));
         cw = cw_make;
@@ -5229,7 +5998,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
     if (jl_is_abstract_ref_type(declrt)) {
         declrt = jl_tparam0(declrt);
         if (!verify_ref_type(ctx, declrt, unionall_env, 0, "cfunction")) {
-            return jl_cgval_t();
+            return jl_cgval_t(ctx.builder.getContext());
         }
         if (unionall_env)
             declrt = jl_rewrap_unionall(declrt, (jl_value_t*)unionall_env);
@@ -5238,11 +6007,8 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
 
     // some sanity checking and check whether there's a vararg
     size_t nargt = jl_svec_len(argt);
-    bool isVa = (nargt > 0 && jl_is_vararg_type(jl_svecref(argt, nargt - 1)));
-    if (isVa) {
-        emit_error(ctx, "cfunction: Vararg syntax not allowed for argument list");
-        return jl_cgval_t();
-    }
+    bool isVa = (nargt > 0 && jl_is_vararg(jl_svecref(argt, nargt - 1)));
+    assert(!isVa); (void)isVa;
 
     jl_array_t *closure_types = NULL;
     jl_value_t *sigt = NULL; // dispatch-sig = type signature with Ref{} annotations removed and applied to the env
@@ -5256,11 +6022,12 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
             sparam_vals,
             &ctx.emission_context,
             /* outputs: */
-            lrt, retboxed, static_rt);
+            lrt, ctx.builder.getContext(),
+            retboxed, static_rt);
     if (!err.empty()) {
         emit_error(ctx, "cfunction " + err);
         JL_GC_POP();
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
     if (rt != declrt && rt != (jl_value_t*)jl_any_type)
         jl_add_method_root(ctx, rt);
@@ -5270,7 +6037,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
     if (!sig.err_msg.empty()) {
         emit_error(ctx, sig.err_msg);
         JL_GC_POP();
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
 
     // compute+verify the dispatch signature, and see if it depends on the environment sparams
@@ -5285,7 +6052,7 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
             jargty = jl_tparam0(jargty);
             if (!verify_ref_type(ctx, jargty, unionall_env, i + 1, "cfunction")) {
                 JL_GC_POP();
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
         if (unionall_env && jl_has_typevar_from_unionall(jargty, unionall_env)) {
@@ -5310,10 +6077,10 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
 #if defined(_CPU_AARCH64_) || defined(_CPU_ARM_) || defined(_CPU_PPC64_)
     if (nest) {
         emit_error(ctx, "cfunction: closures are not supported on this platform");
-        return jl_cgval_t();
+        return jl_cgval_t(ctx.builder.getContext());
     }
 #endif
-    size_t world = jl_world_counter;
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     size_t min_valid = 0;
     size_t max_valid = ~(size_t)0;
     // try to look up this function for direct invoking
@@ -5338,38 +6105,38 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
             }
             jl_add_method_root(ctx, (jl_value_t*)fill);
         }
-        Type *T_htable = ArrayType::get(T_size, sizeof(htable_t) / sizeof(void*));
+        Type *T_htable = ArrayType::get(getSizeTy(ctx.builder.getContext()), sizeof(htable_t) / sizeof(void*));
         Value *cache = new GlobalVariable(*jl_Module, T_htable, false,
                                GlobalVariable::PrivateLinkage,
                                ConstantAggregateZero::get(T_htable));
         F = ctx.builder.CreateCall(prepare_call(jlgetcfunctiontrampoline_func), {
                  fobj,
                  literal_pointer_val(ctx, output_type),
-                 ctx.builder.CreateBitCast(cache, T_pint8),
+                 ctx.builder.CreateBitCast(cache, getInt8PtrTy(ctx.builder.getContext())),
                  literal_pointer_val(ctx, (jl_value_t*)fill),
                  F,
-                 closure_types ? literal_pointer_val(ctx, (jl_value_t*)unionall_env) : V_null,
-                 closure_types ? ctx.spvals_ptr : ConstantPointerNull::get(cast<PointerType>(T_pprjlvalue))
+                 closure_types ? literal_pointer_val(ctx, (jl_value_t*)unionall_env) : Constant::getNullValue(ctx.types().T_pjlvalue),
+                 closure_types ? ctx.spvals_ptr : ConstantPointerNull::get(cast<PointerType>(ctx.types().T_pprjlvalue))
              });
         outboxed = true;
     }
     else {
-        F = ctx.builder.CreatePtrToInt(F, T_size);
+        F = ctx.builder.CreatePtrToInt(F, getSizeTy(ctx.builder.getContext()));
         outboxed = (output_type != (jl_value_t*)jl_voidpointer_type);
         if (outboxed) {
             assert(jl_datatype_size(output_type) == sizeof(void*) * 4);
             Value *strct = emit_allocobj(ctx, jl_datatype_size(output_type),
                                          literal_pointer_val(ctx, (jl_value_t*)output_type));
-            Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), T_psize);
-            MDNode *tbaa = best_tbaa(output_type);
+            Value *derived_strct = emit_bitcast(ctx, decay_derived(ctx, strct), getSizePtrTy(ctx.builder.getContext()));
+            MDNode *tbaa = best_tbaa(ctx.tbaa(), output_type);
             tbaa_decorate(tbaa, ctx.builder.CreateStore(F, derived_strct));
             tbaa_decorate(tbaa, ctx.builder.CreateStore(
-                ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, fexpr_rt.constant), T_size),
-                ctx.builder.CreateConstInBoundsGEP1_32(T_size, derived_strct, 1)));
-            tbaa_decorate(tbaa, ctx.builder.CreateStore(V_size0,
-                    ctx.builder.CreateConstInBoundsGEP1_32(T_size, derived_strct, 2)));
-            tbaa_decorate(tbaa, ctx.builder.CreateStore(V_size0,
-                    ctx.builder.CreateConstInBoundsGEP1_32(T_size, derived_strct, 3)));
+                ctx.builder.CreatePtrToInt(literal_pointer_val(ctx, fexpr_rt.constant), getSizeTy(ctx.builder.getContext())),
+                ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 1)));
+            tbaa_decorate(tbaa, ctx.builder.CreateStore(Constant::getNullValue(getSizeTy(ctx.builder.getContext())),
+                    ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 2)));
+            tbaa_decorate(tbaa, ctx.builder.CreateStore(Constant::getNullValue(getSizeTy(ctx.builder.getContext())),
+                    ctx.builder.CreateConstInBoundsGEP1_32(getSizeTy(ctx.builder.getContext()), derived_strct, 3)));
             F = strct;
         }
     }
@@ -5379,8 +6146,9 @@ static jl_cgval_t emit_cfunction(jl_codectx_t &ctx, jl_value_t *output_type, con
 
 // do codegen to create a C-callable alias/wrapper, or if sysimg_handle is set,
 // restore one from a loaded system image.
-void jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params)
+const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params)
 {
+    ++GeneratedCCallables;
     jl_datatype_t *ft = (jl_datatype_t*)jl_tparam0(sigt);
     jl_value_t *ff = ft->instance;
     assert(ff);
@@ -5391,9 +6159,9 @@ void jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t *declr
         crt = (jl_value_t*)jl_any_type;
     }
     bool toboxed;
-    Type *lcrt = _julia_struct_to_llvm(&params, crt, NULL, &toboxed);
+    Type *lcrt = _julia_struct_to_llvm(&params, *params.tsctx.getContext(), crt, &toboxed);
     if (toboxed)
-        lcrt = T_prjlvalue;
+        lcrt = JuliaType::get_prjlvalue_ty(lcrt->getContext());
     size_t nargs = jl_nparams(sigt)-1;
     jl_svec_t *argtypes = NULL;
     JL_GC_PUSH1(&argtypes);
@@ -5406,7 +6174,7 @@ void jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t *declr
         function_sig_t sig("cfunction", lcrt, crt, toboxed,
                            argtypes, NULL, false, CallingConv::C, false, &params);
         if (sig.err_msg.empty()) {
-            size_t world = jl_world_counter;
+            size_t world = jl_atomic_load_acquire(&jl_world_counter);
             size_t min_valid = 0;
             size_t max_valid = ~(size_t)0;
             if (sysimg_handle) {
@@ -5418,10 +6186,11 @@ void jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t *declr
             }
             else {
                 jl_method_instance_t *lam = jl_get_specialization1((jl_tupletype_t*)sigt, world, &min_valid, &max_valid, 0);
-                gen_cfun_wrapper((Module*)llvmmod, params, sig, ff, name, declrt, lam, NULL, NULL, NULL);
+                //Safe b/c params holds context lock
+                gen_cfun_wrapper(reinterpret_cast<orc::ThreadSafeModule*>(llvmmod)->getModuleUnlocked(), params, sig, ff, name, declrt, lam, NULL, NULL, NULL);
             }
             JL_GC_POP();
-            return;
+            return name;
         }
         err = jl_get_exceptionf(jl_errorexception_type, "%s", sig.err_msg.c_str());
     }
@@ -5432,9 +6201,10 @@ void jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t *declr
 static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlretty, const jl_returninfo_t &f, int retarg, StringRef funcName,
         Module *M, jl_codegen_params_t &params)
 {
-    Function *w = Function::Create(jl_func_sig, GlobalVariable::ExternalLinkage, funcName, M);
-    add_return_attr(w, Attribute::NonNull);
-    w->addFnAttr(Thunk);
+    ++GeneratedInvokeWrappers;
+    Function *w = Function::Create(JuliaType::get_jlfunc_ty(M->getContext()), GlobalVariable::ExternalLinkage, funcName, M);
+    addRetAttr(w, Attribute::NonNull);
+    w->addFnAttr(Attribute::get(M->getContext(), "thunk"));
     jl_init_function(w);
     Function::arg_iterator AI = w->arg_begin();
     Value *funcArg = &*AI++;
@@ -5443,13 +6213,13 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     //Value *mfunc = &*AI++; (void)mfunc; // unused
     assert(AI == w->arg_end());
 
-    jl_codectx_t ctx(jl_LLVMContext, params);
+    jl_codectx_t ctx(M->getContext(), params);
     ctx.f = w;
     ctx.linfo = lam;
     ctx.rettype = jlretty;
     ctx.world = 0;
 
-    BasicBlock *b0 = BasicBlock::Create(jl_LLVMContext, "top", w);
+    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", w);
     ctx.builder.SetInsertPoint(b0);
     DebugLoc noDbg;
     ctx.builder.SetCurrentDebugLocation(noDbg);
@@ -5467,12 +6237,13 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     case jl_returninfo_t::Ghosts:
         break;
     case jl_returninfo_t::SRet:
-        result = ctx.builder.CreateAlloca(ftype->getParamType(0)->getPointerElementType());
+        assert(cast<PointerType>(ftype->getParamType(0))->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(f.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType()));
+        result = ctx.builder.CreateAlloca(getAttributeAtIndex(f.decl->getAttributes(), 1, Attribute::StructRet).getValueAsType());
         args[idx] = result;
         idx++;
         break;
     case jl_returninfo_t::Union:
-        result = ctx.builder.CreateAlloca(ArrayType::get(T_int8, f.union_bytes));
+        result = ctx.builder.CreateAlloca(ArrayType::get(getInt8Ty(ctx.builder.getContext()), f.union_bytes));
         if (f.union_align > 1)
             result->setAlignment(Align(f.union_align));
         args[idx] = result;
@@ -5480,16 +6251,17 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
         break;
     }
     if (f.return_roots) {
-        AllocaInst *return_roots = emit_static_alloca(ctx, T_prjlvalue);
-        return_roots->setOperand(0, ConstantInt::get(T_int32, f.return_roots));
+        AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, f.return_roots));
         args[idx] = return_roots;
         idx++;
     }
 
+    bool is_opaque_closure = jl_is_method(lam->def.value) && lam->def.method->is_for_opaque_closure;
     for (size_t i = 0; i < jl_nparams(lam->specTypes) && idx < nfargs; ++i) {
-        jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i);
+        jl_value_t *ty = ((i == 0) && is_opaque_closure) ? (jl_value_t*)jl_any_type :
+            jl_nth_slot_type(lam->specTypes, i);
         bool isboxed = deserves_argbox(ty);
-        Type *lty = isboxed ?  T_prjlvalue : julia_type_to_llvm(ctx, ty);
+        Type *lty = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, ty);
         if (type_is_ghost(lty) || is_uniquerep_Type(ty))
             continue;
         Value *theArg;
@@ -5497,16 +6269,16 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
             theArg = funcArg;
         }
         else {
-            Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, i - 1);
+            Value *argPtr = ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, i - 1);
             theArg = maybe_mark_load_dereferenceable(
-                    ctx.builder.CreateAlignedLoad(T_prjlvalue, argPtr, Align(sizeof(void*))),
+                    ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
                     false,
                     ty);
         }
         if (!isboxed) {
             theArg = decay_derived(ctx, emit_bitcast(ctx, theArg, PointerType::get(lty, 0)));
             if (!lty->isAggregateType()) // keep "aggregate" type values in place as pointers
-                theArg = ctx.builder.CreateAlignedLoad(theArg, Align(julia_alignment(ty)));
+                theArg = ctx.builder.CreateAlignedLoad(lty, theArg, Align(julia_alignment(ty)));
         }
         assert(dyn_cast<UndefValue>(theArg) == NULL);
         args[idx] = theArg;
@@ -5515,14 +6287,14 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     CallInst *call = ctx.builder.CreateCall(f.decl, ArrayRef<Value*>(&args[0], nfargs));
     call->setAttributes(f.decl->getAttributes());
 
-    jl_cgval_t retval;
+    jl_cgval_t retval(ctx.builder.getContext());
     if (retarg != -1) {
         Value *theArg;
         if (retarg == 0)
             theArg = funcArg;
         else
-            theArg = ctx.builder.CreateAlignedLoad(T_prjlvalue,
-                    ctx.builder.CreateConstInBoundsGEP1_32(T_prjlvalue, argArray, retarg - 1),
+            theArg = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue,
+                    ctx.builder.CreateConstInBoundsGEP1_32(ctx.types().T_prjlvalue, argArray, retarg - 1),
                     Align(sizeof(void*)));
         retval = mark_julia_type(ctx, theArg, true, jl_any_type);
     }
@@ -5535,7 +6307,7 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
             retval = mark_julia_type(ctx, call, false, jlretty);
             break;
         case jl_returninfo_t::SRet:
-            retval = mark_julia_slot(result, jlretty, NULL, tbaa_stack);
+            retval = mark_julia_slot(result, jlretty, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
             break;
         case jl_returninfo_t::Union:
             // result is technically not right here, but `boxed` will only look at it
@@ -5543,12 +6315,13 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
             retval = mark_julia_slot(result,
                                      jlretty,
                                      ctx.builder.CreateExtractValue(call, 1),
-                                     tbaa_stack);
+                                     ctx.tbaa(),
+                                     ctx.tbaa().tbaa_stack);
             retval.Vboxed = ctx.builder.CreateExtractValue(call, 0);
-            assert(retval.Vboxed->getType() == T_prjlvalue);
+            assert(retval.Vboxed->getType() == ctx.types().T_prjlvalue);
             break;
         case jl_returninfo_t::Ghosts:
-            retval = mark_julia_slot(NULL, jlretty, call, tbaa_stack);
+            retval = mark_julia_slot(NULL, jlretty, call, ctx.tbaa(), ctx.tbaa().tbaa_stack);
             break;
         }
     }
@@ -5557,13 +6330,14 @@ static Function *gen_invoke_wrapper(jl_method_instance_t *lam, jl_value_t *jlret
     return w;
 }
 
-static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype)
+static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, StringRef name, jl_value_t *sig, jl_value_t *jlrettype, bool is_opaque_closure)
 {
     jl_returninfo_t props = {};
     SmallVector<Type*, 8> fsig;
-    Type *rt;
+    Type *rt = NULL;
+    Type *srt = NULL;
     if (jl_is_structtype(jlrettype) && jl_is_datatype_singleton((jl_datatype_t*)jlrettype)) {
-        rt = T_void;
+        rt = getVoidTy(ctx.builder.getContext());
         props.cc = jl_returninfo_t::Register;
     }
     else if (jl_is_uniontype(jlrettype)) {
@@ -5571,76 +6345,87 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
         union_alloca_type((jl_uniontype_t*)jlrettype, allunbox, props.union_bytes, props.union_align, props.union_minalign);
         if (props.union_bytes) {
             props.cc = jl_returninfo_t::Union;
-            Type *AT = ArrayType::get(T_int8, props.union_bytes);
+            Type *AT = ArrayType::get(getInt8Ty(ctx.builder.getContext()), props.union_bytes);
             fsig.push_back(AT->getPointerTo());
-            Type *pair[] = { T_prjlvalue, T_int8 };
-            rt = StructType::get(jl_LLVMContext, makeArrayRef(pair));
+            Type *pair[] = { ctx.types().T_prjlvalue, getInt8Ty(ctx.builder.getContext()) };
+            rt = StructType::get(ctx.builder.getContext(), makeArrayRef(pair));
         }
         else if (allunbox) {
             props.cc = jl_returninfo_t::Ghosts;
-            rt = T_int8;
+            rt = getInt8Ty(ctx.builder.getContext());
         }
         else {
-            rt = T_prjlvalue;
+            rt = ctx.types().T_prjlvalue;
         }
     }
     else if (!deserves_retbox(jlrettype)) {
         bool retboxed;
         rt = julia_type_to_llvm(ctx, jlrettype, &retboxed);
         assert(!retboxed);
-        if (rt != T_void && deserves_sret(jlrettype, rt)) {
+        if (rt != getVoidTy(ctx.builder.getContext()) && deserves_sret(jlrettype, rt)) {
             auto tracked = CountTrackedPointers(rt);
             assert(!tracked.derived);
             if (tracked.count && !tracked.all)
                 props.return_roots = tracked.count;
             props.cc = jl_returninfo_t::SRet;
             fsig.push_back(rt->getPointerTo());
-            rt = T_void;
+            srt = rt;
+            rt = getVoidTy(ctx.builder.getContext());
         }
         else {
             props.cc = jl_returninfo_t::Register;
         }
     }
     else {
-        rt = T_prjlvalue;
+        rt = ctx.types().T_prjlvalue;
     }
 
     AttributeList attributes; // function declaration attributes
     if (props.cc == jl_returninfo_t::SRet) {
+        assert(srt);
         unsigned argno = 1;
-        attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::StructRet);
-        attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoAlias);
-        attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoCapture);
+        Attribute sret = Attribute::getWithStructRetType(ctx.builder.getContext(), srt);
+        attributes = addAttributeAtIndex(attributes, ctx.builder.getContext(), argno, sret);
+        attributes = addAttributeAtIndex(attributes, ctx.builder.getContext(), argno, Attribute::NoAlias);
+        attributes = addAttributeAtIndex(attributes, ctx.builder.getContext(), argno, Attribute::NoCapture);
     }
     if (props.cc == jl_returninfo_t::Union) {
         unsigned argno = 1;
-        attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoAlias);
-        attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoCapture);
+        attributes = addAttributeAtIndex(attributes, ctx.builder.getContext(), argno, Attribute::NoAlias);
+        attributes = addAttributeAtIndex(attributes, ctx.builder.getContext(), argno, Attribute::NoCapture);
     }
 
     if (props.return_roots) {
-        fsig.push_back(T_pprjlvalue);
+        fsig.push_back(get_returnroots_type(ctx, props.return_roots)->getPointerTo(0));
         unsigned argno = fsig.size();
-        attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoAlias);
-        attributes = attributes.addAttribute(jl_LLVMContext, argno, Attribute::NoCapture);
+        attributes = addAttributeAtIndex(attributes, ctx.builder.getContext(), argno, Attribute::NoAlias);
+        attributes = addAttributeAtIndex(attributes, ctx.builder.getContext(), argno, Attribute::NoCapture);
     }
 
     for (size_t i = 0; i < jl_nparams(sig); i++) {
         jl_value_t *jt = jl_tparam(sig, i);
+        if (i == 0 && is_opaque_closure) {
+            jt = (jl_value_t*)jl_any_type;
+        }
         if (is_uniquerep_Type(jt))
             continue;
         bool isboxed = deserves_argbox(jt);
-        Type *ty = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, jt);
+        Type *ty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jt);
         if (type_is_ghost(ty))
             continue;
         unsigned argno = fsig.size();
         if (ty->isAggregateType()) { // aggregate types are passed by pointer
-            attributes = attributes.addParamAttribute(jl_LLVMContext, argno, Attribute::NoCapture);
-            attributes = attributes.addParamAttribute(jl_LLVMContext, argno, Attribute::ReadOnly);
+            attributes = attributes.addParamAttribute(ctx.builder.getContext(), argno, Attribute::NoCapture);
+            attributes = attributes.addParamAttribute(ctx.builder.getContext(), argno, Attribute::ReadOnly);
             ty = PointerType::get(ty, AddressSpace::Derived);
         }
         else if (isboxed && jl_is_immutable_datatype(jt)) {
-            attributes = attributes.addParamAttribute(jl_LLVMContext, argno, Attribute::ReadOnly);
+            attributes = attributes.addParamAttribute(ctx.builder.getContext(), argno, Attribute::ReadOnly);
+        }
+        else if (jl_is_primitivetype(jt) && ty->isIntegerTy()) {
+            bool issigned = jl_signed_type && jl_subtype(jt, (jl_value_t*)jl_signed_type);
+            Attribute::AttrKind attr = issigned ? Attribute::SExt : Attribute::ZExt;
+            attributes = attributes.addParamAttribute(ctx.builder.getContext(), argno, attr);
         }
         fsig.push_back(ty);
     }
@@ -5655,31 +6440,31 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, String
     else {
         assert(f->getFunctionType() == ftype);
     }
-    if (rt == T_prjlvalue)
-        add_return_attr(f, Attribute::NonNull);
+    if (rt == ctx.types().T_prjlvalue)
+        addRetAttr(f, Attribute::NonNull);
     props.decl = f;
     return props;
 }
 
-static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, unsigned count)
+static void emit_sret_roots(jl_codectx_t &ctx, bool isptr, Value *Src, Type *T, Value *Shadow, Type *ShadowT, unsigned count)
 {
     if (isptr)
         Src = maybe_decay_tracked(ctx, Src);
-    if (isptr && Src->getType()->getPointerElementType() != T)
+    if (isptr && !cast<PointerType>(Src->getType())->isOpaqueOrPointeeTypeMatches(T))
         Src = ctx.builder.CreateBitCast(Src, T->getPointerTo(Src->getType()->getPointerAddressSpace()));
-    unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ctx.builder);
+    unsigned emitted = TrackWithShadow(Src, T, isptr, Shadow, ShadowT, ctx.builder); //This comes from Late-GC-Lowering??
     assert(emitted == count); (void)emitted; (void)count;
 }
 
 static DISubroutineType *
-get_specsig_di(jl_codectx_t &ctx, jl_value_t *rt, jl_value_t *sig, DIBuilder &dbuilder)
+get_specsig_di(jl_codectx_t &ctx, jl_debugcache_t &debuginfo, jl_value_t *rt, jl_value_t *sig, DIBuilder &dbuilder)
 {
     size_t nargs = jl_nparams(sig); // TODO: if this is a Varargs function, our debug info for the `...` var may be misleading
     std::vector<Metadata*> ditypes(nargs + 1);
-    ditypes[0] = julia_type_to_di(ctx, rt, &dbuilder, false);
+    ditypes[0] = julia_type_to_di(ctx, debuginfo, rt, &dbuilder, false);
     for (size_t i = 0; i < nargs; i++) {
         jl_value_t *jt = jl_tparam(sig, i);
-        ditypes[i + 1] = julia_type_to_di(ctx, jt, &dbuilder, false);
+        ditypes[i + 1] = julia_type_to_di(ctx, debuginfo, jt, &dbuilder, false);
     }
     return dbuilder.createSubroutineType(dbuilder.getOrCreateTypeArray(ditypes));
 }
@@ -5699,29 +6484,52 @@ static jl_datatype_t *compute_va_type(jl_method_instance_t *lam, size_t nreq)
 }
 
 // Compile to LLVM IR, using a specialized signature if applicable.
-static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
+static jl_llvm_functions_t
     emit_function(
+        orc::ThreadSafeModule &TSM,
         jl_method_instance_t *lam,
         jl_code_info_t *src,
         jl_value_t *jlrettype,
         jl_codegen_params_t &params)
 {
+    ++EmittedFunctions;
     // step 1. unpack AST and allocate codegen context for this function
     jl_llvm_functions_t declarations;
-    jl_codectx_t ctx(jl_LLVMContext, params);
-    JL_GC_PUSH2(&ctx.code, &ctx.roots);
+    jl_codectx_t ctx(*params.tsctx.getContext(), params);
+    jl_datatype_t *vatyp = NULL;
+    JL_GC_PUSH3(&ctx.code, &ctx.roots, &vatyp);
     ctx.code = src->code;
 
     std::map<int, BasicBlock*> labels;
+    bool toplevel = false;
     ctx.module = jl_is_method(lam->def.method) ? lam->def.method->module : lam->def.module;
     ctx.linfo = lam;
+    ctx.name = TSM.getModuleUnlocked()->getModuleIdentifier().data();
+    size_t nreq = 0;
+    int va = 0;
+    if (jl_is_method(lam->def.method)) {
+        ctx.nargs = nreq = lam->def.method->nargs;
+        ctx.is_opaque_closure = lam->def.method->is_for_opaque_closure;
+        if ((nreq > 0 && jl_is_method(lam->def.value) && lam->def.method->isva)) {
+            assert(nreq > 0);
+            nreq--;
+            va = 1;
+        }
+    }
+    else {
+        ctx.nargs = 0;
+    }
+    ctx.nReqArgs = nreq;
+    if (va) {
+        jl_sym_t *vn = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, ctx.nargs - 1);
+        if (vn != jl_unused_sym)
+            ctx.vaSlot = ctx.nargs - 1;
+    }
+    toplevel = !jl_is_method(lam->def.method);
     ctx.rettype = jlrettype;
     ctx.source = src;
-    ctx.name = name_from_method_instance(lam);
     ctx.funcName = ctx.name;
     ctx.spvals_ptr = NULL;
-    ctx.nargs = jl_is_method(lam->def.method) ? lam->def.method->nargs : 0;
-    bool toplevel = !jl_is_method(lam->def.method);
     jl_array_t *stmts = ctx.code;
     size_t stmtslen = jl_array_dim0(stmts);
 
@@ -5735,14 +6543,14 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
 
     StringRef dbgFuncName = ctx.name;
     int toplineno = -1;
-    if (jl_is_method(lam->def.method)) {
+    if (lam && jl_is_method(lam->def.method)) {
         toplineno = lam->def.method->line;
         ctx.file = jl_symbol_name(lam->def.method->file);
     }
     else if (jl_array_len(src->linetable) > 0) {
         jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, 0);
         ctx.file = jl_symbol_name((jl_sym_t*)jl_fieldref_noalloc(locinfo, 2));
-        toplineno = jl_unbox_long(jl_fieldref(locinfo, 3));
+        toplineno = jl_unbox_int32(jl_fieldref(locinfo, 3));
     }
     if (ctx.file.empty())
         ctx.file = "<missing>";
@@ -5758,24 +6566,13 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     // step 2. process var-info lists to see what vars need boxing
     int n_ssavalues = jl_is_long(src->ssavaluetypes) ? jl_unbox_long(src->ssavaluetypes) : jl_array_len(src->ssavaluetypes);
     size_t vinfoslen = jl_array_dim0(src->slotflags);
-    ctx.slots.resize(vinfoslen);
-    size_t nreq = ctx.nargs;
-    int va = 0;
-
+    ctx.slots.resize(vinfoslen, jl_varinfo_t(ctx.builder.getContext()));
     assert(lam->specTypes); // the specTypes field should always be assigned
 
-    if (nreq > 0 && lam->def.method->isva) {
-        nreq--;
-        va = 1;
-        jl_sym_t *vn = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, ctx.nargs - 1);
-        if (vn != unused_sym)
-            ctx.vaSlot = ctx.nargs - 1;
-    }
-    ctx.nReqArgs = nreq;
 
     // create SAvalue locations for SSAValue objects
     ctx.ssavalue_assigned.assign(n_ssavalues, false);
-    ctx.SAvalues.assign(n_ssavalues, jl_cgval_t());
+    ctx.SAvalues.assign(n_ssavalues, jl_cgval_t(ctx.builder.getContext()));
 
     bool specsig, needsparams;
     std::tie(specsig, needsparams) = uses_specsig(lam, jlrettype, params.params->prefer_specsig);
@@ -5788,15 +6585,24 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         jl_varinfo_t &varinfo = ctx.slots[i];
         varinfo.isArgument = true;
         jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i);
-        if (argname == unused_sym)
+        if (argname == jl_unused_sym)
             continue;
         jl_value_t *ty = jl_nth_slot_type(lam->specTypes, i);
+        // OpaqueClosure implicitly loads the env
+        if (i == 0 && ctx.is_opaque_closure) {
+            if (jl_is_array(src->slottypes)) {
+                ty = jl_arrayref((jl_array_t*)src->slottypes, i);
+            }
+            else {
+                ty = (jl_value_t*)jl_any_type;
+            }
+        }
         varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, ty);
     }
     if (va && ctx.vaSlot != -1) {
         jl_varinfo_t &varinfo = ctx.slots[ctx.vaSlot];
         varinfo.isArgument = true;
-        jl_datatype_t *vatyp = specsig ? compute_va_type(lam, nreq) : (jl_tuple_type);
+        vatyp = specsig ? compute_va_type(lam, nreq) : (jl_tuple_type);
         varinfo.value = mark_julia_type(ctx, (Value*)NULL, false, vatyp);
     }
 
@@ -5835,17 +6641,19 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     if (unadorned_name[0] == '@')
         unadorned_name++;
 #endif
-    funcName << unadorned_name << "_" << globalUnique++;
+    funcName << unadorned_name << "_" << globalUniqueGeneratedNames++;
     declarations.specFunctionObject = funcName.str();
 
     // allocate Function declarations and wrapper objects
-    Module *M = new Module(ctx.name, jl_LLVMContext);
-    jl_setup_module(M, ctx.params);
+    //Safe because params holds ctx lock
+    Module *M = TSM.getModuleUnlocked();
+    jl_debugcache_t debuginfo;
+    debuginfo.initialize(M);
     jl_returninfo_t returninfo = {};
     Function *f = NULL;
     bool has_sret = false;
     if (specsig) { // assumes !va and !needsparams
-        returninfo = get_specsig_function(ctx, M, declarations.specFunctionObject, lam->specTypes, jlrettype);
+        returninfo = get_specsig_function(ctx, M, declarations.specFunctionObject, lam->specTypes, jlrettype, ctx.is_opaque_closure);
         f = returninfo.decl;
         has_sret = (returninfo.cc == jl_returninfo_t::SRet || returninfo.cc == jl_returninfo_t::Union);
         jl_init_function(f);
@@ -5875,17 +6683,17 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         }();
 
         std::string wrapName;
-        raw_string_ostream(wrapName) << "jfptr_" << unadorned_name << "_" << globalUnique++;
+        raw_string_ostream(wrapName) << "jfptr_" << unadorned_name << "_" << globalUniqueGeneratedNames++;
         declarations.functionObject = wrapName;
         (void)gen_invoke_wrapper(lam, jlrettype, returninfo, retarg, declarations.functionObject, M, ctx.emission_context);
     }
     else {
-        f = Function::Create(needsparams ? jl_func_sig_sparams : jl_func_sig,
+        f = Function::Create(needsparams ? ctx.types().T_jlfuncparams : ctx.types().T_jlfunc,
                              GlobalVariable::ExternalLinkage,
                              declarations.specFunctionObject, M);
         jl_init_function(f);
-        add_return_attr(f, Attribute::NonNull);
-        f->addFnAttr(Thunk);
+        addRetAttr(f, Attribute::NonNull);
+        f->addFnAttr(Attribute::get(ctx.builder.getContext(), "thunk"));
         // TODO: (if needsparams) add attributes: dereferenceable<sizeof(void*) * length(sp)>, readonly, nocapture
         // TODO: add attributes: dereferenceable<sizeof(ft)>, readonly, nocapture - e.g. maybe_mark_argument_dereferenceable(Arg, argType);
         // TODO: add attributes: dereferenceable<sizeof(void*) * nreq>, readonly, nocapture
@@ -5897,25 +6705,25 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         f->setDoesNotReturn();
 
 #ifdef USE_POLLY
-    if (!jl_has_meta(stmts, polly_sym) || jl_options.polly == JL_OPTIONS_POLLY_OFF) {
+    if (!jl_has_meta(stmts, jl_polly_sym) || jl_options.polly == JL_OPTIONS_POLLY_OFF) {
         f->addFnAttr(polly::PollySkipFnAttr);
     }
 #endif
 
-    if (jl_has_meta(stmts, noinline_sym)) {
+    if (jl_has_meta(stmts, jl_noinline_sym)) {
         f->addFnAttr(Attribute::NoInline);
     }
 
     if (returninfo.cc == jl_returninfo_t::Union) {
-        f->addAttribute(1, Attribute::getWithDereferenceableBytes(jl_LLVMContext, returninfo.union_bytes));
-        f->addAttribute(1, Attribute::getWithAlignment(jl_LLVMContext, Align(returninfo.union_align)));
+        addAttributeAtIndex(f, 1, Attribute::getWithDereferenceableBytes(ctx.builder.getContext(), returninfo.union_bytes));
+        addAttributeAtIndex(f, 1, Attribute::getWithAlignment(ctx.builder.getContext(), Align(returninfo.union_align)));
     }
 
 #ifdef JL_DEBUG_BUILD
     f->addFnAttr(Attribute::StackProtectStrong);
 #endif
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     // TODO: enable this only when a argument like `-race` is passed to Julia
     //       add a macro for no_sanitize_thread
     f->addFnAttr(llvm::Attribute::SanitizeThread);
@@ -5963,13 +6771,13 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
 
         DISubroutineType *subrty;
         if (jl_options.debug_level <= 1) {
-            subrty = jl_di_func_null_sig;
+            subrty = debuginfo.jl_di_func_null_sig;
         }
         else if (!specsig) {
-            subrty = jl_di_func_sig;
+            subrty = debuginfo.jl_di_func_sig;
         }
         else {
-            subrty = get_specsig_di(ctx, jlrettype, lam->specTypes, dbuilder);
+            subrty = get_specsig_di(ctx, debuginfo, jlrettype, lam->specTypes, dbuilder);
         }
         SP = dbuilder.createFunction(CU
                                      ,dbgFuncName      // Name
@@ -5984,14 +6792,14 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                                      ,nullptr          // Template Declaration
                                      ,nullptr          // ThrownTypes
                                      );
-        topdebugloc = DebugLoc::get(toplineno, 0, SP, NULL);
+        topdebugloc = DILocation::get(ctx.builder.getContext(), toplineno, 0, SP, NULL);
         f->setSubprogram(SP);
         if (jl_options.debug_level >= 2) {
             const bool AlwaysPreserve = true;
             // Go over all arguments and local variables and initialize their debug information
             for (i = 0; i < nreq; i++) {
                 jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i);
-                if (argname == unused_sym)
+                if (argname == jl_unused_sym)
                     continue;
                 jl_varinfo_t &varinfo = ctx.slots[i];
                 varinfo.dinfo = dbuilder.createParameterVariable(
@@ -6001,7 +6809,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     topfile,                            // File
                     toplineno == -1 ? 0 : toplineno,    // Line
                     // Variable type
-                    julia_type_to_di(ctx, varinfo.value.typ, &dbuilder, false),
+                    julia_type_to_di(ctx, debuginfo, varinfo.value.typ, &dbuilder, false),
                     AlwaysPreserve,                     // May be deleted if optimized out
                     DINode::FlagZero);                  // Flags (TODO: Do we need any)
             }
@@ -6012,14 +6820,14 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     has_sret + nreq + 1,                // Argument number (1-based)
                     topfile,                            // File
                     toplineno == -1 ? 0 : toplineno,    // Line (for now, use lineno of the function)
-                    julia_type_to_di(ctx, ctx.slots[ctx.vaSlot].value.typ, &dbuilder, false),
+                    julia_type_to_di(ctx, debuginfo, ctx.slots[ctx.vaSlot].value.typ, &dbuilder, false),
                     AlwaysPreserve,                     // May be deleted if optimized out
                     DINode::FlagZero);                  // Flags (TODO: Do we need any)
             }
             for (i = 0; i < vinfoslen; i++) {
                 jl_sym_t *s = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i);
                 jl_varinfo_t &varinfo = ctx.slots[i];
-                if (varinfo.isArgument || s == empty_sym || s == unused_sym)
+                if (varinfo.isArgument || s == jl_empty_sym || s == jl_unused_sym)
                     continue;
                 // LLVM 4.0: Assume the variable has default alignment
                 varinfo.dinfo = dbuilder.createAutoVariable(
@@ -6027,7 +6835,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     jl_symbol_name(s),       // Variable name
                     topfile,                 // File
                     toplineno == -1 ? 0 : toplineno, // Line (for now, use lineno of the function)
-                    julia_type_to_di(ctx, varinfo.value.typ, &dbuilder, false), // Variable type
+                    julia_type_to_di(ctx, debuginfo, varinfo.value.typ, &dbuilder, false), // Variable type
                     AlwaysPreserve,          // May be deleted if optimized out
                     DINode::FlagZero         // Flags (TODO: Do we need any)
                     );
@@ -6036,7 +6844,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     }
 
     // step 5. create first basic block
-    BasicBlock *b0 = BasicBlock::Create(jl_LLVMContext, "top", f);
+    BasicBlock *b0 = BasicBlock::Create(ctx.builder.getContext(), "top", f);
     ctx.builder.SetInsertPoint(b0);
     ctx.builder.SetCurrentDebugLocation(noDbg);
 
@@ -6057,28 +6865,16 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         }
     }
 
-    /*
-    // step 6. (optional) check for stack overflow (the slower way)
-    Value *cur_sp =
-        ctx.builder.CreateCall(Intrinsic::getDeclaration(M,
-                                                     Intrinsic::frameaddress),
-                           ConstantInt::get(T_int32, 0));
-    Value *sp_ok =
-        ctx.builder.CreateICmpUGT(cur_sp,
-                              ConstantInt::get(T_size,
-                                               (uptrint_t)jl_stack_lo));
-    error_unless(ctx, sp_ok, "stack overflow");
-    */
-
-    // step 7. set up GC frame
+    // step 6. set up GC frame
     allocate_gc_frame(ctx, b0);
     Value *last_age = NULL;
-    if (toplevel) {
-        emit_last_age_field(ctx);
-        last_age = tbaa_decorate(tbaa_gcframe, ctx.builder.CreateAlignedLoad(ctx.world_age_field, Align(sizeof(size_t))));
+    emit_last_age_field(ctx);
+    if (toplevel || ctx.is_opaque_closure) {
+        last_age = tbaa_decorate(ctx.tbaa().tbaa_gcframe, ctx.builder.CreateAlignedLoad(
+            getSizeTy(ctx.builder.getContext()), ctx.world_age_field, Align(sizeof(size_t))));
     }
 
-    // step 8. allocate local variables slots
+    // step 7. allocate local variables slots
     // must be in the first basic block for the llvm mem2reg pass to work
     auto allocate_local = [&](jl_varinfo_t &varinfo, jl_sym_t *s) {
         jl_value_t *jt = varinfo.value.typ;
@@ -6099,12 +6895,12 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             Value *lv = try_emit_union_alloca(ctx, (jl_uniontype_t*)jt, allunbox, align, nbytes);
             if (lv) {
                 lv->setName(jl_symbol_name(s));
-                varinfo.value = mark_julia_slot(lv, jt, NULL, tbaa_stack);
-                varinfo.pTIndex = emit_static_alloca(ctx, T_int8);
+                varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
+                varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()));
             }
             else if (allunbox) {
                 // all ghost values just need a selector allocated
-                AllocaInst *lv = emit_static_alloca(ctx, T_int8);
+                AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()));
                 lv->setName(jl_symbol_name(s));
                 varinfo.pTIndex = lv;
                 varinfo.value.tbaa = NULL;
@@ -6115,17 +6911,20 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             if (allunbox)
                 return;
         }
-        else if (deserves_stack(jt, true)) {
+        else if (deserves_stack(jt)) {
             bool isboxed;
             Type *vtype = julia_type_to_llvm(ctx, jt, &isboxed);
             assert(!isboxed);
             assert(!type_is_ghost(vtype) && "constants should already be handled");
-            // CreateAlloca is OK during prologue setup
-            Value *lv = ctx.builder.CreateAlloca(vtype, NULL, jl_symbol_name(s));
-            varinfo.value = mark_julia_slot(lv, jt, NULL, tbaa_stack);
+            Value *lv = new AllocaInst(vtype, 0, jl_symbol_name(s), /*InsertBefore*/ctx.pgcstack);
+            if (CountTrackedPointers(vtype).count) {
+                StoreInst *SI = new StoreInst(Constant::getNullValue(vtype), lv, false, Align(sizeof(void*)));
+                SI->insertAfter(ctx.pgcstack);
+            }
+            varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa(), ctx.tbaa().tbaa_stack);
             alloc_def_flag(ctx, varinfo);
             if (ctx.debug_enabled && varinfo.dinfo) {
-                assert((Metadata*)varinfo.dinfo->getType() != jl_pvalue_dillvmt);
+                assert((Metadata*)varinfo.dinfo->getType() != debuginfo.jl_pvalue_dillvmt);
                 dbuilder.insertDeclare(lv, varinfo.dinfo, dbuilder.createExpression(),
                                        topdebugloc,
                                        ctx.builder.GetInsertBlock());
@@ -6136,14 +6935,14 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             specsig || // for arguments, give them stack slots if they aren't in `argArray` (otherwise, will use that pointer)
             (va && (int)i == ctx.vaSlot) || // or it's the va arg tuple
             i == 0) { // or it is the first argument (which isn't in `argArray`)
-            AllocaInst *av = new AllocaInst(T_prjlvalue, 0,
-                jl_symbol_name(s), /*InsertBefore*/ctx.ptlsStates);
-            StoreInst *SI = new StoreInst(V_rnull, av, false, Align(sizeof(void*)));
-            SI->insertAfter(ctx.ptlsStates);
+            AllocaInst *av = new AllocaInst(ctx.types().T_prjlvalue, 0,
+                jl_symbol_name(s), /*InsertBefore*/ctx.pgcstack);
+            StoreInst *SI = new StoreInst(Constant::getNullValue(ctx.types().T_prjlvalue), av, false, Align(sizeof(void*)));
+            SI->insertAfter(ctx.pgcstack);
             varinfo.boxroot = av;
             if (ctx.debug_enabled && varinfo.dinfo) {
                 DIExpression *expr;
-                if ((Metadata*)varinfo.dinfo->getType() == jl_pvalue_dillvmt) {
+                if ((Metadata*)varinfo.dinfo->getType() == debuginfo.jl_pvalue_dillvmt) {
                     expr = dbuilder.createExpression();
                 }
                 else {
@@ -6161,7 +6960,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     // get pointers for locals stored in the gc frame array (argTemp)
     for (i = 0; i < vinfoslen; i++) {
         jl_sym_t *s = slot_symbol(ctx, i);
-        if (s == unused_sym)
+        if (s == jl_unused_sym)
             continue;
         jl_varinfo_t &varinfo = ctx.slots[i];
         if (!varinfo.used) {
@@ -6185,8 +6984,8 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     assert(jl_is_ssavalue(val));
                     upsilon_to_phic[((jl_ssavalue_t*)val)->id] = i;
                 }
-                ctx.phic_slots[i] = jl_varinfo_t{};
-                jl_varinfo_t &vi = ctx.phic_slots[i];
+                jl_varinfo_t &vi = (ctx.phic_slots.emplace(i, jl_varinfo_t(ctx.builder.getContext())).first->second =
+                                    jl_varinfo_t(ctx.builder.getContext()));
                 jl_value_t *typ = jl_array_ptr_ref(src->ssavaluetypes, i);
                 vi.used = true;
                 vi.isVolatile = true;
@@ -6196,29 +6995,29 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         }
     }
 
-    // step 9. move args into local variables
+    // step 8. move args into local variables
     Function::arg_iterator AI = f->arg_begin();
 
     auto get_specsig_arg = [&](jl_value_t *argType, Type *llvmArgType, bool isboxed) {
-        jl_cgval_t theArg;
+        jl_cgval_t theArg(ctx.builder.getContext());
         if (type_is_ghost(llvmArgType)) { // this argument is not actually passed
-            theArg = ghostValue(argType);
+            theArg = ghostValue(ctx, argType);
         }
         else if (is_uniquerep_Type(argType)) {
-            theArg = mark_julia_const(jl_tparam0(argType));
+            theArg = mark_julia_const(ctx, jl_tparam0(argType));
         }
         else if (llvmArgType->isAggregateType()) {
             Argument *Arg = &*AI; ++AI;
             maybe_mark_argument_dereferenceable(Arg, argType);
-            theArg = mark_julia_slot(Arg, argType, NULL, tbaa_const); // this argument is by-pointer
+            theArg = mark_julia_slot(Arg, argType, NULL, ctx.tbaa(), ctx.tbaa().tbaa_const); // this argument is by-pointer
         }
         else {
             Argument *Arg = &*AI; ++AI;
             if (isboxed) // e.g. is-pointer
                 maybe_mark_argument_dereferenceable(Arg, argType);
             theArg = mark_julia_type(ctx, Arg, isboxed, argType);
-            if (theArg.tbaa == tbaa_immut)
-                theArg.tbaa = tbaa_const;
+            if (theArg.tbaa == ctx.tbaa().tbaa_immut)
+                theArg.tbaa = ctx.tbaa().tbaa_const;
         }
         return theArg;
     };
@@ -6229,17 +7028,18 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         AI++; // skip return_roots slot
     for (i = 0; i < nreq; i++) {
         jl_sym_t *s = (jl_sym_t*)jl_array_ptr_ref(src->slotnames, i);
-        jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
+        jl_value_t *argType = (i == 0 && ctx.is_opaque_closure) ? (jl_value_t*)jl_any_type :
+            jl_nth_slot_type(lam->specTypes, i);
         bool isboxed = deserves_argbox(argType);
-        Type *llvmArgType = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, argType);
-        if (s == unused_sym) {
+        Type *llvmArgType = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
+        if (s == jl_unused_sym) {
             if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
                 ++AI;
             continue;
         }
         jl_varinfo_t &vi = ctx.slots[i];
-        jl_cgval_t theArg;
-        if (s == unused_sym || vi.value.constant) {
+        jl_cgval_t theArg(ctx.builder.getContext());
+        if (s == jl_unused_sym || vi.value.constant) {
             assert(vi.boxroot == NULL);
             if (specsig && !type_is_ghost(llvmArgType) && !is_uniquerep_Type(argType))
                 ++AI;
@@ -6251,12 +7051,13 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             else {
                 if (i == 0) {
                     // first (function) arg is separate in jlcall
-                    theArg = mark_julia_type(ctx, fArg, true, vi.value.typ);
+                    theArg = mark_julia_type(ctx, fArg, true, ctx.is_opaque_closure ?
+                        argType : vi.value.typ);
                 }
                 else {
-                    Value *argPtr = ctx.builder.CreateInBoundsGEP(T_prjlvalue, argArray, ConstantInt::get(T_size, i-1));
+                    Value *argPtr = ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray, ConstantInt::get(getSizeTy(ctx.builder.getContext()), i-1));
                     Value *load = maybe_mark_load_dereferenceable(
-                            ctx.builder.CreateAlignedLoad(T_prjlvalue, argPtr, Align(sizeof(void*))),
+                            ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, argPtr, Align(sizeof(void*))),
                             false, vi.value.typ);
                     theArg = mark_julia_type(ctx, load, true, vi.value.typ);
                     if (ctx.debug_enabled && vi.dinfo && !vi.boxroot && !vi.value.V) {
@@ -6264,7 +7065,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                         addr.push_back(llvm::dwarf::DW_OP_deref);
                         addr.push_back(llvm::dwarf::DW_OP_plus_uconst);
                         addr.push_back((i - 1) * sizeof(void*));
-                        if ((Metadata*)vi.dinfo->getType() != jl_pvalue_dillvmt)
+                        if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt)
                             addr.push_back(llvm::dwarf::DW_OP_deref);
                         dbuilder.insertDeclare(pargArray, vi.dinfo, dbuilder.createExpression(addr),
                                         topdebugloc,
@@ -6273,6 +7074,29 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 }
             }
 
+            // If this is an opaque closure, implicitly load the env and switch
+            // the world age.
+            if (i == 0 && ctx.is_opaque_closure) {
+                // Load closure world
+                Value *argaddr = emit_bitcast(ctx, maybe_decay_tracked(ctx, data_pointer(ctx, theArg)), getInt8PtrTy(ctx.builder.getContext()));
+                Value *worldaddr = ctx.builder.CreateInBoundsGEP(
+                        getInt8Ty(ctx.builder.getContext()), argaddr,
+                        ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_opaque_closure_t, world)));
+
+                jl_cgval_t closure_world = typed_load(ctx, worldaddr, NULL, (jl_value_t*)jl_long_type,
+                    theArg.tbaa, nullptr, false, AtomicOrdering::NotAtomic, false, sizeof(size_t));
+                emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), closure_world, (jl_value_t*)jl_long_type, ctx.world_age_field, ctx.tbaa().tbaa_gcframe);
+
+                // Load closure env
+                Value *envaddr = ctx.builder.CreateInBoundsGEP(
+                        getInt8Ty(ctx.builder.getContext()), argaddr,
+                        ConstantInt::get(getSizeTy(ctx.builder.getContext()), offsetof(jl_opaque_closure_t, captures)));
+
+                jl_cgval_t closure_env = typed_load(ctx, envaddr, NULL, (jl_value_t*)jl_any_type,
+                    theArg.tbaa, nullptr, true, AtomicOrdering::NotAtomic, false, sizeof(void*));
+                theArg = convert_julia_type(ctx, closure_env, vi.value.typ);
+            }
+
             if (vi.boxroot == NULL) {
                 assert(vi.value.V == NULL && "unexpected variable slot created for argument");
                 // keep track of original (possibly boxed) value to avoid re-boxing or moving
@@ -6282,7 +7106,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     Value *parg;
                     if (theArg.ispointer()) {
                         parg = theArg.V;
-                        if ((Metadata*)vi.dinfo->getType() != jl_pvalue_dillvmt)
+                        if ((Metadata*)vi.dinfo->getType() != debuginfo.jl_pvalue_dillvmt)
                             addr.push_back(llvm::dwarf::DW_OP_deref);
                     }
                     else {
@@ -6301,7 +7125,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         }
     }
 
-    // step 10. allocate rest argument
+    // step 9. allocate rest argument
     CallInst *restTuple = NULL;
     if (va && ctx.vaSlot != -1) {
         jl_varinfo_t &vi = ctx.slots[ctx.vaSlot];
@@ -6314,7 +7138,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             for (size_t i = nreq; i < jl_nparams(lam->specTypes); ++i) {
                 jl_value_t *argType = jl_nth_slot_type(lam->specTypes, i);
                 bool isboxed = deserves_argbox(argType);
-                Type *llvmArgType = isboxed ?  T_prjlvalue : julia_type_to_llvm(ctx, argType);
+                Type *llvmArgType = isboxed ?  ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, argType);
                 vargs[i - nreq] = get_specsig_arg(argType, llvmArgType, isboxed);
             }
             if (jl_is_concrete_type(vi.value.typ)) {
@@ -6322,7 +7146,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 emit_varinfo_assign(ctx, vi, tuple);
             }
             else {
-                restTuple = emit_jlcall(ctx, jltuple_func, V_rnull,
+                restTuple = emit_jlcall(ctx, jltuple_func, Constant::getNullValue(ctx.types().T_prjlvalue),
                     vargs, ctx.nvargs, JLCALL_F_CC);
                 jl_cgval_t tuple = mark_julia_type(ctx, restTuple, true, vi.value.typ);
                 emit_varinfo_assign(ctx, vi, tuple);
@@ -6333,32 +7157,37 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             Function *F = prepare_call(jltuple_func);
             restTuple =
                 ctx.builder.CreateCall(F,
-                        { V_rnull,
-                          ctx.builder.CreateInBoundsGEP(T_prjlvalue, argArray,
-                                  ConstantInt::get(T_size, nreq - 1)),
+                        { Constant::getNullValue(ctx.types().T_prjlvalue),
+                          ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, argArray,
+                                  ConstantInt::get(getSizeTy(ctx.builder.getContext()), nreq - 1)),
                           ctx.builder.CreateSub(argCount,
-                                  ConstantInt::get(T_int32, nreq - 1)) });
+                                  ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nreq - 1)) });
             restTuple->setAttributes(F->getAttributes());
             ctx.builder.CreateStore(restTuple, vi.boxroot);
         }
     }
 
-    // step 11. Compute properties for each statements
+    // step 10. Compute properties for each statements
     //     This needs to be computed by iterating in the IR order
     //     instead of control flow order.
     auto in_user_mod = [] (jl_module_t *mod) {
         return (!jl_is_submodule(mod, jl_base_module) &&
                 !jl_is_submodule(mod, jl_core_module));
     };
+    auto in_tracked_path = [] (StringRef file) {
+        return jl_options.tracked_path != NULL && file.startswith(jl_options.tracked_path);
+    };
     bool mod_is_user_mod = in_user_mod(ctx.module);
+    bool mod_is_tracked = in_tracked_path(ctx.file);
     struct DebugLineTable {
         DebugLoc loc;
         StringRef file;
         ssize_t line;
         bool is_user_code;
+        bool is_tracked; // falls within an explicitly set file or directory
         unsigned inlined_at;
         bool operator ==(const DebugLineTable &other) const {
-            return other.loc == loc && other.file == file && other.line == line && other.is_user_code == is_user_code && other.inlined_at == inlined_at;
+            return other.loc == loc && other.file == file && other.line == line && other.is_user_code == is_user_code && other.is_tracked == is_tracked && other.inlined_at == inlined_at;
         }
     };
     std::vector<DebugLineTable> linetable;
@@ -6371,26 +7200,28 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         topinfo.file = ctx.file;
         topinfo.line = toplineno;
         topinfo.is_user_code = mod_is_user_mod;
+        topinfo.is_tracked = mod_is_tracked;
         topinfo.inlined_at = 0;
         topinfo.loc = topdebugloc;
         for (size_t i = 0; i < nlocs; i++) {
-            // LineInfoNode(mod::Module, method::Any, file::Symbol, line::Int, inlined_at::Int)
+            // LineInfoNode(mod::Module, method::Any, file::Symbol, line::Int32, inlined_at::Int32)
             jl_value_t *locinfo = jl_array_ptr_ref(src->linetable, i);
             DebugLineTable &info = linetable[i + 1];
             assert(jl_typeis(locinfo, jl_lineinfonode_type));
             jl_module_t *module = (jl_module_t*)jl_fieldref_noalloc(locinfo, 0);
             jl_value_t *method = jl_fieldref_noalloc(locinfo, 1);
             jl_sym_t *filesym = (jl_sym_t*)jl_fieldref_noalloc(locinfo, 2);
-            info.line = jl_unbox_long(jl_fieldref(locinfo, 3));
-            info.inlined_at = jl_unbox_long(jl_fieldref(locinfo, 4));
+            info.line = jl_unbox_int32(jl_fieldref(locinfo, 3));
+            info.inlined_at = jl_unbox_int32(jl_fieldref(locinfo, 4));
             assert(info.inlined_at <= i);
+            info.file = jl_symbol_name(filesym);
+            if (info.file.empty())
+                info.file = "<missing>";
             if (module == ctx.module)
                 info.is_user_code = mod_is_user_mod;
             else
                 info.is_user_code = in_user_mod(module);
-            info.file = jl_symbol_name(filesym);
-            if (info.file.empty())
-                info.file = "<missing>";
+            info.is_tracked = in_tracked_path(info.file);
             if (ctx.debug_enabled) {
                 StringRef fname;
                 if (jl_is_method_instance(method))
@@ -6402,7 +7233,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 if (fname.empty())
                     fname = "macro expansion";
                 if (info.inlined_at == 0 && info.file == ctx.file) { // if everything matches, emit a toplevel line number
-                    info.loc = DebugLoc::get(info.line, 0, SP, NULL);
+                    info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, SP, NULL);
                 }
                 else { // otherwise, describe this as an inlining frame
                     DISubprogram *&inl_SP = subprograms[std::make_tuple(fname, info.file)];
@@ -6413,7 +7244,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                                                      ,fname            // LinkageName
                                                      ,difile           // File
                                                      ,0                // LineNo
-                                                     ,jl_di_func_null_sig // Ty
+                                                     ,debuginfo.jl_di_func_null_sig // Ty
                                                      ,0                // ScopeLine
                                                      ,DINode::FlagZero // Flags
                                                      ,DISubprogram::SPFlagDefinition | DISubprogram::SPFlagOptimized // SPFlags
@@ -6422,8 +7253,8 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                                                      ,nullptr          // ThrownTypes
                                                      );
                     }
-                    DebugLoc inl_loc = (info.inlined_at == 0) ? DebugLoc::get(0, 0, SP, NULL) : linetable.at(info.inlined_at).loc;
-                    info.loc = DebugLoc::get(info.line, 0, inl_SP, inl_loc);
+                    DebugLoc inl_loc = (info.inlined_at == 0) ? DebugLoc(DILocation::get(ctx.builder.getContext(), 0, 0, SP, NULL)) : linetable.at(info.inlined_at).loc;
+                    info.loc = DILocation::get(ctx.builder.getContext(), info.line, 0, inl_SP, inl_loc);
                 }
             }
         }
@@ -6436,19 +7267,19 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     {
         size_t nstmts = jl_array_len(stmts);
         aliasscopes.resize(nstmts + 1, nullptr);
-        MDBuilder mbuilder(jl_LLVMContext);
+        MDBuilder mbuilder(ctx.builder.getContext());
         MDNode *alias_domain = mbuilder.createAliasScopeDomain(ctx.name);
         for (i = 0; i < nstmts; i++) {
             jl_value_t *stmt = jl_array_ptr_ref(stmts, i);
             jl_expr_t *expr = jl_is_expr(stmt) ? (jl_expr_t*)stmt : nullptr;
             if (expr) {
-                if (expr->head == aliasscope_sym) {
+                if (expr->head == jl_aliasscope_sym) {
                     MDNode *scope = mbuilder.createAliasScope("aliasscope", alias_domain);
                     scope_stack.push_back(scope);
-                    MDNode *scope_list = MDNode::get(jl_LLVMContext, ArrayRef<Metadata*>(scope_stack));
+                    MDNode *scope_list = MDNode::get(ctx.builder.getContext(), ArrayRef<Metadata*>(scope_stack));
                     scope_list_stack.push_back(scope_list);
                     current_aliasscope = scope_list;
-                } else if (expr->head == popaliasscope_sym) {
+                } else if (expr->head == jl_popaliasscope_sym) {
                     scope_stack.pop_back();
                     scope_list_stack.pop_back();
                     if (scope_list_stack.empty()) {
@@ -6465,7 +7296,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     Instruction &prologue_end = ctx.builder.GetInsertBlock()->back();
 
 
-    // step 12. Do codegen in control flow order
+    // step 11. Do codegen in control flow order
     std::vector<int> workstack;
     std::map<int, BasicBlock*> BB;
     std::map<size_t, BasicBlock*> come_from_bb;
@@ -6478,8 +7309,8 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         if (seq_next >= 0 && (unsigned)seq_next < stmtslen) {
             workstack.push_back(seq_next);
         }
-        else if (!ctx.builder.GetInsertBlock()->getTerminator()) {
-            ctx.builder.CreateUnreachable();
+        else if (ctx.builder.GetInsertBlock() && !ctx.builder.GetInsertBlock()->getTerminator()) {
+            CreateTrap(ctx.builder, false);
         }
         while (!workstack.empty()) {
             int item = workstack.back();
@@ -6489,7 +7320,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 cursor = item;
                 return;
             }
-            if (seq_next != -1 && !ctx.builder.GetInsertBlock()->getTerminator()) {
+            if (seq_next != -1 && ctx.builder.GetInsertBlock() && !ctx.builder.GetInsertBlock()->getTerminator()) {
                 come_from_bb[cursor + 1] = ctx.builder.GetInsertBlock();
                 ctx.builder.CreateBr(nextbb->second);
             }
@@ -6504,17 +7335,19 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         cursor = -1;
     };
 
-    auto do_coverage = [&] (bool in_user_code) {
+    auto do_coverage = [&] (bool in_user_code, bool is_tracked) {
         return (coverage_mode == JL_LOG_ALL ||
-                (coverage_mode == JL_LOG_USER && in_user_code));
+                (in_user_code && coverage_mode == JL_LOG_USER) ||
+                (is_tracked && coverage_mode == JL_LOG_PATH));
     };
-    auto do_malloc_log = [&] (bool in_user_code) {
+    auto do_malloc_log = [&] (bool in_user_code, bool is_tracked) {
         return (malloc_log_mode == JL_LOG_ALL ||
-                (malloc_log_mode == JL_LOG_USER && in_user_code));
+                (in_user_code && malloc_log_mode == JL_LOG_USER) ||
+                (is_tracked && malloc_log_mode == JL_LOG_PATH));
     };
     std::vector<unsigned> current_lineinfo, new_lineinfo;
     auto coverageVisitStmt = [&] (size_t dbg) {
-        if (dbg == 0)
+        if (dbg == 0 || dbg >= linetable.size())
             return;
         // Compute inlining stack for current line, inner frame first
         while (dbg) {
@@ -6529,15 +7362,15 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             if (newdbg != current_lineinfo[dbg]) {
                 current_lineinfo[dbg] = newdbg;
                 const auto &info = linetable.at(newdbg);
-                if (do_coverage(info.is_user_code))
+                if (do_coverage(info.is_user_code, info.is_tracked))
                     coverageVisitLine(ctx, info.file, info.line);
             }
         }
         new_lineinfo.clear();
     };
     auto mallocVisitStmt = [&] (unsigned dbg, Value *sync) {
-        if (!do_malloc_log(mod_is_user_mod) || dbg == 0) {
-            if (do_malloc_log(true) && sync)
+        if (!do_malloc_log(mod_is_user_mod, mod_is_tracked) || dbg == 0) {
+            if (do_malloc_log(true, mod_is_tracked) && sync)
                 ctx.builder.CreateCall(prepare_call(sync_gc_total_bytes_func), {sync});
             return;
         }
@@ -6548,8 +7381,8 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     if (coverage_mode != JL_LOG_NONE) {
         // record all lines that could be covered
         for (const auto &info : linetable)
-            if (do_coverage(info.is_user_code))
-                coverageAllocLine(info.file, info.line);
+            if (do_coverage(info.is_user_code, info.is_tracked))
+                jl_coverage_alloc_line(info.file, info.line);
     }
 
     come_from_bb[0] = ctx.builder.GetInsertBlock();
@@ -6573,7 +7406,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 if (i + 2 <= stmtslen)
                     branch_targets.insert(i + 2);
             } else if (jl_is_expr(stmt)) {
-                if (((jl_expr_t*)stmt)->head == enter_sym) {
+                if (((jl_expr_t*)stmt)->head == jl_enter_sym) {
                     branch_targets.insert(i + 1);
                     if (i + 2 <= stmtslen)
                         branch_targets.insert(i + 2);
@@ -6597,19 +7430,21 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     }
 
     for (int label : branch_targets) {
-        BasicBlock *bb = BasicBlock::Create(jl_LLVMContext,
+        BasicBlock *bb = BasicBlock::Create(ctx.builder.getContext(),
             "L" + std::to_string(label), f);
         BB[label] = bb;
     }
 
     Value *sync_bytes = nullptr;
-    if (do_malloc_log(true))
+    if (do_malloc_log(true, mod_is_tracked))
         sync_bytes = ctx.builder.CreateCall(prepare_call(diff_gc_total_bytes_func), {});
     { // coverage for the function definition line number
         const auto &topinfo = linetable.at(0);
-        if (topinfo == linetable.at(1))
-            current_lineinfo.push_back(1);
-        if (do_coverage(topinfo.is_user_code))
+        if (linetable.size() > 1) {
+            if (topinfo == linetable.at(1))
+                current_lineinfo.push_back(1);
+        }
+        if (do_coverage(topinfo.is_user_code, topinfo.is_tracked))
             coverageVisitLine(ctx, topinfo.file, topinfo.line);
     }
 
@@ -6627,7 +7462,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         if (jl_is_returnnode(stmt)) {
             jl_value_t *retexpr = jl_returnnode_value(stmt);
             if (retexpr == NULL) {
-                ctx.builder.CreateUnreachable();
+                CreateTrap(ctx.builder, false);
                 find_next_stmt(-1);
                 continue;
             }
@@ -6636,7 +7471,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             jl_cgval_t retvalinfo = emit_expr(ctx, retexpr);
             retvalinfo = convert_julia_type(ctx, retvalinfo, jlrettype);
             if (retvalinfo.typ == jl_bottom_type) {
-                ctx.builder.CreateUnreachable();
+                CreateTrap(ctx.builder, false);
                 find_next_stmt(-1);
                 continue;
             }
@@ -6662,7 +7497,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 Value *data, *tindex;
                 if (retvalinfo.TIndex) {
                     tindex = retvalinfo.TIndex;
-                    data = V_rnull;
+                    data = Constant::getNullValue(ctx.types().T_prjlvalue);
                     if (retvalinfo.V == NULL) {
                         // treat this as a simple Ghosts
                         sret = NULL;
@@ -6671,8 +7506,8 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                         // also need to account for the possibility the return object is boxed
                         // and avoid / skip copying it to the stack
                         isboxed_union = ctx.builder.CreateICmpNE(
-                            ctx.builder.CreateAnd(tindex, ConstantInt::get(T_int8, 0x80)),
-                            ConstantInt::get(T_int8, 0));
+                            ctx.builder.CreateAnd(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                            ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
                         data = ctx.builder.CreateSelect(isboxed_union, retvalinfo.Vboxed, data);
                     }
                 }
@@ -6680,7 +7515,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     // treat this as a simple boxed returninfo
                     //assert(retvalinfo.isboxed);
                     tindex = compute_tindex_unboxed(ctx, retvalinfo, jlrettype);
-                    tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(T_int8, 0x80));
+                    tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
                     data = boxed(ctx, retvalinfo);
                     sret = NULL;
                 }
@@ -6697,7 +7532,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 if (retvalinfo.ispointer()) {
                     if (returninfo.return_roots) {
                         Type *store_ty = julia_type_to_llvm(ctx, retvalinfo.typ);
-                        emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, returninfo.return_roots);
+                        emit_sret_roots(ctx, true, data_pointer(ctx, retvalinfo), store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots);
                     }
                     if (returninfo.cc == jl_returninfo_t::SRet) {
                         assert(jl_is_concrete_type(jlrettype));
@@ -6714,7 +7549,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     Value *Val = retvalinfo.V;
                     if (returninfo.return_roots) {
                         assert(julia_type_to_llvm(ctx, retvalinfo.typ) == store_ty);
-                        emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, returninfo.return_roots);
+                        emit_sret_roots(ctx, false, Val, store_ty, f->arg_begin() + 1, get_returnroots_type(ctx, returninfo.return_roots), returninfo.return_roots);
                     }
                     if (dest_ty != sret->getType())
                         sret = emit_bitcast(ctx, sret, dest_ty);
@@ -6724,7 +7559,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             }
 
             mallocVisitStmt(debuginfoloc, sync_bytes);
-            if (toplevel)
+            if (toplevel || ctx.is_opaque_closure)
                 ctx.builder.CreateStore(last_age, ctx.world_age_field);
             assert(type_is_ghost(retty) || returninfo.cc == jl_returninfo_t::SRet ||
                 retval->getType() == ctx.f->getReturnType());
@@ -6760,7 +7595,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             find_next_stmt(cursor + 1);
             continue;
         }
-        else if (expr && expr->head == enter_sym) {
+        else if (expr && expr->head == jl_enter_sym) {
             jl_value_t **args = (jl_value_t**)jl_array_data(expr->args);
 
             assert(jl_is_long(args[0]));
@@ -6770,13 +7605,13 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 ctx.builder.CreateCall(prepare_call(jl_excstack_state_func));
             assert(!ctx.ssavalue_assigned.at(cursor));
             ctx.SAvalues.at(cursor) = jl_cgval_t(excstack_state, NULL, false,
-                                                 (jl_value_t*)jl_ulong_type, NULL);
+                                                 (jl_value_t*)jl_ulong_type, NULL, ctx.tbaa());
             ctx.ssavalue_assigned.at(cursor) = true;
             CallInst *sj = ctx.builder.CreateCall(prepare_call(except_enter_func));
             // We need to mark this on the call site as well. See issue #6757
             sj->setCanReturnTwice();
-            Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(T_int32, 0));
-            BasicBlock *tryblk = BasicBlock::Create(jl_LLVMContext, "try", f);
+            Value *isz = ctx.builder.CreateICmpEQ(sj, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), 0));
+            BasicBlock *tryblk = BasicBlock::Create(ctx.builder.getContext(), "try", f);
             BasicBlock *handlr = NULL;
             handlr = BB[lname];
             workstack.push_back(lname - 1);
@@ -6800,22 +7635,11 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
     ctx.builder.SetCurrentDebugLocation(noDbg);
     ctx.builder.ClearInsertionPoint();
 
-    auto undef_value_for_type = [&](Type *T) {
-        auto tracked = CountTrackedPointers(T);
-        Value *undef;
-        if (tracked.count)
-            // make sure gc pointers (including ptr_phi of union-split) are initialized to NULL
-            undef = Constant::getNullValue(T);
-        else
-            undef = UndefValue::get(T);
-        return undef;
-    };
-
     // Codegen Phi nodes
     std::map<std::pair<BasicBlock*, BasicBlock*>, BasicBlock*> BB_rewrite_map;
     std::vector<llvm::PHINode*> ToDelete;
     for (auto &tup : ctx.PhiNodes) {
-        jl_cgval_t phi_result;
+        jl_cgval_t phi_result(ctx.builder.getContext());
         PHINode *VN;
         jl_value_t *r;
         AllocaInst *dest;
@@ -6854,7 +7678,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
 #endif
                 continue;
             }
-            assert(find(pred_begin(PhiBB), pred_end(PhiBB), FromBB) != pred_end(PhiBB)); // consistency check
+            assert(std::find(pred_begin(PhiBB), pred_end(PhiBB), FromBB) != pred_end(PhiBB)); // consistency check
             TerminatorInst *terminator = FromBB->getTerminator();
             if (!terminator->getParent()->getUniqueSuccessor()) {
                 // Can't use `llvm::SplitCriticalEdge` here because
@@ -6863,14 +7687,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                    FromBB->getName() + "." + PhiBB->getName() + "_crit_edge");
                 Function::iterator FBBI = FromBB->getIterator();
                 ctx.f->getBasicBlockList().insert(++FBBI, NewBB); // insert after existing block
-#if JL_LLVM_VERSION >= 90000
                 terminator->replaceSuccessorWith(PhiBB, NewBB);
-#else
-                for (unsigned Idx = 0, NumSuccessors = terminator->getNumSuccessors(); Idx != NumSuccessors; ++Idx) {
-                    if (terminator->getSuccessor(Idx) == PhiBB)
-                      terminator->setSuccessor(Idx, NewBB);
-                }
-#endif
                 DebugLoc Loc = terminator->getDebugLoc();
                 terminator = BranchInst::Create(PhiBB);
                 terminator->setDebugLoc(Loc);
@@ -6884,7 +7701,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 ctx.builder.CreateLifetimeStart(dest);
             jl_cgval_t val = emit_expr(ctx, value);
             if (val.constant)
-                val = mark_julia_const(val.constant); // be over-conservative at making sure `.typ` is set concretely, not tindex
+                val = mark_julia_const(ctx, val.constant); // be over-conservative at making sure `.typ` is set concretely, not tindex
             if (!jl_is_uniontype(phiType) || !TindexN) {
                 Type *lty = julia_type_to_llvm(ctx, phiType);
                 if (VN) {
@@ -6892,19 +7709,31 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                     if (val.typ == (jl_value_t*)jl_bottom_type) {
                         V = undef_value_for_type(VN->getType());
                     }
-                    else if (VN && VN->getType() == T_prjlvalue) {
+                    else if (VN->getType() == ctx.types().T_prjlvalue) {
                         // Includes the jl_is_uniontype(phiType) && !TindexN case
+                        // TODO: if convert_julia_type says it is wasted effort and to skip it, is it worth using Constant::getNullValue(ctx.types().T_prjlvalue) (dynamically)?
                         V = boxed(ctx, val);
                     }
                     else {
-                        V = emit_unbox(ctx, VN->getType(), val, phiType);
+                        // must be careful to emit undef here (rather than a bitcast or
+                        // load of val) if the runtime type of val isn't phiType
+                        Value *isvalid = emit_isa(ctx, val, phiType, NULL).first;
+                        V = emit_guarded_test(ctx, isvalid, undef_value_for_type(VN->getType()), [&] {
+                            return emit_unbox(ctx, VN->getType(), val, phiType);
+                        });
                     }
                     VN->addIncoming(V, ctx.builder.GetInsertBlock());
                     assert(!TindexN);
                 }
                 else if (dest && val.typ != (jl_value_t*)jl_bottom_type) {
-                    assert(lty != T_prjlvalue);
-                    (void)emit_unbox(ctx, lty, val, phiType, maybe_decay_tracked(ctx, dest));
+                    // must be careful to emit undef here (rather than a bitcast or
+                    // load of val) if the runtime type of val isn't phiType
+                    assert(lty != ctx.types().T_prjlvalue);
+                    Value *isvalid = emit_isa(ctx, val, phiType, NULL).first;
+                    emit_guarded_test(ctx, isvalid, nullptr, [&] {
+                        (void)emit_unbox(ctx, lty, val, phiType, maybe_decay_tracked(ctx, dest), ctx.tbaa().tbaa_stack);
+                        return nullptr;
+                    });
                 }
             }
             else {
@@ -6916,26 +7745,29 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 if (val.typ == (jl_value_t*)jl_bottom_type) {
                     if (VN)
                         V = undef_value_for_type(VN->getType());
-                    RTindex = UndefValue::get(T_int8);
+                    RTindex = UndefValue::get(getInt8Ty(ctx.builder.getContext()));
                 }
                 else if (jl_is_concrete_type(val.typ) || val.constant) {
                     size_t tindex = get_box_tindex((jl_datatype_t*)val.typ, phiType);
                     if (tindex == 0) {
                         if (VN)
                             V = boxed(ctx, val);
-                        RTindex = ConstantInt::get(T_int8, 0x80);
+                        RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80);
                     }
                     else {
                         if (VN)
-                            V = V_rnull;
+                            V = Constant::getNullValue(ctx.types().T_prjlvalue);
                         Type *lty = julia_type_to_llvm(ctx, val.typ);
                         if (dest && !type_is_ghost(lty)) // basically, if !ghost union
-                            emit_unbox(ctx, lty, val, val.typ, dest);
-                        RTindex = ConstantInt::get(T_int8, tindex);
+                            emit_unbox(ctx, lty, val, val.typ, dest, ctx.tbaa().tbaa_stack);
+                        RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), tindex);
                     }
                 }
                 else {
-                    jl_cgval_t new_union = convert_julia_type(ctx, val, phiType);
+                    Value *skip = NULL;
+                    // must compute skip here, since the runtime type of val might not be in phiType
+                    // caution: only Phi and PhiC are allowed to do this (and maybe sometimes Pi)
+                    jl_cgval_t new_union = convert_julia_type(ctx, val, phiType, &skip);
                     RTindex = new_union.TIndex;
                     if (!RTindex) {
                         assert(new_union.isboxed && new_union.Vboxed && "convert_julia_type failed");
@@ -6943,19 +7775,20 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                         if (dest) {
                             // If dest is not set, this is a ghost union, the recipient of which
                             // is often not prepared to handle a boxed representation of the ghost.
-                            RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(T_int8, 0x80));
+                            RTindex = ctx.builder.CreateOr(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
                         }
                         new_union.TIndex = RTindex;
                     }
                     if (VN)
-                        V = new_union.Vboxed ? new_union.Vboxed : V_rnull;
+                        V = new_union.Vboxed ? new_union.Vboxed : Constant::getNullValue(ctx.types().T_prjlvalue);
                     if (dest) { // basically, if !ghost union
-                        Value *skip = NULL;
-                        if (new_union.Vboxed != nullptr)
-                            skip = ctx.builder.CreateICmpNE( // if 0x80 is set, we won't select this slot anyways
-                                    ctx.builder.CreateAnd(RTindex, ConstantInt::get(T_int8, 0x80)),
-                                    ConstantInt::get(T_int8, 0));
-                        emit_unionmove(ctx, dest, tbaa_arraybuf, new_union, skip);
+                        if (new_union.Vboxed != nullptr) {
+                            Value *isboxed = ctx.builder.CreateICmpNE( // if 0x80 is set, we won't select this slot anyways
+                                    ctx.builder.CreateAnd(RTindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80)),
+                                    ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0));
+                            skip = skip ? ctx.builder.CreateOr(isboxed, skip) : isboxed;
+                        }
+                        emit_unionmove(ctx, dest, ctx.tbaa().tbaa_arraybuf, new_union, skip);
                     }
                 }
                 if (VN)
@@ -6972,17 +7805,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
             if (FromBB != NewBB) {
                 BB_rewrite_map[LookupKey] = NewBB;
                 preds.insert(NewBB);
-#if JL_LLVM_VERSION >= 90000
                 PhiBB->replacePhiUsesWith(FromBB, NewBB);
-#else
-                for (BasicBlock::iterator I = PhiBB->begin(); isa<PHINode>(I); ++I) {
-                    PHINode *PN = cast<PHINode>(I);
-                    ssize_t BBIdx = PN->getBasicBlockIndex(FromBB);
-                    if (BBIdx == -1)
-                        continue;
-                    PN->setIncomingBlock(BBIdx, NewBB);
-                }
-#endif
             }
             ctx.builder.ClearInsertionPoint();
         }
@@ -7001,12 +7824,12 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                 continue;
             ctx.builder.SetInsertPoint(FromBB->getTerminator());
             // PHI is undef on this branch. But still may need to put a valid pointer in place.
-            Value *RTindex = TindexN ? UndefValue::get(T_int8) : NULL;
+            Value *RTindex = TindexN ? UndefValue::get(getInt8Ty(ctx.builder.getContext())) : NULL;
             if (VN) {
                 Value *undef = undef_value_for_type(VN->getType());
                 VN->addIncoming(undef, FromBB);
                 if (TindexN) // let the runtime / optimizer know this is unknown / boxed / null, so that it won't try to union_move / copy it later
-                    RTindex = ConstantInt::get(T_int8, 0x80);
+                    RTindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80);
             }
             if (TindexN)
                 TindexN->addIncoming(RTindex, FromBB);
@@ -7024,7 +7847,7 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
         PN->eraseFromParent();
     }
 
-    // step 13. Perform any delayed instantiations
+    // step 12. Perform any delayed instantiations
     if (ctx.debug_enabled) {
         bool in_prologue = true;
         for (auto &BB : *ctx.f) {
@@ -7104,29 +7927,42 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
                         break;
                 }
                 if (j == jlen) // not found - add to array
-                    jl_array_ptr_1d_push(m->roots, ival);
+                    jl_add_method_root(m, jl_precompile_toplevel_module, ival);
             }
         }
         ctx.roots = NULL;
         JL_UNLOCK(&m->writelock);
     }
 
-    // link the dependent llvmcall modules, but switch their function's linkage to private
-    // so that they don't show up in the execution engine.
-    for (auto &Mod : ctx.llvmcall_modules) {
+    // link the dependent llvmcall modules, but switch their function's linkage to internal
+    // so that they don't conflict when they show up in the execution engine.
+    for (auto &TSMod : ctx.llvmcall_modules) {
         SmallVector<std::string, 1> Exports;
-        for (const auto &F: Mod->functions())
-            if (!F.isDeclaration())
-                Exports.push_back(F.getName().str());
-        if (Linker::linkModules(*jl_Module, std::move(Mod))) {
-            jl_error("Failed to link LLVM bitcode");
-        }
+        TSMod.withModuleDo([&](Module &Mod) {
+            for (const auto &F: Mod.functions())
+                if (!F.isDeclaration())
+                    Exports.push_back(F.getName().str());
+        });
+        jl_merge_module(TSM, std::move(TSMod));
+        for (auto FN: Exports)
+            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage);
+    }
+
+    // link in opaque closure modules
+    for (auto &TSMod : ctx.oc_modules) {
+        SmallVector<std::string, 1> Exports;
+        TSMod.withModuleDo([&](Module &Mod) {
+            for (const auto &F: Mod.functions())
+                if (!F.isDeclaration())
+                    Exports.push_back(F.getName().str());
+        });
+        jl_merge_module(TSM, std::move(TSMod));
         for (auto FN: Exports)
-            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::PrivateLinkage);
+            jl_Module->getFunction(FN)->setLinkage(GlobalVariable::InternalLinkage);
     }
 
     JL_GC_POP();
-    return std::make_pair(std::unique_ptr<Module>(M), declarations);
+    return declarations;
 }
 
 // --- entry point ---
@@ -7134,42 +7970,56 @@ static std::pair<std::unique_ptr<Module>, jl_llvm_functions_t>
 void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL);
 
 JL_GCC_IGNORE_START("-Wclobbered")
-jl_compile_result_t jl_emit_code(
+jl_llvm_functions_t jl_emit_code(
+        orc::ThreadSafeModule &m,
         jl_method_instance_t *li,
         jl_code_info_t *src,
         jl_value_t *jlrettype,
         jl_codegen_params_t &params)
 {
+    JL_TIMING(CODEGEN);
     // caller must hold codegen_lock
     jl_llvm_functions_t decls = {};
-    std::unique_ptr<Module> m;
     assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache ||
         compare_cgparams(params.params, &jl_default_cgparams)) &&
         "functions compiled with custom codegen params must not be cached");
     JL_TRY {
-        std::tie(m, decls) = emit_function(li, src, jlrettype, params);
+        decls = emit_function(m, li, src, jlrettype, params);
+        auto stream = *jl_ExecutionEngine->get_dump_emitted_mi_name_stream();
+        if (stream) {
+            jl_printf(stream, "%s\t", decls.specFunctionObject.c_str());
+            // NOTE: We print the Type Tuple without surrounding quotes, because the quotes
+            // break CSV parsing if there are any internal quotes in the Type name (e.g. in
+            // Symbol("...")). The \t delineator should be enough to ensure whitespace is
+            // handled correctly. (And we don't need to worry about any tabs in the printed
+            // string, because tabs are printed as "\t" by `show`.)
+            jl_static_show(stream, li->specTypes);
+            jl_printf(stream, "\n");
+        }
     }
     JL_CATCH {
         // Something failed! This is very, very bad.
         // Try to pretend that it isn't and attempt to recover.
-        m.reset();
+        const char *mname = m.getModuleUnlocked()->getModuleIdentifier().data();
+        m = orc::ThreadSafeModule();
         decls.functionObject = "";
         decls.specFunctionObject = "";
-        const char *mname = name_from_method_instance(li);
         jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error during compilation of %s:\n", mname);
         jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
         jlbacktrace(); // written to STDERR_FILENO
     }
 
-    return std::make_tuple(std::move(m), decls);
+    return decls;
 }
 
-jl_compile_result_t jl_emit_codeinst(
+jl_llvm_functions_t jl_emit_codeinst(
+        orc::ThreadSafeModule &m,
         jl_code_instance_t *codeinst,
         jl_code_info_t *src,
         jl_codegen_params_t &params)
 {
+    JL_TIMING(CODEGEN);
     JL_GC_PUSH1(&src);
     if (!src) {
         src = (jl_code_info_t*)codeinst->inferred;
@@ -7178,22 +8028,22 @@ jl_compile_result_t jl_emit_codeinst(
             src = jl_uncompress_ir(def, codeinst, (jl_array_t*)src);
         if (!src || !jl_is_code_info(src)) {
             JL_GC_POP();
-            return jl_compile_result_t(); // failed
+            m = orc::ThreadSafeModule();
+            return jl_llvm_functions_t(); // failed
         }
     }
-    jl_compile_result_t result = jl_emit_code(codeinst->def, src, codeinst->rettype, params);
+    jl_llvm_functions_t decls = jl_emit_code(m, codeinst->def, src, codeinst->rettype, params);
 
-    const jl_llvm_functions_t &decls = std::get<1>(result);
     const std::string &specf = decls.specFunctionObject;
     const std::string &f = decls.functionObject;
     if (params.cache && !f.empty()) {
-        const Module *m = std::get<0>(result).get();
         // Prepare debug info to receive this function
         // record that this function name came from this linfo,
         // so we can build a reverse mapping for debug-info.
         bool toplevel = !jl_is_method(codeinst->def->def.method);
         if (!toplevel) {
-            const DataLayout &DL = m->getDataLayout();
+            //Safe b/c params holds context lock
+            const DataLayout &DL = m.getModuleUnlocked()->getDataLayout();
             // but don't remember toplevel thunks because
             // they may not be rooted in the gc for the life of the program,
             // and the runtime doesn't notify us when the code becomes unreachable :(
@@ -7214,8 +8064,11 @@ jl_compile_result_t jl_emit_codeinst(
                 jl_options.debug_level > 1) {
                 // update the stored code
                 if (codeinst->inferred != (jl_value_t*)src) {
-                    if (jl_is_method(def))
+                    if (jl_is_method(def)) {
                         src = (jl_code_info_t*)jl_compress_ir(def, src);
+                        assert(jl_typeis(src, jl_array_uint8_type));
+                        codeinst->relocatability = ((uint8_t*)jl_array_data(src))[jl_array_len(src)-1];
+                    }
                     codeinst->inferred = (jl_value_t*)src;
                     jl_gc_wb(codeinst, src);
                 }
@@ -7225,23 +8078,25 @@ jl_compile_result_t jl_emit_codeinst(
                      // and there is something to delete (test this before calling jl_ir_flag_inlineable)
                      codeinst->inferred != jl_nothing &&
                      // don't delete inlineable code, unless it is constant
-                     (codeinst->invoke == jl_fptr_const_return || !jl_ir_flag_inlineable((jl_array_t*)codeinst->inferred)) &&
+                     (codeinst->invoke == jl_fptr_const_return_addr || !jl_ir_flag_inlineable((jl_array_t*)codeinst->inferred)) &&
                      // don't delete code when generating a precompile file
-                     !imaging_mode) {
+                     !(params.imaging || jl_options.incremental)) {
                 // if not inlineable, code won't be needed again
                 codeinst->inferred = jl_nothing;
             }
         }
     }
     JL_GC_POP();
-    return result;
+    return decls;
 }
 
 
 void jl_compile_workqueue(
-    std::map<jl_code_instance_t*, jl_compile_result_t> &emitted,
+    jl_workqueue_t &emitted,
+    Module &original,
     jl_codegen_params_t &params, CompilationPolicy policy)
 {
+    JL_TIMING(CODEGEN);
     jl_code_info_t *src = NULL;
     JL_GC_PUSH1(&src);
     while (!params.workqueue.empty()) {
@@ -7250,24 +8105,28 @@ void jl_compile_workqueue(
         jl_returninfo_t::CallingConv proto_cc;
         bool proto_specsig;
         unsigned proto_return_roots;
-        std::tie(codeinst, proto_cc, proto_return_roots, protodecl, proto_specsig) = params.workqueue.back();
+        auto it = params.workqueue.back();
+        codeinst = it.first;
+        std::tie(proto_cc, proto_return_roots, protodecl, proto_specsig) = it.second;
         params.workqueue.pop_back();
         // try to emit code for this item from the workqueue
         assert(codeinst->min_world <= params.world && codeinst->max_world >= params.world &&
             "invalid world for code-instance");
         StringRef preal_decl = "";
         bool preal_specsig = false;
-        if (params.cache && codeinst->invoke != NULL) {
-            if (codeinst->invoke == jl_fptr_args) {
-                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
+        auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        if (params.cache && invoke != NULL) {
+            auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+            if (invoke == jl_fptr_args_addr) {
+                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
             }
             else if (codeinst->isspecsig) {
-                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)codeinst->specptr.fptr, codeinst);
+                preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
                 preal_specsig = true;
             }
         }
         else {
-            jl_compile_result_t &result = emitted[codeinst];
+            auto &result = emitted[codeinst];
             jl_llvm_functions_t *decls = NULL;
             if (std::get<0>(result)) {
                 decls = &std::get<1>(result);
@@ -7275,14 +8134,25 @@ void jl_compile_workqueue(
             else {
                 // Reinfer the function. The JIT came along and removed the inferred
                 // method body. See #34993
-                if (policy == CompilationPolicy::Extern &&
+                if (policy != CompilationPolicy::Default &&
                     codeinst->inferred && codeinst->inferred == jl_nothing) {
-                    src = jl_type_infer(codeinst->def, jl_world_counter, 0);
-                    if (src)
-                        result = jl_emit_code(codeinst->def, src, src->rettype, params);
+                    src = jl_type_infer(codeinst->def, jl_atomic_load_acquire(&jl_world_counter), 0);
+                    if (src) {
+                        orc::ThreadSafeModule result_m =
+                        jl_create_llvm_module(name_from_method_instance(codeinst->def),
+                            params.tsctx, params.imaging,
+                            original.getDataLayout(), Triple(original.getTargetTriple()));
+                        result.second = jl_emit_code(result_m, codeinst->def, src, src->rettype, params);
+                        result.first = std::move(result_m);
+                    }
                 }
                 else {
-                    result = jl_emit_codeinst(codeinst, NULL, params);
+                    orc::ThreadSafeModule result_m =
+                        jl_create_llvm_module(name_from_method_instance(codeinst->def),
+                            params.tsctx, params.imaging,
+                            original.getDataLayout(), Triple(original.getTargetTriple()));
+                    result.second = jl_emit_codeinst(result_m, codeinst, NULL, params);
+                    result.first = std::move(result_m);
                 }
                 if (std::get<0>(result))
                     decls = &std::get<1>(result);
@@ -7307,9 +8177,9 @@ void jl_compile_workqueue(
             if (!preal_specsig) {
                 // emit specsig-to-(jl)invoke conversion
                 Function *preal = emit_tojlinvoke(codeinst, mod, params);
-                protodecl->setLinkage(GlobalVariable::PrivateLinkage);
+                protodecl->setLinkage(GlobalVariable::InternalLinkage);
                 //protodecl->setAlwaysInline();
-                protodecl->addFnAttr("no-frame-pointer-elim", "true");
+                jl_init_function(protodecl);
                 size_t nrealargs = jl_nparams(codeinst->def->specTypes); // number of actual arguments being passed
                 // TODO: maybe this can be cached in codeinst->specfptr?
                 emit_cfunc_invalidate(protodecl, proto_cc, proto_return_roots, codeinst->def->specTypes, codeinst->rettype, nrealargs, params, preal);
@@ -7342,19 +8212,6 @@ void jl_compile_workqueue(
 
 
 // --- initialization ---
-
-std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false)
-{
-    MDBuilder mbuilder(jl_LLVMContext);
-    if (tbaa_root == nullptr) {
-        MDNode *jtbaa = mbuilder.createTBAARoot("jtbaa");
-        tbaa_root = mbuilder.createTBAAScalarTypeNode("jtbaa", jtbaa);
-    }
-    MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? parent : tbaa_root);
-    MDNode *n = mbuilder.createTBAAStructTagNode(scalar, scalar, 0, isConstant);
-    return std::make_pair(n, scalar);
-}
-
 std::vector<std::pair<jl_value_t**, JuliaVariable*>> gv_for_global;
 static void global_jlvalue_to_llvm(JuliaVariable *var, jl_value_t **addr)
 {
@@ -7369,147 +8226,14 @@ static JuliaVariable *julia_const_gv(jl_value_t *val)
     return nullptr;
 }
 
-static void init_julia_llvm_meta(void)
-{
-    tbaa_gcframe = tbaa_make_child("jtbaa_gcframe").first;
-    MDNode *tbaa_stack_scalar;
-    std::tie(tbaa_stack, tbaa_stack_scalar) = tbaa_make_child("jtbaa_stack");
-    tbaa_unionselbyte = tbaa_make_child("jtbaa_unionselbyte", tbaa_stack_scalar).first;
-    MDNode *tbaa_data_scalar;
-    std::tie(tbaa_data, tbaa_data_scalar) = tbaa_make_child("jtbaa_data");
-    tbaa_binding = tbaa_make_child("jtbaa_binding", tbaa_data_scalar).first;
-    MDNode *tbaa_value_scalar;
-    std::tie(tbaa_value, tbaa_value_scalar) =
-        tbaa_make_child("jtbaa_value", tbaa_data_scalar);
-    MDNode *tbaa_mutab_scalar;
-    std::tie(tbaa_mutab, tbaa_mutab_scalar) =
-        tbaa_make_child("jtbaa_mutab", tbaa_value_scalar);
-    tbaa_datatype = tbaa_make_child("jtbaa_datatype", tbaa_mutab_scalar).first;
-    tbaa_immut = tbaa_make_child("jtbaa_immut", tbaa_value_scalar).first;
-    tbaa_arraybuf = tbaa_make_child("jtbaa_arraybuf", tbaa_data_scalar).first;
-    tbaa_ptrarraybuf = tbaa_make_child("jtbaa_ptrarraybuf", tbaa_data_scalar).first;
-    MDNode *tbaa_array_scalar;
-    std::tie(tbaa_array, tbaa_array_scalar) = tbaa_make_child("jtbaa_array");
-    tbaa_arrayptr = tbaa_make_child("jtbaa_arrayptr", tbaa_array_scalar).first;
-    tbaa_arraysize = tbaa_make_child("jtbaa_arraysize", tbaa_array_scalar).first;
-    tbaa_arraylen = tbaa_make_child("jtbaa_arraylen", tbaa_array_scalar).first;
-    tbaa_arrayflags = tbaa_make_child("jtbaa_arrayflags", tbaa_array_scalar).first;
-    tbaa_arrayoffset = tbaa_make_child("jtbaa_arrayoffset", tbaa_array_scalar).first;
-    tbaa_const = tbaa_make_child("jtbaa_const", nullptr, true).first;
-    tbaa_arrayselbyte = tbaa_make_child("jtbaa_arrayselbyte", tbaa_array_scalar).first;
-
-    Thunk = Attribute::get(jl_LLVMContext, "thunk");
-}
-
-static void init_julia_llvm_env(Module *m)
-{
-    // every variable or function mapped in this function must be
-    // exported from libjulia, to support static compilation
-    T_int1  = Type::getInt1Ty(jl_LLVMContext);
-    T_int8  = Type::getInt8Ty(jl_LLVMContext);
-    T_pint8 = PointerType::get(T_int8, 0);
-    T_ppint8 = PointerType::get(T_pint8, 0);
-    T_pppint8 = PointerType::get(T_ppint8, 0);
-    T_int16 = Type::getInt16Ty(jl_LLVMContext);
-    T_pint16 = PointerType::get(T_int16, 0);
-    T_int32 = Type::getInt32Ty(jl_LLVMContext);
-    T_char = Type::getInt32Ty(jl_LLVMContext);
-    T_pint32 = PointerType::get(T_int32, 0);
-    T_int64 = Type::getInt64Ty(jl_LLVMContext);
-    T_pint64 = PointerType::get(T_int64, 0);
-    T_uint8 = T_int8;   T_uint16 = T_int16;
-    T_uint32 = T_int32; T_uint64 = T_int64;
-    if (sizeof(size_t) == 8)
-        T_size = T_uint64;
-    else
-        T_size = T_uint32;
-    T_sigatomic = Type::getIntNTy(jl_LLVMContext, sizeof(sig_atomic_t) * 8);
-    T_psize = PointerType::get(T_size, 0);
-    T_float16 = Type::getHalfTy(jl_LLVMContext);
-    T_float32 = Type::getFloatTy(jl_LLVMContext);
-    T_pfloat32 = PointerType::get(T_float32, 0);
-    T_float64 = Type::getDoubleTy(jl_LLVMContext);
-    T_pfloat64 = PointerType::get(T_float64, 0);
-    T_float128 = Type::getFP128Ty(jl_LLVMContext);
-    T_void = Type::getVoidTy(jl_LLVMContext);
-    T_pvoidfunc = FunctionType::get(T_void, /*isVarArg*/false)->getPointerTo();
-
-    // add needed base debugging definitions to our LLVM environment
-    DIBuilder dbuilder(*m);
-    DIFile *julia_h = dbuilder.createFile("julia.h", "");
-    jl_value_dillvmt = dbuilder.createStructType(nullptr,
-        "jl_value_t",
-        julia_h,
-        71, // At the time of this writing. Not sure if it's worth it to keep this in sync
-        0 * 8, // sizeof(jl_value_t) * 8,
-        __alignof__(void*) * 8, // __alignof__(jl_value_t) * 8,
-        DINode::FlagZero, // Flags
-        nullptr,    // Derived from
-        nullptr);  // Elements - will be corrected later
-
-    jl_pvalue_dillvmt = dbuilder.createPointerType(jl_value_dillvmt, sizeof(jl_value_t*) * 8,
-                                                   __alignof__(jl_value_t*) * 8);
-
-    SmallVector<llvm::Metadata *, 1> Elts;
-    std::vector<Metadata*> diargs(0);
-    Elts.push_back(jl_pvalue_dillvmt);
-    dbuilder.replaceArrays(jl_value_dillvmt,
-       dbuilder.getOrCreateArray(Elts));
-
-    jl_ppvalue_dillvmt = dbuilder.createPointerType(jl_pvalue_dillvmt, sizeof(jl_value_t**) * 8,
-                                                    __alignof__(jl_value_t**) * 8);
-
-    diargs.push_back(jl_pvalue_dillvmt);    // Return Type (ret value)
-    diargs.push_back(jl_pvalue_dillvmt);    // First Argument (function)
-    diargs.push_back(jl_ppvalue_dillvmt);   // Second Argument (argv)
-    // Third argument (length(argv))
-    diargs.push_back(_julia_type_to_di(NULL, (jl_value_t*)jl_int32_type, &dbuilder, false));
-
-    jl_di_func_sig = dbuilder.createSubroutineType(
-        dbuilder.getOrCreateTypeArray(diargs));
-    jl_di_func_null_sig = dbuilder.createSubroutineType(
-        dbuilder.getOrCreateTypeArray(None));
-
-    T_jlvalue = StructType::get(jl_LLVMContext);
-    T_pjlvalue = PointerType::get(T_jlvalue, 0);
-    T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-    T_ppjlvalue = PointerType::get(T_pjlvalue, 0);
-    T_pprjlvalue = PointerType::get(T_prjlvalue, 0);
-    V_null = Constant::getNullValue(T_pjlvalue);
-    V_rnull = Constant::getNullValue(T_prjlvalue);
-    V_size0 = Constant::getNullValue(T_size);
-
-    std::vector<Type*> ftargs(0);
-    ftargs.push_back(T_prjlvalue);  // function
-    ftargs.push_back(T_pprjlvalue); // args[]
-    ftargs.push_back(T_int32);      // nargs
-    jl_func_sig = FunctionType::get(T_prjlvalue, ftargs, false);
-    assert(jl_func_sig != NULL);
-    ftargs.push_back(T_pprjlvalue); // linfo->sparam_vals
-    jl_func_sig_sparams = FunctionType::get(T_prjlvalue, ftargs, false);
-    assert(jl_func_sig_sparams != NULL);
-
-    Type *vaelts[] = {PointerType::get(T_int8, AddressSpace::Loaded)
-#ifdef STORE_ARRAY_LEN
-                      , T_size
-#endif
-                      , T_int16
-                      , T_int16
-                      , T_int32
-    };
-    static_assert(sizeof(jl_array_flags_t) == sizeof(int16_t),
-                  "Size of jl_array_flags_t is not the same as int16_t");
-    jl_array_llvmt = StructType::get(jl_LLVMContext, makeArrayRef(vaelts));
-    jl_parray_llvmt = PointerType::get(jl_array_llvmt, 0);
-}
-
 static void init_jit_functions(void)
 {
     add_named_global(jlstack_chk_guard_var, &__stack_chk_guard);
     add_named_global(jlRTLD_DEFAULT_var, &jl_RTLD_DEFAULT_handle);
 #ifdef _OS_WINDOWS_
     add_named_global(jlexe_var, &jl_exe_handle);
-    add_named_global(jldll_var, &jl_dl_handle);
+    add_named_global(jldll_var, &jl_libjulia_handle);
+    add_named_global(jldlli_var, &jl_libjulia_internal_handle);
 #endif
     global_jlvalue_to_llvm(new JuliaVariable{"jl_true", true, get_pjlvalue}, &jl_true);
     global_jlvalue_to_llvm(new JuliaVariable{"jl_false", true, get_pjlvalue}, &jl_false);
@@ -7519,8 +8243,9 @@ static void init_jit_functions(void)
     global_jlvalue_to_llvm(new JuliaVariable{"jl_undefref_exception", true, get_pjlvalue}, &jl_undefref_exception);
     add_named_global(jlgetworld_global, &jl_world_counter);
     add_named_global("__stack_chk_fail", &__stack_chk_fail);
-    add_named_global(jltls_states_func, (void*)NULL);
+    add_named_global(jlpgcstack_func, (void*)NULL);
     add_named_global(jlerror_func, &jl_error);
+    add_named_global(jlatomicerror_func, &jl_atomic_error);
     add_named_global(jlthrow_func, &jl_throw);
     add_named_global(jlundefvarerror_func, &jl_undefined_var_error);
     add_named_global(jlboundserrorv_func, &jl_bounds_error_ints);
@@ -7535,8 +8260,9 @@ static void init_jit_functions(void)
     add_named_global(jlcheckassign_func, &jl_checked_assignment);
     add_named_global(jldeclareconst_func, &jl_declare_constant);
     add_named_global(jlgetbindingorerror_func, &jl_get_binding_or_error);
+    add_named_global(jlgetbindingwrorerror_func, &jl_get_binding_wr_or_error);
     add_named_global(jlboundp_func, &jl_boundp);
-    for (auto it : builtin_func_map)
+    for (auto it : builtin_func_map())
         add_named_global(it.second, it.first);
     add_named_global(jlapplygeneric_func, &jl_apply_generic);
     add_named_global(jlinvoke_func, &jl_invoke);
@@ -7550,7 +8276,7 @@ static void init_jit_functions(void)
     add_named_global(jlleave_func, &jl_pop_handler);
     add_named_global(jl_restore_excstack_func, &jl_restore_excstack);
     add_named_global(jl_excstack_state_func, &jl_excstack_state);
-    add_named_global(jlegal_func, &jl_egal);
+    add_named_global(jlegalx_func, &jl_egal__unboxed);
     add_named_global(jlisa_func, &jl_isa);
     add_named_global(jlsubtype_func, &jl_subtype);
     add_named_global(jltypeassert_func, &jl_typeassert);
@@ -7561,6 +8287,7 @@ static void init_jit_functions(void)
     add_named_global(jl_loopinfo_marker_func, (void*)NULL);
     add_named_global(jl_typeof_func, (void*)NULL);
     add_named_global(jl_write_barrier_func, (void*)NULL);
+    add_named_global(jl_write_barrier_binding_func, (void*)NULL);
     add_named_global(jldlsym_func, &jl_load_and_lookup);
     add_named_global(jlgetcfunctiontrampoline_func, &jl_get_cfunction_trampoline);
     add_named_global(jlgetnthfieldchecked_func, &jl_get_nth_field_checked);
@@ -7574,7 +8301,6 @@ static void init_jit_functions(void)
     add_named_global(except_enter_func, (void*)NULL);
 
 #ifdef _OS_WINDOWS_
-#ifndef FORCE_ELF
 #if defined(_CPU_X86_64_)
 #if defined(_COMPILER_GCC_)
     add_named_global("___chkstk_ms", &___chkstk_ms);
@@ -7589,9 +8315,8 @@ static void init_jit_functions(void)
 #endif
 #endif
 #endif
-#endif
 
-#define BOX_F(ct) add_named_global("jl_box_"#ct, &jl_box_##ct);
+#define BOX_F(ct) add_named_global(XSTR(jl_box_##ct), &jl_box_##ct);
     BOX_F(int8); BOX_F(uint8);
     BOX_F(int16); BOX_F(uint16);
     BOX_F(int32); BOX_F(uint32);
@@ -7601,12 +8326,23 @@ static void init_jit_functions(void)
 #undef BOX_F
 }
 
+#ifdef JL_USE_INTEL_JITEVENTS
+char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifier
+#endif
+
+#ifdef JL_USE_OPROFILE_JITEVENTS
+char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile
+#endif
+
+#ifdef JL_USE_PERF_JITEVENTS
+char jl_using_perf_jitevents = 0;
+#endif
+
 extern "C" void jl_init_llvm(void)
 {
     jl_page_size = jl_getpagesize();
-    imaging_mode = jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental);
+    jl_default_debug_info_kind = (int) DICompileUnit::DebugEmissionKind::FullDebug;
     jl_default_cgparams.generic_context = jl_nothing;
-    jl_init_debuginfo();
 
     InitializeNativeTarget();
     InitializeNativeTargetAsmPrinter();
@@ -7630,107 +8366,60 @@ extern "C" void jl_init_llvm(void)
 #endif
 
     // Parse command line flags after initialization
-    const char *const argv_tailmerge[] = {"", "-enable-tail-merge=0"}; // NOO TOUCHIE; NO TOUCH! See #922
-    cl::ParseCommandLineOptions(sizeof(argv_tailmerge)/sizeof(argv_tailmerge[0]), argv_tailmerge, "disable-tail-merge\n");
-#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
-    const char *const argv_copyprop[] = {"", "-disable-copyprop"}; // llvm bug 21743
-    cl::ParseCommandLineOptions(sizeof(argv_copyprop)/sizeof(argv_copyprop[0]), argv_copyprop, "disable-copyprop\n");
-#endif
-#if defined(_CPU_X86_) || defined(_CPU_X86_64_)
-    const char *const argv_avoidsfb[] = {"", "-x86-disable-avoid-SFB"}; // llvm bug 41629, see https://gist.github.com/vtjnash/192cab72a6cfc00256ff118238163b55
-    cl::ParseCommandLineOptions(sizeof(argv_avoidsfb)/sizeof(argv_avoidsfb[0]), argv_avoidsfb, "disable-avoidsfb\n");
-#endif
-    cl::ParseEnvironmentOptions("Julia", "JULIA_LLVM_ARGS");
-
+    StringMap<cl::Option*> &llvmopts = cl::getRegisteredOptions();
+    const char *const argv[1] = {"julia"};
+    cl::ParseCommandLineOptions(1, argv, "", nullptr, "JULIA_LLVM_ARGS");
+
+    // Set preferred non-default options
+    cl::Option *clopt;
+    clopt = llvmopts.lookup("enable-tail-merge"); // NOO TOUCHIE; NO TOUCH! See #922
+    if (clopt->getNumOccurrences() == 0)
+        cl::ProvidePositionalOption(clopt, "0", 1);
     // if the patch adding this option has been applied, lower its limit to provide
     // better DAGCombiner performance.
-    auto &clOptions = cl::getRegisteredOptions();
-    if (clOptions.find("combiner-store-merge-dependence-limit") != clOptions.end()) {
-        const char *const argv_smdl[] = {"", "-combiner-store-merge-dependence-limit=4"};
-        cl::ParseCommandLineOptions(sizeof(argv_smdl)/sizeof(argv_smdl[0]), argv_smdl);
-    }
+    clopt = llvmopts.lookup("combiner-store-merge-dependence-limit");
+    if (clopt && clopt->getNumOccurrences() == 0)
+        cl::ProvidePositionalOption(clopt, "4", 1);
 
-    TargetOptions options = TargetOptions();
-    //options.PrintMachineCode = true; //Print machine code produced during JIT compiling
-#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-    // tell Win32 to assume the stack is always 16-byte aligned,
-    // and to ensure that it is 16-byte aligned for out-going calls,
-    // to ensure compatibility with GCC codes
-    options.StackAlignmentOverride = 16;
-#endif
-    Triple TheTriple(sys::getProcessTriple());
-#if defined(FORCE_ELF)
-    TheTriple.setObjectFormat(Triple::ELF);
+    jl_ExecutionEngine = new JuliaOJIT();
+
+    bool jl_using_gdb_jitevents = false;
+    // Register GDB event listener
+#if defined(JL_DEBUG_BUILD)
+    jl_using_gdb_jitevents = true;
+# else
+    const char *jit_gdb = getenv("ENABLE_GDBLISTENER");
+    if (jit_gdb && atoi(jit_gdb)) {
+        jl_using_gdb_jitevents = true;
+    }
 #endif
-    uint32_t target_flags = 0;
-    auto target = jl_get_llvm_target(imaging_mode, target_flags);
-    auto &TheCPU = target.first;
-    SmallVector<std::string, 10> targetFeatures(target.second.begin(), target.second.end());
-    std::string errorstr;
-    const Target *TheTarget = TargetRegistry::lookupTarget("", TheTriple, errorstr);
-    if (!TheTarget)
-        jl_errorf("%s", errorstr.c_str());
-    if (jl_processor_print_help || (target_flags & JL_TARGET_UNKNOWN_NAME)) {
-        std::unique_ptr<MCSubtargetInfo> MSTI(
-            TheTarget->createMCSubtargetInfo(TheTriple.str(), "", ""));
-        if (!MSTI->isCPUStringValid(TheCPU))
-            jl_errorf("Invalid CPU name %s.", TheCPU.c_str());
-        if (jl_processor_print_help) {
-            // This is the only way I can find to print the help message once.
-            // It'll be nice if we can iterate through the features and print our own help
-            // message...
-            MSTI->setDefaultFeatures("help", "");
-        }
-    }
-    // Package up features to be passed to target/subtarget
-    std::string FeaturesStr;
-    if (!targetFeatures.empty()) {
-        SubtargetFeatures Features;
-        for (unsigned i = 0; i != targetFeatures.size(); ++i)
-            Features.AddFeature(targetFeatures[i]);
-        FeaturesStr = Features.getString();
-    }
-    // Allocate a target...
-    Optional<CodeModel::Model> codemodel =
-#ifdef _P64
-        // Make sure we are using the large code model on 64bit
-        // Let LLVM pick a default suitable for jitting on 32bit
-        CodeModel::Large;
-#elif JL_LLVM_VERSION < 60000
-        CodeModel::JITDefault;
+    if (jl_using_gdb_jitevents)
+        jl_ExecutionEngine->enableJITDebuggingSupport();
+
+#if defined(JL_USE_INTEL_JITEVENTS) || \
+    defined(JL_USE_OPROFILE_JITEVENTS) || \
+    defined(JL_USE_PERF_JITEVENTS)
+#ifdef JL_USE_JITLINK
+#error "JIT profiling support (JL_USE_*_JITEVENTS) not yet available on platforms that use JITLink"
 #else
-        None;
+    const char *jit_profiling = getenv("ENABLE_JITPROFILING");
+
+#if defined(JL_USE_INTEL_JITEVENTS)
+    if (jit_profiling && atoi(jit_profiling)) {
+        jl_using_intel_jitevents = 1;
+    }
 #endif
-    auto optlevel = CodeGenOptLevelFor(jl_options.opt_level);
-    jl_TargetMachine = TheTarget->createTargetMachine(
-            TheTriple.getTriple(), TheCPU, FeaturesStr,
-            options,
-            Reloc::Static, // Generate simpler code for JIT
-            codemodel,
-            optlevel
-#if JL_LLVM_VERSION >= 60000
-            , /*JIT*/ true
+
+#if defined(JL_USE_OPROFILE_JITEVENTS)
+    if (jit_profiling && atoi(jit_profiling)) {
+        jl_using_oprofile_jitevents = 1;
+    }
 #endif
-            );
-    assert(jl_TargetMachine && "Failed to select target machine -"
-                               " Is the LLVM backend for this CPU enabled?");
-    #if (!defined(_CPU_ARM_) && !defined(_CPU_PPC64_))
-    // FastISel seems to be buggy for ARM. Ref #13321
-    if (jl_options.opt_level < 2)
-        jl_TargetMachine->setFastISel(true);
-    #endif
-
-    init_julia_llvm_meta();
-    jl_ExecutionEngine = new JuliaOJIT(*jl_TargetMachine);
-
-    // Mark our address spaces as non-integral
-    jl_data_layout = jl_ExecutionEngine->getDataLayout();
-    std::string DL = jl_data_layout.getStringRepresentation() + "-ni:10:11:12:13";
-    jl_data_layout.reset(DL);
-
-// Register GDB event listener
-#ifdef JL_DEBUG_BUILD
-    jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createGDBRegistrationListener());
+
+#if defined(JL_USE_PERF_JITEVENTS)
+    if (jit_profiling && atoi(jit_profiling)) {
+        jl_using_perf_jitevents= 1;
+    }
 #endif
 
 #ifdef JL_USE_INTEL_JITEVENTS
@@ -7747,23 +8436,20 @@ extern "C" void jl_init_llvm(void)
     if (jl_using_perf_jitevents)
         jl_ExecutionEngine->RegisterJITEventListener(JITEventListener::createPerfJITEventListener());
 #endif
+#endif
+#endif
+
+    cl::PrintOptionValues();
 }
 
-extern "C" void jl_init_codegen(void)
+extern "C" JL_DLLEXPORT void jl_init_codegen_impl(void)
 {
     jl_init_llvm();
     // Now that the execution engine exists, initialize all modules
-    jl_init_jit();
     init_jit_functions();
-
-    Module *m = new Module("julia", jl_LLVMContext);
-    jl_setup_module(m);
-    init_julia_llvm_env(m);
-
-    jl_init_intrinsic_functions_codegen();
 }
 
-extern "C" void jl_teardown_codegen()
+extern "C" JL_DLLEXPORT void jl_teardown_codegen_impl()
 {
     // output LLVM timings and statistics
     reportAndResetTimings();
@@ -7820,12 +8506,39 @@ extern "C" void jl_dump_llvm_mfunction(void *v)
 
 extern void jl_write_bitcode_func(void *F, char *fname) {
     std::error_code EC;
-    raw_fd_ostream OS(fname, EC, sys::fs::F_None);
+    raw_fd_ostream OS(fname, EC, sys::fs::OF_None);
     llvm::WriteBitcodeToFile(*((llvm::Function*)F)->getParent(), OS);
 }
 
 extern void jl_write_bitcode_module(void *M, char *fname) {
     std::error_code EC;
-    raw_fd_ostream OS(fname, EC, sys::fs::F_None);
+    raw_fd_ostream OS(fname, EC, sys::fs::OF_None);
     llvm::WriteBitcodeToFile(*(llvm::Module*)M, OS);
 }
+
+#ifdef _OS_WINDOWS_
+#include <psapi.h>
+#else
+#include <dlfcn.h>
+#endif
+
+#include <llvm-c/Core.h>
+
+extern "C" JL_DLLEXPORT jl_value_t *jl_get_libllvm_impl(void) JL_NOTSAFEPOINT
+{
+#if defined(_OS_WINDOWS_)
+    HMODULE mod;
+    if (!GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS, (LPCSTR)&llvm::DebugFlag, &mod))
+        return jl_nothing;
+
+    char path[MAX_PATH];
+    if (!GetModuleFileNameA(mod, path, sizeof(path)))
+        return jl_nothing;
+    return (jl_value_t*) jl_symbol(path);
+#else
+    Dl_info dli;
+    if (!dladdr((void*)LLVMContextCreate, &dli))
+        return jl_nothing;
+    return (jl_value_t*) jl_symbol(dli.dli_fname);
+#endif
+}
diff --git a/src/codegen_shared.h b/src/codegen_shared.h
index ee118708746887..e66f9702703041 100644
--- a/src/codegen_shared.h
+++ b/src/codegen_shared.h
@@ -5,6 +5,11 @@
 #include <llvm/Support/Debug.h>
 #include <llvm/IR/DebugLoc.h>
 #include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/MDBuilder.h>
+#include "julia.h"
+
+#define STR(csym)           #csym
+#define XSTR(csym)          STR(csym)
 
 enum AddressSpace {
     Generic = 0,
@@ -16,6 +21,65 @@ enum AddressSpace {
     LastSpecial = Loaded,
 };
 
+static inline auto getSizeTy(llvm::LLVMContext &ctxt) {
+    if (sizeof(size_t) > sizeof(uint32_t)) {
+        return llvm::Type::getInt64Ty(ctxt);
+    } else {
+        return llvm::Type::getInt32Ty(ctxt);
+    }
+}
+
+namespace JuliaType {
+    static inline llvm::StructType* get_jlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::StructType::get(C);
+    }
+
+    static inline llvm::PointerType* get_pjlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::PointerType::get(get_jlvalue_ty(C), 0);
+    }
+
+    static inline llvm::PointerType* get_prjlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::PointerType::get(get_jlvalue_ty(C), AddressSpace::Tracked);
+    }
+
+    static inline llvm::PointerType* get_ppjlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::PointerType::get(get_pjlvalue_ty(C), 0);
+    }
+
+    static inline llvm::PointerType* get_pprjlvalue_ty(llvm::LLVMContext &C) {
+        return llvm::PointerType::get(get_prjlvalue_ty(C), 0);
+    }
+
+    static inline auto get_jlfunc_ty(llvm::LLVMContext &C) {
+        auto T_prjlvalue = get_prjlvalue_ty(C);
+        auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0);
+        std::vector<llvm::Type*> ftargs(0);
+        ftargs.push_back(T_prjlvalue);  // function
+        ftargs.push_back(T_pprjlvalue); // args[]
+        ftargs.push_back(llvm::Type::getInt32Ty(C));      // nargs
+        return llvm::FunctionType::get(T_prjlvalue, ftargs, false);
+    }
+
+    static inline auto get_jlfuncparams_ty(llvm::LLVMContext &C) {
+        auto T_prjlvalue = get_prjlvalue_ty(C);
+        auto T_pprjlvalue = llvm::PointerType::get(T_prjlvalue, 0);
+        std::vector<llvm::Type*> ftargs(0);
+        ftargs.push_back(T_prjlvalue);  // function
+        ftargs.push_back(T_pprjlvalue); // args[]
+        ftargs.push_back(llvm::Type::getInt32Ty(C));      // nargs
+        ftargs.push_back(T_pprjlvalue); // linfo->sparam_vals
+        return llvm::FunctionType::get(T_prjlvalue, ftargs, false);
+    }
+
+    static inline auto get_voidfunc_ty(llvm::LLVMContext &C) {
+        return llvm::FunctionType::get(llvm::Type::getVoidTy(C), /*isVarArg*/false);
+    }
+
+    static inline auto get_pvoidfunc_ty(llvm::LLVMContext &C) {
+        return get_voidfunc_ty(C)->getPointerTo();
+    }
+}
+
 // JLCALL with API arguments ([extra], arg0, arg1, arg2, ...) has the following ABI calling conventions defined:
 #define JLCALL_F_CC (CallingConv::ID)37     // (jl_value_t *arg0, jl_value_t **argv, uint32_t nargv)
 #define JLCALL_F2_CC (CallingConv::ID)38    // (jl_value_t *arg0, jl_value_t **argv, uint32_t nargv, jl_value_t *extra)
@@ -29,13 +93,8 @@ struct CountTrackedPointers {
     CountTrackedPointers(llvm::Type *T);
 };
 
-#if JL_LLVM_VERSION >= 110000
-unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> &irbuilder);
+unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::Type *DTy, llvm::IRBuilder<> &irbuilder);
 std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> &irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
-#else
-unsigned TrackWithShadow(llvm::Value *Src, llvm::Type *T, bool isptr, llvm::Value *Dst, llvm::IRBuilder<> irbuilder);
-std::vector<llvm::Value*> ExtractTrackedValues(llvm::Value *Src, llvm::Type *STy, bool isptr, llvm::IRBuilder<> irbuilder, llvm::ArrayRef<unsigned> perm_offsets={});
-#endif
 
 static inline void llvm_dump(llvm::Value *v)
 {
@@ -70,3 +129,194 @@ static inline void llvm_dump(llvm::DebugLoc *dbg)
     dbg->print(llvm::dbgs());
     llvm::dbgs() << "\n";
 }
+
+static inline std::pair<llvm::MDNode*,llvm::MDNode*> tbaa_make_child_with_context(llvm::LLVMContext &ctxt, const char *name, llvm::MDNode *parent=nullptr, bool isConstant=false)
+{
+    llvm::MDBuilder mbuilder(ctxt);
+    llvm::MDNode *jtbaa = mbuilder.createTBAARoot("jtbaa");
+    llvm::MDNode *tbaa_root = mbuilder.createTBAAScalarTypeNode("jtbaa", jtbaa);
+    llvm::MDNode *scalar = mbuilder.createTBAAScalarTypeNode(name, parent ? parent : tbaa_root);
+    llvm::MDNode *n = mbuilder.createTBAAStructTagNode(scalar, scalar, 0, isConstant);
+    return std::make_pair(n, scalar);
+}
+
+static inline llvm::MDNode *get_tbaa_const(llvm::LLVMContext &ctxt) {
+    return tbaa_make_child_with_context(ctxt, "jtbaa_const", nullptr, true).first;
+}
+
+static inline llvm::Instruction *tbaa_decorate(llvm::MDNode *md, llvm::Instruction *inst)
+{
+    inst->setMetadata(llvm::LLVMContext::MD_tbaa, md);
+    if (llvm::isa<llvm::LoadInst>(inst) && md && md == get_tbaa_const(md->getContext()))
+        inst->setMetadata(llvm::LLVMContext::MD_invariant_load, llvm::MDNode::get(md->getContext(), llvm::None));
+    return inst;
+}
+
+// bitcast a value, but preserve its address space when dealing with pointer types
+static inline llvm::Value *emit_bitcast_with_builder(llvm::IRBuilder<> &builder, llvm::Value *v, llvm::Type *jl_value)
+{
+    using namespace llvm;
+    if (isa<PointerType>(jl_value) &&
+        v->getType()->getPointerAddressSpace() != jl_value->getPointerAddressSpace()) {
+        // Cast to the proper address space
+        Type *jl_value_addr = PointerType::getWithSamePointeeType(cast<PointerType>(jl_value), v->getType()->getPointerAddressSpace());
+        return builder.CreateBitCast(v, jl_value_addr);
+    }
+    else {
+        return builder.CreateBitCast(v, jl_value);
+    }
+}
+
+// Get PTLS through current task.
+static inline llvm::Value *get_current_ptls_from_task(llvm::IRBuilder<> &builder, llvm::Value *current_task, llvm::MDNode *tbaa)
+{
+    using namespace llvm;
+    auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(builder.getContext());
+    auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
+    auto T_size = builder.GetInsertBlock()->getModule()->getDataLayout().getIntPtrType(builder.getContext());
+    const int ptls_offset = offsetof(jl_task_t, ptls);
+    llvm::Value *pptls = builder.CreateInBoundsGEP(
+        T_pjlvalue, current_task,
+        ConstantInt::get(T_size, ptls_offset / sizeof(void *)),
+        "ptls_field");
+    LoadInst *ptls_load = builder.CreateAlignedLoad(T_pjlvalue,
+        emit_bitcast_with_builder(builder, pptls, T_ppjlvalue), Align(sizeof(void *)), "ptls_load");
+    // Note: Corresponding store (`t->ptls = ptls`) happens in `ctx_switch` of tasks.c.
+    tbaa_decorate(tbaa, ptls_load);
+    // Using `CastInst::Create` to get an `Instruction*` without explicit cast:
+    auto ptls = CastInst::Create(Instruction::BitCast, ptls_load, T_ppjlvalue, "ptls");
+    builder.Insert(ptls);
+    return ptls;
+}
+
+// Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14.
+//
+// Once we no longer support LLVM < 14, these can be mechanically removed by
+// translating foo(Bar, …) into Bar->foo(…) resp. Bar.foo(…).
+namespace {
+using namespace llvm;
+
+inline void addFnAttr(CallInst *Target, Attribute::AttrKind Attr)
+{
+#if JL_LLVM_VERSION >= 140000
+    Target->addFnAttr(Attr);
+#else
+    Target->addAttribute(AttributeList::FunctionIndex, Attr);
+#endif
+}
+
+template<class T, class A>
+inline void addRetAttr(T *Target, A Attr)
+{
+#if JL_LLVM_VERSION >= 140000
+    Target->addRetAttr(Attr);
+#else
+    Target->addAttribute(AttributeList::ReturnIndex, Attr);
+#endif
+}
+
+inline void addAttributeAtIndex(Function *F, unsigned Index, Attribute Attr)
+{
+#if JL_LLVM_VERSION >= 140000
+    F->addAttributeAtIndex(Index, Attr);
+#else
+    F->addAttribute(Index, Attr);
+#endif
+}
+
+inline AttributeSet getFnAttrs(const AttributeList &Attrs)
+{
+#if JL_LLVM_VERSION >= 140000
+    return Attrs.getFnAttrs();
+#else
+    return Attrs.getFnAttributes();
+#endif
+}
+
+inline AttributeSet getRetAttrs(const AttributeList &Attrs)
+{
+#if JL_LLVM_VERSION >= 140000
+    return Attrs.getRetAttrs();
+#else
+    return Attrs.getRetAttributes();
+#endif
+}
+
+inline bool hasFnAttr(const AttributeList &L, Attribute::AttrKind Kind)
+{
+#if JL_LLVM_VERSION >= 140000
+    return L.hasFnAttr(Kind);
+#else
+    return L.hasAttribute(AttributeList::FunctionIndex, Kind);
+#endif
+}
+
+inline AttributeList addAttributeAtIndex(const AttributeList &L, LLVMContext &C,
+                                         unsigned Index, Attribute::AttrKind Kind)
+{
+#if JL_LLVM_VERSION >= 140000
+    return L.addAttributeAtIndex(C, Index, Kind);
+#else
+    return L.addAttribute(C, Index, Kind);
+#endif
+}
+
+inline AttributeList addAttributeAtIndex(const AttributeList &L, LLVMContext &C,
+                                         unsigned Index, Attribute Attr)
+{
+#if JL_LLVM_VERSION >= 140000
+    return L.addAttributeAtIndex(C, Index, Attr);
+#else
+    return L.addAttribute(C, Index, Attr);
+#endif
+}
+
+inline AttributeList addAttributesAtIndex(const AttributeList &L, LLVMContext &C,
+                                          unsigned Index, const AttrBuilder &Builder)
+{
+#if JL_LLVM_VERSION >= 140000
+    return L.addAttributesAtIndex(C, Index, Builder);
+#else
+    return L.addAttributes(C, Index, Builder);
+#endif
+}
+
+inline AttributeList addFnAttribute(const AttributeList &L, LLVMContext &C,
+                                    Attribute::AttrKind Kind)
+{
+#if JL_LLVM_VERSION >= 140000
+    return L.addFnAttribute(C, Kind);
+#else
+    return L.addAttribute(C, AttributeList::FunctionIndex, Kind);
+#endif
+}
+
+inline AttributeList addRetAttribute(const AttributeList &L, LLVMContext &C,
+                                     Attribute::AttrKind Kind)
+{
+#if JL_LLVM_VERSION >= 140000
+    return L.addRetAttribute(C, Kind);
+#else
+    return L.addAttribute(C, AttributeList::ReturnIndex, Kind);
+#endif
+}
+
+inline bool hasAttributesAtIndex(const AttributeList &L, unsigned Index)
+{
+#if JL_LLVM_VERSION >= 140000
+    return L.hasAttributesAtIndex(Index);
+#else
+    return L.hasAttributes(Index);
+#endif
+}
+
+inline Attribute getAttributeAtIndex(const AttributeList &L, unsigned Index, Attribute::AttrKind Kind)
+{
+#if JL_LLVM_VERSION >= 140000
+    return L.getAttributeAtIndex(Index, Kind);
+#else
+    return L.getAttribute(Index, Kind);
+#endif
+}
+
+}
diff --git a/src/common_symbols1.inc b/src/common_symbols1.inc
index d035ab76aa6ad8..7d445289e80fa0 100644
--- a/src/common_symbols1.inc
+++ b/src/common_symbols1.inc
@@ -1,100 +1,99 @@
-jl_symbol("getproperty"),
 jl_symbol("="),
-jl_symbol("Type"),
+jl_symbol("getproperty"),
+jl_symbol("apply_type"),
 jl_symbol("getfield"),
 jl_symbol("getindex"),
-jl_symbol("apply_type"),
 jl_symbol("convert"),
 jl_symbol("==="),
+jl_symbol("iterate"),
 jl_symbol("=="),
 jl_symbol("new"),
 jl_symbol("foreigncall"),
-jl_symbol("ccall"),
 jl_symbol("int.jl"),
-jl_symbol("+"),
-jl_symbol("boot.jl"),
-jl_symbol("not_int"),
+jl_symbol("throw"),
+jl_symbol("nothing"),
 jl_symbol("essentials.jl"),
-jl_symbol("sysimg.jl"),
-jl_symbol("<"),
+jl_symbol("+"),
 jl_symbol("unsafe_convert"),
+jl_symbol("not_int"),
 jl_symbol("-"),
-jl_symbol("iterate"),
+jl_symbol("boot.jl"),
 jl_symbol("number.jl"),
-jl_symbol("throw"),
-jl_symbol("promotion.jl"),
-jl_symbol("static_parameter"),
 jl_symbol("length"),
+jl_symbol("<"),
 jl_symbol("cconvert"),
+jl_symbol("Base.jl"),
+jl_symbol("promotion.jl"),
 jl_symbol("tuple.jl"),
+jl_symbol("static_parameter"),
+jl_symbol("isempty"),
+jl_symbol("<="),
 jl_symbol("array.jl"),
 jl_symbol("operators.jl"),
-jl_symbol("*"),
+jl_symbol("NamedTuple"),
 jl_symbol("bitcast"),
-jl_symbol("slt_int"),
-jl_symbol("isempty"),
-jl_symbol("indexed_iterate"),
-jl_symbol("size"),
 jl_symbol("!"),
-jl_symbol("nothing"),
-jl_symbol("NamedTuple"),
-jl_symbol("<="),
+jl_symbol("indexed_iterate"),
+jl_symbol("sle_int"),
 jl_symbol("bool.jl"),
-jl_symbol("string"),
-jl_symbol("!="),
-jl_symbol("deprecated.jl"),
-jl_symbol("_apply"),
-jl_symbol("none"),
-jl_symbol("meta"),
-jl_symbol("typeof"),
-jl_symbol("ifelse"),
-jl_symbol("name"),
+jl_symbol("Ptr"),
+jl_symbol("size"),
 jl_symbol("add_int"),
-jl_symbol("setindex!"),
+jl_symbol("slt_int"),
+jl_symbol("*"),
 jl_symbol("range.jl"),
+jl_symbol("abstractarray.jl"),
+jl_symbol("!="),
+jl_symbol("isa"),
+jl_symbol("setindex!"),
+jl_symbol("string"),
+jl_symbol("ifelse"),
 jl_symbol(":"),
-jl_symbol("depwarn"),
-jl_symbol("noinline"),
 jl_symbol(">"),
-jl_symbol("UInt8"),
-jl_symbol("abstractarray.jl"),
-jl_symbol("sub_int"),
-jl_symbol("max"),
-jl_symbol("sle_int"),
-jl_symbol("Typeof"),
-jl_symbol("mt"),
+jl_symbol("_apply_iterate"),
+jl_symbol("UInt64"),
 jl_symbol("&"),
-jl_symbol("Ptr"),
-jl_symbol("pointer.jl"),
+jl_symbol("max"),
 jl_symbol("rem"),
+jl_symbol("sub_int"),
 jl_symbol(">="),
-jl_symbol("typeassert"),
-jl_symbol("lshr_int"),
-jl_symbol("toInt64"),
-jl_symbol("trunc_int"),
+jl_symbol("UInt8"),
+jl_symbol("iterators.jl"),
+jl_symbol("Int64"),
 jl_symbol("pairs"),
 jl_symbol("and_int"),
 jl_symbol("last"),
-jl_symbol("iterators.jl"),
-jl_symbol("first"),
-jl_symbol("eq_int"),
-jl_symbol("throw_inexacterror"),
-jl_symbol("map"),
-jl_symbol("UInt64"),
+jl_symbol("typeof"),
 jl_symbol("arrayref"),
-jl_symbol("Int"),
-jl_symbol("reinterpret"),
-jl_symbol("Int64"),
-jl_symbol("setfield!"),
-jl_symbol("kwfunc"),
+jl_symbol("pointer.jl"),
+jl_symbol("toInt64"),
 jl_symbol("arraylen"),
-jl_symbol("axes"),
+jl_symbol("typeassert"),
+jl_symbol("map"),
+jl_symbol("kwfunc"),
 jl_symbol("ArgumentError"),
-jl_symbol("macro expansion"),
+jl_symbol("lshr_int"),
+jl_symbol("axes"),
+jl_symbol("reinterpret"),
+jl_symbol("Array"),
+jl_symbol("first"),
+jl_symbol("trunc_int"),
+jl_symbol("OneTo"),
+jl_symbol("haskey"),
+jl_symbol("Int"),
+jl_symbol("oneto"),
+jl_symbol("eq_int"),
+jl_symbol("throw_inexacterror"),
 jl_symbol("toUInt64"),
-jl_symbol("check_top_bit"),
-jl_symbol("is_top_bit_set"),
-jl_symbol("isa"),
+jl_symbol("arraysize"),
 jl_symbol("UInt"),
-jl_symbol("haskey"),
 jl_symbol("setproperty!"),
+jl_symbol("check_top_bit"),
+jl_symbol("promote"),
+jl_symbol("unsigned"),
+jl_symbol("is_top_bit_set"),
+jl_symbol("structdiff"),
+jl_symbol("undef"),
+jl_symbol("sizeof"),
+jl_symbol("String"),
+jl_symbol("namedtuple.jl"),
diff --git a/src/common_symbols2.inc b/src/common_symbols2.inc
index d49528920c0e24..c9f4e41b83e33f 100644
--- a/src/common_symbols2.inc
+++ b/src/common_symbols2.inc
@@ -1,254 +1,254 @@
-jl_symbol("promote"),
-jl_symbol("undef"),
+jl_symbol("pop"),
+jl_symbol("inbounds"),
+jl_symbol("strings/string.jl"),
+jl_symbol("Ref"),
 jl_symbol("Vector"),
-jl_symbol("parent"),
+jl_symbol("kwerr"),
 jl_symbol("_promote"),
-jl_symbol("Ref"),
-jl_symbol("push!"),
-jl_symbol("arraysize"),
-jl_symbol("jl_value_ptr"),
-jl_symbol("mutable"),
-jl_symbol("<<"),
-jl_symbol("pointer_from_objref"),
-jl_symbol("promote_typeof"),
-jl_symbol("unsigned"),
-jl_symbol("zext_int"),
-jl_symbol("strings/string.jl"),
+jl_symbol("sext_int"),
 jl_symbol("pointer"),
-jl_symbol("jl_alloc_array_1d"),
-jl_symbol("inbounds"),
+jl_symbol("similar"),
 jl_symbol("arrayset"),
-jl_symbol("data"),
+jl_symbol("axes1"),
+jl_symbol("eachindex"),
 jl_symbol("|"),
-jl_symbol(">>"),
-jl_symbol("pop"),
-jl_symbol("sizeof"),
-jl_symbol("strings/basic.jl"),
-jl_symbol("namedtuple.jl"),
-jl_symbol("structdiff"),
-jl_symbol("print"),
-jl_symbol("bitarray.jl"),
-jl_symbol("oftype"),
-jl_symbol("kwerr"),
-jl_symbol("adjoint"),
 jl_symbol("ult_int"),
-jl_symbol("isdefined"),
-jl_symbol("shl_int"),
 jl_symbol("lastindex"),
-jl_symbol("DimensionMismatch"),
-jl_symbol("abstractdict.jl"),
-jl_symbol("zero"),
+jl_symbol("setfield!"),
+jl_symbol("UnitRange"),
+jl_symbol("push!"),
 jl_symbol("Bool"),
 jl_symbol("Colon"),
-jl_symbol("copy"),
-jl_symbol("Cvoid"),
 jl_symbol("fieldtype"),
-jl_symbol("add_ptr"),
-jl_symbol("isdone"),
-jl_symbol("eachindex"),
-jl_symbol("eltype"),
-jl_symbol("float.jl"),
 jl_symbol("unitrange_last"),
-jl_symbol("strings/io.jl"),
+jl_symbol("bitarray.jl"),
+jl_symbol("<<"),
+jl_symbol("zext_int"),
+jl_symbol("Tuple"),
+jl_symbol("reflection.jl"),
+jl_symbol("TypeError"),
+jl_symbol("print"),
+jl_symbol("eltype"),
+jl_symbol(">>"),
+jl_symbol("strings/basic.jl"),
 jl_symbol("gc_preserve_begin"),
+jl_symbol("require_one_based_indexing"),
 jl_symbol("gc_preserve_end"),
-jl_symbol("tail"),
-jl_symbol("String"),
-jl_symbol("mul_int"),
+jl_symbol("DimensionMismatch"),
 jl_symbol("indices.jl"),
-jl_symbol("in"),
-jl_symbol("BlasInt"),
-jl_symbol("indices1"),
+jl_symbol("Cvoid"),
+jl_symbol("oftype"),
+jl_symbol("zero"),
+jl_symbol("float.jl"),
 jl_symbol("Any"),
-jl_symbol("min"),
-jl_symbol("Tuple"),
-jl_symbol("error"),
-jl_symbol("gcutils.jl"),
-jl_symbol("ptr"),
+jl_symbol("checkbounds"),
 jl_symbol("or_int"),
+jl_symbol("isdefined"),
 jl_symbol("dict.jl"),
+jl_symbol("strings/io.jl"),
+jl_symbol("shl_int"),
+jl_symbol("copy"),
+jl_symbol("macro expansion"),
+jl_symbol("abstractdict.jl"),
+jl_symbol("in"),
+jl_symbol("io.jl"),
+jl_symbol("BlasInt"),
 jl_symbol("Float64"),
-jl_symbol("Array"),
-jl_symbol("reflection.jl"),
-jl_symbol("transpose"),
-jl_symbol("copyto!"),
-jl_symbol("checkbounds"),
-jl_symbol("stride"),
-jl_symbol("unsafe_load"),
-jl_symbol("show"),
-jl_symbol("broadcasted"),
-jl_symbol("chkstride1"),
-jl_symbol("contents"),
-jl_symbol("_growend!"),
-jl_symbol("argtail"),
-jl_symbol("trunc"),
+jl_symbol("mul_int"),
 jl_symbol("UInt32"),
-jl_symbol("refvalue.jl"),
-jl_symbol("io.jl"),
-jl_symbol("jl_array_grow_end"),
-jl_symbol("multidimensional.jl"),
-jl_symbol("real"),
-jl_symbol("pointerref"),
-jl_symbol("jl_array_ptr"),
-jl_symbol("keys"),
-jl_symbol("Int32"),
-jl_symbol("get"),
-jl_symbol("stop"),
-jl_symbol("liblapack"),
-jl_symbol("Enums.jl"),
-jl_symbol("unsafe_length"),
-jl_symbol("one"),
-jl_symbol("broadcast.jl"),
-jl_symbol("BoundsError"),
-jl_symbol("char.jl"),
 jl_symbol("C_NULL"),
-jl_symbol("x"),
+jl_symbol("Integer"),
+jl_symbol("!=="),
+jl_symbol("merge"),
+jl_symbol("BoundsError"),
+jl_symbol("broadcasted"),
+jl_symbol("Cint"),
+jl_symbol("min"),
+jl_symbol("libblastrampoline"),
+jl_symbol("iszero"),
+jl_symbol("refvalue.jl"),
+jl_symbol("stride"),
+jl_symbol("error"),
 jl_symbol("ncodeunits"),
+jl_symbol("LinearIndices"),
+jl_symbol("Clong"),
+jl_symbol("pair.jl"),
+jl_symbol("_growend!"),
+jl_symbol("char.jl"),
+jl_symbol("copyto!"),
+jl_symbol("get"),
+jl_symbol("tail"),
+jl_symbol("real"),
+jl_symbol("Union"),
+jl_symbol("multidimensional.jl"),
 jl_symbol("enter"),
-jl_symbol("Float32"),
-jl_symbol("value"),
-jl_symbol("write"),
 jl_symbol("leave"),
-jl_symbol("isless"),
+jl_symbol("add_ptr"),
+jl_symbol("chkstride1"),
 jl_symbol("Expr"),
-jl_symbol("gmp.jl"),
-jl_symbol("AssertionError"),
+jl_symbol("write"),
+jl_symbol("broadcast.jl"),
+jl_symbol("show.jl"),
+jl_symbol("none"),
+jl_symbol("Generator"),
+jl_symbol("Int32"),
 jl_symbol("materialize"),
-jl_symbol("Union"),
-jl_symbol("Integer"),
-jl_symbol("neg_int"),
-jl_symbol("print_to_string"),
-jl_symbol("chklapackerror"),
-jl_symbol("prod"),
+jl_symbol("show"),
+jl_symbol("lock"),
+jl_symbol("unsafe_load"),
+jl_symbol("gmp.jl"),
+jl_symbol("mpfr.jl"),
+jl_symbol("Symbol"),
+jl_symbol("Pair"),
 jl_symbol("resize!"),
-jl_symbol("ldiv!"),
-jl_symbol("Cint"),
+jl_symbol("neg_int"),
+jl_symbol("strings/substring.jl"),
+jl_symbol("AssertionError"),
+jl_symbol("identity"),
+jl_symbol("one"),
+jl_symbol("reduce.jl"),
+jl_symbol("libcholmod"),
+jl_symbol("isless"),
+jl_symbol("reducedim.jl"),
 jl_symbol("checksquare"),
-jl_symbol("args"),
-jl_symbol("_length"),
-jl_symbol("!=="),
+jl_symbol("sort.jl"),
+jl_symbol("generator.jl"),
+jl_symbol("pointer_from_objref"),
+jl_symbol("Float32"),
+jl_symbol("chklapackerror"),
+jl_symbol("parent"),
+jl_symbol("task.jl"),
+jl_symbol("div"),
+jl_symbol("cholmod_common"),
+jl_symbol("ht_keyindex"),
+jl_symbol("pop_exception"),
+jl_symbol("c.jl"),
+jl_symbol("firstindex"),
+jl_symbol("some.jl"),
+jl_symbol("iobuffer.jl"),
+jl_symbol("sub_ptr"),
+jl_symbol("vect"),
+jl_symbol("unsafe_string"),
+jl_symbol("llvmcall"),
+jl_symbol("checkindex"),
+jl_symbol("_call_latest"),
+jl_symbol("rethrow"),
+jl_symbol("pointerref"),
+jl_symbol("println"),
+jl_symbol("keys"),
+jl_symbol("RefValue"),
 jl_symbol("_expr"),
-jl_symbol("merge"),
-jl_symbol("dims"),
-jl_symbol("the_exception"),
-jl_symbol("Base"),
 jl_symbol("toUInt32"),
-jl_symbol("mpfr.jl"),
-jl_symbol("<:"),
-jl_symbol("div"),
-jl_symbol("start"),
-jl_symbol("pair.jl"),
+jl_symbol("ismissing"),
+jl_symbol("throw_boundserror"),
+jl_symbol("IteratorSize"),
+jl_symbol("iddict.jl"),
+jl_symbol("to_shape"),
+jl_symbol("Csize_t"),
+jl_symbol("~"),
+jl_symbol("argtail"),
+jl_symbol("include"),
+jl_symbol("set.jl"),
+jl_symbol("isequal"),
 jl_symbol("refpointer.jl"),
-jl_symbol("chunks"),
+jl_symbol("=>"),
 jl_symbol("Val"),
-jl_symbol("show.jl"),
-jl_symbol("sort.jl"),
+jl_symbol("Base"),
+jl_symbol("%"),
+jl_symbol("collect"),
+jl_symbol("Type##kw"),
+jl_symbol("typemax"),
 jl_symbol("fill!"),
-jl_symbol("step"),
-jl_symbol("vals"),
-jl_symbol("toInt32"),
-jl_symbol("mul!"),
-jl_symbol("vect"),
-jl_symbol("len"),
-jl_symbol("ashr_int"),
-jl_symbol("~"),
-jl_symbol("count"),
-jl_symbol("ht_keyindex"),
-jl_symbol("iobuffer.jl"),
-jl_symbol("Generator"),
-jl_symbol("eval"),
-jl_symbol("f"),
-jl_symbol("throw_undef_if_not"),
 jl_symbol("ule_int"),
+jl_symbol("atomics.jl"),
 jl_symbol("libgit2"),
-jl_symbol("head"),
-jl_symbol("LinearIndices"),
-jl_symbol("collect"),
-jl_symbol("set.jl"),
-jl_symbol("lmul!"),
-jl_symbol("offset"),
-jl_symbol("abs"),
-jl_symbol("Symbol"),
-jl_symbol("identity"),
-jl_symbol("typemax"),
+jl_symbol("BigFloat"),
+jl_symbol("ashr_int"),
 jl_symbol("boundscheck"),
-jl_symbol("isequal"),
-jl_symbol("id"),
+jl_symbol("abs"),
 jl_symbol("^"),
-jl_symbol("generator.jl"),
-jl_symbol("=>"),
-jl_symbol("c.jl"),
-jl_symbol("fastmath.jl"),
-jl_symbol("copyast"),
-jl_symbol("IteratorSize"),
-jl_symbol("checkindex"),
-jl_symbol("strings/substring.jl"),
-jl_symbol("println"),
-jl_symbol("throw_boundserror"),
-jl_symbol("io"),
-jl_symbol("dict"),
-jl_symbol("Cstring"),
-jl_symbol("codeunit"),
-jl_symbol("unsafe_string"),
-jl_symbol("n"),
-jl_symbol("close"),
-jl_symbol("BigFloat"),
-jl_symbol("%"),
-jl_symbol("read"),
-jl_symbol("checked.jl"),
-jl_symbol("checked_trunc_sint"),
-jl_symbol("math.jl"),
-jl_symbol("round"),
-jl_symbol("iostream.jl"),
+jl_symbol("ensure_initialized"),
+jl_symbol("_array_for"),
+jl_symbol("strings/util.jl"),
+jl_symbol("Dict"),
 jl_symbol("Nothing"),
-jl_symbol("state"),
 jl_symbol("compiler/ssair/ir.jl"),
-jl_symbol("stream.jl"),
-jl_symbol("Box"),
-jl_symbol("missing.jl"),
-jl_symbol("rmul!"),
-jl_symbol("process.jl"),
-jl_symbol("Core"),
-jl_symbol("reduce.jl"),
-jl_symbol("SizeUnknown"),
-jl_symbol("diag"),
-jl_symbol("atomics.jl"),
-jl_symbol("promote_rule"),
-jl_symbol("_mod64"),
-jl_symbol("llvmcall"),
-jl_symbol("reducedim.jl"),
-jl_symbol("to_shape"),
-jl_symbol("ComplexF32"),
-jl_symbol("HasShape"),
-jl_symbol("block"),
-jl_symbol("checked_trunc_uint"),
-jl_symbol("float"),
-jl_symbol("unsafe_trunc"),
-jl_symbol("isnan"),
+jl_symbol("unsafe_write"),
+jl_symbol("util.jl"),
+jl_symbol("toInt32"),
+jl_symbol("loading.jl"),
+jl_symbol("value"),
+jl_symbol("expr.jl"),
+jl_symbol("print_to_string"),
+jl_symbol("the_exception"),
+jl_symbol("nonzeros"),
+jl_symbol("<:"),
+jl_symbol("KeyError"),
 jl_symbol("xor"),
-jl_symbol("task.jl"),
-jl_symbol("complex.jl"),
-jl_symbol(">>>"),
-jl_symbol("bitset.jl"),
 jl_symbol("logging.jl"),
-jl_symbol("s"),
-jl_symbol("libmpfr"),
+jl_symbol("stat.jl"),
+jl_symbol("close"),
+jl_symbol("adjoint"),
+jl_symbol("meta"),
+jl_symbol("path.jl"),
+jl_symbol("round"),
+jl_symbol("Cstring"),
+jl_symbol("SizeUnknown"),
+jl_symbol("esc"),
+jl_symbol("missing.jl"),
+jl_symbol("throw_undef_if_not"),
+jl_symbol("error.jl"),
+jl_symbol("Type"),
+jl_symbol("mul!"),
+jl_symbol("math.jl"),
+jl_symbol("unsafe_trunc"),
 jl_symbol("missing"),
-jl_symbol("nzval"),
-jl_symbol("special/trig.jl"),
-jl_symbol("loading.jl"),
-jl_symbol("KeyError"),
-jl_symbol("cmp"),
-jl_symbol("promote_type"),
 jl_symbol("subarray.jl"),
-jl_symbol("handle"),
+jl_symbol("noinline"),
+jl_symbol("isnan"),
+jl_symbol("ldiv!"),
+jl_symbol("DataType"),
+jl_symbol("codeunit"),
+jl_symbol("condition.jl"),
+jl_symbol("step"),
+jl_symbol("copyast"),
+jl_symbol("bitset.jl"),
+jl_symbol("float"),
+jl_symbol("fastmath.jl"),
+jl_symbol("_mod64"),
 jl_symbol("_div64"),
-jl_symbol("ht"),
-jl_symbol("UInt128"),
-jl_symbol("zeros"),
-jl_symbol("Dict"),
 jl_symbol("all"),
-jl_symbol("ComplexF64"),
+jl_symbol("parse"),
+jl_symbol("joinpath"),
+jl_symbol("nextind"),
+jl_symbol("regex.jl"),
+jl_symbol("Enums.jl"),
+jl_symbol("promote_type"),
+jl_symbol("Cdouble"),
+jl_symbol("ComplexF32"),
+jl_symbol("read"),
+jl_symbol("intfuncs.jl"),
 jl_symbol("Complex"),
-jl_symbol("checked_add"),
-jl_symbol("mod"),
+jl_symbol("_deleteend!"),
+jl_symbol("stat"),
+jl_symbol("UnionAll"),
+jl_symbol("special/trig.jl"),
+jl_symbol("UInt128"),
+jl_symbol("_copyto_impl!"),
+jl_symbol("stream.jl"),
+jl_symbol("lmul!"),
+jl_symbol("repr"),
+jl_symbol("promote_rule"),
+jl_symbol("xor_int"),
+jl_symbol("complex.jl"),
+jl_symbol("transpose"),
+jl_symbol(">>>"),
+jl_symbol("cholmod_sparse"),
+jl_symbol("filemode"),
+jl_symbol("ComplexF64"),
+jl_symbol("SparseMatrixCSC"),
+jl_symbol("view"),
+jl_symbol("GitError"),
+jl_symbol("zeros"),
+jl_symbol("InexactError"),
+jl_symbol("LogLevel"),
diff --git a/src/coverage.cpp b/src/coverage.cpp
new file mode 100644
index 00000000000000..46363a7e9ac01d
--- /dev/null
+++ b/src/coverage.cpp
@@ -0,0 +1,214 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include <string>
+#include <fstream>
+#include <map>
+#include <vector>
+
+#include "llvm-version.h"
+#include <llvm/ADT/StringRef.h>
+#include <llvm/ADT/StringMap.h>
+#include <llvm/Support/raw_ostream.h>
+
+#include "julia.h"
+#include "julia_internal.h"
+
+using namespace llvm;
+
+static int codegen_imaging_mode(void)
+{
+    return jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental);
+}
+
+// Logging for code coverage and memory allocation
+
+const int logdata_blocksize = 32; // target getting nearby lines in the same general cache area and reducing calls to malloc by chunking
+typedef uint64_t logdata_block[logdata_blocksize];
+typedef StringMap< std::vector<logdata_block*> > logdata_t;
+
+static uint64_t *allocLine(std::vector<logdata_block*> &vec, int line)
+{
+    unsigned block = line / logdata_blocksize;
+    line = line % logdata_blocksize;
+    if (vec.size() <= block)
+        vec.resize(block + 1);
+    if (vec[block] == NULL) {
+        vec[block] = (logdata_block*)calloc(1, sizeof(logdata_block));
+    }
+    logdata_block &data = *vec[block];
+    if (data[line] == 0)
+        data[line] = 1;
+    return &data[line];
+}
+
+// Code coverage
+
+static logdata_t coverageData;
+
+JL_DLLEXPORT void jl_coverage_alloc_line(StringRef filename, int line)
+{
+    assert(!codegen_imaging_mode());
+    if (filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
+        return;
+    allocLine(coverageData[filename], line);
+}
+
+JL_DLLEXPORT uint64_t *jl_coverage_data_pointer(StringRef filename, int line)
+{
+    return allocLine(coverageData[filename], line);
+}
+
+extern "C" JL_DLLEXPORT void jl_coverage_visit_line(const char *filename_, size_t len_filename, int line)
+{
+    StringRef filename = StringRef(filename_, len_filename);
+    if (codegen_imaging_mode() || filename == "" || filename == "none" || filename == "no file" || filename == "<missing>" || line < 0)
+        return;
+    std::vector<logdata_block*> &vec = coverageData[filename];
+    uint64_t *ptr = allocLine(vec, line);
+    (*ptr)++;
+}
+
+// Memory allocation log (malloc_log)
+
+static logdata_t mallocData;
+
+JL_DLLEXPORT uint64_t *jl_malloc_data_pointer(StringRef filename, int line)
+{
+    return allocLine(mallocData[filename], line);
+}
+
+// Resets the malloc counts.
+extern "C" JL_DLLEXPORT void jl_clear_malloc_data(void)
+{
+    logdata_t::iterator it = mallocData.begin();
+    for (; it != mallocData.end(); it++) {
+        std::vector<logdata_block*> &bytes = (*it).second;
+        std::vector<logdata_block*>::iterator itb;
+        for (itb = bytes.begin(); itb != bytes.end(); itb++) {
+            if (*itb) {
+                logdata_block &data = **itb;
+                for (int i = 0; i < logdata_blocksize; i++) {
+                    if (data[i] > 0)
+                        data[i] = 1;
+                }
+            }
+        }
+    }
+    jl_gc_sync_total_bytes(0);
+}
+
+static void write_log_data(logdata_t &logData, const char *extension)
+{
+    std::string base = std::string(jl_options.julia_bindir);
+    base = base + "/../share/julia/base/";
+    logdata_t::iterator it = logData.begin();
+    for (; it != logData.end(); it++) {
+        std::string filename(it->first());
+        std::vector<logdata_block*> &values = it->second;
+        if (!values.empty()) {
+            if (!jl_isabspath(filename.c_str()))
+                filename = base + filename;
+            std::ifstream inf(filename.c_str());
+            if (!inf.is_open())
+                continue;
+            std::string outfile = filename + extension;
+            std::ofstream outf(outfile.c_str(), std::ofstream::trunc | std::ofstream::out | std::ofstream::binary);
+            if (outf.is_open()) {
+                inf.exceptions(std::ifstream::badbit);
+                outf.exceptions(std::ifstream::failbit | std::ifstream::badbit);
+                char line[1024];
+                int l = 1;
+                unsigned block = 0;
+                while (!inf.eof()) {
+                    inf.getline(line, sizeof(line));
+                    if (inf.fail()) {
+                        if (inf.eof())
+                            break; // no content on trailing line
+                        // Read through lines longer than sizeof(line)
+                        inf.clear();
+                        inf.ignore(std::numeric_limits<std::streamsize>::max(), '\n');
+                    }
+                    logdata_block *data = NULL;
+                    if (block < values.size()) {
+                        data = values[block];
+                    }
+                    uint64_t value = data ? (*data)[l] : 0;
+                    if (++l >= logdata_blocksize) {
+                        l = 0;
+                        block++;
+                    }
+                    outf.width(9);
+                    if (value == 0)
+                        outf << '-';
+                    else
+                        outf << (value - 1);
+                    outf.width(0);
+                    outf << " " << line << '\n';
+                }
+                outf.close();
+            }
+            inf.close();
+        }
+    }
+}
+
+static void write_lcov_data(logdata_t &logData, const std::string &outfile)
+{
+    std::ofstream outf(outfile.c_str(), std::ofstream::ate | std::ofstream::out | std::ofstream::binary);
+    //std::string base = std::string(jl_options.julia_bindir);
+    //base = base + "/../share/julia/base/";
+    logdata_t::iterator it = logData.begin();
+    for (; it != logData.end(); it++) {
+        StringRef filename = it->first();
+        const std::vector<logdata_block*> &values = it->second;
+        if (!values.empty()) {
+            outf << "SF:" << filename.str() << '\n';
+            size_t n_covered = 0;
+            size_t n_instrumented = 0;
+            size_t lno = 0;
+            for (auto &itv : values) {
+                if (itv) {
+                    logdata_block &data = *itv;
+                    for (int i = 0; i < logdata_blocksize; i++) {
+                        auto cov = data[i];
+                        if (cov > 0) {
+                            n_instrumented++;
+                            if (cov > 1)
+                                n_covered++;
+                            outf << "DA:" << lno << ',' << (cov - 1) << '\n';
+                        }
+                        lno++;
+                    }
+                }
+                else {
+                    lno += logdata_blocksize;
+                }
+            }
+            outf << "LH:" << n_covered << '\n';
+            outf << "LF:" << n_instrumented << '\n';
+            outf << "end_of_record\n";
+        }
+    }
+    outf.close();
+}
+
+extern "C" JL_DLLEXPORT void jl_write_coverage_data(const char *output)
+{
+    if (output) {
+        StringRef output_pattern(output);
+        if (output_pattern.endswith(".info"))
+            write_lcov_data(coverageData, jl_format_filename(output_pattern.str().c_str()));
+    }
+    else {
+        std::string stm;
+        raw_string_ostream(stm) << "." << uv_os_getpid() << ".cov";
+        write_log_data(coverageData, stm.c_str());
+    }
+}
+
+extern "C" JL_DLLEXPORT void jl_write_malloc_log(void)
+{
+    std::string stm;
+    raw_string_ostream(stm) << "." << uv_os_getpid() << ".mem";
+    write_log_data(mallocData, stm.c_str());
+}
diff --git a/src/crc32c.c b/src/crc32c.c
index 8bc9b0c23381b7..4ca8db06459a1c 100644
--- a/src/crc32c.c
+++ b/src/crc32c.c
@@ -80,7 +80,7 @@ JL_UNUSED static inline uint32_t crc32c_shift(const uint32_t zeros[][256], uint3
         zeros[2][(crc >> 16) & 0xff] ^ zeros[3][crc >> 24];
 }
 
-#if (defined(_CPU_X86_64_) || defined(_CPU_X86_)) && !defined(_COMPILER_MICROSOFT_)
+#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
 #  ifdef _CPU_X86_64_
 #    define CRC32_PTR "crc32q"
 #  else
@@ -204,7 +204,11 @@ static crc32c_func_t crc32c_dispatch(void)
 #    define crc32c_dispatch_ifunc "crc32c_dispatch"
 #  endif
 #elif defined(_CPU_AARCH64_)
+#ifdef _COMPILER_CLANG_
+#define CRC_TARGET __attribute__((target("crc")))
+#else
 #define CRC_TARGET __attribute__((target("+crc")))
+#endif
 /* Compute CRC-32C using the ARMv8 CRC32 extension. */
 CRC_TARGET static inline uint32_t crc32cx(uint32_t crc, uint64_t val)
 {
@@ -346,7 +350,7 @@ static crc32c_func_t crc32c_dispatch(unsigned long hwcap)
 #    define crc32c_dispatch() crc32c_dispatch(getauxval(AT_HWCAP))
 #    define crc32c_dispatch_ifunc "crc32c_dispatch"
 #  else
-#  warning CRC32 feature detection not implemented for this OS. Falling back to software version.
+#  pragma message("CRC32 feature detection not implemented for this OS. Falling back to software version.")
 #  endif
 #else
 // If we don't have any accelerated version to define, just make the _sw version define
diff --git a/src/datatype.c b/src/datatype.c
index 269076ef2819b8..54d74f59023f3f 100644
--- a/src/datatype.c
+++ b/src/datatype.c
@@ -42,15 +42,15 @@ static jl_sym_t *jl_demangle_typename(jl_sym_t *s) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_methtable_t *mt =
-        (jl_methtable_t*)jl_gc_alloc(ptls, sizeof(jl_methtable_t),
+        (jl_methtable_t*)jl_gc_alloc(ct->ptls, sizeof(jl_methtable_t),
                                      jl_methtable_type);
     mt->name = jl_demangle_typename(name);
     mt->module = module;
-    mt->defs = jl_nothing;
-    mt->leafcache = (jl_array_t*)jl_an_empty_vec_any;
-    mt->cache = jl_nothing;
+    jl_atomic_store_relaxed(&mt->defs, jl_nothing);
+    jl_atomic_store_relaxed(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&mt->cache, jl_nothing);
     mt->max_args = 0;
     mt->kwsorter = NULL;
     mt->backedges = NULL;
@@ -60,21 +60,28 @@ JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *mo
     return mt;
 }
 
-JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *module)
+JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *module, int abstract, int mutabl)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_typename_t *tn =
-        (jl_typename_t*)jl_gc_alloc(ptls, sizeof(jl_typename_t),
+        (jl_typename_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typename_t),
                                     jl_typename_type);
     tn->name = name;
     tn->module = module;
     tn->wrapper = NULL;
-    tn->cache = jl_emptysvec;
-    tn->linearcache = jl_emptysvec;
+    jl_atomic_store_release(&tn->Typeofwrapper, NULL);
+    jl_atomic_store_relaxed(&tn->cache, jl_emptysvec);
+    jl_atomic_store_relaxed(&tn->linearcache, jl_emptysvec);
     tn->names = NULL;
     tn->hash = bitmix(bitmix(module ? module->build_id : 0, name->hash), 0xa1ada1da);
+    tn->abstract = abstract;
+    tn->mutabl = mutabl;
+    tn->mayinlinealloc = 0;
     tn->mt = NULL;
     tn->partial = NULL;
+    tn->atomicfields = NULL;
+    tn->constfields = NULL;
+    tn->max_methods = 0;
     return tn;
 }
 
@@ -82,23 +89,24 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu
 
 jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_datatype_t *super, jl_svec_t *parameters)
 {
-    return jl_new_datatype((jl_sym_t*)name, module, super, parameters, jl_emptysvec, jl_emptysvec, 1, 0, 0);
+    return jl_new_datatype((jl_sym_t*)name, module, super, parameters, jl_emptysvec, jl_emptysvec, jl_emptysvec, 1, 0, 0);
 }
 
 jl_datatype_t *jl_new_uninitialized_datatype(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ptls, sizeof(jl_datatype_t), jl_datatype_type);
+    jl_task_t *ct = jl_current_task;
+    jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type);
     t->hash = 0;
     t->hasfreetypevars = 0;
     t->isdispatchtuple = 0;
     t->isbitstype = 0;
     t->zeroinit = 0;
-    t->isinlinealloc = 0;
     t->has_concrete_subtype = 1;
     t->cached_by_hash = 0;
+    t->name = NULL;
+    t->super = NULL;
+    t->parameters = NULL;
     t->layout = NULL;
-    t->names = NULL;
     t->types = NULL;
     t->instance = NULL;
     return t;
@@ -215,37 +223,67 @@ unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *t)
     return next_power_of_two(size);
 }
 
-STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d)
+STATIC_INLINE int jl_is_datatype_make_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT
 {
-    return (!d->abstract && jl_datatype_size(d) == 0 && d != jl_symbol_type && d->name != jl_array_typename &&
-            d->isconcretetype && !d->mutabl);
+    return (!d->name->abstract && jl_datatype_size(d) == 0 && d != jl_symbol_type && d->name != jl_array_typename &&
+            d->isconcretetype && !d->name->mutabl);
 }
 
-STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st)
+STATIC_INLINE void jl_maybe_allocate_singleton_instance(jl_datatype_t *st) JL_NOTSAFEPOINT
 {
     if (jl_is_datatype_make_singleton(st)) {
         // It's possible for st to already have an ->instance if it was redefined
-        if (!st->instance) {
-            st->instance = jl_gc_alloc(jl_get_ptls_states(), 0, st);
-            jl_gc_wb(st, st->instance);
+        if (!st->instance)
+            st->instance = jl_gc_permobj(0, st);
+    }
+}
+
+// return whether all concrete subtypes of this type have the same layout
+int jl_struct_try_layout(jl_datatype_t *dt)
+{
+    if (dt->layout)
+        return 1;
+    else if (!jl_has_fixed_layout(dt))
+        return 0;
+    // jl_has_fixed_layout also ensured that dt->types is assigned now
+    jl_compute_field_offsets(dt);
+    assert(dt->layout);
+    return 1;
+}
+
+int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree)
+{
+    if (ty->name->mayinlinealloc && jl_struct_try_layout(ty)) {
+        if (ty->layout->npointers > 0) {
+            if (pointerfree)
+                return 0;
+            if (ty->name->n_uninitialized != 0)
+                return 0;
+            if (ty->layout->fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32)
+                return 0;
         }
+        return 1;
     }
+    return 0;
 }
 
-static unsigned union_isinlinable(jl_value_t *ty, int pointerfree, size_t *nbytes, size_t *align) JL_NOTSAFEPOINT
+static unsigned union_isinlinable(jl_value_t *ty, int pointerfree, size_t *nbytes, size_t *align, int asfield)
 {
     if (jl_is_uniontype(ty)) {
-        unsigned na = union_isinlinable(((jl_uniontype_t*)ty)->a, 1, nbytes, align);
+        unsigned na = union_isinlinable(((jl_uniontype_t*)ty)->a, 1, nbytes, align, asfield);
         if (na == 0)
             return 0;
-        unsigned nb = union_isinlinable(((jl_uniontype_t*)ty)->b, 1, nbytes, align);
+        unsigned nb = union_isinlinable(((jl_uniontype_t*)ty)->b, 1, nbytes, align, asfield);
         if (nb == 0)
             return 0;
         return na + nb;
     }
-    if (jl_is_datatype(ty) && jl_datatype_isinlinealloc(ty) && (!pointerfree || ((jl_datatype_t*)ty)->layout->npointers == 0)) {
+    if (jl_is_datatype(ty) && jl_datatype_isinlinealloc((jl_datatype_t*)ty, pointerfree)) {
         size_t sz = jl_datatype_size(ty);
         size_t al = jl_datatype_align(ty);
+        // primitive types in struct slots need their sizes aligned. issue #37974
+        if (asfield && jl_is_primitivetype(ty))
+            sz = LLT_ALIGN(sz, al);
         if (*nbytes < sz)
             *nbytes = sz;
         if (*align < al)
@@ -255,13 +293,19 @@ static unsigned union_isinlinable(jl_value_t *ty, int pointerfree, size_t *nbyte
     return 0;
 }
 
-JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al) JL_NOTSAFEPOINT
+int jl_uniontype_size(jl_value_t *ty, size_t *sz)
 {
-    unsigned countbits = union_isinlinable(eltype, 0, fsz, al);
+    size_t al = 0;
+    return union_isinlinable(ty, 0, sz, &al, 0) != 0;
+}
+
+JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al)
+{
+    unsigned countbits = union_isinlinable(eltype, 0, fsz, al, 1);
     return (countbits > 0 && countbits < 127) ? countbits : 0;
 }
 
-JL_DLLEXPORT int jl_stored_inline(jl_value_t *eltype) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_stored_inline(jl_value_t *eltype)
 {
     size_t fsz = 0, al = 0;
     return jl_islayout_inline(eltype, &fsz, &al);
@@ -276,7 +320,7 @@ int jl_pointer_egal(jl_value_t *t)
         return 1;
     if (t == (jl_value_t*)jl_bool_type)
         return 1;
-    if (jl_is_mutable_datatype(t) && // excludes abstract types
+    if (jl_is_mutable_datatype(jl_unwrap_unionall(t)) && // excludes abstract types
         t != (jl_value_t*)jl_string_type && // technically mutable, but compared by contents
         t != (jl_value_t*)jl_simplevector_type &&
         !jl_is_kind(t))
@@ -299,30 +343,9 @@ int jl_pointer_egal(jl_value_t *t)
             return 1;
         }
     }
-    return 0;
-}
-
-static int references_name(jl_value_t *p, jl_typename_t *name, int affects_layout) JL_NOTSAFEPOINT
-{
-    if (jl_is_uniontype(p))
-        return references_name(((jl_uniontype_t*)p)->a, name, affects_layout) ||
-               references_name(((jl_uniontype_t*)p)->b, name, affects_layout);
-    if (jl_is_unionall(p))
-        return references_name((jl_value_t*)((jl_unionall_t*)p)->var, name, 0) ||
-               references_name(((jl_unionall_t*)p)->body, name, affects_layout);
-    if (jl_is_typevar(p))
-        return references_name(((jl_tvar_t*)p)->ub, name, 0) ||
-               references_name(((jl_tvar_t*)p)->lb, name, 0);
-    if (jl_is_datatype(p)) {
-        jl_datatype_t *dp = (jl_datatype_t*)p;
-        if (affects_layout && dp->name == name)
-            return 1;
-        affects_layout = dp->types == NULL || jl_svec_len(dp->types) != 0;
-        size_t i, l = jl_nparams(p);
-        for (i = 0; i < l; i++) {
-            if (references_name(jl_tparam(p, i), name, affects_layout))
-                return 1;
-        }
+    if (jl_is_uniontype(t)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)t;
+        return jl_pointer_egal(u->a) && jl_pointer_egal(u->b);
     }
     return 0;
 }
@@ -339,17 +362,9 @@ void jl_compute_field_offsets(jl_datatype_t *st)
     const uint64_t max_offset = (((uint64_t)1) << 32) - 1;
     const uint64_t max_size = max_offset >> 1;
 
-    if (st->types == NULL || st->name->wrapper == NULL)
-        return;
-    if ((jl_is_tuple_type(st) || jl_is_namedtuple_type(st)) && !jl_is_concrete_type((jl_value_t*)st))
-        return;
+    if (st->name->wrapper == NULL)
+        return; // we got called too early--we'll be back
     jl_datatype_t *w = (jl_datatype_t*)jl_unwrap_unionall(st->name->wrapper);
-    if (w->types == NULL) // we got called too early--we'll be back
-        return;
-    size_t i, nfields = jl_svec_len(st->types);
-    int isinlinealloc = st->isconcretetype && !st->mutabl;
-    int isbitstype = isinlinealloc;
-    assert(st->ninitialized <= nfields);
     if (st == w && st->layout) {
         // this check allows us to force re-computation of the layout for some types during init
         st->layout = NULL;
@@ -357,6 +372,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         st->zeroinit = 0;
         st->has_concrete_subtype = 1;
     }
+    int isbitstype = st->isconcretetype && st->name->mayinlinealloc;
     // If layout doesn't depend on type parameters, it's stored in st->name->wrapper
     // and reused by all subtypes.
     if (w->layout) {
@@ -364,11 +380,16 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         st->size = w->size;
         st->zeroinit = w->zeroinit;
         st->has_concrete_subtype = w->has_concrete_subtype;
-        if (jl_is_layout_opaque(st->layout)) { // e.g. jl_array_typename
-            return;
+        if (!jl_is_layout_opaque(st->layout)) { // e.g. jl_array_typename
+            st->isbitstype = isbitstype && st->layout->npointers == 0;
+            jl_maybe_allocate_singleton_instance(st);
         }
+        return;
     }
-    else if (nfields == 0) {
+    assert(st->types && w->types);
+    size_t i, nfields = jl_svec_len(st->types);
+    assert(st->name->n_uninitialized <= nfields);
+    if (nfields == 0) {
         // if we have no fields, we can trivially skip the rest
         if (st == jl_symbol_type || st == jl_string_type) {
             // opaque layout - heap-allocated blob
@@ -376,7 +397,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
             st->layout = &opaque_byte_layout;
             return;
         }
-        else if (st == jl_simplevector_type || st->name == jl_array_typename) {
+        else if (st == jl_simplevector_type || st == jl_module_type || st->name == jl_array_typename) {
             static const jl_datatype_layout_t opaque_ptr_layout = {0, 1, -1, sizeof(void*), 0, 0};
             st->layout = &opaque_ptr_layout;
             return;
@@ -389,7 +410,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
     }
     else {
         // compute a conservative estimate of whether there could exist an instance of a subtype of this
-        for (i = 0; st->has_concrete_subtype && i < st->ninitialized; i++) {
+        for (i = 0; st->has_concrete_subtype && i < nfields - st->name->n_uninitialized; i++) {
             jl_value_t *fld = jl_svecref(st->types, i);
             if (fld == jl_bottom_type)
                 st->has_concrete_subtype = 0;
@@ -397,32 +418,15 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                 st->has_concrete_subtype = !jl_is_datatype(fld) || ((jl_datatype_t *)fld)->has_concrete_subtype;
         }
         // compute layout for the wrapper object if the field types have no free variables
-        if (!st->isconcretetype) {
-            if (st != w)
-                return; // otherwise we would leak memory
-            for (i = 0; i < nfields; i++) {
-                if (jl_has_free_typevars(jl_field_type(st, i)))
-                    return; // not worthwhile computing the rest
-            }
+        if (!st->isconcretetype && !jl_has_fixed_layout(st)) {
+            assert(st == w); // otherwise caller should not have requested this layout
+            return;
         }
     }
 
-    // compute whether this type may ever be inlined
-    // based solely on whether its definition is self-referential
-    if (isinlinealloc) {
-        size_t i, nf = jl_svec_len(w->types);
-        for (i = 0; i < nf; i++) {
-            jl_value_t *fld = jl_svecref(w->types, i);
-            if (references_name(fld, w->name, 1)) {
-                isinlinealloc = 0;
-                isbitstype = 0;
-                break;
-            }
-        }
-        for (i = 0; isbitstype && i < nfields; i++) {
-            jl_value_t *fld = jl_field_type(st, i);
-            isbitstype = jl_isbits(fld);
-        }
+    for (i = 0; isbitstype && i < nfields; i++) {
+        jl_value_t *fld = jl_field_type(st, i);
+        isbitstype = jl_isbits(fld);
     }
 
     // if we didn't reuse the layout above, compute it now
@@ -440,12 +444,14 @@ void jl_compute_field_offsets(jl_datatype_t *st)
         int zeroinit = 0;
         int haspadding = 0;
         int homogeneous = 1;
+        int needlock = 0;
         uint32_t npointers = 0;
         jl_value_t *firstty = jl_field_type(st, 0);
         for (i = 0; i < nfields; i++) {
             jl_value_t *fld = jl_field_type(st, i);
+            int isatomic = jl_field_isatomic(st, i);
             size_t fsz = 0, al = 1;
-            if (jl_islayout_inline(fld, &fsz, &al)) { // aka jl_datatype_isinlinealloc
+            if (jl_islayout_inline(fld, &fsz, &al) && (!isatomic || jl_is_datatype(fld))) { // aka jl_datatype_isinlinealloc
                 if (__unlikely(fsz > max_size))
                     // Should never happen
                     throw_ovf(should_malloc, desc, st, fsz);
@@ -459,7 +465,7 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                     uint32_t fld_npointers = ((jl_datatype_t*)fld)->layout->npointers;
                     if (((jl_datatype_t*)fld)->layout->haspadding)
                         haspadding = 1;
-                    if (i >= st->ninitialized && fld_npointers &&
+                    if (i >= nfields - st->name->n_uninitialized && fld_npointers &&
                         fld_npointers * sizeof(void*) != fsz) {
                         // field may be undef (may be uninitialized and contains pointer),
                         // and contains non-pointer fields of non-zero sizes.
@@ -483,9 +489,13 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                     haspadding = 1;
                 }
             }
+            if (isatomic && fsz > MAX_ATOMIC_SIZE)
+                needlock = 1;
+            if (isatomic && fsz <= MAX_ATOMIC_SIZE)
+                al = fsz = next_power_of_two(fsz);
             if (al != 0) {
                 size_t alsz = LLT_ALIGN(sz, al);
-                if (sz & (al - 1))
+                if (alsz != sz)
                     haspadding = 1;
                 sz = alsz;
                 if (al > alignm)
@@ -498,6 +508,16 @@ void jl_compute_field_offsets(jl_datatype_t *st)
                 throw_ovf(should_malloc, desc, st, sz);
             sz += fsz;
         }
+        if (needlock) {
+            size_t offset = LLT_ALIGN(sizeof(jl_mutex_t), alignm);
+            for (i = 0; i < nfields; i++) {
+                desc[i].offset += offset;
+            }
+            if (__unlikely(max_offset - sz < offset))
+                throw_ovf(should_malloc, desc, st, sz);
+            sz += offset;
+            haspadding = 1;
+        }
         if (homogeneous && jl_is_tuple_type(st)) {
             // Some tuples become LLVM vectors with stronger alignment than what was calculated above.
             unsigned al = jl_special_vector_alignment(nfields, firstty);
@@ -532,17 +552,11 @@ void jl_compute_field_offsets(jl_datatype_t *st)
             if (npointers)
                 free(pointers);
         }
+        st->zeroinit = zeroinit;
     }
     // now finish deciding if this instantiation qualifies for special properties
     assert(!isbitstype || st->layout->npointers == 0); // the definition of isbits
-    if (isinlinealloc && st->layout->npointers > 0) {
-        if (st->ninitialized != nfields)
-            isinlinealloc = 0;
-        else if (st->layout->fielddesc_type > 1) // GC only implements support for 8 and 16 (not array32)
-            isinlinealloc = 0;
-    }
     st->isbitstype = isbitstype;
-    st->isinlinealloc = isinlinealloc;
     jl_maybe_allocate_singleton_instance(st);
     return;
 }
@@ -562,6 +576,7 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
         jl_svec_t *parameters,
         jl_svec_t *fnames,
         jl_svec_t *ftypes,
+        jl_svec_t *fattrs,
         int abstract, int mutabl,
         int ninitialized)
 {
@@ -579,18 +594,17 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
     jl_gc_wb(t, t->parameters);
     t->types = ftypes;
     if (ftypes != NULL) jl_gc_wb(t, t->types);
-    t->abstract = abstract;
-    t->mutabl = mutabl;
-    t->ninitialized = ninitialized;
     t->size = 0;
 
     t->name = NULL;
     if (jl_is_typename(name)) {
-        // This code-path is used by the Serialization module to by-pass normal expectations
+        // This code-path is used by the Serialization module to bypass normal expectations
         tn = (jl_typename_t*)name;
+        tn->abstract = abstract;
+        tn->mutabl = mutabl;
     }
     else {
-        tn = jl_new_typename_in((jl_sym_t*)name, module);
+        tn = jl_new_typename_in((jl_sym_t*)name, module, abstract, mutabl);
         if (super == jl_function_type || super == jl_builtin_type || is_anonfn_typename(jl_symbol_name(name))) {
             // Callable objects (including compiler-generated closures) get independent method tables
             // as an optimization
@@ -608,6 +622,53 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
     jl_gc_wb(t, t->name);
     t->name->names = fnames;
     jl_gc_wb(t->name, t->name->names);
+    tn->n_uninitialized = jl_svec_len(fnames) - ninitialized;
+
+    uint32_t *volatile atomicfields = NULL;
+    uint32_t *volatile constfields = NULL;
+    int i;
+    JL_TRY {
+        for (i = 0; i + 1 < jl_svec_len(fattrs); i += 2) {
+            jl_value_t *fldi = jl_svecref(fattrs, i);
+            jl_sym_t *attr = (jl_sym_t*)jl_svecref(fattrs, i + 1);
+            JL_TYPECHK(typeassert, long, fldi);
+            JL_TYPECHK(typeassert, symbol, (jl_value_t*)attr);
+            size_t fldn = jl_unbox_long(fldi);
+            if (fldn < 1 || fldn > jl_svec_len(fnames))
+                jl_errorf("invalid field attribute %lld", (long long)fldn);
+            fldn--;
+            if (attr == jl_atomic_sym) {
+                if (!mutabl)
+                    jl_errorf("invalid field attribute atomic for immutable struct");
+                if (atomicfields == NULL) {
+                    size_t nb = (jl_svec_len(fnames) + 31) / 32 * sizeof(uint32_t);
+                    atomicfields = (uint32_t*)malloc_s(nb);
+                    memset(atomicfields, 0, nb);
+                }
+                atomicfields[fldn / 32] |= 1 << (fldn % 32);
+            }
+            else if (attr == jl_const_sym) {
+                if (!mutabl)
+                    jl_errorf("invalid field attribute const for immutable struct");
+                if (constfields == NULL) {
+                    size_t nb = (jl_svec_len(fnames) + 31) / 32 * sizeof(uint32_t);
+                    constfields = (uint32_t*)malloc_s(nb);
+                    memset(constfields, 0, nb);
+                }
+                constfields[fldn / 32] |= 1 << (fldn % 32);
+            }
+            else {
+                jl_errorf("invalid field attribute %s", jl_symbol_name(attr));
+            }
+        }
+    }
+    JL_CATCH {
+        free(atomicfields);
+        free(constfields);
+        jl_rethrow();
+    }
+    tn->atomicfields = atomicfields;
+    tn->constfields = constfields;
 
     if (t->name->wrapper == NULL) {
         t->name->wrapper = (jl_value_t*)t;
@@ -617,10 +678,12 @@ JL_DLLEXPORT jl_datatype_t *jl_new_datatype(
             t->name->wrapper = jl_new_struct(jl_unionall_type, jl_svecref(parameters, i), t->name->wrapper);
             jl_gc_wb(t->name, t->name->wrapper);
         }
+        if (!mutabl && !abstract && ftypes != NULL)
+            tn->mayinlinealloc = 1;
     }
     jl_precompute_memoized_dt(t, 0);
 
-    if (!abstract)
+    if (!abstract && t->types != NULL)
         jl_compute_field_offsets(t);
 
     JL_GC_POP();
@@ -632,12 +695,12 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
                                                  jl_svec_t *parameters, size_t nbits)
 {
     jl_datatype_t *bt = jl_new_datatype((jl_sym_t*)name, module, super, parameters,
-                                        jl_emptysvec, jl_emptysvec, 0, 0, 0);
+                                        jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     uint32_t nbytes = (nbits + 7) / 8;
     uint32_t alignm = next_power_of_two(nbytes);
     if (alignm > MAX_ALIGN)
         alignm = MAX_ALIGN;
-    bt->isbitstype = bt->isinlinealloc = (parameters == jl_emptysvec);
+    bt->isbitstype = (parameters == jl_emptysvec);
     bt->size = nbytes;
     bt->layout = jl_get_layout(0, 0, alignm, 0, NULL, NULL);
     bt->instance = NULL;
@@ -653,7 +716,7 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
                                                  int large)
 {
     jl_datatype_t *bt = jl_new_datatype(name, module, super,
-      jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
+      jl_emptysvec, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
     bt->size = large ? GC_MAX_SZCLASS+1 : 0;
     jl_datatype_layout_t *layout = (jl_datatype_layout_t *)
       jl_gc_perm_alloc(sizeof(jl_datatype_layout_t) + sizeof(jl_fielddescdyn_t),
@@ -672,13 +735,74 @@ JL_DLLEXPORT jl_datatype_t * jl_new_foreign_type(jl_sym_t *name,
     return bt;
 }
 
+JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt)
+{
+    return jl_is_datatype(dt) && dt->layout && dt->layout->fielddesc_type == 3;
+}
+
+
 // bits constructors ----------------------------------------------------------
 
-JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data)
+#if MAX_ATOMIC_SIZE > MAX_POINTERATOMIC_SIZE
+#error MAX_ATOMIC_SIZE too large
+#endif
+#if MAX_ATOMIC_SIZE >= 16 && !defined(_P64)
+#error 12 byte GC pool size alignment unimplemented for 32-bit
+#endif
+#if MAX_POINTERATOMIC_SIZE > 16
+#error MAX_POINTERATOMIC_SIZE too large
+#endif
+#if BYTE_ORDER != LITTLE_ENDIAN
+#error using masks for atomics (instead of memcpy like nb == 16) assumes little endian
+#endif
+
+#if MAX_POINTERATOMIC_SIZE >= 16
+typedef struct _jl_uint128_t {
+    uint64_t a;
+    uint64_t b;
+} jl_uint128_t;
+#endif
+
+static inline uint32_t zext_read32(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    uint32_t y = *(uint32_t*)x;
+    if (nb == 4)
+        return y;
+    else // if (nb == 3)
+        return 0xffffffu & y;
+}
+
+#if MAX_POINTERATOMIC_SIZE >= 8
+static inline uint64_t zext_read64(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    uint64_t y = *(uint64_t*)x;
+    if (nb == 8)
+        return y;
+    else if (nb == 7)
+        return 0xffffffffffffffu & y;
+    else if (nb == 6)
+        return 0xffffffffffffu & y;
+    else // if (nb == 5)
+        return 0xffffffffffu & y;
+}
+#endif
+
+#if MAX_POINTERATOMIC_SIZE >= 16
+static inline jl_uint128_t zext_read128(const jl_value_t *x, size_t nb) JL_NOTSAFEPOINT
+{
+    jl_uint128_t y = {0};
+    if (nb == 16)
+        y = *(jl_uint128_t*)x;
+    else
+        memcpy(&y, x, nb);
+    return y;
+}
+#endif
+
+JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, const void *data)
 {
     // data may not have the alignment required by the size
     // but will always have the alignment required by the datatype
-    jl_ptls_t ptls = jl_get_ptls_states();
     assert(jl_is_datatype(dt));
     jl_datatype_t *bt = (jl_datatype_t*)dt;
     size_t nb = jl_datatype_size(bt);
@@ -695,44 +819,275 @@ JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data)
     if (bt == jl_uint16_type)  return jl_box_uint16(*(uint16_t*)data);
     if (bt == jl_char_type)    return jl_box_char(*(uint32_t*)data);
 
-    jl_value_t *v = jl_gc_alloc(ptls, nb, bt);
-    switch (nb) {
-    case  1: *(uint8_t*) v = *(uint8_t*)data;    break;
-    case  2: *(uint16_t*)v = jl_load_unaligned_i16(data);   break;
-    case  4: *(uint32_t*)v = jl_load_unaligned_i32(data);   break;
-    case  8: *(uint64_t*)v = jl_load_unaligned_i64(data);   break;
-    case 16:
-        memcpy(jl_assume_aligned(v, 16), data, 16);
-        break;
-    default: memcpy(v, data, nb);
-    }
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
+    memcpy(jl_assume_aligned(v, sizeof(void*)), data, nb);
     return v;
 }
 
-// used by boot.jl
-JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt)
+JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *data)
 {
-    uint64_t data = 0xffffffffffffffffULL;
-    jl_value_t *v = jl_gc_alloc(jl_get_ptls_states(), sizeof(size_t), bt);
-    memcpy(v, &data, sizeof(size_t));
+    // data must have the required alignment for an atomic of the given size
+    assert(jl_is_datatype(dt));
+    jl_datatype_t *bt = (jl_datatype_t*)dt;
+    size_t nb = jl_datatype_size(bt);
+    // some types have special pools to minimize allocations
+    if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
+    if (bt == jl_bool_type)    return (1 & jl_atomic_load((_Atomic(int8_t)*)data)) ? jl_true : jl_false;
+    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_load((_Atomic(uint8_t)*)data));
+    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_load((_Atomic(int64_t)*)data));
+    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_load((_Atomic(int32_t)*)data));
+    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_load((_Atomic(int8_t)*)data));
+    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_load((_Atomic(int16_t)*)data));
+    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_load((_Atomic(uint64_t)*)data));
+    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_load((_Atomic(uint32_t)*)data));
+    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_load((_Atomic(uint16_t)*)data));
+    if (bt == jl_char_type)    return jl_box_char(jl_atomic_load((_Atomic(uint32_t)*)data));
+
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *v = jl_gc_alloc(ct->ptls, nb, bt);
+    // data is aligned to the power of two,
+    // we will write too much of v, but the padding should exist
+    if (nb == 1)
+        *(uint8_t*) v = jl_atomic_load((_Atomic(uint8_t)*)data);
+    else if (nb <= 2)
+        *(uint16_t*)v = jl_atomic_load((_Atomic(uint16_t)*)data);
+    else if (nb <= 4)
+        *(uint32_t*)v = jl_atomic_load((_Atomic(uint32_t)*)data);
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8)
+        *(uint64_t*)v = jl_atomic_load((_Atomic(uint64_t)*)data);
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16)
+        *(jl_uint128_t*)v = jl_atomic_load((_Atomic(jl_uint128_t)*)data);
+#endif
+    else
+        abort();
     return v;
 }
 
-void jl_assign_bits(void *dest, jl_value_t *bits) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb)
 {
-    // bits must be a heap box.
-    size_t nb = jl_datatype_size(jl_typeof(bits));
-    if (nb == 0) return;
-    switch (nb) {
-    case  1: *(uint8_t*)dest    = *(uint8_t*)bits;    break;
-    case  2: jl_store_unaligned_i16(dest, *(uint16_t*)bits); break;
-    case  4: jl_store_unaligned_i32(dest, *(uint32_t*)bits); break;
-    case  8: jl_store_unaligned_i64(dest, *(uint64_t*)bits); break;
-    case 16:
-        memcpy(dest, jl_assume_aligned(bits, 16), 16);
-        break;
-    default: memcpy(dest, bits, nb);
+    // dst must have the required alignment for an atomic of the given size
+    // src must be aligned by the GC
+    // we may therefore read too much from src, but will zero the excess bits
+    // before the store (so that we can get faster cmpswap later)
+    if (nb == 0)
+        ;
+    else if (nb == 1)
+        jl_atomic_store((_Atomic(uint8_t)*)dst, *(uint8_t*)src);
+    else if (nb == 2)
+        jl_atomic_store((_Atomic(uint16_t)*)dst, *(uint16_t*)src);
+    else if (nb <= 4)
+        jl_atomic_store((_Atomic(uint32_t)*)dst, zext_read32(src, nb));
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8)
+        jl_atomic_store((_Atomic(uint64_t)*)dst, zext_read64(src, nb));
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16)
+        jl_atomic_store((_Atomic(jl_uint128_t)*)dst, zext_read128(src, nb));
+#endif
+    else
+        abort();
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb)
+{
+    // dst must have the required alignment for an atomic of the given size
+    assert(jl_is_datatype(dt));
+    jl_datatype_t *bt = (jl_datatype_t*)dt;
+    // some types have special pools to minimize allocations
+    if (nb == 0)               return jl_new_struct_uninit(bt); // returns bt->instance
+    if (bt == jl_bool_type)    return (1 & jl_atomic_exchange((_Atomic(int8_t)*)dst, 1 & *(int8_t*)src)) ? jl_true : jl_false;
+    if (bt == jl_uint8_type)   return jl_box_uint8(jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(int8_t*)src));
+    if (bt == jl_int64_type)   return jl_box_int64(jl_atomic_exchange((_Atomic(int64_t)*)dst, *(int64_t*)src));
+    if (bt == jl_int32_type)   return jl_box_int32(jl_atomic_exchange((_Atomic(int32_t)*)dst, *(int32_t*)src));
+    if (bt == jl_int8_type)    return jl_box_int8(jl_atomic_exchange((_Atomic(int8_t)*)dst, *(int8_t*)src));
+    if (bt == jl_int16_type)   return jl_box_int16(jl_atomic_exchange((_Atomic(int16_t)*)dst, *(int16_t*)src));
+    if (bt == jl_uint64_type)  return jl_box_uint64(jl_atomic_exchange((_Atomic(uint64_t)*)dst, *(uint64_t*)src));
+    if (bt == jl_uint32_type)  return jl_box_uint32(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
+    if (bt == jl_uint16_type)  return jl_box_uint16(jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src));
+    if (bt == jl_char_type)    return jl_box_char(jl_atomic_exchange((_Atomic(uint32_t)*)dst, *(uint32_t*)src));
+
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *v = jl_gc_alloc(ct->ptls, jl_datatype_size(bt), bt);
+    if (nb == 1)
+        *(uint8_t*)v = jl_atomic_exchange((_Atomic(uint8_t)*)dst, *(uint8_t*)src);
+    else if (nb == 2)
+        *(uint16_t*)v = jl_atomic_exchange((_Atomic(uint16_t)*)dst, *(uint16_t*)src);
+    else if (nb <= 4)
+        *(uint32_t*)v = jl_atomic_exchange((_Atomic(uint32_t)*)dst, zext_read32(src, nb));
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8)
+        *(uint64_t*)v = jl_atomic_exchange((_Atomic(uint64_t)*)dst, zext_read64(src, nb));
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16)
+        *(jl_uint128_t*)v = jl_atomic_exchange((_Atomic(jl_uint128_t)*)dst, zext_read128(src, nb));
+#endif
+    else
+        abort();
+    return v;
+}
+
+JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
+{
+    // dst must have the required alignment for an atomic of the given size
+    // n.b.: this can spuriously fail if there are padding bits, the caller should deal with that
+    int success;
+    if (nb == 0) {
+        success = 1;
+    }
+    else if (nb == 1) {
+        uint8_t y = *(uint8_t*)expected;
+        success = jl_atomic_cmpswap((_Atomic(uint8_t)*)dst, &y, *(uint8_t*)src);
+    }
+    else if (nb == 2) {
+        uint16_t y = *(uint16_t*)expected;
+        success = jl_atomic_cmpswap((_Atomic(uint16_t)*)dst, &y, *(uint16_t*)src);
+    }
+    else if (nb <= 4) {
+        uint32_t y = zext_read32(expected, nb);
+        uint32_t z = zext_read32(src, nb);
+        success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, &y, z);
+    }
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8) {
+        uint64_t y = zext_read64(expected, nb);
+        uint64_t z = zext_read64(src, nb);
+        success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, &y, z);
+    }
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16) {
+        jl_uint128_t y = zext_read128(expected, nb);
+        jl_uint128_t z = zext_read128(src, nb);
+        success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, &y, z);
     }
+#endif
+    else {
+        abort();
+    }
+    return success;
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettyp, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb)
+{
+    // dst must have the required alignment for an atomic of the given size
+    // n.b.: this does not spuriously fail if there are padding bits
+    jl_task_t *ct = jl_current_task;
+    int isptr = jl_field_isptr(rettyp, 0);
+    jl_value_t *y = jl_gc_alloc(ct->ptls, isptr ? nb : rettyp->size, isptr ? dt : rettyp);
+    int success;
+    jl_datatype_t *et = (jl_datatype_t*)jl_typeof(expected);
+    if (nb == 0) {
+        success = (dt == et);
+    }
+    else if (nb == 1) {
+        uint8_t *y8 = (uint8_t*)y;
+        assert(!dt->layout->haspadding);
+        if (dt == et) {
+            *y8 = *(uint8_t*)expected;
+            uint8_t z8 = *(uint8_t*)src;
+            success = jl_atomic_cmpswap((_Atomic(uint8_t)*)dst, y8, z8);
+        }
+        else {
+            *y8 = jl_atomic_load((_Atomic(uint8_t)*)dst);
+            success = 0;
+        }
+    }
+    else if (nb == 2) {
+        uint16_t *y16 = (uint16_t*)y;
+        assert(!dt->layout->haspadding);
+        if (dt == et) {
+            *y16 = *(uint16_t*)expected;
+            uint16_t z16 = *(uint16_t*)src;
+            success = jl_atomic_cmpswap((_Atomic(uint16_t)*)dst, y16, z16);
+        }
+        else {
+            *y16 = jl_atomic_load((_Atomic(uint16_t)*)dst);
+            success = 0;
+        }
+    }
+    else if (nb <= 4) {
+        uint32_t *y32 = (uint32_t*)y;
+        if (dt == et) {
+            *y32 = zext_read32(expected, nb);
+            uint32_t z32 = zext_read32(src, nb);
+            while (1) {
+                success = jl_atomic_cmpswap((_Atomic(uint32_t)*)dst, y32, z32);
+                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                    break;
+            }
+        }
+        else {
+            *y32 = jl_atomic_load((_Atomic(uint32_t)*)dst);
+            success = 0;
+        }
+    }
+#if MAX_POINTERATOMIC_SIZE >= 8
+    else if (nb <= 8) {
+        uint64_t *y64 = (uint64_t*)y;
+        if (dt == et) {
+            *y64 = zext_read64(expected, nb);
+            uint64_t z64 = zext_read64(src, nb);
+            while (1) {
+                success = jl_atomic_cmpswap((_Atomic(uint64_t)*)dst, y64, z64);
+                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                    break;
+            }
+        }
+        else {
+            *y64 = jl_atomic_load((_Atomic(uint64_t)*)dst);
+            success = 0;
+        }
+    }
+#endif
+#if MAX_POINTERATOMIC_SIZE >= 16
+    else if (nb <= 16) {
+        jl_uint128_t *y128 = (jl_uint128_t*)y;
+        if (dt == et) {
+            *y128 = zext_read128(expected, nb);
+            jl_uint128_t z128 = zext_read128(src, nb);
+            while (1) {
+                success = jl_atomic_cmpswap((_Atomic(jl_uint128_t)*)dst, y128, z128);
+                if (success || !dt->layout->haspadding || !jl_egal__bits(y, expected, dt))
+                    break;
+            }
+        }
+        else {
+            *y128 = jl_atomic_load((_Atomic(jl_uint128_t)*)dst);
+            success = 0;
+        }
+    }
+#endif
+    else {
+        abort();
+    }
+    if (isptr) {
+        JL_GC_PUSH1(&y);
+        jl_value_t *z = jl_gc_alloc(ct->ptls, rettyp->size, rettyp);
+        *(jl_value_t**)z = y;
+        JL_GC_POP();
+        y = z;
+        nb = sizeof(jl_value_t*);
+    }
+    *((uint8_t*)y + nb) = success ? 1 : 0;
+    return y;
+}
+
+
+
+// used by boot.jl
+JL_DLLEXPORT jl_value_t *jl_typemax_uint(jl_value_t *bt)
+{
+    uint64_t data = 0xffffffffffffffffULL;
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(size_t), bt);
+    memcpy(v, &data, sizeof(size_t));
+    return v;
 }
 
 #define PERMBOXN_FUNC(nb,nw)                                            \
@@ -773,14 +1128,14 @@ UNBOX_FUNC(float64, double)
 UNBOX_FUNC(voidpointer, void*)
 UNBOX_FUNC(uint8pointer, uint8_t*)
 
-#define BOX_FUNC(typ,c_type,pfx,nw)                             \
-    JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x)              \
-    {                                                           \
-        jl_ptls_t ptls = jl_get_ptls_states();                  \
-        jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*),   \
-                                    jl_##typ##_type);           \
-        *(c_type*)jl_data_ptr(v) = x;                           \
-        return v;                                               \
+#define BOX_FUNC(typ,c_type,pfx,nw)                                     \
+    JL_DLLEXPORT jl_value_t *pfx##_##typ(c_type x)                      \
+    {                                                                   \
+        jl_task_t *ct = jl_current_task;                                \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+                                    jl_##typ##_type);                   \
+        *(c_type*)jl_data_ptr(v) = x;                                   \
+        return v;                                                       \
     }
 BOX_FUNC(float32, float,  jl_box, 1)
 BOX_FUNC(voidpointer, void*,  jl_box, 1)
@@ -794,29 +1149,29 @@ BOX_FUNC(float64, double, jl_box, 2)
 #define NBOX_C 1024
 
 #define SIBOX_FUNC(typ,c_type,nw)\
-    static jl_value_t *boxed_##typ##_cache[NBOX_C];             \
-    JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)             \
-    {                                                           \
-        jl_ptls_t ptls = jl_get_ptls_states();                  \
-        c_type idx = x+NBOX_C/2;                                \
-        if ((u##c_type)idx < (u##c_type)NBOX_C)                 \
-            return boxed_##typ##_cache[idx];                    \
-        jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*),   \
-                                    jl_##typ##_type);           \
-        *(c_type*)jl_data_ptr(v) = x;                           \
-        return v;                                               \
-    }
-#define UIBOX_FUNC(typ,c_type,nw)                               \
-    static jl_value_t *boxed_##typ##_cache[NBOX_C];             \
-    JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)             \
-    {                                                           \
-        jl_ptls_t ptls = jl_get_ptls_states();                  \
-        if (x < NBOX_C)                                         \
-            return boxed_##typ##_cache[x];                      \
-        jl_value_t *v = jl_gc_alloc(ptls, nw * sizeof(void*),   \
-                                    jl_##typ##_type);           \
-        *(c_type*)jl_data_ptr(v) = x;                           \
-        return v;                                               \
+    static jl_value_t *boxed_##typ##_cache[NBOX_C];                     \
+    JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)                     \
+    {                                                                   \
+        jl_task_t *ct = jl_current_task;                                \
+        c_type idx = x+NBOX_C/2;                                        \
+        if ((u##c_type)idx < (u##c_type)NBOX_C)                         \
+            return boxed_##typ##_cache[idx];                            \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+                                    jl_##typ##_type);                   \
+        *(c_type*)jl_data_ptr(v) = x;                                   \
+        return v;                                                       \
+    }
+#define UIBOX_FUNC(typ,c_type,nw)                                       \
+    static jl_value_t *boxed_##typ##_cache[NBOX_C];                     \
+    JL_DLLEXPORT jl_value_t *jl_box_##typ(c_type x)                     \
+    {                                                                   \
+        jl_task_t *ct = jl_current_task;                                \
+        if (x < NBOX_C)                                                 \
+            return boxed_##typ##_cache[x];                              \
+        jl_value_t *v = jl_gc_alloc(ct->ptls, nw * sizeof(void*),       \
+                                    jl_##typ##_type);                   \
+        *(c_type*)jl_data_ptr(v) = x;                                   \
+        return v;                                                       \
     }
 SIBOX_FUNC(int16,  int16_t, 1)
 SIBOX_FUNC(int32,  int32_t, 1)
@@ -835,11 +1190,11 @@ UIBOX_FUNC(uint64, uint64_t, 2)
 static jl_value_t *boxed_char_cache[128];
 JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     uint32_t u = bswap_32(x);
     if (u < 128)
         return boxed_char_cache[(uint8_t)u];
-    jl_value_t *v = jl_gc_alloc(ptls, sizeof(void*), jl_char_type);
+    jl_value_t *v = jl_gc_alloc(ct->ptls, sizeof(void*), jl_char_type);
     *(uint32_t*)jl_data_ptr(v) = x;
     return v;
 }
@@ -902,34 +1257,30 @@ JL_DLLEXPORT jl_value_t *jl_box_bool(int8_t x)
 
 JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (type->instance != NULL) return type->instance;
     va_list args;
-    size_t nf = jl_datatype_nfields(type);
+    size_t i, nf = jl_datatype_nfields(type);
     va_start(args, type);
-    jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type);
-    for (size_t i = 0; i < nf; i++) {
-        set_nth_field(type, (void*)jv, i, va_arg(args, jl_value_t*));
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type);
+    if (nf > 0 && jl_field_offset(type, 0) != 0) {
+        memset(jv, 0, jl_field_offset(type, 0));
+    }
+    for (i = 0; i < nf; i++) {
+        set_nth_field(type, jv, i, va_arg(args, jl_value_t*), 0);
     }
     va_end(args);
     return jv;
 }
 
-static void init_struct_tail(jl_datatype_t *type, jl_value_t *jv, size_t na)
-{
-    if (na < jl_datatype_nfields(type)) {
-        char *data = (char*)jl_data_ptr(jv);
-        size_t offs = jl_field_offset(type, na);
-        memset(data + offs, 0, jl_datatype_size(type) - offs);
-    }
-}
-
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (!jl_is_datatype(type) || type->layout == NULL)
+    jl_task_t *ct = jl_current_task;
+    if (!jl_is_datatype(type) || type->layout == NULL) {
         jl_type_error("new", (jl_value_t*)jl_datatype_type, (jl_value_t*)type);
-    if (type->ninitialized > na || na > jl_datatype_nfields(type))
+    }
+    size_t nf = jl_datatype_nfields(type);
+    if (nf - type->name->n_uninitialized > na || na > nf)
         jl_error("invalid struct allocation");
     for (size_t i = 0; i < na; i++) {
         jl_value_t *ft = jl_field_type_concrete(type, i);
@@ -938,19 +1289,28 @@ JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args,
     }
     if (type->instance != NULL)
         return type->instance;
-    jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type);
-    JL_GC_PUSH1(&jv);
-    for (size_t i = 0; i < na; i++) {
-        set_nth_field(type, (void*)jv, i, args[i]);
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, jl_datatype_size(type), type);
+    if (jl_datatype_nfields(type) > 0) {
+        if (jl_field_offset(type, 0) != 0) {
+            memset(jl_data_ptr(jv), 0, jl_field_offset(type, 0));
+        }
+        JL_GC_PUSH1(&jv);
+        for (size_t i = 0; i < na; i++) {
+            set_nth_field(type, jv, i, args[i], 0);
+        }
+        if (na < jl_datatype_nfields(type)) {
+            char *data = (char*)jl_data_ptr(jv);
+            size_t offs = jl_field_offset(type, na);
+            memset(data + offs, 0, jl_datatype_size(type) - offs);
+        }
+        JL_GC_POP();
     }
-    init_struct_tail(type, jv, na);
-    JL_GC_POP();
     return jv;
 }
 
 JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (!jl_is_tuple(tup))
         jl_type_error("new", (jl_value_t*)jl_tuple_type, tup);
     if (!jl_is_datatype(type) || type->layout == NULL)
@@ -969,21 +1329,27 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
         }
         return type->instance;
     }
-    jl_value_t *jv = jl_gc_alloc(ptls, jl_datatype_size(type), type);
+    size_t size = jl_datatype_size(type);
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type);
+    if (nf == 0)
+        return jv;
     jl_value_t *fi = NULL;
-    JL_GC_PUSH2(&jv, &fi);
-    if (type->layout->npointers > 0) {
+    if (type->zeroinit) {
         // if there are references, zero the space first to prevent the GC
         // from seeing uninitialized references during jl_get_nth_field and jl_isa,
         // which can allocate.
-        memset(jl_data_ptr(jv), 0, jl_datatype_size(type));
+        memset(jl_data_ptr(jv), 0, size);
     }
+    else if (jl_field_offset(type, 0) != 0) {
+        memset(jl_data_ptr(jv), 0, jl_field_offset(type, 0));
+    }
+    JL_GC_PUSH2(&jv, &fi);
     for (size_t i = 0; i < nargs; i++) {
         jl_value_t *ft = jl_field_type_concrete(type, i);
         fi = jl_get_nth_field(tup, i);
         if (!jl_isa(fi, ft))
             jl_type_error("new", ft, fi);
-        set_nth_field(type, (void*)jv, i, fi);
+        set_nth_field(type, jv, i, fi, 0);
     }
     JL_GC_POP();
     return jv;
@@ -991,10 +1357,10 @@ JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup)
 
 JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (type->instance != NULL) return type->instance;
     size_t size = jl_datatype_size(type);
-    jl_value_t *jv = jl_gc_alloc(ptls, size, type);
+    jl_value_t *jv = jl_gc_alloc(ct->ptls, size, type);
     if (size > 0)
         memset(jl_data_ptr(jv), 0, size);
     return jv;
@@ -1002,26 +1368,34 @@ JL_DLLEXPORT jl_value_t *jl_new_struct_uninit(jl_datatype_t *type)
 
 // field access ---------------------------------------------------------------
 
+JL_DLLEXPORT void jl_lock_value(jl_value_t *v) JL_NOTSAFEPOINT
+{
+    JL_LOCK_NOGC((jl_mutex_t*)v);
+}
+
+JL_DLLEXPORT void jl_unlock_value(jl_value_t *v) JL_NOTSAFEPOINT
+{
+    JL_UNLOCK_NOGC((jl_mutex_t*)v);
+}
+
 JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err)
 {
-    jl_svec_t *fn = jl_field_names(t);
-    size_t n = jl_svec_len(fn);
-    if (n == 0) {
-        if (jl_is_namedtuple_type(t)) {
-            jl_value_t *ns = jl_tparam0(t);
-            if (jl_is_tuple(ns)) {
-                n = jl_nfields(ns);
-                for(size_t i=0; i < n; i++) {
-                    if (jl_get_nth_field(ns, i) == (jl_value_t*)fld) {
-                        return (int)i;
-                    }
+    if (jl_is_namedtuple_type(t)) {
+        jl_value_t *ns = jl_tparam0(t);
+        if (jl_is_tuple(ns)) {
+            size_t i, n = jl_nfields(ns);
+            for (i = 0; i < n; i++) {
+                if (jl_get_nth_field(ns, i) == (jl_value_t*)fld) {
+                    return (int)i;
                 }
             }
         }
     }
     else {
-        for(size_t i=0; i < n; i++) {
-            if (jl_svecref(fn,i) == (jl_value_t*)fld) {
+        jl_svec_t *fn = jl_field_names(t);
+        size_t i, n = jl_svec_len(fn);
+        for (i = 0; i < n; i++) {
+            if (jl_svecref(fn, i) == (jl_value_t*)fld) {
                 return (int)i;
             }
         }
@@ -1035,19 +1409,39 @@ JL_DLLEXPORT int jl_field_index(jl_datatype_t *t, jl_sym_t *fld, int err)
 JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i)
 {
     jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
-    assert(i < jl_datatype_nfields(st));
+    if (i >= jl_datatype_nfields(st))
+        jl_bounds_error_int(v, i + 1);
     size_t offs = jl_field_offset(st, i);
     if (jl_field_isptr(st, i)) {
-        return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
+        return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs));
     }
     jl_value_t *ty = jl_field_type_concrete(st, i);
+    int isatomic = jl_field_isatomic(st, i);
     if (jl_is_uniontype(ty)) {
-        uint8_t sel = ((uint8_t*)v)[offs + jl_field_size(st, i) - 1];
+        assert(!isatomic);
+        size_t fsz = jl_field_size(st, i);
+        uint8_t sel = ((uint8_t*)v)[offs + fsz - 1];
         ty = jl_nth_union_component(ty, sel);
         if (jl_is_datatype_singleton((jl_datatype_t*)ty))
             return ((jl_datatype_t*)ty)->instance;
     }
-    return jl_new_bits(ty, (char*)v + offs);
+    jl_value_t *r;
+    size_t fsz = jl_datatype_size(ty);
+    int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+    if (isatomic && !needlock) {
+        r = jl_atomic_new_bits(ty, (char*)v + offs);
+    }
+    else if (needlock) {
+        jl_task_t *ct = jl_current_task;
+        r = jl_gc_alloc(ct->ptls, fsz, ty);
+        jl_lock_value(v);
+        memcpy((char*)r, (char*)v + offs, fsz);
+        jl_unlock_value(v);
+    }
+    else {
+        r = jl_new_bits(ty, (char*)v + offs);
+    }
+    return undefref_check((jl_datatype_t*)ty, r);
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT
@@ -1056,33 +1450,40 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_RO
     assert(i < jl_datatype_nfields(st));
     size_t offs = jl_field_offset(st,i);
     assert(jl_field_isptr(st,i));
-    return jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
+    return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs));
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i)
 {
-    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
-    if (i >= jl_datatype_nfields(st))
-        jl_bounds_error_int(v, i + 1);
-    size_t offs = jl_field_offset(st, i);
-    if (jl_field_isptr(st, i)) {
-        jl_value_t *fval = jl_atomic_load_relaxed((jl_value_t**)((char*)v + offs));
-        if (__unlikely(fval == NULL))
-            jl_throw(jl_undefref_exception);
-        return fval;
+    jl_value_t *r = jl_get_nth_field(v, i);
+    if (__unlikely(r == NULL))
+        jl_throw(jl_undefref_exception);
+    return r;
+}
+
+static inline void memassign_safe(int hasptr, jl_value_t *parent, char *dst, const jl_value_t *src, size_t nb) JL_NOTSAFEPOINT
+{
+    if (hasptr) {
+        // assert that although dst might have some undefined bits, the src heap box should be okay with that
+        assert(LLT_ALIGN(nb, sizeof(void*)) == LLT_ALIGN(jl_datatype_size(jl_typeof(src)), sizeof(void*)));
+        size_t nptr = nb / sizeof(void*);
+        memmove_refs((void**)dst, (void**)src, nptr);
+        jl_gc_multi_wb(parent, src);
+        src = (jl_value_t*)((char*)src + nptr * sizeof(void*));
+        nb -= nptr * sizeof(void*);
     }
-    jl_value_t *ty = jl_field_type_concrete(st, i);
-    if (jl_is_uniontype(ty)) {
-        size_t fsz = jl_field_size(st, i);
-        uint8_t sel = ((uint8_t*)v)[offs + fsz - 1];
-        ty = jl_nth_union_component(ty, sel);
-        if (jl_is_datatype_singleton((jl_datatype_t*)ty))
-            return ((jl_datatype_t*)ty)->instance;
+    else {
+        // src must be a heap box.
+        assert(nb == jl_datatype_size(jl_typeof(src)));
+        if (nb >= 16) {
+            memcpy(dst, jl_assume_aligned(src, 16), nb);
+            return;
+        }
     }
-    return undefref_check((jl_datatype_t*)ty, jl_new_bits(ty, (char*)v + offs));
+    memcpy(dst, jl_assume_aligned(src, sizeof(void*)), nb);
 }
 
-void set_nth_field(jl_datatype_t *st, void *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT
+void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT
 {
     size_t offs = jl_field_offset(st, i);
     if (rhs == NULL) { // TODO: this should be invalid, but it happens frequently in ircode.c
@@ -1090,44 +1491,307 @@ void set_nth_field(jl_datatype_t *st, void *v, size_t i, jl_value_t *rhs) JL_NOT
         return;
     }
     if (jl_field_isptr(st, i)) {
-        jl_atomic_store_relaxed((jl_value_t**)((char*)v + offs), rhs);
+        jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs), rhs);
         jl_gc_wb(v, rhs);
     }
     else {
         jl_value_t *ty = jl_field_type_concrete(st, i);
-        if (jl_is_uniontype(ty)) {
-            uint8_t *psel = &((uint8_t*)v)[offs + jl_field_size(st, i) - 1];
+        jl_value_t *rty = jl_typeof(rhs);
+        int hasptr;
+        int isunion = jl_is_uniontype(ty);
+        if (isunion) {
+            assert(!isatomic);
+            size_t fsz = jl_field_size(st, i);
+            uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1];
             unsigned nth = 0;
-            if (!jl_find_union_component(ty, jl_typeof(rhs), &nth))
+            if (!jl_find_union_component(ty, rty, &nth))
                 assert(0 && "invalid field assignment to isbits union");
             *psel = nth;
-            if (jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(rhs)))
+            if (jl_is_datatype_singleton((jl_datatype_t*)rty))
                 return;
+            hasptr = 0;
+        }
+        else {
+            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
+        }
+        size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+        int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+        if (isatomic && !needlock) {
+            jl_atomic_store_bits((char*)v + offs, rhs, fsz);
+            if (hasptr)
+                jl_gc_multi_wb(v, rhs); // rhs is immutable
+        }
+        else if (needlock) {
+            jl_lock_value(v);
+            memcpy((char*)v + offs, (char*)rhs, fsz);
+            jl_unlock_value(v);
+        }
+        else {
+            memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
         }
-        jl_assign_bits((char*)v + offs, rhs);
-        jl_gc_multi_wb(v, rhs);
     }
 }
 
-JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT
+jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic)
 {
-    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    if (!jl_isa(rhs, ty))
+       jl_type_error("swapfield!", ty, rhs);
     size_t offs = jl_field_offset(st, i);
-    char *fld = (char*)v + offs;
+    jl_value_t *r;
     if (jl_field_isptr(st, i)) {
-        jl_value_t *fval = jl_atomic_load_relaxed((jl_value_t**)fld);
-        return fval != NULL;
+        if (isatomic)
+            r = jl_atomic_exchange((_Atomic(jl_value_t*)*)((char*)v + offs), rhs);
+        else
+            r = jl_atomic_exchange_relaxed((_Atomic(jl_value_t*)*)((char*)v + offs), rhs);
+        jl_gc_wb(v, rhs);
     }
-    jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(st, i);
-    if (jl_is_datatype(ft) && ft->layout->first_ptr >= 0) {
-         return ((jl_value_t**)fld)[ft->layout->first_ptr] != NULL;
+    else {
+        jl_value_t *rty = jl_typeof(rhs);
+        int hasptr;
+        int isunion = jl_is_uniontype(ty);
+        if (isunion) {
+            assert(!isatomic);
+            r = jl_get_nth_field(v, i);
+            size_t fsz = jl_field_size(st, i);
+            uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1];
+            unsigned nth = 0;
+            if (!jl_find_union_component(ty, rty, &nth))
+                assert(0 && "invalid field assignment to isbits union");
+            *psel = nth;
+            if (jl_is_datatype_singleton((jl_datatype_t*)rty))
+                return r;
+            hasptr = 0;
+        }
+        else {
+            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
+        }
+        size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+        int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+        if (isatomic && !needlock) {
+            r = jl_atomic_swap_bits(rty, (char*)v + offs, rhs, fsz);
+            if (hasptr)
+                jl_gc_multi_wb(v, rhs); // rhs is immutable
+        }
+        else {
+            if (needlock) {
+                jl_task_t *ct = jl_current_task;
+                r = jl_gc_alloc(ct->ptls, fsz, ty);
+                jl_lock_value(v);
+                memcpy((char*)r, (char*)v + offs, fsz);
+                memcpy((char*)v + offs, (char*)rhs, fsz);
+                jl_unlock_value(v);
+            }
+            else {
+                if (!isunion)
+                    r = jl_new_bits(ty, (char*)v + offs);
+                memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
+            }
+            if (needlock || !isunion)
+                r = undefref_check((jl_datatype_t*)ty, r);
+        }
     }
-    return 1;
+    if (__unlikely(r == NULL))
+        jl_throw(jl_undefref_exception);
+    return r;
+}
+
+jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic)
+{
+    size_t offs = jl_field_offset(st, i);
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    jl_value_t *r = jl_get_nth_field_checked(v, i);
+    if (isatomic && jl_field_isptr(st, i))
+        jl_fence(); // load was previously only relaxed
+    jl_value_t **args;
+    JL_GC_PUSHARGS(args, 2);
+    args[0] = r;
+    while (1) {
+        args[1] = rhs;
+        jl_value_t *y = jl_apply_generic(op, args, 2);
+        args[1] = y;
+        if (!jl_isa(y, ty))
+            jl_type_error("modifyfield!", ty, y);
+        if (jl_field_isptr(st, i)) {
+            _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs);
+            if (isatomic ? jl_atomic_cmpswap(p, &r, y) : jl_atomic_cmpswap_relaxed(p, &r, y))
+                break;
+        }
+        else {
+            jl_value_t *yty = jl_typeof(y);
+            jl_value_t *rty = jl_typeof(r);
+            int hasptr;
+            int isunion = jl_is_uniontype(ty);
+            if (isunion) {
+                assert(!isatomic);
+                hasptr = 0;
+            }
+            else {
+                hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
+            }
+            size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+            int needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+            if (isatomic && !needlock) {
+                if (jl_atomic_bool_cmpswap_bits((char*)v + offs, r, y, fsz)) {
+                    if (hasptr)
+                        jl_gc_multi_wb(v, y); // y is immutable
+                    break;
+                }
+                r = jl_atomic_new_bits(ty, (char*)v + offs);
+            }
+            else {
+                if (needlock)
+                    jl_lock_value(v);
+                int success = memcmp((char*)v + offs, r, fsz) == 0;
+                if (success) {
+                    if (isunion) {
+                        size_t fsz = jl_field_size(st, i);
+                        uint8_t *psel = &((uint8_t*)v)[offs + fsz - 1];
+                        success = (jl_typeof(r) == jl_nth_union_component(ty, *psel));
+                        if (success) {
+                            unsigned nth = 0;
+                            if (!jl_find_union_component(ty, yty, &nth))
+                                assert(0 && "invalid field assignment to isbits union");
+                            *psel = nth;
+                            if (jl_is_datatype_singleton((jl_datatype_t*)yty))
+                                break;
+                        }
+                        fsz = jl_datatype_size((jl_datatype_t*)yty); // need to shrink-wrap the final copy
+                    }
+                    else {
+                        assert(yty == ty && rty == ty);
+                    }
+                    memassign_safe(hasptr, v, (char*)v + offs, y, fsz);
+                }
+                if (needlock)
+                    jl_unlock_value(v);
+                if (success)
+                    break;
+                r = jl_get_nth_field(v, i);
+            }
+        }
+        args[0] = r;
+        jl_gc_safepoint();
+    }
+    // args[0] == r (old)
+    // args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
+    JL_GC_POP();
+    return args[0];
+}
+
+jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic)
+{
+    jl_value_t *ty = jl_field_type_concrete(st, i);
+    if (!jl_isa(rhs, ty))
+        jl_type_error("replacefield!", ty, rhs);
+    size_t offs = jl_field_offset(st, i);
+    jl_value_t *r = expected;
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ty);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    if (jl_field_isptr(st, i)) {
+        _Atomic(jl_value_t*) *p = (_Atomic(jl_value_t*)*)((char*)v + offs);
+        int success;
+        while (1) {
+            success = isatomic ? jl_atomic_cmpswap(p, &r, rhs) : jl_atomic_cmpswap_relaxed(p, &r, rhs);
+            if (success)
+                jl_gc_wb(v, rhs);
+            if (__unlikely(r == NULL))
+                jl_throw(jl_undefref_exception);
+            if (success || !jl_egal(r, expected))
+                break;
+        }
+        JL_GC_PUSH1(&r);
+        r = jl_new_struct(rettyp, r, success ? jl_true : jl_false);
+        JL_GC_POP();
+    }
+    else {
+        int hasptr;
+        int isunion = jl_is_uniontype(ty);
+        int needlock;
+        jl_value_t *rty = ty;
+        size_t fsz = jl_field_size(st, i);
+        if (isunion) {
+            assert(!isatomic);
+            hasptr = 0;
+            needlock = 0;
+            isatomic = 0; // this makes GCC happy
+        }
+        else {
+            hasptr = ((jl_datatype_t*)ty)->layout->npointers > 0;
+            fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+            needlock = (isatomic && fsz > MAX_ATOMIC_SIZE);
+        }
+        if (isatomic && !needlock) {
+            r = jl_atomic_cmpswap_bits((jl_datatype_t*)ty, rettyp, (char*)v + offs, r, rhs, fsz);
+            int success = *((uint8_t*)r + fsz);
+            if (success && hasptr)
+                jl_gc_multi_wb(v, rhs); // rhs is immutable
+        }
+        else {
+            jl_task_t *ct = jl_current_task;
+            uint8_t *psel = NULL;
+            if (isunion) {
+                psel = &((uint8_t*)v)[offs + fsz - 1];
+                rty = jl_nth_union_component(rty, *psel);
+            }
+            assert(!jl_field_isptr(rettyp, 0));
+            r = jl_gc_alloc(ct->ptls, rettyp->size, (jl_value_t*)rettyp);
+            int success = (rty == jl_typeof(expected));
+            if (needlock)
+                jl_lock_value(v);
+            memcpy((char*)r, (char*)v + offs, fsz); // copy field, including union bits
+            if (success) {
+                size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+                if (((jl_datatype_t*)rty)->layout->haspadding)
+                    success = jl_egal__bits(r, expected, (jl_datatype_t*)rty);
+                else
+                    success = memcmp((char*)r, (char*)expected, fsz) == 0;
+            }
+            *((uint8_t*)r + fsz) = success ? 1 : 0;
+            if (success) {
+                jl_value_t *rty = jl_typeof(rhs);
+                size_t fsz = jl_datatype_size((jl_datatype_t*)rty); // need to shrink-wrap the final copy
+                if (isunion) {
+                    unsigned nth = 0;
+                    if (!jl_find_union_component(ty, rty, &nth))
+                        assert(0 && "invalid field assignment to isbits union");
+                    *psel = nth;
+                    if (jl_is_datatype_singleton((jl_datatype_t*)rty))
+                        return r;
+                }
+                memassign_safe(hasptr, v, (char*)v + offs, rhs, fsz);
+            }
+            if (needlock)
+                jl_unlock_value(v);
+        }
+        r = undefref_check((jl_datatype_t*)rty, r);
+        if (__unlikely(r == NULL))
+            jl_throw(jl_undefref_exception);
+    }
+    return r;
+}
+
+JL_DLLEXPORT int jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT
+{
+    jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
+    size_t offs = jl_field_offset(st, i);
+    _Atomic(jl_value_t*) *fld = (_Atomic(jl_value_t*)*)((char*)v + offs);
+    if (!jl_field_isptr(st, i)) {
+        jl_datatype_t *ft = (jl_datatype_t*)jl_field_type_concrete(st, i);
+        if (!jl_is_datatype(ft) || ft->layout->first_ptr < 0)
+            return 2; // isbits are always defined
+        fld += ft->layout->first_ptr;
+    }
+    jl_value_t *fval = jl_atomic_load_relaxed(fld);
+    return fval != NULL ? 1 : 0;
 }
 
-JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field) JL_NOTSAFEPOINT
+JL_DLLEXPORT size_t jl_get_field_offset(jl_datatype_t *ty, int field)
 {
-    if (ty->layout == NULL || field > jl_datatype_nfields(ty) || field < 1)
+    if (!jl_struct_try_layout(ty) || field > jl_datatype_nfields(ty) || field < 1)
         jl_bounds_error_int((jl_value_t*)ty, field);
     return jl_field_offset(ty, field - 1);
 }
diff --git a/src/debug-registry.h b/src/debug-registry.h
new file mode 100644
index 00000000000000..dfdb09ca845190
--- /dev/null
+++ b/src/debug-registry.h
@@ -0,0 +1,183 @@
+#include <llvm/ADT/StringMap.h>
+#include <llvm/DebugInfo/DIContext.h>
+#include <llvm/IR/DataLayout.h>
+
+#include "julia_internal.h"
+#include "processor.h"
+
+#include <map>
+#include <mutex>
+#include <type_traits>
+
+typedef struct {
+    const llvm::object::ObjectFile *obj;
+    llvm::DIContext *ctx;
+    int64_t slide;
+} objfileentry_t;
+
+
+// Central registry for resolving function addresses to `jl_method_instance_t`s and
+// originating `ObjectFile`s (for the DWARF debug info).
+//
+// A global singleton instance is notified by the JIT whenever a new object is emitted,
+// and later queried by the various function info APIs. We also use the chance to handle
+// some platform-specific unwind info registration (which is unrelated to the query
+// functionality).
+class JITDebugInfoRegistry
+{
+public:
+    template<typename ResourceT>
+    struct Locked {
+
+        template<typename CResourceT>
+        struct Lock {
+            std::unique_lock<std::mutex> lock;
+            CResourceT &resource;
+
+            Lock(std::mutex &mutex, CResourceT &resource) JL_NOTSAFEPOINT : lock(mutex), resource(resource) {}
+            Lock(Lock &&) JL_NOTSAFEPOINT = default;
+            Lock &operator=(Lock &&) JL_NOTSAFEPOINT = default;
+
+            CResourceT &operator*() JL_NOTSAFEPOINT {
+                return resource;
+            }
+
+            const CResourceT &operator*() const JL_NOTSAFEPOINT {
+                return resource;
+            }
+
+            CResourceT *operator->() JL_NOTSAFEPOINT {
+                return &**this;
+            }
+
+            const CResourceT *operator->() const JL_NOTSAFEPOINT {
+                return &**this;
+            }
+
+            operator const CResourceT &() const JL_NOTSAFEPOINT {
+                return resource;
+            }
+
+            ~Lock() JL_NOTSAFEPOINT = default;
+        };
+    private:
+
+        mutable std::mutex mutex;
+        ResourceT resource;
+    public:
+        typedef Lock<ResourceT> LockT;
+        typedef Lock<const ResourceT> ConstLockT;
+
+        Locked(ResourceT resource = ResourceT()) JL_NOTSAFEPOINT : mutex(), resource(std::move(resource)) {}
+
+        LockT operator*() JL_NOTSAFEPOINT {
+            return LockT(mutex, resource);
+        }
+
+        ConstLockT operator*() const JL_NOTSAFEPOINT {
+            return ConstLockT(mutex, resource);
+        }
+
+        ~Locked() JL_NOTSAFEPOINT = default;
+    };
+
+    template<typename datatype>
+    struct jl_pthread_key_t {
+        static_assert(std::is_trivially_default_constructible<datatype>::value, "Invalid datatype for pthread key!");
+        static_assert(std::is_trivially_destructible<datatype>::value, "Expected datatype to be trivially destructible!");
+        static_assert(sizeof(datatype) == sizeof(void*), "Expected datatype to be like a void*!");
+        pthread_key_t key;
+
+        void init() JL_NOTSAFEPOINT {
+            if (pthread_key_create(&key, NULL))
+                jl_error("fatal: pthread_key_create failed");
+        }
+
+        operator datatype() JL_NOTSAFEPOINT {
+            return reinterpret_cast<datatype>(pthread_getspecific(key));
+        }
+
+        jl_pthread_key_t &operator=(datatype val) JL_NOTSAFEPOINT {
+            pthread_setspecific(key, reinterpret_cast<void*>(val));
+            return *this;
+        }
+
+        void destroy() JL_NOTSAFEPOINT {
+            pthread_key_delete(key);
+        }
+    };
+
+    struct sysimg_info_t {
+        uint64_t jl_sysimage_base;
+        jl_sysimg_fptrs_t sysimg_fptrs;
+        jl_method_instance_t **sysimg_fvars_linfo;
+        size_t sysimg_fvars_n;
+    };
+
+    struct libc_frames_t {
+#if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
+        std::atomic<void(*)(void*)> libc_register_frame_{nullptr};
+        std::atomic<void(*)(void*)> libc_deregister_frame_{nullptr};
+
+        void libc_register_frame(const char *Entry) JL_NOTSAFEPOINT;
+
+        void libc_deregister_frame(const char *Entry) JL_NOTSAFEPOINT;
+#endif
+    };
+private:
+
+    struct ObjectInfo {
+        const llvm::object::ObjectFile *object = nullptr;
+        size_t SectionSize = 0;
+        ptrdiff_t slide = 0;
+        llvm::object::SectionRef Section{};
+        llvm::DIContext *context = nullptr;
+    };
+
+    template<typename KeyT, typename ValT>
+    using rev_map = std::map<KeyT, ValT, std::greater<KeyT>>;
+
+    typedef rev_map<size_t, ObjectInfo> objectmap_t;
+    typedef rev_map<uint64_t, objfileentry_t> objfilemap_t;
+
+    objectmap_t objectmap{};
+    rev_map<size_t, std::pair<size_t, jl_method_instance_t *>> linfomap{};
+
+    // Maintain a mapping of unrealized function names -> linfo objects
+    // so that when we see it get emitted, we can add a link back to the linfo
+    // that it came from (providing name, type signature, file info, etc.)
+    Locked<llvm::StringMap<jl_code_instance_t*>> codeinst_in_flight{};
+
+    Locked<sysimg_info_t> sysimg_info{};
+
+    Locked<objfilemap_t> objfilemap{};
+
+    static std::string mangle(llvm::StringRef Name, const llvm::DataLayout &DL) JL_NOTSAFEPOINT;
+
+public:
+
+    JITDebugInfoRegistry() JL_NOTSAFEPOINT;
+    ~JITDebugInfoRegistry() JL_NOTSAFEPOINT = default;
+
+    // Any function that acquires this lock must be either a unmanaged thread
+    // or in the GC safe region and must NOT allocate anything through the GC
+    // while holding this lock.
+    // Certain functions in this file might be called from an unmanaged thread
+    // and cannot have any interaction with the julia runtime
+    // They also may be re-entrant, and operating while threads are paused, so we
+    // separately manage the re-entrant count behavior for safety across platforms
+    // Note that we cannot safely upgrade read->write
+    uv_rwlock_t debuginfo_asyncsafe{};
+    jl_pthread_key_t<uintptr_t> debuginfo_asyncsafe_held{};
+    libc_frames_t libc_frames{};
+
+    void add_code_in_flight(llvm::StringRef name, jl_code_instance_t *codeinst, const llvm::DataLayout &DL) JL_NOTSAFEPOINT;
+    jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT;
+    void registerJITObject(const llvm::object::ObjectFile &Object,
+                        std::function<uint64_t(const llvm::StringRef &)> getLoadAddress,
+                        std::function<void*(void*)> lookupWriteAddress) JL_NOTSAFEPOINT;
+    objectmap_t& getObjectMap() JL_NOTSAFEPOINT;
+    void set_sysimg_info(sysimg_info_t info) JL_NOTSAFEPOINT;
+    Locked<sysimg_info_t>::ConstLockT get_sysimg_info() const JL_NOTSAFEPOINT;
+    Locked<objfilemap_t>::LockT get_objfile_map() JL_NOTSAFEPOINT;
+};
diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp
index 48ef62fb52822f..ec79486da55fe5 100644
--- a/src/debuginfo.cpp
+++ b/src/debuginfo.cpp
@@ -3,8 +3,6 @@
 #include "platform.h"
 
 #include "llvm-version.h"
-#include <llvm/ExecutionEngine/ExecutionEngine.h>
-#include <llvm/ExecutionEngine/JITEventListener.h>
 #include <llvm/DebugInfo/DIContext.h>
 #include <llvm/DebugInfo/DWARF/DWARFContext.h>
 #include <llvm/Object/SymbolSize.h>
@@ -15,6 +13,7 @@
 #include <llvm/IR/DebugInfo.h>
 #include <llvm/IR/DataLayout.h>
 #include <llvm/IR/Mangler.h>
+#include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
 #include <llvm/ExecutionEngine/RuntimeDyld.h>
 #include <llvm/BinaryFormat/Magic.h>
 #include <llvm/Object/MachO.h>
@@ -35,34 +34,129 @@ using namespace llvm;
 #include <map>
 #include <vector>
 #include <set>
+#include <mutex>
 #include "julia_assert.h"
 
 #ifdef _OS_DARWIN_
 #include <CoreFoundation/CoreFoundation.h>
 #endif
 
-typedef object::SymbolRef SymRef;
+#include "jitlayers.h"
 
-// Any function that acquires this lock must be either a unmanaged thread
-// or in the GC safe region and must NOT allocate anything through the GC
-// while holding this lock.
-// Certain functions in this file might be called from an unmanaged thread
-// and cannot have any interaction with the julia runtime
-static uv_rwlock_t threadsafe;
+static JITDebugInfoRegistry &getJITDebugRegistry() JL_NOTSAFEPOINT {
+    return jl_ExecutionEngine->getDebugInfoRegistry();
+}
+
+struct debug_link_info {
+    StringRef filename;
+    uint32_t crc32;
+};
+
+extern "C" JL_DLLEXPORT void jl_lock_profile_impl(void) JL_NOTSAFEPOINT;
+extern "C" JL_DLLEXPORT void jl_unlock_profile_impl(void) JL_NOTSAFEPOINT;
+
+template <typename T>
+static void jl_profile_atomic(T f);
+
+#if (defined(_OS_LINUX_) || defined(_OS_FREEBSD_) || (defined(_OS_DARWIN_) && defined(LLVM_SHLIB)))
+extern "C" void __register_frame(void*);
+extern "C" void __deregister_frame(void*);
 
-extern "C" void jl_init_debuginfo(void)
+template <typename callback>
+static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f)
 {
-    uv_rwlock_init(&threadsafe);
+    const char *P = EHFrameAddr;
+    const char *End = P + EHFrameSize;
+    do {
+        const char *Entry = P;
+        P += 4;
+        assert(P <= End);
+        uint32_t Length = *(const uint32_t*)Entry;
+        // Length == 0: Terminator
+        if (Length == 0)
+            break;
+        assert(P + Length <= End);
+        uint32_t Offset = *(const uint32_t*)P;
+        // Offset == 0: CIE
+        if (Offset != 0)
+            f(Entry);
+        P += Length;
+    } while (P != End);
 }
+#endif
+
+std::string JITDebugInfoRegistry::mangle(StringRef Name, const DataLayout &DL) JL_NOTSAFEPOINT
+{
+    std::string MangledName;
+    {
+        raw_string_ostream MangledNameStream(MangledName);
+        Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
+    }
+    return MangledName;
+}
+
+void JITDebugInfoRegistry::add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL) JL_NOTSAFEPOINT {
+    (**codeinst_in_flight)[mangle(name, DL)] = codeinst;
+}
+
+jl_method_instance_t *JITDebugInfoRegistry::lookupLinfo(size_t pointer) JL_NOTSAFEPOINT
+{
+    jl_lock_profile_impl();
+    auto region = linfomap.lower_bound(pointer);
+    jl_method_instance_t *linfo = NULL;
+    if (region != linfomap.end() && pointer < region->first + region->second.first)
+        linfo = region->second.second;
+    jl_unlock_profile_impl();
+    return linfo;
+}
+
+//Protected by debuginfo_asyncsafe
+JITDebugInfoRegistry::objectmap_t &
+JITDebugInfoRegistry::getObjectMap() JL_NOTSAFEPOINT
+{
+    return objectmap;
+}
+
+void JITDebugInfoRegistry::set_sysimg_info(sysimg_info_t info) JL_NOTSAFEPOINT {
+    (**this->sysimg_info) = info;
+}
+
+JITDebugInfoRegistry::Locked<JITDebugInfoRegistry::sysimg_info_t>::ConstLockT
+JITDebugInfoRegistry::get_sysimg_info() const JL_NOTSAFEPOINT {
+    return *this->sysimg_info;
+}
+
+JITDebugInfoRegistry::Locked<JITDebugInfoRegistry::objfilemap_t>::LockT
+JITDebugInfoRegistry::get_objfile_map() JL_NOTSAFEPOINT {
+    return *this->objfilemap;
+}
+
+JITDebugInfoRegistry::JITDebugInfoRegistry() JL_NOTSAFEPOINT {
+    uv_rwlock_init(&debuginfo_asyncsafe);
+    debuginfo_asyncsafe_held.init();
+}
+
+struct unw_table_entry
+{
+    int32_t start_ip_offset;
+    int32_t fde_offset;
+};
 
-extern "C" void jl_lock_profile(void)
+extern "C" JL_DLLEXPORT void jl_lock_profile_impl(void) JL_NOTSAFEPOINT
 {
-    uv_rwlock_rdlock(&threadsafe);
+    uintptr_t held = getJITDebugRegistry().debuginfo_asyncsafe_held;
+    if (held++ == 0)
+        uv_rwlock_rdlock(&getJITDebugRegistry().debuginfo_asyncsafe);
+    getJITDebugRegistry().debuginfo_asyncsafe_held = held;
 }
 
-extern "C" void jl_unlock_profile(void)
+extern "C" JL_DLLEXPORT void jl_unlock_profile_impl(void) JL_NOTSAFEPOINT
 {
-    uv_rwlock_rdunlock(&threadsafe);
+    uintptr_t held = getJITDebugRegistry().debuginfo_asyncsafe_held;
+    assert(held);
+    if (--held == 0)
+        uv_rwlock_rdunlock(&getJITDebugRegistry().debuginfo_asyncsafe);
+    getJITDebugRegistry().debuginfo_asyncsafe_held = held;
 }
 
 // some actions aren't signal (especially profiler) safe so we acquire a lock
@@ -70,7 +164,8 @@ extern "C" void jl_unlock_profile(void)
 template <typename T>
 static void jl_profile_atomic(T f)
 {
-    uv_rwlock_wrlock(&threadsafe);
+    assert(0 == getJITDebugRegistry().debuginfo_asyncsafe_held);
+    uv_rwlock_wrlock(&getJITDebugRegistry().debuginfo_asyncsafe);
 #ifndef _OS_WINDOWS_
     sigset_t sset;
     sigset_t oset;
@@ -81,45 +176,17 @@ static void jl_profile_atomic(T f)
 #ifndef _OS_WINDOWS_
     pthread_sigmask(SIG_SETMASK, &oset, NULL);
 #endif
-    uv_rwlock_wrunlock(&threadsafe);
+    uv_rwlock_wrunlock(&getJITDebugRegistry().debuginfo_asyncsafe);
 }
 
 
 // --- storing and accessing source location metadata ---
-
-struct ObjectInfo {
-    const object::ObjectFile *object;
-    size_t SectionSize;
-    ptrdiff_t slide;
-    object::SectionRef Section;
-    DIContext *context;
-};
-
-// Maintain a mapping of unrealized function names -> linfo objects
-// so that when we see it get emitted, we can add a link back to the linfo
-// that it came from (providing name, type signature, file info, etc.)
-static StringMap<jl_code_instance_t*> codeinst_in_flight;
-static std::string mangle(StringRef Name, const DataLayout &DL)
-{
-    std::string MangledName;
-    {
-        raw_string_ostream MangledNameStream(MangledName);
-        Mangler::getNameWithPrefix(MangledNameStream, Name, DL);
-    }
-    return MangledName;
-}
 void jl_add_code_in_flight(StringRef name, jl_code_instance_t *codeinst, const DataLayout &DL)
 {
-    codeinst_in_flight[mangle(name, DL)] = codeinst;
+    getJITDebugRegistry().add_code_in_flight(name, codeinst, DL);
 }
 
 
-#ifdef _OS_WINDOWS_
-#if defined(_CPU_X86_64_)
-void *lookupWriteAddressFor(RTDyldMemoryManager *memmgr, void *rt_addr);
-#endif
-#endif
-
 #if defined(_OS_WINDOWS_)
 static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnname,
                                      uint8_t *Section, size_t Allocated, uint8_t *UnwindData)
@@ -131,12 +198,14 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam
     tbl->BeginAddress = (DWORD)(Code - Section);
     tbl->EndAddress = (DWORD)(Code - Section + Size);
     tbl->UnwindData = (DWORD)(UnwindData - Section);
+    assert(Code >= Section && Code + Size <= Section + Allocated);
+    assert(UnwindData >= Section && UnwindData <= Section + Allocated);
 #else // defined(_CPU_X86_64_)
     Section += (uintptr_t)Code;
     mod_size = Size;
 #endif
     if (0) {
-        JL_LOCK_NOGC(&jl_in_stackwalk);
+        uv_mutex_lock(&jl_in_stackwalk);
         if (mod_size && !SymLoadModuleEx(GetCurrentProcess(), NULL, NULL, NULL, (DWORD64)Section, mod_size, NULL, SLMFLAG_VIRTUAL)) {
             static int warned = 0;
             if (!warned) {
@@ -156,7 +225,7 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam
                 jl_printf(JL_STDERR, "WARNING: failed to insert function name %s into debug info: %lu\n", name, GetLastError());
             }
         }
-        JL_UNLOCK_NOGC(&jl_in_stackwalk);
+        uv_mutex_unlock(&jl_in_stackwalk);
     }
 #if defined(_CPU_X86_64_)
     jl_profile_atomic([&]() {
@@ -172,307 +241,181 @@ static void create_PRUNTIME_FUNCTION(uint8_t *Code, size_t Size, StringRef fnnam
 }
 #endif
 
-struct revcomp {
-    bool operator() (const size_t& lhs, const size_t& rhs) const
-    { return lhs>rhs; }
-};
-
-struct strrefcomp {
-    bool operator() (const StringRef& lhs, const StringRef& rhs) const
-    {
-        return lhs.compare(rhs) > 0;
-    }
-};
-
-class JuliaJITEventListener: public JITEventListener
+void JITDebugInfoRegistry::registerJITObject(const object::ObjectFile &Object,
+                        std::function<uint64_t(const StringRef &)> getLoadAddress,
+                        std::function<void*(void*)> lookupWriteAddress)
 {
-    std::map<size_t, ObjectInfo, revcomp> objectmap;
-    std::map<size_t, std::pair<size_t, jl_method_instance_t *>, revcomp> linfomap;
-
-public:
-    JuliaJITEventListener(){}
-    virtual ~JuliaJITEventListener() {}
-
-    jl_method_instance_t *lookupLinfo(size_t pointer) JL_NOTSAFEPOINT
-    {
-        uv_rwlock_rdlock(&threadsafe);
-        auto region = linfomap.lower_bound(pointer);
-        jl_method_instance_t *linfo = NULL;
-        if (region != linfomap.end() && pointer < region->first + region->second.first)
-            linfo = region->second.second;
-        uv_rwlock_rdunlock(&threadsafe);
-        return linfo;
-    }
-
-    virtual void NotifyObjectEmitted(const object::ObjectFile &Object,
-                                     const RuntimeDyld::LoadedObjectInfo &L)
-    {
-        return _NotifyObjectEmitted(Object, L, nullptr);
-    }
-
-    virtual void _NotifyObjectEmitted(const object::ObjectFile &Object,
-                                      const RuntimeDyld::LoadedObjectInfo &L,
-                                      RTDyldMemoryManager *memmgr)
-    {
-        jl_ptls_t ptls = jl_get_ptls_states();
-        // This function modify codeinst->fptr in GC safe region.
-        // This should be fine since the GC won't scan this field.
-        int8_t gc_state = jl_gc_safe_enter(ptls);
-
-        auto SavedObject = L.getObjectForDebug(Object).takeBinary();
-        // If the debug object is unavailable, save (a copy of) the original object
-        // for our backtraces.
-        // This copy seems unfortunate, but there doesn't seem to be a way to take
-        // ownership of the original buffer.
-        if (!SavedObject.first) {
-            auto NewBuffer = MemoryBuffer::getMemBufferCopy(
-                    Object.getData(), Object.getFileName());
-            auto NewObj = object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef());
-            assert(NewObj);
-            SavedObject = std::make_pair(std::move(*NewObj), std::move(NewBuffer));
-        }
-        const object::ObjectFile &debugObj = *SavedObject.first.release();
-        SavedObject.second.release();
-
-        object::section_iterator Section = debugObj.section_begin();
-        object::section_iterator EndSection = debugObj.section_end();
-
-        std::map<StringRef, object::SectionRef, strrefcomp> loadedSections;
-        for (const object::SectionRef &lSection: Object.sections()) {
-#if JL_LLVM_VERSION >= 100000
-            auto sName = lSection.getName();
-            if (sName)
-                loadedSections[*sName] = lSection;
-#else
-            StringRef sName;
-            if (!lSection.getName(sName))
-                loadedSections[sName] = lSection;
-#endif
-        }
-        auto getLoadAddress = [&] (const StringRef &sName) -> uint64_t {
-            auto search = loadedSections.find(sName);
-            if (search == loadedSections.end())
-                return 0;
-            return L.getSectionLoadAddress(search->second);
-        };
+    object::section_iterator EndSection = Object.section_end();
 
 #ifdef _CPU_ARM_
-        // ARM does not have/use .eh_frame
-        uint64_t arm_exidx_addr = 0;
-        size_t arm_exidx_len = 0;
-        uint64_t arm_text_addr = 0;
-        size_t arm_text_len = 0;
-        for (auto &section: Object.sections()) {
-            bool istext = false;
-            if (section.isText()) {
-                istext = true;
-            }
-            else {
-#if JL_LLVM_VERSION >= 100000
-                auto sName = section.getName();
-                if (!sName)
-                    continue;
-                if (sName.get() != ".ARM.exidx") {
-                    continue;
-                }
-#else
-                StringRef sName;
-                if (section.getName(sName))
-                    continue;
-                if (sName != ".ARM.exidx") {
-                    continue;
-                }
-#endif
+    // ARM does not have/use .eh_frame
+    uint64_t arm_exidx_addr = 0;
+    size_t arm_exidx_len = 0;
+    uint64_t arm_text_addr = 0;
+    size_t arm_text_len = 0;
+    for (auto &section: Object.sections()) {
+        bool istext = false;
+        if (section.isText()) {
+            istext = true;
+        }
+        else {
+            auto sName = section.getName();
+            if (!sName)
+                continue;
+            if (sName.get() != ".ARM.exidx") {
+                continue;
             }
-            uint64_t loadaddr = L.getSectionLoadAddress(section);
-            size_t seclen = section.getSize();
-            if (istext) {
-                arm_text_addr = loadaddr;
-                arm_text_len = seclen;
-                if (!arm_exidx_addr) {
-                    continue;
-                }
+        }
+        uint64_t loadaddr = getLoadAddress(section.getName().get());
+        size_t seclen = section.getSize();
+        if (istext) {
+            arm_text_addr = loadaddr;
+            arm_text_len = seclen;
+            if (!arm_exidx_addr) {
+                continue;
             }
-            else {
-                arm_exidx_addr = loadaddr;
-                arm_exidx_len = seclen;
-                if (!arm_text_addr) {
-                    continue;
-                }
+        }
+        else {
+            arm_exidx_addr = loadaddr;
+            arm_exidx_len = seclen;
+            if (!arm_text_addr) {
+                continue;
             }
-            unw_dyn_info_t *di = new unw_dyn_info_t;
-            di->gp = 0;
-            di->format = UNW_INFO_FORMAT_ARM_EXIDX;
-            di->start_ip = (uintptr_t)arm_text_addr;
-            di->end_ip = (uintptr_t)(arm_text_addr + arm_text_len);
-            di->u.rti.name_ptr = 0;
-            di->u.rti.table_data = arm_exidx_addr;
-            di->u.rti.table_len = arm_exidx_len;
-            jl_profile_atomic([&]() {
-                _U_dyn_register(di);
-            });
-            break;
         }
+        unw_dyn_info_t *di = new unw_dyn_info_t;
+        di->gp = 0;
+        di->format = UNW_INFO_FORMAT_ARM_EXIDX;
+        di->start_ip = (uintptr_t)arm_text_addr;
+        di->end_ip = (uintptr_t)(arm_text_addr + arm_text_len);
+        di->u.rti.name_ptr = 0;
+        di->u.rti.table_data = arm_exidx_addr;
+        di->u.rti.table_len = arm_exidx_len;
+        jl_profile_atomic([&]() {
+            _U_dyn_register(di);
+        });
+        break;
+    }
 #endif
 
 #if defined(_OS_WINDOWS_)
-        uint64_t SectionAddrCheck = 0; // assert that all of the Sections are at the same location
-        uint8_t *UnwindData = NULL;
+    uint64_t SectionAddrCheck = 0;
+    uint64_t SectionLoadCheck = 0; (void)SectionLoadCheck;
+    uint64_t SectionWriteCheck = 0; (void)SectionWriteCheck;
+    uint8_t *UnwindData = NULL;
 #if defined(_CPU_X86_64_)
-        uint64_t SectionLoadOffset = 1; // The real offset shouldn't be 1.
-        uint8_t *catchjmp = NULL;
-        for (const object::SymbolRef &sym_iter : debugObj.symbols()) {
-            StringRef sName;
-            auto sNameOrError = sym_iter.getName();
-            assert(sNameOrError);
-            sName = sNameOrError.get();
-            uint8_t **pAddr = NULL;
+    uint8_t *catchjmp = NULL;
+    for (const object::SymbolRef &sym_iter : Object.symbols()) {
+        StringRef sName = cantFail(sym_iter.getName());
+        if (sName.equals("__UnwindData") || sName.equals("__catchjmp")) {
+            uint64_t Addr = cantFail(sym_iter.getAddress());
+            auto Section = cantFail(sym_iter.getSection());
+            assert(Section != EndSection && Section->isText());
+            uint64_t SectionAddr = Section->getAddress();
+            StringRef secName = cantFail(Section->getName());
+            uint64_t SectionLoadAddr = getLoadAddress(secName);
+            assert(SectionLoadAddr);
+            if (SectionAddrCheck) // assert that all of the Sections are at the same location
+                assert(SectionAddrCheck == SectionAddr &&
+                        SectionLoadCheck == SectionLoadAddr);
+            SectionAddrCheck = SectionAddr;
+            SectionLoadCheck = SectionLoadAddr;
+            SectionWriteCheck = SectionLoadAddr;
+            if (lookupWriteAddress)
+                SectionWriteCheck = (uintptr_t)lookupWriteAddress((void*)SectionLoadAddr);
+            Addr += SectionWriteCheck - SectionLoadCheck;
             if (sName.equals("__UnwindData")) {
-                pAddr = &UnwindData;
+                UnwindData = (uint8_t*)Addr;
             }
             else if (sName.equals("__catchjmp")) {
-                pAddr = &catchjmp;
-            }
-            if (pAddr) {
-                uint64_t Addr, SectionAddr, SectionLoadAddr;
-                auto AddrOrError = sym_iter.getAddress();
-                assert(AddrOrError);
-                Addr = AddrOrError.get();
-                auto SectionOrError = sym_iter.getSection();
-                assert(SectionOrError);
-                Section = SectionOrError.get();
-                assert(Section != EndSection && Section->isText());
-                SectionAddr = Section->getAddress();
-#if JL_LLVM_VERSION >= 100000
-                auto secName = Section->getName();
-                assert(secName);
-                SectionLoadAddr = getLoadAddress(*secName);
-#else
-                Section->getName(sName);
-                SectionLoadAddr = getLoadAddress(sName);
-#endif
-                Addr -= SectionAddr - SectionLoadAddr;
-                *pAddr = (uint8_t*)Addr;
-                if (SectionAddrCheck)
-                    assert(SectionAddrCheck == SectionLoadAddr);
-                else
-                    SectionAddrCheck = SectionLoadAddr;
-                if (memmgr)
-                    SectionAddr =
-                        (uintptr_t)lookupWriteAddressFor(memmgr,
-                                                         (void*)SectionLoadAddr);
-                if (SectionLoadOffset != 1)
-                    assert(SectionLoadOffset == SectionAddr - SectionLoadAddr);
-                else
-                    SectionLoadOffset = SectionAddr - SectionLoadAddr;
+                catchjmp = (uint8_t*)Addr;
             }
         }
-        assert(catchjmp);
-        assert(UnwindData);
-        assert(SectionAddrCheck);
-        assert(SectionLoadOffset != 1);
-        catchjmp[SectionLoadOffset] = 0x48;
-        catchjmp[SectionLoadOffset + 1] = 0xb8; // mov RAX, QWORD PTR [&__julia_personality]
-        *(uint64_t*)(&catchjmp[SectionLoadOffset + 2]) =
-            (uint64_t)&__julia_personality;
-        catchjmp[SectionLoadOffset + 10] = 0xff;
-        catchjmp[SectionLoadOffset + 11] = 0xe0; // jmp RAX
-        UnwindData[SectionLoadOffset] = 0x09; // version info, UNW_FLAG_EHANDLER
-        UnwindData[SectionLoadOffset + 1] = 4;    // size of prolog (bytes)
-        UnwindData[SectionLoadOffset + 2] = 2;    // count of unwind codes (slots)
-        UnwindData[SectionLoadOffset + 3] = 0x05; // frame register (rbp) = rsp
-        UnwindData[SectionLoadOffset + 4] = 4;    // second instruction
-        UnwindData[SectionLoadOffset + 5] = 0x03; // mov RBP, RSP
-        UnwindData[SectionLoadOffset + 6] = 1;    // first instruction
-        UnwindData[SectionLoadOffset + 7] = 0x50; // push RBP
-        *(DWORD*)&UnwindData[SectionLoadOffset + 8] = (DWORD)(catchjmp - (uint8_t*)SectionAddrCheck); // relative location of catchjmp
+    }
+    assert(catchjmp);
+    assert(UnwindData);
+    assert(SectionAddrCheck);
+    assert(SectionLoadCheck);
+    assert(!memcmp(catchjmp, "\0\0\0\0\0\0\0\0\0\0\0\0", 12) &&
+            !memcmp(UnwindData, "\0\0\0\0\0\0\0\0\0\0\0\0", 12));
+    catchjmp[0] = 0x48;
+    catchjmp[1] = 0xb8; // mov RAX, QWORD PTR [&__julia_personality]
+    *(uint64_t*)(&catchjmp[2]) = (uint64_t)&__julia_personality;
+    catchjmp[10] = 0xff;
+    catchjmp[11] = 0xe0; // jmp RAX
+    UnwindData[0] = 0x09; // version info, UNW_FLAG_EHANDLER
+    UnwindData[1] = 4;    // size of prolog (bytes)
+    UnwindData[2] = 2;    // count of unwind codes (slots)
+    UnwindData[3] = 0x05; // frame register (rbp) = rsp
+    UnwindData[4] = 4;    // second instruction
+    UnwindData[5] = 0x03; // mov RBP, RSP
+    UnwindData[6] = 1;    // first instruction
+    UnwindData[7] = 0x50; // push RBP
+    *(DWORD*)&UnwindData[8] = (DWORD)(catchjmp - (uint8_t*)SectionWriteCheck); // relative location of catchjmp
+    UnwindData -= SectionWriteCheck - SectionLoadCheck;
 #endif // defined(_OS_X86_64_)
 #endif // defined(_OS_WINDOWS_)
 
-        auto symbols = object::computeSymbolSizes(debugObj);
-        bool first = true;
-        for (const auto &sym_size : symbols) {
-            const object::SymbolRef &sym_iter = sym_size.first;
-            auto SymbolTypeOrError = sym_iter.getType();
-            assert(SymbolTypeOrError);
-            object::SymbolRef::Type SymbolType = SymbolTypeOrError.get();
-            if (SymbolType != object::SymbolRef::ST_Function) continue;
-            auto AddrOrError = sym_iter.getAddress();
-            assert(AddrOrError);
-            uint64_t Addr = AddrOrError.get();
-            auto SectionOrError = sym_iter.getSection();
-            assert(SectionOrError);
-            Section = SectionOrError.get();
-            if (Section == EndSection) continue;
-            if (!Section->isText()) continue;
-            uint64_t SectionAddr = Section->getAddress();
-#if JL_LLVM_VERSION >= 100000
-            Expected<StringRef> secName = Section->getName();
-            assert(secName);
-            uint64_t SectionLoadAddr = getLoadAddress(*secName);
-#else
-            StringRef secName;
-            Section->getName(secName);
-            uint64_t SectionLoadAddr = getLoadAddress(secName);
-#endif
-            Addr -= SectionAddr - SectionLoadAddr;
-            auto sNameOrError = sym_iter.getName();
-            assert(sNameOrError);
-            StringRef sName = sNameOrError.get();
-            uint64_t SectionSize = Section->getSize();
-            size_t Size = sym_size.second;
+    auto symbols = object::computeSymbolSizes(Object);
+    bool first = true;
+    for (const auto &sym_size : symbols) {
+        const object::SymbolRef &sym_iter = sym_size.first;
+        object::SymbolRef::Type SymbolType = cantFail(sym_iter.getType());
+        if (SymbolType != object::SymbolRef::ST_Function) continue;
+        uint64_t Addr = cantFail(sym_iter.getAddress());
+        auto Section = cantFail(sym_iter.getSection());
+        if (Section == EndSection) continue;
+        if (!Section->isText()) continue;
+        uint64_t SectionAddr = Section->getAddress();
+        StringRef secName = cantFail(Section->getName());
+        uint64_t SectionLoadAddr = getLoadAddress(secName);
+        Addr -= SectionAddr - SectionLoadAddr;
+        StringRef sName = cantFail(sym_iter.getName());
+        uint64_t SectionSize = Section->getSize();
+        size_t Size = sym_size.second;
 #if defined(_OS_WINDOWS_)
-            if (SectionAddrCheck)
-                assert(SectionAddrCheck == SectionLoadAddr);
-            else
-                SectionAddrCheck = SectionLoadAddr;
-            create_PRUNTIME_FUNCTION(
-                   (uint8_t*)(uintptr_t)Addr, (size_t)Size, sName,
-                   (uint8_t*)(uintptr_t)SectionLoadAddr, (size_t)SectionSize, UnwindData);
+        if (SectionAddrCheck)
+            assert(SectionAddrCheck == SectionAddr &&
+                    SectionLoadCheck == SectionLoadAddr);
+        SectionAddrCheck = SectionAddr;
+        SectionLoadCheck = SectionLoadAddr;
+        create_PRUNTIME_FUNCTION(
+                (uint8_t*)(uintptr_t)Addr, (size_t)Size, sName,
+                (uint8_t*)(uintptr_t)SectionLoadAddr, (size_t)SectionSize, UnwindData);
 #endif
+        jl_code_instance_t *codeinst = NULL;
+        {
+            auto lock = *this->codeinst_in_flight;
+            auto &codeinst_in_flight = *lock;
             StringMap<jl_code_instance_t*>::iterator codeinst_it = codeinst_in_flight.find(sName);
-            jl_code_instance_t *codeinst = NULL;
             if (codeinst_it != codeinst_in_flight.end()) {
                 codeinst = codeinst_it->second;
                 codeinst_in_flight.erase(codeinst_it);
             }
-            jl_profile_atomic([&]() {
-                if (codeinst)
-                    linfomap[Addr] = std::make_pair(Size, codeinst->def);
-                if (first) {
-                    ObjectInfo tmp = {&debugObj,
-                        (size_t)SectionSize,
-                        (ptrdiff_t)(SectionAddr - SectionLoadAddr),
-                        *Section,
-                        nullptr,
-                        };
-                    objectmap[SectionLoadAddr] = tmp;
-                    first = false;
-                }
-            });
         }
-        jl_gc_safe_leave(ptls, gc_state);
-    }
-
-    // must implement if we ever start freeing code
-    // virtual void NotifyFreeingObject(const ObjectImage &Object) {}
-    // virtual void NotifyFreeingObject(const object::ObjectFile &Obj) {}
-
-    std::map<size_t, ObjectInfo, revcomp>& getObjectMap() JL_NOTSAFEPOINT
-    {
-        uv_rwlock_rdlock(&threadsafe);
-        return objectmap;
+        jl_profile_atomic([&]() {
+            if (codeinst)
+                linfomap[Addr] = std::make_pair(Size, codeinst->def);
+            if (first) {
+                objectmap[SectionLoadAddr] = {&Object,
+                    (size_t)SectionSize,
+                    (ptrdiff_t)(SectionAddr - SectionLoadAddr),
+                    *Section,
+                    nullptr,
+                    };
+                first = false;
+            }
+        });
     }
-};
+}
 
-JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
-                                         const object::ObjectFile &Object,
-                                         const RuntimeDyld::LoadedObjectInfo &L,
-                                         RTDyldMemoryManager *memmgr)
+void jl_register_jit_object(const object::ObjectFile &Object,
+                            std::function<uint64_t(const StringRef &)> getLoadAddress,
+                            std::function<void *(void *)> lookupWriteAddress)
 {
-    ((JuliaJITEventListener*)Listener)->_NotifyObjectEmitted(Object, L, memmgr);
+    getJITDebugRegistry().registerJITObject(Object, getLoadAddress, lookupWriteAddress);
 }
 
+// TODO: convert the safe names from aotcomile.cpp:makeSafeName back into symbols
 static std::pair<char *, bool> jl_demangle(const char *name) JL_NOTSAFEPOINT
 {
     // This function is not allowed to reference any TLS variables since
@@ -505,13 +448,6 @@ static std::pair<char *, bool> jl_demangle(const char *name) JL_NOTSAFEPOINT
     return std::make_pair(strdup(name), false);
 }
 
-static JuliaJITEventListener *jl_jit_events;
-JITEventListener *CreateJuliaJITEventListener(void)
-{
-    jl_jit_events = new JuliaJITEventListener();
-    return jl_jit_events;
-}
-
 // *frames is a one element array containing whatever we could come up
 // with for the current frame. here we'll try to expand it using debug info
 // func_name and file_name are either NULL or malloc'd pointers
@@ -544,7 +480,12 @@ static int lookup_pointer(
     DILineInfoSpecifier infoSpec(DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath,
                                  DILineInfoSpecifier::FunctionNameKind::ShortName);
 
+    // DWARFContext/DWARFUnit update some internal tables during these queries, so
+    // a lock is needed.
+    assert(0 == getJITDebugRegistry().debuginfo_asyncsafe_held);
+    uv_rwlock_wrlock(&getJITDebugRegistry().debuginfo_asyncsafe);
     auto inlineInfo = context->getInliningInfoForAddress(makeAddress(Section, pointer + slide), infoSpec);
+    uv_rwlock_wrunlock(&getJITDebugRegistry().debuginfo_asyncsafe);
 
     int fromC = (*frames)[0].fromC;
     int n_frames = inlineInfo.getNumberOfFrames();
@@ -567,7 +508,9 @@ static int lookup_pointer(
             info = inlineInfo.getFrame(i);
         }
         else {
+            uv_rwlock_wrlock(&getJITDebugRegistry().debuginfo_asyncsafe);
             info = context->getLineInfoForAddress(makeAddress(Section, pointer + slide), infoSpec);
+            uv_rwlock_wrunlock(&getJITDebugRegistry().debuginfo_asyncsafe);
         }
 
         jl_frame_t *frame = &(*frames)[i];
@@ -611,13 +554,37 @@ static int lookup_pointer(
 #ifndef _OS_WINDOWS_
 #include <dlfcn.h>
 #endif
-typedef struct {
-    const llvm::object::ObjectFile *obj;
-    DIContext *ctx;
-    int64_t slide;
-} objfileentry_t;
-typedef std::map<uint64_t, objfileentry_t, revcomp> obfiletype;
-static obfiletype objfilemap;
+
+
+
+#if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
+
+void JITDebugInfoRegistry::libc_frames_t::libc_register_frame(const char *Entry) {
+    auto libc_register_frame_ = jl_atomic_load_relaxed(&this->libc_register_frame_);
+    if (!libc_register_frame_) {
+        libc_register_frame_ = (void(*)(void*))dlsym(RTLD_NEXT, "__register_frame");
+        jl_atomic_store_relaxed(&this->libc_register_frame_, libc_register_frame_);
+    }
+    assert(libc_register_frame_);
+    jl_profile_atomic([&]() {
+        libc_register_frame_(const_cast<char *>(Entry));
+        __register_frame(const_cast<char *>(Entry));
+    });
+}
+
+void JITDebugInfoRegistry::libc_frames_t::libc_deregister_frame(const char *Entry) {
+    auto libc_deregister_frame_ = jl_atomic_load_relaxed(&this->libc_deregister_frame_);
+    if (!libc_deregister_frame_) {
+        libc_deregister_frame_ = (void(*)(void*))dlsym(RTLD_NEXT, "__deregister_frame");
+        jl_atomic_store_relaxed(&this->libc_deregister_frame_, libc_deregister_frame_);
+    }
+    assert(libc_deregister_frame_);
+    jl_profile_atomic([&]() {
+        libc_deregister_frame_(const_cast<char *>(Entry));
+        __deregister_frame(const_cast<char *>(Entry));
+    });
+}
+#endif
 
 static bool getObjUUID(llvm::object::MachOObjectFile *obj, uint8_t uuid[16]) JL_NOTSAFEPOINT
 {
@@ -630,32 +597,16 @@ static bool getObjUUID(llvm::object::MachOObjectFile *obj, uint8_t uuid[16]) JL_
     }
     return false;
 }
-
-struct debug_link_info {
-    StringRef filename;
-    uint32_t crc32;
-};
 static debug_link_info getDebuglink(const object::ObjectFile &Obj) JL_NOTSAFEPOINT
 {
     debug_link_info info = {};
     for (const object::SectionRef &Section: Obj.sections()) {
-#if JL_LLVM_VERSION >= 100000
         Expected<StringRef> sName = Section.getName();
         if (sName && *sName == ".gnu_debuglink")
-#else
-        StringRef sName;
-        if (!Section.getName(sName) && sName == ".gnu_debuglink")
-#endif
         {
-            StringRef Contents;
-#if JL_LLVM_VERSION >= 90000
             auto found = Section.getContents();
-            if (found)
-                Contents = *found;
-#else
-            bool found = !Section.getContents(Contents);
-#endif
             if (found) {
+                StringRef Contents = *found;
                 size_t length = Contents.find('\0');
                 info.filename = Contents.substr(0, length);
                 info.crc32 = *(const uint32_t*)Contents.substr(LLT_ALIGN(length + 1, 4), 4).data();
@@ -756,25 +707,17 @@ openDebugInfo(StringRef debuginfopath, const debug_link_info &info)
             std::move(error_splitobj.get()),
             std::move(SplitFile.get()));
 }
-
-static uint64_t jl_sysimage_base;
-static jl_sysimg_fptrs_t sysimg_fptrs;
-static jl_method_instance_t **sysimg_fvars_linfo;
-static size_t sysimg_fvars_n;
-void jl_register_fptrs(uint64_t sysimage_base, const jl_sysimg_fptrs_t *fptrs,
-                       jl_method_instance_t **linfos, size_t n)
+extern "C" JL_DLLEXPORT
+void jl_register_fptrs_impl(uint64_t sysimage_base, const jl_sysimg_fptrs_t *fptrs,
+    jl_method_instance_t **linfos, size_t n)
 {
-    jl_sysimage_base = (uintptr_t)sysimage_base;
-    sysimg_fptrs = *fptrs;
-    sysimg_fvars_linfo = linfos;
-    sysimg_fvars_n = n;
+    getJITDebugRegistry().set_sysimg_info({(uintptr_t) sysimage_base, *fptrs, linfos, n});
 }
 
 template<typename T>
 static inline void ignoreError(T &err) JL_NOTSAFEPOINT
 {
-#if !defined(NDEBUG)
-    // Needed only with LLVM assertion build
+#if !defined(NDEBUG) // Needed only with LLVM assertion build
     consumeError(err.takeError());
 #endif
 }
@@ -783,7 +726,7 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t
                                        void **saddr, char **name, bool untrusted_dladdr) JL_NOTSAFEPOINT
 {
     // Assume we only need base address for sysimg for now
-    if (!insysimage || !sysimg_fptrs.base)
+    if (!insysimage || !getJITDebugRegistry().get_sysimg_info()->sysimg_fptrs.base)
         saddr = nullptr;
     bool needs_saddr = saddr && (!*saddr || untrusted_dladdr);
     bool needs_name = name && (!*name || untrusted_dladdr);
@@ -809,7 +752,7 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t
     }
     if (Section.getObject() && (needs_saddr || needs_name)) {
         size_t distance = (size_t)-1;
-        SymRef sym_found;
+        object::SymbolRef sym_found;
         for (auto sym : Section.getObject()->symbols()) {
             if (!Section.containsSymbol(sym))
                 continue;
@@ -827,18 +770,37 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t
         }
         if (distance != (size_t)-1) {
             if (needs_saddr) {
-                auto addr = sym_found.getAddress();
-                assert(addr);
-                *saddr = (void*)(uintptr_t)(addr.get() - slide);
+                uintptr_t addr = cantFail(sym_found.getAddress());
+                *saddr = (void*)(addr - slide);
                 needs_saddr = false;
             }
             if (needs_name) {
                 if (auto name_or_err = sym_found.getName()) {
                     auto nameref = name_or_err.get();
+                    const char globalPrefix = // == DataLayout::getGlobalPrefix
+#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
+                        '_';
+#elif defined(_OS_DARWIN_)
+                        '_';
+#else
+                        '\0';
+#endif
+                    if (globalPrefix) {
+                        if (nameref[0] == globalPrefix)
+                          nameref = nameref.drop_front();
+#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
+                        else if (nameref[0] == '@') // X86_VectorCall
+                          nameref = nameref.drop_front();
+#endif
+                        // else VectorCall, Assembly, Internal, etc.
+                    }
+#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
+                    nameref = nameref.split('@').first;
+#endif
                     size_t len = nameref.size();
                     *name = (char*)realloc_s(*name, len + 1);
-                    (*name)[len] = 0;
                     memcpy(*name, nameref.data(), len);
+                    (*name)[len] = 0;
                     needs_name = false;
                 }
             }
@@ -855,17 +817,17 @@ static void get_function_name_and_base(llvm::object::SectionRef Section, size_t
         PSYMBOL_INFO pSymbol = (PSYMBOL_INFO)frame_info_func;
         pSymbol->SizeOfStruct = sizeof(SYMBOL_INFO);
         pSymbol->MaxNameLen = MAX_SYM_NAME;
-        JL_LOCK_NOGC(&jl_in_stackwalk);
+        uv_mutex_lock(&jl_in_stackwalk);
         if (SymFromAddr(GetCurrentProcess(), dwAddress, &dwDisplacement64, pSymbol)) {
             // errors are ignored
             jl_copy_str(name, pSymbol->Name);
         }
-        JL_UNLOCK_NOGC(&jl_in_stackwalk);
+        uv_mutex_unlock(&jl_in_stackwalk);
     }
 #endif
 }
 
-static objfileentry_t &find_object_file(uint64_t fbase, StringRef fname) JL_NOTSAFEPOINT
+static objfileentry_t find_object_file(uint64_t fbase, StringRef fname) JL_NOTSAFEPOINT
 {
     int isdarwin = 0, islinux = 0, iswindows = 0;
 #if defined(_OS_DARWIN_)
@@ -878,12 +840,11 @@ static objfileentry_t &find_object_file(uint64_t fbase, StringRef fname) JL_NOTS
     (void)iswindows;
 
 // GOAL: Read debuginfo from file
-    // TODO: need read/write lock here for objfilemap synchronization
-    obfiletype::iterator it = objfilemap.find(fbase);
-    if (it != objfilemap.end())
+    objfileentry_t entry{nullptr, nullptr, 0};
+    auto success = getJITDebugRegistry().get_objfile_map()->emplace(fbase, entry);
+    if (!success.second)
         // Return cached value
-        return it->second;
-    auto &entry = objfilemap[fbase]; // default initialized
+        return success.first->second;
 
 // GOAL: Assign errorobj
     StringRef objpath;
@@ -898,8 +859,10 @@ static objfileentry_t &find_object_file(uint64_t fbase, StringRef fname) JL_NOTS
                 StringRef((const char *)fbase, msize), "", false);
         auto origerrorobj = llvm::object::ObjectFile::createObjectFile(
             membuf->getMemBufferRef(), file_magic::unknown);
-        if (!origerrorobj)
+        if (!origerrorobj) {
+            ignoreError(origerrorobj);
             return entry;
+        }
 
         llvm::object::MachOObjectFile *morigobj = (llvm::object::MachOObjectFile*)
             origerrorobj.get().get();
@@ -949,7 +912,7 @@ static objfileentry_t &find_object_file(uint64_t fbase, StringRef fname) JL_NOTS
             CFRelease(objuuid);
             CFRelease(objurl);
 
-            char objpathcstr[PATH_MAX];
+            char objpathcstr[JL_PATH_MAX];
             if (dsympathurl != NULL &&
                 CFURLGetFileSystemRepresentation(
                     dsympathurl, true, (UInt8 *)objpathcstr,
@@ -998,10 +961,10 @@ static objfileentry_t &find_object_file(uint64_t fbase, StringRef fname) JL_NOTS
                     DebugInfo(errorCodeToError(std::make_error_code(std::errc::no_such_file_or_directory)));
                 // Can't find a way to construct an empty Expected object
                 // that can be ignored.
-                ignoreError(DebugInfo);
                 if (fname.substr(sep + 1) != info.filename) {
                     debuginfopath = fname.substr(0, sep + 1).str();
                     debuginfopath += info.filename;
+                    ignoreError(DebugInfo);
                     DebugInfo = openDebugInfo(debuginfopath, info);
                 }
                 if (!DebugInfo) {
@@ -1021,7 +984,7 @@ static objfileentry_t &find_object_file(uint64_t fbase, StringRef fname) JL_NOTS
                 if (DebugInfo) {
                     errorobj = std::move(DebugInfo);
                     // Yes, we've checked, and yes LLVM want us to check again.
-                    assert(errorobj);
+                    ignoreError(errorobj);
                     debugobj = errorobj->getBinary();
                 }
                 else {
@@ -1051,8 +1014,9 @@ static objfileentry_t &find_object_file(uint64_t fbase, StringRef fname) JL_NOTS
         auto binary = errorobj->takeBinary();
         binary.first.release();
         binary.second.release();
-        // update cache
         entry = {debugobj, context, slide};
+        // update cache
+        (*getJITDebugRegistry().get_objfile_map())[fbase] = entry;
     }
     else {
         // TODO: report the error instead of silently consuming it?
@@ -1099,10 +1063,10 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *
 #ifdef _OS_WINDOWS_
     IMAGEHLP_MODULE64 ModuleInfo;
     ModuleInfo.SizeOfStruct = sizeof(IMAGEHLP_MODULE64);
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     jl_refresh_dbg_module_list();
     bool isvalid = SymGetModuleInfo64(GetCurrentProcess(), (DWORD64)pointer, &ModuleInfo);
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     if (!isvalid)
         return false;
 
@@ -1110,7 +1074,7 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *
     if (fname.empty()) // empirically, LoadedImageName might be missing
         fname = ModuleInfo.ImageName;
     DWORD64 fbase = ModuleInfo.BaseOfImage;
-    bool insysimage = (fbase == jl_sysimage_base);
+    bool insysimage = (fbase == getJITDebugRegistry().get_sysimg_info()->jl_sysimage_base);
     if (isSysImg)
         *isSysImg = insysimage;
     if (onlySysImg && !insysimage)
@@ -1130,6 +1094,14 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *
     struct link_map *extra_info;
     dladdr_success = dladdr1((void*)pointer, &dlinfo, (void**)&extra_info, RTLD_DL_LINKMAP) != 0;
 #else
+#ifdef _OS_DARWIN_
+    // On macOS 12, dladdr(-1, …) succeeds and returns the main executable image,
+    // despite there never actually being an image there. This is not what we want,
+    // as we use -1 as a known-invalid value e.g. in the test suite.
+    if (pointer == ~(size_t)0) {
+        return false;
+    }
+#endif
     dladdr_success = dladdr((void*)pointer, &dlinfo) != 0;
 #endif
     if (!dladdr_success || !dlinfo.dli_fname)
@@ -1142,7 +1114,7 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *
     fbase = (uintptr_t)dlinfo.dli_fbase;
 #endif
     StringRef fname;
-    bool insysimage = (fbase == jl_sysimage_base);
+    bool insysimage = (fbase == getJITDebugRegistry().get_sysimg_info()->jl_sysimage_base);
     if (saddr && !(insysimage && untrusted_dladdr))
         *saddr = dlinfo.dli_saddr;
     if (isSysImg)
@@ -1157,7 +1129,7 @@ bool jl_dylib_DI_for_fptr(size_t pointer, object::SectionRef *Section, int64_t *
         jl_copy_str(filename, dlinfo.dli_fname);
     fname = dlinfo.dli_fname;
 #endif // ifdef _OS_WINDOWS_
-    auto &entry = find_object_file(fbase, fname);
+    auto entry = find_object_file(fbase, fname);
     *slide = entry.slide;
     *context = entry.ctx;
     if (entry.obj)
@@ -1175,7 +1147,8 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
 #ifdef _OS_WINDOWS_
     static IMAGEHLP_LINE64 frame_info_line;
     DWORD dwDisplacement = 0;
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
+    jl_refresh_dbg_module_list();
     DWORD64 dwAddress = pointer;
     frame_info_line.SizeOfStruct = sizeof(IMAGEHLP_LINE64);
     if (SymGetLineFromAddr64(GetCurrentProcess(), dwAddress, &dwDisplacement, &frame_info_line)) {
@@ -1185,7 +1158,7 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
             jl_copy_str(&frame0->file_name, frame_info_line.FileName);
         frame0->line = frame_info_line.LineNumber;
     }
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
 #endif
     object::SectionRef Section;
     llvm::DIContext *context = NULL;
@@ -1197,20 +1170,23 @@ static int jl_getDylibFunctionInfo(jl_frame_t **frames, size_t pointer, int skip
         return 1;
     }
     frame0->fromC = !isSysImg;
-    if (isSysImg && sysimg_fptrs.base && saddr) {
-        intptr_t diff = (uintptr_t)saddr - (uintptr_t)sysimg_fptrs.base;
-        for (size_t i = 0; i < sysimg_fptrs.nclones; i++) {
-            if (diff == sysimg_fptrs.clone_offsets[i]) {
-                uint32_t idx = sysimg_fptrs.clone_idxs[i] & jl_sysimg_val_mask;
-                if (idx < sysimg_fvars_n) // items after this were cloned but not referenced directly by a method (such as our ccall PLT thunks)
-                    frame0->linfo = sysimg_fvars_linfo[idx];
-                break;
+    {
+        auto sysimg_locked = getJITDebugRegistry().get_sysimg_info();
+        if (isSysImg && sysimg_locked->sysimg_fptrs.base && saddr) {
+            intptr_t diff = (uintptr_t)saddr - (uintptr_t)sysimg_locked->sysimg_fptrs.base;
+            for (size_t i = 0; i < sysimg_locked->sysimg_fptrs.nclones; i++) {
+                if (diff == sysimg_locked->sysimg_fptrs.clone_offsets[i]) {
+                    uint32_t idx = sysimg_locked->sysimg_fptrs.clone_idxs[i] & jl_sysimg_val_mask;
+                    if (idx < sysimg_locked->sysimg_fvars_n) // items after this were cloned but not referenced directly by a method (such as our ccall PLT thunks)
+                        frame0->linfo = sysimg_locked->sysimg_fvars_linfo[idx];
+                    break;
+                }
             }
-        }
-        for (size_t i = 0; i < sysimg_fvars_n; i++) {
-            if (diff == sysimg_fptrs.offsets[i]) {
-                frame0->linfo = sysimg_fvars_linfo[i];
-                break;
+            for (size_t i = 0; i < sysimg_locked->sysimg_fvars_n; i++) {
+                if (diff == sysimg_locked->sysimg_fptrs.offsets[i]) {
+                    frame0->linfo = sysimg_locked->sysimg_fvars_linfo[i];
+                    break;
+                }
             }
         }
     }
@@ -1221,11 +1197,13 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
         object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT
 {
     int found = 0;
-    std::map<size_t, ObjectInfo, revcomp> &objmap = jl_jit_events->getObjectMap();
-    std::map<size_t, ObjectInfo, revcomp>::iterator fit = objmap.lower_bound(fptr);
-
+    assert(0 == getJITDebugRegistry().debuginfo_asyncsafe_held);
+    uv_rwlock_wrlock(&getJITDebugRegistry().debuginfo_asyncsafe);
     if (symsize)
         *symsize = 0;
+
+    auto &objmap = getJITDebugRegistry().getObjectMap();
+    auto fit = objmap.lower_bound(fptr);
     if (fit != objmap.end() && fptr < fit->first + fit->second.SectionSize) {
         *slide = fit->second.slide;
         *Section = fit->second.Section;
@@ -1236,12 +1214,12 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
         }
         found = 1;
     }
-    uv_rwlock_rdunlock(&threadsafe);
+    uv_rwlock_wrunlock(&getJITDebugRegistry().debuginfo_asyncsafe);
     return found;
 }
 
 // Set *name and *filename to either NULL or malloc'd string
-int jl_getFunctionInfo(jl_frame_t **frames_out, size_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT
+extern "C" JL_DLLEXPORT int jl_getFunctionInfo_impl(jl_frame_t **frames_out, size_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT
 {
     // This function is not allowed to reference any TLS variables if noInline
     // since it can be called from an unmanaged thread on OSX.
@@ -1255,7 +1233,7 @@ int jl_getFunctionInfo(jl_frame_t **frames_out, size_t pointer, int skipC, int n
     int64_t slide;
     uint64_t symsize;
     if (jl_DI_for_fptr(pointer, &symsize, &slide, &Section, &context)) {
-        frames[0].linfo = jl_jit_events->lookupLinfo(pointer);
+        frames[0].linfo = getJITDebugRegistry().lookupLinfo(pointer);
         int nf = lookup_pointer(Section, context, frames_out, pointer, slide, true, noInline);
         return nf;
     }
@@ -1264,35 +1242,8 @@ int jl_getFunctionInfo(jl_frame_t **frames_out, size_t pointer, int skipC, int n
 
 extern "C" jl_method_instance_t *jl_gdblookuplinfo(void *p) JL_NOTSAFEPOINT
 {
-    return jl_jit_events->lookupLinfo((size_t)p);
-}
-
-#if (defined(_OS_LINUX_) || (defined(_OS_DARWIN_) && defined(LLVM_SHLIB)))
-extern "C" void __register_frame(void*);
-extern "C" void __deregister_frame(void*);
-
-template <typename callback>
-static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f)
-{
-    const char *P = EHFrameAddr;
-    const char *End = P + EHFrameSize;
-    do {
-        const char *Entry = P;
-        P += 4;
-        assert(P <= End);
-        uint32_t Length = *(const uint32_t*)Entry;
-        // Length == 0: Terminator
-        if (Length == 0)
-            break;
-        assert(P + Length <= End);
-        uint32_t Offset = *(const uint32_t*)P;
-        // Offset == 0: CIE
-        if (Offset != 0)
-            f(Entry);
-        P += Length;
-    } while (P != End);
+    return getJITDebugRegistry().lookupLinfo((size_t)p);
 }
-#endif
 
 #if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
 
@@ -1304,51 +1255,28 @@ static void processFDEs(const char *EHFrameAddr, size_t EHFrameSize, callback f)
  * ourselves to ensure the right one gets picked.
  */
 
-static void (*libc_register_frame)(void*)   = NULL;
-static void (*libc_deregister_frame)(void*) = NULL;
-
 // This implementation handles frame registration for local targets.
 void register_eh_frames(uint8_t *Addr, size_t Size)
 {
   // On OS X OS X __register_frame takes a single FDE as an argument.
   // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html
   processFDEs((char*)Addr, Size, [](const char *Entry) {
-        if (!libc_register_frame) {
-          libc_register_frame = (void(*)(void*))dlsym(RTLD_NEXT, "__register_frame");
-        }
-        assert(libc_register_frame);
-        jl_profile_atomic([&]() {
-            libc_register_frame(const_cast<char *>(Entry));
-            __register_frame(const_cast<char *>(Entry));
-        });
+      getJITDebugRegistry().libc_frames.libc_register_frame(Entry);
     });
 }
 
 void deregister_eh_frames(uint8_t *Addr, size_t Size)
 {
    processFDEs((char*)Addr, Size, [](const char *Entry) {
-        if (!libc_deregister_frame) {
-          libc_deregister_frame = (void(*)(void*))dlsym(RTLD_NEXT, "__deregister_frame");
-        }
-        assert(libc_deregister_frame);
-        jl_profile_atomic([&]() {
-            libc_deregister_frame(const_cast<char *>(Entry));
-            __deregister_frame(const_cast<char *>(Entry));
-        });
+      getJITDebugRegistry().libc_frames.libc_deregister_frame(Entry);
     });
 }
 
-#elif defined(_OS_LINUX_) && \
+#elif (defined(_OS_LINUX_) || defined(_OS_FREEBSD_)) && \
     defined(JL_UNW_HAS_FORMAT_IP) && \
     !defined(_CPU_ARM_) // ARM does not have/use .eh_frame, so we handle this elsewhere
 #include <type_traits>
 
-struct unw_table_entry
-{
-    int32_t start_ip_offset;
-    int32_t fde_offset;
-};
-
 // Skip over an arbitrary long LEB128 encoding.
 // Return the pointer to the first unprocessed byte.
 static const uint8_t *consume_leb128(const uint8_t *Addr, const uint8_t *End)
@@ -1529,6 +1457,13 @@ void register_eh_frames(uint8_t *Addr, size_t Size)
     jl_profile_atomic([&]() {
         __register_frame(Addr);
     });
+
+    // Now first count the number of FDEs
+    size_t nentries = 0;
+    processFDEs((char*)Addr, Size, [&](const char*){ nentries++; });
+    if (nentries == 0)
+        return;
+
     // Our unwinder
     unw_dyn_info_t *di = new unw_dyn_info_t;
     // In a shared library, this is set to the address of the PLT.
@@ -1536,13 +1471,10 @@ void register_eh_frames(uint8_t *Addr, size_t Size)
     // not seem to be used on our supported architectures.
     di->gp = 0;
     // I'm not a great fan of the naming of this constant, but it means the
-    // right thing, which is a table of FDEs and ips.
+    // right thing, which is a table of FDEs and IPs.
     di->format = UNW_INFO_FORMAT_IP_OFFSET;
     di->u.rti.name_ptr = 0;
     di->u.rti.segbase = (unw_word_t)Addr;
-    // Now first count the number of FDEs
-    size_t nentries = 0;
-    processFDEs((char*)Addr, Size, [&](const char*){ nentries++; });
 
     uintptr_t start_ip = (uintptr_t)-1;
     uintptr_t end_ip = 0;
@@ -1677,16 +1609,17 @@ void deregister_eh_frames(uint8_t *Addr, size_t Size)
 
 #endif
 
-extern "C"
-uint64_t jl_getUnwindInfo(uint64_t dwAddr)
+extern "C" JL_DLLEXPORT
+uint64_t jl_getUnwindInfo_impl(uint64_t dwAddr)
 {
     // Might be called from unmanaged thread
-    std::map<size_t, ObjectInfo, revcomp> &objmap = jl_jit_events->getObjectMap();
-    std::map<size_t, ObjectInfo, revcomp>::iterator it = objmap.lower_bound(dwAddr);
+    jl_lock_profile_impl();
+    auto &objmap = getJITDebugRegistry().getObjectMap();
+    auto it = objmap.lower_bound(dwAddr);
     uint64_t ipstart = 0; // ip of the start of the section (if found)
     if (it != objmap.end() && dwAddr < it->first + it->second.SectionSize) {
         ipstart = (uint64_t)(uintptr_t)(*it).first;
     }
-    uv_rwlock_rdunlock(&threadsafe);
+    jl_unlock_profile_impl();
     return ipstart;
 }
diff --git a/src/debuginfo.h b/src/debuginfo.h
index 76fcabb62964ac..5ea34350ac1fb2 100644
--- a/src/debuginfo.h
+++ b/src/debuginfo.h
@@ -8,15 +8,8 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide,
 bool jl_dylib_DI_for_fptr(size_t pointer, llvm::object::SectionRef *Section, int64_t *slide, llvm::DIContext **context,
     bool onlySysImg, bool *isSysImg, void **saddr, char **name, char **filename) JL_NOTSAFEPOINT;
 
-#if JL_LLVM_VERSION >= 90000
 static object::SectionedAddress makeAddress(
         llvm::object::SectionRef Section, uint64_t address) JL_NOTSAFEPOINT
 {
     return object::SectionedAddress{address, Section.getIndex()};
 }
-#else
-static uint64_t makeAddress(llvm::object::SectionRef Section, uint64_t address) JL_NOTSAFEPOINT
-{
-    return address;
-}
-#endif
diff --git a/src/disasm.cpp b/src/disasm.cpp
index 086a1deae5a172..4d21f49e4af2ab 100644
--- a/src/disasm.cpp
+++ b/src/disasm.cpp
@@ -5,11 +5,45 @@
 //
 // Original copyright:
 //
-//                     The LLVM Compiler Infrastructure
+// University of Illinois/NCSA
+// Open Source License
+// Copyright (c) 2003-2016 University of Illinois at Urbana-Champaign.
+// All rights reserved.
 //
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+//  Developed by:
 //
+//    LLVM Team
+//
+//    University of Illinois at Urbana-Champaign
+//
+//    http://llvm.org
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy of
+// this software and associated documentation files (the "Software"), to deal with
+// the Software without restriction, including without limitation the rights to
+// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+// of the Software, and to permit persons to whom the Software is furnished to do
+// so, subject to the following conditions:
+//
+//    * Redistributions of source code must retain the above copyright notice,
+//      this list of conditions and the following disclaimers.
+//
+//    * Redistributions in binary form must reproduce the above copyright notice,
+//      this list of conditions and the following disclaimers in the
+//      documentation and/or other materials provided with the distribution.
+//
+//    * Neither the names of the LLVM Team, University of Illinois at
+//      Urbana-Champaign, nor the names of its contributors may be used to
+//      endorse or promote products derived from this Software without specific
+//      prior written permission.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+// CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+// SOFTWARE.
 //===----------------------------------------------------------------------===//
 //
 // This class implements a disassembler of a memory block, given a function
@@ -22,45 +56,61 @@
 #include <string>
 
 #include "llvm-version.h"
-#include <llvm/Object/ObjectFile.h>
-#include <llvm/BinaryFormat/MachO.h>
+
+// for outputting disassembly
+#include <llvm/ADT/Triple.h>
+#include <llvm/AsmParser/Parser.h>
+#include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/BinaryFormat/COFF.h>
-#include <llvm/MC/MCInst.h>
-#include <llvm/MC/MCStreamer.h>
-#include <llvm/MC/MCSubtargetInfo.h>
-#include <llvm/MC/MCObjectFileInfo.h>
-#include <llvm/MC/MCRegisterInfo.h>
-#include <llvm/MC/MCAsmInfo.h>
+#include <llvm/BinaryFormat/MachO.h>
+#include <llvm/DebugInfo/DIContext.h>
+#include <llvm/DebugInfo/DWARF/DWARFContext.h>
+#include <llvm/IR/AssemblyAnnotationWriter.h>
+#include <llvm/IR/DebugInfo.h>
+#include <llvm/IR/Function.h>
+#include <llvm/IR/IntrinsicInst.h>
+#include <llvm/IR/LLVMContext.h>
+#include <llvm/IR/Module.h>
 #include <llvm/MC/MCAsmBackend.h>
+#include <llvm/MC/MCAsmInfo.h>
 #include <llvm/MC/MCCodeEmitter.h>
-#include <llvm/MC/MCInstPrinter.h>
-#include <llvm/MC/MCInstrInfo.h>
 #include <llvm/MC/MCContext.h>
+#include <llvm/MC/MCDisassembler/MCDisassembler.h>
+#include <llvm/MC/MCDisassembler/MCExternalSymbolizer.h>
 #include <llvm/MC/MCExpr.h>
+#include <llvm/MC/MCInst.h>
+#include <llvm/MC/MCInstPrinter.h>
 #include <llvm/MC/MCInstrAnalysis.h>
+#include <llvm/MC/MCInstrInfo.h>
+#include <llvm/MC/MCObjectFileInfo.h>
+#include <llvm/MC/MCRegisterInfo.h>
+#include <llvm/MC/MCStreamer.h>
+#include <llvm/MC/MCSubtargetInfo.h>
 #include <llvm/MC/MCSymbol.h>
-#include <llvm/AsmParser/Parser.h>
-#include <llvm/MC/MCDisassembler/MCDisassembler.h>
-#include <llvm/MC/MCDisassembler/MCExternalSymbolizer.h>
-#include <llvm/ADT/Triple.h>
+#include <llvm/Object/ObjectFile.h>
+#include <llvm/Support/FormattedStream.h>
 #include <llvm/Support/MemoryBuffer.h>
+#include <llvm/Support/NativeFormatting.h>
 #include <llvm/Support/SourceMgr.h>
+#if JL_LLVM_VERSION >= 140000
+#include <llvm/MC/TargetRegistry.h>
+#else
 #include <llvm/Support/TargetRegistry.h>
+#endif
 #include <llvm/Support/TargetSelect.h>
 #include <llvm/Support/raw_ostream.h>
-#include <llvm/Support/FormattedStream.h>
-#include <llvm/Support/NativeFormatting.h>
-#include <llvm/ExecutionEngine/JITEventListener.h>
-#include <llvm/IR/LLVMContext.h>
-#include <llvm/DebugInfo/DIContext.h>
-#include <llvm/DebugInfo/DWARF/DWARFContext.h>
-#include <llvm/IR/DebugInfo.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/Module.h>
-#include <llvm/IR/IntrinsicInst.h>
-#include <llvm/IR/AssemblyAnnotationWriter.h>
+
+// for outputting assembly
+#include <llvm/CodeGen/AsmPrinter.h>
+#include <llvm/CodeGen/AsmPrinterHandler.h>
+#include <llvm/CodeGen/MachineModuleInfo.h>
+#include <llvm/CodeGen/Passes.h>
+#include <llvm/CodeGen/TargetPassConfig.h>
+#include <llvm/Support/CodeGen.h>
 #include <llvm/IR/LegacyPassManager.h>
 
+#include <llvm-c/Disassembler.h>
+
 #include "julia.h"
 #include "julia_internal.h"
 #include "jitlayers.h"
@@ -166,7 +216,6 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
 {
     if (verbosity == output_none)
         return;
-    bool update_line_only = false;
     uint32_t nframes = DI.size();
     if (nframes == 0)
         return; // just skip over lines with no debug info at all
@@ -179,21 +228,40 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
             break;
         }
     }
-    if (collapse_recursive && 0 < nctx) {
-        // check if we're adding more frames with the same method name,
-        // if so, drop all existing calls to it from the top of the context
-        // AND check if instead the context was previously printed that way
-        // but now has removed the recursive frames
-        StringRef method = StringRef(context.at(nctx - 1).FunctionName).rtrim(';');
-        if ((nctx < nframes && StringRef(DI.at(nframes - nctx - 1).FunctionName).rtrim(';') == method) ||
-            (nctx < context.size() && StringRef(context.at(nctx).FunctionName).rtrim(';') == method)) {
-            update_line_only = true;
-            while (nctx > 0 && StringRef(context.at(nctx - 1).FunctionName).rtrim(';') == method) {
-                nctx -= 1;
+    bool update_line_only = false;
+    if (collapse_recursive) {
+        if (nctx > 0) {
+            // check if we're adding more frames with the same method name,
+            // if so, drop all existing calls to it from the top of the context
+            // AND check if instead the context was previously printed that way
+            // but now has removed the recursive frames
+            StringRef method = StringRef(context.at(nctx - 1).FunctionName).rtrim(';'); // last matching frame
+            if ((nctx < nframes && StringRef(DI.at(nframes - nctx - 1).FunctionName).rtrim(';') == method) ||
+                (nctx < context.size() && StringRef(context.at(nctx).FunctionName).rtrim(';') == method)) {
+                update_line_only = true;
+                // transform nctx to exclude the combined frames
+                while (nctx > 0 && StringRef(context.at(nctx - 1).FunctionName).rtrim(';') == method)
+                    nctx -= 1;
             }
         }
+        if (!update_line_only && nctx < context.size() && nctx < nframes) {
+            // look at the first non-matching element to see if we are only changing the line number
+            const DILineInfo &CtxLine = context.at(nctx);
+            const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx);
+            if (StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';'))
+                update_line_only = true;
+        }
     }
-    // examine what frames we're returning from
+    else if (nctx < context.size() && nctx < nframes) {
+        // look at the first non-matching element to see if we are only changing the line number
+        const DILineInfo &CtxLine = context.at(nctx);
+        const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx);
+        if (CtxLine.FileName == FrameLine.FileName &&
+                StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';')) {
+            update_line_only = true;
+        }
+    }
+    // examine how many frames we're returning from
     if (nctx < context.size()) {
         // compute the new inlining depth
         uint32_t npops;
@@ -209,15 +277,6 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
         }
         else {
             npops = context.size() - nctx;
-            // look at the first non-matching element to see if we are only changing the line number
-            if (!update_line_only && nctx < nframes) {
-                const DILineInfo &CtxLine = context.at(nctx);
-                const DILineInfo &FrameLine = DI.at(nframes - 1 - nctx);
-                if (CtxLine.FileName == FrameLine.FileName &&
-                        StringRef(CtxLine.FunctionName).rtrim(';') == StringRef(FrameLine.FunctionName).rtrim(';')) {
-                    update_line_only = true;
-                }
-            }
         }
         context.resize(nctx);
         update_line_only && (npops -= 1);
@@ -244,7 +303,7 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
         if (frame.Line != UINT_MAX && frame.Line != 0)
             Out << ":" << frame.Line;
         StringRef method = StringRef(frame.FunctionName).rtrim(';');
-        Out << " within `" << method << "'";
+        Out << " within `" << method << "`";
         if (collapse_recursive) {
             while (nctx < nframes) {
                 const DILineInfo &frame = DI.at(nframes - 1 - nctx);
@@ -274,20 +333,26 @@ void DILineInfoPrinter::emit_lineinfo(raw_ostream &Out, std::vector<DILineInfo>
 
 // adaptor class for printing line numbers before llvm IR lines
 class LineNumberAnnotatedWriter : public AssemblyAnnotationWriter {
-    DILocation *InstrLoc = nullptr;
-    DILineInfoPrinter LinePrinter{"; ", false};
+    const DILocation *InstrLoc = nullptr;
+    DILineInfoPrinter LinePrinter;
     DenseMap<const Instruction *, DILocation *> DebugLoc;
     DenseMap<const Function *, DISubprogram *> Subprogram;
 public:
-    LineNumberAnnotatedWriter(const char *debuginfo)
-    {
+    LineNumberAnnotatedWriter(const char *LineStart, bool bracket_outer, const char *debuginfo)
+      : LinePrinter(LineStart, bracket_outer) {
         LinePrinter.SetVerbosity(debuginfo);
     }
     virtual void emitFunctionAnnot(const Function *, formatted_raw_ostream &);
     virtual void emitInstructionAnnot(const Instruction *, formatted_raw_ostream &);
+    virtual void emitInstructionAnnot(const DILocation *, formatted_raw_ostream &);
     virtual void emitBasicBlockEndAnnot(const BasicBlock *, formatted_raw_ostream &);
     // virtual void printInfoComment(const Value &, formatted_raw_ostream &) {}
 
+    void emitEnd(formatted_raw_ostream &Out) {
+        LinePrinter.emit_finish(Out);
+        InstrLoc = nullptr;
+    }
+
     void addSubprogram(const Function *F, DISubprogram *SP)
     {
         Subprogram[F] = SP;
@@ -322,12 +387,19 @@ void LineNumberAnnotatedWriter::emitFunctionAnnot(
 void LineNumberAnnotatedWriter::emitInstructionAnnot(
       const Instruction *I, formatted_raw_ostream &Out)
 {
-    DILocation *NewInstrLoc = I->getDebugLoc();
+    const DILocation *NewInstrLoc = I->getDebugLoc();
     if (!NewInstrLoc) {
         auto Loc = DebugLoc.find(I);
         if (Loc != DebugLoc.end())
             NewInstrLoc = Loc->second;
     }
+    emitInstructionAnnot(NewInstrLoc, Out);
+    Out << LinePrinter.inlining_indent(" ");
+}
+
+void LineNumberAnnotatedWriter::emitInstructionAnnot(
+      const DILocation *NewInstrLoc, formatted_raw_ostream &Out)
+{
     if (NewInstrLoc && NewInstrLoc != InstrLoc) {
         InstrLoc = NewInstrLoc;
         std::vector<DILineInfo> DIvec;
@@ -343,14 +415,13 @@ void LineNumberAnnotatedWriter::emitInstructionAnnot(
         } while (NewInstrLoc);
         LinePrinter.emit_lineinfo(Out, DIvec);
     }
-    Out << LinePrinter.inlining_indent(" ");
 }
 
 void LineNumberAnnotatedWriter::emitBasicBlockEndAnnot(
         const BasicBlock *BB, formatted_raw_ostream &Out)
 {
     if (BB == &BB->getParent()->back())
-        LinePrinter.emit_finish(Out);
+        emitEnd(Out);
 }
 
 static void jl_strip_llvm_debug(Module *m, bool all_meta, LineNumberAnnotatedWriter *AAW)
@@ -419,41 +490,40 @@ void jl_strip_llvm_addrspaces(Module *m)
 // print an llvm IR acquired from jl_get_llvmf
 // warning: this takes ownership of, and destroys, f->getParent()
 extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo)
+jl_value_t *jl_dump_function_ir_impl(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo)
 {
     std::string code;
     raw_string_ostream stream(code);
 
     {
-        Function *llvmf = dyn_cast_or_null<Function>((Function*)f);
-        if (!llvmf || (!llvmf->isDeclaration() && !llvmf->getParent()))
-            jl_error("jl_dump_function_ir: Expected Function* in a temporary Module");
-
-        JL_LOCK(&codegen_lock); // Might GC
-        LineNumberAnnotatedWriter AAW{debuginfo};
-        if (!llvmf->getParent()) {
-            // print the function declaration as-is
-            llvmf->print(stream, &AAW);
-            delete llvmf;
-        }
-        else {
-            Module *m = llvmf->getParent();
-            if (strip_ir_metadata) {
-                std::string llvmfn(llvmf->getName());
-                jl_strip_llvm_addrspaces(m);
-                jl_strip_llvm_debug(m, true, &AAW);
-                // rewriting the function type creates a new function, so look it up again
-                llvmf = m->getFunction(llvmfn);
-            }
-            if (dump_module) {
-                m->print(stream, &AAW);
+        std::unique_ptr<jl_llvmf_dump_t> dump(static_cast<jl_llvmf_dump_t*>(f));
+        dump->TSM.withModuleDo([&](Module &m) {
+            Function *llvmf = dump->F;
+            if (!llvmf || (!llvmf->isDeclaration() && !llvmf->getParent()))
+                jl_error("jl_dump_function_ir: Expected Function* in a temporary Module");
+
+            LineNumberAnnotatedWriter AAW{"; ", false, debuginfo};
+            if (!llvmf->getParent()) {
+                // print the function declaration as-is
+                llvmf->print(stream, &AAW);
+                delete llvmf;
             }
             else {
-                llvmf->print(stream, &AAW);
+                if (strip_ir_metadata) {
+                    std::string llvmfn(llvmf->getName());
+                    jl_strip_llvm_addrspaces(&m);
+                    jl_strip_llvm_debug(&m, true, &AAW);
+                    // rewriting the function type creates a new function, so look it up again
+                    llvmf = m.getFunction(llvmfn);
+                }
+                if (dump_module) {
+                    m.print(stream, &AAW);
+                }
+                else {
+                    llvmf->print(stream, &AAW);
+                }
             }
-            delete m;
-        }
-        JL_UNLOCK(&codegen_lock); // Might GC
+        });
     }
 
     return jl_pchar_to_string(stream.str().data(), stream.str().size());
@@ -465,7 +535,8 @@ static void jl_dump_asm_internal(
         DIContext *di_ctx,
         raw_ostream &rstream,
         const char* asm_variant,
-        const char* debuginfo);
+        const char* debuginfo,
+        bool binary);
 
 // This isn't particularly fast, but neither is printing assembly, and they're only used for interactive mode
 static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset)
@@ -501,10 +572,9 @@ static uint64_t compute_obj_symsize(object::SectionRef Section, uint64_t offset)
 
 // print a native disassembly for the function starting at fptr
 extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo)
+jl_value_t *jl_dump_fptr_asm_impl(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary)
 {
     assert(fptr != 0);
-    jl_ptls_t ptls = jl_get_ptls_states();
     std::string code;
     raw_string_ostream stream(code);
 
@@ -532,13 +602,15 @@ jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant,
     }
 
     // Dump assembly code
+    jl_ptls_t ptls = jl_current_task->ptls;
     int8_t gc_state = jl_gc_safe_enter(ptls);
     jl_dump_asm_internal(
             fptr, symsize, slide,
             Section, context,
             stream,
             asm_variant,
-            debuginfo);
+            debuginfo,
+            binary);
     jl_gc_safe_leave(ptls, gc_state);
 
     return jl_pchar_to_string(stream.str().data(), stream.str().size());
@@ -734,6 +806,33 @@ static int OpInfoLookup(void *DisInfo, uint64_t PC, uint64_t Offset, uint64_t Si
 }
 } // namespace
 
+// Stringify raw bytes as a comment string.
+std::string rawCodeComment(const llvm::ArrayRef<uint8_t>& Memory, const llvm::Triple& Triple)
+{
+    std::string Buffer{"; "};
+    llvm::raw_string_ostream Stream{Buffer};
+    auto Address = reinterpret_cast<uintptr_t>(Memory.data());
+    // write abbreviated address
+    llvm::write_hex(Stream, Address & 0xffff, HexPrintStyle::Lower, 4);
+    Stream << ":";
+    auto Arch = Triple.getArch();
+    bool FixedLength = !(Arch == Triple::x86 || Arch == Triple::x86_64);
+    if (FixedLength)
+        Stream << " ";
+    if (FixedLength && Triple.isLittleEndian()) {
+        for (auto Iter = Memory.rbegin(); Iter != Memory.rend(); ++Iter)
+            llvm::write_hex(Stream, *Iter, HexPrintStyle::Lower, 2);
+    }
+    else {
+        // variable-length or (fixed-length) big-endian format
+        for (auto Byte : Memory) {
+            if (!FixedLength)
+                Stream << " ";
+            llvm::write_hex(Stream, Byte, HexPrintStyle::Lower, 2);
+        }
+    }
+    return Stream.str();
+}
 
 static void jl_dump_asm_internal(
         uintptr_t Fptr, size_t Fsize, int64_t slide,
@@ -741,7 +840,8 @@ static void jl_dump_asm_internal(
         DIContext *di_ctx,
         raw_ostream &rstream,
         const char* asm_variant,
-        const char* debuginfo)
+        const char* debuginfo,
+        bool binary)
 {
     // GC safe
     // Get the host information
@@ -758,23 +858,28 @@ static void jl_dump_asm_internal(
     SourceMgr SrcMgr;
 
     MCTargetOptions Options;
-    std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*TheTarget->createMCRegInfo(TheTriple.str()), TheTriple.str()
-#if JL_LLVM_VERSION >= 100000
-            , Options
-#endif
-        ));
+    std::unique_ptr<MCAsmInfo> MAI(
+        TheTarget->createMCAsmInfo(*TheTarget->createMCRegInfo(TheTriple.str()), TheTriple.str(), Options));
     assert(MAI && "Unable to create target asm info!");
 
     std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TheTriple.str()));
     assert(MRI && "Unable to create target register info!");
 
+    std::unique_ptr<llvm::MCSubtargetInfo> STI(
+      TheTarget->createMCSubtargetInfo(TheTriple.str(), cpu, features));
+    assert(STI && "Unable to create subtarget info!");
+
+#if JL_LLVM_VERSION >= 130000
+    MCContext Ctx(TheTriple, MAI.get(), MRI.get(), STI.get(), &SrcMgr);
+    std::unique_ptr<MCObjectFileInfo> MOFI(
+      TheTarget->createMCObjectFileInfo(Ctx, /*PIC=*/false, /*LargeCodeModel=*/ false));
+    Ctx.setObjectFileInfo(MOFI.get());
+#else
     std::unique_ptr<MCObjectFileInfo> MOFI(new MCObjectFileInfo());
     MCContext Ctx(MAI.get(), MRI.get(), MOFI.get(), &SrcMgr);
     MOFI->InitMCObjectFileInfo(TheTriple, /* PIC */ false, Ctx);
+#endif
 
-    // Set up Subtarget and Disassembler
-    std::unique_ptr<MCSubtargetInfo>
-        STI(TheTarget->createMCSubtargetInfo(TheTriple.str(), cpu, features));
     std::unique_ptr<MCDisassembler> DisAsm(TheTarget->createMCDisassembler(*STI, Ctx));
     if (!DisAsm) {
         rstream << "ERROR: no disassembler for target " << TheTriple.str();
@@ -797,7 +902,11 @@ static void jl_dump_asm_internal(
     std::unique_ptr<MCCodeEmitter> CE;
     std::unique_ptr<MCAsmBackend> MAB;
     if (ShowEncoding) {
+#if JL_LLVM_VERSION >= 150000
+        CE.reset(TheTarget->createMCCodeEmitter(*MCII, Ctx));
+#else
         CE.reset(TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx));
+#endif
         MAB.reset(TheTarget->createMCAsmBackend(*STI, *MRI, Options));
     }
 
@@ -812,7 +921,11 @@ static void jl_dump_asm_internal(
                                          IP.release(),
                                          std::move(CE), std::move(MAB),
                                          /*ShowInst*/ false));
+#if JL_LLVM_VERSION >= 140000
+    Streamer->initSections(true, *STI);
+#else
     Streamer->InitSections(true);
+#endif
 
     // Make the MemoryObject wrapper
     ArrayRef<uint8_t> memoryObject(const_cast<uint8_t*>((const uint8_t*)Fptr),Fsize);
@@ -840,6 +953,16 @@ static void jl_dump_asm_internal(
         }
     }
 
+    if (binary) {
+        // Print the complete address and the size at the top (instruction addresses are abbreviated)
+        std::string Buffer{"; code origin: "};
+        llvm::raw_string_ostream Stream{Buffer};
+        auto Address = reinterpret_cast<uintptr_t>(memoryObject.data());
+        llvm::write_hex(Stream, Address, HexPrintStyle::Lower, 16);
+        Stream << ", code size: " << memoryObject.size();
+        Streamer->emitRawText(Stream.str());
+    }
+
     // Take two passes: In the first pass we record all branch labels,
     // in the second we actually perform the output
     for (int pass = 0; pass < 2; ++ pass) {
@@ -873,11 +996,7 @@ static void jl_dump_asm_internal(
                     std::string buf;
                     dbgctx.emit_lineinfo(buf, di_lineIter->second);
                     if (!buf.empty()) {
-#if JL_LLVM_VERSION >= 110000
                         Streamer->emitRawText(buf);
-#else
-                        Streamer->EmitRawText(buf);
-#endif
                     }
                 }
             }
@@ -893,11 +1012,7 @@ static void jl_dump_asm_internal(
                 if (di_ctx) {
                     std::string buf;
                     DILineInfoSpecifier infoSpec(
-#if JL_LLVM_VERSION >= 110000
                         DILineInfoSpecifier::FileLineInfoKind::RawValue,
-#else
-                        DILineInfoSpecifier::FileLineInfoKind::Default,
-#endif
                         DILineInfoSpecifier::FunctionNameKind::ShortName);
                     DIInliningInfo dbg = di_ctx->getInliningInfoForAddress(makeAddress(Section, Index + Fptr + slide), infoSpec);
                     if (dbg.getNumberOfFrames()) {
@@ -907,11 +1022,7 @@ static void jl_dump_asm_internal(
                         dbgctx.emit_lineinfo(buf, di_lineIter->second);
                     }
                     if (!buf.empty()) {
-#if JL_LLVM_VERSION >= 110000
                         Streamer->emitRawText(buf);
-#else
-                        Streamer->EmitRawText(buf);
-#endif
                     }
                     nextLineAddr = (++di_lineIter)->first;
                 }
@@ -923,11 +1034,7 @@ static void jl_dump_asm_internal(
                 // stream << Index << ": ";
                 MCSymbol *symbol = DisInfo.lookupSymbol(Fptr+Index);
                 if (symbol) {
-#if JL_LLVM_VERSION >= 110000
                     Streamer->emitLabel(symbol);
-#else
-                    Streamer->EmitLabel(symbol);
-#endif
                 }
             }
 
@@ -935,9 +1042,6 @@ static void jl_dump_asm_internal(
             MCDisassembler::DecodeStatus S;
             FuncMCView view = memoryObject.slice(Index);
             S = DisAsm->getInstruction(Inst, insSize, view, 0,
-#if JL_LLVM_VERSION < 100000
-                                      /*VStream*/ nulls(),
-#endif
                                       /*CStream*/ pass != 0 ? Streamer->GetCommentOS() : nulls());
             if (pass != 0 && Streamer->GetCommentOS().tell() > 0)
                 Streamer->GetCommentOS() << '\n';
@@ -962,21 +1066,13 @@ static void jl_dump_asm_internal(
                             llvm::write_hex(buf, *(uint8_t*)(Fptr + Index + i), HexPrintStyle::PrefixLower, 2);
                         }
                     }
-#if JL_LLVM_VERSION >= 110000
                     Streamer->emitRawText(StringRef(buf.str()));
-#else
-                    Streamer->EmitRawText(StringRef(buf.str()));
-#endif
                 }
                 break;
 
             case MCDisassembler::SoftFail:
                 if (pass != 0) {
-#if JL_LLVM_VERSION >= 110000
                     Streamer->emitRawText(StringRef("potentially undefined instruction encoding:"));
-#else
-                    Streamer->EmitRawText(StringRef("potentially undefined instruction encoding:"));
-#endif
                 }
                 // Fall through
 
@@ -1009,11 +1105,9 @@ static void jl_dump_asm_internal(
                             }
                         }
                     }
-#if JL_LLVM_VERSION >= 110000
+                    if (binary)
+                        Streamer->emitRawText(rawCodeComment(memoryObject.slice(Index, insSize), TheTriple));
                     Streamer->emitInstruction(Inst, *STI);
-#else
-                    Streamer->EmitInstruction(Inst, *STI);
-#endif
                 }
                 break;
             }
@@ -1027,18 +1121,154 @@ static void jl_dump_asm_internal(
             std::string buf;
             dbgctx.emit_finish(buf);
             if (!buf.empty()) {
-#if JL_LLVM_VERSION >= 110000
                 Streamer->emitRawText(buf);
+            }
+        }
+    }
+}
+
+/// addPassesToX helper drives creation and initialization of TargetPassConfig.
+static MCContext *
+addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM) {
+    TargetPassConfig *PassConfig = TM->createPassConfig(PM);
+    PassConfig->setDisableVerify(false);
+    PM.add(PassConfig);
+    MachineModuleInfoWrapperPass *MMIWP =
+        new MachineModuleInfoWrapperPass(TM);
+    PM.add(MMIWP);
+    if (PassConfig->addISelPasses())
+        return NULL;
+    PassConfig->addMachinePasses();
+    PassConfig->setInitialized();
+    return &MMIWP->getMMI().getContext();
+}
+
+class LineNumberPrinterHandler : public AsmPrinterHandler {
+    MCStreamer &S;
+    LineNumberAnnotatedWriter LinePrinter;
+    std::string Buffer;
+    llvm::raw_string_ostream RawStream;
+    llvm::formatted_raw_ostream Stream;
+
+public:
+    LineNumberPrinterHandler(AsmPrinter &Printer, const char *debuginfo)
+        : S(*Printer.OutStreamer),
+          LinePrinter("; ", true, debuginfo),
+          RawStream(Buffer),
+          Stream(RawStream) {}
+
+    void emitAndReset() {
+        Stream.flush();
+        RawStream.flush();
+        if (Buffer.empty())
+            return;
+        S.emitRawText(Buffer);
+        Buffer.clear();
+    }
+
+    virtual void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {}
+    //virtual void beginModule(Module *M) override {}
+    virtual void endModule() override {}
+    /// note that some AsmPrinter implementations may not call beginFunction at all
+    virtual void beginFunction(const MachineFunction *MF) override {
+        LinePrinter.emitFunctionAnnot(&MF->getFunction(), Stream);
+        emitAndReset();
+    }
+    //virtual void markFunctionEnd() override {}
+    virtual void endFunction(const MachineFunction *MF) override {
+        LinePrinter.emitEnd(Stream);
+        emitAndReset();
+    }
+    //virtual void beginFragment(const MachineBasicBlock *MBB,
+    //                           ExceptionSymbolProvider ESP) override {}
+    //virtual void endFragment() override {}
+    //virtual void beginFunclet(const MachineBasicBlock &MBB,
+    //                          MCSymbol *Sym = nullptr) override {}
+    //virtual void endFunclet() override {}
+    virtual void beginInstruction(const MachineInstr *MI) override {
+        LinePrinter.emitInstructionAnnot(MI->getDebugLoc(), Stream);
+        emitAndReset();
+    }
+    virtual void endInstruction() override {}
+};
+
+// get a native assembly for llvm::Function
+extern "C" JL_DLLEXPORT
+jl_value_t *jl_dump_function_asm_impl(void *F, char raw_mc, const char* asm_variant, const char *debuginfo, char binary)
+{
+    // precise printing via IR assembler
+    SmallVector<char, 4096> ObjBufferSV;
+    { // scope block
+        std::unique_ptr<jl_llvmf_dump_t> dump(static_cast<jl_llvmf_dump_t*>(F));
+        Function *f = dump->F;
+        llvm::raw_svector_ostream asmfile(ObjBufferSV);
+        assert(!f->isDeclaration());
+        dump->TSM.withModuleDo([&](Module &m) {
+            for (auto &f2 : m.functions()) {
+                if (f != &f2 && !f->isDeclaration())
+                    f2.deleteBody();
+            }
+        });
+        auto TMBase = jl_ExecutionEngine->cloneTargetMachine();
+        LLVMTargetMachine *TM = static_cast<LLVMTargetMachine*>(TMBase.get());
+        legacy::PassManager PM;
+        addTargetPasses(&PM, TM->getTargetTriple(), TM->getTargetIRAnalysis());
+        if (raw_mc) {
+            raw_svector_ostream obj_OS(ObjBufferSV);
+            if (TM->addPassesToEmitFile(PM, obj_OS, nullptr, CGFT_ObjectFile, false, nullptr))
+                return jl_an_empty_string;
+            dump->TSM.withModuleDo([&](Module &m) { PM.run(m); });
+        }
+        else {
+            MCContext *Context = addPassesToGenerateCode(TM, PM);
+            if (!Context)
+                return jl_an_empty_string;
+            Context->setGenDwarfForAssembly(false);
+            // Duplicate LLVMTargetMachine::addAsmPrinter here so we can set the asm dialect and add the custom annotation printer
+            const MCSubtargetInfo &STI = *TM->getMCSubtargetInfo();
+            const MCAsmInfo &MAI = *TM->getMCAsmInfo();
+            const MCRegisterInfo &MRI = *TM->getMCRegisterInfo();
+            const MCInstrInfo &MII = *TM->getMCInstrInfo();
+            unsigned OutputAsmDialect = MAI.getAssemblerDialect();
+            if (!strcmp(asm_variant, "att"))
+                OutputAsmDialect = 0;
+            if (!strcmp(asm_variant, "intel"))
+                OutputAsmDialect = 1;
+            MCInstPrinter *InstPrinter = TM->getTarget().createMCInstPrinter(
+                jl_ExecutionEngine->getTargetTriple(), OutputAsmDialect, MAI, MII, MRI);
+             std::unique_ptr<MCAsmBackend> MAB(TM->getTarget().createMCAsmBackend(
+                STI, MRI, TM->Options.MCOptions));
+            std::unique_ptr<MCCodeEmitter> MCE;
+            if (binary) { // enable MCAsmStreamer::AddEncodingComment printing
+#if JL_LLVM_VERSION >= 150000
+                MCE.reset(TM->getTarget().createMCCodeEmitter(MII, *Context));
 #else
-                Streamer->EmitRawText(buf);
+                MCE.reset(TM->getTarget().createMCCodeEmitter(MII, MRI, *Context));
 #endif
             }
+            auto FOut = std::make_unique<formatted_raw_ostream>(asmfile);
+            std::unique_ptr<MCStreamer> S(TM->getTarget().createAsmStreamer(
+                *Context, std::move(FOut), true,
+                true, InstPrinter,
+                std::move(MCE), std::move(MAB),
+                false));
+            std::unique_ptr<AsmPrinter> Printer(
+                TM->getTarget().createAsmPrinter(*TM, std::move(S)));
+            Printer->addAsmPrinterHandler(AsmPrinter::HandlerInfo(
+                        std::unique_ptr<AsmPrinterHandler>(new LineNumberPrinterHandler(*Printer, debuginfo)),
+                        "emit", "Debug Info Emission", "Julia", "Julia::LineNumberPrinterHandler Markup"));
+            if (!Printer)
+                return jl_an_empty_string;
+            PM.add(Printer.release());
+            PM.add(createFreeMachineFunctionPass());
+            dump->TSM.withModuleDo([&](Module &m){ PM.run(m); });
         }
     }
+    return jl_pchar_to_string(ObjBufferSV.data(), ObjBufferSV.size());
 }
 
 extern "C" JL_DLLEXPORT
-LLVMDisasmContextRef jl_LLVMCreateDisasm(
+LLVMDisasmContextRef jl_LLVMCreateDisasm_impl(
         const char *TripleName, void *DisInfo, int TagType,
         LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp)
 {
@@ -1046,7 +1276,7 @@ LLVMDisasmContextRef jl_LLVMCreateDisasm(
 }
 
 extern "C" JL_DLLEXPORT
-JL_DLLEXPORT size_t jl_LLVMDisasmInstruction(
+JL_DLLEXPORT size_t jl_LLVMDisasmInstruction_impl(
         LLVMDisasmContextRef DC, uint8_t *Bytes, uint64_t BytesSize,
         uint64_t PC, char *OutString, size_t OutStringSize)
 {
diff --git a/src/dlload.c b/src/dlload.c
index f696eabc4382de..230a31ed3d695b 100644
--- a/src/dlload.c
+++ b/src/dlload.c
@@ -9,7 +9,6 @@
 #include "julia.h"
 #include "julia_internal.h"
 #ifdef _OS_WINDOWS_
-#include <windows.h>
 #include <direct.h>
 #else
 #include <unistd.h>
@@ -57,6 +56,15 @@ static int endswith_extension(const char *path) JL_NOTSAFEPOINT
     return 0;
 }
 
+#ifdef _OS_WINDOWS_
+#define CRTDLL_BASENAME "msvcrt"
+
+JL_DLLEXPORT const char *jl_crtdll_basename = CRTDLL_BASENAME;
+const char *jl_crtdll_name = CRTDLL_BASENAME ".dll";
+
+#undef CRTDLL_BASENAME
+#endif
+
 #define PATHBUF 4096
 
 #define JL_RTLD(flags, FLAG) (flags & JL_RTLD_ ## FLAG ? RTLD_ ## FLAG : 0)
@@ -101,7 +109,6 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI
         needsSymRefreshModuleList = 1;
     return lib;
 #else
-    dlerror(); /* Reset error status. */
     return dlopen(filename,
                   (flags & JL_RTLD_NOW ? RTLD_NOW : RTLD_LAZY)
                   | JL_RTLD(flags, LOCAL)
@@ -112,7 +119,7 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI
 #ifdef RTLD_NOLOAD
                   | JL_RTLD(flags, NOLOAD)
 #endif
-#if defined(RTLD_DEEPBIND) && !(defined(JL_ASAN_ENABLED) || defined(JL_TSAN_ENABLED) || defined(JL_MSAN_ENABLED))
+#if defined(RTLD_DEEPBIND) && !(defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_))
                   | JL_RTLD(flags, DEEPBIND)
 #endif
 #ifdef RTLD_FIRST
@@ -125,16 +132,20 @@ JL_DLLEXPORT void *jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOI
 JL_DLLEXPORT int jl_dlclose(void *handle) JL_NOTSAFEPOINT
 {
 #ifdef _OS_WINDOWS_
-    if (!handle) return -1;
+    if (!handle) {
+        return -1;
+    }
     return !FreeLibrary((HMODULE) handle);
 #else
-    dlerror(); /* Reset error status. */
-    if (!handle) return -1;
+    if (!handle) {
+        dlerror(); /* Reset error status. */
+        return -1;
+    }
     return dlclose(handle);
 #endif
 }
 
-JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, int throw_err) JL_NOTSAFEPOINT // (or throw)
+JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags, int throw_err)
 {
     char path[PATHBUF], relocated[PATHBUF];
     int i;
@@ -144,51 +155,59 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
     uv_stat_t stbuf;
     void *handle;
     int abspath;
-    // number of extensions to try — if modname already ends with the
+    int is_atpath;
+    // number of extensions to try — if modname already ends with the
     // standard extension, then we don't try adding additional extensions
     int n_extensions = endswith_extension(modname) ? 1 : N_EXTENSIONS;
+    int ret;
 
     /*
-      this branch returns handle of libjulia
+      this branch returns handle of libjulia-internal
     */
     if (modname == NULL) {
 #ifdef _OS_WINDOWS_
         if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                                 (LPCWSTR)(uintptr_t)(&jl_load_dynamic_library),
                                 (HMODULE*)&handle)) {
-#ifndef __clang_analyzer__
-            // Hide the error throwing from the analyser since there isn't a way to express
-            // "safepoint only when throwing error" currently.
             jl_error("could not load base module");
-#endif
         }
 #else
         Dl_info info;
         if (!dladdr((void*)(uintptr_t)&jl_load_dynamic_library, &info) || !info.dli_fname) {
-#ifndef __clang_analyzer__
-            // Hide the error throwing from the analyser since there isn't a way to express
-            // "safepoint only when throwing error" currently.
             jl_error("could not load base module");
-#endif
         }
         handle = dlopen(info.dli_fname, RTLD_NOW);
 #endif
         goto done;
     }
 
-    abspath = isabspath(modname);
+    abspath = jl_isabspath(modname);
+    is_atpath = 0;
+
+    // Detect if our `modname` is something like `@rpath/libfoo.dylib`
+#ifdef _OS_DARWIN_
+    size_t nameLen = strlen(modname);
+    const char *const atPaths[] = {"@executable_path/", "@loader_path/", "@rpath/"};
+    for (i = 0; i < sizeof(atPaths)/sizeof(char*); ++i) {
+        size_t atLen = strlen(atPaths[i]);
+        if (nameLen >= atLen && 0 == strncmp(modname, atPaths[i], atLen)) {
+            is_atpath = 1;
+        }
+    }
+#endif
 
     /*
       this branch permutes all base paths in DL_LOAD_PATH with all extensions
       note: skip when !jl_base_module to avoid UndefVarError(:DL_LOAD_PATH),
             and also skip for absolute paths
+            and also skip for `@`-paths on macOS
       We also do simple string replacement here for elements starting with `@executable_path/`.
       While these exist as OS concepts on Darwin, we want to use them on other platforms
       such as Windows, so we emulate them here.
     */
-    if (!abspath && jl_base_module != NULL) {
+    if (!abspath && !is_atpath && jl_base_module != NULL) {
         jl_binding_t *b = jl_get_module_binding(jl_base_module, jl_symbol("DL_LOAD_PATH"));
-        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? b->value : NULL);
+        jl_array_t *DL_LOAD_PATH = (jl_array_t*)(b ? jl_atomic_load_relaxed(&b->value) : NULL);
         if (DL_LOAD_PATH != NULL) {
             size_t j;
             for (j = 0; j < jl_array_len(DL_LOAD_PATH); j++) {
@@ -210,8 +229,12 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
                     path[0] = '\0';
                     if (relocated[len-1] == PATHSEPSTRING[0])
                         snprintf(path, PATHBUF, "%s%s%s", relocated, modname, ext);
-                    else
-                        snprintf(path, PATHBUF, "%s" PATHSEPSTRING "%s%s", relocated, modname, ext);
+                    else {
+                        ret = snprintf(path, PATHBUF, "%s" PATHSEPSTRING "%s%s", relocated, modname, ext);
+                        if (ret < 0)
+                            jl_errorf("path is longer than %d\n", PATHBUF);
+                    }
+
 #ifdef _OS_WINDOWS_
                     if (i == 0) { // LoadLibrary already tested the extensions, we just need to check the `stat` result
 #endif
@@ -252,11 +275,7 @@ JL_DLLEXPORT void *jl_load_dynamic_library(const char *modname, unsigned flags,
 #else
         const char *reason = dlerror();
 #endif
-#ifndef __clang_analyzer__
-        // Hide the error throwing from the analyser since there isn't a way to express
-        // "safepoint only when throwing error" currently.
         jl_errorf("could not load library \"%s\"\n%s", modname, reason);
-#endif
     }
     handle = NULL;
 
@@ -272,19 +291,26 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t
 #ifdef _OS_WINDOWS_
     *value = GetProcAddress((HMODULE) handle, symbol);
 #else
-    dlerror(); /* Reset error status. */
     *value = dlsym(handle, symbol);
 #endif
 
-    /* Next, check for errors.  On Windows, a NULL pointer means the symbol
-     * was not found.  On everything else, we can have NULL symbols, so we check
-     * for non-NULL returns from dlerror().  Note that means we unconditionally
-     * call dlerror() on POSIX systems.*/
-#ifdef _OS_WINDOWS_
+    /* Next, check for errors. On Windows, a NULL pointer means the symbol was
+     * not found. On everything else, we can have NULL symbols, so we check for
+     * non-NULL returns from dlerror(). Since POSIX doesn't require `dlerror`
+     * to be implemented safely, FreeBSD doesn't (unlike everyone else, who
+     * realized decades ago that threads are here to stay), so we avoid calling
+     * `dlerror` unless we need to get the error message.
+     * https://github.com/freebsd/freebsd-src/blob/12db51d20823a5e3b9e5f8a2ea73156fe1cbfc28/libexec/rtld-elf/rtld.c#L198
+     */
     symbol_found = *value != NULL;
-#else
-    const char *err = dlerror();
-    symbol_found = err == NULL;
+#ifndef _OS_WINDOWS_
+    const char *err = "";
+    if (!symbol_found) {
+        dlerror(); /* Reset error status. */
+        *value = dlsym(handle, symbol);
+        err = dlerror();
+        symbol_found = *value != NULL || err == NULL;
+    }
 #endif
 
     if (!symbol_found && throw_err) {
@@ -292,7 +318,7 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t
         char err[256];
         win32_formatmessage(GetLastError(), err, sizeof(err));
 #endif
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
         // Hide the error throwing from the analyser since there isn't a way to express
         // "safepoint only when throwing error" currently.
         jl_errorf("could not load symbol \"%s\":\n%s", symbol, err);
@@ -303,27 +329,21 @@ JL_DLLEXPORT int jl_dlsym(void *handle, const char *symbol, void ** value, int t
 
 #ifdef _OS_WINDOWS_
 //Look for symbols in win32 libraries
-const char *jl_dlfind_win32(const char *f_name)
+JL_DLLEXPORT const char *jl_dlfind_win32(const char *f_name)
 {
     void * dummy;
     if (jl_dlsym(jl_exe_handle, f_name, &dummy, 0))
         return JL_EXE_LIBNAME;
-    if (jl_dlsym(jl_dl_handle, f_name, &dummy, 0))
-        return JL_DL_LIBNAME;
+    if (jl_dlsym(jl_libjulia_internal_handle, f_name, &dummy, 0))
+        return JL_LIBJULIA_INTERNAL_DL_LIBNAME;
+    if (jl_dlsym(jl_libjulia_handle, f_name, &dummy, 0))
+        return JL_LIBJULIA_DL_LIBNAME;
     if (jl_dlsym(jl_kernel32_handle, f_name, &dummy, 0))
         return "kernel32";
+    if (jl_dlsym(jl_crtdll_handle, f_name, &dummy, 0)) // Prefer crtdll over ntdll
+        return jl_crtdll_basename;
     if (jl_dlsym(jl_ntdll_handle, f_name, &dummy, 0))
         return "ntdll";
-    if (jl_dlsym(jl_crtdll_handle, f_name, &dummy, 0))
-#if defined(_MSC_VER)
-#if _MSC_VER == 1800
-        return "msvcr120";
-#else
-#error This version of MSVC has not been tested.
-#endif
-#else
-        return "msvcrt";
-#endif
     if (jl_dlsym(jl_winsock_handle, f_name, &dummy, 0))
         return "ws2_32";
     // additional common libraries (libc?) could be added here, but in general,
@@ -334,8 +354,8 @@ const char *jl_dlfind_win32(const char *f_name)
     // explicit is preferred over implicit
     return NULL;
     // oops, we didn't find it. NULL defaults to searching jl_RTLD_DEFAULT_handle,
-    // which defaults to jl_dl_handle, where we won't find it, and will throw the
-    // appropriate error.
+    // which defaults to jl_libjulia_internal_handle, where we won't find it, and
+    // will throw the appropriate error.
 }
 #endif
 
diff --git a/src/dump.c b/src/dump.c
index 4425ada1ad268f..63c504d5813c7e 100644
--- a/src/dump.c
+++ b/src/dump.c
@@ -8,6 +8,7 @@
 
 #include "julia.h"
 #include "julia_internal.h"
+#include "julia_gcext.h"
 #include "builtin_proto.h"
 #include "serialize.h"
 
@@ -15,24 +16,50 @@
 #include <dlfcn.h>
 #endif
 
-#ifndef _COMPILER_MICROSOFT_
 #include "valgrind.h"
-#else
-#define RUNNING_ON_VALGRIND 0
-#endif
 #include "julia_assert.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+// This file, together with ircode.c, allows (de)serialization between
+// modules and *.ji cache files. `jl_save_incremental` gets called as the final step
+// during package precompilation, and `_jl_restore_incremental` by `using SomePkg`
+// whenever `SomePkg` has not yet been loaded.
+
+// Types, methods, and method instances form a graph that may have cycles, so
+// serialization has to break these cycles. This is handled via "backreferences,"
+// referring to already (de)serialized items by an index. It is critial to ensure
+// that the indexes of these backreferences align precisely during serialization
+// and deserialization, to ensure that these integer indexes mean the same thing
+// under both circumstances. Consequently, if you are modifying this file, be
+// careful to match the sequence, if necessary reserving space for something that will
+// be updated later.
+
+// It is also necessary to save & restore references to externally-defined objects,
+// e.g., for package methods that call methods defined in Base or elsewhere.
+// Consequently during deserialization there's a distinction between "reference"
+// types, methods, and method instances (essentially like a GlobalRef),
+// and "recached" version that refer to the actual entity in the running session.
+// We complete deserialization before beginning the process of recaching,
+// because we need the backreferences during deserialization and the actual
+// objects during recaching.
+
+// Finally, because our backedge graph is not bidirectional, special handling is
+// required to identify backedges from external methods that call internal methods.
+// These get set aside and restored at the end of deserialization.
+
+// Note that one should prioritize deserialization performance over serialization performance,
+// since deserialization may be performed much more often than serialization.
+
+
 // TODO: put WeakRefs on the weak_refs list during deserialization
 // TODO: handle finalizers
 
-// hash of definitions for predefined tagged object
+// type => tag hash for a few core types (e.g., Expr, PhiNode, etc)
 static htable_t ser_tag;
-// array of definitions for the predefined tagged object types
-// (reverse of ser_tag)
+// tag => type mapping, the reverse of ser_tag
 static jl_value_t *deser_tag[256];
 // hash of some common symbols, encoded as CommonSym_tag plus 1 byte
 static htable_t common_symbol_tag;
@@ -45,11 +72,15 @@ static jl_value_t *deser_symbols[256];
 static htable_t backref_table;
 static int backref_table_numel;
 static arraylist_t backref_list;
+static htable_t new_code_instance_validate;
 
 // list of (jl_value_t **loc, size_t pos) entries
 // for anything that was flagged by the deserializer for later
-// type-rewriting of some sort
+// type-rewriting of some sort. pos is the index in backref_list.
 static arraylist_t flagref_list;
+// ref => value hash for looking up the "real" entity from
+// the deserialized ref. Used for entities that must be unique,
+// like types, methods, and method instances
 static htable_t uniquing_table;
 
 // list of (size_t pos, (void *f)(jl_value_t*)) entries
@@ -61,8 +92,18 @@ static arraylist_t reinit_list;
 // This is not quite globally rooted, but we take care to only
 // ever assigned rooted values here.
 static jl_array_t *serializer_worklist JL_GLOBALLY_ROOTED;
-
-// inverse of backedges tree
+// external MethodInstances we want to serialize
+static htable_t external_mis;
+// Inference tracks newly-inferred MethodInstances during precompilation
+// and registers them by calling jl_set_newly_inferred
+static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED;
+
+// New roots to add to Methods. These can't be added until after
+// recaching is complete, so we have to hold on to them separately
+// Stored as method => (worklist_key, roots)
+static htable_t queued_method_roots;
+
+// inverse of backedges graph (caller=>callees hash)
 htable_t edges_map;
 
 // list of requested ccallable signatures
@@ -78,7 +119,6 @@ static jl_value_t *jl_idtable_type = NULL;
 static jl_typename_t *jl_idtable_typename = NULL;
 static jl_value_t *jl_bigint_type = NULL;
 static int gmp_limb_size = 0;
-static arraylist_t builtin_typenames;
 
 static void write_uint64(ios_t *s, uint64_t i) JL_NOTSAFEPOINT
 {
@@ -110,6 +150,18 @@ jl_value_t *jl_deser_symbol(uint8_t tag)
     return deser_symbols[tag];
 }
 
+uint64_t jl_worklist_key(jl_array_t *worklist)
+{
+    assert(jl_is_array(worklist));
+    size_t len = jl_array_len(worklist);
+    if (len > 0) {
+        jl_module_t *topmod = (jl_module_t*)jl_array_ptr_ref(worklist, len-1);
+        assert(jl_is_module(topmod));
+        return topmod->build_id;
+    }
+    return 0;
+}
+
 // --- serialize ---
 
 #define jl_serialize_value(s, v) jl_serialize_value_((s), (jl_value_t*)(v), 0)
@@ -133,6 +185,11 @@ static int module_in_worklist(jl_module_t *mod) JL_NOTSAFEPOINT
     return 0;
 }
 
+static int method_instance_in_queue(jl_method_instance_t *mi)
+{
+    return ptrhash_get(&external_mis, mi) != HT_NOTFOUND;
+}
+
 // compute whether a type references something internal to worklist
 // and thus could not have existed before deserialize
 // and thus does not need delayed unique-ing
@@ -143,7 +200,7 @@ static int type_in_worklist(jl_datatype_t *dt) JL_NOTSAFEPOINT
     int i, l = jl_svec_len(dt->parameters);
     for (i = 0; i < l; i++) {
         jl_value_t *p = jl_unwrap_unionall(jl_tparam(dt, i));
-        // XXX: what about Union and TypeVar??
+        // TODO: what about Union and TypeVar??
         if (type_in_worklist((jl_datatype_t*)(jl_is_datatype(p) ? p : jl_typeof(p))))
             return 1;
     }
@@ -189,6 +246,76 @@ static int type_recursively_external(jl_datatype_t *dt) JL_NOTSAFEPOINT
     return 1;
 }
 
+// When we infer external method instances, ensure they link back to the
+// package. Otherwise they might be, e.g., for external macros
+static int has_backedge_to_worklist(jl_method_instance_t *mi, htable_t *visited)
+{
+    void **bp = ptrhash_bp(visited, mi);
+    // HT_NOTFOUND: not yet analyzed
+    // HT_NOTFOUND + 1: doesn't link back
+    // HT_NOTFOUND + 2: does link back
+    if (*bp != HT_NOTFOUND)
+        return (char*)*bp - (char*)HT_NOTFOUND - 1;
+    *bp = (void*)((char*)HT_NOTFOUND + 1);  // preliminarily mark as "not found"
+    jl_module_t *mod = mi->def.module;
+    if (jl_is_method(mod))
+        mod = ((jl_method_t*)mod)->module;
+    assert(jl_is_module(mod));
+    if (mi->precompiled || module_in_worklist(mod)) {
+        *bp = (void*)((char*)HT_NOTFOUND + 2);      // found
+        return 1;
+    }
+    if (!mi->backedges) {
+        return 0;
+    }
+    size_t i, n = jl_array_len(mi->backedges);
+    for (i = 0; i < n; i++) {
+        jl_method_instance_t *be = (jl_method_instance_t*)jl_array_ptr_ref(mi->backedges, i);
+        if (has_backedge_to_worklist(be, visited)) {
+            bp = ptrhash_bp(visited, mi);           // re-acquire since rehashing might change the location
+            *bp = (void*)((char*)HT_NOTFOUND + 2);  // found
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// given the list of MethodInstances that were inferred during the
+// build, select those that are external and have at least one
+// relocatable CodeInstance.
+static size_t queue_external_mis(jl_array_t *list)
+{
+    size_t i, n = 0;
+    htable_t visited;
+    if (list) {
+        assert(jl_is_array(list));
+        size_t n0 = jl_array_len(list);
+        htable_new(&visited, n0);
+        for (i = 0; i < n0; i++) {
+            jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i);
+            assert(jl_is_method_instance(mi));
+            if (jl_is_method(mi->def.value)) {
+                jl_method_t *m = mi->def.method;
+                if (!module_in_worklist(m->module)) {
+                    jl_code_instance_t *ci = mi->cache;
+                    int relocatable = 0;
+                    while (ci) {
+                        relocatable |= ci->relocatability;
+                        ci = ci->next;
+                    }
+                    if (relocatable && ptrhash_get(&external_mis, mi) == HT_NOTFOUND) {
+                        if (has_backedge_to_worklist(mi, &visited)) {
+                            ptrhash_put(&external_mis, mi, mi);
+                            n++;
+                        }
+                    }
+                }
+            }
+        }
+        htable_free(&visited);
+    }
+    return n;
+}
 
 static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_GC_DISABLED
 {
@@ -197,7 +324,7 @@ static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_
     if (!internal && jl_unwrap_unionall(dt->name->wrapper) == (jl_value_t*)dt) {
         tag = 6; // external primary type
     }
-    else if (!dt->isconcretetype) {
+    else if (jl_is_tuple_type(dt) ? !dt->isconcretetype : dt->hasfreetypevars) {
         tag = 0; // normal struct
     }
     else if (internal) {
@@ -213,8 +340,8 @@ static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_
         tag = 11; // external, but definitely new (still needs caching, but not full unique-ing)
     }
     else {
-        // this'll need unique-ing later
-        // flag this in the backref table as special
+        // this is eligible for (and possibly requires) unique-ing later,
+        // so flag this in the backref table as special
         uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, dt);
         assert(*bp != (uintptr_t)HT_NOTFOUND);
         *bp |= 1;
@@ -271,18 +398,14 @@ static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_
     write_int32(s->s, dt->size);
     int has_instance = (dt->instance != NULL);
     int has_layout = (dt->layout != NULL);
-    write_uint8(s->s, dt->abstract | (dt->mutabl << 1) | (has_layout << 2) | (has_instance << 3));
+    write_uint8(s->s, has_layout | (has_instance << 1));
     write_uint8(s->s, dt->hasfreetypevars
             | (dt->isconcretetype << 1)
             | (dt->isdispatchtuple << 2)
             | (dt->isbitstype << 3)
             | (dt->zeroinit << 4)
-            | (dt->isinlinealloc << 5)
-            | (dt->has_concrete_subtype << 6)
-            | (dt->cached_by_hash << 7));
-    if (!dt->abstract) {
-        write_uint16(s->s, dt->ninitialized);
-    }
+            | (dt->has_concrete_subtype << 5)
+            | (dt->cached_by_hash << 6));
     write_int32(s->s, dt->hash);
 
     if (has_layout) {
@@ -312,7 +435,6 @@ static void jl_serialize_datatype(jl_serializer_state *s, jl_datatype_t *dt) JL_
     if (has_instance)
         jl_serialize_value(s, dt->instance);
     jl_serialize_value(s, dt->name);
-    jl_serialize_value(s, dt->names);
     jl_serialize_value(s, dt->parameters);
     jl_serialize_value(s, dt->super);
     jl_serialize_value(s, dt->types);
@@ -349,18 +471,20 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
     write_int8(s->s, 0);
     jl_serialize_value(s, m->parent);
     void **table = m->bindings.table;
-    for (i = 1; i < m->bindings.size; i += 2) {
-        if (table[i] != HT_NOTFOUND) {
-            jl_binding_t *b = (jl_binding_t*)table[i];
+    for (i = 0; i < m->bindings.size; i += 2) {
+        if (table[i+1] != HT_NOTFOUND) {
+            jl_serialize_value(s, (jl_value_t*)table[i]);
+            jl_binding_t *b = (jl_binding_t*)table[i+1];
             jl_serialize_value(s, b->name);
-            jl_value_t *e = b->value;
+            jl_value_t *e = jl_atomic_load_relaxed(&b->value);
             if (!b->constp && e && jl_is_cpointer(e) && jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL)
                 // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE)
                 jl_serialize_cnull(s, jl_typeof(e));
             else
                 jl_serialize_value(s, e);
-            jl_serialize_value(s, b->globalref);
+            jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref));
             jl_serialize_value(s, b->owner);
+            jl_serialize_value(s, jl_atomic_load_relaxed(&b->ty));
             write_int8(s->s, (b->deprecated<<3) | (b->constp<<2) | (b->exportp<<1) | (b->imported));
         }
     }
@@ -378,13 +502,14 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
     write_uint8(s->s, m->optlevel);
     write_uint8(s->s, m->compile);
     write_uint8(s->s, m->infer);
+    write_uint8(s->s, m->max_methods);
 }
 
-static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED
+static int jl_serialize_generic(jl_serializer_state *s, jl_value_t *v) JL_GC_DISABLED
 {
     if (v == NULL) {
         write_uint8(s->s, TAG_NULL);
-        return;
+        return 1;
     }
 
     void *tag = ptrhash_get(&ser_tag, v);
@@ -393,28 +518,29 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         if (t8 <= LAST_TAG)
             write_uint8(s->s, 0);
         write_uint8(s->s, t8);
-        return;
+        return 1;
     }
+
     if (jl_is_symbol(v)) {
         void *idx = ptrhash_get(&common_symbol_tag, v);
         if (idx != HT_NOTFOUND) {
             write_uint8(s->s, TAG_COMMONSYM);
             write_uint8(s->s, (uint8_t)(size_t)idx);
-            return;
+            return 1;
         }
     }
     else if (v == (jl_value_t*)jl_core_module) {
         write_uint8(s->s, TAG_CORE);
-        return;
+        return 1;
     }
     else if (v == (jl_value_t*)jl_base_module) {
         write_uint8(s->s, TAG_BASE);
-        return;
+        return 1;
     }
 
     if (jl_typeis(v, jl_string_type) && jl_string_len(v) == 0) {
         jl_serialize_value(s, jl_an_empty_string);
-        return;
+        return 1;
     }
     else if (!jl_is_uint8(v)) {
         void **bp = ptrhash_bp(&backref_table, v);
@@ -428,7 +554,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
                 write_uint8(s->s, TAG_BACKREF);
                 write_int32(s->s, pos);
             }
-            return;
+            return 1;
         }
         intptr_t pos = backref_table_numel++;
         if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) {
@@ -453,6 +579,78 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         ptrhash_put(&backref_table, v, (char*)HT_NOTFOUND + pos + 1);
     }
 
+    return 0;
+}
+
+static void jl_serialize_code_instance(jl_serializer_state *s, jl_code_instance_t *codeinst, int skip_partial_opaque, int internal) JL_GC_DISABLED
+{
+    if (internal > 2) {
+        while (codeinst && !codeinst->relocatability)
+            codeinst = codeinst->next;
+    }
+    if (jl_serialize_generic(s, (jl_value_t*)codeinst)) {
+        return;
+    }
+    assert(codeinst != NULL); // handle by jl_serialize_generic, but this makes clang-sa happy
+
+    int validate = 0;
+    if (codeinst->max_world == ~(size_t)0)
+        validate = 1; // can check on deserialize if this cache entry is still valid
+    int flags = validate << 0;
+    if (codeinst->invoke == jl_fptr_const_return)
+        flags |= 1 << 2;
+    if (codeinst->precompile)
+        flags |= 1 << 3;
+
+    // CodeInstances with PartialOpaque return type are currently not allowed
+    // to be cached. We skip them in serialization here, forcing them to
+    // be re-infered on reload.
+    int write_ret_type = validate || codeinst->min_world == 0;
+    if (write_ret_type && codeinst->rettype_const &&
+            jl_typeis(codeinst->rettype_const, jl_partial_opaque_type)) {
+        if (skip_partial_opaque) {
+            jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, internal);
+            return;
+        }
+        else {
+            jl_error("Cannot serialize CodeInstance with PartialOpaque rettype");
+        }
+    }
+
+    write_uint8(s->s, TAG_CODE_INSTANCE);
+    write_uint8(s->s, flags);
+    write_uint32(s->s, codeinst->ipo_purity_bits);
+    write_uint32(s->s, codeinst->purity_bits);
+    jl_serialize_value(s, (jl_value_t*)codeinst->def);
+    if (write_ret_type) {
+        jl_serialize_value(s, codeinst->inferred);
+        jl_serialize_value(s, codeinst->rettype_const);
+        jl_serialize_value(s, codeinst->rettype);
+        jl_serialize_value(s, codeinst->argescapes);
+    }
+    else {
+        // skip storing useless data
+        jl_serialize_value(s, NULL);
+        jl_serialize_value(s, NULL);
+        jl_serialize_value(s, jl_any_type);
+        jl_serialize_value(s, jl_nothing);
+    }
+    write_uint8(s->s, codeinst->relocatability);
+    jl_serialize_code_instance(s, codeinst->next, skip_partial_opaque, internal);
+}
+
+enum METHOD_SERIALIZATION_MODE {
+    METHOD_INTERNAL = 1,
+    METHOD_EXTERNAL_MT = 2,
+    METHOD_HAS_NEW_ROOTS = 4,
+};
+
+static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED
+{
+    if (jl_serialize_generic(s, v)) {
+        return;
+    }
+
     size_t i;
     if (jl_is_svec(v)) {
         size_t l = jl_svec_len(v);
@@ -570,9 +768,17 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
     else if (jl_is_method(v)) {
         write_uint8(s->s, TAG_METHOD);
         jl_method_t *m = (jl_method_t*)v;
-        int internal = 1;
-        internal = module_in_worklist(m->module);
-        if (!internal) {
+        uint64_t key = 0;
+        int serialization_mode = 0, nwithkey = 0;
+        if (m->is_for_opaque_closure || module_in_worklist(m->module))
+            serialization_mode |= METHOD_INTERNAL;
+        if (!(serialization_mode & METHOD_INTERNAL)) {
+            key = jl_worklist_key(serializer_worklist);
+            nwithkey = nroots_with_key(m, key);
+            if (nwithkey > 0)
+                serialization_mode |= METHOD_HAS_NEW_ROOTS;
+        }
+        if (!(serialization_mode & METHOD_INTERNAL)) {
             // flag this in the backref table as special
             uintptr_t *bp = (uintptr_t*)ptrhash_bp(&backref_table, v);
             assert(*bp != (uintptr_t)HT_NOTFOUND);
@@ -580,11 +786,44 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         }
         jl_serialize_value(s, (jl_value_t*)m->sig);
         jl_serialize_value(s, (jl_value_t*)m->module);
-        write_uint8(s->s, internal);
-        if (!internal)
+        if (m->external_mt != NULL) {
+            assert(jl_typeis(m->external_mt, jl_methtable_type));
+            jl_methtable_t *mt = (jl_methtable_t*)m->external_mt;
+            if (!module_in_worklist(mt->module)) {
+                serialization_mode |= METHOD_EXTERNAL_MT;
+            }
+        }
+        write_uint8(s->s, serialization_mode);
+        if (serialization_mode & METHOD_EXTERNAL_MT) {
+            // We reference this method table by module and binding
+            jl_methtable_t *mt = (jl_methtable_t*)m->external_mt;
+            jl_serialize_value(s, mt->module);
+            jl_serialize_value(s, mt->name);
+        }
+        else {
+            jl_serialize_value(s, (jl_value_t*)m->external_mt);
+        }
+        if (!(serialization_mode & METHOD_INTERNAL)) {
+            if (serialization_mode & METHOD_HAS_NEW_ROOTS) {
+                // Serialize the roots that belong to key
+                write_uint64(s->s, key);
+                write_int32(s->s, nwithkey);
+                rle_iter_state rootiter = rle_iter_init(0);
+                uint64_t *rletable = NULL;
+                size_t nblocks2 = 0, nroots = jl_array_len(m->roots);
+                if (m->root_blocks) {
+                    rletable = (uint64_t*)jl_array_data(m->root_blocks);
+                    nblocks2 = jl_array_len(m->root_blocks);
+                }
+                // this visits every item, if it becomes a bottlneck we could hop blocks
+                while (rle_iter_increment(&rootiter, nroots, rletable, nblocks2))
+                    if (rootiter.key == key)
+                        jl_serialize_value(s, jl_array_ptr_ref(m->roots, rootiter.i));
+            }
             return;
+        }
         jl_serialize_value(s, m->specializations);
-        jl_serialize_value(s, m->speckeyset);
+        jl_serialize_value(s, jl_atomic_load_relaxed(&m->speckeyset));
         jl_serialize_value(s, (jl_value_t*)m->name);
         jl_serialize_value(s, (jl_value_t*)m->file);
         write_int32(s->s, m->line);
@@ -594,22 +833,33 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         write_int32(s->s, m->nkw);
         write_int8(s->s, m->isva);
         write_int8(s->s, m->pure);
+        write_int8(s->s, m->is_for_opaque_closure);
+        write_int8(s->s, m->constprop);
+        write_uint8(s->s, m->purity.bits);
         jl_serialize_value(s, (jl_value_t*)m->slot_syms);
         jl_serialize_value(s, (jl_value_t*)m->roots);
+        jl_serialize_value(s, (jl_value_t*)m->root_blocks);
+        write_int32(s->s, m->nroots_sysimg);
         jl_serialize_value(s, (jl_value_t*)m->ccallable);
         jl_serialize_value(s, (jl_value_t*)m->source);
         jl_serialize_value(s, (jl_value_t*)m->unspecialized);
         jl_serialize_value(s, (jl_value_t*)m->generator);
         jl_serialize_value(s, (jl_value_t*)m->invokes);
+        jl_serialize_value(s, (jl_value_t*)m->recursion_relation);
     }
     else if (jl_is_method_instance(v)) {
-        write_uint8(s->s, TAG_METHOD_INSTANCE);
         jl_method_instance_t *mi = (jl_method_instance_t*)v;
+        if (jl_is_method(mi->def.value) && mi->def.method->is_for_opaque_closure) {
+            jl_error("unimplemented: serialization of MethodInstances for OpaqueClosure");
+        }
+        write_uint8(s->s, TAG_METHOD_INSTANCE);
         int internal = 0;
         if (!jl_is_method(mi->def.method))
             internal = 1;
         else if (module_in_worklist(mi->def.method->module))
             internal = 2;
+        else if (ptrhash_get(&external_mis, (void*)mi) != HT_NOTFOUND)
+            internal = 3;
         write_uint8(s->s, internal);
         if (!internal) {
             // also flag this in the backref table as special
@@ -627,12 +877,12 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
         jl_array_t *backedges = mi->backedges;
         if (backedges) {
             // filter backedges to only contain pointers
-            // to items that we will actually store (internal == 2)
+            // to items that we will actually store (internal >= 2)
             size_t ins, i, l = jl_array_len(backedges);
             jl_method_instance_t **b_edges = (jl_method_instance_t**)jl_array_data(backedges);
             for (ins = i = 0; i < l; i++) {
                 jl_method_instance_t *backedge = b_edges[i];
-                if (module_in_worklist(backedge->def.method->module)) {
+                if (module_in_worklist(backedge->def.method->module) || method_instance_in_queue(backedge)) {
                     b_edges[ins++] = backedge;
                 }
             }
@@ -642,33 +892,11 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
                 backedges = NULL;
         }
         jl_serialize_value(s, (jl_value_t*)backedges);
-        jl_serialize_value(s, (jl_value_t*)mi->cache);
+        jl_serialize_value(s, (jl_value_t*)NULL); //callbacks
+        jl_serialize_code_instance(s, mi->cache, 1, internal);
     }
     else if (jl_is_code_instance(v)) {
-        write_uint8(s->s, TAG_CODE_INSTANCE);
-        jl_code_instance_t *codeinst = (jl_code_instance_t*)v;
-        int validate = 0;
-        if (codeinst->max_world == ~(size_t)0)
-            validate = 1; // can check on deserialize if this cache entry is still valid
-        int flags = validate << 0;
-        if (codeinst->invoke == jl_fptr_const_return)
-            flags |= 1 << 2;
-        if (codeinst->precompile)
-            flags |= 1 << 3;
-        write_uint8(s->s, flags);
-        jl_serialize_value(s, (jl_value_t*)codeinst->def);
-        if (validate || codeinst->min_world == 0) {
-            jl_serialize_value(s, codeinst->inferred);
-            jl_serialize_value(s, codeinst->rettype_const);
-            jl_serialize_value(s, codeinst->rettype);
-        }
-        else {
-            // skip storing useless data
-            jl_serialize_value(s, NULL);
-            jl_serialize_value(s, NULL);
-            jl_serialize_value(s, jl_any_type);
-        }
-        jl_serialize_value(s, codeinst->next);
+        jl_serialize_code_instance(s, (jl_code_instance_t*)v, 0, 2);
     }
     else if (jl_typeis(v, jl_module_type)) {
         jl_serialize_module(s, (jl_module_t*)v);
@@ -676,6 +904,9 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
     else if (jl_typeis(v, jl_task_type)) {
         jl_error("Task cannot be serialized");
     }
+    else if (jl_typeis(v, jl_opaque_closure_type)) {
+        jl_error("Live opaque closures cannot be serialized");
+    }
     else if (jl_typeis(v, jl_string_type)) {
         write_uint8(s->s, TAG_STRING);
         write_int32(s->s, jl_string_len(v));
@@ -771,10 +1002,26 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
                 jl_serialize_value(s, tn->wrapper);
                 jl_serialize_value(s, tn->mt);
                 ios_write(s->s, (char*)&tn->hash, sizeof(tn->hash));
+                write_uint8(s->s, tn->abstract | (tn->mutabl << 1) | (tn->mayinlinealloc << 2));
+                write_uint8(s->s, tn->max_methods);
+                if (!tn->abstract)
+                    write_uint16(s->s, tn->n_uninitialized);
+                size_t nb = tn->atomicfields ? (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0;
+                write_int32(s->s, nb);
+                if (nb)
+                    ios_write(s->s, (char*)tn->atomicfields, nb);
+                nb = tn->constfields ? (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t) : 0;
+                write_int32(s->s, nb);
+                if (nb)
+                    ios_write(s->s, (char*)tn->constfields, nb);
             }
             return;
         }
 
+        if (jl_is_foreign_type(t)) {
+            jl_error("Cannot serialize instances of foreign datatypes");
+        }
+
         char *data = (char*)jl_data_ptr(v);
         size_t i, j, np = t->layout->npointers;
         uint32_t nf = t->layout->nfields;
@@ -789,7 +1036,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
                         ios_write(s->s, last, prevptr - last);
                     jl_value_t *e = *(jl_value_t**)prevptr;
                     JL_GC_PROMISE_ROOTED(e);
-                    if (t->mutabl && e && jl_field_isptr(t, i - 1) && jl_is_cpointer(e) &&
+                    if (t->name->mutabl && e && jl_field_isptr(t, i - 1) && jl_is_cpointer(e) &&
                         jl_unbox_voidpointer(e) != (void*)-1 && jl_unbox_voidpointer(e) != NULL)
                         // reset Ptr fields to C_NULL (but keep MAP_FAILED / INVALID_HANDLE)
                         jl_serialize_cnull(s, jl_typeof(e));
@@ -805,7 +1052,7 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
             }
             if (i == nf)
                 break;
-            if (t->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(void**)ptr != (void*)-1) {
+            if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i)) && *(void**)ptr != (void*)-1) {
                 if (ptr > last)
                     ios_write(s->s, last, ptr - last);
                 char *n = NULL;
@@ -819,6 +1066,28 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int as_li
     }
 }
 
+// Used to serialize the external method instances queued in queued_method_roots (from newly_inferred)
+static void serialize_htable_keys(jl_serializer_state *s, htable_t *ht, int nitems)
+{
+    write_int32(s->s, nitems);
+    void **table = ht->table;
+    size_t i, n = 0, sz = ht->size;
+    (void)n;
+    for (i = 0; i < sz; i += 2) {
+        if (table[i+1] != HT_NOTFOUND) {
+            jl_serialize_value(s, (jl_value_t*)table[i]);
+            n += 1;
+        }
+    }
+    assert(n == nitems);
+}
+
+// Create the forward-edge map (caller => callees)
+// the intent of these functions is to invert the backedges tree
+// for anything that points to a method not part of the worklist
+// or method instances not in the queue
+//
+// from MethodTables
 static void jl_collect_missing_backedges_to_mod(jl_methtable_t *mt)
 {
     jl_array_t *backedges = mt->backedges;
@@ -826,7 +1095,7 @@ static void jl_collect_missing_backedges_to_mod(jl_methtable_t *mt)
         size_t i, l = jl_array_len(backedges);
         for (i = 1; i < l; i += 2) {
             jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i);
-            jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1);
+            jl_value_t *missing_callee = jl_array_ptr_ref(backedges, i - 1);  // signature of abstract callee
             jl_array_t **edges = (jl_array_t**)ptrhash_bp(&edges_map, (void*)caller);
             if (*edges == HT_NOTFOUND)
                 *edges = jl_alloc_vec_any(0);
@@ -835,8 +1104,7 @@ static void jl_collect_missing_backedges_to_mod(jl_methtable_t *mt)
     }
 }
 
-// the intent of this function is to invert the backedges tree
-// for anything that points to a method not part of the worklist
+// from MethodInstances
 static void collect_backedges(jl_method_instance_t *callee) JL_GC_DISABLED
 {
     jl_array_t *backedges = callee->backedges;
@@ -853,6 +1121,11 @@ static void collect_backedges(jl_method_instance_t *callee) JL_GC_DISABLED
 }
 
 
+// For functions owned by modules not on the worklist, call this on each method.
+// - if the method is owned by a worklist module, add it to the list of things to be
+//   fully serialized
+// - otherwise (i.e., if it's an external method), check all of its specializations.
+//   Collect backedges from those that are not being fully serialized.
 static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure) JL_GC_DISABLED
 {
     jl_array_t *s = (jl_array_t*)closure;
@@ -866,7 +1139,7 @@ static int jl_collect_methcache_from_mod(jl_typemap_entry_t *ml, void *closure)
         size_t i, l = jl_svec_len(specializations);
         for (i = 0; i < l; i++) {
             jl_method_instance_t *callee = (jl_method_instance_t*)jl_svecref(specializations, i);
-            if (callee != NULL)
+            if ((jl_value_t*)callee != jl_nothing && !method_instance_in_queue(callee))
                 collect_backedges(callee);
         }
     }
@@ -878,7 +1151,10 @@ static void jl_collect_methtable_from_mod(jl_array_t *s, jl_methtable_t *mt) JL_
     jl_typemap_visitor(mt->defs, jl_collect_methcache_from_mod, (void*)s);
 }
 
-static void jl_collect_lambdas_from_mod(jl_array_t *s, jl_module_t *m) JL_GC_DISABLED
+// Collect methods of external functions defined by modules in the worklist
+// "extext" = "extending external"
+// Also collect relevant backedges
+static void jl_collect_extext_methods_from_mod(jl_array_t *s, jl_module_t *m) JL_GC_DISABLED
 {
     if (module_in_worklist(m))
         return;
@@ -905,7 +1181,17 @@ static void jl_collect_lambdas_from_mod(jl_array_t *s, jl_module_t *m) JL_GC_DIS
                     jl_module_t *child = (jl_module_t*)b->value;
                     if (child != m && child->parent == m && child->name == b->name) {
                         // this is the original/primary binding for the submodule
-                        jl_collect_lambdas_from_mod(s, (jl_module_t*)b->value);
+                        jl_collect_extext_methods_from_mod(s, (jl_module_t*)b->value);
+                    }
+                }
+                else if (jl_is_mtable(b->value)) {
+                    jl_methtable_t *mt = (jl_methtable_t*)b->value;
+                    if (mt->module == m && mt->name == b->name) {
+                        // this is probably an external method table, so let's assume so
+                        // as there is no way to precisely distinguish them,
+                        // and the rest of this serializer does not bother
+                        // to handle any method tables specially
+                        jl_collect_methtable_from_mod(s, (jl_methtable_t*)bv);
                     }
                 }
             }
@@ -931,18 +1217,20 @@ static void jl_collect_backedges_to(jl_method_instance_t *caller, htable_t *all_
     }
 }
 
-static void jl_collect_backedges(jl_array_t *s, jl_array_t *t)
+// Extract `edges` and `ext_targets` from `edges_map`
+// This identifies internal->external edges in the call graph, pulling them out for special treatment.
+static void jl_collect_backedges( /* edges */ jl_array_t *s, /* ext_targets */ jl_array_t *t)
 {
-    htable_t all_targets;
-    htable_t all_callees;
+    htable_t all_targets;         // target => tgtindex mapping
+    htable_t all_callees;         // MIs called by worklist methods (eff. Set{MethodInstance})
     htable_new(&all_targets, 0);
     htable_new(&all_callees, 0);
     size_t i;
-    void **table = edges_map.table;
+    void **table = edges_map.table;    // edges is caller => callees
     for (i = 0; i < edges_map.size; i += 2) {
         jl_method_instance_t *caller = (jl_method_instance_t*)table[i];
         jl_array_t *callees = (jl_array_t*)table[i + 1];
-        if (callees != HT_NOTFOUND && module_in_worklist(caller->def.method->module)) {
+        if (callees != HT_NOTFOUND && (module_in_worklist(caller->def.method->module) || method_instance_in_queue(caller))) {
             size_t i, l = jl_array_len(callees);
             for (i = 0; i < l; i++) {
                 jl_value_t *c = jl_array_ptr_ref(callees, i);
@@ -971,7 +1259,7 @@ static void jl_collect_backedges(jl_array_t *s, jl_array_t *t)
                         size_t min_valid = 0;
                         size_t max_valid = ~(size_t)0;
                         int ambig = 0;
-                        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, -1, 0, jl_world_counter, &min_valid, &max_valid, &ambig);
+                        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_atomic_load_acquire(&jl_world_counter), &min_valid, &max_valid, &ambig);
                         if (matches == jl_false) {
                             valid = 0;
                             break;
@@ -1069,9 +1357,11 @@ static void write_module_path(ios_t *s, jl_module_t *depmod) JL_NOTSAFEPOINT
     ios_write(s, mname, slen);
 }
 
-// serialize the global _require_dependencies array of pathnames that
-// are include dependencies
-static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t *mod_array)
+// Cache file header
+// Serialize the global Base._require_dependencies array of pathnames that
+// are include dependencies. Also write Preferences and return
+// the location of the srctext "pointer" in the header index.
+static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp)
 {
     int64_t initial_pos = 0;
     int64_t pos = 0;
@@ -1085,10 +1375,11 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t *
     if (!unique_func)
         unique_func = jl_get_global(jl_base_module, jl_symbol("unique"));
     jl_value_t *uniqargs[2] = {unique_func, (jl_value_t*)deps};
-    size_t last_age = jl_get_ptls_states()->world_age;
-    jl_get_ptls_states()->world_age = jl_world_counter;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
     jl_array_t *udeps = (*udepsp = deps && unique_func ? (jl_array_t*)jl_apply(uniqargs, 2) : NULL);
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
 
     // write a placeholder for total size so that we can quickly seek past all of the
     // dependencies if we don't need them
@@ -1123,6 +1414,58 @@ static int64_t write_dependency_list(ios_t *s, jl_array_t **udepsp, jl_array_t *
             write_int32(s, 0);
         }
         write_int32(s, 0); // terminator, for ease of reading
+
+        // Calculate Preferences hash for current package.
+        jl_value_t *prefs_hash = NULL;
+        jl_value_t *prefs_list = NULL;
+        JL_GC_PUSH1(&prefs_list);
+        if (jl_base_module) {
+            // Toplevel module is the module we're currently compiling, use it to get our preferences hash
+            jl_value_t * toplevel = (jl_value_t*)jl_get_global(jl_base_module, jl_symbol("__toplevel__"));
+            jl_value_t * prefs_hash_func = jl_get_global(jl_base_module, jl_symbol("get_preferences_hash"));
+            jl_value_t * get_compiletime_prefs_func = jl_get_global(jl_base_module, jl_symbol("get_compiletime_preferences"));
+
+            if (toplevel && prefs_hash_func && get_compiletime_prefs_func) {
+                // Temporary invoke in newest world age
+                size_t last_age = ct->world_age;
+                ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
+
+                // call get_compiletime_prefs(__toplevel__)
+                jl_value_t *args[3] = {get_compiletime_prefs_func, (jl_value_t*)toplevel, NULL};
+                prefs_list = (jl_value_t*)jl_apply(args, 2);
+
+                // Call get_preferences_hash(__toplevel__, prefs_list)
+                args[0] = prefs_hash_func;
+                args[2] = prefs_list;
+                prefs_hash = (jl_value_t*)jl_apply(args, 3);
+
+                // Reset world age to normal
+                ct->world_age = last_age;
+            }
+        }
+
+        // If we successfully got the preferences, write it out, otherwise write `0` for this `.ji` file.
+        if (prefs_hash != NULL && prefs_list != NULL) {
+            size_t i, l = jl_array_len(prefs_list);
+            for (i = 0; i < l; i++) {
+                jl_value_t *pref_name = jl_array_ptr_ref(prefs_list, i);
+                size_t slen = jl_string_len(pref_name);
+                write_int32(s, slen);
+                ios_write(s, jl_string_data(pref_name), slen);
+            }
+            write_int32(s, 0); // terminator
+            write_uint64(s, jl_unbox_uint64(prefs_hash));
+        } else {
+            // This is an error path, but let's at least generate a valid `.ji` file.
+            // We declare an empty list of preference names, followed by a zero-hash.
+            // The zero-hash is not what would be generated for an empty set of preferences,
+            // and so this `.ji` file will be invalidated by a future non-erroring pass
+            // through this function.
+            write_int32(s, 0);
+            write_uint64(s, 0);
+        }
+        JL_GC_POP(); // for prefs_list
+
         // write a dummy file position to indicate the beginning of the source-text
         pos = ios_pos(s);
         ios_seek(s, initial_pos);
@@ -1139,6 +1482,7 @@ static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc
 
 static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_value_t **loc) JL_GC_DISABLED
 {
+    assert(pos == backref_list.len - 1 && "nothing should have been deserialized since assigning pos");
     int tag = read_uint8(s->s);
     if (tag == 6 || tag == 7) {
         jl_typename_t *name = (jl_typename_t*)jl_deserialize_value(s, NULL);
@@ -1154,40 +1498,27 @@ static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_v
         backref_list.items[pos] = dtv;
         return dtv;
     }
-    size_t size = read_int32(s->s);
-    uint8_t flags = read_uint8(s->s);
-    uint8_t memflags = read_uint8(s->s);
-    jl_datatype_t *dt = NULL;
-    if (tag == 0 || tag == 5 || tag == 10 || tag == 11 || tag == 12)
-        dt = jl_new_uninitialized_datatype();
-    else {
+    if (!(tag == 0 || tag == 5 || tag == 10 || tag == 11 || tag == 12)) {
         assert(0 && "corrupt deserialization state");
         abort();
     }
-    assert(pos == backref_list.len - 1 && "nothing should have been deserialized since assigning pos");
+    jl_datatype_t *dt = jl_new_uninitialized_datatype();
     backref_list.items[pos] = dt;
+    if (loc != NULL && loc != HT_NOTFOUND)
+        *loc = (jl_value_t*)dt;
+    size_t size = read_int32(s->s);
+    uint8_t flags = read_uint8(s->s);
+    uint8_t memflags = read_uint8(s->s);
     dt->size = size;
-    dt->abstract = flags & 1;
-    dt->mutabl = (flags >> 1) & 1;
-    int has_layout = (flags >> 2) & 1;
-    int has_instance = (flags >> 3) & 1;
+    int has_layout = flags & 1;
+    int has_instance = (flags >> 1) & 1;
     dt->hasfreetypevars = memflags & 1;
     dt->isconcretetype = (memflags >> 1) & 1;
     dt->isdispatchtuple = (memflags >> 2) & 1;
     dt->isbitstype = (memflags >> 3) & 1;
     dt->zeroinit = (memflags >> 4) & 1;
-    dt->isinlinealloc = (memflags >> 5) & 1;
-    dt->has_concrete_subtype = (memflags >> 6) & 1;
-    dt->cached_by_hash = (memflags >> 7) & 1;
-    dt->types = NULL;
-    dt->parameters = NULL;
-    dt->name = NULL;
-    dt->super = NULL;
-    dt->layout = NULL;
-    if (!dt->abstract)
-        dt->ninitialized = read_uint16(s->s);
-    else
-        dt->ninitialized = 0;
+    dt->has_concrete_subtype = (memflags >> 5) & 1;
+    dt->cached_by_hash = (memflags >> 6) & 1;
     dt->hash = read_int32(s->s);
 
     if (has_layout) {
@@ -1235,8 +1566,6 @@ static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_v
     }
     dt->name = (jl_typename_t*)jl_deserialize_value(s, (jl_value_t**)&dt->name);
     jl_gc_wb(dt, dt->name);
-    dt->names = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->names);
-    jl_gc_wb(dt, dt->names);
     dt->parameters = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&dt->parameters);
     jl_gc_wb(dt, dt->parameters);
     dt->super = (jl_datatype_t*)jl_deserialize_value(s, (jl_value_t**)&dt->super);
@@ -1247,14 +1576,16 @@ static jl_value_t *jl_deserialize_datatype(jl_serializer_state *s, int pos, jl_v
     return (jl_value_t*)dt;
 }
 
-static jl_value_t *jl_deserialize_value_svec(jl_serializer_state *s, uint8_t tag) JL_GC_DISABLED
+static jl_value_t *jl_deserialize_value_svec(jl_serializer_state *s, uint8_t tag, jl_value_t **loc) JL_GC_DISABLED
 {
     size_t i, len;
     if (tag == TAG_SVEC)
         len = read_uint8(s->s);
     else
         len = read_int32(s->s);
-    jl_svec_t *sv = jl_alloc_svec_uninit(len);
+    jl_svec_t *sv = jl_alloc_svec(len);
+    if (loc != NULL)
+        *loc = (jl_value_t*)sv;
     arraylist_push(&backref_list, (jl_value_t*)sv);
     jl_value_t **data = jl_svec_data(sv);
     for (i = 0; i < len; i++) {
@@ -1298,7 +1629,7 @@ static jl_value_t *jl_deserialize_value_array(jl_serializer_state *s, uint8_t ta
         isptr = (elsize >> 15) & 1;
         hasptr = (elsize >> 14) & 1;
         isunion = (elsize >> 13) & 1;
-        elsize = elsize & 0x3fff;
+        elsize = elsize & 0x1fff;
     }
     uintptr_t pos = backref_list.len;
     arraylist_push(&backref_list, NULL);
@@ -1365,22 +1696,52 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_
     jl_gc_wb(m, m->sig);
     m->module = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&m->module);
     jl_gc_wb(m, m->module);
-    int internal = read_uint8(s->s);
-    if (!internal) {
+    int serialization_mode = read_uint8(s->s);
+    if (serialization_mode & METHOD_EXTERNAL_MT) {
+        jl_module_t *mt_mod = (jl_module_t*)jl_deserialize_value(s, NULL);
+        jl_sym_t *mt_name = (jl_sym_t*)jl_deserialize_value(s, NULL);
+        m->external_mt = jl_get_global(mt_mod, mt_name);
+        jl_gc_wb(m, m->external_mt);
+        assert(jl_typeis(m->external_mt, jl_methtable_type));
+    }
+    else {
+        m->external_mt = jl_deserialize_value(s, &m->external_mt);
+        jl_gc_wb(m, m->external_mt);
+    }
+    if (!(serialization_mode & METHOD_INTERNAL)) {
         assert(loc != NULL && loc != HT_NOTFOUND);
         arraylist_push(&flagref_list, loc);
         arraylist_push(&flagref_list, (void*)pos);
+        if (serialization_mode & METHOD_HAS_NEW_ROOTS) {
+            uint64_t key = read_uint64(s->s);
+            int i, nnew = read_int32(s->s);
+            jl_array_t *newroots = jl_alloc_vec_any(nnew);
+            jl_value_t **data = (jl_value_t**)jl_array_data(newroots);
+            for (i = 0; i < nnew; i++)
+                data[i] = jl_deserialize_value(s, &(data[i]));
+            // Storing the new roots in `m->roots` risks losing them due to recaching
+            // (which replaces pointers to `m` with ones to the "live" method).
+            // Put them in separate storage so we can find them later.
+            assert(ptrhash_get(&queued_method_roots, m) == HT_NOTFOUND);
+            // In storing the key, on 32-bit platforms we need two slots. Might as well do this for all platforms.
+            jl_svec_t *qmrval = jl_alloc_svec_uninit(3);    // GC is disabled
+            jl_svec_data(qmrval)[0] = (jl_value_t*)(uintptr_t)(key & ((((uint64_t)1) << 32) - 1));          // lo bits
+            jl_svec_data(qmrval)[1] = (jl_value_t*)(uintptr_t)((key >> 32) & ((((uint64_t)1) << 32) - 1));  // hi bits
+            jl_svec_data(qmrval)[2] = (jl_value_t*)newroots;
+            ptrhash_put(&queued_method_roots, m, qmrval);
+        }
         return (jl_value_t*)m;
     }
     m->specializations = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->specializations);
     jl_gc_wb(m, m->specializations);
-    m->speckeyset = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->speckeyset);
-    jl_gc_wb(m, m->speckeyset);
+    jl_array_t *speckeyset = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->speckeyset);
+    jl_atomic_store_relaxed(&m->speckeyset, speckeyset);
+    jl_gc_wb(m, speckeyset);
     m->name = (jl_sym_t*)jl_deserialize_value(s, NULL);
     jl_gc_wb(m, m->name);
     m->file = (jl_sym_t*)jl_deserialize_value(s, NULL);
     m->line = read_int32(s->s);
-    m->primary_world = jl_world_counter;
+    m->primary_world = jl_atomic_load_acquire(&jl_world_counter);
     m->deleted_world = ~(size_t)0;
     m->called = read_int32(s->s);
     m->nargs = read_int32(s->s);
@@ -1388,11 +1749,18 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_
     m->nkw = read_int32(s->s);
     m->isva = read_int8(s->s);
     m->pure = read_int8(s->s);
+    m->is_for_opaque_closure = read_int8(s->s);
+    m->constprop = read_int8(s->s);
+    m->purity.bits = read_uint8(s->s);
     m->slot_syms = jl_deserialize_value(s, (jl_value_t**)&m->slot_syms);
     jl_gc_wb(m, m->slot_syms);
     m->roots = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->roots);
     if (m->roots)
         jl_gc_wb(m, m->roots);
+    m->root_blocks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&m->root_blocks);
+    if (m->root_blocks)
+        jl_gc_wb(m, m->root_blocks);
+    m->nroots_sysimg = read_int32(s->s);
     m->ccallable = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&m->ccallable);
     if (m->ccallable) {
         jl_gc_wb(m, m->ccallable);
@@ -1409,6 +1777,9 @@ static jl_value_t *jl_deserialize_value_method(jl_serializer_state *s, jl_value_
         jl_gc_wb(m, m->generator);
     m->invokes = jl_deserialize_value(s, (jl_value_t**)&m->invokes);
     jl_gc_wb(m, m->invokes);
+    m->recursion_relation = jl_deserialize_value(s, (jl_value_t**)&m->recursion_relation);
+    if (m->recursion_relation)
+        jl_gc_wb(m, m->recursion_relation);
     JL_MUTEX_INIT(&m->writelock);
     return (jl_value_t*)m;
 }
@@ -1422,6 +1793,10 @@ static jl_value_t *jl_deserialize_value_method_instance(jl_serializer_state *s,
     uintptr_t pos = backref_list.len;
     arraylist_push(&backref_list, mi);
     int internal = read_uint8(s->s);
+    if (internal == 1) {
+        mi->uninferred = jl_deserialize_value(s, &mi->uninferred);
+        jl_gc_wb(mi, mi->uninferred);
+    }
     mi->specTypes = (jl_value_t*)jl_deserialize_value(s, (jl_value_t**)&mi->specTypes);
     jl_gc_wb(mi, mi->specTypes);
     mi->def.value = jl_deserialize_value(s, &mi->def.value);
@@ -1434,15 +1809,14 @@ static jl_value_t *jl_deserialize_value_method_instance(jl_serializer_state *s,
         return (jl_value_t*)mi;
     }
 
-    if (internal == 1) {
-        mi->uninferred = jl_deserialize_value(s, &mi->uninferred);
-        jl_gc_wb(mi, mi->uninferred);
-    }
     mi->sparam_vals = (jl_svec_t*)jl_deserialize_value(s, (jl_value_t**)&mi->sparam_vals);
     jl_gc_wb(mi, mi->sparam_vals);
     mi->backedges = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&mi->backedges);
     if (mi->backedges)
         jl_gc_wb(mi, mi->backedges);
+    mi->callbacks = (jl_array_t*)jl_deserialize_value(s, (jl_value_t**)&mi->callbacks);
+    if (mi->callbacks)
+        jl_gc_wb(mi, mi->callbacks);
     mi->cache = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&mi->cache);
     if (mi->cache)
         jl_gc_wb(mi, mi->cache);
@@ -1458,6 +1832,8 @@ static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl
     int flags = read_uint8(s->s);
     int validate = (flags >> 0) & 3;
     int constret = (flags >> 2) & 1;
+    codeinst->ipo_purity_bits = read_uint32(s->s);
+    codeinst->purity_bits = read_uint32(s->s);
     codeinst->def = (jl_method_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->def);
     jl_gc_wb(codeinst, codeinst->def);
     codeinst->inferred = jl_deserialize_value(s, &codeinst->inferred);
@@ -1467,14 +1843,20 @@ static jl_value_t *jl_deserialize_value_code_instance(jl_serializer_state *s, jl
         jl_gc_wb(codeinst, codeinst->rettype_const);
     codeinst->rettype = jl_deserialize_value(s, &codeinst->rettype);
     jl_gc_wb(codeinst, codeinst->rettype);
+    codeinst->argescapes = jl_deserialize_value(s, &codeinst->argescapes);
+    jl_gc_wb(codeinst, codeinst->argescapes);
     if (constret)
         codeinst->invoke = jl_fptr_const_return;
     if ((flags >> 3) & 1)
         codeinst->precompile = 1;
+    codeinst->relocatability = read_uint8(s->s);
+    assert(codeinst->relocatability <= 1);
     codeinst->next = (jl_code_instance_t*)jl_deserialize_value(s, (jl_value_t**)&codeinst->next);
     jl_gc_wb(codeinst, codeinst->next);
-    if (validate)
-        codeinst->min_world = jl_world_counter;
+    if (validate) {
+        codeinst->min_world = jl_atomic_load_acquire(&jl_world_counter);
+        ptrhash_put(&new_code_instance_validate, codeinst, (void*)(~(uintptr_t)HT_NOTFOUND));   // "HT_FOUND"
+    }
     return (jl_value_t*)codeinst;
 }
 
@@ -1499,17 +1881,21 @@ static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DIS
     jl_gc_wb(m, m->parent);
 
     while (1) {
-        jl_sym_t *name = (jl_sym_t*)jl_deserialize_value(s, NULL);
-        if (name == NULL)
+        jl_sym_t *asname = (jl_sym_t*)jl_deserialize_value(s, NULL);
+        if (asname == NULL)
             break;
-        jl_binding_t *b = jl_get_binding_wr(m, name, 1);
-        b->value = jl_deserialize_value(s, &b->value);
-        jl_gc_wb_buf(m, b, sizeof(jl_binding_t));
-        if (b->value != NULL) jl_gc_wb(m, b->value);
-        b->globalref = jl_deserialize_value(s, &b->globalref);
-        if (b->globalref != NULL) jl_gc_wb(m, b->globalref);
+        jl_binding_t *b = jl_get_binding_wr(m, asname, 1);
+        b->name = (jl_sym_t*)jl_deserialize_value(s, (jl_value_t**)&b->name);
+        jl_value_t *bvalue = jl_deserialize_value(s, (jl_value_t**)&b->value);
+        *(jl_value_t**)&b->value = bvalue;
+        if (bvalue != NULL) jl_gc_wb(m, bvalue);
+        jl_value_t *bglobalref = jl_deserialize_value(s, (jl_value_t**)&b->globalref);
+        *(jl_value_t**)&b->globalref = bglobalref;
+        if (bglobalref != NULL) jl_gc_wb(m, bglobalref);
         b->owner = (jl_module_t*)jl_deserialize_value(s, (jl_value_t**)&b->owner);
         if (b->owner != NULL) jl_gc_wb(m, b->owner);
+        jl_value_t *bty = jl_deserialize_value(s, (jl_value_t**)&b->ty);
+        *(jl_value_t**)&b->ty = bty;
         int8_t flags = read_int8(s->s);
         b->deprecated = (flags>>3) & 1;
         b->constp = (flags>>2) & 1;
@@ -1533,7 +1919,8 @@ static jl_value_t *jl_deserialize_value_module(jl_serializer_state *s) JL_GC_DIS
     m->optlevel = read_int8(s->s);
     m->compile = read_int8(s->s);
     m->infer = read_int8(s->s);
-    m->primary_world = jl_world_counter;
+    m->max_methods = read_int8(s->s);
+    m->primary_world = jl_atomic_load_acquire(&jl_world_counter);
     return (jl_value_t*)m;
 }
 
@@ -1583,7 +1970,7 @@ static void jl_deserialize_struct(jl_serializer_state *s, jl_value_t *v) JL_GC_D
         if (entry->max_world == ~(size_t)0) {
             if (entry->min_world > 1) {
                 // update world validity to reflect current state of the counter
-                entry->min_world = jl_world_counter;
+                entry->min_world = jl_atomic_load_acquire(&jl_world_counter);
             }
         }
         else {
@@ -1612,7 +1999,6 @@ static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag,
             memset(tn, 0, sizeof(jl_typename_t));
             tn->cache = jl_emptysvec; // the cache is refilled later (tag 5)
             tn->linearcache = jl_emptysvec; // the cache is refilled later (tag 5)
-            tn->partial = NULL;
             backref_list.items[pos] = tn;
         }
         jl_module_t *m = (jl_module_t*)jl_deserialize_value(s, NULL);
@@ -1624,9 +2010,29 @@ static jl_value_t *jl_deserialize_value_any(jl_serializer_state *s, uint8_t tag,
             jl_gc_wb(tn, tn->names);
             tn->wrapper = jl_deserialize_value(s, &tn->wrapper);
             jl_gc_wb(tn, tn->wrapper);
+            tn->Typeofwrapper = NULL;
             tn->mt = (jl_methtable_t*)jl_deserialize_value(s, (jl_value_t**)&tn->mt);
             jl_gc_wb(tn, tn->mt);
             ios_read(s->s, (char*)&tn->hash, sizeof(tn->hash));
+            int8_t flags = read_int8(s->s);
+            tn->abstract = flags & 1;
+            tn->mutabl = (flags>>1) & 1;
+            tn->mayinlinealloc = (flags>>2) & 1;
+            tn->max_methods = read_uint8(s->s);
+            if (tn->abstract)
+                tn->n_uninitialized = 0;
+            else
+                tn->n_uninitialized = read_uint16(s->s);
+            size_t nfields = read_int32(s->s);
+            if (nfields) {
+                tn->atomicfields = (uint32_t*)malloc(nfields);
+                ios_read(s->s, (char*)tn->atomicfields, nfields);
+            }
+            nfields = read_int32(s->s);
+            if (nfields) {
+                tn->constfields = (uint32_t*)malloc(nfields);
+                ios_read(s->s, (char*)tn->constfields, nfields);
+            }
         }
         else {
             jl_datatype_t *dt = (jl_datatype_t*)jl_unwrap_unionall(jl_get_global(m, sym));
@@ -1689,7 +2095,7 @@ static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc
         }
         return (jl_value_t*)bp;
     case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC:
-        return jl_deserialize_value_svec(s, tag);
+        return jl_deserialize_value_svec(s, tag, loc);
     case TAG_COMMONSYM:
         return deser_symbols[read_uint8(s->s)];
     case TAG_SYMBOL: JL_FALLTHROUGH; case TAG_LONG_SYMBOL:
@@ -1788,25 +2194,135 @@ static jl_value_t *jl_deserialize_value(jl_serializer_state *s, jl_value_t **loc
     }
 }
 
+// Add methods to external (non-worklist-owned) functions
 static void jl_insert_methods(jl_array_t *list)
 {
     size_t i, l = jl_array_len(list);
     for (i = 0; i < l; i += 2) {
         jl_method_t *meth = (jl_method_t*)jl_array_ptr_ref(list, i);
-        jl_tupletype_t *simpletype = (jl_tupletype_t*)jl_array_ptr_ref(list, i + 1);
         assert(jl_is_method(meth));
-        jl_methtable_t *mt = jl_method_table_for((jl_value_t*)meth->sig);
+        assert(!meth->is_for_opaque_closure);
+        jl_tupletype_t *simpletype = (jl_tupletype_t*)jl_array_ptr_ref(list, i + 1);
+        jl_methtable_t *mt = jl_method_get_table(meth);
         assert((jl_value_t*)mt != jl_nothing);
         jl_method_table_insert(mt, meth, simpletype);
     }
 }
 
+void remove_code_instance_from_validation(jl_code_instance_t *codeinst)
+{
+    ptrhash_remove(&new_code_instance_validate, codeinst);
+}
+
+static void jl_insert_method_instances(jl_array_t *list)
+{
+    size_t i, l = jl_array_len(list);
+    // Validate the MethodInstances
+    jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l);
+    memset(jl_array_data(valids), 1, l);
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    for (i = 0; i < l; i++) {
+        jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i);
+        assert(jl_is_method_instance(mi));
+        if (jl_is_method(mi->def.method)) {
+            // Is this still the method we'd be calling?
+            jl_methtable_t *mt = jl_method_table_for(mi->specTypes);
+            struct jl_typemap_assoc search = {(jl_value_t*)mi->specTypes, world, NULL, 0, ~(size_t)0};
+            jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/1);
+            if (entry) {
+                jl_value_t *mworld = entry->func.value;
+                if (jl_is_method(mworld) && mi->def.method != (jl_method_t*)mworld && jl_type_morespecific(((jl_method_t*)mworld)->sig, mi->def.method->sig)) {
+                    jl_array_uint8_set(valids, i, 0);
+                    invalidate_backedges(&remove_code_instance_from_validation, mi, world, "jl_insert_method_instance");
+                    // The codeinst of this mi haven't yet been removed
+                    jl_code_instance_t *codeinst = mi->cache;
+                    while (codeinst) {
+                        remove_code_instance_from_validation(codeinst);
+                        codeinst = codeinst->next;
+                    }
+                    if (_jl_debug_method_invalidation) {
+                        jl_array_ptr_1d_push(_jl_debug_method_invalidation, mworld);
+                        jl_array_ptr_1d_push(_jl_debug_method_invalidation, jl_cstr_to_string("jl_method_table_insert")); // GC disabled
+                    }
+                }
+            }
+        }
+    }
+    // While it's tempting to just remove the invalidated MIs altogether,
+    // this hurts the ability of SnoopCompile to diagnose problems.
+    for (i = 0; i < l; i++) {
+        jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i);
+        jl_method_instance_t *milive = jl_specializations_get_or_insert(mi);
+        ptrhash_put(&uniquing_table, mi, milive);  // store the association for the 2nd pass
+    }
+    // We may need to fix up the backedges for the ones that didn't "go live"
+    for (i = 0; i < l; i++) {
+        jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(list, i);
+        jl_method_instance_t *milive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, mi);
+        if (milive != mi) {
+            // A previously-loaded module compiled this method, so the one we deserialized will be dropped.
+            // But make sure the backedges are copied over.
+            if (mi->backedges) {
+                if (!milive->backedges) {
+                    // Copy all the backedges (after looking up the live ones)
+                    size_t j, n = jl_array_len(mi->backedges);
+                    milive->backedges = jl_alloc_vec_any(n);
+                    jl_gc_wb(milive, milive->backedges);
+                    for (j = 0; j < n; j++) {
+                        jl_method_instance_t *be = (jl_method_instance_t*)jl_array_ptr_ref(mi->backedges, j);
+                        jl_method_instance_t *belive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, be);
+                        if (belive == HT_NOTFOUND)
+                            belive = be;
+                        jl_array_ptr_set(milive->backedges, j, belive);
+                    }
+                } else {
+                    // Copy the missing backedges (this is an O(N^2) algorithm, but many methods have few MethodInstances)
+                    size_t j, k, n = jl_array_len(mi->backedges), nlive = jl_array_len(milive->backedges);
+                    for (j = 0; j < n; j++) {
+                        jl_method_instance_t *be = (jl_method_instance_t*)jl_array_ptr_ref(mi->backedges, j);
+                        jl_method_instance_t *belive = (jl_method_instance_t*)ptrhash_get(&uniquing_table, be);
+                        if (belive == HT_NOTFOUND)
+                            belive = be;
+                        int found = 0;
+                        for (k = 0; k < nlive; k++) {
+                            if (belive == (jl_method_instance_t*)jl_array_ptr_ref(milive->backedges, k)) {
+                                found = 1;
+                                break;
+                            }
+                        }
+                        if (!found)
+                            jl_array_ptr_1d_push(milive->backedges, (jl_value_t*)belive);
+                    }
+                }
+            }
+            // Additionally, if we have CodeInstance(s) and the running CodeInstance is world-limited, transfer it
+            if (mi->cache && jl_array_uint8_ref(valids, i)) {
+                if (!milive->cache || milive->cache->max_world < ~(size_t)0) {
+                    jl_code_instance_t *cilive = milive->cache, *ci;
+                    milive->cache = mi->cache;
+                    jl_gc_wb(milive, milive->cache);
+                    ci = mi->cache;
+                    ci->def = milive;
+                    while (ci->next) {
+                        ci = ci->next;
+                        ci->def = milive;
+                    }
+                    ci->next = cilive;
+                    jl_gc_wb(ci, ci->next);
+                }
+            }
+        }
+    }
+}
+
 // verify that these edges intersect with the same methods as before
 static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
 {
     size_t i, l = jl_array_len(targets) / 2;
     jl_array_t *valids = jl_alloc_array_1d(jl_array_uint8_type, l);
     memset(jl_array_data(valids), 1, l);
+    jl_value_t *loctag = NULL;
+    JL_GC_PUSH1(&loctag);
     *pvalids = valids;
     for (i = 0; i < l; i++) {
         jl_value_t *callee = jl_array_ptr_ref(targets, i * 2);
@@ -1825,7 +2341,7 @@ static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
         size_t max_valid = ~(size_t)0;
         int ambig = 0;
         // TODO: possibly need to included ambiguities too (for the optimizer correctness)?
-        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, -1, 0, jl_world_counter, &min_valid, &max_valid, &ambig);
+        jl_value_t *matches = jl_matching_methods((jl_tupletype_t*)sig, jl_nothing, -1, 0, jl_atomic_load_acquire(&jl_world_counter), &min_valid, &max_valid, &ambig);
         if (matches == jl_false || jl_array_len(matches) != jl_array_len(expected)) {
             valid = 0;
         }
@@ -1848,9 +2364,18 @@ static void jl_verify_edges(jl_array_t *targets, jl_array_t **pvalids)
             }
         }
         jl_array_uint8_set(valids, i, valid);
+        if (!valid && _jl_debug_method_invalidation) {
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)callee);
+            loctag = jl_cstr_to_string("insert_backedges_callee");
+            jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
+        }
     }
+    JL_GC_POP();
 }
 
+// Restore backedges to external targets
+// `targets` is [callee1, matches1, ...], the global set of non-worklist callees of worklist-owned methods.
+// `list` = [caller1, targets_indexes1, ...], the list of worklist-owned methods calling external methods.
 static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
 {
     // map(enable, ((list[i] => targets[list[i + 1] .* 2]) for i in 1:2:length(list) if all(valids[list[i + 1]])))
@@ -1862,7 +2387,6 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
     for (i = 0; i < l; i += 2) {
         jl_method_instance_t *caller = (jl_method_instance_t*)jl_array_ptr_ref(list, i);
         assert(jl_is_method_instance(caller) && jl_is_method(caller->def.method));
-        assert(caller->def.method->primary_world == jl_world_counter); // caller should be new
         jl_array_t *idxs_array = (jl_array_t*)jl_array_ptr_ref(list, i + 1);
         assert(jl_isa((jl_value_t*)idxs_array, jl_array_int32_type));
         int32_t *idxs = (int32_t*)jl_array_data(idxs_array);
@@ -1882,19 +2406,29 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
                 }
                 else {
                     jl_methtable_t *mt = jl_method_table_for(callee);
-                    assert((jl_value_t*)mt != jl_nothing);
-                    jl_method_table_add_backedge(mt, callee, (jl_value_t*)caller);
+                    // FIXME: rarely, `callee` has an unexpected `Union` signature,
+                    // see https://github.com/JuliaLang/julia/pull/43990#issuecomment-1030329344
+                    // Fix the issue and turn this back into an `assert((jl_value_t*)mt != jl_nothing)`
+                    // This workaround exposes us to (rare) 265-violations.
+                    if ((jl_value_t*)mt != jl_nothing)
+                        jl_method_table_add_backedge(mt, callee, (jl_value_t*)caller);
                 }
             }
             // then enable it
             jl_code_instance_t *codeinst = caller->cache;
             while (codeinst) {
-                if (codeinst->min_world > 0)
+                if (ptrhash_get(&new_code_instance_validate, codeinst) != HT_NOTFOUND && codeinst->min_world > 0)
                     codeinst->max_world = ~(size_t)0;
+                ptrhash_remove(&new_code_instance_validate, codeinst);  // mark it as handled
                 codeinst = jl_atomic_load_relaxed(&codeinst->next);
             }
         }
         else {
+            jl_code_instance_t *codeinst = caller->cache;
+            while (codeinst) {
+                ptrhash_remove(&new_code_instance_validate, codeinst);  // should be left invalid
+                codeinst = jl_atomic_load_relaxed(&codeinst->next);
+            }
             if (_jl_debug_method_invalidation) {
                 jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)caller);
                 loctag = jl_cstr_to_string("insert_backedges");
@@ -1905,6 +2439,15 @@ static void jl_insert_backedges(jl_array_t *list, jl_array_t *targets)
     JL_GC_POP();
 }
 
+static void validate_new_code_instances(void)
+{
+    size_t i;
+    for (i = 0; i < new_code_instance_validate.size; i += 2) {
+        if (new_code_instance_validate.table[i+1] != HT_NOTFOUND) {
+            ((jl_code_instance_t*)new_code_instance_validate.table[i])->max_world = ~(size_t)0;
+        }
+    }
+}
 
 static jl_value_t *read_verify_mod_list(ios_t *s, jl_array_t *mod_list)
 {
@@ -1995,8 +2538,8 @@ static void jl_reinit_item(jl_value_t *v, int how, arraylist_t *tracee_list)
                 jl_module_t *mod = (jl_module_t*)v;
                 if (mod->parent == mod) // top level modules handled by loader
                     break;
-                jl_binding_t *b = jl_get_binding_wr(mod->parent, mod->name, 1);
-                jl_declare_constant(b); // this can throw
+                jl_binding_t *b = jl_get_binding_wr(mod->parent, mod->name, 1); // this can throw
+                jl_declare_constant(b); // this can also throw
                 if (b->value != NULL) {
                     if (!jl_is_module(b->value)) {
                         jl_errorf("Invalid redefinition of constant %s.",
@@ -2023,11 +2566,12 @@ static void jl_reinit_item(jl_value_t *v, int how, arraylist_t *tracee_list)
         }
     }
     JL_CATCH {
-        jl_printf(JL_STDERR, "WARNING: error while reinitializing value ");
-        jl_static_show(JL_STDERR, v);
-        jl_printf(JL_STDERR, ":\n");
-        jl_static_show(JL_STDERR, jl_current_exception());
-        jl_printf(JL_STDERR, "\n");
+        jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: error while reinitializing value ");
+        jl_static_show((JL_STREAM*)STDERR_FILENO, v);
+        jl_printf((JL_STREAM*)STDERR_FILENO, ":\n");
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+        jlbacktrace(); // written to STDERR_FILENO
     }
 }
 
@@ -2046,8 +2590,6 @@ static jl_array_t *jl_finalize_deserializer(jl_serializer_state *s, arraylist_t
 
 JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order)
 {
-    if (!init_order)
-        return;
     int i, l = jl_array_len(init_order);
     for (i = 0; i < l; i++) {
         jl_value_t *mod = jl_array_ptr_ref(init_order, i);
@@ -2065,6 +2607,15 @@ JL_DLLEXPORT void jl_init_restored_modules(jl_array_t *init_order)
 
 // --- entry points ---
 
+// Register all newly-inferred MethodInstances
+// This gets called as the final step of Base.include_package_for_output
+JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t* _newly_inferred)
+{
+    assert(_newly_inferred == NULL || jl_is_array(_newly_inferred));
+    newly_inferred = (jl_array_t*) _newly_inferred;
+}
+
+// Serialize the modules in `worklist` to file `fname`
 JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
 {
     JL_TIMING(SAVE_MODULE);
@@ -2075,15 +2626,19 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
         return 1;
     }
     JL_GC_PUSH2(&mod_array, &udeps);
-    mod_array = jl_get_loaded_modules();
+    mod_array = jl_get_loaded_modules();  // __toplevel__ modules loaded in this session (from Base.loaded_modules_array)
+    assert(jl_precompile_toplevel_module == NULL);
+    jl_precompile_toplevel_module = (jl_module_t*)jl_array_ptr_ref(worklist, jl_array_len(worklist)-1);
 
     serializer_worklist = worklist;
     write_header(&f);
-    // write description on contents
+    // write description of contents (name, uuid, buildid)
     write_work_list(&f);
-    // write binary blob from caller
-    int64_t srctextpos = write_dependency_list(&f, &udeps, mod_array);
-    // write description of requirements for loading
+    // Determine unique (module, abspath, mtime) dependencies for the files defining modules in the worklist
+    // (see Base._require_dependencies). These get stored in `udeps` and written to the ji-file header.
+    // Also write Preferences.
+    int64_t srctextpos = write_dependency_list(&f, &udeps);  // srctextpos: position of srctext entry in header index (update later)
+    // write description of requirements for loading (modules that must be pre-loaded if initialization is to succeed)
     // this can return errors during deserialize,
     // best to keep it early (before any actual initialization)
     write_mod_list(&f, mod_array);
@@ -2091,6 +2646,7 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
     arraylist_new(&reinit_list, 0);
     htable_new(&edges_map, 0);
     htable_new(&backref_table, 5000);
+    htable_new(&external_mis, newly_inferred ? jl_array_len(newly_inferred) : 0);
     ptrhash_put(&backref_table, jl_main_module, (char*)HT_NOTFOUND + 1);
     backref_table_numel = 1;
     jl_idtable_type = jl_base_module ? jl_get_global(jl_base_module, jl_symbol("IdDict")) : NULL;
@@ -2102,9 +2658,11 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
     }
 
     int en = jl_gc_enable(0); // edges map is not gc-safe
-    jl_array_t *lambdas = jl_alloc_vec_any(0);
-    jl_array_t *edges = jl_alloc_vec_any(0);
-    jl_array_t *targets = jl_alloc_vec_any(0);
+    jl_array_t *extext_methods = jl_alloc_vec_any(0);  // [method1, simplesig1, ...], worklist-owned "extending external" methods added to functions owned by modules outside the worklist
+    jl_array_t *ext_targets = jl_alloc_vec_any(0);     // [callee1, matches1, ...] non-worklist callees of worklist-owned methods
+    jl_array_t *edges = jl_alloc_vec_any(0);           // [caller1, ext_targets_indexes1, ...] for worklist-owned methods calling external methods
+
+    int n_ext_mis = queue_external_mis(newly_inferred);
 
     size_t i;
     size_t len = jl_array_len(mod_array);
@@ -2112,30 +2670,37 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
         jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i);
         assert(jl_is_module(m));
         if (m->parent == m) // some toplevel modules (really just Base) aren't actually
-            jl_collect_lambdas_from_mod(lambdas, m);
+            jl_collect_extext_methods_from_mod(extext_methods, m);
     }
-    jl_collect_methtable_from_mod(lambdas, jl_type_type_mt);
+    jl_collect_methtable_from_mod(extext_methods, jl_type_type_mt);
     jl_collect_missing_backedges_to_mod(jl_type_type_mt);
-    jl_collect_methtable_from_mod(lambdas, jl_nonfunction_mt);
+    jl_collect_methtable_from_mod(extext_methods, jl_nonfunction_mt);
     jl_collect_missing_backedges_to_mod(jl_nonfunction_mt);
 
-    jl_collect_backedges(edges, targets);
+    // jl_collect_extext_methods_from_mod and jl_collect_missing_backedges_to_mod accumulate data in edges_map.
+    // Process this to extract `edges` and `ext_targets`.
+    jl_collect_backedges(edges, ext_targets);
 
     jl_serializer_state s = {
         &f,
-        jl_get_ptls_states(),
+        jl_current_task->ptls,
         mod_array
     };
-    jl_serialize_value(&s, worklist);
-    jl_serialize_value(&s, lambdas);
+    jl_serialize_value(&s, worklist);   // serialize module-owned items (those accessible from the bindings table)
+    jl_serialize_value(&s, extext_methods);  // serialize new worklist-owned methods for external functions
+    serialize_htable_keys(&s, &external_mis, n_ext_mis);  // serialize external MethodInstances
+
+    // The next two allow us to restore backedges from external "unserialized" (stub-serialized) MethodInstances
+    // to the ones we serialize here
     jl_serialize_value(&s, edges);
-    jl_serialize_value(&s, targets);
+    jl_serialize_value(&s, ext_targets);
     jl_finalize_serializer(&s);
     serializer_worklist = NULL;
 
     jl_gc_enable(en);
     htable_reset(&edges_map, 0);
     htable_reset(&backref_table, 0);
+    htable_reset(&external_mis, 0);
     arraylist_free(&reinit_list);
 
     // Write the source-text for the dependent files
@@ -2185,6 +2750,7 @@ JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist)
     write_int32(&f, 0); // mark the end of the source text
     ios_close(&f);
     JL_GC_POP();
+    jl_precompile_toplevel_module = NULL;
 
     return 0;
 }
@@ -2203,7 +2769,54 @@ STATIC_INLINE jl_value_t *verify_type(jl_value_t *v) JL_NOTSAFEPOINT
 }
 #endif
 
-static jl_datatype_t *jl_recache_type(jl_datatype_t *dt) JL_GC_DISABLED
+
+static jl_datatype_t *recache_datatype(jl_datatype_t *dt) JL_GC_DISABLED;
+
+static jl_value_t *recache_type(jl_value_t *p) JL_GC_DISABLED
+{
+    if (jl_is_datatype(p)) {
+        jl_datatype_t *pdt = (jl_datatype_t*)p;
+        if (ptrhash_get(&uniquing_table, p) != HT_NOTFOUND) {
+            p = (jl_value_t*)recache_datatype(pdt);
+        }
+        else {
+            jl_svec_t *tt = pdt->parameters;
+            // ensure all type parameters are recached
+            size_t i, l = jl_svec_len(tt);
+            for (i = 0; i < l; i++)
+                jl_svecset(tt, i, recache_type(jl_svecref(tt, i)));
+            ptrhash_put(&uniquing_table, p, p); // ensures this algorithm isn't too exponential
+        }
+    }
+    else if (jl_is_typevar(p)) {
+        jl_tvar_t *ptv = (jl_tvar_t*)p;
+        ptv->lb = recache_type(ptv->lb);
+        ptv->ub = recache_type(ptv->ub);
+    }
+    else if (jl_is_uniontype(p)) {
+        jl_uniontype_t *pu = (jl_uniontype_t*)p;
+        pu->a = recache_type(pu->a);
+        pu->b = recache_type(pu->b);
+    }
+    else if (jl_is_unionall(p)) {
+        jl_unionall_t *pa = (jl_unionall_t*)p;
+        pa->var = (jl_tvar_t*)recache_type((jl_value_t*)pa->var);
+        pa->body = recache_type(pa->body);
+    }
+    else {
+        jl_datatype_t *pt = (jl_datatype_t*)jl_typeof(p);
+        jl_datatype_t *cachep = recache_datatype(pt);
+        if (cachep->instance)
+            p = cachep->instance;
+        else if (pt != cachep)
+            jl_set_typeof(p, cachep);
+    }
+    return p;
+}
+
+// Extract pre-existing datatypes from cache, and insert new types into cache
+// insertions also update uniquing_table
+static jl_datatype_t *recache_datatype(jl_datatype_t *dt) JL_GC_DISABLED
 {
     jl_datatype_t *t; // the type after unique'ing
     assert(verify_type((jl_value_t*)dt));
@@ -2216,27 +2829,8 @@ static jl_datatype_t *jl_recache_type(jl_datatype_t *dt) JL_GC_DISABLED
     jl_svec_t *tt = dt->parameters;
     // recache all type parameters
     size_t i, l = jl_svec_len(tt);
-    for (i = 0; i < l; i++) {
-        jl_datatype_t *p = (jl_datatype_t*)jl_svecref(tt, i);
-        if (jl_is_datatype(p)) {
-            jl_datatype_t *cachep = jl_recache_type(p);
-            if (p != cachep)
-                jl_svecset(tt, i, cachep);
-        }
-        // XXX: else if (jl_is_typevar(p))
-        // XXX: else if (jl_is_uniontype(p))
-        // XXX: else if (jl_is_unionall(p))
-        else {
-            p = (jl_datatype_t*)jl_typeof(p);
-            jl_datatype_t *cachep = jl_recache_type(p);
-            if (p != cachep) {
-                if (cachep->instance)
-                    jl_svecset(tt, i, cachep->instance);
-                else
-                    jl_set_typeof(jl_svecref(tt, i), cachep);
-            }
-        }
-    }
+    for (i = 0; i < l; i++)
+        jl_svecset(tt, i, recache_type(jl_svecref(tt, i)));
 
     // then recache the type itself
     if (jl_svec_len(tt) == 0) { // jl_cache_type doesn't work if length(parameters) == 0
@@ -2255,6 +2849,8 @@ static jl_datatype_t *jl_recache_type(jl_datatype_t *dt) JL_GC_DISABLED
     return t;
 }
 
+// Recache everything from flagref_list except methods and method instances
+// Cleans out any handled items so that anything left in flagref_list still needs future processing
 static void jl_recache_types(void) JL_GC_DISABLED
 {
     size_t i;
@@ -2274,7 +2870,7 @@ static void jl_recache_types(void) JL_GC_DISABLED
                 dt = (jl_datatype_t*)jl_typeof(o);
                 v = o;
             }
-            jl_datatype_t *t = jl_recache_type(dt);
+            jl_datatype_t *t = recache_datatype(dt); // get or create cached type (also updates uniquing_table)
             if ((jl_value_t*)dt == o && t != dt) {
                 assert(!type_in_worklist(dt));
                 if (loc)
@@ -2293,8 +2889,8 @@ static void jl_recache_types(void) JL_GC_DISABLED
     }
     // invalidate the old datatypes to help catch errors
     for (i = 0; i < uniquing_table.size; i += 2) {
-        jl_datatype_t *o = (jl_datatype_t*)uniquing_table.table[i];
-        jl_datatype_t *t = (jl_datatype_t*)uniquing_table.table[i + 1];
+        jl_datatype_t *o = (jl_datatype_t*)uniquing_table.table[i];      // deserialized ref
+        jl_datatype_t *t = (jl_datatype_t*)uniquing_table.table[i + 1];  // the real type
         if (o != t) {
             assert(t != NULL && jl_is_datatype(o));
             if (t->instance != o->instance)
@@ -2316,7 +2912,7 @@ static void jl_recache_types(void) JL_GC_DISABLED
             flagref_list.len -= 2;
             if (i >= flagref_list.len)
                 break;
-            flagref_list.items[i + 0] = flagref_list.items[flagref_list.len + 0];
+            flagref_list.items[i + 0] = flagref_list.items[flagref_list.len + 0];  // move end-of-list here (executes a `reverse()`)
             flagref_list.items[i + 1] = flagref_list.items[flagref_list.len + 1];
         }
     }
@@ -2334,12 +2930,13 @@ static jl_method_t *jl_lookup_method(jl_methtable_t *mt, jl_datatype_t *sig, siz
 
 static jl_method_t *jl_recache_method(jl_method_t *m)
 {
+    assert(!m->is_for_opaque_closure);
+    assert(jl_is_method(m));
     jl_datatype_t *sig = (jl_datatype_t*)m->sig;
-    jl_methtable_t *mt = jl_method_table_for((jl_value_t*)m->sig);
+    jl_methtable_t *mt = jl_method_get_table(m);
     assert((jl_value_t*)mt != jl_nothing);
     jl_set_typeof(m, (void*)(intptr_t)0x30); // invalidate the old value to help catch errors
-    jl_method_t *_new = jl_lookup_method(mt, sig, m->module->primary_world);
-    return _new;
+    return jl_lookup_method(mt, sig, m->module->primary_world);
 }
 
 static jl_value_t *jl_recache_other_(jl_value_t *o);
@@ -2398,16 +2995,45 @@ static void jl_recache_other(void)
     flagref_list.len = 0;
 }
 
+// Wait to copy roots until recaching is done
+// This is because recaching requires that all pointers to methods and methodinstances
+// stay at their source location as recorded by flagref_list. Once recaching is complete,
+// they can be safely copied over.
+static void jl_copy_roots(void)
+{
+    size_t i, j, l;
+    for (i = 0; i < queued_method_roots.size; i+=2) {
+        jl_method_t *m = (jl_method_t*)queued_method_roots.table[i];
+        m = (jl_method_t*)ptrhash_get(&uniquing_table, m);
+        jl_svec_t *keyroots = (jl_svec_t*)queued_method_roots.table[i+1];
+        if (keyroots != HT_NOTFOUND) {
+            uint64_t key = (uint64_t)(uintptr_t)jl_svec_ref(keyroots, 0) | ((uint64_t)(uintptr_t)jl_svec_ref(keyroots, 1) << 32);
+            jl_array_t *roots = (jl_array_t*)jl_svec_ref(keyroots, 2);
+            assert(jl_is_array(roots));
+            l = jl_array_len(roots);
+            for (j = 0; j < l; j++) {
+                jl_value_t *r = jl_array_ptr_ref(roots, j);
+                jl_value_t *newr = (jl_value_t*)ptrhash_get(&uniquing_table, r);
+                if (newr != HT_NOTFOUND) {
+                    jl_array_ptr_set(roots, j, newr);
+                }
+            }
+            jl_append_method_roots(m, key, roots);
+        }
+    }
+}
+
 static int trace_method(jl_typemap_entry_t *entry, void *closure)
 {
     jl_call_tracer(jl_newmeth_tracer, (jl_value_t*)entry->func.method);
     return 1;
 }
 
+// Restore module(s) from a cache file f
 static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array)
 {
     JL_TIMING(LOAD_MODULE);
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (ios_eof(f) || !jl_read_verify_header(f)) {
         ios_close(f);
         return jl_get_exceptionf(jl_errorexception_type,
@@ -2438,53 +3064,70 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array)
 
     // prepare to deserialize
     int en = jl_gc_enable(0);
-    jl_gc_enable_finalizers(ptls, 0);
-    ++jl_world_counter; // reserve a world age for the deserialization
+    jl_gc_enable_finalizers(ct, 0);
+    jl_atomic_fetch_add(&jl_world_counter, 1); // reserve a world age for the deserialization
 
     arraylist_new(&backref_list, 4000);
     arraylist_push(&backref_list, jl_main_module);
     arraylist_new(&flagref_list, 0);
+    htable_new(&queued_method_roots, 0);
+    htable_new(&new_code_instance_validate, 0);
     arraylist_new(&ccallable_list, 0);
     htable_new(&uniquing_table, 0);
 
     jl_serializer_state s = {
         f,
-        ptls,
+        ct->ptls,
         mod_array
     };
     jl_array_t *restored = (jl_array_t*)jl_deserialize_value(&s, (jl_value_t**)&restored);
     serializer_worklist = restored;
     assert(jl_isa((jl_value_t*)restored, jl_array_any_type));
 
-    // get list of external generic functions
-    jl_value_t *external_methods = jl_deserialize_value(&s, &external_methods);
-    jl_value_t *external_backedges = jl_deserialize_value(&s, &external_backedges);
-    jl_value_t *external_edges = jl_deserialize_value(&s, &external_edges);
+    // See explanation in jl_save_incremental for variables of the same names
+    jl_value_t *extext_methods = jl_deserialize_value(&s, &extext_methods);
+    int i, n_ext_mis = read_int32(s.s);
+    jl_array_t *mi_list = jl_alloc_vec_any(n_ext_mis);   // reload MIs stored by serialize_htable_keys
+    jl_value_t **midata = (jl_value_t**)jl_array_data(mi_list);
+    for (i = 0; i < n_ext_mis; i++)
+        midata[i] = jl_deserialize_value(&s, &(midata[i]));
+    jl_value_t *edges = jl_deserialize_value(&s, &edges);
+    jl_value_t *ext_targets = jl_deserialize_value(&s, &ext_targets);
 
     arraylist_t *tracee_list = NULL;
-    if (jl_newmeth_tracer)
+    if (jl_newmeth_tracer)  // debugging
         tracee_list = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0);
 
     // at this point, the AST is fully reconstructed, but still completely disconnected
     // now all of the interconnects will be created
     jl_recache_types(); // make all of the types identities correct
-    htable_reset(&uniquing_table, 0);
-    jl_insert_methods((jl_array_t*)external_methods); // hook up methods of external generic functions (needs to be after recache types)
+    jl_insert_methods((jl_array_t*)extext_methods); // hook up extension methods for external generic functions (needs to be after recache types)
     jl_recache_other(); // make all of the other objects identities correct (needs to be after insert methods)
+    jl_copy_roots();    // copying new roots of external methods (must wait until recaching is complete)
+    // At this point, the novel specializations in mi_list reference the real method, but they haven't been cached in its specializations
+    jl_insert_method_instances(mi_list);   // insert novel specializations
     htable_free(&uniquing_table);
     jl_array_t *init_order = jl_finalize_deserializer(&s, tracee_list); // done with f and s (needs to be after recache)
+    if (init_order == NULL)
+        init_order = (jl_array_t*)jl_an_empty_vec_any;
+    assert(jl_isa((jl_value_t*)init_order, jl_array_any_type));
 
-    JL_GC_PUSH4(&init_order, &restored, &external_backedges, &external_edges);
+    JL_GC_PUSH4(&init_order, &restored, &edges, &ext_targets);
     jl_gc_enable(en); // subtyping can allocate a lot, not valid before recache-other
 
-    jl_insert_backedges((jl_array_t*)external_backedges, (jl_array_t*)external_edges); // restore external backedges (needs to be last)
+    jl_insert_backedges((jl_array_t*)edges, (jl_array_t*)ext_targets); // restore external backedges (needs to be last)
+
+    // check new CodeInstances and validate any that lack external backedges
+    validate_new_code_instances();
 
     serializer_worklist = NULL;
+    htable_free(&new_code_instance_validate);
     arraylist_free(&flagref_list);
     arraylist_free(&backref_list);
+    htable_free(&queued_method_roots);
     ios_close(f);
 
-    jl_gc_enable_finalizers(ptls, 1); // make sure we don't run any Julia code concurrently before this point
+    jl_gc_enable_finalizers(ct, 1); // make sure we don't run any Julia code concurrently before this point
     if (tracee_list) {
         jl_methtable_t *mt;
         while ((mt = (jl_methtable_t*)arraylist_pop(tracee_list)) != NULL) {
@@ -2497,7 +3140,9 @@ static jl_value_t *_jl_restore_incremental(ios_t *f, jl_array_t *mod_array)
     for (int i = 0; i < ccallable_list.len; i++) {
         jl_svec_t *item = (jl_svec_t*)ccallable_list.items[i];
         JL_GC_PROMISE_ROOTED(item);
-        jl_compile_extern_c(NULL, NULL, NULL, jl_svecref(item, 0), jl_svecref(item, 1));
+        int success = jl_compile_extern_c(NULL, NULL, NULL, jl_svecref(item, 0), jl_svecref(item, 1));
+        if (!success)
+            jl_safe_printf("@ccallable was already defined for this method name\n");
     }
     arraylist_free(&ccallable_list);
     jl_value_t *ret = (jl_value_t*)jl_svec(2, restored, init_order);
@@ -2527,13 +3172,13 @@ JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *m
 
 void jl_init_serializer(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     htable_new(&ser_tag, 0);
     htable_new(&common_symbol_tag, 0);
     htable_new(&backref_table, 0);
 
     void *vals[] = { jl_emptysvec, jl_emptytuple, jl_false, jl_true, jl_nothing, jl_any_type,
-                     call_sym, invoke_sym, goto_ifnot_sym, return_sym, jl_symbol("tuple"),
+                     jl_call_sym, jl_invoke_sym, jl_invoke_modify_sym, jl_goto_ifnot_sym, jl_return_sym, jl_symbol("tuple"),
                      jl_an_empty_string, jl_an_empty_vec_any,
 
                      // empirical list of very common symbols
@@ -2554,11 +3199,11 @@ void jl_init_serializer(void)
                      jl_box_int64(12), jl_box_int64(13), jl_box_int64(14),
                      jl_box_int64(15), jl_box_int64(16), jl_box_int64(17),
                      jl_box_int64(18), jl_box_int64(19), jl_box_int64(20),
-                     jl_box_int64(21),
 
                      jl_bool_type, jl_linenumbernode_type, jl_pinode_type,
                      jl_upsilonnode_type, jl_type_type, jl_bottom_type, jl_ref_type,
-                     jl_pointer_type, jl_vararg_type, jl_abstractarray_type, jl_nothing_type,
+                     jl_pointer_type, jl_abstractarray_type, jl_nothing_type,
+                     jl_vararg_type,
                      jl_densearray_type, jl_function_type, jl_typename_type,
                      jl_builtin_type, jl_task_type, jl_uniontype_type,
                      jl_array_any_type, jl_intrinsic_type,
@@ -2570,8 +3215,9 @@ void jl_init_serializer(void)
                      jl_namedtuple_type, jl_array_int32_type,
                      jl_typedslot_type, jl_uint32_type, jl_uint64_type,
                      jl_type_type_mt, jl_nonfunction_mt,
+                     jl_opaque_closure_type,
 
-                     ptls->root_task,
+                     ct->ptls->root_task,
 
                      NULL };
 
@@ -2627,17 +3273,6 @@ void jl_init_serializer(void)
         i += 1;
     }
     assert(i <= 256);
-
-    arraylist_new(&builtin_typenames, 0);
-    arraylist_push(&builtin_typenames, jl_array_typename);
-    arraylist_push(&builtin_typenames, ((jl_datatype_t*)jl_ref_type->body)->name);
-    arraylist_push(&builtin_typenames, jl_pointer_typename);
-    arraylist_push(&builtin_typenames, jl_type_typename);
-    arraylist_push(&builtin_typenames, ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_abstractarray_type))->name);
-    arraylist_push(&builtin_typenames, ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_densearray_type))->name);
-    arraylist_push(&builtin_typenames, jl_tuple_typename);
-    arraylist_push(&builtin_typenames, jl_vararg_typename);
-    arraylist_push(&builtin_typenames, jl_namedtuple_typename);
 }
 
 #ifdef __cplusplus
diff --git a/src/features_aarch64.h b/src/features_aarch64.h
index a6b70b8ffd79e9..88d988a35f03f3 100644
--- a/src/features_aarch64.h
+++ b/src/features_aarch64.h
@@ -35,20 +35,20 @@ JL_FEATURE_DEF(fp16fml, 23, 0) // HWCAP_ASIMDFHM, ARMv8.2-FHM
 JL_FEATURE_DEF(dit, 24, 0) // HWCAP_DIT, ARMv8.4-DIT. Required in ARMv8.4
 // JL_FEATURE_DEF(uscat, 25, UINT32_MAX) // HWCAP_USCAT, ARMv8.4-LSE
 JL_FEATURE_DEF_NAME(rcpc_immo, 26, 0, "rcpc-immo") // HWCAP_ILRCPC, ARMv8.4-RCPC. Required in ARMv8.4
-JL_FEATURE_DEF(fmi, 27, 0) // HWCAP_FLAGM, ARMv8.4-CondM. Requird in ARMv8.4
+JL_FEATURE_DEF(flagm, 27, 120000) // HWCAP_FLAGM, ARMv8.4-CondM. Required in ARMv8.4
 JL_FEATURE_DEF(ssbs, 28, 0) // HWCAP_SSBS
 JL_FEATURE_DEF(sb, 29, 0) // HWCAP_SB. Required in ARMv8.5
-JL_FEATURE_DEF(pa, 30, 0) // HWCAP_PACA
-// JL_FEATURE_DEF(pa, 31, 0) // HWCAP_PACG. Merged with `pa`.
+JL_FEATURE_DEF(pauth, 30, 120000) // HWCAP_PACA
+// JL_FEATURE_DEF(pa, 31, 0) // HWCAP_PACG. Merged with `pauth`.
 
 // hwcap2
 JL_FEATURE_DEF(ccdp, 32 + 0, 0) // HWCAP2_DCPODP, ARMv8.2-DCCVADP. Required in ARMv8.5
-JL_FEATURE_DEF(sve2, 32 + 1, 90000) // HWCAP2_SVE2
-// JL_FEATURE_DEF_NAME(sve2_aes, 32 + 2, 90000, "sve2-aes") // HWCAP2_SVEAES, Implied by `sve2-aes`
-JL_FEATURE_DEF_NAME(sve2_aes, 32 + 3, 90000, "sve2-aes") // HWCAP2_SVEPMULL, ID_AA64ZFR0_EL1.AES == 2
+JL_FEATURE_DEF(sve2, 32 + 1, 0) // HWCAP2_SVE2
+// JL_FEATURE_DEF_NAME(sve2_aes, 32 + 2, 0, "sve2-aes") // HWCAP2_SVEAES, Implied by `sve2-aes`
+JL_FEATURE_DEF_NAME(sve2_aes, 32 + 3, 0, "sve2-aes") // HWCAP2_SVEPMULL, ID_AA64ZFR0_EL1.AES == 2
 JL_FEATURE_DEF_NAME(sve2_bitperm, 32 + 4, 100000, "sve2-bitperm") // HWCAP2_SVEBITPERM
-JL_FEATURE_DEF_NAME(sve2_sha3, 32 + 5, 90000, "sve2-sha3") // HWCAP2_SVESHA3
-JL_FEATURE_DEF_NAME(sve2_sm4, 32 + 6, 90000, "sve2-sm4") // HWCAP2_SM4
+JL_FEATURE_DEF_NAME(sve2_sha3, 32 + 5, 0, "sve2-sha3") // HWCAP2_SVESHA3
+JL_FEATURE_DEF_NAME(sve2_sm4, 32 + 6, 0, "sve2-sm4") // HWCAP2_SM4
 JL_FEATURE_DEF(altnzcv, 32 + 7, 0) // HWCAP2_FLAGM2, ARMv8.5-CondM. Required in ARMv8.5
 JL_FEATURE_DEF(fptoint, 32 + 8, 0) // HWCAP2_FRINT. Required in ARMv8.5
 // JL_FEATURE_DEF(svei8mm, 32 + 9, UINT32_MAX) // HWCAP2_SVEI8MM, ARMv8.2-I8MM. Same as `i8mm`
@@ -60,6 +60,7 @@ JL_FEATURE_DEF(bf16, 32 + 14, 110000) // HWCAP2_BF16, ARMv8.2-BF16. Required in
 // JL_FEATURE_DEF(dgh, 32 + 15, UINT32_MAX) // HWCAP2_DGH, ARMv8.0-DGH. Not implement in LLVM yet
 JL_FEATURE_DEF(rand, 32 + 16, 0) // HWCAP2_RNG, ARMv8.5-RNG
 JL_FEATURE_DEF(bti, 32 + 17, 0) // HWCAP2_BTI
+JL_FEATURE_DEF(mte, 32 + 18, 0) // HWCAP2_MTE, ARMv8.5-MemTag (reserved as of kernel 5.9-rc1)
 
 // custom bits to match llvm model
 JL_FEATURE_DEF(v8_1a, 32 * 2 + 0, 0)
@@ -74,7 +75,6 @@ JL_FEATURE_DEF(v8_6a, 32 * 2 + 5, 110000)
 //     am: ID_AA64PFR0_EL1.AMU (0b1, 0b10)
 //     specrestrict: ID_AA64PFR0_EL1.CSV2 (0b10)
 //     predres: ID_AA64PFR0_EL1.CSV3 (0b1)
-//     mte: ID_AA64PFR1_EL1.MTE (0b1, 0b10)
 //     ecv: ID_AA64MMFR0_EL1.ECV (0b1, 0b10) (LLVM 11)
 //     lor: ID_AA64MMFR1_EL1.LO (0b1)
 //     perfmon: ID_AA64DFR0_EL1.PMUVer (0b1, 0b100, 0b101, 0b110)
diff --git a/src/features_x86.h b/src/features_x86.h
index 6fc8fa0b303e61..3ef71fb217db69 100644
--- a/src/features_x86.h
+++ b/src/features_x86.h
@@ -33,7 +33,7 @@ JL_FEATURE_DEF(bmi, 32 * 2 + 3, 0)
 // JL_FEATURE_DEF(hle, 32 * 2 + 4, 0) // Not used and gone in LLVM 5.0
 JL_FEATURE_DEF(avx2, 32 * 2 + 5, 0)
 JL_FEATURE_DEF(bmi2, 32 * 2 + 8, 0)
-// JL_FEATURE_DEF(invpcid, 32 * 2 + 10, 0) // Priviledged instruction
+// JL_FEATURE_DEF(invpcid, 32 * 2 + 10, 0) // Privileged instruction
 JL_FEATURE_DEF(rtm, 32 * 2 + 11, 0)
 // JL_FEATURE_DEF(mpx, 32 * 2 + 14, 0) // Deprecated in LLVM 10.0
 JL_FEATURE_DEF(avx512f, 32 * 2 + 16, 0)
@@ -69,12 +69,12 @@ JL_FEATURE_DEF(rdpid, 32 * 3 + 22, 0)
 JL_FEATURE_DEF(cldemote, 32 * 3 + 25, 0)
 JL_FEATURE_DEF(movdiri, 32 * 3 + 27, 0)
 JL_FEATURE_DEF(movdir64b, 32 * 3 + 28, 0)
-JL_FEATURE_DEF(enqcmd, 32 * 3 + 29, 90000)
+JL_FEATURE_DEF(enqcmd, 32 * 3 + 29, 0)
 
 // EAX=7,ECX=0: EDX
 // JL_FEATURE_DEF(avx5124vnniw, 32 * 4 + 2, ?????)
 // JL_FEATURE_DEF(avx5124fmaps, 32 * 4 + 3, ?????)
-JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 90000)
+JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 0)
 JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
 JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
 JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
@@ -108,7 +108,7 @@ JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
 JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)
 
 // EAX=7,ECX=1: EAX
-JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 90000)
+JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 0)
 
 // EAX=0x14,ECX=0: EBX
 JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)
diff --git a/src/flisp/LICENSE b/src/flisp/LICENSE
new file mode 100644
index 00000000000000..bf599268bffe8a
--- /dev/null
+++ b/src/flisp/LICENSE
@@ -0,0 +1,26 @@
+Copyright (c) 2009 Jeff Bezanson
+
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    * Neither the author nor the names of any contributors may be used to
+      endorse or promote products derived from this software without specific
+      prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/src/flisp/Makefile b/src/flisp/Makefile
index 2158e2a5b24990..7a363b0ec13d72 100644
--- a/src/flisp/Makefile
+++ b/src/flisp/Makefile
@@ -32,23 +32,29 @@ OBJS := $(SRCS:%.c=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%.c=$(BUILDDIR)/%.dbg.obj)
 LLT_release := $(LLT_BUILDDIR)/libsupport.a
 LLT_debug := $(LLT_BUILDDIR)/libsupport-debug.a
-LIBFILES_release := $(LLT_release) $(LIBUV) $(LIBUTF8PROC)
-LIBFILES_debug := $(LLT_debug) $(LIBUV) $(LIBUTF8PROC)
+LIBFILES_release := $(LLT_release) $(LIBUV)
+LIBFILES_debug := $(LLT_debug) $(LIBUV)
 LIBS :=
 ifneq ($(OS),WINNT)
 LIBS += -lpthread
 endif
 
+ifeq ($(USE_SYSTEM_UTF8PROC),0)
+LIBFILES_release += $(LIBUTF8PROC)
+LIBFILES_debug += $(LIBUTF8PROC)
+else
+LIBS += $(LIBUTF8PROC)
+endif
+
+
 FLAGS := -I$(LLTSRCDIR) $(JCFLAGS) $(HFILEDIRS:%=-I%) \
         -I$(LIBUV_INC) -I$(UTF8PROC_INC) -I$(build_includedir) $(LIBDIRS:%=-L%) \
         -DLIBRARY_EXPORTS -DUTF8PROC_EXPORTS
-ifneq ($(USEMSVC), 1)
 ifneq ($(OS), emscripten)
 FLAGS += -DUSE_COMPUTED_GOTO
 endif
 FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
 FLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
-endif
 
 DEBUGFLAGS += $(FLAGS)
 SHIPFLAGS += $(FLAGS)
@@ -67,8 +73,8 @@ $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(DEBUGFLAGS) -c $< -o $@)
 
-FLISP_SRCS := $(flisp.c cvalues.c types.c flisp.h print.c read.c equal.c:%=$(SRCDIR)/%)
-FLMAIN_SRCS := $(flmain.c flisp.h:%=$(SRCDIR)/%)
+FLISP_SRCS := $(addprefix $(SRCDIR)/,flisp.c cvalues.c types.c flisp.h print.c read.c equal.c)
+FLMAIN_SRCS := $(addprefix $(SRCDIR)/,flmain.c flisp.h)
 $(BUILDDIR)/flisp.o: $(FLISP_SRCS)
 $(BUILDDIR)/flisp.dbg.obj: $(FLISP_SRCS)
 $(BUILDDIR)/flmain.o: $(FLMAIN_SRCS)
@@ -87,11 +93,7 @@ $(BUILDDIR)/$(LIBTARGET).a: $(OBJS) | $(BUILDDIR)
 	rm -rf $@
 	@$(call PRINT_LINK, $(AR) -rcs $@ $(OBJS))
 
-ifneq ($(USEMSVC), 1)
 CCLD := $(CC)
-else
-CCLD := $(LD)
-endif
 
 $(BUILDDIR)/$(EXENAME)-debug$(EXE): $(DOBJS) $(LIBFILES_debug) $(BUILDDIR)/$(LIBTARGET)-debug.a $(BUILDDIR)/flmain.dbg.obj | $(BUILDDIR)/flisp.boot
 	@$(call PRINT_LINK, $(CCLD) $(DEBUGFLAGS) $(JLDFLAGS) $(DOBJS) $(BUILDDIR)/flmain.dbg.obj -o $@ $(BUILDDIR)/$(LIBTARGET)-debug.a $(LIBFILES_debug) $(LIBS) $(OSLIBS))
@@ -107,9 +109,13 @@ $(BUILDDIR)/host/Makefile:
 	@echo 'BUILDING_HOST_TOOLS=1' >> $@
 	@echo 'include $(SRCDIR)/Makefile' >> $@
 
-$(BUILDDIR)/host/$(EXENAME): $(BUILDDIR)/host/Makefile
+$(BUILDDIR)/host/$(EXENAME): $(BUILDDIR)/host/Makefile | ${BUILDDIR}/host/flisp.boot
 	make -C $(BUILDDIR)/host $(EXENAME)
 
+
+$(BUILDDIR)/host/flisp.boot: $(SRCDIR)/flisp.boot | $(BUILDDIR)/host/Makefile
+	cp $< $@
+
 ifneq ($(BUILDDIR),.)
 ifneq ($(BUILDDIR),$(SRCDIR))
 $(BUILDDIR)/flisp.boot: $(SRCDIR)/flisp.boot | $(BUILDDIR)
@@ -118,9 +124,7 @@ endif
 endif
 
 test:
-ifneq ($(USEMSVC), 1)
 	$(call spawn,./$(EXENAME)$(EXE)) unittest.lsp
-endif
 
 clean:
 	rm -f $(BUILDDIR)/*.o
diff --git a/src/flisp/flisp.boot b/src/flisp/flisp.boot
index a94b74ec978ace..94feba815b399b 100644
--- a/src/flisp/flisp.boot
+++ b/src/flisp/flisp.boot
@@ -35,10 +35,11 @@
 	  *interactive* #f *syntax-environment*
 	  #table(throw #fn("9000r2c0c1c2c3L2|}L4L2;" [raise list quote
 						      thrown-value])  assert #fn(";000r1c0|]c1c2c3|L2L2L2L4;" [if
-  raise quote assert-failed])  case #fn("@000s1]\x8c6g6c0O2c130c2g7|L2L1c3c4c5g6g7q2}32KL3;" [#fn("8000r2}c0\x8250c0;}\x8540^;}C6=0c1|e2}31L3;}?6=0c3|e2}31L3;}N\x85>0c3|e2}M31L3;e4c5}326=0c6|c7}L2L3;c8|c7}L2L3;" [else
+  raise quote assert-failed])  case #fn("A000s1]\x8c6g6c0O2c130c2g7|L2L1c3c4L1c5c6g6g7q2}3232L3;" [#fn("8000r2}c0\x8250c0;}\x8540^;}C6=0c1|e2}31L3;}?6=0c3|e2}31L3;}N\x85>0c3|e2}M31L3;e4c5}326=0c6|c7}L2L3;c8|c7}L2L3;" [else
   eq? quote-value eqv? every #.symbol? memq quote memv] vals->cond)
-  #fn(gensym) let cond #fn(map) #fn("7000r1~M\x7f|M32|NK;" [])])  unwind-protect #fn("A000r2c030c030c1g7c2_}L3L2L1c3c4|c2g6L1c5g7L1c6g6L2L3L3L3g7L1L3L3;" [#fn(gensym)
-  let lambda prog1 trycatch begin raise])  with-bindings #fn("F000s1c0c1|32c0e2|32c0c3|32c4c5L1c0c6g8g633L1c7c0c8g6g73331c9c:c7}31Kc:c7c0c;g6g83331KL3L144;" [#fn(map)
+  #fn(gensym) let #fn(nconc) cond #fn(map)
+  #fn("7000r1~M\x7f|M32|NK;" [])])  unwind-protect #fn("A000r2c030c030c1g7c2_}L3L2L1c3c4|c2g6L1c5g7L1c6g6L2L3L3L3g7L1L3L3;" [#fn(gensym)
+  let lambda prog1 trycatch begin raise])  with-bindings #fn("G000s1c0c1|32c0e2|32c0c3|32c4c5L1c0c6g8g633L1c7c0c8g6g73331c9c4c:L1c7}3132c4c:L1c7c0c;g6g8333132L3L144;" [#fn(map)
   #.car cadr #fn("5000r1c040;" [#fn(gensym)])
   #fn(nconc) let #.list #fn(copy-list) #fn("7000r2c0|}L3;" [set!])
   unwind-protect begin #fn("7000r2c0|}L3;" [set!])])  time #fn(">000r1c030c1g5c2L1L2L1c3|c4c5c6c2L1g5L3c7L4L3L3;" [#fn(gensym)
@@ -50,8 +51,8 @@
 							       *output-stream*
 							       #fn(copy-list)])  cond #fn(":000s0]\x8c5g5c0g5q1O2g5M|41;" [#fn(">000r1|?640^;|Mg5Mc0<17802g5M]<6C0g5N\x8560g5M;c1g5NK;g5N\x85@0c2g5M~M|N31L3;g5\x84c3\x82\x980e4e5g531316c0e6e5g53131c7g6g5ML2L1c8g6c1e9e5g53131K~M|N31L4L3;c:30c7g6g5ML2L1c8g6e5g531g6L2~M|N31L4L3;c8g5Mc1g5NK~M|N31L4;" [else
   begin or => 1arg-lambda? caddr caadr let if cddr #fn(gensym)] cond-clauses->if)])  with-input-from #fn("<000s1c0c1L1c2|L2L1L1c3}3143;" [#fn(nconc)
-  with-bindings *input-stream* #fn(copy-list)])  quasiquote #fn("6000r1e0|41;" [bq-process])  letrec #fn("=000s1c0c1L1c2c3|32L1c2c4|32c5}3134c2c6|32K;" [#fn(nconc)
-  lambda #fn(map) #.car #fn("7000r1c0c1|31K;" [set! #fn(copy-list)])
+  with-bindings *input-stream* #fn(copy-list)])  quasiquote #fn("7000r1e0|`42;" [bq-process])  letrec #fn(">000s1c0c0c1L1c2c3|32L1c2c4|32c5}3134L1c2c6|3242;" [#fn(nconc)
+  lambda #fn(map) #.car #fn("8000r1c0c1L1c2|3142;" [#fn(nconc) set! #fn(copy-list)])
   #fn(copy-list) #fn("5000r1e040;" [void])])  receive #fn("?000s2c0c1_}L3c2c1L1|L1c3g23133L3;" [call-with-values
   lambda #fn(nconc) #fn(copy-list)])  let* #fn("@000s1|?6E0c0c1L1_L1c2}3133L1;c0c1L1e3|31L1L1c2|NF6H0c0c4L1|NL1c2}3133L1530}3133e5|31L2;" [#fn(nconc)
   lambda #fn(copy-list) caar let* cadar])  when #fn(";000s1c0|c1}K^L4;" [if
@@ -89,14 +90,17 @@
 	  #fn("6000r1|b4[;" [] bcode:sp) bcode:stack #fn("8000r2|b4|b4[}w\\;" [] bcode:stack)
 	  box-vars #fn("9000r2]\x8c6g6c0|g6q2O2g6M\x8e1}41;" [#fn("9000r1|F6Q0|M\x846B0e0~c1e2|M3133530]2\x7fM|N41;];" [emit
   box caddr])] box-vars)
-	  bq-bracket #fn("7000r1|?6<0c0e1|31L2;|Mc2\x8290c0|\x84L2;|Mc3\x8290c4|\x84L2;|Mc5\x8250|\x84;c0e1|31L2;" [#.list
-  bq-process unquote unquote-splicing copy-list unquote-nsplicing] bq-bracket)
-	  bq-process #fn("A000r1]]c0m52c1m62e2|316Z0|H6S0e3e4|3131g7Mc5\x8290c6g7NK;c7c6g7L3;|;|?680c8|L2;|Mc9\x82=0e3e3|\x843141;|Mc:\x8250|\x84;e;g5|327o0e<|31c=g6|32g7\x8580c5g8K;g8N\x85@0c>g8Me3g731L3;c?c@g8Ke3g731L142;|_]g7F16:02g7Mc:<@6H02eAg7M31g8Km82g7Nm75\x0a/2g7F6@0eBg8g7\x84L1325N0g7\x85;0eCg8315@0eBg8e3g731L132g9N\x8560g9M;eDg9b23216J02eDg9Mb23216<02cEeFg931<6@0c>eGg931g9\x84L3;cHg9K;" [#fn("6000r1|F16B02|Mc0<17802|Mc1<17702|c2<;" [unquote-splicing
-  unquote-nsplicing unquote] splice-form?)
-  #fn("6000r1|F16802|Mc0<650|\x84;e1|41;" [unquote bq-process] bq-bracket1)
-  self-evaluating? bq-process vector->list list #.vector #.apply quote
-  quasiquote unquote any lastcdr #fn(map) #.cons #fn(nconc) list* bq-bracket
-  nreconc reverse! length= #.list caar cadar nconc] bq-process)
+	  bq-bracket #fn(";000r2|?6=0c0e1|}32L2;|Mc2\x82R0}`W680c0|NK;c0c3c4e1|N}ax32L3L2;|Mc5\x82S0}`W690c6|\x84L2;c0c0c7e1|\x84}ax32L3L2;|Mc8\x82O0}`W650|\x84;c0c0c9e1|\x84}ax32L3L2;c0e1|}32L2;" [#.list
+  bq-process unquote #.cons 'unquote unquote-splicing copy-list 'unquote-splicing
+  unquote-nsplicing 'unquote-nsplicing] bq-bracket)
+	  bq-bracket1 #fn(":000r2|F16802|Mc0<6K0}`W650|\x84;c1c2e3|N}ax32L3;e3|}42;" [unquote
+  #.cons 'unquote bq-process] bq-bracket1)
+	  bq-process #fn("<000r2|C680c0|L2;|H6T0e1e2|31}32g6Mc3\x8290c4g6NK;c5c4g6L3;|?640|;|Mc6\x82B0c3c7e1|\x84}aw32L3;|Mc8\x82W0}`W16:02e9|b232650|\x84;c:c;e1|N}ax32L3;e<e=|327t0e>|31c?c@}q1|32g6\x8580c3g7K;g7N\x85A0c:g7Me1g6}32L3;cAcBg7Ke1g6}32L142;]\x8c6g6cC}g6q2O2g6M\x8e1|_42;" [quote
+  bq-process vector->list #.list #.vector #.apply quasiquote 'quasiquote
+  unquote length= #.cons 'unquote any splice-form? lastcdr #fn(map)
+  #fn("7000r1e0|~42;" [bq-bracket1]) #fn(nconc)
+  #fn(list*) #fn("=000r2|\x85;0c0e1}31K;|F6n0|Mc2\x82W0c0e3}~`W670|N5C0c4c5L2e6|N~ax32L232K;\x7fM|Ne7|M~32}K42;c0e1e6|~32}K31K;" [nconc
+  reverse! unquote nreconc #.list 'unquote bq-process bq-bracket])] bq-process)
 	  builtin->instruction #fn("8000r1c0~|^43;" [#fn(get)] [#table(#.equal? equal?  #.* *  #.car car  #.apply apply  #.aref aref  #.- -  #.boolean? boolean?  #.builtin? builtin?  #.null? null?  #.eqv? eqv?  #.function? function?  #.bound? bound?  #.cdr cdr  #.list list  #.set-car! set-car!  #.cons cons  #.atom? atom?  #.set-cdr! set-cdr!  #.symbol? symbol?  #.eq? eq?  #.vector vector  #.not not  #.pair? pair?  #.number? number?  #.div0 div0  #.aset! aset!  #.+ +  #.= =  #.compare compare  #.vector? vector?  #./ /  #.< <  #.fixnum? fixnum?)])
 	  caaaar #fn("5000r1|MMMM;" [] caaaar) caaadr
 	  #fn("5000r1|\x84MM;" [] caaadr) caaar #fn("5000r1|MMM;" [] caaar)
@@ -370,6 +374,8 @@
 	  simple-sort #fn("9000r1|A17602|NA640|;|Me0c1g5|q2c2g5q142;" [call-with-values
   #fn("7000r0e0c1~q1\x7fN42;" [separate #fn("6000r1|~X;" [])])
   #fn("9000r2c0e1|31~L1e1}3143;" [#fn(nconc) simple-sort])] simple-sort)
+	  splice-form? #fn("7000r1|F16X02|Mc0<17N02|Mc1<17D02|Mc2<16:02e3|b23217702|c2<;" [unquote-splicing
+  unquote-nsplicing unquote length>] splice-form?)
 	  string.join #fn("9000r2|\x8550c0;c130c2g6|M322c3c4g6}q2|N322c5g641;" [""
   #fn(buffer) #fn(io.write) #fn(for-each)
   #fn("7000r1c0~\x7f322c0~|42;" [#fn(io.write)])
diff --git a/src/flisp/flisp.c b/src/flisp/flisp.c
index 6f311fc7c8c1d4..86421f6d966cf8 100644
--- a/src/flisp/flisp.c
+++ b/src/flisp/flisp.c
@@ -41,6 +41,7 @@
 #include <locale.h>
 #include <limits.h>
 #include <errno.h>
+#include <libgen.h> // defines dirname
 
 #include "platform.h"
 #include "libsupport.h"
@@ -51,13 +52,6 @@
 extern "C" {
 #endif
 
-#if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_)
-#include <malloc.h>
-JL_DLLEXPORT char * dirname(char *);
-#else
-#include <libgen.h>
-#endif
-
 static const char *const builtin_names[] =
     { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
       NULL, NULL, NULL, NULL,
diff --git a/src/flisp/flisp.h b/src/flisp/flisp.h
index 233c3340d0e48d..209a4f2d4fcdb5 100644
--- a/src/flisp/flisp.h
+++ b/src/flisp/flisp.h
@@ -6,6 +6,14 @@
 
 #include "platform.h"
 #include "libsupport.h"
+#include "utils.h"
+#include "bitvector.h"
+#include "timefuncs.h"
+#include "strtod.h"
+#include "dirpath.h"
+#include "hashing.h"
+#include "ptrhash.h"
+#include "htable.h"
 #include "uv.h"
 
 //#define MEMDEBUG
@@ -204,13 +212,13 @@ typedef struct _ectx_t {
         for(l__ca=1; l__ca; l__ca=0, fl_restorestate(fl_ctx, &_ctx))
 
 #if defined(_OS_WINDOWS_)
-__declspec(noreturn) void lerrorf(fl_context_t *fl_ctx, value_t e, const char *format, ...);
+__declspec(noreturn) void lerrorf(fl_context_t *fl_ctx, value_t e, const char *format, ...) JL_NOTSAFEPOINT;
 __declspec(noreturn) void lerror(fl_context_t *fl_ctx, value_t e, const char *msg) JL_NOTSAFEPOINT;
 __declspec(noreturn) void fl_raise(fl_context_t *fl_ctx, value_t e);
 __declspec(noreturn) void type_error(fl_context_t *fl_ctx, const char *fname, const char *expected, value_t got);
 __declspec(noreturn) void bounds_error(fl_context_t *fl_ctx, const char *fname, value_t arr, value_t ind);
 #else
-void lerrorf(fl_context_t *fl_ctx, value_t e, const char *format, ...) __attribute__ ((__noreturn__));
+void lerrorf(fl_context_t *fl_ctx, value_t e, const char *format, ...) __attribute__ ((__noreturn__)) JL_NOTSAFEPOINT;
 void lerror(fl_context_t *fl_ctx, value_t e, const char *msg) __attribute__((__noreturn__)) JL_NOTSAFEPOINT;
 void fl_raise(fl_context_t *fl_ctx, value_t e) __attribute__ ((__noreturn__));
 void type_error(fl_context_t *fl_ctx, const char *fname, const char *expected, value_t got) __attribute__ ((__noreturn__));
@@ -336,10 +344,10 @@ value_t cvalue_static_cstrn(fl_context_t *fl_ctx, const char *str, size_t n);
 value_t cvalue_static_cstring(fl_context_t *fl_ctx, const char *str);
 value_t string_from_cstr(fl_context_t *fl_ctx, char *str);
 value_t string_from_cstrn(fl_context_t *fl_ctx, char *str, size_t n);
-int fl_isstring(fl_context_t *fl_ctx, value_t v);
-int fl_isnumber(fl_context_t *fl_ctx, value_t v);
-int fl_isgensym(fl_context_t *fl_ctx, value_t v);
-int fl_isiostream(fl_context_t *fl_ctx, value_t v);
+int fl_isstring(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
+int fl_isnumber(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
+int fl_isgensym(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
+int fl_isiostream(fl_context_t *fl_ctx, value_t v) JL_NOTSAFEPOINT;
 ios_t *fl_toiostream(fl_context_t *fl_ctx, value_t v, const char *fname);
 value_t cvalue_compare(value_t a, value_t b);
 int numeric_compare(fl_context_t *fl_ctx, value_t a, value_t b, int eq, int eqnans, char *fname);
@@ -500,7 +508,7 @@ struct _fl_context_t {
     void *jlbuf;
 };
 
-static inline void argcount(fl_context_t *fl_ctx, const char *fname, uint32_t nargs, uint32_t c)
+static inline void argcount(fl_context_t *fl_ctx, const char *fname, uint32_t nargs, uint32_t c) JL_NOTSAFEPOINT
 {
     if (__unlikely(nargs != c))
         lerrorf(fl_ctx, fl_ctx->ArgError,"%s: too %s arguments", fname, nargs<c ? "few":"many");
diff --git a/src/flisp/iostream.c b/src/flisp/iostream.c
index eee28910853c5e..b2b2477bb43c64 100644
--- a/src/flisp/iostream.c
+++ b/src/flisp/iostream.c
@@ -164,20 +164,6 @@ value_t fl_ioputc(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     return fixnum(ios_pututf8(s, wc));
 }
 
-value_t fl_ioungetc(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
-{
-    argcount(fl_ctx, "io.ungetc", nargs, 2);
-    ios_t *s = toiostream(fl_ctx, args[0], "io.ungetc");
-    if (!iscprim(args[1]) || ((cprim_t*)ptr(args[1]))->type != fl_ctx->wchartype)
-        type_error(fl_ctx, "io.ungetc", "wchar", args[1]);
-    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[1]));
-    if (wc >= 0x80) {
-        lerror(fl_ctx, fl_ctx->ArgError, "io_ungetc: unicode not yet supported");
-    }
-    s->u_colno -= utf8proc_charwidth(wc);
-    return fixnum(ios_ungetc((int)wc,s));
-}
-
 value_t fl_ioflush(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
     argcount(fl_ctx, "io.flush", nargs, 1);
@@ -217,6 +203,15 @@ value_t fl_iolineno(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     return size_wrap(fl_ctx, s->lineno);
 }
 
+value_t fl_iosetlineno(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+{
+    argcount(fl_ctx, "io.set-lineno!", nargs, 2);
+    ios_t *s = toiostream(fl_ctx, args[0], "io.set-lineno!");
+    size_t new_lineno = tosize(fl_ctx, args[1], "io.set-lineno!");
+    s->lineno = new_lineno;
+    return args[1];
+}
+
 value_t fl_iocolno(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
     argcount(fl_ctx, "input-port-column", nargs, 1);
@@ -235,6 +230,17 @@ value_t fl_ioseek(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
     return fl_ctx->T;
 }
 
+value_t fl_ioskip(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+{
+    argcount(fl_ctx, "io.skip", nargs, 2);
+    ios_t *s = toiostream(fl_ctx, args[0], "io.skip");
+    int64_t pos = (ssize_t)tosize(fl_ctx, args[1], "io.skip");
+    int64_t res = ios_skip(s, pos);
+    if (res < 0)
+        return fl_ctx->F;
+    return fl_ctx->T;
+}
+
 value_t fl_iopos(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
 {
     argcount(fl_ctx, "io.pos", nargs, 1);
@@ -428,9 +434,9 @@ static const builtinspec_t iostreamfunc_info[] = {
     { "io.close", fl_ioclose },
     { "io.eof?" , fl_ioeof },
     { "io.seek" , fl_ioseek },
+    { "io.skip" , fl_ioskip },
     { "io.pos",   fl_iopos },
     { "io.getc" , fl_iogetc },
-    { "io.ungetc", fl_ioungetc },
     { "io.putc" , fl_ioputc },
     { "io.peekc" , fl_iopeekc },
     { "io.discardbuffer", fl_iopurge },
@@ -442,6 +448,7 @@ static const builtinspec_t iostreamfunc_info[] = {
     { "io.tostring!", fl_iotostring },
     { "input-port-line", fl_iolineno },
     { "input-port-column", fl_iocolno },
+    { "io.set-lineno!", fl_iosetlineno },
 
     { NULL, NULL }
 };
diff --git a/src/flisp/julia_charmap.h b/src/flisp/julia_charmap.h
index bed88a9ace4cd9..3c54eaf98f484c 100644
--- a/src/flisp/julia_charmap.h
+++ b/src/flisp/julia_charmap.h
@@ -1,7 +1,13 @@
 /* Array of {original codepoint, replacement codepoint} normalizations
    to perform on Julia identifiers, to canonicalize characters that
-   are both easily confused and easily inputted by accident. */
+   are both easily confused and easily inputted by accident.
+
+   Important: when this table is updated, also update the corresponding table
+              in base/strings/unicode.jl */
 static const uint32_t charmap[][2] = {
     { 0x025B, 0x03B5 }, // latin small letter open e -> greek small letter epsilon
     { 0x00B5, 0x03BC }, // micro sign -> greek small letter mu
+    { 0x00B7, 0x22C5 }, // middot char -> dot operator (#25098)
+    { 0x0387, 0x22C5 }, // Greek interpunct -> dot operator (#25098)
+    { 0x2212, 0x002D }, // minus -> hyphen-minus (#26193)
 };
diff --git a/src/flisp/julia_extensions.c b/src/flisp/julia_extensions.c
index 11f17309401c25..9fcd3e9789af4a 100644
--- a/src/flisp/julia_extensions.c
+++ b/src/flisp/julia_extensions.c
@@ -72,7 +72,9 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat)
             cat == UTF8PROC_CATEGORY_SC ||  // allow currency symbols
             // other symbols, but not arrows or replacement characters
             (cat == UTF8PROC_CATEGORY_SO && !(wc >= 0x2190 && wc <= 0x21FF) &&
-             wc != 0xfffc && wc != 0xfffd) ||
+             wc != 0xfffc && wc != 0xfffd &&
+             wc != 0x233f &&  // notslash
+             wc != 0x00a6) || // broken bar
 
             // math symbol (category Sm) whitelist
             (wc >= 0x2140 && wc <= 0x2a1c &&
@@ -80,9 +82,10 @@ static int is_wc_cat_id_start(uint32_t wc, utf8proc_category_t cat)
               wc == 0x223f || wc == 0x22be || wc == 0x22bf || // ∿, ⊾, ⊿
               wc == 0x22a4 || wc == 0x22a5 ||   // ⊤ ⊥
 
-              (wc >= 0x2202 && wc <= 0x2233 &&
+              (wc >= 0x2200 && wc <= 0x2233 &&
                (wc == 0x2202 || wc == 0x2205 || wc == 0x2206 || // ∂, ∅, ∆
                 wc == 0x2207 || wc == 0x220e || wc == 0x220f || // ∇, ∎, ∏
+                wc == 0x2200 || wc == 0x2203 || wc == 0x2204 || // ∀, ∃, ∄
                 wc == 0x2210 || wc == 0x2211 || // ∐, ∑
                 wc == 0x221e || wc == 0x221f || // ∞, ∟
                 wc >= 0x222b)) || // ∫, ∬, ∭, ∮, ∯, ∰, ∱, ∲, ∳
@@ -152,7 +155,7 @@ JL_DLLEXPORT int jl_id_char(uint32_t wc)
 #include "julia_opsuffs.h"
 
 // chars that can follow an operator (e.g. +) and be parsed as part of the operator
-int jl_op_suffix_char(uint32_t wc)
+JL_DLLEXPORT int jl_op_suffix_char(uint32_t wc)
 {
     static htable_t jl_opsuffs; // XXX: requires uv_once
     if (!jl_opsuffs.size) { // initialize hash table of suffixes
@@ -325,22 +328,22 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg
     ios_t *s = fl_toiostream(fl_ctx, args[1], "accum-julia-symbol");
     if (!iscprim(args[0]) || ((cprim_t*)ptr(args[0]))->type != fl_ctx->wchartype)
         type_error(fl_ctx, "accum-julia-symbol", "wchar", args[0]);
-    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0]));
+    uint32_t wc = *(uint32_t*)cp_data((cprim_t*)ptr(args[0])); // peek the first character we'll read
     ios_t str;
     int allascii = 1;
     ios_mem(&str, 0);
     do {
-        allascii &= (wc <= 0x7f);
         ios_getutf8(s, &wc);
         if (wc == '!') {
             uint32_t nwc = 0;
             ios_peekutf8(s, &nwc);
             // make sure != is always an operator
             if (nwc == '=') {
-                ios_ungetc('!', s);
+                ios_skip(s, -1);
                 break;
             }
         }
+        allascii &= (wc <= 0x7f);
         ios_pututf8(&str, wc);
         if (safe_peekutf8(fl_ctx, s, &wc) == IOS_EOF)
             break;
@@ -349,6 +352,15 @@ value_t fl_accum_julia_symbol(fl_context_t *fl_ctx, value_t *args, uint32_t narg
     return symbol(fl_ctx, allascii ? str.buf : normalize(fl_ctx, str.buf));
 }
 
+/* convert a string to a symbol, first applying normalization */
+value_t fl_string2normsymbol(fl_context_t *fl_ctx, value_t *args, uint32_t nargs)
+{
+    argcount(fl_ctx, "string->normsymbol", nargs, 1);
+    if (!fl_isstring(fl_ctx, args[0]))
+        type_error(fl_ctx, "string->normsymbol", "string", args[0]);
+    return symbol(fl_ctx, normalize(fl_ctx, (char*)cvalue_data(args[0])));
+}
+
 static const builtinspec_t julia_flisp_func_info[] = {
     { "skip-ws", fl_skipws },
     { "accum-julia-symbol", fl_accum_julia_symbol },
@@ -358,6 +370,7 @@ static const builtinspec_t julia_flisp_func_info[] = {
     { "op-suffix-char?", fl_julia_op_suffix_char },
     { "strip-op-suffix", fl_julia_strip_op_suffix },
     { "underscore-symbol?", fl_julia_underscore_symbolp },
+    { "string->normsymbol", fl_string2normsymbol },
     { NULL, NULL }
 };
 
diff --git a/src/flisp/print.c b/src/flisp/print.c
index 789a42133c6b79..2b20d0d98b225d 100644
--- a/src/flisp/print.c
+++ b/src/flisp/print.c
@@ -643,10 +643,10 @@ static void cvalue_printdata(fl_context_t *fl_ctx, ios_t *f, void *data,
             if (init == 0) {
 #if defined(RTLD_SELF)
                 jl_static_print = (size_t (*)(ios_t*, void*))
-                    (uintptr_t)dlsym(RTLD_SELF, "jl_static_show");
+                    (uintptr_t)dlsym(RTLD_SELF, "ijl_static_show");
 #elif defined(RTLD_DEFAULT)
                 jl_static_print = (size_t (*)(ios_t*, void*))
-                    (uintptr_t)dlsym(RTLD_DEFAULT, "jl_static_show");
+                    (uintptr_t)dlsym(RTLD_DEFAULT, "ijl_static_show");
 #elif defined(_OS_WINDOWS_)
                 HMODULE handle;
                 if (GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
@@ -654,7 +654,7 @@ static void cvalue_printdata(fl_context_t *fl_ctx, ios_t *f, void *data,
                                        (LPCWSTR)(&cvalue_printdata),
                                        &handle)) {
                     jl_static_print = (size_t (*)(ios_t*, void*))
-                        (uintptr_t)GetProcAddress(handle, "jl_static_show");
+                        (uintptr_t)GetProcAddress(handle, "ijl_static_show");
                 }
 #endif
                 init = 1;
diff --git a/src/flisp/profile.scm b/src/flisp/profile.scm
index f5486996703cf7..64a98326c79299 100644
--- a/src/flisp/profile.scm
+++ b/src/flisp/profile.scm
@@ -69,4 +69,3 @@
           (for-each (lambda (k)
                       (put! *profiles* k (cons 0 (cons 0 0))))
                     (table.keys *profiles*)))))
-
diff --git a/src/flisp/read.c b/src/flisp/read.c
index 494303ef9add7e..9a480e0536c7aa 100644
--- a/src/flisp/read.c
+++ b/src/flisp/read.c
@@ -164,23 +164,25 @@ static void accumchar(fl_context_t *fl_ctx, char c, int *pi)
 // return: 1 if escaped (forced to be symbol)
 static int read_token(fl_context_t *fl_ctx, char c, int digits)
 {
-    int i=0, ch, escaped=0, issym=0, first=1;
+    int i=0, ch, escaped=0, issym=0, nc=0;
 
     while (1) {
-        if (!first) {
-            ch = ios_getc(readF(fl_ctx));
+        if (nc != 0) {
+            if (nc != 1)
+                (void)ios_getc(readF(fl_ctx)); // consume ch
+            ch = ios_peekc(readF(fl_ctx));
             if (ch == IOS_EOF)
                 goto terminate;
             c = (char)ch;
         }
-        first = 0;
         if (c == '|') {
             issym = 1;
             escaped = !escaped;
         }
         else if (c == '\\') {
             issym = 1;
-            ch = ios_getc(readF(fl_ctx));
+            (void)ios_getc(readF(fl_ctx)); // consume '\'
+            ch = ios_peekc(readF(fl_ctx));
             if (ch == IOS_EOF)
                 goto terminate;
             accumchar(fl_ctx, (char)ch, &i);
@@ -191,8 +193,10 @@ static int read_token(fl_context_t *fl_ctx, char c, int digits)
         else {
             accumchar(fl_ctx, c, &i);
         }
+        nc++;
     }
-    ios_ungetc(c, readF(fl_ctx));
+    if (nc == 0)
+        ios_skip(readF(fl_ctx), -1); // rewind stream for the caller, to prepare for throwing an error
  terminate:
     fl_ctx->readbuf[i++] = '\0';
     return issym;
@@ -376,7 +380,7 @@ static uint32_t peek(fl_context_t *fl_ctx)
     }
     else if (c == ',') {
         fl_ctx->readtoktype = TOK_COMMA;
-        ch = ios_getc(readF(fl_ctx));
+        ch = ios_peekc(readF(fl_ctx));
         if (ch == IOS_EOF)
             return fl_ctx->readtoktype;
         if ((char)ch == '@')
@@ -384,7 +388,8 @@ static uint32_t peek(fl_context_t *fl_ctx)
         else if ((char)ch == '.')
             fl_ctx->readtoktype = TOK_COMMADOT;
         else
-            ios_ungetc((char)ch, readF(fl_ctx));
+            return fl_ctx->readtoktype;
+        (void)ios_getc(readF(fl_ctx)); // consume ch
     }
     else {
         if (!read_token(fl_ctx, c, 0)) {
@@ -486,13 +491,15 @@ static value_t read_string(fl_context_t *fl_ctx)
                 free(buf);
                 lerror(fl_ctx, fl_ctx->ParseError, "read: end of input in escape sequence");
             }
-            j=0;
+            j = 0;
             if (octal_digit(c)) {
-                do {
+                while (1) {
                     eseq[j++] = c;
-                    c = ios_getc(readF(fl_ctx));
-                } while (octal_digit(c) && j<3 && (c!=IOS_EOF));
-                if (c!=IOS_EOF) ios_ungetc(c, readF(fl_ctx));
+                    c = ios_peekc(readF(fl_ctx));
+                    if (c == IOS_EOF || !octal_digit(c) || j >= 3)
+                        break;
+                    (void)ios_getc(readF(fl_ctx)); // consume c
+                }
                 eseq[j] = '\0';
                 wc = strtol(eseq, NULL, 8);
                 // \DDD and \xXX read bytes, not characters
@@ -501,12 +508,13 @@ static value_t read_string(fl_context_t *fl_ctx)
             else if ((c=='x' && (ndig=2)) ||
                      (c=='u' && (ndig=4)) ||
                      (c=='U' && (ndig=8))) {
-                c = ios_getc(readF(fl_ctx));
-                while (hex_digit(c) && j<ndig && (c!=IOS_EOF)) {
+                while (1) {
+                    c = ios_peekc(readF(fl_ctx));
+                    if (c == IOS_EOF || !hex_digit(c) || j >= ndig)
+                        break;
                     eseq[j++] = c;
-                    c = ios_getc(readF(fl_ctx));
+                    (void)ios_getc(readF(fl_ctx)); // consume c
                 }
-                if (c!=IOS_EOF) ios_ungetc(c, readF(fl_ctx));
                 eseq[j] = '\0';
                 if (j) wc = strtol(eseq, NULL, 16);
                 if (!j || wc > 0x10ffff) {
diff --git a/src/flisp/system.lsp b/src/flisp/system.lsp
index 482a5b60a665dc..a56c73c88d6da0 100644
--- a/src/flisp/system.lsp
+++ b/src/flisp/system.lsp
@@ -339,59 +339,84 @@
 	   (symbol? x)
            (eq? x (top-level-value x)))))
 
-(define-macro (quasiquote x) (bq-process x))
-
-(define (bq-process x)
-  (define (splice-form? x)
-    (or (and (pair? x) (or (eq? (car x) 'unquote-splicing)
-			   (eq? (car x) 'unquote-nsplicing)))
-	(eq? x 'unquote)))
-  ; bracket without splicing
-  (define (bq-bracket1 x)
-    (if (and (pair? x) (eq? (car x) 'unquote))
-	(cadr x)
-	(bq-process x)))
-  (cond ((self-evaluating? x)
-         (if (vector? x)
-             (let ((body (bq-process (vector->list x))))
-               (if (eq? (car body) 'list)
-                   (cons vector (cdr body))
-		   (list apply vector body)))
-	     x))
-        ((atom? x)                    (list 'quote x))
-        ((eq? (car x) 'quasiquote)    (bq-process (bq-process (cadr x))))
-        ((eq? (car x) 'unquote)       (cadr x))
+(define-macro (quasiquote x) (bq-process x 0))
+
+(define (splice-form? x)
+  (or (and (pair? x) (or (eq? (car x) 'unquote-splicing)
+                         (eq? (car x) 'unquote-nsplicing)
+                         (and (eq? (car x) 'unquote)
+                              (length> x 2))))
+      (eq? x 'unquote)))
+
+;; bracket without splicing
+(define (bq-bracket1 x d)
+  (if (and (pair? x) (eq? (car x) 'unquote))
+      (if (= d 0)
+          (cadr x)
+          (list cons ''unquote
+                (bq-process (cdr x) (- d 1))))
+      (bq-process x d)))
+
+(define (bq-bracket x d)
+  (cond ((atom? x)  (list list (bq-process x d)))
+        ((eq? (car x) 'unquote)
+         (if (= d 0)
+             (cons list (cdr x))
+             (list list (list cons ''unquote
+                              (bq-process (cdr x) (- d 1))))))
+        ((eq? (car x) 'unquote-splicing)
+         (if (= d 0)
+             (list 'copy-list (cadr x))
+             (list list (list list ''unquote-splicing
+                              (bq-process (cadr x) (- d 1))))))
+        ((eq? (car x) 'unquote-nsplicing)
+         (if (= d 0)
+             (cadr x)
+             (list list (list list ''unquote-nsplicing
+                              (bq-process (cadr x) (- d 1))))))
+        (else  (list list (bq-process x d)))))
+
+(define (bq-process x d)
+  (cond ((symbol? x)  (list 'quote x))
+        ((vector? x)
+         (let ((body (bq-process (vector->list x) d)))
+           (if (eq? (car body) list)
+               (cons vector (cdr body))
+               (list apply vector body))))
+        ((atom? x)  x)
+        ((eq? (car x) 'quasiquote)
+         (list list ''quasiquote (bq-process (cadr x) (+ d 1))))
+        ((eq? (car x) 'unquote)
+         (if (and (= d 0) (length= x 2))
+             (cadr x)
+             (list cons ''unquote (bq-process (cdr x) (- d 1)))))
         ((not (any splice-form? x))
          (let ((lc    (lastcdr x))
-               (forms (map bq-bracket1 x)))
+               (forms (map (lambda (x) (bq-bracket1 x d)) x)))
            (if (null? lc)
-               (cons 'list forms)
-	       (if (null? (cdr forms))
-		   (list cons (car forms) (bq-process lc))
-		   (nconc (cons 'list* forms) (list (bq-process lc)))))))
-        (#t (let ((p x) (q ()))
-	      (while (and (pair? p)
-			  (not (eq? (car p) 'unquote)))
-		     (set! q (cons (bq-bracket (car p)) q))
-		     (set! p (cdr p)))
-	      (let ((forms
-		     (cond ((pair? p) (nreconc q (list (cadr p))))
-			   ((null? p)  (reverse! q))
-			   (#t        (nreconc q (list (bq-process p)))))))
-		(if (null? (cdr forms))
-		    (car forms)
-		    (if (and (length= forms 2)
-			     (length= (car forms) 2)
-			     (eq? list (caar forms)))
-			(list cons (cadar forms) (cadr forms))
-			(cons 'nconc forms))))))))
-
-(define (bq-bracket x)
-  (cond ((atom? x)                        (list list (bq-process x)))
-        ((eq? (car x) 'unquote)           (list list (cadr x)))
-        ((eq? (car x) 'unquote-splicing)  (list 'copy-list (cadr x)))
-        ((eq? (car x) 'unquote-nsplicing) (cadr x))
-        (#t                               (list list (bq-process x)))))
+               (cons list forms)
+               (if (null? (cdr forms))
+                   (list cons (car forms) (bq-process lc d))
+                   (nconc (cons list* forms) (list (bq-process lc d)))))))
+        (else
+         (let loop ((p x) (q ()))
+           (cond ((null? p) ;; proper list
+                  (cons 'nconc (reverse! q)))
+                 ((pair? p)
+                  (cond ((eq? (car p) 'unquote)
+                         ;; (... . ,x)
+                         (cons 'nconc
+                               (nreconc q
+                                        (if (= d 0)
+                                            (cdr p)
+                                            (list (list list ''unquote)
+                                                  (bq-process (cdr p)
+                                                              (- d 1)))))))
+                        (else
+                         (loop (cdr p) (cons (bq-bracket (car p) d) q)))))
+                 (else
+                  ;; (... . x)
+                  (cons 'nconc (reverse! (cons (bq-process p d) q)))))))))
 
 ; standard macros -------------------------------------------------------------
 
diff --git a/src/flisp/unittest.lsp b/src/flisp/unittest.lsp
index 0e02b54392fecb..584d5c81225e8c 100644
--- a/src/flisp/unittest.lsp
+++ b/src/flisp/unittest.lsp
@@ -264,4 +264,7 @@
 (assert (not (equal? (hash (iota 41))
 		     (hash (iota 42)))))
 
+(assert (equal? `(a `(b c)) '(a (quasiquote (b c)))))
+(assert (equal? ````x '```x))
+
 #t
diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp
new file mode 100644
index 00000000000000..818d6e803c9dfa
--- /dev/null
+++ b/src/gc-alloc-profiler.cpp
@@ -0,0 +1,152 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "gc-alloc-profiler.h"
+
+#include "julia_internal.h"
+#include "gc.h"
+
+#include <string>
+#include <vector>
+
+using std::string;
+using std::vector;
+
+struct jl_raw_backtrace_t {
+    jl_bt_element_t *data;
+    size_t size;
+};
+
+struct jl_raw_alloc_t {
+    jl_datatype_t *type_address;
+    jl_raw_backtrace_t backtrace;
+    size_t size;
+    void *task;
+    uint64_t timestamp;
+};
+
+// == These structs define the global singleton profile buffer that will be used by
+// callbacks to store profile results. ==
+struct jl_per_thread_alloc_profile_t {
+    vector<jl_raw_alloc_t> allocs;
+};
+
+struct jl_alloc_profile_t {
+    double sample_rate;
+
+    vector<jl_per_thread_alloc_profile_t> per_thread_profiles;
+};
+
+struct jl_combined_results {
+    vector<jl_raw_alloc_t> combined_allocs;
+};
+
+// == Global variables manipulated by callbacks ==
+
+jl_alloc_profile_t g_alloc_profile;
+int g_alloc_profile_enabled = false;
+jl_combined_results g_combined_results; // Will live forever.
+
+// === stack stuff ===
+
+jl_raw_backtrace_t get_raw_backtrace() JL_NOTSAFEPOINT {
+    // We first record the backtrace onto a MAX-sized buffer, so that we don't have to
+    // allocate the buffer until we know the size. To ensure thread-safety, we use a
+    // per-thread backtrace buffer.
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_bt_element_t *shared_bt_data_buffer = ptls->profiling_bt_buffer;
+    if (shared_bt_data_buffer == NULL) {
+        size_t size = sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1);
+        shared_bt_data_buffer = (jl_bt_element_t*) malloc_s(size);
+        ptls->profiling_bt_buffer = shared_bt_data_buffer;
+    }
+
+    size_t bt_size = rec_backtrace(shared_bt_data_buffer, JL_MAX_BT_SIZE, 2);
+
+    // Then we copy only the needed bytes out of the buffer into our profile.
+    size_t bt_bytes = bt_size * sizeof(jl_bt_element_t);
+    jl_bt_element_t *bt_data = (jl_bt_element_t*) malloc_s(bt_bytes);
+    memcpy(bt_data, shared_bt_data_buffer, bt_bytes);
+
+
+    return jl_raw_backtrace_t{
+        bt_data,
+        bt_size
+    };
+}
+
+// == exported interface ==
+
+extern "C" {  // Needed since these functions doesn't take any arguments.
+
+JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate) {
+    // We only need to do this once, the first time this is called.
+    while (g_alloc_profile.per_thread_profiles.size() < (size_t)jl_n_threads) {
+        g_alloc_profile.per_thread_profiles.push_back(jl_per_thread_alloc_profile_t{});
+    }
+
+    g_alloc_profile.sample_rate = sample_rate;
+    g_alloc_profile_enabled = true;
+}
+
+JL_DLLEXPORT jl_profile_allocs_raw_results_t jl_fetch_alloc_profile() {
+    // combine allocs
+    // TODO: interleave to preserve ordering
+    for (auto& profile : g_alloc_profile.per_thread_profiles) {
+        for (const auto& alloc : profile.allocs) {
+            g_combined_results.combined_allocs.push_back(alloc);
+        }
+
+        profile.allocs.clear();
+    }
+
+    return jl_profile_allocs_raw_results_t{
+        g_combined_results.combined_allocs.data(),
+        g_combined_results.combined_allocs.size(),
+    };
+}
+
+JL_DLLEXPORT void jl_stop_alloc_profile() {
+    g_alloc_profile_enabled = false;
+}
+
+JL_DLLEXPORT void jl_free_alloc_profile() {
+    // Free any allocs that remain in the per-thread profiles, that haven't
+    // been combined yet (which happens in fetch_alloc_profiles()).
+    for (auto& profile : g_alloc_profile.per_thread_profiles) {
+        for (auto alloc : profile.allocs) {
+            free(alloc.backtrace.data);
+        }
+        profile.allocs.clear();
+    }
+
+    // Free the allocs that have been already combined into the combined results object.
+    for (auto alloc : g_combined_results.combined_allocs) {
+        free(alloc.backtrace.data);
+    }
+
+    g_combined_results.combined_allocs.clear();
+}
+
+// == callback called into by the outside ==
+
+void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *type) JL_NOTSAFEPOINT {
+    auto& global_profile = g_alloc_profile;
+    auto thread_id = jl_atomic_load_relaxed(&jl_current_task->tid);
+    auto& profile = global_profile.per_thread_profiles[thread_id];
+
+    auto sample_val = double(rand()) / double(RAND_MAX);
+    auto should_record = sample_val <= global_profile.sample_rate;
+    if (!should_record) {
+        return;
+    }
+
+    profile.allocs.emplace_back(jl_raw_alloc_t{
+        type,
+        get_raw_backtrace(),
+        size,
+        (void *)jl_current_task,
+        cycleclock()
+    });
+}
+
+}  // extern "C"
diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h
new file mode 100644
index 00000000000000..8be6fed21a899c
--- /dev/null
+++ b/src/gc-alloc-profiler.h
@@ -0,0 +1,51 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_GC_ALLOC_PROFILER_H
+#define JL_GC_ALLOC_PROFILER_H
+
+#include "julia.h"
+#include "ios.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// ---------------------------------------------------------------------
+// The public interface to call from Julia for allocations profiling
+// ---------------------------------------------------------------------
+
+// Forward-declaration to avoid depenency in header file.
+struct jl_raw_alloc_t;  // Defined in gc-alloc-profiler.cpp
+
+typedef struct {
+    struct jl_raw_alloc_t *allocs;
+    size_t num_allocs;
+} jl_profile_allocs_raw_results_t;
+
+JL_DLLEXPORT void jl_start_alloc_profile(double sample_rate);
+JL_DLLEXPORT jl_profile_allocs_raw_results_t jl_fetch_alloc_profile(void);
+JL_DLLEXPORT void jl_stop_alloc_profile(void);
+JL_DLLEXPORT void jl_free_alloc_profile(void);
+
+// ---------------------------------------------------------------------
+// Functions to call from GC when alloc profiling is enabled
+// ---------------------------------------------------------------------
+
+void _maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT;
+
+extern int g_alloc_profile_enabled;
+
+#define jl_gc_unknown_type_tag ((jl_datatype_t*)0xdeadaa03)
+
+static inline void maybe_record_alloc_to_profile(jl_value_t *val, size_t size, jl_datatype_t *typ) JL_NOTSAFEPOINT {
+    if (__unlikely(g_alloc_profile_enabled)) {
+        _maybe_record_alloc_to_profile(val, size, typ);
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif  // JL_GC_ALLOC_PROFILER_H
diff --git a/src/gc-debug.c b/src/gc-debug.c
index febab095a409c9..929b68258cc097 100644
--- a/src/gc-debug.c
+++ b/src/gc-debug.c
@@ -282,8 +282,8 @@ void gc_verify(jl_ptls_t ptls)
     }
     restore();
     gc_verify_track(ptls);
-    gc_debug_print_status();
-    gc_debug_critical_error();
+    jl_gc_debug_print_status();
+    jl_gc_debug_critical_error();
     abort();
 }
 #endif
@@ -315,7 +315,6 @@ static void gc_verify_tags_page(jl_gc_pagemeta_t *pg)
         char *cur_page = gc_page_data((char*)halfpages - 1);
         if (cur_page == data) {
             lim = (char*)halfpages - 1;
-            break;
         }
     }
     // compute the freelist_map
@@ -468,10 +467,9 @@ static void gc_debug_alloc_init(jl_alloc_num_t *num, const char *name)
         return;
     if (*env == 'r') {
         env++;
-        srand((unsigned)uv_hrtime());
         for (int i = 0;i < 3;i++) {
             while (num->random[i] == 0) {
-                num->random[i] = rand();
+                num->random[i] = jl_rand();
             }
         }
     }
@@ -497,12 +495,12 @@ int gc_debug_check_pool(void)
     return gc_debug_alloc_check(&jl_gc_debug_env.pool);
 }
 
-int gc_debug_check_other(void)
+int jl_gc_debug_check_other(void)
 {
     return gc_debug_alloc_check(&jl_gc_debug_env.other);
 }
 
-void gc_debug_print_status(void)
+void jl_gc_debug_print_status(void)
 {
     uint64_t pool_count = jl_gc_debug_env.pool.num;
     uint64_t other_count = jl_gc_debug_env.other.num;
@@ -511,9 +509,9 @@ void gc_debug_print_status(void)
                    pool_count + other_count, pool_count, other_count, gc_num.pause);
 }
 
-void gc_debug_critical_error(void)
+void jl_gc_debug_critical_error(void)
 {
-    gc_debug_print_status();
+    jl_gc_debug_print_status();
     if (!jl_gc_debug_env.wait_for_debugger)
         return;
     jl_safe_printf("Waiting for debugger to attach\n");
@@ -522,11 +520,11 @@ void gc_debug_critical_error(void)
     }
 }
 
-void gc_debug_print(void)
+void jl_gc_debug_print(void)
 {
     if (!gc_debug_alloc_check(&jl_gc_debug_env.print))
         return;
-    gc_debug_print_status();
+    jl_gc_debug_print_status();
 }
 
 // a list of tasks for conservative stack scan during gc_scrub
@@ -539,14 +537,14 @@ void gc_scrub_record_task(jl_task_t *t)
 
 static void gc_scrub_range(char *low, char *high)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
     if (jl_setjmp(buf, 0)) {
-        ptls->safe_restore = old_buf;
+        jl_set_safe_restore(old_buf);
         return;
     }
-    ptls->safe_restore = &buf;
+    jl_set_safe_restore(&buf);
     low = (char*)((uintptr_t)low & ~(uintptr_t)15);
     for (char **stack_p = ((char**)high) - 1; stack_p > (char**)low; stack_p--) {
         char *p = *stack_p;
@@ -570,20 +568,20 @@ static void gc_scrub_range(char *low, char *high)
         // set mark to GC_MARKED (young and marked)
         tag->bits.gc = GC_MARKED;
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
 }
 
 static void gc_scrub_task(jl_task_t *ta)
 {
     int16_t tid = ta->tid;
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_ptls_t ptls2 = NULL;
     if (tid != -1)
         ptls2 = jl_all_tls_states[tid];
 
     char *low;
     char *high;
-    if (ta->copy_stack && ptls2 && ta == ptls2->current_task) {
+    if (ta->copy_stack && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
         low  = (char*)ptls2->stackbase - ptls2->stacksize;
         high = (char*)ptls2->stackbase;
     }
@@ -594,7 +592,7 @@ static void gc_scrub_task(jl_task_t *ta)
     else
         return;
 
-    if (ptls == ptls2 && ptls2 && ta == ptls2->current_task) {
+    if (ptls == ptls2 && ptls2 && ta == jl_atomic_load_relaxed(&ptls2->current_task)) {
         // scan up to current `sp` for current thread and task
         low = (char*)jl_get_frame_addr();
     }
@@ -608,11 +606,11 @@ void gc_scrub(void)
     jl_gc_debug_tasks.len = 0;
 }
 #else
-void gc_debug_critical_error(void)
+void jl_gc_debug_critical_error(void)
 {
 }
 
-void gc_debug_print_status(void)
+void jl_gc_debug_print_status(void)
 {
     // May not be accurate but should be helpful enough
     uint64_t pool_count = gc_num.poolalloc;
@@ -978,9 +976,25 @@ void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
                    jl_ns2ms(gc_postmark_end - gc_premark_end),
                    sweep_full ? "full" : "quick", -gc_num.allocd / 1024);
 }
+
+void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
+                     uint64_t freed, uint64_t live, uint64_t interval,
+                     uint64_t pause)
+{
+    if (sweep_full > 0)
+        jl_safe_printf("TS: %" PRIu64 " Major collection: estimate freed = %" PRIu64
+                       " live = %" PRIu64 "m new interval = %" PRIu64 "m time = %" PRIu64 "ms\n",
+                       end - start, freed, live/1024/1024,
+                       interval/1024/1024, pause/1000000 );
+    else
+        jl_safe_printf("TS: %" PRIu64 " Minor collection: estimate freed = %" PRIu64 " live = %" PRIu64
+                       "m new interval = %" PRIu64 "m time = %" PRIu64 "ms\n",
+                       end - start, freed, live/1024/1024,
+                       interval/1024/1024, pause/1000000 );
+}
 #endif
 
-void gc_debug_init(void)
+void jl_gc_debug_init(void)
 {
 #ifdef GC_DEBUG_ENV
     char *env = getenv("JULIA_GC_NO_GENERATIONAL");
@@ -1252,12 +1266,12 @@ int gc_slot_to_arrayidx(void *obj, void *_slot)
 // `pc_offset` will be added to `sp` for convenience in the debugger.
 NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset)
 {
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
-    ptls->safe_restore = &buf;
+    jl_set_safe_restore(&buf);
     if (jl_setjmp(buf, 0) != 0) {
         jl_safe_printf("\n!!! ERROR when unwinding gc mark loop -- ABORTING !!!\n");
-        ptls->safe_restore = old_buf;
+        jl_set_safe_restore(old_buf);
         return;
     }
     void **top = sp.pc + pc_offset;
@@ -1378,7 +1392,24 @@ NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_off
             break;
         }
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
+}
+
+static int gc_logging_enabled = 0;
+
+JL_DLLEXPORT void jl_enable_gc_logging(int enable) {
+    gc_logging_enabled = enable;
+}
+
+void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT {
+    if (!gc_logging_enabled) {
+        return;
+    }
+    jl_safe_printf("GC: pause %.2fms. collected %fMB. %s %s\n",
+        pause/1e6, freed/1e6,
+        full ? "full" : "incr",
+        recollect ? "recollect" : ""
+    );
 }
 
 #ifdef __cplusplus
diff --git a/src/gc-pages.c b/src/gc-pages.c
index 95752eb426cca5..a4ebe0315d71e0 100644
--- a/src/gc-pages.c
+++ b/src/gc-pages.c
@@ -82,7 +82,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
             block_pg_cnt = pg_cnt = min_block_pg_alloc;
         }
         else {
-            JL_UNLOCK_NOGC(&gc_perm_lock);
+            uv_mutex_unlock(&gc_perm_lock);
             jl_throw(jl_memory_exception);
         }
     }
@@ -110,7 +110,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
             i = REGION_INDEX(ptr);
             info.pagetable_i = i % 32;
             info.pagetable_i32 = i / 32;
-            msk = (1 << info.pagetable_i);
+            msk = (1u << info.pagetable_i);
             if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0)
                 memory_map.freemap1[info.pagetable_i32] |= msk; // has free
             info.pagetable1 = *(ppagetable1 = &memory_map.meta1[i]);
@@ -126,7 +126,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
             i = REGION1_INDEX(ptr);
             info.pagetable1_i = i % 32;
             info.pagetable1_i32 = i / 32;
-            msk = (1 << info.pagetable1_i);
+            msk = (1u << info.pagetable1_i);
             if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0)
                 info.pagetable1->freemap0[info.pagetable1_i32] |= msk; // has free
             info.pagetable0 = *(ppagetable0 = &info.pagetable1->meta0[i]);
@@ -142,7 +142,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
             i = REGION0_INDEX(ptr);
             info.pagetable0_i = i % 32;
             info.pagetable0_i32 = i / 32;
-            msk = (1 << info.pagetable0_i);
+            msk = (1u << info.pagetable0_i);
             info.pagetable0->freemap[info.pagetable0_i32] |= msk; // is free
             pmeta = &info.pagetable0->meta[i];
             info.meta = (*pmeta = &page_meta[pg]);
@@ -159,7 +159,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
                GC_PAGE_SZ * pg_cnt - LLT_ALIGN(GC_PAGE_SZ * pg, jl_page_size));
 #endif
         if (pg == 0) {
-            JL_UNLOCK_NOGC(&gc_perm_lock);
+            uv_mutex_unlock(&gc_perm_lock);
             jl_throw(jl_memory_exception);
         }
     }
@@ -171,7 +171,7 @@ static jl_gc_pagemeta_t *jl_gc_alloc_new_page(void) JL_NOTSAFEPOINT
 NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
 {
     struct jl_gc_metadata_ext info;
-    JL_LOCK_NOGC(&gc_perm_lock);
+    uv_mutex_lock(&gc_perm_lock);
 
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
@@ -210,10 +210,10 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
                             goto have_free_page; // break out of all of these loops
                         }
                     }
-                    info.pagetable1->freemap0[info.pagetable1_i32] &= ~(uint32_t)(1 << info.pagetable1_i); // record that this was full
+                    info.pagetable1->freemap0[info.pagetable1_i32] &= ~(uint32_t)(1u << info.pagetable1_i); // record that this was full
                 }
             }
-            memory_map.freemap1[info.pagetable_i32] &= ~(uint32_t)(1 << info.pagetable_i); // record that this was full
+            memory_map.freemap1[info.pagetable_i32] &= ~(uint32_t)(1u << info.pagetable_i); // record that this was full
         }
     }
 
@@ -241,10 +241,10 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
         info.pagetable0->ub = info.pagetable0_i32;
 
     // mark this entry as in-use and not free
-    info.pagetable0->freemap[info.pagetable0_i32] &= ~(uint32_t)(1 << info.pagetable0_i);
-    info.pagetable0->allocmap[info.pagetable0_i32] |= (uint32_t)(1 << info.pagetable0_i);
-    info.pagetable1->allocmap0[info.pagetable1_i32] |= (uint32_t)(1 << info.pagetable1_i);
-    memory_map.allocmap1[info.pagetable_i32] |= (uint32_t)(1 << info.pagetable_i);
+    info.pagetable0->freemap[info.pagetable0_i32] &= ~(uint32_t)(1u << info.pagetable0_i);
+    info.pagetable0->allocmap[info.pagetable0_i32] |= (uint32_t)(1u << info.pagetable0_i);
+    info.pagetable1->allocmap0[info.pagetable1_i32] |= (uint32_t)(1u << info.pagetable1_i);
+    memory_map.allocmap1[info.pagetable_i32] |= (uint32_t)(1u << info.pagetable_i);
 
 #ifdef _OS_WINDOWS_
     VirtualAlloc(info.meta->data, GC_PAGE_SZ, MEM_COMMIT, PAGE_READWRITE);
@@ -255,7 +255,7 @@ NOINLINE jl_gc_pagemeta_t *jl_gc_alloc_page(void) JL_NOTSAFEPOINT
     errno = last_errno;
     current_pg_count++;
     gc_final_count_page(current_pg_count);
-    JL_UNLOCK_NOGC(&gc_perm_lock);
+    uv_mutex_unlock(&gc_perm_lock);
     return info.meta;
 }
 
@@ -265,18 +265,18 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT
     // update the allocmap and freemap to indicate this contains a free entry
     struct jl_gc_metadata_ext info = page_metadata_ext(p);
     uint32_t msk;
-    msk = (uint32_t)(1 << info.pagetable0_i);
+    msk = (uint32_t)(1u << info.pagetable0_i);
     assert(!(info.pagetable0->freemap[info.pagetable0_i32] & msk));
     assert(info.pagetable0->allocmap[info.pagetable0_i32] & msk);
     info.pagetable0->allocmap[info.pagetable0_i32] &= ~msk;
     info.pagetable0->freemap[info.pagetable0_i32] |= msk;
 
-    msk = (uint32_t)(1 << info.pagetable1_i);
+    msk = (uint32_t)(1u << info.pagetable1_i);
     assert(info.pagetable1->allocmap0[info.pagetable1_i32] & msk);
     if ((info.pagetable1->freemap0[info.pagetable1_i32] & msk) == 0)
         info.pagetable1->freemap0[info.pagetable1_i32] |= msk;
 
-    msk = (uint32_t)(1 << info.pagetable_i);
+    msk = (uint32_t)(1u << info.pagetable_i);
     assert(memory_map.allocmap1[info.pagetable_i32] & msk);
     if ((memory_map.freemap1[info.pagetable_i32] & msk) == 0)
         memory_map.freemap1[info.pagetable_i32] |= msk;
@@ -294,7 +294,7 @@ void jl_gc_free_page(void *p) JL_NOTSAFEPOINT
         p = otherp;
         while (n_pages--) {
             struct jl_gc_metadata_ext info = page_metadata_ext(otherp);
-            msk = (uint32_t)(1 << info.pagetable0_i);
+            msk = (uint32_t)(1u << info.pagetable0_i);
             if (info.pagetable0->allocmap[info.pagetable0_i32] & msk)
                 goto no_decommit;
             otherp = (void*)((char*)otherp + GC_PAGE_SZ);
diff --git a/src/gc-stacks.c b/src/gc-stacks.c
index 71668b3d4c2ff9..b7adf254026ca2 100644
--- a/src/gc-stacks.c
+++ b/src/gc-stacks.c
@@ -23,7 +23,7 @@
 #define MIN_STACK_MAPPINGS_PER_POOL 5
 
 const size_t jl_guard_size = (4096 * 8);
-static uint32_t num_stack_mappings = 0;
+static _Atomic(uint32_t) num_stack_mappings = 0;
 
 #ifdef _OS_WINDOWS_
 #define MAP_FAILED NULL
@@ -119,7 +119,8 @@ static void _jl_free_stack(jl_ptls_t ptls, void *stkbuf, size_t bufsz)
 
 JL_DLLEXPORT void jl_free_stack(void *stkbuf, size_t bufsz)
 {
-    _jl_free_stack(jl_get_ptls_states(), stkbuf, bufsz);
+    jl_task_t *ct = jl_current_task;
+    _jl_free_stack(ct->ptls, stkbuf, bufsz);
 }
 
 
@@ -142,7 +143,8 @@ void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task)
 
 JL_DLLEXPORT void *jl_malloc_stack(size_t *bufsz, jl_task_t *owner) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     size_t ssize = *bufsz;
     void *stk = NULL;
     if (ssize <= pool_sizes[JL_N_STACK_POOLS - 1]) {
@@ -216,8 +218,12 @@ void sweep_stack_pools(void)
             continue;
         while (1) {
             jl_task_t *t = (jl_task_t*)lst[n];
+            assert(jl_is_task(t));
             if (gc_marked(jl_astaggedvalue(t)->bits.gc)) {
-                n++;
+                if (t->stkbuf == NULL)
+                    ndel++; // jl_release_task_stack called
+                else
+                    n++;
             }
             else {
                 ndel++;
@@ -227,7 +233,7 @@ void sweep_stack_pools(void)
                     t->stkbuf = NULL;
                     _jl_free_stack(ptls2, stkbuf, bufsz);
                 }
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
                 if (t->ctx.tsan_state) {
                     __tsan_destroy_fiber(t->ctx.tsan_state);
                     t->ctx.tsan_state = NULL;
@@ -243,3 +249,31 @@ void sweep_stack_pools(void)
         live_tasks->len -= ndel;
     }
 }
+
+JL_DLLEXPORT jl_array_t *jl_live_tasks(void)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    arraylist_t *live_tasks = &ptls->heap.live_tasks;
+    size_t i, j, l;
+    jl_array_t *a;
+    do {
+        l = live_tasks->len;
+        a = jl_alloc_vec_any(l + 1); // may gc, changing the number of tasks
+    } while (l + 1 < live_tasks->len);
+    l = live_tasks->len;
+    void **lst = live_tasks->items;
+    j = 0;
+    ((void**)jl_array_data(a))[j++] = ptls->root_task;
+    for (i = 0; i < l; i++) {
+        if (((jl_task_t*)lst[i])->stkbuf != NULL)
+            ((void**)jl_array_data(a))[j++] = lst[i];
+    }
+    l = jl_array_len(a);
+    if (j < l) {
+        JL_GC_PUSH1(&a);
+        jl_array_del_end(a, l - j);
+        JL_GC_POP();
+    }
+    return a;
+}
diff --git a/src/gc.c b/src/gc.c
index bf78b0d88b0056..e299661db87d42 100644
--- a/src/gc.c
+++ b/src/gc.c
@@ -128,11 +128,11 @@ STATIC_INLINE void import_gc_state(jl_ptls_t ptls, jl_gc_mark_sp_t *sp) {
 // is going to realloc the buffer (of its own list) or accessing the
 // list of another thread
 static jl_mutex_t finalizers_lock;
-static jl_mutex_t gc_cache_lock;
+static uv_mutex_t gc_cache_lock;
 
 // Flag that tells us whether we need to support conservative marking
 // of objects.
-static int support_conservative_marking = 0;
+static _Atomic(int) support_conservative_marking = 0;
 
 /**
  * Note about GC synchronization:
@@ -166,7 +166,7 @@ static int support_conservative_marking = 0;
  * finalizers in unmanaged (GC safe) mode.
  */
 
-jl_gc_num_t gc_num = {0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+jl_gc_num_t gc_num = {0};
 static size_t last_long_collect_interval;
 
 pagetable_t memory_map;
@@ -181,6 +181,7 @@ bigval_t *big_objects_marked = NULL;
 // `to_finalize` should not have tagged pointers.
 arraylist_t finalizer_list_marked;
 arraylist_t to_finalize;
+JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers = 0;
 
 NOINLINE uintptr_t gc_get_stack_ptr(void)
 {
@@ -261,9 +262,12 @@ static void schedule_finalization(void *o, void *f) JL_NOTSAFEPOINT
 {
     arraylist_push(&to_finalize, o);
     arraylist_push(&to_finalize, f);
+    // doesn't need release, since we'll keep checking (on the reader) until we see the work and
+    // release our lock, and that will have a release barrier by then
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 1);
 }
 
-static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff)
+static void run_finalizer(jl_task_t *ct, jl_value_t *o, jl_value_t *ff)
 {
     if (gc_ptr_tag(o, 1)) {
         ((void (*)(void*))ff)(gc_ptr_clear_tag(o, 1));
@@ -271,15 +275,16 @@ static void run_finalizer(jl_ptls_t ptls, jl_value_t *o, jl_value_t *ff)
     }
     jl_value_t *args[2] = {ff,o};
     JL_TRY {
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_world_counter;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_apply(args, 2);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
     }
     JL_CATCH {
-        jl_printf(JL_STDERR, "error in running finalizer: ");
-        jl_static_show(JL_STDERR, jl_current_exception());
-        jl_printf(JL_STDERR, "\n");
+        jl_printf((JL_STREAM*)STDERR_FILENO, "error in running finalizer: ");
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+        jlbacktrace(); // written to STDERR_FILENO
     }
 }
 
@@ -295,7 +300,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o,
     // This way, the mutation should not conflict with the owning thread,
     // which only writes to locations later than `len`
     // and will not resize the buffer without acquiring the lock.
-    size_t len = need_sync ? jl_atomic_load_acquire(&list->len) : list->len;
+    size_t len = need_sync ? jl_atomic_load_acquire((_Atomic(size_t)*)&list->len) : list->len;
     size_t oldlen = len;
     void **items = list->items;
     size_t j = 0;
@@ -328,7 +333,7 @@ static void finalize_object(arraylist_t *list, jl_value_t *o,
         // The `memset` (like any other content mutation) has to be done
         // **before** the `cmpxchg` which publishes the length.
         memset(&items[len], 0, (oldlen - len) * sizeof(void*));
-        jl_atomic_compare_exchange(&list->len, oldlen, len);
+        jl_atomic_cmpswap((_Atomic(size_t)*)&list->len, &oldlen, len);
     }
     else {
         list->len = len;
@@ -337,36 +342,40 @@ static void finalize_object(arraylist_t *list, jl_value_t *o,
 
 // The first two entries are assumed to be empty and the rest are assumed to
 // be pointers to `jl_value_t` objects
-static void jl_gc_push_arraylist(jl_ptls_t ptls, arraylist_t *list)
+static void jl_gc_push_arraylist(jl_task_t *ct, arraylist_t *list)
 {
     void **items = list->items;
     items[0] = (void*)JL_GC_ENCODE_PUSHARGS(list->len - 2);
-    items[1] = ptls->pgcstack;
-    ptls->pgcstack = (jl_gcframe_t*)items;
+    items[1] = ct->gcstack;
+    ct->gcstack = (jl_gcframe_t*)items;
 }
 
 // Same assumption as `jl_gc_push_arraylist`. Requires the finalizers lock
 // to be hold for the current thread and will release the lock when the
 // function returns.
-static void jl_gc_run_finalizers_in_list(jl_ptls_t ptls, arraylist_t *list)
+static void jl_gc_run_finalizers_in_list(jl_task_t *ct, arraylist_t *list)
 {
+    // Avoid marking `ct` as non-migratable via an `@async` task (as noted in the docstring
+    // of `finalizer`) in a finalizer:
+    uint8_t sticky = ct->sticky;
     // empty out the first two entries for the GC frame
     arraylist_push(list, list->items[0]);
     arraylist_push(list, list->items[1]);
-    jl_gc_push_arraylist(ptls, list);
+    jl_gc_push_arraylist(ct, list);
     jl_value_t **items = (jl_value_t**)list->items;
     size_t len = list->len;
     JL_UNLOCK_NOGC(&finalizers_lock);
     // run finalizers in reverse order they were added, so lower-level finalizers run last
     for (size_t i = len-4; i >= 2; i -= 2)
-        run_finalizer(ptls, items[i], items[i + 1]);
+        run_finalizer(ct, items[i], items[i + 1]);
     // first entries were moved last to make room for GC frame metadata
-    run_finalizer(ptls, items[len-2], items[len-1]);
+    run_finalizer(ct, items[len-2], items[len-1]);
     // matches the jl_gc_push_arraylist above
     JL_GC_POP();
+    ct->sticky = sticky;
 }
 
-static void run_finalizers(jl_ptls_t ptls)
+static void run_finalizers(jl_task_t *ct)
 {
     // Racy fast path:
     // The race here should be OK since the race can only happen if
@@ -385,21 +394,73 @@ static void run_finalizers(jl_ptls_t ptls)
     if (to_finalize.items == to_finalize._space) {
         copied_list.items = copied_list._space;
     }
+    jl_atomic_store_relaxed(&jl_gc_have_pending_finalizers, 0);
     arraylist_new(&to_finalize, 0);
     // This releases the finalizers lock.
-    jl_gc_run_finalizers_in_list(ptls, &copied_list);
+    jl_gc_run_finalizers_in_list(ct, &copied_list);
     arraylist_free(&copied_list);
 }
 
-JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on)
+JL_DLLEXPORT void jl_gc_run_pending_finalizers(jl_task_t *ct)
 {
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (!ptls->in_finalizer && ptls->locks.len == 0 && ptls->finalizers_inhibited == 0) {
+        ptls->in_finalizer = 1;
+        run_finalizers(ct);
+        ptls->in_finalizer = 0;
+    }
+}
+
+JL_DLLEXPORT int jl_gc_get_finalizers_inhibited(jl_ptls_t ptls)
+{
+    if (ptls == NULL)
+        ptls = jl_current_task->ptls;
+    return ptls->finalizers_inhibited;
+}
+
+JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    ptls->finalizers_inhibited++;
+}
+
+JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void)
+{
+    jl_task_t *ct = jl_current_task;
+#ifdef NDEBUG
+    ct->ptls->finalizers_inhibited--;
+#else
+    jl_gc_enable_finalizers(ct, 1);
+#endif
+}
+
+JL_DLLEXPORT void jl_gc_enable_finalizers(jl_task_t *ct, int on)
+{
+    if (ct == NULL)
+        ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     int old_val = ptls->finalizers_inhibited;
     int new_val = old_val + (on ? -1 : 1);
+    if (new_val < 0) {
+        JL_TRY {
+            jl_error(""); // get a backtrace
+        }
+        JL_CATCH {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: GC finalizers already enabled on this thread.\n");
+            // Only print the backtrace once, to avoid spamming the logs
+            static int backtrace_printed = 0;
+            if (backtrace_printed == 0) {
+                backtrace_printed = 1;
+                jlbacktrace(); // written to STDERR_FILENO
+            }
+        }
+        return;
+    }
     ptls->finalizers_inhibited = new_val;
-    if (!new_val && old_val && !ptls->in_finalizer) {
-        ptls->in_finalizer = 1;
-        run_finalizers(ptls);
-        ptls->in_finalizer = 0;
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
+        jl_gc_run_pending_finalizers(ct);
     }
 }
 
@@ -417,19 +478,19 @@ static void schedule_all_finalizers(arraylist_t *flist) JL_NOTSAFEPOINT
     flist->len = 0;
 }
 
-void jl_gc_run_all_finalizers(jl_ptls_t ptls)
+void jl_gc_run_all_finalizers(jl_task_t *ct)
 {
     schedule_all_finalizers(&finalizer_list_marked);
     for (int i = 0;i < jl_n_threads;i++) {
         jl_ptls_t ptls2 = jl_all_tls_states[i];
         schedule_all_finalizers(&ptls2->finalizers);
     }
-    run_finalizers(ptls);
+    run_finalizers(ct);
 }
 
-static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f)
+static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT
 {
-    int8_t gc_state = jl_gc_unsafe_enter(ptls);
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
     arraylist_t *a = &ptls->finalizers;
     // This acquire load and the release store at the end are used to
     // synchronize with `finalize_object` on another thread. Apart from the GC,
@@ -438,7 +499,7 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f)
     // (only one thread since it needs to acquire the finalizer lock).
     // Similar to `finalize_object`, all content mutation has to be done
     // between the acquire and the release of the length.
-    size_t oldlen = jl_atomic_load_acquire(&a->len);
+    size_t oldlen = jl_atomic_load_acquire((_Atomic(size_t)*)&a->len);
     if (__unlikely(oldlen + 2 > a->max)) {
         JL_LOCK_NOGC(&finalizers_lock);
         // `a->len` might have been modified.
@@ -452,16 +513,15 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f)
     void **items = a->items;
     items[oldlen] = v;
     items[oldlen + 1] = f;
-    jl_atomic_store_release(&a->len, oldlen + 2);
-    jl_gc_unsafe_leave(ptls, gc_state);
+    jl_atomic_store_release((_Atomic(size_t)*)&a->len, oldlen + 2);
 }
 
-JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f)
+JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT
 {
     gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f);
 }
 
-JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f)
+JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT
 {
     if (__unlikely(jl_typeis(f, jl_voidpointer_type))) {
         jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f));
@@ -471,7 +531,7 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct
     }
 }
 
-JL_DLLEXPORT void jl_finalize_th(jl_ptls_t ptls, jl_value_t *o)
+JL_DLLEXPORT void jl_finalize_th(jl_task_t *ct, jl_value_t *o)
 {
     JL_LOCK_NOGC(&finalizers_lock);
     // Copy the finalizers into a temporary list so that code in the finalizer
@@ -483,12 +543,12 @@ JL_DLLEXPORT void jl_finalize_th(jl_ptls_t ptls, jl_value_t *o)
     // still holding a reference to the object
     for (int i = 0; i < jl_n_threads; i++) {
         jl_ptls_t ptls2 = jl_all_tls_states[i];
-        finalize_object(&ptls2->finalizers, o, &copied_list, ptls != ptls2);
+        finalize_object(&ptls2->finalizers, o, &copied_list, jl_atomic_load_relaxed(&ct->tid) != i);
     }
     finalize_object(&finalizer_list_marked, o, &copied_list, 0);
     if (copied_list.len > 0) {
         // This releases the finalizers lock.
-        jl_gc_run_finalizers_in_list(ptls, &copied_list);
+        jl_gc_run_finalizers_in_list(ct, &copied_list);
     }
     else {
         JL_UNLOCK_NOGC(&finalizers_lock);
@@ -496,20 +556,21 @@ JL_DLLEXPORT void jl_finalize_th(jl_ptls_t ptls, jl_value_t *o)
     arraylist_free(&copied_list);
 }
 
+// explicitly scheduled objects for the sweepfunc callback
 static void gc_sweep_foreign_objs_in_list(arraylist_t *objs)
 {
     size_t p = 0;
     for (size_t i = 0; i < objs->len; i++) {
-        jl_value_t *v = (jl_value_t *)(objs->items[i]);
-        jl_datatype_t *t = (jl_datatype_t *)(jl_typeof(v));
+        jl_value_t *v = (jl_value_t*)(objs->items[i]);
+        jl_datatype_t *t = (jl_datatype_t*)(jl_typeof(v));
         const jl_datatype_layout_t *layout = t->layout;
         jl_fielddescdyn_t *desc = (jl_fielddescdyn_t*)jl_dt_layout_fields(layout);
-        if (!gc_ptr_tag(v, 1)) {
+
+        int bits = jl_astaggedvalue(v)->bits.gc;
+        if (!gc_marked(bits))
             desc->sweepfunc(v);
-        }
-        else {
+        else
             objs->items[p++] = v;
-        }
     }
     objs->len = p;
 }
@@ -525,12 +586,20 @@ static void gc_sweep_foreign_objs(void)
 // GC knobs and self-measurement variables
 static int64_t last_gc_total_bytes = 0;
 
+// max_total_memory is a suggestion.  We try very hard to stay
+// under this limit, but we will go above it rather than halting.
 #ifdef _P64
 #define default_collect_interval (5600*1024*sizeof(void*))
 static size_t max_collect_interval = 1250000000UL;
+// Eventually we can expose this to the user/ci.
+static uint64_t max_total_memory = (uint64_t) 2 * 1024 * 1024 * 1024 * 1024 * 1024;
 #else
 #define default_collect_interval (3200*1024*sizeof(void*))
 static size_t max_collect_interval =  500000000UL;
+// Work really hard to stay within 2GB
+// Alternative is to risk running out of address space
+// on 32 bit architectures.
+static uint32_t max_total_memory = (uint32_t) 2 * 1024 * 1024 * 1024;
 #endif
 
 // global variables for GC stats
@@ -597,9 +666,8 @@ static int prev_sweep_full = 1;
 // Full collection heuristics
 static int64_t live_bytes = 0;
 static int64_t promoted_bytes = 0;
-static int64_t last_full_live = 0;  // live_bytes after last full collection
 static int64_t last_live_bytes = 0; // live_bytes at last collection
-static int64_t grown_heap_age = 0;  // # of collects since live_bytes grew and remained
+static int64_t t_start = 0; // Time GC starts;
 #ifdef __GLIBC__
 // maxrss at last malloc_trim
 static int64_t last_trim_maxrss = 0;
@@ -629,9 +697,9 @@ static void gc_sync_cache_nolock(jl_ptls_t ptls, jl_gc_mark_cache_t *gc_cache) J
 
 static void gc_sync_cache(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
-    JL_LOCK_NOGC(&gc_cache_lock);
+    uv_mutex_lock(&gc_cache_lock);
     gc_sync_cache_nolock(ptls, &ptls->gc_cache);
-    JL_UNLOCK_NOGC(&gc_cache_lock);
+    uv_mutex_unlock(&gc_cache_lock);
 }
 
 // No other threads can be running marking at the same time
@@ -684,7 +752,7 @@ STATIC_INLINE int gc_setmark_tag(jl_taggedvalue_t *o, uint8_t mark_mode,
         assert((tag & 0x3) == mark_mode);
     }
     *bits = mark_mode;
-    tag = jl_atomic_exchange_relaxed(&o->header, tag);
+    tag = jl_atomic_exchange_relaxed((_Atomic(uintptr_t)*)&o->header, tag);
     verify_val(jl_valueof(o));
     return !gc_marked(tag);
 }
@@ -727,7 +795,8 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
     jl_assume(page);
     if (mark_mode == GC_OLD_MARKED) {
         ptls->gc_cache.perm_scanned_bytes += page->osize;
-        jl_atomic_fetch_add_relaxed(&page->nold, 1);
+        static_assert(sizeof(_Atomic(uint16_t)) == sizeof(page->nold), "");
+        jl_atomic_fetch_add_relaxed((_Atomic(uint16_t)*)&page->nold, 1);
     }
     else {
         ptls->gc_cache.scanned_bytes += page->osize;
@@ -736,7 +805,7 @@ STATIC_INLINE void gc_setmark_pool_(jl_ptls_t ptls, jl_taggedvalue_t *o,
             char *page_begin = gc_page_data(o) + GC_PAGE_OFFSET;
             int obj_id = (((char*)o) - page_begin) / page->osize;
             uint8_t *ages = page->ages + obj_id / 8;
-            jl_atomic_fetch_and_relaxed(ages, ~(1 << (obj_id % 8)));
+            jl_atomic_fetch_and_relaxed((_Atomic(uint8_t)*)ages, ~(1 << (obj_id % 8)));
         }
     }
     objprofile_count(jl_typeof(jl_valueof(o)),
@@ -823,7 +892,7 @@ void jl_gc_force_mark_old(jl_ptls_t ptls, jl_value_t *v) JL_NOTSAFEPOINT
 
 static inline void maybe_collect(jl_ptls_t ptls)
 {
-    if (ptls->gc_num.allocd >= 0 || gc_debug_check_other()) {
+    if (jl_atomic_load_relaxed(&ptls->gc_num.allocd) >= 0 || jl_gc_debug_check_other()) {
         jl_gc_collect(JL_GC_AUTO);
     }
     else {
@@ -843,6 +912,20 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls,
     return wr;
 }
 
+static void clear_weak_refs(void)
+{
+    for (int i = 0; i < jl_n_threads; i++) {
+        jl_ptls_t ptls2 = jl_all_tls_states[i];
+        size_t n, l = ptls2->heap.weak_refs.len;
+        void **lst = ptls2->heap.weak_refs.items;
+        for (n = 0; n < l; n++) {
+            jl_weakref_t *wr = (jl_weakref_t*)lst[n];
+            if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
+                wr->value = (jl_value_t*)jl_nothing;
+        }
+    }
+}
+
 static void sweep_weak_refs(void)
 {
     for (int i = 0; i < jl_n_threads; i++) {
@@ -855,16 +938,10 @@ static void sweep_weak_refs(void)
             continue;
         while (1) {
             jl_weakref_t *wr = (jl_weakref_t*)lst[n];
-            if (gc_marked(jl_astaggedvalue(wr)->bits.gc)) {
-                // weakref itself is alive,
-                // so the user could still re-set it to a new value
-                if (!gc_marked(jl_astaggedvalue(wr->value)->bits.gc))
-                    wr->value = (jl_value_t*)jl_nothing;
+            if (gc_marked(jl_astaggedvalue(wr)->bits.gc))
                 n++;
-            }
-            else {
+            else
                 ndel++;
-            }
             if (n >= l - ndel)
                 break;
             void *tmp = lst[n];
@@ -875,10 +952,11 @@ static void sweep_weak_refs(void)
     }
 }
 
+
 // big value list
 
 // Size includes the tag and the tag is not cleared!!
-JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
+static inline jl_value_t *jl_gc_big_alloc_inner(jl_ptls_t ptls, size_t sz)
 {
     maybe_collect(ptls);
     size_t offs = offsetof(bigval_t, header);
@@ -893,8 +971,10 @@ JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
         jl_throw(jl_memory_exception);
     gc_invoke_callbacks(jl_gc_cb_notify_external_alloc_t,
         gc_cblist_notify_external_alloc, (v, allocsz));
-    ptls->gc_num.allocd += allocsz;
-    ptls->gc_num.bigalloc++;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_num.bigalloc,
+        jl_atomic_load_relaxed(&ptls->gc_num.bigalloc) + 1);
 #ifdef MEMDEBUG
     memset(v, 0xee, allocsz);
 #endif
@@ -904,6 +984,22 @@ JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
     return jl_valueof(&v->header);
 }
 
+// Instrumented version of jl_gc_big_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t sz)
+{
+    jl_value_t *val = jl_gc_big_alloc_inner(ptls, sz);
+
+    maybe_record_alloc_to_profile(val, sz, jl_gc_unknown_type_tag);
+    return val;
+}
+
+// This wrapper exists only to prevent `jl_gc_big_alloc_inner` from being inlined into
+// its callers. We provide an external-facing interface for callers, and inline `jl_gc_big_alloc_inner`
+// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t sz) {
+    return jl_gc_big_alloc_inner(ptls, sz);
+}
+
 // Sweep list rooted at *pv, removing and freeing any unmarked objects.
 // Return pointer to last `next` field in the culled list.
 static bigval_t **sweep_big_list(int sweep_full, bigval_t **pv) JL_NOTSAFEPOINT
@@ -986,8 +1082,9 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT
 
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    ptls->gc_num.allocd += sz;
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
 }
 
 static void combine_thread_gc_counts(jl_gc_num_t *dest) JL_NOTSAFEPOINT
@@ -1011,8 +1108,8 @@ static void reset_thread_gc_counts(void) JL_NOTSAFEPOINT
     for (int i = 0; i < jl_n_threads; i++) {
         jl_ptls_t ptls = jl_all_tls_states[i];
         if (ptls) {
-            memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t));
-            ptls->gc_num.allocd = -(int64_t)gc_num.interval;
+            memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
+            jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
         }
     }
 }
@@ -1049,6 +1146,7 @@ static void jl_gc_free_array(jl_array_t *a) JL_NOTSAFEPOINT
         else
             free(d);
         gc_num.freed += jl_array_nbytes(a);
+        gc_num.freecall++;
     }
 }
 
@@ -1117,7 +1215,7 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 {
     // Do not pass in `ptls` as argument. This slows down the fast path
     // in pool_alloc significantly
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_gc_pagemeta_t *pg = jl_gc_alloc_page();
     pg->osize = p->osize;
     pg->ages = (uint8_t*)malloc_s(GC_PAGE_SZ / 8 / p->osize + 1);
@@ -1128,20 +1226,22 @@ static NOINLINE jl_taggedvalue_t *add_page(jl_gc_pool_t *p) JL_NOTSAFEPOINT
 }
 
 // Size includes the tag and the tag is not cleared!!
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
+static inline jl_value_t *jl_gc_pool_alloc_inner(jl_ptls_t ptls, int pool_offset,
                                           int osize)
 {
     // Use the pool offset instead of the pool address as the argument
     // to workaround a llvm bug.
     // Ref https://llvm.org/bugs/show_bug.cgi?id=27190
     jl_gc_pool_t *p = (jl_gc_pool_t*)((char*)ptls + pool_offset);
-    assert(ptls->gc_state == 0);
+    assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0);
 #ifdef MEMDEBUG
     return jl_gc_big_alloc(ptls, osize);
 #endif
     maybe_collect(ptls);
-    ptls->gc_num.allocd += osize;
-    ptls->gc_num.poolalloc++;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + osize);
+    jl_atomic_store_relaxed(&ptls->gc_num.poolalloc,
+        jl_atomic_load_relaxed(&ptls->gc_num.poolalloc) + 1);
     // first try to use the freelist
     jl_taggedvalue_t *v = p->freelist;
     if (v) {
@@ -1182,6 +1282,23 @@ JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
     return jl_valueof(v);
 }
 
+// Instrumented version of jl_gc_pool_alloc_inner, called into by LLVM-generated code.
+JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
+                                          int osize)
+{
+    jl_value_t *val = jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+
+    maybe_record_alloc_to_profile(val, osize, jl_gc_unknown_type_tag);
+    return val;
+}
+
+// This wrapper exists only to prevent `jl_gc_pool_alloc_inner` from being inlined into
+// its callers. We provide an external-facing interface for callers, and inline `jl_gc_pool_alloc_inner`
+// into this. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset, int osize) {
+    return jl_gc_pool_alloc_inner(ptls, pool_offset, osize);
+}
+
 int jl_gc_classify_pools(size_t sz, int *osize)
 {
     if (sz > GC_MAX_SZCLASS)
@@ -1486,20 +1603,20 @@ static void gc_sweep_perm_alloc(void)
 
 // mark phase
 
-JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *ptr)
+JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_taggedvalue_t *o = jl_astaggedvalue(ptr);
     // The modification of the `gc_bits` is not atomic but it
     // should be safe here since GC is not allowed to run here and we only
     // write GC_OLD to the GC bits outside GC. This could cause
     // duplicated objects in the remset but that shouldn't be a problem.
     o->bits.gc = GC_MARKED;
-    arraylist_push(ptls->heap.remset, ptr);
+    arraylist_push(ptls->heap.remset, (jl_value_t*)ptr);
     ptls->heap.remset_nptr++; // conservative
 }
 
-void jl_gc_queue_multiroot(jl_value_t *parent, jl_value_t *ptr) JL_NOTSAFEPOINT
+void jl_gc_queue_multiroot(const jl_value_t *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
 {
     // first check if this is really necessary
     // TODO: should we store this info in one of the extra gc bits?
@@ -1538,9 +1655,9 @@ void jl_gc_queue_multiroot(jl_value_t *parent, jl_value_t *ptr) JL_NOTSAFEPOINT
     }
 }
 
-void gc_queue_binding(jl_binding_t *bnd)
+JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_taggedvalue_t *buf = jl_astaggedvalue(bnd);
     buf->bits.gc = GC_MARKED;
     arraylist_push(&ptls->heap.rem_bindings, bnd);
@@ -1580,10 +1697,10 @@ STATIC_INLINE uintptr_t gc_read_stack(void *_addr, uintptr_t offset,
 JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t *vt,
                                                   jl_gc_mark_sp_t sp)
 {
-    jl_printf(JL_STDOUT, "GC error (probable corruption) :\n");
-    gc_debug_print_status();
+    jl_safe_printf("GC error (probable corruption) :\n");
+    jl_gc_debug_print_status();
     jl_(vt);
-    gc_debug_critical_error();
+    jl_gc_debug_critical_error();
     gc_mark_loop_unwind(ptls, sp, 0);
     abort();
 }
@@ -1593,20 +1710,18 @@ JL_NORETURN NOINLINE void gc_assert_datatype_fail(jl_ptls_t ptls, jl_datatype_t
 // See the call to `gc_mark_loop` in init with a `NULL` `ptls`.
 void *gc_mark_label_addrs[_GC_MARK_L_MAX];
 
-// Double the mark stack (both pc and data) with the lock held.
+// Double the local mark stack (both pc and data)
 static void NOINLINE gc_mark_stack_resize(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) JL_NOTSAFEPOINT
 {
     jl_gc_mark_data_t *old_data = gc_cache->data_stack;
     void **pc_stack = sp->pc_start;
     size_t stack_size = (char*)sp->pc_end - (char*)pc_stack;
-    JL_LOCK_NOGC(&gc_cache->stack_lock);
     gc_cache->data_stack = (jl_gc_mark_data_t *)realloc_s(old_data, stack_size * 2 * sizeof(jl_gc_mark_data_t));
     sp->data = (jl_gc_mark_data_t *)(((char*)sp->data) + (((char*)gc_cache->data_stack) - ((char*)old_data)));
 
     sp->pc_start = gc_cache->pc_stack = (void**)realloc_s(pc_stack, stack_size * 2 * sizeof(void*));
     gc_cache->pc_stack_end = sp->pc_end = sp->pc_start + stack_size * 2;
-    sp->pc += sp->pc_start - pc_stack;
-    JL_UNLOCK_NOGC(&gc_cache->stack_lock);
+    sp->pc = sp->pc_start + (sp->pc - pc_stack);
 }
 
 // Push a work item to the stack. The type of the work item is marked with `pc`.
@@ -2335,8 +2450,8 @@ module_binding: {
             void *vb = jl_astaggedvalue(b);
             verify_parent1("module", binding->parent, &vb, "binding_buff");
             (void)vb;
-            jl_value_t *value = b->value;
-            jl_value_t *globalref = b->globalref;
+            jl_value_t *value = jl_atomic_load_relaxed(&b->value);
+            jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref);
             if (value) {
                 verify_parent2("module", binding->parent,
                                &b->value, "binding(%s)", jl_symbol_name(b->name));
@@ -2501,6 +2616,8 @@ mark: {
             if (a->data == NULL || jl_array_len(a) == 0)
                 goto pop;
             if (flags.ptrarray) {
+                if ((jl_datatype_t*)jl_tparam0(vt) == jl_symbol_type)
+                    goto pop;
                 size_t l = jl_array_len(a);
                 uintptr_t nptr = (l << 2) | (bits & GC_OLD);
                 objary_begin = (jl_value_t**)a->data;
@@ -2574,41 +2691,34 @@ mark: {
                 objprofile_count(vt, bits == GC_OLD_MARKED, sizeof(jl_task_t));
             jl_task_t *ta = (jl_task_t*)new_obj;
             gc_scrub_record_task(ta);
-            void *stkbuf = ta->stkbuf;
-            int16_t tid = ta->tid;
-            jl_ptls_t ptls2 = NULL;
-            if (tid != -1)
-                ptls2 = jl_all_tls_states[tid];
             if (gc_cblist_task_scanner) {
                 export_gc_state(ptls, &sp);
+                int16_t tid = jl_atomic_load_relaxed(&ta->tid);
                 gc_invoke_callbacks(jl_gc_cb_task_scanner_t,
                     gc_cblist_task_scanner,
-                    (ta, ptls2 != NULL && ta == ptls2->root_task));
+                    (ta, tid != -1 && ta == jl_all_tls_states[tid]->root_task));
                 import_gc_state(ptls, &sp);
             }
 #ifdef COPY_STACKS
+            void *stkbuf = ta->stkbuf;
             if (stkbuf && ta->copy_stack)
                 gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz);
 #endif
-            jl_gcframe_t *s = NULL;
+            jl_gcframe_t *s = ta->gcstack;
             size_t nroots;
             uintptr_t offset = 0;
             uintptr_t lb = 0;
             uintptr_t ub = (uintptr_t)-1;
-            if (ptls2 && ta == ptls2->current_task) {
-                s = ptls2->pgcstack;
-            }
-            else if (stkbuf) {
-                s = ta->gcstack;
 #ifdef COPY_STACKS
-                if (ta->copy_stack) {
-                    assert(tid != -1 && ptls2 != NULL);
-                    ub = (uintptr_t)ptls2->stackbase;
-                    lb = ub - ta->copy_stack;
-                    offset = (uintptr_t)stkbuf - lb;
-                }
-#endif
+            if (stkbuf && ta->copy_stack && ta->ptls == NULL) {
+                int16_t tid = jl_atomic_load_relaxed(&ta->tid);
+                assert(tid >= 0);
+                jl_ptls_t ptls2 = jl_all_tls_states[tid];
+                ub = (uintptr_t)ptls2->stackbase;
+                lb = ub - ta->copy_stack;
+                offset = (uintptr_t)stkbuf - lb;
             }
+#endif
             if (s) {
                 nroots = gc_read_stack(&s->nroots, offset, lb, ub);
                 assert(nroots <= UINT32_MAX);
@@ -2661,7 +2771,7 @@ mark: {
             if (npointers == 0)
                 goto pop;
             uintptr_t nptr = npointers << 2 | (bits & GC_OLD);
-            assert(layout->nfields > 0 && layout->fielddesc_type != 3 && "opaque types should have been handled specially");
+            assert((layout->nfields > 0 || layout->fielddesc_type == 3) && "opaque types should have been handled specially");
             if (layout->fielddesc_type == 0) {
                 obj8_parent = (char*)new_obj;
                 obj8_begin = (uint8_t*)jl_dt_layout_ptrs(layout);
@@ -2713,15 +2823,17 @@ mark: {
 static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp,
                                      jl_ptls_t ptls2)
 {
-    gc_mark_queue_obj(gc_cache, sp, ptls2->current_task);
+    gc_mark_queue_obj(gc_cache, sp, jl_atomic_load_relaxed(&ptls2->current_task));
     gc_mark_queue_obj(gc_cache, sp, ptls2->root_task);
     if (ptls2->next_task)
         gc_mark_queue_obj(gc_cache, sp, ptls2->next_task);
+    if (ptls2->previous_task) // shouldn't be necessary, but no reason not to
+        gc_mark_queue_obj(gc_cache, sp, ptls2->previous_task);
     if (ptls2->previous_exception)
         gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception);
 }
 
-void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp);
+extern jl_value_t *cmpswap_names JL_GLOBALLY_ROOTED;
 
 // mark the initial root set
 static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
@@ -2729,9 +2841,6 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
     // modules
     gc_mark_queue_obj(gc_cache, sp, jl_main_module);
 
-    // tasks
-    jl_gc_mark_enqueued_tasks(gc_cache, sp);
-
     // invisible builtin values
     if (jl_an_empty_vec_any != NULL)
         gc_mark_queue_obj(gc_cache, sp, jl_an_empty_vec_any);
@@ -2743,9 +2852,11 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
         }
     }
     gc_mark_queue_obj(gc_cache, sp, jl_anytuple_type_type);
-    for (size_t i = 0; i < N_CALL_CACHE; i++)
-        if (call_cache[i])
-            gc_mark_queue_obj(gc_cache, sp, call_cache[i]);
+    for (size_t i = 0; i < N_CALL_CACHE; i++) {
+        jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]);
+        if (v != NULL)
+            gc_mark_queue_obj(gc_cache, sp, v);
+    }
     if (jl_all_methods != NULL)
         gc_mark_queue_obj(gc_cache, sp, jl_all_methods);
     if (_jl_debug_method_invalidation != NULL)
@@ -2753,6 +2864,8 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
 
     // constants
     gc_mark_queue_obj(gc_cache, sp, jl_emptytuple_type);
+    if (cmpswap_names != NULL)
+        gc_mark_queue_obj(gc_cache, sp, cmpswap_names);
 }
 
 // find unmarked objects that need to be finalized from the finalizer list "list".
@@ -2799,11 +2912,11 @@ static void sweep_finalizer_list(arraylist_t *list)
 }
 
 // collector entry point and control
-static volatile uint32_t jl_gc_disable_counter = 0;
+static _Atomic(uint32_t) jl_gc_disable_counter = 1;
 
 JL_DLLEXPORT int jl_gc_enable(int on)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     int prev = !ptls->disable_gc;
     ptls->disable_gc = (on == 0);
     if (on && !prev) {
@@ -2824,7 +2937,7 @@ JL_DLLEXPORT int jl_gc_enable(int on)
 
 JL_DLLEXPORT int jl_gc_is_enabled(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return !ptls->disable_gc;
 }
 
@@ -2910,8 +3023,8 @@ static void jl_gc_queue_remset(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp
         jl_binding_t *ptr = (jl_binding_t*)items[i];
         // A null pointer can happen here when the binding is cleaned up
         // as an exception is thrown after it was already queued (#10221)
-        if (!ptr->value) continue;
-        if (gc_mark_queue_obj(gc_cache, sp, ptr->value)) {
+        jl_value_t *v = jl_atomic_load_relaxed(&ptr->value);
+        if (v != NULL && gc_mark_queue_obj(gc_cache, sp, v)) {
             items[n_bnd_refyoung] = ptr;
             n_bnd_refyoung++;
         }
@@ -2946,6 +3059,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 
     uint64_t t0 = jl_hrtime();
     int64_t last_perm_scanned_bytes = perm_scanned_bytes;
+    JL_PROBE_GC_MARK_BEGIN();
 
     // 1. fix GC bits of objects in the remset.
     for (int t_i = 0; t_i < jl_n_threads; t_i++)
@@ -2972,12 +3086,14 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_mark_loop(ptls, sp);
     gc_mark_sp_init(gc_cache, &sp);
     gc_num.since_sweep += gc_num.allocd;
+    JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes);
     gc_settime_premark_end();
     gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes);
     int64_t actual_allocd = gc_num.since_sweep;
     // marking is over
 
     // 4. check for objects to finalize
+    clear_weak_refs();
     // Record the length of the marked list since we need to
     // mark the object moved to the marked list from the
     // `finalizer_list` by `sweep_finalizer_list`
@@ -3001,7 +3117,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_mark_sp_init(gc_cache, &sp);
     // Conservative marking relies on age to tell allocated objects
     // and freelist entries apart.
-    mark_reset_age = !support_conservative_marking;
+    mark_reset_age = !jl_gc_conservative_gc_support_enabled();
     // Reset the age and old bit for any unmarked objects referenced by the
     // `to_finalize` list. These objects are only reachable from this list
     // and should not be referenced by any old objects so this won't break
@@ -3028,54 +3144,52 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     if (!prev_sweep_full)
         promoted_bytes += perm_scanned_bytes - last_perm_scanned_bytes;
     // 5. next collection decision
-    int not_freed_enough = estimate_freed < (7*(actual_allocd/10));
+    int not_freed_enough = (collection == JL_GC_AUTO) && estimate_freed < (7*(actual_allocd/10));
     int nptr = 0;
     for (int i = 0;i < jl_n_threads;i++)
         nptr += jl_all_tls_states[i]->heap.remset_nptr;
-    int large_frontier = nptr*sizeof(void*) >= default_collect_interval; // many pointers in the intergen frontier => "quick" mark is not quick
-    int sweep_full;
+
+    // many pointers in the intergen frontier => "quick" mark is not quick
+    int large_frontier = nptr*sizeof(void*) >= default_collect_interval;
+    int sweep_full = 0;
     int recollect = 0;
-    // trigger a full collection if the number of live bytes doubles since the last full
-    // collection and then remains at least that high for a while.
-    if (grown_heap_age == 0) {
-        if (live_bytes > 2 * last_full_live)
-            grown_heap_age = 1;
-    }
-    else if (live_bytes >= last_live_bytes) {
-        grown_heap_age++;
-    }
-    if (collection == JL_GC_INCREMENTAL) {
-        sweep_full = 0;
-    } else if ((collection == JL_GC_FULL || large_frontier ||
-         ((not_freed_enough || promoted_bytes >= gc_num.interval) &&
-          (promoted_bytes >= default_collect_interval || prev_sweep_full)) ||
-         grown_heap_age > 1) &&
-        gc_num.pause > 1) {
-        recollect = (collection == JL_GC_FULL);
-        if (large_frontier)
-            gc_num.interval = last_long_collect_interval;
-        if (not_freed_enough || large_frontier) {
-            if (gc_num.interval <= 2*(max_collect_interval/5)) {
-                gc_num.interval = 5 * (gc_num.interval / 2);
-            }
+
+    // update heuristics only if this GC was automatically triggered
+    if (collection == JL_GC_AUTO) {
+        if (not_freed_enough) {
+            gc_num.interval = gc_num.interval * 2;
+        }
+        if (large_frontier) {
+            sweep_full = 1;
+        }
+        if (gc_num.interval > max_collect_interval) {
+            sweep_full = 1;
+            gc_num.interval = max_collect_interval;
         }
-        last_long_collect_interval = gc_num.interval;
+    }
+
+    // If the live data outgrows the suggested max_total_memory
+    // we keep going with minimum intervals and full gcs until
+    // we either free some space or get an OOM error.
+    if (live_bytes > max_total_memory) {
         sweep_full = 1;
-        promoted_bytes = 0;
     }
-    else {
-        // reset interval to default, or at least half of live_bytes
-        int64_t half = live_bytes/2;
-        if (default_collect_interval < half && half <= max_collect_interval)
-            gc_num.interval = half;
-        else
-            gc_num.interval = default_collect_interval;
-        sweep_full = gc_sweep_always_full;
+    if (gc_sweep_always_full) {
+        sweep_full = 1;
     }
-    if (sweep_full)
+    if (collection == JL_GC_FULL) {
+        sweep_full = 1;
+        recollect = 1;
+    }
+    if (sweep_full) {
+        // these are the difference between the number of gc-perm bytes scanned
+        // on the first collection after sweep_full, and the current scan
         perm_scanned_bytes = 0;
+        promoted_bytes = 0;
+    }
     scanned_bytes = 0;
     // 5. start sweeping
+    JL_PROBE_GC_SWEEP_BEGIN(sweep_full);
     sweep_weak_refs();
     sweep_stack_pools();
     gc_sweep_foreign_objs();
@@ -3085,6 +3199,7 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
     gc_sweep_pool(sweep_full);
     if (sweep_full)
         gc_sweep_perm_alloc();
+    JL_PROBE_GC_SWEEP_END();
     // sweeping is over
     // 6. if it is a quick sweep, put back the remembered objects in queued state
     // so that we don't trigger the barrier again on them.
@@ -3119,22 +3234,51 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 
     uint64_t gc_end_t = jl_hrtime();
     uint64_t pause = gc_end_t - t0;
+
+    _report_gc_finished(pause, gc_num.freed, sweep_full, recollect);
+
     gc_final_pause_end(t0, gc_end_t);
     gc_time_sweep_pause(gc_end_t, actual_allocd, live_bytes,
                         estimate_freed, sweep_full);
     gc_num.full_sweep += sweep_full;
+    uint64_t max_memory = last_live_bytes + gc_num.allocd;
+    if (max_memory > gc_num.max_memory) {
+        gc_num.max_memory = max_memory;
+    }
+
     gc_num.allocd = 0;
     last_live_bytes = live_bytes;
     live_bytes += -gc_num.freed + gc_num.since_sweep;
-    if (prev_sweep_full) {
-        last_full_live = live_bytes;
-        grown_heap_age = 0;
+
+    if (collection == JL_GC_AUTO) {
+      // If the current interval is larger than half the live data decrease the interval
+      int64_t half = live_bytes/2;
+      if (gc_num.interval > half) gc_num.interval = half;
+      // But never go below default
+      if (gc_num.interval < default_collect_interval) gc_num.interval = default_collect_interval;
+    }
+
+    // We need this for 32 bit but will be useful to set limits on 64 bit
+    if (gc_num.interval + live_bytes > max_total_memory) {
+        if (live_bytes < max_total_memory) {
+            gc_num.interval = max_total_memory - live_bytes;
+        } else {
+            // We can't stay under our goal so let's go back to
+            // the minimum interval and hope things get better
+            gc_num.interval = default_collect_interval;
+       }
     }
+
+    gc_time_summary(sweep_full, t_start, gc_end_t, gc_num.freed, live_bytes, gc_num.interval, pause);
+
     prev_sweep_full = sweep_full;
     gc_num.pause += !recollect;
     gc_num.total_time += pause;
     gc_num.since_sweep = 0;
     gc_num.freed = 0;
+    if (pause > gc_num.max_pause) {
+        gc_num.max_pause = pause;
+    }
     reset_thread_gc_counts();
 
     return recollect;
@@ -3142,16 +3286,20 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection)
 
 JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (jl_gc_disable_counter) {
-        size_t localbytes = ptls->gc_num.allocd + gc_num.interval;
-        ptls->gc_num.allocd = -(int64_t)gc_num.interval;
-        jl_atomic_add_fetch(&gc_num.deferred_alloc, localbytes);
+    JL_PROBE_GC_BEGIN(collection);
+
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    if (jl_atomic_load_relaxed(&jl_gc_disable_counter)) {
+        size_t localbytes = jl_atomic_load_relaxed(&ptls->gc_num.allocd) + gc_num.interval;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
+        static_assert(sizeof(_Atomic(uint64_t)) == sizeof(gc_num.deferred_alloc), "");
+        jl_atomic_fetch_add((_Atomic(uint64_t)*)&gc_num.deferred_alloc, localbytes);
         return;
     }
-    gc_debug_print();
+    jl_gc_debug_print();
 
-    int8_t old_state = ptls->gc_state;
+    int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state);
     jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
     // `jl_safepoint_start_gc()` makes sure only one thread can
     // run the GC.
@@ -3170,10 +3318,12 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     // TODO (concurrently queue objects)
     // no-op for non-threading
     jl_gc_wait_for_the_world();
+    JL_PROBE_GC_STOP_THE_WORLD();
+
     gc_invoke_callbacks(jl_gc_cb_pre_gc_t,
         gc_cblist_pre_gc, (collection));
 
-    if (!jl_gc_disable_counter) {
+    if (!jl_atomic_load_relaxed(&jl_gc_disable_counter)) {
         JL_LOCK_NOGC(&finalizers_lock);
         if (_jl_gc_collect(ptls, collection)) {
             // recollect
@@ -3187,16 +3337,19 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
     // no-op for non-threading
     jl_safepoint_end_gc();
     jl_gc_state_set(ptls, old_state, JL_GC_STATE_WAITING);
+    JL_PROBE_GC_END();
 
     // Only disable finalizers on current thread
     // Doing this on all threads is racy (it's impossible to check
     // or wait for finalizers on other threads without dead lock).
-    if (!ptls->finalizers_inhibited) {
+    if (!ptls->finalizers_inhibited && ptls->locks.len == 0) {
         int8_t was_in_finalizer = ptls->in_finalizer;
         ptls->in_finalizer = 1;
-        run_finalizers(ptls);
+        run_finalizers(ct);
         ptls->in_finalizer = was_in_finalizer;
     }
+    JL_PROBE_GC_FINALIZER();
+
     gc_invoke_callbacks(jl_gc_cb_post_gc_t,
         gc_cblist_post_gc, (collection));
 #ifdef _OS_WINDOWS_
@@ -3223,12 +3376,11 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty)
 // Per-thread initialization
 void jl_init_thread_heap(jl_ptls_t ptls)
 {
+    if (ptls->tid == 0)
+        ptls->disable_gc = 1;
     jl_thread_heap_t *heap = &ptls->heap;
     jl_gc_pool_t *p = heap->norm_pools;
-    for(int i=0; i < JL_GC_N_POOLS; i++) {
-        assert((jl_gc_sizeclasses[i] < 16 &&
-                jl_gc_sizeclasses[i] % sizeof(void*) == 0) ||
-               (jl_gc_sizeclasses[i] % 16 == 0));
+    for (int i = 0; i < JL_GC_N_POOLS; i++) {
         p[i].osize = jl_gc_sizeclasses[i];
         p[i].freelist = NULL;
         p[i].newpages = NULL;
@@ -3250,22 +3402,25 @@ void jl_init_thread_heap(jl_ptls_t ptls)
     gc_cache->perm_scanned_bytes = 0;
     gc_cache->scanned_bytes = 0;
     gc_cache->nbig_obj = 0;
-    JL_MUTEX_INIT(&gc_cache->stack_lock);
     size_t init_size = 1024;
     gc_cache->pc_stack = (void**)malloc_s(init_size * sizeof(void*));
     gc_cache->pc_stack_end = gc_cache->pc_stack + init_size;
     gc_cache->data_stack = (jl_gc_mark_data_t *)malloc_s(init_size * sizeof(jl_gc_mark_data_t));
 
-    memset(&ptls->gc_num, 0, sizeof(jl_thread_gc_num_t));
+    memset(&ptls->gc_num, 0, sizeof(ptls->gc_num));
     assert(gc_num.interval == default_collect_interval);
-    ptls->gc_num.allocd = -(int64_t)gc_num.interval;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd, -(int64_t)gc_num.interval);
 }
 
 // System-wide initializations
 void jl_gc_init(void)
 {
+    JL_MUTEX_INIT(&finalizers_lock);
+    uv_mutex_init(&gc_cache_lock);
+    uv_mutex_init(&gc_perm_lock);
+
     jl_gc_init_page();
-    gc_debug_init();
+    jl_gc_debug_init();
 
     arraylist_new(&finalizer_list_marked, 0);
     arraylist_new(&to_finalize, 0);
@@ -3273,6 +3428,8 @@ void jl_gc_init(void)
     gc_num.interval = default_collect_interval;
     last_long_collect_interval = default_collect_interval;
     gc_num.allocd = 0;
+    gc_num.max_pause = 0;
+    gc_num.max_memory = 0;
 
 #ifdef _P64
     // on a big memory machine, set max_collect_interval to totalmem / ncores / 2
@@ -3286,6 +3443,7 @@ void jl_gc_init(void)
 #endif
     jl_gc_mark_sp_t sp = {NULL, NULL, NULL, NULL};
     gc_mark_loop(NULL, sp);
+    t_start = jl_hrtime();
 }
 
 // callback for passing OOM errors from gmp
@@ -3298,46 +3456,63 @@ JL_DLLEXPORT void jl_throw_out_of_memory_error(void)
 
 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls && ptls->world_age) {
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        ptls->gc_num.allocd += sz;
-        ptls->gc_num.malloc++;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
     }
     return malloc(sz);
 }
 
 JL_DLLEXPORT void *jl_gc_counted_calloc(size_t nm, size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls && ptls->world_age) {
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
-        ptls->gc_num.allocd += nm*sz;
-        ptls->gc_num.malloc++;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + nm*sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
     }
     return calloc(nm, sz);
 }
 
 JL_DLLEXPORT void jl_gc_counted_free_with_size(void *p, size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
     free(p);
-    if (ptls && ptls->world_age) {
-        ptls->gc_num.freed += sz;
-        ptls->gc_num.freecall++;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
+        jl_atomic_store_relaxed(&ptls->gc_num.freed,
+            jl_atomic_load_relaxed(&ptls->gc_num.freed) + sz);
+        jl_atomic_store_relaxed(&ptls->gc_num.freecall,
+            jl_atomic_load_relaxed(&ptls->gc_num.freecall) + 1);
     }
 }
 
 JL_DLLEXPORT void *jl_gc_counted_realloc_with_old_size(void *p, size_t old, size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls && ptls->world_age) {
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    jl_task_t *ct = jl_current_task;
+    if (pgcstack && ct->world_age) {
+        jl_ptls_t ptls = ct->ptls;
         maybe_collect(ptls);
         if (sz < old)
-            ptls->gc_num.freed += (old - sz);
+            jl_atomic_store_relaxed(&ptls->gc_num.freed,
+                jl_atomic_load_relaxed(&ptls->gc_num.freed) + (old - sz));
         else
-            ptls->gc_num.allocd += (sz - old);
-        ptls->gc_num.realloc++;
+            jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+                jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (sz - old));
+        jl_atomic_store_relaxed(&ptls->gc_num.realloc,
+            jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
     }
     return realloc(p, sz);
 }
@@ -3354,8 +3529,8 @@ JL_DLLEXPORT void *jl_malloc(size_t sz)
     return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
 }
 
-JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
-{
+//_unchecked_calloc does not check for potential overflow of nm*sz
+STATIC_INLINE void *_unchecked_calloc(size_t nm, size_t sz) {
     size_t nmsz = nm*sz;
     int64_t *p = (int64_t *)jl_gc_counted_calloc(nmsz + JL_SMALL_BYTE_ALIGNMENT, 1);
     if (p == NULL)
@@ -3364,6 +3539,13 @@ JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
     return (void *)(p + 2); // assumes JL_SMALL_BYTE_ALIGNMENT == 16
 }
 
+JL_DLLEXPORT void *jl_calloc(size_t nm, size_t sz)
+{
+    if (nm > SSIZE_MAX/sz - JL_SMALL_BYTE_ALIGNMENT)
+        return NULL;
+    return _unchecked_calloc(nm, sz);
+}
+
 JL_DLLEXPORT void jl_free(void *p)
 {
     if (p != NULL) {
@@ -3396,13 +3578,15 @@ JL_DLLEXPORT void *jl_realloc(void *p, size_t sz)
 
 JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     maybe_collect(ptls);
     size_t allocsz = LLT_ALIGN(sz, JL_CACHE_BYTE_ALIGNMENT);
     if (allocsz < sz)  // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
-    ptls->gc_num.allocd += allocsz;
-    ptls->gc_num.malloc++;
+    jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+        jl_atomic_load_relaxed(&ptls->gc_num.allocd) + allocsz);
+    jl_atomic_store_relaxed(&ptls->gc_num.malloc,
+        jl_atomic_load_relaxed(&ptls->gc_num.malloc) + 1);
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
@@ -3414,6 +3598,8 @@ JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz)
     SetLastError(last_error);
 #endif
     errno = last_errno;
+    // jl_gc_managed_malloc is currently always used for allocating array buffers.
+    maybe_record_alloc_to_profile((jl_value_t*)b, sz, (jl_datatype_t*)jl_buff_tag);
     return b;
 }
 
@@ -3432,10 +3618,13 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
         live_bytes += allocsz - oldsz;
     }
     else if (allocsz < oldsz)
-        ptls->gc_num.freed += (oldsz - allocsz);
+        jl_atomic_store_relaxed(&ptls->gc_num.freed,
+            jl_atomic_load_relaxed(&ptls->gc_num.freed) + (oldsz - allocsz));
     else
-        ptls->gc_num.allocd += (allocsz - oldsz);
-    ptls->gc_num.realloc++;
+        jl_atomic_store_relaxed(&ptls->gc_num.allocd,
+            jl_atomic_load_relaxed(&ptls->gc_num.allocd) + (allocsz - oldsz));
+    jl_atomic_store_relaxed(&ptls->gc_num.realloc,
+        jl_atomic_load_relaxed(&ptls->gc_num.realloc) + 1);
 
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
@@ -3452,14 +3641,14 @@ static void *gc_managed_realloc_(jl_ptls_t ptls, void *d, size_t sz, size_t olds
     SetLastError(last_error);
 #endif
     errno = last_errno;
-
+    maybe_record_alloc_to_profile((jl_value_t*)b, sz, jl_gc_unknown_type_tag);
     return b;
 }
 
 JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
                                          int isaligned, jl_value_t *owner)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return gc_managed_realloc_(ptls, d, sz, oldsz, isaligned, owner, 1);
 }
 
@@ -3484,7 +3673,7 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
     if (allocsz < sz)  // overflow in adding offs, size was "negative"
         jl_throw(jl_memory_exception);
     bigval_t *hdr = bigval_header(v);
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     maybe_collect(ptls); // don't want this to happen during jl_gc_managed_realloc
     gc_big_object_unlink(hdr);
     // TODO: this is not safe since it frees the old pointer. ideally we'd like
@@ -3505,7 +3694,7 @@ jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz)
 #define GC_PERM_POOL_SIZE (2 * 1024 * 1024)
 // 20k limit for pool allocation. At most 1% fragmentation
 #define GC_PERM_POOL_LIMIT (20 * 1024)
-jl_mutex_t gc_perm_lock = {0, 0};
+uv_mutex_t gc_perm_lock;
 static uintptr_t gc_perm_pool = 0;
 static uintptr_t gc_perm_end = 0;
 
@@ -3582,57 +3771,56 @@ void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, unsigned offset)
     if (__unlikely(sz > GC_PERM_POOL_LIMIT))
 #endif
         return gc_perm_alloc_large(sz, zero, align, offset);
-    JL_LOCK_NOGC(&gc_perm_lock);
+    uv_mutex_lock(&gc_perm_lock);
     void *p = jl_gc_perm_alloc_nolock(sz, zero, align, offset);
-    JL_UNLOCK_NOGC(&gc_perm_lock);
+    uv_mutex_unlock(&gc_perm_lock);
     return p;
 }
 
 JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     jl_gc_add_finalizer_th(ptls, v, f);
 }
 
 JL_DLLEXPORT void jl_finalize(jl_value_t *o)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_finalize_th(ptls, o);
+    jl_finalize_th(jl_current_task, o);
 }
 
 JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_new_weakref_th(ptls, value);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_allocobj(size_t sz)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, sz, NULL);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, 0, NULL);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_1w(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, sizeof(void*), NULL);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_2w(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, sizeof(void*) * 2, NULL);
 }
 
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_3w(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     return jl_gc_alloc(ptls, sizeof(void*) * 3, NULL);
 }
 
@@ -3651,8 +3839,8 @@ JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void)
         }
         return result;
     } else {
-        int result = support_conservative_marking;
-        support_conservative_marking = 1;
+        int result = jl_atomic_load(&support_conservative_marking);
+        jl_atomic_store(&support_conservative_marking, 1);
         return result;
     }
 }
diff --git a/src/gc.h b/src/gc.h
index 06faa64a8b07f3..858cafa6cec07f 100644
--- a/src/gc.h
+++ b/src/gc.h
@@ -11,9 +11,7 @@
 
 #include <stdlib.h>
 #include <string.h>
-#ifndef _MSC_VER
 #include <strings.h>
-#endif
 #include <inttypes.h>
 #include "julia.h"
 #include "julia_threads.h"
@@ -26,6 +24,7 @@
 #endif
 #endif
 #include "julia_assert.h"
+#include "gc-alloc-profiler.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -73,6 +72,8 @@ typedef struct {
     size_t      interval;
     int         pause;
     int         full_sweep;
+    uint64_t    max_pause;
+    uint64_t    max_memory;
 } jl_gc_num_t;
 
 enum {
@@ -371,18 +372,12 @@ typedef struct {
     int ub;
 } pagetable_t;
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT;
 #else
 STATIC_INLINE unsigned ffs_u32(uint32_t bitvec)
 {
-#if defined(_COMPILER_MICROSOFT_)
-    unsigned long j;
-    _BitScanForward(&j, bitvec);
-    return j;
-#else
     return __builtin_ffs(bitvec) - 1;
-#endif
 }
 #endif
 
@@ -517,7 +512,7 @@ void gc_mark_queue_finlist(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp,
                            arraylist_t *list, size_t start);
 void gc_mark_loop(jl_ptls_t ptls, jl_gc_mark_sp_t sp);
 void sweep_stack_pools(void);
-void gc_debug_init(void);
+void jl_gc_debug_init(void);
 
 extern void *gc_mark_label_addrs[_GC_MARK_L_MAX];
 
@@ -564,6 +559,9 @@ void gc_time_mark_pause(int64_t t0, int64_t scanned_bytes,
 void gc_time_sweep_pause(uint64_t gc_end_t, int64_t actual_allocd,
                          int64_t live_bytes, int64_t estimate_freed,
                          int sweep_full);
+void gc_time_summary(int sweep_full, uint64_t start, uint64_t end,
+                     uint64_t freed, uint64_t live, uint64_t interval,
+                     uint64_t pause);
 #else
 #define gc_time_pool_start()
 STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT
@@ -589,6 +587,8 @@ STATIC_INLINE void gc_time_count_mallocd_array(int bits) JL_NOTSAFEPOINT
 #define gc_time_mark_pause(t0, scanned_bytes, perm_scanned_bytes)
 #define gc_time_sweep_pause(gc_end_t, actual_allocd, live_bytes,        \
                             estimate_freed, sweep_full)
+#define  gc_time_summary(sweep_full, start, end, freed, live,           \
+                            interval, pause)
 #endif
 
 #ifdef MEMFENCE
@@ -646,14 +646,14 @@ NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_off
 #ifdef GC_DEBUG_ENV
 JL_DLLEXPORT extern jl_gc_debug_env_t jl_gc_debug_env;
 #define gc_sweep_always_full jl_gc_debug_env.always_full
-int gc_debug_check_other(void);
+int jl_gc_debug_check_other(void);
 int gc_debug_check_pool(void);
-void gc_debug_print(void);
+void jl_gc_debug_print(void);
 void gc_scrub_record_task(jl_task_t *ta) JL_NOTSAFEPOINT;
 void gc_scrub(void);
 #else
 #define gc_sweep_always_full 0
-static inline int gc_debug_check_other(void)
+static inline int jl_gc_debug_check_other(void)
 {
     return 0;
 }
@@ -661,7 +661,7 @@ static inline int gc_debug_check_pool(void)
 {
     return 0;
 }
-static inline void gc_debug_print(void)
+static inline void jl_gc_debug_print(void)
 {
 }
 static inline void gc_scrub_record_task(jl_task_t *ta) JL_NOTSAFEPOINT
@@ -704,6 +704,9 @@ void gc_count_pool(void);
 
 size_t jl_array_nbytes(jl_array_t *a) JL_NOTSAFEPOINT;
 
+JL_DLLEXPORT void jl_enable_gc_logging(int enable);
+void _report_gc_finished(uint64_t pause, uint64_t freed, int full, int recollect) JL_NOTSAFEPOINT;
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/gen_sysimg_symtab.jl b/src/gen_sysimg_symtab.jl
index 2d389a7209b339..8f03cc15607679 100644
--- a/src/gen_sysimg_symtab.jl
+++ b/src/gen_sysimg_symtab.jl
@@ -69,5 +69,5 @@ function outputline(io, name)
     println(io, "jl_symbol(\"", name, "\"),")
 end
 
-open(f->foreach(l->outputline(f,l), take(syms, 106)), "common_symbols1.inc", "w")
-open(f->foreach(l->outputline(f,l), take(drop(syms, 106), 254)), "common_symbols2.inc", "w")
+open(f->foreach(l->outputline(f,l), take(syms, 100)), "common_symbols1.inc", "w")
+open(f->foreach(l->outputline(f,l), take(drop(syms, 100), 254)), "common_symbols2.inc", "w")
diff --git a/src/getopt.c b/src/getopt.c
deleted file mode 100644
index 1170ecb5c9a0af..00000000000000
--- a/src/getopt.c
+++ /dev/null
@@ -1,147 +0,0 @@
-/* This file is adapted from musl-libc
-----------------------------------------------------------------------
-Copyright © 2005-2014 Rich Felker, et al.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-----------------------------------------------------------------------
-*/
-
-#include <wchar.h>
-#include <string.h>
-#include <limits.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <stddef.h>
-#include "getopt.h"
-
-char *optarg;
-int optind=1, opterr=1, optopt, __optpos, __optreset=0;
-
-#define optpos __optpos
-
-int getopt(int argc, char * const argv[], const char *optstring)
-{
-  int i;
-  wchar_t c, d;
-  int k, l;
-  char *optchar;
-
-  if (!optind || __optreset) {
-    __optreset = 0;
-    __optpos = 0;
-    optind = 1;
-  }
-
-  if (optind >= argc || !argv[optind] || argv[optind][0] != '-' || !argv[optind][1])
-    return -1;
-  if (argv[optind][1] == '-' && !argv[optind][2])
-    return optind++, -1;
-
-  if (!optpos) optpos++;
-  if ((k = mbtowc(&c, argv[optind]+optpos, MB_LEN_MAX)) < 0) {
-    k = 1;
-    c = 0xfffd; /* replacement char */
-  }
-  optchar = argv[optind]+optpos;
-  optopt = c;
-  optpos += k;
-
-  if (!argv[optind][optpos]) {
-    optind++;
-    optpos = 0;
-  }
-
-  for (i=0; (l = mbtowc(&d, optstring+i, MB_LEN_MAX)) && d!=c; i+=l>0?l:1);
-
-  if (d != c) {
-    if (optstring[0] != ':' && opterr) {
-      fprintf(stderr, "%s: illegal option: %c\n", argv[0], optchar);
-    }
-    return '?';
-  }
-  if (optstring[i+1] == ':') {
-    if (optind >= argc) {
-      if (optstring[0] == ':') return ':';
-      if (opterr) {
-        fprintf(stderr, "%s: option requires an argument: %c\n", argv[0], optchar);
-      }
-      return '?';
-    }
-    if (optstring[i+2] == ':') optarg = 0;
-    if (optstring[i+2] != ':' || optpos) {
-      optarg = argv[optind++] + optpos;
-      optpos = 0;
-    }
-  }
-  return c;
-}
-
-static int __getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx, int longonly)
-{
-  if (!optind || __optreset) {
-    __optreset = 0;
-    __optpos = 0;
-    optind = 1;
-  }
-  if (optind >= argc || !argv[optind] || argv[optind][0] != '-') return -1;
-  if ((longonly && argv[optind][1]) ||
-    (argv[optind][1] == '-' && argv[optind][2]))
-  {
-    int i;
-    for (i=0; longopts[i].name; i++) {
-      const char *name = longopts[i].name;
-      char *opt = argv[optind]+1;
-      if (*opt == '-') opt++;
-      for (; *name && *name == *opt; name++, opt++);
-      if (*name || (*opt && *opt != '=')) continue;
-      if (*opt == '=') {
-        if (!longopts[i].has_arg) continue;
-        optarg = opt+1;
-      } else {
-        if (longopts[i].has_arg == required_argument) {
-          if (!(optarg = argv[++optind]))
-            return ':';
-        } else optarg = NULL;
-      }
-      optind++;
-      if (idx) *idx = i;
-      if (longopts[i].flag) {
-        *longopts[i].flag = longopts[i].val;
-        return 0;
-      }
-      return longopts[i].val;
-    }
-    if (argv[optind][1] == '-') {
-      optind++;
-      return '?';
-    }
-  }
-  return getopt(argc, argv, optstring);
-}
-
-int getopt_long(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx)
-{
-  return __getopt_long(argc, argv, optstring, longopts, idx, 0);
-}
-
-int getopt_long_only(int argc, char *const *argv, const char *optstring, const struct option *longopts, int *idx)
-{
-  return __getopt_long(argc, argv, optstring, longopts, idx, 1);
-}
diff --git a/src/getopt.h b/src/getopt.h
deleted file mode 100644
index 7bcf5fb94d40e7..00000000000000
--- a/src/getopt.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* This file is adapted from musl-libc
-----------------------------------------------------------------------
-Copyright © 2005-2014 Rich Felker, et al.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-"Software"), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-----------------------------------------------------------------------
-*/
-
-#ifndef _GETOPT_H
-#define _GETOPT_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-int getopt(int, char * const [], const char *);
-extern char *optarg;
-extern int optind, opterr, optopt;
-
-struct option
-{
-  const char *name;
-  int has_arg;
-  int *flag;
-  int val;
-};
-
-int getopt_long(int, char *const *, const char *, const struct option *, int *);
-int getopt_long_only(int, char *const *, const char *, const struct option *, int *);
-
-#define no_argument        0
-#define required_argument  1
-#define optional_argument  2
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/gf.c b/src/gf.c
index 322222798e8dec..3fc75f862500a1 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -24,23 +24,23 @@
 extern "C" {
 #endif
 
-JL_DLLEXPORT size_t jl_world_counter = 1; // TODO: should this be atomic release/consume?
+JL_DLLEXPORT _Atomic(size_t) jl_world_counter = 1; // uses atomic acquire/release
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT
 {
-    return jl_world_counter;
+    return jl_atomic_load_acquire(&jl_world_counter);
 }
 
 JL_DLLEXPORT size_t jl_get_tls_world_age(void) JL_NOTSAFEPOINT
 {
-    return jl_get_ptls_states()->world_age;
+    return jl_current_task->world_age;
 }
 
 /// ----- Handling for Julia callbacks ----- ///
 
 JL_DLLEXPORT int8_t jl_is_in_pure_context(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return ptls->in_pure_callback;
+    jl_task_t *ct = jl_current_task;
+    return ct->ptls->in_pure_callback;
 }
 
 tracer_cb jl_newmeth_tracer = NULL;
@@ -51,31 +51,24 @@ JL_DLLEXPORT void jl_register_newmeth_tracer(void (*callback)(jl_method_t *trace
 
 void jl_call_tracer(tracer_cb callback, jl_value_t *tracee)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    int last_in = ptls->in_pure_callback;
+    jl_task_t *ct = jl_current_task;
+    int last_in = ct->ptls->in_pure_callback;
     JL_TRY {
-        ptls->in_pure_callback = 1;
+        ct->ptls->in_pure_callback = 1;
         callback(tracee);
-        ptls->in_pure_callback = last_in;
+        ct->ptls->in_pure_callback = last_in;
     }
     JL_CATCH {
-        ptls->in_pure_callback = last_in;
-        jl_printf(JL_STDERR, "WARNING: tracer callback function threw an error:\n");
-        jl_static_show(JL_STDERR, jl_current_exception());
-        jl_printf(JL_STDERR, "\n");
-        jlbacktrace();
+        ct->ptls->in_pure_callback = last_in;
+        jl_printf((JL_STREAM*)STDERR_FILENO, "WARNING: tracer callback function threw an error:\n");
+        jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+        jlbacktrace(); // written to STDERR_FILENO
     }
 }
 
 /// ----- Definitions for various internal TypeMaps ----- ///
 
-static const struct jl_typemap_info method_defs = {
-    1, &jl_method_type
-};
-static const struct jl_typemap_info lambda_cache = {
-    0, &jl_method_instance_type
-};
-
 static int8_t jl_cachearg_offset(jl_methtable_t *mt)
 {
     return mt->offs;
@@ -106,12 +99,16 @@ static int speccache_eq(size_t idx, const void *ty, jl_svec_t *data, uint_t hv)
 }
 
 // get or create the MethodInstance for a specialization
-JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams)
+static jl_method_instance_t *jl_specializations_get_linfo_(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams, jl_method_instance_t *mi_insert)
 {
-    uint_t hv = ((jl_datatype_t*)(jl_is_unionall(type) ? jl_unwrap_unionall(type) : type))->hash;
+    if (m->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&m->unspecialized) != NULL)
+        return jl_atomic_load_relaxed(&m->unspecialized); // handle builtin methods
+    jl_value_t *ut = jl_is_unionall(type) ? jl_unwrap_unionall(type) : type;
+    JL_TYPECHK(specializations, datatype, ut);
+    uint_t hv = ((jl_datatype_t*)ut)->hash;
     for (int locked = 0; ; locked++) {
         jl_array_t *speckeyset = jl_atomic_load_acquire(&m->speckeyset);
-        jl_svec_t *specializations = jl_atomic_load_acquire(&m->specializations);
+        jl_svec_t *specializations = jl_atomic_load_relaxed(&m->specializations);
         size_t i, cl = jl_svec_len(specializations);
         if (hv) {
             ssize_t idx = jl_smallintset_lookup(speckeyset, speccache_eq, type, specializations, hv);
@@ -123,11 +120,11 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
             }
         }
         else {
-            jl_method_instance_t **data = (jl_method_instance_t**)jl_svec_data(specializations);
+            _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
             JL_GC_PUSH1(&specializations); // clang-sa doesn't realize this loop uses specializations
             for (i = cl; i > 0; i--) {
                 jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i - 1]);
-                if (mi == NULL)
+                if ((jl_value_t*)mi == jl_nothing)
                     break;
                 if (jl_types_equal(mi->specTypes, type)) {
                     if (locked)
@@ -145,22 +142,23 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
         }
         else {
             if (hv) {
-                jl_method_instance_t **data = (jl_method_instance_t**)jl_svec_data(specializations);
+                _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
                 for (i = 0; i < cl; i++) {
                     jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
-                    if (mi == NULL)
+                    if ((jl_value_t*)mi == jl_nothing)
                         break;
                     assert(!jl_types_equal(mi->specTypes, type));
                 }
             }
-            jl_method_instance_t *mi = jl_get_specialized(m, type, sparams);
+            jl_method_instance_t *mi = mi_insert ? mi_insert : jl_get_specialized(m, type, sparams);
             JL_GC_PUSH1(&mi);
-            if (hv ? (i + 1 >= cl || jl_svecref(specializations, i + 1) != NULL) : (i <= 1 || jl_svecref(specializations, i - 2) != NULL)) {
+            if (hv ? (i + 1 >= cl || jl_svecref(specializations, i + 1) != jl_nothing) : (i <= 1 || jl_svecref(specializations, i - 2) != jl_nothing)) {
                 size_t ncl = cl < 8 ? 8 : (cl*3)>>1;
                 jl_svec_t *nc = jl_alloc_svec_uninit(ncl);
                 if (i > 0)
                     memcpy((char*)jl_svec_data(nc), jl_svec_data(specializations), sizeof(void*) * i);
-                memset((char*)jl_svec_data(nc) + sizeof(void*) * i, 0, sizeof(void*) * (ncl - cl));
+                for (int j = 0; j < ncl - cl; j++)
+                    jl_svecset(nc, j+i, jl_nothing);
                 if (i < cl)
                     memcpy((char*)jl_svec_data(nc) + sizeof(void*) * (i + ncl - cl),
                            (char*)jl_svec_data(specializations) + sizeof(void*) * i,
@@ -173,7 +171,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
             }
             if (!hv)
                 i -= 1;
-            assert(jl_svecref(specializations, i) == NULL);
+            assert(jl_svecref(specializations, i) == jl_nothing);
             jl_svecset(specializations, i, mi); // jl_atomic_store_release?
             if (hv) {
                 // TODO: fuse lookup and insert steps?
@@ -186,6 +184,19 @@ JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m J
     }
 }
 
+JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams)
+{
+    return jl_specializations_get_linfo_(m, type, sparams, NULL);
+}
+
+jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi)
+{
+    jl_method_t *m = mi->def.method;
+    jl_value_t *type = mi->specTypes;
+    jl_svec_t *sparams = mi->sparam_vals;
+    return jl_specializations_get_linfo_(m, type, sparams, mi);
+}
+
 JL_DLLEXPORT jl_value_t *jl_specializations_lookup(jl_method_t *m, jl_value_t *type)
 {
     jl_value_t *mi = (jl_value_t*)jl_specializations_get_linfo(m, type, NULL);
@@ -197,7 +208,7 @@ JL_DLLEXPORT jl_value_t *jl_specializations_lookup(jl_method_t *m, jl_value_t *t
 JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *type, size_t world)
 {
     struct jl_typemap_assoc search = {type, world, NULL, 0, ~(size_t)0};
-    jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/0);
+    jl_typemap_entry_t *sf = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, /*offs*/0, /*subtype*/0);
     if (!sf)
         return jl_nothing;
     return sf->func.value;
@@ -205,11 +216,12 @@ JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *typ
 
 // ----- MethodInstance specialization instantiation ----- //
 
-JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t*);
 JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
         jl_method_instance_t *mi, jl_value_t *rettype,
         jl_value_t *inferred_const, jl_value_t *inferred,
-        int32_t const_flags, size_t min_world, size_t max_world);
+        int32_t const_flags, size_t min_world, size_t max_world,
+        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
+        uint8_t relocatability);
 JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
                                      jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
 
@@ -229,24 +241,31 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
     m->nargs = 2;
     m->sig = (jl_value_t*)jl_anytuple_type;
     m->slot_syms = jl_an_empty_string;
+    m->nospecialize = 0;
+    m->nospecialize = ~m->nospecialize;
 
+    jl_methtable_t *mt = dt->name->mt;
     jl_typemap_entry_t *newentry = NULL;
     JL_GC_PUSH2(&m, &newentry);
+
+    newentry = jl_typemap_alloc(jl_anytuple_type, NULL, jl_emptysvec,
+            (jl_value_t*)m, 1, ~(size_t)0);
+    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0);
+
     jl_method_instance_t *mi = jl_get_specialized(m, (jl_value_t*)jl_anytuple_type, jl_emptysvec);
-    m->unspecialized = mi;
+    jl_atomic_store_relaxed(&m->unspecialized, mi);
     jl_gc_wb(m, mi);
 
     jl_code_instance_t *codeinst = jl_new_codeinst(mi,
         (jl_value_t*)jl_any_type, jl_nothing, jl_nothing,
-        0, 1, ~(size_t)0);
+        0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
     jl_mi_cache_insert(mi, codeinst);
-    codeinst->specptr.fptr1 = fptr;
-    codeinst->invoke = jl_fptr_args;
+    jl_atomic_store_relaxed(&codeinst->specptr.fptr1, fptr);
+    jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_args);
 
-    jl_methtable_t *mt = dt->name->mt;
     newentry = jl_typemap_alloc(jl_anytuple_type, NULL, jl_emptysvec,
             (jl_value_t*)mi, 1, ~(size_t)0);
-    jl_typemap_insert(&mt->cache, (jl_value_t*)mt, newentry, 0, &lambda_cache);
+    jl_typemap_insert(&mt->cache, (jl_value_t*)mt, newentry, 0);
 
     mt->frozen = 1;
     JL_GC_POP();
@@ -262,7 +281,7 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
     JL_TIMING(INFERENCE);
     if (jl_typeinf_func == NULL)
         return NULL;
-    if (jl_is_method(mi->def.method) && mi->def.method->unspecialized == mi)
+    if (jl_is_method(mi->def.method) && jl_atomic_load_relaxed(&mi->def.method->unspecialized) == mi)
         return NULL; // avoid inferring the unspecialized method
     static int in_inference;
     if (in_inference > 2)
@@ -272,8 +291,6 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
 #ifdef ENABLE_INFERENCE
     if (mi->inInference && !force)
         return NULL;
-    if (jl_is_method(mi->def.method) && mi->def.method->unspecialized == mi)
-        return NULL; // be careful never to infer the unspecialized method, this would not be valid
 
     jl_value_t **fargs;
     JL_GC_PUSHARGS(fargs, 3);
@@ -287,26 +304,35 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
         jl_printf(JL_STDERR, "\n");
     }
 #endif
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
 #endif
-    size_t last_age = ptls->world_age;
-    ptls->world_age = jl_typeinf_world;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_typeinf_world;
     mi->inInference = 1;
     in_inference++;
     JL_TRY {
         src = (jl_code_info_t*)jl_apply(fargs, 3);
     }
     JL_CATCH {
-        jl_printf(JL_STDERR, "Internal error: encountered unexpected error in runtime:\n");
-        jl_static_show(JL_STDERR, jl_current_exception());
-        jl_printf(JL_STDERR, "\n");
-        jlbacktrace(); // written to STDERR_FILENO
+        jl_value_t *e = jl_current_exception();
+        if (e == jl_stackovf_exception) {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: stack overflow in type inference of ");
+            jl_static_show_func_sig((JL_STREAM*)STDERR_FILENO, (jl_value_t*)mi->specTypes);
+            jl_printf((JL_STREAM*)STDERR_FILENO, ".\n");
+            jl_printf((JL_STREAM*)STDERR_FILENO, "This might be caused by recursion over very long tuples or argument lists.\n");
+        }
+        else {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "Internal error: encountered unexpected error in runtime:\n");
+            jl_static_show((JL_STREAM*)STDERR_FILENO, e);
+            jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+            jlbacktrace(); // written to STDERR_FILENO
+        }
         src = NULL;
     }
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     in_inference--;
     mi->inInference = 0;
 #ifdef _OS_WINDOWS_
@@ -324,11 +350,11 @@ jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
 
 JL_DLLEXPORT jl_value_t *jl_call_in_typeinf_world(jl_value_t **args, int nargs)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    size_t last_age = ptls->world_age;
-    ptls->world_age = jl_typeinf_world;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_typeinf_world;
     jl_value_t *ret = jl_apply(args, nargs);
-    ptls->world_age = last_age;
+    ct->world_age = last_age;
     return ret;
 }
 
@@ -362,7 +388,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
     }
     codeinst = jl_new_codeinst(
         mi, rettype, NULL, NULL,
-        0, min_world, max_world);
+        0, min_world, max_world, 0, 0, jl_nothing, 0);
     jl_mi_cache_insert(mi, codeinst);
     return codeinst;
 }
@@ -370,12 +396,14 @@ JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
 JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
         jl_method_instance_t *mi, jl_value_t *rettype,
         jl_value_t *inferred_const, jl_value_t *inferred,
-        int32_t const_flags, size_t min_world, size_t max_world
+        int32_t const_flags, size_t min_world, size_t max_world,
+        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
+        uint8_t relocatability
         /*, jl_array_t *edges, int absolute_max*/)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(min_world <= max_world && "attempting to set invalid world constraints");
-    jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ptls, sizeof(jl_code_instance_t),
+    jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_instance_t),
             jl_code_instance_type);
     codeinst->def = mi;
     codeinst->min_world = min_world;
@@ -386,15 +414,19 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
     if ((const_flags & 2) == 0)
         inferred_const = NULL;
     codeinst->rettype_const = inferred_const;
-    codeinst->invoke = NULL;
-    codeinst->specptr.fptr = NULL;
+    jl_atomic_store_relaxed(&codeinst->invoke, NULL);
+    jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL);
     if ((const_flags & 1) != 0) {
         assert(const_flags & 2);
-        codeinst->invoke = jl_fptr_const_return;
+        jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_const_return);
     }
     codeinst->isspecsig = 0;
-    codeinst->precompile = 0;
-    codeinst->next = NULL;
+    jl_atomic_store_relaxed(&codeinst->precompile, 0);
+    jl_atomic_store_relaxed(&codeinst->next, NULL);
+    codeinst->ipo_purity_bits = ipo_effects;
+    codeinst->purity_bits = effects;
+    codeinst->argescapes = argescapes;
+    codeinst->relocatability = relocatability;
     return codeinst;
 }
 
@@ -404,7 +436,9 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN
     JL_GC_PUSH1(&ci);
     if (jl_is_method(mi->def.method))
         JL_LOCK(&mi->def.method->writelock);
-    ci->next = mi->cache;
+    jl_code_instance_t *oldci = jl_atomic_load_relaxed(&mi->cache);
+    jl_atomic_store_relaxed(&ci->next, oldci);
+    jl_gc_wb(ci, oldci); // likely older, but just being careful
     jl_atomic_store_release(&mi->cache, ci);
     jl_gc_wb(mi, ci);
     if (jl_is_method(mi->def.method))
@@ -415,97 +449,114 @@ JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMEN
 
 static int get_method_unspec_list(jl_typemap_entry_t *def, void *closure)
 {
-    jl_svec_t *specializations = def->func.method->specializations;
+    jl_svec_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations);
     size_t i, l = jl_svec_len(specializations);
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     for (i = 0; i < l; i++) {
         jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
-        if (mi) {
+        if ((jl_value_t*)mi != jl_nothing) {
             assert(jl_is_method_instance(mi));
-            if (jl_rettype_inferred(mi, jl_world_counter, jl_world_counter) == jl_nothing)
+            if (jl_rettype_inferred(mi, world, world) == jl_nothing)
                 jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
         }
     }
     return 1;
 }
 
-static void foreach_mtable_in_module(
+static int foreach_mtable_in_module(
         jl_module_t *m,
-        void (*visit)(jl_methtable_t *mt, void *env),
-        void *env,
-        jl_array_t **visited)
+        int (*visit)(jl_methtable_t *mt, void *env),
+        void *env)
 {
     size_t i;
     void **table = m->bindings.table;
-    *visited = jl_eqtable_put(*visited, (jl_value_t*)m, jl_true, NULL);
     for (i = 1; i < m->bindings.size; i += 2) {
         if (table[i] != HT_NOTFOUND) {
             jl_binding_t *b = (jl_binding_t*)table[i];
-            if (b->owner == m && b->value && b->constp) {
-                jl_value_t *v = jl_unwrap_unionall(b->value);
-                if (jl_is_datatype(v)) {
-                    jl_typename_t *tn = ((jl_datatype_t*)v)->name;
-                    if (tn->module == m && tn->name == b->name) {
-                        jl_methtable_t *mt = tn->mt;
-                        if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt) {
-                            visit(mt, env);
+            JL_GC_PROMISE_ROOTED(b);
+            if (b->owner == m && b->constp) {
+                jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+                if (v) {
+                    jl_value_t *uw = jl_unwrap_unionall(v);
+                    if (jl_is_datatype(uw)) {
+                        jl_typename_t *tn = ((jl_datatype_t*)uw)->name;
+                        if (tn->module == m && tn->name == b->name && tn->wrapper == v) {
+                            // this is the original/primary binding for the type (name/wrapper)
+                            jl_methtable_t *mt = tn->mt;
+                            if (mt != NULL && (jl_value_t*)mt != jl_nothing && mt != jl_type_type_mt && mt != jl_nonfunction_mt) {
+                                if (!visit(mt, env))
+                                    return 0;
+                            }
                         }
                     }
-                }
-                else if (jl_is_module(v)) {
-                    jl_module_t *child = (jl_module_t*)v;
-                    if (child != m && child->parent == m && child->name == b->name &&
-                        !jl_eqtable_get(*visited, v, NULL)) {
-                        // this is the original/primary binding for the submodule
-                        foreach_mtable_in_module(child, visit, env, visited);
+                    else if (jl_is_module(v)) {
+                        jl_module_t *child = (jl_module_t*)v;
+                        if (child != m && child->parent == m && child->name == b->name) {
+                            // this is the original/primary binding for the submodule
+                            if (!foreach_mtable_in_module(child, visit, env))
+                                return 0;
+                        }
                     }
                 }
             }
         }
     }
+    return 1;
 }
 
-void jl_foreach_reachable_mtable(void (*visit)(jl_methtable_t *mt, void *env), void *env)
+int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), void *env)
 {
-    jl_array_t *visited = jl_alloc_vec_any(16);
-    jl_array_t *mod_array = NULL;
-    JL_GC_PUSH2(&visited, &mod_array);
-    mod_array = jl_get_loaded_modules();
-    visit(jl_type_type_mt, env);
+    if (!visit(jl_type_type_mt, env))
+        return 0;
+    if (!visit(jl_nonfunction_mt, env))
+        return 0;
+    jl_array_t *mod_array = jl_get_loaded_modules();
     if (mod_array) {
+        JL_GC_PUSH1(&mod_array);
         int i;
         for (i = 0; i < jl_array_len(mod_array); i++) {
             jl_module_t *m = (jl_module_t*)jl_array_ptr_ref(mod_array, i);
             assert(jl_is_module(m));
-            if (!jl_eqtable_get(visited, (jl_value_t*)m, NULL))
-                foreach_mtable_in_module(m, visit, env, &visited);
+            if (m->parent == m) // some toplevel modules (really just Base) aren't actually
+                if (!foreach_mtable_in_module(m, visit, env)) {
+                    JL_GC_POP();
+                    return 0;
+                }
         }
+        JL_GC_POP();
     }
     else {
-        foreach_mtable_in_module(jl_main_module, visit, env, &visited);
+        if (!foreach_mtable_in_module(jl_main_module, visit, env))
+            return 0;
+        if (!foreach_mtable_in_module(jl_core_module, visit, env))
+            return 0;
     }
-    JL_GC_POP();
+    return 1;
 }
 
-static void reset_mt_caches(jl_methtable_t *mt, void *env)
+static int reset_mt_caches(jl_methtable_t *mt, void *env)
 {
     // removes all method caches
-    if (mt->defs != jl_nothing) { // make sure not to reset builtin functions
-        mt->leafcache = (jl_array_t*)jl_an_empty_vec_any;
-        mt->cache = jl_nothing;
+    // this might not be entirely safe (GC or MT), thus we only do it very early in bootstrapping
+    if (!mt->frozen) { // make sure not to reset builtin functions
+        jl_atomic_store_release(&mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
+        jl_atomic_store_release(&mt->cache, jl_nothing);
     }
-    jl_typemap_visitor(mt->defs, get_method_unspec_list, env);
+    jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), get_method_unspec_list, env);
+    return 1;
 }
 
 
 jl_function_t *jl_typeinf_func = NULL;
-size_t jl_typeinf_world = 0;
+JL_DLLEXPORT size_t jl_typeinf_world = 1;
 
 JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f)
 {
+    size_t newfunc = jl_typeinf_world == 1 && jl_typeinf_func == NULL;
     jl_typeinf_func = (jl_function_t*)f;
     jl_typeinf_world = jl_get_tls_world_age();
-    ++jl_world_counter; // make type-inference the only thing in this world
-    if (jl_typeinf_world == 0) {
+    int world = jl_atomic_fetch_add(&jl_world_counter, 1) + 1; // make type-inference the only thing in this world
+    if (newfunc) {
         // give type inference a chance to see all of these
         // TODO: also reinfer if max_world != ~(size_t)0
         jl_array_t *unspec = jl_alloc_vec_any(0);
@@ -514,8 +565,8 @@ JL_DLLEXPORT void jl_set_typeinf_func(jl_value_t *f)
         size_t i, l;
         for (i = 0, l = jl_array_len(unspec); i < l; i++) {
             jl_method_instance_t *mi = (jl_method_instance_t*)jl_array_ptr_ref(unspec, i);
-            if (jl_rettype_inferred(mi, jl_world_counter, jl_world_counter) == jl_nothing)
-                jl_type_infer(mi, jl_world_counter, 1);
+            if (jl_rettype_inferred(mi, world, world) == jl_nothing)
+                jl_type_infer(mi, world, 1);
         }
         JL_GC_POP();
     }
@@ -534,7 +585,7 @@ jl_value_t *jl_nth_slot_type(jl_value_t *sig, size_t i) JL_NOTSAFEPOINT
         return NULL;
     if (i < len-1)
         return jl_tparam(sig, i);
-    if (jl_is_vararg_type(jl_tparam(sig, len-1)))
+    if (jl_is_vararg(jl_tparam(sig, len-1)))
         return jl_unwrap_vararg(jl_tparam(sig, len-1));
     if (i == len-1)
         return jl_tparam(sig, i);
@@ -559,10 +610,10 @@ jl_value_t *jl_nth_slot_type(jl_value_t *sig, size_t i) JL_NOTSAFEPOINT
 //    return 1;
 //}
 
-static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
+static jl_value_t *ml_matches(jl_methtable_t *mt,
                               jl_tupletype_t *type, int lim, int include_ambiguous,
                               int intersections, size_t world, int cache_result,
-                              size_t *min_valid, size_t *max_valid, int *has_ambiguity);
+                              size_t *min_valid, size_t *max_valid, int *ambig);
 
 // get the compilation signature specialization for this method
 static void jl_compilation_sig(
@@ -578,6 +629,10 @@ static void jl_compilation_sig(
         // so assume the caller was intelligent about calling us
         return;
     }
+    if (definition->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&definition->unspecialized)) {
+        *newparams = jl_anytuple_type->parameters; // handle builtin methods
+        return;
+    }
 
     jl_value_t *decl = definition->sig;
     assert(jl_is_tuple_type(tt));
@@ -637,6 +692,10 @@ static void jl_compilation_sig(
             }
         }
 
+        if (jl_is_vararg(elt)) {
+            continue;
+        }
+
         if (jl_types_equal(elt, (jl_value_t*)jl_type_type)) { // elt == Type{T} where T
             // not triggered for isdispatchtuple(tt), this attempts to handle
             // some cases of adapting a random signature into a compilation signature
@@ -650,7 +709,7 @@ static void jl_compilation_sig(
         else if (jl_is_type_type(elt)) { // elt isa Type{T}
             if (very_general_type(decl_i)) {
                 /*
-                  here's a fairly simple heuristic: if this argument slot's
+                  Here's a fairly simple heuristic: if this argument slot's
                   declared type is general (Type or Any),
                   then don't specialize for every Type that got passed.
 
@@ -665,8 +724,9 @@ static void jl_compilation_sig(
                   x::TypeConstructor matches the first but not the second, while
                   also matching all other TypeConstructors. This means neither
                   Type{TC} nor TypeConstructor is more specific.
+
+                  But don't apply this heuristic if the argument is called (issue #36783).
                 */
-                // don't apply this heuristic if the argument is called (issue #36783)
                 int iscalled = i_arg > 0 && i_arg <= 8 && (definition->called & (1 << (i_arg - 1)));
                 if (!iscalled) {
                     if (!*newparams) *newparams = jl_svec_copy(tt->parameters);
@@ -676,13 +736,13 @@ static void jl_compilation_sig(
             else if (jl_is_type_type(jl_tparam0(elt)) &&
                      // try to give up on specializing type parameters for Type{Type{Type{...}}}
                      (jl_is_type_type(jl_tparam0(jl_tparam0(elt))) || !jl_has_free_typevars(decl_i))) {
-                // TODO: this is probably solidly unsound and would corrupt the cache in many cases
                 /*
                   actual argument was Type{...}, we computed its type as
-                  Type{Type{...}}. we must avoid unbounded nesting here, so
-                  cache the signature as Type{T}, unless something more
-                  specific like Type{Type{Int32}} was actually declared.
-                  this can be determined using a type intersection.
+                  Type{Type{...}}. we like to avoid unbounded nesting here, so
+                  compile (and hopefully cache) the signature as Type{T},
+                  unless something more specific like Type{Type{Int32}} was
+                  actually declared. this can be determined using a type
+                  intersection.
                 */
                 if (!*newparams) *newparams = jl_svec_copy(tt->parameters);
                 if (i < nargs || !definition->isva) {
@@ -742,7 +802,10 @@ static void jl_compilation_sig(
         size_t j = i;
         int all_are_subtypes = 1;
         for (; j < jl_svec_len(*newparams); j++) {
-            if (!jl_subtype(jl_svecref(*newparams, j), lasttype)) {
+            jl_value_t *paramj = jl_svecref(*newparams, j);
+            if (jl_is_vararg(paramj))
+                paramj = jl_unwrap_vararg(paramj);
+            if (!jl_subtype(paramj, lasttype)) {
                 all_are_subtypes = 0;
                 break;
             }
@@ -755,8 +818,8 @@ static void jl_compilation_sig(
         }
         else {
             jl_value_t *unw = jl_unwrap_unionall(decl);
-            jl_value_t *lastdeclt = jl_tparam(unw, nargs - 1);
-            assert(jl_is_vararg_type(lastdeclt) && jl_nparams(unw) == nargs);
+            jl_value_t *lastdeclt = jl_tparam(unw, jl_nparams(unw) - 1);
+            assert(jl_is_vararg(lastdeclt));
             int nsp = jl_svec_len(sparams);
             if (nsp > 0 && jl_has_free_typevars(lastdeclt)) {
                 assert(jl_subtype_env_size(decl) == nsp);
@@ -789,6 +852,8 @@ JL_DLLEXPORT int jl_isa_compileable_sig(
 
     if (!jl_is_datatype(type) || jl_has_free_typevars((jl_value_t*)type))
         return 0;
+    if (definition->sig == (jl_value_t*)jl_anytuple_type && jl_atomic_load_relaxed(&definition->unspecialized))
+        return jl_egal((jl_value_t*)type, definition->sig); // handle builtin methods
 
     size_t i, np = jl_nparams(type);
     size_t nargs = definition->nargs; // == jl_nparams(jl_unwrap_unionall(decl));
@@ -817,7 +882,7 @@ JL_DLLEXPORT int jl_isa_compileable_sig(
                 nspec_max = nspec_min;
         }
         int isbound = (jl_va_tuple_kind((jl_datatype_t*)decl) == JL_VARARG_UNBOUND);
-        if (jl_is_vararg_type(jl_tparam(type, np - 1))) {
+        if (jl_is_vararg(jl_tparam(type, np - 1))) {
             if (!isbound || np < nspec_min || np > nspec_max)
                 return 0;
         }
@@ -826,7 +891,7 @@ JL_DLLEXPORT int jl_isa_compileable_sig(
                 return 0;
         }
     }
-    else if (np != nargs || jl_is_vararg_type(jl_tparam(type, np - 1))) {
+    else if (np != nargs || jl_is_vararg(jl_tparam(type, np - 1))) {
         return 0;
     }
 
@@ -835,7 +900,7 @@ JL_DLLEXPORT int jl_isa_compileable_sig(
         jl_value_t *decl_i = jl_nth_slot_type((jl_value_t*)decl, i);
         size_t i_arg = (i < nargs - 1 ? i : nargs - 1);
 
-        if (jl_is_vararg_type(elt)) {
+        if (jl_is_vararg(elt)) {
             elt = jl_unwrap_vararg(elt);
             if (jl_has_free_typevars(decl_i)) {
                 // TODO: in this case, answer semi-conservatively that these varargs are always compilable
@@ -959,7 +1024,7 @@ static int concretesig_equal(jl_value_t *tt, jl_value_t *simplesig) JL_NOTSAFEPO
     jl_value_t **sigs = jl_svec_data(((jl_datatype_t*)simplesig)->parameters);
     size_t i, lensig = jl_nparams(simplesig);
     assert(lensig == jl_nparams(tt));
-    assert(lensig > 0 && !jl_is_vararg_type(jl_tparam(simplesig, lensig - 1)));
+    assert(lensig > 0 && !jl_is_vararg(jl_tparam(simplesig, lensig - 1)));
     for (i = 0; i < lensig; i++) {
         jl_value_t *decl = sigs[i];
         jl_value_t *a = types[i];
@@ -980,14 +1045,14 @@ static inline jl_typemap_entry_t *lookup_leafcache(jl_array_t *leafcache JL_PROP
                 if (entry->simplesig == (void*)jl_nothing || concretesig_equal(tt, (jl_value_t*)entry->simplesig))
                     return entry;
             }
-            entry = entry->next;
+            entry = jl_atomic_load_relaxed(&entry->next);
         } while ((jl_value_t*)entry != jl_nothing);
     }
     return NULL;
 }
 
 static jl_method_instance_t *cache_method(
-        jl_methtable_t *mt, jl_typemap_t **cache, jl_value_t *parent JL_PROPAGATES_ROOT,
+        jl_methtable_t *mt, _Atomic(jl_typemap_t*) *cache, jl_value_t *parent JL_PROPAGATES_ROOT,
         jl_tupletype_t *tt, // the original tupletype of the signature
         jl_method_t *definition,
         size_t world, size_t min_valid, size_t max_valid,
@@ -1004,7 +1069,7 @@ static jl_method_instance_t *cache_method(
                 return entry->func.linfo;
         }
         struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0};
-        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(*cache, &search, offs, /*subtype*/1);
+        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1);
         if (entry && entry->func.value)
             return entry->func.linfo;
     }
@@ -1021,12 +1086,12 @@ static jl_method_instance_t *cache_method(
     intptr_t nspec = (mt == NULL || mt == jl_type_type_mt || mt == jl_nonfunction_mt ? definition->nargs + 1 : mt->max_args + 2);
     jl_compilation_sig(tt, sparams, definition, nspec, &newparams);
     if (newparams) {
-        cache_with_orig = 0;
         compilationsig = jl_apply_tuple_type(newparams);
         temp2 = (jl_value_t*)compilationsig;
         // In most cases `!jl_isa_compileable_sig(tt, definition))`,
         // although for some cases, (notably Varargs)
         // we might choose a replacement type that's preferable but not strictly better
+        cache_with_orig = !jl_subtype((jl_value_t*)compilationsig, definition->sig);
     }
     // TODO: maybe assert(jl_isa_compileable_sig(compilationsig, definition));
     newmeth = jl_specializations_get_linfo(definition, (jl_value_t*)compilationsig, sparams);
@@ -1035,11 +1100,9 @@ static jl_method_instance_t *cache_method(
     jl_svec_t* guardsigs = jl_emptysvec;
     if (!cache_with_orig && mt) {
         // now examine what will happen if we chose to use this sig in the cache
-        // TODO: should we first check `compilationsig <: definition`?
         size_t min_valid2 = 1;
         size_t max_valid2 = ~(size_t)0;
-        int ambig = 0;
-        temp = ml_matches(mt, 0, compilationsig, MAX_UNSPECIALIZED_CONFLICTS, 1, 1, world, 0, &min_valid2, &max_valid2, &ambig);
+        temp = ml_matches(mt, compilationsig, MAX_UNSPECIALIZED_CONFLICTS, 1, 1, world, 0, &min_valid2, &max_valid2, NULL);
         int guards = 0;
         if (temp == jl_false) {
             cache_with_orig = 1;
@@ -1052,7 +1115,8 @@ static jl_method_instance_t *cache_method(
                 jl_svec_t *env = matc->sparams;
                 int k, l;
                 for (k = 0, l = jl_svec_len(env); k < l; k++) {
-                    if (jl_is_typevar(jl_svecref(env, k))) {
+                    jl_value_t *env_k = jl_svecref(env, k);
+                    if (jl_is_typevar(env_k) || jl_is_vararg(env_k)) {
                         unmatched_tvars = 1;
                         break;
                     }
@@ -1086,7 +1150,7 @@ static jl_method_instance_t *cache_method(
                     guards++;
                     // alternative approach: insert sentinel entry
                     //jl_typemap_insert(cache, parent, (jl_tupletype_t*)matc->spec_types,
-                    //        NULL, jl_emptysvec, /*guard*/NULL, jl_cachearg_offset(mt), &lambda_cache, other->min_world, other->max_world);
+                    //        NULL, jl_emptysvec, /*guard*/NULL, jl_cachearg_offset(mt), other->min_world, other->max_world);
                 }
             }
         }
@@ -1109,7 +1173,7 @@ static jl_method_instance_t *cache_method(
     newparams = NULL;
     for (i = 0; i < np; i++) {
         jl_value_t *elt = jl_svecref(cachett->parameters, i);
-        if (jl_is_vararg_type(elt)) {
+        if (jl_is_vararg(elt)) {
         }
         else if (jl_is_type_type(elt)) {
             // TODO: if (!jl_is_singleton(elt)) ...
@@ -1131,7 +1195,7 @@ static jl_method_instance_t *cache_method(
     // that satisfies our requirements
     if (cachett != tt) {
         struct jl_typemap_assoc search = {(jl_value_t*)cachett, world, NULL, 0, ~(size_t)0};
-        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(*cache, &search, offs, /*subtype*/1);
+        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(cache), &search, offs, /*subtype*/1);
         if (entry && jl_egal((jl_value_t*)entry->simplesig, simplett ? (jl_value_t*)simplett : jl_nothing) &&
                 jl_egal((jl_value_t*)guardsigs, (jl_value_t*)entry->guardsigs)) {
             JL_GC_POP();
@@ -1154,41 +1218,45 @@ static jl_method_instance_t *cache_method(
                 jl_cache_type_(tt);
             JL_UNLOCK(&typecache_lock); // Might GC
         }
-        jl_typemap_entry_t *old = (jl_typemap_entry_t*)jl_eqtable_get(mt->leafcache, (jl_value_t*)tt, jl_nothing);
-        newentry->next = old;
+        jl_array_t *oldcache = jl_atomic_load_relaxed(&mt->leafcache);
+        jl_typemap_entry_t *old = (jl_typemap_entry_t*)jl_eqtable_get(oldcache, (jl_value_t*)tt, jl_nothing);
+        jl_atomic_store_relaxed(&newentry->next, old);
         jl_gc_wb(newentry, old);
-        jl_atomic_store_release(&mt->leafcache, jl_eqtable_put(mt->leafcache, (jl_value_t*)tt, (jl_value_t*)newentry, NULL));
-        jl_gc_wb(mt, mt->leafcache);
+        jl_array_t *newcache = (jl_array_t*)jl_eqtable_put(jl_atomic_load_relaxed(&mt->leafcache), (jl_value_t*)tt, (jl_value_t*)newentry, NULL);
+        if (newcache != oldcache) {
+            jl_atomic_store_release(&mt->leafcache, newcache);
+            jl_gc_wb(mt, newcache);
+        }
     }
     else {
-         jl_typemap_insert(cache, parent, newentry, offs, &lambda_cache);
+         jl_typemap_insert(cache, parent, newentry, offs);
     }
 
     JL_GC_POP();
     return newmeth;
 }
 
-static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid);
+static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid);
 
 static jl_method_instance_t *jl_mt_assoc_by_type(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_datatype_t *tt, size_t world)
 {
     // caller must hold the mt->writelock
     assert(tt->isdispatchtuple || tt->hasfreetypevars);
     if (tt->isdispatchtuple) {
-        jl_array_t *leafcache = mt->leafcache;
+        jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
         jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)tt, world);
         if (entry)
             return entry->func.linfo;
     }
 
     struct jl_typemap_assoc search = {(jl_value_t*)tt, world, NULL, 0, ~(size_t)0};
-    jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(mt->cache, &search, jl_cachearg_offset(mt), /*subtype*/1);
+    jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1);
     if (entry)
         return entry->func.linfo;
 
     size_t min_valid = 0;
     size_t max_valid = ~(size_t)0;
-    jl_method_match_t *matc = _gf_invoke_lookup((jl_value_t*)tt, world, &min_valid, &max_valid);
+    jl_method_match_t *matc = _gf_invoke_lookup((jl_value_t*)tt, jl_nothing, world, &min_valid, &max_valid);
     jl_method_instance_t *nf = NULL;
     if (matc) {
         JL_GC_PUSH1(&matc);
@@ -1230,7 +1298,7 @@ static jl_value_t *get_intersect_matches(jl_typemap_t *defs, jl_typemap_entry_t
     jl_value_t *va = NULL;
     if (l > 0) {
         va = jl_tparam(ttypes, l - 1);
-        if (jl_is_vararg_type(va))
+        if (jl_is_vararg(va))
             va = jl_unwrap_vararg(va);
         else
             va = NULL;
@@ -1319,8 +1387,45 @@ JL_DLLEXPORT jl_value_t *jl_debug_method_invalidation(int state)
     return jl_nothing;
 }
 
+// call external callbacks registered with this method_instance
+static void invalidate_external(jl_method_instance_t *mi, size_t max_world) {
+    jl_array_t *callbacks = mi->callbacks;
+    if (callbacks) {
+        // AbstractInterpreter allows for MethodInstances to be present in non-local caches
+        // inform those caches about the invalidation.
+        JL_TRY {
+            size_t i, l = jl_array_len(callbacks);
+            jl_value_t **args;
+            JL_GC_PUSHARGS(args, 3);
+            // these arguments are constant per call
+            args[1] = (jl_value_t*)mi;
+            args[2] = jl_box_uint32(max_world);
+
+            jl_task_t *ct = jl_current_task;
+            size_t last_age = ct->world_age;
+            ct->world_age = jl_get_world_counter();
+
+            jl_value_t **cbs = (jl_value_t**)jl_array_ptr_data(callbacks);
+            for (i = 0; i < l; i++) {
+                args[0] = cbs[i];
+                jl_apply(args, 3);
+            }
+            ct->world_age = last_age;
+            JL_GC_POP();
+        }
+        JL_CATCH {
+            jl_printf((JL_STREAM*)STDERR_FILENO, "error in invalidation callback: ");
+            jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+            jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+            jlbacktrace(); // written to STDERR_FILENO
+        }
+    }
+}
+
+static void do_nothing_with_codeinst(jl_code_instance_t *ci) {}
+
 // recursively invalidate cached methods that had an edge to a replaced method
-static void invalidate_method_instance(jl_method_instance_t *replaced, size_t max_world, int depth)
+static void invalidate_method_instance(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced, size_t max_world, int depth)
 {
     if (_jl_debug_method_invalidation) {
         jl_value_t *boxeddepth = NULL;
@@ -1332,14 +1437,15 @@ static void invalidate_method_instance(jl_method_instance_t *replaced, size_t ma
     }
     if (!jl_is_method(replaced->def.method))
         return; // shouldn't happen, but better to be safe
-    JL_LOCK_NOGC(&replaced->def.method->writelock);
-    jl_code_instance_t *codeinst = replaced->cache;
+    JL_LOCK(&replaced->def.method->writelock);
+    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&replaced->cache);
     while (codeinst) {
         if (codeinst->max_world == ~(size_t)0) {
             assert(codeinst->min_world - 1 <= max_world && "attempting to set illogical world constraints (probable race condition)");
             codeinst->max_world = max_world;
         }
         assert(codeinst->max_world <= max_world);
+        (*f)(codeinst);
         codeinst = jl_atomic_load_relaxed(&codeinst->next);
     }
     // recurse to all backedges to update their valid range also
@@ -1349,16 +1455,16 @@ static void invalidate_method_instance(jl_method_instance_t *replaced, size_t ma
         size_t i, l = jl_array_len(backedges);
         for (i = 0; i < l; i++) {
             jl_method_instance_t *replaced = (jl_method_instance_t*)jl_array_ptr_ref(backedges, i);
-            invalidate_method_instance(replaced, max_world, depth + 1);
+            invalidate_method_instance(f, replaced, max_world, depth + 1);
         }
     }
-    JL_UNLOCK_NOGC(&replaced->def.method->writelock);
+    JL_UNLOCK(&replaced->def.method->writelock);
 }
 
 // invalidate cached methods that overlap this definition
-static void invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_world, const char *why)
+void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why)
 {
-    JL_LOCK_NOGC(&replaced_mi->def.method->writelock);
+    JL_LOCK(&replaced_mi->def.method->writelock);
     jl_array_t *backedges = replaced_mi->backedges;
     if (backedges) {
         // invalidate callers (if any)
@@ -1366,10 +1472,10 @@ static void invalidate_backedges(jl_method_instance_t *replaced_mi, size_t max_w
         size_t i, l = jl_array_len(backedges);
         jl_method_instance_t **replaced = (jl_method_instance_t**)jl_array_ptr_data(backedges);
         for (i = 0; i < l; i++) {
-            invalidate_method_instance(replaced[i], max_world, 1);
+            invalidate_method_instance(f, replaced[i], max_world, 1);
         }
     }
-    JL_UNLOCK_NOGC(&replaced_mi->def.method->writelock);
+    JL_UNLOCK(&replaced_mi->def.method->writelock);
     if (why && _jl_debug_method_invalidation) {
         jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)replaced_mi);
         jl_value_t *loctag = jl_cstr_to_string(why);
@@ -1454,7 +1560,7 @@ static int invalidate_mt_cache(jl_typemap_entry_t *oldentry, void *closure0)
         }
         if (intersects) {
             if (_jl_debug_method_invalidation) {
-                jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)oldentry);
+                jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)mi);
                 jl_value_t *loctag = jl_cstr_to_string("invalidate_mt_cache");
                 JL_GC_PUSH1(&loctag);
                 jl_array_ptr_1d_push(_jl_debug_method_invalidation, loctag);
@@ -1488,10 +1594,10 @@ static int typemap_search(jl_typemap_entry_t *entry, void *closure)
 
 static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT;
 
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
 static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_ROOT, jl_method_t *method) JL_NOTSAFEPOINT {
     jl_value_t *closure = (jl_value_t*)(method);
-    if (jl_typemap_visitor(mt->defs, typemap_search, &closure))
+    if (jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), typemap_search, &closure))
         jl_error("method not in method table");
     return (jl_typemap_entry_t *)closure;
 }
@@ -1499,6 +1605,7 @@ static jl_typemap_entry_t *do_typemap_search(jl_methtable_t *mt JL_PROPAGATES_RO
 
 static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *methodentry, jl_method_t *method, size_t max_world)
 {
+    assert(!method->is_for_opaque_closure);
     method->deleted_world = methodentry->max_world = max_world;
     // drop this method from mt->cache
     struct invalidate_mt_env mt_cache_env;
@@ -1506,8 +1613,8 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m
     mt_cache_env.newentry = methodentry;
     mt_cache_env.shadowed = NULL;
     mt_cache_env.invalidated = 0;
-    jl_typemap_visitor(mt->cache, disable_mt_cache, (void*)&mt_cache_env);
-    jl_array_t *leafcache = mt->leafcache;
+    jl_typemap_visitor(jl_atomic_load_relaxed(&mt->cache), disable_mt_cache, (void*)&mt_cache_env);
+    jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
     size_t i, l = jl_array_len(leafcache);
     for (i = 1; i < l; i += 2) {
         jl_typemap_entry_t *oldentry = (jl_typemap_entry_t*)jl_array_ptr_ref(leafcache, i);
@@ -1515,19 +1622,20 @@ static void jl_method_table_invalidate(jl_methtable_t *mt, jl_typemap_entry_t *m
             while ((jl_value_t*)oldentry != jl_nothing) {
                 if (oldentry->max_world == ~(size_t)0)
                     oldentry->max_world = mt_cache_env.max_world;
-                oldentry = oldentry->next;
+                oldentry = jl_atomic_load_relaxed(&oldentry->next);
             }
         }
     }
     // Invalidate the backedges
     int invalidated = 0;
-    jl_svec_t *specializations = methodentry->func.method->specializations;
+    jl_svec_t *specializations = jl_atomic_load_relaxed(&methodentry->func.method->specializations);
     l = jl_svec_len(specializations);
     for (i = 0; i < l; i++) {
         jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
-        if (mi) {
+        if ((jl_value_t*)mi != jl_nothing) {
             invalidated = 1;
-            invalidate_backedges(mi, methodentry->max_world, "jl_method_table_disable");
+            invalidate_external(mi, methodentry->max_world);
+            invalidate_backedges(&do_nothing_with_codeinst, mi, methodentry->max_world, "jl_method_table_disable");
         }
     }
     if (invalidated && _jl_debug_method_invalidation) {
@@ -1547,10 +1655,37 @@ JL_DLLEXPORT void jl_method_table_disable(jl_methtable_t *mt, jl_method_t *metho
     jl_typemap_entry_t *methodentry = do_typemap_search(mt, method);
     JL_LOCK(&mt->writelock);
     // Narrow the world age on the method to make it uncallable
-    jl_method_table_invalidate(mt, methodentry, method, jl_world_counter++);
+    size_t world = jl_atomic_fetch_add(&jl_world_counter, 1);
+    jl_method_table_invalidate(mt, methodentry, method, world);
     JL_UNLOCK(&mt->writelock);
 }
 
+static int jl_type_intersection2(jl_value_t *t1, jl_value_t *t2, jl_value_t **isect, jl_value_t **isect2)
+{
+    *isect2 = NULL;
+    int is_subty = 0;
+    *isect = jl_type_intersection_env_s(t1, t2, NULL, &is_subty);
+    if (*isect == jl_bottom_type)
+        return 0;
+    if (is_subty)
+        return 1;
+    // TODO: sometimes type intersection returns types with free variables
+    if (jl_has_free_typevars(t1) || jl_has_free_typevars(t2))
+        return 1;
+    // determine if type-intersection can be convinced to give a better, non-bad answer
+    // if the intersection was imprecise, see if we can do better by switching the types
+    *isect2 = jl_type_intersection(t2, t1);
+    if (*isect2 == jl_bottom_type) {
+        *isect = jl_bottom_type;
+        *isect2 = NULL;
+        return 0;
+    }
+    if (jl_types_egal(*isect2, *isect)) {
+        *isect2 = NULL;
+    }
+    return 1;
+}
+
 JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method, jl_tupletype_t *simpletype)
 {
     JL_TIMING(ADD_METHOD);
@@ -1560,27 +1695,29 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
     jl_value_t *oldvalue = NULL;
     jl_array_t *oldmi = NULL;
     if (method->primary_world == 1)
-        method->primary_world = ++jl_world_counter;
+        method->primary_world = jl_atomic_fetch_add(&jl_world_counter, 1) + 1;
     size_t max_world = method->primary_world - 1;
     jl_value_t *loctag = NULL;  // debug info for invalidation
     jl_value_t *isect = NULL;
+    jl_value_t *isect2 = NULL;
+    jl_value_t *isect3 = NULL;
     jl_typemap_entry_t *newentry = NULL;
-    JL_GC_PUSH5(&oldvalue, &oldmi, &newentry, &loctag, &isect);
+    JL_GC_PUSH7(&oldvalue, &oldmi, &newentry, &loctag, &isect, &isect2, &isect3);
     JL_LOCK(&mt->writelock);
     // first find if we have an existing entry to delete
     struct jl_typemap_assoc search = {(jl_value_t*)type, method->primary_world, NULL, 0, ~(size_t)0};
-    jl_typemap_entry_t *oldentry = jl_typemap_assoc_by_type(mt->defs, &search, /*offs*/0, /*subtype*/0);
+    jl_typemap_entry_t *oldentry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->defs), &search, /*offs*/0, /*subtype*/0);
     // then add our new entry
     newentry = jl_typemap_alloc((jl_tupletype_t*)type, simpletype, jl_emptysvec,
             (jl_value_t*)method, method->primary_world, method->deleted_world);
-    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0, &method_defs);
+    jl_typemap_insert(&mt->defs, (jl_value_t*)mt, newentry, 0);
     if (oldentry) {
         jl_method_t *m = oldentry->func.method;
         method_overwrite(newentry, m);
         jl_method_table_invalidate(mt, oldentry, m, max_world);
     }
     else {
-        oldvalue = get_intersect_matches(mt->defs, newentry);
+        oldvalue = get_intersect_matches(jl_atomic_load_relaxed(&mt->defs), newentry);
 
         int invalidated = 0;
         jl_method_t **d;
@@ -1600,22 +1737,36 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
             size_t ins = 0;
             for (i = 1; i < na; i += 2) {
                 jl_value_t *backedgetyp = backedges[i - 1];
-                isect = jl_type_intersection(backedgetyp, (jl_value_t*)type);
-                if (isect != jl_bottom_type) {
-                    // see if the intersection was actually already fully
-                    // covered by anything (method or ambiguity is okay)
+                int missing = 0;
+                if (jl_type_intersection2(backedgetyp, (jl_value_t*)type, &isect, &isect2)) {
+                    // See if the intersection was actually already fully
+                    // covered, but that the new method is ambiguous.
+                    //  -> no previous method: now there is one, need to update the missing edge
+                    //  -> one+ previously matching method(s):
+                    //    -> more specific then all of them: need to update the missing edge
+                    //      -> some may have been ambiguous: now there is a replacement
+                    //      -> some may have been called: now there is a replacement (also will be detected in the loop later)
+                    //    -> less specific or ambiguous with any one of them: can ignore the missing edge (not missing)
+                    //      -> some may have been ambiguous: still are
+                    //      -> some may have been called: they may be partly replaced (will be detected in the loop later)
+                    missing = 1;
                     size_t j;
                     for (j = 0; j < n; j++) {
                         jl_method_t *m = d[j];
-                        if (jl_subtype(isect, m->sig))
-                            break;
+                        if (jl_subtype(isect, m->sig) || (isect2 && jl_subtype(isect2, m->sig))) {
+                            // We now know that there actually was a previous
+                            // method for this part of the type intersection.
+                            if (!jl_type_morespecific(type, m->sig)) {
+                                missing = 0;
+                                break;
+                            }
+                        }
                     }
-                    if (j != n)
-                        isect = jl_bottom_type;
                 }
-                if (isect != jl_bottom_type) {
+                if (missing) {
                     jl_method_instance_t *backedge = (jl_method_instance_t*)backedges[i];
-                    invalidate_method_instance(backedge, max_world, 0);
+                    invalidate_external(backedge, max_world);
+                    invalidate_method_instance(&do_nothing_with_codeinst, backedge, max_world, 0);
                     invalidated = 1;
                     if (_jl_debug_method_invalidation)
                         jl_array_ptr_1d_push(_jl_debug_method_invalidation, (jl_value_t*)backedgetyp);
@@ -1643,19 +1794,18 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                 jl_method_t *m = d[j];
                 if (morespec[j] == (char)morespec_is)
                     continue;
-                jl_svec_t *specializations = jl_atomic_load_acquire(&m->specializations);
-                jl_method_instance_t **data = (jl_method_instance_t**)jl_svec_data(specializations);
+                jl_svec_t *specializations = jl_atomic_load_relaxed(&m->specializations);
+                _Atomic(jl_method_instance_t*) *data = (_Atomic(jl_method_instance_t*)*)jl_svec_data(specializations);
                 size_t i, l = jl_svec_len(specializations);
                 enum morespec_options ambig = morespec_unknown;
                 for (i = 0; i < l; i++) {
                     jl_method_instance_t *mi = jl_atomic_load_relaxed(&data[i]);
-                    if (mi == NULL)
+                    if ((jl_value_t*)mi == jl_nothing)
                         continue;
-                    isect = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes);
-                    isect = jl_type_intersection(type, isect);
-                    if (isect != jl_bottom_type) {
+                    isect3 = jl_type_intersection(m->sig, (jl_value_t*)mi->specTypes);
+                    if (jl_type_intersection2(type, isect3, &isect, &isect2)) {
                         if (morespec[j] == (char)morespec_unknown)
-                            morespec[j] = (char)jl_type_morespecific(m->sig, type) ? morespec_is : morespec_isnot;
+                            morespec[j] = (char)(jl_type_morespecific(m->sig, type) ? morespec_is : morespec_isnot);
                         if (morespec[j] == (char)morespec_is)
                             // not actually shadowing--the existing method is still better
                             break;
@@ -1667,10 +1817,10 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                             size_t k;
                             for (k = 0; k < n; k++) {
                                 jl_method_t *m2 = d[k];
-                                if (m == m2 || !jl_subtype(isect, m2->sig))
+                                if (m == m2 || !(jl_subtype(isect, m2->sig) || (isect && jl_subtype(isect, m2->sig))))
                                     continue;
                                 if (morespec[k] == (char)morespec_unknown)
-                                    morespec[k] = (char)jl_type_morespecific(m2->sig, type) ? morespec_is : morespec_isnot;
+                                    morespec[k] = (char)(jl_type_morespecific(m2->sig, type) ? morespec_is : morespec_isnot);
                                 if (morespec[k] == (char)morespec_is)
                                     // not actually shadowing this--m2 will still be better
                                     break;
@@ -1683,9 +1833,10 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                                 continue;
                         }
                         jl_array_ptr_1d_push(oldmi, (jl_value_t*)mi);
+                        invalidate_external(mi, max_world);
                         if (mi->backedges) {
                             invalidated = 1;
-                            invalidate_backedges(mi, max_world, "jl_method_table_insert");
+                            invalidate_backedges(&do_nothing_with_codeinst, mi, max_world, "jl_method_table_insert");
                         }
                     }
                 }
@@ -1699,15 +1850,15 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
                 mt_cache_env.newentry = newentry;
                 mt_cache_env.invalidated = 0;
 
-                jl_typemap_visitor(mt->cache, invalidate_mt_cache, (void*)&mt_cache_env);
-                jl_array_t *leafcache = mt->leafcache;
+                jl_typemap_visitor(jl_atomic_load_relaxed(&mt->cache), invalidate_mt_cache, (void*)&mt_cache_env);
+                jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
                 size_t i, l = jl_array_len(leafcache);
                 for (i = 1; i < l; i += 2) {
                     jl_value_t *entry = jl_array_ptr_ref(leafcache, i);
                     if (entry) {
                         while (entry != jl_nothing) {
                             invalidate_mt_cache((jl_typemap_entry_t*)entry, (void*)&mt_cache_env);
-                            entry = (jl_value_t*)((jl_typemap_entry_t*)entry)->next;
+                            entry = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)entry)->next);
                         }
                     }
                 }
@@ -1741,9 +1892,9 @@ static void JL_NORETURN jl_method_error_bare(jl_function_t *f, jl_value_t *args,
         jl_printf((JL_STREAM*)STDERR_FILENO, "A method error occurred before the base MethodError type was defined. Aborting...\n");
         jl_static_show((JL_STREAM*)STDERR_FILENO,(jl_value_t*)f); jl_printf((JL_STREAM*)STDERR_FILENO," world %u\n", (unsigned)world);
         jl_static_show((JL_STREAM*)STDERR_FILENO,args); jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
-        jl_ptls_t ptls = jl_get_ptls_states();
+        jl_ptls_t ptls = jl_current_task->ptls;
         ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
-        jl_critical_error(0, NULL, ptls->bt_data, &ptls->bt_size);
+        jl_critical_error(0, NULL, jl_current_task);
         abort();
     }
     // not reached
@@ -1796,37 +1947,51 @@ jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t w
 //
 // lim is the max # of methods to return. if there are more, returns jl_false.
 // -1 for no limit.
-JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, int lim, int include_ambiguous,
+JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
                                              size_t world, size_t *min_valid, size_t *max_valid, int *ambig)
 {
     JL_TIMING(METHOD_MATCH);
-    *ambig = 0;
+    if (ambig != NULL)
+        *ambig = 0;
     jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types);
     if (jl_is_tuple_type(unw) && jl_tparam0(unw) == jl_bottom_type)
         return (jl_value_t*)jl_an_empty_vec_any;
-    jl_methtable_t *mt = jl_method_table_for(unw);
-    if ((jl_value_t*)mt == jl_nothing)
-        return jl_false; // indeterminate - ml_matches can't deal with this case
-    return ml_matches(mt, 0, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
+    if (mt == jl_nothing)
+        mt = (jl_value_t*)jl_method_table_for(unw);
+    if (mt == jl_nothing)
+        mt = NULL;
+    return ml_matches((jl_methtable_t*)mt, types, lim, include_ambiguous, 1, world, 1, min_valid, max_valid, ambig);
 }
 
-jl_method_instance_t *jl_get_unspecialized(jl_method_instance_t *method JL_PROPAGATES_ROOT)
+jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT)
 {
-    // one unspecialized version of a function can be shared among all cached specializations
     jl_method_t *def = method->def.method;
+    jl_method_instance_t *mi = jl_get_unspecialized(def);
+    if (mi == NULL) {
+        return method;
+    }
+    return mi;
+}
+
+jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT)
+{
+    // one unspecialized version of a function can be shared among all cached specializations
     if (!jl_is_method(def) || def->source == NULL) {
         // generated functions might instead randomly just never get inferred, sorry
-        return method;
+        return NULL;
     }
-    if (def->unspecialized == NULL) {
+    jl_method_instance_t *unspec = jl_atomic_load_relaxed(&def->unspecialized);
+    if (unspec == NULL) {
         JL_LOCK(&def->writelock);
-        if (def->unspecialized == NULL) {
-            def->unspecialized = jl_get_specialized(def, def->sig, jl_emptysvec);
-            jl_gc_wb(def, def->unspecialized);
+        unspec = jl_atomic_load_relaxed(&def->unspecialized);
+        if (unspec == NULL) {
+            unspec = jl_get_specialized(def, def->sig, jl_emptysvec);
+            jl_atomic_store_release(&def->unspecialized, unspec);
+            jl_gc_wb(def, unspec);
         }
         JL_UNLOCK(&def->writelock);
     }
-    return def->unspecialized;
+    return unspec;
 }
 
 
@@ -1843,6 +2008,42 @@ jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi, size_t world)
     return NULL;
 }
 
+jl_mutex_t precomp_statement_out_lock;
+
+static void record_precompile_statement(jl_method_instance_t *mi)
+{
+    static ios_t f_precompile;
+    static JL_STREAM* s_precompile = NULL;
+    jl_method_t *def = mi->def.method;
+    if (jl_options.trace_compile == NULL)
+        return;
+    if (!jl_is_method(def))
+        return;
+
+    if (jl_n_threads > 1)
+        JL_LOCK(&precomp_statement_out_lock);
+    if (s_precompile == NULL) {
+        const char *t = jl_options.trace_compile;
+        if (!strncmp(t, "stderr", 6)) {
+            s_precompile = JL_STDERR;
+        }
+        else {
+            if (ios_file(&f_precompile, t, 1, 1, 1, 1) == NULL)
+                jl_errorf("cannot open precompile statement file \"%s\" for writing", t);
+            s_precompile = (JL_STREAM*) &f_precompile;
+        }
+    }
+    if (!jl_has_free_typevars(mi->specTypes)) {
+        jl_printf(s_precompile, "precompile(");
+        jl_static_show(s_precompile, mi->specTypes);
+        jl_printf(s_precompile, ")\n");
+        if (s_precompile != JL_STDERR)
+            ios_flush(&f_precompile);
+    }
+    if (jl_n_threads > 1)
+        JL_UNLOCK(&precomp_statement_out_lock);
+}
+
 jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t world)
 {
     jl_code_instance_t *codeinst = jl_method_compiled(mi, world);
@@ -1858,31 +2059,41 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             compile_option = ((jl_method_t*)def)->module->compile;
     }
 
+    // if compilation is disabled or source is unavailable, try calling unspecialized version
     if (compile_option == JL_OPTIONS_COMPILE_OFF ||
-        compile_option == JL_OPTIONS_COMPILE_MIN) {
+        compile_option == JL_OPTIONS_COMPILE_MIN ||
+        def->source == jl_nothing) {
         // copy fptr from the template method definition
-        jl_method_t *def = mi->def.method;
-        if (jl_is_method(def) && def->unspecialized) {
-            jl_code_instance_t *unspec = jl_atomic_load_relaxed(&def->unspecialized->cache);
-            if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
-                jl_code_instance_t *codeinst = jl_new_codeinst(mi,
-                    (jl_value_t*)jl_any_type, NULL, NULL,
-                    0, 1, ~(size_t)0);
-                codeinst->isspecsig = 0;
-                codeinst->specptr = unspec->specptr;
-                codeinst->rettype_const = unspec->rettype_const;
-                codeinst->invoke = unspec->invoke;
-                jl_mi_cache_insert(mi, codeinst);
-                return codeinst;
+        if (jl_is_method(def)) {
+            jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized);
+            if (unspecmi) {
+                jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache);
+                if (unspec && jl_atomic_load_acquire(&unspec->invoke)) {
+                    jl_code_instance_t *codeinst = jl_new_codeinst(mi,
+                        (jl_value_t*)jl_any_type, NULL, NULL,
+                        0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
+                    codeinst->isspecsig = 0;
+                    codeinst->specptr = unspec->specptr;
+                    codeinst->rettype_const = unspec->rettype_const;
+                    jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&unspec->invoke));
+                    jl_mi_cache_insert(mi, codeinst);
+                    record_precompile_statement(mi);
+                    return codeinst;
+                }
             }
         }
+    }
+    // if that didn't work and compilation is off, try running in the interpreter
+    if (compile_option == JL_OPTIONS_COMPILE_OFF ||
+        compile_option == JL_OPTIONS_COMPILE_MIN) {
         jl_code_info_t *src = jl_code_for_interpreter(mi);
         if (!jl_code_requires_compiler(src)) {
             jl_code_instance_t *codeinst = jl_new_codeinst(mi,
                 (jl_value_t*)jl_any_type, NULL, NULL,
-                0, 1, ~(size_t)0);
-            codeinst->invoke = jl_fptr_interpret_call;
+                0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
+            jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_interpret_call);
             jl_mi_cache_insert(mi, codeinst);
+            record_precompile_statement(mi);
             return codeinst;
         }
         if (compile_option == JL_OPTIONS_COMPILE_OFF) {
@@ -1894,11 +2105,19 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
 
     codeinst = jl_generate_fptr(mi, world);
     if (!codeinst) {
-        jl_method_instance_t *unspec = jl_get_unspecialized(mi);
+        jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi);
         jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
         // ask codegen to make the fptr for unspec
-        if (jl_atomic_load_relaxed(&ucache->invoke) == NULL)
+        if (jl_atomic_load_acquire(&ucache->invoke) == NULL) {
+            if (def->source == jl_nothing && (ucache->def->uninferred == jl_nothing ||
+                                              ucache->def->uninferred == NULL)) {
+                jl_printf(JL_STDERR, "source not available for ");
+                jl_static_show(JL_STDERR, (jl_value_t*)mi);
+                jl_printf(JL_STDERR, "\n");
+                jl_error("source missing for method that needs to be compiled");
+            }
             jl_generate_fptr_for_unspecialized(ucache);
+        }
         assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL);
         if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam &&
             jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) {
@@ -1906,24 +2125,27 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
             return ucache;
         }
         codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL,
-            0, 1, ~(size_t)0);
+            0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
         codeinst->isspecsig = 0;
         codeinst->specptr = ucache->specptr;
         codeinst->rettype_const = ucache->rettype_const;
-        codeinst->invoke = ucache->invoke;
+        jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&ucache->invoke));
         jl_mi_cache_insert(mi, codeinst);
     }
+    else {
+        record_precompile_statement(mi);
+    }
     jl_atomic_store_relaxed(&codeinst->precompile, 1);
     return codeinst;
 }
 
 
-JL_DLLEXPORT jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
+jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     return m->rettype_const;
 }
 
-JL_DLLEXPORT jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
+jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     while (1) {
         jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1);
@@ -1932,7 +2154,7 @@ JL_DLLEXPORT jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t
     }
 }
 
-JL_DLLEXPORT jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
+jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
 {
     jl_svec_t *sparams = m->def->sparam_vals;
     assert(sparams != jl_emptysvec);
@@ -1943,6 +2165,12 @@ JL_DLLEXPORT jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32
     }
 }
 
+JL_DLLEXPORT jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
+
+JL_DLLEXPORT jl_callptr_t jl_fptr_const_return_addr = &jl_fptr_const_return;
+
+JL_DLLEXPORT jl_callptr_t jl_fptr_sparam_addr = &jl_fptr_sparam;
+
 // Return the index of the invoke api, if known
 JL_DLLEXPORT int32_t jl_invoke_api(jl_code_instance_t *codeinst)
 {
@@ -1977,7 +2205,6 @@ JL_DLLEXPORT jl_value_t *jl_normalize_to_compilable_sig(jl_methtable_t *mt, jl_t
 // compile-time method lookup
 jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache)
 {
-    JL_TIMING(METHOD_LOOKUP_COMPILE);
     if (jl_has_free_typevars((jl_value_t*)types))
         return NULL; // don't poison the cache due to a malformed query
     if (!jl_has_concrete_subtype((jl_value_t*)types))
@@ -1987,7 +2214,7 @@ jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types JL_PROPAGATES
     size_t min_valid2 = 1;
     size_t max_valid2 = ~(size_t)0;
     int ambig = 0;
-    jl_value_t *matches = jl_matching_methods(types, 1, 1, world, &min_valid2, &max_valid2, &ambig);
+    jl_value_t *matches = jl_matching_methods(types, jl_nothing, 1, 1, world, &min_valid2, &max_valid2, &ambig);
     if (*min_valid < min_valid2)
         *min_valid = min_valid2;
     if (*max_valid > max_valid2)
@@ -2042,7 +2269,7 @@ static void _generate_from_hint(jl_method_instance_t *mi, size_t world)
 
 static void jl_compile_now(jl_method_instance_t *mi)
 {
-    size_t world = jl_world_counter;
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     size_t tworld = jl_typeinf_world;
     _generate_from_hint(mi, world);
     if (jl_typeinf_func && mi->def.method->primary_world <= tworld) {
@@ -2053,7 +2280,7 @@ static void jl_compile_now(jl_method_instance_t *mi)
 
 JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types)
 {
-    size_t world = jl_world_counter;
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
     size_t tworld = jl_typeinf_world;
     size_t min_valid = 0;
     size_t max_valid = ~(size_t)0;
@@ -2061,6 +2288,7 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types)
     if (mi == NULL)
         return 0;
     JL_GC_PROMISE_ROOTED(mi);
+    mi->precompiled = 1;
     if (jl_generating_output()) {
         jl_compile_now(mi);
         // In addition to full compilation of the compilation-signature, if `types` is more specific (e.g. due to nospecialize),
@@ -2075,6 +2303,7 @@ JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types)
             types2 = jl_type_intersection_env((jl_value_t*)types, (jl_value_t*)mi->def.method->sig, &tpenv2);
             jl_method_instance_t *li2 = jl_specializations_get_linfo(mi->def.method, (jl_value_t*)types2, tpenv2);
             JL_GC_POP();
+            li2->precompiled = 1;
             if (jl_rettype_inferred(li2, world, world) == jl_nothing)
                 (void)jl_type_infer(li2, world, 1);
             if (jl_typeinf_func && mi->def.method->primary_world <= tworld) {
@@ -2166,7 +2395,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
 
 JL_DLLEXPORT jl_value_t *jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t nargs, jl_method_instance_t *mfunc)
 {
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     return _jl_invoke(F, args, nargs, mfunc, world);
 }
 
@@ -2190,8 +2419,8 @@ STATIC_INLINE int sig_match_fast(jl_value_t *arg1t, jl_value_t **args, jl_value_
     return 1;
 }
 
-jl_typemap_entry_t *call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
-static uint8_t pick_which[N_CALL_CACHE];
+_Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
+static _Atomic(uint8_t) pick_which[N_CALL_CACHE];
 #ifdef JL_GF_PROFILE
 size_t ncalls;
 void call_cache_stats()
@@ -2337,7 +2566,7 @@ STATIC_INLINE jl_method_instance_t *jl_lookup_generic_(jl_value_t *F, jl_value_t
 
 JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint32_t nargs)
 {
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     jl_method_instance_t *mfunc = jl_lookup_generic_(F, args, nargs,
                                                      jl_int32hash_fast(jl_return_address()),
                                                      world);
@@ -2345,44 +2574,42 @@ JL_DLLEXPORT jl_value_t *jl_apply_generic(jl_value_t *F, jl_value_t **args, uint
     return _jl_invoke(F, args, nargs, mfunc, world);
 }
 
-static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, size_t world, size_t *min_valid, size_t *max_valid)
+static jl_method_match_t *_gf_invoke_lookup(jl_value_t *types JL_PROPAGATES_ROOT, jl_value_t *mt, size_t world, size_t *min_valid, size_t *max_valid)
 {
     jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types);
     if (jl_is_tuple_type(unw) && jl_tparam0(unw) == jl_bottom_type)
         return NULL;
-    jl_methtable_t *mt = jl_method_table_for(unw);
-    if ((jl_value_t*)mt == jl_nothing)
-        return NULL;
-    int ambig = 0;
-    jl_value_t *matches = ml_matches(mt, 0, (jl_tupletype_t*)types, 1, 0, 0, world, 1, min_valid, max_valid, &ambig);
+    if (mt == jl_nothing)
+        mt = (jl_value_t*)jl_method_table_for(unw);
+    if (mt == jl_nothing)
+        mt = NULL;
+    jl_value_t *matches = ml_matches((jl_methtable_t*)mt, (jl_tupletype_t*)types, 1, 0, 0, world, 1, min_valid, max_valid, NULL);
     if (matches == jl_false || jl_array_len(matches) != 1)
         return NULL;
     jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(matches, 0);
     return matc;
 }
 
-JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup(jl_value_t *types, size_t world)
+JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup(jl_value_t *types, jl_value_t *mt, size_t world)
 {
     // Deprecated: Use jl_gf_invoke_lookup_worlds for future development
     size_t min_valid = 0;
     size_t max_valid = ~(size_t)0;
-    jl_method_match_t *matc = _gf_invoke_lookup(types, world, &min_valid, &max_valid);
+    jl_method_match_t *matc = _gf_invoke_lookup(types, mt, world, &min_valid, &max_valid);
     if (matc == NULL)
         return jl_nothing;
     return (jl_value_t*)matc->method;
 }
 
 
-JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, size_t world, size_t *min_world, size_t *max_world)
+JL_DLLEXPORT jl_value_t *jl_gf_invoke_lookup_worlds(jl_value_t *types, jl_value_t *mt, size_t world, size_t *min_world, size_t *max_world)
 {
-    jl_method_match_t *matc = _gf_invoke_lookup(types, world, min_world, max_world);
+    jl_method_match_t *matc = _gf_invoke_lookup(types, mt, world, min_world, max_world);
     if (matc == NULL)
         return jl_nothing;
-    return (jl_value_t*)matc->method;
+    return (jl_value_t*)matc;
 }
 
-static jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs);
-
 // invoke()
 // this does method dispatch with a set of types to match other than the
 // types of the actual arguments. this means it sometimes does NOT call the
@@ -2394,11 +2621,11 @@ static jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, j
 // NOTE: assumes argument type is a subtype of the lookup type.
 jl_value_t *jl_gf_invoke(jl_value_t *types0, jl_value_t *gf, jl_value_t **args, size_t nargs)
 {
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     jl_value_t *types = NULL;
     JL_GC_PUSH1(&types);
     types = jl_argtype_with_function(gf, types0);
-    jl_method_t *method = (jl_method_t*)jl_gf_invoke_lookup(types, world);
+    jl_method_t *method = (jl_method_t*)jl_gf_invoke_lookup(types, jl_nothing, world);
     JL_GC_PROMISE_ROOTED(method);
 
     if ((jl_value_t*)method == jl_nothing) {
@@ -2412,12 +2639,13 @@ jl_value_t *jl_gf_invoke(jl_value_t *types0, jl_value_t *gf, jl_value_t **args,
     return jl_gf_invoke_by_method(method, gf, args, nargs);
 }
 
-static jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs)
+jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs)
 {
     jl_method_instance_t *mfunc = NULL;
     jl_typemap_entry_t *tm = NULL;
-    if (method->invokes != NULL)
-        tm = jl_typemap_assoc_exact(method->invokes, gf, args, nargs, 1, 1);
+    jl_typemap_t *invokes = jl_atomic_load_relaxed(&method->invokes);
+    if (invokes != jl_nothing)
+        tm = jl_typemap_assoc_exact(invokes, gf, args, nargs, 1, 1);
     if (tm) {
         mfunc = tm->func.linfo;
     }
@@ -2427,23 +2655,27 @@ static jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, j
         jl_tupletype_t *tt = NULL;
         JL_GC_PUSH2(&tpenv, &tt);
         JL_LOCK(&method->writelock);
-        tt = arg_type_tuple(gf, args, nargs);
-        if (jl_is_unionall(method->sig)) {
-            int sub = jl_subtype_matching((jl_value_t*)tt, (jl_value_t*)method->sig, &tpenv);
-            assert(sub); (void)sub;
+        invokes = jl_atomic_load_relaxed(&method->invokes);
+        tm = jl_typemap_assoc_exact(invokes, gf, args, nargs, 1, 1);
+        if (tm) {
+            mfunc = tm->func.linfo;
         }
+        else {
+            tt = arg_type_tuple(gf, args, nargs);
+            if (jl_is_unionall(method->sig)) {
+                int sub = jl_subtype_matching((jl_value_t*)tt, (jl_value_t*)method->sig, &tpenv);
+                assert(sub); (void)sub;
+            }
 
-        if (method->invokes == NULL)
-            method->invokes = jl_nothing;
-
-        mfunc = cache_method(NULL, &method->invokes, (jl_value_t*)method, tt, method, 1, 1, ~(size_t)0, tpenv);
+            mfunc = cache_method(NULL, &method->invokes, (jl_value_t*)method, tt, method, 1, 1, ~(size_t)0, tpenv);
+        }
         JL_UNLOCK(&method->writelock);
         JL_GC_POP();
         if (jl_options.malloc_log)
             jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
     }
     JL_GC_PROMISE_ROOTED(mfunc);
-    size_t world = jl_get_ptls_states()->world_age;
+    size_t world = jl_current_task->world_age;
     return _jl_invoke(gf, args, nargs - 1, mfunc, world);
 }
 
@@ -2459,7 +2691,8 @@ jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_
     jl_sym_t *tname = jl_symbol(prefixed);
     free(prefixed);
     jl_datatype_t *ftype = (jl_datatype_t*)jl_new_datatype(
-            tname, module, st, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
+            tname, module, st, jl_emptysvec, jl_emptysvec, jl_emptysvec, jl_emptysvec,
+            0, 0, 0);
     assert(jl_is_datatype(ftype));
     JL_GC_PUSH1(&ftype);
     ftype->name->mt->name = name;
@@ -2495,6 +2728,7 @@ JL_DLLEXPORT jl_function_t *jl_get_kwsorter(jl_value_t *ty)
             strcpy(&suffixed[0], name);
             strcpy(&suffixed[l], "##kw");
             jl_sym_t *fname = jl_symbol(suffixed);
+            free(suffixed);
             mt->kwsorter = jl_new_generic_function_with_supertype(fname, mt->module, jl_function_type);
             jl_gc_wb(mt, mt->kwsorter);
         }
@@ -2530,8 +2764,8 @@ enum SIGNATURE_FULLY_COVERS {
 
 static jl_method_match_t *make_method_match(jl_tupletype_t *spec_types, jl_svec_t *sparams, jl_method_t *method, enum SIGNATURE_FULLY_COVERS fully_covers)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_method_match_t *match = (jl_method_match_t*)jl_gc_alloc(ptls, sizeof(jl_method_match_t), jl_method_match_type);
+    jl_task_t *ct = jl_current_task;
+    jl_method_match_t *match = (jl_method_match_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_match_t), jl_method_match_type);
     match->spec_types = spec_types;
     match->sparams = sparams;
     match->method = method;
@@ -2583,6 +2817,11 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio
     return 1;
 }
 
+static int ml_mtable_visitor(jl_methtable_t *mt, void *env)
+{
+    return jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, (struct typemap_intersection_env*)env);
+}
+
 // This is the collect form of calling jl_typemap_intersection_visitor
 // with optimizations to skip fully shadowed methods.
 //
@@ -2593,21 +2832,19 @@ static int ml_matches_visitor(jl_typemap_entry_t *ml, struct typemap_intersectio
 // fully-covers is a Bool indicating subtyping, though temporarily it may be
 // tri-values, with `nothing` indicating a match that is not a subtype, but
 // which is dominated by one which is (and thus should be excluded unless ambiguous)
-static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
+static jl_value_t *ml_matches(jl_methtable_t *mt,
                               jl_tupletype_t *type, int lim, int include_ambiguous,
                               int intersections, size_t world, int cache_result,
-                              size_t *min_valid, size_t *max_valid, int *has_ambiguity)
+                              size_t *min_valid, size_t *max_valid, int *ambig)
 {
-    jl_typemap_t *defs = mt->defs;
-    if (defs == jl_nothing) // special-case: ignore builtin functions
-        return jl_an_empty_vec_any;
+    int has_ambiguity = 0;
     jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)type);
     assert(jl_is_datatype(unw));
     size_t l = jl_svec_len(((jl_datatype_t*)unw)->parameters);
     jl_value_t *va = NULL;
     if (l > 0) {
         va = jl_tparam(unw, l - 1);
-        if (jl_is_vararg_type(va))
+        if (jl_is_vararg(va))
             va = jl_unwrap_vararg(va);
         else
             va = NULL;
@@ -2617,66 +2854,78 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
         intersections, world, lim, /* .t = */ jl_an_empty_vec_any,
         /* .min_valid = */ *min_valid, /* .max_valid = */ *max_valid, /* .matc = */ NULL};
     struct jl_typemap_assoc search = {(jl_value_t*)type, world, jl_emptysvec, 1, ~(size_t)0};
-    JL_GC_PUSH5(&env.t, &env.matc, &env.match.env, &search.env, &env.match.ti);
+    jl_value_t *isect2 = NULL;
+    JL_GC_PUSH6(&env.t, &env.matc, &env.match.env, &search.env, &env.match.ti, &isect2);
 
-    // check the leaf cache if this type can be in there
-    if (((jl_datatype_t*)unw)->isdispatchtuple) {
-        jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
-        jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)type, world);
-        if (entry) {
-            jl_method_instance_t *mi = entry->func.linfo;
-            jl_method_t *meth = mi->def.method;
-            if (!jl_is_unionall(meth->sig)) {
-                env.match.env = jl_emptysvec;
-                env.match.ti = unw;
-            }
-            else if (jl_egal((jl_value_t*)type, mi->specTypes)) {
-                env.match.env = mi->sparam_vals;
-                env.match.ti = mi->specTypes;
-            }
-            else {
-                // this just calls jl_subtype_env (since we know that `type <: meth->sig` by transitivity)
-                env.match.ti = jl_type_intersection_env((jl_value_t*)type, (jl_value_t*)meth->sig, &env.match.env);
-            }
-            env.matc = make_method_match((jl_tupletype_t*)env.match.ti,
-                env.match.env, meth, FULLY_COVERS);
-            env.t = (jl_value_t*)jl_alloc_vec_any(1);
-            jl_array_ptr_set(env.t, 0, env.matc);
-            if (*min_valid < entry->min_world)
-                *min_valid = entry->min_world;
-            if (*max_valid > entry->max_world)
-                *max_valid = entry->max_world;
-            JL_GC_POP();
-            return env.t;
-        }
-    }
-    // then check the full cache if it seems profitable
-    if (((jl_datatype_t*)unw)->isdispatchtuple) {
-        jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1);
-        if (entry && (((jl_datatype_t*)unw)->isdispatchtuple || entry->guardsigs == jl_emptysvec)) {
-            jl_method_instance_t *mi = entry->func.linfo;
-            jl_method_t *meth = mi->def.method;
-            if (!jl_is_unionall(meth->sig) && ((jl_datatype_t*)unw)->isdispatchtuple) {
-                env.match.env = jl_emptysvec;
-                env.match.ti = unw;
+    if (mt) {
+        // check the leaf cache if this type can be in there
+        if (((jl_datatype_t*)unw)->isdispatchtuple) {
+            jl_array_t *leafcache = jl_atomic_load_relaxed(&mt->leafcache);
+            jl_typemap_entry_t *entry = lookup_leafcache(leafcache, (jl_value_t*)type, world);
+            if (entry) {
+                jl_method_instance_t *mi = entry->func.linfo;
+                jl_method_t *meth = mi->def.method;
+                if (!jl_is_unionall(meth->sig)) {
+                    env.match.env = jl_emptysvec;
+                    env.match.ti = unw;
+                }
+                else if (jl_egal((jl_value_t*)type, mi->specTypes)) {
+                    env.match.env = mi->sparam_vals;
+                    env.match.ti = mi->specTypes;
+                }
+                else {
+                    // this just calls jl_subtype_env (since we know that `type <: meth->sig` by transitivity)
+                    env.match.ti = jl_type_intersection_env((jl_value_t*)type, (jl_value_t*)meth->sig, &env.match.env);
+                }
+                env.matc = make_method_match((jl_tupletype_t*)env.match.ti,
+                    env.match.env, meth, FULLY_COVERS);
+                env.t = (jl_value_t*)jl_alloc_vec_any(1);
+                jl_array_ptr_set(env.t, 0, env.matc);
+                if (*min_valid < entry->min_world)
+                    *min_valid = entry->min_world;
+                if (*max_valid > entry->max_world)
+                    *max_valid = entry->max_world;
+                JL_GC_POP();
+                return env.t;
             }
-            else {
-                // this just calls jl_subtype_env (since we know that `type <: meth->sig` by transitivity)
-                env.match.ti = jl_type_intersection_env((jl_value_t*)type, (jl_value_t*)meth->sig, &env.match.env);
+        }
+        // then check the full cache if it seems profitable
+        if (((jl_datatype_t*)unw)->isdispatchtuple) {
+            jl_typemap_entry_t *entry = jl_typemap_assoc_by_type(jl_atomic_load_relaxed(&mt->cache), &search, jl_cachearg_offset(mt), /*subtype*/1);
+            if (entry && (((jl_datatype_t*)unw)->isdispatchtuple || entry->guardsigs == jl_emptysvec)) {
+                jl_method_instance_t *mi = entry->func.linfo;
+                jl_method_t *meth = mi->def.method;
+                if (!jl_is_unionall(meth->sig) && ((jl_datatype_t*)unw)->isdispatchtuple) {
+                    env.match.env = jl_emptysvec;
+                    env.match.ti = unw;
+                }
+                else {
+                    // this just calls jl_subtype_env (since we know that `type <: meth->sig` by transitivity)
+                    env.match.ti = jl_type_intersection_env((jl_value_t*)type, (jl_value_t*)meth->sig, &env.match.env);
+                }
+                env.matc = make_method_match((jl_tupletype_t*)env.match.ti,
+                    env.match.env, meth, FULLY_COVERS);
+                env.t = (jl_value_t*)jl_alloc_vec_any(1);
+                jl_array_ptr_set(env.t, 0, env.matc);
+                if (*min_valid < entry->min_world)
+                    *min_valid = entry->min_world;
+                if (*max_valid > entry->max_world)
+                    *max_valid = entry->max_world;
+                JL_GC_POP();
+                return env.t;
             }
-            env.matc = make_method_match((jl_tupletype_t*)env.match.ti,
-                env.match.env, meth, FULLY_COVERS);
-            env.t = (jl_value_t*)jl_alloc_vec_any(1);
-            jl_array_ptr_set(env.t, 0, env.matc);
-            *min_valid = entry->min_world;
-            *max_valid = entry->max_world;
+        }
+        if (!jl_typemap_intersection_visitor(jl_atomic_load_relaxed(&mt->defs), 0, &env.match)) {
             JL_GC_POP();
-            return env.t;
+            return jl_false;
         }
     }
-    if (!jl_typemap_intersection_visitor(defs, offs, &env.match)) {
-        JL_GC_POP();
-        return jl_false;
+    else {
+        // else: scan everything
+        if (!jl_foreach_reachable_mtable(ml_mtable_visitor, &env.match)) {
+            JL_GC_POP();
+            return jl_false;
+        }
     }
     *min_valid = env.min_valid;
     *max_valid = env.max_valid;
@@ -2687,84 +2936,85 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
     int minmax_ambig = 0;
     int all_subtypes = 1;
     if (len > 1) {
-        // first try to pre-process the results to find the most specific result that fully covers the input
-        // (since we can do this in linear time, and the rest is O(n^2)
-        //   - first see if this might even be profitable, given the requested output we need to compute
+        // first try to pre-process the results to find the most specific
+        // result that fully covers the input, since we can do this in linear
+        // time, and the rest is O(n^2)
+        //   - first find a candidate for the best of these method results
         for (i = 0; i < len; i++) {
             jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-            if (matc->fully_covers != FULLY_COVERS) {
+            if (matc->fully_covers == FULLY_COVERS) {
+                jl_method_t *m = matc->method;
+                if (minmax != NULL) {
+                    jl_method_t *minmaxm = minmax->method;
+                    if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
+                        continue;
+                }
+                minmax = matc;
+            }
+            else {
                 all_subtypes = 0;
-                break;
             }
         }
-        if (all_subtypes || !include_ambiguous) {
-            //   - then find a candidate for the best of these method results
-            //     (If we have a reason to compute this. There's no point in
-            //     finding the minmax now, if we still need to examine all
-            //     methods for ambiguities later.)
+        //   - then see if it dominated all of the other choices
+        if (minmax != NULL) {
             for (i = 0; i < len; i++) {
                 jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
+                if (matc == minmax)
+                    break;
                 if (matc->fully_covers == FULLY_COVERS) {
                     jl_method_t *m = matc->method;
-                    if (minmax != NULL) {
-                        jl_method_t *minmaxm = minmax->method;
-                        if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
-                            continue;
-                    }
-                    minmax = matc;
-                }
-            }
-            //   - then see if it dominated all of the other choices
-            if (minmax != NULL) {
-                for (i = 0; i < len; i++) {
-                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                    if (matc == minmax)
+                    jl_method_t *minmaxm = minmax->method;
+                    if (!jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig)) {
+                        minmax_ambig = 1;
+                        minmax = NULL;
+                        has_ambiguity = 1;
                         break;
-                    if (matc->fully_covers == FULLY_COVERS) {
-                        jl_method_t *m = matc->method;
-                        jl_method_t *minmaxm = minmax->method;
-                        if (!jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig)) {
-                            minmax_ambig = 1;
-                            *has_ambiguity = 1;
-                            if (include_ambiguous)
-                                minmax = NULL;
-                            break;
-                        }
                     }
                 }
             }
-            //   - it may even dominate some choices that are not subtypes!
-            //     move those into the subtype group, where we're filter them out shortly after
-            if (!all_subtypes && minmax) {
-                jl_method_t *minmaxm = minmax->method;
-                all_subtypes = 1;
-                for (i = 0; i < len; i++) {
-                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                    if (matc->fully_covers != FULLY_COVERS) {
-                        jl_method_t *m = matc->method;
-                        if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
-                            matc->fully_covers = SENTINEL; // put a sentinel value here for sorting
-                        else
-                            all_subtypes = 0;
-                    }
+        }
+        //   - it may even dominate some choices that are not subtypes!
+        //     move those into the subtype group, where we're filter them out shortly after
+        //     (potentially avoiding reporting these as an ambiguity, and
+        //     potentially allowing us to hit the next fast path)
+        //   - we could always check here if *any* FULLY_COVERS method is
+        //     more-specific (instead of just considering minmax), but that may
+        //     cost much extra and is less likely to help us hit a fast path
+        //     (we will look for this later, when we compute ambig_groupid, for
+        //     correctness)
+        if (!all_subtypes && minmax != NULL) {
+            jl_method_t *minmaxm = minmax->method;
+            all_subtypes = 1;
+            for (i = 0; i < len; i++) {
+                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
+                if (matc->fully_covers != FULLY_COVERS) {
+                    jl_method_t *m = matc->method;
+                    if (jl_type_morespecific((jl_value_t*)minmaxm->sig, (jl_value_t*)m->sig))
+                        matc->fully_covers = SENTINEL; // put a sentinel value here for sorting
+                    else
+                        all_subtypes = 0;
                 }
             }
-            //    - now we might have a fast-return here, if we see that
-            //      we've already processed all of the possible outputs
-            if (all_subtypes) {
-                if (minmax_ambig) {
-                    if (!include_ambiguous) {
-                        len = 0;
-                        env.t = jl_an_empty_vec_any;
-                    }
+        }
+        //    - now we might have a fast-return here, if we see that
+        //      we've already processed all of the possible outputs
+        if (all_subtypes) {
+            if (minmax_ambig) {
+                if (!include_ambiguous) {
+                    len = 0;
+                    env.t = jl_an_empty_vec_any;
                 }
-                else {
-                    assert(minmax != NULL);
-                    jl_array_ptr_set(env.t, 0, minmax);
-                    jl_array_del_end((jl_array_t*)env.t, len - 1);
-                    len = 1;
+                else if (lim == 1) {
+                    JL_GC_POP();
+                    return jl_false;
                 }
             }
+            else {
+                assert(minmax != NULL);
+                jl_array_ptr_set(env.t, 0, minmax);
+                jl_array_del_end((jl_array_t*)env.t, len - 1);
+                len = 1;
+            }
         }
     }
     if (len > 1) {
@@ -2776,8 +3026,8 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
             env.matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
             jl_method_t *m = env.matc->method;
             int subt = env.matc->fully_covers != NOT_FULLY_COVERS;
-            if (minmax != NULL && subt) {
-                continue; // already the biggest
+            if ((minmax != NULL || (minmax_ambig && !include_ambiguous)) && subt) {
+                continue; // already the biggest (skip will filter others)
             }
             for (j = 0; j < i; j++) {
                 jl_method_match_t *matc2 = (jl_method_match_t *)jl_array_ptr_ref(env.t, i - j - 1);
@@ -2785,221 +3035,242 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
                 int subt2 = matc2->fully_covers != NOT_FULLY_COVERS;
                 if (!subt2 && subt)
                     break;
-                if (subt == subt2)
-                    if (subt || !jl_has_empty_intersection(m->sig, m2->sig))
-                        if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig))
+                if (subt == subt2) {
+                    if (lim >= 0) {
+                        if (subt || !jl_has_empty_intersection(m->sig, m2->sig))
+                            if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig))
+                                break;
+                    }
+                    else {
+                        // if unlimited, use approximate sorting, with the only
+                        // main downside being that it may be overly-
+                        // conservative at reporting existence of ambiguities
+                        if (jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
                             break;
+                    }
+                }
                 jl_array_ptr_set(env.t, i - j, matc2);
             }
             jl_array_ptr_set(env.t, i - j, env.matc);
         }
-        // final step to finish sort:
-        // we stopped early with just having all non-subtypes before all
-        // subtypes, but the case on the boundary might be wrongly placed:
-        // check for that now
-        if (!minmax) {
-            for (i = 0; i < len; i++) {
-                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                if (matc->fully_covers == FULLY_COVERS)
-                    break;
-            }
-            for (; i > 0; i--) {
-                env.matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i - 1);
-                jl_method_t *m = env.matc->method;
-                for (j = i; j < len; j++) {
-                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
-                    jl_method_t *m2 = matc2->method;
-                    if (matc2->fully_covers != FULLY_COVERS)
-                        break;
-                    if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
-                        break;
-                    jl_array_ptr_set(env.t, j - 1, matc2);
-                }
-                if (j == i)
-                    break;
-                env.matc->fully_covers = SENTINEL;
-                jl_array_ptr_set(env.t, j - 1, env.matc);
-            }
-        }
         char *skip = (char*)alloca(len);
         memset(skip, 0, len);
-        // since we had a minmax method, now may now be able to cleanup some of our sort result
-        if (minmax_ambig && !include_ambiguous) {
+        // if we had a minmax method (any subtypes), now may now be able to
+        // quickly cleanup some of our sort result
+        if (minmax != NULL || (minmax_ambig && !include_ambiguous)) {
             for (i = 0; i < len; i++) {
                 jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                if (matc->fully_covers != NOT_FULLY_COVERS) {
+                if (minmax != matc && matc->fully_covers != NOT_FULLY_COVERS) {
                     skip[i] = 1;
                 }
             }
         }
-        else if (minmax != NULL) {
-            assert(all_subtypes || !include_ambiguous);
+        if (include_ambiguous && lim == -1 && ambig == NULL && !minmax_ambig) {
+            // in this case, we don't actually need to compute the ambiguity
+            // information at all as the user doesn't need us to filter them
+            // out or report them
+        }
+        else {
+            // now that the results are (mostly) sorted, assign group numbers to each ambiguity
+            // by computing the specificity-ambiguity matrix covering this query
+            uint32_t *ambig_groupid = (uint32_t*)alloca(len * sizeof(uint32_t));
+            for (i = 0; i < len; i++)
+                ambig_groupid[i] = i;
+            // as we go, keep a rough count of how many methods are disjoint, which
+            // gives us a lower bound on how many methods we will be returning
+            // and lets us stop early if we reach our limit
+            int ndisjoint = minmax ? 1 : 0;
             for (i = 0; i < len; i++) {
                 jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                if (minmax != matc && matc->fully_covers != NOT_FULLY_COVERS) {
-                    skip[i] = 1;
-                }
-            }
-        }
-        // now that the results are (mostly) sorted, assign group numbers to each ambiguity
-        // by computing the specificity-ambiguity matrix covering this query
-        uint32_t *ambig_groupid = (uint32_t*)alloca(len * sizeof(uint32_t));
-        // as we go, keep a rough count of how many methods are disjoint, which
-        // gives us a lower bound on how many methods we will be returning
-        // and lets us stop early if we reach our limit
-        int ndisjoint = 0;
-        for (i = 0; i < len; i++) {
-            // can't use skip[i] here, since we still need to make sure the minmax dominates
-            jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-            jl_method_t *m = matc->method;
-            int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-            ambig_groupid[i] = i;
-            int disjoint = 1;
-            for (j = i; j > 0; j--) {
-                jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j - 1);
-                jl_method_t *m2 = matc2->method;
-                int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
-                if (skip[j - 1]) {
-                    // if there was a minmax method, we can just pretend these are all in the same group
+                if (skip[i]) {
+                    // if there was a minmax method, we can just pretend the rest are all in the same group:
                     // they're all together but unsorted in the list, since we'll drop them all later anyways
-                    assert(matc2->fully_covers != NOT_FULLY_COVERS);
-                    disjoint = 0;
-                    ambig_groupid[i] = j - 1; // ambiguity covering range [i:j)
+                    assert(matc->fully_covers != NOT_FULLY_COVERS);
+                    if (ambig_groupid[len - 1] > i)
+                        ambig_groupid[len - 1] = i; // ambiguity covering range [i:len)
+                    break;
                 }
-                else if (subt || subt2 || !jl_has_empty_intersection(m->sig, m2->sig)) {
-                    if (!jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig))
-                        ambig_groupid[i] = j - 1; // ambiguity covering range [i:j)
-                    disjoint = 0;
+                jl_method_t *m = matc->method;
+                int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
+                int rsubt = jl_egal((jl_value_t*)matc->spec_types, m->sig);
+                int disjoint = 1;
+                for (j = len; j > i; j--) {
+                    if (ambig_groupid[j - 1] < i) {
+                        disjoint = 0;
+                        break;
+                    }
+                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j - 1);
+                    // can't use skip[j - 1] here, since we still need to make sure the minmax dominates
+                    jl_method_t *m2 = matc2->method;
+                    int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
+                    int rsubt2 = jl_egal((jl_value_t*)matc2->spec_types, m2->sig);
+                    jl_value_t *ti;
+                    if (!subt && !subt2 && rsubt && rsubt2 && lim == -1 && ambig == NULL)
+                        // these would only be filtered out of the list as
+                        // ambiguous if they are also type-equal, as we
+                        // aren't skipping matches and the user doesn't
+                        // care if we report any ambiguities
+                        continue;
+                    if (jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig))
+                        continue;
+                    if (subt) {
+                        ti = (jl_value_t*)matc2->spec_types;
+                        isect2 = NULL;
+                    }
+                    else if (subt2) {
+                        ti = (jl_value_t*)matc->spec_types;
+                        isect2 = NULL;
+                    }
+                    else {
+                        jl_type_intersection2((jl_value_t*)matc->spec_types, (jl_value_t*)matc2->spec_types, &env.match.ti, &isect2);
+                        ti = env.match.ti;
+                    }
+                    if (ti != jl_bottom_type) {
+                        disjoint = 0;
+                        // m and m2 are ambiguous, but let's see if we can find another method (m3)
+                        // that dominates their intersection, and means we can ignore this
+                        size_t k;
+                        for (k = i; k > 0; k--) {
+                            jl_method_match_t *matc3 = (jl_method_match_t*)jl_array_ptr_ref(env.t, k - 1);
+                            jl_method_t *m3 = matc3->method;
+                            if ((jl_subtype(ti, m3->sig) || (isect2 && jl_subtype(isect2, m3->sig)))
+                                    && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m->sig)
+                                    && jl_type_morespecific((jl_value_t*)m3->sig, (jl_value_t*)m2->sig))
+                                break;
+                        }
+                        if (k == 0) {
+                            ambig_groupid[j - 1] = i; // ambiguity covering range [i:j)
+                            isect2 = NULL;
+                            break;
+                        }
+                    }
+                    isect2 = NULL;
                 }
-            }
-            if (disjoint && lim >= 0) {
-                ndisjoint += 1;
-                if (ndisjoint > lim) {
-                    JL_GC_POP();
-                    return jl_false;
+                if (disjoint && lim >= 0) {
+                    ndisjoint += 1;
+                    if (ndisjoint > lim) {
+                        JL_GC_POP();
+                        return jl_false;
+                    }
                 }
             }
-        }
-        // then we'll merge those numbers to assign each item in the group the same number
-        uint32_t groupid = 0;
-        uint32_t grouphi = 0;
-        for (i = 0; i < len; i++) {
-            j = len - i - 1;
-            uint32_t agid = ambig_groupid[j];
-            if (agid != j) { // thus agid < j
-                if (grouphi == 0) {
-                    groupid = agid;
-                    grouphi = j;
+            // then we'll merge those numbers to assign each item in the group the same number
+            uint32_t groupid = 0;
+            uint32_t grouphi = 0;
+            for (i = 0; i < len; i++) {
+                j = len - i - 1;
+                uint32_t agid = ambig_groupid[j];
+                if (agid != j) { // thus agid < j
+                    if (grouphi == 0) {
+                        groupid = agid;
+                        grouphi = j;
+                    }
+                    else if (agid < groupid) {
+                        groupid = agid;
+                    }
                 }
-                else if (agid < groupid) {
-                    groupid = agid;
+                if (grouphi && j == groupid) {
+                    do {
+                        ambig_groupid[grouphi--] = groupid;
+                    } while (grouphi > j);
+                    ambig_groupid[j] = groupid;
+                    groupid = 0;
+                    grouphi = 0;
                 }
             }
-            if (grouphi && j == groupid) {
-                do {
-                    ambig_groupid[grouphi--] = groupid;
-                } while (grouphi > j);
-                ambig_groupid[j] = groupid;
-                groupid = 0;
-                grouphi = 0;
-            }
-        }
-        // always remove matches after the first subtype, now that we've sorted the list for ambiguities
-        for (i = 0; i < len; i++) {
-            jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-            if (matc->fully_covers == FULLY_COVERS) { // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-                uint32_t agid = ambig_groupid[i];
-                while (i < len && agid == ambig_groupid[i])
-                    i++; // keep ambiguous ones
-                for (; i < len; i++)
-                    skip[i] = 1; // drop the rest
-            }
-        }
-        // when limited, skip matches that are covered by earlier ones (and aren't perhaps ambiguous with them)
-        if (lim >= 0) {
+            // always remove matches after the first subtype, now that we've sorted the list for ambiguities
             for (i = 0; i < len; i++) {
-                if (skip[i])
-                    continue;
                 jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                jl_method_t *m = matc->method;
-                jl_tupletype_t *ti = matc->spec_types;
-                if (matc->fully_covers == FULLY_COVERS)
-                    break; // remaining matches are ambiguous or already skipped
-                for (j = 0; j < i; j++) {
-                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
-                    jl_method_t *m2 = matc2->method;
-                    if (jl_subtype((jl_value_t*)ti, m2->sig)) {
-                        if (ambig_groupid[i] != ambig_groupid[j]) {
-                            skip[i] = 1;
-                            break;
-                        }
-                        else if (!include_ambiguous) {
-                            if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig)) {
+                if (matc->fully_covers == FULLY_COVERS) { // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
+                    uint32_t agid = ambig_groupid[i];
+                    while (i < len && agid == ambig_groupid[i])
+                        i++; // keep ambiguous ones
+                    for (; i < len; i++)
+                        skip[i] = 1; // drop the rest
+                }
+            }
+            // when limited, skip matches that are covered by earlier ones (and aren't perhaps ambiguous with them)
+            if (lim >= 0) {
+                for (i = 0; i < len; i++) {
+                    if (skip[i])
+                        continue;
+                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
+                    jl_method_t *m = matc->method;
+                    jl_tupletype_t *ti = matc->spec_types;
+                    if (matc->fully_covers == FULLY_COVERS)
+                        break; // remaining matches are ambiguous or already skipped
+                    for (j = 0; j < i; j++) {
+                        jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
+                        jl_method_t *m2 = matc2->method;
+                        if (jl_subtype((jl_value_t*)ti, m2->sig)) {
+                            if (ambig_groupid[i] != ambig_groupid[j]) {
                                 skip[i] = 1;
                                 break;
                             }
+                            else if (!include_ambiguous) {
+                                if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig)) {
+                                    skip[i] = 1;
+                                    break;
+                                }
+                            }
                         }
                     }
                 }
             }
-        }
-        // Compute whether anything could be ambiguous by seeing if any two
-        // methods in the result are in the same ambiguity group.
-        for (i = 0; i < len; i++) {
-            if (!skip[i]) {
-                uint32_t agid = ambig_groupid[i];
-                for (; i < len; i++) {
-                    if (!skip[i]) {
-                        if (agid == ambig_groupid[i]) {
-                            *has_ambiguity = 1;
-                            break;
-                        }
-                        agid = ambig_groupid[i];
+            // Compute whether anything could be ambiguous by seeing if any two
+            // remaining methods in the result are in the same ambiguity group.
+            assert(len > 0);
+            uint32_t agid = ambig_groupid[0];
+            for (i = 1; i < len; i++) {
+                if (!skip[i]) {
+                    if (agid == ambig_groupid[i]) {
+                        has_ambiguity = 1;
+                        break;
                     }
+                    agid = ambig_groupid[i];
                 }
-                break;
             }
-        }
-        // If we're only returning possible matches, now filter out any method
-        // whose intersection is fully ambiguous with the group it is in.
-        if (!include_ambiguous) {
-            for (i = 0; i < len; i++) {
-                if (skip[i])
-                    continue;
-                uint32_t agid = ambig_groupid[i];
-                jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
-                jl_method_t *m = matc->method;
-                jl_tupletype_t *ti = matc->spec_types;
-                int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
-                char ambig1 = 0;
-                for (j = agid; j < len && ambig_groupid[j] == agid; j++) {
-                    if (j == i)
+            // If we're only returning possible matches, now filter out any method
+            // whose intersection is fully ambiguous with the group it is in.
+            if (!include_ambiguous) {
+                for (i = 0; i < len; i++) {
+                    if (skip[i])
                         continue;
-                    jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
-                    jl_method_t *m2 = matc2->method;
-                    int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
-                    // if their intersection contributes to the ambiguity cycle
-                    if (subt || subt2 || !jl_has_empty_intersection((jl_value_t*)ti, m2->sig)) {
-                        // and the contribution of m is ambiguous with the portion of the cycle from m2
-                        if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
-                            // but they aren't themselves simply ordered (here
-                            // we don't consider that a third method might be
-                            // disrupting that ordering and just consider them
-                            // pairwise to keep this simple).
-                            if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) &&
-                                !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) {
-                                ambig1 = 1;
+                    uint32_t agid = ambig_groupid[i];
+                    jl_method_match_t *matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, i);
+                    jl_method_t *m = matc->method;
+                    jl_tupletype_t *ti = matc->spec_types;
+                    int subt = matc->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m->sig)
+                    char ambig1 = 0;
+                    for (j = agid; j < len && ambig_groupid[j] == agid; j++) {
+                        if (j == i)
+                            continue;
+                        jl_method_match_t *matc2 = (jl_method_match_t*)jl_array_ptr_ref(env.t, j);
+                        jl_method_t *m2 = matc2->method;
+                        int subt2 = matc2->fully_covers == FULLY_COVERS; // jl_subtype((jl_value_t*)type, (jl_value_t*)m2->sig)
+                        // if their intersection contributes to the ambiguity cycle
+                        if (subt || subt2 || !jl_has_empty_intersection((jl_value_t*)ti, m2->sig)) {
+                            // and the contribution of m is ambiguous with the portion of the cycle from m2
+                            if (subt2 || jl_subtype((jl_value_t*)ti, m2->sig)) {
+                                // but they aren't themselves simply ordered (here
+                                // we don't consider that a third method might be
+                                // disrupting that ordering and just consider them
+                                // pairwise to keep this simple).
+                                if (!jl_type_morespecific((jl_value_t*)m->sig, (jl_value_t*)m2->sig) &&
+                                    !jl_type_morespecific((jl_value_t*)m2->sig, (jl_value_t*)m->sig)) {
+                                    ambig1 = 1;
+                                }
+                            }
+                            else {
+                                // otherwise some aspect of m is not ambiguous
+                                ambig1 = 0;
+                                break;
                             }
-                        }
-                        else {
-                            // otherwise some aspect of m is not ambiguous
-                            ambig1 = 0;
-                            break;
                         }
                     }
+                    if (ambig1)
+                        skip[i] = 1;
                 }
-                if (ambig1)
-                    skip[i] = 1;
             }
         }
         // cleanup array to remove skipped entries
@@ -3016,16 +3287,18 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
             jl_array_del_end((jl_array_t*)env.t, len - j);
         len = j;
     }
-    if (cache_result && ((jl_datatype_t*)unw)->isdispatchtuple) { // cache_result parameter keeps this from being recursive
-        if (len == 1 && !*has_ambiguity) {
+    if (mt && cache_result && ((jl_datatype_t*)unw)->isdispatchtuple) { // cache_result parameter keeps this from being recursive
+        if (len == 1 && !has_ambiguity) {
             env.matc = (jl_method_match_t*)jl_array_ptr_ref(env.t, 0);
             jl_method_t *meth = env.matc->method;
             jl_svec_t *tpenv = env.matc->sparams;
             JL_LOCK(&mt->writelock);
-            cache_method(mt, &mt->cache, (jl_value_t*)mt, type, meth, world, env.min_valid, env.max_valid, tpenv);
+            cache_method(mt, &mt->cache, (jl_value_t*)mt, (jl_tupletype_t*)unw, meth, world, env.min_valid, env.max_valid, tpenv);
             JL_UNLOCK(&mt->writelock);
         }
     }
+    if (ambig != NULL)
+        *ambig = has_ambiguity;
     JL_GC_POP();
     if (lim >= 0 && len > lim)
         return jl_false;
@@ -3033,14 +3306,14 @@ static jl_value_t *ml_matches(jl_methtable_t *mt, int offs,
 }
 
 // see if it might be possible to construct an instance of `typ`
-// if ninitialized == nfields, but a fieldtype is Union{},
+// if n_uninitialized == 0, but a fieldtype is Union{},
 // that type will not be constructable, for example, tested recursively
 int jl_has_concrete_subtype(jl_value_t *typ)
 {
     if (typ == jl_bottom_type)
         return 0;
     typ = jl_unwrap_unionall(typ);
-    if (jl_is_vararg_type(typ))
+    if (jl_is_vararg(typ))
         typ = jl_unwrap_vararg(typ);
     if (!jl_is_datatype(typ))
         return 1;
@@ -3052,15 +3325,26 @@ int jl_has_concrete_subtype(jl_value_t *typ)
 //   the best way to avoid acquisition priority
 //   ordering violations
 //static jl_mutex_t typeinf_lock;
-#define typeinf_lock codegen_lock
+#define typeinf_lock jl_codegen_lock
+
+static uint64_t inference_start_time = 0;
+static uint8_t inference_is_measuring_compile_time = 0;
 
 JL_DLLEXPORT void jl_typeinf_begin(void)
 {
     JL_LOCK(&typeinf_lock);
+    if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
+        inference_start_time = jl_hrtime();
+        inference_is_measuring_compile_time = 1;
+    }
 }
 
 JL_DLLEXPORT void jl_typeinf_end(void)
 {
+    if (typeinf_lock.count == 1 && inference_is_measuring_compile_time) {
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - inference_start_time));
+        inference_is_measuring_compile_time = 0;
+    }
     JL_UNLOCK(&typeinf_lock);
 }
 
diff --git a/src/iddict.c b/src/iddict.c
index 74ae515b0c7dd8..e6c9eee44b9804 100644
--- a/src/iddict.c
+++ b/src/iddict.c
@@ -5,10 +5,10 @@
 // compute empirical max-probe for a given size
 #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)
 
-#define keyhash(k) jl_object_id(k)
+#define keyhash(k) jl_object_id_(jl_typeof(k), k)
 #define h2index(hv, sz) (size_t)(((hv) & ((sz)-1)) * 2)
 
-static int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val);
+static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val);
 
 JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
 {
@@ -18,7 +18,7 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
     jl_array_t *newa = jl_alloc_vec_any(newsz);
     // keep the original array in the original slot since we need `ol`
     // to be valid in the loop below.
-    JL_GC_PUSH1(&newa);
+    JL_GC_PUSH2(&newa, &a);
     for (i = 0; i < sz; i += 2) {
         if (ol[i + 1] != NULL) {
             jl_table_assign_bp(&newa, ol[i], ol[i + 1]);
@@ -30,9 +30,9 @@ JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz)
     return newa;
 }
 
-static int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val)
+static inline int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val)
 {
-    // pa points to a **rooted** gc frame slot
+    // pa points to a **un**rooted address
     uint_t hv;
     jl_array_t *a = *pa;
     size_t orig, index, iter, empty_slot;
@@ -43,7 +43,7 @@ static int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val)
         *pa = a;
     }
     size_t maxprobe = max_probe(sz);
-    void **tab = (void **)a->data;
+    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data;
 
     hv = keyhash(key);
     while (1) {
@@ -54,14 +54,14 @@ static int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val)
         empty_slot = -1;
 
         do {
-            jl_value_t *k2 = (jl_value_t*)tab[index];
+            jl_value_t *k2 = jl_atomic_load_relaxed(&tab[index]);
             if (k2 == NULL) {
                 if (empty_slot == -1)
                     empty_slot = index;
                 break;
             }
             if (jl_egal(key, k2)) {
-                if (tab[index + 1] != NULL) {
+                if (jl_atomic_load_relaxed(&tab[index + 1]) != NULL) {
                     jl_atomic_store_release(&tab[index + 1], val);
                     jl_gc_wb(a, val);
                     return 0;
@@ -71,8 +71,8 @@ static int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val)
                 if (empty_slot == -1)
                     empty_slot = index;
             }
-            if (empty_slot == -1 && tab[index + 1] == NULL) {
-                assert(tab[index] == jl_nothing);
+            if (empty_slot == -1 && jl_atomic_load_relaxed(&tab[index + 1]) == NULL) {
+                assert(jl_atomic_load_relaxed(&tab[index]) == jl_nothing);
                 empty_slot = index;
             }
 
@@ -102,20 +102,20 @@ static int jl_table_assign_bp(jl_array_t **pa, jl_value_t *key, jl_value_t *val)
         *pa = jl_idtable_rehash(*pa, newsz);
 
         a = *pa;
-        tab = (void **)a->data;
+        tab = (_Atomic(jl_value_t*)*)a->data;
         sz = hash_size(a);
         maxprobe = max_probe(sz);
     }
 }
 
 /* returns bp if key is in hash, otherwise NULL */
-jl_value_t **jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT
+inline _Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT
 {
     size_t sz = hash_size(a);
     if (sz == 0)
         return NULL;
     size_t maxprobe = max_probe(sz);
-    void **tab = (void **)a->data;
+    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)a->data;
     uint_t hv = keyhash(key);
     size_t index = h2index(hv, sz);
     sz *= 2;
@@ -123,12 +123,12 @@ jl_value_t **jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT
     size_t iter = 0;
 
     do {
-        jl_value_t *k2 = (jl_value_t*)jl_atomic_load_relaxed(&tab[index]); // just to ensure the load doesn't get duplicated
+        jl_value_t *k2 = jl_atomic_load_relaxed(&tab[index]); // just to ensure the load doesn't get duplicated
         if (k2 == NULL)
             return NULL;
         if (jl_egal(key, k2)) {
             if (jl_atomic_load_relaxed(&tab[index + 1]) != NULL)
-                return (jl_value_t**)&tab[index + 1];
+                return &tab[index + 1];
             // `nothing` is our sentinel value for deletion, so need to keep searching if it's also our search key
             if (key != jl_nothing)
                 return NULL; // concurrent insertion hasn't completed yet
@@ -144,12 +144,9 @@ jl_value_t **jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT
 JL_DLLEXPORT
 jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int *p_inserted)
 {
-    JL_GC_PUSH1(&h);
-    // &h may be assigned to in jl_idtable_rehash so it need to be rooted
     int inserted = jl_table_assign_bp(&h, key, val);
     if (p_inserted)
         *p_inserted = inserted;
-    JL_GC_POP();
     return h;
 }
 
@@ -158,21 +155,21 @@ jl_array_t *jl_eqtable_put(jl_array_t *h, jl_value_t *key, jl_value_t *val, int
 JL_DLLEXPORT
 jl_value_t *jl_eqtable_get(jl_array_t *h, jl_value_t *key, jl_value_t *deflt) JL_NOTSAFEPOINT
 {
-    jl_value_t **bp = jl_table_peek_bp(h, key);
+    _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key);
     return (bp == NULL) ? deflt : jl_atomic_load_relaxed(bp);
 }
 
 JL_DLLEXPORT
 jl_value_t *jl_eqtable_pop(jl_array_t *h, jl_value_t *key, jl_value_t *deflt, int *found)
 {
-    jl_value_t **bp = jl_table_peek_bp(h, key);
+    _Atomic(jl_value_t*) *bp = jl_table_peek_bp(h, key);
     if (found)
         *found = (bp != NULL);
     if (bp == NULL)
         return deflt;
-    jl_value_t *val = *bp;
-    *(bp - 1) = jl_nothing; // clear the key
-    *bp = NULL;
+    jl_value_t *val = jl_atomic_load_relaxed(bp);
+    jl_atomic_store_relaxed(bp - 1, jl_nothing); // clear the key
+    jl_atomic_store_relaxed(bp, NULL); // and the value (briefly corrupting the table)
     return val;
 }
 
diff --git a/src/init.c b/src/init.c
index 752db74e691d13..228222b3658fc6 100644
--- a/src/init.c
+++ b/src/init.c
@@ -10,8 +10,8 @@
 #include <string.h>
 #include <stdio.h>
 #include <fcntl.h>
-
 #include <errno.h>
+#include <libgen.h> // defines dirname
 
 #if !defined(_OS_WINDOWS_) || defined(_COMPILER_GCC_)
 #include <getopt.h>
@@ -28,17 +28,12 @@
 #undef DEFINE_BUILTIN_GLOBALS
 #include "threading.h"
 #include "julia_assert.h"
+#include "processor.h"
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#ifdef _MSC_VER
-JL_DLLEXPORT char *dirname(char *);
-#else
-#include <libgen.h>
-#endif
-
 #ifdef _OS_WINDOWS_
 extern int needsSymRefreshModuleList;
 extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
@@ -50,7 +45,7 @@ extern BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
 // list of modules being deserialized with __init__ methods
 jl_array_t *jl_module_init_order;
 
-size_t jl_page_size;
+JL_DLLEXPORT size_t jl_page_size;
 
 void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
 {
@@ -114,7 +109,7 @@ void jl_init_stack_limits(int ismaster, void **stack_lo, void **stack_hi)
 static void jl_prep_sanitizers(void)
 {
 #if !defined(_OS_WINDOWS_)
-#if defined(JL_ASAN_ENABLED) || defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
     struct rlimit rl;
 
     // When using the sanitizers, increase stack size because they bloat
@@ -170,8 +165,7 @@ static void jl_close_item_atexit(uv_handle_t *handle)
     switch(handle->type) {
     case UV_PROCESS:
         // cause Julia to forget about the Process object
-        if (handle->data)
-            jl_uv_call_close_callback((jl_value_t*)handle->data);
+        handle->data = NULL;
         // and make libuv think it is already dead
         ((uv_process_t*)handle)->pid = 0;
         // fall-through
@@ -204,7 +198,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
     if (jl_all_tls_states == NULL)
         return;
 
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
 
     if (exitcode == 0)
         jl_write_compiler_output();
@@ -217,14 +211,16 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
         jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("_atexit"));
         if (f != NULL) {
             JL_TRY {
-                size_t last_age = ptls->world_age;
-                ptls->world_age = jl_get_world_counter();
+                size_t last_age = ct->world_age;
+                ct->world_age = jl_get_world_counter();
                 jl_apply(&f, 1);
-                ptls->world_age = last_age;
+                ct->world_age = last_age;
             }
             JL_CATCH {
-                jl_printf(JL_STDERR, "\natexit hook threw an error: ");
-                jl_static_show(JL_STDERR, jl_current_exception());
+                jl_printf((JL_STREAM*)STDERR_FILENO, "\natexit hook threw an error: ");
+                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+                jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+                jlbacktrace(); // written to STDERR_FILENO
             }
         }
     }
@@ -234,7 +230,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
     JL_STDOUT = (uv_stream_t*) STDOUT_FILENO;
     JL_STDERR = (uv_stream_t*) STDERR_FILENO;
 
-    jl_gc_run_all_finalizers(ptls);
+    jl_gc_run_all_finalizers(ct);
 
     uv_loop_t *loop = jl_global_event_loop();
 
@@ -246,7 +242,7 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
     JL_UV_LOCK();
     uv_walk(loop, jl_uv_exitcleanup_walk, &queue);
     struct uv_shutdown_queue_item *item = queue.first;
-    if (ptls->current_task != NULL) {
+    if (ct != NULL) {
         while (item) {
             JL_TRY {
                 while (item) {
@@ -258,8 +254,10 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
                 //error handling -- continue cleanup, as much as possible
                 assert(item);
                 uv_unref(item->h);
-                jl_printf(JL_STDERR, "error during exit cleanup: close: ");
-                jl_static_show(JL_STDERR, jl_current_exception());
+                jl_printf((JL_STREAM*)STDERR_FILENO, "error during exit cleanup: close: ");
+                jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+                jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+                jlbacktrace(); // written to STDERR_FILENO
                 item = next_shutdown_queue_item(item);
             }
         }
@@ -288,14 +286,16 @@ JL_DLLEXPORT void jl_atexit_hook(int exitcode)
 
 static void post_boot_hooks(void);
 
-JL_DLLEXPORT void *jl_dl_handle;
-void *jl_RTLD_DEFAULT_handle;
+JL_DLLEXPORT void *jl_libjulia_internal_handle;
+JL_DLLEXPORT void *jl_libjulia_handle;
+JL_DLLEXPORT void *jl_RTLD_DEFAULT_handle;
 JL_DLLEXPORT void *jl_exe_handle;
 #ifdef _OS_WINDOWS_
 void *jl_ntdll_handle;
 void *jl_kernel32_handle;
 void *jl_crtdll_handle;
 void *jl_winsock_handle;
+extern const char *jl_crtdll_name;
 #endif
 
 uv_loop_t *jl_io_loop;
@@ -424,19 +424,7 @@ static void init_stdio(void)
     jl_flush_cstdio();
 }
 
-#ifdef JL_USE_INTEL_JITEVENTS
-char jl_using_intel_jitevents; // Non-zero if running under Intel VTune Amplifier
-#endif
-
-#ifdef JL_USE_OPROFILE_JITEVENTS
-char jl_using_oprofile_jitevents = 0; // Non-zero if running under OProfile
-#endif
-
-#ifdef JL_USE_PERF_JITEVENTS
-char jl_using_perf_jitevents = 0;
-#endif
-
-int isabspath(const char *in) JL_NOTSAFEPOINT
+int jl_isabspath(const char *in) JL_NOTSAFEPOINT
 {
 #ifdef _OS_WINDOWS_
     char c0 = in[0];
@@ -475,8 +463,8 @@ static char *abspath(const char *in, int nprefix)
             memcpy(out, in, sz + nprefix);
         }
         else {
-            size_t path_size = PATH_MAX;
-            char *path = (char*)malloc_s(PATH_MAX);
+            size_t path_size = JL_PATH_MAX;
+            char *path = (char*)malloc_s(JL_PATH_MAX);
             if (uv_cwd(path, &path_size)) {
                 jl_error("fatal error: unexpected error while retrieving current working directory");
             }
@@ -508,11 +496,11 @@ static char *abspath(const char *in, int nprefix)
 // unless `in` starts with `%`
 static const char *absformat(const char *in)
 {
-    if (in[0] == '%' || isabspath(in))
+    if (in[0] == '%' || jl_isabspath(in))
         return in;
     // get an escaped copy of cwd
-    size_t path_size = PATH_MAX;
-    char path[PATH_MAX];
+    size_t path_size = JL_PATH_MAX;
+    char path[JL_PATH_MAX];
     if (uv_cwd(path, &path_size)) {
         jl_error("fatal error: unexpected error while retrieving current working directory");
     }
@@ -536,17 +524,17 @@ static const char *absformat(const char *in)
 static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel)
 {   // this function resolves the paths in jl_options to absolute file locations as needed
     // and it replaces the pointers to `julia_bindir`, `julia_bin`, `image_file`, and output file paths
-    // it may fail, print an error, and exit(1) if any of these paths are longer than PATH_MAX
+    // it may fail, print an error, and exit(1) if any of these paths are longer than JL_PATH_MAX
     //
     // note: if you care about lost memory, you should call the appropriate `free()` function
     // on the original pointer for each `char*` you've inserted into `jl_options`, after
     // calling `julia_init()`
-    char *free_path = (char*)malloc_s(PATH_MAX);
-    size_t path_size = PATH_MAX;
+    char *free_path = (char*)malloc_s(JL_PATH_MAX);
+    size_t path_size = JL_PATH_MAX;
     if (uv_exepath(free_path, &path_size)) {
         jl_error("fatal error: unexpected error while retrieving exepath");
     }
-    if (path_size >= PATH_MAX) {
+    if (path_size >= JL_PATH_MAX) {
         jl_error("fatal error: jl_options.julia_bin path too long");
     }
     jl_options.julia_bin = (char*)malloc_s(path_size + 1);
@@ -563,12 +551,12 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel)
     free(free_path);
     free_path = NULL;
     if (jl_options.image_file) {
-        if (rel == JL_IMAGE_JULIA_HOME && !isabspath(jl_options.image_file)) {
+        if (rel == JL_IMAGE_JULIA_HOME && !jl_isabspath(jl_options.image_file)) {
             // build time path, relative to JULIA_BINDIR
-            free_path = (char*)malloc_s(PATH_MAX);
-            int n = snprintf(free_path, PATH_MAX, "%s" PATHSEPSTRING "%s",
+            free_path = (char*)malloc_s(JL_PATH_MAX);
+            int n = snprintf(free_path, JL_PATH_MAX, "%s" PATHSEPSTRING "%s",
                              jl_options.julia_bindir, jl_options.image_file);
-            if (n >= PATH_MAX || n < 0) {
+            if (n >= JL_PATH_MAX || n < 0) {
                 jl_error("fatal error: jl_options.image_file path too long");
             }
             jl_options.image_file = free_path;
@@ -592,6 +580,8 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel)
         jl_options.machine_file = abspath(jl_options.machine_file, 0);
     if (jl_options.output_code_coverage)
         jl_options.output_code_coverage = absformat(jl_options.output_code_coverage);
+    if (jl_options.tracked_path)
+        jl_options.tracked_path = absformat(jl_options.tracked_path);
 
     const char **cmdp = jl_options.cmds;
     if (cmdp) {
@@ -604,58 +594,74 @@ static void jl_resolve_sysimg_location(JL_IMAGE_SEARCH rel)
     }
 }
 
+JL_DLLEXPORT int jl_is_file_tracked(jl_sym_t *path)
+{
+    const char* path_ = jl_symbol_name(path);
+    int tpath_len = strlen(jl_options.tracked_path);
+    return (strlen(path_) >= tpath_len) && (strncmp(path_, jl_options.tracked_path, tpath_len) == 0);
+}
+
 static void jl_set_io_wait(int v)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    ptls->io_wait = v;
+    jl_task_t *ct = jl_current_task;
+    ct->ptls->io_wait = v;
 }
 
 extern jl_mutex_t jl_modules_mutex;
 
-void _julia_init(JL_IMAGE_SEARCH rel)
+static void restore_fp_env(void)
+{
+    if (jl_set_zero_subnormals(0) || jl_set_default_nans(0)) {
+        jl_error("Failed to configure floating point environment");
+    }
+}
+
+static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct);
+
+JL_DLLEXPORT int jl_default_debug_info_kind;
+
+JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
 {
+    jl_default_debug_info_kind = 0;
+
     jl_init_timing();
     // Make sure we finalize the tls callback before starting any threads.
-    jl_get_ptls_states_getter();
-    jl_ptls_t ptls = jl_get_ptls_states();
-    (void)ptls; assert(ptls); // make sure early that we have initialized ptls
+    (void)jl_get_pgcstack();
     jl_safepoint_init();
     libsupport_init();
     htable_new(&jl_current_modules, 0);
     JL_MUTEX_INIT(&jl_modules_mutex);
+    jl_precompile_toplevel_module = NULL;
     ios_set_io_wait_func = jl_set_io_wait;
     jl_io_loop = uv_default_loop(); // this loop will internal events (spawning process etc.),
                                     // best to call this first, since it also initializes libuv
     jl_init_uv();
     init_stdio();
+    restore_fp_env();
     restore_signals();
+    jl_init_intrinsic_properties();
 
     jl_page_size = jl_getpagesize();
-    uint64_t total_mem = uv_get_total_memory();
-    uint64_t constrained_mem = uv_get_constrained_memory();
-    if (constrained_mem > 0 && constrained_mem < total_mem)
-        total_mem = constrained_mem;
-    if (total_mem >= (size_t)-1) {
-        total_mem = (size_t)-1;
-    }
-    jl_arr_xtralloc_limit = total_mem / 100;  // Extra allocation limited to 1% of total RAM
     jl_prep_sanitizers();
     void *stack_lo, *stack_hi;
     jl_init_stack_limits(1, &stack_lo, &stack_hi);
-    jl_dl_handle = jl_load_dynamic_library(NULL, JL_RTLD_DEFAULT, 1);
+
+    jl_libjulia_internal_handle = jl_load_dynamic_library(NULL, JL_RTLD_DEFAULT, 1);
 #ifdef _OS_WINDOWS_
+    jl_exe_handle = GetModuleHandleA(NULL);
+    jl_RTLD_DEFAULT_handle = jl_libjulia_internal_handle;
+    if (!GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
+                            (LPCWSTR)&jl_any_type,
+                            (HMODULE*)&jl_libjulia_handle)) {
+        jl_error("could not load base module");
+    }
     jl_ntdll_handle = jl_dlopen("ntdll.dll", 0); // bypass julia's pathchecking for system dlls
     jl_kernel32_handle = jl_dlopen("kernel32.dll", 0);
-#if defined(_MSC_VER) && _MSC_VER == 1800
-    jl_crtdll_handle = jl_dlopen("msvcr120.dll", 0);
-#else
-    jl_crtdll_handle = jl_dlopen("msvcrt.dll", 0);
-#endif
+    jl_crtdll_handle = jl_dlopen(jl_crtdll_name, 0);
     jl_winsock_handle = jl_dlopen("ws2_32.dll", 0);
-    jl_exe_handle = GetModuleHandleA(NULL);
-    JL_MUTEX_INIT(&jl_in_stackwalk);
-    SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES);
-    if (!SymInitialize(GetCurrentProcess(), NULL, 1)) {
+    uv_mutex_init(&jl_in_stackwalk);
+    SymSetOptions(SYMOPT_UNDNAME | SYMOPT_DEFERRED_LOADS | SYMOPT_LOAD_LINES | SYMOPT_IGNORE_CVREC);
+    if (!SymInitialize(GetCurrentProcess(), "", 1)) {
         jl_printf(JL_STDERR, "WARNING: failed to initialize stack walk info\n");
     }
     needsSymRefreshModuleList = 0;
@@ -671,38 +677,27 @@ void _julia_init(JL_IMAGE_SEARCH rel)
 #endif
 #endif
 
-#if defined(JL_USE_INTEL_JITEVENTS)
-    const char *jit_profiling = getenv("ENABLE_JITPROFILING");
-    if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_intel_jitevents = 1;
-    }
-#endif
-
-#if defined(JL_USE_OPROFILE_JITEVENTS)
-    const char *jit_profiling = getenv("ENABLE_JITPROFILING");
-    if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_oprofile_jitevents = 1;
-    }
-#endif
-
-#if defined(JL_USE_PERF_JITEVENTS)
-    const char *jit_profiling = getenv("ENABLE_JITPROFILING");
-    if (jit_profiling && atoi(jit_profiling)) {
-        jl_using_perf_jitevents= 1;
-    }
-#endif
-
     if ((jl_options.outputo || jl_options.outputbc || jl_options.outputasm) &&
         (jl_options.code_coverage || jl_options.malloc_log)) {
         jl_error("cannot generate code-coverage or track allocation information while generating a .o, .bc, or .s output file");
     }
 
-    jl_gc_init();
-
+    jl_init_rand();
+    jl_init_runtime_ccall();
+    jl_init_tasks();
     jl_init_threading();
-    jl_init_intrinsic_properties();
 
-    jl_gc_enable(0);
+    jl_gc_init();
+    jl_ptls_t ptls = jl_init_threadtls(0);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    _finish_julia_init(rel, ptls, ct);
+}
+
+static NOINLINE void _finish_julia_init(JL_IMAGE_SEARCH rel, jl_ptls_t ptls, jl_task_t *ct)
+{
+    jl_init_threadinginfra();
 
     jl_resolve_sysimg_location(rel);
     // loads sysimg if available, and conditionally sets jl_options.cpu_target
@@ -713,17 +708,11 @@ void _julia_init(JL_IMAGE_SEARCH rel)
 
     if (jl_options.image_file) {
         jl_restore_system_image(jl_options.image_file);
-    }
-    else {
+    } else {
         jl_init_types();
         jl_init_codegen();
     }
 
-    jl_init_tasks();
-    jl_init_root_task(stack_lo, stack_hi);
-#ifdef ENABLE_TIMINGS
-    jl_root_task->timing_stack = jl_root_timing;
-#endif
     jl_init_common_symbols();
     jl_init_flisp();
     jl_init_serializer();
@@ -740,17 +729,7 @@ void _julia_init(JL_IMAGE_SEARCH rel)
         post_boot_hooks();
     }
 
-    if (jl_base_module != NULL) {
-        // Do initialization needed before starting child threads
-        jl_value_t *f = jl_get_global(jl_base_module, jl_symbol("__preinit_threads__"));
-        if (f) {
-            size_t last_age = ptls->world_age;
-            ptls->world_age = jl_get_world_counter();
-            jl_apply(&f, 1);
-            ptls->world_age = last_age;
-        }
-    }
-    else {
+    if (jl_base_module == NULL) {
         // nthreads > 1 requires code in Base
         jl_n_threads = 1;
     }
@@ -789,7 +768,6 @@ static void post_boot_hooks(void)
     jl_char_type    = (jl_datatype_t*)core("Char");
     jl_int8_type    = (jl_datatype_t*)core("Int8");
     jl_int16_type   = (jl_datatype_t*)core("Int16");
-    jl_uint16_type  = (jl_datatype_t*)core("UInt16");
     jl_float16_type = (jl_datatype_t*)core("Float16");
     jl_float32_type = (jl_datatype_t*)core("Float32");
     jl_float64_type = (jl_datatype_t*)core("Float64");
@@ -801,16 +779,18 @@ static void post_boot_hooks(void)
 
     jl_bool_type->super = jl_integer_type;
     jl_uint8_type->super = jl_unsigned_type;
-    jl_int32_type->super = jl_signed_type;
-    jl_int64_type->super = jl_signed_type;
+    jl_uint16_type->super = jl_unsigned_type;
     jl_uint32_type->super = jl_unsigned_type;
     jl_uint64_type->super = jl_unsigned_type;
+    jl_int32_type->super = jl_signed_type;
+    jl_int64_type->super = jl_signed_type;
 
     jl_errorexception_type = (jl_datatype_t*)core("ErrorException");
     jl_stackovf_exception  = jl_new_struct_uninit((jl_datatype_t*)core("StackOverflowError"));
     jl_diverror_exception  = jl_new_struct_uninit((jl_datatype_t*)core("DivideError"));
     jl_undefref_exception  = jl_new_struct_uninit((jl_datatype_t*)core("UndefRefError"));
     jl_undefvarerror_type  = (jl_datatype_t*)core("UndefVarError");
+    jl_atomicerror_type    = (jl_datatype_t*)core("ConcurrencyViolationError");
     jl_interrupt_exception = jl_new_struct_uninit((jl_datatype_t*)core("InterruptException"));
     jl_boundserror_type    = (jl_datatype_t*)core("BoundsError");
     jl_memory_exception    = jl_new_struct_uninit((jl_datatype_t*)core("OutOfMemoryError"));
@@ -823,6 +803,7 @@ static void post_boot_hooks(void)
     jl_methoderror_type    = (jl_datatype_t*)core("MethodError");
     jl_loaderror_type      = (jl_datatype_t*)core("LoadError");
     jl_initerror_type      = (jl_datatype_t*)core("InitError");
+    jl_pair_type           = core("Pair");
 
     jl_weakref_type = (jl_datatype_t*)core("WeakRef");
     jl_vecelement_typename = ((jl_datatype_t*)jl_unwrap_unionall(core("VecElement")))->name;
@@ -835,7 +816,7 @@ static void post_boot_hooks(void)
     for (i = 1; i < jl_core_module->bindings.size; i += 2) {
         if (table[i] != HT_NOTFOUND) {
             jl_binding_t *b = (jl_binding_t*)table[i];
-            jl_value_t *v = b->value;
+            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
             if (v) {
                 if (jl_is_unionall(v))
                     v = jl_unwrap_unionall(v);
diff --git a/src/interpreter.c b/src/interpreter.c
index ba97321a922eee..60bd4a6e1ce7e0 100644
--- a/src/interpreter.c
+++ b/src/interpreter.c
@@ -37,28 +37,30 @@ typedef struct {
   JL_GCC_IGNORE_STOP
 #endif
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 
 extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT;
 
 // This is necessary, because otherwise the analyzer considers this undefined
 // behavior and terminates the exploration
-#define JL_GC_PUSHFRAME(frame,n)     \
-  JL_CPPALLOCA(frame, sizeof(*frame)+((n) * sizeof(jl_value_t*)));                  \
+#define JL_GC_PUSHFRAME(frame,locals,n)     \
+  JL_CPPALLOCA(frame, sizeof(*frame)+((n) * sizeof(jl_value_t*)));  \
   memset(&frame[1], 0, sizeof(void*) * n); \
-  _JL_GC_PUSHARGS((jl_value_t**)&frame[1], n);
+  _JL_GC_PUSHARGS((jl_value_t**)&frame[1], n); \
+  locals = (jl_value_t**)&frame[1];
 
 #else
 
 #define JL_GC_ENCODE_PUSHFRAME(n)  ((((size_t)(n))<<2)|2)
 
-#define JL_GC_PUSHFRAME(frame,n)                                                    \
+#define JL_GC_PUSHFRAME(frame,locals,n)                                             \
   JL_CPPALLOCA(frame, sizeof(*frame)+(((n)+3)*sizeof(jl_value_t*)));                \
   ((void**)&frame[1])[0] = NULL;                                                    \
   ((void**)&frame[1])[1] = (void*)JL_GC_ENCODE_PUSHFRAME(n);                        \
   ((void**)&frame[1])[2] = jl_pgcstack;                                             \
   memset(&((void**)&frame[1])[3], 0, (n)*sizeof(jl_value_t*));                      \
-  jl_pgcstack = (jl_gcframe_t*)&(((void**)&frame[1])[1])
+  jl_pgcstack = (jl_gcframe_t*)&(((void**)&frame[1])[1]);                           \
+  locals = &((jl_value_t**)&frame[1])[3];
 
 // we define this separately so that we can populate the frame before we add it to the backtrace
 // it's recommended to mark the containing function with NOINLINE, though not essential
@@ -76,28 +78,37 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
 static jl_value_t *eval_methoddef(jl_expr_t *ex, interpreter_state *s)
 {
     jl_value_t **args = jl_array_ptr_data(ex->args);
-    jl_sym_t *fname = (jl_sym_t*)args[0];
-    jl_module_t *modu = s->module;
-    if (jl_is_globalref(fname)) {
-        modu = jl_globalref_mod(fname);
-        fname = jl_globalref_name(fname);
-    }
-    assert(jl_expr_nargs(ex) != 1 || jl_is_symbol(fname));
 
-    if (jl_is_symbol(fname)) {
+    // generic function definition
+    if (jl_expr_nargs(ex) == 1) {
+        jl_value_t **args = jl_array_ptr_data(ex->args);
+        jl_sym_t *fname = (jl_sym_t*)args[0];
+        jl_module_t *modu = s->module;
+        if (jl_is_globalref(fname)) {
+            modu = jl_globalref_mod(fname);
+            fname = jl_globalref_name(fname);
+        }
+        if (!jl_is_symbol(fname)) {
+            jl_error("method: invalid declaration");
+        }
         jl_value_t *bp_owner = (jl_value_t*)modu;
         jl_binding_t *b = jl_get_binding_for_method_def(modu, fname);
-        jl_value_t **bp = &b->value;
+        _Atomic(jl_value_t*) *bp = &b->value;
         jl_value_t *gf = jl_generic_function_def(b->name, b->owner, bp, bp_owner, b);
-        if (jl_expr_nargs(ex) == 1)
-            return gf;
+        return gf;
     }
 
-    jl_value_t *atypes = NULL, *meth = NULL;
-    JL_GC_PUSH2(&atypes, &meth);
+    jl_value_t *atypes = NULL, *meth = NULL, *fname = NULL;
+    JL_GC_PUSH3(&atypes, &meth, &fname);
+
+    fname = eval_value(args[0], s);
+    jl_methtable_t *mt = NULL;
+    if (jl_typeis(fname, jl_methtable_type)) {
+        mt = (jl_methtable_t*)fname;
+    }
     atypes = eval_value(args[1], s);
     meth = eval_value(args[2], s);
-    jl_method_def((jl_svec_t*)atypes, (jl_code_info_t*)meth, s->module);
+    jl_method_def((jl_svec_t*)atypes, mt, (jl_code_info_t*)meth, s->module);
     JL_GC_POP();
     return jl_nothing;
 }
@@ -200,13 +211,16 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
     jl_value_t **args = jl_array_ptr_data(ex->args);
     size_t nargs = jl_array_len(ex->args);
     jl_sym_t *head = ex->head;
-    if (head == call_sym) {
+    if (head == jl_call_sym) {
         return do_call(args, nargs, s);
     }
-    else if (head == invoke_sym) {
+    else if (head == jl_invoke_sym) {
         return do_invoke(args, nargs, s);
     }
-    else if (head == isdefined_sym) {
+    else if (head == jl_invoke_modify_sym) {
+        return do_call(args + 1, nargs - 1, s);
+    }
+    else if (head == jl_isdefined_sym) {
         jl_value_t *sym = args[0];
         int defined = 0;
         if (jl_is_slot(sym) || jl_is_argument(sym)) {
@@ -221,7 +235,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         else if (jl_is_symbol(sym)) {
             defined = jl_boundp(s->module, (jl_sym_t*)sym);
         }
-        else if (jl_is_expr(sym) && ((jl_expr_t*)sym)->head == static_parameter_sym) {
+        else if (jl_is_expr(sym) && ((jl_expr_t*)sym)->head == jl_static_parameter_sym) {
             ssize_t n = jl_unbox_long(jl_exprarg(sym, 0));
             assert(n > 0);
             if (s->sparam_vals && n <= jl_svec_len(s->sparam_vals)) {
@@ -238,19 +252,19 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         }
         return defined ? jl_true : jl_false;
     }
-    else if (head == throw_undef_if_not_sym) {
+    else if (head == jl_throw_undef_if_not_sym) {
         jl_value_t *cond = eval_value(args[1], s);
         assert(jl_is_bool(cond));
         if (cond == jl_false) {
             jl_sym_t *var = (jl_sym_t*)args[0];
-            if (var == getfield_undefref_sym)
+            if (var == jl_getfield_undefref_sym)
                 jl_throw(jl_undefref_exception);
             else
                 jl_undefined_var_error(var);
         }
         return jl_nothing;
     }
-    else if (head == new_sym) {
+    else if (head == jl_new_sym) {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, nargs);
         for (size_t i = 0; i < nargs; i++)
@@ -259,7 +273,7 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         JL_GC_POP();
         return v;
     }
-    else if (head == splatnew_sym) {
+    else if (head == jl_splatnew_sym) {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, 2);
         argv[0] = eval_value(args[0], s);
@@ -268,7 +282,18 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         JL_GC_POP();
         return v;
     }
-    else if (head == static_parameter_sym) {
+    else if (head == jl_new_opaque_closure_sym) {
+        jl_value_t **argv;
+        JL_GC_PUSHARGS(argv, nargs);
+        for (size_t i = 0; i < nargs; i++)
+            argv[i] = eval_value(args[i], s);
+        JL_NARGSV(new_opaque_closure, 4);
+        jl_value_t *ret = (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)argv[0], argv[1], argv[2],
+            argv[3], argv+4, nargs-4);
+        JL_GC_POP();
+        return ret;
+    }
+    else if (head == jl_static_parameter_sym) {
         ssize_t n = jl_unbox_long(args[0]);
         assert(n > 0);
         if (s->sparam_vals && n <= jl_svec_len(s->sparam_vals)) {
@@ -280,27 +305,34 @@ static jl_value_t *eval_value(jl_value_t *e, interpreter_state *s)
         // static parameter val unknown needs to be an error for ccall
         jl_error("could not determine static parameter value");
     }
-    else if (head == copyast_sym) {
+    else if (head == jl_copyast_sym) {
         return jl_copy_ast(eval_value(args[0], s));
     }
-    else if (head == exc_sym) {
+    else if (head == jl_exc_sym) {
         return jl_current_exception();
     }
-    else if (head == boundscheck_sym) {
+    else if (head == jl_boundscheck_sym) {
         return jl_true;
     }
-    else if (head == meta_sym || head == coverageeffect_sym || head == inbounds_sym || head == loopinfo_sym) {
+    else if (head == jl_meta_sym || head == jl_coverageeffect_sym || head == jl_inbounds_sym || head == jl_loopinfo_sym ||
+             head == jl_aliasscope_sym || head == jl_popaliasscope_sym || head == jl_inline_sym || head == jl_noinline_sym) {
         return jl_nothing;
     }
-    else if (head == gc_preserve_begin_sym || head == gc_preserve_end_sym) {
+    else if (head == jl_gc_preserve_begin_sym || head == jl_gc_preserve_end_sym) {
         // The interpreter generally keeps values that were assigned in this scope
         // rooted. If the interpreter learns to be more aggressive here, we may
         // want to explicitly root these values.
         return jl_nothing;
     }
-    else if (head == method_sym && nargs == 1) {
+    else if (head == jl_method_sym && nargs == 1) {
         return eval_methoddef(ex, s);
     }
+    else if (head == jl_foreigncall_sym) {
+        jl_error("`ccall` requires the compiler");
+    }
+    else if (head == jl_cfunction_sym) {
+        jl_error("`cfunction` requires the compiler");
+    }
     jl_errorf("unsupported or misplaced expression %s", jl_symbol_name(head));
     abort();
 }
@@ -391,13 +423,14 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
 {
     jl_handler_t __eh;
     size_t ns = jl_array_len(stmts);
+    jl_task_t *ct = jl_current_task;
 
     while (1) {
         s->ip = ip;
         if (ip >= ns)
             jl_error("`body` expression must terminate in `return`. Use `block` instead.");
         if (toplevel)
-            jl_get_ptls_states()->world_age = jl_world_counter;
+            ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_value_t *stmt = jl_array_ptr_ref(stmts, ip);
         assert(!jl_is_phinode(stmt));
         size_t next_ip = ip + 1;
@@ -429,7 +462,7 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
         else if (jl_is_expr(stmt)) {
             // Most exprs are allowed to end a BB by fall through
             jl_sym_t *head = ((jl_expr_t*)stmt)->head;
-            if (head == assign_sym) {
+            if (head == jl_assign_sym) {
                 jl_value_t *lhs = jl_exprarg(stmt, 0);
                 jl_value_t *rhs = eval_value(jl_exprarg(stmt, 1), s);
                 if (jl_is_slot(lhs)) {
@@ -450,12 +483,12 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                         sym = (jl_sym_t*)lhs;
                     }
                     JL_GC_PUSH1(&rhs);
-                    jl_binding_t *b = jl_get_binding_wr(modu, sym, 1);
+                    jl_binding_t *b = jl_get_binding_wr_or_error(modu, sym);
                     jl_checked_assignment(b, rhs);
                     JL_GC_POP();
                 }
             }
-            else if (head == enter_sym) {
+            else if (head == jl_enter_sym) {
                 jl_enter_handler(&__eh);
                 // This is a bit tricky, but supports the implementation of PhiC nodes.
                 // They are conceptually slots, but the slot to store to doesn't get explicitly
@@ -493,17 +526,16 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                     s->continue_at = 0;
                     continue;
                 }
-                else { // a real exeception
+                else { // a real exception
                     ip = catch_ip;
                     continue;
                 }
             }
-            else if (head == leave_sym) {
+            else if (head == jl_leave_sym) {
                 int hand_n_leave = jl_unbox_long(jl_exprarg(stmt, 0));
                 assert(hand_n_leave > 0);
                 // equivalent to jl_pop_handler(hand_n_leave), but retaining eh for longjmp:
-                jl_ptls_t ptls = jl_get_ptls_states();
-                jl_handler_t *eh = ptls->current_task->eh;
+                jl_handler_t *eh = ct->eh;
                 while (--hand_n_leave > 0)
                     eh = eh->prev;
                 jl_eh_restore_state(eh);
@@ -512,45 +544,50 @@ static jl_value_t *eval_body(jl_array_t *stmts, interpreter_state *s, size_t ip,
                 s->continue_at = next_ip;
                 jl_longjmp(eh->eh_ctx, 1);
             }
-            else if (head == pop_exception_sym) {
+            else if (head == jl_pop_exception_sym) {
                 size_t prev_state = jl_unbox_ulong(eval_value(jl_exprarg(stmt, 0), s));
                 jl_restore_excstack(prev_state);
             }
             else if (toplevel) {
-                if (head == method_sym && jl_expr_nargs(stmt) > 1) {
+                if (head == jl_method_sym && jl_expr_nargs(stmt) > 1) {
                     eval_methoddef((jl_expr_t*)stmt, s);
                 }
-                else if (head == toplevel_sym) {
+                else if (head == jl_toplevel_sym) {
                     jl_value_t *res = jl_toplevel_eval(s->module, stmt);
                     s->locals[jl_source_nslots(s->src) + s->ip] = res;
                 }
                 else if (jl_is_toplevel_only_expr(stmt)) {
                     jl_toplevel_eval(s->module, stmt);
                 }
-                else if (head == meta_sym) {
-                    if (jl_expr_nargs(stmt) == 1 && jl_exprarg(stmt, 0) == (jl_value_t*)nospecialize_sym) {
+                else if (head == jl_meta_sym) {
+                    if (jl_expr_nargs(stmt) == 1 && jl_exprarg(stmt, 0) == (jl_value_t*)jl_nospecialize_sym) {
                         jl_set_module_nospecialize(s->module, 1);
                     }
-                    if (jl_expr_nargs(stmt) == 1 && jl_exprarg(stmt, 0) == (jl_value_t*)specialize_sym) {
+                    if (jl_expr_nargs(stmt) == 1 && jl_exprarg(stmt, 0) == (jl_value_t*)jl_specialize_sym) {
                         jl_set_module_nospecialize(s->module, 0);
                     }
                     if (jl_expr_nargs(stmt) == 2) {
-                        if (jl_exprarg(stmt, 0) == (jl_value_t*)optlevel_sym) {
+                        if (jl_exprarg(stmt, 0) == (jl_value_t*)jl_optlevel_sym) {
                             if (jl_is_long(jl_exprarg(stmt, 1))) {
                                 int n = jl_unbox_long(jl_exprarg(stmt, 1));
                                 jl_set_module_optlevel(s->module, n);
                             }
                         }
-                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)compile_sym) {
+                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)jl_compile_sym) {
                             if (jl_is_long(jl_exprarg(stmt, 1))) {
                                 jl_set_module_compile(s->module, jl_unbox_long(jl_exprarg(stmt, 1)));
                             }
                         }
-                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)infer_sym) {
+                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)jl_infer_sym) {
                             if (jl_is_long(jl_exprarg(stmt, 1))) {
                                 jl_set_module_infer(s->module, jl_unbox_long(jl_exprarg(stmt, 1)));
                             }
                         }
+                        else if (jl_exprarg(stmt, 0) == (jl_value_t*)jl_max_methods_sym) {
+                            if (jl_is_long(jl_exprarg(stmt, 1))) {
+                                jl_set_module_max_methods(s->module, jl_unbox_long(jl_exprarg(stmt, 1)));
+                            }
+                        }
                     }
                 }
                 else {
@@ -618,8 +655,8 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     jl_array_t *stmts = src->code;
     assert(jl_typeis(stmts, jl_array_any_type));
     unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src) + 2;
-    JL_GC_PUSHFRAME(s, nroots);
-    jl_value_t **locals = (jl_value_t**)&s[1] + 3;
+    jl_value_t **locals = NULL;
+    JL_GC_PUSHFRAME(s, locals, nroots);
     locals[0] = (jl_value_t*)src;
     locals[1] = (jl_value_t*)stmts;
     s->locals = locals + 2;
@@ -651,23 +688,67 @@ jl_value_t *NOINLINE jl_fptr_interpret_call(jl_value_t *f, jl_value_t **args, ui
     return r;
 }
 
+JL_DLLEXPORT jl_callptr_t jl_fptr_interpret_call_addr = &jl_fptr_interpret_call;
+
+jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs)
+{
+    jl_method_t *source = oc->source;
+    jl_code_info_t *code = jl_uncompress_ir(source, NULL, (jl_array_t*)source->source);
+    interpreter_state *s;
+    unsigned nroots = jl_source_nslots(code) + jl_source_nssavalues(code) + 2;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = oc->world;
+    jl_value_t **locals = NULL;
+    JL_GC_PUSHFRAME(s, locals, nroots);
+    locals[0] = (jl_value_t*)oc;
+    // The analyzer has some trouble with this
+    locals[1] = (jl_value_t*)code;
+    JL_GC_PROMISE_ROOTED(code);
+    locals[2] = (jl_value_t*)oc->captures;
+    s->locals = locals + 2;
+    s->src = code;
+    s->module = source->module;
+    s->sparam_vals = NULL;
+    s->preevaluation = 0;
+    s->continue_at = 0;
+    s->mi = NULL;
+    size_t defargs = source->nargs;
+    int isva = source->isva;
+    assert(isva ? nargs + 2 >= defargs : nargs + 1 == defargs);
+    for (size_t i = 1; i < defargs - isva; i++)
+        s->locals[i] = args[i - 1];
+    if (isva) {
+        assert(defargs >= 2);
+        s->locals[defargs - 1] = jl_f_tuple(NULL, &args[defargs - 2], nargs + 2 - defargs);
+    }
+    JL_GC_ENABLEFRAME(s);
+    jl_value_t *r = eval_body(code->code, s, 0, 0);
+    locals[0] = r; // GC root
+    JL_GC_PROMISE_ROOTED(r);
+    jl_typeassert(r, jl_tparam1(jl_typeof(oc)));
+    ct->world_age = last_age;
+    JL_GC_POP();
+    return r;
+}
+
 jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t *src)
 {
     interpreter_state *s;
     unsigned nroots = jl_source_nslots(src) + jl_source_nssavalues(src);
-    JL_GC_PUSHFRAME(s, nroots);
+    JL_GC_PUSHFRAME(s, s->locals, nroots);
     jl_array_t *stmts = src->code;
     assert(jl_typeis(stmts, jl_array_any_type));
     s->src = src;
-    s->locals = (jl_value_t**)&s[1] + 3;
     s->module = m;
     s->sparam_vals = jl_emptysvec;
     s->continue_at = 0;
     s->mi = NULL;
     JL_GC_ENABLEFRAME(s);
-    size_t last_age = jl_get_ptls_states()->world_age;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
     jl_value_t *r = eval_body(stmts, s, 0, 1);
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
     JL_GC_POP();
     return r;
 }
@@ -678,7 +759,9 @@ jl_value_t *NOINLINE jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t
 jl_value_t *NOINLINE jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e, jl_code_info_t *src, jl_svec_t *sparam_vals)
 {
     interpreter_state *s;
-    JL_GC_PUSHFRAME(s, 0);
+    jl_value_t **locals;
+    JL_GC_PUSHFRAME(s, locals, 0);
+    (void)locals;
     s->src = src;
     s->module = m;
     s->sparam_vals = sparam_vals;
diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp
index 9d367f60e35f0a..2bb8cdcd9cf894 100644
--- a/src/intrinsics.cpp
+++ b/src/intrinsics.cpp
@@ -6,27 +6,57 @@ namespace JL_I {
 
 #include "ccall.cpp"
 
-using namespace JL_I;
+//Mark our stats as being from intrinsics irgen
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "julia_irgen_intrinsics"
+
+STATISTIC(EmittedConstants, "Number of constants emitted");
+STATISTIC(EmittedCoercedUnboxes, "Number of unbox coercions emitted");
+STATISTIC(EmittedUnboxes, "Number of unboxes emitted");
+STATISTIC(EmittedRuntimeCalls, "Number of runtime intrinsic calls emitted");
+STATISTIC(EmittedIntrinsics, "Number of intrinsic calls emitted");
+STATISTIC(Emitted_arraylen, "Number of arraylen calls emitted");
+STATISTIC(Emitted_pointerref, "Number of pointerref calls emitted");
+STATISTIC(Emitted_pointerset, "Number of pointerset calls emitted");
+STATISTIC(Emitted_atomic_fence, "Number of atomic_fence calls emitted");
+STATISTIC(Emitted_atomic_pointerref, "Number of atomic_pointerref calls emitted");
+STATISTIC(Emitted_atomic_pointerop, "Number of atomic_pointerop calls emitted");
+STATISTIC(Emitted_bitcast, "Number of bitcast calls emitted");
+STATISTIC(Emitted_trunc_int, "Number of trunc_int calls emitted");
+STATISTIC(Emitted_sext_int, "Number of sext_int calls emitted");
+STATISTIC(Emitted_zext_int, "Number of zext_int calls emitted");
+STATISTIC(Emitted_uitofp, "Number of uitofp calls emitted");
+STATISTIC(Emitted_sitofp, "Number of sitofp calls emitted");
+STATISTIC(Emitted_fptoui, "Number of fptoui calls emitted");
+STATISTIC(Emitted_fptosi, "Number of fptosi calls emitted");
+STATISTIC(Emitted_fptrunc, "Number of fptrunc calls emitted");
+STATISTIC(Emitted_fpext, "Number of fpext calls emitted");
+STATISTIC(Emitted_not_int, "Number of not_int calls emitted");
+STATISTIC(Emitted_have_fma, "Number of have_fma calls emitted");
+STATISTIC(EmittedUntypedIntrinsics, "Number of untyped intrinsics emitted");
 
-FunctionType *get_intr_args1(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue}, false); }
-FunctionType *get_intr_args2(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue, T_prjlvalue}, false); }
-FunctionType *get_intr_args3(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue, T_prjlvalue, T_prjlvalue}, false); }
-FunctionType *get_intr_args4(LLVMContext &C) { return FunctionType::get(T_prjlvalue, {T_prjlvalue, T_prjlvalue, T_prjlvalue, T_prjlvalue}, false); }
+using namespace JL_I;
 
-static JuliaFunction *runtime_func[num_intrinsics] = {
-#define ADD_I(name, nargs) new JuliaFunction{"jl_"#name, get_intr_args##nargs, nullptr},
+FunctionType *get_intr_args1(LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_prjlvalue_ty(C)}, false); }
+FunctionType *get_intr_args2(LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C)}, false); }
+FunctionType *get_intr_args3(LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C)}, false); }
+FunctionType *get_intr_args4(LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C)}, false); }
+FunctionType *get_intr_args5(LLVMContext &C) { return FunctionType::get(JuliaType::get_prjlvalue_ty(C), {JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C), JuliaType::get_prjlvalue_ty(C)}, false); }
+
+const auto &runtime_func() {
+    static struct runtime_funcs_t {
+        std::array<JuliaFunction *, num_intrinsics> runtime_func;
+        runtime_funcs_t() :
+        runtime_func{
+#define ADD_I(name, nargs) new JuliaFunction{XSTR(jl_##name), get_intr_args##nargs, nullptr},
 #define ADD_HIDDEN ADD_I
 #define ALIAS(alias, base) nullptr,
     INTRINSICS
 #undef ADD_I
 #undef ADD_HIDDEN
 #undef ALIAS
-};
-
-static bool float_func[num_intrinsics];
-
-static void jl_init_intrinsic_functions_codegen(void)
-{
+        }
+        {
 #define ADD_I(name, nargs)
 #define ADD_HIDDEN(name, nargs)
 #define ALIAS(alias, base) runtime_func[alias] = runtime_func[base];
@@ -34,43 +64,54 @@ static void jl_init_intrinsic_functions_codegen(void)
 #undef ADD_I
 #undef ADD_HIDDEN
 #undef ALIAS
+        }
+    } runtime_funcs;
+    return runtime_funcs.runtime_func;
+}
+
+const auto &float_func() {
+    static struct float_funcs_t {
+        std::bitset<num_intrinsics> float_func;
+        float_funcs_t() {
+            float_func[neg_float] = true;
+            float_func[neg_float_fast] = true;
+            float_func[add_float] = true;
+            float_func[sub_float] = true;
+            float_func[mul_float] = true;
+            float_func[div_float] = true;
+            float_func[rem_float] = true;
+            float_func[add_float_fast] = true;
+            float_func[sub_float_fast] = true;
+            float_func[mul_float_fast] = true;
+            float_func[div_float_fast] = true;
+            float_func[rem_float_fast] = true;
+            float_func[fma_float] = true;
+            float_func[muladd_float] = true;
+            float_func[eq_float] = true;
+            float_func[ne_float] = true;
+            float_func[lt_float] = true;
+            float_func[le_float] = true;
+            float_func[eq_float_fast] = true;
+            float_func[ne_float_fast] = true;
+            float_func[lt_float_fast] = true;
+            float_func[le_float_fast] = true;
+            float_func[fpiseq] = true;
+            float_func[abs_float] = true;
+            float_func[copysign_float] = true;
+            float_func[ceil_llvm] = true;
+            float_func[floor_llvm] = true;
+            float_func[trunc_llvm] = true;
+            float_func[rint_llvm] = true;
+            float_func[sqrt_llvm] = true;
+            float_func[sqrt_llvm_fast] = true;
+        }
+    } float_funcs;
 
-    float_func[neg_float] = true;
-    float_func[neg_float_fast] = true;
-    float_func[add_float] = true;
-    float_func[sub_float] = true;
-    float_func[mul_float] = true;
-    float_func[div_float] = true;
-    float_func[rem_float] = true;
-    float_func[add_float_fast] = true;
-    float_func[sub_float_fast] = true;
-    float_func[mul_float_fast] = true;
-    float_func[div_float_fast] = true;
-    float_func[rem_float_fast] = true;
-    float_func[fma_float] = true;
-    float_func[muladd_float] = true;
-    float_func[eq_float] = true;
-    float_func[ne_float] = true;
-    float_func[lt_float] = true;
-    float_func[le_float] = true;
-    float_func[eq_float_fast] = true;
-    float_func[ne_float_fast] = true;
-    float_func[lt_float_fast] = true;
-    float_func[le_float_fast] = true;
-    float_func[fpiseq] = true;
-    float_func[fpislt] = true;
-    float_func[abs_float] = true;
-    //float_func[copysign_float] = false; // this is actually an integer operation
-    float_func[ceil_llvm] = true;
-    float_func[floor_llvm] = true;
-    float_func[trunc_llvm] = true;
-    float_func[rint_llvm] = true;
-    float_func[sqrt_llvm] = true;
-    float_func[sqrt_llvm_fast] = true;
+    return float_funcs.float_func;
 }
 
 extern "C"
-JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION(void)
+JL_DLLEXPORT uint32_t jl_get_LLVM_VERSION_impl(void)
 {
     return 10000 * LLVM_VERSION_MAJOR + 100 * LLVM_VERSION_MINOR
 #ifdef LLVM_VERSION_PATCH
@@ -102,35 +143,35 @@ static Type *FLOATT(Type *t)
     if (t->isFloatingPointTy())
         return t;
     unsigned nb = (t->isPointerTy() ? sizeof(void*) * 8 : t->getPrimitiveSizeInBits());
+    auto &ctxt = t->getContext();
     if (nb == 64)
-        return T_float64;
+        return getDoubleTy(ctxt);
     if (nb == 32)
-        return T_float32;
-#ifndef DISABLE_FLOAT16
+        return getFloatTy(ctxt);
     if (nb == 16)
-        return T_float16;
-#endif
+        return getHalfTy(ctxt);
     if (nb == 128)
-        return T_float128;
+        return getFP128Ty(ctxt);
     return NULL;
 }
 
 // convert an llvm type to same-size int type
 static Type *INTT(Type *t)
 {
+    auto &ctxt = t->getContext();
     if (t->isIntegerTy())
         return t;
     if (t->isPointerTy())
-        return T_size;
-    if (t == T_float64)
-        return T_int64;
-    if (t == T_float32)
-        return T_int32;
-    if (t == T_float16)
-        return T_int16;
+        return getSizeTy(ctxt);
+    if (t == getDoubleTy(ctxt))
+        return getInt64Ty(ctxt);
+    if (t == getFloatTy(ctxt))
+        return getInt32Ty(ctxt);
+    if (t == getHalfTy(ctxt))
+        return getInt16Ty(ctxt);
     unsigned nb = t->getPrimitiveSizeInBits();
-    assert(t != T_void && nb > 0);
-    return IntegerType::get(jl_LLVMContext, nb);
+    assert(t != getVoidTy(ctxt) && nb > 0);
+    return IntegerType::get(ctxt, nb);
 }
 
 static Value *uint_cnvt(jl_codectx_t &ctx, Type *to, Value *x)
@@ -150,9 +191,9 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
     // make sure to return exactly the type specified by
     // julia_type_to_llvm as this will be assumed by the callee.
     if (bt == jl_bool_type)
-        return ConstantInt::get(T_int8, (*(const uint8_t*)ptr) ? 1 : 0);
+        return ConstantInt::get(getInt8Ty(ctx.builder.getContext()), (*(const uint8_t*)ptr) ? 1 : 0);
 
-    Type *lt = julia_struct_to_llvm(ctx, (jl_value_t*)bt, NULL, NULL);
+    Type *lt = julia_struct_to_llvm(ctx, (jl_value_t*)bt, NULL);
 
     if (jl_is_vecelement_type((jl_value_t*)bt) && !jl_is_uniontype(jl_tparam0(bt)))
         bt = (jl_datatype_t*)jl_tparam0(bt);
@@ -162,24 +203,29 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
 
     if (lt->isFloatTy()) {
         uint32_t data32 = *(const uint32_t*)ptr;
-        return ConstantFP::get(jl_LLVMContext,
+        return ConstantFP::get(ctx.builder.getContext(),
                 APFloat(lt->getFltSemantics(), APInt(32, data32)));
     }
     if (lt->isDoubleTy()) {
         uint64_t data64 = *(const uint64_t*)ptr;
-        return ConstantFP::get(jl_LLVMContext,
+        return ConstantFP::get(ctx.builder.getContext(),
                 APFloat(lt->getFltSemantics(), APInt(64, data64)));
     }
-    if (lt->isFloatingPointTy() || lt->isIntegerTy()) {
+    if (lt->isFloatingPointTy() || lt->isIntegerTy() || lt->isPointerTy()) {
         int nb = jl_datatype_size(bt);
         APInt val(8 * nb, 0);
         void *bits = const_cast<uint64_t*>(val.getRawData());
         assert(sys::IsLittleEndianHost);
         memcpy(bits, ptr, nb);
         if (lt->isFloatingPointTy()) {
-            return ConstantFP::get(jl_LLVMContext,
+            return ConstantFP::get(ctx.builder.getContext(),
                     APFloat(lt->getFltSemantics(), val));
         }
+        if (lt->isPointerTy()) {
+            Type *Ty = IntegerType::get(ctx.builder.getContext(), 8 * nb);
+            Constant *addr = ConstantInt::get(Ty, val);
+            return ConstantExpr::getIntToPtr(addr, lt);
+        }
         assert(cast<IntegerType>(lt)->getBitWidth() == 8u * nb);
         return ConstantInt::get(lt, val);
     }
@@ -193,23 +239,20 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
         if (type_is_ghost(lft))
             continue;
         assert(!jl_field_isptr(bt, i));
-        unsigned llvm_idx = isa<StructType>(lt) ? convert_struct_offset(lt, offs) : i;
+        unsigned llvm_idx = isa<StructType>(lt) ? convert_struct_offset(jl_Module->getDataLayout(), lt, offs) : i;
         while (fields.size() < llvm_idx)
             fields.push_back(
-#if JL_LLVM_VERSION >= 110000
                 UndefValue::get(GetElementPtrInst::getTypeAtIndex(lt, fields.size())));
-#else
-                UndefValue::get(cast<CompositeType>(lt)->getTypeAtIndex(fields.size())));
-#endif
         const uint8_t *ov = (const uint8_t*)ptr + offs;
         if (jl_is_uniontype(ft)) {
             // compute the same type layout as julia_struct_to_llvm
-            size_t fsz = jl_field_size(bt, i);
-            size_t al = jl_field_align(bt, i);
+            size_t fsz = 0, al = 0;
+            (void)jl_islayout_inline(ft, &fsz, &al);
+            fsz = jl_field_size(bt, i);
             uint8_t sel = ((const uint8_t*)ptr)[offs + fsz - 1];
             jl_value_t *active_ty = jl_nth_union_component(ft, sel);
             size_t active_sz = jl_datatype_size(active_ty);
-            Type *AlignmentType = IntegerType::get(jl_LLVMContext, 8 * al);
+            Type *AlignmentType = IntegerType::get(ctx.builder.getContext(), 8 * al);
             unsigned NumATy = (fsz - 1) / al;
             unsigned remainder = (fsz - 1) % al;
             while (NumATy--) {
@@ -239,15 +282,15 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
                     uint8_t byte = *ov;
                     APInt Elem(8, byte);
                     active_sz -= 1;
-                    fld = ConstantInt::get(T_int8, Elem);
+                    fld = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), Elem);
                 }
                 else {
-                    fld = UndefValue::get(T_int8);
+                    fld = UndefValue::get(getInt8Ty(ctx.builder.getContext()));
                 }
                 ov += 1;
                 fields.push_back(fld);
             }
-            fields.push_back(ConstantInt::get(T_int8, sel));
+            fields.push_back(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), sel));
         }
         else {
             Constant *val = julia_const_to_llvm(ctx, ov, (jl_datatype_t*)ft);
@@ -259,39 +302,41 @@ static Constant *julia_const_to_llvm(jl_codectx_t &ctx, const void *ptr, jl_data
         return ConstantVector::get(fields);
     if (StructType *st = dyn_cast<StructType>(lt))
         return ConstantStruct::get(st, fields);
-    ArrayType *at = cast<ArrayType>(lt);
-    return ConstantArray::get(at, fields);
+    if (ArrayType *at = dyn_cast<ArrayType>(lt))
+        return ConstantArray::get(at, fields);
+    assert(false && "Unknown LLVM type");
+    jl_unreachable();
 }
 
 static Constant *julia_const_to_llvm(jl_codectx_t &ctx, jl_value_t *e)
 {
     if (e == jl_true)
-        return ConstantInt::get(T_int8, 1);
+        return ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1);
     if (e == jl_false)
-        return ConstantInt::get(T_int8, 0);
+        return ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0);
     jl_value_t *bt = jl_typeof(e);
     if (!jl_is_pointerfree(bt))
         return NULL;
     return julia_const_to_llvm(ctx, e, (jl_datatype_t*)bt);
 }
 
-static jl_cgval_t ghostValue(jl_value_t *ty);
-
 static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
 {
     Type *ty = unboxed->getType();
+    if (ty == to)
+        return unboxed;
     bool frompointer = ty->isPointerTy();
     bool topointer = to->isPointerTy();
-    const DataLayout &DL = jl_data_layout;
-    if (ty == T_int1 && to == T_int8) {
+    const DataLayout &DL = jl_Module->getDataLayout();
+    if (ty == getInt1Ty(ctx.builder.getContext()) && to == getInt8Ty(ctx.builder.getContext())) {
         // bools may be stored internally as int8
-        unboxed = ctx.builder.CreateZExt(unboxed, T_int8);
+        unboxed = ctx.builder.CreateZExt(unboxed, getInt8Ty(ctx.builder.getContext()));
     }
-    else if (ty == T_int8 && to == T_int1) {
+    else if (ty == getInt8Ty(ctx.builder.getContext()) && to == getInt1Ty(ctx.builder.getContext())) {
         // bools may be stored internally as int8
-        unboxed = ctx.builder.CreateTrunc(unboxed, T_int1);
+        unboxed = ctx.builder.CreateTrunc(unboxed, getInt1Ty(ctx.builder.getContext()));
     }
-    else if (ty == T_void || DL.getTypeSizeInBits(ty) != DL.getTypeSizeInBits(to)) {
+    else if (ty == getVoidTy(ctx.builder.getContext()) || DL.getTypeSizeInBits(ty) != DL.getTypeSizeInBits(to)) {
         // this can happen in dead code
         //emit_unreachable(ctx);
         return UndefValue::get(to);
@@ -299,6 +344,15 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
     if (frompointer && topointer) {
         unboxed = emit_bitcast(ctx, unboxed, to);
     }
+    else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) {
+#ifndef JL_NDEBUG
+        const DataLayout &DL = jl_Module->getDataLayout();
+#endif
+        assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to));
+        AllocaInst *cast = ctx.builder.CreateAlloca(ty);
+        ctx.builder.CreateStore(unboxed, cast);
+        unboxed = ctx.builder.CreateLoad(to, ctx.builder.CreateBitCast(cast, to->getPointerTo()));
+    }
     else if (frompointer) {
         Type *INTT_to = INTT(to);
         unboxed = ctx.builder.CreatePtrToInt(unboxed, INTT_to);
@@ -311,7 +365,7 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
             unboxed = ctx.builder.CreateBitCast(unboxed, INTT_to);
         unboxed = emit_inttoptr(ctx, unboxed, to);
     }
-    else if (ty != to) {
+    else {
         unboxed = ctx.builder.CreateBitCast(unboxed, to);
     }
     return unboxed;
@@ -320,7 +374,7 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed)
 // emit code to unpack a raw value from a box into registers or a stack slot
 static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_value_t *jt, Value *dest, MDNode *tbaa_dest, bool isVolatile)
 {
-    assert(to != T_void);
+    assert(to != getVoidTy(ctx.builder.getContext()));
     // TODO: fully validate that x.typ == jt?
     if (x.isghost) {
         // this can happen when a branch yielding a different type ends
@@ -348,17 +402,17 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
     // bools stored as int8, so an extra Trunc is needed to get an int1
     Value *p = x.constant ? literal_pointer_val(ctx, x.constant) : x.V;
 
-    if (jt == (jl_value_t*)jl_bool_type || to == T_int1) {
-        Instruction *unbox_load = tbaa_decorate(x.tbaa, ctx.builder.CreateLoad(T_int8, maybe_bitcast(ctx, p, T_pint8)));
+    if (jt == (jl_value_t*)jl_bool_type || to == getInt1Ty(ctx.builder.getContext())) {
+        Instruction *unbox_load = tbaa_decorate(x.tbaa, ctx.builder.CreateLoad(getInt8Ty(ctx.builder.getContext()), maybe_bitcast(ctx, p, getInt8PtrTy(ctx.builder.getContext()))));
         if (jt == (jl_value_t*)jl_bool_type)
-            unbox_load->setMetadata(LLVMContext::MD_range, MDNode::get(jl_LLVMContext, {
-                ConstantAsMetadata::get(ConstantInt::get(T_int8, 0)),
-                ConstantAsMetadata::get(ConstantInt::get(T_int8, 2)) }));
+            unbox_load->setMetadata(LLVMContext::MD_range, MDNode::get(ctx.builder.getContext(), {
+                ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0)),
+                ConstantAsMetadata::get(ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 2)) }));
         Value *unboxed;
-        if (to == T_int1)
-            unboxed = ctx.builder.CreateTrunc(unbox_load, T_int1);
+        if (to == getInt1Ty(ctx.builder.getContext()))
+            unboxed = ctx.builder.CreateTrunc(unbox_load, getInt1Ty(ctx.builder.getContext()));
         else
-            unboxed = unbox_load; // `to` must be T_int8
+            unboxed = unbox_load; // `to` must be getInt8Ty(ctx.builder.getContext())
         if (!dest)
             return unboxed;
         Type *dest_ty = unboxed->getType()->getPointerTo();
@@ -382,17 +436,17 @@ static Value *emit_unbox(jl_codectx_t &ctx, Type *to, const jl_cgval_t &x, jl_va
             // appropriate coercion manually.
             AllocaInst *AI = cast<AllocaInst>(p);
             Type *AllocType = AI->getAllocatedType();
-            const DataLayout &DL = jl_data_layout;
+            const DataLayout &DL = jl_Module->getDataLayout();
             if (!AI->isArrayAllocation() &&
                     (AllocType->isFloatingPointTy() || AllocType->isIntegerTy() || AllocType->isPointerTy()) &&
                     (to->isFloatingPointTy() || to->isIntegerTy() || to->isPointerTy()) &&
                     DL.getTypeSizeInBits(AllocType) == DL.getTypeSizeInBits(to)) {
-                Instruction *load = ctx.builder.CreateAlignedLoad(p, Align(alignment));
+                Instruction *load = ctx.builder.CreateAlignedLoad(AllocType, p, Align(alignment));
                 return emit_unboxed_coercion(ctx, to, tbaa_decorate(x.tbaa, load));
             }
         }
         p = maybe_bitcast(ctx, p, ptype);
-        Instruction *load = ctx.builder.CreateAlignedLoad(p, Align(alignment));
+        Instruction *load = ctx.builder.CreateAlignedLoad(to, p, Align(alignment));
         return tbaa_decorate(x.tbaa, load);
     }
 }
@@ -410,7 +464,7 @@ static jl_value_t *staticeval_bitstype(const jl_cgval_t &targ)
 
 static jl_cgval_t emit_runtime_call(jl_codectx_t &ctx, JL_I::intrinsic f, const jl_cgval_t *argv, size_t nargs)
 {
-    Function *func = prepare_call(runtime_func[f]);
+    Function *func = prepare_call(runtime_func()[f]);
     Value **argvalues = (Value**)alloca(sizeof(Value*) * nargs);
     for (size_t i = 0; i < nargs; ++i) {
         argvalues[i] = boxed(ctx, argv[i]);
@@ -431,7 +485,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
     if (!bt)
         return emit_runtime_call(ctx, bitcast, argv, 2);
 
-    Type *llvmt = bitstype_to_llvm(bt);
+    Type *llvmt = bitstype_to_llvm(bt, ctx.builder.getContext());
     int nb = jl_datatype_size(bt);
 
     // Examine the second argument //
@@ -447,19 +501,19 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
             }
             else {
                 emit_error(ctx, "bitcast: expected primitive type value for second argument");
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
         if (!jl_is_datatype(v.typ) || jl_datatype_size(v.typ) != nb) {
             if (isboxed) {
                 Value *size = emit_datatype_size(ctx, typ);
                 error_unless(ctx,
-                        ctx.builder.CreateICmpEQ(size, ConstantInt::get(T_int32, nb)),
+                        ctx.builder.CreateICmpEQ(size, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), nb)),
                         "bitcast: argument size does not match size of target type");
             }
             else {
                 emit_error(ctx, "bitcast: argument size does not match size of target type");
-                return jl_cgval_t();
+                return jl_cgval_t(ctx.builder.getContext());
             }
         }
     }
@@ -476,16 +530,18 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
         // but if the v.typ is not well known, use llvmt
         if (isboxed)
             vxt = llvmt;
+        auto storage_type = vxt == getInt1Ty(ctx.builder.getContext()) ? getInt8Ty(ctx.builder.getContext()) : vxt;
         vx = tbaa_decorate(v.tbaa, ctx.builder.CreateLoad(
-                    emit_bitcast(ctx, data_pointer(ctx, v),
-                        vxt == T_int1 ? T_pint8 : vxt->getPointerTo())));
+            storage_type,
+            emit_bitcast(ctx, data_pointer(ctx, v),
+                storage_type->getPointerTo())));
     }
 
     vxt = vx->getType();
     if (vxt != llvmt) {
-        if (llvmt == T_int1)
+        if (llvmt == getInt1Ty(ctx.builder.getContext()))
             vx = ctx.builder.CreateTrunc(vx, llvmt);
-        else if (vxt == T_int1 && llvmt == T_int8)
+        else if (vxt == getInt1Ty(ctx.builder.getContext()) && llvmt == getInt8Ty(ctx.builder.getContext()))
             vx = ctx.builder.CreateZExt(vx, llvmt);
         else if (vxt->isPointerTy() && !llvmt->isPointerTy())
             vx = ctx.builder.CreatePtrToInt(vx, llvmt);
@@ -500,7 +556,7 @@ static jl_cgval_t generic_bitcast(jl_codectx_t &ctx, const jl_cgval_t *argv)
     }
     else {
         Value *box = emit_allocobj(ctx, nb, boxed(ctx, bt_value));
-        init_bits_value(ctx, box, vx, tbaa_immut);
+        init_bits_value(ctx, box, vx, ctx.tbaa().tbaa_immut);
         return mark_julia_type(ctx, box, true, bt);
     }
 }
@@ -515,8 +571,8 @@ static jl_cgval_t generic_cast(
     jl_value_t *jlto = staticeval_bitstype(targ);
     if (!jlto || !jl_is_primitivetype(v.typ))
         return emit_runtime_call(ctx, f, argv, 2);
-    Type *to = bitstype_to_llvm(jlto);
-    Type *vt = bitstype_to_llvm(v.typ);
+    Type *to = bitstype_to_llvm(jlto, ctx.builder.getContext());
+    Type *vt = bitstype_to_llvm(v.typ, ctx.builder.getContext());
     if (toint)
         to = INTT(to);
     else
@@ -544,6 +600,8 @@ static jl_cgval_t generic_cast(
 #endif
     }
     Value *ans = ctx.builder.CreateCast(Op, from, to);
+    if (f == fptosi || f == fptoui)
+        ans = ctx.builder.CreateFreeze(ans);
     return mark_julia_type(ctx, ans, false, jlto);
 }
 
@@ -570,34 +628,30 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
     jl_value_t *ety = jl_tparam0(aty);
     if (jl_is_typevar(ety))
         return emit_runtime_pointerref(ctx, argv);
-    if (!jl_is_datatype(ety))
-        ety = (jl_value_t*)jl_any_type;
+    if (!is_valid_intrinsic_elptr(ety)) {
+        emit_error(ctx, "pointerref: invalid pointer type");
+        return jl_cgval_t(ctx.builder.getContext());
+    }
 
-    Value *idx = emit_unbox(ctx, T_size, i, (jl_value_t*)jl_long_type);
-    Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(T_size, 1));
+    Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), i, (jl_value_t*)jl_long_type);
+    Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
 
     if (ety == (jl_value_t*)jl_any_type) {
-        Value *thePtr = emit_unbox(ctx, T_pprjlvalue, e, e.typ);
-        return mark_julia_type(
-                ctx,
-                ctx.builder.CreateAlignedLoad(ctx.builder.CreateInBoundsGEP(T_prjlvalue, thePtr, im1), Align(align_nb)),
-                true,
-                ety);
+        Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, ctx.builder.CreateInBoundsGEP(ctx.types().T_prjlvalue, thePtr, im1), Align(align_nb));
+        tbaa_decorate(ctx.tbaa().tbaa_data, load);
+        return mark_julia_type(ctx, load, true, ety);
     }
     else if (!jl_isbits(ety)) {
-        if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_concrete_type(ety)) {
-            emit_error(ctx, "pointerref: invalid pointer type");
-            return jl_cgval_t();
-        }
         assert(jl_is_datatype(ety));
         uint64_t size = jl_datatype_size(ety);
         Value *strct = emit_allocobj(ctx, size,
                                      literal_pointer_val(ctx, ety));
-        im1 = ctx.builder.CreateMul(im1, ConstantInt::get(T_size,
+        im1 = ctx.builder.CreateMul(im1, ConstantInt::get(getSizeTy(ctx.builder.getContext()),
                     LLT_ALIGN(size, jl_datatype_align(ety))));
-        Value *thePtr = emit_unbox(ctx, T_pint8, e, e.typ);
-        thePtr = ctx.builder.CreateInBoundsGEP(T_int8, emit_bitcast(ctx, thePtr, T_pint8), im1);
-        MDNode *tbaa = best_tbaa(ety);
+        Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
+        thePtr = ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), emit_bitcast(ctx, thePtr, getInt8PtrTy(ctx.builder.getContext())), im1);
+        MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
         emit_memcpy(ctx, strct, tbaa, thePtr, nullptr, size, 1);
         return mark_julia_type(ctx, strct, true, ety);
     }
@@ -607,10 +661,10 @@ static jl_cgval_t emit_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
             Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
-            return typed_load(ctx, thePtr, im1, ety, tbaa_data, nullptr, true, align_nb);
+            return typed_load(ctx, thePtr, im1, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, AtomicOrdering::NotAtomic, true, align_nb);
         }
         else {
-            return ghostValue(ety);
+            return ghostValue(ctx, ety);
         }
     }
 }
@@ -642,32 +696,30 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         return emit_runtime_pointerset(ctx, argv);
     if (align.constant == NULL || !jl_is_long(align.constant))
         return emit_runtime_pointerset(ctx, argv);
-    if (!jl_is_datatype(ety))
-        ety = (jl_value_t*)jl_any_type;
+    if (!is_valid_intrinsic_elptr(ety)) {
+        emit_error(ctx, "pointerset: invalid pointer type");
+        return jl_cgval_t(ctx.builder.getContext());
+    }
     emit_typecheck(ctx, x, ety, "pointerset");
 
-    Value *idx = emit_unbox(ctx, T_size, i, (jl_value_t*)jl_long_type);
-    Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(T_size, 1));
+    Value *idx = emit_unbox(ctx, getSizeTy(ctx.builder.getContext()), i, (jl_value_t*)jl_long_type);
+    Value *im1 = ctx.builder.CreateSub(idx, ConstantInt::get(getSizeTy(ctx.builder.getContext()), 1));
 
     Value *thePtr;
     if (ety == (jl_value_t*)jl_any_type) {
         // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots.
-        thePtr = emit_unbox(ctx, T_psize, e, e.typ);
+        thePtr = emit_unbox(ctx, getSizePtrTy(ctx.builder.getContext()), e, e.typ);
         Instruction *store = ctx.builder.CreateAlignedStore(
-          ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), T_size),
-            ctx.builder.CreateInBoundsGEP(T_size, thePtr, im1), Align(align_nb));
-        tbaa_decorate(tbaa_data, store);
+          ctx.builder.CreatePtrToInt(emit_pointer_from_objref(ctx, boxed(ctx, x)), getSizeTy(ctx.builder.getContext())),
+            ctx.builder.CreateInBoundsGEP(getSizeTy(ctx.builder.getContext()), thePtr, im1), Align(align_nb));
+        tbaa_decorate(ctx.tbaa().tbaa_data, store);
     }
     else if (!jl_isbits(ety)) {
-        if (!jl_is_structtype(ety) || jl_is_array_type(ety) || !jl_is_concrete_type(ety)) {
-            emit_error(ctx, "pointerset: invalid pointer type");
-            return jl_cgval_t();
-        }
-        thePtr = emit_unbox(ctx, T_pint8, e, e.typ);
+        thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
         uint64_t size = jl_datatype_size(ety);
-        im1 = ctx.builder.CreateMul(im1, ConstantInt::get(T_size,
+        im1 = ctx.builder.CreateMul(im1, ConstantInt::get(getSizeTy(ctx.builder.getContext()),
                     LLT_ALIGN(size, jl_datatype_align(ety))));
-        emit_memcpy(ctx, ctx.builder.CreateInBoundsGEP(T_int8, thePtr, im1), nullptr, x, size, align_nb);
+        emit_memcpy(ctx, ctx.builder.CreateInBoundsGEP(getInt8Ty(ctx.builder.getContext()), thePtr, im1), nullptr, x, size, align_nb);
     }
     else {
         bool isboxed;
@@ -675,21 +727,190 @@ static jl_cgval_t emit_pointerset(jl_codectx_t &ctx, jl_cgval_t *argv)
         assert(!isboxed);
         if (!type_is_ghost(ptrty)) {
             thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
-            typed_store(ctx, thePtr, im1, x, ety, tbaa_data, nullptr, nullptr, align_nb);
+            typed_store(ctx, thePtr, im1, x, jl_cgval_t(ctx.builder.getContext()), ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
+                        AtomicOrdering::NotAtomic, AtomicOrdering::NotAtomic, align_nb, false, true, false, false, false, false, nullptr, "");
         }
     }
     return e;
 }
 
+static jl_cgval_t emit_atomicfence(jl_codectx_t &ctx, jl_cgval_t *argv)
+{
+    const jl_cgval_t &ord = argv[0];
+    if (ord.constant && jl_is_symbol(ord.constant)) {
+        enum jl_memory_order order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, true);
+        if (order == jl_memory_order_invalid) {
+            emit_atomic_error(ctx, "invalid atomic ordering");
+            return jl_cgval_t(ctx.builder.getContext()); // unreachable
+        }
+        if (order > jl_memory_order_monotonic)
+            ctx.builder.CreateFence(get_llvm_atomic_order(order));
+        return ghostValue(ctx, jl_nothing_type);
+    }
+    return emit_runtime_call(ctx, atomic_fence, argv, 1);
+}
+
+static jl_cgval_t emit_atomic_pointerref(jl_codectx_t &ctx, jl_cgval_t *argv)
+{
+    const jl_cgval_t &e = argv[0];
+    const jl_cgval_t &ord = argv[1];
+    jl_value_t *aty = e.typ;
+    if (!jl_is_cpointer_type(aty) || !ord.constant || !jl_is_symbol(ord.constant))
+        return emit_runtime_call(ctx, atomic_pointerref, argv, 2);
+    jl_value_t *ety = jl_tparam0(aty);
+    if (jl_is_typevar(ety))
+        return emit_runtime_call(ctx, atomic_pointerref, argv, 2);
+    enum jl_memory_order order = jl_get_atomic_order((jl_sym_t*)ord.constant, true, false);
+    if (order == jl_memory_order_invalid) {
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        return jl_cgval_t(ctx.builder.getContext()); // unreachable
+    }
+    AtomicOrdering llvm_order = get_llvm_atomic_order(order);
+
+    if (ety == (jl_value_t*)jl_any_type) {
+        Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(ctx.types().T_prjlvalue, thePtr, Align(sizeof(jl_value_t*)));
+        tbaa_decorate(ctx.tbaa().tbaa_data, load);
+        load->setOrdering(llvm_order);
+        return mark_julia_type(ctx, load, true, ety);
+    }
+
+    if (!is_valid_intrinsic_elptr(ety)) {
+        emit_error(ctx, "atomic_pointerref: invalid pointer type");
+        return jl_cgval_t(ctx.builder.getContext());
+    }
+
+    size_t nb = jl_datatype_size(ety);
+    if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) {
+        emit_error(ctx, "atomic_pointerref: invalid pointer for atomic operation");
+        return jl_cgval_t(ctx.builder.getContext());
+    }
+
+    if (!jl_isbits(ety)) {
+        assert(jl_is_datatype(ety));
+        uint64_t size = jl_datatype_size(ety);
+        Value *strct = emit_allocobj(ctx, size,
+                                     literal_pointer_val(ctx, ety));
+        Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
+        Type *loadT = Type::getIntNTy(ctx.builder.getContext(), nb * 8);
+        thePtr = emit_bitcast(ctx, thePtr, loadT->getPointerTo());
+        MDNode *tbaa = best_tbaa(ctx.tbaa(), ety);
+        LoadInst *load = ctx.builder.CreateAlignedLoad(loadT, thePtr, Align(nb));
+        tbaa_decorate(tbaa, load);
+        load->setOrdering(llvm_order);
+        thePtr = emit_bitcast(ctx, strct, thePtr->getType());
+        StoreInst *store = ctx.builder.CreateAlignedStore(load, thePtr, Align(julia_alignment(ety)));
+        tbaa_decorate(tbaa, store);
+        return mark_julia_type(ctx, strct, true, ety);
+    }
+    else {
+        bool isboxed;
+        Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
+        assert(!isboxed);
+        if (!type_is_ghost(ptrty)) {
+            Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
+            return typed_load(ctx, thePtr, nullptr, ety, ctx.tbaa().tbaa_data, nullptr, isboxed, llvm_order, true, nb);
+        }
+        else {
+            if (order > jl_memory_order_monotonic)
+                ctx.builder.CreateFence(llvm_order);
+            return ghostValue(ctx, ety);
+        }
+    }
+}
+
+// e[i] = x (set)
+// e[i] <= x (swap)
+// e[i] y => x (replace)
+// x(e[i], y) (modify)
+static jl_cgval_t emit_atomic_pointerop(jl_codectx_t &ctx, intrinsic f, const jl_cgval_t *argv, int nargs, const jl_cgval_t *modifyop)
+{
+    bool issetfield = f == atomic_pointerset;
+    bool isreplacefield = f == atomic_pointerreplace;
+    bool isswapfield = f == atomic_pointerswap;
+    bool ismodifyfield = f == atomic_pointermodify;
+    const jl_cgval_t undefval(ctx.builder.getContext());
+    const jl_cgval_t &e = argv[0];
+    const jl_cgval_t &x = isreplacefield || ismodifyfield ? argv[2] : argv[1];
+    const jl_cgval_t &y = isreplacefield || ismodifyfield ? argv[1] : undefval;
+    const jl_cgval_t &ord = isreplacefield || ismodifyfield ? argv[3] : argv[2];
+    const jl_cgval_t &failord = isreplacefield ? argv[4] : undefval;
+
+    jl_value_t *aty = e.typ;
+    if (!jl_is_cpointer_type(aty) || !ord.constant || !jl_is_symbol(ord.constant))
+        return emit_runtime_call(ctx, f, argv, nargs);
+    if (isreplacefield) {
+        if (!failord.constant || !jl_is_symbol(failord.constant))
+            return emit_runtime_call(ctx, f, argv, nargs);
+    }
+    jl_value_t *ety = jl_tparam0(aty);
+    if (jl_is_typevar(ety))
+        return emit_runtime_call(ctx, f, argv, nargs);
+    enum jl_memory_order order = jl_get_atomic_order((jl_sym_t*)ord.constant, !issetfield, true);
+    enum jl_memory_order failorder = isreplacefield ? jl_get_atomic_order((jl_sym_t*)failord.constant, true, false) : order;
+    if (order == jl_memory_order_invalid || failorder == jl_memory_order_invalid || failorder > order) {
+        emit_atomic_error(ctx, "invalid atomic ordering");
+        return jl_cgval_t(ctx.builder.getContext()); // unreachable
+    }
+    AtomicOrdering llvm_order = get_llvm_atomic_order(order);
+    AtomicOrdering llvm_failorder = get_llvm_atomic_order(failorder);
+
+    if (ety == (jl_value_t*)jl_any_type) {
+        // unsafe_store to Ptr{Any} is allowed to implicitly drop GC roots.
+        // n.b.: the expected value (y) must be rooted, but not the others
+        Value *thePtr = emit_unbox(ctx, ctx.types().T_pprjlvalue, e, e.typ);
+        bool isboxed = true;
+        jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
+                    llvm_order, llvm_failorder, sizeof(jl_value_t*), false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
+        if (issetfield)
+            ret = e;
+        return ret;
+    }
+
+    if (!is_valid_intrinsic_elptr(ety)) {
+        std::string msg(StringRef(jl_intrinsic_name((int)f)));
+        msg += ": invalid pointer type";
+        emit_error(ctx, msg);
+        return jl_cgval_t(ctx.builder.getContext());
+    }
+    if (!ismodifyfield)
+        emit_typecheck(ctx, x, ety, std::string(jl_intrinsic_name((int)f)));
+
+    size_t nb = jl_datatype_size(ety);
+    if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE) {
+        std::string msg(StringRef(jl_intrinsic_name((int)f)));
+        msg += ": invalid pointer for atomic operation";
+        emit_error(ctx, msg);
+        return jl_cgval_t(ctx.builder.getContext());
+    }
+
+    if (!jl_isbits(ety)) {
+        //Value *thePtr = emit_unbox(ctx, getInt8PtrTy(ctx.builder.getContext()), e, e.typ);
+        //uint64_t size = jl_datatype_size(ety);
+        return emit_runtime_call(ctx, f, argv, nargs); // TODO: optimizations
+    }
+    else {
+        bool isboxed;
+        Type *ptrty = julia_type_to_llvm(ctx, ety, &isboxed);
+        assert(!isboxed);
+        Value *thePtr = emit_unbox(ctx, ptrty->getPointerTo(), e, e.typ);
+        jl_cgval_t ret = typed_store(ctx, thePtr, nullptr, x, y, ety, ctx.tbaa().tbaa_data, nullptr, nullptr, isboxed,
+                    llvm_order, llvm_failorder, nb, false, issetfield, isreplacefield, isswapfield, ismodifyfield, false, modifyop, "atomic_pointermodify");
+        if (issetfield)
+            ret = e;
+        return ret;
+    }
+}
+
 static Value *emit_checked_srem_int(jl_codectx_t &ctx, Value *x, Value *den)
 {
     Type *t = den->getType();
     raise_exception_unless(ctx,
             ctx.builder.CreateICmpNE(den, ConstantInt::get(t, 0)),
             literal_pointer_val(ctx, jl_diverror_exception));
-    BasicBlock *m1BB = BasicBlock::Create(jl_LLVMContext, "minus1", ctx.f);
-    BasicBlock *okBB = BasicBlock::Create(jl_LLVMContext, "oksrem", ctx.f);
-    BasicBlock *cont = BasicBlock::Create(jl_LLVMContext, "after_srem", ctx.f);
+    BasicBlock *m1BB = BasicBlock::Create(ctx.builder.getContext(), "minus1", ctx.f);
+    BasicBlock *okBB = BasicBlock::Create(ctx.builder.getContext(), "oksrem", ctx.f);
+    BasicBlock *cont = BasicBlock::Create(ctx.builder.getContext(), "after_srem", ctx.f);
     PHINode *ret = PHINode::Create(t, 2);
     ctx.builder.CreateCondBr(ctx.builder.CreateICmpEQ(den ,ConstantInt::get(t, -1, true)),
                          m1BB, okBB);
@@ -741,7 +962,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
     jl_value_t *t2 = y.typ;
     // handle cases where the condition is irrelevant based on type info
     if (t1 == jl_bottom_type && t2 == jl_bottom_type)
-        return jl_cgval_t(); // undefined
+        return jl_cgval_t(ctx.builder.getContext()); // undefined
     if (t1 == jl_bottom_type)
         return y;
     if (t2 == jl_bottom_type)
@@ -766,7 +987,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
 
     Value *ifelse_result;
     bool isboxed = t1 != t2 || !deserves_stack(t1);
-    Type *llt1 = isboxed ? T_prjlvalue : julia_type_to_llvm(ctx, t1);
+    Type *llt1 = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, t1);
     if (!isboxed) {
         if (type_is_ghost(llt1))
             return x;
@@ -793,7 +1014,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
                 y_vboxed = boxed(ctx, y);
             if (!x_ptr && !y_ptr) { // both ghost
                 ifelse_result = NULL;
-                ifelse_tbaa = tbaa_stack;
+                ifelse_tbaa = ctx.tbaa().tbaa_stack;
             }
             else if (!x_ptr) {
                 ifelse_result = y_ptr;
@@ -812,25 +1033,25 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
                 ifelse_tbaa = MDNode::getMostGenericTBAA(x.tbaa, y.tbaa);
                 if (ifelse_tbaa == NULL) {
                     // LLVM won't return a TBAA result for the root, but mark_julia_struct requires it: make it now
-                    auto *OffsetNode = ConstantAsMetadata::get(ConstantInt::get(T_int64, 0));
-                    Metadata *Ops[] = {tbaa_root, tbaa_root, OffsetNode};
-                    ifelse_tbaa = MDNode::get(jl_LLVMContext, Ops);
+                    auto *OffsetNode = ConstantAsMetadata::get(ConstantInt::get(getInt64Ty(ctx.builder.getContext()), 0));
+                    Metadata *Ops[] = {ctx.tbaa().tbaa_root, ctx.tbaa().tbaa_root, OffsetNode};
+                    ifelse_tbaa = MDNode::get(ctx.builder.getContext(), Ops);
                 }
             }
             Value *tindex;
             if (!x_tindex && x.constant) {
-                x_tindex = ConstantInt::get(T_int8, 0x80 | get_box_tindex((jl_datatype_t*)jl_typeof(x.constant), rt_hint));
+                x_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80 | get_box_tindex((jl_datatype_t*)jl_typeof(x.constant), rt_hint));
             }
             if (!y_tindex && y.constant) {
-                y_tindex = ConstantInt::get(T_int8, 0x80 | get_box_tindex((jl_datatype_t*)jl_typeof(y.constant), rt_hint));
+                y_tindex = ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80 | get_box_tindex((jl_datatype_t*)jl_typeof(y.constant), rt_hint));
             }
             if (x_tindex && y_tindex) {
                 tindex = ctx.builder.CreateSelect(isfalse, y_tindex, x_tindex);
             }
             else {
-                PHINode *ret = PHINode::Create(T_int8, 2);
-                BasicBlock *post = BasicBlock::Create(jl_LLVMContext, "post", ctx.f);
-                BasicBlock *compute = BasicBlock::Create(jl_LLVMContext, "compute_tindex", ctx.f);
+                PHINode *ret = PHINode::Create(getInt8Ty(ctx.builder.getContext()), 2);
+                BasicBlock *post = BasicBlock::Create(ctx.builder.getContext(), "post", ctx.f);
+                BasicBlock *compute = BasicBlock::Create(ctx.builder.getContext(), "compute_tindex", ctx.f);
                 // compute tindex if we select the previously-boxed value
                 if (x_tindex) {
                     assert(y.isboxed && y.V);
@@ -846,7 +1067,7 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
                     ctx.builder.SetInsertPoint(compute);
                     tindex = compute_tindex_unboxed(ctx, x, rt_hint);
                 }
-                tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(T_int8, 0x80));
+                tindex = ctx.builder.CreateOr(tindex, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0x80));
                 compute = ctx.builder.GetInsertBlock(); // could have changed
                 ctx.builder.CreateBr(post);
                 ret->addIncoming(tindex, compute);
@@ -854,14 +1075,14 @@ static jl_cgval_t emit_ifelse(jl_codectx_t &ctx, jl_cgval_t c, jl_cgval_t x, jl_
                 ctx.builder.Insert(ret);
                 tindex = ret;
             }
-            jl_cgval_t ret = mark_julia_slot(ifelse_result, rt_hint, tindex, ifelse_tbaa);
+            jl_cgval_t ret = mark_julia_slot(ifelse_result, rt_hint, tindex, ctx.tbaa(), ifelse_tbaa);
             if (x_vboxed || y_vboxed) {
                 if (!x_vboxed)
                     x_vboxed = ConstantPointerNull::get(cast<PointerType>(y_vboxed->getType()));
                 if (!y_vboxed)
                     y_vboxed = ConstantPointerNull::get(cast<PointerType>(x_vboxed->getType()));
                 ret.Vboxed = ctx.builder.CreateSelect(isfalse, y_vboxed, x_vboxed);
-                assert(ret.Vboxed->getType() == T_prjlvalue);
+                assert(ret.Vboxed->getType() == ctx.types().T_prjlvalue);
             }
             return ret;
         }
@@ -898,6 +1119,8 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
 
     switch (f) {
     case arraylen: {
+        ++Emitted_arraylen;
+        assert(nargs == 1);
         const jl_cgval_t &x = argv[0];
         jl_value_t *typ = jl_unwrap_unionall(x.typ);
         if (!jl_is_datatype(typ) || ((jl_datatype_t*)typ)->name != jl_array_typename)
@@ -905,44 +1128,106 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         return mark_julia_type(ctx, emit_arraylen(ctx, x), false, jl_long_type);
     }
     case pointerref:
+        ++Emitted_pointerref;
+        assert(nargs == 3);
         return emit_pointerref(ctx, argv);
     case pointerset:
+        ++Emitted_pointerset;
+        assert(nargs == 4);
         return emit_pointerset(ctx, argv);
+    case atomic_fence:
+        ++Emitted_atomic_fence;
+        assert(nargs == 1);
+        return emit_atomicfence(ctx, argv);
+    case atomic_pointerref:
+        ++Emitted_atomic_pointerref;
+        assert(nargs == 2);
+        return emit_atomic_pointerref(ctx, argv);
+    case atomic_pointerset:
+    case atomic_pointerswap:
+    case atomic_pointermodify:
+    case atomic_pointerreplace:
+        ++Emitted_atomic_pointerop;
+        return emit_atomic_pointerop(ctx, f, argv, nargs, nullptr);
     case bitcast:
+        ++Emitted_bitcast;
+        assert(nargs == 2);
         return generic_bitcast(ctx, argv);
     case trunc_int:
+        ++Emitted_trunc_int;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::Trunc, argv, true, true);
     case sext_int:
+        ++Emitted_sext_int;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::SExt, argv, true, true);
     case zext_int:
+        ++Emitted_zext_int;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::ZExt, argv, true, true);
     case uitofp:
+        ++Emitted_uitofp;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::UIToFP, argv, false, true);
     case sitofp:
+        ++Emitted_sitofp;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::SIToFP, argv, false, true);
     case fptoui:
+        ++Emitted_fptoui;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::FPToUI, argv, true, false);
     case fptosi:
+        ++Emitted_fptosi;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::FPToSI, argv, true, false);
     case fptrunc:
+        ++Emitted_fptrunc;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::FPTrunc, argv, false, false);
     case fpext:
+        ++Emitted_fpext;
+        assert(nargs == 2);
         return generic_cast(ctx, f, Instruction::FPExt, argv, false, false);
 
     case not_int: {
+        ++Emitted_not_int;
+        assert(nargs == 1);
         const jl_cgval_t &x = argv[0];
         if (!jl_is_primitivetype(x.typ))
             return emit_runtime_call(ctx, f, argv, nargs);
-        Type *xt = INTT(bitstype_to_llvm(x.typ));
+        Type *xt = INTT(bitstype_to_llvm(x.typ, ctx.builder.getContext()));
         Value *from = emit_unbox(ctx, xt, x, x.typ);
         Value *ans;
         if (x.typ == (jl_value_t*)jl_bool_type)
-            ans = ctx.builder.CreateXor(from, ConstantInt::get(T_int8, 1, true));
+            ans = ctx.builder.CreateXor(from, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 1, true));
         else
             ans = ctx.builder.CreateXor(from, ConstantInt::get(xt, -1, true));
         return mark_julia_type(ctx, ans, false, x.typ);
     }
 
+    case have_fma: {
+        ++Emitted_have_fma;
+        assert(nargs == 1);
+        const jl_cgval_t &x = argv[0];
+        if (!x.constant || !jl_is_datatype(x.constant))
+            return emit_runtime_call(ctx, f, argv, nargs);
+        jl_datatype_t *dt = (jl_datatype_t*) x.constant;
+
+        // select the appropriated overloaded intrinsic
+        std::string intr_name = "julia.cpu.have_fma.";
+        if (dt == jl_float32_type)
+            intr_name += "f32";
+        else if (dt == jl_float64_type)
+            intr_name += "f64";
+        else
+            return emit_runtime_call(ctx, f, argv, nargs);
+
+        FunctionCallee intr = jl_Module->getOrInsertFunction(intr_name, getInt1Ty(ctx.builder.getContext()));
+        auto ret = ctx.builder.CreateCall(intr);
+        return mark_julia_type(ctx, ret, false, jl_bool_type);
+    }
+
     default: {
         assert(nargs >= 1 && "invalid nargs for intrinsic call");
         const jl_cgval_t &xinfo = argv[0];
@@ -950,8 +1235,8 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         // verify argument types
         if (!jl_is_primitivetype(xinfo.typ))
             return emit_runtime_call(ctx, f, argv, nargs);
-        Type *xtyp = bitstype_to_llvm(xinfo.typ);
-        if (float_func[f])
+        Type *xtyp = bitstype_to_llvm(xinfo.typ, ctx.builder.getContext());
+        if (float_func()[f])
             xtyp = FLOATT(xtyp);
         else
             xtyp = INTT(xtyp);
@@ -965,7 +1250,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         ////LLVM seems to emit better code if we do the latter,
         ////(more likely to fold away the cast) so that's what we'll do.
         //if (xtyp == (jl_value_t*)jl_bool_type)
-        //    r = T_int1;
+        //    r = getInt1Ty(ctx.builder.getContext());
 
         Type **argt = (Type**)alloca(sizeof(Type*) * nargs);
         argt[0] = xtyp;
@@ -973,7 +1258,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         if (f == shl_int || f == lshr_int || f == ashr_int) {
             if (!jl_is_primitivetype(argv[1].typ))
                 return emit_runtime_call(ctx, f, argv, nargs);
-            argt[1] = INTT(bitstype_to_llvm(argv[1].typ));
+            argt[1] = INTT(bitstype_to_llvm(argv[1].typ, ctx.builder.getContext()));
         }
         else {
             for (size_t i = 1; i < nargs; ++i) {
@@ -993,8 +1278,8 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
         jl_value_t *newtyp = xinfo.typ;
         Value *r = emit_untyped_intrinsic(ctx, f, argvalues, nargs, (jl_datatype_t**)&newtyp, xinfo.typ);
         // Turn Bool operations into mod 1 now, if needed
-        if (newtyp == (jl_value_t*)jl_bool_type && r->getType() != T_int1)
-            r = ctx.builder.CreateTrunc(r, T_int1);
+        if (newtyp == (jl_value_t*)jl_bool_type && r->getType() != getInt1Ty(ctx.builder.getContext()))
+            r = ctx.builder.CreateTrunc(r, getInt1Ty(ctx.builder.getContext()));
         return mark_julia_type(ctx, r, false, newtyp);
     }
     }
@@ -1004,6 +1289,7 @@ static jl_cgval_t emit_intrinsic(jl_codectx_t &ctx, intrinsic f, jl_value_t **ar
 static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **argvalues, size_t nargs,
                                      jl_datatype_t **newtyp, jl_value_t *xtyp)
 {
+    ++EmittedUntypedIntrinsics;
     Value *x = nargs > 0 ? argvalues[0] : NULL;
     Value *y = nargs > 1 ? argvalues[1] : NULL;
     Value *z = nargs > 2 ? argvalues[2] : NULL;
@@ -1028,20 +1314,19 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
     // for pointer arithmetic which lowers to getelementptr.
     case add_ptr: {
         return ctx.builder.CreatePtrToInt(
-            ctx.builder.CreateGEP(T_int8,
-                emit_inttoptr(ctx, x, T_pint8), y), t);
+            ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()),
+                emit_inttoptr(ctx, x, getInt8PtrTy(ctx.builder.getContext())), y), t);
 
     }
 
     case sub_ptr: {
         return ctx.builder.CreatePtrToInt(
-            ctx.builder.CreateGEP(T_int8,
-                emit_inttoptr(ctx, x, T_pint8), ctx.builder.CreateNeg(y)), t);
+            ctx.builder.CreateGEP(getInt8Ty(ctx.builder.getContext()),
+                emit_inttoptr(ctx, x, getInt8PtrTy(ctx.builder.getContext())), ctx.builder.CreateNeg(y)), t);
 
     }
 
-// Implements IEEE negate. See issue #7868
-    case neg_float: return math_builder(ctx)().CreateFSub(ConstantFP::get(t, -0.0), x);
+    case neg_float: return math_builder(ctx)().CreateFNeg(x);
     case neg_float_fast: return math_builder(ctx, true)().CreateFNeg(x);
     case add_float: return math_builder(ctx)().CreateFAdd(x, y);
     case sub_float: return math_builder(ctx)().CreateFSub(x, y);
@@ -1090,7 +1375,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
         Value *res = ctx.builder.CreateCall(intr, {x, y});
         Value *val = ctx.builder.CreateExtractValue(res, ArrayRef<unsigned>(0));
         Value *obit = ctx.builder.CreateExtractValue(res, ArrayRef<unsigned>(1));
-        Value *obyte = ctx.builder.CreateZExt(obit, T_int8);
+        Value *obyte = ctx.builder.CreateZExt(obit, getInt8Ty(ctx.builder.getContext()));
 
         jl_value_t *params[2];
         params[0] = xtyp;
@@ -1159,49 +1444,49 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
                                 ctx.builder.CreateICmpEQ(xi, yi));
     }
 
-    case fpislt: {
-        *newtyp = jl_bool_type;
-        Type *it = INTT(t);
-        Value *xi = ctx.builder.CreateBitCast(x, it);
-        Value *yi = ctx.builder.CreateBitCast(y, it);
-        return ctx.builder.CreateOr(
-            ctx.builder.CreateAnd(
-                ctx.builder.CreateFCmpORD(x, x),
-                ctx.builder.CreateFCmpUNO(y, y)),
-            ctx.builder.CreateAnd(
-                ctx.builder.CreateFCmpORD(x, y),
-                ctx.builder.CreateOr(
-                    ctx.builder.CreateAnd(
-                        ctx.builder.CreateICmpSGE(xi, ConstantInt::get(it, 0)),
-                        ctx.builder.CreateICmpSLT(xi, yi)),
-                    ctx.builder.CreateAnd(
-                        ctx.builder.CreateICmpSLT(xi, ConstantInt::get(it, 0)),
-                        ctx.builder.CreateICmpUGT(xi, yi)))));
-    }
-
     case and_int: return ctx.builder.CreateAnd(x, y);
     case or_int:  return ctx.builder.CreateOr(x, y);
     case xor_int: return ctx.builder.CreateXor(x, y);
 
-    case shl_int:
-        return ctx.builder.CreateSelect(
-                ctx.builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
-                                                          t->getPrimitiveSizeInBits())),
-                ConstantInt::get(t, 0),
-                ctx.builder.CreateShl(x, uint_cnvt(ctx, t, y)));
-    case lshr_int:
-        return ctx.builder.CreateSelect(
-                ctx.builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
-                                                          t->getPrimitiveSizeInBits())),
-                ConstantInt::get(t, 0),
-                ctx.builder.CreateLShr(x, uint_cnvt(ctx, t, y)));
-    case ashr_int:
-        return ctx.builder.CreateSelect(
-                ctx.builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
-                                                          t->getPrimitiveSizeInBits())),
-                ctx.builder.CreateAShr(x, ConstantInt::get(t, t->getPrimitiveSizeInBits() - 1)),
-                ctx.builder.CreateAShr(x, uint_cnvt(ctx, t, y)));
-
+    case shl_int: {
+        Value *the_shl = ctx.builder.CreateShl(x, uint_cnvt(ctx, t, y));
+        if (ConstantInt::isValueValidForType(y->getType(), t->getPrimitiveSizeInBits())) {
+            return ctx.builder.CreateSelect(
+                    ctx.builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
+                                                                  t->getPrimitiveSizeInBits())),
+                    ConstantInt::get(t, 0),
+                    the_shl);
+        }
+        else {
+            return the_shl;
+        }
+    }
+    case lshr_int: {
+        Value *the_shr = ctx.builder.CreateLShr(x, uint_cnvt(ctx, t, y));
+        if (ConstantInt::isValueValidForType(y->getType(), t->getPrimitiveSizeInBits())) {
+            return ctx.builder.CreateSelect(
+                    ctx.builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
+                                                                  t->getPrimitiveSizeInBits())),
+                    ConstantInt::get(t, 0),
+                    the_shr);
+        }
+        else {
+            return the_shr;
+        }
+    }
+    case ashr_int: {
+        Value *the_shr = ctx.builder.CreateAShr(x, uint_cnvt(ctx, t, y));
+        if (ConstantInt::isValueValidForType(y->getType(), t->getPrimitiveSizeInBits())) {
+            return ctx.builder.CreateSelect(
+                    ctx.builder.CreateICmpUGE(y, ConstantInt::get(y->getType(),
+                                                                  t->getPrimitiveSizeInBits())),
+                    ctx.builder.CreateAShr(x, ConstantInt::get(t, t->getPrimitiveSizeInBits() - 1)),
+                    the_shr);
+        }
+        else {
+            return the_shr;
+        }
+    }
     case bswap_int: {
         FunctionCallee bswapintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::bswap, makeArrayRef(t));
         return ctx.builder.CreateCall(bswapintr, x);
@@ -1212,12 +1497,12 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
     }
     case ctlz_int: {
         FunctionCallee ctlz = Intrinsic::getDeclaration(jl_Module, Intrinsic::ctlz, makeArrayRef(t));
-        y = ConstantInt::get(T_int1, 0);
+        y = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
         return ctx.builder.CreateCall(ctlz, {x, y});
     }
     case cttz_int: {
         FunctionCallee cttz = Intrinsic::getDeclaration(jl_Module, Intrinsic::cttz, makeArrayRef(t));
-        y = ConstantInt::get(T_int1, 0);
+        y = ConstantInt::get(getInt1Ty(ctx.builder.getContext()), 0);
         return ctx.builder.CreateCall(cttz, {x, y});
     }
 
@@ -1226,14 +1511,8 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
         return ctx.builder.CreateCall(absintr, x);
     }
     case copysign_float: {
-        Value *bits = ctx.builder.CreateBitCast(x, t);
-        Value *sbits = ctx.builder.CreateBitCast(y, t);
-        unsigned nb = cast<IntegerType>(t)->getBitWidth();
-        APInt notsignbit = APInt::getSignedMaxValue(nb);
-        APInt signbit0(nb, 0); signbit0.setBit(nb - 1);
-        return ctx.builder.CreateOr(
-                    ctx.builder.CreateAnd(bits, ConstantInt::get(t, notsignbit)),
-                    ctx.builder.CreateAnd(sbits, ConstantInt::get(t, signbit0)));
+        FunctionCallee copyintr = Intrinsic::getDeclaration(jl_Module, Intrinsic::copysign, makeArrayRef(t));
+        return ctx.builder.CreateCall(copyintr, {x, y});
     }
     case flipsign_int: {
         ConstantInt *cx = dyn_cast<ConstantInt>(x);
@@ -1281,3 +1560,7 @@ static Value *emit_untyped_intrinsic(jl_codectx_t &ctx, intrinsic f, Value **arg
     }
     assert(0 && "unreachable");
 }
+
+//Redefine us as being part of codegen
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "julia_irgen_codegen"
diff --git a/src/intrinsics.h b/src/intrinsics.h
index 1558769eb3643e..bb67460bbb31ff 100644
--- a/src/intrinsics.h
+++ b/src/intrinsics.h
@@ -45,7 +45,6 @@
     ALIAS(lt_float_fast, lt_float) \
     ALIAS(le_float_fast, le_float) \
     ADD_I(fpiseq, 2) \
-    ADD_I(fpislt, 2) \
     /*  bitwise operators */ \
     ADD_I(and_int, 2) \
     ADD_I(or_int, 2) \
@@ -92,11 +91,20 @@
     /*  pointer access */ \
     ADD_I(pointerref, 3) \
     ADD_I(pointerset, 4) \
-    /* c interface */ \
+    /*  pointer atomics */ \
+    ADD_I(atomic_fence, 1) \
+    ADD_I(atomic_pointerref, 2) \
+    ADD_I(atomic_pointerset, 3) \
+    ADD_I(atomic_pointerswap, 3) \
+    ADD_I(atomic_pointermodify, 4) \
+    ADD_I(atomic_pointerreplace, 5) \
+    /*  c interface */ \
     ADD_I(cglobal, 2) \
     ALIAS(llvmcall, llvmcall) \
-    /* object access */ \
+    /*  object access */ \
     ADD_I(arraylen, 1) \
+    /*  cpu feature tests */ \
+    ADD_I(have_fma, 1) \
     /*  hidden intrinsics */ \
     ADD_HIDDEN(cglobal_auto, 1)
 
diff --git a/src/ircode.c b/src/ircode.c
index 62e38e72697f6e..c3fe174db32063 100644
--- a/src/ircode.c
+++ b/src/ircode.c
@@ -14,11 +14,7 @@
 #include <dlfcn.h>
 #endif
 
-#ifndef _COMPILER_MICROSOFT_
 #include "valgrind.h"
-#else
-#define RUNNING_ON_VALGRIND 0
-#endif
 #include "julia_assert.h"
 
 #ifdef __cplusplus
@@ -30,30 +26,37 @@ typedef struct {
     // method we're compressing for
     jl_method_t *method;
     jl_ptls_t ptls;
+    uint8_t relocatability;
 } jl_ircode_state;
 
 // --- encoding ---
 
 #define jl_encode_value(s, v) jl_encode_value_((s), (jl_value_t*)(v), 0)
 
-static int literal_val_id(jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED
+static void tagged_root(rle_reference *rr, jl_ircode_state *s, int i)
+{
+    if (!get_root_reference(rr, s->method, i))
+        s->relocatability = 0;
+}
+
+static void literal_val_id(rle_reference *rr, jl_ircode_state *s, jl_value_t *v) JL_GC_DISABLED
 {
     jl_array_t *rs = s->method->roots;
     int i, l = jl_array_len(rs);
     if (jl_is_symbol(v) || jl_is_concrete_type(v)) {
         for (i = 0; i < l; i++) {
             if (jl_array_ptr_ref(rs, i) == v)
-                return i;
+                return tagged_root(rr, s, i);
         }
     }
     else {
         for (i = 0; i < l; i++) {
             if (jl_egal(jl_array_ptr_ref(rs, i), v))
-                return i;
+                return tagged_root(rr, s, i);
         }
     }
-    jl_array_ptr_1d_push(rs, v);
-    return jl_array_len(rs) - 1;
+    jl_add_method_root(s->method, jl_precompile_toplevel_module, v);
+    return tagged_root(rr, s, jl_array_len(rs) - 1);
 }
 
 static void jl_encode_int32(jl_ircode_state *s, int32_t x)
@@ -71,6 +74,7 @@ static void jl_encode_int32(jl_ircode_state *s, int32_t x)
 static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal) JL_GC_DISABLED
 {
     size_t i;
+    rle_reference rr;
 
     if (v == NULL) {
         write_uint8(s->s, TAG_NULL);
@@ -151,7 +155,7 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     else if (jl_is_expr(v)) {
         jl_expr_t *e = (jl_expr_t*)v;
         size_t l = jl_array_len(e->args);
-        if (e->head == call_sym) {
+        if (e->head == jl_call_sym) {
             if (l == 2) {
                 write_uint8(s->s, TAG_CALL1);
                 jl_encode_value(s, jl_exprarg(e, 0));
@@ -325,8 +329,13 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
         if (!as_literal && !(jl_is_uniontype(v) || jl_is_newvarnode(v) || jl_is_tuple(v) ||
                              jl_is_linenode(v) || jl_is_upsilonnode(v) || jl_is_pinode(v) ||
                              jl_is_slot(v) || jl_is_ssavalue(v))) {
-            int id = literal_val_id(s, v);
+            literal_val_id(&rr, s, v);
+            int id = rr.index;
             assert(id >= 0);
+            if (rr.key) {
+                write_uint8(s->s, TAG_RELOC_METHODROOT);
+                write_int64(s->s, rr.key);
+            }
             if (id < 256) {
                 write_uint8(s->s, TAG_METHODROOT);
                 write_uint8(s->s, id);
@@ -381,6 +390,17 @@ static void jl_encode_value_(jl_ircode_state *s, jl_value_t *v, int as_literal)
     }
 }
 
+static jl_code_info_flags_t code_info_flags(uint8_t pure, uint8_t propagate_inbounds, uint8_t inlineable, uint8_t inferred, uint8_t constprop)
+{
+    jl_code_info_flags_t flags;
+    flags.bits.pure = pure;
+    flags.bits.propagate_inbounds = propagate_inbounds;
+    flags.bits.inlineable = inlineable;
+    flags.bits.inferred = inferred;
+    flags.bits.constprop = constprop;
+    return flags;
+}
+
 // --- decoding ---
 
 static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED;
@@ -418,7 +438,7 @@ static jl_value_t *jl_decode_value_array(jl_ircode_state *s, uint8_t tag) JL_GC_
         isptr = (elsize >> 15) & 1;
         hasptr = (elsize >> 14) & 1;
         isunion = (elsize >> 13) & 1;
-        elsize = elsize & 0x3fff;
+        elsize = elsize & 0x1fff;
     }
     size_t *dims = (size_t*)alloca(ndims * sizeof(size_t));
     for (i = 0; i < ndims; i++) {
@@ -475,11 +495,11 @@ static jl_value_t *jl_decode_value_expr(jl_ircode_state *s, uint8_t tag) JL_GC_D
     }
     else if (tag == TAG_CALL1) {
         len = 2;
-        head = call_sym;
+        head = jl_call_sym;
     }
     else if (tag == TAG_CALL2) {
         len = 3;
-        head = call_sym;
+        head = jl_call_sym;
     }
     else {
         len = read_int32(s->s);
@@ -570,6 +590,7 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
     assert(!ios_eof(s->s));
     jl_value_t *v;
     size_t i, n;
+    uint64_t key;
     uint8_t tag = read_uint8(s->s);
     if (tag > LAST_TAG)
         return jl_deser_tag(tag);
@@ -578,10 +599,15 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
     case 0:
         tag = read_uint8(s->s);
         return jl_deser_tag(tag);
+    case TAG_RELOC_METHODROOT:
+        key = read_uint64(s->s);
+        tag = read_uint8(s->s);
+        assert(tag == TAG_METHODROOT || tag == TAG_LONG_METHODROOT);
+        return lookup_root(s->method, key, tag == TAG_METHODROOT ? read_uint8(s->s) : read_uint16(s->s));
     case TAG_METHODROOT:
-        return jl_array_ptr_ref(s->method->roots, read_uint8(s->s));
+        return lookup_root(s->method, 0, read_uint8(s->s));
     case TAG_LONG_METHODROOT:
-        return jl_array_ptr_ref(s->method->roots, read_uint16(s->s));
+        return lookup_root(s->method, 0, read_uint16(s->s));
     case TAG_SVEC: JL_FALLTHROUGH; case TAG_LONG_SVEC:
         return jl_decode_value_svec(s, tag);
     case TAG_COMMONSYM:
@@ -608,20 +634,20 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
         return jl_decode_value_phic(s, tag);
     case TAG_GOTONODE: JL_FALLTHROUGH; case TAG_QUOTENODE:
         v = jl_new_struct_uninit(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type);
-        set_nth_field(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type, (void*)v, 0, jl_decode_value(s));
+        set_nth_field(tag == TAG_GOTONODE ? jl_gotonode_type : jl_quotenode_type, v, 0, jl_decode_value(s), 0);
         return v;
     case TAG_GOTOIFNOT:
         v = jl_new_struct_uninit(jl_gotoifnot_type);
-        set_nth_field(jl_gotoifnot_type, (void*)v, 0, jl_decode_value(s));
-        set_nth_field(jl_gotoifnot_type, (void*)v, 1, jl_decode_value(s));
+        set_nth_field(jl_gotoifnot_type, v, 0, jl_decode_value(s), 0);
+        set_nth_field(jl_gotoifnot_type, v, 1, jl_decode_value(s), 0);
         return v;
     case TAG_ARGUMENT:
         v = jl_new_struct_uninit(jl_argument_type);
-        set_nth_field(jl_argument_type, (void*)v, 0, jl_decode_value(s));
+        set_nth_field(jl_argument_type, v, 0, jl_decode_value(s), 0);
         return v;
     case TAG_RETURNNODE:
         v = jl_new_struct_uninit(jl_returnnode_type);
-        set_nth_field(jl_returnnode_type, (void*)v, 0, jl_decode_value(s));
+        set_nth_field(jl_returnnode_type, v, 0, jl_decode_value(s), 0);
         return v;
     case TAG_SHORTER_INT64:
         v = jl_box_int64((int16_t)read_uint16(s->s));
@@ -670,7 +696,7 @@ static jl_value_t *jl_decode_value(jl_ircode_state *s) JL_GC_DISABLED
         v = jl_new_struct_uninit(jl_lineinfonode_type);
         for (i = 0; i < jl_datatype_nfields(jl_lineinfonode_type); i++) {
             //size_t offs = jl_field_offset(jl_lineinfonode_type, i);
-            set_nth_field(jl_lineinfonode_type, (void*)v, i, jl_decode_value(s));
+            set_nth_field(jl_lineinfonode_type, v, i, jl_decode_value(s), 0);
         }
         return v;
     default:
@@ -699,14 +725,13 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
     jl_ircode_state s = {
         &dest,
         m,
-        jl_get_ptls_states()
+        jl_current_task->ptls,
+        1
     };
 
-    uint8_t flags = (code->inferred << 3)
-                  | (code->inlineable << 2)
-                  | (code->propagate_inbounds << 1)
-                  | (code->pure << 0);
-    write_uint8(s.s, flags);
+    jl_code_info_flags_t flags = code_info_flags(code->pure, code->propagate_inbounds, code->inlineable, code->inferred, code->constprop);
+    write_uint8(s.s, flags.packed);
+    write_uint8(s.s, code->purity.bits);
 
     size_t nslots = jl_array_len(code->slotflags);
     assert(nslots >= m->nargs && nslots < INT32_MAX); // required by generated functions
@@ -730,6 +755,11 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
         jl_encode_value_(&s, jl_get_nth_field((jl_value_t*)code, i), copy);
     }
 
+    // For opaque closure, also save the slottypes. We technically only need the first slot type,
+    // but this is simpler for now. We may want to refactor where this gets stored in the future.
+    if (m->is_for_opaque_closure)
+        jl_encode_value_(&s, code->slottypes, 1);
+
     if (m->generator)
         // can't optimize generated functions
         jl_encode_value_(&s, (jl_value_t*)jl_compress_argnames(code->slotnames), 1);
@@ -752,6 +782,8 @@ JL_DLLEXPORT jl_array_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code)
         ios_write(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t));
     }
 
+    write_uint8(s.s, s.relocatability);
+
     ios_flush(s.s);
     jl_array_t *v = jl_take_buffer(&dest);
     ios_close(s.s);
@@ -782,15 +814,19 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
     jl_ircode_state s = {
         &src,
         m,
-        jl_get_ptls_states()
+        jl_current_task->ptls,
+        1
     };
 
     jl_code_info_t *code = jl_new_code_info_uninit();
-    uint8_t flags = read_uint8(s.s);
-    code->inferred = !!(flags & (1 << 3));
-    code->inlineable = !!(flags & (1 << 2));
-    code->propagate_inbounds = !!(flags & (1 << 1));
-    code->pure = !!(flags & (1 << 0));
+    jl_code_info_flags_t flags;
+    flags.packed = read_uint8(s.s);
+    code->constprop = flags.bits.constprop;
+    code->inferred = flags.bits.inferred;
+    code->inlineable = flags.bits.inlineable;
+    code->propagate_inbounds = flags.bits.propagate_inbounds;
+    code->pure = flags.bits.pure;
+    code->purity.bits = read_uint8(s.s);
 
     size_t nslots = read_int32(&src);
     code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots);
@@ -803,6 +839,8 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
         jl_value_t **fld = (jl_value_t**)((char*)jl_data_ptr(code) + jl_field_offset(jl_code_info_type, i));
         *fld = jl_decode_value(&s);
     }
+    if (m->is_for_opaque_closure)
+        code->slottypes = jl_decode_value(&s);
 
     jl_value_t *slotnames = jl_decode_value(&s);
     if (!jl_is_string(slotnames))
@@ -825,6 +863,8 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t
         ios_readall(s.s, (char*)jl_array_data(code->codelocs), nstmt * sizeof(int32_t));
     }
 
+    (void) read_uint8(s.s);   // relocatability
+
     assert(ios_getc(s.s) == -1);
     ios_close(s.s);
     JL_GC_PUSH1(&code);
@@ -845,8 +885,9 @@ JL_DLLEXPORT uint8_t jl_ir_flag_inferred(jl_array_t *data)
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inferred;
     assert(jl_typeis(data, jl_array_uint8_type));
-    uint8_t flags = ((uint8_t*)data->data)[0];
-    return !!(flags & (1 << 3));
+    jl_code_info_flags_t flags;
+    flags.packed = ((uint8_t*)data->data)[0];
+    return flags.bits.inferred;
 }
 
 JL_DLLEXPORT uint8_t jl_ir_flag_inlineable(jl_array_t *data)
@@ -854,8 +895,9 @@ JL_DLLEXPORT uint8_t jl_ir_flag_inlineable(jl_array_t *data)
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->inlineable;
     assert(jl_typeis(data, jl_array_uint8_type));
-    uint8_t flags = ((uint8_t*)data->data)[0];
-    return !!(flags & (1 << 2));
+    jl_code_info_flags_t flags;
+    flags.packed = ((uint8_t*)data->data)[0];
+    return flags.bits.inlineable;
 }
 
 JL_DLLEXPORT uint8_t jl_ir_flag_pure(jl_array_t *data)
@@ -863,8 +905,9 @@ JL_DLLEXPORT uint8_t jl_ir_flag_pure(jl_array_t *data)
     if (jl_is_code_info(data))
         return ((jl_code_info_t*)data)->pure;
     assert(jl_typeis(data, jl_array_uint8_type));
-    uint8_t flags = ((uint8_t*)data->data)[0];
-    return !!(flags & (1 << 0));
+    jl_code_info_flags_t flags;
+    flags.packed = ((uint8_t*)data->data)[0];
+    return flags.bits.pure;
 }
 
 JL_DLLEXPORT jl_value_t *jl_compress_argnames(jl_array_t *syms)
@@ -901,7 +944,7 @@ JL_DLLEXPORT ssize_t jl_ir_nslots(jl_array_t *data)
     }
     else {
         assert(jl_typeis(data, jl_array_uint8_type));
-        int nslots = jl_load_unaligned_i32((char*)data->data + 1);
+        int nslots = jl_load_unaligned_i32((char*)data->data + 2);
         return nslots;
     }
 }
@@ -912,7 +955,7 @@ JL_DLLEXPORT uint8_t jl_ir_slotflag(jl_array_t *data, size_t i)
     if (jl_is_code_info(data))
         return ((uint8_t*)((jl_code_info_t*)data)->slotflags->data)[i];
     assert(jl_typeis(data, jl_array_uint8_type));
-    return ((uint8_t*)data->data)[1 + sizeof(int32_t) + i];
+    return ((uint8_t*)data->data)[2 + sizeof(int32_t) + i];
 }
 
 JL_DLLEXPORT jl_array_t *jl_uncompress_argnames(jl_value_t *syms)
diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp
index 1e49eba98f03fb..0fb4ecc34466cc 100644
--- a/src/jitlayers.cpp
+++ b/src/jitlayers.cpp
@@ -1,23 +1,36 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// Except for parts of this file which were copied from LLVM, under the UIUC license (marked below).
-
 #include "llvm-version.h"
 #include "platform.h"
 
-#include <llvm/Transforms/Utils/Cloning.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
+#include "llvm/IR/Mangler.h"
+#include <llvm/ADT/StringMap.h>
+#include <llvm/Analysis/TargetLibraryInfo.h>
+#include <llvm/Analysis/TargetTransformInfo.h>
+#include <llvm/ExecutionEngine/Orc/CompileUtils.h>
+#include <llvm/ExecutionEngine/Orc/ExecutionUtils.h>
+#if JL_LLVM_VERSION >= 130000
+#include <llvm/ExecutionEngine/Orc/ExecutorProcessControl.h>
+#endif
 #include <llvm/Support/DynamicLibrary.h>
-
+#include <llvm/Support/FormattedStream.h>
 #include <llvm/Support/SmallVectorMemoryBuffer.h>
 #include <llvm/Support/raw_ostream.h>
-#include <llvm/Support/FormattedStream.h>
-#include <llvm/ADT/StringMap.h>
+#include <llvm/Transforms/Utils/Cloning.h>
+#include <llvm/Transforms/Utils/ModuleUtils.h>
+#include <llvm/Bitcode/BitcodeWriter.h>
 
+// target machine computation
+#include <llvm/CodeGen/TargetSubtargetInfo.h>
+#if JL_LLVM_VERSION >= 140000
+#include <llvm/MC/TargetRegistry.h>
+#else
 #include <llvm/Support/TargetRegistry.h>
-#include <llvm/Target/TargetMachine.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
+#endif
+#include <llvm/Target/TargetOptions.h>
+#include <llvm/Support/Host.h>
+#include <llvm/Support/TargetSelect.h>
+#include <llvm/Object/SymbolSize.h>
 
 using namespace llvm;
 
@@ -26,21 +39,34 @@ using namespace llvm;
 #include "codegen_shared.h"
 #include "jitlayers.h"
 #include "julia_assert.h"
+#include "processor.h"
+
+#ifdef JL_USE_JITLINK
+# if JL_LLVM_VERSION >= 140000
+#  include <llvm/ExecutionEngine/Orc/DebuggerSupportPlugin.h>
+# endif
+# include <llvm/ExecutionEngine/JITLink/EHFrameSupport.h>
+# include <llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h>
+#else
+# include <llvm/ExecutionEngine/SectionMemoryManager.h>
+#endif
 
-RTDyldMemoryManager* createRTDyldMemoryManager(void);
-
-void jl_init_jit(void) { }
+#define DEBUG_TYPE "jitlayers"
 
 // Snooping on which functions are being compiled, and how long it takes
-JL_STREAM *dump_compiles_stream = NULL;
 extern "C" JL_DLLEXPORT
-void jl_dump_compiles(void *s)
+void jl_dump_compiles_impl(void *s)
 {
-    dump_compiles_stream = (JL_STREAM*)s;
+    **jl_ExecutionEngine->get_dump_compiles_stream() = (JL_STREAM*)s;
+}
+extern "C" JL_DLLEXPORT
+void jl_dump_llvm_opt_impl(void *s)
+{
+    **jl_ExecutionEngine->get_dump_llvm_opt_stream() = (JL_STREAM*)s;
 }
 
-static void jl_add_to_ee(std::unique_ptr<Module> m);
-static void jl_add_to_ee(std::unique_ptr<Module> &M, StringMap<std::unique_ptr<Module>*> &NewExports);
+static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports);
+static void jl_decorate_module(Module &M);
 static uint64_t getAddressForFunction(StringRef fname);
 
 void jl_link_global(GlobalVariable *GV, void *addr)
@@ -74,16 +100,14 @@ void jl_jit_globals(std::map<void *, GlobalVariable*> &globals)
 static jl_callptr_t _jl_compile_codeinst(
         jl_code_instance_t *codeinst,
         jl_code_info_t *src,
-        size_t world)
+        size_t world,
+        orc::ThreadSafeContext context)
 {
-    // TODO: Merge with jl_dump_compiles?
-    static ios_t f_precompile;
-    static JL_STREAM* s_precompile = NULL;
-
     // caller must hold codegen_lock
     // and have disabled finalizers
     uint64_t start_time = 0;
-    if (dump_compiles_stream != NULL)
+    bool timed = !!*jl_ExecutionEngine->get_dump_compiles_stream();
+    if (timed)
         start_time = jl_hrtime();
 
     assert(jl_is_code_instance(codeinst));
@@ -93,29 +117,35 @@ static jl_callptr_t _jl_compile_codeinst(
 
     jl_callptr_t fptr = NULL;
     // emit the code in LLVM IR form
-    jl_codegen_params_t params;
+    jl_codegen_params_t params(std::move(context)); // Locks the context
     params.cache = true;
     params.world = world;
-    std::map<jl_code_instance_t*, jl_compile_result_t> emitted;
+    jl_workqueue_t emitted;
     {
-        JL_TIMING(CODEGEN);
-        jl_compile_result_t result = jl_emit_codeinst(codeinst, src, params);
-        if (std::get<0>(result))
-            emitted[codeinst] = std::move(result);
-        jl_compile_workqueue(emitted, params, CompilationPolicy::Default);
+        orc::ThreadSafeModule result_m =
+            jl_create_llvm_module(name_from_method_instance(codeinst->def), params.tsctx, params.imaging);
+        jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params);
+        if (result_m)
+            emitted[codeinst] = {std::move(result_m), std::move(decls)};
+        {
+            auto temp_module = jl_create_llvm_module(name_from_method_instance(codeinst->def), params.tsctx, params.imaging);
+            jl_compile_workqueue(emitted, *temp_module.getModuleUnlocked(), params, CompilationPolicy::Default);
+        }
 
         if (params._shared_module)
-            jl_add_to_ee(std::unique_ptr<Module>(params._shared_module));
-        StringMap<std::unique_ptr<Module>*> NewExports;
+            jl_ExecutionEngine->addModule(std::move(params._shared_module));
+        StringMap<orc::ThreadSafeModule*> NewExports;
         StringMap<void*> NewGlobals;
         for (auto &global : params.globals) {
             NewGlobals[global.second->getName()] = global.first;
         }
         for (auto &def : emitted) {
-            std::unique_ptr<Module> &M = std::get<0>(def.second);
+            orc::ThreadSafeModule &TSM = std::get<0>(def.second);
+            //The underlying context object is still locked because params is not destroyed yet
+            auto M = TSM.getModuleUnlocked();
             for (auto &F : M->global_objects()) {
                 if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
-                    NewExports[F.getName()] = &M;
+                    NewExports[F.getName()] = &TSM;
                 }
             }
             // Let's link all globals here also (for now)
@@ -128,7 +158,7 @@ static jl_callptr_t _jl_compile_codeinst(
         }
         for (auto &def : emitted) {
             // Add the results to the execution engine now
-            std::unique_ptr<Module> &M = std::get<0>(def.second);
+            orc::ThreadSafeModule &M = std::get<0>(def.second);
             jl_add_to_ee(M, NewExports);
         }
     }
@@ -140,102 +170,103 @@ static jl_callptr_t _jl_compile_codeinst(
         jl_callptr_t addr;
         bool isspecsig = false;
         if (decls.functionObject == "jl_fptr_args") {
-            addr = &jl_fptr_args;
+            addr = jl_fptr_args_addr;
         }
         else if (decls.functionObject == "jl_fptr_sparam") {
-            addr = &jl_fptr_sparam;
+            addr = jl_fptr_sparam_addr;
         }
         else {
             addr = (jl_callptr_t)getAddressForFunction(decls.functionObject);
             isspecsig = true;
         }
-        if (this_code->invoke == NULL) {
+        if (jl_atomic_load_relaxed(&this_code->invoke) == NULL) {
             // once set, don't change invoke-ptr, as that leads to race conditions
             // with the (not) simultaneous updates to invoke and specptr
             if (!decls.specFunctionObject.empty()) {
-                this_code->specptr.fptr = (void*)getAddressForFunction(decls.specFunctionObject);
+                jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
                 this_code->isspecsig = isspecsig;
             }
             jl_atomic_store_release(&this_code->invoke, addr);
         }
-        else if (this_code->invoke == jl_fptr_const_return && !decls.specFunctionObject.empty()) {
-            // hack to export this pointer value to jl_dump_method_asm
-            this_code->specptr.fptr = (void*)getAddressForFunction(decls.specFunctionObject);
+        else if (jl_atomic_load_relaxed(&this_code->invoke) == jl_fptr_const_return_addr && !decls.specFunctionObject.empty()) {
+            // hack to export this pointer value to jl_dump_method_disasm
+            jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
         }
         if (this_code== codeinst)
             fptr = addr;
     }
 
     uint64_t end_time = 0;
-    if (dump_compiles_stream != NULL)
+    if (timed)
         end_time = jl_hrtime();
 
     // If logging of the compilation stream is enabled,
     // then dump the method-instance specialization type to the stream
     jl_method_instance_t *mi = codeinst->def;
     if (jl_is_method(mi->def.method)) {
-        if (jl_options.trace_compile != NULL) {
-            if (s_precompile == NULL) {
-                const char* t = jl_options.trace_compile;
-                if (!strncmp(t, "stderr", 6))
-                    s_precompile = JL_STDERR;
-                else {
-                    if (ios_file(&f_precompile, t, 1, 1, 1, 1) == NULL)
-                        jl_errorf("cannot open precompile statement file \"%s\" for writing", t);
-                    s_precompile = (JL_STREAM*) &f_precompile;
-                }
-            }
-            if (!jl_has_free_typevars(mi->specTypes)) {
-                jl_printf(s_precompile, "precompile(");
-                jl_static_show(s_precompile, mi->specTypes);
-                jl_printf(s_precompile, ")\n");
-
-                if (s_precompile != JL_STDERR)
-                    ios_flush(&f_precompile);
-            }
-        }
-        if (dump_compiles_stream != NULL) {
-            jl_printf(dump_compiles_stream, "%" PRIu64 "\t\"", end_time - start_time);
-            jl_static_show(dump_compiles_stream, mi->specTypes);
-            jl_printf(dump_compiles_stream, "\"\n");
+        auto stream = *jl_ExecutionEngine->get_dump_compiles_stream();
+        if (stream) {
+            jl_printf(stream, "%" PRIu64 "\t\"", end_time - start_time);
+            jl_static_show(stream, mi->specTypes);
+            jl_printf(stream, "\"\n");
         }
     }
     return fptr;
 }
 
-void jl_generate_ccallable(void *llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params);
+const char *jl_generate_ccallable(LLVMOrcThreadSafeModuleRef llvmmod, void *sysimg_handle, jl_value_t *declrt, jl_value_t *sigt, jl_codegen_params_t &params);
 
 // compile a C-callable alias
 extern "C" JL_DLLEXPORT
-void jl_compile_extern_c(void *llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
+int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
 {
-    JL_LOCK(&codegen_lock);
-    jl_codegen_params_t params;
+    JL_LOCK(&jl_codegen_lock);
+    uint64_t compiler_start_time = 0;
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
+        compiler_start_time = jl_hrtime();
+    orc::ThreadSafeContext ctx;
+    auto into = reinterpret_cast<orc::ThreadSafeModule*>(llvmmod);
     jl_codegen_params_t *pparams = (jl_codegen_params_t*)p;
+    orc::ThreadSafeModule backing;
+    if (into == NULL) {
+        if (!pparams) {
+            ctx = jl_ExecutionEngine->acquireContext();
+        }
+        backing = jl_create_llvm_module("cextern", pparams ? pparams->tsctx : ctx, pparams ? pparams->imaging : imaging_default());
+        into = &backing;
+    }
+    jl_codegen_params_t params(into->getContext());
     if (pparams == NULL)
         pparams = &params;
-    Module *into = (Module*)llvmmod;
-    if (into == NULL)
-        into = jl_create_llvm_module("cextern");
-    jl_generate_ccallable(into, sysimg, declrt, sigt, *pparams);
+    assert(pparams->tsctx.getContext() == into->getContext().getContext());
+    const char *name = jl_generate_ccallable(reinterpret_cast<LLVMOrcThreadSafeModuleRef>(into), sysimg, declrt, sigt, *pparams);
+    bool success = true;
     if (!sysimg) {
-        if (p == NULL) {
+        if (jl_ExecutionEngine->getGlobalValueAddress(name)) {
+            success = false;
+        }
+        if (success && p == NULL) {
             jl_jit_globals(params.globals);
             assert(params.workqueue.empty());
             if (params._shared_module)
-                jl_add_to_ee(std::unique_ptr<Module>(params._shared_module));
+                jl_ExecutionEngine->addModule(std::move(params._shared_module));
         }
-        if (llvmmod == NULL)
-            jl_add_to_ee(std::unique_ptr<Module>(into));
+        if (success && llvmmod == NULL)
+            jl_ExecutionEngine->addModule(std::move(*into));
     }
-    JL_UNLOCK(&codegen_lock);
+    if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+    if (ctx.getContext()) {
+        jl_ExecutionEngine->releaseContext(std::move(ctx));
+    }
+    JL_UNLOCK(&jl_codegen_lock);
+    return success;
 }
 
-bool jl_type_mappable_to_c(jl_value_t *ty);
-
 // declare a C-callable entry point; called during code loading from the toplevel
 extern "C" JL_DLLEXPORT
-void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
+void jl_extern_c_impl(jl_value_t *declrt, jl_tupletype_t *sigt)
 {
     // validate arguments. try to do as many checks as possible here to avoid
     // throwing errors later during codegen.
@@ -250,21 +281,19 @@ void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
     // compute / validate return type
     if (!jl_is_concrete_type(declrt) || jl_is_kind(declrt))
         jl_error("@ccallable: return type must be concrete and correspond to a C type");
-    JL_LOCK(&codegen_lock);
     if (!jl_type_mappable_to_c(declrt))
         jl_error("@ccallable: return type doesn't correspond to a C type");
-    JL_UNLOCK(&codegen_lock);
 
     // validate method signature
     size_t i, nargs = jl_nparams(sigt);
     for (i = 1; i < nargs; i++) {
         jl_value_t *ati = jl_tparam(sigt, i);
-        if (!jl_is_concrete_type(ati) || jl_is_kind(ati))
+        if (!jl_is_concrete_type(ati) || jl_is_kind(ati) || !jl_type_mappable_to_c(ati))
             jl_error("@ccallable: argument types must be concrete");
     }
 
     // save a record of this so that the alias is generated when we write an object file
-    jl_method_t *meth = (jl_method_t*)jl_methtable_lookup(ft->name->mt, (jl_value_t*)sigt, jl_world_counter);
+    jl_method_t *meth = (jl_method_t*)jl_methtable_lookup(ft->name->mt, (jl_value_t*)sigt, jl_atomic_load_acquire(&jl_world_counter));
     if (!jl_is_method(meth))
         jl_error("@ccallable: could not find requested method");
     JL_GC_PUSH1(&meth);
@@ -273,14 +302,23 @@ void jl_extern_c(jl_value_t *declrt, jl_tupletype_t *sigt)
     JL_GC_POP();
 
     // create the alias in the current runtime environment
-    jl_compile_extern_c(NULL, NULL, NULL, declrt, (jl_value_t*)sigt);
+    int success = jl_compile_extern_c(NULL, NULL, NULL, declrt, (jl_value_t*)sigt);
+    if (!success)
+        jl_error("@ccallable was already defined for this method name");
 }
 
 // this compiles li and emits fptr
-extern "C"
-jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
+extern "C" JL_DLLEXPORT
+jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
 {
-    JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
+    JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
+    auto ctx = jl_ExecutionEngine->getContext();
+    auto &context = *ctx;
+    uint64_t compiler_start_time = 0;
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    bool is_recompile = false;
+    if (measure_compile_time_enabled)
+        compiler_start_time = jl_hrtime();
     // if we don't have any decls already, try to generate it now
     jl_code_info_t *src = NULL;
     JL_GC_PUSH1(&src);
@@ -293,12 +331,18 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
         else if (jl_is_method(mi->def.method))
             src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
     }
+    else {
+        // identify whether this is an invalidated method that is being recompiled
+        is_recompile = jl_atomic_load_relaxed(&mi->cache) != NULL;
+    }
     if (src == NULL && jl_is_method(mi->def.method) &&
              jl_symbol_name(mi->def.method->name)[0] != '@') {
-        // If the caller didn't provide the source,
-        // see if it is inferred, or try to infer it for ourself.
-        // (but don't bother with typeinf on macros or toplevel thunks)
-        src = jl_type_infer(mi, world, 0);
+        if (mi->def.method->source != jl_nothing) {
+            // If the caller didn't provide the source and IR is available,
+            // see if it is inferred, or try to infer it for ourself.
+            // (but don't bother with typeinf on macros or toplevel thunks)
+            src = jl_type_infer(mi, world, 0);
+        }
     }
     jl_code_instance_t *compiled = jl_method_compiled(mi, world);
     if (compiled) {
@@ -310,26 +354,38 @@ jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT
             if (src->inferred && !codeinst->inferred)
                 codeinst->inferred = jl_nothing;
         }
-        _jl_compile_codeinst(codeinst, src, world);
-        if (codeinst->invoke == NULL)
+        _jl_compile_codeinst(codeinst, src, world, context);
+        if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL)
             codeinst = NULL;
     }
     else {
         codeinst = NULL;
     }
-    JL_UNLOCK(&codegen_lock);
+    if (jl_codegen_lock.count == 1 && measure_compile_time_enabled) {
+        uint64_t t_comp = jl_hrtime() - compiler_start_time;
+        if (is_recompile)
+            jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
+    }
+    JL_UNLOCK(&jl_codegen_lock);
     JL_GC_POP();
     return codeinst;
 }
 
-extern "C"
-void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
+extern "C" JL_DLLEXPORT
+void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
 {
     if (jl_atomic_load_relaxed(&unspec->invoke) != NULL) {
         return;
     }
-    JL_LOCK(&codegen_lock);
-    if (unspec->invoke == NULL) {
+    JL_LOCK(&jl_codegen_lock);
+    auto ctx = jl_ExecutionEngine->getContext();
+    auto &context = *ctx;
+    uint64_t compiler_start_time = 0;
+    uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+    if (measure_compile_time_enabled)
+        compiler_start_time = jl_hrtime();
+    if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
         jl_code_info_t *src = NULL;
         JL_GC_PUSH1(&src);
         jl_method_t *def = unspec->def->def.method;
@@ -348,35 +404,43 @@ void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec)
             src = (jl_code_info_t*)unspec->def->uninferred;
         }
         assert(src && jl_is_code_info(src));
-        _jl_compile_codeinst(unspec, src, unspec->min_world);
-        if (unspec->invoke == NULL) {
+        _jl_compile_codeinst(unspec, src, unspec->min_world, context);
+        if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
             // if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
-            jl_atomic_store_release(&unspec->invoke, &jl_fptr_interpret_call);
+            jl_atomic_store_release(&unspec->invoke, jl_fptr_interpret_call_addr);
         }
         JL_GC_POP();
     }
-    JL_UNLOCK(&codegen_lock); // Might GC
+    if (jl_codegen_lock.count == 1 && measure_compile_time_enabled)
+        jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+    JL_UNLOCK(&jl_codegen_lock); // Might GC
 }
 
 
 // get a native disassembly for a compiled method
 extern "C" JL_DLLEXPORT
-jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
-        int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo)
+jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary)
 {
     // printing via disassembly
     jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
     if (codeinst) {
-        uintptr_t fptr = (uintptr_t)codeinst->invoke;
+        uintptr_t fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
         if (getwrapper)
-            return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo);
-        uintptr_t specfptr = (uintptr_t)codeinst->specptr.fptr;
-        if (fptr == (uintptr_t)&jl_fptr_const_return && specfptr == 0) {
+            return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary);
+        uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
+        if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
             // normally we prevent native code from being generated for these functions,
             // (using sentinel value `1` instead)
             // so create an exception here so we can print pretty our lies
-            JL_LOCK(&codegen_lock); // also disables finalizers, to prevent any unexpected recursion
-            specfptr = (uintptr_t)codeinst->specptr.fptr;
+            JL_LOCK(&jl_codegen_lock); // also disables finalizers, to prevent any unexpected recursion
+            auto ctx = jl_ExecutionEngine->getContext();
+            auto &context = *ctx;
+            uint64_t compiler_start_time = 0;
+            uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
+            if (measure_compile_time_enabled)
+                compiler_start_time = jl_hrtime();
+            specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
             if (specfptr == 0) {
                 jl_code_info_t *src = jl_type_infer(mi, world, 0);
                 JL_GC_PUSH1(&src);
@@ -389,305 +453,706 @@ jl_value_t *jl_dump_method_asm(jl_method_instance_t *mi, size_t world,
                     if (src && (jl_value_t*)src != jl_nothing)
                         src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
                 }
-                fptr = (uintptr_t)codeinst->invoke;
-                specfptr = (uintptr_t)codeinst->specptr.fptr;
+                fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
+                specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                 if (src && jl_is_code_info(src)) {
-                    if (fptr == (uintptr_t)&jl_fptr_const_return && specfptr == 0) {
-                        fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world);
-                        specfptr = (uintptr_t)codeinst->specptr.fptr;
+                    if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
+                        fptr = (uintptr_t)_jl_compile_codeinst(codeinst, src, world, context);
+                        specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
                     }
                 }
                 JL_GC_POP();
             }
-            JL_UNLOCK(&codegen_lock);
+            if (measure_compile_time_enabled)
+                jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
+            JL_UNLOCK(&jl_codegen_lock);
         }
         if (specfptr != 0)
-            return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo);
+            return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary);
     }
 
     // whatever, that didn't work - use the assembler output instead
-    if (raw_mc) // eh, give up, this flag doesn't really work anyways normally
-        return (jl_value_t*)jl_pchar_to_array("", 0);
-    return jl_dump_llvm_asm(jl_get_llvmf_defn(mi, world, getwrapper, true, jl_default_cgparams), asm_variant, debuginfo);
+    void *F = jl_get_llvmf_defn(mi, world, getwrapper, true, jl_default_cgparams);
+    if (!F)
+        return jl_an_empty_string;
+    return jl_dump_function_asm(F, raw_mc, asm_variant, debuginfo, binary);
 }
 
-#if defined(_OS_LINUX_) || defined(_OS_WINDOWS_) || defined(_OS_FREEBSD_)
-// Resolve non-lock free atomic functions in the libatomic1 library.
-// This is the library that provides support for c11/c++11 atomic operations.
-static uint64_t resolve_atomic(const char *name)
+CodeGenOpt::Level CodeGenOptLevelFor(int optlevel)
 {
-#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
-    static const char *const libatomic = "libatomic.so.1";
-#elif defined(_OS_WINDOWS_)
-    static const char *const libatomic = "libatomic-1.dll";
+#ifdef DISABLE_OPT
+    return CodeGenOpt::None;
+#else
+    return optlevel < 2 ? CodeGenOpt::None :
+        optlevel == 2 ? CodeGenOpt::Default :
+        CodeGenOpt::Aggressive;
 #endif
-    static void *atomic_hdl = jl_load_dynamic_library(libatomic,
-                                                      JL_RTLD_LOCAL, 0);
-    static const char *const atomic_prefix = "__atomic_";
-    if (!atomic_hdl)
-        return 0;
-    if (strncmp(name, atomic_prefix, strlen(atomic_prefix)) != 0)
-        return 0;
-    uintptr_t value;
-    jl_dlsym(atomic_hdl, name, (void **)&value, 0);
-    return value;
 }
-#endif
 
-// Custom object emission notification handler for the JuliaOJIT
-extern JITEventListener *CreateJuliaJITEventListener();
-JuliaOJIT::DebugObjectRegistrar::DebugObjectRegistrar(JuliaOJIT &JIT)
-    : JuliaListener(CreateJuliaJITEventListener()),
-      JIT(JIT) {}
+static auto countBasicBlocks(const Function &F)
+{
+    return std::distance(F.begin(), F.end());
+}
 
-JL_DLLEXPORT void ORCNotifyObjectEmitted(JITEventListener *Listener,
-                                         const object::ObjectFile &obj,
-                                         const RuntimeDyld::LoadedObjectInfo &L,
-                                         RTDyldMemoryManager *memmgr);
+void JuliaOJIT::OptSelLayerT::emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) {
+    size_t optlevel = ~0ull;
+    TSM.withModuleDo([&](Module &M) {
+        if (jl_generating_output()) {
+            optlevel = 0;
+        }
+        else {
+            optlevel = std::max(static_cast<int>(jl_options.opt_level), 0);
+            size_t optlevel_min = std::max(static_cast<int>(jl_options.opt_level_min), 0);
+            for (auto &F : M.functions()) {
+                if (!F.getBasicBlockList().empty()) {
+                    Attribute attr = F.getFnAttribute("julia-optimization-level");
+                    StringRef val = attr.getValueAsString();
+                    if (val != "") {
+                        size_t ol = (size_t)val[0] - '0';
+                        if (ol >= 0 && ol < optlevel)
+                            optlevel = ol;
+                    }
+                }
+            }
+            optlevel = std::min(std::max(optlevel, optlevel_min), this->count);
+        }
+    });
+    assert(optlevel != ~0ull && "Failed to select a valid optimization level!");
+    this->optimizers[optlevel]->OptimizeLayer.emit(std::move(R), std::move(TSM));
+}
 
-template <typename ObjT, typename LoadResult>
-void JuliaOJIT::DebugObjectRegistrar::registerObject(RTDyldObjHandleT H, const ObjT &Obj,
-                                                     const LoadResult &LO)
-{
-    const ObjT* Object = &Obj;
+void jl_register_jit_object(const object::ObjectFile &debugObj,
+                            std::function<uint64_t(const StringRef &)> getLoadAddress,
+                            std::function<void *(void *)> lookupWriteAddress);
 
-    JIT.NotifyFinalizer(H, *Object, *LO);
-    ORCNotifyObjectEmitted(JuliaListener.get(), *Object, *LO, JIT.MemMgr.get());
+#ifdef JL_USE_JITLINK
 
-    // record all of the exported symbols defined in this object
-    // in the primary hash table for the enclosing JIT
-    for (auto &Symbol : Object->symbols()) {
-#if JL_LLVM_VERSION >= 110000
-        uint32_t Flags = Symbol.getFlags().get();
-#else
-        uint32_t Flags = Symbol.getFlags();
-#endif
-        if (Flags & object::BasicSymbolRef::SF_Undefined)
-            continue;
-        if (!(Flags & object::BasicSymbolRef::SF_Exported))
-            continue;
-        auto NameOrError = Symbol.getName();
-        assert(NameOrError);
-        auto Name = NameOrError.get();
-        auto Sym = JIT.CompileLayer.findSymbolIn(H, Name.str(), true);
-        assert(Sym);
-        // note: calling getAddress here eagerly finalizes H
-        // as an alternative, we could store the JITSymbol instead
-        // (which would present a lazy-initializer functor interface instead)
-        JIT.LocalSymbolTable[Name] = (void*)(uintptr_t)cantFail(Sym.getAddress());
+namespace {
+
+using namespace llvm::orc;
+
+struct JITObjectInfo {
+    std::unique_ptr<MemoryBuffer> BackingBuffer;
+    std::unique_ptr<object::ObjectFile> Object;
+    StringMap<uint64_t> SectionLoadAddresses;
+};
+
+class JLDebuginfoPlugin : public ObjectLinkingLayer::Plugin {
+    std::map<MaterializationResponsibility *, std::unique_ptr<JITObjectInfo>> PendingObjs;
+    // Resources from distinct MaterializationResponsibilitys can get merged
+    // after emission, so we can have multiple debug objects per resource key.
+    std::map<ResourceKey, std::vector<std::unique_ptr<JITObjectInfo>>> RegisteredObjs;
+
+public:
+    void notifyMaterializing(MaterializationResponsibility &MR, jitlink::LinkGraph &G,
+                             jitlink::JITLinkContext &Ctx,
+                             MemoryBufferRef InputObject) override
+    {
+        // Keeping around a full copy of the input object file (and re-parsing it) is
+        // wasteful, but for now, this lets us reuse the existing debuginfo.cpp code.
+        // Should look into just directly pulling out all the information required in
+        // a JITLink pass and just keeping the required tables/DWARF sections around
+        // (perhaps using the LLVM DebuggerSupportPlugin as a reference).
+        auto NewBuffer =
+            MemoryBuffer::getMemBufferCopy(InputObject.getBuffer(), G.getName());
+        auto NewObj =
+            cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
+
+        assert(PendingObjs.count(&MR) == 0);
+        PendingObjs[&MR] = std::unique_ptr<JITObjectInfo>(
+            new JITObjectInfo{std::move(NewBuffer), std::move(NewObj), {}});
     }
-}
 
-template <typename ObjSetT, typename LoadResult>
-void JuliaOJIT::DebugObjectRegistrar::operator()(RTDyldObjHandleT H,
-                const ObjSetT &Object, const LoadResult &LOS)
-{
-    registerObject(H, Object,
-                   static_cast<const RuntimeDyld::LoadedObjectInfo*>(&LOS));
-}
+    Error notifyEmitted(MaterializationResponsibility &MR) override
+    {
+        auto It = PendingObjs.find(&MR);
+        if (It == PendingObjs.end())
+            return Error::success();
+
+        auto NewInfo = PendingObjs[&MR].get();
+        auto getLoadAddress = [NewInfo](const StringRef &Name) -> uint64_t {
+            auto result = NewInfo->SectionLoadAddresses.find(Name);
+            if (result == NewInfo->SectionLoadAddresses.end()) {
+                LLVM_DEBUG({
+                    dbgs() << "JLDebuginfoPlugin: No load address found for section '"
+                           << Name << "'\n";
+                });
+                return 0;
+            }
+            return result->second;
+        };
 
-CodeGenOpt::Level CodeGenOptLevelFor(int optlevel)
-{
-#ifdef DISABLE_OPT
-    return CodeGenOpt::None;
+        jl_register_jit_object(*NewInfo->Object, getLoadAddress, nullptr);
+
+        cantFail(MR.withResourceKeyDo([&](ResourceKey K) {
+            RegisteredObjs[K].push_back(std::move(PendingObjs[&MR]));
+            PendingObjs.erase(&MR);
+        }));
+
+        return Error::success();
+    }
+
+    Error notifyFailed(MaterializationResponsibility &MR) override
+    {
+        PendingObjs.erase(&MR);
+        return Error::success();
+    }
+
+    Error notifyRemovingResources(ResourceKey K) override
+    {
+        RegisteredObjs.erase(K);
+        // TODO: If we ever unload code, need to notify debuginfo registry.
+        return Error::success();
+    }
+
+    void notifyTransferringResources(ResourceKey DstKey, ResourceKey SrcKey) override
+    {
+        auto SrcIt = RegisteredObjs.find(SrcKey);
+        if (SrcIt != RegisteredObjs.end()) {
+            for (std::unique_ptr<JITObjectInfo> &Info : SrcIt->second)
+                RegisteredObjs[DstKey].push_back(std::move(Info));
+            RegisteredObjs.erase(SrcIt);
+        }
+    }
+
+    void modifyPassConfig(MaterializationResponsibility &MR, jitlink::LinkGraph &,
+                          jitlink::PassConfiguration &PassConfig) override
+    {
+        auto It = PendingObjs.find(&MR);
+        if (It == PendingObjs.end())
+            return;
+
+        JITObjectInfo &Info = *It->second;
+        PassConfig.PostAllocationPasses.push_back([&Info](jitlink::LinkGraph &G) -> Error {
+            for (const jitlink::Section &Sec : G.sections()) {
+                // Canonical JITLink section names have the segment name included, e.g.
+                // "__TEXT,__text" or "__DWARF,__debug_str". There are some special internal
+                // sections without a comma separator, which we can just ignore.
+                size_t SepPos = Sec.getName().find(',');
+                if (SepPos >= 16 || (Sec.getName().size() - (SepPos + 1) > 16)) {
+                    LLVM_DEBUG({
+                        dbgs() << "JLDebuginfoPlugin: Ignoring section '" << Sec.getName()
+                               << "'\n";
+                    });
+                    continue;
+                }
+                auto SecName = Sec.getName().substr(SepPos + 1);
+                // https://github.com/llvm/llvm-project/commit/118e953b18ff07d00b8f822dfbf2991e41d6d791
+#if JL_LLVM_VERSION >= 140000
+               Info.SectionLoadAddresses[SecName] = jitlink::SectionRange(Sec).getStart().getValue();
 #else
-    return optlevel < 2 ? CodeGenOpt::None :
-        optlevel == 2 ? CodeGenOpt::Default :
-        CodeGenOpt::Aggressive;
+               Info.SectionLoadAddresses[SecName] = jitlink::SectionRange(Sec).getStart();
 #endif
+            }
+            return Error::success();
+        });
+    }
+};
 }
 
-static void addPassesForOptLevel(legacy::PassManager &PM, TargetMachine &TM, raw_svector_ostream &ObjStream, MCContext *Ctx, int optlevel)
-{
-    addTargetPasses(&PM, &TM);
-    addOptimizationPasses(&PM, optlevel);
-    if (TM.addPassesToEmitMC(PM, Ctx, ObjStream))
-        llvm_unreachable("Target does not support MC emission.");
-}
+# ifdef LLVM_SHLIB
+
+#  if JL_LLVM_VERSION >= 140000
+#   define EHFRAME_RANGE(name) orc::ExecutorAddrRange name
+#   define UNPACK_EHFRAME_RANGE(name) \
+        name.Start.toPtr<uint8_t *>(), \
+        static_cast<size_t>(name.size())
+#  else
+#   define EHFRAME_RANGE(name) JITTargetAddress name##Addr, size_t name##Size
+#   define UNPACK_EHFRAME_RANGE(name) \
+        jitTargetAddressToPointer<uint8_t *>(name##Addr), \
+        name##Size
+#  endif
+
+class JLEHFrameRegistrar final : public jitlink::EHFrameRegistrar {
+public:
+    Error registerEHFrames(EHFRAME_RANGE(EHFrameSection)) override {
+        register_eh_frames(
+            UNPACK_EHFRAME_RANGE(EHFrameSection));
+        return Error::success();
+    }
+
+    Error deregisterEHFrames(EHFRAME_RANGE(EHFrameSection)) override {
+        deregister_eh_frames(
+            UNPACK_EHFRAME_RANGE(EHFrameSection));
+        return Error::success();
+    }
+};
+# endif
+
+#else // !JL_USE_JITLINK
 
-CompilerResultT JuliaOJIT::CompilerT::operator()(Module &M)
+RTDyldMemoryManager* createRTDyldMemoryManager(void);
+
+// A simple forwarding class, since OrcJIT v2 needs a unique_ptr, while we have a shared_ptr
+class ForwardingMemoryManager : public RuntimeDyld::MemoryManager {
+private:
+    std::shared_ptr<RuntimeDyld::MemoryManager> MemMgr;
+
+public:
+    ForwardingMemoryManager(std::shared_ptr<RuntimeDyld::MemoryManager> MemMgr) : MemMgr(MemMgr) {}
+    virtual ~ForwardingMemoryManager() = default;
+    virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+                                     unsigned SectionID,
+                                     StringRef SectionName) override {
+        return MemMgr->allocateCodeSection(Size, Alignment, SectionID, SectionName);
+    }
+    virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+                                     unsigned SectionID,
+                                     StringRef SectionName,
+                                     bool IsReadOnly) override {
+        return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName, IsReadOnly);
+    }
+    virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+                                        uintptr_t RODataSize,
+                                        uint32_t RODataAlign,
+                                        uintptr_t RWDataSize,
+                                        uint32_t RWDataAlign) override {
+        return MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign, RWDataSize, RWDataAlign);
+    }
+    virtual bool needsToReserveAllocationSpace() override {
+        return MemMgr->needsToReserveAllocationSpace();
+    }
+    virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+                                  size_t Size) override {
+        return MemMgr->registerEHFrames(Addr, LoadAddr, Size);
+    }
+    virtual void deregisterEHFrames() override {
+        return MemMgr->deregisterEHFrames();
+    }
+    virtual bool finalizeMemory(std::string *ErrMsg = nullptr) override {
+        return MemMgr->finalizeMemory(ErrMsg);
+    }
+    virtual void notifyObjectLoaded(RuntimeDyld &RTDyld,
+                                    const object::ObjectFile &Obj) override {
+        return MemMgr->notifyObjectLoaded(RTDyld, Obj);
+    }
+};
+
+
+#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
+void *lookupWriteAddressFor(RTDyldMemoryManager *MemMgr, void *rt_addr);
+#endif
+
+void registerRTDyldJITObject(const object::ObjectFile &Object,
+                             const RuntimeDyld::LoadedObjectInfo &L,
+                             const std::shared_ptr<RTDyldMemoryManager> &MemMgr)
 {
-    JL_TIMING(LLVM_OPT);
-    int optlevel;
-    if (jl_generating_output()) {
-        optlevel = 0;
+    auto SavedObject = L.getObjectForDebug(Object).takeBinary();
+    // If the debug object is unavailable, save (a copy of) the original object
+    // for our backtraces.
+    // This copy seems unfortunate, but there doesn't seem to be a way to take
+    // ownership of the original buffer.
+    if (!SavedObject.first) {
+        auto NewBuffer =
+            MemoryBuffer::getMemBufferCopy(Object.getData(), Object.getFileName());
+        auto NewObj =
+            cantFail(object::ObjectFile::createObjectFile(NewBuffer->getMemBufferRef()));
+        SavedObject = std::make_pair(std::move(NewObj), std::move(NewBuffer));
     }
-    else {
-        optlevel = jl_options.opt_level;
-        for (auto &F : M.functions()) {
-            if (!F.getBasicBlockList().empty()) {
-                Attribute attr = F.getFnAttribute("julia-optimization-level");
-                StringRef val = attr.getValueAsString();
-                if (val != "") {
-                    int ol = (int)val[0] - '0';
-                    if (ol >= 0 && ol < optlevel)
-                        optlevel = ol;
-                }
+    const object::ObjectFile *DebugObj = SavedObject.first.release();
+    SavedObject.second.release();
+
+    StringMap<object::SectionRef> loadedSections;
+    // Use the original Object, not the DebugObject, as this is used for the
+    // RuntimeDyld::LoadedObjectInfo lookup.
+    for (const object::SectionRef &lSection : Object.sections()) {
+        auto sName = lSection.getName();
+        if (sName) {
+            bool inserted = loadedSections.insert(std::make_pair(*sName, lSection)).second;
+            assert(inserted);
+            (void)inserted;
+        }
+    }
+    auto getLoadAddress = [loadedSections = std::move(loadedSections),
+                           &L](const StringRef &sName) -> uint64_t {
+        auto search = loadedSections.find(sName);
+        if (search == loadedSections.end())
+            return 0;
+        return L.getSectionLoadAddress(search->second);
+    };
+
+    jl_register_jit_object(*DebugObj, getLoadAddress,
+#if defined(_OS_WINDOWS_) && defined(_CPU_X86_64_)
+        [MemMgr](void *p) { return lookupWriteAddressFor(MemMgr.get(), p); }
+#else
+        nullptr
+#endif
+    );
+}
+#endif
+namespace {
+    std::unique_ptr<TargetMachine> createTargetMachine() {
+
+        TargetOptions options = TargetOptions();
+#if defined(_OS_WINDOWS_)
+        // use ELF because RuntimeDyld COFF i686 support didn't exist
+        // use ELF because RuntimeDyld COFF X86_64 doesn't seem to work (fails to generate function pointers)?
+#define FORCE_ELF
+#endif
+        //options.PrintMachineCode = true; //Print machine code produced during JIT compiling
+#if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_) && JL_LLVM_VERSION < 130000
+        // tell Win32 to assume the stack is always 16-byte aligned,
+        // and to ensure that it is 16-byte aligned for out-going calls,
+        // to ensure compatibility with GCC codes
+        // In LLVM 13 and onwards this has turned into a module option
+        options.StackAlignmentOverride = 16;
+#endif
+#if defined(JL_DEBUG_BUILD) && JL_LLVM_VERSION < 130000
+        // LLVM defaults to tls stack guard, which causes issues with Julia's tls implementation
+        options.StackProtectorGuard = StackProtectorGuards::Global;
+#endif
+        Triple TheTriple(sys::getProcessTriple());
+#if defined(FORCE_ELF)
+        TheTriple.setObjectFormat(Triple::ELF);
+#endif
+        uint32_t target_flags = 0;
+        auto target = jl_get_llvm_target(imaging_default(), target_flags);
+        auto &TheCPU = target.first;
+        SmallVector<std::string, 10> targetFeatures(target.second.begin(), target.second.end());
+        std::string errorstr;
+        const Target *TheTarget = TargetRegistry::lookupTarget("", TheTriple, errorstr);
+        if (!TheTarget)
+            jl_errorf("%s", errorstr.c_str());
+        if (jl_processor_print_help || (target_flags & JL_TARGET_UNKNOWN_NAME)) {
+            std::unique_ptr<MCSubtargetInfo> MSTI(
+                TheTarget->createMCSubtargetInfo(TheTriple.str(), "", ""));
+            if (!MSTI->isCPUStringValid(TheCPU))
+                jl_errorf("Invalid CPU name \"%s\".", TheCPU.c_str());
+            if (jl_processor_print_help) {
+                // This is the only way I can find to print the help message once.
+                // It'll be nice if we can iterate through the features and print our own help
+                // message...
+                MSTI->setDefaultFeatures("help", "", "");
             }
         }
+        // Package up features to be passed to target/subtarget
+        std::string FeaturesStr;
+        if (!targetFeatures.empty()) {
+            SubtargetFeatures Features;
+            for (unsigned i = 0; i != targetFeatures.size(); ++i)
+                Features.AddFeature(targetFeatures[i]);
+            FeaturesStr = Features.getString();
+        }
+        // Allocate a target...
+        Optional<CodeModel::Model> codemodel =
+#ifdef _P64
+            // Make sure we are using the large code model on 64bit
+            // Let LLVM pick a default suitable for jitting on 32bit
+            CodeModel::Large;
+#else
+            None;
+#endif
+        auto optlevel = CodeGenOptLevelFor(jl_options.opt_level);
+        auto TM = TheTarget->createTargetMachine(
+                TheTriple.getTriple(), TheCPU, FeaturesStr,
+                options,
+                Reloc::Static, // Generate simpler code for JIT
+                codemodel,
+                optlevel,
+                true // JIT
+                );
+        assert(TM && "Failed to select target machine -"
+                                " Is the LLVM backend for this CPU enabled?");
+        #if (!defined(_CPU_ARM_) && !defined(_CPU_PPC64_))
+        // FastISel seems to be buggy for ARM. Ref #13321
+        if (jl_options.opt_level < 2)
+            TM->setFastISel(true);
+        #endif
+        return std::unique_ptr<TargetMachine>(TM);
     }
-    if (optlevel == 0)
-        jit.PM0.run(M);
-    else if (optlevel == 1)
-        jit.PM1.run(M);
-    else if (optlevel == 2)
-        jit.PM2.run(M);
-    else if (optlevel >= 3)
-        jit.PM3.run(M);
-
-    std::unique_ptr<MemoryBuffer> ObjBuffer(
-        new SmallVectorMemoryBuffer(std::move(jit.ObjBufferSV)));
-    auto Obj = object::ObjectFile::createObjectFile(ObjBuffer->getMemBufferRef());
-
-    if (!Obj) {
-        llvm_dump(&M);
-        std::string Buf;
-        raw_string_ostream OS(Buf);
-        logAllUnhandledErrors(Obj.takeError(), OS, "");
-        OS.flush();
-        llvm::report_fatal_error("FATAL: Unable to compile LLVM Module: '" + Buf + "'\n"
-                                 "The module's content was printed above. Please file a bug report");
+} // namespace
+
+namespace {
+    orc::JITTargetMachineBuilder createJTMBFromTM(TargetMachine &TM, int optlevel) {
+        return orc::JITTargetMachineBuilder(TM.getTargetTriple())
+        .setCPU(TM.getTargetCPU().str())
+        .setFeatures(TM.getTargetFeatureString())
+        .setOptions(TM.Options)
+        .setRelocationModel(Reloc::Static)
+        .setCodeModel(TM.getCodeModel())
+        .setCodeGenOptLevel(CodeGenOptLevelFor(optlevel));
     }
 
-    return CompilerResultT(std::move(ObjBuffer));
+    struct TMCreator {
+        orc::JITTargetMachineBuilder JTMB;
+
+        TMCreator(TargetMachine &TM, int optlevel) : JTMB(createJTMBFromTM(TM, optlevel)) {}
+
+        std::unique_ptr<TargetMachine> operator()() {
+            return cantFail(JTMB.createTargetMachine());
+        }
+    };
+
+    struct PMCreator {
+        std::unique_ptr<TargetMachine> TM;
+        int optlevel;
+        PMCreator(TargetMachine &TM, int optlevel) : TM(cantFail(createJTMBFromTM(TM, optlevel).createTargetMachine())), optlevel(optlevel) {}
+        PMCreator(const PMCreator &other) : PMCreator(*other.TM, other.optlevel) {}
+        PMCreator(PMCreator &&other) : TM(std::move(other.TM)), optlevel(other.optlevel) {}
+        friend void swap(PMCreator &self, PMCreator &other) {
+            using std::swap;
+            swap(self.TM, other.TM);
+            swap(self.optlevel, other.optlevel);
+        }
+        PMCreator &operator=(PMCreator other) {
+            swap(*this, other);
+            return *this;
+        }
+        std::unique_ptr<legacy::PassManager> operator()() {
+            auto PM = std::make_unique<legacy::PassManager>();
+            addTargetPasses(PM.get(), TM->getTargetTriple(), TM->getTargetIRAnalysis());
+            addOptimizationPasses(PM.get(), optlevel);
+            addMachinePasses(PM.get(), optlevel);
+            return PM;
+        }
+    };
+
+    struct OptimizerT {
+        OptimizerT(TargetMachine &TM, int optlevel) : optlevel(optlevel), PMs(PMCreator(TM, optlevel)) {}
+
+        OptimizerResultT operator()(orc::ThreadSafeModule TSM, orc::MaterializationResponsibility &R) {
+            TSM.withModuleDo([&](Module &M) {
+                uint64_t start_time = 0;
+                {
+                    auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream();
+                    if (stream) {
+                        // Print LLVM function statistics _before_ optimization
+                        // Print all the information about this invocation as a YAML object
+                        jl_printf(stream, "- \n");
+                        // We print the name and some statistics for each function in the module, both
+                        // before optimization and again afterwards.
+                        jl_printf(stream, "  before: \n");
+                        for (auto &F : M.functions()) {
+                            if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
+                                continue;
+                            }
+                            // Each function is printed as a YAML object with several attributes
+                            jl_printf(stream, "    \"%s\":\n", F.getName().str().c_str());
+                            jl_printf(stream, "        instructions: %u\n", F.getInstructionCount());
+                            jl_printf(stream, "        basicblocks: %lu\n", countBasicBlocks(F));
+                        }
+
+                        start_time = jl_hrtime();
+                    }
+                }
+
+                JL_TIMING(LLVM_OPT);
+
+                //Run the optimization
+                (***PMs).run(M);
+
+                uint64_t end_time = 0;
+                {
+                    auto stream = *jl_ExecutionEngine->get_dump_llvm_opt_stream();
+                    if (stream) {
+                        end_time = jl_hrtime();
+                        jl_printf(stream, "  time_ns: %" PRIu64 "\n", end_time - start_time);
+                        jl_printf(stream, "  optlevel: %d\n", optlevel);
+
+                        // Print LLVM function statistics _after_ optimization
+                        jl_printf(stream, "  after: \n");
+                        for (auto &F : M.functions()) {
+                            if (F.isDeclaration() || F.getName().startswith("jfptr_")) {
+                                continue;
+                            }
+                            jl_printf(stream, "    \"%s\":\n", F.getName().str().c_str());
+                            jl_printf(stream, "        instructions: %u\n", F.getInstructionCount());
+                            jl_printf(stream, "        basicblocks: %lu\n", countBasicBlocks(F));
+                        }
+                    }
+                }
+            });
+            return Expected<orc::ThreadSafeModule>{std::move(TSM)};
+        }
+    private:
+        int optlevel;
+        JuliaOJIT::ResourcePool<std::unique_ptr<legacy::PassManager>> PMs;
+    };
+
+    struct CompilerT : orc::IRCompileLayer::IRCompiler {
+
+        CompilerT(orc::IRSymbolMapper::ManglingOptions MO, TargetMachine &TM, int optlevel)
+        : orc::IRCompileLayer::IRCompiler(MO), TMs(TMCreator(TM, optlevel)) {}
+
+        Expected<std::unique_ptr<MemoryBuffer>> operator()(Module &M) override {
+            return orc::SimpleCompiler(***TMs)(M);
+        }
+
+        JuliaOJIT::ResourcePool<std::unique_ptr<TargetMachine>> TMs;
+    };
 }
 
-JuliaOJIT::JuliaOJIT(TargetMachine &TM)
-  : TM(TM),
-    DL(TM.createDataLayout()),
-    ObjStream(ObjBufferSV),
-    MemMgr(createRTDyldMemoryManager()),
-    registrar(*this),
+llvm::DataLayout jl_create_datalayout(TargetMachine &TM) {
+    // Mark our address spaces as non-integral
+    auto jl_data_layout = TM.createDataLayout();
+    jl_data_layout.reset(jl_data_layout.getStringRepresentation() + "-ni:10:11:12:13");
+    return jl_data_layout;
+}
+
+JuliaOJIT::PipelineT::PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel)
+: CompileLayer(BaseLayer.getExecutionSession(), BaseLayer,
+    std::make_unique<CompilerT>(orc::irManglingOptionsFromTargetOptions(TM.Options), TM, optlevel)),
+  OptimizeLayer(CompileLayer.getExecutionSession(), CompileLayer, OptimizerT(TM, optlevel)) {}
+
+JuliaOJIT::JuliaOJIT()
+  : TM(createTargetMachine()),
+    DL(jl_create_datalayout(*TM)),
+#if JL_LLVM_VERSION >= 130000
+    ES(cantFail(orc::SelfExecutorProcessControl::Create())),
+#else
     ES(),
-    SymbolResolver(llvm::orc::createLegacyLookupResolver(
-          ES,
-          [this](StringRef name) -> llvm::JITSymbol {
-            return this->resolveSymbol(name);
-          },
-          [](llvm::Error Err) {
-            cantFail(std::move(Err), "resolveSymbol failed");
-          })),
+#endif
+    GlobalJD(ES.createBareJITDylib("JuliaGlobals")),
+    JD(ES.createBareJITDylib("JuliaOJIT")),
+    ContextPool([](){
+        auto ctx = std::make_unique<LLVMContext>();
+#ifdef JL_LLVM_OPAQUE_POINTERS
+        ctx->enableOpaquePointers();
+#endif
+        return orc::ThreadSafeContext(std::move(ctx));
+    }),
+#ifdef JL_USE_JITLINK
+    // TODO: Port our memory management optimisations to JITLink instead of using the
+    // default InProcessMemoryManager.
+# if JL_LLVM_VERSION < 140000
+    ObjectLayer(ES, std::make_unique<jitlink::InProcessMemoryManager>()),
+# else
+    ObjectLayer(ES, cantFail(jitlink::InProcessMemoryManager::Create())),
+# endif
+#else
+    MemMgr(createRTDyldMemoryManager()),
     ObjectLayer(
-        AcknowledgeORCv1Deprecation,
-        ES,
-        [this](RTDyldObjHandleT) {
-                      ObjLayerT::Resources result;
-                      result.MemMgr = MemMgr;
-                      result.Resolver = SymbolResolver;
-                      return result;
-                    },
-        std::ref(registrar)
+            ES,
+            [this]() {
+                std::unique_ptr<RuntimeDyld::MemoryManager> result(new ForwardingMemoryManager(MemMgr));
+                return result;
+            }
         ),
-    CompileLayer(
-            AcknowledgeORCv1Deprecation,
-            ObjectLayer,
-            CompilerT(this)
-        )
+#endif
+    Pipelines{
+        std::make_unique<PipelineT>(ObjectLayer, *TM, 0),
+        std::make_unique<PipelineT>(ObjectLayer, *TM, 1),
+        std::make_unique<PipelineT>(ObjectLayer, *TM, 2),
+        std::make_unique<PipelineT>(ObjectLayer, *TM, 3),
+    },
+    OptSelLayer(Pipelines)
 {
-    for (int i = 0; i < 4; i++) {
-        TMs[i] = TM.getTarget().createTargetMachine(TM.getTargetTriple().getTriple(), TM.getTargetCPU(),
-                TM.getTargetFeatureString(), TM.Options, Reloc::Static, TM.getCodeModel(),
-                CodeGenOptLevelFor(i), true);
-    }
-    addPassesForOptLevel(PM0, *TMs[0], ObjStream, Ctx, 0);
-    addPassesForOptLevel(PM1, *TMs[1], ObjStream, Ctx, 1);
-    addPassesForOptLevel(PM2, *TMs[2], ObjStream, Ctx, 2);
-    addPassesForOptLevel(PM3, *TMs[3], ObjStream, Ctx, 3);
+#ifdef JL_USE_JITLINK
+# if defined(_OS_DARWIN_) && defined(LLVM_SHLIB)
+    // When dynamically linking against LLVM, use our custom EH frame registration code
+    // also used with RTDyld to inform both our and the libc copy of libunwind.
+    auto ehRegistrar = std::make_unique<JLEHFrameRegistrar>();
+# else
+    auto ehRegistrar = std::make_unique<jitlink::InProcessEHFrameRegistrar>();
+# endif
+    ObjectLayer.addPlugin(std::make_unique<EHFrameRegistrationPlugin>(
+        ES, std::move(ehRegistrar)));
+
+    ObjectLayer.addPlugin(std::make_unique<JLDebuginfoPlugin>());
+#else
+    ObjectLayer.setNotifyLoaded(
+        [this](orc::MaterializationResponsibility &MR,
+               const object::ObjectFile &Object,
+               const RuntimeDyld::LoadedObjectInfo &LO) {
+            registerRTDyldJITObject(Object, LO, MemMgr);
+        });
+#endif
 
     // Make sure SectionMemoryManager::getSymbolAddressInProcess can resolve
     // symbols in the program as well. The nullptr argument to the function
     // tells DynamicLibrary to load the program, not a library.
     std::string ErrorStr;
     if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr, &ErrorStr))
-        report_fatal_error("FATAL: unable to dlopen self\n" + ErrorStr);
+        report_fatal_error(llvm::Twine("FATAL: unable to dlopen self\n") + ErrorStr);
+
+    GlobalJD.addGenerator(
+      cantFail(orc::DynamicLibrarySearchGenerator::GetForCurrentProcess(
+        DL.getGlobalPrefix())));
+
+    // Resolve non-lock free atomic functions in the libatomic1 library.
+    // This is the library that provides support for c11/c++11 atomic operations.
+    const char *const libatomic =
+#if defined(_OS_LINUX_) || defined(_OS_FREEBSD_)
+        "libatomic.so.1";
+#elif defined(_OS_WINDOWS_)
+        "libatomic-1.dll";
+#else
+        NULL;
+#endif
+    if (libatomic) {
+        static void *atomic_hdl = jl_load_dynamic_library(libatomic, JL_RTLD_LOCAL, 0);
+        if (atomic_hdl != NULL) {
+            GlobalJD.addGenerator(
+              cantFail(orc::DynamicLibrarySearchGenerator::Load(
+                  libatomic,
+                  DL.getGlobalPrefix(),
+                  [&](const orc::SymbolStringPtr &S) {
+                        const char *const atomic_prefix = "__atomic_";
+                        return (*S).startswith(atomic_prefix);
+                  })));
+        }
+    }
+
+    JD.addToLinkOrder(GlobalJD, orc::JITDylibLookupFlags::MatchExportedSymbolsOnly);
 }
 
 void JuliaOJIT::addGlobalMapping(StringRef Name, uint64_t Addr)
 {
     std::string MangleName = getMangledName(Name);
-    bool successful = GlobalSymbolTable.insert(std::make_pair(MangleName, (void*)Addr)).second;
-    (void)successful;
-    assert(successful);
-}
-
-void *JuliaOJIT::getPointerToGlobalIfAvailable(StringRef S)
-{
-    SymbolTableT::const_iterator pos = GlobalSymbolTable.find(S);
-    if (pos != GlobalSymbolTable.end())
-        return pos->second;
-    return nullptr;
+    cantFail(JD.define(orc::absoluteSymbols({{ES.intern(MangleName), JITEvaluatedSymbol::fromPointer((void*)Addr)}})));
 }
 
-void *JuliaOJIT::getPointerToGlobalIfAvailable(const GlobalValue *GV)
+void JuliaOJIT::addModule(orc::ThreadSafeModule TSM)
 {
-    return getPointerToGlobalIfAvailable(getMangledName(GV));
-}
-
-
-void JuliaOJIT::addModule(std::unique_ptr<Module> M)
-{
-    std::vector<StringRef> NewExports;
-    for (auto &F : M->functions()) {
-        if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
-            NewExports.push_back(strdup(F.getName().str().c_str()));
+    JL_TIMING(LLVM_MODULE_FINISH);
+    std::vector<std::string> NewExports;
+    TSM.withModuleDo([&](Module &M) {
+        jl_decorate_module(M);
+        shareStrings(M);
+        for (auto &F : M.global_values()) {
+            if (!F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
+                NewExports.push_back(getMangledName(F.getName()));
+            }
         }
-    }
-#ifndef JL_NDEBUG
-    // validate the relocations for M
-    for (Module::global_object_iterator I = M->global_objects().begin(), E = M->global_objects().end(); I != E; ) {
-        GlobalObject *F = &*I;
-        ++I;
-        if (F->isDeclaration()) {
-            if (F->use_empty())
-                F->eraseFromParent();
-            else if (!((isa<Function>(F) && isIntrinsicFunction(cast<Function>(F))) ||
-                       findUnmangledSymbol(F->getName()) ||
-                       SectionMemoryManager::getSymbolAddressInProcess(
-                           getMangledName(F->getName())))) {
-                llvm::errs() << "FATAL ERROR: "
-                             << "Symbol \"" << F->getName().str() << "\""
-                             << "not found";
-                abort();
+#if !defined(JL_NDEBUG) && !defined(JL_USE_JITLINK)
+        // validate the relocations for M (not implemented for the JITLink memory manager yet)
+        for (Module::global_object_iterator I = M.global_objects().begin(), E = M.global_objects().end(); I != E; ) {
+            GlobalObject *F = &*I;
+            ++I;
+            if (F->isDeclaration()) {
+                if (F->use_empty())
+                    F->eraseFromParent();
+                else if (!((isa<Function>(F) && isIntrinsicFunction(cast<Function>(F))) ||
+                        findUnmangledSymbol(F->getName()) ||
+                        SectionMemoryManager::getSymbolAddressInProcess(
+                            getMangledName(F->getName())))) {
+                    llvm::errs() << "FATAL ERROR: "
+                                << "Symbol \"" << F->getName().str() << "\""
+                                << "not found";
+                    abort();
+                }
             }
         }
-    }
 #endif
-    JL_TIMING(LLVM_MODULE_FINISH);
-
-    auto key = ES.allocateVModule();
-    cantFail(CompileLayer.addModule(key, std::move(M)));
-    // Force LLVM to emit the module so that we can register the symbols
-    // in our lookup table.
-    Error Err = CompileLayer.emitAndFinalize(key);
-    // Check for errors to prevent LLVM from crashing the program.
-    if (Err)
-        report_fatal_error(std::move(Err));
-    // record a stable name for this fptr address
-    for (auto Name : NewExports) {
-        void *addr = LocalSymbolTable[getMangledName(Name)];
-        ReverseLocalSymbolTable[addr] = Name;
-    }
-}
+    });
+    // TODO: what is the performance characteristics of this?
+    cantFail(OptSelLayer.add(JD, std::move(TSM)));
+    // force eager compilation (for now), due to memory management specifics
+    // (can't handle compilation recursion)
+    for (auto Name : NewExports)
+        cantFail(ES.lookup({&JD}, Name));
 
-void JuliaOJIT::removeModule(ModuleHandleT H)
-{
-    (void)CompileLayer.removeModule(H);
 }
 
 JL_JITSymbol JuliaOJIT::findSymbol(StringRef Name, bool ExportedSymbolsOnly)
 {
-    void *Addr = nullptr;
-    if (ExportedSymbolsOnly) {
-        // Step 1: Check against list of known external globals
-        Addr = getPointerToGlobalIfAvailable(Name);
-    }
-    // Step 2: Search all previously emitted symbols
-    if (Addr == nullptr) {
-        auto it = LocalSymbolTable.find(Name);
-        if (it != LocalSymbolTable.end())
-            Addr = it->second;
-    }
-    return JL_JITSymbol((uintptr_t)Addr, JITSymbolFlags::Exported);
+    orc::JITDylib* SearchOrders[2] = {&GlobalJD, &JD};
+    ArrayRef<orc::JITDylib*> SearchOrder = makeArrayRef(&SearchOrders[ExportedSymbolsOnly ? 0 : 1], ExportedSymbolsOnly ? 2 : 1);
+    auto Sym = ES.lookup(SearchOrder, Name);
+    if (Sym)
+        return *Sym;
+    return Sym.takeError();
 }
 
 JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name)
@@ -695,90 +1160,92 @@ JL_JITSymbol JuliaOJIT::findUnmangledSymbol(StringRef Name)
     return findSymbol(getMangledName(Name), true);
 }
 
-JL_JITSymbol JuliaOJIT::resolveSymbol(StringRef Name)
-{
-    // Step 0: ObjectLinkingLayer has checked whether it is in the current module
-    // Step 1: See if it's something known to the ExecutionEngine
-    if (auto Sym = findSymbol(Name, true)) {
-        // `findSymbol` already eagerly resolved the address
-        // return it directly.
-        return Sym;
-    }
-    // Step 2: Search the program symbols
-    if (uint64_t addr = SectionMemoryManager::getSymbolAddressInProcess(Name.str()))
-        return JL_SymbolInfo(addr, JITSymbolFlags::Exported);
-#if defined(_OS_LINUX_) || defined(_OS_WINDOWS_) || defined(_OS_FREEBSD_)
-    if (uint64_t addr = resolve_atomic(Name.str().c_str()))
-        return JL_SymbolInfo(addr, JITSymbolFlags::Exported);
-#endif
-    // Return failure code
-    return JL_SymbolInfo(nullptr);
-}
-
 uint64_t JuliaOJIT::getGlobalValueAddress(StringRef Name)
 {
-    auto addr = findSymbol(getMangledName(Name), false).getAddress();
-    return addr ? addr.get() : 0;
+    auto addr = findSymbol(getMangledName(Name), false);
+    if (!addr) {
+        consumeError(addr.takeError());
+        return 0;
+    }
+    return cantFail(addr.getAddress());
 }
 
 uint64_t JuliaOJIT::getFunctionAddress(StringRef Name)
 {
-    auto addr = findSymbol(getMangledName(Name), false).getAddress();
-    return addr ? addr.get() : 0;
+    auto addr = findSymbol(getMangledName(Name), false);
+    if (!addr) {
+        consumeError(addr.takeError());
+        return 0;
+    }
+    return cantFail(addr.getAddress());
 }
 
-static int globalUniqueGeneratedNames;
 StringRef JuliaOJIT::getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst)
 {
-    auto &fname = ReverseLocalSymbolTable[(void*)(uintptr_t)Addr];
-    if (fname.empty()) {
+    std::lock_guard<std::mutex> lock(RLST_mutex);
+    std::string *fname = &ReverseLocalSymbolTable[(void*)(uintptr_t)Addr];
+    if (fname->empty()) {
         std::string string_fname;
         raw_string_ostream stream_fname(string_fname);
         // try to pick an appropriate name that describes it
-        if (Addr == (uintptr_t)codeinst->invoke) {
+        jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
+        if (Addr == (uintptr_t)invoke) {
             stream_fname << "jsysw_";
         }
-        else if (codeinst->invoke == &jl_fptr_args) {
+        else if (invoke == jl_fptr_args_addr) {
             stream_fname << "jsys1_";
         }
-        else if (codeinst->invoke == &jl_fptr_sparam) {
+        else if (invoke == jl_fptr_sparam_addr) {
             stream_fname << "jsys3_";
         }
         else {
             stream_fname << "jlsys_";
         }
         const char* unadorned_name = jl_symbol_name(codeinst->def->def.method->name);
-        stream_fname << unadorned_name << "_" << globalUniqueGeneratedNames++;
-        fname = strdup(stream_fname.str().c_str());
-        LocalSymbolTable[getMangledName(string_fname)] = (void*)(uintptr_t)Addr;
+        stream_fname << unadorned_name << "_" << RLST_inc++;
+        *fname = std::move(stream_fname.str()); // store to ReverseLocalSymbolTable
+        addGlobalMapping(*fname, Addr);
     }
-    return fname;
+    return *fname;
 }
 
 
-void JuliaOJIT::RegisterJITEventListener(JITEventListener *L)
+#ifdef JL_USE_JITLINK
+# if JL_LLVM_VERSION < 140000
+#  pragma message("JIT debugging (GDB integration) not available on LLVM < 14.0 (for JITLink)")
+void JuliaOJIT::enableJITDebuggingSupport() {}
+# else
+extern "C" orc::shared::CWrapperFunctionResult
+llvm_orc_registerJITLoaderGDBAllocAction(const char *Data, size_t Size);
+
+void JuliaOJIT::enableJITDebuggingSupport()
 {
-    if (!L)
-        return;
-    EventListeners.push_back(L);
+    // We do not use GDBJITDebugInfoRegistrationPlugin::Create, as the runtime name
+    // lookup is unnecessarily involved/fragile for our in-process JIT use case
+    // (with the llvm_orc_registerJITLoaderGDBAllocAction symbol being in either
+    // libjulia-codegen or yet another shared library for LLVM depending on the build
+    // flags, etc.).
+    const auto Addr = ExecutorAddr::fromPtr(&llvm_orc_registerJITLoaderGDBAllocAction);
+    ObjectLayer.addPlugin(std::make_unique<orc::GDBJITDebugInfoRegistrationPlugin>(Addr));
 }
-
-void JuliaOJIT::NotifyFinalizer(RTDyldObjHandleT Key,
-                                const object::ObjectFile &Obj,
-                                const RuntimeDyld::LoadedObjectInfo &LoadedObjectInfo)
+# endif
+#else
+void JuliaOJIT::enableJITDebuggingSupport()
 {
-    for (auto &Listener : EventListeners)
-        Listener->notifyObjectLoaded(Key, Obj, LoadedObjectInfo);
+    RegisterJITEventListener(JITEventListener::createGDBRegistrationListener());
 }
 
-const DataLayout& JuliaOJIT::getDataLayout() const
+void JuliaOJIT::RegisterJITEventListener(JITEventListener *L)
 {
-    return DL;
+    if (!L)
+        return;
+    this->ObjectLayer.registerJITEventListener(*L);
 }
+#endif
 
-const Triple& JuliaOJIT::getTargetTriple() const
+const DataLayout& JuliaOJIT::getDataLayout() const
 {
-    return TM.getTargetTriple();
+    return DL;
 }
 
 std::string JuliaOJIT::getMangledName(StringRef Name)
@@ -793,6 +1260,21 @@ std::string JuliaOJIT::getMangledName(const GlobalValue *GV)
     return getMangledName(GV->getName());
 }
 
+#ifdef JL_USE_JITLINK
+size_t JuliaOJIT::getTotalBytes() const
+{
+    // TODO: Implement in future custom JITLink memory manager.
+    return 0;
+}
+#else
+size_t getRTDyldMemoryManagerTotalBytes(RTDyldMemoryManager *mm);
+
+size_t JuliaOJIT::getTotalBytes() const
+{
+    return getRTDyldMemoryManagerTotalBytes(MemMgr.get());
+}
+#endif
+
 JuliaOJIT *jl_ExecutionEngine;
 
 // destructively move the contents of src into dest
@@ -800,113 +1282,120 @@ JuliaOJIT *jl_ExecutionEngine;
 // including the DataLayout and ModuleFlags (for example)
 // and that there is no module-level assembly
 // Comdat is also removed, since the JIT doesn't need it
-void jl_merge_module(Module *dest, std::unique_ptr<Module> src)
+void jl_merge_module(orc::ThreadSafeModule &destTSM, orc::ThreadSafeModule srcTSM)
 {
-    assert(dest != src.get());
-    for (Module::global_iterator I = src->global_begin(), E = src->global_end(); I != E;) {
-        GlobalVariable *sG = &*I;
-        GlobalVariable *dG = cast_or_null<GlobalVariable>(dest->getNamedValue(sG->getName()));
-        ++I;
-        // Replace a declaration with the definition:
-        if (dG) {
-            if (sG->isDeclaration()) {
-                sG->replaceAllUsesWith(dG);
-                sG->eraseFromParent();
-                continue;
-            }
-            //// If we start using llvm.used, we need to enable and test this
-            //else if (!dG->isDeclaration() && dG->hasAppendingLinkage() && sG->hasAppendingLinkage()) {
-            //    auto *dCA = cast<ConstantArray>(dG->getInitializer());
-            //    auto *sCA = cast<ConstantArray>(sG->getInitializer());
-            //    SmallVector<Constant *, 16> Init;
-            //    for (auto &Op : dCA->operands())
-            //        Init.push_back(cast_or_null<Constant>(Op));
-            //    for (auto &Op : sCA->operands())
-            //        Init.push_back(cast_or_null<Constant>(Op));
-            //    Type *Int8PtrTy = Type::getInt8PtrTy(dest.getContext());
-            //    ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
-            //    GlobalVariable *GV = new GlobalVariable(dest, ATy, dG->isConstant(),
-            //            GlobalValue::AppendingLinkage, ConstantArray::get(ATy, Init), "",
-            //            dG->getThreadLocalMode(), dG->getType()->getAddressSpace());
-            //    GV->copyAttributesFrom(dG);
-            //    sG->replaceAllUsesWith(GV);
-            //    dG->replaceAllUsesWith(GV);
-            //    GV->takeName(sG);
-            //    sG->eraseFromParent();
-            //    dG->eraseFromParent();
-            //    continue;
-            //}
-            else {
-                assert(dG->isDeclaration() || (dG->getInitializer() == sG->getInitializer() &&
-                            dG->isConstant() && sG->isConstant()));
-                dG->replaceAllUsesWith(sG);
-                dG->eraseFromParent();
+    destTSM.withModuleDo([&](Module &dest) {
+        srcTSM.withModuleDo([&](Module &src) {
+            assert(&dest != &src && "Cannot merge module with itself!");
+            assert(&dest.getContext() == &src.getContext() && "Cannot merge modules with different contexts!");
+            assert(dest.getDataLayout() == src.getDataLayout() && "Cannot merge modules with different data layouts!");
+            assert(dest.getTargetTriple() == src.getTargetTriple() && "Cannot merge modules with different target triples!");
+
+            for (Module::global_iterator I = src.global_begin(), E = src.global_end(); I != E;) {
+                GlobalVariable *sG = &*I;
+                GlobalVariable *dG = cast_or_null<GlobalVariable>(dest.getNamedValue(sG->getName()));
+                ++I;
+                // Replace a declaration with the definition:
+                if (dG) {
+                    if (sG->isDeclaration()) {
+                        sG->replaceAllUsesWith(dG);
+                        sG->eraseFromParent();
+                        continue;
+                    }
+                    //// If we start using llvm.used, we need to enable and test this
+                    //else if (!dG->isDeclaration() && dG->hasAppendingLinkage() && sG->hasAppendingLinkage()) {
+                    //    auto *dCA = cast<ConstantArray>(dG->getInitializer());
+                    //    auto *sCA = cast<ConstantArray>(sG->getInitializer());
+                    //    SmallVector<Constant *, 16> Init;
+                    //    for (auto &Op : dCA->operands())
+                    //        Init.push_back(cast_or_null<Constant>(Op));
+                    //    for (auto &Op : sCA->operands())
+                    //        Init.push_back(cast_or_null<Constant>(Op));
+                    //    Type *Int8PtrTy = Type::getInt8PtrTy(dest.getContext());
+                    //    ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
+                    //    GlobalVariable *GV = new GlobalVariable(dest, ATy, dG->isConstant(),
+                    //            GlobalValue::AppendingLinkage, ConstantArray::get(ATy, Init), "",
+                    //            dG->getThreadLocalMode(), dG->getType()->getAddressSpace());
+                    //    GV->copyAttributesFrom(dG);
+                    //    sG->replaceAllUsesWith(GV);
+                    //    dG->replaceAllUsesWith(GV);
+                    //    GV->takeName(sG);
+                    //    sG->eraseFromParent();
+                    //    dG->eraseFromParent();
+                    //    continue;
+                    //}
+                    else {
+                        assert(dG->isDeclaration() || dG->getInitializer() == sG->getInitializer());
+                        dG->replaceAllUsesWith(sG);
+                        dG->eraseFromParent();
+                    }
+                }
+                // Reparent the global variable:
+                sG->removeFromParent();
+                dest.getGlobalList().push_back(sG);
+                // Comdat is owned by the Module
+                sG->setComdat(nullptr);
             }
-        }
-        // Reparent the global variable:
-        sG->removeFromParent();
-        dest->getGlobalList().push_back(sG);
-        // Comdat is owned by the Module
-        sG->setComdat(nullptr);
-    }
 
-    for (Module::iterator I = src->begin(), E = src->end(); I != E;) {
-        Function *sG = &*I;
-        Function *dG = cast_or_null<Function>(dest->getNamedValue(sG->getName()));
-        ++I;
-        // Replace a declaration with the definition:
-        if (dG) {
-            if (sG->isDeclaration()) {
-                sG->replaceAllUsesWith(dG);
-                sG->eraseFromParent();
-                continue;
-            }
-            else {
-                assert(dG->isDeclaration());
-                dG->replaceAllUsesWith(sG);
-                dG->eraseFromParent();
+            for (Module::iterator I = src.begin(), E = src.end(); I != E;) {
+                Function *sG = &*I;
+                Function *dG = cast_or_null<Function>(dest.getNamedValue(sG->getName()));
+                ++I;
+                // Replace a declaration with the definition:
+                if (dG) {
+                    if (sG->isDeclaration()) {
+                        sG->replaceAllUsesWith(dG);
+                        sG->eraseFromParent();
+                        continue;
+                    }
+                    else {
+                        assert(dG->isDeclaration());
+                        dG->replaceAllUsesWith(sG);
+                        dG->eraseFromParent();
+                    }
+                }
+                // Reparent the global variable:
+                sG->removeFromParent();
+                dest.getFunctionList().push_back(sG);
+                // Comdat is owned by the Module
+                sG->setComdat(nullptr);
             }
-        }
-        // Reparent the global variable:
-        sG->removeFromParent();
-        dest->getFunctionList().push_back(sG);
-        // Comdat is owned by the Module
-        sG->setComdat(nullptr);
-    }
 
-    for (Module::alias_iterator I = src->alias_begin(), E = src->alias_end(); I != E;) {
-        GlobalAlias *sG = &*I;
-        GlobalAlias *dG = cast_or_null<GlobalAlias>(dest->getNamedValue(sG->getName()));
-        ++I;
-        if (dG) {
-            if (!dG->isDeclaration()) { // aliases are always definitions, so this test is reversed from the above two
-                sG->replaceAllUsesWith(dG);
-                sG->eraseFromParent();
-                continue;
-            }
-            else {
-                dG->replaceAllUsesWith(sG);
-                dG->eraseFromParent();
+            for (Module::alias_iterator I = src.alias_begin(), E = src.alias_end(); I != E;) {
+                GlobalAlias *sG = &*I;
+                GlobalAlias *dG = cast_or_null<GlobalAlias>(dest.getNamedValue(sG->getName()));
+                ++I;
+                if (dG) {
+                    if (!dG->isDeclaration()) { // aliases are always definitions, so this test is reversed from the above two
+                        sG->replaceAllUsesWith(dG);
+                        sG->eraseFromParent();
+                        continue;
+                    }
+                    else {
+                        dG->replaceAllUsesWith(sG);
+                        dG->eraseFromParent();
+                    }
+                }
+                sG->removeFromParent();
+                dest.getAliasList().push_back(sG);
             }
-        }
-        sG->removeFromParent();
-        dest->getAliasList().push_back(sG);
-    }
 
-    // metadata nodes need to be explicitly merged not just copied
-    // so there are special passes here for each known type of metadata
-    NamedMDNode *sNMD = src->getNamedMetadata("llvm.dbg.cu");
-    if (sNMD) {
-        NamedMDNode *dNMD = dest->getOrInsertNamedMetadata("llvm.dbg.cu");
-        for (NamedMDNode::op_iterator I = sNMD->op_begin(), E = sNMD->op_end(); I != E; ++I) {
-            dNMD->addOperand(*I);
-        }
-    }
+            // metadata nodes need to be explicitly merged not just copied
+            // so there are special passes here for each known type of metadata
+            NamedMDNode *sNMD = src.getNamedMetadata("llvm.dbg.cu");
+            if (sNMD) {
+                NamedMDNode *dNMD = dest.getOrInsertNamedMetadata("llvm.dbg.cu");
+                for (NamedMDNode::op_iterator I = sNMD->op_begin(), E = sNMD->op_end(); I != E; ++I) {
+                    dNMD->addOperand(*I);
+                }
+            }
+        });
+    });
 }
 
 // optimize memory by turning long strings into memoized copies, instead of
 // making a copy per object file of output.
-void jl_jit_share_data(Module &M)
+void JuliaOJIT::shareStrings(Module &M)
 {
     std::vector<GlobalVariable*> erase;
     for (auto &GV : M.globals()) {
@@ -919,7 +1408,7 @@ void jl_jit_share_data(Module &M)
         if (data.size() > 16) { // only for long strings: keep short ones as values
             Type *T_size = Type::getIntNTy(GV.getContext(), sizeof(void*) * 8);
             Constant *v = ConstantExpr::getIntToPtr(
-                ConstantInt::get(T_size, (uintptr_t)data.data()),
+                ConstantInt::get(T_size, (uintptr_t)(*ES.intern(data)).data()),
                 GV.getType());
             GV.replaceAllUsesWith(v);
             erase.push_back(&GV);
@@ -929,34 +1418,62 @@ void jl_jit_share_data(Module &M)
         GV->eraseFromParent();
 }
 
-static void jl_add_to_ee(std::unique_ptr<Module> m)
+//TargetMachine pass-through methods
+
+std::unique_ptr<TargetMachine> JuliaOJIT::cloneTargetMachine() const
 {
-    JL_TIMING(LLVM_EMIT);
+    return std::unique_ptr<TargetMachine>(getTarget()
+        .createTargetMachine(
+            getTargetTriple().str(),
+            getTargetCPU(),
+            getTargetFeatureString(),
+            getTargetOptions(),
+            TM->getRelocationModel(),
+            TM->getCodeModel(),
+            TM->getOptLevel()));
+}
+
+const Triple& JuliaOJIT::getTargetTriple() const {
+    return TM->getTargetTriple();
+}
+StringRef JuliaOJIT::getTargetFeatureString() const {
+    return TM->getTargetFeatureString();
+}
+StringRef JuliaOJIT::getTargetCPU() const {
+    return TM->getTargetCPU();
+}
+const TargetOptions &JuliaOJIT::getTargetOptions() const {
+    return TM->Options;
+}
+const Target &JuliaOJIT::getTarget() const {
+    return TM->getTarget();
+}
+TargetIRAnalysis JuliaOJIT::getTargetIRAnalysis() const {
+    return TM->getTargetIRAnalysis();
+}
+
+static void jl_decorate_module(Module &M) {
 #if defined(_CPU_X86_64_) && defined(_OS_WINDOWS_)
     // Add special values used by debuginfo to build the UnwindData table registration for Win64
-    Type *T_uint32 = Type::getInt32Ty(m->getContext());
-    ArrayType *atype = ArrayType::get(T_uint32, 3); // want 4-byte alignment of 12-bytes of data
+    ArrayType *atype = ArrayType::get(Type::getInt32Ty(M.getContext()), 3); // want 4-byte alignment of 12-bytes of data
     GlobalVariable *gvs[2] = {
-        new GlobalVariable(*m, atype,
+        new GlobalVariable(M, atype,
             false, GlobalVariable::InternalLinkage,
             ConstantAggregateZero::get(atype), "__UnwindData"),
-        new GlobalVariable(*m, atype,
+        new GlobalVariable(M, atype,
             false, GlobalVariable::InternalLinkage,
             ConstantAggregateZero::get(atype), "__catchjmp") };
     gvs[0]->setSection(".text");
     gvs[1]->setSection(".text");
-    appendToUsed(*m, makeArrayRef((GlobalValue**)gvs, 2));
+    appendToCompilerUsed(M, makeArrayRef((GlobalValue**)gvs, 2));
 #endif
-    jl_jit_share_data(*m);
-    assert(jl_ExecutionEngine);
-    jl_ExecutionEngine->addModule(std::move(m));
 }
 
 static int jl_add_to_ee(
-        std::unique_ptr<Module> &M,
-        StringMap<std::unique_ptr<Module>*> &NewExports,
-        DenseMap<Module*, int> &Queued,
-        std::vector<std::vector<std::unique_ptr<Module>*>> &ToMerge,
+        orc::ThreadSafeModule &M,
+        StringMap<orc::ThreadSafeModule*> &NewExports,
+        DenseMap<orc::ThreadSafeModule*, int> &Queued,
+        std::vector<std::vector<orc::ThreadSafeModule*>> &ToMerge,
         int depth)
 {
     // DAG-sort (post-dominator) the compile to compute the minimum
@@ -965,7 +1482,7 @@ static int jl_add_to_ee(
         return 0;
     // First check and record if it's on the stack somewhere
     {
-        auto &Cycle = Queued[M.get()];
+        auto &Cycle = Queued[&M];
         if (Cycle)
             return Cycle;
         ToMerge.push_back({});
@@ -973,37 +1490,39 @@ static int jl_add_to_ee(
     }
     int MergeUp = depth;
     // Compute the cycle-id
-    for (auto &F : M->global_objects()) {
-        if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
-            auto Callee = NewExports.find(F.getName());
-            if (Callee != NewExports.end()) {
-                auto &CM = Callee->second;
-                int Down = jl_add_to_ee(*CM, NewExports, Queued, ToMerge, depth + 1);
-                assert(Down <= depth);
-                if (Down && Down < MergeUp)
-                    MergeUp = Down;
+    M.withModuleDo([&](Module &m) {
+        for (auto &F : m.global_objects()) {
+            if (F.isDeclaration() && F.getLinkage() == GlobalValue::ExternalLinkage) {
+                auto Callee = NewExports.find(F.getName());
+                if (Callee != NewExports.end()) {
+                    auto &CM = Callee->second;
+                    int Down = jl_add_to_ee(*CM, NewExports, Queued, ToMerge, depth + 1);
+                    assert(Down <= depth);
+                    if (Down && Down < MergeUp)
+                        MergeUp = Down;
+                }
             }
         }
-    }
+    });
     if (MergeUp == depth) {
         // Not in a cycle (or at the top of it)
-        Queued.erase(M.get());
+        Queued.erase(&M);
         for (auto &CM : ToMerge.at(depth - 1)) {
-            assert(Queued.find(CM->get())->second == depth);
-            Queued.erase(CM->get());
-            jl_merge_module(M.get(), std::move(*CM));
+            assert(Queued.find(CM)->second == depth);
+            Queued.erase(CM);
+            jl_merge_module(M, std::move(*CM));
         }
-        jl_add_to_ee(std::move(M));
+        jl_ExecutionEngine->addModule(std::move(M));
         MergeUp = 0;
     }
     else {
         // Add our frame(s) to the top of the cycle
-        Queued[M.get()] = MergeUp;
+        Queued[&M] = MergeUp;
         auto &Top = ToMerge.at(MergeUp - 1);
         Top.push_back(&M);
         for (auto &CM : ToMerge.at(depth - 1)) {
-            assert(Queued.find(CM->get())->second == depth);
-            Queued[CM->get()] = MergeUp;
+            assert(Queued.find(CM)->second == depth);
+            Queued[CM] = MergeUp;
             Top.push_back(CM);
         }
     }
@@ -1011,10 +1530,10 @@ static int jl_add_to_ee(
     return MergeUp;
 }
 
-static void jl_add_to_ee(std::unique_ptr<Module> &M, StringMap<std::unique_ptr<Module>*> &NewExports)
+static void jl_add_to_ee(orc::ThreadSafeModule &M, StringMap<orc::ThreadSafeModule*> &NewExports)
 {
-    DenseMap<Module*, int> Queued;
-    std::vector<std::vector<std::unique_ptr<Module>*>> ToMerge;
+    DenseMap<orc::ThreadSafeModule*, int> Queued;
+    std::vector<std::vector<orc::ThreadSafeModule*>> ToMerge;
     jl_add_to_ee(M, NewExports, Queued, ToMerge, 1);
     assert(!M);
 }
@@ -1022,7 +1541,9 @@ static void jl_add_to_ee(std::unique_ptr<Module> &M, StringMap<std::unique_ptr<M
 
 static uint64_t getAddressForFunction(StringRef fname)
 {
-    return jl_ExecutionEngine->getFunctionAddress(fname);
+    auto addr = jl_ExecutionEngine->getFunctionAddress(fname);
+    assert(addr);
+    return addr;
 }
 
 // helper function for adding a DLLImport (dlsym) address to the execution engine
@@ -1030,3 +1551,9 @@ void add_named_global(StringRef name, void *addr)
 {
     jl_ExecutionEngine->addGlobalMapping(name, (uint64_t)(uintptr_t)addr);
 }
+
+extern "C" JL_DLLEXPORT
+size_t jl_jit_total_bytes_impl(void)
+{
+    return jl_ExecutionEngine->getTotalBytes();
+}
diff --git a/src/jitlayers.h b/src/jitlayers.h
index 8dd45c1f939f52..88129b65e8dd23 100644
--- a/src/jitlayers.h
+++ b/src/jitlayers.h
@@ -1,33 +1,98 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include <llvm/ADT/MapVector.h>
+
 #include <llvm/IR/LLVMContext.h>
 #include <llvm/IR/Constants.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Value.h>
-#include "llvm/IR/LegacyPassManager.h"
+#include <llvm/IR/PassManager.h>
+#include <llvm/IR/LegacyPassManager.h>
 
-#include <llvm/ExecutionEngine/SectionMemoryManager.h>
-#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
-#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
-#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
-#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
-#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h"
-#include "llvm/ExecutionEngine/JITEventListener.h"
+#include <llvm/ExecutionEngine/Orc/IRCompileLayer.h>
+#include <llvm/ExecutionEngine/Orc/IRTransformLayer.h>
+#include <llvm/ExecutionEngine/JITEventListener.h>
 
 #include <llvm/Target/TargetMachine.h>
 #include "julia_assert.h"
+#include "debug-registry.h"
+
+#include <stack>
+#include <queue>
+
+// As of LLVM 13, there are two runtime JIT linker implementations, the older
+// RuntimeDyld (used via orc::RTDyldObjectLinkingLayer) and the newer JITLink
+// (used via orc::ObjectLinkingLayer).
+//
+// JITLink is not only more flexible (which isn't of great importance for us, as
+// we do only single-threaded in-process codegen), but crucially supports using
+// the Small code model, where the linker needs to fix up relocations between
+// object files that end up far apart in address space. RuntimeDyld can't do
+// that and relies on the Large code model instead, which is broken on
+// aarch64-darwin (macOS on ARM64), and not likely to ever be supported there
+// (see https://bugs.llvm.org/show_bug.cgi?id=52029).
+//
+// However, JITLink is a relatively young library and lags behind in platform
+// and feature support (e.g. Windows, JITEventListeners for various profilers,
+// etc.). Thus, we currently only use JITLink where absolutely required, that is,
+// for Mac/aarch64.
+#if defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
+# if JL_LLVM_VERSION < 130000
+#  pragma message("On aarch64-darwin, LLVM version >= 13 is required for JITLink; fallback suffers from occasional segfaults")
+# endif
+# define JL_USE_JITLINK
+#endif
+
+#ifdef JL_USE_JITLINK
+# include <llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h>
+#else
+# include <llvm/ExecutionEngine/RTDyldMemoryManager.h>
+# include <llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h>
+#endif
 
 using namespace llvm;
 
-extern TargetMachine *jl_TargetMachine;
-extern bool imaging_mode;
+extern "C" jl_cgparams_t jl_default_cgparams;
 
-void addTargetPasses(legacy::PassManagerBase *PM, TargetMachine *TM);
-void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false);
-void jl_finalize_module(std::unique_ptr<Module>  m);
-void jl_merge_module(Module *dest, std::unique_ptr<Module> src);
-Module *jl_create_llvm_module(StringRef name);
+void addTargetPasses(legacy::PassManagerBase *PM, const Triple &triple, TargetIRAnalysis analysis);
+void addOptimizationPasses(legacy::PassManagerBase *PM, int opt_level, bool lower_intrinsics=true, bool dump_native=false, bool external_use=false);
+void addMachinePasses(legacy::PassManagerBase *PM, int optlevel);
+void jl_finalize_module(orc::ThreadSafeModule  m);
+void jl_merge_module(orc::ThreadSafeModule &dest, orc::ThreadSafeModule src);
 GlobalVariable *jl_emit_RTLD_DEFAULT_var(Module *M);
+DataLayout jl_create_datalayout(TargetMachine &TM);
+
+static inline bool imaging_default() {
+    return jl_options.image_codegen || (jl_generating_output() && !jl_options.incremental);
+}
+
+struct jl_locked_stream {
+    JL_STREAM *stream = nullptr;
+    std::mutex mutex;
+
+    struct lock {
+        std::unique_lock<std::mutex> lck;
+        JL_STREAM *&stream;
+
+        lock(std::mutex &mutex, JL_STREAM *&stream) : lck(mutex), stream(stream) {}
+
+        JL_STREAM *&operator*() {
+            return stream;
+        }
+
+        explicit operator bool() {
+            return !!stream;
+        }
+
+        operator JL_STREAM *() {
+            return stream;
+        }
+    };
+
+    lock operator*() {
+        return lock(mutex, stream);
+    }
+};
 
 typedef struct _jl_llvm_functions_t {
     std::string functionObject;     // jlcall llvm Function name
@@ -49,13 +114,19 @@ struct jl_returninfo_t {
     unsigned return_roots;
 };
 
-typedef std::vector<std::tuple<jl_code_instance_t*, jl_returninfo_t::CallingConv, unsigned, llvm::Function*, bool>> jl_codegen_call_targets_t;
-typedef std::tuple<std::unique_ptr<Module>, jl_llvm_functions_t> jl_compile_result_t;
+struct jl_llvmf_dump_t {
+    orc::ThreadSafeModule TSM;
+    Function *F;
+};
+
+typedef std::tuple<jl_returninfo_t::CallingConv, unsigned, llvm::Function*, bool> jl_codegen_call_target_t;
 
-typedef struct {
+typedef struct _jl_codegen_params_t {
+    orc::ThreadSafeContext tsctx;
+    orc::ThreadSafeContext::Lock tsctx_lock;
     typedef StringMap<GlobalVariable*> SymMapGV;
     // outputs
-    jl_codegen_call_targets_t workqueue;
+    std::vector<std::pair<jl_code_instance_t*, jl_codegen_call_target_t>> workqueue;
     std::map<void*, GlobalVariable*> globals;
     std::map<jl_datatype_t*, DIType*> ditypes;
     std::map<jl_datatype_t*, Type*> llvmtypes;
@@ -65,7 +136,8 @@ typedef struct {
     StringMap<std::pair<GlobalVariable*,SymMapGV>> libMapGV;
 #ifdef _OS_WINDOWS_
     SymMapGV symMapExe;
-    SymMapGV symMapDl;
+    SymMapGV symMapDll;
+    SymMapGV symMapDlli;
 #endif
     SymMapGV symMapDefault;
     // Map from distinct callee's to its GOT entry.
@@ -77,36 +149,40 @@ typedef struct {
     DenseMap<AttributeList, std::map<
         std::tuple<GlobalVariable*, FunctionType*, CallingConv::ID>,
         GlobalVariable*>> allPltMap;
-    Module *_shared_module = NULL;
-    Module *shared_module(LLVMContext &context) {
-        if (!_shared_module)
-            _shared_module = jl_create_llvm_module("globals");
-        return _shared_module;
-    }
+    orc::ThreadSafeModule _shared_module;
+    inline orc::ThreadSafeModule &shared_module(Module &from);
     // inputs
     size_t world = 0;
     const jl_cgparams_t *params = &jl_default_cgparams;
     bool cache = false;
+    bool imaging;
+    _jl_codegen_params_t(orc::ThreadSafeContext ctx) : tsctx(std::move(ctx)), tsctx_lock(tsctx.getLock()), imaging(imaging_default()) {}
 } jl_codegen_params_t;
 
-jl_compile_result_t jl_emit_code(
+jl_llvm_functions_t jl_emit_code(
+        orc::ThreadSafeModule &M,
         jl_method_instance_t *mi,
         jl_code_info_t *src,
         jl_value_t *jlrettype,
         jl_codegen_params_t &params);
 
-jl_compile_result_t jl_emit_codeinst(
+jl_llvm_functions_t jl_emit_codeinst(
+        orc::ThreadSafeModule &M,
         jl_code_instance_t *codeinst,
         jl_code_info_t *src,
         jl_codegen_params_t &params);
 
 enum CompilationPolicy {
     Default = 0,
-    Extern = 1
+    Extern = 1,
+    ImagingMode = 2
 };
 
+typedef std::map<jl_code_instance_t*, std::pair<orc::ThreadSafeModule, jl_llvm_functions_t>> jl_workqueue_t;
+
 void jl_compile_workqueue(
-    std::map<jl_code_instance_t*, jl_compile_result_t> &emitted,
+    jl_workqueue_t &emitted,
+    Module &original,
     jl_codegen_params_t &params,
     CompilationPolicy policy);
 
@@ -127,107 +203,264 @@ static inline Constant *literal_static_pointer_val(const void *p, Type *T)
 #endif
 }
 
-static const inline char *name_from_method_instance(jl_method_instance_t *li)
+static const inline char *name_from_method_instance(jl_method_instance_t *li) JL_NOTSAFEPOINT
 {
     return jl_is_method(li->def.method) ? jl_symbol_name(li->def.method->name) : "top-level scope";
 }
 
-
-void jl_init_jit(void);
-
 typedef JITSymbol JL_JITSymbol;
 // The type that is similar to SymbolInfo on LLVM 4.0 is actually
 // `JITEvaluatedSymbol`. However, we only use this type when a JITSymbol
 // is expected.
 typedef JITSymbol JL_SymbolInfo;
 
-using RTDyldObjHandleT = orc::VModuleKey;
-#if JL_LLVM_VERSION >= 100000
-using CompilerResultT = Expected<orc::LegacyRTDyldObjectLinkingLayerBase::ObjectPtr>;
-#else
-using CompilerResultT = std::unique_ptr<llvm::MemoryBuffer>;
-#endif
+using CompilerResultT = Expected<std::unique_ptr<llvm::MemoryBuffer>>;
+using OptimizerResultT = Expected<orc::ThreadSafeModule>;
 
 class JuliaOJIT {
-    // Custom object emission notification handler for the JuliaOJIT
-    class DebugObjectRegistrar {
-    public:
-        DebugObjectRegistrar(JuliaOJIT &JIT);
-        template <typename ObjSetT, typename LoadResult>
-        void operator()(RTDyldObjHandleT H, const ObjSetT &Object, const LoadResult &LOS);
-    private:
-        template <typename ObjT, typename LoadResult>
-        void registerObject(RTDyldObjHandleT H, const ObjT &Obj, const LoadResult &LO);
-        std::unique_ptr<JITEventListener> JuliaListener;
-        JuliaOJIT &JIT;
+public:
+#ifdef JL_USE_JITLINK
+    typedef orc::ObjectLinkingLayer ObjLayerT;
+#else
+    typedef orc::RTDyldObjectLinkingLayer ObjLayerT;
+#endif
+    typedef orc::IRCompileLayer CompileLayerT;
+    typedef orc::IRTransformLayer OptimizeLayerT;
+    typedef object::OwningBinary<object::ObjectFile> OwningObj;
+    template
+    <typename ResourceT, size_t max = 0,
+        typename BackingT = std::stack<ResourceT,
+            std::conditional_t<max == 0,
+                SmallVector<ResourceT>,
+                SmallVector<ResourceT, max>
+            >
+        >
+    >
+    struct ResourcePool {
+        public:
+        ResourcePool(std::function<ResourceT()> creator) : creator(std::move(creator)), mutex(std::make_unique<WNMutex>()) {}
+        class OwningResource {
+            public:
+            OwningResource(ResourcePool &pool, ResourceT resource) : pool(pool), resource(std::move(resource)) {}
+            OwningResource(const OwningResource &) = delete;
+            OwningResource &operator=(const OwningResource &) = delete;
+            OwningResource(OwningResource &&) = default;
+            OwningResource &operator=(OwningResource &&) = default;
+            ~OwningResource() {
+                if (resource) pool.release(std::move(*resource));
+            }
+            ResourceT release() {
+                ResourceT res(std::move(*resource));
+                resource.reset();
+                return res;
+            }
+            void reset(ResourceT res) {
+                *resource = std::move(res);
+            }
+            ResourceT &operator*() {
+                return *resource;
+            }
+            ResourceT *operator->() {
+                return get();
+            }
+            ResourceT *get() {
+                return resource.getPointer();
+            }
+            const ResourceT &operator*() const {
+                return *resource;
+            }
+            const ResourceT *operator->() const {
+                return get();
+            }
+            const ResourceT *get() const {
+                return resource.getPointer();
+            }
+            explicit operator bool() const {
+                return resource;
+            }
+            private:
+            ResourcePool &pool;
+            llvm::Optional<ResourceT> resource;
+        };
+
+        OwningResource operator*() {
+            return OwningResource(*this, acquire());
+        }
+
+        OwningResource get() {
+            return **this;
+        }
+
+        ResourceT acquire() {
+            std::unique_lock<std::mutex> lock(mutex->mutex);
+            if (!pool.empty()) {
+                return pop(pool);
+            }
+            if (!max || created < max) {
+                created++;
+                return creator();
+            }
+            mutex->empty.wait(lock, [&](){ return !pool.empty(); });
+            assert(!pool.empty() && "Expected resource pool to have a value!");
+            return pop(pool);
+        }
+        void release(ResourceT &&resource) {
+            std::lock_guard<std::mutex> lock(mutex->mutex);
+            pool.push(std::move(resource));
+            mutex->empty.notify_one();
+        }
+        private:
+        template<typename T, typename Container>
+        static ResourceT pop(std::queue<T, Container> &pool) {
+            ResourceT top = std::move(pool.front());
+            pool.pop();
+            return top;
+        }
+        template<typename PoolT>
+        static ResourceT pop(PoolT &pool) {
+            ResourceT top = std::move(pool.top());
+            pool.pop();
+            return top;
+        }
+        std::function<ResourceT()> creator;
+        size_t created = 0;
+        BackingT pool;
+        struct WNMutex {
+            std::mutex mutex;
+            std::condition_variable empty;
+        };
+
+        std::unique_ptr<WNMutex> mutex;
     };
+    struct PipelineT {
+        PipelineT(orc::ObjectLayer &BaseLayer, TargetMachine &TM, int optlevel);
+        CompileLayerT CompileLayer;
+        OptimizeLayerT OptimizeLayer;
+    };
+
+    struct OptSelLayerT : orc::IRLayer {
 
-    struct CompilerT {
-        CompilerT(JuliaOJIT *pjit)
-            : jit(*pjit)
-        {}
-        CompilerResultT operator()(Module &M);
-    private:
-        JuliaOJIT &jit;
+        template<size_t N>
+        OptSelLayerT(const std::array<std::unique_ptr<PipelineT>, N> &optimizers)
+            : orc::IRLayer(optimizers[0]->OptimizeLayer.getExecutionSession(),
+                optimizers[0]->OptimizeLayer.getManglingOptions()),
+            optimizers(optimizers.data()),
+            count(N) {
+            static_assert(N > 0, "Expected array with at least one optimizer!");
+        }
+
+        void emit(std::unique_ptr<orc::MaterializationResponsibility> R, orc::ThreadSafeModule TSM) override;
+
+        private:
+        const std::unique_ptr<PipelineT> * const optimizers;
+        size_t count;
     };
 
+private:
+    // Custom object emission notification handler for the JuliaOJIT
+    template <typename ObjT, typename LoadResult>
+    void registerObject(const ObjT &Obj, const LoadResult &LO);
+
 public:
-    typedef orc::LegacyRTDyldObjectLinkingLayer ObjLayerT;
-    typedef orc::LegacyIRCompileLayer<ObjLayerT,CompilerT> CompileLayerT;
-    typedef orc::VModuleKey ModuleHandleT;
-    typedef StringMap<void*> SymbolTableT;
-    typedef object::OwningBinary<object::ObjectFile> OwningObj;
 
-    JuliaOJIT(TargetMachine &TM);
+    JuliaOJIT();
 
+    void enableJITDebuggingSupport();
+#ifndef JL_USE_JITLINK
+    // JITLink doesn't support old JITEventListeners (yet).
     void RegisterJITEventListener(JITEventListener *L);
-    std::vector<JITEventListener *> EventListeners;
-    void NotifyFinalizer(RTDyldObjHandleT Key,
-                         const object::ObjectFile &Obj,
-                         const RuntimeDyld::LoadedObjectInfo &LoadedObjectInfo);
+#endif
+
     void addGlobalMapping(StringRef Name, uint64_t Addr);
-    void *getPointerToGlobalIfAvailable(StringRef S);
-    void *getPointerToGlobalIfAvailable(const GlobalValue *GV);
-    void addModule(std::unique_ptr<Module> M);
-    void removeModule(ModuleHandleT H);
+    void addModule(orc::ThreadSafeModule M);
+
     JL_JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly);
     JL_JITSymbol findUnmangledSymbol(StringRef Name);
-    JL_JITSymbol resolveSymbol(StringRef Name);
     uint64_t getGlobalValueAddress(StringRef Name);
     uint64_t getFunctionAddress(StringRef Name);
     StringRef getFunctionAtAddress(uint64_t Addr, jl_code_instance_t *codeinst);
+    auto getContext() {
+        return *ContextPool;
+    }
+    orc::ThreadSafeContext acquireContext() {
+        return ContextPool.acquire();
+    }
+    void releaseContext(orc::ThreadSafeContext &&ctx) {
+        ContextPool.release(std::move(ctx));
+    }
     const DataLayout& getDataLayout() const;
+
+    // TargetMachine pass-through methods
+    std::unique_ptr<TargetMachine> cloneTargetMachine() const;
     const Triple& getTargetTriple() const;
+    StringRef getTargetFeatureString() const;
+    StringRef getTargetCPU() const;
+    const TargetOptions &getTargetOptions() const;
+    const Target &getTarget() const;
+    TargetIRAnalysis getTargetIRAnalysis() const;
+
+    size_t getTotalBytes() const;
+
+    JITDebugInfoRegistry &getDebugInfoRegistry() JL_NOTSAFEPOINT {
+        return DebugRegistry;
+    }
+
+    jl_locked_stream &get_dump_emitted_mi_name_stream() JL_NOTSAFEPOINT {
+        return dump_emitted_mi_name_stream;
+    }
+    jl_locked_stream &get_dump_compiles_stream() JL_NOTSAFEPOINT {
+        return dump_compiles_stream;
+    }
+    jl_locked_stream &get_dump_llvm_opt_stream() JL_NOTSAFEPOINT {
+        return dump_llvm_opt_stream;
+    }
 private:
     std::string getMangledName(StringRef Name);
     std::string getMangledName(const GlobalValue *GV);
+    void shareStrings(Module &M);
 
-    TargetMachine &TM;
+    const std::unique_ptr<TargetMachine> TM;
     const DataLayout DL;
-    // Should be big enough that in the common case, The
-    // object fits in its entirety
-    SmallVector<char, 4096> ObjBufferSV;
-    raw_svector_ostream ObjStream;
-    legacy::PassManager PM0;  // per-optlevel pass managers
-    legacy::PassManager PM1;
-    legacy::PassManager PM2;
-    legacy::PassManager PM3;
-    TargetMachine *TMs[4];
-    MCContext *Ctx;
-    std::shared_ptr<RTDyldMemoryManager> MemMgr;
-    DebugObjectRegistrar registrar;
-
-    llvm::orc::ExecutionSession ES;
-    std::shared_ptr<llvm::orc::SymbolResolver> SymbolResolver;
 
-    ObjLayerT ObjectLayer;
-    CompileLayerT CompileLayer;
+    orc::ExecutionSession ES;
+    orc::JITDylib &GlobalJD;
+    orc::JITDylib &JD;
+
+    JITDebugInfoRegistry DebugRegistry;
+
+    //Map and inc are guarded by RLST_mutex
+    std::mutex RLST_mutex{};
+    int RLST_inc = 0;
+    DenseMap<void*, std::string> ReverseLocalSymbolTable;
+
+    //Compilation streams
+    jl_locked_stream dump_emitted_mi_name_stream;
+    jl_locked_stream dump_compiles_stream;
+    jl_locked_stream dump_llvm_opt_stream;
 
-    SymbolTableT GlobalSymbolTable;
-    SymbolTableT LocalSymbolTable;
-    DenseMap<void*, StringRef> ReverseLocalSymbolTable;
+    ResourcePool<orc::ThreadSafeContext, 0, std::queue<orc::ThreadSafeContext>> ContextPool;
+
+#ifndef JL_USE_JITLINK
+    const std::shared_ptr<RTDyldMemoryManager> MemMgr;
+#endif
+    ObjLayerT ObjectLayer;
+    const std::array<std::unique_ptr<PipelineT>, 4> Pipelines;
+    OptSelLayerT OptSelLayer;
 };
 extern JuliaOJIT *jl_ExecutionEngine;
+orc::ThreadSafeModule jl_create_llvm_module(StringRef name, orc::ThreadSafeContext ctx, bool imaging_mode, const DataLayout &DL = jl_ExecutionEngine->getDataLayout(), const Triple &triple = jl_ExecutionEngine->getTargetTriple());
+
+orc::ThreadSafeModule &jl_codegen_params_t::shared_module(Module &from) {
+    if (!_shared_module) {
+        _shared_module = jl_create_llvm_module("globals", tsctx, imaging, from.getDataLayout(), Triple(from.getTargetTriple()));
+        assert(&from.getContext() == tsctx.getContext() && "Module context differs from codegen_params context!");
+    } else {
+        assert(&from.getContext() == _shared_module.getContext().getContext() && "Module context differs from shared module context!");
+        assert(from.getDataLayout() == _shared_module.getModuleUnlocked()->getDataLayout() && "Module data layout differs from shared module data layout!");
+        assert(from.getTargetTriple() == _shared_module.getModuleUnlocked()->getTargetTriple() && "Module target triple differs from shared module target triple!");
+    }
+    return _shared_module;
+}
 
 Pass *createLowerPTLSPass(bool imaging_mode);
 Pass *createCombineMulAddPass();
@@ -239,8 +472,15 @@ Pass *createPropagateJuliaAddrspaces();
 Pass *createRemoveJuliaAddrspacesPass();
 Pass *createRemoveNIPass();
 Pass *createJuliaLICMPass();
-Pass *createMultiVersioningPass();
+Pass *createMultiVersioningPass(bool external_use);
 Pass *createAllocOptPass();
+Pass *createDemoteFloat16Pass();
+Pass *createCPUFeaturesPass();
+Pass *createLowerSimdLoopPass();
+
+// NewPM
+#include "passes.h"
+
 // Whether the Function is an llvm or julia intrinsic.
 static inline bool isIntrinsicFunction(Function *F)
 {
diff --git a/src/jl_exported_data.inc b/src/jl_exported_data.inc
new file mode 100644
index 00000000000000..b0994ce0a0c4ac
--- /dev/null
+++ b/src/jl_exported_data.inc
@@ -0,0 +1,134 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Pointers that are exposed through the public libjulia
+#define JL_EXPORTED_DATA_POINTERS(XX) \
+    XX(jl_abstractarray_type) \
+    XX(jl_abstractslot_type) \
+    XX(jl_abstractstring_type) \
+    XX(jl_an_empty_string) \
+    XX(jl_an_empty_vec_any) \
+    XX(jl_anytuple_type) \
+    XX(jl_anytuple_type_type) \
+    XX(jl_any_type) \
+    XX(jl_argumenterror_type) \
+    XX(jl_argument_type) \
+    XX(jl_array_any_type) \
+    XX(jl_array_int32_type) \
+    XX(jl_array_symbol_type) \
+    XX(jl_array_type) \
+    XX(jl_array_typename) \
+    XX(jl_array_uint8_type) \
+    XX(jl_array_uint64_type) \
+    XX(jl_atomicerror_type) \
+    XX(jl_base_module) \
+    XX(jl_bool_type) \
+    XX(jl_bottom_type) \
+    XX(jl_boundserror_type) \
+    XX(jl_builtin_type) \
+    XX(jl_char_type) \
+    XX(jl_code_info_type) \
+    XX(jl_code_instance_type) \
+    XX(jl_const_type) \
+    XX(jl_core_module) \
+    XX(jl_datatype_type) \
+    XX(jl_densearray_type) \
+    XX(jl_diverror_exception) \
+    XX(jl_emptysvec) \
+    XX(jl_emptytuple) \
+    XX(jl_emptytuple_type) \
+    XX(jl_errorexception_type) \
+    XX(jl_expr_type) \
+    XX(jl_false) \
+    XX(jl_float16_type) \
+    XX(jl_float32_type) \
+    XX(jl_float64_type) \
+    XX(jl_floatingpoint_type) \
+    XX(jl_function_type) \
+    XX(jl_globalref_type) \
+    XX(jl_gotoifnot_type) \
+    XX(jl_gotonode_type) \
+    XX(jl_initerror_type) \
+    XX(jl_int16_type) \
+    XX(jl_int32_type) \
+    XX(jl_int64_type) \
+    XX(jl_int8_type) \
+    XX(jl_interconditional_type) \
+    XX(jl_interrupt_exception) \
+    XX(jl_intrinsic_type) \
+    XX(jl_lineinfonode_type) \
+    XX(jl_linenumbernode_type) \
+    XX(jl_llvmpointer_type) \
+    XX(jl_llvmpointer_typename) \
+    XX(jl_loaderror_type) \
+    XX(jl_main_module) \
+    XX(jl_memory_exception) \
+    XX(jl_methoderror_type) \
+    XX(jl_method_instance_type) \
+    XX(jl_method_match_type) \
+    XX(jl_method_type) \
+    XX(jl_methtable_type) \
+    XX(jl_module_type) \
+    XX(jl_n_threads_per_pool) \
+    XX(jl_namedtuple_type) \
+    XX(jl_namedtuple_typename) \
+    XX(jl_newvarnode_type) \
+    XX(jl_nonfunction_mt) \
+    XX(jl_nothing) \
+    XX(jl_nothing_type) \
+    XX(jl_number_type) \
+    XX(jl_opaque_closure_type) \
+    XX(jl_opaque_closure_typename) \
+    XX(jl_pair_type) \
+    XX(jl_partial_opaque_type) \
+    XX(jl_partial_struct_type) \
+    XX(jl_phicnode_type) \
+    XX(jl_phinode_type) \
+    XX(jl_pinode_type) \
+    XX(jl_pointer_type) \
+    XX(jl_pointer_typename) \
+    XX(jl_quotenode_type) \
+    XX(jl_readonlymemory_exception) \
+    XX(jl_ref_type) \
+    XX(jl_returnnode_type) \
+    XX(jl_signed_type) \
+    XX(jl_simplevector_type) \
+    XX(jl_slotnumber_type) \
+    XX(jl_ssavalue_type) \
+    XX(jl_stackovf_exception) \
+    XX(jl_string_type) \
+    XX(jl_symbol_type) \
+    XX(jl_task_type) \
+    XX(jl_top_module) \
+    XX(jl_true) \
+    XX(jl_tuple_typename) \
+    XX(jl_tvar_type) \
+    XX(jl_typedslot_type) \
+    XX(jl_typeerror_type) \
+    XX(jl_typemap_entry_type) \
+    XX(jl_typemap_level_type) \
+    XX(jl_typename_type) \
+    XX(jl_typeofbottom_type) \
+    XX(jl_type_type) \
+    XX(jl_type_type_mt) \
+    XX(jl_type_typename) \
+    XX(jl_uint16_type) \
+    XX(jl_uint32_type) \
+    XX(jl_uint64_type) \
+    XX(jl_uint8pointer_type) \
+    XX(jl_uint8_type) \
+    XX(jl_undefref_exception) \
+    XX(jl_undefvarerror_type) \
+    XX(jl_unionall_type) \
+    XX(jl_uniontype_type) \
+    XX(jl_upsilonnode_type) \
+    XX(jl_vararg_type) \
+    XX(jl_vecelement_typename) \
+    XX(jl_voidpointer_type) \
+    XX(jl_void_type) \
+    XX(jl_weakref_type)
+
+// Data symbols that are defined inside the public libjulia
+#define JL_EXPORTED_DATA_SYMBOLS(XX) \
+    XX(jl_n_threadpools, int) \
+    XX(jl_n_threads, int) \
+    XX(jl_options, jl_options_t)
diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc
new file mode 100644
index 00000000000000..ef1f7c929f7e73
--- /dev/null
+++ b/src/jl_exported_funcs.inc
@@ -0,0 +1,571 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#define JL_RUNTIME_EXPORTED_FUNCS(XX) \
+    XX(jl_active_task_stack) \
+    XX(jl_add_standard_imports) \
+    XX(jl_alignment) \
+    XX(jl_alloc_array_1d) \
+    XX(jl_alloc_array_2d) \
+    XX(jl_alloc_array_3d) \
+    XX(jl_alloc_string) \
+    XX(jl_alloc_svec) \
+    XX(jl_alloc_svec_uninit) \
+    XX(jl_alloc_vec_any) \
+    XX(jl_apply_array_type) \
+    XX(jl_apply_cmpswap_type) \
+    XX(jl_apply_generic) \
+    XX(jl_apply_tuple_type) \
+    XX(jl_apply_tuple_type_v) \
+    XX(jl_apply_type) \
+    XX(jl_apply_type1) \
+    XX(jl_apply_type2) \
+    XX(jl_argument_datatype) \
+    XX(jl_argument_method_table) \
+    XX(jl_arraylen) \
+    XX(jl_arrayref) \
+    XX(jl_arrayset) \
+    XX(jl_arrayunset) \
+    XX(jl_array_cconvert_cstring) \
+    XX(jl_array_copy) \
+    XX(jl_array_del_at) \
+    XX(jl_array_del_beg) \
+    XX(jl_array_del_end) \
+    XX(jl_array_eltype) \
+    XX(jl_array_grow_at) \
+    XX(jl_array_grow_beg) \
+    XX(jl_array_grow_end) \
+    XX(jl_array_isassigned) \
+    XX(jl_array_ptr) \
+    XX(jl_array_ptr_1d_append) \
+    XX(jl_array_ptr_1d_push) \
+    XX(jl_array_ptr_copy) \
+    XX(jl_array_rank) \
+    XX(jl_array_size) \
+    XX(jl_array_sizehint) \
+    XX(jl_array_to_string) \
+    XX(jl_array_typetagdata) \
+    XX(jl_array_validate_dims) \
+    XX(jl_atexit_hook) \
+    XX(jl_atomic_bool_cmpswap_bits) \
+    XX(jl_atomic_cmpswap_bits) \
+    XX(jl_atomic_error) \
+    XX(jl_atomic_new_bits) \
+    XX(jl_atomic_store_bits) \
+    XX(jl_atomic_swap_bits) \
+    XX(jl_backtrace_from_here) \
+    XX(jl_base_relative_to) \
+    XX(jl_binding_owner) \
+    XX(jl_binding_resolved_p) \
+    XX(jl_bitcast) \
+    XX(jl_boundp) \
+    XX(jl_bounds_error) \
+    XX(jl_bounds_error_int) \
+    XX(jl_bounds_error_ints) \
+    XX(jl_bounds_error_tuple_int) \
+    XX(jl_bounds_error_unboxed_int) \
+    XX(jl_bounds_error_v) \
+    XX(jl_box_bool) \
+    XX(jl_box_char) \
+    XX(jl_box_float32) \
+    XX(jl_box_float64) \
+    XX(jl_box_int16) \
+    XX(jl_box_int32) \
+    XX(jl_box_int64) \
+    XX(jl_box_int8) \
+    XX(jl_box_slotnumber) \
+    XX(jl_box_ssavalue) \
+    XX(jl_box_uint16) \
+    XX(jl_box_uint32) \
+    XX(jl_box_uint64) \
+    XX(jl_box_uint8) \
+    XX(jl_box_uint8pointer) \
+    XX(jl_box_voidpointer) \
+    XX(jl_call) \
+    XX(jl_call0) \
+    XX(jl_call1) \
+    XX(jl_call2) \
+    XX(jl_call3) \
+    XX(jl_calloc) \
+    XX(jl_call_in_typeinf_world) \
+    XX(jl_capture_interp_frame) \
+    XX(jl_ceil_llvm) \
+    XX(jl_ceil_llvm_withtype) \
+    XX(jl_cglobal) \
+    XX(jl_cglobal_auto) \
+    XX(jl_checked_assignment) \
+    XX(jl_clear_implicit_imports) \
+    XX(jl_close_uv) \
+    XX(jl_code_for_staged) \
+    XX(jl_compile_hint) \
+    XX(jl_compress_argnames) \
+    XX(jl_compress_ir) \
+    XX(jl_compute_fieldtypes) \
+    XX(jl_copy_ast) \
+    XX(jl_copy_code_info) \
+    XX(jl_cpu_threads) \
+    XX(jl_effective_threads) \
+    XX(jl_crc32c_sw) \
+    XX(jl_create_system_image) \
+    XX(jl_cstr_to_string) \
+    XX(jl_current_exception) \
+    XX(jl_debug_method_invalidation) \
+    XX(jl_declare_constant) \
+    XX(jl_defines_or_exports_p) \
+    XX(jl_deprecate_binding) \
+    XX(jl_dlclose) \
+    XX(jl_dlopen) \
+    XX(jl_dlsym) \
+    XX(jl_dump_host_cpu) \
+    XX(jl_egal) \
+    XX(jl_egal__bits) \
+    XX(jl_egal__special) \
+    XX(jl_eh_restore_state) \
+    XX(jl_enter_handler) \
+    XX(jl_enter_threaded_region) \
+    XX(jl_environ) \
+    XX(jl_eof_error) \
+    XX(jl_eqtable_get) \
+    XX(jl_eqtable_nextind) \
+    XX(jl_eqtable_pop) \
+    XX(jl_eqtable_put) \
+    XX(jl_errno) \
+    XX(jl_error) \
+    XX(jl_errorf) \
+    XX(jl_eval_string) \
+    XX(jl_exceptionf) \
+    XX(jl_exception_clear) \
+    XX(jl_exception_occurred) \
+    XX(jl_excstack_state) \
+    XX(jl_exit) \
+    XX(jl_exit_on_sigint) \
+    XX(jl_exit_threaded_region) \
+    XX(jl_expand) \
+    XX(jl_expand_and_resolve) \
+    XX(jl_expand_stmt) \
+    XX(jl_expand_stmt_with_loc) \
+    XX(jl_expand_with_loc) \
+    XX(jl_expand_with_loc_warn) \
+    XX(jl_field_index) \
+    XX(jl_field_isdefined) \
+    XX(jl_gc_add_finalizer) \
+    XX(jl_gc_add_finalizer_th) \
+    XX(jl_gc_add_ptr_finalizer) \
+    XX(jl_gc_allocobj) \
+    XX(jl_gc_alloc_0w) \
+    XX(jl_gc_alloc_1w) \
+    XX(jl_gc_alloc_2w) \
+    XX(jl_gc_alloc_3w) \
+    XX(jl_gc_alloc_typed) \
+    XX(jl_gc_big_alloc) \
+    XX(jl_gc_collect) \
+    XX(jl_gc_conservative_gc_support_enabled) \
+    XX(jl_gc_counted_calloc) \
+    XX(jl_gc_counted_free_with_size) \
+    XX(jl_gc_counted_malloc) \
+    XX(jl_gc_counted_realloc_with_old_size) \
+    XX(jl_gc_diff_total_bytes) \
+    XX(jl_gc_enable) \
+    XX(jl_gc_enable_conservative_gc_support) \
+    XX(jl_gc_enable_finalizers) \
+    XX(jl_gc_external_obj_hdr_size) \
+    XX(jl_gc_find_taggedvalue_pool) \
+    XX(jl_gc_get_total_bytes) \
+    XX(jl_gc_internal_obj_base_ptr) \
+    XX(jl_gc_is_enabled) \
+    XX(jl_gc_live_bytes) \
+    XX(jl_gc_managed_malloc) \
+    XX(jl_gc_managed_realloc) \
+    XX(jl_gc_mark_queue_obj) \
+    XX(jl_gc_mark_queue_objarray) \
+    XX(jl_gc_max_internal_obj_size) \
+    XX(jl_gc_new_weakref) \
+    XX(jl_gc_new_weakref_th) \
+    XX(jl_gc_num) \
+    XX(jl_gc_pool_alloc) \
+    XX(jl_gc_queue_multiroot) \
+    XX(jl_gc_queue_root) \
+    XX(jl_gc_safepoint) \
+    XX(jl_gc_schedule_foreign_sweepfunc) \
+    XX(jl_gc_set_cb_notify_external_alloc) \
+    XX(jl_gc_set_cb_notify_external_free) \
+    XX(jl_gc_set_cb_post_gc) \
+    XX(jl_gc_set_cb_pre_gc) \
+    XX(jl_gc_set_cb_root_scanner) \
+    XX(jl_gc_set_cb_task_scanner) \
+    XX(jl_gc_sync_total_bytes) \
+    XX(jl_gc_total_hrtime) \
+    XX(jl_gdblookup) \
+    XX(jl_generating_output) \
+    XX(jl_generic_function_def) \
+    XX(jl_gensym) \
+    XX(jl_getallocationgranularity) \
+    XX(jl_getnameinfo) \
+    XX(jl_getpagesize) \
+    XX(jl_get_ARCH) \
+    XX(jl_get_backtrace) \
+    XX(jl_get_binding) \
+    XX(jl_get_binding_for_method_def) \
+    XX(jl_get_binding_or_error) \
+    XX(jl_get_binding_wr_or_error) \
+    XX(jl_get_binding_wr) \
+    XX(jl_get_cpu_name) \
+    XX(jl_get_current_task) \
+    XX(jl_get_default_sysimg_path) \
+    XX(jl_get_excstack) \
+    XX(jl_get_fenv_consts) \
+    XX(jl_get_field) \
+    XX(jl_get_global) \
+    XX(jl_get_image_file) \
+    XX(jl_get_JIT) \
+    XX(jl_get_julia_bin) \
+    XX(jl_get_julia_bindir) \
+    XX(jl_get_keyword_sorter) \
+    XX(jl_get_kwsorter) \
+    XX(jl_get_method_inferred) \
+    XX(jl_get_module_binding) \
+    XX(jl_get_module_compile) \
+    XX(jl_get_module_infer) \
+    XX(jl_get_module_of_binding) \
+    XX(jl_get_module_optlevel) \
+    XX(jl_get_next_task) \
+    XX(jl_get_nth_field) \
+    XX(jl_get_nth_field_checked) \
+    XX(jl_get_nth_field_noalloc) \
+    XX(jl_get_pgcstack) \
+    XX(jl_get_ptls_states) \
+    XX(jl_get_root_symbol) \
+    XX(jl_get_safe_restore) \
+    XX(jl_get_size) \
+    XX(jl_get_task_tid) \
+    XX(jl_get_task_threadpoolid) \
+    XX(jl_get_tls_world_age) \
+    XX(jl_get_UNAME) \
+    XX(jl_get_world_counter) \
+    XX(jl_get_zero_subnormals) \
+    XX(jl_gf_invoke_lookup) \
+    XX(jl_gf_invoke_lookup_worlds) \
+    XX(jl_git_branch) \
+    XX(jl_git_commit) \
+    XX(jl_global_event_loop) \
+    XX(jl_has_empty_intersection) \
+    XX(jl_has_free_typevars) \
+    XX(jl_has_so_reuseport) \
+    XX(jl_has_typevar) \
+    XX(jl_has_typevar_from_unionall) \
+    XX(jl_hrtime) \
+    XX(jl_idtable_rehash) \
+    XX(jl_infer_thunk) \
+    XX(jl_init) \
+    XX(jl_init_options) \
+    XX(jl_init_restored_modules) \
+    XX(jl_init_with_image) \
+    XX(jl_init_with_image__threading) \
+    XX(jl_init__threading) \
+    XX(jl_install_sigint_handler) \
+    XX(jl_instantiate_type_in_env) \
+    XX(jl_instantiate_unionall) \
+    XX(jl_intersect_types) \
+    XX(jl_intrinsic_name) \
+    XX(jl_invoke) \
+    XX(jl_invoke_api) \
+    XX(jl_in_threaded_region) \
+    XX(jl_iolock_begin) \
+    XX(jl_iolock_end) \
+    XX(jl_ios_buffer_n) \
+    XX(jl_ios_fd) \
+    XX(jl_ios_get_nbyte_int) \
+    XX(jl_ir_flag_inferred) \
+    XX(jl_ir_flag_inlineable) \
+    XX(jl_ir_flag_pure) \
+    XX(jl_ir_nslots) \
+    XX(jl_ir_slotflag) \
+    XX(jl_isa) \
+    XX(jl_isa_compileable_sig) \
+    XX(jl_islayout_inline) \
+    XX(jl_istopmod) \
+    XX(jl_is_binding_deprecated) \
+    XX(jl_is_char_signed) \
+    XX(jl_is_const) \
+    XX(jl_is_debugbuild) \
+    XX(jl_is_identifier) \
+    XX(jl_is_imported) \
+    XX(jl_is_initialized) \
+    XX(jl_is_in_pure_context) \
+    XX(jl_is_memdebug) \
+    XX(jl_is_not_broken_subtype) \
+    XX(jl_is_operator) \
+    XX(jl_is_task_started) \
+    XX(jl_is_unary_and_binary_operator) \
+    XX(jl_is_unary_operator) \
+    XX(jl_lazy_load_and_lookup) \
+    XX(jl_lisp_prompt) \
+    XX(jl_load) \
+    XX(jl_load_) \
+    XX(jl_load_and_lookup) \
+    XX(jl_load_dynamic_library) \
+    XX(jl_load_file_string) \
+    XX(jl_lookup_code_address) \
+    XX(jl_lseek) \
+    XX(jl_lstat) \
+    XX(jl_macroexpand) \
+    XX(jl_macroexpand1) \
+    XX(jl_malloc) \
+    XX(jl_malloc_stack) \
+    XX(jl_matching_methods) \
+    XX(jl_maxrss) \
+    XX(jl_method_def) \
+    XX(jl_method_instance_add_backedge) \
+    XX(jl_method_table_add_backedge) \
+    XX(jl_method_table_disable) \
+    XX(jl_method_table_for) \
+    XX(jl_method_table_insert) \
+    XX(jl_methtable_lookup) \
+    XX(jl_mi_cache_insert) \
+    XX(jl_module_build_id) \
+    XX(jl_module_export) \
+    XX(jl_module_exports_p) \
+    XX(jl_module_globalref) \
+    XX(jl_module_import) \
+    XX(jl_module_name) \
+    XX(jl_module_names) \
+    XX(jl_module_parent) \
+    XX(jl_module_use) \
+    XX(jl_module_using) \
+    XX(jl_module_usings) \
+    XX(jl_module_uuid) \
+    XX(jl_native_alignment) \
+    XX(jl_nb_available) \
+    XX(jl_new_array) \
+    XX(jl_new_bits) \
+    XX(jl_new_codeinst) \
+    XX(jl_new_code_info_uninit) \
+    XX(jl_new_datatype) \
+    XX(jl_new_foreign_type) \
+    XX(jl_new_method_instance_uninit) \
+    XX(jl_new_method_table) \
+    XX(jl_new_method_uninit) \
+    XX(jl_new_module) \
+    XX(jl_new_primitivetype) \
+    XX(jl_new_struct) \
+    XX(jl_new_structt) \
+    XX(jl_new_structv) \
+    XX(jl_new_struct_uninit) \
+    XX(jl_new_task) \
+    XX(jl_new_typename_in) \
+    XX(jl_new_typevar) \
+    XX(jl_next_from_addrinfo) \
+    XX(jl_normalize_to_compilable_sig) \
+    XX(jl_no_exc_handler) \
+    XX(jl_object_id) \
+    XX(jl_object_id_) \
+    XX(jl_obvious_subtype) \
+    XX(jl_operator_precedence) \
+    XX(jl_parse) \
+    XX(jl_parse_all) \
+    XX(jl_parse_input_line) \
+    XX(jl_parse_opts) \
+    XX(jl_parse_string) \
+    XX(jl_pathname_for_handle) \
+    XX(jl_pchar_to_array) \
+    XX(jl_pchar_to_string) \
+    XX(jl_pointerref) \
+    XX(jl_pointerset) \
+    XX(jl_pop_handler) \
+    XX(jl_preload_sysimg_so) \
+    XX(jl_prepend_cwd) \
+    XX(jl_printf) \
+    XX(jl_print_backtrace) \
+    XX(jl_process_events) \
+    XX(jl_profile_clear_data) \
+    XX(jl_profile_delay_nsec) \
+    XX(jl_profile_get_data) \
+    XX(jl_profile_init) \
+    XX(jl_profile_is_running) \
+    XX(jl_profile_len_data) \
+    XX(jl_profile_maxlen_data) \
+    XX(jl_profile_start_timer) \
+    XX(jl_profile_stop_timer) \
+    XX(jl_ptrarrayref) \
+    XX(jl_ptr_to_array) \
+    XX(jl_ptr_to_array_1d) \
+    XX(jl_queue_work) \
+    XX(jl_raise_debugger) \
+    XX(jl_readuntil) \
+    XX(jl_read_verify_header) \
+    XX(jl_realloc) \
+    XX(jl_register_newmeth_tracer) \
+    XX(jl_reshape_array) \
+    XX(jl_resolve_globals_in_ir) \
+    XX(jl_restore_excstack) \
+    XX(jl_restore_incremental) \
+    XX(jl_restore_incremental_from_buf) \
+    XX(jl_restore_system_image) \
+    XX(jl_restore_system_image_data) \
+    XX(jl_rethrow) \
+    XX(jl_rethrow_other) \
+    XX(jl_rettype_inferred) \
+    XX(jl_running_on_valgrind) \
+    XX(jl_safe_printf) \
+    XX(jl_save_incremental) \
+    XX(jl_save_system_image) \
+    XX(jl_SC_CLK_TCK) \
+    XX(jl_set_ARGS) \
+    XX(jl_set_const) \
+    XX(jl_set_errno) \
+    XX(jl_set_istopmod) \
+    XX(jl_set_module_compile) \
+    XX(jl_set_module_infer) \
+    XX(jl_set_module_nospecialize) \
+    XX(jl_set_module_optlevel) \
+    XX(jl_set_module_uuid) \
+    XX(jl_set_next_task) \
+    XX(jl_set_nth_field) \
+    XX(jl_set_safe_restore) \
+    XX(jl_set_sysimg_so) \
+    XX(jl_set_task_tid) \
+    XX(jl_set_task_threadpoolid) \
+    XX(jl_set_typeinf_func) \
+    XX(jl_set_zero_subnormals) \
+    XX(jl_sigatomic_begin) \
+    XX(jl_sigatomic_end) \
+    XX(jl_sig_throw) \
+    XX(jl_spawn) \
+    XX(jl_specializations_get_linfo) \
+    XX(jl_specializations_lookup) \
+    XX(jl_static_show) \
+    XX(jl_static_show_func_sig) \
+    XX(jl_stderr_obj) \
+    XX(jl_stderr_stream) \
+    XX(jl_stdin_stream) \
+    XX(jl_stdout_obj) \
+    XX(jl_stdout_stream) \
+    XX(jl_stored_inline) \
+    XX(jl_string_ptr) \
+    XX(jl_string_to_array) \
+    XX(jl_subtype) \
+    XX(jl_subtype_env) \
+    XX(jl_subtype_env_size) \
+    XX(jl_svec) \
+    XX(jl_svec1) \
+    XX(jl_svec2) \
+    XX(jl_svec_copy) \
+    XX(jl_svec_fill) \
+    XX(jl_svec_isassigned) \
+    XX(jl_svec_ref) \
+    XX(jl_switch) \
+    XX(jl_switchto) \
+    XX(jl_symbol) \
+    XX(jl_symbol_lookup) \
+    XX(jl_symbol_n) \
+    XX(jl_tagged_gensym) \
+    XX(jl_take_buffer) \
+    XX(jl_task_get_next) \
+    XX(jl_task_stack_buffer) \
+    XX(jl_test_cpu_feature) \
+    XX(jl_threadid) \
+    XX(jl_threadpoolid) \
+    XX(jl_throw) \
+    XX(jl_throw_out_of_memory_error) \
+    XX(jl_too_few_args) \
+    XX(jl_too_many_args) \
+    XX(jl_toplevel_eval) \
+    XX(jl_toplevel_eval_in) \
+    XX(jl_try_substrtod) \
+    XX(jl_try_substrtof) \
+    XX(jl_tty_set_mode) \
+    XX(jl_tupletype_fill) \
+    XX(jl_typeassert) \
+    XX(jl_typeinf_begin) \
+    XX(jl_typeinf_end) \
+    XX(jl_typename_str) \
+    XX(jl_typeof_str) \
+    XX(jl_types_equal) \
+    XX(jl_type_equality_is_identity) \
+    XX(jl_type_error) \
+    XX(jl_type_error_rt) \
+    XX(jl_type_intersection) \
+    XX(jl_type_intersection_with_env) \
+    XX(jl_type_morespecific) \
+    XX(jl_type_morespecific_no_subtype) \
+    XX(jl_type_union) \
+    XX(jl_type_unionall) \
+    XX(jl_unbox_bool) \
+    XX(jl_unbox_float32) \
+    XX(jl_unbox_float64) \
+    XX(jl_unbox_int16) \
+    XX(jl_unbox_int32) \
+    XX(jl_unbox_int64) \
+    XX(jl_unbox_int8) \
+    XX(jl_unbox_uint16) \
+    XX(jl_unbox_uint32) \
+    XX(jl_unbox_uint64) \
+    XX(jl_unbox_uint8) \
+    XX(jl_unbox_uint8pointer) \
+    XX(jl_unbox_voidpointer) \
+    XX(jl_uncompress_argnames) \
+    XX(jl_uncompress_argname_n) \
+    XX(jl_uncompress_ir) \
+    XX(jl_undefined_var_error) \
+    XX(jl_value_ptr) \
+    XX(jl_ver_is_release) \
+    XX(jl_ver_major) \
+    XX(jl_ver_minor) \
+    XX(jl_ver_patch) \
+    XX(jl_ver_string) \
+    XX(jl_vexceptionf) \
+    XX(jl_vprintf) \
+    XX(jl_wakeup_thread) \
+    XX(jl_yield)
+
+#define JL_RUNTIME_EXPORTED_FUNCS_WIN(XX) \
+    XX(jl_setjmp)
+
+// use YY instead of XX to avoid jl -> ijl renaming in libjulia-codegen
+#define JL_CODEGEN_EXPORTED_FUNCS(YY) \
+    YY(jl_dump_function_ir) \
+    YY(jl_dump_method_asm) \
+    YY(jl_extern_c) \
+    YY(jl_get_llvmf_defn) \
+    YY(jl_get_llvm_function) \
+    YY(jl_get_llvm_module) \
+    YY(jl_get_LLVM_VERSION) \
+    YY(jl_dump_native) \
+    YY(jl_get_llvm_gv) \
+    YY(jl_dump_function_asm) \
+    YY(jl_LLVMCreateDisasm) \
+    YY(jl_LLVMDisasmInstruction) \
+    YY(jl_init_codegen) \
+    YY(jl_getFunctionInfo) \
+    YY(jl_register_fptrs) \
+    YY(jl_generate_fptr) \
+    YY(jl_generate_fptr_for_unspecialized) \
+    YY(jl_compile_extern_c) \
+    YY(jl_teardown_codegen) \
+    YY(jl_jit_total_bytes) \
+    YY(jl_lock_profile) \
+    YY(jl_unlock_profile) \
+    YY(jl_create_native) \
+    YY(jl_dump_compiles) \
+    YY(jl_dump_emitted_mi_name) \
+    YY(jl_dump_llvm_opt) \
+    YY(jl_dump_fptr_asm) \
+    YY(jl_get_function_id) \
+    YY(jl_type_to_llvm) \
+    YY(jl_getUnwindInfo) \
+    YY(jl_get_libllvm) \
+    YY(jl_add_optimization_passes) \
+    YY(LLVMExtraAddLowerSimdLoopPass) \
+    YY(LLVMExtraAddFinalLowerGCPass) \
+    YY(LLVMExtraAddPropagateJuliaAddrspaces) \
+    YY(LLVMExtraAddRemoveJuliaAddrspacesPass) \
+    YY(LLVMExtraAddCombineMulAddPass) \
+    YY(LLVMExtraAddMultiVersioningPass) \
+    YY(LLVMExtraAddLowerExcHandlersPass) \
+    YY(LLVMExtraAddLateLowerGCFramePass) \
+    YY(LLVMExtraJuliaLICMPass) \
+    YY(LLVMExtraAddAllocOptPass) \
+    YY(LLVMExtraAddLowerPTLSPass) \
+    YY(LLVMExtraAddRemoveNIPass) \
+    YY(LLVMExtraAddGCInvariantVerifierPass) \
+    YY(LLVMExtraAddDemoteFloat16Pass) \
+    YY(LLVMExtraAddCPUFeaturesPass)
diff --git a/src/jl_uv.c b/src/jl_uv.c
index 378c48090e38e1..ab13056b7601fa 100644
--- a/src/jl_uv.c
+++ b/src/jl_uv.c
@@ -22,11 +22,6 @@
 #include "support/ios.h"
 #include "uv.h"
 
-#if defined(_COMPILER_MICROSOFT_) && !defined(write)
-#include <io.h>
-#define write _write
-#endif
-
 #include "julia_assert.h"
 
 #ifdef __cplusplus
@@ -57,17 +52,17 @@ void jl_init_uv(void)
     JL_MUTEX_INIT(&jl_uv_mutex); // a file-scope initializer can be used instead
 }
 
-int jl_uv_n_waiters = 0;
+_Atomic(int) jl_uv_n_waiters = 0;
 
 void JL_UV_LOCK(void)
 {
     if (jl_mutex_trylock(&jl_uv_mutex)) {
     }
     else {
-        jl_atomic_fetch_add(&jl_uv_n_waiters, 1);
+        jl_atomic_fetch_add_relaxed(&jl_uv_n_waiters, 1);
         jl_wake_libuv();
         JL_LOCK(&jl_uv_mutex);
-        jl_atomic_fetch_add(&jl_uv_n_waiters, -1);
+        jl_atomic_fetch_add_relaxed(&jl_uv_n_waiters, -1);
     }
 }
 
@@ -82,14 +77,16 @@ JL_DLLEXPORT void jl_iolock_end(void)
 }
 
 
-void jl_uv_call_close_callback(jl_value_t *val)
+static void jl_uv_call_close_callback(jl_value_t *val)
 {
-    jl_value_t *args[2];
+    jl_value_t **args;
+    JL_GC_PUSHARGS(args, 2); // val is "rooted" in the finalizer list only right now
     args[0] = jl_get_global(jl_base_relative_to(((jl_datatype_t*)jl_typeof(val))->name->module),
             jl_symbol("_uv_hook_close")); // topmod(typeof(val))._uv_hook_close
     args[1] = val;
     assert(args[0]);
     jl_apply(args, 2); // TODO: wrap in try-catch?
+    JL_GC_POP();
 }
 
 static void jl_uv_closeHandle(uv_handle_t *handle)
@@ -105,11 +102,12 @@ static void jl_uv_closeHandle(uv_handle_t *handle)
         JL_STDERR = (JL_STREAM*)STDERR_FILENO;
     // also let the client app do its own cleanup
     if (handle->type != UV_FILE && handle->data) {
-        jl_ptls_t ptls = jl_get_ptls_states();
-        size_t last_age = ptls->world_age;
-        ptls->world_age = jl_world_counter;
+        jl_task_t *ct = jl_current_task;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_uv_call_close_callback((jl_value_t*)handle->data);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
+        return;
     }
     if (handle == (uv_handle_t*)&signal_async)
         return;
@@ -130,6 +128,10 @@ static void jl_uv_flush_close_callback(uv_write_t *req, int status)
         free(req);
         return;
     }
+    if (uv_is_closing((uv_handle_t*)stream)) { // avoid double-close on the stream
+        free(req);
+        return;
+    }
     if (status == 0 && uv_is_writable(stream) && stream->write_queue_size != 0) {
         // new data was written, wait for it to flush too
         uv_buf_t buf;
@@ -137,14 +139,12 @@ static void jl_uv_flush_close_callback(uv_write_t *req, int status)
         buf.len = 0;
         req->data = NULL;
         if (uv_write(req, stream, &buf, 1, (uv_write_cb)jl_uv_flush_close_callback) == 0)
-            return;
-    }
-    if (!uv_is_closing((uv_handle_t*)stream)) { // avoid double-close on the stream
-        if (stream->type == UV_TTY)
-            uv_tty_set_mode((uv_tty_t*)stream, UV_TTY_MODE_NORMAL);
-        uv_close((uv_handle_t*)stream, &jl_uv_closeHandle);
+            return; // success
     }
     free(req);
+    if (stream->type == UV_TTY)
+        uv_tty_set_mode((uv_tty_t*)stream, UV_TTY_MODE_NORMAL);
+    uv_close((uv_handle_t*)stream, &jl_uv_closeHandle);
 }
 
 static void uv_flush_callback(uv_write_t *req, int status)
@@ -201,20 +201,23 @@ JL_DLLEXPORT void jl_uv_req_set_data(uv_req_t *req, void *data) { req->data = da
 JL_DLLEXPORT void *jl_uv_handle_data(uv_handle_t *handle) { return handle->data; }
 JL_DLLEXPORT void *jl_uv_write_handle(uv_write_t *req) { return req->handle; }
 
-extern volatile unsigned _threadedregion;
+extern _Atomic(unsigned) _threadedregion;
 
 JL_DLLEXPORT int jl_process_events(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     uv_loop_t *loop = jl_io_loop;
-    if (loop && (_threadedregion || ptls->tid == 0)) {
-        jl_gc_safepoint_(ptls);
-        if (jl_atomic_load(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) {
+    jl_gc_safepoint_(ct->ptls);
+    if (loop && (jl_atomic_load_relaxed(&_threadedregion) || jl_atomic_load_relaxed(&ct->tid) == 0)) {
+        if (jl_atomic_load_relaxed(&jl_uv_n_waiters) == 0 && jl_mutex_trylock(&jl_uv_mutex)) {
+            JL_PROBE_RT_START_PROCESS_EVENTS(ct);
             loop->stop_flag = 0;
             int r = uv_run(loop, UV_RUN_NOWAIT);
+            JL_PROBE_RT_FINISH_PROCESS_EVENTS(ct);
             JL_UV_UNLOCK();
             return r;
         }
+        jl_gc_safepoint_(ct->ptls);
     }
     return 0;
 }
@@ -226,15 +229,15 @@ static void jl_proc_exit_cleanup_cb(uv_process_t *process, int64_t exit_status,
 
 JL_DLLEXPORT void jl_close_uv(uv_handle_t *handle)
 {
+    JL_UV_LOCK();
     if (handle->type == UV_PROCESS && ((uv_process_t*)handle)->pid != 0) {
         // take ownership of this handle,
         // so we can waitpid for the resource to exit and avoid leaving zombies
         assert(handle->data == NULL); // make sure Julia has forgotten about it already
         ((uv_process_t*)handle)->exit_cb = jl_proc_exit_cleanup_cb;
-        return;
+        uv_unref(handle);
     }
-    JL_UV_LOCK();
-    if (handle->type == UV_FILE) {
+    else if (handle->type == UV_FILE) {
         uv_fs_t req;
         jl_uv_file_t *fd = (jl_uv_file_t*)handle;
         if ((ssize_t)fd->file != -1) {
@@ -242,31 +245,26 @@ JL_DLLEXPORT void jl_close_uv(uv_handle_t *handle)
             fd->file = (uv_os_fd_t)(ssize_t)-1;
         }
         jl_uv_closeHandle(handle); // synchronous (ok since the callback is known to not interact with any global state)
-        JL_UV_UNLOCK();
-        return;
-    }
-
-    if (handle->type == UV_NAMED_PIPE || handle->type == UV_TCP || handle->type == UV_TTY) {
-        uv_write_t *req = (uv_write_t*)malloc_s(sizeof(uv_write_t));
-        req->handle = (uv_stream_t*)handle;
-        jl_uv_flush_close_callback(req, 0);
-        JL_UV_UNLOCK();
-        return;
     }
-
-    // avoid double-closing the stream
-    if (!uv_is_closing(handle)) {
-        uv_close(handle, &jl_uv_closeHandle);
+    else if (!uv_is_closing(handle)) { // avoid double-closing the stream
+        if (handle->type == UV_NAMED_PIPE || handle->type == UV_TCP || handle->type == UV_TTY) {
+            // flush the stream write-queue first
+            uv_write_t *req = (uv_write_t*)malloc_s(sizeof(uv_write_t));
+            req->handle = (uv_stream_t*)handle;
+            jl_uv_flush_close_callback(req, 0);
+        }
+        else {
+            uv_close(handle, &jl_uv_closeHandle);
+        }
     }
     JL_UV_UNLOCK();
 }
 
 JL_DLLEXPORT void jl_forceclose_uv(uv_handle_t *handle)
 {
-    // avoid double-closing the stream
-    if (!uv_is_closing(handle)) {
+    if (!uv_is_closing(handle)) { // avoid double-closing the stream
         JL_UV_LOCK();
-        if (!uv_is_closing(handle)) {
+        if (!uv_is_closing(handle)) { // double-check
             uv_close(handle, &jl_uv_closeHandle);
         }
         JL_UV_UNLOCK();
@@ -284,12 +282,13 @@ JL_DLLEXPORT void jl_uv_disassociate_julia_struct(uv_handle_t *handle)
     handle->data = NULL;
 }
 
-#define UV_CLOSED 0x02 // UV_HANDLE_CLOSED on Windows (same value)
+#define UV_HANDLE_CLOSED 0x02
 
 JL_DLLEXPORT int jl_spawn(char *name, char **argv,
                           uv_loop_t *loop, uv_process_t *proc,
                           uv_stdio_container_t *stdio, int nstdio,
-                          uint32_t flags, char **env, char *cwd, uv_exit_cb cb)
+                          uint32_t flags, char **env, char *cwd, char* cpumask,
+                          size_t cpumask_size, uv_exit_cb cb)
 {
     uv_process_options_t opts = {0};
     opts.stdio = stdio;
@@ -299,8 +298,8 @@ JL_DLLEXPORT int jl_spawn(char *name, char **argv,
     // unused fields:
     //opts.uid = 0;
     //opts.gid = 0;
-    //opts.cpumask = NULL;
-    //opts.cpumask_size = 0;
+    opts.cpumask = cpumask;
+    opts.cpumask_size = cpumask_size;
     opts.cwd = cwd;
     opts.args = argv;
     opts.stdio_count = nstdio;
@@ -309,7 +308,7 @@ JL_DLLEXPORT int jl_spawn(char *name, char **argv,
         if (!(flags == UV_INHERIT_FD || flags == UV_INHERIT_STREAM || flags == UV_IGNORE)) {
             proc->type = UV_PROCESS;
             proc->loop = loop;
-            proc->flags = UV_CLOSED;
+            proc->flags = UV_HANDLE_CLOSED;
             return UV_EINVAL;
         }
     }
@@ -368,6 +367,14 @@ JL_DLLEXPORT int jl_fs_sendfile(uv_os_fd_t src_fd, uv_os_fd_t dst_fd,
     return ret;
 }
 
+JL_DLLEXPORT int jl_fs_hardlink(char *path, char *new_path)
+{
+    uv_fs_t req;
+    int ret = uv_fs_link(unused_uv_loop_arg, &req, path, new_path, NULL);
+    uv_fs_req_cleanup(&req);
+    return ret;
+}
+
 JL_DLLEXPORT int jl_fs_symlink(char *path, char *new_path, int flags)
 {
     uv_fs_t req;
@@ -403,9 +410,9 @@ JL_DLLEXPORT int jl_fs_access(char *path, int mode)
 JL_DLLEXPORT int jl_fs_write(uv_os_fd_t handle, const char *data, size_t len,
                              int64_t offset) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_get_current_task();
     // TODO: fix this cheating
-    if (ptls->safe_restore || ptls->tid != 0)
+    if (jl_get_safe_restore() || ct == NULL || jl_atomic_load_relaxed(&ct->tid) != 0)
 #ifdef _OS_WINDOWS_
         return WriteFile(handle, data, len, NULL, NULL);
 #else
@@ -474,7 +481,7 @@ JL_DLLEXPORT int jl_uv_write(uv_stream_t *stream, const char *data, size_t n,
     return err;
 }
 
-JL_DLLEXPORT void jl_uv_writecb(uv_write_t *req, int status)
+static void jl_uv_writecb(uv_write_t *req, int status) JL_NOTSAFEPOINT
 {
     free(req);
     if (status < 0) {
@@ -504,8 +511,8 @@ JL_DLLEXPORT void jl_uv_puts(uv_stream_t *stream, const char *str, size_t n)
     }
 
     // TODO: Hack to make CoreIO thread-safer
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->tid != 0) {
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL || jl_atomic_load_relaxed(&ct->tid) != 0) {
         if (stream == JL_STDOUT) {
             fd = UV_STDOUT_FD;
         }
@@ -601,7 +608,7 @@ JL_DLLEXPORT int jl_printf(uv_stream_t *s, const char *format, ...)
     return c;
 }
 
-JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...)
 {
     static char buf[1000];
     buf[0] = '\0';
@@ -633,15 +640,6 @@ JL_DLLEXPORT void jl_exit(int exitcode)
     exit(exitcode);
 }
 
-JL_DLLEXPORT int jl_getpid(void) JL_NOTSAFEPOINT
-{
-#ifdef _OS_WINDOWS_
-    return GetCurrentProcessId();
-#else
-    return getpid();
-#endif
-}
-
 typedef union {
     struct sockaddr in;
     struct sockaddr_in v4;
@@ -861,7 +859,7 @@ JL_DLLEXPORT int jl_tcp_quickack(uv_tcp_t *handle, int on)
 
 JL_DLLEXPORT int jl_has_so_reuseport(void)
 {
-#if defined(SO_REUSEPORT)
+#if defined(SO_REUSEPORT) && !defined(_OS_DARWIN_)
     return 1;
 #else
     return 0;
@@ -965,12 +963,13 @@ JL_DLLEXPORT int jl_tty_set_mode(uv_tty_t *handle, int mode)
     return uv_tty_set_mode(handle, mode_enum);
 }
 
-typedef int (*work_cb_t)(void *, void *);
+typedef int (*work_cb_t)(void *, void *, void *);
 typedef void (*notify_cb_t)(int);
 
 struct work_baton {
     uv_work_t req;
     work_cb_t work_func;
+    void      *ccall_fptr;
     void      *work_args;
     void      *work_retval;
     notify_cb_t notify_func;
@@ -984,7 +983,7 @@ struct work_baton {
 void jl_work_wrapper(uv_work_t *req)
 {
     struct work_baton *baton = (struct work_baton*) req->data;
-    baton->work_func(baton->work_args, baton->work_retval);
+    baton->work_func(baton->ccall_fptr, baton->work_args, baton->work_retval);
 }
 
 void jl_work_notifier(uv_work_t *req, int status)
@@ -994,12 +993,13 @@ void jl_work_notifier(uv_work_t *req, int status)
     free(baton);
 }
 
-JL_DLLEXPORT int jl_queue_work(work_cb_t work_func, void *work_args, void *work_retval,
+JL_DLLEXPORT int jl_queue_work(work_cb_t work_func, void *ccall_fptr, void *work_args, void *work_retval,
                                notify_cb_t notify_func, int notify_idx)
 {
     struct work_baton *baton = (struct work_baton*)malloc_s(sizeof(struct work_baton));
     baton->req.data = (void*) baton;
     baton->work_func = work_func;
+    baton->ccall_fptr = ccall_fptr;
     baton->work_args = work_args;
     baton->work_retval = work_retval;
     baton->notify_func = notify_func;
diff --git a/src/jlapi.c b/src/jlapi.c
index d52489a865d1dd..89c6e90684b691 100644
--- a/src/jlapi.c
+++ b/src/jlapi.c
@@ -22,15 +22,6 @@ extern "C" {
 #include <fenv.h>
 #endif
 
-#if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_)
-JL_DLLEXPORT char * __cdecl dirname(char *);
-#else
-#include <libgen.h>
-#endif
-#ifndef _OS_WINDOWS_
-#include <dlfcn.h>
-#endif
-
 JL_DLLEXPORT int jl_is_initialized(void)
 {
     return jl_main_module != NULL;
@@ -79,21 +70,9 @@ JL_DLLEXPORT void jl_init(void)
 {
     char *libbindir = NULL;
 #ifdef _OS_WINDOWS_
-    void *hdl = (void*)jl_load_dynamic_library(NULL, JL_RTLD_DEFAULT, 0);
-    if (hdl) {
-        char *to_free = (char*)jl_pathname_for_handle(hdl);
-        if (to_free) {
-            libbindir = strdup(dirname(to_free));
-            free(to_free);
-        }
-    }
+    libbindir = strdup(jl_get_libdir());
 #else
-    Dl_info dlinfo;
-    if (dladdr((void*)jl_init, &dlinfo) != 0 && dlinfo.dli_fname) {
-        char *to_free = strdup(dlinfo.dli_fname);
-        (void)asprintf(&libbindir, "%s" PATHSEPSTRING ".." PATHSEPSTRING "%s", dirname(to_free), "bin");
-        free(to_free);
-    }
+    (void)asprintf(&libbindir, "%s" PATHSEPSTRING ".." PATHSEPSTRING "%s", jl_get_libdir(), "bin");
 #endif
     if (!libbindir) {
         printf("jl_init unable to find libjulia!\n");
@@ -103,20 +82,33 @@ JL_DLLEXPORT void jl_init(void)
     free(libbindir);
 }
 
+// HACK: remove this for Julia 1.8 (see <https://github.com/JuliaLang/julia/issues/40730>)
+JL_DLLEXPORT void jl_init__threading(void)
+{
+    jl_init();
+}
+
+// HACK: remove this for Julia 1.8 (see <https://github.com/JuliaLang/julia/issues/40730>)
+JL_DLLEXPORT void jl_init_with_image__threading(const char *julia_bindir,
+                                     const char *image_relative_path)
+{
+    jl_init_with_image(julia_bindir, image_relative_path);
+}
+
 JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
 {
     jl_value_t *r;
     JL_TRY {
         const char filename[] = "none";
         jl_value_t *ast = jl_parse_all(str, strlen(str),
-                filename, strlen(filename));
+                filename, strlen(filename), 1);
         JL_GC_PUSH1(&ast);
         r = jl_toplevel_eval_in(jl_main_module, ast);
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        jl_current_task->ptls->previous_exception = jl_current_exception();
         r = NULL;
     }
     return r;
@@ -124,18 +116,18 @@ JL_DLLEXPORT jl_value_t *jl_eval_string(const char *str)
 
 JL_DLLEXPORT jl_value_t *jl_current_exception(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
 {
-    jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack;
+    jl_excstack_t *s = jl_current_task->excstack;
     return s && s->top != 0 ? jl_excstack_exception(s, s->top) : jl_nothing;
 }
 
 JL_DLLEXPORT jl_value_t *jl_exception_occurred(void)
 {
-    return jl_get_ptls_states()->previous_exception;
+    return jl_current_task->ptls->previous_exception;
 }
 
 JL_DLLEXPORT void jl_exception_clear(void)
 {
-    jl_get_ptls_states()->previous_exception = NULL;
+    jl_current_task->ptls->previous_exception = NULL;
 }
 
 // get the name of a type as a string
@@ -172,24 +164,26 @@ JL_DLLEXPORT const char *jl_string_ptr(jl_value_t *s)
     return jl_string_data(s);
 }
 
-JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t nargs)
+JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, uint32_t nargs)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
+    nargs++; // add f to args
     JL_TRY {
         jl_value_t **argv;
-        JL_GC_PUSHARGS(argv, nargs+1);
+        JL_GC_PUSHARGS(argv, nargs);
         argv[0] = (jl_value_t*)f;
-        for(int i=1; i<nargs+1; i++)
-            argv[i] = args[i-1];
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
-        v = jl_apply(argv, nargs+1);
-        jl_get_ptls_states()->world_age = last_age;
+        for (int i = 1; i < nargs; i++)
+            argv[i] = args[i - 1];
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
+        v = jl_apply(argv, nargs);
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -198,17 +192,18 @@ JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t na
 JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         JL_GC_PUSH1(&f);
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
-        v = jl_apply(&f, 1);
-        jl_get_ptls_states()->world_age = last_age;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
+        v = jl_apply_generic(f, NULL, 0);
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -217,19 +212,21 @@ JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f)
 JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, 2);
-        argv[0] = f; argv[1] = a;
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        argv[0] = f;
+        argv[1] = a;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
         v = jl_apply(argv, 2);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -238,19 +235,22 @@ JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a)
 JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b)
 {
     jl_value_t *v;
+    jl_task_t *ct = jl_current_task;
     JL_TRY {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, 3);
-        argv[0] = f; argv[1] = a; argv[2] = b;
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        argv[0] = f;
+        argv[1] = a;
+        argv[2] = b;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
         v = jl_apply(argv, 3);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        ct->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -263,16 +263,20 @@ JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
     JL_TRY {
         jl_value_t **argv;
         JL_GC_PUSHARGS(argv, 4);
-        argv[0] = f; argv[1] = a; argv[2] = b; argv[3] = c;
-        size_t last_age = jl_get_ptls_states()->world_age;
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
+        argv[0] = f;
+        argv[1] = a;
+        argv[2] = b;
+        argv[3] = c;
+        jl_task_t *ct = jl_current_task;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_get_world_counter();
         v = jl_apply(argv, 4);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
         JL_GC_POP();
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        jl_current_task->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -297,7 +301,7 @@ JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld)
         jl_exception_clear();
     }
     JL_CATCH {
-        jl_get_ptls_states()->previous_exception = jl_current_exception();
+        jl_current_task->ptls->previous_exception = jl_current_exception();
         v = NULL;
     }
     return v;
@@ -310,8 +314,8 @@ JL_DLLEXPORT void jl_sigatomic_begin(void)
 
 JL_DLLEXPORT void jl_sigatomic_end(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->defer_signal == 0)
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->defer_signal == 0)
         jl_error("sigatomic_end called in non-sigatomic region");
     JL_SIGATOMIC_END();
 }
@@ -398,7 +402,7 @@ JL_DLLEXPORT const char *jl_git_commit(void)
     return commit;
 }
 
-// Create function versions of some useful macros
+// Create function versions of some useful macros for GDB or FFI use
 JL_DLLEXPORT jl_taggedvalue_t *(jl_astaggedvalue)(jl_value_t *v)
 {
     return jl_astaggedvalue(v);
@@ -419,37 +423,42 @@ JL_DLLEXPORT jl_value_t *(jl_get_fieldtypes)(jl_value_t *v)
     return (jl_value_t*)jl_get_fieldtypes((jl_datatype_t*)v);
 }
 
+JL_DLLEXPORT int ijl_egal(jl_value_t *a, jl_value_t *b)
+{
+    return jl_egal(a, b);
+}
+
 
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
 JL_DLLEXPORT int8_t (jl_gc_unsafe_enter)(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return jl_gc_unsafe_enter(ptls);
+    jl_task_t *ct = jl_current_task;
+    return jl_gc_unsafe_enter(ct->ptls);
 }
 
 JL_DLLEXPORT void (jl_gc_unsafe_leave)(int8_t state)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_gc_unsafe_leave(ptls, state);
+    jl_task_t *ct = jl_current_task;
+    jl_gc_unsafe_leave(ct->ptls, state);
 }
 
 JL_DLLEXPORT int8_t (jl_gc_safe_enter)(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return jl_gc_safe_enter(ptls);
+    jl_task_t *ct = jl_current_task;
+    return jl_gc_safe_enter(ct->ptls);
 }
 
 JL_DLLEXPORT void (jl_gc_safe_leave)(int8_t state)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_gc_safe_leave(ptls, state);
+    jl_task_t *ct = jl_current_task;
+    jl_gc_safe_leave(ct->ptls, state);
 }
 #endif
 
-JL_DLLEXPORT void (jl_gc_safepoint)(void)
+JL_DLLEXPORT void jl_gc_safepoint(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_gc_safepoint_(ptls);
+    jl_task_t *ct = jl_current_task;
+    jl_gc_safepoint_(ct->ptls);
 }
 
 JL_DLLEXPORT void (jl_cpu_pause)(void)
@@ -462,6 +471,28 @@ JL_DLLEXPORT void (jl_cpu_wake)(void)
     jl_cpu_wake();
 }
 
+JL_DLLEXPORT void jl_cumulative_compile_timing_enable(void)
+{
+    // Increment the flag to allow reentrant callers to `@time`.
+    jl_atomic_fetch_add(&jl_measure_compile_time_enabled, 1);
+}
+
+JL_DLLEXPORT void jl_cumulative_compile_timing_disable(void)
+{
+    // Decrement the flag when done measuring, allowing other callers to continue measuring.
+    jl_atomic_fetch_add(&jl_measure_compile_time_enabled, -1);
+}
+
+JL_DLLEXPORT uint64_t jl_cumulative_compile_time_ns(void)
+{
+    return jl_atomic_load_relaxed(&jl_cumulative_compile_time);
+}
+
+JL_DLLEXPORT uint64_t jl_cumulative_recompile_time_ns(void)
+{
+    return jl_atomic_load_relaxed(&jl_cumulative_recompile_time);
+}
+
 JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
 {
     ret[0] = FE_INEXACT;
@@ -475,6 +506,212 @@ JL_DLLEXPORT void jl_get_fenv_consts(int *ret)
     ret[8] = FE_TOWARDZERO;
 }
 
+// TODO: Windows binaries currently load msvcrt which doesn't have these C99 functions.
+//       the mingw compiler ships additional definitions, but only for use in C code.
+//       remove this when we switch to ucrt, make the version in openlibm portable,
+//       or figure out how to reexport the defs from libmingwex (see JuliaLang/julia#38466).
+JL_DLLEXPORT int jl_get_fenv_rounding(void)
+{
+    return fegetround();
+}
+JL_DLLEXPORT int jl_set_fenv_rounding(int i)
+{
+    return fesetround(i);
+}
+
+static int exec_program(char *program)
+{
+    JL_TRY {
+        jl_load(jl_main_module, program);
+    }
+    JL_CATCH {
+        // TODO: It is possible for this output to be mangled due to `jl_print_backtrace`
+        //       printing directly to STDERR_FILENO.
+        int shown_err = 0;
+        jl_printf(JL_STDERR, "error during bootstrap:\n");
+        jl_value_t *exc = jl_current_exception();
+        jl_value_t *showf = jl_base_module ? jl_get_function(jl_base_module, "show") : NULL;
+        if (showf) {
+            jl_value_t *errs = jl_stderr_obj();
+            if (errs) {
+                if (jl_call2(showf, errs, exc)) {
+                    jl_printf(JL_STDERR, "\n");
+                    shown_err = 1;
+                }
+            }
+        }
+        if (!shown_err) {
+            jl_static_show((JL_STREAM*)STDERR_FILENO, exc);
+            jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+        }
+        jl_print_backtrace(); // written to STDERR_FILENO
+        jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+        return 1;
+    }
+    return 0;
+}
+
+static NOINLINE int true_main(int argc, char *argv[])
+{
+    jl_set_ARGS(argc, argv);
+
+    jl_function_t *start_client = jl_base_module ?
+        (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("_start")) : NULL;
+
+    if (start_client) {
+        JL_TRY {
+            jl_task_t *ct = jl_current_task;
+            size_t last_age = ct->world_age;
+            ct->world_age = jl_get_world_counter();
+            jl_apply(&start_client, 1);
+            ct->world_age = last_age;
+        }
+        JL_CATCH {
+            jl_no_exc_handler(jl_current_exception());
+        }
+        return 0;
+    }
+
+    // run program if specified, otherwise enter REPL
+    if (argc > 0) {
+        if (strcmp(argv[0], "-")) {
+            return exec_program(argv[0]);
+        }
+    }
+
+    jl_printf(JL_STDOUT, "WARNING: Base._start not defined, falling back to economy mode repl.\n");
+    if (!jl_errorexception_type)
+        jl_printf(JL_STDOUT, "WARNING: jl_errorexception_type not defined; any errors will be fatal.\n");
+
+    while (!ios_eof(ios_stdin)) {
+        char *volatile line = NULL;
+        JL_TRY {
+            ios_puts("\njulia> ", ios_stdout);
+            ios_flush(ios_stdout);
+            line = ios_readline(ios_stdin);
+            jl_value_t *val = (jl_value_t*)jl_eval_string(line);
+            JL_GC_PUSH1(&val);
+            if (jl_exception_occurred()) {
+                jl_printf(JL_STDERR, "error during run:\n");
+                jl_static_show(JL_STDERR, jl_exception_occurred());
+                jl_exception_clear();
+            }
+            else if (val) {
+                jl_static_show(JL_STDOUT, val);
+            }
+            JL_GC_POP();
+            jl_printf(JL_STDOUT, "\n");
+            free(line);
+            line = NULL;
+            jl_process_events();
+        }
+        JL_CATCH {
+            if (line) {
+                free(line);
+                line = NULL;
+            }
+            jl_printf((JL_STREAM*)STDERR_FILENO, "\nparser error:\n");
+            jl_static_show((JL_STREAM*)STDERR_FILENO, jl_current_exception());
+            jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+            jl_print_backtrace(); // written to STDERR_FILENO
+        }
+    }
+    return 0;
+}
+
+static void lock_low32(void)
+{
+#if defined(_OS_WINDOWS_) && defined(_P64) && defined(JL_DEBUG_BUILD)
+    // Prevent usage of the 32-bit address space on Win64, to catch pointer cast errors.
+    char *const max32addr = (char*)0xffffffffL;
+    SYSTEM_INFO info;
+    MEMORY_BASIC_INFORMATION meminfo;
+    GetNativeSystemInfo(&info);
+    memset(&meminfo, 0, sizeof(meminfo));
+    meminfo.BaseAddress = info.lpMinimumApplicationAddress;
+    while ((char*)meminfo.BaseAddress < max32addr) {
+        size_t nbytes = VirtualQuery(meminfo.BaseAddress, &meminfo, sizeof(meminfo));
+        assert(nbytes == sizeof(meminfo));
+        if (meminfo.State == MEM_FREE) { // reserve all free pages in the first 4GB of memory
+            char *first = (char*)meminfo.BaseAddress;
+            char *last = first + meminfo.RegionSize;
+            if (last > max32addr)
+                last = max32addr;
+            // adjust first up to the first allocation granularity boundary
+            // adjust last down to the last allocation granularity boundary
+            first = (char*)(((long long)first + info.dwAllocationGranularity - 1) & ~(info.dwAllocationGranularity - 1));
+            last = (char*)((long long)last & ~(info.dwAllocationGranularity - 1));
+            if (last != first) {
+                void *p = VirtualAlloc(first, last - first, MEM_RESERVE, PAGE_NOACCESS); // reserve all memory in between
+                if ((char*)p != first)
+                    // Wine and Windows10 seem to have issues with reporting memory access information correctly
+                    // so we sometimes end up with unexpected results - this is just ignore those and continue
+                    // this is just a debugging aid to help find accidental pointer truncation anyways,
+                    // so it is not critical
+                    VirtualFree(p, 0, MEM_RELEASE);
+            }
+        }
+        meminfo.BaseAddress = (void*)((char*)meminfo.BaseAddress + meminfo.RegionSize);
+    }
+#endif
+    return;
+}
+
+// Actual definition in `ast.c`
+void jl_lisp_prompt(void);
+
+#ifdef _OS_LINUX_
+static void rr_detach_teleport(void) {
+#define RR_CALL_BASE 1000
+#define SYS_rrcall_detach_teleport (RR_CALL_BASE + 9)
+    int err = syscall(SYS_rrcall_detach_teleport, 0, 0, 0, 0, 0, 0);
+    if (err < 0 || jl_running_under_rr(1)) {
+        jl_error("Failed to detach from rr session");
+    }
+}
+#endif
+
+JL_DLLEXPORT int jl_repl_entrypoint(int argc, char *argv[])
+{
+    // no-op on Windows, note that the caller must have already converted
+    // from `wchar_t` to `UTF-8` already if we're running on Windows.
+    uv_setup_args(argc, argv);
+
+    // No-op on non-windows
+    lock_low32();
+
+    libsupport_init();
+    int lisp_prompt = (argc >= 2 && strcmp((char*)argv[1],"--lisp") == 0);
+    if (lisp_prompt) {
+        memmove(&argv[1], &argv[2], (argc-2)*sizeof(void*));
+        argc--;
+    }
+    char **new_argv = argv;
+    jl_parse_opts(&argc, (char***)&new_argv);
+
+    // The parent process requested that we detach from the rr session.
+    // N.B.: In a perfect world, we would only do this for the portion of
+    // the execution where we actually need to exclude rr (e.g. because we're
+    // testing for the absence of a memory-model-dependent bug).
+    if (jl_options.rr_detach && jl_running_under_rr(0)) {
+#ifdef _OS_LINUX_
+        rr_detach_teleport();
+        execv("/proc/self/exe", argv);
+#endif
+        jl_error("Failed to self-execute");
+    }
+
+    julia_init(jl_options.image_file_specified ? JL_IMAGE_CWD : JL_IMAGE_JULIA_HOME);
+    if (lisp_prompt) {
+        jl_current_task->world_age = jl_get_world_counter();
+        jl_lisp_prompt();
+        return 0;
+    }
+    int ret = true_main(argc, (char**)new_argv);
+    jl_atexit_hook(ret);
+    return ret;
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/jlfrontend.scm b/src/jlfrontend.scm
index 39a3adc3ee4677..f72c79f2814808 100644
--- a/src/jlfrontend.scm
+++ b/src/jlfrontend.scm
@@ -37,9 +37,10 @@
 ;; parser entry points
 
 ;; parse one expression (if greedy) or atom, returning end position
-(define (jl-parse-one str filename pos0 greedy)
+(define (jl-parse-one str filename pos0 greedy (lineno 1))
   (let ((inp (open-input-string str)))
     (io.seek inp pos0)
+    (io.set-lineno! inp lineno)
     (with-bindings ((current-filename (symbol filename)))
      (let ((expr (error-wrap (lambda ()
                                (if greedy
@@ -78,13 +79,17 @@
    (io.close io)))
 
 ;; parse all expressions in a string, the same way files are parsed
-(define (jl-parse-all str filename)
-  (parse-all- (open-input-string str) filename))
+(define (jl-parse-all str filename (lineno 1))
+  (let ((io (open-input-string str)))
+    (io.set-lineno! io lineno)
+    (parse-all- io filename)))
 
-(define (jl-parse-file filename)
+(define (jl-parse-file filename (lineno 1))
   (trycatch
-   (parse-all- (open-input-file filename) filename)
-   (lambda (e) #f)))
+    (let ((io (open-input-string str)))
+      (io.set-lineno! io lineno)
+      (parse-all- io filename))
+    (lambda (e) #f)))
 
 ;; lowering entry points
 
@@ -153,13 +158,14 @@
 (define (jl-expand-to-thunk-warn expr file line stmt)
   (let ((warnings '()))
     (with-bindings
-     ((lowering-warning (lambda lst (set! warnings (cons lst warnings)))))
-     (begin0
-      (if stmt
-          (expand-to-thunk-stmt- expr file line)
-          (expand-to-thunk- expr file line))
-      (for-each (lambda (args) (apply julia-logmsg args))
-                (reverse warnings))))))
+     ;; Abuse scm_to_julia here to convert arguments to warn. This is meant for
+     ;; `Expr`s but should be good enough provided we're only passing simple
+     ;; numbers, symbols and strings.
+     ((lowering-warning (lambda lst (set! warnings (cons (cons 'warn lst) warnings)))))
+     (let ((thunk (if stmt
+                      (expand-to-thunk-stmt- expr file line)
+                      (expand-to-thunk- expr file line))))
+       (if (pair? warnings) `(warn ,@(reverse warnings) ,thunk) thunk)))))
 
 (define (jl-expand-to-thunk expr file line)
   (expand-to-thunk- expr file line))
@@ -191,11 +197,11 @@
        (= (call include ,x)
           (block
            ,@loc
-           (call (core _apply_latest) (top include) (call (core svec) ,name ,x))))
+           (call (core _call_latest) (top include) ,name ,x)))
        (= (call include (:: ,mex (top Function)) ,x)
           (block
            ,@loc
-           (call (core _apply_latest) (top include) (call (core svec) ,mex ,name ,x))))))
+           (call (core _call_latest) (top include) ,mex ,name ,x)))))
    'none 0))
 
 ; run whole frontend on a string. useful for testing.
@@ -214,16 +220,6 @@
 ; Utilities for logging messages from the frontend, in a way which can be
 ; controlled from julia code.
 
-; Log a general deprecation message at line node location `lno`
-(define (deprecation-message msg lno)
-  (let* ((lf (extract-line-file lno)) (line (car lf)) (file (cadr lf)))
-    (frontend-depwarn msg file line)))
-
-; Log a syntax deprecation from line node location `lno`
-(define (syntax-deprecation what instead lno)
-  (let* ((lf (extract-line-file lno)) (line (car lf)) (file (cadr lf)))
-    (deprecation-message (format-syntax-deprecation what instead file line #f) lno)))
-
 ; Extract line and file from a line number node, defaulting to (0, none)
 ; respectively if lno is absent (`#f`) or doesn't contain a file
 (define (extract-line-file lno)
@@ -241,21 +237,4 @@
       ""
       (string (if exactloc " at " " around ") file ":" line)))
 
-(define (format-syntax-deprecation what instead file line exactloc)
-  (string "Deprecated syntax `" what "`"
-          (format-file-line file line exactloc)
-          "."
-          (if (equal? instead "") ""
-              (string #\newline "Use `" instead "` instead."))))
-
 (define *scopewarn-opt* 1)
-
-; Corresponds to --depwarn 0="no", 1="yes", 2="error"
-(define *depwarn-opt* 1)
-
-; Emit deprecation warning via julia logging layer.
-(define (frontend-depwarn msg file line)
-  ; (display (string msg "; file = " file "; line = " line #\newline)))
-  (case *depwarn-opt*
-    (1 (julia-logmsg 1000 'depwarn (symbol (string file line)) file line msg))
-    (2 (error msg))))
diff --git a/src/jloptions.c b/src/jloptions.c
index da8c7a8560973b..ff7896e55fa55a 100644
--- a/src/jloptions.c
+++ b/src/jloptions.c
@@ -4,13 +4,10 @@
 #include <errno.h>
 
 #include "julia.h"
+#include "julia_internal.h"
 
-#ifndef _MSC_VER
 #include <unistd.h>
 #include <getopt.h>
-#else
-#include "getopt.h"
-#endif
 #include "julia_assert.h"
 
 #ifdef _OS_WINDOWS_
@@ -27,144 +24,182 @@ JL_DLLEXPORT const char *jl_get_default_sysimg_path(void)
     return &system_image_path[1];
 }
 
+static int jl_options_initialized = 0;
 
-jl_options_t jl_options = { 0,    // quiet
-                            -1,   // banner
-                            NULL, // julia_bindir
-                            NULL, // julia_bin
-                            NULL, // cmds
-                            NULL, // image_file (will be filled in below)
-                            NULL, // cpu_target ("native", "core2", etc...)
-                            0,    // nthreads
-                            0,    // nprocs
-                            NULL, // machine_file
-                            NULL, // project
-                            0,    // isinteractive
-                            0,    // color
-                            JL_OPTIONS_HISTORYFILE_ON, // history file
-                            0,    // startup file
-                            JL_OPTIONS_COMPILE_DEFAULT, // compile_enabled
-                            0,    // code_coverage
-                            0,    // malloc_log
-                            2,    // opt_level
+JL_DLLEXPORT void jl_init_options(void)
+{
+    if (jl_options_initialized)
+        return;
+    jl_options =
+        (jl_options_t){ 0,    // quiet
+                        -1,   // banner
+                        NULL, // julia_bindir
+                        NULL, // julia_bin
+                        NULL, // cmds
+                        NULL, // image_file (will be filled in below)
+                        NULL, // cpu_target ("native", "core2", etc...)
+                        0,    // nthreadpools
+                        0,    // nthreads
+                        NULL, // nthreads_per_pool
+                        0,    // nprocs
+                        NULL, // machine_file
+                        NULL, // project
+                        0,    // isinteractive
+                        0,    // color
+                        JL_OPTIONS_HISTORYFILE_ON, // history file
+                        0,    // startup file
+                        JL_OPTIONS_COMPILE_DEFAULT, // compile_enabled
+                        0,    // code_coverage
+                        0,    // malloc_log
+                        NULL, // tracked_path
+                        2,    // opt_level
+                        0,    // opt_level_min
 #ifdef JL_DEBUG_BUILD
-                            2,    // debug_level [debug build]
+                        2,    // debug_level [debug build]
 #else
-                            1,    // debug_level [release build]
+                        1,    // debug_level [release build]
 #endif
-                            JL_OPTIONS_CHECK_BOUNDS_DEFAULT, // check_bounds
-                            JL_OPTIONS_DEPWARN_OFF,    // deprecation warning
-                            0,    // method overwrite warning
-                            1,    // can_inline
-                            JL_OPTIONS_POLLY_ON, // polly
-                            NULL, // trace_compile
-                            JL_OPTIONS_FAST_MATH_DEFAULT,
-                            0,    // worker
-                            NULL, // cookie
-                            JL_OPTIONS_HANDLE_SIGNALS_ON,
-                            JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES,
-                            JL_OPTIONS_USE_COMPILED_MODULES_YES,
-                            NULL, // bind-to
-                            NULL, // output-bc
-                            NULL, // output-unopt-bc
-                            NULL, // output-o
-                            NULL, // output-asm
-                            NULL, // output-ji
-                            NULL,    // output-code_coverage
-                            0, // incremental
-                            0, // image_file_specified
-                            JL_OPTIONS_WARN_SCOPE_ON,  // ambiguous scope warning
-                            0, // image-codegen
-};
+                        JL_OPTIONS_CHECK_BOUNDS_DEFAULT, // check_bounds
+                        JL_OPTIONS_DEPWARN_OFF,    // deprecation warning
+                        0,    // method overwrite warning
+                        1,    // can_inline
+                        JL_OPTIONS_POLLY_ON, // polly
+                        NULL, // trace_compile
+                        JL_OPTIONS_FAST_MATH_DEFAULT,
+                        0,    // worker
+                        NULL, // cookie
+                        JL_OPTIONS_HANDLE_SIGNALS_ON,
+                        JL_OPTIONS_USE_SYSIMAGE_NATIVE_CODE_YES,
+                        JL_OPTIONS_USE_COMPILED_MODULES_YES,
+                        NULL, // bind-to
+                        NULL, // output-bc
+                        NULL, // output-unopt-bc
+                        NULL, // output-o
+                        NULL, // output-asm
+                        NULL, // output-ji
+                        NULL, // output-code_coverage
+                        0, // incremental
+                        0, // image_file_specified
+                        JL_OPTIONS_WARN_SCOPE_ON,  // ambiguous scope warning
+                        0, // image-codegen
+                        0, // rr-detach
+                        0, // strip-metadata
+                        0, // strip-ir
+    };
+    jl_options_initialized = 1;
+}
 
-static const char usage[] = "julia [switches] -- [programfile] [args...]\n";
+static const char usage[] = "\n    julia [switches] -- [programfile] [args...]\n\n";
 static const char opts[]  =
-    " -v, --version             Display version information\n"
-    " -h, --help                Print this message (--help-hidden for more)\n"
-    " --help-hidden             Uncommon options not shown by `-h`\n\n"
+    "Switches (a '*' marks the default value, if applicable):\n\n"
+    " -v, --version              Display version information\n"
+    " -h, --help                 Print this message (--help-hidden for more)\n"
+    " --help-hidden              Uncommon options not shown by `-h`\n\n"
 
     // startup options
-    " --project[={<dir>|@.}]    Set <dir> as the home project/environment\n"
-    " -J, --sysimage <file>     Start up with the given system image file\n"
-    " -H, --home <dir>          Set location of `julia` executable\n"
-    " --startup-file={yes|no}   Load `~/.julia/config/startup.jl`\n"
-    " --handle-signals={yes|no} Enable or disable Julia's default signal handlers\n"
-    " --sysimage-native-code={yes|no}\n"
-    "                           Use native code from system image if available\n"
-    " --compiled-modules={yes|no}\n"
-    "                           Enable or disable incremental precompilation of modules\n\n"
+    " --project[={<dir>|@.}]     Set <dir> as the home project/environment\n"
+    " -J, --sysimage <file>      Start up with the given system image file\n"
+    " -H, --home <dir>           Set location of `julia` executable\n"
+    " --startup-file={yes*|no}   Load `JULIA_DEPOT_PATH/config/startup.jl`; if `JULIA_DEPOT_PATH`\n"
+    "                            environment variable is unset, load `~/.julia/config/startup.jl`\n"
+    " --handle-signals={yes*|no} Enable or disable Julia's default signal handlers\n"
+    " --sysimage-native-code={yes*|no}\n"
+    "                            Use native code from system image if available\n"
+    " --compiled-modules={yes*|no}\n"
+    "                            Enable or disable incremental precompilation of modules\n\n"
 
     // actions
-    " -e, --eval <expr>         Evaluate <expr>\n"
-    " -E, --print <expr>        Evaluate <expr> and display the result\n"
-    " -L, --load <file>         Load <file> immediately on all processors\n\n"
+    " -e, --eval <expr>          Evaluate <expr>\n"
+    " -E, --print <expr>         Evaluate <expr> and display the result\n"
+    " -L, --load <file>          Load <file> immediately on all processors\n\n"
 
     // parallel options
-    " -t, --threads {N|auto}    Enable N threads; \"auto\" currently sets N to the number of local\n"
-    "                           CPU threads but this might change in the future\n"
+    " -t, --threads {auto|N[,auto|M]}\n"
+    "                           Enable N[+M] threads; N threads are assigned to the `default`\n"
+    "                           threadpool, and if M is specified, M threads are assigned to the\n"
+    "                           `interactive` threadpool; \"auto\" tries to infer a useful\n"
+    "                           default number of threads to use but the exact behavior might change\n"
+    "                           in the future. Currently sets N to the number of CPUs assigned to\n"
+    "                           this Julia process based on the OS-specific affinity assignment\n"
+    "                           interface if supported (Linux and Windows) or to the number of CPU\n"
+    "                           threads if not supported (MacOS) or if process affinity is not\n"
+    "                           configured, and sets M to 1.\n"
     " -p, --procs {N|auto}      Integer value N launches N additional local worker processes\n"
     "                           \"auto\" launches as many workers as the number of local CPU threads (logical cores)\n"
     " --machine-file <file>     Run processes on hosts listed in <file>\n\n"
 
     // interactive options
-    " -i                        Interactive mode; REPL runs and isinteractive() is true\n"
-    " -q, --quiet               Quiet startup: no banner, suppress REPL warnings\n"
-    " --banner={yes|no|auto}    Enable or disable startup banner\n"
-    " --color={yes|no|auto}     Enable or disable color text\n"
-    " --history-file={yes|no}   Load or save history\n\n"
+    " -i                         Interactive mode; REPL runs and `isinteractive()` is true\n"
+    " -q, --quiet                Quiet startup: no banner, suppress REPL warnings\n"
+    " --banner={yes|no|auto*}    Enable or disable startup banner\n"
+    " --color={yes|no|auto*}     Enable or disable color text\n"
+    " --history-file={yes*|no}   Load or save history\n\n"
 
     // error and warning options
-    " --depwarn={yes|no|error}  Enable or disable syntax and method deprecation warnings (\"error\" turns warnings into errors)\n"
-    " --warn-overwrite={yes|no} Enable or disable method overwrite warnings\n"
-    " --warn-scope={yes|no}     Enable or disable warning for ambiguous top-level scope\n\n"
+    " --depwarn={yes|no*|error}  Enable or disable syntax and method deprecation warnings (`error` turns warnings into errors)\n"
+    " --warn-overwrite={yes|no*} Enable or disable method overwrite warnings\n"
+    " --warn-scope={yes*|no}     Enable or disable warning for ambiguous top-level scope\n\n"
 
     // code generation options
-    " -C, --cpu-target <target> Limit usage of CPU features up to <target>; set to \"help\" to see the available options\n"
-    " -O, --optimize={0,1,2,3}  Set the optimization level (default level is 2 if unspecified or 3 if used without a level)\n"
-    " -g, -g <level>            Enable / Set the level of debug info generation"
+    " -C, --cpu-target <target>  Limit usage of CPU features up to <target>; set to `help` to see the available options\n"
+    " -O, --optimize={0,1,2*,3}  Set the optimization level (level 3 if `-O` is used without a level)\n"
+    " --min-optlevel={0*,1,2,3}  Set a lower bound on the optimization level\n"
 #ifdef JL_DEBUG_BUILD
-        " (default level for julia-debug is 2 if unspecified or if used without a level)\n"
+        " -g [{0,1,2*}]              Set the level of debug info generation in the julia-debug build\n"
 #else
-        " (default level is 1 if unspecified or 2 if used without a level)\n"
+        " -g [{0,1*,2}]              Set the level of debug info generation (level 2 if `-g` is used without a level)\n"
 #endif
-    " --inline={yes|no}         Control whether inlining is permitted, including overriding @inline declarations\n"
-    " --check-bounds={yes|no}   Emit bounds checks always or never (ignoring declarations)\n"
+    " --inline={yes*|no}         Control whether inlining is permitted, including overriding @inline declarations\n"
+    " --check-bounds={yes|no|auto*}\n"
+    "                            Emit bounds checks always, never, or respect @inbounds declarations\n"
 #ifdef USE_POLLY
-    " --polly={yes|no}          Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)\n"
+    " --polly={yes*|no}          Enable or disable the polyhedral optimizer Polly (overrides @polly declaration)\n"
 #endif
-    " --math-mode={ieee,fast}   Disallow or enable unsafe floating point optimizations (overrides @fastmath declaration)\n\n"
 
     // instrumentation options
-    " --code-coverage={none|user|all}, --code-coverage\n"
-    "                           Count executions of source lines (omitting setting is equivalent to \"user\")\n"
+    " --code-coverage[={none*|user|all}]\n"
+    "                            Count executions of source lines (omitting setting is equivalent to `user`)\n"
+    " --code-coverage=@<path>\n"
+    "                            Count executions but only in files that fall under the given file path/directory.\n"
+    "                            The `@` prefix is required to select this option. A `@` with no path will track the\n"
+    "                            current directory.\n"
+
     " --code-coverage=tracefile.info\n"
-    "                           Append coverage information to the LCOV tracefile (filename supports format tokens).\n"
+    "                            Append coverage information to the LCOV tracefile (filename supports format tokens)\n"
 // TODO: These TOKENS are defined in `runtime_ccall.cpp`. A more verbose `--help` should include that list here.
-    " --track-allocation={none|user|all}, --track-allocation\n"
-    "                           Count bytes allocated by each source line (omitting setting is equivalent to \"user\")\n"
-    " --bug-report=KIND         Launch a bug report session. It can be used to start a REPL, run a script, or evaluate\n"
-    "                           expressions. It first tries to use BugReporting.jl installed in current environment and\n"
-    "                           fallbacks to the latest compatible BugReporting.jl if not. For more information, see\n"
-    "                           --bug-report=help.\n\n"
+    " --track-allocation[={none*|user|all}]\n"
+    "                            Count bytes allocated by each source line (omitting setting is equivalent to `user`)\n"
+    " --track-allocation=@<path>\n"
+    "                            Count bytes but only in files that fall under the given file path/directory.\n"
+    "                            The `@` prefix is required to select this option. A `@` with no path will track the\n"
+    "                            current directory.\n"
+    " --bug-report=KIND          Launch a bug report session. It can be used to start a REPL, run a script, or evaluate\n"
+    "                            expressions. It first tries to use BugReporting.jl installed in current environment and\n"
+    "                            fallbacks to the latest compatible BugReporting.jl if not. For more information, see\n"
+    "                            --bug-report=help.\n\n"
 ;
 
 static const char opts_hidden[]  =
+    "Switches (a '*' marks the default value, if applicable):\n\n"
     // code generation options
-    " --compile={yes|no|all|min}Enable or disable JIT compiler, or request exhaustive or minimal compilation\n"
+    " --compile={yes*|no|all|min}\n"
+    "                          Enable or disable JIT compiler, or request exhaustive or minimal compilation\n\n"
 
     // compiler output options
-    " --output-o name           Generate an object file (including system image data)\n"
-    " --output-ji name          Generate a system image data file (.ji)\n"
+    " --output-o <name>        Generate an object file (including system image data)\n"
+    " --output-ji <name>       Generate a system image data file (.ji)\n"
+    " --strip-metadata         Remove docstrings and source location info from system image\n"
+    " --strip-ir               Remove IR (intermediate representation) of compiled functions\n\n"
 
     // compiler debugging (see the devdocs for tips on using these options)
-    " --output-unopt-bc name    Generate unoptimized LLVM bitcode (.bc)\n"
-    " --output-jit-bc name      Dump all IR generated by the frontend (not including system image)\n"
-    " --output-bc name          Generate LLVM bitcode (.bc)\n"
-    " --output-asm name         Generate an assembly file (.s)\n"
-    " --output-incremental=no   Generate an incremental output file (rather than complete)\n"
+    " --output-unopt-bc <name> Generate unoptimized LLVM bitcode (.bc)\n"
+    " --output-bc <name>       Generate LLVM bitcode (.bc)\n"
+    " --output-asm <name>      Generate an assembly file (.s)\n"
+    " --output-incremental={yes|no*}\n"
+    "                          Generate an incremental output file (rather than complete)\n"
     " --trace-compile={stderr,name}\n"
-    "                           Print precompile statements for methods compiled during execution or save to a path\n\n"
-    " --image-codegen           Force generate code in imaging mode\n"
+    "                          Print precompile statements for methods compiled during execution or save to a path\n"
+    " --image-codegen          Force generate code in imaging mode\n"
 ;
 
 JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
@@ -189,6 +224,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_worker,
            opt_bind_to,
            opt_handle_signals,
+           opt_optlevel_min,
            opt_output_o,
            opt_output_asm,
            opt_output_ji,
@@ -203,12 +239,15 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
            opt_project,
            opt_bug_report,
            opt_image_codegen,
+           opt_rr_detach,
+           opt_strip_metadata,
+           opt_strip_ir,
     };
     static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:";
     static const struct option longopts[] = {
         // exposed command line options
         // NOTE: This set of required arguments need to be kept in sync
-        // with the required arguments defined in base/client.jl `process_options()`
+        // with the required arguments defined in base/options.jl `struct JLOptions`
         { "version",         no_argument,       0, 'v' },
         { "help",            no_argument,       0, 'h' },
         { "help-hidden",     no_argument,       0, opt_help_hidden },
@@ -221,7 +260,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "bug-report",      required_argument, 0, opt_bug_report },
         { "sysimage",        required_argument, 0, 'J' },
         { "sysimage-native-code", required_argument, 0, opt_sysimage_native_code },
-        { "compiled-modules",    required_argument, 0, opt_compiled_modules },
+        { "compiled-modules",required_argument, 0, opt_compiled_modules },
         { "cpu-target",      required_argument, 0, 'C' },
         { "procs",           required_argument, 0, 'p' },
         { "threads",         required_argument, 0, 't' },
@@ -234,6 +273,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "code-coverage",   optional_argument, 0, opt_code_coverage },
         { "track-allocation",optional_argument, 0, opt_track_allocation },
         { "optimize",        optional_argument, 0, 'O' },
+        { "min-optlevel",    optional_argument, 0, opt_optlevel_min },
         { "check-bounds",    required_argument, 0, opt_check_bounds },
         { "output-bc",       required_argument, 0, opt_output_bc },
         { "output-unopt-bc", required_argument, 0, opt_output_unopt_bc },
@@ -254,6 +294,9 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         { "bind-to",         required_argument, 0, opt_bind_to },
         { "lisp",            no_argument,       0, 1 },
         { "image-codegen",   no_argument,       0, opt_image_codegen },
+        { "rr-detach",       no_argument,       0, opt_rr_detach },
+        { "strip-metadata",  no_argument,       0, opt_strip_metadata },
+        { "strip-ir",        no_argument,       0, opt_strip_ir },
         { 0, 0, 0, 0 }
     };
 
@@ -409,24 +452,54 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             break;
         case 't': // threads
             errno = 0;
-            if (!strcmp(optarg,"auto")) {
+            jl_options.nthreadpools = 1;
+            long nthreads = -1, nthreadsi = 0;
+            if (!strncmp(optarg, "auto", 4)) {
                 jl_options.nthreads = -1;
+                if (optarg[4] == ',') {
+                    if (!strncmp(&optarg[5], "auto", 4))
+                        nthreadsi = 1;
+                    else {
+                        errno = 0;
+                        nthreadsi = strtol(&optarg[5], &endptr, 10);
+                        if (errno != 0 || endptr == &optarg[5] || *endptr != 0 || nthreadsi < 1 || nthreadsi >= INT16_MAX)
+                            jl_errorf("julia: -t,--threads=auto,<m>; m must be an integer >= 1");
+                    }
+                    jl_options.nthreadpools++;
+                }
             }
             else {
-                long nthreads = strtol(optarg, &endptr, 10);
-                if (errno != 0 || optarg == endptr || *endptr != 0 || nthreads < 1 || nthreads >= INT_MAX)
-                    jl_errorf("julia: -t,--threads=<n> must be an integer >= 1");
-                jl_options.nthreads = (int)nthreads;
+                nthreads = strtol(optarg, &endptr, 10);
+                if (errno != 0 || optarg == endptr || nthreads < 1 || nthreads >= INT16_MAX)
+                    jl_errorf("julia: -t,--threads=<n>[,auto|<m>]; n must be an integer >= 1");
+                if (*endptr == ',') {
+                    if (!strncmp(&endptr[1], "auto", 4))
+                        nthreadsi = 1;
+                    else {
+                        errno = 0;
+                        char *endptri;
+                        nthreadsi = strtol(&endptr[1], &endptri, 10);
+                        if (errno != 0 || endptri == &endptr[1] || *endptri != 0 || nthreadsi < 1 || nthreadsi >= INT16_MAX)
+                            jl_errorf("julia: -t,--threads=<n>,<m>; n and m must be integers >= 1");
+                    }
+                    jl_options.nthreadpools++;
+                }
+                jl_options.nthreads = nthreads + nthreadsi;
             }
+            int16_t *ntpp = (int16_t *)malloc_s(jl_options.nthreadpools * sizeof(int16_t));
+            ntpp[0] = (int16_t)nthreads;
+            if (jl_options.nthreadpools == 2)
+                ntpp[1] = (int16_t)nthreadsi;
+            jl_options.nthreads_per_pool = ntpp;
             break;
         case 'p': // procs
             errno = 0;
             if (!strcmp(optarg,"auto")) {
-                jl_options.nprocs = jl_cpu_threads();
+                jl_options.nprocs = jl_effective_threads();
             }
             else {
                 long nprocs = strtol(optarg, &endptr, 10);
-                if (errno != 0 || optarg == endptr || *endptr != 0 || nprocs < 1 || nprocs >= INT_MAX)
+                if (errno != 0 || optarg == endptr || *endptr != 0 || nprocs < 1 || nprocs >= INT16_MAX)
                     jl_errorf("julia: -p,--procs=<n> must be an integer >= 1");
                 jl_options.nprocs = (int)nprocs;
             }
@@ -491,6 +564,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                         codecov = JL_LOG_ALL;
                     jl_options.output_code_coverage = optarg;
                 }
+                else if (!strncmp(optarg, "@", 1)) {
+                    codecov = JL_LOG_PATH;
+                    jl_options.tracked_path = optarg + 1; // skip `@`
+                }
                 else
                     jl_errorf("julia: invalid argument to --code-coverage (%s)", optarg);
                 break;
@@ -507,6 +584,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                     malloclog = JL_LOG_ALL;
                 else if (!strcmp(optarg,"none"))
                     malloclog = JL_LOG_NONE;
+                else if (!strncmp(optarg, "@", 1)) {
+                    malloclog = JL_LOG_PATH;
+                    jl_options.tracked_path = optarg + 1; // skip `@`
+                }
                 else
                     jl_errorf("julia: invalid argument to --track-allocation (%s)", optarg);
                 break;
@@ -533,6 +614,24 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_options.opt_level = 3;
             }
             break;
+        case opt_optlevel_min: // minimum module optimize level
+            if (optarg != NULL) {
+                if (!strcmp(optarg,"0"))
+                    jl_options.opt_level_min = 0;
+                else if (!strcmp(optarg,"1"))
+                    jl_options.opt_level_min = 1;
+                else if (!strcmp(optarg,"2"))
+                    jl_options.opt_level_min = 2;
+                else if (!strcmp(optarg,"3"))
+                    jl_options.opt_level_min = 3;
+                else
+                    jl_errorf("julia: invalid argument to --min-optlevel (%s)", optarg);
+                break;
+            }
+            else {
+                jl_options.opt_level_min = 0;
+            }
+            break;
         case 'i': // isinteractive
             jl_options.isinteractive = 1;
             break;
@@ -541,8 +640,10 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
                 jl_options.check_bounds = JL_OPTIONS_CHECK_BOUNDS_ON;
             else if (!strcmp(optarg,"no"))
                 jl_options.check_bounds = JL_OPTIONS_CHECK_BOUNDS_OFF;
+            else if (!strcmp(optarg,"auto"))
+                jl_options.check_bounds = JL_OPTIONS_CHECK_BOUNDS_DEFAULT;
             else
-                jl_errorf("julia: invalid argument to --check-bounds={yes|no} (%s)", optarg);
+                jl_errorf("julia: invalid argument to --check-bounds={yes|no|auto} (%s)", optarg);
             break;
         case opt_output_bc:
             jl_options.outputbc = optarg;
@@ -625,7 +726,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
             if (!strcmp(optarg,"ieee"))
                 jl_options.fast_math = JL_OPTIONS_FAST_MATH_OFF;
             else if (!strcmp(optarg,"fast"))
-                jl_options.fast_math = JL_OPTIONS_FAST_MATH_ON;
+                jl_options.fast_math = JL_OPTIONS_FAST_MATH_DEFAULT;
             else if (!strcmp(optarg,"user"))
                 jl_options.fast_math = JL_OPTIONS_FAST_MATH_DEFAULT;
             else
@@ -655,6 +756,15 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp)
         case opt_image_codegen:
             jl_options.image_codegen = 1;
             break;
+        case opt_rr_detach:
+            jl_options.rr_detach = 1;
+            break;
+        case opt_strip_metadata:
+            jl_options.strip_metadata = 1;
+            break;
+        case opt_strip_ir:
+            jl_options.strip_ir = 1;
+            break;
         default:
             jl_errorf("julia: unhandled option -- %c\n"
                       "This is a bug, please report it.", c);
diff --git a/src/jloptions.h b/src/jloptions.h
new file mode 100644
index 00000000000000..9ac681c4ffacf4
--- /dev/null
+++ b/src/jloptions.h
@@ -0,0 +1,61 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_JLOPTIONS_H
+#define JL_JLOPTIONS_H
+
+// NOTE: This struct needs to be kept in sync with JLOptions type in base/options.jl
+
+typedef struct {
+    int8_t quiet;
+    int8_t banner;
+    const char *julia_bindir;
+    const char *julia_bin;
+    const char **cmds;
+    const char *image_file;
+    const char *cpu_target;
+    int8_t nthreadpools;
+    int16_t nthreads;
+    const int16_t *nthreads_per_pool;
+    int32_t nprocs;
+    const char *machine_file;
+    const char *project;
+    int8_t isinteractive;
+    int8_t color;
+    int8_t historyfile;
+    int8_t startupfile;
+    int8_t compile_enabled;
+    int8_t code_coverage;
+    int8_t malloc_log;
+    const char *tracked_path;
+    int8_t opt_level;
+    int8_t opt_level_min;
+    int8_t debug_level;
+    int8_t check_bounds;
+    int8_t depwarn;
+    int8_t warn_overwrite;
+    int8_t can_inline;
+    int8_t polly;
+    const char *trace_compile;
+    int8_t fast_math;
+    int8_t worker;
+    const char *cookie;
+    int8_t handle_signals;
+    int8_t use_sysimage_native_code;
+    int8_t use_compiled_modules;
+    const char *bindto;
+    const char *outputbc;
+    const char *outputunoptbc;
+    const char *outputo;
+    const char *outputasm;
+    const char *outputji;
+    const char *output_code_coverage;
+    int8_t incremental;
+    int8_t image_file_specified;
+    int8_t warn_scope;
+    int8_t image_codegen;
+    int8_t rr_detach;
+    int8_t strip_metadata;
+    int8_t strip_ir;
+} jl_options_t;
+
+#endif
diff --git a/src/jltypes.c b/src/jltypes.c
index dd36776f1bac1f..1c0255cd0cb4b0 100644
--- a/src/jltypes.c
+++ b/src/jltypes.c
@@ -19,132 +19,12 @@
 extern "C" {
 #endif
 
+_Atomic(jl_value_t*) cmpswap_names JL_GLOBALLY_ROOTED;
+
 // compute empirical max-probe for a given size
 #define max_probe(size) ((size) <= 1024 ? 16 : (size) >> 6)
 #define h2index(hv, sz) (size_t)((hv) & ((sz)-1))
 
-jl_datatype_t *jl_any_type;
-jl_unionall_t *jl_type_type;
-jl_typename_t *jl_type_typename;
-jl_methtable_t *jl_type_type_mt;
-jl_methtable_t *jl_nonfunction_mt;
-jl_datatype_t *jl_typename_type;
-jl_datatype_t *jl_symbol_type;
-jl_datatype_t *jl_ssavalue_type;
-jl_datatype_t *jl_abstractslot_type;
-jl_datatype_t *jl_slotnumber_type;
-jl_datatype_t *jl_typedslot_type;
-jl_datatype_t *jl_argument_type;
-jl_datatype_t *jl_const_type;
-jl_datatype_t *jl_partial_struct_type;
-jl_datatype_t *jl_method_match_type;
-jl_datatype_t *jl_simplevector_type;
-jl_typename_t *jl_tuple_typename;
-jl_datatype_t *jl_anytuple_type;
-jl_datatype_t *jl_emptytuple_type;
-jl_unionall_t *jl_anytuple_type_type;
-jl_typename_t *jl_vecelement_typename;
-jl_unionall_t *jl_vararg_type;
-jl_typename_t *jl_vararg_typename;
-jl_datatype_t *jl_tvar_type;
-jl_datatype_t *jl_uniontype_type;
-jl_datatype_t *jl_unionall_type;
-jl_datatype_t *jl_datatype_type;
-jl_datatype_t *jl_function_type;
-jl_datatype_t *jl_builtin_type;
-
-jl_datatype_t *jl_typeofbottom_type;
-jl_value_t *jl_bottom_type;
-jl_unionall_t *jl_abstractarray_type;
-jl_unionall_t *jl_densearray_type;
-
-jl_datatype_t *jl_bool_type;
-jl_datatype_t *jl_char_type;
-jl_datatype_t *jl_int8_type;
-jl_datatype_t *jl_uint8_type;
-jl_datatype_t *jl_int16_type;
-jl_datatype_t *jl_uint16_type;
-jl_datatype_t *jl_int32_type;
-jl_datatype_t *jl_uint32_type;
-jl_datatype_t *jl_int64_type;
-jl_datatype_t *jl_uint64_type;
-jl_datatype_t *jl_float16_type;
-jl_datatype_t *jl_float32_type;
-jl_datatype_t *jl_float64_type;
-jl_datatype_t *jl_floatingpoint_type;
-jl_datatype_t *jl_number_type;
-jl_datatype_t *jl_signed_type;
-
-JL_DLLEXPORT jl_value_t *jl_emptytuple;
-jl_svec_t *jl_emptysvec;
-jl_value_t *jl_nothing;
-
-JL_DLLEXPORT jl_value_t *jl_true;
-JL_DLLEXPORT jl_value_t *jl_false;
-
-jl_unionall_t *jl_array_type;
-jl_typename_t *jl_array_typename;
-jl_value_t *jl_array_uint8_type;
-jl_value_t *jl_array_any_type;
-jl_value_t *jl_array_symbol_type;
-jl_value_t *jl_array_int32_type;
-jl_datatype_t *jl_weakref_type;
-jl_datatype_t *jl_abstractstring_type;
-jl_datatype_t *jl_string_type;
-jl_datatype_t *jl_expr_type;
-jl_datatype_t *jl_globalref_type;
-jl_datatype_t *jl_linenumbernode_type;
-jl_datatype_t *jl_gotonode_type;
-jl_datatype_t *jl_gotoifnot_type;
-jl_datatype_t *jl_returnnode_type;
-jl_datatype_t *jl_pinode_type;
-jl_datatype_t *jl_phinode_type;
-jl_datatype_t *jl_phicnode_type;
-jl_datatype_t *jl_upsilonnode_type;
-jl_datatype_t *jl_quotenode_type;
-jl_datatype_t *jl_newvarnode_type;
-jl_datatype_t *jl_intrinsic_type;
-jl_datatype_t *jl_method_type;
-jl_datatype_t *jl_methtable_type;
-jl_datatype_t *jl_typemap_entry_type;
-jl_datatype_t *jl_typemap_level_type;
-jl_datatype_t *jl_method_instance_type;
-jl_datatype_t *jl_code_instance_type;
-jl_datatype_t *jl_code_info_type;
-jl_datatype_t *jl_module_type;
-jl_datatype_t *jl_errorexception_type;
-jl_datatype_t *jl_argumenterror_type;
-jl_datatype_t *jl_typeerror_type;
-jl_datatype_t *jl_methoderror_type;
-jl_datatype_t *jl_loaderror_type;
-jl_datatype_t *jl_initerror_type;
-jl_datatype_t *jl_undefvarerror_type;
-jl_datatype_t *jl_lineinfonode_type;
-jl_unionall_t *jl_ref_type;
-jl_unionall_t *jl_pointer_type;
-jl_typename_t *jl_pointer_typename;
-jl_unionall_t *jl_llvmpointer_type;
-jl_typename_t *jl_llvmpointer_typename;
-jl_datatype_t *jl_void_type; // deprecated
-jl_datatype_t *jl_nothing_type;
-jl_datatype_t *jl_voidpointer_type;
-jl_datatype_t *jl_uint8pointer_type;
-jl_typename_t *jl_namedtuple_typename;
-jl_unionall_t *jl_namedtuple_type;
-jl_datatype_t *jl_task_type;
-jl_value_t *jl_an_empty_vec_any;
-jl_value_t *jl_an_empty_string;
-jl_value_t *jl_stackovf_exception;
-#ifdef SEGV_EXCEPTION
-jl_value_t *jl_segv_exception;
-#endif
-JL_DLLEXPORT jl_value_t *jl_diverror_exception;
-JL_DLLEXPORT jl_value_t *jl_undefref_exception;
-jl_value_t *jl_interrupt_exception;
-jl_datatype_t *jl_boundserror_type;
-jl_value_t *jl_memory_exception;
-jl_value_t *jl_readonlymemory_exception;
-
 // --- type properties and predicates ---
 
 static int typeenv_has(jl_typeenv_t *env, jl_tvar_t *v) JL_NOTSAFEPOINT
@@ -157,6 +37,51 @@ static int typeenv_has(jl_typeenv_t *env, jl_tvar_t *v) JL_NOTSAFEPOINT
     return 0;
 }
 
+static int layout_uses_free_typevars(jl_value_t *v, jl_typeenv_t *env)
+{
+    if (jl_typeis(v, jl_tvar_type))
+        return !typeenv_has(env, (jl_tvar_t*)v);
+    if (jl_is_uniontype(v))
+        return layout_uses_free_typevars(((jl_uniontype_t*)v)->a, env) ||
+               layout_uses_free_typevars(((jl_uniontype_t*)v)->b, env);
+    if (jl_is_vararg(v)) {
+        jl_vararg_t *vm = (jl_vararg_t*)v;
+        if (vm->T && layout_uses_free_typevars(vm->T, env))
+            return 1;
+        if (vm->N && layout_uses_free_typevars(vm->N, env))
+            return 1;
+        return 0;
+    }
+    if (jl_is_unionall(v)) {
+        jl_unionall_t *ua = (jl_unionall_t*)v;
+        jl_typeenv_t newenv = { ua->var, NULL, env };
+        return layout_uses_free_typevars(ua->body, &newenv);
+    }
+    if (jl_is_datatype(v)) {
+        jl_datatype_t *dt = (jl_datatype_t*)v;
+        if (dt->layout || dt->isconcretetype || !dt->name->mayinlinealloc)
+            return 0;
+        if (dt->name == jl_namedtuple_typename)
+            return layout_uses_free_typevars(jl_tparam0(dt), env) || layout_uses_free_typevars(jl_tparam1(dt), env);
+        if (dt->name == jl_tuple_typename)
+            // conservative, since we don't want to inline an abstract tuple,
+            // and we currently declare !has_fixed_layout for these, but that
+            // means we also won't be able to inline a tuple which is concrete
+            // except for the use of free type-vars
+            return 1;
+        jl_svec_t *types = jl_get_fieldtypes(dt);
+        size_t i, l = jl_svec_len(types);
+        for (i = 0; i < l; i++) {
+            jl_value_t *ft = jl_svecref(types, i);
+            if (layout_uses_free_typevars(ft, env)) {
+                // This might be inline-alloc, but we don't know the layout
+                return 1;
+            }
+        }
+    }
+    return 0;
+}
+
 static int has_free_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
 {
     if (jl_typeis(v, jl_tvar_type)) {
@@ -165,6 +90,14 @@ static int has_free_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
     if (jl_is_uniontype(v))
         return has_free_typevars(((jl_uniontype_t*)v)->a, env) ||
             has_free_typevars(((jl_uniontype_t*)v)->b, env);
+    if (jl_is_vararg(v)) {
+        jl_vararg_t *vm = (jl_vararg_t*)v;
+        if (vm->T) {
+            if (has_free_typevars(vm->T, env))
+                return 1;
+            return vm->N && has_free_typevars(vm->N, env);
+        }
+    }
     if (jl_is_unionall(v)) {
         jl_unionall_t *ua = (jl_unionall_t*)v;
         jl_typeenv_t newenv = { ua->var, NULL, env };
@@ -176,9 +109,8 @@ static int has_free_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOINT
         if (expect == 0 || env == NULL)
             return expect;
         size_t i;
-        for (i=0; i < jl_nparams(v); i++) {
-            if (has_free_typevars(jl_tparam(v,i), env)) {
-                assert(expect);
+        for (i = 0; i < jl_nparams(v); i++) {
+            if (has_free_typevars(jl_tparam(v, i), env)) {
                 return 1;
             }
         }
@@ -201,6 +133,15 @@ static void find_free_typevars(jl_value_t *v, jl_typeenv_t *env, jl_array_t *out
         find_free_typevars(((jl_uniontype_t*)v)->a, env, out);
         find_free_typevars(((jl_uniontype_t*)v)->b, env, out);
     }
+    else if (jl_is_vararg(v)) {
+        jl_vararg_t *vm = (jl_vararg_t *)v;
+        if (vm->T) {
+            find_free_typevars(vm->T, env, out);
+            if (vm->N) {
+                find_free_typevars(vm->N, env, out);
+            }
+        }
+    }
     else if (jl_is_unionall(v)) {
         jl_unionall_t *ua = (jl_unionall_t*)v;
         jl_typeenv_t newenv = { ua->var, NULL, env };
@@ -234,6 +175,11 @@ static int jl_has_bound_typevars(jl_value_t *v, jl_typeenv_t *env) JL_NOTSAFEPOI
     if (jl_is_uniontype(v))
         return jl_has_bound_typevars(((jl_uniontype_t*)v)->a, env) ||
             jl_has_bound_typevars(((jl_uniontype_t*)v)->b, env);
+    if (jl_is_vararg(v)) {
+        jl_vararg_t *vm = (jl_vararg_t *)v;
+        return vm->T && (jl_has_bound_typevars(vm->T, env) ||
+            (vm->N && jl_has_bound_typevars(vm->N, env)));
+    }
     if (jl_is_unionall(v)) {
         jl_unionall_t *ua = (jl_unionall_t*)v;
         if (jl_has_bound_typevars(ua->var->lb, env) || jl_has_bound_typevars(ua->var->ub, env))
@@ -281,6 +227,43 @@ JL_DLLEXPORT int jl_has_typevar_from_unionall(jl_value_t *t, jl_unionall_t *ua)
     return _jl_has_typevar_from_ua(t, ua, NULL);
 }
 
+int jl_has_fixed_layout(jl_datatype_t *dt)
+{
+    if (dt->layout || dt->isconcretetype)
+        return 1;
+    if (dt->name->abstract)
+        return 0;
+    if (dt->name == jl_namedtuple_typename)
+        return !layout_uses_free_typevars(jl_tparam0(dt), NULL) && !layout_uses_free_typevars(jl_tparam1(dt), NULL);
+    if (dt->name == jl_tuple_typename)
+        return 0;
+    jl_svec_t *types = jl_get_fieldtypes(dt);
+    size_t i, l = jl_svec_len(types);
+    for (i = 0; i < l; i++) {
+        jl_value_t *ft = jl_svecref(types, i);
+        if (layout_uses_free_typevars(ft, NULL)) {
+            // This might be inline-alloc, but we don't know the layout
+            return 0;
+        }
+    }
+    return 1;
+}
+
+int jl_type_mappable_to_c(jl_value_t *ty)
+{
+    assert(!jl_is_typevar(ty) && jl_is_type(ty));
+    if (jl_is_structtype(ty))
+        return jl_has_fixed_layout((jl_datatype_t*)ty) && ((jl_datatype_t*)ty)->name->atomicfields == NULL;
+    if (jl_is_primitivetype(ty))
+        return 1;
+    if (ty == (jl_value_t*)jl_any_type || ty == (jl_value_t*)jl_bottom_type)
+        return 1; // as boxed
+    if (jl_is_abstract_ref_type(ty) || jl_is_array_type(ty) ||
+        (jl_is_datatype(ty) && ((jl_datatype_t*)ty)->layout != NULL &&
+            jl_is_layout_opaque(((jl_datatype_t*)ty)->layout)))
+        return 1; // as boxed
+    return 0; // refuse to map Union and UnionAll to C
+}
 
 // Return true for any type (Integer or Unsigned) that can fit in a
 // size_t and pass back value, else return false
@@ -481,7 +464,7 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
     size_t i;
     for(i=0; i < n; i++) {
         jl_value_t *pi = ts[i];
-        if (!(jl_is_type(pi) || jl_is_typevar(pi)) || jl_is_vararg_type(pi))
+        if (!(jl_is_type(pi) || jl_is_typevar(pi)))
             jl_type_error("Union", (jl_value_t*)jl_type_type, pi);
     }
     if (n == 1) return ts[0];
@@ -497,7 +480,7 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
         int has_free = temp[i]!=NULL && jl_has_free_typevars(temp[i]);
         for(j=0; j < nt; j++) {
             if (j != i && temp[i] && temp[j]) {
-                if (temp[i] == temp[j] || temp[i] == jl_bottom_type ||
+                if (temp[i] == jl_bottom_type ||
                     temp[j] == (jl_value_t*)jl_any_type ||
                     jl_egal(temp[i], temp[j]) ||
                     (!has_free && !jl_has_free_typevars(temp[j]) &&
@@ -529,6 +512,34 @@ JL_DLLEXPORT jl_value_t *jl_type_union(jl_value_t **ts, size_t n)
 
 JL_DLLEXPORT jl_value_t *jl_type_unionall(jl_tvar_t *v, jl_value_t *body)
 {
+    if (jl_is_vararg(body)) {
+        if (jl_options.depwarn) {
+            if (jl_options.depwarn == JL_OPTIONS_DEPWARN_ERROR)
+                jl_error("Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).");
+            jl_printf(JL_STDERR, "WARNING: Wrapping `Vararg` directly in UnionAll is deprecated (wrap the tuple instead).\n");
+        }
+        jl_vararg_t *vm = (jl_vararg_t*)body;
+        int T_has_tv = vm->T && jl_has_typevar(vm->T, v);
+        int N_has_tv = vm->N && jl_has_typevar(vm->N, v);
+        if (!T_has_tv && !N_has_tv) {
+            return body;
+        }
+        if (T_has_tv && N_has_tv) {
+            jl_error("Wrapping `Vararg` directly in UnionAll is disallowed if the typevar occurs in both `T` and `N`");
+        }
+        if (T_has_tv) {
+            jl_value_t *wrapped = jl_type_unionall(v, vm->T);
+            JL_GC_PUSH1(&wrapped);
+            wrapped = (jl_value_t*)jl_wrap_vararg(wrapped, vm->N);
+            JL_GC_POP();
+            return wrapped;
+        }
+        else {
+            assert(N_has_tv);
+            assert(vm->N == (jl_value_t*)v);
+            return (jl_value_t*)jl_wrap_vararg(vm->T, NULL);
+        }
+    }
     if (!jl_is_type(body) && !jl_is_typevar(body))
         jl_type_error("UnionAll", (jl_value_t*)jl_type_type, body);
     // normalize `T where T<:S` => S
@@ -625,7 +636,7 @@ static jl_datatype_t *lookup_type_set(jl_svec_t *cache, jl_value_t **key, size_t
     if (sz == 0)
         return NULL;
     size_t maxprobe = max_probe(sz);
-    jl_datatype_t **tab = (jl_datatype_t**)jl_svec_data(cache);
+    _Atomic(jl_datatype_t*) *tab = (_Atomic(jl_datatype_t*)*)jl_svec_data(cache);
     size_t index = h2index(hv, sz);
     size_t orig = index;
     size_t iter = 0;
@@ -633,7 +644,7 @@ static jl_datatype_t *lookup_type_set(jl_svec_t *cache, jl_value_t **key, size_t
         jl_datatype_t *val = jl_atomic_load_relaxed(&tab[index]);
         if (val == NULL)
             return NULL;
-        if (val->hash == hv && typekey_eq(val, key, n))
+        if ((jl_value_t*)val != jl_nothing && val->hash == hv && typekey_eq(val, key, n))
             return val;
         index = (index + 1) & (sz - 1);
         iter++;
@@ -648,7 +659,7 @@ static jl_datatype_t *lookup_type_setvalue(jl_svec_t *cache, jl_value_t *key1, j
     if (sz == 0)
         return NULL;
     size_t maxprobe = max_probe(sz);
-    jl_datatype_t **tab = (jl_datatype_t**)jl_svec_data(cache);
+    _Atomic(jl_datatype_t*) *tab = (_Atomic(jl_datatype_t*)*)jl_svec_data(cache);
     size_t index = h2index(hv, sz);
     size_t orig = index;
     size_t iter = 0;
@@ -656,7 +667,7 @@ static jl_datatype_t *lookup_type_setvalue(jl_svec_t *cache, jl_value_t *key1, j
         jl_datatype_t *val = jl_atomic_load_relaxed(&tab[index]);
         if (val == NULL)
             return NULL;
-        if (val->hash == hv && typekeyvalue_eq(val, key1, key, n, leaf))
+        if ((jl_value_t*)val != jl_nothing && val->hash == hv && typekeyvalue_eq(val, key1, key, n, leaf))
             return val;
         index = (index + 1) & (sz - 1);
         iter++;
@@ -671,7 +682,7 @@ static ssize_t lookup_type_idx_linear(jl_svec_t *cache, jl_value_t **key, size_t
 {
     if (n == 0)
         return -1;
-    jl_datatype_t **data = (jl_datatype_t**)jl_svec_data(cache);
+    _Atomic(jl_datatype_t*) *data = (_Atomic(jl_datatype_t*)*)jl_svec_data(cache);
     size_t cl = jl_svec_len(cache);
     ssize_t i;
     for (i = 0; i < cl; i++) {
@@ -688,7 +699,7 @@ static ssize_t lookup_type_idx_linearvalue(jl_svec_t *cache, jl_value_t *key1, j
 {
     if (n == 0)
         return -1;
-    jl_datatype_t **data = (jl_datatype_t**)jl_svec_data(cache);
+    _Atomic(jl_datatype_t*) *data = (_Atomic(jl_datatype_t*)*)jl_svec_data(cache);
     size_t cl = jl_svec_len(cache);
     ssize_t i;
     for (i = 0; i < cl; i++) {
@@ -701,9 +712,15 @@ static ssize_t lookup_type_idx_linearvalue(jl_svec_t *cache, jl_value_t *key1, j
     return ~cl;
 }
 
-static jl_value_t *lookup_type(jl_typename_t *tn, jl_value_t **key, size_t n)
+static jl_value_t *lookup_type(jl_typename_t *tn JL_PROPAGATES_ROOT, jl_value_t **key, size_t n)
 {
     JL_TIMING(TYPE_CACHE_LOOKUP);
+    if (tn == jl_type_typename) {
+        assert(n == 1);
+        jl_value_t *uw = jl_unwrap_unionall(key[0]);
+        if (jl_is_datatype(uw) && key[0] == ((jl_datatype_t*)uw)->name->wrapper)
+            return jl_atomic_load_acquire(&((jl_datatype_t*)uw)->name->Typeofwrapper);
+    }
     unsigned hv = typekey_hash(tn, key, n, 0);
     if (hv) {
         jl_svec_t *cache = jl_atomic_load_relaxed(&tn->cache);
@@ -732,9 +749,9 @@ static jl_value_t *lookup_typevalue(jl_typename_t *tn, jl_value_t *key1, jl_valu
     }
 }
 
-static int cache_insert_type_set_(jl_svec_t *a, jl_datatype_t *val, uint_t hv)
+static int cache_insert_type_set_(jl_svec_t *a, jl_datatype_t *val, uint_t hv, int atomic)
 {
-    jl_datatype_t **tab = (jl_datatype_t**)jl_svec_data(a);
+    _Atomic(jl_value_t*) *tab = (_Atomic(jl_value_t*)*)jl_svec_data(a);
     size_t sz = jl_svec_len(a);
     if (sz <= 1)
         return 0;
@@ -744,8 +761,12 @@ static int cache_insert_type_set_(jl_svec_t *a, jl_datatype_t *val, uint_t hv)
     orig = index;
     size_t maxprobe = max_probe(sz);
     do {
-        if (tab[index] == NULL) {
-            jl_atomic_store_release(&tab[index], val);
+        jl_value_t *tab_i = jl_atomic_load_relaxed(&tab[index]);
+        if (tab_i == NULL || tab_i == jl_nothing) {
+            if (atomic)
+                jl_atomic_store_release(&tab[index], (jl_value_t*)val);
+            else
+                jl_atomic_store_relaxed(&tab[index], (jl_value_t*)val);
             jl_gc_wb(a, val);
             return 1;
         }
@@ -760,10 +781,10 @@ static jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz);
 
 static void cache_insert_type_set(jl_datatype_t *val, uint_t hv)
 {
-    jl_svec_t *a = val->name->cache;
+    jl_svec_t *a = jl_atomic_load_relaxed(&val->name->cache);
     while (1) {
         JL_GC_PROMISE_ROOTED(a);
-        if (cache_insert_type_set_(a, val, hv))
+        if (cache_insert_type_set_(a, val, hv, 1))
             return;
 
         /* table full */
@@ -786,17 +807,17 @@ static void cache_insert_type_set(jl_datatype_t *val, uint_t hv)
 
 static jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz)
 {
-    jl_datatype_t **ol = (jl_datatype_t**)jl_svec_data(a);
+    jl_value_t **ol = jl_svec_data(a);
     size_t sz = jl_svec_len(a);
     while (1) {
         size_t i;
         jl_svec_t *newa = jl_alloc_svec(newsz);
         JL_GC_PUSH1(&newa);
         for (i = 0; i < sz; i += 1) {
-            jl_datatype_t *val = ol[i];
-            if (val != NULL) {
-                uint_t hv = val->hash;
-                if (!cache_insert_type_set_(newa, val, hv)) {
+            jl_value_t *val = ol[i];
+            if (val != NULL && val != jl_nothing) {
+                uint_t hv = ((jl_datatype_t*)val)->hash;
+                if (!cache_insert_type_set_(newa, (jl_datatype_t*)val, hv, 0)) {
                     break;
                 }
             }
@@ -810,7 +831,7 @@ static jl_svec_t *cache_rehash_set(jl_svec_t *a, size_t newsz)
 
 static void cache_insert_type_linear(jl_datatype_t *type, ssize_t insert_at)
 {
-    jl_svec_t *cache = type->name->linearcache;
+    jl_svec_t *cache = jl_atomic_load_relaxed(&type->name->linearcache);
     assert(jl_is_svec(cache));
     size_t n = jl_svec_len(cache);
     if (n == 0 || jl_svecref(cache, n - 1) != NULL) {
@@ -841,13 +862,23 @@ void jl_cache_type_(jl_datatype_t *type)
     assert(is_cacheable(type));
     jl_value_t **key = jl_svec_data(type->parameters);
     int n = jl_svec_len(type->parameters);
+    if (type->name == jl_type_typename) {
+        assert(n == 1);
+        jl_value_t *uw = jl_unwrap_unionall(key[0]);
+        if (jl_is_datatype(uw) && key[0] == ((jl_datatype_t*)uw)->name->wrapper) {
+            jl_typename_t *tn2 = ((jl_datatype_t*)uw)->name;
+            jl_atomic_store_release(&tn2->Typeofwrapper, (jl_value_t*)type);
+            jl_gc_wb(tn2, type);
+            return;
+        }
+    }
     unsigned hv = typekey_hash(type->name, key, n, 0);
     if (hv) {
         assert(hv == type->hash);
         cache_insert_type_set(type, hv);
     }
     else {
-        ssize_t idx = lookup_type_idx_linear(type->name->linearcache, key, n);
+        ssize_t idx = lookup_type_idx_linear(jl_atomic_load_relaxed(&type->name->linearcache), key, n);
         assert(idx < 0);
         cache_insert_type_linear(type, ~idx);
     }
@@ -896,19 +927,19 @@ struct _jl_typestack_t;
 typedef struct _jl_typestack_t jl_typestack_t;
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       int cacheable, jl_typestack_t *stack, jl_typeenv_t *env);
+                                       jl_typestack_t *stack, jl_typeenv_t *env);
 
 // Build an environment mapping a TypeName's parameters to parameter values.
 // This is the environment needed for instantiating a type's supertype and field types.
 static jl_value_t *inst_datatype_env(jl_value_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                     int cacheable, jl_typestack_t *stack, jl_typeenv_t *env, int c)
+                                     jl_typestack_t *stack, jl_typeenv_t *env, int c)
 {
     if (jl_is_datatype(dt))
-        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, cacheable, stack, env);
+        return inst_datatype_inner((jl_datatype_t*)dt, p, iparams, ntp, stack, env);
     assert(jl_is_unionall(dt));
     jl_unionall_t *ua = (jl_unionall_t*)dt;
     jl_typeenv_t e = { ua->var, iparams[c], env };
-    return inst_datatype_env(ua->body, p, iparams, ntp, cacheable, stack, &e, c + 1);
+    return inst_datatype_env(ua->body, p, iparams, ntp, stack, &e, c + 1);
 }
 
 jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
@@ -924,14 +955,7 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
         jl_value_t *u = jl_unwrap_unionall(tc);
         if (jl_is_datatype(u) && n == jl_nparams((jl_datatype_t*)u) &&
             ((jl_datatype_t*)u)->name->wrapper == tc) {
-            int cacheable = 1;
-            for (i = 0; i < n; i++) {
-                if (jl_has_free_typevars(params[i])) {
-                    cacheable = 0;
-                    break;
-                }
-            }
-            return inst_datatype_env(tc, NULL, params, n, cacheable, NULL, NULL, 0);
+            return inst_datatype_env(tc, NULL, params, n, NULL, NULL, 0);
         }
     }
     JL_GC_PUSH1(&tc);
@@ -967,7 +991,8 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
             }
             // if this is a wrapper, let check_datatype_parameters give the error
             if (!iswrapper)
-                jl_type_error_rt("Type", jl_symbol_name(ua->var->name), (jl_value_t*)ua->var, pi);
+                jl_type_error_rt(jl_is_datatype(inner) ? jl_symbol_name(inner->name->name) : "Type",
+                                 jl_symbol_name(ua->var->name), (jl_value_t*)ua->var, pi);
         }
 
         tc = jl_instantiate_unionall(ua, pi);
@@ -978,20 +1003,42 @@ jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n)
 
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1)
 {
-    JL_GC_PUSH1(&p1);
-    jl_value_t *t = jl_apply_type(tc, &p1, 1);
-    JL_GC_POP();
-    return t;
+    return jl_apply_type(tc, &p1, 1);
 }
 
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2)
 {
-    jl_value_t **args;
-    JL_GC_PUSHARGS(args, 2);
-    args[0] = p1; args[1] = p2;
-    jl_value_t *t = jl_apply_type(tc, args, 2);
-    JL_GC_POP();
-    return t;
+    jl_value_t *args[2];
+    args[0] = p1;
+    args[1] = p2;
+    return jl_apply_type(tc, args, 2);
+}
+
+jl_datatype_t *jl_apply_modify_type(jl_value_t *dt)
+{
+    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2(jl_pair_type, dt, dt);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    return rettyp;
+}
+
+jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt)
+{
+    jl_value_t *params[2];
+    jl_value_t *names = jl_atomic_load_relaxed(&cmpswap_names);
+    if (names == NULL) {
+        params[0] = (jl_value_t*)jl_symbol("old");
+        params[1] = (jl_value_t*)jl_symbol("success");
+        jl_value_t *lnames = jl_f_tuple(NULL, params, 2);
+        if (jl_atomic_cmpswap(&cmpswap_names, &names, lnames))
+            names = jl_atomic_load_relaxed(&cmpswap_names); // == lnames
+    }
+    params[0] = dt;
+    params[1] = (jl_value_t*)jl_bool_type;
+    jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
+    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
+    jl_datatype_t *rettyp = (jl_datatype_t*)jl_apply_type2((jl_value_t*)jl_namedtuple_type, names, (jl_value_t*)tuptyp);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    return rettyp;
 }
 
 JL_DLLEXPORT jl_value_t *jl_tupletype_fill(size_t n, jl_value_t *v)
@@ -1087,6 +1134,15 @@ static unsigned type_hash(jl_value_t *kj, int *failed) JL_NOTSAFEPOINT
         // ignore var and lb, since those might get normalized out in equality testing
         return type_hash(((jl_tvar_t*)uw)->ub, failed);
     }
+    else if (jl_is_vararg(uw)) {
+        if (!*failed) {
+            *failed = 1;
+            return 0;
+        }
+        jl_vararg_t *vm = (jl_vararg_t *)uw;
+        // 0x064eeaab is just a randomly chosen constant
+        return bitmix(type_hash(vm->T ? vm->T : (jl_value_t*)jl_any_type, failed), vm->N ? type_hash(vm->N, failed) : 0x064eeaab);
+    }
     else if (jl_is_uniontype(uw)) {
         if (!*failed) {
             *failed = 1;
@@ -1144,10 +1200,8 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
 {
     int istuple = (dt->name == jl_tuple_typename);
     dt->hasfreetypevars = 0;
-    dt->isconcretetype = !dt->abstract;
+    dt->isconcretetype = !dt->name->abstract;
     dt->isdispatchtuple = istuple;
-    if (dt->name == jl_vararg_typename)
-        dt->isconcretetype = 0;
     size_t i, l = jl_nparams(dt);
     for (i = 0; i < l; i++) {
         jl_value_t *p = jl_tparam(dt, i);
@@ -1163,9 +1217,20 @@ void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable)
                 ((!jl_is_kind(p) && ((jl_datatype_t*)p)->isconcretetype) ||
                  (((jl_datatype_t*)p)->name == jl_type_typename && !((jl_datatype_t*)p)->hasfreetypevars));
         }
+        if (istuple && dt->has_concrete_subtype) {
+            if (jl_is_vararg(p))
+                p = ((jl_vararg_t*)p)->T;
+            // tuple types like Tuple{:x} cannot have instances
+            if (p && !jl_is_type(p) && !jl_is_typevar(p))
+                dt->has_concrete_subtype = 0;
+        }
     }
-    if (dt->name == jl_type_typename)
+    if (dt->name == jl_type_typename) {
         cacheable = 0; // the cache for Type ignores parameter normalization, so it can't be used as a regular hash
+        jl_value_t *p = jl_tparam(dt, 0);
+        if (!jl_is_type(p) && !jl_is_typevar(p)) // Type{v} has no subtypes, if v is not a Type
+            dt->has_concrete_subtype = 0;
+    }
     dt->hash = typekey_hash(dt->name, jl_svec_data(dt->parameters), l, cacheable);
     dt->cached_by_hash = cacheable ? (typekey_hash(dt->name, jl_svec_data(dt->parameters), l, 0) != 0) : (dt->hash != 0);
 }
@@ -1184,16 +1249,19 @@ static void check_datatype_parameters(jl_typename_t *tn, jl_value_t **params, si
     }
     assert(i == np*2);
     wrapper = tn->wrapper;
-    for(i=0; i < np; i++) {
+    for (i = 0; i < np; i++) {
         assert(jl_is_unionall(wrapper));
         jl_tvar_t *tv = ((jl_unionall_t*)wrapper)->var;
         if (!within_typevar(params[i], bounds[2*i], bounds[2*i+1])) {
-            // TODO: pass a new version of `tv` containing the instantiated bounds
+            if (tv->lb != bounds[2*i] || tv->ub != bounds[2*i+1])
+                // pass a new version of `tv` containing the instantiated bounds
+                tv = jl_new_typevar(tv->name, bounds[2*i], bounds[2*i+1]);
+            JL_GC_PUSH1(&tv);
             jl_type_error_rt(jl_symbol_name(tn->name), jl_symbol_name(tv->name), (jl_value_t*)tv, params[i]);
         }
         int j;
-        for(j=2*i+2; j < 2*np; j++) {
-            jl_value_t*bj = bounds[j];
+        for (j = 2*i + 2; j < 2*np; j++) {
+            jl_value_t *bj = bounds[j];
             if (bj != (jl_value_t*)jl_any_type && bj != jl_bottom_type)
                 bounds[j] = jl_substitute_var(bj, tv, params[i]);
         }
@@ -1217,40 +1285,128 @@ static jl_value_t *extract_wrapper(jl_value_t *t JL_PROPAGATES_ROOT) JL_GLOBALLY
     return NULL;
 }
 
-// convert `Vararg{X, Y} where T` to `Vararg{X where T, Y}` where T doesn't occur free in Y
-static jl_value_t *normalize_vararg(jl_value_t *va)
+int _may_substitute_ub(jl_value_t *v, jl_tvar_t *var, int inside_inv, int *cov_count) JL_NOTSAFEPOINT
 {
-    assert(jl_is_vararg_type(va));
-    if (!jl_is_unionall(va)) return va;
-    jl_value_t *body=NULL;
-    JL_GC_PUSH2(&va, &body);
-    jl_unionall_t *ua = (jl_unionall_t*)va;
-    body = normalize_vararg(ua->body);
-    jl_value_t *unw = jl_unwrap_unionall(body);
-    jl_value_t *va0 = jl_tparam0(unw), *va1 = jl_tparam1(unw);
-    if (jl_has_typevar(va1, ua->var)) {
-        if (body != ua->body)
-            va = jl_type_unionall(ua->var, body);
+    if (v == (jl_value_t*)var) {
+        if (inside_inv) {
+            return 0;
+        }
+        else {
+            (*cov_count)++;
+            return *cov_count <= 1 || jl_is_concrete_type(var->ub);
+        }
     }
-    else {
-        va = jl_type_unionall(ua->var, va0);
-        va = jl_wrap_vararg(va, va1);
-        va = jl_rewrap_unionall(va, body);
+    else if (jl_is_uniontype(v)) {
+        return _may_substitute_ub(((jl_uniontype_t*)v)->a, var, inside_inv, cov_count) &&
+            _may_substitute_ub(((jl_uniontype_t*)v)->b, var, inside_inv, cov_count);
+    }
+    else if (jl_is_unionall(v)) {
+        jl_unionall_t *ua = (jl_unionall_t*)v;
+        if (ua->var == var)
+            return 1;
+        return _may_substitute_ub(ua->var->lb, var, inside_inv, cov_count) &&
+            _may_substitute_ub(ua->var->ub, var, inside_inv, cov_count) &&
+            _may_substitute_ub(ua->body, var, inside_inv, cov_count);
+    }
+    else if (jl_is_datatype(v)) {
+        int invar = inside_inv || !jl_is_tuple_type(v);
+        for (size_t i = 0; i < jl_nparams(v); i++) {
+            if (!_may_substitute_ub(jl_tparam(v,i), var, invar, cov_count))
+                return 0;
+        }
+    }
+    else if (jl_is_vararg(v)) {
+        jl_vararg_t *va = (jl_vararg_t*)v;
+        int old_count = *cov_count;
+        if (va->T && !_may_substitute_ub(va->T, var, inside_inv, cov_count))
+            return 0;
+        if (*cov_count > old_count && !jl_is_concrete_type(var->ub))
+            return 0;
+        if (va->N && !_may_substitute_ub(va->N, var, 1, cov_count))
+            return 0;
+    }
+    return 1;
+}
+
+// Check whether `var` may be replaced with its upper bound `ub` in `v where var<:ub`
+// Conditions:
+//  * `var` does not appear in invariant position
+//  * `var` appears at most once (in covariant position) and not in a `Vararg`
+//    unless the upper bound is concrete (diagonal rule)
+int may_substitute_ub(jl_value_t *v, jl_tvar_t *var) JL_NOTSAFEPOINT
+{
+    int cov_count = 0;
+    return _may_substitute_ub(v, var, 0, &cov_count);
+}
+
+jl_value_t *normalize_unionalls(jl_value_t *t)
+{
+    JL_GC_PUSH1(&t);
+    if (jl_is_uniontype(t)) {
+        jl_uniontype_t *u = (jl_uniontype_t*)t;
+        jl_value_t *a = NULL;
+        jl_value_t *b = NULL;
+        JL_GC_PUSH2(&a, &b);
+        a = normalize_unionalls(u->a);
+        b = normalize_unionalls(u->b);
+        if (a != u->a || b != u->b) {
+            t = jl_new_struct(jl_uniontype_type, a, b);
+        }
+        JL_GC_POP();
+    }
+    else if (jl_is_unionall(t)) {
+        jl_unionall_t *u = (jl_unionall_t*)t;
+        jl_value_t *body = normalize_unionalls(u->body);
+        if (body != u->body) {
+            JL_GC_PUSH1(&body);
+            t = jl_new_struct(jl_unionall_type, u->var, body);
+            JL_GC_POP();
+            u = (jl_unionall_t*)t;
+        }
+
+        if (u->var->lb == u->var->ub || may_substitute_ub(body, u->var)) {
+            JL_TRY {
+                t = jl_instantiate_unionall(u, u->var->ub);
+            }
+            JL_CATCH {
+                // just skip normalization
+                // (may happen for bounds inconsistent with the wrapper's bounds)
+            }
+        }
     }
     JL_GC_POP();
-    return va;
+    return t;
 }
 
 static jl_value_t *_jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals, jl_typeenv_t *prev, jl_typestack_t *stack);
 
 static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value_t **iparams, size_t ntp,
-                                       int cacheable, jl_typestack_t *stack, jl_typeenv_t *env)
+                                       jl_typestack_t *stack, jl_typeenv_t *env)
 {
     jl_typestack_t top;
     jl_typename_t *tn = dt->name;
     int istuple = (tn == jl_tuple_typename);
     int isnamedtuple = (tn == jl_namedtuple_typename);
-    // check type cache
+    if (tn != jl_type_typename) {
+        size_t i;
+        for (i = 0; i < ntp; i++)
+            iparams[i] = normalize_unionalls(iparams[i]);
+    }
+
+    // check type cache, if applicable
+    int cacheable = 1;
+    if (istuple) {
+        size_t i;
+        for (i = 0; cacheable && i < ntp; i++)
+            if (!jl_is_concrete_type(iparams[i]) && iparams[i] != jl_bottom_type)
+                cacheable = 0;
+    }
+    else {
+        size_t i;
+        for (i = 0; cacheable && i < ntp; i++)
+            if (jl_has_free_typevars(iparams[i]))
+                cacheable = 0;
+    }
     if (cacheable) {
         size_t i;
         for (i = 0; i < ntp; i++) {
@@ -1259,7 +1415,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
                 continue;
             if (jl_is_datatype(pi))
                 continue;
-            if (jl_is_vararg_type(pi)) {
+            if (jl_is_vararg(pi)) {
                 pi = jl_unwrap_vararg(pi);
                 if (jl_has_free_typevars(pi))
                     continue;
@@ -1270,11 +1426,16 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
                     jl_types_equal(pi, tw)) {
                 // This would require some special handling, but is never used at
                 // the moment.
-                assert(!jl_is_vararg_type(iparams[i]));
+                assert(!jl_is_vararg(iparams[i]));
                 iparams[i] = tw;
                 if (p) jl_gc_wb(p, tw);
             }
         }
+        if (tn == jl_type_typename && jl_is_datatype(iparams[0]) && ((jl_datatype_t*)iparams[0])->name == jl_type_typename &&
+            jl_tparam0(iparams[0]) == jl_bottom_type) {
+            // normalize Type{Type{Union{}}} to Type{TypeofBottom}
+            iparams[0] = (jl_value_t*)jl_typeofbottom_type;
+        }
         jl_value_t *lkup = (jl_value_t*)lookup_type(tn, iparams, ntp);
         if (lkup != NULL)
             return lkup;
@@ -1284,20 +1445,6 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
         return stack_lkup;
 
     if (!istuple) {
-        if (jl_is_vararg_type((jl_value_t*)dt) && ntp == 2) {
-            jl_value_t *lenparam = iparams[1];
-            if (jl_is_typevar(lenparam)) {
-                jl_tvar_t *N = (jl_tvar_t*)lenparam;
-                if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type))
-                    jl_error("TypeVar in Vararg length must have bounds Union{} and Any");
-            }
-            else if (!jl_is_long(lenparam)) {
-                jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, lenparam);
-            }
-            else if (jl_unbox_long(lenparam) < 0) {
-                jl_errorf("Vararg length is negative: %zd", jl_unbox_long(lenparam));
-            }
-        }
         // check parameters against bounds in type definition
         check_datatype_parameters(tn, iparams, ntp);
     }
@@ -1310,30 +1457,16 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     jl_value_t *last = iparams[ntp - 1];
     JL_GC_PUSH3(&p, &ndt, &last);
 
-    int isvatuple = 0;
-    if (istuple && ntp > 0 && jl_is_vararg_type(last)) {
-        isvatuple = 1;
+    if (istuple && ntp > 0 && jl_is_vararg(last)) {
         // normalize Tuple{..., Vararg{Int, 3}} to Tuple{..., Int, Int, Int}
         jl_value_t *va = jl_unwrap_unionall(last);
-        jl_value_t *va0 = jl_tparam0(va), *va1 = jl_tparam1(va);
+        jl_value_t *va0 = jl_unwrap_vararg(va), *va1 = jl_unwrap_vararg_num(va);
         // return same `Tuple` object for types equal to it
-        if (ntp == 1 &&
-            (last == (jl_value_t*)jl_vararg_type ||  // Tuple{Vararg} == Tuple
-             (va0 == (jl_value_t*)jl_any_type &&
-              jl_is_unionall(last) && va1 == (jl_value_t*)((jl_unionall_t*)last)->var))) {
+        if (ntp == 1 && va0 == (jl_value_t*)jl_any_type && !va1) {
             JL_GC_POP();
             return (jl_value_t*)jl_anytuple_type;
         }
-        int did_normalize = 0;
-        jl_value_t *last2 = normalize_vararg(last);
-        assert(!jl_is_unionall(last2) || !jl_is_unionall(((jl_unionall_t*)last2)->body));
-        if (last2 != last) {
-            last = last2;
-            did_normalize = 1;
-            va = jl_unwrap_unionall(last);
-            va0 = jl_tparam0(va); va1 = jl_tparam1(va);
-        }
-        if (jl_is_long(va1)) {
+        if (va1 && jl_is_long(va1)) {
             ssize_t nt = jl_unbox_long(va1);
             assert(nt >= 0);
             if (nt == 0 || !jl_has_free_typevars(va0)) {
@@ -1353,12 +1486,6 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
                 return ndt;
             }
         }
-        if (did_normalize) {
-            p = jl_alloc_svec(ntp);
-            for (size_t i = 0; i < ntp-1; i++)
-                jl_svecset(p, i, iparams[i]);
-            jl_svecset(p, ntp-1, last);
-        }
     }
 
     // move array of instantiated parameters to heap; we need to keep it
@@ -1394,7 +1521,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     jl_gc_wb(ndt, ndt->parameters);
     ndt->types = NULL; // to be filled in below
     if (istuple) {
-        ndt->types = p;
+        ndt->types = p; // TODO: this may need to filter out certain types
     }
     else if (isnamedtuple) {
         jl_value_t *names_tup = jl_svecref(p, 0);
@@ -1403,48 +1530,33 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
             if (!jl_is_tuple(names_tup))
                 jl_type_error_rt("NamedTuple", "names", (jl_value_t*)jl_anytuple_type, names_tup);
             size_t nf = jl_nfields(names_tup);
-            jl_svec_t *names = jl_alloc_svec_uninit(nf);
             for (size_t i = 0; i < nf; i++) {
                 jl_value_t *ni = jl_fieldref(names_tup, i);
                 if (!jl_is_symbol(ni))
                     jl_type_error_rt("NamedTuple", "name", (jl_value_t*)jl_symbol_type, ni);
                 for (size_t j = 0; j < i; j++) {
-                    if (ni == jl_svecref(names, j))
+                    if (ni == jl_fieldref_noalloc(names_tup, j))
                         jl_errorf("duplicate field name in NamedTuple: \"%s\" is not unique", jl_symbol_name((jl_sym_t*)ni));
                 }
-                jl_svecset(names, i, ni);
             }
             if (!jl_is_datatype(values_tt))
                 jl_error("NamedTuple field type must be a tuple type");
             if (jl_is_va_tuple((jl_datatype_t*)values_tt) || jl_nparams(values_tt) != nf)
                 jl_error("NamedTuple names and field types must have matching lengths");
-            ndt->names = names;
-            jl_gc_wb(ndt, ndt->names);
             ndt->types = ((jl_datatype_t*)values_tt)->parameters;
             jl_gc_wb(ndt, ndt->types);
         }
         else {
-            ndt->types = jl_emptysvec;
+            ndt->types = jl_emptysvec; // XXX: this is essentially always false
         }
     }
-    ndt->mutabl = dt->mutabl;
-    ndt->abstract = dt->abstract;
-    ndt->size = 0;
-    jl_precompute_memoized_dt(ndt, cacheable);
-    if (istuple)
-        ndt->ninitialized = ntp - isvatuple;
-    else if (isnamedtuple)
-        ndt->ninitialized = jl_svec_len(ndt->types);
-    else
-        ndt->ninitialized = dt->ninitialized;
-
-    if (jl_is_primitivetype(dt)) {
-        ndt->size = dt->size;
-        ndt->layout = dt->layout;
-        ndt->isbitstype = ndt->isinlinealloc = ndt->isconcretetype;
-    }
 
     jl_datatype_t *primarydt = ((jl_datatype_t*)jl_unwrap_unionall(tn->wrapper));
+    jl_precompute_memoized_dt(ndt, cacheable);
+    ndt->size = 0;
+    if (primarydt->layout)
+        jl_compute_field_offsets(ndt);
+
     if (istuple || isnamedtuple) {
         ndt->super = jl_any_type;
     }
@@ -1466,7 +1578,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     }
     else if (!isnamedtuple && !istuple) {
         assert(ftypes != jl_emptysvec || jl_field_names(ndt) == jl_emptysvec);
-        assert(ftypes == jl_emptysvec || !ndt->abstract);
+        assert(ftypes == jl_emptysvec || !ndt->name->abstract);
         if (ftypes == jl_emptysvec) {
             ndt->types = ftypes;
         }
@@ -1481,10 +1593,12 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
     }
 
     // now publish the finished result
+    // XXX: if the stack was used, this will publish in the wrong order,
+    // leading to incorrect layouts and data races (#40050: the A{T} should be
+    // an isbitstype singleton of size 0)
     if (cacheable) {
-        if (!jl_is_primitivetype(dt) && ndt->types != NULL && !ndt->abstract) {
+        if (ndt->layout == NULL && ndt->types != NULL && ndt->isconcretetype)
             jl_compute_field_offsets(ndt);
-        }
         jl_cache_type_(ndt);
         JL_UNLOCK(&typecache_lock); // Might GC
     }
@@ -1495,13 +1609,7 @@ static jl_value_t *inst_datatype_inner(jl_datatype_t *dt, jl_svec_t *p, jl_value
 
 static jl_tupletype_t *jl_apply_tuple_type_v_(jl_value_t **p, size_t np, jl_svec_t *params)
 {
-    int cacheable = 1;
-    for (size_t i = 0; i < np; i++) {
-        assert(p[i]);
-        if (!jl_is_concrete_type(p[i]) && p[i] != jl_bottom_type)
-            cacheable = 0;
-    }
-    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, cacheable, NULL, NULL);
+    return (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, p, np, NULL, NULL);
 }
 
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params)
@@ -1523,7 +1631,6 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
 {
     jl_tupletype_t *tt = (jl_datatype_t*)lookup_typevalue(jl_tuple_typename, arg1, args, nargs, leaf);
     if (tt == NULL) {
-        int cacheable = 1;
         size_t i;
         jl_svec_t *params = jl_alloc_svec(nargs);
         JL_GC_PUSH1(&params);
@@ -1535,14 +1642,13 @@ jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size
                 // `jl_typeof(ai)`, but that will require some redesign of the caching
                 // logic.
                 ai = (jl_value_t*)jl_wrap_Type(ai);
-                cacheable = 0;
             }
             else {
                 ai = jl_typeof(ai);
             }
             jl_svecset(params, i, ai);
         }
-        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, cacheable, NULL, NULL);
+        tt = (jl_datatype_t*)inst_datatype_inner(jl_anytuple_type, params, jl_svec_data(params), nargs, NULL, NULL);
         JL_GC_POP();
     }
     return tt;
@@ -1558,6 +1664,9 @@ static jl_svec_t *inst_ftypes(jl_svec_t *p, jl_typeenv_t *env, jl_typestack_t *s
         jl_value_t *pi = jl_svecref(p, i);
         JL_TRY {
             pi = inst_type_w_(pi, env, stack, 1);
+            if (!jl_is_type(pi) && !jl_is_typevar(pi)) {
+                pi = jl_bottom_type;
+            }
         }
         JL_CATCH {
             pi = jl_bottom_type;
@@ -1580,8 +1689,8 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         // a fixed-length tuple
         jl_value_t *T=NULL, *N=NULL;
         jl_value_t *va = jl_unwrap_unionall(jl_tparam0(tt));
-        jl_value_t *ttT = jl_tparam0(va);
-        jl_value_t *ttN = jl_tparam1(va);
+        jl_value_t *ttT = jl_unwrap_vararg(va);
+        jl_value_t *ttN = jl_unwrap_vararg_num(va);
         jl_typeenv_t *e = env;
         while (e != NULL) {
             if ((jl_value_t*)e->var == ttT)
@@ -1607,9 +1716,6 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         iparams = jl_svec_data(ip_heap);
     }
     int bound = 0;
-    int cacheable = 1;
-    if (jl_is_va_tuple(tt))
-        cacheable = 0;
     int i;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
@@ -1618,11 +1724,9 @@ static jl_value_t *inst_tuple_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_
         if (ip_heap)
             jl_gc_wb(ip_heap, pi);
         bound |= (pi != elt);
-        if (cacheable && !jl_is_concrete_type(pi))
-            cacheable = 0;
     }
     if (bound)
-        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, cacheable, stack, env);
+        t = inst_datatype_inner(tt, ip_heap, iparams, ntp, stack, env);
     JL_GC_POP();
     return t;
 }
@@ -1681,6 +1785,22 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
         JL_GC_POP();
         return t;
     }
+    if (jl_is_vararg(t)) {
+        jl_vararg_t *v = (jl_vararg_t*)t;
+        jl_value_t *T = NULL;
+        jl_value_t *N = NULL;
+        JL_GC_PUSH2(&T, &N);
+        if (v->T) {
+            T = inst_type_w_(v->T, env, stack, check);
+            if (v->N)
+                N = inst_type_w_(v->N, env, stack, check);
+        }
+        if (T != v->T || N != v->N) {
+            t = (jl_value_t*)jl_wrap_vararg(T, N);
+        }
+        JL_GC_POP();
+        return t;
+    }
     if (!jl_is_datatype(t))
         return t;
     jl_datatype_t *tt = (jl_datatype_t*)t;
@@ -1693,18 +1813,16 @@ static jl_value_t *inst_type_w_(jl_value_t *t, jl_typeenv_t *env, jl_typestack_t
     size_t ntp = jl_svec_len(tp);
     jl_value_t **iparams;
     JL_GC_PUSHARGS(iparams, ntp);
-    int cacheable = 1, bound = 0;
+    int bound = 0;
     for (i = 0; i < ntp; i++) {
         jl_value_t *elt = jl_svecref(tp, i);
         jl_value_t *pi = inst_type_w_(elt, env, stack, check);
         iparams[i] = pi;
         bound |= (pi != elt);
-        if (cacheable && jl_has_free_typevars(pi))
-            cacheable = 0;
     }
     // if t's parameters are not bound in the environment, return it uncopied (#9378)
     if (bound)
-        t = inst_datatype_inner(tt, NULL, iparams, ntp, cacheable, stack, env);
+        t = inst_datatype_inner(tt, NULL, iparams, ntp, stack, env);
     JL_GC_POP();
     return t;
 }
@@ -1751,19 +1869,36 @@ jl_datatype_t *jl_wrap_Type(jl_value_t *t)
     return (jl_datatype_t*)jl_instantiate_unionall(jl_type_type, t);
 }
 
-jl_value_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n)
+jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n)
 {
-    if (t == NULL) {
-        assert(n == NULL);
-        return (jl_value_t*)jl_vararg_type;
+    if (n) {
+        if (jl_is_typevar(n)) {
+            // TODO: this is disabled due to #39698; it is also inconsistent
+            // with other similar checks, where we usually only check substituted
+            // values and not the bounds of variables.
+            /*
+            jl_tvar_t *N = (jl_tvar_t*)n;
+            if (!(N->lb == jl_bottom_type && N->ub == (jl_value_t*)jl_any_type))
+                jl_error("TypeVar in Vararg length must have bounds Union{} and Any");
+            */
+        }
+        else if (!jl_is_long(n)) {
+            jl_type_error_rt("Vararg", "count", (jl_value_t*)jl_long_type, n);
+        }
+        else if (jl_unbox_long(n) < 0) {
+            jl_errorf("Vararg length is negative: %zd", jl_unbox_long(n));
+        }
     }
-    jl_value_t *vt = jl_instantiate_unionall(jl_vararg_type, t);
-    if (n == NULL)
-        return vt;
-    JL_GC_PUSH1(&vt);
-    jl_value_t *vn = jl_instantiate_unionall((jl_unionall_t*)vt, n);
-    JL_GC_POP();
-    return vn;
+    if (t) {
+        if (!jl_valid_type_param(t)) {
+            jl_type_error_rt("Vararg", "type", (jl_value_t*)jl_type_type, t);
+        }
+    }
+    jl_task_t *ct = jl_current_task;
+    jl_vararg_t *vm = (jl_vararg_t *)jl_gc_alloc(ct->ptls, sizeof(jl_vararg_t), jl_vararg_type);
+    vm->T = t;
+    vm->N = n;
+    return vm;
 }
 
 JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_ROOT, void *stack)
@@ -1820,7 +1955,7 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw!
         for (i = 0; i < n; i++)
             env[i].val = jl_svecref(ndt->parameters, i);
 
-        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, env, &top, 1);
+        ndt->super = (jl_datatype_t*)inst_type_w_((jl_value_t*)t->super, &env[n - 1], &top, 1);
         jl_gc_wb(ndt, ndt->super);
     }
 
@@ -1830,7 +1965,7 @@ void jl_reinstantiate_inner_types(jl_datatype_t *t) // can throw!
             for (i = 0; i < n; i++)
                 env[i].val = jl_svecref(ndt->parameters, i);
             assert(ndt->types == NULL);
-            ndt->types = inst_ftypes(t->types, env, &top);
+            ndt->types = inst_ftypes(t->types, &env[n - 1], &top);
             jl_gc_wb(ndt, ndt->types);
             if (ndt->isconcretetype) { // cacheable
                 jl_compute_field_offsets(ndt);
@@ -1861,7 +1996,6 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_symbol_type = jl_new_uninitialized_datatype();
     jl_simplevector_type = jl_new_uninitialized_datatype();
     jl_methtable_type = jl_new_uninitialized_datatype();
-    jl_nothing = jl_gc_permobj(0, NULL);
 
     jl_emptysvec = (jl_svec_t*)jl_gc_permobj(sizeof(void*), jl_simplevector_type);
     jl_svec_set_len_unsafe(jl_emptysvec, 0);
@@ -1877,69 +2011,66 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_type_typename->mt = jl_type_type_mt;
 
     // initialize them. lots of cycles.
-    jl_datatype_type->name = jl_new_typename_in(jl_symbol("DataType"), core);
+    // NOTE: types are not actually mutable, but we want to ensure they are heap-allocated with stable addresses
+    jl_datatype_type->name = jl_new_typename_in(jl_symbol("DataType"), core, 0, 1);
     jl_datatype_type->name->wrapper = (jl_value_t*)jl_datatype_type;
     jl_datatype_type->super = (jl_datatype_t*)jl_type_type;
     jl_datatype_type->parameters = jl_emptysvec;
-    jl_datatype_type->name->names = jl_perm_symsvec(20,
-                                                    "name",
-                                                    "super",
-                                                    "parameters",
-                                                    "types",
-                                                    "names",
-                                                    "instance",
-                                                    "layout",
-                                                    "size",
-                                                    "ninitialized",
-                                                    "hash",
-                                                    "abstract",
-                                                    "mutable",
-                                                    "hasfreetypevars",
-                                                    "isconcretetype",
-                                                    "isdispatchtuple",
-                                                    "isbitstype",
-                                                    "zeroinit",
-                                                    "isinlinealloc",
-                                                    "has_concrete_subtype",
-                                                    "cached_by_hash");
-    jl_datatype_type->types = jl_svec(20,
-                                      jl_typename_type,
-                                      jl_datatype_type,
-                                      jl_simplevector_type,
-                                      jl_simplevector_type, jl_simplevector_type,
-                                      jl_any_type, // instance
-                                      jl_any_type, jl_any_type, jl_any_type, jl_any_type, // properties
-                                      jl_any_type, jl_any_type, jl_any_type, jl_any_type,
-                                      jl_any_type, jl_any_type, jl_any_type, jl_any_type,
-                                      jl_any_type, jl_any_type);
-    jl_datatype_type->abstract = 0;
-    // NOTE: types are not actually mutable, but we want to ensure they are heap-allocated with stable addresses
-    jl_datatype_type->mutabl = 1;
-    jl_datatype_type->ninitialized = 3;
+    jl_datatype_type->name->n_uninitialized = 9 - 3;
+    jl_datatype_type->name->names = jl_perm_symsvec(9,
+            "name",
+            "super",
+            "parameters",
+            "types",
+            "instance",
+            "layout",
+            "size",
+            "hash",
+            "flags"); // "hasfreetypevars", "isconcretetype", "isdispatchtuple", "isbitstype", "zeroinit", "has_concrete_subtype", "cached_by_hash"
+    jl_datatype_type->types = jl_svec(9,
+            jl_typename_type,
+            jl_datatype_type,
+            jl_simplevector_type,
+            jl_simplevector_type,
+            jl_any_type, // instance
+            jl_any_type /*jl_voidpointer_type*/,
+            jl_any_type /*jl_int32_type*/,
+            jl_any_type /*jl_int32_type*/,
+            jl_any_type /*jl_uint8_type*/);
+    const static uint32_t datatype_constfields[1] = { 0x00000097 }; // (1<<0)|(1<<1)|(1<<2)|(1<<4)|(1<<7)
+    jl_datatype_type->name->constfields = datatype_constfields;
     jl_precompute_memoized_dt(jl_datatype_type, 1);
 
-    jl_typename_type->name = jl_new_typename_in(jl_symbol("TypeName"), core);
+    jl_typename_type->name = jl_new_typename_in(jl_symbol("TypeName"), core, 0, 1);
     jl_typename_type->name->wrapper = (jl_value_t*)jl_typename_type;
     jl_typename_type->name->mt = jl_nonfunction_mt;
     jl_typename_type->super = jl_any_type;
     jl_typename_type->parameters = jl_emptysvec;
-    jl_typename_type->name->names = jl_perm_symsvec(9, "name", "module",
-                                                    "names", "wrapper",
-                                                    "cache", "linearcache",
-                                                    "hash", "mt", "partial");
-    jl_typename_type->types = jl_svec(9, jl_symbol_type, jl_any_type, jl_simplevector_type,
-                                      jl_type_type, jl_simplevector_type, jl_simplevector_type,
-                                      jl_any_type, jl_methtable_type, jl_any_type);
-    jl_typename_type->abstract = 0;
-    jl_typename_type->mutabl = 1;
-    jl_typename_type->ninitialized = 2;
+    jl_typename_type->name->n_uninitialized = 15 - 2;
+    jl_typename_type->name->names = jl_perm_symsvec(15, "name", "module",
+                                                    "names", "atomicfields", "constfields",
+                                                    "wrapper", "Typeofwrapper", "cache", "linearcache",
+                                                    "mt", "partial",
+                                                    "hash", "n_uninitialized",
+                                                    "flags", // "abstract", "mutable", "mayinlinealloc",
+                                                    "max_methods");
+    jl_typename_type->types = jl_svec(15, jl_symbol_type, jl_any_type /*jl_module_type*/,
+                                      jl_simplevector_type, jl_any_type/*jl_voidpointer_type*/, jl_any_type/*jl_voidpointer_type*/,
+                                      jl_type_type, jl_type_type, jl_simplevector_type, jl_simplevector_type,
+                                      jl_methtable_type, jl_any_type,
+                                      jl_any_type /*jl_long_type*/, jl_any_type /*jl_int32_type*/,
+                                      jl_any_type /*jl_uint8_type*/,
+                                      jl_any_type /*jl_uint8_type*/);
+    const static uint32_t typename_constfields[1] = { 0x00003a3f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<9)|(1<<11)|(1<<12)|(1<<13)
+    jl_typename_type->name->constfields = typename_constfields;
     jl_precompute_memoized_dt(jl_typename_type, 1);
 
-    jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core);
+    jl_methtable_type->name = jl_new_typename_in(jl_symbol("MethodTable"), core, 0, 1);
     jl_methtable_type->name->wrapper = (jl_value_t*)jl_methtable_type;
     jl_methtable_type->name->mt = jl_nonfunction_mt;
     jl_methtable_type->super = jl_any_type;
     jl_methtable_type->parameters = jl_emptysvec;
+    jl_methtable_type->name->n_uninitialized = 12 - 5;
     jl_methtable_type->name->names = jl_perm_symsvec(12, "name", "defs",
                                                      "leafcache", "cache", "max_args",
                                                      "kwsorter", "module",
@@ -1947,74 +2078,69 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_methtable_type->types = jl_svec(12, jl_symbol_type, jl_any_type, jl_any_type,
                                        jl_any_type, jl_any_type/*jl_long*/,
                                        jl_any_type, jl_any_type/*module*/,
-                                       jl_any_type/*any vector*/, jl_any_type/*long*/, jl_any_type/*int32*/,
+                                       jl_any_type/*any vector*/, jl_any_type/*voidpointer*/, jl_any_type/*int32*/,
                                        jl_any_type/*uint8*/, jl_any_type/*uint8*/);
-    jl_methtable_type->abstract = 0;
-    jl_methtable_type->mutabl = 1;
-    jl_methtable_type->ninitialized = 5;
+    const static uint32_t methtable_constfields[1] = { 0x00000040 }; // (1<<6);
+    jl_methtable_type->name->constfields = methtable_constfields;
     jl_precompute_memoized_dt(jl_methtable_type, 1);
 
-    jl_symbol_type->name = jl_new_typename_in(jl_symbol("Symbol"), core);
+    jl_symbol_type->name = jl_new_typename_in(jl_symbol("Symbol"), core, 0, 1);
     jl_symbol_type->name->wrapper = (jl_value_t*)jl_symbol_type;
     jl_symbol_type->name->mt = jl_nonfunction_mt;
     jl_symbol_type->super = jl_any_type;
     jl_symbol_type->parameters = jl_emptysvec;
+    jl_symbol_type->name->n_uninitialized = 0;
     jl_symbol_type->name->names = jl_emptysvec;
     jl_symbol_type->types = jl_emptysvec;
     jl_symbol_type->size = 0;
-    jl_symbol_type->abstract = 0;
-    jl_symbol_type->mutabl = 1;
-    jl_symbol_type->ninitialized = 0;
     jl_precompute_memoized_dt(jl_symbol_type, 1);
 
-    jl_simplevector_type->name = jl_new_typename_in(jl_symbol("SimpleVector"), core);
+    jl_simplevector_type->name = jl_new_typename_in(jl_symbol("SimpleVector"), core, 0, 1);
     jl_simplevector_type->name->wrapper = (jl_value_t*)jl_simplevector_type;
     jl_simplevector_type->name->mt = jl_nonfunction_mt;
     jl_simplevector_type->super = jl_any_type;
     jl_simplevector_type->parameters = jl_emptysvec;
+    jl_simplevector_type->name->n_uninitialized = 0;
     jl_simplevector_type->name->names = jl_emptysvec;
     jl_simplevector_type->types = jl_emptysvec;
-    jl_simplevector_type->abstract = 0;
-    jl_simplevector_type->mutabl = 1;
-    jl_simplevector_type->ninitialized = 0;
     jl_precompute_memoized_dt(jl_simplevector_type, 1);
 
     // now they can be used to create the remaining base kinds and types
     jl_nothing_type = jl_new_datatype(jl_symbol("Nothing"), core, jl_any_type, jl_emptysvec,
-                                      jl_emptysvec, jl_emptysvec, 0, 0, 0);
+                                      jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     jl_void_type = jl_nothing_type; // deprecated alias
     jl_astaggedvalue(jl_nothing)->header = ((uintptr_t)jl_nothing_type) | GC_OLD_MARKED;
     jl_nothing_type->instance = jl_nothing;
 
     jl_datatype_t *type_type = (jl_datatype_t*)jl_type_type;
     jl_typeofbottom_type = jl_new_datatype(jl_symbol("TypeofBottom"), core, type_type, jl_emptysvec,
-                                         jl_emptysvec, jl_emptysvec, 0, 0, 0);
+                                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     jl_bottom_type = jl_new_struct(jl_typeofbottom_type);
     jl_typeofbottom_type->instance = jl_bottom_type;
 
     jl_uniontype_type = jl_new_datatype(jl_symbol("Union"), core, type_type, jl_emptysvec,
                                         jl_perm_symsvec(2, "a", "b"),
                                         jl_svec(2, jl_any_type, jl_any_type),
-                                        0, 0, 2);
+                                        jl_emptysvec, 0, 0, 2);
 
     jl_tvar_type = jl_new_datatype(jl_symbol("TypeVar"), core, jl_any_type, jl_emptysvec,
                                    jl_perm_symsvec(3, "name", "lb", "ub"),
                                    jl_svec(3, jl_symbol_type, jl_any_type, jl_any_type),
-                                   0, 1, 3);
+                                   jl_emptysvec, 0, 1, 3);
 
     jl_unionall_type = jl_new_datatype(jl_symbol("UnionAll"), core, type_type, jl_emptysvec,
                                        jl_perm_symsvec(2, "var", "body"),
                                        jl_svec(2, jl_tvar_type, jl_any_type),
-                                       0, 0, 2);
+                                       jl_emptysvec, 0, 0, 2);
 
-    jl_svec_t *tv;
-    tv = jl_svec2(tvar("T"),tvar("N"));
-    jl_vararg_type = (jl_unionall_t*)jl_new_abstracttype((jl_value_t*)jl_symbol("Vararg"), core, jl_any_type, tv)->name->wrapper;
-    jl_vararg_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_vararg_type))->name;
+    jl_vararg_type = jl_new_datatype(jl_symbol("TypeofVararg"), core, jl_any_type, jl_emptysvec,
+                                            jl_perm_symsvec(2, "T", "N"),
+                                            jl_svec(2, jl_any_type, jl_any_type),
+                                            jl_emptysvec, 0, 0, 0);
 
     jl_svec_t *anytuple_params = jl_svec(1, jl_wrap_vararg((jl_value_t*)jl_any_type, (jl_value_t*)NULL));
     jl_anytuple_type = jl_new_datatype(jl_symbol("Tuple"), core, jl_any_type, anytuple_params,
-                                       jl_emptysvec, anytuple_params, 0, 0, 0);
+                                       jl_emptysvec, anytuple_params, jl_emptysvec, 0, 0, 0);
     jl_tuple_typename = jl_anytuple_type->name;
     // fix some miscomputed values, since we didn't know this was going to be a Tuple in jl_precompute_memoized_dt
     jl_tuple_typename->wrapper = (jl_value_t*)jl_anytuple_type; // remove UnionAll wrappers
@@ -2047,25 +2173,31 @@ void jl_init_types(void) JL_GC_DISABLED
                                           jl_any_type, jl_emptysvec, 64);
     jl_uint8_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt8"), core,
                                          jl_any_type, jl_emptysvec, 8);
+    jl_uint16_type = jl_new_primitivetype((jl_value_t*)jl_symbol("UInt16"), core,
+                                          jl_any_type, jl_emptysvec, 16);
 
     jl_ssavalue_type = jl_new_datatype(jl_symbol("SSAValue"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "id"),
-                                       jl_svec1(jl_long_type), 0, 0, 1);
+                                       jl_svec1(jl_long_type),
+                                       jl_emptysvec, 0, 0, 1);
 
     jl_abstractslot_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Slot"), core, jl_any_type,
                                                jl_emptysvec);
 
     jl_slotnumber_type = jl_new_datatype(jl_symbol("SlotNumber"), core, jl_abstractslot_type, jl_emptysvec,
                                          jl_perm_symsvec(1, "id"),
-                                         jl_svec1(jl_long_type), 0, 0, 1);
+                                         jl_svec1(jl_long_type),
+                                         jl_emptysvec, 0, 0, 1);
 
     jl_typedslot_type = jl_new_datatype(jl_symbol("TypedSlot"), core, jl_abstractslot_type, jl_emptysvec,
                                         jl_perm_symsvec(2, "id", "typ"),
-                                        jl_svec(2, jl_long_type, jl_any_type), 0, 0, 2);
+                                        jl_svec(2, jl_long_type, jl_any_type),
+                                        jl_emptysvec, 0, 0, 2);
 
     jl_argument_type = jl_new_datatype(jl_symbol("Argument"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "n"),
-                                       jl_svec1(jl_long_type), 0, 0, 1);
+                                       jl_svec1(jl_long_type),
+                                       jl_emptysvec, 0, 0, 1);
 
     jl_init_int32_int64_cache();
 
@@ -2077,7 +2209,7 @@ void jl_init_types(void) JL_GC_DISABLED
 
     jl_abstractstring_type = jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractString"), core, jl_any_type, jl_emptysvec);
     jl_string_type = jl_new_datatype(jl_symbol("String"), core, jl_abstractstring_type, jl_emptysvec,
-                                     jl_emptysvec, jl_emptysvec, 0, 1, 0);
+                                     jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
     jl_string_type->instance = NULL;
     jl_compute_field_offsets(jl_string_type);
     jl_an_empty_string = jl_pchar_to_string("\0", 1);
@@ -2099,6 +2231,7 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_any_type,
                             jl_any_type,
                             jl_any_type),
+                        jl_emptysvec,
                         0, 1, 6);
 
     jl_typemap_entry_type =
@@ -2125,14 +2258,17 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type),
+                        jl_emptysvec,
                         0, 1, 4);
+    const static uint32_t typemap_entry_constfields[1] = { 0x000003fe }; // (1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9);
+    jl_typemap_entry_type->name->constfields = typemap_entry_constfields;
 
     jl_function_type = jl_new_abstracttype((jl_value_t*)jl_symbol("Function"), core, jl_any_type, jl_emptysvec);
     jl_builtin_type  = jl_new_abstracttype((jl_value_t*)jl_symbol("Builtin"), core, jl_function_type, jl_emptysvec);
     jl_function_type->name->mt = NULL; // subtypes of Function have independent method tables
     jl_builtin_type->name->mt = NULL;  // so they don't share the Any type table
 
-    tv = jl_svec2(tvar("T"), tvar("N"));
+    jl_svec_t *tv = jl_svec2(tvar("T"), tvar("N"));
     jl_abstractarray_type = (jl_unionall_t*)
         jl_new_abstracttype((jl_value_t*)jl_symbol("AbstractArray"), core,
                             jl_any_type, tv)->name->wrapper;
@@ -2146,10 +2282,8 @@ void jl_init_types(void) JL_GC_DISABLED
     tv = jl_svec2(tvar("T"), tvar("N"));
     jl_array_type = (jl_unionall_t*)
         jl_new_datatype(jl_symbol("Array"), core,
-                        (jl_datatype_t*)
-                        jl_apply_type((jl_value_t*)jl_densearray_type, jl_svec_data(tv), 2),
-                        tv,
-                        jl_emptysvec, jl_emptysvec, 0, 1, 0)->name->wrapper;
+                        (jl_datatype_t*)jl_apply_type((jl_value_t*)jl_densearray_type, jl_svec_data(tv), 2),
+                        tv, jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0)->name->wrapper;
     jl_array_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type))->name;
     jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_array_type));
 
@@ -2157,88 +2291,101 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_array_symbol_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_symbol_type, jl_box_long(1));
     jl_array_uint8_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint8_type, jl_box_long(1));
     jl_array_int32_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_int32_type, jl_box_long(1));
+    jl_array_uint64_type = jl_apply_type2((jl_value_t*)jl_array_type, (jl_value_t*)jl_uint64_type, jl_box_long(1));
     jl_an_empty_vec_any = (jl_value_t*)jl_alloc_vec_any(0); // used internally
-    jl_nonfunction_mt->leafcache = (jl_array_t*)jl_an_empty_vec_any;
-    jl_type_type_mt->leafcache = (jl_array_t*)jl_an_empty_vec_any;
+    jl_atomic_store_relaxed(&jl_nonfunction_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&jl_type_type_mt->leafcache, (jl_array_t*)jl_an_empty_vec_any);
 
     jl_expr_type =
         jl_new_datatype(jl_symbol("Expr"), core,
                         jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "head", "args"),
                         jl_svec(2, jl_symbol_type, jl_array_any_type),
-                        0, 1, 2);
+                        jl_emptysvec, 0, 1, 2);
 
     jl_module_type =
         jl_new_datatype(jl_symbol("Module"), core, jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(2, "name", "parent"),
-                        jl_svec(2, jl_symbol_type, jl_any_type), 0, 1, 2);
+                        jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 1, 0);
+    jl_module_type->instance = NULL;
+    jl_compute_field_offsets(jl_module_type);
 
     jl_value_t *symornothing[2] = { (jl_value_t*)jl_symbol_type, (jl_value_t*)jl_void_type };
     jl_linenumbernode_type =
         jl_new_datatype(jl_symbol("LineNumberNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "line", "file"),
-                        jl_svec(2, jl_long_type, jl_type_union(symornothing, 2)), 0, 0, 2);
+                        jl_svec(2, jl_long_type, jl_type_union(symornothing, 2)),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_lineinfonode_type =
         jl_new_datatype(jl_symbol("LineInfoNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(5, "module", "method", "file", "line", "inlined_at"),
-                        jl_svec(5, jl_module_type, jl_any_type, jl_symbol_type, jl_long_type, jl_long_type),
-                        0, 0, 5);
+                        jl_svec(5, jl_module_type, jl_any_type, jl_symbol_type, jl_int32_type, jl_int32_type),
+                        jl_emptysvec, 0, 0, 5);
 
     jl_gotonode_type =
         jl_new_datatype(jl_symbol("GotoNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "label"),
-                        jl_svec(1, jl_long_type), 0, 0, 1);
+                        jl_svec(1, jl_long_type),
+                        jl_emptysvec, 0, 0, 1);
 
     jl_gotoifnot_type =
         jl_new_datatype(jl_symbol("GotoIfNot"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "cond", "dest"),
-                        jl_svec(2, jl_any_type, jl_long_type), 0, 0, 2);
+                        jl_svec(2, jl_any_type, jl_long_type),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_returnnode_type =
         jl_new_datatype(jl_symbol("ReturnNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "val"),
-                        jl_svec(1, jl_any_type), 0, 0, 0);
+                        jl_svec(1, jl_any_type),
+                        jl_emptysvec, 0, 0, 0);
 
     jl_pinode_type =
         jl_new_datatype(jl_symbol("PiNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "val", "typ"),
-                        jl_svec(2, jl_any_type, jl_any_type), 0, 0, 2);
+                        jl_svec(2, jl_any_type, jl_any_type),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_phinode_type =
         jl_new_datatype(jl_symbol("PhiNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "edges", "values"),
-                        jl_svec(2, jl_array_int32_type, jl_array_any_type), 0, 0, 2);
+                        jl_svec(2, jl_array_int32_type, jl_array_any_type),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_phicnode_type =
         jl_new_datatype(jl_symbol("PhiCNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "values"),
-                        jl_svec(1, jl_array_any_type), 0, 0, 1);
+                        jl_svec(1, jl_array_any_type),
+                        jl_emptysvec, 0, 0, 1);
 
     jl_upsilonnode_type =
         jl_new_datatype(jl_symbol("UpsilonNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "val"),
-                        jl_svec(1, jl_any_type), 0, 0, 0);
+                        jl_svec(1, jl_any_type),
+                        jl_emptysvec, 0, 0, 0);
 
     jl_quotenode_type =
         jl_new_datatype(jl_symbol("QuoteNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "value"),
-                        jl_svec(1, jl_any_type), 0, 0, 1);
+                        jl_svec(1, jl_any_type),
+                        jl_emptysvec, 0, 0, 1);
 
     jl_newvarnode_type =
         jl_new_datatype(jl_symbol("NewvarNode"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(1, "slot"),
-                        jl_svec(1, jl_slotnumber_type), 0, 0, 1);
+                        jl_svec(1, jl_slotnumber_type),
+                        jl_emptysvec, 0, 0, 1);
 
     jl_globalref_type =
         jl_new_datatype(jl_symbol("GlobalRef"), core, jl_any_type, jl_emptysvec,
                         jl_perm_symsvec(2, "mod", "name"),
-                        jl_svec(2, jl_module_type, jl_symbol_type), 0, 0, 2);
+                        jl_svec(2, jl_module_type, jl_symbol_type),
+                        jl_emptysvec, 0, 0, 2);
 
     jl_code_info_type =
         jl_new_datatype(jl_symbol("CodeInfo"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(18,
+                        jl_perm_symsvec(20,
                             "code",
                             "codelocs",
                             "ssavaluetypes",
@@ -2256,8 +2403,10 @@ void jl_init_types(void) JL_GC_DISABLED
                             "inferred",
                             "inlineable",
                             "propagate_inbounds",
-                            "pure"),
-                        jl_svec(18,
+                            "pure",
+                            "constprop",
+                            "purity"),
+                        jl_svec(20,
                             jl_array_any_type,
                             jl_array_int32_type,
                             jl_any_type,
@@ -2275,36 +2424,46 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_bool_type,
                             jl_bool_type,
                             jl_bool_type,
-                            jl_bool_type),
-                        0, 1, 18);
+                            jl_bool_type,
+                            jl_uint8_type,
+                            jl_uint8_type),
+                        jl_emptysvec,
+                        0, 1, 20);
 
     jl_method_type =
         jl_new_datatype(jl_symbol("Method"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(22,
+                        jl_perm_symsvec(29,
                             "name",
                             "module",
                             "file",
                             "line",
                             "primary_world",
-                            "deleted_world",
+                            "deleted_world", // !const
                             "sig",
-                            "specializations",
-                            "speckeyset",
+                            "specializations", // !const
+                            "speckeyset", // !const
                             "slot_syms",
-                            "source",
-                            "unspecialized",
-                            "generator",
-                            "roots",
-                            "ccallable",
-                            "invokes",
+                            "external_mt",
+                            "source", // !const
+                            "unspecialized", // !const
+                            "generator", // !const
+                            "roots", // !const
+                            "root_blocks", // !const
+                            "nroots_sysimg",
+                            "ccallable", // !const
+                            "invokes", // !const
+                            "recursion_relation", // !const
                             "nargs",
                             "called",
                             "nospecialize",
                             "nkw",
                             "isva",
-                            "pure"),
-                        jl_svec(22,
+                            "pure",
+                            "is_for_opaque_closure",
+                            "constprop",
+                            "purity"),
+                        jl_svec(29,
                             jl_symbol_type,
                             jl_module_type,
                             jl_symbol_type,
@@ -2316,44 +2475,61 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_array_type,
                             jl_string_type,
                             jl_any_type,
+                            jl_any_type,
                             jl_any_type, // jl_method_instance_type
                             jl_any_type,
                             jl_array_any_type,
+                            jl_array_uint64_type,
+                            jl_int32_type,
                             jl_simplevector_type,
                             jl_any_type,
+                            jl_any_type,
                             jl_int32_type,
                             jl_int32_type,
                             jl_int32_type,
                             jl_int32_type,
                             jl_bool_type,
-                            jl_bool_type),
+                            jl_bool_type,
+                            jl_bool_type,
+                            jl_uint8_type,
+                            jl_uint8_type),
+                        jl_emptysvec,
                         0, 1, 10);
+    //const static uint32_t method_constfields[1] = { 0x03fc065f }; // (1<<0)|(1<<1)|(1<<2)|(1<<3)|(1<<4)|(1<<6)|(1<<9)|(1<<10)|(1<<18)|(1<<19)|(1<<20)|(1<<21)|(1<<22)|(1<<23)|(1<<24)|(1<<25);
+    //jl_method_type->name->constfields = method_constfields;
 
     jl_method_instance_type =
         jl_new_datatype(jl_symbol("MethodInstance"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(7,
+                        jl_perm_symsvec(9,
                             "def",
                             "specTypes",
                             "sparam_vals",
                             "uninferred",
                             "backedges",
+                            "callbacks",
                             "cache",
-                            "inInference"),
-                        jl_svec(7,
+                            "inInference",
+                            "precompiled"),
+                        jl_svec(9,
                             jl_new_struct(jl_uniontype_type, jl_method_type, jl_module_type),
                             jl_any_type,
                             jl_simplevector_type,
                             jl_any_type,
                             jl_any_type,
                             jl_any_type,
+                            jl_any_type,
+                            jl_bool_type,
                             jl_bool_type),
+                        jl_emptysvec,
                         0, 1, 3);
+    //const static uint32_t method_instance_constfields[1] = { 0x00000007 }; // (1<<0)|(1<<1)|(1<<2);
+    //jl_method_instance_type->name->constfields = method_instance_constfields;
 
     jl_code_instance_type =
         jl_new_datatype(jl_symbol("CodeInstance"), core,
                         jl_any_type, jl_emptysvec,
-                        jl_perm_symsvec(11,
+                        jl_perm_symsvec(15,
                             "def",
                             "next",
                             "min_world",
@@ -2363,8 +2539,11 @@ void jl_init_types(void) JL_GC_DISABLED
                             "inferred",
                             //"edges",
                             //"absolute_max",
-                            "isspecsig", "precompile", "invoke", "specptr"), // function object decls
-                        jl_svec(11,
+                            "ipo_purity_bits", "purity_bits",
+                            "argescapes",
+                            "isspecsig", "precompile", "invoke", "specptr", // function object decls
+                            "relocatability"),
+                        jl_svec(15,
                             jl_method_instance_type,
                             jl_any_type,
                             jl_ulong_type,
@@ -2374,24 +2553,37 @@ void jl_init_types(void) JL_GC_DISABLED
                             jl_any_type,
                             //jl_any_type,
                             //jl_bool_type,
+                            jl_uint32_type, jl_uint32_type,
+                            jl_any_type,
                             jl_bool_type,
                             jl_bool_type,
-                            jl_any_type, jl_any_type), // fptrs
+                            jl_any_type, jl_any_type, // fptrs
+                            jl_uint8_type),
+                        jl_emptysvec,
                         0, 1, 1);
     jl_svecset(jl_code_instance_type->types, 1, jl_code_instance_type);
+    const static uint32_t code_instance_constfields[1] = { 0x00000001 }; // (1<<1);
+    jl_code_instance_type->name->constfields = code_instance_constfields;
 
     jl_const_type = jl_new_datatype(jl_symbol("Const"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(1, "val"),
-                                       jl_svec1(jl_any_type), 0, 0, 1);
+                                       jl_svec1(jl_any_type),
+                                       jl_emptysvec, 0, 0, 1);
 
     jl_partial_struct_type = jl_new_datatype(jl_symbol("PartialStruct"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(2, "typ", "fields"),
-                                       jl_svec2(jl_any_type, jl_array_any_type), 0, 0, 2);
+                                       jl_svec2(jl_datatype_type, jl_array_any_type),
+                                       jl_emptysvec, 0, 0, 2);
+
+    jl_interconditional_type = jl_new_datatype(jl_symbol("InterConditional"), core, jl_any_type, jl_emptysvec,
+                                          jl_perm_symsvec(3, "slot", "vtype", "elsetype"),
+                                          jl_svec(3, jl_long_type, jl_any_type, jl_any_type),
+                                          jl_emptysvec, 0, 0, 3);
 
     jl_method_match_type = jl_new_datatype(jl_symbol("MethodMatch"), core, jl_any_type, jl_emptysvec,
                                        jl_perm_symsvec(4, "spec_types", "sparams", "method", "fully_covers"),
-                                       jl_svec(4, jl_type_type, jl_simplevector_type, jl_method_type, jl_bool_type), 0, 0, 4);
-
+                                       jl_svec(4, jl_type_type, jl_simplevector_type, jl_method_type, jl_bool_type),
+                                       jl_emptysvec, 0, 0, 4);
 
     // all Kinds share the Type method table (not the nonfunction one)
     jl_unionall_type->name->mt = jl_uniontype_type->name->mt = jl_datatype_type->name->mt =
@@ -2431,7 +2623,7 @@ void jl_init_types(void) JL_GC_DISABLED
                                           (jl_value_t*)jl_anytuple_type);
     tv = jl_svec2(tvar("names"), ntval_var);
     jl_datatype_t *ntt = jl_new_datatype(jl_symbol("NamedTuple"), core, jl_any_type, tv,
-                                         jl_emptysvec, jl_emptysvec, 0, 0, 0);
+                                         jl_emptysvec, jl_emptysvec, jl_emptysvec, 0, 0, 0);
     jl_namedtuple_type = (jl_unionall_t*)ntt->name->wrapper;
     ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_namedtuple_type))->layout = NULL;
     jl_namedtuple_typename = ntt->name;
@@ -2441,19 +2633,23 @@ void jl_init_types(void) JL_GC_DISABLED
                         NULL,
                         jl_any_type,
                         jl_emptysvec,
-                        jl_perm_symsvec(10,
+                        jl_perm_symsvec(15,
                                         "next",
                                         "queue",
                                         "storage",
                                         "donenotify",
                                         "result",
-                                        "exception",
                                         "logstate",
                                         "code",
+                                        "rngState0",
+                                        "rngState1",
+                                        "rngState2",
+                                        "rngState3",
                                         "_state",
-                                        "sticky"),
-                        jl_svec(10,
-                                jl_any_type,
+                                        "sticky",
+                                        "_isexception",
+                                        "priority"),
+                        jl_svec(15,
                                 jl_any_type,
                                 jl_any_type,
                                 jl_any_type,
@@ -2461,49 +2657,62 @@ void jl_init_types(void) JL_GC_DISABLED
                                 jl_any_type,
                                 jl_any_type,
                                 jl_any_type,
+                                jl_uint64_type,
+                                jl_uint64_type,
+                                jl_uint64_type,
+                                jl_uint64_type,
                                 jl_uint8_type,
-                                jl_bool_type),
-                        0, 1, 7);
+                                jl_bool_type,
+                                jl_bool_type,
+                                jl_uint16_type),
+                        jl_emptysvec,
+                        0, 1, 6);
     jl_value_t *listt = jl_new_struct(jl_uniontype_type, jl_task_type, jl_nothing_type);
     jl_svecset(jl_task_type->types, 0, listt);
+    jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header;
 
-    // complete builtin type metadata
     jl_value_t *pointer_void = jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_nothing_type);
+
+    tv = jl_svec2(tvar("A"), tvar("R"));
+    jl_opaque_closure_type = (jl_unionall_t*)jl_new_datatype(jl_symbol("OpaqueClosure"), core, jl_function_type, tv,
+        jl_perm_symsvec(5, "captures", "world", "source", "invoke", "specptr"),
+        jl_svec(5, jl_any_type, jl_long_type, jl_any_type, pointer_void, pointer_void),
+        jl_emptysvec, 0, 0, 5)->name->wrapper;
+    jl_opaque_closure_typename = ((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type))->name;
+    jl_compute_field_offsets((jl_datatype_t*)jl_unwrap_unionall((jl_value_t*)jl_opaque_closure_type));
+
+    jl_partial_opaque_type = jl_new_datatype(jl_symbol("PartialOpaque"), core, jl_any_type, jl_emptysvec,
+        jl_perm_symsvec(4, "typ", "env", "parent", "source"),
+        jl_svec(4, jl_type_type, jl_any_type, jl_method_instance_type, jl_method_type),
+        jl_emptysvec, 0, 0, 4);
+
+    // complete builtin type metadata
     jl_voidpointer_type = (jl_datatype_t*)pointer_void;
     jl_uint8pointer_type = (jl_datatype_t*)jl_apply_type1((jl_value_t*)jl_pointer_type, (jl_value_t*)jl_uint8_type);
-    jl_svecset(jl_datatype_type->types, 6, jl_voidpointer_type);
+    jl_svecset(jl_datatype_type->types, 5, jl_voidpointer_type);
+    jl_svecset(jl_datatype_type->types, 6, jl_int32_type);
     jl_svecset(jl_datatype_type->types, 7, jl_int32_type);
-    jl_svecset(jl_datatype_type->types, 8, jl_int32_type);
-    jl_svecset(jl_datatype_type->types, 9, jl_int32_type);
-    jl_svecset(jl_datatype_type->types, 10, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 11, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 12, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 13, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 14, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 15, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 16, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 17, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 18, jl_bool_type);
-    jl_svecset(jl_datatype_type->types, 19, jl_bool_type);
+    jl_svecset(jl_datatype_type->types, 8, jl_uint8_type);
     jl_svecset(jl_typename_type->types, 1, jl_module_type);
-    jl_svecset(jl_typename_type->types, 6, jl_long_type);
-    jl_svecset(jl_typename_type->types, 3, jl_type_type);
+    jl_svecset(jl_typename_type->types, 3, jl_voidpointer_type);
+    jl_svecset(jl_typename_type->types, 4, jl_voidpointer_type);
+    jl_svecset(jl_typename_type->types, 5, jl_type_type);
+    jl_svecset(jl_typename_type->types, 6, jl_type_type);
+    jl_svecset(jl_typename_type->types, 11, jl_long_type);
+    jl_svecset(jl_typename_type->types, 12, jl_int32_type);
+    jl_svecset(jl_typename_type->types, 13, jl_uint8_type);
+    jl_svecset(jl_typename_type->types, 14, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 4, jl_long_type);
     jl_svecset(jl_methtable_type->types, 6, jl_module_type);
     jl_svecset(jl_methtable_type->types, 7, jl_array_any_type);
-#ifdef __LP64__
-    jl_svecset(jl_methtable_type->types, 8, jl_int64_type); // unsigned long
-    jl_svecset(jl_methtable_type->types, 9, jl_int64_type); // uint32_t plus alignment
-#else
-    jl_svecset(jl_methtable_type->types, 8, jl_int32_type); // DWORD
-    jl_svecset(jl_methtable_type->types, 9, jl_int32_type); // uint32_t
-#endif
+    jl_svecset(jl_methtable_type->types, 8, jl_long_type); // voidpointer
+    jl_svecset(jl_methtable_type->types, 9, jl_long_type); // uint32_t plus alignment
     jl_svecset(jl_methtable_type->types, 10, jl_uint8_type);
     jl_svecset(jl_methtable_type->types, 11, jl_uint8_type);
-    jl_svecset(jl_method_type->types, 11, jl_method_instance_type);
-    jl_svecset(jl_method_instance_type->types, 5, jl_code_instance_type);
-    jl_svecset(jl_code_instance_type->types, 9, jl_voidpointer_type);
-    jl_svecset(jl_code_instance_type->types, 10, jl_voidpointer_type);
+    jl_svecset(jl_method_type->types, 12, jl_method_instance_type);
+    jl_svecset(jl_method_instance_type->types, 6, jl_code_instance_type);
+    jl_svecset(jl_code_instance_type->types, 12, jl_voidpointer_type);
+    jl_svecset(jl_code_instance_type->types, 13, jl_voidpointer_type);
 
     jl_compute_field_offsets(jl_datatype_type);
     jl_compute_field_offsets(jl_typename_type);
@@ -2518,10 +2727,10 @@ void jl_init_types(void) JL_GC_DISABLED
     jl_compute_field_offsets(jl_symbol_type);
 
     // override the preferred layout for a couple types
-    jl_lineinfonode_type->isinlinealloc = 0; // FIXME: assumed to be a pointer by codegen
+    jl_lineinfonode_type->name->mayinlinealloc = 0; // FIXME: assumed to be a pointer by codegen
     // It seems like we probably usually end up needing the box for kinds (used in an Any context)--but is that true?
-    jl_uniontype_type->isinlinealloc = 0;
-    jl_unionall_type->isinlinealloc = 0;
+    jl_uniontype_type->name->mayinlinealloc = 0;
+    jl_unionall_type->name->mayinlinealloc = 0;
 }
 
 #ifdef __cplusplus
diff --git a/src/julia-parser.scm b/src/julia-parser.scm
index d4f83a3bfdbbcb..97a11df701a371 100644
--- a/src/julia-parser.scm
+++ b/src/julia-parser.scm
@@ -4,24 +4,24 @@
 (define (add-dots ops) (append! ops (map (lambda (op) (symbol (string "." op))) ops)))
 
 (define prec-assignment
-  (append! (add-dots '(= += -= *= /= //= |\\=| ^= ÷= %= <<= >>= >>>= |\|=| &= ⊻= ≔ ⩴ ≕))
+  (append! (add-dots '(= += -= −= *= /= //= |\\=| ^= ÷= %= <<= >>= >>>= |\|=| &= ⊻= ≔ ⩴ ≕))
            (add-dots '(~))
            '(:= $=)))
 ;; comma - higher than assignment outside parentheses, lower when inside
 (define prec-pair (add-dots '(=>)))
 (define prec-conditional '(?))
 (define prec-arrow       (add-dots '(← → ↔ ↚ ↛ ↞ ↠ ↢ ↣ ↦ ↤ ↮ ⇎ ⇍ ⇏ ⇐ ⇒ ⇔ ⇴ ⇶ ⇷ ⇸ ⇹ ⇺ ⇻ ⇼ ⇽ ⇾ ⇿ ⟵ ⟶ ⟷ ⟹ ⟺ ⟻ ⟼ ⟽ ⟾ ⟿ ⤀ ⤁ ⤂ ⤃ ⤄ ⤅ ⤆ ⤇ ⤌ ⤍ ⤎ ⤏ ⤐ ⤑ ⤔ ⤕ ⤖ ⤗ ⤘ ⤝ ⤞ ⤟ ⤠ ⥄ ⥅ ⥆ ⥇ ⥈ ⥊ ⥋ ⥎ ⥐ ⥒ ⥓ ⥖ ⥗ ⥚ ⥛ ⥞ ⥟ ⥢ ⥤ ⥦ ⥧ ⥨ ⥩ ⥪ ⥫ ⥬ ⥭ ⥰ ⧴ ⬱ ⬰ ⬲ ⬳ ⬴ ⬵ ⬶ ⬷ ⬸ ⬹ ⬺ ⬻ ⬼ ⬽ ⬾ ⬿ ⭀ ⭁ ⭂ ⭃ ⭄ ⭇ ⭈ ⭉ ⭊ ⭋ ⭌ ￩ ￫ ⇜ ⇝ ↜ ↝ ↩ ↪ ↫ ↬ ↼ ↽ ⇀ ⇁ ⇄ ⇆ ⇇ ⇉ ⇋ ⇌ ⇚ ⇛ ⇠ ⇢ ↷ ↶ ↺ ↻ --> <-- <-->)))
-(define prec-lazy-or     '(|\|\||))
-(define prec-lazy-and    '(&&))
+(define prec-lazy-or     (add-dots '(|\|\||)))
+(define prec-lazy-and    (add-dots '(&&)))
 (define prec-comparison
   (append! '(in isa)
-           (add-dots '(> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ <: >:))))
+           (add-dots '(> < >= ≥ <= ≤ == === ≡ != ≠ !== ≢ ∈ ∉ ∋ ∌ ⊆ ⊈ ⊂ ⊄ ⊊ ∝ ∊ ∍ ∥ ∦ ∷ ∺ ∻ ∽ ∾ ≁ ≃ ≂ ≄ ≅ ≆ ≇ ≈ ≉ ≊ ≋ ≌ ≍ ≎ ≐ ≑ ≒ ≓ ≖ ≗ ≘ ≙ ≚ ≛ ≜ ≝ ≞ ≟ ≣ ≦ ≧ ≨ ≩ ≪ ≫ ≬ ≭ ≮ ≯ ≰ ≱ ≲ ≳ ≴ ≵ ≶ ≷ ≸ ≹ ≺ ≻ ≼ ≽ ≾ ≿ ⊀ ⊁ ⊃ ⊅ ⊇ ⊉ ⊋ ⊏ ⊐ ⊑ ⊒ ⊜ ⊩ ⊬ ⊮ ⊰ ⊱ ⊲ ⊳ ⊴ ⊵ ⊶ ⊷ ⋍ ⋐ ⋑ ⋕ ⋖ ⋗ ⋘ ⋙ ⋚ ⋛ ⋜ ⋝ ⋞ ⋟ ⋠ ⋡ ⋢ ⋣ ⋤ ⋥ ⋦ ⋧ ⋨ ⋩ ⋪ ⋫ ⋬ ⋭ ⋲ ⋳ ⋴ ⋵ ⋶ ⋷ ⋸ ⋹ ⋺ ⋻ ⋼ ⋽ ⋾ ⋿ ⟈ ⟉ ⟒ ⦷ ⧀ ⧁ ⧡ ⧣ ⧤ ⧥ ⩦ ⩧ ⩪ ⩫ ⩬ ⩭ ⩮ ⩯ ⩰ ⩱ ⩲ ⩳ ⩵ ⩶ ⩷ ⩸ ⩹ ⩺ ⩻ ⩼ ⩽ ⩾ ⩿ ⪀ ⪁ ⪂ ⪃ ⪄ ⪅ ⪆ ⪇ ⪈ ⪉ ⪊ ⪋ ⪌ ⪍ ⪎ ⪏ ⪐ ⪑ ⪒ ⪓ ⪔ ⪕ ⪖ ⪗ ⪘ ⪙ ⪚ ⪛ ⪜ ⪝ ⪞ ⪟ ⪠ ⪡ ⪢ ⪣ ⪤ ⪥ ⪦ ⪧ ⪨ ⪩ ⪪ ⪫ ⪬ ⪭ ⪮ ⪯ ⪰ ⪱ ⪲ ⪳ ⪴ ⪵ ⪶ ⪷ ⪸ ⪹ ⪺ ⪻ ⪼ ⪽ ⪾ ⪿ ⫀ ⫁ ⫂ ⫃ ⫄ ⫅ ⫆ ⫇ ⫈ ⫉ ⫊ ⫋ ⫌ ⫍ ⫎ ⫏ ⫐ ⫑ ⫒ ⫓ ⫔ ⫕ ⫖ ⫗ ⫘ ⫙ ⫷ ⫸ ⫹ ⫺ ⊢ ⊣ ⟂ ⫪ ⫫ <: >:))))
 (define prec-pipe<       '(|.<\|| |<\||))
 (define prec-pipe>       '(|.\|>| |\|>|))
 (define prec-colon       (append! '(: |..|) (add-dots '(… ⁝ ⋮ ⋱ ⋰ ⋯))))
 (define prec-plus        (append! '($)
-                          (add-dots '(+ - |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣))))
-(define prec-times       (add-dots '(* / ÷ % & ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟)))
+                          (add-dots '(+ - − ¦ |\|| ⊕ ⊖ ⊞ ⊟ |++| ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣))))
+(define prec-times       (add-dots '(* / ⌿ ÷ % & · · ⋅ ∘ × |\\| ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗ ⨟)))
 (define prec-rational    (add-dots '(//)))
 (define prec-bitshift    (add-dots '(<< >> >>>)))
 ;; `where`
@@ -101,6 +101,8 @@
 
 (define unary-op? (Set unary-ops))
 
+(define radical-op? (Set '(√ ∛ ∜)))
+
 ; operators that are both unary and binary
 (define unary-and-binary-ops (append! '($ & ~)
                                       (add-dots '(+ - ⋆ ± ∓))))
@@ -109,8 +111,8 @@
 
 ; operators that are special forms, not function names
 (define syntactic-operators
-  (append! (add-dots '(= += -= *= /= //= |\\=| ^= ÷= %= <<= >>= >>>= |\|=| &= ⊻=))
-           '(:= $= && |\|\|| |.| ... ->)))
+  (append! (add-dots '(&& |\|\|| = += -= *= /= //= |\\=| ^= ÷= %= <<= >>= >>>= |\|=| &= ⊻=))
+           '(:= $= |.| ... ->)))
 (define syntactic-unary-operators '($ & |::|))
 
 (define syntactic-op? (Set syntactic-operators))
@@ -219,21 +221,14 @@
 
 (define (newline? c) (eqv? c #\newline))
 
-(define (skip-to-eol port)
-  (let ((c (peek-char port)))
-    (cond ((eof-object? c)    c)
-          ((eqv? c #\newline) c)
-          (else               (read-char port)
-                              (skip-to-eol port)))))
-
 (define (op-or-sufchar? c) (or (op-suffix-char? c) (opchar? c)))
 
-(define (read-operator port c)
-  (if (and (eqv? c #\*) (eqv? (peek-char port) #\*))
+(define (read-operator port c0 (postfix? #f))
+  (if (and (eqv? c0 #\*) (eqv? (peek-char port) #\*))
       (error "use \"x^y\" instead of \"x**y\" for exponentiation, and \"x...\" instead of \"**x\" for splatting."))
   (if (or (eof-object? (peek-char port)) (not (op-or-sufchar? (peek-char port))))
-      (symbol (string c)) ; 1-char operator
-      (let ((str (let loop ((str (string c))
+      (string->normsymbol (string c0)) ; 1-char operator
+      (let ((str (let loop ((str (string c0))
                             (c   (peek-char port))
                             (in-suffix? #f))
                    (if (eof-object? c)
@@ -251,18 +246,21 @@
                                        (and (or (eq? opsym '--) (eq? opsym '.--))
                                             (read-char port)
                                             (or (begin0 (eqv? (peek-char port) #\>)
-                                                        (io.ungetc port #\-))
+                                                        (io.skip port -1)) ; unget -, leaving -
                                                 (error (string "invalid operator \"" newop "\""))))
                                        ;; <- is not an operator but <-- and <--> are
                                        (and (or (eq? opsym '<-) (eq? opsym '.<-))
                                             (read-char port)
                                             (begin0 (eqv? (peek-char port) #\-)
-                                                    (io.ungetc port #\-))))
+                                                    (io.skip port -1))) ; unget -, leaving <
+                                       ;; consume suffixes after ', only if parsing a call chain
+                                       ;; otherwise 'ᵀ' would parse as (|'| |'ᵀ|)
+                                       (and postfix? (eqv? c0 #\') sufchar?))
                                    (begin (read-char port)
                                           (loop newop (peek-char port) sufchar?))
                                    str))
                              str))))))
-        (string->symbol str))))
+        (string->normsymbol str))))
 
 (define (accum-digits c pred port _-digit-sep)
   (let loop ((str '())
@@ -273,7 +271,7 @@
                  (if (and (not (eof-object? c)) (pred c))
                      (loop str c)
                      (begin
-                       (io.ungetc port #\_)
+                       (io.skip port -1) ; unget _
                        (list->string (reverse str))))))
         (if (and (not (eof-object? c)) (pred c))
             (begin (read-char port)
@@ -306,6 +304,9 @@
 (define (numchk n s)
   (or n (error (string "invalid numeric constant \"" s "\""))))
 
+(define (string-lastchar s)
+  (string.char s (string.dec s (length s))))
+
 (define (read-number port leadingdot neg)
   (let ((str  (open-output-string))
         (pred char-numeric?)
@@ -320,7 +321,7 @@
       (if (eqv? (peek-char port) #\.)
           (begin (read-char port)
                  (if (dot-opchar? (peek-char port))
-                     (io.ungetc port #\.)
+                     (io.skip port -1) ; unget .
                      (error (string "invalid numeric constant \""
                                     (get-output-string str) #\. "\""))))))
     (define (read-digs lz _-digit-sep)
@@ -360,7 +361,7 @@
                                           (if (eqv? (peek-char port) #\')
                                               ""
                                               "; add space(s) to clarify")))))
-                     (io.ungetc port #\.))
+                     (io.skip port -1)) ; unget .
                    (begin (write-char #\. str)
                           (read-digs #f #t)
                           (if (eq? pred char-hex?)
@@ -372,16 +373,20 @@
               (and (eq? pred char-hex?) ispP)
               (memv c '(#\e #\E #\f)))
           (begin (read-char port)
-                 (let ((d (peek-char port)))
+                 (let* ((d (peek-char port))
+                        (is-minus-sign (or (eqv? d #\-) (eqv? d #\u2212))))
                    (if (and (not (eof-object? d))
-                            (or (char-numeric? d) (eqv? d #\+) (eqv? d #\-)))
+                            (or (char-numeric? d) (eqv? d #\+) is-minus-sign))
                        (begin (set! is-float32-literal (eqv? c #\f))
                               (set! is-hex-float-literal ispP)
                               (write-char c str)
-                              (write-char (read-char port) str)
+                              (if is-minus-sign
+                                  (begin (read-char port)
+                                         (write-char #\- str))
+                                  (write-char (read-char port) str))
                               (read-digs #t #f)
                               (disallow-dot))
-                       (io.ungetc port c)))))
+                       (io.skip port -1))))) ; unget c
       (if (and (char? c)
                (or (eq? pred char-bin?) (eq? pred char-oct?)
                    (and (eq? pred char-hex?) (not is-hex-float-literal)))
@@ -403,7 +408,7 @@
                    (string.sub s 1)
                    s)
                r is-float32-literal)))
-      (if (and (eqv? #\. (string.char s (string.dec s (length s))))
+      (if (and (eqv? #\. (string-lastchar s))
                (let ((nxt (peek-char port)))
                  (and (not (eof-object? nxt))
                       (or (identifier-start-char? nxt)
@@ -436,7 +441,7 @@
           ((<= l 32)  (numchk n s) (uint32 n))
           ((<= l 64)  (numchk n s) (uint64 n))
           ((<= l 128) `(macrocall (core @uint128_str) (null) ,s))
-          (else       (error "Hex or binary literal too large for UInt128")))))
+          (else       `(macrocall (core @big_str) (null) ,s)))))
 
 (define (sized-uint-oct-literal n s)
   (if (string.find s "o0")
@@ -449,7 +454,7 @@
           (begin (if (equal? s "0o") (numchk n s))
                  (if (oct-within-uint128? s)
                      `(macrocall (core @uint128_str) (null) ,s)
-                     (error "Octal literal too large for UInt128"))))))
+                     `(macrocall (core @big_str) (null) ,s))))))
 
 (define (strip-leading-0s s)
   (define (loop i)
@@ -483,33 +488,56 @@
        (pair? (cadr t)) (eq? (car (cadr t)) 'core)
        (memq (cadadr t) '(@int128_str @uint128_str @big_str))))
 
+(define (make-bidi-state) '(0 . 0))
+
+(define (update-bidi-state st c)
+  (case c
+    ((#\U202A #\U202B #\U202D #\U202E) (cons (+ (car st) 1) (cdr st))) ;; LRE RLE LRO RLO
+    ((#\U2066 #\U2067 #\U2068)         (cons (car st) (+ (cdr st) 1))) ;; LRI RLI FSI
+    ((#\U202C)                         (cons (- (car st) 1) (cdr st))) ;; PDF
+    ((#\U2069)                         (cons (car st) (- (cdr st) 1))) ;; PDI
+    ((#\newline)                       '(0 . 0))
+    (else st)))
+
+(define (bidi-state-terminated? st) (equal? st '(0 . 0)))
+
+(define (skip-line-comment port)
+  (let ((c (peek-char port)))
+    (cond ((eof-object? c)    c)
+          ((eqv? c #\newline) c)
+          (else               (read-char port)
+                              (skip-line-comment port)))))
+
+(define (skip-multiline-comment port count bds)
+  (let ((c (read-char port)))
+    (if (eof-object? c)
+        (error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
+        (if (eqv? c #\=)
+            (let ((c (peek-char port)))
+              (if (eqv? c #\#)
+                  (begin
+                    (read-char port)
+                    (if (> count 1)
+                        (skip-multiline-comment port (- count 1) bds)
+                        (if (not (bidi-state-terminated? bds))
+                            (error "unbalanced bidirectional formatting in comment"))))
+                  (skip-multiline-comment port count (update-bidi-state bds c))))
+            (if (eqv? c #\#)
+                (skip-multiline-comment port
+                                        (if (eqv? (peek-char port) #\=)
+                                            (begin (read-char port)
+                                                   (+ count 1))
+                                            count)
+                                        bds)
+                (skip-multiline-comment port count (update-bidi-state bds c)))))))
+
 ;; skip to end of comment, starting at #:  either #...<eol> or #= .... =#.
 (define (skip-comment port)
-  (define (skip-multiline-comment port count)
-    (let ((c (read-char port)))
-      (if (eof-object? c)
-          (error "incomplete: unterminated multi-line comment #= ... =#") ; NOTE: changing this may affect code in base/client.jl
-          (begin (if (eqv? c #\=)
-                     (let ((c (peek-char port)))
-                       (if (eqv? c #\#)
-                           (begin
-                             (read-char port)
-                             (if (> count 1)
-                                 (skip-multiline-comment port (- count 1))))
-                           (skip-multiline-comment port count)))
-                     (if (eqv? c #\#)
-                         (skip-multiline-comment port
-                                                 (if (eqv? (peek-char port) #\=)
-                                                     (begin (read-char port)
-                                                            (+ count 1))
-                                                     count))
-                         (skip-multiline-comment port count)))))))
-
   (read-char port) ; read # that was already peeked
   (if (eqv? (peek-char port) #\=)
       (begin (read-char port) ; read initial =
-             (skip-multiline-comment port 1))
-      (skip-to-eol port)))
+             (skip-multiline-comment port 1 (make-bidi-state)))
+      (skip-line-comment port)))
 
 (define (skip-ws-and-comments port)
   (skip-ws port #t)
@@ -529,42 +557,44 @@
 (define (scolno port) (string " near column " (input-port-column port)))
 
 (define (next-token port s)
-  (aset! s 2 (eq? (skip-ws port whitespace-newline) #t))
-  (let ((c (peek-char port)))
-    (cond ((or (eof-object? c) (eqv? c #\newline))  (read-char port))
-
-          ((identifier-start-char? c)     (accum-julia-symbol c port))
+  (let loop ((comment-induced-whitespace #f))
+    (aset! s 2 (or (eq? (skip-ws port whitespace-newline) #t)
+                   comment-induced-whitespace))
+    (let ((c (peek-char port)))
+      (cond ((or (eof-object? c) (eqv? c #\newline))  (read-char port))
+
+            ((identifier-start-char? c)     (accum-julia-symbol c port))
+
+            ((string.find "()[]{},;\"`@" c) (read-char port))
+
+            ((string.find "0123456789" c)   (read-number port #f #f))
+
+            ((eqv? c #\#)                   (skip-comment port) (loop #t))
+
+            ;; . is difficult to handle; it could start a number or operator
+            ((and (eqv? c #\.)
+                  (let ((c (read-char port))
+                        (nextc (peek-char port)))
+                    (cond ((eof-object? nextc)
+                           '|.|)
+                          ((char-numeric? nextc)
+                           (read-number port #t #f))
+                          ((opchar? nextc)
+                           (let* ((op (read-operator port c))
+                                  (nx (peek-char port)))
+                             (if (and (eq? op '..) (opchar? nx) (not (memv nx '(#\' #\:))))
+                                 (error (string "invalid operator \"" op nx "\"" (scolno port))))
+                             op))
+                          (else '|.|)))))
+
+            ((opchar? c)  (read-operator port (read-char port)))
 
-          ((string.find "()[]{},;\"`@" c) (read-char port))
-
-          ((string.find "0123456789" c)   (read-number port #f #f))
-
-          ((eqv? c #\#)                   (skip-comment port) (next-token port s))
-
-          ;; . is difficult to handle; it could start a number or operator
-          ((and (eqv? c #\.)
-                (let ((c (read-char port))
-                      (nextc (peek-char port)))
-                  (cond ((eof-object? nextc)
-                         '|.|)
-                        ((char-numeric? nextc)
-                         (read-number port #t #f))
-                        ((opchar? nextc)
-                         (let* ((op (read-operator port c))
-                                (nx (peek-char port)))
-                           (if (and (eq? op '..) (opchar? nx) (not (memv nx '(#\' #\:))))
-                               (error (string "invalid operator \"" op nx "\"" (scolno port))))
-                           op))
-                        (else '|.|)))))
-
-          ((opchar? c)  (read-operator port (read-char port)))
-
-          (else
-           (let ((cn (input-port-column port)))
-             (read-char port)
-             (if (default-ignorable-char? c)
-                 (error (string "invisible character \\u" (number->string (fixnum c) 16) " near column " (+ 1 cn)))
-                 (error (string "invalid character \"" c "\" near column " (+ 1 cn)))))))))
+            (else
+              (let ((cn (input-port-column port)))
+                (read-char port)
+                (if (default-ignorable-char? c)
+                    (error (string "invisible character \\u" (number->string (fixnum c) 16) " near column " (+ 1 cn)))
+                    (error (string "invalid character \"" c "\" near column " (+ 1 cn))))))))))
 
 ;; --- token stream ---
 
@@ -606,15 +636,6 @@
 (define (space-before-next-token? s)
   (or (skip-ws (ts:port s) #f) (eqv? #\newline (peek-char (ts:port s)))))
 
-;; --- misc ---
-
-; Log a syntax deprecation, attributing it to current-filename and the line
-; number of the stream `s`
-(define (parser-depwarn s what instead)
-  (let ((line (if (number? s) s (input-port-line (if (port? s) s (ts:port s)))))
-        (file current-filename))
-    (frontend-depwarn (format-syntax-deprecation what instead file line #t) file line)))
-
 ;; --- parser ---
 
 ;; parse left-to-right binary operator
@@ -968,7 +989,7 @@
                 (not (memv t '(#\( #\[ #\{))))
            )
        (not (ts:space? s))
-       (not (operator? t))
+       (or (not (operator? t)) (radical-op? t))
        (not (closing-token? t))
        (not (newline? t))
        (or (and (not (string? expr)) (not (eqv? t #\")))
@@ -991,7 +1012,7 @@
             (begin
               #;(if (and (number? ex) (= ex 0))
                     (error "juxtaposition with literal \"0\""))
-              (let ((next (parse-factor s)))
+              (let ((next (if (radical-op? next) (parse-unary s) (parse-factor s))))
                 (loop `(call * ,ex ,next)
                       (cons next args))))
             (if (length= args 1)
@@ -1045,7 +1066,10 @@
 (define (parse-unary-call s op un spc)
   (let ((next (peek-token s)))
     (cond ((or (closing-token? next) (newline? next) (eq? next '=))
-           op)  ; return operator by itself, as in (+)
+           (if (dotop? op)
+               ;; standalone dotted operators are parsed as (|.| op)
+               (list '|.| (undotop op))
+               op))  ; return operator by itself, as in (+)
           ((or (eqv? next #\{)  ;; this case is +{T}(x::T) = ...
                (and (not un) (eqv? next #\( )))
            (ts:put-back! s op spc)
@@ -1208,6 +1232,8 @@
                           (loop (list* 'typed_vcat ex (cdr al))))
                          ((comprehension)
                           (loop (list* 'typed_comprehension ex (cdr al))))
+                         ((ncat)
+                          (loop (list* 'typed_ncat ex (cdr al))))
                          (else (error "unknown parse-cat result (internal error)")))))))
             ((|.|)
              (disallow-space s ex t)
@@ -1233,15 +1259,25 @@
                        `(|.| ,ex (inert ($ ,dollarex)))))
                     (else
                      (let ((name (parse-atom s #f)))
-                       (if (and (pair? name) (eq? (car name) 'macrocall))
-                           `(macrocall (|.| ,ex (quote ,(cadr name))) ; move macrocall outside by rewriting A.@B as @A.B
-                                       ,@(cddr name))
-                           `(|.| ,ex (quote ,name))))))))
+                       (cond ((and (pair? name) (eq? (car name) 'macrocall))
+                              `(macrocall (|.| ,ex (quote ,(cadr name))) ; move macrocall outside by rewriting A.@B as @A.B
+                                          ,@(cddr name)))
+                             ((and (pair? name) (eq? (car name) 'do) (eq? (caadr name) 'macrocall))
+                              `(do ,(let ((name (cadr name)))
+                                      `(macrocall (|.| ,ex (quote ,(cadr name))) ; move macrocall outside by rewriting `A.@B() do; end` as `@A.B() do; end`
+                                                  ,@(cddr name)))
+                                   ,(caddr name)))
+                           (else
+                            `(|.| ,ex (quote ,name)))))))))
             ((|'|)
-             (if (ts:space? s)
-                 (error (string "space not allowed before \"" t "\"")))
-             (take-token s)
-             (loop (list t ex)))
+             (if (not (ts:space? s))
+                 (begin
+                   (take-token s)
+                   (let ((t (read-operator (ts:port s) #\' #t)))
+                     (loop (if (eq? t '|'|)
+                               (list t ex)
+                               (list 'call t ex)))))
+                 ex))
             ((|.'|) (error "the \".'\" operator is discontinued"))
             ((#\{ )
              (disallow-space s ex t)
@@ -1260,11 +1296,13 @@
                                        (parse-raw-literal s t)))
                         (nxt (peek-token s))
                         (macname (macroify-name ex (macsuffix t))))
-                   (if (and (symbol? nxt) (not (operator? nxt))
+                   (if (and (or (symbol? nxt) (number? nxt) (large-number? nxt)) (not (operator? nxt))
                             (not (ts:space? s)))
                        ;; string literal suffix, "s"x
                        (loop `(macrocall ,macname ,startloc ,macstr
-                                         ,(string (take-token s))))
+                                         ,(if (symbol? nxt)
+                                              (string (take-token s))
+                                              (take-token s))))
                        (loop `(macrocall ,macname ,startloc ,macstr))))
                  ex))
             (else ex))))))
@@ -1304,7 +1342,8 @@
 (define (valid-1arg-func-sig? sig)
   (or (symbol? sig)
       (and (pair? sig) (eq? (car sig) '|::|)
-           (symbol? (cadr sig)))))
+           (or (symbol? (cadr sig))
+               (length= sig 2)))))
 
 (define (unwrap-where x)
   (if (and (pair? x) (eq? (car x) 'where))
@@ -1350,14 +1389,14 @@
             (if (eq? word 'quote)
                 (list 'quote blk)
                 blk))))
-       ((while)  (begin0 (list 'while (parse-cond s) (parse-block s))
+       ((while)  (begin0 (list 'while (parse-cond s) (append (parse-block s) (list (line-number-node s))))
                          (expect-end s word)))
        ((for)
         (let* ((ranges (parse-comma-separated-iters s))
                (body   (parse-block s)))
           (expect-end s word)
           `(for ,(if (length= ranges 1) (car ranges) (cons 'block ranges))
-                ,body)))
+                ,(append body (list (line-number-node s))))))
 
        ((let)
         (let ((binds (if (memv (peek-token s) '(#\newline #\;))
@@ -1413,17 +1452,11 @@
                (expr  (if (and (pair? assgn) (eq? (car assgn) 'tuple))
                           (cons word (cdr assgn))
                           (list word assgn))))
-          (if const
+          (if const ;; normalize `global const` and `const global`
               `(const ,expr)
               expr)))
        ((const)
-        (let ((assgn (parse-eq s)))
-          (if (not (and (pair? assgn)
-                        (or (eq? (car assgn) '=)
-                            (eq? (car assgn) 'global)
-                            (eq? (car assgn) 'local))))
-              (error "expected assignment after \"const\"")
-              `(const ,assgn))))
+        `(const ,(parse-eq s)))
 
        ((function macro)
         (let* ((loc   (line-number-node s))
@@ -1444,7 +1477,9 @@
                                    ;; function foo  =>  syntax error
                                    (error (string "expected \"(\" in " word " definition")))
                                (if (not (valid-func-sig? paren sig))
-                                   (error (string "expected \"(\" in " word " definition"))
+                                   (if paren
+                                       (error (string "ambiguous signature in " word " definition. Try adding a comma if this is a 1-argument anonymous function."))
+                                       (error (string "expected \"(\" in " word " definition")))
                                    sig)))
                      (body (parse-block s)))
                 (expect-end s word)
@@ -1481,29 +1516,33 @@
           (let loop ((nxt    (peek-token s))
                      (catchb #f)
                      (catchv #f)
-                     (finalb #f))
+                     (finalb #f)
+                     (elseb #f))
             (take-token s)
             (cond
              ((eq? nxt 'end)
               (list* 'try try-block (or catchv '(false))
                      (or catchb (if finalb '(false) (error "try without catch or finally")))
-                     (if finalb (list finalb) '())))
+                     (cond (elseb  (list (or finalb '(false)) elseb))
+                           (finalb (list finalb))
+                           (else   '()))))
              ((and (eq? nxt 'catch)
                    (not catchb))
               (let ((nl (memv (peek-token s) '(#\newline #\;))))
                 (if (eqv? (peek-token s) #\;)
                     (take-token s))
-                (if (memq (require-token s) '(end finally))
+                (if (memq (require-token s) '(end finally else))
                     (loop (require-token s)
                           '(block)
                           #f
-                          finalb)
+                          finalb
+                          elseb)
                     (let* ((loc (line-number-node s))
                            (var (if nl #f (parse-eq* s)))
                            (var? (and (not nl) (or (symbol? var)
                                                    (and (length= var 2) (eq? (car var) '$))
                                                    (error (string "invalid syntax \"catch " (deparse var) "\"")))))
-                           (catch-block (if (eq? (require-token s) 'finally)
+                           (catch-block (if (memq (require-token s) '(finally else))
                                             `(block ,(line-number-node s))
                                             (parse-block s))))
                       (loop (require-token s)
@@ -1515,16 +1554,30 @@
                                               '()
                                               (cdr catch-block))))
                             (if var? var '(false))
-                            finalb)))))
+                            finalb
+                            elseb)))))
              ((and (eq? nxt 'finally)
                    (not finalb))
-              (let ((fb (if (eq? (require-token s) 'catch)
+              (let ((fb (if (memq (require-token s) '(catch else))
                             '(block)
                             (parse-block s))))
                 (loop (require-token s)
                       catchb
                       catchv
-                      fb)))
+                      fb
+                      elseb)))
+             ((and (eq? nxt 'else)
+                   (not elseb))
+              (if (or (not catchb) finalb)
+                  (error "else inside try block needs to be immediately after catch"))
+              (let ((eb (if (eq? (require-token s) 'finally)
+                            '(block)
+                            (parse-block s))))
+                (loop (require-token s)
+                      catchb
+                      catchv
+                      finalb
+                      eb)))
              (else (expect-end-error nxt 'try))))))
        ((return)          (let ((t (peek-token s)))
                             (if (or (eqv? t #\newline) (closing-token? t))
@@ -1576,18 +1629,21 @@
       e))
 
 (define (parse-imports s word)
-  (let* ((first (parse-import s word))
+  (let* ((first (parse-import s word #f))
          (next  (peek-token s))
-         (from  (and (eq? next ':) (not (ts:space? s))))
+         (from  (and (eq? next ':) (not (ts:space? s))
+                     (or (not (and (pair? first) (eq? (car first) 'as)))
+                         (error (string "invalid syntax \"" word " " (deparse first) ":\"")))))
          (done  (cond ((or from (eqv? next #\,))
                        (begin (take-token s) #f))
-                      ((or (eq? next '|.|)
+                      ;; TODO: this seems to be wrong; figure out if it's needed
+                      #;((or (eq? next '|.|)
                            (eqv? (string.sub (string next) 0 1) ".")) #f)
                       (else #t)))
          (rest  (if done
                     '()
                     (let ((ex (parse-comma-separated s (lambda (s)
-                                                         (parse-import s word)))))
+                                                         (parse-import s word from)))))
                       (if (eq? (peek-token s) ':)
                           (error (string "\":\" in \"" word "\" syntax can only be used "
                                          "when importing a single module. "
@@ -1595,7 +1651,24 @@
                           ex)))))
     (if from
         `(,word (|:| ,first ,@rest))
-        (list* word first rest))))
+        (begin (if (and (eq? word 'using) (pair? first) (eq? (car first) 'as))
+                   (error (string "invalid syntax \"using " (deparse-import-path (cadr first)) " as ...\"")))
+               (list* word first rest)))))
+
+(define (parse-macro-name s)
+  (let ((nxt (peek-token s)))
+    (disallow-space s '@ nxt))
+  (with-space-sensitive
+   (if (eq? (peek-token s) '|.|)
+       (begin (take-token s) '__dot__)
+       (parse-atom s #f))))
+
+(define (parse-atsym s)
+  (let ((t (peek-token s)))
+    (if (eqv? t #\@)
+        (begin (take-token s)
+               (macroify-name (parse-macro-name s)))
+        (parse-unary-prefix s))))
 
 (define (parse-import-dots s)
   (let loop ((l '())
@@ -1613,9 +1686,9 @@
            (begin (take-token s)
                   (loop (list* '|.| '|.| '|.| '|.| l) (peek-token s))))
           (else
-           (cons (macrocall-to-atsym (parse-unary-prefix s)) l)))))
+           (cons (parse-atsym s) l)))))
 
-(define (parse-import s word)
+(define (parse-import-path s word)
   (let loop ((path (parse-import-dots s)))
     (if (not (symbol-or-interpolate? (car path)))
         (error (string "invalid \"" word "\" statement: expected identifier")))
@@ -1624,7 +1697,7 @@
        ((eq? nxt '|.|)
         (disallow-space s (car path) nxt)
         (take-token s)
-        (loop (cons (unquote (macrocall-to-atsym (parse-unary-prefix s))) path)))
+        (loop (cons (unquote (parse-atsym s)) path)))
        ((or (memv nxt '(#\newline #\; #\, :))
             (eof-object? nxt))
         (cons '|.| (reverse path)))
@@ -1635,6 +1708,16 @@
        (else
         (cons '|.| (reverse path)))))))
 
+(define (parse-import s word from)
+  (let ((path (parse-import-path s word)))
+    (if (eq? (peek-token s) 'as)
+        (begin
+          (if (and (not from) (eq? word 'using))
+              (error (string "invalid syntax \"using " (deparse-import-path path) " as ...\"")))
+          (take-token s)
+          `(as ,path ,(parse-atsym s)))
+        path)))
+
 ;; parse comma-separated assignments, like "i=1:n,j=1:m,..."
 (define (parse-comma-separated s what)
   (let loop ((exprs '()))
@@ -1650,33 +1733,27 @@
 
 ;; as above, but allows both "i=r" and "i in r"
 (define (parse-iteration-spec s)
-  (let* ((outer? (if (eq? (peek-token s) 'outer)
-                     (begin
-                       (take-token s)
-                       (let ((nxt (peek-token s)))
-                         (if (or (memq nxt '(= in ∈))
-                                 (not (symbol? nxt))
-                                 (operator? nxt))
-                             (begin (ts:put-back! s 'outer #t)
-                                    #f)
-                             #t)))
-                     #f))
-         (lhs (parse-pipe< s))
-         (t   (peek-token s)))
-    (cond ((memq t '(= in ∈))
-           (take-token s)
-           (let* ((rhs (parse-pipe< s))
-                  (t   (peek-token s)))
-             #;(if (not (or (closing-token? t) (newline? t)))
-                 ;; should be: (error "invalid iteration specification")
-                 (parser-depwarn s (string "for " (deparse `(= ,lhs ,rhs)) " " t)
-                                 (string "for " (deparse `(= ,lhs ,rhs)) "; " t)))
-             (if outer?
-                 `(= (outer ,lhs) ,rhs)
-                 `(= ,lhs ,rhs))))
-          ((and (eq? lhs ':) (closing-token? t))
-           ':)
-          (else (error "invalid iteration specification")))))
+  ;; FIXME: this is just for backwards compatibility, allows newline before =/in/∈ in
+  ;; generator expressions
+  (define (peek-token- s)
+    (let ((t (peek-token s)))
+      (if (and for-generator (newline? t))
+          (begin (take-token s)
+                 (peek-token s))
+          t)))
+  (let* ((lhs (let ((lhs- (with-space-sensitive (parse-pipe< s))))
+                (if (eq? lhs- 'outer)
+                    (let ((nxt (peek-token- s)))
+                      (if (memq nxt '(= in ∈))
+                          lhs-
+                          `(outer ,(parse-pipe< s))))
+                    lhs-)))
+         (t (peek-token- s)))
+    (if (memq t '(= in ∈))
+        (begin
+          (take-token s)
+          `(= ,lhs ,(parse-pipe< s)))
+        (error "invalid iteration specification"))))
 
 (define (parse-comma-separated-iters s)
   (let loop ((ranges '()))
@@ -1809,52 +1886,125 @@
          (take-token s))
      `(comprehension ,gen))))
 
-(define (parse-matrix s first closer gotnewline last-end-symbol)
-  (define (fix head v) (cons head (reverse v)))
-  (define (update-outer v outer)
-    (cond ((null? v)       outer)
-          ((null? (cdr v)) (cons (car v) outer))
-          (else            (cons (fix 'row v) outer))))
-  (define semicolon (eqv? (peek-token s) #\;))
+(define (parse-array s first closer gotnewline last-end-symbol)
+  (define (fix head v)
+    (cons head (reverse v)))
+  (define (unfixrow l)
+    (cons (reverse (cdaar l)) (if (and (null? (cdar l)) (null? (cdr l)))
+                                  '()
+                                  (cons (cdar l) (cdr l)))))
+  (define (fixcat head d v)
+    (cons head (cons d (reverse v))))
+  (define (ncons a n l)
+    (if (< n 1)
+        l
+        (ncons a (1- n) (cons a l))))
+  (define (fix-level ah n)
+     (if (length= ah 1)
+         (car ah)
+         (if (= n 1)
+             (fix 'row ah)
+             (fixcat 'nrow (1- n) ah))))
+  (define (collapse-level n l i)
+    (if (> n 0)
+        (let* ((lhfix (fix-level (car l) i))
+               (lnew (if (null? (cdr l))
+                         (list (list lhfix))
+                         (cons (cons lhfix (cadr l)) (cddr l)))))
+          (collapse-level (1- n) lnew (1+ i)))
+        l))
+  (define (parse-array-inner s a is-row-first semicolon-count max-level closer gotnewline gotlinesep)
+    (define (process-semicolon next)
+      (set! semicolon-count (1+ semicolon-count))
+      (set! max-level (max max-level semicolon-count))
+      (if (and (null? is-row-first) (= semicolon-count 2) (not (eqv? next #\;)))
+          ; finding ;; that isn't a row-separator makes it column-first
+          (set! is-row-first #f))
+      (set! a (collapse-level 1 a semicolon-count)))
+    (define (restore-lower-dim-lists next)
+      (if (and (not gotlinesep) (not (memv next (list #\; 'for closer #\newline))))
+          (set! a (ncons '() semicolon-count a))))
+    (let ((t (if (or gotnewline (eqv? (peek-token s) #\newline))
+                 #\newline
+                 (require-token s))))
+      (if (eqv? t closer)
+          (begin
+            (take-token s)
+            (set! a (collapse-level (- max-level semicolon-count) a (1+ semicolon-count)))
+            (cond ((= max-level 0)
+                   (if (length= (car a) 1)
+                       (fix 'vect (car a))
+                       (fix 'hcat (car a))))
+                  ((= max-level 1)
+                   (fix 'vcat (car a)))
+                  (else
+                   (fixcat 'ncat max-level (car a)))))
+      (case t
+        ((#\newline)
+         (or gotnewline (take-token s))
+         (let ((next (peek-token s)))
+           (if (and (> semicolon-count 0) (eqv? next #\;))
+               (error (string "semicolons may appear before or after a line break in an array expression, "
+                              "but not both")))
+           (if (and (= semicolon-count 0)
+                    (not (memv next (list #\; 'for closer #\newline))))
+               ; treat a linebreak prior to a value as a semicolon if no previous semicolons observed
+                (process-semicolon next))
+           (restore-lower-dim-lists next)
+           (parse-array-inner s a is-row-first semicolon-count max-level closer #f gotlinesep)))
+        ((#\;)
+         (or gotnewline (take-token s))
+         (if (and (> semicolon-count 0) (ts:space? s)) ; disallow [a; ;b]
+             (error "multiple semicolons must be adjacent in an array expression"))
+         (let ((next (peek-token s)))
+           (let ((is-line-sep
+                 (if (and (not (null? is-row-first)) is-row-first (= semicolon-count 1))
+                     (cond ((eqv? next #\newline) #t) ; [a b ;;<newline>...
+                           ((not (or (eof-object? next) (eqv? next #\;))) ; [a b ;;...
+                             (error (string "cannot mix space and ;; separators in an array expression, "
+                                            "except to wrap a line")))
+                           (else #f)) ; [a b ;;<eof> for REPL,  [a ;;...
+                     #f))) ; [a ; b ;; c ; d...
+             (if is-line-sep
+                 (begin (set! a (unfixrow a))
+                        (set! max-level
+                              (if (null? (cdr a))
+                                  0 ; no prior single semicolon
+                                  max-level)))
+                 (begin (process-semicolon next)
+                        (restore-lower-dim-lists next)))
+           (parse-array-inner s a is-row-first semicolon-count max-level closer #f is-line-sep))))
+        ((#\,)
+         (error "unexpected comma in array expression"))
+        ((#\] #\})
+         (error (string "unexpected \"" t "\"")))
+        ((for)
+         (if (and (length= (car a) 1)
+                  (null? (cdr a)))
+             (begin ;; if we get here, there must have been some kind of space or separator
+               ;;(expect-space-before s 'for)
+               (take-token s)
+               (parse-comprehension s (caar a) closer))
+             (error "invalid comprehension syntax")))
+        (else
+         (if (and (not gotlinesep) (pair? (car a)) (not (ts:space? s)))
+            (error (string "expected \"" closer "\" or separator in arguments to \""
+                           (if (eqv? closer #\]) #\[ #\{) " " closer
+                           "\"; got \""
+                           (deparse (caar a)) t "\"")))
+         (let ((u (parse-eq* s)))
+           (set! a (cons (cons u (car a)) (cdr a)))
+           (if (= (length (car a)) 2)
+               ; at least 2 elements separated by space found [a b...], [a; b c...]
+               (if (null? is-row-first)
+                   (set! is-row-first #t)
+                   (if (not is-row-first)
+                       (error (string "cannot mix space and \";;\" separators in an array expression, "
+                                      "except to wrap a line"))))))
+         (parse-array-inner s a is-row-first 0 max-level closer #f #f))))))
   ;; if a [ ] expression is a cat expression, `end` is not special
   (with-bindings ((end-symbol last-end-symbol))
-  (let loop ((vec   (list first))
-             (outer '()))
-    (let ((t  (if (or (eqv? (peek-token s) #\newline) gotnewline)
-                  #\newline
-                  (require-token s))))
-      (if (eqv? t closer)
-          (begin (take-token s)
-                 (if (pair? outer)
-                     (fix 'vcat (update-outer vec outer))
-                     (if (or (null? vec) (null? (cdr vec)))
-                         (fix 'vect vec)     ; [x]   => (vect x)
-                         (fix 'hcat vec))))  ; [x y] => (hcat x y)
-          (case t
-            ((#\; #\newline)
-             (or gotnewline (take-token s))
-             (set! gotnewline #f)
-             (loop '() (update-outer vec outer)))
-            ((#\,)
-             (error "unexpected comma in matrix expression"))
-            ((#\] #\})
-             (error (string "unexpected \"" t "\"")))
-            ((for)
-             (if (and (not semicolon)
-                      (length= outer 1)
-                      (null? vec))
-                 (begin ;; if we get here, there must have been some kind of space or separator
-                        ;;(expect-space-before s 'for)
-                        (take-token s)
-                        (parse-comprehension s (car outer) closer))
-                 (error "invalid comprehension syntax")))
-            (else
-             (if (and (pair? vec) (not (ts:space? s)))
-                 (error (string "expected \"" closer "\" or separator in arguments to \""
-                                (if (eqv? closer #\]) #\[ #\{) " " closer
-                                "\"; got \""
-                                (deparse (car vec)) t "\"")))
-             (loop (cons (parse-eq* s) vec) outer))))))))
+    (parse-array-inner s (list (list first)) '() 0 0 closer gotnewline #f)))
 
 (define (expect-space-before s t)
   (if (not (ts:space? s))
@@ -1866,24 +2016,40 @@
                   (where-enabled #t)
                   (whitespace-newline #f)
                   (for-generator #t))
-    (if (eqv? (require-token s) closer)
-        (begin (take-token s)
-               '())
-        (let* ((first (parse-eq* s))
-               (t (peek-token s)))
-          (cond ((or (eqv? t #\,) (eqv? t closer))
-                 (parse-vect s first closer))
-                ((eq? t 'for)
-                 (expect-space-before s 'for)
-                 (take-token s)
-                 (parse-comprehension s first closer))
-                ((eqv? t #\newline)
+    (let ((t (require-token s)))
+      (cond ((eqv? t closer)
+             (take-token s)
+             '())
+            ((eqv? t #\;)
+             (take-token s)
+             (define (loop (n 1))
+               (let ((t (with-whitespace-newline (require-token s))))
                  (take-token s)
-                 (if (memv (peek-token s) (list #\, closer))
-                     (parse-vect s first closer)
-                     (parse-matrix s first closer #t last-end-symbol)))
-                (else
-                 (parse-matrix s first closer #f last-end-symbol)))))))
+                 (cond ((eqv? t #\;)
+                        (if (ts:space? s)
+                            (error (string "unexpected space inside "
+                                           (deparse `(ncat ,n)) " expression")))
+                        (loop (+ n 1)))
+                       ((eqv? t closer) `(ncat ,n))
+                       (else (error (string "unexpected \"" t "\" inside "
+                                            (deparse `(ncat ,n)) " expression"))))))
+             (loop))
+            (else
+             (let* ((first (parse-eq* s))
+                    (t (peek-token s)))
+               (cond ((or (eqv? t #\,) (eqv? t closer))
+                      (parse-vect s first closer))
+                     ((eq? t 'for)
+                      (expect-space-before s 'for)
+                      (take-token s)
+                      (parse-comprehension s first closer))
+                     ((eqv? t #\newline)
+                      (take-token s)
+                      (if (memv (peek-token s) (list #\, closer))
+                          (parse-vect s first closer)
+                          (parse-array s first closer #t last-end-symbol)))
+                     (else
+                      (parse-array s first closer #f last-end-symbol)))))))))
 
 (define (kw-to-= e) (if (kwarg? e) (cons '= (cdr e)) e))
 (define (=-to-kw e) (if (assignment? e) (cons 'kw (cdr e)) e))
@@ -2063,16 +2229,44 @@
 (define (unescape-parsed-string-literal strs)
   (map-at even? unescape-string strs))
 
+(define (strip-escaped-newline s raw)
+  (if raw s (map (lambda (s)
+                   (if (string? s) (strip-escaped-newline- s) s))
+                 s)))
+
+;; remove `\` followed by a newline
+(define (strip-escaped-newline- s)
+  (let ((in  (open-input-string s))
+        (out (open-output-string)))
+    (define (loop preceding-backslash?)
+          (let ((c (read-char in)))
+            (cond ((eof-object? c))
+                  (preceding-backslash?
+                   (if (not (eqv? c #\newline))
+                       (begin (write-char #\\ out) (write-char c out))
+                       ((define (loop-)
+                          (if (memv (peek-char in) '(#\space #\tab))
+                              (begin (take-char in) (loop-))))))
+                   (loop #f))
+                  ((eqv? c #\\) (loop #t))
+                  (else (write-char c out) (loop #f)))))
+    (loop #f)
+    (io.tostring! out)))
+
 (define (parse-string-literal s delim raw)
-  (let ((p (ts:port s)))
-    ((if raw identity unescape-parsed-string-literal)
-     (if (eqv? (peek-char p) delim)
-         (if (eqv? (peek-char (take-char p)) delim)
-             (map-first strip-leading-newline
-                        (dedent-triplequoted-string
-                         (parse-string-literal- 2 (take-char p) s delim raw)))
-             (list ""))
-         (parse-string-literal- 0 p s delim raw)))))
+  (let* ((p (ts:port s))
+         (str (if (eqv? (peek-char p) delim)
+                  (if (eqv? (peek-char (take-char p)) delim)
+                      (map-first strip-leading-newline
+                                 (dedent-triplequoted-string
+                                   (strip-escaped-newline
+                                     (parse-string-literal- 2 (take-char p) s delim raw)
+                                     raw)))
+                      (list ""))
+                  (strip-escaped-newline
+                    (parse-string-literal- 0 p s delim raw)
+                    raw))))
+    (if raw str (unescape-parsed-string-literal str))))
 
 (define (strip-leading-newline s)
   (let ((n (sizeof s)))
@@ -2183,24 +2377,28 @@
   (let loop ((c (read-char p))
              (b (open-output-string))
              (e ())
-             (quotes 0))
+             (quotes 0)
+             (bds (make-bidi-state)))
     (cond
       ((eqv? c delim)
        (if (< quotes n)
-           (loop (read-char p) b e (+ quotes 1))
-           (reverse (cons (io.tostring! b) e))))
+           (loop (read-char p) b e (+ quotes 1) bds)
+           (begin
+             (if (not (bidi-state-terminated? bds))
+                 (error "unbalanced bidirectional formatting in string literal"))
+             (reverse (cons (io.tostring! b) e)))))
 
       ((= quotes 1)
        (if (not raw) (write-char #\\ b))
        (write-char delim b)
-       (loop c b e 0))
+       (loop c b e 0 (update-bidi-state bds c)))
 
       ((= quotes 2)
        (if (not raw) (write-char #\\ b))
        (write-char delim b)
        (if (not raw) (write-char #\\ b))
        (write-char delim b)
-       (loop c b e 0))
+       (loop c b e 0 (update-bidi-state bds c)))
 
       ((eqv? c #\\)
        (if raw
@@ -2213,23 +2411,29 @@
                     (io.write b (string.rep "\\" (div count 2)))
                     (if (odd? count)
                         (begin (write-char delim b)
-                               (loop (read-char p) b e 0))
-                        (loop nxch b e 0)))
+                               (loop (read-char p) b e 0 bds))
+                        (loop nxch b e 0 bds)))
                    (else
                     (io.write b (string.rep "\\" count))
                     (write-char nxch b)
-                    (loop (read-char p) b e 0))))
+                    (loop (read-char p) b e 0 (update-bidi-state bds nxch)))))
            (let ((nxch (not-eof-for delim (read-char p))))
              (write-char #\\ b)
-             (write-char nxch b)
-             (loop (read-char p) b e 0))))
+             (if (eqv? nxch #\return)
+                 (loop nxch b e 0 bds)
+                 (begin
+                   (write-char nxch b)
+                   (loop (read-char p) b e 0 (update-bidi-state bds nxch)))))))
 
       ((and (eqv? c #\$) (not raw))
-       (let ((ex (parse-interpolate s)))
+       (let* ((ex (parse-interpolate s))
+              ;; wrap interpolated literal strings in (string ) so we can
+              ;; distinguish them from the surrounding text (issue #38501)
+              (ex (if (string? ex) `(string ,ex) ex)))
          (loop (read-char p)
                (open-output-string)
                (list* ex (io.tostring! b) e)
-               0)))
+               0 bds)))
 
       ; convert literal \r and \r\n in strings to \n (issue #11988)
       ((eqv? c #\return) ; \r
@@ -2237,11 +2441,11 @@
          (if (eqv? (peek-char p) #\linefeed) ; \r\n
              (read-char p))
          (write-char #\newline b)
-         (loop (read-char p) b e 0)))
+         (loop (read-char p) b e 0 bds)))
 
       (else
        (write-char (not-eof-for delim c) b)
-       (loop (read-char p) b e 0)))))
+       (loop (read-char p) b e 0 (update-bidi-state bds c))))))
 
 (define (not-eof-1 c)
   (if (eof-object? c)
@@ -2369,13 +2573,9 @@
           ;; macro call
           ((eqv? t #\@)
            (take-token s)
-           (let ((nxt (peek-token s)))
-             (disallow-space s '@ nxt))
            (with-space-sensitive
             (let ((startloc  (line-number-node s))
-                  (head (if (eq? (peek-token s) '|.|)
-                            (begin (take-token s) '__dot__)
-                            (parse-atom s #f))))
+                  (head (parse-macro-name s)))
               (peek-token s)
               (if (ts:space? s)
                   (maybe-docstring
diff --git a/src/julia-syntax.scm b/src/julia-syntax.scm
index 24602860a9c3bd..254d622dd7e9e3 100644
--- a/src/julia-syntax.scm
+++ b/src/julia-syntax.scm
@@ -105,7 +105,7 @@
   (if (null? tuples)
       (if (and last (= n 1))
           `(call (top firstindex) ,a)
-          `(call (top first) (call (top axes) ,a ,n)))
+          `(call (top firstindex) ,a ,n))
       (let ((dimno `(call (top +) ,(- n (length tuples))
                           ,.(map (lambda (t) `(call (top length) ,t))
                                  tuples))))
@@ -120,6 +120,10 @@
          ;; inside ref only replace within the first argument
          (list* 'ref (replace-beginend (cadr ex) a n tuples last)
                 (cddr ex)))
+        ;; TODO: this probably should not be allowed since keyword args aren't
+        ;; positional, but in this context we have just used their positions anyway
+        ((eq? (car ex) 'kw)
+         (list 'kw (cadr ex) (replace-beginend (caddr ex) a n tuples last)))
         (else
          (cons (car ex)
                (map (lambda (x) (replace-beginend x a n tuples last))
@@ -130,31 +134,33 @@
 ;; returns (values index-list stmts) where stmts are statements that need
 ;; to execute first.
 (define (process-indices a i)
-  (let loop ((lst i)
-             (n   1)
-             (stmts '())
-             (tuples '())
-             (ret '()))
-    (if (null? lst)
-        (values (reverse ret) (reverse stmts))
-        (let ((idx  (car lst))
-              (last (null? (cdr lst))))
-          (if (and (pair? idx) (eq? (car idx) '...))
-              (if (symbol-like? (cadr idx))
-                  (loop (cdr lst) (+ n 1)
-                        stmts
-                        (cons (cadr idx) tuples)
-                        (cons `(... ,(replace-beginend (cadr idx) a n tuples last))
-                              ret))
-                  (let ((g (make-ssavalue)))
-                    (loop (cdr lst) (+ n 1)
-                          (cons `(= ,g ,(replace-beginend (cadr idx) a n tuples last))
-                                stmts)
-                          (cons g tuples)
-                          (cons `(... ,g) ret))))
-              (loop (cdr lst) (+ n 1)
-                    stmts tuples
-                    (cons (replace-beginend idx a n tuples last) ret)))))))
+  (let ((has-va? (any vararg? i)))
+    (let loop ((lst i)
+               (n   1)
+               (stmts '())
+               (tuples '())
+               (ret '()))
+      (if (null? lst)
+          (values (reverse ret) (reverse stmts))
+          (let* ((idx0 (car lst))
+                 (idx  (if (vararg? idx0) (cadr idx0) idx0))
+                 (last (null? (cdr lst)))
+                 (replaced (replace-beginend idx a n tuples last))
+                 (val      (if (kwarg? replaced) (caddr replaced) replaced))
+                 (idx      (if (or (not has-va?) (simple-atom? val))
+                               val (make-ssavalue))))
+            (loop (cdr lst) (+ n 1)
+                  (if (eq? idx val)
+                      stmts
+                      (cons `(= ,idx ,val)
+                            stmts))
+                  (if (vararg? idx0) (cons idx tuples) tuples)
+                  (cons (if (vararg? idx0)
+                            `(... ,idx)
+                            (if (eq? val replaced)
+                                idx
+                                (list 'kw (cadr replaced) idx)))
+                        ret)))))))
 
 ;; GF method does not need to keep decl expressions on lambda args
 ;; except for rest arg
@@ -221,10 +227,11 @@
 
 (define (method-expr-name m)
   (let ((name (cadr m)))
+     (let ((name (if (or (length= m 2) (not (pair? name)) (not (quoted? name))) name (cadr name))))
        (cond ((not (pair? name)) name)
              ((eq? (car name) 'outerref) (cadr name))
              ;((eq? (car name) 'globalref) (caddr name))
-             (else name))))
+             (else name)))))
 
 ;; extract static parameter names from a (method ...) expression
 (define (method-expr-static-parameters m)
@@ -252,6 +259,19 @@
             (pair? (caddr e)) (memq (car (caddr e)) '(quote inert))
             (symbol? (cadr (caddr e))))))
 
+(define (overlay? e)
+  (and (pair? e) (eq? (car e) 'overlay)))
+
+(define (sym-ref-or-overlay? e)
+  (or (overlay? e)
+      (sym-ref? e)))
+
+(define (binding-to-globalref e)
+  (and (nodot-sym-ref? e)
+       (let ((mod (if (globalref? e) (cadr e) '(thismodule)))
+             (sym (if (symbol? e) e (last e))))
+         `(globalref ,mod ,sym))))
+
 ;; convert final (... x) to (curly Vararg x)
 (define (dots->vararg a)
   (if (null? a) a
@@ -341,14 +361,15 @@
    (let* ((names (map car sparams))
           (anames (map (lambda (x) (if (underscore-symbol? x) UNUSED x)) (llist-vars argl)))
           (unused_anames (filter (lambda (x) (not (eq? x UNUSED))) anames))
-          (ename (if (nodot-sym-ref? name) name `(null))))
+          (ename (if (nodot-sym-ref? name) name
+                    (if (overlay? name) (cadr name) `(null)))))
      (if (has-dups unused_anames)
          (error (string "function argument name not unique: \"" (car (has-dups unused_anames)) "\"")))
      (if (has-dups names)
          (error "function static parameter names not unique"))
      (if (any (lambda (x) (and (not (eq? x UNUSED)) (memq x names))) anames)
          (error "function argument and static parameter names must be distinct"))
-     (if (or (and name (not (sym-ref? name))) (not (valid-name? name)))
+     (if (or (and name (not (sym-ref-or-overlay? name))) (not (valid-name? name)))
          (error (string "invalid function name \"" (deparse name) "\"")))
      (let* ((loc (maybe-remove-functionloc! body))
             (generator (if (expr-contains-p if-generated? body (lambda (x) (not (function-def? x))))
@@ -420,6 +441,11 @@
          (block
           ,(scopenest (cdr names) (cdr vals) expr)))))
 
+(define (make-assignments names vals expr)
+  `(block
+    ,@(map make-assignment names vals)
+    ,expr))
+
 (define (keywords-method-def-expr name sparams argl body rett)
   (let* ((kargl (cdar argl))  ;; keyword expressions (= k v)
          (annotations (map (lambda (a) `(meta ,(cadr a) ,(arg-name (cadr (caddr a)))))
@@ -431,9 +457,14 @@
          (body  (blockify body))
          (ftype (decl-type (car pargl)))
          ;; 1-element list of vararg argument, or empty if none
-         (vararg (let ((l (if (null? pargl) '() (last pargl))))
-                   (if (or (vararg? l) (varargexpr? l))
+         (vararg (let* ((l (if (null? pargl) '() (last pargl)))
+                        ;; handle vararg with default value
+                        (l- (if (kwarg? l) (cadr l) l)))
+                   (if (or (vararg? l-) (varargexpr? l-))
                        (list l) '())))
+         ;; expression to forward varargs to another call
+         (splatted-vararg (if (null? vararg) '()
+                              (list `(... ,(arg-name (car vararg))))))
          ;; positional args with vararg
          (pargl-all pargl)
          ;; positional args without vararg
@@ -461,6 +492,15 @@
                                              (lambda (x) (eq? x v))
                                              vals))
                                 keynames))
+         ;; if keyword args don't depend on each other and the default
+         ;; values don't have embedded assignments (ick) then we can use
+         ;; ssavalues instead of slots in the sorter method.
+         (ssa-keyvars? (and (not ordered-defaults)
+                            (not (contains assignment? vals))))
+         (keyvars (if ssa-keyvars?
+                      (map (lambda (x) (make-ssavalue)) keynames)
+                      keynames))
+         (tempslot (gensy))
          ;; list of function's initial line number and meta nodes (empty if none)
          (prologue (extract-method-prologue body))
          ;; body statements
@@ -470,15 +510,16 @@
           (filter (lambda (s)
                     (not (any (lambda (p) (eq? (car p) (car s)))
                               positional-sparams)))
-                  sparams)))
-    (let ((kw      (gensy))
-          (rkw     (if (null? restkw) (make-ssavalue) (symbol (string (car restkw) "..."))))
-          (mangled (let ((und (and name (undot-name name))))
-                     (symbol (string (if (and name (= (string.char (string name) 0) #\#))
-                                         ""
-                                         "#")
-                                     (or und '_) "#"
-                                     (string (current-julia-module-counter)))))))
+                  sparams))
+         (kw      (gensy))
+         (rkw     (if (null? restkw) (make-ssavalue) (symbol (string (car restkw) "..."))))
+         (restkw  (map (lambda (v) `(|::| ,v (call (top pairs) (core NamedTuple)))) restkw))
+         (mangled (let ((und (and name (undot-name name))))
+                    (symbol (string (if (and name (= (string.char (string name) 0) #\#))
+                                        ""
+                                        "#")
+                                    (or und '_) "#"
+                                    (string (current-julia-module-counter)))))))
       ;; this is a hack: nest these statements inside a call so they get closure
       ;; converted together, allowing all needed types to be defined before any methods.
       `(call (core ifelse) (false) (false) (block
@@ -508,8 +549,7 @@
                                        ,@(if ordered-defaults keynames vals)
                                        ,@(if (null? restkw) '() `((call (top pairs) (call (core NamedTuple)))))
                                        ,@(map arg-name pargl)
-                                       ,@(if (null? vararg) '()
-                                             (list `(... ,(arg-name (car vararg)))))))))
+                                       ,@splatted-vararg))))
                (if ordered-defaults
                    (scopenest keynames vals ret)
                    ret))))
@@ -528,11 +568,16 @@
                      `(meta ,(cadr m) ,@(filter (lambda (v) (not (memq v keynames)))
                                                 (cddr m))))
                    (filter nospecialize-meta? prologue))
-            ,(scopenest
-              keynames
+            ;; If not using slots for the keyword argument values, still declare them
+            ;; for reflection purposes.
+            ,@(if ssa-keyvars?
+                  (map (lambda (v) `(local ,v)) (reverse keynames))
+                  '())
+            ,((if ssa-keyvars? make-assignments scopenest)
+              keyvars
               (map (lambda (v dflt)
                      (let* ((k     (decl-var v))
-                            (rval0 `(call (top getindex) ,kw (inert ,k)))
+                            (rval0 `(call (core getfield) ,kw (inert ,k)))
                             ;; note: if the "declared" type of a KW arg includes something
                             ;; from keyword-sparams then don't assert it here, since those
                             ;; static parameters don't have values yet. instead, the type
@@ -554,9 +599,10 @@
                                                                ,temp)))
                                                 ,temp))
                                       rval0)))
-                       `(if (call (top haskey) ,kw (quote ,k))
-                            ,rval
-                            ,dflt)))
+                       `(block (if (call (core isdefined) ,kw (quote ,k))
+                                   (= ,tempslot ,rval)
+                                   (= ,tempslot ,dflt))
+                               ,tempslot)))
                    vars vals)
               `(block
                 (= ,rkw (call (top pairs)
@@ -567,19 +613,16 @@
                 ,@(if (null? restkw)
                       `((if (call (top isempty) ,rkw)
                             (null)
-                            (call (top kwerr) ,kw ,@(map arg-name pargl)
-                                  ,@(if (null? vararg) '()
-                                        (list `(... ,(arg-name (car vararg))))))))
+                            (call (top kwerr) ,kw ,@(map arg-name pargl) ,@splatted-vararg)))
                       '())
                 (return (call ,mangled  ;; finally, call the core function
-                              ,@keynames
+                              ,@keyvars
                               ,@(if (null? restkw) '() (list rkw))
                               ,@(map arg-name pargl)
-                              ,@(if (null? vararg) '()
-                                    (list `(... ,(arg-name (car vararg)))))))))))
+                              ,@splatted-vararg))))))
         ;; return primary function
         ,(if (not (symbol? name))
-             '(null) name))))))
+             '(null) name)))))
 
 ;; prologue includes line number node and eventual meta nodes
 (define (extract-method-prologue body)
@@ -663,8 +706,7 @@
   (define (throw-unassigned argname)
     `(call (core throw) (call (core UndefKeywordError) (inert ,argname))))
   (define (to-kw x)
-    (cond ((symbol? x) `(kw ,x ,(throw-unassigned x)))
-          ((decl? x) `(kw ,x ,(throw-unassigned (cadr x))))
+    (cond ((symdecl? x) `(kw ,x ,(throw-unassigned (decl-var x))))
           ((nospecialize-meta? x) `(meta ,(cadr x) ,(to-kw (caddr x))))
           (else x)))
   (if (has-parameters? argl)
@@ -710,12 +752,13 @@
   (let* ((field-names (safe-field-names field-names field-types))
          (any-ctor
           ;; definition with Any for all arguments
-          `(function ,(with-wheres
-                       `(call ,(if (pair? params)
-                                   `(curly ,name ,@params)
-                                   name)
-                              ,@field-names)
-                       (map (lambda (b) (cons 'var-bounds b)) bounds))
+          `(function (call (|::| |#ctor-self#|
+                            ,(with-wheres
+                              `(curly (core Type) ,(if (pair? params)
+                                                       `(curly ,name ,@params)
+                                                       name))
+                              (map (lambda (b) (cons 'var-bounds b)) bounds)))
+                           ,@field-names)
                      (block
                       ,@locs
                       (call new ,@field-names)))))
@@ -746,28 +789,36 @@
                 ,@locs
                 (call (curly ,name ,@params) ,@field-names)))))
 
-(define (new-call Tname type-params sparams params args field-names field-types)
+(define (num-non-varargs args)
+  (count (lambda (a) (not (vararg? a))) args))
+
+;; selftype?: tells us whether the called object is the type being constructed,
+;; i.e. `new()` and not `new{...}()`.
+(define (new-call Tname type-params sparams params args field-names field-types selftype?)
   (if (any kwarg? args)
       (error "\"new\" does not accept keyword arguments"))
-  (if (length> params (length type-params))
-      (error "too few type parameters specified in \"new{...}\""))
-  (if (length> type-params (length params))
-      (error "too many type parameters specified in \"new{...}\""))
+  (let ((nnv (num-non-varargs type-params)))
+    (if (and (not (any vararg? type-params)) (length> params nnv))
+        (error "too few type parameters specified in \"new{...}\""))
+    (if (> nnv (length params))
+        (error "too many type parameters specified in \"new{...}\"")))
   (let* ((Texpr (if (null? type-params)
                     `(outerref ,Tname)
-                    `(curly (outerref ,Tname)
-                            ,@type-params)))
-         (tn (make-ssavalue))
+                    (if selftype?
+                        '|#ctor-self#|
+                        `(curly (outerref ,Tname)
+                                ,@type-params))))
+         (tn (if (symbol? Texpr) Texpr (make-ssavalue)))
          (field-convert (lambda (fld fty val)
                           (if (equal? fty '(core Any))
                               val
                               `(call (top convert)
-                                     ,(if (and (equal? type-params params) (memq fty params) (memq fty sparams))
+                                     ,(if (and (not selftype?) (equal? type-params params) (memq fty params) (memq fty sparams))
                                           fty ; the field type is a simple parameter, the usage here is of a
                                               ; local variable (currently just handles sparam) for the bijection of params to type-params
                                           `(call (core fieldtype) ,tn ,(+ fld 1)))
                                      ,val)))))
-    (cond ((length> (filter (lambda (a) (not (vararg? a))) args) (length field-names))
+    (cond ((> (num-non-varargs args) (length field-names))
            `(call (core throw) (call (top ArgumentError)
                                      ,(string "new: too many arguments (expected " (length field-names) ")"))))
           ((any vararg? args)
@@ -777,7 +828,7 @@
                (let ((argt (make-ssavalue))
                      (nf (make-ssavalue)))
                  `(block
-                   (= ,tn ,Texpr)
+                   ,@(if (symbol? tn) '() `((= ,tn ,Texpr)))
                    (= ,argt (call (core tuple) ,@args))
                    (= ,nf (call (core nfields) ,argt))
                    (if (call (top ult_int) ,nf ,(length field-names))
@@ -789,9 +840,9 @@
                    (new ,tn ,@(map (lambda (fld fty) (field-convert fld fty `(call (core getfield) ,argt ,(+ fld 1) (false))))
                                    (iota (length field-names)) (list-head field-types (length field-names))))))))
           (else
-            `(block
-              (= ,tn ,Texpr)
-              (new ,tn ,@(map field-convert (iota (length args)) (list-head field-types (length args)) args)))))))
+           `(block
+             ,@(if (symbol? tn) '() `((= ,tn ,Texpr)))
+             (new ,tn ,@(map field-convert (iota (length args)) (list-head field-types (length args)) args)))))))
 
 ;; insert item at start of arglist
 (define (arglist-unshift sig item)
@@ -804,56 +855,69 @@
         ((length= lno 3) (string " around " (caddr lno) ":" (cadr lno)))
         (else "")))
 
+;; convert constructor signature from X(...) to (|#ctor-self#|::Type{X})(...),
+;; or return #f if we can't
+(define (ctor-sig sig)
+  (cond ((or (eq? (car sig) '|::|) (eq? (car sig) 'where))
+         (let ((s2 (ctor-sig (cadr sig))))
+           (and s2 `(,(car sig) ,s2 ,@(cddr sig)))))
+        ((eq? (car sig) 'call)
+         (let ((head (cadr sig)))
+           (if (decl? head)
+               (if (eq? (cadr head) '|#ctor-self#|)
+                   sig  ;; already in the required form
+                   #f)
+               `(call (|::| |#ctor-self#| (curly (core Type) ,head)) ,@(cddr sig)))))
+        (else #f)))
+
 (define (ctor-def name Tname ctor-body sig body wheres)
   (let* ((curly?     (and (pair? name) (eq? (car name) 'curly)))
          (curlyargs  (if curly? (cddr name) '()))
          (name       (if curly? (cadr name) name))
          (sparams (map car (map analyze-typevar wheres))))
     (cond ((not (eq? name Tname))
-           `(function ,(with-wheres `(call ,(if curly?
-                                                `(curly ,name ,@curlyargs)
-                                                name)
-                                           ,@sig)
-                                    wheres)
+           `(function ,sig
                       ;; pass '() in order to require user-specified parameters with
                       ;; new{...} inside a non-ctor inner definition.
-                      ,(ctor-body body '() sparams)))
+                      ,(ctor-body body '() sparams #f)))
           (else
-           `(function ,(with-wheres `(call ,(if curly?
-                                                `(curly ,name ,@curlyargs)
-                                                name)
-                                           ,@sig)
-                                    wheres)
-                      ,(ctor-body body curlyargs sparams))))))
+           (let ((newsig (ctor-sig sig)))
+             `(function ,(or newsig sig)
+                        ,(ctor-body body curlyargs sparams (not (not newsig)))))))))
 
 ;; rewrite calls to `new( ... )` to `new` expressions on the appropriate
 ;; type, determined by the containing constructor definition.
 (define (rewrite-ctor ctor Tname params field-names field-types)
-  (define (ctor-body body type-params sparams)
+  (define (ctor-body body type-params sparams selftype?)
     (pattern-replace (pattern-set
                       (pattern-lambda
                        (call (-/ new) . args)
                        (new-call Tname type-params sparams params
-                                 (map (lambda (a) (ctor-body a type-params sparams)) args)
-                                 field-names field-types))
+                                 (map (lambda (a) (ctor-body a type-params sparams selftype?)) args)
+                                 field-names field-types selftype?))
                       (pattern-lambda
                        (call (curly (-/ new) . p) . args)
                        (new-call Tname p sparams params
-                                 (map (lambda (a) (ctor-body a type-params sparams)) args)
-                                 field-names field-types)))
+                                 (map (lambda (a) (ctor-body a type-params sparams selftype?)) args)
+                                 field-names field-types #f)))
                      body))
   (pattern-replace
    (pattern-set
+    ;; recognize `(t::(Type{X{T}} where T))(...)` as an inner-style constructor for X
+    (pattern-lambda (function       (-$ (call (|::| self (where (curly (core (-/ Type)) name) . wheres)) . sig)
+                                        (|::| (call (|::| self (where (curly (core (-/ Type)) name) . wheres)) . sig) _t))
+                                    body)
+                    (ctor-def name Tname ctor-body (cadr __) body wheres))
     ;; definitions without `where`
     (pattern-lambda (function       (-$ (call name . sig) (|::| (call name . sig) _t)) body)
-                    (ctor-def name Tname ctor-body sig body #f))
+                    (ctor-def name Tname ctor-body (cadr __) body #f))
     (pattern-lambda (= (-$ (call name . sig) (|::| (call name . sig) _t)) body)
-                    (ctor-def name Tname ctor-body sig body #f))
+                    (ctor-def name Tname ctor-body (cadr __) body #f))
     ;; definitions with `where`
     (pattern-lambda (function       (where (-$ (call name . sig) (|::| (call name . sig) _t)) . wheres) body)
-                    (ctor-def name Tname ctor-body sig body wheres))
+                    (ctor-def name Tname ctor-body (cadr __) body wheres))
     (pattern-lambda (= (where (-$ (call name . sig) (|::| (call name . sig) _t)) . wheres) body)
-                    (ctor-def name Tname ctor-body sig body wheres)))
+                    (ctor-def name Tname ctor-body (cadr __) body wheres)))
 
    ;; flatten `where`s first
    (pattern-replace
@@ -877,9 +941,20 @@
 
 (define (struct-def-expr- name params bounds super fields0 mut)
   (receive
-   (fields defs) (separate (lambda (x) (or (symbol? x) (decl? x)))
-                           fields0)
-   (let* ((defs        (filter (lambda (x) (not (effect-free? x))) defs))
+   (fields defs) (separate eventually-decl? fields0)
+   (let* ((attrs ())
+          (fields (let ((n 0))
+                    (map (lambda (x)
+                           (set! n (+ n 1))
+                           (let loop ((x x))
+                             (if (and (pair? x) (not (decl? x)))
+                                 (begin
+                                   (set! attrs (cons (quotify (car x)) (cons n attrs)))
+                                   (loop (cadr x)))
+                                 x)))
+                         fields)))
+          (attrs (reverse attrs))
+          (defs        (filter (lambda (x) (not (or (effect-free? x) (eq? (car x) 'string)))) defs))
           (locs        (if (and (pair? fields0) (linenum? (car fields0)))
                            (list (car fields0))
                            '()))
@@ -900,12 +975,14 @@
        (global ,name) (const ,name)
        (scope-block
         (block
+         (hardscope)
          (local-def ,name)
          ,@(map (lambda (v) `(local ,v)) params)
          ,@(map (lambda (n v) (make-assignment n (bounds-to-TypeVar v #t))) params bounds)
          (toplevel-only struct (outerref ,name))
          (= ,name (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@params)
                         (call (core svec) ,@(map quotify field-names))
+                        (call (core svec) ,@attrs)
                         ,mut ,min-initialized))
          (call (core _setsuper!) ,name ,super)
          (if (isdefined (outerref ,name))
@@ -930,6 +1007,7 @@
        ;; "inner" constructors
        (scope-block
         (block
+         (hardscope)
          (global ,name)
          ,@(map (lambda (c)
                   (rewrite-ctor c name params field-names field-types))
@@ -1043,16 +1121,16 @@
                   (loop (if isseq F (cdr F)) (cdr A) stmts
                         (list* ty T) (list* ca C) (list* g GC)))))))))
 
+(define (just-arglist? ex)
+  (and (pair? ex)
+       (or (memq (car ex) '(tuple block ...))
+           (and (eq? (car ex) 'where)
+                (just-arglist? (cadr ex))))))
+
 (define (expand-function-def e)   ;; handle function definitions
-  (define (just-arglist? ex)
-    (and (pair? ex)
-         (or (memq (car ex) '(tuple block ...))
-             (and (eq? (car ex) 'where)
-                  (just-arglist? (cadr ex))))))
-  (let ((name (cadr e)))
-    (if (just-arglist? name)
-        (expand-forms (cons '-> (cdr e)))
-        (expand-function-def- e))))
+  (if (just-arglist? (cadr e))
+      (expand-forms (cons '-> (cdr e)))
+      (expand-function-def- e)))
 
 ;; convert (where (where x S) T) to (where x T S)
 (define (flatten-where-expr e)
@@ -1123,13 +1201,14 @@
                   (argl-stmts (lower-destructuring-args argl))
                   (argl       (car argl-stmts))
                   (name       (check-dotop (car argl)))
+                  (argname    (if (overlay? name) (caddr name) name))
                   ;; fill in first (closure) argument
                   (adj-decl (lambda (n) (if (and (decl? n) (length= n 2))
                                             `(|::| |#self#| ,(cadr n))
                                             n)))
-                  (farg    (if (decl? name)
-                               (adj-decl name)
-                               `(|::| |#self#| (call (core Typeof) ,name))))
+                  (farg    (if (decl? argname)
+                               (adj-decl argname)
+                               `(|::| |#self#| (call (core Typeof) ,argname))))
                   (body       (insert-after-meta body (cdr argl-stmts)))
                   (argl    (cdr argl))
                   (argl    (fix-arglist
@@ -1168,6 +1247,21 @@
                    `(call ,name ,@argl))
               ,body)))))
 
+(define (function-arglist e)
+  (cond ((eq? (car e) 'function)
+         (if (just-arglist? (cadr e))
+             (function-arglist (cons '-> (cdr e)))
+             (let* ((name  (cadr e))
+                    (dcl   (and (pair? name) (eq? (car name) '|::|)))
+                    (name  (if dcl (cadr name) name)))
+               (cddr name))))
+        ((eq? (car e) '->)
+         (let* ((a (cadr e)))
+           (if (pair? a)
+               (tuple-to-arglist (filter (lambda (x) (not (linenum? x))) a))
+               (list a))))
+        (else '())))
+
 (define (let-binds e)
   (if (and (pair? (cadr e))
            (eq? (car (cadr e)) 'block))
@@ -1187,7 +1281,7 @@
         (if (null? binds)
             blk
             (cond
-             ((or (symbol? (car binds)) (decl? (car binds)))
+             ((symdecl? (car binds))
               ;; just symbol -> add local
               (loop (cdr binds)
                     `(scope-block
@@ -1211,45 +1305,28 @@
                                 `(local-def ,name))
                            ,(car binds)
                            ,blk)))))
-               ((or (symbol? (cadar binds))
-                    (decl?   (cadar binds)))
+               ((symdecl?   (cadar binds))
                 (let ((vname (decl-var (cadar binds))))
                   (loop (cdr binds)
-                        (if (expr-contains-eq vname (caddar binds))
-                            (let ((tmp (make-ssavalue)))
-                              `(scope-block
-                                (block ,@hs
-                                       (= ,tmp ,(caddar binds))
-                                       (scope-block
-                                        (block
-                                         (local-def ,(cadar binds))
-                                         (= ,vname ,tmp)
-                                         ,blk)))))
-                            `(scope-block
-                              (block ,@hs
-                               (local-def ,(cadar binds))
-                               (= ,vname ,(caddar binds))
-                               ,blk))))))
+                        (let ((tmp (make-ssavalue)))
+                          `(block (= ,tmp ,(caddar binds))
+                                  (scope-block
+                                   (block ,@hs
+                                          (local-def ,(cadar binds))
+                                          (= ,vname ,tmp)
+                                          ,blk)))))))
                ;; (a, b, c, ...) = rhs
                ((and (pair? (cadar binds))
                      (eq? (caadar binds) 'tuple))
                 (let ((vars (lhs-vars (cadar binds))))
                   (loop (cdr binds)
-                        (if (expr-contains-p (lambda (x) (memq x vars)) (caddr (car binds)))
-                            ;; use more careful lowering if there are name conflicts. issue #25652
-                            (let ((temp (make-ssavalue)))
-                              `(block
-                                (= ,temp ,(caddr (car binds)))
-                                (scope-block
-                                 (block ,@hs
-                                  ,@(map (lambda (v) `(local-def ,v)) vars)
-                                  (= ,(cadr (car binds)) ,temp)
-                                  ,blk))))
-                            `(scope-block
-                              (block ,@hs
-                               ,@(map (lambda (v) `(local-def ,v)) vars)
-                               ,(car binds)
-                               ,blk))))))
+                        (let ((tmp (make-ssavalue)))
+                          `(block (= ,tmp ,(caddar binds))
+                                  (scope-block
+                                   (block ,@hs
+                                          ,@(map (lambda (v) `(local-def ,v)) vars)
+                                          (= ,(cadar binds) ,tmp)
+                                          ,blk)))))))
                (else (error "invalid let syntax"))))
              (else (error "invalid let syntax")))))))))
 
@@ -1283,9 +1360,9 @@
       (if (null? f)
           '()
           (let ((x (car f)))
-            (cond ((or (symbol? x) (decl? x) (linenum? x))
+            (cond ((or (eventually-decl? x) (linenum? x))
                    (loop (cdr f)))
-                  ((and (assignment? x) (or (symbol? (cadr x)) (decl? (cadr x))))
+                  ((and (assignment? x) (eventually-decl? (cadr x)))
                    (error (string "\"" (deparse x) "\" inside type definition is reserved")))
                   (else '())))))
     (expand-forms
@@ -1320,25 +1397,29 @@
   (let ((tryb   (cadr e))
         (var    (caddr e))
         (catchb (cadddr e)))
-    (cond ((length= e 5)
+    (cond ((and (length> e 4) (not (equal? (caddddr e) '(false))))
            (if (has-unmatched-symbolic-goto? tryb)
                (error "goto from a try/finally block is not permitted"))
-           (let ((finalb (cadddr (cdr e))))
+           (let ((finalb (caddddr e)))
              (expand-forms
               `(tryfinally
-                ,(if (not (equal? catchb '(false)))
-                     `(try ,tryb ,var ,catchb)
-                     `(scope-block ,tryb))
+                ,(if (and (equal? catchb '(false)) (length= e 5))
+                     `(scope-block ,tryb)
+                     `(try ,tryb ,var ,catchb (false) ,@(cdddddr e)))
                 (scope-block ,finalb)))))
-          ((length= e 4)
-           (expand-forms
-            (if (symbol-like? var)
-                `(trycatch (scope-block ,tryb)
-                           (scope-block
-                            (block (= ,var (the_exception))
-                                   ,catchb)))
-                `(trycatch (scope-block ,tryb)
-                           (scope-block ,catchb)))))
+          ((length> e 3)
+           (and (length> e 6) (error "invalid \"try\" form"))
+           (let ((elseb (if (length= e 6) (cdddddr e) '())))
+             (expand-forms
+               `(,(if (null? elseb) 'trycatch 'trycatchelse)
+                 (scope-block ,tryb)
+                 (scope-block
+                   ,(if (symbol-like? var)
+                        `(scope-block
+                          (block (= ,var (the_exception))
+                                 ,catchb))
+                        `(scope-block ,catchb)))
+                 ,@elseb))))
           (else
            (error "invalid \"try\" form")))))
 
@@ -1369,7 +1450,10 @@
            (expand-forms (expand-decls (car arg) (cdr arg) #t)))
           ((= |::|)
            (expand-forms (expand-decls 'const (cdr e) #f)))
-          (else e)))))
+          (else (error "expected assignment after \"const\""))))))
+
+(define (expand-atomic-decl e)
+  (error "unimplemented or unsupported atomic declaration"))
 
 (define (expand-local-or-global-decl e)
   (if (and (symbol? (cadr e)) (length= e 2))
@@ -1426,7 +1510,8 @@
                 ,@(reverse after)
                 (unnecessary (tuple ,@(reverse elts))))
         (let ((L (car lhss))
-              (R (car rhss)))
+              ;; rhss can be null iff L is a vararg
+              (R (if (null? rhss) '() (car rhss))))
           (cond ((and (symbol-like? L)
                       (or (not (pair? R)) (quoted? R) (equal? R '(null)))
                       ;; overwrite var immediately if it doesn't occur elsewhere
@@ -1438,6 +1523,60 @@
                        (cons (make-assignment L R) stmts)
                        after
                        (cons R elts)))
+                ((vararg? L)
+                 (if (any vararg? (cdr lhss))
+                     (error "multiple \"...\" on lhs of assignment"))
+                 (if (null? (cdr lhss))
+                     (let ((temp (if (eventually-call? (cadr L)) (gensy) (make-ssavalue))))
+                       `(block ,@(reverse stmts)
+                               (= ,temp (tuple ,@rhss))
+                               ,@(reverse after)
+                               (= ,(cadr L) ,temp)
+                               (unnecessary (tuple ,@(reverse elts) (... ,temp)))))
+                     (let ((lhss- (reverse lhss))
+                           (rhss- (reverse rhss))
+                           (lhs-tail '())
+                           (rhs-tail '()))
+                       (define (extract-tail)
+                         (if (not (or (null? lhss-) (null? rhss-)
+                                      (vararg? (car lhss-)) (vararg? (car rhss-))))
+                             (begin
+                               (set! lhs-tail (cons (car lhss-) lhs-tail))
+                               (set! rhs-tail (cons (car rhss-) rhs-tail))
+                               (set! lhss- (cdr lhss-))
+                               (set! rhss- (cdr rhss-))
+                               (extract-tail))))
+                       (extract-tail)
+                       (let* ((temp (if (any (lambda (x)
+                                               (or (eventually-call? x)
+                                                   (and (vararg? x) (eventually-call? (cadr x)))))
+                                             lhss-)
+                                        (gensy)
+                                        (make-ssavalue)))
+                              (assigns (make-assignment temp `(tuple ,@(reverse rhss-))))
+                              (assigns (if (symbol? temp)
+                                          `((local-def ,temp) ,assigns)
+                                          (list assigns)))
+                              (n (length lhss-))
+                              (st (gensy))
+                              (end (list after))
+                              (assigns (if (and (length= lhss- 1) (vararg? (car lhss-)))
+                                           (begin
+                                             (set-car! end
+                                                       (cons `(= ,(cadar lhss-) ,temp) (car end)))
+                                             assigns)
+                                           (append (if (> n 0)
+                                                       `(,@assigns (local ,st))
+                                                       assigns)
+                                                   (destructure- 1 (reverse lhss-) temp
+                                                                 n st end)))))
+                         (loop lhs-tail
+                               (append (map (lambda (x) (if (vararg? x) (cadr x) x)) lhss-) assigned)
+                               rhs-tail
+                               (append (reverse assigns) stmts)
+                               (car end)
+                               (cons `(... ,temp) elts))))))
+
                 ((vararg? R)
                  (let ((temp (make-ssavalue)))
                    `(block ,@(reverse stmts)
@@ -1450,7 +1589,9 @@
                    (loop (cdr lhss)
                          (cons L assigned)
                          (cdr rhss)
-                         (cons (make-assignment temp R) stmts)
+                         (if (symbol? temp)
+                             (list* (make-assignment temp R) `(local-def ,temp) stmts)
+                             (cons  (make-assignment temp R) stmts))
                          (cons (make-assignment L temp) after)
                          (cons temp elts)))))))))
 
@@ -1468,6 +1609,7 @@
                         ;; issue #22032
                         (let ((temp (gensy)))
                           `(block
+                            (local-def ,temp)
                             (= ,temp (call (core getfield) ,t ,i))
                             (= ,(car lhs) ,temp)))
                         `(= ,(car lhs)
@@ -1480,7 +1622,7 @@
 ;; for example a[f(x)] => (temp=f(x); a[temp])
 ;; returns a pair (expr . assignments)
 ;; where 'assignments' is a list of needed assignment statements
-(define (remove-argument-side-effects e (tup #f))
+(define (remove-argument-side-effects e)
   (if (not (pair? e))
       (cons e '())
       (let ((a '()))
@@ -1488,14 +1630,8 @@
           (cond ((effect-free? x)  x)
                 ((or (eq? (car x) '...) (eq? (car x) '&))
                  `(,(car x) ,(arg-to-temp (cadr x))))
-                ((or (eq? (car x) 'kw) (and tup (eq? (car x) '=)))
+                ((eq? (car x) 'kw)
                  `(,(car x) ,(cadr x) ,(arg-to-temp (caddr x))))
-                ((eq? (car x) 'parameters)
-                 `(parameters ,@(map arg-to-temp (cdr x))))
-                ((eq? (car x) 'tuple)
-                 (let ((tmp (remove-argument-side-effects x #t)))
-                   (set! a (revappend (cdr tmp) a))
-                   (car tmp)))
                 (else
                  (let ((g (make-ssavalue)))
                    (begin (set! a (cons `(= ,g ,x) a))
@@ -1530,7 +1666,8 @@
                                            (lambda (name) (string "keyword argument \"" name
                                                                   "\" repeated in call to \"" (deparse fexpr) "\""))
                                            "keyword argument"
-                                           "keyword argument syntax"))
+                                           "keyword argument syntax"
+                                           #t))
       ,(if (every vararg? kw)
            (kwcall-unless-empty f pa kw-container kw-container)
            `(call (call (core kwfunc) ,f) ,kw-container ,f ,@pa)))))
@@ -1539,7 +1676,10 @@
 (define (expand-update-operator- op op= lhs rhs declT)
   (let* ((e      (remove-argument-side-effects lhs))
          (newlhs (car e))
-         (temp   (and (eq? op= '|.=|) (pair? newlhs) (not (eq? (car newlhs) 'ref))
+         (temp   (and (eq? op= '|.=|)
+                      (pair? newlhs)
+                      (not (or (eq? (car newlhs) 'ref)
+                               (and (eq? (car newlhs) '|.|) (length= newlhs 3))))
                       (make-ssavalue)))
          (e      (if temp
                      (cons temp (append (cdr e) (list `(= ,temp ,newlhs))))
@@ -1590,7 +1730,7 @@
                    ,(expand-update-operator op op= (car e) rhs T))))
         (else
          (if (and (pair? lhs) (eq? op= '=)
-                  (not (memq (car lhs) '(|.| tuple vcat typed_hcat typed_vcat))))
+                  (not (memq (car lhs) '(|.| tuple vcat ncat typed_hcat typed_vcat typed_ncat))))
              (error (string "invalid assignment location \"" (deparse lhs) "\"")))
          (expand-update-operator- op op= lhs rhs declT))))
 
@@ -1664,15 +1804,16 @@
                              ,body))
                          `(scope-block ,body))))
                `(block (= ,coll ,(car itrs))
+                       (local ,next)
                        (= ,next (call (top iterate) ,coll))
                        ;; TODO avoid `local declared twice` error from this
                        ;;,@(if outer `((local ,lhs)) '())
                        ,@(if outer `((require-existing-local ,lhs)) '())
                        (if (call (top not_int) (call (core ===) ,next (null)))
                            (_do_while
-			    (block ,body
-				   (= ,next (call (top iterate) ,coll ,state)))
-			    (call (top not_int) (call (core ===) ,next (null))))))))))))
+                            (block ,body
+                                   (= ,next (call (top iterate) ,coll ,state)))
+                            (call (top not_int) (call (core ===) ,next (null))))))))))))
 
 ;; wrap `expr` in a function appropriate for consuming values from given ranges
 (define (func-for-generator-ranges expr range-exprs flat outervars)
@@ -1692,12 +1833,14 @@
      ((eq? expr argname)
       ;; use `identity` for x->x
       `(top identity))
+      ;; TODO: deprecate this (#18621):
      ((and (null? splat)
            (length= expr 3) (eq? (car expr) 'call)
            (eq? (caddr expr) argname)
+           (underscore-symbol? argname)
            (not (dotop-named? (cadr expr)))
            (not (expr-contains-eq argname (cadr expr))))
-      ;; eta reduce `x->f(x)` => `f`
+      ;; eta reduce `_->f(_)` => `f`
       (cadr expr))
      (else
       (let ((expr (cond ((and flat (pair? expr) (eq? (car expr) 'generator))
@@ -1733,13 +1876,15 @@
          gen))))
 
 (define (ref-to-view expr)
-  (if (and (pair? expr) (eq? (car expr) 'ref))
-      (let* ((ex (partially-expand-ref expr))
-             (stmts (butlast (cdr ex)))
-             (refex (last    (cdr ex)))
-             (nuref `(call (top dotview) ,(caddr refex) ,@(cdddr refex))))
-        `(block ,@stmts ,nuref))
-      expr))
+  (cond ((and (pair? expr) (eq? (car expr) 'ref))
+         (let* ((ex (partially-expand-ref expr))
+                (stmts (butlast (cdr ex)))
+                (refex (last    (cdr ex)))
+                (nuref `(call (top dotview) ,(caddr refex) ,@(cdddr refex))))
+           `(block ,@stmts ,nuref)))
+        ((and (length= expr 3) (eq? (car expr) '|.|))
+         `(call (top dotgetproperty) ,(cadr expr) ,(caddr expr)))
+        (else expr)))
 
 ; lazily fuse nested calls to expr == f.(args...) into a single broadcast call,
 ; or a broadcast! call if lhs is non-null.
@@ -1763,24 +1908,38 @@
              (args (map dot-to-fuse (cdr kws+args)))
              (make `(call (top ,(if (null? kws) 'broadcasted 'broadcasted_kwsyntax)) ,@kws ,f ,@args)))
         (if top (cons 'fuse make) make)))
-    (if (and (pair? e) (eq? (car e) '|.|))
-        (let ((f (cadr e)) (x (caddr e)))
-          (cond ((or (atom? x) (eq? (car x) 'quote) (eq? (car x) 'inert) (eq? (car x) '$))
-                 `(call (top getproperty) ,f ,x))
-                ((eq? (car x) 'tuple)
-                 (if (and (eq? (identifier-name f) '^) (length= x 3) (integer? (caddr x)))
-                     (make-fuse '(top literal_pow)
-                                (list f (cadr x) (expand-forms `(call (call (core apply_type) (top Val) ,(caddr x))))))
-                     (make-fuse f (cdr x))))
-                (else
-                 (error (string "invalid syntax \"" (deparse e) "\"")))))
-        (if (and (pair? e) (eq? (car e) 'call) (dotop-named? (cadr e)))
-            (let ((f (undotop (cadr e))) (x (cddr e)))
-              (if (and (eq? (identifier-name f) '^) (length= x 2) (integer? (cadr x)))
-                  (make-fuse '(top literal_pow)
-                             (list f (car x) (expand-forms `(call (call (core apply_type) (top Val) ,(cadr x))))))
-                  (make-fuse f x)))
-            e)))
+    (cond ((and (length= e 3) (eq? (car e) '|.|))
+           (let ((f (cadr e)) (x (caddr e)))
+             (cond ((or (atom? x) (eq? (car x) 'quote) (eq? (car x) 'inert) (eq? (car x) '$))
+                    `(call (top getproperty) ,f ,x))
+                   ((eq? (car x) 'tuple)
+                    (if (and (eq? (identifier-name f) '^) (length= x 3) (integer? (caddr x)))
+                        (make-fuse '(top literal_pow)
+                                   (list f (cadr x) (expand-forms `(call (call (core apply_type) (top Val) ,(caddr x))))))
+                        (make-fuse f (cdr x))))
+                   (else
+                    (error (string "invalid syntax \"" (deparse e) "\""))))))
+          ((and (pair? e) (eq? (car e) 'call))
+           (define (make-fuse- f x)
+             (if (and (eq? (identifier-name f) '^) (length= x 2) (integer? (cadr x)))
+                 (make-fuse '(top literal_pow)
+                            (list f (car x) (expand-forms `(call (call (core apply_type) (top Val) ,(cadr x))))))
+                 (make-fuse f x)))
+           (let ((f (cadr e)))
+             (cond ((dotop-named? f)
+                    (make-fuse- (undotop f) (cddr e)))
+                   ;; (.+)(a, b) is parsed as (call (|.| +) a b), but we still want it to fuse
+                   ((and (length= f 2) (eq? (car f) '|.|))
+                    (make-fuse- (cadr f) (cddr e)))
+                   (else
+                     e))))
+          ((and (pair? e) (eq? (car e) 'comparison))
+           (dot-to-fuse (expand-compare-chain (cdr e)) top))
+          ((and (pair? e) (eq? (car e) '.&&))
+           (make-fuse '(top andand) (cdr e)))
+          ((and (pair? e) (eq? (car e) '|.\|\||))
+           (make-fuse '(top oror) (cdr e)))
+          (else e)))
   (let ((e (dot-to-fuse rhs #t)) ; an expression '(fuse func args) if expr is a dot call
         (lhs-view (ref-to-view lhs))) ; x[...] expressions on lhs turn in to view(x, ...) to update x in-place
     (if (fuse? e)
@@ -1829,7 +1988,8 @@
 (define (lower-named-tuple lst
                            (dup-error-fn (lambda (name) (string "field name \"" name "\" repeated in named tuple")))
                            (name-str     "named tuple field")
-                           (syntax-str   "named tuple element"))
+                           (syntax-str   "named tuple element")
+                           (call-with-keyword-arguments? #f))
   (let* ((names (apply append
                        (map (lambda (x)
                               (cond ((symbol? x) (list x))
@@ -1898,21 +2058,287 @@
                          (if current
                              (merge current (cadr el))
                              `(call (top merge) (call (top NamedTuple)) ,(cadr el))))))
+                ((and call-with-keyword-arguments? (has-parameters? L))
+                 (error "more than one semicolon in argument list"))
                 (else
                  (error (string "invalid " syntax-str " \"" (deparse el) "\""))))))))
 
+(define (expand-condition cnd)
+  (let* ((blk? (and (pair? cnd) (eq? (car cnd) 'block)))
+         (stmts (if blk? (cdr (butlast cnd)) '()))
+         (test  (if blk? (last cnd) cnd)))
+    (if (and (pair? test) (memq (car test) '(&& |\|\||)))
+        (let* ((clauses `(,(car test) ,@(map expand-forms (cdr (flatten-ex (car test) test)))))
+               (clauses (if (null? (cdr clauses))
+                            (if (eq? (car clauses) '&&) '(true) '(false))
+                            clauses)))
+          (if blk?
+              `(block ,@(map expand-forms stmts) ,clauses)
+              clauses))
+        (expand-forms cnd))))
+
 (define (expand-if e)
-  (let* ((test (cadr e))
-         (blk? (and (pair? test) (eq? (car test) 'block)))
-         (stmts (if blk? (cdr (butlast test)) '()))
-         (test  (if blk? (last test) test)))
-    (if (and (pair? test) (eq? (car test) '&&))
-        (let ((clauses `(&& ,@(map expand-forms (cdr (flatten-ex '&& test))))))
-          `(if ,(if blk?
-                    `(block ,@(map expand-forms stmts) ,clauses)
-                    clauses)
-               ,@(map expand-forms (cddr e))))
-        (cons (car e) (map expand-forms (cdr e))))))
+  (list* (car e) (expand-condition (cadr e)) (map expand-forms (cddr e))))
+
+(define (expand-while e)
+  `(break-block loop-exit
+                (_while ,(expand-condition (cadr e))
+                        (break-block loop-cont
+                                     (scope-block ,(blockify (expand-forms (caddr e))))))))
+
+(define (expand-vcat e
+                     (vcat '((top vcat)))
+                     (hvcat '((top hvcat)))
+                     (hvcat_rows '((top hvcat_rows))))
+  (let ((a (cdr e)))
+    (if (any assignment? a)
+        (error (string "misplaced assignment statement in \"" (deparse e) "\"")))
+    (if (has-parameters? a)
+        (error "unexpected semicolon in array expression")
+        (expand-forms
+         (if (any (lambda (x)
+                    (and (pair? x) (eq? (car x) 'row)))
+                  a)
+             ;; convert nested hcat inside vcat to hvcat
+             (let ((rows (map (lambda (x)
+                                (if (and (pair? x) (eq? (car x) 'row))
+                                    (cdr x)
+                                    (list x)))
+                              a)))
+               ;; in case there is splatting inside `hvcat`, collect each row as a
+               ;; separate tuple and pass those to `hvcat_rows` instead (ref #38844)
+               (if (any (lambda (row) (any vararg? row)) rows)
+                   `(call ,@hvcat_rows ,@(map (lambda (x) `(tuple ,@x)) rows))
+                   `(call ,@hvcat
+                          (tuple ,@(map length rows))
+                          ,@(apply append rows))))
+             `(call ,@vcat ,@a))))))
+
+(define (expand-ncat e (hvncat '((top hvncat))))
+  (define (is-row a) (and (pair? a)
+                          (or (eq? (car a) 'row)
+                              (eq? (car a) 'nrow))))
+  (define (is-1d a) (not (any is-row a)))
+  (define (sum xs) (foldl + 0 xs))
+  (define (get-shape a is-row-first d)
+    (define (zip xss) (apply map list xss))
+    (define (get-next x)
+      (cond ((or (not (is-row x))
+                 (and (eq? (car x) 'nrow) (> d (1+ (cadr x))))
+                 (and (eq? (car x) 'row) (> d 1)))
+             (list x))
+            ((eq? (car x) 'nrow) (cddr x))
+            (else (cdr x))))
+    ; describe the shape of the concatenation
+    (cond ((or (= d 0)
+               (and (not is-row-first) (= d 1)))
+           (length a))
+          ((and is-row-first (= d 3))
+           (get-shape a is-row-first (1- d)))
+          (else
+           (let ((ashape
+                 (map (lambda (x)
+                        (get-shape (get-next x) is-row-first (1- d)))
+                      a)))
+             (if (pair? (car ashape))
+                 (let ((zipashape (zip ashape)))
+                   (cons (sum (car zipashape))
+                         (cons (car zipashape)
+                               (map (lambda (x)
+                                      (apply append x))
+                                    (cdr zipashape)))))
+                 (list (sum ashape) ashape))))))
+  (define (get-dims a is-row-first d)
+    (cond ((and (< d 2) (not (is-row (car a))))
+           (list (length a)))
+          ((= d 1)
+           (list (car (get-dims (cdar a) is-row-first 0)) (length a)))
+          ((and (= d 3) is-row-first)
+           (get-dims a is-row-first 2))
+          (else
+           (let ((anext (if (and (pair? (car a))
+                                 (eq? (caar a) 'nrow)
+                                 (= d (1+ (cadar a))))
+                            (cddar a)
+                            (list (car a)))))
+             (cons (length a) (get-dims anext is-row-first (1- d)))))))
+  (define (is-balanced s)
+    ; determine whether there are exactly the same number of elements along each axis
+    (= 0 (sum (map (lambda (x y)
+                     (sum (map (lambda (z)
+                                 (- z y))
+                               x)))
+                   (cdr s) (map car (cdr s))))))
+  (define (hasrows-flatten a)
+    ; (car <result>) stores if a row was observed
+    (foldl (lambda (x y)
+             (let ((r (car y))
+                   (yt (cdr y)))
+               (if (is-row x)
+                   (if (eq? (car x) 'nrow)
+                       (let* ((raflat (append (hasrows-flatten (cddr x))))
+                              (aflat (cdr raflat))
+                              (rinner (car raflat))
+                              (r (if (null? (or r rinner))
+                                     (and r rinner)
+                                     r)))
+                         (if (and (not (null? r))
+                                  (or (null? rinner) (and (not r) rinner))
+                                  (and (= (cadr x) 2) r))
+                             (error "cannot mix space and ;; separators in an array expression, except to wrap a line"))
+                         (cons (if (and (= (cadr x) 2) (null? r))
+                                   #f
+                                   r)
+                               (append aflat yt)))
+                     (if (or (null? r) r)
+                         (cons #t (append (reverse (cdr x)) yt))
+                         (error "cannot mix space and ;; separators in an array expression, except to wrap a line")))
+                 (cons r (cons x yt)))))
+           (list '()) a))
+  (define (tf a) (if a '(true) '(false)))
+  (define (tuplize s)
+    (cons 'tuple (reverse (map (lambda (x)
+                                 (cons 'tuple x))
+                               (cons (list (car s)) (cdr s))))))
+  (let* ((d (cadr e))
+         (a (cddr e))
+         (raflat (hasrows-flatten a))
+         (r (car raflat))
+         (is-row-first (if (null? r) #f r))
+         (aflat (reverse (cdr raflat))))
+    (if (any assignment? aflat)
+        (error (string "misplaced assignment statement in \"" (deparse e) "\"")))
+    (if (has-parameters? aflat)
+        (error "unexpected parameters in array expression"))
+    (expand-forms
+      (if (is-1d a)
+          `(call ,@hvncat ,d ,@aflat)
+          (if (any vararg? aflat)
+              (error (string "Splatting ... in an hvncat with multiple dimensions is not supported"))
+              (let ((shape (get-shape a is-row-first d)))
+                (if (is-balanced shape)
+                    (let ((dims `(tuple ,@(reverse (get-dims a is-row-first d)))))
+                     `(call ,@hvncat ,dims ,(tf is-row-first) ,@aflat))
+                    `(call ,@hvncat ,(tuplize shape) ,(tf is-row-first) ,@aflat))))))))
+
+(define (maybe-ssavalue lhss x in-lhs?)
+  (cond ((or (and (not (in-lhs? x lhss)) (symbol? x))
+             (ssavalue? x))
+          x)
+        ((and (pair? lhss) (vararg? (last lhss))
+              (eventually-call? (cadr (last lhss))))
+         (gensy))
+        (else (make-ssavalue))))
+
+(define (expand-property-destruct lhs x)
+  (if (not (length= lhs 1))
+      (error (string "invalid assignment location \"" (deparse `(tuple ,lhs)) "\"")))
+  (let* ((lhss (cdar lhs))
+         (xx   (maybe-ssavalue lhss x memq))
+         (ini  (if (eq? x xx) '() (list (sink-assignment xx (expand-forms x))))))
+    `(block
+       ,@ini
+       ,@(map
+           (lambda (field)
+             (let ((prop (cond ((symbol? field) field)
+                               ((and (pair? field) (eq? (car field) '|::|) (symbol? (cadr field)))
+                                (cadr field))
+                               (else
+                                (error (string "invalid assignment location \"" (deparse `(tuple ,lhs)) "\""))))))
+               (expand-forms `(= ,field (call (top getproperty) ,xx (quote ,prop))))))
+           lhss)
+       (unnecessary ,xx))))
+
+;; implement tuple destructuring, possibly with slurping
+;;
+;; `i`:    index of the current lhs arg
+;; `lhss`: remaining lhs args
+;; `xx`:   the rhs, already either an ssavalue or something simple
+;; `st`:   empty list if i=1, otherwise contains the iteration state
+;; `n`:    total nr of lhs args
+;; `end`:  car collects statements to be executed afterwards.
+;;         In general, actual assignments should only happen after
+;;         the whole iterater is desctructured (https://github.com/JuliaLang/julia/issues/40574)
+(define (destructure- i lhss xx n st end)
+  (if (null? lhss)
+      '()
+      (let* ((lhs  (car lhss))
+             (lhs- (cond ((or (symbol? lhs) (ssavalue? lhs))
+                          lhs)
+                         ((vararg? lhs)
+                          (let ((lhs- (cadr lhs)))
+                            (if (or (symbol? lhs-) (ssavalue? lhs-))
+                                lhs
+                                `(|...| ,(if (eventually-call? lhs-)
+                                             (gensy)
+                                             (make-ssavalue))))))
+                         ;; can't use ssavalues if it's a function definition
+                         ((eventually-call? lhs) (gensy))
+                         (else (make-ssavalue)))))
+        (if (and (vararg? lhs) (any vararg? (cdr lhss)))
+            (error "multiple \"...\" on lhs of assignment"))
+        (if (not (eq? lhs lhs-))
+            (if (vararg? lhs)
+                (set-car! end (cons (expand-forms `(= ,(cadr lhs) ,(cadr lhs-))) (car end)))
+                (set-car! end (cons (expand-forms `(= ,lhs ,lhs-)) (car end)))))
+        (if (vararg? lhs-)
+            (if (= i n)
+                (if (underscore-symbol? (cadr lhs-))
+                    '()
+                    (list (expand-forms
+                            `(= ,(cadr lhs-) (call (top rest) ,xx ,@(if (eq? i 1) '() `(,st)))))))
+                (let ((tail (if (eventually-call? lhs) (gensy) (make-ssavalue))))
+                  (cons (expand-forms
+                          (lower-tuple-assignment
+                            (list (cadr lhs-) tail)
+                            `(call (top split_rest) ,xx ,(- n i) ,@(if (eq? i 1) '() `(,st)))))
+                        (destructure- 1 (cdr lhss) tail (- n i) st end))))
+            (cons (expand-forms
+                    (lower-tuple-assignment
+                      (if (= i n)
+                          (list lhs-)
+                          (list lhs- st))
+                      `(call (top indexed_iterate)
+                             ,xx ,i ,@(if (eq? i 1) '() `(,st)))))
+                  (destructure- (+ i 1) (cdr lhss) xx n st end))))))
+
+(define (expand-tuple-destruct lhss x)
+  (define (sides-match? l r)
+    ;; l and r either have equal lengths, or r has a trailing ...
+    (cond ((null? l)          (null? r))
+          ((vararg? (car l))  #t)
+          ((null? r)          #f)
+          ((vararg? (car r))  (null? (cdr r)))
+          (else               (sides-match? (cdr l) (cdr r)))))
+  (if (and (pair? x) (pair? lhss) (eq? (car x) 'tuple) (not (any assignment? (cdr x)))
+           (not (has-parameters? (cdr x)))
+           (sides-match? lhss (cdr x)))
+      ;; (a, b, ...) = (x, y, ...)
+      (expand-forms
+       (tuple-to-assignments lhss x))
+      ;; (a, b, ...) = other
+      (begin
+        ;; like memq, but if lhs is (... sym), check against sym instead
+        (define (in-lhs? x lhss)
+          (if (null? lhss)
+              #f
+              (let ((l (car lhss)))
+                (cond ((and (pair? l) (eq? (car l) '|...|))
+                       (eq? (cadr l) x))
+                      ((eq? l x) #t)
+                      (else (in-lhs? x (cdr lhss)))))))
+        ;; in-lhs? also checks for invalid syntax, so always call it first
+        (let* ((xx  (maybe-ssavalue lhss x in-lhs?))
+               (ini (if (eq? x xx) '() (list (sink-assignment xx (expand-forms x)))))
+               (n   (length lhss))
+               (st  (gensy))
+               (end (list (list))))
+          `(block
+            ,@(if (> n 0) `((local ,st)) '())
+            ,@ini
+            ,@(destructure- 1 lhss xx n st end)
+            ,@(reverse (car end))
+            (unnecessary ,xx))))))
 
 ;; move an assignment into the last statement of a block to keep more statements at top level
 (define (sink-assignment lhs rhs)
@@ -1948,6 +2374,36 @@
         ,@(if (length= e 3) '(()) '())
         ,@(map expand-forms (cddr e))))
 
+   'opaque_closure
+   (lambda (e)
+     (let* ((ty   (and (length> e 2) (expand-forms (cadr e))))
+            (F    (if (length> e 2) (caddr e) (cadr e)))
+            (isva (let* ((arglist (function-arglist F))
+                         (lastarg (and (pair? arglist) (last arglist))))
+                    (if (and ty (any (lambda (arg)
+                                       (let ((arg (if (vararg? arg) (cadr arg) arg)))
+                                         (not (equal? (arg-type arg) '(core Any)))))
+                                     arglist))
+                        (error "Opaque closure argument type may not be specified both in the method signature and separately"))
+                    (if (or (varargexpr? lastarg) (vararg? lastarg))
+                        '(true) '(false))))
+            (meth (caddr (caddr (expand-forms F)))) ;; `method` expr
+            (lam       (cadddr meth))
+            (sig-block (caddr meth))
+            (sig-block (if (and (pair? sig-block) (eq? (car sig-block) 'block))
+                           sig-block
+                           `(block ,sig-block)))
+            (stmts     (cdr (butlast sig-block)))
+            (sig-svec  (last sig-block))
+            (typ-svec  (caddr sig-svec))
+            (tvars     (cddr (cadddr sig-svec)))
+            (argtypes  (cdddr typ-svec))
+            (functionloc (cadr (caddddr sig-svec))))
+       (let* ((argtype   (foldl (lambda (var ex) `(call (core UnionAll) ,var ,ex))
+                                (expand-forms `(curly (core Tuple) ,@argtypes))
+                                (reverse tvars))))
+         `(_opaque_closure ,(or ty argtype) ,isva ,(length argtypes) ,functionloc ,lam))))
+
    'block
    (lambda (e)
      (cond ((null? (cdr e)) '(null))
@@ -1959,8 +2415,17 @@
                   (map expand-forms (cdr e))))))
 
    '|.|
-   (lambda (e) ; e = (|.| f x)
-     (expand-fuse-broadcast '() e))
+   (lambda (e)
+     (if (length= e 2)
+         ;; e = (|.| op)
+         `(call (top BroadcastFunction) ,(cadr e))
+         ;; e = (|.| f x)
+         (expand-fuse-broadcast '() e)))
+
+   '.&&
+   (lambda (e) (expand-fuse-broadcast '() e))
+   '|.\|\||
+   (lambda (e) (expand-fuse-broadcast '() e))
 
    '.=
    (lambda (e)
@@ -1977,6 +2442,7 @@
    (lambda (e) (expand-forms (expand-wheres (cadr e) (cddr e))))
 
    'const  expand-const-decl
+   'atomic expand-atomic-decl
    'local  expand-local-or-global-decl
    'global expand-local-or-global-decl
    'local-def expand-local-or-global-decl
@@ -2039,44 +2505,16 @@
                 (call (top setproperty!) ,aa ,bb ,rr)
                 (unnecessary ,rr)))))
          ((tuple)
-          ;; multiple assignment
           (let ((lhss (cdr lhs))
                 (x    (caddr e)))
-            (define (sides-match? l r)
-              ;; l and r either have equal lengths, or r has a trailing ...
-              (cond ((null? l)          (null? r))
-                    ((null? r)          #f)
-                    ((vararg? (car r))  (null? (cdr r)))
-                    (else               (sides-match? (cdr l) (cdr r)))))
-            (if (and (pair? x) (pair? lhss) (eq? (car x) 'tuple)
-                     (sides-match? lhss (cdr x)))
-                ;; (a, b, ...) = (x, y, ...)
-                (expand-forms
-                 (tuple-to-assignments lhss x))
-                ;; (a, b, ...) = other
-                (let* ((xx  (if (or (and (symbol? x) (not (memq x lhss)))
-                                    (ssavalue? x))
-                                x (make-ssavalue)))
-                       (ini (if (eq? x xx) '() (list (sink-assignment xx (expand-forms x)))))
-                       (n   (length lhss))
-                       (st  (gensy)))
-                  `(block
-                    (local ,st)
-                    ,@ini
-                    ,.(map (lambda (i lhs)
-                             (expand-forms
-                              (lower-tuple-assignment
-                               (if (= i (- n 1))
-                                   (list lhs)
-                                   (list lhs st))
-                               `(call (top indexed_iterate)
-                                      ,xx ,(+ i 1) ,.(if (eq? i 0) '() `(,st))))))
-                           (iota n)
-                           lhss)
-                    (unnecessary ,xx))))))
+            (if (has-parameters? lhss)
+                ;; property destructuring
+                (expand-property-destruct lhss x)
+                ;; multiple assignment
+                (expand-tuple-destruct lhss x))))
          ((typed_hcat)
           (error "invalid spacing in left side of indexed assignment"))
-         ((typed_vcat)
+         ((typed_vcat typed_ncat)
           (error "unexpected \";\" in left side of indexed assignment"))
          ((ref)
           ;; (= (ref a . idxs) rhs)
@@ -2101,6 +2539,9 @@
                    `(call (top setindex!) ,arr ,r ,@new-idxs))
                  (unnecessary ,r))))))
          ((|::|)
+          ;; (= (|::| T) rhs) is an error
+          (if (null? (cddr lhs))
+              (error (string "invalid assignment location \"" (deparse lhs) "\"")))
           ;; (= (|::| x T) rhs)
           (let ((x (cadr lhs))
                 (T (caddr lhs))
@@ -2110,7 +2551,7 @@
                `(block ,@(cdr e)
                        (decl ,(car e) ,T)
                        (= ,(car e) ,rhs))))))
-         ((vcat)
+         ((vcat ncat)
           ;; (= (vcat . args) rhs)
           (error "use \"(a, b) = ...\" to assign multiple values"))
          (else
@@ -2160,6 +2601,9 @@
          (let ((f (cadr e)))
            (cond ((dotop-named? f)
                   (expand-fuse-broadcast '() `(|.| ,(undotop f) (tuple ,@(cddr e)))))
+                 ;; "(.op)(...)"
+                 ((and (length= f 2) (eq? (car f) '|.|))
+                  (expand-fuse-broadcast '() `(|.| ,(cadr f) (tuple ,@(cddr e)))))
                  ((eq? f 'ccall)
                   (if (not (length> e 4)) (error "too few arguments to ccall"))
                   (let* ((cconv (cadddr e))
@@ -2241,7 +2685,14 @@
    'bracescat (lambda (e) (error "{ } matrix syntax is discontinued"))
 
    'string
-   (lambda (e) (expand-forms `(call (top string) ,@(cdr e))))
+   (lambda (e)
+     (expand-forms
+       `(call (top string)
+              ,@(map (lambda (s)
+                       (if (and (length= s 2) (eq? (car s) 'string) (string? (cadr s)))
+                           (cadr s)
+                           s))
+                     (cdr e)))))
 
    '|::|
    (lambda (e)
@@ -2254,13 +2705,7 @@
 
    'if expand-if
    'elseif expand-if
-
-   'while
-   (lambda (e)
-     `(break-block loop-exit
-                   (_while ,(expand-forms (cadr e))
-                           (break-block loop-cont
-                                        (scope-block ,(blockify (expand-forms (caddr e))))))))
+   'while expand-while
 
    'break
    (lambda (e)
@@ -2334,27 +2779,9 @@
          (error (string "misplaced assignment statement in \"" (deparse e) "\"")))
      (expand-forms `(call (top hcat) ,@(cdr e))))
 
-   'vcat
-   (lambda (e)
-     (let ((a (cdr e)))
-       (if (any assignment? a)
-           (error (string "misplaced assignment statement in \"" (deparse e) "\"")))
-       (if (has-parameters? a)
-           (error "unexpected semicolon in array expression")
-           (expand-forms
-            (if (any (lambda (x)
-                       (and (pair? x) (eq? (car x) 'row)))
-                     a)
-                ;; convert nested hcat inside vcat to hvcat
-                (let ((rows (map (lambda (x)
-                                   (if (and (pair? x) (eq? (car x) 'row))
-                                       (cdr x)
-                                       (list x)))
-                                 a)))
-                  `(call (top hvcat)
-                         (tuple ,.(map length rows))
-                         ,.(apply append rows)))
-                `(call (top vcat) ,@a))))))
+   'vcat expand-vcat
+
+   'ncat expand-ncat
 
    'typed_hcat
    (lambda (e)
@@ -2365,23 +2792,14 @@
    'typed_vcat
    (lambda (e)
      (let ((t (cadr e))
-           (a (cddr e)))
-       (if (any assignment? (cddr e))
-           (error (string "misplaced assignment statement in \"" (deparse e) "\"")))
-       (expand-forms
-        (if (any (lambda (x)
-                   (and (pair? x) (eq? (car x) 'row)))
-                 a)
-            ;; convert nested hcat inside vcat to hvcat
-            (let ((rows (map (lambda (x)
-                               (if (and (pair? x) (eq? (car x) 'row))
-                                   (cdr x)
-                                   (list x)))
-                             a)))
-              `(call (top typed_hvcat) ,t
-                     (tuple ,.(map length rows))
-                     ,.(apply append rows)))
-            `(call (top typed_vcat) ,t ,@a)))))
+           (e (cdr e)))
+       (expand-vcat e `((top typed_vcat) ,t) `((top typed_hvcat) ,t) `((top typed_hvcat_rows) ,t))))
+
+   'typed_ncat
+   (lambda (e)
+     (let ((t (cadr e))
+           (e (cdr e)))
+       (expand-ncat e `((top typed_hvncat) ,t))))
 
    '|'|  (lambda (e) (expand-forms `(call |'| ,(cadr e))))
 
@@ -2450,7 +2868,7 @@
   (check-no-return expr)
   (if (has-break-or-continue? expr)
       (error "break or continue outside loop"))
-  (let ((result    (gensy))
+  (let ((result    (make-ssavalue))
         (idx       (gensy))
         (oneresult (make-ssavalue))
         (prod      (make-ssavalue))
@@ -2474,30 +2892,26 @@
     (let ((overall-itr (if (length= itrs 1) (car iv) prod)))
       `(scope-block
         (block
-         (local ,result) (local ,idx)
+         (local ,idx)
          ,.(map (lambda (v r) `(= ,v ,(caddr r))) iv itrs)
          ,.(if (length= itrs 1)
                '()
                `((= ,prod (call (top product) ,@iv))))
          (= ,isz (call (top IteratorSize) ,overall-itr))
          (= ,szunk (call (core isa) ,isz (top SizeUnknown)))
-         (if ,szunk
-             (= ,result (call (curly (core Array) ,ty 1) (core undef) 0))
-             (= ,result (call (top _array_for) ,ty ,overall-itr ,isz)))
+         (= ,result (call (top _array_for) ,ty ,overall-itr ,isz))
          (= ,idx (call (top first) (call (top LinearIndices) ,result)))
          ,(construct-loops (reverse itrs) (reverse iv))
          ,result)))))
 
 (define (lhs-vars e)
-  (cond ((symbol? e) (list e))
-        ((decl? e)   (list (decl-var e)))
+  (cond ((symdecl? e)   (list (decl-var e)))
         ((and (pair? e) (eq? (car e) 'tuple))
          (apply append (map lhs-vars (cdr e))))
         (else '())))
 
 (define (lhs-decls e)
-  (cond ((symbol? e) (list e))
-        ((decl? e)   (list e))
+  (cond ((symdecl? e)   (list e))
         ((and (pair? e) (eq? (car e) 'tuple))
          (apply append (map lhs-decls (cdr e))))
         (else '())))
@@ -2566,9 +2980,27 @@
   (or (valid-name? e)
       (error (string "invalid identifier name \"" e "\""))))
 
+(define (push-var! tab var val) (put! tab var (cons val (get tab var #f))))
+(define (pop-var! tab var) (put! tab var (cdr (get tab var))))
+
 (define (make-scope (lam #f) (args '()) (locals '()) (globals '()) (sp '()) (renames '()) (prev #f)
                     (soft? #f) (hard? #f) (implicit-globals '()) (warn-vars #f))
-  (vector lam args locals globals sp renames prev soft? hard? implicit-globals warn-vars))
+  (let ((tab (if prev (scope:table prev) (table))))
+    (for-each (lambda (v) (push-var! tab v v)) sp)
+    (for-each (lambda (v) (push-var! tab v v)) locals)
+    (for-each (lambda (pair) (push-var! tab (car pair) (cdr pair))) renames)
+    (for-each (lambda (v) (push-var! tab v `(outerref ,v))) globals)
+    (for-each (lambda (v) (push-var! tab v v)) args)
+    (vector lam args locals globals sp renames prev soft? hard? implicit-globals warn-vars tab)))
+
+(define (pop-scope! scope)
+  (let ((tab (scope:table scope)))
+    (for-each (lambda (v) (pop-var! tab v)) (scope:sp scope))
+    (for-each (lambda (v) (pop-var! tab v)) (scope:locals scope))
+    (for-each (lambda (pair) (pop-var! tab (car pair))) (scope:renames scope))
+    (for-each (lambda (v) (pop-var! tab v)) (scope:globals scope))
+    (for-each (lambda (v) (pop-var! tab v)) (scope:args scope))))
+
 (define (scope:lam s)     (aref s 0))
 (define (scope:args s)    (aref s 1))
 (define (scope:locals s)  (aref s 2))
@@ -2580,6 +3012,7 @@
 (define (scope:hard? s)   (aref s 8))
 (define (scope:implicit-globals s) (aref s 9))
 (define (scope:warn-vars s) (aref s 10))
+(define (scope:table s)   (aref s 11))
 
 (define (var-kind var scope (exclude-top-level-globals #f))
   (if scope
@@ -2617,20 +3050,10 @@
 ;; returns lambdas in the form (lambda (args...) (locals...) body)
 (define (resolve-scopes- e scope (sp '()) (loc #f))
   (cond ((symbol? e)
-         (let lookup ((scope scope))
-           (if scope
-               (cond ((memq e (scope:args scope)) e)
-                     ((memq e (scope:globals scope)) `(outerref ,e))
-                     (else
-                      (let ((r (assq e (scope:renames scope))))
-                        (cond (r (cdr r))
-                              ((memq e (scope:locals scope)) e)
-                              ((memq e (scope:sp scope)) e)
-                              (else
-                               (lookup (scope:prev scope)))))))
-               (if (underscore-symbol? e)
-                   e
-                   `(outerref ,e)))))
+         (let ((val (and scope (get (scope:table scope) e #f))))
+           (cond (val (car val))
+                 ((underscore-symbol? e) e)
+                 (else `(outerref ,e)))))
         ((or (not (pair? e)) (quoted? e) (memq (car e) '(toplevel symbolicgoto symboliclabel toplevel-only)))
          e)
         ((eq? (car e) 'global)
@@ -2668,7 +3091,9 @@
              '(true)))
         ((eq? (car e) 'lambda)
          (let* ((args (lam:argnames e))
-                (body (resolve-scopes- (lam:body e) (make-scope e args '() '() sp '() scope))))
+                (new-scope (make-scope e args '() '() sp '() scope))
+                (body (resolve-scopes- (lam:body e) new-scope)))
+           (pop-scope! new-scope)
            `(lambda ,(cadr e) ,(caddr e) ,body)))
         ((eq? (car e) 'scope-block)
          (let* ((blok            (cadr e)) ;; body of scope-block expression
@@ -2694,8 +3119,7 @@
                 (implicit-globals (if toplevel? nonloc-assigned '()))
                 (implicit-locals
                  (filter (if toplevel?
-                             ;; make only assigned gensyms implicitly local at top level
-                             some-gensym?
+                             (lambda (v) #f)  ;; no implicit locals at top level
                              (lambda (v) (and (memq (var-kind v scope #t) '(none static-parameter))
                                               (not (and soft?
                                                         (or (memq v (scope:implicit-globals scope))
@@ -2750,8 +3174,7 @@
                          (append (caddr lam) newnames newnames-def)))
            (insert-after-meta ;; return the new, expanded scope-block
             (blockify
-             (resolve-scopes- blok
-                              (make-scope lam
+             (let ((new-scope (make-scope lam
                                           '()
                                           (append locals-nondef locals-def)
                                           globals
@@ -2764,9 +3187,10 @@
                                           (if toplevel?
                                               implicit-globals
                                               (scope:implicit-globals scope))
-                                          warn-vars)
-                              '()
-                              loc))
+                                          warn-vars)))
+               (begin0
+                (resolve-scopes- blok new-scope '() loc)
+                (pop-scope! new-scope))))
             (append! (map (lambda (v) `(local ,v)) newnames)
                      (map (lambda (v) `(local-def ,v)) newnames-def)))
            ))
@@ -2836,14 +3260,20 @@
 (define (free-vars e)
   (table.keys (free-vars- e (table))))
 
-(define (analyze-vars-lambda e env captvars sp new-sp (methsig #f))
+(define (vinfo-to-table vi)
+  (let ((tab (table)))
+    (for-each (lambda (v) (put! tab (car v) v))
+              vi)
+    tab))
+
+(define (analyze-vars-lambda e env captvars sp new-sp methsig tab)
   (let* ((args (lam:args e))
          (locl (caddr e))
          (allv (nconc (map arg-name args) locl))
          (fv   (let* ((fv (diff (free-vars (lam:body e)) allv))
                       ;; add variables referenced in declared types for free vars
                       (dv (apply nconc (map (lambda (v)
-                                              (let ((vi (var-info-for v env)))
+                                              (let ((vi (get tab v #f)))
                                                 (if vi (free-vars (vinfo:type vi)) '())))
                                             fv))))
                  (append (diff dv fv) fv)))
@@ -2863,15 +3293,17 @@
                                                  (not (memq (vinfo:name v) new-sp))
                                                  (not (memq (vinfo:name v) glo))))
                                 env)
-                        (map make-var-info capt-sp))))
-    (analyze-vars (lam:body e)
-                  (append vi
+                        (map make-var-info capt-sp)))
+         (new-env (append vi
                           ;; new environment: add our vars
                           (filter (lambda (v)
                                     (and (not (memq (vinfo:name v) allv))
                                          (not (memq (vinfo:name v) glo))))
-                                  env))
-                  cv (delete-duplicates (append new-sp sp)))
+                                  env))))
+    (analyze-vars (lam:body e)
+                  new-env
+                  cv (delete-duplicates (append new-sp sp))
+                  (vinfo-to-table new-env))
     ;; mark all the vars we capture as captured
     (for-each (lambda (v) (vinfo:set-capt! v #t))
               cv)
@@ -2886,36 +3318,36 @@
 ;; in-place to
 ;;   (var-info-lst captured-var-infos ssavalues static_params)
 ;; where var-info-lst is a list of var-info records
-(define (analyze-vars e env captvars sp)
+(define (analyze-vars e env captvars sp tab)
   (if (or (atom? e) (quoted? e))
       (begin
         (if (symbol? e)
-            (let ((vi (var-info-for e env)))
+            (let ((vi (get tab e #f)))
               (if vi
                   (vinfo:set-read! vi #t))))
         e)
       (case (car e)
         ((local-def) ;; a local that we know has an assignment that dominates all usages
-         (let ((vi (var-info-for (cadr e) env)))
+         (let ((vi (get tab (cadr e) #f)))
               (vinfo:set-never-undef! vi #t)))
         ((=)
-         (let ((vi (and (symbol? (cadr e)) (var-info-for (cadr e) env))))
+         (let ((vi (and (symbol? (cadr e)) (get tab (cadr e) #f))))
            (if vi ; if local or captured
                (begin (if (vinfo:asgn vi)
                           (vinfo:set-sa! vi #f)
                           (vinfo:set-sa! vi #t))
                       (vinfo:set-asgn! vi #t))))
-         (analyze-vars (caddr e) env captvars sp))
+         (analyze-vars (caddr e) env captvars sp tab))
         ((call)
-         (let ((vi (var-info-for (cadr e) env)))
+         (let ((vi (get tab (cadr e) #f)))
            (if vi
                (vinfo:set-called! vi #t))
-           (for-each (lambda (x) (analyze-vars x env captvars sp))
+           (for-each (lambda (x) (analyze-vars x env captvars sp tab))
                      (cdr e))))
         ((decl)
          ;; handle var::T declaration by storing the type in the var-info
          ;; record. for non-symbols or globals, emit a type assertion.
-         (let ((vi (var-info-for (cadr e) env)))
+         (let ((vi (get tab (cadr e) #f)))
            (if vi
                (begin (if (not (equal? (vinfo:type vi) '(core Any)))
                           (error (string "multiple type declarations for \""
@@ -2925,31 +3357,31 @@
                                          "\" declared in inner scope")))
                       (vinfo:set-type! vi (caddr e))))))
         ((lambda)
-         (analyze-vars-lambda e env captvars sp '()))
+         (analyze-vars-lambda e env captvars sp '() #f tab))
         ((with-static-parameters)
          ;; (with-static-parameters func_expr sp_1 sp_2 ...)
          (assert (eq? (car (cadr e)) 'lambda))
          (analyze-vars-lambda (cadr e) env captvars sp
-                              (cddr e)))
+                              (cddr e) #f tab))
         ((method)
          (if (length= e 2)
-             (let ((vi (var-info-for (method-expr-name e) env)))
+             (let ((vi (get tab (method-expr-name e) #f)))
                (if vi
                    (begin (if (vinfo:asgn vi)
                               (vinfo:set-sa! vi #f)
                               (vinfo:set-sa! vi #t))
                           (vinfo:set-asgn! vi #t)))
                e)
-             (begin (analyze-vars (caddr e) env captvars sp)
+             (begin (analyze-vars (caddr e) env captvars sp tab)
                     (assert (eq? (car (cadddr e)) 'lambda))
                     (analyze-vars-lambda (cadddr e) env captvars sp
                                          (method-expr-static-parameters e)
-                                         (caddr e)))))
+                                         (caddr e) tab))))
         ((module toplevel) e)
-        (else (for-each (lambda (x) (analyze-vars x env captvars sp))
+        (else (for-each (lambda (x) (analyze-vars x env captvars sp tab))
                         (cdr e))))))
 
-(define (analyze-variables! e) (analyze-vars e '() '() '()) e)
+(define (analyze-variables! e) (analyze-vars e '() '() '() (table)) e)
 
 ;; pass 4: closure conversion
 
@@ -2975,10 +3407,11 @@ f(x) = yt(x)
                 ,@(map (lambda (p n) `(= ,p (call (core TypeVar) ',n (core Any)))) P names)
                 (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec) ,@P)
                             (call (core svec) ,@(map quotify fields))
+                            (call (core svec))
                             (false) ,(length fields)))
+                (call (core _setsuper!) ,s ,super)
                 (= (outerref ,name) ,s)
-                (call (core _setsuper!) ,name ,super)
-                (call (core _typebody!) ,name (call (core svec) ,@types))
+                (call (core _typebody!) ,s (call (core svec) ,@types))
                 (return (null))))))))
 
 (define (type-for-closure name fields super)
@@ -2988,10 +3421,11 @@ f(x) = yt(x)
                (block (global ,name) (const ,name)
                       (= ,s (call (core _structtype) (thismodule) (inert ,name) (call (core svec))
                                   (call (core svec) ,@(map quotify fields))
+                                  (call (core svec))
                                   (false) ,(length fields)))
+                      (call (core _setsuper!) ,s ,super)
                       (= (outerref ,name) ,s)
-                      (call (core _setsuper!) ,name ,super)
-                      (call (core _typebody!) ,name
+                      (call (core _typebody!) ,s
                             (call (core svec) ,@(map (lambda (v) '(core Box)) fields)))
                       (return (null))))))))
 
@@ -3026,9 +3460,9 @@ f(x) = yt(x)
 (define (clear-capture-bits vinfos)
   (map vinfo:not-capt vinfos))
 
-(define (convert-lambda lam fname interp capt-sp)
+(define (convert-lambda lam fname interp capt-sp opaq)
   (let ((body (add-box-inits-to-body
-                lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp))))
+               lam (cl-convert (cadddr lam) fname lam (table) (table) #f interp opaq))))
     `(lambda ,(lam:args lam)
        (,(clear-capture-bits (car (lam:vinfo lam)))
         ()
@@ -3068,12 +3502,34 @@ f(x) = yt(x)
             `(block (= ,temp ,(renumber-assigned-ssavalues t)) ,ex)
             ex))))
 
+(define (capt-var-access var fname opaq)
+  (if opaq
+      `(call (core getfield) ,fname ,(get opaq var))
+      `(call (core getfield) ,fname (inert ,var))))
+
+(define (convert-global-assignment var rhs0 globals)
+  (let* ((rhs1 (if (or (simple-atom? rhs0)
+                       (equal? rhs0 '(the_exception)))
+                   rhs0
+                   (make-ssavalue)))
+         (ref   (binding-to-globalref var))
+         (ty   `(call (core get_binding_type) ,(cadr ref) (inert ,(caddr ref))))
+         (rhs  (if (get globals ref #t) ;; no type declaration for constants
+                   (convert-for-type-decl rhs1 ty)
+                   rhs1))
+         (ex   `(= ,var ,rhs)))
+    (if (eq? rhs1 rhs0)
+        `(block ,ex ,rhs0)
+        `(block (= ,rhs1 ,rhs0)
+                ,ex
+                ,rhs1))))
+
 ;; convert assignment to a closed variable to a setfield! call.
 ;; while we're at it, generate `convert` calls for variables with
 ;; declared types.
 ;; when doing this, the original value needs to be preserved, to
 ;; ensure the expression `a=b` always returns exactly `b`.
-(define (convert-assignment var rhs0 fname lam interp)
+(define (convert-assignment var rhs0 fname lam interp opaq globals)
   (cond
     ((symbol? var)
      (let* ((vi (assq var (car  (lam:vinfo lam))))
@@ -3084,18 +3540,20 @@ f(x) = yt(x)
             (closed (and cv (vinfo:asgn cv) (vinfo:capt cv)))
             (capt   (and vi (vinfo:asgn vi) (vinfo:capt vi))))
        (if (and (not closed) (not capt) (equal? vt '(core Any)))
-           `(= ,var ,rhs0)
+           (if (or (local-in? var lam) (underscore-symbol? var))
+               `(= ,var ,rhs0)
+               (convert-global-assignment var rhs0 globals))
            (let* ((rhs1 (if (or (simple-atom? rhs0)
                                 (equal? rhs0 '(the_exception)))
                             rhs0
                             (make-ssavalue)))
                   (rhs  (if (equal? vt '(core Any))
                             rhs1
-                            (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp))))
+                            (convert-for-type-decl rhs1 (cl-convert vt fname lam #f #f #f interp opaq))))
                   (ex (cond (closed `(call (core setfield!)
                                            ,(if interp
                                                 `($ ,var)
-                                                `(call (core getfield) ,fname (inert ,var)))
+                                                (capt-var-access var fname opaq))
                                            (inert contents)
                                            ,rhs))
                             (capt `(call (core setfield!) ,var (inert contents) ,rhs))
@@ -3105,10 +3563,8 @@ f(x) = yt(x)
                  `(block (= ,rhs1 ,rhs0)
                          ,ex
                          ,rhs1))))))
-     ((and (pair? var) (or (eq? (car var) 'outerref)
-                           (eq? (car var) 'globalref)))
-
-      `(= ,var ,rhs0))
+     ((or (outerref? var) (globalref? var))
+      (convert-global-assignment var rhs0 globals))
      ((ssavalue? var)
       `(= ,var ,rhs0))
      (else
@@ -3205,12 +3661,12 @@ f(x) = yt(x)
                    (lambda (x) (and (pair? x) (not (eq? (car x) 'lambda)))))))
 
 (define lambda-opt-ignored-exprs
-  (Set '(quote top core line inert local local-def unnecessary copyast
+  (Set '(quote top core line inert local-def unnecessary copyast
          meta inbounds boundscheck loopinfo decl aliasscope popaliasscope
          thunk with-static-parameters toplevel-only
          global globalref outerref const-if-global thismodule
-         const null true false ssavalue isdefined toplevel module lambda error
-         gc_preserve_begin gc_preserve_end import using export)))
+         const atomic null true false ssavalue isdefined toplevel module lambda
+         error gc_preserve_begin gc_preserve_end import using export inline noinline)))
 
 (define (local-in? s lam)
   (or (assq s (car  (lam:vinfo lam)))
@@ -3233,6 +3689,7 @@ f(x) = yt(x)
   ;; are never used undef.
   (let ((vi     (car (lam:vinfo lam)))
         (args   (lam:argnames lam))
+        (decl   (table))
         (unused (table))  ;; variables not (yet) used (read from) in the current block
         (live   (table))  ;; variables that have been set in the current block
         (seen   (table))) ;; all variables we've seen assignments to
@@ -3267,6 +3724,17 @@ f(x) = yt(x)
           (begin (put! live var #t)
                  (put! seen var #t)
                  (del! unused var))))
+    (define (declare! var)
+      (if (has? unused var)
+          (put! decl var #t)))
+    (define (leave-loop! old-decls)
+      ;; at the end of a loop, remove live variables that were declared outside,
+      ;; since those might be assigned multiple times (issue #37690)
+      (for-each (lambda (k)
+                  (if (has? old-decls k)
+                      (del! live k)))
+                (table.keys live))
+      (set! decl old-decls))
     (define (visit e)
       ;; returns whether e contained a symboliclabel
       (cond ((atom? e) (if (symbol? e) (mark-used e))
@@ -3275,7 +3743,7 @@ f(x) = yt(x)
              #f)
             ((eq? (car e) 'scope-block)
              (visit (cadr e)))
-            ((memq (car e) '(block call new splatnew _do_while))
+            ((memq (car e) '(block call new splatnew new_opaque_closure))
              (eager-any visit (cdr e)))
             ((eq? (car e) 'break-block)
              (visit (caddr e)))
@@ -3288,7 +3756,7 @@ f(x) = yt(x)
             ((eq? (car e) 'symboliclabel)
              (kill)
              #t)
-            ((memq (car e) '(if elseif _while trycatch tryfinally))
+            ((memq (car e) '(if elseif trycatch tryfinally trycatchelse))
              (let ((prev (table.clone live)))
                (if (eager-any (lambda (e) (begin0 (visit e)
                                                   (kill)))
@@ -3297,9 +3765,22 @@ f(x) = yt(x)
                    ;; variable initialization
                    (begin (kill) #t)
                    (begin (restore prev) #f))))
+            ((or (eq? (car e) '_while) (eq? (car e) '_do_while))
+             (let ((prev  (table.clone live))
+                   (decl- (table.clone decl)))
+               (let ((result (eager-any visit (cdr e))))
+                 (if (eq? (car e) '_while)
+                     (kill))  ;; body might not have run
+                 (leave-loop! decl-)
+                 (if result
+                     #t
+                     (begin (restore prev) #f)))))
             ((eq? (car e) '=)
              (begin0 (visit (caddr e))
                      (assign! (cadr e))))
+            ((eq? (car e) 'local)
+             (declare! (cadr e))
+             #f)
             ((eq? (car e) 'method)
              (if (length> e 2)
                  (let* ((mn          (method-expr-name e))
@@ -3341,25 +3822,32 @@ f(x) = yt(x)
         (and cv (vinfo:asgn cv) (vinfo:capt cv)))))
 
 (define (toplevel-preserving? e)
-  (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally))))
+  (and (pair? e) (memq (car e) '(if elseif block trycatch tryfinally trycatchelse))))
 
-(define (map-cl-convert exprs fname lam namemap defined toplevel interp)
+(define (map-cl-convert exprs fname lam namemap defined toplevel interp opaq (globals (table)))
   (if toplevel
       (map (lambda (x)
              (let ((tl (lift-toplevel (cl-convert x fname lam namemap defined
                                                   (and toplevel (toplevel-preserving? x))
-                                                  interp))))
+                                                  interp opaq globals))))
                (if (null? (cdr tl))
                    (car tl)
                    `(block ,@(cdr tl) ,(car tl)))))
            exprs)
-      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp)) exprs)))
+      (map (lambda (x) (cl-convert x fname lam namemap defined #f interp opaq globals)) exprs)))
+
+(define (prepare-lambda! lam)
+  ;; mark all non-arguments as assigned, since locals that are never assigned
+  ;; need to be handled the same as those that are (i.e., boxed).
+  (for-each (lambda (vi) (vinfo:set-asgn! vi #t))
+            (list-tail (car (lam:vinfo lam)) (length (lam:args lam))))
+  (lambda-optimize-vars! lam))
 
-(define (cl-convert e fname lam namemap defined toplevel interp)
+(define (cl-convert e fname lam namemap defined toplevel interp opaq (globals (table)))
   (if (and (not lam)
-           (not (and (pair? e) (memq (car e) '(lambda method macro)))))
+           (not (and (pair? e) (memq (car e) '(lambda method macro opaque_closure)))))
       (if (atom? e) e
-          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp)))
+          (cons (car e) (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals)))
       (cond
        ((symbol? e)
         (define (new-undef-var name)
@@ -3378,7 +3866,7 @@ f(x) = yt(x)
                  (val (if (equal? typ '(core Any))
                           val
                           `(call (core typeassert) ,val
-                                 ,(cl-convert typ fname lam namemap defined toplevel interp)))))
+                                 ,(cl-convert typ fname lam namemap defined toplevel interp opaq globals)))))
             `(block
                ,@(if (eq? box access) '() `((= ,access ,box)))
                ,undefcheck
@@ -3390,7 +3878,7 @@ f(x) = yt(x)
                 (cv
                  (let ((access (if interp
                                    `($ (call (core QuoteNode) ,e))
-                                   `(call (core getfield) ,fname (inert ,e)))))
+                                   (capt-var-access e fname opaq))))
                    (if (and (vinfo:asgn cv) (vinfo:capt cv))
                        (get-box-contents access (vinfo:type cv))
                        access)))
@@ -3410,8 +3898,8 @@ f(x) = yt(x)
            e)
           ((=)
            (let ((var (cadr e))
-                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp)))
-             (convert-assignment var rhs fname lam interp)))
+                 (rhs (cl-convert (caddr e) fname lam namemap defined toplevel interp opaq globals)))
+             (convert-assignment var rhs fname lam interp opaq globals)))
           ((local-def) ;; make new Box for local declaration of defined variable
            (let ((vi (assq (cadr e) (car (lam:vinfo lam)))))
              (if (and vi (vinfo:asgn vi) (vinfo:capt vi))
@@ -3424,7 +3912,10 @@ f(x) = yt(x)
                  (if (vinfo:never-undef vi)
                      '(null)
                      `(newvar ,(cadr e))))))
-          ((const) e)
+          ((const)
+           (put! globals (binding-to-globalref (cadr e)) #f)
+           e)
+          ((atomic) e)
           ((const-if-global)
            (if (local-in? (cadr e) lam)
                '(null)
@@ -3439,7 +3930,7 @@ f(x) = yt(x)
                     (if (and (vinfo:asgn cv) (vinfo:capt cv))
                         (let ((access (if interp
                                           `($ (call (core QuoteNode) ,sym))
-                                          `(call (core getfield) ,fname (inert ,sym)))))
+                                          (capt-var-access sym fname opaq))))
                           `(call (core isdefined) ,access (inert contents)))
                         '(true)))
                    (vi
@@ -3447,6 +3938,24 @@ f(x) = yt(x)
                         `(call (core isdefined) ,sym (inert contents))
                         e))
                    (else e))))
+          ((_opaque_closure)
+           (let* ((isva  (caddr e))
+                  (nargs (cadddr e))
+                  (functionloc (caddddr e))
+                  (lam2  (last e))
+                  (vis   (lam:vinfo lam2))
+                  (cvs   (map car (cadr vis))))
+             (prepare-lambda! lam2)
+             (let ((var-exprs (map (lambda (v)
+                                     (let ((cv (assq v (cadr (lam:vinfo lam)))))
+                                       (if cv
+                                           (capt-var-access v fname opaq)
+                                           v)))
+                                   cvs)))
+               `(new_opaque_closure
+                 ,(cadr e) (call (core apply_type) (core Union)) (core Any)
+                 (opaque_closure_method (null) ,nargs ,isva ,functionloc ,(convert-lambda lam2 (car (lam:args lam2)) #f '() (symbol-to-idx-map cvs)))
+                 ,@var-exprs))))
           ((method)
            (let* ((name  (method-expr-name e))
                   (short (length= e 2))  ;; function f end
@@ -3459,7 +3968,7 @@ f(x) = yt(x)
                   (sp-inits (if (or short (not (eq? (car sig) 'block)))
                                 '()
                                 (map-cl-convert (butlast (cdr sig))
-                                                fname lam namemap defined toplevel interp)))
+                                                fname lam namemap defined toplevel interp opaq globals)))
                   (sig      (and sig (if (eq? (car sig) 'block)
                                          (last sig)
                                          sig))))
@@ -3468,13 +3977,7 @@ f(x) = yt(x)
                             (error (string "cannot add method to function argument " name)))
                         (if (eqv? (string.char (string name) 0) #\@)
                             (error "macro definition not allowed inside a local scope"))))
-             (if lam2
-                 (begin
-                   ;; mark all non-arguments as assigned, since locals that are never assigned
-                   ;; need to be handled the same as those that are (i.e., boxed).
-                   (for-each (lambda (vi) (vinfo:set-asgn! vi #t))
-                             (list-tail (car (lam:vinfo lam2)) (length (lam:args lam2))))
-                   (lambda-optimize-vars! lam2)))
+             (if lam2 (prepare-lambda! lam2))
              (if (not local) ;; not a local function; will not be closure converted to a new type
                  (cond (short (if (has? defined (cadr e))
                                   e
@@ -3492,21 +3995,21 @@ f(x) = yt(x)
                                           ;; anonymous functions with keyword args generate global
                                           ;; functions that refer to the type of a local function
                                           (rename-sig-types sig namemap)
-                                          fname lam namemap defined toplevel interp)
+                                          fname lam namemap defined toplevel interp opaq globals)
                                   ,(let ((body (add-box-inits-to-body
                                                 lam2
-                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp))))
+                                                (cl-convert (cadddr lam2) 'anon lam2 (table) (table) #f interp opaq))))
                                      `(lambda ,(cadr lam2)
                                         (,(clear-capture-bits (car vis))
                                          ,@(cdr vis))
                                         ,body)))))
                        (else
-                        (let* ((exprs     (lift-toplevel (convert-lambda lam2 '|#anon| #t '())))
+                        (let* ((exprs     (lift-toplevel (convert-lambda lam2 '|#anon| #t '() #f)))
                                (top-stmts (cdr exprs))
                                (newlam    (compact-and-renumber (linearize (car exprs)) 'none 0)))
                           `(toplevel-butfirst
                             (block ,@sp-inits
-                                   (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp)
+                                   (method ,name ,(cl-convert sig fname lam namemap defined toplevel interp opaq globals)
                                            ,(julia-bq-macro newlam)))
                             ,@top-stmts))))
 
@@ -3609,19 +4112,19 @@ f(x) = yt(x)
                                (append (map (lambda (gs tvar)
                                               (make-assignment gs `(call (core TypeVar) ',tvar (core Any))))
                                             closure-param-syms closure-param-names)
-                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp)
+                                       `((method #f ,(cl-convert arg-defs fname lam namemap defined toplevel interp opaq globals)
                                                  ,(convert-lambda lam2
                                                                   (if iskw
                                                                       (caddr (lam:args lam2))
                                                                       (car (lam:args lam2)))
-                                                                  #f closure-param-names)))))))
+                                                                  #f closure-param-names #f)))))))
                         (mk-closure  ;; expression to make the closure
                          (let* ((var-exprs (map (lambda (v)
                                                   (let ((cv (assq v (cadr (lam:vinfo lam)))))
                                                     (if cv
                                                         (if interp
                                                             `($ (call (core QuoteNode) ,v))
-                                                            `(call (core getfield) ,fname (inert ,v)))
+                                                            (capt-var-access v fname opaq))
                                                         v)))
                                                 capt-vars))
                                 (P (append
@@ -3648,7 +4151,7 @@ f(x) = yt(x)
                        (begin
                          (put! defined name #t)
                          `(toplevel-butfirst
-                           ,(convert-assignment name mk-closure fname lam interp)
+                           ,(convert-assignment name mk-closure fname lam interp opaq globals)
                            ,@typedef
                            ,@(map (lambda (v) `(moved-local ,v)) moved-vars)
                            ,@sp-inits
@@ -3661,14 +4164,14 @@ f(x) = yt(x)
                                        (table)
                                        (table)
                                        (null? (cadr e)) ;; only toplevel thunks have 0 args
-                                       interp)))
+                                       interp opaq globals)))
              `(lambda ,(cadr e)
                 (,(clear-capture-bits (car (lam:vinfo e)))
                  () ,@(cddr (lam:vinfo e)))
                 (block ,@body))))
           ;; remaining `::` expressions are type assertions
           ((|::|)
-           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp))
+           (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp opaq globals))
           ;; remaining `decl` expressions are only type assertions if the
           ;; argument is global or a non-symbol.
           ((decl)
@@ -3676,17 +4179,24 @@ f(x) = yt(x)
                        (local-in? (cadr e) lam))
                   '(null))
                  (else
-                  (if (or (symbol? (cadr e)) (and (pair? (cadr e)) (eq? (caadr e) 'outerref)))
-                      (error "type declarations on global variables are not yet supported"))
-                  (cl-convert `(call (core typeassert) ,@(cdr e)) fname lam namemap defined toplevel interp))))
+                  (cl-convert
+                    (let ((ref (binding-to-globalref (cadr e))))
+                      (if ref
+                          (begin
+                            (put! globals ref #t)
+                            `(block
+                               (toplevel-only set_binding_type! ,(cadr e))
+                               (call (core set_binding_type!) ,(cadr ref) (inert ,(caddr ref)) ,(caddr e))))
+                          `(call (core typeassert) ,@(cdr e))))
+                    fname lam namemap defined toplevel interp opaq globals))))
           ;; `with-static-parameters` expressions can be removed now; used only by analyze-vars
           ((with-static-parameters)
-           (cl-convert (cadr e) fname lam namemap defined toplevel interp))
+           (cl-convert (cadr e) fname lam namemap defined toplevel interp opaq globals))
           (else
            (cons (car e)
-                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp))))))))
+                 (map-cl-convert (cdr e) fname lam namemap defined toplevel interp opaq globals))))))))
 
-(define (closure-convert e) (cl-convert e #f #f #f #f #f #f))
+(define (closure-convert e) (cl-convert e #f #f (table) (table) #f #f #f))
 
 ;; pass 5: convert to linear IR
 
@@ -3709,7 +4219,7 @@ f(x) = yt(x)
   (or (ssavalue? lhs)
       (valid-ir-argument? e)
       (and (symbol? lhs) (pair? e)
-           (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast)))))
+           (memq (car e) '(new splatnew the_exception isdefined call invoke foreigncall cfunction gc_preserve_begin copyast new_opaque_closure)))))
 
 (define (valid-ir-return? e)
   ;; returning lambda directly is needed for @generated
@@ -3786,17 +4296,21 @@ f(x) = yt(x)
                                      (loop (cdr s))))))
             `(pop_exception ,restore-token))))
     (define (emit-return x)
-      (define (actually-return x)
-        (let* ((x   (if rett
-                        (compile (convert-for-type-decl x rett) '() #t #f)
-                        x))
-               (tmp (if ((if (null? catch-token-stack) valid-ir-return? simple-atom?) x)
+      (define (emit- x)
+        (let* ((tmp (if ((if (null? catch-token-stack) valid-ir-return? simple-atom?) x)
                         #f
                         (make-ssavalue))))
-          (if tmp (emit `(= ,tmp ,x)))
+          (if tmp
+              (begin (emit `(= ,tmp ,x)) tmp)
+              x)))
+      (define (actually-return x)
+        (let* ((x (if rett
+                      (compile (convert-for-type-decl (emit- x) rett) '() #t #f)
+                      x))
+               (x (emit- x)))
           (let ((pexc (pop-exc-expr catch-token-stack '())))
             (if pexc (emit pexc)))
-          (emit `(return ,(or tmp x)))))
+          (emit `(return ,x))))
       (if x
           (if (> handler-level 0)
               (let ((tmp (cond ((and (simple-atom? x) (or (not (ssavalue? x)) (not finally-handler))) #f)
@@ -3828,13 +4342,14 @@ f(x) = yt(x)
     (define (check-top-level e)
       (define (head-to-text h)
         (case h
-          ((abstract_type)  "\"abstract type\"")
-          ((primitive_type) "\"primitive type\"")
-          ((struct_type)    "\"struct\"")
-          ((method)         "method definition")
-          (else             (string "\"" h "\""))))
+          ((abstract_type)     "\"abstract type\" expression")
+          ((primitive_type)    "\"primitive type\" expression")
+          ((struct_type)       "\"struct\" expression")
+          ((method)            "method definition")
+          ((set_binding_type!) (string "type declaration for global \"" (deparse (cadr e)) "\""))
+          (else                (string "\"" h "\" expression"))))
       (if (not (null? (cadr lam)))
-          (error (string (head-to-text (car e)) " expression not at top level"))))
+          (error (string (head-to-text (car e)) " not at top level"))))
     ;; evaluate the arguments of a call, creating temporary locations as needed
     (define (compile-args lst break-labels)
       (if (null? lst) '()
@@ -3875,6 +4390,29 @@ f(x) = yt(x)
               (emit `(= ,tmp ,cnd))
               tmp)
             cnd)))
+    (define (emit-cond cnd break-labels endl)
+      (let* ((cnd (if (and (pair? cnd) (eq? (car cnd) 'block))
+                       (begin (if (length> cnd 2) (compile (butlast cnd) break-labels #f #f))
+                              (last cnd))
+                       cnd))
+             (or? (and (pair? cnd) (eq? (car cnd) '|\|\||)))
+             (tests (if or?
+                        (let ((short-circuit `(goto _)))
+                          (for-each
+                            (lambda (clause)
+                              (let ((jmp (emit `(gotoifnot ,(compile-cond clause break-labels) ,endl))))
+                                (emit short-circuit)
+                                (set-car! (cddr jmp) (make&mark-label))))
+                            (butlast (cdr cnd)))
+                          (let ((last-jmp (emit `(gotoifnot ,(compile-cond (last (cdr cnd)) break-labels) ,endl))))
+                            (set-car! (cdr short-circuit) (make&mark-label))
+                            (list last-jmp)))
+                        (map (lambda (clause)
+                               (emit `(gotoifnot ,(compile-cond clause break-labels) ,endl)))
+                             (if (and (pair? cnd) (eq? (car cnd) '&&))
+                                 (cdr cnd)
+                                 (list cnd))))))
+          tests))
     (define (emit-assignment lhs rhs)
       (if rhs
           (if (valid-ir-rvalue? lhs rhs)
@@ -3907,14 +4445,15 @@ f(x) = yt(x)
                   ((and (pair? e1) (eq? (car e1) 'globalref)) (emit e1) #f) ;; keep globals for undefined-var checking
                   (else #f)))
           (case (car e)
-            ((call new splatnew foreigncall cfunction)
+            ((call new splatnew foreigncall cfunction new_opaque_closure)
+             (define (atom-or-not-tuple-call? fptr)
+               (or (atom? fptr)
+                   (not (tuple-call? fptr))))
              (let* ((args
                      (cond ((eq? (car e) 'foreigncall)
                             ;; NOTE: 2nd to 5th arguments of ccall must be left in place
                             ;;       the 1st should be compiled if an atom.
-                            (append (if (let ((fptr (cadr e)))
-                                          (or (atom? fptr)
-                                              (not (tuple-call? fptr))))
+                            (append (if (atom-or-not-tuple-call? (cadr e))
                                         (compile-args (list (cadr e)) break-labels)
                                         (list (cadr e)))
                                     (list-head (cddr e) 4)
@@ -3924,12 +4463,24 @@ f(x) = yt(x)
                            ((eq? (car e) 'cfunction)
                             (let ((fptr (car (compile-args (list (caddr e)) break-labels))))
                               (cons (cadr e) (cons fptr (cdddr e)))))
-                           ;; TODO: evaluate first argument to cglobal some other way
+                           ;; Leave a literal lambda in place for later global expansion
+                           ((eq? (car e) 'new_opaque_closure)
+                            (let* ((oc_method (car (list-tail (cdr e) 3))) ;; opaque_closure_method
+                                   (lambda (list-ref oc_method 5))
+                                   (lambda (linearize lambda)))
+                              (append
+                               (compile-args (list-head (cdr e) 3) break-labels)
+                               (list (append (butlast oc_method) (list lambda)))
+                               (compile-args (list-tail (cdr e) 4) break-labels))))
+                           ;; NOTE: 1st argument to cglobal treated same as for ccall
                            ((and (length> e 2)
                                  (or (eq? (cadr e) 'cglobal)
                                      (equal? (cadr e) '(outerref cglobal))))
-                            (list* (cadr e) (caddr e)
-                                   (compile-args (cdddr e) break-labels)))
+                            (append (list (cadr e))
+                                    (if (atom-or-not-tuple-call? (caddr e))
+                                        (compile-args (list (caddr e)) break-labels)
+                                        (list (caddr e)))
+                                    (compile-args (cdddr e) break-labels)))
                            (else
                             (compile-args (cdr e) break-labels))))
                     (callex (cons (car e) args)))
@@ -3966,32 +4517,36 @@ f(x) = yt(x)
                                      (not (eq? e (lam:body lam))))))
                (if file-diff (set! filename fname))
                (if need-meta (emit `(meta push_loc ,fname)))
-               (begin0
-                (let loop ((xs (cdr e)))
+               (let ((v (let loop ((xs (cdr e)))
                   (if (null? (cdr xs))
                       (compile (car xs) break-labels value tail)
                       (begin (compile (car xs) break-labels #f #f)
-                             (loop (cdr xs)))))
-                (if need-meta
-                    (if (or (not tail)
-                            (and (pair? (car code))
-                                 (or (eq? (cdar code) 'meta)
-                                     (eq? (cdar code) 'line))))
-                        (emit '(meta pop_loc))
-                        ;; If we need to return the last non-meta expression
-                        ;; splice the pop before the result
-                        (let ((retv (car code))
-                              (body (cdr code)))
-                          (set! code body)
-                          (if (complex-return? retv)
-                              (let ((tmp (make-ssavalue)))
-                                (emit `(= ,tmp ,(cadr retv)))
-                                (emit '(meta pop_loc))
-                                (emit `(return ,tmp)))
-                              (begin
-                                (emit '(meta pop_loc))
-                                (emit retv))))))
-                (if file-diff (set! filename last-fname)))))
+                             (loop (cdr xs)))))))
+                  (if need-meta
+                    (cond (tail
+                           ;; If we need to return the last non-meta expression
+                           ;; attempt to splice the pop_loc before the return
+                           ;; so that the return location always gets
+                           ;; attributed to the right level of macro
+                           (if (and (pair? code) (return? (car code)))
+                               (let ((retv (cadr (car code))))
+                                 (set! code (cdr code))
+                                 (if (not (simple-atom? retv))
+                                   (let ((tmp (make-ssavalue)))
+                                     (emit `(= ,tmp ,retv))
+                                     (set! retv tmp)))
+                                 (emit '(meta pop_loc))
+                                 (emit `(return ,retv)))
+                               (emit '(meta pop_loc))))
+                          ((and value (not (simple-atom? v)))
+                           (let ((tmp (make-ssavalue)))
+                             (emit `(= ,tmp ,v))
+                             (set! v tmp)
+                             (emit `(meta pop_loc))))
+                          (else
+                           (emit `(meta pop_loc)))))
+                  (if file-diff (set! filename last-fname))
+                  v)))
             ((return)
              (compile (cadr e) break-labels #t #t)
              #f)
@@ -4002,18 +4557,9 @@ f(x) = yt(x)
                  (compile (cadr e) break-labels value tail)
                  #f))
             ((if elseif)
-             (let ((tests (let* ((cond (cadr e))
-                                 (cond (if (and (pair? cond) (eq? (car cond) 'block))
-                                           (begin (compile (butlast cond) break-labels #f #f)
-                                                  (last cond))
-                                           cond)))
-                            (map (lambda (clause)
-                                   (emit `(gotoifnot ,(compile-cond clause break-labels) _)))
-                                 (if (and (pair? cond) (eq? (car cond) '&&))
-                                     (cdr cond)
-                                     (list cond)))))
-                   (end-jump `(goto _))
-                   (val (if (and value (not tail)) (new-mutable-var) #f)))
+             (let* ((tests (emit-cond (cadr e) break-labels '_))
+                    (end-jump `(goto _))
+                    (val (if (and value (not tail)) (new-mutable-var) #f)))
                (let ((v1 (compile (caddr e) break-labels value tail)))
                  (if val (emit-assignment val v1))
                  (if (and (not tail) (or (length> e 3) val))
@@ -4033,9 +4579,8 @@ f(x) = yt(x)
                    val))))
             ((_while)
              (let* ((endl (make-label))
-                    (topl (make&mark-label))
-                    (test (compile-cond (cadr e) break-labels)))
-               (emit `(gotoifnot ,test ,endl))
+                    (topl (make&mark-label)))
+               (emit-cond (cadr e) break-labels endl)
                (compile (caddr e) break-labels #f #f)
                (emit `(goto ,topl))
                (mark-label endl))
@@ -4089,9 +4634,10 @@ f(x) = yt(x)
             ;; (= tok (enter L)) - push handler with catch block at label L, yielding token
             ;; (leave n) - pop N exception handlers
             ;; (pop_exception tok) - pop exception stack back to state of associated enter
-            ((trycatch tryfinally)
+            ((trycatch tryfinally trycatchelse)
              (let ((handler-token (make-ssavalue))
                    (catch (make-label))
+                   (els   (and (eq? (car e) 'trycatchelse) (make-label)))
                    (endl  (make-label))
                    (last-finally-handler finally-handler)
                    (finally           (if (eq? (car e) 'tryfinally) (new-mutable-var) #f))
@@ -4108,11 +4654,20 @@ f(x) = yt(x)
                  ;; handler block postfix
                  (if (and val v1) (emit-assignment val v1))
                  (if tail
-                     (begin (if v1 (emit-return v1))
+                     (begin (if els
+                                (begin (if (and (not val) v1) (emit v1))
+                                       (emit '(leave 1)))
+                                (if v1 (emit-return v1)))
                             (if (not finally) (set! endl #f)))
                      (begin (emit '(leave 1))
-                            (emit `(goto ,endl))))
+                            (emit `(goto ,(or els endl)))))
                  (set! handler-level (- handler-level 1))
+                 ;; emit else block
+                 (if els
+                     (begin (mark-label els)
+                            (let ((v3 (compile (cadddr e) break-labels value tail))) ;; emit else block code
+                              (if val (emit-assignment val v3)))
+                            (emit `(goto ,endl))))
                  ;; emit either catch or finally block
                  (mark-label catch)
                  (emit `(leave 1))
@@ -4172,6 +4727,7 @@ f(x) = yt(x)
                      (if (not global-const-error)
                          (set! global-const-error current-loc))
                      (emit e))))
+            ((atomic) (error "misplaced atomic declaration"))
             ((isdefined) (if tail (emit-return e) e))
             ((boundscheck) (if tail (emit-return e) e))
 
@@ -4234,7 +4790,7 @@ f(x) = yt(x)
                (cons (car e) args)))
 
             ;; metadata expressions
-            ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope)
+            ((line meta inbounds loopinfo gc_preserve_end aliasscope popaliasscope inline noinline)
              (let ((have-ret? (and (pair? code) (pair? (car code)) (eq? (caar code) 'return))))
                (cond ((eq? (car e) 'line)
                       (set! current-loc e)
@@ -4355,7 +4911,13 @@ f(x) = yt(x)
              (list ,@(cadr vi)) ,(caddr vi) (list ,@(cadddr vi)))
        ,@(cdddr lam))))
 
-(define (compact-ir body file line)
+(define (make-lineinfo name file line (inlined-at #f))
+  `(lineinfo (thismodule) ,(if inlined-at '|macro expansion| name) ,file ,line ,(or inlined-at 0)))
+
+(define (set-lineno! lineinfo num)
+  (set-car! (cddddr lineinfo) num))
+
+(define (compact-ir body name file line)
   (let ((code         '(block))
         (locs         '(list))
         (linetable    '(list))
@@ -4370,10 +4932,10 @@ f(x) = yt(x)
     (define (emit e)
       (if (and (null? (cdr linetable))
                (not (and (pair? e) (eq? (car e) 'meta))))
-          (begin (set! linetable (cons `(line ,line ,file) linetable))
+          (begin (set! linetable (cons (make-lineinfo name file line) linetable))
                  (set! current-loc 1)))
       (if (or reachable
-              (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope))))
+              (and (pair? e) (memq (car e) '(meta inbounds gc_preserve_begin gc_preserve_end aliasscope popaliasscope inline noinline))))
           (begin (set! code (cons e code))
                  (set! i (+ i 1))
                  (set! locs (cons current-loc locs)))))
@@ -4384,22 +4946,22 @@ f(x) = yt(x)
                   ((eq? (car e) 'line)
                    (if (and (= current-line 0) (length= e 2) (pair? linetable))
                        ;; (line n) after push_loc just updates the line for the new file
-                       (begin (set-car! (cdr (car linetable)) (cadr e))
+                       (begin (set-lineno! (car linetable) (cadr e))
                               (set! current-line (cadr e)))
                        (begin
                          (set! current-line (cadr e))
                          (if (pair? (cddr e))
                              (set! current-file (caddr e)))
                          (set! linetable (cons (if (null? locstack)
-                                                   `(line ,current-line ,current-file)
-                                                   `(line ,current-line ,current-file ,(caar locstack)))
+                                                   (make-lineinfo name current-file current-line)
+                                                   (make-lineinfo name current-file current-line (caar locstack)))
                                                linetable))
                          (set! current-loc (- (length linetable) 1)))))
                   ((and (length> e 2) (eq? (car e) 'meta) (eq? (cadr e) 'push_loc))
                    (set! locstack (cons (list current-loc current-line current-file) locstack))
                    (set! current-file (caddr e))
                    (set! current-line 0)
-                   (set! linetable (cons `(line ,current-line ,current-file ,current-loc) linetable))
+                   (set! linetable (cons (make-lineinfo name current-file current-line current-loc) linetable))
                    (set! current-loc (- (length linetable) 1)))
                   ((and (length= e 2) (eq? (car e) 'meta) (eq? (cadr e) 'pop_loc))
                    (let ((l (car locstack)))
@@ -4425,16 +4987,10 @@ f(x) = yt(x)
             (loop (cdr stmts)))))
     (vector (reverse code) (reverse locs) (reverse linetable) ssavtable labltable)))
 
-(define (symbol-to-idx-map lst)
-  (let ((tbl (table)))
-    (let loop ((xs lst) (i 1))
-      (if (pair? xs)
-          (begin (put! tbl (car xs) i)
-                 (loop (cdr xs) (+ i 1)))))
-    tbl))
-
 (define (renumber-lambda lam file line)
-  (let* ((stuff (compact-ir (lam:body lam) file line))
+  (let* ((stuff (compact-ir (lam:body lam)
+                            (if (null? (cadr lam)) '|top-level scope| 'none)
+                            file line))
          (code (aref stuff 0))
          (locs (aref stuff 1))
          (linetab (aref stuff 2))
diff --git a/src/julia.expmap b/src/julia.expmap
index daf3a01749ed9d..13de1b873f7c3f 100644
--- a/src/julia.expmap
+++ b/src/julia.expmap
@@ -1,20 +1,13 @@
 {
   global:
-    __asan*;
-    __tsan*;
     pthread*;
     __stack_chk_guard;
     asprintf;
     bitvector_*;
-    ev_break;
-    get_exename;
-    getlocalip;
-    int32hash;
-    int64hash;
-    int64to32hash;
     ios_*;
-    iswprint;
+    small_arraylist_grow;
     jl_*;
+    ijl_*;
     rec_backtrace;
     julia_*;
     libsupport_init;
@@ -32,15 +25,24 @@
     add_library_mapping;
     utf8proc_*;
     jlbacktrace;
-    julia_type_to_llvm;
+    jlbacktracet;
     _IO_stdin_used;
-    __ZN4llvm23createLowerSimdLoopPassEv;
+    _Z24jl_coverage_data_pointerN4llvm9StringRefEi;
+    _Z22jl_coverage_alloc_lineN4llvm9StringRefEi;
+    _Z22jl_malloc_data_pointerN4llvm9StringRefEi;
     LLVMExtra*;
+    llvmGetPassPluginInfo;
 
     /* freebsd */
     environ;
     __progname;
 
+    /* compiler run-time intrinsics */
+    __gnu_h2f_ieee;
+    __extendhfsf2;
+    __gnu_f2h_ieee;
+    __truncdfhf2;
+
   local:
     *;
 };
diff --git a/src/julia.h b/src/julia.h
index eacc76eb0e0f2c..3587bfb0370c20 100644
--- a/src/julia.h
+++ b/src/julia.h
@@ -3,12 +3,14 @@
 #ifndef JULIA_H
 #define JULIA_H
 
-//** Configuration options that affect the Julia ABI **//
-// if this is not defined, only individual dimension sizes are
-// stored and not total length, to save space.
-#define STORE_ARRAY_LEN
-//** End Configuration options **//
+#ifdef LIBRARY_EXPORTS
+#include "jl_internal_funcs.inc"
+#undef jl_setjmp
+#undef jl_longjmp
+#undef jl_egal
+#endif
 
+#include "julia_fasttls.h"
 #include "libsupport.h"
 #include <stdint.h>
 #include <string.h>
@@ -37,6 +39,13 @@
 #  define MAX_ALIGN 8
 #endif
 
+// Define the largest size (bytes) of a properly aligned object that the
+// processor family and compiler typically supports without a lock
+// (assumed to be at least a pointer size). Since C is bad at handling 16-byte
+// types, we currently use 8 here as the default.
+#define MAX_ATOMIC_SIZE 8
+#define MAX_POINTERATOMIC_SIZE 8
+
 #ifdef _P64
 #define NWORDS(sz) (((sz)+7)>>3)
 #else
@@ -47,50 +56,18 @@
 #  define JL_NORETURN __attribute__ ((noreturn))
 #  define JL_CONST_FUNC __attribute__((const))
 #  define JL_USED_FUNC __attribute__((used))
-#  define JL_SECTION(name) __attribute__((section(name)))
-#  define JL_THREAD_LOCAL __thread
-#elif defined(_COMPILER_MICROSOFT_)
-#  define JL_NORETURN __declspec(noreturn)
-// This is the closest I can find for __attribute__((const))
-#  define JL_CONST_FUNC __declspec(noalias)
-// Does MSVC have this?
-#  define JL_USED_FUNC
-// TODO: Figure out what to do on MSVC
-#  define JL_SECTION(x)
-#  define JL_THREAD_LOCAL __declspec(threaD)
 #else
 #  define JL_NORETURN
 #  define JL_CONST_FUNC
 #  define JL_USED_FUNC
-#  define JL_THREAD_LOCAL
-#endif
-
-#if defined(__has_feature) // Clang flavor
-#if __has_feature(address_sanitizer)
-#define JL_ASAN_ENABLED
-#endif
-#if __has_feature(memory_sanitizer)
-#define JL_MSAN_ENABLED
 #endif
-#if __has_feature(thread_sanitizer)
-#if __clang_major__ < 11
-#error Thread sanitizer runtime libraries in clang < 11 leak memory and cannot be used
-#endif
-#define JL_TSAN_ENABLED
-#endif
-#else // GCC flavor
-#if defined(__SANITIZE_ADDRESS__)
-#define JL_ASAN_ENABLED
-#endif
-#endif // __has_feature
 
 #define container_of(ptr, type, member) \
     ((type *) ((char *)(ptr) - offsetof(type, member)))
 
 typedef struct _jl_taggedvalue_t jl_taggedvalue_t;
 
-#include "atomics.h"
-#include "tls.h"
+#include "julia_atomics.h"
 #include "julia_threads.h"
 #include "julia_assert.h"
 
@@ -120,7 +97,7 @@ JL_EXTENSION struct _jl_taggedvalue_t {
     // jl_value_t value;
 };
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 JL_DLLEXPORT jl_taggedvalue_t *_jl_astaggedvalue(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 #define jl_astaggedvalue(v) _jl_astaggedvalue((jl_value_t*)(v))
 jl_value_t *_jl_valueof(jl_taggedvalue_t *tv JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
@@ -139,7 +116,7 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT
 {
     // Do not call this on a value that is already initialized.
     jl_taggedvalue_t *tag = jl_astaggedvalue(v);
-    jl_atomic_store_relaxed(&tag->type, (jl_value_t*)t);
+    jl_atomic_store_relaxed((_Atomic(jl_value_t*)*)&tag->type, (jl_value_t*)t);
 }
 #define jl_typeis(v,t) (jl_typeof(v)==(jl_value_t*)(t))
 
@@ -147,8 +124,8 @@ static inline void jl_set_typeof(void *v, void *t) JL_NOTSAFEPOINT
 // The string data is nul-terminated and hangs off the end of the struct.
 typedef struct _jl_sym_t {
     JL_DATA_TYPE
-    struct _jl_sym_t *left;
-    struct _jl_sym_t *right;
+    _Atomic(struct _jl_sym_t*) left;
+    _Atomic(struct _jl_sym_t*) right;
     uintptr_t hash;    // precomputed hash value
     // JL_ATTRIBUTE_ALIGN_PTRSIZE(char name[]);
 } jl_sym_t;
@@ -189,9 +166,7 @@ typedef struct {
 JL_EXTENSION typedef struct {
     JL_DATA_TYPE
     void *data;
-#ifdef STORE_ARRAY_LEN
     size_t length;
-#endif
     jl_array_flags_t flags;
     uint16_t elsize;  // element size including alignment (dim 1 memory stride)
     uint32_t offset;  // for 1-d only. does not need to get big.
@@ -231,23 +206,19 @@ typedef jl_call_t *jl_callptr_t;
 
 // "speccall" calling convention signatures.
 // This describes some of the special ABI used by compiled julia functions.
-JL_DLLEXPORT extern jl_call_t jl_fptr_args;
+extern jl_call_t jl_fptr_args;
+JL_DLLEXPORT extern jl_callptr_t jl_fptr_args_addr;
 typedef jl_value_t *(*jl_fptr_args_t)(jl_value_t*, jl_value_t**, uint32_t);
 
-JL_DLLEXPORT extern jl_call_t jl_fptr_const_return;
+extern jl_call_t jl_fptr_const_return;
+JL_DLLEXPORT extern jl_callptr_t jl_fptr_const_return_addr;
 
-JL_DLLEXPORT extern jl_call_t jl_fptr_sparam;
+extern jl_call_t jl_fptr_sparam;
+JL_DLLEXPORT extern jl_callptr_t jl_fptr_sparam_addr;
 typedef jl_value_t *(*jl_fptr_sparam_t)(jl_value_t*, jl_value_t**, uint32_t, jl_svec_t*);
 
-JL_DLLEXPORT extern jl_call_t jl_fptr_interpret_call;
-
-JL_EXTENSION typedef union {
-    void* fptr;
-    jl_fptr_args_t fptr1;
-    // 2 constant
-    jl_fptr_sparam_t fptr3;
-    // 4 interpreter
-} jl_generic_specptr_t;
+extern jl_call_t jl_fptr_interpret_call;
+JL_DLLEXPORT extern jl_callptr_t jl_fptr_interpret_call_addr;
 
 typedef struct _jl_method_instance_t jl_method_instance_t;
 
@@ -259,17 +230,35 @@ typedef struct _jl_line_info_node_t {
     intptr_t inlined_at;
 } jl_line_info_node_t;
 
+// the following mirrors `struct EffectsOverride` in `base/compiler/types.jl`
+typedef union __jl_purity_overrides_t {
+    struct {
+        uint8_t ipo_consistent  : 1;
+        uint8_t ipo_effect_free : 1;
+        uint8_t ipo_nothrow     : 1;
+        uint8_t ipo_terminates  : 1;
+        // Weaker form of `terminates` that asserts
+        // that any control flow syntactically in the method
+        // is guaranteed to terminate, but does not make
+        // assertions about any called functions.
+        uint8_t ipo_terminates_locally : 1;
+    } overrides;
+    uint8_t bits;
+} _jl_purity_overrides_t;
+
 // This type describes a single function body
 typedef struct _jl_code_info_t {
     // ssavalue-indexed arrays of properties:
     jl_array_t *code;  // Any array of statements
-    jl_value_t *codelocs; // Int32 array of indicies into the line table
+    jl_value_t *codelocs; // Int32 array of indices into the line table
     jl_value_t *ssavaluetypes; // types of ssa values (or count of them)
     jl_array_t *ssaflags; // flags associated with each statement:
         // 0 = inbounds
-        // 1,2 = <reserved> inlinehint,always-inline,noinline
+        // 1 = inline
+        // 2 = noinline
         // 3 = <reserved> strict-ieee (strictfp)
-        // 4-6 = <unused>
+        // 4 = effect-free (may be deleted if unused)
+        // 5-6 = <unused>
         // 7 = has out-of-band info
     // miscellaneous data:
     jl_value_t *method_for_inference_limit_heuristics; // optional method used during inference
@@ -288,6 +277,9 @@ typedef struct _jl_code_info_t {
     uint8_t inlineable;
     uint8_t propagate_inbounds;
     uint8_t pure;
+    // uint8 settings
+    uint8_t constprop; // 0 = use heuristic; 1 = aggressive; 2 = none
+    _jl_purity_overrides_t purity;
 } jl_code_info_t;
 
 // This type describes a single method definition, and stores data
@@ -305,28 +297,46 @@ typedef struct _jl_method_t {
     jl_value_t *sig;
 
     // table of all jl_method_instance_t specializations we have
-    jl_svec_t *specializations; // allocated as [hashable, ..., NULL, linear, ....]
-    jl_array_t *speckeyset; // index lookup by hash into specializations
+    _Atomic(jl_svec_t*) specializations; // allocated as [hashable, ..., NULL, linear, ....]
+    _Atomic(jl_array_t*) speckeyset; // index lookup by hash into specializations
 
     jl_value_t *slot_syms; // compacted list of slot names (String)
+    jl_value_t *external_mt; // reference to the method table this method is part of, null if part of the internal table
     jl_value_t *source;  // original code template (jl_code_info_t, but may be compressed), null for builtins
-    struct _jl_method_instance_t *unspecialized;  // unspecialized executable method instance, or null
+    _Atomic(struct _jl_method_instance_t*) unspecialized;  // unspecialized executable method instance, or null
     jl_value_t *generator;  // executable code-generating function if available
     jl_array_t *roots;  // pointers in generated code (shared to reduce memory), or null
+    // Identify roots by module-of-origin. We only track the module for roots added during incremental compilation.
+    // May be NULL if no external roots have been added, otherwise it's a Vector{UInt64}
+    jl_array_t *root_blocks;   // RLE (build_id, offset) pairs (even/odd indexing)
+    int32_t nroots_sysimg;     // # of roots stored in the system image
     jl_svec_t *ccallable; // svec(rettype, sig) if a ccallable entry point is requested for this
 
     // cache of specializations of this method for invoke(), i.e.
     // cases where this method was called even though it was not necessarily
     // the most specific for the argument types.
-    jl_typemap_t *invokes;
-
-    int32_t nargs;
-    int32_t called;        // bit flags: whether each of the first 8 arguments is called
-    int32_t nospecialize;  // bit flags: which arguments should not be specialized
-    int32_t nkw;           // # of leading arguments that are actually keyword arguments
-                           // of another method.
+    _Atomic(jl_typemap_t*) invokes;
+
+    // A function that compares two specializations of this method, returning
+    // `true` if the first signature is to be considered "smaller" than the
+    // second for purposes of recursion analysis. Set to NULL to use
+    // the default recusion relation.
+    jl_value_t *recursion_relation;
+
+    uint32_t nargs;
+    uint32_t called;        // bit flags: whether each of the first 8 arguments is called
+    uint32_t nospecialize;  // bit flags: which arguments should not be specialized
+    uint32_t nkw;           // # of leading arguments that are actually keyword arguments
+                            // of another method.
     uint8_t isva;
     uint8_t pure;
+    uint8_t is_for_opaque_closure;
+    // uint8 settings
+    uint8_t constprop;     // 0x00 = use heuristic; 0x01 = aggressive; 0x02 = none
+
+    // Override the conclusions of inter-procedural effect analysis,
+    // forcing the conclusion to always true.
+    _jl_purity_overrides_t purity;
 
 // hidden fields:
     // lock for modifications to the method
@@ -347,15 +357,27 @@ struct _jl_method_instance_t {
     jl_svec_t *sparam_vals; // static parameter values, indexed by def.method->sparam_syms
     jl_value_t *uninferred; // cached uncompressed code, for generated functions, top-level thunks, or the interpreter
     jl_array_t *backedges; // list of method-instances which contain a call into this method-instance
-    struct _jl_code_instance_t *cache;
+    jl_array_t *callbacks; // list of callback functions to inform external caches about invalidations
+    _Atomic(struct _jl_code_instance_t*) cache;
     uint8_t inInference; // flags to tell if inference is running on this object
+    uint8_t precompiled; // true if this instance was generated by an explicit `precompile(...)` call
 };
 
+// OpaqueClosure
+typedef struct jl_opaque_closure_t {
+    JL_DATA_TYPE
+    jl_value_t *captures;
+    size_t world;
+    jl_method_t *source;
+    jl_fptr_args_t invoke;
+    void *specptr;
+} jl_opaque_closure_t;
+
 // This type represents an executable operation
 typedef struct _jl_code_instance_t {
     JL_DATA_TYPE
     jl_method_instance_t *def; // method this is specialized from
-    struct _jl_code_instance_t *next; // pointer to the next cache entry
+    _Atomic(struct _jl_code_instance_t*) next; // pointer to the next cache entry
 
     // world range for which this object is valid to use
     size_t min_world;
@@ -368,11 +390,47 @@ typedef struct _jl_code_instance_t {
     //TODO: jl_array_t *edges; // stored information about edges from this object
     //TODO: uint8_t absolute_max; // whether true max world is unknown
 
+    // purity results
+#ifdef JL_USE_ANON_UNIONS_FOR_PURITY_FLAGS
+    // see also encode_effects() and decode_effects() in `base/compiler/types.jl`,
+    union {
+        uint32_t ipo_purity_bits;
+        struct {
+            uint8_t ipo_consistent:2;
+            uint8_t ipo_effect_free:2;
+            uint8_t ipo_nothrow:2;
+            uint8_t ipo_terminates:2;
+            uint8_t ipo_nonoverlayed:1;
+        } ipo_purity_flags;
+    };
+    union {
+        uint32_t purity_bits;
+        struct {
+            uint8_t consistent:2;
+            uint8_t effect_free:2;
+            uint8_t nothrow:2;
+            uint8_t terminates:2;
+            uint8_t nonoverlayed:1;
+        } purity_flags;
+    };
+#else
+    uint32_t ipo_purity_bits;
+    uint32_t purity_bits;
+#endif
+    jl_value_t *argescapes; // escape information of call arguments
+
     // compilation state cache
     uint8_t isspecsig; // if specptr is a specialized function signature for specTypes->rettype
-    uint8_t precompile;  // if set, this will be added to the output system image
-    jl_callptr_t invoke; // jlcall entry point
-    jl_generic_specptr_t specptr; // private data for `jlcall entry point`
+    _Atomic(uint8_t) precompile;  // if set, this will be added to the output system image
+    _Atomic(jl_callptr_t) invoke; // jlcall entry point
+    union _jl_generic_specptr_t {
+        _Atomic(void*) fptr;
+        _Atomic(jl_fptr_args_t) fptr1;
+        // 2 constant
+        _Atomic(jl_fptr_sparam_t) fptr3;
+        // 4 interpreter
+    } specptr; // private data for `jlcall entry point
+    uint8_t relocatability;  // nonzero if all roots are built into sysimg or tagged by module key
 } jl_code_instance_t;
 
 // all values are callable as Functions
@@ -401,14 +459,23 @@ typedef struct {
     jl_sym_t *name;
     struct _jl_module_t *module;
     jl_svec_t *names;  // field names
+    const uint32_t *atomicfields; // if any fields are atomic, we record them here
+    const uint32_t *constfields; // if any fields are const, we record them here
     // `wrapper` is either the only instantiation of the type (if no parameters)
     // or a UnionAll accepting parameters to make an instantiation.
     jl_value_t *wrapper;
-    jl_svec_t *cache;        // sorted array
-    jl_svec_t *linearcache;  // unsorted array
-    intptr_t hash;
+    _Atomic(jl_value_t*) Typeofwrapper;  // cache for Type{wrapper}
+    _Atomic(jl_svec_t*) cache;        // sorted array
+    _Atomic(jl_svec_t*) linearcache;  // unsorted array
     struct _jl_methtable_t *mt;
     jl_array_t *partial;     // incomplete instantiations of this type
+    intptr_t hash;
+    int32_t n_uninitialized;
+    // type properties
+    uint8_t abstract:1;
+    uint8_t mutabl:1;
+    uint8_t mayinlinealloc:1;
+    uint8_t max_methods; // override for inference's max_methods setting (0 = no additional limit or relaxation)
 } jl_typename_t;
 
 typedef struct {
@@ -442,7 +509,7 @@ typedef struct {
     int32_t first_ptr; // index of the first pointer (or -1)
     uint16_t alignment; // strictest alignment over all fields
     uint16_t haspadding : 1; // has internal undefined bytes
-    uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32
+    uint16_t fielddesc_type : 2; // 0 -> 8, 1 -> 16, 2 -> 32, 3 -> foreign type
     // union {
     //     jl_fielddesc8_t field8[nfields];
     //     jl_fielddesc16_t field16[nfields];
@@ -461,25 +528,26 @@ typedef struct _jl_datatype_t {
     struct _jl_datatype_t *super;
     jl_svec_t *parameters;
     jl_svec_t *types;
-    jl_svec_t *names;
     jl_value_t *instance;  // for singletons
     const jl_datatype_layout_t *layout;
     int32_t size; // TODO: move to _jl_datatype_layout_t
-    int32_t ninitialized;
-    uint32_t hash;
-    uint8_t abstract;
-    uint8_t mutabl;
     // memoized properties
-    uint8_t hasfreetypevars; // majority part of isconcrete computation
-    uint8_t isconcretetype; // whether this type can have instances
-    uint8_t isdispatchtuple; // aka isleaftupletype
-    uint8_t isbitstype; // relevant query for C-api and type-parameters
-    uint8_t zeroinit; // if one or more fields requires zero-initialization
-    uint8_t isinlinealloc; // if this is allocated inline
-    uint8_t has_concrete_subtype; // If clear, no value will have this datatype
-    uint8_t cached_by_hash; // stored in hash-based set cache (instead of linear cache)
+    uint32_t hash;
+    uint8_t hasfreetypevars:1; // majority part of isconcrete computation
+    uint8_t isconcretetype:1; // whether this type can have instances
+    uint8_t isdispatchtuple:1; // aka isleaftupletype
+    uint8_t isbitstype:1; // relevant query for C-api and type-parameters
+    uint8_t zeroinit:1; // if one or more fields requires zero-initialization
+    uint8_t has_concrete_subtype:1; // If clear, no value will have this datatype
+    uint8_t cached_by_hash:1; // stored in hash-based set cache (instead of linear cache)
 } jl_datatype_t;
 
+typedef struct _jl_vararg_t {
+    JL_DATA_TYPE
+    jl_value_t *T;
+    jl_value_t *N;
+} jl_vararg_t;
+
 typedef struct {
     JL_DATA_TYPE
     jl_value_t *value;
@@ -488,10 +556,11 @@ typedef struct {
 typedef struct {
     // not first-class
     jl_sym_t *name;
-    jl_value_t *value;
-    jl_value_t *globalref;  // cached GlobalRef for this binding
-    struct _jl_module_t *owner;  // for individual imported bindings
-    uint8_t constp;
+    _Atomic(jl_value_t*) value;
+    _Atomic(jl_value_t*) globalref;  // cached GlobalRef for this binding
+    struct _jl_module_t* owner;  // for individual imported bindings -- TODO: make _Atomic
+    _Atomic(jl_value_t*) ty;  // binding type
+    uint8_t constp:1;
     uint8_t exportp:1;
     uint8_t imported:1;
     uint8_t deprecated:2; // 0=not deprecated, 1=renamed, 2=moved to another package
@@ -512,19 +581,20 @@ typedef struct _jl_module_t {
     uint64_t build_id;
     jl_uuid_t uuid;
     size_t primary_world;
-    uint32_t counter;
+    _Atomic(uint32_t) counter;
     int32_t nospecialize;  // global bit flags: initialization for new methods
     int8_t optlevel;
     int8_t compile;
     int8_t infer;
     uint8_t istopmod;
+    int8_t max_methods;
     jl_mutex_t lock;
 } jl_module_t;
 
 // one Type-to-Value entry
 typedef struct _jl_typemap_entry_t {
     JL_DATA_TYPE
-    struct _jl_typemap_entry_t *next; // invasive linked list
+    _Atomic(struct _jl_typemap_entry_t*) next; // invasive linked list
     jl_tupletype_t *sig; // the type signature for this entry
     jl_tupletype_t *simplesig; // a simple signature for fast rejection
     jl_svec_t *guardsigs;
@@ -549,23 +619,23 @@ typedef struct _jl_typemap_level_t {
     // next split may be on Type{T} as LeafTypes then TypeName's parents up to Any
     // next split may be on LeafType
     // next split may be on TypeName
-    jl_array_t *arg1; // contains LeafType
-    jl_array_t *targ; // contains Type{LeafType}
-    jl_array_t *name1; // contains non-abstract TypeName, for parents up to (excluding) Any
-    jl_array_t *tname; // contains a dict of Type{TypeName}, for parents up to Any
+    _Atomic(jl_array_t*) arg1; // contains LeafType
+    _Atomic(jl_array_t*) targ; // contains Type{LeafType}
+    _Atomic(jl_array_t*) name1; // contains non-abstract TypeName, for parents up to (excluding) Any
+    _Atomic(jl_array_t*) tname; // contains a dict of Type{TypeName}, for parents up to Any
     // next a linear list of things too complicated at this level for analysis (no more levels)
-    jl_typemap_entry_t *linear;
+    _Atomic(jl_typemap_entry_t*) linear;
     // finally, start a new level if the type at offs is Any
-    jl_typemap_t *any;
+    _Atomic(jl_typemap_t*) any;
 } jl_typemap_level_t;
 
 // contains the TypeMap for one Type
 typedef struct _jl_methtable_t {
     JL_DATA_TYPE
     jl_sym_t *name; // sometimes a hack used by serialization to handle kwsorter
-    jl_typemap_t *defs;
-    jl_array_t *leafcache;
-    jl_typemap_t *cache;
+    _Atomic(jl_typemap_t*) defs;
+    _Atomic(jl_array_t*) leafcache;
+    _Atomic(jl_typemap_t*) cache;
     intptr_t max_args;  // max # of non-vararg arguments in a signature
     jl_value_t *kwsorter;  // keyword argument sorter function
     jl_module_t *module; // used for incremental serialization to locate original binding
@@ -594,134 +664,137 @@ typedef struct {
 // constants and type objects -------------------------------------------------
 
 // kinds
-extern JL_DLLEXPORT jl_datatype_t *jl_typeofbottom_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_datatype_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_uniontype_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_unionall_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_tvar_type JL_GLOBALLY_ROOTED;
-
-extern JL_DLLEXPORT jl_datatype_t *jl_any_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_type_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_typename_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_typename_t *jl_type_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_symbol_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_ssavalue_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_abstractslot_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_slotnumber_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_typedslot_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_argument_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_const_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_partial_struct_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_method_match_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_simplevector_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_typename_t *jl_tuple_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_typename_t *jl_vecelement_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_anytuple_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_emptytuple_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_typeofbottom_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_datatype_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_uniontype_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_unionall_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_tvar_type JL_GLOBALLY_ROOTED;
+
+extern JL_DLLIMPORT jl_datatype_t *jl_any_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_type_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_typename_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_typename_t *jl_type_typename JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_symbol_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_ssavalue_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_abstractslot_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_slotnumber_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_typedslot_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_argument_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_const_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_partial_struct_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_partial_opaque_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_interconditional_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_method_match_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_simplevector_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_typename_t *jl_tuple_typename JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_typename_t *jl_vecelement_typename JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_anytuple_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_emptytuple_type JL_GLOBALLY_ROOTED;
 #define jl_tuple_type jl_anytuple_type
-extern JL_DLLEXPORT jl_unionall_t *jl_anytuple_type_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_vararg_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_typename_t *jl_vararg_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_function_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_builtin_type JL_GLOBALLY_ROOTED;
-
-extern JL_DLLEXPORT jl_value_t *jl_bottom_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_method_instance_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_code_instance_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_code_info_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_method_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_module_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_abstractarray_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_densearray_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_array_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_typename_t *jl_array_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_weakref_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_abstractstring_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_string_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_errorexception_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_argumenterror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_loaderror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_initerror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_typeerror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_methoderror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_undefvarerror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_lineinfonode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_stackovf_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_memory_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_readonlymemory_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_diverror_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_undefref_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_interrupt_exception JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_boundserror_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_an_empty_vec_any JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_an_empty_string JL_GLOBALLY_ROOTED;
-
-extern JL_DLLEXPORT jl_datatype_t *jl_bool_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_char_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_int8_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_uint8_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_int16_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_uint16_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_int32_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_uint32_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_int64_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_uint64_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_float16_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_float32_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_float64_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_floatingpoint_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_number_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_void_type JL_GLOBALLY_ROOTED;  // deprecated
-extern JL_DLLEXPORT jl_datatype_t *jl_nothing_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_signed_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_voidpointer_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_uint8pointer_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_pointer_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_llvmpointer_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_ref_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_typename_t *jl_pointer_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_typename_t *jl_llvmpointer_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_typename_t *jl_namedtuple_typename JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_unionall_t *jl_namedtuple_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_task_type JL_GLOBALLY_ROOTED;
-
-extern JL_DLLEXPORT jl_value_t *jl_array_uint8_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_array_any_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_array_symbol_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_array_int32_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_expr_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_globalref_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_linenumbernode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_gotonode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_gotoifnot_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_returnnode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_phinode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_pinode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_phicnode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_upsilonnode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_quotenode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_newvarnode_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_intrinsic_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_methtable_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_typemap_level_type JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_datatype_t *jl_typemap_entry_type JL_GLOBALLY_ROOTED;
-
-extern JL_DLLEXPORT jl_svec_t *jl_emptysvec JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_emptytuple JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_true JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_false JL_GLOBALLY_ROOTED;
-extern JL_DLLEXPORT jl_value_t *jl_nothing JL_GLOBALLY_ROOTED;
-
-// some important symbols
-extern JL_DLLEXPORT jl_sym_t *jl_incomplete_sym;
+extern JL_DLLIMPORT jl_unionall_t *jl_anytuple_type_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_vararg_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_function_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_builtin_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_opaque_closure_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_typename_t *jl_opaque_closure_typename JL_GLOBALLY_ROOTED;
+
+extern JL_DLLIMPORT jl_value_t *jl_bottom_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_method_instance_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_code_instance_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_code_info_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_method_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_module_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_abstractarray_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_densearray_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_array_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_typename_t *jl_array_typename JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_weakref_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_abstractstring_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_string_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_errorexception_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_argumenterror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_loaderror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_initerror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_typeerror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_methoderror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_undefvarerror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_atomicerror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_lineinfonode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_stackovf_exception JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_memory_exception JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_readonlymemory_exception JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_diverror_exception JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_undefref_exception JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_interrupt_exception JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_boundserror_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_an_empty_vec_any JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_an_empty_string JL_GLOBALLY_ROOTED;
+
+extern JL_DLLIMPORT jl_datatype_t *jl_bool_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_char_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_int8_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_uint8_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_int16_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_uint16_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_int32_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_uint32_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_int64_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_uint64_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_float16_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_float32_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_float64_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_floatingpoint_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_number_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_void_type JL_GLOBALLY_ROOTED;  // deprecated
+extern JL_DLLIMPORT jl_datatype_t *jl_nothing_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_signed_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_voidpointer_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_uint8pointer_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_pointer_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_llvmpointer_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_ref_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_typename_t *jl_pointer_typename JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_typename_t *jl_llvmpointer_typename JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_typename_t *jl_namedtuple_typename JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_unionall_t *jl_namedtuple_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_task_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_pair_type JL_GLOBALLY_ROOTED;
+
+extern JL_DLLIMPORT jl_value_t *jl_array_uint8_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_array_any_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_array_symbol_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_array_int32_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_array_uint64_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_expr_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_globalref_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_linenumbernode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_gotonode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_gotoifnot_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_returnnode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_phinode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_pinode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_phicnode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_upsilonnode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_quotenode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_newvarnode_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_intrinsic_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_methtable_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_typemap_level_type JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_datatype_t *jl_typemap_entry_type JL_GLOBALLY_ROOTED;
+
+extern JL_DLLIMPORT jl_svec_t *jl_emptysvec JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_emptytuple JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_true JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_false JL_GLOBALLY_ROOTED;
+extern JL_DLLIMPORT jl_value_t *jl_nothing JL_GLOBALLY_ROOTED;
 
 // gc -------------------------------------------------------------------------
 
-typedef struct _jl_gcframe_t {
+struct _jl_gcframe_t {
     size_t nroots;
     struct _jl_gcframe_t *prev;
     // actual roots go here
-} jl_gcframe_t;
+};
 
 // NOTE: it is the caller's responsibility to make sure arguments are
 // rooted such that the gc can see them on the stack.
@@ -732,12 +805,12 @@ typedef struct _jl_gcframe_t {
 // jl_value_t *x=NULL, *y=NULL; JL_GC_PUSH2(&x, &y);
 // x = f(); y = g(); foo(x, y)
 
-#define jl_pgcstack (jl_get_ptls_states()->pgcstack)
+#define jl_pgcstack (jl_current_task->gcstack)
 
 #define JL_GC_ENCODE_PUSHARGS(n)   (((size_t)(n))<<2)
 #define JL_GC_ENCODE_PUSH(n)       ((((size_t)(n))<<2)|1)
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 
 // When running with the analyzer make these real function calls, that are
 // easier to detect in the analyzer
@@ -746,7 +819,7 @@ extern void JL_GC_PUSH2(void *, void *) JL_NOTSAFEPOINT;
 extern void JL_GC_PUSH3(void *, void *, void *)  JL_NOTSAFEPOINT;
 extern void JL_GC_PUSH4(void *, void *, void *, void *)  JL_NOTSAFEPOINT;
 extern void JL_GC_PUSH5(void *, void *, void *, void *, void *)  JL_NOTSAFEPOINT;
-extern void JL_GC_PUSH6(void *, void *, void *, void *, void *, void *)  JL_NOTSAFEPOINT;
+extern void JL_GC_PUSH7(void *, void *, void *, void *, void *, void *, void *)  JL_NOTSAFEPOINT;
 extern void _JL_GC_PUSHARGS(jl_value_t **, size_t) JL_NOTSAFEPOINT;
 // This is necessary, because otherwise the analyzer considers this undefined
 // behavior and terminates the exploration
@@ -783,6 +856,11 @@ extern void JL_GC_POP() JL_NOTSAFEPOINT;
   void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(6), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6};   \
   jl_pgcstack = (jl_gcframe_t*)__gc_stkf;
 
+#define JL_GC_PUSH7(arg1, arg2, arg3, arg4, arg5, arg6, arg7)                                           \
+  void *__gc_stkf[] = {(void*)JL_GC_ENCODE_PUSH(7), jl_pgcstack, arg1, arg2, arg3, arg4, arg5, arg6, arg7}; \
+  jl_pgcstack = (jl_gcframe_t*)__gc_stkf;
+
+
 #define JL_GC_PUSHARGS(rts_var,n)                                                                       \
   rts_var = ((jl_value_t**)alloca(((n)+2)*sizeof(jl_value_t*)))+2;                                      \
   ((void**)rts_var)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(n);                                              \
@@ -805,7 +883,8 @@ typedef enum {
 
 JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t);
 
-JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f);
+JL_DLLEXPORT void jl_gc_add_finalizer(jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_finalize(jl_value_t *o);
 JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref(jl_value_t *value);
 JL_DLLEXPORT jl_value_t *jl_gc_alloc_0w(void);
@@ -820,10 +899,10 @@ JL_DLLEXPORT void jl_gc_use(jl_value_t *a);
 JL_DLLEXPORT void jl_clear_malloc_data(void);
 
 // GC write barriers
-JL_DLLEXPORT void jl_gc_queue_root(jl_value_t *root) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jl_gc_queue_multiroot(jl_value_t *root, jl_value_t *stored) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *root) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_queue_multiroot(const jl_value_t *root, const jl_value_t *stored) JL_NOTSAFEPOINT;
 
-STATIC_INLINE void jl_gc_wb(void *parent, void *ptr) JL_NOTSAFEPOINT
+STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
 {
     // parent and ptr isa jl_value_t*
     if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 && // parent is old and not in remset
@@ -831,7 +910,7 @@ STATIC_INLINE void jl_gc_wb(void *parent, void *ptr) JL_NOTSAFEPOINT
         jl_gc_queue_root((jl_value_t*)parent);
 }
 
-STATIC_INLINE void jl_gc_wb_back(void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
+STATIC_INLINE void jl_gc_wb_back(const void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_t*
 {
     // if ptr is old
     if (__unlikely(jl_astaggedvalue(ptr)->bits.gc == 3)) {
@@ -839,7 +918,7 @@ STATIC_INLINE void jl_gc_wb_back(void *ptr) JL_NOTSAFEPOINT // ptr isa jl_value_
     }
 }
 
-STATIC_INLINE void jl_gc_multi_wb(void *parent, jl_value_t *ptr) JL_NOTSAFEPOINT
+STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_NOTSAFEPOINT
 {
     // ptr is an immutable object
     if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
@@ -855,6 +934,7 @@ STATIC_INLINE void jl_gc_multi_wb(void *parent, jl_value_t *ptr) JL_NOTSAFEPOINT
 JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz);
 JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
                                          int isaligned, jl_value_t *owner);
+JL_DLLEXPORT void jl_gc_safepoint(void);
 
 // object accessors -----------------------------------------------------------
 
@@ -862,7 +942,7 @@ JL_DLLEXPORT void *jl_gc_managed_realloc(void *d, size_t sz, size_t oldsz,
 #define jl_svec_set_len_unsafe(t,n) (((jl_svec_t*)(t))->length=(n))
 #define jl_svec_data(t) ((jl_value_t**)((char*)(t) + sizeof(jl_svec_t)))
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 STATIC_INLINE jl_value_t *jl_svecref(void *t JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 STATIC_INLINE jl_value_t *jl_svecset(
     void *t JL_ROOTING_ARGUMENT JL_PROPAGATES_ROOT,
@@ -874,7 +954,7 @@ STATIC_INLINE jl_value_t *jl_svecref(void *t JL_PROPAGATES_ROOT, size_t i) JL_NO
     assert(i < jl_svec_len(t));
     // while svec is supposedly immutable, in practice we sometimes publish it first
     // and set the values lazily
-    return jl_atomic_load_relaxed(jl_svec_data(t) + i);
+    return jl_atomic_load_relaxed((_Atomic(jl_value_t*)*)jl_svec_data(t) + i);
 }
 STATIC_INLINE jl_value_t *jl_svecset(
     void *t JL_ROOTING_ARGUMENT JL_PROPAGATES_ROOT,
@@ -885,17 +965,12 @@ STATIC_INLINE jl_value_t *jl_svecset(
     // TODO: while svec is supposedly immutable, in practice we sometimes publish it first
     // and set the values lazily. Those users should be using jl_atomic_store_release here.
     jl_svec_data(t)[i] = (jl_value_t*)x;
-    if (x) jl_gc_wb(t, x);
+    jl_gc_wb(t, x);
     return (jl_value_t*)x;
 }
 #endif
 
-#ifdef STORE_ARRAY_LEN
 #define jl_array_len(a)   (((jl_array_t*)(a))->length)
-#else
-JL_DLLEXPORT size_t jl_array_len_(jl_array_t *a);
-#define jl_array_len(a)   jl_array_len_((jl_array_t*)(a))
-#endif
 #define jl_array_data(a)  ((void*)((jl_array_t*)(a))->data)
 #define jl_array_dim(a,i) ((&((jl_array_t*)(a))->nrows)[i])
 #define jl_array_dim0(a)  (((jl_array_t*)(a))->nrows)
@@ -906,7 +981,7 @@ JL_DLLEXPORT size_t jl_array_len_(jl_array_t *a);
 
 JL_DLLEXPORT char *jl_array_typetagdata(jl_array_t *a) JL_NOTSAFEPOINT;
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 jl_value_t **jl_array_ptr_data(jl_array_t *a JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 STATIC_INLINE jl_value_t *jl_array_ptr_set(
@@ -918,7 +993,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_ref(void *a JL_PROPAGATES_ROOT, size_t i)
 {
     assert(((jl_array_t*)a)->flags.ptrarray);
     assert(i < jl_array_len(a));
-    return jl_atomic_load_relaxed(((jl_value_t**)(jl_array_data(a))) + i);
+    return jl_atomic_load_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i);
 }
 STATIC_INLINE jl_value_t *jl_array_ptr_set(
     void *a JL_ROOTING_ARGUMENT, size_t i,
@@ -926,7 +1001,7 @@ STATIC_INLINE jl_value_t *jl_array_ptr_set(
 {
     assert(((jl_array_t*)a)->flags.ptrarray);
     assert(i < jl_array_len(a));
-    jl_atomic_store_relaxed(((jl_value_t**)(jl_array_data(a))) + i, (jl_value_t*)x);
+    jl_atomic_store_relaxed(((_Atomic(jl_value_t*)*)(jl_array_data(a))) + i, (jl_value_t*)x);
     if (x) {
         if (((jl_array_t*)a)->flags.how == 3) {
             a = jl_array_data_owner(a);
@@ -990,14 +1065,7 @@ JL_DLLEXPORT jl_svec_t *jl_compute_fieldtypes(jl_datatype_t *st JL_PROPAGATES_RO
 #define jl_get_fieldtypes(st) ((st)->types ? (st)->types : jl_compute_fieldtypes((st), NULL))
 STATIC_INLINE jl_svec_t *jl_field_names(jl_datatype_t *st) JL_NOTSAFEPOINT
 {
-    jl_svec_t *names = st->names;
-    if (!names)
-        names = st->name->names;
-    return names;
-}
-STATIC_INLINE jl_sym_t *jl_field_name(jl_datatype_t *st, size_t i) JL_NOTSAFEPOINT
-{
-    return (jl_sym_t*)jl_svecref(jl_field_names(st), i);
+    return st->name->names;
 }
 STATIC_INLINE jl_value_t *jl_field_type(jl_datatype_t *st JL_PROPAGATES_ROOT, size_t i)
 {
@@ -1013,7 +1081,6 @@ STATIC_INLINE jl_value_t *jl_field_type_concrete(jl_datatype_t *st JL_PROPAGATES
 #define jl_datatype_align(t)   (((jl_datatype_t*)t)->layout->alignment)
 #define jl_datatype_nbits(t)   ((((jl_datatype_t*)t)->size)*8)
 #define jl_datatype_nfields(t) (((jl_datatype_t*)(t))->layout->nfields)
-#define jl_datatype_isinlinealloc(t) (((jl_datatype_t *)(t))->isinlinealloc)
 
 JL_DLLEXPORT void *jl_symbol_name(jl_sym_t *s);
 // inline version with strong type check to detect typos in a `->name` chain
@@ -1025,6 +1092,7 @@ STATIC_INLINE char *jl_symbol_name_(jl_sym_t *s) JL_NOTSAFEPOINT
 
 static inline uint32_t jl_fielddesc_size(int8_t fielddesc_type) JL_NOTSAFEPOINT
 {
+    assert(fielddesc_type >= 0 && fielddesc_type <= 2);
     return 2 << fielddesc_type;
     //if (fielddesc_type == 0) {
     //    return sizeof(jl_fielddesc8_t);
@@ -1056,6 +1124,7 @@ static inline const char *jl_dt_layout_ptrs(const jl_datatype_layout_t *l) JL_NO
             return ((const jl_fielddesc16_t*)jl_dt_layout_fields(ly))[i].f;   \
         }                                                                     \
         else {                                                                \
+            assert(ly->fielddesc_type == 2);                                  \
             return ((const jl_fielddesc32_t*)jl_dt_layout_fields(ly))[i].f;   \
         }                                                                     \
     }                                                                         \
@@ -1088,6 +1157,30 @@ static inline uint32_t jl_ptr_offset(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
     }
 }
 
+static inline int jl_field_isatomic(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
+{
+    const uint32_t *atomicfields = st->name->atomicfields;
+    if (atomicfields != NULL) {
+        if (atomicfields[i / 32] & (1 << (i % 32)))
+            return 1;
+    }
+    return 0;
+}
+
+static inline int jl_field_isconst(jl_datatype_t *st, int i) JL_NOTSAFEPOINT
+{
+    jl_typename_t *tn = st->name;
+    if (!tn->mutabl)
+        return 1;
+    const uint32_t *constfields = tn->constfields;
+    if (constfields != NULL) {
+        if (constfields[i / 32] & (1 << (i % 32)))
+            return 1;
+    }
+    return 0;
+}
+
+
 static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEPOINT
 {
     return l->nfields == 0 && l->npointers > 0;
@@ -1100,10 +1193,10 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP
 #define jl_is_svec(v)        jl_typeis(v,jl_simplevector_type)
 #define jl_is_simplevector(v) jl_is_svec(v)
 #define jl_is_datatype(v)    jl_typeis(v,jl_datatype_type)
-#define jl_is_mutable(t)     (((jl_datatype_t*)t)->mutabl)
-#define jl_is_mutable_datatype(t) (jl_is_datatype(t) && (((jl_datatype_t*)t)->mutabl))
-#define jl_is_immutable(t)   (!((jl_datatype_t*)t)->mutabl)
-#define jl_is_immutable_datatype(t) (jl_is_datatype(t) && (!((jl_datatype_t*)t)->mutabl))
+#define jl_is_mutable(t)     (((jl_datatype_t*)t)->name->mutabl)
+#define jl_is_mutable_datatype(t) (jl_is_datatype(t) && (((jl_datatype_t*)t)->name->mutabl))
+#define jl_is_immutable(t)   (!((jl_datatype_t*)t)->name->mutabl)
+#define jl_is_immutable_datatype(t) (jl_is_datatype(t) && (!((jl_datatype_t*)t)->name->mutabl))
 #define jl_is_uniontype(v)   jl_typeis(v,jl_uniontype_type)
 #define jl_is_typevar(v)     jl_typeis(v,jl_tvar_type)
 #define jl_is_unionall(v)    jl_typeis(v,jl_unionall_type)
@@ -1144,7 +1237,7 @@ static inline int jl_is_layout_opaque(const jl_datatype_layout_t *l) JL_NOTSAFEP
 #define jl_is_cpointer(v)    jl_is_cpointer_type(jl_typeof(v))
 #define jl_is_pointer(v)     jl_is_cpointer_type(jl_typeof(v))
 #define jl_is_uint8pointer(v)jl_typeis(v,jl_uint8pointer_type)
-#define jl_is_llvmpointer(v) jl_typeis(v,jl_llvmpointer_type)
+#define jl_is_llvmpointer(v) (((jl_datatype_t*)jl_typeof(v))->name == jl_llvmpointer_typename)
 #define jl_is_intrinsic(v)   jl_typeis(v,jl_intrinsic_type)
 #define jl_array_isbitsunion(a) (!(((jl_array_t*)(a))->flags.ptrarray) && jl_is_uniontype(jl_tparam0(jl_typeof(a))))
 
@@ -1172,7 +1265,7 @@ STATIC_INLINE int jl_is_primitivetype(void *v) JL_NOTSAFEPOINT
 STATIC_INLINE int jl_is_structtype(void *v) JL_NOTSAFEPOINT
 {
     return (jl_is_datatype(v) &&
-            !((jl_datatype_t*)(v))->abstract &&
+            !((jl_datatype_t*)(v))->name->abstract &&
             !jl_is_primitivetype(v));
 }
 
@@ -1188,7 +1281,7 @@ STATIC_INLINE int jl_is_datatype_singleton(jl_datatype_t *d) JL_NOTSAFEPOINT
 
 STATIC_INLINE int jl_is_abstracttype(void *v) JL_NOTSAFEPOINT
 {
-    return (jl_is_datatype(v) && ((jl_datatype_t*)(v))->abstract);
+    return (jl_is_datatype(v) && ((jl_datatype_t*)(v))->name->abstract);
 }
 
 STATIC_INLINE int jl_is_array_type(void *t) JL_NOTSAFEPOINT
@@ -1203,6 +1296,19 @@ STATIC_INLINE int jl_is_array(void *v) JL_NOTSAFEPOINT
     return jl_is_array_type(t);
 }
 
+
+STATIC_INLINE int jl_is_opaque_closure_type(void *t) JL_NOTSAFEPOINT
+{
+    return (jl_is_datatype(t) &&
+            ((jl_datatype_t*)(t))->name == jl_opaque_closure_typename);
+}
+
+STATIC_INLINE int jl_is_opaque_closure(void *v) JL_NOTSAFEPOINT
+{
+    jl_value_t *t = jl_typeof(v);
+    return jl_is_opaque_closure_type(t);
+}
+
 STATIC_INLINE int jl_is_cpointer_type(jl_value_t *t) JL_NOTSAFEPOINT
 {
     return (jl_is_datatype(t) &&
@@ -1245,10 +1351,42 @@ STATIC_INLINE int jl_is_type_type(jl_value_t *v) JL_NOTSAFEPOINT
             ((jl_datatype_t*)(v))->name == ((jl_datatype_t*)jl_type_type->body)->name);
 }
 
+STATIC_INLINE int jl_is_array_zeroinit(jl_array_t *a) JL_NOTSAFEPOINT
+{
+    if (a->flags.ptrarray || a->flags.hasptr)
+        return 1;
+    jl_value_t *elty = jl_tparam0(jl_typeof(a));
+    return jl_is_datatype(elty) && ((jl_datatype_t*)elty)->zeroinit;
+}
+
 // object identity
-JL_DLLEXPORT int jl_egal(jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal__bits(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal__special(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_egal__unboxed(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT;
 JL_DLLEXPORT uintptr_t jl_object_id(jl_value_t *v) JL_NOTSAFEPOINT;
 
+STATIC_INLINE int jl_egal__unboxed_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED, jl_datatype_t *dt) JL_NOTSAFEPOINT
+{
+    if (dt->name->mutabl) {
+        if (dt == jl_simplevector_type || dt == jl_string_type || dt == jl_datatype_type)
+            return jl_egal__special(a, b, dt);
+        return 0;
+    }
+    return jl_egal__bits(a, b, dt);
+}
+
+STATIC_INLINE int jl_egal_(const jl_value_t *a JL_MAYBE_UNROOTED, const jl_value_t *b JL_MAYBE_UNROOTED) JL_NOTSAFEPOINT
+{
+    if (a == b)
+        return 1;
+    jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(a);
+    if (dt != (jl_datatype_t*)jl_typeof(b))
+        return 0;
+    return jl_egal__unboxed_(a, b, dt);
+}
+#define jl_egal(a, b) jl_egal_((a), (b))
+
 // type predicates and basic operations
 JL_DLLEXPORT int jl_type_equality_is_identity(jl_value_t *t1, jl_value_t *t2) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_has_free_typevars(jl_value_t *v) JL_NOTSAFEPOINT;
@@ -1280,19 +1418,23 @@ STATIC_INLINE int jl_is_concrete_type(jl_value_t *v) JL_NOTSAFEPOINT
 JL_DLLEXPORT int jl_isa_compileable_sig(jl_tupletype_t *type, jl_method_t *definition);
 
 // type constructors
-JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *inmodule);
+JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *inmodule, int abstract, int mutabl);
 JL_DLLEXPORT jl_tvar_t *jl_new_typevar(jl_sym_t *name, jl_value_t *lb, jl_value_t *ub);
 JL_DLLEXPORT jl_value_t *jl_instantiate_unionall(jl_unionall_t *u, jl_value_t *p);
 JL_DLLEXPORT jl_value_t *jl_apply_type(jl_value_t *tc, jl_value_t **params, size_t n);
 JL_DLLEXPORT jl_value_t *jl_apply_type1(jl_value_t *tc, jl_value_t *p1);
 JL_DLLEXPORT jl_value_t *jl_apply_type2(jl_value_t *tc, jl_value_t *p1, jl_value_t *p2);
+JL_DLLEXPORT jl_datatype_t *jl_apply_modify_type(jl_value_t *dt);
+JL_DLLEXPORT jl_datatype_t *jl_apply_cmpswap_type(jl_value_t *dt);
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type(jl_svec_t *params);
 JL_DLLEXPORT jl_tupletype_t *jl_apply_tuple_type_v(jl_value_t **p, size_t np);
 JL_DLLEXPORT jl_datatype_t *jl_new_datatype(jl_sym_t *name,
                                             jl_module_t *module,
                                             jl_datatype_t *super,
                                             jl_svec_t *parameters,
-                                            jl_svec_t *fnames, jl_svec_t *ftypes,
+                                            jl_svec_t *fnames,
+                                            jl_svec_t *ftypes,
+                                            jl_svec_t *fattrs,
                                             int abstract, int mutabl,
                                             int ninitialized);
 JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name,
@@ -1301,7 +1443,12 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name,
                                                  jl_svec_t *parameters, size_t nbits);
 
 // constructors
-JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *bt, void *data);
+JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *bt, const void *src);
+JL_DLLEXPORT jl_value_t *jl_atomic_new_bits(jl_value_t *dt, const char *src);
+JL_DLLEXPORT void jl_atomic_store_bits(char *dst, const jl_value_t *src, int nb);
+JL_DLLEXPORT jl_value_t *jl_atomic_swap_bits(jl_value_t *dt, char *dst, const jl_value_t *src, int nb);
+JL_DLLEXPORT int jl_atomic_bool_cmpswap_bits(char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
+JL_DLLEXPORT jl_value_t *jl_atomic_cmpswap_bits(jl_datatype_t *dt, jl_datatype_t *rettype, char *dst, const jl_value_t *expected, const jl_value_t *src, int nb);
 JL_DLLEXPORT jl_value_t *jl_new_struct(jl_datatype_t *type, ...);
 JL_DLLEXPORT jl_value_t *jl_new_structv(jl_datatype_t *type, jl_value_t **args, uint32_t na);
 JL_DLLEXPORT jl_value_t *jl_new_structt(jl_datatype_t *type, jl_value_t *tup);
@@ -1323,9 +1470,9 @@ JL_DLLEXPORT jl_sym_t *jl_tagged_gensym(const char *str, size_t len);
 JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void);
 JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
                                                  jl_module_t *module,
-                                                 jl_value_t **bp, jl_value_t *bp_owner,
+                                                 _Atomic(jl_value_t*) *bp, jl_value_t *bp_owner,
                                                  jl_binding_t *bnd);
-JL_DLLEXPORT void jl_method_def(jl_svec_t *argdata, jl_code_info_t *f, jl_module_t *module);
+JL_DLLEXPORT jl_method_t *jl_method_def(jl_svec_t *argdata, jl_methtable_t *mt, jl_code_info_t *f, jl_module_t *module);
 JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo);
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src);
 JL_DLLEXPORT size_t jl_get_world_counter(void) JL_NOTSAFEPOINT;
@@ -1388,12 +1535,12 @@ JL_DLLEXPORT jl_value_t *jl_get_nth_field(jl_value_t *v, size_t i);
 // Like jl_get_nth_field above, but asserts if it needs to allocate
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_noalloc(jl_value_t *v JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_get_nth_field_checked(jl_value_t *v, size_t i);
-JL_DLLEXPORT void        jl_set_nth_field(jl_value_t *v, size_t i,
-                                          jl_value_t *rhs) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void        jl_set_nth_field(jl_value_t *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int         jl_field_isdefined(jl_value_t *v, size_t i) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_get_field(jl_value_t *o, const char *fld);
 JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a);
-JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al) JL_NOTSAFEPOINT;
+int jl_uniontype_size(jl_value_t *ty, size_t *sz);
+JL_DLLEXPORT int jl_islayout_inline(jl_value_t *eltype, size_t *fsz, size_t *al);
 
 // arrays
 JL_DLLEXPORT jl_array_t *jl_new_array(jl_value_t *atype, jl_value_t *dims);
@@ -1428,6 +1575,7 @@ JL_DLLEXPORT void jl_array_sizehint(jl_array_t *a, size_t sz);
 JL_DLLEXPORT void jl_array_ptr_1d_push(jl_array_t *a, jl_value_t *item);
 JL_DLLEXPORT void jl_array_ptr_1d_append(jl_array_t *a, jl_array_t *a2);
 JL_DLLEXPORT jl_value_t *jl_apply_array_type(jl_value_t *type, size_t dim);
+JL_DLLEXPORT int jl_array_validate_dims(size_t *nel, size_t *tot, uint32_t ndims, size_t *dims, size_t elsz);
 // property access
 JL_DLLEXPORT void *jl_array_ptr(jl_array_t *a);
 JL_DLLEXPORT void *jl_array_eltype(jl_value_t *a);
@@ -1450,30 +1598,33 @@ JL_DLLEXPORT void jl_set_module_compile(jl_module_t *self, int value);
 JL_DLLEXPORT int jl_get_module_compile(jl_module_t *m);
 JL_DLLEXPORT void jl_set_module_infer(jl_module_t *self, int value);
 JL_DLLEXPORT int jl_get_module_infer(jl_module_t *m);
+JL_DLLEXPORT void jl_set_module_max_methods(jl_module_t *self, int value);
+JL_DLLEXPORT int jl_get_module_max_methods(jl_module_t *m);
 // get binding for reading
 JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
 JL_DLLEXPORT jl_binding_t *jl_get_binding_or_error(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var);
+JL_DLLEXPORT jl_value_t *jl_binding_type(jl_module_t *m, jl_sym_t *var);
 // get binding for assignment
-JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int error);
-JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT,
-                                                         jl_sym_t *var);
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc);
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr_or_error(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
+JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
 JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var);
-JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var);
+JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT int jl_is_const(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
-JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT);
 JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT);
-JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b JL_ROOTING_ARGUMENT, jl_value_t *rhs JL_ROOTED_ARGUMENT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void jl_declare_constant(jl_binding_t *b);
 JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from);
 JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s);
-JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from,
-                                   jl_sym_t *s);
+JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname);
+JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from, jl_sym_t *s);
+JL_DLLEXPORT void jl_module_import_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname);
 JL_DLLEXPORT void jl_module_export(jl_module_t *from, jl_sym_t *s);
 JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *s);
-JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var);
 JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m);
 STATIC_INLINE jl_function_t *jl_get_function(jl_module_t *m, const char *name)
 {
@@ -1489,11 +1640,16 @@ JL_DLLEXPORT int jl_errno(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_set_errno(int e) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int32_t jl_stat(const char *path, char *statbuf) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_effective_threads(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT long jl_getpagesize(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT long jl_getallocationgranularity(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int jl_is_debugbuild(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_get_UNAME(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_sym_t *jl_get_ARCH(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_get_libllvm(void) JL_NOTSAFEPOINT;
+extern JL_DLLIMPORT int jl_n_threadpools;
+extern JL_DLLIMPORT int jl_n_threads;
+extern JL_DLLIMPORT int *jl_n_threads_per_pool;
 
 // environment entries
 JL_DLLEXPORT jl_value_t *jl_environ(int i);
@@ -1515,6 +1671,7 @@ JL_DLLEXPORT void JL_NORETURN jl_type_error_rt(const char *fname,
                                                jl_value_t *ty JL_MAYBE_UNROOTED,
                                                jl_value_t *got JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var);
+JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str);
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v JL_MAYBE_UNROOTED,
                                               jl_value_t *t JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error_v(jl_value_t *v JL_MAYBE_UNROOTED,
@@ -1546,9 +1703,9 @@ JL_DLLEXPORT void jl_exception_clear(void) JL_NOTSAFEPOINT;
 #define JL_NARGSV(fname, min)                           \
     if (nargs < min) jl_too_few_args(#fname, min);
 
-#define JL_TYPECHK(fname, type, v)                                      \
-    if (!jl_is_##type(v)) {                                             \
-        jl_type_error(#fname, (jl_value_t*)jl_##type##_type, (v));      \
+#define JL_TYPECHK(fname, type, v)                                 \
+    if (!jl_is_##type(v)) {                                        \
+        jl_type_error(#fname, (jl_value_t*)jl_##type##_type, (v)); \
     }
 #define JL_TYPECHKS(fname, type, v)                                     \
     if (!jl_is_##type(v)) {                                             \
@@ -1561,11 +1718,8 @@ typedef enum {
     JL_IMAGE_JULIA_HOME = 1,
     //JL_IMAGE_LIBJULIA = 2,
 } JL_IMAGE_SEARCH;
-// this helps turn threading compilation mismatches into linker errors
-#define julia_init julia_init__threading
-#define jl_init jl_init__threading
-#define jl_init_with_image jl_init_with_image__threading
 
+JL_DLLEXPORT const char *jl_get_libdir(void);
 JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel);
 JL_DLLEXPORT void jl_init(void);
 JL_DLLEXPORT void jl_init_with_image(const char *julia_bindir,
@@ -1583,13 +1737,14 @@ JL_DLLEXPORT ios_t *jl_create_system_image(void *);
 JL_DLLEXPORT void jl_save_system_image(const char *fname);
 JL_DLLEXPORT void jl_restore_system_image(const char *fname);
 JL_DLLEXPORT void jl_restore_system_image_data(const char *buf, size_t len);
+JL_DLLEXPORT void jl_set_newly_inferred(jl_value_t *newly_inferred);
 JL_DLLEXPORT int jl_save_incremental(const char *fname, jl_array_t *worklist);
 JL_DLLEXPORT jl_value_t *jl_restore_incremental(const char *fname, jl_array_t *depmods);
 JL_DLLEXPORT jl_value_t *jl_restore_incremental_from_buf(const char *buf, size_t sz, jl_array_t *depmods);
 
 // parsing
 JL_DLLEXPORT jl_value_t *jl_parse_all(const char *text, size_t text_len,
-                                      const char *filename, size_t filename_len);
+                                      const char *filename, size_t filename_len, size_t lineno);
 JL_DLLEXPORT jl_value_t *jl_parse_string(const char *text, size_t text_len,
                                          int offset, int greedy);
 // lowering
@@ -1598,6 +1753,8 @@ JL_DLLEXPORT jl_value_t *jl_expand_with_loc(jl_value_t *expr, jl_module_t *inmod
                                             const char *file, int line);
 JL_DLLEXPORT jl_value_t *jl_expand_with_loc_warn(jl_value_t *expr, jl_module_t *inmodule,
                                                  const char *file, int line);
+JL_DLLEXPORT jl_value_t *jl_expand_in_world(jl_value_t *expr, jl_module_t *inmodule,
+                                            const char *file, int line, size_t world);
 JL_DLLEXPORT jl_value_t *jl_expand_stmt(jl_value_t *expr, jl_module_t *inmodule);
 JL_DLLEXPORT jl_value_t *jl_expand_stmt_with_loc(jl_value_t *expr, jl_module_t *inmodule,
                                                  const char *file, int line);
@@ -1621,11 +1778,11 @@ enum JL_RTLD_CONSTANT {
 };
 #define JL_RTLD_DEFAULT (JL_RTLD_LAZY | JL_RTLD_DEEPBIND)
 
-typedef void *jl_uv_libhandle; // compatible with dlopen (void*) / LoadLibrary (HMODULE)
-JL_DLLEXPORT jl_uv_libhandle jl_load_dynamic_library(const char *fname, unsigned flags, int throw_err) JL_NOTSAFEPOINT;
-JL_DLLEXPORT jl_uv_libhandle jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_dlclose(jl_uv_libhandle handle) JL_NOTSAFEPOINT;
-JL_DLLEXPORT int jl_dlsym(jl_uv_libhandle handle, const char *symbol, void ** value, int throw_err) JL_NOTSAFEPOINT;
+typedef void *jl_libhandle; // compatible with dlopen (void*) / LoadLibrary (HMODULE)
+JL_DLLEXPORT jl_libhandle jl_load_dynamic_library(const char *fname, unsigned flags, int throw_err);
+JL_DLLEXPORT jl_libhandle jl_dlopen(const char *filename, unsigned flags) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_dlclose(jl_libhandle handle) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_dlsym(jl_libhandle handle, const char *symbol, void ** value, int throw_err) JL_NOTSAFEPOINT;
 
 // evaluation
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval(jl_module_t *m, jl_value_t *v);
@@ -1659,6 +1816,7 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i);
 JL_DLLEXPORT int jl_is_operator(char *sym);
 JL_DLLEXPORT int jl_is_unary_operator(char *sym);
 JL_DLLEXPORT int jl_is_unary_and_binary_operator(char *sym);
+JL_DLLEXPORT int jl_is_syntactic_operator(char *sym);
 JL_DLLEXPORT int jl_operator_precedence(char *sym);
 
 STATIC_INLINE int jl_vinfo_sa(uint8_t vi)
@@ -1682,12 +1840,12 @@ STATIC_INLINE jl_value_t *jl_apply(jl_value_t **args, uint32_t nargs)
     return jl_apply_generic(args[0], &args[1], nargs - 1);
 }
 
-JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f, jl_value_t **args, int32_t nargs);
-JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f);
-JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f, jl_value_t *a);
-JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f, jl_value_t *a, jl_value_t *b);
-JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f, jl_value_t *a,
-                                  jl_value_t *b, jl_value_t *c);
+JL_DLLEXPORT jl_value_t *jl_call(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t **args, uint32_t nargs);
+JL_DLLEXPORT jl_value_t *jl_call0(jl_function_t *f JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_value_t *jl_call1(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_value_t *jl_call2(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED, jl_value_t *b JL_MAYBE_UNROOTED);
+JL_DLLEXPORT jl_value_t *jl_call3(jl_function_t *f JL_MAYBE_UNROOTED, jl_value_t *a JL_MAYBE_UNROOTED,
+                                  jl_value_t *b JL_MAYBE_UNROOTED, jl_value_t *c JL_MAYBE_UNROOTED);
 
 // interfacing with Task runtime
 JL_DLLEXPORT void jl_yield(void);
@@ -1711,7 +1869,6 @@ typedef struct _jl_handler_t {
     int8_t gc_state;
     size_t locks_len;
     sig_atomic_t defer_signal;
-    int finalizers_inhibited;
     jl_timing_block_t *timing_stack;
     size_t world_age;
 } jl_handler_t;
@@ -1723,34 +1880,35 @@ typedef struct _jl_task_t {
     jl_value_t *tls;
     jl_value_t *donenotify;
     jl_value_t *result;
-    jl_value_t *exception;
     jl_value_t *logstate;
     jl_function_t *start;
-    uint8_t _state;
+    uint64_t rngState[4];
+    _Atomic(uint8_t) _state;
     uint8_t sticky; // record whether this Task can be migrated to a new thread
+    _Atomic(uint8_t) _isexception; // set if `result` is an exception to throw or that we exited with
+    // multiqueue priority
+    uint16_t priority;
 
 // hidden state:
     // id of owning thread - does not need to be defined until the task runs
-    int16_t tid;
-    // multiqueue priority
-    int16_t prio;
-    // current world age
+    _Atomic(int16_t) tid;
+    // threadpool id
+    int8_t threadpoolid;
+    // saved gc stack top for context switches
+    jl_gcframe_t *gcstack;
     size_t world_age;
+    // quick lookup for current ptls
+    jl_ptls_t ptls; // == jl_all_tls_states[tid]
     // saved exception stack
     jl_excstack_t *excstack;
-
-    jl_ucontext_t ctx; // saved thread state
+    // current exception handler
+    jl_handler_t *eh;
+    // saved thread state
+    jl_ucontext_t ctx;
     void *stkbuf; // malloc'd memory (either copybuf or stack)
     size_t bufsz; // actual sizeof stkbuf
     unsigned int copy_stack:31; // sizeof stack for copybuf
     unsigned int started:1;
-
-    // current exception handler
-    jl_handler_t *eh;
-    // saved gc stack top for context switches
-    jl_gcframe_t *gcstack;
-
-    jl_timing_block_t *timing_stack;
 } jl_task_t;
 
 #define JL_TASK_STATE_RUNNABLE 0
@@ -1759,13 +1917,17 @@ typedef struct _jl_task_t {
 
 JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t*, jl_value_t*, size_t);
 JL_DLLEXPORT void jl_switchto(jl_task_t **pt);
+JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void JL_NORETURN jl_throw(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow(void);
 JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void);
 JL_DLLEXPORT void JL_NORETURN jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED);
 JL_DLLEXPORT void JL_NORETURN jl_no_exc_handler(jl_value_t *e);
+JL_DLLEXPORT JL_CONST_FUNC jl_gcframe_t **(jl_get_pgcstack)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
+#define jl_current_task (container_of(jl_get_pgcstack(), jl_task_t, gcstack))
 
-#include "locks.h"   // requires jl_task_t definition
+#include "julia_locks.h"   // requires jl_task_t definition
 
 JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh);
 JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh);
@@ -1775,16 +1937,27 @@ JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT;
 
 #if defined(_OS_WINDOWS_)
 #if defined(_COMPILER_GCC_)
-int __attribute__ ((__nothrow__,__returns_twice__)) (jl_setjmp)(jmp_buf _Buf);
+JL_DLLEXPORT int __attribute__ ((__nothrow__,__returns_twice__)) (jl_setjmp)(jmp_buf _Buf);
 __declspec(noreturn) __attribute__ ((__nothrow__)) void (jl_longjmp)(jmp_buf _Buf, int _Value);
+JL_DLLEXPORT int __attribute__ ((__nothrow__,__returns_twice__)) (ijl_setjmp)(jmp_buf _Buf);
+__declspec(noreturn) __attribute__ ((__nothrow__)) void (ijl_longjmp)(jmp_buf _Buf, int _Value);
 #else
-int (jl_setjmp)(jmp_buf _Buf);
+JL_DLLEXPORT int (jl_setjmp)(jmp_buf _Buf);
 void (jl_longjmp)(jmp_buf _Buf, int _Value);
+JL_DLLEXPORT int (ijl_setjmp)(jmp_buf _Buf);
+void (ijl_longjmp)(jmp_buf _Buf, int _Value);
 #endif
+#ifdef LIBRARY_EXPORTS
+#define jl_setjmp_f ijl_setjmp
+#define jl_setjmp_name "ijl_setjmp"
+#define jl_setjmp(a,b) ijl_setjmp(a)
+#define jl_longjmp(a,b) ijl_longjmp(a,b)
+#else
 #define jl_setjmp_f jl_setjmp
 #define jl_setjmp_name "jl_setjmp"
 #define jl_setjmp(a,b) jl_setjmp(a)
 #define jl_longjmp(a,b) jl_longjmp(a,b)
+#endif
 #elif defined(_OS_EMSCRIPTEN_)
 #define jl_setjmp(a,b) setjmp(a)
 #define jl_longjmp(a,b) longjmp(a,b)
@@ -1804,7 +1977,7 @@ void (jl_longjmp)(jmp_buf _Buf, int _Value);
 #endif
 
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 
 // This is hard. Ideally we'd teach the static analyzer about the extra control
 // flow edges. But for now, just hide this as best we can
@@ -1829,24 +2002,32 @@ extern int had_exception;
 
 // I/O system -----------------------------------------------------------------
 
-#define JL_STREAM uv_stream_t
+struct uv_loop_s;
+struct uv_handle_s;
+struct uv_stream_s;
+#ifdef _OS_WINDOWS_
+typedef HANDLE jl_uv_os_fd_t;
+#else
+typedef int jl_uv_os_fd_t;
+#endif
+#define JL_STREAM struct uv_stream_s
 #define JL_STDOUT jl_uv_stdout
 #define JL_STDERR jl_uv_stderr
 #define JL_STDIN  jl_uv_stdin
 
 JL_DLLEXPORT int jl_process_events(void);
 
-JL_DLLEXPORT uv_loop_t *jl_global_event_loop(void);
+JL_DLLEXPORT struct uv_loop_s *jl_global_event_loop(void);
 
-JL_DLLEXPORT void jl_close_uv(uv_handle_t *handle);
+JL_DLLEXPORT void jl_close_uv(struct uv_handle_s *handle);
 
 JL_DLLEXPORT jl_array_t *jl_take_buffer(ios_t *s);
 
 typedef struct {
     void *data;
-    uv_loop_t *loop;
-    uv_handle_type type;
-    uv_os_fd_t file;
+    struct uv_loop_s *loop;
+    int type; // enum uv_handle_type
+    jl_uv_os_fd_t file;
 } jl_uv_file_t;
 
 #ifdef __GNUC__
@@ -1856,10 +2037,10 @@ typedef struct {
 #define _JL_FORMAT_ATTR(type, str, arg)
 #endif
 
-JL_DLLEXPORT void jl_uv_puts(uv_stream_t *stream, const char *str, size_t n);
-JL_DLLEXPORT int jl_printf(uv_stream_t *s, const char *format, ...)
+JL_DLLEXPORT void jl_uv_puts(struct uv_stream_s *stream, const char *str, size_t n);
+JL_DLLEXPORT int jl_printf(struct uv_stream_s *s, const char *format, ...)
     _JL_FORMAT_ATTR(printf, 2, 3);
-JL_DLLEXPORT int jl_vprintf(uv_stream_t *s, const char *format, va_list args)
+JL_DLLEXPORT int jl_vprintf(struct uv_stream_s *s, const char *format, va_list args)
     _JL_FORMAT_ATTR(printf, 2, 0);
 JL_DLLEXPORT void jl_safe_printf(const char *str, ...) JL_NOTSAFEPOINT
     _JL_FORMAT_ATTR(printf, 1, 2);
@@ -1878,59 +2059,17 @@ JL_DLLEXPORT jl_value_t *jl_stdout_obj(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_NOTSAFEPOINT;
-JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT; // deprecated
 // Mainly for debugging, use `void*` so that no type cast is needed in C++.
 JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT;
 
 // julia options -----------------------------------------------------------
-// NOTE: This struct needs to be kept in sync with JLOptions type in base/options.jl
-typedef struct {
-    int8_t quiet;
-    int8_t banner;
-    const char *julia_bindir;
-    const char *julia_bin;
-    const char **cmds;
-    const char *image_file;
-    const char *cpu_target;
-    int32_t nthreads;
-    int32_t nprocs;
-    const char *machine_file;
-    const char *project;
-    int8_t isinteractive;
-    int8_t color;
-    int8_t historyfile;
-    int8_t startupfile;
-    int8_t compile_enabled;
-    int8_t code_coverage;
-    int8_t malloc_log;
-    int8_t opt_level;
-    int8_t debug_level;
-    int8_t check_bounds;
-    int8_t depwarn;
-    int8_t warn_overwrite;
-    int8_t can_inline;
-    int8_t polly;
-    const char *trace_compile;
-    int8_t fast_math;
-    int8_t worker;
-    const char *cookie;
-    int8_t handle_signals;
-    int8_t use_sysimage_native_code;
-    int8_t use_compiled_modules;
-    const char *bindto;
-    const char *outputbc;
-    const char *outputunoptbc;
-    const char *outputo;
-    const char *outputasm;
-    const char *outputji;
-    const char *output_code_coverage;
-    int8_t incremental;
-    int8_t image_file_specified;
-    int8_t warn_scope;
-    int8_t image_codegen;
-} jl_options_t;
-
-extern JL_DLLEXPORT jl_options_t jl_options;
+
+#include "jloptions.h"
+
+extern JL_DLLIMPORT jl_options_t jl_options;
+
 JL_DLLEXPORT ssize_t jl_sizeof_jl_options(void);
 
 // Parse an argc/argv pair to extract general julia options, passing back out
@@ -1949,6 +2088,7 @@ JL_DLLEXPORT int jl_generating_output(void) JL_NOTSAFEPOINT;
 #define JL_LOG_NONE 0
 #define JL_LOG_USER 1
 #define JL_LOG_ALL  2
+#define JL_LOG_PATH 3
 
 #define JL_OPTIONS_CHECK_BOUNDS_DEFAULT 0
 #define JL_OPTIONS_CHECK_BOUNDS_ON 1
@@ -2025,13 +2165,13 @@ typedef struct {
     float value;
 } jl_nullable_float32_t;
 
-#define jl_current_task (jl_get_ptls_states()->current_task)
-#define jl_root_task (jl_get_ptls_states()->root_task)
+#define jl_root_task (jl_current_task->ptls->root_task)
 
-JL_DLLEXPORT jl_value_t *jl_get_current_task(void);
+JL_DLLEXPORT jl_task_t *jl_get_current_task(void) JL_NOTSAFEPOINT;
 
-JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void);
-JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *);
+// TODO: we need to pin the task while using this (set pure bit)
+JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *) JL_NOTSAFEPOINT;
 
 // codegen interface ----------------------------------------------------------
 // The root propagation here doesn't have to be literal, but callers should
@@ -2055,26 +2195,8 @@ typedef struct {
     // generic_context(f, args...) instead of f(args...).
     jl_value_t *generic_context;
 } jl_cgparams_t;
-extern JL_DLLEXPORT jl_cgparams_t jl_default_cgparams;
 extern JL_DLLEXPORT int jl_default_debug_info_kind;
 
-#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
-#define JULIA_DEFINE_FAST_TLS()                                                             \
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_static(void)                        \
-{                                                                                           \
-    static __attribute__((tls_model("local-exec"))) __thread jl_tls_states_t tls_states;    \
-    return &tls_states;                                                                     \
-}                                                                                           \
-__attribute__((constructor)) void jl_register_ptls_states_getter(void)                      \
-{                                                                                           \
-    /* We need to make sure this function is called before any reference to */              \
-    /* TLS variables. */                                                                    \
-    jl_set_ptls_states_getter(jl_get_ptls_states_static);                                   \
-}
-#else
-#define JULIA_DEFINE_FAST_TLS()
-#endif
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/julia_assert.h b/src/julia_assert.h
index 6cf89d0e470a59..4b120fd9e845b3 100644
--- a/src/julia_assert.h
+++ b/src/julia_assert.h
@@ -21,7 +21,10 @@
 #  endif
 #else
 #  ifdef JL_NDEBUG
-#    undef JL_NDEBUG
+#    define NDEBUG
+#    include <assert.h>
+#    undef NDEBUG
+#  else
+#    include <assert.h>
 #  endif
-#  include <assert.h>
 #endif
diff --git a/src/julia_atomics.h b/src/julia_atomics.h
new file mode 100644
index 00000000000000..cb14e535cd0103
--- /dev/null
+++ b/src/julia_atomics.h
@@ -0,0 +1,310 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_ATOMICS_H
+#define JL_ATOMICS_H
+
+#if defined(__i386__) && defined(__GNUC__) && !defined(__SSE2__)
+#  error Julia can only be built for architectures above Pentium 4. Pass -march=pentium4, or set MARCH=pentium4 and ensure that -march is not passed separately with an older architecture.
+#endif
+
+// Low-level atomic operations
+#ifdef __cplusplus
+#include <atomic>
+using std::memory_order_relaxed;
+using std::memory_order_consume;
+using std::memory_order_acquire;
+using std::memory_order_release;
+using std::memory_order_acq_rel;
+using std::memory_order_seq_cst;
+using std::atomic_thread_fence;
+using std::atomic_signal_fence;
+using std::atomic_load;
+using std::atomic_load_explicit;
+using std::atomic_store;
+using std::atomic_store_explicit;
+using std::atomic_fetch_add;
+using std::atomic_fetch_add_explicit;
+using std::atomic_fetch_and;
+using std::atomic_fetch_and_explicit;
+using std::atomic_fetch_or;
+using std::atomic_fetch_or_explicit;
+using std::atomic_compare_exchange_strong;
+using std::atomic_compare_exchange_strong_explicit;
+using std::atomic_exchange;
+using std::atomic_exchange_explicit;
+extern "C" {
+#define _Atomic(T) std::atomic<T>
+#else
+#include <stdatomic.h>
+#endif
+#include <signal.h> // for sig_atomic_t
+
+#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
+#  include <immintrin.h>
+#endif
+
+enum jl_memory_order {
+    jl_memory_order_unspecified = -2,
+    jl_memory_order_invalid = -1,
+    jl_memory_order_notatomic = 0,
+    jl_memory_order_unordered,
+    jl_memory_order_monotonic,
+    jl_memory_order_consume,
+    jl_memory_order_acquire,
+    jl_memory_order_release,
+    jl_memory_order_acq_rel,
+    jl_memory_order_seq_cst
+};
+
+/**
+ * Thread synchronization primitives:
+ *
+ * These roughly follows the c11/c++11 memory model and the act as memory
+ * barriers at both the compiler level and the hardware level.
+ * The only exception is the GC safepoint and GC state transitions for which
+ * we use only a compiler (signal) barrier and use the signal handler to do the
+ * synchronization in order to lower the mutator overhead as much as possible.
+ *
+ * We use the compiler intrinsics to implement a similar API to the c11/c++11
+ * one instead of using it directly because, we need interoperability between
+ * code written in different languages. The current c++ standard (c++14) does
+ * not allow using c11 atomic functions or types and there's currently no
+ * guarantee that the two types are compatible (although most of them probably
+ * are). We also need to access these atomic variables from the LLVM JIT code
+ * which is very hard unless the layout of the object is fully specified.
+ */
+#define jl_fence() atomic_thread_fence(memory_order_seq_cst)
+#define jl_fence_release() atomic_thread_fence(memory_order_release)
+#define jl_signal_fence() atomic_signal_fence(memory_order_seq_cst)
+
+#ifdef __cplusplus
+}
+// implicit conversion wasn't correctly specified 2017, so many compilers get
+// this wrong thus we include the correct definitions here (with implicit
+// conversion), instead of using the macro version
+template<class T>
+T jl_atomic_load(std::atomic<T> *ptr)
+{
+     return std::atomic_load<T>(ptr);
+}
+template<class T>
+T jl_atomic_load_explicit(std::atomic<T> *ptr, std::memory_order order)
+{
+     return std::atomic_load_explicit<T>(ptr, order);
+}
+#define jl_atomic_load_relaxed(ptr) jl_atomic_load_explicit(ptr, memory_order_relaxed)
+#define jl_atomic_load_acquire(ptr) jl_atomic_load_explicit(ptr, memory_order_acquire)
+template<class T, class S>
+void jl_atomic_store(std::atomic<T> *ptr, S desired)
+{
+     std::atomic_store<T>(ptr, desired);
+}
+template<class T, class S>
+void jl_atomic_store_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
+{
+     std::atomic_store_explicit<T>(ptr, desired, order);
+}
+#define jl_atomic_store_relaxed(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_relaxed)
+#define jl_atomic_store_release(ptr, val) jl_atomic_store_explicit(ptr, val, memory_order_release)
+template<class T, class S>
+T jl_atomic_fetch_add(std::atomic<T> *ptr, S val)
+{
+     return std::atomic_fetch_add<T>(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_add_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+     return std::atomic_fetch_add_explicit<T>(ptr, val, order);
+}
+#define jl_atomic_fetch_add_relaxed(ptr, val) jl_atomic_fetch_add_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+T jl_atomic_fetch_and(std::atomic<T> *ptr, S val)
+{
+     return std::atomic_fetch_and<T>(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_and_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+     return std::atomic_fetch_and_explicit<T>(ptr, val, order);
+}
+#define jl_atomic_fetch_and_relaxed(ptr, val) jl_atomic_fetch_and_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+T jl_atomic_fetch_or(std::atomic<T> *ptr, S val)
+{
+     return std::atomic_fetch_or<T>(ptr, val);
+}
+template<class T, class S>
+T jl_atomic_fetch_or_explicit(std::atomic<T> *ptr, S val, std::memory_order order)
+{
+     return std::atomic_fetch_or_explicit<T>(ptr, val, order);
+}
+#define jl_atomic_fetch_or_relaxed(ptr, val) jl_atomic_fetch_or_explicit(ptr, val, memory_order_relaxed)
+template<class T, class S>
+bool jl_atomic_cmpswap(std::atomic<T> *ptr, T *expected, S val)
+{
+     return std::atomic_compare_exchange_strong<T>(ptr, expected, val);
+}
+template<class T, class S>
+bool jl_atomic_cmpswap_explicit(std::atomic<T> *ptr, T *expected, S val, std::memory_order order)
+{
+     return std::atomic_compare_exchange_strong_explicit<T>(ptr, expected, val, order, order);
+}
+#define jl_atomic_cmpswap_relaxed(ptr, expected, val) jl_atomic_cmpswap_explicit(ptr, expected, val, memory_order_relaxed)
+template<class T, class S>
+T jl_atomic_exchange(std::atomic<T> *ptr, S desired)
+{
+     return std::atomic_exchange<T>(ptr, desired);
+}
+template<class T, class S>
+T jl_atomic_exchange_explicit(std::atomic<T> *ptr, S desired, std::memory_order order)
+{
+     return std::atomic_exchange_explicit<T>(ptr, desired, order);
+}
+#define jl_atomic_exchange_relaxed(ptr, val) jl_atomic_exchange_explicit(ptr, val, memory_order_relaxed)
+extern "C" {
+#else
+
+#  define jl_atomic_fetch_add_relaxed(obj, arg)         \
+    atomic_fetch_add_explicit(obj, arg, memory_order_relaxed)
+#  define jl_atomic_fetch_add(obj, arg)                 \
+    atomic_fetch_add(obj, arg)
+#  define jl_atomic_fetch_and_relaxed(obj, arg)         \
+    atomic_fetch_and_explicit(obj, arg, memory_order_relaxed)
+#  define jl_atomic_fetch_and(obj, arg)                 \
+    atomic_fetch_and(obj, arg)
+#  define jl_atomic_fetch_or_relaxed(obj, arg)          \
+    atomic_fetch_or_explicit(obj, arg, __ATOMIC_RELAXED)
+#  define jl_atomic_fetch_or(obj, arg)                  \
+    atomic_fetch_or(obj, arg)
+#  define jl_atomic_cmpswap(obj, expected, desired)     \
+    atomic_compare_exchange_strong(obj, expected, desired)
+#  define jl_atomic_cmpswap_relaxed(obj, expected, desired) \
+    atomic_compare_exchange_strong_explicit(obj, expected, desired, memory_order_relaxed, memory_order_relaxed)
+// TODO: Maybe add jl_atomic_cmpswap_weak for spin lock
+#  define jl_atomic_exchange(obj, desired)       \
+    atomic_exchange(obj, desired)
+#  define jl_atomic_exchange_relaxed(obj, desired)      \
+    atomic_exchange_explicit(obj, desired, memory_order_relaxed)
+#  define jl_atomic_store(obj, val)                     \
+    atomic_store(obj, val)
+#  define jl_atomic_store_relaxed(obj, val)             \
+    atomic_store_explicit(obj, val, memory_order_relaxed)
+
+#  if defined(__clang__) || !(defined(_CPU_X86_) || defined(_CPU_X86_64_))
+// Clang doesn't have this bug...
+#    define jl_atomic_store_release(obj, val)           \
+    atomic_store_explicit(obj, val, memory_order_release)
+#  else
+// Workaround a GCC bug when using store with release order by using the
+// stronger version instead.
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67458
+// fixed in https://gcc.gnu.org/git/?p=gcc.git&a=commit;h=d8c40eff56f69877b33c697ded756d50fde90c27
+#    define jl_atomic_store_release(obj, val) do {      \
+        jl_signal_fence();                              \
+        atomic_store_explicit(obj, val, memory_order_release);   \
+    } while (0)
+#  endif
+#  define jl_atomic_load(obj)                   \
+    atomic_load(obj)
+#  define jl_atomic_load_acquire(obj)           \
+    atomic_load_explicit(obj, memory_order_acquire)
+#ifdef _COMPILER_TSAN_ENABLED_
+// For the sake of tsan, call these loads consume ordering since they will act
+// as such on the processors we support while normally, the compiler would
+// upgrade this to acquire ordering, which is strong (and slower) than we want.
+#  define jl_atomic_load_relaxed(obj)           \
+    atomic_load_explicit(obj, memory_order_consume)
+#else
+#  define jl_atomic_load_relaxed(obj)           \
+    atomic_load_explicit(obj, memory_order_relaxed)
+#endif
+#endif
+
+#ifdef __clang_gcanalyzer__
+// for the purposes of the GC analyzer, we can turn these into non-atomic
+// expressions with similar properties (for the sake of the analyzer, we don't
+// care if it is an exact match for behavior)
+
+#undef _Atomic
+#define _Atomic(T) T
+
+#undef jl_atomic_exchange
+#undef jl_atomic_exchange_relaxed
+#define jl_atomic_exchange(obj, desired) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *p__analyzer__ = (desired); \
+            temp__analyzer__; \
+        }))
+#define jl_atomic_exchange_relaxed jl_atomic_exchange
+
+#undef jl_atomic_cmpswap
+#undef jl_atomic_cmpswap_relaxed
+#define jl_atomic_cmpswap(obj, expected, desired) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            __typeof__((expected)) x__analyzer__ = (expected); \
+            int eq__analyzer__ = memcmp(&temp__analyzer__, x__analyzer__, sizeof(temp__analyzer__)) == 0; \
+            if (eq__analyzer__) \
+                *p__analyzer__ = (desired); \
+            else \
+                *x__analyzer__ = temp__analyzer__; \
+            eq__analyzer__; \
+        }))
+#define jl_atomic_cmpswap_relaxed jl_atomic_cmpswap
+
+#undef jl_atomic_store
+#undef jl_atomic_store_release
+#undef jl_atomic_store_relaxed
+#define jl_atomic_store(obj, val)         (*(obj) = (val))
+#define jl_atomic_store_release jl_atomic_store
+#define jl_atomic_store_relaxed jl_atomic_store
+
+#undef jl_atomic_load
+#undef jl_atomic_load_acquire
+#undef jl_atomic_load_relaxed
+#define jl_atomic_load(obj)         (*(obj))
+#define jl_atomic_load_acquire jl_atomic_load
+#define jl_atomic_load_relaxed jl_atomic_load
+
+#undef jl_atomic_fetch_add
+#undef jl_atomic_fetch_and
+#undef jl_atomic_fetch_or
+#undef jl_atomic_fetch_add_relaxed
+#undef jl_atomic_fetch_and_relaxed
+#undef jl_atomic_fetch_or_relaxed
+#define jl_atomic_fetch_add(obj, val) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *(p__analyzer__) = temp__analyzer__ + (val); \
+            temp__analyzer__; \
+        }))
+#define jl_atomic_fetch_and(obj, val) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *(p__analyzer__) = temp__analyzer__ & (val); \
+            temp__analyzer__; \
+        }))
+#define jl_atomic_fetch_or(obj, val) \
+    (__extension__({ \
+            __typeof__((obj)) p__analyzer__ = (obj); \
+            __typeof__(*p__analyzer__) temp__analyzer__ = *p__analyzer__; \
+            *(p__analyzer__) = temp__analyzer__ | (val); \
+            temp__analyzer__; \
+        }))
+#define jl_atomic_fetch_add_relaxed jl_atomic_fetch_add
+#define jl_atomic_fetch_and_relaxed jl_atomic_fetch_and
+#define jl_atomic_fetch_or_relaxed jl_atomic_fetch_or
+
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // JL_ATOMICS_H
diff --git a/src/julia_fasttls.h b/src/julia_fasttls.h
new file mode 100644
index 00000000000000..08f465badf8d3f
--- /dev/null
+++ b/src/julia_fasttls.h
@@ -0,0 +1,44 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_FASTTLS_H
+#define JL_FASTTLS_H
+
+// Thread-local storage access
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Bring in definitions for `_OS_X_`, `JL_PATH_MAX` and `PATHSEPSTRING`, `jl_ptls_t`, etc... */
+#include "platform.h"
+#include "dirpath.h"
+
+typedef struct _jl_gcframe_t jl_gcframe_t;
+
+#if defined(_OS_DARWIN_)
+#include <pthread.h>
+typedef void *(jl_get_pgcstack_func)(pthread_key_t); // aka typeof(pthread_getspecific)
+#else
+typedef jl_gcframe_t **(jl_get_pgcstack_func)(void);
+#endif
+
+#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
+#define JULIA_DEFINE_FAST_TLS                                                                   \
+static __attribute__((tls_model("local-exec"))) __thread jl_gcframe_t **jl_pgcstack_localexec;  \
+JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack_static(void)                                        \
+{                                                                                               \
+    return jl_pgcstack_localexec;                                                               \
+}                                                                                               \
+JL_DLLEXPORT jl_gcframe_t ***jl_pgcstack_addr_static(void)                                      \
+{                                                                                               \
+    return &jl_pgcstack_localexec;                                                              \
+}
+#else
+#define JULIA_DEFINE_FAST_TLS
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/julia_gcext.h b/src/julia_gcext.h
index f931b1a4d90616..6787dafb4b7ee6 100644
--- a/src/julia_gcext.h
+++ b/src/julia_gcext.h
@@ -49,6 +49,8 @@ JL_DLLEXPORT jl_datatype_t *jl_new_foreign_type(
         int haspointers,
         int large);
 
+JL_DLLEXPORT int jl_is_foreign_type(jl_datatype_t *dt);
+
 JL_DLLEXPORT size_t jl_gc_max_internal_obj_size(void);
 JL_DLLEXPORT size_t jl_gc_external_obj_hdr_size(void);
 
@@ -125,8 +127,18 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p);
 // the size of that stack buffer upon return. Also, if task is a thread's
 // current task, that thread's id will be stored in *tid; otherwise,
 // *tid will be set to -1.
+//
+// DEPRECATED: use jl_active_task_stack() instead.
 JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid);
 
+// Query the active and total stack range for the given task, and set
+// *active_start and *active_end respectively *total_start and *total_end
+// accordingly. The range for the active part is a best-effort approximation
+// and may not be tight.
+JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
+                                       char **active_start, char **active_end,
+                                       char **total_start, char **total_end);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/julia_internal.h b/src/julia_internal.h
index 79079504c9bbad..02130ef963198f 100644
--- a/src/julia_internal.h
+++ b/src/julia_internal.h
@@ -4,8 +4,16 @@
 #define JL_INTERNAL_H
 
 #include "options.h"
-#include "locks.h"
+#include "julia_locks.h"
+#include "support/utils.h"
+#include "support/hashing.h"
+#include "support/ptrhash.h"
+#include "support/strtod.h"
+#include "gc-alloc-profiler.h"
+#include "support/rle.h"
 #include <uv.h>
+#include <llvm-c/Types.h>
+#include <llvm-c/Orc.h>
 #if !defined(_WIN32)
 #include <unistd.h>
 #else
@@ -15,11 +23,11 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
-#ifdef JL_ASAN_ENABLED
+#ifdef _COMPILER_ASAN_ENABLED_
 void __sanitizer_start_switch_fiber(void**, const void*, size_t);
 void __sanitizer_finish_switch_fiber(void*, const void**, size_t*);
 #endif
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
 void *__tsan_create_fiber(unsigned flags);
 void *__tsan_get_current_fiber(void);
 void __tsan_destroy_fiber(void *fiber);
@@ -69,13 +77,45 @@ void __tsan_switch_to_fiber(void *fiber, unsigned flags);
 #  define JL_USE_IFUNC 0
 #endif
 
+// If we've smashed the stack, (and not just normal NORETURN)
+// this will smash stack-unwind too
+#ifdef _OS_WINDOWS_
+#if defined(_CPU_X86_64_)
+    // install the unhandled exception handler at the top of our stack
+    // to call directly into our personality handler
+#define CFI_NORETURN \
+    asm volatile ("\t.seh_handler __julia_personality, @except\n\t.text");
+#else
+#define CFI_NORETURN
+#endif
+#else
+// wipe out the call-stack unwind capability beyond this function
+// (we are noreturn, so it is not a total lie)
+#if defined(_CPU_X86_64_)
+// per nongnu libunwind: "x86_64 ABI specifies that end of call-chain is marked with a NULL RBP or undefined return address"
+// so we do all 3, to be extra certain of it
+#define CFI_NORETURN \
+    asm volatile ("\t.cfi_undefined rip"); \
+    asm volatile ("\t.cfi_undefined rbp"); \
+    asm volatile ("\t.cfi_return_column rbp");
+#else
+    // per nongnu libunwind: "DWARF spec says undefined return address location means end of stack"
+    // we use whatever happens to be register 1 on this platform for this
+#define CFI_NORETURN \
+    asm volatile ("\t.cfi_undefined 1"); \
+    asm volatile ("\t.cfi_return_column 1");
+#endif
+#endif
+
+extern JL_DLLEXPORT uintptr_t __stack_chk_guard;
+
 // If this is detected in a backtrace of segfault, it means the functions
 // that use this value must be reworked into their async form with cb arg
 // provided and with JL_UV_LOCK used around the calls
 static uv_loop_t *const unused_uv_loop_arg = (uv_loop_t *)0xBAD10;
 
 extern jl_mutex_t jl_uv_mutex;
-extern int jl_uv_n_waiters;
+extern _Atomic(int) jl_uv_n_waiters;
 void JL_UV_LOCK(void);
 #define JL_UV_UNLOCK() JL_UNLOCK(&jl_uv_mutex)
 
@@ -83,13 +123,19 @@ void JL_UV_LOCK(void);
 extern "C" {
 #endif
 
+int jl_running_under_rr(int recheck) JL_NOTSAFEPOINT;
+
 //--------------------------------------------------
 // timers
 // Returns time in nanosec
-JL_DLLEXPORT uint64_t jl_hrtime(void);
+JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT void jl_set_peek_cond(uintptr_t);
+JL_DLLEXPORT double jl_get_profile_peek_duration(void);
+JL_DLLEXPORT void jl_set_profile_peek_duration(double);
 
 // number of cycles since power-on
-static inline uint64_t cycleclock(void)
+static inline uint64_t cycleclock(void) JL_NOTSAFEPOINT
 {
 #if defined(_CPU_X86_64_)
     uint64_t low, high;
@@ -107,6 +153,12 @@ static inline uint64_t cycleclock(void)
     int64_t virtual_timer_value;
     __asm__ volatile("mrs %0, cntvct_el0" : "=r"(virtual_timer_value));
     return virtual_timer_value;
+#elif defined(_CPU_PPC64_)
+    // This returns a time-base, which is not always precisely a cycle-count.
+    // https://reviews.llvm.org/D78084
+    int64_t tb;
+    asm volatile("mfspr %0, 268" : "=r" (tb));
+    return tb;
 #else
     #warning No cycleclock() definition for your platform
     // copy from https://github.com/google/benchmark/blob/v1.5.0/src/cycleclock.h
@@ -116,11 +168,12 @@ static inline uint64_t cycleclock(void)
 
 #include "timing.h"
 
-#ifdef _COMPILER_MICROSOFT_
-#  define jl_return_address() ((uintptr_t)_ReturnAddress())
-#else
-#  define jl_return_address() ((uintptr_t)__builtin_return_address(0))
-#endif
+// Global *atomic* integers controlling *process-wide* measurement of compilation time.
+extern JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled;
+extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time;
+extern JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time;
+
+#define jl_return_address() ((uintptr_t)__builtin_return_address(0))
 
 STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
 {
@@ -134,6 +187,28 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
 }
 
 
+// this is a version of memcpy that preserves atomic memory ordering
+// which makes it safe to use for objects that can contain memory references
+// without risk of creating pointers out of thin air
+// TODO: replace with LLVM's llvm.memmove.element.unordered.atomic.p0i8.p0i8.i32
+//       aka `__llvm_memmove_element_unordered_atomic_8` (for 64 bit)
+static inline void memmove_refs(void **dstp, void *const *srcp, size_t n) JL_NOTSAFEPOINT
+{
+    size_t i;
+    _Atomic(void*) *srcpa = (_Atomic(void*)*)srcp;
+    _Atomic(void*) *dstpa = (_Atomic(void*)*)dstp;
+    if (dstp < srcp || dstp > srcp + n) {
+        for (i = 0; i < n; i++) {
+            jl_atomic_store_relaxed(dstpa + i, jl_atomic_load_relaxed(srcpa + i));
+        }
+    }
+    else {
+        for (i = 0; i < n; i++) {
+            jl_atomic_store_relaxed(dstpa + n - i - 1, jl_atomic_load_relaxed(srcpa + n - i - 1));
+        }
+    }
+}
+
 // -- gc.c -- //
 
 #define GC_CLEAN  0 // freshly allocated
@@ -144,28 +219,29 @@ STATIC_INLINE uint32_t jl_int32hash_fast(uint32_t a)
 // useful constants
 extern jl_methtable_t *jl_type_type_mt JL_GLOBALLY_ROOTED;
 extern jl_methtable_t *jl_nonfunction_mt JL_GLOBALLY_ROOTED;
-JL_DLLEXPORT extern size_t jl_world_counter;
+extern JL_DLLEXPORT _Atomic(size_t) jl_world_counter;
 
 typedef void (*tracer_cb)(jl_value_t *tracee);
 extern tracer_cb jl_newmeth_tracer;
 void jl_call_tracer(tracer_cb callback, jl_value_t *tracee);
 void print_func_loc(JL_STREAM *s, jl_method_t *m);
 extern jl_array_t *_jl_debug_method_invalidation JL_GLOBALLY_ROOTED;
+void invalidate_backedges(void (*f)(jl_code_instance_t*), jl_method_instance_t *replaced_mi, size_t max_world, const char *why);
 
-extern size_t jl_page_size;
+extern JL_DLLEXPORT size_t jl_page_size;
 extern jl_function_t *jl_typeinf_func;
-extern size_t jl_typeinf_world;
-extern jl_typemap_entry_t *call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
+extern JL_DLLEXPORT size_t jl_typeinf_world;
+extern _Atomic(jl_typemap_entry_t*) call_cache[N_CALL_CACHE] JL_GLOBALLY_ROOTED;
 extern jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
 
 JL_DLLEXPORT extern int jl_lineno;
 JL_DLLEXPORT extern const char *jl_filename;
 
-JL_DLLEXPORT jl_value_t *jl_gc_pool_alloc(jl_ptls_t ptls, int pool_offset,
-                                          int osize);
-JL_DLLEXPORT jl_value_t *jl_gc_big_alloc(jl_ptls_t ptls, size_t allocsz);
-int jl_gc_classify_pools(size_t sz, int *osize);
-extern jl_mutex_t gc_perm_lock;
+jl_value_t *jl_gc_pool_alloc_noinline(jl_ptls_t ptls, int pool_offset,
+                                   int osize);
+jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
+JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize);
+extern uv_mutex_t gc_perm_lock;
 void *jl_gc_perm_alloc_nolock(size_t sz, int zero,
     unsigned align, unsigned offset) JL_NOTSAFEPOINT;
 void *jl_gc_perm_alloc(size_t sz, int zero,
@@ -186,12 +262,15 @@ static const int jl_gc_sizeclasses[] = {
     4, 8, 12,
 #endif
 
-    // 16 pools at 16-byte spacing
-    16, 32, 48, 64, 80, 96, 112, 128,
+    // 16 pools at 8-byte spacing
+    // the 8-byte aligned pools are only used for Strings
+    16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136,
+    // 8 pools at 16-byte spacing
     144, 160, 176, 192, 208, 224, 240, 256,
 
     // the following tables are computed for maximum packing efficiency via the formula:
-    // sz=(div(2^14-8,rng)÷16)*16; hcat(sz, (2^14-8)÷sz, 2^14-(2^14-8)÷sz.*sz)'
+    // pg = 2^14
+    // sz = (div.(pg-8, rng).÷16)*16; hcat(sz, (pg-8).÷sz, pg .- (pg-8).÷sz.*sz)'
 
     // rng = 60:-4:32 (8 pools)
     272, 288, 304, 336, 368, 400, 448, 496,
@@ -232,15 +311,14 @@ STATIC_INLINE int jl_gc_alignment(size_t sz)
 }
 JL_DLLEXPORT int jl_alignment(size_t sz);
 
-// the following table is computed from jl_gc_sizeclasses via the formula:
-// [searchsortedfirst(TABLE, i) for i = 0:16:table[end]]
-static const uint8_t szclass_table[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, 27, 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 30, 31, 31, 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 34, 34, 34, 34, 34, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 36, 37, 37, 37, 37, 37, 37, 37, 37, 38, 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40};
+// the following table is computed as:
+// [searchsortedfirst(jl_gc_sizeclasses, i) - 1 for i = 0:16:jl_gc_sizeclasses[end]]
+static const uint8_t szclass_table[] = {0, 1, 3, 5, 7, 9, 11, 13, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 29, 29, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 37, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, 41, 41, 42, 42, 42, 42, 42, 43, 43, 43, 43, 43, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, 46, 46, 46, 46, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48};
 static_assert(sizeof(szclass_table) == 128, "");
 
 STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz)
 {
     assert(sz <= 2032);
-    uint8_t klass = szclass_table[(sz + 15) / 16];
 #ifdef _P64
     if (sz <= 8)
         return 0;
@@ -254,14 +332,31 @@ STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass(unsigned sz)
         return (sz >= 8 ? 2 : (sz >= 4 ? 1 : 0));
     const int N = 2;
 #endif
+    uint8_t klass = szclass_table[(sz + 15) / 16];
     return klass + N;
 }
 
+STATIC_INLINE uint8_t JL_CONST_FUNC jl_gc_szclass_align8(unsigned sz)
+{
+    if (sz >= 16 && sz <= 152) {
+#ifdef _P64
+        const int N = 0;
+#elif MAX_ALIGN == 8
+        const int N = 1;
+#else
+        const int N = 2;
+#endif
+        return (sz + 7)/8 - 1 + N;
+    }
+    return jl_gc_szclass(sz);
+}
+
 #define JL_SMALL_BYTE_ALIGNMENT 16
 #define JL_CACHE_BYTE_ALIGNMENT 64
 // JL_HEAP_ALIGNMENT is the maximum alignment that the GC can provide
 #define JL_HEAP_ALIGNMENT JL_SMALL_BYTE_ALIGNMENT
 #define GC_MAX_SZCLASS (2032-sizeof(void*))
+static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, "");
 
 STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
 {
@@ -271,16 +366,26 @@ STATIC_INLINE jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty)
         int pool_id = jl_gc_szclass(allocsz);
         jl_gc_pool_t *p = &ptls->heap.norm_pools[pool_id];
         int osize = jl_gc_sizeclasses[pool_id];
-        v = jl_gc_pool_alloc(ptls, (char*)p - (char*)ptls, osize);
+        // We call `jl_gc_pool_alloc_noinline` instead of `jl_gc_pool_alloc` to avoid double-counting in
+        // the Allocations Profiler. (See https://github.com/JuliaLang/julia/pull/43868 for more details.)
+        v = jl_gc_pool_alloc_noinline(ptls, (char*)p - (char*)ptls, osize);
     }
     else {
         if (allocsz < sz) // overflow in adding offs, size was "negative"
             jl_throw(jl_memory_exception);
-        v = jl_gc_big_alloc(ptls, allocsz);
+        v = jl_gc_big_alloc_noinline(ptls, allocsz);
     }
     jl_set_typeof(v, ty);
+    maybe_record_alloc_to_profile(v, sz, (jl_datatype_t*)ty);
     return v;
 }
+
+/* Programming style note: When using jl_gc_alloc, do not JL_GC_PUSH it into a
+ * gc frame, until it has been fully initialized. An uninitialized value in a
+ * gc frame can crash upon encountering the first safepoint. By delaying use of
+ * the JL_GC_PUSH macro until the value has been initialized, any accidental
+ * safepoints will be caught by the GC analyzer.
+ */
 JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty);
 // On GCC, only inline when sz is constant
 #ifdef __GNUC__
@@ -301,7 +406,7 @@ STATIC_INLINE jl_gc_tracked_buffer_t *jl_gc_alloc_buf(jl_ptls_t ptls, size_t sz)
     return jl_gc_alloc(ptls, sz, (void*)jl_buff_tag);
 }
 
-STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty)
+STATIC_INLINE jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT
 {
     const size_t allocsz = sz + sizeof(jl_taggedvalue_t);
     unsigned align = (sz == 0 ? sizeof(void*) : (allocsz <= sizeof(void*) * 2 ?
@@ -319,8 +424,8 @@ jl_value_t *jl_permbox64(jl_datatype_t *t, int64_t x);
 jl_svec_t *jl_perm_symsvec(size_t n, ...);
 
 // this sizeof(__VA_ARGS__) trick can't be computed until C11, but that only matters to Clang in some situations
-#if !defined(__clang_analyzer__) && !(defined(JL_ASAN_ENABLED) || defined(JL_TSAN_ENABLED))
-#ifdef __GNUC__
+#if !defined(__clang_analyzer__) && !(defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_TSAN_ENABLED_))
+#ifdef _COMPILER_GCC_
 #define jl_perm_symsvec(n, ...) \
     (jl_perm_symsvec)(__extension__({                                         \
             static_assert(                                                    \
@@ -328,6 +433,16 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
                 "Number of passed arguments does not match expected number"); \
             n;                                                                \
         }), __VA_ARGS__)
+#ifdef jl_svec
+#undef jl_svec
+#define jl_svec(n, ...) \
+    (ijl_svec)(__extension__({                                                \
+            static_assert(                                                    \
+                n == sizeof((void *[]){ __VA_ARGS__ })/sizeof(void *),        \
+                "Number of passed arguments does not match expected number"); \
+            n;                                                                \
+        }), __VA_ARGS__)
+#else
 #define jl_svec(n, ...) \
     (jl_svec)(__extension__({                                                 \
             static_assert(                                                    \
@@ -337,6 +452,7 @@ jl_svec_t *jl_perm_symsvec(size_t n, ...);
         }), __VA_ARGS__)
 #endif
 #endif
+#endif
 
 jl_value_t *jl_gc_realloc_string(jl_value_t *s, size_t sz);
 JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz);
@@ -348,30 +464,30 @@ JL_DLLEXPORT int64_t jl_gc_diff_total_bytes(void) JL_NOTSAFEPOINT;
 JL_DLLEXPORT int64_t jl_gc_sync_total_bytes(int64_t offset) JL_NOTSAFEPOINT;
 void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT;
 void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT;
-void jl_gc_run_all_finalizers(jl_ptls_t ptls);
+void jl_gc_run_all_finalizers(jl_task_t *ct);
 void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task);
 
-void gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT;
 void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT;
 
 STATIC_INLINE void jl_gc_wb_binding(jl_binding_t *bnd, void *val) JL_NOTSAFEPOINT // val isa jl_value_t*
 {
     if (__unlikely(jl_astaggedvalue(bnd)->bits.gc == 3 &&
                    (jl_astaggedvalue(val)->bits.gc & 1) == 0))
-        gc_queue_binding(bnd);
+        jl_gc_queue_binding(bnd);
 }
 
 STATIC_INLINE void jl_gc_wb_buf(void *parent, void *bufptr, size_t minsz) JL_NOTSAFEPOINT // parent isa jl_value_t*
 {
     // if parent is marked and buf is not
     if (__unlikely(jl_astaggedvalue(parent)->bits.gc & 1)) {
-        jl_ptls_t ptls = jl_get_ptls_states();
-        gc_setmark_buf(ptls, bufptr, 3, minsz);
+        jl_task_t *ct = jl_current_task;
+        gc_setmark_buf(ct->ptls, bufptr, 3, minsz);
     }
 }
 
-void gc_debug_print_status(void);
-void gc_debug_critical_error(void);
+void jl_gc_debug_print_status(void);
+JL_DLLEXPORT void jl_gc_debug_critical_error(void);
 void jl_print_gc_stats(JL_STREAM *s);
 void jl_gc_reset_alloc_count(void);
 uint32_t jl_get_gs_ctr(void);
@@ -382,22 +498,38 @@ STATIC_INLINE jl_value_t *undefref_check(jl_datatype_t *dt, jl_value_t *v) JL_NO
      if (dt->layout->first_ptr >= 0) {
         jl_value_t *nullp = ((jl_value_t**)v)[dt->layout->first_ptr];
         if (__unlikely(nullp == NULL))
-            jl_throw(jl_undefref_exception);
+            return NULL;
     }
     return v;
 }
 
+// -- helper types -- //
+
+typedef struct {
+    uint8_t pure:1;
+    uint8_t propagate_inbounds:1;
+    uint8_t inlineable:1;
+    uint8_t inferred:1;
+    uint8_t constprop:2; // 0 = use heuristic; 1 = aggressive; 2 = none
+} jl_code_info_flags_bitfield_t;
+
+typedef union {
+    jl_code_info_flags_bitfield_t bits;
+    uint8_t packed;
+} jl_code_info_flags_t;
 
 // -- functions -- //
 
-jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force);
-jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world);
+// jl_code_info_flag_t code_info_flags(uint8_t pure, uint8_t propagate_inbounds, uint8_t inlineable, uint8_t inferred, uint8_t constprop);
+JL_DLLEXPORT jl_code_info_t *jl_type_infer(jl_method_instance_t *li, size_t world, int force);
+JL_DLLEXPORT jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *meth JL_PROPAGATES_ROOT, size_t world);
 jl_code_instance_t *jl_generate_fptr(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
 void jl_generate_fptr_for_unspecialized(jl_code_instance_t *unspec);
 JL_DLLEXPORT jl_code_instance_t *jl_get_method_inferred(
         jl_method_instance_t *mi JL_PROPAGATES_ROOT, jl_value_t *rettype,
         size_t min_world, size_t max_world);
-jl_method_instance_t *jl_get_unspecialized(jl_method_instance_t *method JL_PROPAGATES_ROOT);
+jl_method_instance_t *jl_get_unspecialized_from_mi(jl_method_instance_t *method JL_PROPAGATES_ROOT);
+jl_method_instance_t *jl_get_unspecialized(jl_method_t *def JL_PROPAGATES_ROOT);
 
 JL_DLLEXPORT int jl_compile_hint(jl_tupletype_t *types);
 jl_code_info_t *jl_code_for_interpreter(jl_method_instance_t *lam JL_PROPAGATES_ROOT);
@@ -407,10 +539,18 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void);
 void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
                               int binding_effects);
 
+JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root);
+void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots);
+int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i);
+jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index);
+int nroots_with_key(jl_method_t *m, uint64_t key);
+
+int jl_valid_type_param(jl_value_t *v);
+
 JL_DLLEXPORT jl_value_t *jl_apply_2va(jl_value_t *f, jl_value_t **args, uint32_t nargs);
 
 void JL_NORETURN jl_method_error(jl_function_t *f, jl_value_t **args, size_t na, size_t world);
-jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...);
+JL_DLLEXPORT jl_value_t *jl_get_exceptionf(jl_datatype_t *exception_type, const char *fmt, ...);
 
 JL_DLLEXPORT jl_value_t *jl_get_keyword_sorter(jl_value_t *f);
 JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t);
@@ -420,15 +560,15 @@ JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t);
 
 JL_CALLABLE(jl_f_tuple);
 JL_CALLABLE(jl_f_intrinsic_call);
+JL_CALLABLE(jl_f_opaque_closure_call);
 void jl_install_default_signal_handlers(void);
 void restore_signals(void);
 void jl_install_thread_signal_handler(jl_ptls_t ptls);
 
-jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b);
+JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b);
 
 extern uv_loop_t *jl_io_loop;
 void jl_uv_flush(uv_stream_t *stream);
-void jl_uv_call_close_callback(jl_value_t *val);
 
 typedef struct jl_typeenv_t {
     jl_tvar_t *var;
@@ -439,7 +579,7 @@ typedef struct jl_typeenv_t {
 int jl_tuple_isa(jl_value_t **child, size_t cl, jl_datatype_t *pdt);
 int jl_tuple1_isa(jl_value_t *child1, jl_value_t **child, size_t cl, jl_datatype_t *pdt);
 
-int jl_has_intersect_type_not_kind(jl_value_t *t);
+JL_DLLEXPORT int jl_has_intersect_type_not_kind(jl_value_t *t);
 int jl_subtype_invariant(jl_value_t *a, jl_value_t *b, int ta);
 int jl_has_concrete_subtype(jl_value_t *typ);
 jl_tupletype_t *jl_inst_arg_tuple_type(jl_value_t *arg1, jl_value_t **args, size_t nargs, int leaf);
@@ -448,72 +588,89 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
 jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_args_t fptr) JL_GC_DISABLED;
 int jl_obviously_unequal(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_array_t *jl_find_free_typevars(jl_value_t *v);
+int jl_has_fixed_layout(jl_datatype_t *t);
+JL_DLLEXPORT int jl_struct_try_layout(jl_datatype_t *dt);
+JL_DLLEXPORT int jl_type_mappable_to_c(jl_value_t *ty);
 jl_svec_t *jl_outer_unionall_vars(jl_value_t *u);
 jl_value_t *jl_type_intersection_env_s(jl_value_t *a, jl_value_t *b, jl_svec_t **penv, int *issubty);
 jl_value_t *jl_type_intersection_env(jl_value_t *a, jl_value_t *b, jl_svec_t **penv);
 int jl_subtype_matching(jl_value_t *a, jl_value_t *b, jl_svec_t **penv);
+JL_DLLEXPORT int jl_types_egal(jl_value_t *a, jl_value_t *b);
 // specificity comparison assuming !(a <: b) and !(b <: a)
 JL_DLLEXPORT int jl_type_morespecific_no_subtype(jl_value_t *a, jl_value_t *b);
 jl_value_t *jl_instantiate_type_with(jl_value_t *t, jl_value_t **env, size_t n);
 JL_DLLEXPORT jl_value_t *jl_instantiate_type_in_env(jl_value_t *ty, jl_unionall_t *env, jl_value_t **vals);
 jl_value_t *jl_substitute_var(jl_value_t *t, jl_tvar_t *var, jl_value_t *val);
-jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
-jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u);
+JL_DLLEXPORT jl_value_t *jl_unwrap_unionall(jl_value_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_rewrap_unionall(jl_value_t *t, jl_value_t *u);
 int jl_count_union_components(jl_value_t *v);
-jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_value_t *jl_nth_union_component(jl_value_t *v JL_PROPAGATES_ROOT, int i) JL_NOTSAFEPOINT;
 int jl_find_union_component(jl_value_t *haystack, jl_value_t *needle, unsigned *nth) JL_NOTSAFEPOINT;
 jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module,
                                    jl_datatype_t *super, jl_svec_t *parameters);
 jl_datatype_t *jl_new_uninitialized_datatype(void);
 void jl_precompute_memoized_dt(jl_datatype_t *dt, int cacheable);
-jl_datatype_t *jl_wrap_Type(jl_value_t *t);  // x -> Type{x}
-jl_value_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n);
+JL_DLLEXPORT jl_datatype_t *jl_wrap_Type(jl_value_t *t);  // x -> Type{x}
+jl_vararg_t *jl_wrap_vararg(jl_value_t *t, jl_value_t *n);
 void jl_reinstantiate_inner_types(jl_datatype_t *t);
 jl_datatype_t *jl_lookup_cache_type_(jl_datatype_t *type);
 void jl_cache_type_(jl_datatype_t *type);
-void jl_assign_bits(void *dest, jl_value_t *bits) JL_NOTSAFEPOINT;
-void set_nth_field(jl_datatype_t *st, void *v, size_t i, jl_value_t *rhs) JL_NOTSAFEPOINT;
+void set_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic) JL_NOTSAFEPOINT;
+jl_value_t *swap_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *rhs, int isatomic);
+jl_value_t *modify_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *op, jl_value_t *rhs, int isatomic);
+jl_value_t *replace_nth_field(jl_datatype_t *st, jl_value_t *v, size_t i, jl_value_t *expected, jl_value_t *rhs, int isatomic);
 jl_expr_t *jl_exprn(jl_sym_t *head, size_t n);
 jl_function_t *jl_new_generic_function(jl_sym_t *name, jl_module_t *module);
 jl_function_t *jl_new_generic_function_with_supertype(jl_sym_t *name, jl_module_t *module, jl_datatype_t *st);
-void jl_foreach_reachable_mtable(void (*visit)(jl_methtable_t *mt, void *env), void *env);
+int jl_foreach_reachable_mtable(int (*visit)(jl_methtable_t *mt, void *env), void *env);
 void jl_init_main_module(void);
-int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_is_submodule(jl_module_t *child, jl_module_t *parent) JL_NOTSAFEPOINT;
 jl_array_t *jl_get_loaded_modules(void);
+JL_DLLEXPORT int jl_datatype_isinlinealloc(jl_datatype_t *ty, int pointerfree);
 
+void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type);
 jl_value_t *jl_toplevel_eval_flex(jl_module_t *m, jl_value_t *e, int fast, int expanded);
 
 jl_value_t *jl_eval_global_var(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *e);
+jl_value_t *jl_interpret_opaque_closure(jl_opaque_closure_t *clos, jl_value_t **args, size_t nargs);
 jl_value_t *jl_interpret_toplevel_thunk(jl_module_t *m, jl_code_info_t *src);
 jl_value_t *jl_interpret_toplevel_expr_in(jl_module_t *m, jl_value_t *e,
                                           jl_code_info_t *src,
                                           jl_svec_t *sparam_vals);
-int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT;
 jl_value_t *jl_call_scm_on_ast(const char *funcname, jl_value_t *expr, jl_module_t *inmodule);
-void jl_linenumber_to_lineinfo(jl_code_info_t *ci, jl_module_t *mod, jl_value_t *name);
 
 jl_method_instance_t *jl_method_lookup(jl_value_t **args, size_t nargs, size_t world);
+
+jl_value_t *jl_gf_invoke_by_method(jl_method_t *method, jl_value_t *gf, jl_value_t **args, size_t nargs);
 jl_value_t *jl_gf_invoke(jl_value_t *types, jl_value_t *f, jl_value_t **args, size_t nargs);
-JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, int lim, int include_ambiguous,
+JL_DLLEXPORT jl_value_t *jl_matching_methods(jl_tupletype_t *types, jl_value_t *mt, int lim, int include_ambiguous,
                                              size_t world, size_t *min_valid, size_t *max_valid, int *ambig);
 
 JL_DLLEXPORT jl_datatype_t *jl_first_argument_datatype(jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
 JL_DLLEXPORT jl_methtable_t *jl_method_table_for(
     jl_value_t *argtypes JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT;
+JL_DLLEXPORT jl_methtable_t *jl_method_get_table(
+    jl_method_t *method) JL_NOTSAFEPOINT;
 jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT);
 
-int jl_pointer_egal(jl_value_t *t);
-jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_pointer_egal(jl_value_t *t);
+JL_DLLEXPORT jl_value_t *jl_nth_slot_type(jl_value_t *sig JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
 void jl_compute_field_offsets(jl_datatype_t *st);
 jl_array_t *jl_new_array_for_deserialization(jl_value_t *atype, uint32_t ndims, size_t *dims,
                                              int isunboxed, int hasptr, int isunion, int elsz);
 void jl_module_run_initializer(jl_module_t *m);
-jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT;
-void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b);
+jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var);
+JL_DLLEXPORT void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b);
 extern jl_array_t *jl_module_init_order JL_GLOBALLY_ROOTED;
 extern htable_t jl_current_modules JL_GLOBALLY_ROOTED;
-JL_DLLEXPORT void jl_compile_extern_c(void *llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt);
+extern JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module JL_GLOBALLY_ROOTED;
+int jl_compile_extern_c(LLVMOrcThreadSafeModuleRef llvmmod, void *params, void *sysimg, jl_value_t *declrt, jl_value_t *sigt);
+
+jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
+    jl_value_t *source,  jl_value_t **env, size_t nenv);
+JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source);
 
 // Each tuple can exist in one of 4 Vararg states:
 //   NONE: no vararg                            Tuple{Int,Float32}
@@ -527,51 +684,49 @@ typedef enum {
     JL_VARARG_UNBOUND = 3
 } jl_vararg_kind_t;
 
-STATIC_INLINE int jl_is_vararg_type(jl_value_t *v) JL_NOTSAFEPOINT
+STATIC_INLINE int jl_is_vararg(jl_value_t *v) JL_NOTSAFEPOINT
 {
-    v = jl_unwrap_unionall(v);
-    return (jl_is_datatype(v) &&
-            ((jl_datatype_t*)(v))->name == jl_vararg_typename);
+    return jl_typeof(v) == (jl_value_t*)jl_vararg_type;
 }
 
-STATIC_INLINE jl_value_t *jl_unwrap_vararg(jl_value_t *v) JL_NOTSAFEPOINT
+STATIC_INLINE jl_value_t *jl_unwrap_vararg(jl_vararg_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
 {
-    return jl_tparam0(jl_unwrap_unionall(v));
+    assert(jl_is_vararg((jl_value_t*)v));
+    jl_value_t *T = ((jl_vararg_t*)v)->T;
+    return T ? T : (jl_value_t*)jl_any_type;
 }
+#define jl_unwrap_vararg(v) (jl_unwrap_vararg)((jl_vararg_t *)v)
+
+STATIC_INLINE jl_value_t *jl_unwrap_vararg_num(jl_vararg_t *v JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    assert(jl_is_vararg((jl_value_t*)v));
+    return ((jl_vararg_t*)v)->N;
+}
+#define jl_unwrap_vararg_num(v) (jl_unwrap_vararg_num)((jl_vararg_t *)v)
 
 STATIC_INLINE jl_vararg_kind_t jl_vararg_kind(jl_value_t *v) JL_NOTSAFEPOINT
 {
-    if (!jl_is_vararg_type(v))
+    if (!jl_is_vararg(v))
         return JL_VARARG_NONE;
-    jl_tvar_t *v1=NULL, *v2=NULL;
-    if (jl_is_unionall(v)) {
-        v1 = ((jl_unionall_t*)v)->var;
-        v = ((jl_unionall_t*)v)->body;
-        if (jl_is_unionall(v)) {
-            v2 = ((jl_unionall_t*)v)->var;
-            v = ((jl_unionall_t*)v)->body;
-        }
-    }
-    assert(jl_is_datatype(v));
-    jl_value_t *lenv = jl_tparam1(v);
-    if (jl_is_long(lenv))
+    jl_vararg_t *vm = (jl_vararg_t *)v;
+    if (!vm->N)
+        return JL_VARARG_UNBOUND;
+    if (jl_is_long(vm->N))
         return JL_VARARG_INT;
-    if (jl_is_typevar(lenv) && lenv != (jl_value_t*)v1 && lenv != (jl_value_t*)v2)
-        return JL_VARARG_BOUND;
-    return JL_VARARG_UNBOUND;
+    return JL_VARARG_BOUND;
 }
 
 STATIC_INLINE int jl_is_va_tuple(jl_datatype_t *t) JL_NOTSAFEPOINT
 {
     assert(jl_is_tuple_type(t));
     size_t l = jl_svec_len(t->parameters);
-    return (l>0 && jl_is_vararg_type(jl_tparam(t,l-1)));
+    return (l>0 && jl_is_vararg(jl_tparam(t,l-1)));
 }
 
 STATIC_INLINE size_t jl_vararg_length(jl_value_t *v) JL_NOTSAFEPOINT
 {
-    assert(jl_is_vararg_type(v));
-    jl_value_t *len = jl_tparam1(jl_unwrap_unionall(v));
+    assert(jl_is_vararg(v));
+    jl_value_t *len = jl_unwrap_vararg_num(v);
     assert(jl_is_long(len));
     return jl_unbox_long(len);
 }
@@ -586,17 +741,6 @@ STATIC_INLINE jl_vararg_kind_t jl_va_tuple_kind(jl_datatype_t *t) JL_NOTSAFEPOIN
     return jl_vararg_kind(jl_tparam(t,l-1));
 }
 
-#ifdef JL_USE_INTEL_JITEVENTS
-extern char jl_using_intel_jitevents;
-#endif
-#ifdef JL_USE_OPROFILE_JITEVENTS
-extern char jl_using_oprofile_jitevents;
-#endif
-#ifdef JL_USE_PERF_JITEVENTS
-extern char jl_using_perf_jitevents;
-#endif
-extern size_t jl_arr_xtralloc_limit;
-
 // -- init.c -- //
 
 void jl_init_types(void) JL_GC_DISABLED;
@@ -606,28 +750,28 @@ void jl_init_common_symbols(void);
 void jl_init_primitives(void) JL_GC_DISABLED;
 void jl_init_llvm(void);
 void jl_init_codegen(void);
+void jl_init_runtime_ccall(void);
 void jl_init_intrinsic_functions(void);
 void jl_init_intrinsic_properties(void);
 void jl_init_tasks(void) JL_GC_DISABLED;
 void jl_init_stack_limits(int ismaster, void **stack_hi, void **stack_lo);
-void jl_init_root_task(void *stack_lo, void *stack_hi);
+jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi);
 void jl_init_serializer(void);
 void jl_gc_init(void);
 void jl_init_uv(void);
-void jl_init_debuginfo(void);
 void jl_init_thread_heap(jl_ptls_t ptls);
 void jl_init_int32_int64_cache(void);
+JL_DLLEXPORT void jl_init_options(void);
 
 void jl_teardown_codegen(void);
 
-void _julia_init(JL_IMAGE_SEARCH rel);
-
 void jl_set_base_ctx(char *__stk);
 
-extern ssize_t jl_tls_offset;
-extern const int jl_tls_elf_support;
+extern JL_DLLEXPORT ssize_t jl_tls_offset;
+extern JL_DLLEXPORT const int jl_tls_elf_support;
 void jl_init_threading(void);
 void jl_start_threads(void);
+int jl_effective_threads(void);
 
 // Whether the GC is running
 extern char *jl_safepoint_pages;
@@ -636,7 +780,7 @@ STATIC_INLINE int jl_addr_is_safepoint(uintptr_t addr)
     uintptr_t safepoint_addr = (uintptr_t)jl_safepoint_pages;
     return addr >= safepoint_addr && addr < safepoint_addr + jl_page_size * 3;
 }
-extern uint32_t jl_gc_running;
+extern _Atomic(uint32_t) jl_gc_running;
 // All the functions are safe to be called from within a signal handler
 // provided that the thread will not be interrupted by another asynchronous
 // signal.
@@ -673,59 +817,71 @@ void jl_safepoint_defer_sigint(void);
 int jl_safepoint_consume_sigint(void);
 void jl_wake_libuv(void);
 
-#if !defined(__clang_analyzer__)
-jl_get_ptls_states_func jl_get_ptls_states_getter(void);
+void jl_set_pgcstack(jl_gcframe_t **) JL_NOTSAFEPOINT;
+#if defined(_OS_DARWIN_)
+typedef pthread_key_t jl_pgcstack_key_t;
+#elif defined(_OS_WINDOWS_)
+typedef DWORD jl_pgcstack_key_t;
+#else
+typedef jl_gcframe_t ***(*jl_pgcstack_key_t)(void) JL_NOTSAFEPOINT;
+#endif
+JL_DLLEXPORT void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k);
+
+#if !defined(__clang_gcanalyzer__) && !defined(_OS_DARWIN_)
 static inline void jl_set_gc_and_wait(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     // reading own gc state doesn't need atomic ops since no one else
     // should store to it.
-    int8_t state = ptls->gc_state;
-    jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
+    int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
+    jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
     jl_safepoint_wait_gc();
-    jl_atomic_store_release(&ptls->gc_state, state);
+    jl_atomic_store_release(&ct->ptls->gc_state, state);
 }
 #endif
 void jl_gc_set_permalloc_region(void *start, void *end);
 
 JL_DLLEXPORT jl_value_t *jl_dump_method_asm(jl_method_instance_t *linfo, size_t world,
-        int raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo);
+        char raw_mc, char getwrapper, const char* asm_variant, const char *debuginfo, char binary);
 JL_DLLEXPORT void *jl_get_llvmf_defn(jl_method_instance_t *linfo, size_t world, char getwrapper, char optimize, const jl_cgparams_t params);
-JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, int raw_mc, const char* asm_variant, const char *debuginfo);
-JL_DLLEXPORT jl_value_t *jl_dump_llvm_asm(void *F, const char* asm_variant, const char *debuginfo);
+JL_DLLEXPORT jl_value_t *jl_dump_fptr_asm(uint64_t fptr, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
 JL_DLLEXPORT jl_value_t *jl_dump_function_ir(void *f, char strip_ir_metadata, char dump_module, const char *debuginfo);
+JL_DLLEXPORT jl_value_t *jl_dump_function_asm(void *F, char raw_mc, const char* asm_variant, const char *debuginfo, char binary);
 
-void *jl_create_native(jl_array_t *methods, const jl_cgparams_t cgparams, int policy);
+void *jl_create_native(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvmmod, const jl_cgparams_t *cgparams, int policy);
 void jl_dump_native(void *native_code,
         const char *bc_fname, const char *unopt_bc_fname, const char *obj_fname, const char *asm_fname,
         const char *sysimg_data, size_t sysimg_len);
 int32_t jl_get_llvm_gv(void *native_code, jl_value_t *p) JL_NOTSAFEPOINT;
-void jl_get_function_id(void *native_code, jl_code_instance_t *ncode,
+JL_DLLEXPORT void jl_get_function_id(void *native_code, jl_code_instance_t *ncode,
         int32_t *func_idx, int32_t *specfunc_idx);
 
 // the first argument to jl_idtable_rehash is used to return a value
 // make sure it is rooted if it is used after the function returns
 JL_DLLEXPORT jl_array_t *jl_idtable_rehash(jl_array_t *a, size_t newsz);
-jl_value_t **jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT;
+_Atomic(jl_value_t*) *jl_table_peek_bp(jl_array_t *a, jl_value_t *key) JL_NOTSAFEPOINT;
+
+JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t*);
 
 JL_DLLEXPORT jl_methtable_t *jl_new_method_table(jl_sym_t *name, jl_module_t *module);
-jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache);
+JL_DLLEXPORT jl_method_instance_t *jl_get_specialization1(jl_tupletype_t *types, size_t world, size_t *min_valid, size_t *max_valid, int mt_cache);
 jl_method_instance_t *jl_get_specialized(jl_method_t *m, jl_value_t *types, jl_svec_t *sp);
 JL_DLLEXPORT jl_value_t *jl_rettype_inferred(jl_method_instance_t *li JL_PROPAGATES_ROOT, size_t min_world, size_t max_world);
-jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
+JL_DLLEXPORT jl_code_instance_t *jl_method_compiled(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world);
 JL_DLLEXPORT jl_value_t *jl_methtable_lookup(jl_methtable_t *mt, jl_value_t *type, size_t world);
 JL_DLLEXPORT jl_method_instance_t *jl_specializations_get_linfo(
     jl_method_t *m JL_PROPAGATES_ROOT, jl_value_t *type, jl_svec_t *sparams);
+jl_method_instance_t *jl_specializations_get_or_insert(jl_method_instance_t *mi_ins);
 JL_DLLEXPORT void jl_method_instance_add_backedge(jl_method_instance_t *callee, jl_method_instance_t *caller);
 JL_DLLEXPORT void jl_method_table_add_backedge(jl_methtable_t *mt, jl_value_t *typ, jl_value_t *caller);
 
-uint32_t jl_module_next_counter(jl_module_t *m);
+uint32_t jl_module_next_counter(jl_module_t *m) JL_NOTSAFEPOINT;
 jl_tupletype_t *arg_type_tuple(jl_value_t *arg1, jl_value_t **args, size_t nargs);
 
-int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT;
+JL_DLLEXPORT int jl_has_meta(jl_array_t *body, jl_sym_t *sym) JL_NOTSAFEPOINT;
 
 jl_value_t *jl_parse(const char *text, size_t text_len, jl_value_t *filename,
-                     size_t offset, jl_value_t *options);
+                     size_t lineno, size_t offset, jl_value_t *options);
 
 //--------------------------------------------------
 // Backtraces
@@ -846,7 +1002,7 @@ typedef struct {
 uint64_t jl_getUnwindInfo(uint64_t dwBase);
 #ifdef _OS_WINDOWS_
 #include <dbghelp.h>
-JL_DLLEXPORT EXCEPTION_DISPOSITION __julia_personality(
+JL_DLLEXPORT EXCEPTION_DISPOSITION NTAPI __julia_personality(
         PEXCEPTION_RECORD ExceptionRecord, void *EstablisherFrame, PCONTEXT ContextRecord, void *DispatcherContext);
 extern HANDLE hMainThread;
 typedef CONTEXT bt_context_t;
@@ -858,7 +1014,7 @@ typedef struct {
     CONTEXT context;
 } bt_cursor_t;
 #endif
-extern jl_mutex_t jl_in_stackwalk;
+extern JL_DLLEXPORT uv_mutex_t jl_in_stackwalk;
 #elif !defined(JL_DISABLE_LIBUNWIND)
 // This gives unwind only local unwinding options ==> faster code
 #  define UNW_LOCAL_ONLY
@@ -881,18 +1037,18 @@ size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip) JL_NOTS
 // which was asynchronously interrupted.
 size_t rec_backtrace_ctx(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx,
                          jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT;
-#ifdef LIBOSXUNWIND
+#ifdef LLVMLIBUNWIND
 size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize, bt_context_t *ctx, jl_gcframe_t *pgcstack) JL_NOTSAFEPOINT;
 #endif
 JL_DLLEXPORT jl_value_t *jl_get_backtrace(void);
-void jl_critical_error(int sig, bt_context_t *context, jl_bt_element_t *bt_data, size_t *bt_size);
+void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct);
 JL_DLLEXPORT void jl_raise_debugger(void);
 int jl_getFunctionInfo(jl_frame_t **frames, uintptr_t pointer, int skipC, int noInline) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_gdblookup(void* ip) JL_NOTSAFEPOINT;
 void jl_print_native_codeloc(uintptr_t ip) JL_NOTSAFEPOINT;
 void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_data) JL_NOTSAFEPOINT;
 #ifdef _OS_WINDOWS_
-void jl_refresh_dbg_module_list(void);
+JL_DLLEXPORT void jl_refresh_dbg_module_list(void);
 #endif
 // *to is NULL or malloc'd pointer, from is allowed to be NULL
 STATIC_INLINE char *jl_copy_str(char **to, const char *from) JL_NOTSAFEPOINT
@@ -957,37 +1113,35 @@ void jl_push_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT JL_ROOTING_AR
 //--------------------------------------------------
 // congruential random number generator
 // for a small amount of thread-local randomness
-// we could just use libc:`rand()`, but we want to ensure this is fast
-STATIC_INLINE void seed_cong(uint64_t *seed)
-{
-    *seed = rand();
-}
-STATIC_INLINE void unbias_cong(uint64_t max, uint64_t *unbias)
+STATIC_INLINE void unbias_cong(uint64_t max, uint64_t *unbias) JL_NOTSAFEPOINT
 {
     *unbias = UINT64_MAX - ((UINT64_MAX % max) + 1);
 }
-STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed)
+STATIC_INLINE uint64_t cong(uint64_t max, uint64_t unbias, uint64_t *seed) JL_NOTSAFEPOINT
 {
     while ((*seed = 69069 * (*seed) + 362437) > unbias)
         ;
     return *seed % max;
 }
+JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_srand(uint64_t) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void jl_init_rand(void);
 
-// libuv stuff:
-JL_DLLEXPORT extern void *jl_dl_handle;
+JL_DLLEXPORT extern void *jl_libjulia_internal_handle;
 JL_DLLEXPORT extern void *jl_RTLD_DEFAULT_handle;
 #if defined(_OS_WINDOWS_)
 JL_DLLEXPORT extern void *jl_exe_handle;
+JL_DLLEXPORT extern void *jl_libjulia_handle;
+JL_DLLEXPORT extern const char *jl_crtdll_basename;
 extern void *jl_ntdll_handle;
 extern void *jl_kernel32_handle;
 extern void *jl_crtdll_handle;
 extern void *jl_winsock_handle;
 #endif
 
-void *jl_get_library_(const char *f_lib, int throw_err) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *jl_get_library_(const char *f_lib, int throw_err);
 #define jl_get_library(f_lib) jl_get_library_(f_lib, 1)
-JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name,
-                                      void **hnd) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd);
 JL_DLLEXPORT void *jl_lazy_load_and_lookup(jl_value_t *lib_val, const char *f_name);
 JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline(
     jl_value_t *fobj, jl_datatype_t *result, htable_t *cache, jl_svec_t *fill,
@@ -996,13 +1150,13 @@ JL_DLLEXPORT jl_value_t *jl_get_cfunction_trampoline(
 
 
 // Windows only
-#define JL_EXE_LIBNAME ((const char*)1)
-#define JL_DL_LIBNAME ((const char*)2)
-const char *jl_dlfind_win32(const char *name);
+#define JL_EXE_LIBNAME                  ((const char*)1)
+#define JL_LIBJULIA_DL_LIBNAME          ((const char*)2)
+#define JL_LIBJULIA_INTERNAL_DL_LIBNAME ((const char*)3)
+JL_DLLEXPORT const char *jl_dlfind_win32(const char *name);
 
 // libuv wrappers:
 JL_DLLEXPORT int jl_fs_rename(const char *src_path, const char *dst_path);
-int jl_getpid(void) JL_NOTSAFEPOINT;
 
 #ifdef SEGV_EXCEPTION
 extern JL_DLLEXPORT jl_value_t *jl_segv_exception;
@@ -1010,11 +1164,21 @@ extern JL_DLLEXPORT jl_value_t *jl_segv_exception;
 
 // -- Runtime intrinsics -- //
 JL_DLLEXPORT const char *jl_intrinsic_name(int f) JL_NOTSAFEPOINT;
-unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT;
+JL_DLLEXPORT unsigned jl_intrinsic_nargs(int f) JL_NOTSAFEPOINT;
 
+STATIC_INLINE int is_valid_intrinsic_elptr(jl_value_t *ety)
+{
+    return ety == (jl_value_t*)jl_any_type || (jl_is_concrete_type(ety) && !jl_is_layout_opaque(((jl_datatype_t*)ety)->layout));
+}
 JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v);
 JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t *align);
 JL_DLLEXPORT jl_value_t *jl_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t *align, jl_value_t *i);
+JL_DLLEXPORT jl_value_t *jl_atomic_fence(jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerref(jl_value_t *p, jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerswap(jl_value_t *p, jl_value_t *x, jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, jl_value_t *x, jl_value_t *order);
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *x, jl_value_t *expected, jl_value_t *success_order, jl_value_t *failure_order);
 JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty);
 JL_DLLEXPORT jl_value_t *jl_cglobal_auto(jl_value_t *v);
 
@@ -1051,7 +1215,6 @@ JL_DLLEXPORT jl_value_t *jl_ne_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_lt_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_le_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_fpiseq(jl_value_t *a, jl_value_t *b);
-JL_DLLEXPORT jl_value_t *jl_fpislt(jl_value_t *a, jl_value_t *b);
 
 JL_DLLEXPORT jl_value_t *jl_not_int(jl_value_t *a);
 JL_DLLEXPORT jl_value_t *jl_and_int(jl_value_t *a, jl_value_t *b);
@@ -1098,9 +1261,11 @@ JL_DLLEXPORT jl_value_t *jl_copysign_float(jl_value_t *a, jl_value_t *b);
 JL_DLLEXPORT jl_value_t *jl_flipsign_int(jl_value_t *a, jl_value_t *b);
 
 JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a);
+JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *a);
 JL_DLLEXPORT int jl_stored_inline(jl_value_t *el_type);
 JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a);
 JL_DLLEXPORT int jl_array_isassigned(jl_array_t *a, size_t i);
+JL_DLLEXPORT jl_array_t *jl_array_copy(jl_array_t *ary);
 
 JL_DLLEXPORT uintptr_t jl_object_id_(jl_value_t *tv, jl_value_t *v) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT;
@@ -1108,8 +1273,8 @@ JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT;
 // -- synchronization utilities -- //
 
 extern jl_mutex_t typecache_lock;
-extern jl_mutex_t codegen_lock;
-extern jl_mutex_t safepoint_lock;
+extern JL_DLLEXPORT jl_mutex_t jl_codegen_lock;
+extern uv_mutex_t safepoint_lock;
 
 #if defined(__APPLE__)
 void jl_mach_gc_end(void);
@@ -1120,20 +1285,12 @@ void jl_mach_gc_end(void);
 typedef uint_t (*smallintset_hash)(size_t val, jl_svec_t *data);
 typedef int (*smallintset_eq)(size_t val, const void *key, jl_svec_t *data, uint_t hv);
 ssize_t jl_smallintset_lookup(jl_array_t *cache, smallintset_eq eq, const void *key, jl_svec_t *data, uint_t hv);
-void jl_smallintset_insert(jl_array_t **pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data);
+void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data);
 
 // -- typemap.c -- //
 
-// a descriptor of a jl_typemap_t that gets
-// passed around as self-documentation of the parameters of the type
-struct jl_typemap_info {
-    int8_t unsorted; // whether this should be unsorted
-    jl_datatype_t **jl_contains; // the type that is being put in this
-};
-
-void jl_typemap_insert(jl_typemap_t **cache, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, int8_t offs,
-        const struct jl_typemap_info *tparams);
+void jl_typemap_insert(_Atomic(jl_typemap_t*) *cache, jl_value_t *parent,
+        jl_typemap_entry_t *newrec, int8_t offs);
 jl_typemap_entry_t *jl_typemap_alloc(
         jl_tupletype_t *type, jl_tupletype_t *simpletype, jl_svec_t *guardsigs,
         jl_value_t *newvalue, size_t min_world, size_t max_world);
@@ -1195,7 +1352,7 @@ JL_DLLEXPORT int8_t jl_svec_isassigned(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t
 JL_DLLEXPORT jl_value_t *jl_svec_ref(jl_svec_t *t JL_PROPAGATES_ROOT, ssize_t i);
 
 
-unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
+JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *field_type);
 
 void register_eh_frames(uint8_t *Addr, size_t Size);
 void deregister_eh_frames(uint8_t *Addr, size_t Size);
@@ -1222,46 +1379,103 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
             jl_value_t *file, jl_value_t *line, jl_value_t *kwargs,
             jl_value_t *msg);
 
-int isabspath(const char *in) JL_NOTSAFEPOINT;
-
-extern jl_sym_t *call_sym;    extern jl_sym_t *invoke_sym;
-extern jl_sym_t *empty_sym;   extern jl_sym_t *top_sym;
-extern jl_sym_t *module_sym;  extern jl_sym_t *slot_sym;
-extern jl_sym_t *export_sym;  extern jl_sym_t *import_sym;
-extern jl_sym_t *toplevel_sym; extern jl_sym_t *quote_sym;
-extern jl_sym_t *line_sym;    extern jl_sym_t *jl_incomplete_sym;
-extern jl_sym_t *goto_sym;    extern jl_sym_t *goto_ifnot_sym;
-extern jl_sym_t *return_sym;
-extern jl_sym_t *lambda_sym;  extern jl_sym_t *assign_sym;
-extern jl_sym_t *globalref_sym; extern jl_sym_t *do_sym;
-extern jl_sym_t *method_sym;  extern jl_sym_t *core_sym;
-extern jl_sym_t *enter_sym;   extern jl_sym_t *leave_sym;
-extern jl_sym_t *exc_sym;     extern jl_sym_t *error_sym;
-extern jl_sym_t *new_sym;     extern jl_sym_t *using_sym;
-extern jl_sym_t *splatnew_sym;
-extern jl_sym_t *pop_exception_sym;
-extern jl_sym_t *const_sym;   extern jl_sym_t *thunk_sym;
-extern jl_sym_t *foreigncall_sym;
-extern jl_sym_t *global_sym; extern jl_sym_t *list_sym;
-extern jl_sym_t *dot_sym;    extern jl_sym_t *newvar_sym;
-extern jl_sym_t *boundscheck_sym; extern jl_sym_t *inbounds_sym;
-extern jl_sym_t *aliasscope_sym; extern jl_sym_t *popaliasscope_sym;
-extern jl_sym_t *copyast_sym; extern jl_sym_t *cfunction_sym;
-extern jl_sym_t *pure_sym; extern jl_sym_t *loopinfo_sym;
-extern jl_sym_t *meta_sym; extern jl_sym_t *inert_sym;
-extern jl_sym_t *polly_sym; extern jl_sym_t *unused_sym;
-extern jl_sym_t *static_parameter_sym; extern jl_sym_t *inline_sym;
-extern jl_sym_t *noinline_sym; extern jl_sym_t *generated_sym;
-extern jl_sym_t *generated_only_sym; extern jl_sym_t *isdefined_sym;
-extern jl_sym_t *propagate_inbounds_sym; extern jl_sym_t *specialize_sym;
-extern jl_sym_t *nospecialize_sym; extern jl_sym_t *macrocall_sym;
-extern jl_sym_t *colon_sym; extern jl_sym_t *hygienicscope_sym;
-extern jl_sym_t *throw_undef_if_not_sym; extern jl_sym_t *getfield_undefref_sym;
-extern jl_sym_t *gc_preserve_begin_sym; extern jl_sym_t *gc_preserve_end_sym;
-extern jl_sym_t *coverageeffect_sym; extern jl_sym_t *escape_sym;
-extern jl_sym_t *optlevel_sym; extern jl_sym_t *compile_sym;
-extern jl_sym_t *infer_sym;
-extern jl_sym_t *atom_sym; extern jl_sym_t *statement_sym; extern jl_sym_t *all_sym;
+JL_DLLEXPORT int jl_isabspath(const char *in) JL_NOTSAFEPOINT;
+
+extern JL_DLLEXPORT jl_sym_t *jl_call_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_invoke_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_invoke_modify_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_empty_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_top_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_module_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_slot_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_export_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_import_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_toplevel_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_quote_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_line_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_incomplete_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_goto_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_goto_ifnot_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_return_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_lineinfo_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_lambda_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_assign_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_globalref_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_do_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_method_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_core_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_enter_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_leave_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_pop_exception_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_exc_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_error_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_new_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_using_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_splatnew_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_block_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_new_opaque_closure_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_opaque_closure_method_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_const_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_thunk_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_foreigncall_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_as_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_global_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_list_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_dot_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_newvar_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_boundscheck_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_inbounds_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_copyast_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_cfunction_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_pure_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_loopinfo_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_meta_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_inert_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_polly_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_unused_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_static_parameter_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_inline_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_noinline_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_generated_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_generated_only_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_isdefined_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_propagate_inbounds_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_specialize_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_aggressive_constprop_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_no_constprop_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_purity_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_nospecialize_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_macrocall_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_colon_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_hygienicscope_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_throw_undef_if_not_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_getfield_undefref_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_gc_preserve_begin_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_gc_preserve_end_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_coverageeffect_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_escape_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_aliasscope_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_popaliasscope_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_optlevel_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_thismodule_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_atom_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_statement_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_all_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_compile_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_force_compile_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_infer_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_max_methods_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_atomic_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_not_atomic_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_unordered_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_monotonic_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_acquire_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_release_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_acquire_release_sym;
+extern JL_DLLEXPORT jl_sym_t *jl_sequentially_consistent_sym;
+
+JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order(jl_sym_t *order, char loading, char storing);
+JL_DLLEXPORT enum jl_memory_order jl_get_atomic_order_checked(jl_sym_t *order, char loading, char storing);
 
 struct _jl_sysimg_fptrs_t;
 
@@ -1299,7 +1513,7 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
 #define JL_GCC_IGNORE_STOP
 #endif // _COMPILER_GCC_
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
   // Not a safepoint (so it dosn't free other values), but an artificial use.
   // Usually this is unnecessary because the analyzer can see all real uses,
   // but sometimes real uses are harder for the analyzer to see, or it may
@@ -1309,8 +1523,76 @@ jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT;
   #define JL_GC_ASSERT_LIVE(x) (void)(x)
 #endif
 
+float __gnu_h2f_ieee(uint16_t param) JL_NOTSAFEPOINT;
+uint16_t __gnu_f2h_ieee(float param) JL_NOTSAFEPOINT;
+
 #ifdef __cplusplus
 }
 #endif
 
+#ifdef USE_DTRACE
+#include "uprobes.h.gen"
+
+// uprobes.h.gen on systems with DTrace, is auto-generated to include
+// `JL_PROBE_{PROBE}` and `JL_PROBE_{PROBE}_ENABLED()` macros for every probe
+// defined in uprobes.d
+//
+// If the arguments to `JL_PROBE_{PROBE}` are expensive to compute, the call to
+// these functions must be guarded by a JL_PROBE_{PROBE}_ENABLED() check, to
+// minimize performance impact when probing is off. As an example:
+//
+//    if (JL_PROBE_GC_STOP_THE_WORLD_ENABLED())
+//        JL_PROBE_GC_STOP_THE_WORLD();
+
+#else
+// define a dummy version of the probe functions
+#define JL_PROBE_GC_BEGIN(collection) do ; while (0)
+#define JL_PROBE_GC_STOP_THE_WORLD() do ; while (0)
+#define JL_PROBE_GC_MARK_BEGIN() do ; while (0)
+#define JL_PROBE_GC_MARK_END(scanned_bytes, perm_scanned_bytes) do ; while (0)
+#define JL_PROBE_GC_SWEEP_BEGIN(full) do ; while (0)
+#define JL_PROBE_GC_SWEEP_END() do ; while (0)
+#define JL_PROBE_GC_END() do ; while (0)
+#define JL_PROBE_GC_FINALIZER() do ; while (0)
+#define JL_PROBE_RT_RUN_TASK(task) do ; while (0)
+#define JL_PROBE_RT_PAUSE_TASK(task) do ; while (0)
+#define JL_PROBE_RT_NEW_TASK(parent, child) do ; while (0)
+#define JL_PROBE_RT_START_TASK(task) do ; while (0)
+#define JL_PROBE_RT_FINISH_TASK(task) do ; while (0)
+#define JL_PROBE_RT_START_PROCESS_EVENTS(task) do ; while (0)
+#define JL_PROBE_RT_FINISH_PROCESS_EVENTS(task) do ; while (0)
+#define JL_PROBE_RT_TASKQ_INSERT(ptls, task) do ; while (0)
+#define JL_PROBE_RT_TASKQ_GET(ptls, task) do ; while (0)
+#define JL_PROBE_RT_SLEEP_CHECK_WAKE(other, old_state) do ; while (0)
+#define JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls) do ; while (0)
+#define JL_PROBE_RT_SLEEP_CHECK_SLEEP(ptls) do ; while (0)
+#define JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE(ptls) do ; while (0)
+#define JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls) do ; while (0)
+#define JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls) do ; while (0)
+
+#define JL_PROBE_GC_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_STOP_THE_WORLD_ENABLED() (0)
+#define JL_PROBE_GC_MARK_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_MARK_END_ENABLED() (0)
+#define JL_PROBE_GC_SWEEP_BEGIN_ENABLED() (0)
+#define JL_PROBE_GC_SWEEP_END_ENABLED()  (0)
+#define JL_PROBE_GC_END_ENABLED() (0)
+#define JL_PROBE_GC_FINALIZER_ENABLED() (0)
+#define JL_PROBE_RT_RUN_TASK_ENABLED() (0)
+#define JL_PROBE_RT_PAUSE_TASK_ENABLED() (0)
+#define JL_PROBE_RT_NEW_TASK_ENABLED() (0)
+#define JL_PROBE_RT_START_TASK_ENABLED() (0)
+#define JL_PROBE_RT_FINISH_TASK_ENABLED() (0)
+#define JL_PROBE_RT_START_PROCESS_EVENTS_ENABLED() (0)
+#define JL_PROBE_RT_FINISH_PROCESS_EVENTS_ENABLED() (0)
+#define JL_PROBE_RT_TASKQ_INSERT_ENABLED() (0)
+#define JL_PROBE_RT_TASKQ_GET_ENABLED() (0)
+#define JL_PROBE_RT_SLEEP_CHECK_WAKE_ENABLED() (0)
+#define JL_PROBE_RT_SLEEP_CHECK_WAKEUP_ENABLED() (0)
+#define JL_PROBE_RT_SLEEP_CHECK_SLEEP_ENABLED() (0)
+#define JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE_ENABLED() (0)
+#define JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE_ENABLED() (0)
+#define JL_PROBE_RT_SLEEP_CHECK_UV_WAKE_ENABLED() (0)
+#endif
+
 #endif
diff --git a/src/julia_locks.h b/src/julia_locks.h
new file mode 100644
index 00000000000000..8da0fc8ac9537d
--- /dev/null
+++ b/src/julia_locks.h
@@ -0,0 +1,151 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_LOCKS_H
+#define JL_LOCKS_H
+
+#include "julia_assert.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Lock acquire and release primitives
+
+// JL_LOCK and jl_mutex_lock are GC safe points, use uv_mutex_t if that is not desired.
+// Always use JL_LOCK unless no one holding the lock can trigger a GC or GC
+// safepoint. uv_mutex_t should only be needed for GC internal locks.
+// The JL_LOCK* and JL_UNLOCK* macros are no-op for non-threading build
+// while the jl_mutex_* functions are always locking and unlocking the locks.
+
+static inline void jl_mutex_wait(jl_mutex_t *lock, int safepoint)
+{
+    jl_task_t *self = jl_current_task;
+    jl_task_t *owner = jl_atomic_load_relaxed(&lock->owner);
+    if (owner == self) {
+        lock->count++;
+        return;
+    }
+    while (1) {
+        if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
+            lock->count = 1;
+            return;
+        }
+        if (safepoint) {
+            jl_gc_safepoint_(self->ptls);
+        }
+        jl_cpu_pause();
+        owner = jl_atomic_load_relaxed(&lock->owner);
+    }
+}
+
+static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT
+{
+#ifndef __clang_gcanalyzer__
+    // Hide this body from the analyzer, otherwise it complains that we're calling
+    // a non-safepoint from this function. The 0 arguments guarantees that we do
+    // not reach the safepoint, but the analyzer can't figure that out
+    jl_mutex_wait(lock, 0);
+#endif
+}
+
+static inline void jl_lock_frame_push(jl_mutex_t *lock)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    small_arraylist_t *locks = &ptls->locks;
+    uint32_t len = locks->len;
+    if (__unlikely(len >= locks->max)) {
+        small_arraylist_grow(locks, 1);
+    }
+    else {
+        locks->len = len + 1;
+    }
+    locks->items[len] = (void*)lock;
+}
+static inline void jl_lock_frame_pop(void)
+{
+    jl_ptls_t ptls = jl_current_task->ptls;
+    assert(ptls->locks.len > 0);
+    ptls->locks.len--;
+}
+
+#define JL_SIGATOMIC_BEGIN() do {               \
+        jl_current_task->ptls->defer_signal++;  \
+        jl_signal_fence();                      \
+    } while (0)
+#define JL_SIGATOMIC_END() do {                                 \
+        jl_signal_fence();                                      \
+        if (--jl_current_task->ptls->defer_signal == 0) {       \
+            jl_sigint_safepoint(jl_current_task->ptls);         \
+        }                                                       \
+    } while (0)
+
+static inline void jl_mutex_lock(jl_mutex_t *lock)
+{
+    JL_SIGATOMIC_BEGIN();
+    jl_mutex_wait(lock, 1);
+    jl_lock_frame_push(lock);
+}
+
+static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock)
+{
+    jl_task_t *self = jl_current_task;
+    jl_task_t *owner = jl_atomic_load_acquire(&lock->owner);
+    if (owner == self) {
+        lock->count++;
+        return 1;
+    }
+    if (owner == NULL && jl_atomic_cmpswap(&lock->owner, &owner, self)) {
+        lock->count = 1;
+        return 1;
+    }
+    return 0;
+}
+
+static inline int jl_mutex_trylock(jl_mutex_t *lock)
+{
+    int got = jl_mutex_trylock_nogc(lock);
+    if (got) {
+        JL_SIGATOMIC_BEGIN();
+        jl_lock_frame_push(lock);
+    }
+    return got;
+}
+static inline void jl_mutex_unlock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT
+{
+#ifndef __clang_gcanalyzer__
+    assert(jl_atomic_load_relaxed(&lock->owner) == jl_current_task &&
+           "Unlocking a lock in a different thread.");
+    if (--lock->count == 0) {
+        jl_atomic_store_release(&lock->owner, (jl_task_t*)NULL);
+        jl_cpu_wake();
+    }
+#endif
+}
+
+static inline void jl_mutex_unlock(jl_mutex_t *lock)
+{
+    jl_mutex_unlock_nogc(lock);
+    jl_lock_frame_pop();
+    JL_SIGATOMIC_END();
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers)) {
+        jl_gc_run_pending_finalizers(jl_current_task); // may GC
+    }
+}
+
+static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT
+{
+    jl_atomic_store_relaxed(&lock->owner, (jl_task_t*)NULL);
+    lock->count = 0;
+}
+
+#define JL_MUTEX_INIT(m) jl_mutex_init(m)
+#define JL_LOCK(m) jl_mutex_lock(m)
+#define JL_UNLOCK(m) jl_mutex_unlock(m)
+#define JL_LOCK_NOGC(m) jl_mutex_lock_nogc(m)
+#define JL_UNLOCK_NOGC(m) jl_mutex_unlock_nogc(m)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/julia_threads.h b/src/julia_threads.h
index 62310f35a82582..8228d1e056cb5a 100644
--- a/src/julia_threads.h
+++ b/src/julia_threads.h
@@ -4,9 +4,20 @@
 #ifndef JL_THREADS_H
 #define JL_THREADS_H
 
-#include <atomics.h>
+#include "julia_atomics.h"
+#ifndef _OS_WINDOWS_
+#include "pthread.h"
+#endif
 // threading ------------------------------------------------------------------
 
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+JL_DLLEXPORT int16_t jl_threadid(void);
+JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT;
+
 // JULIA_ENABLE_THREADING may be controlled by altering JULIA_THREADS in Make.user
 
 // When running into scheduler issues, this may help provide information on the
@@ -16,15 +27,20 @@
 
 //  Options for task switching algorithm (in order of preference):
 // JL_HAVE_ASM -- mostly setjmp
-// JL_HAVE_ASYNCIFY -- task switching based on the binaryen asyncify transform
-// JL_HAVE_UNW_CONTEXT -- hybrid of libunwind for start, setjmp for resume
+// JL_HAVE_ASM && JL_HAVE_UNW_CONTEXT -- libunwind-based
+// JL_HAVE_UNW_CONTEXT -- libunwind-based
+// JL_HAVE_ASYNCIFY -- task switching based on the binary asyncify transform
 // JL_HAVE_UCONTEXT -- posix standard API, requires syscall for resume
 // JL_HAVE_SIGALTSTACK -- requires several syscall for start, setjmp for resume
 
 #ifdef _OS_WINDOWS_
 #define JL_HAVE_UCONTEXT
-typedef win32_ucontext_t jl_ucontext_t;
+typedef win32_ucontext_t jl_stack_context_t;
+typedef jl_stack_context_t _jl_ucontext_t;
 #else
+typedef struct {
+    jl_jmp_buf uc_mcontext;
+} jl_stack_context_t;
 #if !defined(JL_HAVE_UCONTEXT) && \
     !defined(JL_HAVE_ASM) && \
     !defined(JL_HAVE_UNW_CONTEXT) && \
@@ -33,27 +49,25 @@ typedef win32_ucontext_t jl_ucontext_t;
 #if (defined(_CPU_X86_64_) || defined(_CPU_X86_) || defined(_CPU_AARCH64_) ||  \
      defined(_CPU_ARM_) || defined(_CPU_PPC64_))
 #define JL_HAVE_ASM
-#elif defined(_OS_DARWIN_)
-#define JL_HAVE_UNW_CONTEXT
-#elif defined(_OS_LINUX_)
-#define JL_HAVE_UCONTEXT
+#endif
+#if 0
+// very slow, but more debugging
+//#elif defined(_OS_DARWIN_)
+//#define JL_HAVE_UNW_CONTEXT
+//#elif defined(_OS_LINUX_)
+//#define JL_HAVE_UNW_CONTEXT
 #elif defined(_OS_EMSCRIPTEN_)
 #define JL_HAVE_ASYNCIFY
-#else
-#define JL_HAVE_UNW_CONTEXT
+#elif !defined(JL_HAVE_ASM)
+#define JL_HAVE_UNW_CONTEXT // optimistically?
 #endif
 #endif
 
-#if defined(JL_HAVE_ASM) || defined(JL_HAVE_SIGALTSTACK)
-typedef struct {
-    jl_jmp_buf uc_mcontext;
-#if defined(JL_TSAN_ENABLED)
-    void *tsan_state;
-#endif
-} jl_ucontext_t;
+#if (!defined(JL_HAVE_UNW_CONTEXT) && defined(JL_HAVE_ASM)) || defined(JL_HAVE_SIGALTSTACK)
+typedef jl_stack_context_t _jl_ucontext_t;
 #endif
 #if defined(JL_HAVE_ASYNCIFY)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN not currently supported with asyncify
 #endif
 typedef struct {
@@ -63,24 +77,42 @@ typedef struct {
     // __asyncify_data struct.
     void *stackbottom;
     void *stacktop;
-} jl_ucontext_t;
+} _jl_ucontext_t;
 #endif
-#if defined(JL_HAVE_UCONTEXT) || defined(JL_HAVE_UNW_CONTEXT)
+#if defined(JL_HAVE_UNW_CONTEXT)
 #define UNW_LOCAL_ONLY
 #include <libunwind.h>
+typedef unw_context_t _jl_ucontext_t;
+#endif
+#if defined(JL_HAVE_UCONTEXT)
+#include <ucontext.h>
+typedef ucontext_t _jl_ucontext_t;
+#endif
+#endif
+
 typedef struct {
-    ucontext_t ctx;
-#if defined(JL_TSAN_ENABLED)
+    union {
+        _jl_ucontext_t ctx;
+        jl_stack_context_t copy_ctx;
+    };
+#if defined(_COMPILER_TSAN_ENABLED_)
     void *tsan_state;
 #endif
 } jl_ucontext_t;
-#endif
+
+
+// handle to reference an OS thread
+#ifdef _OS_WINDOWS_
+typedef DWORD jl_thread_t;
+#else
+typedef pthread_t jl_thread_t;
 #endif
 
+struct _jl_task_t;
 
 // Recursive spin lock
 typedef struct {
-    volatile unsigned long owner;
+    _Atomic(struct _jl_task_t*) owner;
     uint32_t count;
 } jl_mutex_t;
 
@@ -91,13 +123,13 @@ typedef struct {
 } jl_gc_pool_t;
 
 typedef struct {
-    int64_t     allocd;
-    int64_t     freed;
-    uint64_t    malloc;
-    uint64_t    realloc;
-    uint64_t    poolalloc;
-    uint64_t    bigalloc;
-    uint64_t    freecall;
+    _Atomic(int64_t) allocd;
+    _Atomic(int64_t) freed;
+    _Atomic(uint64_t) malloc;
+    _Atomic(uint64_t) realloc;
+    _Atomic(uint64_t) poolalloc;
+    _Atomic(uint64_t) bigalloc;
+    _Atomic(uint64_t) freecall;
 } jl_thread_gc_num_t;
 
 typedef struct {
@@ -124,11 +156,11 @@ typedef struct {
 
     // variables for allocating objects from pools
 #ifdef _P64
-#  define JL_GC_N_POOLS 41
+#  define JL_GC_N_POOLS 49
 #elif MAX_ALIGN == 8
-#  define JL_GC_N_POOLS 42
+#  define JL_GC_N_POOLS 50
 #else
-#  define JL_GC_N_POOLS 43
+#  define JL_GC_N_POOLS 51
 #endif
     jl_gc_pool_t norm_pools[JL_GC_N_POOLS];
 
@@ -163,7 +195,6 @@ typedef struct {
     // this makes sure that a single objects can only appear once in
     // the lists (the mark bit cannot be flipped to `0` without sweeping)
     void *big_obj[1024];
-    jl_mutex_t stack_lock;
     void **pc_stack;
     void **pc_stack_end;
     jl_gc_mark_data_t *data_stack;
@@ -173,13 +204,12 @@ struct _jl_bt_element_t;
 // This includes all the thread local states we care about for a thread.
 // Changes to TLS field types must be reflected in codegen.
 #define JL_MAX_BT_SIZE 80000
-struct _jl_tls_states_t {
-    struct _jl_gcframe_t *pgcstack;
-    size_t world_age;
+typedef struct _jl_tls_states_t {
     int16_t tid;
+    int8_t threadpoolid;
     uint64_t rngseed;
     volatile size_t *safepoint;
-    int8_t sleep_check_state; // read/write from foreign threads
+    _Atomic(int8_t) sleep_check_state; // read/write from foreign threads
     // Whether it is safe to execute GC at the same time.
 #define JL_GC_STATE_WAITING 1
     // gc_state = 1 means the thread is doing GC or is waiting for the GC to
@@ -187,31 +217,40 @@ struct _jl_tls_states_t {
 #define JL_GC_STATE_SAFE 2
     // gc_state = 2 means the thread is running unmanaged code that can be
     //              execute at the same time with the GC.
-    int8_t gc_state; // read from foreign threads
+    _Atomic(int8_t) gc_state; // read from foreign threads
+    // execution of certain certain impure
+    // statements is prohibited from certain
+    // callbacks (such as generated functions)
+    // as it may make compilation undecidable
+    int8_t in_pure_callback;
     int8_t in_finalizer;
     int8_t disable_gc;
-    jl_thread_heap_t heap;
+    // Counter to disable finalizer **on the current thread**
+    int finalizers_inhibited;
+    jl_thread_heap_t heap; // this is very large, and the offset is baked into codegen
     jl_thread_gc_num_t gc_num;
-    uv_mutex_t sleep_lock;
-    uv_cond_t wake_signal;
     volatile sig_atomic_t defer_signal;
-    struct _jl_task_t *current_task;
+    _Atomic(struct _jl_task_t*) current_task;
     struct _jl_task_t *next_task;
-#ifdef MIGRATE_TASKS
     struct _jl_task_t *previous_task;
-#endif
     struct _jl_task_t *root_task;
+    struct _jl_timing_block_t *timing_stack;
     void *stackbase;
     size_t stacksize;
-    jl_ucontext_t base_ctx; // base context of stack
-    jl_jmp_buf *safe_restore;
+    union {
+        _jl_ucontext_t base_ctx; // base context of stack
+        // This hack is needed to support always_copy_stacks:
+        jl_stack_context_t copy_stack_ctx;
+    };
     // Temp storage for exception thrown in signal handler. Not rooted.
     struct _jl_value_t *sig_exception;
     // Temporary backtrace buffer. Scanned for gc roots when bt_size > 0.
     struct _jl_bt_element_t *bt_data; // JL_MAX_BT_SIZE + 1 elements long
     size_t bt_size;    // Size for backtrace in transit in bt_data
+    // Temporary backtrace buffer used only for allocations profiler.
+    struct _jl_bt_element_t *profiling_bt_buffer;
     // Atomically set by the sender, reset by the handler.
-    volatile sig_atomic_t signal_request;
+    volatile _Atomic(sig_atomic_t) signal_request; // TODO: no actual reason for this to be _Atomic
     // Allow the sigint to be raised asynchronously
     // this is limited to the few places we do synchronous IO
     // we can make this more general (similar to defer_signal) if necessary
@@ -221,19 +260,12 @@ struct _jl_tls_states_t {
 #else
     void *signal_stack;
 #endif
-    unsigned long system_id;
-    // execution of certain certain impure
-    // statements is prohibited from certain
-    // callbacks (such as generated functions)
-    // as it may make compilation undecidable
-    int in_pure_callback;
-    // Counter to disable finalizer **on the current thread**
-    int finalizers_inhibited;
+    jl_thread_t system_id;
     arraylist_t finalizers;
     jl_gc_mark_cache_t gc_cache;
     arraylist_t sweep_objs;
     jl_gc_mark_sp_t gc_mark_sp;
-    // Saved exception for previous external API call or NULL if cleared.
+    // Saved exception for previous *external* API call or NULL if cleared.
     // Access via jl_exception_occurred().
     struct _jl_value_t *previous_exception;
 
@@ -246,7 +278,14 @@ struct _jl_tls_states_t {
         uint64_t sleep_enter;
         uint64_t sleep_leave;
     )
-};
+} jl_tls_states_t;
+
+typedef jl_tls_states_t *jl_ptls_t;
+
+#ifndef LIBRARY_EXPORTS
+// deprecated (only for external consumers)
+JL_DLLEXPORT void *jl_get_ptls_states(void);
+#endif
 
 // Update codegen version in `ccall.cpp` after changing either `pause` or `wake`
 #ifdef __MIC__
@@ -267,14 +306,10 @@ struct _jl_tls_states_t {
 #  define JL_CPU_WAKE_NOOP 1
 #endif
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 JL_DLLEXPORT void (jl_cpu_pause)(void);
 JL_DLLEXPORT void (jl_cpu_wake)(void);
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 // Note that the sigint safepoint can also trigger GC, albeit less likely
 void jl_gc_safepoint_(jl_ptls_t tls);
 void jl_sigint_safepoint(jl_ptls_t tls);
@@ -309,9 +344,9 @@ STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state,
 STATIC_INLINE int8_t jl_gc_state_save_and_set(jl_ptls_t ptls,
                                               int8_t state)
 {
-    return jl_gc_state_set(ptls, state, ptls->gc_state);
+    return jl_gc_state_set(ptls, state, jl_atomic_load_relaxed(&ptls->gc_state));
 }
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 int8_t jl_gc_unsafe_enter(jl_ptls_t ptls); // Can be a safepoint
 int8_t jl_gc_unsafe_leave(jl_ptls_t ptls, int8_t state) JL_NOTSAFEPOINT;
 int8_t jl_gc_safe_enter(jl_ptls_t ptls) JL_NOTSAFEPOINT;
@@ -324,7 +359,11 @@ int8_t jl_gc_safe_leave(jl_ptls_t ptls, int8_t state); // Can be a safepoint
 #endif
 JL_DLLEXPORT void (jl_gc_safepoint)(void);
 
-JL_DLLEXPORT void jl_gc_enable_finalizers(jl_ptls_t ptls, int on);
+JL_DLLEXPORT void jl_gc_enable_finalizers(struct _jl_task_t *ct, int on);
+JL_DLLEXPORT void jl_gc_disable_finalizers_internal(void);
+JL_DLLEXPORT void jl_gc_enable_finalizers_internal(void);
+JL_DLLEXPORT void jl_gc_run_pending_finalizers(struct _jl_task_t *ct);
+extern JL_DLLEXPORT _Atomic(int) jl_gc_have_pending_finalizers;
 
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid);
 
diff --git a/src/llvm-alloc-helpers.cpp b/src/llvm-alloc-helpers.cpp
new file mode 100644
index 00000000000000..b2aded025c0d18
--- /dev/null
+++ b/src/llvm-alloc-helpers.cpp
@@ -0,0 +1,307 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "llvm-version.h"
+#include "llvm-alloc-helpers.h"
+#include "codegen_shared.h"
+#include "julia_assert.h"
+
+#include <llvm/IR/IntrinsicInst.h>
+
+using namespace llvm;
+using namespace jl_alloc;
+
+static bool hasObjref(Type *ty)
+{
+    if (auto ptrty = dyn_cast<PointerType>(ty))
+        return ptrty->getAddressSpace() == AddressSpace::Tracked;
+    if (isa<ArrayType>(ty) || isa<VectorType>(ty))
+        return hasObjref(GetElementPtrInst::getTypeAtIndex(ty, (uint64_t)0));
+    if (auto structty = dyn_cast<StructType>(ty)) {
+        for (auto elty: structty->elements()) {
+            if (hasObjref(elty)) {
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+std::pair<const uint32_t,Field>&
+AllocUseInfo::getField(uint32_t offset, uint32_t size, Type *elty)
+{
+    auto it = findLowerField(offset);
+    auto end = memops.end();
+    auto lb = end; // first overlap
+    auto ub = end; // last overlap
+    if (it != end) {
+        // The slot found contains the current location
+        if (it->first + it->second.size >= offset + size) {
+            if (it->second.elty != elty)
+                it->second.elty = nullptr;
+            assert(it->second.elty == nullptr || (it->first == offset && it->second.size == size));
+            return *it;
+        }
+        if (it->first + it->second.size > offset) {
+            lb = it;
+            ub = it;
+        }
+    }
+    else {
+        it = memops.begin();
+    }
+    // Now find the last slot that overlaps with the current memory location.
+    // Also set `lb` if we didn't find any above.
+    for (; it != end && it->first < offset + size; ++it) {
+        if (lb == end)
+            lb = it;
+        ub = it;
+    }
+    // no overlap found just create a new one.
+    if (lb == end)
+        return *memops.emplace(offset, Field(size, elty)).first;
+    // We find overlapping but not containing slot we need to merge slot/create new one
+    uint32_t new_offset = std::min(offset, lb->first);
+    uint32_t new_addrub = std::max(offset + uint32_t(size), ub->first + ub->second.size);
+    uint32_t new_size = new_addrub - new_offset;
+    Field field(new_size, nullptr);
+    field.multiloc = true;
+    ++ub;
+    for (it = lb; it != ub; ++it) {
+        field.hasobjref |= it->second.hasobjref;
+        field.hasload |= it->second.hasload;
+        field.hasaggr |= it->second.hasaggr;
+        field.accesses.append(it->second.accesses.begin(), it->second.accesses.end());
+    }
+    memops.erase(lb, ub);
+    return *memops.emplace(new_offset, std::move(field)).first;
+}
+
+bool AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
+                                       Type *elty, bool isstore, const DataLayout &DL)
+{
+    MemOp memop(inst, opno);
+    memop.offset = offset;
+    uint64_t size = DL.getTypeStoreSize(elty);
+    memop.size = size;
+    memop.isaggr = isa<StructType>(elty) || isa<ArrayType>(elty) || isa<VectorType>(elty);
+    memop.isobjref = hasObjref(elty);
+    auto &field = getField(offset, size, elty);
+    if (field.second.hasobjref != memop.isobjref)
+        field.second.multiloc = true; // can't split this field, since it contains a mix of references and bits
+    if (!isstore)
+        field.second.hasload = true;
+    if (memop.isobjref) {
+        if (isstore) {
+            refstore = true;
+        }
+        else {
+            refload = true;
+        }
+        if (memop.isaggr)
+            field.second.hasaggr = true;
+        field.second.hasobjref = true;
+    }
+    else if (memop.isaggr) {
+        field.second.hasaggr = true;
+    }
+    field.second.accesses.push_back(memop);
+    if (size >= UINT32_MAX - offset)
+        return false;
+    return true;
+}
+
+JL_USED_FUNC void AllocUseInfo::dump()
+{
+    jl_safe_printf("escaped: %d\n", escaped);
+    jl_safe_printf("addrescaped: %d\n", addrescaped);
+    jl_safe_printf("returned: %d\n", returned);
+    jl_safe_printf("haserror: %d\n", haserror);
+    jl_safe_printf("hasload: %d\n", hasload);
+    jl_safe_printf("haspreserve: %d\n", haspreserve);
+    jl_safe_printf("hasunknownmem: %d\n", hasunknownmem);
+    jl_safe_printf("hastypeof: %d\n", hastypeof);
+    jl_safe_printf("refload: %d\n", refload);
+    jl_safe_printf("refstore: %d\n", refstore);
+    jl_safe_printf("Uses: %d\n", (unsigned)uses.size());
+    for (auto inst: uses)
+        llvm_dump(inst);
+    if (!preserves.empty()) {
+        jl_safe_printf("Preserves: %d\n", (unsigned)preserves.size());
+        for (auto inst: preserves) {
+            llvm_dump(inst);
+        }
+    }
+    if (!memops.empty()) {
+        jl_safe_printf("Memops: %d\n", (unsigned)memops.size());
+        for (auto &field: memops) {
+            jl_safe_printf("  Field %d @ %d\n", field.second.size, field.first);
+            jl_safe_printf("    Accesses:\n");
+            for (auto memop: field.second.accesses) {
+                jl_safe_printf("    ");
+                llvm_dump(memop.inst);
+            }
+        }
+    }
+}
+
+void jl_alloc::runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options) {
+    required.use_info.reset();
+    if (I->use_empty())
+        return;
+    CheckInst::Frame cur{I, 0, I->use_begin(), I->use_end()};
+    required.check_stack.clear();
+
+    // Recursion
+    auto push_inst = [&] (Instruction *inst) {
+        if (cur.use_it != cur.use_end)
+            required.check_stack.push_back(cur);
+        cur.parent = inst;
+        cur.use_it = inst->use_begin();
+        cur.use_end = inst->use_end();
+    };
+
+    auto check_inst = [&] (Instruction *inst, Use *use) {
+        if (isa<LoadInst>(inst)) {
+            required.use_info.hasload = true;
+            if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, 0, cur.offset,
+                                                               inst->getType(),
+                                                               false, required.DL))
+                required.use_info.hasunknownmem = true;
+            return true;
+        }
+        if (auto call = dyn_cast<CallInst>(inst)) {
+            // TODO handle `memcmp`
+            // None of the intrinsics should care if the memory is stack or heap allocated.
+            auto callee = call->getCalledOperand();
+            if (auto II = dyn_cast<IntrinsicInst>(call)) {
+                if (auto id = II->getIntrinsicID()) {
+                    if (id == Intrinsic::memset) {
+                        assert(call->arg_size() == 4);
+                        if (cur.offset == UINT32_MAX ||
+                            !isa<ConstantInt>(call->getArgOperand(2)) ||
+                            !isa<ConstantInt>(call->getArgOperand(1)) ||
+                            (cast<ConstantInt>(call->getArgOperand(2))->getLimitedValue() >=
+                             UINT32_MAX - cur.offset))
+                            required.use_info.hasunknownmem = true;
+                        return true;
+                    }
+                    if (id == Intrinsic::lifetime_start || id == Intrinsic::lifetime_end ||
+                        isa<DbgInfoIntrinsic>(II))
+                        return true;
+                    required.use_info.addrescaped = true;
+                    return true;
+                }
+                if (required.pass.gc_preserve_begin_func == callee) {
+                    for (auto user: call->users())
+                        required.use_info.uses.insert(cast<Instruction>(user));
+                    required.use_info.preserves.insert(call);
+                    required.use_info.haspreserve = true;
+                    return true;
+                }
+            }
+            if (required.pass.pointer_from_objref_func == callee) {
+                required.use_info.addrescaped = true;
+                return true;
+            }
+            if (required.pass.typeof_func == callee) {
+                required.use_info.hastypeof = true;
+                assert(use->get() == I);
+                return true;
+            }
+            if (required.pass.write_barrier_func == callee ||
+                required.pass.write_barrier_binding_func == callee)
+                return true;
+            auto opno = use->getOperandNo();
+            // Uses in `jl_roots` operand bundle are not counted as escaping, everything else is.
+            if (!call->isBundleOperand(opno) ||
+                call->getOperandBundleForOperand(opno).getTagName() != "jl_roots") {
+                if (isa<UnreachableInst>(call->getParent()->getTerminator())) {
+                    required.use_info.haserror = true;
+                    return true;
+                }
+                required.use_info.escaped = true;
+                return false;
+            }
+            required.use_info.haspreserve = true;
+            return true;
+        }
+        if (auto store = dyn_cast<StoreInst>(inst)) {
+            // Only store value count
+            if (use->getOperandNo() != StoreInst::getPointerOperandIndex()) {
+                required.use_info.escaped = true;
+                return false;
+            }
+            auto storev = store->getValueOperand();
+            if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
+                                                               cur.offset, storev->getType(),
+                                                               true, required.DL))
+                required.use_info.hasunknownmem = true;
+            return true;
+        }
+        if (isa<AtomicCmpXchgInst>(inst) || isa<AtomicRMWInst>(inst)) {
+            // Only store value count
+            if (use->getOperandNo() != isa<AtomicCmpXchgInst>(inst) ? AtomicCmpXchgInst::getPointerOperandIndex() : AtomicRMWInst::getPointerOperandIndex()) {
+                required.use_info.escaped = true;
+                return false;
+            }
+            required.use_info.hasload = true;
+            auto storev = isa<AtomicCmpXchgInst>(inst) ? cast<AtomicCmpXchgInst>(inst)->getNewValOperand() : cast<AtomicRMWInst>(inst)->getValOperand();
+            if (cur.offset == UINT32_MAX || !required.use_info.addMemOp(inst, use->getOperandNo(),
+                                                               cur.offset, storev->getType(),
+                                                               true, required.DL))
+                required.use_info.hasunknownmem = true;
+            required.use_info.refload = true;
+            return true;
+        }
+        if (isa<AddrSpaceCastInst>(inst) || isa<BitCastInst>(inst)) {
+            push_inst(inst);
+            return true;
+        }
+        if (auto gep = dyn_cast<GetElementPtrInst>(inst)) {
+            uint64_t next_offset = cur.offset;
+            if (cur.offset != UINT32_MAX) {
+                APInt apoffset(sizeof(void*) * 8, cur.offset, true);
+                if (!gep->accumulateConstantOffset(required.DL, apoffset) || apoffset.isNegative()) {
+                    next_offset = UINT32_MAX;
+                }
+                else {
+                    next_offset = apoffset.getLimitedValue();
+                    if (next_offset > UINT32_MAX) {
+                        next_offset = UINT32_MAX;
+                    }
+                }
+            }
+            push_inst(inst);
+            cur.offset = (uint32_t)next_offset;
+            return true;
+        }
+        if (isa<ReturnInst>(inst)) {
+            required.use_info.returned = true;
+            return true;
+        }
+        required.use_info.escaped = true;
+        return false;
+    };
+
+    while (true) {
+        assert(cur.use_it != cur.use_end);
+        auto use = &*cur.use_it;
+        auto inst = dyn_cast<Instruction>(use->getUser());
+        ++cur.use_it;
+        if (!inst) {
+            required.use_info.escaped = true;
+            return;
+        }
+        if (!options.valid_set || options.valid_set->contains(inst->getParent())) {
+            if (!check_inst(inst, use))
+                return;
+            required.use_info.uses.insert(inst);
+        }
+        if (cur.use_it == cur.use_end) {
+            if (required.check_stack.empty())
+                return;
+            cur = required.check_stack.back();
+            required.check_stack.pop_back();
+        }
+    }
+}
diff --git a/src/llvm-alloc-helpers.h b/src/llvm-alloc-helpers.h
new file mode 100644
index 00000000000000..7238d71de973f9
--- /dev/null
+++ b/src/llvm-alloc-helpers.h
@@ -0,0 +1,145 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef LLVM_ALLOC_HELPERS_H
+#define LLVM_ALLOC_HELPERS_H
+#include <llvm-c/Types.h>
+
+#include <llvm/ADT/SmallSet.h>
+#include <llvm/ADT/SmallVector.h>
+#include <llvm/IR/Instructions.h>
+
+#include <utility>
+#include <map>
+
+#include "llvm-pass-helpers.h"
+
+namespace jl_alloc {
+
+    struct CheckInst {
+        struct Frame {
+            llvm::Instruction *parent;
+            uint32_t offset;
+            llvm::Instruction::use_iterator use_it;
+            llvm::Instruction::use_iterator use_end;
+        };
+        typedef llvm::SmallVector<Frame,4> Stack;
+    };
+
+    struct MemOp {
+        llvm::Instruction *inst;
+        uint64_t offset = 0;
+        unsigned opno;
+        uint32_t size = 0;
+        bool isobjref:1;
+        bool isaggr:1;
+        MemOp(llvm::Instruction *inst, unsigned opno)
+            : inst(inst),
+              opno(opno),
+              isobjref(false),
+              isaggr(false)
+        {}
+    };
+    struct Field {
+        uint32_t size;
+        bool hasobjref:1;
+        bool hasaggr:1;
+        bool multiloc:1;
+        bool hasload:1;
+        llvm::Type *elty;
+        llvm::SmallVector<MemOp,4> accesses;
+        Field(uint32_t size, llvm::Type *elty)
+            : size(size),
+              hasobjref(false),
+              hasaggr(false),
+              multiloc(false),
+              hasload(false),
+              elty(elty)
+        {
+        }
+    };
+
+    struct AllocUseInfo {
+        llvm::SmallSet<llvm::Instruction*,16> uses;
+        llvm::SmallSet<llvm::CallInst*,4> preserves;
+        std::map<uint32_t,Field> memops;
+        // Completely unknown use
+        bool escaped:1;
+        // Address is leaked to functions that doesn't care where the object is allocated.
+        bool addrescaped:1;
+        // There are reader of the memory
+        bool hasload:1;
+        // There are uses in gc_preserve intrinsics or ccall roots
+        bool haspreserve:1;
+        // There are objects fields being loaded
+        bool refload:1;
+        // There are objects fields being stored
+        bool refstore:1;
+        // There are typeof call
+        // This can be optimized without optimizing out the allocation itself
+        bool hastypeof:1;
+        // There are store/load/memset on this object with offset or size (or value for memset)
+        // that cannot be statically computed.
+        // This is a weaker form of `addrescaped` since `hasload` can still be used
+        // to see if the memory is actually being used
+        bool hasunknownmem:1;
+        // The object is returned
+        bool returned:1;
+        // The object is used in an error function
+        bool haserror:1;
+
+        void reset()
+        {
+            escaped = false;
+            addrescaped = false;
+            hasload = false;
+            haspreserve = false;
+            refload = false;
+            refstore = false;
+            hastypeof = false;
+            hasunknownmem = false;
+            returned = false;
+            haserror = false;
+            uses.clear();
+            preserves.clear();
+            memops.clear();
+        }
+        void dump();
+        bool addMemOp(llvm::Instruction *inst, unsigned opno, uint32_t offset, llvm::Type *elty,
+                      bool isstore, const llvm::DataLayout &DL);
+        std::pair<const uint32_t,Field> &getField(uint32_t offset, uint32_t size, llvm::Type *elty);
+        std::map<uint32_t,Field>::iterator findLowerField(uint32_t offset)
+        {
+            // Find the last field that starts no higher than `offset`.
+            auto it = memops.upper_bound(offset);
+            if (it != memops.begin())
+                return --it;
+            return memops.end();
+        }
+    };
+
+    struct EscapeAnalysisRequiredArgs {
+        AllocUseInfo &use_info; // The returned escape analysis data
+        CheckInst::Stack &check_stack; // A preallocated stack to be used for escape analysis
+        JuliaPassContext &pass; // The current optimization pass (for accessing intrinsic functions)
+        const llvm::DataLayout &DL; // The module's data layout (for handling GEPs/memory operations)
+    };
+
+    struct EscapeAnalysisOptionalArgs {
+        //A set of basic blocks to run escape analysis over. Uses outside these basic blocks
+        //will not be considered. Defaults to nullptr, which means all uses of the allocation
+        //are considered
+        const llvm::SmallPtrSetImpl<const llvm::BasicBlock*> *valid_set;
+
+        EscapeAnalysisOptionalArgs() = default;
+
+        EscapeAnalysisOptionalArgs &with_valid_set(decltype(valid_set) valid_set) {
+            this->valid_set = valid_set;
+            return *this;
+        }
+    };
+
+    void runEscapeAnalysis(llvm::Instruction *I, EscapeAnalysisRequiredArgs required, EscapeAnalysisOptionalArgs options=EscapeAnalysisOptionalArgs());
+}
+
+
+#endif
diff --git a/src/llvm-alloc-opt.cpp b/src/llvm-alloc-opt.cpp
index bd9f37aea80d81..86e6ff4b1c5375 100644
--- a/src/llvm-alloc-opt.cpp
+++ b/src/llvm-alloc-opt.cpp
@@ -1,6 +1,5 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#define DEBUG_TYPE "alloc_opt"
 #undef DEBUG
 #include "llvm-version.h"
 
@@ -10,6 +9,7 @@
 #include <llvm/ADT/SmallSet.h>
 #include <llvm/ADT/SmallVector.h>
 #include <llvm/ADT/SetVector.h>
+#include <llvm/ADT/Statistic.h>
 #include <llvm/IR/Value.h>
 #include <llvm/IR/CFG.h>
 #include <llvm/IR/LegacyPassManager.h>
@@ -20,33 +20,45 @@
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Operator.h>
 #include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Verifier.h>
 #include <llvm/Pass.h>
 #include <llvm/Support/Debug.h>
 #include <llvm/Transforms/Utils/PromoteMemToReg.h>
 
-#if JL_LLVM_VERSION >= 100000
 #include <llvm/InitializePasses.h>
-#endif
 
 #include "codegen_shared.h"
 #include "julia.h"
 #include "julia_internal.h"
 #include "llvm-pass-helpers.h"
+#include "llvm-alloc-helpers.h"
+#include "passes.h"
 
 #include <map>
 #include <set>
 
+#define DEBUG_TYPE "alloc_opt"
 #include "julia_assert.h"
 
 using namespace llvm;
+using namespace jl_alloc;
+
+STATISTIC(RemovedAllocs, "Total number of heap allocations elided");
+STATISTIC(DeletedAllocs, "Total number of heap allocations fully deleted");
+STATISTIC(SplitAllocs, "Total number of allocations split into registers");
+STATISTIC(StackAllocs, "Total number of allocations moved to the stack");
+STATISTIC(RemovedTypeofs, "Total number of typeofs removed");
+STATISTIC(RemovedWriteBarriers, "Total number of write barriers removed");
+STATISTIC(RemovedGCPreserve, "Total number of GC preserve instructions removed");
 
 namespace {
 
 static void removeGCPreserve(CallInst *call, Instruction *val)
 {
+    ++RemovedGCPreserve;
     auto replace = Constant::getNullValue(val->getType());
     call->replaceUsesOfWith(val, replace);
-    for (auto &arg: call->arg_operands()) {
+    for (auto &arg: call->args()) {
         if (!isa<Constant>(arg.get())) {
             return;
         }
@@ -60,27 +72,6 @@ static void removeGCPreserve(CallInst *call, Instruction *val)
     call->eraseFromParent();
 }
 
-static bool hasObjref(Type *ty)
-{
-    if (auto ptrty = dyn_cast<PointerType>(ty))
-        return ptrty->getAddressSpace() == AddressSpace::Tracked;
-#if JL_LLVM_VERSION >= 110000
-    if (isa<ArrayType>(ty) || isa<VectorType>(ty))
-        return GetElementPtrInst::getTypeAtIndex(ty, (uint64_t)0);
-#else
-    if (auto seqty = dyn_cast<SequentialType>(ty))
-        return hasObjref(seqty->getElementType());
-#endif
-    if (auto structty = dyn_cast<StructType>(ty)) {
-        for (auto elty: structty->elements()) {
-            if (hasObjref(elty)) {
-                return true;
-            }
-        }
-    }
-    return false;
-}
-
 /**
  * Promote `julia.gc_alloc_obj` which do not have escaping root to a alloca.
  * Uses that are not considered to escape the object (i.e. heap address) includes,
@@ -106,37 +97,22 @@ static bool hasObjref(Type *ty)
  * * Handle jl_box*
  */
 
-struct AllocOpt : public FunctionPass, public JuliaPassContext {
-    static char ID;
-    AllocOpt()
-        : FunctionPass(ID)
-    {
-        llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
-    }
+struct AllocOpt : public JuliaPassContext {
 
     const DataLayout *DL;
 
     Function *lifetime_start;
     Function *lifetime_end;
 
-    Type *T_int64;
-
-private:
-    bool doInitialization(Module &m) override;
-    bool runOnFunction(Function &F) override;
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        FunctionPass::getAnalysisUsage(AU);
-        AU.addRequired<DominatorTreeWrapperPass>();
-        AU.addPreserved<DominatorTreeWrapperPass>();
-        AU.setPreservesCFG();
-    }
+    bool doInitialization(Module &m);
+    bool runOnFunction(Function &F, function_ref<DominatorTree&()> GetDT);
 };
 
 struct Optimizer {
-    Optimizer(Function &F, AllocOpt &pass)
+    Optimizer(Function &F, AllocOpt &pass, function_ref<DominatorTree&()> GetDT)
         : F(F),
-          pass(pass)
+          pass(pass),
+          GetDT(std::move(GetDT))
     {}
 
     void initialize();
@@ -164,23 +140,14 @@ struct Optimizer {
     Function &F;
     AllocOpt &pass;
     DominatorTree *_DT = nullptr;
+    function_ref<DominatorTree &()> GetDT;
 
     DominatorTree &getDomTree()
     {
         if (!_DT)
-            _DT = &pass.getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+            _DT = &GetDT();
         return *_DT;
     }
-
-    struct CheckInst {
-        struct Frame {
-            Instruction *parent;
-            uint32_t offset;
-            Instruction::use_iterator use_it;
-            Instruction::use_iterator use_end;
-        };
-        typedef SmallVector<Frame,4> Stack;
-    };
     struct Lifetime {
         struct Frame {
             BasicBlock *bb;
@@ -213,90 +180,6 @@ struct Optimizer {
         typedef SmallVector<Frame,4> Stack;
     };
 
-    struct MemOp {
-        Instruction *inst;
-        unsigned opno;
-        uint32_t offset = 0;
-        uint32_t size = 0;
-        bool isobjref:1;
-        bool isaggr:1;
-        MemOp(Instruction *inst, unsigned opno)
-            : inst(inst),
-              opno(opno),
-              isobjref(false),
-              isaggr(false)
-        {}
-    };
-    struct Field {
-        uint32_t size;
-        bool hasobjref:1;
-        bool hasaggr:1;
-        bool multiloc:1;
-        bool hasload:1;
-        Type *elty;
-        SmallVector<MemOp,4> accesses;
-        Field(uint32_t size, Type *elty)
-            : size(size),
-              hasobjref(false),
-              hasaggr(false),
-              multiloc(false),
-              hasload(false),
-              elty(elty)
-        {
-        }
-    };
-    struct AllocUseInfo {
-        SmallSet<Instruction*,16> uses;
-        SmallSet<CallInst*,4> preserves;
-        std::map<uint32_t,Field> memops;
-        // Completely unknown use
-        bool escaped:1;
-        // Address is leaked to functions that doesn't care where the object is allocated.
-        bool addrescaped:1;
-        // There are reader of the memory
-        bool hasload:1;
-        // There are uses in gc_preserve intrinsics or ccall roots
-        bool haspreserve:1;
-        // There are objects fields being loaded
-        bool refload:1;
-        // There are objects fields being stored
-        bool refstore:1;
-        // There are typeof call
-        // This can be optimized without optimizing out the allocation itself
-        bool hastypeof:1;
-        // There are store/load/memset on this object with offset or size (or value for memset)
-        // that cannot be statically computed.
-        // This is a weaker form of `addrescaped` since `hasload` can still be used
-        // to see if the memory is actually being used
-        bool hasunknownmem:1;
-        void reset()
-        {
-            escaped = false;
-            addrescaped = false;
-            hasload = false;
-            haspreserve = false;
-            refload = false;
-            refstore = false;
-            hastypeof = false;
-            hasunknownmem = false;
-            uses.clear();
-            preserves.clear();
-            memops.clear();
-        }
-        void dump();
-        bool addMemOp(Instruction *inst, unsigned opno, uint32_t offset, Type *elty,
-                      bool isstore, const DataLayout &DL);
-        std::pair<const uint32_t,Field> &getField(uint32_t offset, uint32_t size, Type *elty);
-        std::map<uint32_t,Field>::iterator findLowerField(uint32_t offset)
-        {
-            // Find the last field that starts no higher than `offset`.
-            auto it = memops.upper_bound(offset);
-            if (it != memops.begin())
-                return --it;
-            return memops.end();
-        }
-    };
-
     SetVector<std::pair<CallInst*,size_t>> worklist;
     SmallVector<CallInst*,6> removed;
     AllocUseInfo use_info;
@@ -335,6 +218,11 @@ void Optimizer::optimizeAll()
                 optimizeTag(orig);
             continue;
         }
+        if (use_info.haserror || use_info.returned) {
+            if (use_info.hastypeof)
+                optimizeTag(orig);
+            continue;
+        }
         if (!use_info.addrescaped && !use_info.hasload && (!use_info.haspreserve ||
                                                            !use_info.refstore)) {
             // No one took the address, no one reads anything and there's no meaningful
@@ -350,7 +238,8 @@ void Optimizer::optimizeAll()
             if (field.hasobjref) {
                 has_ref = true;
                 // This can be relaxed a little based on hasload
-                if (field.hasaggr || field.multiloc) {
+                // TODO: add support for hasaggr load/store
+                if (field.hasaggr || field.multiloc || field.size != sizeof(void*)) {
                     has_refaggr = true;
                     break;
                 }
@@ -361,15 +250,12 @@ void Optimizer::optimizeAll()
             splitOnStack(orig);
             continue;
         }
-        if (has_ref) {
-            if (use_info.memops.size() != 1 || has_refaggr ||
-                use_info.memops.begin()->second.size != sz) {
-                if (use_info.hastypeof)
-                    optimizeTag(orig);
-                continue;
-            }
-            // The object only has a single field that's a reference with only one kind of access.
+        if (has_refaggr) {
+            if (use_info.hastypeof)
+                optimizeTag(orig);
+            continue;
         }
+        // The object has no fields with mix reference access
         moveToStack(orig, sz, has_ref);
     }
 }
@@ -425,262 +311,17 @@ ssize_t Optimizer::getGCAllocSize(Instruction *I)
         return -1;
     if (call->getCalledOperand() != pass.alloc_obj_func)
         return -1;
-    assert(call->getNumArgOperands() == 3);
+    assert(call->arg_size() == 3);
     size_t sz = (size_t)cast<ConstantInt>(call->getArgOperand(1))->getZExtValue();
     if (sz < IntegerType::MAX_INT_BITS / 8 && sz < INT32_MAX)
         return sz;
     return -1;
 }
 
-std::pair<const uint32_t,Optimizer::Field>&
-Optimizer::AllocUseInfo::getField(uint32_t offset, uint32_t size, Type *elty)
-{
-    auto it = findLowerField(offset);
-    auto end = memops.end();
-    auto lb = end; // first overlap
-    auto ub = end; // last overlap
-    if (it != end) {
-        // The slot found contains the current location
-        if (it->first + it->second.size >= offset + size) {
-            if (it->second.elty != elty)
-                it->second.elty = nullptr;
-            return *it;
-        }
-        if (it->first + it->second.size > offset) {
-            lb = it;
-            ub = it;
-        }
-    }
-    else {
-        it = memops.begin();
-    }
-    // Now fine the last slot that overlaps with the current memory location.
-    // Also set `lb` if we didn't find any above.
-    for (; it != end && it->first < offset + size; ++it) {
-        if (lb == end)
-            lb = it;
-        ub = it;
-    }
-    // no overlap found just create a new one.
-    if (lb == end)
-        return *memops.emplace(offset, Field(size, elty)).first;
-    // We find overlapping but not containing slot we need to merge slot/create new one
-    uint32_t new_offset = std::min(offset, lb->first);
-    uint32_t new_addrub = std::max(offset + uint32_t(size), ub->first + ub->second.size);
-    uint32_t new_size = new_addrub - new_offset;
-    Field field(new_size, nullptr);
-    field.multiloc = true;
-    ++ub;
-    for (it = lb; it != ub; ++it) {
-        field.hasobjref |= it->second.hasobjref;
-        field.hasload |= it->second.hasload;
-        field.hasaggr |= it->second.hasaggr;
-        field.accesses.append(it->second.accesses.begin(), it->second.accesses.end());
-    }
-    memops.erase(lb, ub);
-    return *memops.emplace(new_offset, std::move(field)).first;
-}
-
-bool Optimizer::AllocUseInfo::addMemOp(Instruction *inst, unsigned opno, uint32_t offset,
-                                       Type *elty, bool isstore, const DataLayout &DL)
-{
-    MemOp memop(inst, opno);
-    memop.offset = offset;
-    uint64_t size = DL.getTypeStoreSize(elty);
-    if (size >= UINT32_MAX - offset)
-        return false;
-    memop.size = size;
-    memop.isaggr = isa<StructType>(elty) || isa<ArrayType>(elty) || isa<VectorType>(elty);
-    memop.isobjref = hasObjref(elty);
-    auto &field = getField(offset, size, elty);
-    if (field.first != offset || field.second.size != size)
-        field.second.multiloc = true;
-    if (!isstore)
-        field.second.hasload = true;
-    if (memop.isobjref) {
-        if (isstore) {
-            refstore = true;
-        }
-        else {
-            refload = true;
-        }
-        if (memop.isaggr)
-            field.second.hasaggr = true;
-        field.second.hasobjref = true;
-    }
-    else if (memop.isaggr) {
-        field.second.hasaggr = true;
-    }
-    field.second.accesses.push_back(memop);
-    return true;
-}
-
-JL_USED_FUNC void Optimizer::AllocUseInfo::dump()
-{
-    jl_safe_printf("escaped: %d\n", escaped);
-    jl_safe_printf("addrescaped: %d\n", addrescaped);
-    jl_safe_printf("hasload: %d\n", hasload);
-    jl_safe_printf("haspreserve: %d\n", haspreserve);
-    jl_safe_printf("refload: %d\n", refload);
-    jl_safe_printf("refstore: %d\n", refstore);
-    jl_safe_printf("hasunknownmem: %d\n", hasunknownmem);
-    jl_safe_printf("Uses: %d\n", (unsigned)uses.size());
-    for (auto inst: uses)
-        llvm_dump(inst);
-    if (!preserves.empty()) {
-        jl_safe_printf("Preserves: %d\n", (unsigned)preserves.size());
-        for (auto inst: preserves) {
-            llvm_dump(inst);
-        }
-    }
-    if (!memops.empty()) {
-        jl_safe_printf("Memops: %d\n", (unsigned)memops.size());
-        for (auto &field: memops) {
-            jl_safe_printf("  Field %d @ %d\n", field.second.size, field.first);
-            jl_safe_printf("    Accesses:\n");
-            for (auto memop: field.second.accesses) {
-                jl_safe_printf("    ");
-                llvm_dump(memop.inst);
-            }
-        }
-    }
-}
-
 void Optimizer::checkInst(Instruction *I)
 {
-    use_info.reset();
-    if (I->use_empty())
-        return;
-    CheckInst::Frame cur{I, 0, I->use_begin(), I->use_end()};
-    check_stack.clear();
-
-    // Recursion
-    auto push_inst = [&] (Instruction *inst) {
-        if (cur.use_it != cur.use_end)
-            check_stack.push_back(cur);
-        cur.parent = inst;
-        cur.use_it = inst->use_begin();
-        cur.use_end = inst->use_end();
-    };
-
-    auto check_inst = [&] (Instruction *inst, Use *use) {
-        if (isa<LoadInst>(inst)) {
-            use_info.hasload = true;
-            if (cur.offset == UINT32_MAX || !use_info.addMemOp(inst, 0, cur.offset,
-                                                               inst->getType(),
-                                                               false, *pass.DL))
-                use_info.hasunknownmem = true;
-            return true;
-        }
-        if (auto call = dyn_cast<CallInst>(inst)) {
-            // TODO handle `memcmp`
-            // None of the intrinsics should care if the memory is stack or heap allocated.
-            auto callee = call->getCalledOperand();
-            if (auto II = dyn_cast<IntrinsicInst>(call)) {
-                if (auto id = II->getIntrinsicID()) {
-                    if (id == Intrinsic::memset) {
-                        assert(call->getNumArgOperands() == 4);
-                        if (cur.offset == UINT32_MAX ||
-                            !isa<ConstantInt>(call->getArgOperand(2)) ||
-                            !isa<ConstantInt>(call->getArgOperand(1)) ||
-                            (cast<ConstantInt>(call->getArgOperand(2))->getLimitedValue() >=
-                             UINT32_MAX - cur.offset))
-                            use_info.hasunknownmem = true;
-                        return true;
-                    }
-                    if (id == Intrinsic::lifetime_start || id == Intrinsic::lifetime_end ||
-                        isa<DbgInfoIntrinsic>(II))
-                        return true;
-                    use_info.addrescaped = true;
-                    return true;
-                }
-                if (pass.gc_preserve_begin_func == callee) {
-                    for (auto user: call->users())
-                        use_info.uses.insert(cast<Instruction>(user));
-                    use_info.preserves.insert(call);
-                    use_info.haspreserve = true;
-                    return true;
-                }
-            }
-            if (pass.pointer_from_objref_func == callee) {
-                use_info.addrescaped = true;
-                return true;
-            }
-            if (pass.typeof_func == callee) {
-                use_info.hastypeof = true;
-                assert(use->get() == I);
-                return true;
-            }
-            if (pass.write_barrier_func == callee)
-                return true;
-            auto opno = use->getOperandNo();
-            // Uses in `jl_roots` operand bundle are not counted as escaping, everything else is.
-            if (!call->isBundleOperand(opno) ||
-                call->getOperandBundleForOperand(opno).getTagName() != "jl_roots") {
-                use_info.escaped = true;
-                return false;
-            }
-            use_info.haspreserve = true;
-            return true;
-        }
-        if (auto store = dyn_cast<StoreInst>(inst)) {
-            // Only store value count
-            if (use->getOperandNo() != StoreInst::getPointerOperandIndex()) {
-                use_info.escaped = true;
-                return false;
-            }
-            auto storev = store->getValueOperand();
-            if (cur.offset == UINT32_MAX || !use_info.addMemOp(inst, use->getOperandNo(),
-                                                               cur.offset, storev->getType(),
-                                                               true, *pass.DL))
-                use_info.hasunknownmem = true;
-            return true;
-        }
-        if (isa<AddrSpaceCastInst>(inst) || isa<BitCastInst>(inst)) {
-            push_inst(inst);
-            return true;
-        }
-        if (auto gep = dyn_cast<GetElementPtrInst>(inst)) {
-            uint64_t next_offset = cur.offset;
-            if (cur.offset != UINT32_MAX) {
-                APInt apoffset(sizeof(void*) * 8, cur.offset, true);
-                if (!gep->accumulateConstantOffset(*pass.DL, apoffset) || apoffset.isNegative()) {
-                    next_offset = UINT32_MAX;
-                }
-                else {
-                    next_offset = apoffset.getLimitedValue();
-                    if (next_offset > UINT32_MAX) {
-                        next_offset = UINT32_MAX;
-                    }
-                }
-            }
-            push_inst(inst);
-            cur.offset = (uint32_t)next_offset;
-            return true;
-        }
-        use_info.escaped = true;
-        return false;
-    };
-
-    while (true) {
-        assert(cur.use_it != cur.use_end);
-        auto use = &*cur.use_it;
-        auto inst = dyn_cast<Instruction>(use->getUser());
-        ++cur.use_it;
-        if (!inst) {
-            use_info.escaped = true;
-            return;
-        }
-        if (!check_inst(inst, use))
-            return;
-        use_info.uses.insert(inst);
-        if (cur.use_it == cur.use_end) {
-            if (check_stack.empty())
-                return;
-            cur = check_stack.back();
-            check_stack.pop_back();
-        }
-    }
+    jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, pass, *pass.DL};
+    jl_alloc::runEscapeAnalysis(I, required);
 }
 
 void Optimizer::insertLifetimeEnd(Value *ptr, Constant *sz, Instruction *insert)
@@ -863,7 +504,7 @@ void Optimizer::insertLifetime(Value *ptr, Constant *sz, Instruction *orig)
 void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
                                         Instruction *orig_i, Instruction *new_i)
 {
-    auto nargs = call->getNumArgOperands();
+    auto nargs = call->arg_size();
     SmallVector<Value*, 8> args(nargs);
     SmallVector<Type*, 8> argTys(nargs);
     for (unsigned i = 0; i < nargs; i++) {
@@ -884,21 +525,11 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
         SmallVector<Intrinsic::IITDescriptor, 8> Table;
         getIntrinsicInfoTableEntries(ID, Table);
         ArrayRef<Intrinsic::IITDescriptor> TableRef = Table;
-#if JL_LLVM_VERSION >= 90000
         auto res = Intrinsic::matchIntrinsicSignature(newfType, TableRef, overloadTys);
         assert(res == Intrinsic::MatchIntrinsicTypes_Match);
         (void)res;
-#else
-        bool res = Intrinsic::matchIntrinsicType(oldfType->getReturnType(), TableRef, overloadTys);
-        assert(!res);
-        for (auto Ty : newfType->params()) {
-            res = Intrinsic::matchIntrinsicType(Ty, TableRef, overloadTys);
-            assert(!res);
-        }
-        (void)res;
-#endif
-        bool matchvararg = Intrinsic::matchIntrinsicVarArg(newfType->isVarArg(), TableRef);
-        assert(!matchvararg);
+        bool matchvararg = !Intrinsic::matchIntrinsicVarArg(newfType->isVarArg(), TableRef);
+        assert(matchvararg);
         (void)matchvararg;
     }
     auto newF = Intrinsic::getDeclaration(call->getModule(), ID, overloadTys);
@@ -907,8 +538,8 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
     auto newCall = CallInst::Create(newF, args, "", call);
     newCall->setTailCallKind(call->getTailCallKind());
     auto old_attrs = call->getAttributes();
-    newCall->setAttributes(AttributeList::get(pass.getLLVMContext(), old_attrs.getFnAttributes(),
-                                              old_attrs.getRetAttributes(), {}));
+    newCall->setAttributes(AttributeList::get(pass.getLLVMContext(), getFnAttrs(old_attrs),
+                                              getRetAttrs(old_attrs), {}));
     newCall->setDebugLoc(call->getDebugLoc());
     call->replaceAllUsesWith(newCall);
     call->eraseFromParent();
@@ -918,6 +549,8 @@ void Optimizer::replaceIntrinsicUseWith(IntrinsicInst *call, Intrinsic::ID ID,
 // all the original safepoints.
 void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
 {
+    ++RemovedAllocs;
+    ++StackAllocs;
     auto tag = orig_inst->getArgOperand(2);
     removed.push_back(orig_inst);
     // The allocation does not escape or get used in a phi node so none of the derived
@@ -933,8 +566,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
     AllocaInst *buff;
     Instruction *ptr;
     if (sz == 0) {
-        buff = prolog_builder.CreateAlloca(pass.T_int8, ConstantInt::get(pass.T_int64, 0));
-        ptr = buff;
+        ptr = buff = prolog_builder.CreateAlloca(Type::getInt8Ty(prolog_builder.getContext()), ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), 0));
     }
     else if (has_ref) {
         // Allocate with the correct type so that the GC frame lowering pass will
@@ -943,15 +575,20 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
         // the alloca isn't optimized out.
         buff = prolog_builder.CreateAlloca(pass.T_prjlvalue);
         buff->setAlignment(Align(align));
-        ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, pass.T_pint8));
+        ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext())));
     }
     else {
-        buff = prolog_builder.CreateAlloca(Type::getIntNTy(pass.getLLVMContext(), sz * 8));
+        Type *buffty;
+        if (pass.DL->isLegalInteger(sz * 8))
+            buffty = Type::getIntNTy(pass.getLLVMContext(), sz * 8);
+        else
+            buffty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), sz);
+        buff = prolog_builder.CreateAlloca(buffty);
         buff->setAlignment(Align(align));
-        ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, pass.T_pint8));
+        ptr = cast<Instruction>(prolog_builder.CreateBitCast(buff, Type::getInt8PtrTy(prolog_builder.getContext())));
     }
-    insertLifetime(ptr, ConstantInt::get(pass.T_int64, sz), orig_inst);
-    auto new_inst = cast<Instruction>(prolog_builder.CreateBitCast(ptr, pass.T_pjlvalue));
+    insertLifetime(ptr, ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), sz), orig_inst);
+    auto new_inst = cast<Instruction>(prolog_builder.CreateBitCast(ptr, JuliaType::get_pjlvalue_ty(prolog_builder.getContext())));
     new_inst->takeName(orig_inst);
 
     auto simple_replace = [&] (Instruction *orig_i, Instruction *new_i) {
@@ -1002,6 +639,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
                 return;
             }
             if (pass.typeof_func == callee) {
+                ++RemovedTypeofs;
                 call->replaceAllUsesWith(tag);
                 call->eraseFromParent();
                 return;
@@ -1016,7 +654,9 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
                 }
                 return;
             }
-            if (pass.write_barrier_func == callee) {
+            if (pass.write_barrier_func == callee ||
+                pass.write_barrier_binding_func == callee) {
+                ++RemovedWriteBarriers;
                 call->eraseFromParent();
                 return;
             }
@@ -1031,8 +671,7 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
             user->replaceUsesOfWith(orig_i, replace);
         }
         else if (isa<AddrSpaceCastInst>(user) || isa<BitCastInst>(user)) {
-            auto cast_t = PointerType::get(cast<PointerType>(user->getType())->getElementType(),
-                                           0);
+            auto cast_t = PointerType::getWithSamePointeeType(cast<PointerType>(user->getType()), AddressSpace::Generic);
             auto replace_i = new_i;
             Type *new_t = new_i->getType();
             if (cast_t != new_t) {
@@ -1073,6 +712,8 @@ void Optimizer::moveToStack(CallInst *orig_inst, size_t sz, bool has_ref)
 // all the original safepoints.
 void Optimizer::removeAlloc(CallInst *orig_inst)
 {
+    ++RemovedAllocs;
+    ++DeletedAllocs;
     auto tag = orig_inst->getArgOperand(2);
     removed.push_back(orig_inst);
     auto simple_remove = [&] (Instruction *orig_i) {
@@ -1117,11 +758,14 @@ void Optimizer::removeAlloc(CallInst *orig_inst)
                 return;
             }
             if (pass.typeof_func == callee) {
+                ++RemovedTypeofs;
                 call->replaceAllUsesWith(tag);
                 call->eraseFromParent();
                 return;
             }
-            if (pass.write_barrier_func == callee) {
+            if (pass.write_barrier_func == callee ||
+                pass.write_barrier_binding_func == callee) {
+                ++RemovedWriteBarriers;
                 call->eraseFromParent();
                 return;
             }
@@ -1162,10 +806,12 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
 {
     auto tag = orig_inst->getArgOperand(2);
     // `julia.typeof` is only legal on the original pointer, no need to scan recursively
+    size_t last_deleted = removed.size();
     for (auto user: orig_inst->users()) {
         if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
             if (pass.typeof_func == callee) {
+                ++RemovedTypeofs;
                 call->replaceAllUsesWith(tag);
                 // Push to the removed instructions to trigger `finalize` to
                 // return the correct result.
@@ -1174,11 +820,15 @@ void Optimizer::optimizeTag(CallInst *orig_inst)
             }
         }
     }
+    while (last_deleted < removed.size())
+        removed[last_deleted++]->replaceUsesOfWith(orig_inst, UndefValue::get(orig_inst->getType()));
 }
 
 void Optimizer::splitOnStack(CallInst *orig_inst)
 {
     auto tag = orig_inst->getArgOperand(2);
+    ++RemovedAllocs;
+    ++SplitAllocs;
     removed.push_back(orig_inst);
     IRBuilder<> prolog_builder(&F.getEntryBlock().front());
     struct SplitSlot {
@@ -1203,12 +853,14 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
         else if (field.elty && !field.multiloc) {
             allocty = field.elty;
         }
-        else {
+        else if (pass.DL->isLegalInteger(field.size * 8)) {
             allocty = Type::getIntNTy(pass.getLLVMContext(), field.size * 8);
+        } else {
+            allocty = ArrayType::get(Type::getInt8Ty(pass.getLLVMContext()), field.size);
         }
         slot.slot = prolog_builder.CreateAlloca(allocty);
-        insertLifetime(prolog_builder.CreateBitCast(slot.slot, pass.T_pint8),
-                       ConstantInt::get(pass.T_int64, field.size), orig_inst);
+        insertLifetime(prolog_builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(prolog_builder.getContext())),
+                       ConstantInt::get(Type::getInt64Ty(prolog_builder.getContext()), field.size), orig_inst);
         slots.push_back(std::move(slot));
     }
     const auto nslots = slots.size();
@@ -1264,8 +916,8 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
             }
         }
         else {
-            addr = builder.CreateBitCast(slot.slot, pass.T_pint8);
-            addr = builder.CreateConstInBoundsGEP1_32(pass.T_int8, addr, offset);
+            addr = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext()));
+            addr = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), addr, offset);
             addr = builder.CreateBitCast(addr, elty->getPointerTo());
         }
         return addr;
@@ -1293,11 +945,7 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 val = newload;
             }
             // TODO: should we use `load->clone()`, or manually copy any other metadata?
-#if JL_LLVM_VERSION >= 100000
             newload->setAlignment(load->getAlign());
-#else
-            newload->setAlignment(load->getAlignment());
-#endif
             // since we're moving heap-to-stack, it is safe to downgrade the atomic level to NotAtomic
             newload->setOrdering(AtomicOrdering::NotAtomic);
             load->replaceAllUsesWith(val);
@@ -1319,14 +967,14 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
             StoreInst *newstore;
             if (slot.isref) {
                 assert(slot.offset == offset);
+                auto T_pjlvalue = JuliaType::get_pjlvalue_ty(builder.getContext());
                 if (!isa<PointerType>(store_ty)) {
-                    store_val = builder.CreateBitCast(store_val, pass.T_size);
-                    store_val = builder.CreateIntToPtr(store_val, pass.T_pjlvalue);
-                    store_ty = pass.T_pjlvalue;
+                    store_val = builder.CreateBitCast(store_val, getSizeTy(builder.getContext()));
+                    store_val = builder.CreateIntToPtr(store_val, T_pjlvalue);
+                    store_ty = T_pjlvalue;
                 }
                 else {
-                    store_ty = cast<PointerType>(pass.T_pjlvalue)->getElementType()
-                        ->getPointerTo(cast<PointerType>(store_ty)->getAddressSpace());
+                    store_ty = PointerType::getWithSamePointeeType(T_pjlvalue, cast<PointerType>(store_ty)->getAddressSpace());
                     store_val = builder.CreateBitCast(store_val, store_ty);
                 }
                 if (cast<PointerType>(store_ty)->getAddressSpace() != AddressSpace::Tracked)
@@ -1337,16 +985,28 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 newstore = builder.CreateStore(store_val, slot_gep(slot, offset, store_ty, builder));
             }
             // TODO: should we use `store->clone()`, or manually copy any other metadata?
-#if JL_LLVM_VERSION >= 100000
             newstore->setAlignment(store->getAlign());
-#else
-            newstore->setAlignment(store->getAlignment());
-#endif
             // since we're moving heap-to-stack, it is safe to downgrade the atomic level to NotAtomic
             newstore->setOrdering(AtomicOrdering::NotAtomic);
             store->eraseFromParent();
             return;
         }
+        else if (isa<AtomicCmpXchgInst>(user) || isa<AtomicRMWInst>(user)) {
+            auto slot_idx = find_slot(offset);
+            auto &slot = slots[slot_idx];
+            assert(slot.offset <= offset && slot.offset + slot.size >= offset);
+            IRBuilder<> builder(user);
+            Value *newptr;
+            if (slot.isref) {
+                assert(slot.offset == offset);
+                newptr = slot.slot;
+            }
+            else {
+                Value *Val = isa<AtomicCmpXchgInst>(user) ? cast<AtomicCmpXchgInst>(user)->getNewValOperand() : cast<AtomicRMWInst>(user)->getValOperand();
+                newptr = slot_gep(slot, offset, Val->getType(), builder);
+            }
+            *use = newptr;
+        }
         else if (auto call = dyn_cast<CallInst>(user)) {
             auto callee = call->getCalledOperand();
             assert(callee); // makes it clear for clang analyser that `callee` is not NULL
@@ -1372,26 +1032,22 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                                 else {
                                     uint64_t intval;
                                     memset(&intval, val, 8);
-                                    Constant *val = ConstantInt::get(pass.T_size, intval);
-                                    val = ConstantExpr::getIntToPtr(val, pass.T_pjlvalue);
+                                    Constant *val = ConstantInt::get(getSizeTy(builder.getContext()), intval);
+                                    val = ConstantExpr::getIntToPtr(val, JuliaType::get_pjlvalue_ty(builder.getContext()));
                                     ptr = ConstantExpr::getAddrSpaceCast(val, pass.T_prjlvalue);
                                 }
                                 StoreInst *store = builder.CreateAlignedStore(ptr, slot.slot, Align(sizeof(void*)));
                                 store->setOrdering(AtomicOrdering::NotAtomic);
                                 continue;
                             }
-                            auto ptr8 = builder.CreateBitCast(slot.slot, pass.T_pint8);
+                            auto ptr8 = builder.CreateBitCast(slot.slot, Type::getInt8PtrTy(builder.getContext()));
                             if (offset > slot.offset)
-                                ptr8 = builder.CreateConstInBoundsGEP1_32(pass.T_int8, ptr8,
+                                ptr8 = builder.CreateConstInBoundsGEP1_32(Type::getInt8Ty(builder.getContext()), ptr8,
                                                                           offset - slot.offset);
                             auto sub_size = std::min(slot.offset + slot.size, offset + size) -
                                 std::max(offset, slot.offset);
                             // TODO: alignment computation
-#if JL_LLVM_VERSION >= 100000
                             builder.CreateMemSet(ptr8, val_arg, sub_size, MaybeAlign(0));
-#else
-                            builder.CreateMemSet(ptr8, val_arg, sub_size, 0);
-#endif
                         }
                         call->eraseFromParent();
                         return;
@@ -1401,17 +1057,20 @@ void Optimizer::splitOnStack(CallInst *orig_inst)
                 }
             }
             if (pass.typeof_func == callee) {
+                ++RemovedTypeofs;
                 call->replaceAllUsesWith(tag);
                 call->eraseFromParent();
                 return;
             }
-            if (pass.write_barrier_func == callee) {
+            if (pass.write_barrier_func == callee ||
+                pass.write_barrier_binding_func == callee) {
+                ++RemovedWriteBarriers;
                 call->eraseFromParent();
                 return;
             }
             if (pass.gc_preserve_begin_func == callee) {
                 SmallVector<Value*,8> operands;
-                for (auto &arg: call->arg_operands()) {
+                for (auto &arg: call->args()) {
                     if (arg.get() == orig_i || isa<Constant>(arg.get()))
                         continue;
                     operands.push_back(arg.get());
@@ -1504,26 +1163,47 @@ bool AllocOpt::doInitialization(Module &M)
 
     DL = &M.getDataLayout();
 
-    T_int64 = Type::getInt64Ty(getLLVMContext());
-
-    lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 });
-    lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 });
+    lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { Type::getInt8PtrTy(M.getContext()) });
+    lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { Type::getInt8PtrTy(M.getContext()) });
 
     return true;
 }
 
-bool AllocOpt::runOnFunction(Function &F)
+bool AllocOpt::runOnFunction(Function &F, function_ref<DominatorTree&()> GetDT)
 {
     if (!alloc_obj_func)
         return false;
-    Optimizer optimizer(F, *this);
+    Optimizer optimizer(F, *this, std::move(GetDT));
     optimizer.initialize();
     optimizer.optimizeAll();
-    return optimizer.finalize();
+    bool modified = optimizer.finalize();
+    assert(!verifyFunction(F));
+    return modified;
 }
 
-char AllocOpt::ID = 0;
-static RegisterPass<AllocOpt> X("AllocOpt", "Promote heap allocation to stack",
+struct AllocOptLegacy : public FunctionPass {
+    static char ID;
+    AllocOpt opt;
+    AllocOptLegacy() : FunctionPass(ID) {
+        llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
+    }
+    bool doInitialization(Module &m) override {
+        return opt.doInitialization(m);
+    }
+    bool runOnFunction(Function &F) override {
+        return opt.runOnFunction(F, [this]() -> DominatorTree & {return getAnalysis<DominatorTreeWrapperPass>().getDomTree();});
+    }
+    void getAnalysisUsage(AnalysisUsage &AU) const override
+    {
+        FunctionPass::getAnalysisUsage(AU);
+        AU.addRequired<DominatorTreeWrapperPass>();
+        AU.addPreserved<DominatorTreeWrapperPass>();
+        AU.setPreservesCFG();
+    }
+};
+
+char AllocOptLegacy::ID = 0;
+static RegisterPass<AllocOptLegacy> X("AllocOpt", "Promote heap allocation to stack",
                                 false /* Only looks at CFG */,
                                 false /* Analysis Pass */);
 
@@ -1531,10 +1211,25 @@ static RegisterPass<AllocOpt> X("AllocOpt", "Promote heap allocation to stack",
 
 Pass *createAllocOptPass()
 {
-    return new AllocOpt();
+    return new AllocOptLegacy();
+}
+
+PreservedAnalyses AllocOptPass::run(Function &F, FunctionAnalysisManager &AM) {
+    AllocOpt opt;
+    bool modified = opt.doInitialization(*F.getParent());
+    if (opt.runOnFunction(F, [&]()->DominatorTree &{ return AM.getResult<DominatorTreeAnalysis>(F); })) {
+        modified = true;
+    }
+    if (modified) {
+        auto preserved = PreservedAnalyses::allInSet<CFGAnalyses>();
+        preserved.preserve<DominatorTreeAnalysis>();
+        return preserved;
+    } else {
+        return PreservedAnalyses::all();
+    }
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddAllocOptPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddAllocOptPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createAllocOptPass());
 }
diff --git a/src/llvm-api.cpp b/src/llvm-api.cpp
deleted file mode 100644
index 7f0ed5207c7a37..00000000000000
--- a/src/llvm-api.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-// Extensions of the LLVM C API for LLVM.jl
-//
-// These are part of the Julia repository as they need to be
-// built with the same C++ toolchain Julia & LLVM are built with
-//
-// They are not to be considered a stable API, and will be removed
-// when better package build systems are available
-
-#include "llvm-version.h"
-#include <llvm-c/Core.h>
-#include <llvm-c/Types.h>
-
-#include <llvm/ADT/Triple.h>
-#include <llvm/Analysis/TargetLibraryInfo.h>
-#include <llvm/Analysis/TargetTransformInfo.h>
-#include <llvm/IR/Attributes.h>
-#if JL_LLVM_VERSION < 110000
-#include <llvm/IR/CallSite.h>
-#endif
-#include <llvm/IR/DebugInfo.h>
-#include <llvm/IR/Function.h>
-#include <llvm/IR/GlobalValue.h>
-#include <llvm/IR/Instruction.h>
-#include <llvm/IR/LegacyPassManager.h>
-#include <llvm/IR/Module.h>
-#include <llvm/Support/TargetSelect.h>
-#include <llvm/Transforms/IPO.h>
-#include <llvm/Transforms/Utils/ModuleUtils.h>
-
-#include "julia.h"
-
-using namespace llvm::legacy;
-
-namespace llvm {
-
-
-// Initialization functions
-//
-// The LLVMInitialize* functions and friends are defined `static inline`
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllTargetInfos()
-{
-    InitializeAllTargetInfos();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllTargets()
-{
-    InitializeAllTargets();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllTargetMCs()
-{
-    InitializeAllTargetMCs();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllAsmPrinters()
-{
-    InitializeAllAsmPrinters();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllAsmParsers()
-{
-    InitializeAllAsmParsers();
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraInitializeAllDisassemblers()
-{
-    InitializeAllDisassemblers();
-}
-
-extern "C" JL_DLLEXPORT LLVMBool LLVMExtraInitializeNativeTarget()
-{
-    return InitializeNativeTarget();
-}
-
-extern "C" JL_DLLEXPORT LLVMBool LLVMExtraInitializeNativeAsmParser()
-{
-    return InitializeNativeTargetAsmParser();
-}
-
-extern "C" JL_DLLEXPORT LLVMBool LLVMExtraInitializeNativeAsmPrinter()
-{
-    return InitializeNativeTargetAsmPrinter();
-}
-
-extern "C" JL_DLLEXPORT LLVMBool LLVMExtraInitializeNativeDisassembler()
-{
-    return InitializeNativeTargetDisassembler();
-}
-
-// Exporting the Barrier LLVM pass
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddBarrierNoopPass(LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(createBarrierNoopPass());
-}
-
-// Infrastructure for writing LLVM passes in Julia
-
-typedef struct LLVMOpaquePass *LLVMPassRef;
-DEFINE_STDCXX_CONVERSION_FUNCTIONS(Pass, LLVMPassRef)
-
-extern "C" JL_DLLEXPORT void
-LLVMExtraAddPass(LLVMPassManagerRef PM, LLVMPassRef P)
-{
-    unwrap(PM)->add(unwrap(P));
-}
-
-typedef LLVMBool (*LLVMPassCallback)(void* Ref, void* Data);
-
-StringMap<char *> PassIDs;
-char &CreatePassID(const char *Name)
-{
-    std::string NameStr(Name);
-    if (PassIDs.find(NameStr) != PassIDs.end())
-        return *PassIDs[NameStr];
-    else
-        return *(PassIDs[NameStr] = new char);
-}
-
-class JuliaModulePass : public ModulePass {
-public:
-    JuliaModulePass(const char *Name, LLVMPassCallback Callback, void* Data)
-        : ModulePass(CreatePassID(Name)), Callback(Callback), Data(Data)
-    {
-    }
-
-    bool runOnModule(Module &M)
-    {
-        void *Ref = (void*)wrap(&M);
-        bool Changed = Callback(Ref, Data);
-        return Changed;
-    }
-
-private:
-    LLVMPassCallback Callback;
-    void* Data;
-};
-
-extern "C" JL_DLLEXPORT LLVMPassRef
-LLVMExtraCreateModulePass2(const char *Name, LLVMPassCallback Callback, void *Data)
-{
-    return wrap(new JuliaModulePass(Name, Callback, Data));
-}
-
-class JuliaFunctionPass : public FunctionPass {
-public:
-    JuliaFunctionPass(const char *Name, LLVMPassCallback Callback, void* Data)
-        : FunctionPass(CreatePassID(Name)), Callback(Callback), Data(Data)
-    {
-    }
-
-    bool runOnFunction(Function &Fn)
-    {
-        void *Ref = (void*)wrap(&Fn);
-        bool Changed = Callback(Ref, Data);
-        return Changed;
-    }
-
-private:
-    LLVMPassCallback Callback;
-    void* Data;
-};
-
-extern "C" JL_DLLEXPORT LLVMPassRef
-LLVMExtraCreateFunctionPass2(const char *Name, LLVMPassCallback Callback, void *Data)
-{
-    return wrap(new JuliaFunctionPass(Name, Callback, Data));
-}
-
-
-// Various missing functions
-
-extern "C" JL_DLLEXPORT unsigned int LLVMExtraGetDebugMDVersion()
-{
-    return DEBUG_METADATA_VERSION;
-}
-
-extern "C" JL_DLLEXPORT LLVMContextRef LLVMExtraGetValueContext(LLVMValueRef V)
-{
-    return wrap(&unwrap(V)->getContext());
-}
-
-extern "C" JL_DLLEXPORT void
-LLVMExtraAddTargetLibraryInfoByTiple(const char *T, LLVMPassManagerRef PM)
-{
-    unwrap(PM)->add(new TargetLibraryInfoWrapperPass(Triple(T)));
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddInternalizePassWithExportList(
-        LLVMPassManagerRef PM, const char **ExportList, size_t Length)
-{
-    auto PreserveFobj = [=](const GlobalValue &GV) {
-        for (size_t i = 0; i < Length; i++) {
-            if (strcmp(ExportList[i], GV.getName().data()) == 0)
-                return true;
-        }
-        return false;
-    };
-    unwrap(PM)->add(createInternalizePass(PreserveFobj));
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAppendToUsed(LLVMModuleRef Mod,
-                                                   LLVMValueRef* Values,
-                                                   size_t Count) {
-    SmallVector<GlobalValue *, 1> GlobalValues;
-    for (auto *Value : makeArrayRef(Values, Count))
-        GlobalValues.push_back(cast<GlobalValue>(unwrap(Value)));
-    appendToUsed(*unwrap(Mod), GlobalValues);
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAppendToCompilerUsed(LLVMModuleRef Mod,
-                                                           LLVMValueRef* Values,
-                                                           size_t Count) {
-    SmallVector<GlobalValue *, 1> GlobalValues;
-    for (auto *Value : makeArrayRef(Values, Count))
-        GlobalValues.push_back(cast<GlobalValue>(unwrap(Value)));
-    appendToCompilerUsed(*unwrap(Mod), GlobalValues);
-}
-
-extern "C" JL_DLLEXPORT void LLVMExtraAddGenericAnalysisPasses(LLVMPassManagerRef PM) {
-    unwrap(PM)->add(createTargetTransformInfoWrapperPass(TargetIRAnalysis()));
-}
-
-
-// Awaiting D46627
-
-extern "C" JL_DLLEXPORT int LLVMExtraGetSourceLocation(LLVMValueRef V, int index,
-                                                        const char** Name,
-                                                        const char** Filename,
-                                                        unsigned int* Line,
-                                                        unsigned int* Column)
-{
-    if (auto I = dyn_cast<Instruction>(unwrap(V))) {
-        const DILocation* DIL = I->getDebugLoc();
-        if (!DIL)
-            return 0;
-
-        for (int i = index; i > 0; i--) {
-            DIL = DIL->getInlinedAt();
-            if (!DIL)
-                return 0;
-        }
-
-        *Name = DIL->getScope()->getName().data();
-        *Filename = DIL->getScope()->getFilename().data();
-        *Line = DIL->getLine();
-        *Column = DIL->getColumn();
-
-        return 1;
-
-    } else {
-        jl_exceptionf(jl_argumenterror_type, "Can only get source location information of instructions");
-    }
-}
-
-} // namespace llvm
diff --git a/src/llvm-cpufeatures.cpp b/src/llvm-cpufeatures.cpp
new file mode 100644
index 00000000000000..75ac96e1b30605
--- /dev/null
+++ b/src/llvm-cpufeatures.cpp
@@ -0,0 +1,156 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// Lower intrinsics that expose subtarget information to the language. This makes it
+// possible to write code that changes behavior based on, e.g., the availability of
+// specific CPU features.
+//
+// The following intrinsics are supported:
+// - julia.cpu.have_fma.$typ: returns 1 if the platform supports hardware-accelerated FMA.
+//
+// Some of these intrinsics are overloaded, i.e., they are suffixed with a type name.
+// To extend support, make sure codegen (in intrinsics.cpp) knows how to emit them.
+//
+// XXX: can / do we want to make this a codegen pass to enable querying TargetPassConfig
+//      instead of using the global target machine?
+
+#include "llvm-version.h"
+#include "passes.h"
+
+#include <llvm/ADT/Statistic.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/Instructions.h>
+#include <llvm/IR/PassManager.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/Target/TargetMachine.h>
+#include <llvm/Support/Debug.h>
+
+#include "julia.h"
+#include "jitlayers.h"
+
+#define DEBUG_TYPE "cpufeatures"
+
+using namespace llvm;
+
+STATISTIC(LoweredWithFMA, "Number of have_fma's that were lowered to true");
+STATISTIC(LoweredWithoutFMA, "Number of have_fma's that were lowered to false");
+
+extern JuliaOJIT *jl_ExecutionEngine;
+
+// whether this platform unconditionally (i.e. without needing multiversioning) supports FMA
+Optional<bool> always_have_fma(Function &intr) {
+    auto intr_name = intr.getName();
+    auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
+
+#if defined(_CPU_AARCH64_)
+    return typ == "f32" || typ == "f64";
+#else
+    (void)typ;
+    return {};
+#endif
+}
+
+bool have_fma(Function &intr, Function &caller) {
+    auto unconditional = always_have_fma(intr);
+    if (unconditional.hasValue())
+        return unconditional.getValue();
+
+    auto intr_name = intr.getName();
+    auto typ = intr_name.substr(strlen("julia.cpu.have_fma."));
+
+    Attribute FSAttr = caller.getFnAttribute("target-features");
+    StringRef FS =
+        FSAttr.isValid() ? FSAttr.getValueAsString() : jl_ExecutionEngine->getTargetFeatureString();
+
+    SmallVector<StringRef, 6> Features;
+    FS.split(Features, ',');
+    for (StringRef Feature : Features)
+#if defined _CPU_ARM_
+      if (Feature == "+vfp4")
+        return typ == "f32" || typ == "f64";
+      else if (Feature == "+vfp4sp")
+        return typ == "f32";
+#else
+      if (Feature == "+fma" || Feature == "+fma4")
+        return typ == "f32" || typ == "f64";
+#endif
+
+    return false;
+}
+
+void lowerHaveFMA(Function &intr, Function &caller, CallInst *I) {
+    if (have_fma(intr, caller)) {
+        ++LoweredWithFMA;
+        I->replaceAllUsesWith(ConstantInt::get(I->getType(), 1));
+    } else {
+        ++LoweredWithoutFMA;
+        I->replaceAllUsesWith(ConstantInt::get(I->getType(), 0));
+    }
+    return;
+}
+
+bool lowerCPUFeatures(Module &M)
+{
+    SmallVector<Instruction*,6> Materialized;
+
+    for (auto &F: M.functions()) {
+        auto FN = F.getName();
+
+        if (FN.startswith("julia.cpu.have_fma.")) {
+            for (Use &U: F.uses()) {
+                User *RU = U.getUser();
+                CallInst *I = cast<CallInst>(RU);
+                lowerHaveFMA(F, *I->getParent()->getParent(), I);
+                Materialized.push_back(I);
+            }
+        }
+    }
+
+    if (!Materialized.empty()) {
+        for (auto I: Materialized) {
+            I->eraseFromParent();
+        }
+        assert(!verifyModule(M));
+        return true;
+    } else {
+        return false;
+    }
+}
+
+PreservedAnalyses CPUFeatures::run(Module &M, ModuleAnalysisManager &AM)
+{
+    if (lowerCPUFeatures(M)) {
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    }
+    return PreservedAnalyses::all();
+}
+
+namespace {
+struct CPUFeaturesLegacy : public ModulePass {
+    static char ID;
+    CPUFeaturesLegacy() : ModulePass(ID) {};
+
+    bool runOnModule(Module &M)
+    {
+        return lowerCPUFeatures(M);
+    }
+};
+
+char CPUFeaturesLegacy::ID = 0;
+static RegisterPass<CPUFeaturesLegacy>
+        Y("CPUFeatures",
+          "Lower calls to CPU feature testing intrinsics.",
+          false,
+          false);
+}
+
+Pass *createCPUFeaturesPass()
+{
+    return new CPUFeaturesLegacy();
+}
+
+extern "C" JL_DLLEXPORT void LLVMExtraAddCPUFeaturesPass_impl(LLVMPassManagerRef PM)
+{
+    unwrap(PM)->add(createCPUFeaturesPass());
+}
diff --git a/src/llvm-demote-float16.cpp b/src/llvm-demote-float16.cpp
new file mode 100644
index 00000000000000..300be27cf90793
--- /dev/null
+++ b/src/llvm-demote-float16.cpp
@@ -0,0 +1,195 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// This pass finds floating-point operations on 16-bit (half precision) values, and replaces
+// them by equivalent operations on 32-bit (single precision) values surrounded by a fpext
+// and fptrunc. This ensures that the exact semantics of IEEE floating-point are preserved.
+//
+// Without this pass, back-ends that do not natively support half-precision (e.g. x86_64)
+// similarly pattern-match half-precision operations with single-precision equivalents, but
+// without truncating after every operation. Doing so breaks floating-point operations that
+// assume precise semantics, such as Dekker arithmetic (as used in twiceprecision.jl).
+//
+// This pass is intended to run late in the pipeline, and should not be followed by
+// instcombine. A run of GVN is recommended to clean-up identical conversions.
+
+#include "llvm-version.h"
+
+#define DEBUG_TYPE "demote_float16"
+
+#include "support/dtypes.h"
+#include "passes.h"
+
+#include <llvm/Pass.h>
+#include <llvm/ADT/Statistic.h>
+#include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/PassManager.h>
+#include <llvm/IR/Module.h>
+#include <llvm/IR/Verifier.h>
+#include <llvm/Support/Debug.h>
+
+using namespace llvm;
+
+STATISTIC(TotalChanged, "Total number of instructions changed");
+STATISTIC(TotalExt, "Total number of FPExt instructions inserted");
+STATISTIC(TotalTrunc, "Total number of FPTrunc instructions inserted");
+#define INST_STATISTIC(Opcode) STATISTIC(Opcode##Changed, "Number of " #Opcode " instructions changed")
+INST_STATISTIC(FNeg);
+INST_STATISTIC(FAdd);
+INST_STATISTIC(FSub);
+INST_STATISTIC(FMul);
+INST_STATISTIC(FDiv);
+INST_STATISTIC(FRem);
+INST_STATISTIC(FCmp);
+#undef INST_STATISTIC
+
+namespace {
+
+static bool demoteFloat16(Function &F)
+{
+    auto &ctx = F.getContext();
+    auto T_float16 = Type::getHalfTy(ctx);
+    auto T_float32 = Type::getFloatTy(ctx);
+
+    SmallVector<Instruction *, 0> erase;
+    for (auto &BB : F) {
+        for (auto &I : BB) {
+            switch (I.getOpcode()) {
+            case Instruction::FNeg:
+            case Instruction::FAdd:
+            case Instruction::FSub:
+            case Instruction::FMul:
+            case Instruction::FDiv:
+            case Instruction::FRem:
+            case Instruction::FCmp:
+                break;
+            default:
+                continue;
+            }
+
+            // skip @fastmath operations
+            // TODO: more fine-grained check (afn?)
+            if (I.isFast())
+                continue;
+
+            IRBuilder<> builder(&I);
+
+            // extend Float16 operands to Float32
+            bool OperandsChanged = false;
+            SmallVector<Value *, 2> Operands(I.getNumOperands());
+            for (size_t i = 0; i < I.getNumOperands(); i++) {
+                Value *Op = I.getOperand(i);
+                if (Op->getType() == T_float16) {
+                    ++TotalExt;
+                    Op = builder.CreateFPExt(Op, T_float32);
+                    OperandsChanged = true;
+                }
+                Operands[i] = (Op);
+            }
+
+            // recreate the instruction if any operands changed,
+            // truncating the result back to Float16
+            if (OperandsChanged) {
+                Value *NewI;
+                ++TotalChanged;
+                switch (I.getOpcode()) {
+                case Instruction::FNeg:
+                    assert(Operands.size() == 1);
+                    ++FNegChanged;
+                    NewI = builder.CreateFNeg(Operands[0]);
+                    break;
+                case Instruction::FAdd:
+                    assert(Operands.size() == 2);
+                    ++FAddChanged;
+                    NewI = builder.CreateFAdd(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FSub:
+                    assert(Operands.size() == 2);
+                    ++FSubChanged;
+                    NewI = builder.CreateFSub(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FMul:
+                    assert(Operands.size() == 2);
+                    ++FMulChanged;
+                    NewI = builder.CreateFMul(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FDiv:
+                    assert(Operands.size() == 2);
+                    ++FDivChanged;
+                    NewI = builder.CreateFDiv(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FRem:
+                    assert(Operands.size() == 2);
+                    ++FRemChanged;
+                    NewI = builder.CreateFRem(Operands[0], Operands[1]);
+                    break;
+                case Instruction::FCmp:
+                    assert(Operands.size() == 2);
+                    ++FCmpChanged;
+                    NewI = builder.CreateFCmp(cast<FCmpInst>(&I)->getPredicate(),
+                                              Operands[0], Operands[1]);
+                    break;
+                default:
+                    abort();
+                }
+                cast<Instruction>(NewI)->copyMetadata(I);
+                cast<Instruction>(NewI)->copyFastMathFlags(&I);
+                if (NewI->getType() != I.getType()) {
+                    ++TotalTrunc;
+                    NewI = builder.CreateFPTrunc(NewI, I.getType());
+                }
+                I.replaceAllUsesWith(NewI);
+                erase.push_back(&I);
+            }
+        }
+    }
+
+    if (erase.size() > 0) {
+        for (auto V : erase)
+            V->eraseFromParent();
+        assert(!verifyFunction(F));
+        return true;
+    }
+    else
+        return false;
+}
+
+} // end anonymous namespace
+
+PreservedAnalyses DemoteFloat16::run(Function &F, FunctionAnalysisManager &AM)
+{
+    if (demoteFloat16(F)) {
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    }
+    return PreservedAnalyses::all();
+}
+
+namespace {
+
+struct DemoteFloat16Legacy : public FunctionPass {
+    static char ID;
+    DemoteFloat16Legacy() : FunctionPass(ID){};
+
+private:
+    bool runOnFunction(Function &F) override {
+        return demoteFloat16(F);
+    }
+};
+
+char DemoteFloat16Legacy::ID = 0;
+static RegisterPass<DemoteFloat16Legacy>
+        Y("DemoteFloat16",
+          "Demote Float16 operations to Float32 equivalents.",
+          false,
+          false);
+} // end anonymous namespac
+
+Pass *createDemoteFloat16Pass()
+{
+    return new DemoteFloat16Legacy();
+}
+
+extern "C" JL_DLLEXPORT void LLVMExtraAddDemoteFloat16Pass_impl(LLVMPassManagerRef PM)
+{
+    unwrap(PM)->add(createDemoteFloat16Pass());
+}
diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp
index e11df11dcc9762..b542d478fc68c1 100644
--- a/src/llvm-final-gc-lowering.cpp
+++ b/src/llvm-final-gc-lowering.cpp
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #include "llvm-version.h"
+#include "passes.h"
 
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Function.h>
@@ -28,20 +29,18 @@ using namespace llvm;
 // This pass targets typical back-ends for which the standard Julia
 // runtime library is available. Atypical back-ends should supply
 // their own lowering pass.
-struct FinalLowerGC: public FunctionPass, private JuliaPassContext {
-    static char ID;
-    FinalLowerGC() : FunctionPass(ID)
-    { }
+
+struct FinalLowerGC: private JuliaPassContext {
+    bool runOnFunction(Function &F);
+    bool doInitialization(Module &M);
+    bool doFinalization(Module &M);
 
 private:
     Function *queueRootFunc;
+    Function *queueBindingFunc;
     Function *poolAllocFunc;
     Function *bigAllocFunc;
-    CallInst *ptlsStates;
-
-    bool doInitialization(Module &M) override;
-    bool doFinalization(Module &M) override;
-    bool runOnFunction(Function &F) override;
+    Instruction *pgcstack;
 
     // Lowers a `julia.new_gc_frame` intrinsic.
     Value *lowerNewGCFrame(CallInst *target, Function &F);
@@ -61,19 +60,20 @@ struct FinalLowerGC: public FunctionPass, private JuliaPassContext {
     // Lowers a `julia.queue_gc_root` intrinsic.
     Value *lowerQueueGCRoot(CallInst *target, Function &F);
 
-    Instruction *getPgcstack(Instruction *ptlsStates);
+    // Lowers a `julia.queue_gc_binding` intrinsic.
+    Value *lowerQueueGCBinding(CallInst *target, Function &F);
 };
 
 Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
 {
-    assert(target->getNumArgOperands() == 1);
+    assert(target->arg_size() == 1);
     unsigned nRoots = cast<ConstantInt>(target->getArgOperand(0))->getLimitedValue(INT_MAX);
 
     // Create the GC frame.
     AllocaInst *gcframe = new AllocaInst(
         T_prjlvalue,
         0,
-        ConstantInt::get(T_int32, nRoots + 2),
+        ConstantInt::get(Type::getInt32Ty(F.getContext()), nRoots + 2),
         Align(16));
     gcframe->insertAfter(target);
     gcframe->takeName(target);
@@ -81,12 +81,12 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
     // Zero out the GC frame.
     BitCastInst *tempSlot_i8 = new BitCastInst(gcframe, Type::getInt8PtrTy(F.getContext()), "");
     tempSlot_i8->insertAfter(gcframe);
-    Type *argsT[2] = {tempSlot_i8->getType(), T_int32};
+    Type *argsT[2] = {tempSlot_i8->getType(), Type::getInt32Ty(F.getContext())};
     Function *memset = Intrinsic::getDeclaration(F.getParent(), Intrinsic::memset, makeArrayRef(argsT));
     Value *args[4] = {
         tempSlot_i8, // dest
         ConstantInt::get(Type::getInt8Ty(F.getContext()), 0), // val
-        ConstantInt::get(T_int32, sizeof(jl_value_t*) * (nRoots + 2)), // len
+        ConstantInt::get(Type::getInt32Ty(F.getContext()), sizeof(jl_value_t*) * (nRoots + 2)), // len
         ConstantInt::get(Type::getInt1Ty(F.getContext()), 0)}; // volatile
     CallInst *zeroing = CallInst::Create(memset, makeArrayRef(args));
     cast<MemSetInst>(zeroing)->setDestAlignment(16);
@@ -98,22 +98,22 @@ Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F)
 
 void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
 {
-    assert(target->getNumArgOperands() == 2);
+    assert(target->arg_size() == 2);
     auto gcframe = target->getArgOperand(0);
     unsigned nRoots = cast<ConstantInt>(target->getArgOperand(1))->getLimitedValue(INT_MAX);
 
     IRBuilder<> builder(target->getContext());
     builder.SetInsertPoint(&*(++BasicBlock::iterator(target)));
     StoreInst *inst = builder.CreateAlignedStore(
-                ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)),
+                ConstantInt::get(getSizeTy(F.getContext()), JL_GC_ENCODE_PUSHARGS(nRoots)),
                 builder.CreateBitCast(
                         builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0),
-                        T_size->getPointerTo()),
+                        getSizeTy(F.getContext())->getPointerTo()),
                 Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
-    Value *pgcstack = builder.Insert(getPgcstack(ptlsStates));
+    auto T_ppjlvalue = JuliaType::get_ppjlvalue_ty(F.getContext());
     inst = builder.CreateAlignedStore(
-            builder.CreateAlignedLoad(pgcstack, Align(sizeof(void*))),
+            builder.CreateAlignedLoad(T_ppjlvalue, pgcstack, Align(sizeof(void*))),
             builder.CreatePointerCast(
                     builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1),
                     PointerType::get(T_ppjlvalue, 0)),
@@ -127,19 +127,18 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F)
 
 void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
 {
-    assert(target->getNumArgOperands() == 1);
+    assert(target->arg_size() == 1);
     auto gcframe = target->getArgOperand(0);
 
     IRBuilder<> builder(target->getContext());
     builder.SetInsertPoint(target);
     Instruction *gcpop =
         cast<Instruction>(builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 1));
-    Instruction *inst = builder.CreateAlignedLoad(gcpop, Align(sizeof(void*)));
+    Instruction *inst = builder.CreateAlignedLoad(T_prjlvalue, gcpop, Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
     inst = builder.CreateAlignedStore(
         inst,
-        builder.CreateBitCast(
-            builder.Insert(getPgcstack(ptlsStates)),
+        builder.CreateBitCast(pgcstack,
             PointerType::get(T_prjlvalue, 0)),
         Align(sizeof(void*)));
     inst->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
@@ -147,7 +146,7 @@ void FinalLowerGC::lowerPopGCFrame(CallInst *target, Function &F)
 
 Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
 {
-    assert(target->getNumArgOperands() == 2);
+    assert(target->arg_size() == 2);
     auto gcframe = target->getArgOperand(0);
     auto index = target->getArgOperand(1);
 
@@ -156,7 +155,7 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
     builder.SetInsertPoint(target);
 
     // The first two slots are reserved, so we'll add two to the index.
-    index = builder.CreateAdd(index, ConstantInt::get(T_int32, 2));
+    index = builder.CreateAdd(index, ConstantInt::get(Type::getInt32Ty(F.getContext()), 2));
 
     // Lower the intrinsic as a GEP.
     auto gep = builder.CreateInBoundsGEP(T_prjlvalue, gcframe, index);
@@ -166,24 +165,21 @@ Value *FinalLowerGC::lowerGetGCFrameSlot(CallInst *target, Function &F)
 
 Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F)
 {
-    assert(target->getNumArgOperands() == 1);
+    assert(target->arg_size() == 1);
     target->setCalledFunction(queueRootFunc);
     return target;
 }
 
-Instruction *FinalLowerGC::getPgcstack(Instruction *ptlsStates)
+Value *FinalLowerGC::lowerQueueGCBinding(CallInst *target, Function &F)
 {
-    Constant *offset = ConstantInt::getSigned(T_int32, offsetof(jl_tls_states_t, pgcstack) / sizeof(void*));
-    return GetElementPtrInst::CreateInBounds(
-        T_ppjlvalue,
-        ptlsStates,
-        ArrayRef<Value*>(offset),
-        "jl_pgcstack");
+    assert(target->arg_size() == 1);
+    target->setCalledFunction(queueBindingFunc);
+    return target;
 }
 
 Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
 {
-    assert(target->getNumArgOperands() == 2);
+    assert(target->arg_size() == 2);
     auto sz = (size_t)cast<ConstantInt>(target->getArgOperand(1))->getZExtValue();
     // This is strongly architecture and OS dependent
     int osize;
@@ -195,11 +191,11 @@ Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F)
     if (offset < 0) {
         newI = builder.CreateCall(
             bigAllocFunc,
-            { ptls, ConstantInt::get(T_size, sz + sizeof(void*)) });
+            { ptls, ConstantInt::get(getSizeTy(F.getContext()), sz + sizeof(void*)) });
     }
     else {
-        auto pool_offs = ConstantInt::get(T_int32, offset);
-        auto pool_osize = ConstantInt::get(T_int32, osize);
+        auto pool_offs = ConstantInt::get(Type::getInt32Ty(F.getContext()), offset);
+        auto pool_osize = ConstantInt::get(Type::getInt32Ty(F.getContext()), osize);
         newI = builder.CreateCall(poolAllocFunc, { ptls, pool_offs, pool_osize });
     }
     newI->setAttributes(newI->getCalledFunction()->getAttributes());
@@ -213,10 +209,11 @@ bool FinalLowerGC::doInitialization(Module &M) {
 
     // Initialize platform-specific references.
     queueRootFunc = getOrDeclare(jl_well_known::GCQueueRoot);
+    queueBindingFunc = getOrDeclare(jl_well_known::GCQueueBinding);
     poolAllocFunc = getOrDeclare(jl_well_known::GCPoolAlloc);
     bigAllocFunc = getOrDeclare(jl_well_known::GCBigAlloc);
 
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
+    GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc};
     unsigned j = 0;
     for (unsigned i = 0; i < sizeof(functionList) / sizeof(void*); i++) {
         if (!functionList[i])
@@ -232,16 +229,17 @@ bool FinalLowerGC::doInitialization(Module &M) {
 
 bool FinalLowerGC::doFinalization(Module &M)
 {
+    GlobalValue *functionList[] = {queueRootFunc, queueBindingFunc, poolAllocFunc, bigAllocFunc};
+    queueRootFunc = queueBindingFunc = poolAllocFunc = bigAllocFunc = nullptr;
     auto used = M.getGlobalVariable("llvm.compiler.used");
     if (!used)
         return false;
-    GlobalValue *functionList[] = {queueRootFunc, poolAllocFunc, bigAllocFunc};
     SmallPtrSet<Constant*, 16> InitAsSet(
         functionList,
         functionList + sizeof(functionList) / sizeof(void*));
     bool changed = false;
     SmallVector<Constant*, 16> init;
-    ConstantArray *CA = dyn_cast<ConstantArray>(used->getInitializer());
+    ConstantArray *CA = cast<ConstantArray>(used->getInitializer());
     for (auto &Op : CA->operands()) {
         Constant *C = cast_or_null<Constant>(Op);
         if (InitAsSet.count(C->stripPointerCasts())) {
@@ -255,7 +253,7 @@ bool FinalLowerGC::doFinalization(Module &M)
     used->eraseFromParent();
     if (init.empty())
         return true;
-    ArrayType *ATy = ArrayType::get(T_pint8, init.size());
+    ArrayType *ATy = ArrayType::get(Type::getInt8PtrTy(M.getContext()), init.size());
     used = new GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
                                     ConstantArray::get(ATy, init), "llvm.compiler.used");
     used->setSection("llvm.metadata");
@@ -282,13 +280,13 @@ bool FinalLowerGC::runOnFunction(Function &F)
     LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n");
     // Check availability of functions again since they might have been deleted.
     initFunctions(*F.getParent());
-    if (!ptls_getter)
-        return true;
+    if (!pgcstack_getter)
+        return false;
 
-    // Look for a call to 'julia.ptls_states'.
-    ptlsStates = getPtls(F);
-    if (!ptlsStates)
-        return true;
+    // Look for a call to 'julia.get_pgcstack'.
+    pgcstack = getPGCstack(F);
+    if (!pgcstack)
+        return false;
 
     // Acquire intrinsic functions.
     auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame);
@@ -297,6 +295,7 @@ bool FinalLowerGC::runOnFunction(Function &F)
     auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot);
     auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes);
     auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot);
+    auto queueGCBindingFunc = getOrNull(jl_intrinsics::queueGCBinding);
 
     // Lower all calls to supported intrinsics.
     for (BasicBlock &BB : F) {
@@ -329,6 +328,9 @@ bool FinalLowerGC::runOnFunction(Function &F)
             else if (callee == queueGCRootFunc) {
                 replaceInstruction(CI, lowerQueueGCRoot(CI, F), it);
             }
+            else if (callee == queueGCBindingFunc) {
+                replaceInstruction(CI, lowerQueueGCBinding(CI, F), it);
+            }
             else {
                 ++it;
             }
@@ -338,15 +340,62 @@ bool FinalLowerGC::runOnFunction(Function &F)
     return true;
 }
 
-char FinalLowerGC::ID = 0;
-static RegisterPass<FinalLowerGC> X("FinalLowerGC", "Final GC intrinsic lowering pass", false, false);
+struct FinalLowerGCLegacy: public FunctionPass {
+    static char ID;
+    FinalLowerGCLegacy() : FunctionPass(ID), finalLowerGC(FinalLowerGC()) {}
+
+protected:
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+        FunctionPass::getAnalysisUsage(AU);
+    }
+
+private:
+    bool runOnFunction(Function &F) override;
+    bool doInitialization(Module &M) override;
+    bool doFinalization(Module &M) override;
+
+    FinalLowerGC finalLowerGC;
+};
+
+bool FinalLowerGCLegacy::runOnFunction(Function &F) {
+    return finalLowerGC.runOnFunction(F);
+}
+
+bool FinalLowerGCLegacy::doInitialization(Module &M) {
+    return finalLowerGC.doInitialization(M);
+}
+
+bool FinalLowerGCLegacy::doFinalization(Module &M) {
+    return finalLowerGC.doFinalization(M);
+}
+
+
+PreservedAnalyses FinalLowerGCPass::run(Module &M, ModuleAnalysisManager &AM)
+{
+    auto finalLowerGC = FinalLowerGC();
+    bool modified = false;
+    modified |= finalLowerGC.doInitialization(M);
+    for (auto &F : M.functions()) {
+        if (F.isDeclaration())
+            continue;
+        modified |= finalLowerGC.runOnFunction(F);
+    }
+    modified |= finalLowerGC.doFinalization(M);
+    if (modified) {
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    }
+    return PreservedAnalyses::all();
+}
+
+char FinalLowerGCLegacy::ID = 0;
+static RegisterPass<FinalLowerGCLegacy> X("FinalLowerGC", "Final GC intrinsic lowering pass", false, false);
 
 Pass *createFinalLowerGCPass()
 {
-    return new FinalLowerGC();
+    return new FinalLowerGCLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddFinalLowerGCPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createFinalLowerGCPass());
 }
diff --git a/src/llvm-gc-invariant-verifier.cpp b/src/llvm-gc-invariant-verifier.cpp
index c1386efef8e726..0c6c7e27f50cf9 100644
--- a/src/llvm-gc-invariant-verifier.cpp
+++ b/src/llvm-gc-invariant-verifier.cpp
@@ -4,6 +4,7 @@
 // See the devdocs for a description of these invariants.
 
 #include "llvm-version.h"
+#include "passes.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/Types.h>
@@ -19,9 +20,6 @@
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
 #include <llvm/IR/InstVisitor.h>
-#if JL_LLVM_VERSION < 110000
-#include <llvm/IR/CallSite.h>
-#endif
 #include <llvm/IR/Module.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Verifier.h>
@@ -36,11 +34,10 @@
 
 using namespace llvm;
 
-struct GCInvariantVerifier : public FunctionPass, public InstVisitor<GCInvariantVerifier> {
-    static char ID;
+struct GCInvariantVerifier : public InstVisitor<GCInvariantVerifier> {
     bool Broken = false;
     bool Strong;
-    GCInvariantVerifier(bool Strong = false) : FunctionPass(ID), Strong(Strong) {}
+    GCInvariantVerifier(bool Strong = false) : Strong(Strong) {}
 
 private:
     void Check(bool Cond, const char *message, Value *Val) {
@@ -51,20 +48,18 @@ struct GCInvariantVerifier : public FunctionPass, public InstVisitor<GCInvariant
     }
 
 public:
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-        FunctionPass::getAnalysisUsage(AU);
-        AU.setPreservesAll();
-    }
-
-    bool runOnFunction(Function &F) override;
     void visitAddrSpaceCastInst(AddrSpaceCastInst &I);
-    void visitStoreInst(StoreInst &SI);
     void visitLoadInst(LoadInst &LI);
+    void visitStoreInst(StoreInst &SI);
+    void visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI);
+    void visitAtomicRMWInst(AtomicRMWInst &SI);
     void visitReturnInst(ReturnInst &RI);
     void visitGetElementPtrInst(GetElementPtrInst &GEP);
     void visitIntToPtrInst(IntToPtrInst &IPI);
     void visitPtrToIntInst(PtrToIntInst &PII);
     void visitCallInst(CallInst &CI);
+
+    void checkStoreInst(Type *VTy, unsigned AS, Value &SI);
 };
 
 void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
@@ -83,8 +78,7 @@ void GCInvariantVerifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) {
           "Illegal address space cast from decayed ptr", &I);
 }
 
-void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
-    Type *VTy = SI.getValueOperand()->getType();
+void GCInvariantVerifier::checkStoreInst(Type *VTy, unsigned AS, Value &SI) {
     if (VTy->isPointerTy()) {
         /* We currently don't obey this for arguments. That's ok - they're
            externally rooted. */
@@ -93,12 +87,23 @@ void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
               AS != AddressSpace::Derived,
               "Illegal store of decayed value", &SI);
     }
-    VTy = SI.getPointerOperand()->getType();
-    if (VTy->isPointerTy()) {
-        unsigned AS = cast<PointerType>(VTy)->getAddressSpace();
-        Check(AS != AddressSpace::CalleeRooted,
-              "Illegal store to callee rooted value", &SI);
-    }
+    Check(AS != AddressSpace::CalleeRooted,
+          "Illegal store to callee rooted value", &SI);
+}
+
+void GCInvariantVerifier::visitStoreInst(StoreInst &SI) {
+    Type *VTy = SI.getValueOperand()->getType();
+    checkStoreInst(VTy, SI.getPointerAddressSpace(), SI);
+}
+
+void GCInvariantVerifier::visitAtomicRMWInst(AtomicRMWInst &SI) {
+    Type *VTy = SI.getValOperand()->getType();
+    checkStoreInst(VTy, SI.getPointerAddressSpace(), SI);
+}
+
+void GCInvariantVerifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI) {
+    Type *VTy = SI.getNewValOperand()->getType();
+    checkStoreInst(VTy, SI.getPointerAddressSpace(), SI);
 }
 
 void GCInvariantVerifier::visitLoadInst(LoadInst &LI) {
@@ -157,7 +162,7 @@ void GCInvariantVerifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 void GCInvariantVerifier::visitCallInst(CallInst &CI) {
     CallingConv::ID CC = CI.getCallingConv();
     if (CC == JLCALL_F_CC || CC == JLCALL_F2_CC) {
-        for (Value *Arg : CI.arg_operands()) {
+        for (Value *Arg : CI.args()) {
             Type *Ty = Arg->getType();
             Check(Ty->isPointerTy() && cast<PointerType>(Ty)->getAddressSpace() == AddressSpace::Tracked,
                 "Invalid derived pointer in jlcall", &CI);
@@ -177,22 +182,44 @@ void GCInvariantVerifier::visitPtrToIntInst(PtrToIntInst &PII) {
           "Illegal inttoptr", &PII);
 }
 
-bool GCInvariantVerifier::runOnFunction(Function &F) {
-    visit(F);
-    if (Broken) {
+PreservedAnalyses GCInvariantVerifierPass::run(Function &F, FunctionAnalysisManager &AM) {
+    GCInvariantVerifier GIV(Strong);
+    GIV.visit(F);
+    if (GIV.Broken) {
         abort();
     }
-    return false;
+    return PreservedAnalyses::all();
 }
 
-char GCInvariantVerifier::ID = 0;
-static RegisterPass<GCInvariantVerifier> X("GCInvariantVerifier", "GC Invariant Verification Pass", false, false);
+struct GCInvariantVerifierLegacy : public FunctionPass {
+    static char ID;
+    bool Strong;
+    GCInvariantVerifierLegacy(bool Strong=false) : FunctionPass(ID), Strong(Strong) {}
+
+public:
+    void getAnalysisUsage(AnalysisUsage &AU) const override {
+        FunctionPass::getAnalysisUsage(AU);
+        AU.setPreservesAll();
+    }
+
+    bool runOnFunction(Function &F) override {
+        GCInvariantVerifier GIV(Strong);
+        GIV.visit(F);
+        if (GIV.Broken) {
+            abort();
+        }
+        return false;
+    }
+};
+
+char GCInvariantVerifierLegacy::ID = 0;
+static RegisterPass<GCInvariantVerifierLegacy> X("GCInvariantVerifier", "GC Invariant Verification Pass", false, false);
 
 Pass *createGCInvariantVerifierPass(bool Strong) {
-    return new GCInvariantVerifier(Strong);
+    return new GCInvariantVerifierLegacy(Strong);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass(LLVMPassManagerRef PM, LLVMBool Strong)
+extern "C" JL_DLLEXPORT void LLVMExtraAddGCInvariantVerifierPass_impl(LLVMPassManagerRef PM, LLVMBool Strong)
 {
     unwrap(PM)->add(createGCInvariantVerifierPass(Strong));
 }
diff --git a/src/llvm-julia-licm.cpp b/src/llvm-julia-licm.cpp
index 3242f68df3697b..c74a12b3bca611 100644
--- a/src/llvm-julia-licm.cpp
+++ b/src/llvm-julia-licm.cpp
@@ -1,20 +1,33 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #include "llvm-version.h"
+#include "passes.h"
 
+#include <llvm/ADT/Statistic.h>
 #include <llvm/Analysis/LoopInfo.h>
 #include <llvm/Analysis/LoopPass.h>
 #include "llvm/Analysis/LoopIterator.h"
 #include <llvm/IR/Dominators.h>
 #include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/Verifier.h>
 #include <llvm/Transforms/Utils/LoopUtils.h>
+#include <llvm/Analysis/ValueTracking.h>
 
 #include "llvm-pass-helpers.h"
+#include "julia.h"
+#include "llvm-alloc-helpers.h"
+#include "codegen_shared.h"
 
 #define DEBUG_TYPE "julia-licm"
 
 using namespace llvm;
 
+STATISTIC(HoistedPreserveBegin, "Number of gc_preserve_begin instructions hoisted out of a loop");
+STATISTIC(SunkPreserveEnd, "Number of gc_preserve_end instructions sunk out of a loop");
+STATISTIC(ErasedPreserveEnd, "Number of gc_preserve_end instructions removed from nonterminating loops");
+STATISTIC(HoistedWriteBarrier, "Number of write barriers hoisted out of a loop");
+STATISTIC(HoistedAllocation, "Number of allocations hoisted out of a loop");
+
 /*
  * Julia LICM pass.
  * This takes care of some julia intrinsics that is safe to move around/out of loops but
@@ -24,11 +37,25 @@ using namespace llvm;
 
 namespace {
 
-struct JuliaLICMPass : public LoopPass, public JuliaPassContext {
+struct JuliaLICMPassLegacy : public LoopPass {
     static char ID;
-    JuliaLICMPass() : LoopPass(ID) {};
+    JuliaLICMPassLegacy() : LoopPass(ID) {};
+
+    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
+
+    protected:
+        void getAnalysisUsage(AnalysisUsage &AU) const override {
+            getLoopAnalysisUsage(AU);
+        }
+};
+
+struct JuliaLICM : public JuliaPassContext {
+    function_ref<DominatorTree &()> GetDT;
+    function_ref<LoopInfo &()> GetLI;
+    JuliaLICM(function_ref<DominatorTree &()> GetDT,
+              function_ref<LoopInfo &()> GetLI) : GetDT(GetDT), GetLI(GetLI) {}
 
-    bool runOnLoop(Loop *L, LPPassManager &LPM) override
+    bool runOnLoop(Loop *L)
     {
         // Get the preheader block to move instructions into,
         // required to run this pass.
@@ -36,14 +63,17 @@ struct JuliaLICMPass : public LoopPass, public JuliaPassContext {
         if (!preheader)
             return false;
         BasicBlock *header = L->getHeader();
+        const llvm::DataLayout &DL = header->getModule()->getDataLayout();
         initFunctions(*header->getModule());
         // Also require `gc_preserve_begin_func` whereas
         // `gc_preserve_end_func` is optional since the input to
         // `gc_preserve_end_func` must be from `gc_preserve_begin_func`.
-        if (!gc_preserve_begin_func)
+        // We also hoist write barriers here, so we don't exit if write_barrier_func exists
+        if (!gc_preserve_begin_func && !write_barrier_func && !write_barrier_binding_func &&
+            !alloc_obj_func)
             return false;
-        auto LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
-        auto DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+        auto LI = &GetLI();
+        auto DT = &GetDT();
 
         // Lazy initialization of exit blocks insertion points.
         bool exit_pts_init = false;
@@ -80,7 +110,7 @@ struct JuliaLICMPass : public LoopPass, public JuliaPassContext {
                 // corresponding `end` can be moved to the loop exit.
                 if (callee == gc_preserve_begin_func) {
                     bool canhoist = true;
-                    for (Use &U : call->arg_operands()) {
+                    for (Use &U : call->args()) {
                         // Check if all arguments are generated outside the loop
                         auto origin = dyn_cast<Instruction>(U.get());
                         if (!origin)
@@ -92,6 +122,7 @@ struct JuliaLICMPass : public LoopPass, public JuliaPassContext {
                     }
                     if (!canhoist)
                         continue;
+                    ++HoistedPreserveBegin;
                     call->moveBefore(preheader->getTerminator());
                     changed = true;
                 }
@@ -102,33 +133,107 @@ struct JuliaLICMPass : public LoopPass, public JuliaPassContext {
                     changed = true;
                     auto exit_pts = get_exit_pts();
                     if (exit_pts.empty()) {
+                        ++ErasedPreserveEnd;
                         call->eraseFromParent();
                         continue;
                     }
+                    ++SunkPreserveEnd;
                     call->moveBefore(exit_pts[0]);
                     for (unsigned i = 1; i < exit_pts.size(); i++) {
                         // Clone exit
                         CallInst::Create(call, {}, exit_pts[i]);
                     }
                 }
+                else if (callee == write_barrier_func ||
+                         callee == write_barrier_binding_func) {
+                    bool valid = true;
+                    for (std::size_t i = 0; i < call->arg_size(); i++) {
+                        if (!L->makeLoopInvariant(call->getArgOperand(i), changed)) {
+                            valid = false;
+                            break;
+                        }
+                    }
+                    if (valid) {
+                        ++HoistedWriteBarrier;
+                        call->moveBefore(preheader->getTerminator());
+                        changed = true;
+                    }
+                }
+                else if (callee == alloc_obj_func) {
+                    jl_alloc::AllocUseInfo use_info;
+                    jl_alloc::CheckInst::Stack check_stack;
+                    jl_alloc::EscapeAnalysisRequiredArgs required{use_info, check_stack, *this, DL};
+                    jl_alloc::runEscapeAnalysis(call, required, jl_alloc::EscapeAnalysisOptionalArgs().with_valid_set(&L->getBlocksSet()));
+                    if (use_info.escaped || use_info.addrescaped) {
+                        continue;
+                    }
+                    bool valid = true;
+                    for (std::size_t i = 0; i < call->arg_size(); i++) {
+                        if (!L->makeLoopInvariant(call->getArgOperand(i), changed)) {
+                            valid = false;
+                            break;
+                        }
+                    }
+                    if (use_info.refstore) {
+                        // We need to add write barriers to any stores
+                        // that may start crossing generations
+                        continue;
+                    }
+                    if (valid) {
+                        ++HoistedAllocation;
+                        call->moveBefore(preheader->getTerminator());
+                        changed = true;
+                    }
+                }
             }
         }
+        assert(!verifyFunction(*L->getHeader()->getParent()));
         return changed;
     }
-
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        getLoopAnalysisUsage(AU);
-    }
 };
 
-char JuliaLICMPass::ID = 0;
-static RegisterPass<JuliaLICMPass>
+bool JuliaLICMPassLegacy::runOnLoop(Loop *L, LPPassManager &LPM) {
+    auto GetDT = [this]() -> DominatorTree & {
+        return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    };
+    auto GetLI = [this]() -> LoopInfo & {
+        return getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+    };
+    auto juliaLICM = JuliaLICM(GetDT, GetLI);
+    return juliaLICM.runOnLoop(L);
+}
+
+char JuliaLICMPassLegacy::ID = 0;
+static RegisterPass<JuliaLICMPassLegacy>
         Y("JuliaLICM", "LICM for julia specific intrinsics.",
           false, false);
+} //namespace
+
+PreservedAnalyses JuliaLICMPass::run(Loop &L, LoopAnalysisManager &AM,
+                          LoopStandardAnalysisResults &AR, LPMUpdater &U)
+{
+    auto GetDT = [&AR]() -> DominatorTree & {
+        return AR.DT;
+    };
+    auto GetLI = [&AR]() -> LoopInfo & {
+        return AR.LI;
+    };
+    auto juliaLICM = JuliaLICM(GetDT, GetLI);
+    if (juliaLICM.runOnLoop(&L)) {
+        auto preserved = PreservedAnalyses::allInSet<CFGAnalyses>();
+        preserved.preserve<LoopAnalysis>();
+        preserved.preserve<DominatorTreeAnalysis>();
+        return preserved;
+    }
+    return PreservedAnalyses::all();
 }
 
 Pass *createJuliaLICMPass()
 {
-    return new JuliaLICMPass();
+    return new JuliaLICMPassLegacy();
+}
+
+extern "C" JL_DLLEXPORT void LLVMExtraJuliaLICMPass_impl(LLVMPassManagerRef PM)
+{
+    unwrap(PM)->add(createJuliaLICMPass());
 }
diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp
index ecf93e50eefded..cf4d771f02a89f 100644
--- a/src/llvm-late-gc-lowering.cpp
+++ b/src/llvm-late-gc-lowering.cpp
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #include "llvm-version.h"
+#include "passes.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/Types.h>
@@ -16,9 +17,6 @@
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
-#if JL_LLVM_VERSION < 110000
-#include <llvm/IR/CallSite.h>
-#endif
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/MDBuilder.h>
 #include <llvm/IR/Module.h>
@@ -29,15 +27,14 @@
 #include <llvm/Transforms/Utils/BasicBlockUtils.h>
 #include <llvm/Transforms/Utils/ModuleUtils.h>
 
-#if JL_LLVM_VERSION >= 100000
 #include <llvm/InitializePasses.h>
-#endif
 
 #include "codegen_shared.h"
 #include "julia.h"
 #include "julia_internal.h"
 #include "julia_assert.h"
 #include "llvm-pass-helpers.h"
+#include <map>
 
 #define DEBUG_TYPE "late_lower_gcroot"
 
@@ -306,16 +303,11 @@ struct State {
     State(Function &F) : F(&F), DT(nullptr), MaxPtrNumber(-1), MaxSafepointNumber(-1) {}
 };
 
-namespace llvm {
-    void initializeLateLowerGCFramePass(PassRegistry &Registry);
-}
 
-struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext {
+
+struct LateLowerGCFrameLegacy: public FunctionPass {
     static char ID;
-    LateLowerGCFrame() : FunctionPass(ID)
-    {
-        llvm::initializeDominatorTreeWrapperPassPass(*PassRegistry::getPassRegistry());
-    }
+    LateLowerGCFrameLegacy() : FunctionPass(ID) {}
 
 protected:
     void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -326,7 +318,18 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext {
     }
 
 private:
-    CallInst *ptlsStates;
+    bool runOnFunction(Function &F) override;
+};
+
+struct LateLowerGCFrame:  private JuliaPassContext {
+    function_ref<DominatorTree &()> GetDT;
+    LateLowerGCFrame(function_ref<DominatorTree &()> GetDT) : GetDT(GetDT) {}
+
+public:
+    bool runOnFunction(Function &F, bool *CFGModified = nullptr);
+
+private:
+    CallInst *pgcstack;
 
     void MaybeNoteDef(State &S, BBState &BBS, Value *Def, const std::vector<int> &SafepointsSoFar, SmallVector<int, 1> &&RefinedPtr = SmallVector<int, 1>());
     void NoteUse(State &S, BBState &BBS, Value *V, BitVector &Uses);
@@ -355,9 +358,7 @@ struct LateLowerGCFrame: public FunctionPass, private JuliaPassContext {
     void PlaceGCFrameStore(State &S, unsigned R, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame, Instruction *InsertBefore);
     void PlaceGCFrameStores(State &S, unsigned MinColorRoot, const std::vector<int> &Colors, Value *GCFrame);
     void PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>);
-    bool doInitialization(Module &M) override;
-    bool runOnFunction(Function &F) override;
-    bool CleanupIR(Function &F, State *S=nullptr);
+    bool CleanupIR(Function &F, State *S, bool *CFGModified);
     void NoteUseChain(State &S, BBState &BBS, User *TheUser);
     SmallVector<int, 1> GetPHIRefinements(PHINode *phi, State &S);
     void FixUpRefinements(ArrayRef<int> PHINumbers, State &S);
@@ -401,8 +402,10 @@ CountTrackedPointers::CountTrackedPointers(Type *T) {
         }
         if (isa<ArrayType>(T))
             count *= cast<ArrayType>(T)->getNumElements();
-        else if (isa<VectorType>(T))
-            count *= cast<VectorType>(T)->getNumElements();
+        else if (isa<VectorType>(T)) {
+            ElementCount EC = cast<VectorType>(T)->getElementCount();
+            count *= EC.getKnownMinValue();
+        }
     }
     if (count == 0)
         all = false;
@@ -413,8 +416,10 @@ unsigned getCompositeNumElements(Type *T) {
         return ST->getNumElements();
     else if (auto *AT = dyn_cast<ArrayType>(T))
         return AT->getNumElements();
-    else
-        return cast<VectorType>(T)->getNumElements();
+    else {
+        ElementCount EC = cast<VectorType>(T)->getElementCount();
+        return EC.getKnownMinValue();
+    }
 }
 
 // Walk through a Type, and record the element path to every tracked value inside
@@ -427,11 +432,7 @@ void TrackCompositeType(Type *T, std::vector<unsigned> &Idxs, std::vector<std::v
         unsigned Idx, NumEl = getCompositeNumElements(T);
         for (Idx = 0; Idx < NumEl; Idx++) {
             Idxs.push_back(Idx);
-#if JL_LLVM_VERSION >= 110000
             Type *ElT = GetElementPtrInst::getTypeAtIndex(T, Idx);
-#else
-            Type *ElT = cast<CompositeType>(T)->getTypeAtIndex(Idx);
-#endif
             TrackCompositeType(ElT, Idxs, Numberings);
             Idxs.pop_back();
         }
@@ -474,6 +475,8 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
             if (getValueAddrSpace(NewV) == 0)
                 break;
             CurrentV = NewV;
+        } else if (auto *Freeze = dyn_cast<FreezeInst>(CurrentV)) {
+            CurrentV = Freeze->getOperand(0); // Can be formed by optimizations, treat as a no-op
         } else if (auto *GEP = dyn_cast<GetElementPtrInst>(CurrentV)) {
             CurrentV = GEP->getOperand(0);
             // GEP can make vectors from a single base pointer
@@ -505,6 +508,16 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
             // In general a load terminates a walk
             break;
         }
+        else if (auto LI = dyn_cast<AtomicCmpXchgInst>(CurrentV)) {
+            // In general a load terminates a walk
+            (void)LI;
+            break;
+        }
+        else if (auto LI = dyn_cast<AtomicRMWInst>(CurrentV)) {
+            // In general a load terminates a walk
+            (void)LI;
+            break;
+        }
         else if (auto II = dyn_cast<IntrinsicInst>(CurrentV)) {
             // Some intrinsics behave like LoadInst followed by a SelectInst
             // This should never happen in a derived addrspace (since those cannot be stored to memory)
@@ -547,6 +560,7 @@ static std::pair<Value*,int> FindBaseValue(const State &S, Value *V, bool UseCac
         }
     }
     assert(isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) ||
+           isa<AtomicCmpXchgInst>(CurrentV) || isa<AtomicRMWInst>(CurrentV) ||
            isa<Argument>(CurrentV) || isa<SelectInst>(CurrentV) ||
            isa<PHINode>(CurrentV) || isa<AddrSpaceCastInst>(CurrentV) ||
            isa<Constant>(CurrentV) || isa<AllocaInst>(CurrentV) ||
@@ -576,11 +590,7 @@ Value *LateLowerGCFrame::MaybeExtractScalar(State &S, std::pair<Value*,int> ValE
         Type *FinalT = ExtractValueInst::getIndexedType(V->getType(), IdxsNotVec);
         bool IsVector = isa<VectorType>(FinalT);
         PointerType *T = cast<PointerType>(
-#if JL_LLVM_VERSION >= 110000
             GetElementPtrInst::getTypeAtIndex(FinalT, Idxs.back()));
-#else
-            cast<CompositeType>(FinalT)->getTypeAtIndex(Idxs.back()));
-#endif
         if (T->getAddressSpace() != AddressSpace::Tracked) {
             // if V isn't tracked, get the shadow def
             auto Numbers = NumberAllBase(S, V);
@@ -606,7 +616,7 @@ std::vector<Value*> LateLowerGCFrame::MaybeExtractVector(State &S, Value *BaseVe
     std::vector<Value*> V{Numbers.size()};
     Value *V_rnull = ConstantPointerNull::get(cast<PointerType>(T_prjlvalue));
     for (unsigned i = 0; i < V.size(); ++i) {
-        if (Numbers[i] >= 0)
+        if (Numbers[i] >= 0) // ignores undef and poison values
             V[i] = GetPtrForNumber(S, Numbers[i], InsertBefore);
         else
             V[i] = V_rnull;
@@ -638,8 +648,10 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
     }
     std::vector<int> Numbers;
     unsigned NumRoots = 1;
-    if (auto VTy = dyn_cast<VectorType>(SI->getType()))
-        Numbers.resize(VTy->getNumElements(), -1);
+    if (auto VTy = dyn_cast<VectorType>(SI->getType())) {
+        ElementCount EC = VTy->getElementCount();
+        Numbers.resize(EC.getKnownMinValue(), -1);
+    }
     else
         assert(isa<PointerType>(SI->getType()) && "unimplemented");
     assert(!isTrackedValue(SI));
@@ -693,13 +705,14 @@ void LateLowerGCFrame::LiftSelect(State &S, SelectInst *SI) {
         else
             Numbers[i] = Number;
     }
-    if (auto VTy = dyn_cast<VectorType>(SI->getType())) {
+    if (auto VTy = dyn_cast<FixedVectorType>(SI->getType())) {
         if (NumRoots != Numbers.size()) {
             // broadcast the scalar root number to fill the vector
             assert(NumRoots == 1);
             int Number = Numbers[0];
             Numbers.resize(0);
-            Numbers.resize(VTy->getNumElements(), Number);
+            ElementCount EC = VTy->getElementCount();
+            Numbers.resize(EC.getKnownMinValue(), Number);
         }
     }
     if (!isa<PointerType>(SI->getType()))
@@ -715,11 +728,12 @@ void LateLowerGCFrame::LiftPhi(State &S, PHINode *Phi) {
     SmallVector<PHINode *, 2> lifted;
     std::vector<int> Numbers;
     unsigned NumRoots = 1;
-    if (auto VTy = dyn_cast<VectorType>(Phi->getType())) {
+    if (auto VTy = dyn_cast<FixedVectorType>(Phi->getType())) {
         NumRoots = VTy->getNumElements();
         Numbers.resize(NumRoots);
     }
     else {
+        // TODO: SVE
         assert(isa<PointerType>(Phi->getType()) && "unimplemented");
     }
     for (unsigned i = 0; i < NumRoots; ++i) {
@@ -838,8 +852,9 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
         std::vector<int> Numbers2 = NumberAll(S, SVI->getOperand(1));
         auto Mask = SVI->getShuffleMask();
         for (auto idx : Mask) {
-            assert(idx != -1 && "Undef tracked value is invalid");
-            if ((unsigned)idx < Numbers1.size()) {
+            if (idx == -1) {
+                Numbers.push_back(-1);
+            } else if ((unsigned)idx < Numbers1.size()) {
                 Numbers.push_back(Numbers1.at(idx));
             } else {
                 Numbers.push_back(Numbers2.at(idx - Numbers1.size()));
@@ -896,7 +911,8 @@ std::vector<int> LateLowerGCFrame::NumberAllBase(State &S, Value *CurrentV) {
             Numbers = S.AllCompositeNumbering.at(CurrentV);
         }
     } else {
-        assert((isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) || isa<PHINode>(CurrentV) || isa<SelectInst>(CurrentV))
+        assert((isa<LoadInst>(CurrentV) || isa<CallInst>(CurrentV) || isa<PHINode>(CurrentV) || isa<SelectInst>(CurrentV) ||
+                isa<AtomicCmpXchgInst>(CurrentV) || isa<AtomicRMWInst>(CurrentV))
                 && "unexpected def expression");
         // This is simple, we can just number them sequentially
         for (unsigned i = 0; i < tracked.count; ++i) {
@@ -943,8 +959,9 @@ std::vector<int> LateLowerGCFrame::NumberAll(State &S, Value *V) {
             Number = Numbers[CurrentV.second]; // only needed a subset of the values
             Numbers.resize(tracked.count, Number);
         }
-        else
+        else {
             assert(!isa<PointerType>(V->getType()));
+        }
     }
     if (CurrentV.first != V) {
         if (isa<PointerType>(V->getType())) {
@@ -1055,7 +1072,8 @@ void RecursivelyVisit(callback f, Value *V) {
             f(VU);
         if (isa<CallInst>(TheUser) || isa<LoadInst>(TheUser) ||
             isa<SelectInst>(TheUser) || isa<PHINode>(TheUser) ||
-            isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser))
+            isa<StoreInst>(TheUser) || isa<PtrToIntInst>(TheUser) ||
+            isa<AtomicCmpXchgInst>(TheUser) || isa<AtomicRMWInst>(TheUser))
             continue;
         if (isa<GetElementPtrInst>(TheUser) || isa<BitCastInst>(TheUser) || isa<AddrSpaceCastInst>(TheUser)) {
             RecursivelyVisit<VisitInst, callback>(f, TheUser);
@@ -1129,12 +1147,14 @@ static bool isConstGV(GlobalVariable *gv)
     return gv->isConstant() || gv->getMetadata("julia.constgv");
 }
 
-static bool isLoadFromConstGV(LoadInst *LI, bool &task_local);
-static bool isLoadFromConstGV(Value *v, bool &task_local)
+typedef llvm::SmallPtrSet<PHINode*, 1> PhiSet;
+
+static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen = nullptr);
+static bool isLoadFromConstGV(Value *v, bool &task_local, PhiSet *seen = nullptr)
 {
     v = v->stripInBoundsOffsets();
     if (auto LI = dyn_cast<LoadInst>(v))
-        return isLoadFromConstGV(LI, task_local);
+        return isLoadFromConstGV(LI, task_local, seen);
     if (auto gv = dyn_cast<GlobalVariable>(v))
         return isConstGV(gv);
     // null pointer
@@ -1145,12 +1165,19 @@ static bool isLoadFromConstGV(Value *v, bool &task_local)
         return (CE->getOpcode() == Instruction::IntToPtr &&
                 isa<ConstantData>(CE->getOperand(0)));
     if (auto SL = dyn_cast<SelectInst>(v))
-        return (isLoadFromConstGV(SL->getTrueValue(), task_local) &&
-                isLoadFromConstGV(SL->getFalseValue(), task_local));
+        return (isLoadFromConstGV(SL->getTrueValue(), task_local, seen) &&
+                isLoadFromConstGV(SL->getFalseValue(), task_local, seen));
     if (auto Phi = dyn_cast<PHINode>(v)) {
+        PhiSet ThisSet(&Phi, &Phi);
+        if (!seen)
+            seen = &ThisSet;
+        else if (seen->count(Phi))
+            return true;
+        else
+            seen->insert(Phi);
         auto n = Phi->getNumIncomingValues();
         for (unsigned i = 0; i < n; ++i) {
-            if (!isLoadFromConstGV(Phi->getIncomingValue(i), task_local)) {
+            if (!isLoadFromConstGV(Phi->getIncomingValue(i), task_local, seen)) {
                 return false;
             }
         }
@@ -1161,7 +1188,7 @@ static bool isLoadFromConstGV(Value *v, bool &task_local)
         if (callee && callee->getName() == "julia.typeof") {
             return true;
         }
-        if (callee && callee->getName() == "julia.ptls_states") {
+        if (callee && callee->getName() == "julia.get_pgcstack") {
             task_local = true;
             return true;
         }
@@ -1182,7 +1209,7 @@ static bool isLoadFromConstGV(Value *v, bool &task_local)
 //
 // The white list implemented here and above in `isLoadFromConstGV(Value*)` should
 // cover all the cases we and LLVM generates.
-static bool isLoadFromConstGV(LoadInst *LI, bool &task_local)
+static bool isLoadFromConstGV(LoadInst *LI, bool &task_local, PhiSet *seen)
 {
     // We only emit single slot GV in codegen
     // but LLVM global merging can change the pointer operands to GEPs/bitcasts
@@ -1192,7 +1219,7 @@ static bool isLoadFromConstGV(LoadInst *LI, bool &task_local)
                {"jtbaa_immut", "jtbaa_const", "jtbaa_datatype"})) {
         if (gv)
             return true;
-        return isLoadFromConstGV(load_base, task_local);
+        return isLoadFromConstGV(load_base, task_local, seen);
     }
     if (gv)
         return isConstGV(gv);
@@ -1364,7 +1391,7 @@ void LateLowerGCFrame::FixUpRefinements(ArrayRef<int> PHINumbers, State &S)
             j++;
             if (auto inst = dyn_cast<Instruction>(S.ReversePtrNumbering[refine])) {
                 if (!S.DT)
-                    S.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+                    S.DT = &GetDT();
                 if (S.DT->dominates(inst, Phi))
                     continue;
                 // Decrement `j` so we'll overwrite/ignore it.
@@ -1446,7 +1473,8 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     MaybeNoteDef(S, BBS, CI, BBS.Safepoints);
                 }
                 if (CI->hasStructRetAttr()) {
-                    Type *ElT = (CI->arg_begin()[0])->getType()->getPointerElementType();
+                    Type *ElT = getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType();
+                    assert(cast<PointerType>(CI->getArgOperand(0)->getType())->isOpaqueOrPointeeTypeMatches(getAttributeAtIndex(CI->getAttributes(), 1, Attribute::StructRet).getValueAsType()));
                     auto tracked = CountTrackedPointers(ElT);
                     if (tracked.count) {
                         AllocaInst *SRet = dyn_cast<AllocaInst>((CI->arg_begin()[0])->stripInBoundsOffsets());
@@ -1501,7 +1529,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                 if (callee) {
                     if (callee == gc_preserve_begin_func) {
                         std::vector<int> args;
-                        for (Use &U : CI->arg_operands()) {
+                        for (Use &U : CI->args()) {
                             Value *V = U;
                             if (isa<Constant>(V))
                                 continue;
@@ -1526,8 +1554,10 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     // Known functions emitted in codegen that are not safepoints
                     if (callee == pointer_from_objref_func || callee == gc_preserve_begin_func ||
                         callee == gc_preserve_end_func || callee == typeof_func ||
-                        callee == ptls_getter ||
-                        callee == write_barrier_func || callee->getName() == "memcmp") {
+                        callee == pgcstack_getter || callee->getName() == XSTR(jl_egal__unboxed) ||
+                        callee->getName() == XSTR(jl_lock_value) || callee->getName() == XSTR(jl_unlock_value) ||
+                        callee == write_barrier_func || callee == write_barrier_binding_func ||
+                        callee->getName() == "memcmp") {
                         continue;
                     }
                     if (callee->hasFnAttribute(Attribute::ReadNone) ||
@@ -1545,10 +1575,10 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     continue;
                 }
                 std::vector<int> CalleeRoots;
-                for (Use &U : CI->arg_operands()) {
+                for (Use &U : CI->args()) {
                     // Find all callee rooted arguments.
                     // Record them instead of simply remove them from live values here
-                    // since they can be useful during refinment
+                    // since they can be useful during refinement
                     // (e.g. to remove roots of objects that are refined to these)
                     Value *V = U;
                     if (isa<Constant>(V) || !isa<PointerType>(V->getType()) ||
@@ -1594,6 +1624,20 @@ State LateLowerGCFrame::LocalScan(Function &F) {
                     MaybeNoteDef(S, BBS, LI, BBS.Safepoints, std::move(RefinedPtr));
                 }
                 NoteOperandUses(S, BBS, I);
+            } else if (auto *LI = dyn_cast<AtomicCmpXchgInst>(&I)) {
+                Type *Ty = LI->getNewValOperand()->getType()->getScalarType();
+                if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
+                    MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
+                }
+                NoteOperandUses(S, BBS, I);
+                // TODO: do we need MaybeTrackStore(S, LI);
+            } else if (auto *LI = dyn_cast<AtomicRMWInst>(&I)) {
+                Type *Ty = LI->getType()->getScalarType();
+                if (!Ty->isPointerTy() || Ty->getPointerAddressSpace() != AddressSpace::Loaded) {
+                    MaybeNoteDef(S, BBS, LI, BBS.Safepoints);
+                }
+                NoteOperandUses(S, BBS, I);
+                // TODO: do we need MaybeTrackStore(S, LI);
             } else if (SelectInst *SI = dyn_cast<SelectInst>(&I)) {
                 auto tracked = CountTrackedPointers(SI->getType());
                 if (tracked.count && !tracked.derived) {
@@ -1669,11 +1713,7 @@ State LateLowerGCFrame::LocalScan(Function &F) {
     return S;
 }
 
-#if JL_LLVM_VERSION >= 110000
 static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> &irbuilder) {
-#else
-static Value *ExtractScalar(Value *V, Type *VTy, bool isptr, ArrayRef<unsigned> Idxs, IRBuilder<> irbuilder) {
-#endif
     Type *T_int32 = Type::getInt32Ty(V->getContext());
     if (isptr) {
         std::vector<Value*> IdxList{Idxs.size() + 1};
@@ -1716,11 +1756,7 @@ static unsigned getFieldOffset(const DataLayout &DL, Type *STy, ArrayRef<unsigne
     return (unsigned)offset;
 }
 
-#if JL_LLVM_VERSION >= 110000
 std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> &irbuilder, ArrayRef<unsigned> perm_offsets) {
-#else
-std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBuilder<> irbuilder, ArrayRef<unsigned> perm_offsets) {
-#endif
     auto Tracked = TrackCompositeType(STy);
     std::vector<Value*> Ptrs;
     unsigned perm_idx = 0;
@@ -1753,16 +1789,12 @@ std::vector<Value*> ExtractTrackedValues(Value *Src, Type *STy, bool isptr, IRBu
     return Ptrs;
 }
 
-#if JL_LLVM_VERSION >= 110000
-unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> &irbuilder) {
-#else
-unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, IRBuilder<> irbuilder) {
-#endif
+unsigned TrackWithShadow(Value *Src, Type *STy, bool isptr, Value *Dst, Type *DTy, IRBuilder<> &irbuilder) {
     auto Ptrs = ExtractTrackedValues(Src, STy, isptr, irbuilder);
     for (unsigned i = 0; i < Ptrs.size(); ++i) {
-        Value *Elem = Ptrs[i];
-        assert(Elem->getType()->isPointerTy());
-        Value *Slot = irbuilder.CreateConstInBoundsGEP1_32(Elem->getType(), Dst, i);
+        Value *Elem = Ptrs[i];// Dst has type `[n x {}*]*`
+        Value *Slot = irbuilder.CreateConstInBoundsGEP2_32(DTy, Dst, 0, i);
+        assert(cast<PointerType>(Dst->getType())->isOpaqueOrPointeeTypeMatches(DTy));
         StoreInst *shadowStore = irbuilder.CreateAlignedStore(Elem, Slot, Align(sizeof(void*)));
         shadowStore->setOrdering(AtomicOrdering::NotAtomic);
         // TODO: shadowStore->setMetadata(LLVMContext::MD_tbaa, tbaa_gcframe);
@@ -1789,7 +1821,7 @@ void LateLowerGCFrame::MaybeTrackDst(State &S, MemTransferInst *MI) {
     //            Src = new BitCastInst(Src, STy->getPointerTo(MI->getSourceAddressSpace()), "", MI);
     //            auto &Shadow = S.ShadowAllocas[AI];
     //            if (!Shadow)
-    //                Shadow = new AllocaInst(T_prjlvalue, 0, ConstantInt::get(T_int32, nroots), "", MI);
+    //                Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, nroots), 0, "", MI);
     //            AI = Shadow;
     //            unsigned count = TrackWithShadow(Src, STy, true, AI, IRBuilder<>(MI));
     //            assert(count == tracked.count); (void)count;
@@ -1825,7 +1857,7 @@ void LateLowerGCFrame::MaybeTrackStore(State &S, StoreInst *I) {
     // track the Store with a Shadow
     //auto &Shadow = S.ShadowAllocas[AI];
     //if (!Shadow)
-    //    Shadow = new AllocaInst(T_prjlvalue, 0, ConstantInt::get(T_int32, tracked.count), "", MI);
+    //    Shadow = new AllocaInst(ArrayType::get(T_prjlvalue, tracked.count), 0, "", MI);
     //AI = Shadow;
     //Value *Src = I->getValueOperand();
     //unsigned count = TrackWithShadow(Src, Src->getType(), false, AI, MI, TODO which slots are we actually clobbering?);
@@ -1847,29 +1879,27 @@ void LateLowerGCFrame::ComputeLiveness(State &S) {
      * perform this iteration.
      */
     ReversePostOrderTraversal<Function *> RPOT(S.F);
+    BitVector NewLive;
     while (!Converged) {
         bool AnyChanged = false;
         for (BasicBlock *BB : RPOT) {
             // This could all be done more efficiently, by only updating what
             // changed - Let's get it working first though.
             BBState &BBS = S.BBStates[BB];
-            BitVector NewLiveOut = BBS.PhiOuts;
+            NewLive = BBS.PhiOuts;
             for (BasicBlock *Succ : successors(BB)) {
-                NewLiveOut |= S.BBStates[Succ].LiveIn;
+                NewLive |= S.BBStates[Succ].LiveIn;
             }
-            if (NewLiveOut != BBS.LiveOut) {
+            if (NewLive != BBS.LiveOut) {
                 AnyChanged = true;
-                BBS.LiveOut = NewLiveOut;
+		BBS.LiveOut = NewLive;
                 MaybeResize(BBS, BBS.LiveOut.size() - 1);
             }
-            BitVector NewLiveIn = BBS.LiveOut;
-            BitVector FlippedDefs = BBS.Defs;
-            FlippedDefs.flip();
-            NewLiveIn &= FlippedDefs;
-            NewLiveIn |= BBS.UpExposedUses;
-            if (NewLiveIn != BBS.LiveIn) {
+            NewLive.reset(BBS.Defs);
+            NewLive |= BBS.UpExposedUses;
+            if (NewLive != BBS.LiveIn) {
                 AnyChanged = true;
-                BBS.LiveIn = NewLiveIn;
+		std::swap(BBS.LiveIn, NewLive);
             }
         }
         Converged = !AnyChanged;
@@ -1972,7 +2002,7 @@ void LateLowerGCFrame::ComputeLiveSets(State &S) {
         // add in any extra live values.
         if (!S.GCPreserves.empty()) {
             if (!S.DT) {
-                S.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+                S.DT = &GetDT();
             }
             for (auto it2 : S.GCPreserves) {
                 if (!S.DT->dominates(it2.first, Safepoint))
@@ -2137,6 +2167,7 @@ std::vector<int> LateLowerGCFrame::ColorRoots(const State &S) {
 // Size of T is assumed to be `sizeof(void*)`
 Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V)
 {
+    auto T_size = getSizeTy(T->getContext());
     assert(T == T_size || isa<PointerType>(T));
     auto TV = cast<PointerType>(V->getType());
     auto cast = builder.CreateBitCast(V, T->getPointerTo(TV->getAddressSpace()));
@@ -2145,6 +2176,7 @@ Value *LateLowerGCFrame::EmitTagPtr(IRBuilder<> &builder, Type *T, Value *V)
 
 Value *LateLowerGCFrame::EmitLoadTag(IRBuilder<> &builder, Value *V)
 {
+    auto T_size = getSizeTy(builder.getContext());
     auto addr = EmitTagPtr(builder, T_size, V);
     LoadInst *load = builder.CreateAlignedLoad(T_size, addr, Align(sizeof(size_t)));
     load->setOrdering(AtomicOrdering::Unordered);
@@ -2203,7 +2235,9 @@ MDNode *createMutableTBAAAccessTag(MDNode *Tag) {
 }
 
 
-bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
+bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) {
+    auto T_int32 = Type::getInt32Ty(F.getContext());
+    auto T_size = getSizeTy(F.getContext());
     bool ChangesMade = false;
     // We create one alloca for all the jlcall frames that haven't been processed
     // yet. LLVM would merge them anyway later, so might as well save it a bit
@@ -2233,6 +2267,18 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                         I->setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
                 }
             }
+            // FCA chains created by SROA start with an undef value
+            // if the type contains an tracked pointer that can lead to a partial
+            // initialisation and LateLower might have inserted an extractvalue
+            // of an undef field. Fix this by changing it to start with an zero-init
+            if (auto *IV = dyn_cast<InsertValueInst>(*&it)) {
+                Value *SourceAggregate = IV->getAggregateOperand();
+                if (isa<UndefValue>(SourceAggregate)) {
+                    IV->setOperand(IV->getAggregateOperandIndex(), ConstantAggregateZero::get(IV->getType()));
+                    ChangesMade = true;
+                }
+            }
+
             auto *CI = dyn_cast<CallInst>(&*it);
             if (!CI) {
                 ++it;
@@ -2245,12 +2291,12 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 /* No replacement */
             } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) {
                 auto *obj = CI->getOperand(0);
-                auto *ASCI = new AddrSpaceCastInst(obj, T_pjlvalue, "", CI);
+                auto *ASCI = new AddrSpaceCastInst(obj, JuliaType::get_pjlvalue_ty(obj->getContext()), "", CI);
                 ASCI->takeName(CI);
                 CI->replaceAllUsesWith(ASCI);
                 UpdatePtrNumbering(CI, ASCI, S);
             } else if (alloc_obj_func && callee == alloc_obj_func) {
-                assert(CI->getNumArgOperands() == 3);
+                assert(CI->arg_size() == 3);
 
                 // Initialize an IR builder.
                 IRBuilder<> builder(CI);
@@ -2259,10 +2305,12 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 // Create a call to the `julia.gc_alloc_bytes` intrinsic, which is like
                 // `julia.gc_alloc_obj` except it doesn't set the tag.
                 auto allocBytesIntrinsic = getOrDeclare(jl_intrinsics::GCAllocBytes);
+                auto ptlsLoad = get_current_ptls_from_task(builder, CI->getArgOperand(0), tbaa_gcframe);
+                auto ptls = builder.CreateBitCast(ptlsLoad, Type::getInt8PtrTy(builder.getContext()));
                 auto newI = builder.CreateCall(
                     allocBytesIntrinsic,
                     {
-                        CI->getArgOperand(0),
+                        ptls,
                         builder.CreateIntCast(
                             CI->getArgOperand(1),
                             allocBytesIntrinsic->getFunctionType()->getParamType(1),
@@ -2274,11 +2322,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 // to remove write barrier because of it.
                 // We pretty much only load using `T_size` so try our best to strip
                 // as many cast as possible.
-#if JL_LLVM_VERSION >= 100000
                 auto tag = CI->getArgOperand(2)->stripPointerCastsAndAliases();
-#else
-                auto tag = CI->getArgOperand(2)->stripPointerCasts();
-#endif
                 if (auto C = dyn_cast<ConstantExpr>(tag)) {
                     if (C->getOpcode() == Instruction::IntToPtr) {
                         tag = C->getOperand(0);
@@ -2304,13 +2348,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 auto tag_type = tag->getType();
                 if (tag_type->isPointerTy()) {
                     auto &DL = CI->getModule()->getDataLayout();
-#if JL_LLVM_VERSION >= 110000
                     auto align = tag->getPointerAlignment(DL).value();
-#elif JL_LLVM_VERSION >= 100000
-                    auto align = tag->getPointerAlignment(DL).valueOrOne().value();
-#else
-                    auto align = tag->getPointerAlignment(DL);
-#endif
                     if (align < 16) {
                         // On 5 <= LLVM < 12, it is illegal to call this on
                         // non-integral pointer. This relies on stripping the
@@ -2331,20 +2369,21 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 // Update the pointer numbering.
                 UpdatePtrNumbering(CI, newI, S);
             } else if (typeof_func && callee == typeof_func) {
-                assert(CI->getNumArgOperands() == 1);
+                assert(CI->arg_size() == 1);
                 IRBuilder<> builder(CI);
                 builder.SetCurrentDebugLocation(CI->getDebugLoc());
                 auto tag = EmitLoadTag(builder, CI->getArgOperand(0));
                 auto masked = builder.CreateAnd(tag, ConstantInt::get(T_size, ~(uintptr_t)15));
-                auto typ = builder.CreateAddrSpaceCast(builder.CreateIntToPtr(masked, T_pjlvalue),
+                auto typ = builder.CreateAddrSpaceCast(builder.CreateIntToPtr(masked, JuliaType::get_pjlvalue_ty(masked->getContext())),
                                                        T_prjlvalue);
                 typ->takeName(CI);
                 CI->replaceAllUsesWith(typ);
                 UpdatePtrNumbering(CI, typ, S);
-            } else if (write_barrier_func && callee == write_barrier_func) {
+            } else if ((write_barrier_func && callee == write_barrier_func) ||
+                       (write_barrier_binding_func && callee == write_barrier_binding_func)) {
                 // The replacement for this requires creating new BasicBlocks
                 // which messes up the loop. Queue all of them to be replaced later.
-                assert(CI->getNumArgOperands() >= 1);
+                assert(CI->arg_size() >= 1);
                 write_barriers.push_back(CI);
                 ChangesMade = true;
                 ++it;
@@ -2352,7 +2391,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
             } else if (CC == JLCALL_F_CC ||
                        CC == JLCALL_F2_CC) {
                 assert(T_prjlvalue);
-                size_t nargs = CI->getNumArgOperands();
+                size_t nargs = CI->arg_size();
                 size_t nframeargs = nargs;
                 if (CC == JLCALL_F_CC)
                     nframeargs -= 1;
@@ -2394,12 +2433,12 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
                 NewCall->setTailCallKind(CI->getTailCallKind());
                 auto old_attrs = CI->getAttributes();
                 NewCall->setAttributes(AttributeList::get(CI->getContext(),
-                                                          old_attrs.getFnAttributes(),
-                                                          old_attrs.getRetAttributes(), {}));
+                                                          getFnAttrs(old_attrs),
+                                                          getRetAttrs(old_attrs), {}));
                 NewCall->copyMetadata(*CI);
                 CI->replaceAllUsesWith(NewCall);
                 UpdatePtrNumbering(CI, NewCall, S);
-            } else if (CI->getNumArgOperands() == CI->getNumOperands()) {
+            } else if (CI->arg_size() == CI->getNumOperands()) {
                 /* No operand bundle to lower */
                 ++it;
                 continue;
@@ -2425,6 +2464,9 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
             CI->eraseFromParent();
             continue;
         }
+        if (CFGModified) {
+            *CFGModified = true;
+        }
         IRBuilder<> builder(CI);
         builder.SetCurrentDebugLocation(CI->getDebugLoc());
         auto parBits = builder.CreateAnd(EmitLoadTag(builder, parent), 3);
@@ -2432,7 +2474,7 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
         auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, CI, false);
         builder.SetInsertPoint(mayTrigTerm);
         Value *anyChldNotMarked = NULL;
-        for (unsigned i = 1; i < CI->getNumArgOperands(); i++) {
+        for (unsigned i = 1; i < CI->arg_size(); i++) {
             Value *child = CI->getArgOperand(i);
             Value *chldBit = builder.CreateAnd(EmitLoadTag(builder, child), 1);
             Value *chldNotMarked = builder.CreateICmpEQ(chldBit, ConstantInt::get(T_size, 0));
@@ -2444,7 +2486,15 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S) {
         auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
                                                   MDB.createBranchWeights(Weights));
         builder.SetInsertPoint(trigTerm);
-        builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
+        if (CI->getCalledOperand() == write_barrier_func) {
+            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCRoot), parent);
+        }
+        else if (CI->getCalledOperand() == write_barrier_binding_func) {
+            builder.CreateCall(getOrDeclare(jl_intrinsics::queueGCBinding), parent);
+        }
+        else {
+            assert(false);
+        }
         CI->eraseFromParent();
     }
     if (maxframeargs == 0 && Frame) {
@@ -2497,7 +2547,7 @@ void LateLowerGCFrame::PlaceGCFrameStore(State &S, unsigned R, unsigned MinColor
     // Get the slot address.
     auto slotAddress = CallInst::Create(
         getOrDeclare(jl_intrinsics::getGCFrameSlot),
-        {GCFrame, ConstantInt::get(T_int32, Colors[R] + MinColorRoot)},
+        {GCFrame, ConstantInt::get(Type::getInt32Ty(InsertBefore->getContext()), Colors[R] + MinColorRoot)},
         "", InsertBefore);
 
     Value *Val = GetPtrForNumber(S, R, InsertBefore);
@@ -2536,6 +2586,7 @@ void LateLowerGCFrame::PlaceGCFrameStores(State &S, unsigned MinColorRoot,
 
 void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State &S, std::map<Value *, std::pair<int, int>>) {
     auto F = S.F;
+    auto T_int32 = Type::getInt32Ty(F->getContext());
     int MaxColor = -1;
     for (auto C : Colors)
         if (C > MaxColor)
@@ -2553,11 +2604,11 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
         auto pushGcframe = CallInst::Create(
             getOrDeclare(jl_intrinsics::pushGCFrame),
             {gcframe, ConstantInt::get(T_int32, 0)});
-        pushGcframe->insertAfter(ptlsStates);
+        pushGcframe->insertAfter(pgcstack);
 
         // Replace Allocas
         unsigned AllocaSlot = 2; // first two words are metadata
-        auto replace_alloca = [this, gcframe, &AllocaSlot](AllocaInst *&AI) {
+        auto replace_alloca = [this, gcframe, &AllocaSlot, T_int32](AllocaInst *&AI) {
             // Pick a slot for the alloca.
             unsigned align = AI->getAlignment() / sizeof(void*); // TODO: use DataLayout pointer size
             assert(align <= 16 / sizeof(void*) && "Alignment exceeds llvm-final-gc-lowering abilities");
@@ -2640,40 +2691,59 @@ void LateLowerGCFrame::PlaceRootsAndUpdateCalls(std::vector<int> &Colors, State
     }
 }
 
-bool LateLowerGCFrame::doInitialization(Module &M) {
-    // Initialize platform-agnostic references.
-    initAll(M);
-    return true;
-}
-
-bool LateLowerGCFrame::runOnFunction(Function &F) {
+bool LateLowerGCFrame::runOnFunction(Function &F, bool *CFGModified) {
+    initAll(*F.getParent());
     LLVM_DEBUG(dbgs() << "GC ROOT PLACEMENT: Processing function " << F.getName() << "\n");
-    // Check availability of functions again since they might have been deleted.
-    initFunctions(*F.getParent());
-    if (!ptls_getter)
-        return CleanupIR(F);
+    if (!pgcstack_getter)
+        return CleanupIR(F, nullptr, CFGModified);
 
-    ptlsStates = getPtls(F);
-    if (!ptlsStates)
-        return CleanupIR(F);
+    pgcstack = getPGCstack(F);
+    if (!pgcstack)
+        return CleanupIR(F, nullptr, CFGModified);
 
     State S = LocalScan(F);
     ComputeLiveness(S);
     std::vector<int> Colors = ColorRoots(S);
     std::map<Value *, std::pair<int, int>> CallFrames; // = OptimizeCallFrames(S, Ordering);
     PlaceRootsAndUpdateCalls(Colors, S, CallFrames);
-    CleanupIR(F, &S);
+    CleanupIR(F, &S, CFGModified);
     return true;
 }
 
-char LateLowerGCFrame::ID = 0;
-static RegisterPass<LateLowerGCFrame> X("LateLowerGCFrame", "Late Lower GCFrame Pass", false, false);
+bool LateLowerGCFrameLegacy::runOnFunction(Function &F) {
+    auto GetDT = [this]() -> DominatorTree & {
+        return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+    };
+    auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
+    return lateLowerGCFrame.runOnFunction(F);
+}
+
+PreservedAnalyses LateLowerGC::run(Function &F, FunctionAnalysisManager &AM)
+{
+    auto GetDT = [&AM, &F]() -> DominatorTree & {
+        return AM.getResult<DominatorTreeAnalysis>(F);
+    };
+    auto lateLowerGCFrame = LateLowerGCFrame(GetDT);
+    bool CFGModified = false;
+    if (lateLowerGCFrame.runOnFunction(F, &CFGModified)) {
+        if (CFGModified) {
+            return PreservedAnalyses::none();
+        } else {
+            return PreservedAnalyses::allInSet<CFGAnalyses>();
+        }
+    }
+    return PreservedAnalyses::all();
+}
+
+
+char LateLowerGCFrameLegacy::ID = 0;
+static RegisterPass<LateLowerGCFrameLegacy> X("LateLowerGCFrame", "Late Lower GCFrame Pass", false, false);
 
 Pass *createLateLowerGCFramePass() {
-    return new LateLowerGCFrame();
+    return new LateLowerGCFrameLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddLateLowerGCFramePass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLateLowerGCFramePass());
 }
diff --git a/src/llvm-lower-handlers.cpp b/src/llvm-lower-handlers.cpp
index 7382f3b74c0801..747066e7318926 100644
--- a/src/llvm-lower-handlers.cpp
+++ b/src/llvm-lower-handlers.cpp
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #include "llvm-version.h"
+#include "passes.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/Types.h>
@@ -21,6 +22,8 @@
 
 #include "julia.h"
 #include "julia_assert.h"
+#include "codegen_shared.h"
+#include <map>
 
 #define DEBUG_TYPE "lower_handlers"
 #undef DEBUG
@@ -68,23 +71,8 @@ using namespace llvm;
  * handler structures to tell LLVM that it is free to re-use the stack slot
  * while the handler is not being used.
  */
-struct LowerExcHandlers : public FunctionPass {
-    static char ID;
-    LowerExcHandlers() : FunctionPass(ID)
-    {}
-
-private:
-    Function *except_enter_func;
-    Function *leave_func;
-    Function *jlenter_func;
-    Function *setjmp_func;
-    Function *lifetime_start;
-    Function *lifetime_end;
-
-    bool doInitialization(Module &M) override;
-    bool runOnFunction(Function &F) override;
-};
 
+namespace {
 /*
  * If the module doesn't have declarations for the jl_enter_handler and setjmp
  * functions, insert them.
@@ -95,11 +83,11 @@ static void ensure_enter_function(Module &M)
     auto T_pint8 = PointerType::get(T_int8, 0);
     auto T_void = Type::getVoidTy(M.getContext());
     auto T_int32 = Type::getInt32Ty(M.getContext());
-    if (!M.getNamedValue("jl_enter_handler")) {
+    if (!M.getNamedValue(XSTR(jl_enter_handler))) {
         std::vector<Type*> ehargs(0);
         ehargs.push_back(T_pint8);
         Function::Create(FunctionType::get(T_void, ehargs, false),
-                         Function::ExternalLinkage, "jl_enter_handler", &M);
+                         Function::ExternalLinkage, XSTR(jl_enter_handler), &M);
     }
     if (!M.getNamedValue(jl_setjmp_name)) {
         std::vector<Type*> args2(0);
@@ -113,24 +101,19 @@ static void ensure_enter_function(Module &M)
     }
 }
 
-bool LowerExcHandlers::doInitialization(Module &M) {
-    except_enter_func = M.getFunction("julia.except_enter");
+static bool lowerExcHandlers(Function &F) {
+    Module &M = *F.getParent();
+    Function *except_enter_func = M.getFunction("julia.except_enter");
     if (!except_enter_func)
-        return false;
+        return false; // No EH frames in this module
     ensure_enter_function(M);
-    leave_func = M.getFunction("jl_pop_handler");
-    jlenter_func = M.getFunction("jl_enter_handler");
-    setjmp_func = M.getFunction(jl_setjmp_name);
+    Function *leave_func = M.getFunction(XSTR(jl_pop_handler));
+    Function *jlenter_func = M.getFunction(XSTR(jl_enter_handler));
+    Function *setjmp_func = M.getFunction(jl_setjmp_name);
 
     auto T_pint8 = Type::getInt8PtrTy(M.getContext(), 0);
-    lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 });
-    lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 });
-    return true;
-}
-
-bool LowerExcHandlers::runOnFunction(Function &F) {
-    if (!except_enter_func)
-        return false; // No EH frames in this module
+    Function *lifetime_start = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_start, { T_pint8 });
+    Function *lifetime_end = Intrinsic::getDeclaration(&M, Intrinsic::lifetime_end, { T_pint8 });
 
     /* Step 1: EH Depth Numbering */
     std::map<llvm::CallInst *, int> EnterDepth;
@@ -176,7 +159,7 @@ bool LowerExcHandlers::runOnFunction(Function &F) {
 
     /* Step 2: EH Frame lowering */
     // Allocate stack space for each handler. We allocate these as separate
-    // allocas so the optimizer can later merge and reaarange them if it wants
+    // allocas so the optimizer can later merge and rearrange them if it wants
     // to.
     Value *handler_sz = ConstantInt::get(Type::getInt32Ty(F.getContext()),
                                          sizeof(jl_handler_t));
@@ -235,17 +218,37 @@ bool LowerExcHandlers::runOnFunction(Function &F) {
     return true;
 }
 
-char LowerExcHandlers::ID = 0;
-static RegisterPass<LowerExcHandlers> X("LowerExcHandlers", "Lower Julia Exception Handlers",
+} // anonymous namespace
+
+PreservedAnalyses LowerExcHandlers::run(Function &F, FunctionAnalysisManager &AM)
+{
+    if (lowerExcHandlers(F)) {
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    }
+    return PreservedAnalyses::all();
+}
+
+
+struct LowerExcHandlersLegacy : public FunctionPass {
+    static char ID;
+    LowerExcHandlersLegacy() : FunctionPass(ID)
+    {}
+    bool runOnFunction(Function &F) {
+        return lowerExcHandlers(F);
+    }
+};
+
+char LowerExcHandlersLegacy::ID = 0;
+static RegisterPass<LowerExcHandlersLegacy> X("LowerExcHandlers", "Lower Julia Exception Handlers",
                                          false /* Only looks at CFG */,
                                          false /* Analysis Pass */);
 
 Pass *createLowerExcHandlersPass()
 {
-    return new LowerExcHandlers();
+    return new LowerExcHandlersLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddLowerExcHandlersPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLowerExcHandlersPass());
 }
diff --git a/src/llvm-muladd.cpp b/src/llvm-muladd.cpp
index e5d63667df4764..6f9658b0f3bb80 100644
--- a/src/llvm-muladd.cpp
+++ b/src/llvm-muladd.cpp
@@ -3,18 +3,22 @@
 #define DEBUG_TYPE "combine_muladd"
 #undef DEBUG
 #include "llvm-version.h"
+#include "passes.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/Types.h>
 
+#include <llvm/ADT/Statistic.h>
 #include <llvm/IR/Value.h>
 #include <llvm/IR/LegacyPassManager.h>
+#include <llvm/IR/PassManager.h>
 #include <llvm/IR/Function.h>
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/Operator.h>
 #include <llvm/IR/IRBuilder.h>
+#include <llvm/IR/Verifier.h>
 #include <llvm/Pass.h>
 #include <llvm/Support/Debug.h>
 
@@ -22,6 +26,7 @@
 #include "julia_assert.h"
 
 using namespace llvm;
+STATISTIC(TotalContracted, "Total number of multiplies marked for FMA");
 
 /**
  * Combine
@@ -34,19 +39,8 @@ using namespace llvm;
  * when `%v0` has no other use
  */
 
-struct CombineMulAdd : public FunctionPass {
-    static char ID;
-    CombineMulAdd() : FunctionPass(ID)
-    {}
-
-private:
-    bool runOnFunction(Function &F) override;
-};
-
-// Return true if this function shouldn't be called again on the other operand
-// This will always return false on LLVM 5.0+
-static bool checkCombine(Module *m, Instruction *addOp, Value *maybeMul, Value *addend,
-                         bool negadd, bool negres)
+// Return true if we changed the mulOp
+static bool checkCombine(Value *maybeMul)
 {
     auto mulOp = dyn_cast<Instruction>(maybeMul);
     if (!mulOp || mulOp->getOpcode() != Instruction::FMul)
@@ -55,14 +49,18 @@ static bool checkCombine(Module *m, Instruction *addOp, Value *maybeMul, Value *
         return false;
     // On 5.0+ we only need to mark the mulOp as contract and the backend will do the work for us.
     auto fmf = mulOp->getFastMathFlags();
-    fmf.setAllowContract(true);
-    mulOp->copyFastMathFlags(fmf);
+    if (!fmf.allowContract()) {
+        ++TotalContracted;
+        fmf.setAllowContract(true);
+        mulOp->copyFastMathFlags(fmf);
+        return true;
+    }
     return false;
 }
 
-bool CombineMulAdd::runOnFunction(Function &F)
+static bool combineMulAdd(Function &F)
 {
-    Module *m = F.getParent();
+    bool modified = false;
     for (auto &BB: F) {
         for (auto it = BB.begin(); it != BB.end();) {
             auto &I = *it;
@@ -71,15 +69,13 @@ bool CombineMulAdd::runOnFunction(Function &F)
             case Instruction::FAdd: {
                 if (!I.isFast())
                     continue;
-                checkCombine(m, &I, I.getOperand(0), I.getOperand(1), false, false) ||
-                    checkCombine(m, &I, I.getOperand(1), I.getOperand(0), false, false);
+                modified |= checkCombine(I.getOperand(0)) || checkCombine(I.getOperand(1));
                 break;
             }
             case Instruction::FSub: {
                 if (!I.isFast())
                     continue;
-                checkCombine(m, &I, I.getOperand(0), I.getOperand(1), true, false) ||
-                    checkCombine(m, &I, I.getOperand(1), I.getOperand(0), true, true);
+                modified |= checkCombine(I.getOperand(0)) || checkCombine(I.getOperand(1));
                 break;
             }
             default:
@@ -87,20 +83,41 @@ bool CombineMulAdd::runOnFunction(Function &F)
             }
         }
     }
-    return true;
+    assert(!verifyFunction(F));
+    return modified;
 }
 
-char CombineMulAdd::ID = 0;
-static RegisterPass<CombineMulAdd> X("CombineMulAdd", "Combine mul and add to muladd",
+PreservedAnalyses CombineMulAdd::run(Function &F, FunctionAnalysisManager &AM)
+{
+    if (combineMulAdd(F)) {
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    }
+    return PreservedAnalyses::all();
+}
+
+
+struct CombineMulAddLegacy : public FunctionPass {
+    static char ID;
+    CombineMulAddLegacy() : FunctionPass(ID)
+    {}
+
+private:
+    bool runOnFunction(Function &F) override {
+        return combineMulAdd(F);
+    }
+};
+
+char CombineMulAddLegacy::ID = 0;
+static RegisterPass<CombineMulAddLegacy> X("CombineMulAdd", "Combine mul and add to muladd",
                                      false /* Only looks at CFG */,
                                      false /* Analysis Pass */);
 
 Pass *createCombineMulAddPass()
 {
-    return new CombineMulAdd();
+    return new CombineMulAddLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddCombineMulAddPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createCombineMulAddPass());
 }
diff --git a/src/llvm-multiversioning.cpp b/src/llvm-multiversioning.cpp
index 68081eb53d3a5b..740c2b5a83646a 100644
--- a/src/llvm-multiversioning.cpp
+++ b/src/llvm-multiversioning.cpp
@@ -7,11 +7,13 @@
 // LLVM pass to clone function for different archs
 
 #include "llvm-version.h"
+#include "passes.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/Types.h>
 
 #include <llvm/Pass.h>
+#include <llvm/ADT/Statistic.h>
 #include <llvm/IR/Module.h>
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Function.h>
@@ -23,6 +25,7 @@
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/DebugInfoMetadata.h>
+#include <llvm/IR/Verifier.h>
 #include <llvm/Transforms/Utils/Cloning.h>
 
 #include "julia.h"
@@ -40,16 +43,11 @@
 
 using namespace llvm;
 
-extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr,
-                                                  bool isConstant=false);
+extern Optional<bool> always_have_fma(Function&);
 
 namespace {
-
-// These are valid detail cloning conditions in the target flags.
 constexpr uint32_t clone_mask =
-    JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH;
-
-struct MultiVersioning;
+    JL_TARGET_CLONE_LOOP | JL_TARGET_CLONE_SIMD | JL_TARGET_CLONE_MATH | JL_TARGET_CLONE_CPU;
 
 // Treat identical mapping as missing and return `def` in that case.
 // We mainly need this to identify cloned function using value map after LLVM cloning
@@ -246,7 +244,7 @@ struct CloneCtx {
             return cast<Function>(vmap->lookup(orig_f));
         }
     };
-    CloneCtx(MultiVersioning *pass, Module &M);
+    CloneCtx(Module &M, function_ref<LoopInfo&(Function&)> GetLI, function_ref<CallGraph&()> GetCG, bool allow_bad_fvars);
     void clone_bases();
     void collect_func_infos();
     void clone_all_partials();
@@ -272,20 +270,17 @@ struct CloneCtx {
     Constant *get_ptrdiff32(Constant *ptr, Constant *base) const;
     template<typename T>
     Constant *emit_offset_table(const std::vector<T*> &vars, StringRef name) const;
+    void rewrite_alias(GlobalAlias *alias, Function* F);
 
-    LLVMContext &ctx;
-    Type *T_size;
-    Type *T_int32;
-    Type *T_void;
-    PointerType *T_psize;
-    PointerType *T_pvoidfunc;
     MDNode *tbaa_const;
-    MultiVersioning *pass;
     std::vector<jl_target_spec_t> specs;
     std::vector<Group> groups{};
     std::vector<Function*> fvars;
     std::vector<Constant*> gvars;
     Module &M;
+    function_ref<LoopInfo&(Function&)> GetLI;
+    function_ref<CallGraph&()> GetCG;
+
     // Map from original functiton to one based index in `fvars`
     std::map<const Function*,uint32_t> func_ids{};
     std::vector<Function*> orig_funcs{};
@@ -295,29 +290,15 @@ struct CloneCtx {
     std::vector<std::pair<Constant*,uint32_t>> gv_relocs{};
     // Mapping from function id (i.e. 0-based index in `fvars`) to GVs to be initialized.
     std::map<uint32_t,GlobalVariable*> const_relocs;
+    // Functions that were referred to by a global alias, and might not have other uses.
+    std::set<uint32_t> alias_relocs;
     bool has_veccall{false};
     bool has_cloneall{false};
-};
-
-struct MultiVersioning: public ModulePass {
-    static char ID;
-    MultiVersioning()
-        : ModulePass(ID)
-    {}
-
-private:
-    bool runOnModule(Module &M) override;
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        AU.addRequired<LoopInfoWrapperPass>();
-        AU.addRequired<CallGraphWrapperPass>();
-        AU.addPreserved<LoopInfoWrapperPass>();
-    }
-    friend struct CloneCtx;
+    bool allow_bad_fvars{false};
 };
 
 template<typename T>
-static inline std::vector<T*> consume_gv(Module &M, const char *name)
+static inline std::vector<T*> consume_gv(Module &M, const char *name, bool allow_bad_fvars)
 {
     // Get information about sysimg export functions from the two global variables.
     // Strip them from the Module so that it's easier to handle the uses.
@@ -326,8 +307,17 @@ static inline std::vector<T*> consume_gv(Module &M, const char *name)
     auto *ary = cast<ConstantArray>(gv->getInitializer());
     unsigned nele = ary->getNumOperands();
     std::vector<T*> res(nele);
-    for (unsigned i = 0; i < nele; i++)
-        res[i] = cast<T>(ary->getOperand(i)->stripPointerCasts());
+    unsigned i = 0;
+    while (i < nele) {
+        llvm::Value *val = ary->getOperand(i)->stripPointerCasts();
+        if (allow_bad_fvars && (!isa<T>(val) || (isa<Function>(val) && cast<Function>(val)->isDeclaration()))) {
+            // Shouldn't happen in regular use, but can happen in bugpoint.
+            nele--;
+            continue;
+        }
+        res[i++] = cast<T>(val);
+    }
+    res.resize(nele);
     assert(gv->use_empty());
     gv->eraseFromParent();
     if (ary->use_empty())
@@ -336,19 +326,15 @@ static inline std::vector<T*> consume_gv(Module &M, const char *name)
 }
 
 // Collect basic information about targets and functions.
-CloneCtx::CloneCtx(MultiVersioning *pass, Module &M)
-    : ctx(M.getContext()),
-      T_size(M.getDataLayout().getIntPtrType(ctx, 0)),
-      T_int32(Type::getInt32Ty(ctx)),
-      T_void(Type::getVoidTy(ctx)),
-      T_psize(PointerType::get(T_size, 0)),
-      T_pvoidfunc(FunctionType::get(T_void, false)->getPointerTo()),
-      tbaa_const(tbaa_make_child("jtbaa_const", nullptr, true).first),
-      pass(pass),
+CloneCtx::CloneCtx(Module &M, function_ref<LoopInfo&(Function&)> GetLI, function_ref<CallGraph&()> GetCG, bool allow_bad_fvars)
+    : tbaa_const(tbaa_make_child_with_context(M.getContext(), "jtbaa_const", nullptr, true).first),
       specs(jl_get_llvm_clone_targets()),
-      fvars(consume_gv<Function>(M, "jl_sysimg_fvars")),
-      gvars(consume_gv<Constant>(M, "jl_sysimg_gvars")),
-      M(M)
+      fvars(consume_gv<Function>(M, "jl_sysimg_fvars", allow_bad_fvars)),
+      gvars(consume_gv<Constant>(M, "jl_sysimg_gvars", false)),
+      M(M),
+      GetLI(GetLI),
+      GetCG(GetCG),
+      allow_bad_fvars(allow_bad_fvars)
 {
     groups.emplace_back(0, specs[0]);
     uint32_t ntargets = specs.size();
@@ -403,7 +389,12 @@ void CloneCtx::clone_function(Function *F, Function *new_f, ValueToValueMapTy &v
         vmap[&*J] = &*DestI++;
     }
     SmallVector<ReturnInst*,8> Returns;
+#if JL_LLVM_VERSION >= 130000
+    // We are cloning into the same module
+    CloneFunctionInto(new_f, F, vmap, CloneFunctionChangeType::GlobalChanges, Returns);
+#else
     CloneFunctionInto(new_f, F, vmap, true, Returns);
+#endif
 }
 
 // Clone all clone_all targets. Makes sure that the base targets are all available.
@@ -446,7 +437,7 @@ bool CloneCtx::is_vector(FunctionType *ty) const
 uint32_t CloneCtx::collect_func_info(Function &F)
 {
     uint32_t flag = 0;
-    if (!pass->getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo().empty())
+    if (!GetLI(F).empty())
         flag |= JL_TARGET_CLONE_LOOP;
     if (is_vector(F.getFunctionType())) {
         flag |= JL_TARGET_CLONE_SIMD;
@@ -464,6 +455,16 @@ uint32_t CloneCtx::collect_func_info(Function &F)
                     if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) {
                         flag |= JL_TARGET_CLONE_MATH;
                     }
+                    else if (name.startswith("julia.cpu.")) {
+                        if (name.startswith("julia.cpu.have_fma.")) {
+                            // for some platforms we know they always do (or don't) support
+                            // FMA. in those cases we don't need to clone the function.
+                            if (!always_have_fma(*callee).hasValue())
+                                flag |= JL_TARGET_CLONE_CPU;
+                        } else {
+                            flag |= JL_TARGET_CLONE_CPU;
+                        }
+                    }
                 }
             }
             else if (auto store = dyn_cast<StoreInst>(&I)) {
@@ -550,7 +551,7 @@ void CloneCtx::check_partial(Group &grp, Target &tgt)
     auto *next_set = &sets[1];
     // Reduce dispatch by expand the cloning set to functions that are directly called by
     // and calling cloned functions.
-    auto &graph = pass->getAnalysis<CallGraphWrapperPass>().getCallGraph();
+    auto &graph = GetCG();
     while (!cur_set->empty()) {
         for (auto orig_f: *cur_set) {
             // Use the uncloned function since it's already in the call graph
@@ -648,6 +649,12 @@ uint32_t CloneCtx::get_func_id(Function *F)
 {
     auto &ref = func_ids[F];
     if (!ref) {
+        if (allow_bad_fvars && F->isDeclaration()) {
+            // This should never happen in regular use, but can happen if
+            // bugpoint deletes the function. Just do something here to
+            // allow bugpoint to proceed.
+            return (uint32_t)-1;
+        }
         fvars.push_back(F);
         ref = fvars.size();
     }
@@ -697,6 +704,54 @@ Constant *CloneCtx::rewrite_gv_init(const Stack& stack)
     return res;
 }
 
+// replace an alias to a function with a trampoline and (uninitialized) global variable slot
+void CloneCtx::rewrite_alias(GlobalAlias *alias, Function *F)
+{
+    assert(!is_vector(F->getFunctionType()));
+
+    Function *trampoline =
+        Function::Create(F->getFunctionType(), alias->getLinkage(), "", &M);
+    trampoline->copyAttributesFrom(F);
+    trampoline->takeName(alias);
+    alias->eraseFromParent();
+
+    uint32_t id;
+    GlobalVariable *slot;
+    std::tie(id, slot) = get_reloc_slot(F);
+    for (auto &grp: groups) {
+        grp.relocs.insert(id);
+        for (auto &tgt: grp.clones) {
+            tgt.relocs.insert(id);
+        }
+    }
+    alias_relocs.insert(id);
+
+    auto BB = BasicBlock::Create(F->getContext(), "top", trampoline);
+    IRBuilder<> irbuilder(BB);
+
+    auto ptr = irbuilder.CreateLoad(F->getType(), slot);
+    ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
+    ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(F->getContext(), None));
+
+    std::vector<Value *> Args;
+    for (auto &arg : trampoline->args())
+        Args.push_back(&arg);
+    auto call = irbuilder.CreateCall(F->getFunctionType(), ptr, makeArrayRef(Args));
+    if (F->isVarArg())
+#if (defined(_CPU_ARM_) || defined(_CPU_PPC_) || defined(_CPU_PPC64_))
+        abort();    // musttail support is very bad on ARM, PPC, PPC64 (as of LLVM 3.9)
+#else
+        call->setTailCallKind(CallInst::TCK_MustTail);
+#endif
+    else
+        call->setTailCallKind(CallInst::TCK_Tail);
+
+    if (F->getReturnType() == Type::getVoidTy(F->getContext()))
+        irbuilder.CreateRetVoid();
+    else
+        irbuilder.CreateRet(call);
+}
+
 void CloneCtx::fix_gv_uses()
 {
     auto single_pass = [&] (Function *orig_f) {
@@ -707,14 +762,20 @@ void CloneCtx::fix_gv_uses()
             auto info = uses.get_info();
             // We only support absolute pointer relocation.
             assert(info.samebits);
-            // And only for non-constant global variable initializers
-            auto val = cast<GlobalVariable>(info.val);
+            GlobalVariable *val;
+            if (auto alias = dyn_cast<GlobalAlias>(info.val)) {
+                rewrite_alias(alias, orig_f);
+                continue;
+            }
+            else {
+                val = cast<GlobalVariable>(info.val);
+            }
             assert(info.use->getOperandNo() == 0);
             assert(!val->isConstant());
             auto fid = get_func_id(orig_f);
-            auto addr = ConstantExpr::getPtrToInt(val, T_size);
+            auto addr = ConstantExpr::getPtrToInt(val, getSizeTy(val->getContext()));
             if (info.offset)
-                addr = ConstantExpr::getAdd(addr, ConstantInt::get(T_size, info.offset));
+                addr = ConstantExpr::getAdd(addr, ConstantInt::get(getSizeTy(val->getContext()), info.offset));
             gv_relocs.emplace_back(addr, fid);
             val->setInitializer(rewrite_gv_init(stack));
         }
@@ -734,8 +795,8 @@ std::pair<uint32_t,GlobalVariable*> CloneCtx::get_reloc_slot(Function *F)
     auto id = get_func_id(F);
     auto &slot = const_relocs[id];
     if (!slot)
-        slot = new GlobalVariable(M, T_pvoidfunc, false, GlobalVariable::InternalLinkage,
-                                  ConstantPointerNull::get(T_pvoidfunc),
+        slot = new GlobalVariable(M, F->getType(), false, GlobalVariable::InternalLinkage,
+                                  ConstantPointerNull::get(F->getType()),
                                   F->getName() + ".reloc_slot");
     return std::make_pair(id, slot);
 }
@@ -778,7 +839,7 @@ Value *CloneCtx::rewrite_inst_use(const Stack& stack, Value *replace, Instructio
         }
         else if (isa<ConstantVector>(val)) {
             replace = InsertElementInst::Create(ConstantVector::get(args), replace,
-                                                ConstantInt::get(T_size, idx), "",
+                                                ConstantInt::get(getSizeTy(insert_before->getContext()), idx), "",
                                                 insert_before);
         }
         else {
@@ -815,10 +876,9 @@ void CloneCtx::fix_inst_uses()
                     uint32_t id;
                     GlobalVariable *slot;
                     std::tie(id, slot) = get_reloc_slot(orig_f);
-                    Instruction *ptr = new LoadInst(T_pvoidfunc, slot, "", false, insert_before);
+                    Instruction *ptr = new LoadInst(orig_f->getType(), slot, "", false, insert_before);
                     ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
-                    ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ctx, None));
-                    ptr = new BitCastInst(ptr, F->getType(), "", insert_before);
+                    ptr->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(ptr->getContext(), None));
                     use_i->setOperand(info.use->getOperandNo(),
                                       rewrite_inst_use(uses.get_stack(), ptr,
                                                        insert_before));
@@ -843,15 +903,6 @@ template<typename T>
 inline T *CloneCtx::add_comdat(T *G) const
 {
 #if defined(_OS_WINDOWS_)
-    // Add comdat information to make MSVC link.exe happy
-    // it's valid to emit this for ld.exe too,
-    // but makes it very slow to link for no benefit
-#if defined(_COMPILER_MICROSOFT_)
-    Comdat *jl_Comdat = G->getParent()->getOrInsertComdat(G->getName());
-    // ELF only supports Comdat::Any
-    jl_Comdat->setSelectionKind(Comdat::NoDuplicates);
-    G->setComdat(jl_Comdat);
-#endif
     // add __declspec(dllexport) to everything marked for export
     if (G->getLinkage() == GlobalValue::ExternalLinkage)
         G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
@@ -864,18 +915,20 @@ inline T *CloneCtx::add_comdat(T *G) const
 Constant *CloneCtx::get_ptrdiff32(Constant *ptr, Constant *base) const
 {
     if (ptr->getType()->isPointerTy())
-        ptr = ConstantExpr::getPtrToInt(ptr, T_size);
+        ptr = ConstantExpr::getPtrToInt(ptr, getSizeTy(ptr->getContext()));
     auto ptrdiff = ConstantExpr::getSub(ptr, base);
-    return sizeof(void*) == 8 ? ConstantExpr::getTrunc(ptrdiff, T_int32) : ptrdiff;
+    return sizeof(void*) == 8 ? ConstantExpr::getTrunc(ptrdiff, Type::getInt32Ty(ptr->getContext())) : ptrdiff;
 }
 
 template<typename T>
 Constant *CloneCtx::emit_offset_table(const std::vector<T*> &vars, StringRef name) const
 {
+    auto T_int32 = Type::getInt32Ty(M.getContext());
+    auto T_size = getSizeTy(M.getContext());
     assert(!vars.empty());
     add_comdat(GlobalAlias::create(T_size, 0, GlobalVariable::ExternalLinkage,
                                    name + "_base",
-                                   ConstantExpr::getBitCast(vars[0], T_psize), &M));
+                                   ConstantExpr::getBitCast(vars[0], T_size->getPointerTo()), &M));
     auto vbase = ConstantExpr::getPtrToInt(vars[0], T_size);
     uint32_t nvars = vars.size();
     std::vector<Constant*> offsets(nvars + 1);
@@ -893,10 +946,15 @@ Constant *CloneCtx::emit_offset_table(const std::vector<T*> &vars, StringRef nam
 
 void CloneCtx::emit_metadata()
 {
+    uint32_t nfvars = fvars.size();
+    if (allow_bad_fvars && nfvars == 0) {
+        // Will result in a non-loadable sysimg, but `allow_bad_fvars` is for bugpoint only
+        return;
+    }
+
     // Store back the information about exported functions.
     auto fbase = emit_offset_table(fvars, "jl_sysimg_fvars");
     auto gbase = emit_offset_table(gvars, "jl_sysimg_gvars");
-    uint32_t nfvars = fvars.size();
 
     uint32_t ntargets = specs.size();
     SmallVector<Target*, 8> targets(ntargets);
@@ -922,7 +980,7 @@ void CloneCtx::emit_metadata()
             auto &specdata = specs[i].data;
             data.insert(data.end(), specdata.begin(), specdata.end());
         }
-        auto value = ConstantDataArray::get(ctx, data);
+        auto value = ConstantDataArray::get(M.getContext(), data);
         add_comdat(new GlobalVariable(M, value->getType(), true,
                                       GlobalVariable::ExternalLinkage,
                                       value, "jl_dispatch_target_ids"));
@@ -931,6 +989,7 @@ void CloneCtx::emit_metadata()
     // Generate `jl_dispatch_reloc_slots`
     std::set<uint32_t> shared_relocs;
     {
+        auto T_int32 = Type::getInt32Ty(M.getContext());
         std::stable_sort(gv_relocs.begin(), gv_relocs.end(),
                          [] (const std::pair<Constant*,uint32_t> &lhs,
                              const std::pair<Constant*,uint32_t> &rhs) {
@@ -955,6 +1014,9 @@ void CloneCtx::emit_metadata()
                 values.push_back(id_v);
                 values.push_back(get_ptrdiff32(it->second, gbase));
             }
+            if (alias_relocs.find(id) != alias_relocs.end()) {
+                shared_relocs.insert(id);
+            }
         }
         values[0] = ConstantInt::get(T_int32, values.size() / 2);
         ArrayType *vars_type = ArrayType::get(T_int32, values.size());
@@ -992,7 +1054,7 @@ void CloneCtx::emit_metadata()
                 idxs.push_back(baseidx);
                 for (uint32_t j = 0; j < nfvars; j++) {
                     auto base_f = grp->base_func(fvars[j]);
-                    if (shared_relocs.count(j)) {
+                    if (shared_relocs.count(j) || tgt->relocs.count(j)) {
                         count++;
                         idxs.push_back(jl_sysimg_tag_mask | j);
                         auto f = map_get(*tgt->vmap, base_f, base_f);
@@ -1000,18 +1062,18 @@ void CloneCtx::emit_metadata()
                     }
                     else if (auto f = map_get(*tgt->vmap, base_f)) {
                         count++;
-                        idxs.push_back(tgt->relocs.count(j) ? (jl_sysimg_tag_mask | j) : j);
+                        idxs.push_back(j);
                         offsets.push_back(get_ptrdiff32(cast<Function>(f), fbase));
                     }
                 }
             }
             idxs[len_idx] = count;
         }
-        auto idxval = ConstantDataArray::get(ctx, idxs);
+        auto idxval = ConstantDataArray::get(M.getContext(), idxs);
         add_comdat(new GlobalVariable(M, idxval->getType(), true,
                                       GlobalVariable::ExternalLinkage,
                                       idxval, "jl_dispatch_fvars_idxs"));
-        ArrayType *offsets_type = ArrayType::get(T_int32, offsets.size());
+        ArrayType *offsets_type = ArrayType::get(Type::getInt32Ty(M.getContext()), offsets.size());
         add_comdat(new GlobalVariable(M, offsets_type, true,
                                       GlobalVariable::ExternalLinkage,
                                       ConstantArray::get(offsets_type, offsets),
@@ -1019,7 +1081,7 @@ void CloneCtx::emit_metadata()
     }
 }
 
-bool MultiVersioning::runOnModule(Module &M)
+static bool runMultiVersioning(Module &M, function_ref<LoopInfo&(Function&)> GetLI, function_ref<CallGraph&()> GetCG, bool allow_bad_fvars)
 {
     // Group targets and identify cloning bases.
     // Also initialize function info maps (we'll update these maps as we go)
@@ -1032,7 +1094,13 @@ bool MultiVersioning::runOnModule(Module &M)
     if (M.getName() == "sysimage")
         return false;
 
-    CloneCtx clone(this, M);
+    GlobalVariable *fvars = M.getGlobalVariable("jl_sysimg_fvars");
+    GlobalVariable *gvars = M.getGlobalVariable("jl_sysimg_gvars");
+    if (allow_bad_fvars && (!fvars || !fvars->hasInitializer() || !isa<ConstantArray>(fvars->getInitializer()) ||
+                            !gvars || !gvars->hasInitializer() || !isa<ConstantArray>(gvars->getInitializer())))
+        return false;
+
+    CloneCtx clone(M, GetLI, GetCG, allow_bad_fvars);
 
     // Collect a list of original functions and clone base functions
     clone.clone_bases();
@@ -1067,22 +1135,70 @@ bool MultiVersioning::runOnModule(Module &M)
     // and collected all the shared/target-specific relocations.
     clone.emit_metadata();
 
+    assert(!verifyModule(M));
+
     return true;
 }
 
-char MultiVersioning::ID = 0;
-static RegisterPass<MultiVersioning> X("JuliaMultiVersioning", "JuliaMultiVersioning Pass",
+struct MultiVersioningLegacy: public ModulePass {
+    static char ID;
+    MultiVersioningLegacy(bool allow_bad_fvars=false)
+        : ModulePass(ID), allow_bad_fvars(allow_bad_fvars)
+    {}
+
+private:
+    bool runOnModule(Module &M) override;
+    void getAnalysisUsage(AnalysisUsage &AU) const override
+    {
+        AU.addRequired<LoopInfoWrapperPass>();
+        AU.addRequired<CallGraphWrapperPass>();
+        AU.addPreserved<LoopInfoWrapperPass>();
+    }
+    bool allow_bad_fvars;
+};
+
+bool MultiVersioningLegacy::runOnModule(Module &M)
+{
+    auto GetLI = [this](Function &F) -> LoopInfo & {
+        return getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo();
+    };
+    auto GetCG = [this]() -> CallGraph & {
+        return getAnalysis<CallGraphWrapperPass>().getCallGraph();
+    };
+    return runMultiVersioning(M, GetLI, GetCG, allow_bad_fvars);
+}
+
+
+char MultiVersioningLegacy::ID = 0;
+static RegisterPass<MultiVersioningLegacy> X("JuliaMultiVersioning", "JuliaMultiVersioning Pass",
                                        false /* Only looks at CFG */,
                                        false /* Analysis Pass */);
 
+} // anonymous namespace
+
+PreservedAnalyses MultiVersioning::run(Module &M, ModuleAnalysisManager &AM)
+{
+    auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+    auto GetLI = [&](Function &F) -> LoopInfo & {
+        return FAM.getResult<LoopAnalysis>(F);
+    };
+    auto GetCG = [&]() -> CallGraph & {
+        return AM.getResult<CallGraphAnalysis>(M);
+    };
+    if (runMultiVersioning(M, GetLI, GetCG, false)) {
+        auto preserved = PreservedAnalyses::allInSet<CFGAnalyses>();
+        preserved.preserve<LoopAnalysis>();
+        return preserved;
+    }
+    return PreservedAnalyses::all();
 }
 
-Pass *createMultiVersioningPass()
+Pass *createMultiVersioningPass(bool allow_bad_fvars)
 {
-    return new MultiVersioning();
+    return new MultiVersioningLegacy(allow_bad_fvars);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddMultiVersioningPass_impl(LLVMPassManagerRef PM)
 {
-    unwrap(PM)->add(createMultiVersioningPass());
+    unwrap(PM)->add(createMultiVersioningPass(false));
 }
diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp
index d594408a20992a..d176773c1bf5f1 100644
--- a/src/llvm-pass-helpers.cpp
+++ b/src/llvm-pass-helpers.cpp
@@ -18,35 +18,38 @@
 
 using namespace llvm;
 
-extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false);
-
 JuliaPassContext::JuliaPassContext()
-    : T_size(nullptr), T_int8(nullptr), T_int32(nullptr),
-        T_pint8(nullptr), T_jlvalue(nullptr), T_prjlvalue(nullptr),
-        T_ppjlvalue(nullptr), T_pjlvalue(nullptr), T_pjlvalue_der(nullptr),
-        T_ppjlvalue_der(nullptr), ptls_getter(nullptr), gc_flush_func(nullptr),
+    : T_prjlvalue(nullptr),
+
+        tbaa_gcframe(nullptr), tbaa_tag(nullptr),
+
+        pgcstack_getter(nullptr), gc_flush_func(nullptr),
         gc_preserve_begin_func(nullptr), gc_preserve_end_func(nullptr),
         pointer_from_objref_func(nullptr), alloc_obj_func(nullptr),
-        typeof_func(nullptr), write_barrier_func(nullptr), module(nullptr)
+        typeof_func(nullptr), write_barrier_func(nullptr),
+        write_barrier_binding_func(nullptr), module(nullptr)
 {
-    tbaa_gcframe = tbaa_make_child("jtbaa_gcframe").first;
-    MDNode *tbaa_data;
-    MDNode *tbaa_data_scalar;
-    std::tie(tbaa_data, tbaa_data_scalar) = tbaa_make_child("jtbaa_data");
-    tbaa_tag = tbaa_make_child("jtbaa_tag", tbaa_data_scalar).first;
 }
 
 void JuliaPassContext::initFunctions(Module &M)
 {
     module = &M;
+    LLVMContext &llvmctx = M.getContext();
+
+    tbaa_gcframe = tbaa_make_child_with_context(llvmctx, "jtbaa_gcframe").first;
+    MDNode *tbaa_data;
+    MDNode *tbaa_data_scalar;
+    std::tie(tbaa_data, tbaa_data_scalar) = tbaa_make_child_with_context(llvmctx, "jtbaa_data");
+    tbaa_tag = tbaa_make_child_with_context(llvmctx, "jtbaa_tag", tbaa_data_scalar).first;
 
-    ptls_getter = M.getFunction("julia.ptls_states");
+    pgcstack_getter = M.getFunction("julia.get_pgcstack");
     gc_flush_func = M.getFunction("julia.gcroot_flush");
     gc_preserve_begin_func = M.getFunction("llvm.julia.gc_preserve_begin");
     gc_preserve_end_func = M.getFunction("llvm.julia.gc_preserve_end");
     pointer_from_objref_func = M.getFunction("julia.pointer_from_objref");
     typeof_func = M.getFunction("julia.typeof");
     write_barrier_func = M.getFunction("julia.write_barrier");
+    write_barrier_binding_func = M.getFunction("julia.write_barrier_binding");
     alloc_obj_func = M.getFunction("julia.gc_alloc_obj");
 }
 
@@ -57,26 +60,17 @@ void JuliaPassContext::initAll(Module &M)
 
     // Then initialize types and metadata nodes.
     auto &ctx = M.getContext();
-    T_size = M.getDataLayout().getIntPtrType(ctx);
-    T_int8 = Type::getInt8Ty(ctx);
-    T_pint8 = PointerType::get(T_int8, 0);
-    T_int32 = Type::getInt32Ty(ctx);
 
     // Construct derived types.
-    T_jlvalue = StructType::get(ctx);
-    T_pjlvalue = PointerType::get(T_jlvalue, 0);
-    T_prjlvalue = PointerType::get(T_jlvalue, AddressSpace::Tracked);
-    T_ppjlvalue = PointerType::get(T_pjlvalue, 0);
-    T_pjlvalue_der = PointerType::get(T_jlvalue, AddressSpace::Derived);
-    T_ppjlvalue_der = PointerType::get(T_prjlvalue, AddressSpace::Derived);
+    T_prjlvalue = JuliaType::get_prjlvalue_ty(ctx);
 }
 
-llvm::CallInst *JuliaPassContext::getPtls(llvm::Function &F) const
+llvm::CallInst *JuliaPassContext::getPGCstack(llvm::Function &F) const
 {
     for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end();
-         ptls_getter && I != E; ++I) {
+         pgcstack_getter && I != E; ++I) {
         if (CallInst *callInst = dyn_cast<CallInst>(&*I)) {
-            if (callInst->getCalledOperand() == ptls_getter) {
+            if (callInst->getCalledOperand() == pgcstack_getter) {
                 return callInst;
             }
         }
@@ -117,14 +111,15 @@ namespace jl_intrinsics {
     static const char *PUSH_GC_FRAME_NAME = "julia.push_gc_frame";
     static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame";
     static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root";
+    static const char *QUEUE_GC_BINDING_NAME = "julia.queue_gc_binding";
 
     // Annotates a function with attributes suitable for GC allocation
     // functions. Specifically, the return value is marked noalias and nonnull.
     // The allocation size is set to the first argument.
     static Function *addGCAllocAttributes(Function *target, LLVMContext &context)
     {
-        target->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
-        target->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+        addRetAttr(target, Attribute::NoAlias);
+        addRetAttr(target, Attribute::NonNull);
         target->addFnAttr(Attribute::getWithAllocSizeArgs(context, 1, None)); // returns %1 bytes
         return target;
     }
@@ -135,7 +130,7 @@ namespace jl_intrinsics {
             return Function::Create(
                 FunctionType::get(
                     PointerType::get(context.T_prjlvalue, 0),
-                    {PointerType::get(context.T_prjlvalue, 0), context.T_int32},
+                    {PointerType::get(context.T_prjlvalue, 0), Type::getInt32Ty(context.getLLVMContext())},
                     false),
                 Function::ExternalLinkage,
                 GET_GC_FRAME_SLOT_NAME);
@@ -147,7 +142,10 @@ namespace jl_intrinsics {
             auto intrinsic = Function::Create(
                 FunctionType::get(
                     context.T_prjlvalue,
-                    { context.T_pint8, context.T_size },
+                    { Type::getInt8PtrTy(context.getLLVMContext()),
+                        sizeof(size_t) == sizeof(uint32_t) ?
+                        Type::getInt32Ty(context.getLLVMContext()) :
+                        Type::getInt64Ty(context.getLLVMContext()) },
                     false),
                 Function::ExternalLinkage,
                 GC_ALLOC_BYTES_NAME);
@@ -159,11 +157,11 @@ namespace jl_intrinsics {
         NEW_GC_FRAME_NAME,
         [](const JuliaPassContext &context) {
             auto intrinsic = Function::Create(
-                FunctionType::get(PointerType::get(context.T_prjlvalue, 0), {context.T_int32}, false),
+                FunctionType::get(PointerType::get(context.T_prjlvalue, 0), {Type::getInt32Ty(context.getLLVMContext())}, false),
                 Function::ExternalLinkage,
                 NEW_GC_FRAME_NAME);
-            intrinsic->addAttribute(AttributeList::ReturnIndex, Attribute::NoAlias);
-            intrinsic->addAttribute(AttributeList::ReturnIndex, Attribute::NonNull);
+            addRetAttr(intrinsic, Attribute::NoAlias);
+            addRetAttr(intrinsic, Attribute::NonNull);
 
             return intrinsic;
         });
@@ -174,7 +172,7 @@ namespace jl_intrinsics {
             return Function::Create(
                 FunctionType::get(
                     Type::getVoidTy(context.getLLVMContext()),
-                    {PointerType::get(context.T_prjlvalue, 0), context.T_int32},
+                    {PointerType::get(context.T_prjlvalue, 0), Type::getInt32Ty(context.getLLVMContext())},
                     false),
                 Function::ExternalLinkage,
                 PUSH_GC_FRAME_NAME);
@@ -205,12 +203,27 @@ namespace jl_intrinsics {
             intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
             return intrinsic;
         });
+
+    const IntrinsicDescription queueGCBinding(
+        QUEUE_GC_BINDING_NAME,
+        [](const JuliaPassContext &context) {
+            auto intrinsic = Function::Create(
+                FunctionType::get(
+                    Type::getVoidTy(context.getLLVMContext()),
+                    { context.T_prjlvalue },
+                    false),
+                Function::ExternalLinkage,
+                QUEUE_GC_BINDING_NAME);
+            intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+            return intrinsic;
+        });
 }
 
 namespace jl_well_known {
-    static const char *GC_BIG_ALLOC_NAME = "jl_gc_big_alloc";
-    static const char *GC_POOL_ALLOC_NAME = "jl_gc_pool_alloc";
-    static const char *GC_QUEUE_ROOT_NAME = "jl_gc_queue_root";
+    static const char *GC_BIG_ALLOC_NAME = XSTR(jl_gc_big_alloc);
+    static const char *GC_POOL_ALLOC_NAME = XSTR(jl_gc_pool_alloc);
+    static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root);
+    static const char *GC_QUEUE_BINDING_NAME = XSTR(jl_gc_queue_binding);
 
     using jl_intrinsics::addGCAllocAttributes;
 
@@ -220,7 +233,10 @@ namespace jl_well_known {
             auto bigAllocFunc = Function::Create(
                 FunctionType::get(
                     context.T_prjlvalue,
-                    { context.T_pint8, context.T_size },
+                    { Type::getInt8PtrTy(context.getLLVMContext()),
+                        sizeof(size_t) == sizeof(uint32_t) ?
+                        Type::getInt32Ty(context.getLLVMContext()) :
+                        Type::getInt64Ty(context.getLLVMContext()) },
                     false),
                 Function::ExternalLinkage,
                 GC_BIG_ALLOC_NAME);
@@ -234,7 +250,7 @@ namespace jl_well_known {
             auto poolAllocFunc = Function::Create(
                 FunctionType::get(
                     context.T_prjlvalue,
-                    { context.T_pint8, context.T_int32, context.T_int32 },
+                    { Type::getInt8PtrTy(context.getLLVMContext()), Type::getInt32Ty(context.getLLVMContext()), Type::getInt32Ty(context.getLLVMContext()) },
                     false),
                 Function::ExternalLinkage,
                 GC_POOL_ALLOC_NAME);
@@ -242,6 +258,20 @@ namespace jl_well_known {
             return addGCAllocAttributes(poolAllocFunc, context.getLLVMContext());
         });
 
+    const WellKnownFunctionDescription GCQueueBinding(
+        GC_QUEUE_BINDING_NAME,
+        [](const JuliaPassContext &context) {
+            auto func = Function::Create(
+                FunctionType::get(
+                    Type::getVoidTy(context.getLLVMContext()),
+                    { context.T_prjlvalue },
+                    false),
+                Function::ExternalLinkage,
+                GC_QUEUE_BINDING_NAME);
+            func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly);
+            return func;
+        });
+
     const WellKnownFunctionDescription GCQueueRoot(
         GC_QUEUE_ROOT_NAME,
         [](const JuliaPassContext &context) {
diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h
index 71cab27e76ceba..75399de5c9f609 100644
--- a/src/llvm-pass-helpers.h
+++ b/src/llvm-pass-helpers.h
@@ -40,26 +40,16 @@ namespace jl_intrinsics {
 // from modules or add them if they're not available yet.
 // Mainly useful for building Julia-specific LLVM passes.
 struct JuliaPassContext {
-    // Standard types.
-    llvm::Type *T_size;
-    llvm::Type *T_int8;
-    llvm::Type *T_int32;
-    llvm::PointerType *T_pint8;
 
     // Types derived from 'jl_value_t'.
-    llvm::Type *T_jlvalue;
     llvm::PointerType *T_prjlvalue;
-    llvm::PointerType *T_ppjlvalue;
-    llvm::PointerType *T_pjlvalue;
-    llvm::PointerType *T_pjlvalue_der;
-    llvm::PointerType *T_ppjlvalue_der;
 
     // TBAA metadata nodes.
     llvm::MDNode *tbaa_gcframe;
     llvm::MDNode *tbaa_tag;
 
     // Intrinsics.
-    llvm::Function *ptls_getter;
+    llvm::Function *pgcstack_getter;
     llvm::Function *gc_flush_func;
     llvm::Function *gc_preserve_begin_func;
     llvm::Function *gc_preserve_end_func;
@@ -67,6 +57,7 @@ struct JuliaPassContext {
     llvm::Function *alloc_obj_func;
     llvm::Function *typeof_func;
     llvm::Function *write_barrier_func;
+    llvm::Function *write_barrier_binding_func;
 
     // Creates a pass context. Type and function pointers
     // are set to `nullptr`. Metadata nodes are initialized.
@@ -86,10 +77,10 @@ struct JuliaPassContext {
         return module->getContext();
     }
 
-    // Gets a call to the `julia.ptls_states` intrinisc in the entry
+    // Gets a call to the `julia.get_pgcstack' intrinsic in the entry
     // point of the given function, if there exists such a call.
     // Otherwise, `nullptr` is returned.
-    llvm::CallInst *getPtls(llvm::Function &F) const;
+    llvm::CallInst *getPGCstack(llvm::Function &F) const;
 
     // Gets the intrinsic or well-known function that conforms to
     // the given description if it exists in the module. If not,
@@ -132,6 +123,9 @@ namespace jl_intrinsics {
 
     // `julia.queue_gc_root`: an intrinsic that queues a GC root.
     extern const IntrinsicDescription queueGCRoot;
+
+    // `julia.queue_gc_binding`: an intrinsic that queues a binding for GC.
+    extern const IntrinsicDescription queueGCBinding;
 }
 
 // A namespace for well-known Julia runtime function descriptions.
@@ -152,6 +146,9 @@ namespace jl_well_known {
 
     // `jl_gc_queue_root`: queues a GC root.
     extern const WellKnownFunctionDescription GCQueueRoot;
+
+    // `jl_gc_queue_binding`: queues a binding for GC.
+    extern const WellKnownFunctionDescription GCQueueBinding;
 }
 
 #endif
diff --git a/src/llvm-propagate-addrspaces.cpp b/src/llvm-propagate-addrspaces.cpp
index 95182f144d6ec9..8da0e108c94d56 100644
--- a/src/llvm-propagate-addrspaces.cpp
+++ b/src/llvm-propagate-addrspaces.cpp
@@ -16,9 +16,6 @@
 #include <llvm/IR/Instructions.h>
 #include <llvm/IR/IntrinsicInst.h>
 #include <llvm/IR/InstVisitor.h>
-#if JL_LLVM_VERSION < 110000
-#include <llvm/IR/CallSite.h>
-#endif
 #include <llvm/IR/Module.h>
 #include <llvm/IR/IRBuilder.h>
 #include <llvm/IR/Verifier.h>
@@ -27,6 +24,7 @@
 
 #include "codegen_shared.h"
 #include "julia.h"
+#include "passes.h"
 
 #define DEBUG_TYPE "propagate_julia_addrspaces"
 
@@ -43,19 +41,19 @@ using namespace llvm;
     optimizations.
 */
 
-struct PropagateJuliaAddrspaces : public FunctionPass, public InstVisitor<PropagateJuliaAddrspaces> {
-    static char ID;
+struct PropagateJuliaAddrspacesVisitor : public InstVisitor<PropagateJuliaAddrspacesVisitor> {
     DenseMap<Value *, Value *> LiftingMap;
     SmallPtrSet<Value *, 4> Visited;
     std::vector<Instruction *> ToDelete;
     std::vector<std::pair<Instruction *, Instruction *>> ToInsert;
-    PropagateJuliaAddrspaces() : FunctionPass(ID) {};
 
 public:
-    bool runOnFunction(Function &F) override;
-    Value *LiftPointer(Value *V, Type *LocTy = nullptr, Instruction *InsertPt=nullptr);
-    void visitStoreInst(StoreInst &SI);
+    Value *LiftPointer(Value *V, Instruction *InsertPt=nullptr);
+    void visitMemop(Instruction &I, Type *T, unsigned OpIndex);
     void visitLoadInst(LoadInst &LI);
+    void visitStoreInst(StoreInst &SI);
+    void visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI);
+    void visitAtomicRMWInst(AtomicRMWInst &SI);
     void visitMemSetInst(MemSetInst &MI);
     void visitMemTransferInst(MemTransferInst &MTI);
 
@@ -63,19 +61,6 @@ struct PropagateJuliaAddrspaces : public FunctionPass, public InstVisitor<Propag
     void PoisonValues(std::vector<Value *> &Worklist);
 };
 
-bool PropagateJuliaAddrspaces::runOnFunction(Function &F) {
-    visit(F);
-    for (auto it : ToInsert)
-        it.first->insertBefore(it.second);
-    for (Instruction *I : ToDelete)
-        I->eraseFromParent();
-    ToInsert.clear();
-    ToDelete.clear();
-    LiftingMap.clear();
-    Visited.clear();
-    return true;
-}
-
 static unsigned getValueAddrSpace(Value *V) {
     return cast<PointerType>(V->getType())->getAddressSpace();
 }
@@ -84,7 +69,7 @@ static bool isSpecialAS(unsigned AS) {
     return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial;
 }
 
-void PropagateJuliaAddrspaces::PoisonValues(std::vector<Value *> &Worklist) {
+void PropagateJuliaAddrspacesVisitor::PoisonValues(std::vector<Value *> &Worklist) {
     while (!Worklist.empty()) {
         Value *CurrentV = Worklist.back();
         Worklist.pop_back();
@@ -97,7 +82,7 @@ void PropagateJuliaAddrspaces::PoisonValues(std::vector<Value *> &Worklist) {
     }
 }
 
-Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction *InsertPt) {
+Value *PropagateJuliaAddrspacesVisitor::LiftPointer(Value *V, Instruction *InsertPt) {
     SmallVector<Value *, 4> Stack;
     std::vector<Value *> Worklist;
     std::set<Value *> LocalVisited;
@@ -180,7 +165,7 @@ Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction
             Instruction *InstV = cast<Instruction>(V);
             Instruction *NewV = InstV->clone();
             ToInsert.push_back(std::make_pair(NewV, InstV));
-            Type *NewRetTy = cast<PointerType>(InstV->getType())->getElementType()->getPointerTo(0);
+            Type *NewRetTy = PointerType::getWithSamePointeeType(cast<PointerType>(InstV->getType()), AddressSpace::Generic);
             NewV->mutateType(NewRetTy);
             LiftingMap[InstV] = NewV;
             ToRevisit.push_back(NewV);
@@ -188,7 +173,7 @@ Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction
     }
 
     auto CollapseCastsAndLift = [&](Value *CurrentV, Instruction *InsertPt) -> Value * {
-        PointerType *TargetType = cast<PointerType>(CurrentV->getType())->getElementType()->getPointerTo(0);
+        PointerType *TargetType = PointerType::getWithSamePointeeType(cast<PointerType>(CurrentV->getType()), AddressSpace::Generic);
         while (!LiftingMap.count(CurrentV)) {
             if (isa<BitCastInst>(CurrentV))
                 CurrentV = cast<BitCastInst>(CurrentV)->getOperand(0);
@@ -232,27 +217,34 @@ Value *PropagateJuliaAddrspaces::LiftPointer(Value *V, Type *LocTy, Instruction
     return CollapseCastsAndLift(V, InsertPt);
 }
 
-void PropagateJuliaAddrspaces::visitLoadInst(LoadInst &LI) {
-    unsigned AS = LI.getPointerAddressSpace();
+void PropagateJuliaAddrspacesVisitor::visitMemop(Instruction &I, Type *T, unsigned OpIndex) {
+    Value *Original = I.getOperand(OpIndex);
+    unsigned AS = Original->getType()->getPointerAddressSpace();
     if (!isSpecialAS(AS))
         return;
-    Value *Replacement = LiftPointer(LI.getPointerOperand(), LI.getType(), &LI);
+    Value *Replacement = LiftPointer(Original, &I);
     if (!Replacement)
         return;
-    LI.setOperand(LoadInst::getPointerOperandIndex(), Replacement);
+    I.setOperand(OpIndex, Replacement);
 }
 
-void PropagateJuliaAddrspaces::visitStoreInst(StoreInst &SI) {
-    unsigned AS = SI.getPointerAddressSpace();
-    if (!isSpecialAS(AS))
-        return;
-    Value *Replacement = LiftPointer(SI.getPointerOperand(), SI.getValueOperand()->getType(), &SI);
-    if (!Replacement)
-        return;
-    SI.setOperand(StoreInst::getPointerOperandIndex(), Replacement);
+void PropagateJuliaAddrspacesVisitor::visitLoadInst(LoadInst &LI) {
+    visitMemop(LI, LI.getType(), LoadInst::getPointerOperandIndex());
+}
+
+void PropagateJuliaAddrspacesVisitor::visitStoreInst(StoreInst &SI) {
+    visitMemop(SI, SI.getValueOperand()->getType(), StoreInst::getPointerOperandIndex());
+}
+
+void PropagateJuliaAddrspacesVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &SI) {
+    visitMemop(SI, SI.getNewValOperand()->getType(), AtomicCmpXchgInst::getPointerOperandIndex());
 }
 
-void PropagateJuliaAddrspaces::visitMemSetInst(MemSetInst &MI) {
+void PropagateJuliaAddrspacesVisitor::visitAtomicRMWInst(AtomicRMWInst &SI) {
+    visitMemop(SI, SI.getType(), AtomicRMWInst::getPointerOperandIndex());
+}
+
+void PropagateJuliaAddrspacesVisitor::visitMemSetInst(MemSetInst &MI) {
     unsigned AS = MI.getDestAddressSpace();
     if (!isSpecialAS(AS))
         return;
@@ -265,20 +257,20 @@ void PropagateJuliaAddrspaces::visitMemSetInst(MemSetInst &MI) {
     MI.setArgOperand(0, Replacement);
 }
 
-void PropagateJuliaAddrspaces::visitMemTransferInst(MemTransferInst &MTI) {
+void PropagateJuliaAddrspacesVisitor::visitMemTransferInst(MemTransferInst &MTI) {
     unsigned DestAS = MTI.getDestAddressSpace();
     unsigned SrcAS = MTI.getSourceAddressSpace();
     if (!isSpecialAS(DestAS) && !isSpecialAS(SrcAS))
         return;
     Value *Dest = MTI.getRawDest();
     if (isSpecialAS(DestAS)) {
-        Value *Replacement = LiftPointer(Dest, cast<PointerType>(Dest->getType())->getElementType(), &MTI);
+        Value *Replacement = LiftPointer(Dest, &MTI);
         if (Replacement)
             Dest = Replacement;
     }
     Value *Src = MTI.getRawSource();
     if (isSpecialAS(SrcAS)) {
-        Value *Replacement = LiftPointer(Src, cast<PointerType>(Src->getType())->getElementType(), &MTI);
+        Value *Replacement = LiftPointer(Src, &MTI);
         if (Replacement)
             Src = Replacement;
     }
@@ -292,14 +284,45 @@ void PropagateJuliaAddrspaces::visitMemTransferInst(MemTransferInst &MTI) {
     MTI.setArgOperand(1, Src);
 }
 
-char PropagateJuliaAddrspaces::ID = 0;
-static RegisterPass<PropagateJuliaAddrspaces> X("PropagateJuliaAddrspaces", "Propagate (non-)rootedness information", false, false);
+bool propagateJuliaAddrspaces(Function &F) {
+    PropagateJuliaAddrspacesVisitor visitor;
+    visitor.visit(F);
+    for (auto it : visitor.ToInsert)
+        it.first->insertBefore(it.second);
+    for (Instruction *I : visitor.ToDelete)
+        I->eraseFromParent();
+    visitor.ToInsert.clear();
+    visitor.ToDelete.clear();
+    visitor.LiftingMap.clear();
+    visitor.Visited.clear();
+    return true;
+}
+
+struct PropagateJuliaAddrspacesLegacy : FunctionPass {
+    static char ID;
+
+    PropagateJuliaAddrspacesLegacy() : FunctionPass(ID) {}
+    bool runOnFunction(Function &F) override {
+        return propagateJuliaAddrspaces(F);
+    }
+};
+
+char PropagateJuliaAddrspacesLegacy::ID = 0;
+static RegisterPass<PropagateJuliaAddrspacesLegacy> X("PropagateJuliaAddrspaces", "Propagate (non-)rootedness information", false, false);
 
 Pass *createPropagateJuliaAddrspaces() {
-    return new PropagateJuliaAddrspaces();
+    return new PropagateJuliaAddrspacesLegacy();
+}
+
+PreservedAnalyses PropagateJuliaAddrspacesPass::run(Function &F, FunctionAnalysisManager &AM) {
+    if (propagateJuliaAddrspaces(F)) {
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    } else {
+        return PreservedAnalyses::all();
+    }
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddPropagateJuliaAddrspaces_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createPropagateJuliaAddrspaces());
 }
diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp
index 6fbc40ceff0c4b..2c022ca7fa660b 100644
--- a/src/llvm-ptls.cpp
+++ b/src/llvm-ptls.cpp
@@ -7,6 +7,7 @@
 
 #include "llvm-version.h"
 #include "support/dtypes.h"
+#include "passes.h"
 
 #include <llvm-c/Core.h>
 #include <llvm-c/Types.h>
@@ -32,48 +33,41 @@ using namespace llvm;
 
 typedef Instruction TerminatorInst;
 
-std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr,
-                                           bool isConstant=false);
-
 namespace {
 
-struct LowerPTLS: public ModulePass {
-    static char ID;
+struct LowerPTLS {
     LowerPTLS(bool imaging_mode=false)
-        : ModulePass(ID),
-          imaging_mode(imaging_mode)
+        : imaging_mode(imaging_mode)
     {}
 
+    bool runOnModule(Module &M, bool *CFGModified);
 private:
     const bool imaging_mode;
     Module *M;
-    Function *ptls_getter;
-    LLVMContext *ctx;
+    Function *pgcstack_getter;
     MDNode *tbaa_const;
-    PointerType *T_ptls_getter;
-    PointerType *T_ppjlvalue;
+    FunctionType *FT_pgcstack_getter;
+    PointerType *T_pgcstack_getter;
     PointerType *T_pppjlvalue;
-    Type *T_int8;
-    Type *T_size;
-    PointerType *T_pint8;
-    GlobalVariable *ptls_slot{nullptr};
-    GlobalVariable *ptls_offset{nullptr};
-    void set_ptls_attrs(CallInst *ptlsStates) const;
-    Instruction *emit_ptls_tp(Value *offset, Instruction *insertBefore) const;
+    GlobalVariable *pgcstack_func_slot{nullptr};
+    GlobalVariable *pgcstack_key_slot{nullptr};
+    GlobalVariable *pgcstack_offset{nullptr};
+    void set_pgcstack_attrs(CallInst *pgcstack) const;
+    Instruction *emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const;
     template<typename T> T *add_comdat(T *G) const;
     GlobalVariable *create_aliased_global(Type *T, StringRef name) const;
-    void fix_ptls_use(CallInst *ptlsStates);
-    bool runOnModule(Module &M) override;
+    void fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified);
 };
 
-void LowerPTLS::set_ptls_attrs(CallInst *ptlsStates) const
+void LowerPTLS::set_pgcstack_attrs(CallInst *pgcstack) const
 {
-    ptlsStates->addAttribute(AttributeList::FunctionIndex, Attribute::ReadNone);
-    ptlsStates->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+    addFnAttr(pgcstack, Attribute::ReadNone);
+    addFnAttr(pgcstack, Attribute::NoUnwind);
 }
 
-Instruction *LowerPTLS::emit_ptls_tp(Value *offset, Instruction *insertBefore) const
+Instruction *LowerPTLS::emit_pgcstack_tp(Value *offset, Instruction *insertBefore) const
 {
+    Value *tls;
 #if defined(_CPU_X86_64_) || defined(_CPU_X86_)
     if (insertBefore->getFunction()->callsFunctionThatReturnsTwice()) {
         // Workaround LLVM bug by hiding the offset computation
@@ -95,47 +89,49 @@ Instruction *LowerPTLS::emit_ptls_tp(Value *offset, Instruction *insertBefore) c
 #  endif
 
         // The add instruction clobbers flags
-        Value *tls;
         if (offset) {
             std::vector<Type*> args(0);
             args.push_back(offset->getType());
-            auto tp = InlineAsm::get(FunctionType::get(T_pint8, args, false),
+            auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), args, false),
                                      dyn_asm_str, "=&r,r,~{dirflag},~{fpsr},~{flags}", false);
-            tls = CallInst::Create(tp, offset, "ptls_i8", insertBefore);
+            tls = CallInst::Create(tp, offset, "pgcstack_i8", insertBefore);
         }
         else {
-            auto tp = InlineAsm::get(FunctionType::get(T_pint8, false),
+            auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false),
                                      const_asm_str.c_str(), "=r,~{dirflag},~{fpsr},~{flags}",
                                      false);
-            tls = CallInst::Create(tp, "ptls_i8", insertBefore);
+            tls = CallInst::Create(tp, "pgcstack_i8", insertBefore);
         }
-        return new BitCastInst(tls, T_pppjlvalue, "ptls", insertBefore);
     }
+    else
 #endif
-    // AArch64/ARM doesn't seem to have this issue.
-    // (Possibly because there are many more registers and the offset is
-    // positive and small)
-    // It's also harder to emit the offset in a generic way on ARM/AArch64
-    // (need to generate one or two `add` with shift) so let llvm emit
-    // the add for now.
+    {
+        // AArch64/ARM doesn't seem to have this issue.
+        // (Possibly because there are many more registers and the offset is
+        // positive and small)
+        // It's also harder to emit the offset in a generic way on ARM/AArch64
+        // (need to generate one or two `add` with shift) so let llvm emit
+        // the add for now.
 #if defined(_CPU_AARCH64_)
-    const char *asm_str = "mrs $0, tpidr_el0";
+        const char *asm_str = "mrs $0, tpidr_el0";
 #elif defined(__ARM_ARCH) && __ARM_ARCH >= 7
-    const char *asm_str = "mrc p15, 0, $0, c13, c0, 3";
+        const char *asm_str = "mrc p15, 0, $0, c13, c0, 3";
 #elif defined(_CPU_X86_64_)
-    const char *asm_str = "movq %fs:0, $0";
+        const char *asm_str = "movq %fs:0, $0";
 #elif defined(_CPU_X86_)
-    const char *asm_str = "movl %gs:0, $0";
+        const char *asm_str = "movl %gs:0, $0";
 #else
-    const char *asm_str = nullptr;
-    assert(0 && "Cannot emit thread pointer for this architecture.");
+        const char *asm_str = nullptr;
+        assert(0 && "Cannot emit thread pointer for this architecture.");
 #endif
-    if (!offset)
-        offset = ConstantInt::getSigned(T_size, jl_tls_offset);
-    auto tp = InlineAsm::get(FunctionType::get(T_pint8, false), asm_str, "=r", false);
-    Value *tls = CallInst::Create(tp, "thread_ptr", insertBefore);
-    tls = GetElementPtrInst::Create(T_int8, tls, {offset}, "ptls_i8", insertBefore);
-    return new BitCastInst(tls, T_pppjlvalue, "ptls", insertBefore);
+        if (!offset)
+            offset = ConstantInt::getSigned(getSizeTy(insertBefore->getContext()), jl_tls_offset);
+        auto tp = InlineAsm::get(FunctionType::get(Type::getInt8PtrTy(insertBefore->getContext()), false), asm_str, "=r", false);
+        tls = CallInst::Create(tp, "thread_ptr", insertBefore);
+        tls = GetElementPtrInst::Create(Type::getInt8Ty(insertBefore->getContext()), tls, {offset}, "ppgcstack_i8", insertBefore);
+    }
+    tls = new BitCastInst(tls, T_pppjlvalue->getPointerTo(), "ppgcstack", insertBefore);
+    return new LoadInst(T_pppjlvalue, tls, "pgcstack", false, insertBefore);
 }
 
 GlobalVariable *LowerPTLS::create_aliased_global(Type *T, StringRef name) const
@@ -155,15 +151,6 @@ template<typename T>
 inline T *LowerPTLS::add_comdat(T *G) const
 {
 #if defined(_OS_WINDOWS_)
-    // Add comdat information to make MSVC link.exe happy
-    // it's valid to emit this for ld.exe too,
-    // but makes it very slow to link for no benefit
-#if defined(_COMPILER_MICROSOFT_)
-    Comdat *jl_Comdat = G->getParent()->getOrInsertComdat(G->getName());
-    // ELF only supports Comdat::Any
-    jl_Comdat->setSelectionKind(Comdat::NoDuplicates);
-    G->setComdat(jl_Comdat);
-#endif
     // add __declspec(dllexport) to everything marked for export
     if (G->getLinkage() == GlobalValue::ExternalLinkage)
         G->setDLLStorageClass(GlobalValue::DLLExportStorageClass);
@@ -173,115 +160,169 @@ inline T *LowerPTLS::add_comdat(T *G) const
     return G;
 }
 
-void LowerPTLS::fix_ptls_use(CallInst *ptlsStates)
+void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, bool *CFGModified)
 {
-    if (ptlsStates->use_empty()) {
-        ptlsStates->eraseFromParent();
+    if (pgcstack->use_empty()) {
+        pgcstack->eraseFromParent();
         return;
     }
 
     if (imaging_mode) {
         if (jl_tls_elf_support) {
             // if (offset != 0)
-            //     ptls = tp + offset;
+            //     pgcstack = tp + offset;
             // else
-            //     ptls = getter();
-            auto offset = new LoadInst(T_size, ptls_offset, "", false, ptlsStates);
+            //     pgcstack = getter();
+            auto offset = new LoadInst(getSizeTy(pgcstack->getContext()), pgcstack_offset, "", false, pgcstack);
             offset->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
-            offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None));
-            auto cmp = new ICmpInst(ptlsStates, CmpInst::ICMP_NE, offset,
+            offset->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
+            auto cmp = new ICmpInst(pgcstack, CmpInst::ICMP_NE, offset,
                                     Constant::getNullValue(offset->getType()));
-            MDBuilder MDB(*ctx);
+            MDBuilder MDB(pgcstack->getContext());
             SmallVector<uint32_t, 2> Weights{9, 1};
             TerminatorInst *fastTerm;
             TerminatorInst *slowTerm;
-            SplitBlockAndInsertIfThenElse(cmp, ptlsStates, &fastTerm, &slowTerm,
+            SplitBlockAndInsertIfThenElse(cmp, pgcstack, &fastTerm, &slowTerm,
                                           MDB.createBranchWeights(Weights));
-
-            auto fastTLS = emit_ptls_tp(offset, fastTerm);
-            auto phi = PHINode::Create(T_pppjlvalue, 2, "", ptlsStates);
-            ptlsStates->replaceAllUsesWith(phi);
-            ptlsStates->moveBefore(slowTerm);
-            auto getter = new LoadInst(T_ptls_getter, ptls_slot, "", false, ptlsStates);
+            if (CFGModified)
+            *CFGModified = true;
+
+            auto fastTLS = emit_pgcstack_tp(offset, fastTerm);
+            auto phi = PHINode::Create(T_pppjlvalue, 2, "", pgcstack);
+            pgcstack->replaceAllUsesWith(phi);
+            pgcstack->moveBefore(slowTerm);
+            auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack);
             getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
-            getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None));
-            ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), getter);
-            set_ptls_attrs(ptlsStates);
+            getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
+            pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter);
+            set_pgcstack_attrs(pgcstack);
 
             phi->addIncoming(fastTLS, fastTLS->getParent());
-            phi->addIncoming(ptlsStates, ptlsStates->getParent());
+            phi->addIncoming(pgcstack, pgcstack->getParent());
 
             return;
         }
         // In imaging mode, we emit the function address as a load of a static
         // variable to be filled (in `staticdata.c`) at initialization time of the sysimg.
-        // This way we can by pass the extra indirection in `jl_get_ptls_states`
+        // This way we can bypass the extra indirection in `jl_get_pgcstack`
         // since we may not know which getter function to use ahead of time.
-        auto getter = new LoadInst(T_ptls_getter, ptls_slot, "", false, ptlsStates);
+        auto getter = new LoadInst(T_pgcstack_getter, pgcstack_func_slot, "", false, pgcstack);
         getter->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
-        getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(*ctx, None));
-        ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), getter);
-        set_ptls_attrs(ptlsStates);
+        getter->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
+#if defined(_OS_DARWIN_)
+        auto key = new LoadInst(getSizeTy(pgcstack->getContext()), pgcstack_key_slot, "", false, pgcstack);
+        key->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const);
+        key->setMetadata(llvm::LLVMContext::MD_invariant_load, MDNode::get(pgcstack->getContext(), None));
+        auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, getter, {key}, "", pgcstack);
+        new_pgcstack->takeName(pgcstack);
+        pgcstack->replaceAllUsesWith(new_pgcstack);
+        pgcstack->eraseFromParent();
+        pgcstack = new_pgcstack;
+#else
+        pgcstack->setCalledFunction(pgcstack->getFunctionType(), getter);
+#endif
+        set_pgcstack_attrs(pgcstack);
     }
     else if (jl_tls_offset != -1) {
-        ptlsStates->replaceAllUsesWith(emit_ptls_tp(nullptr, ptlsStates));
-        ptlsStates->eraseFromParent();
+        pgcstack->replaceAllUsesWith(emit_pgcstack_tp(nullptr, pgcstack));
+        pgcstack->eraseFromParent();
     }
     else {
         // use the address of the actual getter function directly
-        auto val = ConstantInt::get(T_size, (uintptr_t)jl_get_ptls_states_getter());
-        ptlsStates->setCalledFunction(ptlsStates->getFunctionType(), ConstantExpr::getIntToPtr(val, T_ptls_getter));
-        set_ptls_attrs(ptlsStates);
+        jl_get_pgcstack_func *f;
+        jl_pgcstack_key_t k;
+        jl_pgcstack_getkey(&f, &k);
+        Constant *val = ConstantInt::get(getSizeTy(pgcstack->getContext()), (uintptr_t)f);
+        val = ConstantExpr::getIntToPtr(val, T_pgcstack_getter);
+#if defined(_OS_DARWIN_)
+        assert(sizeof(k) == sizeof(uintptr_t));
+        Constant *key = ConstantInt::get(getSizeTy(pgcstack->getContext()), (uintptr_t)k);
+        auto new_pgcstack = CallInst::Create(FT_pgcstack_getter, val, {key}, "", pgcstack);
+        new_pgcstack->takeName(pgcstack);
+        pgcstack->replaceAllUsesWith(new_pgcstack);
+        pgcstack->eraseFromParent();
+        pgcstack = new_pgcstack;
+#else
+        pgcstack->setCalledFunction(pgcstack->getFunctionType(), val);
+#endif
+        set_pgcstack_attrs(pgcstack);
     }
 }
 
-bool LowerPTLS::runOnModule(Module &_M)
+bool LowerPTLS::runOnModule(Module &_M, bool *CFGModified)
 {
     M = &_M;
-    ptls_getter = M->getFunction("julia.ptls_states");
-    if (!ptls_getter)
+    pgcstack_getter = M->getFunction("julia.get_pgcstack");
+    if (!pgcstack_getter)
         return false;
 
-    ctx = &M->getContext();
-    tbaa_const = tbaa_make_child("jtbaa_const", nullptr, true).first;
+    tbaa_const = tbaa_make_child_with_context(_M.getContext(), "jtbaa_const", nullptr, true).first;
 
-    auto FT_ptls_getter = ptls_getter->getFunctionType();
-    T_ptls_getter = FT_ptls_getter->getPointerTo();
-    T_pppjlvalue = cast<PointerType>(FT_ptls_getter->getReturnType());
-    T_ppjlvalue = cast<PointerType>(T_pppjlvalue->getElementType());
-    T_int8 = Type::getInt8Ty(*ctx);
-    T_size = sizeof(size_t) == 8 ? Type::getInt64Ty(*ctx) : Type::getInt32Ty(*ctx);
-    T_pint8 = T_int8->getPointerTo();
+    FT_pgcstack_getter = pgcstack_getter->getFunctionType();
+#if defined(_OS_DARWIN_)
+    assert(sizeof(jl_pgcstack_key_t) == sizeof(uintptr_t));
+    FT_pgcstack_getter = FunctionType::get(FT_pgcstack_getter->getReturnType(), {getSizeTy(_M.getContext())}, false);
+#endif
+    T_pgcstack_getter = FT_pgcstack_getter->getPointerTo();
+    T_pppjlvalue = cast<PointerType>(FT_pgcstack_getter->getReturnType());
     if (imaging_mode) {
-        ptls_slot = create_aliased_global(T_ptls_getter, "jl_get_ptls_states_slot");
-        ptls_offset = create_aliased_global(T_size, "jl_tls_offset");
+        pgcstack_func_slot = create_aliased_global(T_pgcstack_getter, "jl_pgcstack_func_slot");
+        pgcstack_key_slot = create_aliased_global(getSizeTy(_M.getContext()), "jl_pgcstack_key_slot"); // >= sizeof(jl_pgcstack_key_t)
+        pgcstack_offset = create_aliased_global(getSizeTy(_M.getContext()), "jl_tls_offset");
     }
 
-    for (auto it = ptls_getter->user_begin(); it != ptls_getter->user_end();) {
+    for (auto it = pgcstack_getter->user_begin(); it != pgcstack_getter->user_end();) {
         auto call = cast<CallInst>(*it);
         ++it;
-        assert(call->getCalledOperand() == ptls_getter);
-        fix_ptls_use(call);
+        assert(call->getCalledOperand() == pgcstack_getter);
+        fix_pgcstack_use(call, CFGModified);
     }
-    assert(ptls_getter->use_empty());
-    ptls_getter->eraseFromParent();
+    assert(pgcstack_getter->use_empty());
+    pgcstack_getter->eraseFromParent();
     return true;
 }
 
-char LowerPTLS::ID = 0;
+struct LowerPTLSLegacy: public ModulePass {
+    static char ID;
+    LowerPTLSLegacy(bool imaging_mode=false)
+        : ModulePass(ID),
+          imaging_mode(imaging_mode)
+    {}
+
+    bool imaging_mode;
+    bool runOnModule(Module &M) override {
+        LowerPTLS lower(imaging_mode);
+        return lower.runOnModule(M, nullptr);
+    }
+};
+
+char LowerPTLSLegacy::ID = 0;
 
-static RegisterPass<LowerPTLS> X("LowerPTLS", "LowerPTLS Pass",
+static RegisterPass<LowerPTLSLegacy> X("LowerPTLS", "LowerPTLS Pass",
                                  false /* Only looks at CFG */,
                                  false /* Analysis Pass */);
 
 } // anonymous namespace
 
+PreservedAnalyses LowerPTLSPass::run(Module &M, ModuleAnalysisManager &AM) {
+    LowerPTLS lower(imaging_mode);
+    bool CFGModified = false;
+    if (lower.runOnModule(M, &CFGModified)) {
+        if (CFGModified) {
+            return PreservedAnalyses::none();
+        } else {
+            return PreservedAnalyses::allInSet<CFGAnalyses>();
+        }
+    }
+    return PreservedAnalyses::all();
+}
+
 Pass *createLowerPTLSPass(bool imaging_mode)
 {
-    return new LowerPTLS(imaging_mode);
+    return new LowerPTLSLegacy(imaging_mode);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass(LLVMPassManagerRef PM, LLVMBool imaging_mode)
+extern "C" JL_DLLEXPORT void LLVMExtraAddLowerPTLSPass_impl(LLVMPassManagerRef PM, LLVMBool imaging_mode)
 {
     unwrap(PM)->add(createLowerPTLSPass(imaging_mode));
 }
diff --git a/src/llvm-remove-addrspaces.cpp b/src/llvm-remove-addrspaces.cpp
index 18d461a545a341..a3f3cbb1fee72b 100644
--- a/src/llvm-remove-addrspaces.cpp
+++ b/src/llvm-remove-addrspaces.cpp
@@ -14,6 +14,7 @@
 
 #include "codegen_shared.h"
 #include "julia.h"
+#include "passes.h"
 
 #define DEBUG_TYPE "remove_addrspaces"
 
@@ -43,10 +44,17 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
             return DstTy;
 
         DstTy = SrcTy;
-        if (auto Ty = dyn_cast<PointerType>(SrcTy))
-            DstTy = PointerType::get(
-                    remapType(Ty->getElementType()),
-                    ASRemapper(Ty->getAddressSpace()));
+        if (auto Ty = dyn_cast<PointerType>(SrcTy)) {
+            if (Ty->isOpaque()) {
+                DstTy = PointerType::get(Ty->getContext(), ASRemapper(Ty->getAddressSpace()));
+            }
+            else {
+                //Remove once opaque pointer transition is complete
+                DstTy = PointerType::get(
+                        remapType(Ty->getPointerElementType()),
+                        ASRemapper(Ty->getAddressSpace()));
+            }
+        }
         else if (auto Ty = dyn_cast<FunctionType>(SrcTy)) {
             SmallVector<Type *, 4> Params;
             for (unsigned Index = 0; Index < Ty->getNumParams(); ++Index)
@@ -55,34 +63,44 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
                     remapType(Ty->getReturnType()), Params, Ty->isVarArg());
         }
         else if (auto Ty = dyn_cast<StructType>(SrcTy)) {
-            if (!Ty->isOpaque()) {
-                auto Els = Ty->getNumElements();
-                SmallVector<Type *, 4> NewElTys(Els);
-                for (unsigned i = 0; i < Els; ++i)
-                    NewElTys[i] = remapType(Ty->getElementType(i));
+            if (Ty->isLiteral()) {
+                // Since a literal type has to have the body when it is created,
+                // we need to remap the element types first. This is safe only
+                // for literal types (i.e., no self-reference) and thus treated
+                // separately.
+                assert(!Ty->hasName()); // literal type has no name.
+                SmallVector<Type *, 4> NewElTys;
+                NewElTys.reserve(Ty->getNumElements());
+                for (auto E: Ty->elements())
+                    NewElTys.push_back(remapType(E));
+                DstTy = StructType::get(Ty->getContext(), NewElTys, Ty->isPacked());
+            } else if (!Ty->isOpaque()) {
+                // If the struct type is not literal and not opaque, it can have
+                // self-referential fields (i.e., pointer type of itself as a
+                // field).
+                StructType *DstTy_ = StructType::create(Ty->getContext());
                 if (Ty->hasName()) {
                     auto Name = std::string(Ty->getName());
                     Ty->setName(Name + ".bad");
-                    DstTy = StructType::create(
-                            Ty->getContext(), NewElTys, Name, Ty->isPacked());
+                    DstTy_->setName(Name);
                 }
-                else
-                    DstTy = StructType::get(
-                            Ty->getContext(), NewElTys, Ty->isPacked());
+                // To avoid infinite recursion, shove the placeholder of the DstTy before
+                // recursing into the element types:
+                MappedTypes[SrcTy] = DstTy_;
+
+                auto Els = Ty->getNumElements();
+                SmallVector<Type *, 4> NewElTys(Els);
+                for (unsigned i = 0; i < Els; ++i)
+                    NewElTys[i] = remapType(Ty->getElementType(i));
+                DstTy_->setBody(NewElTys, Ty->isPacked());
+                DstTy = DstTy_;
             }
         }
         else if (auto Ty = dyn_cast<ArrayType>(SrcTy))
             DstTy = ArrayType::get(
                     remapType(Ty->getElementType()), Ty->getNumElements());
         else if (auto Ty = dyn_cast<VectorType>(SrcTy))
-            DstTy = VectorType::get(remapType(Ty->getElementType()),
-#if JL_LLVM_VERSION >= 110000
-                     Ty
-#else
-                     Ty->getNumElements(),
-                     Ty->isScalable()
-#endif
-                    );
+            DstTy = VectorType::get(remapType(Ty->getElementType()), Ty);
 
         if (DstTy != SrcTy)
             LLVM_DEBUG(
@@ -95,10 +113,9 @@ class AddrspaceRemoveTypeRemapper : public ValueMapTypeRemapper {
     }
 
 private:
-    static DenseMap<Type *, Type *> MappedTypes;
+    DenseMap<Type *, Type *> MappedTypes;
 };
 
-DenseMap<Type *, Type *> AddrspaceRemoveTypeRemapper::MappedTypes;
 
 class AddrspaceRemoveValueMaterializer : public ValueMaterializer {
     ValueToValueMapTy &VM;
@@ -141,10 +158,12 @@ class AddrspaceRemoveValueMaterializer : public ValueMaterializer {
                     // GEP const exprs need to know the type of the source.
                     // asserts remapType(typeof arg0) == typeof mapValue(arg0).
                     Constant *Src = CE->getOperand(0);
-                    Type *SrcTy = remapType(
-                            cast<PointerType>(Src->getType()->getScalarType())
-                                    ->getElementType());
-                    DstV = CE->getWithOperands(Ops, Ty, false, SrcTy);
+                    auto ptrty = cast<PointerType>(Src->getType()->getScalarType());
+                    //Remove once opaque pointer transition is complete
+                    if (!ptrty->isOpaque()) {
+                        Type *SrcTy = remapType(ptrty->getPointerElementType());
+                        DstV = CE->getWithOperands(Ops, Ty, false, SrcTy);
+                    }
                 }
                 else
                     DstV = CE->getWithOperands(Ops, Ty);
@@ -221,18 +240,7 @@ unsigned removeAllAddrspaces(unsigned AS)
     return AddressSpace::Generic;
 }
 
-struct RemoveAddrspacesPass : public ModulePass {
-    static char ID;
-    AddrspaceRemapFunction ASRemapper;
-    RemoveAddrspacesPass(
-            AddrspaceRemapFunction ASRemapper = removeAllAddrspaces)
-        : ModulePass(ID), ASRemapper(ASRemapper){};
-
-public:
-    bool runOnModule(Module &M) override;
-};
-
-bool RemoveAddrspacesPass::runOnModule(Module &M)
+bool removeAddrspaces(Module &M, AddrspaceRemapFunction ASRemapper)
 {
     ValueToValueMapTy VMap;
     AddrspaceRemoveTypeRemapper TypeRemapper(ASRemapper);
@@ -315,7 +323,7 @@ bool RemoveAddrspacesPass::runOnModule(Module &M)
 
         Function *NF = Function::Create(
                 NFTy, F->getLinkage(), F->getAddressSpace(), Name, &M);
-        NF->copyAttributesFrom(F);
+        // no need to copy attributes here, that's done by CloneFunctionInto
         VMap[F] = NF;
     }
 
@@ -335,7 +343,11 @@ bool RemoveAddrspacesPass::runOnModule(Module &M)
         for (auto MD : MDs)
             NGV->addMetadata(
                     MD.first,
+#if JL_LLVM_VERSION >= 130000
+                    *MapMetadata(MD.second, VMap));
+#else
                     *MapMetadata(MD.second, VMap, RF_MoveDistinctMDs));
+#endif
 
         copyComdat(NGV, GV);
 
@@ -362,13 +374,43 @@ bool RemoveAddrspacesPass::runOnModule(Module &M)
                 NF,
                 F,
                 VMap,
+#if JL_LLVM_VERSION >= 130000
+                CloneFunctionChangeType::GlobalChanges,
+#else
                 /*ModuleLevelChanges=*/true,
+#endif
                 Returns,
                 "",
                 nullptr,
                 &TypeRemapper,
                 &Materializer);
 
+        // CloneFunctionInto unconditionally copies the attributes from F to NF,
+        // without considering e.g. the byval attribute type.
+        AttributeList Attrs = F->getAttributes();
+        LLVMContext &C = F->getContext();
+        for (unsigned i = 0; i < Attrs.getNumAttrSets(); ++i) {
+            for (Attribute::AttrKind TypedAttr :
+                 {Attribute::ByVal, Attribute::StructRet, Attribute::ByRef}) {
+#if JL_LLVM_VERSION >= 140000
+                auto Attr = Attrs.getAttributeAtIndex(i, TypedAttr);
+#else
+                auto Attr = Attrs.getAttribute(i, TypedAttr);
+#endif
+                if (Type *Ty = Attr.getValueAsType()) {
+#if JL_LLVM_VERSION >= 140000
+                    Attrs = Attrs.replaceAttributeTypeAtIndex(
+                        C, i, TypedAttr, TypeRemapper.remapType(Ty));
+#else
+                    Attrs = Attrs.replaceAttributeType(
+                        C, i, TypedAttr, TypeRemapper.remapType(Ty));
+#endif
+                    break;
+                }
+            }
+        }
+        NF->setAttributes(Attrs);
+
         if (F->hasPersonalityFn())
             NF->setPersonalityFn(MapValue(F->getPersonalityFn(), VMap));
 
@@ -413,8 +455,22 @@ bool RemoveAddrspacesPass::runOnModule(Module &M)
     return true;
 }
 
-char RemoveAddrspacesPass::ID = 0;
-static RegisterPass<RemoveAddrspacesPass>
+
+struct RemoveAddrspacesPassLegacy : public ModulePass {
+    static char ID;
+    AddrspaceRemapFunction ASRemapper;
+    RemoveAddrspacesPassLegacy(
+            AddrspaceRemapFunction ASRemapper = removeAllAddrspaces)
+        : ModulePass(ID), ASRemapper(ASRemapper){};
+
+public:
+    bool runOnModule(Module &M) override {
+        return removeAddrspaces(M, ASRemapper);
+    }
+};
+
+char RemoveAddrspacesPassLegacy::ID = 0;
+static RegisterPass<RemoveAddrspacesPassLegacy>
         X("RemoveAddrspaces",
           "Remove IR address space information.",
           false,
@@ -423,7 +479,17 @@ static RegisterPass<RemoveAddrspacesPass>
 Pass *createRemoveAddrspacesPass(
         AddrspaceRemapFunction ASRemapper = removeAllAddrspaces)
 {
-    return new RemoveAddrspacesPass(ASRemapper);
+    return new RemoveAddrspacesPassLegacy(ASRemapper);
+}
+
+RemoveAddrspacesPass::RemoveAddrspacesPass() : RemoveAddrspacesPass(removeAllAddrspaces) {}
+
+PreservedAnalyses RemoveAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) {
+    if (removeAddrspaces(M, ASRemapper)) {
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    } else {
+        return PreservedAnalyses::all();
+    }
 }
 
 
@@ -439,16 +505,16 @@ unsigned removeJuliaAddrspaces(unsigned AS)
         return AS;
 }
 
-struct RemoveJuliaAddrspacesPass : public ModulePass {
+struct RemoveJuliaAddrspacesPassLegacy : public ModulePass {
     static char ID;
-    RemoveAddrspacesPass Pass;
-    RemoveJuliaAddrspacesPass() : ModulePass(ID), Pass(removeJuliaAddrspaces){};
+    RemoveAddrspacesPassLegacy Pass;
+    RemoveJuliaAddrspacesPassLegacy() : ModulePass(ID), Pass(removeJuliaAddrspaces){};
 
-    bool runOnModule(Module &M) { return Pass.runOnModule(M); }
+    bool runOnModule(Module &M) override { return Pass.runOnModule(M); }
 };
 
-char RemoveJuliaAddrspacesPass::ID = 0;
-static RegisterPass<RemoveJuliaAddrspacesPass>
+char RemoveJuliaAddrspacesPassLegacy::ID = 0;
+static RegisterPass<RemoveJuliaAddrspacesPassLegacy>
         Y("RemoveJuliaAddrspaces",
           "Remove IR address space information.",
           false,
@@ -456,10 +522,14 @@ static RegisterPass<RemoveJuliaAddrspacesPass>
 
 Pass *createRemoveJuliaAddrspacesPass()
 {
-    return new RemoveJuliaAddrspacesPass();
+    return new RemoveJuliaAddrspacesPassLegacy();
+}
+
+PreservedAnalyses RemoveJuliaAddrspacesPass::run(Module &M, ModuleAnalysisManager &AM) {
+    return RemoveAddrspacesPass(removeJuliaAddrspaces).run(M, AM);
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveJuliaAddrspacesPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createRemoveJuliaAddrspacesPass());
 }
diff --git a/src/llvm-remove-ni.cpp b/src/llvm-remove-ni.cpp
index 2da30f25e75afe..c252905dc75f95 100644
--- a/src/llvm-remove-ni.cpp
+++ b/src/llvm-remove-ni.cpp
@@ -1,42 +1,62 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #include "llvm-version.h"
+#include "passes.h"
 
+#include <llvm/Pass.h>
 #include <llvm/IR/Module.h>
+#include <llvm/IR/PassManager.h>
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/Support/Debug.h>
 
+#include "julia.h"
+
 #define DEBUG_TYPE "remove_ni"
 
 using namespace llvm;
 
 namespace {
 
-struct RemoveNIPass : public ModulePass {
+static bool removeNI(Module &M)
+{
+    auto dlstr = M.getDataLayoutStr();
+    auto nistart = dlstr.find("-ni:");
+    if (nistart == std::string::npos)
+        return false;
+    auto len = dlstr.size();
+    auto niend = nistart + 1;
+    for (; niend < len; niend++) {
+        if (dlstr[niend] == '-') {
+            break;
+        }
+    }
+    dlstr.erase(nistart, niend - nistart);
+    M.setDataLayout(dlstr);
+    return true;
+}
+}
+
+PreservedAnalyses RemoveNI::run(Module &M, ModuleAnalysisManager &AM)
+{
+    if (removeNI(M)) {
+        return PreservedAnalyses::allInSet<CFGAnalyses>();
+    }
+    return PreservedAnalyses::all();
+}
+
+namespace {
+struct RemoveNILegacy : public ModulePass {
     static char ID;
-    RemoveNIPass() : ModulePass(ID) {};
+    RemoveNILegacy() : ModulePass(ID) {};
 
     bool runOnModule(Module &M)
     {
-        auto dlstr = M.getDataLayoutStr();
-        auto nistart = dlstr.find("-ni:");
-        if (nistart == std::string::npos)
-            return false;
-        auto len = dlstr.size();
-        auto niend = nistart + 1;
-        for (; niend < len; niend++) {
-            if (dlstr[niend] == '-') {
-                break;
-            }
-        }
-        dlstr.erase(nistart, niend - nistart);
-        M.setDataLayout(dlstr);
-        return true;
+        return removeNI(M);
     }
 };
 
-char RemoveNIPass::ID = 0;
-static RegisterPass<RemoveNIPass>
+char RemoveNILegacy::ID = 0;
+static RegisterPass<RemoveNILegacy>
         Y("RemoveNI",
           "Remove non-integral address space.",
           false,
@@ -45,5 +65,10 @@ static RegisterPass<RemoveNIPass>
 
 Pass *createRemoveNIPass()
 {
-    return new RemoveNIPass();
+    return new RemoveNILegacy();
+}
+
+extern "C" JL_DLLEXPORT void LLVMExtraAddRemoveNIPass_impl(LLVMPassManagerRef PM)
+{
+    unwrap(PM)->add(createRemoveNIPass());
 }
diff --git a/src/llvm-simdloop.cpp b/src/llvm-simdloop.cpp
index afcfa60082ad85..a96335b91f36ed 100644
--- a/src/llvm-simdloop.cpp
+++ b/src/llvm-simdloop.cpp
@@ -1,6 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 #include "llvm-version.h"
+#include "passes.h"
 
 #define DEBUG_TYPE "lower_simd_loop"
 
@@ -19,46 +20,28 @@
 #include <llvm-c/Core.h>
 #include <llvm-c/Types.h>
 
+#include <llvm/ADT/Statistic.h>
 #include <llvm/Analysis/LoopPass.h>
 #include <llvm/IR/LegacyPassManager.h>
 #include <llvm/IR/Instructions.h>
-#include <llvm/IR/LLVMContext.h>
 #include <llvm/IR/Metadata.h>
+#include <llvm/IR/Verifier.h>
 #include <llvm/Support/Debug.h>
 
 #include "julia_assert.h"
 
-namespace llvm {
+using namespace llvm;
 
+STATISTIC(TotalMarkedLoops, "Total number of loops marked with simdloop");
+STATISTIC(IVDepLoops, "Number of loops with no loop-carried dependencies");
+STATISTIC(SimdLoops, "Number of loops with SIMD instructions");
+STATISTIC(IVDepInstructions, "Number of instructions marked ivdep");
+STATISTIC(ReductionChains, "Number of reduction chains folded");
+STATISTIC(ReductionChainLength, "Total sum of instructions folded from reduction chain");
+STATISTIC(AddChains, "Addition reduction chains");
+STATISTIC(MulChains, "Multiply reduction chains");
 
-/// This pass should run after reduction variables have been converted to phi nodes,
-/// otherwise floating-point reductions might not be recognized as such and
-/// prevent SIMDization.
-struct LowerSIMDLoop : public ModulePass {
-    static char ID;
-    LowerSIMDLoop() : ModulePass(ID)
-    {
-    }
-
-    protected:
-    void getAnalysisUsage(AnalysisUsage &AU) const override
-    {
-        ModulePass::getAnalysisUsage(AU);
-        AU.addRequired<LoopInfoWrapperPass>();
-        AU.addPreserved<LoopInfoWrapperPass>();
-        AU.setPreservesCFG();
-    }
-
-    private:
-    bool runOnModule(Module &M) override;
-
-    bool markLoopInfo(Module &M, Function *marker);
-
-    /// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv,
-    /// mark the ops as permitting reassociation/commuting.
-    /// As of LLVM 4.0, FDiv is not handled by the loop vectorizer
-    void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const;
-};
+namespace {
 
 static unsigned getReduceOpcode(Instruction *J, Instruction *operand)
 {
@@ -80,7 +63,10 @@ static unsigned getReduceOpcode(Instruction *J, Instruction *operand)
     }
 }
 
-void LowerSIMDLoop::enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const
+/// If Phi is part of a reduction cycle of FAdd, FSub, FMul or FDiv,
+/// mark the ops as permitting reassociation/commuting.
+/// As of LLVM 4.0, FDiv is not handled by the loop vectorizer
+static void enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L)
 {
     typedef SmallVector<Instruction*, 8> chainVector;
     chainVector chain;
@@ -124,32 +110,32 @@ void LowerSIMDLoop::enableUnsafeAlgebraIfReduction(PHINode *Phi, Loop *L) const
         }
         chain.push_back(J);
     }
+    switch (opcode) {
+        case Instruction::FAdd:
+            ++AddChains;
+            break;
+        case Instruction::FMul:
+            ++MulChains;
+            break;
+    }
+    ++ReductionChains;
     for (chainVector::const_iterator K=chain.begin(); K!=chain.end(); ++K) {
         LLVM_DEBUG(dbgs() << "LSL: marking " << **K << "\n");
         (*K)->setFast(true);
+        ++ReductionChainLength;
     }
 }
 
-bool LowerSIMDLoop::runOnModule(Module &M)
-{
-    Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker");
-
-    bool Changed = false;
-    if (loopinfo_marker)
-        Changed |= markLoopInfo(M, loopinfo_marker);
-
-    return Changed;
-}
-
-bool LowerSIMDLoop::markLoopInfo(Module &M, Function *marker)
+static bool markLoopInfo(Module &M, Function *marker, function_ref<LoopInfo &(Function &)> GetLI)
 {
     bool Changed = false;
     std::vector<Instruction*> ToDelete;
     for (User *U : marker->users()) {
+        ++TotalMarkedLoops;
         Instruction *I = cast<Instruction>(U);
         ToDelete.push_back(I);
 
-        LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>(*I->getParent()->getParent()).getLoopInfo();
+        LoopInfo &LI = GetLI(*I->getParent()->getParent());
         Loop *L = LI.getLoopFor(I->getParent());
         I->removeFromParent();
         if (!L)
@@ -212,10 +198,12 @@ bool LowerSIMDLoop::markLoopInfo(Module &M, Function *marker)
         // If ivdep is true we assume that there is no memory dependency between loop iterations
         // This is a fairly strong assumption and does often not hold true for generic code.
         if (ivdep) {
+            ++IVDepLoops;
             // Mark memory references so that Loop::isAnnotatedParallel will return true for this loop.
             for (BasicBlock *BB : L->blocks()) {
                for (Instruction &I : *BB) {
                    if (I.mayReadOrWriteMemory()) {
+                       ++IVDepInstructions;
                        I.setMetadata(LLVMContext::MD_mem_parallel_loop_access, m);
                    }
                }
@@ -224,6 +212,7 @@ bool LowerSIMDLoop::markLoopInfo(Module &M, Function *marker)
         }
 
         if (simd) {
+            ++SimdLoops;
             // Mark floating-point reductions as okay to reassociate/commute.
             for (BasicBlock::iterator I = Lh->begin(), E = Lh->end(); I != E; ++I) {
                 if (PHINode *Phi = dyn_cast<PHINode>(I))
@@ -240,23 +229,89 @@ bool LowerSIMDLoop::markLoopInfo(Module &M, Function *marker)
         I->deleteValue();
     marker->eraseFromParent();
 
+    assert(!verifyModule(M));
     return Changed;
 }
 
-char LowerSIMDLoop::ID = 0;
+} // end anonymous namespace
+
+
+/// This pass should run after reduction variables have been converted to phi nodes,
+/// otherwise floating-point reductions might not be recognized as such and
+/// prevent SIMDization.
+
+
+PreservedAnalyses LowerSIMDLoop::run(Module &M, ModuleAnalysisManager &AM)
+{
+    Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker");
+
+    if (!loopinfo_marker)
+        return PreservedAnalyses::all();
+
+    FunctionAnalysisManager &FAM =
+      AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+
+    auto GetLI = [&FAM](Function &F) -> LoopInfo & {
+        return FAM.getResult<LoopAnalysis>(F);
+    };
+
+    if (markLoopInfo(M, loopinfo_marker, GetLI)) {
+        auto preserved = PreservedAnalyses::allInSet<CFGAnalyses>();
+        preserved.preserve<LoopAnalysis>();
+        return preserved;
+    }
+
+    return PreservedAnalyses::all();
+}
+
+namespace {
+class LowerSIMDLoopLegacy : public ModulePass {
+    //LowerSIMDLoop Impl;
+
+public:
+  static char ID;
+
+  LowerSIMDLoopLegacy() : ModulePass(ID) {
+  }
 
-static RegisterPass<LowerSIMDLoop> X("LowerSIMDLoop", "LowerSIMDLoop Pass",
+  bool runOnModule(Module &M) override {
+    bool Changed = false;
+
+    Function *loopinfo_marker = M.getFunction("julia.loopinfo_marker");
+
+    auto GetLI = [this](Function &F) -> LoopInfo & {
+        return getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo();
+    };
+
+    if (loopinfo_marker)
+        Changed |= markLoopInfo(M, loopinfo_marker, GetLI);
+
+    return Changed;
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override
+  {
+      ModulePass::getAnalysisUsage(AU);
+      AU.addRequired<LoopInfoWrapperPass>();
+      AU.addPreserved<LoopInfoWrapperPass>();
+      AU.setPreservesCFG();
+  }
+};
+
+} // end anonymous namespace
+
+char LowerSIMDLoopLegacy::ID = 0;
+
+static RegisterPass<LowerSIMDLoopLegacy> X("LowerSIMDLoop", "LowerSIMDLoop Pass",
                                      false /* Only looks at CFG */,
                                      false /* Analysis Pass */);
 
 JL_DLLEXPORT Pass *createLowerSimdLoopPass()
 {
-    return new LowerSIMDLoop();
+    return new LowerSIMDLoopLegacy();
 }
 
-extern "C" JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass(LLVMPassManagerRef PM)
+extern "C" JL_DLLEXPORT void LLVMExtraAddLowerSimdLoopPass_impl(LLVMPassManagerRef PM)
 {
     unwrap(PM)->add(createLowerSimdLoopPass());
 }
-
-} // namespace llvm
diff --git a/src/llvm-version.h b/src/llvm-version.h
index 8700b3ef416ecb..4e15e787b7de80 100644
--- a/src/llvm-version.h
+++ b/src/llvm-version.h
@@ -9,22 +9,17 @@
 #define JL_LLVM_VERSION (LLVM_VERSION_MAJOR * 10000 + LLVM_VERSION_MINOR * 100 \
                         + LLVM_VERSION_PATCH)
 
-#if JL_LLVM_VERSION < 80000
-    #error Only LLVM versions >= 8.0.0 are supported by Julia
-#endif
-#if JL_LLVM_VERSION < 100000
-#define Align(a) (a)
-#endif
-
-#ifndef LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING
-#define LLVM_DISABLE_ABI_BREAKING_CHECKS_ENFORCING 0
+#if JL_LLVM_VERSION < 120000
+    #error Only LLVM versions >= 12.0.0 are supported by Julia
 #endif
 
-#ifndef LLVM_ENABLE_STATS
-#define LLVM_ENABLE_STATS 0
+#if JL_LLVM_VERSION >= 150000
+#define JL_LLVM_OPAQUE_POINTERS 1
 #endif
 
+#ifdef __cplusplus
 #if defined(__GNUC__) && (__GNUC__ >= 9)
 // Added in GCC 9, this warning is annoying
 #pragma GCC diagnostic ignored "-Winit-list-lifetime"
 #endif
+#endif
diff --git a/src/llvmcalltest.cpp b/src/llvmcalltest.cpp
index fee0fed72c0d9b..b225111520c394 100644
--- a/src/llvmcalltest.cpp
+++ b/src/llvmcalltest.cpp
@@ -13,14 +13,23 @@
 
 using namespace llvm;
 
+// Borrow definition from `support/dtypes.h`
+#ifdef _OS_WINDOWS_
+#  define DLLEXPORT __declspec(dllexport)
+#else
+# if defined(_OS_LINUX_)
+#  define DLLEXPORT __attribute__ ((visibility("protected")))
+# else
+#  define DLLEXPORT __attribute__ ((visibility("default")))
+# endif
+#endif
+
 extern "C" {
 
-JL_DLLEXPORT const char *MakeIdentityFunction(jl_value_t* jl_AnyTy) {
+DLLEXPORT const char *MakeIdentityFunction(jl_value_t* jl_AnyTy) {
     LLVMContext Ctx;
-    PointerType *AnyTy = PointerType::get(StructType::get(Ctx), 0);
-    // FIXME: get AnyTy via jl_type_to_llvm(Ctx, jl_AnyTy)
-
-    Type *TrackedTy = PointerType::get(AnyTy->getElementType(), AddressSpace::Tracked);
+    // FIXME: get TrackedTy via jl_type_to_llvm(Ctx, jl_AnyTy)
+    Type *TrackedTy = PointerType::get(StructType::get(Ctx), AddressSpace::Tracked);
     Module *M = new llvm::Module("shadow", Ctx);
     Function *F = Function::Create(
         FunctionType::get(
@@ -40,7 +49,7 @@ JL_DLLEXPORT const char *MakeIdentityFunction(jl_value_t* jl_AnyTy) {
     return strdup(buf.c_str());
 }
 
-JL_DLLEXPORT const char *MakeLoadGlobalFunction() {
+DLLEXPORT const char *MakeLoadGlobalFunction() {
     LLVMContext Ctx;
 
     auto M = new Module("shadow", Ctx);
diff --git a/src/locks.h b/src/locks.h
deleted file mode 100644
index 938ad45cd3e379..00000000000000
--- a/src/locks.h
+++ /dev/null
@@ -1,158 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#ifndef JL_LOCKS_H
-#define JL_LOCKS_H
-
-#include "julia_assert.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-// Lock acquire and release primitives
-
-// JL_LOCK and jl_mutex_lock are GC safe points while JL_LOCK_NOGC
-// and jl_mutex_lock_nogc are not.
-// Always use JL_LOCK unless no one holding the lock can trigger a GC or GC
-// safepoint. JL_LOCK_NOGC should only be needed for GC internal locks.
-// The JL_LOCK* and JL_UNLOCK* macros are no-op for non-threading build
-// while the jl_mutex_* functions are always locking and unlocking the locks.
-
-static inline void jl_mutex_wait(jl_mutex_t *lock, int safepoint)
-{
-    unsigned long self = jl_thread_self();
-    unsigned long owner = jl_atomic_load_relaxed(&lock->owner);
-    if (owner == self) {
-        lock->count++;
-        return;
-    }
-    while (1) {
-        if (owner == 0 &&
-            jl_atomic_compare_exchange(&lock->owner, 0, self) == 0) {
-            lock->count = 1;
-            return;
-        }
-        if (safepoint) {
-            jl_ptls_t ptls = jl_get_ptls_states();
-            jl_gc_safepoint_(ptls);
-        }
-        jl_cpu_pause();
-        owner = jl_atomic_load_relaxed(&lock->owner);
-    }
-}
-
-static inline void jl_mutex_lock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT
-{
-#ifndef __clang_analyzer__
-    // Hide this body from the analyzer, otherwise it complains that we're calling
-    // a non-safepoint from this function. The 0 arguments guarantees that we do
-    // not reach the safepoint, but the analyzer can't figure that out
-    jl_mutex_wait(lock, 0);
-#endif
-}
-
-static inline void jl_lock_frame_push(jl_mutex_t *lock)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    small_arraylist_t *locks = &ptls->locks;
-    uint32_t len = locks->len;
-    if (__unlikely(len >= locks->max)) {
-        small_arraylist_grow(locks, 1);
-    }
-    else {
-        locks->len = len + 1;
-    }
-    locks->items[len] = (void*)lock;
-}
-static inline void jl_lock_frame_pop(void)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    assert(ptls->locks.len > 0);
-    ptls->locks.len--;
-}
-
-#define JL_SIGATOMIC_BEGIN() do {               \
-        jl_get_ptls_states()->defer_signal++;   \
-        jl_signal_fence();                      \
-    } while (0)
-#define JL_SIGATOMIC_END() do {                                 \
-        jl_signal_fence();                                      \
-        if (--jl_get_ptls_states()->defer_signal == 0) {        \
-            jl_sigint_safepoint(jl_get_ptls_states());          \
-        }                                                       \
-    } while (0)
-
-static inline void jl_mutex_lock(jl_mutex_t *lock)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    JL_SIGATOMIC_BEGIN();
-    jl_mutex_wait(lock, 1);
-    jl_lock_frame_push(lock);
-    jl_gc_enable_finalizers(ptls, 0);
-}
-
-static inline int jl_mutex_trylock_nogc(jl_mutex_t *lock)
-{
-    unsigned long self = jl_thread_self();
-    unsigned long owner = jl_atomic_load_acquire(&lock->owner);
-    if (owner == self) {
-        lock->count++;
-        return 1;
-    }
-    if (owner == 0 &&
-        jl_atomic_compare_exchange(&lock->owner, 0, self) == 0) {
-        lock->count = 1;
-        return 1;
-    }
-    return 0;
-}
-
-static inline int jl_mutex_trylock(jl_mutex_t *lock)
-{
-    int got = jl_mutex_trylock_nogc(lock);
-    if (got) {
-        jl_ptls_t ptls = jl_get_ptls_states();
-        JL_SIGATOMIC_BEGIN();
-        jl_lock_frame_push(lock);
-        jl_gc_enable_finalizers(ptls, 0);
-    }
-    return got;
-}
-static inline void jl_mutex_unlock_nogc(jl_mutex_t *lock) JL_NOTSAFEPOINT
-{
-#ifndef __clang_analyzer__
-    assert(lock->owner == jl_thread_self() &&
-           "Unlocking a lock in a different thread.");
-    if (--lock->count == 0) {
-        jl_atomic_store_release(&lock->owner, 0);
-        jl_cpu_wake();
-    }
-#endif
-}
-
-static inline void jl_mutex_unlock(jl_mutex_t *lock)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_mutex_unlock_nogc(lock);
-    jl_gc_enable_finalizers(ptls, 1);
-    jl_lock_frame_pop();
-    JL_SIGATOMIC_END();
-}
-
-static inline void jl_mutex_init(jl_mutex_t *lock) JL_NOTSAFEPOINT
-{
-    lock->owner = 0;
-    lock->count = 0;
-}
-
-#define JL_MUTEX_INIT(m) jl_mutex_init(m)
-#define JL_LOCK(m) jl_mutex_lock(m)
-#define JL_UNLOCK(m) jl_mutex_unlock(m)
-#define JL_LOCK_NOGC(m) jl_mutex_lock_nogc(m)
-#define JL_UNLOCK_NOGC(m) jl_mutex_unlock_nogc(m)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/mach_dyld_atfork.tbd b/src/mach_dyld_atfork.tbd
new file mode 100644
index 00000000000000..9a5d18099dbcfa
--- /dev/null
+++ b/src/mach_dyld_atfork.tbd
@@ -0,0 +1,25 @@
+--- !tapi-tbd
+# copied from XCode's libSystem.tbd (current-version: 1311)
+# to provide weak-linkage info for new symbols on old systems
+tbd-version:     4
+targets:         [ x86_64-macos, x86_64-maccatalyst, arm64-macos, arm64-maccatalyst,
+                   arm64e-macos, arm64e-maccatalyst ]
+uuids:
+  - target:          x86_64-macos
+    value:           AFE6C76A-B47A-35F5-91D0-4E9FC439E90D
+  - target:          x86_64-maccatalyst
+    value:           AFE6C76A-B47A-35F5-91D0-4E9FC439E90D
+  - target:          arm64-macos
+    value:           2EA09BDB-811B-33AA-BB58-4B53AA2DB522
+  - target:          arm64-maccatalyst
+    value:           2EA09BDB-811B-33AA-BB58-4B53AA2DB522
+  - target:          arm64e-macos
+    value:           09AB3723-C26D-3762-93BA-98E9C38B89C1
+  - target:          arm64e-maccatalyst
+    value:           09AB3723-C26D-3762-93BA-98E9C38B89C1
+install-name:    '/usr/lib/libSystem.B.dylib'
+exports:
+  - targets:         [ arm64-macos, arm64e-macos, x86_64-macos, x86_64-maccatalyst,
+                       arm64-maccatalyst, arm64e-maccatalyst ]
+    symbols:         [ __dyld_atfork_parent, __dyld_atfork_prepare ]
+...
diff --git a/src/mach_excServer.c b/src/mach_excServer.c
new file mode 100644
index 00000000000000..7e99331fa85549
--- /dev/null
+++ b/src/mach_excServer.c
@@ -0,0 +1,863 @@
+/*
+ * IDENTIFICATION:
+ * stub generated Fri Apr  1 18:55:39 2022
+ * with a MiG generated by bootstrap_cmds-122
+ * from mach/mach_exc.defs
+ * OPTIONS:
+ */
+/*
+ * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+/*
+ * Mach Operating System
+ * Copyright (c) 1991,1990,1989,1988,1987 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+/* Module mach_exc */
+
+#define __MIG_check__Request__mach_exc_subsystem__ 1
+
+#include <string.h>
+#include <mach/ndr.h>
+#include <mach/boolean.h>
+#include <mach/kern_return.h>
+#include <mach/notify.h>
+#include <mach/mach_types.h>
+#include <mach/message.h>
+#include <mach/mig_errors.h>
+#include <mach/port.h>
+
+/* BEGIN VOUCHER CODE */
+
+#ifndef KERNEL
+#if defined(__has_include)
+#if __has_include(<mach/mig_voucher_support.h>)
+#ifndef USING_VOUCHERS
+#define USING_VOUCHERS
+#endif
+#ifndef __VOUCHER_FORWARD_TYPE_DECLS__
+#define __VOUCHER_FORWARD_TYPE_DECLS__
+#ifdef __cplusplus
+extern "C" {
+#endif
+        extern boolean_t voucher_mach_msg_set(mach_msg_header_t *msg) __attribute__((weak_import));
+#ifdef __cplusplus
+}
+#endif
+#endif // __VOUCHER_FORWARD_TYPE_DECLS__
+#endif // __has_include(<mach/mach_voucher_types.h>)
+#endif // __has_include
+#endif // !KERNEL
+
+/* END VOUCHER CODE */
+
+
+/* BEGIN MIG_STRNCPY_ZEROFILL CODE */
+
+#if defined(__has_include)
+#if __has_include(<mach/mig_strncpy_zerofill_support.h>)
+#ifndef USING_MIG_STRNCPY_ZEROFILL
+#define USING_MIG_STRNCPY_ZEROFILL
+#endif
+#ifndef __MIG_STRNCPY_ZEROFILL_FORWARD_TYPE_DECLS__
+#define __MIG_STRNCPY_ZEROFILL_FORWARD_TYPE_DECLS__
+#ifdef __cplusplus
+extern "C" {
+#endif
+        extern int mig_strncpy_zerofill(char *dest, const char *src, int len) __attribute__((weak_import));
+#ifdef __cplusplus
+}
+#endif
+#endif /* __MIG_STRNCPY_ZEROFILL_FORWARD_TYPE_DECLS__ */
+#endif /* __has_include(<mach/mig_strncpy_zerofill_support.h>) */
+#endif /* __has_include */
+
+/* END MIG_STRNCPY_ZEROFILL CODE */
+
+
+#include <Availability.h>
+#include <mach/std_types.h>
+#include <mach/mig.h>
+#include <mach/mig.h>
+#include <mach/mach_types.h>
+
+#ifndef mig_internal
+#define mig_internal    static __inline__
+#endif  /* mig_internal */
+
+#ifndef mig_external
+#define mig_external
+#endif  /* mig_external */
+
+#if     !defined(__MigTypeCheck) && defined(TypeCheck)
+#define __MigTypeCheck          TypeCheck       /* Legacy setting */
+#endif  /* !defined(__MigTypeCheck) */
+
+#if     !defined(__MigKernelSpecificCode) && defined(_MIG_KERNEL_SPECIFIC_CODE_)
+#define __MigKernelSpecificCode _MIG_KERNEL_SPECIFIC_CODE_      /* Legacy setting */
+#endif  /* !defined(__MigKernelSpecificCode) */
+
+#ifndef LimitCheck
+#define LimitCheck 0
+#endif  /* LimitCheck */
+
+#ifndef min
+#define min(a,b)  ( ((a) < (b))? (a): (b) )
+#endif  /* min */
+
+#if !defined(_WALIGN_)
+#define _WALIGN_(x) (((x) + 3) & ~3)
+#endif /* !defined(_WALIGN_) */
+
+#if !defined(_WALIGNSZ_)
+#define _WALIGNSZ_(x) _WALIGN_(sizeof(x))
+#endif /* !defined(_WALIGNSZ_) */
+
+#ifndef UseStaticTemplates
+#define UseStaticTemplates      0
+#endif  /* UseStaticTemplates */
+
+#ifndef MIG_SERVER_ROUTINE
+#define MIG_SERVER_ROUTINE
+#endif
+
+#ifndef __DeclareRcvRpc
+#define __DeclareRcvRpc(_NUM_, _NAME_)
+#endif  /* __DeclareRcvRpc */
+
+#ifndef __BeforeRcvRpc
+#define __BeforeRcvRpc(_NUM_, _NAME_)
+#endif  /* __BeforeRcvRpc */
+
+#ifndef __AfterRcvRpc
+#define __AfterRcvRpc(_NUM_, _NAME_)
+#endif  /* __AfterRcvRpc */
+
+#ifndef __DeclareRcvSimple
+#define __DeclareRcvSimple(_NUM_, _NAME_)
+#endif  /* __DeclareRcvSimple */
+
+#ifndef __BeforeRcvSimple
+#define __BeforeRcvSimple(_NUM_, _NAME_)
+#endif  /* __BeforeRcvSimple */
+
+#ifndef __AfterRcvSimple
+#define __AfterRcvSimple(_NUM_, _NAME_)
+#endif  /* __AfterRcvSimple */
+
+#define novalue void
+
+#define msgh_request_port       msgh_local_port
+#define MACH_MSGH_BITS_REQUEST(bits)    MACH_MSGH_BITS_LOCAL(bits)
+#define msgh_reply_port         msgh_remote_port
+#define MACH_MSGH_BITS_REPLY(bits)      MACH_MSGH_BITS_REMOTE(bits)
+
+#define MIG_RETURN_ERROR(X, code)       {\
+                                ((mig_reply_error_t *)X)->RetCode = code;\
+                                ((mig_reply_error_t *)X)->NDR = NDR_record;\
+                                return;\
+                                }
+
+/* typedefs for all requests */
+
+#ifndef __Request__mach_exc_subsystem__defined
+#define __Request__mach_exc_subsystem__defined
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                /* start of the kernel processed data */
+                mach_msg_body_t msgh_body;
+                mach_msg_port_descriptor_t thread;
+                mach_msg_port_descriptor_t task;
+                /* end of the kernel processed data */
+                NDR_record_t NDR;
+                exception_type_t exception;
+                mach_msg_type_number_t codeCnt;
+                int64_t code[2];
+        } __Request__mach_exception_raise_t __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                NDR_record_t NDR;
+                exception_type_t exception;
+                mach_msg_type_number_t codeCnt;
+                int64_t code[2];
+                int flavor;
+                mach_msg_type_number_t old_stateCnt;
+                natural_t old_state[1296];
+        } __Request__mach_exception_raise_state_t __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                /* start of the kernel processed data */
+                mach_msg_body_t msgh_body;
+                mach_msg_port_descriptor_t thread;
+                mach_msg_port_descriptor_t task;
+                /* end of the kernel processed data */
+                NDR_record_t NDR;
+                exception_type_t exception;
+                mach_msg_type_number_t codeCnt;
+                int64_t code[2];
+                int flavor;
+                mach_msg_type_number_t old_stateCnt;
+                natural_t old_state[1296];
+        } __Request__mach_exception_raise_state_identity_t __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+#endif /* !__Request__mach_exc_subsystem__defined */
+
+/* typedefs for all replies */
+
+#ifndef __Reply__mach_exc_subsystem__defined
+#define __Reply__mach_exc_subsystem__defined
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                NDR_record_t NDR;
+                kern_return_t RetCode;
+        } __Reply__mach_exception_raise_t __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                NDR_record_t NDR;
+                kern_return_t RetCode;
+                int flavor;
+                mach_msg_type_number_t new_stateCnt;
+                natural_t new_state[1296];
+        } __Reply__mach_exception_raise_state_t __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                NDR_record_t NDR;
+                kern_return_t RetCode;
+                int flavor;
+                mach_msg_type_number_t new_stateCnt;
+                natural_t new_state[1296];
+        } __Reply__mach_exception_raise_state_identity_t __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+#endif /* !__Reply__mach_exc_subsystem__defined */
+
+
+/* union of all replies */
+
+#ifndef __ReplyUnion__catch_mach_exc_subsystem__defined
+#define __ReplyUnion__catch_mach_exc_subsystem__defined
+union __ReplyUnion__catch_mach_exc_subsystem {
+        __Reply__mach_exception_raise_t Reply_mach_exception_raise;
+        __Reply__mach_exception_raise_state_t Reply_mach_exception_raise_state;
+        __Reply__mach_exception_raise_state_identity_t Reply_mach_exception_raise_state_identity;
+};
+#endif /* __ReplyUnion__catch_mach_exc_subsystem__defined */
+/* Forward Declarations */
+
+
+mig_internal novalue _Xmach_exception_raise
+        (mach_msg_header_t *InHeadP, mach_msg_header_t *OutHeadP);
+
+mig_internal novalue _Xmach_exception_raise_state
+        (mach_msg_header_t *InHeadP, mach_msg_header_t *OutHeadP);
+
+mig_internal novalue _Xmach_exception_raise_state_identity
+        (mach_msg_header_t *InHeadP, mach_msg_header_t *OutHeadP);
+
+
+#if ( __MigTypeCheck )
+#if __MIG_check__Request__mach_exc_subsystem__
+#if !defined(__MIG_check__Request__mach_exception_raise_t__defined)
+#define __MIG_check__Request__mach_exception_raise_t__defined
+
+mig_internal kern_return_t __MIG_check__Request__mach_exception_raise_t(__attribute__((__unused__)) __Request__mach_exception_raise_t *In0P)
+{
+
+        typedef __Request__mach_exception_raise_t __Request;
+#if     __MigTypeCheck
+        unsigned int msgh_size;
+#endif  /* __MigTypeCheck */
+
+#if     __MigTypeCheck
+        msgh_size = In0P->Head.msgh_size;
+        if (!(In0P->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) ||
+            (In0P->msgh_body.msgh_descriptor_count != 2) ||
+            (msgh_size < (mach_msg_size_t)(sizeof(__Request) - 16)) ||  (msgh_size > (mach_msg_size_t)sizeof(__Request)))
+                return MIG_BAD_ARGUMENTS;
+#endif  /* __MigTypeCheck */
+
+#if     __MigTypeCheck
+        if (In0P->thread.type != MACH_MSG_PORT_DESCRIPTOR ||
+            In0P->thread.disposition != 17)
+                return MIG_TYPE_ERROR;
+#endif  /* __MigTypeCheck */
+
+#if     __MigTypeCheck
+        if (In0P->task.type != MACH_MSG_PORT_DESCRIPTOR ||
+            In0P->task.disposition != 17)
+                return MIG_TYPE_ERROR;
+#endif  /* __MigTypeCheck */
+
+#if defined(__NDR_convert__int_rep__Request__mach_exception_raise_t__codeCnt__defined)
+        if (In0P->NDR.int_rep != NDR_record.int_rep)
+                __NDR_convert__int_rep__Request__mach_exception_raise_t__codeCnt(&In0P->codeCnt, In0P->NDR.int_rep);
+#endif  /* __NDR_convert__int_rep__Request__mach_exception_raise_t__codeCnt__defined */
+#if     __MigTypeCheck
+        if ( In0P->codeCnt > 2 )
+                return MIG_BAD_ARGUMENTS;
+        if (((msgh_size - (mach_msg_size_t)(sizeof(__Request) - 16)) / 8 < In0P->codeCnt) ||
+            (msgh_size != (mach_msg_size_t)(sizeof(__Request) - 16) + (8 * In0P->codeCnt)))
+                return MIG_BAD_ARGUMENTS;
+#endif  /* __MigTypeCheck */
+
+        return MACH_MSG_SUCCESS;
+}
+#endif /* !defined(__MIG_check__Request__mach_exception_raise_t__defined) */
+#endif /* __MIG_check__Request__mach_exc_subsystem__ */
+#endif /* ( __MigTypeCheck ) */
+
+
+/* Routine mach_exception_raise */
+#ifdef  mig_external
+mig_external
+#else
+extern
+#endif  /* mig_external */
+MIG_SERVER_ROUTINE
+kern_return_t catch_mach_exception_raise
+(
+        mach_port_t exception_port,
+        mach_port_t thread,
+        mach_port_t task,
+        exception_type_t exception,
+        mach_exception_data_t code,
+        mach_msg_type_number_t codeCnt
+);
+
+/* Routine mach_exception_raise */
+mig_internal novalue _Xmach_exception_raise
+        (mach_msg_header_t *InHeadP, mach_msg_header_t *OutHeadP)
+{
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                /* start of the kernel processed data */
+                mach_msg_body_t msgh_body;
+                mach_msg_port_descriptor_t thread;
+                mach_msg_port_descriptor_t task;
+                /* end of the kernel processed data */
+                NDR_record_t NDR;
+                exception_type_t exception;
+                mach_msg_type_number_t codeCnt;
+                int64_t code[2];
+                mach_msg_trailer_t trailer;
+        } Request __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+        typedef __Request__mach_exception_raise_t __Request;
+        typedef __Reply__mach_exception_raise_t Reply __attribute__((unused));
+
+        /*
+         * typedef struct {
+         *      mach_msg_header_t Head;
+         *      NDR_record_t NDR;
+         *      kern_return_t RetCode;
+         * } mig_reply_error_t;
+         */
+
+        Request *In0P = (Request *) InHeadP;
+        Reply *OutP = (Reply *) OutHeadP;
+#ifdef  __MIG_check__Request__mach_exception_raise_t__defined
+        kern_return_t check_result;
+#endif  /* __MIG_check__Request__mach_exception_raise_t__defined */
+
+        __DeclareRcvRpc(2405, "mach_exception_raise")
+        __BeforeRcvRpc(2405, "mach_exception_raise")
+
+#if     defined(__MIG_check__Request__mach_exception_raise_t__defined)
+        check_result = __MIG_check__Request__mach_exception_raise_t((__Request *)In0P);
+        if (check_result != MACH_MSG_SUCCESS)
+                { MIG_RETURN_ERROR(OutP, check_result); }
+#endif  /* defined(__MIG_check__Request__mach_exception_raise_t__defined) */
+
+        OutP->RetCode = catch_mach_exception_raise(In0P->Head.msgh_request_port, In0P->thread.name, In0P->task.name, In0P->exception, In0P->code, In0P->codeCnt);
+
+        OutP->NDR = NDR_record;
+
+
+        __AfterRcvRpc(2405, "mach_exception_raise")
+}
+
+#if ( __MigTypeCheck )
+#if __MIG_check__Request__mach_exc_subsystem__
+#if !defined(__MIG_check__Request__mach_exception_raise_state_t__defined)
+#define __MIG_check__Request__mach_exception_raise_state_t__defined
+
+mig_internal kern_return_t __MIG_check__Request__mach_exception_raise_state_t(__attribute__((__unused__)) __Request__mach_exception_raise_state_t *In0P, __attribute__((__unused__)) __Request__mach_exception_raise_state_t **In1PP)
+{
+
+        typedef __Request__mach_exception_raise_state_t __Request;
+        __Request *In1P;
+#if     __MigTypeCheck
+        unsigned int msgh_size;
+#endif  /* __MigTypeCheck */
+        unsigned int msgh_size_delta;
+
+#if     __MigTypeCheck
+        msgh_size = In0P->Head.msgh_size;
+        if ((In0P->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) ||
+            (msgh_size < (mach_msg_size_t)(sizeof(__Request) - 5200)) ||  (msgh_size > (mach_msg_size_t)sizeof(__Request)))
+                return MIG_BAD_ARGUMENTS;
+#endif  /* __MigTypeCheck */
+
+#if defined(__NDR_convert__int_rep__Request__mach_exception_raise_state_t__codeCnt__defined)
+        if (In0P->NDR.int_rep != NDR_record.int_rep)
+                __NDR_convert__int_rep__Request__mach_exception_raise_state_t__codeCnt(&In0P->codeCnt, In0P->NDR.int_rep);
+#endif  /* __NDR_convert__int_rep__Request__mach_exception_raise_state_t__codeCnt__defined */
+        msgh_size_delta = (8 * In0P->codeCnt);
+#if     __MigTypeCheck
+        if ( In0P->codeCnt > 2 )
+                return MIG_BAD_ARGUMENTS;
+        if (((msgh_size - (mach_msg_size_t)(sizeof(__Request) - 5200)) / 8 < In0P->codeCnt) ||
+            (msgh_size < (mach_msg_size_t)(sizeof(__Request) - 5200) + (8 * In0P->codeCnt)))
+                return MIG_BAD_ARGUMENTS;
+        msgh_size -= msgh_size_delta;
+#endif  /* __MigTypeCheck */
+
+        *In1PP = In1P = (__Request *) ((pointer_t) In0P + msgh_size_delta - 16);
+
+#if defined(__NDR_convert__int_rep__Request__mach_exception_raise_state_t__old_stateCnt__defined)
+        if (In0P->NDR.int_rep != NDR_record.int_rep)
+                __NDR_convert__int_rep__Request__mach_exception_raise_state_t__old_stateCnt(&In1P->old_stateCnt, In1P->NDR.int_rep);
+#endif  /* __NDR_convert__int_rep__Request__mach_exception_raise_state_t__old_stateCnt__defined */
+#if     __MigTypeCheck
+        if ( In1P->old_stateCnt > 1296 )
+                return MIG_BAD_ARGUMENTS;
+        if (((msgh_size - (mach_msg_size_t)(sizeof(__Request) - 5200)) / 4 < In1P->old_stateCnt) ||
+            (msgh_size != (mach_msg_size_t)(sizeof(__Request) - 5200) + (4 * In1P->old_stateCnt)))
+                return MIG_BAD_ARGUMENTS;
+#endif  /* __MigTypeCheck */
+
+        return MACH_MSG_SUCCESS;
+}
+#endif /* !defined(__MIG_check__Request__mach_exception_raise_state_t__defined) */
+#endif /* __MIG_check__Request__mach_exc_subsystem__ */
+#endif /* ( __MigTypeCheck ) */
+
+
+/* Routine mach_exception_raise_state */
+#ifdef  mig_external
+mig_external
+#else
+extern
+#endif  /* mig_external */
+MIG_SERVER_ROUTINE
+kern_return_t catch_mach_exception_raise_state
+(
+        mach_port_t exception_port,
+        exception_type_t exception,
+        const mach_exception_data_t code,
+        mach_msg_type_number_t codeCnt,
+        int *flavor,
+        const thread_state_t old_state,
+        mach_msg_type_number_t old_stateCnt,
+        thread_state_t new_state,
+        mach_msg_type_number_t *new_stateCnt
+);
+
+/* Routine mach_exception_raise_state */
+mig_internal novalue _Xmach_exception_raise_state
+        (mach_msg_header_t *InHeadP, mach_msg_header_t *OutHeadP)
+{
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                NDR_record_t NDR;
+                exception_type_t exception;
+                mach_msg_type_number_t codeCnt;
+                int64_t code[2];
+                int flavor;
+                mach_msg_type_number_t old_stateCnt;
+                natural_t old_state[1296];
+                mach_msg_trailer_t trailer;
+        } Request __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+        typedef __Request__mach_exception_raise_state_t __Request;
+        typedef __Reply__mach_exception_raise_state_t Reply __attribute__((unused));
+
+        /*
+         * typedef struct {
+         *      mach_msg_header_t Head;
+         *      NDR_record_t NDR;
+         *      kern_return_t RetCode;
+         * } mig_reply_error_t;
+         */
+
+        Request *In0P = (Request *) InHeadP;
+        Request *In1P;
+        Reply *OutP = (Reply *) OutHeadP;
+#ifdef  __MIG_check__Request__mach_exception_raise_state_t__defined
+        kern_return_t check_result;
+#endif  /* __MIG_check__Request__mach_exception_raise_state_t__defined */
+
+        __DeclareRcvRpc(2406, "mach_exception_raise_state")
+        __BeforeRcvRpc(2406, "mach_exception_raise_state")
+
+#if     defined(__MIG_check__Request__mach_exception_raise_state_t__defined)
+        check_result = __MIG_check__Request__mach_exception_raise_state_t((__Request *)In0P, (__Request **)&In1P);
+        if (check_result != MACH_MSG_SUCCESS)
+                { MIG_RETURN_ERROR(OutP, check_result); }
+#endif  /* defined(__MIG_check__Request__mach_exception_raise_state_t__defined) */
+
+        OutP->new_stateCnt = 1296;
+
+        OutP->RetCode = catch_mach_exception_raise_state(In0P->Head.msgh_request_port, In0P->exception, In0P->code, In0P->codeCnt, &In1P->flavor, In1P->old_state, In1P->old_stateCnt, OutP->new_state, &OutP->new_stateCnt);
+        if (OutP->RetCode != KERN_SUCCESS) {
+                MIG_RETURN_ERROR(OutP, OutP->RetCode);
+        }
+
+        OutP->NDR = NDR_record;
+
+
+        OutP->flavor = In1P->flavor;
+        OutP->Head.msgh_size = (mach_msg_size_t)(sizeof(Reply) - 5184) + (((4 * OutP->new_stateCnt)));
+
+        __AfterRcvRpc(2406, "mach_exception_raise_state")
+}
+
+#if ( __MigTypeCheck )
+#if __MIG_check__Request__mach_exc_subsystem__
+#if !defined(__MIG_check__Request__mach_exception_raise_state_identity_t__defined)
+#define __MIG_check__Request__mach_exception_raise_state_identity_t__defined
+
+mig_internal kern_return_t __MIG_check__Request__mach_exception_raise_state_identity_t(__attribute__((__unused__)) __Request__mach_exception_raise_state_identity_t *In0P, __attribute__((__unused__)) __Request__mach_exception_raise_state_identity_t **In1PP)
+{
+
+        typedef __Request__mach_exception_raise_state_identity_t __Request;
+        __Request *In1P;
+#if     __MigTypeCheck
+        unsigned int msgh_size;
+#endif  /* __MigTypeCheck */
+        unsigned int msgh_size_delta;
+
+#if     __MigTypeCheck
+        msgh_size = In0P->Head.msgh_size;
+        if (!(In0P->Head.msgh_bits & MACH_MSGH_BITS_COMPLEX) ||
+            (In0P->msgh_body.msgh_descriptor_count != 2) ||
+            (msgh_size < (mach_msg_size_t)(sizeof(__Request) - 5200)) ||  (msgh_size > (mach_msg_size_t)sizeof(__Request)))
+                return MIG_BAD_ARGUMENTS;
+#endif  /* __MigTypeCheck */
+
+#if     __MigTypeCheck
+        if (In0P->thread.type != MACH_MSG_PORT_DESCRIPTOR ||
+            In0P->thread.disposition != 17)
+                return MIG_TYPE_ERROR;
+#endif  /* __MigTypeCheck */
+
+#if     __MigTypeCheck
+        if (In0P->task.type != MACH_MSG_PORT_DESCRIPTOR ||
+            In0P->task.disposition != 17)
+                return MIG_TYPE_ERROR;
+#endif  /* __MigTypeCheck */
+
+#if defined(__NDR_convert__int_rep__Request__mach_exception_raise_state_identity_t__codeCnt__defined)
+        if (In0P->NDR.int_rep != NDR_record.int_rep)
+                __NDR_convert__int_rep__Request__mach_exception_raise_state_identity_t__codeCnt(&In0P->codeCnt, In0P->NDR.int_rep);
+#endif  /* __NDR_convert__int_rep__Request__mach_exception_raise_state_identity_t__codeCnt__defined */
+        msgh_size_delta = (8 * In0P->codeCnt);
+#if     __MigTypeCheck
+        if ( In0P->codeCnt > 2 )
+                return MIG_BAD_ARGUMENTS;
+        if (((msgh_size - (mach_msg_size_t)(sizeof(__Request) - 5200)) / 8 < In0P->codeCnt) ||
+            (msgh_size < (mach_msg_size_t)(sizeof(__Request) - 5200) + (8 * In0P->codeCnt)))
+                return MIG_BAD_ARGUMENTS;
+        msgh_size -= msgh_size_delta;
+#endif  /* __MigTypeCheck */
+
+        *In1PP = In1P = (__Request *) ((pointer_t) In0P + msgh_size_delta - 16);
+
+#if defined(__NDR_convert__int_rep__Request__mach_exception_raise_state_identity_t__old_stateCnt__defined)
+        if (In0P->NDR.int_rep != NDR_record.int_rep)
+                __NDR_convert__int_rep__Request__mach_exception_raise_state_identity_t__old_stateCnt(&In1P->old_stateCnt, In1P->NDR.int_rep);
+#endif  /* __NDR_convert__int_rep__Request__mach_exception_raise_state_identity_t__old_stateCnt__defined */
+#if     __MigTypeCheck
+        if ( In1P->old_stateCnt > 1296 )
+                return MIG_BAD_ARGUMENTS;
+        if (((msgh_size - (mach_msg_size_t)(sizeof(__Request) - 5200)) / 4 < In1P->old_stateCnt) ||
+            (msgh_size != (mach_msg_size_t)(sizeof(__Request) - 5200) + (4 * In1P->old_stateCnt)))
+                return MIG_BAD_ARGUMENTS;
+#endif  /* __MigTypeCheck */
+
+        return MACH_MSG_SUCCESS;
+}
+#endif /* !defined(__MIG_check__Request__mach_exception_raise_state_identity_t__defined) */
+#endif /* __MIG_check__Request__mach_exc_subsystem__ */
+#endif /* ( __MigTypeCheck ) */
+
+
+/* Routine mach_exception_raise_state_identity */
+#ifdef  mig_external
+mig_external
+#else
+extern
+#endif  /* mig_external */
+MIG_SERVER_ROUTINE
+kern_return_t catch_mach_exception_raise_state_identity
+(
+        mach_port_t exception_port,
+        mach_port_t thread,
+        mach_port_t task,
+        exception_type_t exception,
+        mach_exception_data_t code,
+        mach_msg_type_number_t codeCnt,
+        int *flavor,
+        thread_state_t old_state,
+        mach_msg_type_number_t old_stateCnt,
+        thread_state_t new_state,
+        mach_msg_type_number_t *new_stateCnt
+);
+
+/* Routine mach_exception_raise_state_identity */
+mig_internal novalue _Xmach_exception_raise_state_identity
+        (mach_msg_header_t *InHeadP, mach_msg_header_t *OutHeadP)
+{
+
+#ifdef  __MigPackStructs
+#pragma pack(push, 4)
+#endif
+        typedef struct {
+                mach_msg_header_t Head;
+                /* start of the kernel processed data */
+                mach_msg_body_t msgh_body;
+                mach_msg_port_descriptor_t thread;
+                mach_msg_port_descriptor_t task;
+                /* end of the kernel processed data */
+                NDR_record_t NDR;
+                exception_type_t exception;
+                mach_msg_type_number_t codeCnt;
+                int64_t code[2];
+                int flavor;
+                mach_msg_type_number_t old_stateCnt;
+                natural_t old_state[1296];
+                mach_msg_trailer_t trailer;
+        } Request __attribute__((unused));
+#ifdef  __MigPackStructs
+#pragma pack(pop)
+#endif
+        typedef __Request__mach_exception_raise_state_identity_t __Request;
+        typedef __Reply__mach_exception_raise_state_identity_t Reply __attribute__((unused));
+
+        /*
+         * typedef struct {
+         *      mach_msg_header_t Head;
+         *      NDR_record_t NDR;
+         *      kern_return_t RetCode;
+         * } mig_reply_error_t;
+         */
+
+        Request *In0P = (Request *) InHeadP;
+        Request *In1P;
+        Reply *OutP = (Reply *) OutHeadP;
+#ifdef  __MIG_check__Request__mach_exception_raise_state_identity_t__defined
+        kern_return_t check_result;
+#endif  /* __MIG_check__Request__mach_exception_raise_state_identity_t__defined */
+
+        __DeclareRcvRpc(2407, "mach_exception_raise_state_identity")
+        __BeforeRcvRpc(2407, "mach_exception_raise_state_identity")
+
+#if     defined(__MIG_check__Request__mach_exception_raise_state_identity_t__defined)
+        check_result = __MIG_check__Request__mach_exception_raise_state_identity_t((__Request *)In0P, (__Request **)&In1P);
+        if (check_result != MACH_MSG_SUCCESS)
+                { MIG_RETURN_ERROR(OutP, check_result); }
+#endif  /* defined(__MIG_check__Request__mach_exception_raise_state_identity_t__defined) */
+
+        OutP->new_stateCnt = 1296;
+
+        OutP->RetCode = catch_mach_exception_raise_state_identity(In0P->Head.msgh_request_port, In0P->thread.name, In0P->task.name, In0P->exception, In0P->code, In0P->codeCnt, &In1P->flavor, In1P->old_state, In1P->old_stateCnt, OutP->new_state, &OutP->new_stateCnt);
+        if (OutP->RetCode != KERN_SUCCESS) {
+                MIG_RETURN_ERROR(OutP, OutP->RetCode);
+        }
+
+        OutP->NDR = NDR_record;
+
+
+        OutP->flavor = In1P->flavor;
+        OutP->Head.msgh_size = (mach_msg_size_t)(sizeof(Reply) - 5184) + (((4 * OutP->new_stateCnt)));
+
+        __AfterRcvRpc(2407, "mach_exception_raise_state_identity")
+}
+
+
+#ifdef  mig_external
+mig_external
+#else
+extern
+#endif  /* mig_external */
+boolean_t mach_exc_server(
+                mach_msg_header_t *InHeadP,
+                mach_msg_header_t *OutHeadP);
+
+#ifdef  mig_external
+mig_external
+#else
+extern
+#endif  /* mig_external */
+mig_routine_t mach_exc_server_routine(
+                mach_msg_header_t *InHeadP);
+
+
+/* Description of this subsystem, for use in direct RPC */
+const struct catch_mach_exc_subsystem {
+        mig_server_routine_t    server; /* Server routine */
+        mach_msg_id_t   start;  /* Min routine number */
+        mach_msg_id_t   end;    /* Max routine number + 1 */
+        unsigned int    maxsize;        /* Max msg size */
+        vm_address_t    reserved;       /* Reserved */
+        struct routine_descriptor       /*Array of routine descriptors */
+                routine[4];
+} catch_mach_exc_subsystem = {
+        mach_exc_server_routine,
+        2405,
+        2409,
+        (mach_msg_size_t)sizeof(union __ReplyUnion__catch_mach_exc_subsystem),
+        (vm_address_t)0,
+        {
+          { (mig_impl_routine_t) 0,
+          (mig_stub_routine_t) _Xmach_exception_raise, 6, 0, (routine_arg_descriptor_t)0, (mach_msg_size_t)sizeof(__Reply__mach_exception_raise_t)},
+          { (mig_impl_routine_t) 0,
+          (mig_stub_routine_t) _Xmach_exception_raise_state, 9, 0, (routine_arg_descriptor_t)0, (mach_msg_size_t)sizeof(__Reply__mach_exception_raise_state_t)},
+          { (mig_impl_routine_t) 0,
+          (mig_stub_routine_t) _Xmach_exception_raise_state_identity, 11, 0, (routine_arg_descriptor_t)0, (mach_msg_size_t)sizeof(__Reply__mach_exception_raise_state_identity_t)},
+                {0, 0, 0, 0, 0, 0},
+        }
+};
+
+mig_external boolean_t mach_exc_server
+        (mach_msg_header_t *InHeadP, mach_msg_header_t *OutHeadP)
+{
+        /*
+         * typedef struct {
+         *      mach_msg_header_t Head;
+         *      NDR_record_t NDR;
+         *      kern_return_t RetCode;
+         * } mig_reply_error_t;
+         */
+
+        mig_routine_t routine;
+
+        OutHeadP->msgh_bits = MACH_MSGH_BITS(MACH_MSGH_BITS_REPLY(InHeadP->msgh_bits), 0);
+        OutHeadP->msgh_remote_port = InHeadP->msgh_reply_port;
+        /* Minimal size: routine() will update it if different */
+        OutHeadP->msgh_size = (mach_msg_size_t)sizeof(mig_reply_error_t);
+        OutHeadP->msgh_local_port = MACH_PORT_NULL;
+        OutHeadP->msgh_id = InHeadP->msgh_id + 100;
+        OutHeadP->msgh_reserved = 0;
+
+        if ((InHeadP->msgh_id > 2408) || (InHeadP->msgh_id < 2405) ||
+            ((routine = catch_mach_exc_subsystem.routine[InHeadP->msgh_id - 2405].stub_routine) == 0)) {
+                ((mig_reply_error_t *)OutHeadP)->NDR = NDR_record;
+                ((mig_reply_error_t *)OutHeadP)->RetCode = MIG_BAD_ID;
+                return FALSE;
+        }
+        (*routine) (InHeadP, OutHeadP);
+        return TRUE;
+}
+
+mig_external mig_routine_t mach_exc_server_routine
+        (mach_msg_header_t *InHeadP)
+{
+        int msgh_id;
+
+        msgh_id = InHeadP->msgh_id - 2405;
+
+        if ((msgh_id > 3) || (msgh_id < 0))
+                return 0;
+
+        return catch_mach_exc_subsystem.routine[msgh_id].stub_routine;
+}
diff --git a/src/macroexpand.scm b/src/macroexpand.scm
index e6c4fde732611f..516dd9b29f354f 100644
--- a/src/macroexpand.scm
+++ b/src/macroexpand.scm
@@ -210,7 +210,8 @@
         ((atom? v) '())
         (else
          (case (car v)
-           ((... kw |::| =) (try-arg-name (cadr v)))
+           ((|::|) (if (length= v 2) '() (try-arg-name (cadr v))))
+           ((... kw =) (try-arg-name (cadr v)))
            ((escape) (list v))
            ((hygienic-scope) (try-arg-name (cadr v)))
            ((meta)  ;; allow certain per-argument annotations
@@ -324,7 +325,7 @@
    m parent-scope inarg))
 
 (define (resolve-expansion-vars- e env m parent-scope inarg)
-  (cond ((or (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal))
+  (cond ((or (eq? e 'begin) (eq? e 'end) (eq? e 'ccall) (eq? e 'cglobal) (underscore-symbol? e))
          e)
         ((symbol? e)
          (let ((a (assq e env)))
@@ -351,7 +352,7 @@
                                    ,(resolve-expansion-vars-with-new-env (caddr arg) env m parent-scope inarg))))
                              (else
                               `(global ,(resolve-expansion-vars-with-new-env arg env m parent-scope inarg))))))
-           ((using import export meta line inbounds boundscheck loopinfo) (map unescape e))
+           ((using import export meta line inbounds boundscheck loopinfo inline noinline) (map unescape e))
            ((macrocall) e) ; invalid syntax anyways, so just act like it's quoted.
            ((symboliclabel) e)
            ((symbolicgoto) e)
@@ -383,7 +384,7 @@
                  ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))
 
            ((= function)
-            (if (and (pair? (cadr e)) (function-def? e))
+            (if (and (pair? (cadr e)) (function-def? e) (length> e 2))
                 ;; in (kw x 1) inside an arglist, the x isn't actually a kwarg
                 `(,(car e) ,(resolve-in-function-lhs (cadr e) env m parent-scope inarg)
                   ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))
@@ -396,13 +397,18 @@
              ((not (length> e 2)) e)
              ((and (pair? (cadr e))
                    (eq? (caadr e) '|::|))
-              `(kw (|::|
-                    ,(if inarg
-                         (resolve-expansion-vars- (cadr (cadr e)) env m parent-scope inarg)
-                         ;; in keyword arg A=B, don't transform "A"
-                         (unescape (cadr (cadr e))))
-                    ,(resolve-expansion-vars- (caddr (cadr e)) env m parent-scope inarg))
-                   ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg)))
+              (let* ((type-decl (cadr e)) ;; [argname]::type
+                     (argname   (and (length> type-decl 2) (cadr type-decl)))
+                     (type      (if argname (caddr type-decl) (cadr type-decl))))
+                `(kw (|::|
+                      ,@(if argname
+                            (list (if inarg
+                                      (resolve-expansion-vars- argname env m parent-scope inarg)
+                                      ;; in keyword arg A=B, don't transform "A"
+                                      (unescape argname)))
+                            '())
+                      ,(resolve-expansion-vars- type env m parent-scope inarg))
+                     ,(resolve-expansion-vars-with-new-env (caddr e) env m parent-scope inarg))))
              (else
               `(kw ,(if inarg
                         (resolve-expansion-vars- (cadr e) env m parent-scope inarg)
@@ -449,14 +455,16 @@
 
 ;; decl-var that also identifies f in f()=...
 (define (decl-var* e)
-  (cond ((not (pair? e))       e)
-        ((eq? (car e) 'escape) '())
-        ((eq? (car e) 'call)   (decl-var* (cadr e)))
-        ((eq? (car e) '=)      (decl-var* (cadr e)))
-        ((eq? (car e) 'curly)  (decl-var* (cadr e)))
-        ((eq? (car e) '|::|)   (decl-var* (cadr e)))
-        ((eq? (car e) 'where)  (decl-var* (cadr e)))
-        (else                  (decl-var e))))
+  (if (pair? e)
+      (case (car e)
+        ((escape) '())
+        ((call)   (decl-var* (cadr e)))
+        ((=)      (decl-var* (cadr e)))
+        ((curly)  (decl-var* (cadr e)))
+        ((|::|)   (if (length= e 2) '() (decl-var* (cadr e))))
+        ((where)  (decl-var* (cadr e)))
+        (else     (decl-var e)))
+      e))
 
 (define (decl-vars* e)
   (if (and (pair? e) (eq? (car e) 'tuple))
diff --git a/src/method.c b/src/method.c
index a01ec5c3578acf..18cf3846bd33fd 100644
--- a/src/method.c
+++ b/src/method.c
@@ -18,6 +18,28 @@ extern "C" {
 extern jl_value_t *jl_builtin_getfield;
 extern jl_value_t *jl_builtin_tuple;
 
+jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
+    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva);
+
+static void check_c_types(const char *where, jl_value_t *rt, jl_value_t *at)
+{
+    if (jl_is_svec(rt))
+        jl_errorf("%s: missing return type", where);
+    JL_TYPECHKS(where, type, rt);
+    if (!jl_type_mappable_to_c(rt))
+        jl_errorf("%s: return type doesn't correspond to a C type", where);
+    JL_TYPECHKS(where, simplevector, at);
+    int i, l = jl_svec_len(at);
+    for (i = 0; i < l; i++) {
+        jl_value_t *ati = jl_svecref(at, i);
+        if (jl_is_vararg(ati))
+            jl_errorf("%s: Vararg not allowed for argument list", where);
+        JL_TYPECHKS(where, type, ati);
+        if (!jl_type_mappable_to_c(ati))
+            jl_errorf("%s: argument %d type doesn't correspond to a C type", where, i + 1);
+    }
+}
+
 // Resolve references to non-locally-defined variables to become references to global
 // variables in `module` (unless the rvalue is one of the type parameters in `sparam_vals`).
 static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_svec_t *sparam_vals,
@@ -29,41 +51,64 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
         return jl_module_globalref(module, (jl_sym_t*)expr);
     }
     else if (jl_is_returnnode(expr)) {
-        jl_value_t *val = resolve_globals(jl_returnnode_value(expr), module, sparam_vals, binding_effects, eager_resolve);
-        JL_GC_PUSH1(&val);
-        expr = jl_new_struct(jl_returnnode_type, val);
-        JL_GC_POP();
+        jl_value_t *retval = jl_returnnode_value(expr);
+        if (retval) {
+            jl_value_t *val = resolve_globals(retval, module, sparam_vals, binding_effects, eager_resolve);
+            if (val != retval) {
+                JL_GC_PUSH1(&val);
+                expr = jl_new_struct(jl_returnnode_type, val);
+                JL_GC_POP();
+            }
+        }
         return expr;
     }
     else if (jl_is_gotoifnot(expr)) {
         jl_value_t *cond = resolve_globals(jl_gotoifnot_cond(expr), module, sparam_vals, binding_effects, eager_resolve);
-        intptr_t label = jl_gotoifnot_label(expr);
-        JL_GC_PUSH1(&cond);
-        expr = jl_new_struct_uninit(jl_gotoifnot_type);
-        set_nth_field(jl_gotoifnot_type, expr, 0, cond);
-        jl_gotoifnot_label(expr) = label;
-        JL_GC_POP();
+        if (cond != jl_gotoifnot_cond(expr)) {
+            intptr_t label = jl_gotoifnot_label(expr);
+            JL_GC_PUSH1(&cond);
+            expr = jl_new_struct_uninit(jl_gotoifnot_type);
+            set_nth_field(jl_gotoifnot_type, expr, 0, cond, 0);
+            jl_gotoifnot_label(expr) = label;
+            JL_GC_POP();
+        }
         return expr;
     }
     else if (jl_is_expr(expr)) {
         jl_expr_t *e = (jl_expr_t*)expr;
-        if (e->head == global_sym && binding_effects) {
+        if (e->head == jl_global_sym && binding_effects) {
             // execute the side-effects of "global x" decl immediately:
             // creates uninitialized mutable binding in module for each global
-            jl_toplevel_eval_flex(module, expr, 0, 1);
+            jl_eval_global_expr(module, e, 1);
             expr = jl_nothing;
         }
-        if (jl_is_toplevel_only_expr(expr) || e->head == const_sym ||
-            e->head == coverageeffect_sym || e->head == copyast_sym ||
-            e->head == quote_sym || e->head == inert_sym ||
-            e->head == meta_sym || e->head == inbounds_sym ||
-            e->head == boundscheck_sym || e->head == loopinfo_sym ||
-            e->head == aliasscope_sym || e->head == popaliasscope_sym) {
+        if (jl_is_toplevel_only_expr(expr) || e->head == jl_const_sym ||
+            e->head == jl_coverageeffect_sym || e->head == jl_copyast_sym ||
+            e->head == jl_quote_sym || e->head == jl_inert_sym ||
+            e->head == jl_meta_sym || e->head == jl_inbounds_sym ||
+            e->head == jl_boundscheck_sym || e->head == jl_loopinfo_sym ||
+            e->head == jl_aliasscope_sym || e->head == jl_popaliasscope_sym ||
+            e->head == jl_inline_sym || e->head == jl_noinline_sym) {
             // ignore these
         }
         else {
             size_t i = 0, nargs = jl_array_len(e->args);
-            if (e->head == cfunction_sym) {
+            if (e->head == jl_opaque_closure_method_sym) {
+                if (nargs != 5) {
+                    jl_error("opaque_closure_method: invalid syntax");
+                }
+                jl_value_t *name = jl_exprarg(e, 0);
+                jl_value_t *nargs = jl_exprarg(e, 1);
+                int isva = jl_exprarg(e, 2) == jl_true;
+                jl_value_t *functionloc = jl_exprarg(e, 3);
+                jl_value_t *ci = jl_exprarg(e, 4);
+                if (!jl_is_code_info(ci)) {
+                    jl_error("opaque_closure_method: lambda should be a CodeInfo");
+                }
+                jl_method_t *m = jl_make_opaque_closure_method(module, name, jl_unbox_long(nargs), functionloc, (jl_code_info_t*)ci, isva);
+                return (jl_value_t*)m;
+            }
+            if (e->head == jl_cfunction_sym) {
                 JL_NARGS(cfunction method definition, 5, 5); // (type, func, rt, at, cc)
                 jl_value_t *typ = jl_exprarg(e, 0);
                 if (!jl_is_type(typ))
@@ -100,16 +145,13 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                     }
                     jl_exprargset(e, 3, at);
                 }
-                if (jl_is_svec(rt))
-                    jl_error("cfunction: missing return type");
-                JL_TYPECHK(cfunction method definition, type, rt);
-                JL_TYPECHK(cfunction method definition, simplevector, at);
+                check_c_types("cfunction method definition", rt, at);
                 JL_TYPECHK(cfunction method definition, quotenode, jl_exprarg(e, 4));
                 JL_TYPECHK(cfunction method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4));
                 return expr;
             }
-            if (e->head == foreigncall_sym) {
-                JL_NARGSV(ccall method definition, 5); // (fptr, rt, at, cc, narg)
+            if (e->head == jl_foreigncall_sym) {
+                JL_NARGSV(ccall method definition, 5); // (fptr, rt, at, nreq, (cc, effects))
                 jl_value_t *rt = jl_exprarg(e, 1);
                 jl_value_t *at = jl_exprarg(e, 2);
                 if (!jl_is_type(rt)) {
@@ -136,24 +178,29 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                     }
                     jl_exprargset(e, 2, at);
                 }
-                if (jl_is_svec(rt))
-                    jl_error("ccall: missing return type");
-                JL_TYPECHK(ccall method definition, type, rt);
-                JL_TYPECHK(ccall method definition, simplevector, at);
+                check_c_types("ccall method definition", rt, at);
                 JL_TYPECHK(ccall method definition, long, jl_exprarg(e, 3));
                 JL_TYPECHK(ccall method definition, quotenode, jl_exprarg(e, 4));
-                JL_TYPECHK(ccall method definition, symbol, *(jl_value_t**)jl_exprarg(e, 4));
+                jl_value_t *cc = jl_quotenode_value(jl_exprarg(e, 4));
+                if (!jl_is_symbol(cc)) {
+                    JL_TYPECHK(ccall method definition, tuple, cc);
+                    if (jl_nfields(cc) != 2) {
+                        jl_error("In ccall calling convention, expected two argument tuple or symbol.");
+                    }
+                    JL_TYPECHK(ccall method definition, symbol, jl_get_nth_field(cc, 0));
+                    JL_TYPECHK(ccall method definition, uint8, jl_get_nth_field(cc, 1));
+                }
                 jl_exprargset(e, 0, resolve_globals(jl_exprarg(e, 0), module, sparam_vals, binding_effects, 1));
                 i++;
             }
-            if (e->head == method_sym || e->head == module_sym) {
+            if (e->head == jl_method_sym || e->head == jl_module_sym) {
                 i++;
             }
             for (; i < nargs; i++) {
                 // TODO: this should be making a copy, not mutating the source
                 jl_exprargset(e, i, resolve_globals(jl_exprarg(e, i), module, sparam_vals, binding_effects, eager_resolve));
             }
-            if (e->head == call_sym && jl_expr_nargs(e) == 3 &&
+            if (e->head == jl_call_sym && jl_expr_nargs(e) == 3 &&
                     jl_is_globalref(jl_exprarg(e, 0)) &&
                     jl_is_globalref(jl_exprarg(e, 1)) &&
                     jl_is_quotenode(jl_exprarg(e, 2))) {
@@ -171,13 +218,15 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 if (fe_mod->istopmod && !strcmp(jl_symbol_name(fe_sym), "getproperty") && jl_is_symbol(s)) {
                     if (eager_resolve || jl_binding_resolved_p(me_mod, me_sym)) {
                         jl_binding_t *b = jl_get_binding(me_mod, me_sym);
-                        if (b && b->constp && b->value && jl_is_module(b->value)) {
-                            return jl_module_globalref((jl_module_t*)b->value, (jl_sym_t*)s);
+                        if (b && b->constp) {
+                            jl_value_t *v = jl_atomic_load_relaxed(&b->value);
+                            if (v && jl_is_module(v))
+                                return jl_module_globalref((jl_module_t*)v, (jl_sym_t*)s);
                         }
                     }
                 }
             }
-            if (e->head == call_sym && nargs > 0 &&
+            if (e->head == jl_call_sym && nargs > 0 &&
                     jl_is_globalref(jl_exprarg(e, 0))) {
                 // TODO: this hack should be deleted once llvmcall is fixed
                 jl_value_t *fe = jl_exprarg(e, 0);
@@ -186,7 +235,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
                 if (jl_binding_resolved_p(fe_mod, fe_sym)) {
                     // look at some known called functions
                     jl_binding_t *b = jl_get_binding(fe_mod, fe_sym);
-                    if (b && b->constp && b->value == jl_builtin_tuple) {
+                    if (b && b->constp && jl_atomic_load_relaxed(&b->value) == jl_builtin_tuple) {
                         size_t j;
                         for (j = 1; j < nargs; j++) {
                             if (!jl_is_quotenode(jl_exprarg(e, j)))
@@ -210,7 +259,7 @@ static jl_value_t *resolve_globals(jl_value_t *expr, jl_module_t *module, jl_sve
     return expr;
 }
 
-void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
+JL_DLLEXPORT void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *sparam_vals,
                               int binding_effects)
 {
     size_t i, l = jl_array_len(stmts);
@@ -220,6 +269,11 @@ void jl_resolve_globals_in_ir(jl_array_t *stmts, jl_module_t *m, jl_svec_t *spar
     }
 }
 
+jl_value_t *expr_arg1(jl_value_t *expr) {
+    jl_array_t *args = ((jl_expr_t*)expr)->args;
+    return jl_array_ptr_ref(args, 0);
+}
+
 // copy a :lambda Expr into its CodeInfo representation,
 // including popping of known meta nodes
 static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
@@ -241,19 +295,41 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
     jl_gc_wb(li, li->code);
     size_t n = jl_array_len(body);
     jl_value_t **bd = (jl_value_t**)jl_array_ptr_data((jl_array_t*)li->code);
+    li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, n);
+    jl_gc_wb(li, li->ssaflags);
+    int inbounds_depth = 0; // number of stacked inbounds
+    // isempty(inline_flags): no user annotation
+    // last(inline_flags) == 1: inline region
+    // last(inline_flags) == 0: noinline region
+    arraylist_t *inline_flags = arraylist_new((arraylist_t*)malloc_s(sizeof(arraylist_t)), 0);
     for (j = 0; j < n; j++) {
         jl_value_t *st = bd[j];
-        if (jl_is_expr(st) && ((jl_expr_t*)st)->head == meta_sym) {
+        int is_flag_stmt = 0;
+        // check :meta expression
+        if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_meta_sym) {
             size_t k, ins = 0, na = jl_expr_nargs(st);
             jl_array_t *meta = ((jl_expr_t*)st)->args;
             for (k = 0; k < na; k++) {
                 jl_value_t *ma = jl_array_ptr_ref(meta, k);
-                if (ma == (jl_value_t*)pure_sym)
+                if (ma == (jl_value_t*)jl_pure_sym)
                     li->pure = 1;
-                else if (ma == (jl_value_t*)inline_sym)
+                else if (ma == (jl_value_t*)jl_inline_sym)
                     li->inlineable = 1;
-                else if (ma == (jl_value_t*)propagate_inbounds_sym)
+                else if (ma == (jl_value_t*)jl_propagate_inbounds_sym)
                     li->propagate_inbounds = 1;
+                else if (ma == (jl_value_t*)jl_aggressive_constprop_sym)
+                    li->constprop = 1;
+                else if (ma == (jl_value_t*)jl_no_constprop_sym)
+                    li->constprop = 2;
+                else if (jl_is_expr(ma) && ((jl_expr_t*)ma)->head == jl_purity_sym) {
+                    if (jl_expr_nargs(ma) == 5) {
+                        li->purity.overrides.ipo_consistent = jl_unbox_bool(jl_exprarg(ma, 0));
+                        li->purity.overrides.ipo_effect_free = jl_unbox_bool(jl_exprarg(ma, 1));
+                        li->purity.overrides.ipo_nothrow = jl_unbox_bool(jl_exprarg(ma, 2));
+                        li->purity.overrides.ipo_terminates = jl_unbox_bool(jl_exprarg(ma, 3));
+                        li->purity.overrides.ipo_terminates_locally = jl_unbox_bool(jl_exprarg(ma, 4));
+                    }
+                }
                 else
                     jl_array_ptr_set(meta, ins++, ma);
             }
@@ -262,10 +338,60 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
             else
                 jl_array_del_end(meta, na - ins);
         }
-        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == return_sym) {
+        // check other flag expressions
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_inbounds_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true)       // push
+                inbounds_depth += 1;
+            else if (arg1 == (jl_value_t*)jl_false) // clear
+                inbounds_depth = 0;
+            else if (inbounds_depth > 0)            // pop
+                inbounds_depth -= 1;
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_inline_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true) // enter inline region
+                arraylist_push(inline_flags, (void*)1);
+            else {                            // exit inline region
+                assert(arg1 == (jl_value_t*)jl_false);
+                arraylist_pop(inline_flags);
+            }
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_noinline_sym) {
+            is_flag_stmt = 1;
+            jl_value_t *arg1 = expr_arg1(st);
+            if (arg1 == (jl_value_t*)jl_true) // enter noinline region
+                arraylist_push(inline_flags, (void*)0);
+            else {                             // exit noinline region
+                assert(arg1 == (jl_value_t*)jl_false);
+                arraylist_pop(inline_flags);
+            }
+            bd[j] = jl_nothing;
+        }
+        else if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_return_sym) {
             jl_array_ptr_set(body, j, jl_new_struct(jl_returnnode_type, jl_exprarg(st, 0)));
         }
+
+        if (is_flag_stmt)
+            jl_array_uint8_set(li->ssaflags, j, 0);
+        else {
+            uint8_t flag = 0;
+            if (inbounds_depth > 0)
+                flag |= 1 << 0;
+            if (inline_flags->len > 0) {
+                void* inline_flag = inline_flags->items[inline_flags->len - 1];
+                flag |= 1 << (inline_flag ? 1 : 2);
+            }
+            jl_array_uint8_set(li->ssaflags, j, flag);
+        }
     }
+    assert(inline_flags->len == 0); // malformed otherwise
+    arraylist_free(inline_flags);
+    free(inline_flags);
     jl_array_t *vinfo = (jl_array_t*)jl_exprarg(ir, 1);
     jl_array_t *vis = (jl_array_t*)jl_array_ptr_ref(vinfo, 0);
     size_t nslots = jl_array_len(vis);
@@ -278,7 +404,6 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
     jl_gc_wb(li, li->slotflags);
     li->ssavaluetypes = jl_box_long(nssavalue);
     jl_gc_wb(li, li->ssavaluetypes);
-    li->ssaflags = jl_alloc_array_1d(jl_array_uint8_type, 0);
 
     // Flags that need to be copied to slotflags
     const uint8_t vinfo_mask = 8 | 16 | 32 | 64;
@@ -288,14 +413,14 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
         jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(vi, 0);
         assert(jl_is_symbol(name));
         char *str = jl_symbol_name(name);
-        if (i > 0 && name != unused_sym) {
+        if (i > 0 && name != jl_unused_sym) {
             if (str[0] == '#') {
                 // convention for renamed variables: #...#original_name
                 char *nxt = strchr(str + 1, '#');
                 if (nxt)
                     name = jl_symbol(nxt+1);
                 else if (str[1] == 's')  // compiler-generated temporaries, #sXXX
-                    name = empty_sym;
+                    name = jl_empty_sym;
             }
         }
         jl_array_ptr_set(li->slotnames, i, name);
@@ -305,25 +430,27 @@ static void jl_code_info_set_ir(jl_code_info_t *li, jl_expr_t *ir)
 
 JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_method_instance_t *li =
-        (jl_method_instance_t*)jl_gc_alloc(ptls, sizeof(jl_method_instance_t),
+        (jl_method_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_instance_t),
                                            jl_method_instance_type);
     li->def.value = NULL;
     li->specTypes = NULL;
     li->sparam_vals = jl_emptysvec;
     li->uninferred = NULL;
     li->backedges = NULL;
-    li->cache = NULL;
+    li->callbacks = NULL;
+    jl_atomic_store_relaxed(&li->cache, NULL);
     li->inInference = 0;
+    li->precompiled = 0;
     return li;
 }
 
 JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_code_info_t *src =
-        (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t),
+        (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t),
                                        jl_code_info_type);
     src->code = NULL;
     src->codelocs = NULL;
@@ -343,6 +470,8 @@ JL_DLLEXPORT jl_code_info_t *jl_new_code_info_uninit(void)
     src->propagate_inbounds = 0;
     src->pure = 0;
     src->edges = jl_nothing;
+    src->constprop = 0;
+    src->purity.bits = 0;
     return src;
 }
 
@@ -356,24 +485,22 @@ jl_code_info_t *jl_new_code_info_from_ir(jl_expr_t *ir)
     return src;
 }
 
-void jl_linenumber_to_lineinfo(jl_code_info_t *ci, jl_module_t *mod, jl_value_t *name)
+void jl_add_function_name_to_lineinfo(jl_code_info_t *ci, jl_value_t *name)
 {
     jl_array_t *li = (jl_array_t*)ci->linetable;
     size_t i, n = jl_array_len(li);
-    jl_value_t *rt = NULL;
-    JL_GC_PUSH1(&rt);
+    jl_value_t *rt = NULL, *lno = NULL, *inl = NULL;
+    JL_GC_PUSH3(&rt, &lno, &inl);
     for (i = 0; i < n; i++) {
         jl_value_t *ln = jl_array_ptr_ref(li, i);
-        if (jl_is_linenode(ln)) {
-            rt = jl_box_long(jl_linenode_line(ln));
-            rt = jl_new_struct(jl_lineinfonode_type, mod, name, jl_linenode_file(ln), rt, jl_box_long(0));
-            jl_array_ptr_set(li, i, rt);
-        }
-        else if (jl_is_expr(ln) && ((jl_expr_t*)ln)->head == line_sym && jl_expr_nargs(ln) == 3) {
-            rt = jl_new_struct(jl_lineinfonode_type, mod, jl_symbol("macro expansion"),
-                               jl_exprarg(ln, 1), jl_exprarg(ln, 0), jl_exprarg(ln, 2));
-            jl_array_ptr_set(li, i, rt);
-        }
+        assert(jl_typeis(ln, jl_lineinfonode_type));
+        jl_value_t *mod = jl_fieldref_noalloc(ln, 0);
+        jl_value_t *file = jl_fieldref_noalloc(ln, 2);
+        lno = jl_fieldref(ln, 3);
+        inl = jl_fieldref(ln, 4);
+        jl_value_t *ln_name = (jl_is_int32(inl) && jl_unbox_int32(inl) == 0) ? name : jl_fieldref_noalloc(ln, 1);
+        rt = jl_new_struct(jl_lineinfonode_type, mod, ln_name, file, lno, inl);
+        jl_array_ptr_set(li, i, rt);
     }
     JL_GC_POP();
 }
@@ -419,6 +546,10 @@ JL_DLLEXPORT jl_code_info_t *jl_expand_and_resolve(jl_value_t *ex, jl_module_t *
 // effectively described by the tuple (specTypes, env, Method) inside linfo
 JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
 {
+    if (linfo->uninferred) {
+        return (jl_code_info_t*)jl_copy_ast((jl_value_t*)linfo->uninferred);
+    }
+
     JL_TIMING(STAGED_FUNCTION);
     jl_value_t *tt = linfo->specTypes;
     jl_method_t *def = linfo->def.method;
@@ -428,15 +559,15 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
     jl_code_info_t *func = NULL;
     jl_value_t *ex = NULL;
     JL_GC_PUSH2(&ex, &func);
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     int last_lineno = jl_lineno;
-    int last_in = ptls->in_pure_callback;
-    size_t last_age = jl_get_ptls_states()->world_age;
+    int last_in = ct->ptls->in_pure_callback;
+    size_t last_age = ct->world_age;
 
     JL_TRY {
-        ptls->in_pure_callback = 1;
+        ct->ptls->in_pure_callback = 1;
         // and the right world
-        ptls->world_age = def->primary_world;
+        ct->world_age = def->primary_world;
 
         // invoke code generator
         jl_tupletype_t *ttdt = (jl_tupletype_t*)jl_unwrap_unionall(tt);
@@ -444,27 +575,40 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
 
         if (jl_is_code_info(ex)) {
             func = (jl_code_info_t*)ex;
+            jl_array_t *stmts = (jl_array_t*)func->code;
+            jl_resolve_globals_in_ir(stmts, def->module, linfo->sparam_vals, 1);
         }
         else {
             // Lower the user's expression and resolve references to the type parameters
             func = jl_expand_and_resolve(ex, def->module, linfo->sparam_vals);
 
             if (!jl_is_code_info(func)) {
-                if (jl_is_expr(func) && ((jl_expr_t*)func)->head == error_sym) {
-                    ptls->in_pure_callback = 0;
+                if (jl_is_expr(func) && ((jl_expr_t*)func)->head == jl_error_sym) {
+                    ct->ptls->in_pure_callback = 0;
                     jl_toplevel_eval(def->module, (jl_value_t*)func);
                 }
-                jl_error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure or comprehension.");
+                jl_error("The function body AST defined by this @generated function is not pure. This likely means it contains a closure, a comprehension or a generator.");
             }
         }
 
-        ptls->in_pure_callback = last_in;
+        // If this generated function has an opaque closure, cache it for
+        // correctness of method identity
+        for (int i = 0; i < jl_array_len(func->code); ++i) {
+            jl_value_t *stmt = jl_array_ptr_ref(func->code, i);
+            if (jl_is_expr(stmt) && ((jl_expr_t*)stmt)->head == jl_new_opaque_closure_sym) {
+                linfo->uninferred = jl_copy_ast((jl_value_t*)func);
+                jl_gc_wb(linfo, linfo->uninferred);
+                break;
+            }
+        }
+
+        ct->ptls->in_pure_callback = last_in;
         jl_lineno = last_lineno;
-        ptls->world_age = last_age;
-        jl_linenumber_to_lineinfo(func, def->module, (jl_value_t*)def->name);
+        ct->world_age = last_age;
+        jl_add_function_name_to_lineinfo(func, (jl_value_t*)def->name);
     }
     JL_CATCH {
-        ptls->in_pure_callback = last_in;
+        ct->ptls->in_pure_callback = last_in;
         jl_lineno = last_lineno;
         jl_rethrow();
     }
@@ -474,9 +618,9 @@ JL_DLLEXPORT jl_code_info_t *jl_code_for_staged(jl_method_instance_t *linfo)
 
 JL_DLLEXPORT jl_code_info_t *jl_copy_code_info(jl_code_info_t *src)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_code_info_t *newsrc =
-        (jl_code_info_t*)jl_gc_alloc(ptls, sizeof(jl_code_info_t),
+        (jl_code_info_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_info_t),
                                        jl_code_info_type);
     *newsrc = *src;
     return newsrc;
@@ -500,7 +644,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
     int gen_only = 0;
     for (j = 1; j < m->nargs && j <= sizeof(m->nospecialize) * 8; j++) {
         jl_value_t *ai = jl_array_ptr_ref(src->slotnames, j);
-        if (ai == (jl_value_t*)unused_sym) {
+        if (ai == (jl_value_t*)jl_unused_sym) {
             // TODO: enable this. currently it triggers a bug on arguments like
             // ::Type{>:Missing}
             //int sn = j-1;
@@ -514,7 +658,9 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
     }
     m->called = called;
     m->pure = src->pure;
-    jl_linenumber_to_lineinfo(src, m->module, (jl_value_t*)m->name);
+    m->constprop = src->constprop;
+    m->purity.bits = src->purity.bits;
+    jl_add_function_name_to_lineinfo(src, (jl_value_t*)m->name);
 
     jl_array_t *copy = NULL;
     jl_svec_t *sparam_vars = jl_outer_unionall_vars(m->sig);
@@ -525,9 +671,9 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
     copy = jl_alloc_vec_any(n);
     for (i = 0; i < n; i++) {
         jl_value_t *st = jl_array_ptr_ref(stmts, i);
-        if (jl_is_expr(st) && ((jl_expr_t*)st)->head == meta_sym) {
+        if (jl_is_expr(st) && ((jl_expr_t*)st)->head == jl_meta_sym) {
             size_t nargs = jl_expr_nargs(st);
-            if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)nospecialize_sym) {
+            if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_nospecialize_sym) {
                 if (nargs == 1) // bare `@nospecialize` is special: it prevents specialization on all args
                     m->nospecialize = -1;
                 size_t j;
@@ -551,12 +697,12 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
                 }
                 st = jl_nothing;
             }
-            else if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)specialize_sym) {
+            else if (nargs >= 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_specialize_sym) {
                 if (nargs == 1) // bare `@specialize` is special: it causes specialization on all args
                     m->nospecialize = 0;
                 st = jl_nothing;
             }
-            else if (nargs == 2 && jl_exprarg(st, 0) == (jl_value_t*)generated_sym) {
+            else if (nargs == 2 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_sym) {
                 m->generator = NULL;
                 jl_value_t *gexpr = jl_exprarg(st, 1);
                 if (jl_expr_nargs(gexpr) == 7) {
@@ -573,7 +719,7 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
                 }
                 st = jl_nothing;
             }
-            else if (nargs == 1 && jl_exprarg(st, 0) == (jl_value_t*)generated_only_sym) {
+            else if (nargs == 1 && jl_exprarg(st, 0) == (jl_value_t*)jl_generated_only_sym) {
                 gen_only = 1;
                 st = jl_nothing;
             }
@@ -602,57 +748,89 @@ static void jl_method_set_source(jl_method_t *m, jl_code_info_t *src)
 
 JL_DLLEXPORT jl_method_t *jl_new_method_uninit(jl_module_t *module)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_method_t *m =
-        (jl_method_t*)jl_gc_alloc(ptls, sizeof(jl_method_t), jl_method_type);
-    m->specializations = jl_emptysvec;
-    m->speckeyset = (jl_array_t*)jl_an_empty_vec_any;
+        (jl_method_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_t), jl_method_type);
+    jl_atomic_store_relaxed(&m->specializations, jl_emptysvec);
+    jl_atomic_store_relaxed(&m->speckeyset, (jl_array_t*)jl_an_empty_vec_any);
     m->sig = NULL;
     m->slot_syms = NULL;
     m->roots = NULL;
+    m->root_blocks = NULL;
+    m->nroots_sysimg = 0;
     m->ccallable = NULL;
     m->module = module;
+    m->external_mt = NULL;
     m->source = NULL;
-    m->unspecialized = NULL;
+    jl_atomic_store_relaxed(&m->unspecialized, NULL);
     m->generator = NULL;
     m->name = NULL;
-    m->file = empty_sym;
+    m->file = jl_empty_sym;
     m->line = 0;
     m->called = 0xff;
     m->nospecialize = module->nospecialize;
     m->nkw = 0;
-    m->invokes = NULL;
+    jl_atomic_store_relaxed(&m->invokes, jl_nothing);
+    m->recursion_relation = NULL;
     m->isva = 0;
     m->nargs = 0;
     m->primary_world = 1;
     m->deleted_world = ~(size_t)0;
+    m->is_for_opaque_closure = 0;
+    m->constprop = 0;
     JL_MUTEX_INIT(&m->writelock);
     return m;
 }
 
 // method definition ----------------------------------------------------------
 
+jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
+    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva)
+{
+    jl_method_t *m = jl_new_method_uninit(module);
+    JL_GC_PUSH1(&m);
+    // TODO: Maybe have a signature of (parent method, stmt#)?
+    m->sig = (jl_value_t*)jl_anytuple_type;
+    m->isva = isva;
+    m->is_for_opaque_closure = 1;
+    if (name == jl_nothing) {
+        m->name = jl_symbol("opaque closure");
+    } else {
+        assert(jl_is_symbol(name));
+        m->name = (jl_sym_t*)name;
+    }
+    m->nargs = nargs + 1;
+    assert(jl_is_linenode(functionloc));
+    jl_value_t *file = jl_linenode_file(functionloc);
+    m->file = jl_is_symbol(file) ? (jl_sym_t*)file : jl_empty_sym;
+    m->line = jl_linenode_line(functionloc);
+    jl_method_set_source(m, ci);
+    JL_GC_POP();
+    return m;
+}
+
 // empty generic function def
 JL_DLLEXPORT jl_value_t *jl_generic_function_def(jl_sym_t *name,
                                                  jl_module_t *module,
-                                                 jl_value_t **bp, jl_value_t *bp_owner,
+                                                 _Atomic(jl_value_t*) *bp,
+                                                 jl_value_t *bp_owner,
                                                  jl_binding_t *bnd)
 {
     jl_value_t *gf = NULL;
 
     assert(name && bp);
-    if (bnd && bnd->value != NULL && !bnd->constp)
+    if (bnd && jl_atomic_load_relaxed(&bnd->value) != NULL && !bnd->constp)
         jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(bnd->name));
-    if (*bp != NULL) {
-        gf = *bp;
+    gf = jl_atomic_load_relaxed(bp);
+    if (gf != NULL) {
         if (!jl_is_datatype_singleton((jl_datatype_t*)jl_typeof(gf)) && !jl_is_type(gf))
             jl_errorf("cannot define function %s; it already has a value", jl_symbol_name(name));
     }
     if (bnd)
         bnd->constp = 1;
-    if (*bp == NULL) {
+    if (gf == NULL) {
         gf = (jl_value_t*)jl_new_generic_function(name, module);
-        *bp = gf;
+        jl_atomic_store(bp, gf); // TODO: fix constp assignment data race
         if (bp_owner) jl_gc_wb(bp_owner, gf);
     }
     return gf;
@@ -695,6 +873,11 @@ JL_DLLEXPORT jl_methtable_t *jl_method_table_for(jl_value_t *argtypes JL_PROPAGA
     return first_methtable(argtypes, 0);
 }
 
+JL_DLLEXPORT jl_methtable_t *jl_method_get_table(jl_method_t *method JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
+{
+    return method->external_mt ? (jl_methtable_t*)method->external_mt : jl_method_table_for(method->sig);
+}
+
 // get the MethodTable implied by a single given type, or `nothing`
 JL_DLLEXPORT jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAGATES_ROOT) JL_NOTSAFEPOINT
 {
@@ -703,16 +886,17 @@ JL_DLLEXPORT jl_methtable_t *jl_argument_method_table(jl_value_t *argt JL_PROPAG
 
 jl_array_t *jl_all_methods JL_GLOBALLY_ROOTED;
 
-JL_DLLEXPORT void jl_method_def(jl_svec_t *argdata,
-                                jl_code_info_t *f,
-                                jl_module_t *module)
+JL_DLLEXPORT jl_method_t* jl_method_def(jl_svec_t *argdata,
+                                        jl_methtable_t *mt,
+                                        jl_code_info_t *f,
+                                        jl_module_t *module)
 {
     // argdata is svec(svec(types...), svec(typevars...), functionloc)
     jl_svec_t *atypes = (jl_svec_t*)jl_svecref(argdata, 0);
     jl_svec_t *tvars = (jl_svec_t*)jl_svecref(argdata, 1);
     jl_value_t *functionloc = jl_svecref(argdata, 2);
     size_t nargs = jl_svec_len(atypes);
-    int isva = jl_is_vararg_type(jl_svecref(atypes, nargs - 1));
+    int isva = jl_is_vararg(jl_svecref(atypes, nargs - 1));
     assert(jl_is_svec(atypes));
     assert(nargs > 0);
     assert(jl_is_svec(tvars));
@@ -732,7 +916,9 @@ JL_DLLEXPORT void jl_method_def(jl_svec_t *argdata,
         argtype = jl_new_struct(jl_unionall_type, tv, argtype);
     }
 
-    jl_methtable_t *mt = jl_method_table_for(argtype);
+    jl_methtable_t *external_mt = mt;
+    if (!mt)
+        mt = jl_method_table_for(argtype);
     if ((jl_value_t*)mt == jl_nothing)
         jl_error("Method dispatch is unimplemented currently for this method signature");
     if (mt->frozen)
@@ -740,7 +926,7 @@ JL_DLLEXPORT void jl_method_def(jl_svec_t *argdata,
 
     // TODO: derive our debug name from the syntax instead of the type
     name = mt->name;
-    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt) {
+    if (mt == jl_type_type_mt || mt == jl_nonfunction_mt || external_mt) {
         // our value for `name` is bad, try to guess what the syntax might have had,
         // like `jl_static_show_func_sig` might have come up with
         jl_datatype_t *dt = jl_first_argument_datatype(argtype);
@@ -761,13 +947,16 @@ JL_DLLEXPORT void jl_method_def(jl_svec_t *argdata,
         f = jl_new_code_info_from_ir((jl_expr_t*)f);
     }
     m = jl_new_method_uninit(module);
+    m->external_mt = (jl_value_t*)external_mt;
+    if (external_mt)
+        jl_gc_wb(m, external_mt);
     m->sig = argtype;
     m->name = name;
     m->isva = isva;
     m->nargs = nargs;
     assert(jl_is_linenode(functionloc));
     jl_value_t *file = jl_linenode_file(functionloc);
-    m->file = jl_is_symbol(file) ? (jl_sym_t*)file : empty_sym;
+    m->file = jl_is_symbol(file) ? (jl_sym_t*)file : jl_empty_sym;
     m->line = jl_linenode_line(functionloc);
     jl_method_set_source(m, f);
 
@@ -781,9 +970,9 @@ JL_DLLEXPORT void jl_method_def(jl_svec_t *argdata,
 
     for (i = 0; i < na; i++) {
         jl_value_t *elt = jl_svecref(atypes, i);
-        if (!jl_is_type(elt) && !jl_is_typevar(elt)) {
+        if (!jl_is_type(elt) && !jl_is_typevar(elt) && !jl_is_vararg(elt)) {
             jl_sym_t *argname = (jl_sym_t*)jl_array_ptr_ref(f->slotnames, i);
-            if (argname == unused_sym)
+            if (argname == jl_unused_sym)
                 jl_exceptionf(jl_argumenterror_type,
                               "invalid type for argument number %d in method definition for %s at %s:%d",
                               i,
@@ -798,7 +987,7 @@ JL_DLLEXPORT void jl_method_def(jl_svec_t *argdata,
                               jl_symbol_name(m->file),
                               m->line);
         }
-        if (jl_is_vararg_type(elt) && i < na-1)
+        if (jl_is_vararg(elt) && i < na-1)
             jl_exceptionf(jl_argumenterror_type,
                           "Vararg on non-final argument in method definition for %s at %s:%d",
                           jl_symbol_name(name),
@@ -820,6 +1009,116 @@ JL_DLLEXPORT void jl_method_def(jl_svec_t *argdata,
     if (jl_newmeth_tracer)
         jl_call_tracer(jl_newmeth_tracer, (jl_value_t*)m);
     JL_GC_POP();
+
+    return m;
+}
+
+// root blocks
+
+static uint64_t current_root_id(jl_array_t *root_blocks)
+{
+    if (!root_blocks)
+        return 0;
+    assert(jl_is_array(root_blocks));
+    size_t nx2 = jl_array_len(root_blocks);
+    if (nx2 == 0)
+        return 0;
+    uint64_t *blocks = (uint64_t*)jl_array_data(root_blocks);
+    return blocks[nx2-2];
+}
+
+static void add_root_block(jl_array_t *root_blocks, uint64_t modid, size_t len)
+{
+    assert(jl_is_array(root_blocks));
+    jl_array_grow_end(root_blocks, 2);
+    uint64_t *blocks = (uint64_t*)jl_array_data(root_blocks);
+    int nx2 = jl_array_len(root_blocks);
+    blocks[nx2-2] = modid;
+    blocks[nx2-1] = len;
+}
+
+static void prepare_method_for_roots(jl_method_t *m, uint64_t modid)
+{
+    if (!m->roots) {
+        m->roots = jl_alloc_vec_any(0);
+        jl_gc_wb(m, m->roots);
+    }
+    if (!m->root_blocks && modid != 0) {
+        m->root_blocks = jl_alloc_array_1d(jl_array_uint64_type, 0);
+        jl_gc_wb(m, m->root_blocks);
+    }
+}
+
+JL_DLLEXPORT void jl_add_method_root(jl_method_t *m, jl_module_t *mod, jl_value_t* root)
+{
+    JL_GC_PUSH2(&m, &root);
+    uint64_t modid = 0;
+    if (mod) {
+        assert(jl_is_module(mod));
+        modid = mod->build_id;
+    }
+    assert(jl_is_method(m));
+    prepare_method_for_roots(m, modid);
+    if (current_root_id(m->root_blocks) != modid)
+        add_root_block(m->root_blocks, modid, jl_array_len(m->roots));
+    jl_array_ptr_1d_push(m->roots, root);
+    JL_GC_POP();
+}
+
+void jl_append_method_roots(jl_method_t *m, uint64_t modid, jl_array_t* roots)
+{
+    JL_GC_PUSH2(&m, &roots);
+    assert(jl_is_method(m));
+    assert(jl_is_array(roots));
+    prepare_method_for_roots(m, modid);
+    add_root_block(m->root_blocks, modid, jl_array_len(m->roots));
+    jl_array_ptr_1d_append(m->roots, roots);
+    JL_GC_POP();
+}
+
+// given the absolute index i of a root, retrieve its relocatable reference
+// returns 1 if the root is relocatable
+int get_root_reference(rle_reference *rr, jl_method_t *m, size_t i)
+{
+    if (!m->root_blocks) {
+        rr->key = 0;
+        rr->index = i;
+        return i < m->nroots_sysimg;
+    }
+    rle_index_to_reference(rr, i, (uint64_t*)jl_array_data(m->root_blocks), jl_array_len(m->root_blocks), 0);
+    if (rr->key)
+        return 1;
+    return i < m->nroots_sysimg;
+}
+
+// get a root, given its key and index relative to the key
+// this is the relocatable way to get a root from m->roots
+jl_value_t *lookup_root(jl_method_t *m, uint64_t key, int index)
+{
+    if (!m->root_blocks) {
+        assert(key == 0);
+        return jl_array_ptr_ref(m->roots, index);
+    }
+    rle_reference rr = {key, index};
+    size_t i = rle_reference_to_index(&rr, (uint64_t*)jl_array_data(m->root_blocks), jl_array_len(m->root_blocks), 0);
+    return jl_array_ptr_ref(m->roots, i);
+}
+
+int nroots_with_key(jl_method_t *m, uint64_t key)
+{
+    size_t nroots = 0;
+    if (m->roots)
+        nroots = jl_array_len(m->roots);
+    if (!m->root_blocks)
+        return key == 0 ? nroots : 0;
+    uint64_t *rletable = (uint64_t*)jl_array_data(m->root_blocks);
+    size_t j, nblocks2 = jl_array_len(m->root_blocks);
+    int nwithkey = 0;
+    for (j = 0; j < nblocks2; j+=2) {
+        if (rletable[j] == key)
+            nwithkey += (j+3 < nblocks2 ? rletable[j+3] : nroots) - rletable[j+1];
+    }
+    return nwithkey;
 }
 
 #ifdef __cplusplus
diff --git a/src/module.c b/src/module.c
index 097a73e5fe89b9..19db961dceac5d 100644
--- a/src/module.c
+++ b/src/module.c
@@ -11,18 +11,12 @@
 extern "C" {
 #endif
 
-jl_module_t *jl_main_module = NULL;
-jl_module_t *jl_core_module = NULL;
-jl_module_t *jl_base_module = NULL;
-jl_module_t *jl_top_module = NULL;
-
-JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name)
+JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, uint8_t default_names)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     const jl_uuid_t uuid_zero = {0, 0};
-    jl_module_t *m = (jl_module_t*)jl_gc_alloc(ptls, sizeof(jl_module_t),
+    jl_module_t *m = (jl_module_t*)jl_gc_alloc(ct->ptls, sizeof(jl_module_t),
                                                jl_module_type);
-    JL_GC_PUSH1(&m);
     assert(jl_is_symbol(name));
     m->name = name;
     m->parent = NULL;
@@ -38,28 +32,37 @@ JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name)
     m->optlevel = -1;
     m->compile = -1;
     m->infer = -1;
+    m->max_methods = -1;
     JL_MUTEX_INIT(&m->lock);
     htable_new(&m->bindings, 0);
     arraylist_new(&m->usings, 0);
-    if (jl_core_module) {
+    JL_GC_PUSH1(&m);
+    if (jl_core_module && default_names) {
         jl_module_using(m, jl_core_module);
     }
     // export own name, so "using Foo" makes "Foo" itself visible
-    jl_set_const(m, name, (jl_value_t*)m);
+    if (default_names) {
+        jl_set_const(m, name, (jl_value_t*)m);
+    }
     jl_module_export(m, name);
     JL_GC_POP();
     return m;
 }
 
+JL_DLLEXPORT jl_module_t *jl_new_module(jl_sym_t *name)
+{
+    return jl_new_module_(name, 1);
+}
+
 uint32_t jl_module_next_counter(jl_module_t *m)
 {
     return jl_atomic_fetch_add(&m->counter, 1);
 }
 
-JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports)
+JL_DLLEXPORT jl_value_t *jl_f_new_module(jl_sym_t *name, uint8_t std_imports, uint8_t default_names)
 {
     // TODO: should we prohibit this during incremental compilation?
-    jl_module_t *m = jl_new_module(name);
+    jl_module_t *m = jl_new_module_(name, default_names);
     JL_GC_PUSH1(&m);
     m->parent = jl_main_module; // TODO: this is a lie
     jl_gc_wb(m, m->parent);
@@ -123,6 +126,21 @@ JL_DLLEXPORT int jl_get_module_infer(jl_module_t *m)
     return value;
 }
 
+JL_DLLEXPORT void jl_set_module_max_methods(jl_module_t *self, int value)
+{
+    self->max_methods = value;
+}
+
+JL_DLLEXPORT int jl_get_module_max_methods(jl_module_t *m)
+{
+    int value = m->max_methods;
+    while (value == -1 && m->parent != m && m != jl_base_module) {
+        m = m->parent;
+        value = m->max_methods;
+    }
+    return value;
+}
+
 JL_DLLEXPORT void jl_set_istopmod(jl_module_t *self, uint8_t isprimary)
 {
     self->istopmod = 1;
@@ -138,12 +156,13 @@ JL_DLLEXPORT uint8_t jl_istopmod(jl_module_t *mod)
 
 static jl_binding_t *new_binding(jl_sym_t *name)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(jl_is_symbol(name));
-    jl_binding_t *b = (jl_binding_t*)jl_gc_alloc_buf(ptls, sizeof(jl_binding_t));
+    jl_binding_t *b = (jl_binding_t*)jl_gc_alloc_buf(ct->ptls, sizeof(jl_binding_t));
     b->name = name;
     b->value = NULL;
     b->owner = NULL;
+    b->ty = NULL;
     b->globalref = NULL;
     b->constp = 0;
     b->exportp = 0;
@@ -153,9 +172,9 @@ static jl_binding_t *new_binding(jl_sym_t *name)
 }
 
 // get binding for assignment
-JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int error)
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, int alloc)
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock);
     jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&m->bindings, var);
     jl_binding_t *b = *bp;
 
@@ -164,39 +183,38 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_wr(jl_module_t *m JL_PROPAGATES_ROOT,
             if (b->owner == NULL) {
                 b->owner = m;
             }
-            else if (error) {
-                JL_UNLOCK_NOGC(&m->lock);
-                jl_errorf("cannot assign a value to variable %s.%s from module %s",
+            else if (alloc) {
+                JL_UNLOCK(&m->lock);
+                jl_errorf("cannot assign a value to imported variable %s.%s from module %s",
                           jl_symbol_name(b->owner->name), jl_symbol_name(var), jl_symbol_name(m->name));
             }
         }
     }
-    else {
+    else if (alloc) {
         b = new_binding(var);
         b->owner = m;
         *bp = b;
+        JL_GC_PROMISE_ROOTED(b);
         jl_gc_wb_buf(m, b, sizeof(jl_binding_t));
     }
+    else {
+        b = NULL;
+    }
 
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock);
     return b;
 }
 
 // Hash tables don't generically root their contents, but they do for bindings.
 // Express this to the analyzer.
 // NOTE: Must hold m->lock while calling these.
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 jl_binding_t *_jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT;
-jl_binding_t **_jl_get_module_binding_bp(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT;
 #else
 static inline jl_binding_t *_jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT
 {
     return (jl_binding_t*)ptrhash_get(&m->bindings, var);
 }
-static inline jl_binding_t **_jl_get_module_binding_bp(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT
-{
-    return (jl_binding_t**)ptrhash_bp(&m->bindings, var);
-}
 #endif
 
 
@@ -213,9 +231,10 @@ JL_DLLEXPORT jl_module_t *jl_get_module_of_binding(jl_module_t *m, jl_sym_t *var
 // like jl_get_binding_wr, but has different error paths
 JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_t *var)
 {
-    JL_LOCK_NOGC(&m->lock);
-    jl_binding_t **bp = _jl_get_module_binding_bp(m, var);
+    JL_LOCK(&m->lock);
+    jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&m->bindings, var);
     jl_binding_t *b = *bp;
+    JL_GC_PROMISE_ROOTED(b);
 
     if (b != HT_NOTFOUND) {
         if (b->owner != m) {
@@ -223,15 +242,15 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_
                 b->owner = m;
             }
             else {
-                JL_UNLOCK_NOGC(&m->lock);
-                jl_binding_t *b2 = jl_get_binding(b->owner, var);
+                JL_UNLOCK(&m->lock);
+                jl_binding_t *b2 = jl_get_binding(b->owner, b->name);
                 if (b2 == NULL || b2->value == NULL)
                     jl_errorf("invalid method definition: imported function %s.%s does not exist",
-                              jl_symbol_name(b->owner->name), jl_symbol_name(var));
+                              jl_symbol_name(b->owner->name), jl_symbol_name(b->name));
                 // TODO: we might want to require explicitly importing types to add constructors
                 if (!b->imported && !jl_is_type(b2->value)) {
                     jl_errorf("error in method definition: function %s.%s must be explicitly imported to be extended",
-                              jl_symbol_name(b->owner->name), jl_symbol_name(var));
+                              jl_symbol_name(b->owner->name), jl_symbol_name(b->name));
                 }
                 return b2;
             }
@@ -241,23 +260,35 @@ JL_DLLEXPORT jl_binding_t *jl_get_binding_for_method_def(jl_module_t *m, jl_sym_
         b = new_binding(var);
         b->owner = m;
         *bp = b;
+        JL_GC_PROMISE_ROOTED(b);
         jl_gc_wb_buf(m, b, sizeof(jl_binding_t));
     }
 
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock);
     return b;
 }
 
-static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s,
+static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname,
                            int explici);
 
 typedef struct _modstack_t {
     jl_module_t *m;
+    jl_sym_t *var;
     struct _modstack_t *prev;
 } modstack_t;
 
 static jl_binding_t *jl_get_binding_(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st);
 
+static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT;
+
+#ifndef __clang_gcanalyzer__
+// The analyzer doesn't like looking through the arraylist, so just model the
+// access for it using this function
+static inline jl_module_t *module_usings_getidx(jl_module_t *m JL_PROPAGATES_ROOT, size_t i) JL_NOTSAFEPOINT {
+    return (jl_module_t*)m->usings.items[i];
+}
+#endif
+
 // find a binding from a module's `usings` list
 // called while holding m->lock
 static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var, modstack_t *st, int warn)
@@ -265,7 +296,7 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
     jl_binding_t *b = NULL;
     jl_module_t *owner = NULL;
     for(int i=(int)m->usings.len-1; i >= 0; --i) {
-        jl_module_t *imp = (jl_module_t*)m->usings.items[i];
+        jl_module_t *imp = module_usings_getidx(m, i);
         // TODO: make sure this can't deadlock
         JL_LOCK(&imp->lock);
         jl_binding_t *tempb = _jl_get_module_binding(imp, var);
@@ -279,14 +310,14 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
                 !tempb->deprecated && !b->deprecated &&
                 !(tempb->constp && tempb->value && b->constp && b->value == tempb->value)) {
                 if (warn) {
+                    // mark this binding resolved (by creating it or setting the owner), to avoid repeating the warning
+                    (void)jl_get_binding_wr(m, var, 1);
                     JL_UNLOCK(&m->lock);
                     jl_printf(JL_STDERR,
                               "WARNING: both %s and %s export \"%s\"; uses of it in module %s must be qualified\n",
                               jl_symbol_name(owner->name),
                               jl_symbol_name(imp->name), jl_symbol_name(var),
                               jl_symbol_name(m->name));
-                    // mark this binding resolved, to avoid repeating the warning
-                    (void)jl_get_binding_wr(m, var, 0);
                     JL_LOCK(&m->lock);
                 }
                 return NULL;
@@ -303,10 +334,10 @@ static jl_binding_t *using_resolve_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl
 // get binding for reading. might return NULL for unbound.
 static jl_binding_t *jl_get_binding_(jl_module_t *m, jl_sym_t *var, modstack_t *st)
 {
-    modstack_t top = { m, st };
+    modstack_t top = { m, var, st };
     modstack_t *tmp = st;
     while (tmp != NULL) {
-        if (tmp->m == m) {
+        if (tmp->m == m && tmp->var == var) {
             // import cycle without finding actual location
             return NULL;
         }
@@ -321,14 +352,14 @@ static jl_binding_t *jl_get_binding_(jl_module_t *m, jl_sym_t *var, modstack_t *
             // do a full import to prevent the result of this lookup
             // from changing, for example if this var is assigned to
             // later.
-            module_import_(m, b->owner, var, 0);
+            module_import_(m, b->owner, b->name, var, 0);
             return b;
         }
         return NULL;
     }
     JL_UNLOCK(&m->lock);
-    if (b->owner != m)
-        return jl_get_binding_(b->owner, var, &top);
+    if (b->owner != m || b->name != var)
+        return jl_get_binding_(b->owner, b->name, &top);
     return b;
 }
 
@@ -345,6 +376,25 @@ JL_DLLEXPORT jl_value_t *jl_binding_owner(jl_module_t *m, jl_sym_t *var)
     return (jl_value_t*)b->owner;
 }
 
+// get type of binding m.var, without resolving the binding
+JL_DLLEXPORT jl_value_t *jl_binding_type(jl_module_t *m, jl_sym_t *var)
+{
+    JL_LOCK(&m->lock);
+    jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, var);
+    if (b == HT_NOTFOUND || b->owner == NULL)
+        b = using_resolve_binding(m, var, NULL, 0);
+    JL_UNLOCK(&m->lock);
+    if (b == NULL)
+        return jl_nothing;
+    jl_value_t *ty = jl_atomic_load_relaxed(&b->ty);
+    return ty ? ty : jl_nothing;
+}
+
+JL_DLLEXPORT jl_binding_t *jl_get_binding_wr_or_error(jl_module_t *m, jl_sym_t *var)
+{
+    return jl_get_binding_wr(m, var, 1);
+}
+
 JL_DLLEXPORT jl_binding_t *jl_get_binding(jl_module_t *m, jl_sym_t *var)
 {
     return jl_get_binding_(m, var, NULL);
@@ -368,12 +418,17 @@ JL_DLLEXPORT jl_value_t *jl_module_globalref(jl_module_t *m, jl_sym_t *var)
         JL_UNLOCK(&m->lock);
         return jl_new_struct(jl_globalref_type, m, var);
     }
-    if (b->globalref == NULL) {
-        b->globalref = jl_new_struct(jl_globalref_type, m, var);
-        jl_gc_wb(m, b->globalref);
+    jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref);
+    if (globalref == NULL) {
+        jl_value_t *newref = jl_new_struct(jl_globalref_type, m, var);
+        if (jl_atomic_cmpswap_relaxed(&b->globalref, &globalref, newref)) {
+            JL_GC_PROMISE_ROOTED(newref);
+            globalref = newref;
+            jl_gc_wb(m, globalref);
+        }
     }
-    JL_UNLOCK(&m->lock);
-    return b->globalref;
+    JL_UNLOCK(&m->lock); // may GC
+    return globalref;
 }
 
 static int eq_bindings(jl_binding_t *a, jl_binding_t *b)
@@ -394,7 +449,7 @@ JL_DLLEXPORT int jl_is_imported(jl_module_t *m, jl_sym_t *s)
 }
 
 // NOTE: we use explici since explicit is a C++ keyword
-static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, int explici)
+static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname, int explici)
 {
     jl_binding_t *b = jl_get_binding(from, s);
     if (b == NULL) {
@@ -421,19 +476,27 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, int
         }
 
         JL_LOCK(&to->lock);
-        jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&to->bindings, s);
+        jl_binding_t **bp = (jl_binding_t**)ptrhash_bp(&to->bindings, asname);
         jl_binding_t *bto = *bp;
         if (bto != HT_NOTFOUND) {
             if (bto == b) {
                 // importing a binding on top of itself. harmless.
             }
+            else if (bto->name != s) {
+                JL_UNLOCK(&to->lock);
+                jl_printf(JL_STDERR,
+                          "WARNING: ignoring conflicting import of %s.%s into %s\n",
+                          jl_symbol_name(from->name), jl_symbol_name(s),
+                          jl_symbol_name(to->name));
+                return;
+            }
             else if (bto->owner == b->owner) {
                 // already imported
                 bto->imported = (explici!=0);
             }
             else if (bto->owner != to && bto->owner != NULL) {
                 // already imported from somewhere else
-                jl_binding_t *bval = jl_get_binding(to, s);
+                jl_binding_t *bval = jl_get_binding(to, asname);
                 if (bval->constp && bval->value && b->constp && b->value == bval->value) {
                     // equivalent binding
                     bto->imported = (explici!=0);
@@ -470,7 +533,7 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, int
             }
         }
         else {
-            jl_binding_t *nb = new_binding(s);
+            jl_binding_t *nb = new_binding(b->name);
             nb->owner = b->owner;
             nb->imported = (explici!=0);
             nb->deprecated = b->deprecated;
@@ -483,12 +546,22 @@ static void module_import_(jl_module_t *to, jl_module_t *from, jl_sym_t *s, int
 
 JL_DLLEXPORT void jl_module_import(jl_module_t *to, jl_module_t *from, jl_sym_t *s)
 {
-    module_import_(to, from, s, 1);
+    module_import_(to, from, s, s, 1);
+}
+
+JL_DLLEXPORT void jl_module_import_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname)
+{
+    module_import_(to, from, s, asname, 1);
 }
 
 JL_DLLEXPORT void jl_module_use(jl_module_t *to, jl_module_t *from, jl_sym_t *s)
 {
-    module_import_(to, from, s, 0);
+    module_import_(to, from, s, s, 0);
+}
+
+JL_DLLEXPORT void jl_module_use_as(jl_module_t *to, jl_module_t *from, jl_sym_t *s, jl_sym_t *asname)
+{
+    module_import_(to, from, s, asname, 0);
 }
 
 JL_DLLEXPORT void jl_module_using(jl_module_t *to, jl_module_t *from)
@@ -559,33 +632,33 @@ JL_DLLEXPORT int jl_boundp(jl_module_t *m, jl_sym_t *var)
 
 JL_DLLEXPORT int jl_defines_or_exports_p(jl_module_t *m, jl_sym_t *var)
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock);
     jl_binding_t *b = (jl_binding_t*)ptrhash_get(&m->bindings, var);
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock);
     return b != HT_NOTFOUND && (b->exportp || b->owner==m);
 }
 
-JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_module_exports_p(jl_module_t *m, jl_sym_t *var)
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock);
     jl_binding_t *b = _jl_get_module_binding(m, var);
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock);
     return b != HT_NOTFOUND && b->exportp;
 }
 
-JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_binding_resolved_p(jl_module_t *m, jl_sym_t *var)
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock);
     jl_binding_t *b = _jl_get_module_binding(m, var);
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock);
     return b != HT_NOTFOUND && b->owner != NULL;
 }
 
-JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var) JL_NOTSAFEPOINT
+JL_DLLEXPORT jl_binding_t *jl_get_module_binding(jl_module_t *m JL_PROPAGATES_ROOT, jl_sym_t *var)
 {
-    JL_LOCK_NOGC(&m->lock);
+    JL_LOCK(&m->lock);
     jl_binding_t *b = _jl_get_module_binding(m, var);
-    JL_UNLOCK_NOGC(&m->lock);
+    JL_UNLOCK(&m->lock);
     return b == HT_NOTFOUND ? NULL : b;
 }
 
@@ -597,23 +670,21 @@ JL_DLLEXPORT jl_value_t *jl_get_global(jl_module_t *m, jl_sym_t *var)
     return b->value;
 }
 
-JL_DLLEXPORT void jl_set_global(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
-{
-    jl_binding_t *bp = jl_get_binding_wr(m, var, 1);
-    JL_GC_PROMISE_ROOTED(bp);
-    jl_checked_assignment(bp, val);
-}
-
 JL_DLLEXPORT void jl_set_const(jl_module_t *m JL_ROOTING_ARGUMENT, jl_sym_t *var, jl_value_t *val JL_ROOTED_ARGUMENT)
 {
     jl_binding_t *bp = jl_get_binding_wr(m, var, 1);
     if (bp->value == NULL) {
-        if (jl_atomic_bool_compare_exchange(&bp->constp, 0, 1)) {
-            if (jl_atomic_bool_compare_exchange(&bp->value, NULL, val)) {
+        uint8_t constp = 0;
+        // if (jl_atomic_cmpswap(&bp->constp, &constp, 1)) {
+        if (constp = bp->constp, bp->constp = 1, constp == 0) {
+            jl_value_t *old = NULL;
+            if (jl_atomic_cmpswap(&bp->value, &old, val)) {
                 jl_gc_wb_binding(bp, val);
                 return;
             }
         }
+        jl_value_t *old_ty = NULL;
+        jl_atomic_cmpswap_relaxed(&bp->ty, &old_ty, (jl_value_t*)jl_any_type);
     }
     jl_errorf("invalid redefinition of constant %s",
               jl_symbol_name(bp->name));
@@ -730,18 +801,28 @@ void jl_binding_deprecation_warning(jl_module_t *m, jl_binding_t *b)
     }
 }
 
-JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs) JL_NOTSAFEPOINT
+JL_DLLEXPORT void jl_checked_assignment(jl_binding_t *b, jl_value_t *rhs)
 {
+    jl_value_t *old_ty = NULL;
+    if (!jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type)) {
+        if (old_ty != (jl_value_t*)jl_any_type && jl_typeof(rhs) != old_ty) {
+            JL_GC_PUSH1(&rhs);
+            if (!jl_isa(rhs, old_ty))
+                jl_errorf("cannot assign an incompatible value to the global %s.",
+                          jl_symbol_name(b->name));
+            JL_GC_POP();
+        }
+    }
     if (b->constp) {
-        jl_value_t *old = jl_atomic_compare_exchange(&b->value, NULL, rhs);
-        if (old == NULL) {
+        jl_value_t *old = NULL;
+        if (jl_atomic_cmpswap(&b->value, &old, rhs)) {
             jl_gc_wb_binding(b, rhs);
             return;
         }
         if (jl_egal(rhs, old))
             return;
         if (jl_typeof(rhs) != jl_typeof(old) || jl_is_type(rhs) || jl_is_module(rhs)) {
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
             jl_errorf("invalid redefinition of constant %s",
                       jl_symbol_name(b->name));
 #endif
@@ -792,9 +873,10 @@ JL_DLLEXPORT jl_value_t *jl_module_names(jl_module_t *m, int all, int imported)
                  (imported && b->imported) ||
                  (b->owner == m && !b->imported && (all || m == jl_main_module))) &&
                 (all || (!b->deprecated && !hidden))) {
+                jl_sym_t *in_module_name = (jl_sym_t*)table[i-1]; // the name in the module may not be b->name, use the httable key instead
                 jl_array_grow_end(a, 1);
                 //XXX: change to jl_arrayset if array storage allocation for Array{Symbols,1} changes:
-                jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)b->name);
+                jl_array_ptr_set(a, jl_array_dim0(a)-1, (jl_value_t*)in_module_name);
             }
         }
     }
diff --git a/src/opaque_closure.c b/src/opaque_closure.c
new file mode 100644
index 00000000000000..d34989181b7ad1
--- /dev/null
+++ b/src/opaque_closure.c
@@ -0,0 +1,184 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "julia.h"
+#include "julia_internal.h"
+
+jl_value_t *jl_fptr_const_opaque_closure(jl_opaque_closure_t *oc, jl_value_t **args, size_t nargs)
+{
+    return oc->captures;
+}
+
+// determine whether `argt` is a valid argument type tuple for the given opaque closure method
+JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source)
+{
+    if (!source->isva) {
+        if (jl_is_va_tuple(argt))
+            return 0;
+        if (jl_nparams(argt)+1 > source->nargs)
+            return 0;
+    }
+    if (jl_nparams(argt) + 1 - jl_is_va_tuple(argt) < source->nargs - source->isva)
+        return 0;
+    return 1;
+}
+
+static jl_value_t *prepend_type(jl_value_t *t0, jl_tupletype_t *t)
+{
+    jl_svec_t *sig_args = NULL;
+    JL_GC_PUSH1(&sig_args);
+    size_t nsig = 1 + jl_svec_len(t->parameters);
+    sig_args = jl_alloc_svec_uninit(nsig);
+    jl_svecset(sig_args, 0, t0);
+    for (size_t i = 0; i < nsig-1; ++i) {
+        jl_svecset(sig_args, 1+i, jl_tparam(t, i));
+    }
+    jl_value_t *sigtype = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(sig_args), nsig);
+    JL_GC_POP();
+    return sigtype;
+}
+
+static jl_opaque_closure_t *new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
+    jl_value_t *source_, jl_value_t *captures)
+{
+    if (!jl_is_tuple_type((jl_value_t*)argt)) {
+        jl_error("OpaqueClosure argument tuple must be a tuple type");
+    }
+    JL_TYPECHK(new_opaque_closure, type, rt_lb);
+    JL_TYPECHK(new_opaque_closure, type, rt_ub);
+    JL_TYPECHK(new_opaque_closure, method, source_);
+    jl_method_t *source = (jl_method_t*)source_;
+    if (!source->isva) {
+        if (jl_is_va_tuple(argt))
+            jl_error("Argument type tuple is vararg but method is not");
+        if (jl_nparams(argt)+1 > source->nargs)
+            jl_error("Argument type tuple has too many required arguments for method");
+    }
+    if (jl_nparams(argt) + 1 - jl_is_va_tuple(argt) < source->nargs - source->isva)
+        jl_error("Argument type tuple has too few required arguments for method");
+    jl_value_t *sigtype = NULL;
+    JL_GC_PUSH1(&sigtype);
+    sigtype = prepend_type(jl_typeof(captures), argt);
+
+    jl_value_t *oc_type JL_ALWAYS_LEAFTYPE;
+    oc_type = jl_apply_type2((jl_value_t*)jl_opaque_closure_type, (jl_value_t*)argt, rt_ub);
+    JL_GC_PROMISE_ROOTED(oc_type);
+
+    jl_method_instance_t *mi = jl_specializations_get_linfo(source, sigtype, jl_emptysvec);
+    size_t world = jl_atomic_load_acquire(&jl_world_counter);
+    jl_code_instance_t *ci = jl_compile_method_internal(mi, world);
+
+    jl_task_t *ct = jl_current_task;
+    jl_opaque_closure_t *oc = (jl_opaque_closure_t*)jl_gc_alloc(ct->ptls, sizeof(jl_opaque_closure_t), oc_type);
+    JL_GC_POP();
+    oc->source = source;
+    oc->captures = captures;
+    oc->specptr = NULL;
+    if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_interpret_call) {
+        oc->invoke = (jl_fptr_args_t)jl_interpret_opaque_closure;
+    }
+    else if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_args) {
+        oc->invoke = jl_atomic_load_relaxed(&ci->specptr.fptr1);
+    }
+    else if (jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) {
+        oc->invoke = (jl_fptr_args_t)jl_fptr_const_opaque_closure;
+        oc->captures = ci->rettype_const;
+    }
+    else {
+        oc->invoke = (jl_fptr_args_t)jl_atomic_load_relaxed(&ci->invoke);
+    }
+    oc->world = world;
+    return oc;
+}
+
+jl_opaque_closure_t *jl_new_opaque_closure(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
+    jl_value_t *source_, jl_value_t **env, size_t nenv)
+{
+    jl_value_t *captures = jl_f_tuple(NULL, env, nenv);
+    JL_GC_PUSH1(&captures);
+    jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, source_, captures);
+    JL_GC_POP();
+    return oc;
+}
+
+jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name,
+    int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva);
+
+JL_DLLEXPORT jl_code_instance_t* jl_new_codeinst(
+        jl_method_instance_t *mi, jl_value_t *rettype,
+        jl_value_t *inferred_const, jl_value_t *inferred,
+        int32_t const_flags, size_t min_world, size_t max_world,
+        uint32_t ipo_effects, uint32_t effects, jl_value_t *argescapes,
+        uint8_t relocatability);
+
+JL_DLLEXPORT void jl_mi_cache_insert(jl_method_instance_t *mi JL_ROOTING_ARGUMENT,
+                                     jl_code_instance_t *ci JL_ROOTED_ARGUMENT JL_MAYBE_UNROOTED);
+
+JL_DLLEXPORT jl_opaque_closure_t *jl_new_opaque_closure_from_code_info(jl_tupletype_t *argt, jl_value_t *rt_lb, jl_value_t *rt_ub,
+    jl_module_t *mod, jl_code_info_t *ci, int lineno, jl_value_t *file, int nargs, int isva, jl_value_t *env)
+{
+    if (!ci->inferred)
+        jl_error("CodeInfo must already be inferred");
+    jl_value_t *root = NULL, *sigtype = NULL;
+    jl_code_instance_t *inst = NULL;
+    JL_GC_PUSH3(&root, &sigtype, &inst);
+    root = jl_box_long(lineno);
+    root = jl_new_struct(jl_linenumbernode_type, root, file);
+    root = (jl_value_t*)jl_make_opaque_closure_method(mod, jl_nothing, nargs, root, ci, isva);
+
+    sigtype = prepend_type(jl_typeof(env), argt);
+    jl_method_instance_t *mi = jl_specializations_get_linfo((jl_method_t*)root, sigtype, jl_emptysvec);
+    inst = jl_new_codeinst(mi, rt_ub, NULL, (jl_value_t*)ci,
+        0, ((jl_method_t*)root)->primary_world, -1, 0, 0, jl_nothing, 0);
+    jl_mi_cache_insert(mi, inst);
+
+    jl_opaque_closure_t *oc = new_opaque_closure(argt, rt_lb, rt_ub, root, env);
+    JL_GC_POP();
+    return oc;
+}
+
+JL_CALLABLE(jl_new_opaque_closure_jlcall)
+{
+    if (nargs < 4)
+        jl_error("new_opaque_closure: Not enough arguments");
+    return (jl_value_t*)jl_new_opaque_closure((jl_tupletype_t*)args[0],
+        args[1], args[2], args[3], &args[4], nargs-4);
+}
+
+
+// check whether the specified number of arguments is compatible with the
+// specified number of parameters of the tuple type
+STATIC_INLINE int jl_tupletype_length_compat(jl_value_t *v, size_t nargs) JL_NOTSAFEPOINT
+{
+    v = jl_unwrap_unionall(v);
+    assert(jl_is_tuple_type(v));
+    size_t nparams = jl_nparams(v);
+    if (nparams == 0)
+        return nargs == 0;
+    jl_value_t *va = jl_tparam(v,nparams-1);
+    if (jl_is_vararg(va)) {
+        jl_value_t *len = jl_unwrap_vararg_num(va);
+        if (len &&jl_is_long(len))
+            return nargs == nparams - 1 + jl_unbox_long(len);
+        return nargs >= nparams - 1;
+    }
+    return nparams == nargs;
+}
+
+JL_CALLABLE(jl_f_opaque_closure_call)
+{
+    jl_opaque_closure_t* oc = (jl_opaque_closure_t*)F;
+    jl_value_t *argt = jl_tparam0(jl_typeof(oc));
+    if (!jl_tupletype_length_compat(argt, nargs))
+        jl_method_error(F, args, nargs + 1, oc->world);
+    argt = jl_unwrap_unionall(argt);
+    assert(jl_is_datatype(argt));
+    jl_svec_t *types = jl_get_fieldtypes((jl_datatype_t*)argt);
+    size_t ntypes = jl_svec_len(types);
+    for (int i = 0; i < nargs; ++i) {
+        jl_value_t *typ = i >= ntypes ? jl_svecref(types, ntypes-1) : jl_svecref(types, i);
+        if (jl_is_vararg(typ))
+            typ = jl_unwrap_vararg(typ);
+        jl_typeassert(args[i], typ);
+    }
+    return oc->invoke(F, args, nargs);
+}
diff --git a/src/options.h b/src/options.h
index 3ffbf05b2249ff..5a1700708d9e7e 100644
--- a/src/options.h
+++ b/src/options.h
@@ -1,5 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#include "platform.h"
+
 #ifndef JL_OPTIONS_H
 #define JL_OPTIONS_H
 
@@ -12,10 +14,14 @@
 
 // object layout options ------------------------------------------------------
 
-// how much space we're willing to waste if an array outgrows its
-// original object
+// The data for an array this size or below will be allocated within the
+// Array object. If the array outgrows that space, it will be wasted.
 #define ARRAY_INLINE_NBYTES (2048*sizeof(void*))
 
+// Arrays at least this size will get larger alignment (JL_CACHE_BYTE_ALIGNMENT).
+// Must be bigger than GC_MAX_SZCLASS.
+#define ARRAY_CACHE_ALIGN_THRESHOLD 2048
+
 // codegen options ------------------------------------------------------------
 
 // (Experimental) Use MCJIT ELF, even where it's not the native format
@@ -114,7 +120,7 @@
 #endif
 
 // allow a suspended Task to restart on a different thread
-//#define MIGRATE_TASKS
+#define MIGRATE_TASKS
 
 // threading options ----------------------------------------------------------
 
@@ -128,6 +134,9 @@
 #  define JULIA_NUM_THREADS 1
 #endif
 
+// threadpools specification
+#define THREADPOOLS_NAME                "JULIA_THREADPOOLS"
+
 // affinitization behavior
 #define MACHINE_EXCLUSIVE_NAME          "JULIA_EXCLUSIVE"
 #define DEFAULT_MACHINE_EXCLUSIVE       0
@@ -154,23 +163,19 @@
 
 // sanitizer defaults ---------------------------------------------------------
 
-#ifndef JULIA_H
-#error "Must be included after julia.h"
-#endif
-
 // Automatically enable MEMDEBUG and KEEP_BODIES for the sanitizers
-#if defined(JL_ASAN_ENABLED) || defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_) || defined(_COMPILER_MSAN_ENABLED_)
 #define MEMDEBUG
 #define KEEP_BODIES
 #endif
 
 // TSAN doesn't like COPY_STACKS
-#if defined(JL_TSAN_ENABLED) && defined(COPY_STACKS)
+#if defined(_COMPILER_TSAN_ENABLED_) && defined(COPY_STACKS)
 #undef COPY_STACKS
 #endif
 
 // Memory sanitizer needs TLS, which llvm only supports for the small memory model
-#if defined(JL_MSAN_ENABLED)
+#if defined(_COMPILER_MSAN_ENABLED_)
 // todo: fix the llvm MemoryManager to work with small memory model
 #endif
 
diff --git a/src/partr.c b/src/partr.c
index f5ac6fd70012f0..9250ff11071087 100644
--- a/src/partr.c
+++ b/src/partr.c
@@ -17,6 +17,9 @@ extern "C" {
 
 // thread sleep state
 
+// default to DEFAULT_THREAD_SLEEP_THRESHOLD; set via $JULIA_THREAD_SLEEP_THRESHOLD
+uint64_t sleep_threshold;
+
 // thread should not be sleeping--it might need to do work.
 static const int16_t not_sleeping = 0;
 
@@ -28,6 +31,14 @@ static const int16_t sleeping = 1;
 // invariant: The transition of a thread state to sleeping must be followed by a check that there wasn't work pending for it.
 // information: Observing thread not-sleeping is sufficient to ensure the target thread will subsequently inspect its local queue.
 // information: Observing thread is-sleeping says it may be necessary to notify it at least once to wakeup. It may already be awake however for a variety of reasons.
+// information: These observations require sequentially-consistent fences to be inserted between each of those operational phases.
+// [^store_buffering_1]: These fences are used to avoid the cycle 2b -> 1a -> 1b -> 2a -> 2b where
+// * Dequeuer:
+//   * 1: `jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping)`
+// * Enqueuer:
+//   * 2: `jl_atomic_load_relaxed(&ptls->sleep_check_state)` in `jl_wakeup_thread` returns `not_sleeping`
+// i.e., the dequeuer misses the enqueue and enqueuer misses the sleep state transition.
+
 
 JULIA_DEBUG_SLEEPWAKE(
 uint64_t wakeup_enter;
@@ -36,205 +47,71 @@ uint64_t io_wakeup_enter;
 uint64_t io_wakeup_leave;
 );
 
+uv_mutex_t *sleep_locks;
+uv_cond_t *wake_signals;
 
-JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int tid) JL_NOTSAFEPOINT
+JL_DLLEXPORT int jl_set_task_tid(jl_task_t *task, int16_t tid) JL_NOTSAFEPOINT
 {
     // Try to acquire the lock on this task.
-    int16_t was = task->tid;
+    int16_t was = jl_atomic_load_relaxed(&task->tid);
     if (was == tid)
         return 1;
     if (was == -1)
-        return jl_atomic_bool_compare_exchange(&task->tid, -1, tid);
+        return jl_atomic_cmpswap(&task->tid, &was, tid);
     return 0;
 }
 
+JL_DLLEXPORT int jl_set_task_threadpoolid(jl_task_t *task, int8_t tpid) JL_NOTSAFEPOINT
+{
+    if (tpid < 0 || tpid >= jl_n_threadpools)
+        return 0;
+    task->threadpoolid = tpid;
+    return 1;
+}
+
 // GC functions used
 extern int jl_gc_mark_queue_obj_explicit(jl_gc_mark_cache_t *gc_cache,
                                          jl_gc_mark_sp_t *sp, jl_value_t *obj) JL_NOTSAFEPOINT;
 
-// multiq
+// parallel task runtime
 // ---
 
-/* a task heap */
-typedef struct taskheap_tag {
-    jl_mutex_t lock;
-    jl_task_t **tasks;
-    int32_t ntasks;
-    int16_t prio;
-} taskheap_t;
-
-/* multiqueue parameters */
-static const int32_t heap_d = 8;
-static const int heap_c = 2;
-
-/* size of each heap */
-static const int tasks_per_heap = 65536; // TODO: this should be smaller by default, but growable!
-
-/* the multiqueue's heaps */
-static taskheap_t *heaps;
-static int32_t heap_p;
-
-/* unbias state for the RNG */
-static uint64_t cong_unbias;
-
-
-static inline void multiq_init(void)
-{
-    heap_p = heap_c * jl_n_threads;
-    heaps = (taskheap_t *)calloc(heap_p, sizeof(taskheap_t));
-    for (int32_t i = 0; i < heap_p; ++i) {
-        jl_mutex_init(&heaps[i].lock);
-        heaps[i].tasks = (jl_task_t **)calloc(tasks_per_heap, sizeof(jl_task_t*));
-        heaps[i].ntasks = 0;
-        heaps[i].prio = INT16_MAX;
-    }
-    unbias_cong(heap_p, &cong_unbias);
-}
-
-
-static inline void sift_up(taskheap_t *heap, int32_t idx)
-{
-    if (idx > 0) {
-        int32_t parent = (idx-1)/heap_d;
-        if (heap->tasks[idx]->prio < heap->tasks[parent]->prio) {
-            jl_task_t *t = heap->tasks[parent];
-            heap->tasks[parent] = heap->tasks[idx];
-            heap->tasks[idx] = t;
-            sift_up(heap, parent);
-        }
-    }
-}
-
-
-static inline void sift_down(taskheap_t *heap, int32_t idx)
+JL_DLLEXPORT uint32_t jl_rand_ptls(uint32_t max, uint32_t unbias)
 {
-    if (idx < heap->ntasks) {
-        for (int32_t child = heap_d*idx + 1;
-                child < tasks_per_heap && child <= heap_d*idx + heap_d;
-                ++child) {
-            if (heap->tasks[child]
-                    &&  heap->tasks[child]->prio < heap->tasks[idx]->prio) {
-                jl_task_t *t = heap->tasks[idx];
-                heap->tasks[idx] = heap->tasks[child];
-                heap->tasks[child] = t;
-                sift_down(heap, child);
-            }
-        }
-    }
+    jl_ptls_t ptls = jl_current_task->ptls;
+    // one-extend unbias back to 64-bits
+    return cong(max, -(uint64_t)-unbias, &ptls->rngseed);
 }
 
-
-static inline int multiq_insert(jl_task_t *task, int16_t priority)
+// initialize the threading infrastructure
+// (used only by the main thread)
+void jl_init_threadinginfra(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    uint64_t rn;
-
-    task->prio = priority;
-    do {
-        rn = cong(heap_p, cong_unbias, &ptls->rngseed);
-    } while (!jl_mutex_trylock_nogc(&heaps[rn].lock));
-
-    if (heaps[rn].ntasks >= tasks_per_heap) {
-        jl_mutex_unlock_nogc(&heaps[rn].lock);
-        // multiq insertion failed, increase #tasks per heap
-        return -1;
+    /* initialize the synchronization trees pool */
+
+    sleep_threshold = DEFAULT_THREAD_SLEEP_THRESHOLD;
+    char *cp = getenv(THREAD_SLEEP_THRESHOLD_NAME);
+    if (cp) {
+        if (!strncasecmp(cp, "infinite", 8))
+            sleep_threshold = UINT64_MAX;
+        else
+            sleep_threshold = (uint64_t)strtol(cp, NULL, 10);
     }
 
-    heaps[rn].tasks[heaps[rn].ntasks++] = task;
-    sift_up(&heaps[rn], heaps[rn].ntasks-1);
-    int16_t prio = jl_atomic_load(&heaps[rn].prio);
-    if (task->prio < prio)
-        jl_atomic_store(&heaps[rn].prio, task->prio);
-    jl_mutex_unlock_nogc(&heaps[rn].lock);
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_install_thread_signal_handler(ptls);
 
-    return 0;
-}
-
-
-static inline jl_task_t *multiq_deletemin(void)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    uint64_t rn1 = 0, rn2;
-    int32_t i;
-    int16_t prio1, prio2;
-    jl_task_t *task;
- retry:
-    for (i = 0; i < heap_p; ++i) {
-        rn1 = cong(heap_p, cong_unbias, &ptls->rngseed);
-        rn2 = cong(heap_p, cong_unbias, &ptls->rngseed);
-        prio1 = jl_atomic_load(&heaps[rn1].prio);
-        prio2 = jl_atomic_load(&heaps[rn2].prio);
-        if (prio1 > prio2) {
-            prio1 = prio2;
-            rn1 = rn2;
-        }
-        else if (prio1 == prio2 && prio1 == INT16_MAX)
-            continue;
-        if (jl_mutex_trylock_nogc(&heaps[rn1].lock)) {
-            if (prio1 == heaps[rn1].prio)
-                break;
-            jl_mutex_unlock_nogc(&heaps[rn1].lock);
-        }
-    }
-    if (i == heap_p)
-        return NULL;
-
-    task = heaps[rn1].tasks[0];
-    if (!jl_set_task_tid(task, ptls->tid)) {
-        jl_mutex_unlock_nogc(&heaps[rn1].lock);
-        goto retry;
+    int16_t tid;
+    sleep_locks = (uv_mutex_t*)calloc(jl_n_threads, sizeof(uv_mutex_t));
+    wake_signals = (uv_cond_t*)calloc(jl_n_threads, sizeof(uv_cond_t));
+    for (tid = 0; tid < jl_n_threads; tid++) {
+        uv_mutex_init(&sleep_locks[tid]);
+        uv_cond_init(&wake_signals[tid]);
     }
-    heaps[rn1].tasks[0] = heaps[rn1].tasks[--heaps[rn1].ntasks];
-    heaps[rn1].tasks[heaps[rn1].ntasks] = NULL;
-    prio1 = INT16_MAX;
-    if (heaps[rn1].ntasks > 0) {
-        sift_down(&heaps[rn1], 0);
-        prio1 = heaps[rn1].tasks[0]->prio;
-    }
-    jl_atomic_store(&heaps[rn1].prio, prio1);
-    jl_mutex_unlock_nogc(&heaps[rn1].lock);
-
-    return task;
 }
 
 
-void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp)
-{
-    int32_t i, j;
-    for (i = 0; i < heap_p; ++i)
-        for (j = 0; j < heaps[i].ntasks; ++j)
-            jl_gc_mark_queue_obj_explicit(gc_cache, sp, (jl_value_t *)heaps[i].tasks[j]);
-}
-
-
-static int multiq_check_empty(void)
-{
-    int32_t i;
-    for (i = 0; i < heap_p; ++i) {
-        if (heaps[i].ntasks != 0)
-            return 0;
-    }
-    return 1;
-}
-
-
-
-// parallel task runtime
-// ---
-
-// initialize the threading infrastructure
-void jl_init_threadinginfra(void)
-{
-    /* initialize the synchronization trees pool and the multiqueue */
-    multiq_init();
-
-    jl_ptls_t ptls = jl_get_ptls_states();
-    uv_mutex_init(&ptls->sleep_lock);
-    uv_cond_init(&ptls->wake_signal);
-}
-
-
-void JL_NORETURN jl_finish_task(jl_task_t *t, jl_value_t *resultval JL_MAYBE_UNROOTED);
+void JL_NORETURN jl_finish_task(jl_task_t *t);
 
 // thread function: used by all except the main thread
 void jl_threadfun(void *arg)
@@ -242,16 +119,13 @@ void jl_threadfun(void *arg)
     jl_threadarg_t *targ = (jl_threadarg_t*)arg;
 
     // initialize this thread (set tid, create heap, set up root task)
-    jl_init_threadtls(targ->tid);
+    jl_ptls_t ptls = jl_init_threadtls(targ->tid);
     void *stack_lo, *stack_hi;
     jl_init_stack_limits(0, &stack_lo, &stack_hi);
-    jl_init_root_task(stack_lo, stack_hi);
-
-    jl_ptls_t ptls = jl_get_ptls_states();
-
-    // set up sleep mechanism for this thread
-    uv_mutex_init(&ptls->sleep_lock);
-    uv_cond_init(&ptls->wake_signal);
+    // warning: this changes `jl_current_task`, so be careful not to call that from this function
+    jl_task_t *ct = jl_init_root_task(ptls, stack_lo, stack_hi);
+    JL_GC_PROMISE_ROOTED(ct);
+    jl_install_thread_signal_handler(ptls);
 
     // wait for all threads
     jl_gc_state_set(ptls, JL_GC_STATE_SAFE, 0);
@@ -261,41 +135,29 @@ void jl_threadfun(void *arg)
     free(targ);
 
     (void)jl_gc_unsafe_enter(ptls);
-    jl_current_task->exception = jl_nothing;
-    jl_finish_task(jl_current_task, jl_nothing); // noreturn
-}
-
-
-// enqueue the specified task for execution
-JL_DLLEXPORT int jl_enqueue_task(jl_task_t *task)
-{
-    if (multiq_insert(task, task->prio) == -1)
-        return 1;
-    return 0;
+    jl_finish_task(ct); // noreturn
 }
 
 
-static int running_under_rr(void)
+int jl_running_under_rr(int recheck)
 {
 #ifdef _OS_LINUX_
 #define RR_CALL_BASE 1000
 #define SYS_rrcall_check_presence (RR_CALL_BASE + 8)
-    static int checked_running_under_rr = 0;
-    static int is_running_under_rr = 0;
-    if (!checked_running_under_rr) {
+    static _Atomic(int) is_running_under_rr = 0;
+    int rr = jl_atomic_load_relaxed(&is_running_under_rr);
+    if (rr == 0 || recheck) {
         int ret = syscall(SYS_rrcall_check_presence, 0, 0, 0, 0, 0, 0);
-        if (ret == -1) {
+        if (ret == -1)
             // Should always be ENOSYS, but who knows what people do for
             // unknown syscalls with their seccomp filters, so just say
             // that we don't have rr.
-            is_running_under_rr = 0;
-        }
-        else {
-            is_running_under_rr = 1;
-        }
-        checked_running_under_rr = 1;
+            rr = 2;
+        else
+            rr = 1;
+        jl_atomic_store_relaxed(&is_running_under_rr, rr);
     }
-    return is_running_under_rr;
+    return rr == 1;
 #else
     return 0;
 #endif
@@ -312,14 +174,14 @@ static int sleep_check_after_threshold(uint64_t *start_cycles)
      * scheduling logic from switching to other threads. Just don't bother
      * trying to wait here
      */
-    if (running_under_rr())
+    if (jl_running_under_rr(0))
         return 1;
     if (!(*start_cycles)) {
         *start_cycles = jl_hrtime();
         return 0;
     }
     uint64_t elapsed_cycles = jl_hrtime() - (*start_cycles);
-    if (elapsed_cycles >= DEFAULT_THREAD_SLEEP_THRESHOLD) {
+    if (elapsed_cycles >= sleep_threshold) {
         *start_cycles = 0;
         return 1;
     }
@@ -327,14 +189,21 @@ static int sleep_check_after_threshold(uint64_t *start_cycles)
 }
 
 
-static void wake_thread(int16_t tid)
+static int wake_thread(int16_t tid)
 {
     jl_ptls_t other = jl_all_tls_states[tid];
-    if (jl_atomic_bool_compare_exchange(&other->sleep_check_state, sleeping, not_sleeping)) {
-        uv_mutex_lock(&other->sleep_lock);
-        uv_cond_signal(&other->wake_signal);
-        uv_mutex_unlock(&other->sleep_lock);
+    int8_t state = sleeping;
+
+    if (jl_atomic_load_relaxed(&other->sleep_check_state) == sleeping) {
+        if (jl_atomic_cmpswap_relaxed(&other->sleep_check_state, &state, not_sleeping)) {
+            JL_PROBE_RT_SLEEP_CHECK_WAKE(other, state);
+            uv_mutex_lock(&sleep_locks[tid]);
+            uv_cond_signal(&wake_signals[tid]);
+            uv_mutex_unlock(&sleep_locks[tid]);
+            return 1;
+        }
     }
+    return 0;
 }
 
 
@@ -348,108 +217,154 @@ static void wake_libuv(void)
 /* ensure thread tid is awake if necessary */
 JL_DLLEXPORT void jl_wakeup_thread(int16_t tid)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    int16_t uvlock = jl_atomic_load(&jl_uv_mutex.owner);
-    int16_t self = ptls->tid;
-    unsigned long system_self = jl_all_tls_states[self]->system_id;
+    jl_task_t *ct = jl_current_task;
+    int16_t self = jl_atomic_load_relaxed(&ct->tid);
+    if (tid != self)
+        jl_fence(); // [^store_buffering_1]
+    jl_task_t *uvlock = jl_atomic_load_relaxed(&jl_uv_mutex.owner);
     JULIA_DEBUG_SLEEPWAKE( wakeup_enter = cycleclock() );
     if (tid == self || tid == -1) {
         // we're already awake, but make sure we'll exit uv_run
-        if (jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping)
-            jl_atomic_store(&ptls->sleep_check_state, not_sleeping);
-        if (uvlock == system_self)
+        jl_ptls_t ptls = ct->ptls;
+        if (jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping) {
+            jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping);
+            JL_PROBE_RT_SLEEP_CHECK_WAKEUP(ptls);
+        }
+        if (uvlock == ct)
             uv_stop(jl_global_event_loop());
     }
     else {
         // something added to the sticky-queue: notify that thread
-        wake_thread(tid);
-        // check if we need to notify uv_run too
-        unsigned long system_tid = jl_all_tls_states[tid]->system_id;
-        if (uvlock != system_self && jl_atomic_load(&jl_uv_mutex.owner) == system_tid)
-            wake_libuv();
+        if (wake_thread(tid)) {
+            // check if we need to notify uv_run too
+            jl_fence();
+            jl_task_t *tid_task = jl_atomic_load_relaxed(&jl_all_tls_states[tid]->current_task);
+            // now that we have changed the thread to not-sleeping, ensure that
+            // either it has not yet acquired the libuv lock, or that it will
+            // observe the change of state to not_sleeping
+            if (uvlock != ct && jl_atomic_load_relaxed(&jl_uv_mutex.owner) == tid_task)
+                wake_libuv();
+        }
     }
     // check if the other threads might be sleeping
     if (tid == -1) {
         // something added to the multi-queue: notify all threads
         // in the future, we might want to instead wake some fraction of threads,
         // and let each of those wake additional threads if they find work
+        int anysleep = 0;
         for (tid = 0; tid < jl_n_threads; tid++) {
             if (tid != self)
-                wake_thread(tid);
+                anysleep |= wake_thread(tid);
         }
         // check if we need to notify uv_run too
-        if (uvlock != system_self && jl_atomic_load(&jl_uv_mutex.owner) != 0)
-            wake_libuv();
+        if (uvlock != ct && anysleep) {
+            jl_fence();
+            if (jl_atomic_load_relaxed(&jl_uv_mutex.owner) != NULL)
+                wake_libuv();
+        }
     }
     JULIA_DEBUG_SLEEPWAKE( wakeup_leave = cycleclock() );
 }
 
 
-// get the next runnable task from the multiq
+// get the next runnable task
 static jl_task_t *get_next_task(jl_value_t *trypoptask, jl_value_t *q)
 {
     jl_gc_safepoint();
-    jl_value_t *args[2] = { trypoptask, q };
-    jl_task_t *task = (jl_task_t*)jl_apply(args, 2);
+    jl_task_t *task = (jl_task_t*)jl_apply_generic(trypoptask, &q, 1);
     if (jl_typeis(task, jl_task_type)) {
-        int self = jl_get_ptls_states()->tid;
+        int self = jl_atomic_load_relaxed(&jl_current_task->tid);
         jl_set_task_tid(task, self);
         return task;
     }
-    jl_gc_safepoint();
-    return multiq_deletemin();
+    return NULL;
+}
+
+static int check_empty(jl_value_t *checkempty)
+{
+    return jl_apply_generic(checkempty, NULL, 0) == jl_true;
 }
 
-static int may_sleep(jl_ptls_t ptls)
+static int may_sleep(jl_ptls_t ptls) JL_NOTSAFEPOINT
 {
     // sleep_check_state is only transitioned from not_sleeping to sleeping
     // by the thread itself. As a result, if this returns false, it will
-    // continue returning false. If it returns true, there are no guarantees.
+    // continue returning false. If it returns true, we know the total
+    // modification order of the fences.
+    jl_fence(); // [^store_buffering_1]
     return jl_atomic_load_relaxed(&ptls->sleep_check_state) == sleeping;
 }
 
-extern volatile unsigned _threadedregion;
+extern _Atomic(unsigned) _threadedregion;
 
-JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
+JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q, jl_value_t *checkempty)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     uint64_t start_cycles = 0;
-    jl_task_t *task;
 
     while (1) {
-        task = get_next_task(trypoptask, q);
+        jl_task_t *task = get_next_task(trypoptask, q);
         if (task)
             return task;
 
         // quick, race-y check to see if there seems to be any stuff in there
         jl_cpu_pause();
-        if (!multiq_check_empty()) {
+        if (!check_empty(checkempty)) {
             start_cycles = 0;
             continue;
         }
 
         jl_cpu_pause();
-        if (sleep_check_after_threshold(&start_cycles) || (!_threadedregion && ptls->tid == 0)) {
-            jl_atomic_store(&ptls->sleep_check_state, sleeping); // acquire sleep-check lock
-            if (!multiq_check_empty()) {
-                if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
-                    jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
+        jl_ptls_t ptls = ct->ptls;
+        if (sleep_check_after_threshold(&start_cycles) || (!jl_atomic_load_relaxed(&_threadedregion) && ptls->tid == 0)) {
+            // acquire sleep-check lock
+            jl_atomic_store_relaxed(&ptls->sleep_check_state, sleeping);
+            jl_fence(); // [^store_buffering_1]
+            JL_PROBE_RT_SLEEP_CHECK_SLEEP(ptls);
+            if (!check_empty(checkempty)) { // uses relaxed loads
+                if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
+                    jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
+                    JL_PROBE_RT_SLEEP_CHECK_TASKQ_WAKE(ptls);
+                }
+                continue;
+            }
+            task = get_next_task(trypoptask, q); // note: this should not yield
+            if (ptls != ct->ptls) {
+                // sigh, a yield was detected, so let's go ahead and handle it anyway by starting over
+                ptls = ct->ptls;
+                if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
+                    jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
+                    JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                }
+                if (task)
+                    return task;
                 continue;
             }
-            task = get_next_task(trypoptask, q);
             if (task) {
-                if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
-                    jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
+                if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
+                    jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
+                    JL_PROBE_RT_SLEEP_CHECK_TASK_WAKE(ptls);
+                }
                 return task;
             }
 
-            // one thread should win this race and watch the event loop
-            // inside a threaded region, any thread can listen for IO messages,
-            // although none are allowed to create new ones
-            // outside of threaded regions, all IO is permitted,
-            // but only on thread 1
+
+            // IO is always permitted, but outside a threaded region, only
+            // thread 0 will process messages.
+            // Inside a threaded region, any thread can listen for IO messages,
+            // and one thread should win this race and watch the event loop,
+            // but we bias away from idle threads getting parked here.
+            //
+            // The reason this works is somewhat convoluted, and closely tied to [^store_buffering_1]:
+            //  - After decrementing _threadedregion, the thread is required to
+            //    call jl_wakeup_thread(0), that will kick out any thread who is
+            //    already there, and then eventually thread 0 will get here.
+            //  - Inside a _threadedregion, there must exist at least one
+            //    thread that has a happens-before relationship on the libuv lock
+            //    before reaching this decision point in the code who will see
+            //    the lock as unlocked and thus must win this race here.
             int uvlock = 0;
-            if (_threadedregion) {
+            if (jl_atomic_load_relaxed(&_threadedregion)) {
                 uvlock = jl_mutex_trylock(&jl_uv_mutex);
             }
             else if (ptls->tid == 0) {
@@ -458,59 +373,51 @@ JL_DLLEXPORT jl_task_t *jl_task_get_next(jl_value_t *trypoptask, jl_value_t *q)
             }
             if (uvlock) {
                 int active = 1;
-                if (jl_atomic_load(&jl_uv_n_waiters) != 0) {
-                    // but if we won the race against someone who actually needs
-                    // the lock to do real work, we need to let them have it instead
-                    JL_UV_UNLOCK();
-                }
-                else {
-                    // otherwise, we may block until someone asks us for the lock
-                    uv_loop_t *loop = jl_global_event_loop();
+                // otherwise, we block until someone asks us for the lock
+                uv_loop_t *loop = jl_global_event_loop();
+                while (active && may_sleep(ptls)) {
+                    if (jl_atomic_load_relaxed(&jl_uv_n_waiters) != 0)
+                        // but if we won the race against someone who actually needs
+                        // the lock to do real work, we need to let them have it instead
+                        break;
+                    loop->stop_flag = 0;
+                    JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() );
+                    active = uv_run(loop, UV_RUN_ONCE);
+                    JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_leave = cycleclock() );
                     jl_gc_safepoint();
-                    if (may_sleep(ptls)) {
-                        loop->stop_flag = 0;
-                        JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_enter = cycleclock() );
-                        active = uv_run(loop, UV_RUN_ONCE);
-                        JULIA_DEBUG_SLEEPWAKE( ptls->uv_run_leave = cycleclock() );
-                    }
-                    JL_UV_UNLOCK();
-                    // optimization: check again first if we may have work to do
-                    if (!may_sleep(ptls)) {
-                        assert(ptls->sleep_check_state == not_sleeping);
-                        start_cycles = 0;
-                        continue;
-                    }
-                    // otherwise, we got a spurious wakeup since some other
-                    // thread that just wanted to steal libuv from us,
-                    // just go right back to sleep on the other wake signal
-                    // to let them take it from us without conflict
-                    // TODO: this relinquishes responsibility for all event
-                    //       to the last thread to do an explicit operation,
-                    //       which may starve other threads of critical work
-                    if (jl_atomic_load(&jl_uv_n_waiters) == 0) {
-                        continue;
-                    }
                 }
-                if (!_threadedregion && active && ptls->tid == 0) {
+                JL_UV_UNLOCK();
+                // optimization: check again first if we may have work to do.
+                // Otherwise we got a spurious wakeup since some other thread
+                // that just wanted to steal libuv from us. We will just go
+                // right back to sleep on the individual wake signal to let
+                // them take it from us without conflict.
+                if (!may_sleep(ptls)) {
+                    start_cycles = 0;
+                    continue;
+                }
+                if (!jl_atomic_load_relaxed(&_threadedregion) && active && ptls->tid == 0) {
                     // thread 0 is the only thread permitted to run the event loop
-                    // so it needs to stay alive
-                    if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping)
-                        jl_atomic_store(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
+                    // so it needs to stay alive, just spin-looping if necessary
+                    if (jl_atomic_load_relaxed(&ptls->sleep_check_state) != not_sleeping) {
+                        jl_atomic_store_relaxed(&ptls->sleep_check_state, not_sleeping); // let other threads know they don't need to wake us
+                        JL_PROBE_RT_SLEEP_CHECK_UV_WAKE(ptls);
+                    }
                     start_cycles = 0;
                     continue;
                 }
             }
 
-            // the other threads will just wait for on signal to resume
+            // the other threads will just wait for an individual wake signal to resume
             JULIA_DEBUG_SLEEPWAKE( ptls->sleep_enter = cycleclock() );
             int8_t gc_state = jl_gc_safe_enter(ptls);
-            uv_mutex_lock(&ptls->sleep_lock);
+            uv_mutex_lock(&sleep_locks[ptls->tid]);
             while (may_sleep(ptls)) {
-                uv_cond_wait(&ptls->wake_signal, &ptls->sleep_lock);
+                uv_cond_wait(&wake_signals[ptls->tid], &sleep_locks[ptls->tid]);
                 // TODO: help with gc work here, if applicable
             }
-            assert(ptls->sleep_check_state == not_sleeping);
-            uv_mutex_unlock(&ptls->sleep_lock);
+            assert(jl_atomic_load_relaxed(&ptls->sleep_check_state) == not_sleeping);
+            uv_mutex_unlock(&sleep_locks[ptls->tid]);
             JULIA_DEBUG_SLEEPWAKE( ptls->sleep_leave = cycleclock() );
             jl_gc_safe_leave(ptls, gc_state); // contains jl_gc_safepoint
             start_cycles = 0;
diff --git a/src/passes.h b/src/passes.h
new file mode 100644
index 00000000000000..3b229377f7cdc7
--- /dev/null
+++ b/src/passes.h
@@ -0,0 +1,100 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_PASSES_H
+#define JL_PASSES_H
+
+#include <llvm/IR/PassManager.h>
+#include <llvm/Transforms/Scalar/LoopPassManager.h>
+
+using namespace llvm;
+
+// Function Passes
+struct DemoteFloat16 : PassInfoMixin<DemoteFloat16> {
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct CombineMulAdd : PassInfoMixin<CombineMulAdd> {
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+struct LateLowerGC : PassInfoMixin<LateLowerGC> {
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct AllocOptPass : PassInfoMixin<AllocOptPass> {
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+struct PropagateJuliaAddrspacesPass : PassInfoMixin<PropagateJuliaAddrspacesPass> {
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct LowerExcHandlers : PassInfoMixin<LowerExcHandlers> {
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct GCInvariantVerifierPass : PassInfoMixin<GCInvariantVerifierPass> {
+    bool Strong;
+    GCInvariantVerifierPass(bool Strong = false) : Strong(Strong) {}
+
+    PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+// Module Passes
+struct CPUFeatures : PassInfoMixin<CPUFeatures> {
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct RemoveNI : PassInfoMixin<RemoveNI> {
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct LowerSIMDLoop : PassInfoMixin<LowerSIMDLoop> {
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
+struct FinalLowerGCPass : PassInfoMixin<LateLowerGC> {
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct MultiVersioning : PassInfoMixin<MultiVersioning> {
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct RemoveJuliaAddrspacesPass : PassInfoMixin<RemoveJuliaAddrspacesPass> {
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct RemoveAddrspacesPass : PassInfoMixin<RemoveAddrspacesPass> {
+    std::function<unsigned(unsigned)> ASRemapper;
+    RemoveAddrspacesPass();
+    RemoveAddrspacesPass(std::function<unsigned(unsigned)> ASRemapper) : ASRemapper(std::move(ASRemapper)) {}
+
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+struct LowerPTLSPass : PassInfoMixin<LowerPTLSPass> {
+    bool imaging_mode;
+    LowerPTLSPass(bool imaging_mode=false) : imaging_mode(imaging_mode) {}
+
+    PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+    static bool isRequired() { return true; }
+};
+
+// Loop Passes
+struct JuliaLICMPass : PassInfoMixin<JuliaLICMPass> {
+    PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                          LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+#endif
diff --git a/src/precompile.c b/src/precompile.c
index 886a0677b18990..7eb3261f5a29ba 100644
--- a/src/precompile.c
+++ b/src/precompile.c
@@ -260,16 +260,16 @@ static void _compile_all_deq(jl_array_t *found)
         jl_method_t *m = ml->func.method;
         if (m->source == NULL) // TODO: generic implementations of generated functions
             continue;
-        mi = jl_get_unspecialized(mi);
-        assert(mi == m->unspecialized); // make sure we didn't get tricked by a generated function, since we can't handle those
+        mi = jl_get_unspecialized(m);
+        assert(mi == jl_atomic_load_relaxed(&m->unspecialized)); // make sure we didn't get tricked by a generated function, since we can't handle those
         jl_code_instance_t *ucache = jl_get_method_inferred(mi, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
-        if (ucache->invoke != NULL)
+        if (jl_atomic_load_relaxed(&ucache->invoke) != NULL)
             continue;
         src = m->source;
         assert(src);
         // TODO: we could now enable storing inferred function pointers in the `unspecialized` cache
-        //src = jl_type_infer(mi, jl_world_counter, 1);
-        //if (ucache->invoke != NULL)
+        //src = jl_type_infer(mi, jl_atomic_load_acquire(&jl_world_counter), 1);
+        //if (jl_atomic_load_relaxed(&ucache->invoke) != NULL)
         //    continue;
 
         // first try to create leaf signatures from the signature declaration and compile those
@@ -294,9 +294,10 @@ static int compile_all_enq__(jl_typemap_entry_t *ml, void *env)
 }
 
 
-static void compile_all_enq_(jl_methtable_t *mt, void *env)
+static int compile_all_enq_(jl_methtable_t *mt, void *env)
 {
-    jl_typemap_visitor(mt->defs, compile_all_enq__, env);
+    jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), compile_all_enq__, env);
+    return 1;
 }
 
 static void jl_compile_all_defs(void)
@@ -305,13 +306,24 @@ static void jl_compile_all_defs(void)
     // TypeMapEntries for Methods and MethodInstances that need to be compiled
     jl_array_t *m = jl_alloc_vec_any(0);
     JL_GC_PUSH1(&m);
+    int _changes = -1;
+    int attempts = 0;
     while (1) {
         jl_foreach_reachable_mtable(compile_all_enq_, m);
         size_t changes = jl_array_len(m);
         if (!changes)
             break;
+        if (changes == _changes) {
+            if (++attempts > 5) {
+                jl_printf(JL_STDERR, "unable to compile %d methods for compile-all\n", (int)changes);
+                break;
+            }
+        } else {
+            attempts = 0;
+        }
         _compile_all_deq(m);
         jl_array_del_end(m, changes);
+        _changes = changes;
     }
     JL_GC_POP();
 }
@@ -319,16 +331,16 @@ static void jl_compile_all_defs(void)
 static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closure)
 {
     assert(jl_is_method_instance(mi));
-    jl_code_instance_t *codeinst = mi->cache;
+    jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache);
     while (codeinst) {
         int do_compile = 0;
-        if (codeinst->invoke != jl_fptr_const_return) {
+        if (jl_atomic_load_relaxed(&codeinst->invoke) != jl_fptr_const_return) {
             if (codeinst->inferred && codeinst->inferred != jl_nothing &&
                 jl_ir_flag_inferred((jl_array_t*)codeinst->inferred) &&
                 !jl_ir_flag_inlineable((jl_array_t*)codeinst->inferred)) {
                 do_compile = 1;
             }
-            else if (codeinst->invoke != NULL || codeinst->precompile) {
+            else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) {
                 do_compile = 1;
             }
         }
@@ -344,18 +356,18 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur
 static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *closure)
 {
     jl_method_t *m = def->func.method;
-    if (m->name == jl_symbol("__init__") && jl_is_dispatch_tupletype(m->sig)) {
-        // ensure `__init__()` gets strongly-hinted, specialized, and compiled
+    if ((m->name == jl_symbol("__init__") || m->ccallable) && jl_is_dispatch_tupletype(m->sig)) {
+        // ensure `__init__()` and @ccallables get strongly-hinted, specialized, and compiled
         jl_method_instance_t *mi = jl_specializations_get_linfo(m, m->sig, jl_emptysvec);
         jl_array_ptr_1d_push((jl_array_t*)closure, (jl_value_t*)mi);
     }
     else {
-        jl_svec_t *specializations = def->func.method->specializations;
+        jl_svec_t *specializations = jl_atomic_load_relaxed(&def->func.method->specializations);
         size_t i, l = jl_svec_len(specializations);
         for (i = 0; i < l; i++) {
-            jl_method_instance_t *mi = (jl_method_instance_t*)jl_svecref(specializations, i);
-            if (mi != NULL)
-                precompile_enq_specialization_(mi, closure);
+            jl_value_t *mi = jl_svecref(specializations, i);
+            if (mi != jl_nothing)
+                precompile_enq_specialization_((jl_method_instance_t*)mi, closure);
         }
     }
     if (m->ccallable)
@@ -363,9 +375,9 @@ static int precompile_enq_all_specializations__(jl_typemap_entry_t *def, void *c
     return 1;
 }
 
-static void precompile_enq_all_specializations_(jl_methtable_t *mt, void *env)
+static int precompile_enq_all_specializations_(jl_methtable_t *mt, void *env)
 {
-    jl_typemap_visitor(mt->defs, precompile_enq_all_specializations__, env);
+    return jl_typemap_visitor(jl_atomic_load_relaxed(&mt->defs), precompile_enq_all_specializations__, env);
 }
 
 static void *jl_precompile(int all)
@@ -387,7 +399,7 @@ static void *jl_precompile(int all)
             size_t min_world = 0;
             size_t max_world = ~(size_t)0;
             if (!jl_isa_compileable_sig((jl_tupletype_t*)mi->specTypes, mi->def.method))
-                mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_world_counter, &min_world, &max_world, 0);
+                mi = jl_get_specialization1((jl_tupletype_t*)mi->specTypes, jl_atomic_load_acquire(&jl_world_counter), &min_world, &max_world, 0);
             if (mi)
                 jl_array_ptr_1d_push(m2, (jl_value_t*)mi);
         }
@@ -398,7 +410,7 @@ static void *jl_precompile(int all)
         }
     }
     m = NULL;
-    void *native_code = jl_create_native(m2, jl_default_cgparams, 0);
+    void *native_code = jl_create_native(m2, NULL, NULL, 0);
     JL_GC_POP();
     return native_code;
 }
diff --git a/src/processor.cpp b/src/processor.cpp
index 9d4ac476ef3165..b9dfc2b7f0b4e6 100644
--- a/src/processor.cpp
+++ b/src/processor.cpp
@@ -73,7 +73,7 @@
 //
 //     Optimize only for size. Clang's `-Oz`.
 
-bool jl_processor_print_help = false;
+JL_DLLEXPORT bool jl_processor_print_help = false;
 
 namespace {
 
@@ -401,6 +401,8 @@ static inline std::vector<uint8_t> serialize_target_data(llvm::StringRef name,
 {
     std::vector<uint8_t> res;
     auto add_data = [&] (const void *data, size_t sz) {
+        if (sz == 0)
+            return;
         size_t old_sz = res.size();
         res.resize(old_sz + sz);
         memcpy(&res[old_sz], data, sz);
diff --git a/src/processor.h b/src/processor.h
index 6c95a0b6003b62..f3b571cf9b9374 100644
--- a/src/processor.h
+++ b/src/processor.h
@@ -1,5 +1,8 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+#ifndef JL_PROCESSOR_H
+#define JL_PROCESSOR_H
+
 #include "support/dtypes.h"
 
 #include "julia.h"
@@ -14,9 +17,9 @@ extern "C" {
 /**
  * Related sysimg exported symbols
  *
- * In the following text function refer to an abstract identity.
- * It corresponds to a `Function` that we emit in the codegen and there might be multiple copy
- * of it in the system image. Only one of those copy will be used in a given session.
+ * In the following text, function refers to an abstract entity.
+ * It corresponds to a `Function` that we emit in the codegen, and there might be multiple copies
+ * of it in the system image. Only one of those copies will be used in a given session.
  * Function pointers refer to a real piece of code in the system image.
  * Each function might have multiple function pointers in the system image
  * and each function pointer will correspond to only one function.
@@ -34,28 +37,28 @@ extern "C" {
  *     The default function pointer is used if the function is cloned.
  *     The first element is the size of the array, which should **NOT** be used as the number
  *     of julia functions in the sysimg.
- *     Each entry in this array uniquely identifies a function which we are interested in
+ *     Each entry in this array uniquely identifies a function we are interested in
  *     (the function may have multiple function pointers corresponding to different versions).
- *     In other sysimg info, all information of functions are stored as function index which are
- *     `uint32_t` index in this array.
+ *     In other sysimg info, all references to functions are stored as their `uint32_t` index
+ *     in this array.
  *
  * # Target data and dispatch slots (Only needed by runtime during loading)
  * `jl_dispatch_target_ids`: [static data] serialize target data.
  *     This contains the number of targets which is needed to decode `jl_dispatch_fvars_idxs`
- *     in additional to the name and feature set of each target.
+ *     in addition to the name and feature set of each target.
  * `jl_dispatch_reloc_slots`: [static data] location and index of relocation slots.
  *     Stored as pairs of function indices and `int32_t` offsets from `jl_sysimg_gvars_base`.
  *     The first element is an `uint32_t` giving the number of relocations.
  *     This is needed for functions whose address is used in a way that requires dispatch.
  *     We currently only support one type of relocation (i.e. absolute pointer) which is enough
- *     for all use in functions as well as global GOT slot (for "PLT" callback).
+ *     for all use in functions as well as GOT slot (for "PLT" callback).
  *     Note that not all functions being cloned are assigned a slot.
  *     This array is sorted by the function indices.
  *     There can be more than one slot per-function,
  *     i.e. there can be duplicated function indices.
  *
  * # Target functions
- * `jl_dispatch_fvars_idxs`: [static data] Target specific functions indices.
+ * `jl_dispatch_fvars_idxs`: [static data] Target-specific function indices.
  *     For each target, this includes a tagged `uint32_t` length, an optional `uint32_t` index
  *     of the base target followed by an array of tagged function indices.
  *     The base target index is required to be smaller than the index of the current target
@@ -74,25 +77,25 @@ extern "C" {
  *     the base one since this is the only way we currently represent relocations.)
  *     A tagged length implicitly tags all the indices and the indices will not have the tag bit
  *     set. The lengths in this variable is needed to decode `jl_dispatch_fvars_offsets`.
- * `jl_dispatch_fvars_offsets`: [static data] Target specific function pointer offsets.
- *     This contains all the cloned functions that we are interested and it needs to be decoded
+ * `jl_dispatch_fvars_offsets`: [static data] Target-specific function pointer offsets.
+ *     This contains all the cloned functions that we are interested in and it needs to be decoded
  *     and used along with `jl_dispatch_fvars_idxs`.
  *     For the default target, there's no entries in this variable, if there's any relocations
  *     needed for the default target, the function pointers are taken from the global offset
  *     arrays directly.
  *     For a `clone_all` target (i.e. with the length in `jl_dispatch_fvars_idxs` tagged), this
- *     variable contains an offset array the same length as the global one. Only the indices
- *     appeared in `jl_dispatch_fvars_idxs` needs relocation and the dispatch code should return
+ *     variable contains an offset array of the same length as the global one. Only the indices
+ *     appearing in `jl_dispatch_fvars_idxs` need relocation and the dispatch code should return
  *     this array as the original/base function offsets.
  *     For other targets, this variable contains an offset array with the length defined in
- *     `jl_dispatch_fvars_idxs`. Tagged indices needs relocations.
+ *     `jl_dispatch_fvars_idxs`. Tagged indices need relocations.
  */
 
 enum {
     JL_TARGET_VEC_CALL = 1 << 0,
     // Clone all functions
     JL_TARGET_CLONE_ALL = 1 << 1,
-    // Clone when there's scalar math operations that can benefit from target specific
+    // Clone when there's scalar math operations that can benefit from target-specific
     // optimizations. This includes `muladd`, `fma`, `fast`/`contract` flags.
     JL_TARGET_CLONE_MATH = 1 << 2,
     // Clone when the function has a loop
@@ -107,6 +110,8 @@ enum {
     JL_TARGET_OPTSIZE = 1 << 6,
     // Only optimize for size for this target
     JL_TARGET_MINSIZE = 1 << 7,
+    // Clone when the function queries CPU features
+    JL_TARGET_CLONE_CPU = 1 << 8,
 };
 
 #define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) JL_FEATURE_DEF(name, bit, llvmver)
@@ -166,6 +171,10 @@ JL_DLLEXPORT jl_value_t *jl_get_cpu_name(void);
 // For debugging only
 JL_DLLEXPORT void jl_dump_host_cpu(void);
 
+JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero);
+JL_DLLEXPORT int32_t jl_get_zero_subnormals(void);
+JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault);
+JL_DLLEXPORT int32_t jl_get_default_nans(void);
 #ifdef __cplusplus
 }
 
@@ -173,7 +182,7 @@ JL_DLLEXPORT void jl_dump_host_cpu(void);
 #include <string>
 #include <vector>
 
-extern bool jl_processor_print_help;
+extern JL_DLLEXPORT bool jl_processor_print_help;
 
 /**
  * Returns the CPU name and feature string to be used by LLVM JIT.
@@ -181,14 +190,14 @@ extern bool jl_processor_print_help;
  * If the detected/specified CPU name is not available on the LLVM version specified,
  * a fallback CPU name will be used. Unsupported features will be ignored.
  */
-std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags);
+extern "C" JL_DLLEXPORT std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags);
 
 /**
  * Returns the CPU name and feature string to be used by LLVM disassembler.
  *
  * This will return a generic CPU name and a full feature string.
  */
-const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void);
+extern "C" JL_DLLEXPORT const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void);
 
 struct jl_target_spec_t {
     // LLVM target name
@@ -205,8 +214,9 @@ struct jl_target_spec_t {
 /**
  * Return the list of targets to clone
  */
-std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void);
+extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void);
 std::string jl_get_cpu_name_llvm(void);
 std::string jl_get_cpu_features_llvm(void);
-std::string jl_format_filename(llvm::StringRef output_pattern);
+#endif
+
 #endif
diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp
index 0b5b959097727b..ea8dddf629d62a 100644
--- a/src/processor_arm.cpp
+++ b/src/processor_arm.cpp
@@ -19,6 +19,9 @@
 #    undef USE_DYN_GETAUXVAL
 #    include <sys/auxv.h>
 #  endif
+#elif defined _CPU_AARCH64_ && defined _OS_DARWIN_
+#include <sys/sysctl.h>
+#include <string.h>
 #endif
 
 namespace ARM {
@@ -89,7 +92,8 @@ enum class CPU : uint32_t {
     arm_cortex_x1,
     arm_neoverse_e1,
     arm_neoverse_n1,
-    arm_zeus,
+    arm_neoverse_v1,
+    arm_neoverse_n2,
 
     // Cavium
     // aarch64
@@ -159,6 +163,8 @@ enum class CPU : uint32_t {
     apple_a11,
     apple_a12,
     apple_a13,
+    apple_a14,
+    apple_m1,
     apple_s4,
     apple_s5,
 
@@ -214,7 +220,7 @@ static constexpr FeatureDep deps[] = {
     {ccdp, ccpp},
     {sve, fullfp16},
     {fp16fml, fullfp16},
-    {altnzcv, fmi},
+    {altnzcv, flagm},
     {sve2, sve},
     {sve2_aes, sve2},
     {sve2_aes, aes},
@@ -236,9 +242,10 @@ constexpr auto armv8_2a = armv8_1a | get_feature_masks(v8_2a, ccpp);
 constexpr auto armv8_2a_crypto = armv8_2a | get_feature_masks(aes, sha2);
 constexpr auto armv8_3a = armv8_2a | get_feature_masks(v8_3a, jsconv, complxnum, rcpc);
 constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2);
-constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, fmi);
+constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm);
 constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2);
 constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint);
+constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2);
 constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
 
 // For ARM cores, the features required can be found in the technical reference manual
@@ -255,7 +262,7 @@ constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
 //     .SM4: sm4
 //     .DP: dotprod
 //     .FHM: fp16fml
-//     .TS: fmi, altnzcz
+//     .TS: flagm, altnzcz
 //     .RNDR: rand
 
 // ID_AA64ISAR1_EL1
@@ -277,7 +284,9 @@ constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);
 //     .DIT: dit
 //     .BT: bti
 
-// ID_AA64PFR1_EL1.SSBS: ssbs
+// ID_AA64PFR1_EL1
+//     .SSBS: ssbs
+//     .MTE: mte
 
 // ID_AA64MMFR2_EL1.AT: uscat
 
@@ -305,7 +314,9 @@ constexpr auto arm_cortex_a78 = armv8_2a | get_feature_masks(dotprod, rcpc, full
 constexpr auto arm_cortex_x1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs); // spe
 constexpr auto arm_neoverse_e1 = armv8_2a | get_feature_masks(rcpc, fullfp16, ssbs);
 constexpr auto arm_neoverse_n1 = armv8_2a | get_feature_masks(dotprod, rcpc, fullfp16, ssbs);
-constexpr auto arm_zeus = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
+constexpr auto arm_neoverse_v1 = armv8_4a | get_feature_masks(sve, i8mm, bf16, fullfp16, ssbs, rand);
+constexpr auto arm_neoverse_n2 = armv8_5a | get_feature_masks(sve, i8mm, bf16, fullfp16, sve2,
+                                                              sve2_bitperm, rand, mte);
 constexpr auto cavium_thunderx = armv8a_crc_crypto;
 constexpr auto cavium_thunderx88 = armv8a_crc_crypto;
 constexpr auto cavium_thunderx88p1 = armv8a_crc_crypto;
@@ -337,6 +348,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
 constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
 constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
 constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
+constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
+constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
+// Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def
+// and sysctl -a hw.optional
 constexpr auto apple_s4 = apple_a12;
 constexpr auto apple_s5 = apple_a12;
 
@@ -360,14 +375,15 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
     {"cortex-a72", CPU::arm_cortex_a72, CPU::generic, 0, Feature::arm_cortex_a72},
     {"cortex-a73", CPU::arm_cortex_a73, CPU::generic, 0, Feature::arm_cortex_a73},
     {"cortex-a75", CPU::arm_cortex_a75, CPU::generic, 0, Feature::arm_cortex_a75},
-    {"cortex-a76", CPU::arm_cortex_a76, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
-    {"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
+    {"cortex-a76", CPU::arm_cortex_a76, CPU::generic, 0, Feature::arm_cortex_a76},
+    {"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::generic, 0, Feature::arm_cortex_a76},
     {"cortex-a77", CPU::arm_cortex_a77, CPU::arm_cortex_a76, 110000, Feature::arm_cortex_a77},
     {"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
     {"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
     {"neoverse-e1", CPU::arm_neoverse_e1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_e1},
     {"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
-    {"zeus", CPU::arm_zeus, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_zeus},
+    {"neoverse-v1", CPU::arm_neoverse_v1, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_v1},
+    {"neoverse-n2", CPU::arm_neoverse_n2, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_n2},
     {"thunderx", CPU::cavium_thunderx, CPU::generic, 0, Feature::cavium_thunderx},
     {"thunderxt88", CPU::cavium_thunderx88, CPU::generic, 0, Feature::cavium_thunderx88},
     {"thunderxt88p1", CPU::cavium_thunderx88p1, CPU::cavium_thunderx88, UINT32_MAX,
@@ -414,6 +430,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
     {"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11},
     {"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12},
     {"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13},
+    {"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14},
+    {"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1},
     {"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4},
     {"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5},
     {"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000,
@@ -558,6 +576,8 @@ constexpr auto arm_cortex_a77 = armv8_2a;
 constexpr auto arm_cortex_a78 = armv8_2a;
 constexpr auto arm_cortex_x1 = armv8_2a;
 constexpr auto arm_neoverse_n1 = armv8_2a;
+constexpr auto arm_neoverse_v1 = armv8_4a;
+constexpr auto arm_neoverse_n2 = armv8_5a;
 constexpr auto nvidia_denver1 = armv8a; // TODO? (crc, crypto)
 constexpr auto nvidia_denver2 = armv8a_crc_crypto;
 constexpr auto apm_xgene1 = armv8a;
@@ -634,12 +654,14 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
     {"cortex-a72", CPU::arm_cortex_a72, CPU::generic, 0, Feature::arm_cortex_a72},
     {"cortex-a73", CPU::arm_cortex_a73, CPU::generic, 0, Feature::arm_cortex_a73},
     {"cortex-a75", CPU::arm_cortex_a75, CPU::generic, 0, Feature::arm_cortex_a75},
-    {"cortex-a76", CPU::arm_cortex_a76, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
-    {"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::arm_cortex_a75, 90000, Feature::arm_cortex_a76},
+    {"cortex-a76", CPU::arm_cortex_a76, CPU::generic, 0, Feature::arm_cortex_a76},
+    {"cortex-a76ae", CPU::arm_cortex_a76ae, CPU::generic, 0, Feature::arm_cortex_a76},
     {"cortex-a77", CPU::arm_cortex_a77, CPU::arm_cortex_a76, 110000, Feature::arm_cortex_a77},
     {"cortex-a78", CPU::arm_cortex_a78, CPU::arm_cortex_a77, 110000, Feature::arm_cortex_a78},
     {"cortex-x1", CPU::arm_cortex_x1, CPU::arm_cortex_a78, 110000, Feature::arm_cortex_x1},
     {"neoverse-n1", CPU::arm_neoverse_n1, CPU::arm_cortex_a76, 100000, Feature::arm_neoverse_n1},
+    {"neoverse-v1", CPU::arm_neoverse_v1, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_v1},
+    {"neoverse-n2", CPU::arm_neoverse_n2, CPU::arm_neoverse_n1, UINT32_MAX, Feature::arm_neoverse_n2},
     {"denver1", CPU::nvidia_denver1, CPU::arm_cortex_a53, UINT32_MAX, Feature::nvidia_denver1},
     {"denver2", CPU::nvidia_denver2, CPU::arm_cortex_a57, UINT32_MAX, Feature::nvidia_denver2},
     {"xgene1", CPU::apm_xgene1, CPU::armv8_a, UINT32_MAX, Feature::apm_xgene1},
@@ -652,13 +674,47 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
     {"exynos-m2", CPU::samsung_exynos_m2, CPU::generic, UINT32_MAX, Feature::samsung_exynos_m2},
     {"exynos-m3", CPU::samsung_exynos_m3, CPU::generic, 0, Feature::samsung_exynos_m3},
     {"exynos-m4", CPU::samsung_exynos_m4, CPU::generic, 0, Feature::samsung_exynos_m4},
-    {"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000,
-     Feature::samsung_exynos_m5},
+    {"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, Feature::samsung_exynos_m5},
     {"apple-a7", CPU::apple_a7, CPU::generic, 0, Feature::apple_a7},
 };
 #endif
 static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);
 
+static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
+{
+    return ::find_cpu(cpu, cpus, ncpu_names);
+}
+
+static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
+{
+    return ::find_cpu(name, cpus, ncpu_names);
+}
+
+static inline const char *find_cpu_name(uint32_t cpu)
+{
+    return ::find_cpu_name(cpu, cpus, ncpu_names);
+}
+
+#if defined _CPU_AARCH64_ && defined _OS_DARWIN_
+
+static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
+{
+    char buffer[128];
+    size_t bufferlen = 128;
+    sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0);
+
+    if(strcmp(buffer,"Apple M1") == 0)
+        return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
+    else if(strcmp(buffer,"Apple M1 Max") == 0)
+        return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
+    else if(strcmp(buffer,"Apple M1 Pro") == 0)
+        return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
+    else
+        return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
+}
+
+#else
+
 // auxval reader
 
 #ifndef AT_HWCAP
@@ -809,7 +865,7 @@ static std::set<CPUID> get_cpuinfo(void)
 static CPU get_cpu_name(CPUID cpuid)
 {
     switch (cpuid.implementer) {
-    case 0x41: // ARM
+    case 0x41: // 'A': ARM
         switch (cpuid.part) {
         case 0xb02: return CPU::arm_mpcore;
         case 0xb36: return CPU::arm_1136jf_s;
@@ -849,20 +905,22 @@ static CPU get_cpu_name(CPUID cpuid)
         case 0xd20: return CPU::arm_cortex_m23;
         case 0xd21: return CPU::arm_cortex_m33;
             // case 0xd22: return CPU::arm_cortex_m55;
-        case 0xd40: return CPU::arm_zeus;
+        case 0xd40: return CPU::arm_neoverse_v1;
         case 0xd41: return CPU::arm_cortex_a78;
         case 0xd43: return CPU::arm_cortex_a65ae;
         case 0xd44: return CPU::arm_cortex_x1;
+        case 0xd49: return CPU::arm_neoverse_n2;
         case 0xd4a: return CPU::arm_neoverse_e1;
         default: return CPU::generic;
         }
-    case 0x42: // Broadcom (Cavium)
+    case 0x42: // 'B': Broadcom (Cavium)
         switch (cpuid.part) {
+            // case 0x00f: return CPU::broadcom_brahma_b15;
             // case 0x100: return CPU::broadcom_brahma_b53;
         case 0x516: return CPU::cavium_thunderx2t99p1;
         default: return CPU::generic;
         }
-    case 0x43: // Cavium
+    case 0x43: // 'C': Cavium
         switch (cpuid.part) {
         case 0xa0: return CPU::cavium_thunderx;
         case 0xa1:
@@ -881,31 +939,32 @@ static CPU get_cpu_name(CPUID cpuid)
         case 0xb8: return CPU::marvell_thunderx3t110;
         default: return CPU::generic;
         }
-    case 0x46: // Fujitsu
+    case 0x46: // 'F': Fujitsu
         switch (cpuid.part) {
         case 0x1: return CPU::fujitsu_a64fx;
         default: return CPU::generic;
         }
-    case 0x48: // HiSilicon
+    case 0x48: // 'H': HiSilicon
         switch (cpuid.part) {
         case 0xd01: return CPU::hisilicon_tsv110;
+        case 0xd40: return CPU::arm_cortex_a76; // Kirin 980
         default: return CPU::generic;
         }
-    case 0x4e: // NVIDIA
+    case 0x4e: // 'N': NVIDIA
         switch (cpuid.part) {
         case 0x000: return CPU::nvidia_denver1;
         case 0x003: return CPU::nvidia_denver2;
         case 0x004: return CPU::nvidia_carmel;
         default: return CPU::generic;
         }
-    case 0x50: // AppliedMicro
+    case 0x50: // 'P': AppliedMicro
         // x-gene 2
         // x-gene 3
         switch (cpuid.part) {
         case 0x000: return CPU::apm_xgene1;
         default: return CPU::generic;
         }
-    case 0x51: // Qualcomm
+    case 0x51: // 'Q': Qualcomm
         switch (cpuid.part) {
         case 0x00f:
         case 0x02d:
@@ -913,42 +972,55 @@ static CPU get_cpu_name(CPUID cpuid)
         case 0x04d:
         case 0x06f:
             return CPU::qualcomm_krait;
-        case 0x201:
-        case 0x205:
-        case 0x211:
+        case 0x201: // silver
+        case 0x205: // gold
+        case 0x211: // silver
             return CPU::qualcomm_kyro;
-        case 0x800:
-        case 0x801:
-        case 0x802:
-        case 0x803:
-        case 0x804:
-        case 0x805:
-            return CPU::arm_cortex_a73; // second-generation Kryo
+            // kryo 2xx
+        case 0x800: // gold
+            return CPU::arm_cortex_a73;
+        case 0x801: // silver
+            return CPU::arm_cortex_a53;
+            // kryo 3xx
+        case 0x802: // gold
+            return CPU::arm_cortex_a75;
+        case 0x803: // silver
+            return CPU::arm_cortex_a55;
+            // kryo 4xx
+        case 0x804: // gold
+            return CPU::arm_cortex_a76;
+        case 0x805: // silver
+            return CPU::arm_cortex_a55;
+            // kryo 5xx seems to be using ID for cortex-a77 directly
         case 0xc00:
             return CPU::qualcomm_falkor;
         case 0xc01:
             return CPU::qualcomm_saphira;
         default: return CPU::generic;
         }
-    case 0x53: // Samsung
-        if (cpuid.part == 1)
+    case 0x53: // 'S': Samsung
+        if (cpuid.part == 1) {
+            if (cpuid.variant == 4)
+                return CPU::samsung_exynos_m2;
             return CPU::samsung_exynos_m1;
+        }
         if (cpuid.variant != 1)
             return CPU::generic;
         switch (cpuid.part) {
         case 0x2: return CPU::samsung_exynos_m3;
         case 0x3: return CPU::samsung_exynos_m4;
+        case 0x4: return CPU::samsung_exynos_m5;
         default: return CPU::generic;
         }
-    case 0x56: // Marvell
+    case 0x56: // 'V': Marvell
         switch (cpuid.part) {
         case 0x581:
         case 0x584:
             return CPU::marvell_pj4;
         default: return CPU::generic;
         }
-    case 0x61: // Apple
-        // https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
+    case 0x61: // 'a': Apple
+        // https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html
         switch (cpuid.part) {
         case 0x0: // Swift
             return CPU::apple_swift;
@@ -976,14 +1048,20 @@ static CPU get_cpu_name(CPUID cpuid)
         case 0x12: // Lightning
         case 0x13: // Thunder
             return CPU::apple_a13;
+        case 0x20: // Icestorm
+        case 0x21: // Firestorm
+            return CPU::apple_a14;
+        case 0x22: // Icestorm m1
+        case 0x23: // Firestorm m1
+            return CPU::apple_m1;
         default: return CPU::generic;
         }
-    case 0x68: // Huaxintong Semiconductor
+    case 0x68: // 'h': Huaxintong Semiconductor
         switch (cpuid.part) {
         case 0x0: return CPU::hxt_phecda;
         default: return CPU::generic;
         }
-    case 0x69: // Intel
+    case 0x69: // 'i': Intel
         switch (cpuid.part) {
         case 0x001: return CPU::intel_3735d;
         default: return CPU::generic;
@@ -993,6 +1071,9 @@ static CPU get_cpu_name(CPUID cpuid)
     }
 }
 
+
+
+
 namespace {
 
 struct arm_arch {
@@ -1036,21 +1117,6 @@ static arm_arch get_elf_arch(void)
 #endif
 }
 
-static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
-{
-    return ::find_cpu(cpu, cpus, ncpu_names);
-}
-
-static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
-{
-    return ::find_cpu(name, cpus, ncpu_names);
-}
-
-static inline const char *find_cpu_name(uint32_t cpu)
-{
-    return ::find_cpu_name(cpu, cpus, ncpu_names);
-}
-
 static arm_arch feature_arch_version(const FeatureList<feature_sz> &feature)
 {
 #ifdef _CPU_AARCH64_
@@ -1165,7 +1231,7 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
     features[1] = (uint32_t)jl_getauxval(AT_HWCAP2);
 #ifdef _CPU_AARCH64_
     if (test_nbit(features, 31)) // HWCAP_PACG
-        set_bit(features, Feature::pa, true);
+        set_bit(features, Feature::pauth, true);
 #endif
     auto cpuinfo = get_cpuinfo();
     auto arch = get_elf_arch();
@@ -1241,6 +1307,8 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
         CPU::arm_cortex_a75,
         CPU::arm_cortex_a76,
         CPU::arm_neoverse_n1,
+        CPU::arm_neoverse_n2,
+        CPU::arm_neoverse_v1,
         CPU::nvidia_denver2,
         CPU::nvidia_carmel,
         CPU::samsung_exynos_m1,
@@ -1275,9 +1343,9 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
     }
     // Ignore feature bits that we are not interested in.
     mask_features(feature_masks, &features[0]);
-
     return std::make_pair(cpu, features);
 }
+#endif
 
 static inline const std::pair<uint32_t,FeatureList<feature_sz>> &get_host_cpu()
 {
@@ -1327,6 +1395,8 @@ static inline const char *normalize_cpu_name(llvm::StringRef name)
 {
     if (name == "ares")
         return "neoverse-n1";
+    if (name == "zeus")
+        return "neoverse-v1";
     if (name == "cyclone")
         return "apple-a7";
     if (name == "typhoon")
@@ -1389,7 +1459,7 @@ static inline void enable_depends(FeatureList<n> &features)
     if (test_nbit(features, Feature::v8_4a)) {
         set_bit(features, Feature::dit, true);
         set_bit(features, Feature::rcpc_immo, true);
-        set_bit(features, Feature::fmi, true);
+        set_bit(features, Feature::flagm, true);
     }
     if (test_nbit(features, Feature::v8_5a)) {
         set_bit(features, Feature::sb, true);
@@ -1532,6 +1602,8 @@ static void ensure_jit_target(bool imaging)
         auto &t = jit_targets[i];
         if (t.en.flags & JL_TARGET_CLONE_ALL)
             continue;
+        // Always clone when code checks CPU features
+        t.en.flags |= JL_TARGET_CLONE_CPU;
         // The most useful one in general...
         t.en.flags |= JL_TARGET_CLONE_LOOP;
 #ifdef _CPU_ARM_
@@ -1586,17 +1658,6 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
         const char *fename_str = fename.name;
         bool enable = test_nbit(features, fename.bit);
         bool disable = test_nbit(data.dis.features, fename.bit);
-#if defined(_CPU_ARM_) && JL_LLVM_VERSION < 90000
-        if (fename.bit == Feature::d32) {
-            if (enable) {
-                feature_strs.push_back("-d16");
-            }
-            else if (disable) {
-                feature_strs.push_back("+d16");
-            }
-            continue;
-        }
-#endif
         if (enable) {
             feature_strs.insert(feature_strs.begin(), std::string("+") + fename_str);
         }
@@ -1604,10 +1665,8 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
             feature_strs.push_back(std::string("-") + fename_str);
         }
     }
-#if JL_LLVM_VERSION >= 110000
     if (test_nbit(features, Feature::v8_6a))
         feature_strs.push_back("+v8.6a");
-#endif
     if (test_nbit(features, Feature::v8_5a))
         feature_strs.push_back("+v8.5a");
     if (test_nbit(features, Feature::v8_4a))
@@ -1741,13 +1800,7 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
     auto max_feature = get_max_feature();
     static const auto res = get_llvm_target_str(TargetData<feature_sz>{host_cpu_name(),
 #ifdef _CPU_AARCH64_
-#  if JL_LLVM_VERSION > 110000
-                "+ecv,"
-#  endif
-#  if JL_LLVM_VERSION > 100000
-                "+tme,"
-#  endif
-                "+am,+specrestrict,+predres,+mte,+lor,+perfmon,+spe,+tracev8.4",
+                "+ecv,+tme,+am,+specrestrict,+predres,+lor,+perfmon,+spe,+tracev8.4",
 #else
                 "+dotprod",
 #endif
@@ -1789,17 +1842,21 @@ extern "C" int jl_test_cpu_feature(jl_cpu_feature_t feature)
 }
 
 #ifdef _CPU_AARCH64_
-// FZ, bit [24]
-static constexpr uint32_t fpcr_fz_mask = 1 << 24;
-
-static inline uint32_t get_fpcr_aarch64(void)
+// FPCR FZ, bit [24]
+static constexpr uint64_t fpcr_fz_mask = 1 << 24;
+// FPCR FZ16, bit [19]
+static constexpr uint64_t fpcr_fz16_mask = 1 << 19;
+// FPCR DN, bit [25]
+static constexpr uint64_t fpcr_dn_mask = 1 << 25;
+
+static inline uint64_t get_fpcr_aarch64(void)
 {
-    uint32_t fpcr;
+    uint64_t fpcr;
     asm volatile("mrs %0, fpcr" : "=r"(fpcr));
     return fpcr;
 }
 
-static inline void set_fpcr_aarch64(uint32_t fpcr)
+static inline void set_fpcr_aarch64(uint64_t fpcr)
 {
     asm volatile("msr fpcr, %0" :: "r"(fpcr));
 }
@@ -1811,8 +1868,22 @@ extern "C" JL_DLLEXPORT int32_t jl_get_zero_subnormals(void)
 
 extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
 {
-    uint32_t fpcr = get_fpcr_aarch64();
-    fpcr = isZero ? (fpcr | fpcr_fz_mask) : (fpcr & ~fpcr_fz_mask);
+    uint64_t fpcr = get_fpcr_aarch64();
+    static uint64_t mask = fpcr_fz_mask | (jl_test_cpu_feature(JL_AArch64_fullfp16) ? fpcr_fz16_mask : 0);
+    fpcr = isZero ? (fpcr | mask) : (fpcr & ~mask);
+    set_fpcr_aarch64(fpcr);
+    return 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
+{
+    return (get_fpcr_aarch64() & fpcr_dn_mask) != 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
+{
+    uint64_t fpcr = get_fpcr_aarch64();
+    fpcr = isDefault ? (fpcr | fpcr_dn_mask) : (fpcr & ~fpcr_dn_mask);
     set_fpcr_aarch64(fpcr);
     return 0;
 }
@@ -1826,4 +1897,14 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
 {
     return isZero;
 }
+
+extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
+{
+    return 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
+{
+    return isDefault;
+}
 #endif
diff --git a/src/processor_fallback.cpp b/src/processor_fallback.cpp
index 416f1dd211a2b4..1f314eb460f0f2 100644
--- a/src/processor_fallback.cpp
+++ b/src/processor_fallback.cpp
@@ -47,7 +47,6 @@ static uint32_t sysimg_init_cb(const void *id)
             best_idx = i;
         }
     }
-    target = sysimg[best_idx];
     jit_targets.push_back(std::move(target));
     return best_idx;
 }
@@ -118,7 +117,7 @@ const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
     return res;
 }
 
-std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
+extern "C" std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
@@ -160,3 +159,13 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
 {
     return isZero;
 }
+
+extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
+{
+    return 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
+{
+    return isDefault;
+}
diff --git a/src/processor_x86.cpp b/src/processor_x86.cpp
index cca7d7722f15e4..f18c7069fa2c2c 100644
--- a/src/processor_x86.cpp
+++ b/src/processor_x86.cpp
@@ -6,9 +6,6 @@
 
 extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
 {
-#if defined _MSC_VER
-    __cpuid(CPUInfo, InfoType);
-#else
     asm volatile (
 #if defined(__i386__) && defined(__PIC__)
         "xchg %%ebx, %%esi;"
@@ -24,14 +21,10 @@ extern "C" JL_DLLEXPORT void jl_cpuid(int32_t CPUInfo[4], int32_t InfoType)
         "=d" (CPUInfo[3]) :
         "a" (InfoType)
         );
-#endif
 }
 
 extern "C" JL_DLLEXPORT void jl_cpuidex(int32_t CPUInfo[4], int32_t InfoType, int32_t subInfoType)
 {
-#if defined _MSC_VER
-    __cpuidex(CPUInfo, InfoType, subInfoType);
-#else
     asm volatile (
 #if defined(__i386__) && defined(__PIC__)
         "xchg %%ebx, %%esi;"
@@ -48,7 +41,6 @@ extern "C" JL_DLLEXPORT void jl_cpuidex(int32_t CPUInfo[4], int32_t InfoType, in
         "a" (InfoType),
         "c" (subInfoType)
         );
-#endif
 }
 
 namespace X86 {
@@ -79,6 +71,7 @@ enum class CPU : uint32_t {
     intel_corei7_icelake_client,
     intel_corei7_icelake_server,
     intel_corei7_tigerlake,
+    intel_corei7_sapphirerapids,
     intel_knights_landing,
     intel_knights_mill,
 
@@ -209,6 +202,9 @@ constexpr auto icelake = cannonlake | get_feature_masks(avx512bitalg, vaes, avx5
 constexpr auto icelake_server = icelake | get_feature_masks(pconfig, wbnoinvd);
 constexpr auto tigerlake = icelake | get_feature_masks(avx512vp2intersect, movdiri,
                                                        movdir64b, shstk);
+constexpr auto sapphirerapids = icelake_server |
+    get_feature_masks(amx_tile, amx_int8, amx_bf16, avx512bf16, serialize, cldemote, waitpkg,
+                      ptwrite, tsxldtrk, enqcmd, shstk, avx512vp2intersect, movdiri, movdir64b);
 
 constexpr auto k8_sse3 = get_feature_masks(sse3, cx16);
 constexpr auto amdfam10 = k8_sse3 | get_feature_masks(sse4a, lzcnt, popcnt, sahf);
@@ -252,14 +248,15 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
     {"knm", CPU::intel_knights_mill, CPU::generic, 0, Feature::knm},
     {"skylake-avx512", CPU::intel_corei7_skylake_avx512, CPU::generic, 0, Feature::skx},
     {"cascadelake", CPU::intel_corei7_cascadelake, CPU::generic, 0, Feature::cascadelake},
-    {"cooperlake", CPU::intel_corei7_cooperlake, CPU::intel_corei7_cascadelake,
-     90000, Feature::cooperlake},
+    {"cooperlake", CPU::intel_corei7_cooperlake, CPU::generic, 0, Feature::cooperlake},
     {"cannonlake", CPU::intel_corei7_cannonlake, CPU::generic, 0, Feature::cannonlake},
     {"icelake-client", CPU::intel_corei7_icelake_client, CPU::generic, 0, Feature::icelake},
     {"icelake-server", CPU::intel_corei7_icelake_server, CPU::generic, 0,
      Feature::icelake_server},
     {"tigerlake", CPU::intel_corei7_tigerlake, CPU::intel_corei7_icelake_client, 100000,
      Feature::tigerlake},
+    {"sapphirerapids", CPU::intel_corei7_sapphirerapids, CPU::intel_corei7_icelake_server, 120000,
+     Feature::sapphirerapids},
 
     {"athlon64", CPU::amd_athlon_64, CPU::generic, 0, Feature::generic},
     {"athlon-fx", CPU::amd_athlon_fx, CPU::generic, 0, Feature::generic},
@@ -282,7 +279,7 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
     {"bdver4", CPU::amd_bdver4, CPU::generic, 0, Feature::bdver4},
 
     {"znver1", CPU::amd_znver1, CPU::generic, 0, Feature::znver1},
-    {"znver2", CPU::amd_znver2, CPU::amd_znver1, 90000, Feature::znver2},
+    {"znver2", CPU::amd_znver2, CPU::generic, 0, Feature::znver2},
 };
 static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);
 
@@ -293,13 +290,9 @@ const int SIG_AMD = 0x68747541; // Auth
 
 static uint64_t get_xcr0(void)
 {
-#if defined _MSC_VER
-    return _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
-#else
     uint32_t eax, edx;
     asm volatile ("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
     return (uint64_t(edx) << 32) | eax;
-#endif
 }
 
 static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t brand_id,
@@ -419,6 +412,10 @@ static CPU get_intel_processor_name(uint32_t family, uint32_t model, uint32_t br
         case 0x8d:
             return CPU::intel_corei7_tigerlake;
 
+            // Sapphire Rapids
+        case 0x8f:
+            return CPU::intel_corei7_sapphirerapids;
+
         case 0x1c: // Most 45 nm Intel Atom processors
         case 0x26: // 45 nm Atom Lincroft
         case 0x27: // 32 nm Atom Medfield
@@ -880,6 +877,8 @@ static void ensure_jit_target(bool imaging)
         auto &t = jit_targets[i];
         if (t.en.flags & JL_TARGET_CLONE_ALL)
             continue;
+        // Always clone when code checks CPU features
+        t.en.flags |= JL_TARGET_CLONE_CPU;
         // The most useful one in general...
         t.en.flags |= JL_TARGET_CLONE_LOOP;
         auto &features0 = jit_targets[t.base].en.features;
@@ -959,9 +958,7 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
     // This can happen with virtualization.
     features.push_back("+64bit");
 #endif
-#if JL_LLVM_VERSION >= 90000
     features.push_back("+cx8");
-#endif
     return std::make_pair(std::move(name), std::move(features));
 }
 
@@ -1004,21 +1001,21 @@ jl_sysimg_fptrs_t jl_init_processor_sysimg(void *hdl)
     return parse_sysimg(hdl, sysimg_init_cb);
 }
 
-std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
+extern "C" JL_DLLEXPORT std::pair<std::string,std::vector<std::string>> jl_get_llvm_target(bool imaging, uint32_t &flags)
 {
     ensure_jit_target(imaging);
     flags = jit_targets[0].en.flags;
     return get_llvm_target_vec(jit_targets[0]);
 }
 
-const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
+extern "C" JL_DLLEXPORT const std::pair<std::string,std::string> &jl_get_llvm_disasm_target(void)
 {
     static const auto res = get_llvm_target_str(TargetData<feature_sz>{"generic", "",
             {feature_masks, 0}, {{}, 0}, 0});
     return res;
 }
 
-std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
+extern "C" JL_DLLEXPORT std::vector<jl_target_spec_t> jl_get_llvm_clone_targets(void)
 {
     if (jit_targets.empty())
         jl_error("JIT targets not initialized");
@@ -1095,3 +1092,14 @@ extern "C" JL_DLLEXPORT int32_t jl_set_zero_subnormals(int8_t isZero)
         return isZero;
     }
 }
+
+// X86 does not support default NaNs
+extern "C" JL_DLLEXPORT int32_t jl_get_default_nans(void)
+{
+    return 0;
+}
+
+extern "C" JL_DLLEXPORT int32_t jl_set_default_nans(int8_t isDefault)
+{
+    return isDefault;
+}
diff --git a/src/rtutils.c b/src/rtutils.c
index c4ed21a78347bb..f3a2e745ed651a 100644
--- a/src/rtutils.c
+++ b/src/rtutils.c
@@ -132,6 +132,14 @@ JL_DLLEXPORT void JL_NORETURN jl_undefined_var_error(jl_sym_t *var)
     jl_throw(jl_new_struct(jl_undefvarerror_type, var));
 }
 
+JL_DLLEXPORT void JL_NORETURN jl_atomic_error(char *str) // == jl_exceptionf(jl_atomicerror_type, "%s", str)
+{
+    jl_value_t *msg = jl_pchar_to_string((char*)str, strlen(str));
+    JL_GC_PUSH1(&msg);
+    jl_throw(jl_new_struct(jl_atomicerror_type, msg));
+}
+
+
 JL_DLLEXPORT void JL_NORETURN jl_bounds_error(jl_value_t *v, jl_value_t *t)
 {
     JL_GC_PUSH2(&v, &t); // root arguments so the caller doesn't need to
@@ -206,23 +214,33 @@ JL_DLLEXPORT void jl_typeassert(jl_value_t *x, jl_value_t *t)
         jl_type_error("typeassert", t, x);
 }
 
+#ifndef HAVE_SSP
+JL_DLLEXPORT uintptr_t __stack_chk_guard = (uintptr_t)0xBAD57ACCBAD67ACC; // 0xBADSTACKBADSTACK
+
+JL_DLLEXPORT void __stack_chk_fail(void)
+{
+    /* put your panic function or similar in here */
+    fprintf(stderr, "fatal error: stack corruption detected\n");
+    jl_gc_debug_critical_error();
+    abort(); // end with abort, since the compiler destroyed the stack upon entry to this function, there's no going back now
+}
+#endif
+
 // exceptions -----------------------------------------------------------------
 
 JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_task_t *current_task = ptls->current_task;
+    jl_task_t *ct = jl_current_task;
     // Must have no safepoint
-    eh->prev = current_task->eh;
-    eh->gcstack = ptls->pgcstack;
-    eh->gc_state = ptls->gc_state;
-    eh->locks_len = ptls->locks.len;
-    eh->defer_signal = ptls->defer_signal;
-    eh->finalizers_inhibited = ptls->finalizers_inhibited;
-    eh->world_age = ptls->world_age;
-    current_task->eh = eh;
+    eh->prev = ct->eh;
+    eh->gcstack = ct->gcstack;
+    eh->gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
+    eh->locks_len = ct->ptls->locks.len;
+    eh->defer_signal = ct->ptls->defer_signal;
+    eh->world_age = ct->world_age;
+    ct->eh = eh;
 #ifdef ENABLE_TIMINGS
-    eh->timing_stack = current_task->timing_stack;
+    eh->timing_stack = ct->ptls->timing_stack;
 #endif
 }
 
@@ -233,47 +251,50 @@ JL_DLLEXPORT void jl_enter_handler(jl_handler_t *eh)
 //   there's additional cleanup required, eg pushing the exception stack.
 JL_DLLEXPORT void jl_eh_restore_state(jl_handler_t *eh)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
 #ifdef _OS_WINDOWS_
-    if (ptls->needs_resetstkoflw) {
+    if (ct->ptls->needs_resetstkoflw) {
         _resetstkoflw();
-        ptls->needs_resetstkoflw = 0;
+        ct->ptls->needs_resetstkoflw = 0;
     }
 #endif
-    jl_task_t *current_task = ptls->current_task;
-    // `eh` may be not equal to `ptls->current_task->eh`. See `jl_pop_handler`
+    // `eh` may be not equal to `ct->eh`. See `jl_pop_handler`
     // This function should **NOT** have any safepoint before the ones at the
     // end.
-    sig_atomic_t old_defer_signal = ptls->defer_signal;
-    int8_t old_gc_state = ptls->gc_state;
-    current_task->eh = eh->prev;
-    ptls->pgcstack = eh->gcstack;
-    small_arraylist_t *locks = &ptls->locks;
-    if (locks->len > eh->locks_len) {
-        for (size_t i = locks->len;i > eh->locks_len;i--)
+    sig_atomic_t old_defer_signal = ct->ptls->defer_signal;
+    int8_t old_gc_state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
+    ct->eh = eh->prev;
+    ct->gcstack = eh->gcstack;
+    small_arraylist_t *locks = &ct->ptls->locks;
+    int unlocks = locks->len > eh->locks_len;
+    if (unlocks) {
+        for (size_t i = locks->len; i > eh->locks_len; i--)
             jl_mutex_unlock_nogc((jl_mutex_t*)locks->items[i - 1]);
         locks->len = eh->locks_len;
     }
-    ptls->world_age = eh->world_age;
-    ptls->defer_signal = eh->defer_signal;
-    ptls->finalizers_inhibited = eh->finalizers_inhibited;
+    ct->world_age = eh->world_age;
+    ct->ptls->defer_signal = eh->defer_signal;
     if (old_gc_state != eh->gc_state) {
-        jl_atomic_store_release(&ptls->gc_state, eh->gc_state);
+        jl_atomic_store_release(&ct->ptls->gc_state, eh->gc_state);
         if (old_gc_state) {
-            jl_gc_safepoint_(ptls);
+            jl_gc_safepoint_(ct->ptls);
         }
     }
     if (old_defer_signal && !eh->defer_signal) {
-        jl_sigint_safepoint(ptls);
+        jl_sigint_safepoint(ct->ptls);
+    }
+    if (jl_atomic_load_relaxed(&jl_gc_have_pending_finalizers) &&
+            unlocks && eh->locks_len == 0) {
+        jl_gc_run_pending_finalizers(ct);
     }
 }
 
 JL_DLLEXPORT void jl_pop_handler(int n)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (__unlikely(n <= 0))
         return;
-    jl_handler_t *eh = ptls->current_task->eh;
+    jl_handler_t *eh = ct->eh;
     while (--n > 0)
         eh = eh->prev;
     jl_eh_restore_state(eh);
@@ -281,15 +302,15 @@ JL_DLLEXPORT void jl_pop_handler(int n)
 
 JL_DLLEXPORT size_t jl_excstack_state(void) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_excstack_t *s = ptls->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *s = ct->excstack;
     return s ? s->top : 0;
 }
 
 JL_DLLEXPORT void jl_restore_excstack(size_t state) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_excstack_t *s = ptls->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *s = ct->excstack;
     if (s) {
         assert(s->top >= state);
         s->top = state;
@@ -310,7 +331,8 @@ static void jl_reserve_excstack(jl_excstack_t **stack JL_REQUIRE_ROOTED_SLOT,
     if (s && s->reserved_size >= reserved_size)
         return;
     size_t bufsz = sizeof(jl_excstack_t) + sizeof(uintptr_t)*reserved_size;
-    jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(jl_get_ptls_states(), bufsz);
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *new_s = (jl_excstack_t*)jl_gc_alloc_buf(ct->ptls, bufsz);
     new_s->top = 0;
     new_s->reserved_size = reserved_size;
     if (s)
@@ -352,15 +374,17 @@ JL_DLLEXPORT jl_value_t *jl_value_ptr(jl_value_t *a)
 JL_DLLEXPORT void jl_set_nth_field(jl_value_t *v, size_t idx0, jl_value_t *rhs)
 {
     jl_datatype_t *st = (jl_datatype_t*)jl_typeof(v);
-    if (!st->mutabl)
-        jl_errorf("setfield! immutable struct of type %s cannot be changed", jl_symbol_name(st->name->name));
+    if (!st->name->mutabl)
+        jl_errorf("setfield!: immutable struct of type %s cannot be changed", jl_symbol_name(st->name->name));
     if (idx0 >= jl_datatype_nfields(st))
         jl_bounds_error_int(v, idx0 + 1);
     //jl_value_t *ft = jl_field_type(st, idx0);
     //if (!jl_isa(rhs, ft)) {
     //    jl_type_error("setfield!", ft, rhs);
     //}
-    set_nth_field(st, (void*)v, idx0, rhs);
+    //int isatomic = jl_field_isatomic(st, idx0);
+    //if (isatomic) ...
+    set_nth_field(st, v, idx0, rhs, 0);
 }
 
 
@@ -507,7 +531,7 @@ JL_DLLEXPORT jl_value_t *jl_stdout_obj(void) JL_NOTSAFEPOINT
     if (jl_base_module == NULL)
         return NULL;
     jl_binding_t *stdout_obj = jl_get_module_binding(jl_base_module, jl_symbol("stdout"));
-    return stdout_obj ? stdout_obj->value : NULL;
+    return stdout_obj ? jl_atomic_load_relaxed(&stdout_obj->value) : NULL;
 }
 
 JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT
@@ -515,7 +539,7 @@ JL_DLLEXPORT jl_value_t *jl_stderr_obj(void) JL_NOTSAFEPOINT
     if (jl_base_module == NULL)
         return NULL;
     jl_binding_t *stderr_obj = jl_get_module_binding(jl_base_module, jl_symbol("stderr"));
-    return stderr_obj ? stderr_obj->value : NULL;
+    return stderr_obj ? jl_atomic_load_relaxed(&stderr_obj->value) : NULL;
 }
 
 // toys for debugging ---------------------------------------------------------
@@ -604,6 +628,75 @@ JL_DLLEXPORT jl_value_t *jl_argument_datatype(jl_value_t *argt JL_PROPAGATES_ROO
     return (jl_value_t*)dt;
 }
 
+static int is_globname_binding(jl_value_t *v, jl_datatype_t *dv) JL_NOTSAFEPOINT
+{
+    jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL;
+    if (globname && dv->name->module && jl_binding_resolved_p(dv->name->module, globname)) {
+        jl_binding_t *b = jl_get_module_binding(dv->name->module, globname);
+        if (b && b->constp) {
+            jl_value_t *bv = jl_atomic_load_relaxed(&b->value);
+            // The `||` makes this function work for both function instances and function types.
+            if (bv == v || jl_typeof(bv) == v)
+                return 1;
+        }
+    }
+    return 0;
+}
+
+static int is_globfunction(jl_value_t *v, jl_datatype_t *dv, jl_sym_t **globname_out) JL_NOTSAFEPOINT
+{
+    jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL;
+    *globname_out = globname;
+    if (globname && !strchr(jl_symbol_name(globname), '#') && !strchr(jl_symbol_name(globname), '@')) {
+        return 1;
+    }
+    return 0;
+}
+
+static size_t jl_static_show_x_sym_escaped(JL_STREAM *out, jl_sym_t *name) JL_NOTSAFEPOINT
+{
+    size_t n = 0;
+
+    char *sn = jl_symbol_name(name);
+    int hidden = 0;
+    if (!(jl_is_identifier(sn) || jl_is_operator(sn))) {
+        hidden = 1;
+    }
+
+    if (hidden) {
+        n += jl_printf(out, "var\"");
+    }
+    n += jl_printf(out, "%s", sn);
+    if (hidden) {
+        n += jl_printf(out, "\"");
+    }
+    return n;
+}
+
+// `jl_static_show()` cannot call `jl_subtype()`, for the GC reasons
+// explained in the comment on `jl_static_show_x_()`, below.
+// This function checks if `vt <: Function` without triggering GC.
+static int jl_static_is_function_(jl_datatype_t *vt) JL_NOTSAFEPOINT {
+    if (!jl_function_type) {  // Make sure there's a Function type defined.
+        return 0;
+    }
+    int _iter_count = 0;  // To prevent infinite loops from corrupt type objects.
+    while (vt != jl_any_type) {
+        if (vt == NULL) {
+            return 0;
+        } else if (_iter_count > 10000) {
+            // We are very likely stuck in a cyclic datastructure, so we assume this is
+            // _not_ a Function.
+            return 0;
+        } else if (vt == jl_function_type) {
+            return 1;
+        }
+        vt = vt->super;
+        _iter_count += 1;
+    }
+    return 0;
+}
+
 // `v` might be pointing to a field inlined in a structure therefore
 // `jl_typeof(v)` may not be the same with `vt` and only `vt` should be
 // used to determine the type of the value.
@@ -669,54 +762,80 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         // avoid printing `typeof(Type)` for `UnionAll`.
         n += jl_printf(out, "UnionAll");
     }
+    else if (vt == jl_vararg_type) {
+        jl_vararg_t *vm = (jl_vararg_t*)v;
+        n += jl_printf(out, "Vararg");
+        if (vm->T) {
+            n += jl_printf(out, "{");
+            n += jl_static_show_x(out, vm->T, depth);
+            if (vm->N) {
+                n += jl_printf(out, ", ");
+                n += jl_static_show_x(out, vm->N, depth);
+            }
+            n += jl_printf(out, "}");
+        }
+    }
     else if (vt == jl_datatype_type) {
+        // typeof(v) == DataType, so v is a Type object.
+        // Types are printed as a fully qualified name, with parameters, e.g.
+        // `Base.Set{Int}`, and function types are printed as e.g. `typeof(Main.f)`
         jl_datatype_t *dv = (jl_datatype_t*)v;
-        jl_sym_t *globname = dv->name->mt != NULL ? dv->name->mt->name : NULL;
-        int globfunc = 0;
-        if (globname && !strchr(jl_symbol_name(globname), '#') &&
-            !strchr(jl_symbol_name(globname), '@') && dv->name->module &&
-            jl_binding_resolved_p(dv->name->module, globname)) {
-            jl_binding_t *b = jl_get_module_binding(dv->name->module, globname);
-            if (b && b->value && jl_typeof(b->value) == v)
-                globfunc = 1;
-        }
+        jl_sym_t *globname;
+        int globfunc = is_globname_binding(v, dv) && is_globfunction(v, dv, &globname);
         jl_sym_t *sym = globfunc ? globname : dv->name->name;
         char *sn = jl_symbol_name(sym);
-        int hidden = !globfunc && strchr(sn, '#');
-        size_t i = 0;
-        int quote = 0;
-        if (hidden) {
-            n += jl_printf(out, "getfield(");
+        size_t quote = 0;
+        if (dv->name == jl_tuple_typename) {
+            if (dv == jl_tuple_type)
+                return jl_printf(out, "Tuple");
+            int taillen = 1, tlen = jl_nparams(dv), i;
+            for (i = tlen-2; i >= 0; i--) {
+                if (jl_tparam(dv, i) == jl_tparam(dv, tlen-1))
+                    taillen++;
+                else
+                    break;
+            }
+            if (taillen == tlen && taillen > 3) {
+                n += jl_printf(out, "NTuple{%d, ", tlen);
+                n += jl_static_show_x(out, jl_tparam0(dv), depth);
+                n += jl_printf(out, "}");
+            }
+            else {
+                n += jl_printf(out, "Tuple{");
+                for (i = 0; i < (taillen > 3 ? tlen-taillen : tlen); i++) {
+                    if (i > 0)
+                        n += jl_printf(out, ", ");
+                    n += jl_static_show_x(out, jl_tparam(dv, i), depth);
+                }
+                if (taillen > 3) {
+                    n += jl_printf(out, ", Vararg{");
+                    n += jl_static_show_x(out, jl_tparam(dv, tlen-1), depth);
+                    n += jl_printf(out, ", %d}", taillen);
+                }
+                n += jl_printf(out, "}");
+            }
+            return n;
         }
-        else if (globfunc) {
+        if (globfunc) {
             n += jl_printf(out, "typeof(");
         }
         if (jl_core_module && (dv->name->module != jl_core_module || !jl_module_exports_p(jl_core_module, sym))) {
             n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth);
-            if (!hidden) {
-                n += jl_printf(out, ".");
-                if (globfunc && !jl_id_start_char(u8_nextchar(sn, &i))) {
-                    n += jl_printf(out, ":(");
-                    quote = 1;
-                }
+            n += jl_printf(out, ".");
+            size_t i = 0;
+            if (globfunc && !jl_id_start_char(u8_nextchar(sn, &i))) {
+                n += jl_printf(out, ":(");
+                quote = 1;
             }
         }
-        if (hidden) {
-            n += jl_printf(out, ", Symbol(\"");
-            n += jl_printf(out, "%s", sn);
-            n += jl_printf(out, "\"))");
-        }
-        else {
-            n += jl_printf(out, "%s", sn);
-            if (globfunc) {
+        n += jl_static_show_x_sym_escaped(out, sym);
+        if (globfunc) {
+            n += jl_printf(out, ")");
+            if (quote) {
                 n += jl_printf(out, ")");
-                if (quote)
-                    n += jl_printf(out, ")");
             }
         }
-        if (dv->parameters && (jl_value_t*)dv != dv->name->wrapper &&
-            (jl_has_free_typevars(v) ||
-             (jl_value_t*)dv != (jl_value_t*)jl_tuple_type)) {
+        if (dv->parameters && (jl_value_t*)dv != dv->name->wrapper) {
             size_t j, tlen = jl_nparams(dv);
             if (tlen > 0) {
                 n += jl_printf(out, "{");
@@ -728,9 +847,6 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 }
                 n += jl_printf(out, "}");
             }
-            else if (dv->name == jl_tuple_typename) {
-                n += jl_printf(out, "{}");
-            }
         }
     }
     else if (vt == jl_intrinsic_type) {
@@ -832,7 +948,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
                 n += jl_printf(out, ")");
             n += jl_printf(out, "<:");
         }
-        n += jl_printf(out, "%s", jl_symbol_name(var->name));
+        n += jl_static_show_x_sym_escaped(out, var->name);
         if (showbounds && (ub != (jl_value_t*)jl_any_type || lb != jl_bottom_type)) {
             // show type-var upper bound if it is defined, or if we showed the lower bound
             int ua = jl_is_unionall(ub);
@@ -903,7 +1019,7 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
     }
     else if (vt == jl_expr_type) {
         jl_expr_t *e = (jl_expr_t*)v;
-        if (e->head == assign_sym && jl_array_len(e->args) == 2) {
+        if (e->head == jl_assign_sym && jl_array_len(e->args) == 2) {
             n += jl_static_show_x(out, jl_exprarg(e,0), depth);
             n += jl_printf(out, " = ");
             n += jl_static_show_x(out, jl_exprarg(e,1), depth);
@@ -932,12 +1048,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         n += jl_printf(out, ")}[");
         size_t j, tlen = jl_array_len(v);
         jl_array_t *av = (jl_array_t*)v;
-        jl_datatype_t *el_type = (jl_datatype_t*)jl_tparam0(vt);
+        jl_value_t *el_type = jl_tparam0(vt);
+        char *typetagdata = (!av->flags.ptrarray && jl_is_uniontype(el_type)) ? jl_array_typetagdata(av) : NULL;
         int nlsep = 0;
         if (av->flags.ptrarray) {
             // print arrays with newlines, unless the elements are probably small
             for (j = 0; j < tlen; j++) {
-                jl_value_t *p = jl_array_ptr_ref(av, j);
+                jl_value_t **ptr = ((jl_value_t**)av->data) + j;
+                jl_value_t *p = *ptr;
                 if (p != NULL && (uintptr_t)p >= 4096U) {
                     jl_value_t *p_ty = jl_typeof(p);
                     if ((uintptr_t)p_ty >= 4096U) {
@@ -953,11 +1071,14 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             n += jl_printf(out, "\n  ");
         for (j = 0; j < tlen; j++) {
             if (av->flags.ptrarray) {
-                n += jl_static_show_x(out, jl_array_ptr_ref(v, j), depth);
+                jl_value_t **ptr = ((jl_value_t**)av->data) + j;
+                n += jl_static_show_x(out, *ptr, depth);
             }
             else {
                 char *ptr = ((char*)av->data) + j * av->elsize;
-                n += jl_static_show_x_(out, (jl_value_t*)ptr, el_type, depth);
+                n += jl_static_show_x_(out, (jl_value_t*)ptr,
+                        typetagdata ? (jl_datatype_t*)jl_nth_union_component(el_type, typetagdata[j]) : (jl_datatype_t*)el_type,
+                        depth);
             }
             if (j != tlen - 1)
                 n += jl_printf(out, nlsep ? ",\n  " : ", ");
@@ -978,7 +1099,36 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
         n += jl_static_show_x(out, *(jl_value_t**)v, depth);
         n += jl_printf(out, ")");
     }
+    else if (jl_static_is_function_(vt) && is_globname_binding(v, (jl_datatype_t*)vt)) {
+        // v is function instance (an instance of a Function type).
+        jl_datatype_t *dv = (jl_datatype_t*)vt;
+        jl_sym_t *sym;
+        int globfunc = is_globfunction(v, dv, &sym);
+        int quote = 0;
+        if (jl_core_module && (dv->name->module != jl_core_module || !jl_module_exports_p(jl_core_module, sym))) {
+            n += jl_static_show_x(out, (jl_value_t*)dv->name->module, depth);
+            n += jl_printf(out, ".");
+
+            size_t i = 0;
+            char *sn = jl_symbol_name(sym);
+            if (globfunc && !jl_id_start_char(u8_nextchar(sn, &i))) {
+                n += jl_printf(out, ":(");
+                quote = 1;
+            }
+        }
+
+        n += jl_static_show_x_sym_escaped(out, sym);
+
+        if (globfunc) {
+            if (quote) {
+                n += jl_printf(out, ")");
+            }
+        }
+    }
     else if (jl_datatype_type && jl_is_datatype(vt)) {
+        // typeof(v) isa DataType, so v is an *instance of* a type that is a Datatype,
+        // meaning v is e.g. an instance of a struct. These are printed as a call to a
+        // type constructor, such as e.g. `Base.UnitRange{Int64}(start=1, stop=2)`
         int istuple = jl_is_tuple_type(vt), isnamedtuple = jl_is_namedtuple_type(vt);
         size_t tlen = jl_datatype_nfields(vt);
         if (isnamedtuple) {
@@ -1000,10 +1150,11 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             size_t i = 0;
             if (vt == jl_typemap_entry_type)
                 i = 1;
+            jl_value_t *names = isnamedtuple ? jl_tparam0(vt) : (jl_value_t*)jl_field_names(vt);
             for (; i < tlen; i++) {
                 if (!istuple) {
-                    n += jl_printf(out, "%s", jl_symbol_name(jl_field_name(vt, i)));
-                    n += jl_printf(out, "=");
+                    jl_value_t *fname = isnamedtuple ? jl_fieldref_noalloc(names, i) : jl_svecref(names, i);
+                    n += jl_printf(out, "%s=", jl_symbol_name((jl_sym_t*)fname));
                 }
                 size_t offs = jl_field_offset(vt, i);
                 char *fld_ptr = (char*)v + offs;
@@ -1025,7 +1176,8 @@ static size_t jl_static_show_x_(JL_STREAM *out, jl_value_t *v, jl_datatype_t *vt
             }
             if (vt == jl_typemap_entry_type) {
                 n += jl_printf(out, ", next=↩︎\n  ");
-                n += jl_static_show_next_(out, (jl_value_t*)((jl_typemap_entry_t*)v)->next, v, depth);
+                jl_value_t *next = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)v)->next);
+                n += jl_static_show_next_(out, next, v, depth);
             }
         }
         n += jl_printf(out, ")");
@@ -1074,12 +1226,12 @@ static size_t jl_static_show_next_(JL_STREAM *out, jl_value_t *v, jl_value_t *pr
                 }
                 // verify that we aren't trying to follow a circular list
                 // by following the list again, and ensuring this is the only link to next
-                jl_value_t *mnext = (jl_value_t*)((jl_typemap_entry_t*)m)->next;
+                jl_value_t *mnext = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)m)->next);
                 jl_value_t *m2 = p->v;
                 if (m2 == mnext)
                     break;
                 while (m2 && jl_typeis(m2, jl_typemap_entry_type)) {
-                    jl_value_t *mnext2 = (jl_value_t*)((jl_typemap_entry_t*)m2)->next;
+                    jl_value_t *mnext2 = (jl_value_t*)jl_atomic_load_relaxed(&((jl_typemap_entry_t*)m2)->next);
                     if (mnext2 == mnext) {
                         if (m2 != m)
                             mnext = NULL;
@@ -1106,55 +1258,71 @@ JL_DLLEXPORT size_t jl_static_show(JL_STREAM *out, jl_value_t *v) JL_NOTSAFEPOIN
 
 JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_NOTSAFEPOINT
 {
+    size_t n = 0;
+    size_t i;
     jl_value_t *ftype = (jl_value_t*)jl_first_argument_datatype(type);
     if (ftype == NULL)
         return jl_static_show(s, type);
-    size_t n = 0;
+    jl_unionall_t *tvars = (jl_unionall_t*)type;
+    int nvars = jl_subtype_env_size(type);
+    struct recur_list *depth = NULL;
+    if (nvars > 0)  {
+        depth = (struct recur_list*)alloca(sizeof(struct recur_list) * nvars);
+        for (i = 0; i < nvars; i++) {
+            depth[i].prev = i == 0 ? NULL : &depth[i - 1];
+            depth[i].v = type;
+            type = ((jl_unionall_t*)type)->body;
+        }
+        depth += nvars - 1;
+    }
+    if (!jl_is_datatype(type)) {
+        n += jl_static_show(s, type);
+        return n;
+    }
     if (jl_nparams(ftype) == 0 || ftype == ((jl_datatype_t*)ftype)->name->wrapper) {
         n += jl_printf(s, "%s", jl_symbol_name(((jl_datatype_t*)ftype)->name->mt->name));
     }
     else {
         n += jl_printf(s, "(::");
-        n += jl_static_show(s, ftype);
+        n += jl_static_show_x(s, ftype, depth);
         n += jl_printf(s, ")");
     }
-    jl_unionall_t *tvars = (jl_unionall_t*)type;
-    type = jl_unwrap_unionall(type);
-    if (!jl_is_datatype(type)) {
-        n += jl_printf(s, " ");
-        n += jl_static_show(s, type);
-        return n;
-    }
     size_t tl = jl_nparams(type);
     n += jl_printf(s, "(");
-    size_t i;
     for (i = 1; i < tl; i++) {
         jl_value_t *tp = jl_tparam(type, i);
         if (i != tl - 1) {
-            n += jl_static_show(s, tp);
+            n += jl_static_show_x(s, tp, depth);
             n += jl_printf(s, ", ");
         }
         else {
-            if (jl_is_vararg_type(tp)) {
-                n += jl_static_show(s, jl_unwrap_vararg(tp));
+            if (jl_vararg_kind(tp) == JL_VARARG_UNBOUND) {
+                tp = jl_unwrap_vararg(tp);
+                if (jl_is_unionall(tp))
+                    n += jl_printf(s, "(");
+                n += jl_static_show_x(s, tp, depth);
+                if (jl_is_unionall(tp))
+                    n += jl_printf(s, ")");
                 n += jl_printf(s, "...");
             }
             else {
-                n += jl_static_show(s, tp);
+                n += jl_static_show_x(s, tp, depth);
             }
         }
     }
     n += jl_printf(s, ")");
     if (jl_is_unionall(tvars)) {
+        depth -= nvars - 1;
         int first = 1;
         n += jl_printf(s, " where {");
         while (jl_is_unionall(tvars)) {
-            if (first)
-                first = 0;
-            else
+            if (!first)
                 n += jl_printf(s, ", ");
-            n += jl_static_show(s, (jl_value_t*)tvars->var);
+            n += jl_static_show_x(s, (jl_value_t*)tvars->var, first ? NULL : depth);
             tvars = (jl_unionall_t*)tvars->body;
+            if (!first)
+                depth += 1;
+            first = 0;
         }
         n += jl_printf(s, "}");
     }
@@ -1163,10 +1331,9 @@ JL_DLLEXPORT size_t jl_static_show_func_sig(JL_STREAM *s, jl_value_t *type) JL_N
 
 JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
-    ptls->safe_restore = &buf;
+    jl_set_safe_restore(&buf);
     if (!jl_setjmp(buf, 0)) {
         jl_static_show((JL_STREAM*)STDERR_FILENO, (jl_value_t*)jl_value);
         jl_printf((JL_STREAM*)STDERR_FILENO,"\n");
@@ -1174,7 +1341,7 @@ JL_DLLEXPORT void jl_(void *jl_value) JL_NOTSAFEPOINT
     else {
         jl_printf((JL_STREAM*)STDERR_FILENO, "\n!!! ERROR in jl_ -- ABORTING !!!\n");
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
 }
 
 JL_DLLEXPORT void jl_breakpoint(jl_value_t *v)
@@ -1182,6 +1349,11 @@ JL_DLLEXPORT void jl_breakpoint(jl_value_t *v)
     // put a breakpoint in your debugger here
 }
 
+JL_DLLEXPORT void jl_test_failure_breakpoint(jl_value_t *v)
+{
+    // put a breakpoint in your debugger here
+}
+
 // logging tools --------------------------------------------------------------
 
 void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
@@ -1239,27 +1411,6 @@ void jl_log(int level, jl_value_t *module, jl_value_t *group, jl_value_t *id,
     JL_GC_POP();
 }
 
-#if 0
-void jl_depwarn(const char *msg, jl_value_t *sym)
-{
-    static jl_value_t *depwarn_func = NULL;
-    if (!depwarn_func && jl_base_module) {
-        depwarn_func = jl_get_global(jl_base_module, jl_symbol("depwarn"));
-    }
-    if (!depwarn_func) {
-        jl_safe_printf("WARNING: %s\n", msg);
-        return;
-    }
-    jl_value_t **depwarn_args;
-    JL_GC_PUSHARGS(depwarn_args, 3);
-    depwarn_args[0] = depwarn_func;
-    depwarn_args[1] = jl_cstr_to_string(msg);
-    depwarn_args[2] = sym;
-    jl_apply(depwarn_args, 3);
-    JL_GC_POP();
-}
-#endif
-
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp
index b9b3ed4dea41ab..e3543c9f626565 100644
--- a/src/runtime_ccall.cpp
+++ b/src/runtime_ccall.cpp
@@ -27,34 +27,34 @@ using namespace llvm;
 static std::map<std::string, void*> libMap;
 static jl_mutex_t libmap_lock;
 extern "C"
-void *jl_get_library_(const char *f_lib, int throw_err) JL_NOTSAFEPOINT
+void *jl_get_library_(const char *f_lib, int throw_err)
 {
-    void *hnd;
+    if (f_lib == NULL)
+        return jl_RTLD_DEFAULT_handle;
 #ifdef _OS_WINDOWS_
     if (f_lib == JL_EXE_LIBNAME)
         return jl_exe_handle;
-    if (f_lib == JL_DL_LIBNAME)
-        return jl_dl_handle;
+    if (f_lib == JL_LIBJULIA_INTERNAL_DL_LIBNAME)
+        return jl_libjulia_internal_handle;
+    if (f_lib == JL_LIBJULIA_DL_LIBNAME)
+        return jl_libjulia_handle;
 #endif
-    if (f_lib == NULL)
-        return jl_RTLD_DEFAULT_handle;
-    JL_LOCK_NOGC(&libmap_lock);
+    JL_LOCK(&libmap_lock);
     // This is the only operation we do on the map, which doesn't invalidate
     // any references or iterators.
     void **map_slot = &libMap[f_lib];
-    JL_UNLOCK_NOGC(&libmap_lock);
-    hnd = jl_atomic_load_acquire(map_slot);
-    if (hnd != NULL)
-        return hnd;
-    // We might run this concurrently on two threads but it doesn't matter.
-    hnd = jl_load_dynamic_library(f_lib, JL_RTLD_DEFAULT, throw_err);
-    if (hnd != NULL)
-        jl_atomic_store_release(map_slot, hnd);
+    void *hnd = *map_slot;
+    if (hnd == NULL) {
+        hnd = jl_load_dynamic_library(f_lib, JL_RTLD_DEFAULT, throw_err);
+        if (hnd != NULL)
+            *map_slot = hnd;
+    }
+    JL_UNLOCK(&libmap_lock);
     return hnd;
 }
 
 extern "C" JL_DLLEXPORT
-void *jl_load_and_lookup(const char *f_lib, const char *f_name, void **hnd) JL_NOTSAFEPOINT
+void *jl_load_and_lookup(const char *f_lib, const char *f_name, _Atomic(void*) *hnd)
 {
     void *handle = jl_atomic_load_acquire(hnd);
     if (!handle)
@@ -157,7 +157,7 @@ std::string jl_format_filename(StringRef output_pattern)
             }
             switch (c) {
             case 'p':
-                outfile << jl_getpid();
+                outfile << uv_os_getpid();
                 break;
             case 'd':
                 if (got_pwd)
@@ -208,9 +208,11 @@ extern "C" JL_DLLEXPORT char *jl_format_filename(const char *output_pattern)
 }
 
 
+static uv_mutex_t trampoline_lock; // for accesses to the cache and freelist
+
 static void *trampoline_freelist;
 
-static void *trampoline_alloc()
+static void *trampoline_alloc() JL_NOTSAFEPOINT // lock taken by caller
 {
     const int sz = 64; // oversized for most platforms. todo: use precise value?
     if (!trampoline_freelist) {
@@ -231,6 +233,7 @@ static void *trampoline_alloc()
 #endif
         errno = last_errno;
         void *next = NULL;
+        assert(sz < jl_page_size);
         for (size_t i = 0; i + sz <= jl_page_size; i += sz) {
             void **curr = (void**)((char*)mem + i);
             *curr = next;
@@ -243,13 +246,13 @@ static void *trampoline_alloc()
     return tramp;
 }
 
-static void trampoline_free(void *tramp)
+static void trampoline_free(void *tramp) JL_NOTSAFEPOINT    // lock taken by caller
 {
     *(void**)tramp = trampoline_freelist;
     trampoline_freelist = tramp;
 }
 
-static void trampoline_deleter(void **f)
+static void trampoline_deleter(void **f) JL_NOTSAFEPOINT
 {
     void *tramp = f[0];
     void *fobj = f[1];
@@ -258,17 +261,20 @@ static void trampoline_deleter(void **f)
     f[0] = NULL;
     f[2] = NULL;
     f[3] = NULL;
+    uv_mutex_lock(&trampoline_lock);
     if (tramp)
         trampoline_free(tramp);
     if (fobj && cache)
         ptrhash_remove((htable_t*)cache, fobj);
     if (nval)
         free(nval);
+    uv_mutex_unlock(&trampoline_lock);
 }
 
+typedef void *(*init_trampoline_t)(void *tramp, void **nval) JL_NOTSAFEPOINT;
+
 // Use of `cache` is not clobbered in JL_TRY
 JL_GCC_IGNORE_START("-Wclobbered")
-// TODO: need a thread lock around the cache access parts of this function
 extern "C" JL_DLLEXPORT
 jl_value_t *jl_get_cfunction_trampoline(
     // dynamic inputs:
@@ -277,11 +283,12 @@ jl_value_t *jl_get_cfunction_trampoline(
     // call-site constants:
     htable_t *cache, // weakref htable indexed by (fobj, vals)
     jl_svec_t *fill,
-    void *(*init_trampoline)(void *tramp, void **nval),
+    init_trampoline_t init_trampoline,
     jl_unionall_t *env,
     jl_value_t **vals)
 {
     // lookup (fobj, vals) in cache
+    uv_mutex_lock(&trampoline_lock);
     if (!cache->table)
         htable_new(cache, 1);
     if (fill != jl_emptysvec) {
@@ -293,6 +300,7 @@ jl_value_t *jl_get_cfunction_trampoline(
         }
     }
     void *tramp = ptrhash_get(cache, (void*)fobj);
+    uv_mutex_unlock(&trampoline_lock);
     if (tramp != HT_NOTFOUND) {
         assert((jl_datatype_t*)jl_typeof(tramp) == result_type);
         return (jl_value_t*)tramp;
@@ -332,11 +340,8 @@ jl_value_t *jl_get_cfunction_trampoline(
             ((void**)result)[1] = (void*)fobj;
         }
         if (!permanent) {
-            void *ptr_finalizer[2] = {
-                    (void*)jl_voidpointer_type,
-                    (void*)&trampoline_deleter
-                };
-            jl_gc_add_finalizer(result, (jl_value_t*)&ptr_finalizer[1]);
+            jl_task_t *ct = jl_current_task;
+            jl_gc_add_ptr_finalizer(ct->ptls, result, (void*)(uintptr_t)&trampoline_deleter);
             ((void**)result)[2] = (void*)cache;
             ((void**)result)[3] = (void*)nval;
         }
@@ -345,10 +350,18 @@ jl_value_t *jl_get_cfunction_trampoline(
         free(nval);
         jl_rethrow();
     }
+    uv_mutex_lock(&trampoline_lock);
     tramp = trampoline_alloc();
     ((void**)result)[0] = tramp;
     tramp = init_trampoline(tramp, nval);
     ptrhash_put(cache, (void*)fobj, result);
+    uv_mutex_unlock(&trampoline_lock);
     return result;
 }
 JL_GCC_IGNORE_STOP
+
+void jl_init_runtime_ccall(void)
+{
+    JL_MUTEX_INIT(&libmap_lock);
+    uv_mutex_init(&trampoline_lock);
+}
diff --git a/src/runtime_intrinsics.c b/src/runtime_intrinsics.c
index 17d9c609110228..89c9449e55920c 100644
--- a/src/runtime_intrinsics.c
+++ b/src/runtime_intrinsics.c
@@ -8,12 +8,225 @@
 //
 // TODO: add half-float support
 
+#include "APInt-C.h"
 #include "julia.h"
 #include "julia_internal.h"
-#include "APInt-C.h"
 
 const unsigned int host_char_bit = 8;
 
+// float16 intrinsics
+// TODO: use LLVM's compiler-rt on all platforms (Xcode already links compiler-rt)
+
+#if !defined(_OS_DARWIN_)
+
+static inline float half_to_float(uint16_t ival) JL_NOTSAFEPOINT
+{
+    uint32_t sign = (ival & 0x8000) >> 15;
+    uint32_t exp = (ival & 0x7c00) >> 10;
+    uint32_t sig = (ival & 0x3ff) >> 0;
+    uint32_t ret;
+
+    if (exp == 0) {
+        if (sig == 0) {
+            sign = sign << 31;
+            ret = sign | exp | sig;
+        }
+        else {
+            int n_bit = 1;
+            uint16_t bit = 0x0200;
+            while ((bit & sig) == 0) {
+                n_bit = n_bit + 1;
+                bit = bit >> 1;
+            }
+            sign = sign << 31;
+            exp = ((-14 - n_bit + 127) << 23);
+            sig = ((sig & (~bit)) << n_bit) << (23 - 10);
+            ret = sign | exp | sig;
+        }
+    }
+    else if (exp == 0x1f) {
+        if (sig == 0) { // Inf
+            if (sign == 0)
+                ret = 0x7f800000;
+            else
+                ret = 0xff800000;
+        }
+        else // NaN
+            ret = 0x7fc00000 | (sign << 31) | (sig << (23 - 10));
+    }
+    else {
+        sign = sign << 31;
+        exp = ((exp - 15 + 127) << 23);
+        sig = sig << (23 - 10);
+        ret = sign | exp | sig;
+    }
+
+    float fret;
+    memcpy(&fret, &ret, sizeof(float));
+    return fret;
+}
+
+// float to half algorithm from:
+//   "Fast Half Float Conversion" by Jeroen van der Zijp
+//   ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+//
+// With adjustments for round-to-nearest, ties to even.
+
+static uint16_t basetable[512] = {
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+    0x0000, 0x0000, 0x0000, 0x0400, 0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000,
+    0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00,
+    0x5000, 0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
+    0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+    0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400,
+    0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000,
+    0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
+    0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
+    0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00};
+
+static uint8_t shifttable[512] = {
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f,
+    0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+    0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x0d, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19,
+    0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
+    0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+    0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+    0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
+
+static inline uint16_t float_to_half(float param) JL_NOTSAFEPOINT
+{
+    uint32_t f;
+    memcpy(&f, &param, sizeof(float));
+    if (isnan(param)) {
+        uint32_t t = 0x8000 ^ (0x8000 & ((uint16_t)(f >> 0x10)));
+        return t ^ ((uint16_t)(f >> 0xd));
+    }
+    int i = ((f & ~0x007fffff) >> 23);
+    uint8_t sh = shifttable[i];
+    f &= 0x007fffff;
+    // If `val` is subnormal, the tables are set up to force the
+    // result to 0, so the significand has an implicit `1` in the
+    // cases we care about.
+    f |= 0x007fffff + 0x1;
+    uint16_t h = (uint16_t)(basetable[i] + ((f >> sh) & 0x03ff));
+    // round
+    // NOTE: we maybe should ignore NaNs here, but the payload is
+    // getting truncated anyway so "rounding" it might not matter
+    int nextbit = (f >> (sh - 1)) & 1;
+    if (nextbit != 0 && (h & 0x7C00) != 0x7C00) {
+        // Round halfway to even or check lower bits
+        if ((h & 1) == 1 || (f & ((1 << (sh - 1)) - 1)) != 0)
+            h += UINT16_C(1);
+    }
+    return h;
+}
+
+JL_DLLEXPORT float __gnu_h2f_ieee(uint16_t param)
+{
+    return half_to_float(param);
+}
+
+JL_DLLEXPORT float __extendhfsf2(uint16_t param)
+{
+    return half_to_float(param);
+}
+
+JL_DLLEXPORT uint16_t __gnu_f2h_ieee(float param)
+{
+    return float_to_half(param);
+}
+
+JL_DLLEXPORT uint16_t __truncdfhf2(double param)
+{
+    float res = (float)param;
+    uint32_t resi;
+    memcpy(&resi, &res, sizeof(res));
+    if ((resi&0x7fffffffu) < 0x38800000u){ // if Float16(res) is subnormal
+        // shift so that the mantissa lines up where it would for normal Float16
+        uint32_t shift = 113u-((resi & 0x7f800000u)>>23u);
+        if (shift<23u) {
+            resi |= 0x00800000; // set implicit bit
+            resi >>= shift;
+        }
+    }
+    if ((resi & 0x1fffu) == 0x1000u) { // if we are halfway between 2 Float16 values
+        memcpy(&resi, &res, sizeof(res));
+        // adjust the value by 1 ULP in the direction that will make Float16(res) give the right answer
+        resi += (fabs(res) < fabs(param)) - (fabs(param) < fabs(res));
+        memcpy(&res, &resi, sizeof(res));
+    }
+    return float_to_half(res);
+}
+
+#endif
+
 // run time version of bitcast intrinsic
 JL_DLLEXPORT jl_value_t *jl_bitcast(jl_value_t *ty, jl_value_t *v)
 {
@@ -43,7 +256,7 @@ JL_DLLEXPORT jl_value_t *jl_pointerref(jl_value_t *p, jl_value_t *i, jl_value_t
         return *pp;
     }
     else {
-        if (!jl_is_datatype(ety))
+        if (!is_valid_intrinsic_elptr(ety))
             jl_error("pointerref: invalid pointer");
         size_t nb = LLT_ALIGN(jl_datatype_size(ety), jl_datatype_align(ety));
         char *pp = (char*)jl_unbox_long(p) + (jl_unbox_long(i)-1)*nb;
@@ -56,25 +269,194 @@ JL_DLLEXPORT jl_value_t *jl_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t
 {
     JL_TYPECHK(pointerset, pointer, p);
     JL_TYPECHK(pointerset, long, i);
-    JL_TYPECHK(pointerref, long, align);
+    JL_TYPECHK(pointerset, long, align);
     jl_value_t *ety = jl_tparam0(jl_typeof(p));
     if (ety == (jl_value_t*)jl_any_type) {
         jl_value_t **pp = (jl_value_t**)(jl_unbox_long(p) + (jl_unbox_long(i)-1)*sizeof(void*));
         *pp = x;
     }
     else {
-        if (!jl_is_datatype(ety))
+        if (!is_valid_intrinsic_elptr(ety))
             jl_error("pointerset: invalid pointer");
+        if (jl_typeof(x) != ety)
+            jl_type_error("pointerset", ety, x);
         size_t elsz = jl_datatype_size(ety);
         size_t nb = LLT_ALIGN(elsz, jl_datatype_align(ety));
         char *pp = (char*)jl_unbox_long(p) + (jl_unbox_long(i)-1)*nb;
-        if (jl_typeof(x) != ety)
-            jl_type_error("pointerset", ety, x);
         memcpy(pp, x, elsz);
     }
     return p;
 }
 
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerref(jl_value_t *p, jl_value_t *order)
+{
+    JL_TYPECHK(atomic_pointerref, pointer, p);
+    JL_TYPECHK(atomic_pointerref, symbol, order)
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 1, 0);
+    jl_value_t *ety = jl_tparam0(jl_typeof(p));
+    char *pp = (char*)jl_unbox_long(p);
+    if (ety == (jl_value_t*)jl_any_type) {
+        return jl_atomic_load((_Atomic(jl_value_t*)*)pp);
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointerref: invalid pointer");
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
+            jl_error("atomic_pointerref: invalid pointer for atomic operation");
+        return jl_atomic_new_bits(ety, pp);
+    }
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerset(jl_value_t *p, jl_value_t *x, jl_value_t *order)
+{
+    JL_TYPECHK(atomic_pointerset, pointer, p);
+    JL_TYPECHK(atomic_pointerset, symbol, order);
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 0, 1);
+    jl_value_t *ety = jl_tparam0(jl_typeof(p));
+    char *pp = (char*)jl_unbox_long(p);
+    if (ety == (jl_value_t*)jl_any_type) {
+        jl_atomic_store((_Atomic(jl_value_t*)*)pp, x);
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointerset: invalid pointer");
+        if (jl_typeof(x) != ety)
+            jl_type_error("atomic_pointerset", ety, x);
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
+            jl_error("atomic_pointerset: invalid pointer for atomic operation");
+        jl_atomic_store_bits(pp, x, nb);
+    }
+    return p;
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerswap(jl_value_t *p, jl_value_t *x, jl_value_t *order)
+{
+    JL_TYPECHK(atomic_pointerswap, pointer, p);
+    JL_TYPECHK(atomic_pointerswap, symbol, order);
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 1, 1);
+    jl_value_t *ety = jl_tparam0(jl_typeof(p));
+    jl_value_t *y;
+    char *pp = (char*)jl_unbox_long(p);
+    if (ety == (jl_value_t*)jl_any_type) {
+        y = jl_atomic_exchange((_Atomic(jl_value_t*)*)pp, x);
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointerswap: invalid pointer");
+        if (jl_typeof(x) != ety)
+            jl_type_error("atomic_pointerswap", ety, x);
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
+            jl_error("atomic_pointerswap: invalid pointer for atomic operation");
+        y = jl_atomic_swap_bits(ety, pp, x, nb);
+    }
+    return y;
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_pointermodify(jl_value_t *p, jl_value_t *f, jl_value_t *x, jl_value_t *order)
+{
+    JL_TYPECHK(atomic_pointermodify, pointer, p);
+    JL_TYPECHK(atomic_pointermodify, symbol, order)
+    (void)jl_get_atomic_order_checked((jl_sym_t*)order, 1, 1);
+    jl_value_t *ety = jl_tparam0(jl_typeof(p));
+    char *pp = (char*)jl_unbox_long(p);
+    jl_value_t *expected;
+    if (ety == (jl_value_t*)jl_any_type) {
+        expected = jl_atomic_load((_Atomic(jl_value_t*)*)pp);
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointermodify: invalid pointer");
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
+            jl_error("atomic_pointermodify: invalid pointer for atomic operation");
+        expected = jl_atomic_new_bits(ety, pp);
+    }
+    jl_value_t **args;
+    JL_GC_PUSHARGS(args, 2);
+    args[0] = expected;
+    while (1) {
+        args[1] = x;
+        jl_value_t *y = jl_apply_generic(f, args, 2);
+        args[1] = y;
+        if (ety == (jl_value_t*)jl_any_type) {
+            if (jl_atomic_cmpswap((_Atomic(jl_value_t*)*)pp, &expected, y))
+                break;
+        }
+        else {
+            //if (!is_valid_intrinsic_elptr(ety)) // handled by jl_atomic_pointerref earlier
+            //    jl_error("atomic_pointermodify: invalid pointer");
+            if (jl_typeof(y) != ety)
+                jl_type_error("atomic_pointermodify", ety, y);
+            size_t nb = jl_datatype_size(ety);
+            if (jl_atomic_bool_cmpswap_bits(pp, expected, y, nb))
+                break;
+            expected = jl_atomic_new_bits(ety, pp);
+        }
+        args[0] = expected;
+        jl_gc_safepoint();
+    }
+    // args[0] == expected (old)
+    // args[1] == y (new)
+    jl_datatype_t *rettyp = jl_apply_modify_type(ety);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    args[0] = jl_new_struct(rettyp, args[0], args[1]);
+    JL_GC_POP();
+    return args[0];
+}
+
+
+JL_DLLEXPORT jl_value_t *jl_atomic_pointerreplace(jl_value_t *p, jl_value_t *expected, jl_value_t *x, jl_value_t *success_order_sym, jl_value_t *failure_order_sym)
+{
+    JL_TYPECHK(atomic_pointerreplace, pointer, p);
+    JL_TYPECHK(atomic_pointerreplace, symbol, success_order_sym);
+    JL_TYPECHK(atomic_pointerreplace, symbol, failure_order_sym);
+    enum jl_memory_order success_order = jl_get_atomic_order_checked((jl_sym_t*)success_order_sym, 1, 1);
+    enum jl_memory_order failure_order = jl_get_atomic_order_checked((jl_sym_t*)failure_order_sym, 1, 0);
+    if (failure_order > success_order)
+        jl_atomic_error("atomic_pointerreplace: invalid atomic ordering");
+    // TODO: filter other invalid orderings
+    jl_value_t *ety = jl_tparam0(jl_typeof(p));
+    char *pp = (char*)jl_unbox_long(p);
+    jl_datatype_t *rettyp = jl_apply_cmpswap_type(ety);
+    JL_GC_PROMISE_ROOTED(rettyp); // (JL_ALWAYS_LEAFTYPE)
+    if (ety == (jl_value_t*)jl_any_type) {
+        jl_value_t *result;
+        JL_GC_PUSH1(&result);
+        result = expected;
+        int success;
+        while (1) {
+            success = jl_atomic_cmpswap((_Atomic(jl_value_t*)*)pp, &result, x);
+            if (success || !jl_egal(result, expected))
+                break;
+        }
+        result = jl_new_struct(rettyp, result, success ? jl_true : jl_false);
+        JL_GC_POP();
+        return result;
+    }
+    else {
+        if (!is_valid_intrinsic_elptr(ety))
+            jl_error("atomic_pointerreplace: invalid pointer");
+        if (jl_typeof(x) != ety)
+            jl_type_error("atomic_pointerreplace", ety, x);
+        size_t nb = jl_datatype_size(ety);
+        if ((nb & (nb - 1)) != 0 || nb > MAX_POINTERATOMIC_SIZE)
+            jl_error("atomic_pointerreplace: invalid pointer for atomic operation");
+        return jl_atomic_cmpswap_bits((jl_datatype_t*)ety, rettyp, pp, expected, x, nb);
+    }
+}
+
+JL_DLLEXPORT jl_value_t *jl_atomic_fence(jl_value_t *order_sym)
+{
+    JL_TYPECHK(fence, symbol, order_sym);
+    enum jl_memory_order order = jl_get_atomic_order_checked((jl_sym_t*)order_sym, 1, 1);
+    if (order > jl_memory_order_monotonic)
+        jl_fence();
+    return jl_nothing;
+}
+
 JL_DLLEXPORT jl_value_t *jl_cglobal(jl_value_t *v, jl_value_t *ty)
 {
     JL_TYPECHK(cglobal, type, ty);
@@ -211,6 +593,20 @@ static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \
     OP((c_type*)pr, a); \
 }
 
+#define un_fintrinsic_half(OP, name) \
+static inline void name(unsigned osize, void *pa, void *pr) JL_NOTSAFEPOINT \
+{ \
+    uint16_t a = *(uint16_t*)pa; \
+    float A = __gnu_h2f_ieee(a); \
+    if (osize == 16) { \
+        float R; \
+        OP(&R, A); \
+        *(uint16_t*)pr = __gnu_f2h_ieee(R); \
+    } else { \
+        OP((uint16_t*)pr, A); \
+    } \
+    }
+
 // float or integer inputs
 // OP::Function macro(inputa, inputb)
 // name::unique string
@@ -224,6 +620,18 @@ static void jl_##name##nbits(unsigned runtime_nbits, void *pa, void *pb, void *p
     *(c_type*)pr = (c_type)OP(a, b); \
 }
 
+#define bi_intrinsic_half(OP, name) \
+static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pr) JL_NOTSAFEPOINT \
+{ \
+    uint16_t a = *(uint16_t*)pa; \
+    uint16_t b = *(uint16_t*)pb; \
+    float A = __gnu_h2f_ieee(a); \
+    float B = __gnu_h2f_ieee(b); \
+    runtime_nbits = 16; \
+    float R = OP(A, B); \
+    *(uint16_t*)pr = __gnu_f2h_ieee(R); \
+}
+
 // float or integer inputs, bool output
 // OP::Function macro(inputa, inputb)
 // name::unique string
@@ -237,6 +645,18 @@ static int jl_##name##nbits(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSA
     return OP(a, b); \
 }
 
+#define bool_intrinsic_half(OP, name) \
+static int jl_##name##16(unsigned runtime_nbits, void *pa, void *pb) JL_NOTSAFEPOINT \
+{ \
+    uint16_t a = *(uint16_t*)pa; \
+    uint16_t b = *(uint16_t*)pb; \
+    float A = __gnu_h2f_ieee(a); \
+    float B = __gnu_h2f_ieee(b); \
+    runtime_nbits = 16; \
+    return OP(A, B); \
+}
+
+
 // integer inputs, with precondition test
 // OP::Function macro(inputa, inputb)
 // name::unique string
@@ -265,6 +685,20 @@ static void jl_##name##nbits(unsigned runtime_nbits, void *pa, void *pb, void *p
     *(c_type*)pr = (c_type)OP(a, b, c); \
 }
 
+#define ter_intrinsic_half(OP, name) \
+static void jl_##name##16(unsigned runtime_nbits, void *pa, void *pb, void *pc, void *pr) JL_NOTSAFEPOINT \
+{ \
+    uint16_t a = *(uint16_t*)pa; \
+    uint16_t b = *(uint16_t*)pb; \
+    uint16_t c = *(uint16_t*)pc; \
+    float A = __gnu_h2f_ieee(a); \
+    float B = __gnu_h2f_ieee(b); \
+    float C = __gnu_h2f_ieee(c); \
+    runtime_nbits = 16; \
+    float R = OP(A, B, C); \
+    *(uint16_t*)pr = __gnu_f2h_ieee(R); \
+}
+
 
 // unary operator generator //
 
@@ -362,7 +796,7 @@ static inline jl_value_t *jl_intrinsiclambda_ty1(jl_value_t *ty, void *pa, unsig
 
 static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsigned osize, unsigned osize2, const void *voidlist)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     intrinsic_u1_t op = select_intrinsic_u1(osize2, (const intrinsic_u1_t*)voidlist);
     uint64_t cnt = op(osize * host_char_bit, pa);
     // TODO: the following assume little-endian
@@ -370,7 +804,7 @@ static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsign
     if (osize <= sizeof(cnt)) {
         return jl_new_bits(ty, &cnt);
     }
-    jl_value_t *newv = jl_gc_alloc(ptls, osize, ty);
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, osize, ty);
     // perform zext, if needed
     memset((char*)jl_data_ptr(newv) + sizeof(cnt), 0, osize - sizeof(cnt));
     memcpy(jl_data_ptr(newv), &cnt, sizeof(cnt));
@@ -407,11 +841,12 @@ static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const
 // floating point
 
 #define un_fintrinsic_withtype(OP, name) \
+un_fintrinsic_half(OP, jl_##name##16) \
 un_fintrinsic_ctype(OP, jl_##name##32, float) \
 un_fintrinsic_ctype(OP, jl_##name##64, double) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \
 { \
-    return jl_fintrinsic_1(ty, a, #name, jl_##name##32, jl_##name##64); \
+    return jl_fintrinsic_1(ty, a, #name, jl_##name##16, jl_##name##32, jl_##name##64); \
 }
 
 #define un_fintrinsic(OP, name) \
@@ -423,19 +858,22 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a) \
 
 typedef void (fintrinsic_op1)(unsigned, void*, void*);
 
-static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop)
+static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name, fintrinsic_op1 *halfop, fintrinsic_op1 *floatop, fintrinsic_op1 *doubleop)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (!jl_is_primitivetype(jl_typeof(a)))
         jl_errorf("%s: value is not a primitive type", name);
     if (!jl_is_primitivetype(ty))
         jl_errorf("%s: type is not a primitive type", name);
     unsigned sz2 = jl_datatype_size(ty);
-    jl_value_t *newv = jl_gc_alloc(ptls, sz2, ty);
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, sz2, ty);
     void *pa = jl_data_ptr(a), *pr = jl_data_ptr(newv);
     unsigned sz = jl_datatype_size(jl_typeof(a));
     switch (sz) {
     /* choose the right size c-type operation based on the input */
+    case 2:
+        halfop(sz2 * host_char_bit, pa, pr);
+        break;
     case 4:
         floatop(sz2 * host_char_bit, pa, pr);
         break;
@@ -443,7 +881,7 @@ static inline jl_value_t *jl_fintrinsic_1(jl_value_t *ty, jl_value_t *a, const c
         doubleop(sz2 * host_char_bit, pa, pr);
         break;
     default:
-        jl_errorf("%s: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64", name);
+        jl_errorf("%s: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64", name);
     }
     return newv;
 }
@@ -588,9 +1026,9 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v
     params[0] = ty;
     params[1] = (jl_value_t*)jl_bool_type;
     jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
-    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALAWYS_LEAFTYPE)
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_value_t *newv = jl_gc_alloc(ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp);
+    JL_GC_PROMISE_ROOTED(tuptyp); // (JL_ALWAYS_LEAFTYPE)
+    jl_task_t *ct = jl_current_task;
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp);
 
     intrinsic_checked_t op = select_intrinsic_checked(sz2, (const intrinsic_checked_t*)voidlist);
     int ovflw = op(sz * host_char_bit, pa, pb, jl_data_ptr(newv));
@@ -612,21 +1050,25 @@ static inline jl_value_t *jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa
 // floating point
 
 #define bi_fintrinsic(OP, name) \
+    bi_intrinsic_half(OP, name) \
     bi_intrinsic_ctype(OP, name, 32, float) \
     bi_intrinsic_ctype(OP, name, 64, double) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
 { \
-    jl_ptls_t ptls = jl_get_ptls_states();\
+    jl_task_t *ct = jl_current_task; \
     jl_value_t *ty = jl_typeof(a); \
     if (jl_typeof(b) != ty) \
         jl_error(#name ": types of a and b must match"); \
     if (!jl_is_primitivetype(ty)) \
         jl_error(#name ": values are not primitive types"); \
     int sz = jl_datatype_size(ty); \
-    jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);          \
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, sz, ty); \
     void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pr = jl_data_ptr(newv); \
     switch (sz) { \
     /* choose the right size c-type operation */ \
+    case 2: \
+        jl_##name##16(16, pa, pb, pr); \
+        break; \
     case 4: \
         jl_##name##32(32, pa, pb, pr); \
         break; \
@@ -634,12 +1076,13 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
         jl_##name##64(64, pa, pb, pr); \
         break; \
     default: \
-        jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); \
+        jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); \
     } \
     return newv; \
 }
 
 #define bool_fintrinsic(OP, name) \
+    bool_intrinsic_half(OP, name) \
     bool_intrinsic_ctype(OP, name, 32, float) \
     bool_intrinsic_ctype(OP, name, 64, double) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
@@ -654,6 +1097,9 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
     int cmp; \
     switch (sz) { \
     /* choose the right size c-type operation */ \
+    case 2: \
+        cmp = jl_##name##16(16, pa, pb); \
+        break; \
     case 4: \
         cmp = jl_##name##32(32, pa, pb); \
         break; \
@@ -667,21 +1113,25 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b) \
 }
 
 #define ter_fintrinsic(OP, name) \
+    ter_intrinsic_half(OP, name) \
     ter_intrinsic_ctype(OP, name, 32, float) \
     ter_intrinsic_ctype(OP, name, 64, double) \
 JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c) \
 { \
-    jl_ptls_t ptls = jl_get_ptls_states();\
+    jl_task_t *ct = jl_current_task; \
     jl_value_t *ty = jl_typeof(a); \
     if (jl_typeof(b) != ty || jl_typeof(c) != ty) \
         jl_error(#name ": types of a, b, and c must match"); \
     if (!jl_is_primitivetype(ty)) \
         jl_error(#name ": values are not primitive types"); \
-    int sz = jl_datatype_size(ty);                                      \
-    jl_value_t *newv = jl_gc_alloc(ptls, sz, ty);                       \
+    int sz = jl_datatype_size(ty); \
+    jl_value_t *newv = jl_gc_alloc(ct->ptls, sz, ty); \
     void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b), *pc = jl_data_ptr(c), *pr = jl_data_ptr(newv); \
     switch (sz) { \
     /* choose the right size c-type operation */ \
+    case 2: \
+        jl_##name##16(16, pa, pb, pc, pr); \
+        break; \
     case 4: \
         jl_##name##32(32, pa, pb, pc, pr); \
         break; \
@@ -689,7 +1139,7 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *a, jl_value_t *b, jl_value_t *c)
         jl_##name##64(64, pa, pb, pc, pr); \
         break; \
     default: \
-        jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64"); \
+        jl_error(#name ": runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64"); \
     } \
     return newv; \
 }
@@ -725,8 +1175,112 @@ bi_fintrinsic(div,div_float)
 bi_fintrinsic(frem,rem_float)
 
 // ternary operators //
+// runtime fma is broken on windows, define julia_fma(f) ourself with fma_emulated as reference.
+#if defined(_OS_WINDOWS_)
+// reinterpret(UInt64, ::Float64)
+uint64_t bitcast_d2u(double d) {
+    uint64_t r;
+    memcpy(&r, &d, 8);
+    return r;
+}
+// reinterpret(Float64, ::UInt64)
+double bitcast_u2d(uint64_t d) {
+    double r;
+    memcpy(&r, &d, 8);
+    return r;
+}
+// Base.splitbits(::Float64)
+void splitbits(double *hi, double *lo, double d) {
+    *hi = bitcast_u2d(bitcast_d2u(d) & 0xfffffffff8000000);
+    *lo = d - *hi;
+}
+// Base.exponent(::Float64)
+int exponent(double a) {
+    int e;
+    frexp(a, &e);
+    return e - 1;
+}
+// Base.fma_emulated(::Float32, ::Float32, ::Float32)
+float julia_fmaf(float a, float b, float c) {
+    double ab, res;
+    ab = (double)a * b;
+    res = ab + (double)c;
+    if ((bitcast_d2u(res) & 0x1fffffff) == 0x10000000){
+        double reslo = fabsf(c) > fabs(ab) ? ab-(res - c) : c-(res - ab);
+        if (reslo != 0)
+            res = nextafter(res, copysign(1.0/0.0, reslo));
+    }
+    return (float)res;
+}
+// Base.twomul(::Float64, ::Float64)
+void two_mul(double *abhi, double *ablo, double a, double b) {
+    double ahi, alo, bhi, blo, blohi, blolo;
+    splitbits(&ahi, &alo, a);
+    splitbits(&bhi, &blo, b);
+    splitbits(&blohi, &blolo, blo);
+    *abhi = a*b;
+    *ablo = alo*blohi - (((*abhi - ahi*bhi) - alo*bhi) - ahi*blo) + blolo*alo;
+}
+// Base.issubnormal(::Float64) (Win32's fpclassify seems broken)
+int issubnormal(double d) {
+    uint64_t y = bitcast_d2u(d);
+    return ((y & 0x7ff0000000000000) == 0) & ((y & 0x000fffffffffffff) != 0);
+}
+#if defined(_WIN32)
+// Win32 needs volatile (avoid over optimization?)
+#define VDOUBLE volatile double
+#else
+#define VDOUBLE double
+#endif
+
+// Base.fma_emulated(::Float64, ::Float64, ::Float64)
+double julia_fma(double a, double b, double c) {
+    double abhi, ablo, r, s;
+    two_mul(&abhi, &ablo, a, b);
+    if (!isfinite(abhi+c) || fabs(abhi) < 2.0041683600089732e-292 ||
+        issubnormal(a) || issubnormal(b)) {
+        int aandbfinite = isfinite(a) && isfinite(b);
+        if (!(aandbfinite && isfinite(c)))
+            return aandbfinite ? c : abhi+c;
+        if (a == 0 || b == 0)
+            return abhi+c;
+        int bias = exponent(a) + exponent(b);
+        VDOUBLE c_denorm = ldexp(c, -bias);
+        if (isfinite(c_denorm)) {
+            if (issubnormal(a))
+                a *= 4.503599627370496e15;
+            if (issubnormal(b))
+                b *= 4.503599627370496e15;
+            a = bitcast_u2d((bitcast_d2u(a) & 0x800fffffffffffff) | 0x3ff0000000000000);
+            b = bitcast_u2d((bitcast_d2u(b) & 0x800fffffffffffff) | 0x3ff0000000000000);
+            c = c_denorm;
+            two_mul(&abhi, &ablo, a, b);
+            r = abhi+c;
+            s = (fabs(abhi) > fabs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo);
+            double sumhi = r+s;
+            if (issubnormal(ldexp(sumhi, bias))) {
+                double sumlo = r-sumhi+s;
+                int bits_lost = -bias-exponent(sumhi)-1022;
+                if ((bits_lost != 1) ^ ((bitcast_d2u(sumhi)&1) == 1))
+                    if (sumlo != 0)
+                        sumhi = nextafter(sumhi, copysign(1.0/0.0, sumlo));
+            }
+            return ldexp(sumhi, bias);
+        }
+        if (isinf(abhi) && signbit(c) == signbit(a*b))
+            return abhi;
+    }
+    r = abhi+c;
+    s = (fabs(abhi) > fabs(c)) ? (abhi-r+c+ablo) : (c-r+abhi+ablo);
+    return r+s;
+}
+#define fma(a, b, c) \
+    sizeof(a) == sizeof(float) ? julia_fmaf(a, b, c) : julia_fma(a, b, c)
+#else // On other systems use fma(f) directly
 #define fma(a, b, c) \
     sizeof(a) == sizeof(float) ? fmaf(a, b, c) : fma(a, b, c)
+#endif
+
 #define muladd(a, b, c) a * b + c
 ter_fintrinsic(fma,fma_float)
 ter_fintrinsic(muladd,muladd_float)
@@ -766,33 +1320,11 @@ fpiseq_n(double, 64)
 #define fpiseq(a,b) \
     sizeof(a) == sizeof(float) ? fpiseq32(a, b) : fpiseq64(a, b)
 
-#define fpislt_n(c_type, nbits)                                         \
-    static inline int fpislt##nbits(c_type a, c_type b) JL_NOTSAFEPOINT \
-    {                                                                   \
-        bits##nbits ua, ub;                                             \
-        ua.f = a;                                                       \
-        ub.f = b;                                                       \
-        if (!isnan(a) && isnan(b))                                      \
-            return 1;                                                   \
-        if (isnan(a) || isnan(b))                                       \
-            return 0;                                                   \
-        if (ua.d >= 0 && ua.d < ub.d)                                   \
-            return 1;                                                   \
-        if (ua.d < 0 && ua.ud > ub.ud)                                  \
-            return 1;                                                   \
-        return 0;                                                       \
-    }
-fpislt_n(float, 32)
-fpislt_n(double, 64)
-#define fpislt(a, b) \
-    sizeof(a) == sizeof(float) ? fpislt32(a, b) : fpislt64(a, b)
-
 bool_fintrinsic(eq,eq_float)
 bool_fintrinsic(ne,ne_float)
 bool_fintrinsic(lt,lt_float)
 bool_fintrinsic(le,le_float)
 bool_fintrinsic(fpiseq,fpiseq)
-bool_fintrinsic(fpislt,fpislt)
 
 // bitwise operators
 #define and_op(a,b) a & b
@@ -834,15 +1366,17 @@ cvt_iintrinsic(LLVMFPtoUI, fptoui)
 #define fptrunc(pr, a) \
         if (!(osize < 8 * sizeof(a))) \
             jl_error("fptrunc: output bitsize must be < input bitsize"); \
-        if (osize == 32) \
+        else if (osize == 16) \
+            *(uint16_t*)pr = __gnu_f2h_ieee(a); \
+        else if (osize == 32) \
             *(float*)pr = a; \
         else if (osize == 64) \
             *(double*)pr = a; \
         else \
-            jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 32 and 64");
+            jl_error("fptrunc: runtime floating point intrinsics are not implemented for bit sizes other than 16, 32 and 64");
 #define fpext(pr, a) \
-        if (!(osize > 8 * sizeof(a))) \
-            jl_error("fpext: output bitsize must be > input bitsize"); \
+        if (!(osize >= 8 * sizeof(a))) \
+            jl_error("fpext: output bitsize must be >= input bitsize"); \
         if (osize == 32) \
             *(float*)pr = a; \
         else if (osize == 64) \
@@ -919,3 +1453,10 @@ JL_DLLEXPORT jl_value_t *jl_arraylen(jl_value_t *a)
     JL_TYPECHK(arraylen, array, a);
     return jl_box_long(jl_array_len((jl_array_t*)a));
 }
+
+JL_DLLEXPORT jl_value_t *jl_have_fma(jl_value_t *typ)
+{
+    JL_TYPECHK(have_fma, datatype, typ);
+    // TODO: run-time feature check?
+    return jl_false;
+}
diff --git a/src/safepoint.c b/src/safepoint.c
index 2f90afaf508e06..17c37a66c3a16f 100644
--- a/src/safepoint.c
+++ b/src/safepoint.c
@@ -19,7 +19,7 @@ extern "C" {
 // 1: at least one sigint is pending, only the sigint page is enabled.
 // 2: at least one sigint is pending, both safepoint pages are enabled.
 JL_DLLEXPORT sig_atomic_t jl_signal_pending = 0;
-uint32_t jl_gc_running = 0;
+_Atomic(uint32_t) jl_gc_running = 0;
 char *jl_safepoint_pages = NULL;
 // The number of safepoints enabled on the three pages.
 // The first page, is the SIGINT page, only used by the master thread.
@@ -42,11 +42,9 @@ uint8_t jl_safepoint_enable_cnt[3] = {0, 0, 0};
 // Additionally accessing `jl_gc_running` should use acquire/release
 // load/store so that threads waiting for the GC doesn't have to also
 // fight on the safepoint lock...
-//
-// Acquiring and releasing this lock should use the `jl_mutex_*_nogc` functions
-jl_mutex_t safepoint_lock;
+uv_mutex_t safepoint_lock;
 
-static void jl_safepoint_enable(int idx)
+static void jl_safepoint_enable(int idx) JL_NOTSAFEPOINT
 {
     // safepoint_lock should be held
     assert(0 <= idx && idx < 3);
@@ -67,7 +65,7 @@ static void jl_safepoint_enable(int idx)
 #endif
 }
 
-static void jl_safepoint_disable(int idx)
+static void jl_safepoint_disable(int idx) JL_NOTSAFEPOINT
 {
     // safepoint_lock should be held
     assert(0 <= idx && idx < 3);
@@ -88,6 +86,7 @@ static void jl_safepoint_disable(int idx)
 
 void jl_safepoint_init(void)
 {
+    uv_mutex_init(&safepoint_lock);
     // jl_page_size isn't available yet.
     size_t pgsz = jl_getpagesize();
 #ifdef _OS_WINDOWS_
@@ -100,7 +99,7 @@ void jl_safepoint_init(void)
 #endif
     if (addr == NULL) {
         jl_printf(JL_STDERR, "could not allocate GC synchronization page\n");
-        gc_debug_critical_error();
+        jl_gc_debug_critical_error();
         abort();
     }
     // The signal page is for the gc safepoint.
@@ -111,35 +110,36 @@ void jl_safepoint_init(void)
 int jl_safepoint_start_gc(void)
 {
     if (jl_n_threads == 1) {
-        jl_gc_running = 1;
+        jl_atomic_store_relaxed(&jl_gc_running, 1);
         return 1;
     }
     // The thread should have set this already
-    assert(jl_get_ptls_states()->gc_state == JL_GC_STATE_WAITING);
-    jl_mutex_lock_nogc(&safepoint_lock);
+    assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) == JL_GC_STATE_WAITING);
+    uv_mutex_lock(&safepoint_lock);
     // In case multiple threads enter the GC at the same time, only allow
     // one of them to actually run the collection. We can't just let the
     // master thread do the GC since it might be running unmanaged code
     // and can take arbitrarily long time before hitting a safe point.
-    if (jl_atomic_compare_exchange(&jl_gc_running, 0, 1) != 0) {
-        jl_mutex_unlock_nogc(&safepoint_lock);
+    uint32_t running = 0;
+    if (!jl_atomic_cmpswap(&jl_gc_running, &running, 1)) {
+        uv_mutex_unlock(&safepoint_lock);
         jl_safepoint_wait_gc();
         return 0;
     }
     jl_safepoint_enable(1);
     jl_safepoint_enable(2);
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
     return 1;
 }
 
 void jl_safepoint_end_gc(void)
 {
-    assert(jl_gc_running);
+    assert(jl_atomic_load_relaxed(&jl_gc_running));
     if (jl_n_threads == 1) {
-        jl_gc_running = 0;
+        jl_atomic_store_relaxed(&jl_gc_running, 0);
         return;
     }
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     // Need to reset the page protection before resetting the flag since
     // the thread will trigger a segfault immediately after returning from
     // the signal handler.
@@ -150,13 +150,13 @@ void jl_safepoint_end_gc(void)
     // This wakes up other threads on mac.
     jl_mach_gc_end();
 #  endif
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
 }
 
 void jl_safepoint_wait_gc(void)
 {
     // The thread should have set this is already
-    assert(jl_get_ptls_states()->gc_state != 0);
+    assert(jl_atomic_load_relaxed(&jl_current_task->ptls->gc_state) != 0);
     // Use normal volatile load in the loop for speed until GC finishes.
     // Then use an acquire load to make sure the GC result is visible on this thread.
     while (jl_atomic_load_relaxed(&jl_gc_running) || jl_atomic_load_acquire(&jl_gc_running)) {
@@ -166,7 +166,7 @@ void jl_safepoint_wait_gc(void)
 
 void jl_safepoint_enable_sigint(void)
 {
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     // Make sure both safepoints are enabled exactly once for SIGINT.
     switch (jl_signal_pending) {
     default:
@@ -182,24 +182,24 @@ void jl_safepoint_enable_sigint(void)
     case 2:
         jl_signal_pending = 2;
     }
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
 }
 
 void jl_safepoint_defer_sigint(void)
 {
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     // Make sure the GC safepoint is disabled for SIGINT.
     if (jl_signal_pending == 2) {
         jl_safepoint_disable(1);
         jl_signal_pending = 1;
     }
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
 }
 
 int jl_safepoint_consume_sigint(void)
 {
     int has_signal = 0;
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     // Make sure both safepoints are disabled for SIGINT.
     switch (jl_signal_pending) {
     default:
@@ -216,7 +216,7 @@ int jl_safepoint_consume_sigint(void)
     case 0:
         jl_signal_pending = 0;
     }
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
     return has_signal;
 }
 
diff --git a/src/serialize.h b/src/serialize.h
index 07ff46fdf96db2..69aaeb4c39787f 100644
--- a/src/serialize.h
+++ b/src/serialize.h
@@ -62,52 +62,65 @@ extern "C" {
 #define TAG_GOTOIFNOT          54
 #define TAG_RETURNNODE         55
 #define TAG_ARGUMENT           56
+#define TAG_RELOC_METHODROOT   57
 
-#define LAST_TAG 56
+#define LAST_TAG 57
 
 #define write_uint8(s, n) ios_putc((n), (s))
-#define read_uint8(s) ((uint8_t)ios_getc(s))
-#define write_int8(s, n) write_uint8(s, n)
-#define read_int8(s) read_uint8(s)
+#define read_uint8(s) ((uint8_t)ios_getc((s)))
+#define write_int8(s, n) write_uint8((s), (n))
+#define read_int8(s) read_uint8((s))
 
 /* read and write in host byte order */
 
-static void write_int32(ios_t *s, int32_t i) JL_NOTSAFEPOINT
+static inline void write_int32(ios_t *s, int32_t i) JL_NOTSAFEPOINT
 {
     ios_write(s, (char*)&i, 4);
 }
 
-static int32_t read_int32(ios_t *s) JL_NOTSAFEPOINT
+static inline int32_t read_int32(ios_t *s) JL_NOTSAFEPOINT
 {
     int32_t x = 0;
     ios_read(s, (char*)&x, 4);
     return x;
 }
 
-static uint64_t read_uint64(ios_t *s) JL_NOTSAFEPOINT
+static inline uint64_t read_uint64(ios_t *s) JL_NOTSAFEPOINT
 {
     uint64_t x = 0;
     ios_read(s, (char*)&x, 8);
     return x;
 }
 
-static void write_int64(ios_t *s, int64_t i) JL_NOTSAFEPOINT
+static inline void write_int64(ios_t *s, int64_t i) JL_NOTSAFEPOINT
 {
     ios_write(s, (char*)&i, 8);
 }
 
-static void write_uint16(ios_t *s, uint16_t i) JL_NOTSAFEPOINT
+static inline void write_uint16(ios_t *s, uint16_t i) JL_NOTSAFEPOINT
 {
     ios_write(s, (char*)&i, 2);
 }
 
-static uint16_t read_uint16(ios_t *s) JL_NOTSAFEPOINT
+static inline uint16_t read_uint16(ios_t *s) JL_NOTSAFEPOINT
 {
     int16_t x = 0;
     ios_read(s, (char*)&x, 2);
     return x;
 }
 
+static inline void write_uint32(ios_t *s, uint32_t i) JL_NOTSAFEPOINT
+{
+    ios_write(s, (char*)&i, 4);
+}
+
+static inline uint32_t read_uint32(ios_t *s) JL_NOTSAFEPOINT
+{
+    uint32_t x = 0;
+    ios_read(s, (char*)&x, 4);
+    return x;
+}
+
 void *jl_lookup_ser_tag(jl_value_t *v);
 void *jl_lookup_common_symbol(jl_value_t *v);
 jl_value_t *jl_deser_tag(uint8_t tag);
diff --git a/src/signal-handling.c b/src/signal-handling.c
index 1f809354361d14..d7876fa299a0b3 100644
--- a/src/signal-handling.c
+++ b/src/signal-handling.c
@@ -17,18 +17,35 @@ extern "C" {
 
 #include <threading.h>
 
-// Profiler control variables //
+// Profiler control variables
+// Note: these "static" variables are also used in "signals-*.c"
 static volatile jl_bt_element_t *bt_data_prof = NULL;
 static volatile size_t bt_size_max = 0;
 static volatile size_t bt_size_cur = 0;
 static volatile uint64_t nsecprof = 0;
 static volatile int running = 0;
 static const    uint64_t GIGA = 1000000000ULL;
+static uint64_t profile_cong_rng_seed = 0;
+static uint64_t profile_cong_rng_unbias = 0;
+static volatile uint64_t *profile_round_robin_thread_order = NULL;
 // Timers to take samples at intervals
 JL_DLLEXPORT void jl_profile_stop_timer(void);
 JL_DLLEXPORT int jl_profile_start_timer(void);
 void jl_lock_profile(void);
 void jl_unlock_profile(void);
+void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64_t *seed);
+
+JL_DLLEXPORT int jl_profile_is_buffer_full(void)
+{
+    // declare buffer full if there isn't enough room to take samples across all threads
+    #if defined(_OS_WINDOWS_)
+        uint64_t nthreads = 1; // windows only profiles the main thread
+    #else
+        uint64_t nthreads = jl_n_threads;
+    #endif
+    // the `+ 6` is for the two block terminators `0` plus 4 metadata entries
+    return bt_size_cur + (((JL_BT_MAX_ENTRY_SIZE + 1) + 6) * nthreads) > bt_size_max;
+}
 
 static uint64_t jl_last_sigint_trigger = 0;
 static uint64_t jl_disable_sigint_time = 0;
@@ -97,21 +114,54 @@ JL_DLLEXPORT void jl_exit_on_sigint(int on)
 
 static uintptr_t jl_get_pc_from_ctx(const void *_ctx);
 void jl_show_sigill(void *_ctx);
+#if defined(_CPU_X86_64_) || defined(_CPU_X86_) \
+    || (defined(_OS_LINUX_) && defined(_CPU_AARCH64_)) \
+    || (defined(_OS_LINUX_) && defined(_CPU_ARM_))
 static size_t jl_safe_read_mem(const volatile char *ptr, char *out, size_t len)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
-    ptls->safe_restore = &buf;
+    jl_set_safe_restore(&buf);
     volatile size_t i = 0;
     if (!jl_setjmp(buf, 0)) {
-        for (;i < len;i++) {
+        for (; i < len; i++) {
             out[i] = ptr[i];
         }
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
     return i;
 }
+#endif
+
+static double profile_autostop_time = -1.0;
+static double profile_peek_duration = 1.0; // seconds
+
+double jl_get_profile_peek_duration(void)
+{
+    return profile_peek_duration;
+}
+void jl_set_profile_peek_duration(double t)
+{
+    profile_peek_duration = t;
+}
+
+uintptr_t profile_show_peek_cond_loc;
+JL_DLLEXPORT void jl_set_peek_cond(uintptr_t cond)
+{
+    profile_show_peek_cond_loc = cond;
+}
+
+static void jl_check_profile_autostop(void)
+{
+    if ((profile_autostop_time != -1.0) && (jl_hrtime() > profile_autostop_time)) {
+        profile_autostop_time = -1.0;
+        jl_profile_stop_timer();
+        jl_safe_printf("\n==============================================================\n");
+        jl_safe_printf("Profile collected. A report will print at the next yield point\n");
+        jl_safe_printf("==============================================================\n\n");
+        uv_async_send((uv_async_t*)profile_show_peek_cond_loc);
+    }
+}
 
 #if defined(_WIN32)
 #include "signals-win.c"
@@ -224,17 +274,48 @@ void jl_show_sigill(void *_ctx)
 #endif
 }
 
-// what to do on a critical error
-void jl_critical_error(int sig, bt_context_t *context, jl_bt_element_t *bt_data, size_t *bt_size)
+// what to do on a critical error on a thread
+void jl_critical_error(int sig, bt_context_t *context, jl_task_t *ct)
 {
-    // This function is not allowed to reference any TLS variables.
-    // We need to explicitly pass in the TLS buffer pointer when
-    // we make `jl_filename` and `jl_lineno` thread local.
-    size_t i, n = *bt_size;
-    if (sig)
+    jl_bt_element_t *bt_data = ct ? ct->ptls->bt_data : NULL;
+    size_t *bt_size = ct ? &ct->ptls->bt_size : NULL;
+    size_t i, n = ct ? *bt_size : 0;
+    if (sig) {
+        // kill this task, so that we cannot get back to it accidentally (via an untimely ^C or jlbacktrace in jl_exit)
+        jl_set_safe_restore(NULL);
+        if (ct) {
+            ct->gcstack = NULL;
+            ct->eh = NULL;
+            ct->excstack = NULL;
+            ct->ptls->locks.len = 0;
+            ct->ptls->in_pure_callback = 0;
+            ct->ptls->in_finalizer = 1;
+            ct->world_age = 1;
+        }
+#ifndef _OS_WINDOWS_
+        sigset_t sset;
+        sigemptyset(&sset);
+        // n.b. In `abort()`, Apple's libSystem "helpfully" blocks all signals
+        // on all threads but SIGABRT. But we also don't know what the thread
+        // was doing, so unblock all critical signals so that they will crash
+        // hard, and not just get stuck.
+        sigaddset(&sset, SIGSEGV);
+        sigaddset(&sset, SIGBUS);
+        sigaddset(&sset, SIGILL);
+        // also unblock fatal signals now, so we won't get back here twice
+        sigaddset(&sset, SIGTERM);
+        sigaddset(&sset, SIGABRT);
+        sigaddset(&sset, SIGQUIT);
+        // and the original signal is now fatal too, in case it wasn't
+        // something already listed (?)
+        if (sig != SIGINT)
+            sigaddset(&sset, sig);
+        pthread_sigmask(SIG_UNBLOCK, &sset, NULL);
+#endif
         jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
+    }
     jl_safe_printf("in expression starting at %s:%d\n", jl_filename, jl_lineno);
-    if (context) {
+    if (context && ct) {
         // Must avoid extended backtrace frames here unless we're sure bt_data
         // is properly rooted.
         *bt_size = n = rec_backtrace_ctx(bt_data, JL_MAX_BT_SIZE, context, NULL);
@@ -242,8 +323,8 @@ void jl_critical_error(int sig, bt_context_t *context, jl_bt_element_t *bt_data,
     for (i = 0; i < n; i += jl_bt_entry_size(bt_data + i)) {
         jl_print_bt_entry_codeloc(bt_data + i);
     }
-    gc_debug_print_status();
-    gc_debug_critical_error();
+    jl_gc_debug_print_status();
+    jl_gc_debug_critical_error();
 }
 
 ///////////////////////
@@ -255,6 +336,17 @@ JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec)
     nsecprof = delay_nsec;
     if (bt_data_prof != NULL)
         free((void*)bt_data_prof);
+    if (profile_round_robin_thread_order == NULL) {
+        // NOTE: We currently only allocate this once, since jl_n_threads cannot change
+        // during execution of a julia process. If/when this invariant changes in the
+        // future, this will have to be adjusted.
+        profile_round_robin_thread_order = (uint64_t*) calloc(jl_n_threads, sizeof(uint64_t));
+        for (int i = 0; i < jl_n_threads; i++) {
+            profile_round_robin_thread_order[i] = i;
+        }
+    }
+    profile_cong_rng_seed = jl_rand();
+    unbias_cong(jl_n_threads, &profile_cong_rng_unbias);
     bt_data_prof = (jl_bt_element_t*) calloc(maxsize, sizeof(jl_bt_element_t));
     if (bt_data_prof == NULL && maxsize > 0)
         return -1;
@@ -262,6 +354,17 @@ JL_DLLEXPORT int jl_profile_init(size_t maxsize, uint64_t delay_nsec)
     return 0;
 }
 
+void jl_shuffle_int_array_inplace(volatile uint64_t *carray, size_t size, uint64_t *seed) {
+    // The "modern Fisher–Yates shuffle" - O(n) algorithm
+    // https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm
+    for (size_t i = size - 1; i >= 1; --i) {
+        size_t j = cong(i, profile_cong_rng_unbias, seed);
+        uint64_t tmp = carray[j];
+        carray[j] = carray[i];
+        carray[i] = tmp;
+    }
+}
+
 JL_DLLEXPORT uint8_t *jl_profile_get_data(void)
 {
     return (uint8_t*) bt_data_prof;
diff --git a/src/signals-mach.c b/src/signals-mach.c
index 257d3e9637a1a5..d1231fe969c1a8 100644
--- a/src/signals-mach.c
+++ b/src/signals-mach.c
@@ -1,5 +1,7 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+// Note that this file is `#include`d by "signals-unix.c"
+
 #include <mach/clock.h>
 #include <mach/clock_types.h>
 #include <mach/clock_reply.h>
@@ -7,6 +9,7 @@
 #include <mach/task.h>
 #include <mach/mig_errors.h>
 #include <AvailabilityMacros.h>
+#include "mach_excServer.c"
 
 #ifdef MAC_OS_X_VERSION_10_9
 #include <sys/_types/_ucontext64.h>
@@ -29,6 +32,11 @@ extern void *_keymgr_get_and_lock_processwide_ptr(unsigned int key);
 extern int _keymgr_get_and_lock_processwide_ptr_2(unsigned int key, void **result);
 extern int _keymgr_set_lockmode_processwide_ptr(unsigned int key, unsigned int mode);
 
+// private dyld3/dyld4 stuff
+extern void _dyld_atfork_prepare(void) __attribute__((weak_import));
+extern void _dyld_atfork_parent(void) __attribute__((weak_import));
+//extern void _dyld_fork_child(void) __attribute__((weak_import));
+
 static void attach_exception_port(thread_port_t thread, int segv_only);
 
 // low 16 bits are the thread id, the next 8 bits are the original gc_state
@@ -42,7 +50,7 @@ void jl_mach_gc_end(void)
         int8_t gc_state = (int8_t)(item >> 8);
         jl_ptls_t ptls2 = jl_all_tls_states[tid];
         jl_atomic_store_release(&ptls2->gc_state, gc_state);
-        thread_resume(pthread_mach_thread_np((pthread_t)ptls2->system_id));
+        thread_resume(pthread_mach_thread_np(ptls2->system_id));
     }
     suspended_threads.len = 0;
 }
@@ -52,28 +60,34 @@ void jl_mach_gc_end(void)
 static int jl_mach_gc_wait(jl_ptls_t ptls2,
                            mach_port_t thread, int16_t tid)
 {
-    jl_mutex_lock_nogc(&safepoint_lock);
+    uv_mutex_lock(&safepoint_lock);
     if (!jl_atomic_load_relaxed(&jl_gc_running)) {
         // relaxed, since gets set to zero only while the safepoint_lock was held
         // this means we can tell if GC is done before we got the message or
         // the safepoint was enabled for SIGINT.
-        jl_mutex_unlock_nogc(&safepoint_lock);
+        uv_mutex_unlock(&safepoint_lock);
         return 0;
     }
     // Otherwise, set the gc state of the thread, suspend and record it
+    // TODO: TSAN will complain that it never saw the faulting task do an
+    // atomic release (it was in the kernel). And our attempt here does
+    // nothing, since we are a different thread, and it is not transitive).
+    //
+    // This also means we are not making this thread available for GC work.
+    // Eventually, we should probably release this signal to the original
+    // thread, (return KERN_FAILURE instead of KERN_SUCCESS) so that it
+    // triggers a SIGSEGV and gets handled by the usual codepath for unix.
     int8_t gc_state = ptls2->gc_state;
     jl_atomic_store_release(&ptls2->gc_state, JL_GC_STATE_WAITING);
     uintptr_t item = tid | (((uintptr_t)gc_state) << 16);
     arraylist_push(&suspended_threads, (void*)item);
     thread_suspend(thread);
-    jl_mutex_unlock_nogc(&safepoint_lock);
+    uv_mutex_unlock(&safepoint_lock);
     return 1;
 }
 
 static mach_port_t segv_port = 0;
 
-extern boolean_t exc_server(mach_msg_header_t *, mach_msg_header_t *);
-
 #define STR(x) #x
 #define XSTR(x) STR(x)
 #define HANDLE_MACH_ERROR(msg, retval) \
@@ -83,13 +97,14 @@ void *mach_segv_listener(void *arg)
 {
     (void)arg;
     while (1) {
-        int ret = mach_msg_server(exc_server, 2048, segv_port, MACH_MSG_TIMEOUT_NONE);
+        int ret = mach_msg_server(mach_exc_server, 2048, segv_port, MACH_MSG_TIMEOUT_NONE);
         jl_safe_printf("mach_msg_server: %s\n", mach_error_string(ret));
         jl_exit(128 + SIGSEGV);
     }
 }
 
-static void allocate_segv_handler()
+
+static void allocate_mach_handler()
 {
     // ensure KEYMGR_GCC3_DW2_OBJ_LIST is initialized, as this requires malloc
     // and thus can deadlock when used without first initializing it.
@@ -120,41 +135,35 @@ static void allocate_segv_handler()
         jl_error("pthread_create failed");
     }
     pthread_attr_destroy(&attr);
-    for (int16_t tid = 0;tid < jl_n_threads;tid++) {
-        attach_exception_port(pthread_mach_thread_np((pthread_t)jl_all_tls_states[tid]->system_id), 0);
+    for (int16_t tid = 0; tid < jl_n_threads; tid++) {
+        attach_exception_port(pthread_mach_thread_np(jl_all_tls_states[tid]->system_id), 0);
     }
 }
 
-#ifdef LIBOSXUNWIND
+#ifdef LLVMLIBUNWIND
 volatile mach_port_t mach_profiler_thread = 0;
-static kern_return_t profiler_segv_handler
-                (mach_port_t                          exception_port,
-                 mach_port_t                                  thread,
-                 mach_port_t                                    task,
-                 exception_type_t                          exception,
-                 exception_data_t                               code,
-                 mach_msg_type_number_t                   code_count);
+static kern_return_t profiler_segv_handler(
+    mach_port_t exception_port,
+    mach_port_t thread,
+    mach_port_t task,
+    exception_type_t exception,
+    mach_exception_data_t code,
+    mach_msg_type_number_t codeCnt);
 #endif
 
 #if defined(_CPU_X86_64_)
 typedef x86_thread_state64_t host_thread_state_t;
 typedef x86_exception_state64_t host_exception_state_t;
-#define THREAD_STATE x86_THREAD_STATE64
-#define THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
+#define MACH_THREAD_STATE x86_THREAD_STATE64
+#define MACH_THREAD_STATE_COUNT x86_THREAD_STATE64_COUNT
 #define HOST_EXCEPTION_STATE x86_EXCEPTION_STATE64
 #define HOST_EXCEPTION_STATE_COUNT x86_EXCEPTION_STATE64_COUNT
 
-enum x86_trap_flags {
-    USER_MODE = 0x4,
-    WRITE_FAULT = 0x2,
-    PAGE_PRESENT = 0x1
-};
-
 #elif defined(_CPU_AARCH64_)
 typedef arm_thread_state64_t host_thread_state_t;
 typedef arm_exception_state64_t host_exception_state_t;
-#define THREAD_STATE ARM_THREAD_STATE64
-#define THREAD_STATE_COUNT ARM_THREAD_STATE64_COUNT
+#define MACH_THREAD_STATE ARM_THREAD_STATE64
+#define MACH_THREAD_STATE_COUNT ARM_THREAD_STATE64_COUNT
 #define HOST_EXCEPTION_STATE ARM_EXCEPTION_STATE64
 #define HOST_EXCEPTION_STATE_COUNT ARM_EXCEPTION_STATE64_COUNT
 #endif
@@ -162,64 +171,102 @@ typedef arm_exception_state64_t host_exception_state_t;
 static void jl_call_in_state(jl_ptls_t ptls2, host_thread_state_t *state,
                              void (*fptr)(void))
 {
-    uint64_t rsp = (uint64_t)ptls2->signal_stack + sig_stack_size;
+#ifdef _CPU_X86_64_
+    uintptr_t rsp = state->__rsp;
+#elif defined(_CPU_AARCH64_)
+    uintptr_t rsp = state->__sp;
+#else
+#error "julia: throw-in-context not supported on this platform"
+#endif
+    if (ptls2 == NULL || ptls2->signal_stack == NULL || is_addr_on_sigstack(ptls2, (void*)rsp)) {
+        rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
+    }
+    else {
+        rsp = (uintptr_t)ptls2->signal_stack + sig_stack_size;
+    }
     assert(rsp % 16 == 0);
 
-    // push (null) $RIP onto the stack
-    rsp -= sizeof(void*);
-    *(void**)rsp = NULL;
-
 #ifdef _CPU_X86_64_
+    rsp -= sizeof(void*);
     state->__rsp = rsp; // set stack pointer
     state->__rip = (uint64_t)fptr; // "call" the function
-#else
+#elif defined(_CPU_AARCH64_)
     state->__sp = rsp;
     state->__pc = (uint64_t)fptr;
+    state->__lr = 0;
+#else
+#error "julia: throw-in-context not supported on this platform"
 #endif
 }
 
+#ifdef _CPU_X86_64_
+int is_write_fault(host_exception_state_t exc_state) {
+    return exc_reg_is_write_fault(exc_state.__err);
+}
+#elif defined(_CPU_AARCH64_)
+int is_write_fault(host_exception_state_t exc_state) {
+    return exc_reg_is_write_fault(exc_state.__esr);
+}
+#else
+#warning Implement this query for consistent PROT_NONE handling
+int is_write_fault(host_exception_state_t exc_state) {
+    return 0;
+}
+#endif
+
 static void jl_throw_in_thread(int tid, mach_port_t thread, jl_value_t *exception)
 {
-    unsigned int count = THREAD_STATE_COUNT;
+    unsigned int count = MACH_THREAD_STATE_COUNT;
     host_thread_state_t state;
-    kern_return_t ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count);
+    kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count);
     HANDLE_MACH_ERROR("thread_get_state", ret);
     jl_ptls_t ptls2 = jl_all_tls_states[tid];
-    if (!ptls2->safe_restore) {
+    if (!jl_get_safe_restore()) {
         assert(exception);
-        ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE,
-                                           (bt_context_t*)&state, ptls2->pgcstack);
+        ptls2->bt_size =
+            rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, (bt_context_t *)&state,
+                              NULL /*current_task?*/);
         ptls2->sig_exception = exception;
     }
     jl_call_in_state(ptls2, &state, &jl_sig_throw);
-    ret = thread_set_state(thread, THREAD_STATE,
-                           (thread_state_t)&state, count);
+    ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
 }
 
-//exc_server uses dlsym to find symbol
-JL_DLLEXPORT
-kern_return_t catch_exception_raise(mach_port_t            exception_port,
-                                    mach_port_t            thread,
-                                    mach_port_t            task,
-                                    exception_type_t       exception,
-                                    exception_data_t       code,
-                                    mach_msg_type_number_t code_count)
+static void segv_handler(int sig, siginfo_t *info, void *context)
+{
+    assert(sig == SIGSEGV || sig == SIGBUS);
+    if (jl_get_safe_restore()) { // restarting jl_ or jl_unwind_stepn
+        jl_task_t *ct = jl_get_current_task();
+        jl_ptls_t ptls = ct == NULL ? NULL : ct->ptls;
+        jl_call_in_state(ptls, (host_thread_state_t*)jl_to_bt_context(context), &jl_sig_throw);
+    }
+    else {
+        sigdie_handler(sig, info, context);
+    }
+}
+
+//mach_exc_server expects us to define this symbol locally
+kern_return_t catch_mach_exception_raise(
+    mach_port_t exception_port,
+    mach_port_t thread,
+    mach_port_t task,
+    exception_type_t exception,
+    mach_exception_data_t code,
+    mach_msg_type_number_t codeCnt)
 {
-    unsigned int count = THREAD_STATE_COUNT;
     unsigned int exc_count = HOST_EXCEPTION_STATE_COUNT;
     host_exception_state_t exc_state;
-    host_thread_state_t state;
-#ifdef LIBOSXUNWIND
+#ifdef LLVMLIBUNWIND
     if (thread == mach_profiler_thread) {
-        return profiler_segv_handler(exception_port, thread, task, exception, code, code_count);
+        return profiler_segv_handler(exception_port, thread, task, exception, code, codeCnt);
     }
 #endif
     int16_t tid;
     jl_ptls_t ptls2 = NULL;
-    for (tid = 0;tid < jl_n_threads;tid++) {
+    for (tid = 0; tid < jl_n_threads; tid++) {
         jl_ptls_t _ptls2 = jl_all_tls_states[tid];
-        if (pthread_mach_thread_np((pthread_t)_ptls2->system_id) == thread) {
+        if (pthread_mach_thread_np(_ptls2->system_id) == thread) {
             ptls2 = _ptls2;
             break;
         }
@@ -257,7 +304,7 @@ kern_return_t catch_exception_raise(mach_port_t            exception_port,
         }
         return KERN_SUCCESS;
     }
-    if (ptls2->safe_restore) {
+    if (jl_get_safe_restore()) {
         jl_throw_in_thread(tid, thread, jl_stackovf_exception);
         return KERN_SUCCESS;
     }
@@ -267,7 +314,7 @@ kern_return_t catch_exception_raise(mach_port_t            exception_port,
     if (msync((void*)(fault_addr & ~(jl_page_size - 1)), 1, MS_ASYNC) == 0) { // check if this was a valid address
 #endif
         jl_value_t *excpt;
-        if (is_addr_on_stack(ptls2, (void*)fault_addr)) {
+        if (is_addr_on_stack(jl_atomic_load_relaxed(&ptls2->current_task), (void*)fault_addr)) {
             excpt = jl_stackovf_exception;
         }
 #ifdef SEGV_EXCEPTION
@@ -277,8 +324,8 @@ kern_return_t catch_exception_raise(mach_port_t            exception_port,
         }
 #endif
         else {
-            if (!(exc_state.__err & WRITE_FAULT))
-                return KERN_INVALID_ARGUMENT; // rethrow the SEGV since it wasn't an error with writing to read-only memory
+            if (!is_write_fault(exc_state))
+                return KERN_INVALID_ARGUMENT;
             excpt = jl_readonlymemory_exception;
         }
         jl_throw_in_thread(tid, thread, excpt);
@@ -286,14 +333,44 @@ kern_return_t catch_exception_raise(mach_port_t            exception_port,
         return KERN_SUCCESS;
     }
     else {
-        kern_return_t ret = thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count);
-        HANDLE_MACH_ERROR("thread_get_state", ret);
-        jl_critical_error(SIGSEGV, (unw_context_t*)&state,
-                          ptls2->bt_data, &ptls2->bt_size);
-        return KERN_INVALID_ARGUMENT;
+        thread0_exit_count++;
+        jl_exit_thread0(128 + SIGSEGV, NULL, 0);
+        return KERN_SUCCESS;
     }
 }
 
+//mach_exc_server expects us to define this symbol locally
+kern_return_t catch_mach_exception_raise_state(
+    mach_port_t exception_port,
+    exception_type_t exception,
+    const mach_exception_data_t code,
+    mach_msg_type_number_t codeCnt,
+    int *flavor,
+    const thread_state_t old_state,
+    mach_msg_type_number_t old_stateCnt,
+    thread_state_t new_state,
+    mach_msg_type_number_t *new_stateCnt)
+{
+    return KERN_INVALID_ARGUMENT; // we only use EXCEPTION_DEFAULT
+}
+
+//mach_exc_server expects us to define this symbol locally
+kern_return_t catch_mach_exception_raise_state_identity(
+    mach_port_t exception_port,
+    mach_port_t thread,
+    mach_port_t task,
+    exception_type_t exception,
+    mach_exception_data_t code,
+    mach_msg_type_number_t codeCnt,
+    int *flavor,
+    thread_state_t old_state,
+    mach_msg_type_number_t old_stateCnt,
+    thread_state_t new_state,
+    mach_msg_type_number_t *new_stateCnt)
+{
+    return KERN_INVALID_ARGUMENT; // we only use EXCEPTION_DEFAULT
+}
+
 static void attach_exception_port(thread_port_t thread, int segv_only)
 {
     kern_return_t ret;
@@ -301,34 +378,37 @@ static void attach_exception_port(thread_port_t thread, int segv_only)
     exception_mask_t mask = EXC_MASK_BAD_ACCESS;
     if (!segv_only)
         mask |= EXC_MASK_ARITHMETIC;
-    ret = thread_set_exception_ports(thread, mask, segv_port, EXCEPTION_DEFAULT, MACHINE_THREAD_STATE);
+    ret = thread_set_exception_ports(thread, mask, segv_port, EXCEPTION_DEFAULT | MACH_EXCEPTION_CODES, MACH_THREAD_STATE);
     HANDLE_MACH_ERROR("thread_set_exception_ports", ret);
 }
 
-static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx)
+static void jl_thread_suspend_and_get_state2(int tid, host_thread_state_t *ctx)
 {
     jl_ptls_t ptls2 = jl_all_tls_states[tid];
-    mach_port_t tid_port = pthread_mach_thread_np((pthread_t)ptls2->system_id);
+    mach_port_t thread = pthread_mach_thread_np(ptls2->system_id);
 
-    kern_return_t ret = thread_suspend(tid_port);
+    kern_return_t ret = thread_suspend(thread);
     HANDLE_MACH_ERROR("thread_suspend", ret);
 
     // Do the actual sampling
-    unsigned int count = THREAD_STATE_COUNT;
-    static unw_context_t state;
-    memset(&state, 0, sizeof(unw_context_t));
+    unsigned int count = MACH_THREAD_STATE_COUNT;
+    memset(ctx, 0, sizeof(*ctx));
 
     // Get the state of the suspended thread
-    ret = thread_get_state(tid_port, THREAD_STATE, (thread_state_t)&state, &count);
+    ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)ctx, &count);
+}
 
-    // Initialize the unwind context with the suspend thread's state
-    *ctx = &state;
+static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx)
+{
+    static host_thread_state_t state;
+    jl_thread_suspend_and_get_state2(tid, &state);
+    *ctx = (unw_context_t*)&state;
 }
 
 static void jl_thread_resume(int tid, int sig)
 {
     jl_ptls_t ptls2 = jl_all_tls_states[tid];
-    mach_port_t thread = pthread_mach_thread_np((pthread_t)ptls2->system_id);
+    mach_port_t thread = pthread_mach_thread_np(ptls2->system_id);
     kern_return_t ret = thread_resume(thread);
     HANDLE_MACH_ERROR("thread_resume", ret);
 }
@@ -338,7 +418,7 @@ static void jl_thread_resume(int tid, int sig)
 static void jl_try_deliver_sigint(void)
 {
     jl_ptls_t ptls2 = jl_all_tls_states[0];
-    mach_port_t thread = pthread_mach_thread_np((pthread_t)ptls2->system_id);
+    mach_port_t thread = pthread_mach_thread_np(ptls2->system_id);
 
     kern_return_t ret = thread_suspend(thread);
     HANDLE_MACH_ERROR("thread_suspend", ret);
@@ -364,29 +444,46 @@ static void jl_try_deliver_sigint(void)
     HANDLE_MACH_ERROR("thread_resume", ret);
 }
 
-static void jl_exit_thread0(int exitstate)
+static void JL_NORETURN jl_exit_thread0_cb(int exitstate)
+{
+CFI_NORETURN
+    jl_critical_error(exitstate - 128, NULL, jl_current_task);
+    jl_exit(exitstate);
+}
+
+static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size)
 {
     jl_ptls_t ptls2 = jl_all_tls_states[0];
-    mach_port_t thread = pthread_mach_thread_np((pthread_t)ptls2->system_id);
-    kern_return_t ret = thread_suspend(thread);
-    HANDLE_MACH_ERROR("thread_suspend", ret);
+    mach_port_t thread = pthread_mach_thread_np(ptls2->system_id);
+
+    host_thread_state_t state;
+    jl_thread_suspend_and_get_state2(0, &state);
+    unw_context_t *uc = (unw_context_t*)&state;
 
     // This aborts `sleep` and other syscalls.
-    ret = thread_abort(thread);
+    kern_return_t ret = thread_abort(thread);
     HANDLE_MACH_ERROR("thread_abort", ret);
 
-    unsigned int count = THREAD_STATE_COUNT;
-    host_thread_state_t state;
-    ret = thread_get_state(thread, THREAD_STATE,
-                           (thread_state_t)&state, &count);
+    if (bt_data == NULL) {
+        // Must avoid extended backtrace frames here unless we're sure bt_data
+        // is properly rooted.
+        ptls2->bt_size = rec_backtrace_ctx(ptls2->bt_data, JL_MAX_BT_SIZE, uc, NULL);
+    }
+    else {
+        ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE
+        memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0]));
+    }
 
     void (*exit_func)(int) = &_exit;
     if (thread0_exit_count <= 1) {
-        exit_func = &jl_exit;
+        exit_func = &jl_exit_thread0_cb;
     }
     else if (thread0_exit_count == 2) {
         exit_func = &exit;
     }
+    else {
+        exit_func = &_exit;
+    }
 
 #ifdef _CPU_X86_64_
     // First integer argument. Not portable but good enough =)
@@ -397,8 +494,8 @@ static void jl_exit_thread0(int exitstate)
 #error Fill in first integer argument here
 #endif
     jl_call_in_state(ptls2, &state, (void (*)(void))exit_func);
-    ret = thread_set_state(thread, THREAD_STATE,
-                           (thread_state_t)&state, count);
+    unsigned int count = MACH_THREAD_STATE_COUNT;
+    ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
 
     ret = thread_resume(thread);
@@ -411,17 +508,17 @@ static pthread_t profiler_thread;
 clock_serv_t clk;
 static mach_port_t profile_port = 0;
 
-#ifdef LIBOSXUNWIND
+#ifdef LLVMLIBUNWIND
 volatile static int forceDwarf = -2;
 static unw_context_t profiler_uc;
 
-static kern_return_t profiler_segv_handler
-                (mach_port_t                          exception_port,
-                 mach_port_t                                  thread,
-                 mach_port_t                                    task,
-                 exception_type_t                          exception,
-                 exception_data_t                               code,
-                 mach_msg_type_number_t                   code_count)
+static kern_return_t profiler_segv_handler(
+    mach_port_t exception_port,
+    mach_port_t thread,
+    mach_port_t task,
+    exception_type_t exception,
+    mach_exception_data_t code,
+    mach_msg_type_number_t codeCnt)
 {
     assert(thread == mach_profiler_thread);
     host_thread_state_t state;
@@ -435,9 +532,9 @@ static kern_return_t profiler_segv_handler
     else
         forceDwarf = -1;
 
-    unsigned int count = THREAD_STATE_COUNT;
+    unsigned int count = MACH_THREAD_STATE_COUNT;
 
-    thread_get_state(thread, THREAD_STATE, (thread_state_t)&state, &count);
+    thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count);
 
 #ifdef _CPU_X86_64_
     // don't change cs fs gs rflags
@@ -462,20 +559,44 @@ static kern_return_t profiler_segv_handler
     state.__cpsr = cpsr;
 #endif
 
-    kern_return_t ret = thread_set_state(thread, THREAD_STATE, (thread_state_t)&state, count);
+    kern_return_t ret = thread_set_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, count);
     HANDLE_MACH_ERROR("thread_set_state", ret);
 
     return KERN_SUCCESS;
 }
 #endif
 
+// WARNING: we are unable to handle sigsegv while the dlsymlock is held
+static int jl_lock_profile_mach(int dlsymlock)
+{
+    jl_lock_profile();
+    // workaround for old keymgr bugs
+    void *unused = NULL;
+    int keymgr_locked = _keymgr_get_and_lock_processwide_ptr_2(KEYMGR_GCC3_DW2_OBJ_LIST, &unused) == 0;
+    // workaround for new dlsym4 bugs (API and bugs introduced in macOS 12.1)
+    if (dlsymlock && _dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
+        _dyld_atfork_prepare();
+    return keymgr_locked;
+}
+
+static void jl_unlock_profile_mach(int dlsymlock, int keymgr_locked)
+{
+    if (dlsymlock && _dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL) \
+        _dyld_atfork_parent(); \
+    if (keymgr_locked)
+        _keymgr_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
+    jl_unlock_profile();
+}
+
+#define jl_lock_profile()       int keymgr_locked = jl_lock_profile_mach(1)
+#define jl_unlock_profile()     jl_unlock_profile_mach(1, keymgr_locked)
+
 void *mach_profile_listener(void *arg)
 {
     (void)arg;
-    int i;
     const int max_size = 512;
     attach_exception_port(mach_thread_self(), 1);
-#ifdef LIBOSXUNWIND
+#ifdef LLVMLIBUNWIND
     mach_profiler_thread = mach_thread_self();
 #endif
     mig_reply_error_t *bufRequest = (mig_reply_error_t*)malloc_s(max_size);
@@ -486,18 +607,27 @@ void *mach_profile_listener(void *arg)
         HANDLE_MACH_ERROR("mach_msg", ret);
         // sample each thread, round-robin style in reverse order
         // (so that thread zero gets notified last)
-        jl_lock_profile();
-        void *unused = NULL;
-        int keymgr_locked = _keymgr_get_and_lock_processwide_ptr_2(KEYMGR_GCC3_DW2_OBJ_LIST, &unused) == 0;
-        for (i = jl_n_threads; i-- > 0; ) {
+        int keymgr_locked = jl_lock_profile_mach(0);
+        jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
+        for (int idx = jl_n_threads; idx-- > 0; ) {
+            // Stop the threads in the random round-robin order.
+            int i = profile_round_robin_thread_order[idx];
             // if there is no space left, break early
-            if (bt_size_cur >= bt_size_max - 1)
+            if (jl_profile_is_buffer_full()) {
+                jl_profile_stop_timer();
                 break;
+            }
+
+            if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
+                _dyld_atfork_prepare(); // briefly acquire the dlsym lock
+            host_thread_state_t state;
+            jl_thread_suspend_and_get_state2(i, &state);
+            unw_context_t *uc = (unw_context_t*)&state;
+            if (_dyld_atfork_prepare != NULL && _dyld_atfork_parent != NULL)
+                _dyld_atfork_parent(); // quickly release the dlsym lock
 
-            unw_context_t *uc;
-            jl_thread_suspend_and_get_state(i, &uc);
             if (running) {
-#ifdef LIBOSXUNWIND
+#ifdef LLVMLIBUNWIND
                 /*
                  *  Unfortunately compact unwind info is incorrectly generated for quite a number of
                  *  libraries by quite a large number of compilers. We can fall back to DWARF unwind info
@@ -530,17 +660,30 @@ void *mach_profile_listener(void *arg)
 #else
                 bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur, bt_size_max - bt_size_cur - 1, uc, NULL);
 #endif
+                jl_ptls_t ptls = jl_all_tls_states[i];
+
+                // store threadid but add 1 as 0 is preserved to indicate end of block
+                bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
 
-                // Mark the end of this block with 0
+                // store task id
+                bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
+
+                // store cpu cycle clock
+                bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
+
+                // Mark the end of this block with two 0's
+                bt_data_prof[bt_size_cur++].uintptr = 0;
                 bt_data_prof[bt_size_cur++].uintptr = 0;
             }
             // We're done! Resume the thread.
             jl_thread_resume(i, 0);
         }
-        if (keymgr_locked)
-            _keymgr_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST);
-        jl_unlock_profile();
+        jl_unlock_profile_mach(0, keymgr_locked);
         if (running) {
+            jl_check_profile_autostop();
             // Reset the alarm
             kern_return_t ret = clock_alarm(clk, TIME_RELATIVE, timerprof, profile_port);
             HANDLE_MACH_ERROR("clock_alarm", ret)
diff --git a/src/signals-unix.c b/src/signals-unix.c
index d5a9798b9812f8..2b399bf76190d8 100644
--- a/src/signals-unix.c
+++ b/src/signals-unix.c
@@ -1,10 +1,13 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
+// Note that this file is `#include`d by "signal-handling.c"
+
 #include <signal.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <pthread.h>
+#include <time.h>
 #include <errno.h>
 #if defined(_OS_DARWIN_) && !defined(MAP_ANONYMOUS)
 #define MAP_ANONYMOUS MAP_ANON
@@ -41,7 +44,9 @@
 
 #include "julia_assert.h"
 
-static bt_context_t *jl_to_bt_context(void *sigctx)
+// helper function for returning the unw_context_t inside a ucontext_t
+// (also used by stackwalk.c)
+bt_context_t *jl_to_bt_context(void *sigctx)
 {
 #ifdef __APPLE__
     return (bt_context_t*)&((ucontext64_t*)sigctx)->uc_mcontext64->__ss;
@@ -57,6 +62,7 @@ static bt_context_t *jl_to_bt_context(void *sigctx)
 }
 
 static int thread0_exit_count = 0;
+static void jl_exit_thread0(int exitstate, jl_bt_element_t *bt_data, size_t bt_size);
 
 static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *_ctx)
 {
@@ -78,14 +84,26 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void *
 #elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)
     const ucontext64_t *ctx = (const ucontext64_t*)_ctx;
     return ctx->uc_mcontext64->__ss.__sp;
+#elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
+    const ucontext_t *ctx = (const ucontext_t*)_ctx;
+    return ctx->uc_mcontext.mc_rsp;
 #else
-    // TODO Add support for FreeBSD and PowerPC(64)?
+    // TODO Add support for PowerPC(64)?
     return 0;
 #endif
 }
 
+static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr)
+{
+    // One guard page for signal_stack.
+    return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size ||
+             (char*)ptr > (char*)ptls->signal_stack + sig_stack_size);
+}
+
 // Modify signal context `_ctx` so that `fptr` will execute when the signal
 // returns. `fptr` will execute on the signal stack, and must not return.
+// jl_call_in_ctx is also currently executing on that signal stack,
+// so be careful not to smash it
 static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_ctx)
 {
     // Modifying the ucontext should work but there is concern that
@@ -95,7 +113,7 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c
     // checks that the syscall is made in the signal handler and that
     // the ucontext address is valid. Hopefully the value of the ucontext
     // will not be part of the validation...
-    if (!ptls->signal_stack) {
+    if (!ptls || !ptls->signal_stack) {
         sigset_t sset;
         sigemptyset(&sset);
         sigaddset(&sset, sig);
@@ -103,30 +121,32 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c
         fptr();
         return;
     }
-    uintptr_t rsp = (uintptr_t)ptls->signal_stack + sig_stack_size;
+    uintptr_t rsp = jl_get_rsp_from_ctx(_ctx);
+    if (is_addr_on_sigstack(ptls, (void*)rsp)) {
+        rsp = (rsp - 256) & ~(uintptr_t)15; // redzone and re-alignment
+    }
+    else {
+        rsp = (uintptr_t)ptls->signal_stack + sig_stack_size;
+    }
     assert(rsp % 16 == 0);
 #if defined(_OS_LINUX_) && defined(_CPU_X86_64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
-    *(void**)rsp = NULL;
     ctx->uc_mcontext.gregs[REG_RSP] = rsp;
     ctx->uc_mcontext.gregs[REG_RIP] = (uintptr_t)fptr;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_64_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
-    *(void**)rsp = NULL;
     ctx->uc_mcontext.mc_rsp = rsp;
     ctx->uc_mcontext.mc_rip = (uintptr_t)fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_X86_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
-    *(void**)rsp = NULL;
     ctx->uc_mcontext.gregs[REG_ESP] = rsp;
     ctx->uc_mcontext.gregs[REG_EIP] = (uintptr_t)fptr;
 #elif defined(_OS_FREEBSD_) && defined(_CPU_X86_)
     ucontext_t *ctx = (ucontext_t*)_ctx;
     rsp -= sizeof(void*);
-    *(void**)rsp = NULL;
     ctx->uc_mcontext.mc_esp = rsp;
     ctx->uc_mcontext.mc_eip = (uintptr_t)fptr;
 #elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
@@ -160,14 +180,14 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c
     // `catch_exception_raise`. It works fine when a signal is received
     // due to `kill`/`raise` though.
     ucontext64_t *ctx = (ucontext64_t*)_ctx;
-    rsp -= sizeof(void*);
-    *(void**)rsp = NULL;
 #if defined(_CPU_X86_64_)
+    rsp -= sizeof(void*);
     ctx->uc_mcontext64->__ss.__rsp = rsp;
     ctx->uc_mcontext64->__ss.__rip = (uintptr_t)fptr;
 #else
     ctx->uc_mcontext64->__ss.__sp = rsp;
     ctx->uc_mcontext64->__ss.__pc = (uintptr_t)fptr;
+    ctx->uc_mcontext64->__ss.__lr = 0;
 #endif
 #else
 #warning "julia: throw-in-context not supported on this platform"
@@ -180,40 +200,38 @@ static void jl_call_in_ctx(jl_ptls_t ptls, void (*fptr)(void), int sig, void *_c
 #endif
 }
 
-static void jl_throw_in_ctx(jl_ptls_t ptls, jl_value_t *e, int sig, void *sigctx)
+static void jl_throw_in_ctx(jl_task_t *ct, jl_value_t *e, int sig, void *sigctx)
 {
-    if (!ptls->safe_restore)
-        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE,
-                                          jl_to_bt_context(sigctx), ptls->pgcstack);
-    ptls->sig_exception = e;
+    jl_ptls_t ptls = ct->ptls;
+    if (!jl_get_safe_restore()) {
+        ptls->bt_size =
+            rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, jl_to_bt_context(sigctx),
+                              ct->gcstack);
+        ptls->sig_exception = e;
+    }
     jl_call_in_ctx(ptls, &jl_sig_throw, sig, sigctx);
 }
 
 static pthread_t signals_thread;
 
-static int is_addr_on_stack(jl_ptls_t ptls, void *addr)
+static int is_addr_on_stack(jl_task_t *ct, void *addr)
 {
-    jl_task_t *t = ptls->current_task;
-    if (t->copy_stack)
+    if (ct->copy_stack) {
+        jl_ptls_t ptls = ct->ptls;
         return ((char*)addr > (char*)ptls->stackbase - ptls->stacksize &&
                 (char*)addr < (char*)ptls->stackbase);
-    else
-        return ((char*)addr > (char*)t->stkbuf &&
-                (char*)addr < (char*)t->stkbuf + t->bufsz);
+    }
+    return ((char*)addr > (char*)ct->stkbuf &&
+            (char*)addr < (char*)ct->stkbuf + ct->bufsz);
 }
 
 static void sigdie_handler(int sig, siginfo_t *info, void *context)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    sigset_t sset;
+    signal(sig, SIG_DFL);
     uv_tty_reset_mode();
     if (sig == SIGILL)
         jl_show_sigill(context);
-    jl_critical_error(sig, jl_to_bt_context(context),
-                      ptls->bt_data, &ptls->bt_size);
-    sigfillset(&sset);
-    sigprocmask(SIG_UNBLOCK, &sset, NULL);
-    signal(sig, SIG_DFL);
+    jl_critical_error(sig, jl_to_bt_context(context), jl_get_current_task());
     if (sig != SIGSEGV &&
         sig != SIGBUS &&
         sig != SIGILL) {
@@ -222,16 +240,69 @@ static void sigdie_handler(int sig, siginfo_t *info, void *context)
     // fall-through return to re-execute faulting statement (but without the error handler)
 }
 
+#if defined(_CPU_X86_64_) || defined(_CPU_X86_)
+enum x86_trap_flags {
+    USER_MODE = 0x4,
+    WRITE_FAULT = 0x2,
+    PAGE_PRESENT = 0x1
+};
+
+int exc_reg_is_write_fault(uintptr_t err) {
+    return err & WRITE_FAULT;
+}
+#elif defined(_CPU_AARCH64_)
+enum aarch64_esr_layout {
+    EC_MASK = ((uint32_t)0b111111) << 26,
+    EC_DATA_ABORT = ((uint32_t)0b100100) << 26,
+    ISR_DA_WnR = ((uint32_t)1) << 6
+};
+
+int exc_reg_is_write_fault(uintptr_t esr) {
+    return (esr & EC_MASK) == EC_DATA_ABORT && (esr & ISR_DA_WnR);
+}
+#endif
+
 #if defined(HAVE_MACH)
-#include <signals-mach.c>
+#include "signals-mach.c"
 #else
 
-static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr)
-{
-    // One guard page for signal_stack.
-    return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size ||
-             (char*)ptr > (char*)ptls->signal_stack + sig_stack_size);
+
+#if defined(_OS_LINUX_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_))
+int is_write_fault(void *context) {
+    ucontext_t *ctx = (ucontext_t*)context;
+    return exc_reg_is_write_fault(ctx->uc_mcontext.gregs[REG_ERR]);
+}
+#elif defined(_OS_LINUX_) && defined(_CPU_AARCH64_)
+struct linux_aarch64_ctx_header {
+	uint32_t magic;
+	uint32_t size;
+};
+const uint32_t linux_esr_magic = 0x45535201;
+
+int is_write_fault(void *context) {
+    ucontext_t *ctx = (ucontext_t*)context;
+    struct linux_aarch64_ctx_header *extra =
+        (struct linux_aarch64_ctx_header *)ctx->uc_mcontext.__reserved;
+    while (extra->magic != 0) {
+        if (extra->magic == linux_esr_magic) {
+            return exc_reg_is_write_fault(*(uint64_t*)&extra[1]);
+        }
+        extra = (struct linux_aarch64_ctx_header *)
+            (((uint8_t*)extra) + extra->size);
+    }
+    return 0;
+}
+#elif defined(_OS_FREEBSD_) && (defined(_CPU_X86_64_) || defined(_CPU_X86_))
+int is_write_fault(void *context) {
+    ucontext_t *ctx = (ucontext_t*)context;
+    return exc_reg_is_write_fault(ctx->uc_mcontext.mc_err);
+}
+#else
+#warning Implement this query for consistent PROT_NONE handling
+int is_write_fault(void *context) {
+    return 0;
 }
+#endif
 
 static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context)
 {
@@ -241,27 +312,34 @@ static int jl_is_on_sigstack(jl_ptls_t ptls, void *ptr, void *context)
 
 static void segv_handler(int sig, siginfo_t *info, void *context)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    if (jl_get_safe_restore()) { // restarting jl_ or profile
+        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+        return;
+    }
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL) {
+        sigdie_handler(sig, info, context);
+        return;
+    }
     assert(sig == SIGSEGV || sig == SIGBUS);
-
     if (jl_addr_is_safepoint((uintptr_t)info->si_addr)) {
         jl_set_gc_and_wait();
         // Do not raise sigint on worker thread
-        if (ptls->tid != 0)
+        if (jl_atomic_load_relaxed(&ct->tid) != 0)
             return;
-        if (ptls->defer_signal) {
+        if (ct->ptls->defer_signal) {
             jl_safepoint_defer_sigint();
         }
         else if (jl_safepoint_consume_sigint()) {
             jl_clear_force_sigint();
-            jl_throw_in_ctx(ptls, jl_interrupt_exception, sig, context);
+            jl_throw_in_ctx(ct, jl_interrupt_exception, sig, context);
         }
         return;
     }
-    if (ptls->safe_restore || is_addr_on_stack(ptls, info->si_addr)) { // stack overflow, or restarting jl_
-        jl_throw_in_ctx(ptls, jl_stackovf_exception, sig, context);
+    if (is_addr_on_stack(ct, info->si_addr)) { // stack overflow
+        jl_throw_in_ctx(ct, jl_stackovf_exception, sig, context);
     }
-    else if (jl_is_on_sigstack(ptls, info->si_addr, context)) {
+    else if (jl_is_on_sigstack(ct->ptls, info->si_addr, context)) {
         // This mainly happens when one of the finalizers during final cleanup
         // on the signal stack has a deep/infinite recursion.
         // There isn't anything more we can do
@@ -270,34 +348,18 @@ static void segv_handler(int sig, siginfo_t *info, void *context)
         jl_safe_printf("ERROR: Signal stack overflow, exit\n");
         _exit(sig + 128);
     }
-    else if (sig == SIGSEGV && info->si_code == SEGV_ACCERR) {  // writing to read-only memory (e.g., mmap)
-        jl_throw_in_ctx(ptls, jl_readonlymemory_exception, sig, context);
+    else if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && is_write_fault(context)) {  // writing to read-only memory (e.g., mmap)
+        jl_throw_in_ctx(ct, jl_readonlymemory_exception, sig, context);
     }
     else {
 #ifdef SEGV_EXCEPTION
-        jl_throw_in_ctx(ptls, jl_segv_exception, sig, context);
+        jl_throw_in_ctx(ct, jl_segv_exception, sig, context);
 #else
         sigdie_handler(sig, info, context);
 #endif
     }
 }
 
-static void allocate_segv_handler(void)
-{
-    struct sigaction act;
-    memset(&act, 0, sizeof(struct sigaction));
-    sigemptyset(&act.sa_mask);
-    act.sa_sigaction = segv_handler;
-    act.sa_flags = SA_ONSTACK | SA_SIGINFO;
-    if (sigaction(SIGSEGV, &act, NULL) < 0) {
-        jl_errorf("fatal error: sigaction: %s", strerror(errno));
-    }
-    // On AArch64, stack overflow triggers a SIGBUS
-    if (sigaction(SIGBUS, &act, NULL) < 0) {
-        jl_errorf("fatal error: sigaction: %s", strerror(errno));
-    }
-}
-
 #if !defined(JL_DISABLE_LIBUNWIND)
 static unw_context_t *volatile signal_context;
 static pthread_mutex_t in_signal_lock;
@@ -306,20 +368,33 @@ static pthread_cond_t signal_caught_cond;
 
 static void jl_thread_suspend_and_get_state(int tid, unw_context_t **ctx)
 {
+    struct timespec ts;
+    clock_gettime(CLOCK_REALTIME, &ts);
+    ts.tv_sec += 1;
     pthread_mutex_lock(&in_signal_lock);
     jl_ptls_t ptls2 = jl_all_tls_states[tid];
     jl_atomic_store_release(&ptls2->signal_request, 1);
     pthread_kill(ptls2->system_id, SIGUSR2);
-    pthread_cond_wait(&signal_caught_cond, &in_signal_lock);  // wait for thread to acknowledge
+    // wait for thread to acknowledge
+    int err = pthread_cond_timedwait(&signal_caught_cond, &in_signal_lock, &ts);
+    if (err == ETIMEDOUT) {
+        sig_atomic_t request = 1;
+        if (jl_atomic_cmpswap(&ptls2->signal_request, &request, 0)) {
+            *ctx = NULL;
+            pthread_mutex_unlock(&in_signal_lock);
+            return;
+        }
+        err = pthread_cond_wait(&signal_caught_cond, &in_signal_lock);
+    }
+    assert(!err);
     assert(jl_atomic_load_acquire(&ptls2->signal_request) == 0);
     *ctx = signal_context;
 }
 
 static void jl_thread_resume(int tid, int sig)
 {
-    (void)sig;
     jl_ptls_t ptls2 = jl_all_tls_states[tid];
-    jl_atomic_store_release(&ptls2->signal_request, 1);
+    jl_atomic_store_release(&ptls2->signal_request, sig == -1 ? 3 : 1);
     pthread_cond_broadcast(&exit_signal_cond);
     pthread_cond_wait(&signal_caught_cond, &in_signal_lock); // wait for thread to acknowledge
     assert(jl_atomic_load_acquire(&ptls2->signal_request) == 0);
@@ -342,12 +417,14 @@ static void jl_try_deliver_sigint(void)
 // Write only by signal handling thread, read only by main thread
 // no sync necessary.
 static int thread0_exit_state = 0;
-static void jl_exit_thread0_cb(void)
+static void JL_NORETURN jl_exit_thread0_cb(void)
 {
+CFI_NORETURN
     // This can get stuck if it happens at an unfortunate spot
     // (unavoidable due to its async nature).
     // Try harder to exit each time if we get multiple exit requests.
     if (thread0_exit_count <= 1) {
+        jl_critical_error(thread0_exit_state - 128, NULL, jl_current_task);
         jl_exit(thread0_exit_state);
     }
     else if (thread0_exit_count == 2) {
@@ -358,11 +435,23 @@ static void jl_exit_thread0_cb(void)
     }
 }
 
-static void jl_exit_thread0(int state)
+static void jl_exit_thread0(int state, jl_bt_element_t *bt_data, size_t bt_size)
 {
     jl_ptls_t ptls2 = jl_all_tls_states[0];
+    if (thread0_exit_count <= 1) {
+        unw_context_t *signal_context;
+        jl_thread_suspend_and_get_state(0, &signal_context);
+        if (signal_context != NULL) {
+            thread0_exit_state = state;
+            ptls2->bt_size = bt_size; // <= JL_MAX_BT_SIZE
+            memcpy(ptls2->bt_data, bt_data, ptls2->bt_size * sizeof(bt_data[0]));
+            jl_thread_resume(0, -1);
+            return;
+        }
+    }
     thread0_exit_state = state;
     jl_atomic_store_release(&ptls2->signal_request, 3);
+    // This also makes sure `sleep` is aborted.
     pthread_kill(ptls2->system_id, SIGUSR2);
 }
 
@@ -374,7 +463,12 @@ static void jl_exit_thread0(int state)
 // 3: exit with `thread0_exit_state`
 void usr2_handler(int sig, siginfo_t *info, void *ctx)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL)
+        return;
+    jl_ptls_t ptls = ct->ptls;
+    if (ptls == NULL)
+        return;
     int errno_save = errno;
     sig_atomic_t request = jl_atomic_exchange(&ptls->signal_request, 0);
 #if !defined(JL_DISABLE_LIBUNWIND)
@@ -385,12 +479,10 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
         pthread_cond_broadcast(&signal_caught_cond);
         pthread_cond_wait(&exit_signal_cond, &in_signal_lock);
         request = jl_atomic_exchange(&ptls->signal_request, 0);
-        assert(request == 1);
-        (void)request;
+        assert(request == 1 || request == 3);
         pthread_cond_broadcast(&signal_caught_cond);
         pthread_mutex_unlock(&in_signal_lock);
     }
-    else
 #endif
     if (request == 2) {
         int force = jl_check_force_sigint();
@@ -400,15 +492,25 @@ void usr2_handler(int sig, siginfo_t *info, void *ctx)
                 jl_safe_printf("WARNING: Force throwing a SIGINT\n");
             // Force a throw
             jl_clear_force_sigint();
-            jl_throw_in_ctx(ptls, jl_interrupt_exception, sig, ctx);
+            jl_throw_in_ctx(ct, jl_interrupt_exception, sig, ctx);
         }
     }
     else if (request == 3) {
-        jl_call_in_ctx(ptls, jl_exit_thread0_cb, sig, ctx);
+        jl_call_in_ctx(ct->ptls, jl_exit_thread0_cb, sig, ctx);
     }
     errno = errno_save;
 }
 
+// Because SIGUSR1 is dual-purpose, and the timer can have trailing signals after being deleted,
+// a 2-second grace period is imposed to ignore any trailing timer-created signals so they don't get
+// confused for user triggers
+uint64_t last_timer_delete_time = 0;
+
+int timer_graceperiod_elapsed(void)
+{
+    return jl_hrtime() > (last_timer_delete_time + 2e9);
+}
+
 #if defined(HAVE_TIMER)
 // Linux-style
 #include <time.h>
@@ -426,26 +528,32 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
     sigprof.sigev_notify = SIGEV_SIGNAL;
     sigprof.sigev_signo = SIGUSR1;
     sigprof.sigev_value.sival_ptr = &timerprof;
-    if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1)
+    // Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer
+    running = 1;
+    if (timer_create(CLOCK_REALTIME, &sigprof, &timerprof) == -1) {
+        running = 0;
         return -2;
+    }
 
     // Start the timer
-    itsprof.it_interval.tv_sec = nsecprof/GIGA;
-    itsprof.it_interval.tv_nsec = nsecprof%GIGA;
-    itsprof.it_value.tv_sec = nsecprof/GIGA;
-    itsprof.it_value.tv_nsec = nsecprof%GIGA;
-    if (timer_settime(timerprof, 0, &itsprof, NULL) == -1)
+    itsprof.it_interval.tv_sec = 0;
+    itsprof.it_interval.tv_nsec = 0;
+    itsprof.it_value.tv_sec = nsecprof / GIGA;
+    itsprof.it_value.tv_nsec = nsecprof % GIGA;
+    if (timer_settime(timerprof, 0, &itsprof, NULL) == -1) {
+        running = 0;
         return -3;
-
-    running = 1;
+    }
     return 0;
 }
 
 JL_DLLEXPORT void jl_profile_stop_timer(void)
 {
-    if (running)
+    if (running) {
         timer_delete(timerprof);
-    running = 0;
+        last_timer_delete_time = jl_hrtime();
+        running = 0;
+    }
 }
 
 #elif defined(HAVE_ITIMER)
@@ -456,15 +564,16 @@ struct itimerval timerprof;
 
 JL_DLLEXPORT int jl_profile_start_timer(void)
 {
-    timerprof.it_interval.tv_sec = nsecprof/GIGA;
-    timerprof.it_interval.tv_usec = (nsecprof%GIGA)/1000;
-    timerprof.it_value.tv_sec = nsecprof/GIGA;
-    timerprof.it_value.tv_usec = (nsecprof%GIGA)/1000;
-    if (setitimer(ITIMER_PROF, &timerprof, 0) == -1)
-        return -3;
-
+    timerprof.it_interval.tv_sec = 0;
+    timerprof.it_interval.tv_usec = 0;
+    timerprof.it_value.tv_sec = nsecprof / GIGA;
+    timerprof.it_value.tv_usec = ((nsecprof % GIGA) + 999) / 1000;
+    // Because SIGUSR1 is multipurpose, set `running` before so that we know that the first SIGUSR1 came from the timer
     running = 1;
-
+    if (setitimer(ITIMER_PROF, &timerprof, NULL) == -1) {
+        running = 0;
+        return -3;
+    }
     return 0;
 }
 
@@ -472,9 +581,10 @@ JL_DLLEXPORT void jl_profile_stop_timer(void)
 {
     if (running) {
         memset(&timerprof, 0, sizeof(timerprof));
-        setitimer(ITIMER_PROF, &timerprof, 0);
+        setitimer(ITIMER_PROF, &timerprof, NULL);
+        last_timer_delete_time = jl_hrtime();
+        running = 0;
     }
-    running = 0;
 }
 
 #else
@@ -484,43 +594,42 @@ JL_DLLEXPORT void jl_profile_stop_timer(void)
 #endif
 #endif // HAVE_MACH
 
-static void *alloc_sigstack(size_t size)
+static void allocate_segv_handler(void)
 {
-    size_t pagesz = jl_getpagesize();
-    // Add one guard page to catch stack overflow in the signal handler
-    size = LLT_ALIGN(size, pagesz) + pagesz;
-    void *stackbuff = mmap(0, size, PROT_READ | PROT_WRITE,
-                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
-    if (stackbuff == MAP_FAILED)
-        jl_errorf("fatal error allocating signal stack: mmap: %s",
-                  strerror(errno));
-    mprotect(stackbuff, pagesz, PROT_NONE);
-    return (void*)((char*)stackbuff + pagesz);
+    struct sigaction act;
+    memset(&act, 0, sizeof(struct sigaction));
+    sigemptyset(&act.sa_mask);
+    act.sa_sigaction = segv_handler;
+    act.sa_flags = SA_ONSTACK | SA_SIGINFO;
+    if (sigaction(SIGSEGV, &act, NULL) < 0) {
+        jl_errorf("fatal error: sigaction: %s", strerror(errno));
+    }
+    // On AArch64, stack overflow triggers a SIGBUS
+    if (sigaction(SIGBUS, &act, NULL) < 0) {
+        jl_errorf("fatal error: sigaction: %s", strerror(errno));
+    }
+}
+
+static void *alloc_sigstack(size_t *ssize)
+{
+    void *stk = jl_malloc_stack(ssize, NULL);
+    if (stk == MAP_FAILED)
+        jl_errorf("fatal error allocating signal stack: mmap: %s", strerror(errno));
+    return stk;
 }
 
 void jl_install_thread_signal_handler(jl_ptls_t ptls)
 {
-    void *signal_stack = alloc_sigstack(sig_stack_size);
+    size_t ssize = sig_stack_size;
+    void *signal_stack = alloc_sigstack(&ssize);
+    ptls->signal_stack = signal_stack;
     stack_t ss;
     ss.ss_flags = 0;
-    ss.ss_size = sig_stack_size - 16;
+    ss.ss_size = ssize - 16;
     ss.ss_sp = signal_stack;
     if (sigaltstack(&ss, NULL) < 0) {
         jl_errorf("fatal error: sigaltstack: %s", strerror(errno));
     }
-
-#if !defined(HAVE_MACH)
-    struct sigaction act;
-    memset(&act, 0, sizeof(struct sigaction));
-    sigemptyset(&act.sa_mask);
-    act.sa_sigaction = usr2_handler;
-    act.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTART;
-    if (sigaction(SIGUSR2, &act, NULL) < 0) {
-        jl_errorf("fatal error: sigaction: %s", strerror(errno));
-    }
-#endif
-
-    ptls->signal_stack = signal_stack;
 }
 
 static void jl_sigsetset(sigset_t *sset)
@@ -559,6 +668,18 @@ static void kqueue_signal(int *sigqueue, struct kevent *ev, int sig)
 }
 #endif
 
+void trigger_profile_peek(void)
+{
+    jl_safe_printf("\n======================================================================================\n");
+    jl_safe_printf("Information request received. A stacktrace will print followed by a %.1f second profile\n", profile_peek_duration);
+    jl_safe_printf("======================================================================================\n");
+    bt_size_cur = 0; // clear profile buffer
+    if (jl_profile_start_timer() < 0)
+        jl_safe_printf("ERROR: Could not start profile timer\n");
+    else
+        profile_autostop_time = jl_hrtime() + (profile_peek_duration * 1e9);
+}
+
 static void *signal_listener(void *arg)
 {
     static jl_bt_element_t bt_data[JL_MAX_BT_SIZE + 1];
@@ -566,6 +687,9 @@ static void *signal_listener(void *arg)
     sigset_t sset;
     int sig, critical, profile;
     jl_sigsetset(&sset);
+#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L
+    siginfo_t info;
+#endif
 #ifdef HAVE_KEVENT
     struct kevent ev;
     int sigqueue = kqueue();
@@ -610,7 +734,6 @@ static void *signal_listener(void *arg)
         else
 #endif
 #if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L
-        siginfo_t info;
         sig = sigwaitinfo(&sset, &info);
 #else
         if (sigwait(&sset, &sig))
@@ -621,10 +744,11 @@ static void *signal_listener(void *arg)
                 continue;
             sig = SIGABRT; // this branch can't occur, unless we had stack memory corruption of sset
         }
+        profile = 0;
 #ifndef HAVE_MACH
 #if defined(HAVE_TIMER)
         profile = (sig == SIGUSR1);
-#if _POSIX_C_SOURCE >= 199309L
+#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309L
         if (profile && !(info.si_code == SI_TIMER &&
 	            info.si_value.sival_ptr == &timerprof))
             profile = 0;
@@ -661,11 +785,17 @@ static void *signal_listener(void *arg)
 
         int doexit = critical;
 #ifdef SIGINFO
-        if (sig == SIGINFO)
+        if (sig == SIGINFO) {
+            if (running != 1)
+                trigger_profile_peek();
             doexit = 0;
+        }
 #else
-        if (sig == SIGUSR1)
+        if (sig == SIGUSR1) {
+            if (running != 1 && timer_graceperiod_elapsed())
+                trigger_profile_peek();
             doexit = 0;
+        }
 #endif
 
         bt_size = 0;
@@ -673,63 +803,108 @@ static void *signal_listener(void *arg)
         unw_context_t *signal_context;
         // sample each thread, round-robin style in reverse order
         // (so that thread zero gets notified last)
-        if (critical || profile)
+        if (critical || profile) {
             jl_lock_profile();
-        for (int i = jl_n_threads; i-- > 0; ) {
-            // notify thread to stop
-            jl_thread_suspend_and_get_state(i, &signal_context);
-
-            // do backtrace on thread contexts for critical signals
-            // this part must be signal-handler safe
-            if (critical) {
-                bt_size += rec_backtrace_ctx(bt_data + bt_size,
-                        JL_MAX_BT_SIZE / jl_n_threads - 1,
-                        signal_context, NULL);
-                bt_data[bt_size++].uintptr = 0;
-            }
-
-            // do backtrace for profiler
-            if (profile && running) {
-                if (bt_size_cur < bt_size_max - 1) {
-                    // unwinding can fail, so keep track of the current state
-                    // and restore from the SEGV handler if anything happens.
-                    jl_ptls_t ptls = jl_get_ptls_states();
-                    jl_jmp_buf *old_buf = ptls->safe_restore;
-                    jl_jmp_buf buf;
-
-                    ptls->safe_restore = &buf;
-                    if (jl_setjmp(buf, 0)) {
-                        jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
-                    } else {
-                        // Get backtrace data
-                        bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
-                                bt_size_max - bt_size_cur - 1, signal_context, NULL);
-                    }
-                    ptls->safe_restore = old_buf;
+            if (!critical)
+                jl_shuffle_int_array_inplace(profile_round_robin_thread_order, jl_n_threads, &profile_cong_rng_seed);
+            for (int idx = jl_n_threads; idx-- > 0; ) {
+                // Stop the threads in the random round-robin order.
+                int i = critical ? idx : profile_round_robin_thread_order[idx];
+                // notify thread to stop
+                jl_thread_suspend_and_get_state(i, &signal_context);
+                if (signal_context == NULL)
+                    continue;
 
-                    // Mark the end of this block with 0
-                    bt_data_prof[bt_size_cur++].uintptr = 0;
+                // do backtrace on thread contexts for critical signals
+                // this part must be signal-handler safe
+                if (critical) {
+                    bt_size += rec_backtrace_ctx(bt_data + bt_size,
+                            JL_MAX_BT_SIZE / jl_n_threads - 1,
+                            signal_context, NULL);
+                    bt_data[bt_size++].uintptr = 0;
                 }
-                if (bt_size_cur >= bt_size_max - 1) {
-                    // Buffer full: Delete the timer
-                    jl_profile_stop_timer();
+
+                // do backtrace for profiler
+                if (profile && running) {
+                    if (jl_profile_is_buffer_full()) {
+                        // Buffer full: Delete the timer
+                        jl_profile_stop_timer();
+                    }
+                    else {
+                        // unwinding can fail, so keep track of the current state
+                        // and restore from the SEGV handler if anything happens.
+                        jl_jmp_buf *old_buf = jl_get_safe_restore();
+                        jl_jmp_buf buf;
+
+                        jl_set_safe_restore(&buf);
+                        if (jl_setjmp(buf, 0)) {
+                            jl_safe_printf("WARNING: profiler attempt to access an invalid memory location\n");
+                        } else {
+                            // Get backtrace data
+                            bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
+                                    bt_size_max - bt_size_cur - 1, signal_context, NULL);
+                        }
+                        jl_set_safe_restore(old_buf);
+
+                        jl_ptls_t ptls2 = jl_all_tls_states[i];
+
+                        // store threadid but add 1 as 0 is preserved to indicate end of block
+                        bt_data_prof[bt_size_cur++].uintptr = ptls2->tid + 1;
+
+                        // store task id
+                        bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls2->current_task);
+
+                        // store cpu cycle clock
+                        bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                        // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                        bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls2->sleep_check_state) + 1;
+
+                        // Mark the end of this block with two 0's
+                        bt_data_prof[bt_size_cur++].uintptr = 0;
+                        bt_data_prof[bt_size_cur++].uintptr = 0;
+                    }
                 }
-            }
 
-            // notify thread to resume
-            jl_thread_resume(i, sig);
-        }
-        if (critical || profile)
+                // notify thread to resume
+                jl_thread_resume(i, sig);
+            }
             jl_unlock_profile();
+        }
+#ifndef HAVE_MACH
+        if (profile && running) {
+            jl_check_profile_autostop();
+#if defined(HAVE_TIMER)
+            timer_settime(timerprof, 0, &itsprof, NULL);
+#elif defined(HAVE_ITIMER)
+            setitimer(ITIMER_PROF, &timerprof, NULL);
+#endif
+        }
+#endif
 #endif
 
         // this part is async with the running of the rest of the program
         // and must be thread-safe, but not necessarily signal-handler safe
         if (critical) {
-            jl_critical_error(sig, NULL, bt_data, &bt_size);
             if (doexit) {
                 thread0_exit_count++;
-                jl_exit_thread0(128 + sig);
+                jl_exit_thread0(128 + sig, bt_data, bt_size);
+            }
+            else {
+#ifndef SIGINFO // SIGINFO already prints this automatically
+                int nrunning = 0;
+                for (int idx = jl_n_threads; idx-- > 0; ) {
+                    jl_ptls_t ptls2 = jl_all_tls_states[idx];
+                    nrunning += !jl_atomic_load_relaxed(&ptls2->sleep_check_state);
+                }
+                jl_safe_printf("\ncmd: %s %d running %d of %d\n", jl_options.julia_bin ? jl_options.julia_bin : "julia", uv_os_getpid(), nrunning, jl_n_threads);
+#endif
+
+                jl_safe_printf("\nsignal (%d): %s\n", sig, strsignal(sig));
+                size_t i;
+                for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
+                    jl_print_bt_entry_codeloc(bt_data + i);
+                }
             }
         }
     }
@@ -761,8 +936,15 @@ void restore_signals(void)
 static void fpe_handler(int sig, siginfo_t *info, void *context)
 {
     (void)info;
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_throw_in_ctx(ptls, jl_diverror_exception, sig, context);
+    if (jl_get_safe_restore()) { // restarting jl_ or profile
+        jl_call_in_ctx(NULL, &jl_sig_throw, sig, context);
+        return;
+    }
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL) // exception on foreign thread is fatal
+        sigdie_handler(sig, info, context);
+    else
+        jl_throw_in_ctx(ct, jl_diverror_exception, sig, context);
 }
 
 static void sigint_handler(int sig)
@@ -776,7 +958,7 @@ void jl_install_default_signal_handlers(void)
     memset(&actf, 0, sizeof(struct sigaction));
     sigemptyset(&actf.sa_mask);
     actf.sa_sigaction = fpe_handler;
-    actf.sa_flags = SA_SIGINFO;
+    actf.sa_flags = SA_ONSTACK | SA_SIGINFO;
     if (sigaction(SIGFPE, &actf, NULL) < 0) {
         jl_errorf("fatal error: sigaction: %s", strerror(errno));
     }
@@ -795,13 +977,26 @@ void jl_install_default_signal_handlers(void)
         jl_error("fatal error: Couldn't set SIGTRAP");
     }
 
+#if defined(HAVE_MACH)
+    allocate_mach_handler();
+#else
+    struct sigaction act;
+    memset(&act, 0, sizeof(struct sigaction));
+    sigemptyset(&act.sa_mask);
+    act.sa_sigaction = usr2_handler;
+    act.sa_flags = SA_ONSTACK | SA_SIGINFO | SA_RESTART;
+    if (sigaction(SIGUSR2, &act, NULL) < 0) {
+        jl_errorf("fatal error: sigaction: %s", strerror(errno));
+    }
+#endif
+
     allocate_segv_handler();
 
     struct sigaction act_die;
     memset(&act_die, 0, sizeof(struct sigaction));
     sigemptyset(&act_die.sa_mask);
     act_die.sa_sigaction = sigdie_handler;
-    act_die.sa_flags = SA_SIGINFO;
+    act_die.sa_flags = SA_SIGINFO | SA_RESETHAND;
     if (sigaction(SIGILL, &act_die, NULL) < 0) {
         jl_errorf("fatal error: sigaction: %s", strerror(errno));
     }
@@ -812,7 +1007,7 @@ void jl_install_default_signal_handlers(void)
         jl_errorf("fatal error: sigaction: %s", strerror(errno));
     }
     // need to ensure the following signals are not SIG_IGN, even though they will be blocked
-    act_die.sa_flags = SA_SIGINFO | SA_RESTART;
+    act_die.sa_flags = SA_SIGINFO | SA_RESTART | SA_RESETHAND;
 #if defined(HAVE_ITIMER)
     if (sigaction(SIGPROF, &act_die, NULL) < 0) {
         jl_errorf("fatal error: sigaction: %s", strerror(errno));
diff --git a/src/signals-win.c b/src/signals-win.c
index 9ea8a38813ff3b..178a7463b8d502 100644
--- a/src/signals-win.c
+++ b/src/signals-win.c
@@ -1,6 +1,8 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
 // Windows
+// Note that this file is `#include`d by "signal-handling.c"
+#include <mmsystem.h> // hidden by LEAN_AND_MEAN
 
 #define sig_stack_size 131072 // 128k reserved for SEGV handling
 
@@ -42,11 +44,11 @@ static char *strsignal(int sig)
 
 static void jl_try_throw_sigint(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_safepoint_enable_sigint();
     jl_wake_libuv();
     int force = jl_check_force_sigint();
-    if (force || (!ptls->defer_signal && ptls->io_wait)) {
+    if (force || (!ct->ptls->defer_signal && ct->ptls->io_wait)) {
         jl_safepoint_consume_sigint();
         if (force)
             jl_safe_printf("WARNING: Force throwing a SIGINT\n");
@@ -58,7 +60,6 @@ static void jl_try_throw_sigint(void)
 
 void __cdecl crt_sig_handler(int sig, int num)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
     CONTEXT Context;
     switch (sig) {
     case SIGFPE:
@@ -85,29 +86,31 @@ void __cdecl crt_sig_handler(int sig, int num)
         }
         break;
     default: // SIGSEGV, (SSIGTERM, IGILL)
-        if (ptls->safe_restore)
+        if (jl_get_safe_restore())
             jl_rethrow();
         memset(&Context, 0, sizeof(Context));
         RtlCaptureContext(&Context);
         if (sig == SIGILL)
             jl_show_sigill(&Context);
-        jl_critical_error(sig, &Context, ptls->bt_data, &ptls->bt_size);
+        jl_critical_error(sig, &Context, jl_get_current_task());
         raise(sig);
     }
 }
 
 // StackOverflowException needs extra stack space to record the backtrace
 // so we keep one around, shared by all threads
-static jl_mutex_t backtrace_lock;
-static jl_ucontext_t collect_backtrace_fiber;
-static jl_ucontext_t error_return_fiber;
+static uv_mutex_t backtrace_lock;
+static win32_ucontext_t collect_backtrace_fiber;
+static win32_ucontext_t error_return_fiber;
 static PCONTEXT stkerror_ctx;
 static jl_ptls_t stkerror_ptls;
 static int have_backtrace_fiber;
 static void JL_NORETURN start_backtrace_fiber(void)
 {
     // collect the backtrace
-    stkerror_ptls->bt_size = rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx, stkerror_ptls->pgcstack);
+    stkerror_ptls->bt_size =
+        rec_backtrace_ctx(stkerror_ptls->bt_data, JL_MAX_BT_SIZE, stkerror_ctx,
+                          NULL /*current_task?*/);
     // switch back to the execution fiber
     jl_setcontext(&error_return_fiber);
     abort();
@@ -121,7 +124,8 @@ void restore_signals(void)
 
 void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
 #if defined(_CPU_X86_64_)
     DWORD64 Rsp = (ctxThread->Rsp & (DWORD64)-16) - 8;
 #elif defined(_CPU_X86_)
@@ -129,18 +133,19 @@ void jl_throw_in_ctx(jl_value_t *excpt, PCONTEXT ctxThread)
 #else
 #error WIN16 not supported :P
 #endif
-    if (!ptls->safe_restore) {
+    if (!jl_get_safe_restore()) {
         assert(excpt != NULL);
         ptls->bt_size = 0;
         if (excpt != jl_stackovf_exception) {
-            ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread, ptls->pgcstack);
+            ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, ctxThread,
+                                              ct->gcstack);
         }
         else if (have_backtrace_fiber) {
-            JL_LOCK(&backtrace_lock);
+            uv_mutex_lock(&backtrace_lock);
             stkerror_ctx = ctxThread;
             stkerror_ptls = ptls;
             jl_swapcontext(&error_return_fiber, &collect_backtrace_fiber);
-            JL_UNLOCK_NOGC(&backtrace_lock);
+            uv_mutex_unlock(&backtrace_lock);
         }
         ptls->sig_exception = excpt;
     }
@@ -221,7 +226,8 @@ static BOOL WINAPI sigint_handler(DWORD wsig) //This needs winapi types to guara
 
 LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     if (ExceptionInfo->ExceptionRecord->ExceptionFlags == 0) {
         switch (ExceptionInfo->ExceptionRecord->ExceptionCode) {
             case EXCEPTION_INT_DIVIDE_BY_ZERO:
@@ -247,7 +253,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
                     }
                     return EXCEPTION_CONTINUE_EXECUTION;
                 }
-                if (ptls->safe_restore) {
+                if (jl_get_safe_restore()) {
                     jl_throw_in_ctx(NULL, ExceptionInfo->ContextRecord);
                     return EXCEPTION_CONTINUE_EXECUTION;
                 }
@@ -308,8 +314,7 @@ LONG WINAPI jl_exception_handler(struct _EXCEPTION_POINTERS *ExceptionInfo)
         jl_safe_printf(" at 0x%Ix -- ", (size_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
         jl_print_native_codeloc((uintptr_t)ExceptionInfo->ExceptionRecord->ExceptionAddress);
 
-        jl_critical_error(0, ExceptionInfo->ContextRecord,
-                          ptls->bt_data, &ptls->bt_size);
+        jl_critical_error(0, ExceptionInfo->ContextRecord, ct);
         static int recursion = 0;
         if (recursion++)
             exit(1);
@@ -324,72 +329,89 @@ JL_DLLEXPORT void jl_install_sigint_handler(void)
     SetConsoleCtrlHandler((PHANDLER_ROUTINE)sigint_handler,1);
 }
 
-volatile HANDLE hBtThread = 0;
+static volatile HANDLE hBtThread = 0;
+
 static DWORD WINAPI profile_bt( LPVOID lparam )
 {
-    // Note: illegal to use jl_* functions from this thread
-
-    TIMECAPS tc;
-    if (MMSYSERR_NOERROR != timeGetDevCaps(&tc, sizeof(tc))) {
-        fputs("failed to get timer resolution", stderr);
-        hBtThread = 0;
-        return 0;
-    }
-    timeBeginPeriod(tc.wPeriodMin);
+    // Note: illegal to use jl_* functions from this thread except for profiling-specific functions
     while (1) {
-        DWORD timeout = nsecprof / GIGA;
-        timeout += tc.wPeriodMin;
-        Sleep(timeout);
-        if (bt_size_cur < bt_size_max && running) {
-            JL_LOCK_NOGC(&jl_in_stackwalk);
-            jl_lock_profile();
-            if ((DWORD)-1 == SuspendThread(hMainThread)) {
-                fputs("failed to suspend main thread. aborting profiling.", stderr);
-                break;
+        DWORD timeout_ms = nsecprof / (GIGA / 1000);
+        Sleep(timeout_ms > 0 ? timeout_ms : 1);
+        if (running) {
+            if (jl_profile_is_buffer_full()) {
+                jl_profile_stop_timer(); // does not change the thread state
+                SuspendThread(GetCurrentThread());
+                continue;
             }
-            if (running) {
+            else {
+                uv_mutex_lock(&jl_in_stackwalk);
+                jl_lock_profile();
+                if ((DWORD)-1 == SuspendThread(hMainThread)) {
+                    fputs("failed to suspend main thread. aborting profiling.", stderr);
+                    break;
+                }
                 CONTEXT ctxThread;
                 memset(&ctxThread, 0, sizeof(CONTEXT));
                 ctxThread.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER;
                 if (!GetThreadContext(hMainThread, &ctxThread)) {
                     fputs("failed to get context from main thread. aborting profiling.", stderr);
-                    running = 0;
+                    jl_profile_stop_timer();
                 }
                 else {
                     // Get backtrace data
                     bt_size_cur += rec_backtrace_ctx((jl_bt_element_t*)bt_data_prof + bt_size_cur,
                             bt_size_max - bt_size_cur - 1, &ctxThread, NULL);
-                    // Mark the end of this block with 0
-                    if (bt_size_cur < bt_size_max)
-                        bt_data_prof[bt_size_cur++].uintptr = 0;
+
+                    jl_ptls_t ptls = jl_all_tls_states[0]; // given only profiling hMainThread
+
+                    // store threadid but add 1 as 0 is preserved to indicate end of block
+                    bt_data_prof[bt_size_cur++].uintptr = ptls->tid + 1;
+
+                    // store task id
+                    bt_data_prof[bt_size_cur++].jlvalue = (jl_value_t*)jl_atomic_load_relaxed(&ptls->current_task);
+
+                    // store cpu cycle clock
+                    bt_data_prof[bt_size_cur++].uintptr = cycleclock();
+
+                    // store whether thread is sleeping but add 1 as 0 is preserved to indicate end of block
+                    bt_data_prof[bt_size_cur++].uintptr = jl_atomic_load_relaxed(&ptls->sleep_check_state) + 1;
+
+                    // Mark the end of this block with two 0's
+                    bt_data_prof[bt_size_cur++].uintptr = 0;
+                    bt_data_prof[bt_size_cur++].uintptr = 0;
                 }
+                jl_unlock_profile();
+                uv_mutex_unlock(&jl_in_stackwalk);
+                if ((DWORD)-1 == ResumeThread(hMainThread)) {
+                    jl_profile_stop_timer();
+                    fputs("failed to resume main thread! aborting.", stderr);
+                    jl_gc_debug_critical_error();
+                    abort();
+                }
+                jl_check_profile_autostop();
             }
-            jl_unlock_profile();
-            JL_UNLOCK_NOGC(&jl_in_stackwalk);
-            if ((DWORD)-1 == ResumeThread(hMainThread)) {
-                timeEndPeriod(tc.wPeriodMin);
-                fputs("failed to resume main thread! aborting.", stderr);
-                gc_debug_critical_error();
-                abort();
-            }
-        }
-        else {
-            timeEndPeriod(tc.wPeriodMin);
-            SuspendThread(GetCurrentThread());
-            timeBeginPeriod(tc.wPeriodMin);
         }
     }
     jl_unlock_profile();
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
-    timeEndPeriod(tc.wPeriodMin);
+    uv_mutex_unlock(&jl_in_stackwalk);
+    jl_profile_stop_timer();
     hBtThread = 0;
     return 0;
 }
 
+static volatile TIMECAPS timecaps;
+
 JL_DLLEXPORT int jl_profile_start_timer(void)
 {
-    running = 1;
-    if (hBtThread == 0) {
+    if (hBtThread == NULL) {
+
+        TIMECAPS _timecaps;
+        if (MMSYSERR_NOERROR != timeGetDevCaps(&_timecaps, sizeof(_timecaps))) {
+            fputs("failed to get timer resolution", stderr);
+            return -2;
+        }
+        timecaps = _timecaps;
+
         hBtThread = CreateThread(
             NULL,                   // default security attributes
             0,                      // use default stack size
@@ -397,6 +419,8 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
             0,                      // argument to thread function
             0,                      // use default creation flags
             0);                     // returns the thread identifier
+        if (hBtThread == NULL)
+            return -1;
         (void)SetThreadPriority(hBtThread, THREAD_PRIORITY_ABOVE_NORMAL);
     }
     else {
@@ -405,10 +429,19 @@ JL_DLLEXPORT int jl_profile_start_timer(void)
             return -2;
         }
     }
-    return (hBtThread != NULL ? 0 : -1);
+    if (running == 0) {
+        // Failure to change the timer resolution is not fatal. However, it is important to
+        // ensure that the timeBeginPeriod/timeEndPeriod is paired.
+        if (TIMERR_NOERROR != timeBeginPeriod(timecaps.wPeriodMin))
+            timecaps.wPeriodMin = 0;
+    }
+    running = 1; // set `running` finally
+    return 0;
 }
 JL_DLLEXPORT void jl_profile_stop_timer(void)
 {
+    if (running && timecaps.wPeriodMin)
+        timeEndPeriod(timecaps.wPeriodMin);
     running = 0;
 }
 
@@ -442,6 +475,6 @@ void jl_install_thread_signal_handler(jl_ptls_t ptls)
     collect_backtrace_fiber.uc_stack.ss_sp = (void*)stk;
     collect_backtrace_fiber.uc_stack.ss_size = ssize;
     jl_makecontext(&collect_backtrace_fiber, start_backtrace_fiber);
-    JL_MUTEX_INIT(&backtrace_lock);
+    uv_mutex_init(&backtrace_lock);
     have_backtrace_fiber = 1;
 }
diff --git a/src/simplevector.c b/src/simplevector.c
index 41b1be14da7f4c..988cf18ccc9b64 100644
--- a/src/simplevector.c
+++ b/src/simplevector.c
@@ -7,13 +7,13 @@
 #include "julia_internal.h"
 #include "julia_assert.h"
 
-JL_DLLEXPORT jl_svec_t *(jl_svec)(size_t n, ...)
+JL_DLLEXPORT jl_svec_t *(ijl_svec)(size_t n, ...)
 {
     va_list args;
     if (n == 0) return jl_emptysvec;
     va_start(args, n);
     jl_svec_t *jv = jl_alloc_svec_uninit(n);
-    for(size_t i=0; i < n; i++)
+    for (size_t i = 0; i < n; i++)
         jl_svecset(jv, i, va_arg(args, jl_value_t*));
     va_end(args);
     return jv;
@@ -34,30 +34,30 @@ jl_svec_t *(jl_perm_symsvec)(size_t n, ...)
 
 JL_DLLEXPORT jl_svec_t *jl_svec1(void *a)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 2,
+    jl_task_t *ct = jl_current_task;
+    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 2,
                                            jl_simplevector_type);
     jl_svec_set_len_unsafe(v, 1);
-    jl_svecset(v, 0, a);
+    jl_svec_data(v)[0] = (jl_value_t*)a;
     return v;
 }
 
 JL_DLLEXPORT jl_svec_t *jl_svec2(void *a, void *b)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ptls, sizeof(void*) * 3,
+    jl_task_t *ct = jl_current_task;
+    jl_svec_t *v = (jl_svec_t*)jl_gc_alloc(ct->ptls, sizeof(void*) * 3,
                                            jl_simplevector_type);
     jl_svec_set_len_unsafe(v, 2);
-    jl_svecset(v, 0, a);
-    jl_svecset(v, 1, b);
+    jl_svec_data(v)[0] = (jl_value_t*)a;
+    jl_svec_data(v)[1] = (jl_value_t*)b;
     return v;
 }
 
 JL_DLLEXPORT jl_svec_t *jl_alloc_svec_uninit(size_t n)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (n == 0) return jl_emptysvec;
-    jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ptls, (n + 1) * sizeof(void*),
+    jl_svec_t *jv = (jl_svec_t*)jl_gc_alloc(ct->ptls, (n + 1) * sizeof(void*),
                                             jl_simplevector_type);
     jl_svec_set_len_unsafe(jv, n);
     return jv;
@@ -67,26 +67,24 @@ JL_DLLEXPORT jl_svec_t *jl_alloc_svec(size_t n)
 {
     if (n == 0) return jl_emptysvec;
     jl_svec_t *jv = jl_alloc_svec_uninit(n);
-    for(size_t i=0; i < n; i++)
-        jl_svecset(jv, i, NULL);
+    memset(jl_assume_aligned(jl_svec_data(jv), sizeof(void*)), 0, n * sizeof(void*));
     return jv;
 }
 
 JL_DLLEXPORT jl_svec_t *jl_svec_copy(jl_svec_t *a)
 {
-    size_t i, n=jl_svec_len(a);
+    size_t n = jl_svec_len(a);
     jl_svec_t *c = jl_alloc_svec_uninit(n);
-    for(i=0; i < n; i++)
-        jl_svecset(c, i, jl_svecref(a,i));
+    memmove_refs((void**)jl_svec_data(c), (void**)jl_svec_data(a), n);
     return c;
 }
 
 JL_DLLEXPORT jl_svec_t *jl_svec_fill(size_t n, jl_value_t *x)
 {
-    if (n==0) return jl_emptysvec;
+    if (n == 0) return jl_emptysvec;
     jl_svec_t *v = jl_alloc_svec_uninit(n);
-    for(size_t i=0; i < n; i++)
-        jl_svecset(v, i, x);
+    for (size_t i = 0; i < n; i++)
+        jl_svec_data(v)[i] = x;
     return v;
 }
 
diff --git a/src/smallintset.c b/src/smallintset.c
index 7598d8fd85ce4f..54fdad616a7581 100644
--- a/src/smallintset.c
+++ b/src/smallintset.c
@@ -130,14 +130,16 @@ static int smallintset_insert_(jl_array_t *a, uint_t hv, size_t val1)
     return 0;
 }
 
-static void smallintset_rehash(jl_array_t **cache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np);
+static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np);
 
-void jl_smallintset_insert(jl_array_t **cache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data)
+void jl_smallintset_insert(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, size_t val, jl_svec_t *data)
 {
-    if (val + 1 >  jl_max_int(*cache))
-        smallintset_rehash(cache, parent, hash, data, jl_array_len(*cache), val + 1);
+    jl_array_t *a = jl_atomic_load_relaxed(pcache);
+    if (val + 1 >  jl_max_int(a))
+        smallintset_rehash(pcache, parent, hash, data, jl_array_len(a), val + 1);
     while (1) {
-        if (smallintset_insert_(*cache, hash(val, data), val + 1))
+        a = jl_atomic_load_relaxed(pcache);
+        if (smallintset_insert_(a, hash(val, data), val + 1))
             return;
 
         /* table full */
@@ -145,20 +147,21 @@ void jl_smallintset_insert(jl_array_t **cache, jl_value_t *parent, smallintset_h
         /* it's important to grow the table really fast; otherwise we waste */
         /* lots of time rehashing all the keys over and over. */
         size_t newsz;
-        size_t sz = jl_array_len(*cache);
+        a = jl_atomic_load_relaxed(pcache);
+        size_t sz = jl_array_len(a);
         if (sz < HT_N_INLINE)
             newsz = HT_N_INLINE;
         else if (sz >= (1 << 19) || (sz <= (1 << 8)))
             newsz = sz << 1;
         else
             newsz = sz << 2;
-        smallintset_rehash(cache, parent, hash, data, newsz, 0);
+        smallintset_rehash(pcache, parent, hash, data, newsz, 0);
     }
 }
 
-static void smallintset_rehash(jl_array_t **cache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np)
+static void smallintset_rehash(_Atomic(jl_array_t*) *pcache, jl_value_t *parent, smallintset_hash hash, jl_svec_t *data, size_t newsz, size_t np)
 {
-    jl_array_t *a = *cache;
+    jl_array_t *a = jl_atomic_load_relaxed(pcache);
     size_t sz = jl_array_len(a);
     size_t i;
     for (i = 0; i < sz; i += 1) {
@@ -179,7 +182,7 @@ static void smallintset_rehash(jl_array_t **cache, jl_value_t *parent, smallints
         }
         JL_GC_POP();
         if (i == sz) {
-            *cache = newa;
+            jl_atomic_store_release(pcache, newa);
             jl_gc_wb(parent, newa);
             return;
         }
diff --git a/src/stackwalk.c b/src/stackwalk.c
index 5ed280c14b1fe0..0574a8393a15ac 100644
--- a/src/stackwalk.c
+++ b/src/stackwalk.c
@@ -13,12 +13,12 @@
 // define `jl_unw_get` as a macro, since (like setjmp)
 // returning from the callee function will invalidate the context
 #ifdef _OS_WINDOWS_
-jl_mutex_t jl_in_stackwalk;
-#define jl_unw_get(context) RtlCaptureContext(context)
+uv_mutex_t jl_in_stackwalk;
+#define jl_unw_get(context) (RtlCaptureContext(context), 0)
 #elif !defined(JL_DISABLE_LIBUNWIND)
 #define jl_unw_get(context) unw_getcontext(context)
 #else
-void jl_unw_get(void *context) {};
+int jl_unw_get(void *context) { return -1; }
 #endif
 
 #ifdef __cplusplus
@@ -26,7 +26,7 @@ extern "C" {
 #endif
 
 static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *context) JL_NOTSAFEPOINT;
-static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp) JL_NOTSAFEPOINT;
+static int jl_unw_step(bt_cursor_t *cursor, int from_signal_handler, uintptr_t *ip, uintptr_t *sp) JL_NOTSAFEPOINT;
 
 static jl_gcframe_t *is_enter_interpreter_frame(jl_gcframe_t **ppgcstack, uintptr_t sp) JL_NOTSAFEPOINT
 {
@@ -54,7 +54,7 @@ static jl_gcframe_t *is_enter_interpreter_frame(jl_gcframe_t **ppgcstack, uintpt
 // the call instruction. The first `skip` frames are not included in `bt_data`.
 //
 // `maxsize` is the size of the buffer `bt_data` (and `sp` if non-NULL). It
-// must be at least JL_BT_MAX_ENTRY_SIZE to accommodate extended backtrace
+// must be at least `JL_BT_MAX_ENTRY_SIZE + 1` to accommodate extended backtrace
 // entries.  If `sp != NULL`, the stack pointer corresponding `bt_data[i]` is
 // stored in `sp[i]`.
 //
@@ -75,7 +75,7 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
     uintptr_t return_ip = 0;
     uintptr_t thesp = 0;
 #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     if (!from_signal_handler) {
         // Workaround 32-bit windows bug missing top frame
         // See for example https://bugs.chromium.org/p/crashpad/issues/detail?id=53
@@ -83,11 +83,10 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
     }
 #endif
 #if !defined(_OS_WINDOWS_)
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_jmp_buf *old_buf = ptls->safe_restore;
+    jl_jmp_buf *old_buf = jl_get_safe_restore();
     jl_jmp_buf buf;
+    jl_set_safe_restore(&buf);
     if (!jl_setjmp(buf, 0)) {
-        ptls->safe_restore = &buf;
 #endif
         int have_more_frames = 1;
         while (have_more_frames) {
@@ -96,13 +95,25 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
                 need_more_space = 1;
                 break;
             }
-            have_more_frames = jl_unw_step(cursor, &return_ip, &thesp);
+            uintptr_t oldsp = thesp;
+            have_more_frames = jl_unw_step(cursor, from_signal_handler, &return_ip, &thesp);
+            if (oldsp >= thesp && !jl_running_under_rr(0)) {
+                // The stack pointer is clearly bad, as it must grow downwards.
+                // But sometimes the external unwinder doesn't check that.
+                have_more_frames = 0;
+            }
+            if (return_ip == 0) {
+                // The return address is clearly wrong, and while the unwinder
+                // might try to continue (by popping another stack frame), that
+                // likely won't work well, and it'll confuse the stack frame
+                // separator detection logic (double-NULL).
+                have_more_frames = 0;
+            }
             if (skip > 0) {
                 skip--;
+                from_signal_handler = 0;
                 continue;
             }
-            if (sp)
-                sp[n] = thesp;
             // For the purposes of looking up debug info for functions, we want
             // to harvest addresses for the *call* instruction `call_ip` during
             // stack walking.  However, this information isn't directly
@@ -132,12 +143,12 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
             //   which we can get from the return address via `call_ip = return_ip - 1`.
             // * Code which was interrupted asynchronously (eg, via a signal)
             //   is expected to have `call_ip == return_ip`.
-            if (n != 0 || !from_signal_handler) {
-                // normal frame
-                call_ip -= 1;
-            }
-            if (call_ip == JL_BT_NON_PTR_ENTRY) {
+            if (!from_signal_handler)
+                call_ip -= 1; // normal frame
+            from_signal_handler = 0;
+            if (call_ip == JL_BT_NON_PTR_ENTRY || call_ip == 0) {
                 // Never leave special marker in the bt data as it can corrupt the GC.
+                have_more_frames = 0;
                 call_ip = 0;
             }
             jl_bt_element_t *bt_entry = bt_data + n;
@@ -155,6 +166,8 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
                 }
             }
             bt_entry->uintptr = call_ip;
+            if (sp)
+                sp[n] = thesp;
             n++;
         }
         // NOTE: if we have some pgcstack entries remaining (because the
@@ -169,10 +182,10 @@ static int jl_unw_stepn(bt_cursor_t *cursor, jl_bt_element_t *bt_data, size_t *b
         // reader happy.
         if (n > 0) n -= 1;
     }
-    ptls->safe_restore = old_buf;
+    jl_set_safe_restore(old_buf);
 #endif
 #if defined(_OS_WINDOWS_) && !defined(_CPU_X86_64_)
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
 #endif
     *bt_size = n;
     return need_more_space;
@@ -198,7 +211,9 @@ NOINLINE size_t rec_backtrace(jl_bt_element_t *bt_data, size_t maxsize, int skip
 {
     bt_context_t context;
     memset(&context, 0, sizeof(context));
-    jl_unw_get(&context);
+    int r = jl_unw_get(&context);
+    if (r < 0)
+        return 0;
     jl_gcframe_t *pgcstack = jl_pgcstack;
     bt_cursor_t cursor;
     if (!jl_unw_init(&cursor, &context))
@@ -233,9 +248,9 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip)
     bt_context_t context;
     bt_cursor_t cursor;
     memset(&context, 0, sizeof(context));
-    jl_unw_get(&context);
+    int r = jl_unw_get(&context);
     jl_gcframe_t *pgcstack = jl_pgcstack;
-    if (jl_unw_init(&cursor, &context)) {
+    if (r == 0 && jl_unw_init(&cursor, &context)) {
         // Skip frame for jl_backtrace_from_here itself
         skip += 1;
         size_t offset = 0;
@@ -244,8 +259,8 @@ JL_DLLEXPORT jl_value_t *jl_backtrace_from_here(int returnsp, int skip)
             jl_array_grow_end(ip, maxincr);
             uintptr_t *sp_ptr = NULL;
             if (returnsp) {
-                sp_ptr = (uintptr_t*)jl_array_data(sp) + offset;
                 jl_array_grow_end(sp, maxincr);
+                sp_ptr = (uintptr_t*)jl_array_data(sp) + offset;
             }
             size_t size_incr = 0;
             have_more_frames = jl_unw_stepn(&cursor, (jl_bt_element_t*)jl_array_data(ip) + offset,
@@ -306,7 +321,7 @@ static void decode_backtrace(jl_bt_element_t *bt_data, size_t bt_size,
 
 JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
 {
-    jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack;
+    jl_excstack_t *s = jl_current_task->excstack;
     jl_bt_element_t *bt_data = NULL;
     size_t bt_size = 0;
     if (s && s->top) {
@@ -327,9 +342,9 @@ JL_DLLEXPORT jl_value_t *jl_get_backtrace(void)
 // interleaved.
 JL_DLLEXPORT jl_value_t *jl_get_excstack(jl_task_t* task, int include_bt, int max_entries)
 {
-    JL_TYPECHK(catch_stack, task, (jl_value_t*)task);
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (task != ptls->current_task && task->_state == JL_TASK_STATE_RUNNABLE) {
+    JL_TYPECHK(current_exceptions, task, (jl_value_t*)task);
+    jl_task_t *ct = jl_current_task;
+    if (task != ct && jl_atomic_load_relaxed(&task->_state) == JL_TASK_STATE_RUNNABLE) {
         jl_error("Inspecting the exception stack of a task which might "
                  "be running concurrently isn't allowed.");
     }
@@ -377,9 +392,9 @@ static PVOID CALLBACK JuliaFunctionTableAccess64(
     PRUNTIME_FUNCTION fn = RtlLookupFunctionEntry(AddrBase, &ImageBase, &HistoryTable);
     if (fn)
         return fn;
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     PVOID ftable = SymFunctionTableAccess64(hProcess, AddrBase);
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     return ftable;
 #else
     return SymFunctionTableAccess64(hProcess, AddrBase);
@@ -395,9 +410,9 @@ static DWORD64 WINAPI JuliaGetModuleBase64(
     PRUNTIME_FUNCTION fn = RtlLookupFunctionEntry(dwAddr, &ImageBase, &HistoryTable);
     if (fn)
         return ImageBase;
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     DWORD64 fbase = SymGetModuleBase64(hProcess, dwAddr);
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     return fbase;
 #else
     if (dwAddr == HistoryTable.dwAddr)
@@ -416,7 +431,7 @@ static DWORD64 WINAPI JuliaGetModuleBase64(
 volatile int needsSymRefreshModuleList;
 BOOL (WINAPI *hSymRefreshModuleList)(HANDLE);
 
-void jl_refresh_dbg_module_list(void)
+JL_DLLEXPORT void jl_refresh_dbg_module_list(void)
 {
     if (needsSymRefreshModuleList && hSymRefreshModuleList != NULL) {
         hSymRefreshModuleList(GetCurrentProcess());
@@ -426,7 +441,7 @@ void jl_refresh_dbg_module_list(void)
 static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *Context)
 {
     int result;
-    JL_LOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_lock(&jl_in_stackwalk);
     jl_refresh_dbg_module_list();
 #if !defined(_CPU_X86_64_)
     memset(&cursor->stackframe, 0, sizeof(cursor->stackframe));
@@ -444,7 +459,7 @@ static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *Context)
     *cursor = *Context;
     result = 1;
 #endif
-    JL_UNLOCK_NOGC(&jl_in_stackwalk);
+    uv_mutex_unlock(&jl_in_stackwalk);
     return result;
 }
 
@@ -462,7 +477,7 @@ static int readable_pointer(LPCVOID pointer)
     return 1;
 }
 
-static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp)
+static int jl_unw_step(bt_cursor_t *cursor, int from_signal_handler, uintptr_t *ip, uintptr_t *sp)
 {
     // Might be called from unmanaged thread.
 #ifndef _CPU_X86_64_
@@ -482,7 +497,7 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp)
 #else
     *ip = (uintptr_t)cursor->Rip;
     *sp = (uintptr_t)cursor->Rsp;
-    if (*ip == 0) {
+    if (*ip == 0 && from_signal_handler) {
         if (!readable_pointer((LPCVOID)*sp))
             return 0;
         cursor->Rip = *(DWORD64*)*sp;      // POP RIP (aka RET)
@@ -490,12 +505,12 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp)
         return cursor->Rip != 0;
     }
 
-    DWORD64 ImageBase = JuliaGetModuleBase64(GetCurrentProcess(), cursor->Rip);
+    DWORD64 ImageBase = JuliaGetModuleBase64(GetCurrentProcess(), cursor->Rip - !from_signal_handler);
     if (!ImageBase)
         return 0;
 
     PRUNTIME_FUNCTION FunctionEntry = (PRUNTIME_FUNCTION)JuliaFunctionTableAccess64(
-        GetCurrentProcess(), cursor->Rip);
+        GetCurrentProcess(), cursor->Rip - !from_signal_handler);
     if (!FunctionEntry) {
         // Not code or bad unwind?
         return 0;
@@ -525,8 +540,9 @@ static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *context)
     return unw_init_local(cursor, context) == 0;
 }
 
-static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp)
+static int jl_unw_step(bt_cursor_t *cursor, int from_signal_handler, uintptr_t *ip, uintptr_t *sp)
 {
+    (void)from_signal_handler; // libunwind also tracks this
     unw_word_t reg;
     if (unw_get_reg(cursor, UNW_REG_IP, &reg) < 0)
         return 0;
@@ -537,7 +553,7 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp)
     return unw_step(cursor) > 0;
 }
 
-#ifdef LIBOSXUNWIND
+#ifdef LLVMLIBUNWIND
 NOINLINE size_t rec_backtrace_ctx_dwarf(jl_bt_element_t *bt_data, size_t maxsize,
                                         bt_context_t *context, jl_gcframe_t *pgcstack)
 {
@@ -557,7 +573,7 @@ static int jl_unw_init(bt_cursor_t *cursor, bt_context_t *context)
     return 0;
 }
 
-static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp)
+static int jl_unw_step(bt_cursor_t *cursor, int from_signal_handler, uintptr_t *ip, uintptr_t *sp)
 {
     return 0;
 }
@@ -565,11 +581,11 @@ static int jl_unw_step(bt_cursor_t *cursor, uintptr_t *ip, uintptr_t *sp)
 
 JL_DLLEXPORT jl_value_t *jl_lookup_code_address(void *ip, int skipC)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_frame_t *frames = NULL;
-    int8_t gc_state = jl_gc_safe_enter(ptls);
+    int8_t gc_state = jl_gc_safe_enter(ct->ptls);
     int n = jl_getFunctionInfo(&frames, (uintptr_t)ip, skipC, 0);
-    jl_gc_safe_leave(ptls, gc_state);
+    jl_gc_safe_leave(ct->ptls, gc_state);
     jl_value_t *rs = (jl_value_t*)jl_alloc_svec(n);
     JL_GC_PUSH1(&rs);
     for (int i = 0; i < n; i++) {
@@ -579,12 +595,12 @@ JL_DLLEXPORT jl_value_t *jl_lookup_code_address(void *ip, int skipC)
         if (frame.func_name)
             jl_svecset(r, 0, jl_symbol(frame.func_name));
         else
-            jl_svecset(r, 0, empty_sym);
+            jl_svecset(r, 0, jl_empty_sym);
         free(frame.func_name);
         if (frame.file_name)
             jl_svecset(r, 1, jl_symbol(frame.file_name));
         else
-            jl_svecset(r, 1, empty_sym);
+            jl_svecset(r, 1, jl_empty_sym);
         free(frame.file_name);
         jl_svecset(r, 2, jl_box_long(frame.line));
         jl_svecset(r, 3, frame.linfo != NULL ? (jl_value_t*)frame.linfo : jl_nothing);
@@ -681,8 +697,379 @@ void jl_print_bt_entry_codeloc(jl_bt_element_t *bt_entry) JL_NOTSAFEPOINT
     }
 }
 
+
+#ifdef _OS_LINUX_
+#if defined(__GLIBC__) && defined(_CPU_AARCH64_)
+#define LONG_JMP_SP_ENV_SLOT 13
+static uintptr_t julia_longjmp_xor_key;
+// GLIBC mangles the function pointers in jmp_buf (used in {set,long}*jmp
+// functions) by XORing them with a random key.  For AArch64 it is a global
+// variable rather than a TCB one (as for x86_64/powerpc).  We obtain the key by
+// issuing a setjmp and XORing the SP pointer values to derive the key.
+static void JuliaInitializeLongjmpXorKey(void)
+{
+    // 1. Call REAL(setjmp), which stores the mangled SP in env.
+    jmp_buf env;
+    _setjmp(env);
+
+    // 2. Retrieve vanilla/mangled SP.
+    uintptr_t sp;
+    asm("mov  %0, sp" : "=r" (sp));
+    uintptr_t mangled_sp = ((uintptr_t*)&env)[LONG_JMP_SP_ENV_SLOT];
+
+    // 3. xor SPs to obtain key.
+    julia_longjmp_xor_key = mangled_sp ^ sp;
+}
+#endif
+
+JL_UNUSED static uintptr_t ptr_demangle(uintptr_t p)
+{
+#if defined(__GLIBC__)
+#if defined(_CPU_X86_)
+// from https://github.com/bminor/glibc/blame/master/sysdeps/unix/sysv/linux/i386/sysdep.h
+// last changed for GLIBC_2.6 on 2007-02-01
+    asm(" rorl $9, %0\n"
+        " xorl %%gs:0x18, %0"
+        : "=r"(p) : "0"(p) : );
+#elif defined(_CPU_X86_64_)
+// from https://github.com/bminor/glibc/blame/master/sysdeps/unix/sysv/linux/i386/sysdep.h
+    asm(" rorq $17, %0\n"
+        " xorq %%fs:0x30, %0"
+        : "=r"(p) : "0"(p) : );
+#elif defined(_CPU_AARCH64_)
+// from https://github.com/bminor/glibc/blame/master/sysdeps/unix/sysv/linux/aarch64/sysdep.h
+// We need to use a trick like this (from GCC/LLVM TSAN) to get access to it:
+// https://github.com/llvm/llvm-project/commit/daa3ebce283a753f280c549cdb103fbb2972f08e
+    static pthread_once_t once = PTHREAD_ONCE_INIT;
+    pthread_once(&once, &JuliaInitializeLongjmpXorKey);
+    p ^= julia_longjmp_xor_key;
+#elif defined(_CPU_ARM_)
+// from https://github.com/bminor/glibc/blame/master/sysdeps/unix/sysv/linux/arm/sysdep.h
+    ; // nothing to do
+#endif
+#endif
+    return p;
+}
+#endif
+
+// n.b. musl does not mangle pointers, but intentionally makes that impossible
+// to determine (https://www.openwall.com/lists/musl/2013/03/29/13) so we do
+// not support musl here.
+
+// n.b. We have not looked at other libc (e.g. ulibc), though they are probably
+// often compatible with glibc (perhaps with or without pointer mangling).
+
+
+#ifdef _OS_DARWIN_
+// from https://github.com/apple/darwin-xnu/blame/main/libsyscall/os/tsd.h
+#define __TSD_PTR_MUNGE 7
+
+#if defined(__i386__) || defined(__x86_64__)
+
+#if defined(__has_attribute)
+#if __has_attribute(address_space)
+#define OS_GS_RELATIVE  __attribute__((address_space(256)))
+#endif
+#endif
+
+#ifdef OS_GS_RELATIVE
+#define _os_tsd_get_base() ((void * OS_GS_RELATIVE *)0)
+#else
+__attribute__((always_inline))
+static __inline__ void*
+_os_tsd_get_direct(unsigned long slot)
+{
+    void *ret;
+    __asm__("mov %%gs:%1, %0" : "=r" (ret) : "m" (*(void **)(slot * sizeof(void *))));
+    return ret;
+}
+#endif
+
+#elif defined(__arm__) || defined(__arm64__)
+// Unconditionally defined ptrauth_strip (instead of using the ptrauth.h header)
+// since libsystem will likely be compiled with -mbranch-protection, and we currently are not.
+// code from https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/compiler-rt/lib/sanitizer_common/sanitizer_ptrauth.h
+inline uint64_t ptrauth_strip(uint64_t __value, unsigned int __key) {
+  // On the stack the link register is protected with Pointer
+  // Authentication Code when compiled with -mbranch-protection.
+  // Let's strip the PAC unconditionally because xpaclri is in the NOP space,
+  // so will do nothing when it is not enabled or not available.
+  uint64_t ret;
+  asm volatile(
+      "mov x30, %1\n\t"
+      "hint #7\n\t"  // xpaclri
+      "mov %0, x30\n\t"
+      : "=r"(ret)
+      : "r"(__value)
+      : "x30");
+  return ret;
+}
+
+__attribute__((always_inline, pure))
+static __inline__ void**
+_os_tsd_get_base(void)
+{
+#if defined(__arm__)
+    uintptr_t tsd;
+    __asm__("mrc p15, 0, %0, c13, c0, 3\n"
+            "bic %0, %0, #0x3\n" : "=r" (tsd));
+    /* lower 2-bits contain CPU number */
+#elif defined(__arm64__)
+    uint64_t tsd;
+    __asm__("mrs %0, TPIDRRO_EL0\n"
+            "bic %0, %0, #0x7\n" : "=r" (tsd));
+    /* lower 3-bits contain CPU number */
+#endif
+
+    return (void**)(uintptr_t)tsd;
+}
+#define _os_tsd_get_base()  _os_tsd_get_base()
+#endif
+
+#ifdef _os_tsd_get_base
+__attribute__((always_inline))
+static __inline__ void*
+_os_tsd_get_direct(unsigned long slot)
+{
+    return _os_tsd_get_base()[slot];
+}
+#endif
+
+__attribute__((always_inline, pure))
+static __inline__ uintptr_t
+_os_ptr_munge_token(void)
+{
+    return (uintptr_t)_os_tsd_get_direct(__TSD_PTR_MUNGE);
+}
+
+__attribute__((always_inline, pure))
+JL_UNUSED static __inline__ uintptr_t
+_os_ptr_munge(uintptr_t ptr)
+{
+    return ptr ^ _os_ptr_munge_token();
+}
+#define _OS_PTR_UNMUNGE(_ptr) _os_ptr_munge((uintptr_t)(_ptr))
+#endif
+
+
+extern bt_context_t *jl_to_bt_context(void *sigctx);
+
+void jl_rec_backtrace(jl_task_t *t)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    ptls->bt_size = 0;
+    if (t == ct) {
+        ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, 0);
+        return;
+    }
+    if (t->copy_stack || !t->started || t->stkbuf == NULL)
+        return;
+    int16_t old = -1;
+    if (!jl_atomic_cmpswap(&t->tid, &old, ptls->tid) && old != ptls->tid)
+        return;
+    bt_context_t *context = NULL;
+#if defined(_OS_WINDOWS_)
+    bt_context_t c;
+    memset(&c, 0, sizeof(c));
+    _JUMP_BUFFER *mctx = (_JUMP_BUFFER*)&t->ctx.ctx.uc_mcontext;
+#if defined(_CPU_X86_64_)
+    c.Rbx = mctx->Rbx;
+    c.Rsp = mctx->Rsp;
+    c.Rbp = mctx->Rbp;
+    c.Rsi = mctx->Rsi;
+    c.Rdi = mctx->Rdi;
+    c.R12 = mctx->R12;
+    c.R13 = mctx->R13;
+    c.R14 = mctx->R14;
+    c.R15 = mctx->R15;
+    c.Rip = mctx->Rip;
+    memcpy(&c.Xmm6, &mctx->Xmm6, 10 * sizeof(mctx->Xmm6)); // Xmm6-Xmm15
+#else
+    c.Eip = mctx->Eip;
+    c.Esp = mctx->Esp;
+    c.Ebp = mctx->Ebp;
+#endif
+    context = &c;
+#elif defined(JL_HAVE_UNW_CONTEXT)
+    context = &t->ctx.ctx;
+#elif defined(JL_HAVE_UCONTEXT)
+    context = jl_to_bt_context(&t->ctx.ctx);
+#elif defined(JL_HAVE_ASM)
+    bt_context_t c;
+    memset(&c, 0, sizeof(c));
+ #if defined(_OS_LINUX_) && defined(__GLIBC__)
+    __jmp_buf *mctx = &t->ctx.ctx.uc_mcontext->__jmpbuf;
+    mcontext_t *mc = &c.uc_mcontext;
+  #if defined(_CPU_X86_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/i386/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/i386/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/i386/longjmp.s
+    mc->gregs[REG_EBX] = (*mctx)[0];
+    mc->gregs[REG_ESI] = (*mctx)[1];
+    mc->gregs[REG_EDI] = (*mctx)[2];
+    mc->gregs[REG_EBP] = (*mctx)[3];
+    mc->gregs[REG_ESP] = (*mctx)[4];
+    mc->gregs[REG_EIP] = (*mctx)[5];
+    // ifdef PTR_DEMANGLE ?
+    mc->gregs[REG_ESP] = ptr_demangle(mc->gregs[REG_ESP]);
+    mc->gregs[REG_EIP] = ptr_demangle(mc->gregs[REG_EIP]);
+    context = &c;
+  #elif defined(_CPU_X86_64_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/x86_64/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/x86_64/setjmp.s
+    mc->gregs[REG_RBX] = (*mctx)[0];
+    mc->gregs[REG_RBP] = (*mctx)[1];
+    mc->gregs[REG_R12] = (*mctx)[2];
+    mc->gregs[REG_R13] = (*mctx)[3];
+    mc->gregs[REG_R14] = (*mctx)[4];
+    mc->gregs[REG_R15] = (*mctx)[5];
+    mc->gregs[REG_RSP] = (*mctx)[6];
+    mc->gregs[REG_RIP] = (*mctx)[7];
+    // ifdef PTR_DEMANGLE ?
+    mc->gregs[REG_RBP] = ptr_demangle(mc->gregs[REG_RBP]);
+    mc->gregs[REG_RSP] = ptr_demangle(mc->gregs[REG_RSP]);
+    mc->gregs[REG_RIP] = ptr_demangle(mc->gregs[REG_RIP]);
+    context = &c;
+  #elif defined(_CPU_ARM_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/arm/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/arm/include/bits/setjmp.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/arm/longjmp.S
+    mc->arm_sp = (*mctx)[0];
+    mc->arm_lr = (*mctx)[1];
+    mc->arm_r4 = (*mctx)[2]; // aka v1
+    mc->arm_r5 = (*mctx)[3]; // aka v2
+    mc->arm_r6 = (*mctx)[4]; // aka v3
+    mc->arm_r7 = (*mctx)[5]; // aka v4
+    mc->arm_r8 = (*mctx)[6]; // aka v5
+    mc->arm_r9 = (*mctx)[7]; // aka v6 aka sb
+    mc->arm_r10 = (*mctx)[8]; // aka v7 aka sl
+    mc->arm_fp = (*mctx)[10]; // aka v8 aka r11
+    // ifdef PTR_DEMANGLE ?
+    mc->arm_sp = ptr_demangle(mc->arm_sp);
+    mc->arm_lr = ptr_demangle(mc->arm_lr);
+    mc->arm_pc = mc->arm_lr;
+    context = &c;
+  #elif defined(_CPU_AARCH64_)
+    // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/__longjmp.S
+    // https://github.com/bminor/glibc/blame/master/sysdeps/aarch64/jmpbuf-offsets.h
+    // https://github.com/bminor/musl/blame/master/src/setjmp/aarch64/longjmp.s
+    // https://github.com/libunwind/libunwind/blob/ec171c9ba7ea3abb2a1383cee2988a7abd483a1f/src/aarch64/unwind_i.h#L62
+    unw_fpsimd_context_t *mcfp = (unw_fpsimd_context_t*)&mc->__reserved;
+    mc->regs[19] = (*mctx)[0];
+    mc->regs[20] = (*mctx)[1];
+    mc->regs[21] = (*mctx)[2];
+    mc->regs[22] = (*mctx)[3];
+    mc->regs[23] = (*mctx)[4];
+    mc->regs[24] = (*mctx)[5];
+    mc->regs[25] = (*mctx)[6];
+    mc->regs[26] = (*mctx)[7];
+    mc->regs[27] = (*mctx)[8];
+    mc->regs[28] = (*mctx)[9];
+    mc->regs[29] = (*mctx)[10]; // aka fp
+    mc->regs[30] = (*mctx)[11]; // aka lr
+    // Yes, they did skip 12 why writing the code originally; and, no, I do not know why.
+    mc->sp = (*mctx)[13];
+    mcfp->vregs[7] = (*mctx)[14]; // aka d8
+    mcfp->vregs[8] = (*mctx)[15]; // aka d9
+    mcfp->vregs[9] = (*mctx)[16]; // aka d10
+    mcfp->vregs[10] = (*mctx)[17]; // aka d11
+    mcfp->vregs[11] = (*mctx)[18]; // aka d12
+    mcfp->vregs[12] = (*mctx)[19]; // aka d13
+    mcfp->vregs[13] = (*mctx)[20]; // aka d14
+    mcfp->vregs[14] = (*mctx)[21]; // aka d15
+    // ifdef PTR_DEMANGLE ?
+    mc->sp = ptr_demangle(mc->sp);
+    mc->regs[30] = ptr_demangle(mc->regs[30]);
+    mc->pc = mc->regs[30];
+    context = &c;
+  #else
+   #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown linux")
+   (void)mc;
+   (void)c;
+  #endif
+ #elif defined(_OS_DARWIN_)
+    sigjmp_buf *mctx = &t->ctx.ctx.uc_mcontext;
+  #if defined(_CPU_X86_64_)
+    // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/x86_64/_setjmp.s
+    x86_thread_state64_t *mc = (x86_thread_state64_t*)&c;
+    mc->__rbx = ((uint64_t*)mctx)[0];
+    mc->__rbp = ((uint64_t*)mctx)[1];
+    mc->__rsp = ((uint64_t*)mctx)[2];
+    mc->__r12 = ((uint64_t*)mctx)[3];
+    mc->__r13 = ((uint64_t*)mctx)[4];
+    mc->__r14 = ((uint64_t*)mctx)[5];
+    mc->__r15 = ((uint64_t*)mctx)[6];
+    mc->__rip = ((uint64_t*)mctx)[7];
+    // added in libsystem_plaform 177.200.16 (macOS Mojave 10.14.3)
+    // prior to that _os_ptr_munge_token was (hopefully) typically 0,
+    // so x ^ 0 == x and this is a no-op
+    mc->__rbp = _OS_PTR_UNMUNGE(mc->__rbp);
+    mc->__rsp = _OS_PTR_UNMUNGE(mc->__rsp);
+    mc->__rip = _OS_PTR_UNMUNGE(mc->__rip);
+    context = &c;
+  #elif defined(_CPU_AARCH64_)
+    // from https://github.com/apple/darwin-libplatform/blob/main/src/setjmp/arm64/setjmp.s
+    // https://github.com/apple/darwin-xnu/blob/main/osfmk/mach/arm/_structs.h
+    // https://github.com/llvm/llvm-project/blob/7714e0317520207572168388f22012dd9e152e9e/libunwind/src/Registers.hpp -> Registers_arm64
+    arm_thread_state64_t *mc = (arm_thread_state64_t*)&c;
+    mc->__x[19] = ((uint64_t*)mctx)[0];
+    mc->__x[20] = ((uint64_t*)mctx)[1];
+    mc->__x[21] = ((uint64_t*)mctx)[2];
+    mc->__x[22] = ((uint64_t*)mctx)[3];
+    mc->__x[23] = ((uint64_t*)mctx)[4];
+    mc->__x[24] = ((uint64_t*)mctx)[5];
+    mc->__x[25] = ((uint64_t*)mctx)[6];
+    mc->__x[26] = ((uint64_t*)mctx)[7];
+    mc->__x[27] = ((uint64_t*)mctx)[8];
+    mc->__x[28] = ((uint64_t*)mctx)[9];
+    mc->__x[10] = ((uint64_t*)mctx)[10];
+    mc->__x[11] = ((uint64_t*)mctx)[11];
+    mc->__x[12] = ((uint64_t*)mctx)[12];
+    // 13 is reserved/unused
+    double *mcfp = (double*)&mc[1];
+    mcfp[7] = ((uint64_t*)mctx)[14]; // aka d8
+    mcfp[8] = ((uint64_t*)mctx)[15]; // aka d9
+    mcfp[9] = ((uint64_t*)mctx)[16]; // aka d10
+    mcfp[10] = ((uint64_t*)mctx)[17]; // aka d11
+    mcfp[11] = ((uint64_t*)mctx)[18]; // aka d12
+    mcfp[12] = ((uint64_t*)mctx)[19]; // aka d13
+    mcfp[13] = ((uint64_t*)mctx)[20]; // aka d14
+    mcfp[14] = ((uint64_t*)mctx)[21]; // aka d15
+    mc->__fp = _OS_PTR_UNMUNGE(mc->__x[10]);
+    mc->__lr = _OS_PTR_UNMUNGE(mc->__x[11]);
+    mc->__x[12] = _OS_PTR_UNMUNGE(mc->__x[12]);
+    mc->__sp = mc->__x[12];
+    // libunwind is broken for signed-pointers, but perhaps best not to leave the signed pointer lying around either
+    mc->__pc = ptrauth_strip(mc->__lr, 0);
+    mc->__pad = 0; // aka __ra_sign_state = not signed
+    context = &c;
+  #else
+   #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown darwin")
+    (void)mctx;
+    (void)c;
+  #endif
+ #else
+  #pragma message("jl_rec_backtrace not defined for ASM/SETJMP on unknown system")
+  (void)c;
+ #endif
+#elif defined(JL_HAVE_ASYNCIFY)
+ #pragma message("jl_rec_backtrace not defined for ASYNCIFY")
+#elif defined(JL_HAVE_SIGALTSTACK)
+ #pragma message("jl_rec_backtrace not defined for SIGALTSTACK")
+#else
+ #pragma message("jl_rec_backtrace not defined for unknown task system")
+#endif
+    if (context)
+        ptls->bt_size = rec_backtrace_ctx(ptls->bt_data, JL_MAX_BT_SIZE, context, t->gcstack);
+    if (old == -1)
+        jl_atomic_store_relaxed(&t->tid, old);
+}
+
 //--------------------------------------------------
 // Tools for interactive debugging in gdb
+
 JL_DLLEXPORT void jl_gdblookup(void* ip)
 {
     jl_print_native_codeloc((uintptr_t)ip);
@@ -691,16 +1078,35 @@ JL_DLLEXPORT void jl_gdblookup(void* ip)
 // Print backtrace for current exception in catch block
 JL_DLLEXPORT void jlbacktrace(void) JL_NOTSAFEPOINT
 {
-    jl_excstack_t *s = jl_get_ptls_states()->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls == NULL)
+        return;
+    jl_excstack_t *s = ct->excstack;
     if (!s)
         return;
-    size_t bt_size = jl_excstack_bt_size(s, s->top);
+    size_t i, bt_size = jl_excstack_bt_size(s, s->top);
     jl_bt_element_t *bt_data = jl_excstack_bt_data(s, s->top);
-    for (size_t i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
+    for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
+        jl_print_bt_entry_codeloc(bt_data + i);
+    }
+}
+JL_DLLEXPORT void jlbacktracet(jl_task_t *t)
+{
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
+    jl_rec_backtrace(t);
+    size_t i, bt_size = ptls->bt_size;
+    jl_bt_element_t *bt_data = ptls->bt_data;
+    for (i = 0; i < bt_size; i += jl_bt_entry_size(bt_data + i)) {
         jl_print_bt_entry_codeloc(bt_data + i);
     }
 }
 
+JL_DLLEXPORT void jl_print_backtrace(void) JL_NOTSAFEPOINT
+{
+    jlbacktrace();
+}
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/staticdata.c b/src/staticdata.c
index 2b70975c7d7abb..27fbb0fb336cf1 100644
--- a/src/staticdata.c
+++ b/src/staticdata.c
@@ -2,6 +2,59 @@
 
 /*
   saving and restoring system images
+
+  This performs serialization and deserialization of in-memory data. The dump.c file is similar, but has less complete coverage:
+  dump.c has no knowledge of native code (and simply discards it), whereas this supports native code caching in .o files.
+  Duplication is avoided by elevating the .o-serialized versions of global variables and native-compiled functions to become
+  the authoritative source for such entities in the system image, with references to these objects appropriately inserted into
+  the (de)serialized version of Julia's internal data. This makes deserialization simple and fast: we only need to deal with
+  pointer relocation, registering with the garbage collector, and making note of special internal types. During serialization,
+  we also need to pay special attention to things like builtin functions, C-implemented types (those in jltypes.c), the metadata
+  for documentation, optimal layouts, integration with native system image generation, and preparing other preprocessing
+  directives.
+
+  dump.c has capabilities missing from this serializer, most notably the ability to handle external references. This is not needed
+  for system images as they are self-contained. However, it would be needed to support incremental compilation of packages.
+
+  During serialization, the flow has several steps:
+
+  - step 1 inserts relevant items into `backref_table`, an `obj` => `id::Int` mapping. `id` is assigned by
+    order of insertion. This is effectively a recursive traversal, singling out items like pointers and symbols
+    that need restoration when the system image is loaded. This stage is implemented by `jl_serialize_value`
+    and its callees; while it would be simplest to use recursion, this risks stack overflow, so recursion is mimicked
+    using a work-queue managed by `jl_serialize_reachable`.
+
+    It's worth emphasizing that despite the name `jl_serialize_value`, the only goal of this stage is to
+    insert objects into `backref_table`. The entire system gets inserted, either directly or indirectly via
+    fields of other objects. Objects requiring pointer relocation or gc registration must be inserted directly.
+    In later stages, such objects get referenced by their `id`.
+
+  - step 2 (the biggest of four steps) takes all items in `backref_table` and actually serializes them ordered
+    by `id`. The system is serialized into several distinct streams (see `jl_serializer_state`), a "main stream"
+    (the `s` field) as well as parallel streams for writing specific categories of additional internal data (e.g.,
+    global data invisible to codegen, as well as deserialization "touch-up" tables, see below). These different streams
+    will be concatenated in later steps. Certain key items (e.g., builtin types & functions associated with `INSERT_TAG`
+    below, integers smaller than 512) get serialized via a hard-coded tag table.
+
+    Serialization builds "touch up" tables used during deserialization. Pointers and items requiring gc
+    registration get encoded as `(location, target)` pairs in `relocs_list` and `gctags_list`, respectively.
+    `location` is the site that needs updating (e.g., the address of a pointer referencing an object), and is
+    set to `position(s)`, the offset of the object from the beginning of the deserialized blob.
+    `target` is a bitfield-encoded index into lists of different categories of data (e.g., mutable data, constant data,
+    symbols, functions, etc.) to which the pointer at `location` refers. The different lists and their bitfield flags
+    are given by the `RefTags` enum: if `t` is the category tag (one of the `RefTags` enums) and `i` is the index into
+    one of the corresponding categorical list, then `index = t << RELOC_TAG_OFFSET + i`. The simplest source for the
+    details of this encoding can be found in the pair of functions `get_reloc_for_item` and `get_item_for_reloc`.
+
+    Most of step 2 is handled by `jl_write_values`, followed by special handling of the dedicated parallel streams.
+
+  - step 3 combines the different sections (fields of `jl_serializer_state`) into one
+
+  - step 4 writes the values of the hard-coded tagged items and `reinit_list`/`ccallable_list`
+
+The tables written to the serializer stream make deserialization fairly straightforward. Much of the "real work" is
+done by `get_item_for_reloc`.
+
 */
 #include <stdlib.h>
 #include <string.h>
@@ -11,16 +64,13 @@
 #include "julia_internal.h"
 #include "builtin_proto.h"
 #include "processor.h"
+#include "serialize.h"
 
 #ifndef _OS_WINDOWS_
 #include <dlfcn.h>
 #endif
 
-#ifndef _COMPILER_MICROSOFT_
 #include "valgrind.h"
-#else
-#define RUNNING_ON_VALGRIND 0
-#endif
 #include "julia_assert.h"
 
 #ifdef __cplusplus
@@ -30,63 +80,193 @@ extern "C" {
 // TODO: put WeakRefs on the weak_refs list during deserialization
 // TODO: handle finalizers
 
+#define NUM_TAGS    155
+
 // An array of references that need to be restored from the sysimg
 // This is a manually constructed dual of the gvars array, which would be produced by codegen for Julia code, for C.
-static void *const _tags[] = {
-         // builtin types
-         &jl_any_type, &jl_symbol_type, &jl_ssavalue_type, &jl_datatype_type, &jl_slotnumber_type,
-         &jl_simplevector_type, &jl_array_type, &jl_typedslot_type,
-         &jl_expr_type, &jl_globalref_type, &jl_string_type,
-         &jl_module_type, &jl_tvar_type, &jl_method_instance_type, &jl_method_type, &jl_code_instance_type,
-         &jl_linenumbernode_type, &jl_lineinfonode_type,
-         &jl_gotonode_type, &jl_quotenode_type, &jl_gotoifnot_type, &jl_argument_type, &jl_returnnode_type,
-         &jl_const_type, &jl_partial_struct_type, &jl_method_match_type,
-         &jl_pinode_type, &jl_phinode_type, &jl_phicnode_type, &jl_upsilonnode_type,
-         &jl_type_type, &jl_bottom_type, &jl_ref_type, &jl_pointer_type, &jl_llvmpointer_type,
-         &jl_vararg_type, &jl_abstractarray_type,
-         &jl_densearray_type, &jl_nothing_type, &jl_function_type, &jl_typeofbottom_type,
-         &jl_unionall_type, &jl_typename_type, &jl_builtin_type, &jl_code_info_type,
-         &jl_task_type, &jl_uniontype_type, &jl_abstractstring_type,
-         &jl_array_any_type, &jl_intrinsic_type, &jl_abstractslot_type,
-         &jl_methtable_type, &jl_typemap_level_type, &jl_typemap_entry_type,
-         &jl_voidpointer_type, &jl_uint8pointer_type, &jl_newvarnode_type,
-         &jl_anytuple_type_type, &jl_anytuple_type, &jl_namedtuple_type, &jl_emptytuple_type,
-         &jl_array_symbol_type, &jl_array_uint8_type, &jl_array_int32_type,
-         &jl_int32_type, &jl_int64_type, &jl_bool_type, &jl_uint8_type,
-         &jl_uint32_type, &jl_uint64_type, &jl_char_type, &jl_weakref_type,
-         &jl_int8_type, &jl_int16_type, &jl_uint16_type,
-         &jl_float16_type, &jl_float32_type, &jl_float64_type, &jl_floatingpoint_type,
-         &jl_number_type, &jl_signed_type,
-         // special typenames
-         &jl_tuple_typename, &jl_pointer_typename, &jl_llvmpointer_typename, &jl_array_typename, &jl_type_typename,
-         &jl_vararg_typename, &jl_namedtuple_typename,
-         &jl_vecelement_typename,
-         // special exceptions
-         &jl_errorexception_type, &jl_argumenterror_type, &jl_typeerror_type,
-         &jl_methoderror_type, &jl_loaderror_type, &jl_initerror_type,
-         &jl_undefvarerror_type, &jl_stackovf_exception, &jl_diverror_exception,
-         &jl_interrupt_exception, &jl_boundserror_type, &jl_memory_exception,
-         &jl_undefref_exception, &jl_readonlymemory_exception,
+jl_value_t **const*const get_tags(void) {
+    // Make sure to keep an extra slot at the end to sentinel length
+    static void * _tags[NUM_TAGS] = {NULL};
+
+    // Lazyily-initialize this list
+    if (_tags[0] == NULL) {
+        unsigned int i = 0;
+#define INSERT_TAG(sym) _tags[i++] = &(sym)
+        // builtin types
+        INSERT_TAG(jl_any_type);
+        INSERT_TAG(jl_symbol_type);
+        INSERT_TAG(jl_ssavalue_type);
+        INSERT_TAG(jl_datatype_type);
+        INSERT_TAG(jl_slotnumber_type);
+        INSERT_TAG(jl_simplevector_type);
+        INSERT_TAG(jl_array_type);
+        INSERT_TAG(jl_typedslot_type);
+        INSERT_TAG(jl_expr_type);
+        INSERT_TAG(jl_globalref_type);
+        INSERT_TAG(jl_string_type);
+        INSERT_TAG(jl_module_type);
+        INSERT_TAG(jl_tvar_type);
+        INSERT_TAG(jl_method_instance_type);
+        INSERT_TAG(jl_method_type);
+        INSERT_TAG(jl_code_instance_type);
+        INSERT_TAG(jl_linenumbernode_type);
+        INSERT_TAG(jl_lineinfonode_type);
+        INSERT_TAG(jl_gotonode_type);
+        INSERT_TAG(jl_quotenode_type);
+        INSERT_TAG(jl_gotoifnot_type);
+        INSERT_TAG(jl_argument_type);
+        INSERT_TAG(jl_returnnode_type);
+        INSERT_TAG(jl_const_type);
+        INSERT_TAG(jl_partial_struct_type);
+        INSERT_TAG(jl_partial_opaque_type);
+        INSERT_TAG(jl_interconditional_type);
+        INSERT_TAG(jl_method_match_type);
+        INSERT_TAG(jl_pinode_type);
+        INSERT_TAG(jl_phinode_type);
+        INSERT_TAG(jl_phicnode_type);
+        INSERT_TAG(jl_upsilonnode_type);
+        INSERT_TAG(jl_type_type);
+        INSERT_TAG(jl_bottom_type);
+        INSERT_TAG(jl_ref_type);
+        INSERT_TAG(jl_pointer_type);
+        INSERT_TAG(jl_llvmpointer_type);
+        INSERT_TAG(jl_vararg_type);
+        INSERT_TAG(jl_abstractarray_type);
+        INSERT_TAG(jl_densearray_type);
+        INSERT_TAG(jl_nothing_type);
+        INSERT_TAG(jl_function_type);
+        INSERT_TAG(jl_typeofbottom_type);
+        INSERT_TAG(jl_unionall_type);
+        INSERT_TAG(jl_typename_type);
+        INSERT_TAG(jl_builtin_type);
+        INSERT_TAG(jl_code_info_type);
+        INSERT_TAG(jl_opaque_closure_type);
+        INSERT_TAG(jl_task_type);
+        INSERT_TAG(jl_uniontype_type);
+        INSERT_TAG(jl_abstractstring_type);
+        INSERT_TAG(jl_array_any_type);
+        INSERT_TAG(jl_intrinsic_type);
+        INSERT_TAG(jl_abstractslot_type);
+        INSERT_TAG(jl_methtable_type);
+        INSERT_TAG(jl_typemap_level_type);
+        INSERT_TAG(jl_typemap_entry_type);
+        INSERT_TAG(jl_voidpointer_type);
+        INSERT_TAG(jl_uint8pointer_type);
+        INSERT_TAG(jl_newvarnode_type);
+        INSERT_TAG(jl_anytuple_type_type);
+        INSERT_TAG(jl_anytuple_type);
+        INSERT_TAG(jl_namedtuple_type);
+        INSERT_TAG(jl_emptytuple_type);
+        INSERT_TAG(jl_array_symbol_type);
+        INSERT_TAG(jl_array_uint8_type);
+        INSERT_TAG(jl_array_int32_type);
+        INSERT_TAG(jl_array_uint64_type);
+        INSERT_TAG(jl_int32_type);
+        INSERT_TAG(jl_int64_type);
+        INSERT_TAG(jl_bool_type);
+        INSERT_TAG(jl_uint8_type);
+        INSERT_TAG(jl_uint16_type);
+        INSERT_TAG(jl_uint32_type);
+        INSERT_TAG(jl_uint64_type);
+        INSERT_TAG(jl_char_type);
+        INSERT_TAG(jl_weakref_type);
+        INSERT_TAG(jl_int8_type);
+        INSERT_TAG(jl_int16_type);
+        INSERT_TAG(jl_float16_type);
+        INSERT_TAG(jl_float32_type);
+        INSERT_TAG(jl_float64_type);
+        INSERT_TAG(jl_floatingpoint_type);
+        INSERT_TAG(jl_number_type);
+        INSERT_TAG(jl_signed_type);
+        INSERT_TAG(jl_pair_type);
+
+        // special typenames
+        INSERT_TAG(jl_tuple_typename);
+        INSERT_TAG(jl_pointer_typename);
+        INSERT_TAG(jl_llvmpointer_typename);
+        INSERT_TAG(jl_array_typename);
+        INSERT_TAG(jl_type_typename);
+        INSERT_TAG(jl_namedtuple_typename);
+        INSERT_TAG(jl_vecelement_typename);
+        INSERT_TAG(jl_opaque_closure_typename);
+
+        // special exceptions
+        INSERT_TAG(jl_errorexception_type);
+        INSERT_TAG(jl_argumenterror_type);
+        INSERT_TAG(jl_typeerror_type);
+        INSERT_TAG(jl_methoderror_type);
+        INSERT_TAG(jl_loaderror_type);
+        INSERT_TAG(jl_initerror_type);
+        INSERT_TAG(jl_undefvarerror_type);
+        INSERT_TAG(jl_stackovf_exception);
+        INSERT_TAG(jl_diverror_exception);
+        INSERT_TAG(jl_interrupt_exception);
+        INSERT_TAG(jl_boundserror_type);
+        INSERT_TAG(jl_memory_exception);
+        INSERT_TAG(jl_undefref_exception);
+        INSERT_TAG(jl_readonlymemory_exception);
+        INSERT_TAG(jl_atomicerror_type);
+
+        // other special values
+        INSERT_TAG(jl_emptysvec);
+        INSERT_TAG(jl_emptytuple);
+        INSERT_TAG(jl_false);
+        INSERT_TAG(jl_true);
+        INSERT_TAG(jl_an_empty_string);
+        INSERT_TAG(jl_an_empty_vec_any);
+        INSERT_TAG(jl_module_init_order);
+        INSERT_TAG(jl_core_module);
+        INSERT_TAG(jl_base_module);
+        INSERT_TAG(jl_main_module);
+        INSERT_TAG(jl_top_module);
+        INSERT_TAG(jl_typeinf_func);
+        INSERT_TAG(jl_type_type_mt);
+        INSERT_TAG(jl_nonfunction_mt);
+
+        // some Core.Builtin Functions that we want to be able to reference:
+        INSERT_TAG(jl_builtin_throw);
+        INSERT_TAG(jl_builtin_is);
+        INSERT_TAG(jl_builtin_typeof);
+        INSERT_TAG(jl_builtin_sizeof);
+        INSERT_TAG(jl_builtin_issubtype);
+        INSERT_TAG(jl_builtin_isa);
+        INSERT_TAG(jl_builtin_typeassert);
+        INSERT_TAG(jl_builtin__apply_iterate);
+        INSERT_TAG(jl_builtin_isdefined);
+        INSERT_TAG(jl_builtin_nfields);
+        INSERT_TAG(jl_builtin_tuple);
+        INSERT_TAG(jl_builtin_svec);
+        INSERT_TAG(jl_builtin_getfield);
+        INSERT_TAG(jl_builtin_setfield);
+        INSERT_TAG(jl_builtin_swapfield);
+        INSERT_TAG(jl_builtin_modifyfield);
+        INSERT_TAG(jl_builtin_replacefield);
+        INSERT_TAG(jl_builtin_fieldtype);
+        INSERT_TAG(jl_builtin_arrayref);
+        INSERT_TAG(jl_builtin_const_arrayref);
+        INSERT_TAG(jl_builtin_arrayset);
+        INSERT_TAG(jl_builtin_arraysize);
+        INSERT_TAG(jl_builtin_apply_type);
+        INSERT_TAG(jl_builtin_applicable);
+        INSERT_TAG(jl_builtin_invoke);
+        INSERT_TAG(jl_builtin__expr);
+        INSERT_TAG(jl_builtin_ifelse);
+        INSERT_TAG(jl_builtin__typebody);
+        INSERT_TAG(jl_builtin_donotdelete);
+        INSERT_TAG(jl_builtin_getglobal);
+        INSERT_TAG(jl_builtin_setglobal);
+        // n.b. must update NUM_TAGS when you add something here
+
+        // All optional tags must be placed at the end, so that we
+        // don't accidentally have a `NULL` in the middle
 #ifdef SEGV_EXCEPTION
-         &jl_segv_exception,
+        INSERT_TAG(jl_segv_exception);
 #endif
-         // other special values
-         &jl_emptysvec, &jl_emptytuple, &jl_false, &jl_true, &jl_nothing,
-         &jl_an_empty_string, &jl_an_empty_vec_any,
-
-         &jl_module_init_order, &jl_core_module, &jl_base_module, &jl_main_module, &jl_top_module,
-         &jl_typeinf_func, &jl_type_type_mt, &jl_nonfunction_mt,
-         // some Core.Builtin Functions that we want to be able to reference:
-         &jl_builtin_throw, &jl_builtin_is, &jl_builtin_typeof, &jl_builtin_sizeof,
-         &jl_builtin_issubtype, &jl_builtin_isa, &jl_builtin_typeassert, &jl_builtin__apply,
-         &jl_builtin__apply_iterate,
-         &jl_builtin_isdefined, &jl_builtin_nfields, &jl_builtin_tuple, &jl_builtin_svec,
-         &jl_builtin_getfield, &jl_builtin_setfield, &jl_builtin_fieldtype, &jl_builtin_arrayref,
-         &jl_builtin_const_arrayref, &jl_builtin_arrayset, &jl_builtin_arraysize,
-         &jl_builtin_apply_type, &jl_builtin_applicable, &jl_builtin_invoke,
-         &jl_builtin__expr, &jl_builtin_ifelse, &jl_builtin__typebody,
-         NULL };
-static jl_value_t **const*const tags = (jl_value_t**const*const)_tags;
+#undef INSERT_TAG
+        assert(i >= (NUM_TAGS-2) && i < NUM_TAGS);
+    }
+    return (jl_value_t**const*const) _tags;
+}
 
 // hash of definitions for predefined tagged object
 static htable_t symbol_table;
@@ -98,12 +278,15 @@ static arraylist_t deser_sym;
 // table of all objects that are serialized
 static htable_t backref_table;
 static int backref_table_numel;
-static arraylist_t layout_table;
-static arraylist_t builtin_typenames;
-
-// list of (size_t pos, (void *f)(jl_value_t*)) entries
-// for the serializer to mark values in need of rework by function f
-// during deserialization later
+static arraylist_t layout_table;     // cache of `position(s)` for each `id` in `backref_table`
+static arraylist_t object_worklist;  // used to mimic recursion by jl_serialize_reachable
+
+// Both `reinit_list` and `ccallable_list` are lists of (size_t pos, code) entries
+// for the serializer to mark values in need of rework during deserialization
+// codes:
+//   1: typename   (reinit_list)
+//   2: module     (reinit_list)
+//   3: method     (ccallable_list)
 static arraylist_t reinit_list;
 
 // @ccallable entry points to install
@@ -111,32 +294,38 @@ static arraylist_t ccallable_list;
 
 // hash of definitions for predefined function pointers
 static htable_t fptr_to_id;
-void *native_functions;
+void *native_functions;   // opaque jl_native_code_desc_t blob used for fetching data from LLVM
+
+// table of struct field addresses to rewrite during saving
+static htable_t field_replace;
 
 // array of definitions for the predefined function pointers
 // (reverse of fptr_to_id)
 // This is a manually constructed dual of the fvars array, which would be produced by codegen for Julia code, for C.
 static const jl_fptr_args_t id_to_fptrs[] = {
     &jl_f_throw, &jl_f_is, &jl_f_typeof, &jl_f_issubtype, &jl_f_isa,
-    &jl_f_typeassert, &jl_f__apply, &jl_f__apply_iterate, &jl_f__apply_pure,
-    &jl_f__apply_latest, &jl_f__apply_in_world, &jl_f_isdefined,
+    &jl_f_typeassert, &jl_f__apply_iterate, &jl_f__apply_pure,
+    &jl_f__call_latest, &jl_f__call_in_world, &jl_f__call_in_world_total, &jl_f_isdefined,
     &jl_f_tuple, &jl_f_svec, &jl_f_intrinsic_call, &jl_f_invoke_kwsorter,
-    &jl_f_getfield, &jl_f_setfield, &jl_f_fieldtype, &jl_f_nfields,
+    &jl_f_getfield, &jl_f_setfield, &jl_f_swapfield, &jl_f_modifyfield,
+    &jl_f_replacefield, &jl_f_fieldtype, &jl_f_nfields,
     &jl_f_arrayref, &jl_f_const_arrayref, &jl_f_arrayset, &jl_f_arraysize, &jl_f_apply_type,
     &jl_f_applicable, &jl_f_invoke, &jl_f_sizeof, &jl_f__expr, &jl_f__typevar,
     &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype,
-    &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef,
+    &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type,
+    &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete,
+    &jl_f_getglobal, &jl_f_setglobal,
     NULL };
 
 typedef struct {
-    ios_t *s;
-    ios_t *const_data;
-    ios_t *symbols;
-    ios_t *relocs;
-    ios_t *gvar_record;
-    ios_t *fptr_record;
-    arraylist_t relocs_list;
-    arraylist_t gctags_list;
+    ios_t *s;                   // the main stream
+    ios_t *const_data;          // codegen-invisible internal data (e.g., datatype layouts, list-like typename fields, foreign types, internal arrays)
+    ios_t *symbols;             // names (char*) of symbols (some may be referenced by pointer in generated code)
+    ios_t *relocs;              // for (de)serializing relocs_list and gctags_list
+    ios_t *gvar_record;         // serialized array mapping gvid => spos
+    ios_t *fptr_record;         // serialized array mapping fptrid => spos
+    arraylist_t relocs_list;    // a list of (location, target) pairs, see description at top
+    arraylist_t gctags_list;    //      "
     jl_ptls_t ptls;
 } jl_serializer_state;
 
@@ -145,14 +334,18 @@ static jl_typename_t *jl_idtable_typename = NULL;
 static jl_value_t *jl_bigint_type = NULL;
 static int gmp_limb_size = 0;
 
+static jl_sym_t *jl_docmeta_sym = NULL;
+
+// Tags of category `t` are located at offsets `t << RELOC_TAG_OFFSET`
+// Consequently there is room for 2^RELOC_TAG_OFFSET pointers, etc
 enum RefTags {
-    DataRef,
-    ConstDataRef,
-    TagRef,
-    SymbolRef,
-    BindingRef,
-    FunctionRef,
-    BuiltinFunctionRef
+    DataRef,           // mutable data
+    ConstDataRef,      // constant data (e.g., layouts)
+    TagRef,            // items serialized via their tags
+    SymbolRef,         // symbols
+    BindingRef,        // module bindings
+    FunctionRef,       // generic functions
+    BuiltinFunctionRef // builtin functions
 };
 
 // calling conventions for internal entry points.
@@ -168,28 +361,9 @@ typedef enum {
 } jl_callingconv_t;
 
 
-// this supports up to 1 GB images and 16 RefTags
+// this supports up to 8 RefTags, 512MB of pointer data, and 4/2 (64/32-bit) GB of constant data.
 // if a larger size is required, will need to add support for writing larger relocations in many cases below
-#define RELOC_TAG_OFFSET 28
-
-
-/* read and write in host byte order */
-
-#define write_uint8(s, n) ios_putc((n), (s))
-#define read_uint8(s) ((uint8_t)ios_getc((s)))
-
-static void write_uint32(ios_t *s, uint32_t i) JL_NOTSAFEPOINT
-{
-    ios_write(s, (char*)&i, 4);
-}
-
-static uint32_t read_uint32(ios_t *s) JL_NOTSAFEPOINT
-{
-    uint32_t x = 0;
-    ios_read(s, (char*)&x, 4);
-    return x;
-}
-
+#define RELOC_TAG_OFFSET 29
 
 // --- Static Compile ---
 
@@ -218,9 +392,13 @@ static void jl_load_sysimg_so(void)
         jl_dlsym(jl_sysimg_handle, "jl_sysimg_gvars_offsets", (void **)&sysimg_gvars_offsets, 1);
         sysimg_gvars_offsets += 1;
         assert(sysimg_fptrs.base);
-        uintptr_t *tls_getter_slot;
-        jl_dlsym(jl_sysimg_handle, "jl_get_ptls_states_slot", (void **)&tls_getter_slot, 1);
-        *tls_getter_slot = (uintptr_t)jl_get_ptls_states_getter();
+
+        void *pgcstack_func_slot;
+        jl_dlsym(jl_sysimg_handle, "jl_pgcstack_func_slot", &pgcstack_func_slot, 1);
+        void *pgcstack_key_slot;
+        jl_dlsym(jl_sysimg_handle, "jl_pgcstack_key_slot", &pgcstack_key_slot, 1);
+        jl_pgcstack_getkey((jl_get_pgcstack_func**)pgcstack_func_slot, (jl_pgcstack_key_t*)pgcstack_key_slot);
+
         size_t *tls_offset_idx;
         jl_dlsym(jl_sysimg_handle, "jl_tls_offset", (void **)&tls_offset_idx, 1);
         *tls_offset_idx = (uintptr_t)(jl_tls_offset == -1 ? 0 : jl_tls_offset);
@@ -259,8 +437,8 @@ static uintptr_t jl_fptr_id(void *fptr)
         return *(uintptr_t*)pbp;
 }
 
-#define jl_serialize_value(s, v) jl_serialize_value_(s,(jl_value_t*)(v))
-static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v);
+#define jl_serialize_value(s, v) jl_serialize_value_(s,(jl_value_t*)(v),1)
+static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive);
 
 
 static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
@@ -269,13 +447,18 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
     jl_serialize_value(s, m->parent);
     size_t i;
     void **table = m->bindings.table;
-    for (i = 1; i < m->bindings.size; i += 2) {
-        if (table[i] != HT_NOTFOUND) {
-            jl_binding_t *b = (jl_binding_t*)table[i];
+    for (i = 0; i < m->bindings.size; i += 2) {
+        if (table[i+1] != HT_NOTFOUND) {
+            jl_serialize_value(s, (jl_value_t*)table[i]);
+            jl_binding_t *b = (jl_binding_t*)table[i+1];
             jl_serialize_value(s, b->name);
-            jl_serialize_value(s, b->value);
-            jl_serialize_value(s, b->globalref);
+            if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata)
+                jl_serialize_value(s, jl_nothing);
+            else
+                jl_serialize_value(s, jl_atomic_load_relaxed(&b->value));
+            jl_serialize_value(s, jl_atomic_load_relaxed(&b->globalref));
             jl_serialize_value(s, b->owner);
+            jl_serialize_value(s, jl_atomic_load_relaxed(&b->ty));
         }
     }
 
@@ -284,13 +467,20 @@ static void jl_serialize_module(jl_serializer_state *s, jl_module_t *m)
     }
 }
 
+static jl_value_t *get_replaceable_field(jl_value_t **addr)
+{
+    jl_value_t *fld = (jl_value_t*)ptrhash_get(&field_replace, addr);
+    if (fld == HT_NOTFOUND)
+        return *addr;
+    return fld;
+}
 
 #define NBOX_C 1024
 
-static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v)
+static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v, int recursive)
 {
     // ignore items that are given a special representation
-    if (v == NULL || jl_is_symbol(v)) {
+    if (v == NULL || jl_is_symbol(v) || v == jl_nothing) {
         return;
     }
     else if (jl_typeis(v, jl_task_type)) {
@@ -312,7 +502,11 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v)
     else if (jl_typeis(v, jl_uint8_type)) {
         return;
     }
+    arraylist_push(&object_worklist, (void*)((uintptr_t)v | recursive));
+}
 
+static void jl_serialize_value__(jl_serializer_state *s, jl_value_t *v, int recursive)
+{
     void **bp = ptrhash_bp(&backref_table, v);
     if (*bp != HT_NOTFOUND) {
         return;
@@ -331,6 +525,8 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v)
         // skip it
     }
     else if (jl_is_svec(v)) {
+        if (!recursive)
+            return;
         size_t i, l = jl_svec_len(v);
         jl_value_t **data = jl_svec_data(v);
         for (i = 0; i < l; i++) {
@@ -366,17 +562,52 @@ static void jl_serialize_value_(jl_serializer_state *s, jl_value_t *v)
     else if (jl_typeis(v, jl_module_type)) {
         jl_serialize_module(s, (jl_module_t*)v);
     }
+    else if (jl_is_typename(v)) {
+        jl_typename_t *tn = (jl_typename_t*)v;
+        jl_serialize_value(s, tn->name);
+        jl_serialize_value(s, tn->module);
+        jl_serialize_value(s, tn->names);
+        jl_serialize_value(s, tn->wrapper);
+        jl_serialize_value(s, tn->Typeofwrapper);
+        jl_serialize_value_(s, (jl_value_t*)tn->cache, 0);
+        jl_serialize_value_(s, (jl_value_t*)tn->linearcache, 0);
+        jl_serialize_value(s, tn->mt);
+        jl_serialize_value(s, tn->partial);
+    }
     else if (t->layout->nfields > 0) {
         char *data = (char*)jl_data_ptr(v);
         size_t i, np = t->layout->npointers;
         for (i = 0; i < np; i++) {
             uint32_t ptr = jl_ptr_offset(t, i);
-            jl_value_t *fld = ((jl_value_t* const*)data)[ptr];
+            jl_value_t *fld = get_replaceable_field(&((jl_value_t**)data)[ptr]);
             jl_serialize_value(s, fld);
         }
     }
 }
 
+// Do a pre-order traversal of the to-serialize worklist, in the identical order
+// to the calls to jl_serialize_value would occur in a purely recursive
+// implementation, but without potentially running out of stack.
+static void jl_serialize_reachable(jl_serializer_state *s)
+{
+    size_t i, prevlen = 0;
+    while (object_worklist.len) {
+        // reverse!(object_worklist.items, prevlen:end);
+        // prevlen is the index of the first new object
+        for (i = prevlen; i < object_worklist.len; i++) {
+            size_t j = object_worklist.len - i + prevlen - 1;
+            void *tmp = object_worklist.items[i];
+            object_worklist.items[i] = object_worklist.items[j];
+            object_worklist.items[j] = tmp;
+        }
+        prevlen = --object_worklist.len;
+        uintptr_t v = (uintptr_t)object_worklist.items[prevlen];
+        int recursive = v & 1;
+        v &= ~(uintptr_t)1; // untag v
+        jl_serialize_value__(s, (jl_value_t*)v, recursive);
+    }
+}
+
 static void ios_ensureroom(ios_t *s, size_t newsize) JL_NOTSAFEPOINT
 {
     size_t prevsize = s->size;
@@ -387,6 +618,9 @@ static void ios_ensureroom(ios_t *s, size_t newsize) JL_NOTSAFEPOINT
     }
 }
 
+// Maybe encode a global variable. `gid` is the LLVM index, 0 if the object is not serialized
+// in the generated code (and thus not a gvar from that standpoint, maybe only stored in the internal-data sysimg).
+// `reloc_id` is the RefTags-encoded `target`.
 static void record_gvar(jl_serializer_state *s, int gid, uintptr_t reloc_id) JL_NOTSAFEPOINT
 {
     if (gid == 0)
@@ -416,7 +650,9 @@ static void write_pointer(ios_t *s) JL_NOTSAFEPOINT
     write_padding(s, sizeof(void*));
 }
 
-
+// Return the integer `id` for `v`. Generically this is looked up in `backref_table`,
+// but symbols, small integers, and a couple of special items (`nothing` and the root Task)
+// have special handling.
 #define backref_id(s, v) _backref_id(s, (jl_value_t*)(v))
 static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPOINT
 {
@@ -438,19 +674,22 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPO
     else if (v == (jl_value_t*)s->ptls->root_task) {
         return (uintptr_t)TagRef << RELOC_TAG_OFFSET;
     }
+    else if (v == jl_nothing) {
+        return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + 1;
+    }
     else if (jl_typeis(v, jl_int64_type)) {
         int64_t i64 = *(int64_t*)v + NBOX_C / 2;
         if ((uint64_t)i64 < NBOX_C)
-            return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i64 + 1;
+            return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i64 + 2;
     }
     else if (jl_typeis(v, jl_int32_type)) {
         int32_t i32 = *(int32_t*)v + NBOX_C / 2;
         if ((uint32_t)i32 < NBOX_C)
-            return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i32 + 1 + NBOX_C;
+            return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + i32 + 2 + NBOX_C;
     }
     else if (jl_typeis(v, jl_uint8_type)) {
         uint8_t u8 = *(uint8_t*)v;
-        return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 1 + NBOX_C + NBOX_C;
+        return ((uintptr_t)TagRef << RELOC_TAG_OFFSET) + u8 + 2 + NBOX_C + NBOX_C;
     }
     if (idx == HT_NOTFOUND) {
         idx = ptrhash_get(&backref_table, v);
@@ -460,6 +699,8 @@ static uintptr_t _backref_id(jl_serializer_state *s, jl_value_t *v) JL_NOTSAFEPO
 }
 
 
+// Save blank space in stream `s` for a pointer `fld`, storing both location and target
+// in `relocs_list`.
 static void write_pointerfield(jl_serializer_state *s, jl_value_t *fld) JL_NOTSAFEPOINT
 {
     if (fld != NULL) {
@@ -469,6 +710,8 @@ static void write_pointerfield(jl_serializer_state *s, jl_value_t *fld) JL_NOTSA
     write_pointer(s->s);
 }
 
+// Save blank space in stream `s` for a pointer `fld`, storing both location and target
+// in `gctags_list`.
 static void write_gctaggedfield(jl_serializer_state *s, uintptr_t ref) JL_NOTSAFEPOINT
 {
     arraylist_push(&s->gctags_list, (void*)(uintptr_t)ios_pos(s->s));
@@ -476,13 +719,14 @@ static void write_gctaggedfield(jl_serializer_state *s, uintptr_t ref) JL_NOTSAF
     write_pointer(s->s);
 }
 
-
+// Special handling from `jl_write_values` for modules
 static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t *m)
 {
     size_t reloc_offset = ios_pos(s->s);
     size_t tot = sizeof(jl_module_t);
-    ios_write(s->s, (char*)m, tot);
+    ios_write(s->s, (char*)m, tot);     // raw memory dump of the `jl_module_t` structure
 
+    // Handle the fields requiring special attention
     jl_module_t *newm = (jl_module_t*)&s->s->buf[reloc_offset];
     newm->name = NULL;
     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, name)));
@@ -490,7 +734,7 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t
     newm->parent = NULL;
     arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_module_t, parent)));
     arraylist_push(&s->relocs_list, (void*)backref_id(s, m->parent));
-    newm->primary_world = jl_world_counter;
+    newm->primary_world = jl_atomic_load_acquire(&jl_world_counter);
 
     // write out the bindings table as a list
     // immediately after jl_module_t
@@ -498,19 +742,25 @@ static void jl_write_module(jl_serializer_state *s, uintptr_t item, jl_module_t
     size_t count = 0;
     size_t i;
     void **table = m->bindings.table;
-    for (i = 1; i < m->bindings.size; i += 2) {
-        if (table[i] != HT_NOTFOUND) {
-            jl_binding_t *b = (jl_binding_t*)table[i];
+    for (i = 0; i < m->bindings.size; i += 2) {
+        if (table[i+1] != HT_NOTFOUND) {
+            jl_binding_t *b = (jl_binding_t*)table[i+1];
+            write_pointerfield(s, (jl_value_t*)table[i]);
+            tot += sizeof(void*);
             write_gctaggedfield(s, (uintptr_t)BindingRef << RELOC_TAG_OFFSET);
             tot += sizeof(void*);
             size_t binding_reloc_offset = ios_pos(s->s);
             record_gvar(s, jl_get_llvm_gv(native_functions, (jl_value_t*)b),
                     ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + binding_reloc_offset);
             write_pointerfield(s, (jl_value_t*)b->name);
-            write_pointerfield(s, b->value);
-            write_pointerfield(s, b->globalref);
+            if (jl_docmeta_sym && b->name == jl_docmeta_sym && jl_options.strip_metadata)
+                write_pointerfield(s, jl_nothing);
+            else
+                write_pointerfield(s, jl_atomic_load_relaxed(&b->value));
+            write_pointerfield(s, jl_atomic_load_relaxed(&b->globalref));
             write_pointerfield(s, (jl_value_t*)b->owner);
-            size_t flag_offset = offsetof(jl_binding_t, owner) + sizeof(b->owner);
+            write_pointerfield(s, jl_atomic_load_relaxed(&b->ty));
+            size_t flag_offset = offsetof(jl_binding_t, ty) + sizeof(b->ty);
             ios_write(s->s, (char*)b + flag_offset, sizeof(*b) - flag_offset);
             tot += sizeof(jl_binding_t);
             count += 1;
@@ -567,6 +817,7 @@ static size_t jl_sort_size(jl_datatype_t *dt)
 }
 #endif
 
+// Used by `qsort` to order `backref_table` by `id`
 static int sysimg_sort_order(const void *pa, const void *pb)
 {
     uintptr_t sa = ((uintptr_t*)pa)[1];
@@ -588,6 +839,7 @@ static int sysimg_sort_order(const void *pa, const void *pb)
 }
 
 jl_value_t *jl_find_ptr = NULL;
+// The main function for serializing all the items queued in `backref_table`
 static void jl_write_values(jl_serializer_state *s)
 {
     arraylist_t objects_list;
@@ -597,6 +849,7 @@ static void jl_write_values(jl_serializer_state *s)
     arraylist_grow(&layout_table, backref_table_numel);
     memset(layout_table.items, 0, backref_table_numel * sizeof(void*));
 
+    // Order `backref_table` by `id`
     size_t i, len = backref_table.size;
     void **p = backref_table.table;
     for (i = 0; i < len; i += 2) {
@@ -611,10 +864,11 @@ static void jl_write_values(jl_serializer_state *s)
     assert(backref_table_numel * 2 == objects_list.len);
     qsort(objects_list.items, backref_table_numel, sizeof(void*) * 2, sysimg_sort_order);
 
+    // Serialize all entries
     for (i = 0, len = backref_table_numel * 2; i < len; i += 2) {
-        jl_value_t *v = (jl_value_t*)objects_list.items[i];
+        jl_value_t *v = (jl_value_t*)objects_list.items[i];           // the object
         JL_GC_PROMISE_ROOTED(v);
-        uintptr_t item = (uintptr_t)objects_list.items[i + 1];
+        uintptr_t item = (uintptr_t)objects_list.items[i + 1];        // the id
         jl_datatype_t *t = (jl_datatype_t*)jl_typeof(v);
         assert((t->instance == NULL || t->instance == v) && "detected singleton construction corruption");
         // realign stream to expected gc alignment (16 bytes)
@@ -624,7 +878,7 @@ static void jl_write_values(jl_serializer_state *s)
         write_gctaggedfield(s, backref_id(s, t));
         size_t reloc_offset = ios_pos(s->s);
         assert(item < layout_table.len && layout_table.items[item] == NULL);
-        layout_table.items[item] = (void*)reloc_offset;
+        layout_table.items[item] = (void*)reloc_offset;               // store the inverse mapping of `backref_table` (`id` => object)
         record_gvar(s, jl_get_llvm_gv(native_functions, v), ((uintptr_t)DataRef << RELOC_TAG_OFFSET) + reloc_offset);
 
         // write data
@@ -632,17 +886,27 @@ static void jl_write_values(jl_serializer_state *s)
             write_pointer(s->s);
         }
         else if (jl_is_array(v)) {
+            // Internal data for types in julia.h with `jl_array_t` field(s)
 #define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
             jl_array_t *ar = (jl_array_t*)v;
             jl_value_t *et = jl_tparam0(jl_typeof(v));
+            size_t alen = jl_array_len(ar);
+            size_t datasize = alen * ar->elsize;
+            size_t tot = datasize;
+            int isbitsunion = jl_array_isbitsunion(ar);
+            if (isbitsunion)
+                tot += alen;
+            else if (ar->elsize == 1)
+                tot += 1;
             int ndimwords = jl_array_ndimwords(ar->flags.ndims);
-            size_t tsz = JL_ARRAY_ALIGN(sizeof(jl_array_t) + ndimwords * sizeof(size_t), JL_CACHE_BYTE_ALIGNMENT);
+            size_t headersize = sizeof(jl_array_t) + ndimwords*sizeof(size_t);
             // copy header
-            ios_write(s->s, (char*)v, tsz);
+            ios_write(s->s, (char*)v, headersize);
+            size_t alignment_amt = JL_SMALL_BYTE_ALIGNMENT;
+            if (tot >= ARRAY_CACHE_ALIGN_THRESHOLD)
+                alignment_amt = JL_CACHE_BYTE_ALIGNMENT;
             // make some header modifications in-place
             jl_array_t *newa = (jl_array_t*)&s->s->buf[reloc_offset];
-            size_t alen = jl_array_len(ar);
-            size_t tot = alen * ar->elsize;
             if (newa->flags.ndims == 1)
                 newa->maxsize = alen;
             newa->offset = 0;
@@ -652,8 +916,8 @@ static void jl_write_values(jl_serializer_state *s)
 
             // write data
             if (!ar->flags.ptrarray && !ar->flags.hasptr) {
-                uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), 16);
-                // realign stream to max(data-align(array), sizeof(void*))
+                // Non-pointer eltypes get encoded in the const_data section
+                uintptr_t data = LLT_ALIGN(ios_pos(s->const_data), alignment_amt);
                 write_padding(s->const_data, data - ios_pos(s->const_data));
                 // write data and relocations
                 newa->data = NULL; // relocation offset
@@ -668,22 +932,28 @@ static void jl_write_values(jl_serializer_state *s)
                         write_pointer(s->const_data);
                 }
                 else {
-                    int isbitsunion = jl_array_isbitsunion(ar);
-                    if (ar->elsize == 1 && !isbitsunion)
-                        tot += 1;
-                    ios_write(s->const_data, (char*)jl_array_data(ar), tot);
-                    if (isbitsunion)
+                    if (isbitsunion) {
+                        ios_write(s->const_data, (char*)jl_array_data(ar), datasize);
                         ios_write(s->const_data, jl_array_typetagdata(ar), alen);
+                    }
+                    else {
+                        ios_write(s->const_data, (char*)jl_array_data(ar), tot);
+                    }
                 }
             }
             else {
-                newa->data = (void*)tsz; // relocation offset
+                // Pointer eltypes are encoded in the mutable data section
+                size_t data = LLT_ALIGN(ios_pos(s->s), alignment_amt);
+                size_t padding_amt = data - ios_pos(s->s);
+                write_padding(s->s, padding_amt);
+                headersize += padding_amt;
+                newa->data = (void*)headersize; // relocation offset
                 arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_array_t, data))); // relocation location
                 arraylist_push(&s->relocs_list, (void*)(((uintptr_t)DataRef << RELOC_TAG_OFFSET) + item)); // relocation target
                 if (ar->flags.hasptr) {
                     // copy all of the data first
                     const char *data = (const char*)jl_array_data(ar);
-                    ios_write(s->s, data, tot);
+                    ios_write(s->s, data, datasize);
                     // the rewrite all of the embedded pointers to null+relocation
                     uint16_t elsz = ar->elsize;
                     size_t j, np = ((jl_datatype_t*)et)->layout->npointers;
@@ -693,12 +963,12 @@ static void jl_write_values(jl_serializer_state *s)
                             size_t offset = i * elsz + jl_ptr_offset(((jl_datatype_t*)et), j) * sizeof(jl_value_t*);
                             jl_value_t *fld = *(jl_value_t**)&data[offset];
                             if (fld != NULL) {
-                                arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + tsz + offset)); // relocation location
+                                arraylist_push(&s->relocs_list, (void*)(uintptr_t)(reloc_offset + headersize + offset)); // relocation location
                                 arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
-                                memset(&s->s->buf[reloc_offset + tsz + offset], 0, sizeof(fld)); // relocation offset (none)
+                                memset(&s->s->buf[reloc_offset + headersize + offset], 0, sizeof(fld)); // relocation offset (none)
                             }
                             else {
-                                assert(*(jl_value_t**)&s->s->buf[reloc_offset + tsz + offset] == NULL);
+                                assert(*(jl_value_t**)&s->s->buf[reloc_offset + headersize + offset] == NULL);
                             }
                         }
                     }
@@ -739,6 +1009,7 @@ static void jl_write_values(jl_serializer_state *s)
                 ios_write(s->s, (char*)v, t->size);
         }
         else if (jl_bigint_type && jl_typeis(v, jl_bigint_type)) {
+            // foreign types require special handling
             jl_value_t *sizefield = jl_get_nth_field(v, 1);
             int32_t sz = jl_unbox_int32(sizefield);
             int32_t nw = (sz == 0 ? 1 : (sz < 0 ? -sz : sz));
@@ -756,6 +1027,7 @@ static void jl_write_values(jl_serializer_state *s)
             write_pointer(s->s);
         }
         else {
+            // Generic object::DataType serialization by field
             const char *data = (const char*)v;
             size_t i, nf = jl_datatype_nfields(t);
             size_t tot = 0;
@@ -765,7 +1037,7 @@ static void jl_write_values(jl_serializer_state *s)
                 write_padding(s->s, offset - tot);
                 tot = offset;
                 size_t fsz = jl_field_size(t, i);
-                if (t->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) {
+                if (t->name->mutabl && jl_is_cpointer_type(jl_field_type(t, i))) {
                     // reset Ptr fields to C_NULL
                     assert(!jl_field_isptr(t, i));
                     write_pointer(s->s);
@@ -779,7 +1051,7 @@ static void jl_write_values(jl_serializer_state *s)
             size_t np = t->layout->npointers;
             for (i = 0; i < np; i++) {
                 size_t offset = jl_ptr_offset(t, i) * sizeof(jl_value_t*);
-                jl_value_t *fld = *(jl_value_t**)&data[offset];
+                jl_value_t *fld = get_replaceable_field((jl_value_t**)&data[offset]);
                 if (fld != NULL) {
                     arraylist_push(&s->relocs_list, (void*)(uintptr_t)(offset + reloc_offset)); // relocation location
                     arraylist_push(&s->relocs_list, (void*)backref_id(s, fld)); // relocation target
@@ -787,6 +1059,7 @@ static void jl_write_values(jl_serializer_state *s)
                 }
             }
 
+            // A few objects need additional handling beyond the generic serialization above
             if (jl_is_method(v)) {
                 write_padding(s->s, sizeof(jl_method_t) - tot);
                 if (((jl_method_t*)v)->ccallable) {
@@ -795,6 +1068,7 @@ static void jl_write_values(jl_serializer_state *s)
                 }
             }
             else if (jl_is_code_instance(v)) {
+                // Handle the native-code pointers
                 jl_code_instance_t *m = (jl_code_instance_t*)v;
                 jl_code_instance_t *newm = (jl_code_instance_t*)&s->s->buf[reloc_offset];
 
@@ -876,6 +1150,30 @@ static void jl_write_values(jl_serializer_state *s)
                     ios_write(s->const_data, flddesc, fldsize);
                 }
             }
+            else if (jl_is_typename(v)) {
+                jl_typename_t *tn = (jl_typename_t*)v;
+                jl_typename_t *newtn = (jl_typename_t*)&s->s->buf[reloc_offset];
+                if (tn->atomicfields != NULL) {
+                    size_t nb = (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t);
+                    uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*));
+                    write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream
+                    newtn->atomicfields = NULL; // relocation offset
+                    layout /= sizeof(void*);
+                    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_typename_t, atomicfields))); // relocation location
+                    arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target
+                    ios_write(s->const_data, (char*)tn->atomicfields, nb);
+                }
+                if (tn->constfields != NULL) {
+                    size_t nb = (jl_svec_len(tn->names) + 31) / 32 * sizeof(uint32_t);
+                    uintptr_t layout = LLT_ALIGN(ios_pos(s->const_data), sizeof(void*));
+                    write_padding(s->const_data, layout - ios_pos(s->const_data)); // realign stream
+                    newtn->constfields = NULL; // relocation offset
+                    layout /= sizeof(void*);
+                    arraylist_push(&s->relocs_list, (void*)(reloc_offset + offsetof(jl_typename_t, constfields))); // relocation location
+                    arraylist_push(&s->relocs_list, (void*)(((uintptr_t)ConstDataRef << RELOC_TAG_OFFSET) + layout)); // relocation target
+                    ios_write(s->const_data, (char*)tn->constfields, nb);
+                }
+            }
             else if (((jl_datatype_t*)(jl_typeof(v)))->name == jl_idtable_typename) {
                 // will need to rehash this, later (after types are fully constructed)
                 arraylist_push(&reinit_list, (void*)item);
@@ -889,6 +1187,8 @@ static void jl_write_values(jl_serializer_state *s)
 }
 
 
+// Record all symbols that get referenced by the generated code
+// and queue them for pointer relocation
 static void jl_write_gv_syms(jl_serializer_state *s, jl_sym_t *v)
 {
     // since symbols are static, they might not have had a
@@ -905,7 +1205,9 @@ static void jl_write_gv_syms(jl_serializer_state *s, jl_sym_t *v)
         jl_write_gv_syms(s, v->right);
 }
 
-static void jl_write_gv_int(jl_serializer_state *s, jl_value_t *v)
+// Record all hardcoded-tagged items that get referenced by
+// the generated code and queue them for pointer relocation
+static void jl_write_gv_tagref(jl_serializer_state *s, jl_value_t *v)
 {
     int32_t gv = jl_get_llvm_gv(native_functions, (jl_value_t*)v);
     if (gv != 0) {
@@ -914,7 +1216,7 @@ static void jl_write_gv_int(jl_serializer_state *s, jl_value_t *v)
         record_gvar(s, gv, item);
     }
 }
-static void jl_write_gv_ints(jl_serializer_state *s)
+static void jl_write_gv_tagrefs(jl_serializer_state *s)
 {
     // this also ensures all objects referenced in the code have
     // references in the system image to their global variable
@@ -922,12 +1224,15 @@ static void jl_write_gv_ints(jl_serializer_state *s)
     // they might not have had a reference anywhere in the code
     // image other than here
     size_t i;
+    jl_write_gv_tagref(s, (jl_value_t*)s->ptls->root_task);
+    jl_write_gv_tagref(s, s->ptls->root_task->tls);
+    jl_write_gv_tagref(s, jl_nothing);
     for (i = 0; i < NBOX_C; i++) {
-        jl_write_gv_int(s, jl_box_int32((int32_t)i - NBOX_C / 2));
-        jl_write_gv_int(s, jl_box_int64((int64_t)i - NBOX_C / 2));
+        jl_write_gv_tagref(s, jl_box_int32((int32_t)i - NBOX_C / 2));
+        jl_write_gv_tagref(s, jl_box_int64((int64_t)i - NBOX_C / 2));
     }
     for (i = 0; i < 256; i++) {
-        jl_write_gv_int(s, jl_box_uint8(i));
+        jl_write_gv_tagref(s, jl_box_uint8(i));
     }
 }
 
@@ -939,6 +1244,8 @@ static inline uint32_t load_uint32(uintptr_t *base)
 }
 
 
+// In deserialization, create Symbols and set up the
+// index for backreferencing
 static void jl_read_symbols(jl_serializer_state *s)
 {
     assert(deser_sym.len == nsym_tag);
@@ -955,10 +1262,13 @@ static void jl_read_symbols(jl_serializer_state *s)
 }
 
 
+// In serialization, extract the appropriate serializer position for RefTags-encoded index `reloc_item`.
+// Used for hard-coded tagged items, `relocs_list`, and `gctags_list`
 static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
 {
     enum RefTags tag = (enum RefTags)(reloc_item >> RELOC_TAG_OFFSET);
     if (tag == DataRef) {
+        // first serialized segment
         // need to compute the final relocation offset via the layout table
         assert(reloc_item < layout_table.len);
         uintptr_t reloc_base = (uintptr_t)layout_table.items[reloc_item];
@@ -978,7 +1288,7 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
             assert(offset < nsym_tag && "corrupt relocation item id");
             break;
         case TagRef:
-            assert(offset < 2 * NBOX_C + 257 && "corrupt relocation item id");
+            assert(offset < 2 * NBOX_C + 258 && "corrupt relocation item id");
             break;
         case BindingRef:
             assert(offset == 0 && "corrupt relocation offset");
@@ -999,7 +1309,7 @@ static uintptr_t get_reloc_for_item(uintptr_t reloc_item, size_t reloc_offset)
     }
 }
 
-
+// Compute target location at deserialization
 static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t base, size_t size, uint32_t reloc_id)
 {
     enum RefTags tag = (enum RefTags)(reloc_id >> RELOC_TAG_OFFSET);
@@ -1018,7 +1328,9 @@ static inline uintptr_t get_item_for_reloc(jl_serializer_state *s, uintptr_t bas
     case TagRef:
         if (offset == 0)
             return (uintptr_t)s->ptls->root_task;
-        offset -= 1;
+        if (offset == 1)
+            return (uintptr_t)jl_nothing;
+        offset -= 2;
         if (offset < NBOX_C)
             return (uintptr_t)jl_box_int64((int64_t)offset - NBOX_C / 2);
         offset -= NBOX_C;
@@ -1157,6 +1469,8 @@ static void jl_update_all_fptrs(jl_serializer_state *s)
     int sysimg_fvars_max = s->fptr_record->size / sizeof(void*);
     size_t i;
     uintptr_t base = (uintptr_t)&s->s->buf[0];
+    // These will become MethodInstance references, but they start out as a list of
+    // offsets into `s` for CodeInstances
     jl_method_instance_t **linfos = (jl_method_instance_t**)&s->fptr_record->buf[0];
     uint32_t clone_idx = 0;
     for (i = 0; i < sysimg_fvars_max; i++) {
@@ -1174,7 +1488,7 @@ static void jl_update_all_fptrs(jl_serializer_state *s)
             uintptr_t base = (uintptr_t)fvars.base;
             assert(jl_is_method(codeinst->def->def.method) && codeinst->invoke != jl_fptr_const_return);
             assert(specfunc ? codeinst->invoke != NULL : codeinst->invoke == NULL);
-            linfos[i] = codeinst->def;
+            linfos[i] = codeinst->def;     // now it's a MethodInstance
             int32_t offset = fvars.offsets[i];
             for (; clone_idx < fvars.nclones; clone_idx++) {
                 uint32_t idx = fvars.clone_idxs[clone_idx] & jl_sysimg_val_mask;
@@ -1194,10 +1508,12 @@ static void jl_update_all_fptrs(jl_serializer_state *s)
             }
         }
     }
+    // Tell LLVM about the native code
     jl_register_fptrs(sysimage_base, &fvars, linfos, sysimg_fvars_max);
 }
 
 
+// Pointer relocation for native-code referenced global variables
 static void jl_update_all_gvars(jl_serializer_state *s)
 {
     if (sysimg_gvars_base == NULL)
@@ -1218,6 +1534,7 @@ static void jl_update_all_gvars(jl_serializer_state *s)
 }
 
 
+// Reinitialization
 static void jl_finalize_serializer(jl_serializer_state *s, arraylist_t *list)
 {
     size_t i, l;
@@ -1235,7 +1552,7 @@ static void jl_finalize_serializer(jl_serializer_state *s, arraylist_t *list)
 }
 
 
-static void jl_reinit_item(jl_value_t *v, int how)
+static void jl_reinit_item(jl_value_t *v, int how) JL_GC_DISABLED
 {
     switch (how) {
         case 1: { // rehash IdDict
@@ -1252,12 +1569,13 @@ static void jl_reinit_item(jl_value_t *v, int how)
             size_t nbindings = mod->bindings.size;
             htable_new(&mod->bindings, nbindings);
             struct binding {
+                jl_sym_t *asname;
                 uintptr_t tag;
                 jl_binding_t b;
             } *b;
             b = (struct binding*)&mod[1];
             while (nbindings > 0) {
-                ptrhash_put(&mod->bindings, (char*)b->b.name, &b->b);
+                ptrhash_put(&mod->bindings, b->asname, &b->b);
                 b += 1;
                 nbindings -= 1;
             }
@@ -1270,7 +1588,8 @@ static void jl_reinit_item(jl_value_t *v, int how)
         }
         case 3: { // install ccallable entry point in JIT
             jl_svec_t *sv = ((jl_method_t*)v)->ccallable;
-            jl_compile_extern_c(NULL, NULL, jl_sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1));
+            int success = jl_compile_extern_c(NULL, NULL, jl_sysimg_handle, jl_svecref(sv, 0), jl_svecref(sv, 1));
+            assert(success); (void)success;
             break;
         }
         default:
@@ -1280,7 +1599,7 @@ static void jl_reinit_item(jl_value_t *v, int how)
 }
 
 
-static void jl_finalize_deserializer(jl_serializer_state *s)
+static void jl_finalize_deserializer(jl_serializer_state *s) JL_GC_DISABLED
 {
     // run reinitialization functions
     uintptr_t base = (uintptr_t)&s->s->buf[0];
@@ -1295,46 +1614,209 @@ static void jl_finalize_deserializer(jl_serializer_state *s)
 
 
 
-// --- helper functions ---
+// Code below helps slim down the images
+static void jl_scan_type_cache_gv(jl_serializer_state *s, jl_svec_t *cache)
+{
+    size_t l = jl_svec_len(cache), i;
+    for (i = 0; i < l; i++) {
+        jl_value_t *ti = jl_svecref(cache, i);
+        if (ti == NULL || ti == jl_nothing)
+            continue;
+        if (jl_get_llvm_gv(native_functions, ti)) {
+            jl_serialize_value(s, ti);
+        }
+        else if (jl_is_datatype(ti)) {
+            jl_value_t *singleton = ((jl_datatype_t*)ti)->instance;
+            if (singleton && jl_get_llvm_gv(native_functions, singleton))
+                jl_serialize_value(s, ti);
+        }
+    }
+}
 
 // remove cached types not referenced in the stream
-static void jl_prune_type_cache(jl_svec_t *cache)
+static void jl_prune_type_cache_hash(jl_svec_t *cache)
+{
+    size_t l = jl_svec_len(cache), i;
+    for (i = 0; i < l; i++) {
+        jl_value_t *ti = jl_svecref(cache, i);
+        if (ti == NULL || ti == jl_nothing)
+            continue;
+        if (ptrhash_get(&backref_table, ti) == HT_NOTFOUND)
+            jl_svecset(cache, i, jl_nothing);
+    }
+}
+
+static void jl_prune_type_cache_linear(jl_svec_t *cache)
 {
     size_t l = jl_svec_len(cache), ins = 0, i;
     for (i = 0; i < l; i++) {
         jl_value_t *ti = jl_svecref(cache, i);
         if (ti == NULL)
             break;
-        if (ptrhash_get(&backref_table, ti) != HT_NOTFOUND || jl_get_llvm_gv(native_functions, ti) != 0)
+        if (ptrhash_get(&backref_table, ti) != HT_NOTFOUND)
             jl_svecset(cache, ins++, ti);
-        else if (jl_is_datatype(ti)) {
-            jl_value_t *singleton = ((jl_datatype_t*)ti)->instance;
-            if (singleton && (ptrhash_get(&backref_table, singleton) != HT_NOTFOUND ||
-                        jl_get_llvm_gv(native_functions, singleton) != 0))
-                jl_svecset(cache, ins++, ti);
-        }
     }
     if (i > ins) {
         memset(&jl_svec_data(cache)[ins], 0, (i - ins) * sizeof(jl_value_t*));
     }
 }
 
+static jl_value_t *strip_codeinfo_meta(jl_method_t *m, jl_value_t *ci_, int orig)
+{
+    jl_code_info_t *ci = NULL;
+    JL_GC_PUSH1(&ci);
+    int compressed = 0;
+    if (!jl_is_code_info(ci_)) {
+        compressed = 1;
+        ci = jl_uncompress_ir(m, NULL, (jl_array_t*)ci_);
+    }
+    else {
+        ci = (jl_code_info_t*)ci_;
+    }
+    // leave codelocs length the same so the compiler can assume that; just zero it
+    memset(jl_array_data(ci->codelocs), 0, jl_array_len(ci->codelocs)*sizeof(int32_t));
+    // empty linetable
+    if (jl_is_array(ci->linetable))
+        jl_array_del_end((jl_array_t*)ci->linetable, jl_array_len(ci->linetable));
+    // replace slot names with `?`, except unused_sym since the compiler looks at it
+    jl_sym_t *questionsym = jl_symbol("?");
+    int i, l = jl_array_len(ci->slotnames);
+    for (i = 0; i < l; i++) {
+        jl_value_t *s = jl_array_ptr_ref(ci->slotnames, i);
+        if (s != (jl_value_t*)jl_unused_sym)
+            jl_array_ptr_set(ci->slotnames, i, questionsym);
+    }
+    if (orig) {
+        m->slot_syms = jl_compress_argnames(ci->slotnames);
+        jl_gc_wb(m, m->slot_syms);
+    }
+    jl_value_t *ret = (jl_value_t*)ci;
+    if (compressed)
+        ret = (jl_value_t*)jl_compress_ir(m, ci);
+    JL_GC_POP();
+    return ret;
+}
+
+static void record_field_change(jl_value_t **addr, jl_value_t *newval)
+{
+    ptrhash_put(&field_replace, (void*)addr, newval);
+}
+
+static void strip_specializations_(jl_method_instance_t *mi)
+{
+    assert(jl_is_method_instance(mi));
+    jl_code_instance_t *codeinst = mi->cache;
+    while (codeinst) {
+        if (codeinst->inferred && codeinst->inferred != jl_nothing) {
+            if (jl_options.strip_ir) {
+                record_field_change(&codeinst->inferred, jl_nothing);
+            }
+            else if (jl_options.strip_metadata) {
+                codeinst->inferred = strip_codeinfo_meta(mi->def.method, codeinst->inferred, 0);
+                jl_gc_wb(codeinst, codeinst->inferred);
+            }
+        }
+        codeinst = jl_atomic_load_relaxed(&codeinst->next);
+    }
+    if (jl_options.strip_ir) {
+        record_field_change(&mi->uninferred, NULL);
+    }
+}
+
+static int strip_all_codeinfos__(jl_typemap_entry_t *def, void *_env)
+{
+    jl_method_t *m = def->func.method;
+    if (m->source) {
+        int stripped_ir = 0;
+        if (jl_options.strip_ir) {
+            if (m->unspecialized) {
+                jl_code_instance_t *unspec = jl_atomic_load_relaxed(&m->unspecialized->cache);
+                if (unspec && jl_atomic_load_relaxed(&unspec->invoke)) {
+                    // we have a generic compiled version, so can remove the IR
+                    record_field_change(&m->source, jl_nothing);
+                    stripped_ir = 1;
+                }
+            }
+            if (!stripped_ir) {
+                int mod_setting = jl_get_module_compile(m->module);
+                // if the method is declared not to be compiled, keep IR for interpreter
+                if (!(mod_setting == JL_OPTIONS_COMPILE_OFF || mod_setting == JL_OPTIONS_COMPILE_MIN)) {
+                    record_field_change(&m->source, jl_nothing);
+                    stripped_ir = 1;
+                }
+            }
+        }
+        if (jl_options.strip_metadata && !stripped_ir) {
+            m->source = strip_codeinfo_meta(m, m->source, 1);
+            jl_gc_wb(m, m->source);
+        }
+    }
+    jl_svec_t *specializations = m->specializations;
+    size_t i, l = jl_svec_len(specializations);
+    for (i = 0; i < l; i++) {
+        jl_value_t *mi = jl_svecref(specializations, i);
+        if (mi != jl_nothing)
+            strip_specializations_((jl_method_instance_t*)mi);
+    }
+    if (m->unspecialized)
+        strip_specializations_(m->unspecialized);
+    return 1;
+}
+
+static int strip_all_codeinfos_(jl_methtable_t *mt, void *_env)
+{
+    return jl_typemap_visitor(mt->defs, strip_all_codeinfos__, NULL);
+}
+
+static void jl_strip_all_codeinfos(void)
+{
+    jl_foreach_reachable_mtable(strip_all_codeinfos_, NULL);
+}
+
+// Method roots created during sysimg construction are exempted from
+// triggering non-relocatability of compressed CodeInfos.
+// Set the number of such roots in each method when the sysimg is
+// serialized.
+static int set_nroots_sysimg__(jl_typemap_entry_t *def, void *_env)
+{
+    jl_method_t *m = def->func.method;
+    m->nroots_sysimg = m->roots ? jl_array_len(m->roots) : 0;
+    return 1;
+}
+
+static int set_nroots_sysimg_(jl_methtable_t *mt, void *_env)
+{
+    return jl_typemap_visitor(mt->defs, set_nroots_sysimg__, NULL);
+}
+
+static void jl_set_nroots_sysimg(void)
+{
+    jl_foreach_reachable_mtable(set_nroots_sysimg_, NULL);
+}
 
 // --- entry points ---
 
 static void jl_init_serializer2(int);
 static void jl_cleanup_serializer2(void);
 
-static void jl_save_system_image_to_stream(ios_t *f)
+static void jl_save_system_image_to_stream(ios_t *f) JL_GC_DISABLED
 {
     jl_gc_collect(JL_GC_FULL);
     jl_gc_collect(JL_GC_INCREMENTAL);   // sweep finalizers
     JL_TIMING(SYSIMG_DUMP);
+
+    htable_new(&field_replace, 10000);
+    // strip metadata and IR when requested
+    if (jl_options.strip_metadata || jl_options.strip_ir)
+        jl_strip_all_codeinfos();
+    jl_set_nroots_sysimg();
+
     int en = jl_gc_enable(0);
     jl_init_serializer2(1);
     htable_reset(&backref_table, 250000);
     arraylist_new(&reinit_list, 0);
     arraylist_new(&ccallable_list, 0);
+    arraylist_new(&object_worklist, 0);
     backref_table_numel = 0;
     ios_t sysimg, const_data, symbols, relocs, gvar_record, fptr_record;
     ios_mem(&sysimg,     1000000);
@@ -1350,9 +1832,10 @@ static void jl_save_system_image_to_stream(ios_t *f)
     s.relocs = &relocs;
     s.gvar_record = &gvar_record;
     s.fptr_record = &fptr_record;
-    s.ptls = jl_get_ptls_states();
+    s.ptls = jl_current_task->ptls;
     arraylist_new(&s.relocs_list, 0);
     arraylist_new(&s.gctags_list, 0);
+    jl_value_t **const*const tags = get_tags();
 
     // empty!(Core.ARGS)
     if (jl_core_module != NULL) {
@@ -1369,6 +1852,12 @@ static void jl_save_system_image_to_stream(ios_t *f)
         gmp_limb_size = jl_unbox_long(jl_get_global((jl_module_t*)jl_get_global(jl_base_module, jl_symbol("GMP")),
                                                     jl_symbol("BITS_PER_LIMB"))) / 8;
     }
+    if (jl_base_module) {
+        jl_value_t *docs = jl_get_global(jl_base_module, jl_symbol("Docs"));
+        if (docs && jl_is_module(docs)) {
+            jl_docmeta_sym = (jl_sym_t*)jl_get_global((jl_module_t*)docs, jl_symbol("META"));
+        }
+    }
 
     { // step 1: record values (recursively) that need to go in the image
         size_t i;
@@ -1376,16 +1865,30 @@ static void jl_save_system_image_to_stream(ios_t *f)
             jl_value_t *tag = *tags[i];
             jl_serialize_value(&s, tag);
         }
-        for (i = 0; i < builtin_typenames.len; i++) {
-            jl_typename_t *tn = (jl_typename_t*)builtin_typenames.items[i];
-            jl_prune_type_cache(tn->cache);
-            jl_prune_type_cache(tn->linearcache);
+        jl_serialize_reachable(&s);
+        // step 1.1: check for values only found in the generated code
+        arraylist_t typenames;
+        arraylist_new(&typenames, 0);
+        for (i = 0; i < backref_table.size; i += 2) {
+            jl_typename_t *tn = (jl_typename_t*)backref_table.table[i];
+            if (tn == HT_NOTFOUND || !jl_is_typename(tn))
+                continue;
+            arraylist_push(&typenames, tn);
         }
-        for (i = 0; i < builtin_typenames.len; i++) {
-            jl_typename_t *tn = (jl_typename_t*)builtin_typenames.items[i];
-            jl_serialize_value(&s, tn->cache);
-            jl_serialize_value(&s, tn->linearcache);
+        for (i = 0; i < typenames.len; i++) {
+            jl_typename_t *tn = (jl_typename_t*)typenames.items[i];
+            jl_scan_type_cache_gv(&s, tn->cache);
+            jl_scan_type_cache_gv(&s, tn->linearcache);
         }
+        jl_serialize_reachable(&s);
+        // step 1.2: prune (garbage collect) some special weak references from
+        // built-in type caches
+        for (i = 0; i < typenames.len; i++) {
+            jl_typename_t *tn = (jl_typename_t*)typenames.items[i];
+            jl_prune_type_cache_hash(tn->cache);
+            jl_prune_type_cache_linear(tn->linearcache);
+        }
+        arraylist_free(&typenames);
     }
 
     { // step 2: build all the sysimg sections
@@ -1393,7 +1896,13 @@ static void jl_save_system_image_to_stream(ios_t *f)
         jl_write_values(&s);
         jl_write_relocations(&s);
         jl_write_gv_syms(&s, jl_get_root_symbol());
-        jl_write_gv_ints(&s);
+        jl_write_gv_tagrefs(&s);
+    }
+
+    if (sysimg.size > ((uintptr_t)1 << RELOC_TAG_OFFSET) ||
+        const_data.size > ((uintptr_t)1 << RELOC_TAG_OFFSET)*sizeof(void*)) {
+        jl_printf(JL_STDERR, "ERROR: system image too large\n");
+        jl_exit(1);
     }
 
     // step 3: combine all of the sections into one file
@@ -1438,17 +1947,20 @@ static void jl_save_system_image_to_stream(ios_t *f)
         }
         jl_write_value(&s, s.ptls->root_task->tls);
         write_uint32(f, jl_get_gs_ctr());
-        write_uint32(f, jl_world_counter);
+        write_uint32(f, jl_atomic_load_acquire(&jl_world_counter));
         write_uint32(f, jl_typeinf_world);
         jl_finalize_serializer(&s, &reinit_list);
         jl_finalize_serializer(&s, &ccallable_list);
     }
 
+    assert(object_worklist.len == 0);
+    arraylist_free(&object_worklist);
     arraylist_free(&layout_table);
     arraylist_free(&reinit_list);
     arraylist_free(&ccallable_list);
     arraylist_free(&s.relocs_list);
     arraylist_free(&s.gctags_list);
+    htable_free(&field_replace);
     jl_cleanup_serializer2();
 
     jl_gc_enable(en);
@@ -1503,7 +2015,7 @@ JL_DLLEXPORT void jl_set_sysimg_so(void *handle)
     sysimg_fptrs = jl_init_processor_sysimg(handle);
 }
 
-static void jl_restore_system_image_from_stream(ios_t *f)
+static void jl_restore_system_image_from_stream(ios_t *f) JL_GC_DISABLED
 {
     JL_TIMING(SYSIMG_LOAD);
     int en = jl_gc_enable(0);
@@ -1516,9 +2028,10 @@ static void jl_restore_system_image_from_stream(ios_t *f)
     s.relocs = &relocs;
     s.gvar_record = &gvar_record;
     s.fptr_record = &fptr_record;
-    s.ptls = jl_get_ptls_states();
+    s.ptls = jl_current_task->ptls;
     arraylist_new(&s.relocs_list, 0);
     arraylist_new(&s.gctags_list, 0);
+    jl_value_t **const*const tags = get_tags();
 
     // step 1: read section map
     assert(ios_pos(f) == 0 && f->bm == bm_mem);
@@ -1558,14 +2071,16 @@ static void jl_restore_system_image_from_stream(ios_t *f)
         jl_value_t **tag = tags[i];
         *tag = jl_read_value(&s);
     }
-    s.ptls->root_task = (jl_task_t*)jl_gc_alloc(s.ptls, sizeof(jl_task_t), jl_task_type);
-    memset(s.ptls->root_task, 0, sizeof(jl_task_t));
+    // set typeof extra-special values now that we have the type set by tags above
+    jl_astaggedvalue(jl_current_task)->header = (uintptr_t)jl_task_type | jl_astaggedvalue(jl_current_task)->header;
+    jl_astaggedvalue(jl_nothing)->header = (uintptr_t)jl_nothing_type | jl_astaggedvalue(jl_nothing)->header;
     s.ptls->root_task->tls = jl_read_value(&s);
+    jl_gc_wb(s.ptls->root_task, s.ptls->root_task->tls);
     jl_init_int32_int64_cache();
     jl_init_box_caches();
 
     uint32_t gs_ctr = read_uint32(f);
-    jl_world_counter = read_uint32(f);
+    jl_atomic_store_release(&jl_world_counter, read_uint32(f));
     jl_typeinf_world = read_uint32(f);
     jl_set_gs_ctr(gs_ctr);
     s.s = NULL;
@@ -1680,7 +2195,6 @@ static void jl_init_serializer2(int for_serialize)
         htable_new(&symbol_table, 0);
         htable_new(&fptr_to_id, sizeof(id_to_fptrs) / sizeof(*id_to_fptrs));
         htable_new(&backref_table, 0);
-        arraylist_new(&builtin_typenames, 0);
         uintptr_t i;
         for (i = 0; id_to_fptrs[i] != NULL; i++) {
             ptrhash_put(&fptr_to_id, (void*)(uintptr_t)id_to_fptrs[i], (void*)(i + 2));
@@ -1698,7 +2212,6 @@ static void jl_cleanup_serializer2(void)
     htable_reset(&fptr_to_id, 0);
     htable_reset(&backref_table, 0);
     arraylist_free(&deser_sym);
-    arraylist_free(&builtin_typenames);
 }
 
 #ifdef __cplusplus
diff --git a/src/subtype.c b/src/subtype.c
index c9a6180e4a27c0..c43d307e6d4213 100644
--- a/src/subtype.c
+++ b/src/subtype.c
@@ -42,11 +42,19 @@ extern "C" {
 // TODO: the stack probably needs to be artificially large because of some
 // deeper problem (see #21191) and could be shrunk once that is fixed
 typedef struct {
-    int depth;
-    int more;
+    int16_t depth;
+    int16_t more;
+    int16_t used;
     uint32_t stack[100];  // stack of bits represented as a bit vector
 } jl_unionstate_t;
 
+typedef struct {
+    int16_t depth;
+    int16_t more;
+    int16_t used;
+    void *stack;
+} jl_saved_unionstate_t;
+
 // Linked list storing the type variable environment. A new jl_varbinding_t
 // is pushed for each UnionAll type we encounter. `lb` and `ub` are updated
 // during the computation.
@@ -60,22 +68,21 @@ typedef struct jl_varbinding_t {
     int8_t occurs_inv;  // occurs in invariant position
     int8_t occurs_cov;  // # of occurrences in covariant position
     int8_t concrete;    // 1 if another variable has a constraint forcing this one to be concrete
-    // in covariant position, we need to try constraining a variable in different ways:
-    // 0 - unconstrained
-    // 1 - less than
-    // 2 - greater than
-    // 3 - inexpressible - occurs when the var has non-trivial overlap with another type,
-    //                     and we would need to return `intersect(var,other)`. in this case
-    //                     we choose to over-estimate the intersection by returning the var.
+    // constraintkind: in covariant position, we try three different ways to compute var ∩ type:
+    // let ub = var.ub ∩ type
+    // 0 - var.ub <: type ? var : ub
+    // 1 - var.ub = ub; return var
+    // 2 - either (var.ub = ub; return var), or return ub
     int8_t constraintkind;
-    int depth0;         // # of invariant constructors nested around the UnionAll type for this var
+    int8_t intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
+    int8_t limited;
+    int16_t depth0;         // # of invariant constructors nested around the UnionAll type for this var
     // when this variable's integer value is compared to that of another,
     // it equals `other + offset`. used by vararg length parameters.
-    int offset;
+    int16_t offset;
     // array of typevars that our bounds depend on, whose UnionAlls need to be
     // moved outside ours.
     jl_array_t *innervars;
-    int intvalued;      // must be integer-valued; i.e. occurs as N in Vararg{_,N}
     struct jl_varbinding_t *prev;
 } jl_varbinding_t;
 
@@ -94,12 +101,13 @@ typedef struct jl_stenv_t {
     int ignore_free;          // treat free vars as black boxes; used during intersection
     int intersection;         // true iff subtype is being called from intersection
     int emptiness_only;       // true iff intersection only needs to test for emptiness
+    int triangular;           // when intersecting Ref{X} with Ref{<:Y}
 } jl_stenv_t;
 
 // state manipulation utilities
 
 // look up a type variable in an environment
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 static jl_varbinding_t *lookup(jl_stenv_t *e, jl_tvar_t *v) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
 #else
 static jl_varbinding_t *lookup(jl_stenv_t *e, jl_tvar_t *v) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
@@ -117,17 +125,34 @@ static int statestack_get(jl_unionstate_t *st, int i) JL_NOTSAFEPOINT
 {
     assert(i >= 0 && i < sizeof(st->stack) * 8);
     // get the `i`th bit in an array of 32-bit words
-    return (st->stack[i>>5] & (1<<(i&31))) != 0;
+    return (st->stack[i>>5] & (1u<<(i&31))) != 0;
 }
 
 static void statestack_set(jl_unionstate_t *st, int i, int val) JL_NOTSAFEPOINT
 {
     assert(i >= 0 && i < sizeof(st->stack) * 8);
     if (val)
-        st->stack[i>>5] |= (1<<(i&31));
+        st->stack[i>>5] |= (1u<<(i&31));
     else
-        st->stack[i>>5] &= ~(1<<(i&31));
-}
+        st->stack[i>>5] &= ~(1u<<(i&31));
+}
+
+#define push_unionstate(saved, src)                                     \
+    do {                                                                \
+        (saved)->depth = (src)->depth;                                  \
+        (saved)->more = (src)->more;                                    \
+        (saved)->used = (src)->used;                                    \
+        (saved)->stack = alloca(((src)->used+7)/8);                     \
+        memcpy((saved)->stack, &(src)->stack, ((src)->used+7)/8);       \
+    } while (0);
+
+#define pop_unionstate(dst, saved)                                      \
+    do {                                                                \
+        (dst)->depth = (saved)->depth;                                  \
+        (dst)->more = (saved)->more;                                    \
+        (dst)->used = (saved)->used;                                    \
+        memcpy(&(dst)->stack, (saved)->stack, ((saved)->used+7)/8);     \
+    } while (0);
 
 typedef struct {
     int8_t *buf;
@@ -146,7 +171,7 @@ static void save_env(jl_stenv_t *e, jl_value_t **root, jl_savedenv_t *se)
     if (root)
         *root = (jl_value_t*)jl_alloc_svec(len * 3);
     se->buf = (int8_t*)(len > 8 ? malloc_s(len * 2) : &se->_space);
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
     memset(se->buf, 0, len * 2);
 #endif
     int i=0, j=0; v = e->vars;
@@ -222,6 +247,12 @@ static int obviously_egal(jl_value_t *a, jl_value_t *b)
         return ((jl_unionall_t*)a)->var == ((jl_unionall_t*)b)->var &&
             obviously_egal(((jl_unionall_t*)a)->body, ((jl_unionall_t*)b)->body);
     }
+    if (jl_is_vararg(a)) {
+        jl_vararg_t *vma = (jl_vararg_t *)a;
+        jl_vararg_t *vmb = (jl_vararg_t *)b;
+        return obviously_egal(jl_unwrap_vararg(vma), jl_unwrap_vararg(vmb)) &&
+            ((!vma->N && !vmb->N) || (vma->N && vmb->N && obviously_egal(vma->N, vmb->N)));
+    }
     if (jl_is_typevar(a)) return 0;
     return !jl_is_type(a) && jl_egal(a,b);
 }
@@ -480,6 +511,10 @@ static jl_value_t *pick_union_element(jl_value_t *u JL_PROPAGATES_ROOT, jl_stenv
 {
     jl_unionstate_t *state = R ? &e->Runions : &e->Lunions;
     do {
+        if (state->depth >= state->used) {
+            statestack_set(state, state->used, 0);
+            state->used++;
+        }
         int ui = statestack_get(state, state->depth);
         state->depth++;
         if (ui == 0) {
@@ -504,15 +539,14 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         return 1;
     if (y == (jl_value_t*)jl_any_type && jl_is_type(x))
         return 1;
-    if (jl_is_uniontype(x) && jl_is_uniontype(y) && jl_egal(x,y))
+    if (jl_is_uniontype(x) && jl_egal(x, y))
         return 1;
     if (x == (jl_value_t*)jl_any_type && jl_is_datatype(y))
         return 0;
-    jl_unionstate_t oldLunions = e->Lunions;
-    jl_unionstate_t oldRunions = e->Runions;
+    jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
+    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int sub;
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Lunions.used = e->Runions.used = 0;
     e->Runions.depth = 0;
     e->Runions.more = 0;
     e->Lunions.depth = 0;
@@ -520,8 +554,8 @@ static int subtype_ccheck(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 
     sub = forall_exists_subtype(x, y, e, 0);
 
-    e->Runions = oldRunions;
-    e->Lunions = oldLunions;
+    pop_unionstate(&e->Runions, &oldRunions);
+    pop_unionstate(&e->Lunions, &oldLunions);
     return sub;
 }
 
@@ -533,7 +567,7 @@ static int subtype_left_var(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int par
         return 1;
     if (y == (jl_value_t*)jl_any_type && jl_is_type(x))
         return 1;
-    if (jl_is_uniontype(x) && jl_is_uniontype(y) && jl_egal(x,y))
+    if (jl_is_uniontype(x) && jl_egal(x, y))
         return 1;
     if (x == (jl_value_t*)jl_any_type && jl_is_datatype(y))
         return 0;
@@ -605,6 +639,8 @@ static int var_lt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
     return 1;
 }
 
+static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT;
+
 // check that type var `b` is >: `a`, and update b's lower bound.
 static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
 {
@@ -622,7 +658,10 @@ static int var_gt(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int param)
     }
     if (!((bb->ub == (jl_value_t*)jl_any_type && !jl_is_type(a) && !jl_is_typevar(a)) || subtype_ccheck(a, bb->ub, e)))
         return 0;
-    bb->lb = simple_join(bb->lb, a);
+    jl_value_t *lb = simple_join(bb->lb, a);
+    if (!e->intersection || !subtype_by_bounds(lb, (jl_value_t*)b, e))
+        bb->lb = lb;
+    // this bound should not be directly circular
     assert(bb->lb != (jl_value_t*)b);
     if (jl_is_typevar(a)) {
         jl_varbinding_t *aa = lookup(e, (jl_tvar_t*)a);
@@ -640,7 +679,7 @@ static int is_leaf_bound(jl_value_t *v) JL_NOTSAFEPOINT
     if (v == jl_bottom_type)
         return 1;
     if (jl_is_datatype(v)) {
-        if (((jl_datatype_t*)v)->abstract) {
+        if (((jl_datatype_t*)v)->name->abstract) {
             if (jl_is_type_type(v))
                 return 1;//!jl_has_free_typevars(jl_tparam0(v));
             return 0;
@@ -702,16 +741,37 @@ static int var_occurs_invariant(jl_value_t *v, jl_tvar_t *var, int inv) JL_NOTSA
     return var_occurs_inside(v, var, 0, 1);
 }
 
-static int with_tvar(tvar_callback callback, void *context, jl_unionall_t *u, int8_t R, jl_stenv_t *e, int param)
+static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
 {
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, 0, e->vars };
+    jl_varbinding_t *btemp = e->vars;
+    // if the var for this unionall (based on identity) already appears somewhere
+    // in the environment, rename to get a fresh var.
+    JL_GC_PUSH1(&u);
+    while (btemp != NULL) {
+        if (btemp->var == u->var ||
+            // outer var can only refer to inner var if bounds changed
+            (btemp->lb != btemp->var->lb && jl_has_typevar(btemp->lb, u->var)) ||
+            (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var))) {
+            u = rename_unionall(u);
+            break;
+        }
+        btemp = btemp->prev;
+    }
+    JL_GC_POP();
+    return u;
+}
+
+static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
+{
+    u = unalias_unionall(u, e);
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
+                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
     JL_GC_PUSH4(&u, &vb.lb, &vb.ub, &vb.innervars);
     e->vars = &vb;
     int ans;
     if (R) {
         e->envidx++;
-        ans = callback(context, R, e, param);
+        ans = subtype(t, u->body, e, param);
         e->envidx--;
         // widen Type{x} to typeof(x) in argument position
         if (!vb.occurs_inv)
@@ -719,7 +779,9 @@ static int with_tvar(tvar_callback callback, void *context, jl_unionall_t *u, in
         // fill variable values into `envout` up to `envsz`
         if (e->envidx < e->envsz) {
             jl_value_t *val;
-            if (!vb.occurs_inv && vb.lb != jl_bottom_type)
+            if (vb.intvalued && vb.lb == (jl_value_t*)jl_any_type)
+                val = (jl_value_t*)jl_wrap_vararg(NULL, NULL);
+            else if (!vb.occurs_inv && vb.lb != jl_bottom_type)
                 val = is_leaf_bound(vb.lb) ? vb.lb : (jl_value_t*)jl_new_typevar(u->var->name, jl_bottom_type, vb.lb);
             else if (vb.lb == vb.ub)
                 val = vb.lb;
@@ -742,7 +804,8 @@ static int with_tvar(tvar_callback callback, void *context, jl_unionall_t *u, in
         }
     }
     else {
-        ans = callback(context, R, e, param);
+        ans = R ? subtype(t, u->body, e, param) :
+                  subtype(u->body, t, e, param);
     }
 
     // handle the "diagonal dispatch" rule, which says that a type var occurring more
@@ -794,73 +857,12 @@ static int with_tvar(tvar_callback callback, void *context, jl_unionall_t *u, in
     return ans;
 }
 
-static jl_unionall_t *unalias_unionall(jl_unionall_t *u, jl_stenv_t *e)
-{
-    jl_varbinding_t *btemp = e->vars;
-    // if the var for this unionall (based on identity) already appears somewhere
-    // in the environment, rename to get a fresh var.
-    JL_GC_PUSH1(&u);
-    while (btemp != NULL) {
-        if (btemp->var == u->var ||
-            // outer var can only refer to inner var if bounds changed
-            (btemp->lb != btemp->var->lb && jl_has_typevar(btemp->lb, u->var)) ||
-            (btemp->ub != btemp->var->ub && jl_has_typevar(btemp->ub, u->var))) {
-            u = rename_unionall(u);
-            break;
-        }
-        btemp = btemp->prev;
-    }
-    JL_GC_POP();
-    return u;
-}
-
-struct subtype_unionall_env {
-    jl_value_t *t;
-    jl_value_t *ubody;
-};
-
-static int subtype_unionall_callback(struct subtype_unionall_env *env, int8_t R, jl_stenv_t *s, int param) {
-    JL_GC_PROMISE_ROOTED(env->t);
-    JL_GC_PROMISE_ROOTED(env->ubody);
-    if (R) {
-        return subtype(env->t, env->ubody, s, param);
-    }
-    else {
-        return subtype(env->ubody, env->t, s, param);
-    }
-}
-
-// compare UnionAll type `u` to `t`. `R==1` if `u` came from the right side of A <: B.
-static int subtype_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
-{
-    u = unalias_unionall(u, e);
-    struct subtype_unionall_env env = {t, u->body};
-    JL_GC_PUSH1(&u);
-    int res = with_tvar((tvar_callback)subtype_unionall_callback, (void*)&env, u, R, e, param);
-    JL_GC_POP();
-    return res;
-}
-
-// unwrap <=1 layers of UnionAlls, leaving the var in *p1 the body
-static jl_datatype_t *unwrap_1_unionall(jl_value_t *t, jl_tvar_t **p1) JL_NOTSAFEPOINT
-{
-    assert(t);
-    if (jl_is_unionall(t)) {
-        *p1 = ((jl_unionall_t*)t)->var;
-        t = ((jl_unionall_t*)t)->body;
-    }
-    assert(jl_is_datatype(t));
-    return (jl_datatype_t*)t;
-}
-
 // check n <: (length of vararg type v)
 static int check_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e)
 {
-    jl_tvar_t *va_p1=NULL;
-    jl_datatype_t *tail = unwrap_1_unionall(v, &va_p1);
-    jl_value_t *N = jl_tparam1(tail);
+    jl_value_t *N = jl_unwrap_vararg_num(v);
     // only do the check if N is free in the tuple type's last parameter
-    if (N != (jl_value_t*)va_p1) {
+    if (N) {
         jl_value_t *nn = jl_box_long(n);
         JL_GC_PUSH1(&nn);
         e->invdepth++;
@@ -888,52 +890,51 @@ struct subtype_tuple_env {
     jl_vararg_kind_t vvx, vvy;
 } JL_ROOTED_VALUE_COLLECTION;
 
-static int subtype_tuple_varargs(struct subtype_tuple_env *env, jl_stenv_t *e, int param)
+static int subtype_tuple_varargs(
+    jl_vararg_t *vtx, jl_vararg_t *vty,
+    size_t vx, size_t vy,
+    jl_stenv_t *e, int param)
 {
-    jl_tvar_t *yv1=NULL;
-    jl_datatype_t *yva = unwrap_1_unionall(env->vty, &yv1);
-    jl_tvar_t *xv1=NULL;
-    jl_datatype_t *xva = unwrap_1_unionall(env->vtx, &xv1);
-
-    jl_value_t *xp0 = jl_tparam0(xva); jl_value_t *xp1 = jl_tparam1(xva);
-    jl_value_t *yp0 = jl_tparam0(yva); jl_value_t *yp1 = jl_tparam1(yva);
+    jl_value_t *xp0 = jl_unwrap_vararg(vtx); jl_value_t *xp1 = jl_unwrap_vararg_num(vtx);
+    jl_value_t *yp0 = jl_unwrap_vararg(vty); jl_value_t *yp1 = jl_unwrap_vararg_num(vty);
 
-    if (!jl_is_datatype(env->vtx)) {
-        // Unconstrained on the left, constrained on the right
+    if (!xp1) {
         jl_value_t *yl = yp1;
-        if (jl_is_typevar(yl)) {
-            jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl);
-            if (ylv)
-                yl = ylv->lb;
-        }
-        if (jl_is_long(yl)) {
-            return 0;
+        if (yl) {
+            // Unconstrained on the left, constrained on the right
+            if (jl_is_typevar(yl)) {
+                jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl);
+                if (ylv)
+                    yl = ylv->lb;
+            }
+            if (jl_is_long(yl)) {
+                return 0;
+            }
         }
     }
     else {
-        jl_value_t *xl = jl_tparam1(env->vtx);
+        jl_value_t *xl = jl_unwrap_vararg_num(vtx);
         if (jl_is_typevar(xl)) {
             jl_varbinding_t *xlv = lookup(e, (jl_tvar_t*)xl);
             if (xlv)
                 xl = xlv->lb;
         }
         if (jl_is_long(xl)) {
-            if (jl_unbox_long(xl) + 1 == env->vx) {
+            if (jl_unbox_long(xl) + 1 == vx) {
                 // LHS is exhausted. We're a subtype if the RHS is either
                 // exhausted as well or unbounded (in which case we need to
                 // set it to 0).
-                if (jl_is_datatype(env->vty)) {
-                    jl_value_t *yl = jl_tparam1(env->vty);
+                jl_value_t *yl = jl_unwrap_vararg_num(vty);
+                if (yl) {
                     if (jl_is_typevar(yl)) {
                         jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl);
                         if (ylv)
                             yl = ylv->lb;
                     }
                     if (jl_is_long(yl)) {
-                        return jl_unbox_long(yl) + 1 == env->vy;
+                        return jl_unbox_long(yl) + 1 == vy;
                     }
-                }
-                else {
+                } else {
                     // We can skip the subtype check, but we still
                     // need to make sure to constrain the length of y
                     // to 0.
@@ -950,10 +951,14 @@ static int subtype_tuple_varargs(struct subtype_tuple_env *env, jl_stenv_t *e, i
     if (!subtype(xp0, yp0, e, 1)) return 0;
 
 constrain_length:
-    if (!jl_is_datatype(env->vtx)) {
+    if (!yp1) {
+        return 1;
+    }
+    if (!xp1) {
         jl_value_t *yl = yp1;
+        jl_varbinding_t *ylv = NULL;
         if (jl_is_typevar(yl)) {
-            jl_varbinding_t *ylv = lookup(e, (jl_tvar_t*)yl);
+            ylv = lookup(e, (jl_tvar_t*)yl);
             if (ylv)
                 yl = ylv->lb;
         }
@@ -963,16 +968,30 @@ static int subtype_tuple_varargs(struct subtype_tuple_env *env, jl_stenv_t *e, i
             // as a result of the subtype call above).
             return 0;
         }
+
+        if (ylv) {
+            if (ylv->depth0 != e->invdepth || ylv->occurs_inv)
+                return 0;
+            ylv->intvalued = 1;
+        }
+        // set lb to Any. Since `intvalued` is set, we'll interpret that
+        // appropriately.
+        e->invdepth++;
+        e->Rinvdepth++;
+        int ans = subtype((jl_value_t*)jl_any_type, yp1, e, 2);
+        e->invdepth--;
+        e->Rinvdepth--;
+        return ans;
     }
 
     // Vararg{T,N} <: Vararg{T2,N2}; equate N and N2
     e->invdepth++;
     e->Rinvdepth++;
     JL_GC_PUSH2(&xp1, &yp1);
-    if (jl_is_long(xp1) && env->vx != 1)
-        xp1 = jl_box_long(jl_unbox_long(xp1) - env->vx + 1);
-    if (jl_is_long(yp1) && env->vy != 1)
-        yp1 = jl_box_long(jl_unbox_long(yp1) - env->vy + 1);
+    if (xp1 && jl_is_long(xp1) && vx != 1)
+        xp1 = jl_box_long(jl_unbox_long(xp1) - vx + 1);
+    if (jl_is_long(yp1) && vy != 1)
+        yp1 = jl_box_long(jl_unbox_long(yp1) - vy + 1);
     int ans = forall_exists_equal(xp1, yp1, e);
     JL_GC_POP();
     e->invdepth--;
@@ -980,88 +999,63 @@ static int subtype_tuple_varargs(struct subtype_tuple_env *env, jl_stenv_t *e, i
     return ans;
 }
 
-static int subtype_tuple_tail(struct subtype_tuple_env *env, int8_t R, jl_stenv_t *e, int param)
-{
-    int x_reps = 1;
-loop: // while (i <= lx) {
-        if (env->i >= env->lx)
-            goto done;
-
-        /* Get the type in the current index. If necessary introduce tvars for
-           varargs */
-        jl_value_t *xi = NULL;
-        if (env->i == env->lx-1 && env->vvx) {
-            if (!env->vtx) {
-                xi = jl_tparam(env->xd, env->i);
-                // Unbounded vararg on the LHS without vararg on the RHS should
-                // have been caught earlier.
-                assert(env->vvy || !jl_is_unionall(xi));
-                if (jl_is_unionall(xi)) {
-                    // TODO: If !var_occurs_inside(jl_tparam0(xid), p1, 0, 1),
-                    // we could avoid introducing the tvar into the environment
-                    jl_unionall_t *u = (jl_unionall_t*)xi;
-                    u = unalias_unionall(u, e);
-                    env->vtx = (jl_value_t*)u;
-                    // goto loop, but with the tvar introduced
-                    JL_GC_PUSH1(&u);
-                    int res = with_tvar((tvar_callback)subtype_tuple_tail, env, u, 0, e, param);
-                    JL_GC_POP();
-                    return res;
-                }
-                env->vtx = xi;
+static int subtype_tuple_tail(jl_datatype_t *xd, jl_datatype_t *yd, int8_t R, jl_stenv_t *e, int param)
+{
+    size_t lx = jl_nparams(xd);
+    size_t ly = jl_nparams(yd);
+    size_t i = 0, j = 0, vx = 0, vy = 0, x_reps = 1;
+    jl_value_t *lastx = NULL, *lasty = NULL;
+    jl_value_t *xi = NULL, *yi = NULL;
+
+    for (;;) {
+        if (i < lx) {
+            xi = jl_tparam(xd, i);
+            if (i == lx-1 && (vx || jl_is_vararg(xi))) {
+                vx += 1;
             }
-            xi = env->vtx;
         }
-        else {
-            xi = jl_tparam(env->xd, env->i);
-        }
-
-        jl_value_t *yi = NULL;
-        if (env->j < env->ly) {
-            if (env->j == env->ly-1 && env->vvy) {
-                if (!env->vty) {
-                    yi = jl_tparam(env->yd, env->j);
-                    if (jl_is_unionall(yi)) {
-                        jl_unionall_t *u = (jl_unionall_t*)yi;
-                        u = unalias_unionall(u, e);
-                        env->vty = (jl_value_t*)u;
-                        // goto loop, but with the tvar introduced
-                        JL_GC_PUSH1(&u);
-                        int res = with_tvar((tvar_callback)subtype_tuple_tail, env, u, 1, e, param);
-                        JL_GC_POP();
-                        return res;
-                    }
-                    env->vty = yi;
-                }
-                yi = env->vty;
-            }
-            else {
-                yi = jl_tparam(env->yd, env->j);
+
+        if (j < ly) {
+            yi = jl_tparam(yd, j);
+            if (j == ly-1 && (vy || jl_is_vararg(yi))) {
+                vy += 1;
             }
         }
 
-        if (env->vtx)
-            env->vx += 1;
-        if (env->vty)
-            env->vy += 1;
+        if (i >= lx)
+            break;
 
-        if (env->vx && env->vy) {
-            return subtype_tuple_varargs(env, e, param);
+        int all_varargs = vx && vy;
+        if (!all_varargs && vy == 1) {
+            if (jl_unwrap_vararg(yi) == (jl_value_t*)jl_any_type) {
+                // Tuple{...} <: Tuple{..., Vararg{Any, _}}
+                // fast path all the type checks away
+                xi = jl_tparam(xd, lx-1);
+                if (jl_is_vararg(xi)) {
+                    all_varargs = 1;
+                    vy += lx - i;
+                    vx = 1;
+                } else {
+                    break;
+                }
+            }
         }
 
-        if (env->vx) {
-            xi = jl_tparam0(jl_unwrap_unionall(env->vtx));
-            if (env->j >= env->ly)
-               return 1;
+        if (all_varargs) {
+            // Tuple{..., Vararg{xi, _}} <: Tuple{..., Vararg{yi, _}}
+            return subtype_tuple_varargs(
+                (jl_vararg_t*)xi,
+                (jl_vararg_t*)yi,
+                vx, vy, e, param);
         }
-        else if (env->j >= env->ly) {
-            return 0;
-        }
-        int x_same = env->lastx && jl_egal(xi, env->lastx);
-        if (env->vy) {
-            yi = jl_tparam0(jl_unwrap_unionall(env->vty));
-            if (!env->vvx && yi == (jl_value_t*)jl_any_type)
-                goto done;  // if y ends in `Vararg{Any}` skip checking everything
+
+        if (j >= ly)
+            return !!vx;
+
+        xi = vx ? jl_unwrap_vararg(xi) : xi;
+        int x_same = lastx && jl_egal(xi, lastx);
+        if (vy) {
+            yi = jl_unwrap_vararg(yi);
             // keep track of number of consecutive identical types compared to Vararg
             if (x_same)
                 x_reps++;
@@ -1072,9 +1066,9 @@ static int subtype_tuple_tail(struct subtype_tuple_env *env, int8_t R, jl_stenv_
             // an identical type on the left doesn't need to be compared to a Vararg
             // element type on the right more than twice.
         }
-        else if (x_same &&
-            ((yi == env->lasty && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) ||
-             (yi == env->lasty && !env->vx && env->vy && jl_is_concrete_type(xi)))) {
+        else if (x_same && e->Runions.depth == 0 &&
+            ((yi == lasty && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) ||
+             (yi == lastx && !vx && vy && jl_is_concrete_type(xi)))) {
             // fast path for repeated elements
         }
         else if (e->Runions.depth == 0 && e->Lunions.depth == 0 && !jl_has_free_typevars(xi) && !jl_has_free_typevars(yi)) {
@@ -1085,73 +1079,72 @@ static int subtype_tuple_tail(struct subtype_tuple_env *env, int8_t R, jl_stenv_
         else if (!subtype(xi, yi, e, param)) {
             return 0;
         }
-        env->lastx = xi; env->lasty = yi;
-        if (env->i < env->lx-1 || !env->vx)
-            env->i++;
-        if (env->j < env->ly-1 || !env->vy)
-            env->j++;
-
-        goto loop;
-    // } (from loop:)
+        lastx = xi; lasty = yi;
+        if (i < lx-1 || !vx)
+            i++;
+        if (j < ly-1 || !vy)
+            j++;
+    }
 
-done:
-    if (!env->vy && env->j < env->ly && jl_is_vararg_type(jl_tparam(env->yd, env->j)))
-        env->vy += 1;
-    if (env->vy && !env->vx && env->lx+1 >= env->ly) {
+    if (vy && !vx && lx+1 >= ly) {
         // in Tuple{...,tn} <: Tuple{...,Vararg{T,N}}, check (lx+1-ly) <: N
-        if (!check_vararg_length(jl_tparam(env->yd,env->ly-1), env->lx+1-env->ly, e))
+        if (!check_vararg_length(yi, lx+1-ly, e))
             return 0;
     }
-    return (env->lx + env->vx == env->ly + env->vy) || (env->vy && (env->lx >= (env->vx ? env->ly : (env->ly-1))));
+    assert((lx + vx == ly + vy) || (vy && (lx >= (vx ? ly : (ly-1)))));
+    return 1;
 }
 
 static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, int param)
 {
-    struct subtype_tuple_env env;
-    env.xd = xd;
-    env.yd = yd;
-    env.lx = jl_nparams(xd);
-    env.ly = jl_nparams(yd);
-    if (env.lx == 0 && env.ly == 0)
+    // Check tuple compatibility based on tuple length only (fastpath)
+    size_t lx = jl_nparams(xd);
+    size_t ly = jl_nparams(yd);
+
+    if (lx == 0 && ly == 0)
         return 1;
-    env.i = env.j = 0;
-    env.vx = env.vy = 0;
-    env.vvx = env.vvy = JL_VARARG_NONE;
+
+    jl_vararg_kind_t vvx = JL_VARARG_NONE;
+    jl_vararg_kind_t vvy = JL_VARARG_NONE;
     jl_varbinding_t *xbb = NULL;
-    if (env.lx > 0) {
-        env.vvx = jl_vararg_kind(jl_tparam(env.xd, env.lx-1));
-        if (env.vvx == JL_VARARG_BOUND)
-            xbb = lookup(e, (jl_tvar_t *)jl_tparam1(jl_tparam(env.xd, env.lx - 1)));
-    }
-    if (env.ly > 0)
-        env.vvy = jl_vararg_kind(jl_tparam(env.yd, env.ly-1));
-    if (env.vvx != JL_VARARG_NONE && env.vvx != JL_VARARG_INT &&
+    jl_value_t *xva = NULL, *yva = NULL;
+    if (lx > 0) {
+        xva = jl_tparam(xd, lx-1);
+        vvx = jl_vararg_kind(xva);
+        if (vvx == JL_VARARG_BOUND)
+            xbb = lookup(e, (jl_tvar_t *)jl_unwrap_vararg_num(xva));
+    }
+    if (ly > 0) {
+        yva = jl_tparam(yd, ly-1);
+        vvy = jl_vararg_kind(yva);
+    }
+    if (vvx != JL_VARARG_NONE && vvx != JL_VARARG_INT &&
         (!xbb || !jl_is_long(xbb->lb))) {
-        if (env.vvx == JL_VARARG_UNBOUND || (xbb && !xbb->right)) {
+        if (vvx == JL_VARARG_UNBOUND || (xbb && !xbb->right)) {
             // Unbounded on the LHS, bounded on the RHS
-            if (env.vvy == JL_VARARG_NONE || env.vvy == JL_VARARG_INT)
+            if (vvy == JL_VARARG_NONE || vvy == JL_VARARG_INT)
                 return 0;
-            else if (env.lx < env.ly) // Unbounded includes N == 0
+            else if (lx < ly) // Unbounded includes N == 0
                 return 0;
         }
-        else if (env.vvy == JL_VARARG_NONE && !check_vararg_length(jl_tparam(env.xd, env.lx-1), env.ly+1-env.lx, e)) {
+        else if (vvy == JL_VARARG_NONE && !check_vararg_length(xva, ly+1-lx, e)) {
             return 0;
         }
     }
     else {
-        size_t nx = env.lx;
-        if (env.vvx == JL_VARARG_INT)
-            nx += jl_vararg_length(jl_tparam(env.xd, env.lx-1)) - 1;
+        size_t nx = lx;
+        if (vvx == JL_VARARG_INT)
+            nx += jl_vararg_length(xva) - 1;
         else if (xbb && jl_is_long(xbb->lb))
             nx += jl_unbox_long(xbb->lb) - 1;
         else
-            assert(env.vvx == JL_VARARG_NONE);
-        size_t ny = env.ly;
-        if (env.vvy == JL_VARARG_INT)
-            ny += jl_vararg_length(jl_tparam(env.yd, env.ly-1)) - 1;
-        else if (env.vvy != JL_VARARG_NONE)
+            assert(vvx == JL_VARARG_NONE);
+        size_t ny = ly;
+        if (vvy == JL_VARARG_INT)
+            ny += jl_vararg_length(yva) - 1;
+        else if (vvy != JL_VARARG_NONE)
             ny -= 1;
-        if (env.vvy == JL_VARARG_NONE || env.vvy == JL_VARARG_INT) {
+        if (vvy == JL_VARARG_NONE || vvy == JL_VARARG_INT) {
             if (nx != ny)
                 return 0;
         }
@@ -1162,29 +1155,7 @@ static int subtype_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, in
     }
 
     param = (param == 0 ? 1 : param);
-    env.lastx = env.lasty = NULL;
-    env.vtx = env.vty = NULL;
-    JL_GC_PUSH2(&env.vtx, &env.vty);
-    int ans = subtype_tuple_tail(&env, 0, e, param);
-    JL_GC_POP();
-    return ans;
-}
-
-static int subtype_naked_vararg(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, int param)
-{
-    // Vararg: covariant in first parameter, invariant in second
-    jl_value_t *xp1=jl_tparam0(xd), *xp2=jl_tparam1(xd), *yp1=jl_tparam0(yd), *yp2=jl_tparam1(yd);
-    // in Vararg{T1} <: Vararg{T2}, need to check subtype twice to
-    // simulate the possibility of multiple arguments, which is needed
-    // to implement the diagonal rule correctly.
-    if (!subtype(xp1, yp1, e, param)) return 0;
-    if (!subtype(xp1, yp1, e, 1)) return 0;
-    e->invdepth++;
-    e->Rinvdepth++;
-    // Vararg{T,N} <: Vararg{T2,N2}; equate N and N2
-    int ans = forall_exists_equal(xp2, yp2, e);
-    e->invdepth--;
-    e->Rinvdepth--;
+    int ans = subtype_tuple_tail(xd, yd, 0, e, param);
     return ans;
 }
 
@@ -1210,6 +1181,10 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
             // union against the variable before trying to take it apart to see if there are any
             // variables lurking inside.
             jl_unionstate_t *state = &e->Runions;
+            if (state->depth >= state->used) {
+                statestack_set(state, state->used, 0);
+                state->used++;
+            }
             ui = statestack_get(state, state->depth);
             state->depth++;
             if (ui == 0)
@@ -1321,13 +1296,6 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
         if (xd == jl_any_type) return 0;
         if (xd->name == jl_tuple_typename)
             return subtype_tuple(xd, yd, e, param);
-        if (xd->name == jl_vararg_typename) {
-            // N.B.: This case is only used for raw varargs that are not part
-            // of a tuple (those that are have special handling in subtype_tuple).
-            // Vararg isn't really a proper type, but it does sometimes show up
-            // as e.g. Type{Vararg}, so we'd like to handle that correctly.
-            return subtype_naked_vararg(xd, yd, e, param);
-        }
         size_t i, np = jl_nparams(xd);
         int ans = 1;
         e->invdepth++;
@@ -1344,7 +1312,7 @@ static int subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int param)
     }
     if (jl_is_type(y))
         return x == jl_bottom_type;
-    return x == y || jl_egal(x, y);
+    return jl_egal(x, y);
 }
 
 static int is_indefinite_length_tuple_type(jl_value_t *x)
@@ -1378,13 +1346,13 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         (is_definite_length_tuple_type(x) && is_indefinite_length_tuple_type(y)))
         return 0;
 
-    jl_unionstate_t oldLunions = e->Lunions;
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
+    jl_saved_unionstate_t oldLunions; push_unionstate(&oldLunions, &e->Lunions);
+    e->Lunions.used = 0;
     int sub;
 
     if (!jl_has_free_typevars(x) || !jl_has_free_typevars(y)) {
-        jl_unionstate_t oldRunions = e->Runions;
-        memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
+        e->Runions.used = 0;
         e->Runions.depth = 0;
         e->Runions.more = 0;
         e->Lunions.depth = 0;
@@ -1392,7 +1360,7 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 
         sub = forall_exists_subtype(x, y, e, 2);
 
-        e->Runions = oldRunions;
+        pop_unionstate(&e->Runions, &oldRunions);
     }
     else {
         int lastset = 0;
@@ -1410,13 +1378,13 @@ static int forall_exists_equal(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
     }
 
-    e->Lunions = oldLunions;
+    pop_unionstate(&e->Lunions, &oldLunions);
     return sub && subtype(y, x, e, 0);
 }
 
 static int exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, jl_value_t *saved, jl_savedenv_t *se, int param)
 {
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Runions.used = 0;
     int lastset = 0;
     while (1) {
         e->Runions.depth = 0;
@@ -1447,7 +1415,7 @@ static int forall_exists_subtype(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, in
     JL_GC_PUSH1(&saved);
     save_env(e, &saved, &se);
 
-    memset(e->Lunions.stack, 0, sizeof(e->Lunions.stack));
+    e->Lunions.used = 0;
     int lastset = 0;
     int sub;
     while (1) {
@@ -1481,8 +1449,10 @@ static void init_stenv(jl_stenv_t *e, jl_value_t **env, int envsz)
     e->ignore_free = 0;
     e->intersection = 0;
     e->emptiness_only = 0;
+    e->triangular = 0;
     e->Lunions.depth = 0;      e->Runions.depth = 0;
     e->Lunions.more = 0;       e->Runions.more = 0;
+    e->Lunions.used = 0;       e->Runions.used = 0;
 }
 
 // subtyping entry points
@@ -1510,6 +1480,8 @@ static int concrete_min(jl_value_t *t)
             return 0; // Type{T} may have the concrete supertype `typeof(T)`, so don't try to handle them here
         return jl_is_concrete_type(t) ? 1 : 2;
     }
+    if (jl_is_vararg(t))
+        return 0;
     if (jl_is_typevar(t))
         return 0; // could be 0 or more, since we didn't track if it was unbound
     if (jl_is_uniontype(t)) {
@@ -1518,7 +1490,8 @@ static int concrete_min(jl_value_t *t)
             return count;
         return count + concrete_min(((jl_uniontype_t*)t)->b);
     }
-    return 2; // up to infinite
+    assert(!jl_is_kind(t));
+    return 1; // a non-Type is also considered concrete
 }
 
 static jl_value_t *find_var_body(jl_value_t *t, jl_tvar_t *v)
@@ -1537,6 +1510,16 @@ static jl_value_t *find_var_body(jl_value_t *t, jl_tvar_t *v)
         if (b) return b;
         return find_var_body(((jl_uniontype_t*)t)->b, v);
     }
+    else if (jl_is_vararg(t)) {
+        jl_vararg_t *vm = (jl_vararg_t *)t;
+        if (vm->T) {
+            jl_value_t *b = find_var_body(vm->T, v);
+            if (b) return b;
+            if (vm->N) {
+                return find_var_body(vm->N, v);
+            }
+        }
+    }
     else if (jl_is_datatype(t)) {
         size_t i;
         for (i=0; i < jl_nparams(t); i++) {
@@ -1593,6 +1576,13 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su
         *subtype = 0;
         return 1;
     }
+    if (jl_is_vararg(x)) {
+        if (!jl_is_vararg(y)) {
+            *subtype = 0;
+            return 1;
+        }
+        return 0;
+    }
     if (!jl_is_type(x) || !jl_is_type(y)) {
         *subtype = jl_egal(x, y);
         return 1;
@@ -1634,7 +1624,7 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su
     }
     if (jl_is_datatype(y)) {
         int istuple = (((jl_datatype_t*)y)->name == jl_tuple_typename);
-        int iscov = istuple || (((jl_datatype_t*)y)->name == jl_vararg_typename);
+        int iscov = istuple;
         // TODO: this would be a nice fast-path to have, unfortuanately,
         //       datatype allocation fails to correctly hash-cons them
         //       and the subtyping tests include tests for this case
@@ -1787,17 +1777,19 @@ static int obvious_subtype(jl_value_t *x, jl_value_t *y, jl_value_t *y0, int *su
                     *subtype = 0;
                     return 1;
                 }
-                if (jl_is_type_type(a1) && jl_is_type(jl_tparam0(a1))) {
-                    a1 = jl_typeof(jl_tparam0(a1));
+                jl_value_t *a1u = jl_unwrap_unionall(a1);
+                if (jl_is_type_type(a1u) && jl_is_type(jl_tparam0(a1u))) {
+                    a1 = jl_typeof(jl_tparam0(a1u));
                 }
                 for (; i < nparams_expanded_x; i++) {
                     jl_value_t *a = (vx != JL_VARARG_NONE && i >= npx - 1) ? vxt : jl_tparam(x, i);
                     if (i > npy && jl_is_typevar(b)) { // i == npy implies a == a1
                         // diagonal rule: all the later parameters are also constrained to be type-equal to the first
                         jl_value_t *a2 = a;
-                        if (jl_is_type_type(a) && jl_is_type(jl_tparam0(a))) {
+                        jl_value_t *au = jl_unwrap_unionall(a);
+                        if (jl_is_type_type(au) && jl_is_type(jl_tparam0(au))) {
                             // if a is exactly Type{T}, then use the concrete typeof(T) instead here
-                            a2 = jl_typeof(jl_tparam0(a));
+                            a2 = jl_typeof(jl_tparam0(au));
                         }
                         if (!obviously_egal(a1, a2)) {
                             if (obvious_subtype(a2, a1, y0, subtype)) {
@@ -1857,7 +1849,7 @@ JL_DLLEXPORT int jl_subtype_env(jl_value_t *x, jl_value_t *y, jl_value_t **env,
     if (x == y ||
         (jl_typeof(x) == jl_typeof(y) &&
          (jl_is_unionall(y) || jl_is_uniontype(y)) &&
-         jl_egal(x, y))) {
+         jl_types_egal(x, y))) {
         if (envsz != 0) { // quickly copy env from x
             jl_unionall_t *ua = (jl_unionall_t*)x;
             int i;
@@ -1923,7 +1915,9 @@ JL_DLLEXPORT int jl_subtype(jl_value_t *x, jl_value_t *y)
 
 JL_DLLEXPORT int jl_types_equal(jl_value_t *a, jl_value_t *b)
 {
-    if (obviously_egal(a, b))
+    if (a == b)
+        return 1;
+    if (jl_typeof(a) == jl_typeof(b) && jl_types_egal(a, b))
         return 1;
     if (obviously_unequal(a, b))
         return 0;
@@ -1942,11 +1936,6 @@ JL_DLLEXPORT int jl_types_equal(jl_value_t *a, jl_value_t *b)
     if (b == (jl_value_t*)jl_any_type || a == jl_bottom_type) {
         subtype_ab = 1;
     }
-    else if (jl_typeof(a) == jl_typeof(b) &&
-        (jl_is_unionall(b) || jl_is_uniontype(b)) &&
-        jl_egal(a, b)) {
-        subtype_ab = 1;
-    }
     else if (jl_obvious_subtype(a, b, &subtype_ab)) {
 #ifdef NDEBUG
         if (subtype_ab == 0)
@@ -1961,11 +1950,6 @@ JL_DLLEXPORT int jl_types_equal(jl_value_t *a, jl_value_t *b)
     if (a == (jl_value_t*)jl_any_type || b == jl_bottom_type) {
         subtype_ba = 1;
     }
-    else if (jl_typeof(b) == jl_typeof(a) &&
-        (jl_is_unionall(a) || jl_is_uniontype(a)) &&
-        jl_egal(b, a)) {
-        subtype_ba = 1;
-    }
     else if (jl_obvious_subtype(b, a, &subtype_ba)) {
 #ifdef NDEBUG
         if (subtype_ba == 0)
@@ -2138,14 +2122,14 @@ static jl_value_t *intersect_aside(jl_value_t *x, jl_value_t *y, jl_stenv_t *e,
     if (y == (jl_value_t*)jl_any_type && !jl_is_typevar(x))
         return x;
 
-    jl_unionstate_t oldRunions = e->Runions;
+    jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
     int savedepth = e->invdepth, Rsavedepth = e->Rinvdepth;
     // TODO: this doesn't quite make sense
     e->invdepth = e->Rinvdepth = d;
 
     jl_value_t *res = intersect_all(x, y, e);
 
-    e->Runions = oldRunions;
+    pop_unionstate(&e->Runions, &oldRunions);
     e->invdepth = savedepth;
     e->Rinvdepth = Rsavedepth;
     return res;
@@ -2156,10 +2140,10 @@ static jl_value_t *intersect_union(jl_value_t *x, jl_uniontype_t *u, jl_stenv_t
     if (param == 2 || (!jl_has_free_typevars(x) && !jl_has_free_typevars((jl_value_t*)u))) {
         jl_value_t *a=NULL, *b=NULL;
         JL_GC_PUSH2(&a, &b);
-        jl_unionstate_t oldRunions = e->Runions;
+        jl_saved_unionstate_t oldRunions; push_unionstate(&oldRunions, &e->Runions);
         a = R ? intersect_all(x, u->a, e) : intersect_all(u->a, x, e);
         b = R ? intersect_all(x, u->b, e) : intersect_all(u->b, x, e);
-        e->Runions = oldRunions;
+        pop_unionstate(&e->Runions, &oldRunions);
         jl_value_t *i = simple_join(a,b);
         JL_GC_POP();
         return i;
@@ -2191,6 +2175,22 @@ static jl_value_t *set_var_to_const(jl_varbinding_t *bb, jl_value_t *v JL_MAYBE_
     return v;
 }
 
+static jl_value_t *bound_var_below(jl_tvar_t *tv, jl_varbinding_t *bb, jl_stenv_t *e) {
+    if (!bb)
+        return (jl_value_t*)tv;
+    if (bb->depth0 != e->invdepth)
+        return jl_bottom_type;
+    record_var_occurrence(bb, e, 2);
+    if (jl_is_long(bb->lb)) {
+        if (bb->offset == 0)
+            return bb->lb;
+        if (jl_unbox_long(bb->lb) < bb->offset)
+            return jl_bottom_type;
+        return jl_box_long(jl_unbox_long(bb->lb) - bb->offset);
+    }
+    return (jl_value_t*)tv;
+}
+
 static int try_subtype_in_env(jl_value_t *a, jl_value_t *b, jl_stenv_t *e, int R, int d)
 {
     jl_value_t *root=NULL; jl_savedenv_t se;
@@ -2209,7 +2209,7 @@ static void set_bound(jl_value_t **bound, jl_value_t *val, jl_tvar_t *v, jl_sten
         return;
     jl_varbinding_t *btemp = e->vars;
     while (btemp != NULL) {
-        if (btemp->lb == (jl_value_t*)v && btemp->ub == (jl_value_t*)v &&
+        if ((btemp->lb == (jl_value_t*)v || btemp->ub == (jl_value_t*)v) &&
             in_union(val, (jl_value_t*)btemp->var))
             return;
         btemp = btemp->prev;
@@ -2248,13 +2248,44 @@ static int subtype_in_env_existential(jl_value_t *x, jl_value_t *y, jl_stenv_t *
     return issub;
 }
 
+// See if var y is reachable from x via bounds; used to avoid cycles.
+static int reachable_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e)
+{
+    if (in_union(x, (jl_value_t*)y))
+        return 1;
+    if (!jl_is_typevar(x))
+        return 0;
+    jl_varbinding_t *xv = lookup(e, (jl_tvar_t*)x);
+    if (xv == NULL)
+        return 0;
+    return reachable_var(xv->ub, y, e) || reachable_var(xv->lb, y, e);
+}
+
+// check whether setting v == t implies v == SomeType{v}, which is unsatisfiable.
+static int check_unsat_bound(jl_value_t *t, jl_tvar_t *v, jl_stenv_t *e) JL_NOTSAFEPOINT
+{
+    if (var_occurs_inside(t, v, 0, 0))
+        return 1;
+    jl_varbinding_t *btemp = e->vars;
+    while (btemp != NULL) {
+        if (btemp->lb == (jl_value_t*)v && btemp->ub == (jl_value_t*)v &&
+            var_occurs_inside(t, btemp->var, 0, 0))
+            return 1;
+        btemp = btemp->prev;
+    }
+    return 0;
+}
+
 static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int8_t R, int param)
 {
     jl_varbinding_t *bb = lookup(e, b);
     if (bb == NULL)
         return R ? intersect_aside(a, b->ub, e, 1, 0) : intersect_aside(b->ub, a, e, 0, 0);
-    if (bb->lb == bb->ub && jl_is_typevar(bb->lb) && bb->lb != (jl_value_t*)b)
+    if (reachable_var(bb->lb, b, e) || reachable_var(bb->ub, b, e))
+        return a;
+    if (bb->lb == bb->ub && jl_is_typevar(bb->lb)) {
         return intersect(a, bb->lb, e, param);
+    }
     if (!jl_is_type(a) && !jl_is_typevar(a))
         return set_var_to_const(bb, a, NULL);
     int d = bb->depth0;
@@ -2274,25 +2305,24 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
             ub = a;
         }
         else {
+            e->triangular++;
             ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-            // TODO: we should probably check `bb->lb <: ub` here; find a test case for that
+            e->triangular--;
+            save_env(e, &root, &se);
+            int issub = subtype_in_env_existential(bb->lb, ub, e, 0, d);
+            restore_env(e, root, &se);
+            free_env(&se);
+            if (!issub) {
+                JL_GC_POP();
+                return jl_bottom_type;
+            }
         }
         if (ub != (jl_value_t*)b) {
             if (jl_has_free_typevars(ub)) {
-                // constraint X == Ref{X} is unsatisfiable. also check variables set equal to X.
-                if (var_occurs_inside(ub, b, 0, 0)) {
+                if (check_unsat_bound(ub, b, e)) {
                     JL_GC_POP();
                     return jl_bottom_type;
                 }
-                jl_varbinding_t *btemp = e->vars;
-                while (btemp != NULL) {
-                    if (btemp->lb == (jl_value_t*)b && btemp->ub == (jl_value_t*)b &&
-                        var_occurs_inside(ub, btemp->var, 0, 0)) {
-                        JL_GC_POP();
-                        return jl_bottom_type;
-                    }
-                    btemp = btemp->prev;
-                }
             }
             bb->ub = ub;
             bb->lb = ub;
@@ -2300,67 +2330,33 @@ static jl_value_t *intersect_var(jl_tvar_t *b, jl_value_t *a, jl_stenv_t *e, int
         JL_GC_POP();
         return ub;
     }
-    else if (bb->constraintkind == 0) {
-        if (!jl_is_typevar(bb->ub) && !jl_is_typevar(a)) {
-            if (try_subtype_in_env(bb->ub, a, e, 0, d))
-                return (jl_value_t*)b;
-        }
-        return R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-    }
-    else if (bb->concrete || bb->constraintkind == 1) {
-        jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
-        if (ub == jl_bottom_type)
-            return jl_bottom_type;
-        JL_GC_PUSH1(&ub);
-        if (!R && !subtype_bounds_in_env(bb->lb, a, e, 0, d)) {
-            // this fixes issue #30122. TODO: better fix for R flag.
-            JL_GC_POP();
-            return jl_bottom_type;
-        }
-        JL_GC_POP();
-        set_bound(&bb->ub, ub, b, e);
-        return (jl_value_t*)b;
-    }
-    else if (bb->constraintkind == 2) {
-        // TODO: removing this case fixes many test_brokens in test/subtype.jl
-        // but breaks other tests.
-        if (!subtype_bounds_in_env(a, bb->ub, e, 1, d)) {
-            // mark var as unsatisfiable by making it circular
-            bb->lb = (jl_value_t*)b;
-            return jl_bottom_type;
-        }
-        jl_value_t *lb = simple_join(bb->lb, a);
-        set_bound(&bb->lb, lb, b, e);
-        return a;
-    }
-    assert(bb->constraintkind == 3);
     jl_value_t *ub = R ? intersect_aside(a, bb->ub, e, 1, d) : intersect_aside(bb->ub, a, e, 0, d);
     if (ub == jl_bottom_type)
         return jl_bottom_type;
-    if (jl_is_typevar(a))
+    if (bb->constraintkind == 1 || e->triangular) {
+        if (e->triangular && check_unsat_bound(ub, b, e))
+            return jl_bottom_type;
+        set_bound(&bb->ub, ub, b, e);
         return (jl_value_t*)b;
-    if (ub == a) {
-        if (bb->lb == jl_bottom_type) {
-            set_bound(&bb->ub, a, b, e);
+    }
+    else if (bb->constraintkind == 0) {
+        JL_GC_PUSH1(&ub);
+        if (!jl_is_typevar(a) && try_subtype_in_env(bb->ub, a, e, 0, d)) {
+            JL_GC_POP();
             return (jl_value_t*)b;
         }
+        JL_GC_POP();
         return ub;
     }
-    else if (bb->ub == bb->lb) {
-        return ub;
-    }
-    root = NULL;
-    JL_GC_PUSH2(&root, &ub);
-    save_env(e, &root, &se);
-    jl_value_t *ii = R ? intersect_aside(a, bb->lb, e, 1, d) : intersect_aside(bb->lb, a, e, 0, d);
-    if (ii == jl_bottom_type) {
-        restore_env(e, root, &se);
-        ii = (jl_value_t*)b;
+    assert(bb->constraintkind == 2);
+    if (!jl_is_typevar(a)) {
+        if (ub == a && bb->lb != jl_bottom_type)
+            return ub;
+        else if (jl_egal(bb->ub, bb->lb))
+            return ub;
         set_bound(&bb->ub, ub, b, e);
     }
-    free_env(&se);
-    JL_GC_POP();
-    return ii;
+    return (jl_value_t*)b;
 }
 
 // test whether `var` occurs inside constructors. `want_inv` tests only inside
@@ -2383,13 +2379,19 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want
             return 1;
         return var_occurs_inside(ua->body, var, inside, want_inv);
     }
+    else if (jl_is_vararg(v)) {
+        jl_vararg_t *vm = (jl_vararg_t*)v;
+        if (vm->T) {
+            if (var_occurs_inside(vm->T, var, inside || !want_inv, want_inv))
+                return 1;
+            return vm->N && var_occurs_inside(vm->N, var, 1, want_inv);
+        }
+    }
     else if (jl_is_datatype(v)) {
         size_t i;
         int istuple = jl_is_tuple_type(v);
-        int isva = jl_is_vararg_type(v);
         for (i=0; i < jl_nparams(v); i++) {
-            int invar = isva ? i == 1 : !istuple;
-            int ins_i = inside || !want_inv || invar;
+            int ins_i = inside || !want_inv || !istuple;
             if (var_occurs_inside(jl_tparam(v,i), var, ins_i, want_inv))
                 return 1;
         }
@@ -2398,7 +2400,7 @@ static int var_occurs_inside(jl_value_t *v, jl_tvar_t *var, int inside, int want
 }
 
 // Caller might not have rooted `res`
-static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_stenv_t *e)
+static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbinding_t *vb, jl_unionall_t *u, jl_stenv_t *e)
 {
     jl_value_t *varval = NULL;
     jl_tvar_t *newvar = vb->var;
@@ -2411,7 +2413,10 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         // given x<:T<:x, substitute x for T
         varval = vb->ub;
     }
-    else if (!vb->occurs_inv && is_leaf_bound(vb->ub)) {
+    // TODO: `vb.occurs_cov == 1` here allows substituting Tuple{<:X} => Tuple{X},
+    // which is valid but changes some ambiguity errors so we don't need to do it yet.
+    else if ((/*vb->occurs_cov == 1 || */is_leaf_bound(vb->ub)) &&
+             !var_occurs_invariant(u->body, u->var, 0)) {
         // replace T<:x with x in covariant position when possible
         varval = vb->ub;
     }
@@ -2429,6 +2434,7 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         }
     }
 
+    // TODO: this can prevent us from matching typevar identities later
     if (!varval && (vb->lb != vb->var->lb || vb->ub != vb->var->ub))
         newvar = jl_new_typevar(vb->var->name, vb->lb, vb->ub);
 
@@ -2513,7 +2519,7 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
                 res = jl_substitute_var(res, vb->var, (jl_value_t*)newvar);
             varval = (jl_value_t*)newvar;
             if (wrap)
-                res = jl_new_struct(jl_unionall_type, (jl_tvar_t*)newvar, res);
+                res = jl_type_unionall((jl_tvar_t*)newvar, res);
         }
     }
 
@@ -2522,7 +2528,7 @@ static jl_value_t *finish_unionall(jl_value_t *res JL_MAYBE_UNROOTED, jl_varbind
         for(i=0; i < jl_array_len(vb->innervars); i++) {
             jl_tvar_t *var = (jl_tvar_t*)jl_array_ptr_ref(vb->innervars, i);
             if (jl_has_typevar(res, var))
-                res = jl_new_struct(jl_unionall_type, (jl_tvar_t*)var, res);
+                res = jl_type_unionall((jl_tvar_t*)var, res);
         }
     }
 
@@ -2544,7 +2550,13 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
     // if the var for this unionall (based on identity) already appears somewhere
     // in the environment, rename to get a fresh var.
     // TODO: might need to look inside types in btemp->lb and btemp->ub
+    int envsize = 0;
     while (btemp != NULL) {
+        envsize++;
+        if (envsize > 120) {
+            vb->limited = 1;
+            return t;
+        }
         if (btemp->var == u->var || btemp->lb == (jl_value_t*)u->var ||
             btemp->ub == (jl_value_t*)u->var) {
             u = rename_unionall(u);
@@ -2594,46 +2606,37 @@ static jl_value_t *intersect_unionall_(jl_value_t *t, jl_unionall_t *u, jl_stenv
     }
     if (res != jl_bottom_type)
         // res is rooted by callee
-        res = finish_unionall(res, vb, e);
+        res = finish_unionall(res, vb, u, e);
     JL_GC_POP();
     return res;
 }
 
 static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_t *e, int8_t R, int param)
 {
-    jl_value_t *res=NULL, *res2=NULL, *save=NULL, *save2=NULL;
-    jl_savedenv_t se, se2;
-    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0,
-                           R ? e->Rinvdepth : e->invdepth, 0, NULL, 0, e->vars };
-    JL_GC_PUSH6(&res, &save2, &vb.lb, &vb.ub, &save, &vb.innervars);
+    jl_value_t *res=NULL, *save=NULL;
+    jl_savedenv_t se;
+    jl_varbinding_t vb = { u->var, u->var->lb, u->var->ub, R, 0, 0, 0, 0, 0, 0,
+                           R ? e->Rinvdepth : e->invdepth, 0, NULL, e->vars };
+    JL_GC_PUSH5(&res, &vb.lb, &vb.ub, &save, &vb.innervars);
     save_env(e, &save, &se);
     res = intersect_unionall_(t, u, e, R, param, &vb);
-    if (res != jl_bottom_type) {
+    if (vb.limited) {
+        // if the environment got too big, avoid tree recursion and propagate the flag
+        if (e->vars)
+            e->vars->limited = 1;
+    }
+    else if (res != jl_bottom_type) {
         if (vb.concrete || vb.occurs_inv>1 || u->var->lb != jl_bottom_type || (vb.occurs_inv && vb.occurs_cov)) {
             restore_env(e, NULL, &se);
             vb.occurs_cov = vb.occurs_inv = 0;
-            vb.constraintkind = 3;
+            vb.constraintkind = vb.concrete ? 1 : 2;
             res = intersect_unionall_(t, u, e, R, param, &vb);
         }
-        else if (vb.occurs_cov) {
-            save_env(e, &save2, &se2);
+        else if (vb.occurs_cov && !var_occurs_invariant(u->body, u->var, 0)) {
             restore_env(e, save, &se);
             vb.occurs_cov = vb.occurs_inv = 0;
-            vb.lb = u->var->lb; vb.ub = u->var->ub;
             vb.constraintkind = 1;
-            res2 = intersect_unionall_(t, u, e, R, param, &vb);
-            if (res2 == jl_bottom_type) {
-                restore_env(e, save, &se);
-                vb.occurs_cov = vb.occurs_inv = 0;
-                vb.lb = u->var->lb; vb.ub = u->var->ub;
-                vb.constraintkind = 2;
-                res2 = intersect_unionall_(t, u, e, R, param, &vb);
-                if (res2 == jl_bottom_type)
-                    restore_env(e, save2, &se2);
-            }
-            if (res2 != jl_bottom_type)
-                res = res2;
-            free_env(&se2);
+            res = intersect_unionall_(t, u, e, R, param, &vb);
         }
     }
     free_env(&se);
@@ -2644,11 +2647,9 @@ static jl_value_t *intersect_unionall(jl_value_t *t, jl_unionall_t *u, jl_stenv_
 // check n = (length of vararg type v)
 static int intersect_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e, int8_t R)
 {
-    jl_tvar_t *va_p1=NULL;
-    jl_datatype_t *tail = unwrap_1_unionall(v, &va_p1);
-    jl_value_t *N = jl_tparam1(tail);
+    jl_value_t *N = jl_unwrap_vararg_num(v);
     // only do the check if N is free in the tuple type's last parameter
-    if (jl_is_typevar(N) && N != (jl_value_t*)va_p1) {
+    if (N && jl_is_typevar(N)) {
         jl_value_t *len = jl_box_long(n);
         JL_GC_PUSH1(&len);
         jl_value_t *il = R ? intersect(len, N, e, 2) : intersect(N, len, e, 2);
@@ -2659,13 +2660,61 @@ static int intersect_vararg_length(jl_value_t *v, ssize_t n, jl_stenv_t *e, int8
     return 1;
 }
 
+static jl_value_t *intersect_invariant(jl_value_t *x, jl_value_t *y, jl_stenv_t *e);
+static jl_value_t *intersect_varargs(jl_vararg_t *vmx, jl_vararg_t *vmy, jl_stenv_t *e, int param)
+{
+    // Vararg: covariant in first parameter, invariant in second
+    jl_value_t *xp1=jl_unwrap_vararg(vmx), *xp2=jl_unwrap_vararg_num(vmx),
+                *yp1=jl_unwrap_vararg(vmy), *yp2=jl_unwrap_vararg_num(vmy);
+    // in Vararg{T1} <: Vararg{T2}, need to check subtype twice to
+    // simulate the possibility of multiple arguments, which is needed
+    // to implement the diagonal rule correctly.
+    if (intersect(xp1, yp1, e, param==0 ? 1 : param) == jl_bottom_type)
+        return jl_bottom_type;
+    jl_value_t *i2=NULL, *ii = intersect(xp1, yp1, e, 1);
+    if (ii == jl_bottom_type) return jl_bottom_type;
+    JL_GC_PUSH2(&ii, &i2);
+    if (!xp2 && !yp2) {
+        ii = (jl_value_t*)jl_wrap_vararg(ii, NULL);
+        JL_GC_POP();
+        return ii;
+    }
+    if (xp2 && jl_is_typevar(xp2)) {
+        jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)xp2);
+        if (xb) xb->intvalued = 1;
+        if (!yp2) {
+            i2 = bound_var_below((jl_tvar_t*)xp2, xb, e);
+        }
+    }
+    if (yp2 && jl_is_typevar(yp2)) {
+        jl_varbinding_t *yb = lookup(e, (jl_tvar_t*)yp2);
+        if (yb) yb->intvalued = 1;
+        if (!xp2) {
+            i2 = bound_var_below((jl_tvar_t*)yp2, yb, e);
+        }
+    }
+    if (xp2 && yp2) {
+        // Vararg{T,N} <: Vararg{T2,N2}; equate N and N2
+        i2 = intersect_invariant(xp2, yp2, e);
+        if (i2 == NULL || i2 == jl_bottom_type || (jl_is_long(i2) && jl_unbox_long(i2) < 0) ||
+            !((jl_is_typevar(i2) && ((jl_tvar_t*)i2)->lb == jl_bottom_type &&
+                ((jl_tvar_t*)i2)->ub == (jl_value_t*)jl_any_type) || jl_is_long(i2))) {
+            i2 = jl_bottom_type;
+        }
+    }
+    ii = i2 == jl_bottom_type ? (jl_value_t*)jl_bottom_type : (jl_value_t*)jl_wrap_vararg(ii, i2);
+    JL_GC_POP();
+    return ii;
+}
+
+
 static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_stenv_t *e, int param)
 {
     size_t lx = jl_nparams(xd), ly = jl_nparams(yd);
     if (lx == 0 && ly == 0)
         return (jl_value_t*)yd;
-    int vx=0, vy=0, vvx = (lx > 0 && jl_is_vararg_type(jl_tparam(xd, lx-1)));
-    int vvy = (ly > 0 && jl_is_vararg_type(jl_tparam(yd, ly-1)));
+    int vx=0, vy=0, vvx = (lx > 0 && jl_is_vararg(jl_tparam(xd, lx-1)));
+    int vvy = (ly > 0 && jl_is_vararg(jl_tparam(yd, ly-1)));
     if (!vvx && !vvy && lx != ly)
         return jl_bottom_type;
     jl_svec_t *params = jl_alloc_svec(lx > ly ? lx : ly);
@@ -2674,14 +2723,15 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten
     size_t i=0, j=0;
     jl_value_t *xi, *yi;
     while (1) {
+        vx = vy = 0;
         xi = i < lx ? jl_tparam(xd, i) : NULL;
         yi = j < ly ? jl_tparam(yd, j) : NULL;
         if (xi == NULL && yi == NULL) {
             assert(i == j && i == jl_svec_len(params));
             break;
         }
-        if (xi && jl_is_vararg_type(xi)) vx = 1;
-        if (yi && jl_is_vararg_type(yi)) vy = 1;
+        if (xi && jl_is_vararg(xi)) vx = 1;
+        if (yi && jl_is_vararg(yi)) vy = 1;
         if (xi == NULL || yi == NULL) {
             res = jl_bottom_type;
             if (vx && intersect_vararg_length(xi, ly+1-lx, e, 0))
@@ -2690,29 +2740,34 @@ static jl_value_t *intersect_tuple(jl_datatype_t *xd, jl_datatype_t *yd, jl_sten
                 res = (jl_value_t*)jl_apply_tuple_type_v(jl_svec_data(params), i);
             break;
         }
-        if (vx && !vy)
-            xi = jl_unwrap_vararg(xi);
-        if (vy && !vx)
-            yi = jl_unwrap_vararg(yi);
         jl_varbinding_t *xb=NULL, *yb=NULL;
+        jl_value_t *ii = NULL;
         if (vx && vy) {
             // {A^n...,Vararg{T,N}} ∩ {Vararg{S,M}} = {(A∩S)^n...,Vararg{T∩S,N}} plus N = M-n
-            jl_value_t *xlen = jl_tparam1(jl_unwrap_unionall(xi));
-            if (jl_is_typevar(xlen)) {
+            jl_value_t *xlen = jl_unwrap_vararg_num(xi);
+            if (xlen && jl_is_typevar(xlen)) {
                 xb = lookup(e, (jl_tvar_t*)xlen);
                 if (xb)
                     xb->offset = ly-lx;
             }
-            jl_value_t *ylen = jl_tparam1(jl_unwrap_unionall(yi));
-            if (jl_is_typevar(ylen)) {
+            jl_value_t *ylen = jl_unwrap_vararg_num(yi);
+            if (ylen && jl_is_typevar(ylen)) {
                 yb = lookup(e, (jl_tvar_t*)ylen);
                 if (yb)
                     yb->offset = lx-ly;
             }
+            ii = intersect_varargs((jl_vararg_t*)xi,
+                                   (jl_vararg_t*)yi,
+                                   e, param);
+            if (xb) xb->offset = 0;
+            if (yb) yb->offset = 0;
+        } else {
+            if (vx)
+                xi = jl_unwrap_vararg(xi);
+            if (vy)
+                yi = jl_unwrap_vararg(yi);
+            ii = intersect(xi, yi, e, param == 0 ? 1 : param);
         }
-        jl_value_t *ii = intersect(xi, yi, e, param == 0 ? 1 : param);
-        if (xb) xb->offset = 0;
-        if (yb) yb->offset = 0;
         if (ii == jl_bottom_type) {
             if (vx && vy) {
                 int len = i > j ? i : j;
@@ -2865,6 +2920,35 @@ static jl_value_t *intersect_type_type(jl_value_t *x, jl_value_t *y, jl_stenv_t
     */
 }
 
+// cmp <= 0: is x already <= y in this environment
+// cmp >= 0: is x already >= y in this environment
+static int compareto_var(jl_value_t *x, jl_tvar_t *y, jl_stenv_t *e, int cmp) JL_NOTSAFEPOINT
+{
+    if (x == (jl_value_t*)y)
+        return 1;
+    if (!jl_is_typevar(x))
+        return 0;
+    jl_varbinding_t *xv = lookup(e, (jl_tvar_t*)x);
+    if (xv == NULL)
+        return 0;
+    int ans = 1;
+    if (cmp <= 0)
+        ans &= compareto_var(xv->ub, y, e, cmp);
+    if (cmp >= 0)
+        ans &= compareto_var(xv->lb, y, e, cmp);
+    return ans;
+}
+
+// Check whether the environment already asserts x <: y via recorded bounds.
+// This is used to avoid adding redundant constraints that lead to cycles.
+// Note this is a semi-predicate: 1 => is a subtype, 0 => unknown
+static int subtype_by_bounds(jl_value_t *x, jl_value_t *y, jl_stenv_t *e) JL_NOTSAFEPOINT
+{
+    if (!jl_is_typevar(x) || !jl_is_typevar(y))
+        return 0;
+    return compareto_var(x, (jl_tvar_t*)y, e, -1) || compareto_var(y, (jl_tvar_t*)x, e, 1);
+}
+
 // `param` means we are currently looking at a parameter of a type constructor
 // (as opposed to being outside any type constructor, or comparing variable bounds).
 // this is used to record the positions where type variables occur for the
@@ -2918,9 +3002,16 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                         return xlb;
                     return jl_bottom_type;
                 }
-                if (R) flip_vars(e);
-                int ccheck = subtype_in_env(xlb, yub, e) && subtype_in_env(ylb, xub, e);
-                if (R) flip_vars(e);
+                int ccheck;
+                if (yub == xub ||
+                    (subtype_by_bounds(xlb, yub, e) && subtype_by_bounds(ylb, xub, e))) {
+                    ccheck = 1;
+                }
+                else {
+                    if (R) flip_vars(e);
+                    ccheck = subtype_in_env(xlb, yub, e) && subtype_in_env(ylb, xub, e);
+                    if (R) flip_vars(e);
+                }
                 if (!ccheck)
                     return jl_bottom_type;
                 if (var_occurs_inside(xub, (jl_tvar_t*)y, 0, 0) && var_occurs_inside(yub, (jl_tvar_t*)x, 0, 0)) {
@@ -2931,16 +3022,18 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
                 jl_value_t *ub=NULL, *lb=NULL;
                 JL_GC_PUSH2(&lb, &ub);
                 ub = intersect_aside(xub, yub, e, 0, xx ? xx->depth0 : 0);
-                lb = simple_join(xlb, ylb);
+                if (reachable_var(xlb, (jl_tvar_t*)y, e))
+                    lb = ylb;
+                else
+                    lb = simple_join(xlb, ylb);
                 if (yy) {
-                    if (lb != y)
-                        yy->lb = lb;
-                    if (ub != y)
+                    yy->lb = lb;
+                    if (!reachable_var(ub, (jl_tvar_t*)y, e))
                         yy->ub = ub;
                     assert(yy->ub != y);
                     assert(yy->lb != y);
                 }
-                if (xx) {
+                if (xx && !reachable_var(y, (jl_tvar_t*)x, e)) {
                     xx->lb = y;
                     xx->ub = y;
                     assert(xx->ub != x);
@@ -2962,7 +3055,7 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
         record_var_occurrence(lookup(e, (jl_tvar_t*)y), e, param);
         return intersect_var((jl_tvar_t*)y, x, e, 1, param);
     }
-    if (!jl_has_free_typevars(x) && !jl_has_free_typevars(y) && !jl_is_vararg_type(x) && !jl_is_vararg_type(y)) {
+    if (!jl_has_free_typevars(x) && !jl_has_free_typevars(y)) {
         if (jl_subtype(x, y)) return x;
         if (jl_subtype(y, x)) return y;
     }
@@ -3027,36 +3120,6 @@ static jl_value_t *intersect(jl_value_t *x, jl_value_t *y, jl_stenv_t *e, int pa
         if (xd->name == yd->name) {
             if (jl_is_tuple_type(xd))
                 return intersect_tuple(xd, yd, e, param);
-            if (jl_is_vararg_type(x)) {
-                // Vararg: covariant in first parameter, invariant in second
-                jl_value_t *xp1=jl_tparam0(xd), *xp2=jl_tparam1(xd), *yp1=jl_tparam0(yd), *yp2=jl_tparam1(yd);
-                // in Vararg{T1} <: Vararg{T2}, need to check subtype twice to
-                // simulate the possibility of multiple arguments, which is needed
-                // to implement the diagonal rule correctly.
-                if (intersect(xp1, yp1, e, param==0 ? 1 : param) == jl_bottom_type)
-                    return jl_bottom_type;
-                jl_value_t *i2=NULL, *ii = intersect(xp1, yp1, e, 1);
-                if (ii == jl_bottom_type) return jl_bottom_type;
-                JL_GC_PUSH2(&ii, &i2);
-                if (jl_is_typevar(xp2)) {
-                    jl_varbinding_t *xb = lookup(e, (jl_tvar_t*)xp2);
-                    if (xb) xb->intvalued = 1;
-                }
-                if (jl_is_typevar(yp2)) {
-                    jl_varbinding_t *yb = lookup(e, (jl_tvar_t*)yp2);
-                    if (yb) yb->intvalued = 1;
-                }
-                // Vararg{T,N} <: Vararg{T2,N2}; equate N and N2
-                i2 = intersect_invariant(xp2, yp2, e);
-                if (i2 == NULL || i2 == jl_bottom_type || (jl_is_long(i2) && jl_unbox_long(i2) < 0) ||
-                    !((jl_is_typevar(i2) && ((jl_tvar_t*)i2)->lb == jl_bottom_type &&
-                       ((jl_tvar_t*)i2)->ub == (jl_value_t*)jl_any_type) || jl_is_long(i2)))
-                    ii = jl_bottom_type;
-                else
-                    ii = jl_apply_type2((jl_value_t*)jl_vararg_type, ii, i2);
-                JL_GC_POP();
-                return ii;
-            }
             size_t i, np = jl_nparams(xd);
             jl_value_t **newparams;
             JL_GC_PUSHARGS(newparams, np);
@@ -3100,7 +3163,7 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
 {
     e->Runions.depth = 0;
     e->Runions.more = 0;
-    memset(e->Runions.stack, 0, sizeof(e->Runions.stack));
+    e->Runions.used = 0;
     jl_value_t **is;
     JL_GC_PUSHARGS(is, 3);
     jl_value_t **saved = &is[2];
@@ -3117,11 +3180,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         save_env(e, saved, &se);
     }
     while (e->Runions.more) {
-        if (e->emptiness_only && ii != jl_bottom_type) {
-            free_env(&se);
-            JL_GC_POP();
-            return ii;
-        }
+        if (e->emptiness_only && ii != jl_bottom_type)
+            break;
         e->Runions.depth = 0;
         int set = e->Runions.more - 1;
         e->Runions.more = 0;
@@ -3150,9 +3210,8 @@ static jl_value_t *intersect_all(jl_value_t *x, jl_value_t *y, jl_stenv_t *e)
         }
         total_iter++;
         if (niter > 3 || total_iter > 400000) {
-            free_env(&se);
-            JL_GC_POP();
-            return y;
+            ii = y;
+            break;
         }
     }
     free_env(&se);
@@ -3214,11 +3273,25 @@ jl_svec_t *jl_outer_unionall_vars(jl_value_t *u)
 static jl_value_t *switch_union_tuple(jl_value_t *a, jl_value_t *b)
 {
     if (jl_is_unionall(a)) {
-        jl_value_t *ans = switch_union_tuple(((jl_unionall_t*)a)->body, b);
+        jl_unionall_t *ua = (jl_unionall_t*)a;
+        if (jl_is_unionall(b)) {
+            jl_unionall_t *ub = (jl_unionall_t*)b;
+            if (ub->var->lb == ua->var->lb && ub->var->ub == ua->var->ub) {
+                jl_value_t *ub2 = jl_instantiate_unionall(ub, (jl_value_t*)ua->var);
+                jl_value_t *ans = NULL;
+                JL_GC_PUSH2(&ub2, &ans);
+                ans = switch_union_tuple(ua->body, ub2);
+                if (ans != NULL)
+                    ans = jl_type_unionall(ua->var, ans);
+                JL_GC_POP();
+                return ans;
+            }
+        }
+        jl_value_t *ans = switch_union_tuple(ua->body, b);
         if (ans == NULL)
             return NULL;
         JL_GC_PUSH1(&ans);
-        ans = jl_type_unionall(((jl_unionall_t*)a)->var, ans);
+        ans = jl_type_unionall(ua->var, ans);
         JL_GC_POP();
         return ans;
     }
@@ -3280,8 +3353,8 @@ static int might_intersect_concrete(jl_value_t *a)
     if (jl_is_uniontype(a))
         return might_intersect_concrete(((jl_uniontype_t*)a)->a) ||
                might_intersect_concrete(((jl_uniontype_t*)a)->b);
-    if (jl_is_vararg_type(a))
-        return might_intersect_concrete(jl_tparam0(a));
+    if (jl_is_vararg(a))
+        return might_intersect_concrete(jl_unwrap_vararg(a));
     if (jl_is_type_type(a))
         return 1;
     if (jl_is_datatype(a)) {
@@ -3479,11 +3552,11 @@ static jl_value_t *nth_tuple_elt(jl_datatype_t *t JL_PROPAGATES_ROOT, size_t i)
     if (i < len-1)
         return jl_tparam(t, i);
     jl_value_t *last = jl_unwrap_unionall(jl_tparam(t, len-1));
-    if (jl_is_vararg_type(last)) {
-        jl_value_t *n = jl_tparam1(last);
-        if (jl_is_long(n) && i >= len-1+jl_unbox_long(n))
+    if (jl_is_vararg(last)) {
+        jl_value_t *n = jl_unwrap_vararg_num(last);
+        if (n && jl_is_long(n) && i >= len-1+jl_unbox_long(n))
             return NULL;
-        return jl_tparam0(last);
+        return jl_unwrap_vararg(last);
     }
     if (i == len-1)
         return jl_tparam(t, i);
@@ -3545,7 +3618,7 @@ static int tuple_morespecific(jl_datatype_t *cdt, jl_datatype_t *pdt, int invari
               C = Tuple{AbstractArray, Int, Array}
               we need A < B < C and A < C.
             */
-            return some_morespecific && cva && ckind == JL_VARARG_BOUND && num_occurs((jl_tvar_t*)jl_tparam1(jl_unwrap_unionall(clast)), env) > 1;
+            return some_morespecific && cva && ckind == JL_VARARG_BOUND && num_occurs((jl_tvar_t*)jl_unwrap_vararg_num(clast), env) > 1;
         }
 
         // Tuple{..., T} not more specific than Tuple{..., Vararg{S}} if S is diagonal
@@ -3568,8 +3641,8 @@ static size_t tuple_full_length(jl_value_t *t)
     size_t n = jl_nparams(t);
     if (n == 0) return 0;
     jl_value_t *last = jl_unwrap_unionall(jl_tparam(t,n-1));
-    if (jl_is_vararg_type(last)) {
-        jl_value_t *N = jl_tparam1(last);
+    if (jl_is_vararg(last)) {
+        jl_value_t *N = jl_unwrap_vararg_num(last);
         if (jl_is_long(N))
             n += jl_unbox_long(N)-1;
     }
@@ -3586,7 +3659,7 @@ static int args_morespecific_fix1(jl_value_t *a, jl_value_t *b, int swap, jl_typ
         return -1;
     assert(jl_is_va_tuple((jl_datatype_t*)a));
     jl_datatype_t *new_a = NULL;
-    jl_value_t *e[2] = { jl_tparam1(jl_unwrap_unionall(jl_tparam(a, n-1))), jl_box_long(taillen) };
+    jl_value_t *e[2] = { jl_unwrap_vararg_num(jl_unwrap_unionall(jl_tparam(a, n-1))), jl_box_long(taillen) };
     JL_GC_PUSH2(&new_a, &e[1]);
     new_a = (jl_datatype_t*)jl_instantiate_type_with((jl_value_t*)a, e, 1);
     int changed = 0;
@@ -3623,6 +3696,12 @@ static int count_occurs(jl_value_t *t, jl_tvar_t *v)
             return 0;
         return count_occurs(((jl_unionall_t*)t)->body, v);
     }
+    if (jl_is_vararg(t)) {
+        jl_vararg_t *vm = (jl_vararg_t*)t;
+        if (vm->T) {
+            return count_occurs(vm->T, v) + (vm->N ? count_occurs(vm->N, v) : 0);
+        }
+    }
     if (jl_is_datatype(t)) {
         int i, c=0;
         for(i=0; i < jl_nparams(t); i++)
diff --git a/src/support/END.h b/src/support/END.h
index 090bbc02eeb1c7..3a7c3ac00a9ace 100644
--- a/src/support/END.h
+++ b/src/support/END.h
@@ -36,18 +36,13 @@
 #if defined(__linux__) || defined(__FreeBSD__) || defined(__ELF__)
 .size CNAME, . - CNAME
 #else
-#ifdef _MSC_VER
-CNAME endp
-#else
 #ifdef _WIN64
 .seh_endproc
 #endif
 #endif
-#endif
 
 
 #undef CNAME
-#undef HIDENAME
 #undef STR
 #undef XSTR
 #undef _START_ENTRY
diff --git a/src/support/ENTRY.amd64.h b/src/support/ENTRY.amd64.h
index b8049f0711f89e..d4decb98e973ac 100644
--- a/src/support/ENTRY.amd64.h
+++ b/src/support/ENTRY.amd64.h
@@ -41,7 +41,6 @@
 #define EXT_(csym)          csym
 #define EXT(csym)           EXT_(csym)
 #endif
-#define HIDENAME(asmsym)    .asmsym
 .text
 _START_ENTRY
 .globl EXT(CNAME)
@@ -51,9 +50,7 @@ EXT(CNAME):
 #elif defined(_WIN32)
 #define EXT_(csym)          csym
 #define EXT(csym)           EXT_(csym)
-#define HIDENAME(asmsym)    .asmsym
 
-#ifndef _MSC_VER
 .intel_syntax noprefix
 .text
 _START_ENTRY
@@ -69,9 +66,5 @@ _START_ENTRY
 .seh_proc EXT(CNAME)
 EXT(CNAME):
 .seh_endprologue
-#else
-.code
-CNAME proc
-#endif
 
 #endif
diff --git a/src/support/ENTRY.i387.h b/src/support/ENTRY.i387.h
index d80038671247ae..7a857f22f855b2 100644
--- a/src/support/ENTRY.i387.h
+++ b/src/support/ENTRY.i387.h
@@ -41,7 +41,6 @@
 #define EXT_(csym)          csym
 #define EXT(csym)           EXT_(csym)
 #endif
-#define HIDENAME(asmsym)    .asmsym
 .text
 _START_ENTRY
 .globl EXT(CNAME)
@@ -51,9 +50,7 @@ EXT(CNAME):
 #elif defined(_WIN32)
 #define EXT_(csym)          _##csym
 #define EXT(csym)           EXT_(csym)
-#define HIDENAME(asmsym)    .asmsym
 
-#ifndef _MSC_VER
 .intel_syntax
 .text
 _START_ENTRY
@@ -66,11 +63,5 @@ _START_ENTRY
 .type 32
 .endef
 EXT(CNAME):
-#else
-.586
-.model small,C
-.code
-CNAME proc
-#endif
 
 #endif
diff --git a/src/support/Makefile b/src/support/Makefile
index 1ccfdeed3f3da0..a884aa5fd47e00 100644
--- a/src/support/Makefile
+++ b/src/support/Makefile
@@ -9,18 +9,15 @@ JCPPFLAGS += $(CPPFLAGS)
 JLDFLAGS += $(LDFLAGS)
 
 SRCS := hashing timefuncs ptrhash operators utf8 ios htable bitvector \
-	int2str libsupportinit arraylist strtod
+	int2str libsupportinit arraylist strtod rle
 ifeq ($(OS),WINNT)
-SRCS += asprintf strptime win32_ucontext
+SRCS += asprintf strptime
 ifeq ($(ARCH),i686)
 SRCS += _setjmp.win32
 else ifeq ($(ARCH),x86_64)
 SRCS += _setjmp.win64
 endif
 endif
-ifeq ($(USEMSVC), 1)
-SRCS += dirname
-endif
 
 HEADERS := $(wildcard *.h) $(LIBUV_INC)/uv.h
 
@@ -28,10 +25,8 @@ OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
 DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
 
 FLAGS := $(HFILEDIRS:%=-I%) -I$(LIBUV_INC) -I$(UTF8PROC_INC) -DLIBRARY_EXPORTS -DUTF8PROC_EXPORTS
-ifneq ($(USEMSVC), 1)
 FLAGS += -Wall -Wno-strict-aliasing -fvisibility=hidden -Wpointer-arith -Wundef
 JCFLAGS += -Wold-style-definition -Wstrict-prototypes -Wc++-compat
-endif
 
 DEBUGFLAGS += $(FLAGS)
 SHIPFLAGS += $(FLAGS)
@@ -45,19 +40,10 @@ $(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(SHIPFLAGS) $(DISABLE_ASSERTIONS) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.c $(HEADERS) | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(JCFLAGS) $(DEBUGFLAGS) -c $< -o $@)
-ifneq ($(USEMSVC), 1)
 $(BUILDDIR)/%.o: $(SRCDIR)/%.S | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(SHIPFLAGS) -c $< -o $@)
 $(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.S | $(BUILDDIR)
 	@$(call PRINT_CC, $(CC) $(JCPPFLAGS) $(DEBUGFLAGS) -c $< -o $@)
-else
-$(BUILDDIR)/%.o: $(SRCDIR)/%.S | $(BUILDDIR)
-	@$(call PRINT_CC, $(CPP) -P $(JCPPFLAGS) $(SHIPFLAGS) $<)
-	@$(call PRINT_CC, $(AS) $(JCPPFLAGS) $(SHIPFLAGS) -Fo $@ -c $*.i)
-$(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.S | $(BUILDDIR)
-	@$(call PRINT_CC, $(CPP) -P $(JCPPFLAGS) $(DEBUGFLAGS) $<)
-	@$(call PRINT_CC, $(AS) $(JCPPFLAGS) $(DEBUGFLAGS) -Fo $@ -c $*.i)
-endif
 
 $(BUILDDIR)/host/Makefile:
 	mkdir -p $(BUILDDIR)/host
diff --git a/src/support/MurmurHash3.c b/src/support/MurmurHash3.c
index 94069eab027324..fce7351f90ffe2 100644
--- a/src/support/MurmurHash3.c
+++ b/src/support/MurmurHash3.c
@@ -12,23 +12,6 @@
 //-----------------------------------------------------------------------------
 // Platform-specific functions and macros
 
-// Microsoft Visual Studio
-
-#if defined(_MSC_VER)
-
-#define FORCE_INLINE    __forceinline
-
-#include <stdlib.h>
-
-#define ROTL32(x,y)     _rotl(x,y)
-#define ROTL64(x,y)     _rotl64(x,y)
-
-#define BIG_CONSTANT(x) (x)
-
-// Other compilers
-
-#else   // defined(_MSC_VER)
-
 #define FORCE_INLINE inline __attribute__((always_inline))
 
 static inline uint32_t rotl32 ( uint32_t x, int8_t r )
@@ -46,8 +29,6 @@ static inline uint64_t rotl64 ( uint64_t x, int8_t r )
 
 #define BIG_CONSTANT(x) (x##LLU)
 
-#endif // !defined(_MSC_VER)
-
 //-----------------------------------------------------------------------------
 // Finalization mix - force all bits of a hash block to avalanche
 
diff --git a/src/support/_setjmp.win32.S b/src/support/_setjmp.win32.S
index 441872dd4261a6..33ed50ed3deab1 100644
--- a/src/support/_setjmp.win32.S
+++ b/src/support/_setjmp.win32.S
@@ -56,8 +56,10 @@
  * and update fs:[0xEOC] to contain the address of the stack
  */
 
-#define CNAME jl_setjmp
+#define CNAME ijl_setjmp
 #include "ENTRY.i387.h"
+.globl _jl_setjmp
+_jl_setjmp:
     mov    eax,DWORD PTR [esp+4] // arg 1
     mov    edx,DWORD PTR [esp+0] // rta
     mov    DWORD PTR [eax+0],ebp
@@ -73,8 +75,10 @@
 #include "END.h"
 
 
-#define CNAME jl_longjmp
+#define CNAME ijl_longjmp
 #include "ENTRY.i387.h"
+.globl _jl_longjmp
+_jl_longjmp:
     mov    edx,DWORD PTR [esp+4] // arg 1
     mov    eax,DWORD PTR [esp+8] // arg 2
     mov    ebp,DWORD PTR [edx+24] // seh registration
@@ -87,14 +91,16 @@
     mov    ebp,DWORD PTR [edx+0]
     mov    DWORD PTR [esp],ecx
     test   eax,eax
-    jne    a
+    jne    1f
     inc    eax
-a:  ret    // jmp ecx
+1:  ret    // jmp ecx
 #include "END.h"
 
 
-#define CNAME jl_swapcontext
+#define CNAME ijl_swapcontext
 #include "ENTRY.i387.h"
+.globl _jl_swapcontext
+_jl_swapcontext:
     mov    eax,DWORD PTR [esp+4]
     // save stack registers
     mov    edx,DWORD PTR fs:[8] // stack top (low)
@@ -118,8 +124,10 @@ a:  ret    // jmp ecx
 #include "END.h"
 
 
-#define CNAME jl_setcontext
+#define CNAME ijl_setcontext
 #include "ENTRY.i387.h"
+.globl _jl_setcontext
+_jl_setcontext:
     mov    eax,DWORD PTR [esp+4]
     // restore stack registers
     mov    edx,DWORD PTR [eax+0]
diff --git a/src/support/_setjmp.win64.S b/src/support/_setjmp.win64.S
index cb512cfe4ab3e2..f5e5c69c7cff38 100644
--- a/src/support/_setjmp.win64.S
+++ b/src/support/_setjmp.win64.S
@@ -6,8 +6,10 @@
  * and update gs:[0x1478] to contain the address of the stack
  */
 
-#define CNAME jl_setjmp
+#define CNAME ijl_setjmp
 #include "ENTRY.amd64.h"
+.globl jl_setjmp
+jl_setjmp:
     mov    rdx,QWORD PTR [rsp] // rta
     mov    rax,QWORD PTR gs:[0] // SEH
     mov    QWORD PTR [rcx+0],rax
@@ -37,8 +39,10 @@
 #include "END.h"
 
 
-#define CNAME jl_longjmp
+#define CNAME ijl_longjmp
 #include "ENTRY.amd64.h"
+.globl jl_longjmp
+jl_longjmp:
     mov    rax,QWORD PTR [rcx+0]
     mov    rbx,QWORD PTR [rcx+8]
     mov    rsp,QWORD PTR [rcx+16]
@@ -63,15 +67,17 @@
     mov    QWORD PTR gs:[0],rax
     mov    eax,edx // move arg2 to return
     test   eax,eax
-    jne    a
+    jne    1f
     inc    eax
-a:  mov    QWORD PTR [rsp],r8
+1:  mov    QWORD PTR [rsp],r8
     ret
 #include "END.h"
 
 
-#define CNAME jl_swapcontext
+#define CNAME ijl_swapcontext
 #include "ENTRY.amd64.h"
+.globl jl_swapcontext
+jl_swapcontext:
     // save stack registers
     mov    r8,QWORD PTR gs:[16] // stack top (low)
     mov    rax,QWORD PTR gs:[8] // stack bottom (high)
@@ -109,8 +115,10 @@ a:  mov    QWORD PTR [rsp],r8
 #include "END.h"
 
 
-#define CNAME jl_setcontext
+#define CNAME ijl_setcontext
 #include "ENTRY.amd64.h"
+.globl jl_setcontext
+jl_setcontext:
     // restore stack registers
     mov    r8,QWORD PTR [rcx+0]
     mov    rax,QWORD PTR [rcx+8]
diff --git a/src/support/analyzer_annotations.h b/src/support/analyzer_annotations.h
index 1579584a572a91..70b5a273953f14 100644
--- a/src/support/analyzer_annotations.h
+++ b/src/support/analyzer_annotations.h
@@ -8,7 +8,7 @@
 #endif
 #define JL_NONNULL _Nonnull
 
-#ifdef __clang_analyzer__
+#ifdef __clang_gcanalyzer__
 
 #define JL_PROPAGATES_ROOT __attribute__((annotate("julia_propagates_root")))
 #define JL_NOTSAFEPOINT __attribute__((annotate("julia_not_safepoint")))
diff --git a/src/support/arraylist.c b/src/support/arraylist.c
index 343ee59ab6540f..230c4ed3a16f53 100644
--- a/src/support/arraylist.c
+++ b/src/support/arraylist.c
@@ -104,7 +104,7 @@ void small_arraylist_free(small_arraylist_t *a)
     a->items = &a->_space[0];
 }
 
-void small_arraylist_grow(small_arraylist_t *a, uint32_t n)
+JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n)
 {
     size_t len = a->len;
     size_t newlen = len + n;
diff --git a/src/support/arraylist.h b/src/support/arraylist.h
index f996fb397c6e0f..03bfd45f8f525d 100644
--- a/src/support/arraylist.h
+++ b/src/support/arraylist.h
@@ -39,7 +39,7 @@ void small_arraylist_free(small_arraylist_t *a) JL_NOTSAFEPOINT;
 
 void small_arraylist_push(small_arraylist_t *a, void *elt) JL_NOTSAFEPOINT;
 void *small_arraylist_pop(small_arraylist_t *a) JL_NOTSAFEPOINT;
-void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT;
+JL_DLLEXPORT void small_arraylist_grow(small_arraylist_t *a, uint32_t n) JL_NOTSAFEPOINT;
 
 #ifdef __cplusplus
 }
diff --git a/src/support/dirname.c b/src/support/dirname.c
deleted file mode 100644
index e023b842ce13d3..00000000000000
--- a/src/support/dirname.c
+++ /dev/null
@@ -1,249 +0,0 @@
-/**
- * @file dirname.c
- * Copyright 2012, 2013 MinGW.org project
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/*
- * Provides an implementation of the "dirname" function, conforming
- * to SUSv3, with extensions to accommodate Win32 drive designators,
- * and suitable for use on native Microsoft(R) Win32 platforms.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <locale.h>
-#include <malloc.h>
-#include "dtypes.h"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-JL_DLLEXPORT char *dirname( char *path )
-{
-    size_t len;
-    static JL_THREAD_LOCAL char *retfail = NULL;
-
-    /* to handle path names for files in multibyte character locales,
-     * we need to set up LC_CTYPE to match the host file system locale.
-     */
-
-    char *locale = setlocale( LC_CTYPE, NULL );
-    if( locale != NULL ) locale = strdup( locale );
-    setlocale( LC_CTYPE, "" );
-
-    if( path && *path )
-    {
-        /* allocate sufficient local storage space,
-         * in which to create a wide character reference copy of path
-         */
-
-        wchar_t* refcopy = (wchar_t*)alloca((1 + (len = mbstowcs(NULL, path, 0)))*sizeof(wchar_t));
-
-        /* create the wide character reference copy of path */
-
-        wchar_t *refpath = refcopy;
-        len = mbstowcs( refpath, path, len );
-        refcopy[ len ] = L'\0';
-
-        /* SUSv3 identifies a special case, where path is exactly equal to "//";
-         * (we will also accept "\\" in the Win32 context, but not "/\" or "\/",
-         *  and neither will we consider paths with an initial drive designator).
-         * For this special case, SUSv3 allows the implementation to choose to
-         * return "/" or "//", (or "\" or "\\", since this is Win32); we will
-         * simply return the path unchanged, (i.e. "//" or "\\").
-         */
-
-        if( (len > 1) && ((refpath[0] == L'/') || (refpath[0] == L'\\')) )
-        {
-            if( (refpath[1] == refpath[0]) && (refpath[2] == L'\0') )
-            {
-                setlocale( LC_CTYPE, locale );
-                free( locale );
-                return path;
-            }
-        }
-
-        /* For all other cases ...
-         * step over the drive designator, if present ...
-         */
-
-        else if( (len > 1) && (refpath[1] == L':') )
-        {
-            /* FIXME: maybe should confirm *refpath is a valid drive designator */
-
-            refpath += 2;
-        }
-
-        /* check again, just to ensure we still have a non-empty path name ... */
-
-        if( *refpath )
-        {
-            /* reproduce the scanning logic of the "basename" function
-             * to locate the basename component of the current path string,
-             * (but also remember where the dirname component starts).
-             */
-
-            wchar_t *refname, *basename;
-            for( refname = basename = refpath ; *refpath ; ++refpath )
-            {
-                if( (*refpath == L'/') || (*refpath == L'\\') )
-                {
-                    /* we found a dir separator ...
-                     * step over it, and any others which immediately follow it
-                     */
-
-                    while( (*refpath == L'/') || (*refpath == L'\\') )
-                        ++refpath;
-
-                    /* if we didn't reach the end of the path string ... */
-
-                    if( *refpath )
-
-                        /* then we have a new candidate for the base name */
-
-                        basename = refpath;
-
-                    else
-
-                        /* we struck an early termination of the path string,
-                         * with trailing dir separators following the base name,
-                         * so break out of the for loop, to avoid overrun.
-                         */
-
-                        break;
-                }
-            }
-
-            /* now check,
-             * to confirm that we have distinct dirname and basename components
-             */
-
-            if( basename > refname )
-            {
-                /* and, when we do ...
-                 * backtrack over all trailing separators on the dirname component,
-                 * (but preserve exactly two initial dirname separators, if identical),
-                 * and add a NUL terminator in their place.
-                 */
-
-                do --basename;
-                while( (basename > refname) && ((*basename == L'/') || (*basename == L'\\')) );
-                if( (basename == refname) && ((refname[0] == L'/') || (refname[0] == L'\\'))
-                    &&  (refname[1] == refname[0]) && (refname[2] != L'/') && (refname[2] != L'\\') )
-                    ++basename;
-                *++basename = L'\0';
-
-                /* if the resultant dirname begins with EXACTLY two dir separators,
-                 * AND both are identical, then we preserve them.
-                 */
-
-                refpath = refcopy;
-                while( ((*refpath == L'/') || (*refpath == L'\\')) )
-                    ++refpath;
-                if( ((refpath - refcopy) > 2) || (refcopy[1] != refcopy[0]) )
-                    refpath = refcopy;
-
-                /* and finally ...
-                 * we remove any residual, redundantly duplicated separators from the dirname,
-                 * reterminate, and return it.
-                 */
-
-                refname = refpath;
-                while( *refpath )
-                {
-                    if( ((*refname++ = *refpath) == L'/') || (*refpath++ == L'\\') )
-                    {
-                        while( (*refpath == L'/') || (*refpath == L'\\') )
-                            ++refpath;
-                    }
-                }
-                *refname = L'\0';
-
-                /* finally ...
-                 * transform the resolved dirname back into the multibyte char domain,
-                 * restore the caller's locale, and return the resultant dirname
-                 */
-
-                if( (len = wcstombs( path, refcopy, len )) != (size_t)(-1) )
-                    path[ len ] = '\0';
-            }
-
-            else
-            {
-                /* either there were no dirname separators in the path name,
-                 * or there was nothing else ...
-                 */
-
-                if( (*refname == L'/') || (*refname == L'\\') )
-                {
-                    /* it was all separators, so return one */
-
-                    ++refname;
-                }
-
-                else
-                {
-                    /* there were no separators, so return '.' */
-
-                    *refname++ = L'.';
-                }
-
-                /* add a NUL terminator, in either case,
-                 * then transform to the multibyte char domain,
-                 * using our own buffer
-                 */
-
-                *refname = L'\0';
-                retfail = (char*)realloc( retfail, len = 1 + wcstombs( NULL, refcopy, 0 ));
-                wcstombs( path = retfail, refcopy, len );
-            }
-
-            /* restore caller's locale, clean up, and return the resolved dirname */
-
-            setlocale( LC_CTYPE, locale );
-            free( locale );
-            return path;
-        }
-    }
-
-    /* path is NULL, or an empty string; default return value is "." ...
-     * return this in our own buffer, regenerated by wide char transform,
-     * in case the caller trashed it after a previous call.
-     */
-
-    retfail = (char*)realloc( retfail, len = 1 + wcstombs( NULL, L".", 0 ));
-    wcstombs( retfail, L".", len );
-
-    /* restore caller's locale, clean up, and return the default dirname */
-
-    setlocale( LC_CTYPE, locale );
-    free( locale );
-    return retfail;
-}
-
-#ifdef __cplusplus
-}
-#endif
-
-/* $RCSfile: dirname.c,v $$Revision: 1.2 $: end of file */
diff --git a/src/support/dirpath.h b/src/support/dirpath.h
index 0657cc26de0b1b..b2314d571c6492 100644
--- a/src/support/dirpath.h
+++ b/src/support/dirpath.h
@@ -6,14 +6,18 @@
 #ifdef _OS_WINDOWS_
 #define PATHSEPSTRING "\\"
 #define PATHLISTSEPSTRING ";"
-#if defined(_MSC_VER) || defined(_COMPILER_CLANG_)
-#define PATH_MAX MAX_PATH
+#if defined(PATH_MAX)
+#define JL_PATH_MAX PATH_MAX
+#else // _COMPILER_CLANG_ may have the name reversed
+#define JL_PATH_MAX MAX_PATH
 #endif
 #else
 #define PATHSEPSTRING "/"
 #define PATHLISTSEPSTRING ":"
-#ifndef PATH_MAX // many platforms don't have a max path, we define one anyways
-#define PATH_MAX 1024
+#if defined(PATH_MAX)
+#define JL_PATH_MAX PATH_MAX
+#else // many platforms don't have a max path, we define one anyways
+#define JL_PATH_MAX 1024
 #endif
 #endif
 
diff --git a/src/support/dtypes.h b/src/support/dtypes.h
index b252776404d682..d49ae0b22b5f95 100644
--- a/src/support/dtypes.h
+++ b/src/support/dtypes.h
@@ -10,11 +10,7 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <stdio.h>
-#if defined(_COMPILER_INTEL_)
-#include <mathimf.h>
-#else
-#include <math.h>
-#endif
+#include <math.h> // NAN and INF constants
 
 #include "platform.h"
 #include "analyzer_annotations.h"
@@ -28,6 +24,18 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <sys/stat.h>
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#if defined(_COMPILER_MICROSOFT_) && !defined(_SSIZE_T_) && !defined(_SSIZE_T_DEFINED)
+
+/* See https://github.com/JuliaLang/julia/pull/44587 */
+typedef intptr_t ssize_t;
+#define SSIZE_MAX INTPTR_MAX
+#define _SSIZE_T_
+#define _SSIZE_T_DEFINED
+
+#endif /* defined(_COMPILER_MICROSOFT_) && !defined(_SSIZE_T_) && !defined(_SSIZE_T_DEFINED) */
 
 #if !defined(_COMPILER_GCC_)
 
@@ -63,15 +71,17 @@
 */
 
 #ifdef _OS_WINDOWS_
-#define STDCALL __stdcall
+#define STDCALL  __stdcall
 # ifdef LIBRARY_EXPORTS
 #  define JL_DLLEXPORT __declspec(dllexport)
 # else
 #  define JL_DLLEXPORT __declspec(dllimport)
 # endif
+#define JL_DLLIMPORT   __declspec(dllimport)
 #else
 #define STDCALL
-#define JL_DLLEXPORT __attribute__ ((visibility("default")))
+# define JL_DLLEXPORT __attribute__ ((visibility("default")))
+#define JL_DLLIMPORT
 #endif
 
 #ifdef _OS_LINUX_
@@ -106,13 +116,7 @@
 #define LLT_REALLOC(p,n) realloc((p),(n))
 #define LLT_FREE(x) free(x)
 
-#if defined(_OS_WINDOWS_) && defined(_COMPILER_INTEL_)
-#  define STATIC_INLINE static
-#elif defined(_OS_WINDOWS_) && defined(_COMPILER_MICROSOFT_)
-#  define STATIC_INLINE static __inline
-#else
-#  define STATIC_INLINE static inline
-#endif
+#define STATIC_INLINE static inline
 
 #if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_)
 #  define NOINLINE __declspec(noinline)
@@ -146,20 +150,6 @@
                 __builtin_assume(!!(cond_));            \
                 cond_;                                  \
             }))
-#elif defined(_COMPILER_MICROSOFT_) && defined(__cplusplus)
-template<typename T>
-static inline T
-jl_assume(T v)
-{
-    __assume(!!v);
-    return v;
-}
-#elif defined(_COMPILER_INTEL_)
-#define jl_assume(cond) (__extension__ ({               \
-                __typeof__(cond) cond_ = (cond);        \
-                __assume(!!(cond_));                    \
-                cond_;                                  \
-            }))
 #elif defined(__GNUC__)
 static inline void jl_assume_(int cond)
 {
@@ -178,12 +168,6 @@ static inline void jl_assume_(int cond)
 
 #if jl_has_builtin(__builtin_assume_aligned) || defined(_COMPILER_GCC_)
 #define jl_assume_aligned(ptr, align) __builtin_assume_aligned(ptr, align)
-#elif defined(_COMPILER_INTEL_)
-#define jl_assume_aligned(ptr, align) (__extension__ ({         \
-                __typeof__(ptr) ptr_ = (ptr);                   \
-                __assume_aligned(ptr_, align);                  \
-                ptr_;                                           \
-            }))
 #elif defined(__GNUC__)
 #define jl_assume_aligned(ptr, align) (__extension__ ({         \
                 __typeof__(ptr) ptr_ = (ptr);                   \
@@ -347,16 +331,12 @@ STATIC_INLINE void jl_store_unaligned_i16(void *ptr, uint16_t val) JL_NOTSAFEPOI
     memcpy(ptr, &val, 2);
 }
 
-#ifdef _OS_WINDOWS_
-#include <errhandlingapi.h>
-#endif
-
 STATIC_INLINE void *malloc_s(size_t sz) JL_NOTSAFEPOINT {
     int last_errno = errno;
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
 #endif
-    void *p = malloc(sz);
+    void *p = malloc(sz == 0 ? 1 : sz);
     if (p == NULL) {
         perror("(julia) malloc");
         abort();
@@ -373,7 +353,7 @@ STATIC_INLINE void *realloc_s(void *p, size_t sz) JL_NOTSAFEPOINT {
 #ifdef _OS_WINDOWS_
     DWORD last_error = GetLastError();
 #endif
-    p = realloc(p, sz);
+    p = realloc(p, sz == 0 ? 1 : sz);
     if (p == NULL) {
         perror("(julia) realloc");
         abort();
diff --git a/src/support/hashing.h b/src/support/hashing.h
index 8686c746f48986..bed688e94f5b23 100644
--- a/src/support/hashing.h
+++ b/src/support/hashing.h
@@ -12,9 +12,9 @@ extern "C" {
 #endif
 
 uint_t nextipow2(uint_t i) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint32_t int32hash(uint32_t a) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint64_t int64hash(uint64_t key) JL_NOTSAFEPOINT;
-JL_DLLEXPORT uint32_t int64to32hash(uint64_t key) JL_NOTSAFEPOINT;
+uint32_t int32hash(uint32_t a) JL_NOTSAFEPOINT;
+uint64_t int64hash(uint64_t key) JL_NOTSAFEPOINT;
+uint32_t int64to32hash(uint64_t key) JL_NOTSAFEPOINT;
 #ifdef _P64
 #define inthash int64hash
 #else
diff --git a/src/support/htable.h b/src/support/htable.h
index 3b52efabac8a1b..0b5196374e2b6e 100644
--- a/src/support/htable.h
+++ b/src/support/htable.h
@@ -32,7 +32,7 @@ typedef struct {
 
 // initialize hash table, reserving space for `size` expected number of
 // elements. (Expect `h->size > size` for efficient occupancy factor.)
-htable_t *htable_new(htable_t *h, size_t size);
+htable_t *htable_new(htable_t *h, size_t size) JL_NOTSAFEPOINT;
 void htable_free(htable_t *h);
 
 // clear and (possibly) change size
diff --git a/src/support/htable.inc b/src/support/htable.inc
index fa59624a4998f5..7a9be2514e2f0d 100644
--- a/src/support/htable.inc
+++ b/src/support/htable.inc
@@ -13,67 +13,77 @@
 static void **HTNAME##_lookup_bp_r(htable_t *h, void *key, void *ctx)   \
 {                                                                       \
     uint_t hv;                                                          \
-    size_t i, orig, index, iter;                                        \
+    size_t i, orig, index, iter, empty_slot;                            \
     size_t newsz, sz = hash_size(h);                                    \
     size_t maxprobe = max_probe(sz);                                    \
     void **tab = h->table;                                              \
     void **ol;                                                          \
                                                                         \
     hv = HFUNC((uintptr_t)key, ctx);                                    \
- retry_bp:                                                              \
-    iter = 0;                                                           \
-    index = (size_t)(hv & (sz-1)) * 2;                                  \
-    sz *= 2;                                                            \
-    orig = index;                                                       \
-                                                                        \
-    do {                                                                \
-        if (tab[index+1] == HT_NOTFOUND) {                              \
-            tab[index] = key;                                           \
-            return &tab[index+1];                                       \
+    while (1) {                                                         \
+        iter = 0;                                                       \
+        index = (size_t)(hv & (sz-1)) * 2;                              \
+        sz *= 2;                                                        \
+        orig = index;                                                   \
+        empty_slot = -1;                                                \
+                                                                        \
+        do {                                                            \
+            if (tab[index] == HT_NOTFOUND) {                            \
+                if (empty_slot == -1)                                   \
+                    empty_slot = index;                                 \
+                break;                                                  \
+            }                                                           \
+            if (tab[index+1] == HT_NOTFOUND) {                          \
+                if (empty_slot == -1)                                   \
+                    empty_slot = index;                                 \
+            }                                                           \
+                                                                        \
+            if (EQFUNC(key, tab[index], ctx))                           \
+                return &tab[index+1];                                   \
+                                                                        \
+            index = (index+2) & (sz-1);                                 \
+            iter++;                                                     \
+            if (iter > maxprobe)                                        \
+                break;                                                  \
+        } while (index != orig);                                        \
+                                                                        \
+        if (empty_slot != -1) {                                         \
+            tab[empty_slot] = key;                                      \
+            return &tab[empty_slot+1];                                  \
         }                                                               \
                                                                         \
-        if (EQFUNC(key, tab[index], ctx))                               \
-            return &tab[index+1];                                       \
-                                                                        \
-        index = (index+2) & (sz-1);                                     \
-        iter++;                                                         \
-        if (iter > maxprobe)                                            \
-            break;                                                      \
-    } while (index != orig);                                            \
-                                                                        \
-    /* table full */                                                    \
-    /* quadruple size, rehash, retry the insert */                      \
-    /* it's important to grow the table really fast; otherwise we waste */ \
-    /* lots of time rehashing all the keys over and over. */            \
-    sz = h->size;                                                       \
-    ol = h->table;                                                      \
-    if (sz < HT_N_INLINE)                                              \
-        newsz = HT_N_INLINE;                                            \
-    else if (sz >= (1<<19) || (sz <= (1<<8)))                           \
-        newsz = sz<<1;                                                  \
-    else                                                                \
-        newsz = sz<<2;                                                  \
-    /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \
-    tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                       \
-    if (tab == NULL)                                                    \
-        return NULL;                                                    \
-    for(i=0; i < newsz; i++)                                            \
-        tab[i] = HT_NOTFOUND;                                           \
-    h->table = tab;                                                     \
-    h->size = newsz;                                                    \
-    for(i=0; i < sz; i+=2) {                                            \
-        if (ol[i+1] != HT_NOTFOUND) {                                   \
-            (*HTNAME##_lookup_bp_r(h, ol[i], ctx)) = ol[i+1];           \
+        /* table full */                                                \
+        /* quadruple size, rehash, retry the insert */                  \
+        /* it's important to grow the table really fast; otherwise we waste */ \
+        /* lots of time rehashing all the keys over and over. */        \
+        sz = h->size;                                                   \
+        ol = h->table;                                                  \
+        if (sz < HT_N_INLINE)                                           \
+            newsz = HT_N_INLINE;                                        \
+        else if (sz >= (1<<19) || (sz <= (1<<8)))                       \
+            newsz = sz<<1;                                              \
+        else                                                            \
+            newsz = sz<<2;                                              \
+        /*printf("trying to allocate %d words.\n", newsz); fflush(stdout);*/ \
+        tab = (void**)LLT_ALLOC(newsz*sizeof(void*));                   \
+        if (tab == NULL)                                                \
+            return NULL;                                                \
+        for (i = 0; i < newsz; i++)                                     \
+            tab[i] = HT_NOTFOUND;                                       \
+        h->table = tab;                                                 \
+        h->size = newsz;                                                \
+        for (i = 0; i < sz; i += 2) {                                   \
+            if (ol[i+1] != HT_NOTFOUND) {                               \
+                (*HTNAME##_lookup_bp_r(h, ol[i], ctx)) = ol[i+1];       \
+            }                                                           \
         }                                                               \
-    }                                                                   \
-    if (ol != &h->_space[0])                                            \
-        LLT_FREE(ol);                                                   \
+        if (ol != &h->_space[0])                                        \
+            LLT_FREE(ol);                                               \
                                                                         \
-    sz = hash_size(h);                                                  \
-    maxprobe = max_probe(sz);                                           \
-    tab = h->table;                                                     \
-                                                                        \
-    goto retry_bp;                                                      \
+        sz = hash_size(h);                                              \
+        maxprobe = max_probe(sz);                                       \
+        tab = h->table;                                                 \
+    }                                                                   \
                                                                         \
     return NULL;                                                        \
 }                                                                       \
diff --git a/src/support/ios.c b/src/support/ios.c
index 4ab093ff40e78c..c0f1c92572b781 100644
--- a/src/support/ios.c
+++ b/src/support/ios.c
@@ -1105,9 +1105,12 @@ int ios_ungetc(int c, ios_t *s)
 {
     if (s->state == bst_wr)
         return IOS_EOF;
+    if (c == '\n') s->lineno--;
+    if (s->u_colno > 0) s->u_colno--;
     if (s->bpos > 0) {
         s->bpos--;
-        s->buf[s->bpos] = (char)c;
+        if (s->buf[s->bpos] != (char)c)
+            s->buf[s->bpos] = (char)c;
         s->_eof = 0;
         return c;
     }
@@ -1129,11 +1132,14 @@ int ios_getutf8(ios_t *s, uint32_t *pwc)
     char c0;
     char buf[8];
 
-    c = ios_getc(s);
-    if (c == IOS_EOF)
+    c = ios_peekc(s);
+    if (c == IOS_EOF) {
+        s->_eof = 1;
         return IOS_EOF;
+    }
     c0 = (char)c;
     if ((unsigned char)c0 < 0x80) {
+        (void)ios_getc(s); // consume peeked char, increment lineno
         *pwc = (uint32_t)(unsigned char)c0;
         if (c == '\n')
             s->u_colno = 0;
@@ -1141,13 +1147,12 @@ int ios_getutf8(ios_t *s, uint32_t *pwc)
             s->u_colno += utf8proc_charwidth(*pwc);
         return 1;
     }
-    if (ios_ungetc(c, s) == IOS_EOF)
-        return IOS_EOF;
     sz = u8_seqlen(&c0);
     if (!isutf(c0) || sz > 4)
         return 0;
     if (ios_readprep(s, sz) < sz)
-        // NOTE: this can return EOF even if some bytes are available
+        // NOTE: this returns EOF even though some bytes are available,
+        // so we do not set s->_eof on this code path
         return IOS_EOF;
     int valid = u8_isvalid(&s->buf[s->bpos], sz);
     if (valid) {
diff --git a/src/support/ios.h b/src/support/ios.h
index 3ba5ab4884284d..9d0f42d6d1bc40 100644
--- a/src/support/ios.h
+++ b/src/support/ios.h
@@ -4,7 +4,7 @@
 #define JL_IOS_H
 
 #include <stdarg.h>
-#include "uv.h"
+#include <sys/types.h>
 #include "analyzer_annotations.h"
 
 #ifdef __cplusplus
@@ -16,7 +16,7 @@ extern "C" {
 // never moves out.
 
 //make it compatible with UV Handles
-typedef enum { bm_none=UV_HANDLE_TYPE_MAX+1, bm_line, bm_block, bm_mem } bufmode_t;
+typedef enum { bm_none=1000, bm_line, bm_block, bm_mem } bufmode_t;
 typedef enum { bst_none, bst_rd, bst_wr } bufstate_t;
 
 #define IOS_INLSIZE 54
diff --git a/src/support/libsupport.h b/src/support/libsupport.h
index 880c8560cd23ca..043a1e6a426f9b 100644
--- a/src/support/libsupport.h
+++ b/src/support/libsupport.h
@@ -8,15 +8,8 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include "dtypes.h"
-#include "utils.h"
 #include "utf8.h"
 #include "ios.h"
-#include "timefuncs.h"
-#include "hashing.h"
-#include "ptrhash.h"
-#include "bitvector.h"
-#include "dirpath.h"
-#include "strtod.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/src/support/libsupportinit.c b/src/support/libsupportinit.c
index 1dbce675be34ac..c1afde186e6cd9 100644
--- a/src/support/libsupportinit.c
+++ b/src/support/libsupportinit.c
@@ -3,22 +3,78 @@
 #include <locale.h>
 #include "libsupport.h"
 
+#ifndef _OS_WINDOWS_
+#include <sys/resource.h>
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-static int isInitialized = 0;
+static const char *jl_strchrnul(const char *s, int c)
+{
+    char *p = strchr(s, c);
+    if (p)
+        return p;
+    return s + strlen(s);
+}
 
 void libsupport_init(void)
 {
+    static int isInitialized = 0;
     if (!isInitialized) {
-
-        setlocale(LC_ALL, ""); // set to user locale
-        setlocale(LC_NUMERIC, "C"); // use locale-independent numeric formats
-
         ios_init_stdstreams();
-
-        isInitialized=1;
+        isInitialized = 1;
+#ifndef _OS_WINDOWS_
+        // Raise the open file descriptor limit.
+        {
+            struct rlimit rl;
+            if (getrlimit(RLIMIT_NOFILE, &rl) == 0 && rl.rlim_cur != rl.rlim_max) {
+                // Do a binary search for the limit.
+                rlim_t min = rl.rlim_cur;
+                rlim_t max = 1 << 20;
+                // But if there's a defined upper bound, don't search, just set it.
+                if (rl.rlim_max != RLIM_INFINITY) {
+                    min = rl.rlim_max;
+                    max = rl.rlim_max;
+                }
+                do {
+                    rl.rlim_cur = min + (max - min) / 2;
+                    if (setrlimit(RLIMIT_NOFILE, &rl)) {
+                        max = rl.rlim_cur;
+                    } else {
+                        min = rl.rlim_cur;
+                    }
+                } while (min + 1 < max);
+            }
+        }
+#endif
+        // adopt the user's locale for most formatting
+        setlocale(LC_ALL, "");
+        // but use locale-independent numeric formats (for parsing)
+        setlocale(LC_NUMERIC, "C");
+        // and try to specify ASCII or UTF-8 (preferred) for our Libc and Cstring functions
+        char *ctype = setlocale(LC_CTYPE, NULL);
+        if (ctype) {
+            size_t codeset = jl_strchrnul(ctype, '.') - ctype;
+            if (strncmp(ctype + codeset, ".UTF-8", strlen(".UTF-8")) == 0 ||
+                strncmp(ctype + codeset, ".utf-8", strlen(".utf-8")) == 0 ||
+                strncmp(ctype + codeset, ".utf8", strlen(".utf8")) == 0)
+                return; // already UTF-8
+            ctype = (char*)memcpy(malloc_s(codeset + sizeof(".UTF-8")), ctype, codeset);
+            strcpy(ctype + codeset, ".UTF-8");
+        }
+        setlocale(LC_CTYPE, "C"); // ASCII
+#ifndef _OS_WINDOWS_
+        if (setlocale(LC_CTYPE, "C.UTF-8") == NULL && // Linux/FreeBSD name
+            setlocale(LC_CTYPE, "en_US.UTF-8") == NULL && // Common name
+            setlocale(LC_CTYPE, "UTF-8") == NULL && // Apple name
+            (ctype == NULL || setlocale(LC_CTYPE, ctype) == NULL)) { // attempt to form it manually
+            ios_puts("WARNING: failed to select UTF-8 encoding, using ASCII\n", ios_stderr);
+        }
+#endif
+        if (ctype)
+            free(ctype);
     }
 }
 
diff --git a/src/support/platform.h b/src/support/platform.h
index 1bb46d3bc648c2..cf65fa01423feb 100644
--- a/src/support/platform.h
+++ b/src/support/platform.h
@@ -14,8 +14,6 @@
  *      Compiler:
  *          _COMPILER_CLANG_
  *          _COMPILER_GCC_
- *          _COMPILER_INTEL_
- *          _COMPILER_MICROSOFT_
  *      OS:
  *          _OS_FREEBSD_
  *          _OS_LINUX_
@@ -35,20 +33,34 @@
 *                               Compiler                                       *
 *******************************************************************************/
 
-/*
- * Note: Checking for Intel's compiler should be done before checking for
- * Microsoft's. On Windows Intel's compiler also defines _MSC_VER as the
- * acknowledgement of the fact that it is integrated with Visual Studio.
- */
 #if defined(__clang__)
 #define _COMPILER_CLANG_
-#elif defined(__INTEL_COMPILER) || defined(__ICC)
-#define _COMPILER_INTEL_
-#elif defined(_MSC_VER)
-#define _COMPILER_MICROSOFT_
 #elif defined(__GNUC__)
 #define _COMPILER_GCC_
+#elif defined(_MSC_VER)
+#define _COMPILER_MICROSOFT_
+#else
+#error Unsupported compiler
+#endif
+
+#if defined(__has_feature) // Clang flavor
+#if __has_feature(address_sanitizer)
+#define _COMPILER_ASAN_ENABLED_
+#endif
+#if __has_feature(memory_sanitizer)
+#define _COMPILER_MSAN_ENABLED_
+#endif
+#if __has_feature(thread_sanitizer)
+#if __clang_major__ < 11
+#error Thread sanitizer runtime libraries in clang < 11 leak memory and cannot be used
+#endif
+#define _COMPILER_TSAN_ENABLED_
+#endif
+#else // GCC flavor
+#if defined(__SANITIZE_ADDRESS__)
+#define _COMPILER_ASAN_ENABLED_
 #endif
+#endif // __has_feature
 
 /*******************************************************************************
 *                               OS                                             *
diff --git a/src/support/rle.c b/src/support/rle.c
new file mode 100644
index 00000000000000..6b64fa8cf97005
--- /dev/null
+++ b/src/support/rle.c
@@ -0,0 +1,92 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#include "rle.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* iteration */
+
+rle_iter_state rle_iter_init(uint64_t key0)
+{
+    rle_iter_state state = {-1, 0, key0};
+    return state;
+}
+
+int rle_iter_increment(rle_iter_state *state, size_t len, uint64_t *rletable, size_t npairs)
+{
+    state->i += 1;
+    size_t i = state->i, j = state->j;
+    if (i >= len)
+        return 0;
+    if (rletable) {
+        while (j < npairs && i >= rletable[j+1]) {
+            state->key = rletable[j];
+            j += 2;
+        }
+        state->j = j;
+    }
+    return 1;
+}
+
+/* indexing */
+
+void rle_index_to_reference(rle_reference *rr, size_t i, uint64_t *rletable, size_t npairs, uint64_t key0)
+{
+    if (!rletable) {
+        rr->key = key0;
+        rr->index = i;
+        return;
+    }
+    // Determine the active key
+    uint64_t key = key0;
+    size_t jj = 0;
+    while (jj < npairs && i >= rletable[jj+1]) {
+        key = rletable[jj];
+        jj += 2;
+    }
+    // Subtract the number of preceding items with different keys
+    uint64_t ckey = key0;
+    size_t j, start = 0, index = i;
+    for (j = 0; j < jj; j+=2) {
+        if (key != ckey)
+            index -= rletable[j+1] - start;
+        ckey = rletable[j];
+        start = rletable[j+1];
+    }
+    // Return the result
+    rr->key = key;
+    rr->index = index;
+    return;
+}
+
+size_t rle_reference_to_index(rle_reference *rr, uint64_t *rletable, size_t npairs, uint64_t key0)
+{
+    uint64_t key = rr->key;
+    size_t index = rr->index, i = index;
+    if (!rletable) {
+        assert(key == key0);
+        return i;
+    }
+    uint64_t ckey = key0;
+    size_t j, start = 0, n;
+    for (j = 0; j < npairs; j+=2) {
+        n = rletable[j+1] - start;
+        if (key != ckey)
+            i += n;
+        else {
+            if (index < n)
+                break;
+            index -= n;
+        }
+        ckey = rletable[j];
+        start = rletable[j+1];
+    }
+    return i;
+}
+
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/src/support/rle.h b/src/support/rle.h
new file mode 100644
index 00000000000000..f85d9f35c4b803
--- /dev/null
+++ b/src/support/rle.h
@@ -0,0 +1,48 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+#ifndef JL_RLE_H
+#define JL_RLE_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+#include <stdint.h>
+#include <assert.h>
+
+/* Run-length encoding (RLE) utilities */
+/* In the RLE table, even indexes encode the key (the item classification), odd indexes encode the item index */
+/* For example, a table
+
+       {77, 3, 88, 5, 77, 8}
+
+   would represent a list where items at indexes 3-4 have key 77, items at indexes 5-7 have key 88,
+   and items from 8 onward have key 77. Items prior to index 3 have an implicit key passed in as `key0`.
+*/
+
+/* iteration */
+typedef struct _rle_iter_state_t {
+    size_t i;      // index for the items
+    size_t j;      // index for the rle table
+    uint64_t key;  // current identifier
+} rle_iter_state;
+
+rle_iter_state rle_iter_init(/* implicit value of key for indexes prior to first explicit rle pair */ uint64_t key0);
+int rle_iter_increment(rle_iter_state *state, /* number of items */ size_t len, uint64_t *rletable, /*length of rletable */ size_t npairs);
+
+/* indexing */
+typedef struct {
+    uint64_t key;
+    int index;     // number of preceding items in the list with the same key
+} rle_reference;
+
+void rle_index_to_reference(rle_reference *rr, /* item index */ size_t i, uint64_t *rletable, size_t npairs, uint64_t key0);
+size_t rle_reference_to_index(rle_reference *rr, uint64_t *rletable, size_t npairs, uint64_t key0);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/support/test/rletest.c b/src/support/test/rletest.c
new file mode 100644
index 00000000000000..c5c5caa97fde80
--- /dev/null
+++ b/src/support/test/rletest.c
@@ -0,0 +1,60 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include "../rle.h"
+
+int main()
+{
+    /* Iteration */
+    rle_iter_state state = rle_iter_init(22);
+    int i = 0;
+    while (rle_iter_increment(&state, 5, NULL, 0)) {
+        assert(state.key == 22);
+        assert(state.i == i);
+        i++;
+    }
+
+    uint64_t rletable1[4] = {-1, 2, 22, 3};
+    state = rle_iter_init(22);
+    i = 0;
+    while (rle_iter_increment(&state, 5, rletable1, 4)) {
+        assert(state.key == (i < 2 ? 22 : (i < 3 ? -1 : 22)));
+        assert(state.i == i);
+        i++;
+    }
+
+    uint64_t rletable2[4] = {-1, 0, 22, 3};
+    state = rle_iter_init(22);
+    i = 0;
+    while (rle_iter_increment(&state, 5, rletable2, 4)) {
+        assert(state.key == (i < 3 ? -1 : 22));
+        assert(state.i == i);
+        i++;
+    }
+
+    state = rle_iter_init(22);
+    i = 0;
+    while (rle_iter_increment(&state, 0, rletable2, 4)) {
+        abort();
+    }
+
+    /* Indexing */
+    rle_reference rr;
+    uint64_t rletable3[8] = {0, 0, 5, 2, 22, 3, 0, 5};
+    uint64_t keys3[7] = {0, 0, 5, 22, 22, 0, 0};
+    int counts3[7] = {0, 1, 0, 0, 1, 2, 3};
+    for (i = 0; i < 7; i++) {
+        rle_index_to_reference(&rr, i, rletable3, 8, 0);
+        assert(rr.key == keys3[i]);
+        assert(rr.index == counts3[i]);
+        assert(rle_reference_to_index(&rr, rletable3, 8, 0) == i);
+    }
+    uint64_t rletable4[6] = {5, 2, 22, 3, 0, 5};  // implicit first block
+    for (i = 0; i < 7; i++) {
+        rle_index_to_reference(&rr, i, rletable4, 6, 0);
+        assert(rr.key == keys3[i]);
+        assert(rr.index == counts3[i]);
+        assert(rle_reference_to_index(&rr, rletable4, 6, 0) == i);
+    }
+
+    return 0;
+}
diff --git a/src/support/timefuncs.c b/src/support/timefuncs.c
index 031967638ec9e9..b353ce8f49cec0 100644
--- a/src/support/timefuncs.c
+++ b/src/support/timefuncs.c
@@ -1,25 +1,14 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
 #include <assert.h>
-#include <errno.h>
-#include <limits.h>
-#include <sys/stat.h>
-#include <sys/types.h>
 
 #include "dtypes.h"
 
 #if defined(_OS_WINDOWS_)
-#include <malloc.h>
 #include <sys/timeb.h>
-#include <windows.h>
 #else
 #include <sys/time.h>
-#include <sys/poll.h>
-#include <unistd.h>
+#include <sys/select.h>
 #endif
 
 #include "timefuncs.h"
diff --git a/src/support/utils.h b/src/support/utils.h
index 4d77a1bb9fb9b7..b7e9de2cfdb799 100644
--- a/src/support/utils.h
+++ b/src/support/utils.h
@@ -7,7 +7,7 @@
 extern "C" {
 #endif
 
-JL_DLLEXPORT char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base);
+char *uint2str(char *dest, size_t len, uint64_t num, uint32_t base);
 int str2int(char *str, size_t len, int64_t *res, uint32_t base);
 int isdigit_base(char c, int base);
 
@@ -33,14 +33,6 @@ int cmp_eq(void *a, numerictype_t atag, void *b, numerictype_t btag,
 #define bswap_16(x) __builtin_bswap16(x)
 #define bswap_32(x) __builtin_bswap32(x)
 #define bswap_64(x) __builtin_bswap64(x)
-#elif defined(_MSC_VER)
-#define bswap_16(x) _byteswap_ushort(x)
-#define bswap_32(x) _byteswap_ulong(x)
-#define bswap_64(x) _byteswap_uint64(x)
-#elif defined(__INTEL_COMPILER)
-#define bswap_16(x) _bswap16(x)
-#define bswap_32(x) _bswap(x)
-#define bswap_64(x) _bswap64(x)
 #else
 #define bswap_16(x) (((x) & 0x00ff) << 8 | ((x) & 0xff00) >> 8)
 #define bswap_32(x) \
diff --git a/src/symbol.c b/src/symbol.c
index f1a4343a39e8e3..14606c82b97784 100644
--- a/src/symbol.c
+++ b/src/symbol.c
@@ -15,13 +15,15 @@
 extern "C" {
 #endif
 
-static jl_sym_t *symtab = NULL;
+static _Atomic(jl_sym_t*) symtab = NULL;
 
 #define MAX_SYM_LEN ((size_t)INTPTR_MAX - sizeof(jl_taggedvalue_t) - sizeof(jl_sym_t) - 1)
 
 static uintptr_t hash_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
 {
-    return memhash(str, len) ^ ~(uintptr_t)0/3*2;
+    uintptr_t oid = memhash(str, len) ^ ~(uintptr_t)0/3*2;
+    // compute the same hash value as v1.6 and earlier, which used `hash_uint(3h - objectid(sym))`
+    return inthash(-oid);
 }
 
 static size_t symbol_nbytes(size_t len) JL_NOTSAFEPOINT
@@ -39,16 +41,17 @@ static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT
     sym = (jl_sym_t*)jl_valueof(tag);
     // set to old marked so that we won't look at it in the GC or write barrier.
     tag->header = ((uintptr_t)jl_symbol_type) | GC_OLD_MARKED;
-    sym->left = sym->right = NULL;
+    jl_atomic_store_relaxed(&sym->left, NULL);
+    jl_atomic_store_relaxed(&sym->right, NULL);
     sym->hash = hash_symbol(str, len);
     memcpy(jl_symbol_name(sym), str, len);
     jl_symbol_name(sym)[len] = 0;
     return sym;
 }
 
-static jl_sym_t *symtab_lookup(jl_sym_t **ptree, const char *str, size_t len, jl_sym_t ***slot) JL_NOTSAFEPOINT
+static jl_sym_t *symtab_lookup(_Atomic(jl_sym_t*) *ptree, const char *str, size_t len, _Atomic(jl_sym_t*) **slot) JL_NOTSAFEPOINT
 {
-    jl_sym_t *node = jl_atomic_load_acquire(ptree); // consume
+    jl_sym_t *node = jl_atomic_load_relaxed(ptree); // consume
     uintptr_t h = hash_symbol(str, len);
 
     // Tree nodes sorted by major key of (int(hash)) and minor key of (str).
@@ -66,7 +69,7 @@ static jl_sym_t *symtab_lookup(jl_sym_t **ptree, const char *str, size_t len, jl
             ptree = &node->left;
         else
             ptree = &node->right;
-        node = jl_atomic_load_acquire(ptree); // consume
+        node = jl_atomic_load_relaxed(ptree); // consume
     }
     if (slot != NULL)
         *slot = ptree;
@@ -75,25 +78,25 @@ static jl_sym_t *symtab_lookup(jl_sym_t **ptree, const char *str, size_t len, jl
 
 jl_sym_t *_jl_symbol(const char *str, size_t len) JL_NOTSAFEPOINT // (or throw)
 {
-#ifndef __clang_analyzer__
+#ifndef __clang_gcanalyzer__
     // Hide the error throwing from the analyser since there isn't a way to express
     // "safepoint only when throwing error" currently.
     if (len > MAX_SYM_LEN)
         jl_exceptionf(jl_argumenterror_type, "Symbol name too long");
 #endif
     assert(!memchr(str, 0, len));
-    jl_sym_t **slot;
+    _Atomic(jl_sym_t*) *slot;
     jl_sym_t *node = symtab_lookup(&symtab, str, len, &slot);
     if (node == NULL) {
-        JL_LOCK_NOGC(&gc_perm_lock);
+        uv_mutex_lock(&gc_perm_lock);
         // Someone might have updated it, check and look up again
-        if (*slot != NULL && (node = symtab_lookup(slot, str, len, &slot))) {
-            JL_UNLOCK_NOGC(&gc_perm_lock);
+        if (jl_atomic_load_relaxed(slot) != NULL && (node = symtab_lookup(slot, str, len, &slot))) {
+            uv_mutex_unlock(&gc_perm_lock);
             return node;
         }
         node = mk_symbol(str, len);
         jl_atomic_store_release(slot, node);
-        JL_UNLOCK_NOGC(&gc_perm_lock);
+        uv_mutex_unlock(&gc_perm_lock);
     }
     return node;
 }
@@ -117,12 +120,12 @@ JL_DLLEXPORT jl_sym_t *jl_symbol_n(const char *str, size_t len)
 
 JL_DLLEXPORT jl_sym_t *jl_get_root_symbol(void)
 {
-    return symtab;
+    return jl_atomic_load_relaxed(&symtab);
 }
 
-static uint32_t gs_ctr = 0;  // TODO: per-thread
-uint32_t jl_get_gs_ctr(void) { return gs_ctr; }
-void jl_set_gs_ctr(uint32_t ctr) { gs_ctr = ctr; }
+static _Atomic(uint32_t) gs_ctr = 0;  // TODO: per-module?
+uint32_t jl_get_gs_ctr(void) { return jl_atomic_load_relaxed(&gs_ctr); }
+void jl_set_gs_ctr(uint32_t ctr) { jl_atomic_store_relaxed(&gs_ctr, ctr); }
 
 JL_DLLEXPORT jl_sym_t *jl_gensym(void)
 {
diff --git a/src/sys.c b/src/sys.c
index a9d3a857968bbd..bc21d065f55a30 100644
--- a/src/sys.c
+++ b/src/sys.c
@@ -26,6 +26,7 @@
 #include <sys/ptrace.h>
 #include <sys/mman.h>
 #include <dlfcn.h>
+#include <grp.h>
 #endif
 
 #ifndef _OS_WINDOWS_
@@ -47,12 +48,7 @@
 #include <xmmintrin.h>
 #endif
 
-#if defined _MSC_VER
-#include <io.h>
-#include <intrin.h>
-#endif
-
-#ifdef JL_MSAN_ENABLED
+#ifdef _COMPILER_MSAN_ENABLED_
 #include <sanitizer/msan_interface.h>
 #endif
 
@@ -62,12 +58,6 @@
 extern "C" {
 #endif
 
-#if defined(_OS_WINDOWS_) && !defined(_COMPILER_GCC_)
-JL_DLLEXPORT char *dirname(char *);
-#else
-#include <libgen.h>
-#endif
-
 JL_DLLEXPORT int jl_sizeof_off_t(void) { return sizeof(off_t); }
 #ifndef _OS_WINDOWS_
 JL_DLLEXPORT int jl_sizeof_mode_t(void) { return sizeof(mode_t); }
@@ -228,6 +218,24 @@ JL_DLLEXPORT double jl_stat_ctime(char *statbuf)
     return (double)s->st_ctim.tv_sec + (double)s->st_ctim.tv_nsec * 1e-9;
 }
 
+JL_DLLEXPORT unsigned long jl_getuid(void)
+{
+#ifdef _OS_WINDOWS_
+    return -1;
+#else
+    return getuid();
+#endif
+}
+
+JL_DLLEXPORT unsigned long jl_geteuid(void)
+{
+#ifdef _OS_WINDOWS_
+    return -1;
+#else
+    return geteuid();
+#endif
+}
+
 // --- buffer manipulation ---
 
 JL_DLLEXPORT jl_array_t *jl_take_buffer(ios_t *s)
@@ -291,9 +299,7 @@ JL_DLLEXPORT jl_value_t *jl_readuntil(ios_t *s, uint8_t delim, uint8_t str, uint
             a = jl_take_buffer(&dest);
         }
         else {
-#ifdef STORE_ARRAY_LEN
             a->length = n;
-#endif
             a->nrows = n;
             ((char*)a->data)[n] = '\0';
         }
@@ -358,6 +364,15 @@ typedef DWORD (WINAPI *GAPC)(WORD);
 #endif
 #endif
 
+// Apple's M1 processor is a big.LITTLE style processor, with 4x "performance"
+// cores, and 4x "efficiency" cores.  Because Julia expects to be able to run
+// things like heavy linear algebra workloads on all cores, it's best for us
+// to only spawn as many threads as there are performance cores.  Once macOS
+// 12 is released, we'll be able to query the multiple "perf levels" of the
+// cores of a CPU (see this PR [0] to pytorch/cpuinfo for an example) but
+// until it's released, we will just recognize the M1 by its CPU family
+// identifier, then subtract how many efficiency cores we know it has.
+
 JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
 {
 #if defined(HW_AVAILCPU) && defined(HW_NCPU)
@@ -370,6 +385,28 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
         sysctl(nm, 2, &count, &len, NULL, 0);
         if (count < 1) { count = 1; }
     }
+
+#if defined(__APPLE__) && defined(_CPU_AARCH64_)
+//MacOS 12 added a way to query performance cores
+    char buf[7];
+    len = 7;
+    sysctlbyname("kern.osrelease", buf, &len, NULL, 0);
+    if (buf[0] > 1 && buf[1] > 0){
+        len = 4;
+        sysctlbyname("hw.perflevel0.physicalcpu", &count, &len, NULL, 0);
+    }
+    else {
+        int32_t family = 0;
+        len = 4;
+        sysctlbyname("hw.cpufamily", &family, &len, NULL, 0);
+        if (family >= 1 && count > 1) {
+            if (family == CPUFAMILY_ARM_FIRESTORM_ICESTORM) {
+                // We know the Apple M1 has 4 efficiency cores, so subtract them out.
+                count -= 4;
+            }
+        }
+    }
+#endif
     return count;
 #elif defined(_SC_NPROCESSORS_ONLN)
     long count = sysconf(_SC_NPROCESSORS_ONLN);
@@ -393,10 +430,33 @@ JL_DLLEXPORT int jl_cpu_threads(void) JL_NOTSAFEPOINT
 #endif
 }
 
+JL_DLLEXPORT int jl_effective_threads(void) JL_NOTSAFEPOINT
+{
+    int cpu = jl_cpu_threads();
+    int masksize = uv_cpumask_size();
+    if (masksize < 0 || jl_running_under_rr(0))
+        return cpu;
+    uv_thread_t tid = uv_thread_self();
+    char *cpumask = (char *)calloc(masksize, sizeof(char));
+    int err = uv_thread_getaffinity(&tid, cpumask, masksize);
+    if (err) {
+        free(cpumask);
+        jl_safe_printf("WARNING: failed to get thread affinity (%s %d)\n", uv_err_name(err),
+                       err);
+        return cpu;
+    }
+    int n = 0;
+    for (size_t i = 0; i < masksize; i++) {
+        n += cpumask[i];
+    }
+    free(cpumask);
+    return n < cpu ? n : cpu;
+}
+
 
 // -- high resolution timers --
 // Returns time in nanosec
-JL_DLLEXPORT uint64_t jl_hrtime(void)
+JL_DLLEXPORT uint64_t jl_hrtime(void) JL_NOTSAFEPOINT
 {
     return uv_hrtime();
 }
@@ -422,7 +482,7 @@ JL_DLLEXPORT jl_value_t *jl_environ(int i)
 
 // -- child process status --
 
-#if defined _MSC_VER || defined _OS_WINDOWS_
+#if defined _OS_WINDOWS_
 /* Native Woe32 API.  */
 #include <process.h>
 #define waitpid(pid,statusp,options) _cwait (statusp, pid, WAIT_CHILD)
@@ -543,7 +603,7 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
 #elif defined(_OS_WINDOWS_)
 
     wchar_t *pth16 = (wchar_t*)malloc_s(32768 * sizeof(*pth16)); // max long path length
-    DWORD n16 = GetModuleFileNameW((HMODULE)handle,pth16,32768);
+    DWORD n16 = GetModuleFileNameW((HMODULE)handle, pth16, 32768);
     if (n16 <= 0) {
         free(pth16);
         return NULL;
@@ -567,7 +627,7 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
 
     struct link_map *map;
     dlinfo(handle, RTLD_DI_LINKMAP, &map);
-#ifdef JL_MSAN_ENABLED
+#ifdef _COMPILER_MSAN_ENABLED_
     __msan_unpoison(&map,sizeof(struct link_map*));
     if (map) {
         __msan_unpoison(map, sizeof(struct link_map));
@@ -582,23 +642,32 @@ JL_DLLEXPORT const char *jl_pathname_for_handle(void *handle)
 }
 
 #ifdef _OS_WINDOWS_
-static BOOL CALLBACK jl_EnumerateLoadedModulesProc64(
-  _In_      PCTSTR ModuleName,
-  _In_      DWORD64 ModuleBase,
-  _In_      ULONG ModuleSize,
-  _In_opt_  PVOID a
-)
-{
-    jl_array_grow_end((jl_array_t*)a, 1);
-    //XXX: change to jl_arrayset if array storage allocation for Array{String,1} changes:
-    jl_value_t *v = jl_cstr_to_string(ModuleName);
-    jl_array_ptr_set(a, jl_array_dim0(a)-1, v);
-    return TRUE;
-}
-// Takes a handle (as returned from dlopen()) and returns the absolute path to the image loaded
+// Get a list of all the modules in this process.
 JL_DLLEXPORT int jl_dllist(jl_array_t *list)
 {
-    return EnumerateLoadedModules64(GetCurrentProcess(), jl_EnumerateLoadedModulesProc64, list);
+    DWORD cb, cbNeeded;
+    HMODULE *hMods = NULL;
+    unsigned int i;
+    cbNeeded = 1024 * sizeof(*hMods);
+    do {
+        cb = cbNeeded;
+        hMods = (HMODULE*)realloc_s(hMods, cb);
+        if (!EnumProcessModulesEx(GetCurrentProcess(), hMods, cb, &cbNeeded, LIST_MODULES_ALL)) {
+          free(hMods);
+          return FALSE;
+        }
+    } while (cb < cbNeeded);
+    for (i = 0; i < cbNeeded / sizeof(HMODULE); i++) {
+        const char *path = jl_pathname_for_handle(hMods[i]);
+        if (path == NULL)
+            continue;
+        jl_array_grow_end((jl_array_t*)list, 1);
+        jl_value_t *v = jl_cstr_to_string(path);
+        free((char*)path);
+        jl_array_ptr_set(list, jl_array_dim0(list) - 1, v);
+    }
+    free(hMods);
+    return TRUE;
 }
 #endif
 
@@ -646,9 +715,38 @@ JL_DLLEXPORT size_t jl_maxrss(void)
 #endif
 }
 
-JL_DLLEXPORT int jl_threading_enabled(void)
+// Simple `rand()` like function, with global seed and added thread-safety
+// (but slow and insecure)
+static _Atomic(uint64_t) g_rngseed;
+JL_DLLEXPORT uint64_t jl_rand(void) JL_NOTSAFEPOINT
 {
-    return 1;
+    uint64_t max = UINT64_MAX;
+    uint64_t unbias = UINT64_MAX;
+    uint64_t rngseed0 = jl_atomic_load_relaxed(&g_rngseed);
+    uint64_t rngseed;
+    uint64_t rnd;
+    do {
+        rngseed = rngseed0;
+        rnd = cong(max, unbias, &rngseed);
+    } while (!jl_atomic_cmpswap_relaxed(&g_rngseed, &rngseed0, rngseed));
+    return rnd;
+}
+
+JL_DLLEXPORT void jl_srand(uint64_t rngseed) JL_NOTSAFEPOINT
+{
+    jl_atomic_store_relaxed(&g_rngseed, rngseed);
+}
+
+void jl_init_rand(void) JL_NOTSAFEPOINT
+{
+    uint64_t rngseed;
+    if (uv_random(NULL, NULL, &rngseed, sizeof(rngseed), 0, NULL)) {
+        ios_puts("WARNING: Entropy pool not available to seed RNG; using ad-hoc entropy sources.\n", ios_stderr);
+        rngseed = uv_hrtime();
+        rngseed ^= int64hash(uv_os_getpid());
+    }
+    jl_srand(rngseed);
+    srand(rngseed);
 }
 
 #ifdef __cplusplus
diff --git a/src/task.c b/src/task.c
index 7ab569687b570a..349f6ab5451969 100644
--- a/src/task.c
+++ b/src/task.c
@@ -29,6 +29,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <signal.h>
+#include <unistd.h>
 #include <errno.h>
 #include <inttypes.h>
 #include "julia.h"
@@ -40,7 +41,7 @@
 extern "C" {
 #endif
 
-#if defined(JL_ASAN_ENABLED)
+#if defined(_COMPILER_ASAN_ENABLED_)
 static inline void sanitizer_start_switch_fiber(const void* bottom, size_t size) {
     __sanitizer_start_switch_fiber(NULL, bottom, size);
 }
@@ -52,31 +53,58 @@ static inline void sanitizer_start_switch_fiber(const void* bottom, size_t size)
 static inline void sanitizer_finish_switch_fiber(void) {}
 #endif
 
-#if defined(JL_TSAN_ENABLED)
-static inline void tsan_destroy_ctx(jl_ptls_t ptls, jl_ucontext_t *ctx) {
-    if (ctx != &ptls->root_task->ctx) {
-        __tsan_destroy_fiber(ctx->tsan_state);
-    }
-    ctx->tsan_state = NULL;
-}
-static inline void tsan_switch_to_ctx(jl_ucontext_t *ctx)  {
-    __tsan_switch_to_fiber(ctx->tsan_state, 0);
-}
-#else
-static inline void tsan_destroy_ctx(jl_ptls_t ptls, jl_ucontext_t *ctx) {}
-static inline void tsan_switch_to_ctx(jl_ucontext_t *ctx) {}
+#if defined(_COMPILER_TSAN_ENABLED_)
+// must defined as macros, since the function containing them must not return before the longjmp
+#define tsan_destroy_ctx(_ptls, _ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \
+            __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \
+        } \
+        _tsan_macro_ctx->tsan_state = NULL; \
+    } while (0)
+#define tsan_switch_to_ctx(_ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
+    } while (0)
+#ifdef COPY_STACKS
+#define tsan_destroy_copyctx(_ptls, _ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        if (_tsan_macro_ctx != &(_ptls)->root_task->ctx) { \
+            __tsan_destroy_fiber(_tsan_macro_ctx->tsan_state); \
+        } \
+        _tsan_macro_ctx->tsan_state = NULL; \
+    } while (0)
+#define tsan_switch_to_copyctx(_ctx) do { \
+        struct jl_stack_context_t *_tsan_macro_ctx = (_ctx); \
+        __tsan_switch_to_fiber(_tsan_macro_ctx->tsan_state, 0); \
+    } while (0)
 #endif
-
-#if !defined(_OS_WINDOWS_)
-#ifdef JL_HAVE_UCONTEXT
-#include <ucontext.h>
+#else
+// just do minimal type-checking on the arguments
+#define tsan_destroy_ctx(_ptls, _ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        (void)_tsan_macro_ctx; \
+    } while (0)
+#define tsan_switch_to_ctx(_ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        (void)_tsan_macro_ctx; \
+    } while (0)
+#ifdef COPY_STACKS
+#define tsan_destroy_copyctx(_ptls, _ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        (void)_tsan_macro_ctx; \
+    } while (0)
+#define tsan_switch_to_copyctx(_ctx) do { \
+        jl_ucontext_t *_tsan_macro_ctx = (_ctx); \
+        (void)_tsan_macro_ctx; \
+    } while (0)
 #endif
 #endif
 
 // empirically, jl_finish_task needs about 64k stack space to infer/run
 // and additionally, gc-stack reserves 64k for the guard pages
-#if defined(MINSIGSTKSZ) && MINSIGSTKSZ > 131072
-#define MINSTKSZ MINSIGSTKSZ
+#if defined(MINSIGSTKSZ)
+#define MINSTKSZ (MINSIGSTKSZ > 131072 ? MINSIGSTKSZ : 131072)
 #else
 #define MINSTKSZ 131072
 #endif
@@ -90,17 +118,12 @@ static inline void tsan_switch_to_ctx(jl_ucontext_t *ctx) {}
 #define STATIC_OR_JS static
 #endif
 
-extern size_t jl_page_size;
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT;
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT;
 STATIC_OR_JS void jl_set_fiber(jl_ucontext_t *t);
 STATIC_OR_JS void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t);
 STATIC_OR_JS void jl_start_fiber_swap(jl_ucontext_t *savet, jl_ucontext_t *t);
 STATIC_OR_JS void jl_start_fiber_set(jl_ucontext_t *t);
 
-#ifdef JL_HAVE_UNW_CONTEXT
-static JL_THREAD_LOCAL unw_cursor_t jl_basecursor;
-#endif
-
 #ifdef ALWAYS_COPY_STACKS
 # ifndef COPY_STACKS
 # error "ALWAYS_COPY_STACKS requires COPY_STACKS"
@@ -111,7 +134,6 @@ static int always_copy_stacks = 0;
 #endif
 
 #ifdef COPY_STACKS
-
 static void memcpy_a16(uint64_t *to, uint64_t *from, size_t nb)
 {
     memcpy((char*)jl_assume_aligned(to, 16), (char*)jl_assume_aligned(from, 16), nb);
@@ -162,41 +184,63 @@ static void NOINLINE JL_NORETURN restore_stack(jl_task_t *t, jl_ptls_t ptls, cha
     memcpy_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
 
     sanitizer_start_switch_fiber(t->stkbuf, t->bufsz);
-    jl_set_fiber(&t->ctx);
+#if defined(_OS_WINDOWS_)
+    jl_setcontext(&t->ctx.copy_ctx);
+#else
+    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+#endif
     abort(); // unreachable
 }
+
 static void restore_stack2(jl_task_t *t, jl_ptls_t ptls, jl_task_t *lastt)
 {
+    assert(t->copy_stack && !lastt->copy_stack);
     size_t nb = t->copy_stack;
     char *_x = (char*)ptls->stackbase - nb;
     void *_y = t->stkbuf;
     assert(_x != NULL && _y != NULL);
     memcpy_a16((uint64_t*)_x, (uint64_t*)_y, nb); // destroys all but the current stackframe
+#if defined(JL_HAVE_UNW_CONTEXT)
+    volatile int returns = 0;
+    int r = unw_getcontext(&lastt->ctx.ctx);
+    if (++returns == 2) // r is garbage after the first return
+        return;
+    if (r != 0 || returns != 1)
+        abort();
+#elif defined(JL_HAVE_ASM) || defined(JL_HAVE_SIGALTSTACK) || defined(_OS_WINDOWS_)
+    if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0))
+        return;
+#else
+#error COPY_STACKS is incompatible with this platform
+#endif
     sanitizer_start_switch_fiber(t->stkbuf, t->bufsz);
-    jl_swap_fiber(&lastt->ctx, &t->ctx);
-    sanitizer_finish_switch_fiber();
+    tsan_switch_to_copyctx(&t->ctx);
+#if defined(_OS_WINDOWS_)
+    jl_setcontext(&t->ctx.copy_ctx);
+#else
+    jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
+#endif
 }
 #endif
 
 /* Rooted by the base module */
-static jl_function_t *task_done_hook_func JL_GLOBALLY_ROOTED = NULL;
+static _Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL;
 
-void JL_NORETURN jl_finish_task(jl_task_t *t, jl_value_t *resultval JL_MAYBE_UNROOTED)
+void JL_NORETURN jl_finish_task(jl_task_t *t)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    JL_PROBE_RT_FINISH_TASK(ct);
     JL_SIGATOMIC_BEGIN();
-    t->result = resultval;
-    jl_gc_wb(t, t->result);
-    if (t->exception != jl_nothing)
+    if (jl_atomic_load_relaxed(&t->_isexception))
         jl_atomic_store_release(&t->_state, JL_TASK_STATE_FAILED);
     else
         jl_atomic_store_release(&t->_state, JL_TASK_STATE_DONE);
     if (t->copy_stack) // early free of stkbuf
         t->stkbuf = NULL;
     // ensure that state is cleared
-    ptls->in_finalizer = 0;
-    ptls->in_pure_callback = 0;
-    jl_get_ptls_states()->world_age = jl_world_counter;
+    ct->ptls->in_finalizer = 0;
+    ct->ptls->in_pure_callback = 0;
+    ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
     // let the runtime know this task is dead and find a new task to run
     jl_function_t *done = jl_atomic_load_relaxed(&task_done_hook_func);
     if (done == NULL) {
@@ -213,11 +257,11 @@ void JL_NORETURN jl_finish_task(jl_task_t *t, jl_value_t *resultval JL_MAYBE_UNR
             jl_no_exc_handler(jl_current_exception());
         }
     }
-    gc_debug_critical_error();
+    jl_gc_debug_critical_error();
     abort();
 }
 
-JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid)
+JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid)
 {
     size_t off = 0;
 #ifndef _OS_WINDOWS_
@@ -229,24 +273,70 @@ JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid)
         off = ROOT_TASK_STACK_ADJUSTMENT;
     }
 #endif
-    *tid = -1;
-    for (int i = 0; i < jl_n_threads; i++) {
-        jl_ptls_t ptls = jl_all_tls_states[i];
-        if (ptls->current_task == task) {
-            *tid = i;
+    jl_ptls_t ptls2 = task->ptls;
+    *ptid = -1;
+    if (ptls2) {
+        *ptid = jl_atomic_load_relaxed(&task->tid);
 #ifdef COPY_STACKS
-            if (task->copy_stack) {
-                *size = ptls->stacksize;
-                return (char *)ptls->stackbase - *size;
-            }
-#endif
-            break; // continue with normal return
+        if (task->copy_stack) {
+            *size = ptls2->stacksize;
+            return (char *)ptls2->stackbase - *size;
         }
+#endif
     }
     *size = task->bufsz - off;
     return (void *)((char *)task->stkbuf + off);
 }
 
+JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
+                                       char **active_start, char **active_end,
+                                       char **total_start, char **total_end)
+{
+    if (!task->started) {
+        *total_start = *active_start = 0;
+        *total_end = *active_end = 0;
+        return;
+    }
+
+    jl_ptls_t ptls2 = task->ptls;
+    if (task->copy_stack && ptls2) {
+        *total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize;
+        *total_end = *active_end = (char*)ptls2->stackbase;
+    }
+    else if (task->stkbuf) {
+        *total_start = *active_start = (char*)task->stkbuf;
+#ifndef _OS_WINDOWS_
+        if (jl_all_tls_states[0]->root_task == task) {
+            // See jl_init_root_task(). The root task of the main thread
+            // has its buffer enlarged by an artificial 3000000 bytes, but
+            // that means that the start of the buffer usually points to
+            // inaccessible memory. We need to correct for this.
+            *active_start += ROOT_TASK_STACK_ADJUSTMENT;
+            *total_start += ROOT_TASK_STACK_ADJUSTMENT;
+        }
+#endif
+
+        *total_end = *active_end = (char*)task->stkbuf + task->bufsz;
+#ifdef COPY_STACKS
+        // save_stack stores the stack of an inactive task in stkbuf, and the
+        // actual number of used bytes in copy_stack.
+        if (task->copy_stack > 1)
+            *active_end = (char*)task->stkbuf + task->copy_stack;
+#endif
+    }
+    else {
+        // no stack allocated yet
+        *total_start = *active_start = 0;
+        *total_end = *active_end = 0;
+        return;
+    }
+
+    if (task == jl_current_task) {
+        // scan up to current `sp` for current thread and task
+        *active_start = (char*)jl_get_frame_addr();
+    }
+}
+
 // Marked noinline so we can consistently skip the associated frame.
 // `skip` is number of additional frames to skip.
 NOINLINE static void record_backtrace(jl_ptls_t ptls, int skip) JL_NOTSAFEPOINT
@@ -255,38 +345,33 @@ NOINLINE static void record_backtrace(jl_ptls_t ptls, int skip) JL_NOTSAFEPOINT
     ptls->bt_size = rec_backtrace(ptls->bt_data, JL_MAX_BT_SIZE, skip + 1);
 }
 
-JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel)
-{
-    _julia_init(rel);
-}
-
 JL_DLLEXPORT void jl_set_next_task(jl_task_t *task) JL_NOTSAFEPOINT
 {
-    jl_get_ptls_states()->next_task = task;
+    jl_current_task->ptls->next_task = task;
 }
 
 JL_DLLEXPORT jl_task_t *jl_get_next_task(void) JL_NOTSAFEPOINT
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->next_task)
-        return ptls->next_task;
-    return ptls->current_task;
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->next_task)
+        return ct->ptls->next_task;
+    return ct;
 }
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
 const char tsan_state_corruption[] = "TSAN state corrupted. Exiting HARD!\n";
 #endif
 
-static void ctx_switch(jl_ptls_t ptls)
+static void ctx_switch(jl_task_t *lastt)
 {
+    jl_ptls_t ptls = lastt->ptls;
     jl_task_t **pt = &ptls->next_task;
     jl_task_t *t = *pt;
-    assert(t != ptls->current_task);
-    jl_task_t *lastt = ptls->current_task;
+    assert(t != lastt);
     // none of these locks should be held across a task switch
     assert(ptls->locks.len == 0);
 
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     if (lastt->ctx.tsan_state != __tsan_get_current_fiber()) {
         // Something went really wrong - don't even assume that we can
         // use assert/abort which involve lots of signal handling that
@@ -296,22 +381,21 @@ static void ctx_switch(jl_ptls_t ptls)
     }
 #endif
 
-    int killed = lastt->_state != JL_TASK_STATE_RUNNABLE;
+    int killed = jl_atomic_load_relaxed(&lastt->_state) != JL_TASK_STATE_RUNNABLE;
     if (!t->started && !t->copy_stack) {
         // may need to allocate the stack
         if (t->stkbuf == NULL) {
-            t->stkbuf = jl_alloc_fiber(&t->ctx, &t->bufsz, t);
+            t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
             if (t->stkbuf == NULL) {
 #ifdef COPY_STACKS
                 // fall back to stack copying if mmap fails
                 t->copy_stack = 1;
                 t->sticky = 1;
                 t->bufsz = 0;
-#ifdef JL_TSAN_ENABLED
-                memcpy(&t->ctx, &ptls->base_ctx, sizeof(t->ctx) - sizeof(t->ctx.tsan_state));
-#else
-                memcpy(&t->ctx, &ptls->base_ctx, sizeof(t->ctx));
-#endif
+                if (always_copy_stacks)
+                    memcpy(&t->ctx.copy_ctx, &ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
+                else
+                    memcpy(&t->ctx.ctx, &ptls->base_ctx, sizeof(t->ctx.ctx));
 #else
                 jl_throw(jl_memory_exception);
 #endif
@@ -331,7 +415,7 @@ static void ctx_switch(jl_ptls_t ptls)
 #ifdef COPY_STACKS
         if (lastt->copy_stack) { // save the old copy-stack
             save_stack(ptls, lastt, pt); // allocates (gc-safepoint, and can also fail)
-            if (jl_setjmp(lastt->ctx.uc_mcontext, 0)) {
+            if (jl_setjmp(lastt->ctx.copy_ctx.uc_mcontext, 0)) {
                 sanitizer_finish_switch_fiber();
                 // TODO: mutex unlock the thread we just switched from
                 return;
@@ -340,35 +424,36 @@ static void ctx_switch(jl_ptls_t ptls)
         else
 #endif
         *pt = NULL; // can't fail after here: clear the gc-root for the target task now
-        lastt->gcstack = ptls->pgcstack;
     }
 
-    // set up global state for new task
-    lastt->world_age = ptls->world_age;
-    ptls->pgcstack = t->gcstack;
-    ptls->world_age = t->world_age;
-    t->gcstack = NULL;
+    // set up global state for new task and clear global state for old task
+    t->ptls = ptls;
+    jl_atomic_store_relaxed(&ptls->current_task, t);
+    JL_GC_PROMISE_ROOTED(t);
+    jl_signal_fence();
+    jl_set_pgcstack(&t->gcstack);
+    jl_signal_fence();
+    lastt->ptls = NULL;
 #ifdef MIGRATE_TASKS
     ptls->previous_task = lastt;
 #endif
-    ptls->current_task = t;
 
     if (t->started) {
 #ifdef COPY_STACKS
         if (t->copy_stack) {
             if (!killed && !lastt->copy_stack)
                 restore_stack2(t, ptls, lastt);
-            else if (lastt->copy_stack) {
-                tsan_switch_to_ctx(&t->ctx);
-                if (killed)
-                    tsan_destroy_ctx(ptls, &lastt->ctx);
-                restore_stack(t, ptls, NULL);     // (doesn't return)
-            }
             else {
-                tsan_switch_to_ctx(&t->ctx);
+                tsan_switch_to_copyctx(&t->ctx);
                 if (killed)
-                    tsan_destroy_ctx(ptls, &lastt->ctx);
-                restore_stack(t, ptls, (char*)1); // (doesn't return)
+                    tsan_destroy_copyctx(ptls, &lastt->ctx);
+
+                if (lastt->copy_stack) {
+                    restore_stack(t, ptls, NULL); // (doesn't return)
+                }
+                else {
+                    restore_stack(t, ptls, (char*)1); // (doesn't return)
+                }
             }
         }
         else
@@ -390,24 +475,25 @@ static void ctx_switch(jl_ptls_t ptls)
                 }
                 else {
                     jl_swap_fiber(&lastt->ctx, &t->ctx);
-                    sanitizer_finish_switch_fiber();
                 }
             }
         }
     }
     else {
         sanitizer_start_switch_fiber(t->stkbuf, t->bufsz);
-        if (always_copy_stacks) {
+        if (t->copy_stack && always_copy_stacks) {
             tsan_switch_to_ctx(&t->ctx);
             if (killed) {
                 tsan_destroy_ctx(ptls, &lastt->ctx);
             }
 #ifdef COPY_STACKS
-            jl_longjmp(ptls->base_ctx.uc_mcontext, 1);
-            abort(); // unreachable
+#if defined(_OS_WINDOWS_)
+            jl_setcontext(&t->ctx.copy_ctx);
 #else
-            abort(); // Should never happen
+            jl_longjmp(t->ctx.copy_ctx.uc_mcontext, 1);
 #endif
+#endif
+            abort(); // unreachable
         }
         else {
             if (killed) {
@@ -424,69 +510,67 @@ static void ctx_switch(jl_ptls_t ptls)
             }
             else {
                 jl_start_fiber_swap(&lastt->ctx, &t->ctx);
-                sanitizer_finish_switch_fiber();
             }
         }
     }
-}
-
-static jl_ptls_t NOINLINE refetch_ptls(void)
-{
-    return jl_get_ptls_states();
+    sanitizer_finish_switch_fiber();
 }
 
 JL_DLLEXPORT void jl_switch(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     jl_task_t *t = ptls->next_task;
-    jl_task_t *ct = ptls->current_task;
     if (t == ct) {
         return;
     }
-    if (t->_state != JL_TASK_STATE_RUNNABLE || (t->started && t->stkbuf == NULL)) {
-        ct->exception = t->exception;
-        ct->result = t->result;
-        return;
-    }
+    if (t->started && t->stkbuf == NULL)
+        jl_error("attempt to switch to exited task");
     if (ptls->in_finalizer)
         jl_error("task switch not allowed from inside gc finalizer");
     if (ptls->in_pure_callback)
         jl_error("task switch not allowed from inside staged nor pure functions");
-    if (t->sticky && jl_atomic_load_acquire(&t->tid) == -1) {
-        // manually yielding to a task
-        if (jl_atomic_compare_exchange(&t->tid, -1, ptls->tid) != -1)
-            jl_error("cannot switch to task running on another thread");
-    }
-    else if (t->tid != ptls->tid) {
+    if (!jl_set_task_tid(t, jl_atomic_load_relaxed(&ct->tid))) // manually yielding to a task
         jl_error("cannot switch to task running on another thread");
-    }
+
+    JL_PROBE_RT_PAUSE_TASK(ct);
+
+    // Store old values on the stack and reset
     sig_atomic_t defer_signal = ptls->defer_signal;
     int8_t gc_state = jl_gc_unsafe_enter(ptls);
+    int finalizers_inhibited = ptls->finalizers_inhibited;
+    ptls->finalizers_inhibited = 0;
 
 #ifdef ENABLE_TIMINGS
-    jl_timing_block_t *blk = ct->timing_stack;
+    jl_timing_block_t *blk = ptls->timing_stack;
     if (blk)
         jl_timing_block_stop(blk);
+    ptls->timing_stack = NULL;
 #endif
 
-    ctx_switch(ptls);
+    ctx_switch(ct);
 
 #ifdef MIGRATE_TASKS
-    ptls = refetch_ptls();
+    ptls = ct->ptls;
     t = ptls->previous_task;
-    assert(t->tid == ptls->tid);
+    ptls->previous_task = NULL;
+    assert(t != ct);
+    assert(jl_atomic_load_relaxed(&t->tid) == ptls->tid);
     if (!t->sticky && !t->copy_stack)
-        t->tid = -1;
-#elif defined(NDEBUG)
-    (void)refetch_ptls();
+        jl_atomic_store_release(&t->tid, -1);
 #else
-    assert(ptls == refetch_ptls());
+    assert(ptls == ct->ptls);
 #endif
 
-    ct = ptls->current_task;
+    // Pop old values back off the stack
+    assert(ct == jl_current_task &&
+           0 != ct->ptls &&
+           0 == ptls->finalizers_inhibited);
+    ptls->finalizers_inhibited = finalizers_inhibited;
 
 #ifdef ENABLE_TIMINGS
-    assert(blk == ct->timing_stack);
+    assert(ptls->timing_stack == NULL);
+    ptls->timing_stack = blk;
     if (blk)
         jl_timing_block_start(blk);
 #else
@@ -498,6 +582,8 @@ JL_DLLEXPORT void jl_switch(void)
     ptls->defer_signal = defer_signal;
     if (other_defer_signal && !defer_signal)
         jl_sigint_safepoint(ptls);
+
+    JL_PROBE_RT_RUN_TASK(ct);
 }
 
 JL_DLLEXPORT void jl_switchto(jl_task_t **pt)
@@ -508,39 +594,39 @@ JL_DLLEXPORT void jl_switchto(jl_task_t **pt)
 
 JL_DLLEXPORT JL_NORETURN void jl_no_exc_handler(jl_value_t *e)
 {
-    jl_printf(JL_STDERR, "fatal: error thrown and no exception handler available.\n");
-    jl_static_show(JL_STDERR, e);
-    jl_printf(JL_STDERR, "\n");
-    jlbacktrace();
+    // NULL exception objects are used when rethrowing. we don't have a handler to process
+    // the exception stack, so at least report the exception at the top of the stack.
+    if (!e)
+        e = jl_current_exception();
+
+    jl_printf((JL_STREAM*)STDERR_FILENO, "fatal: error thrown and no exception handler available.\n");
+    jl_static_show((JL_STREAM*)STDERR_FILENO, e);
+    jl_printf((JL_STREAM*)STDERR_FILENO, "\n");
+    jlbacktrace(); // written to STDERR_FILENO
     jl_exit(1);
 }
 
 // yield to exception handler
-static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED)
+static void JL_NORETURN throw_internal(jl_task_t *ct, jl_value_t *exception JL_MAYBE_UNROOTED)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    assert(!jl_get_safe_restore());
+    jl_ptls_t ptls = ct->ptls;
     ptls->io_wait = 0;
-    if (ptls->safe_restore)
-        jl_longjmp(*ptls->safe_restore, 1);
-    // During startup
-    if (!ptls->current_task)
-        jl_no_exc_handler(exception);
     JL_GC_PUSH1(&exception);
     jl_gc_unsafe_enter(ptls);
     if (exception) {
         // The temporary ptls->bt_data is rooted by special purpose code in the
         // GC. This exists only for the purpose of preserving bt_data until we
         // set ptls->bt_size=0 below.
-        assert(ptls->current_task);
-        jl_push_excstack(&ptls->current_task->excstack, exception,
+        jl_push_excstack(&ct->excstack, exception,
                           ptls->bt_data, ptls->bt_size);
         ptls->bt_size = 0;
     }
-    assert(ptls->current_task->excstack && ptls->current_task->excstack->top);
-    jl_handler_t *eh = ptls->current_task->eh;
+    assert(ct->excstack && ct->excstack->top);
+    jl_handler_t *eh = ct->eh;
     if (eh != NULL) {
 #ifdef ENABLE_TIMINGS
-        jl_timing_block_t *cur_block = ptls->current_task->timing_stack;
+        jl_timing_block_t *cur_block = ptls->timing_stack;
         while (cur_block && eh->timing_stack != cur_block) {
             cur_block = jl_pop_timing_block(cur_block);
         }
@@ -557,51 +643,122 @@ static void JL_NORETURN throw_internal(jl_value_t *exception JL_MAYBE_UNROOTED)
 // record backtrace and raise an error
 JL_DLLEXPORT void jl_throw(jl_value_t *e JL_MAYBE_UNROOTED)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
     assert(e != NULL);
-    if (ptls->safe_restore)
-        throw_internal(NULL);
-    record_backtrace(ptls, 1);
-    throw_internal(e);
+    jl_jmp_buf *safe_restore = jl_get_safe_restore();
+    if (safe_restore)
+        jl_longjmp(*safe_restore, 1);
+    jl_task_t *ct = jl_get_current_task();
+    if (ct == NULL) // During startup
+        jl_no_exc_handler(e);
+    JL_GC_PROMISE_ROOTED(ct);
+    record_backtrace(ct->ptls, 1);
+    throw_internal(ct, e);
 }
 
 // rethrow with current excstack state
 JL_DLLEXPORT void jl_rethrow(void)
 {
-    jl_excstack_t *excstack = jl_get_ptls_states()->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *excstack = ct->excstack;
     if (!excstack || excstack->top == 0)
         jl_error("rethrow() not allowed outside a catch block");
-    throw_internal(NULL);
+    throw_internal(ct, NULL);
 }
 
 // Special case throw for errors detected inside signal handlers.  This is not
 // (cannot be) called directly in the signal handler itself, but is returned to
 // after the signal handler exits.
-JL_DLLEXPORT void jl_sig_throw(void)
+JL_DLLEXPORT void JL_NORETURN jl_sig_throw(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+CFI_NORETURN
+    jl_jmp_buf *safe_restore = jl_get_safe_restore();
+    if (safe_restore)
+        jl_longjmp(*safe_restore, 1);
+    jl_task_t *ct = jl_current_task;
+    jl_ptls_t ptls = ct->ptls;
     jl_value_t *e = ptls->sig_exception;
     ptls->sig_exception = NULL;
-    throw_internal(e);
+    throw_internal(ct, e);
 }
 
 JL_DLLEXPORT void jl_rethrow_other(jl_value_t *e JL_MAYBE_UNROOTED)
 {
     // TODO: Should uses of `rethrow(exc)` be replaced with a normal throw, now
     // that exception stacks allow root cause analysis?
-    jl_excstack_t *excstack = jl_get_ptls_states()->current_task->excstack;
+    jl_task_t *ct = jl_current_task;
+    jl_excstack_t *excstack = ct->excstack;
     if (!excstack || excstack->top == 0)
         jl_error("rethrow(exc) not allowed outside a catch block");
     // overwrite exception on top of stack. see jl_excstack_exception
     jl_excstack_raw(excstack)[excstack->top-1].jlvalue = e;
     JL_GC_PROMISE_ROOTED(e);
-    throw_internal(NULL);
+    throw_internal(ct, NULL);
+}
+
+/* This is xoshiro256++ 1.0, used for tasklocal random number generation in Julia.
+   This implementation is intended for embedders and internal use by the runtime, and is
+   based on the reference implementation at https://prng.di.unimi.it
+
+   Credits go to David Blackman and Sebastiano Vigna for coming up with this PRNG.
+   They described xoshiro256++ in "Scrambled Linear Pseudorandom Number Generators",
+   ACM Trans. Math. Softw., 2021.
+
+   There is a pure Julia implementation in stdlib that tends to be faster when used from
+   within Julia, due to inlining and more agressive architecture-specific optimizations.
+*/
+uint64_t jl_genrandom(uint64_t rngState[4]) JL_NOTSAFEPOINT
+{
+    uint64_t s0 = rngState[0];
+    uint64_t s1 = rngState[1];
+    uint64_t s2 = rngState[2];
+    uint64_t s3 = rngState[3];
+
+    uint64_t t = s1 << 17;
+    uint64_t tmp = s0 + s3;
+    uint64_t res = ((tmp << 23) | (tmp >> 41)) + s0;
+    s2 ^= s0;
+    s3 ^= s1;
+    s1 ^= s2;
+    s0 ^= s3;
+    s2 ^= t;
+    s3 = (s3 << 45) | (s3 >> 19);
+
+    rngState[0] = s0;
+    rngState[1] = s1;
+    rngState[2] = s2;
+    rngState[3] = s3;
+    return res;
+}
+
+static void rng_split(jl_task_t *from, jl_task_t *to) JL_NOTSAFEPOINT
+{
+    /* TODO: consider a less ad-hoc construction
+       Ideally we could just use the output of the random stream to seed the initial
+       state of the child. Out of an overabundance of caution we multiply with
+       effectively random coefficients, to break possible self-interactions.
+
+       It is not the goal to mix bits -- we work under the assumption that the
+       source is well-seeded, and its output looks effectively random.
+       However, xoshiro has never been studied in the mode where we seed the
+       initial state with the output of another xoshiro instance.
+
+       Constants have nothing up their sleeve:
+       0x02011ce34bce797f == hash(UInt(1))|0x01
+       0x5a94851fb48a6e05 == hash(UInt(2))|0x01
+       0x3688cf5d48899fa7 == hash(UInt(3))|0x01
+       0x867b4bb4c42e5661 == hash(UInt(4))|0x01
+    */
+    to->rngState[0] = 0x02011ce34bce797f * jl_genrandom(from->rngState);
+    to->rngState[1] = 0x5a94851fb48a6e05 * jl_genrandom(from->rngState);
+    to->rngState[2] = 0x3688cf5d48899fa7 * jl_genrandom(from->rngState);
+    to->rngState[3] = 0x867b4bb4c42e5661 * jl_genrandom(from->rngState);
 }
 
 JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_task_t *t = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
+    jl_task_t *ct = jl_current_task;
+    jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type);
+    JL_PROBE_RT_NEW_TASK(ct, t);
     t->copy_stack = 0;
     if (ssize == 0) {
         // stack size unspecified; use default
@@ -612,81 +769,77 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
         else {
             t->bufsz = JL_STACK_SIZE;
         }
+        t->stkbuf = NULL;
     }
     else {
         // user requested dedicated stack of a certain size
         if (ssize < MINSTKSZ)
             ssize = MINSTKSZ;
         t->bufsz = ssize;
-        t->stkbuf = jl_alloc_fiber(&t->ctx, &t->bufsz, t);
+        t->stkbuf = jl_alloc_fiber(&t->ctx.ctx, &t->bufsz, t);
         if (t->stkbuf == NULL)
             jl_throw(jl_memory_exception);
     }
     t->next = jl_nothing;
     t->queue = jl_nothing;
     t->tls = jl_nothing;
-    t->_state = JL_TASK_STATE_RUNNABLE;
+    jl_atomic_store_relaxed(&t->_state, JL_TASK_STATE_RUNNABLE);
     t->start = start;
     t->result = jl_nothing;
     t->donenotify = completion_future;
-    t->exception = jl_nothing;
+    jl_atomic_store_relaxed(&t->_isexception, 0);
     // Inherit logger state from parent task
-    t->logstate = ptls->current_task->logstate;
+    t->logstate = ct->logstate;
+    // Fork task-local random state from parent
+    rng_split(ct, t);
     // there is no active exception handler available on this stack yet
     t->eh = NULL;
     t->sticky = 1;
     t->gcstack = NULL;
     t->excstack = NULL;
-    t->stkbuf = NULL;
     t->started = 0;
-    t->prio = -1;
-    t->tid = -1;
-#ifdef ENABLE_TIMINGS
-    t->timing_stack = jl_root_timing;
-#endif
+    t->priority = 0;
+    jl_atomic_store_relaxed(&t->tid, t->copy_stack ? jl_atomic_load_relaxed(&ct->tid) : -1); // copy_stacks are always pinned since they can't be moved
+    t->threadpoolid = ct->threadpoolid;
+    t->ptls = NULL;
+    t->world_age = ct->world_age;
 
+#ifdef COPY_STACKS
+    if (!t->copy_stack) {
 #if defined(JL_DEBUG_BUILD)
-    if (!t->copy_stack)
         memset(&t->ctx, 0, sizeof(t->ctx));
 #endif
-#ifdef COPY_STACKS
-    if (t->copy_stack)
-        memcpy(&t->ctx, &ptls->base_ctx, sizeof(t->ctx));
+    }
+    else {
+        if (always_copy_stacks)
+            memcpy(&t->ctx.copy_ctx, &ct->ptls->copy_stack_ctx, sizeof(t->ctx.copy_ctx));
+        else
+            memcpy(&t->ctx.ctx, &ct->ptls->base_ctx, sizeof(t->ctx.ctx));
+    }
 #endif
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     t->ctx.tsan_state = __tsan_create_fiber(0);
 #endif
     return t;
 }
 
-JL_DLLEXPORT jl_value_t *jl_get_current_task(void)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return (jl_value_t*)ptls->current_task;
-}
-
-JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return ptls->safe_restore;
-}
-
-JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
+// a version of jl_current_task safe for unmanaged threads
+JL_DLLEXPORT jl_task_t *jl_get_current_task(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    ptls->safe_restore = sr;
+    jl_gcframe_t **pgcstack = jl_get_pgcstack();
+    return pgcstack == NULL ? NULL : container_of(pgcstack, jl_task_t, gcstack);
 }
 
 #ifdef JL_HAVE_ASYNCIFY
 JL_DLLEXPORT jl_ucontext_t *task_ctx_ptr(jl_task_t *t)
 {
-    return &t->ctx;
+    return &t->ctx.ctx;
 }
 
 JL_DLLEXPORT jl_value_t *jl_get_root_task(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return (jl_value_t*)ptls->root_task;
+    jl_task_t *ct = jl_current_task;
+    return (jl_value_t*)ct->ptls->root_task;
 }
 
 JL_DLLEXPORT void jl_task_wait()
@@ -695,10 +848,11 @@ JL_DLLEXPORT void jl_task_wait()
     if (!wait_func) {
         wait_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("wait"));
     }
-    size_t last_age = jl_get_ptls_states()->world_age;
-    jl_get_ptls_states()->world_age = jl_get_world_counter();
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_get_world_counter();
     jl_apply(&wait_func, 1);
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
 }
 
 JL_DLLEXPORT void jl_schedule_task(jl_task_t *task)
@@ -707,11 +861,12 @@ JL_DLLEXPORT void jl_schedule_task(jl_task_t *task)
     if (!sched_func) {
         sched_func = (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("schedule"));
     }
-    size_t last_age = jl_get_ptls_states()->world_age;
-    jl_get_ptls_states()->world_age = jl_get_world_counter();
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
+    ct->world_age = jl_get_world_counter();
     jl_value_t *args[] = {(jl_value_t*)sched_func, (jl_value_t*)task};
     jl_apply(args, 2);
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
 }
 #endif
 
@@ -725,40 +880,47 @@ void jl_init_tasks(void) JL_GC_DISABLED
         else if (!strcmp(acs, "0") || !strcmp(acs, "no"))
             always_copy_stacks = 0;
         else {
-            jl_printf(JL_STDERR, "invalid JULIA_COPY_STACKS value: %s\n", acs);
+            jl_safe_printf("invalid JULIA_COPY_STACKS value: %s\n", acs);
             exit(1);
         }
     }
+#ifndef COPY_STACKS
+    if (always_copy_stacks) {
+        jl_safe_printf("Julia built without COPY_STACKS support");
+        exit(1);
+    }
+#endif
 }
 
 STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
 {
-#ifdef _OS_WINDOWS_
-#if defined(_CPU_X86_64_)
-    // install the unhandled exception hanlder at the top of our stack
-    // to call directly into our personality handler
-    asm volatile ("\t.seh_handler __julia_personality, @except\n\t.text");
-#endif
-#endif
-
+CFI_NORETURN
     // this runs the first time we switch to a task
     sanitizer_finish_switch_fiber();
-    jl_ptls_t ptls = jl_get_ptls_states();
-    jl_task_t *t = ptls->current_task;
+#ifdef __clang_gcanalyzer__
+    jl_task_t *ct = jl_get_current_task();
+    JL_GC_PROMISE_ROOTED(ct);
+#else
+    jl_task_t *ct = jl_current_task;
+#endif
+    jl_ptls_t ptls = ct->ptls;
     jl_value_t *res;
+    assert(ptls->finalizers_inhibited == 0);
 
 #ifdef MIGRATE_TASKS
     jl_task_t *pt = ptls->previous_task;
+    ptls->previous_task = NULL;
     if (!pt->sticky && !pt->copy_stack)
-        pt->tid = -1;
+        jl_atomic_store_release(&pt->tid, -1);
 #endif
 
-    t->started = 1;
-    if (t->exception != jl_nothing) {
+    ct->started = 1;
+    JL_PROBE_RT_START_TASK(ct);
+    if (jl_atomic_load_relaxed(&ct->_isexception)) {
         record_backtrace(ptls, 0);
-        jl_push_excstack(&t->excstack, t->exception,
+        jl_push_excstack(&ct->excstack, ct->result,
                          ptls->bt_data, ptls->bt_size);
-        res = t->exception;
+        res = ct->result;
     }
     else {
         JL_TRY {
@@ -767,19 +929,19 @@ STATIC_OR_JS void NOINLINE JL_NORETURN start_task(void)
                 jl_sigint_safepoint(ptls);
             }
             JL_TIMING(ROOT);
-            ptls->world_age = jl_world_counter;
-            res = jl_apply(&t->start, 1);
+            res = jl_apply(&ct->start, 1);
         }
         JL_CATCH {
             res = jl_current_exception();
-            t->exception = res;
-            jl_gc_wb(t, res);
+            jl_atomic_store_relaxed(&ct->_isexception, 1);
             goto skip_pop_exception;
         }
 skip_pop_exception:;
     }
-    jl_finish_task(t, res);
-    gc_debug_critical_error();
+    ct->result = res;
+    jl_gc_wb(ct, ct->result);
+    jl_finish_task(ct);
+    jl_gc_debug_critical_error();
     abort();
 }
 
@@ -787,11 +949,10 @@ skip_pop_exception:;
 #if defined(JL_HAVE_UCONTEXT)
 #ifdef _OS_WINDOWS_
 #define setcontext jl_setcontext
-#define getcontext jl_getcontext
 #define swapcontext jl_swapcontext
 #define makecontext jl_makecontext
 #endif
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
 {
 #ifndef _OS_WINDOWS_
     int r = getcontext(t);
@@ -813,43 +974,81 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) J
 }
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    setcontext(t);
+    setcontext(&t->ctx);
 }
 static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
     tsan_switch_to_ctx(t);
-    swapcontext(lastt, t);
+    swapcontext(&lastt->ctx, &t->ctx);
 }
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     tsan_switch_to_ctx(t);
-    swapcontext(lastt, t);
+    swapcontext(&lastt->ctx, &t->ctx);
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    setcontext(t);
+    setcontext(&t->ctx);
 }
-static void jl_init_basefiber(size_t ssize)
+#endif
+
+#if defined(JL_HAVE_UNW_CONTEXT) || defined(JL_HAVE_ASM)
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
-    ptls->stackbase = stkbuf + ssize;
-    ptls->stacksize = ssize;
+    char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
+    if (stkbuf == NULL)
+        return NULL;
+#ifndef __clang_gcanalyzer__
+    ((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber
+    ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber
+#endif
+    return stkbuf;
 }
 #endif
 
 #if defined(JL_HAVE_UNW_CONTEXT)
-static void start_basefiber(void)
+static inline void jl_unw_swapcontext(unw_context_t *old, unw_cursor_t *c)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0))
-        start_task(); // sanitizer_finish_switch_fiber is part of start_task
-    sanitizer_start_switch_fiber(jl_root_task->stkbuf, jl_root_task->bufsz);
-    tsan_switch_to_ctx(&jl_root_task->ctx);
-    jl_longjmp(jl_root_task->ctx.uc_mcontext, 1);
-    abort(); // unreachable
+    volatile int returns = 0;
+    int r = unw_getcontext(old);
+    if (++returns == 2) // r is garbage after the first return
+        return;
+    if (r != 0 || returns != 1)
+        abort();
+    unw_resume(c);
+}
+static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
+{
+    unw_cursor_t c;
+    int r = unw_init_local(&c, &t->ctx);
+    if (r < 0)
+        abort();
+    jl_unw_swapcontext(&lastt->ctx, &c);
+}
+static void jl_set_fiber(jl_ucontext_t *t)
+{
+    unw_cursor_t c;
+    int r = unw_init_local(&c, &t->ctx);
+    if (r < 0)
+        abort();
+    unw_resume(&c);
+}
+#elif defined(JL_HAVE_ASM)
+static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
+{
+    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
+        return;
+    tsan_switch_to_ctx(t);
+    jl_set_fiber(t); // doesn't return
+}
+static void jl_set_fiber(jl_ucontext_t *t)
+{
+    jl_longjmp(t->ctx.uc_mcontext, 1);
 }
+#endif
+
+#if defined(JL_HAVE_UNW_CONTEXT) && !defined(JL_HAVE_ASM)
 #if defined(_CPU_X86_) || defined(_CPU_X86_64_)
 #define PUSH_RET(ctx, stk) \
     do { \
@@ -858,98 +1057,83 @@ static void start_basefiber(void)
     } while (0)
 #elif defined(_CPU_ARM_)
 #define PUSH_RET(ctx, stk) \
-    unw_set_reg(ctx, UNW_ARM_R14, 0) /* put NULL into the LR */
+    if (unw_set_reg(ctx, UNW_ARM_R14, 0)) /* put NULL into the LR */ \
+        abort();
 #else
 #error please define how to simulate a CALL on this platform
 #endif
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
-{
-    char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
-    if (stkbuf == NULL)
-        return NULL;
-    char *stk = stkbuf;
-    stk += *ssize;
-    PUSH_RET(&jl_basecursor, stk);
-    if (unw_set_reg(&jl_basecursor, UNW_REG_SP, (uintptr_t)stk) != 0) {
-        jl_free_stack((void*)stkbuf, *ssize);
-        jl_error("unw_set_reg UNW_REG_SP failed");
-    }
-    uintptr_t fn;
-    if (t == &ptls->base_ctx)
-        fn = (uintptr_t)&start_basefiber;
-    else
-        fn = (uintptr_t)&start_task;
-    if (unw_set_reg(&jl_basecursor, UNW_REG_IP, fn) != 0) {
-        jl_free_stack((void*)stkbuf, *ssize);
-        jl_error("unw_set_reg UNW_REG_IP failed");
-    }
-    return stkbuf;
-}
-
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-    unw_resume(&jl_basecursor); // (doesn't return)
+    unw_cursor_t c;
+    char *stk = ((char**)&t->ctx)[0];
+    size_t ssize = ((size_t*)&t->ctx)[1];
+    uintptr_t fn = (uintptr_t)&start_task;
+    stk += ssize;
+    int r = unw_getcontext(&t->ctx);
+    if (r)
+        abort();
+    if (unw_init_local(&c, &t->ctx))
+        abort();
+    PUSH_RET(&c, stk);
+#if defined __linux__
+#error savannah nongnu libunwind is incapable of setting UNW_REG_SP, as required
+#endif
+    if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk))
+        abort();
+    if (unw_set_reg(&c, UNW_REG_IP, fn))
+        abort();
+    unw_resume(&c); // (doesn't return)
 }
-
 static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
     assert(lastt);
-    if (jl_setjmp(lastt->uc_mcontext, 0))
-        return;
-    tsan_switch_to_ctx(t);
-    jl_start_fiber_set(t); // doesn't return
-}
-
-static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
-{
-    if (jl_setjmp(lastt->uc_mcontext, 0))
+    unw_cursor_t c;
+    char *stk = ((char**)&t->ctx)[0];
+    size_t ssize = ((size_t*)&t->ctx)[1];
+    uintptr_t fn = (uintptr_t)&start_task;
+    stk += ssize;
+    volatile int returns = 0;
+    int r = unw_getcontext(&lastt->ctx);
+    if (++returns == 2) // r is garbage after the first return
         return;
-    tsan_switch_to_ctx(t);
-    jl_longjmp(t->uc_mcontext, 1); // (doesn't return)
-}
-static void jl_set_fiber(jl_ucontext_t *t)
-{
-    tsan_switch_to_ctx(t);
-    jl_longjmp(t->uc_mcontext, 1);
-}
-static void jl_init_basefiber(size_t ssize)
-{
-    int r = unw_getcontext(&ptls->base_ctx);
+    if (r != 0 || returns != 1)
+        abort();
+    r = unw_getcontext(&t->ctx);
     if (r != 0)
-        jl_error("unw_getcontext failed");
-    r = unw_init_local(&jl_basecursor, &ptls->base_ctx);
-    if (r != 0)
-        jl_error("unw_init_local failed");
-#ifdef COPY_STACKS
-    jl_ptls_t ptls = jl_get_ptls_states();
-    char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
-    ptls->stackbase = stkbuf + ssize;
-    ptls->stacksize = ssize;
-    sanitizer_start_switch_fiber(stkbuf, sksize);
-    jl_start_fiber_swap(jl_root_task, &ptls->base_ctx); // finishes initializing jl_basectx
-    sanitizer_finish_switch_fiber();
-#endif
+        abort();
+    if (unw_init_local(&c, &t->ctx))
+        abort();
+    PUSH_RET(&c, stk);
+    if (unw_set_reg(&c, UNW_REG_SP, (uintptr_t)stk))
+        abort();
+    if (unw_set_reg(&c, UNW_REG_IP, fn))
+        abort();
+    jl_unw_swapcontext(&lastt->ctx, &c);
 }
 #endif
 
 #if defined(JL_HAVE_ASM)
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
+static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    char *stkbuf = (char*)jl_malloc_stack(ssize, owner);
-    if (stkbuf == NULL)
-        return NULL;
-#ifndef __clang_analyzer__
-    ((char**)t)[0] = stkbuf; // stash the stack pointer somewhere for start_fiber
-    ((size_t*)t)[1] = *ssize; // stash the stack size somewhere for start_fiber
+    assert(lastt);
+#ifdef JL_HAVE_UNW_CONTEXT
+    volatile int returns = 0;
+    int r = unw_getcontext(&lastt->ctx);
+    if (++returns == 2) // r is garbage after the first return
+        return;
+    if (r != 0 || returns != 1)
+        abort();
+#else
+    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
+        return;
 #endif
-    return stkbuf;
+    tsan_switch_to_ctx(t);
+    jl_start_fiber_set(t); // doesn't return
 }
-
 static void jl_start_fiber_set(jl_ucontext_t *t)
 {
-
-    char *stk = ((char**)t)[0];
-    size_t ssize = ((size_t*)t)[1];
+    char *stk = ((char**)&t->ctx)[0];
+    size_t ssize = ((size_t*)&t->ctx)[1];
     uintptr_t fn = (uintptr_t)&start_task;
     stk += ssize;
 #ifdef _CPU_X86_64_
@@ -1016,50 +1200,20 @@ static void jl_start_fiber_set(jl_ucontext_t *t)
 #endif
     __builtin_unreachable();
 }
-
-static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
-{
-    assert(lastt);
-    if (jl_setjmp(lastt->uc_mcontext, 0))
-        return;
-    tsan_switch_to_ctx(t);
-    jl_start_fiber_set(t);
-}
-
-static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
-{
-    if (jl_setjmp(lastt->uc_mcontext, 0))
-        return;
-    tsan_switch_to_ctx(t);
-    jl_longjmp(t->uc_mcontext, 1); // (doesn't return)
-}
-static void jl_set_fiber(jl_ucontext_t *t)
-{
-    jl_longjmp(t->uc_mcontext, 1);
-}
-static void jl_init_basefiber(size_t ssize)
-{
-#ifdef COPY_STACKS
-    jl_ptls_t ptls = jl_get_ptls_states();
-    char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
-    ptls->stackbase = stkbuf + ssize;
-    ptls->stacksize = ssize;
-#endif
-}
 #endif
 
 #if defined(JL_HAVE_SIGALTSTACK)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN support not currently implemented for this tasking model
 #endif
 
-static void start_basefiber(void)
+static void start_basefiber(int sig)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_ptls_t ptls = jl_current_task->ptls;
     if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0))
         start_task(); // sanitizer_finish_switch_fiber is part of start_task
 }
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
 {
     stack_t uc_stack, osigstk;
     struct sigaction sa, osa;
@@ -1068,8 +1222,9 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
     if (stk == NULL)
         return NULL;
     // setup
-    jl_ucontext_t base_ctx;
-    memcpy(&base_ctx, &ptls->base_ctx, sizeof(ptls->base_ctx));
+    jl_ptls_t ptls = jl_current_task->ptls;
+    _jl_ucontext_t base_ctx;
+    memcpy(&base_ctx, &ptls->base_ctx, sizeof(base_ctx));
     sigfillset(&set);
     if (sigprocmask(SIG_BLOCK, &set, &oset) != 0) {
        jl_free_stack(stk, *ssize);
@@ -1109,50 +1264,42 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner)
        jl_free_stack(stk, *ssize);
        jl_error("sigprocmask failed");
     }
-    memcpy(&t, &ptls->base_ctx, sizeof(ptls->base_ctx));
-    memcpy(&ptls->base_ctx, &base_ctx, sizeof(ptls->base_ctx));
+    if (&ptls->base_ctx != t) {
+        memcpy(&t, &ptls->base_ctx, sizeof(base_ctx));
+        memcpy(&ptls->base_ctx, &base_ctx, sizeof(base_ctx)); // restore COPY_STACKS context
+    }
     return (char*)stk;
 }
 static void jl_start_fiber_set(jl_ucontext_t *t) {
-    jl_longjmp(t->uc_mcontext, 1); // (doesn't return)
+    jl_longjmp(t->ctx.uc_mcontext, 1); // (doesn't return)
 }
 static void jl_start_fiber_swap(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    if (lastt && jl_setjmp(lastt->uc_mcontext, 0))
+    assert(lastt);
+    if (lastt && jl_setjmp(lastt->ctx.uc_mcontext, 0))
         return;
+    tsan_switch_to_ctx(t);
     jl_start_fiber_set(t);
 }
 static void jl_swap_fiber(jl_ucontext_t *lastt, jl_ucontext_t *t)
 {
-    if (jl_setjmp(lastt->uc_mcontext, 0))
+    if (jl_setjmp(lastt->ctx.uc_mcontext, 0))
         return;
-    jl_longjmp(t->uc_mcontext, 1); // (doesn't return)
+    tsan_switch_to_ctx(t);
+    jl_start_fiber_set(t); // doesn't return
 }
 static void jl_set_fiber(jl_ucontext_t *t)
 {
-    jl_longjmp(t->uc_mcontext, 1);
-}
-static void jl_init_basefiber(size_t ssize)
-{
-#ifdef COPY_STACKS
-    jl_ptls_t ptls = jl_get_ptls_states();
-    char *stkbuf = jl_alloc_fiber(jl_root_task, &ssize, NULL);
-    ptls->stackbase = stkbuf + ssize;
-    ptls->stacksize = ssize;
-    memcpy(&ptls->base_ctx, &jl_root_task->ctx, sizeof(ptls->base_ctx));
-#endif
+    jl_longjmp(t->ctx.uc_mcontext, 1);
 }
 #endif
 
 #if defined(JL_HAVE_ASYNCIFY)
-#if defined(JL_TSAN_ENABLED)
+#if defined(_COMPILER_TSAN_ENABLED_)
 #error TSAN support not currently implemented for this tasking model
 #endif
 
-static void jl_init_basefiber(size_t ssize)
-{
-}
-static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
+static char *jl_alloc_fiber(_jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) JL_NOTSAFEPOINT
 {
     void *stk = jl_malloc_stack(ssize, owner);
     if (stk == NULL)
@@ -1165,15 +1312,24 @@ static char *jl_alloc_fiber(jl_ucontext_t *t, size_t *ssize, jl_task_t *owner) J
 #endif
 
 // Initialize a root task using the given stack.
-void jl_init_root_task(void *stack_lo, void *stack_hi)
+jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->root_task == NULL) {
-        ptls->root_task = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
-        memset(ptls->root_task, 0, sizeof(jl_task_t));
-        ptls->root_task->tls = jl_nothing;
-    }
-    ptls->current_task = ptls->root_task;
+    assert(ptls->root_task == NULL);
+    // We need `gcstack` in `Task` to allocate Julia objects; *including* the `Task` type.
+    // However, to allocate a `Task` via `jl_gc_alloc` as done in `jl_init_root_task`,
+    // we need the `Task` type itself. We use stack-allocated "raw" `jl_task_t` struct to
+    // workaround this chicken-and-egg problem. Note that this relies on GC to be turned
+    // off as GC fails because we don't/can't allocate the type tag.
+    struct {
+        jl_value_t *type;
+        jl_task_t value;
+    } bootstrap_task = {0};
+    jl_set_pgcstack(&bootstrap_task.value.gcstack);
+    bootstrap_task.value.ptls = ptls;
+    if (jl_nothing == NULL) // make a placeholder
+        jl_nothing = jl_gc_permobj(0, jl_nothing_type);
+    jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type);
+    memset(ct, 0, sizeof(jl_task_t));
     void *stack = stack_lo;
     size_t ssize = (char*)stack_hi - (char*)stack_lo;
 #ifndef _OS_WINDOWS_
@@ -1183,45 +1339,64 @@ void jl_init_root_task(void *stack_lo, void *stack_hi)
     }
 #endif
     if (always_copy_stacks) {
-        ptls->current_task->copy_stack = 1;
-        ptls->current_task->stkbuf = NULL;
-        ptls->current_task->bufsz = 0;
+        ct->copy_stack = 1;
+        ct->stkbuf = NULL;
+        ct->bufsz = 0;
     }
     else {
-        ptls->current_task->copy_stack = 0;
-        ptls->current_task->stkbuf = stack;
-        ptls->current_task->bufsz = ssize;
+        ct->copy_stack = 0;
+        ct->stkbuf = stack;
+        ct->bufsz = ssize;
     }
-    ptls->current_task->started = 1;
-    ptls->current_task->next = jl_nothing;
-    ptls->current_task->queue = jl_nothing;
-    ptls->current_task->_state = JL_TASK_STATE_RUNNABLE;
-    ptls->current_task->start = NULL;
-    ptls->current_task->result = jl_nothing;
-    ptls->current_task->donenotify = jl_nothing;
-    ptls->current_task->exception = jl_nothing;
-    ptls->current_task->logstate = jl_nothing;
-    ptls->current_task->eh = NULL;
-    ptls->current_task->gcstack = NULL;
-    ptls->current_task->excstack = NULL;
-    ptls->current_task->tid = ptls->tid;
-    ptls->current_task->sticky = 1;
-
-#ifdef JL_TSAN_ENABLED
-    ptls->current_task->ctx.tsan_state = __tsan_get_current_fiber();
+    ct->started = 1;
+    ct->next = jl_nothing;
+    ct->queue = jl_nothing;
+    ct->tls = jl_nothing;
+    jl_atomic_store_relaxed(&ct->_state, JL_TASK_STATE_RUNNABLE);
+    ct->start = NULL;
+    ct->result = jl_nothing;
+    ct->donenotify = jl_nothing;
+    jl_atomic_store_relaxed(&ct->_isexception, 0);
+    ct->logstate = jl_nothing;
+    ct->eh = NULL;
+    ct->gcstack = NULL;
+    ct->excstack = NULL;
+    jl_atomic_store_relaxed(&ct->tid, ptls->tid);
+    ct->threadpoolid = jl_threadpoolid(ptls->tid);
+    ct->sticky = 1;
+    ct->ptls = ptls;
+    ct->world_age = 1; // OK to run Julia code on this task
+    ptls->root_task = ct;
+    jl_atomic_store_relaxed(&ptls->current_task, ct);
+    JL_GC_PROMISE_ROOTED(ct);
+    jl_set_pgcstack(&ct->gcstack);
+    assert(jl_current_task == ct);
+
+#ifdef _COMPILER_TSAN_ENABLED_
+    ct->ctx.tsan_state = __tsan_get_current_fiber();
 #endif
 
 #ifdef COPY_STACKS
+    // initialize the base_ctx from which all future copy_stacks will be copies
     if (always_copy_stacks) {
+        // when this is set, we will attempt to corrupt the process stack to switch tasks,
+        // although this is unreliable, and thus not recommended
         ptls->stackbase = stack_hi;
         ptls->stacksize = ssize;
-        if (jl_setjmp(ptls->base_ctx.uc_mcontext, 0))
+#ifdef _OS_WINDOWS_
+        ptls->copy_stack_ctx.uc_stack.ss_sp = stack_hi;
+        ptls->copy_stack_ctx.uc_stack.ss_size = ssize;
+#endif
+        if (jl_setjmp(ptls->copy_stack_ctx.uc_mcontext, 0))
             start_task(); // sanitizer_finish_switch_fiber is part of start_task
-        return;
+        return ct;
     }
+    ssize = JL_STACK_SIZE;
+    char *stkbuf = jl_alloc_fiber(&ptls->base_ctx, &ssize, NULL);
+    ptls->stackbase = stkbuf + ssize;
+    ptls->stacksize = ssize;
 #endif
-
-    jl_init_basefiber(JL_STACK_SIZE);
+    return ct;
 }
 
 JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT
@@ -1231,7 +1406,12 @@ JL_DLLEXPORT int jl_is_task_started(jl_task_t *t) JL_NOTSAFEPOINT
 
 JL_DLLEXPORT int16_t jl_get_task_tid(jl_task_t *t) JL_NOTSAFEPOINT
 {
-    return t->tid;
+    return jl_atomic_load_relaxed(&t->tid);
+}
+
+JL_DLLEXPORT int8_t jl_get_task_threadpoolid(jl_task_t *t)
+{
+    return t->threadpoolid;
 }
 
 
@@ -1278,6 +1458,7 @@ JL_DLLEXPORT void jl_gdb_dump_threadinfo(void)
 }
 #endif
 
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/threading.c b/src/threading.c
index 5fd5c2fa59cd83..4464406d21a76e 100644
--- a/src/threading.c
+++ b/src/threading.c
@@ -36,6 +36,32 @@ extern "C" {
 
 #include "threading.h"
 
+JL_DLLEXPORT void *jl_get_ptls_states(void)
+{
+    // mostly deprecated: use current_task instead
+    return jl_current_task->ptls;
+}
+
+#if !defined(_OS_WINDOWS_)
+static pthread_key_t jl_safe_restore_key;
+
+__attribute__((constructor)) void _jl_init_safe_restore(void)
+{
+    pthread_key_create(&jl_safe_restore_key, NULL);
+}
+
+JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
+{
+    return (jl_jmp_buf*)pthread_getspecific(jl_safe_restore_key);
+}
+
+JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
+{
+    pthread_setspecific(jl_safe_restore_key, (void*)sr);
+}
+#endif
+
+
 // The tls_states buffer:
 //
 // On platforms that do not use ELF (i.e. where `__thread` is emulated with
@@ -53,39 +79,42 @@ extern "C" {
 // Mac doesn't seem to have static TLS model so the runtime TLS getter
 // registration will only add overhead to TLS access. The `__thread` variables
 // are emulated with `pthread_key_t` so it is actually faster to use it directly.
-static pthread_key_t jl_tls_key;
+static pthread_key_t jl_pgcstack_key;
 
-__attribute__((constructor)) void jl_mac_init_tls(void)
+__attribute__((constructor)) void jl_init_tls(void)
 {
-    pthread_key_create(&jl_tls_key, NULL);
+    pthread_key_create(&jl_pgcstack_key, NULL);
 }
 
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED
+JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
 {
-    void *ptls = pthread_getspecific(jl_tls_key);
-    if (__unlikely(!ptls)) {
-        ptls = calloc(1, sizeof(jl_tls_states_t));
-        pthread_setspecific(jl_tls_key, ptls);
-    }
-    return (jl_ptls_t)ptls;
+    return (jl_gcframe_t**)pthread_getspecific(jl_pgcstack_key);
 }
 
-// This is only used after the tls is already initialized on the thread
-static JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_fast(void) JL_NOTSAFEPOINT
+void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
 {
-    return (jl_ptls_t)pthread_getspecific(jl_tls_key);
+    pthread_setspecific(jl_pgcstack_key, (void*)pgcstack);
 }
 
-jl_get_ptls_states_func jl_get_ptls_states_getter(void)
+void jl_pgcstack_getkey(jl_get_pgcstack_func **f, pthread_key_t *k)
 {
     // for codegen
-    return &jl_get_ptls_states_fast;
+    *f = pthread_getspecific;
+    *k = jl_pgcstack_key;
 }
+
+
+JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, pthread_key_t k)
+{
+    jl_safe_printf("ERROR: Attempt to change TLS address.\n");
+}
+
 #elif defined(_OS_WINDOWS_)
 // Apparently windows doesn't have a static TLS model (or one that can be
 // reliably used from a shared library) either..... Use `TLSAlloc` instead.
 
-static DWORD jl_tls_key;
+static DWORD jl_pgcstack_key;
+static DWORD jl_safe_restore_key;
 
 // Put this here for now. We can move this out later if we find more use for it.
 BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason,
@@ -93,167 +122,199 @@ BOOLEAN WINAPI DllMain(IN HINSTANCE hDllHandle, IN DWORD nReason,
 {
     switch (nReason) {
     case DLL_PROCESS_ATTACH:
-        jl_tls_key = TlsAlloc();
-        assert(jl_tls_key != TLS_OUT_OF_INDEXES);
+        jl_pgcstack_key = TlsAlloc();
+        assert(jl_pgcstack_key != TLS_OUT_OF_INDEXES);
+        jl_safe_restore_key = TlsAlloc();
+        assert(jl_safe_restore_key != TLS_OUT_OF_INDEXES);
         // Fall through
     case DLL_THREAD_ATTACH:
-        TlsSetValue(jl_tls_key, calloc(1, sizeof(jl_tls_states_t)));
         break;
     case DLL_THREAD_DETACH:
-        free(TlsGetValue(jl_tls_key));
-        TlsSetValue(jl_tls_key, NULL);
         break;
     case DLL_PROCESS_DETACH:
-        free(TlsGetValue(jl_tls_key));
-        TlsFree(jl_tls_key);
+        TlsFree(jl_pgcstack_key);
+        TlsFree(jl_safe_restore_key);
         break;
     }
     return 1; // success
 }
 
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED
-{
 #if defined(_CPU_X86_64_)
-    DWORD *plast_error = (DWORD*)(__readgsqword(0x30) + 0x68);
-    DWORD last_error = *plast_error;
+#define SAVE_ERRNO \
+    DWORD *plast_error = (DWORD*)(__readgsqword(0x30) + 0x68); \
+    DWORD last_error = *plast_error
+#define LOAD_ERRNO \
+    *plast_error = last_error
 #elif defined(_CPU_X86_)
-    DWORD *plast_error = (DWORD*)(__readfsdword(0x18) + 0x34);
-    DWORD last_error = *plast_error;
+#define SAVE_ERRNO \
+    DWORD *plast_error = (DWORD*)(__readfsdword(0x18) + 0x34); \
+    DWORD last_error = *plast_error
+#define LOAD_ERRNO \
+    *plast_error = last_error
 #else
-    DWORD last_error = GetLastError();
+#define SAVE_ERRNO \
+    DWORD last_error = GetLastError()
+#define LOAD_ERRNO \
+    SetLastError(last_error)
 #endif
-    jl_ptls_t state = (jl_ptls_t)TlsGetValue(jl_tls_key);
-#if defined(_CPU_X86_64_)
-    *plast_error = last_error;
-#elif defined(_CPU_X86_)
-    *plast_error = last_error;
-#else
-    SetLastError(last_error);
-#endif
-    return state;
+
+JL_DLLEXPORT jl_jmp_buf *jl_get_safe_restore(void)
+{
+    SAVE_ERRNO;
+    jl_jmp_buf *sr = (jl_jmp_buf*)TlsGetValue(jl_safe_restore_key);
+    LOAD_ERRNO;
+    return sr;
+}
+
+JL_DLLEXPORT void jl_set_safe_restore(jl_jmp_buf *sr)
+{
+    SAVE_ERRNO;
+    TlsSetValue(jl_safe_restore_key, (void*)sr);
+    LOAD_ERRNO;
+}
+
+JL_CONST_FUNC jl_gcframe_t **jl_get_pgcstack(void) JL_NOTSAFEPOINT
+{
+    SAVE_ERRNO;
+    jl_gcframe_t **pgcstack = (jl_gcframe_t**)TlsGetValue(jl_pgcstack_key);
+    LOAD_ERRNO;
+    return pgcstack;
+}
+
+void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
+{
+    // n.b.: this smashes GetLastError
+    TlsSetValue(jl_pgcstack_key, (void*)pgcstack);
 }
 
-jl_get_ptls_states_func jl_get_ptls_states_getter(void)
+void jl_pgcstack_getkey(jl_get_pgcstack_func **f, DWORD *k)
 {
     // for codegen
-    return &jl_get_ptls_states;
+    *f = jl_get_pgcstack;
+    *k = jl_pgcstack_key;
 }
+
+JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, DWORD k)
+{
+    jl_safe_printf("ERROR: Attempt to change TLS address.\n");
+}
+
+
 #else
 // We use the faster static version in the main executable to replace
 // the slower version in the shared object. The code in different libraries
 // or executables, however, have to agree on which version to use.
-// The general solution is to add one more indirection in the C entry point
-// (see `jl_get_ptls_states_wrapper`).
+// The general solution is to add one more indirection in the C entry point.
 //
 // When `ifunc` is available, we can use it to trick the linker to use the
-// real address (`jl_get_ptls_states_static`) directly as the symbol address.
-// (see `jl_get_ptls_states_resolve`).
+// real address (`jl_get_pgcstack_static`) directly as the symbol address.
 //
 // However, since the detection of the static version in `ifunc`
 // is not guaranteed to be reliable, we still need to fallback to the wrapper
 // version as the symbol address if we didn't find the static version in `ifunc`.
 
 // fallback provided for embedding
-static JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_fallback(void)
+static jl_pgcstack_key_t jl_pgcstack_key;
+static __thread jl_gcframe_t **pgcstack_;
+static jl_gcframe_t **jl_get_pgcstack_fallback(void) JL_NOTSAFEPOINT
 {
-    static __thread jl_tls_states_t tls_states;
-    return &tls_states;
+    return pgcstack_;
+}
+static jl_gcframe_t ***jl_pgcstack_addr_fallback(void) JL_NOTSAFEPOINT
+{
+    return &pgcstack_;
+}
+void jl_set_pgcstack(jl_gcframe_t **pgcstack) JL_NOTSAFEPOINT
+{
+    *jl_pgcstack_key() = pgcstack;
 }
 #  if JL_USE_IFUNC
-JL_DLLEXPORT JL_CONST_FUNC __attribute__((weak))
-jl_ptls_t jl_get_ptls_states_static(void);
+JL_DLLEXPORT __attribute__((weak))
+void jl_register_pgcstack_getter(void);
 #  endif
-static jl_ptls_t jl_get_ptls_states_init(void);
-static jl_get_ptls_states_func jl_tls_states_cb = jl_get_ptls_states_init;
-static jl_ptls_t jl_get_ptls_states_init(void)
+static jl_gcframe_t **jl_get_pgcstack_init(void);
+static jl_get_pgcstack_func *jl_get_pgcstack_cb = jl_get_pgcstack_init;
+static jl_gcframe_t **jl_get_pgcstack_init(void)
 {
     // This 2-step initialization is used to detect calling
-    // `jl_set_ptls_states_getter` after the address of the TLS variables
+    // `jl_pgcstack_getkey` after the address of the TLS variables
     // are used. Since the address of TLS variables should be constant,
     // changing the getter address can result in weird crashes.
 
     // This is clearly not thread safe but should be fine since we
     // make sure the tls states callback is finalized before adding
     // multiple threads
-    jl_get_ptls_states_func cb = jl_get_ptls_states_fallback;
 #  if JL_USE_IFUNC
-    if (jl_get_ptls_states_static)
-        cb = jl_get_ptls_states_static;
+    if (jl_register_pgcstack_getter)
+        jl_register_pgcstack_getter();
+    else
 #  endif
-    jl_tls_states_cb = cb;
-    return cb();
-}
-
-static JL_CONST_FUNC jl_ptls_t jl_get_ptls_states_wrapper(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT
-{
-#ifndef __clang_analyzer__
-    return (*jl_tls_states_cb)();
-#endif
+    {
+        jl_get_pgcstack_cb = jl_get_pgcstack_fallback;
+        jl_pgcstack_key = &jl_pgcstack_addr_fallback;
+    }
+    return jl_get_pgcstack_cb();
 }
 
-JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f)
+JL_DLLEXPORT void jl_pgcstack_setkey(jl_get_pgcstack_func *f, jl_pgcstack_key_t k)
 {
-    if (f == jl_tls_states_cb || !f)
+    if (f == jl_get_pgcstack_cb || !f)
         return;
     // only allow setting this once
-    if (jl_tls_states_cb == jl_get_ptls_states_init) {
-        jl_tls_states_cb = f;
-    }
-    else {
+    if (jl_get_pgcstack_cb != jl_get_pgcstack_init) {
         jl_safe_printf("ERROR: Attempt to change TLS address.\n");
         exit(1);
     }
+    jl_get_pgcstack_cb = f;
+    jl_pgcstack_key = k;
 }
 
-#  if JL_USE_IFUNC
-static jl_get_ptls_states_func jl_get_ptls_states_resolve(void)
+JL_DLLEXPORT jl_gcframe_t **jl_get_pgcstack(void) JL_GLOBALLY_ROOTED
 {
-    if (jl_tls_states_cb != jl_get_ptls_states_init)
-        return jl_tls_states_cb;
-    // If we can't find the static version, return the wrapper instead
-    // of the slow version so that we won't resolve to the slow version
-    // due to issues in the relocation order.
-    // This may not be necessary once `ifunc` support in glibc is more mature.
-    if (!jl_get_ptls_states_static)
-        return jl_get_ptls_states_wrapper;
-    jl_tls_states_cb = jl_get_ptls_states_static;
-    return jl_tls_states_cb;
+#ifndef __clang_gcanalyzer__
+    return jl_get_pgcstack_cb();
+#endif
 }
 
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED
-    __attribute__((ifunc ("jl_get_ptls_states_resolve")));
-#  else // JL_TLS_USE_IFUNC
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED
+void jl_pgcstack_getkey(jl_get_pgcstack_func **f, jl_pgcstack_key_t *k)
 {
-    return jl_get_ptls_states_wrapper();
-}
-#  endif // JL_TLS_USE_IFUNC
-jl_get_ptls_states_func jl_get_ptls_states_getter(void)
-{
-    if (jl_tls_states_cb == jl_get_ptls_states_init)
-        jl_get_ptls_states_init();
+    if (jl_get_pgcstack_cb == jl_get_pgcstack_init)
+        jl_get_pgcstack_init();
     // for codegen
-    return jl_tls_states_cb;
+    *f = jl_get_pgcstack_cb;
+    *k = jl_pgcstack_key;
 }
 #endif
 
-JL_DLLEXPORT int jl_n_threads;
 jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED;
+JL_DLLEXPORT _Atomic(uint8_t) jl_measure_compile_time_enabled = 0;
+JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_compile_time = 0;
+JL_DLLEXPORT _Atomic(uint64_t) jl_cumulative_recompile_time = 0;
 
 // return calling thread's ID
-// Also update the suspended_threads list in signals-mach when changing the
-// type of the thread id.
 JL_DLLEXPORT int16_t jl_threadid(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    return ptls->tid;
+    return jl_atomic_load_relaxed(&jl_current_task->tid);
+}
+
+JL_DLLEXPORT int8_t jl_threadpoolid(int16_t tid) JL_NOTSAFEPOINT
+{
+    if (tid < 0 || tid >= jl_n_threads)
+        jl_error("invalid tid");
+    int n = 0;
+    for (int i = 0; i < jl_n_threadpools; i++) {
+        n += jl_n_threads_per_pool[i];
+        if (tid < n)
+            return (int8_t)i;
+    }
+    jl_error("internal error: couldn't determine threadpool id");
 }
 
-void jl_init_threadtls(int16_t tid)
+jl_ptls_t jl_init_threadtls(int16_t tid)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    ptls->system_id = jl_thread_self();
-    seed_cong(&ptls->rngseed);
+    jl_ptls_t ptls = (jl_ptls_t)calloc(1, sizeof(jl_tls_states_t));
+    ptls->system_id = (jl_thread_t)(uintptr_t)uv_thread_self();
+    ptls->rngseed = jl_rand();
 #ifdef _OS_WINDOWS_
     if (tid == 0) {
         if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
@@ -264,11 +325,8 @@ void jl_init_threadtls(int16_t tid)
         }
     }
 #endif
-    assert(ptls->world_age == 0);
-    ptls->world_age = 1; // OK to run Julia code on this thread
     ptls->tid = tid;
-    ptls->pgcstack = NULL;
-    ptls->gc_state = 0; // GC unsafe
+    jl_atomic_store_relaxed(&ptls->gc_state, 0); // GC unsafe
     // Conditionally initialize the safepoint address. See comment in
     // `safepoint.c`
     if (tid == 0) {
@@ -278,32 +336,25 @@ void jl_init_threadtls(int16_t tid)
         ptls->safepoint = (size_t*)(jl_safepoint_pages + jl_page_size * 2 +
                                     sizeof(size_t));
     }
-    ptls->defer_signal = 0;
     jl_bt_element_t *bt_data = (jl_bt_element_t*)
         malloc_s(sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1));
     memset(bt_data, 0, sizeof(jl_bt_element_t) * (JL_MAX_BT_SIZE + 1));
     ptls->bt_data = bt_data;
-    ptls->sig_exception = NULL;
-    ptls->previous_exception = NULL;
-    ptls->next_task = NULL;
-#ifdef _OS_WINDOWS_
-    ptls->needs_resetstkoflw = 0;
-#endif
     small_arraylist_new(&ptls->locks, 0);
     jl_init_thread_heap(ptls);
-    jl_install_thread_signal_handler(ptls);
 
     jl_all_tls_states[tid] = ptls;
+
+    return ptls;
 }
 
-// lock for code generation
-jl_mutex_t codegen_lock;
+JL_DLLEXPORT jl_mutex_t jl_codegen_lock;
 jl_mutex_t typecache_lock;
 
-ssize_t jl_tls_offset = -1;
+JL_DLLEXPORT ssize_t jl_tls_offset = -1;
 
 #ifdef JL_ELF_TLS_VARIANT
-const int jl_tls_elf_support = 1;
+JL_DLLEXPORT const int jl_tls_elf_support = 1;
 // Optimize TLS access in codegen if the TLS buffer is using a IE or LE model.
 // To detect such case, we find the size of the TLS segment in the main
 // executable and the thread pointer (TP) and then see if the TLS pointer on the
@@ -318,11 +369,11 @@ static inline size_t jl_add_tls_size(size_t orig_size, size_t size, size_t align
 {
     return LLT_ALIGN(orig_size, align) + size;
 }
-static inline ssize_t jl_check_tls_bound(void *tp, void *ptls, size_t tls_size)
+static inline ssize_t jl_check_tls_bound(void *tp, jl_gcframe_t ***k0, size_t tls_size)
 {
-    ssize_t offset = (char*)ptls - (char*)tp;
+    ssize_t offset = (char*)k0 - (char*)tp;
     if (offset < JL_ELF_TLS_INIT_SIZE ||
-        (size_t)offset + sizeof(jl_tls_states_t) > tls_size)
+        (size_t)offset + sizeof(*k0) > tls_size)
         return -1;
     return offset;
 }
@@ -333,10 +384,10 @@ static inline size_t jl_add_tls_size(size_t orig_size, size_t size, size_t align
 {
     return LLT_ALIGN(orig_size + size, align);
 }
-static inline ssize_t jl_check_tls_bound(void *tp, void *ptls, size_t tls_size)
+static inline ssize_t jl_check_tls_bound(void *tp, jl_gcframe_t ***k0, size_t tls_size)
 {
-    ssize_t offset = (char*)tp - (char*)ptls;
-    if (offset < sizeof(jl_tls_states_t) || offset > tls_size)
+    ssize_t offset = (char*)tp - (char*)k0;
+    if (offset < sizeof(*k0) || offset > tls_size)
         return -1;
     return -offset;
 }
@@ -371,7 +422,12 @@ static int check_tls_cb(struct dl_phdr_info *info, size_t size, void *_data)
 
 static void jl_check_tls(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_get_pgcstack_func *f;
+    jl_gcframe_t ***(*k)(void);
+    jl_pgcstack_getkey(&f, &k);
+    jl_gcframe_t ***k0 = k();
+    if (k0 == NULL)
+        return;
     check_tls_cb_t data = {0};
     dl_iterate_phdr(check_tls_cb, &data);
     if (data.total_size == 0)
@@ -388,13 +444,14 @@ static void jl_check_tls(void)
 #else
 #  error "Cannot emit thread pointer for this architecture."
 #endif
-    ssize_t offset = jl_check_tls_bound(tp, ptls, data.total_size);
+    ssize_t offset = jl_check_tls_bound(tp, k0, data.total_size);
     if (offset == -1)
         return;
     jl_tls_offset = offset;
 }
 #else
-const int jl_tls_elf_support = 0;
+// !JL_ELF_TLS_VARIANT
+JL_DLLEXPORT const int jl_tls_elf_support = 0;
 #endif
 
 // interface to Julia; sets up to make the runtime thread-safe
@@ -406,24 +463,58 @@ void jl_init_threading(void)
     jl_check_tls();
 #endif
 
-    // how many threads available, usable
+    // Determine how many threads and pools are requested. This may have been
+    // specified on the command line (and so are in `jl_options`) or by the
+    // environment variable. Set the globals `jl_n_threadpools`, `jl_n_threads`
+    // and `jl_n_threads_per_pool`.
+    jl_n_threadpools = 1;
     jl_n_threads = JULIA_NUM_THREADS;
-    if (jl_options.nthreads < 0) // --threads=auto
-        jl_n_threads = jl_cpu_threads();
-    else if (jl_options.nthreads > 0) // --threads=N
-        jl_n_threads = jl_options.nthreads;
-    else if ((cp = getenv(NUM_THREADS_NAME)))
-        jl_n_threads = (uint64_t)strtol(cp, NULL, 10);
-    if (jl_n_threads <= 0)
-        jl_n_threads = 1;
-#ifndef __clang_analyzer__
+    int16_t nthreads = jl_n_threads, nthreadsi = 0;
+    char *endptr, *endptri;
+
+    if (jl_options.nthreads != 0) { // --threads specified
+        jl_n_threadpools = jl_options.nthreadpools;
+        nthreads = jl_options.nthreads_per_pool[0];
+        if (nthreads < 0)
+            nthreads = jl_effective_threads();
+        if (jl_n_threadpools == 2)
+            nthreadsi = jl_options.nthreads_per_pool[1];
+    }
+    else if ((cp = getenv(NUM_THREADS_NAME))) { // ENV[NUM_THREADS_NAME] specified
+        if (!strncmp(cp, "auto", 4)) {
+            nthreads = jl_effective_threads();
+            cp += 4;
+        }
+        else {
+            errno = 0;
+            nthreads = strtol(cp, &endptr, 10);
+            if (errno != 0 || endptr == cp || nthreads <= 0)
+                nthreads = 1;
+            cp = endptr;
+        }
+        if (*cp == ',') {
+            cp++;
+            if (!strncmp(cp, "auto", 4))
+                nthreadsi = 1;
+            else {
+                errno = 0;
+                nthreadsi = strtol(cp, &endptri, 10);
+                if (errno != 0 || endptri == cp || nthreadsi < 0)
+                    nthreadsi = 0;
+            }
+            if (nthreadsi > 0)
+                jl_n_threadpools++;
+        }
+    }
+
+    jl_n_threads = nthreads + nthreadsi;
+    jl_n_threads_per_pool = (int *)malloc(2 * sizeof(int));
+    jl_n_threads_per_pool[0] = nthreads;
+    jl_n_threads_per_pool[1] = nthreadsi;
+
+#ifndef __clang_gcanalyzer__
     jl_all_tls_states = (jl_ptls_t*)calloc(jl_n_threads, sizeof(void*));
 #endif
-    // initialize this thread (set tid, create heap, etc.)
-    jl_init_threadtls(0);
-
-    // initialize threading infrastructure
-    jl_init_threadinginfra();
 }
 
 static uv_barrier_t thread_init_done;
@@ -441,8 +532,8 @@ void jl_start_threads(void)
     // do we have exclusive use of the machine? default is no
     exclusive = DEFAULT_MACHINE_EXCLUSIVE;
     cp = getenv(MACHINE_EXCLUSIVE_NAME);
-    if (cp)
-        exclusive = strtol(cp, NULL, 10);
+    if (cp && strcmp(cp, "0") != 0)
+        exclusive = 1;
 
     // exclusive use: affinitize threads, master thread on proc 0, rest
     // according to a 'compact' policy
@@ -454,7 +545,7 @@ void jl_start_threads(void)
         }
         memset(mask, 0, cpumasksize);
         mask[0] = 1;
-        uvtid = (uv_thread_t)uv_thread_self();
+        uvtid = uv_thread_self();
         uv_thread_setaffinity(&uvtid, mask, NULL, cpumasksize);
         mask[0] = 0;
     }
@@ -466,7 +557,7 @@ void jl_start_threads(void)
     uv_barrier_init(&thread_init_done, nthreads);
 
     for (i = 1; i < nthreads; ++i) {
-        jl_threadarg_t *t = (jl_threadarg_t*)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
+        jl_threadarg_t *t = (jl_threadarg_t *)malloc_s(sizeof(jl_threadarg_t)); // ownership will be passed to the thread
         t->tid = i;
         t->barrier = &thread_init_done;
         uv_thread_create(&uvtid, jl_threadfun, t);
@@ -481,26 +572,29 @@ void jl_start_threads(void)
     uv_barrier_wait(&thread_init_done);
 }
 
-unsigned volatile _threadedregion; // HACK: keep track of whether it is safe to do IO
+_Atomic(unsigned) _threadedregion; // HACK: keep track of whether to prioritize IO or threading
 
 JL_DLLEXPORT int jl_in_threaded_region(void)
 {
-    return _threadedregion != 0;
+    return jl_atomic_load_relaxed(&_threadedregion) != 0;
 }
 
 JL_DLLEXPORT void jl_enter_threaded_region(void)
 {
-    _threadedregion += 1;
+    jl_atomic_fetch_add(&_threadedregion, 1);
 }
 
 JL_DLLEXPORT void jl_exit_threaded_region(void)
 {
-    _threadedregion -= 1;
-    jl_wake_libuv();
-    // make sure no more callbacks will run while user code continues
-    // outside thread region and might touch an I/O object.
-    JL_UV_LOCK();
-    JL_UV_UNLOCK();
+    if (jl_atomic_fetch_add(&_threadedregion, -1) == 1) {
+        // make sure no more callbacks will run while user code continues
+        // outside thread region and might touch an I/O object.
+        JL_UV_LOCK();
+        JL_UV_UNLOCK();
+        // make sure thread 0 is not using the sleep_lock
+        // so that it may enter the libuv event loop instead
+        jl_wakeup_thread(0);
+    }
 }
 
 
diff --git a/src/threading.h b/src/threading.h
index 072b7264841c2e..4c6f1e19881f5c 100644
--- a/src/threading.h
+++ b/src/threading.h
@@ -13,7 +13,6 @@ extern "C" {
 #define PROFILE_JL_THREADING            0
 
 extern jl_ptls_t *jl_all_tls_states JL_GLOBALLY_ROOTED; /* thread local storage */
-extern JL_DLLEXPORT int jl_n_threads;   /* # threads we're actually using */
 
 typedef struct _jl_threadarg_t {
     int16_t tid;
@@ -22,7 +21,7 @@ typedef struct _jl_threadarg_t {
 } jl_threadarg_t;
 
 // each thread must initialize its TLS
-void jl_init_threadtls(int16_t tid);
+jl_ptls_t jl_init_threadtls(int16_t tid);
 
 // provided by a threading infrastructure
 void jl_init_threadinginfra(void);
diff --git a/src/timing.c b/src/timing.c
index 70fb8df213796c..929a09305f9932 100644
--- a/src/timing.c
+++ b/src/timing.c
@@ -17,8 +17,8 @@ extern "C" {
 #error Timings are not supported on your compiler
 #endif
 
-jl_timing_block_t *jl_root_timing;
-uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0};
+static uint64_t t0;
+JL_DLLEXPORT uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0};
 const char *jl_timing_names[(int)JL_TIMING_LAST] =
     {
 #define X(name) #name
@@ -28,32 +28,32 @@ const char *jl_timing_names[(int)JL_TIMING_LAST] =
 
 void jl_print_timings(void)
 {
-    uint64_t total_time = 0;
+    uint64_t total_time = cycleclock() - t0;
+    uint64_t root_time = total_time;
     for (int i = 0; i < JL_TIMING_LAST; i++) {
-        total_time += jl_timing_data[i];
+        root_time -= jl_timing_data[i];
     }
+    jl_timing_data[0] = root_time;
     for (int i = 0; i < JL_TIMING_LAST; i++) {
         if (jl_timing_data[i] != 0)
-            fprintf(stderr,"%-25s : %5.2f %%   %" PRIu64 "\n", jl_timing_names[i],
+            fprintf(stderr, "%-25s : %5.2f %%   %" PRIu64 "\n", jl_timing_names[i],
                     100 * (((double)jl_timing_data[i]) / total_time), jl_timing_data[i]);
     }
 }
 
 void jl_init_timing(void)
 {
-    jl_root_timing = (jl_timing_block_t*)malloc_s(sizeof(jl_timing_block_t));
-    _jl_timing_block_init(jl_root_timing, JL_TIMING_ROOT);
-    jl_root_timing->prev = NULL;
+    t0 = cycleclock();
 }
 
 void jl_destroy_timing(void)
 {
-    jl_timing_block_t *stack = jl_current_task ? jl_current_task->timing_stack : jl_root_timing;
+    jl_ptls_t ptls = jl_current_task->ptls;
+    jl_timing_block_t *stack = ptls->timing_stack;
     while (stack) {
         _jl_timing_block_destroy(stack);
         stack = stack->prev;
     }
-    free(jl_root_timing);
 }
 
 jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block)
diff --git a/src/timing.h b/src/timing.h
index 4239e635c86e48..fd84707ad5d2c2 100644
--- a/src/timing.h
+++ b/src/timing.h
@@ -23,7 +23,6 @@ extern "C" {
 #endif
 void jl_print_timings(void);
 jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block);
-extern jl_timing_block_t *jl_root_timing;
 void jl_timing_block_start(jl_timing_block_t *cur_block);
 void jl_timing_block_stop(jl_timing_block_t *cur_block);
 #ifdef __cplusplus
@@ -53,8 +52,6 @@ void jl_timing_block_stop(jl_timing_block_t *cur_block);
         X(METHOD_LOOKUP_FAST),    \
         X(LLVM_OPT),              \
         X(LLVM_MODULE_FINISH),    \
-        X(LLVM_EMIT),             \
-        X(METHOD_LOOKUP_COMPILE), \
         X(METHOD_MATCH),          \
         X(TYPE_CACHE_LOOKUP),     \
         X(TYPE_CACHE_INSERT),     \
@@ -119,7 +116,8 @@ STATIC_INLINE uint64_t _jl_timing_block_init(jl_timing_block_t *block, int owner
 
 STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) {
     uint64_t t = _jl_timing_block_init(block, owner);
-    jl_timing_block_t **prevp = jl_current_task ? &jl_current_task->timing_stack : &jl_root_timing;
+    jl_task_t *ct = jl_current_task;
+    jl_timing_block_t **prevp = &ct->ptls->timing_stack;
     block->prev = *prevp;
     if (block->prev)
         _jl_timing_block_stop(block->prev, t);
@@ -128,9 +126,10 @@ STATIC_INLINE void _jl_timing_block_ctor(jl_timing_block_t *block, int owner) {
 
 STATIC_INLINE void _jl_timing_block_destroy(jl_timing_block_t *block) {
     uint64_t t = cycleclock();
+    jl_task_t *ct = jl_current_task;
     _jl_timing_block_stop(block, t);
     jl_timing_data[block->owner] += block->total;
-    jl_timing_block_t **pcur = jl_current_task ? &jl_current_task->timing_stack : &jl_root_timing;
+    jl_timing_block_t **pcur = &ct->ptls->timing_stack;
     assert(*pcur == block);
     *pcur = block->prev;
     if (block->prev)
diff --git a/src/tls.h b/src/tls.h
deleted file mode 100644
index febf4f15208f7e..00000000000000
--- a/src/tls.h
+++ /dev/null
@@ -1,41 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-#ifndef JL_TLS_H
-#define JL_TLS_H
-
-// Thread-local storage access
-
-// Copied from libuv. Add `JL_CONST_FUNC` so that the compiler
-// can optimize this better.
-static inline unsigned long JL_CONST_FUNC jl_thread_self(void)
-{
-#ifdef _OS_WINDOWS_
-    return (unsigned long)GetCurrentThreadId();
-#else
-    return (unsigned long)pthread_self();
-#endif
-}
-
-typedef struct _jl_tls_states_t jl_tls_states_t;
-
-typedef jl_tls_states_t *jl_ptls_t;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-JL_DLLEXPORT int16_t jl_threadid(void);
-JL_DLLEXPORT void jl_threading_profile(void);
-
-JL_DLLEXPORT JL_CONST_FUNC jl_ptls_t (jl_get_ptls_states)(void) JL_GLOBALLY_ROOTED JL_NOTSAFEPOINT;
-
-typedef jl_ptls_t (*jl_get_ptls_states_func)(void);
-#if !defined(_OS_DARWIN_) && !defined(_OS_WINDOWS_)
-JL_DLLEXPORT void jl_set_ptls_states_getter(jl_get_ptls_states_func f);
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/toplevel.c b/src/toplevel.c
index 56002f60ac9b1e..ff089c1aebfa67 100644
--- a/src/toplevel.c
+++ b/src/toplevel.c
@@ -17,7 +17,6 @@
 #endif
 #include "julia.h"
 #include "julia_internal.h"
-#include "uv.h"
 #include "julia_assert.h"
 #include "intrinsics.h"
 #include "builtin_proto.h"
@@ -34,6 +33,9 @@ JL_DLLEXPORT const char *jl_filename = "none"; // need to update jl_critical_err
 htable_t jl_current_modules;
 jl_mutex_t jl_modules_mutex;
 
+// During incremental compilation, the following gets set
+JL_DLLEXPORT jl_module_t *jl_precompile_toplevel_module = NULL;   // the toplevel module currently being defined
+
 JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m)
 {
     jl_module_t *base_module = jl_base_relative_to(m);
@@ -45,15 +47,13 @@ JL_DLLEXPORT void jl_add_standard_imports(jl_module_t *m)
 // create a new top-level module
 void jl_init_main_module(void)
 {
-    if (jl_main_module != NULL)
-        jl_error("Main module already initialized.");
-
+    assert(jl_main_module == NULL);
     jl_main_module = jl_new_module(jl_symbol("Main"));
     jl_main_module->parent = jl_main_module;
     jl_set_const(jl_main_module, jl_symbol("Core"),
                  (jl_value_t*)jl_core_module);
-    jl_set_global(jl_core_module, jl_symbol("Main"),
-                  (jl_value_t*)jl_main_module);
+    jl_set_const(jl_core_module, jl_symbol("Main"),
+                 (jl_value_t*)jl_main_module);
 }
 
 static jl_function_t *jl_module_get_initializer(jl_module_t *m JL_PROPAGATES_ROOT)
@@ -68,11 +68,12 @@ void jl_module_run_initializer(jl_module_t *m)
     jl_function_t *f = jl_module_get_initializer(m);
     if (f == NULL)
         return;
-    size_t last_age = jl_get_ptls_states()->world_age;
+    jl_task_t *ct = jl_current_task;
+    size_t last_age = ct->world_age;
     JL_TRY {
-        jl_get_ptls_states()->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         jl_apply(&f, 1);
-        jl_get_ptls_states()->world_age = last_age;
+        ct->world_age = last_age;
     }
     JL_CATCH {
         if (jl_initerror_type == NULL) {
@@ -117,8 +118,8 @@ static int jl_is__toplevel__mod(jl_module_t *mod)
 // TODO: add locks around global state mutation operations
 static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    assert(ex->head == module_sym);
+    jl_task_t *ct = jl_current_task;
+    assert(ex->head == jl_module_sym);
     if (jl_array_len(ex->args) != 3 || !jl_is_expr(jl_exprarg(ex, 2))) {
         jl_error("syntax: malformed module expression");
     }
@@ -140,18 +141,23 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
     ptrhash_put(&jl_current_modules, (void*)newm, (void*)((uintptr_t)HT_NOTFOUND + 1));
     JL_UNLOCK(&jl_modules_mutex);
 
+    jl_module_t *old_toplevel_module = jl_precompile_toplevel_module;
+
     // copy parent environment info into submodule
     newm->uuid = parent_module->uuid;
     if (jl_is__toplevel__mod(parent_module)) {
         newm->parent = newm;
         jl_register_root_module(newm);
+        if (jl_options.incremental) {
+            jl_precompile_toplevel_module = newm;
+        }
     }
     else {
         newm->parent = parent_module;
         jl_binding_t *b = jl_get_binding_wr(parent_module, name, 1);
         jl_declare_constant(b);
-        jl_value_t *old = jl_atomic_compare_exchange(&b->value, NULL, (jl_value_t*)newm);
-        if (old != NULL) {
+        jl_value_t *old = NULL;
+        if (!jl_atomic_cmpswap(&b->value, &old, (jl_value_t*)newm)) {
             if (!jl_is_module(old)) {
                 jl_errorf("invalid redefinition of constant %s", jl_symbol_name(name));
             }
@@ -175,7 +181,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         jl_base_module = newm;
     }
 
-    size_t last_age = ptls->world_age;
+    size_t last_age = ct->world_age;
 
     // add standard imports unless baremodule
     if (std_imports) {
@@ -191,13 +197,13 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
     jl_array_t *exprs = ((jl_expr_t*)jl_exprarg(ex, 2))->args;
     for (int i = 0; i < jl_array_len(exprs); i++) {
         // process toplevel form
-        ptls->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         form = jl_expand_stmt_with_loc(jl_array_ptr_ref(exprs, i), newm, jl_filename, jl_lineno);
-        ptls->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         (void)jl_toplevel_eval_flex(newm, form, 1, 1);
     }
-    newm->primary_world = jl_world_counter;
-    ptls->world_age = last_age;
+    newm->primary_world = jl_atomic_load_acquire(&jl_world_counter);
+    ct->world_age = last_age;
 
 #if 0
     // some optional post-processing steps
@@ -263,32 +269,61 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex
         }
     }
 
+    jl_precompile_toplevel_module = old_toplevel_module;
+
     JL_GC_POP();
     return (jl_value_t*)newm;
 }
 
 static jl_value_t *jl_eval_dot_expr(jl_module_t *m, jl_value_t *x, jl_value_t *f, int fast)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_value_t **args;
     JL_GC_PUSHARGS(args, 3);
     args[1] = jl_toplevel_eval_flex(m, x, fast, 0);
     args[2] = jl_toplevel_eval_flex(m, f, fast, 0);
     if (jl_is_module(args[1])) {
-        JL_TYPECHK(getfield, symbol, args[2]);
+        JL_TYPECHK(getglobal, symbol, args[2]);
         args[0] = jl_eval_global_var((jl_module_t*)args[1], (jl_sym_t*)args[2]);
     }
     else {
         args[0] = jl_eval_global_var(jl_base_relative_to(m), jl_symbol("getproperty"));
-        size_t last_age = ptls->world_age;
-        ptls->world_age = jl_world_counter;
+        size_t last_age = ct->world_age;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         args[0] = jl_apply(args, 3);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     JL_GC_POP();
     return args[0];
 }
 
+void jl_eval_global_expr(jl_module_t *m, jl_expr_t *ex, int set_type) {
+    // create uninitialized mutable binding for "global x" decl sometimes or probably
+    size_t i, l = jl_array_len(ex->args);
+    for (i = 0; i < l; i++) {
+        jl_value_t *arg = jl_exprarg(ex, i);
+        jl_module_t *gm;
+        jl_sym_t *gs;
+        if (jl_is_globalref(arg)) {
+            gm = jl_globalref_mod(arg);
+            gs = jl_globalref_name(arg);
+        }
+        else {
+            assert(jl_is_symbol(arg));
+            gm = m;
+            gs = (jl_sym_t*)arg;
+        }
+        if (!jl_binding_resolved_p(gm, gs)) {
+            jl_binding_t *b = jl_get_binding_wr(gm, gs, 1);
+            if (set_type) {
+                jl_value_t *old_ty = NULL;
+                // maybe set the type too, perhaps
+                jl_atomic_cmpswap_relaxed(&b->ty, &old_ty, (jl_value_t*)jl_any_type);
+            }
+        }
+    }
+}
+
 // module referenced by (top ...) from within m
 // this is only needed because of the bootstrapping process:
 // - initially Base doesn't exist and top === Core
@@ -305,38 +340,42 @@ JL_DLLEXPORT jl_module_t *jl_base_relative_to(jl_module_t *m)
     return jl_top_module;
 }
 
-static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs)
+static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs, int *has_opaque)
 {
     if (!jl_is_expr(v))
         return;
     jl_expr_t *e = (jl_expr_t*)v;
     jl_sym_t *head = e->head;
-    if (head == toplevel_sym || head == thunk_sym) {
+    if (head == jl_toplevel_sym || head == jl_thunk_sym) {
         return;
     }
-    else if (head == global_sym) {
+    else if (head == jl_global_sym) {
         // this could be considered has_defs, but loops that assign to globals
         // might still need to be optimized.
         return;
     }
-    else if (head == const_sym || head == copyast_sym) {
+    else if (head == jl_const_sym || head == jl_copyast_sym) {
         // Note: `copyast` is included here since it indicates the presence of
         // `quote` and probably `eval`.
         *has_defs = 1;
         return;
     }
-    else if (head == method_sym || jl_is_toplevel_only_expr(v)) {
+    else if (head == jl_method_sym || jl_is_toplevel_only_expr(v)) {
         *has_defs = 1;
     }
-    else if (head == cfunction_sym) {
+    else if (head == jl_cfunction_sym) {
         *has_intrinsics = 1;
         return;
     }
-    else if (head == foreigncall_sym) {
+    else if (head == jl_foreigncall_sym) {
         *has_intrinsics = 1;
         return;
     }
-    else if (head == call_sym && jl_expr_nargs(e) > 0) {
+    else if (head == jl_new_opaque_closure_sym) {
+        *has_opaque = 1;
+        return;
+    }
+    else if (head == jl_call_sym && jl_expr_nargs(e) > 0) {
         jl_value_t *called = NULL;
         jl_value_t *f = jl_exprarg(e, 0);
         if (jl_is_globalref(f)) {
@@ -344,14 +383,15 @@ static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs)
             jl_sym_t *name = jl_globalref_name(f);
             if (jl_binding_resolved_p(mod, name)) {
                 jl_binding_t *b = jl_get_binding(mod, name);
-                if (b && b->value && b->constp)
-                    called = b->value;
+                if (b && b->constp) {
+                    called = jl_atomic_load_relaxed(&b->value);
+                }
             }
         }
         else if (jl_is_quotenode(f)) {
             called = jl_quotenode_value(f);
         }
-        if (called) {
+        if (called != NULL) {
             if (jl_is_intrinsic(called) && jl_unbox_int32(called) == (int)llvmcall) {
                 *has_intrinsics = 1;
             }
@@ -365,7 +405,7 @@ static void expr_attributes(jl_value_t *v, int *has_intrinsics, int *has_defs)
     for (i = 0; i < jl_array_len(e->args); i++) {
         jl_value_t *a = jl_exprarg(e, i);
         if (jl_is_expr(a))
-            expr_attributes(a, has_intrinsics, has_defs);
+            expr_attributes(a, has_intrinsics, has_defs, has_opaque);
     }
 }
 
@@ -374,17 +414,19 @@ int jl_code_requires_compiler(jl_code_info_t *src)
     jl_array_t *body = src->code;
     assert(jl_typeis(body, jl_array_any_type));
     size_t i;
-    int has_intrinsics = 0, has_defs = 0;
+    int has_intrinsics = 0, has_defs = 0, has_opaque = 0;
+    if (jl_has_meta(body, jl_force_compile_sym))
+        return 1;
     for(i=0; i < jl_array_len(body); i++) {
         jl_value_t *stmt = jl_array_ptr_ref(body,i);
-        expr_attributes(stmt, &has_intrinsics, &has_defs);
+        expr_attributes(stmt, &has_intrinsics, &has_defs, &has_opaque);
         if (has_intrinsics)
             return 1;
     }
     return 0;
 }
 
-static void body_attributes(jl_array_t *body, int *has_intrinsics, int *has_defs, int *has_loops)
+static void body_attributes(jl_array_t *body, int *has_intrinsics, int *has_defs, int *has_loops, int *has_opaque, int *forced_compile)
 {
     size_t i;
     *has_loops = 0;
@@ -400,8 +442,9 @@ static void body_attributes(jl_array_t *body, int *has_intrinsics, int *has_defs
                     *has_loops = 1;
             }
         }
-        expr_attributes(stmt, has_intrinsics, has_defs);
+        expr_attributes(stmt, has_intrinsics, has_defs, has_opaque);
     }
+    *forced_compile = jl_has_meta(body, jl_force_compile_sym);
 }
 
 static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_ROOTED
@@ -409,19 +452,19 @@ static jl_module_t *call_require(jl_module_t *mod, jl_sym_t *var) JL_GLOBALLY_RO
     static jl_value_t *require_func = NULL;
     int build_mode = jl_generating_output();
     jl_module_t *m = NULL;
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (require_func == NULL && jl_base_module != NULL) {
         require_func = jl_get_global(jl_base_module, jl_symbol("require"));
     }
     if (require_func != NULL) {
-        size_t last_age = ptls->world_age;
-        ptls->world_age = (build_mode ? jl_base_module->primary_world : jl_world_counter);
+        size_t last_age = ct->world_age;
+        ct->world_age = (build_mode ? jl_base_module->primary_world : jl_atomic_load_acquire(&jl_world_counter));
         jl_value_t *reqargs[3];
         reqargs[0] = require_func;
         reqargs[1] = (jl_value_t*)mod;
         reqargs[2] = (jl_value_t*)var;
         m = (jl_module_t*)jl_apply(reqargs, 3);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     if (m == NULL || !jl_is_module(m)) {
         jl_errorf("failed to load module %s", jl_symbol_name(var));
@@ -448,7 +491,7 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR
         m = from;
         i = 0;
     }
-    else if (var != dot_sym) {
+    else if (var != jl_dot_sym) {
         // `A.B`: call the loader to obtain the root A in the current environment.
         if (jl_core_module && var == jl_core_module->name) {
             m = jl_core_module;
@@ -469,7 +512,7 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR
             if (i >= jl_array_len(args))
                 jl_error("invalid module path");
             var = (jl_sym_t*)jl_array_ptr_ref(args, i);
-            if (var != dot_sym)
+            if (var != jl_dot_sym)
                 break;
             i++;
             assert(m);
@@ -481,7 +524,7 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR
         var = (jl_sym_t*)jl_array_ptr_ref(args, i);
         if (!jl_is_symbol(var))
             jl_type_error(keyword, (jl_value_t*)jl_symbol_type, (jl_value_t*)var);
-        if (var == dot_sym)
+        if (var == jl_dot_sym)
             jl_errorf("invalid %s path: \".\" in identifier path", keyword);
         if (i == jl_array_len(args)-1)
             break;
@@ -498,15 +541,15 @@ static jl_module_t *eval_import_path(jl_module_t *where, jl_module_t *from JL_PR
 int jl_is_toplevel_only_expr(jl_value_t *e) JL_NOTSAFEPOINT
 {
     return jl_is_expr(e) &&
-        (((jl_expr_t*)e)->head == module_sym ||
-         ((jl_expr_t*)e)->head == import_sym ||
-         ((jl_expr_t*)e)->head == using_sym ||
-         ((jl_expr_t*)e)->head == export_sym ||
-         ((jl_expr_t*)e)->head == thunk_sym ||
-         ((jl_expr_t*)e)->head == global_sym ||
-         ((jl_expr_t*)e)->head == const_sym ||
-         ((jl_expr_t*)e)->head == toplevel_sym ||
-         ((jl_expr_t*)e)->head == error_sym ||
+        (((jl_expr_t*)e)->head == jl_module_sym ||
+         ((jl_expr_t*)e)->head == jl_import_sym ||
+         ((jl_expr_t*)e)->head == jl_using_sym ||
+         ((jl_expr_t*)e)->head == jl_export_sym ||
+         ((jl_expr_t*)e)->head == jl_thunk_sym ||
+         ((jl_expr_t*)e)->head == jl_global_sym ||
+         ((jl_expr_t*)e)->head == jl_const_sym ||
+         ((jl_expr_t*)e)->head == jl_toplevel_sym ||
+         ((jl_expr_t*)e)->head == jl_error_sym ||
          ((jl_expr_t*)e)->head == jl_incomplete_sym);
 }
 
@@ -516,12 +559,12 @@ int jl_needs_lowering(jl_value_t *e) JL_NOTSAFEPOINT
         return 0;
     jl_expr_t *ex = (jl_expr_t*)e;
     jl_sym_t *head = ex->head;
-    if (head == module_sym || head == import_sym || head == using_sym ||
-        head == export_sym || head == thunk_sym || head == toplevel_sym ||
-        head == error_sym || head == jl_incomplete_sym || head == method_sym) {
+    if (head == jl_module_sym || head == jl_import_sym || head == jl_using_sym ||
+        head == jl_export_sym || head == jl_thunk_sym || head == jl_toplevel_sym ||
+        head == jl_error_sym || head == jl_incomplete_sym || head == jl_method_sym) {
         return 0;
     }
-    if (head == global_sym || head == const_sym) {
+    if (head == jl_global_sym || head == jl_const_sym) {
         size_t i, l = jl_array_len(ex->args);
         for (i = 0; i < l; i++) {
             jl_value_t *a = jl_exprarg(ex, i);
@@ -542,15 +585,16 @@ static jl_method_instance_t *method_instance_for_thunk(jl_code_info_t *src, jl_m
     return li;
 }
 
-static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import)
+static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import, jl_sym_t *asname)
 {
     assert(m);
-    jl_sym_t *name = import->name;
+    jl_sym_t *name = asname ? asname : import->name;
     jl_binding_t *b;
     if (jl_binding_resolved_p(m, name)) {
         b = jl_get_binding(m, name);
-        if ((!b->constp && b->owner != m) || (b->value && b->value != (jl_value_t*)import)) {
-            jl_errorf("importing %s into %s conflicts with an existing identifier",
+        jl_value_t *bv = jl_atomic_load_relaxed(&b->value);
+        if ((!b->constp && b->owner != m) || (bv && bv != (jl_value_t*)import)) {
+            jl_errorf("importing %s into %s conflicts with an existing global",
                       jl_symbol_name(name), jl_symbol_name(m->name));
         }
     }
@@ -559,7 +603,8 @@ static void import_module(jl_module_t *JL_NONNULL m, jl_module_t *import)
         b->imported = 1;
     }
     if (!b->constp) {
-        b->value = (jl_value_t*)import;
+        // TODO: constp is not threadsafe
+        jl_atomic_store_release(&b->value, (jl_value_t*)import);
         b->constp = 1;
         jl_gc_wb(m, (jl_value_t*)import);
     }
@@ -570,10 +615,10 @@ static jl_module_t *eval_import_from(jl_module_t *m JL_PROPAGATES_ROOT, jl_expr_
 {
     if (jl_expr_nargs(ex) == 1 && jl_is_expr(jl_exprarg(ex, 0))) {
         jl_expr_t *fr = (jl_expr_t*)jl_exprarg(ex, 0);
-        if (fr->head == colon_sym) {
+        if (fr->head == jl_colon_sym) {
             if (jl_expr_nargs(fr) > 0 && jl_is_expr(jl_exprarg(fr, 0))) {
                 jl_expr_t *path = (jl_expr_t*)jl_exprarg(fr, 0);
-                if (((jl_expr_t*)path)->head == dot_sym) {
+                if (((jl_expr_t*)path)->head == jl_dot_sym) {
                     jl_sym_t *name = NULL;
                     jl_module_t *from = eval_import_path(m, NULL, path->args, &name, keyword);
                     if (name != NULL) {
@@ -590,12 +635,21 @@ static jl_module_t *eval_import_from(jl_module_t *m JL_PROPAGATES_ROOT, jl_expr_
     return NULL;
 }
 
+static void check_macro_rename(jl_sym_t *from, jl_sym_t *to, const char *keyword)
+{
+    char *n1 = jl_symbol_name(from), *n2 = jl_symbol_name(to);
+    if (n1[0] == '@' && n2[0] != '@')
+        jl_errorf("cannot rename macro \"%s\" to non-macro \"%s\" in \"%s\"", n1, n2, keyword);
+    if (n1[0] != '@' && n2[0] == '@')
+        jl_errorf("cannot rename non-macro \"%s\" to macro \"%s\" in \"%s\"", n1, n2, keyword);
+}
+
 // Format msg and eval `throw(ErrorException(msg)))` in module `m`.
 // Used in `jl_toplevel_eval_flex` instead of `jl_errorf` so that the error
 // location in julia code gets into the backtrace.
 static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...)
 {
-    jl_value_t *throw_ex = (jl_value_t*)jl_exprn(call_sym, 2);
+    jl_value_t *throw_ex = (jl_value_t*)jl_exprn(jl_call_sym, 2);
     JL_GC_PUSH1(&throw_ex);
     jl_exprargset(throw_ex, 0, jl_builtin_throw);
     va_list args;
@@ -608,7 +662,7 @@ static void jl_eval_errorf(jl_module_t *m, const char* fmt, ...)
 
 jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int fast, int expanded)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     if (!jl_is_expr(e)) {
         if (jl_is_linenode(e)) {
             jl_lineno = jl_linenode_line(e);
@@ -630,7 +684,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
 
     jl_expr_t *ex = (jl_expr_t*)e;
 
-    if (ex->head == dot_sym) {
+    if (ex->head == jl_dot_sym && jl_expr_nargs(ex) != 1) {
         if (jl_expr_nargs(ex) != 2)
             jl_eval_errorf(m, "syntax: malformed \".\" expression");
         jl_value_t *lhs = jl_exprarg(ex, 0);
@@ -641,7 +695,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         }
     }
 
-    if (ptls->in_pure_callback) {
+    if (ct->ptls->in_pure_callback) {
         jl_error("eval cannot be used in a generated function");
     }
 
@@ -649,20 +703,20 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
     jl_code_info_t *thk = NULL;
     JL_GC_PUSH3(&mfunc, &thk, &ex);
 
-    size_t last_age = ptls->world_age;
+    size_t last_age = ct->world_age;
     if (!expanded && jl_needs_lowering(e)) {
-        ptls->world_age = jl_world_counter;
+        ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
         ex = (jl_expr_t*)jl_expand_with_loc_warn(e, m, jl_filename, jl_lineno);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     jl_sym_t *head = jl_is_expr(ex) ? ex->head : NULL;
 
-    if (head == module_sym) {
+    if (head == jl_module_sym) {
         jl_value_t *val = jl_eval_module_expr(m, ex);
         JL_GC_POP();
         return val;
     }
-    else if (head == using_sym) {
+    else if (head == jl_using_sym) {
         jl_sym_t *name = NULL;
         jl_module_t *from = eval_import_from(m, ex, "using");
         size_t i = 0;
@@ -672,7 +726,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         }
         for (; i < jl_expr_nargs(ex); i++) {
             jl_value_t *a = jl_exprarg(ex, i);
-            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == dot_sym) {
+            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_dot_sym) {
                 name = NULL;
                 jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)a)->args, &name, "using");
                 jl_module_t *u = import;
@@ -691,18 +745,31 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
                     if (m == jl_main_module && name == NULL) {
                         // TODO: for now, `using A` in Main also creates an explicit binding for `A`
                         // This will possibly be extended to all modules.
-                        import_module(m, u);
+                        import_module(m, u, NULL);
                     }
                 }
+                continue;
             }
-            else {
-                jl_eval_errorf(m, "syntax: malformed \"using\" statement");
+            else if (from && jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_as_sym && jl_expr_nargs(a) == 2 &&
+                     jl_is_expr(jl_exprarg(a, 0)) && ((jl_expr_t*)jl_exprarg(a, 0))->head == jl_dot_sym) {
+                jl_sym_t *asname = (jl_sym_t*)jl_exprarg(a, 1);
+                if (jl_is_symbol(asname)) {
+                    jl_expr_t *path = (jl_expr_t*)jl_exprarg(a, 0);
+                    name = NULL;
+                    jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)path)->args, &name, "using");
+                    assert(name);
+                    check_macro_rename(name, asname, "using");
+                    // `using A: B as C` syntax
+                    jl_module_use_as(m, import, name, asname);
+                    continue;
+                }
             }
+            jl_eval_errorf(m, "syntax: malformed \"using\" statement");
         }
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == import_sym) {
+    else if (head == jl_import_sym) {
         jl_sym_t *name = NULL;
         jl_module_t *from = eval_import_from(m, ex, "import");
         size_t i = 0;
@@ -712,24 +779,44 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         }
         for (; i < jl_expr_nargs(ex); i++) {
             jl_value_t *a = jl_exprarg(ex, i);
-            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == dot_sym) {
+            if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_dot_sym) {
                 name = NULL;
                 jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)a)->args, &name, "import");
                 if (name == NULL) {
-                    import_module(m, import);
+                    // `import A` syntax
+                    import_module(m, import, NULL);
                 }
                 else {
+                    // `import A.B` or `import A: B` syntax
                     jl_module_import(m, import, name);
                 }
+                continue;
             }
-            else {
-                jl_eval_errorf(m, "syntax: malformed \"import\" statement");
+            else if (jl_is_expr(a) && ((jl_expr_t*)a)->head == jl_as_sym && jl_expr_nargs(a) == 2 &&
+                     jl_is_expr(jl_exprarg(a, 0)) && ((jl_expr_t*)jl_exprarg(a, 0))->head == jl_dot_sym) {
+                jl_sym_t *asname = (jl_sym_t*)jl_exprarg(a, 1);
+                if (jl_is_symbol(asname)) {
+                    jl_expr_t *path = (jl_expr_t*)jl_exprarg(a, 0);
+                    name = NULL;
+                    jl_module_t *import = eval_import_path(m, from, ((jl_expr_t*)path)->args, &name, "import");
+                    if (name == NULL) {
+                        // `import A as B` syntax
+                        import_module(m, import, asname);
+                    }
+                    else {
+                        check_macro_rename(name, asname, "import");
+                        // `import A.B as C` syntax
+                        jl_module_import_as(m, import, name, asname);
+                    }
+                    continue;
+                }
             }
+            jl_eval_errorf(m, "syntax: malformed \"import\" statement");
         }
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == export_sym) {
+    else if (head == jl_export_sym) {
         for (size_t i = 0; i < jl_array_len(ex->args); i++) {
             jl_sym_t *name = (jl_sym_t*)jl_array_ptr_ref(ex->args, i);
             if (!jl_is_symbol(name))
@@ -739,28 +826,12 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == global_sym) {
-        // create uninitialized mutable binding for "global x" decl
-        size_t i, l = jl_array_len(ex->args);
-        for (i = 0; i < l; i++) {
-            jl_value_t *arg = jl_exprarg(ex, i);
-            jl_module_t *gm;
-            jl_sym_t *gs;
-            if (jl_is_globalref(arg)) {
-                gm = jl_globalref_mod(arg);
-                gs = jl_globalref_name(arg);
-            }
-            else {
-                assert(jl_is_symbol(arg));
-                gm = m;
-                gs = (jl_sym_t*)arg;
-            }
-            jl_get_binding_wr(gm, gs, 0);
-        }
+    else if (head == jl_global_sym) {
+        jl_eval_global_expr(m, ex, 0);
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == const_sym) {
+    else if (head == jl_const_sym) {
         jl_sym_t *arg = (jl_sym_t*)jl_exprarg(ex, 0);
         jl_module_t *gm;
         jl_sym_t *gs;
@@ -778,7 +849,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return jl_nothing;
     }
-    else if (head == toplevel_sym) {
+    else if (head == jl_toplevel_sym) {
         jl_value_t *res = jl_nothing;
         int i;
         for (i = 0; i < jl_array_len(ex->args); i++) {
@@ -787,7 +858,7 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         JL_GC_POP();
         return res;
     }
-    else if (head == error_sym || head == jl_incomplete_sym) {
+    else if (head == jl_error_sym || head == jl_incomplete_sym) {
         if (jl_expr_nargs(ex) == 0)
             jl_eval_errorf(m, "malformed \"%s\" expression", jl_symbol_name(head));
         if (jl_is_string(jl_exprarg(ex, 0)))
@@ -803,19 +874,20 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         return (jl_value_t*)ex;
     }
 
-    int has_intrinsics = 0, has_defs = 0, has_loops = 0;
-    assert(head == thunk_sym);
+    int has_intrinsics = 0, has_defs = 0, has_loops = 0, has_opaque = 0, forced_compile = 0;
+    assert(head == jl_thunk_sym);
     thk = (jl_code_info_t*)jl_exprarg(ex, 0);
     assert(jl_is_code_info(thk));
     assert(jl_typeis(thk->code, jl_array_any_type));
-    body_attributes((jl_array_t*)thk->code, &has_intrinsics, &has_defs, &has_loops);
+    body_attributes((jl_array_t*)thk->code, &has_intrinsics, &has_defs, &has_loops, &has_opaque, &forced_compile);
 
     jl_value_t *result;
-    if (has_intrinsics || (!has_defs && fast && has_loops &&
-                           jl_options.compile_enabled != JL_OPTIONS_COMPILE_OFF &&
-                           jl_options.compile_enabled != JL_OPTIONS_COMPILE_MIN &&
-                           jl_get_module_compile(m) != JL_OPTIONS_COMPILE_OFF &&
-                           jl_get_module_compile(m) != JL_OPTIONS_COMPILE_MIN)) {
+    if (forced_compile || has_intrinsics ||
+            (!has_defs && fast && has_loops &&
+            jl_options.compile_enabled != JL_OPTIONS_COMPILE_OFF &&
+            jl_options.compile_enabled != JL_OPTIONS_COMPILE_MIN &&
+            jl_get_module_compile(m) != JL_OPTIONS_COMPILE_OFF &&
+            jl_get_module_compile(m) != JL_OPTIONS_COMPILE_MIN)) {
         // use codegen
         mfunc = method_instance_for_thunk(thk, m);
         jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
@@ -823,17 +895,20 @@ jl_value_t *jl_toplevel_eval_flex(jl_module_t *JL_NONNULL m, jl_value_t *e, int
         // worthwhile and also unsound (see #24316).
         // TODO: This is still not correct since an `eval` can happen elsewhere, but it
         // helps in common cases.
-        size_t world = jl_world_counter;
-        ptls->world_age = world;
+        size_t world = jl_atomic_load_acquire(&jl_world_counter);
+        ct->world_age = world;
         if (!has_defs && jl_get_module_infer(m) != 0) {
             (void)jl_type_infer(mfunc, world, 0);
         }
         result = jl_invoke(/*func*/NULL, /*args*/NULL, /*nargs*/0, mfunc);
-        ptls->world_age = last_age;
+        ct->world_age = last_age;
     }
     else {
         // use interpreter
         assert(thk);
+        if (has_opaque) {
+            jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
+        }
         result = jl_interpret_toplevel_thunk(m, thk);
     }
 
@@ -853,6 +928,15 @@ static void jl_check_open_for(jl_module_t *m, const char* funcname)
         if (m != jl_main_module) { // TODO: this was grand-fathered in
             JL_LOCK(&jl_modules_mutex);
             int open = ptrhash_has(&jl_current_modules, (void*)m);
+            if (!open && jl_module_init_order != NULL) {
+                size_t i, l = jl_array_len(jl_module_init_order);
+                for (i = 0; i < l; i++) {
+                    if (m == (jl_module_t*)jl_array_ptr_ref(jl_module_init_order, i)) {
+                        open = 1;
+                        break;
+                    }
+                }
+            }
             JL_UNLOCK(&jl_modules_mutex);
             if (!open && !jl_is__toplevel__mod(m)) {
                 const char* name = jl_symbol_name(m->name);
@@ -865,10 +949,17 @@ static void jl_check_open_for(jl_module_t *m, const char* funcname)
     }
 }
 
+JL_DLLEXPORT void jl_check_top_level_effect(jl_module_t *m, char *fname)
+{
+    if (jl_current_task->ptls->in_pure_callback)
+        jl_errorf("%s cannot be used in a generated function", fname);
+    jl_check_open_for(m, fname);
+}
+
 JL_DLLEXPORT jl_value_t *jl_toplevel_eval_in(jl_module_t *m, jl_value_t *ex)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->in_pure_callback)
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->in_pure_callback)
         jl_error("eval cannot be used in a generated function");
     jl_check_open_for(m, "eval");
     jl_value_t *v = NULL;
@@ -895,7 +986,8 @@ JL_DLLEXPORT jl_value_t *jl_infer_thunk(jl_code_info_t *thk, jl_module_t *m)
     jl_method_instance_t *li = method_instance_for_thunk(thk, m);
     JL_GC_PUSH1(&li);
     jl_resolve_globals_in_ir((jl_array_t*)thk->code, m, NULL, 0);
-    jl_code_info_t *src = jl_type_infer(li, jl_get_ptls_states()->world_age, 0);
+    jl_task_t *ct = jl_current_task;
+    jl_code_info_t *src = jl_type_infer(li, ct->world_age, 0);
     JL_GC_POP();
     if (src)
         return src->rettype;
@@ -915,8 +1007,8 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
     if (!jl_is_string(text) || !jl_is_string(filename)) {
         jl_errorf("Expected `String`s for `text` and `filename`");
     }
-    jl_ptls_t ptls = jl_get_ptls_states();
-    if (ptls->in_pure_callback)
+    jl_task_t *ct = jl_current_task;
+    if (ct->ptls->in_pure_callback)
         jl_error("cannot use include inside a generated function");
     jl_check_open_for(module, "include");
 
@@ -926,14 +1018,14 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
     JL_GC_PUSH3(&ast, &result, &expression);
 
     ast = jl_svecref(jl_parse(jl_string_data(text), jl_string_len(text),
-                              filename, 0, (jl_value_t*)all_sym), 0);
-    if (!jl_is_expr(ast) || ((jl_expr_t*)ast)->head != toplevel_sym) {
+                              filename, 1, 0, (jl_value_t*)jl_all_sym), 0);
+    if (!jl_is_expr(ast) || ((jl_expr_t*)ast)->head != jl_toplevel_sym) {
         jl_errorf("jl_parse_all() must generate a top level expression");
     }
 
     int last_lineno = jl_lineno;
     const char *last_filename = jl_filename;
-    size_t last_age = jl_get_ptls_states()->world_age;
+    size_t last_age = ct->world_age;
     int lineno = 0;
     jl_lineno = 0;
     jl_filename = jl_string_data(filename);
@@ -950,7 +1042,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
             }
             expression = jl_expand_with_loc_warn(expression, module,
                                                  jl_string_data(filename), lineno);
-            jl_get_ptls_states()->world_age = jl_world_counter;
+            ct->world_age = jl_atomic_load_acquire(&jl_world_counter);
             result = jl_toplevel_eval_flex(module, expression, 1, 1);
         }
     }
@@ -960,7 +1052,7 @@ static jl_value_t *jl_parse_eval_all(jl_module_t *module, jl_value_t *text,
         goto finally; // skip jl_restore_excstack
     }
 finally:
-    jl_get_ptls_states()->world_age = last_age;
+    ct->world_age = last_age;
     jl_lineno = last_lineno;
     jl_filename = last_filename;
     if (err) {
diff --git a/src/typemap.c b/src/typemap.c
index 347b2147d9aea1..dfa8ac67f6abcf 100644
--- a/src/typemap.c
+++ b/src/typemap.c
@@ -27,8 +27,8 @@ static jl_value_t *jl_type_extract_name(jl_value_t *t1 JL_PROPAGATES_ROOT)
 {
     if (jl_is_unionall(t1))
         t1 = jl_unwrap_unionall(t1);
-    if (jl_is_vararg_type(t1)) {
-        return jl_type_extract_name(jl_tparam0(t1));
+    if (jl_is_vararg(t1)) {
+        return jl_type_extract_name(jl_unwrap_vararg(t1));
     }
     else if (jl_is_typevar(t1)) {
         return jl_type_extract_name(((jl_tvar_t*)t1)->ub);
@@ -57,15 +57,15 @@ static int jl_type_extract_name_precise(jl_value_t *t1, int invariant)
 {
     if (jl_is_unionall(t1))
         t1 = jl_unwrap_unionall(t1);
-    if (jl_is_vararg_type(t1)) {
-        return jl_type_extract_name_precise(jl_tparam0(t1), invariant);
+    if (jl_is_vararg(t1)) {
+        return jl_type_extract_name_precise(jl_unwrap_vararg(t1), invariant);
     }
     else if (jl_is_typevar(t1)) {
         return jl_type_extract_name_precise(((jl_tvar_t*)t1)->ub, 0);
     }
     else if (jl_is_datatype(t1)) {
         jl_datatype_t *dt = (jl_datatype_t*)t1;
-        if ((invariant || !dt->abstract) && !jl_is_kind(t1))
+        if ((invariant || !dt->name->abstract) && !jl_is_kind(t1))
             return 1;
         return 0;
     }
@@ -109,6 +109,8 @@ static int sig_match_by_type_simple(jl_value_t **types, size_t n, jl_tupletype_t
         jl_value_t *decl = jl_tparam(sig, i);
         jl_value_t *a = types[i];
         jl_value_t *unw = jl_is_unionall(decl) ? ((jl_unionall_t*)decl)->body : decl;
+        if (jl_is_vararg(a))
+            return 0;
         if (jl_is_type_type(unw)) {
             jl_value_t *tp0 = jl_tparam0(unw);
             if (jl_is_type_type(a)) {
@@ -144,11 +146,14 @@ static int sig_match_by_type_simple(jl_value_t **types, size_t n, jl_tupletype_t
             if (n - i != jl_unbox_long(jl_tparam1(decl)))
                 return 0;
         }
-        jl_value_t *t = jl_tparam0(decl);
+        jl_value_t *t = jl_unwrap_vararg(decl);
         if (jl_is_typevar(t))
             t = ((jl_tvar_t*)t)->ub;
         for (; i < n; i++) {
-            if (!jl_subtype(types[i], t))
+            jl_value_t *ti = types[i];
+            if (i == n - 1 && jl_is_vararg(ti))
+                ti = jl_unwrap_vararg(ti);
+            if (!jl_subtype(ti, t))
                 return 0;
         }
         return 1;
@@ -254,28 +259,28 @@ static int is_cache_leaf(jl_value_t *ty, int tparam)
     return (jl_is_concrete_type(ty) && (tparam || !jl_is_kind(ty)));
 }
 
-static jl_typemap_t **mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
+static _Atomic(jl_typemap_t*) *mtcache_hash_lookup_bp(jl_array_t *cache JL_PROPAGATES_ROOT, jl_value_t *ty) JL_NOTSAFEPOINT
 {
     if (cache == (jl_array_t*)jl_an_empty_vec_any)
         return NULL;
-    jl_typemap_t **pml = jl_table_peek_bp(cache, ty);
+    _Atomic(jl_typemap_t*) *pml = jl_table_peek_bp(cache, ty);
     JL_GC_PROMISE_ROOTED(pml); // clang-sa doesn't trust our JL_PROPAGATES_ROOT claim
     return pml;
 }
 
-static void mtcache_hash_insert(jl_array_t **cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val)
+static void mtcache_hash_insert(_Atomic(jl_array_t*) *cache, jl_value_t *parent, jl_value_t *key, jl_typemap_t *val)
 {
     int inserted = 0;
-    jl_array_t *a = *cache;
+    jl_array_t *a = jl_atomic_load_relaxed(cache);
     if (a == (jl_array_t*)jl_an_empty_vec_any) {
         a = jl_alloc_vec_any(16);
-        *cache = a;
+        jl_atomic_store_release(cache, a);
         jl_gc_wb(parent, a);
     }
     a = jl_eqtable_put(a, key, val, &inserted);
     assert(inserted);
-    if (a != *cache) {
-        *cache = a;
+    if (a != jl_atomic_load_relaxed(cache)) {
+        jl_atomic_store_release(cache, a);
         jl_gc_wb(parent, a);
     }
 }
@@ -294,7 +299,7 @@ static jl_typemap_t *mtcache_hash_lookup(jl_array_t *cache JL_PROPAGATES_ROOT, j
 static int jl_typemap_array_visitor(jl_array_t *a, jl_typemap_visitor_fptr fptr, void *closure)
 {
     size_t i, l = jl_array_len(a);
-    jl_typemap_t **data = (jl_typemap_t **)jl_array_data(a);
+    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a);
     for (i = 1; i < l; i += 2) {
         jl_value_t *d = jl_atomic_load_relaxed(&data[i]);
         JL_GC_PROMISE_ROOTED(d);
@@ -389,7 +394,7 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty,
 {
     JL_GC_PUSH1(&a);
     size_t i, l = jl_array_len(a);
-    jl_typemap_t **data = (jl_typemap_t **)jl_array_data(a);
+    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_data(a);
     unsigned height = tparam & 2 ? jl_supertype_height((jl_datatype_t*)ty) : 0;
     for (i = 0; i < l; i += 2) {
         jl_value_t *t = jl_atomic_load_relaxed(&data[i]);
@@ -397,7 +402,7 @@ static int jl_typemap_intersection_array_visitor(jl_array_t *a, jl_value_t *ty,
         if (t == jl_nothing || t == NULL)
             continue;
         if (tparam & 2) {
-            jl_typemap_t *ml = data[i + 1];
+            jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i + 1]);
             JL_GC_PROMISE_ROOTED(ml);
             if (ty == (jl_value_t*)jl_any_type || // easy case: Any always matches
                 tname_intersection((jl_datatype_t*)ty, (jl_typename_t*)t, height)) {
@@ -643,14 +648,14 @@ static jl_typemap_entry_t *jl_typemap_entry_assoc_by_type(
     jl_value_t *unw = jl_unwrap_unionall((jl_value_t*)types);
     int isua = jl_is_unionall(types);
     size_t n = jl_nparams(unw);
-    int typesisva = n == 0 ? 0 : jl_is_vararg_type(jl_tparam(unw, n-1));
+    int typesisva = n == 0 ? 0 : jl_is_vararg(jl_tparam(unw, n-1));
     for (; ml != (void*)jl_nothing; ml = jl_atomic_load_relaxed(&ml->next)) {
         size_t lensig = jl_nparams(jl_unwrap_unionall((jl_value_t*)ml->sig));
         if (lensig == n || (ml->va && lensig <= n+1)) {
             int resetenv = 0, ismatch = 1;
             if (ml->simplesig != (void*)jl_nothing && !isua) {
                 size_t lensimplesig = jl_nparams(ml->simplesig);
-                int isva = lensimplesig > 0 && jl_is_vararg_type(jl_tparam(ml->simplesig, lensimplesig - 1));
+                int isva = lensimplesig > 0 && jl_is_vararg(jl_tparam(ml->simplesig, lensimplesig - 1));
                 if (lensig == n || (isva && lensimplesig <= n + 1))
                     ismatch = sig_match_by_type_simple(jl_svec_data(((jl_datatype_t*)types)->parameters), n,
                                                        ml->simplesig, lensimplesig, isva);
@@ -722,8 +727,8 @@ static jl_typemap_entry_t *jl_typemap_entry_lookup_by_type(
         jl_value_t *b = jl_unwrap_unionall((jl_value_t*)ml->sig);
         size_t na = jl_nparams(a);
         size_t nb = jl_nparams(b);
-        int va_a = na > 0 && jl_is_vararg_type(jl_tparam(a, na - 1));
-        int va_b = nb > 0 && jl_is_vararg_type(jl_tparam(b, nb - 1));
+        int va_a = na > 0 && jl_is_vararg(jl_tparam(a, na - 1));
+        int va_b = nb > 0 && jl_is_vararg(jl_tparam(b, nb - 1));
         if (!va_a && !va_b) {
             if (na != nb)
                 continue;
@@ -766,7 +771,7 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
         // compute the type at offset `offs` into `types`, which may be a Vararg
         if (l <= offs + 1) {
             ty = jl_tparam(ttypes, l - 1);
-            if (jl_is_vararg_type(ty)) {
+            if (jl_is_vararg(ty)) {
                 ty = jl_unwrap_vararg(ty);
                 isva = 1;
             }
@@ -840,7 +845,7 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                     if (!ty || !jl_has_empty_intersection((jl_value_t*)jl_type_type, ty)) {
                         // couldn't figure out unique `a0` initial point, so scan all for matches
                         size_t i, l = jl_array_len(tname);
-                        jl_typemap_t **data = (jl_typemap_t **)jl_array_ptr_data(tname);
+                        _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname);
                         JL_GC_PUSH1(&tname);
                         for (i = 1; i < l; i += 2) {
                             jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]);
@@ -879,7 +884,7 @@ jl_typemap_entry_t *jl_typemap_assoc_by_type(
                 else {
                     // doing subtype, but couldn't figure out unique `ty`, so scan all for supertypes
                     size_t i, l = jl_array_len(name1);
-                    jl_typemap_t **data = (jl_typemap_t **)jl_array_ptr_data(name1);
+                    _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(name1);
                     JL_GC_PUSH1(&name1);
                     for (i = 1; i < l; i += 2) {
                         jl_typemap_t *ml = jl_atomic_load_relaxed(&data[i]);
@@ -949,7 +954,7 @@ jl_typemap_entry_t *jl_typemap_entry_assoc_exact(jl_typemap_entry_t *ml, jl_valu
         if (lensig == n || (ml->va && lensig <= n+1)) {
             if (ml->simplesig != (void*)jl_nothing) {
                 size_t lensimplesig = jl_nparams(ml->simplesig);
-                int isva = lensimplesig > 0 && jl_is_vararg_type(jl_tparam(ml->simplesig, lensimplesig - 1));
+                int isva = lensimplesig > 0 && jl_is_vararg(jl_tparam(ml->simplesig, lensimplesig - 1));
                 if (lensig == n || (isva && lensimplesig <= n + 1)) {
                     if (!sig_match_simple(arg1, args, n, jl_svec_data(ml->simplesig->parameters), isva, lensimplesig))
                         continue;
@@ -1029,10 +1034,10 @@ jl_typemap_entry_t *jl_typemap_level_assoc_exact(jl_typemap_level_t *cache, jl_v
             else {
                 // couldn't figure out unique `name` initial point, so must scan all for matches
                 size_t i, l = jl_array_len(tname);
-                jl_typemap_t **data = (jl_typemap_t **)jl_array_ptr_data(tname);
+                _Atomic(jl_typemap_t*) *data = (_Atomic(jl_typemap_t*)*)jl_array_ptr_data(tname);
                 JL_GC_PUSH1(&tname);
                 for (i = 1; i < l; i += 2) {
-                    jl_typemap_t *ml_or_cache = data[i];
+                    jl_typemap_t *ml_or_cache = jl_atomic_load_relaxed(&data[i]);
                     if (ml_or_cache == NULL || ml_or_cache == jl_nothing)
                         continue;
                     jl_typemap_entry_t *ml = jl_typemap_assoc_exact(ml_or_cache, arg1, args, n, offs + 1, world);
@@ -1077,43 +1082,40 @@ static unsigned jl_typemap_list_count_locked(jl_typemap_entry_t *ml) JL_NOTSAFEP
     unsigned count = 0;
     while (ml != (void*)jl_nothing) {
         count++;
-        ml = ml->next;
+        ml = jl_atomic_load_relaxed(&ml->next);
     }
     return count;
 }
 
-static void jl_typemap_level_insert_(jl_typemap_t *map, jl_typemap_level_t *cache, jl_typemap_entry_t *newrec, int8_t offs, const struct jl_typemap_info *tparams);
-static void jl_typemap_list_insert_sorted(
-        jl_typemap_t *map, jl_typemap_entry_t **pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, const struct jl_typemap_info *tparams);
+static void jl_typemap_level_insert_(jl_typemap_t *map, jl_typemap_level_t *cache, jl_typemap_entry_t *newrec, int8_t offs);
 
 static jl_typemap_level_t *jl_new_typemap_level(void)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     jl_typemap_level_t *cache =
-        (jl_typemap_level_t*)jl_gc_alloc(ptls, sizeof(jl_typemap_level_t),
+        (jl_typemap_level_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_level_t),
                                          jl_typemap_level_type);
-    cache->arg1 = (jl_array_t*)jl_an_empty_vec_any;
-    cache->targ = (jl_array_t*)jl_an_empty_vec_any;
-    cache->name1 = (jl_array_t*)jl_an_empty_vec_any;
-    cache->tname = (jl_array_t*)jl_an_empty_vec_any;
-    cache->linear = (jl_typemap_entry_t*)jl_nothing;
-    cache->any = jl_nothing;
+    jl_atomic_store_relaxed(&cache->arg1, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->targ, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->name1, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->tname, (jl_array_t*)jl_an_empty_vec_any);
+    jl_atomic_store_relaxed(&cache->linear, (jl_typemap_entry_t*)jl_nothing);
+    jl_atomic_store_relaxed(&cache->any, jl_nothing);
     return cache;
 }
 
 static jl_typemap_level_t *jl_method_convert_list_to_cache(
-        jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t offs,
-        const struct jl_typemap_info *tparams)
+        jl_typemap_t *map, jl_typemap_entry_t *ml, int8_t offs)
 {
     jl_typemap_level_t *cache = jl_new_typemap_level();
     jl_typemap_entry_t *next = NULL;
     JL_GC_PUSH3(&cache, &next, &ml);
     while (ml != (void*)jl_nothing) {
-        next = ml->next;
-        ml->next = (jl_typemap_entry_t*)jl_nothing;
+        next = jl_atomic_load_relaxed(&ml->next);
+        jl_atomic_store_relaxed(&ml->next, (jl_typemap_entry_t*)jl_nothing);
+        // n.b. this is being done concurrently with lookups!
         // TODO: is it safe to be doing this concurrently with lookups?
-        jl_typemap_level_insert_(map, cache, ml, offs, tparams);
+        jl_typemap_level_insert_(map, cache, ml, offs);
         ml = next;
     }
     JL_GC_POP();
@@ -1121,59 +1123,62 @@ static jl_typemap_level_t *jl_method_convert_list_to_cache(
 }
 
 static void jl_typemap_list_insert_(
-        jl_typemap_t *map, jl_typemap_entry_t **pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, const struct jl_typemap_info *tparams)
+        jl_typemap_t *map, _Atomic(jl_typemap_entry_t*) *pml, jl_value_t *parent,
+        jl_typemap_entry_t *newrec)
 {
-    if (*pml == (void*)jl_nothing || newrec->isleafsig || (tparams && tparams->unsorted)) {
-        newrec->next = *pml;
-        jl_gc_wb(newrec, newrec->next);
-        jl_atomic_store_release(pml, newrec);
-        jl_gc_wb(parent, newrec);
-    }
-    else {
-        jl_typemap_list_insert_sorted(map, pml, parent, newrec, tparams);
+    jl_typemap_entry_t *l = jl_atomic_load_relaxed(pml);
+    while ((jl_value_t*)l != jl_nothing) {
+        if (newrec->isleafsig || !l->isleafsig)
+            if (newrec->issimplesig || !l->issimplesig)
+                break;
+        pml = &l->next;
+        parent = (jl_value_t*)l;
+        l = jl_atomic_load_relaxed(&l->next);
     }
+    jl_atomic_store_relaxed(&newrec->next, l);
+    jl_gc_wb(newrec, l);
+    jl_atomic_store_release(pml, newrec);
+    jl_gc_wb(parent, newrec);
 }
 
 static void jl_typemap_insert_generic(
-        jl_typemap_t *map, jl_typemap_t **pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, int8_t offs,
-        const struct jl_typemap_info *tparams)
+        jl_typemap_t *map, _Atomic(jl_typemap_t*) *pml, jl_value_t *parent,
+        jl_typemap_entry_t *newrec, int8_t offs)
 {
-    if (jl_typeof(*pml) == (jl_value_t*)jl_typemap_level_type) {
-        jl_typemap_level_insert_(map, (jl_typemap_level_t*)*pml, newrec, offs, tparams);
+    jl_typemap_t *ml = jl_atomic_load_relaxed(pml);
+    if (jl_typeof(ml) == (jl_value_t*)jl_typemap_level_type) {
+        jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
         return;
     }
 
-    unsigned count = jl_typemap_list_count_locked((jl_typemap_entry_t*)*pml);
+    unsigned count = jl_typemap_list_count_locked((jl_typemap_entry_t*)ml);
     if (count > MAX_METHLIST_COUNT) {
-        *pml = (jl_typemap_t*)jl_method_convert_list_to_cache(
-            map, (jl_typemap_entry_t *)*pml,
-            offs, tparams);
-        jl_gc_wb(parent, *pml);
-        jl_typemap_level_insert_(map, (jl_typemap_level_t*)*pml, newrec, offs, tparams);
+        ml = (jl_typemap_t*)jl_method_convert_list_to_cache(
+            map, (jl_typemap_entry_t*)ml, offs);
+        jl_atomic_store_release(pml, ml);
+        jl_gc_wb(parent, ml);
+        jl_typemap_level_insert_(map, (jl_typemap_level_t*)ml, newrec, offs);
         return;
     }
 
-    jl_typemap_list_insert_(map, (jl_typemap_entry_t **)pml,
-        parent, newrec, tparams);
+    jl_typemap_list_insert_(map, (_Atomic(jl_typemap_entry_t*)*)pml,
+        parent, newrec);
 }
 
 static void jl_typemap_array_insert_(
-        jl_typemap_t *map, jl_array_t **cache, jl_value_t *key, jl_typemap_entry_t *newrec,
-        jl_value_t *parent, int8_t offs,
-        const struct jl_typemap_info *tparams)
+        jl_typemap_t *map, _Atomic(jl_array_t*) *pcache, jl_value_t *key, jl_typemap_entry_t *newrec,
+        jl_value_t *parent, int8_t offs)
 {
-    jl_typemap_t **pml = mtcache_hash_lookup_bp(*cache, key);
+    jl_array_t *cache = jl_atomic_load_relaxed(pcache);
+    _Atomic(jl_typemap_t*) *pml = mtcache_hash_lookup_bp(cache, key);
     if (pml != NULL)
-        jl_typemap_insert_generic(map, pml, (jl_value_t*)*cache, newrec, offs+1, tparams);
+        jl_typemap_insert_generic(map, pml, (jl_value_t*)cache, newrec, offs+1);
     else
-        mtcache_hash_insert(cache, parent, key, (jl_typemap_t*)newrec);
+        mtcache_hash_insert(pcache, parent, key, (jl_typemap_t*)newrec);
 }
 
 static void jl_typemap_level_insert_(
-        jl_typemap_t *map, jl_typemap_level_t *cache, jl_typemap_entry_t *newrec, int8_t offs,
-        const struct jl_typemap_info *tparams)
+        jl_typemap_t *map, jl_typemap_level_t *cache, jl_typemap_entry_t *newrec, int8_t offs)
 {
     jl_value_t *ttypes = jl_unwrap_unionall((jl_value_t*)newrec->sig);
     size_t l = jl_nparams(ttypes);
@@ -1182,7 +1187,7 @@ static void jl_typemap_level_insert_(
     int isva = 0;
     if (l <= offs + 1) {
         t1 = jl_tparam(ttypes, l - 1);
-        if (jl_is_vararg_type(t1)) {
+        if (jl_is_vararg(t1)) {
             isva = 1;
             t1 = jl_unwrap_vararg(t1);
         }
@@ -1200,7 +1205,7 @@ static void jl_typemap_level_insert_(
         t1 = (jl_value_t*)jl_assume(jl_typeofbottom_type)->super;
     // If the type at `offs` is Any, put it in the Any list
     if (t1 && jl_is_any(t1)) {
-        jl_typemap_insert_generic(map, &cache->any, (jl_value_t*)cache, newrec, offs+1, tparams);
+        jl_typemap_insert_generic(map, &cache->any, (jl_value_t*)cache, newrec, offs+1);
         return;
     }
     // Don't put Varargs in the optimized caches (too hard to handle in lookup and bp)
@@ -1211,12 +1216,12 @@ static void jl_typemap_level_insert_(
             // and we use the table indexed for that purpose.
             jl_value_t *a0 = jl_tparam0(t1);
             if (is_cache_leaf(a0, 1)) {
-                jl_typemap_array_insert_(map, &cache->targ, a0, newrec, (jl_value_t*)cache, offs, tparams);
+                jl_typemap_array_insert_(map, &cache->targ, a0, newrec, (jl_value_t*)cache, offs);
                 return;
             }
         }
         if (is_cache_leaf(t1, 0)) {
-            jl_typemap_array_insert_(map, &cache->arg1, t1, newrec, (jl_value_t*)cache, offs, tparams);
+            jl_typemap_array_insert_(map, &cache->arg1, t1, newrec, (jl_value_t*)cache, offs);
             return;
         }
 
@@ -1226,23 +1231,23 @@ static void jl_typemap_level_insert_(
         if (jl_is_type_type(t1)) {
             a0 = jl_type_extract_name(jl_tparam0(t1));
             jl_datatype_t *super = a0 ? (jl_datatype_t*)jl_unwrap_unionall(((jl_typename_t*)a0)->wrapper) : jl_any_type;
-            jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, offs, tparams);
+            jl_typemap_array_insert_(map, &cache->tname, (jl_value_t*)super->name, newrec, (jl_value_t*)cache, offs);
             return;
         }
         a0 = jl_type_extract_name(t1);
         if (a0 && a0 != (jl_value_t*)jl_any_type->name) {
-            jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, offs, tparams);
+            jl_typemap_array_insert_(map, &cache->name1, a0, newrec, (jl_value_t*)cache, offs);
             return;
         }
     }
-    jl_typemap_list_insert_(map, &cache->linear, (jl_value_t*)cache, newrec, tparams);
+    jl_typemap_list_insert_(map, &cache->linear, (jl_value_t*)cache, newrec);
 }
 
 jl_typemap_entry_t *jl_typemap_alloc(
         jl_tupletype_t *type, jl_tupletype_t *simpletype, jl_svec_t *guardsigs,
         jl_value_t *newvalue, size_t min_world, size_t max_world)
 {
-    jl_ptls_t ptls = jl_get_ptls_states();
+    jl_task_t *ct = jl_current_task;
     assert(min_world > 0 && max_world > 0);
     if (!simpletype)
         simpletype = (jl_tupletype_t*)jl_nothing;
@@ -1259,7 +1264,7 @@ jl_typemap_entry_t *jl_typemap_alloc(
             isleafsig = 0; // Type{} may have a higher priority than a kind
         else if (jl_is_type_type(decl))
             isleafsig = 0; // Type{} may need special processing to compute the match
-        else if (jl_is_vararg_type(decl))
+        else if (jl_is_vararg(decl))
             isleafsig = 0; // makes iteration easier when the endpoints are the same
         else if (decl == (jl_value_t*)jl_any_type)
             isleafsig = 0; // Any needs to go in the general cache
@@ -1268,13 +1273,13 @@ jl_typemap_entry_t *jl_typemap_alloc(
     }
 
     jl_typemap_entry_t *newrec =
-        (jl_typemap_entry_t*)jl_gc_alloc(ptls, sizeof(jl_typemap_entry_t),
+        (jl_typemap_entry_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typemap_entry_t),
                                          jl_typemap_entry_type);
     newrec->sig = type;
     newrec->simplesig = simpletype;
     newrec->func.value = newvalue;
     newrec->guardsigs = guardsigs;
-    newrec->next = (jl_typemap_entry_t*)jl_nothing;
+    jl_atomic_store_relaxed(&newrec->next, (jl_typemap_entry_t*)jl_nothing);
     newrec->min_world = min_world;
     newrec->max_world = max_world;
     newrec->va = isva;
@@ -1283,44 +1288,11 @@ jl_typemap_entry_t *jl_typemap_alloc(
     return newrec;
 }
 
-void jl_typemap_insert(jl_typemap_t **cache, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, int8_t offs,
-        const struct jl_typemap_info *tparams)
-{
-    jl_typemap_insert_generic(*cache, cache, parent, newrec, offs, tparams);
-}
-
-static void jl_typemap_list_insert_sorted(
-        jl_typemap_t *map, jl_typemap_entry_t **pml, jl_value_t *parent,
-        jl_typemap_entry_t *newrec, const struct jl_typemap_info *tparams)
+void jl_typemap_insert(_Atomic(jl_typemap_t *) *pcache, jl_value_t *parent,
+        jl_typemap_entry_t *newrec, int8_t offs)
 {
-    jl_typemap_entry_t *l, **pl;
-    pl = pml;
-    l = *pml;
-    jl_value_t *pa = parent;
-    while ((jl_value_t*)l != jl_nothing) {
-        if (!l->isleafsig) { // quickly ignore all of the leafsig entries (these were handled by caller)
-            if (jl_type_morespecific((jl_value_t*)newrec->sig, (jl_value_t*)l->sig)) {
-                if (l->simplesig == (void*)jl_nothing ||
-                    newrec->simplesig != (void*)jl_nothing ||
-                    !jl_types_equal((jl_value_t*)l->sig, (jl_value_t*)newrec->sig)) {
-                    // might need to insert multiple entries for a lookup differing only by their simplesig
-                    // when simplesig contains a kind
-                    // TODO: make this test more correct or figure out a better way to compute this
-                    break;
-                }
-            }
-        }
-        pl = &l->next;
-        pa = (jl_value_t*)l;
-        l = l->next;
-    }
-
-    // insert newrec at the first point it is more specific than the following method
-    newrec->next = l;
-    jl_gc_wb(newrec, l);
-    jl_atomic_store_release(pl, newrec);
-    jl_gc_wb(pa, newrec);
+    jl_typemap_t *cache = jl_atomic_load_relaxed(pcache);
+    jl_typemap_insert_generic(cache, pcache, parent, newrec, offs);
 }
 
 #ifdef __cplusplus
diff --git a/src/uprobes.d b/src/uprobes.d
new file mode 100644
index 00000000000000..29dc39598c1da6
--- /dev/null
+++ b/src/uprobes.d
@@ -0,0 +1,34 @@
+/* Julia DTrace provider */
+
+provider julia {
+    probe gc__begin(int collection);
+    probe gc__stop_the_world();
+    probe gc__mark__begin();
+    probe gc__mark__end(int64_t scanned_bytes, int64_t perm_scanned_bytes);
+    probe gc__sweep__begin(int full);
+    probe gc__sweep__end();
+    probe gc__end();
+    probe gc__finalizer();
+
+    probe rt__run__task(jl_task_t *task);
+    probe rt__pause__task(jl_task_t *task);
+    probe rt__new__task(jl_task_t *parent, jl_task_t *child);
+    probe rt__start__task(jl_task_t *task);
+    probe rt__finish__task(jl_task_t *task);
+    probe rt__start__process__events(jl_task_t *task);
+    probe rt__finish__process__events(jl_task_t *task);
+    probe rt__taskq__insert(jl_ptls_t ptls, jl_task_t *task);
+    probe rt__taskq__get(jl_ptls_t ptls, jl_task_t *task);
+    probe rt__sleep__check__wake(jl_ptls_t other, int8_t old_state);
+    probe rt__sleep__check__wakeup(jl_ptls_t ptls);
+    probe rt__sleep__check__sleep(jl_ptls_t ptls);
+    probe rt__sleep__check__taskq__wake(jl_ptls_t ptls);
+    probe rt__sleep__check__task__wake(jl_ptls_t ptls);
+    probe rt__sleep__check__uv__wake(jl_ptls_t ptls);
+};
+
+#pragma D attributes Evolving/Evolving/Common provider julia provider
+#pragma D attributes Evolving/Evolving/Common provider julia module
+#pragma D attributes Evolving/Evolving/Common provider julia function
+#pragma D attributes Evolving/Evolving/Common provider julia name
+#pragma D attributes Evolving/Evolving/Common provider julia args
diff --git a/src/utils.scm b/src/utils.scm
index 43c3214cf5ebb5..7be6b2999a90c2 100644
--- a/src/utils.scm
+++ b/src/utils.scm
@@ -78,6 +78,8 @@
         (else '())))
 
 (define (caddddr x) (car (cdr (cdr (cdr (cdr x))))))
+(define (cdddddr x) (cdr (cdr (cdr (cdr (cdr x))))))
+(define (cadddddr x) (car (cdddddr x)))
 
 (define (table.clone t)
   (let ((nt (table)))
@@ -93,3 +95,12 @@
         any
         (loop (cdr lst)
               (or (pred (car lst)) any)))))
+
+;; construct a table mapping each element of `lst` to its index (1-indexed)
+(define (symbol-to-idx-map lst)
+  (let ((tbl (table)))
+    (let loop ((xs lst) (i 1))
+      (if (pair? xs)
+          (begin (put! tbl (car xs) i)
+                 (loop (cdr xs) (+ i 1)))))
+    tbl))
diff --git a/src/support/win32_ucontext.c b/src/win32_ucontext.c
similarity index 97%
rename from src/support/win32_ucontext.c
rename to src/win32_ucontext.c
index df50eb209341ee..c6d43723080044 100644
--- a/src/support/win32_ucontext.c
+++ b/src/win32_ucontext.c
@@ -26,11 +26,9 @@ JL_DLLEXPORT EXCEPTION_DISPOSITION NTAPI __julia_personality(
 
     EXCEPTION_DISPOSITION rval;
     switch (jl_exception_handler(&ExceptionInfo)) {
-#ifndef _MSC_VER
         case EXCEPTION_EXECUTE_HANDLER:
             rval = ExceptionExecuteHandler;
             break;
-#endif
         case EXCEPTION_CONTINUE_EXECUTION:
             rval = ExceptionContinueExecution;
             break;
@@ -73,7 +71,7 @@ void jl_makecontext(win32_ucontext_t *ucp, void (*func)(void))
     jmpbuf->Rip = (unsigned long long)func;
     jmpbuf->Rsp = (unsigned long long)stack_top;
     jmpbuf->Rbp = 0;
-    jmpbuf->Frame = 0; // SEH frame
+    jmpbuf->Frame = ~(uint64_t)0; // SEH frame
 #elif defined(_CPU_X86_)
     jmpbuf->Eip = (unsigned long)func;
     jmpbuf->Esp = (unsigned long)stack_top;
diff --git a/src/support/win32_ucontext.h b/src/win32_ucontext.h
similarity index 95%
rename from src/support/win32_ucontext.h
rename to src/win32_ucontext.h
index 6730cb96ee873e..b856abdc26eef5 100644
--- a/src/support/win32_ucontext.h
+++ b/src/win32_ucontext.h
@@ -16,7 +16,7 @@ typedef struct {
         size_t ss_size;
     } uc_stack;
     jmp_buf uc_mcontext;
-#ifdef JL_TSAN_ENABLED
+#ifdef _COMPILER_TSAN_ENABLED_
     void *tsan_state;
 #endif
 } win32_ucontext_t;
diff --git a/stdlib/.gitignore b/stdlib/.gitignore
index 6679403487f0f3..ffbc2f12f52da2 100644
--- a/stdlib/.gitignore
+++ b/stdlib/.gitignore
@@ -7,3 +7,17 @@
 /LibCURL
 /Downloads-*
 /Downloads
+/ArgTools-*
+/ArgTools
+/Tar-*
+/Tar
+/NetworkOptions-*
+/NetworkOptions
+/SuiteSparse-*
+/SuiteSparse
+/SparseArrays-*
+/SparseArrays
+/SHA-*
+/SHA
+/*_jll/StdlibArtifacts.toml
+/*/Manifest.toml
diff --git a/stdlib/ArgTools.version b/stdlib/ArgTools.version
new file mode 100644
index 00000000000000..0ae273bb18db64
--- /dev/null
+++ b/stdlib/ArgTools.version
@@ -0,0 +1,4 @@
+ARGTOOLS_BRANCH = master
+ARGTOOLS_SHA1 = 08b11b2707593d4d7f92e5f1b9dba7668285ff82
+ARGTOOLS_GIT_URL := https://github.com/JuliaIO/ArgTools.jl.git
+ARGTOOLS_TAR_URL = https://api.github.com/repos/JuliaIO/ArgTools.jl/tarball/$1
diff --git a/stdlib/Artifacts/docs/src/index.md b/stdlib/Artifacts/docs/src/index.md
new file mode 100644
index 00000000000000..80f4c62cbf77fb
--- /dev/null
+++ b/stdlib/Artifacts/docs/src/index.md
@@ -0,0 +1,21 @@
+# Artifacts
+
+```@meta
+DocTestSetup = :(using Artifacts)
+```
+
+Starting with Julia 1.6, the artifacts support has moved from `Pkg.jl` to Julia itself.
+Until proper documentation can be added here, you can learn more about artifacts in the
+`Pkg.jl` manual at <https://julialang.github.io/Pkg.jl/v1/artifacts/>.
+
+!!! compat "Julia 1.6"
+    Julia's artifacts API requires at least Julia 1.6. In Julia
+    versions 1.3 to 1.5, you can use `Pkg.Artifacts` instead.
+
+
+```@docs
+Artifacts.artifact_meta
+Artifacts.artifact_hash
+Artifacts.find_artifacts_toml
+Artifacts.@artifact_str
+```
diff --git a/stdlib/Artifacts/src/Artifacts.jl b/stdlib/Artifacts/src/Artifacts.jl
index 531524ca32a07e..6d3bdb5fb674b4 100644
--- a/stdlib/Artifacts/src/Artifacts.jl
+++ b/stdlib/Artifacts/src/Artifacts.jl
@@ -1,20 +1,21 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 module Artifacts
 
 import Base: get, SHA1
 using Base.BinaryPlatforms, Base.TOML
 
 export artifact_exists, artifact_path, artifact_meta, artifact_hash,
-       find_artifacts_toml, @artifact_str
+       select_downloadable_artifacts, find_artifacts_toml, @artifact_str
 
 """
     parse_toml(path::String)
 
-Uses Base.TOML to parse a TOML file
+Uses Base.TOML to parse a TOML file. Do not mutate the returned dictionary.
 """
 function parse_toml(path::String)
-    p = Base.TOML.Parser()
-    Base.TOML.reinit!(p, read(path, String); filepath=path)
-    return Base.TOML.parse(p)
+    # Uses the caching mechanics for toml files in Base
+    Base.parsed_toml(path)
 end
 
 # keep in sync with Base.project_names and Base.manifest_names
@@ -72,7 +73,7 @@ a `~/.julia/artifacts/Override.toml` file with the following contents:
 This file defines four overrides; two which override specific artifacts identified
 through their content hashes, two which override artifacts based on their bound names
 within a particular package's UUID.  In both cases, there are two different targets of
-the override: overriding to an on-disk location through an absolutet path, and
+the override: overriding to an on-disk location through an absolute path, and
 overriding to another artifact by its content-hash.
 """
 const ARTIFACT_OVERRIDES = Ref{Union{Dict{Symbol,Any},Nothing}}(nothing)
@@ -271,17 +272,17 @@ function unpack_platform(entry::Dict{String,Any}, name::String,
     end
 
     # Collect all String-valued mappings in `entry` and use them as tags
-    tags = Dict{Symbol, String}()
+    tags = Dict{String, String}()
     for (k, v) in entry
         if v isa String
-            tags[Symbol(k)] = v
+            tags[k] = v
         end
     end
     # Removing some known entries that shouldn't be passed through `tags`
-    delete!(tags, :os)
-    delete!(tags, :arch)
-    delete!(tags, Symbol("git-tree-sha1"))
-    return Platform(entry["arch"], entry["os"]; tags...)
+    delete!(tags, "os")
+    delete!(tags, "arch")
+    delete!(tags, "git-tree-sha1")
+    return Platform(entry["arch"], entry["os"], tags)
 end
 
 function pack_platform!(meta::Dict, p::AbstractPlatform)
@@ -407,7 +408,7 @@ end
 
 """
     artifact_hash(name::String, artifacts_toml::String;
-                  platform::AbstractPlatform = platform_key_abi())
+                  platform::AbstractPlatform = HostPlatform())
 
 Thin wrapper around `artifact_meta()` to return the hash of the specified, platform-
 collapsed artifact.  Returns `nothing` if no mapping can be found.
@@ -417,7 +418,7 @@ collapsed artifact.  Returns `nothing` if no mapping can be found.
 """
 function artifact_hash(name::String, artifacts_toml::String;
                        platform::AbstractPlatform = HostPlatform(),
-                       pkg_uuid::Union{Base.UUID,Nothing}=nothing)
+                       pkg_uuid::Union{Base.UUID,Nothing}=nothing)::Union{Nothing, SHA1}
     meta = artifact_meta(name, artifacts_toml; platform=platform)
     if meta === nothing
         return nothing
@@ -426,6 +427,50 @@ function artifact_hash(name::String, artifacts_toml::String;
     return SHA1(meta["git-tree-sha1"])
 end
 
+function select_downloadable_artifacts(artifact_dict::Dict, artifacts_toml::String;
+                                       platform::AbstractPlatform = HostPlatform(),
+                                       pkg_uuid::Union{Nothing,Base.UUID} = nothing,
+                                       include_lazy::Bool = false)
+    artifacts = Dict{String,Any}()
+    for name in keys(artifact_dict)
+        # Get the metadata about this name for the requested platform
+        meta = artifact_meta(name, artifact_dict, artifacts_toml; platform=platform)
+
+        # If there are no instances of this name for the desired platform, skip it
+        # Also skip if there's no `download` stanza (e.g. it's only a local artifact)
+        # or if it's lazy and we're not explicitly looking for lazy artifacts.
+        if meta === nothing || !haskey(meta, "download") || (get(meta, "lazy", false) && !include_lazy)
+            continue
+        end
+
+        # Else, welcome it into the meta-fold
+        artifacts[name] = meta
+    end
+    return artifacts
+end
+
+"""
+    select_downloadable_artifacts(artifacts_toml::String;
+                                  platform = HostPlatform,
+                                  include_lazy = false,
+                                  pkg_uuid = nothing)
+
+Returns a dictionary where every entry is an artifact from the given `Artifacts.toml`
+that should be downloaded for the requested platform.  Lazy artifacts are included if
+`include_lazy` is set.
+"""
+function select_downloadable_artifacts(artifacts_toml::String;
+                                       platform::AbstractPlatform = HostPlatform(),
+                                       include_lazy::Bool = false,
+                                       pkg_uuid::Union{Nothing,Base.UUID} = nothing)
+    if !isfile(artifacts_toml)
+        return Dict{String,Any}()
+    end
+    artifact_dict = load_artifacts_toml(artifacts_toml; pkg_uuid=pkg_uuid)
+    return select_downloadable_artifacts(artifact_dict, artifacts_toml; platform, pkg_uuid, include_lazy)
+end
+
+
 """
     find_artifacts_toml(path::String)
 
@@ -478,7 +523,7 @@ function jointail(dir, tail)
     end
 end
 
-function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash)
+function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dict, hash, platform, @nospecialize(lazyartifacts))
     if haskey(Base.module_keys, __module__)
         # Process overrides for this UUID, if we know what it is
         process_overrides(artifact_dict, Base.module_keys[__module__].uuid)
@@ -492,13 +537,21 @@ function _artifact_str(__module__, artifacts_toml, name, path_tail, artifact_dic
         end
     end
 
-    # If not, we need to download it.  We look up the Pkg module through `Base.loaded_modules()`
-    # then invoke `ensure_artifact_installed()`:
-    Pkg = first(filter(p-> p[1].name == "Pkg", Base.loaded_modules))[2]
-    return jointail(Pkg.Artifacts.ensure_artifact_installed(string(name), artifacts_toml), path_tail)
+    # If not, try determining what went wrong:
+    meta = artifact_meta(name, artifact_dict, artifacts_toml; platform)
+    if meta !== nothing && get(meta, "lazy", false)
+        if lazyartifacts isa Module && isdefined(lazyartifacts, :ensure_artifact_installed)
+            if nameof(lazyartifacts) in (:Pkg, :Artifacts)
+                Base.depwarn("using Pkg instead of using LazyArtifacts is deprecated", :var"@artifact_str", force=true)
+            end
+            return jointail(lazyartifacts.ensure_artifact_installed(string(name), artifacts_toml; platform), path_tail)
+        end
+        error("Artifact $(repr(name)) is a lazy artifact; package developers must call `using LazyArtifacts` in $(__module__) before using lazy artifacts.")
+    end
+    error("Artifact $(repr(name)) was not installed correctly. Try `using Pkg; Pkg.instantiate()` to re-install all missing resources.")
 end
 
-"""
+raw"""
     split_artifact_slash(name::String)
 
 Splits an artifact indexing string by path deliminters, isolates the first path element,
@@ -506,7 +559,7 @@ returning that and the `joinpath()` of the remaining arguments.  This normalizes
 separators to the native path separator for the current platform.  Examples:
 
 # Examples
-```jldoctest
+```jldoctest; setup = :(using Artifacts: split_artifact_slash)
 julia> split_artifact_slash("Foo")
 ("Foo", "")
 
@@ -539,18 +592,20 @@ function split_artifact_slash(name::String)
 end
 
 """
-    artifact_slash_lookup(name::String, artifacts_toml::String)
+    artifact_slash_lookup(name::String, atifact_dict::Dict,
+                          artifacts_toml::String, platform::Platform)
 
 Returns `artifact_name`, `artifact_path_tail`, and `hash` by looking the results up in
 the given `artifacts_toml`, first extracting the name and path tail from the given `name`
 to support slash-indexing within the given artifact.
 """
-function artifact_slash_lookup(name::String, artifact_dict::Dict, artifacts_toml::String)
+function artifact_slash_lookup(name::String, artifact_dict::Dict,
+                               artifacts_toml::String, platform::Platform)
     artifact_name, artifact_path_tail = split_artifact_slash(name)
 
-    meta = artifact_meta(artifact_name, artifact_dict, artifacts_toml)
+    meta = artifact_meta(artifact_name, artifact_dict, artifacts_toml; platform)
     if meta === nothing
-        error("Cannot locate artifact '$(name)' in '$(artifacts_toml)'")
+        error("Cannot locate artifact '$(name)' for $(triplet(platform)) in '$(artifacts_toml)'")
     end
     hash = SHA1(meta["git-tree-sha1"])
     return artifact_name, artifact_path_tail, hash
@@ -559,11 +614,15 @@ end
 """
     macro artifact_str(name)
 
-Macro that is used to automatically ensure an artifact is installed, and return its
-location on-disk.  Automatically looks the artifact up by name in the project's
-`(Julia)Artifacts.toml` file.  Throws an error on inability to install the requested
-artifact.  If run in the REPL, searches for the toml file starting in the current
-directory, see `find_artifacts_toml()` for more.
+Return the on-disk path to an artifact. Automatically looks the artifact up by
+name in the project's `(Julia)Artifacts.toml` file. Throws an error on if the
+requested artifact is not present. If run in the REPL, searches for the toml
+file starting in the current directory, see `find_artifacts_toml()` for more.
+
+If the artifact is marked "lazy" and the package has `using LazyArtifacts`
+defined, the artifact will be downloaded on-demand with `Pkg` the first time
+this macro tries to compute the path. The files will then be left installed
+locally for later.
 
 If `name` contains a forward or backward slash, all elements after the first slash will
 be taken to be path names indexing into the artifact, allowing for an easy one-liner to
@@ -577,7 +636,7 @@ access a single file/directory within an artifact.  Example:
 !!! compat "Julia 1.6"
     Slash-indexing requires at least Julia 1.6.
 """
-macro artifact_str(name)
+macro artifact_str(name, platform=nothing)
     # Find Artifacts.toml file we're going to load from
     srcfile = string(__source__.file)
     if ((isinteractive() && startswith(srcfile, "REPL[")) || (!isinteractive() && srcfile == "none")) && !isfile(srcfile)
@@ -601,18 +660,33 @@ macro artifact_str(name)
     # Invalidate calling .ji file if Artifacts.toml file changes
     Base.include_dependency(artifacts_toml)
 
-    # If `name` is a constant, we can actually load and parse the `Artifacts.toml` file now,
-    # saving the work from runtime.
-    if isa(name, AbstractString)
+    # Check if the user has provided `LazyArtifacts`, and thus supports lazy artifacts
+    # If not, check to see if `Pkg` or `Pkg.Artifacts` has been imported.
+    lazyartifacts = nothing
+    for module_name in (:LazyArtifacts, :Pkg, :Artifacts)
+        if isdefined(__module__, module_name)
+            lazyartifacts = GlobalRef(__module__, module_name)
+            break
+        end
+    end
+
+    # If `name` is a constant, (and we're using the default `Platform`) we can actually load
+    # and parse the `Artifacts.toml` file now, saving the work from runtime.
+    if isa(name, AbstractString) && platform === nothing
         # To support slash-indexing, we need to split the artifact name from the path tail:
-        local artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml)
+        platform = HostPlatform()
+        artifact_name, artifact_path_tail, hash = artifact_slash_lookup(name, artifact_dict, artifacts_toml, platform)
         return quote
-            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash))
+            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), $(artifact_name), $(artifact_path_tail), $(artifact_dict), $(hash), $(platform), $(lazyartifacts))::String
         end
     else
+        if platform === nothing
+            platform = :($(HostPlatform)())
+        end
         return quote
-            local artifact_name, artifact_path_tail, hash = artifact_slash_lookup($(esc(name)), $(artifact_dict), $(artifacts_toml))
-            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash)
+            local platform = $(esc(platform))
+            local artifact_name, artifact_path_tail, hash = artifact_slash_lookup($(esc(name)), $(artifact_dict), $(artifacts_toml), platform)
+            Base.invokelatest(_artifact_str, $(__module__), $(artifacts_toml), artifact_name, artifact_path_tail, $(artifact_dict), hash, platform, $(lazyartifacts))::String
         end
     end
 end
@@ -633,6 +707,10 @@ artifact_meta(name::AbstractString, artifact_dict::Dict, artifacts_toml::Abstrac
     artifact_meta(String(name)::String, artifact_dict, String(artifacts_toml)::String; kwargs...)
 artifact_hash(name::AbstractString, artifacts_toml::AbstractString; kwargs...) =
     artifact_hash(String(name)::String, String(artifacts_toml)::String; kwargs...)
+select_downloadable_artifacts(artifact_dict::Dict, artifacts_toml::AbstractString; kwargs...) =
+    select_downloadable_artifacts(artifact_dict, String(artifacts_toml)::String, kwargs...)
+select_downloadable_artifacts(artifacts_toml::AbstractString; kwargs...) =
+    select_downloadable_artifacts(String(artifacts_toml)::String, kwargs...)
 find_artifacts_toml(path::AbstractString) =
     find_artifacts_toml(String(path)::String)
 split_artifact_slash(name::AbstractString) =
@@ -640,4 +718,9 @@ split_artifact_slash(name::AbstractString) =
 artifact_slash_lookup(name::AbstractString, artifact_dict::Dict, artifacts_toml::AbstractString) =
     artifact_slash_lookup(String(name)::String, artifact_dict, String(artifacts_toml)::String)
 
+# Precompilation to reduce latency
+precompile(load_artifacts_toml, (String,))
+precompile(NamedTuple{(:pkg_uuid,)}, (Tuple{Base.UUID},))
+precompile(Core.kwfunc(load_artifacts_toml), (NamedTuple{(:pkg_uuid,), Tuple{Base.UUID}}, typeof(load_artifacts_toml), String))
+
 end # module Artifacts
diff --git a/stdlib/Artifacts/test/Artifacts.toml b/stdlib/Artifacts/test/Artifacts.toml
index ee0fffb9ed92a2..4b715b74c128b2 100644
--- a/stdlib/Artifacts/test/Artifacts.toml
+++ b/stdlib/Artifacts/test/Artifacts.toml
@@ -1,129 +1,146 @@
-[[c_simple]]
-arch = "armv7l"
-git-tree-sha1 = "0c509b3302db90a9393d6036c3ffcd14d190523d"
+[[HelloWorldC]]
+arch = "aarch64"
+git-tree-sha1 = "95fce80ec703eeb5f4270fef6821b38d51387499"
+os = "macos"
+
+    [[HelloWorldC.download]]
+    sha256 = "23f45918421881de8e9d2d471c70f6b99c26edd1dacd7803d2583ba93c8bbb28"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-apple-darwin.tar.gz"
+[[HelloWorldC]]
+arch = "aarch64"
+git-tree-sha1 = "1ccbaad776766366943fd5a66a8cbc9877ee8df9"
 libc = "glibc"
 os = "linux"
 
-    [[c_simple.download]]
-    sha256 = "b0cfa3a2d9b5bc0632b0ee45b5d049eecbf72ed9c8cbc968b374ea995257a635"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.arm-linux-gnueabihf.tar.gz"
-
-[[c_simple]]
-arch = "x86_64"
-git-tree-sha1 = "e5a893fdac080fa0d4ae1cbd8bd67cfba5945af2"
-os = "freebsd"
-
-    [[c_simple.download]]
-    sha256 = "fde6e4ed00227b98e25ffdbf4e2b8b24a4e2bfa4c532c733d3626d6157e448ce"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.x86_64-unknown-freebsd11.1.tar.gz"
+    [[HelloWorldC.download]]
+    sha256 = "82bca07ff25a75875936116ca977285160a2afcc4f58dd160c7b1600f55da655"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-gnu.tar.gz"
+[[HelloWorldC]]
+arch = "aarch64"
+git-tree-sha1 = "dc43ab874611cfc26641741c31b8230276d7d664"
+libc = "musl"
+os = "linux"
 
-[[c_simple]]
-arch = "x86_64"
-git-tree-sha1 = "7ba74e239348ea6c060f994c083260be3abe3095"
-os = "macos"
+    [[HelloWorldC.download]]
+    sha256 = "36b7c554f1cb04d5282b991c66a10b2100085ac8deb2156bf52b4f7c4e406c04"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-musl.tar.gz"
+[[HelloWorldC]]
+arch = "armv6l"
+call_abi = "eabihf"
+git-tree-sha1 = "b7128521583d02d2dbe9c8de6fe156b79df781d9"
+libc = "glibc"
+os = "linux"
 
-    [[c_simple.download]]
-    sha256 = "e88816a1492eecb4569bb24b3e52b757e59c87419dba962e99148b338369f326"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.x86_64-apple-darwin14.tar.gz"
-
-# NOTE: We explicitly comment this out, to test porous platform support.  Don't un-comment this!
-#[[c_simple]]
-#arch = "powerpc64le"
-#git-tree-sha1 = "dc9f84891c8215f90095b619533e141179b6cc06"
-#libc = "glibc"
-#os = "linux"
-#
-#    [[c_simple.download]]
-#    sha256 = "715af8f0405cff35feef5ad5e93836bb1bb0f93c77218bfdad411c8a4368ab4b"
-#    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.powerpc64le-linux-gnu.tar.gz"
-
-[[c_simple]]
-arch = "i686"
-git-tree-sha1 = "78e282b79c16febc54a56ed244088ff92a55533f"
+    [[HelloWorldC.download]]
+    sha256 = "5e094b9c6e4c6a77ecc8dfc2b841ac1f2157f6a81f4c47f1e0d3e9a04eec7945"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-gnueabihf.tar.gz"
+[[HelloWorldC]]
+arch = "armv6l"
+call_abi = "eabihf"
+git-tree-sha1 = "edb3893a154519d6786234f5c83994c34e11feed"
 libc = "musl"
 os = "linux"
 
-    [[c_simple.download]]
-    sha256 = "900f2e55f72af0c723f9db7e9f44b1c16155010de212b430f02091dc24ff324c"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.i686-linux-musl.tar.gz"
+    [[HelloWorldC.download]]
+    sha256 = "0a2203f061ba2ef7ce4c452ec7874be3acc6db1efac8091f85d113c3404e6bb6"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-musleabihf.tar.gz"
+[[HelloWorldC]]
+arch = "armv7l"
+call_abi = "eabihf"
+git-tree-sha1 = "5a8288c8a30578c0d0f24a9cded29579517ce7a8"
+libc = "glibc"
+os = "linux"
 
-[[c_simple]]
-arch = "x86_64"
-git-tree-sha1 = "9d0075fdafe8af6430afba41fea2f32811141145"
+    [[HelloWorldC.download]]
+    sha256 = "a4392a4c8f834c97f9d8822ddfb1813d8674fa602eeaf04d6359c0a9e98478ec"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-gnueabihf.tar.gz"
+[[HelloWorldC]]
+arch = "armv7l"
+call_abi = "eabihf"
+git-tree-sha1 = "169c261b321c4dc95894cdd2db9d0d0caa84677f"
 libc = "musl"
 os = "linux"
 
-    [[c_simple.download]]
-    sha256 = "2769be12e00ebb0a3c7ab43b90b71bba3a6883844416457e08b880945d129689"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.x86_64-linux-musl.tar.gz"
-
-[[c_simple]]
+    [[HelloWorldC.download]]
+    sha256 = "ed1aacbf197a6c78988725a39defad130ed31a2258f8e7846f73b459821f21d3"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-musleabihf.tar.gz"
+[[HelloWorldC]]
 arch = "i686"
-git-tree-sha1 = "0c890d3e6c5ee00fd06a7d418fad424159e447ce"
+git-tree-sha1 = "fd35f9155dc424602d01fbf983eb76be3217a28f"
 libc = "glibc"
 os = "linux"
 
-    [[c_simple.download]]
-    sha256 = "45d42cbb5cfafefeadfd46cd91445466d0e245f1f640cd4f91cdae01a654e001"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.i686-linux-gnu.tar.gz"
+    [[HelloWorldC.download]]
+    sha256 = "048fcff5ff47a3cc1e84a2688935fcd658ad1c7e7c52c0e81fe88ce6c3697aba"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-gnu.tar.gz"
+[[HelloWorldC]]
+arch = "i686"
+git-tree-sha1 = "8db14df0f1d2a3ed9c6a7b053a590ca6527eb95e"
+libc = "musl"
+os = "linux"
 
-[[c_simple]]
+    [[HelloWorldC.download]]
+    sha256 = "d521b4420392b8365de5ed0ef38a3b6c822665d7c257d3eef6f725c205bb3d78"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-musl.tar.gz"
+[[HelloWorldC]]
 arch = "i686"
-git-tree-sha1 = "956a97e2d56c0fa3b634f57e10a174dff0052ba4"
+git-tree-sha1 = "56f82168947b8dc7bb98038f063209b9f864eaff"
 os = "windows"
 
-    [[c_simple.download]]
-    sha256 = "b0214fa6f48359f2cf328b2ec2255ed975939939333db03711f63671c5d4fed9"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.i686-w64-mingw32.tar.gz"
-
-[[c_simple]]
-arch = "x86_64"
-git-tree-sha1 = "0ed63482ad1916dba12b4959d2704af4e41252da"
+    [[HelloWorldC.download]]
+    sha256 = "de578cf5ee2f457e9ff32089cbe17d03704a929980beddf4c41f4c0eb32f19c6"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-w64-mingw32.tar.gz"
+[[HelloWorldC]]
+arch = "powerpc64le"
+git-tree-sha1 = "9c8902b62f5b1aaa7c2839c804bed7c3a0912c7b"
 libc = "glibc"
 os = "linux"
 
-    [[c_simple.download]]
-    sha256 = "edbaf461c5c33fd7030bcd197b849396f8328648a2e04462b1bea9650f782a3b"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.x86_64-linux-gnu.tar.gz"
-
-[[c_simple]]
+    [[HelloWorldC.download]]
+    sha256 = "63ddbfbb6ea0cafef544cc25415e7ebee6ee0a69db0878d0d4e1ed27c0ae0ab5"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.powerpc64le-linux-gnu.tar.gz"
+[[HelloWorldC]]
 arch = "x86_64"
-git-tree-sha1 = "444cecb70ff39e8961dd33e230e151775d959f37"
-os = "windows"
-
-    [[c_simple.download]]
-    sha256 = "39b75afda9f0619f042c47bef2cdd0931a33c5c5acb7dc2977f1cd6274835c1f"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.x86_64-w64-mingw32.tar.gz"
-
-[[c_simple]]
-arch = "aarch64"
-git-tree-sha1 = "efc8df78802fae852abd8e213e4b6f3f1da48125"
-libc = "musl"
-os = "linux"
-
-    [[c_simple.download]]
-    sha256 = "53f54d76b4f9edd2a5b8c20575ef9f6a05c524a454c9126f4ecaa83a8aa72f52"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.aarch64-linux-musl.tar.gz"
+git-tree-sha1 = "f8ab5a03697f9afc82210d8a2be1d94509aea8bc"
+os = "macos"
 
-[[c_simple]]
-arch = "aarch64"
-git-tree-sha1 = "ca19bcae2bc6af88d6ace2648c7cc639b3cf1dfb"
+    [[HelloWorldC.download]]
+    sha256 = "f5043338613672b12546c59359c7997c5381a9a60b86aeb951dee74de428d5e3"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-apple-darwin.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "1ed3d81088f16e3a1fa4e3d4c4c509b8c117fecf"
 libc = "glibc"
 os = "linux"
 
-    [[c_simple.download]]
-    sha256 = "94e303d2d779734281b60ef1880ad0e12681a2d3d6eed3a3ee3129a3efb016f7"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.aarch64-linux-gnu.tar.gz"
-
-[[c_simple]]
-arch = "armv7l"
-git-tree-sha1 = "0d8cebd76188d1bade6057b70605e553bbdfdd02"
+    [[HelloWorldC.download]]
+    sha256 = "a18212e7984b08b23bec06e8bf9286a89b9fa2e8ee0dd46af3b852fe22013a4f"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-gnu.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "c04ef757b8bb773d17a0fd0ea396e52db1c7c385"
 libc = "musl"
 os = "linux"
 
-    [[c_simple.download]]
-    sha256 = "da1a04400ca8bcf51d2c39783e1fcc7d51fba5cf4f328d6bff2512ea5342532b"
-    url = "https://github.com/JuliaBinaryWrappers/c_simple_jll.jl/releases/download/c_simple+v1.2.3-pkgtest/c_simple.v1.2.3.arm-linux-musleabihf.tar.gz"
+    [[HelloWorldC.download]]
+    sha256 = "7a3d1b09410989508774f00e073ea6268edefcaba7617fc5085255ec8e82555b"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-musl.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "5f7e7abf7d545a1aaa368f22e3e01ea0268870b1"
+os = "freebsd"
+
+    [[HelloWorldC.download]]
+    sha256 = "56aedffe38fe20294e93cfc2eb0a193c8e2ddda5a697b302e77ff48ac1195198"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-unknown-freebsd.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "2f1a6d4f82cd1eea785a5141b992423c09491f1b"
+os = "windows"
+
+    [[HelloWorldC.download]]
+    sha256 = "aad77a16cbc9752f6ec62549a28c7e9f3f7f57919f6fa9fb924e0c669b11f8c4"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-w64-mingw32.tar.gz"
 
 [socrates]
 git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239"
@@ -135,4 +152,4 @@ lazy = true
 
     [[socrates.download]]
     url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.bz2"
-    sha256 = "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76"
\ No newline at end of file
+    sha256 = "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76"
diff --git a/stdlib/Artifacts/test/refresh_artifacts.jl b/stdlib/Artifacts/test/refresh_artifacts.jl
new file mode 100644
index 00000000000000..7078912c000724
--- /dev/null
+++ b/stdlib/Artifacts/test/refresh_artifacts.jl
@@ -0,0 +1,23 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Artifacts: with_artifacts_directory
+using Pkg.Artifacts: load_artifacts_toml, ensure_artifact_installed
+let
+    toml = joinpath(@__DIR__, "Artifacts.toml")
+    unused = Base.BinaryPlatforms.Platform(string(Sys.ARCH), "linux")
+    with_artifacts_directory(ARGS[1]) do
+        # ensure_all_artifacts_installed(toml; include_lazy=false)
+        dict = load_artifacts_toml(toml)
+        for (name, meta) in dict
+            if meta isa Array
+                for meta in meta
+                    get(meta, "lazy", false) && continue
+                    ensure_artifact_installed(name, meta, toml; platform=unused, io = devnull)
+                end
+            else; meta::Dict
+                get(meta, "lazy", false) && continue
+                ensure_artifact_installed(name, meta, toml; platform=unused, io = devnull)
+            end
+        end
+    end
+end
diff --git a/stdlib/Artifacts/test/runtests.jl b/stdlib/Artifacts/test/runtests.jl
index b81acabca31750..7527b548061ce4 100644
--- a/stdlib/Artifacts/test/runtests.jl
+++ b/stdlib/Artifacts/test/runtests.jl
@@ -1,6 +1,12 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Artifacts, Test, Base.BinaryPlatforms
 using Artifacts: with_artifacts_directory, pack_platform!, unpack_platform
 
+# prepare for the package tests by ensuring the required artifacts are downloaded now
+artifacts_dir = mktempdir()
+run(addenv(`$(Base.julia_cmd()) --color=no $(joinpath(@__DIR__, "refresh_artifacts.jl")) $(artifacts_dir)`, "TERM"=>"dumb"))
+
 @testset "Artifact Paths" begin
     mktempdir() do tempdir
         with_artifacts_directory(tempdir) do
@@ -76,33 +82,85 @@ end
 end
 
 @testset "Artifact Slash-indexing" begin
-    mktempdir() do tempdir
-        with_artifacts_directory(tempdir) do
-            exeext = Sys.iswindows() ? ".exe" : ""
-
-            # simple lookup, gives us the directory for `c_simple` for the current architecture
-            c_simple_dir = artifact"c_simple"
-            @test isdir(c_simple_dir)
-            c_simple_exe_path = joinpath(c_simple_dir, "bin", "c_simple$(exeext)")
-            @test isfile(c_simple_exe_path)
-
-            # Simple slash-indexed lookup
-            c_simple_bin_path = artifact"c_simple/bin"
-            @test isdir(c_simple_bin_path)
-            # Test that forward and backward slash are equivalent
-            @test artifact"c_simple\\bin" == artifact"c_simple/bin"
-
-            # Dynamically-computed lookup; not done at compile-time
-            generate_artifact_name() = "c_simple"
-            c_simple_dir = @artifact_str(generate_artifact_name())
-            @test isdir(c_simple_dir)
-            c_simple_exe_path = joinpath(c_simple_dir, "bin", "c_simple$(exeext)")
-            @test isfile(c_simple_exe_path)
-
-            # Dynamically-computed slash-indexing:
-            generate_bin_path(pathsep) = "c_simple$(pathsep)bin$(pathsep)c_simple$(exeext)"
-            @test isfile(@artifact_str(generate_bin_path("/")))
-            @test isfile(@artifact_str(generate_bin_path("\\")))
+    with_artifacts_directory(artifacts_dir) do
+        exeext = Sys.iswindows() ? ".exe" : ""
+
+        # simple lookup, gives us the directory for `HelloWorldC` for the current architecture
+        HelloWorldC_dir = artifact"HelloWorldC"
+        @test isdir(HelloWorldC_dir)
+        HelloWorldC_exe_path = joinpath(HelloWorldC_dir, "bin", "hello_world$(exeext)")
+        @test isfile(HelloWorldC_exe_path)
+
+        # Simple slash-indexed lookup
+        HelloWorldC_bin_path = artifact"HelloWorldC/bin"
+        @test isdir(HelloWorldC_bin_path)
+        # Test that forward and backward slash are equivalent
+        @test artifact"HelloWorldC\\bin" == artifact"HelloWorldC/bin"
+
+        # Dynamically-computed lookup; not done at compile-time
+        generate_artifact_name() = "HelloWorldC"
+        HelloWorldC_dir = @artifact_str(generate_artifact_name())
+        @test isdir(HelloWorldC_dir)
+        HelloWorldC_exe_path = joinpath(HelloWorldC_dir, "bin", "hello_world$(exeext)")
+        @test isfile(HelloWorldC_exe_path)
+
+        # Dynamically-computed slash-indexing:
+        generate_bin_path(pathsep) = "HelloWorldC$(pathsep)bin$(pathsep)hello_world$(exeext)"
+        @test isfile(@artifact_str(generate_bin_path("/")))
+        @test isfile(@artifact_str(generate_bin_path("\\")))
+    end
+end
+
+@testset "@artifact_str Platform passing" begin
+    with_artifacts_directory(artifacts_dir) do
+        win64 = Platform("x86_64", "windows")
+        mac64 = Platform("x86_64", "macos")
+        @test basename(@artifact_str("HelloWorldC", win64)) == "2f1a6d4f82cd1eea785a5141b992423c09491f1b"
+        @test basename(@artifact_str("HelloWorldC", mac64)) == "f8ab5a03697f9afc82210d8a2be1d94509aea8bc"
+    end
+end
+
+@testset "select_downloadable_artifacts()" begin
+    armv7l_linux = Platform("armv7l", "linux")
+    artifacts = select_downloadable_artifacts(joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux)
+    @test length(keys(artifacts)) == 1
+    @test artifacts["HelloWorldC"]["git-tree-sha1"] == "5a8288c8a30578c0d0f24a9cded29579517ce7a8"
+
+    artifacts = select_downloadable_artifacts(joinpath(@__DIR__, "Artifacts.toml"); platform=armv7l_linux, include_lazy=true)
+    @test length(keys(artifacts)) == 2
+    @test artifacts["HelloWorldC"]["git-tree-sha1"] == "5a8288c8a30578c0d0f24a9cded29579517ce7a8"
+    @test artifacts["socrates"]["git-tree-sha1"] == "43563e7631a7eafae1f9f8d9d332e3de44ad7239"
+end
+
+@testset "@artifact_str install errors" begin
+    for imports in ("Artifacts, Pkg", "Pkg, Pkg.Artifacts", "Pkg.Artifacts")
+        mktempdir() do tempdir
+            with_artifacts_directory(tempdir) do
+                ex = @test_throws ErrorException artifact"HelloWorldC"
+                @test startswith(ex.value.msg, "Artifact \"HelloWorldC\" was not installed correctly. ")
+                ex = @test_throws ErrorException artifact"socrates"
+                @test startswith(ex.value.msg, "Artifact \"socrates\" is a lazy artifact; ")
+
+                # Can install if we load `Pkg` or `Pkg.Artifacts`
+                anon = Module(:__anon__)
+                Core.eval(anon, Meta.parse("using $(imports), Test"))
+                # Ensure that we get the expected exception, since this test runs with --depwarn=error
+                Core.eval(anon, quote
+                    try
+                        artifact"socrates"
+                        @assert false "this @artifact_str macro invocation should have failed!"
+                    catch e
+                        @test startswith("using Pkg instead of using LazyArtifacts is deprecated", e.msg)
+                    end
+                end)
+            end
         end
     end
 end
+
+@testset "`Artifacts.artifact_names` and friends" begin
+    n = length(Artifacts.artifact_names)
+    @test length(Base.project_names) == n
+    @test length(Base.manifest_names) == n
+    @test length(Base.preferences_names) == n
+end
diff --git a/stdlib/Base64/src/Base64.jl b/stdlib/Base64/src/Base64.jl
index 3f5a8f369130a8..108faa18f5b856 100644
--- a/stdlib/Base64/src/Base64.jl
+++ b/stdlib/Base64/src/Base64.jl
@@ -3,7 +3,8 @@
 """
     Base64
 
-Functionality for base-64 encoded strings and IO.
+Functionality for [base64 encoding and decoding](https://en.wikipedia.org/wiki/Base64),
+a method to represent binary data using text, common on the web.
 """
 module Base64
 
diff --git a/stdlib/Base64/src/decode.jl b/stdlib/Base64/src/decode.jl
index c66f8ad9904b85..056293528e1427 100644
--- a/stdlib/Base64/src/decode.jl
+++ b/stdlib/Base64/src/decode.jl
@@ -150,7 +150,6 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest)
             b4 = decode(read(input, UInt8))
         else
             b4 = BASE64_CODE_END
-            break
         end
     end
 
@@ -158,13 +157,13 @@ function decode_slow(b1, b2, b3, b4, buffer, i, input, ptr, n, rest)
     k = 0
     if b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 < 0x40
         k = 3
-    elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && b4 == BASE64_CODE_PAD
+    elseif b1 < 0x40 && b2 < 0x40 && b3 < 0x40 && (b4 == BASE64_CODE_PAD || b4 == BASE64_CODE_END)
         b4 = 0x00
         k = 2
-    elseif b1 < 0x40 && b2 < 0x40 && b3 == b4 == BASE64_CODE_PAD
+    elseif b1 < 0x40 && b2 < 0x40 && (b3 == BASE64_CODE_PAD || b3 == BASE64_CODE_END) && (b4 == BASE64_CODE_PAD || b4 == BASE64_CODE_END)
         b3 = b4 = 0x00
         k = 1
-    elseif b1 == b2 == b3 == BASE64_CODE_IGN && b4 == BASE64_CODE_END
+    elseif b1 == b2 == b3 == b4 == BASE64_CODE_END
         b1 = b2 = b3 = b4 = 0x00
     else
         throw(ArgumentError("malformed base64 sequence"))
diff --git a/stdlib/Base64/test/runtests.jl b/stdlib/Base64/test/runtests.jl
index ba6e178f2917ce..11d0a3cca4348e 100644
--- a/stdlib/Base64/test/runtests.jl
+++ b/stdlib/Base64/test/runtests.jl
@@ -87,6 +87,21 @@ const longDecodedText = "name = \"Genie\"\nuuid = \"c43c736e-a2d1-11e8-161f-af95
 
     # issue #32397
     @test String(base64decode(longEncodedText)) == longDecodedText;
+
+    # Optional padding
+    @test base64decode("AQ==") == base64decode("AQ")
+    @test base64decode("zzzzAQ==") == base64decode("zzzzAQ")
+    @test base64decode("AQI=") == base64decode("AQI")
+
+    # Too short, 6 bits do not cover a full byte.
+    @test_throws ArgumentError base64decode("a")
+    @test_throws ArgumentError base64decode("a===")
+    @test_throws ArgumentError base64decode("ZZZZa")
+    @test_throws ArgumentError base64decode("ZZZZa===")
+
+    # Bit padding should be ignored, which means that `jl` and `jk` should give the same result.
+    @test base64decode("jl") == base64decode("jk") == base64decode("jk==") == [142]
+    @test base64decode("Aa") == base64decode("AS") == base64decode("AS==") == [1]
 end
 
 @testset "Random data" begin
diff --git a/stdlib/CompilerSupportLibraries_jll/Project.toml b/stdlib/CompilerSupportLibraries_jll/Project.toml
new file mode 100644
index 00000000000000..877a1ab5b005c3
--- /dev/null
+++ b/stdlib/CompilerSupportLibraries_jll/Project.toml
@@ -0,0 +1,20 @@
+name = "CompilerSupportLibraries_jll"
+uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+
+# NOTE: When updating this, also make sure to update the value
+# `CSL_NEXT_GLIBCXX_VERSION` in `deps/csl.mk`, to properly disable
+# automatic usage of BB-built CSLs on extremely up-to-date systems!
+version = "0.5.2+0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
new file mode 100644
index 00000000000000..1b2c0cd41cbe20
--- /dev/null
+++ b/stdlib/CompilerSupportLibraries_jll/src/CompilerSupportLibraries_jll.jl
@@ -0,0 +1,75 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/CompilerSupportLibraries_jll.jl
+
+baremodule CompilerSupportLibraries_jll
+using Base, Libdl, Base.BinaryPlatforms
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libgfortran, libstdcxx, libgomp
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libgfortran_handle = C_NULL
+libgfortran_path = ""
+libstdcxx_handle = C_NULL
+libstdcxx_path = ""
+libgomp_handle = C_NULL
+libgomp_path = ""
+
+if Sys.iswindows()
+    if arch(HostPlatform()) == "x86_64"
+        const libgcc_s = "libgcc_s_seh-1.dll"
+    else
+        const libgcc_s = "libgcc_s_sjlj-1.dll"
+    end
+    const libgfortran = string("libgfortran-", libgfortran_version(HostPlatform()).major, ".dll")
+    const libstdcxx = "libstdc++-6.dll"
+    const libgomp = "libgomp-1.dll"
+elseif Sys.isapple()
+    if arch(HostPlatform()) == "aarch64"
+        const libgcc_s = "@rpath/libgcc_s.1.1.dylib"
+    else
+        const libgcc_s = "@rpath/libgcc_s.1.dylib"
+    end
+    const libgfortran = string("@rpath/", "libgfortran.", libgfortran_version(HostPlatform()).major, ".dylib")
+    const libstdcxx = "@rpath/libstdc++.6.dylib"
+    const libgomp = "@rpath/libgomp.1.dylib"
+else
+    const libgcc_s = "libgcc_s.so.1"
+    const libgfortran = string("libgfortran.so.", libgfortran_version(HostPlatform()).major)
+    const libstdcxx = "libstdc++.so.6"
+    const libgomp = "libgomp.so.1"
+end
+
+function __init__()
+    global libgcc_s_handle = dlopen(libgcc_s)
+    global libgcc_s_path = dlpath(libgcc_s_handle)
+    global libgfortran_handle = dlopen(libgfortran)
+    global libgfortran_path = dlpath(libgfortran_handle)
+    global libstdcxx_handle = dlopen(libstdcxx)
+    global libstdcxx_path = dlpath(libstdcxx_handle)
+    global libgomp_handle = dlopen(libgomp)
+    global libgomp_path = dlpath(libgomp_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libgcc_s_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libgfortran_path() = libgfortran_path
+get_libstdcxx_path() = libstdcxx_path
+get_libgomp_path() = libgomp_path
+
+end  # module CompilerSupportLibraries_jll
diff --git a/stdlib/CompilerSupportLibraries_jll/test/runtests.jl b/stdlib/CompilerSupportLibraries_jll/test/runtests.jl
new file mode 100644
index 00000000000000..85cf132c3a5bd5
--- /dev/null
+++ b/stdlib/CompilerSupportLibraries_jll/test/runtests.jl
@@ -0,0 +1,10 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, CompilerSupportLibraries_jll
+
+@testset "CompilerSupportLibraries_jll" begin
+    @test isfile(CompilerSupportLibraries_jll.libgcc_s_path)
+    @test isfile(CompilerSupportLibraries_jll.libgfortran_path)
+    @test isfile(CompilerSupportLibraries_jll.libstdcxx_path)
+    @test isfile(CompilerSupportLibraries_jll.libgomp_path)
+end
diff --git a/stdlib/Dates/docs/src/index.md b/stdlib/Dates/docs/src/index.md
index 99ab0fbff4fd56..4975f175bbf16a 100644
--- a/stdlib/Dates/docs/src/index.md
+++ b/stdlib/Dates/docs/src/index.md
@@ -83,15 +83,24 @@ julia> Date(Dates.Month(7),Dates.Year(2013))
 [`Date`](@ref) or [`DateTime`](@ref) parsing is accomplished by the use of format strings. Format
 strings work by the notion of defining *delimited* or *fixed-width* "slots" that contain a period
 to parse and passing the text to parse and format string to a [`Date`](@ref) or [`DateTime`](@ref)
-constructor, of the form `Date("2015-01-01","y-m-d")` or `DateTime("20150101","yyyymmdd")`.
+constructor, of the form `Date("2015-01-01",dateformat"y-m-d")` or
+`DateTime("20150101",dateformat"yyyymmdd")`.
 
 Delimited slots are marked by specifying the delimiter the parser should expect between two subsequent
 periods; so `"y-m-d"` lets the parser know that between the first and second slots in a date string
 like `"2014-07-16"`, it should find the `-` character. The `y`, `m`, and `d` characters let the
 parser know which periods to parse in each slot.
 
+As in the case of constructors above such as `Date(2013)`, delimited `DateFormat`s allow for
+missing parts of dates and times so long as the preceding parts are given. The other parts are given the usual
+default values.  For example, `Date("1981-03", dateformat"y-m-d")` returns `1981-03-01`, whilst
+`Date("31/12", dateformat"d/m/y")` gives `0001-12-31`.  (Note that the default year is
+1 AD/CE.)
+Consequently, an empty string will always return `0001-01-01` for `Date`s,
+and `0001-01-01T00:00:00.000` for `DateTime`s.
+
 Fixed-width slots are specified by repeating the period character the number of times corresponding
-to the width with no delimiter between characters. So `"yyyymmdd"` would correspond to a date
+to the width with no delimiter between characters. So `dateformat"yyyymmdd"` would correspond to a date
 string like `"20140716"`. The parser distinguishes a fixed-width slot by the absence of a delimiter,
 noting the transition `"yyyymm"` from one period character to the next.
 
@@ -102,10 +111,16 @@ supported, so `u` corresponds to "Jan", "Feb", "Mar", etc. And `U` corresponds t
 custom locales can be loaded by passing in the `locale=>Dict{String,Int}` mapping to the `MONTHTOVALUEABBR`
 and `MONTHTOVALUE` dicts for abbreviated and full-name month names, respectively.
 
-One note on parsing performance: using the `Date(date_string,format_string)` function is fine
-if only called a few times. If there are many similarly formatted date strings to parse however,
-it is much more efficient to first create a [`Dates.DateFormat`](@ref), and pass it instead of
-a raw format string.
+The above examples used the `dateformat""` string macro. This macro creates a `DateFormat` object once when
+the macro is expanded and uses the same `DateFormat` object even if a code snippet is run multiple times.
+
+```jldoctest
+julia> for i = 1:10^5
+           Date("2015-01-01", dateformat"y-m-d")
+       end
+```
+
+Or you can create the DateFormat object explicitly:
 
 ```jldoctest
 julia> df = DateFormat("y-m-d");
@@ -117,13 +132,35 @@ julia> dt2 = Date("2015-01-02",df)
 2015-01-02
 ```
 
-You can also use the `dateformat""` string macro. This macro creates the `DateFormat` object once when the macro is expanded and uses the same `DateFormat` object even if a code snippet is run multiple times.
+Alternatively, use broadcasting:
 
 ```jldoctest
-julia> for i = 1:10^5
-           Date("2015-01-01", dateformat"y-m-d")
-       end
-```
+julia> years = ["2015", "2016"];
+
+julia> Date.(years, DateFormat("yyyy"))
+2-element Vector{Date}:
+ 2015-01-01
+ 2016-01-01
+```
+
+For convenience, you may pass the format string directly (e.g., `Date("2015-01-01","y-m-d")`),
+although this form incurs performance costs if you are parsing the same format repeatedly, as
+it internally creates a new `DateFormat` object each time.
+
+As well as via the constructors, a `Date` or `DateTime` can be constructed from
+strings using the [`parse`](@ref) and [`tryparse`](@ref) functions, but with
+an optional third argument of type `DateFormat` specifying the format; for example,
+`parse(Date, "06.23.2013", dateformat"m.d.y")`, or
+`tryparse(DateTime, "1999-12-31T23:59:59")` which uses the default format.
+The notable difference between the functions is that with [`tryparse`](@ref),
+an error is not thrown if the string is in an invalid format;
+instead `nothing` is returned.  Note however that as with the constructors
+above, empty date and time parts assume
+default values and consequently an empty string (`""`) is valid
+for _any_ `DateFormat`, giving for example a `Date` of `0001-01-01`.  Code
+relying on `parse` or `tryparse` for `Date` and `DateTime` parsing should
+therefore also check whether parsed strings are empty before using the
+result.
 
 A full suite of parsing and formatting tests and examples is available in [`stdlib/Dates/test/io.jl`](https://github.com/JuliaLang/julia/blob/master/stdlib/Dates/test/io.jl).
 
@@ -220,8 +257,7 @@ julia> Dates.Day(t)
 31 days
 ```
 
-Compound methods are provided, as they provide a measure of efficiency if multiple fields are
-needed at the same time:
+Compound methods are provided because it is more efficient to access multiple fields at the same time than individually:
 
 ```jldoctest tdate
 julia> Dates.yearmonth(t)
@@ -350,7 +386,7 @@ calculation in a conversation. Why all the fuss about this? Let's take a classic
 1 month to January 31st, 2014. What's the answer? Javascript will say [March 3](https://markhneedham.com/blog/2009/01/07/javascript-add-a-month-to-a-date/)
 (assumes 31 days). PHP says [March 2](https://stackoverflow.com/questions/5760262/php-adding-months-to-a-date-while-not-exceeding-the-last-day-of-the-month)
 (assumes 30 days). The fact is, there is no right answer. In the `Dates` module, it gives
-the result of February 28th. How does it figure that out? I like to think of the classic 7-7-7
+the result of February 28th. How does it figure that out? Consider the classic 7-7-7
 gambling game in casinos.
 
 Now just imagine that instead of 7-7-7, the slots are Year-Month-Day, or in our example, 2014-01-31.
@@ -543,6 +579,26 @@ julia> Dates.value(Dates.Millisecond(10))
 10
 ```
 
+Representing periods or durations that are not integer multiples of the basic types can be achieved
+with the [`Dates.CompoundPeriod`](@ref) type. Compound periods may be constructed manually from simple
+[`Period`](@ref) types. Additionally, the [`canonicalize`](@ref) function can be used to break down a
+period into a [`Dates.CompoundPeriod`](@ref). This is particularly useful to convert a duration, e.g.,
+a difference of two `DateTime`, into a more convenient representation.
+
+```jldoctest
+julia> cp = Dates.CompoundPeriod(Day(1),Minute(1))
+1 day, 1 minute
+
+julia> t1 = DateTime(2018,8,8,16,58,00)
+2018-08-08T16:58:00
+
+julia> t2 = DateTime(2021,6,23,10,00,00)
+2021-06-23T10:00:00
+
+julia> canonicalize(t2-t1) # creates a CompoundPeriod
+149 weeks, 6 days, 17 hours, 2 minutes
+```
+
 ## Rounding
 
 [`Date`](@ref) and [`DateTime`](@ref) values can be rounded to a specified resolution (e.g., 1
@@ -669,6 +725,8 @@ Dates.Time(::Int64::Int64, ::Int64, ::Int64, ::Int64, ::Int64)
 Dates.Time(::Dates.TimePeriod)
 Dates.Time(::Function, ::Any...)
 Dates.Time(::Dates.DateTime)
+Dates.Time(::AbstractString, ::AbstractString)
+Dates.Time(::AbstractString, ::Dates.DateFormat)
 Dates.now()
 Dates.now(::Type{Dates.UTC})
 Base.eps(::Union{Type{DateTime}, Type{Date}, Type{Time}, TimeType})
@@ -746,8 +804,10 @@ Dates.toprev(::Function, ::Dates.TimeType)
 ```@docs
 Dates.Period(::Any)
 Dates.CompoundPeriod(::Vector{<:Dates.Period})
+Dates.canonicalize
 Dates.value
 Dates.default
+Dates.periods
 ```
 
 ### Rounding Functions
@@ -822,6 +882,15 @@ Months of the Year:
 | `November`  | `Nov` | 11          |
 | `December`  | `Dec` | 12          |
 
+#### Common Date Formatters
+
+```@docs
+ISODateTimeFormat
+ISODateFormat
+ISOTimeFormat
+RFC1123Format
+```
+
 ```@meta
 DocTestSetup = nothing
 ```
diff --git a/stdlib/Dates/src/Dates.jl b/stdlib/Dates/src/Dates.jl
index 15276a5a3be556..6164216cbd1af5 100644
--- a/stdlib/Dates/src/Dates.jl
+++ b/stdlib/Dates/src/Dates.jl
@@ -32,7 +32,7 @@ for more information.
 """
 module Dates
 
-import Base: ==, div, fld, mod, rem, gcd, lcm, +, -, *, /, %, broadcast
+import Base: ==, isless, div, fld, mod, rem, gcd, lcm, +, -, *, /, %, broadcast
 using Printf: @sprintf
 
 using Base.Iterators
diff --git a/stdlib/Dates/src/adjusters.jl b/stdlib/Dates/src/adjusters.jl
index 4687b22badad54..d5617ba8cf93c7 100644
--- a/stdlib/Dates/src/adjusters.jl
+++ b/stdlib/Dates/src/adjusters.jl
@@ -2,10 +2,12 @@
 
 ### truncation
 Base.trunc(dt::Date, p::Type{Year}) = Date(UTD(totaldays(year(dt), 1, 1)))
+Base.trunc(dt::Date, p::Type{Quarter}) = firstdayofquarter(dt)
 Base.trunc(dt::Date, p::Type{Month}) = firstdayofmonth(dt)
 Base.trunc(dt::Date, p::Type{Day}) = dt
 
 Base.trunc(dt::DateTime, p::Type{Year}) = DateTime(trunc(Date(dt), Year))
+Base.trunc(dt::DateTime, p::Type{Quarter}) = DateTime(trunc(Date(dt), Quarter))
 Base.trunc(dt::DateTime, p::Type{Month}) = DateTime(trunc(Date(dt), Month))
 Base.trunc(dt::DateTime, p::Type{Day}) = DateTime(Date(dt))
 Base.trunc(dt::DateTime, p::Type{Hour}) = dt - Minute(dt) - Second(dt) - Millisecond(dt)
diff --git a/stdlib/Dates/src/io.jl b/stdlib/Dates/src/io.jl
index 316d6d8a66fd2f..7e007ced0bbee3 100644
--- a/stdlib/Dates/src/io.jl
+++ b/stdlib/Dates/src/io.jl
@@ -150,7 +150,7 @@ struct Decimal3 end
     len = ii - i
     if len > 3
         ms, r = divrem(ms0, Int64(10) ^ (len - 3))
-        r == 0 || throw(InexactError(:convert, Decimal3, ms0))
+        r == 0 || return nothing
     else
         ms = ms0 * Int64(10) ^ (3 - len)
     end
@@ -332,6 +332,23 @@ const CONVERSION_TRANSLATIONS = IdDict{Type, Any}(
     Time => (Hour, Minute, Second, Millisecond, Microsecond, Nanosecond, AMPM),
 )
 
+# The `DateFormat(format, locale)` method just below consumes the following Regex.
+# Constructing this Regex is fairly expensive; doing so in the method itself can
+# consume half or better of `DateFormat(format, locale)`'s runtime. So instead we
+# construct and cache it outside the method body. Note, however, that when
+# `keys(CONVERSION_SPECIFIERS)` changes, the cached Regex must be updated accordingly;
+# hence the mutability (Ref-ness) of the cache, the helper method with which to populate
+# the cache, the cache of the hash of `keys(CONVERSION_SPECIFIERS)` (to facilitate checking
+# for changes), and the lock (to maintain consistency of these objects across threads when
+# threads simultaneously modify `CONVERSION_SPECIFIERS` and construct `DateFormat`s).
+function compute_dateformat_regex(conversion_specifiers)
+    letters = String(collect(keys(conversion_specifiers)))
+    return Regex("(?<!\\\\)([\\Q$letters\\E])\\1*")
+end
+const DATEFORMAT_REGEX_LOCK = ReentrantLock()
+const DATEFORMAT_REGEX_HASH = Ref(hash(keys(CONVERSION_SPECIFIERS)))
+const DATEFORMAT_REGEX_CACHE = Ref(compute_dateformat_regex(CONVERSION_SPECIFIERS))
+
 """
     DateFormat(format::AbstractString, locale="english") -> DateFormat
 
@@ -367,8 +384,9 @@ When parsing a time with a `p` specifier, any hour (either `H` or `I`) is interp
 as a 12-hour clock, so the `I` code is mainly useful for output.
 
 Creating a DateFormat object is expensive. Whenever possible, create it once and use it many times
-or try the `dateformat""` string macro. Using this macro creates the DateFormat object once at
-macro expansion time and reuses it later. see [`@dateformat_str`](@ref).
+or try the [`dateformat""`](@ref @dateformat_str) string macro. Using this macro creates the DateFormat
+object once at macro expansion time and reuses it later. There are also several [pre-defined formatters](@ref
+Common-Date-Formatters), listed later.
 
 See [`DateTime`](@ref) and [`format`](@ref) for how to use a DateFormat object to parse and write Date strings
 respectively.
@@ -378,8 +396,20 @@ function DateFormat(f::AbstractString, locale::DateLocale=ENGLISH)
     prev = ()
     prev_offset = 1
 
-    letters = String(collect(keys(CONVERSION_SPECIFIERS)))
-    for m in eachmatch(Regex("(?<!\\\\)([\\Q$letters\\E])\\1*"), f)
+    # To understand this block, please see the comments attached to the definitions of
+    # DATEFORMAT_REGEX_LOCK, DATEFORMAT_REGEX_HASH, and DATEFORMAT_REGEX_CACHE.
+    lock(DATEFORMAT_REGEX_LOCK)
+    try
+        dateformat_regex_hash = hash(keys(CONVERSION_SPECIFIERS))
+        if dateformat_regex_hash != DATEFORMAT_REGEX_HASH[]
+            DATEFORMAT_REGEX_HASH[] = dateformat_regex_hash
+            DATEFORMAT_REGEX_CACHE[] = compute_dateformat_regex(CONVERSION_SPECIFIERS)
+        end
+    finally
+        unlock(DATEFORMAT_REGEX_LOCK)
+    end
+
+    for m in eachmatch(DATEFORMAT_REGEX_CACHE[], f)
         tran = replace(f[prev_offset:prevind(f, m.offset)], r"\\(.)" => s"\1")
 
         if !isempty(prev)
@@ -443,14 +473,63 @@ macro dateformat_str(str)
 end
 
 # Standard formats
+
+"""
+    Dates.ISODateTimeFormat
+
+Describes the ISO8601 formatting for a date and time. This is the default value for `Dates.format`
+of a `DateTime`.
+
+# Example
+```jldoctest
+julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), ISODateTimeFormat)
+"2018-08-08T12:00:43.001"
+```
+"""
 const ISODateTimeFormat = DateFormat("yyyy-mm-dd\\THH:MM:SS.s")
+default_format(::Type{DateTime}) = ISODateTimeFormat
+
+"""
+    Dates.ISODateFormat
+
+Describes the ISO8601 formatting for a date. This is the default value for `Dates.format` of a `Date`.
+
+# Example
+```jldoctest
+julia> Dates.format(Date(2018, 8, 8), ISODateFormat)
+"2018-08-08"
+```
+"""
 const ISODateFormat = DateFormat("yyyy-mm-dd")
+default_format(::Type{Date}) = ISODateFormat
+
+"""
+    Dates.ISOTimeFormat
+
+Describes the ISO8601 formatting for a time. This is the default value for `Dates.format` of a `Time`.
+
+# Example
+```jldoctest
+julia> Dates.format(Time(12, 0, 43, 1), ISOTimeFormat)
+"12:00:43.001"
+```
+"""
 const ISOTimeFormat = DateFormat("HH:MM:SS.s")
+default_format(::Type{Time}) = ISOTimeFormat
+
+"""
+    Dates.RFC1123Format
+
+Describes the RFC1123 formatting for a date and time.
+
+# Example
+```jldoctest
+julia> Dates.format(DateTime(2018, 8, 8, 12, 0, 43, 1), RFC1123Format)
+"Wed, 08 Aug 2018 12:00:43"
+```
+"""
 const RFC1123Format = DateFormat("e, dd u yyyy HH:MM:SS")
 
-default_format(::Type{DateTime}) = ISODateTimeFormat
-default_format(::Type{Date}) = ISODateFormat
-default_format(::Type{Time}) = ISOTimeFormat
 
 ### API
 
@@ -460,22 +539,37 @@ const Locale = Union{DateLocale, String}
     DateTime(dt::AbstractString, format::AbstractString; locale="english") -> DateTime
 
 Construct a `DateTime` by parsing the `dt` date time string following the
-pattern given in the `format` string.
+pattern given in the `format` string (see [`DateFormat`](@ref)  for syntax).
+
+!!! note
+    This method creates a `DateFormat` object each time it is called. It is recommended
+    that you create a [`DateFormat`](@ref) object instead and use that as the second
+    argument to avoid performance loss when using the same format repeatedly.
+
+# Example
+```jldoctest
+julia> DateTime("2020-01-01", "yyyy-mm-dd")
+2020-01-01T00:00:00
+
+julia> a = ("2020-01-01", "2020-01-02");
 
-This method creates a `DateFormat` object each time it is called. If you are
-parsing many date time strings of the same format, consider creating a
-[`DateFormat`](@ref) object once and using that as the second argument instead.
+julia> [DateTime(d, dateformat"yyyy-mm-dd") for d ∈ a] # preferred
+2-element Vector{DateTime}:
+ 2020-01-01T00:00:00
+ 2020-01-02T00:00:00
+```
 """
 function DateTime(dt::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
     return parse(DateTime, dt, DateFormat(format, locale))
 end
 
 """
-    DateTime(dt::AbstractString, df::DateFormat) -> DateTime
+    DateTime(dt::AbstractString, df::DateFormat=ISODateTimeFormat) -> DateTime
 
 Construct a `DateTime` by parsing the `dt` date time string following the
-pattern given in the [`DateFormat`](@ref) object. Similar to
-`DateTime(::AbstractString, ::AbstractString)` but more efficient when
+pattern given in the [`DateFormat`](@ref) object, or $ISODateTimeFormat if omitted.
+
+Similar to `DateTime(::AbstractString, ::AbstractString)` but more efficient when
 repeatedly parsing similarly formatted date time strings with a pre-created
 `DateFormat` object.
 """
@@ -485,20 +579,39 @@ DateTime(dt::AbstractString, df::DateFormat=ISODateTimeFormat) = parse(DateTime,
     Date(d::AbstractString, format::AbstractString; locale="english") -> Date
 
 Construct a `Date` by parsing the `d` date string following the pattern given
-in the `format` string.
+in the `format` string (see [`DateFormat`](@ref) for syntax).
+
+!!! note
+    This method creates a `DateFormat` object each time it is called. It is recommended
+    that you create a [`DateFormat`](@ref) object instead and use that as the second
+    argument to avoid performance loss when using the same format repeatedly.
+
+# Example
+```jldoctest
+julia> Date("2020-01-01", "yyyy-mm-dd")
+2020-01-01
 
-This method creates a `DateFormat` object each time it is called. If you are
-parsing many date strings of the same format, consider creating a
-[`DateFormat`](@ref) object once and using that as the second argument instead.
+julia> a = ("2020-01-01", "2020-01-02");
+
+julia> [Date(d, dateformat"yyyy-mm-dd") for d ∈ a] # preferred
+2-element Vector{Date}:
+ 2020-01-01
+ 2020-01-02
+```
 """
 function Date(d::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
     parse(Date, d, DateFormat(format, locale))
 end
 
 """
-    Date(d::AbstractString, df::DateFormat) -> Date
+    Date(d::AbstractString, df::DateFormat=ISODateFormat) -> Date
+
+Construct a `Date` by parsing the `d` date string following the
+pattern given in the [`DateFormat`](@ref) object, or $ISODateFormat if omitted.
 
-Parse a date from a date string `d` using a `DateFormat` object `df`.
+Similar to `Date(::AbstractString, ::AbstractString)` but more efficient when
+repeatedly parsing similarly formatted date strings with a pre-created
+`DateFormat` object.
 """
 Date(d::AbstractString, df::DateFormat=ISODateFormat) = parse(Date, d, df)
 
@@ -506,20 +619,39 @@ Date(d::AbstractString, df::DateFormat=ISODateFormat) = parse(Date, d, df)
     Time(t::AbstractString, format::AbstractString; locale="english") -> Time
 
 Construct a `Time` by parsing the `t` time string following the pattern given
-in the `format` string.
+in the `format` string (see [`DateFormat`](@ref) for syntax).
 
-This method creates a `DateFormat` object each time it is called. If you are
-parsing many time strings of the same format, consider creating a
-[`DateFormat`](@ref) object once and using that as the second argument instead.
+!!! note
+    This method creates a `DateFormat` object each time it is called. It is recommended
+    that you create a [`DateFormat`](@ref) object instead and use that as the second
+    argument to avoid performance loss when using the same format repeatedly.
+
+# Example
+```jldoctest
+julia> Time("12:34pm", "HH:MMp")
+12:34:00
+
+julia> a = ("12:34pm", "2:34am");
+
+julia> [Time(d, dateformat"HH:MMp") for d ∈ a] # preferred
+2-element Vector{Time}:
+ 12:34:00
+ 02:34:00
+```
 """
 function Time(t::AbstractString, format::AbstractString; locale::Locale=ENGLISH)
     parse(Time, t, DateFormat(format, locale))
 end
 
 """
-    Time(t::AbstractString, df::DateFormat) -> Time
+    Time(t::AbstractString, df::DateFormat=ISOTimeFormat) -> Time
 
-Parse a time from a time string `t` using a `DateFormat` object `df`.
+Construct a `Time` by parsing the `t` date time string following the
+pattern given in the [`DateFormat`](@ref) object, or $ISOTimeFormat if omitted.
+
+Similar to `Time(::AbstractString, ::AbstractString)` but more efficient when
+repeatedly parsing similarly formatted time strings with a pre-created
+`DateFormat` object.
 """
 Time(t::AbstractString, df::DateFormat=ISOTimeFormat) = parse(Time, t, df)
 
diff --git a/stdlib/Dates/src/parse.jl b/stdlib/Dates/src/parse.jl
index 07a65a73b70c33..a5bbc686c955d5 100644
--- a/stdlib/Dates/src/parse.jl
+++ b/stdlib/Dates/src/parse.jl
@@ -282,7 +282,7 @@ function Base.parse(::Type{T}, str::AbstractString, df::DateFormat=default_forma
     val = tryparsenext_internal(T, str, pos, len, df, true)
     @assert val !== nothing
     values, endpos = val
-    return T(values...)
+    return T(values...)::T
 end
 
 function Base.tryparse(::Type{T}, str::AbstractString, df::DateFormat=default_format(T)) where T<:TimeType
@@ -292,7 +292,7 @@ function Base.tryparse(::Type{T}, str::AbstractString, df::DateFormat=default_fo
     values, endpos = res
     if validargs(T, values...) === nothing
         # TODO: validargs gets called twice, since it's called again in the T constructor
-        return T(values...)
+        return T(values...)::T
     end
     return nothing
 end
diff --git a/stdlib/Dates/src/periods.jl b/stdlib/Dates/src/periods.jl
index 27d6099d78459f..021e91924ce595 100644
--- a/stdlib/Dates/src/periods.jl
+++ b/stdlib/Dates/src/periods.jl
@@ -70,9 +70,7 @@ default(p::Union{T,Type{T}}) where {T<:TimePeriod} = T(0)
 
 (-)(x::P) where {P<:Period} = P(-value(x))
 ==(x::P, y::P) where {P<:Period} = value(x) == value(y)
-==(x::Period, y::Period) = (==)(promote(x, y)...)
 Base.isless(x::P, y::P) where {P<:Period} = isless(value(x), value(y))
-Base.isless(x::Period, y::Period) = isless(promote(x, y)...)
 
 # Period Arithmetic, grouped by dimensionality:
 for op in (:+, :-, :lcm, :gcd)
@@ -97,6 +95,11 @@ end
 (*)(A::Period, B::AbstractArray) = Broadcast.broadcast_preserving_zero_d(*, A, B)
 (*)(A::AbstractArray, B::Period) = Broadcast.broadcast_preserving_zero_d(*, A, B)
 
+for op in (:(==), :isless, :/, :rem, :mod, :lcm, :gcd)
+    @eval ($op)(x::Period, y::Period) = ($op)(promote(x, y)...)
+end
+div(x::Period, y::Period, r::RoundingMode) = div(promote(x, y)..., r)
+
 # intfuncs
 Base.gcdx(a::T, b::T) where {T<:Period} = ((g, x, y) = gcdx(value(a), value(b)); return T(g), x, y)
 Base.abs(a::T) where {T<:Period} = T(abs(value(a)))
@@ -196,6 +199,16 @@ struct CompoundPeriod <: AbstractTime
     end
 end
 
+"""
+    Dates.periods(::CompoundPeriod) -> Vector{Period}
+
+Return the `Vector` of `Period`s that comprise the given `CompoundPeriod`.
+
+!!! compat "Julia 1.7"
+    This function requires Julia 1.7 or later.
+"""
+periods(x::CompoundPeriod) = x.periods
+
 """
     CompoundPeriod(periods) -> CompoundPeriod
 
@@ -250,6 +263,7 @@ julia> Dates.canonicalize(Dates.CompoundPeriod(Dates.Minute(50000)))
 4 weeks, 6 days, 17 hours, 20 minutes
 ```
 """
+canonicalize(x::Period) = canonicalize(CompoundPeriod(x))
 function canonicalize(x::CompoundPeriod)
     # canonicalize Periods by pushing "overflow" into a coarser period.
     p = x.periods
@@ -346,6 +360,9 @@ function Base.string(x::CompoundPeriod)
 end
 Base.show(io::IO,x::CompoundPeriod) = print(io, string(x))
 
+Base.convert(::Type{T}, x::CompoundPeriod) where T<:Period =
+    isconcretetype(T) ? sum(T, x.periods) : throw(MethodError(convert,(T,x)))
+
 # E.g. Year(1) + Day(1)
 (+)(x::Period,y::Period) = CompoundPeriod(Period[x, y])
 (+)(x::CompoundPeriod, y::Period) = CompoundPeriod(vcat(x.periods, y))
@@ -472,9 +489,20 @@ Base.hash(x::Year, h::UInt) = hash(12 * value(x), h + otherperiod_seed(x))
 Base.hash(x::Quarter, h::UInt) = hash(3 * value(x), h + otherperiod_seed(x))
 Base.hash(x::Month, h::UInt) = hash(value(x), h + otherperiod_seed(x))
 
+function Base.hash(x::CompoundPeriod, h::UInt)
+    isempty(x.periods) && return hash(0, h + zero_or_fixedperiod_seed)
+    for p in x.periods
+        h = hash(p, h)
+    end
+    return h
+end
+
 Base.isless(x::FixedPeriod, y::OtherPeriod) = throw(MethodError(isless, (x, y)))
 Base.isless(x::OtherPeriod, y::FixedPeriod) = throw(MethodError(isless, (x, y)))
 
+Base.isless(x::Period, y::CompoundPeriod) = CompoundPeriod(x) < y
+Base.isless(x::CompoundPeriod, y::Period) = x < CompoundPeriod(y)
+Base.isless(x::CompoundPeriod, y::CompoundPeriod) = tons(x) < tons(y)
 # truncating conversions to milliseconds, nanoseconds and days:
 # overflow can happen for periods longer than ~300,000 years
 toms(c::Nanosecond)  = div(value(c), 1000000)
diff --git a/stdlib/Dates/src/ranges.jl b/stdlib/Dates/src/ranges.jl
index 018a3a46fd8b52..3939d3661ec66e 100644
--- a/stdlib/Dates/src/ranges.jl
+++ b/stdlib/Dates/src/ranges.jl
@@ -24,10 +24,11 @@ end
 Base.length(r::StepRange{<:TimeType}) = isempty(r) ? Int64(0) : len(r.start, r.stop, r.step) + 1
 # Period ranges hook into Int64 overflow detection
 Base.length(r::StepRange{<:Period}) = length(StepRange(value(r.start), value(r.step), value(r.stop)))
+Base.checked_length(r::StepRange{<:Period}) = Base.checked_length(StepRange(value(r.start), value(r.step), value(r.stop)))
 
-# Overload Base.steprange_last because `rem` is not overloaded for `TimeType`s
+# Overload Base.steprange_last because `step::Period` may be a variable amount of time (e.g. for Month and Year)
 function Base.steprange_last(start::T, step, stop) where T<:TimeType
-    if isa(step,AbstractFloat)
+    if isa(step, AbstractFloat)
         throw(ArgumentError("StepRange should not be used with floating point"))
     end
     z = zero(step)
@@ -47,7 +48,7 @@ function Base.steprange_last(start::T, step, stop) where T<:TimeType
             last = stop - remain
         end
     end
-    last
+    return last
 end
 
 import Base.in
diff --git a/stdlib/Dates/src/types.jl b/stdlib/Dates/src/types.jl
index 4c338d5b765d7c..0cdac884fb7fe8 100644
--- a/stdlib/Dates/src/types.jl
+++ b/stdlib/Dates/src/types.jl
@@ -19,7 +19,34 @@ abstract type AbstractTime end
 `Period` types represent discrete, human representations of time.
 """
 abstract type Period     <: AbstractTime end
+
+"""
+    DatePeriod
+    Year
+    Quarter
+    Month
+    Week
+    Day
+
+Intervals of time greater than or equal to a day.
+Conventional comparisons between `DatePeriod`s are not all valid.
+(eg `Week(1) == Day(7)`, but `Year(1) != Day(365)`)
+"""
 abstract type DatePeriod <: Period end
+
+"""
+    TimePeriod
+    Hour
+    Minute
+    Second
+    Millisecond
+    Microsecond
+    Nanosecond
+
+Intervals of time less than a day.
+Conversions between all `TimePeriod`s are permissible.
+(eg `Hour(1) == Minute(60) == Second(3600)`)
+"""
 abstract type TimePeriod <: Period end
 
 for T in (:Year, :Quarter, :Month, :Week, :Day)
@@ -156,7 +183,7 @@ function totaldays(y, m, d)
 end
 
 # If the year is divisible by 4, except for every 100 years, except for every 400 years
-isleapyear(y) = ((y % 4 == 0) && (y % 100 != 0)) || (y % 400 == 0)
+isleapyear(y) = (y % 4 == 0) && ((y % 100 != 0) || (y % 400 == 0))
 
 # Number of days in month
 const DAYSINMONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)
@@ -448,7 +475,7 @@ function Base.Timer(delay::Period; interval::Period=Second(0))
     Timer(toms(delay) / 1000, interval=toms(interval) / 1000)
 end
 
-function Base.timedwait(testcb::Function, timeout::Period; pollint::Period=Millisecond(100))
+function Base.timedwait(testcb, timeout::Period; pollint::Period=Millisecond(100))
     timedwait(testcb, toms(timeout) / 1000, pollint=toms(pollint) / 1000)
 end
 
diff --git a/stdlib/Dates/test/adjusters.jl b/stdlib/Dates/test/adjusters.jl
index 3caf7ba5497c2f..145db53785502e 100644
--- a/stdlib/Dates/test/adjusters.jl
+++ b/stdlib/Dates/test/adjusters.jl
@@ -8,10 +8,12 @@ using Dates
 @testset "trunc" begin
     dt = Dates.Date(2012, 12, 21)
     @test trunc(dt, Dates.Year) == Dates.Date(2012)
+    @test trunc(dt, Dates.Quarter) == Dates.Date(2012, 10)
     @test trunc(dt, Dates.Month) == Dates.Date(2012, 12)
     @test trunc(dt, Dates.Day) == Dates.Date(2012, 12, 21)
     dt = Dates.DateTime(2012, 12, 21, 16, 30, 20, 200)
     @test trunc(dt, Dates.Year) == Dates.DateTime(2012)
+    @test trunc(dt, Dates.Quarter) == Dates.DateTime(2012, 10)
     @test trunc(dt, Dates.Month) == Dates.DateTime(2012, 12)
     @test trunc(dt, Dates.Day) == Dates.DateTime(2012, 12, 21)
     @test trunc(dt, Dates.Hour) == Dates.DateTime(2012, 12, 21, 16)
diff --git a/stdlib/Dates/test/arithmetic.jl b/stdlib/Dates/test/arithmetic.jl
index a1a6884d4b0c36..485fea56240660 100644
--- a/stdlib/Dates/test/arithmetic.jl
+++ b/stdlib/Dates/test/arithmetic.jl
@@ -508,4 +508,13 @@ end
     end
 end
 
+@testset "Diff of dates" begin
+    for t ∈ [Day, Week, Hour, Minute]
+        a = DateTime(2021,1,1):t(1):DateTime(2021,2,1)
+        d = diff(a)
+        @test d == diff(collect(a))
+        @test eltype(d) === typeof(a[1] - a[2])
+    end
+end
+
 end
diff --git a/stdlib/Dates/test/io.jl b/stdlib/Dates/test/io.jl
index 450b2b9c92eeef..1c50676eb8346d 100644
--- a/stdlib/Dates/test/io.jl
+++ b/stdlib/Dates/test/io.jl
@@ -5,6 +5,9 @@ module IOTests
 using Test
 using Dates
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+include(joinpath(BASE_TEST_PATH, "testhelpers", "withlocales.jl"))
+
 @testset "string/show representation of Date" begin
     @test string(Dates.Date(1, 1, 1)) == "0001-01-01" # January 1st, 1 AD/CE
     @test sprint(show, Dates.Date(1, 1, 1)) == "Dates.Date(\"0001-01-01\")"
@@ -63,6 +66,7 @@ end
     @test sprint(show, DateFormat("ddxmm").tokens[2]) == "Delim(x)"
     @test sprint(show, DateFormat("xxmmxx").tokens[2]) == "DatePart(mm)"
 end
+
 @testset "Common Parsing Patterns" begin
     #'1996-January-15'
     dt = Dates.DateTime(1996, 1, 15)
@@ -249,6 +253,7 @@ end
     @test Dates.Date(string(Dates.Date(dt))) == Dates.Date(dt)
     @test Dates.DateTime(string(dt)) == dt
 end
+
 @testset "prefix." begin
     s = "/1996/1/15"
     f = "/yyyy/m/d"
@@ -257,6 +262,7 @@ end
     @test Dates.format(dt, f) == s
     @test_throws ArgumentError Dates.DateTime("1996/1/15", f)
 end
+
 @testset "French and Chinese" begin
     # from Jiahao
     @test Dates.Date("2009年12月01日", "yyyy年mm月dd日") == Dates.Date(2009, 12, 1)
@@ -299,6 +305,7 @@ end
     # doesn't parse month name greater than 4 chars
     @test_throws ArgumentError Dates.Date("28avril2014", f; locale="french")
 end
+
 @testset "year digits parsing" begin
     # From Tony Fong
     f = "dduuuyy"
@@ -354,6 +361,7 @@ end
 
     @test typeof(Dates.Date.(dr)) == Array{Date, 1}
 end
+
 @testset "Issue 13" begin
     t = Dates.DateTime(1, 1, 1, 14, 51, 0, 118)
     @test Dates.DateTime("[14:51:00.118]", "[HH:MM:SS.sss]") == t
@@ -363,6 +371,7 @@ end
     @test Dates.DateTime("x14:51:00.118", "xHH:MM:SS.sss") == t
     @test Dates.DateTime("14:51:00.118]", "HH:MM:SS.sss]") == t
 end
+
 @testset "RFC1123Format" begin
     dt = Dates.DateTime(2014, 8, 23, 17, 22, 15)
     @test Dates.format(dt, Dates.RFC1123Format) == "Sat, 23 Aug 2014 17:22:15"
@@ -391,6 +400,7 @@ end
     @test parse(Dates.DateTime, "Mon, 12 Nov 2016 07:45:36", Dates.RFC1123Format) == dt  # Wrong day of week
     @test_throws ArgumentError parse(Date, "Foo, 12 Nov 2016 07:45:36", Dates.RFC1123Format)
 end
+
 @testset "Issue 15195" begin
     f = "YY"
     @test Dates.format(Dates.Date(1999), f) == "1999"
@@ -441,6 +451,7 @@ end
     @test_throws ArgumentError Dates.Date("Apr 01 xx 2014", "uuu dd zz yyyy")
     @test_throws ArgumentError Dates.Date("Apr 01 xx 2014", "uuu dd    yyyy")
 end
+
 @testset "Issue 21001" begin
     for (ms, str) in zip([0, 1, 20, 300, 450, 678], ["0", "001", "02", "3", "45", "678"])
         local dt = DateTime(2000, 1, 1, 0, 0, 0, ms)
@@ -450,16 +461,21 @@ end
         @test Dates.format(dt, "ssss") == rpad(str, 4, '0')
     end
 end
+
 # Issue #21504
 @test tryparse(Dates.Date, "0-1000") === nothing
 
+# Issue #44003
+@test tryparse(Dates.Date, "2017", Dates.DateFormat(".s")) === nothing
+
 @testset "parse milliseconds, Issue #22100" begin
     @test Dates.DateTime("2017-Mar-17 00:00:00.0000", "y-u-d H:M:S.s") == Dates.DateTime(2017, 3, 17)
     @test Dates.parse_components(".1", Dates.DateFormat(".s")) == [Dates.Millisecond(100)]
     @test Dates.parse_components(".12", Dates.DateFormat(".s")) == [Dates.Millisecond(120)]
     @test Dates.parse_components(".123", Dates.DateFormat(".s")) == [Dates.Millisecond(123)]
     @test Dates.parse_components(".1230", Dates.DateFormat(".s")) == [Dates.Millisecond(123)]
-    @test_throws InexactError Dates.parse_components(".1234", Dates.DateFormat(".s"))
+    # Issue #44003
+    @test_throws ArgumentError Dates.parse_components(".1234", Dates.DateFormat(".s"))
 
     # Ensure that no overflow occurs when using Int32 literals: Int32(10)^10
     @test Dates.parse_components("." * rpad(999, 10, '0'), Dates.DateFormat(".s")) == [Dates.Millisecond(999)]
@@ -507,44 +523,67 @@ end
 
 @testset "midnight" begin
     # issue #28203: 24:00 is a valid ISO 8601 time
-    @test DateTime("2018-01-01 24:00","yyyy-mm-dd HH:MM") == DateTime("2018-01-02T00:00:00") ==
-          DateTime(2018, 1, 1, 24) == DateTime(2018, 1, 2)
-    @test_throws ArgumentError DateTime("2018-01-01 24:01","yyyy-mm-dd HH:MM")
+    @test DateTime("2018-01-01 24:00", "yyyy-mm-dd HH:MM") ==
+          DateTime("2018-01-02T00:00:00") ==
+          DateTime(2018, 1, 1, 24) ==
+          DateTime(2018, 1, 2)
+    @test_throws ArgumentError DateTime("2018-01-01 24:01", "yyyy-mm-dd HH:MM")
     @test_throws ArgumentError DateTime(2018, 1, 1, 24, 0, 1)
     @test_throws ArgumentError DateTime(2018, 1, 1, 24, 0, 0, 1)
 end
 
 @testset "AM/PM" begin
-    # get the current locale
-    LC_TIME = 2
-    time_locale = ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, C_NULL)
-    try
-        # set the locale
-        ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, "C")
-
-        for (t12,t24) in (("12:00am","00:00"), ("12:07am","00:07"), ("01:24AM","01:24"),
-                        ("12:00pm","12:00"), ("12:15pm","12:15"), ("11:59PM","23:59"))
-            d = DateTime("2018-01-01T$t24:00")
-            t = Time("$t24:00")
-            for HH in ("HH","II")
-                @test DateTime("2018-01-01 $t12","yyyy-mm-dd $HH:MMp") == d
-                @test Time("$t12","$HH:MMp") == t
-            end
-            tmstruct = Libc.strptime("%I:%M%p", t12)
-            @test Time(tmstruct) == t
-            @test uppercase(t12) == Dates.format(t, "II:MMp") ==
-                                    Dates.format(d, "II:MMp") ==
-                Libc.strftime("%I:%M%p", tmstruct)
+    for (t12, t24) in (
+        ("12:00am", "00:00"),
+        ("12:07am", "00:07"),
+        ("01:24AM", "01:24"),
+        ("12:00pm", "12:00"),
+        ("12:15pm", "12:15"),
+        ("11:59PM", "23:59"),
+    )
+        d = DateTime("2018-01-01T$t24:00")
+        t = Time("$t24:00")
+        for HH in ("HH", "II")
+            @test DateTime("2018-01-01 $t12", "yyyy-mm-dd $HH:MMp") == d
+            @test Time("$t12", "$HH:MMp") == t
         end
-        for bad in ("00:24am", "00:24pm", "13:24pm", "2pm", "12:24p.m.", "12:24 pm", "12:24pµ")
-            @eval @test_throws ArgumentError Time($bad, "II:MMp")
+        local tmstruct, strftime
+        withlocales(["C"]) do
+            # test am/pm comparison handling
+            tmstruct = Libc.strptime("%I:%M%p", t12)
+            strftime = Libc.strftime("%I:%M%p", tmstruct)
+            nothing
         end
-        # if am/pm is missing, defaults to 24-hour clock
-        @eval Time("13:24", "II:MMp") == Time("13:24", "HH:MM")
-    finally
-        # recover the locale
-        ccall(:setlocale, Cstring, (Cint, Cstring), LC_TIME, time_locale)
+        @test Time(tmstruct) == t
+        @test uppercase(t12) ==
+              Dates.format(t, "II:MMp") ==
+              Dates.format(d, "II:MMp") ==
+              strftime
+    end
+    for bad in ("00:24am", "00:24pm", "13:24pm", "2pm", "12:24p.m.", "12:24 pm", "12:24pµ")
+        @test_throws ArgumentError Time(bad, "II:MMp")
     end
+    # if am/pm is missing, defaults to 24-hour clock
+    @test Time("13:24", "II:MMp") == Time("13:24", "HH:MM")
+end
+
+@testset "Issue #10561, two-digit year parsing ambiguities" begin
+    # All two-digit dates (whether full or resulting from truncation, e.g. 2010 -> 10)
+    # encoded in two digit year format YY are parsed as year 00YY
+
+    for test_year in ("00", "01", "99", "2021")
+        @test Date(test_year, dateformat"yy") == Date(parse(Int, test_year))
+    end
+end
+
+@testset "inference with dynamic dateformat string" begin
+    datetime = DateTime(2020, 4, 7)
+    f1() = DateTime("2020-04-07", "yyyy-mm-dd")
+    f2() = DateTime("2020-04-07", DateFormat("yyyy-mm-dd"))
+    f3() = parse(DateTime, "2020-04-07", DateFormat("yyyy-mm-dd"))
+    @test (@inferred f1()) == (@inferred f2()) == (@inferred f3()) == datetime
+    g() = tryparse(DateTime, "2020-04-07", DateFormat("yyyy-mm-dd"))
+    @test (@inferred Nothing g()) == datetime
 end
 
 end
diff --git a/stdlib/Dates/test/periods.jl b/stdlib/Dates/test/periods.jl
index 4b84de7ad8f85f..c37a1666375a9a 100644
--- a/stdlib/Dates/test/periods.jl
+++ b/stdlib/Dates/test/periods.jl
@@ -179,10 +179,8 @@ end
     @test_throws InexactError y * 3//4
     @test (1:1:5)*Second(5) === Second(5)*(1:1:5) === Second(5):Second(5):Second(25) === (1:5)*Second(5)
     @test collect(1:1:5)*Second(5) == Second(5)*collect(1:1:5) == (1:5)*Second(5)
-    @test (Second(2):Second(2):Second(10))/Second(2) === 1.0:1.0:5.0
-    @test collect(Second(2):Second(2):Second(10))/Second(2) == 1:1:5
-    @test (Second(2):Second(2):Second(10)) / 2 === Second(1):Second(1):Second(5)
-    @test collect(Second(2):Second(2):Second(10)) / 2 == Second(1):Second(1):Second(5)
+    @test (Second(2):Second(2):Second(10))/Second(2) === 1.0:1.0:5.0 == collect(Second(2):Second(2):Second(10))/Second(2)
+    @test (Second(2):Second(2):Second(10)) / 2 == Second(1):Second(1):Second(5) == collect(Second(2):Second(2):Second(10)) / 2
     @test Dates.Year(4) / 2 == Dates.Year(2)
     @test Dates.Year(4) / 2f0 == Dates.Year(2)
     @test Dates.Year(4) / 0.5 == Dates.Year(8)
@@ -366,6 +364,7 @@ end
     @test isequal(d - h, 2d - 2h - 1d + 1h)
     @test sprint(show, y + m) == string(y + m)
     @test convert(Dates.CompoundPeriod, y) + m == y + m
+    @test Dates.periods(convert(Dates.CompoundPeriod, y)) == convert(Dates.CompoundPeriod, y).periods
 end
 @testset "compound period simplification" begin
     # reduce compound periods into the most basic form
@@ -380,6 +379,18 @@ end
     @test Dates.Date(2009, 2, 1) - (Dates.Month(1) + Dates.Day(1)) == Dates.Date(2008, 12, 31)
     @test_throws MethodError (Dates.Month(1) + Dates.Day(1)) - Dates.Date(2009,2,1)
 end
+
+@testset "canonicalize Period" begin
+    # reduce individual Period into most basic CompoundPeriod
+    @test Dates.canonicalize(Dates.Nanosecond(1000000)) == Dates.canonicalize(Dates.Millisecond(1))
+    @test Dates.canonicalize(Dates.Millisecond(1000)) == Dates.canonicalize(Dates.Second(1))
+    @test Dates.canonicalize(Dates.Second(60)) == Dates.canonicalize(Dates.Minute(1))
+    @test Dates.canonicalize(Dates.Minute(60)) == Dates.canonicalize(Dates.Hour(1))
+    @test Dates.canonicalize(Dates.Hour(24)) == Dates.canonicalize(Dates.Day(1))
+    @test Dates.canonicalize(Dates.Day(7)) == Dates.canonicalize(Dates.Week(1))
+    @test Dates.canonicalize(Dates.Month(12)) == Dates.canonicalize(Dates.Year(1))
+    @test Dates.canonicalize(Dates.Minute(24*60*1 + 12*60)) == Dates.canonicalize(Dates.CompoundPeriod([Dates.Day(1),Dates.Hour(12)]))
+end
 @testset "unary ops and vectorized period arithmetic" begin
     pa = [1y 1m 1w 1d; 1h 1mi 1s 1ms]
     cpa = [1y + 1s 1m + 1s 1w + 1s 1d + 1s; 1h + 1s 1mi + 1s 2m + 1s 1s + 1ms]
@@ -477,6 +488,14 @@ end
         end
     end
 end
+@testset "Hashing for CompoundPeriod (#37447)" begin
+    periods = [Dates.Year(0), Dates.Minute(0), Dates.Second(0), Dates.CompoundPeriod(),
+               Dates.Minute(2), Dates.Second(120), Dates.CompoundPeriod(Dates.Minute(2)),
+               Dates.CompoundPeriod(Dates.Second(120)), Dates.CompoundPeriod(Dates.Minute(1), Dates.Second(60))]
+    for x = periods, y = periods
+        @test isequal(x,y) == (hash(x) == hash(y))
+    end
+end
 
 @testset "#30832" begin
     @test Dates.toms(Dates.Second(1) + Dates.Nanosecond(1)) == 1e3
@@ -484,4 +503,31 @@ end
     @test Dates.toms(Dates.Second(1) + Dates.Microsecond(1)) == 1e3
 end
 
+@testset "CompoundPeriod and Period isless()" begin
+    #tests for allowed comparisons
+    #FixedPeriod
+    @test (h - ms < h + ns) == true
+    @test (h + ns < h -ms) == false
+    @test (h  < h -ms) == false
+    @test (h-ms  < h) == true
+    #OtherPeriod
+    @test (2y-m < 25m+1y) == true
+    @test (2y < 25m+1y) == true
+    @test (25m+1y < 2y) == false
+    #Test combined Fixed and Other Periods
+    @test (1m + 1d < 1m + 1s) == false
+end
+
+@testset "Convert CompoundPeriod to Period" begin
+    @test convert(Month, Year(1) + Month(1)) === Month(13)
+    @test convert(Second, Minute(1) + Second(30)) === Second(90)
+    @test convert(Minute, Minute(1) + Second(60)) === Minute(2)
+    @test convert(Millisecond, Minute(1) + Second(30)) === Millisecond(90_000)
+    @test_throws InexactError convert(Minute, Minute(1) + Second(30))
+    @test_throws MethodError convert(Month, Minute(1) + Second(30))
+    @test_throws MethodError convert(Second, Month(1) + Second(30))
+    @test_throws MethodError convert(Period, Minute(1) + Second(30))
+    @test_throws MethodError convert(Dates.FixedPeriod, Minute(1) + Second(30))
+end
+
 end
diff --git a/stdlib/Dates/test/ranges.jl b/stdlib/Dates/test/ranges.jl
index 6eb63713768677..52416fc95ec0ca 100644
--- a/stdlib/Dates/test/ranges.jl
+++ b/stdlib/Dates/test/ranges.jl
@@ -515,7 +515,7 @@ end
 @test length(Dates.Year(1):Dates.Year(1):Dates.Year(10)) == 10
 @test length(Dates.Year(10):Dates.Year(-1):Dates.Year(1)) == 10
 @test length(Dates.Year(10):Dates.Year(-2):Dates.Year(1)) == 5
-@test_throws OverflowError length(typemin(Dates.Year):Dates.Year(1):typemax(Dates.Year))
+@test length(typemin(Dates.Year):Dates.Year(1):typemax(Dates.Year)) == 0 # overflow
 @test_throws MethodError Dates.Date(0):Dates.DateTime(2000)
 @test_throws MethodError Dates.Date(0):Dates.Year(10)
 @test length(range(Dates.Date(2000), step=Dates.Day(1), length=366)) == 366
diff --git a/stdlib/Dates/test/types.jl b/stdlib/Dates/test/types.jl
index 19575428305f79..8823e56e41a2f0 100644
--- a/stdlib/Dates/test/types.jl
+++ b/stdlib/Dates/test/types.jl
@@ -257,7 +257,7 @@ end
 
 @testset "issue #31524" begin
     dt1 = Libc.strptime("%Y-%M-%dT%H:%M:%SZ", "2018-11-16T10:26:14Z")
-    dt2 = Base.Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0)
+    dt2 = Libc.TmStruct(14, 30, 5, 10, 1, 99, 3, 40, 0)
 
     time = Time(dt1)
     @test typeof(time) == Time
diff --git a/stdlib/DelimitedFiles/src/DelimitedFiles.jl b/stdlib/DelimitedFiles/src/DelimitedFiles.jl
index b13eae35052135..7c0e3e39b6b86a 100644
--- a/stdlib/DelimitedFiles/src/DelimitedFiles.jl
+++ b/stdlib/DelimitedFiles/src/DelimitedFiles.jl
@@ -190,8 +190,9 @@ Specifying `skipstart` will ignore the corresponding number of initial lines fro
 If `skipblanks` is `true`, blank lines in the input will be ignored.
 
 If `use_mmap` is `true`, the file specified by `source` is memory mapped for potential
-speedups. Default is `true` except on Windows. On Windows, you may want to specify `true` if
-the file is large, and is only read once and not written to.
+speedups if the file is large. Default is `false`. On a Windows filesystem, `use_mmap` should not be set
+to `true` unless the file is only read once and is also not written to.
+Some edge cases exist where an OS is Unix-like but the filesystem is Windows-like.
 
 If `quotes` is `true`, columns enclosed within double-quote (\") characters are allowed to
 contain new lines and column delimiters. Double-quote characters within a quoted field must
@@ -232,11 +233,11 @@ readdlm_auto(input::IO, dlm::AbstractChar, T::Type, eol::AbstractChar, auto::Boo
 function readdlm_auto(input::AbstractString, dlm::AbstractChar, T::Type, eol::AbstractChar, auto::Bool; opts...)
     isfile(input) || throw(ArgumentError("Cannot open \'$input\': not a file"))
     optsd = val_opts(opts)
-    use_mmap = get(optsd, :use_mmap, Sys.iswindows() ? false : true)
+    use_mmap = get(optsd, :use_mmap, false)
     fsz = filesize(input)
     if use_mmap && fsz > 0 && fsz < typemax(Int)
         a = open(input, "r") do f
-            Mmap.mmap(f, Vector{UInt8}, (Int(fsz),))
+            mmap(f, Vector{UInt8}, (Int(fsz),))
         end
         # TODO: It would be nicer to use String(a) without making a copy,
         # but because the mmap'ed array is not NUL-terminated this causes
diff --git a/stdlib/DelimitedFiles/test/runtests.jl b/stdlib/DelimitedFiles/test/runtests.jl
index f06804d8956471..3bb8381354c556 100644
--- a/stdlib/DelimitedFiles/test/runtests.jl
+++ b/stdlib/DelimitedFiles/test/runtests.jl
@@ -194,7 +194,7 @@ end
             "Ireland", "Sinead O'Connor", "Éire", "Sinéad O'Connor",
             "Israel", "Yehoram Gaon", "ישראל", "יהורם גאון",
             "Italy", "Fabrizio DeAndre", "Italia", "Fabrizio De André",
-            "Japan", "KUBOTA Toshinobu", "日本", "久保田    利伸",
+            "Japan", "KUBOTA Toshinobu", "日本", "久保田    利伸",
             "Japan", "HAYASHIBARA Megumi", "日本", "林原 めぐみ",
             "Japan", "Mori Ogai", "日本", "森鷗外",
             "Japan", "Tex Texin", "日本", "テクス テクサン",
@@ -321,10 +321,10 @@ end
 # issue #11484: useful error message for invalid readdlm filepath arguments
 @test_throws ArgumentError readdlm(tempdir())
 
-# displaying as text/csv
-let d = TextDisplay(IOBuffer())
-    display(d, "text/csv", [3 1 4])
-    @test String(take!(d.io)) == "3,1,4\n"
+# showing as text/csv
+let d = TextDisplay(PipeBuffer())
+    show(d.io, "text/csv", [3 1 4])
+    @test read(d.io, String) == "3,1,4\n"
 end
 
 @testset "complex" begin
diff --git a/stdlib/Distributed/docs/src/index.md b/stdlib/Distributed/docs/src/index.md
index 114ac1683a54a2..dc8cef5e22d927 100644
--- a/stdlib/Distributed/docs/src/index.md
+++ b/stdlib/Distributed/docs/src/index.md
@@ -1,4 +1,4 @@
-# Distributed Computing
+# [Distributed Computing](@id man-distributed)
 
 ```@docs
 Distributed.addprocs
@@ -65,4 +65,5 @@ Distributed.connect(::ClusterManager, ::Int, ::WorkerConfig)
 Distributed.init_worker
 Distributed.start_worker
 Distributed.process_messages
+Distributed.default_addprocs_params
 ```
diff --git a/stdlib/Distributed/src/Distributed.jl b/stdlib/Distributed/src/Distributed.jl
index 4b1d5a8fce1fe0..3bcbc7b67f60d7 100644
--- a/stdlib/Distributed/src/Distributed.jl
+++ b/stdlib/Distributed/src/Distributed.jl
@@ -10,11 +10,12 @@ import Base: getindex, wait, put!, take!, fetch, isready, push!, length,
              hash, ==, kill, close, isopen, showerror
 
 # imports for use
-using Base: Process, Semaphore, JLOptions, buffer_writes, @sync_add,
+using Base: Process, Semaphore, JLOptions, buffer_writes, @async_unwrap,
             VERSION_STRING, binding_module, atexit, julia_exename,
             julia_cmd, AsyncGenerator, acquire, release, invokelatest,
-            shell_escape_posixly, uv_error, something, notnothing, isbuffered,
-            mapany
+            shell_escape_posixly, shell_escape_csh,
+            shell_escape_wincmd, escape_microsoft_c_args,
+            uv_error, something, notnothing, isbuffered, mapany
 using Base.Threads: Event
 
 using Serialization, Sockets
@@ -75,7 +76,7 @@ function _require_callback(mod::Base.PkgId)
         # broadcast top-level (e.g. from Main) import/using from node 1 (only)
         @sync for p in procs()
             p == 1 && continue
-            @sync_add remotecall(p) do
+            @async_unwrap remotecall_wait(p) do
                 Base.require(mod)
                 nothing
             end
@@ -83,15 +84,15 @@ function _require_callback(mod::Base.PkgId)
     end
 end
 
-const REF_ID = Ref(1)
-next_ref_id() = (id = REF_ID[]; REF_ID[] = id+1; id)
+const REF_ID = Threads.Atomic{Int}(1)
+next_ref_id() = Threads.atomic_add!(REF_ID, 1)
 
 struct RRID
     whence::Int
     id::Int
 
-    RRID() = RRID(myid(),next_ref_id())
-    RRID(whence, id) = new(whence,id)
+    RRID() = RRID(myid(), next_ref_id())
+    RRID(whence, id) = new(whence, id)
 end
 
 hash(r::RRID, h::UInt) = hash(r.whence, hash(r.id, h))
diff --git a/stdlib/Distributed/src/cluster.jl b/stdlib/Distributed/src/cluster.jl
index 6e4680816f2d52..37f1660e19478c 100644
--- a/stdlib/Distributed/src/cluster.jl
+++ b/stdlib/Distributed/src/cluster.jl
@@ -15,7 +15,7 @@ abstract type ClusterManager end
 Type used by [`ClusterManager`](@ref)s to control workers added to their clusters. Some fields
 are used by all cluster managers to access a host:
   * `io` -- the connection used to access the worker (a subtype of `IO` or `Nothing`)
-  * `host` -- the host address (either an `AbstractString` or `Nothing`)
+  * `host` -- the host address (either a `String` or `Nothing`)
   * `port` -- the port on the host used to connect to the worker (either an `Int` or `Nothing`)
 
 Some are used by the cluster manager to add workers to an already-initialized host:
@@ -95,9 +95,10 @@ end
 @enum WorkerState W_CREATED W_CONNECTED W_TERMINATING W_TERMINATED
 mutable struct Worker
     id::Int
-    del_msgs::Array{Any,1}
+    msg_lock::Threads.ReentrantLock # Lock for del_msgs, add_msgs, and gcflag
+    del_msgs::Array{Any,1} # XXX: Could del_msgs and add_msgs be Channels?
     add_msgs::Array{Any,1}
-    gcflag::Bool
+    @atomic gcflag::Bool
     state::WorkerState
     c_state::Condition      # wait for state changes
     ct_time::Float64        # creation time
@@ -133,7 +134,7 @@ mutable struct Worker
         if haskey(map_pid_wrkr, id)
             return map_pid_wrkr[id]
         end
-        w=new(id, [], [], false, W_CREATED, Condition(), time(), conn_func)
+        w=new(id, Threads.ReentrantLock(), [], [], false, W_CREATED, Condition(), time(), conn_func)
         w.initialized = Event()
         register_worker(w)
         w
@@ -160,17 +161,18 @@ function check_worker_state(w::Worker)
         else
             w.ct_time = time()
             if myid() > w.id
-                @async exec_conn_func(w)
+                t = @async exec_conn_func(w)
             else
                 # route request via node 1
-                @async remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1, w.id, myid())
+                t = @async remotecall_fetch((p,to_id) -> remotecall_fetch(exec_conn_func, p, to_id), 1, w.id, myid())
             end
+            errormonitor(t)
             wait_for_conn(w)
         end
     end
 end
 
-exec_conn_func(id::Int) = exec_conn_func(worker_from_id(id))
+exec_conn_func(id::Int) = exec_conn_func(worker_from_id(id)::Worker)
 function exec_conn_func(w::Worker)
     try
         f = notnothing(w.conn_func)
@@ -230,7 +232,10 @@ start_worker(cookie::AbstractString=readline(stdin); kwargs...) = start_worker(s
 function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_stdin::Bool=true, stderr_to_stdout::Bool=true)
     init_multi()
 
-    close_stdin && close(stdin) # workers will not use it
+    if close_stdin # workers will not use it
+        redirect_stdin(devnull)
+        close(stdin)
+    end
     stderr_to_stdout && redirect_stderr(stdout)
 
     init_worker(cookie)
@@ -242,10 +247,10 @@ function start_worker(out::IO, cookie::AbstractString=readline(stdin); close_std
     else
         sock = listen(interface, LPROC.bind_port)
     end
-    @async while isopen(sock)
+    errormonitor(@async while isopen(sock)
         client = accept(sock)
         process_messages(client, client, true)
-    end
+    end)
     print(out, "julia_worker:")  # print header
     print(out, "$(string(LPROC.bind_port))#") # print port
     print(out, LPROC.bind_addr)
@@ -274,7 +279,7 @@ end
 
 
 function redirect_worker_output(ident, stream)
-    @async while !eof(stream)
+    t = @async while !eof(stream)
         line = readline(stream)
         if startswith(line, "      From worker ")
             # stdout's of "additional" workers started from an initial worker on a host are not available
@@ -284,6 +289,7 @@ function redirect_worker_output(ident, stream)
             println("      From worker $(ident):\t$line")
         end
     end
+    errormonitor(t)
 end
 
 struct LaunchWorkerError <: Exception
@@ -349,7 +355,7 @@ end
 function parse_connection_info(str)
     m = match(r"^julia_worker:(\d+)#(.*)", str)
     if m !== nothing
-        (m.captures[2], parse(UInt16, m.captures[1]))
+        (String(m.captures[2]), parse(UInt16, m.captures[1]))
     else
         ("", UInt16(0))
     end
@@ -431,7 +437,7 @@ if istaskdone(t)   # Check if `addprocs` has completed to ensure `fetch` doesn't
     else
         fetch(t)
     end
-  end
+end
 ```
 """
 function addprocs(manager::ClusterManager; kwargs...)
@@ -448,7 +454,7 @@ function addprocs(manager::ClusterManager; kwargs...)
 end
 
 function addprocs_locked(manager::ClusterManager; kwargs...)
-    params = merge(default_addprocs_params(), Dict{Symbol,Any}(kwargs))
+    params = merge(default_addprocs_params(manager), Dict{Symbol,Any}(kwargs))
     topology(Symbol(params[:topology]))
 
     if PGRP.topology !== :all_to_all
@@ -469,6 +475,10 @@ function addprocs_locked(manager::ClusterManager; kwargs...)
     # The `launch` method should add an object of type WorkerConfig for every
     # worker launched. It provides information required on how to connect
     # to it.
+
+    # FIXME: launched should be a Channel, launch_ntfy should be a Threads.Condition
+    # but both are part of the public interface. This means we currently can't use
+    # `Threads.@spawn` in the code below.
     launched = WorkerConfig[]
     launch_ntfy = Condition()
 
@@ -513,11 +523,20 @@ function set_valid_processes(plist::Array{Int})
     end
 end
 
+"""
+    default_addprocs_params(mgr::ClusterManager) -> Dict{Symbol, Any}
+
+Implemented by cluster managers. The default keyword parameters passed when calling
+`addprocs(mgr)`. The minimal set of options is available by calling
+`default_addprocs_params()`
+"""
+default_addprocs_params(::ClusterManager) = default_addprocs_params()
 default_addprocs_params() = Dict{Symbol,Any}(
     :topology => :all_to_all,
     :dir      => pwd(),
     :exename  => joinpath(Sys.BINDIR, julia_exename()),
     :exeflags => ``,
+    :env      => [],
     :enable_threaded_blas => false,
     :lazy => true)
 
@@ -841,7 +860,7 @@ julia> nprocs()
 3
 
 julia> workers()
-5-element Array{Int64,1}:
+2-element Array{Int64,1}:
  2
  3
 ```
@@ -869,13 +888,13 @@ Get the number of available worker processes. This is one less than [`nprocs()`]
 
 # Examples
 ```julia-repl
-\$ julia -p 5
+\$ julia -p 2
 
 julia> nprocs()
-6
+3
 
 julia> nworkers()
-5
+2
 ```
 """
 function nworkers()
@@ -890,7 +909,7 @@ Return a list of all process identifiers, including pid 1 (which is not included
 
 # Examples
 ```julia-repl
-\$ julia -p 5
+\$ julia -p 2
 
 julia> procs()
 3-element Array{Int64,1}:
@@ -952,7 +971,7 @@ Return a list of all worker process identifiers.
 
 # Examples
 ```julia-repl
-\$ julia -p 5
+\$ julia -p 2
 
 julia> workers()
 2-element Array{Int64,1}:
diff --git a/stdlib/Distributed/src/clusterserialize.jl b/stdlib/Distributed/src/clusterserialize.jl
index 8cc52ad5a177b4..0acd4ce68c45b8 100644
--- a/stdlib/Distributed/src/clusterserialize.jl
+++ b/stdlib/Distributed/src/clusterserialize.jl
@@ -102,19 +102,6 @@ function serialize(s::ClusterSerializer, t::Core.TypeName)
     nothing
 end
 
-function serialize(s::ClusterSerializer, t::Task)
-    serialize_cycle(s, t) && return
-    if istaskstarted(t) && !istaskdone(t)
-        error("cannot serialize a running Task")
-    end
-    writetag(s.io, TASK_TAG)
-    serialize(s, t.code)
-    serialize(s, t.storage)
-    serialize(s, t._state)
-    serialize(s, t.result)
-    serialize(s, t.exception)
-end
-
 function serialize(s::ClusterSerializer, g::GlobalRef)
     # Record if required and then invoke the default GlobalRef serializer.
     sym = g.name
@@ -183,7 +170,7 @@ function deserialize_global_from_main(s::ClusterSerializer, sym)
     if sym_isconst
         ccall(:jl_set_const, Cvoid, (Any, Any, Any), Main, sym, v)
     else
-        ccall(:jl_set_global, Cvoid, (Any, Any, Any), Main, sym, v)
+        setglobal!(Main, sym, v)
     end
     return nothing
 end
@@ -244,17 +231,6 @@ function deserialize(s::ClusterSerializer, t::Type{<:CapturedException})
     return CapturedException(capex, bt)
 end
 
-function deserialize(s::ClusterSerializer, ::Type{Task})
-    t = Task(nothing)
-    deserialize_cycle(s, t)
-    t.code = deserialize(s)
-    t.storage = deserialize(s)
-    t._state = deserialize(s)::UInt8
-    t.result = deserialize(s)
-    t.exception = deserialize(s)
-    t
-end
-
 """
     clear!(syms, pids=workers(); mod=Main)
 
@@ -267,7 +243,7 @@ An exception is raised if a global constant is requested to be cleared.
 """
 function clear!(syms, pids=workers(); mod=Main)
     @sync for p in pids
-        @sync_add remotecall(clear_impl!, p, syms, mod)
+        @async_unwrap remotecall_wait(clear_impl!, p, syms, mod)
     end
 end
 clear!(sym::Symbol, pid::Int; mod=Main) = clear!([sym], [pid]; mod=mod)
diff --git a/stdlib/Distributed/src/macros.jl b/stdlib/Distributed/src/macros.jl
index 7e988bc173a91d..a767c7a40d9c9f 100644
--- a/stdlib/Distributed/src/macros.jl
+++ b/stdlib/Distributed/src/macros.jl
@@ -1,14 +1,10 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-let nextidx = 0
+let nextidx = Threads.Atomic{Int}(0)
     global nextproc
     function nextproc()
-        p = -1
-        if p == -1
-            p = workers()[(nextidx % nworkers()) + 1]
-            nextidx += 1
-        end
-        p
+        idx = Threads.atomic_add!(nextidx, 1)
+        return workers()[(idx % nworkers()) + 1]
     end
 end
 
@@ -202,7 +198,7 @@ macro everywhere(procs, ex)
     imps = extract_imports(ex)
     return quote
         $(isempty(imps) ? nothing : Expr(:toplevel, imps...)) # run imports locally first
-        let ex = Expr(:toplevel, :(task_local_storage()[:SOURCE_PATH] = $(get(task_local_storage(), :SOURCE_PATH, nothing))), $(Expr(:quote, ex))),
+        let ex = Expr(:toplevel, :(task_local_storage()[:SOURCE_PATH] = $(get(task_local_storage(), :SOURCE_PATH, nothing))), $(esc(Expr(:quote, ex)))),
             procs = $(esc(procs))
             remotecall_eval(Main, procs, ex)
         end
@@ -226,10 +222,10 @@ function remotecall_eval(m::Module, procs, ex)
             if pid == myid()
                 run_locally += 1
             else
-                @sync_add remotecall(Core.eval, pid, m, ex)
+                @async_unwrap remotecall_wait(Core.eval, pid, m, ex)
             end
         end
-        yield() # ensure that the remotecall_fetch have had a chance to start
+        yield() # ensure that the remotecalls have had a chance to start
 
         # execute locally last as we do not want local execution to block serialization
         # of the request to remote nodes.
@@ -279,9 +275,10 @@ function preduce(reducer, f, R)
 end
 
 function pfor(f, R)
-    @async @sync for c in splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers())
+    t = @async @sync for c in splitrange(Int(firstindex(R)), Int(lastindex(R)), nworkers())
         @spawnat :any f(R, first(c), last(c))
     end
+    errormonitor(t)
 end
 
 function make_preduce_body(var, body)
@@ -346,6 +343,9 @@ macro distributed(args...)
     var = loop.args[1].args[1]
     r = loop.args[1].args[2]
     body = loop.args[2]
+    if Meta.isexpr(body, :block) && body.args[end] isa LineNumberNode
+        resize!(body.args, length(body.args) - 1)
+    end
     if na==1
         syncvar = esc(Base.sync_varname)
         return quote
diff --git a/stdlib/Distributed/src/managers.jl b/stdlib/Distributed/src/managers.jl
index 12866973e77a0b..4dde09665f95c3 100644
--- a/stdlib/Distributed/src/managers.jl
+++ b/stdlib/Distributed/src/managers.jl
@@ -34,8 +34,8 @@ struct SSHManager <: ClusterManager
 end
 
 
-function check_addprocs_args(kwargs)
-    valid_kw_names = collect(keys(default_addprocs_params()))
+function check_addprocs_args(manager, kwargs)
+    valid_kw_names = keys(default_addprocs_params(manager))
     for keyname in keys(kwargs)
         !(keyname in valid_kw_names) && throw(ArgumentError("Invalid keyword argument $(keyname)"))
     end
@@ -51,32 +51,55 @@ end
 """
     addprocs(machines; tunnel=false, sshflags=\`\`, max_parallel=10, kwargs...) -> List of process identifiers
 
-Add processes on remote machines via SSH. See `exename` to set the path to the `julia` installation on remote machines.
-
-`machines` is a vector of machine specifications. Workers are started for each specification.
-
-A machine specification is either a string `machine_spec` or a tuple - `(machine_spec, count)`.
-
-`machine_spec` is a string of the form `[user@]host[:port] [bind_addr[:port]]`. `user`
-defaults to current user, `port` to the standard ssh port. If `[bind_addr[:port]]` is
-specified, other workers will connect to this worker at the specified `bind_addr` and
-`port`.
-
-`count` is the number of workers to be launched on the specified host. If specified as
-`:auto` it will launch as many workers as the number of CPU threads on the specific host.
-
-Keyword arguments:
+Add worker processes on remote machines via SSH. Configuration is done with keyword
+arguments (see below). In particular, the `exename` keyword can be used to specify
+the path to the `julia` binary on the remote machine(s).
+
+`machines` is a vector of "machine specifications" which are given as strings of
+the form `[user@]host[:port] [bind_addr[:port]]`. `user` defaults to current user and `port`
+to the standard SSH port. If `[bind_addr[:port]]` is specified, other workers will connect
+to this worker at the specified `bind_addr` and `port`.
+
+It is possible to launch multiple processes on a remote host by using a tuple in the
+`machines` vector or the form `(machine_spec, count)`, where `count` is the number of
+workers to be launched on the specified host. Passing `:auto` as the worker count will
+launch as many workers as the number of CPU threads on the remote host.
+
+**Examples**:
+```julia
+addprocs([
+    "remote1",               # one worker on 'remote1' logging in with the current username
+    "user@remote2",          # one worker on 'remote2' logging in with the 'user' username
+    "user@remote3:2222",     # specifying SSH port to '2222' for 'remote3'
+    ("user@remote4", 4),     # launch 4 workers on 'remote4'
+    ("user@remote5", :auto), # launch as many workers as CPU threads on 'remote5'
+])
+```
+
+**Keyword arguments**:
 
 * `tunnel`: if `true` then SSH tunneling will be used to connect to the worker from the
   master process. Default is `false`.
 
 * `multiplex`: if `true` then SSH multiplexing is used for SSH tunneling. Default is `false`.
 
-* `sshflags`: specifies additional ssh options, e.g. ```sshflags=\`-i /home/foo/bar.pem\````
+* `ssh`: the name or path of the SSH client executable used to start the workers.
+  Default is `"ssh"`.
+
+* `sshflags`: specifies additional ssh options, e.g. ``` sshflags=\`-i /home/foo/bar.pem\` ```
 
 * `max_parallel`: specifies the maximum number of workers connected to in parallel at a
   host. Defaults to 10.
 
+* `shell`: specifies the type of shell to which ssh connects on the workers.
+
+    + `shell=:posix`: a POSIX-compatible Unix/Linux shell
+      (sh, ksh, bash, dash, zsh, etc.). The default.
+
+    + `shell=:csh`: a Unix C shell (csh, tcsh).
+
+    + `shell=:wincmd`: Microsoft Windows `cmd.exe`.
+
 * `dir`: specifies the working directory on the workers. Defaults to the host's current
   directory (as found by `pwd()`)
 
@@ -105,8 +128,22 @@ Keyword arguments:
   are setup lazily, i.e. they are setup at the first instance of a remote call between
   workers. Default is true.
 
+* `env`: provide an array of string pairs such as
+  `env=["JULIA_DEPOT_PATH"=>"/depot"]` to request that environment variables
+  are set on the remote machine. By default only the environment variable
+  `JULIA_WORKER_TIMEOUT` is passed automatically from the local to the remote
+  environment.
 
-Environment variables :
+* `cmdline_cookie`: pass the authentication cookie via the `--worker` commandline
+   option. The (more secure) default behaviour of passing the cookie via ssh stdio
+   may hang with Windows workers that use older (pre-ConPTY) Julia or Windows versions,
+   in which case `cmdline_cookie=true` offers a work-around.
+
+!!! compat "Julia 1.6"
+    The keyword arguments `ssh`, `shell`, `env` and `cmdline_cookie`
+    were added in Julia 1.6.
+
+Environment variables:
 
 If the master process fails to establish a connection with a newly launched worker within
 60.0 seconds, the worker treats it as a fatal situation and terminates.
@@ -114,31 +151,36 @@ This timeout can be controlled via environment variable `JULIA_WORKER_TIMEOUT`.
 The value of `JULIA_WORKER_TIMEOUT` on the master process specifies the number of seconds a
 newly launched worker waits for connection establishment.
 """
-function addprocs(machines::AbstractVector; tunnel=false, multiplex=false, sshflags=``, max_parallel=10, kwargs...)
-    check_addprocs_args(kwargs)
-    addprocs(SSHManager(machines); tunnel=tunnel, multiplex=multiplex, sshflags=sshflags, max_parallel=max_parallel, kwargs...)
+function addprocs(machines::AbstractVector; kwargs...)
+    manager = SSHManager(machines)
+    check_addprocs_args(manager, kwargs)
+    addprocs(manager; kwargs...)
 end
 
+default_addprocs_params(::SSHManager) =
+    merge(default_addprocs_params(),
+          Dict{Symbol,Any}(
+              :ssh            => "ssh",
+              :sshflags       => ``,
+              :shell          => :posix,
+              :cmdline_cookie => false,
+              :env            => [],
+              :tunnel         => false,
+              :multiplex      => false,
+              :max_parallel   => 10))
 
 function launch(manager::SSHManager, params::Dict, launched::Array, launch_ntfy::Condition)
     # Launch one worker on each unique host in parallel. Additional workers are launched later.
     # Wait for all launches to complete.
-    launch_tasks = Vector{Any}(undef, length(manager.machines))
-
-    for (i, (machine, cnt)) in enumerate(manager.machines)
+    @sync for (i, (machine, cnt)) in enumerate(manager.machines)
         let machine=machine, cnt=cnt
-            launch_tasks[i] = @async try
-                    launch_on_machine(manager, machine, cnt, params, launched, launch_ntfy)
-                catch e
-                    print(stderr, "exception launching on machine $(machine) : $(e)\n")
-                end
+             @async try
+                launch_on_machine(manager, $machine, $cnt, params, launched, launch_ntfy)
+            catch e
+                print(stderr, "exception launching on machine $(machine) : $(e)\n")
+            end
         end
     end
-
-    for t in launch_tasks
-        wait(t::Task)
-    end
-
     notify(launch_ntfy)
 end
 
@@ -152,7 +194,7 @@ function parse_machine(machine::AbstractString)
 
     if machine[begin] == '['  # ipv6 bracket notation (RFC 2732)
         ipv6_end = findlast(']', machine)
-        if ipv6_end == nothing
+        if ipv6_end === nothing
             throw(ArgumentError("invalid machine definition format string: invalid port format \"$machine_def\""))
         end
         hoststr = machine[begin+1 : prevind(machine,ipv6_end)]
@@ -170,7 +212,7 @@ function parse_machine(machine::AbstractString)
         portstr = machine_def[2]
 
         portnum = tryparse(Int, portstr)
-        if portnum == nothing
+        if portnum === nothing
             msg = "invalid machine definition format string: invalid port format \"$machine_def\""
             throw(ArgumentError(msg))
         end
@@ -184,11 +226,15 @@ function parse_machine(machine::AbstractString)
 end
 
 function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, params::Dict, launched::Array, launch_ntfy::Condition)
+    shell = params[:shell]
+    ssh = params[:ssh]
     dir = params[:dir]
     exename = params[:exename]
     exeflags = params[:exeflags]
     tunnel = params[:tunnel]
     multiplex = params[:multiplex]
+    cmdline_cookie = params[:cmdline_cookie]
+    env = Dict{String,String}(params[:env])
 
     # machine could be of the format [user@]host[:port] bind_addr[:bind_port]
     # machine format string is split on whitespace
@@ -199,7 +245,11 @@ function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, pa
     if length(machine_bind) > 1
         exeflags = `--bind-to $(machine_bind[2]) $exeflags`
     end
-    exeflags = `$exeflags --worker`
+    if cmdline_cookie
+        exeflags = `$exeflags --worker=$(cluster_cookie())`
+    else
+        exeflags = `$exeflags --worker`
+    end
 
     host, portnum = parse_machine(machine_bind[1])
     portopt = portnum === nothing ? `` : `-p $portnum`
@@ -210,7 +260,7 @@ function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, pa
         # If it's already running, later ssh sessions also use the same ssh multiplexing session even if
         # `multiplex` is not explicitly specified; otherwise the tunneling session launched later won't
         # go to background and hang. This is because of OpenSSH implementation.
-        if success(`ssh $sshflags -O check $host`)
+        if success(`$ssh $sshflags -O check $host`)
             multiplex = true
         elseif multiplex
             # automatically create an SSH multiplexing session at the next SSH connection
@@ -221,17 +271,66 @@ function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, pa
 
     # Build up the ssh command
 
-    # the default worker timeout
-    tval = get(ENV, "JULIA_WORKER_TIMEOUT", "")
+    # pass on some environment variables by default
+    for var in ["JULIA_WORKER_TIMEOUT"]
+        if !haskey(env, var) && haskey(ENV, var)
+            env[var] = ENV[var]
+        end
+    end
 
     # Julia process with passed in command line flag arguments
-    cmds = """
-        cd -- $(shell_escape_posixly(dir))
-        $(isempty(tval) ? "" : "export JULIA_WORKER_TIMEOUT=$(shell_escape_posixly(tval))")
-        $(shell_escape_posixly(exename)) $(shell_escape_posixly(exeflags))"""
+    if shell == :posix
+        # ssh connects to a POSIX shell
+
+        cmds = "exec $(shell_escape_posixly(exename)) $(shell_escape_posixly(exeflags))"
+        # set environment variables
+        for (var, val) in env
+            occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) ||
+                throw(ArgumentError("invalid env key $var"))
+            cmds = "export $(var)=$(shell_escape_posixly(val))\n$cmds"
+        end
+        # change working directory
+        cmds = "cd -- $(shell_escape_posixly(dir))\n$cmds"
+
+        # shell login (-l) with string command (-c) to launch julia process
+        remotecmd = shell_escape_posixly(`sh -l -c $cmds`)
+
+    elseif shell == :csh
+        # ssh connects to (t)csh
+
+        remotecmd = "exec $(shell_escape_csh(exename)) $(shell_escape_csh(exeflags))"
+
+        # set environment variables
+        for (var, val) in env
+            occursin(r"^[a-zA-Z_][a-zA-Z_0-9]*\z", var) ||
+                throw(ArgumentError("invalid env key $var"))
+            remotecmd = "setenv $(var) $(shell_escape_csh(val))\n$remotecmd"
+        end
+        # change working directory
+        if dir !== nothing && dir != ""
+            remotecmd = "cd $(shell_escape_csh(dir))\n$remotecmd"
+        end
+
+    elseif shell == :wincmd
+        # ssh connects to Windows cmd.exe
+
+        any(c -> c == '"', exename) && throw(ArgumentError("invalid exename"))
 
-    # shell login (-l) with string command (-c) to launch julia process
-    cmd = `sh -l -c $cmds`
+        remotecmd = shell_escape_wincmd(escape_microsoft_c_args(exename, exeflags...))
+        # change working directory
+        if dir !== nothing && dir != ""
+            any(c -> c == '"', dir) && throw(ArgumentError("invalid dir"))
+            remotecmd = "pushd \"$(dir)\" && $remotecmd"
+        end
+        # set environment variables
+        for (var, val) in env
+            occursin(r"^[a-zA-Z0-9_()[\]{}\$\\/#',;\.@!?*+-]+\z", var) || throw(ArgumentError("invalid env key $var"))
+            remotecmd = "set $(var)=$(shell_escape_wincmd(val))&& $remotecmd"
+        end
+
+    else
+        throw(ArgumentError("invalid shell"))
+    end
 
     # remote launch with ssh with given ssh flags / host / port information
     # -T → disable pseudo-terminal allocation
@@ -239,7 +338,7 @@ function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, pa
     # -x → disable X11 forwarding
     # -o ClearAllForwardings → option if forwarding connections and
     #                          forwarded connections are causing collisions
-    cmd = `ssh -T -a -x -o ClearAllForwardings=yes $sshflags $host $(shell_escape_posixly(cmd))`
+    cmd = `$ssh -T -a -x -o ClearAllForwardings=yes $sshflags $host $remotecmd`
 
     # launch the remote Julia process
 
@@ -247,7 +346,7 @@ function launch_on_machine(manager::SSHManager, machine::AbstractString, cnt, pa
     # the initial julia process (Ctrl-C and teardown methods are handled through messages)
     # for the launched processes.
     io = open(detach(cmd), "r+")
-    write_cookie(io)
+    cmdline_cookie || write_cookie(io)
 
     wconfig = WorkerConfig()
     wconfig.io = io.out
@@ -342,28 +441,26 @@ struct LocalManager <: ClusterManager
 end
 
 """
-    addprocs(; kwargs...) -> List of process identifiers
+    addprocs(np::Integer=Sys.CPU_THREADS; restrict=true, kwargs...) -> List of process identifiers
 
-Equivalent to `addprocs(Sys.CPU_THREADS; kwargs...)`
+Launch `np` workers on the local host using the in-built `LocalManager`.
 
-Note that workers do not run a `.julia/config/startup.jl` startup script, nor do they synchronize
-their global state (such as global variables, new method definitions, and loaded modules) with any
-of the other running processes.
-"""
-addprocs(; kwargs...) = addprocs(Sys.CPU_THREADS; kwargs...)
+Local workers inherit the current package environment (i.e., active project,
+[`LOAD_PATH`](@ref), and [`DEPOT_PATH`](@ref)) from the main process.
 
-"""
-    addprocs(np::Integer; restrict=true, kwargs...) -> List of process identifiers
+**Keyword arguments**:
+ - `restrict::Bool`: if `true` (default) binding is restricted to `127.0.0.1`.
+ - `dir`, `exename`, `exeflags`, `env`, `topology`, `lazy`, `enable_threaded_blas`: same effect
+   as for `SSHManager`, see documentation for [`addprocs(machines::AbstractVector)`](@ref).
 
-Launches workers using the in-built `LocalManager` which only launches workers on the
-local host. This can be used to take advantage of multiple cores. `addprocs(4)` will add 4
-processes on the local machine. If `restrict` is `true`, binding is restricted to
-`127.0.0.1`. Keyword args `dir`, `exename`, `exeflags`, `topology`, `lazy` and
-`enable_threaded_blas` have the same effect as documented for `addprocs(machines)`.
+!!! compat "Julia 1.9"
+    The inheriting of the package environment and the `env` keyword argument were
+    added in Julia 1.9.
 """
-function addprocs(np::Integer; restrict=true, kwargs...)
-    check_addprocs_args(kwargs)
-    addprocs(LocalManager(np, restrict); kwargs...)
+function addprocs(np::Integer=Sys.CPU_THREADS; restrict=true, kwargs...)
+    manager = LocalManager(np, restrict)
+    check_addprocs_args(manager, kwargs)
+    addprocs(manager; kwargs...)
 end
 
 Base.show(io::IO, manager::LocalManager) = print(io, "LocalManager()")
@@ -373,10 +470,32 @@ function launch(manager::LocalManager, params::Dict, launched::Array, c::Conditi
     exename = params[:exename]
     exeflags = params[:exeflags]
     bind_to = manager.restrict ? `127.0.0.1` : `$(LPROC.bind_addr)`
+    env = Dict{String,String}(params[:env])
+
+    # TODO: Maybe this belongs in base/initdefs.jl as a package_environment() function
+    #       together with load_path() etc. Might be useful to have when spawning julia
+    #       processes outside of Distributed.jl too.
+    # JULIA_(LOAD|DEPOT)_PATH are used to populate (LOAD|DEPOT)_PATH on startup,
+    # but since (LOAD|DEPOT)_PATH might have changed they are re-serialized here.
+    # Users can opt-out of this by passing `env = ...` to addprocs(...).
+    pathsep = Sys.iswindows() ? ";" : ":"
+    if get(env, "JULIA_LOAD_PATH", nothing) === nothing
+        env["JULIA_LOAD_PATH"] = join(LOAD_PATH, pathsep)
+    end
+    if get(env, "JULIA_DEPOT_PATH", nothing) === nothing
+        env["JULIA_DEPOT_PATH"] = join(DEPOT_PATH, pathsep)
+    end
+    # Set the active project on workers using JULIA_PROJECT.
+    # Users can opt-out of this by (i) passing `env = ...` or (ii) passing
+    # `--project=...` as `exeflags` to addprocs(...).
+    project = Base.ACTIVE_PROJECT[]
+    if project !== nothing && get(env, "JULIA_PROJECT", nothing) === nothing
+        env["JULIA_PROJECT"] = project
+    end
 
     for i in 1:manager.np
         cmd = `$(julia_cmd(exename)) $exeflags --bind-to $bind_to --worker`
-        io = open(detach(setenv(cmd, dir=dir)), "r+")
+        io = open(detach(setenv(addenv(cmd, env), dir=dir)), "r+")
         write_cookie(io)
 
         wconfig = WorkerConfig()
diff --git a/stdlib/Distributed/src/messages.jl b/stdlib/Distributed/src/messages.jl
index 5d590ac6a4e27b..fe3e5ab90b0285 100644
--- a/stdlib/Distributed/src/messages.jl
+++ b/stdlib/Distributed/src/messages.jl
@@ -8,7 +8,7 @@ abstract type AbstractMsg end
 # Each message has three parts, which are written in order to the worker's stream.
 #  1) A header of type MsgHeader is serialized to the stream (via `serialize`).
 #  2) A message of type AbstractMsg is then serialized.
-#  3) Finally, a fixed bounday of 10 bytes is written.
+#  3) Finally, a fixed boundary of 10 bytes is written.
 
 # Message header stored separately from body to be able to send back errors if
 # a deserialization error occurs when reading the message body.
@@ -80,18 +80,18 @@ for (idx, tname) in enumerate(msgtypes)
     end
 end
 
-let msg_cases = :(@assert false)
+let msg_cases = :(@assert false "Message type index ($idx) expected to be between 1:$($(length(msgtypes)))")
     for i = length(msgtypes):-1:1
         mti = msgtypes[i]
         msg_cases = :(if idx == $i
-                          return $(Expr(:call, QuoteNode(mti), fill(:(deserialize(s)), fieldcount(mti))...))
+                          $(Expr(:call, QuoteNode(mti), fill(:(deserialize(s)), fieldcount(mti))...))
                       else
                           $msg_cases
                       end)
     end
     @eval function deserialize_msg(s::AbstractSerializer)
         idx = read(s.io, UInt8)
-        $msg_cases
+        return $msg_cases
     end
 end
 
@@ -126,22 +126,30 @@ function flush_gc_msgs(w::Worker)
     if !isdefined(w, :w_stream)
         return
     end
-    w.gcflag = false
-    new_array = Any[]
-    msgs = w.add_msgs
-    w.add_msgs = new_array
-    if !isempty(msgs)
-        remote_do(add_clients, w, msgs)
-    end
+    add_msgs = nothing
+    del_msgs = nothing
+    @lock w.msg_lock begin
+        if !w.gcflag # No work needed for this worker
+            return
+        end
+        @atomic w.gcflag = false
+        if !isempty(w.add_msgs)
+            add_msgs = w.add_msgs
+            w.add_msgs = Any[]
+        end
 
-    # del_msgs gets populated by finalizers, so be very careful here about ordering of allocations
-    new_array = Any[]
-    msgs = w.del_msgs
-    w.del_msgs = new_array
-    if !isempty(msgs)
-        #print("sending delete of $msgs\n")
-        remote_do(del_clients, w, msgs)
+        if !isempty(w.del_msgs)
+            del_msgs = w.del_msgs
+            w.del_msgs = Any[]
+        end
+    end
+    if add_msgs !== nothing
+        remote_do(add_clients, w, add_msgs)
+    end
+    if del_msgs !== nothing
+        remote_do(del_clients, w, del_msgs)
     end
+    return
 end
 
 # Boundary inserted between messages on the wire, used for recovering
@@ -166,7 +174,7 @@ function send_msg_(w::Worker, header, msg, now::Bool)
         wait(w.initialized)
     end
     io = w.w_stream
-    lock(io.lock)
+    lock(io)
     try
         reset_state(w.w_serializer)
         serialize_hdr_raw(io, header)
@@ -179,14 +187,14 @@ function send_msg_(w::Worker, header, msg, now::Bool)
             flush(io)
         end
     finally
-        unlock(io.lock)
+        unlock(io)
     end
 end
 
 function flush_gc_msgs()
     try
         for w in (PGRP::ProcessGroup).workers
-            if isa(w,Worker) && w.gcflag && (w.state == W_CONNECTED)
+            if isa(w,Worker) && (w.state == W_CONNECTED) && w.gcflag
                 flush_gc_msgs(w)
             end
         end
diff --git a/stdlib/Distributed/src/process_messages.jl b/stdlib/Distributed/src/process_messages.jl
index 7361d4d057e656..7bbf7cfde943b4 100644
--- a/stdlib/Distributed/src/process_messages.jl
+++ b/stdlib/Distributed/src/process_messages.jl
@@ -44,6 +44,13 @@ struct RemoteException <: Exception
     captured::CapturedException
 end
 
+"""
+    capture_exception(ex::RemoteException, bt)
+
+Returns `ex::RemoteException` which has already captured a backtrace (via it's [`CapturedException`](@ref) field `captured`).
+"""
+Base.capture_exception(ex::RemoteException, bt) = ex
+
 """
     RemoteException(captured)
 
@@ -54,26 +61,10 @@ remote exception and a serializable form of the call stack when the exception wa
 RemoteException(captured) = RemoteException(myid(), captured)
 function showerror(io::IO, re::RemoteException)
     (re.pid != myid()) && print(io, "On worker ", re.pid, ":\n")
-    showerror(io, get_root_exception(re.captured))
+    showerror(io, re.captured)
 end
 
-isa_exception_container(ex) = (isa(ex, RemoteException) ||
-                               isa(ex, CapturedException) ||
-                               isa(ex, CompositeException))
-
-function get_root_exception(ex)
-    if isa(ex, RemoteException)
-        return get_root_exception(ex.captured)
-    elseif isa(ex, CapturedException) && isa_exception_container(ex.ex)
-        return get_root_exception(ex.ex)
-    elseif isa(ex, CompositeException) && length(ex.exceptions) > 0 && isa_exception_container(ex.exceptions[1])
-        return get_root_exception(ex.exceptions[1])
-    else
-        return ex
-    end
-end
-
-function run_work_thunk(thunk, print_error)
+function run_work_thunk(thunk::Function, print_error::Bool)
     local result
     try
         result = thunk()
@@ -94,7 +85,7 @@ function schedule_call(rid, thunk)
         rv = RemoteValue(def_rv_channel())
         (PGRP::ProcessGroup).refs[rid] = rv
         push!(rv.clientset, rid.whence)
-        @async run_work_thunk(rv, thunk)
+        errormonitor(@async run_work_thunk(rv, thunk))
         return rv
     end
 end
@@ -127,7 +118,7 @@ end
 
 ## message event handlers ##
 function process_messages(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool=true)
-    @async process_tcp_streams(r_stream, w_stream, incoming)
+    errormonitor(@async process_tcp_streams(r_stream, w_stream, incoming))
 end
 
 function process_tcp_streams(r_stream::TCPSocket, w_stream::TCPSocket, incoming::Bool)
@@ -157,7 +148,7 @@ Julia version number to perform the authentication handshake.
 See also [`cluster_cookie`](@ref).
 """
 function process_messages(r_stream::IO, w_stream::IO, incoming::Bool=true)
-    @async message_handler_loop(r_stream, w_stream, incoming)
+    errormonitor(@async message_handler_loop(r_stream, w_stream, incoming))
 end
 
 function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool)
@@ -246,8 +237,8 @@ function message_handler_loop(r_stream::IO, w_stream::IO, incoming::Bool)
             deregister_worker(wpid)
         end
 
-        isopen(r_stream) && close(r_stream)
-        isopen(w_stream) && close(w_stream)
+        close(r_stream)
+        close(w_stream)
 
         if (myid() == 1) && (wpid > 1)
             if oldstate != W_TERMINATING
@@ -287,11 +278,11 @@ function process_hdr(s, validate_cookie)
 end
 
 function handle_msg(msg::CallMsg{:call}, header, r_stream, w_stream, version)
-    schedule_call(header.response_oid, ()->msg.f(msg.args...; msg.kwargs...))
+    schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...))
 end
 function handle_msg(msg::CallMsg{:call_fetch}, header, r_stream, w_stream, version)
-    @async begin
-        v = run_work_thunk(()->msg.f(msg.args...; msg.kwargs...), false)
+    errormonitor(@async begin
+        v = run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), false)
         if isa(v, SyncTake)
             try
                 deliver_result(w_stream, :call_fetch, header.notify_oid, v.v)
@@ -301,18 +292,20 @@ function handle_msg(msg::CallMsg{:call_fetch}, header, r_stream, w_stream, versi
         else
             deliver_result(w_stream, :call_fetch, header.notify_oid, v)
         end
-    end
+        nothing
+    end)
 end
 
 function handle_msg(msg::CallWaitMsg, header, r_stream, w_stream, version)
-    @async begin
-        rv = schedule_call(header.response_oid, ()->msg.f(msg.args...; msg.kwargs...))
+    errormonitor(@async begin
+        rv = schedule_call(header.response_oid, ()->invokelatest(msg.f, msg.args...; msg.kwargs...))
         deliver_result(w_stream, :call_wait, header.notify_oid, fetch(rv.c))
-    end
+        nothing
+    end)
 end
 
 function handle_msg(msg::RemoteDoMsg, header, r_stream, w_stream, version)
-    @async run_work_thunk(()->msg.f(msg.args...; msg.kwargs...), true)
+    errormonitor(@async run_work_thunk(()->invokelatest(msg.f, msg.args...; msg.kwargs...), true))
 end
 
 function handle_msg(msg::ResultMsg, header, r_stream, w_stream, version)
@@ -346,8 +339,7 @@ function handle_msg(msg::JoinPGRPMsg, header, r_stream, w_stream, version)
     lazy = msg.lazy
     PGRP.lazy = lazy
 
-    wait_tasks = Task[]
-    for (connect_at, rpid) in msg.other_workers
+    @sync for (connect_at, rpid) in msg.other_workers
         wconfig = WorkerConfig()
         wconfig.connect_at = connect_at
 
@@ -356,14 +348,11 @@ function handle_msg(msg::JoinPGRPMsg, header, r_stream, w_stream, version)
                 # The constructor registers the object with a global registry.
                 Worker(rpid, ()->connect_to_peer(cluster_manager, rpid, wconfig))
             else
-                t = @async connect_to_peer(cluster_manager, rpid, wconfig)
-                push!(wait_tasks, t)
+                @async connect_to_peer(cluster_manager, rpid, wconfig)
             end
         end
     end
 
-    for wt in wait_tasks; Base.wait(wt); end
-
     send_connection_hdr(controller, false)
     send_msg_now(controller, MsgHeader(RRID(0,0), header.notify_oid), JoinCompleteMsg(Sys.CPU_THREADS, getpid()))
 end
diff --git a/stdlib/Distributed/src/remotecall.jl b/stdlib/Distributed/src/remotecall.jl
index f4845221a611a4..d4bf767537c1d5 100644
--- a/stdlib/Distributed/src/remotecall.jl
+++ b/stdlib/Distributed/src/remotecall.jl
@@ -26,12 +26,13 @@ mutable struct Future <: AbstractRemoteRef
     where::Int
     whence::Int
     id::Int
-    v::Union{Some{Any}, Nothing}
+    lock::ReentrantLock
+    @atomic v::Union{Some{Any}, Nothing}
 
     Future(w::Int, rrid::RRID, v::Union{Some, Nothing}=nothing) =
-        (r = new(w,rrid.whence,rrid.id,v); return test_existing_ref(r))
+        (r = new(w,rrid.whence,rrid.id,ReentrantLock(),v); return test_existing_ref(r))
 
-    Future(t::NTuple{4, Any}) = new(t[1],t[2],t[3],t[4])  # Useful for creating dummy, zeroed-out instances
+    Future(t::NTuple{4, Any}) = new(t[1],t[2],t[3],ReentrantLock(),t[4])  # Useful for creating dummy, zeroed-out instances
 end
 
 """
@@ -69,10 +70,17 @@ function test_existing_ref(r::AbstractRemoteRef)
     found = getkey(client_refs, r, nothing)
     if found !== nothing
         @assert r.where > 0
-        if isa(r, Future) && found.v === nothing && r.v !== nothing
-            # we have recd the value from another source, probably a deserialized ref, send a del_client message
-            send_del_client(r)
-            found.v = r.v
+        if isa(r, Future)
+            # this is only for copying the reference from Future to RemoteRef (just created)
+            fv_cache = @atomic :acquire found.v
+            rv_cache = @atomic :monotonic r.v
+            if fv_cache === nothing && rv_cache !== nothing
+                # we have recd the value from another source, probably a deserialized ref, send a del_client message
+                send_del_client(r)
+                @lock found.lock begin
+                    @atomicreplace found.v nothing => rv_cache
+                end
+            end
         end
         return found::typeof(r)
     end
@@ -84,20 +92,25 @@ end
 
 function finalize_ref(r::AbstractRemoteRef)
     if r.where > 0 # Handle the case of the finalizer having been called manually
-        if islocked(client_refs)
-            # delay finalizer for later, when it's not already locked
+        if trylock(client_refs.lock) # trylock doesn't call wait which causes yields
+            try
+                delete!(client_refs.ht, r) # direct removal avoiding locks
+                if isa(r, RemoteChannel)
+                    send_del_client_no_lock(r)
+                else
+                    # send_del_client only if the reference has not been set
+                    v_cache = @atomic :monotonic r.v
+                    v_cache === nothing && send_del_client_no_lock(r)
+                    @atomic :monotonic r.v = nothing
+                end
+                r.where = 0
+            finally
+                unlock(client_refs.lock)
+            end
+        else
             finalizer(finalize_ref, r)
             return nothing
         end
-        delete!(client_refs, r)
-        if isa(r, RemoteChannel)
-            send_del_client(r)
-        else
-            # send_del_client only if the reference has not been set
-            r.v === nothing && send_del_client(r)
-            r.v = nothing
-        end
-        r.where = 0
     end
     nothing
 end
@@ -192,12 +205,13 @@ or to use a local [`Channel`](@ref) as a proxy:
 ```julia
 p = 1
 f = Future(p)
-@async put!(f, remotecall_fetch(long_computation, p))
+errormonitor(@async put!(f, remotecall_fetch(long_computation, p)))
 isready(f)  # will not block
 ```
 """
 function isready(rr::Future)
-    rr.v === nothing || return true
+    v_cache = @atomic rr.v
+    v_cache === nothing || return true
 
     rid = remoteref_id(rr)
     return if rr.where == myid()
@@ -229,13 +243,18 @@ del_client(rr::AbstractRemoteRef) = del_client(remoteref_id(rr), myid())
 del_client(id, client) = del_client(PGRP, id, client)
 function del_client(pg, id, client)
     lock(client_refs) do
-        rv = get(pg.refs, id, false)
-        if rv !== false
-            delete!(rv.clientset, client)
-            if isempty(rv.clientset)
-                delete!(pg.refs, id)
-                #print("$(myid()) collected $id\n")
-            end
+        _del_client(pg, id, client)
+    end
+    nothing
+end
+
+function _del_client(pg, id, client)
+    rv = get(pg.refs, id, false)
+    if rv !== false
+        delete!(rv.clientset, client)
+        if isempty(rv.clientset)
+            delete!(pg.refs, id)
+            #print("$(myid()) collected $id\n")
         end
     end
     nothing
@@ -247,25 +266,70 @@ function del_clients(pairs::Vector)
     end
 end
 
-const any_gc_flag = Condition()
+# The task below is coalescing the `flush_gc_msgs` call
+# across multiple producers, see `send_del_client`,
+# and `send_add_client`.
+# XXX: Is this worth the additional complexity?
+#      `flush_gc_msgs` has to iterate over all connected workers.
+const any_gc_flag = Threads.Condition()
 function start_gc_msgs_task()
-    @async while true
-        wait(any_gc_flag)
-        flush_gc_msgs()
-    end
+    errormonitor(
+        Threads.@spawn begin
+            while true
+                lock(any_gc_flag) do
+                    # this might miss events
+                    wait(any_gc_flag)
+                end
+                # Use invokelatest() so that custom message transport streams
+                # for workers can be defined in a newer world age than the Task
+                # which runs the loop here.
+                invokelatest(flush_gc_msgs) # handles throws internally
+            end
+        end
+    )
 end
 
+# Function can be called within a finalizer
 function send_del_client(rr)
     if rr.where == myid()
         del_client(rr)
     elseif id_in_procs(rr.where) # process only if a valid worker
-        w = worker_from_id(rr.where)
-        push!(w.del_msgs, (remoteref_id(rr), myid()))
-        w.gcflag = true
+        process_worker(rr)
+    end
+end
+
+function send_del_client_no_lock(rr)
+    # for gc context to avoid yields
+    if rr.where == myid()
+        _del_client(PGRP, remoteref_id(rr), myid())
+    elseif id_in_procs(rr.where) # process only if a valid worker
+        process_worker(rr)
+    end
+end
+
+function publish_del_msg!(w::Worker, msg)
+    lock(w.msg_lock) do
+        push!(w.del_msgs, msg)
+        @atomic w.gcflag = true
+    end
+    lock(any_gc_flag) do
         notify(any_gc_flag)
     end
 end
 
+function process_worker(rr)
+    w = worker_from_id(rr.where)::Worker
+    msg = (remoteref_id(rr), myid())
+
+    # Needs to aquire a lock on the del_msg queue
+    T = Threads.@spawn begin
+        publish_del_msg!($w, $msg)
+    end
+    Base.errormonitor(T)
+
+    return
+end
+
 function add_client(id, client)
     lock(client_refs) do
         rv = lookup_ref(id)
@@ -288,34 +352,44 @@ function send_add_client(rr::AbstractRemoteRef, i)
         # to the processor that owns the remote ref. it will add_client
         # itself inside deserialize().
         w = worker_from_id(rr.where)
-        push!(w.add_msgs, (remoteref_id(rr), i))
-        w.gcflag = true
-        notify(any_gc_flag)
+        lock(w.msg_lock) do
+            push!(w.add_msgs, (remoteref_id(rr), i))
+            @atomic w.gcflag = true
+        end
+        lock(any_gc_flag) do
+            notify(any_gc_flag)
+        end
     end
 end
 
 channel_type(rr::RemoteChannel{T}) where {T} = T
 
-serialize(s::ClusterSerializer, f::Future) = serialize(s, f, f.v === nothing)
-serialize(s::ClusterSerializer, rr::RemoteChannel) = serialize(s, rr, true)
-function serialize(s::ClusterSerializer, rr::AbstractRemoteRef, addclient)
-    if addclient
+function serialize(s::ClusterSerializer, f::Future)
+    v_cache = @atomic f.v
+    if v_cache === nothing
         p = worker_id_from_socket(s.io)
-        (p !== rr.where) && send_add_client(rr, p)
+        (p !== f.where) && send_add_client(f, p)
     end
+    invoke(serialize, Tuple{ClusterSerializer, Any}, s, f)
+end
+
+function serialize(s::ClusterSerializer, rr::RemoteChannel)
+    p = worker_id_from_socket(s.io)
+    (p !== rr.where) && send_add_client(rr, p)
     invoke(serialize, Tuple{ClusterSerializer, Any}, s, rr)
 end
 
 function deserialize(s::ClusterSerializer, t::Type{<:Future})
-    f = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t)
-    f2 = Future(f.where, RRID(f.whence, f.id), f.v) # ctor adds to client_refs table
+    fc = invoke(deserialize, Tuple{ClusterSerializer, DataType}, s, t) # deserialized copy
+    f2 = Future(fc.where, RRID(fc.whence, fc.id), fc.v) # ctor adds to client_refs table
 
     # 1) send_add_client() is not executed when the ref is being serialized
     #    to where it exists, hence do it here.
     # 2) If we have received a 'fetch'ed Future or if the Future ctor found an
     #    already 'fetch'ed instance in client_refs (Issue #25847), we should not
     #    track it in the backing RemoteValue store.
-    if f2.where == myid() && f2.v === nothing
+    f2v_cache = @atomic f2.v
+    if f2.where == myid() && f2v_cache === nothing
         add_client(remoteref_id(f2), myid())
     end
     f2
@@ -348,10 +422,7 @@ end
 # make a thunk to call f on args in a way that simulates what would happen if
 # the function were sent elsewhere
 function local_remotecall_thunk(f, args, kwargs)
-    if isempty(args) && isempty(kwargs)
-        return f
-    end
-    return ()->f(args...; kwargs...)
+    return ()->invokelatest(f, args...; kwargs...)
 end
 
 function remotecall(f, w::LocalProcess, args...; kwargs...)
@@ -512,7 +583,7 @@ end
 
 Wait for a value to become available for the specified [`Future`](@ref).
 """
-wait(r::Future) = (r.v !== nothing && return r; call_on_owner(wait_ref, r, myid()); r)
+wait(r::Future) = (v_cache = @atomic r.v; v_cache !== nothing && return r; call_on_owner(wait_ref, r, myid()); r)
 
 """
     wait(r::RemoteChannel, args...)
@@ -529,11 +600,49 @@ Further calls to `fetch` on the same reference return the cached value. If the r
 is an exception, throws a [`RemoteException`](@ref) which captures the remote exception and backtrace.
 """
 function fetch(r::Future)
-    r.v !== nothing && return something(r.v)
-    v = call_on_owner(fetch_ref, r)
-    r.v = Some(v)
+    v_cache = @atomic r.v
+    v_cache !== nothing && return something(v_cache)
+
+    if r.where == myid()
+        rv, v_cache = @lock r.lock begin
+            v_cache = @atomic :monotonic r.v
+            rv = v_cache === nothing ? lookup_ref(remoteref_id(r)) : nothing
+            rv, v_cache
+        end
+
+        if v_cache !== nothing
+            return something(v_cache)
+        else
+            v_local = fetch(rv.c)
+        end
+    else
+        v_local = call_on_owner(fetch_ref, r)
+    end
+
+    v_cache = @atomic r.v
+
+    if v_cache === nothing # call_on_owner case
+        v_old, status = @lock r.lock begin
+            @atomicreplace r.v nothing => Some(v_local)
+        end
+        # status == true - when value obtained through call_on_owner
+        # status == false - any other situation: atomicreplace fails, because by the time the lock is obtained cache will be populated
+        # why? local put! performs caching and putting into channel under r.lock
+
+        # for local put! use the cached value, for call_on_owner cases just take the v_local as it was just cached in r.v
+
+        # remote calls getting the value from `call_on_owner` used to return the value directly without wrapping it in `Some(x)`
+        # so we're doing the same thing here
+        if status
+            send_del_client(r)
+            return v_local
+        else # this `v_cache` is returned at the end of the function
+            v_cache = v_old
+        end
+    end
+
     send_del_client(r)
-    v
+    something(v_cache)
 end
 
 fetch_ref(rid, args...) = fetch(lookup_ref(rid).c, args...)
@@ -544,7 +653,7 @@ fetch_ref(rid, args...) = fetch(lookup_ref(rid).c, args...)
 Wait for and get a value from a [`RemoteChannel`](@ref). Exceptions raised are the
 same as for a [`Future`](@ref). Does not remove the item fetched.
 """
-fetch(r::RemoteChannel, args...) = call_on_owner(fetch_ref, r, args...)
+fetch(r::RemoteChannel, args...) = call_on_owner(fetch_ref, r, args...)::eltype(r)
 
 isready(rv::RemoteValue, args...) = isready(rv.c, args...)
 
@@ -557,12 +666,30 @@ A `put!` on an already set `Future` throws an `Exception`.
 All asynchronous remote calls return `Future`s and set the
 value to the return value of the call upon completion.
 """
-function put!(rr::Future, v)
-    rr.v !== nothing && error("Future can be set only once")
-    call_on_owner(put_future, rr, v, myid())
-    rr.v = Some(v)
-    rr
+function put!(r::Future, v)
+    if r.where == myid()
+        rid = remoteref_id(r)
+        rv = lookup_ref(rid)
+        isready(rv) && error("Future can be set only once")
+        @lock r.lock begin
+            put!(rv, v) # this notifies the tasks waiting on the channel in fetch
+            set_future_cache(r, v) # set the cache before leaving the lock, so that the notified tasks already see it cached
+        end
+        del_client(rid, myid())
+    else
+        @lock r.lock begin # same idea as above if there were any local tasks fetching on this Future
+            call_on_owner(put_future, r, v, myid())
+            set_future_cache(r, v)
+        end
+    end
+    r
+end
+
+function set_future_cache(r::Future, v)
+    _, ok = @atomicreplace r.v nothing => Some(v)
+    ok || error("internal consistency error detected for Future")
 end
+
 function put_future(rid, v, caller)
     rv = lookup_ref(rid)
     isready(rv) && error("Future can be set only once")
@@ -607,7 +734,15 @@ function take_ref(rid, caller, args...)
         lock(rv.synctake)
     end
 
-    v=take!(rv, args...)
+    v = try
+        take!(rv, args...)
+    catch e
+        # avoid unmatched unlock when exception occurs
+        # github issue #33972
+        synctake && unlock(rv.synctake)
+        rethrow(e)
+    end
+
     isa(v, RemoteException) && (myid() == caller) && throw(v)
 
     if synctake
@@ -623,7 +758,7 @@ end
 Fetch value(s) from a [`RemoteChannel`](@ref) `rr`,
 removing the value(s) in the process.
 """
-take!(rr::RemoteChannel, args...) = call_on_owner(take_ref, rr, myid(), args...)
+take!(rr::RemoteChannel, args...) = call_on_owner(take_ref, rr, myid(), args...)::eltype(rr)
 
 # close and isopen are not supported on Future
 
diff --git a/stdlib/Distributed/src/workerpool.jl b/stdlib/Distributed/src/workerpool.jl
index 6250e99a1d7c00..354c61c8451139 100644
--- a/stdlib/Distributed/src/workerpool.jl
+++ b/stdlib/Distributed/src/workerpool.jl
@@ -309,7 +309,7 @@ For global variables, only the bindings are captured in a closure, not the data.
 const foo = rand(10^8);
 wp = CachingPool(workers())
 let foo = foo
-    pmap(wp, i -> sum(foo) + i, 1:100);
+    pmap(i -> sum(foo) + i, wp, 1:100);
 end
 ```
 
diff --git a/stdlib/Distributed/test/distributed_exec.jl b/stdlib/Distributed/test/distributed_exec.jl
index 4dca335314ddbb..203ea6de665335 100644
--- a/stdlib/Distributed/test/distributed_exec.jl
+++ b/stdlib/Distributed/test/distributed_exec.jl
@@ -63,7 +63,7 @@ let
     count_condition = Condition()
 
     function remote_wait(c)
-        @async begin
+        @async_logerr begin
             count += 1
             remote(take!)(c)
             count -= 1
@@ -87,7 +87,7 @@ let
         @test count == testcount
         put!(c, "foo")
         testcount -= 1
-        wait(count_condition)
+        (count == testcount) || wait(count_condition)
         @test count == testcount
         @test isready(pool) == true
     end
@@ -106,7 +106,7 @@ let
         @test count == testcount
         put!(c, "foo")
         testcount -= 1
-        wait(count_condition)
+        (count == testcount) || wait(count_condition)
         @test count == testcount
         @test isready(pool) == true
     end
@@ -132,6 +132,39 @@ end
 testf(id_me)
 testf(id_other)
 
+function poll_while(f::Function; timeout_seconds::Integer = 120)
+    start_time = time_ns()
+    while f()
+        sleep(1)
+        if ( ( time_ns() - start_time )/1e9 ) > timeout_seconds
+            @error "Timed out" timeout_seconds
+            return false
+        end
+    end
+    return true
+end
+
+function _getenv_include_thread_unsafe()
+    environment_variable_name = "JULIA_TEST_INCLUDE_THREAD_UNSAFE"
+    default_value = "false"
+    environment_variable_value = strip(get(ENV, environment_variable_name, default_value))
+    b = parse(Bool, environment_variable_value)::Bool
+    return b
+end
+const _env_include_thread_unsafe = _getenv_include_thread_unsafe()
+function include_thread_unsafe_tests()
+    if Threads.nthreads() > 1
+        if _env_include_thread_unsafe
+            return true
+        end
+        msg = "Skipping a thread-unsafe test because `Threads.nthreads() > 1`"
+        @warn msg Threads.nthreads()
+        Test.@test_broken false
+        return false
+    end
+    return true
+end
+
 # Distributed GC tests for Futures
 function test_futures_dgc(id)
     f = remotecall(myid, id)
@@ -143,8 +176,7 @@ function test_futures_dgc(id)
     @test fetch(f) == id
     @test f.v !== nothing
     yield(); # flush gc msgs
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == false
-
+    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid))
 
     # if unfetched, it should be deleted after a finalize
     f = remotecall(myid, id)
@@ -153,7 +185,7 @@ function test_futures_dgc(id)
     @test f.v === nothing
     finalize(f)
     yield(); # flush gc msgs
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid) == false
+    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, fid))
 end
 
 test_futures_dgc(id_me)
@@ -228,8 +260,7 @@ remotecall_fetch(f25847, id_other, f)
 
 finalize(f)
 yield() # flush gc msgs
-@test false == remotecall_fetch(chk_rrid->(yield(); haskey(Distributed.PGRP.refs, chk_rrid)), id_other, rrid)
-
+@test poll_while(() -> remotecall_fetch(chk_rrid->(yield(); haskey(Distributed.PGRP.refs, chk_rrid)), id_other, rrid))
 
 # Distributed GC tests for RemoteChannels
 function test_remoteref_dgc(id)
@@ -243,7 +274,7 @@ function test_remoteref_dgc(id)
     @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == true
     finalize(rr)
     yield(); # flush gc msgs
-    @test remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid) == false
+    @test poll_while(() -> remotecall_fetch(k->(yield();haskey(Distributed.PGRP.refs, k)), id, rrid))
 end
 test_remoteref_dgc(id_me)
 test_remoteref_dgc(id_other)
@@ -256,18 +287,22 @@ let wid1 = workers()[1],
     fstore = RemoteChannel(wid2)
 
     put!(fstore, rr)
-    @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
+    if include_thread_unsafe_tests()
+        @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
+    end
     finalize(rr) # finalize locally
     yield() # flush gc msgs
-    @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
+    if include_thread_unsafe_tests()
+        @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == true
+    end
     remotecall_fetch(r -> (finalize(take!(r)); yield(); nothing), wid2, fstore) # finalize remotely
     sleep(0.5) # to ensure that wid2 messages have been executed on wid1
-    @test remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid) == false
+    @test poll_while(() -> remotecall_fetch(k -> haskey(Distributed.PGRP.refs, k), wid1, rrid))
 end
 
 # Tests for issue #23109 - should not hang.
 f = @spawnat :any rand(1, 1)
-@sync begin
+@Base.Experimental.sync begin
     for _ in 1:10
         @async fetch(f)
     end
@@ -275,7 +310,7 @@ end
 
 wid1, wid2 = workers()[1:2]
 f = @spawnat wid1 rand(1,1)
-@sync begin
+@Base.Experimental.sync begin
     @async fetch(f)
     @async remotecall_fetch(()->fetch(f), wid2)
 end
@@ -314,6 +349,9 @@ function test_regular_io_ser(ref::Distributed.AbstractRemoteRef)
         v = getfield(ref2, fld)
         if isa(v, Number)
             @test v === zero(typeof(v))
+        elseif fld == :lock
+            @test v isa ReentrantLock
+            @test !islocked(v)
         elseif v !== nothing
             error(string("Add test for field ", fld))
         end
@@ -439,7 +477,7 @@ catch ex
     # test showerror
     err_str = sprint(showerror, ex)
     err_one_str = sprint(showerror, ex.exceptions[1])
-    @test err_str == err_one_str * "\n\n...and 4 more exception(s).\n"
+    @test err_str == err_one_str * "\n\n...and 4 more exceptions.\n"
 end
 @test sprint(showerror, CompositeException()) == "CompositeException()\n"
 
@@ -802,6 +840,16 @@ v15406 = remotecall_wait(() -> 1, id_other)
 fetch(v15406)
 remotecall_wait(fetch, id_other, v15406)
 
+
+# issue #43396
+# Covers the remote fetch where the value returned is `nothing`
+# May be caused by attempting to unwrap a non-`Some` type with `something`
+# `call_on_owner` ref fetches return values not wrapped in `Some`
+# and have to be returned directly
+@test nothing === fetch(remotecall(() -> nothing, workers()[1]))
+@test 10 === fetch(remotecall(() -> 10, workers()[1]))
+
+
 # Test various forms of remotecall* invocations
 
 @everywhere f_args(v1, v2=0; kw1=0, kw2=0) = v1+v2+kw1+kw2
@@ -853,6 +901,13 @@ end
         return :OK
     end, id_other, rc_unbuffered) == :OK
 
+# github issue 33972
+rc_unbuffered_other = RemoteChannel(()->Channel{Int}(0), id_other)
+close(rc_unbuffered_other)
+try; take!(rc_unbuffered_other); catch; end
+@test !remotecall_fetch(rc -> islocked(Distributed.lookup_ref(remoteref_id(rc)).synctake),
+                        id_other, rc_unbuffered_other)
+
 # github PR #14456
 n = DoFullTest ? 6 : 5
 for i = 1:10^n
@@ -1005,30 +1060,8 @@ end
 
 # Test addprocs enable_threaded_blas parameter
 
-const get_num_threads = function() # anonymous so it will be serialized when called
-    blas = LinearAlgebra.BLAS.vendor()
-    # Wrap in a try to catch unsupported blas versions
-    try
-        if blas == :openblas
-            return ccall((:openblas_get_num_threads, Base.libblas_name), Cint, ())
-        elseif blas == :openblas64
-            return ccall((:openblas_get_num_threads64_, Base.libblas_name), Cint, ())
-        elseif blas == :mkl
-            return ccall((:MKL_Get_Max_Num_Threads, Base.libblas_name), Cint, ())
-        end
-
-        # OSX BLAS looks at an environment variable
-        if Sys.isapple()
-            return tryparse(Cint, get(ENV, "VECLIB_MAXIMUM_THREADS", "1"))
-        end
-    catch
-    end
-
-    return nothing
-end
-
 function get_remote_num_threads(processes_added)
-    return [remotecall_fetch(get_num_threads, proc_id) for proc_id in processes_added]
+    return [remotecall_fetch(BLAS.get_num_threads, proc_id) for proc_id in processes_added]
 end
 
 function test_blas_config(pid, expected)
@@ -1041,12 +1074,11 @@ function test_blas_config(pid, expected)
 end
 
 function test_add_procs_threaded_blas()
-    master_blas_thread_count = get_num_threads()
+    master_blas_thread_count = BLAS.get_num_threads()
     if master_blas_thread_count === nothing
         @warn "Skipping blas num threads tests due to unsupported blas version"
         return
     end
-    @test master_blas_thread_count <= 8 # check that Base set the environment variable in __init__ before LinearAlgebra dlopen'd it
 
     # Test with default enable_threaded_blas false
     processes_added = addprocs_with_testenv(2)
@@ -1055,7 +1087,7 @@ function test_add_procs_threaded_blas()
     end
 
     # Master thread should not have changed
-    @test get_num_threads() == master_blas_thread_count
+    @test BLAS.get_num_threads() == master_blas_thread_count
 
     # Threading disabled in children by default
     thread_counts_by_process = get_remote_num_threads(processes_added)
@@ -1069,9 +1101,9 @@ function test_add_procs_threaded_blas()
         test_blas_config(proc_id, true)
     end
 
-    @test get_num_threads() == master_blas_thread_count
+    @test BLAS.get_num_threads() == master_blas_thread_count
 
-    # BLAS.set_num_threads(`num`) doesn't  cause get_num_threads to return `num`
+    # BLAS.set_num_threads(`num`) doesn't  cause BLAS.get_num_threads to return `num`
     # depending on the machine, the BLAS version, and BLAS configuration, so
     # we need a very lenient test.
     thread_counts_by_process = get_remote_num_threads(processes_added)
@@ -1583,7 +1615,11 @@ cluster_cookie("")
 for close_stdin in (true, false), stderr_to_stdout in (true, false)
     local npids = addprocs_with_testenv(RetainStdioTester(close_stdin,stderr_to_stdout))
     @test remotecall_fetch(myid, npids[1]) == npids[1]
-    @test close_stdin != remotecall_fetch(()->isopen(stdin), npids[1])
+    if close_stdin
+        @test remotecall_fetch(()->stdin === devnull && !isreadable(stdin), npids[1])
+    else
+        @test remotecall_fetch(()->stdin !== devnull && isopen(stdin) && isreadable(stdin), npids[1])
+    end
     @test stderr_to_stdout == remotecall_fetch(()->(stderr === stdout), npids[1])
     rmprocs(npids)
 end
@@ -1690,16 +1726,14 @@ let (h, t) = Distributed.head_and_tail(Int[], 0)
 end
 
 # issue #35937
-let e
-    try
-        pmap(1) do _
+let e = @test_throws RemoteException pmap(1) do _
             wait(@async error(42))
         end
-    catch ex
-        e = ex
-    end
     # check that the inner TaskFailedException is correctly formed & can be printed
-    @test sprint(showerror, e) isa String
+    es = sprint(showerror, e.value)
+    @test contains(es, ":\nTaskFailedException\nStacktrace:\n")
+    @test contains(es, "\n\n    nested task error:")
+    @test contains(es, "\n\n    nested task error: 42\n")
 end
 
 # issue #27429, propagate relative `include` path to workers
@@ -1708,6 +1742,117 @@ for p in procs()
     @test @fetchfrom(p, i27429) == 27429
 end
 
+# Propagation of package environments for local workers (#28781)
+let julia = `$(Base.julia_cmd()) --startup-file=no`; mktempdir() do tmp
+    project = mkdir(joinpath(tmp, "project"))
+    depots = [mkdir(joinpath(tmp, "depot1")), mkdir(joinpath(tmp, "depot2"))]
+    load_path = [mkdir(joinpath(tmp, "load_path")), "@stdlib", "@"]
+    pathsep = Sys.iswindows() ? ";" : ":"
+    env = Dict(
+        "JULIA_DEPOT_PATH" => join(depots, pathsep),
+        "JULIA_LOAD_PATH" => join(load_path, pathsep),
+        # Explicitly propagate `TMPDIR`, in the event that we're running on a
+        # CI system where `TMPDIR` is special.
+        "TMPDIR" => dirname(tmp),
+    )
+    setupcode = """
+    using Distributed, Test
+    @everywhere begin
+        depot_path() = DEPOT_PATH
+        load_path() = LOAD_PATH
+        active_project() = Base.ACTIVE_PROJECT[]
+    end
+    """
+    testcode = setupcode * """
+    for w in workers()
+        @test remotecall_fetch(depot_path, w)          == DEPOT_PATH
+        @test remotecall_fetch(load_path, w)           == LOAD_PATH
+        @test remotecall_fetch(Base.load_path, w)      == Base.load_path()
+        @test remotecall_fetch(active_project, w)      == Base.ACTIVE_PROJECT[]
+        @test remotecall_fetch(Base.active_project, w) == Base.active_project()
+    end
+    """
+    # No active project
+    extracode = """
+    for w in workers()
+        @test remotecall_fetch(active_project, w) === Base.ACTIVE_PROJECT[] === nothing
+    end
+    """
+    cmd = setenv(`$(julia) -p1 -e $(testcode * extracode)`, env)
+    @test success(cmd)
+    # --project
+    extracode = """
+    for w in workers()
+        @test remotecall_fetch(active_project, w) == Base.ACTIVE_PROJECT[] ==
+              $(repr(project))
+    end
+    """
+    cmd = setenv(`$(julia) --project=$(project) -p1 -e $(testcode * extracode)`, env)
+    @test success(cmd)
+    # JULIA_PROJECT
+    cmd = setenv(`$(julia) -p1 -e $(testcode * extracode)`,
+                 (env["JULIA_PROJECT"] = project; env))
+    @test success(cmd)
+    # Pkg.activate(...)
+    activateish = """
+    Base.ACTIVE_PROJECT[] = $(repr(project))
+    using Distributed
+    addprocs(1)
+    """
+    cmd = setenv(`$(julia) -e $(activateish * testcode * extracode)`, env)
+    @test success(cmd)
+    # JULIA_(LOAD|DEPOT)_PATH
+    shufflecode = """
+    d = reverse(DEPOT_PATH)
+    append!(empty!(DEPOT_PATH), d)
+    l = reverse(LOAD_PATH)
+    append!(empty!(LOAD_PATH), l)
+    """
+    addcode = """
+    using Distributed
+    addprocs(1) # after shuffling
+    """
+    extracode = """
+    for w in workers()
+        @test remotecall_fetch(load_path, w) == $(repr(reverse(load_path)))
+        @test remotecall_fetch(depot_path, w) == $(repr(reverse(depots)))
+    end
+    """
+    cmd = setenv(`$(julia) -e $(shufflecode * addcode * testcode * extracode)`, env)
+    @test success(cmd)
+    # Mismatch when shuffling after proc addition
+    failcode = shufflecode * setupcode * """
+    for w in workers()
+        @test remotecall_fetch(load_path, w) == reverse(LOAD_PATH) == $(repr(load_path))
+        @test remotecall_fetch(depot_path, w) == reverse(DEPOT_PATH) == $(repr(depots))
+    end
+    """
+    cmd = setenv(`$(julia) -p1 -e $(failcode)`, env)
+    @test success(cmd)
+    # Passing env or exeflags to addprocs(...) to override defaults
+    envcode = """
+    using Distributed
+    project = mktempdir()
+    env = Dict(
+        "JULIA_LOAD_PATH" => LOAD_PATH[1],
+        "JULIA_DEPOT_PATH" => DEPOT_PATH[1],
+        "TMPDIR" => ENV["TMPDIR"],
+    )
+    addprocs(1; env = env, exeflags = `--project=\$(project)`)
+    env["JULIA_PROJECT"] = project
+    addprocs(1; env = env)
+    """ * setupcode * """
+    for w in workers()
+        @test remotecall_fetch(depot_path, w)          == [DEPOT_PATH[1]]
+        @test remotecall_fetch(load_path, w)           == [LOAD_PATH[1]]
+        @test remotecall_fetch(active_project, w)      == project
+        @test remotecall_fetch(Base.active_project, w) == joinpath(project, "Project.toml")
+    end
+    """
+    cmd = setenv(`$(julia) -e $(envcode)`, env)
+    @test success(cmd)
+end end
+
 include("splitrange.jl")
 
 # Run topology tests last after removing all workers, since a given
diff --git a/stdlib/Distributed/test/includefile.jl b/stdlib/Distributed/test/includefile.jl
index ea9f4b3d63f4bc..faea6c11aaf6a9 100644
--- a/stdlib/Distributed/test/includefile.jl
+++ b/stdlib/Distributed/test/includefile.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # this is used to test that relative include paths work on other processes
 
 i27429 = 27429
diff --git a/stdlib/Distributed/test/managers.jl b/stdlib/Distributed/test/managers.jl
index 2397bf118e2e15..efc354356c6182 100644
--- a/stdlib/Distributed/test/managers.jl
+++ b/stdlib/Distributed/test/managers.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 using Distributed
 using Sockets
diff --git a/stdlib/Distributed/test/splitrange.jl b/stdlib/Distributed/test/splitrange.jl
index 7b15593d21eafc..9f3c9c92a3ffad 100644
--- a/stdlib/Distributed/test/splitrange.jl
+++ b/stdlib/Distributed/test/splitrange.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 using Distributed
 using Distributed: splitrange
@@ -22,7 +24,7 @@ using Distributed: splitrange
 @test splitrange(-1, 1, 4) == Array{UnitRange{Int64},1}([-1:-1,0:0,1:1])
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
+isdefined(Main, :OffsetArrays) || @eval Main @everywhere include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
 using .Main.OffsetArrays
 
 oa = OffsetArray([123, -345], (-2,))
diff --git a/stdlib/Downloads.version b/stdlib/Downloads.version
index 00ebee6d3aa32b..14f74bccb4c7b2 100644
--- a/stdlib/Downloads.version
+++ b/stdlib/Downloads.version
@@ -1,2 +1,4 @@
 DOWNLOADS_BRANCH = master
-DOWNLOADS_SHA1 = 1a1d2e0a10209512f5b29e585bfd78e7a47f8f61
+DOWNLOADS_SHA1 = 9f738d30e1256a4c122dff9f38536cfc1feeca8e
+DOWNLOADS_GIT_URL := https://github.com/JuliaLang/Downloads.jl.git
+DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1
diff --git a/stdlib/FileWatching/docs/src/index.md b/stdlib/FileWatching/docs/src/index.md
index 3944f5d3ed1c97..6c332511f578f3 100644
--- a/stdlib/FileWatching/docs/src/index.md
+++ b/stdlib/FileWatching/docs/src/index.md
@@ -7,3 +7,31 @@ FileWatching.watch_file
 FileWatching.watch_folder
 FileWatching.unwatch_folder
 ```
+
+# Pidfile
+
+```@meta
+CurrentModule = FileWatching.Pidfile
+```
+
+A simple utility tool for creating advisory pidfiles (lock files).
+
+## Primary Functions
+
+```@docs
+mkpidlock
+close(lock::LockMonitor)
+```
+
+
+## Helper Functions
+
+```@docs
+Pidfile.open_exclusive
+Pidfile.tryopen_exclusive
+Pidfile.write_pidfile
+Pidfile.parse_pidfile
+Pidfile.stale_pidfile
+Pidfile.isvalidpid
+Base.touch(::Pidfile.LockMonitor)
+```
diff --git a/stdlib/FileWatching/src/FileWatching.jl b/stdlib/FileWatching/src/FileWatching.jl
index 1c8a684310feca..e266bff7ec7d1c 100644
--- a/stdlib/FileWatching/src/FileWatching.jl
+++ b/stdlib/FileWatching/src/FileWatching.jl
@@ -16,20 +16,24 @@ export
     FileMonitor,
     FolderMonitor,
     PollingFileWatcher,
-    FDWatcher
+    FDWatcher,
+    # pidfile:
+    mkpidlock
 
 import Base: @handle_as, wait, close, eventloop, notify_error, IOError,
     _sizeof_uv_poll, _sizeof_uv_fs_poll, _sizeof_uv_fs_event, _uv_hook_close, uv_error, _UVError,
     iolock_begin, iolock_end, associate_julia_struct, disassociate_julia_struct,
-    preserve_handle, unpreserve_handle, isreadable, iswritable, |
+    preserve_handle, unpreserve_handle, isreadable, iswritable, isopen,
+    |, getproperty, propertynames
 import Base.Filesystem.StatStruct
 if Sys.iswindows()
     import Base.WindowsRawSocket
 end
 
+
 # libuv file watching event flags
-const UV_RENAME = 1
-const UV_CHANGE = 2
+const UV_RENAME = Int32(1)
+const UV_CHANGE = Int32(2)
 struct FileEvent
     renamed::Bool
     changed::Bool
@@ -45,33 +49,36 @@ FileEvent(flags::Integer) = FileEvent((flags & UV_RENAME) != 0,
               a.changed | b.changed,
               a.timedout | b.timedout)
 
+# libuv file descriptor event flags
+const UV_READABLE = Int32(1)
+const UV_WRITABLE = Int32(2)
+const UV_DISCONNECT = Int32(4)
+const UV_PRIORITIZED = Int32(8)
 struct FDEvent
-    readable::Bool
-    writable::Bool
-    disconnect::Bool
-    timedout::Bool
-    FDEvent(r::Bool, w::Bool, d::Bool, t::Bool) = new(r, w, d, t)
+    events::Int32
+    FDEvent(flags::Integer=0) = new(flags)
 end
-# libuv file descriptor event flags
-const UV_READABLE = 1
-const UV_WRITABLE = 2
-const UV_DISCONNECT = 4
+
+FDEvent(r::Bool, w::Bool, d::Bool, t::Bool) = FDEvent((UV_READABLE * r) | (UV_WRITABLE * w) | (UV_DISCONNECT * d)) # deprecated method
+
+function getproperty(f::FDEvent, field::Symbol)
+    events = getfield(f, :events)
+    field === :readable && return (events & UV_READABLE) != 0
+    field === :writable && return (events & UV_WRITABLE) != 0
+    field === :disconnect && return (events & UV_DISCONNECT) != 0
+    field === :prioritized && return (events & UV_PRIORITIZED) != 0
+    field === :timedout && return events == 0
+    field === :events && return Int(events)
+    getfield(f, field)::Union{}
+end
+propertynames(f::FDEvent) = (:readable, :writable, :disconnect, :prioritized, :timedout, :events)
 
 isreadable(f::FDEvent) = f.readable
 iswritable(f::FDEvent) = f.writable
-FDEvent() = FDEvent(false, false, false, true)
-FDEvent(flags::Integer) = FDEvent((flags & UV_READABLE) != 0,
-                                  (flags & UV_WRITABLE) != 0,
-                                  (flags & UV_DISCONNECT) != 0,
-                                  false)
-|(a::FDEvent, b::FDEvent) =
-    FDEvent(a.readable | b.readable,
-            a.writable | b.writable,
-            a.disconnect | b.disconnect,
-            a.timedout | b.timedout)
+|(a::FDEvent, b::FDEvent) = FDEvent(getfield(a, :events) | getfield(b, :events))
 
 mutable struct FileMonitor
-    handle::Ptr{Cvoid}
+    @atomic handle::Ptr{Cvoid}
     file::String
     notify::Base.ThreadSynchronizer
     events::Int32
@@ -93,15 +100,15 @@ mutable struct FileMonitor
     end
 end
 
-
 mutable struct FolderMonitor
-    handle::Ptr{Cvoid}
-    notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError}
-    open::Bool
+    @atomic handle::Ptr{Cvoid}
+    # notify::Channel{Any} # eltype = Union{Pair{String, FileEvent}, IOError}
+    notify::Base.ThreadSynchronizer
+    channel::Vector{Any} # eltype = Pair{String, FileEvent}
     FolderMonitor(folder::AbstractString) = FolderMonitor(String(folder))
     function FolderMonitor(folder::String)
         handle = Libc.malloc(_sizeof_uv_fs_event)
-        this = new(handle, Channel(Inf), false)
+        this = new(handle, Base.ThreadSynchronizer(), [])
         associate_julia_struct(handle, this)
         iolock_begin()
         err = ccall(:uv_fs_event_init, Cint, (Ptr{Cvoid}, Ptr{Cvoid}), eventloop(), handle)
@@ -109,7 +116,6 @@ mutable struct FolderMonitor
             Libc.free(handle)
             throw(_UVError("FolderMonitor", err))
         end
-        this.open = true
         finalizer(uvfinalize, this)
         uv_error("FolderMonitor (start)",
                  ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
@@ -120,7 +126,7 @@ mutable struct FolderMonitor
 end
 
 mutable struct PollingFileWatcher
-    handle::Ptr{Cvoid}
+    @atomic handle::Ptr{Cvoid}
     file::String
     interval::UInt32
     notify::Base.ThreadSynchronizer
@@ -145,16 +151,17 @@ mutable struct PollingFileWatcher
 end
 
 mutable struct _FDWatcher
-    handle::Ptr{Cvoid}
+    @atomic handle::Ptr{Cvoid}
     fdnum::Int # this is NOT the file descriptor
     refcount::Tuple{Int, Int}
     notify::Base.ThreadSynchronizer
     events::Int32
     active::Tuple{Bool, Bool}
 
-    let FDWatchers = Vector{Any}()
+    let FDWatchers = Vector{Any}() # n.b.: this structure and the refcount are protected by the iolock
         global _FDWatcher, uvfinalize
         @static if Sys.isunix()
+            _FDWatcher(fd::RawFD, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
             function _FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
                 if !readable && !writable
                     throw(ArgumentError("must specify at least one of readable or writable to create a FDWatcher"))
@@ -181,7 +188,7 @@ mutable struct _FDWatcher
                     fdnum,
                     (Int(readable), Int(writable)),
                     Base.ThreadSynchronizer(),
-                    0,
+                    Int32(0),
                     (false, false))
                 associate_julia_struct(handle, this)
                 err = ccall(:uv_poll_init, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, RawFD), eventloop(), handle, fd)
@@ -203,7 +210,7 @@ mutable struct _FDWatcher
                 if t.handle != C_NULL
                     disassociate_julia_struct(t)
                     ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
-                    t.handle = C_NULL
+                    @atomic :monotonic t.handle = C_NULL
                 end
                 t.refcount = (0, 0)
                 t.active = (false, false)
@@ -212,7 +219,7 @@ mutable struct _FDWatcher
                         FDWatchers[t.fdnum] = nothing
                     end
                 end
-                notify(t.notify, FDEvent())
+                notify(t.notify, Int32(0))
             finally
                 unlock(t.notify)
             end
@@ -222,10 +229,12 @@ mutable struct _FDWatcher
     end
 
     @static if Sys.iswindows()
+        _FDWatcher(fd::RawFD, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
         function _FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
             handle = Libc._get_osfhandle(fd)
             return _FDWatcher(handle, readable, writable)
         end
+        _FDWatcher(fd::WindowsRawSocket, mask::FDEvent) = _FDWatcher(fd, mask.readable, mask.writable)
         function _FDWatcher(fd::WindowsRawSocket, readable::Bool, writable::Bool)
             if !readable && !writable
                 throw(ArgumentError("must specify at least one of readable or writable to create a FDWatcher"))
@@ -255,25 +264,38 @@ mutable struct _FDWatcher
 end
 
 mutable struct FDWatcher
-    watcher::_FDWatcher
-    readable::Bool
-    writable::Bool
     # WARNING: make sure `close` has been manually called on this watcher before closing / destroying `fd`
+    watcher::_FDWatcher
+    mask::FDEvent
     function FDWatcher(fd::RawFD, readable::Bool, writable::Bool)
-        this = new(_FDWatcher(fd, readable, writable), readable, writable)
+        return FDWatcher(fd, FDEvent(readable, writable, false, false))
+    end
+    function FDWatcher(fd::RawFD, mask::FDEvent)
+        this = new(_FDWatcher(fd, mask), mask)
         finalizer(close, this)
         return this
     end
     @static if Sys.iswindows()
         function FDWatcher(fd::WindowsRawSocket, readable::Bool, writable::Bool)
-            this = new(_FDWatcher(fd, readable, writable), readable, writable)
+            return FDWatcher(fd, FDEvent(readable, writable, false, false))
+        end
+        function FDWatcher(fd::WindowsRawSocket, mask::FDEvent)
+            this = new(_FDWatcher(fd, mask), mask)
             finalizer(close, this)
             return this
         end
     end
 end
 
+function getproperty(fdw::FDWatcher, s::Symbol)
+    # support deprecated field names
+    s === :readable && return fdw.mask.readable
+    s === :writable && return fdw.mask.writable
+    return getfield(fdw, s)
+end
+
 
+close(t::_FDWatcher, mask::FDEvent) = close(t, mask.readable, mask.writable)
 function close(t::_FDWatcher, readable::Bool, writable::Bool)
     iolock_begin()
     if t.refcount != (0, 0)
@@ -281,31 +303,39 @@ function close(t::_FDWatcher, readable::Bool, writable::Bool)
     end
     if t.refcount == (0, 0)
         uvfinalize(t)
+    else
+        @lock t.notify notify(t.notify, Int32(0))
     end
     iolock_end()
     nothing
 end
 
 function close(t::FDWatcher)
-    r, w = t.readable, t.writable
-    t.readable = t.writable = false
-    close(t.watcher, r, w)
+    mask = t.mask
+    t.mask = FDEvent()
+    close(t.watcher, mask)
 end
 
 function uvfinalize(uv::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
-    disassociate_julia_struct(uv)
-    close(uv)
+    iolock_begin()
+    if uv.handle != C_NULL
+        disassociate_julia_struct(uv) # close (and free) without notify
+        ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), uv.handle)
+    end
+    iolock_end()
 end
 
 function close(t::Union{FileMonitor, FolderMonitor, PollingFileWatcher})
+    iolock_begin()
     if t.handle != C_NULL
         ccall(:jl_close_uv, Cvoid, (Ptr{Cvoid},), t.handle)
     end
+    iolock_end()
 end
 
 function _uv_hook_close(uv::_FDWatcher)
     # fyi: jl_atexit_hook can cause this to get called too
-    uv.handle = C_NULL
+    Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
     uvfinalize(uv)
     nothing
 end
@@ -313,8 +343,8 @@ end
 function _uv_hook_close(uv::PollingFileWatcher)
     lock(uv.notify)
     try
-        uv.handle = C_NULL
         uv.active = false
+        Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
         notify(uv.notify, StatStruct())
     finally
         unlock(uv.notify)
@@ -325,8 +355,8 @@ end
 function _uv_hook_close(uv::FileMonitor)
     lock(uv.notify)
     try
-        uv.handle = C_NULL
         uv.active = false
+        Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
         notify(uv.notify, FileEvent())
     finally
         unlock(uv.notify)
@@ -335,12 +365,22 @@ function _uv_hook_close(uv::FileMonitor)
 end
 
 function _uv_hook_close(uv::FolderMonitor)
-    uv.open = false
-    uv.handle = C_NULL
-    close(uv.notify)
+    lock(uv.notify)
+    try
+        Libc.free(@atomicswap :monotonic uv.handle = C_NULL)
+        notify_error(uv.notify, EOFError())
+    finally
+        unlock(uv.notify)
+    end
     nothing
 end
 
+isopen(fm::FileMonitor) = fm.handle != C_NULL
+isopen(fm::FolderMonitor) = fm.handle != C_NULL
+isopen(pfw::PollingFileWatcher) = pfw.handle != C_NULL
+isopen(pfw::_FDWatcher) = pfw.refcount != (0, 0)
+isopen(pfw::FDWatcher) = !pfw.mask.timedout
+
 function uv_fseventscb_file(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
     t = @handle_as handle FileMonitor
     lock(t.notify)
@@ -359,11 +399,17 @@ end
 
 function uv_fseventscb_folder(handle::Ptr{Cvoid}, filename::Ptr, events::Int32, status::Int32)
     t = @handle_as handle FolderMonitor
-    if status != 0
-        put!(t.notify, _UVError("FolderMonitor", status))
-    else
-        fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename))
-        put!(t.notify, fname => FileEvent(events))
+    lock(t.notify)
+    try
+        if status != 0
+            notify_error(t.notify, _UVError("FolderMonitor", status))
+        else
+            fname = (filename == C_NULL) ? "" : unsafe_string(convert(Cstring, filename))
+            push!(t.channel, fname => FileEvent(events))
+            notify(t.notify)
+        end
+    finally
+        unlock(t.notify)
     end
     nothing
 end
@@ -384,7 +430,7 @@ function uv_pollcb(handle::Ptr{Cvoid}, status::Int32, events::Int32)
                     ccall(:uv_poll_stop, Int32, (Ptr{Cvoid},), t.handle)
                 end
             end
-            notify(t.notify, FDEvent(events))
+            notify(t.notify, events)
         end
     finally
         unlock(t.notify)
@@ -421,7 +467,7 @@ end
 
 function start_watching(t::_FDWatcher)
     iolock_begin()
-    t.handle == C_NULL && return throw(ArgumentError("FDWatcher is closed"))
+    t.handle == C_NULL && throw(ArgumentError("FDWatcher is closed"))
     readable = t.refcount[1] > 0
     writable = t.refcount[2] > 0
     if t.active[1] != readable || t.active[2] != writable
@@ -439,7 +485,7 @@ end
 
 function start_watching(t::PollingFileWatcher)
     iolock_begin()
-    t.handle == C_NULL && return throw(ArgumentError("PollingFileWatcher is closed"))
+    t.handle == C_NULL && throw(ArgumentError("PollingFileWatcher is closed"))
     if !t.active
         uv_error("PollingFileWatcher (start)",
                  ccall(:uv_fs_poll_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, UInt32),
@@ -468,7 +514,7 @@ end
 
 function start_watching(t::FileMonitor)
     iolock_begin()
-    t.handle == C_NULL && return throw(ArgumentError("FileMonitor is closed"))
+    t.handle == C_NULL && throw(ArgumentError("FileMonitor is closed"))
     if !t.active
         uv_error("FileMonitor (start)",
                  ccall(:uv_fs_event_start, Int32, (Ptr{Cvoid}, Ptr{Cvoid}, Cstring, Int32),
@@ -495,67 +541,78 @@ function stop_watching(t::FileMonitor)
     nothing
 end
 
-function wait(fdw::FDWatcher)
-    GC.@preserve fdw begin
-        return wait(fdw.watcher, readable = fdw.readable, writable = fdw.writable)
-    end
-end
-
-function wait(fdw::_FDWatcher; readable=true, writable=true)
-    events = FDEvent(Int32(0))
+# n.b. this _wait may return spuriously early with a timedout event
+function _wait(fdw::_FDWatcher, mask::FDEvent)
     iolock_begin()
     preserve_handle(fdw)
     lock(fdw.notify)
     try
-        while true
-            haveevent = false
-            events |= FDEvent(fdw.events)
-            if readable && isreadable(events)
-                fdw.events &= ~UV_READABLE
-                haveevent = true
-            end
-            if writable && iswritable(events)
-                fdw.events &= ~UV_WRITABLE
-                haveevent = true
-            end
-            if haveevent
-                break
-            end
-            if fdw.refcount == (0, 0) # !open
-                throw(EOFError())
-            else
-                start_watching(fdw) # make sure the poll is active
-                iolock_end()
-                events = wait(fdw.notify)::FDEvent
-                unlock(fdw.notify)
-                iolock_begin()
-                lock(fdw.notify)
-            end
+        events = FDEvent(fdw.events & mask.events)
+        if !isopen(fdw) # !open
+            throw(EOFError())
+        elseif events.timedout
+            start_watching(fdw) # make sure the poll is active
+            iolock_end()
+            return FDEvent(wait(fdw.notify)::Int32)
+        else
+            iolock_end()
+            return events
         end
     finally
         unlock(fdw.notify)
         unpreserve_handle(fdw)
     end
-    iolock_end()
-    return events
 end
 
-function wait(fd::RawFD; readable=false, writable=false)
-    fdw = _FDWatcher(fd, readable, writable)
+function wait(fdw::_FDWatcher; readable=true, writable=true)
+    return wait(fdw, FDEvent(readable, writable, false, false))
+end
+function wait(fdw::_FDWatcher, mask::FDEvent)
+    while true
+        mask.timedout && return mask
+        events = _wait(fdw, mask)
+        if !events.timedout
+            @lock fdw.notify fdw.events &= ~events.events
+            return events
+        end
+    end
+end
+
+function wait(fdw::FDWatcher)
+    isopen(fdw) || throw(EOFError())
+    while true
+        events = GC.@preserve fdw _wait(fdw.watcher, fdw.mask)
+        isopen(fdw) || throw(EOFError())
+        if !events.timedout
+            @lock fdw.watcher.notify fdw.watcher.events &= ~events.events
+            return events
+        end
+    end
+end
+
+function wait(socket::RawFD; readable=false, writable=false)
+    return wait(socket, FDEvent(readable, writable, false, false))
+end
+function wait(fd::RawFD, mask::FDEvent)
+    fdw = _FDWatcher(fd, mask)
     try
-        return wait(fdw, readable=readable, writable=writable)
+        return wait(fdw, mask)
     finally
-        close(fdw, readable, writable)
+        close(fdw, mask)
     end
 end
 
+
 if Sys.iswindows()
     function wait(socket::WindowsRawSocket; readable=false, writable=false)
-        fdw = _FDWatcher(socket, readable, writable)
+        return wait(socket, FDEvent(readable, writable, false, false))
+    end
+    function wait(socket::WindowsRawSocket, mask::FDEvent)
+        fdw = _FDWatcher(socket, mask)
         try
-            return wait(fdw, readable=readable, writable=writable)
+            return wait(fdw, mask)
         finally
-            close(fdw, readable, writable)
+            close(fdw, mask)
         end
     end
 end
@@ -611,26 +668,20 @@ function wait(m::FileMonitor)
 end
 
 function wait(m::FolderMonitor)
-    m.handle == C_NULL && return throw(ArgumentError("FolderMonitor is closed"))
-    if isready(m.notify)
-        evt = take!(m.notify) # non-blocking fast-path
-    else
-        preserve_handle(m)
-        evt = try
-                take!(m.notify)
-            catch ex
-                unpreserve_handle(m)
-                if ex isa InvalidStateException && ex.state === :closed
-                    rethrow(EOFError()) # `wait(::Channel)` throws the wrong exception
-                end
-                rethrow()
+    m.handle == C_NULL && throw(EOFError())
+    preserve_handle(m)
+    lock(m.notify)
+    evt = try
+            m.handle == C_NULL && throw(EOFError())
+            while isempty(m.channel)
+                wait(m.notify)
             end
-    end
-    if evt isa Pair{String, FileEvent}
-        return evt
-    else
-        throw(evt)
-    end
+            popfirst!(m.channel)
+        finally
+            unlock(m.notify)
+            unpreserve_handle(m)
+        end
+    return evt::Pair{String, FileEvent}
 end
 
 
@@ -647,32 +698,45 @@ The returned value is an object with boolean fields `readable`, `writable`, and
 giving the result of the polling.
 """
 function poll_fd(s::Union{RawFD, Sys.iswindows() ? WindowsRawSocket : Union{}}, timeout_s::Real=-1; readable=false, writable=false)
-    wt = Condition()
-    fdw = _FDWatcher(s, readable, writable)
+    mask = FDEvent(readable, writable, false, false)
+    mask.timedout && return mask
+    fdw = _FDWatcher(s, mask)
     local timer
+    # we need this flag to explicitly track whether we call `close` already, to update the internal refcount correctly
+    timedout = false # TODO: make this atomic
     try
         if timeout_s >= 0
-            result::FDEvent = FDEvent()
+            # delay creating the timer until shortly before we start the poll wait
             timer = Timer(timeout_s) do t
-                notify(wt)
+                timedout && return
+                timedout = true
+                close(fdw, mask)
             end
-            @async begin
-                try
-                    result = wait(fdw, readable=readable, writable=writable)
-                catch e
-                    notify_error(wt, e)
-                    return
+            try
+                while true
+                    events = _wait(fdw, mask)
+                    if timedout || !events.timedout
+                        @lock fdw.notify fdw.events &= ~events.events
+                        return events
+                    end
                 end
-                notify(wt)
+            catch ex
+                ex isa EOFError() || rethrow()
+                return FDEvent()
             end
-            wait(wt)
-            return result
         else
-            return wait(fdw, readable=readable, writable=writable)
+            return wait(fdw, mask)
         end
     finally
-        close(fdw, readable, writable)
-        @isdefined(timer) && close(timer)
+        if @isdefined(timer)
+            if !timedout
+                timedout = true
+                close(timer)
+                close(fdw, mask)
+            end
+        else
+            close(fdw, mask)
+        end
     end
 end
 
@@ -726,37 +790,39 @@ function watch_folder(s::String, timeout_s::Real=-1)
     fm = get!(watched_folders, s) do
         return FolderMonitor(s)
     end
-    if timeout_s >= 0 && !isready(fm.notify)
+    local timer
+    if timeout_s >= 0
+        @lock fm.notify isempty(fm.channel) || return popfirst!(fm.channel)
         if timeout_s <= 0.010
             # for very small timeouts, we can just sleep for the whole timeout-interval
             (timeout_s == 0) ? yield() : sleep(timeout_s)
-            if !isready(fm.notify)
-                return "" => FileEvent() # timeout
-            end
-            # fall-through to a guaranteed non-blocking fast-path call to wait
+            @lock fm.notify isempty(fm.channel) || return popfirst!(fm.channel)
+            return "" => FileEvent() # timeout
         else
-            # If we may need to be able to cancel via a timeout,
-            # create a second monitor object just for that purpose.
-            # We still take the events from the primary stream.
-            fm2 = FileMonitor(s)
             timer = Timer(timeout_s) do t
-                close(fm2)
+                @lock fm.notify notify(fm.notify)
             end
-            try
-                while isopen(fm.notify) && !isready(fm.notify)
-                    fm2.handle == C_NULL && return "" => FileEvent() # timeout
-                    wait(fm2)
+        end
+    end
+    # inline a copy of `wait` with added support for checking timer
+    fm.handle == C_NULL && throw(EOFError())
+    preserve_handle(fm)
+    lock(fm.notify)
+    evt = try
+            fm.handle == C_NULL && throw(EOFError())
+            while isempty(fm.channel)
+                if @isdefined(timer)
+                    isopen(timer) || return "" => FileEvent() # timeout
                 end
-            finally
-                close(fm2)
-                close(timer)
+                wait(fm.notify)
             end
-            # guaranteed that next call to `wait(fm)` is non-blocking
-            # since we haven't entered the libuv event loop yet
-            # or the Base scheduler workqueue since last testing `isready`
+            popfirst!(fm.channel)
+        finally
+            unlock(fm.notify)
+            unpreserve_handle(fm)
+            @isdefined(timer) && close(timer)
         end
-    end
-    return wait(fm)
+    return evt::Pair{String, FileEvent}
 end
 
 """
@@ -814,4 +880,7 @@ function poll_file(s::AbstractString, interval_seconds::Real=5.007, timeout_s::R
     end
 end
 
+include("pidfile.jl")
+import .Pidfile: mkpidlock
+
 end
diff --git a/stdlib/FileWatching/src/pidfile.jl b/stdlib/FileWatching/src/pidfile.jl
new file mode 100644
index 00000000000000..8416765a57b97a
--- /dev/null
+++ b/stdlib/FileWatching/src/pidfile.jl
@@ -0,0 +1,317 @@
+module Pidfile
+
+
+export mkpidlock
+
+using Base:
+    IOError, UV_EEXIST, UV_ESRCH,
+    Process
+
+using Base.Libc: rand
+
+using Base.Filesystem:
+    File, open, JL_O_CREAT, JL_O_RDWR, JL_O_RDONLY, JL_O_EXCL,
+    rename, samefile, path_separator
+
+using ..FileWatching: watch_file
+using Base.Sys: iswindows
+
+"""
+    mkpidlock([f::Function], at::String, [pid::Cint, proc::Process]; kwopts...)
+
+Create a pidfile lock for the path "at" for the current process
+or the process identified by pid or proc. Can take a function to execute once locked,
+for usage in `do` blocks, after which the lock will be automatically closed. If the lock fails
+and `wait` is false, then an error is thrown.
+
+The lock will be released by either `close`, a `finalizer`, or shortly after `proc` exits.
+Make sure the return value is live through the end of the critical section of
+your program, so the `finalizer` does not reclaim it early.
+
+Optional keyword arguments:
+ - `mode`: file access mode (modified by the process umask). Defaults to world-readable.
+ - `poll_interval`: Specify the maximum time to between attempts (if `watch_file` doesn't work)
+ - `stale_age`: Delete an existing pidfile (ignoring the lock) if its mtime is older than this.
+     The file won't be deleted until 25x longer than this if the pid in the file appears that it may be valid.
+     By default this is disabled (`stale_age` = 0), but a typical recommended value would be about 3-5x an
+     estimated normal completion time.
+ - `refresh`: Keeps a lock from becoming stale by updating the mtime every interval of time that passes.
+     By default, this is set to `stale_age/2`, which is the recommended value.
+ - `wait`: If true, block until we get the lock, if false, raise error if lock fails.
+"""
+function mkpidlock end
+
+
+# mutable only because we want to add a finalizer
+mutable struct LockMonitor
+    const path::String
+    const fd::File
+    const update::Union{Nothing,Timer}
+
+    global function mkpidlock(at::String, pid::Cint; stale_age::Real=0, refresh::Real=stale_age/2, kwopts...)
+        local lock
+        atdir, atname = splitdir(at)
+        isempty(atdir) && (atdir = pwd())
+        at = realpath(atdir) * path_separator * atname
+        fd = open_exclusive(at; stale_age=stale_age, kwopts...)
+        update = nothing
+        try
+            write_pidfile(fd, pid)
+            if refresh > 0
+                # N.b.: to ensure our finalizer works we are careful to capture
+                # `fd` here instead of `lock`.
+                update = Timer(t -> isopen(t) && touch(fd), refresh; interval=refresh)
+            end
+            lock = new(at, fd, update)
+            finalizer(close, lock)
+        catch ex
+            tryrmopenfile(at)
+            close(fd)
+            rethrow(ex)
+        end
+        return lock
+    end
+end
+
+mkpidlock(at::String; kwopts...) = mkpidlock(at, getpid(); kwopts...)
+mkpidlock(f::Function, at::String; kwopts...) = mkpidlock(f, at, getpid(); kwopts...)
+
+function mkpidlock(f::Function, at::String, pid::Cint; kwopts...)
+    lock = mkpidlock(at, pid; kwopts...)
+    try
+        return f()
+    finally
+        close(lock)
+    end
+end
+
+function mkpidlock(at::String, proc::Process; kwopts...)
+    lock = mkpidlock(at, getpid(proc); kwopts...)
+    closer = @async begin
+        wait(proc)
+        close(lock)
+    end
+    isdefined(Base, :errormonitor) && Base.errormonitor(closer)
+    return lock
+end
+
+"""
+    Base.touch(::Pidfile.LockMonitor)
+
+Update the `mtime` on the lock, to indicate it is still fresh.
+
+See also the `refresh` keyword in the [`mkpidlock`](@ref) constructor.
+"""
+Base.touch(lock::LockMonitor) = (touch(lock.fd); lock)
+
+"""
+    write_pidfile(io, pid)
+
+Write our pidfile format to an open IO descriptor.
+"""
+function write_pidfile(io::IO, pid::Cint)
+    print(io, "$pid $(gethostname())")
+end
+
+"""
+    parse_pidfile(file::Union{IO, String}) => (pid, hostname, age)
+
+Attempt to parse our pidfile format,
+replaced an element with (0, "", 0.0), respectively, for any read that failed.
+"""
+function parse_pidfile(io::IO)
+    fields = split(read(io, String), ' ', limit = 2)
+    pid = tryparse(Cuint, fields[1])
+    pid === nothing && (pid = Cuint(0))
+    hostname = (length(fields) == 2) ? fields[2] : ""
+    when = mtime(io)
+    age = time() - when
+    return (pid, hostname, age)
+end
+
+function parse_pidfile(path::String)
+    try
+        existing = open(path, JL_O_RDONLY)
+        try
+            return parse_pidfile(existing)
+        finally
+            close(existing)
+        end
+    catch ex
+        isa(ex, EOFError) || isa(ex, IOError) || rethrow(ex)
+        return (Cuint(0), "", 0.0)
+    end
+end
+
+"""
+    isvalidpid(hostname::String, pid::Cuint) :: Bool
+
+Attempt to conservatively estimate whether pid is a valid process id.
+"""
+function isvalidpid(hostname::AbstractString, pid::Cuint)
+    # can't inspect remote hosts
+    (hostname == "" || hostname == gethostname()) || return true
+    # pid < 0 is never valid (must be a parser error or different OS),
+    # and would have a completely different meaning when passed to kill
+    !iswindows() && pid > typemax(Cint) && return false
+    # (similarly for pid 0)
+    pid == 0 && return false
+    # see if the process id exists by querying kill without sending a signal
+    # and checking if it returned ESRCH (no such process)
+    return ccall(:uv_kill, Cint, (Cuint, Cint), pid, 0) != UV_ESRCH
+end
+
+"""
+    stale_pidfile(path::String, stale_age::Real) :: Bool
+
+Helper function for `open_exclusive` for deciding if a pidfile is stale.
+"""
+function stale_pidfile(path::String, stale_age::Real)
+    pid, hostname, age = parse_pidfile(path)
+    age < -stale_age && @warn "filesystem time skew detected" path=path
+    if age > stale_age
+        if (age > stale_age * 25) || !isvalidpid(hostname, pid)
+            return true
+        end
+    end
+    return false
+end
+
+"""
+    tryopen_exclusive(path::String, mode::Integer = 0o444) :: Union{Void, File}
+
+Try to create a new file for read-write advisory-exclusive access,
+return nothing if it already exists.
+"""
+function tryopen_exclusive(path::String, mode::Integer = 0o444)
+    try
+        return open(path, JL_O_RDWR | JL_O_CREAT | JL_O_EXCL, mode)
+    catch ex
+        (isa(ex, IOError) && ex.code == UV_EEXIST) || rethrow(ex)
+    end
+    return nothing
+end
+
+"""
+    open_exclusive(path::String; mode, poll_interval, stale_age) :: File
+
+Create a new a file for read-write advisory-exclusive access.
+If `wait` is `false` then error out if the lock files exist
+otherwise block until we get the lock.
+
+For a description of the keyword arguments, see [`mkpidlock`](@ref).
+"""
+function open_exclusive(path::String;
+                        mode::Integer = 0o444 #= read-only =#,
+                        poll_interval::Real = 10 #= seconds =#,
+                        wait::Bool = true #= return on failure if false =#,
+                        stale_age::Real = 0 #= disabled =#)
+    # fast-path: just try to open it
+    file = tryopen_exclusive(path, mode)
+    file === nothing || return file
+    if !wait
+        if file === nothing && stale_age > 0
+            if stale_age > 0 && stale_pidfile(path, stale_age)
+                @warn "attempting to remove probably stale pidfile" path=path
+                tryrmopenfile(path)
+            end
+            file = tryopen_exclusive(path, mode)
+        end
+        if file === nothing
+            error("Failed to get pidfile lock for $(repr(path)).")
+        else
+            return file
+        end
+    end
+    # fall-back: wait for the lock
+
+    while true
+        # start the file-watcher prior to checking for the pidfile existence
+        t = @async try
+            watch_file(path, poll_interval)
+        catch ex
+            isa(ex, IOError) || rethrow(ex)
+            sleep(poll_interval) # if the watch failed, convert to just doing a sleep
+        end
+        # now try again to create it
+        file = tryopen_exclusive(path, mode)
+        file === nothing || return file
+        Base.wait(t) # sleep for a bit before trying again
+        if stale_age > 0 && stale_pidfile(path, stale_age)
+            # if the file seems stale, try to remove it before attempting again
+            # set stale_age to zero so we won't attempt again, even if the attempt fails
+            stale_age -= stale_age
+            @warn "attempting to remove probably stale pidfile" path=path
+            tryrmopenfile(path)
+        end
+    end
+end
+
+function _rand_filename(len::Int=4) # modified from Base.Libc
+    slug = Base.StringVector(len)
+    chars = b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+    for i = 1:len
+        slug[i] = chars[(Libc.rand() % length(chars)) + 1]
+    end
+    return String(slug)
+end
+
+function tryrmopenfile(path::String)
+    # Deleting open file on Windows is a bit hard
+    # if we want to reuse the name immediately after:
+    # we need to first rename it, then delete it.
+    if Sys.iswindows()
+        try
+            local rmpath
+            rmdir, rmname = splitdir(path)
+            while true
+                rmpath = string(rmdir, isempty(rmdir) ? "" : path_separator,
+                    "\$", _rand_filename(), rmname, ".deleted")
+                ispath(rmpath) || break
+            end
+            rename(path, rmpath)
+            path = rmpath
+        catch ex
+            isa(ex, IOError) || rethrow(ex)
+        end
+    end
+    return try
+        rm(path)
+        true
+    catch ex
+        isa(ex, IOError) || rethrow(ex)
+        ex
+    end
+end
+
+"""
+    close(lock::LockMonitor)
+
+Release a pidfile lock.
+"""
+function Base.close(lock::LockMonitor)
+    update = lock.update
+    update === nothing || close(update)
+    isopen(lock.fd) || return false
+    removed = false
+    path = lock.path
+    pathstat = try
+            # Windows sometimes likes to return EACCES here,
+            # if the path is in the process of being deleted
+            stat(path)
+        catch ex
+            ex isa IOError || rethrow()
+            removed = ex
+            nothing
+        end
+    if pathstat !== nothing && samefile(stat(lock.fd), pathstat)
+        # try not to delete someone else's lock
+        removed = tryrmopenfile(path)
+    end
+    close(lock.fd)
+    havelock = removed === true
+    havelock || @warn "failed to remove pidfile on close" path=path removed=removed
+    return havelock
+end
+
+end # module
diff --git a/stdlib/FileWatching/test/pidfile.jl b/stdlib/FileWatching/test/pidfile.jl
new file mode 100644
index 00000000000000..febc082518edf0
--- /dev/null
+++ b/stdlib/FileWatching/test/pidfile.jl
@@ -0,0 +1,364 @@
+using FileWatching.Pidfile
+
+using Test
+
+using Base.Filesystem: File
+using FileWatching.Pidfile: iswindows,
+    write_pidfile, parse_pidfile,
+    isvalidpid, stale_pidfile,
+    tryopen_exclusive, open_exclusive
+
+# helper utilities
+struct MemoryFile <: Base.AbstractPipe
+    io::IOBuffer
+    mtime::Float64
+end
+Base.pipe_reader(io::MemoryFile) = io.io
+Base.Filesystem.mtime(io::MemoryFile) = io.mtime
+
+# set the process umask so we can test the behavior of
+# open mask without interference from parent's state
+# and create a test environment temp directory
+umask(new_mask) = ccall((@static iswindows() ? :_umask : :umask), Cint, (Cint,), new_mask)
+
+# TODO: Use targeted @test_log tests instead of suppressing all logs to hide the expected warnings
+Base.CoreLogging.with_logger(Base.CoreLogging.NullLogger()) do
+
+@testset "Pidfile.jl" begin
+old_umask = umask(0o002)
+try
+    mktempdir() do dir
+        cd(dir) do
+
+# now start tests definitions:
+
+@testset "validpid" begin
+    mypid = getpid() % Cuint
+    @test isvalidpid(gethostname(), mypid)
+    @test isvalidpid("", mypid)
+    @test !isvalidpid("", 0 % Cuint)
+    @test isvalidpid("NOT" * gethostname(), mypid)
+    @test isvalidpid("NOT" * gethostname(), 0 % Cuint)
+    @test isvalidpid("NOT" * gethostname(), -1 % Cuint)
+    if !iswindows()
+        @test isvalidpid("", 1 % Cuint)
+        @test !isvalidpid("", -1 % Cuint)
+        @test !isvalidpid("", -mypid)
+    end
+end
+
+@testset "write_pidfile" begin
+    buf = IOBuffer()
+    pid, host, age = 0, "", 123
+    pid2, host2, age2 = parse_pidfile(MemoryFile(seekstart(buf), time() - age))
+    @test pid == pid2
+    @test host == host2
+    @test age ≈ age2 atol=5
+
+    host = " host\r\n"
+    write(buf, "-1 $host")
+    pid2, host2, age2 = parse_pidfile(MemoryFile(seekstart(buf), time() - age))
+    @test pid == pid2
+    @test host == host2
+    @test age ≈ age2 atol=5
+    truncate(seekstart(buf), 0)
+
+    pid, host = getpid(), gethostname()
+    write_pidfile(buf, pid)
+    @test read(seekstart(buf), String) == "$pid $host"
+    pid2, host2, age2 = parse_pidfile(MemoryFile(seekstart(buf), time() - age))
+    @test pid == pid2
+    @test host == host2
+    @test age ≈ age2 atol=5
+    truncate(seekstart(buf), 0)
+
+    @testset "parse_pidfile" begin
+        age = 0
+        @test parse_pidfile("nonexist") === (Cuint(0), "", 0.0)
+        open(io -> write_pidfile(io, pid), "pidfile", "w")
+        pid2, host2, age2 = parse_pidfile("pidfile")
+        @test pid == pid2
+        @test host == host2
+        @test age ≈ age2 atol=10
+        rm("pidfile")
+    end
+end
+
+@assert !ispath("pidfile")
+@testset "open_exclusive" begin
+    f = open_exclusive("pidfile")::File
+    try
+        # check that f is open and read-writable
+        @test isfile("pidfile")
+        @test filemode("pidfile") & 0o777 == 0o444
+        @test filemode(f) & 0o777 == 0o444
+        @test filesize(f) == 0
+        @test write(f, "a") == 1
+        @test filesize(f) == 1
+        @test read(seekstart(f), String) == "a"
+        chmod("pidfile", 0o600)
+        @test filemode(f) & 0o777 == (iswindows() ? 0o666 : 0o600)
+    finally
+        close(f)
+    end
+
+    # release the pidfile after a short delay
+    deleted = false
+    rmtask = @async begin
+        sleep(3)
+        rm("pidfile")
+        deleted = true
+    end
+    isdefined(Base, :errormonitor) && Base.errormonitor(rmtask)
+    @test isfile("pidfile")
+    @test !deleted
+
+    # open the pidfile again (should wait for it to disappear first)
+    t = @elapsed f2 = open_exclusive(joinpath(dir, "pidfile"))::File
+    try
+        @test deleted
+        @test isfile("pidfile")
+        @test t > 2
+        if t > 6
+            println("INFO: watch_file optimization appears to have NOT succeeded")
+        end
+        @test filemode(f2) & 0o777 == 0o444
+        @test filesize(f2) == 0
+        @test write(f2, "bc") == 2
+        @test read(seekstart(f2), String) == "bc"
+        @test filesize(f2) == 2
+    finally
+        close(f2)
+    end
+    rm("pidfile")
+    wait(rmtask)
+
+    # now test with a long delay and other non-default options
+    f = open_exclusive("pidfile", mode = 0o000)::File
+    try
+        @test filemode(f) & 0o777 == (iswindows() ? 0o444 : 0o000)
+    finally
+        close(f)
+    end
+    deleted = false
+    rmtask = @async begin
+        sleep(8)
+        rm("pidfile")
+        deleted = true
+    end
+    isdefined(Base, :errormonitor) && Base.errormonitor(rmtask)
+    @test isfile("pidfile")
+    @test !deleted
+    # open the pidfile again (should wait for it to disappear first)
+    t = @elapsed f2 = open_exclusive("pidfile", mode = 0o777, poll_interval = 1.0)::File
+    try
+        @test deleted
+        @test isfile("pidfile")
+        @test filemode(f2) & 0o777 == (iswindows() ? 0o666 : 0o775)
+        @test write(f2, "def") == 3
+        @test read(seekstart(f2), String) == "def"
+        @test t > 7
+    finally
+        close(f2)
+    end
+    rm("pidfile")
+    wait(rmtask)
+
+    @testset "test for wait == false cases" begin
+        f = open_exclusive("pidfile", wait=false)
+        @test isfile("pidfile")
+        close(f)
+        rm("pidfile")
+
+        f = open_exclusive("pidfile")::File
+        deleted = false
+        rmtask = @async begin
+            sleep(2)
+            @test Pidfile.tryrmopenfile("pidfile")
+            deleted = true
+        end
+        isdefined(Base, :errormonitor) && Base.errormonitor(rmtask)
+
+        t1 = time()
+        @test_throws ErrorException open_exclusive("pidfile", wait=false)
+        @test time()-t1 ≈ 0 atol=1
+
+        sleep(1)
+        @test !deleted
+
+        t1 = time()
+        @test_throws ErrorException open_exclusive("pidfile", wait=false)
+        @test time()-t1 ≈ 0 atol=1
+
+        wait(rmtask)
+        @test deleted
+        t = @elapsed f2 = open_exclusive("pidfile", wait=false)::File
+        @test isfile("pidfile")
+        @test t ≈ 0 atol=1
+        close(f)
+        close(f2)
+        rm("pidfile")
+    end
+end
+
+@assert !ispath("pidfile")
+@testset "open_exclusive: break lock" begin
+    # test for stale_age
+    t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
+    try
+        write_pidfile(f, getpid())
+    finally
+        close(f)
+    end
+    @test t < 2
+    t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=1)::File
+    close(f)
+    @test 20 < t < 50
+    rm("pidfile")
+
+    t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
+    close(f)
+    @test t < 2
+    t = @elapsed f = open_exclusive("pidfile", poll_interval=3, stale_age=10)::File
+    close(f)
+    @test 8 < t < 20
+    rm("pidfile")
+end
+
+@testset "open_exclusive: other errors" begin
+    error = @test_throws(Base.IOError, open_exclusive("nonexist/folder"))
+    @test error.value.code == Base.UV_ENOENT
+
+    error = @test_throws(Base.IOError, open_exclusive(""))
+    @test error.value.code == Base.UV_ENOENT
+end
+
+@assert !ispath("pidfile")
+@testset "mkpidlock" begin
+    lockf = mkpidlock("pidfile")
+    @test lockf.update === nothing
+    waittask = @async begin
+        sleep(3)
+        cd(homedir()) do
+            return close(lockf)
+        end
+    end
+    isdefined(Base, :errormonitor) && Base.errormonitor(waittask)
+
+    # mkpidlock with no waiting
+    t = @elapsed @test_throws ErrorException mkpidlock("pidfile", wait=false)
+    @test t ≈ 0 atol=1
+
+    t = @elapsed lockf1 = mkpidlock(joinpath(dir, "pidfile"))
+    @test t > 2
+    @test istaskdone(waittask) && fetch(waittask)
+    @test !close(lockf)
+    finalize(lockf1)
+    t = @elapsed lockf2 = mkpidlock("pidfile")
+    @test t < 2
+    @test !close(lockf1)
+
+    # test manual breakage of the lock
+    # is correctly handled
+    @test Pidfile.tryrmopenfile("pidfile")
+    t = @elapsed lockf3 = mkpidlock("pidfile")
+    @test t < 2
+    @test isopen(lockf2.fd)
+    @test !close(lockf2)
+    @test !isopen(lockf2.fd)
+    @test isfile("pidfile")
+    @test close(lockf3)
+    @test !isfile("pidfile")
+
+    # Just for coverage's sake, run a test with do-block syntax
+    lock_times = Float64[]
+    t_loop = @async begin
+        for idx in 1:100
+            t = @elapsed mkpidlock("do_block_pidfile") do
+                # nothing
+            end
+            sleep(0.01)
+            push!(lock_times, t)
+        end
+    end
+    isdefined(Base, :errormonitor) && Base.errormonitor(t_loop)
+    mkpidlock("do_block_pidfile") do
+        sleep(3)
+    end
+    wait(t_loop)
+    @test maximum(lock_times) > 2
+    @test minimum(lock_times) < 1
+end
+
+@assert !ispath("pidfile")
+@testset "mkpidlock update" begin
+    lockf = mkpidlock("pidfile")
+    @test lockf.update === nothing
+    new = mtime(lockf.fd)
+    @test new ≈ time() atol=1
+    sleep(1)
+    @test mtime(lockf.fd) == new
+    touch(lockf)
+    old, new = new, mtime(lockf.fd)
+    @test new != old
+    @test new ≈ time() atol=1
+    close(lockf)
+
+    lockf = mkpidlock("pidfile"; refresh=0.2)
+    new = mtime(lockf.fd)
+    @test new ≈ time() atol=1
+    for i = 1:10
+        sleep(0.5)
+        old, new = new, mtime(lockf.fd)
+        @test new != old
+        @test new ≈ time() atol=1
+    end
+    @test isopen(lockf.update::Timer)
+    close(lockf)
+    @test !isopen(lockf.update::Timer)
+
+    lockf = mkpidlock("pidfile"; stale_age=10)
+    @test lockf.update isa Timer
+    close(lockf.update) # simulate a finalizer running in an undefined order
+    close(lockf)
+end
+
+@assert !ispath("pidfile")
+@testset "mkpidlock for child" begin
+    proc = open(`cat`, "w", devnull)
+    lock = mkpidlock("pidfile", proc)
+    @test isopen(lock.fd)
+    @test isfile("pidfile")
+    close(proc)
+    @test success(proc)
+    sleep(1) # give some time for the other task to finish releasing the lock resources
+    @test !isopen(lock.fd)
+    @test !isfile("pidfile")
+
+    error = @test_throws Base.IOError mkpidlock("pidfile", proc)
+    @test error.value.code == Base.UV_ESRCH
+end
+
+@assert !ispath("pidfile-2")
+@testset "mkpidlock non-blocking stale lock break" begin
+    # mkpidlock with no waiting
+    lockf = mkpidlock("pidfile-2", wait=false)
+    @test lockf.update === nothing
+
+    sleep(1)
+    t = @elapsed @test_throws ErrorException mkpidlock("pidfile-2", wait=false, stale_age=1, poll_interval=1, refresh=0)
+    @test t ≈ 0 atol=1
+
+    sleep(5)
+    t = @elapsed (lockf2 = mkpidlock("pidfile-2", wait=false, stale_age=.1, poll_interval=1, refresh=0))
+    @test t ≈ 0 atol=1
+    close(lockf)
+    close(lockf2)
+end
+
+end; end # cd(tempdir)
+finally
+    umask(old_umask)
+end; end # testset
+
+end # with_logger
diff --git a/stdlib/FileWatching/test/runtests.jl b/stdlib/FileWatching/test/runtests.jl
index ea6985235627de..419ae48dd0a757 100644
--- a/stdlib/FileWatching/test/runtests.jl
+++ b/stdlib/FileWatching/test/runtests.jl
@@ -3,6 +3,8 @@
 using Test, FileWatching
 using Base: uv_error, Experimental
 
+@testset "FileWatching" begin
+
 # This script does the following
 # Sets up N unix pipes (or WSA sockets)
 # For the odd pipes, a byte is written to the write end at intervals specified in intvls
@@ -12,7 +14,7 @@ using Base: uv_error, Experimental
 # Writable ends are always tested for write-ability before a write
 
 n = 20
-intvls = [2, .2, .1, .005]
+intvls = [2, .2, .1, .005, .00001]
 
 pipe_fds = fill((Base.INVALID_OS_HANDLE, Base.INVALID_OS_HANDLE), n)
 for i in 1:n
@@ -22,9 +24,15 @@ for i in 1:n
         uv_error("pipe", ccall(:uv_pipe, Cint, (Ptr{NTuple{2, Base.OS_HANDLE}}, Cint, Cint), Ref(pipe_fds, i), 0, 0))
     end
     Ctype = Sys.iswindows() ? Ptr{Cvoid} : Cint
-    FDmax = Sys.iswindows() ? 0x7fff : (n + 60) # expectations on reasonable values
-    @test 0 <= Int(Base.cconvert(Ctype, pipe_fds[i][1])) <= FDmax
-    @test 0 <= Int(Base.cconvert(Ctype, pipe_fds[i][2])) <= FDmax
+    FDmax = Sys.iswindows() ? 0x7fff : (n + 60 + (isdefined(Main, :Revise) * 30)) # expectations on reasonable values
+    fd_in_limits =
+        0 <= Int(Base.cconvert(Ctype, pipe_fds[i][1])) <= FDmax &&
+        0 <= Int(Base.cconvert(Ctype, pipe_fds[i][2])) <= FDmax
+    # Dump out what file descriptors are open for easier debugging of failure modes
+    if !fd_in_limits && Sys.islinux()
+        run(`ls -la /proc/$(getpid())/fd`)
+    end
+    @test fd_in_limits
 end
 
 function pfd_tst_reads(idx, intvl)
@@ -151,8 +159,8 @@ test2_12992()
 #######################################################################
 # This section tests file watchers.                                   #
 #######################################################################
-const F_GETPATH = Sys.islinux() || Sys.iswindows() || Sys.isapple()  # platforms where F_GETPATH is available
-const F_PATH = F_GETPATH ? "afile.txt" : ""
+F_GETPATH = Sys.islinux() || Sys.iswindows() || Sys.isapple()  # platforms where F_GETPATH is available
+F_PATH = F_GETPATH ? "afile.txt" : ""
 dir = mktempdir()
 file = joinpath(dir, "afile.txt")
 
@@ -425,3 +433,9 @@ unwatch_folder(dir)
 @test isempty(FileWatching.watched_folders)
 rm(file)
 rm(dir)
+
+@testset "Pidfile" begin
+    include("pidfile.jl")
+end
+
+end # testset
diff --git a/stdlib/Future/src/Future.jl b/stdlib/Future/src/Future.jl
index 1d70dba7c84de5..746f6e149a47da 100644
--- a/stdlib/Future/src/Future.jl
+++ b/stdlib/Future/src/Future.jl
@@ -36,7 +36,10 @@ One such step corresponds to the generation of two `Float64` numbers.
 For each different value of `steps`, a large polynomial has to be generated internally.
 One is already pre-computed for `steps=big(10)^20`.
 """
-randjump(r::MersenneTwister, steps::Integer) =
-    Random._randjump(r, Random.DSFMT.calc_jump(steps))
+function randjump(r::MersenneTwister, steps::Integer)
+    j = Random._randjump(r, Random.DSFMT.calc_jump(steps))
+    j.adv_jump += 2*big(steps) # convert to BigInt to prevent overflow
+    j
+end
 
 end # module Future
diff --git a/stdlib/GMP_jll/Project.toml b/stdlib/GMP_jll/Project.toml
new file mode 100644
index 00000000000000..0fc262e562da77
--- /dev/null
+++ b/stdlib/GMP_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "GMP_jll"
+uuid = "781609d7-10c4-51f6-84f2-b8444358ff6d"
+version = "6.2.1+1"
+
+[deps]
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/GMP_jll/src/GMP_jll.jl b/stdlib/GMP_jll/src/GMP_jll.jl
new file mode 100644
index 00000000000000..90daa24b150ed4
--- /dev/null
+++ b/stdlib/GMP_jll/src/GMP_jll.jl
@@ -0,0 +1,53 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/GMP_jll.jl
+baremodule GMP_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libgmp, libgmpxx
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libgmp_handle = C_NULL
+libgmp_path = ""
+libgmpxx_handle = C_NULL
+libgmpxx_path = ""
+
+if Sys.iswindows()
+    const libgmp = "libgmp-10.dll"
+    const libgmpxx = "libgmpxx-4.dll"
+elseif Sys.isapple()
+    const libgmp = "@rpath/libgmp.10.dylib"
+    const libgmpxx = "@rpath/libgmpxx.4.dylib"
+else
+    const libgmp = "libgmp.so.10"
+    const libgmpxx = "libgmpxx.so.4"
+end
+
+function __init__()
+    global libgmp_handle = dlopen(libgmp)
+    global libgmp_path = dlpath(libgmp_handle)
+    global libgmpxx_handle = dlopen(libgmpxx)
+    global libgmpxx_path = dlpath(libgmpxx_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libgmp_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libgmp_path() = libgmp_path
+get_libgmpxx_path() = libgmpxx_path
+
+end  # module GMP_jll
diff --git a/stdlib/GMP_jll/test/runtests.jl b/stdlib/GMP_jll/test/runtests.jl
new file mode 100644
index 00000000000000..7c0d8779452318
--- /dev/null
+++ b/stdlib/GMP_jll/test/runtests.jl
@@ -0,0 +1,8 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, GMP_jll
+
+@testset "GMP_jll" begin
+    vn = VersionNumber(unsafe_string(unsafe_load(cglobal((:__gmp_version, libgmp), Ptr{Cchar}))))
+    @test vn == v"6.2.1"
+end
diff --git a/stdlib/InteractiveUtils/docs/src/index.md b/stdlib/InteractiveUtils/docs/src/index.md
index a8a675e207a18b..9ad4b5a7cea80b 100644
--- a/stdlib/InteractiveUtils/docs/src/index.md
+++ b/stdlib/InteractiveUtils/docs/src/index.md
@@ -1,4 +1,6 @@
-# Interactive Utilities
+# [Interactive Utilities](@id man-interactive-utils)
+
+This module is intended for interactive work. It is loaded automaticaly in [interactive mode](@ref command-line-options).
 
 ```@docs
 InteractiveUtils.apropos
@@ -24,5 +26,6 @@ InteractiveUtils.code_llvm
 InteractiveUtils.@code_llvm
 InteractiveUtils.code_native
 InteractiveUtils.@code_native
+InteractiveUtils.@time_imports
 InteractiveUtils.clipboard
 ```
diff --git a/stdlib/InteractiveUtils/src/InteractiveUtils.jl b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
index af9e720112e338..6c742660ca73c7 100644
--- a/stdlib/InteractiveUtils/src/InteractiveUtils.jl
+++ b/stdlib/InteractiveUtils/src/InteractiveUtils.jl
@@ -6,7 +6,7 @@ Base.Experimental.@optlevel 1
 
 export apropos, edit, less, code_warntype, code_llvm, code_native, methodswith, varinfo,
     versioninfo, subtypes, supertypes, @which, @edit, @less, @functionloc, @code_warntype,
-    @code_typed, @code_lowered, @code_llvm, @code_native, clipboard
+    @code_typed, @code_lowered, @code_llvm, @code_native, @time_imports, clipboard
 
 import Base.Docs.apropos
 
@@ -21,27 +21,61 @@ include("macros.jl")
 include("clipboard.jl")
 
 """
-    varinfo(m::Module=Main, pattern::Regex=r"")
+    varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, minsize::Int = 0)
 
 Return a markdown table giving information about exported global variables in a module, optionally restricted
 to those matching `pattern`.
 
 The memory consumption estimate is an approximate lower bound on the size of the internal structure of the object.
-"""
-function varinfo(m::Module=Main, pattern::Regex=r"")
-    rows =
-        Any[ let value = getfield(m, v)
-                 Any[string(v),
-                     (value===Base || value===Main || value===Core ? "" : format_bytes(summarysize(value))),
-                     summary(value)]
-             end
-             for v in sort!(names(m)) if isdefined(m, v) && occursin(pattern, string(v)) ]
 
+- `all` : also list non-exported objects defined in the module, deprecated objects, and compiler-generated objects.
+- `imported` : also list objects explicitly imported from other modules.
+- `recursive` : recursively include objects in sub-modules, observing the same settings in each.
+- `sortby` : the column to sort results by. Options are `:name` (default), `:size`, and `:summary`.
+- `minsize` : only includes objects with size at least `minsize` bytes. Defaults to `0`.
+"""
+function varinfo(m::Module=Main, pattern::Regex=r""; all::Bool = false, imported::Bool = false, sortby::Symbol = :name, recursive::Bool = false, minsize::Int=0)
+    sortby in (:name, :size, :summary) || throw(ArgumentError("Unrecognized `sortby` value `:$sortby`. Possible options are `:name`, `:size`, and `:summary`"))
+    rows = Vector{Any}[]
+    workqueue = [(m, ""),]
+    while !isempty(workqueue)
+        m2, prep = popfirst!(workqueue)
+        for v in names(m2; all, imported)
+            if !isdefined(m2, v) || !occursin(pattern, string(v))
+                continue
+            end
+            value = getfield(m2, v)
+            isbuiltin = value === Base || value === Main || value === Core
+            if recursive && !isbuiltin && isa(value, Module) && value !== m2 && nameof(value) === v && parentmodule(value) === m2
+                push!(workqueue, (value, "$prep$v."))
+            end
+            ssize_str, ssize = if isbuiltin
+                    ("", typemax(Int))
+                else
+                    ss = summarysize(value)
+                    (format_bytes(ss), ss)
+                end
+            if ssize >= minsize
+                push!(rows, Any[string(prep, v), ssize_str, summary(value), ssize])
+            end
+        end
+    end
+    let (col, rev) = if sortby == :name
+            1, false
+        elseif sortby == :size
+            4, true
+        elseif sortby == :summary
+            3, false
+        else
+            @assert "unreachable"
+        end
+        sort!(rows; by=r->r[col], rev)
+    end
     pushfirst!(rows, Any["name", "size", "summary"])
 
-    return Markdown.MD(Any[Markdown.Table(rows, Symbol[:l, :r, :l])])
+    return Markdown.MD(Any[Markdown.Table(map(r->r[1:3], rows), Symbol[:l, :r, :l])])
 end
-varinfo(pat::Regex) = varinfo(Main, pat)
+varinfo(pat::Regex; kwargs...) = varinfo(Main, pat, kwargs...)
 
 """
     versioninfo(io::IO=stdout; verbose::Bool=false)
@@ -50,6 +84,12 @@ Print information about the version of Julia in use. The output is
 controlled with boolean keyword arguments:
 
 - `verbose`: print all additional information
+
+!!! warning "Warning"
+    The output of this function may contain sensitive information. Before sharing the output,
+    please review the output and remove any data that should not be shared publicly.
+
+See also: [`VERSION`](@ref).
 """
 function versioninfo(io::IO=stdout; verbose::Bool=false)
     println(io, "Julia Version $VERSION")
@@ -82,13 +122,13 @@ function versioninfo(io::IO=stdout; verbose::Bool=false)
     if verbose
         cpuio = IOBuffer() # print cpu_summary with correct alignment
         Sys.cpu_summary(cpuio)
-        for (i, line) in enumerate(split(String(take!(cpuio)), "\n"))
+        for (i, line) in enumerate(split(chomp(String(take!(cpuio))), "\n"))
             prefix = i == 1 ? "  CPU: " : "       "
             println(io, prefix, line)
         end
     else
         cpu = Sys.cpu_info()
-        println(io, "  CPU: ", cpu[1].model)
+        println(io, "  CPU: ", length(cpu), " × ", cpu[1].model)
     end
 
     if verbose
@@ -101,11 +141,20 @@ function versioninfo(io::IO=stdout; verbose::Bool=false)
     println(io, "  WORD_SIZE: ", Sys.WORD_SIZE)
     println(io, "  LIBM: ",Base.libm_name)
     println(io, "  LLVM: libLLVM-",Base.libllvm_version," (", Sys.JIT, ", ", Sys.CPU_NAME, ")")
+    println(io, "  Threads: ", Threads.nthreads(), " on ", Sys.CPU_THREADS, " virtual cores")
 
-    env_strs = [String[ "  $(k) = $(v)" for (k,v) in ENV if occursin(r"JULIA", k)];
-                (verbose ?
-                 String[ "  $(k) = $(v)" for (k,v) in ENV if occursin(r"PATH|FLAG|^TERM$|HOME", k)] :
-                 [])]
+    function is_nonverbose_env(k::String)
+        return occursin(r"^JULIA_|^DYLD_|^LD_", k)
+    end
+    function is_verbose_env(k::String)
+        return occursin(r"PATH|FLAG|^TERM$|HOME", k) && !is_nonverbose_env(k)
+    end
+    env_strs = String[
+        String["  $(k) = $(v)" for (k,v) in ENV if is_nonverbose_env(uppercase(k))];
+        (verbose ?
+         String["  $(k) = $(v)" for (k,v) in ENV if is_verbose_env(uppercase(k))] :
+         String[]);
+    ]
     if !isempty(env_strs)
         println(io, "Environment:")
         for str in env_strs
@@ -134,12 +183,12 @@ The optional second argument restricts the search to a particular module or func
 If keyword `supertypes` is `true`, also return arguments with a parent type of `typ`,
 excluding type `Any`.
 """
-function methodswith(t::Type, f::Function, meths = Method[]; supertypes::Bool=false)
+function methodswith(t::Type, f::Base.Callable, meths = Method[]; supertypes::Bool=false)
     for d in methods(f)
         if any(function (x)
                    let x = rewrap_unionall(x, d.sig)
                        (type_close_enough(x, t) ||
-                        (supertypes ? (t <: x && (!isa(x,TypeVar) || x.ub != Any)) :
+                        (supertypes ? (isa(x, Type) && t <: x && (!isa(x,TypeVar) || x.ub != Any)) :
                          (isa(x,TypeVar) && x.ub != Any && t == x.ub)) &&
                         x != Any)
                    end
@@ -156,7 +205,7 @@ function _methodswith(t::Type, m::Module, supertypes::Bool)
     for nm in names(m)
         if isdefined(m, nm)
             f = getfield(m, nm)
-            if isa(f, Function)
+            if isa(f, Base.Callable)
                 methodswith(t, f, meths; supertypes = supertypes)
             end
         end
@@ -175,54 +224,35 @@ function methodswith(t::Type; supertypes::Bool=false)
 end
 
 # subtypes
-function _subtypes(m::Module, x::Type, sts=Base.IdSet{Any}(), visited=Base.IdSet{Module}())
-    push!(visited, m)
+function _subtypes_in!(mods::Array, x::Type)
     xt = unwrap_unionall(x)
-    if !isa(xt, DataType)
-        return sts
+    if !isabstracttype(x) || !isa(xt, DataType)
+        # Fast path
+        return Type[]
     end
-    xt = xt::DataType
-    for s in names(m, all = true)
-        if isdefined(m, s) && !isdeprecated(m, s)
-            t = getfield(m, s)
-            if isa(t, DataType)
-                t = t::DataType
-                if t.name.name === s && supertype(t).name == xt.name
-                    ti = typeintersect(t, x)
-                    ti != Bottom && push!(sts, ti)
-                end
-            elseif isa(t, UnionAll)
-                t = t::UnionAll
-                tt = unwrap_unionall(t)
-                isa(tt, DataType) || continue
-                tt = tt::DataType
-                if tt.name.name === s && supertype(tt).name == xt.name
-                    ti = typeintersect(t, x)
-                    ti != Bottom && push!(sts, ti)
+    sts = Vector{Any}()
+    while !isempty(mods)
+        m = pop!(mods)
+        xt = xt::DataType
+        for s in names(m, all = true)
+            if isdefined(m, s) && !isdeprecated(m, s)
+                t = getfield(m, s)
+                dt = isa(t, UnionAll) ? unwrap_unionall(t) : t
+                if isa(dt, DataType)
+                    if dt.name.name === s && dt.name.module == m && supertype(dt).name == xt.name
+                        ti = typeintersect(t, x)
+                        ti != Bottom && push!(sts, ti)
+                    end
+                elseif isa(t, Module) && nameof(t) === s && parentmodule(t) === m && t !== m
+                    t === Base || push!(mods, t) # exclude Base, since it also parented by Main
                 end
-            elseif isa(t, Module)
-                t = t::Module
-                in(t, visited) || _subtypes(t, x, sts, visited)
             end
         end
     end
-    return sts
+    return permute!(sts, sortperm(map(string, sts)))
 end
 
-function _subtypes_in(mods::Array, x::Type)
-    if !isabstracttype(x)
-        # Fast path
-        return Type[]
-    end
-    sts = Base.IdSet{Any}()
-    visited = Base.IdSet{Module}()
-    for m in mods
-        _subtypes(m, x, sts, visited)
-    end
-    return sort!(collect(sts), by=string)
-end
-
-subtypes(m::Module, x::Type) = _subtypes_in([m], x)
+subtypes(m::Module, x::Type) = _subtypes_in!([m], x)
 
 """
     subtypes(T::DataType)
@@ -230,6 +260,8 @@ subtypes(m::Module, x::Type) = _subtypes_in([m], x)
 Return a list of immediate subtypes of DataType `T`. Note that all currently loaded subtypes
 are included, including those not visible in the current module.
 
+See also [`supertype`](@ref), [`supertypes`](@ref), [`methodswith`](@ref).
+
 # Examples
 ```jldoctest
 julia> subtypes(Integer)
@@ -239,7 +271,7 @@ julia> subtypes(Integer)
  Unsigned
 ```
 """
-subtypes(x::Type) = _subtypes_in(Base.loaded_modules_array(), x)
+subtypes(x::Type) = _subtypes_in!(Base.loaded_modules_array(), x)
 
 """
     supertypes(T::Type)
@@ -248,6 +280,8 @@ Return a tuple `(T, ..., Any)` of `T` and all its supertypes, as determined by
 successive calls to the [`supertype`](@ref) function, listed in order of `<:`
 and terminated by `Any`.
 
+See also [`subtypes`](@ref).
+
 # Examples
 ```jldoctest
 julia> supertypes(Int)
@@ -261,74 +295,6 @@ function supertypes(T::Type)
     return S === T ? (T,) : (T, supertypes(S)...)
 end
 
-# dumptype is for displaying abstract type hierarchies,
-# based on Jameson Nash's typetree.jl in https://github.com/JuliaArchive/Examples
-function dumptype(io::IO, @nospecialize(x), n::Int, indent)
-    print(io, x)
-    n == 0 && return  # too deeply nested
-    isa(x, DataType) && x.abstract && dumpsubtypes(io, x, Main, n, indent)
-    nothing
-end
-
-directsubtype(a::DataType, b::DataType) = supertype(a).name === b.name
-directsubtype(a::UnionAll, b::DataType) = directsubtype(a.body, b)
-directsubtype(a::Union, b::DataType) = directsubtype(a.a, b) || directsubtype(a.b, b)
-# Fallback to handle TypeVar's
-directsubtype(a, b::DataType) = false
-function dumpsubtypes(io::IO, x::DataType, m::Module, n::Int, indent)
-    for s in names(m, all = true)
-        if isdefined(m, s) && !isdeprecated(m, s)
-            t = getfield(m, s)
-            if t === x || t === m
-                continue
-            elseif isa(t, Module) && nameof(t) === s && parentmodule(t) === m
-                # recurse into primary module bindings
-                dumpsubtypes(io, x, t, n, indent)
-            elseif isa(t, UnionAll) && directsubtype(t::UnionAll, x)
-                dt = unwrap_unionall(t)
-                println(io)
-                if isa(dt, DataType) && dt.name.wrapper === t
-                    # primary type binding
-                    print(io, indent, "  ")
-                    dumptype(io, dt, n - 1, string(indent, "  "))
-                else
-                    # aliases to types
-                    print(io, indent, "  ", m, ".", s, "{")
-                    tvar_io::IOContext = io
-                    tp = t
-                    while true
-                        show(tvar_io, tp.var)
-                        tvar_io = IOContext(tvar_io, :unionall_env => tp.var)
-                        tp = tp.body
-                        if isa(tp, UnionAll)
-                            print(io, ", ")
-                        else
-                            print(io, "} = ")
-                            break
-                        end
-                    end
-                    show(tvar_io, tp)
-                end
-            elseif isa(t, Union) && directsubtype(t::Union, x)
-                println(io)
-                print(io, indent, "  ", m, ".", s, " = ", t)
-            elseif isa(t, DataType) && directsubtype(t::DataType, x)
-                println(io)
-                if t.name.module !== m || t.name.name != s
-                    # aliases to types
-                    print(io, indent, "  ", m, ".", s, " = ")
-                    show(io, t)
-                else
-                    # primary type binding
-                    print(io, indent, "  ")
-                    dumptype(io, t, n - 1, string(indent, "  "))
-                end
-            end
-        end
-    end
-    nothing
-end
-
 # TODO: @deprecate peakflops to LinearAlgebra
 export peakflops
 """
@@ -365,15 +331,18 @@ function report_bug(kind)
             mktempdir() do tmp
                 old_load_path = copy(LOAD_PATH)
                 push!(empty!(LOAD_PATH), joinpath(tmp, "Project.toml"))
+                old_active_project = Base.ACTIVE_PROJECT[]
+                Base.ACTIVE_PROJECT[] = nothing
                 Pkg.add(Pkg.PackageSpec(BugReportingId.name, BugReportingId.uuid))
                 BugReporting = Base.require(BugReportingId)
                 append!(empty!(LOAD_PATH), old_load_path)
+                Base.ACTIVE_PROJECT[] = old_active_project
             end
         end
     else
         BugReporting = Base.require(BugReportingId)
     end
-    return Base.invokelatest(BugReporting.make_interactive_report, kind)
+    return Base.invokelatest(BugReporting.make_interactive_report, kind, ARGS)
 end
 
 end
diff --git a/stdlib/InteractiveUtils/src/clipboard.jl b/stdlib/InteractiveUtils/src/clipboard.jl
index b4c67da4d16dd3..7bc718b91b2bd5 100644
--- a/stdlib/InteractiveUtils/src/clipboard.jl
+++ b/stdlib/InteractiveUtils/src/clipboard.jl
@@ -37,23 +37,27 @@ elseif Sys.islinux() || Sys.KERNEL === :FreeBSD
                 `xsel --input --clipboard` :
                 `xsel -c`,
             :xclip => `xclip -silent -in -selection clipboard`,
+            :wlclipboard => `wl-copy`
         )
     const _clipboard_paste = Dict(
             :xsel  => Sys.islinux() ?
                 `xsel --nodetach --output --clipboard` :
                 `xsel -p`,
             :xclip => `xclip -quiet -out -selection clipboard`,
+            :wlclipboard => `wl-paste`
         )
     function clipboardcmd()
         global _clipboardcmd
         _clipboardcmd !== nothing && return _clipboardcmd
-        for cmd in (:xclip, :xsel)
-            success(pipeline(`which $cmd`, devnull)) && return _clipboardcmd = cmd
+        for cmd in (:xclip, :xsel, :wlclipboard)
+            # wl-clipboard ships wl-copy/paste individually
+            c = cmd == :wlclipboard ? Symbol("wl-copy") : cmd
+            success(pipeline(`which $c`, devnull)) && return _clipboardcmd = cmd
         end
         pkgs = @static if Sys.KERNEL === :FreeBSD
             "x11/xsel or x11/xclip"
         else
-            "xsel or xclip"
+            "xsel or xclip or wl-clipboard"
         end
         error("no clipboard command found, please install $pkgs")
     end
diff --git a/stdlib/InteractiveUtils/src/codeview.jl b/stdlib/InteractiveUtils/src/codeview.jl
index 99874fd5877f58..cfb3835c671a6b 100644
--- a/stdlib/InteractiveUtils/src/codeview.jl
+++ b/stdlib/InteractiveUtils/src/codeview.jl
@@ -1,17 +1,43 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# highlighting settings
+const highlighting = Dict{Symbol, Bool}(
+    :warntype => true,
+    :llvm => true,
+    :native => true,
+)
+
+const llstyle = Dict{Symbol, Tuple{Bool, Union{Symbol, Int}}}(
+    :default     => (false, :normal), # e.g. comma, equal sign, unknown token
+    :comment     => (false, :light_black),
+    :label       => (false, :light_red),
+    :instruction => ( true, :light_cyan),
+    :type        => (false, :cyan),
+    :number      => (false, :yellow),
+    :bracket     => (false, :yellow),
+    :variable    => (false, :normal), # e.g. variable, register
+    :keyword     => (false, :light_magenta),
+    :funcname    => (false, :light_yellow),
+)
+
+function printstyled_ll(io::IO, x, s::Symbol, trailing_spaces="")
+    printstyled(io, x, bold=llstyle[s][1], color=llstyle[s][2])
+    print(io, trailing_spaces)
+end
+
 # displaying type warnings
 
 function warntype_type_printer(io::IO, @nospecialize(ty), used::Bool)
     used || return
-    if ty isa Type && (!Base.isdispatchelem(ty) || ty == Core.Box)
-        if ty isa Union && Base.is_expected_union(ty)
-            Base.emphasize(io, "::$ty", Base.warn_color()) # more mild user notification
-        else
-            Base.emphasize(io, "::$ty")
-        end
+    str = "::$ty"
+    if !highlighting[:warntype]
+        print(io, str)
+    elseif ty isa Union && Base.is_expected_union(ty)
+        Base.emphasize(io, str, Base.warn_color()) # more mild user notification
+    elseif ty isa Type && (!Base.isdispatchelem(ty) || ty == Core.Box)
+        Base.emphasize(io, str)
     else
-        Base.printstyled(io, "::$ty", color=:cyan) # show the "good" type
+        Base.printstyled(io, str, color=:cyan) # show the "good" type
     end
     nothing
 end
@@ -31,78 +57,166 @@ Keyword argument `debuginfo` may be one of `:source` or `:none` (default), to sp
 
 See [`@code_warntype`](@ref man-code-warntype) for more information.
 """
-function code_warntype(io::IO, @nospecialize(f), @nospecialize(t); debuginfo::Symbol=:default, optimize::Bool=false)
+function code_warntype(io::IO, @nospecialize(f), @nospecialize(t=Base.default_tt(f));
+                       debuginfo::Symbol=:default, optimize::Bool=false, kwargs...)
     debuginfo = Base.IRShow.debuginfo(debuginfo)
     lineprinter = Base.IRShow.__debuginfo[debuginfo]
-    for (src, rettype) in code_typed(f, t, optimize=optimize)
+    for (src, rettype) in code_typed(f, t; optimize, kwargs...)
+        if !(src isa Core.CodeInfo)
+            println(io, src)
+            println(io, "  failed to infer")
+            continue
+        end
         lambda_io::IOContext = io
+        p = src.parent
+        nargs::Int = 0
+        if p isa Core.MethodInstance
+            println(io, p)
+            print(io, "  from ")
+            println(io, p.def)
+            p.def isa Method && (nargs = p.def.nargs)
+            if !isempty(p.sparam_vals)
+                println(io, "Static Parameters")
+                sig = p.def.sig
+                warn_color = Base.warn_color() # more mild user notification
+                for i = 1:length(p.sparam_vals)
+                    sig = sig::UnionAll
+                    name = sig.var.name
+                    val = p.sparam_vals[i]
+                    print_highlighted(io::IO, v::String, color::Symbol) =
+                        if highlighting[:warntype]
+                            Base.printstyled(io, v; color)
+                        else
+                            Base.print(io, v)
+                        end
+                    if val isa TypeVar
+                        if val.lb === Union{}
+                            print(io, "  ", name, " <: ")
+                            print_highlighted(io, "$(val.ub)", warn_color)
+                        elseif val.ub === Any
+                            print(io, "  ", sig.var.name, " >: ")
+                            print_highlighted(io, "$(val.lb)", warn_color)
+                        else
+                            print(io, "  ")
+                            print_highlighted(io, "$(val.lb)", warn_color)
+                            print(io, " <: ", sig.var.name, " <: ")
+                            print_highlighted(io, "$(val.ub)", warn_color)
+                        end
+                    elseif val isa typeof(Vararg)
+                        print(io, "  ", name, "::")
+                        print_highlighted(io, "Int", warn_color)
+                    else
+                        print(io, "  ", sig.var.name, " = ")
+                        print_highlighted(io, "$(val)", :cyan) # show the "good" type
+                    end
+                    println(io)
+                    sig = sig.body
+                end
+            end
+        end
         if src.slotnames !== nothing
             slotnames = Base.sourceinfo_slotnames(src)
             lambda_io = IOContext(lambda_io, :SOURCE_SLOTNAMES => slotnames)
-            println(io, "Variables")
             slottypes = src.slottypes
+            nargs > 0 && println(io, "Arguments")
             for i = 1:length(slotnames)
+                if i == nargs + 1
+                    println(io, "Locals")
+                end
                 print(io, "  ", slotnames[i])
                 if isa(slottypes, Vector{Any})
                     warntype_type_printer(io, slottypes[i], true)
                 end
                 println(io)
             end
-            println(io)
         end
         print(io, "Body")
         warntype_type_printer(io, rettype, true)
         println(io)
-        # TODO: static parameter values
-        Base.IRShow.show_ir(lambda_io, src, lineprinter(src), warntype_type_printer)
+        irshow_config = Base.IRShow.IRShowConfig(lineprinter(src), warntype_type_printer)
+        Base.IRShow.show_ir(lambda_io, src, irshow_config)
+        println(io)
     end
     nothing
 end
-code_warntype(@nospecialize(f), @nospecialize(t); kwargs...) =
+code_warntype(@nospecialize(f), @nospecialize(t=Base.default_tt(f)); kwargs...) =
     code_warntype(stdout, f, t; kwargs...)
 
 import Base.CodegenParams
 
+const GENERIC_SIG_WARNING = "; WARNING: This code may not match what actually runs.\n"
+const OC_MISMATCH_WARNING =
+"""
+; WARNING: The pre-inferred opaque closure is not callable with the given arguments
+;          and will error on dispatch with this signature.
+"""
+
 # Printing code representations in IR and assembly
 function _dump_function(@nospecialize(f), @nospecialize(t), native::Bool, wrapper::Bool,
                         strip_ir_metadata::Bool, dump_module::Bool, syntax::Symbol,
-                        optimize::Bool, debuginfo::Symbol,
-                        params::CodegenParams=CodegenParams())
+                        optimize::Bool, debuginfo::Symbol, binary::Bool,
+                        params::CodegenParams=CodegenParams(debug_info_kind=Cint(0)))
     ccall(:jl_is_in_pure_context, Bool, ()) && error("code reflection cannot be used from generated functions")
     if isa(f, Core.Builtin)
         throw(ArgumentError("argument is not a generic function"))
     end
+    warning = ""
     # get the MethodInstance for the method match
-    world = typemax(UInt)
-    meth = which(f, t)
-    t = to_tuple_type(t)
-    tt = signature_type(f, t)
-    (ti, env) = ccall(:jl_type_intersection_with_env, Any, (Any, Any), tt, meth.sig)::Core.SimpleVector
-    meth = Base.func_for_method_checked(meth, ti, env)
-    linfo = ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, (Any, Any, Any, UInt), meth, ti, env, world)
+    if !isa(f, Core.OpaqueClosure)
+        world = Base.get_world_counter()
+        match = Base._which(signature_type(f, t), world)
+        linfo = Core.Compiler.specialize_method(match)
+        # TODO: use jl_is_cacheable_sig instead of isdispatchtuple
+        isdispatchtuple(linfo.specTypes) || (warning = GENERIC_SIG_WARNING)
+    else
+        world = UInt64(f.world)
+        if Core.Compiler.is_source_inferred(f.source.source)
+            # OC was constructed from inferred source. There's only one
+            # specialization and we can't infer anything more precise either.
+            world = f.source.primary_world
+            linfo = f.source.specializations[1]
+            Core.Compiler.hasintersect(typeof(f).parameters[1], t) || (warning = OC_MISMATCH_WARNING)
+        else
+            linfo = Core.Compiler.specialize_method(f.source, Tuple{typeof(f.captures), t.parameters...}, Core.svec())
+            actual = isdispatchtuple(linfo.specTypes)
+            isdispatchtuple(linfo.specTypes) || (warning = GENERIC_SIG_WARNING)
+        end
+    end
     # get the code for it
+    if debuginfo === :default
+        debuginfo = :source
+    elseif debuginfo !== :source && debuginfo !== :none
+        throw(ArgumentError("'debuginfo' must be either :source or :none"))
+    end
     if native
-        str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo)
+        if syntax !== :att && syntax !== :intel
+            throw(ArgumentError("'syntax' must be either :intel or :att"))
+        end
+        if dump_module
+            str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary, params)
+        else
+            str = _dump_function_linfo_native(linfo, world, wrapper, syntax, debuginfo, binary)
+        end
     else
         str = _dump_function_linfo_llvm(linfo, world, wrapper, strip_ir_metadata, dump_module, optimize, debuginfo, params)
     end
-    # TODO: use jl_is_cacheable_sig instead of isdispatchtuple
-    isdispatchtuple(linfo.specTypes) || (str = "; WARNING: This code may not match what actually runs.\n" * str)
+    str = warning * str
     return str
 end
 
-function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol)
-    if syntax !== :att && syntax !== :intel
-        throw(ArgumentError("'syntax' must be either :intel or :att"))
-    end
-    if debuginfo === :default
-        debuginfo = :source
-    elseif debuginfo !== :source && debuginfo !== :none
-        throw(ArgumentError("'debuginfo' must be either :source or :none"))
-    end
+function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool)
     str = ccall(:jl_dump_method_asm, Ref{String},
-                (Any, UInt, Cint, Bool, Ptr{UInt8}, Ptr{UInt8}),
-                linfo, world, 0, wrapper, syntax, debuginfo)
+                (Any, UInt, Bool, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool),
+                linfo, world, false, wrapper, syntax, debuginfo, binary)
+    return str
+end
+
+function _dump_function_linfo_native(linfo::Core.MethodInstance, world::UInt, wrapper::Bool, syntax::Symbol, debuginfo::Symbol, binary::Bool, params::CodegenParams)
+    llvmf = ccall(:jl_get_llvmf_defn, Ptr{Cvoid}, (Any, UInt, Bool, Bool, CodegenParams), linfo, world, wrapper, true, params)
+    llvmf == C_NULL && error("could not compile the specified method")
+    str = ccall(:jl_dump_function_asm, Ref{String},
+                (Ptr{Cvoid}, Bool, Ptr{UInt8}, Ptr{UInt8}, Bool),
+                llvmf, false, syntax, debuginfo, binary)
     return str
 end
 
@@ -111,11 +225,6 @@ function _dump_function_linfo_llvm(
         strip_ir_metadata::Bool, dump_module::Bool,
         optimize::Bool, debuginfo::Symbol,
         params::CodegenParams)
-    if debuginfo === :default
-        debuginfo = :source
-    elseif debuginfo !== :source && debuginfo !== :none
-        throw(ArgumentError("'debuginfo' must be either :source or :none"))
-    end
     llvmf = ccall(:jl_get_llvmf_defn, Ptr{Cvoid}, (Any, UInt, Bool, Bool, CodegenParams), linfo, world, wrapper, optimize, params)
     llvmf == C_NULL && error("could not compile the specified method")
     str = ccall(:jl_dump_function_ir, Ref{String},
@@ -135,24 +244,246 @@ All metadata and dbg.* calls are removed from the printed bitcode. For the full
 To dump the entire module that encapsulates the function (with declarations), set the `dump_module` keyword to true.
 Keyword argument `debuginfo` may be one of source (default) or none, to specify the verbosity of code comments.
 """
-code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) =
-    print(io, _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo))
-code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Tuple); raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) =
+function code_llvm(io::IO, @nospecialize(f), @nospecialize(types), raw::Bool,
+                   dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default)
+    d = _dump_function(f, types, false, false, !raw, dump_module, :att, optimize, debuginfo, false)
+    if highlighting[:llvm] && get(io, :color, false)
+        print_llvm(io, d)
+    else
+        print(io, d)
+    end
+end
+code_llvm(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw::Bool=false, dump_module::Bool=false, optimize::Bool=true, debuginfo::Symbol=:default) =
     code_llvm(io, f, types, raw, dump_module, optimize, debuginfo)
-code_llvm(@nospecialize(f), @nospecialize(types=Tuple); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) =
-    code_llvm(stdout, f, types; raw=raw, dump_module=dump_module, optimize=optimize, debuginfo=debuginfo)
-
+code_llvm(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); raw=false, dump_module=false, optimize=true, debuginfo::Symbol=:default) =
+    code_llvm(stdout, f, types; raw, dump_module, optimize, debuginfo)
 
 """
-    code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default)
+    code_native([io=stdout,], f, types; syntax=:att, debuginfo=:default, binary=false, dump_module=true)
 
 Prints the native assembly instructions generated for running the method matching the given
 generic function and type signature to `io`.
-Switch assembly syntax using `syntax` symbol parameter set to `:att` for AT&T syntax or `:intel` for Intel syntax.
-Keyword argument `debuginfo` may be one of source (default) or none, to specify the verbosity of code comments.
+
+* Set assembly syntax by setting `syntax` to `:att` (default) for AT&T syntax or `:intel` for Intel syntax.
+* Specify verbosity of code comments by setting `debuginfo` to `:source` (default) or `:none`.
+* If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address.
+* If `dump_module` is `false`, do not print metadata such as rodata or directives.
+
+See also: [`@code_native`](@ref), [`code_llvm`](@ref), [`code_typed`](@ref) and [`code_lowered`](@ref)
 """
-code_native(io::IO, @nospecialize(f), @nospecialize(types=Tuple); syntax::Symbol=:att, debuginfo::Symbol=:default) =
-    print(io, _dump_function(f, types, true, false, false, false, syntax, true, debuginfo))
-code_native(@nospecialize(f), @nospecialize(types=Tuple); syntax::Symbol=:att, debuginfo::Symbol=:default) =
-    code_native(stdout, f, types; syntax=syntax, debuginfo=debuginfo)
-code_native(::IO, ::Any, ::Symbol) = error("illegal code_native call") # resolve ambiguous call
+function code_native(io::IO, @nospecialize(f), @nospecialize(types=Base.default_tt(f));
+                     dump_module::Bool=true, syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false)
+    d = _dump_function(f, types, true, false, false, dump_module, syntax, true, debuginfo, binary)
+    if highlighting[:native] && get(io, :color, false)
+        print_native(io, d)
+    else
+        print(io, d)
+    end
+end
+code_native(@nospecialize(f), @nospecialize(types=Base.default_tt(f)); dump_module::Bool=true, syntax::Symbol=:att, debuginfo::Symbol=:default, binary::Bool=false) =
+    code_native(stdout, f, types; dump_module, syntax, debuginfo, binary)
+code_native(::IO, ::Any, ::Symbol) = error("invalid code_native call") # resolve ambiguous call
+
+## colorized IR and assembly printing
+
+const num_regex = r"^(?:\$?-?\d+|0x[0-9A-Fa-f]+|-?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?)$"
+
+function print_llvm(io::IO, code::String)
+    buf = IOBuffer(code)
+    for line in eachline(buf)
+        m = match(r"^(\s*)((?:[^;]|;\")*)(.*)$", line)
+        m === nothing && continue
+        indent, tokens, comment = m.captures
+        print(io, indent)
+        print_llvm_tokens(io, tokens)
+        printstyled_ll(io, comment, :comment)
+        println(io)
+    end
+end
+
+const llvm_types =
+    r"^(?:void|half|float|double|x86_\w+|ppc_\w+|label|metadata|type|opaque|token|i\d+)$"
+const llvm_cond = r"^(?:[ou]?eq|[ou]?ne|[uso][gl][te]|ord|uno)$" # true|false
+
+function print_llvm_tokens(io, tokens)
+    m = match(r"^((?:[^\s:]+:)?)(\s*)(.*)", tokens)
+    if m !== nothing
+        label, spaces, tokens = m.captures
+        printstyled_ll(io, label, :label, spaces)
+    end
+    m = match(r"^(%[^\s=]+)(\s*)=(\s*)(.*)", tokens)
+    if m !== nothing
+        result, spaces, spaces2, tokens = m.captures
+        printstyled_ll(io, result, :variable, spaces)
+        printstyled_ll(io, '=', :default, spaces2)
+    end
+    m = match(r"^([a-z]\w*)(\s*)(.*)", tokens)
+    if m !== nothing
+        inst, spaces, tokens = m.captures
+        iskeyword = occursin(r"^(?:define|declare|type)$", inst) || occursin("=", tokens)
+        printstyled_ll(io, inst, iskeyword ? :keyword : :instruction, spaces)
+    end
+
+    print_llvm_operands(io, tokens)
+end
+
+function print_llvm_operands(io, tokens)
+    while !isempty(tokens)
+        tokens = print_llvm_operand(io, tokens)
+    end
+    return tokens
+end
+
+function print_llvm_operand(io, tokens)
+    islabel = false
+    while !isempty(tokens)
+        m = match(r"^,(\s*)(.*)", tokens)
+        if m !== nothing
+            spaces, tokens = m.captures
+            printstyled_ll(io, ',', :default, spaces)
+            break
+        end
+        m = match(r"^(\*+|=)(\s*)(.*)", tokens)
+        if m !== nothing
+            sym, spaces, tokens = m.captures
+            printstyled_ll(io, sym, :default, spaces)
+            continue
+        end
+        m = match(r"^(\"[^\"]*\")(\s*)(.*)", tokens)
+        if m !== nothing
+            str, spaces, tokens = m.captures
+            printstyled_ll(io, str, :variable, spaces)
+            continue
+        end
+        m = match(r"^([({\[<])(\s*)(.*)", tokens)
+        if m !== nothing
+            bracket, spaces, tokens = m.captures
+            printstyled_ll(io, bracket, :bracket, spaces)
+            tokens = print_llvm_operands(io, tokens) # enter
+            continue
+        end
+        m = match(r"^([)}\]>])(\s*)(.*)", tokens)
+        if m !== nothing
+            bracket, spaces, tokens = m.captures
+            printstyled_ll(io, bracket, :bracket, spaces)
+            break # leave
+        end
+
+        m = match(r"^([^\s,*=(){}\[\]<>]+)(\s*)(.*)", tokens)
+        m === nothing && break
+        token, spaces, tokens = m.captures
+        if occursin(llvm_types, token)
+            printstyled_ll(io, token, :type)
+            islabel = token == "label"
+        elseif occursin(llvm_cond, token) # condition code is instruction-level
+            printstyled_ll(io, token, :instruction)
+        elseif occursin(num_regex, token)
+            printstyled_ll(io, token, :number)
+        elseif occursin(r"^@.+$", token)
+            printstyled_ll(io, token, :funcname)
+        elseif occursin(r"^%.+$", token)
+            islabel |= occursin(r"^%[^\d].*$", token) & occursin(r"^\]", tokens)
+            printstyled_ll(io, token, islabel ? :label : :variable)
+            islabel = false
+        elseif occursin(r"^[a-z]\w+$", token)
+            printstyled_ll(io, token, :keyword)
+        else
+            printstyled_ll(io, token, :default)
+        end
+        print(io, spaces)
+    end
+    return tokens
+end
+
+function print_native(io::IO, code::String, arch::Symbol=sys_arch_category())
+    archv = Val(arch)
+    buf = IOBuffer(code)
+    for line in eachline(buf)
+        m = match(r"^(\s*)((?:[^;#/]|#\S|;\"|/[^/])*)(.*)$", line)
+        m === nothing && continue
+        indent, tokens, comment = m.captures
+        print(io, indent)
+        print_native_tokens(io, tokens, archv)
+        printstyled_ll(io, comment, :comment)
+        println(io)
+    end
+end
+
+function sys_arch_category()
+    if Sys.ARCH === :x86_64 || Sys.ARCH === :i686
+        :x86
+    elseif Sys.ARCH === :aarch64 || startswith(string(Sys.ARCH), "arm")
+        :arm
+    else
+        :unsupported
+    end
+end
+
+print_native_tokens(io, line, ::Val) = print(io, line)
+
+const x86_ptr = r"^(?:(?:[xyz]mm|[dq])?word|byte|ptr|offset)$"
+const avx512flags = r"^(?:z|r[nduz]-sae|sae|1to1?\d)$"
+const arm_cond = r"^(?:eq|ne|cs|ho|cc|lo|mi|pl|vs|vc|hi|ls|[lg][te]|al|nv)$"
+const arm_keywords = r"^(?:lsl|lsr|asr|ror|rrx|!|/[zm])$"
+
+function print_native_tokens(io, tokens, arch::Union{Val{:x86}, Val{:arm}})
+    x86 = arch isa Val{:x86}
+    m = match(r"^((?:[^\s:]+:|\"[^\"]+\":)?)(\s*)(.*)", tokens)
+    if m !== nothing
+        label, spaces, tokens = m.captures
+        printstyled_ll(io, label, :label, spaces)
+    end
+    haslabel = false
+    m = match(r"^([a-z][\w.]*)(\s*)(.*)", tokens)
+    if m !== nothing
+        instruction, spaces, tokens = m.captures
+        printstyled_ll(io, instruction, :instruction, spaces)
+        haslabel = occursin(r"^(?:bl?|bl?\.\w{2,5}|[ct]bn?z)?$", instruction)
+    end
+
+    isfuncname = false
+    while !isempty(tokens)
+        m = match(r"^([,:*])(\s*)(.*)", tokens)
+        if m !== nothing
+            sym, spaces, tokens = m.captures
+            printstyled_ll(io, sym, :default, spaces)
+            isfuncname = false
+            continue
+        end
+        m = match(r"^([(){}\[\]])(\s*)(.*)", tokens)
+        if m !== nothing
+            bracket, spaces, tokens = m.captures
+            printstyled_ll(io, bracket, :bracket, spaces)
+            continue
+        end
+        m = match(r"^#([0-9a-fx.-]+)(\s*)(.*)", tokens)
+        if !x86 && m !== nothing && occursin(num_regex, m.captures[1])
+            num, spaces, tokens = m.captures
+            printstyled_ll(io, "#" * num, :number, spaces)
+            continue
+        end
+
+        m = match(r"^([^\s,:*(){}\[\]][^\s,:*/(){}\[\]]*)(\s*)(.*)", tokens)
+        m === nothing && break
+        token, spaces, tokens = m.captures
+        if occursin(num_regex, token)
+            printstyled_ll(io, token, :number)
+        elseif x86 && occursin(x86_ptr, token) || occursin(avx512flags, token)
+            printstyled_ll(io, token, :keyword)
+            isfuncname = token == "offset"
+        elseif !x86 && (occursin(arm_keywords, token) || occursin(arm_cond, token))
+            printstyled_ll(io, token, :keyword)
+        elseif occursin(r"^L.+$", token)
+            printstyled_ll(io, token, :label)
+        elseif occursin(r"^\$.+$", token)
+            printstyled_ll(io, token, :funcname)
+        elseif occursin(r"^%?(?:[a-z][\w.]+|\"[^\"]+\")$", token)
+            islabel = haslabel & !occursin(',', tokens)
+            printstyled_ll(io, token, islabel ? :label : isfuncname ? :funcname : :variable)
+            isfuncname = false
+        else
+            printstyled_ll(io, token, :default)
+        end
+        print(io, spaces)
+    end
+end
diff --git a/stdlib/InteractiveUtils/src/editless.jl b/stdlib/InteractiveUtils/src/editless.jl
index 9d749aa5551d97..7a96323c9cdeb2 100644
--- a/stdlib/InteractiveUtils/src/editless.jl
+++ b/stdlib/InteractiveUtils/src/editless.jl
@@ -62,6 +62,8 @@ already work:
 - vim
 - nvim
 - nano
+- micro
+- kak
 - textmate
 - mate
 - kate
@@ -119,9 +121,10 @@ function define_default_editors()
     end
     # Must check that emacs not running in -t/-nw before regex match for general emacs
     define_editor(Any[
-        "vim", "vi", "nvim", "mvim", "nano", "micro",
+        "vim", "vi", "nvim", "mvim", "nano", "micro", "kak",
         r"\bemacs\b.*\s(-nw|--no-window-system)\b",
-        r"\bemacsclient\b.\s*-(-?nw|t|-?tty)\b"], wait=true) do cmd, path, line
+        r"\bemacsclient\b.\s*-(-?nw|t|-?tty)\b",
+    ], wait=true) do cmd, path, line
         `$cmd +$line $path`
     end
     define_editor(["textmate", "mate", "kate"]) do cmd, path, line
@@ -159,6 +162,7 @@ function define_default_editors()
         end
     end
 end
+define_default_editors()
 
 """
     editor()
@@ -188,10 +192,9 @@ Edit a file or directory optionally providing a line number to edit the file at.
 Return to the `julia` prompt when you quit the editor. The editor can be changed
 by setting `JULIA_EDITOR`, `VISUAL` or `EDITOR` as an environment variable.
 
-See also: [`define_editor`](@ref)
+See also [`define_editor`](@ref).
 """
 function edit(path::AbstractString, line::Integer=0)
-    isempty(EDITOR_CALLBACKS) && define_default_editors()
     path isa String || (path = convert(String, path))
     if endswith(path, ".jl")
         p = find_source_file(path)
@@ -219,9 +222,17 @@ method to edit. For modules, open the main source file. The module needs to be l
 To ensure that the file can be opened at the given line, you may need to call
 `define_editor` first.
 """
-edit(f)                   = edit(functionloc(f)...)
-edit(f, @nospecialize t)  = edit(functionloc(f,t)...)
-edit(file, line::Integer) = error("could not find source file for function")
+function edit(@nospecialize f)
+    ms = methods(f).ms
+    length(ms) == 1 && edit(functionloc(ms[1])...)
+    length(ms) > 1 && return ms
+    length(ms) == 0 && functionloc(f) # throws
+    nothing
+end
+edit(m::Method) = edit(functionloc(m)...)
+edit(@nospecialize(f), idx::Integer) = edit(methods(f).ms[idx])
+edit(f, t)  = (@nospecialize; edit(functionloc(f, t)...))
+edit(file::Nothing, line::Integer) = error("could not find source file for function")
 edit(m::Module) = edit(pathof(m))
 
 # terminal pager
diff --git a/stdlib/InteractiveUtils/src/macros.jl b/stdlib/InteractiveUtils/src/macros.jl
index dd637c8169b874..b0005e6d7d783a 100644
--- a/stdlib/InteractiveUtils/src/macros.jl
+++ b/stdlib/InteractiveUtils/src/macros.jl
@@ -4,7 +4,7 @@
 
 import Base: typesof, insert!
 
-separate_kwargs(args...; kwargs...) = (args, kwargs.data)
+separate_kwargs(args...; kwargs...) = (args, values(kwargs))
 
 """
 Transform a dot expression into one where each argument has been replaced by a
@@ -42,7 +42,7 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[])
             insert!(args, (isnothing(i) ? 2 : i+1), ex0.args[2])
             ex0 = Expr(:call, args...)
         end
-        if ex0.head === :. || (ex0.head === :call && string(ex0.args[1])[1] == '.')
+        if ex0.head === :. || (ex0.head === :call && ex0.args[1] !== :.. && string(ex0.args[1])[1] == '.')
             codemacro = startswith(string(fcn), "code_")
             if codemacro && ex0.args[2] isa Expr
                 # Manually wrap a dot call in a function
@@ -153,12 +153,12 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[])
     exret = Expr(:none)
     if ex.head === :call
         if any(e->(isa(e, Expr) && e.head === :(...)), ex0.args) &&
-            (ex.args[1] === GlobalRef(Core,:_apply) ||
-             ex.args[1] === GlobalRef(Base,:_apply))
+            (ex.args[1] === GlobalRef(Core,:_apply_iterate) ||
+             ex.args[1] === GlobalRef(Base,:_apply_iterate))
             # check for splatting
-            exret = Expr(:call, ex.args[1], fcn,
-                        Expr(:tuple, esc(ex.args[2]),
-                            Expr(:call, typesof, map(esc, ex.args[3:end])...)))
+            exret = Expr(:call, ex.args[2], fcn,
+                        Expr(:tuple, esc(ex.args[3]),
+                            Expr(:call, typesof, map(esc, ex.args[4:end])...)))
         else
             exret = Expr(:call, fcn, esc(ex.args[1]),
                          Expr(:call, typesof, map(esc, ex.args[2:end])...), kws...)
@@ -174,7 +174,7 @@ function gen_call_with_extracted_types(__module__, fcn, ex0, kws=Expr[])
 end
 
 """
-Same behaviour as gen_call_with_extracted_types except that keyword arguments
+Same behaviour as `gen_call_with_extracted_types` except that keyword arguments
 of the form "foo=bar" are passed on to the called function as well.
 The keyword arguments must be given before the mandatory argument.
 """
@@ -187,7 +187,7 @@ function gen_call_with_extracted_types_and_kwargs(__module__, fcn, ex0)
             if length(x.args) != 2
                 return Expr(:call, :error, "Invalid keyword argument: $x")
             end
-            push!(kws, Expr(:kw, x.args[1], x.args[2]))
+            push!(kws, Expr(:kw, esc(x.args[1]), esc(x.args[2])))
         else
             return Expr(:call, :error, "@$fcn expects only one non-keyword argument")
         end
@@ -232,6 +232,17 @@ macro code_lowered(ex0...)
     end
 end
 
+macro time_imports(ex)
+    quote
+        try
+            Base.Threads.atomic_add!(Base.TIMING_IMPORTS, 1)
+            $(esc(ex))
+        finally
+            Base.Threads.atomic_sub!(Base.TIMING_IMPORTS, 1)
+        end
+    end
+end
+
 """
     @functionloc
 
@@ -247,7 +258,9 @@ It calls out to the `functionloc` function.
 Applied to a function or macro call, it evaluates the arguments to the specified call, and
 returns the `Method` object for the method that would be called for those arguments. Applied
 to a variable, it returns the module in which the variable was bound. It calls out to the
-`which` function.
+[`which`](@ref) function.
+
+See also: [`@less`](@ref), [`@edit`](@ref).
 """
 :@which
 
@@ -256,6 +269,8 @@ to a variable, it returns the module in which the variable was bound. It calls o
 
 Evaluates the arguments to the function or macro call, determines their types, and calls the `less`
 function on the resulting expression.
+
+See also: [`@edit`](@ref), [`@which`](@ref), [`@code_lowered`](@ref).
 """
 :@less
 
@@ -264,6 +279,8 @@ function on the resulting expression.
 
 Evaluates the arguments to the function or macro call, determines their types, and calls the `edit`
 function on the resulting expression.
+
+See also: [`@less`](@ref), [`@which`](@ref).
 """
 :@edit
 
@@ -319,10 +336,59 @@ by putting them and their value before the function call, like this:
 Evaluates the arguments to the function or macro call, determines their types, and calls
 [`code_native`](@ref) on the resulting expression.
 
-Set the optional keyword argument `debuginfo` by putting it before the function call, like this:
+Set any of the optional keyword arguments `syntax`, `debuginfo`, `binary` or `dump_module`
+by putting it before the function call, like this:
+
+    @code_native syntax=:intel debuginfo=:default binary=true dump_module=false f(x)
 
-    @code_native debuginfo=:default f(x)
+* Set assembly syntax by setting `syntax` to `:att` (default) for AT&T syntax or `:intel` for Intel syntax.
+* Specify verbosity of code comments by setting `debuginfo` to `:source` (default) or `:none`.
+* If `binary` is `true`, also print the binary machine code for each instruction precedented by an abbreviated address.
+* If `dump_module` is `false`, do not print metadata such as rodata or directives.
 
-`debuginfo` may be one of `:source` (default) or `:none`, to specify the verbosity of code comments.
+See also: [`code_native`](@ref), [`@code_llvm`](@ref), [`@code_typed`](@ref) and [`@code_lowered`](@ref)
 """
 :@code_native
+
+"""
+    @time_imports
+
+A macro to execute an expression and produce a report of any time spent importing packages and their
+dependencies. Any compilation time will be reported as a percentage, and how much of which was recompilation, if any.
+
+If a package's dependencies have already been imported either globally or by another dependency they will
+not appear under that package and the package will accurately report a faster load time than if it were to
+be loaded in isolation.
+
+!!! compat "Julia 1.9"
+    Reporting of any compilation and recompilation time was added in Julia 1.9
+
+```julia-repl
+julia> @time_imports using CSV
+      0.4 ms    ┌ IteratorInterfaceExtensions
+     11.1 ms  ┌ TableTraits 84.88% compilation time
+    145.4 ms  ┌ SentinelArrays 66.73% compilation time
+     42.3 ms  ┌ Parsers 19.66% compilation time
+      4.1 ms  ┌ Compat
+      8.2 ms  ┌ OrderedCollections
+      1.4 ms    ┌ Zlib_jll
+      2.3 ms    ┌ TranscodingStreams
+      6.1 ms  ┌ CodecZlib
+      0.3 ms  ┌ DataValueInterfaces
+     15.2 ms  ┌ FilePathsBase 30.06% compilation time
+      9.3 ms    ┌ InlineStrings
+      1.5 ms    ┌ DataAPI
+     31.4 ms  ┌ WeakRefStrings
+     14.8 ms  ┌ Tables
+     24.2 ms  ┌ PooledArrays
+   2002.4 ms  CSV 83.49% compilation time
+```
+
+!!! note
+    During the load process a package sequentially imports where necessary all of its dependencies, not just
+    its direct dependencies. That is also true for the dependencies themselves so nested importing will likely
+    occur, but not always. Therefore the nesting shown in this output report is not equivalent to the dependency
+    tree, but does indicate where import time has accumulated.
+
+"""
+:@time_imports
diff --git a/stdlib/InteractiveUtils/test/highlighting.jl b/stdlib/InteractiveUtils/test/highlighting.jl
new file mode 100644
index 00000000000000..1ab7dc4292ced6
--- /dev/null
+++ b/stdlib/InteractiveUtils/test/highlighting.jl
@@ -0,0 +1,492 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using InteractiveUtils, Test
+
+myzeros(::Type{T}, ::Type{S}, ::Type{R}, dims::Tuple{Vararg{Integer, N}}, dims2::Tuple{Vararg{Integer, M}}) where {T, R, S, M, N} = (x = 1)
+@testset "warntype content" begin
+    io = IOBuffer()
+    code_warntype(IOContext(io, :color => true), myzeros,
+                  Tuple{Type{<:Integer}, Type{>:String}, Type{T} where Signed<:T<:Real, Tuple{Vararg{Int}}, NTuple{4,Int}})
+    seekstart(io)
+    @test startswith(readline(io), "MethodInstance for ")
+    @test startswith(readline(io), "  from myzeros(::Type{T}, ::")
+    @test occursin(r"^Static Parameters$", readline(io))
+    @test occursin(r"^  T <: .*Integer", readline(io))
+    @test occursin(r"^  .*Signed.* <: R <: .*Real", readline(io))
+    @test occursin(r"^  S >: .*String", readline(io))
+    @test occursin(r"^  M = .*4", readline(io))
+    @test occursin(r"^  N::.*Int", readline(io))
+    @test occursin(r"^Arguments$", readline(io))
+    @test occursin(r"^  #self#.*::Core.Const", readline(io))
+    while readline(io) != "Locals"
+        eof(io) && throw(EOFError())
+    end
+    @test occursin(r"^  x.*::Int", readline(io))
+    @test occursin(r"^Body.*::Int", readline(io))
+    code = read(io, String)
+    @test endswith(code, "\n\n")
+end
+
+@testset "warntype highlighting" begin
+    # Make sure that "expected" unions are highlighted with warning color instead of error color
+    io = IOBuffer()
+    f(x) = x > 1 ? "foo" : nothing
+    c = Base.text_colors[Base.warn_color()]
+    InteractiveUtils.highlighting[:warntype] = false
+    code_warntype(IOContext(io, :color => true), f, Tuple{Int64})
+    @test !occursin(c, String(take!(io)))
+    InteractiveUtils.highlighting[:warntype] = true
+    code_warntype(IOContext(io, :color => true), f, Tuple{Int64})
+    @test occursin(c, String(take!(io)))
+end
+
+@testset "no colors" begin
+    io = IOBuffer()
+    code_llvm(io, sqrt, (Float32,))
+    @test !occursin("\e", String(take!(io)))
+
+    InteractiveUtils.print_llvm(io, "; comment")
+    @test String(take!(io)) == "; comment\n"
+
+    InteractiveUtils.highlighting[:llvm] = false
+    code_llvm(IOContext(io, :color => true), abs, (Float64,))
+    @test !occursin("\e", String(take!(io)))
+    InteractiveUtils.highlighting[:llvm] = true
+    code_llvm(IOContext(io, :color => true), abs, (Float64,))
+    @test occursin("\e", String(take!(io)))
+
+    code_native(io, sqrt, (Float32,))
+    @test !occursin("\e", String(take!(io)))
+
+    InteractiveUtils.print_native(io, "; comment", :x86)
+    @test String(take!(io)) == "; comment\n"
+
+    InteractiveUtils.print_native(io, "; comment", :unknown)
+    @test String(take!(io)) == "; comment\n"
+
+    InteractiveUtils.highlighting[:native] = false
+    code_native(IOContext(io, :color => true), abs, (Float64,))
+    @test !occursin("\e", String(take!(io)))
+    InteractiveUtils.highlighting[:native] = true
+    code_native(IOContext(io, :color => true), abs, (Float64,))
+    @test occursin("\e", String(take!(io)))
+end
+
+function hilight_llvm(s)
+    io = IOBuffer()
+    InteractiveUtils.print_llvm(IOContext(io, :color=>true), s)
+    r = String(take!(io))
+    get(ENV, "JULIA_DEBUG", "") == "" && return r
+    println(stdout, " input: ", s)
+    println(stdout, "result: ", r)
+    flush(stdout)
+    r
+end
+function hilight_native(s, arch)
+    io = IOBuffer()
+    InteractiveUtils.print_native(IOContext(io, :color=>true), s, arch)
+    r = String(take!(io))
+    get(ENV, "JULIA_DEBUG", "") == "" && return r
+    println(stdout, " input: ", s)
+    println(stdout, "result: ", r)
+    flush(stdout)
+    r
+end
+hilight_x86(s) = hilight_native(s, :x86)
+hilight_arm(s) = hilight_native(s, :arm)
+
+function esc_code(s)
+    io = IOBuffer()
+    InteractiveUtils.printstyled_ll(IOContext(io, :color=>true), "!", s)
+    split(String(take!(io)), "!")
+end
+
+const D, XD = esc_code(:default)
+const C, XC = esc_code(:comment)
+const L, XL = esc_code(:label)
+const I, XI = esc_code(:instruction)
+const T, XT = esc_code(:type)
+const N, XN = esc_code(:number)
+const B, XB = esc_code(:bracket)
+const V, XV = esc_code(:variable)
+const K, XK = esc_code(:keyword)
+const F, XF = esc_code(:funcname)
+
+const COM = D * "," * XD
+const COL = D * ":" * XD
+const EQU = D * "=" * XD
+const P = B * "(" * XB
+const XP = B * ")" * XB
+const S = B * "[" * XB
+const XS = B * "]" * XB
+const U = B * "{" * XB
+const XU = B * "}" * XB
+
+@testset "LLVM IR" begin
+    @testset "comment" begin
+        @test hilight_llvm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
+    end
+    @testset "lavel" begin
+        @test hilight_llvm("top:") == "$(L)top:$(XL)\n"
+
+        @test hilight_llvm("L7:\t\t; preds = %top") ==
+            "$(L)L7:$(XL)\t\t$(C); preds = %top$(XC)\n"
+    end
+    @testset "define" begin
+        @test hilight_llvm("define double @julia_func_1234(float) {") ==
+            "$(K)define$(XK) $(T)double$(XT) " *
+            "$(F)@julia_func_1234$(XF)$P$(T)float$(XT)$XP $U\n"
+
+        @test hilight_llvm("}") == "$XU\n"
+    end
+
+    @testset "declare" begin
+        @test hilight_llvm("declare i32 @jl_setjmp(i8*) #2") ==
+            "$(K)declare$(XK) $(T)i32$(XT) " *
+            "$(F)@jl_setjmp$(XF)$P$(T)i8$(XT)$(D)*$(XD)$XP $(D)#2$(XD)\n"
+    end
+
+    @testset "type" begin
+        @test hilight_llvm("%jl_value_t = type opaque") ==
+            "$(V)%jl_value_t$(XV) $EQU $(K)type$(XK) $(T)opaque$(XT)\n"
+    end
+
+    @testset "target" begin
+        datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
+        @test hilight_llvm("target datalayout = \"$datalayout\"") ==
+            "$(K)target$(XK) $(K)datalayout$(XK) $EQU $(V)\"$datalayout\"$(XV)\n"
+    end
+
+    @testset "attributes" begin
+        @test hilight_llvm(
+            """attributes #1 = { uwtable "frame-pointer"="all" "thunk" }""") ==
+            "$(K)attributes$(XK) $(D)#1$(XD) $EQU " *
+            "$U $(K)uwtable$(XK) $(V)\"frame-pointer\"$(XV)$EQU" *
+            "$(V)\"all\"$(XV) $(V)\"thunk\"$(XV) $XU\n"
+    end
+
+    @testset "terminator" begin
+        @test hilight_llvm("  ret i8 %12") ==
+            "  $(I)ret$(XI) $(T)i8$(XT) $(V)%12$(XV)\n"
+
+        @test hilight_llvm("  br i1 %2, label %L6, label %L4") ==
+            "  $(I)br$(XI) $(T)i1$(XT) $(V)%2$(XV)$COM " *
+            "$(T)label$(XT) $(L)%L6$(XL)$COM $(T)label$(XT) $(L)%L4$(XL)\n"
+
+        @test hilight_llvm("  br label %L5") ==
+            "  $(I)br$(XI) $(T)label$(XT) $(L)%L5$(XL)\n"
+
+        @test hilight_llvm("  unreachable") == "  $(I)unreachable$(XI)\n"
+    end
+
+    @testset "arithmetic" begin
+        @test hilight_llvm("   %11 = add nuw nsw i64 %value_phi10, 1") ==
+            "   $(V)%11$(XV) $EQU $(I)add$(XI) $(K)nuw$(XK) $(K)nsw$(XK) " *
+            "$(T)i64$(XT) $(V)%value_phi10$(XV)$COM $(N)1$(XN)\n"
+
+        @test hilight_llvm("   %13 = fadd double %12, -2.000000e+00") ==
+            "   $(V)%13$(XV) $EQU $(I)fadd$(XI) " *
+            "$(T)double$(XT) $(V)%12$(XV)$COM $(N)-2.000000e+00$(XN)\n"
+
+        @test hilight_llvm("      %21 = fmul contract double %20, 0x0123456789ABCDEF") ==
+            "      $(V)%21$(XV) $EQU $(I)fmul$(XI) $(K)contract$(XK) " *
+            "$(T)double$(XT) $(V)%20$(XV)$COM $(N)0x0123456789ABCDEF$(XN)\n"
+    end
+
+    @testset "bitwise" begin
+        @test hilight_llvm("   %31 = shl i64 %value_phi4, 52") ==
+            "   $(V)%31$(XV) $EQU " *
+            "$(I)shl$(XI) $(T)i64$(XT) $(V)%value_phi4$(XV)$COM $(N)52$(XN)\n"
+    end
+
+    @testset "aggregate" begin
+        @test hilight_llvm("    %4 = extractvalue { i64, i1 } %1, 0") ==
+            "    $(V)%4$(XV) $EQU $(I)extractvalue$(XI) " *
+            "$U $(T)i64$(XT)$COM $(T)i1$(XT) $XU $(V)%1$(XV)$COM $(N)0$(XN)\n"
+    end
+
+    @testset "memory access" begin
+        @test hilight_llvm("  %dims = alloca [1 x i64], align 8") ==
+            "  $(V)%dims$(XV) $EQU $(I)alloca$(XI) " *
+            "$S$(N)1$(XN) $(D)x$(XD) $(T)i64$(XT)$XS$COM $(K)align$(XK) $(N)8$(XN)\n"
+
+        @test hilight_llvm("    %51 = load i32," *
+                           " i32* inttoptr (i64 226995504 to i32*), align 16") ==
+            "    $(V)%51$(XV) $EQU $(I)load$(XI) $(T)i32$(XT)$COM " *
+            "$(T)i32$(XT)$(D)*$(XD) $(K)inttoptr$(XK) $P$(T)i64$(XT) $(N)226995504$(XN) " *
+            "$(K)to$(XK) $(T)i32$(XT)$(D)*$(XD)$XP$COM $(K)align$(XK) $(N)16$(XN)\n"
+
+        @test hilight_llvm("    %53 = load %jl_value_t addrspace(10)*, " *
+                           "%jl_value_t addrspace(10)* addrspace(11)* %52, align 8") ==
+            "    $(V)%53$(XV) $EQU $(I)load$(XI) $(V)%jl_value_t$(XV) " *
+            "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD)$COM " *
+            "$(V)%jl_value_t$(XV) $(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD) " *
+            "$(K)addrspace$(XK)$P$(N)11$(XN)$XP$(D)*$(XD) " *
+            "$(V)%52$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n"
+
+        @test hilight_llvm("    store i64 %61, i64 addrspace(11)* %60, align 8") ==
+            "    $(I)store$(XI) $(T)i64$(XT) $(V)%61$(XV)$COM " *
+            "$(T)i64$(XT) $(K)addrspace$(XK)$P$(N)11$(XN)$XP$(D)*$(XD) " *
+            "$(V)%60$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n"
+
+        @test hilight_llvm("  store volatile %jl_value_t addrspace(10)** %62, " *
+                           "%jl_value_t addrspace(10)*** %63, align 8") ==
+            "  $(I)store$(XI) $(K)volatile$(XK) $(V)%jl_value_t$(XV) " *
+            "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)**$(XD) $(V)%62$(XV)$COM " *
+            "$(V)%jl_value_t$(XV) $(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)***$(XD) " *
+            "$(V)%63$(XV)$COM $(K)align$(XK) $(N)8$(XN)\n"
+
+        @test hilight_llvm("     %71 = getelementptr i8, i8* %70, i64 8") ==
+            "     $(V)%71$(XV) $EQU $(I)getelementptr$(XI) $(T)i8$(XT)$COM " *
+            "$(T)i8$(XT)$(D)*$(XD) $(V)%70$(XV)$COM $(T)i64$(XT) $(N)8$(XN)\n"
+    end
+
+    @testset "conversion" begin
+        @test hilight_llvm("  %22 = zext i1 %21 to i8") ==
+            "  $(V)%22$(XV) $EQU $(I)zext$(XI) $(T)i1$(XT) $(V)%21$(XV) " *
+            "$(K)to$(XK) $(T)i8$(XT)\n"
+
+        @test hilight_llvm("     %24 = sitofp i64 %23 to double") ==
+            "     $(V)%24$(XV) $EQU $(I)sitofp$(XI) $(T)i64$(XT) $(V)%23$(XV) " *
+            "$(K)to$(XK) $(T)double$(XT)\n"
+
+        @test hilight_llvm("  %26 = ptrtoint i8* %25 to i64") ==
+            "  $(V)%26$(XV) $EQU $(I)ptrtoint$(XI) $(T)i8$(XT)$(D)*$(XD) " *
+            "$(V)%25$(XV) $(K)to$(XK) $(T)i64$(XT)\n"
+
+        @test hilight_llvm("  %28 = bitcast %jl_value_t addrspace(10)* %27 " *
+                           "to [2 x i16] addrspace(10)*") ==
+            "  $(V)%28$(XV) $EQU $(I)bitcast$(XI) $(V)%jl_value_t$(XV) " *
+            "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD) $(V)%27$(XV) " *
+            "$(K)to$(XK) $S$(N)2$(XN) $(D)x$(XD) $(T)i16$(XT)$XS " *
+            "$(K)addrspace$(XK)$P$(N)10$(XN)$XP$(D)*$(XD)\n"
+    end
+
+    @testset "other" begin
+        @test hilight_llvm("  %31 = icmp slt i64 %30, 0") ==
+            "  $(V)%31$(XV) $EQU $(I)icmp$(XI) $(I)slt$(XI) " *
+            "$(T)i64$(XT) $(V)%30$(XV)$COM $(N)0$(XN)\n"
+
+        @test hilight_llvm("  %value_phi34 = phi double [ %33, %L50 ], [ %32, %L60 ]") ==
+            "  $(V)%value_phi34$(XV) $EQU $(I)phi$(XI) $(T)double$(XT) " *
+            "$S $(V)%33$(XV)$COM $(L)%L50$(XL) $XS$COM " *
+            "$S $(V)%32$(XV)$COM $(L)%L60$(XL) $XS\n"
+
+        @test hilight_llvm("   %.v = select i1 %35, i64 %36, i64 63") ==
+            "   $(V)%.v$(XV) $EQU $(I)select$(XI) $(T)i1$(XT) $(V)%35$(XV)$COM " *
+            "$(T)i64$(XT) $(V)%36$(XV)$COM $(T)i64$(XT) $(N)63$(XN)\n"
+
+        @test hilight_llvm("   %38 = call i64 @llvm.cttz.i64(i64 %37, i1 false)") ==
+            "   $(V)%38$(XV) $EQU $(I)call$(XI) $(T)i64$(XT) " *
+            "$(F)@llvm.cttz.i64$(XF)$P$(T)i64$(XT) $(V)%37$(XV)$COM " *
+            "$(T)i1$(XT) $(K)false$(XK)$XP\n"
+    end
+end
+
+@testset "x86 ASM" begin
+    @testset "comment" begin
+        @test hilight_x86("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
+    end
+    @testset "label" begin
+        @test hilight_x86("L123:") == "$(L)L123:$(XL)\n"
+    end
+    @testset "directive" begin
+        @test hilight_x86("\t.text") == "\t$(D).text$(XD)\n"
+    end
+
+    @testset "0-operand" begin
+        # AT&T
+        @test hilight_x86("\tretq") == "\t$(I)retq$(XI)\n"
+
+        # Intel
+        @test hilight_x86("\tret") == "\t$(I)ret$(XI)\n"
+    end
+    @testset "1-operand" begin
+        # AT&T
+        @test hilight_x86("\tpopq\t%rax") == "\t$(I)popq$(XI)\t$(V)%rax$(XV)\n"
+
+        @test hilight_x86("\tpushl\t\$4294967295\t# imm = 0xFFFFFFFF") ==
+            "\t$(I)pushl$(XI)\t$(N)\$4294967295$(XN)\t$(C)# imm = 0xFFFFFFFF$(XC)\n"
+
+        @test hilight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n"
+
+        @test hilight_x86("\tnopw\t%cs:(%rax,%rax)") ==
+            "\t$(I)nopw$(XI)\t$(V)%cs$(XV)$COL$P$(V)%rax$(XV)$COM$(V)%rax$(XV)$XP\n"
+
+        # Intel
+        @test hilight_x86("\tpop\trax") == "\t$(I)pop$(XI)\t$(V)rax$(XV)\n"
+
+        @test hilight_x86("\tpush\t4294967295") ==
+            "\t$(I)push$(XI)\t$(N)4294967295$(XN)\n"
+
+        @test hilight_x86("\tja\tL234") == "\t$(I)ja$(XI)\t$(L)L234$(XL)\n"
+
+        @test hilight_x86("\tnop\tword ptr cs:[rax + rax]") ==
+            "\t$(I)nop$(XI)\t$(K)word$(XK) $(K)ptr$(XK) " *
+            "$(V)cs$(XV)$COL$S$(V)rax$(XV) $(D)+$(XD) $(V)rax$(XV)$XS\n"
+    end
+    @testset "2-operand" begin
+        # AT&T
+        @test hilight_x86("\tshrq\t\$63, %rcx") ==
+            "\t$(I)shrq$(XI)\t$(N)\$63$(XN)$COM $(V)%rcx$(XV)\n"
+
+        @test hilight_x86("\tvmovsd\t(%rsi,%rdx,8), %xmm1\t# xmm1 = mem[0],zero") ==
+            "\t$(I)vmovsd$(XI)\t$P$(V)%rsi$(XV)$COM$(V)%rdx$(XV)$COM$(N)8$(XN)$XP" *
+            "$COM $(V)%xmm1$(XV)\t$(C)# xmm1 = mem[0],zero$(XC)\n"
+
+        @test hilight_x86("\tmovabsq\t\$\"#string#338\", %rax") ==
+            "\t$(I)movabsq$(XI)\t$(F)\$\"#string#338\"$(XF)$COM $(V)%rax$(XV)\n"
+
+        # Intel
+        @test hilight_x86("\tshr\trcx, 63") ==
+            "\t$(I)shr$(XI)\t$(V)rcx$(XV)$COM $(N)63$(XN)\n"
+
+        @test hilight_x86(
+            "\tvmovsd\txmm1, dword ptr [rsi + 8*rdx]\t# xmm1 = mem[0],zero") ==
+            "\t$(I)vmovsd$(XI)\t$(V)xmm1$(XV)$COM $(K)dword$(XK) $(K)ptr$(XK) " *
+            "$S$(V)rsi$(XV) $(D)+$(XD) $(N)8$(XN)$(D)*$(XD)$(V)rdx$(XV)$XS" *
+            "\t$(C)# xmm1 = mem[0],zero$(XC)\n"
+
+        @test hilight_x86("\tmovabs\trax, offset \"#string#338\"") ==
+            "\t$(I)movabs$(XI)\t$(V)rax$(XV)$COM " *
+            "$(K)offset$(XK) $(F)\"#string#338\"$(XF)\n"
+    end
+    @testset "3-operand" begin
+        # AT&T
+        @test hilight_x86("\tvaddsd\t(%rax), %xmm0, %xmm0") ==
+            "\t$(I)vaddsd$(XI)\t$P$(V)%rax$(XV)$XP$COM " *
+            "$(V)%xmm0$(XV)$COM $(V)%xmm0$(XV)\n"
+
+        # Intel
+        @test hilight_x86("\tvaddsd\txmm0, xmm0, qword ptr [rax]") ==
+            "\t$(I)vaddsd$(XI)\t$(V)xmm0$(XV)$COM $(V)xmm0$(XV)$COM " *
+            "$(K)qword$(XK) $(K)ptr$(XK) $S$(V)rax$(XV)$XS\n"
+    end
+    @testset "4-operand" begin
+        # AT&T
+        @test hilight_x86("\tvroundsd\t\$4, %xmm1, %xmm1, %xmm1") ==
+            "\t$(I)vroundsd$(XI)\t$(N)\$4$(XN)$COM " *
+            "$(V)%xmm1$(XV)$COM $(V)%xmm1$(XV)$COM $(V)%xmm1$(XV)\n"
+
+        # Intel
+        @test hilight_x86("\tvroundsd\txmm1, xmm1, xmm1, 4") ==
+            "\t$(I)vroundsd$(XI)\t" *
+            "$(V)xmm1$(XV)$COM $(V)xmm1$(XV)$COM $(V)xmm1$(XV)$COM $(N)4$(XN)\n"
+    end
+    @testset "AVX-512" begin
+        # AT&T
+        @test hilight_x86("\tvmovaps\t(%eax), %zmm0") ==
+            "\t$(I)vmovaps$(XI)\t$P$(V)%eax$(XV)$XP$COM $(V)%zmm0$(XV)\n"
+
+        @test hilight_x86("\tvpaddd\t%zmm3, %zmm1, %zmm1 {%k1}") ==
+            "\t$(I)vpaddd$(XI)\t$(V)%zmm3$(XV)$COM $(V)%zmm1$(XV)$COM " *
+            "$(V)%zmm1$(XV) $U$(V)%k1$(XV)$XU\n"
+
+        @test hilight_x86("\tvdivpd\t%zmm3, %zmm1, %zmm0 {%k1} {z}") ==
+            "\t$(I)vdivpd$(XI)\t$(V)%zmm3$(XV)$COM $(V)%zmm1$(XV)$COM " *
+            "$(V)%zmm0$(XV) $U$(V)%k1$(XV)$XU $U$(K)z$(XK)$XU\n"
+
+        @test hilight_x86("\tvdivps\t(%ebx){1to16}, %zmm5, %zmm4") ==
+            "\t$(I)vdivps$(XI)\t$P$(V)%ebx$(XV)$XP$U$(K)1to16$(XK)$XU$COM " *
+            "$(V)%zmm5$(XV)$COM $(V)%zmm4$(XV)\n"
+
+        @test hilight_x86("\tvcvtsd2si\t{rn-sae}, %xmm0, %eax") ==
+            "\t$(I)vcvtsd2si$(XI)\t$U$(K)rn-sae$(XK)$XU$COM " *
+            "$(V)%xmm0$(XV)$COM $(V)%eax$(XV)\n"
+
+        # Intel
+        @test hilight_x86("\tvmovaps\tzmm0, zmmword ptr [eax]") ==
+            "\t$(I)vmovaps$(XI)\t$(V)zmm0$(XV)$COM " *
+            "$(K)zmmword$(XK) $(K)ptr$(XK) $S$(V)eax$(XV)$XS\n"
+
+        @test hilight_x86("\tvpaddd\tzmm1 {k1}, zmm1, zmm3") ==
+            "\t$(I)vpaddd$(XI)\t$(V)zmm1$(XV) $U$(V)k1$(XV)$XU$COM " *
+            "$(V)zmm1$(XV)$COM $(V)zmm3$(XV)\n"
+
+        @test hilight_x86("\tvdivpd\tzmm0 {k1} {z}, zmm1, zmm3") ==
+            "\t$(I)vdivpd$(XI)\t$(V)zmm0$(XV) $U$(V)k1$(XV)$XU $U$(K)z$(XK)$XU$COM " *
+            "$(V)zmm1$(XV)$COM $(V)zmm3$(XV)\n"
+
+        @test hilight_x86("\tvdivps\tzmm4, zmm5, dword ptr [ebx]{1to16}") ==
+            "\t$(I)vdivps$(XI)\t$(V)zmm4$(XV)$COM $(V)zmm5$(XV)$COM " *
+            "$(K)dword$(XK) $(K)ptr$(XK) $S$(V)ebx$(XV)$XS$U$(K)1to16$(XK)$XU\n"
+
+        @test hilight_x86("\tvcvtsd2si\teax, xmm0$(XV), {rn-sae}") ==
+            "\t$(I)vcvtsd2si$(XI)\t$(V)eax$(XV)$COM " *
+            "$(V)xmm0$(XV)$COM $U$(K)rn-sae$(XK)$XU\n"
+    end
+end
+
+@testset "ARM ASM" begin
+    @testset "comment" begin
+        @test hilight_arm("; comment ; // # ") == "$(C); comment ; // # $(XC)\n"
+    end
+    @testset "label" begin
+        @test hilight_arm("L45:") == "$(L)L45:$(XL)\n"
+    end
+    @testset "directive" begin
+        @test hilight_arm("\t.text") == "\t$(D).text$(XD)\n"
+    end
+
+    @testset "0-operand" begin
+        @test hilight_arm("\tret") == "\t$(I)ret$(XI)\n"
+    end
+    @testset "1-operand" begin
+        @test hilight_arm("\tbl\t0x12") == "\t$(I)bl$(XI)\t$(N)0x12$(XN)\n"
+
+        @test hilight_arm("\tb\tL345") == "\t$(I)b$(XI)\t$(L)L345$(XL)\n"
+
+        @test hilight_arm("\tb.gt\tL67") == "\t$(I)b.gt$(XI)\t$(L)L67$(XL)\n"
+
+        @test hilight_arm("\tpop\t{r11, pc}") ==
+            "\t$(I)pop$(XI)\t$U$(V)r11$(XV)$COM $(V)pc$(XV)$XU\n"
+    end
+    @testset "2-operand" begin
+        @test hilight_arm("\tcmp\tx10, #2047\t// =2047") ==
+            "\t$(I)cmp$(XI)\t$(V)x10$(XV)$COM $(N)#2047$(XN)\t$(C)// =2047$(XC)\n"
+
+        @test hilight_arm("\tldr\td1, [x10]") ==
+            "\t$(I)ldr$(XI)\t$(V)d1$(XV)$COM $S$(V)x10$(XV)$XS\n"
+
+        @test hilight_arm("\tstr\tx30, [sp, #-16]!") ==
+            "\t$(I)str$(XI)\t$(V)x30$(XV)$COM " *
+            "$S$(V)sp$(XV)$COM $(N)#-16$(XN)$XS$(K)!$(XK)\n"
+
+        @test hilight_arm("\tmov\tv0.16b, v1.16b") ==
+            "\t$(I)mov$(XI)\t$(V)v0.16b$(XV)$COM $(V)v1.16b$(XV)\n"
+    end
+    @testset "3-operand" begin
+        @test hilight_arm("\tfmul\td2, d0, d2") ==
+            "\t$(I)fmul$(XI)\t$(V)d2$(XV)$COM $(V)d0$(XV)$COM $(V)d2$(XV)\n"
+
+        @test hilight_arm("\tmovk\tx10, #65535, lsl #32") ==
+            "\t$(I)movk$(XI)\t$(V)x10$COM $(N)#65535$(XN)$COM $(K)lsl$(XK) $(N)#32$(XN)\n"
+
+        @test hilight_arm("\tcneg\tx8, x8, ge") ==
+            "\t$(I)cneg$(XI)\t$(V)x8$(XV)$COM $(V)x8$(XV)$COM $(K)ge$(XK)\n"
+    end
+    @testset "4-operand" begin
+        @test hilight_arm("\tadd\tx8, x9, x8, lsl #52") ==
+            "\t$(I)add$(XI)\t$(V)x8$(XV)$COM $(V)x9$(XV)$COM $(V)x8$(XV)$COM " *
+            "$(K)lsl$(XK) $(N)#52$(XN)\n"
+
+        @test hilight_arm("\tfcsel\td1, d0, d1, eq") ==
+            "\t$(I)fcsel$(XI)\t" *
+            "$(V)d1$(XV)$COM $(V)d0$(XV)$COM $(V)d1$(XV)$COM $(K)eq$(XK)\n"
+    end
+    @testset "NEON" begin
+        hilight_arm("\tvmul.f32\tq8, q9, q8") ==
+            "\t$(I)vmul.f32$(XI)\t$(V)q8$(XV)$COM $(V)q9$(XV)$COM $(V)q8$(XV)\n"
+        hilight_arm("\tvcvt.s32.f64\ts2, d20") ==
+            "\t$(I)vcvt.s32.f64$(XI)\t$(V)s2$(XV)$COM $(V)d20$(XV)\n"
+        hilight_arm("\tvld1.32\t{d18, d19}, [r1]") ==
+            "\t$(I)vld1.32$(XI)\t$U$(V)d18$(XV)$COM $(V)d19$(XV)$XU$COM $S$(V)r1$(XV)$XS\n"
+    end
+    @testset "SVE" begin
+        hilight_arm("\tld1d\tz1.d, p0/z, [x0, x4, lsl #3]") ==
+            "\t$(I)ld1d$(XI)\t$(V)z1.d$(XV)$COM " *
+            "$(V)p0$(XV)$(K)/z$(XK)$COM " *
+            "$S$(V)x0$(XV)$COM $(V)x4$(XV)$COM $(K)lsl$(XK) $(N)#3$(XN)$XS\n"
+        hilight_arm("\tb.first\tL123") == "\t$(I)b.first$(XI)\t$(L)L123$(XL)"
+    end
+end
diff --git a/stdlib/InteractiveUtils/test/runtests.jl b/stdlib/InteractiveUtils/test/runtests.jl
index a4401353011869..d31e07bd1b55d4 100644
--- a/stdlib/InteractiveUtils/test/runtests.jl
+++ b/stdlib/InteractiveUtils/test/runtests.jl
@@ -2,6 +2,10 @@
 
 using Test, InteractiveUtils
 
+@testset "highlighting" begin
+    include("highlighting.jl")
+end
+
 # test methodswith
 # `methodswith` relies on exported symbols
 export func4union, Base
@@ -56,7 +60,7 @@ end
 Base.getindex(A::Stable, i) = A.A[i]
 Base.getindex(A::Unstable, i) = A.A[i]
 
-tag = "ARRAY{FLOAT64, N}"
+tag = "ARRAY"
 @test warntype_hastag(getindex, Tuple{Unstable{Float64},Int}, tag)
 @test !warntype_hastag(getindex, Tuple{Stable{Float64,2},Int}, tag)
 @test warntype_hastag(getindex, Tuple{Stable{Float64},Int}, tag)
@@ -75,11 +79,6 @@ has_unused() = (a = rand(5))
 # some of this info with debug info.
 #@test warntype_hastag(has_unused, Tuple{}, "<optimized out>")
 
-# Make sure that "expected" unions are highlighted with warning color instead of error color
-iob = IOBuffer()
-code_warntype(IOContext(iob, :color => true), x -> (x > 1 ? "foo" : nothing), Tuple{Int64})
-str = String(take!(iob))
-@test occursin(Base.text_colors[Base.warn_color()], str)
 
 # Make sure getproperty and setproperty! works with @code_... macros
 struct T1234321
@@ -107,6 +106,7 @@ end # module ImportIntrinsics15819
 foo11122(x) = @fastmath x - 1.0
 
 # issue #11122, #13568 and #15819
+tag = "ANY"
 @test !warntype_hastag(+, Tuple{Int,Int}, tag)
 @test !warntype_hastag(-, Tuple{Int,Int}, tag)
 @test !warntype_hastag(*, Tuple{Int,Int}, tag)
@@ -154,15 +154,54 @@ mktemp() do f, io
 end
 
 module _test_varinfo_
-export x
-x = 1.0
+module inner_mod
+inner_x = 1
+end
+import Test: @test
+export x_exported
+x_exported = 1.0
+y_not_exp = 1.0
+z_larger = Vector{Float64}(undef, 3)
+a_smaller = Vector{Float64}(undef, 2)
 end
+
+using Test
+
 @test repr(varinfo(Main, r"^$")) == """
 | name | size | summary |
 |:---- | ----:|:------- |
 """
 let v = repr(varinfo(_test_varinfo_))
-    @test occursin("| x              |   8 bytes | Float64 |", v)
+    @test occursin("| x_exported     |   8 bytes | Float64 |", v)
+    @test !occursin("y_not_exp", v)
+    @test !occursin("@test", v)
+    @test !occursin("inner_x", v)
+end
+let v = repr(varinfo(_test_varinfo_, all = true))
+    @test occursin("x_exported", v)
+    @test occursin("y_not_exp", v)
+    @test !occursin("@test", v)
+    @test findfirst("a_smaller", v)[1] < findfirst("z_larger", v)[1] # check for alphabetical
+    @test !occursin("inner_x", v)
+end
+let v = repr(varinfo(_test_varinfo_, imported = true))
+    @test occursin("x_exported", v)
+    @test !occursin("y_not_exp", v)
+    @test occursin("@test", v)
+    @test !occursin("inner_x", v)
+end
+let v = repr(varinfo(_test_varinfo_, all = true, sortby = :size))
+    @test findfirst("z_larger", v)[1] < findfirst("a_smaller", v)[1] # check for size order
+end
+let v = repr(varinfo(_test_varinfo_, sortby = :summary))
+    @test findfirst("Float64", v)[1] < findfirst("Module", v)[1] # check for summary order
+end
+let v = repr(varinfo(_test_varinfo_, all = true, recursive = true))
+    @test occursin("inner_x", v)
+end
+let v = repr(varinfo(_test_varinfo_, all = true, minsize = 9))
+    @test !occursin("x_exported", v) # excluded: 8 bytes
+    @test occursin("a_smaller", v)
 end
 
 # Issue 14173
@@ -218,6 +257,10 @@ const curmod_str = curmod === Main ? "Main" : join(curmod_name, ".")
 @test (@which Int[1; 2]).name === :typed_vcat
 @test (@which [1 2;3 4]).name === :hvcat
 @test (@which Int[1 2;3 4]).name === :typed_hvcat
+# issue #39426
+let x..y = 0
+    @test (@which 1..2).name === :..
+end
 
 # issue #13464
 try
@@ -271,7 +314,7 @@ end
 
 # manually generate a broken function, which will break codegen
 # and make sure Julia doesn't crash
-@eval @noinline f_broken_code() = 0
+@eval @noinline @Base.constprop :none f_broken_code() = 0
 let m = which(f_broken_code, ())
    let src = Base.uncompressed_ast(m)
        src.code = Any[
@@ -292,7 +335,7 @@ let err = tempname(),
         redirect_stderr(new_stderr)
         println(new_stderr, "start")
         flush(new_stderr)
-        @eval @test occursin("h_broken_code", sprint(code_native, h_broken_code, ()))
+        @test occursin("h_broken_code", sprint(code_native, h_broken_code, ()))
         Libc.flush_cstdio()
         println(new_stderr, "end")
         flush(new_stderr)
@@ -302,10 +345,11 @@ let err = tempname(),
         close(new_stderr)
         let errstr = read(err, String)
             @test startswith(errstr, """start
+                end
                 Internal error: encountered unexpected error during compilation of f_broken_code:
                 ErrorException(\"unsupported or misplaced expression \"invalid\" in function f_broken_code\")
                 """) || errstr
-            @test endswith(errstr, "\nend\n") || errstr
+            @test !endswith(errstr, "\nend\n") || errstr
         end
         rm(err)
     end
@@ -403,23 +447,36 @@ if Sys.ARCH === :x86_64 || occursin(ix86, string(Sys.ARCH))
 
     rgx = r"%"
     buf = IOBuffer()
-    output = ""
     #test that the string output is at&t syntax by checking for occurrences of '%'s
     code_native(buf, linear_foo, (), syntax = :att, debuginfo = :none)
-    output = String(take!(buf))
-
+    output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
     @test occursin(rgx, output)
 
     #test that the code output is intel syntax by checking it has no occurrences of '%'
     code_native(buf, linear_foo, (), syntax = :intel, debuginfo = :none)
-    output = String(take!(buf))
-
+    output = replace(String(take!(buf)), r"#[^\r\n]+" => "")
     @test !occursin(rgx, output)
 
     code_native(buf, linear_foo, ())
     output = String(take!(buf))
-
     @test occursin(rgx, output)
+
+    @testset "binary" begin
+        # check the RET instruction (opcode: C3)
+        ret = r"^; [0-9a-f]{4}: c3$"m
+
+        # without binary flag (default)
+        code_native(buf, linear_foo, (), dump_module=false)
+        output = String(take!(buf))
+        @test !occursin(ret, output)
+
+        # with binary flag
+        for binary in false:true
+            code_native(buf, linear_foo, (); binary, dump_module=false)
+            output = String(take!(buf))
+            @test occursin(ret, output) == binary
+        end
+    end
 end
 
 @testset "error message" begin
@@ -508,9 +565,120 @@ end
 # buildbot path updating
 file, ln = functionloc(versioninfo, Tuple{})
 @test isfile(file)
+@test isfile(pathof(InteractiveUtils))
+@test isdir(pkgdir(InteractiveUtils))
+
+@testset "buildbot path updating" begin
+    file, ln = functionloc(versioninfo, Tuple{})
+    @test isfile(file)
+
+    e = try versioninfo("wat")
+    catch e
+        e
+    end
+    @test e isa MethodError
+    m = @which versioninfo()
+    s = sprint(showerror, e)
+    m = match(Regex("at (.*?):$(m.line)"), s)
+    @test isfile(expanduser(m.captures[1]))
+
+    g() = x
+    e, bt = try code_llvm(g, Tuple{Int})
+    catch e
+        e, catch_backtrace()
+    end
+    @test e isa Exception
+    s = sprint(showerror, e, bt)
+    m = match(r"(\S*InteractiveUtils[\/\\]src\S*):", s)
+    @test isfile(expanduser(m.captures[1]))
+end
 
 @testset "Issue #34434" begin
     io = IOBuffer()
     code_native(io, eltype, Tuple{Int})
     @test occursin("eltype", String(take!(io)))
 end
+
+@testset "Issue #41010" begin
+    struct A41010 end
+
+    struct B41010
+        a::A41010
+    end
+    export B41010
+
+    ms = methodswith(A41010, @__MODULE__) |> collect
+    @test ms[1].name == :B41010
+end
+
+# macro options should accept both literals and variables
+let
+    opt = false
+    @test !(first(@code_typed optimize=opt sum(1:10)).inferred)
+end
+
+@testset "@time_imports" begin
+    mktempdir() do dir
+        cd(dir) do
+            try
+                pushfirst!(LOAD_PATH, dir)
+                foo_file = joinpath(dir, "Foo3242.jl")
+                write(foo_file,
+                    """
+                    module Foo3242
+                    foo() = 1
+                    end
+                    """)
+
+                Base.compilecache(Base.PkgId("Foo3242"))
+
+                fname = tempname()
+                f = open(fname, "w")
+                redirect_stdout(f) do
+                    @eval @time_imports using Foo3242
+                end
+                close(f)
+                buf = read(fname)
+                rm(fname)
+
+                @test occursin("ms  Foo3242", String(buf))
+
+            finally
+                filter!((≠)(dir), LOAD_PATH)
+            end
+        end
+    end
+end
+
+let # `default_tt` should work with any function with one method
+    @test (code_warntype(devnull, function ()
+        sin(42)
+    end); true)
+    @test (code_warntype(devnull, function (a::Int)
+        sin(a)
+    end); true)
+    @test (code_llvm(devnull, function ()
+        sin(42)
+    end); true)
+    @test (code_llvm(devnull, function (a::Int)
+        sin(a)
+    end); true)
+    @test (code_native(devnull, function ()
+        sin(42)
+    end); true)
+    @test (code_native(devnull, function (a::Int)
+        sin(a)
+    end); true)
+end
+
+@testset "code_llvm on opaque_closure" begin
+    let ci = code_typed(+, (Int, Int))[1][1]
+        ir = Core.Compiler.inflate_ir(ci, Any[], Any[Tuple{}, Int, Int])
+        oc = Core.OpaqueClosure(ir)
+        @test (code_llvm(devnull, oc, Tuple{Int, Int}); true)
+        let io = IOBuffer()
+            code_llvm(io, oc, Tuple{})
+            @test occursin(InteractiveUtils.OC_MISMATCH_WARNING, String(take!(io)))
+        end
+    end
+end
diff --git a/stdlib/LLVMLibUnwind_jll/Project.toml b/stdlib/LLVMLibUnwind_jll/Project.toml
new file mode 100644
index 00000000000000..36c24111d4d311
--- /dev/null
+++ b/stdlib/LLVMLibUnwind_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "LLVMLibUnwind_jll"
+uuid = "47c5dbc3-30ba-59ef-96a6-123e260183d9"
+version = "12.0.1+0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
new file mode 100644
index 00000000000000..2196323ad35aa6
--- /dev/null
+++ b/stdlib/LLVMLibUnwind_jll/src/LLVMLibUnwind_jll.jl
@@ -0,0 +1,43 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/LLVMLibUnwind_jll.jl
+
+baremodule LLVMLibUnwind_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export llvmlibunwind
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+llvmlibunwind_handle = C_NULL
+llvmlibunwind_path = ""
+
+const llvmlibunwind = "libunwind"
+
+function __init__()
+    # We only dlopen something on MacOS
+    @static if Sys.isapple()
+        global llvmlibunwind_handle = dlopen(llvmlibunwind)
+        global llvmlibunwind_path = dlpath(llvmlibunwind_handle)
+        global artifact_dir = dirname(Sys.BINDIR)
+        LIBPATH[] = dirname(llvmlibunwind_path)
+        push!(LIBPATH_list, LIBPATH[])
+    end
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = @static Sys.isapple() ? true : false
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_llvmlibunwind_path() = llvmlibunwind_path
+
+end  # module LLVMLibUnwind_jll
diff --git a/stdlib/LLVMLibUnwind_jll/test/runtests.jl b/stdlib/LLVMLibUnwind_jll/test/runtests.jl
new file mode 100644
index 00000000000000..e984593ab2c25e
--- /dev/null
+++ b/stdlib/LLVMLibUnwind_jll/test/runtests.jl
@@ -0,0 +1,16 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl
+using LLVMLibUnwind_jll: llvmlibunwind_handle
+
+@testset "LLVMLibUnwind_jll" begin
+    if Sys.isapple()
+        @test dlsym(llvmlibunwind_handle, :unw_getcontext; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind_handle, :unw_init_local; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind_handle, :unw_init_local_dwarf; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind_handle, :unw_step; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind_handle, :unw_get_reg; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind_handle, :unw_set_reg; throw_error=false) !== nothing
+        @test dlsym(llvmlibunwind_handle, :unw_resume; throw_error=false) !== nothing
+    end
+end
diff --git a/stdlib/LazyArtifacts/Project.toml b/stdlib/LazyArtifacts/Project.toml
new file mode 100644
index 00000000000000..ea9afc9d12dbad
--- /dev/null
+++ b/stdlib/LazyArtifacts/Project.toml
@@ -0,0 +1,12 @@
+name = "LazyArtifacts"
+uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
+
+[deps]
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LazyArtifacts/docs/src/index.md b/stdlib/LazyArtifacts/docs/src/index.md
new file mode 100644
index 00000000000000..9de6b219c69885
--- /dev/null
+++ b/stdlib/LazyArtifacts/docs/src/index.md
@@ -0,0 +1,10 @@
+# Lazy Artifacts
+
+```@meta
+DocTestSetup = :(using LazyArtifacts)
+```
+
+In order for a package to download artifacts lazily, `LazyArtifacts` must be
+explicitly listed as a dependency of that package.
+
+For further information on artifacts, see [Artifacts](@ref).
diff --git a/stdlib/LazyArtifacts/src/LazyArtifacts.jl b/stdlib/LazyArtifacts/src/LazyArtifacts.jl
new file mode 100644
index 00000000000000..b783276ac60818
--- /dev/null
+++ b/stdlib/LazyArtifacts/src/LazyArtifacts.jl
@@ -0,0 +1,15 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module LazyArtifacts
+
+# reexport the Artifacts API
+using Artifacts: Artifacts,
+       artifact_exists, artifact_path, artifact_meta, artifact_hash,
+       select_downloadable_artifacts, find_artifacts_toml, @artifact_str
+export artifact_exists, artifact_path, artifact_meta, artifact_hash,
+       select_downloadable_artifacts, find_artifacts_toml, @artifact_str
+
+# define a function for satisfying lazy Artifact downloads
+using Pkg.Artifacts: ensure_artifact_installed
+
+end
diff --git a/stdlib/LazyArtifacts/test/Artifacts.toml b/stdlib/LazyArtifacts/test/Artifacts.toml
new file mode 100644
index 00000000000000..4b715b74c128b2
--- /dev/null
+++ b/stdlib/LazyArtifacts/test/Artifacts.toml
@@ -0,0 +1,155 @@
+[[HelloWorldC]]
+arch = "aarch64"
+git-tree-sha1 = "95fce80ec703eeb5f4270fef6821b38d51387499"
+os = "macos"
+
+    [[HelloWorldC.download]]
+    sha256 = "23f45918421881de8e9d2d471c70f6b99c26edd1dacd7803d2583ba93c8bbb28"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-apple-darwin.tar.gz"
+[[HelloWorldC]]
+arch = "aarch64"
+git-tree-sha1 = "1ccbaad776766366943fd5a66a8cbc9877ee8df9"
+libc = "glibc"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "82bca07ff25a75875936116ca977285160a2afcc4f58dd160c7b1600f55da655"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-gnu.tar.gz"
+[[HelloWorldC]]
+arch = "aarch64"
+git-tree-sha1 = "dc43ab874611cfc26641741c31b8230276d7d664"
+libc = "musl"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "36b7c554f1cb04d5282b991c66a10b2100085ac8deb2156bf52b4f7c4e406c04"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.aarch64-linux-musl.tar.gz"
+[[HelloWorldC]]
+arch = "armv6l"
+call_abi = "eabihf"
+git-tree-sha1 = "b7128521583d02d2dbe9c8de6fe156b79df781d9"
+libc = "glibc"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "5e094b9c6e4c6a77ecc8dfc2b841ac1f2157f6a81f4c47f1e0d3e9a04eec7945"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-gnueabihf.tar.gz"
+[[HelloWorldC]]
+arch = "armv6l"
+call_abi = "eabihf"
+git-tree-sha1 = "edb3893a154519d6786234f5c83994c34e11feed"
+libc = "musl"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "0a2203f061ba2ef7ce4c452ec7874be3acc6db1efac8091f85d113c3404e6bb6"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv6l-linux-musleabihf.tar.gz"
+[[HelloWorldC]]
+arch = "armv7l"
+call_abi = "eabihf"
+git-tree-sha1 = "5a8288c8a30578c0d0f24a9cded29579517ce7a8"
+libc = "glibc"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "a4392a4c8f834c97f9d8822ddfb1813d8674fa602eeaf04d6359c0a9e98478ec"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-gnueabihf.tar.gz"
+[[HelloWorldC]]
+arch = "armv7l"
+call_abi = "eabihf"
+git-tree-sha1 = "169c261b321c4dc95894cdd2db9d0d0caa84677f"
+libc = "musl"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "ed1aacbf197a6c78988725a39defad130ed31a2258f8e7846f73b459821f21d3"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.armv7l-linux-musleabihf.tar.gz"
+[[HelloWorldC]]
+arch = "i686"
+git-tree-sha1 = "fd35f9155dc424602d01fbf983eb76be3217a28f"
+libc = "glibc"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "048fcff5ff47a3cc1e84a2688935fcd658ad1c7e7c52c0e81fe88ce6c3697aba"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-gnu.tar.gz"
+[[HelloWorldC]]
+arch = "i686"
+git-tree-sha1 = "8db14df0f1d2a3ed9c6a7b053a590ca6527eb95e"
+libc = "musl"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "d521b4420392b8365de5ed0ef38a3b6c822665d7c257d3eef6f725c205bb3d78"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-linux-musl.tar.gz"
+[[HelloWorldC]]
+arch = "i686"
+git-tree-sha1 = "56f82168947b8dc7bb98038f063209b9f864eaff"
+os = "windows"
+
+    [[HelloWorldC.download]]
+    sha256 = "de578cf5ee2f457e9ff32089cbe17d03704a929980beddf4c41f4c0eb32f19c6"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.i686-w64-mingw32.tar.gz"
+[[HelloWorldC]]
+arch = "powerpc64le"
+git-tree-sha1 = "9c8902b62f5b1aaa7c2839c804bed7c3a0912c7b"
+libc = "glibc"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "63ddbfbb6ea0cafef544cc25415e7ebee6ee0a69db0878d0d4e1ed27c0ae0ab5"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.powerpc64le-linux-gnu.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "f8ab5a03697f9afc82210d8a2be1d94509aea8bc"
+os = "macos"
+
+    [[HelloWorldC.download]]
+    sha256 = "f5043338613672b12546c59359c7997c5381a9a60b86aeb951dee74de428d5e3"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-apple-darwin.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "1ed3d81088f16e3a1fa4e3d4c4c509b8c117fecf"
+libc = "glibc"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "a18212e7984b08b23bec06e8bf9286a89b9fa2e8ee0dd46af3b852fe22013a4f"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-gnu.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "c04ef757b8bb773d17a0fd0ea396e52db1c7c385"
+libc = "musl"
+os = "linux"
+
+    [[HelloWorldC.download]]
+    sha256 = "7a3d1b09410989508774f00e073ea6268edefcaba7617fc5085255ec8e82555b"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-linux-musl.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "5f7e7abf7d545a1aaa368f22e3e01ea0268870b1"
+os = "freebsd"
+
+    [[HelloWorldC.download]]
+    sha256 = "56aedffe38fe20294e93cfc2eb0a193c8e2ddda5a697b302e77ff48ac1195198"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-unknown-freebsd.tar.gz"
+[[HelloWorldC]]
+arch = "x86_64"
+git-tree-sha1 = "2f1a6d4f82cd1eea785a5141b992423c09491f1b"
+os = "windows"
+
+    [[HelloWorldC.download]]
+    sha256 = "aad77a16cbc9752f6ec62549a28c7e9f3f7f57919f6fa9fb924e0c669b11f8c4"
+    url = "https://github.com/JuliaBinaryWrappers/HelloWorldC_jll.jl/releases/download/HelloWorldC-v1.1.2+0/HelloWorldC.v1.1.2.x86_64-w64-mingw32.tar.gz"
+
+[socrates]
+git-tree-sha1 = "43563e7631a7eafae1f9f8d9d332e3de44ad7239"
+lazy = true
+
+    [[socrates.download]]
+    url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.gz"
+    sha256 = "e65d2f13f2085f2c279830e863292312a72930fee5ba3c792b14c33ce5c5cc58"
+
+    [[socrates.download]]
+    url = "https://github.com/staticfloat/small_bin/raw/master/socrates.tar.bz2"
+    sha256 = "13fc17b97be41763b02cbb80e9d048302cec3bd3d446c2ed6e8210bddcd3ac76"
diff --git a/stdlib/LazyArtifacts/test/runtests.jl b/stdlib/LazyArtifacts/test/runtests.jl
new file mode 100644
index 00000000000000..53898082cd3467
--- /dev/null
+++ b/stdlib/LazyArtifacts/test/runtests.jl
@@ -0,0 +1,31 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using LazyArtifacts
+using Test
+
+mktempdir() do tempdir
+    LazyArtifacts.Artifacts.with_artifacts_directory(tempdir) do
+        redirect_stderr(devnull) do
+            socrates_dir = artifact"socrates"
+            @test isdir(socrates_dir)
+        end
+        ex = @test_throws ErrorException artifact"HelloWorldC"
+        @test startswith(ex.value.msg, "Artifact \"HelloWorldC\" was not installed correctly. ")
+    end
+end
+
+# Need to set depwarn flag before testing deprecations
+@test success(run(setenv(`$(Base.julia_cmd()) --depwarn=no --startup-file=no -e '
+    using Artifacts, Pkg
+    using Test
+    mktempdir() do tempdir
+        Artifacts.with_artifacts_directory(tempdir) do
+            redirect_stderr(devnull) do
+                socrates_dir = @test_logs(
+                        (:warn, "using Pkg instead of using LazyArtifacts is deprecated"),
+                        artifact"socrates")
+                @test isdir(socrates_dir)
+            end
+        end
+    end'`,
+    dir=@__DIR__)))
diff --git a/stdlib/LibCURL.version b/stdlib/LibCURL.version
index 7d0ba888bac0bf..715ca76a40cdf2 100644
--- a/stdlib/LibCURL.version
+++ b/stdlib/LibCURL.version
@@ -1,2 +1,4 @@
 LIBCURL_BRANCH = master
-LIBCURL_SHA1 = 70aa2cc3f8fa4488897136a0a700b5425bfffdfa
+LIBCURL_SHA1 = fd8af649b38ae20c3ff7f5dca53753512ca00376
+LIBCURL_GIT_URL := https://github.com/JuliaWeb/LibCURL.jl.git
+LIBCURL_TAR_URL = https://api.github.com/repos/JuliaWeb/LibCURL.jl/tarball/$1
diff --git a/stdlib/LibCURL_jll/Project.toml b/stdlib/LibCURL_jll/Project.toml
index 00db12fa7abbfe..e4da34909a7eb7 100644
--- a/stdlib/LibCURL_jll/Project.toml
+++ b/stdlib/LibCURL_jll/Project.toml
@@ -1,11 +1,20 @@
 name = "LibCURL_jll"
 uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
+version = "7.81.0+0"
 
 [deps]
+LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+nghttp2_jll = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+Zlib_jll = "83775a58-1f1d-513f-b197-d71354ab007a"
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
 test = ["Test"]
+
+[compat]
+julia = "1.8"
diff --git a/stdlib/LibCURL_jll/src/LibCURL_jll.jl b/stdlib/LibCURL_jll/src/LibCURL_jll.jl
index 6692565fc03f14..0911e686786571 100644
--- a/stdlib/LibCURL_jll/src/LibCURL_jll.jl
+++ b/stdlib/LibCURL_jll/src/LibCURL_jll.jl
@@ -2,31 +2,45 @@
 
 ## dummy stub for https://github.com/JuliaBinaryWrappers/LibCURL_jll.jl
 
-module LibCURL_jll
+baremodule LibCURL_jll
+using Base, Libdl, nghttp2_jll
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
 
-using Libdl
+const PATH_list = String[]
+const LIBPATH_list = String[]
 
 export libcurl
 
 # These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
 libcurl_handle = C_NULL
 libcurl_path = ""
 
 if Sys.iswindows()
-    const libnghttp2 = "libnghttp2-14.dll"
     const libcurl = "libcurl-4.dll"
 elseif Sys.isapple()
-    const libnghttp2 = "libnghttp2.14.dylib"
-    const libcurl = "libcurl.4.dylib"
+    const libcurl = "@rpath/libcurl.4.dylib"
 else
-    const libnghttp2 = "libnghttp2.so"
     const libcurl = "libcurl.so"
 end
 
 function __init__()
-    dlopen(libnghttp2)
     global libcurl_handle = dlopen(libcurl)
     global libcurl_path = dlpath(libcurl_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libcurl_path)
+    push!(LIBPATH_list, LIBPATH[])
 end
 
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libcurl_path() = libcurl_path
+
 end  # module LibCURL_jll
diff --git a/stdlib/LibCURL_jll/test/runtests.jl b/stdlib/LibCURL_jll/test/runtests.jl
index 0e762827d346e5..2ef1be4dd85b63 100644
--- a/stdlib/LibCURL_jll/test/runtests.jl
+++ b/stdlib/LibCURL_jll/test/runtests.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 using LibCURL_jll
 
diff --git a/stdlib/LibGit2/Project.toml b/stdlib/LibGit2/Project.toml
index e0100f642c77b2..da78f70fa1005e 100644
--- a/stdlib/LibGit2/Project.toml
+++ b/stdlib/LibGit2/Project.toml
@@ -2,13 +2,16 @@ name = "LibGit2"
 uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
 
 [deps]
+Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+NetworkOptions = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
+SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 
 [extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
 Sockets = "6462fe0b-24de-5631-8697-dd941f90decc"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
 test = ["Test", "Random", "Serialization", "Sockets"]
diff --git a/stdlib/LibGit2/src/LibGit2.jl b/stdlib/LibGit2/src/LibGit2.jl
index e4ea76560b1a04..5970ae19359bfd 100644
--- a/stdlib/LibGit2/src/LibGit2.jl
+++ b/stdlib/LibGit2/src/LibGit2.jl
@@ -7,7 +7,10 @@ module LibGit2
 
 import Base: ==
 using Base: something, notnothing
+using Base64: base64decode
+using NetworkOptions
 using Printf: @printf
+using SHA: sha1, sha256
 
 export with, GitRepo, GitConfig
 
@@ -958,53 +961,66 @@ end
 
 ## lazy libgit2 initialization
 
+const ENSURE_INITIALIZED_LOCK = ReentrantLock()
+
+@noinline function throw_negative_refcount_error(x::Int)
+    error("Negative LibGit2 REFCOUNT $x\nThis shouldn't happen, please file a bug report!")
+end
+
 function ensure_initialized()
-    x = Threads.atomic_cas!(REFCOUNT, 0, 1)
-    if x < 0
-        negative_refcount_error(x)::Union{}
-    end
-    if x == 0
-        initialize()
+    lock(ENSURE_INITIALIZED_LOCK) do
+        x = Threads.atomic_cas!(REFCOUNT, 0, 1)
+        x > 0 && return
+        x < 0 && throw_negative_refcount_error(x)
+        try initialize()
+        catch
+            Threads.atomic_sub!(REFCOUNT, 1)
+            @assert REFCOUNT[] == 0
+            rethrow()
+        end
     end
     return nothing
 end
 
-@noinline function negative_refcount_error(x::Int)
-    error("Negative LibGit2 REFCOUNT $x\nThis shouldn't happen, please file a bug report!")
-end
-
 @noinline function initialize()
     @check ccall((:git_libgit2_init, :libgit2), Cint, ())
 
+    cert_loc = NetworkOptions.ca_roots()
+    cert_loc !== nothing && set_ssl_cert_locations(cert_loc)
+
     atexit() do
         # refcount zero, no objects to be finalized
         if Threads.atomic_sub!(REFCOUNT, 1) == 1
             ccall((:git_libgit2_shutdown, :libgit2), Cint, ())
         end
     end
-
-    # Look for OpenSSL env variable for CA bundle (linux only)
-    # windows and macOS use the OS native security backends
-    @static if Sys.islinux()
-        cert_loc = if "SSL_CERT_DIR" in keys(ENV)
-            ENV["SSL_CERT_DIR"]
-        elseif "SSL_CERT_FILE" in keys(ENV)
-            ENV["SSL_CERT_FILE"]
-        else
-            # If we have a bundled ca cert file, point libgit2 at that so SSL connections work.
-            abspath(ccall(:jl_get_julia_bindir, Any, ()), Base.DATAROOTDIR, "julia", "cert.pem")
-        end
-        set_ssl_cert_locations(cert_loc)
-    end
 end
 
 function set_ssl_cert_locations(cert_loc)
-    cert_file = isfile(cert_loc) ? cert_loc : Cstring(C_NULL)
-    cert_dir  = isdir(cert_loc) ? cert_loc : Cstring(C_NULL)
-    cert_file == C_NULL && cert_dir == C_NULL && return
-    @check ccall((:git_libgit2_opts, :libgit2), Cint,
-          (Cint, Cstring...),
-          Cint(Consts.SET_SSL_CERT_LOCATIONS), cert_file, cert_dir)
+    cert_file = cert_dir = Cstring(C_NULL)
+    if isdir(cert_loc) # directories
+        cert_dir = cert_loc
+    else # files, /dev/null, non-existent paths, etc.
+        cert_file = cert_loc
+    end
+    ret = @ccall "libgit2".git_libgit2_opts(
+        Consts.SET_SSL_CERT_LOCATIONS::Cint;
+        cert_file::Cstring,
+        cert_dir::Cstring)::Cint
+    ret >= 0 && return ret
+    err = Error.GitError(ret)
+    err.class == Error.SSL &&
+        err.msg == "TLS backend doesn't support certificate locations" ||
+        throw(err)
+    var = nothing
+    for v in NetworkOptions.CA_ROOTS_VARS
+        haskey(ENV, v) && (var = v)
+    end
+    @assert var !== nothing # otherwise we shouldn't be here
+    msg = """
+    Your Julia is built with a SSL/TLS engine that libgit2 doesn't know how to configure to use a file or directory of certificate authority roots, but your environment specifies one via the $var variable. If you believe your system's root certificates are safe to use, you can `export JULIA_SSL_CA_ROOTS_PATH=""` in your environment to use those instead.
+    """
+    throw(Error.GitError(err.class, err.code, chomp(msg)))
 end
 
 end # module
diff --git a/stdlib/LibGit2/src/callbacks.jl b/stdlib/LibGit2/src/callbacks.jl
index 634c841e61e8ba..5da032d3143e29 100644
--- a/stdlib/LibGit2/src/callbacks.jl
+++ b/stdlib/LibGit2/src/callbacks.jl
@@ -275,8 +275,8 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring,
         p.url = unsafe_string(url_ptr)
         m = match(URL_REGEX, p.url)
 
-        p.scheme = something(m[:scheme], "")
-        p.username = something(m[:user], "")
+        p.scheme = something(m[:scheme], SubString(""))
+        p.username = something(m[:user], SubString(""))
         p.host = m[:host]
 
         # When an explicit credential is supplied we will make sure to use the given
@@ -287,7 +287,8 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring,
             cred = p.explicit
 
             # Copy explicit credentials to avoid mutating approved credentials.
-            p.credential = deepcopy(cred)
+            # invalidation fix from cred being non-inferrable
+            p.credential = Base.invokelatest(deepcopy, cred)
 
             if isa(cred, SSHCredential)
                 allowed_types &= Cuint(Consts.CREDTYPE_SSH_KEY)
@@ -301,7 +302,8 @@ function credentials_callback(libgit2credptr::Ptr{Ptr{Cvoid}}, url_ptr::Cstring,
 
             # Perform a deepcopy as we do not want to mutate approved cached credentials
             if haskey(p.cache, cred_id)
-                p.credential = deepcopy(p.cache[cred_id])
+                # invalidation fix from p.cache[cred_id] being non-inferrable
+                p.credential = Base.invokelatest(deepcopy, p.cache[cred_id])
             end
         end
 
@@ -357,9 +359,151 @@ function fetchhead_foreach_callback(ref_name::Cstring, remote_url::Cstring,
     return Cint(0)
 end
 
+struct CertHostKey
+    parent  :: Cint
+    mask    :: Cint
+    md5     :: NTuple{16,UInt8}
+    sha1    :: NTuple{20,UInt8}
+    sha256  :: NTuple{32,UInt8}
+    type    :: Cint
+    hostkey :: Ptr{Cchar}
+    len     :: Csize_t
+end
+
+function verify_host_error(message::AbstractString)
+    printstyled(stderr, "$message\n", color = :cyan, bold = true)
+end
+
+function certificate_callback(
+    cert_p :: Ptr{CertHostKey},
+    valid  :: Cint,
+    host_p :: Ptr{Cchar},
+    data_p :: Ptr{Cvoid},
+)::Cint
+    valid != 0 && return Consts.CERT_ACCEPT
+    host = unsafe_string(host_p)
+    cert_type = unsafe_load(convert(Ptr{Cint}, cert_p))
+    transport = cert_type == Consts.CERT_TYPE_TLS ? "TLS" :
+                cert_type == Consts.CERT_TYPE_SSH ? "SSH" : nothing
+    if !NetworkOptions.verify_host(host, transport)
+        # user has opted out of host verification
+        return Consts.CERT_ACCEPT
+    end
+    if transport == "TLS"
+        # TLS verification is done before the callback and indicated with the
+        # incoming `valid` flag, so if we get here then host verification failed
+        verify_host_error("TLS host verification: the identity of the server `$host` could not be verified. Someone could be trying to man-in-the-middle your connection. It is also possible that the correct server is using an invalid certificate or that your system's certificate authority root store is misconfigured.")
+        return Consts.CERT_REJECT
+    elseif transport == "SSH"
+        # SSH verification has to be done here
+        files = NetworkOptions.ssh_known_hosts_files()
+        cert = unsafe_load(cert_p)
+        check = ssh_knownhost_check(files, host, cert)
+        valid = false
+        if check == Consts.LIBSSH2_KNOWNHOST_CHECK_MATCH
+            valid = true
+        elseif check == Consts.LIBSSH2_KNOWNHOST_CHECK_NOTFOUND
+            if Sys.which("ssh-keyscan") !== nothing
+                msg = "Please run `ssh-keyscan $host >> $(files[1])` in order to add the server to your known hosts file and then try again."
+            else
+                msg = "Please connect once using `ssh $host` in order to add the server to your known hosts file and then try again. You may not be allowed to log in (wrong user and/or no login allowed), but ssh will prompt you to add a host key for the server which will allow libgit2 to verify the server."
+            end
+            verify_host_error("SSH host verification: the server `$host` is not a known host. $msg")
+        elseif check == Consts.LIBSSH2_KNOWNHOST_CHECK_MISMATCH
+            verify_host_error("SSH host verification: the identity of the server `$host` does not match its known hosts record. Someone could be trying to man-in-the-middle your connection. It is also possible that the server has changed its key, in which case you should check with the server administrator and if they confirm that the key has been changed, update your known hosts file.")
+        else
+            @error("unexpected SSH known host check result", check)
+        end
+        return valid ? Consts.CERT_ACCEPT : Consts.CERT_REJECT
+    end
+    @error("unexpected transport encountered, refusing to validate", cert_type)
+    return Consts.CERT_REJECT
+end
+
+struct KnownHost
+    magic :: Cuint
+    node  :: Ptr{Cvoid}
+    name  :: Ptr{Cchar}
+    key   :: Ptr{Cchar}
+    type  :: Cint
+end
+
+function ssh_knownhost_check(
+    files :: AbstractVector{<:AbstractString},
+    host  :: AbstractString,
+    cert  :: CertHostKey,
+)
+    key = unsafe_wrap(Array, cert.hostkey, cert.len)
+    return ssh_knownhost_check(files, host, key)
+end
+
+function ssh_knownhost_check(
+    files :: AbstractVector{<:AbstractString},
+    host  :: AbstractString,
+    key   :: Vector{Cchar},
+)
+    if (m = match(r"^(.+):(\d+)$", host)) !== nothing
+        host = m.captures[1]
+        port = parse(Int, m.captures[2])
+    else
+        port = 22 # default SSH port
+    end
+    len = length(key)
+    mask = Consts.LIBSSH2_KNOWNHOST_TYPE_PLAIN |
+           Consts.LIBSSH2_KNOWNHOST_KEYENC_RAW
+    session = @ccall "libssh2".libssh2_session_init_ex(
+        C_NULL :: Ptr{Cvoid},
+        C_NULL :: Ptr{Cvoid},
+        C_NULL :: Ptr{Cvoid},
+        C_NULL :: Ptr{Cvoid},
+    ) :: Ptr{Cvoid}
+    for file in files
+        ispath(file) || continue
+        hosts = @ccall "libssh2".libssh2_knownhost_init(
+            session :: Ptr{Cvoid},
+        ) :: Ptr{Cvoid}
+        count = @ccall "libssh2".libssh2_knownhost_readfile(
+            hosts :: Ptr{Cvoid},
+            file  :: Cstring,
+            1     :: Cint, # standard OpenSSH format
+        ) :: Cint
+        if count < 0
+            @warn("Error parsing SSH known hosts file `$file`")
+            @ccall "libssh2".libssh2_knownhost_free(hosts::Ptr{Cvoid})::Cvoid
+            continue
+        end
+        check = @ccall "libssh2".libssh2_knownhost_checkp(
+            hosts  :: Ptr{Cvoid},
+            host   :: Cstring,
+            port   :: Cint,
+            key    :: Ptr{Cchar},
+            len    :: Csize_t,
+            mask   :: Cint,
+            C_NULL :: Ptr{Ptr{KnownHost}},
+        ) :: Cint
+        if check == Consts.LIBSSH2_KNOWNHOST_CHECK_MATCH ||
+            check == Consts.LIBSSH2_KNOWNHOST_CHECK_MISMATCH
+            @ccall "libssh2".libssh2_knownhost_free(hosts::Ptr{Cvoid})::Cvoid
+            @assert 0 == @ccall "libssh2".libssh2_session_free(session::Ptr{Cvoid})::Cint
+            return check
+        else
+            @ccall "libssh2".libssh2_knownhost_free(hosts::Ptr{Cvoid})::Cvoid
+            if check == Consts.LIBSSH2_KNOWNHOST_CHECK_FAILURE
+                @warn("Error searching SSH known hosts file `$file`")
+            end
+            continue
+        end
+    end
+    # name not found in any known hosts files
+    @assert 0 == @ccall "libssh2".libssh2_session_free(session::Ptr{Cvoid})::Cint
+    return Consts.LIBSSH2_KNOWNHOST_CHECK_NOTFOUND
+end
+
 "C function pointer for `mirror_callback`"
 mirror_cb() = @cfunction(mirror_callback, Cint, (Ptr{Ptr{Cvoid}}, Ptr{Cvoid}, Cstring, Cstring, Ptr{Cvoid}))
 "C function pointer for `credentials_callback`"
 credentials_cb() = @cfunction(credentials_callback, Cint, (Ptr{Ptr{Cvoid}}, Cstring, Cstring, Cuint, Any))
 "C function pointer for `fetchhead_foreach_callback`"
 fetchhead_foreach_cb() = @cfunction(fetchhead_foreach_callback, Cint, (Cstring, Cstring, Ptr{GitHash}, Cuint, Any))
+"C function pointer for `certificate_callback`"
+certificate_cb() = @cfunction(certificate_callback, Cint, (Ptr{CertHostKey}, Cint, Ptr{Cchar}, Ptr{Cvoid}))
diff --git a/stdlib/LibGit2/src/consts.jl b/stdlib/LibGit2/src/consts.jl
index 2aaa3e15a1d631..2bc9edaf8950bd 100644
--- a/stdlib/LibGit2/src/consts.jl
+++ b/stdlib/LibGit2/src/consts.jl
@@ -308,6 +308,34 @@ const STATUS_OPT_UPDATE_INDEX                     = Cuint(1 << 13)
 const STATUS_OPT_INCLUDE_UNREADABLE               = Cuint(1 << 14)
 const STATUS_OPT_INCLUDE_UNREADABLE_AS_UNTRACKED  = Cuint(1 << 15)
 
+# certificate types from `enum git_cert_t` in `cert.h`.
+const CERT_TYPE_TLS = 1 # GIT_CERT_X509
+const CERT_TYPE_SSH = 2 # GIT_CERT_HOSTKEY_LIBSSH2
+
+# certificate callback return values
+const PASSTHROUGH = -30
+const CERT_REJECT = -1
+const CERT_ACCEPT =  0
+
+# certificate hash flags
+const CERT_SSH_MD5    = 1 << 0
+const CERT_SSH_SHA1   = 1 << 1
+const CERT_SSH_SHA256 = 1 << 2
+
+# libssh2 known host constants
+const LIBSSH2_KNOWNHOST_TYPE_PLAIN  = 1
+const LIBSSH2_KNOWNHOST_TYPE_SHA1   = 2
+const LIBSSH2_KNOWNHOST_TYPE_CUSTOM = 3
+
+const LIBSSH2_KNOWNHOST_KEYENC_RAW    = 1 << 16
+const LIBSSH2_KNOWNHOST_KEYENC_BASE64 = 2 << 16
+
+# libssh2 host check return values
+const LIBSSH2_KNOWNHOST_CHECK_MATCH    = 0
+const LIBSSH2_KNOWNHOST_CHECK_MISMATCH = 1
+const LIBSSH2_KNOWNHOST_CHECK_NOTFOUND = 2
+const LIBSSH2_KNOWNHOST_CHECK_FAILURE  = 3
+
 @enum(GIT_SUBMODULE_IGNORE, SUBMODULE_IGNORE_UNSPECIFIED  = -1, # use the submodule's configuration
                             SUBMODULE_IGNORE_NONE         = 1,  # any change or untracked == dirty
                             SUBMODULE_IGNORE_UNTRACKED    = 2,  # dirty if tracked files change
diff --git a/stdlib/LibGit2/src/error.jl b/stdlib/LibGit2/src/error.jl
index 3e56999d5a52c9..d742cde1605b83 100644
--- a/stdlib/LibGit2/src/error.jl
+++ b/stdlib/LibGit2/src/error.jl
@@ -77,7 +77,7 @@ end
 struct GitError <: Exception
     class::Class
     code::Code
-    msg::AbstractString
+    msg::String
 end
 Base.show(io::IO, err::GitError) = print(io, "GitError(Code:$(err.code), Class:$(err.class), $(err.msg))")
 
@@ -95,8 +95,8 @@ function last_error()
     return (err_class, err_msg)
 end
 
-function GitError(code::Integer)
-    err_code = Code(code)
+GitError(err_code::Integer) = GitError(Code(err_code))
+function GitError(err_code::Code)
     err_class, err_msg = last_error()
     return GitError(err_class, err_code, err_msg)
 end
diff --git a/stdlib/LibGit2/src/gitcredential.jl b/stdlib/LibGit2/src/gitcredential.jl
index 0a442337531a79..1b97c29cd933e6 100644
--- a/stdlib/LibGit2/src/gitcredential.jl
+++ b/stdlib/LibGit2/src/gitcredential.jl
@@ -30,7 +30,12 @@ function GitCredential(cfg::GitConfig, url::AbstractString)
     fill!(cfg, parse(GitCredential, url))
 end
 
-GitCredential(cred::UserPasswordCredential, url::AbstractString) = parse(GitCredential, url)
+function GitCredential(user_pass_cred::UserPasswordCredential, url::AbstractString)
+    cred = parse(GitCredential, url)
+    cred.username = user_pass_cred.user
+    cred.password = deepcopy(user_pass_cred.pass)
+    return cred
+end
 
 Base.:(==)(c1::GitCredential, c2::GitCredential) = (c1.protocol, c1.host, c1.path, c1.username, c1.password, c1.use_http_path) ==
                                                    (c2.protocol, c2.host, c2.path, c2.username, c2.password, c2.use_http_path)
diff --git a/stdlib/LibGit2/src/rebase.jl b/stdlib/LibGit2/src/rebase.jl
index 8151217b3950bc..51b52ef006c382 100644
--- a/stdlib/LibGit2/src/rebase.jl
+++ b/stdlib/LibGit2/src/rebase.jl
@@ -83,7 +83,7 @@ function commit(rb::GitRebase, sig::GitSignature)
                       oid_ptr, rb.ptr, C_NULL, sig.ptr, C_NULL, C_NULL)
     catch err
         # TODO: return current HEAD instead
-        err.code === Error.EAPPLIED && return nothing
+        err isa GitError && err.code === Error.EAPPLIED && return nothing
         rethrow()
     end
     return oid_ptr[]
diff --git a/stdlib/LibGit2/src/types.jl b/stdlib/LibGit2/src/types.jl
index d43ca9c90dab68..2d95596cb276d9 100644
--- a/stdlib/LibGit2/src/types.jl
+++ b/stdlib/LibGit2/src/types.jl
@@ -192,7 +192,7 @@ The fields represent:
     perfdata_cb::Ptr{Cvoid}      = C_NULL
     perfdata_payload::Any        = Nothing
 end
-@assert CheckoutOptions.isinlinealloc
+@assert Base.allocatedinline(CheckoutOptions)
 
 """
     LibGit2.TransferProgress
@@ -209,7 +209,7 @@ Matches the [`git_indexer_progress`](https://libgit2.org/libgit2/#HEAD/type/git_
     indexed_deltas::Cuint   = Cuint(0)
     received_bytes::Csize_t = Csize_t(0)
 end
-@assert TransferProgress.isinlinealloc
+@assert Base.allocatedinline(TransferProgress)
 
 """
     LibGit2.RemoteCallbacks
@@ -222,7 +222,7 @@ Matches the [`git_remote_callbacks`](https://libgit2.org/libgit2/#HEAD/type/git_
     sideband_progress::Ptr{Cvoid}      = C_NULL
     completion::Ptr{Cvoid}             = C_NULL
     credentials::Ptr{Cvoid}            = C_NULL
-    certificate_check::Ptr{Cvoid}      = C_NULL
+    certificate_check::Ptr{Cvoid}      = certificate_cb()
     transfer_progress::Ptr{Cvoid}      = C_NULL
     update_tips::Ptr{Cvoid}            = C_NULL
     pack_progress::Ptr{Cvoid}          = C_NULL
@@ -230,12 +230,15 @@ Matches the [`git_remote_callbacks`](https://libgit2.org/libgit2/#HEAD/type/git_
     push_update_reference::Ptr{Cvoid}  = C_NULL
     push_negotiation::Ptr{Cvoid}       = C_NULL
     transport::Ptr{Cvoid}              = C_NULL
+    @static if LibGit2.VERSION >= v"1.2.0"
+        remote_ready::Ptr{Cvoid}       = C_NULL
+    end
     payload::Any                       = nothing
     @static if LibGit2.VERSION >= v"0.99.0"
         resolve_url::Ptr{Cvoid}        = C_NULL
     end
 end
-@assert RemoteCallbacks.isinlinealloc
+@assert Base.allocatedinline(RemoteCallbacks)
 
 """
     LibGit2.Callbacks
@@ -248,7 +251,7 @@ distinct payload. Each callback, when called, will receive `Dict` which will hol
 callback's custom payload which can be accessed using the callback name.
 
 # Examples
-```julia
+```julia-repl
 julia> c = LibGit2.Callbacks(:credentials => (LibGit2.credentials_cb(), LibGit2.CredentialPayload()));
 
 julia> LibGit2.clone(url, callbacks=c);
@@ -310,10 +313,10 @@ julia> fetch(remote, "master", options=fo)
     proxytype::Consts.GIT_PROXY  = Consts.PROXY_AUTO
     url::Cstring                 = Cstring(C_NULL)
     credential_cb::Ptr{Cvoid}    = C_NULL
-    certificate_cb::Ptr{Cvoid}   = C_NULL
+    certificate_cb::Ptr{Cvoid}   = certificate_cb()
     payload::Any                 = nothing
 end
-@assert ProxyOptions.isinlinealloc
+@assert Base.allocatedinline(ProxyOptions)
 
 """
     LibGit2.FetchOptions
@@ -347,7 +350,7 @@ The fields represent:
         custom_headers::StrArrayStruct = StrArrayStruct()
     end
 end
-@assert FetchOptions.isinlinealloc
+@assert Base.allocatedinline(FetchOptions)
 
 
 """
@@ -384,7 +387,7 @@ The fields represent:
     remote_cb::Ptr{Cvoid}               = C_NULL
     remote_cb_payload::Any              = nothing
 end
-@assert CloneOptions.isinlinealloc
+@assert Base.allocatedinline(CloneOptions)
 
 """
     LibGit2.DiffOptionsStruct
@@ -438,7 +441,7 @@ The fields represent:
     old_prefix::Cstring                      = Cstring(C_NULL)
     new_prefix::Cstring                      = Cstring(C_NULL)
 end
-@assert DiffOptionsStruct.isinlinealloc
+@assert Base.allocatedinline(DiffOptionsStruct)
 
 """
     LibGit2.DescribeOptions
@@ -468,7 +471,7 @@ The fields represent:
     only_follow_first_parent::Cint    = Cint(0)
     show_commit_oid_as_fallback::Cint = Cint(0)
 end
-@assert DescribeOptions.isinlinealloc
+@assert Base.allocatedinline(DescribeOptions)
 
 """
     LibGit2.DescribeFormatOptions
@@ -487,7 +490,7 @@ The fields represent:
     always_use_long_format::Cint = Cint(0)
     dirty_suffix::Cstring        = Cstring(C_NULL)
 end
-@assert DescribeFormatOptions.isinlinealloc
+@assert Base.allocatedinline(DescribeFormatOptions)
 
 """
     LibGit2.DiffFile
@@ -520,7 +523,7 @@ end
 
 function Base.show(io::IO, df::DiffFile)
     println(io, "DiffFile:")
-    println(io, "Oid: $(df.id))")
+    println(io, "Oid: $(df.id)")
     println(io, "Path: $(df.path)")
     println(io, "Size: $(df.size)")
 end
@@ -617,7 +620,7 @@ The fields represent:
     file_favor::GIT_MERGE_FILE_FAVOR  = Consts.MERGE_FILE_FAVOR_NORMAL
     file_flags::GIT_MERGE_FILE        = Consts.MERGE_FILE_DEFAULT
 end
-@assert MergeOptions.isinlinealloc
+@assert Base.allocatedinline(MergeOptions)
 
 """
     LibGit2.BlameOptions
@@ -647,7 +650,7 @@ The fields represent:
     min_line::Csize_t                 = Csize_t(1)
     max_line::Csize_t                 = Csize_t(0)
 end
-@assert BlameOptions.isinlinealloc
+@assert Base.allocatedinline(BlameOptions)
 
 
 """
@@ -678,7 +681,7 @@ The fields represent:
         custom_headers::StrArrayStruct = StrArrayStruct()
     end
 end
-@assert PushOptions.isinlinealloc
+@assert Base.allocatedinline(PushOptions)
 
 
 """
@@ -701,7 +704,7 @@ The fields represent:
     merge_opts::MergeOptions = MergeOptions()
     checkout_opts::CheckoutOptions = CheckoutOptions()
 end
-@assert CherrypickOptions.isinlinealloc
+@assert Base.allocatedinline(CherrypickOptions)
 
 
 """
@@ -771,7 +774,7 @@ The fields represent:
     end
     checkout_opts::CheckoutOptions = CheckoutOptions()
 end
-@assert RebaseOptions.isinlinealloc
+@assert Base.allocatedinline(RebaseOptions)
 
 """
     LibGit2.RebaseOperation
@@ -834,7 +837,7 @@ The fields represent:
         baseline::Ptr{Cvoid} = C_NULL
     end
 end
-@assert StatusOptions.isinlinealloc
+@assert Base.allocatedinline(StatusOptions)
 
 """
     LibGit2.StatusEntry
@@ -902,7 +905,7 @@ Matches the [`git_config_entry`](https://libgit2.org/libgit2/#HEAD/type/git_conf
     free::Ptr{Cvoid}    = C_NULL
     payload::Any        = nothing
 end
-@assert ConfigEntry.isinlinealloc
+@assert Base.allocatedinline(ConfigEntry)
 
 function Base.show(io::IO, ce::ConfigEntry)
     print(io, "ConfigEntry(\"", unsafe_string(ce.name), "\", \"", unsafe_string(ce.value), "\")")
@@ -1136,7 +1139,7 @@ The fields represent:
 
     boundary::Char                        = '\0'
 end
-@assert BlameHunk.isinlinealloc
+@assert Base.allocatedinline(BlameHunk)
 
 """
     with(f::Function, obj)
diff --git a/stdlib/LibGit2/src/walker.jl b/stdlib/LibGit2/src/walker.jl
index e977590eafb884..468e6899a7aa84 100644
--- a/stdlib/LibGit2/src/walker.jl
+++ b/stdlib/LibGit2/src/walker.jl
@@ -5,17 +5,17 @@
 
 A `GitRevWalker` *walks* through the *revisions* (i.e. commits) of
 a git repository `repo`. It is a collection of the commits
-in the repository, and supports iteration and calls to [`map`](@ref LibGit2.map)
-and [`count`](@ref LibGit2.count) (for instance, `count` could be used to determine
+in the repository, and supports iteration and calls to [`LibGit2.map`](@ref)
+and [`LibGit2.count`](@ref) (for instance, `LibGit2.count` could be used to determine
 what percentage of commits in a repository were made by a certain
 author).
 
 ```julia
 cnt = LibGit2.with(LibGit2.GitRevWalker(repo)) do walker
-    count((oid,repo)->(oid == commit_oid1), walker, oid=commit_oid1, by=LibGit2.Consts.SORT_TIME)
+    LibGit2.count((oid,repo)->(oid == commit_oid1), walker, oid=commit_oid1, by=LibGit2.Consts.SORT_TIME)
 end
 ```
-Here, `count` finds the number of commits along the walk with a certain `GitHash`.
+Here, `LibGit2.count` finds the number of commits along the walk with a certain `GitHash`.
 Since the `GitHash` is unique to a commit, `cnt` will be `1`.
 """
 function GitRevWalker(repo::GitRepo)
@@ -60,7 +60,7 @@ end
 
 Start the [`GitRevWalker`](@ref) `walker` at commit `cid`. This function can be used
 to apply a function to all commits since a certain year, by passing the first commit
-of that year as `cid` and then passing the resulting `w` to [`map`](@ref LibGit2.map).
+of that year as `cid` and then passing the resulting `w` to [`LibGit2.map`](@ref).
 """
 function push!(w::GitRevWalker, cid::GitHash)
     ensure_initialized()
@@ -104,7 +104,7 @@ oids = LibGit2.with(LibGit2.GitRevWalker(repo)) do walker
     LibGit2.map((oid, repo)->string(oid), walker, by=LibGit2.Consts.SORT_TIME)
 end
 ```
-Here, `map` visits each commit using the `GitRevWalker` and finds its `GitHash`.
+Here, `LibGit2.map` visits each commit using the `GitRevWalker` and finds its `GitHash`.
 """
 function map(f::Function, walker::GitRevWalker;
                   oid::GitHash=GitHash(),
@@ -146,10 +146,10 @@ are:
 # Examples
 ```julia
 cnt = LibGit2.with(LibGit2.GitRevWalker(repo)) do walker
-    count((oid, repo)->(oid == commit_oid1), walker, oid=commit_oid1, by=LibGit2.Consts.SORT_TIME)
+    LibGit2.count((oid, repo)->(oid == commit_oid1), walker, oid=commit_oid1, by=LibGit2.Consts.SORT_TIME)
 end
 ```
-`count` finds the number of commits along the walk with a certain `GitHash` `commit_oid1`, starting
+`LibGit2.count` finds the number of commits along the walk with a certain `GitHash` `commit_oid1`, starting
 the walk from that commit and moving forwards in time from it. Since the `GitHash` is unique to
 a commit, `cnt` will be `1`.
 """
diff --git a/stdlib/LibGit2/test/bad_ca_roots.jl b/stdlib/LibGit2/test/bad_ca_roots.jl
new file mode 100644
index 00000000000000..4882065167bdb3
--- /dev/null
+++ b/stdlib/LibGit2/test/bad_ca_roots.jl
@@ -0,0 +1,55 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module Test_LibGit2_https
+
+using Test, LibGit2, NetworkOptions
+
+# we currently use system SSL/TLS on macOS and Windows platforms
+# and libgit2 cannot set the CA roots path on those systems
+# if that changes, this may need to be adjusted
+const CAN_SET_CA_ROOTS_PATH = !Sys.isapple() && !Sys.iswindows()
+
+# Given this is a sub-processed test file, not using @testsets avoids
+# leaking the report print into the Base test runner report
+begin # empty CA roots file
+    # these fail for different reasons on different platforms:
+    # - on Apple & Windows you cannot set the CA roots path location
+    # - on Linux & FreeBSD you you can but these are invalid files
+    ENV["JULIA_SSL_CA_ROOTS_PATH"] = "/dev/null"
+    @test_throws LibGit2.GitError LibGit2.ensure_initialized()
+    ENV["JULIA_SSL_CA_ROOTS_PATH"] = tempname()
+    @test_throws LibGit2.GitError LibGit2.ensure_initialized()
+    # test that it still fails if called a second time
+    @test_throws LibGit2.GitError LibGit2.ensure_initialized()
+    if !CAN_SET_CA_ROOTS_PATH
+        # test that this doesn't work on macOS & Windows
+        ENV["JULIA_SSL_CA_ROOTS_PATH"] = NetworkOptions.bundled_ca_roots()
+        @test_throws LibGit2.GitError LibGit2.ensure_initialized()
+        delete!(ENV, "JULIA_SSL_CA_ROOTS_PATH")
+        @test LibGit2.ensure_initialized() === nothing
+    end
+end
+
+if CAN_SET_CA_ROOTS_PATH
+    begin # non-empty but bad CA roots file
+        # should still be possible to initialize
+        ENV["JULIA_SSL_CA_ROOTS_PATH"] = joinpath(@__DIR__, "bad_ca_roots.pem")
+        @test LibGit2.ensure_initialized() === nothing
+    end
+    mktempdir() do dir
+        repo_url = "https://github.com/JuliaLang/Example.jl"
+        begin # HTTPS clone with bad CA roots fails
+            repo_path = joinpath(dir, "Example.HTTPS")
+            c = LibGit2.CredentialPayload(allow_prompt=false, allow_git_helpers=false)
+            redirect_stderr(devnull)
+            err = try LibGit2.clone(repo_url, repo_path, credentials=c)
+            catch err
+                err
+            end
+            @test err isa LibGit2.GitError
+            @test err.msg == "user rejected certificate for github.com"
+        end
+    end
+end
+
+end # module
diff --git a/stdlib/LibGit2/test/bad_ca_roots.pem b/stdlib/LibGit2/test/bad_ca_roots.pem
new file mode 100644
index 00000000000000..36ca4150efaf09
--- /dev/null
+++ b/stdlib/LibGit2/test/bad_ca_roots.pem
@@ -0,0 +1,22 @@
+-----BEGIN CERTIFICATE-----
+MIIDtDCCApwCCQDeWk9ywtjrpTANBgkqhkiG9w0BAQsFADCBmzELMAkGA1UEBhMC
+VVMxETAPBgNVBAgMCE5ldyBZb3JrMREwDwYDVQQHDAhOZXcgWW9yazEnMCUGA1UE
+CgweVGhlIEp1bGlhIFByb2dyYW1taW5nIExhbmd1YWdlMRYwFAYDVQQDDA1qdWxp
+YWxhbmcub3JnMSUwIwYJKoZIhvcNAQkBFhZzZWN1cml0eUBqdWxpYWxhbmcub3Jn
+MB4XDTIwMTIxMTE3NTgxN1oXDTI1MTIxMDE3NTgxN1owgZsxCzAJBgNVBAYTAlVT
+MREwDwYDVQQIDAhOZXcgWW9yazERMA8GA1UEBwwITmV3IFlvcmsxJzAlBgNVBAoM
+HlRoZSBKdWxpYSBQcm9ncmFtbWluZyBMYW5ndWFnZTEWMBQGA1UEAwwNanVsaWFs
+YW5nLm9yZzElMCMGCSqGSIb3DQEJARYWc2VjdXJpdHlAanVsaWFsYW5nLm9yZzCC
+ASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBANCFgRMFlNGIgmZtMzR+Xx+t
+cPXpYnw9sZXlGy4y+P+UVW5rnFtf+OL4WkcJykmL3n/iLBKpdrndhzL7zuc6lGVv
+G6u+Gvwg5uCZ4RqiFSPP9xK7tl7H+CwrtWL/2vF1wlYC228A+NMpPyQw4XtX1L8G
+xAvJbFz8JrJ+WH1wCmVpkxA6pnpK+DZ/QKPVwa/qhB80ur3bYwlHXWwDBf8bq98f
+7wDBpJoEc3IG3GYopP6ie5KTENKxbDZjr306ZuxTLjXKrAE/OJkAiGKJ7gPlwT/E
+kFI/x/No9Y/fPWFRGiFo2L4fhP2Mohcph3PQswFKfnQlMQzztetDKWCZveB5HisC
+AwEAATANBgkqhkiG9w0BAQsFAAOCAQEAqAaFA93Q3VWWKAZBqORT+6N2iHDiOxMu
+Ol8Jjqp3Spj552NbyPPpfF2a2Q/Bh2ZAmncCoGTpuXdnowSHyXuxPey6BIvEbq0L
+FizTNuIzaA95fO/ce9LNujxliDHhKMJBZtCqBJYJ4dgd9sA4/LeAG/P3ltIY6K8P
+22AAx2bzWbeRJSqxeBodm19rOb9Yz2SOaZIam42E+xia+hsUFdGf6Zkfpa02azDm
+93EjS+DwapqxAKgkps6JuKqpRFdZd8QsVmgAcapnIt77w8sfBu9eyITF/Tm+MA8k
+IRieSypM7TK0jQ6QrNV7FKSI6eEPaqWBMwkLg3S5H6KQMntVRlcc0A==
+-----END CERTIFICATE-----
diff --git a/stdlib/LibGit2/test/known_hosts b/stdlib/LibGit2/test/known_hosts
new file mode 100644
index 00000000000000..833846c26cf0c4
--- /dev/null
+++ b/stdlib/LibGit2/test/known_hosts
@@ -0,0 +1,4 @@
+github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==
+gitlab.com ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCsj2bNKTBSpIYDEGk9KxsGh3mySTRgMtXL583qmBpzeQ+jqCMRgBqB98u3z++J1sKlXHWfM9dyhSevkMwSbhoR8XIq/U0tCNyokEi/ueaBMCvbcTHhO7FcwzY92WK4Yt0aGROY5qX2UKSeOvuP4D6TPqKF1onrSzH9bx9XUf2lEdWT/ia1NEKjunUqu1xOB/StKDHMoX4/OKyIzuS0q/T1zOATthvasJFoPrAjkohTyaDUz2LN5JoH839hViyEG82yB+MjcFV5MU3N1l1QL3cVUCh93xSaua1N85qivl+siMkPGbO5xR/En4iEY6K2XPASUEMaieWVNTRCtJ4S8H+9
+gitlab.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFSMqzJeV9rUzU4kWitGjeR4PWSa29SPqJ1fVkhtj3Hw9xjLVXVYrU9QlYWrOLXBpQ6KWjbjTDTdDkoohFzgbEY=
+gitlab.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIAfuCHKVTjquxvt6CM6tdG4SLp1Btn/nOeHHE5UOzRdf
diff --git a/stdlib/LibGit2/test/libgit2.jl b/stdlib/LibGit2/test/libgit2.jl
index 54950132ea3f84..2a74ed4908dfc4 100644
--- a/stdlib/LibGit2/test/libgit2.jl
+++ b/stdlib/LibGit2/test/libgit2.jl
@@ -124,18 +124,21 @@ end
 
 function get_global_dir()
     buf = Ref(LibGit2.Buffer())
-    LibGit2.@check ccall((:git_libgit2_opts, :libgit2), Cint,
-                         (Cint, Cint, Ptr{LibGit2.Buffer}),
-                         LibGit2.Consts.GET_SEARCH_PATH, LibGit2.Consts.CONFIG_LEVEL_GLOBAL, buf)
+
+    LibGit2.@check @ccall "libgit2".git_libgit2_opts(
+        LibGit2.Consts.GET_SEARCH_PATH::Cint;
+        LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint,
+        buf::Ptr{LibGit2.Buffer})::Cint
     path = unsafe_string(buf[].ptr)
     LibGit2.free(buf)
     return path
 end
 
 function set_global_dir(dir)
-    LibGit2.@check ccall((:git_libgit2_opts, :libgit2), Cint,
-                         (Cint, Cint, Cstring),
-                         LibGit2.Consts.SET_SEARCH_PATH, LibGit2.Consts.CONFIG_LEVEL_GLOBAL, dir)
+    LibGit2.@check @ccall "libgit2".git_libgit2_opts(
+        LibGit2.Consts.SET_SEARCH_PATH::Cint;
+        LibGit2.Consts.CONFIG_LEVEL_GLOBAL::Cint,
+        dir::Cstring)::Cint
     return
 end
 
@@ -597,6 +600,23 @@ end
         github_regex_test("ssh://git@github.com/$user/$repo", user, repo)
         @test !occursin(LibGit2.GITHUB_REGEX, "git@notgithub.com/$user/$repo.git")
     end
+
+    @testset "UserPasswordCredential/url constructor" begin
+        user_pass_cred = LibGit2.UserPasswordCredential("user", "*******")
+        url = "https://github.com"
+        expected_cred = LibGit2.GitCredential("https", "github.com", nothing, "user", "*******")
+
+        cred = LibGit2.GitCredential(user_pass_cred, url)
+        @test cred == expected_cred
+
+        # Shredding the UserPasswordCredential shouldn't result in information being lost
+        # inside of a GitCredential.
+        Base.shred!(user_pass_cred)
+        @test cred == expected_cred
+
+        Base.shred!(cred)
+        Base.shred!(expected_cred)
+    end
 end
 
 mktempdir() do dir
@@ -614,7 +634,7 @@ mktempdir() do dir
     commit_oid1 = LibGit2.GitHash()
     commit_oid2 = LibGit2.GitHash()
     commit_oid3 = LibGit2.GitHash()
-    master_branch = "master"
+    default_branch = LibGit2.getconfig("init.defaultBranch", "master")
     test_branch = "test_branch"
     test_branch2 = "test_branch_two"
     tag1 = "tag1"
@@ -938,19 +958,19 @@ mktempdir() do dir
                     # various branch properties
                     @test LibGit2.isbranch(brref)
                     @test !LibGit2.isremote(brref)
-                    @test LibGit2.name(brref) == "refs/heads/master"
-                    @test LibGit2.shortname(brref) == master_branch
+                    @test LibGit2.name(brref) == "refs/heads/$(default_branch)"
+                    @test LibGit2.shortname(brref) == default_branch
                     @test LibGit2.ishead(brref)
                     @test LibGit2.upstream(brref) === nothing
 
                     # showing the GitReference to this branch
                     show_strs = split(sprint(show, brref), "\n")
                     @test show_strs[1] == "GitReference:"
-                    @test show_strs[2] == "Branch with name refs/heads/master"
+                    @test show_strs[2] == "Branch with name refs/heads/$(default_branch)"
                     @test show_strs[3] == "Branch is HEAD."
                     @test repo.ptr == LibGit2.repository(brref).ptr
-                    @test brnch == master_branch
-                    @test LibGit2.headname(repo) == master_branch
+                    @test brnch == default_branch
+                    @test LibGit2.headname(repo) == default_branch
 
                     # create a branch *without* setting its tip as HEAD
                     LibGit2.branch!(repo, test_branch, string(commit_oid1), set_head=false)
@@ -971,7 +991,7 @@ mktempdir() do dir
                     end
                 end
                 branches = map(b->LibGit2.shortname(b[1]), LibGit2.GitBranchIter(repo))
-                @test master_branch in branches
+                @test default_branch in branches
                 @test test_branch in branches
             end
         end
@@ -1030,7 +1050,7 @@ mktempdir() do dir
                 @test tag2 in tags
 
                 refs = LibGit2.ref_list(repo)
-                @test refs == ["refs/heads/master", "refs/heads/test_branch", "refs/tags/tag1", "refs/tags/tag2"]
+                @test refs == ["refs/heads/$(default_branch)", "refs/heads/test_branch", "refs/tags/tag1", "refs/tags/tag2"]
                 # test deleting a tag
                 LibGit2.tag_delete(repo, tag1)
                 tags = LibGit2.tag_list(repo)
@@ -1314,7 +1334,7 @@ mktempdir() do dir
             add_and_commit_file(repo, "file1", "111\n")
             # switch back, add a commit, try to merge
             # from branch/merge_a
-            LibGit2.branch!(repo, "master")
+            LibGit2.branch!(repo, default_branch)
 
             # test for showing a Reference to a non-HEAD branch
             brref = LibGit2.GitReference(repo, "refs/heads/branch/merge_a")
@@ -1327,7 +1347,7 @@ mktempdir() do dir
 
             add_and_commit_file(repo, "file2", "222\n")
             upst_ann = LibGit2.GitAnnotated(repo, "branch/merge_a")
-            head_ann = LibGit2.GitAnnotated(repo, "master")
+            head_ann = LibGit2.GitAnnotated(repo, default_branch)
 
             # (fail to) merge them because we can't fastforward
             @test_logs (:warn,"Cannot perform fast-forward merge") !LibGit2.merge!(repo, [upst_ann], true)
@@ -1340,7 +1360,7 @@ mktempdir() do dir
             mv(joinpath(LibGit2.path(repo),"file1"),joinpath(LibGit2.path(repo),"mvfile1"))
             LibGit2.add!(repo, "mvfile1")
             LibGit2.commit(repo, "move file1")
-            LibGit2.branch!(repo, "master")
+            LibGit2.branch!(repo, default_branch)
             upst_ann = LibGit2.GitAnnotated(repo, "branch/merge_b")
             rename_flag = Cint(0)
             rename_flag = LibGit2.toggle(rename_flag, Cint(0)) # turns on the find renames opt
@@ -1418,14 +1438,14 @@ mktempdir() do dir
             # the rebase should fail.
             @test_throws LibGit2.GitError LibGit2.rebase!(repo)
             # Try rebasing on master instead
-            newhead = LibGit2.rebase!(repo, master_branch)
+            newhead = LibGit2.rebase!(repo, default_branch)
             @test newhead == head_oid
 
             # Switch to the master branch
-            LibGit2.branch!(repo, master_branch)
+            LibGit2.branch!(repo, default_branch)
 
             fetch_heads = LibGit2.fetchheads(repo)
-            @test fetch_heads[1].name == "refs/heads/master"
+            @test fetch_heads[1].name == "refs/heads/$(default_branch)"
             @test fetch_heads[1].ismerge == true # we just merged master
             @test fetch_heads[2].name == "refs/heads/test_branch"
             @test fetch_heads[2].ismerge == false
@@ -1465,7 +1485,7 @@ mktempdir() do dir
 
                 # all tag in place
                 branches = map(b->LibGit2.shortname(b[1]), LibGit2.GitBranchIter(repo))
-                @test master_branch in branches
+                @test default_branch in branches
                 @test test_branch in branches
 
                 # issue #16337
@@ -1663,7 +1683,7 @@ mktempdir() do dir
             # add yet another file
             add_and_commit_file(repo, "file4", "444\n")
             # rebase with onto
-            newhead = LibGit2.rebase!(repo, "branch/a", "master")
+            newhead = LibGit2.rebase!(repo, "branch/a", default_branch)
 
             newerhead = LibGit2.head_oid(repo)
             @test newerhead == newhead
@@ -1673,7 +1693,7 @@ mktempdir() do dir
             pre_abort_head = add_and_commit_file(repo, "file6", "666\n")
             # Rebase type
             head_ann = LibGit2.GitAnnotated(repo, "branch/a")
-            upst_ann = LibGit2.GitAnnotated(repo, "master")
+            upst_ann = LibGit2.GitAnnotated(repo, default_branch)
             rb = LibGit2.GitRebase(repo, head_ann, upst_ann)
             @test_throws BoundsError rb[3]
             @test_throws BoundsError rb[0]
@@ -1698,7 +1718,7 @@ mktempdir() do dir
 
             a_head = LibGit2.head_oid(repo)
             add_and_commit_file(repo, "merge_file1", "111\n")
-            LibGit2.branch!(repo, "master")
+            LibGit2.branch!(repo, default_branch)
             a_head_ann = LibGit2.GitAnnotated(repo, "branch/merge_a")
             # merge returns true if successful
             @test_logs (:info,"Review and commit merged changes") LibGit2.merge!(repo, [a_head_ann])
@@ -1723,6 +1743,19 @@ mktempdir() do dir
         end
     end
 
+    @testset "checkout_head" begin
+        LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo
+            # modify file
+            repo_file = open(joinpath(cache_repo,test_file), "a")
+            println(repo_file, commit_msg1 * randstring(10))
+            close(repo_file)
+            # and checkout HEAD once more
+            LibGit2.checkout_head(repo, options=LibGit2.CheckoutOptions(checkout_strategy=LibGit2.Consts.CHECKOUT_FORCE))
+            @test LibGit2.headname(repo) == default_branch
+            @test !LibGit2.isdirty(repo)
+        end
+    end
+
     @testset "checkout/headname" begin
         LibGit2.with(LibGit2.GitRepo(cache_repo)) do repo
             LibGit2.checkout!(repo, string(commit_oid1))
@@ -1731,7 +1764,6 @@ mktempdir() do dir
         end
     end
 
-
     if Sys.isunix()
         @testset "checkout/proptest" begin
             LibGit2.with(LibGit2.GitRepo(test_repo)) do repo
@@ -2118,6 +2150,50 @@ mktempdir() do dir
                 end
             end
         end
+
+        @testset "approve/reject with UserPasswordCredential" begin
+            # In order to use the "store" credential helper `git` needs to be installed and
+            # on the path.
+            if GIT_INSTALLED
+                config_path = joinpath(dir, config_file)
+                isfile(config_path) && rm(config_path)
+
+                credential_path = joinpath(dir, ".git-credentials")
+                isfile(credential_path) && rm(credential_path)
+
+                LibGit2.with(LibGit2.GitConfig(config_path, LibGit2.Consts.CONFIG_LEVEL_APP)) do cfg
+                    query = LibGit2.GitCredential("https", "mygithost")
+                    filled = LibGit2.GitCredential("https", "mygithost", nothing, "alice", "1234")
+                    user_pass_cred = LibGit2.UserPasswordCredential("alice", "1234")
+                    url = "https://mygithost"
+
+                    # Requires `git` to be installed and available on the path.
+                    LibGit2.set!(cfg, "credential.helper", "store --file \"$credential_path\"")
+                    helper = only(LibGit2.credential_helpers(cfg, query))
+
+                    @test !isfile(credential_path)
+
+                    Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result
+                        @test result == query
+                    end
+
+                    LibGit2.approve(cfg, user_pass_cred, url)
+                    @test isfile(credential_path)
+                    Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result
+                        @test result == filled
+                    end
+
+                    LibGit2.reject(cfg, user_pass_cred, url)
+                    Base.shred!(LibGit2.fill!(helper, deepcopy(query))) do result
+                        @test result == query
+                    end
+
+                    Base.shred!(query)
+                    Base.shred!(filled)
+                    Base.shred!(user_pass_cred)
+                end
+            end
+        end
     end
 
     # The following tests require that we can fake a TTY so that we can provide passwords
@@ -2390,6 +2466,76 @@ mktempdir() do dir
             Base.shred!(valid_p_cred)
         end
 
+        @testset "SSH known host checking" begin
+            CHECK_MATCH    = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_MATCH
+            CHECK_MISMATCH = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_MISMATCH
+            CHECK_NOTFOUND = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_NOTFOUND
+            CHECK_FAILURE  = LibGit2.Consts.LIBSSH2_KNOWNHOST_CHECK_FAILURE
+
+            # randomly generated hashes matching no hosts
+            random_key = collect(reinterpret(Cchar, codeunits("\0\0\0\assh-rsa\0\0\0\x01#\0\0\0\x81\0¿\x95\xbe9\xfc9g\n:\xcf&\x06YA\xb5`\x97\xc13A\xbf;T+C\xc9Ut J>\xc5ҍ\xc4_S\x8a \xc1S\xeb\x15FH\xd2a\x04.D\xeeb\xac\x8f\xdb\xcc\xef\xc4l G\x9bR\xafp\x17s<=\x12\xab\x04ڳif\\A\x9ba0\xde%\xdei\x04\xc3\r\xb3\x81w\x88\xec\xc0f\x15A;AÝ\xc0r\xa1\u5fe\xd3\xf6)8\x8e\xa3\xcbc\xee\xdd\$\x04\x0f\xc1\xb4\x1f\xcc\xecK\xe0\x99")))
+            # hashes of the unique github.com fingerprint
+            github_key = collect(reinterpret(Cchar, codeunits("\0\0\0\assh-rsa\0\0\0\x01#\0\0\x01\x01\0\xab`;\x85\x11\xa6vy\xbd\xb5@\xdb;\xd2\x03K\0J\xe96\xd0k\xe3\xd7`\xf0\x8f˪\xdbN\xb4\xedóǑ\xc7\n\xae\x9at\xc9Xi\xe4wD!«\xea\x92\xe5T0_8\xb5\xfdAK2\b\xe5t\xc37\xe3 \x93e\x18F,vRɋ1\xe1n}\xa6R;\xd2\0t*dD\xd8?\xcd^\x172\xd06sǷ\x81\x15UH{U\xf0\xc4IO8)\xec\xe6\x0f\x94%Z\x95˚\xf57\xd7\xfc\x8c\x7f\xe4\x9e\xf3\x18GN\xf2\x92\t\x92\x05\"e\xb0\xa0n\xa6mJ\x16\x7f\xd9\xf3\xa4\x8a\x1aJ0~\xc1\xea\xaaQI\xa9i\xa6\xac]V\xa5\xefb~Q}\x81\xfbdO[t\\OG\x8e\xcd\b*\x94\x92\xf7D\xaa\xd3&\xf7l\x8cM\xc9\x10\vƫyF\x1d&W\xcbo\x06\xde\xc9.kd\xa6V/\xf0\xe3 \x84\xea\x06\xce\x0e\xa9\xd3ZX;\xfb\0\xbaӌ\x9d\x19p<T\x98\x92\xe5\xaaxܕ\xe2PQ@i")))
+            # hashes of the middle github.com fingerprint
+            gitlab_key = collect(reinterpret(Cchar, codeunits("\0\0\0\vssh-ed25519\0\0\0 \a\xee\br\x95N:\xae\xc6\xfbz\bέtn\x12.\x9dA\xb6\x7f\xe79\xe1\xc7\x13\x95\x0e\xcd\x17_")))
+
+            # various known hosts files
+            no_file = tempname()
+            empty_file = tempname(); touch(empty_file)
+            known_hosts = joinpath(@__DIR__, "known_hosts")
+            wrong_hosts = tempname()
+            open(wrong_hosts, write=true) do io
+                for line in eachline(known_hosts)
+                    words = split(line)
+                    words[1] = words[1] == "github.com" ? "gitlab.com" :
+                               words[1] == "gitlab.com" ? "github.com" :
+                               words[1]
+                    println(io, join(words, " "))
+                end
+            end
+
+            @testset "unknown host" begin
+                host = "unknown.host"
+                for key in [github_key, gitlab_key, random_key],
+                    files in [[no_file], [empty_file], [known_hosts]]
+                    check = LibGit2.ssh_knownhost_check(files, host, key)
+                    @test check == CHECK_NOTFOUND
+                end
+            end
+
+            @testset "known hosts" begin
+                for (host, key) in [
+                        "github.com" => github_key,
+                        "gitlab.com" => gitlab_key,
+                    ]
+                    for files in [[no_file], [empty_file]]
+                        check = LibGit2.ssh_knownhost_check(files, host, key)
+                        @test check == CHECK_NOTFOUND
+                    end
+                    for files in [
+                            [known_hosts],
+                            [empty_file, known_hosts],
+                            [known_hosts, empty_file],
+                            [known_hosts, wrong_hosts],
+                        ]
+                        check = LibGit2.ssh_knownhost_check(files, host, key)
+                        @test check == CHECK_MATCH
+                    end
+                    for files in [
+                            [wrong_hosts],
+                            [empty_file, wrong_hosts],
+                            [wrong_hosts, empty_file],
+                            [wrong_hosts, known_hosts],
+                        ]
+                        check = LibGit2.ssh_knownhost_check(files, host, key)
+                        @test check == CHECK_MISMATCH
+                    end
+                end
+            end
+
+            rm(empty_file)
+        end
+
         @testset "HTTPS credential prompt" begin
             url = "https://github.com/test/package.jl"
 
diff --git a/stdlib/LibGit2/test/online.jl b/stdlib/LibGit2/test/online.jl
index 888af97fe0a694..96b6bf5b22371d 100644
--- a/stdlib/LibGit2/test/online.jl
+++ b/stdlib/LibGit2/test/online.jl
@@ -90,4 +90,12 @@ mktempdir() do dir
     end
 end
 
+# needs to be run in separate process so it can re-initialize libgit2
+# with a useless self-signed certificate authority root certificate
+file = joinpath(@__DIR__, "bad_ca_roots.jl")
+cmd = `$(Base.julia_cmd()) --depwarn=no --startup-file=no $file`
+if !success(pipeline(cmd; stdout=stdout, stderr=stderr))
+    error("bad CA roots tests failed, cmd : $cmd")
+end
+
 end # module
diff --git a/stdlib/LibGit2_jll/Project.toml b/stdlib/LibGit2_jll/Project.toml
new file mode 100644
index 00000000000000..5c4c42945a2a91
--- /dev/null
+++ b/stdlib/LibGit2_jll/Project.toml
@@ -0,0 +1,18 @@
+name = "LibGit2_jll"
+uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
+version = "1.3.0+0"
+
+[deps]
+MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+LibSSH2_jll = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.8"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LibGit2_jll/src/LibGit2_jll.jl b/stdlib/LibGit2_jll/src/LibGit2_jll.jl
new file mode 100644
index 00000000000000..1cd7aaa79f8141
--- /dev/null
+++ b/stdlib/LibGit2_jll/src/LibGit2_jll.jl
@@ -0,0 +1,46 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/LibGit2_jll.jl
+
+baremodule LibGit2_jll
+using Base, Libdl, MbedTLS_jll, LibSSH2_jll
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libgit2
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libgit2_handle = C_NULL
+libgit2_path = ""
+
+if Sys.iswindows()
+    const libgit2 = "libgit2.dll"
+elseif Sys.isapple()
+    const libgit2 = "@rpath/libgit2.1.3.dylib"
+else
+    const libgit2 = "libgit2.so.1.3"
+end
+
+function __init__()
+    global libgit2_handle = dlopen(libgit2)
+    global libgit2_path = dlpath(libgit2_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libgit2_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libgit2_path() = libgit2_path
+
+end  # module LibGit2_jll
diff --git a/stdlib/LibGit2_jll/test/runtests.jl b/stdlib/LibGit2_jll/test/runtests.jl
new file mode 100644
index 00000000000000..3a26e26d87ebdc
--- /dev/null
+++ b/stdlib/LibGit2_jll/test/runtests.jl
@@ -0,0 +1,11 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, LibGit2_jll
+
+@testset "LibGit2_jll" begin
+    major = Ref{Cint}(0)
+    minor = Ref{Cint}(0)
+    patch = Ref{Cint}(0)
+    @test ccall((:git_libgit2_version, libgit2), Cint, (Ref{Cint}, Ref{Cint}, Ref{Cint}), major, minor, patch) == 0
+    @test VersionNumber(major[], minor[], patch[]) == v"1.3.0"
+end
diff --git a/stdlib/LibSSH2_jll/Project.toml b/stdlib/LibSSH2_jll/Project.toml
new file mode 100644
index 00000000000000..8334a86d1c23ad
--- /dev/null
+++ b/stdlib/LibSSH2_jll/Project.toml
@@ -0,0 +1,17 @@
+name = "LibSSH2_jll"
+uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
+version = "1.10.2+0"
+
+[deps]
+MbedTLS_jll = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.8"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
new file mode 100644
index 00000000000000..66987b30d090cd
--- /dev/null
+++ b/stdlib/LibSSH2_jll/src/LibSSH2_jll.jl
@@ -0,0 +1,47 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/LibSSH2_jll.jl
+
+baremodule LibSSH2_jll
+using Base, Libdl, MbedTLS_jll
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libssh2
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libssh2_handle = C_NULL
+libssh2_path = ""
+
+if Sys.iswindows()
+    const libssh2 = "libssh2.dll"
+elseif Sys.isapple()
+    const libssh2 = "@rpath/libssh2.1.dylib"
+else
+    const libssh2 = "libssh2.so.1"
+end
+
+function __init__()
+    global libssh2_handle = dlopen(libssh2)
+    global libssh2_path = dlpath(libssh2_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libssh2_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libssh2_path() = libssh2_path
+
+end  # module LibSSH2_jll
diff --git a/stdlib/LibSSH2_jll/test/runtests.jl b/stdlib/LibSSH2_jll/test/runtests.jl
new file mode 100644
index 00000000000000..58cfd9ac024cc4
--- /dev/null
+++ b/stdlib/LibSSH2_jll/test/runtests.jl
@@ -0,0 +1,8 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, LibSSH2_jll
+
+@testset "LibSSH2_jll" begin
+    # We use a `startswith()` here because when built from source, this returns "1.9.0_DEV"
+    vn = startswith(unsafe_string(ccall((:libssh2_version, libssh2), Cstring, (Cint,), 0)), "1.9.0")
+end
diff --git a/stdlib/LibUV_jll/Project.toml b/stdlib/LibUV_jll/Project.toml
new file mode 100644
index 00000000000000..9441fbf8572630
--- /dev/null
+++ b/stdlib/LibUV_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "LibUV_jll"
+uuid = "183b4373-6708-53ba-ad28-60e28bb38547"
+version = "2.0.1+6"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LibUV_jll/src/LibUV_jll.jl b/stdlib/LibUV_jll/src/LibUV_jll.jl
new file mode 100644
index 00000000000000..e4897138cc6cc2
--- /dev/null
+++ b/stdlib/LibUV_jll/src/LibUV_jll.jl
@@ -0,0 +1,46 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/LibUV_jll.jl
+
+baremodule LibUV_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libuv
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libuv_handle = C_NULL
+libuv_path = ""
+
+if Sys.iswindows()
+    const libuv = "libuv-2.dll"
+elseif Sys.isapple()
+    const libuv = "@rpath/libuv.2.dylib"
+else
+    const libuv = "libuv.so.2"
+end
+
+function __init__()
+    global libuv_handle = dlopen(libuv)
+    global libuv_path = dlpath(libuv_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libuv_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libuv_path() = libuv_path
+
+end  # module LibUV_jll
diff --git a/stdlib/LibUV_jll/test/runtests.jl b/stdlib/LibUV_jll/test/runtests.jl
new file mode 100644
index 00000000000000..26c50b92c0c2dc
--- /dev/null
+++ b/stdlib/LibUV_jll/test/runtests.jl
@@ -0,0 +1,8 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, LibUV_jll
+
+@testset "LibUV_jll" begin
+    vn = VersionNumber(unsafe_string(ccall((:uv_version_string, libuv), Cstring, ())))
+    @test vn == v"2.0.0-dev"
+end
diff --git a/stdlib/LibUnwind_jll/Project.toml b/stdlib/LibUnwind_jll/Project.toml
new file mode 100644
index 00000000000000..e22105ddacd473
--- /dev/null
+++ b/stdlib/LibUnwind_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "LibUnwind_jll"
+uuid = "745a5e78-f969-53e9-954f-d19f2f74f4e3"
+version = "1.5.0+0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
new file mode 100644
index 00000000000000..ae79e790a999ba
--- /dev/null
+++ b/stdlib/LibUnwind_jll/src/LibUnwind_jll.jl
@@ -0,0 +1,43 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/LibUnwind_jll.jl
+
+baremodule LibUnwind_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libunwind
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libunwind_handle = C_NULL
+libunwind_path = ""
+
+const libunwind = "libunwind.so.8"
+
+function __init__()
+    # We only do something on Linux/FreeBSD
+    @static if Sys.islinux() || Sys.isfreebsd()
+        global libunwind_handle = dlopen(libunwind)
+        global libunwind_path = dlpath(libunwind_handle)
+        global artifact_dir = dirname(Sys.BINDIR)
+        LIBPATH[] = dirname(libunwind_path)
+        push!(LIBPATH_list, LIBPATH[])
+    end
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = @static (Sys.islinux() || Sys.isfreebsd()) ? true : false
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libunwind_path() = libunwind_path
+
+end  # module LibUnwind_jll
diff --git a/stdlib/LibUnwind_jll/test/runtests.jl b/stdlib/LibUnwind_jll/test/runtests.jl
new file mode 100644
index 00000000000000..1cb33dd6729e3a
--- /dev/null
+++ b/stdlib/LibUnwind_jll/test/runtests.jl
@@ -0,0 +1,9 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, LibUnwind_jll
+
+@testset "LibUnwind_jll" begin
+    if !Sys.isapple() && !Sys.iswindows()
+        @test dlsym(LibUnwind_jll.libunwind_handle, :unw_backtrace; throw_error=false) !== nothing
+    end
+end
diff --git a/stdlib/Libdl/src/Libdl.jl b/stdlib/Libdl/src/Libdl.jl
index 71e648a3eca3e9..df3f62c807fede 100644
--- a/stdlib/Libdl/src/Libdl.jl
+++ b/stdlib/Libdl/src/Libdl.jl
@@ -1,4 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+
 module Libdl
 # Just re-export Base.Libc.Libdl:
 export DL_LOAD_PATH, RTLD_DEEPBIND, RTLD_FIRST, RTLD_GLOBAL, RTLD_LAZY, RTLD_LOCAL,
diff --git a/stdlib/Libdl/test/runtests.jl b/stdlib/Libdl/test/runtests.jl
index 6d2b83b28b323f..5c06dd929f1a1c 100644
--- a/stdlib/Libdl/test/runtests.jl
+++ b/stdlib/Libdl/test/runtests.jl
@@ -19,7 +19,7 @@ end
       if Base.DARWIN_FRAMEWORK
           return occursin(Regex("^$(Base.DARWIN_FRAMEWORK_NAME)(?:_debug)?\$"), basename(dl))
       else
-          return occursin(Regex("^libjulia(?:.*)\\.$(Libdl.dlext)(?:\\..+)?\$"), basename(dl))
+          return occursin(Regex("^libjulia-internal(?:.*)\\.$(Libdl.dlext)(?:\\..+)?\$"), basename(dl))
       end
     end) == 1 # look for something libjulia-like (but only one)
 
@@ -29,9 +29,8 @@ end
 
 cd(@__DIR__) do
 
-# Find the library directory by finding the path of libjulia (or libjulia-debug, as the case may be)
-# and then adding on /julia to that directory path to get the private library directory, if we need
-# to (where "need to" is defined as private_libdir/julia/libccalltest.dlext exists
+# Find the library directory by finding the path of libjulia-internal (or libjulia-internal-debug,
+# as the case may be) to get the private library directory
 private_libdir = if Base.DARWIN_FRAMEWORK
     if ccall(:jl_is_debugbuild, Cint, ()) != 0
         dirname(abspath(Libdl.dlpath(Base.DARWIN_FRAMEWORK_NAME * "_debug")))
@@ -39,13 +38,9 @@ private_libdir = if Base.DARWIN_FRAMEWORK
         joinpath(dirname(abspath(Libdl.dlpath(Base.DARWIN_FRAMEWORK_NAME))),"Frameworks")
     end
 elseif ccall(:jl_is_debugbuild, Cint, ()) != 0
-    dirname(abspath(Libdl.dlpath("libjulia-debug")))
+    dirname(abspath(Libdl.dlpath("libjulia-internal-debug")))
 else
-    dirname(abspath(Libdl.dlpath("libjulia")))
-end
-
-if isfile(joinpath(private_libdir,"julia","libccalltest."*Libdl.dlext))
-    private_libdir = joinpath(private_libdir, "julia")
+    dirname(abspath(Libdl.dlpath("libjulia-internal")))
 end
 
 @test !isempty(Libdl.find_library(["libccalltest"], [private_libdir]))
diff --git a/stdlib/LinearAlgebra/Project.toml b/stdlib/LinearAlgebra/Project.toml
index eb18e1b2ff0855..46653aa7952091 100644
--- a/stdlib/LinearAlgebra/Project.toml
+++ b/stdlib/LinearAlgebra/Project.toml
@@ -3,11 +3,12 @@ uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 
 [deps]
 Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
+OpenBLAS_jll = "4536629a-c528-5b80-bd46-f80d51c5b363"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 
 [targets]
-test = ["Test", "Random", "SparseArrays"]
+test = ["Test", "Random"]
diff --git a/stdlib/LinearAlgebra/docs/src/index.md b/stdlib/LinearAlgebra/docs/src/index.md
index 52e78609992878..88e700685a0d3f 100644
--- a/stdlib/LinearAlgebra/docs/src/index.md
+++ b/stdlib/LinearAlgebra/docs/src/index.md
@@ -1,7 +1,7 @@
 # [Linear Algebra](@id man-linalg)
 
 ```@meta
-DocTestSetup = :(using LinearAlgebra, SparseArrays, SuiteSparse)
+DocTestSetup = :(using LinearAlgebra)
 ```
 
 In addition to (and as part of) its support for multi-dimensional arrays, Julia provides native implementations
@@ -60,7 +60,7 @@ julia> A = [1.5 2 -4; 3 -1 -6; -10 2.3 4]
  -10.0   2.3   4.0
 
 julia> factorize(A)
-LU{Float64, Matrix{Float64}}
+LU{Float64, Matrix{Float64}, Vector{Int64}}
 L factor:
 3×3 Matrix{Float64}:
   1.0    0.0       0.0
@@ -84,7 +84,7 @@ julia> B = [1.5 2 -4; 2 -1 -3; -4 -3 5]
  -4.0  -3.0   5.0
 
 julia> factorize(B)
-BunchKaufman{Float64, Matrix{Float64}}
+BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
 D factor:
 3×3 Tridiagonal{Float64, Vector{Float64}}:
  -1.64286   0.0   ⋅
@@ -183,10 +183,10 @@ as well as whether hooks to various optimized methods for them in LAPACK are ava
 |:----------------------------- |:--- |:--- |:--- |:--- |:----------------------------------------------------------- |
 | [`Symmetric`](@ref)           |     |     |     | MV  | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref)                |
 | [`Hermitian`](@ref)           |     |     |     | MV  | [`inv`](@ref), [`sqrt`](@ref), [`exp`](@ref)                |
-| [`UpperTriangular`](@ref)     |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref)                                |
-| [`UnitUpperTriangular`](@ref) |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref)                                |
-| [`LowerTriangular`](@ref)     |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref)                                |
-| [`UnitLowerTriangular`](@ref) |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref)                                |
+| [`UpperTriangular`](@ref)     |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref)                                |
+| [`UnitUpperTriangular`](@ref) |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref)                                |
+| [`LowerTriangular`](@ref)     |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref)                                |
+| [`UnitLowerTriangular`](@ref) |     |     | MV  | MV  | [`inv`](@ref), [`det`](@ref), [`logdet`](@ref)                                |
 | [`UpperHessenberg`](@ref)     |     |     |     | MM  | [`inv`](@ref), [`det`](@ref)                                |
 | [`SymTridiagonal`](@ref)      | M   | M   | MS  | MV  | [`eigmax`](@ref), [`eigmin`](@ref)                          |
 | [`Tridiagonal`](@ref)         | M   | M   | MS  | MV  |                                                             |
@@ -266,7 +266,7 @@ julia> b = [1 2 3; 4 5 6]
  4  5  6
 
 julia> b - U
-ERROR: DimensionMismatch("matrix is not square: dimensions are (2, 3)")
+ERROR: DimensionMismatch: matrix is not square: dimensions are (2, 3)
 Stacktrace:
 [...]
 ```
@@ -308,17 +308,22 @@ of the Linear Algebra documentation.
 
 ## Standard functions
 
-Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/). Sparse matrix factorizations call functions from [SuiteSparse](http://suitesparse.com). Other sparse solvers are available as Julia packages.
+Linear algebra functions in Julia are largely implemented by calling functions from [LAPACK](http://www.netlib.org/lapack/).
+Sparse matrix factorizations call functions from [SuiteSparse](http://suitesparse.com).
+Other sparse solvers are available as Julia packages.
 
 ```@docs
 Base.:*(::AbstractMatrix, ::AbstractMatrix)
 Base.:\(::AbstractMatrix, ::AbstractVecOrMat)
+Base.:/(::AbstractVecOrMat, ::AbstractVecOrMat)
 LinearAlgebra.SingularException
 LinearAlgebra.PosDefException
 LinearAlgebra.ZeroPivotException
 LinearAlgebra.dot
 LinearAlgebra.dot(::Any, ::Any, ::Any)
 LinearAlgebra.cross
+LinearAlgebra.axpy!
+LinearAlgebra.axpby!
 LinearAlgebra.factorize
 LinearAlgebra.Diagonal
 LinearAlgebra.Bidiagonal
@@ -410,10 +415,11 @@ LinearAlgebra.nullspace
 Base.kron
 Base.kron!
 LinearAlgebra.exp(::StridedMatrix{<:LinearAlgebra.BlasFloat})
+Base.cis(::AbstractMatrix)
 Base.:^(::AbstractMatrix, ::Number)
 Base.:^(::Number, ::AbstractMatrix)
 LinearAlgebra.log(::StridedMatrix)
-LinearAlgebra.sqrt(::StridedMatrix{<:Real})
+LinearAlgebra.sqrt(::StridedMatrix)
 LinearAlgebra.cos(::StridedMatrix{<:Real})
 LinearAlgebra.sin(::StridedMatrix{<:Real})
 LinearAlgebra.sincos(::StridedMatrix{<:Real})
@@ -528,8 +534,8 @@ LinearAlgebra.BLAS.dotc
 LinearAlgebra.BLAS.blascopy!
 LinearAlgebra.BLAS.nrm2
 LinearAlgebra.BLAS.asum
-LinearAlgebra.axpy!
-LinearAlgebra.axpby!
+LinearAlgebra.BLAS.axpy!
+LinearAlgebra.BLAS.axpby!
 LinearAlgebra.BLAS.scal!
 LinearAlgebra.BLAS.scal
 LinearAlgebra.BLAS.iamax
@@ -576,6 +582,7 @@ LinearAlgebra.BLAS.trmv
 LinearAlgebra.BLAS.trsv!
 LinearAlgebra.BLAS.trsv
 LinearAlgebra.BLAS.set_num_threads
+LinearAlgebra.BLAS.get_num_threads
 ```
 
 ## LAPACK functions
diff --git a/stdlib/LinearAlgebra/src/LinearAlgebra.jl b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
index 2b0d6e15f62693..ec935569884858 100644
--- a/stdlib/LinearAlgebra/src/LinearAlgebra.jl
+++ b/stdlib/LinearAlgebra/src/LinearAlgebra.jl
@@ -9,15 +9,20 @@ module LinearAlgebra
 
 import Base: \, /, *, ^, +, -, ==
 import Base: USE_BLAS64, abs, acos, acosh, acot, acoth, acsc, acsch, adjoint, asec, asech,
-    asin, asinh, atan, atanh, axes, big, broadcast, ceil, conj, convert, copy, copyto!, cos,
-    cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, getindex, hcat,
-    getproperty, imag, inv, isapprox, isone, iszero, IndexStyle, kron, kron!, length, log, map, ndims,
-    oneunit, parent, power_by_squaring, print_matrix, promote_rule, real, round, sec, sech,
-    setindex!, show, similar, sin, sincos, sinh, size, sqrt,
-    strides, stride, tan, tanh, transpose, trunc, typed_hcat, vec
-using Base: hvcat_fill, IndexLinear, promote_op, promote_typeof,
-    @propagate_inbounds, @pure, reduce, typed_vcat, require_one_based_indexing
+    asin, asinh, atan, atanh, axes, big, broadcast, ceil, cis, conj, convert, copy, copyto!,
+    copymutable, cos, cosh, cot, coth, csc, csch, eltype, exp, fill!, floor, getindex, hcat,
+    getproperty, imag, inv, isapprox, isequal, isone, iszero, IndexStyle, kron, kron!,
+    length, log, map, ndims, one, oneunit, parent, permutedims, power_by_squaring,
+    print_matrix, promote_rule, real, round, sec, sech, setindex!, show, similar, sin,
+    sincos, sinh, size, sqrt, strides, stride, tan, tanh, transpose, trunc, typed_hcat,
+    vec, zero
+using Base: IndexLinear, promote_eltype, promote_op, promote_typeof,
+    @propagate_inbounds, reduce, typed_hvcat, typed_vcat, require_one_based_indexing,
+    splat
 using Base.Broadcast: Broadcasted, broadcasted
+using OpenBLAS_jll
+using libblastrampoline_jll
+import Libdl
 
 export
 # Modules
@@ -34,6 +39,7 @@ export
     BunchKaufman,
     Cholesky,
     CholeskyPivoted,
+    ColumnNorm,
     Eigen,
     GeneralizedEigen,
     GeneralizedSVD,
@@ -41,12 +47,14 @@ export
     Hessenberg,
     LU,
     LDLt,
+    NoPivot,
     QR,
     QRPivoted,
     LQ,
     Schur,
     SVD,
     Hermitian,
+    RowMaximum,
     Symmetric,
     LowerTriangular,
     UpperTriangular,
@@ -163,6 +171,10 @@ abstract type Algorithm end
 struct DivideAndConquer <: Algorithm end
 struct QRIteration <: Algorithm end
 
+abstract type PivotingStrategy end
+struct NoPivot <: PivotingStrategy end
+struct RowMaximum <: PivotingStrategy end
+struct ColumnNorm <: PivotingStrategy end
 
 # Check that stride of matrix/vector is 1
 # Writing like this to avoid splatting penalty when called with multiple arguments,
@@ -253,10 +265,8 @@ function sym_uplo(uplo::Char)
     end
 end
 
-
 @noinline throw_uplo() = throw(ArgumentError("uplo argument must be either :U (upper) or :L (lower)"))
 
-
 """
     ldiv!(Y, A, B) -> Y
 
@@ -346,9 +356,48 @@ control over the factorization of `B`.
 """
 rdiv!(A, B)
 
+"""
+    copy_oftype(A, T)
+
+Creates a copy of `A` with eltype `T`. No assertions about mutability of the result are
+made. When `eltype(A) == T`, then this calls `copy(A)` which may be overloaded for custom
+array types. Otherwise, this calls `convert(AbstractArray{T}, A)`.
+"""
 copy_oftype(A::AbstractArray{T}, ::Type{T}) where {T} = copy(A)
 copy_oftype(A::AbstractArray{T,N}, ::Type{S}) where {T,N,S} = convert(AbstractArray{S,N}, A)
 
+"""
+    copymutable_oftype(A, T)
+
+Copy `A` to a mutable array with eltype `T` based on `similar(A, T)`.
+
+The resulting matrix typically has similar algebraic structure as `A`. For
+example, supplying a tridiagonal matrix results in another tridiagonal matrix.
+In general, the type of the output corresponds to that of `similar(A, T)`.
+
+In LinearAlgebra, mutable copies (of some desired eltype) are created to be passed
+to in-place algorithms (such as `ldiv!`, `rdiv!`, `lu!` and so on). If the specific
+algorithm is known to preserve the algebraic structure, use `copymutable_oftype`.
+If the algorithm is known to return a dense matrix (or some wrapper backed by a dense
+matrix), then use `copy_similar`.
+
+See also: `Base.copymutable`, `copy_similar`.
+"""
+copymutable_oftype(A::AbstractArray, ::Type{S}) where {S} = copyto!(similar(A, S), A)
+
+"""
+    copy_similar(A, T)
+
+Copy `A` to a mutable array with eltype `T` based on `similar(A, T, size(A))`.
+
+Compared to `copymutable_oftype`, the result can be more flexible. In general, the type
+of the output corresponds to that of the three-argument method `similar(A, T, size(A))`.
+
+See also: `copymutable_oftype`.
+"""
+copy_similar(A::AbstractArray, ::Type{T}) where {T} = copyto!(similar(A, T, size(A)), A)
+
+
 include("adjtrans.jl")
 include("transpose.jl")
 
@@ -373,6 +422,7 @@ include("cholesky.jl")
 include("lu.jl")
 include("bunchkaufman.jl")
 include("diagonal.jl")
+include("symmetriceigen.jl")
 include("bidiag.jl")
 include("uniformscaling.jl")
 include("hessenberg.jl")
@@ -388,6 +438,69 @@ const ⋅ = dot
 const × = cross
 export ⋅, ×
 
+## convenience methods
+## return only the solution of a least squares problem while avoiding promoting
+## vectors to matrices.
+_cut_B(x::AbstractVector, r::UnitRange) = length(x)  > length(r) ? x[r]   : x
+_cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X
+
+# SymTridiagonal ev can be the same length as dv, but the last element is
+# ignored. However, some methods can fail if they read the entired ev
+# rather than just the meaningful elements. This is a helper function
+# for getting only the meaningful elements of ev. See #41089
+_evview(S::SymTridiagonal) = @view S.ev[begin:length(S.dv) - 1]
+
+## append right hand side with zeros if necessary
+_zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
+_zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))
+
+# General fallback definition for handling under- and overdetermined system as well as square problems
+# While this definition is pretty general, it does e.g. promote to common element type of lhs and rhs
+# which is required by LAPACK but not SuiteSpase which allows real-complex solves in some cases. Hence,
+# we restrict this method to only the LAPACK factorizations in LinearAlgebra.
+# The definition is put here since it explicitly references all the Factorizion structs so it has
+# to be located after all the files that define the structs.
+const LAPACKFactorizations{T,S} = Union{
+    BunchKaufman{T,S},
+    Cholesky{T,S},
+    LQ{T,S},
+    LU{T,S},
+    QR{T,S},
+    QRCompactWY{T,S},
+    QRPivoted{T,S},
+    SVD{T,<:Real,S}}
+function (\)(F::Union{<:LAPACKFactorizations,Adjoint{<:Any,<:LAPACKFactorizations}}, B::AbstractVecOrMat)
+    require_one_based_indexing(B)
+    m, n = size(F)
+    if m != size(B, 1)
+        throw(DimensionMismatch("arguments must have the same number of rows"))
+    end
+
+    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
+    FF = Factorization{TFB}(F)
+
+    # For wide problem we (often) compute a minimum norm solution. The solution
+    # is larger than the right hand side so we use size(F, 2).
+    BB = _zeros(TFB, B, n)
+
+    if n > size(B, 1)
+        # Underdetermined
+        copyto!(view(BB, 1:m, :), B)
+    else
+        copyto!(BB, B)
+    end
+
+    ldiv!(FF, BB)
+
+    # For tall problems, we compute a least squares solution so only part
+    # of the rhs should be returned from \ while ldiv! uses (and returns)
+    # the complete rhs
+    return _cut_B(BB, 1:n)
+end
+# disambiguate
+(\)(F::LAPACKFactorizations{T}, B::VecOrMat{Complex{T}}) where {T<:BlasReal} =
+    invoke(\, Tuple{Factorization{T}, VecOrMat{Complex{T}}}, F, B)
+
 """
     LinearAlgebra.peakflops(n::Integer=2000; parallel::Bool=false)
 
@@ -424,27 +537,44 @@ end
 
 
 function versioninfo(io::IO=stdout)
-    if Base.libblas_name == "libopenblas" || BLAS.vendor() === :openblas || BLAS.vendor() === :openblas64
-        openblas_config = BLAS.openblas_get_config()
-        println(io, "BLAS: libopenblas (", openblas_config, ")")
-    else
-        println(io, "BLAS: ",Base.libblas_name)
+    indent = "  "
+    config = BLAS.get_config()
+    build_flags = join(string.(config.build_flags), ", ")
+    println(io, "BLAS: ", BLAS.libblastrampoline, " (", build_flags, ")")
+    for lib in config.loaded_libs
+        interface = uppercase(string(lib.interface))
+        println(io, indent, "--> ", lib.libname, " (", interface, ")")
     end
-    println(io, "LAPACK: ",Base.liblapack_name)
+    println(io, "Threading:")
+    println(io, indent, "Threads.nthreads() = ", Base.Threads.nthreads())
+    println(io, indent, "LinearAlgebra.BLAS.get_num_threads() = ", BLAS.get_num_threads())
+    println(io, "Relevant environment variables:")
+    env_var_names = [
+        "JULIA_NUM_THREADS",
+        "MKL_DYNAMIC",
+        "MKL_NUM_THREADS",
+        "OPENBLAS_NUM_THREADS",
+    ]
+    printed_at_least_one_env_var = false
+    for name in env_var_names
+        if haskey(ENV, name)
+            value = ENV[name]
+            println(io, indent, name, " = ", value)
+            printed_at_least_one_env_var = true
+        end
+    end
+    if !printed_at_least_one_env_var
+        println(io, indent, "[none]")
+    end
+    return nothing
 end
 
 function __init__()
     try
+        BLAS.lbt_forward(OpenBLAS_jll.libopenblas_path; clear=true)
         BLAS.check()
-        if BLAS.vendor() === :mkl
-            ccall((:MKL_Set_Interface_Layer, Base.libblas_name), Cvoid, (Cint,), USE_BLAS64 ? 1 : 0)
-        end
-        Threads.resize_nthreads!(Abuf)
-        Threads.resize_nthreads!(Bbuf)
-        Threads.resize_nthreads!(Cbuf)
     catch ex
-        Base.showerror_nostdio(ex,
-            "WARNING: Error during initialization of module LinearAlgebra")
+        Base.showerror_nostdio(ex, "WARNING: Error during initialization of module LinearAlgebra")
     end
     # register a hook to disable BLAS threading
     Base.at_disable_library_threading(() -> BLAS.set_num_threads(1))
diff --git a/stdlib/LinearAlgebra/src/adjtrans.jl b/stdlib/LinearAlgebra/src/adjtrans.jl
index c9f7326116bff4..b6a4548833eacd 100644
--- a/stdlib/LinearAlgebra/src/adjtrans.jl
+++ b/stdlib/LinearAlgebra/src/adjtrans.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Base: @propagate_inbounds, @_inline_meta
+using Base: @propagate_inbounds
 import Base: length, size, axes, IndexStyle, getindex, setindex!, parent, vec, convert, similar
 
 ### basic definitions (types, aliases, constructors, abstractarray interface, sundry similar)
@@ -27,17 +27,13 @@ julia> A = [3+2im 9+2im; 8+7im  4+6im]
  8+7im  4+6im
 
 julia> adjoint(A)
-2×2 Adjoint{Complex{Int64}, Matrix{Complex{Int64}}}:
+2×2 adjoint(::Matrix{Complex{Int64}}) with eltype Complex{Int64}:
  3-2im  8-7im
  9-2im  4-6im
 ```
 """
 struct Adjoint{T,S} <: AbstractMatrix{T}
     parent::S
-    function Adjoint{T,S}(A::S) where {T,S}
-        checkeltype_adjoint(T, eltype(A))
-        new(A)
-    end
 end
 """
     Transpose
@@ -58,37 +54,13 @@ julia> A = [3+2im 9+2im; 8+7im  4+6im]
  8+7im  4+6im
 
 julia> transpose(A)
-2×2 Transpose{Complex{Int64}, Matrix{Complex{Int64}}}:
+2×2 transpose(::Matrix{Complex{Int64}}) with eltype Complex{Int64}:
  3+2im  8+7im
  9+2im  4+6im
 ```
 """
 struct Transpose{T,S} <: AbstractMatrix{T}
     parent::S
-    function Transpose{T,S}(A::S) where {T,S}
-        checkeltype_transpose(T, eltype(A))
-        new(A)
-    end
-end
-
-function checkeltype_adjoint(::Type{ResultEltype}, ::Type{ParentEltype}) where {ResultEltype,ParentEltype}
-    Expected = Base.promote_op(adjoint, ParentEltype)
-    ResultEltype === Expected || error(string(
-        "Element type mismatch. Tried to create an `Adjoint{", ResultEltype, "}` ",
-        "from an object with eltype `", ParentEltype, "`, but the element type of ",
-        "the adjoint of an object with eltype `", ParentEltype, "` must be ",
-        "`", Expected, "`."))
-    return nothing
-end
-
-function checkeltype_transpose(::Type{ResultEltype}, ::Type{ParentEltype}) where {ResultEltype, ParentEltype}
-    Expected = Base.promote_op(transpose, ParentEltype)
-    ResultEltype === Expected || error(string(
-        "Element type mismatch. Tried to create a `Transpose{", ResultEltype, "}` ",
-        "from an object with eltype `", ParentEltype, "`, but the element type of ",
-        "the transpose of an object with eltype `", ParentEltype, "` must be ",
-        "`", Expected, "`."))
-    return nothing
 end
 
 # basic outer constructors
@@ -120,7 +92,7 @@ julia> A = [3+2im 9+2im; 8+7im  4+6im]
  8+7im  4+6im
 
 julia> adjoint(A)
-2×2 Adjoint{Complex{Int64}, Matrix{Complex{Int64}}}:
+2×2 adjoint(::Matrix{Complex{Int64}}) with eltype Complex{Int64}:
  3-2im  8-7im
  9-2im  4-6im
 
@@ -153,7 +125,7 @@ julia> A = [3+2im 9+2im; 8+7im  4+6im]
  8+7im  4+6im
 
 julia> transpose(A)
-2×2 Transpose{Complex{Int64}, Matrix{Complex{Int64}}}:
+2×2 transpose(::Matrix{Complex{Int64}}) with eltype Complex{Int64}:
  3+2im  8+7im
  9+2im  4+6im
 ```
@@ -166,17 +138,35 @@ transpose(A::Transpose) = A.parent
 adjoint(A::Transpose{<:Real}) = A.parent
 transpose(A::Adjoint{<:Real}) = A.parent
 
+# printing
+function Base.showarg(io::IO, v::Adjoint, toplevel)
+    print(io, "adjoint(")
+    Base.showarg(io, parent(v), false)
+    print(io, ')')
+    toplevel && print(io, " with eltype ", eltype(v))
+    return nothing
+end
+function Base.showarg(io::IO, v::Transpose, toplevel)
+    print(io, "transpose(")
+    Base.showarg(io, parent(v), false)
+    print(io, ')')
+    toplevel && print(io, " with eltype ", eltype(v))
+    return nothing
+end
 
 # some aliases for internal convenience use
 const AdjOrTrans{T,S} = Union{Adjoint{T,S},Transpose{T,S}} where {T,S}
 const AdjointAbsVec{T} = Adjoint{T,<:AbstractVector}
+const AdjointAbsMat{T} = Adjoint{T,<:AbstractMatrix}
 const TransposeAbsVec{T} = Transpose{T,<:AbstractVector}
+const TransposeAbsMat{T} = Transpose{T,<:AbstractMatrix}
 const AdjOrTransAbsVec{T} = AdjOrTrans{T,<:AbstractVector}
 const AdjOrTransAbsMat{T} = AdjOrTrans{T,<:AbstractMatrix}
 
 # for internal use below
-wrapperop(A::Adjoint) = adjoint
-wrapperop(A::Transpose) = transpose
+wrapperop(_) = identity
+wrapperop(::Adjoint) = adjoint
+wrapperop(::Transpose) = transpose
 
 # AbstractArray interface, basic definitions
 length(A::AdjOrTrans) = length(A.parent)
@@ -186,8 +176,8 @@ axes(v::AdjOrTransAbsVec) = (Base.OneTo(1), axes(v.parent)...)
 axes(A::AdjOrTransAbsMat) = reverse(axes(A.parent))
 IndexStyle(::Type{<:AdjOrTransAbsVec}) = IndexLinear()
 IndexStyle(::Type{<:AdjOrTransAbsMat}) = IndexCartesian()
-@propagate_inbounds getindex(v::AdjOrTransAbsVec, i::Int) = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])
-@propagate_inbounds getindex(A::AdjOrTransAbsMat, i::Int, j::Int) = wrapperop(A)(A.parent[j, i])
+@propagate_inbounds getindex(v::AdjOrTransAbsVec{T}, i::Int) where {T} = wrapperop(v)(v.parent[i-1+first(axes(v.parent)[1])])::T
+@propagate_inbounds getindex(A::AdjOrTransAbsMat{T}, i::Int, j::Int) where {T} = wrapperop(A)(A.parent[j, i])::T
 @propagate_inbounds setindex!(v::AdjOrTransAbsVec, x, i::Int) = (setindex!(v.parent, wrapperop(v)(x), i-1+first(axes(v.parent)[1])); v)
 @propagate_inbounds setindex!(A::AdjOrTransAbsMat, x, i::Int, j::Int) = (setindex!(A.parent, wrapperop(A)(x), j, i); A)
 # AbstractArray interface, additional definitions to retain wrapper over vectors where appropriate
@@ -221,9 +211,12 @@ similar(A::AdjOrTrans) = similar(A.parent, eltype(A), axes(A))
 similar(A::AdjOrTrans, ::Type{T}) where {T} = similar(A.parent, T, axes(A))
 similar(A::AdjOrTrans, ::Type{T}, dims::Dims{N}) where {T,N} = similar(A.parent, T, dims)
 
+# AbstractMatrix{T} constructor for adjtrans vector: preserve wrapped type
+AbstractMatrix{T}(A::AdjOrTransAbsVec) where {T} = wrapperop(A)(AbstractVector{T}(A.parent))
+
 # sundry basic definitions
 parent(A::AdjOrTrans) = A.parent
-vec(v::TransposeAbsVec) = parent(v)
+vec(v::TransposeAbsVec{<:Number}) = parent(v)
 vec(v::AdjointAbsVec{<:Real}) = parent(v)
 
 ### concatenation
@@ -235,7 +228,7 @@ _adjoint_hcat(avs::Union{Number,AdjointAbsVec}...) = adjoint(vcat(map(adjoint, a
 _transpose_hcat(tvs::Union{Number,TransposeAbsVec}...) = transpose(vcat(map(transpose, tvs)...))
 typed_hcat(::Type{T}, avs::Union{Number,AdjointAbsVec}...) where {T} = adjoint(typed_vcat(T, map(adjoint, avs)...))
 typed_hcat(::Type{T}, tvs::Union{Number,TransposeAbsVec}...) where {T} = transpose(typed_vcat(T, map(transpose, tvs)...))
-# otherwise-redundant definitions necessary to prevent hitting the concat methods in sparse/sparsevector.jl
+# otherwise-redundant definitions necessary to prevent hitting the concat methods in LinearAlgebra/special.jl
 hcat(avs::Adjoint{<:Any,<:Vector}...) = _adjoint_hcat(avs...)
 hcat(tvs::Transpose{<:Any,<:Vector}...) = _transpose_hcat(tvs...)
 hcat(avs::Adjoint{T,Vector{T}}...) where {T} = _adjoint_hcat(avs...)
@@ -260,6 +253,25 @@ Broadcast.broadcast_preserving_zero_d(f, avs::Union{Number,AdjointAbsVec}...) =
 Broadcast.broadcast_preserving_zero_d(f, tvs::Union{Number,TransposeAbsVec}...) = transpose(broadcast((xs...) -> transpose(f(transpose.(xs)...)), quasiparentt.(tvs)...))
 # TODO unify and allow mixed combinations with a broadcast style
 
+
+### reductions
+# faster to sum the Array than to work through the wrapper
+Base._mapreduce_dim(f, op, init::Base._InitialValue, A::Transpose, dims::Colon) =
+    transpose(Base._mapreduce_dim(_sandwich(transpose, f), _sandwich(transpose, op), init, parent(A), dims))
+Base._mapreduce_dim(f, op, init::Base._InitialValue, A::Adjoint, dims::Colon) =
+    adjoint(Base._mapreduce_dim(_sandwich(adjoint, f), _sandwich(adjoint, op), init, parent(A), dims))
+# sum(A'; dims)
+Base.mapreducedim!(f, op, B::AbstractArray, A::TransposeAbsMat) =
+    transpose(Base.mapreducedim!(_sandwich(transpose, f), _sandwich(transpose, op), transpose(B), parent(A)))
+Base.mapreducedim!(f, op, B::AbstractArray, A::AdjointAbsMat) =
+    adjoint(Base.mapreducedim!(_sandwich(adjoint, f), _sandwich(adjoint, op), adjoint(B), parent(A)))
+
+_sandwich(adj::Function, fun) = (xs...,) -> adj(fun(map(adj, xs)...))
+for fun in [:identity, :add_sum, :mul_prod] #, :max, :min]
+    @eval _sandwich(::Function, ::typeof(Base.$fun)) = Base.$fun
+end
+
+
 ### linear algebra
 
 (-)(A::Adjoint)   = Adjoint(  -A.parent)
diff --git a/stdlib/LinearAlgebra/src/bidiag.jl b/stdlib/LinearAlgebra/src/bidiag.jl
index 12a9c887d41d2c..317ed15af770ce 100644
--- a/stdlib/LinearAlgebra/src/bidiag.jl
+++ b/stdlib/LinearAlgebra/src/bidiag.jl
@@ -7,9 +7,10 @@ struct Bidiagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
     uplo::Char # upper bidiagonal ('U') or lower ('L')
     function Bidiagonal{T,V}(dv, ev, uplo::AbstractChar) where {T,V<:AbstractVector{T}}
         require_one_based_indexing(dv, ev)
-        if length(ev) != length(dv)-1
+        if length(ev) != max(length(dv)-1, 0)
             throw(DimensionMismatch("length of diagonal vector is $(length(dv)), length of off-diagonal vector is $(length(ev))"))
         end
+        (uplo != 'U' && uplo != 'L') && throw_uplo()
         new{T,V}(dv, ev, uplo)
     end
 end
@@ -62,7 +63,7 @@ julia> Bl = Bidiagonal(dv, ev, :L) # ev is on the first subdiagonal
 ```
 """
 function Bidiagonal(dv::V, ev::V, uplo::Symbol) where {T,V<:AbstractVector{T}}
-    Bidiagonal{T,V}(dv, ev, char_uplo(uplo))
+    Bidiagonal{T,V}(dv, ev, uplo)
 end
 function Bidiagonal(dv::V, ev::V, uplo::AbstractChar) where {T,V<:AbstractVector{T}}
     Bidiagonal{T,V}(dv, ev, uplo)
@@ -70,7 +71,7 @@ end
 
 #To allow Bidiagonal's where the "dv" is Vector{T} and "ev" Vector{S},
 #where T and S can be promoted
-function LinearAlgebra.Bidiagonal(dv::Vector{T}, ev::Vector{S}, uplo::Symbol) where {T,S}
+function Bidiagonal(dv::Vector{T}, ev::Vector{S}, uplo::Symbol) where {T,S}
     TS = promote_type(T,S)
     return Bidiagonal{TS,Vector{TS}}(dv, ev, uplo)
 end
@@ -109,26 +110,37 @@ function Bidiagonal(A::AbstractMatrix, uplo::Symbol)
     Bidiagonal(diag(A, 0), diag(A, uplo === :U ? 1 : -1), uplo)
 end
 
+
 Bidiagonal(A::Bidiagonal) = A
 Bidiagonal{T}(A::Bidiagonal{T}) where {T} = A
 Bidiagonal{T}(A::Bidiagonal) where {T} = Bidiagonal{T}(A.dv, A.ev, A.uplo)
 
-function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T
-    if !((1 <= i <= size(A,2)) && (1 <= j <= size(A,2)))
-        throw(BoundsError(A,(i,j)))
+bidiagzero(::Bidiagonal{T}, i, j) where {T} = zero(T)
+function bidiagzero(A::Bidiagonal{<:AbstractMatrix}, i, j)
+    Tel = eltype(eltype(A.dv))
+    if i < j && A.uplo == 'U' #= top right zeros =#
+        return zeros(Tel, size(A.ev[i], 1), size(A.ev[j-1], 2))
+    elseif j < i && A.uplo == 'L' #= bottom left zeros =#
+        return zeros(Tel, size(A.ev[i-1], 1), size(A.ev[j], 2))
+    else
+        return zeros(Tel, size(A.dv[i], 1), size(A.dv[j], 2))
     end
+end
+
+@inline function getindex(A::Bidiagonal{T}, i::Integer, j::Integer) where T
+    @boundscheck checkbounds(A, i, j)
     if i == j
-        return A.dv[i]
+        return @inbounds A.dv[i]
     elseif A.uplo == 'U' && (i == j - 1)
-        return A.ev[i]
+        return @inbounds A.ev[i]
     elseif A.uplo == 'L' && (i == j + 1)
-        return A.ev[j]
+        return @inbounds A.ev[j]
     else
-        return zero(T)
+        return bidiagzero(A, i, j)
     end
 end
 
-function setindex!(A::Bidiagonal, x, i::Integer, j::Integer)
+@inline function setindex!(A::Bidiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.dv[i] = x
@@ -156,21 +168,19 @@ end
 function Matrix{T}(A::Bidiagonal) where T
     n = size(A, 1)
     B = zeros(T, n, n)
-    if n == 0
-        return B
-    end
-    for i = 1:n - 1
+    n == 0 && return B
+    @inbounds for i = 1:n - 1
         B[i,i] = A.dv[i]
         if A.uplo == 'U'
-            B[i, i + 1] = A.ev[i]
+            B[i,i+1] = A.ev[i]
         else
-            B[i + 1, i] = A.ev[i]
+            B[i+1,i] = A.ev[i]
         end
     end
     B[n,n] = A.dv[n]
     return B
 end
-Matrix(A::Bidiagonal{T}) where {T} = Matrix{T}(A)
+Matrix(A::Bidiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(A)
 Array(A::Bidiagonal) = Matrix(A)
 promote_rule(::Type{Matrix{T}}, ::Type{<:Bidiagonal{S}}) where {T,S} =
     @isdefined(T) && @isdefined(S) ? Matrix{promote_type(T,S)} : Matrix
@@ -192,12 +202,8 @@ AbstractMatrix{T}(A::Bidiagonal) where {T} = convert(Bidiagonal{T}, A)
 
 convert(T::Type{<:Bidiagonal}, m::AbstractMatrix) = m isa T ? m : T(m)
 
-# For B<:Bidiagonal, similar(B[, neweltype]) should yield a Bidiagonal matrix.
-# On the other hand, similar(B, [neweltype,] shape...) should yield a sparse matrix.
-# The first method below effects the former, and the second the latter.
 similar(B::Bidiagonal, ::Type{T}) where {T} = Bidiagonal(similar(B.dv, T), similar(B.ev, T), B.uplo)
-# The method below is moved to SparseArrays for now
-# similar(B::Bidiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
+similar(B::Bidiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = zeros(T, dims...)
 
 
 ###################
@@ -220,8 +226,8 @@ end
 
 function show(io::IO, M::Bidiagonal)
     # TODO: make this readable and one-line
-    summary(io, M); println(io, ":")
-    print(io, " diag:")
+    summary(io, M)
+    print(io, ":\n diag:")
     print_matrix(io, (M.dv)')
     print(io, M.uplo == 'U' ? "\n super:" : "\n sub:")
     print_matrix(io, (M.ev)')
@@ -245,8 +251,13 @@ end
 
 adjoint(B::Bidiagonal) = Adjoint(B)
 transpose(B::Bidiagonal) = Transpose(B)
-adjoint(B::Bidiagonal{<:Real}) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? :L : :U)
+adjoint(B::Bidiagonal{<:Number}) = Bidiagonal(conj(B.dv), conj(B.ev), B.uplo == 'U' ? :L : :U)
 transpose(B::Bidiagonal{<:Number}) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? :L : :U)
+permutedims(B::Bidiagonal) = Bidiagonal(B.dv, B.ev, B.uplo == 'U' ? 'L' : 'U')
+function permutedims(B::Bidiagonal, perm)
+    Base.checkdims_perm(B, B, perm)
+    NTuple{2}(perm) == (2, 1) ? permutedims(B) : B
+end
 function Base.copy(aB::Adjoint{<:Any,<:Bidiagonal})
     B = aB.parent
     return Bidiagonal(map(x -> copy.(adjoint.(x)), (B.dv, B.ev))..., B.uplo == 'U' ? :L : :U)
@@ -298,45 +309,45 @@ function istril(M::Bidiagonal, k::Integer=0)
 end
 isdiag(M::Bidiagonal) = iszero(M.ev)
 
-function tril!(M::Bidiagonal, k::Integer=0)
+function tril!(M::Bidiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n - 1 <= k <= n - 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif M.uplo == 'U' && k < 0
-        fill!(M.dv,0)
-        fill!(M.ev,0)
+        fill!(M.dv, zero(T))
+        fill!(M.ev, zero(T))
     elseif k < -1
-        fill!(M.dv,0)
-        fill!(M.ev,0)
+        fill!(M.dv, zero(T))
+        fill!(M.ev, zero(T))
     elseif M.uplo == 'U' && k == 0
-        fill!(M.ev,0)
+        fill!(M.ev, zero(T))
     elseif M.uplo == 'L' && k == -1
-        fill!(M.dv,0)
+        fill!(M.dv, zero(T))
     end
     return M
 end
 
-function triu!(M::Bidiagonal, k::Integer=0)
+function triu!(M::Bidiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n + 1 <= k <= n + 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least",
             "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif M.uplo == 'L' && k > 0
-        fill!(M.dv,0)
-        fill!(M.ev,0)
+        fill!(M.dv, zero(T))
+        fill!(M.ev, zero(T))
     elseif k > 1
-        fill!(M.dv,0)
-        fill!(M.ev,0)
+        fill!(M.dv, zero(T))
+        fill!(M.ev, zero(T))
     elseif M.uplo == 'L' && k == 0
-        fill!(M.ev,0)
+        fill!(M.ev, zero(T))
     elseif M.uplo == 'U' && k == 1
-        fill!(M.dv,0)
+        fill!(M.dv, zero(T))
     end
     return M
 end
 
-function diag(M::Bidiagonal, n::Integer=0)
+function diag(M::Bidiagonal{T}, n::Integer=0) where T
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of n
     if n == 0
@@ -344,7 +355,7 @@ function diag(M::Bidiagonal, n::Integer=0)
     elseif (n == 1 && M.uplo == 'U') ||  (n == -1 && M.uplo == 'L')
         return copyto!(similar(M.ev, length(M.ev)), M.ev)
     elseif -size(M,1) <= n <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-abs(n)), 0)
+        return fill!(similar(M.dv, size(M,1)-abs(n)), zero(T))
     else
         throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
             "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
@@ -352,7 +363,7 @@ function diag(M::Bidiagonal, n::Integer=0)
 end
 
 function +(A::Bidiagonal, B::Bidiagonal)
-    if A.uplo == B.uplo
+    if A.uplo == B.uplo || length(A.dv) == 0
         Bidiagonal(A.dv+B.dv, A.ev+B.ev, A.uplo)
     else
         newdv = A.dv+B.dv
@@ -361,7 +372,7 @@ function +(A::Bidiagonal, B::Bidiagonal)
 end
 
 function -(A::Bidiagonal, B::Bidiagonal)
-    if A.uplo == B.uplo
+    if A.uplo == B.uplo || length(A.dv) == 0
         Bidiagonal(A.dv-B.dv, A.ev-B.ev, A.uplo)
     else
         newdv = A.dv-B.dv
@@ -371,8 +382,9 @@ end
 
 -(A::Bidiagonal)=Bidiagonal(-A.dv,-A.ev,A.uplo)
 *(A::Bidiagonal, B::Number) = Bidiagonal(A.dv*B, A.ev*B, A.uplo)
-*(B::Number, A::Bidiagonal) = A*B
+*(B::Number, A::Bidiagonal) = Bidiagonal(B*A.dv, B*A.ev, A.uplo)
 /(A::Bidiagonal, B::Number) = Bidiagonal(A.dv/B, A.ev/B, A.uplo)
+\(B::Number, A::Bidiagonal) = Bidiagonal(B\A.dv, B\A.ev, A.uplo)
 
 function ==(A::Bidiagonal, B::Bidiagonal)
     if A.uplo == B.uplo
@@ -389,18 +401,15 @@ const BiTri = Union{Bidiagonal,Tridiagonal}
 @inline mul!(C::AbstractMatrix,   A::AbstractTriangular, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix,   A::AbstractMatrix,     B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix,   A::Diagonal,           B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:AbstractTriangular}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:AbstractTriangular}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:AbstractVecOrMat}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:AbstractVecOrMat}, B::BiTriSym, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractVector,   A::BiTriSym,              B::AbstractVector, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix,   A::BiTriSym,              B::AbstractVecOrMat, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractVecOrMat, A::BiTriSym,              B::AbstractVecOrMat, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta)) # around bidiag line 330
+@inline mul!(C::AbstractVector, A::BiTriSym, B::AbstractVector, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BiTriSym, B::AbstractVecOrMat, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Diagonal, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractMatrix, A::BiTriSym, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = A_mul_B_td!(C, A, B, MulAddMul(alpha, beta))
 @inline mul!(C::AbstractVector, A::BiTriSym, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = throw(MethodError(mul!, (C, A, B)), MulAddMul(alpha, beta))
+@inline mul!(C::AbstractVector, A::BiTriSym, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) = throw(MethodError(mul!, (C, A, B)), MulAddMul(alpha, beta))
 
 function check_A_mul_B!_sizes(C, A, B)
     require_one_based_indexing(C)
@@ -629,89 +638,39 @@ end
 
 function *(A::AbstractTriangular, B::Union{SymTridiagonal, Tridiagonal})
     TS = promote_op(matprod, eltype(A), eltype(B))
-    A_mul_B_td!(zeros(TS, size(A)...), A, B)
+    A_mul_B_td!(zeros(TS, size(A)), A, B)
 end
 
-const UpperOrUnitUpperTriangular = Union{UpperTriangular, UnitUpperTriangular}
-const LowerOrUnitLowerTriangular = Union{LowerTriangular, UnitLowerTriangular}
-const AdjOrTransUpperOrUnitUpperTriangular = Union{Adjoint{<:Any, <:UpperOrUnitUpperTriangular}, Transpose{<:Any, <:UpperOrUnitUpperTriangular}}
-const AdjOrTransLowerOrUnitLowerTriangular = Union{Adjoint{<:Any, <:LowerOrUnitLowerTriangular}, Transpose{<:Any, <:LowerOrUnitLowerTriangular}}
+const UpperOrUnitUpperTriangular{T} = Union{UpperTriangular{T}, UnitUpperTriangular{T}}
+const LowerOrUnitLowerTriangular{T} = Union{LowerTriangular{T}, UnitLowerTriangular{T}}
 
 function *(A::UpperOrUnitUpperTriangular, B::Bidiagonal)
     TS = promote_op(matprod, eltype(A), eltype(B))
-    if B.uplo == 'U'
-        A_mul_B_td!(UpperTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
-end
-
-function *(A::AdjOrTransUpperOrUnitUpperTriangular, B::Bidiagonal)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    if B.uplo == 'L'
-        A_mul_B_td!(LowerTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
+    C = A_mul_B_td!(zeros(TS, size(A)), A, B)
+    return B.uplo == 'U' ? UpperTriangular(C) : C
 end
 
 function *(A::LowerOrUnitLowerTriangular, B::Bidiagonal)
     TS = promote_op(matprod, eltype(A), eltype(B))
-    if B.uplo == 'L'
-        A_mul_B_td!(LowerTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
-end
-
-function *(A::AdjOrTransLowerOrUnitLowerTriangular, B::Bidiagonal)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    if B.uplo == 'U'
-        A_mul_B_td!(UpperTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
+    C = A_mul_B_td!(zeros(TS, size(A)), A, B)
+    return B.uplo == 'L' ? LowerTriangular(C) : C
 end
 
 function *(A::Union{SymTridiagonal, Tridiagonal}, B::AbstractTriangular)
     TS = promote_op(matprod, eltype(A), eltype(B))
-    A_mul_B_td!(zeros(TS, size(A)...), A, B)
+    A_mul_B_td!(zeros(TS, size(A)), A, B)
 end
 
 function *(A::Bidiagonal, B::UpperOrUnitUpperTriangular)
     TS = promote_op(matprod, eltype(A), eltype(B))
-    if A.uplo == 'U'
-        A_mul_B_td!(UpperTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
-end
-
-function *(A::Bidiagonal, B::AdjOrTransUpperOrUnitUpperTriangular)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    if A.uplo == 'L'
-        A_mul_B_td!(LowerTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
+    C = A_mul_B_td!(zeros(TS, size(A)), A, B)
+    return A.uplo == 'U' ? UpperTriangular(C) : C
 end
 
 function *(A::Bidiagonal, B::LowerOrUnitLowerTriangular)
     TS = promote_op(matprod, eltype(A), eltype(B))
-    if A.uplo == 'L'
-        A_mul_B_td!(LowerTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
-end
-
-function *(A::Bidiagonal, B::AdjOrTransLowerOrUnitLowerTriangular)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    if A.uplo == 'U'
-        A_mul_B_td!(UpperTriangular(zeros(TS, size(A)...)), A, B)
-    else
-        A_mul_B_td!(zeros(TS, size(A)...), A, B)
-    end
+    C = A_mul_B_td!(zeros(TS, size(A)), A, B)
+    return A.uplo == 'L' ? LowerTriangular(C) : C
 end
 
 function *(A::BiTri, B::Diagonal)
@@ -734,6 +693,11 @@ function *(A::SymTridiagonal, B::Diagonal)
     A_mul_B_td!(Tridiagonal(zeros(TS, size(A, 1)-1), zeros(TS, size(A, 1)), zeros(TS, size(A, 1)-1)), A, B)
 end
 
+function *(A::BiTriSym, B::BiTriSym)
+    TS = promote_op(matprod, eltype(A), eltype(B))
+    mul!(similar(A, TS, size(A)), A, B)
+end
+
 function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector)
     require_one_based_indexing(x, y)
     nx, ny = length(x), length(y)
@@ -765,128 +729,214 @@ function dot(x::AbstractVector, B::Bidiagonal, y::AbstractVector)
 end
 
 #Linear solvers
-ldiv!(A::Union{Bidiagonal, AbstractTriangular}, b::AbstractVector) = naivesub!(A, b)
-ldiv!(A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVector) = ldiv!(copy(A), b)
-ldiv!(A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVector) = ldiv!(copy(A), b)
-function ldiv!(A::Union{Bidiagonal,AbstractTriangular}, B::AbstractMatrix)
-    require_one_based_indexing(A, B)
-    nA,mA = size(A)
-    tmp = similar(B,size(B,1))
-    n = size(B, 1)
-    if nA != n
-        throw(DimensionMismatch("size of A is ($nA,$mA), corresponding dimension of B is $n"))
-    end
-    for i = 1:size(B,2)
-        copyto!(tmp, 1, B, (i - 1)*n + 1, n)
-        ldiv!(A, tmp)
-        copyto!(B, (i - 1)*n + 1, tmp, 1, n) # Modify this when array view are implemented.
-    end
-    B
-end
-function ldiv!(adjA::Adjoint{<:Any,<:Union{Bidiagonal,AbstractTriangular}}, B::AbstractMatrix)
-    require_one_based_indexing(adjA, B)
-    A = adjA.parent
-    nA,mA = size(A)
-    tmp = similar(B,size(B,1))
-    n = size(B, 1)
-    if mA != n
-        throw(DimensionMismatch("size of adjoint of A is ($mA,$nA), corresponding dimension of B is $n"))
-    end
-    for i = 1:size(B,2)
-        copyto!(tmp, 1, B, (i - 1)*n + 1, n)
-        ldiv!(adjoint(A), tmp)
-        copyto!(B, (i - 1)*n + 1, tmp, 1, n) # Modify this when array view are implemented.
-    end
-    B
-end
-function ldiv!(transA::Transpose{<:Any,<:Union{Bidiagonal,AbstractTriangular}}, B::AbstractMatrix)
-    require_one_based_indexing(transA, B)
-    A = transA.parent
-    nA,mA = size(A)
-    tmp = similar(B,size(B,1))
-    n = size(B, 1)
-    if mA != n
-        throw(DimensionMismatch("size of transpose of A is ($mA,$nA), corresponding dimension of B is $n"))
-    end
-    for i = 1:size(B,2)
-        copyto!(tmp, 1, B, (i - 1)*n + 1, n)
-        ldiv!(transpose(A), tmp)
-        copyto!(B, (i - 1)*n + 1, tmp, 1, n) # Modify this when array view are implemented.
-    end
-    B
-end
 #Generic solver using naive substitution
-function naivesub!(A::Bidiagonal{T}, b::AbstractVector, x::AbstractVector = b) where T
-    require_one_based_indexing(A, b, x)
+ldiv!(A::Bidiagonal, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
+function ldiv!(c::AbstractVecOrMat, A::Bidiagonal, b::AbstractVecOrMat)
+    require_one_based_indexing(c, A, b)
     N = size(A, 2)
-    if N != length(b) || N != length(x)
-        throw(DimensionMismatch("second dimension of A, $N, does not match one of the lengths of x, $(length(x)), or b, $(length(b))"))
+    mb, nb = size(b, 1), size(b, 2)
+    if N != mb
+        throw(DimensionMismatch("second dimension of A, $N, does not match first dimension of b, $mb"))
+    end
+    mc, nc = size(c, 1), size(c, 2)
+    if mc != mb || nc != nb
+        throw(DimensionMismatch("size of result, ($mc, $nc), does not match the size of b, ($mb, $nb)"))
     end
 
     if N == 0
-        return x
+        return copyto!(c, b)
     end
 
-    @inbounds begin
-        if A.uplo == 'L' #do forward substitution
-            x[1] = xj1 = A.dv[1]\b[1]
-            for j = 2:N
-                xj  = b[j]
-                xj -= A.ev[j - 1] * xj1
-                dvj = A.dv[j]
-                if iszero(dvj)
-                    throw(SingularException(j))
-                end
-                xj   = dvj\xj
-                x[j] = xj1 = xj
+    zi = findfirst(iszero, A.dv)
+    isnothing(zi) || throw(SingularException(zi))
+
+    @inbounds for j in 1:nb
+        if A.uplo == 'L' #do colwise forward substitution
+            c[1,j] = bi1 = A.dv[1] \ b[1,j]
+            for i in 2:N
+                c[i,j] = bi1 = A.dv[i] \ (b[i,j] - A.ev[i - 1] * bi1)
             end
-        else #do backward substitution
-            x[N] = xj1 = A.dv[N]\b[N]
-            for j = (N - 1):-1:1
-                xj  = b[j]
-                xj -= A.ev[j] * xj1
-                dvj = A.dv[j]
-                if iszero(dvj)
-                    throw(SingularException(j))
-                end
-                xj   = dvj\xj
-                x[j] = xj1 = xj
+        else #do colwise backward substitution
+            c[N,j] = bi1 = A.dv[N] \ b[N,j]
+            for i in (N - 1):-1:1
+                c[i,j] = bi1 = A.dv[i] \ (b[i,j] - A.ev[i] * bi1)
             end
         end
     end
-    return x
+    return c
 end
+ldiv!(A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
+ldiv!(A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) = @inline ldiv!(b, A, b)
+ldiv!(c::AbstractVecOrMat, A::Transpose{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) =
+    (_rdiv!(transpose(c), transpose(b), transpose(A)); return c)
+ldiv!(c::AbstractVecOrMat, A::Adjoint{<:Any,<:Bidiagonal}, b::AbstractVecOrMat) =
+    (_rdiv!(adjoint(c), adjoint(b), adjoint(A)); return c)
 
 ### Generic promotion methods and fallbacks
 function \(A::Bidiagonal{<:Number}, B::AbstractVecOrMat{<:Number})
     TA, TB = eltype(A), eltype(B)
-    TAB = typeof((zero(TA)*zero(TB) + zero(TA)*zero(TB))/one(TA))
-    ldiv!(convert(AbstractArray{TAB}, A), copy_oftype(B, TAB))
+    TAB = typeof((oneunit(TA))\oneunit(TB))
+    ldiv!(zeros(TAB, size(B)), A, B)
 end
-\(A::Bidiagonal, B::AbstractVecOrMat) = ldiv!(A, copy(B))
-function \(transA::Transpose{<:Number,<:Bidiagonal{<:Number}}, B::AbstractVecOrMat{<:Number})
-    A = transA.parent
-    TA, TB = eltype(A), eltype(B)
-    TAB = typeof((zero(TA)*zero(TB) + zero(TA)*zero(TB))/one(TA))
-    ldiv!(transpose(convert(AbstractArray{TAB}, A)), copy_oftype(B, TAB))
+\(A::Bidiagonal, B::AbstractVecOrMat) = ldiv!(copy(B), A, B)
+\(tA::Transpose{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(tA) \ B
+\(adjA::Adjoint{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = copy(adjA) \ B
+
+### Triangular specializations
+function \(B::Bidiagonal{<:Number}, U::UpperOrUnitUpperTriangular{<:Number})
+    T = typeof((oneunit(eltype(B)))\oneunit(eltype(U)))
+    A = ldiv!(zeros(T, size(U)), B, U)
+    return B.uplo == 'U' ? UpperTriangular(A) : A
 end
-\(transA::Transpose{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = ldiv!(transpose(transA.parent), copy(B))
-function \(adjA::Adjoint{<:Number,<:Bidiagonal{<:Number}}, B::AbstractVecOrMat{<:Number})
-    A = adjA.parent
-    TA, TB = eltype(A), eltype(B)
-    TAB = typeof((zero(TA)*zero(TB) + zero(TA)*zero(TB))/one(TA))
-    ldiv!(adjoint(convert(AbstractArray{TAB}, A)), copy_oftype(B, TAB))
+function \(B::Bidiagonal, U::UpperOrUnitUpperTriangular)
+    A = ldiv!(copy(parent(U)), B, U)
+    return B.uplo == 'U' ? UpperTriangular(A) : A
+end
+function \(B::Bidiagonal{<:Number}, L::LowerOrUnitLowerTriangular{<:Number})
+    T = typeof((oneunit(eltype(B)))\oneunit(eltype(L)))
+    A = ldiv!(zeros(T, size(L)), B, L)
+    return B.uplo == 'L' ? LowerTriangular(A) : A
+end
+function \(B::Bidiagonal, L::LowerOrUnitLowerTriangular)
+    A = ldiv!(copy(parent(L)), B, L)
+    return B.uplo == 'L' ? LowerTriangular(A) : A
 end
-\(adjA::Adjoint{<:Any,<:Bidiagonal}, B::AbstractVecOrMat) = ldiv!(adjoint(adjA.parent), copy(B))
+
+function \(U::UpperOrUnitUpperTriangular{<:Number}, B::Bidiagonal{<:Number})
+    T = typeof((oneunit(eltype(U)))/oneunit(eltype(B)))
+    A = ldiv!(U, copy_similar(B, T))
+    return B.uplo == 'U' ? UpperTriangular(A) : A
+end
+function \(L::LowerOrUnitLowerTriangular{<:Number}, B::Bidiagonal{<:Number})
+    T = typeof((oneunit(eltype(L)))/oneunit(eltype(B)))
+    A = ldiv!(L, copy_similar(B, T))
+    return B.uplo == 'L' ? LowerTriangular(A) : A
+end
+### Diagonal specialization
+function \(B::Bidiagonal{<:Number}, D::Diagonal{<:Number})
+    T = typeof((oneunit(eltype(B)))\oneunit(eltype(D)))
+    A = ldiv!(zeros(T, size(D)), B, D)
+    return B.uplo == 'U' ? UpperTriangular(A) : LowerTriangular(A)
+end
+
+function _rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Bidiagonal)
+    require_one_based_indexing(C, A, B)
+    m, n = size(A)
+    if size(B, 1) != n
+        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
+    end
+    mc, nc = size(C)
+    if mc != m || nc != n
+        throw(DimensionMismatch("expect output to have size ($m, $n), but got ($mc, $nc)"))
+    end
+
+    zi = findfirst(iszero, B.dv)
+    isnothing(zi) || throw(SingularException(zi))
+
+    if B.uplo == 'L'
+        diagB = B.dv[n]
+        for i in 1:m
+            C[i,n] = A[i,n] / diagB
+        end
+        for j in n-1:-1:1
+            diagB = B.dv[j]
+            offdiagB = B.ev[j]
+            for i in 1:m
+                C[i,j] = (A[i,j] - C[i,j+1]*offdiagB)/diagB
+            end
+        end
+    else
+        diagB = B.dv[1]
+        for i in 1:m
+            C[i,1] = A[i,1] / diagB
+        end
+        for j in 2:n
+            diagB = B.dv[j]
+            offdiagB = B.ev[j-1]
+            for i = 1:m
+                C[i,j] = (A[i,j] - C[i,j-1]*offdiagB)/diagB
+            end
+        end
+    end
+    C
+end
+rdiv!(A::AbstractMatrix, B::Bidiagonal) = @inline _rdiv!(A, A, B)
+rdiv!(A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B)
+rdiv!(A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) = @inline _rdiv!(A, A, B)
+_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) =
+    (ldiv!(adjoint(C), adjoint(B), adjoint(A)); return C)
+_rdiv!(C::AbstractMatrix, A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) =
+    (ldiv!(transpose(C), transpose(B), transpose(A)); return C)
+
+function /(A::AbstractMatrix{<:Number}, B::Bidiagonal{<:Number})
+    TA, TB = eltype(A), eltype(B)
+    TAB = typeof((oneunit(TA))/oneunit(TB))
+    _rdiv!(zeros(TAB, size(A)), A, B)
+end
+/(A::AbstractMatrix, B::Bidiagonal) = _rdiv!(copy(A), A, B)
+
+### Triangular specializations
+function /(U::UpperOrUnitUpperTriangular{<:Number}, B::Bidiagonal{<:Number})
+    T = typeof((oneunit(eltype(U)))/oneunit(eltype(B)))
+    A = _rdiv!(zeros(T, size(U)), U, B)
+    return B.uplo == 'U' ? UpperTriangular(A) : A
+end
+function /(U::UpperOrUnitUpperTriangular, B::Bidiagonal)
+    A = _rdiv!(copy(parent(U)), U, B)
+    return B.uplo == 'U' ? UpperTriangular(A) : A
+end
+function /(L::LowerOrUnitLowerTriangular{<:Number}, B::Bidiagonal{<:Number})
+    T = typeof((oneunit(eltype(L)))/oneunit(eltype(B)))
+    A = _rdiv!(zeros(T, size(L)), L, B)
+    return B.uplo == 'L' ? LowerTriangular(A) : A
+end
+function /(L::LowerOrUnitLowerTriangular, B::Bidiagonal)
+    A = _rdiv!(copy(parent(L)), L, B)
+    return B.uplo == 'L' ? LowerTriangular(A) : A
+end
+function /(B::Bidiagonal{<:Number}, U::UpperOrUnitUpperTriangular{<:Number})
+    T = typeof((oneunit(eltype(B)))/oneunit(eltype(U)))
+    A = rdiv!(copy_similar(B, T), U)
+    return B.uplo == 'U' ? UpperTriangular(A) : A
+end
+function /(B::Bidiagonal{<:Number}, L::LowerOrUnitLowerTriangular{<:Number})
+    T = typeof((oneunit(eltype(B)))\oneunit(eltype(L)))
+    A = rdiv!(copy_similar(B, T), L)
+    return B.uplo == 'L' ? LowerTriangular(A) : A
+end
+### Diagonal specialization
+function /(D::Diagonal{<:Number}, B::Bidiagonal{<:Number})
+    T = typeof((oneunit(eltype(D)))/oneunit(eltype(B)))
+    A = _rdiv!(zeros(T, size(D)), D, B)
+    return B.uplo == 'U' ? UpperTriangular(A) : LowerTriangular(A)
+end
+
+/(A::AbstractMatrix, B::Transpose{<:Any,<:Bidiagonal}) = A / copy(B)
+/(A::AbstractMatrix, B::Adjoint{<:Any,<:Bidiagonal}) = A / copy(B)
+# disambiguation
+/(A::AdjointAbsVec{<:Number}, B::Bidiagonal{<:Number}) = adjoint(adjoint(B) \ parent(A))
+/(A::TransposeAbsVec{<:Number}, B::Bidiagonal{<:Number}) = transpose(transpose(B) \ parent(A))
+/(A::AdjointAbsVec, B::Bidiagonal) = adjoint(adjoint(B) \ parent(A))
+/(A::TransposeAbsVec, B::Bidiagonal) = transpose(transpose(B) \ parent(A))
+/(A::AdjointAbsVec, B::Transpose{<:Any,<:Bidiagonal}) = adjoint(adjoint(B) \ parent(A))
+/(A::TransposeAbsVec, B::Transpose{<:Any,<:Bidiagonal}) = transpose(transpose(B) \ parent(A))
+/(A::AdjointAbsVec, B::Adjoint{<:Any,<:Bidiagonal}) = adjoint(adjoint(B) \ parent(A))
+/(A::TransposeAbsVec, B::Adjoint{<:Any,<:Bidiagonal}) = transpose(transpose(B) \ parent(A))
 
 factorize(A::Bidiagonal) = A
+function inv(B::Bidiagonal{T}) where T
+    n = size(B, 1)
+    dest = zeros(typeof(oneunit(T)\one(T)), (n, n))
+    ldiv!(dest, B, Diagonal{typeof(one(T)\one(T))}(I, n))
+    return B.uplo == 'U' ? UpperTriangular(dest) : LowerTriangular(dest)
+end
 
 # Eigensystems
 eigvals(M::Bidiagonal) = M.dv
 function eigvecs(M::Bidiagonal{T}) where T
     n = length(M.dv)
     Q = Matrix{T}(undef, n,n)
-    blks = [0; findall(x -> x == 0, M.ev); n]
+    blks = [0; findall(iszero, M.ev); n]
     v = zeros(T, n)
     if M.uplo == 'U'
         for idx_block = 1:length(blks) - 1, i = blks[idx_block] + 1:blks[idx_block + 1] #index of eigenvector
diff --git a/stdlib/LinearAlgebra/src/blas.jl b/stdlib/LinearAlgebra/src/blas.jl
index fd1b85d65410eb..2710559e57d6b1 100644
--- a/stdlib/LinearAlgebra/src/blas.jl
+++ b/stdlib/LinearAlgebra/src/blas.jl
@@ -5,9 +5,8 @@ Interface to BLAS subroutines.
 """
 module BLAS
 
-import ..axpy!, ..axpby!
 import Base: copyto!
-using Base: require_one_based_indexing
+using Base: require_one_based_indexing, USE_BLAS64
 
 export
 # Level 1
@@ -33,6 +32,7 @@ export
     sbmv!,
     sbmv,
     spmv!,
+    spr!,
     symv!,
     symv,
     trsv!,
@@ -62,39 +62,23 @@ export
     trsm!,
     trsm
 
+using ..LinearAlgebra: libblastrampoline, BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, stride1, chkstride1
 
-const libblas = Base.libblas_name
-const liblapack = Base.liblapack_name
+include("lbt.jl")
 
-import LinearAlgebra
-import LinearAlgebra: BlasReal, BlasComplex, BlasFloat, BlasInt, DimensionMismatch, checksquare, stride1, chkstride1, axpy!
+vendor() = :lbt
 
-import Libdl
+"""
+    get_config()
 
-# utility routines
-let lib = C_NULL
-global function determine_vendor()
-    if lib == C_NULL
-        lib = something(Libdl.dlopen(libblas; throw_error=false), C_NULL)
-    end
-    vend = :unknown
-    if lib != C_NULL
-        if Libdl.dlsym(lib, :openblas_set_num_threads; throw_error=false) !== nothing
-            vend = :openblas
-        elseif Libdl.dlsym(lib, :openblas_set_num_threads64_; throw_error=false) !== nothing
-            vend = :openblas64
-        elseif Libdl.dlsym(lib, :MKL_Set_Num_Threads; throw_error=false) !== nothing
-            vend = :mkl
-        end
-    end
-    return vend
-end
-end
+Return an object representing the current `libblastrampoline` configuration.
 
-const _vendor = determine_vendor()
-vendor() = _vendor
+!!! compat "Julia 1.7"
+    `get_config()` requires at least Julia 1.7.
+"""
+get_config() = lbt_get_config()
 
-if vendor() === :openblas64
+if USE_BLAS64
     macro blasfunc(x)
         return Expr(:quote, Symbol(x, "64_"))
     end
@@ -104,17 +88,7 @@ else
     end
 end
 
-openblas_get_config() = strip(unsafe_string(ccall((@blasfunc(openblas_get_config), libblas), Ptr{UInt8}, () )))
-
-function guess_vendor()
-    # like determine_vendor, but guesses blas in some cases
-    # where determine_vendor returns :unknown
-    ret = vendor()
-    if Sys.isapple() && (ret == :unknown)
-        ret = :osxblas
-    end
-    ret
-end
+_tryparse_env_int(key) = tryparse(Int, get(ENV, key, ""))
 
 
 """
@@ -124,52 +98,17 @@ end
 Set the number of threads the BLAS library should use equal to `n::Integer`.
 
 Also accepts `nothing`, in which case julia tries to guess the default number of threads.
-Passing `nothing` is discouraged and mainly exists for the following reason:
-
-On exotic variants of BLAS, `nothing` may be returned by `get_num_threads()`.
-Thus on exotic variants of BLAS, the following pattern may fail to set the number of threads:
-
-```julia
-old = get_num_threads()
-set_num_threads(1)
-@threads for i in 1:10
-    # single-threaded BLAS calls
-end
-set_num_threads(old)
-```
-Because `set_num_threads` accepts `nothing`, this code can still run
-on exotic variants of BLAS without error. Warnings will be raised instead.
-
-!!! compat "Julia 1.6"
-    `set_num_threads(::Nothing)` requires at least Julia 1.6.
-"""
-set_num_threads(n)::Nothing = _set_num_threads(n)
-
-function _set_num_threads(n::Integer; _blas = guess_vendor())
-    if _blas === :openblas || _blas == :openblas64
-        return ccall((@blasfunc(openblas_set_num_threads), libblas), Cvoid, (Cint,), n)
-    elseif _blas === :mkl
-        # MKL may let us set the number of threads in several ways
-        return ccall((:MKL_Set_Num_Threads, libblas), Cvoid, (Cint,), n)
-    elseif _blas === :osxblas
-        # OSX BLAS looks at an environment variable
-        ENV["VECLIB_MAXIMUM_THREADS"] = n
-    else
-        @assert _blas === :unknown
-        @warn "Failed to set number of BLAS threads." maxlog=1
-    end
-    return nothing
-end
-
-_tryparse_env_int(key) = tryparse(Int, get(ENV, key, ""))
-
-function _set_num_threads(::Nothing; _blas = guess_vendor())
-    n = something(
+Passing `nothing` is discouraged and mainly exists for historical reasons.
+"""
+set_num_threads(nt::Integer)::Nothing = lbt_set_num_threads(Int32(nt))
+function set_num_threads(::Nothing)
+    nt = something(
         _tryparse_env_int("OPENBLAS_NUM_THREADS"),
         _tryparse_env_int("OMP_NUM_THREADS"),
+        _tryparse_env_int("VECLIB_MAXIMUM_THREADS"),
         max(1, Sys.CPU_THREADS ÷ 2),
     )
-    _set_num_threads(n; _blas)
+    return set_num_threads(nt)
 end
 
 """
@@ -177,78 +116,49 @@ end
 
 Get the number of threads the BLAS library is using.
 
-On exotic variants of `BLAS` this function can fail, which is indicated by returning `nothing`.
-
 !!! compat "Julia 1.6"
     `get_num_threads` requires at least Julia 1.6.
 """
-get_num_threads(;_blas=guess_vendor())::Union{Int, Nothing} = _get_num_threads()
-
-function _get_num_threads(; _blas = guess_vendor())::Union{Int, Nothing}
-    if _blas === :openblas || _blas === :openblas64
-        return Int(ccall((@blasfunc(openblas_get_num_threads), libblas), Cint, ()))
-    elseif _blas === :mkl
-        return Int(ccall((:mkl_get_max_threads, libblas), Cint, ()))
-    elseif _blas === :osxblas
-        key = "VECLIB_MAXIMUM_THREADS"
-        nt = _tryparse_env_int(key)
-        if nt === nothing
-            @warn "Failed to read environment variable $key" maxlog=1
-        else
-            return nt
-        end
-    else
-        @assert _blas === :unknown
-    end
-    @warn "Could not get number of BLAS threads. Returning `nothing` instead." maxlog=1
-    return nothing
-end
+get_num_threads()::Int = lbt_get_num_threads()
 
-const _testmat = [1.0 0.0; 0.0 -1.0]
 function check()
-    blas = vendor()
-    if blas === :openblas || blas === :openblas64
-        openblas_config = openblas_get_config()
-        openblas64 = occursin(r".*USE64BITINT.*", openblas_config)
-        if Base.USE_BLAS64 != openblas64
-            if !openblas64
-                @error """
-                    OpenBLAS was not built with 64bit integer support.
-                    You're seeing this error because Julia was built with USE_BLAS64=1.
-                    Please rebuild Julia with USE_BLAS64=0"""
-            else
-                @error """
-                    Julia was not built with support for OpenBLAS with 64bit integer support.
-                    You're seeing this error because Julia was built with USE_BLAS64=0.
-                    Please rebuild Julia with USE_BLAS64=1"""
-            end
-            println("Quitting.")
-            exit()
-        end
-    elseif blas === :mkl
-        if Base.USE_BLAS64
-            ENV["MKL_INTERFACE_LAYER"] = "ILP64"
-        end
+    # TODO: once we have bitfields of the BLAS functions that are actually forwarded,
+    # ensure that we have a complete set here (warning on an incomplete BLAS implementation)
+    config = get_config()
+
+    # Ensure that one of our loaded libraries satisfies our interface requirement
+    interface = USE_BLAS64 ? :ilp64 : :lp64
+    if !any(lib.interface == interface for lib in config.loaded_libs)
+        interfacestr = uppercase(string(interface))
+        @error("No loaded BLAS libraries were built with $(interfacestr) support")
+        println("Quitting.")
+        exit()
     end
+end
 
-    #
-    # Check if BlasInt is the expected bitsize, by triggering an error
-    #
-    (_, info) = LinearAlgebra.LAPACK.potrf!('U', _testmat)
-    if info != 2 # mangled info code
-        if info == 2^33
-            error("BLAS and LAPACK are compiled with 32-bit integer support, but Julia expects 64-bit integers. Please build Julia with USE_BLAS64=0.")
-        elseif info == 0
-            error("BLAS and LAPACK are compiled with 64-bit integer support but Julia expects 32-bit integers. Please build Julia with USE_BLAS64=1.")
-        else
-            error("The LAPACK library produced an undefined error code. Please verify the installation of BLAS and LAPACK.")
-        end
+"Check that upper/lower (for special matrices) is correctly specified"
+function chkuplo(uplo::AbstractChar)
+    if !(uplo == 'U' || uplo == 'L')
+        throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
     end
-
+    uplo
 end
 
-
 # Level 1
+# A help function to pick the pointer and inc for 1d like inputs.
+@inline function vec_pointer_stride(x::AbstractArray, stride0check = nothing)
+    isdense(x) && return pointer(x), 1 # simpify runtime check when possibe
+    ndims(x) == 1 || strides(x) == Base.size_to_strides(stride(x, 1), size(x)...) ||
+        throw(ArgumentError("only support vector like inputs"))
+    st = stride(x, 1)
+    isnothing(stride0check) || (st == 0 && throw(stride0check))
+    ptr = st > 0 ? pointer(x) : pointer(x, lastindex(x))
+    ptr, st
+end
+isdense(x) = x isa DenseArray
+isdense(x::Base.FastContiguousSubArray) = isdense(parent(x))
+isdense(x::Base.ReshapedArray) = isdense(parent(x))
+isdense(x::Base.ReinterpretArray) = isdense(parent(x))
 ## copy
 
 """
@@ -265,7 +175,7 @@ for (fname, elty) in ((:dcopy_,:Float64),
     @eval begin
         # SUBROUTINE DCOPY(N,DX,INCX,DY,INCY)
         function blascopy!(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
                  n, DX, incx, DY, incy)
             DY
@@ -287,12 +197,12 @@ first `n` elements of array `Y` with stride `incy`. Returns `X` and `Y`.
 """
 function rot! end
 
-for (fname, elty, cty, sty, lib) in ((:drot_, :Float64, :Float64, :Float64, libblas),
-                                     (:srot_, :Float32, :Float32, :Float32, libblas),
-                                     (:zdrot_, :ComplexF64, :Float64, :Float64, libblas),
-                                     (:csrot_, :ComplexF32, :Float32, :Float32, libblas),
-                                     (:zrot_, :ComplexF64, :Float64, :ComplexF64, liblapack),
-                                     (:crot_, :ComplexF32, :Float32, :ComplexF32, liblapack))
+for (fname, elty, cty, sty, lib) in ((:drot_, :Float64, :Float64, :Float64, libblastrampoline),
+                                     (:srot_, :Float32, :Float32, :Float32, libblastrampoline),
+                                     (:zdrot_, :ComplexF64, :Float64, :Float64, libblastrampoline),
+                                     (:csrot_, :ComplexF32, :Float32, :Float32, libblastrampoline),
+                                     (:zrot_, :ComplexF64, :Float64, :ComplexF64, libblastrampoline),
+                                     (:crot_, :ComplexF32, :Float32, :ComplexF32, libblastrampoline))
     @eval begin
         # SUBROUTINE DROT(N,DX,INCX,DY,INCY,C,S)
         function rot!(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer, C::$cty, S::$sty)
@@ -308,15 +218,21 @@ end
 
 """
     scal!(n, a, X, incx)
+    scal!(a, X)
 
 Overwrite `X` with `a*X` for the first `n` elements of array `X` with stride `incx`. Returns `X`.
+
+If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used.
 """
 function scal! end
 
 """
     scal(n, a, X, incx)
+    scal(a, X)
 
 Return `X` scaled by `a` for the first `n` elements of array `X` with stride `incx`.
+
+If `n` and `incx` are not provided, `length(X)` and `stride(X,1)` are used.
 """
 function scal end
 
@@ -327,14 +243,21 @@ for (fname, elty) in ((:dscal_,:Float64),
     @eval begin
         # SUBROUTINE DSCAL(N,DA,DX,INCX)
         function scal!(n::Integer, DA::$elty, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
                   n, DA, DX, incx)
             DX
         end
+
+        function scal!(DA::$elty, DX::AbstractArray{$elty})
+            p, st = vec_pointer_stride(DX, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve DX scal!(length(DX), DA, p, abs(st))
+            DX
+        end
     end
 end
 scal(n, DA, DX, incx) = scal!(n, DA, copy(DX), incx)
+scal(DA, DX) = scal!(DA, copy(DX))
 
 ## dot
 
@@ -381,8 +304,8 @@ julia> BLAS.dotu(10, fill(1.0im, 10), 1, fill(1.0+im, 20), 2)
 """
 function dotu end
 
-for (fname, elty) in ((:ddot_,:Float64),
-                      (:sdot_,:Float32))
+for (fname, elty) in ((:cblas_ddot,:Float64),
+                      (:cblas_sdot,:Float32))
     @eval begin
                 #       DOUBLE PRECISION FUNCTION DDOT(N,DX,INCX,DY,INCY)
                 # *     .. Scalar Arguments ..
@@ -391,8 +314,8 @@ for (fname, elty) in ((:ddot_,:Float64),
                 # *     .. Array Arguments ..
                 #       DOUBLE PRECISION DX(*),DY(*)
         function dot(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer)
-            ccall((@blasfunc($fname), libblas), $elty,
-                (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
+            ccall((@blasfunc($fname), libblastrampoline), $elty,
+                (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt),
                  n, DX, incx, DY, incy)
         end
     end
@@ -408,7 +331,7 @@ for (fname, elty) in ((:cblas_zdotc_sub,:ComplexF64),
                 #       DOUBLE PRECISION DX(*),DY(*)
         function dotc(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer)
             result = Ref{$elty}()
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}),
                  n, DX, incx, DY, incy, result)
             result[]
@@ -426,7 +349,7 @@ for (fname, elty) in ((:cblas_zdotu_sub,:ComplexF64),
                 #       DOUBLE PRECISION DX(*),DY(*)
         function dotu(n::Integer, DX::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer, DY::Union{Ptr{$elty},AbstractArray{$elty}}, incy::Integer)
             result = Ref{$elty}()
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}, BlasInt, Ptr{$elty}),
                  n, DX, incx, DY, incy, result)
             result[]
@@ -434,29 +357,16 @@ for (fname, elty) in ((:cblas_zdotu_sub,:ComplexF64),
     end
 end
 
-function dot(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasReal
-    require_one_based_indexing(DX, DY)
-    n = length(DX)
-    if n != length(DY)
-        throw(DimensionMismatch("dot product arguments have lengths $(length(DX)) and $(length(DY))"))
-    end
-    return dot(n, DX, stride(DX, 1), DY, stride(DY, 1))
-end
-function dotc(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasComplex
-    require_one_based_indexing(DX, DY)
-    n = length(DX)
-    if n != length(DY)
-        throw(DimensionMismatch("dot product arguments have lengths $(length(DX)) and $(length(DY))"))
-    end
-    return dotc(n, DX, stride(DX, 1), DY, stride(DY, 1))
-end
-function dotu(DX::Union{DenseArray{T},AbstractVector{T}}, DY::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasComplex
-    require_one_based_indexing(DX, DY)
-    n = length(DX)
-    if n != length(DY)
-        throw(DimensionMismatch("dot product arguments have lengths $(length(DX)) and $(length(DY))"))
+for (elty, f) in ((Float32, :dot), (Float64, :dot),
+                  (ComplexF32, :dotc), (ComplexF64, :dotc),
+                  (ComplexF32, :dotu), (ComplexF64, :dotu))
+    @eval begin
+        function $f(x::AbstractArray{$elty}, y::AbstractArray{$elty})
+            n, m = length(x), length(y)
+            n == m || throw(DimensionMismatch(lazy"dot product arguments have lengths $n and $m"))
+            GC.@preserve x y $f(n, vec_pointer_stride(x)..., vec_pointer_stride(y)...)
+        end
     end
-    return dotu(n, DX, stride(DX, 1), DY, stride(DY, 1))
 end
 
 ## nrm2
@@ -484,13 +394,17 @@ for (fname, elty, ret_type) in ((:dnrm2_,:Float64,:Float64),
     @eval begin
         # SUBROUTINE DNRM2(N,X,INCX)
         function nrm2(n::Integer, X::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer)
-            ccall((@blasfunc($fname), libblas), $ret_type,
+            ccall((@blasfunc($fname), libblastrampoline), $ret_type,
                 (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
                  n, X, incx)
         end
     end
 end
-nrm2(x::Union{AbstractVector,DenseArray}) = nrm2(length(x), x, stride1(x))
+# openblas returns 0 for negative stride
+function nrm2(x::AbstractArray)
+    p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    GC.@preserve x nrm2(length(x), p, abs(st))
+end
 
 ## asum
 
@@ -521,13 +435,16 @@ for (fname, elty, ret_type) in ((:dasum_,:Float64,:Float64),
     @eval begin
         # SUBROUTINE ASUM(N, X, INCX)
         function asum(n::Integer, X::Union{Ptr{$elty},AbstractArray{$elty}}, incx::Integer)
-            ccall((@blasfunc($fname), libblas), $ret_type,
+            ccall((@blasfunc($fname), libblastrampoline), $ret_type,
                 (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
                  n, X, incx)
         end
     end
 end
-asum(x::Union{AbstractVector,DenseArray}) = asum(length(x), x, stride1(x))
+function asum(x::AbstractArray)
+    p, st = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    GC.@preserve x asum(length(x), p, abs(st))
+end
 
 ## axpy
 
@@ -538,15 +455,13 @@ Overwrite `Y` with `X*a + Y`, where `a` is a scalar. Return `Y`.
 
 # Examples
 ```jldoctest
-julia> x = [1; 2; 3];
+julia> x = [1.; 2; 3];
 
-julia> y = [4; 5; 6];
+julia> y = [4. ;; 5 ;; 6];
 
 julia> BLAS.axpy!(2, x, y)
-3-element Vector{Int64}:
-  6
-  9
- 12
+1×3 Matrix{Float64}:
+ 6.0  9.0  12.0
 ```
 """
 function axpy! end
@@ -564,37 +479,40 @@ for (fname, elty) in ((:daxpy_,:Float64),
                 #*     .. Array Arguments ..
                 #      DOUBLE PRECISION DX(*),DY(*)
         function axpy!(n::Integer, alpha::($elty), dx::Union{Ptr{$elty}, AbstractArray{$elty}}, incx::Integer, dy::Union{Ptr{$elty}, AbstractArray{$elty}}, incy::Integer)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
                  n, alpha, dx, incx, dy, incy)
             dy
         end
     end
 end
-function axpy!(alpha::Number, x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where T<:BlasFloat
+
+function axpy!(alpha::Number, x::AbstractArray{T}, y::AbstractArray{T}) where T<:BlasFloat
     if length(x) != length(y)
-        throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
     end
-    return axpy!(length(x), convert(T,alpha), x, stride(x, 1), y, stride(y, 1))
+    GC.@preserve x y axpy!(length(x), T(alpha), vec_pointer_stride(x)...,
+        vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...)
+    y
 end
 
-function axpy!(alpha::Number, x::Array{T}, rx::Union{UnitRange{Ti},AbstractRange{Ti}},
-               y::Array{T}, ry::Union{UnitRange{Ti},AbstractRange{Ti}}) where {T<:BlasFloat,Ti<:Integer}
+function axpy!(alpha::Number, x::Array{T}, rx::AbstractRange{Ti},
+               y::Array{T}, ry::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer}
     if length(rx) != length(ry)
         throw(DimensionMismatch("ranges of differing lengths"))
     end
     if minimum(rx) < 1 || maximum(rx) > length(x)
-        throw(ArgumentError("range out of bounds for x, of length $(length(x))"))
+        throw(ArgumentError(lazy"range out of bounds for x, of length $(length(x))"))
     end
     if minimum(ry) < 1 || maximum(ry) > length(y)
-        throw(ArgumentError("range out of bounds for y, of length $(length(y))"))
+        throw(ArgumentError(lazy"range out of bounds for y, of length $(length(y))"))
     end
     GC.@preserve x y axpy!(
         length(rx),
-        convert(T, alpha),
-        pointer(x) + (first(rx) - 1)*sizeof(T),
+        T(alpha),
+        pointer(x, minimum(rx)),
         step(rx),
-        pointer(y) + (first(ry) - 1)*sizeof(T),
+        pointer(y, minimum(ry)),
         step(ry))
 
     return y
@@ -633,7 +551,7 @@ for (fname, elty) in ((:daxpby_,:Float64), (:saxpby_,:Float32),
         function axpby!(n::Integer, alpha::($elty), dx::Union{Ptr{$elty},
                         AbstractArray{$elty}}, incx::Integer, beta::($elty),
                         dy::Union{Ptr{$elty}, AbstractArray{$elty}}, incy::Integer)
-            ccall((@blasfunc($fname), libblas), Cvoid, (Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid, (Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
                 n, alpha, dx, incx, beta, dy, incy)
             dy
@@ -641,12 +559,14 @@ for (fname, elty) in ((:daxpby_,:Float64), (:saxpby_,:Float32),
     end
 end
 
-function axpby!(alpha::Number, x::Union{DenseArray{T},AbstractVector{T}}, beta::Number, y::Union{DenseArray{T},AbstractVector{T}}) where T<:BlasFloat
+function axpby!(alpha::Number, x::AbstractArray{T}, beta::Number, y::AbstractArray{T}) where T<:BlasFloat
     require_one_based_indexing(x, y)
     if length(x) != length(y)
-        throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(length(x)), but y has length $(length(y))"))
     end
-    return axpby!(length(x), convert(T, alpha), x, stride(x, 1), convert(T, beta), y, stride(y, 1))
+    GC.@preserve x y axpby!(length(x), T(alpha), vec_pointer_stride(x)..., T(beta),
+        vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))...)
+    y
 end
 
 ## iamax
@@ -656,13 +576,17 @@ for (fname, elty) in ((:idamax_,:Float64),
                       (:icamax_,:ComplexF32))
     @eval begin
         function iamax(n::Integer, dx::Union{Ptr{$elty}, AbstractArray{$elty}}, incx::Integer)
-            ccall((@blasfunc($fname), libblas),BlasInt,
+            ccall((@blasfunc($fname), libblastrampoline),BlasInt,
                 (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
                 n, dx, incx)
         end
     end
 end
-iamax(dx::Union{AbstractVector,DenseArray}) = iamax(length(dx), dx, stride1(dx))
+function iamax(dx::AbstractArray)
+    p, st = vec_pointer_stride(dx)
+    st <= 0 && return BlasInt(0)
+    iamax(length(dx), p, st)
+end
 
 """
     iamax(n, dx, incx)
@@ -694,20 +618,31 @@ for (fname, elty) in ((:dgemv_,:Float64),
             require_one_based_indexing(A, X, Y)
             m,n = size(A,1),size(A,2)
             if trans == 'N' && (length(X) != n || length(Y) != m)
-                throw(DimensionMismatch("A has dimensions $(size(A)), X has length $(length(X)) and Y has length $(length(Y))"))
+                throw(DimensionMismatch(lazy"A has dimensions $(size(A)), X has length $(length(X)) and Y has length $(length(Y))"))
             elseif trans == 'C' && (length(X) != m || length(Y) != n)
-                throw(DimensionMismatch("the adjoint of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))"))
+                throw(DimensionMismatch(lazy"the adjoint of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))"))
             elseif trans == 'T' && (length(X) != m || length(Y) != n)
-                throw(DimensionMismatch("the transpose of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))"))
+                throw(DimensionMismatch(lazy"the transpose of A has dimensions $n, $m, X has length $(length(X)) and Y has length $(length(Y))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            lda = stride(A,2)
+            pX, sX = vec_pointer_stride(X, ArgumentError("input vector with 0 stride is not allowed"))
+            pY, sY = vec_pointer_stride(Y, ArgumentError("dest vector with 0 stride is not allowed"))
+            pA = pointer(A)
+            if lda < 0
+                pA += (size(A, 2) - 1) * lda * sizeof($elty)
+                lda = -lda
+                trans == 'N' ? (sX = -sX) : (sY = -sY)
+            end
+            lda >= size(A,1) || size(A,2) <= 1 || error("when `size(A,2) > 1`, `abs(stride(A,2))` must be at least `size(A,1)`")
+            lda = max(1, size(A,1), lda)
+            GC.@preserve A X Y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
+                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
                  trans, size(A,1), size(A,2), alpha,
-                 A, max(1,stride(A,2)), X, stride(X,1),
-                 beta, Y, stride(Y,1))
+                 pA, lda, pX, sX,
+                 beta, pY, sY, 1)
             Y
         end
         function gemv(trans::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, X::AbstractVector{$elty})
@@ -781,14 +716,16 @@ for (fname, elty) in ((:dgbmv_,:Float64),
                        y::AbstractVector{$elty})
             require_one_based_indexing(A, x, y)
             chkstride1(A)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                  Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}),
+                 Ref{BlasInt}, Clong),
                  trans, m, size(A,2), kl,
                  ku, alpha, A, max(1,stride(A,2)),
-                 x, stride(x,1), beta, y, stride(y,1))
+                 px, stx, beta, py, sty, 1)
             y
         end
         function gbmv(trans::AbstractChar, m::Integer, kl::Integer, ku::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -813,10 +750,10 @@ Only the [`ul`](@ref stdlib-blas-uplo) triangle of `A` is used.
 """
 function symv! end
 
-for (fname, elty, lib) in ((:dsymv_,:Float64,libblas),
-                           (:ssymv_,:Float32,libblas),
-                           (:zsymv_,:ComplexF64,liblapack),
-                           (:csymv_,:ComplexF32,liblapack))
+for (fname, elty, lib) in ((:dsymv_,:Float64,libblastrampoline),
+                           (:ssymv_,:Float32,libblastrampoline),
+                           (:zsymv_,:ComplexF64,libblastrampoline),
+                           (:csymv_,:ComplexF32,libblastrampoline))
     # Note that the complex symv are not BLAS but auiliary functions in LAPACK
     @eval begin
              #      SUBROUTINE DSYMV(UPLO,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
@@ -829,25 +766,28 @@ for (fname, elty, lib) in ((:dsymv_,:Float64,libblas),
         function symv!(uplo::AbstractChar, alpha::Union{($elty), Bool},
                        A::AbstractMatrix{$elty}, x::AbstractVector{$elty},
                        beta::Union{($elty), Bool}, y::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             m, n = size(A)
             if m != n
-                throw(DimensionMismatch("matrix A is $m by $n but must be square"))
+                throw(DimensionMismatch(lazy"matrix A is $m by $n but must be square"))
             end
             if n != length(x)
-                throw(DimensionMismatch("A has size $(size(A)), and x has length $(length(x))"))
+                throw(DimensionMismatch(lazy"A has size $(size(A)), and x has length $(length(x))"))
             end
             if m != length(y)
-                throw(DimensionMismatch("A has size $(size(A)), and y has length $(length(y))"))
+                throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), $lib), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), $lib), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}),
+                 Ptr{$elty}, Ref{BlasInt}, Clong),
                  uplo, n, alpha, A,
-                 max(1,stride(A,2)), x, stride(x,1), beta,
-                 y, stride(y,1))
+                 max(1,stride(A,2)), px, stx, beta,
+                 py, sty, 1)
             y
         end
         function symv(uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -890,28 +830,29 @@ for (fname, elty) in ((:zhemv_,:ComplexF64),
                       (:chemv_,:ComplexF32))
     @eval begin
         function hemv!(uplo::AbstractChar, α::Union{$elty, Bool}, A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, β::Union{$elty, Bool}, y::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             m, n = size(A)
             if m != n
-                throw(DimensionMismatch("matrix A is $m by $n but must be square"))
+                throw(DimensionMismatch(lazy"matrix A is $m by $n but must be square"))
             end
             if n != length(x)
-                throw(DimensionMismatch("A has size $(size(A)), and x has length $(length(x))"))
+                throw(DimensionMismatch(lazy"A has size $(size(A)), and x has length $(length(x))"))
             end
             if m != length(y)
-                throw(DimensionMismatch("A has size $(size(A)), and y has length $(length(y))"))
+                throw(DimensionMismatch(lazy"A has size $(size(A)), and y has length $(length(y))"))
             end
             chkstride1(A)
             lda = max(1, stride(A, 2))
-            incx = stride(x, 1)
-            incy = stride(y, 1)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}),
+                 Ptr{$elty}, Ref{BlasInt}, Clong),
                 uplo, n, α, A,
-                lda, x, incx, β,
-                y, incy)
+                lda, px, stx, β,
+                py, sty, 1)
             y
         end
         function hemv(uplo::AbstractChar, α::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -962,7 +903,7 @@ for (fname, elty) in ((:zhpmv_, :ComplexF64),
                        y::Union{Ptr{$elty}, AbstractArray{$elty}},
                        incy::Integer)
 
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                   (Ref{UInt8},     # uplo,
                    Ref{BlasInt},   # n,
                    Ref{$elty},     # α,
@@ -971,7 +912,8 @@ for (fname, elty) in ((:zhpmv_, :ComplexF64),
                    Ref{BlasInt},   # incx,
                    Ref{$elty},     # β,
                    Ptr{$elty},     # y, output
-                   Ref{BlasInt}),  # incy
+                   Ref{BlasInt},   # incy
+                   Clong),         # length of uplo
                   uplo,
                   n,
                   α,
@@ -980,24 +922,29 @@ for (fname, elty) in ((:zhpmv_, :ComplexF64),
                   incx,
                   β,
                   y,
-                  incy)
+                  incy,
+                  1)
             return y
         end
     end
 end
 
 function hpmv!(uplo::AbstractChar,
-               α::Number, AP::Union{DenseArray{T}, AbstractVector{T}}, x::Union{DenseArray{T}, AbstractVector{T}},
-               β::Number, y::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasComplex}
+               α::Number, AP::AbstractArray{T}, x::AbstractArray{T},
+               β::Number, y::AbstractArray{T}) where {T <: BlasComplex}
     require_one_based_indexing(AP, x, y)
     N = length(x)
     if N != length(y)
-        throw(DimensionMismatch("x has length $(N), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))"))
     end
     if 2*length(AP) < N*(N + 1)
-        throw(DimensionMismatch("Packed Hermitian matrix A has size smaller than length(x) =  $(N)."))
+        throw(DimensionMismatch(lazy"Packed hermitian matrix A has size smaller than length(x) = $(N)."))
     end
-    return hpmv!(uplo, N, convert(T, α), AP, x, stride(x, 1), convert(T, β), y, stride(y, 1))
+    chkstride1(AP)
+    px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+    GC.@preserve x y hpmv!(uplo, N, T(α), AP, px, stx, T(β), py, sty)
+    y
 end
 
 """
@@ -1036,15 +983,18 @@ for (fname, elty) in ((:dsbmv_,:Float64),
              # *     .. Array Arguments ..
              #       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
         function sbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             chkstride1(A)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
+                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
                  uplo, size(A,2), k, alpha,
-                 A, max(1,stride(A,2)), x, stride(x,1),
-                 beta, y, stride(y,1))
+                 A, max(1,stride(A,2)), px, stx,
+                 beta, py, sty, 1)
             y
         end
         function sbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -1110,7 +1060,7 @@ for (fname, elty) in ((:dspmv_, :Float64),
                        y::Union{Ptr{$elty}, AbstractArray{$elty}},
                        incy::Integer)
 
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                   (Ref{UInt8},     # uplo,
                    Ref{BlasInt},   # n,
                    Ref{$elty},     # α,
@@ -1119,7 +1069,8 @@ for (fname, elty) in ((:dspmv_, :Float64),
                    Ref{BlasInt},   # incx,
                    Ref{$elty},     # β,
                    Ptr{$elty},     # y, out
-                   Ref{BlasInt}),  # incy
+                   Ref{BlasInt},   # incy
+                   Clong),         # length of uplo
                   uplo,
                   n,
                   α,
@@ -1128,24 +1079,29 @@ for (fname, elty) in ((:dspmv_, :Float64),
                   incx,
                   β,
                   y,
-                  incy)
+                  incy,
+                  1)
             return y
         end
     end
 end
 
 function spmv!(uplo::AbstractChar,
-               α::Real, AP::Union{DenseArray{T}, AbstractVector{T}}, x::Union{DenseArray{T}, AbstractVector{T}},
-               β::Real, y::Union{DenseArray{T}, AbstractVector{T}}) where {T <: BlasReal}
+               α::Real, AP::AbstractArray{T}, x::AbstractArray{T},
+               β::Real, y::AbstractArray{T}) where {T <: BlasReal}
     require_one_based_indexing(AP, x, y)
     N = length(x)
     if N != length(y)
-        throw(DimensionMismatch("x has length $(N), but y has length $(length(y))"))
+        throw(DimensionMismatch(lazy"x has length $(N), but y has length $(length(y))"))
     end
     if 2*length(AP) < N*(N + 1)
-        throw(DimensionMismatch("Packed symmetric matrix A has size smaller than length(x) = $(N)."))
+        throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N)."))
     end
-    return spmv!(uplo, N, convert(T, α), AP, x, stride(x, 1), convert(T, β), y, stride(y, 1))
+    chkstride1(AP)
+    px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+    GC.@preserve x y spmv!(uplo, N, T(α), AP, px, stx, T(β), py, sty)
+    y
 end
 
 """
@@ -1172,6 +1128,74 @@ Return the updated `y`.
 """
 spmv!
 
+### spr!, (SP) symmetric packed matrix-vector operation defined as A := alpha*x*x' + A
+for (fname, elty) in ((:dspr_, :Float64),
+                      (:sspr_, :Float32))
+    @eval begin
+        function spr!(uplo::AbstractChar,
+                      n::Integer,
+                      α::$elty,
+                      x::Union{Ptr{$elty}, AbstractArray{$elty}},
+                      incx::Integer,
+                      AP::Union{Ptr{$elty}, AbstractArray{$elty}})
+
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+                  (Ref{UInt8},     # uplo,
+                   Ref{BlasInt},   # n,
+                   Ref{$elty},     # α,
+                   Ptr{$elty},     # x,
+                   Ref{BlasInt},   # incx,
+                   Ptr{$elty},     # AP,
+                   Clong),         # length of uplo
+                  uplo,
+                  n,
+                  α,
+                  x,
+                  incx,
+                  AP,
+                  1)
+            return AP
+        end
+    end
+end
+
+function spr!(uplo::AbstractChar,
+              α::Real, x::AbstractArray{T},
+              AP::AbstractArray{T}) where {T <: BlasReal}
+    chkuplo(uplo)
+    require_one_based_indexing(AP, x)
+    N = length(x)
+    if 2*length(AP) < N*(N + 1)
+        throw(DimensionMismatch(lazy"Packed symmetric matrix A has size smaller than length(x) = $(N)."))
+    end
+    chkstride1(AP)
+    px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+    return GC.@preserve x spr!(uplo, N, T(α), px, stx , AP)
+end
+
+"""
+    spr!(uplo, α, x, AP)
+
+Update matrix `A` as `A+α*x*x'`, where `A` is a symmetric matrix provided
+in packed format `AP` and `x` is a vector.
+
+With `uplo = 'U'`, the array AP must contain the upper triangular part of the
+symmetric matrix packed sequentially, column by column, so that `AP[1]`
+contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[1, 2]` and `A[2, 2]`
+respectively, and so on.
+
+With `uplo = 'L'`, the array AP must contain the lower triangular part of the
+symmetric matrix packed sequentially, column by column, so that `AP[1]`
+contains `A[1, 1]`, `AP[2]` and `AP[3]` contain `A[2, 1]` and `A[3, 1]`
+respectively, and so on.
+
+The scalar input `α` must be real.
+
+The array inputs `x` and `AP` must all be of `Float32` or `Float64` type.
+Return the updated `AP`.
+"""
+spr!
+
 ### hbmv, (HB) Hermitian banded matrix-vector multiplication
 for (fname, elty) in ((:zhbmv_,:ComplexF64),
                       (:chbmv_,:ComplexF32))
@@ -1184,15 +1208,18 @@ for (fname, elty) in ((:zhbmv_,:ComplexF64),
              # *     .. Array Arguments ..
              #       DOUBLE PRECISION A(LDA,*),X(*),Y(*)
         function hbmv!(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty}, beta::($elty), y::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x, y)
             chkstride1(A)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("dest vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
+                 Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Clong),
                  uplo, size(A,2), k, alpha,
-                 A, max(1,stride(A,2)), x, stride(x,1),
-                 beta, y, stride(y,1))
+                 A, max(1,stride(A,2)), px, stx,
+                 beta, py, sty, 1)
             y
         end
         function hbmv(uplo::AbstractChar, k::Integer, alpha::($elty), A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -1240,17 +1267,20 @@ for (fname, elty) in ((:dtrmv_,:Float64),
                 # *     .. Array Arguments ..
                 #       DOUBLE PRECISION A(LDA,*),X(*)
         function trmv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x)
             n = checksquare(A)
             if n != length(x)
-                throw(DimensionMismatch("A has size ($n,$n), x has length $(length(x))"))
+                throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
+                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                 Clong, Clong, Clong),
                  uplo, trans, diag, n,
-                 A, max(1,stride(A,2)), x, max(1,stride(x, 1)))
+                 A, max(1,stride(A,2)), px, stx, 1, 1, 1)
             x
         end
         function trmv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -1294,17 +1324,20 @@ for (fname, elty) in ((:dtrsv_,:Float64),
                 #       .. Array Arguments ..
                 #       DOUBLE PRECISION A(LDA,*),X(*)
         function trsv!(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x)
             n = checksquare(A)
             if n != length(x)
-                throw(DimensionMismatch("size of A is $n != length(x) = $(length(x))"))
+                throw(DimensionMismatch(lazy"size of A is $n != length(x) = $(length(x))"))
             end
             chkstride1(A)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
+                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                 Clong, Clong, Clong),
                  uplo, trans, diag, n,
-                 A, max(1,stride(A,2)), x, stride(x, 1))
+                 A, max(1,stride(A,2)), px, stx, 1, 1, 1)
             x
         end
         function trsv(uplo::AbstractChar, trans::AbstractChar, diag::AbstractChar, A::AbstractMatrix{$elty}, x::AbstractVector{$elty})
@@ -1331,15 +1364,15 @@ for (fname, elty) in ((:dger_,:Float64),
             require_one_based_indexing(A, x, y)
             m, n = size(A)
             if m != length(x) || n != length(y)
-                throw(DimensionMismatch("A has size ($m,$n), x has length $(length(x)), y has length $(length(y))"))
+                throw(DimensionMismatch(lazy"A has size ($m,$n), x has length $(length(x)), y has length $(length(y))"))
             end
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            py, sty = vec_pointer_stride(y, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x y ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                  Ref{BlasInt}),
-                 m, n, α, x,
-                 stride(x, 1), y, stride(y, 1), A,
-                 max(1,stride(A,2)))
+                 m, n, α, px, stx, py, sty, A, max(1,stride(A,2)))
             A
         end
     end
@@ -1355,22 +1388,23 @@ Rank-1 update of the symmetric matrix `A` with vector `x` as `alpha*x*transpose(
 """
 function syr! end
 
-for (fname, elty, lib) in ((:dsyr_,:Float64,libblas),
-                           (:ssyr_,:Float32,libblas),
-                           (:zsyr_,:ComplexF64,liblapack),
-                           (:csyr_,:ComplexF32,liblapack))
+for (fname, elty, lib) in ((:dsyr_,:Float64,libblastrampoline),
+                           (:ssyr_,:Float32,libblastrampoline),
+                           (:zsyr_,:ComplexF64,libblastrampoline),
+                           (:csyr_,:ComplexF32,libblastrampoline))
     @eval begin
         function syr!(uplo::AbstractChar, α::$elty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x)
             n = checksquare(A)
             if length(x) != n
-                throw(DimensionMismatch("A has size ($n,$n), x has length $(length(x))"))
+                throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
             end
-            ccall((@blasfunc($fname), $lib), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x ccall((@blasfunc($fname), $lib), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 uplo, n, α, x,
-                 stride(x, 1), A, max(1,stride(A, 2)))
+                 uplo, n, α, px, stx, A, max(1,stride(A, 2)))
             A
         end
     end
@@ -1391,16 +1425,17 @@ for (fname, elty, relty) in ((:zher_,:ComplexF64, :Float64),
                              (:cher_,:ComplexF32, :Float32))
     @eval begin
         function her!(uplo::AbstractChar, α::$relty, x::AbstractVector{$elty}, A::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, x)
             n = checksquare(A)
             if length(x) != n
-                throw(DimensionMismatch("A has size ($n,$n), x has length $(length(x))"))
+                throw(DimensionMismatch(lazy"A has size ($n,$n), x has length $(length(x))"))
             end
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            px, stx = vec_pointer_stride(x, ArgumentError("input vector with 0 stride is not allowed"))
+            GC.@preserve x ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 uplo, n, α, x,
-                 stride(x, 1), A, max(1,stride(A,2)))
+                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Clong),
+                 uplo, n, α, px, stx, A, max(1,stride(A,2)), 1)
             A
         end
     end
@@ -1444,20 +1479,20 @@ for (gemm, elty) in
             kb = size(B, transB == 'N' ? 1 : 2)
             n = size(B, transB == 'N' ? 2 : 1)
             if ka != kb || m != size(C,1) || n != size(C,2)
-                throw(DimensionMismatch("A has size ($m,$ka), B has size ($kb,$n), C has size $(size(C))"))
+                throw(DimensionMismatch(lazy"A has size ($m,$ka), B has size ($kb,$n), C has size $(size(C))"))
             end
             chkstride1(A)
             chkstride1(B)
             chkstride1(C)
-            ccall((@blasfunc($gemm), libblas), Cvoid,
+            ccall((@blasfunc($gemm), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                  Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}),
+                 Ref{BlasInt}, Clong, Clong),
                  transA, transB, m, n,
                  ka, alpha, A, max(1,stride(A,2)),
                  B, max(1,stride(B,2)), beta, C,
-                 max(1,stride(C,2)))
+                 max(1,stride(C,2)), 1, 1)
             C
         end
         function gemm(transA::AbstractChar, transB::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
@@ -1500,25 +1535,28 @@ for (mfname, elty) in ((:dsymm_,:Float64),
         function symm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool},
                        A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty},
                        beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B, C)
             m, n = size(C)
             j = checksquare(A)
             if j != (side == 'L' ? m : n)
-                throw(DimensionMismatch("A has size $(size(A)), C has size ($m,$n)"))
+                throw(DimensionMismatch(lazy"A has size $(size(A)), C has size ($m,$n)"))
             end
             if size(B,2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs to match second dimension of C, $n"))
+                throw(DimensionMismatch(lazy"B has second dimension $(size(B,2)) but needs to match second dimension of C, $n"))
             end
             chkstride1(A)
             chkstride1(B)
             chkstride1(C)
-            ccall((@blasfunc($mfname), libblas), Cvoid,
+            ccall((@blasfunc($mfname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
+                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
+                 Clong, Clong),
                  side, uplo, m, n,
                  alpha, A, max(1,stride(A,2)), B,
-                 max(1,stride(B,2)), beta, C, max(1,stride(C,2)))
+                 max(1,stride(B,2)), beta, C, max(1,stride(C,2)),
+                 1, 1)
             C
         end
         function symm(side::AbstractChar, uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
@@ -1571,25 +1609,28 @@ for (mfname, elty) in ((:zhemm_,:ComplexF64),
         function hemm!(side::AbstractChar, uplo::AbstractChar, alpha::Union{($elty), Bool},
                        A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty},
                        beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B, C)
             m, n = size(C)
             j = checksquare(A)
             if j != (side == 'L' ? m : n)
-                throw(DimensionMismatch("A has size $(size(A)), C has size ($m,$n)"))
+                throw(DimensionMismatch(lazy"A has size $(size(A)), C has size ($m,$n)"))
             end
             if size(B,2) != n
-                throw(DimensionMismatch("B has second dimension $(size(B,2)) but needs to match second dimension of C, $n"))
+                throw(DimensionMismatch(lazy"B has second dimension $(size(B,2)) but needs to match second dimension of C, $n"))
             end
             chkstride1(A)
             chkstride1(B)
             chkstride1(C)
-            ccall((@blasfunc($mfname), libblas), Cvoid,
+            ccall((@blasfunc($mfname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}),
+                 Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt},
+                 Clong, Clong),
                  side, uplo, m, n,
                  alpha, A, max(1,stride(A,2)), B,
-                 max(1,stride(B,2)), beta, C, max(1,stride(C,2)))
+                 max(1,stride(B,2)), beta, C, max(1,stride(C,2)),
+                 1, 1)
             C
         end
         function hemm(side::AbstractChar, uplo::AbstractChar, alpha::($elty), A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
@@ -1652,31 +1693,32 @@ for (fname, elty) in ((:dsyrk_,:Float64),
                       (:ssyrk_,:Float32),
                       (:zsyrk_,:ComplexF64),
                       (:csyrk_,:ComplexF32))
-   @eval begin
-       # SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-       # *     .. Scalar Arguments ..
-       #       REAL ALPHA,BETA
-       #       INTEGER K,LDA,LDC,N
-       #       CHARACTER TRANS,UPLO
-       # *     .. Array Arguments ..
-       #       REAL A(LDA,*),C(LDC,*)
-       function syrk!(uplo::AbstractChar, trans::AbstractChar,
+    @eval begin
+        # SUBROUTINE DSYRK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
+        # *     .. Scalar Arguments ..
+        #       REAL ALPHA,BETA
+        #       INTEGER K,LDA,LDC,N
+        #       CHARACTER TRANS,UPLO
+        # *     .. Array Arguments ..
+        #       REAL A(LDA,*),C(LDC,*)
+        function syrk!(uplo::AbstractChar, trans::AbstractChar,
                       alpha::Union{($elty), Bool}, A::AbstractVecOrMat{$elty},
                       beta::Union{($elty), Bool}, C::AbstractMatrix{$elty})
-           require_one_based_indexing(A, C)
-           n = checksquare(C)
-           nn = size(A, trans == 'N' ? 1 : 2)
-           if nn != n throw(DimensionMismatch("C has size ($n,$n), corresponding dimension of A is $nn")) end
-           k  = size(A, trans == 'N' ? 2 : 1)
-           chkstride1(A)
-           chkstride1(C)
-           ccall((@blasfunc($fname), libblas), Cvoid,
-                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                  Ptr{$elty}, Ref{BlasInt}),
-                 uplo, trans, n, k,
-                 alpha, A, max(1,stride(A,2)), beta,
-                 C, max(1,stride(C,2)))
+            chkuplo(uplo)
+            require_one_based_indexing(A, C)
+            n = checksquare(C)
+            nn = size(A, trans == 'N' ? 1 : 2)
+            if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end
+            k  = size(A, trans == 'N' ? 2 : 1)
+            chkstride1(A)
+            chkstride1(C)
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+                  (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
+                   Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
+                   Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
+                  uplo, trans, n, k,
+                  alpha, A, max(1,stride(A,2)), beta,
+                  C, max(1,stride(C,2)), 1, 1)
             C
         end
     end
@@ -1707,42 +1749,43 @@ function herk end
 
 for (fname, elty, relty) in ((:zherk_, :ComplexF64, :Float64),
                              (:cherk_, :ComplexF32, :Float32))
-   @eval begin
-       # SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
-       # *     .. Scalar Arguments ..
-       #       REAL ALPHA,BETA
-       #       INTEGER K,LDA,LDC,N
-       #       CHARACTER TRANS,UPLO
-       # *     ..
-       # *     .. Array Arguments ..
-       #       COMPLEX A(LDA,*),C(LDC,*)
-       function herk!(uplo::AbstractChar, trans::AbstractChar,
-                      α::Union{$relty, Bool}, A::AbstractVecOrMat{$elty},
-                      β::Union{$relty, Bool}, C::AbstractMatrix{$elty})
-           require_one_based_indexing(A, C)
-           n = checksquare(C)
-           nn = size(A, trans == 'N' ? 1 : 2)
-           if nn != n
-               throw(DimensionMismatch("the matrix to update has dimension $n but the implied dimension of the update is $(size(A, trans == 'N' ? 1 : 2))"))
-           end
-           chkstride1(A)
-           chkstride1(C)
-           k  = size(A, trans == 'N' ? 2 : 1)
-           ccall((@blasfunc($fname), libblas), Cvoid,
-                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                  Ref{$relty}, Ptr{$elty}, Ref{BlasInt}, Ref{$relty},
-                  Ptr{$elty}, Ref{BlasInt}),
-                 uplo, trans, n, k,
-                 α, A, max(1,stride(A,2)), β,
-                 C, max(1,stride(C,2)))
-           C
-       end
-       function herk(uplo::AbstractChar, trans::AbstractChar, α::$relty, A::AbstractVecOrMat{$elty})
-           n = size(A, trans == 'N' ? 1 : 2)
-           herk!(uplo, trans, α, A, zero($relty), similar(A, (n,n)))
-       end
-       herk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty}) = herk(uplo, trans, one($relty), A)
-   end
+    @eval begin
+        # SUBROUTINE CHERK(UPLO,TRANS,N,K,ALPHA,A,LDA,BETA,C,LDC)
+        # *     .. Scalar Arguments ..
+        #       REAL ALPHA,BETA
+        #       INTEGER K,LDA,LDC,N
+        #       CHARACTER TRANS,UPLO
+        # *     ..
+        # *     .. Array Arguments ..
+        #       COMPLEX A(LDA,*),C(LDC,*)
+        function herk!(uplo::AbstractChar, trans::AbstractChar,
+                        α::Union{$relty, Bool}, A::AbstractVecOrMat{$elty},
+                        β::Union{$relty, Bool}, C::AbstractMatrix{$elty})
+            chkuplo(uplo)
+            require_one_based_indexing(A, C)
+            n = checksquare(C)
+            nn = size(A, trans == 'N' ? 1 : 2)
+            if nn != n
+                throw(DimensionMismatch(lazy"the matrix to update has dimension $n but the implied dimension of the update is $(size(A, trans == 'N' ? 1 : 2))"))
+            end
+            chkstride1(A)
+            chkstride1(C)
+            k  = size(A, trans == 'N' ? 2 : 1)
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
+                    Ref{$relty}, Ptr{$elty}, Ref{BlasInt}, Ref{$relty},
+                    Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
+                    uplo, trans, n, k,
+                    α, A, max(1,stride(A,2)), β,
+                    C, max(1,stride(C,2)), 1, 1)
+            C
+        end
+        function herk(uplo::AbstractChar, trans::AbstractChar, α::$relty, A::AbstractVecOrMat{$elty})
+            n = size(A, trans == 'N' ? 1 : 2)
+            herk!(uplo, trans, α, A, zero($relty), similar(A, (n,n)))
+        end
+        herk(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty}) = herk(uplo, trans, one($relty), A)
+    end
 end
 
 ## syr2k
@@ -1751,33 +1794,34 @@ for (fname, elty) in ((:dsyr2k_,:Float64),
                       (:zsyr2k_,:ComplexF64),
                       (:csyr2k_,:ComplexF32))
     @eval begin
-             #       SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-             #
-             #       .. Scalar Arguments ..
-             #       REAL PRECISION ALPHA,BETA
-             #       INTEGER K,LDA,LDB,LDC,N
-             #       CHARACTER TRANS,UPLO
-             #       ..
-             #       .. Array Arguments ..
-             #       REAL PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
+            #       SUBROUTINE DSYR2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+            #
+            #       .. Scalar Arguments ..
+            #       REAL PRECISION ALPHA,BETA
+            #       INTEGER K,LDA,LDB,LDC,N
+            #       CHARACTER TRANS,UPLO
+            #       ..
+            #       .. Array Arguments ..
+            #       REAL PRECISION A(LDA,*),B(LDB,*),C(LDC,*)
         function syr2k!(uplo::AbstractChar, trans::AbstractChar,
                         alpha::($elty), A::AbstractVecOrMat{$elty}, B::AbstractVecOrMat{$elty},
                         beta::($elty), C::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B, C)
             n = checksquare(C)
             nn = size(A, trans == 'N' ? 1 : 2)
-            if nn != n throw(DimensionMismatch("C has size ($n,$n), corresponding dimension of A is $nn")) end
+            if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end
             k  = size(A, trans == 'N' ? 2 : 1)
             chkstride1(A)
             chkstride1(B)
             chkstride1(C)
-            ccall((@blasfunc($fname), libblas), Cvoid,
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty},
-                 Ptr{$elty}, Ref{BlasInt}),
+                 Ptr{$elty}, Ref{BlasInt}, Clong, Clong),
                  uplo, trans, n, k,
                  alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)), beta,
-                 C, max(1,stride(C,2)))
+                 C, max(1,stride(C,2)), 1, 1)
             C
         end
     end
@@ -1816,43 +1860,45 @@ or `transpose(A)*B + transpose(B)*A`, according to [`trans`](@ref stdlib-blas-tr
 syr2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat, B::AbstractVecOrMat) = syr2k(uplo, trans, one(eltype(A)), A, B)
 
 for (fname, elty1, elty2) in ((:zher2k_,:ComplexF64,:Float64), (:cher2k_,:ComplexF32,:Float32))
-   @eval begin
-       # SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
-       #
-       #       .. Scalar Arguments ..
-       #       COMPLEX ALPHA
-       #       REAL BETA
-       #       INTEGER K,LDA,LDB,LDC,N
-       #       CHARACTER TRANS,UPLO
-       #       ..
-       #       .. Array Arguments ..
-       #       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
-       function her2k!(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1),
-                       A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1},
-                       beta::($elty2), C::AbstractMatrix{$elty1})
-           require_one_based_indexing(A, B, C)
-           n = checksquare(C)
-           nn = size(A, trans == 'N' ? 1 : 2)
-           if nn != n throw(DimensionMismatch("C has size ($n,$n), corresponding dimension of A is $nn")) end
-           chkstride1(A)
-           chkstride1(B)
-           chkstride1(C)
-           k  = size(A, trans == 'N' ? 2 : 1)
-           ccall((@blasfunc($fname), libblas), Cvoid,
-                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                  Ref{$elty1}, Ptr{$elty1}, Ref{BlasInt}, Ptr{$elty1}, Ref{BlasInt},
-                  Ref{$elty2},  Ptr{$elty1}, Ref{BlasInt}),
-                 uplo, trans, n, k,
-                 alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)),
-                 beta, C, max(1,stride(C,2)))
-           C
-       end
-       function her2k(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1})
-           n = size(A, trans == 'N' ? 1 : 2)
-           her2k!(uplo, trans, alpha, A, B, zero($elty2), similar(A, $elty1, (n,n)))
-       end
-       her2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) = her2k(uplo, trans, one($elty1), A, B)
-   end
+    @eval begin
+        # SUBROUTINE CHER2K(UPLO,TRANS,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
+        #
+        #       .. Scalar Arguments ..
+        #       COMPLEX ALPHA
+        #       REAL BETA
+        #       INTEGER K,LDA,LDB,LDC,N
+        #       CHARACTER TRANS,UPLO
+        #       ..
+        #       .. Array Arguments ..
+        #       COMPLEX A(LDA,*),B(LDB,*),C(LDC,*)
+        function her2k!(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1),
+                        A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1},
+                        beta::($elty2), C::AbstractMatrix{$elty1})
+            chkuplo(uplo)
+            require_one_based_indexing(A, B, C)
+            n = checksquare(C)
+            nn = size(A, trans == 'N' ? 1 : 2)
+            if nn != n throw(DimensionMismatch(lazy"C has size ($n,$n), corresponding dimension of A is $nn")) end
+            chkstride1(A)
+            chkstride1(B)
+            chkstride1(C)
+            k  = size(A, trans == 'N' ? 2 : 1)
+            ccall((@blasfunc($fname), libblastrampoline), Cvoid,
+                    (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
+                    Ref{$elty1}, Ptr{$elty1}, Ref{BlasInt}, Ptr{$elty1}, Ref{BlasInt},
+                    Ref{$elty2},  Ptr{$elty1}, Ref{BlasInt}, Clong, Clong),
+                    uplo, trans, n, k,
+                    alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)),
+                    beta, C, max(1,stride(C,2)), 1, 1)
+            C
+        end
+        function her2k(uplo::AbstractChar, trans::AbstractChar, alpha::($elty1), A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1})
+            n = size(A, trans == 'N' ? 1 : 2)
+            her2k!(uplo, trans, alpha, A, B, zero($elty2), similar(A, $elty1, (n,n)))
+        end
+        her2k(uplo::AbstractChar, trans::AbstractChar, A::AbstractVecOrMat{$elty1}, B::AbstractVecOrMat{$elty1}) =
+            her2k(uplo, trans, one($elty1), A, B)
+    end
 end
 
 """
@@ -1944,19 +1990,22 @@ for (mmname, smname, elty) in
         #       DOUBLE PRECISION A(LDA,*),B(LDB,*)
         function trmm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::Number,
                        A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B)
             m, n = size(B)
             nA = checksquare(A)
             if nA != (side == 'L' ? m : n)
-                throw(DimensionMismatch("size of A, $(size(A)), doesn't match $side size of B with dims, $(size(B))"))
+                throw(DimensionMismatch(lazy"size of A, $(size(A)), doesn't match $side size of B with dims, $(size(B))"))
             end
             chkstride1(A)
             chkstride1(B)
-            ccall((@blasfunc($mmname), libblas), Cvoid,
+            ccall((@blasfunc($mmname), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                   Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
+                   Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                   Clong, Clong, Clong, Clong),
                   side, uplo, transa, diag, m, n,
-                  alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)))
+                  alpha, A, max(1,stride(A,2)), B, max(1,stride(B,2)),
+                  1, 1, 1, 1)
             B
         end
         function trmm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar,
@@ -1972,21 +2021,24 @@ for (mmname, smname, elty) in
         #       DOUBLE PRECISION A(LDA,*),B(LDB,*)
         function trsm!(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar,
                        alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
+            chkuplo(uplo)
             require_one_based_indexing(A, B)
             m, n = size(B)
             k = checksquare(A)
             if k != (side == 'L' ? m : n)
-                throw(DimensionMismatch("size of A is ($k,$k), size of B is ($m,$n), side is $side, and transa='$transa'"))
+                throw(DimensionMismatch(lazy"size of A is ($k,$k), size of B is ($m,$n), side is $side, and transa='$transa'"))
             end
             chkstride1(A)
             chkstride1(B)
-            ccall((@blasfunc($smname), libblas), Cvoid,
-                (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
-                 Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}),
-                 side, uplo, transa, diag,
-                 m, n, alpha, A,
-                 max(1,stride(A,2)), B, max(1,stride(B,2)))
+            ccall((@blasfunc($smname), libblastrampoline), Cvoid,
+                   (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
+                    Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty},
+                    Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
+                    Clong, Clong, Clong, Clong),
+                   side, uplo, transa, diag,
+                   m, n, alpha, A,
+                   max(1,stride(A,2)), B, max(1,stride(B,2)),
+                   1, 1, 1, 1)
             B
         end
         function trsm(side::AbstractChar, uplo::AbstractChar, transa::AbstractChar, diag::AbstractChar, alpha::$elty, A::AbstractMatrix{$elty}, B::AbstractMatrix{$elty})
@@ -1997,22 +2049,22 @@ end
 
 end # module
 
-function copyto!(dest::Array{T}, rdest::Union{UnitRange{Ti},AbstractRange{Ti}},
-                 src::Array{T}, rsrc::Union{UnitRange{Ti},AbstractRange{Ti}}) where {T<:BlasFloat,Ti<:Integer}
+function copyto!(dest::Array{T}, rdest::AbstractRange{Ti},
+                 src::Array{T}, rsrc::AbstractRange{Ti}) where {T<:BlasFloat,Ti<:Integer}
     if minimum(rdest) < 1 || maximum(rdest) > length(dest)
-        throw(ArgumentError("range out of bounds for dest, of length $(length(dest))"))
+        throw(ArgumentError(lazy"range out of bounds for dest, of length $(length(dest))"))
     end
     if minimum(rsrc) < 1 || maximum(rsrc) > length(src)
-        throw(ArgumentError("range out of bounds for src, of length $(length(src))"))
+        throw(ArgumentError(lazy"range out of bounds for src, of length $(length(src))"))
     end
     if length(rdest) != length(rsrc)
-        throw(DimensionMismatch("ranges must be of the same length"))
+        throw(DimensionMismatch(lazy"ranges must be of the same length"))
     end
     GC.@preserve src dest BLAS.blascopy!(
         length(rsrc),
-        pointer(src) + (first(rsrc) - 1) * sizeof(T),
+        pointer(src, minimum(rsrc)),
         step(rsrc),
-        pointer(dest) + (first(rdest) - 1) * sizeof(T),
+        pointer(dest, minimum(rdest)),
         step(rdest))
 
     return dest
diff --git a/stdlib/LinearAlgebra/src/bunchkaufman.jl b/stdlib/LinearAlgebra/src/bunchkaufman.jl
index ead62ab0bc5289..7961f97e582994 100644
--- a/stdlib/LinearAlgebra/src/bunchkaufman.jl
+++ b/stdlib/LinearAlgebra/src/bunchkaufman.jl
@@ -28,7 +28,7 @@ julia> A = [1 2; 2 3]
  2  3
 
 julia> S = bunchkaufman(A) # A gets wrapped internally by Symmetric(A)
-BunchKaufman{Float64, Matrix{Float64}}
+BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
 D factor:
 2×2 Tridiagonal{Float64, Vector{Float64}}:
  -0.333333  0.0
@@ -48,7 +48,7 @@ julia> d == S.D && u == S.U && p == S.p
 true
 
 julia> S = bunchkaufman(Symmetric(A, :L))
-BunchKaufman{Float64, Matrix{Float64}}
+BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
 D factor:
 2×2 Tridiagonal{Float64, Vector{Float64}}:
  3.0   0.0
@@ -63,22 +63,25 @@ permutation:
  1
 ```
 """
-struct BunchKaufman{T,S<:AbstractMatrix} <: Factorization{T}
+struct BunchKaufman{T,S<:AbstractMatrix,P<:AbstractVector{<:Integer}} <: Factorization{T}
     LD::S
-    ipiv::Vector{BlasInt}
+    ipiv::P
     uplo::Char
     symmetric::Bool
     rook::Bool
     info::BlasInt
 
-    function BunchKaufman{T,S}(LD, ipiv, uplo, symmetric, rook, info) where {T,S<:AbstractMatrix}
+    function BunchKaufman{T,S,P}(LD, ipiv, uplo, symmetric, rook, info) where {T,S<:AbstractMatrix,P<:AbstractVector}
         require_one_based_indexing(LD)
-        new(LD, ipiv, uplo, symmetric, rook, info)
+        new{T,S,P}(LD, ipiv, uplo, symmetric, rook, info)
     end
 end
-BunchKaufman(A::AbstractMatrix{T}, ipiv::Vector{BlasInt}, uplo::AbstractChar, symmetric::Bool,
-             rook::Bool, info::BlasInt) where {T} =
-        BunchKaufman{T,typeof(A)}(A, ipiv, uplo, symmetric, rook, info)
+BunchKaufman(A::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, uplo::AbstractChar,
+             symmetric::Bool, rook::Bool, info::BlasInt) where {T} =
+        BunchKaufman{T,typeof(A),typeof(ipiv)}(A, ipiv, uplo, symmetric, rook, info)
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(BunchKaufman(LD, ipiv, uplo, symmetric, rook, info) where {T,S},
+           BunchKaufman{T,S,typeof(ipiv)}(LD, ipiv, uplo, symmetric, rook, info))
 
 # iteration for destructuring into components
 Base.iterate(S::BunchKaufman) = (S.D, Val(:UL))
@@ -148,7 +151,7 @@ julia> A = [1 2; 2 3]
  2  3
 
 julia> S = bunchkaufman(A) # A gets wrapped internally by Symmetric(A)
-BunchKaufman{Float64, Matrix{Float64}}
+BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
 D factor:
 2×2 Tridiagonal{Float64, Vector{Float64}}:
  -0.333333  0.0
@@ -167,8 +170,13 @@ julia> d, u, p = S; # destructuring via iteration
 julia> d == S.D && u == S.U && p == S.p
 true
 
+julia> S.U*S.D*S.U' - S.P*A*S.P'
+2×2 Matrix{Float64}:
+ 0.0  0.0
+ 0.0  0.0
+
 julia> S = bunchkaufman(Symmetric(A, :L))
-BunchKaufman{Float64, Matrix{Float64}}
+BunchKaufman{Float64, Matrix{Float64}, Vector{Int64}}
 D factor:
 2×2 Tridiagonal{Float64, Vector{Float64}}:
  3.0   0.0
@@ -181,21 +189,24 @@ permutation:
 2-element Vector{Int64}:
  2
  1
+
+julia> S.L*S.D*S.L' - A[S.p, S.p]
+2×2 Matrix{Float64}:
+ 0.0  0.0
+ 0.0  0.0
 ```
 """
 bunchkaufman(A::AbstractMatrix{T}, rook::Bool=false; check::Bool = true) where {T} =
-    bunchkaufman!(copy_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
+    bunchkaufman!(copymutable_oftype(A, typeof(sqrt(oneunit(T)))), rook; check = check)
 
-convert(::Type{BunchKaufman{T}}, B::BunchKaufman{T}) where {T} = B
-convert(::Type{BunchKaufman{T}}, B::BunchKaufman) where {T} =
+BunchKaufman{T}(B::BunchKaufman) where {T} =
     BunchKaufman(convert(Matrix{T}, B.LD), B.ipiv, B.uplo, B.symmetric, B.rook, B.info)
-convert(::Type{Factorization{T}}, B::BunchKaufman{T}) where {T} = B
-convert(::Type{Factorization{T}}, B::BunchKaufman) where {T} = convert(BunchKaufman{T}, B)
+Factorization{T}(B::BunchKaufman) where {T} = BunchKaufman{T}(B)
 
 size(B::BunchKaufman) = size(getfield(B, :LD))
 size(B::BunchKaufman, d::Integer) = size(getfield(B, :LD), d)
 issymmetric(B::BunchKaufman) = B.symmetric
-ishermitian(B::BunchKaufman) = !B.symmetric
+ishermitian(B::BunchKaufman{T}) where T = T<:Real || !B.symmetric
 
 function _ipiv2perm_bk(v::AbstractVector{T}, maxi::Integer, uplo::AbstractChar, rook::Bool) where T
     require_one_based_indexing(v)
@@ -226,62 +237,6 @@ function _ipiv2perm_bk(v::AbstractVector{T}, maxi::Integer, uplo::AbstractChar,
     return p
 end
 
-"""
-    getproperty(B::BunchKaufman, d::Symbol)
-
-Extract the factors of the Bunch-Kaufman factorization `B`. The factorization can take the
-two forms `P'*L*D*L'*P` or `P'*U*D*U'*P` (or `L*D*transpose(L)` in the complex symmetric case)
-where `P` is a (symmetric) permutation matrix, `L` is a [`UnitLowerTriangular`](@ref) matrix, `U` is a
-[`UnitUpperTriangular`](@ref), and `D` is a block diagonal symmetric or Hermitian matrix with
-1x1 or 2x2 blocks. The argument `d` can be
-
-- `:D`: the block diagonal matrix
-- `:U`: the upper triangular factor (if factorization is `U*D*U'`)
-- `:L`: the lower triangular factor (if factorization is `L*D*L'`)
-- `:p`: permutation vector
-- `:P`: permutation matrix
-
-# Examples
-```jldoctest
-julia> A = [1 2 3; 2 1 2; 3 2 1]
-3×3 Matrix{Int64}:
- 1  2  3
- 2  1  2
- 3  2  1
-
-julia> F = bunchkaufman(Symmetric(A, :L))
-BunchKaufman{Float64, Matrix{Float64}}
-D factor:
-3×3 Tridiagonal{Float64, Vector{Float64}}:
- 1.0  3.0    ⋅
- 3.0  1.0   0.0
-  ⋅   0.0  -1.0
-L factor:
-3×3 UnitLowerTriangular{Float64, Matrix{Float64}}:
- 1.0   ⋅    ⋅
- 0.0  1.0   ⋅
- 0.5  0.5  1.0
-permutation:
-3-element Vector{Int64}:
- 1
- 3
- 2
-
-julia> F.L*F.D*F.L' - A[F.p, F.p]
-3×3 Matrix{Float64}:
- 0.0  0.0  0.0
- 0.0  0.0  0.0
- 0.0  0.0  0.0
-
-julia> F = bunchkaufman(Symmetric(A));
-
-julia> F.U*F.D*F.U' - F.P*A*F.P'
-3×3 Matrix{Float64}:
- 0.0  0.0  0.0
- 0.0  0.0  0.0
- 0.0  0.0  0.0
-```
-"""
 function getproperty(B::BunchKaufman{T}, d::Symbol) where {T<:BlasFloat}
     n = size(B, 1)
     if d === :p
@@ -325,6 +280,14 @@ Base.propertynames(B::BunchKaufman, private::Bool=false) =
 
 issuccess(B::BunchKaufman) = B.info == 0
 
+function adjoint(B::BunchKaufman)
+    if ishermitian(B)
+        return B
+    else
+        throw(ArgumentError("adjoint not implemented for complex symmetric matrices"))
+    end
+end
+
 function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, B::BunchKaufman)
     if issuccess(B)
         summary(io, B); println(io)
diff --git a/stdlib/LinearAlgebra/src/cholesky.jl b/stdlib/LinearAlgebra/src/cholesky.jl
index 10aabb35928b84..d11630fcb6a5fa 100644
--- a/stdlib/LinearAlgebra/src/cholesky.jl
+++ b/stdlib/LinearAlgebra/src/cholesky.jl
@@ -10,10 +10,10 @@
 # In the methods below, LAPACK is called when possible, i.e. StridedMatrices with Float32,
 # Float64, ComplexF32, and ComplexF64 element types. For other element or
 # matrix types, the unblocked Julia implementation in _chol! is used. For cholesky
-# and cholesky! pivoting is supported through a Val(Bool) argument. A type argument is
+# and cholesky! pivoting is supported through a RowMaximum() argument. A type argument is
 # necessary for type stability since the output of cholesky and cholesky! is either
 # Cholesky or CholeskyPivoted. The latter is only
-# supported for the four LAPACK element types. For other types, e.g. BigFloats Val(true) will
+# supported for the four LAPACK element types. For other types, e.g. BigFloats RowMaximum() will
 # give an error. It is required that the input is Hermitian (including real symmetric) either
 # through the Hermitian and Symmetric views or exact symmetric or Hermitian elements which
 # is checked for and an error is thrown if the check fails.
@@ -35,7 +35,10 @@ positive definite matrix `A`. This is the return type of [`cholesky`](@ref),
 the corresponding matrix factorization function.
 
 The triangular Cholesky factor can be obtained from the factorization `F::Cholesky`
-via `F.L` and `F.U`.
+via `F.L` and `F.U`, where `A ≈ F.U' * F.U ≈ F.L * F.L'`.
+
+The following functions are available for `Cholesky` objects: [`size`](@ref), [`\\`](@ref),
+[`inv`](@ref), [`det`](@ref), [`logdet`](@ref) and [`isposdef`](@ref).
 
 Iterating the decomposition produces the components `L` and `U`.
 
@@ -90,7 +93,8 @@ Cholesky(A::AbstractMatrix{T}, uplo::Symbol, info::Integer) where {T} =
     Cholesky{T,typeof(A)}(A, char_uplo(uplo), info)
 Cholesky(A::AbstractMatrix{T}, uplo::AbstractChar, info::Integer) where {T} =
     Cholesky{T,typeof(A)}(A, uplo, info)
-
+Cholesky(U::UpperTriangular{T}) where {T} = Cholesky{T,typeof(U.data)}(U.data, 'U', 0)
+Cholesky(L::LowerTriangular{T}) where {T} = Cholesky{T,typeof(L.data)}(L.data, 'L', 0)
 
 # iteration for destructuring into components
 Base.iterate(C::Cholesky) = (C.L, Val(:U))
@@ -102,58 +106,69 @@ Base.iterate(C::Cholesky, ::Val{:done}) = nothing
     CholeskyPivoted
 
 Matrix factorization type of the pivoted Cholesky factorization of a dense symmetric/Hermitian
-positive semi-definite matrix `A`. This is the return type of [`cholesky(_, Val(true))`](@ref),
+positive semi-definite matrix `A`. This is the return type of [`cholesky(_, ::RowMaximum)`](@ref),
 the corresponding matrix factorization function.
 
 The triangular Cholesky factor can be obtained from the factorization `F::CholeskyPivoted`
-via `F.L` and `F.U`.
+via `F.L` and `F.U`, and the permutation via `F.p`, where `A[F.p, F.p] ≈ Ur' * Ur ≈ Lr * Lr'`
+with `Ur = F.U[1:F.rank, :]` and `Lr = F.L[:, 1:F.rank]`, or alternatively
+`A ≈ Up' * Up ≈ Lp * Lp'` with `Up = F.U[1:F.rank, invperm(F.p)]` and
+`Lp = F.L[invperm(F.p), 1:F.rank]`.
+
+The following functions are available for `CholeskyPivoted` objects:
+[`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref).
 
 Iterating the decomposition produces the components `L` and `U`.
 
 # Examples
 ```jldoctest
-julia> A = [4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
-3×3 Matrix{Float64}:
-   4.0   12.0  -16.0
-  12.0   37.0  -43.0
- -16.0  -43.0   98.0
-
-julia> C = cholesky(A, Val(true))
-CholeskyPivoted{Float64, Matrix{Float64}}
-U factor with rank 3:
-3×3 UpperTriangular{Float64, Matrix{Float64}}:
- 9.89949  -4.34366  -1.61624
-  ⋅        4.25825   1.1694
-  ⋅         ⋅        0.142334
+julia> X = [1.0, 2.0, 3.0, 4.0];
+
+julia> A = X * X';
+
+julia> C = cholesky(A, RowMaximum(), check = false)
+CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}
+U factor with rank 1:
+4×4 UpperTriangular{Float64, Matrix{Float64}}:
+ 4.0  2.0  3.0  1.0
+  ⋅   0.0  6.0  2.0
+  ⋅    ⋅   9.0  3.0
+  ⋅    ⋅    ⋅   1.0
 permutation:
-3-element Vector{Int64}:
- 3
+4-element Vector{Int64}:
+ 4
  2
+ 3
  1
 
+julia> C.U[1:C.rank, :]' * C.U[1:C.rank, :] ≈ A[C.p, C.p]
+true
+
 julia> l, u = C; # destructuring via iteration
 
 julia> l == C.L && u == C.U
 true
 ```
 """
-struct CholeskyPivoted{T,S<:AbstractMatrix} <: Factorization{T}
+struct CholeskyPivoted{T,S<:AbstractMatrix,P<:AbstractVector{<:Integer}} <: Factorization{T}
     factors::S
     uplo::Char
-    piv::Vector{BlasInt}
+    piv::P
     rank::BlasInt
     tol::Real
     info::BlasInt
 
-    function CholeskyPivoted{T,S}(factors, uplo, piv, rank, tol, info) where {T,S<:AbstractMatrix}
+    function CholeskyPivoted{T,S,P}(factors, uplo, piv, rank, tol, info) where {T,S<:AbstractMatrix,P<:AbstractVector}
         require_one_based_indexing(factors)
-        new(factors, uplo, piv, rank, tol, info)
+        new{T,S,P}(factors, uplo, piv, rank, tol, info)
     end
 end
-function CholeskyPivoted(A::AbstractMatrix{T}, uplo::AbstractChar, piv::Vector{<:Integer},
-                            rank::Integer, tol::Real, info::Integer) where T
-    CholeskyPivoted{T,typeof(A)}(A, uplo, piv, rank, tol, info)
-end
+CholeskyPivoted(A::AbstractMatrix{T}, uplo::AbstractChar, piv::AbstractVector{<:Integer},
+                rank::Integer, tol::Real, info::Integer) where T =
+    CholeskyPivoted{T,typeof(A),typeof(piv)}(A, uplo, piv, rank, tol, info)
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(CholeskyPivoted{T,S}(factors, uplo, piv, rank, tol, info) where {T,S<:AbstractMatrix},
+           CholeskyPivoted{T,S,typeof(piv)}(factors, uplo, piv, rank, tol, info))
 
 
 # iteration for destructuring into components
@@ -164,7 +179,9 @@ Base.iterate(C::CholeskyPivoted, ::Val{:done}) = nothing
 
 # make a copy that allow inplace Cholesky factorization
 @inline choltype(A) = promote_type(typeof(sqrt(oneunit(eltype(A)))), Float32)
-@inline cholcopy(A) = copy_oftype(A, choltype(A))
+@inline cholcopy(A::StridedMatrix) = copymutable_oftype(A, choltype(A))
+@inline cholcopy(A::RealHermSymComplexHerm) = copymutable_oftype(A, choltype(A))
+@inline cholcopy(A::AbstractMatrix) = copy_similar(A, choltype(A))
 
 # _chol!. Internal methods for calling unpivoted Cholesky
 ## BLAS/LAPACK element types
@@ -248,15 +265,15 @@ end
 # cholesky!. Destructive methods for computing Cholesky factorization of real symmetric
 # or Hermitian matrix
 ## No pivoting (default)
-function cholesky!(A::RealHermSymComplexHerm, ::Val{false}=Val(false); check::Bool = true)
+function cholesky!(A::RealHermSymComplexHerm, ::NoPivot = NoPivot(); check::Bool = true)
     C, info = _chol!(A.data, A.uplo == 'U' ? UpperTriangular : LowerTriangular)
     check && checkpositivedefinite(info)
     return Cholesky(C.data, A.uplo, info)
 end
 
-### for StridedMatrices, check that matrix is symmetric/Hermitian
+### for AbstractMatrix, check that matrix is symmetric/Hermitian
 """
-    cholesky!(A::StridedMatrix, Val(false); check = true) -> Cholesky
+    cholesky!(A::AbstractMatrix, NoPivot(); check = true) -> Cholesky
 
 The same as [`cholesky`](@ref), but saves space by overwriting the input `A`,
 instead of creating a copy. An [`InexactError`](@ref) exception is thrown if
@@ -276,41 +293,45 @@ Stacktrace:
 [...]
 ```
 """
-function cholesky!(A::StridedMatrix, ::Val{false}=Val(false); check::Bool = true)
+function cholesky!(A::AbstractMatrix, ::NoPivot = NoPivot(); check::Bool = true)
     checksquare(A)
     if !ishermitian(A) # return with info = -1 if not Hermitian
         check && checkpositivedefinite(-1)
         return Cholesky(A, 'U', convert(BlasInt, -1))
     else
-        return cholesky!(Hermitian(A), Val(false); check = check)
+        return cholesky!(Hermitian(A), NoPivot(); check = check)
     end
 end
+@deprecate cholesky!(A::StridedMatrix, ::Val{false}; check::Bool = true) cholesky!(A, NoPivot(); check) false
+@deprecate cholesky!(A::RealHermSymComplexHerm, ::Val{false}; check::Bool = true) cholesky!(A, NoPivot(); check) false
 
 ## With pivoting
 ### BLAS/LAPACK element types
 function cholesky!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix},
-                   ::Val{true}; tol = 0.0, check::Bool = true)
+                   ::RowMaximum; tol = 0.0, check::Bool = true)
     AA, piv, rank, info = LAPACK.pstrf!(A.uplo, A.data, tol)
-    C = CholeskyPivoted{eltype(AA),typeof(AA)}(AA, A.uplo, piv, rank, tol, info)
+    C = CholeskyPivoted{eltype(AA),typeof(AA),typeof(piv)}(AA, A.uplo, piv, rank, tol, info)
     check && chkfullrank(C)
     return C
 end
+@deprecate cholesky!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
 
 ### Non BLAS/LAPACK element types (generic). Since generic fallback for pivoted Cholesky
 ### is not implemented yet we throw an error
-cholesky!(A::RealHermSymComplexHerm{<:Real}, ::Val{true}; tol = 0.0, check::Bool = true) =
+cholesky!(A::RealHermSymComplexHerm{<:Real}, ::RowMaximum; tol = 0.0, check::Bool = true) =
     throw(ArgumentError("generic pivoted Cholesky factorization is not implemented yet"))
+@deprecate cholesky!(A::RealHermSymComplexHerm{<:Real}, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
 
-### for StridedMatrices, check that matrix is symmetric/Hermitian
+### for AbstractMatrix, check that matrix is symmetric/Hermitian
 """
-    cholesky!(A::StridedMatrix, Val(true); tol = 0.0, check = true) -> CholeskyPivoted
+    cholesky!(A::AbstractMatrix, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted
 
 The same as [`cholesky`](@ref), but saves space by overwriting the input `A`,
 instead of creating a copy. An [`InexactError`](@ref) exception is thrown if the
 factorization produces a number not representable by the element type of `A`,
 e.g. for integer types.
 """
-function cholesky!(A::StridedMatrix, ::Val{true}; tol = 0.0, check::Bool = true)
+function cholesky!(A::AbstractMatrix, ::RowMaximum; tol = 0.0, check::Bool = true)
     checksquare(A)
     if !ishermitian(A)
         C = CholeskyPivoted(A, 'U', Vector{BlasInt}(),convert(BlasInt, 1),
@@ -318,20 +339,24 @@ function cholesky!(A::StridedMatrix, ::Val{true}; tol = 0.0, check::Bool = true)
         check && chkfullrank(C)
         return C
     else
-        return cholesky!(Hermitian(A), Val(true); tol = tol, check = check)
+        return cholesky!(Hermitian(A), RowMaximum(); tol = tol, check = check)
     end
 end
+@deprecate cholesky!(A::StridedMatrix, ::Val{true}; kwargs...) cholesky!(A, RowMaximum(); kwargs...) false
 
 # cholesky. Non-destructive methods for computing Cholesky factorization of real symmetric
 # or Hermitian matrix
 ## No pivoting (default)
 """
-    cholesky(A, Val(false); check = true) -> Cholesky
+    cholesky(A, NoPivot(); check = true) -> Cholesky
 
 Compute the Cholesky factorization of a dense symmetric positive definite matrix `A`
 and return a [`Cholesky`](@ref) factorization. The matrix `A` can either be a [`Symmetric`](@ref) or [`Hermitian`](@ref)
-[`StridedMatrix`](@ref) or a *perfectly* symmetric or Hermitian `StridedMatrix`.
-The triangular Cholesky factor can be obtained from the factorization `F` with: `F.L` and `F.U`.
+[`AbstractMatrix`](@ref) or a *perfectly* symmetric or Hermitian `AbstractMatrix`.
+
+The triangular Cholesky factor can be obtained from the factorization `F` via `F.L` and `F.U`,
+where `A ≈ F.U' * F.U ≈ F.L * F.L'`.
+
 The following functions are available for `Cholesky` objects: [`size`](@ref), [`\\`](@ref),
 [`inv`](@ref), [`det`](@ref), [`logdet`](@ref) and [`isposdef`](@ref).
 
@@ -374,20 +399,32 @@ julia> C.L * C.U == A
 true
 ```
 """
-cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}},
-    ::Val{false}=Val(false); check::Bool = true) = cholesky!(cholcopy(A); check = check)
+cholesky(A::AbstractMatrix, ::NoPivot=NoPivot(); check::Bool = true) =
+    cholesky!(cholcopy(A); check)
+@deprecate cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}}, ::Val{false}; check::Bool = true) cholesky(A, NoPivot(); check) false
 
+function cholesky(A::AbstractMatrix{Float16}, ::NoPivot=NoPivot(); check::Bool = true)
+    X = cholesky!(cholcopy(A); check = check)
+    return Cholesky{Float16}(X)
+end
+@deprecate cholesky(A::Union{StridedMatrix{Float16},RealHermSymComplexHerm{Float16,<:StridedMatrix}}, ::Val{false}; check::Bool = true) cholesky(A, NoPivot(); check) false
 
 ## With pivoting
 """
-    cholesky(A, Val(true); tol = 0.0, check = true) -> CholeskyPivoted
+    cholesky(A, RowMaximum(); tol = 0.0, check = true) -> CholeskyPivoted
 
 Compute the pivoted Cholesky factorization of a dense symmetric positive semi-definite matrix `A`
 and return a [`CholeskyPivoted`](@ref) factorization. The matrix `A` can either be a [`Symmetric`](@ref)
-or [`Hermitian`](@ref) [`StridedMatrix`](@ref) or a *perfectly* symmetric or Hermitian `StridedMatrix`.
-The triangular Cholesky factor can be obtained from the factorization `F` with: `F.L` and `F.U`.
+or [`Hermitian`](@ref) [`AbstractMatrix`](@ref) or a *perfectly* symmetric or Hermitian `AbstractMatrix`.
+
+The triangular Cholesky factor can be obtained from the factorization `F` via `F.L` and `F.U`,
+and the permutation via `F.p`, where `A[F.p, F.p] ≈ Ur' * Ur ≈ Lr * Lr'` with `Ur = F.U[1:F.rank, :]`
+and `Lr = F.L[:, 1:F.rank]`, or alternatively `A ≈ Up' * Up ≈ Lp * Lp'` with
+`Up = F.U[1:F.rank, invperm(F.p)]` and `Lp = F.L[invperm(F.p), 1:F.rank]`.
+
 The following functions are available for `CholeskyPivoted` objects:
 [`size`](@ref), [`\\`](@ref), [`inv`](@ref), [`det`](@ref), and [`rank`](@ref).
+
 The argument `tol` determines the tolerance for determining the rank.
 For negative values, the tolerance is the machine precision.
 
@@ -397,10 +434,45 @@ wrap it in `Hermitian(A)` before passing it to `cholesky` in order to treat it a
 When `check = true`, an error is thrown if the decomposition fails.
 When `check = false`, responsibility for checking the decomposition's
 validity (via [`issuccess`](@ref)) lies with the user.
+
+# Examples
+```jldoctest
+julia> X = [1.0, 2.0, 3.0, 4.0];
+
+julia> A = X * X';
+
+julia> C = cholesky(A, RowMaximum(), check = false)
+CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}
+U factor with rank 1:
+4×4 UpperTriangular{Float64, Matrix{Float64}}:
+ 4.0  2.0  3.0  1.0
+  ⋅   0.0  6.0  2.0
+  ⋅    ⋅   9.0  3.0
+  ⋅    ⋅    ⋅   1.0
+permutation:
+4-element Vector{Int64}:
+ 4
+ 2
+ 3
+ 1
+
+julia> C.U[1:C.rank, :]' * C.U[1:C.rank, :] ≈ A[C.p, C.p]
+true
+
+julia> l, u = C; # destructuring via iteration
+
+julia> l == C.L && u == C.U
+true
+```
 """
-cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}},
-    ::Val{true}; tol = 0.0, check::Bool = true) =
-    cholesky!(cholcopy(A), Val(true); tol = tol, check = check)
+cholesky(A::AbstractMatrix, ::RowMaximum; tol = 0.0, check::Bool = true) =
+    cholesky!(cholcopy(A), RowMaximum(); tol, check)
+@deprecate cholesky(A::Union{StridedMatrix,RealHermSymComplexHerm{<:Real,<:StridedMatrix}}, ::Val{true}; tol = 0.0, check::Bool = true) cholesky(A, RowMaximum(); tol, check) false
+
+function cholesky(A::AbstractMatrix{Float16}, ::RowMaximum; tol = 0.0, check::Bool = true)
+    X = cholesky!(cholcopy(A), RowMaximum(); tol, check)
+    return CholeskyPivoted{Float16}(X)
+end
 
 ## Number
 function cholesky(x::Number, uplo::Symbol=:U)
@@ -458,7 +530,7 @@ end
 Base.propertynames(F::Cholesky, private::Bool=false) =
     (:U, :L, :UL, (private ? fieldnames(typeof(F)) : ())...)
 
-function getproperty(C::CholeskyPivoted{T}, d::Symbol) where T<:BlasFloat
+function getproperty(C::CholeskyPivoted{T}, d::Symbol) where {T}
     Cfactors = getfield(C, :factors)
     Cuplo    = getfield(C, :uplo)
     if d === :U
@@ -483,6 +555,8 @@ Base.propertynames(F::CholeskyPivoted, private::Bool=false) =
 
 issuccess(C::Union{Cholesky,CholeskyPivoted}) = C.info == 0
 
+adjoint(C::Union{Cholesky,CholeskyPivoted}) = C
+
 function show(io::IO, mime::MIME{Symbol("text/plain")}, C::Cholesky{<:Any,<:AbstractMatrix})
     if issuccess(C)
         summary(io, C); println(io)
@@ -501,7 +575,7 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, C::CholeskyPivoted{<:Any
     show(io, mime, C.p)
 end
 
-ldiv!(C::Cholesky{T,<:AbstractMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
+ldiv!(C::Cholesky{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
     LAPACK.potrs!(C.uplo, C.factors, B)
 
 function ldiv!(C::Cholesky{<:Any,<:AbstractMatrix}, B::StridedVecOrMat)
@@ -527,7 +601,7 @@ function ldiv!(C::CholeskyPivoted{T}, B::StridedMatrix{T}) where T<:BlasFloat
     B
 end
 
-function ldiv!(C::CholeskyPivoted, B::StridedVector)
+function ldiv!(C::CholeskyPivoted, B::AbstractVector)
     if C.uplo == 'L'
         ldiv!(adjoint(LowerTriangular(C.factors)),
             ldiv!(LowerTriangular(C.factors), permute!(B, C.piv)))
@@ -538,7 +612,7 @@ function ldiv!(C::CholeskyPivoted, B::StridedVector)
     invpermute!(B, C.piv)
 end
 
-function ldiv!(C::CholeskyPivoted, B::StridedMatrix)
+function ldiv!(C::CholeskyPivoted, B::AbstractMatrix)
     n = size(C, 1)
     for i in 1:size(B, 2)
         permute!(view(B, 1:n, i), C.piv)
@@ -556,7 +630,7 @@ function ldiv!(C::CholeskyPivoted, B::StridedMatrix)
     B
 end
 
-function rdiv!(B::StridedMatrix, C::Cholesky{<:Any,<:AbstractMatrix})
+function rdiv!(B::AbstractMatrix, C::Cholesky{<:Any,<:AbstractMatrix})
     if C.uplo == 'L'
         return rdiv!(rdiv!(B, adjoint(LowerTriangular(C.factors))), LowerTriangular(C.factors))
     else
@@ -564,7 +638,7 @@ function rdiv!(B::StridedMatrix, C::Cholesky{<:Any,<:AbstractMatrix})
     end
 end
 
-function LinearAlgebra.rdiv!(B::StridedMatrix, C::CholeskyPivoted)
+function LinearAlgebra.rdiv!(B::AbstractMatrix, C::CholeskyPivoted)
     n = size(C, 2)
     for i in 1:size(B, 1)
         permute!(view(B, i, 1:n), C.piv)
@@ -624,6 +698,8 @@ function logdet(C::CholeskyPivoted)
     end
 end
 
+logabsdet(C::Union{Cholesky, CholeskyPivoted}) = logdet(C), one(eltype(C)) # since C is p.s.d.
+
 inv!(C::Cholesky{<:BlasFloat,<:StridedMatrix}) =
     copytri!(LAPACK.potri!(C.uplo, C.factors), C.uplo, true)
 
@@ -643,14 +719,14 @@ end
 rank(C::CholeskyPivoted) = C.rank
 
 """
-    lowrankupdate!(C::Cholesky, v::StridedVector) -> CC::Cholesky
+    lowrankupdate!(C::Cholesky, v::AbstractVector) -> CC::Cholesky
 
 Update a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U` then
 `CC = cholesky(C.U'C.U + v*v')` but the computation of `CC` only uses `O(n^2)`
 operations. The input factorization `C` is updated in place such that on exit `C == CC`.
 The vector `v` is destroyed during the computation.
 """
-function lowrankupdate!(C::Cholesky, v::StridedVector)
+function lowrankupdate!(C::Cholesky, v::AbstractVector)
     A = C.factors
     n = length(v)
     if size(C, 1) != n
@@ -689,14 +765,14 @@ function lowrankupdate!(C::Cholesky, v::StridedVector)
 end
 
 """
-    lowrankdowndate!(C::Cholesky, v::StridedVector) -> CC::Cholesky
+    lowrankdowndate!(C::Cholesky, v::AbstractVector) -> CC::Cholesky
 
 Downdate a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U` then
 `CC = cholesky(C.U'C.U - v*v')` but the computation of `CC` only uses `O(n^2)`
 operations. The input factorization `C` is updated in place such that on exit `C == CC`.
 The vector `v` is destroyed during the computation.
 """
-function lowrankdowndate!(C::Cholesky, v::StridedVector)
+function lowrankdowndate!(C::Cholesky, v::AbstractVector)
     A = C.factors
     n = length(v)
     if size(C, 1) != n
@@ -742,19 +818,19 @@ function lowrankdowndate!(C::Cholesky, v::StridedVector)
 end
 
 """
-    lowrankupdate(C::Cholesky, v::StridedVector) -> CC::Cholesky
+    lowrankupdate(C::Cholesky, v::AbstractVector) -> CC::Cholesky
 
 Update a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U`
 then `CC = cholesky(C.U'C.U + v*v')` but the computation of `CC` only uses
 `O(n^2)` operations.
 """
-lowrankupdate(C::Cholesky, v::StridedVector) = lowrankupdate!(copy(C), copy(v))
+lowrankupdate(C::Cholesky, v::AbstractVector) = lowrankupdate!(copy(C), copy(v))
 
 """
-    lowrankdowndate(C::Cholesky, v::StridedVector) -> CC::Cholesky
+    lowrankdowndate(C::Cholesky, v::AbstractVector) -> CC::Cholesky
 
 Downdate a Cholesky factorization `C` with the vector `v`. If `A = C.U'C.U`
 then `CC = cholesky(C.U'C.U - v*v')` but the computation of `CC` only uses
 `O(n^2)` operations.
 """
-lowrankdowndate(C::Cholesky, v::StridedVector) = lowrankdowndate!(copy(C), copy(v))
+lowrankdowndate(C::Cholesky, v::AbstractVector) = lowrankdowndate!(copy(C), copy(v))
diff --git a/stdlib/LinearAlgebra/src/dense.jl b/stdlib/LinearAlgebra/src/dense.jl
index aa215735d3daf9..d23dca5e6488e8 100644
--- a/stdlib/LinearAlgebra/src/dense.jl
+++ b/stdlib/LinearAlgebra/src/dense.jl
@@ -13,7 +13,7 @@ const NRM2_CUTOFF = 32
 # This constant should ideally be determined by the actual CPU cache size
 const ISONE_CUTOFF = 2^21 # 2M
 
-function isone(A::StridedMatrix)
+function isone(A::AbstractMatrix)
     m, n = size(A)
     m != n && return false # only square matrices can satisfy x == one(x)
     if sizeof(A) < ISONE_CUTOFF
@@ -23,7 +23,7 @@ function isone(A::StridedMatrix)
     end
 end
 
-@inline function _isone_triacheck(A::StridedMatrix, m::Int)
+@inline function _isone_triacheck(A::AbstractMatrix, m::Int)
     @inbounds for i in 1:m, j in i:m
         if i == j
             isone(A[i,i]) || return false
@@ -35,7 +35,7 @@ end
 end
 
 # Inner loop over rows to be friendly to the CPU cache
-@inline function _isone_cachefriendly(A::StridedMatrix, m::Int)
+@inline function _isone_cachefriendly(A::AbstractMatrix, m::Int)
     @inbounds for i in 1:m, j in 1:m
         if i == j
             isone(A[i,i]) || return false
@@ -75,7 +75,8 @@ isposdef!(A::AbstractMatrix) =
 
 Test whether a matrix is positive definite (and Hermitian) by trying to perform a
 Cholesky factorization of `A`.
-See also [`isposdef!`](@ref)
+
+See also [`isposdef!`](@ref), [`cholesky`](@ref).
 
 # Examples
 ```jldoctest
@@ -205,6 +206,8 @@ diagind(m::Integer, n::Integer, k::Integer=0) =
 
 An `AbstractRange` giving the indices of the `k`th diagonal of the matrix `M`.
 
+See also: [`diag`](@ref), [`diagm`](@ref), [`Diagonal`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 2 3; 4 5 6; 7 8 9]
@@ -227,7 +230,7 @@ end
 
 The `k`th diagonal of a matrix, as a vector.
 
-See also: [`diagm`](@ref)
+See also [`diagm`](@ref), [`diagind`](@ref), [`Diagonal`](@ref), [`isdiag`](@ref).
 
 # Examples
 ```jldoctest
@@ -254,6 +257,8 @@ Vector `kv.second` will be placed on the `kv.first` diagonal.
 By default the matrix is square and its size is inferred
 from `kv`, but a non-square size `m`×`n` (padded with zeros as needed)
 can be specified by passing `m,n` as the first arguments.
+For repeated diagonal indices `kv.first` the values in the corresponding
+vectors `kv.second` will be added.
 
 `diagm` constructs a full matrix; if you want storage-efficient
 versions with fast arithmetic, see [`Diagonal`](@ref), [`Bidiagonal`](@ref)
@@ -274,6 +279,13 @@ julia> diagm(1 => [1,2,3], -1 => [4,5])
  4  0  2  0
  0  5  0  3
  0  0  0  0
+
+julia> diagm(1 => [1,2,3], 1 => [1,2,3])
+4×4 Matrix{Int64}:
+ 0  2  0  0
+ 0  0  4  0
+ 0  0  0  6
+ 0  0  0  0
 ```
 """
 diagm(kv::Pair{<:Integer,<:AbstractVector}...) = _diagm(nothing, kv...)
@@ -347,6 +359,9 @@ overwriting the existing value of `C`.
 !!! tip
     Bounds checking can be disabled by [`@inbounds`](@ref), but you need to take care of the shape
     of `C`, `A`, `B` yourself.
+
+!!! compat "Julia 1.6"
+    This function requires Julia 1.6 or later.
 """
 @inline function kron!(C::AbstractMatrix, A::AbstractMatrix, B::AbstractMatrix)
     require_one_based_indexing(A, B)
@@ -445,7 +460,7 @@ function (^)(A::AbstractMatrix{T}, p::Integer) where T<:Integer
 end
 function integerpow(A::AbstractMatrix{T}, p) where T
     TT = promote_op(^, T, typeof(p))
-    return (TT == T ? A : copyto!(similar(A, TT), A))^Integer(p)
+    return (TT == T ? A : convert(AbstractMatrix{TT}, A))^Integer(p)
 end
 function schurpow(A::AbstractMatrix, p)
     if istriu(A)
@@ -459,7 +474,7 @@ function schurpow(A::AbstractMatrix, p)
             retmat = retmat * powm!(UpperTriangular(float.(A)), real(p - floor(p)))
         end
     else
-        S,Q,d = schur(complex(A))
+        S,Q,d = Schur{Complex}(schur(A))
         # Integer part
         R = S ^ floor(p)
         # Real part
@@ -485,7 +500,7 @@ function (^)(A::AbstractMatrix{T}, p::Real) where T
     # Quicker return if A is diagonal
     if isdiag(A)
         TT = promote_op(^, T, typeof(p))
-        retmat = copy_oftype(A, TT)
+        retmat = copymutable_oftype(A, TT)
         for i in 1:n
             retmat[i, i] = retmat[i, i] ^ p
         end
@@ -553,6 +568,31 @@ julia> exp(A)
 """
 exp(A::StridedMatrix{<:BlasFloat}) = exp!(copy(A))
 exp(A::StridedMatrix{<:Union{Integer,Complex{<:Integer}}}) = exp!(float.(A))
+exp(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(exp(parent(A)))
+exp(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(exp(parent(A)))
+
+"""
+    cis(A::AbstractMatrix)
+
+More efficient method for `exp(im*A)` of square matrix `A`
+(especially if `A` is `Hermitian` or real-`Symmetric`).
+
+See also [`cispi`](@ref), [`sincos`](@ref), [`exp`](@ref).
+
+!!! compat "Julia 1.7"
+    Support for using `cis` with matrices was added in Julia 1.7.
+
+# Examples
+```jldoctest
+julia> cis([π 0; 0 π]) ≈ -I
+true
+```
+"""
+cis(A::AbstractMatrix) = exp(im * A)  # fallback
+cis(A::AbstractMatrix{<:Base.HWNumber}) = exp_maybe_inplace(float.(im .* A))
+
+exp_maybe_inplace(A::StridedMatrix{<:Union{ComplexF32, ComplexF64}}) = exp!(A)
+exp_maybe_inplace(A) = exp(A)
 
 """
     ^(b::Number, A::AbstractMatrix)
@@ -589,7 +629,6 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
     end
     ilo, ihi, scale = LAPACK.gebal!('B', A)    # modifies A
     nA   = opnorm(A, 1)
-    Inn    = Matrix{T}(I, n, n)
     ## For sufficiently small nA, use lower order Padé-Approximations
     if (nA <= 2.1)
         if nA > 0.95
@@ -606,23 +645,29 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
             C = T[120.,60.,12.,1.]
         end
         A2 = A * A
-        P  = copy(Inn)
-        U  = C[2] * P
-        V  = C[1] * P
-        for k in 1:(div(size(C, 1), 2) - 1)
-            k2 = 2 * k
+        # Compute U and V: Even/odd terms in Padé numerator & denom
+        # Expansion of k=1 in for loop
+        P = A2
+        U = mul!(C[4]*P, true, C[2]*I, true, true) #U = C[2]*I + C[4]*P
+        V = mul!(C[3]*P, true, C[1]*I, true, true) #V = C[1]*I + C[3]*P
+        for k in 2:(div(length(C), 2) - 1)
             P *= A2
-            U += C[k2 + 2] * P
-            V += C[k2 + 1] * P
+            mul!(U, C[2k + 2], P, true, true) # U += C[2k+2]*P
+            mul!(V, C[2k + 1], P, true, true) # V += C[2k+1]*P
         end
+
         U = A * U
-        X = V + U
-        LAPACK.gesv!(V-U, X)
+
+        # Padé approximant:  (V-U)\(V+U)
+        tmp1, tmp2 = A, A2 # Reuse already allocated arrays
+        tmp1 .= V .- U
+        tmp2 .= V .+ U
+        X = LAPACK.gesv!(tmp1, tmp2)[1]
     else
         s  = log2(nA/5.4)               # power of 2 later reversed by squaring
         if s > 0
             si = ceil(Int,s)
-            A /= convert(T,2^si)
+            A ./= convert(T,2^si)
         end
         CC = T[64764752532480000.,32382376266240000.,7771770303897600.,
                 1187353796428800.,  129060195264000.,  10559470521600.,
@@ -632,16 +677,35 @@ function exp!(A::StridedMatrix{T}) where T<:BlasFloat
         A2 = A * A
         A4 = A2 * A2
         A6 = A2 * A4
-        U  = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+
-                  CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2 .+ CC[2].*Inn)
-        V  = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+
-                   CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2 .+ CC[1].*Inn
+        tmp1, tmp2 = similar(A6), similar(A6)
+
+        # Allocation economical version of:
+        # U  = A * (A6 * (CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2) .+
+        #           CC[8].*A6 .+ CC[6].*A4 .+ CC[4]*A2+CC[2]*I)
+        tmp1 .= CC[14].*A6 .+ CC[12].*A4 .+ CC[10].*A2
+        tmp2 .= CC[8].*A6 .+ CC[6].*A4 .+ CC[4].*A2
+        mul!(tmp2, true,CC[2]*I, true, true) # tmp2 .+= CC[2]*I
+        U = mul!(tmp2, A6, tmp1, true, true)
+        U, tmp1 = mul!(tmp1, A, U), A # U = A * U0
+
+        # Allocation economical version of:
+        # V  = A6 * (CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2) .+
+        #           CC[7].*A6 .+ CC[5].*A4 .+ CC[3]*A2 .+ CC[1]*I
+        tmp1 .= CC[13].*A6 .+ CC[11].*A4 .+ CC[9].*A2
+        tmp2 .= CC[7].*A6 .+ CC[5].*A4 .+ CC[3].*A2
+        mul!(tmp2, true, CC[1]*I, true, true) # tmp2 .+= CC[1]*I
+        V = mul!(tmp2, A6, tmp1, true, true)
+
+        tmp1 .= V .+ U
+        tmp2 .= V .- U # tmp2 aleady contained V but this seems more readable
+        X = LAPACK.gesv!(tmp2, tmp1)[1] # X now contains r_13 in Higham 2008
 
-        X = V + U
-        LAPACK.gesv!(V-U, X)
-
-        if s > 0            # squaring to reverse dividing by power of 2
-            for t=1:si; X *= X end
+        if s > 0
+            # Repeated squaring to compute X = r_13^(2^si)
+            for t=1:si
+                mul!(tmp2, X, X)
+                X, tmp2 = tmp2, X
+            end
         end
     end
 
@@ -676,7 +740,7 @@ function rcswap!(i::Integer, j::Integer, X::StridedMatrix{<:Number})
 end
 
 """
-    log(A{T}::StridedMatrix{T})
+    log(A::StridedMatrix)
 
 If `A` has no negative real eigenvalue, compute the principal matrix logarithm of `A`, i.e.
 the unique matrix ``X`` such that ``e^X = A`` and ``-\\pi < Im(\\lambda) < \\pi`` for all
@@ -685,9 +749,10 @@ matrix function is returned whenever possible.
 
 If `A` is symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is
 used, if `A` is triangular an improved version of the inverse scaling and squaring method is
-employed (see [^AH12] and [^AHR13]). For general matrices, the complex Schur form
-([`schur`](@ref)) is computed and the triangular algorithm is used on the
-triangular factor.
+employed (see [^AH12] and [^AHR13]). If `A` is real with no negative eigenvalues, then
+the real Schur form is computed. Otherwise, the complex Schur form is computed. Then
+the upper (quasi-)triangular algorithm in [^AHR13] is used on the upper (quasi-)triangular
+factor.
 
 [^AH12]: Awad H. Al-Mohy and Nicholas J. Higham, "Improved inverse  scaling and squaring algorithms for the matrix logarithm", SIAM Journal on Scientific Computing, 34(4), 2012, C153-C169. [doi:10.1137/110852553](https://doi.org/10.1137/110852553)
 
@@ -710,30 +775,33 @@ function log(A::StridedMatrix)
     # If possible, use diagonalization
     if ishermitian(A)
         logHermA = log(Hermitian(A))
-        return isa(logHermA, Hermitian) ? copytri!(parent(logHermA), 'U', true) : parent(logHermA)
-    end
-
-    # Use Schur decomposition
-    n = checksquare(A)
-    if istriu(A)
-        return triu!(parent(log(UpperTriangular(complex(A)))))
-    else
-        if isreal(A)
-            SchurF = schur(real(A))
-        else
-            SchurF = schur(A)
-        end
-        if !istriu(SchurF.T)
-            SchurS = schur(complex(SchurF.T))
-            logT = SchurS.Z * log(UpperTriangular(SchurS.T)) * SchurS.Z'
-            return SchurF.Z * logT * SchurF.Z'
+        return ishermitian(logHermA) ? copytri!(parent(logHermA), 'U', true) : parent(logHermA)
+    elseif istriu(A)
+        return triu!(parent(log(UpperTriangular(A))))
+    elseif isreal(A)
+        SchurF = schur(real(A))
+        if istriu(SchurF.T)
+            logA = SchurF.Z * log(UpperTriangular(SchurF.T)) * SchurF.Z'
         else
-            R = log(UpperTriangular(complex(SchurF.T)))
-            return SchurF.Z * R * SchurF.Z'
+            # real log exists whenever all eigenvalues are positive
+            is_log_real = !any(x -> isreal(x) && real(x) ≤ 0, SchurF.values)
+            if is_log_real
+                logA = SchurF.Z * log_quasitriu(SchurF.T) * SchurF.Z'
+            else
+                SchurS = Schur{Complex}(SchurF)
+                logA = SchurS.Z * log(UpperTriangular(SchurS.T)) * SchurS.Z'
+            end
         end
+        return eltype(A) <: Complex ? complex(logA) : logA
+    else
+        SchurF = schur(A)
+        return SchurF.vectors * log(UpperTriangular(SchurF.T)) * SchurF.vectors'
     end
 end
 
+log(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(log(parent(A)))
+log(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(log(parent(A)))
+
 """
     sqrt(A::AbstractMatrix)
 
@@ -743,7 +811,7 @@ that is the unique matrix ``X`` with eigenvalues having positive real part such
 
 If `A` is real-symmetric or Hermitian, its eigendecomposition ([`eigen`](@ref)) is
 used to compute the square root.   For such matrices, eigenvalues λ that
-appear to be slightly negative due to roundoff errors are treated as if they were zero
+appear to be slightly negative due to roundoff errors are treated as if they were zero.
 More precisely, matrices with all eigenvalues `≥ -rtol*(max |λ|)` are treated as semidefinite
 (yielding a Hermitian square root), with negative eigenvalues taken to be zero.
 `rtol` is a keyword argument to `sqrt` (in the Hermitian/real-symmetric case only) that
@@ -752,6 +820,8 @@ defaults to machine precision scaled by `size(A,1)`.
 Otherwise, the square root is determined by means of the
 Björck-Hammarling method [^BH83], which computes the complex Schur form ([`schur`](@ref))
 and then the complex square root of the triangular factor.
+If a real square root exists, then an extension of this method [^H87] that computes the real
+Schur form and then the real square root of the quasi-triangular factor is instead used.
 
 [^BH83]:
 
@@ -759,6 +829,12 @@ and then the complex square root of the triangular factor.
     Linear Algebra and its Applications, 52-53, 1983, 127-140.
     [doi:10.1016/0024-3795(83)80010-X](https://doi.org/10.1016/0024-3795(83)80010-X)
 
+[^H87]:
+
+    Nicholas J. Higham, "Computing real square roots of a real matrix",
+    Linear Algebra and its Applications, 88-89, 1987, 405-430.
+    [doi:10.1016/0024-3795(87)90118-2](https://doi.org/10.1016/0024-3795(87)90118-2)
+
 # Examples
 ```jldoctest
 julia> A = [4 0; 0 4]
@@ -772,34 +848,39 @@ julia> sqrt(A)
  0.0  2.0
 ```
 """
-function sqrt(A::StridedMatrix{<:Real})
-    if issymmetric(A)
-        return copytri!(parent(sqrt(Symmetric(A))), 'U')
-    end
-    n = checksquare(A)
-    if istriu(A)
-        return triu!(parent(sqrt(UpperTriangular(A))))
-    else
-        SchurF = schur(complex(A))
-        R = triu!(parent(sqrt(UpperTriangular(SchurF.T)))) # unwrapping unnecessary?
-        return SchurF.vectors * R * SchurF.vectors'
-    end
-end
-function sqrt(A::StridedMatrix{<:Complex})
+sqrt(::StridedMatrix)
+
+function sqrt(A::StridedMatrix{T}) where {T<:Union{Real,Complex}}
     if ishermitian(A)
         sqrtHermA = sqrt(Hermitian(A))
-        return isa(sqrtHermA, Hermitian) ? copytri!(parent(sqrtHermA), 'U', true) : parent(sqrtHermA)
-    end
-    n = checksquare(A)
-    if istriu(A)
+        return ishermitian(sqrtHermA) ? copytri!(parent(sqrtHermA), 'U', true) : parent(sqrtHermA)
+    elseif istriu(A)
         return triu!(parent(sqrt(UpperTriangular(A))))
+    elseif isreal(A)
+        SchurF = schur(real(A))
+        if istriu(SchurF.T)
+            sqrtA = SchurF.Z * sqrt(UpperTriangular(SchurF.T)) * SchurF.Z'
+        else
+            # real sqrt exists whenever no eigenvalues are negative
+            is_sqrt_real = !any(x -> isreal(x) && real(x) < 0, SchurF.values)
+            # sqrt_quasitriu uses LAPACK functions for non-triu inputs
+            if typeof(sqrt(zero(T))) <: BlasFloat && is_sqrt_real
+                sqrtA = SchurF.Z * sqrt_quasitriu(SchurF.T) * SchurF.Z'
+            else
+                SchurS = Schur{Complex}(SchurF)
+                sqrtA = SchurS.Z * sqrt(UpperTriangular(SchurS.T)) * SchurS.Z'
+            end
+        end
+        return eltype(A) <: Complex ? complex(sqrtA) : sqrtA
     else
         SchurF = schur(A)
-        R = triu!(parent(sqrt(UpperTriangular(SchurF.T)))) # unwrapping unnecessary?
-        return SchurF.vectors * R * SchurF.vectors'
+        return SchurF.vectors * sqrt(UpperTriangular(SchurF.T)) * SchurF.vectors'
     end
 end
 
+sqrt(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint(sqrt(parent(A)))
+sqrt(A::Transpose{<:Any,<:AbstractMatrix}) = transpose(sqrt(parent(A)))
+
 function inv(A::StridedMatrix{T}) where T
     checksquare(A)
     S = typeof((one(T)*zero(T) + one(T)*zero(T))/one(T))
@@ -1032,7 +1113,7 @@ function acos(A::AbstractMatrix)
         acosHermA = acos(Hermitian(A))
         return isa(acosHermA, Hermitian) ? copytri!(parent(acosHermA), 'U', true) : parent(acosHermA)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(-im * log(U + im * sqrt(I - U^2))))
     return SchurF.Z * R * SchurF.Z'
@@ -1063,7 +1144,7 @@ function asin(A::AbstractMatrix)
         asinHermA = asin(Hermitian(A))
         return isa(asinHermA, Hermitian) ? copytri!(parent(asinHermA), 'U', true) : parent(asinHermA)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(-im * log(im * U + sqrt(I - U^2))))
     return SchurF.Z * R * SchurF.Z'
@@ -1093,7 +1174,7 @@ function atan(A::AbstractMatrix)
     if ishermitian(A)
         return copytri!(parent(atan(Hermitian(A))), 'U', true)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = im * UpperTriangular(SchurF.T)
     R = triu!(parent(log((I + U) / (I - U)) / 2im))
     return SchurF.Z * R * SchurF.Z'
@@ -1112,7 +1193,7 @@ function acosh(A::AbstractMatrix)
         acoshHermA = acosh(Hermitian(A))
         return isa(acoshHermA, Hermitian) ? copytri!(parent(acoshHermA), 'U', true) : parent(acoshHermA)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(log(U + sqrt(U - I) * sqrt(U + I))))
     return SchurF.Z * R * SchurF.Z'
@@ -1130,7 +1211,7 @@ function asinh(A::AbstractMatrix)
     if ishermitian(A)
         return copytri!(parent(asinh(Hermitian(A))), 'U', true)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(log(U + sqrt(I + U^2))))
     return SchurF.Z * R * SchurF.Z'
@@ -1148,7 +1229,7 @@ function atanh(A::AbstractMatrix)
     if ishermitian(A)
         return copytri!(parent(atanh(Hermitian(A))), 'U', true)
     end
-    SchurF = schur(complex(A))
+    SchurF = Schur{Complex}(schur(A))
     U = UpperTriangular(SchurF.T)
     R = triu!(parent(log((I + U) / (I - U)) / 2))
     return SchurF.Z * R * SchurF.Z'
@@ -1284,12 +1365,10 @@ function factorize(A::StridedMatrix{T}) where T
                 return Bidiagonal(diag(A), diag(A, 1), :U)
             end
             if utri1
-                if (herm & (T <: Complex)) | sym
-                    try
-                        return ldlt!(SymTridiagonal(diag(A), diag(A, -1)))
-                    catch
-                    end
-                end
+                # TODO: enable once a specialized, non-dense bunchkaufman method exists
+                # if (herm & (T <: Complex)) | sym
+                    # return bunchkaufman(SymTridiagonal(diag(A), diag(A, -1)))
+                # end
                 return lu(Tridiagonal(diag(A, -1), diag(A), diag(A, 1)))
             end
         end
@@ -1309,10 +1388,11 @@ function factorize(A::StridedMatrix{T}) where T
         end
         return lu(A)
     end
-    qr(A, Val(true))
+    qr(A, ColumnNorm())
 end
 factorize(A::Adjoint)   =   adjoint(factorize(parent(A)))
 factorize(A::Transpose) = transpose(factorize(parent(A)))
+factorize(a::Number)    = a # same as how factorize behaves on Diagonal types
 
 ## Moore-Penrose pseudoinverse
 
@@ -1366,31 +1446,24 @@ function pinv(A::AbstractMatrix{T}; atol::Real = 0.0, rtol::Real = (eps(real(flo
     m, n = size(A)
     Tout = typeof(zero(T)/sqrt(one(T) + one(T)))
     if m == 0 || n == 0
-        return Matrix{Tout}(undef, n, m)
+        return similar(A, Tout, (n, m))
     end
-    if istril(A)
-        if istriu(A)
-            maxabsA = maximum(abs.(diag(A)))
-            tol = max(rtol*maxabsA, atol)
-            B = zeros(Tout, n, m)
-            for i = 1:min(m, n)
-                if abs(A[i,i]) > tol
-                    Aii = inv(A[i,i])
-                    if isfinite(Aii)
-                        B[i,i] = Aii
-                    end
-                end
-            end
-            return B
-        end
+    if isdiag(A)
+        indA = diagind(A)
+        dA = view(A, indA)
+        maxabsA = maximum(abs, dA)
+        tol = max(rtol * maxabsA, atol)
+        B = fill!(similar(A, Tout, (n, m)), 0)
+        indB = diagind(B)
+        B[indB] .= (x -> abs(x) > tol ? pinv(x) : zero(x)).(dA)
+        return B
     end
-    SVD         = svd(A, full = false)
+    SVD         = svd(A)
     tol         = max(rtol*maximum(SVD.S), atol)
     Stype       = eltype(SVD.S)
-    Sinv        = zeros(Stype, length(SVD.S))
+    Sinv        = fill!(similar(A, Stype, length(SVD.S)), 0)
     index       = SVD.S .> tol
-    Sinv[index] = one(Stype) ./ SVD.S[index]
-    Sinv[findall(.!isfinite.(Sinv))] .= zero(Stype)
+    Sinv[index] .= pinv.(view(SVD.S, index))
     return SVD.Vt' * (Diagonal(Sinv) * SVD.U')
 end
 function pinv(x::Number)
@@ -1405,7 +1478,7 @@ end
     nullspace(M, rtol::Real) = nullspace(M; rtol=rtol) # to be deprecated in Julia 2.0
 
 Computes a basis for the nullspace of `M` by including the singular
-vectors of `M` whose singular values have magnitudes greater than `max(atol, rtol*σ₁)`,
+vectors of `M` whose singular values have magnitudes smaller than `max(atol, rtol*σ₁)`,
 where `σ₁` is `M`'s largest singular value.
 
 By default, the relative tolerance `rtol` is `n*ϵ`, where `n`
@@ -1439,17 +1512,15 @@ julia> nullspace(M, atol=0.95)
  1.0
 ```
 """
-function nullspace(A::AbstractMatrix; atol::Real = 0.0, rtol::Real = (min(size(A)...)*eps(real(float(one(eltype(A))))))*iszero(atol))
-    m, n = size(A)
-    (m == 0 || n == 0) && return Matrix{eltype(A)}(I, n, n)
-    SVD = svd(A, full=true)
+function nullspace(A::AbstractVecOrMat; atol::Real = 0.0, rtol::Real = (min(size(A, 1), size(A, 2))*eps(real(float(one(eltype(A))))))*iszero(atol))
+    m, n = size(A, 1), size(A, 2)
+    (m == 0 || n == 0) && return Matrix{eigtype(eltype(A))}(I, n, n)
+    SVD = svd(A; full=true)
     tol = max(atol, SVD.S[1]*rtol)
     indstart = sum(s -> s .> tol, SVD.S) + 1
     return copy(SVD.Vt[indstart:end,:]')
 end
 
-nullspace(A::AbstractVector; atol::Real = 0.0, rtol::Real = (min(size(A)...)*eps(real(float(one(eltype(A))))))*iszero(atol)) = nullspace(reshape(A, length(A), 1), rtol= rtol, atol= atol)
-
 """
     cond(M, p::Real=2)
 
@@ -1525,6 +1596,34 @@ function sylvester(A::StridedMatrix{T},B::StridedMatrix{T},C::StridedMatrix{T})
 end
 sylvester(A::StridedMatrix{T}, B::StridedMatrix{T}, C::StridedMatrix{T}) where {T<:Integer} = sylvester(float(A), float(B), float(C))
 
+Base.@propagate_inbounds function _sylvester_2x1!(A, B, C)
+    b = B[1]
+    a21, a12 = A[2, 1], A[1, 2]
+    m11 = b + A[1, 1]
+    m22 = b + A[2, 2]
+    d = m11 * m22 - a12 * a21
+    c1, c2 = C
+    C[1] = (a12 * c2 - m22 * c1) / d
+    C[2] = (a21 * c1 - m11 * c2) / d
+    return C
+end
+Base.@propagate_inbounds function _sylvester_1x2!(A, B, C)
+    a = A[1]
+    b21, b12 = B[2, 1], B[1, 2]
+    m11 = a + B[1, 1]
+    m22 = a + B[2, 2]
+    d = m11 * m22 - b21 * b12
+    c1, c2 = C
+    C[1] = (b21 * c2 - m22 * c1) / d
+    C[2] = (b12 * c1 - m11 * c2) / d
+    return C
+end
+function _sylvester_2x2!(A, B, C)
+    _, scale = LAPACK.trsyl!('N', 'N', A, B, C)
+    rmul!(C, -inv(scale))
+    return C
+end
+
 sylvester(a::Union{Real,Complex}, b::Union{Real,Complex}, c::Union{Real,Complex}) = -c / (a + b)
 
 # AX + XA' + C = 0
@@ -1567,4 +1666,4 @@ function lyap(A::StridedMatrix{T}, C::StridedMatrix{T}) where {T<:BlasFloat}
     rmul!(Q*(Y * adjoint(Q)), inv(scale))
 end
 lyap(A::StridedMatrix{T}, C::StridedMatrix{T}) where {T<:Integer} = lyap(float(A), float(C))
-lyap(a::T, c::T) where {T<:Number} = -c/(2a)
+lyap(a::Union{Real,Complex}, c::Union{Real,Complex}) = -c/(2real(a))
diff --git a/stdlib/LinearAlgebra/src/diagonal.jl b/stdlib/LinearAlgebra/src/diagonal.jl
index fc35e567c41f4e..b3d54b58421123 100644
--- a/stdlib/LinearAlgebra/src/diagonal.jl
+++ b/stdlib/LinearAlgebra/src/diagonal.jl
@@ -10,69 +10,97 @@ struct Diagonal{T,V<:AbstractVector{T}} <: AbstractMatrix{T}
         new{T,V}(diag)
     end
 end
+Diagonal{T,V}(d::Diagonal) where {T,V<:AbstractVector{T}} = Diagonal{T,V}(d.diag)
 Diagonal(v::AbstractVector{T}) where {T} = Diagonal{T,typeof(v)}(v)
 Diagonal{T}(v::AbstractVector) where {T} = Diagonal(convert(AbstractVector{T}, v)::AbstractVector{T})
 
+function Base.promote_rule(A::Type{<:Diagonal{<:Any,V}}, B::Type{<:Diagonal{<:Any,W}}) where {V,W}
+    X = promote_type(V, W)
+    T = eltype(X)
+    isconcretetype(T) && return Diagonal{T,X}
+    return typejoin(A, B)
+end
+
 """
-    Diagonal(A::AbstractMatrix)
+    Diagonal(V::AbstractVector)
 
-Construct a matrix from the diagonal of `A`.
+Construct a matrix with `V` as its diagonal.
+
+See also [`diag`](@ref), [`diagm`](@ref).
 
 # Examples
 ```jldoctest
-julia> A = [1 2 3; 4 5 6; 7 8 9]
-3×3 Matrix{Int64}:
- 1  2  3
- 4  5  6
- 7  8  9
-
-julia> Diagonal(A)
-3×3 Diagonal{Int64, Vector{Int64}}:
- 1  ⋅  ⋅
- ⋅  5  ⋅
- ⋅  ⋅  9
+julia> Diagonal([1, 10, 100])
+3×3 Diagonal{$Int, Vector{$Int}}:
+ 1   ⋅    ⋅
+ ⋅  10    ⋅
+ ⋅   ⋅  100
+
+julia> diagm([7, 13])
+2×2 Matrix{$Int}:
+ 7   0
+ 0  13
 ```
 """
-Diagonal(A::AbstractMatrix) = Diagonal(diag(A))
+Diagonal(V::AbstractVector)
 
 """
-    Diagonal(V::AbstractVector)
+    Diagonal(A::AbstractMatrix)
 
-Construct a matrix with `V` as its diagonal.
+Construct a matrix from the diagonal of `A`.
 
 # Examples
 ```jldoctest
-julia> V = [1, 2]
-2-element Vector{Int64}:
- 1
- 2
-
-julia> Diagonal(V)
-2×2 Diagonal{Int64, Vector{Int64}}:
- 1  ⋅
- ⋅  2
+julia> A = permutedims(reshape(1:15, 5, 3))
+3×5 Matrix{Int64}:
+  1   2   3   4   5
+  6   7   8   9  10
+ 11  12  13  14  15
+
+julia> Diagonal(A)
+3×3 Diagonal{$Int, Vector{$Int}}:
+ 1  ⋅   ⋅
+ ⋅  7   ⋅
+ ⋅  ⋅  13
+
+julia> diag(A, 2)
+3-element Vector{$Int}:
+  3
+  9
+ 15
 ```
 """
-Diagonal(V::AbstractVector)
+Diagonal(A::AbstractMatrix) = Diagonal(diag(A))
 
 Diagonal(D::Diagonal) = D
 Diagonal{T}(D::Diagonal{T}) where {T} = D
 Diagonal{T}(D::Diagonal) where {T} = Diagonal{T}(D.diag)
 
 AbstractMatrix{T}(D::Diagonal) where {T} = Diagonal{T}(D)
-Matrix(D::Diagonal) = diagm(0 => D.diag)
-Array(D::Diagonal) = Matrix(D)
+Matrix(D::Diagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(D)
+Array(D::Diagonal{T}) where {T} = Matrix(D)
+function Matrix{T}(D::Diagonal) where {T}
+    n = size(D, 1)
+    B = zeros(T, n, n)
+    @inbounds for i in 1:n
+        B[i,i] = D.diag[i]
+    end
+    return B
+end
+
+"""
+    Diagonal{T}(undef, n)
+
+Construct an uninitialized `Diagonal{T}` of length `n`. See `undef`.
+"""
+Diagonal{T}(::UndefInitializer, n::Integer) where T = Diagonal(Vector{T}(undef, n))
 
-# For D<:Diagonal, similar(D[, neweltype]) should yield a Diagonal matrix.
-# On the other hand, similar(D, [neweltype,] shape...) should yield a sparse matrix.
-# The first method below effects the former, and the second the latter.
 similar(D::Diagonal, ::Type{T}) where {T} = Diagonal(similar(D.diag, T))
-# The method below is moved to SparseArrays for now
-# similar(D::Diagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
+similar(::Diagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = zeros(T, dims...)
 
 copyto!(D1::Diagonal, D2::Diagonal) = (copyto!(D1.diag, D2.diag); D1)
 
-size(D::Diagonal) = (length(D.diag),length(D.diag))
+size(D::Diagonal) = (n = length(D.diag); (n,n))
 
 function size(D::Diagonal,d::Integer)
     if d<1
@@ -90,8 +118,8 @@ end
     end
     r
 end
-diagzero(::Diagonal{T},i,j) where {T} = zero(T)
-diagzero(D::Diagonal{Matrix{T}},i,j) where {T} = zeros(T, size(D.diag[i], 1), size(D.diag[j], 2))
+diagzero(::Diagonal{T}, i, j) where {T} = zero(T)
+diagzero(D::Diagonal{<:AbstractMatrix{T}}, i, j) where {T} = zeros(T, size(D.diag[i], 1), size(D.diag[j], 2))
 
 function setindex!(D::Diagonal, v, i::Int, j::Int)
     @boundscheck checkbounds(D, i, j)
@@ -129,24 +157,24 @@ isdiag(D::Diagonal) = all(isdiag, D.diag)
 isdiag(D::Diagonal{<:Number}) = true
 istriu(D::Diagonal, k::Integer=0) = k <= 0 || iszero(D.diag) ? true : false
 istril(D::Diagonal, k::Integer=0) = k >= 0 || iszero(D.diag) ? true : false
-function triu!(D::Diagonal,k::Integer=0)
+function triu!(D::Diagonal{T}, k::Integer=0) where T
     n = size(D,1)
     if !(-n + 1 <= k <= n + 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif k > 0
-        fill!(D.diag,0)
+        fill!(D.diag, zero(T))
     end
     return D
 end
 
-function tril!(D::Diagonal,k::Integer=0)
+function tril!(D::Diagonal{T}, k::Integer=0) where T
     n = size(D,1)
     if !(-n - 1 <= k <= n - 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < 0
-        fill!(D.diag,0)
+        fill!(D.diag, zero(T))
     end
     return D
 end
@@ -174,79 +202,57 @@ end
 (*)(x::Number, D::Diagonal) = Diagonal(x * D.diag)
 (*)(D::Diagonal, x::Number) = Diagonal(D.diag * x)
 (/)(D::Diagonal, x::Number) = Diagonal(D.diag / x)
+(\)(x::Number, D::Diagonal) = Diagonal(x \ D.diag)
+(^)(D::Diagonal, a::Number) = Diagonal(D.diag .^ a)
+(^)(D::Diagonal, a::Real) = Diagonal(D.diag .^ a) # for disambiguation
+(^)(D::Diagonal, a::Integer) = Diagonal(D.diag .^ a) # for disambiguation
+Base.literal_pow(::typeof(^), D::Diagonal, valp::Val) =
+    Diagonal(Base.literal_pow.(^, D.diag, valp)) # for speed
+Base.literal_pow(::typeof(^), D::Diagonal, ::Val{-1}) = inv(D) # for disambiguation
+
+function _muldiag_size_check(A, B)
+    nA = size(A, 2)
+    mB = size(B, 1)
+    @noinline throw_dimerr(::AbstractMatrix, nA, mB) = throw(DimensionMismatch("second dimension of A, $nA, does not match first dimension of B, $mB"))
+    @noinline throw_dimerr(::AbstractVector, nA, mB) = throw(DimensionMismatch("second dimension of D, $nA, does not match length of V, $mB"))
+    nA == mB || throw_dimerr(B, nA, mB)
+    return nothing
+end
+# the output matrix should have the same size as the non-diagonal input matrix or vector
+@noinline throw_dimerr(szC, szA) = throw(DimensionMismatch("output matrix has size: $szC, but should have size $szA"))
+_size_check_out(C, ::Diagonal, A) = _size_check_out(C, A)
+_size_check_out(C, A, ::Diagonal) = _size_check_out(C, A)
+_size_check_out(C, A::Diagonal, ::Diagonal) = _size_check_out(C, A)
+function _size_check_out(C, A)
+    szA = size(A)
+    szC = size(C)
+    szA == szC || throw_dimerr(szC, szA)
+    return nothing
+end
+function _muldiag_size_check(C, A, B)
+    _muldiag_size_check(A, B)
+    _size_check_out(C, A, B)
+end
 
 function (*)(Da::Diagonal, Db::Diagonal)
-    nDa, mDb = size(Da, 2), size(Db, 1)
-    if nDa != mDb
-        throw(DimensionMismatch("second dimension of Da, $nDa, does not match first dimension of Db, $mDb"))
-    end
+    _muldiag_size_check(Da, Db)
     return Diagonal(Da.diag .* Db.diag)
 end
 
 function (*)(D::Diagonal, V::AbstractVector)
-    nD = size(D, 2)
-    if nD != length(V)
-        throw(DimensionMismatch("second dimension of D, $nD, does not match length of V, $(length(V))"))
-    end
+    _muldiag_size_check(D, V)
     return D.diag .* V
 end
 
-(*)(A::AbstractTriangular, D::Diagonal) =
-    rmul!(copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A), D)
-(*)(D::Diagonal, B::AbstractTriangular) =
-    lmul!(D, copyto!(similar(B, promote_op(*, eltype(B), eltype(D.diag))), B))
-
 (*)(A::AbstractMatrix, D::Diagonal) =
-    rmul!(copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A), D)
+    mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A, D)
 (*)(D::Diagonal, A::AbstractMatrix) =
-    lmul!(D, copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), A))
+    mul!(similar(A, promote_op(*, eltype(A), eltype(D.diag)), size(A)), D, A)
 
-function rmul!(A::AbstractMatrix, D::Diagonal)
-    require_one_based_indexing(A)
-    A .= A .* permutedims(D.diag)
-    return A
-end
-
-function lmul!(D::Diagonal, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    B .= D.diag .* B
-    return B
-end
-
-rmul!(A::Union{LowerTriangular,UpperTriangular}, D::Diagonal) = typeof(A)(rmul!(A.data, D))
-function rmul!(A::UnitLowerTriangular, D::Diagonal)
-    rmul!(A.data, D)
-    for i = 1:size(A, 1)
-        A.data[i,i] = D.diag[i]
-    end
-    LowerTriangular(A.data)
-end
-function rmul!(A::UnitUpperTriangular, D::Diagonal)
-    rmul!(A.data, D)
-    for i = 1:size(A, 1)
-        A.data[i,i] = D.diag[i]
-    end
-    UpperTriangular(A.data)
-end
-
-function lmul!(D::Diagonal, B::UnitLowerTriangular)
-    lmul!(D, B.data)
-    for i = 1:size(B, 1)
-        B.data[i,i] = D.diag[i]
-    end
-    LowerTriangular(B.data)
-end
-function lmul!(D::Diagonal, B::UnitUpperTriangular)
-    lmul!(D, B.data)
-    for i = 1:size(B, 1)
-        B.data[i,i] = D.diag[i]
-    end
-    UpperTriangular(B.data)
-end
+rmul!(A::AbstractMatrix, D::Diagonal) = @inline mul!(A, A, D)
+lmul!(D::Diagonal, B::AbstractVecOrMat) = @inline mul!(B, D, B)
 
-*(D::Adjoint{<:Any,<:Diagonal}, B::Diagonal) = Diagonal(adjoint.(D.parent.diag) .* B.diag)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, D::Diagonal) =
-    rmul!(copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A), D)
+#TODO: It seems better to call (D' * adjA')' directly?
 function *(adjA::Adjoint{<:Any,<:AbstractMatrix}, D::Diagonal)
     A = adjA.parent
     Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -254,9 +260,6 @@ function *(adjA::Adjoint{<:Any,<:AbstractMatrix}, D::Diagonal)
     rmul!(Ac, D)
 end
 
-*(D::Transpose{<:Any,<:Diagonal}, B::Diagonal) = Diagonal(transpose.(D.parent.diag) .* B.diag)
-*(A::Transpose{<:Any,<:AbstractTriangular}, D::Diagonal) =
-    rmul!(copyto!(similar(A, promote_op(*, eltype(A), eltype(D.diag))), A), D)
 function *(transA::Transpose{<:Any,<:AbstractMatrix}, D::Diagonal)
     A = transA.parent
     At = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -264,10 +267,9 @@ function *(transA::Transpose{<:Any,<:AbstractMatrix}, D::Diagonal)
     rmul!(At, D)
 end
 
-*(D::Diagonal, B::Adjoint{<:Any,<:Diagonal}) = Diagonal(D.diag .* adjoint.(B.parent.diag))
-*(D::Diagonal, B::Adjoint{<:Any,<:AbstractTriangular}) =
-    lmul!(D, copyto!(similar(B, promote_op(*, eltype(B), eltype(D.diag))), B))
-*(D::Diagonal, adjQ::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) = (Q = adjQ.parent; rmul!(Array(D), adjoint(Q)))
+*(D::Diagonal, adjQ::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) =
+    rmul!(Array{promote_type(eltype(D), eltype(adjQ))}(D), adjQ)
+
 function *(D::Diagonal, adjA::Adjoint{<:Any,<:AbstractMatrix})
     A = adjA.parent
     Ac = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -275,9 +277,6 @@ function *(D::Diagonal, adjA::Adjoint{<:Any,<:AbstractMatrix})
     lmul!(D, Ac)
 end
 
-*(D::Diagonal, B::Transpose{<:Any,<:Diagonal}) = Diagonal(D.diag .* transpose.(B.parent.diag))
-*(D::Diagonal, B::Transpose{<:Any,<:AbstractTriangular}) =
-    lmul!(D, copyto!(similar(B, promote_op(*, eltype(B), eltype(D.diag))), B))
 function *(D::Diagonal, transA::Transpose{<:Any,<:AbstractMatrix})
     A = transA.parent
     At = similar(A, promote_op(*, eltype(A), eltype(D.diag)), (size(A, 2), size(A, 1)))
@@ -285,237 +284,303 @@ function *(D::Diagonal, transA::Transpose{<:Any,<:AbstractMatrix})
     lmul!(D, At)
 end
 
-*(D::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:Diagonal}) =
-    Diagonal(adjoint.(D.parent.diag) .* adjoint.(B.parent.diag))
-*(D::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:Diagonal}) =
-    Diagonal(transpose.(D.parent.diag) .* transpose.(B.parent.diag))
-
-rmul!(A::Diagonal, B::Diagonal) = Diagonal(A.diag .*= B.diag)
-lmul!(A::Diagonal, B::Diagonal) = Diagonal(B.diag .= A.diag .* B.diag)
-
-function lmul!(adjA::Adjoint{<:Any,<:Diagonal}, B::AbstractMatrix)
-    A = adjA.parent
-    return lmul!(adjoint(A), B)
-end
-function lmul!(transA::Transpose{<:Any,<:Diagonal}, B::AbstractMatrix)
-    A = transA.parent
-    return lmul!(transpose(A), B)
-end
-
-function rmul!(A::AbstractMatrix, adjB::Adjoint{<:Any,<:Diagonal})
-    B = adjB.parent
-    return rmul!(A, adjoint(B))
-end
-function rmul!(A::AbstractMatrix, transB::Transpose{<:Any,<:Diagonal})
-    B = transB.parent
-    return rmul!(A, transpose(B))
-end
-
-# Elements of `out` may not be defined (e.g., for `BigFloat`). To make
-# `mul!(out, A, B)` work for such cases, `out .*ₛ beta` short-circuits
-# `out * beta`.  Using `broadcasted` to avoid the multiplication
-# inside this function.
-function *ₛ end
-Broadcast.broadcasted(::typeof(*ₛ), out, beta) =
-    iszero(beta::Number) ? false : broadcasted(*, out, beta)
-
-# Get ambiguous method if try to unify AbstractVector/AbstractMatrix here using AbstractVecOrMat
-@inline mul!(out::AbstractVector, A::Diagonal, in::AbstractVector,
-             alpha::Number, beta::Number) =
-    out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractVector, A::Adjoint{<:Any,<:Diagonal}, in::AbstractVector,
-             alpha::Number, beta::Number) =
-    out .= (adjoint.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractVector, A::Transpose{<:Any,<:Diagonal}, in::AbstractVector,
-             alpha::Number, beta::Number) =
-    out .= (transpose.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::StridedMatrix,
-             alpha::Number, beta::Number) =
-    out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, in::StridedMatrix,
-             alpha::Number, beta::Number) =
-    out .= (adjoint.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, in::StridedMatrix,
-             alpha::Number, beta::Number) =
-    out .= (transpose.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::Adjoint{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, in::Adjoint{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (adjoint.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, in::Adjoint{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (transpose.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, A::Diagonal, in::Transpose{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (A.diag .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, in::Transpose{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (adjoint.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, in::Transpose{<:Any,<:StridedMatrix},
-             alpha::Number, beta::Number) =
-    out .= (transpose.(A.parent.diag) .* in) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, in::StridedMatrix, A::Diagonal,
-             alpha::Number, beta::Number) =
-    out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::StridedMatrix, A::Adjoint{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* adjoint(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::StridedMatrix, A::Transpose{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* transpose(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, in::Adjoint{<:Any,<:StridedMatrix}, A::Diagonal,
-             alpha::Number, beta::Number) =
-    out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Adjoint{<:Any,<:StridedMatrix}, A::Adjoint{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* adjoint(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Adjoint{<:Any,<:StridedMatrix}, A::Transpose{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* transpose(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-
-@inline mul!(out::AbstractMatrix, in::Transpose{<:Any,<:StridedMatrix}, A::Diagonal,
-             alpha::Number, beta::Number) =
-    out .= (in .* permutedims(A.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Transpose{<:Any,<:StridedMatrix}, A::Adjoint{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* adjoint(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-@inline mul!(out::AbstractMatrix, in::Transpose{<:Any,<:StridedMatrix}, A::Transpose{<:Any,<:Diagonal},
-             alpha::Number, beta::Number) =
-    out .= (in .* transpose(A.parent.diag)) .*ₛ alpha .+ out .*ₛ beta
-
-# ambiguities with Symmetric/Hermitian
-# RealHermSymComplex[Sym]/[Herm] only include Number; invariant to [c]transpose
-*(A::Diagonal, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = A * transB.parent
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, B::Diagonal) = transA.parent * B
-*(A::Diagonal, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Diagonal) = adjA.parent * B
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, transD::Transpose{<:Any,<:Diagonal}) = transA.parent * transD
-*(transD::Transpose{<:Any,<:Diagonal}, transA::Transpose{<:Any,<:RealHermSymComplexSym}) = transD * transA.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, adjD::Adjoint{<:Any,<:Diagonal}) = adjA.parent * adjD
-*(adjD::Adjoint{<:Any,<:Diagonal}, adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}) = adjD * adjA.parent
-mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:RealHermSymComplexSym}) = C .= adjoint.(A.parent.diag) .* B
-mul!(C::AbstractMatrix, A::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:RealHermSymComplexHerm}) = C .= transpose.(A.parent.diag) .* B
-
-@inline mul!(C::AbstractMatrix,
-             A::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:RealHermSym},
-             alpha::Number, beta::Number) = mul!(C, A, B.parent, alpha, beta)
-@inline mul!(C::AbstractMatrix,
-             A::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:RealHermSymComplexHerm},
-             alpha::Number, beta::Number) = mul!(C, A, B.parent, alpha, beta)
-@inline mul!(C::AbstractMatrix,
-             A::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:RealHermSym},
-             alpha::Number, beta::Number) = mul!(C, A, B.parent, alpha, beta)
-@inline mul!(C::AbstractMatrix,
-             A::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:RealHermSymComplexSym},
-             alpha::Number, beta::Number) = mul!(C, A, B.parent, alpha, beta)
-
-@inline mul!(C::AbstractMatrix,
-             A::Adjoint{<:Any,<:Diagonal}, B::Adjoint{<:Any,<:RealHermSymComplexSym},
-             alpha::Number, beta::Number) =
-    C .= (adjoint.(A.parent.diag) .* B) .*ₛ alpha .+ C .*ₛ beta
-@inline mul!(C::AbstractMatrix,
-             A::Transpose{<:Any,<:Diagonal}, B::Transpose{<:Any,<:RealHermSymComplexHerm},
-             alpha::Number, beta::Number) =
-    C .= (transpose.(A.parent.diag) .* B) .*ₛ alpha .+ C .*ₛ beta
-
-(/)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag ./ Db.diag)
-
-function ldiv!(D::Diagonal{T}, v::AbstractVector{T}) where {T}
-    if length(v) != length(D.diag)
-        throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $(length(v)) rows"))
-    end
-    for i = 1:length(D.diag)
-        d = D.diag[i]
-        if iszero(d)
-            throw(SingularException(i))
+@inline function __muldiag!(out, D::Diagonal, B, alpha, beta)
+    require_one_based_indexing(out)
+    if iszero(alpha)
+        _rmul_or_fill!(out, beta)
+    else
+        if iszero(beta)
+            @inbounds for j in axes(B, 2)
+                @simd for i in axes(B, 1)
+                    out[i,j] = D.diag[i] * B[i,j] * alpha
+                end
+            end
+        else
+            @inbounds for j in axes(B, 2)
+                @simd for i in axes(B, 1)
+                    out[i,j] = D.diag[i] * B[i,j] * alpha + out[i,j] * beta
+                end
+            end
         end
-        v[i] = d\v[i]
     end
-    v
+    return out
 end
-function ldiv!(D::Diagonal{T}, V::AbstractMatrix{T}) where {T}
-    require_one_based_indexing(V)
-    if size(V,1) != length(D.diag)
-        throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $(size(V,1)) rows"))
+@inline function __muldiag!(out, A, D::Diagonal, alpha, beta)
+    require_one_based_indexing(out)
+    if iszero(alpha)
+        _rmul_or_fill!(out, beta)
+    else
+        if iszero(beta)
+            @inbounds for j in axes(A, 2)
+                dja = D.diag[j] * alpha
+                @simd for i in axes(A, 1)
+                    out[i,j] = A[i,j] * dja
+                end
+            end
+        else
+            @inbounds for j in axes(A, 2)
+                dja = D.diag[j] * alpha
+                @simd for i in axes(A, 1)
+                    out[i,j] = A[i,j] * dja + out[i,j] * beta
+                end
+            end
+        end
     end
-    for i = 1:length(D.diag)
-        d = D.diag[i]
-        if iszero(d)
-            throw(SingularException(i))
+    return out
+end
+@inline function __muldiag!(out::Diagonal, D1::Diagonal, D2::Diagonal, alpha, beta)
+    d1 = D1.diag
+    d2 = D2.diag
+    if iszero(alpha)
+        _rmul_or_fill!(out.diag, beta)
+    else
+        if iszero(beta)
+            @inbounds @simd for i in eachindex(out.diag)
+                out.diag[i] = d1[i] * d2[i] * alpha
+            end
+        else
+            @inbounds @simd for i in eachindex(out.diag)
+                out.diag[i] = d1[i] * d2[i] * alpha + out.diag[i] * beta
+            end
         end
-        for j = 1:size(V,2)
-            @inbounds V[i,j] = d\V[i,j]
+    end
+    return out
+end
+@inline function __muldiag!(out, D1::Diagonal, D2::Diagonal, alpha, beta)
+    require_one_based_indexing(out)
+    mA = size(D1, 1)
+    d1 = D1.diag
+    d2 = D2.diag
+    _rmul_or_fill!(out, beta)
+    if !iszero(alpha)
+        @inbounds @simd for i in 1:mA
+            out[i,i] += d1[i] * d2[i] * alpha
         end
     end
-    V
+    return out
 end
-ldiv!(x::AbstractArray, A::Diagonal, b::AbstractArray) = (x .= A.diag .\ b)
 
-ldiv!(adjD::Adjoint{<:Any,<:Diagonal{T}}, B::AbstractVecOrMat{T}) where {T} =
-    (D = adjD.parent; ldiv!(conj(D), B))
-ldiv!(transD::Transpose{<:Any,<:Diagonal{T}}, B::AbstractVecOrMat{T}) where {T} =
-    (D = transD.parent; ldiv!(D, B))
-
-function ldiv!(D::Diagonal, A::Union{LowerTriangular,UpperTriangular})
-    broadcast!(\, parent(A), D.diag, parent(A))
-    A
+@inline function _muldiag!(out, A, B, alpha, beta)
+    _muldiag_size_check(out, A, B)
+    __muldiag!(out, A, B, alpha, beta)
+    return out
 end
 
-function rdiv!(A::AbstractMatrix{T}, D::Diagonal{T}) where {T}
+# Get ambiguous method if try to unify AbstractVector/AbstractMatrix here using AbstractVecOrMat
+@inline mul!(out::AbstractVector, D::Diagonal, V::AbstractVector, alpha::Number, beta::Number) =
+    _muldiag!(out, D, V, alpha, beta)
+@inline mul!(out::AbstractMatrix, D::Diagonal, B::AbstractMatrix, alpha::Number, beta::Number) =
+    _muldiag!(out, D, B, alpha, beta)
+@inline mul!(out::AbstractMatrix, D::Diagonal, B::Adjoint{<:Any,<:AbstractVecOrMat},
+             alpha::Number, beta::Number) = _muldiag!(out, D, B, alpha, beta)
+@inline mul!(out::AbstractMatrix, D::Diagonal, B::Transpose{<:Any,<:AbstractVecOrMat},
+             alpha::Number, beta::Number) = _muldiag!(out, D, B, alpha, beta)
+
+@inline mul!(out::AbstractMatrix, A::AbstractMatrix, D::Diagonal, alpha::Number, beta::Number) =
+    _muldiag!(out, A, D, alpha, beta)
+@inline mul!(out::AbstractMatrix, A::Adjoint{<:Any,<:AbstractVecOrMat}, D::Diagonal,
+             alpha::Number, beta::Number) = _muldiag!(out, A, D, alpha, beta)
+@inline mul!(out::AbstractMatrix, A::Transpose{<:Any,<:AbstractVecOrMat}, D::Diagonal,
+             alpha::Number, beta::Number) = _muldiag!(out, A, D, alpha, beta)
+@inline mul!(C::Diagonal, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number) =
+    _muldiag!(C, Da, Db, alpha, beta)
+
+mul!(C::AbstractMatrix, Da::Diagonal, Db::Diagonal, alpha::Number, beta::Number) =
+    _muldiag!(C, Da, Db, alpha, beta)
+
+_init(op, A::AbstractArray{<:Number}, B::AbstractArray{<:Number}) =
+    (_ -> zero(typeof(op(oneunit(eltype(A)), oneunit(eltype(B))))))
+_init(op, A::AbstractArray, B::AbstractArray) = promote_op(op, eltype(A), eltype(B))
+
+/(A::AbstractVecOrMat, D::Diagonal) = _rdiv!(_init(/, A, D).(A), A, D)
+rdiv!(A::AbstractVecOrMat, D::Diagonal) = @inline _rdiv!(A, A, D)
+# avoid copy when possible via internal 3-arg backend
+function _rdiv!(B::AbstractVecOrMat, A::AbstractVecOrMat, D::Diagonal)
     require_one_based_indexing(A)
     dd = D.diag
-    m, n = size(A)
-    if (k = length(dd)) ≠ n
+    m, n = size(A, 1), size(A, 2)
+    if (k = length(dd)) != n
         throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
     end
     @inbounds for j in 1:n
         ddj = dd[j]
-        if iszero(ddj)
-            throw(SingularException(j))
-        end
+        iszero(ddj) && throw(SingularException(j))
         for i in 1:m
-            A[i, j] /= ddj
+            B[i, j] = A[i, j] / ddj
         end
     end
-    A
+    B
 end
 
-function rdiv!(A::Union{LowerTriangular,UpperTriangular}, D::Diagonal)
-    broadcast!(/, parent(A), parent(A), permutedims(D.diag))
-    A
+function \(D::Diagonal, B::AbstractVector)
+    j = findfirst(iszero, D.diag)
+    isnothing(j) || throw(SingularException(j))
+    return D.diag .\ B
 end
+\(D::Diagonal, B::AbstractMatrix) =
+    ldiv!(_init(\, D, B).(B), D, B)
 
-rdiv!(A::AbstractMatrix{T}, adjD::Adjoint{<:Any,<:Diagonal{T}}) where {T} =
-    (D = adjD.parent; rdiv!(A, conj(D)))
-rdiv!(A::AbstractMatrix{T}, transD::Transpose{<:Any,<:Diagonal{T}}) where {T} =
-    (D = transD.parent; rdiv!(A, D))
-
-(/)(A::Union{StridedMatrix, AbstractTriangular}, D::Diagonal) =
-    rdiv!((typeof(oneunit(eltype(D))/oneunit(eltype(A)))).(A), D)
-
-(\)(F::Factorization, D::Diagonal) =
-    ldiv!(F, Matrix{typeof(oneunit(eltype(D))/oneunit(eltype(F)))}(D))
-\(adjF::Adjoint{<:Any,<:Factorization}, D::Diagonal) =
-    (F = adjF.parent; ldiv!(adjoint(F), Matrix{typeof(oneunit(eltype(D))/oneunit(eltype(F)))}(D)))
-(\)(A::Union{QR,QRCompactWY,QRPivoted}, B::Diagonal) =
-    invoke(\, Tuple{Union{QR,QRCompactWY,QRPivoted}, AbstractVecOrMat}, A, B)
-
+ldiv!(D::Diagonal, B::AbstractVecOrMat) = @inline ldiv!(B, D, B)
+function ldiv!(B::AbstractVecOrMat, D::Diagonal, A::AbstractVecOrMat)
+    require_one_based_indexing(A, B)
+    dd = D.diag
+    d = length(dd)
+    m, n = size(A, 1), size(A, 2)
+    m′, n′ = size(B, 1), size(B, 2)
+    m == d || throw(DimensionMismatch("right hand side has $m rows but D is $d by $d"))
+    (m, n) == (m′, n′) || throw(DimensionMismatch("expect output to be $m by $n, but got $m′ by $n′"))
+    j = findfirst(iszero, D.diag)
+    isnothing(j) || throw(SingularException(j))
+    @inbounds for j = 1:n, i = 1:m
+        B[i, j] = dd[i] \ A[i, j]
+    end
+    B
+end
+
+# Optimizations for \, / between Diagonals
+\(D::Diagonal, B::Diagonal) = ldiv!(similar(B, promote_op(\, eltype(D), eltype(B))), D, B)
+/(A::Diagonal, D::Diagonal) = _rdiv!(similar(A, promote_op(/, eltype(A), eltype(D))), A, D)
+function _rdiv!(Dc::Diagonal, Db::Diagonal, Da::Diagonal)
+    n, k = length(Db.diag), length(Da.diag)
+    n == k || throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
+    j = findfirst(iszero, Da.diag)
+    isnothing(j) || throw(SingularException(j))
+    Dc.diag .= Db.diag ./ Da.diag
+    Dc
+end
+ldiv!(Dc::Diagonal, Da::Diagonal, Db::Diagonal) = Diagonal(ldiv!(Dc.diag, Da, Db.diag))
+
+# optimizations for (Sym)Tridiagonal and Diagonal
+@propagate_inbounds _getudiag(T::Tridiagonal, i) = T.du[i]
+@propagate_inbounds _getudiag(S::SymTridiagonal, i) = S.ev[i]
+@propagate_inbounds _getdiag(T::Tridiagonal, i) = T.d[i]
+@propagate_inbounds _getdiag(S::SymTridiagonal, i) = symmetric(S.dv[i], :U)::symmetric_type(eltype(S.dv))
+@propagate_inbounds _getldiag(T::Tridiagonal, i) = T.dl[i]
+@propagate_inbounds _getldiag(S::SymTridiagonal, i) = transpose(S.ev[i])
+
+function (\)(D::Diagonal, S::SymTridiagonal)
+    T = promote_op(\, eltype(D), eltype(S))
+    du = similar(S.ev, T, max(length(S.dv)-1, 0))
+    d  = similar(S.dv, T, length(S.dv))
+    dl = similar(S.ev, T, max(length(S.dv)-1, 0))
+    ldiv!(Tridiagonal(dl, d, du), D, S)
+end
+(\)(D::Diagonal, T::Tridiagonal) = ldiv!(similar(T, promote_op(\, eltype(D), eltype(T))), D, T)
+function ldiv!(T::Tridiagonal, D::Diagonal, S::Union{SymTridiagonal,Tridiagonal})
+    m = size(S, 1)
+    dd = D.diag
+    if (k = length(dd)) != m
+        throw(DimensionMismatch("diagonal matrix is $k by $k but right hand side has $m rows"))
+    end
+    if length(T.d) != m
+        throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))"))
+    end
+    m == 0 && return T
+    j = findfirst(iszero, dd)
+    isnothing(j) || throw(SingularException(j))
+    ddj = dd[1]
+    T.d[1] = ddj \ _getdiag(S, 1)
+    @inbounds if m > 1
+        T.du[1] = ddj \ _getudiag(S, 1)
+        for j in 2:m-1
+            ddj = dd[j]
+            T.dl[j-1] = ddj \ _getldiag(S, j-1)
+            T.d[j]  = ddj \ _getdiag(S, j)
+            T.du[j] = ddj \ _getudiag(S, j)
+        end
+        ddj = dd[m]
+        T.dl[m-1] = ddj \ _getldiag(S, m-1)
+        T.d[m] = ddj \ _getdiag(S, m)
+    end
+    return T
+end
 
-@inline function kron!(C::AbstractMatrix{T}, A::Diagonal, B::Diagonal) where T
-    fill!(C, zero(T))
+function (/)(S::SymTridiagonal, D::Diagonal)
+    T = promote_op(\, eltype(D), eltype(S))
+    du = similar(S.ev, T, max(length(S.dv)-1, 0))
+    d  = similar(S.dv, T, length(S.dv))
+    dl = similar(S.ev, T, max(length(S.dv)-1, 0))
+    _rdiv!(Tridiagonal(dl, d, du), S, D)
+end
+(/)(T::Tridiagonal, D::Diagonal) = _rdiv!(similar(T, promote_op(/, eltype(T), eltype(D))), T, D)
+function _rdiv!(T::Tridiagonal, S::Union{SymTridiagonal,Tridiagonal}, D::Diagonal)
+    n = size(S, 2)
+    dd = D.diag
+    if (k = length(dd)) != n
+        throw(DimensionMismatch("left hand side has $n columns but D is $k by $k"))
+    end
+    if length(T.d) != n
+        throw(DimensionMismatch("target matrix size $(size(T)) does not match input matrix size $(size(S))"))
+    end
+    n == 0 && return T
+    j = findfirst(iszero, dd)
+    isnothing(j) || throw(SingularException(j))
+    ddj = dd[1]
+    T.d[1] = _getdiag(S, 1) / ddj
+    @inbounds if n > 1
+        T.dl[1] = _getldiag(S, 1) / ddj
+        for j in 2:n-1
+            ddj = dd[j]
+            T.dl[j] = _getldiag(S, j) / ddj
+            T.d[j] = _getdiag(S, j) / ddj
+            T.du[j-1] = _getudiag(S, j-1) / ddj
+        end
+        ddj = dd[n]
+        T.d[n] = _getdiag(S, n) / ddj
+        T.du[n-1] = _getudiag(S, n-1) / ddj
+    end
+    return T
+end
+
+# Optimizations for [l/r]mul!, l/rdiv!, *, / and \ between Triangular and Diagonal.
+# These functions are generally more efficient if we calculate the whole data field.
+# The following code implements them in a unified pattern to avoid missing.
+@inline function _setdiag!(data, f, diag, diag′ = nothing)
+    @inbounds for i in 1:length(diag)
+        data[i,i] = isnothing(diag′) ? f(diag[i]) : f(diag[i],diag′[i])
+    end
+    data
+end
+for Tri in (:UpperTriangular, :LowerTriangular)
+    UTri = Symbol(:Unit, Tri)
+    # 2 args
+    for (fun, f) in zip((:*, :rmul!, :rdiv!, :/), (:identity, :identity, :inv, :inv))
+        @eval $fun(A::$Tri, D::Diagonal) = $Tri($fun(A.data, D))
+        @eval $fun(A::$UTri, D::Diagonal) = $Tri(_setdiag!($fun(A.data, D), $f, D.diag))
+    end
+    for (fun, f) in zip((:*, :lmul!, :ldiv!, :\), (:identity, :identity, :inv, :inv))
+        @eval $fun(D::Diagonal, A::$Tri) = $Tri($fun(D, A.data))
+        @eval $fun(D::Diagonal, A::$UTri) = $Tri(_setdiag!($fun(D, A.data), $f, D.diag))
+    end
+    # 3-arg ldiv!
+    @eval ldiv!(C::$Tri, D::Diagonal, A::$Tri) = $Tri(ldiv!(C.data, D, A.data))
+    @eval ldiv!(C::$Tri, D::Diagonal, A::$UTri) = $Tri(_setdiag!(ldiv!(C.data, D, A.data), inv, D.diag))
+    # 3-arg mul!: invoke 5-arg mul! rather than lmul!
+    @eval mul!(C::$Tri, A::Union{$Tri,$UTri}, D::Diagonal) = mul!(C, A, D, true, false)
+    # 5-arg mul!
+    @eval @inline mul!(C::$Tri, D::Diagonal, A::$Tri, α::Number, β::Number) = $Tri(mul!(C.data, D, A.data, α, β))
+    @eval @inline function mul!(C::$Tri, D::Diagonal, A::$UTri, α::Number, β::Number)
+        iszero(α) && return _rmul_or_fill!(C, β)
+        diag′ = iszero(β) ? nothing : diag(C)
+        data = mul!(C.data, D, A.data, α, β)
+        $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′))
+    end
+    @eval @inline mul!(C::$Tri, A::$Tri, D::Diagonal, α::Number, β::Number) = $Tri(mul!(C.data, A.data, D, α, β))
+    @eval @inline function mul!(C::$Tri, A::$UTri, D::Diagonal, α::Number, β::Number)
+        iszero(α) && return _rmul_or_fill!(C, β)
+        diag′ = iszero(β) ? nothing : diag(C)
+        data = mul!(C.data, A.data, D, α, β)
+        $Tri(_setdiag!(data, MulAddMul(α, β), D.diag, diag′))
+    end
+end
+
+@inline function kron!(C::AbstractMatrix, A::Diagonal, B::Diagonal)
     valA = A.diag; nA = length(valA)
     valB = B.diag; nB = length(valB)
     nC = checksquare(C)
     @boundscheck nC == nA*nB ||
         throw(DimensionMismatch("expect C to be a $(nA*nB)x$(nA*nB) matrix, got size $(nC)x$(nC)"))
-
+    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     @inbounds for i = 1:nA, j = 1:nB
         idx = (i-1)*nB+j
         C[idx, idx] = valA[i] * valB[j]
@@ -523,19 +588,16 @@ rdiv!(A::AbstractMatrix{T}, transD::Transpose{<:Any,<:Diagonal{T}}) where {T} =
     return C
 end
 
-function kron(A::Diagonal{T1}, B::Diagonal{T2}) where {T1<:Number, T2<:Number}
-    valA = A.diag; nA = length(valA)
-    valB = B.diag; nB = length(valB)
-    valC = Vector{typeof(zero(T1)*zero(T2))}(undef,nA*nB)
-    C = Diagonal(valC)
-    return @inbounds kron!(C, A, B)
-end
+kron(A::Diagonal{<:Number}, B::Diagonal{<:Number}) = Diagonal(kron(A.diag, B.diag))
 
 @inline function kron!(C::AbstractMatrix, A::Diagonal, B::AbstractMatrix)
-    Base.require_one_based_indexing(B)
-    (mA, nA) = size(A); (mB, nB) = size(B); (mC, nC) = size(C);
+    require_one_based_indexing(B)
+    (mA, nA) = size(A)
+    (mB, nB) = size(B)
+    (mC, nC) = size(C)
     @boundscheck (mC, nC) == (mA * mB, nA * nB) ||
         throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
+    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     m = 1
     @inbounds for j = 1:nA
         A_jj = A[j,j]
@@ -553,9 +615,12 @@ end
 
 @inline function kron!(C::AbstractMatrix, A::AbstractMatrix, B::Diagonal)
     require_one_based_indexing(A)
-    (mA, nA) = size(A); (mB, nB) = size(B); (mC, nC) = size(C);
+    (mA, nA) = size(A)
+    (mB, nB) = size(B)
+    (mC, nC) = size(C)
     @boundscheck (mC, nC) == (mA * mB, nA * nB) ||
         throw(DimensionMismatch("expect C to be a $(mA * mB)x$(nA * nB) matrix, got size $(mC)x$(nC)"))
+    isempty(A) || isempty(B) || fill!(C, zero(A[1,1] * B[1,1]))
     m = 1
     @inbounds for j = 1:nA
         for l = 1:mB
@@ -571,31 +636,21 @@ end
     return C
 end
 
-function kron(A::Diagonal{T}, B::AbstractMatrix{S}) where {T<:Number, S<:Number}
-    (mA, nA) = size(A); (mB, nB) = size(B)
-    R = zeros(Base.promote_op(*, T, S), mA * mB, nA * nB)
-    return @inbounds kron!(R, A, B)
-end
-
-function kron(A::AbstractMatrix{T}, B::Diagonal{S}) where {T<:Number, S<:Number}
-    (mA, nA) = size(A); (mB, nB) = size(B)
-    R = zeros(promote_op(*, T, S), mA * mB, nA * nB)
-    return @inbounds kron!(R, A, B)
-end
-
 conj(D::Diagonal) = Diagonal(conj(D.diag))
 transpose(D::Diagonal{<:Number}) = D
 transpose(D::Diagonal) = Diagonal(transpose.(D.diag))
 adjoint(D::Diagonal{<:Number}) = conj(D)
 adjoint(D::Diagonal) = Diagonal(adjoint.(D.diag))
+permutedims(D::Diagonal) = D
+permutedims(D::Diagonal, perm) = (Base.checkdims_perm(D, D, perm); D)
 
-function diag(D::Diagonal, k::Integer=0)
+function diag(D::Diagonal{T}, k::Integer=0) where T
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of k
     if k == 0
         return copyto!(similar(D.diag, length(D.diag)), D.diag)
     elseif -size(D,1) <= k <= size(D,1)
-        return fill!(similar(D.diag, size(D,1)-abs(k)), 0)
+        return fill!(similar(D.diag, size(D,1)-abs(k)), zero(T))
     else
         throw(ArgumentError(string("requested diagonal, $k, must be at least $(-size(D, 1)) ",
             "and at most $(size(D, 2)) for an $(size(D, 1))-by-$(size(D, 2)) matrix")))
@@ -603,14 +658,13 @@ function diag(D::Diagonal, k::Integer=0)
 end
 tr(D::Diagonal) = sum(tr, D.diag)
 det(D::Diagonal) = prod(det, D.diag)
-logdet(D::Diagonal{<:Real}) = sum(log, D.diag)
 function logdet(D::Diagonal{<:Complex}) # make sure branch cut is correct
     z = sum(log, D.diag)
     complex(real(z), rem2pi(imag(z), RoundNearest))
 end
 
 # Matrix functions
-for f in (:exp, :log, :sqrt,
+for f in (:exp, :cis, :log, :sqrt,
           :cos, :sin, :tan, :csc, :sec, :cot,
           :cosh, :sinh, :tanh, :csch, :sech, :coth,
           :acos, :asin, :atan, :acsc, :asec, :acot,
@@ -618,30 +672,6 @@ for f in (:exp, :log, :sqrt,
     @eval $f(D::Diagonal) = Diagonal($f.(D.diag))
 end
 
-#Linear solver
-function ldiv!(D::Diagonal, B::StridedVecOrMat)
-    m, n = size(B, 1), size(B, 2)
-    if m != length(D.diag)
-        throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $m rows"))
-    end
-    (m == 0 || n == 0) && return B
-    for j = 1:n
-        for i = 1:m
-            di = D.diag[i]
-            if di == 0
-                throw(SingularException(i))
-            end
-            B[i,j] = di \ B[i,j]
-        end
-    end
-    return B
-end
-(\)(D::Diagonal, A::AbstractMatrix) =
-    ldiv!(D, (typeof(oneunit(eltype(D))/oneunit(eltype(A)))).(A))
-
-(\)(D::Diagonal, b::AbstractVector) = D.diag .\ b
-(\)(Da::Diagonal, Db::Diagonal) = Diagonal(Da.diag .\ Db.diag)
-
 function inv(D::Diagonal{T}) where T
     Di = similar(D.diag, typeof(inv(zero(T))))
     for i = 1:length(D.diag)
@@ -674,7 +704,7 @@ function pinv(D::Diagonal{T}, tol::Real) where T
 end
 
 #Eigensystem
-eigvals(D::Diagonal{<:Number}; permute::Bool=true, scale::Bool=true) = D.diag
+eigvals(D::Diagonal{<:Number}; permute::Bool=true, scale::Bool=true) = copy(D.diag)
 eigvals(D::Diagonal; permute::Bool=true, scale::Bool=true) =
     [eigvals(x) for x in D.diag] #For block matrices, etc.
 eigvecs(D::Diagonal) = Matrix{eltype(D)}(I, size(D))
@@ -682,39 +712,70 @@ function eigen(D::Diagonal; permute::Bool=true, scale::Bool=true, sortby::Union{
     if any(!isfinite, D.diag)
         throw(ArgumentError("matrix contains Infs or NaNs"))
     end
-    Eigen(sorteig!(eigvals(D), eigvecs(D), sortby)...)
+    Td = Base.promote_op(/, eltype(D), eltype(D))
+    λ = eigvals(D)
+    if !isnothing(sortby)
+        p = sortperm(λ; alg=QuickSort, by=sortby)
+        λ = λ[p] # make a copy, otherwise this permutes D.diag
+        evecs = zeros(Td, size(D))
+        @inbounds for i in eachindex(p)
+            evecs[p[i],i] = one(Td)
+        end
+    else
+        evecs = Matrix{Td}(I, size(D))
+    end
+    Eigen(λ, evecs)
 end
 
 #Singular system
 svdvals(D::Diagonal{<:Number}) = sort!(abs.(D.diag), rev = true)
 svdvals(D::Diagonal) = [svdvals(v) for v in D.diag]
-function svd(D::Diagonal{<:Number})
-    S   = abs.(D.diag)
-    piv = sortperm(S, rev = true)
-    U   = Diagonal(D.diag ./ S)
-    Up  = hcat([U[:,i] for i = 1:length(D.diag)][piv]...)
-    V   = Diagonal(fill!(similar(D.diag), one(eltype(D.diag))))
-    Vp  = hcat([V[:,i] for i = 1:length(D.diag)][piv]...)
-    return SVD(Up, S[piv], copy(Vp'))
-end
-
-# disambiguation methods: * of Diagonal and Adj/Trans AbsVec
-*(x::Adjoint{<:Any,<:AbstractVector}, D::Diagonal) = Adjoint(map((t,s) -> t'*s, D.diag, parent(x)))
-*(x::Transpose{<:Any,<:AbstractVector}, D::Diagonal) = Transpose(map((t,s) -> transpose(t)*s, D.diag, parent(x)))
-*(x::Adjoint{<:Any,<:AbstractVector},   D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
-*(x::Transpose{<:Any,<:AbstractVector}, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
+function svd(D::Diagonal{T}) where {T<:Number}
+    d = D.diag
+    s = abs.(d)
+    piv = sortperm(s, rev = true)
+    S = s[piv]
+    Td  = typeof(oneunit(T)/oneunit(T))
+    U = zeros(Td, size(D))
+    Vt = copy(U)
+    for i in 1:length(d)
+        j = piv[i]
+        U[j,i] = d[j] / S[i]
+        Vt[i,j] = one(Td)
+    end
+    return SVD(U, S, Vt)
+end
+
+# disambiguation methods: * and / of Diagonal and Adj/Trans AbsVec
+*(x::AdjointAbsVec, D::Diagonal) = Adjoint(map((t,s) -> t'*s, D.diag, parent(x)))
+*(x::TransposeAbsVec, D::Diagonal) = Transpose(map((t,s) -> transpose(t)*s, D.diag, parent(x)))
+*(x::AdjointAbsVec,   D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
+*(x::TransposeAbsVec, D::Diagonal, y::AbstractVector) = _mapreduce_prod(*, x, D, y)
+/(u::AdjointAbsVec, D::Diagonal) = adjoint(adjoint(D) \ u.parent)
+/(u::TransposeAbsVec, D::Diagonal) = transpose(transpose(D) \ u.parent)
+# disambiguation methods: Call unoptimized version for user defined AbstractTriangular.
+*(A::AbstractTriangular, D::Diagonal) = Base.@invoke *(A::AbstractMatrix, D::Diagonal)
+*(D::Diagonal, A::AbstractTriangular) = Base.@invoke *(D::Diagonal, A::AbstractMatrix)
+
 dot(x::AbstractVector, D::Diagonal, y::AbstractVector) = _mapreduce_prod(dot, x, D, y)
 
+dot(A::Diagonal, B::Diagonal) = dot(A.diag, B.diag)
+function dot(D::Diagonal, B::AbstractMatrix)
+    size(D) == size(B) || throw(DimensionMismatch("Matrix sizes $(size(D)) and $(size(B)) differ"))
+    return dot(D.diag, view(B, diagind(B)))
+end
+
+dot(A::AbstractMatrix, B::Diagonal) = conj(dot(B, A))
+
 function _mapreduce_prod(f, x, D::Diagonal, y)
     if isempty(x) && isempty(D) && isempty(y)
-        return zero(Base.promote_op(f, eltype(x), eltype(D), eltype(y)))
+        return zero(promote_op(f, eltype(x), eltype(D), eltype(y)))
     else
         return mapreduce(t -> f(t[1], t[2], t[3]), +, zip(x, D.diag, y))
     end
 end
 
-
-function cholesky!(A::Diagonal, ::Val{false} = Val(false); check::Bool = true)
+function cholesky!(A::Diagonal, ::NoPivot = NoPivot(); check::Bool = true)
     info = 0
     for (i, di) in enumerate(A.diag)
         if isreal(di) && real(di) > 0
@@ -728,9 +789,11 @@ function cholesky!(A::Diagonal, ::Val{false} = Val(false); check::Bool = true)
     end
     Cholesky(A, 'U', convert(BlasInt, info))
 end
+@deprecate cholesky!(A::Diagonal, ::Val{false}; check::Bool = true) cholesky!(A::Diagonal, NoPivot(); check) false
+@deprecate cholesky(A::Diagonal, ::Val{false}; check::Bool = true) cholesky(A::Diagonal, NoPivot(); check) false
 
-cholesky(A::Diagonal, ::Val{false} = Val(false); check::Bool = true) =
-    cholesky!(cholcopy(A), Val(false); check = check)
+@inline cholcopy(A::Diagonal) = copymutable_oftype(A, choltype(A))
+@inline cholcopy(A::RealHermSymComplexHerm{<:Real,<:Diagonal}) = copymutable_oftype(A, choltype(A))
 
 function getproperty(C::Cholesky{<:Any,<:Diagonal}, d::Symbol)
     Cfactors = getfield(C, :factors)
@@ -760,3 +823,7 @@ function logabsdet(A::Diagonal)
      mapreduce(x -> (log(abs(x)), sign(x)), ((d1, s1), (d2, s2)) -> (d1 + d2, s1 * s2),
                A.diag)
 end
+
+function Base.muladd(A::Diagonal, B::Diagonal, z::Diagonal)
+    Diagonal(A.diag .* B.diag .+ z.diag)
+end
diff --git a/stdlib/LinearAlgebra/src/eigen.jl b/stdlib/LinearAlgebra/src/eigen.jl
index 3a423a87295f35..14de91a9180afc 100644
--- a/stdlib/LinearAlgebra/src/eigen.jl
+++ b/stdlib/LinearAlgebra/src/eigen.jl
@@ -140,7 +140,8 @@ end
 sorteig!(λ::AbstractVector, sortby::Union{Function,Nothing}=eigsortby) = sortby === nothing ? λ : sort!(λ, by=sortby)
 
 """
-    eigen!(A, [B]; permute, scale, sortby)
+    eigen!(A; permute, scale, sortby)
+    eigen!(A, B; sortby)
 
 Same as [`eigen`](@ref), but saves space by overwriting the input `A` (and
 `B`), instead of creating a copy.
@@ -179,7 +180,7 @@ end
 """
     eigen(A; permute::Bool=true, scale::Bool=true, sortby) -> Eigen
 
-Computes the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
+Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
 which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
 matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
 
@@ -232,10 +233,18 @@ true
 ```
 """
 function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where T
-    AA = copy_oftype(A, eigtype(T))
+    AA = copymutable_oftype(A, eigtype(T))
     isdiag(AA) && return eigen(Diagonal(AA); permute=permute, scale=scale, sortby=sortby)
     return eigen!(AA; permute=permute, scale=scale, sortby=sortby)
 end
+function eigen(A::AbstractMatrix{T}; permute::Bool=true, scale::Bool=true, sortby::Union{Function,Nothing}=eigsortby) where {T <: Union{Float16,Complex{Float16}}}
+    AA = copymutable_oftype(A, eigtype(T))
+    isdiag(AA) && return eigen(Diagonal(AA); permute=permute, scale=scale, sortby=sortby)
+    A = eigen!(AA; permute, scale, sortby)
+    values = convert(AbstractVector{isreal(A.values) ? Float16 : Complex{Float16}}, A.values)
+    vectors = convert(AbstractMatrix{isreal(A.vectors) ? Float16 : Complex{Float16}}, A.vectors)
+    return Eigen(values, vectors)
+end
 eigen(x::Number) = Eigen([x], fill(one(x), 1, 1))
 
 """
@@ -308,7 +317,7 @@ Return the eigenvalues of `A`.
 
 For general non-symmetric matrices it is possible to specify how the matrix is balanced
 before the eigenvalue calculation. The `permute`, `scale`, and `sortby` keywords are
-the same as for [`eigen!`](@ref).
+the same as for [`eigen`](@ref).
 
 # Examples
 ```jldoctest
@@ -324,7 +333,7 @@ julia> eigvals(diag_matrix)
 ```
 """
 eigvals(A::AbstractMatrix{T}; kws...) where T =
-    eigvals!(copy_oftype(A, eigtype(T)); kws...)
+    eigvals!(copymutable_oftype(A, eigtype(T)); kws...)
 
 """
 For a scalar input, `eigvals` will return a scalar.
@@ -454,17 +463,18 @@ function eigen!(A::StridedMatrix{T}, B::StridedMatrix{T}; sortby::Union{Function
 end
 
 """
-    eigen(A, B) -> GeneralizedEigen
+    eigen(A, B; sortby) -> GeneralizedEigen
 
-Computes the generalized eigenvalue decomposition of `A` and `B`, returning a
+Compute the generalized eigenvalue decomposition of `A` and `B`, returning a
 [`GeneralizedEigen`](@ref) factorization object `F` which contains the generalized eigenvalues in
 `F.values` and the generalized eigenvectors in the columns of the matrix `F.vectors`.
 (The `k`th generalized eigenvector can be obtained from the slice `F.vectors[:, k]`.)
 
 Iterating the decomposition produces the components `F.values` and `F.vectors`.
 
-Any keyword arguments passed to `eigen` are passed through to the lower-level
-[`eigen!`](@ref) function.
+By default, the eigenvalues and vectors are sorted lexicographically by `(real(λ),imag(λ))`.
+A different comparison function `by(λ)` can be passed to `sortby`, or you can pass
+`sortby=nothing` to leave the eigenvalues in an arbitrary order.
 
 # Examples
 ```jldoctest
@@ -498,7 +508,7 @@ true
 """
 function eigen(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB}
     S = promote_type(eigtype(TA),TB)
-    eigen!(copy_oftype(A, S), copy_oftype(B, S); kws...)
+    eigen!(copymutable_oftype(A, S), copymutable_oftype(B, S); kws...)
 end
 
 eigen(A::Number, B::Number) = eigen(fill(A,1,1), fill(B,1,1))
@@ -555,7 +565,7 @@ end
 """
     eigvals(A, B) -> values
 
-Computes the generalized eigenvalues of `A` and `B`.
+Compute the generalized eigenvalues of `A` and `B`.
 
 # Examples
 ```jldoctest
@@ -577,7 +587,7 @@ julia> eigvals(A,B)
 """
 function eigvals(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}; kws...) where {TA,TB}
     S = promote_type(eigtype(TA),TB)
-    return eigvals!(copy_oftype(A, S), copy_oftype(B, S); kws...)
+    return eigvals!(copymutable_oftype(A, S), copymutable_oftype(B, S); kws...)
 end
 
 """
@@ -614,6 +624,16 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Union{Eigen,Generaliz
     show(io, mime, F.vectors)
 end
 
+function Base.hash(F::Eigen, h::UInt)
+    return hash(F.values, hash(F.vectors, hash(Eigen, h)))
+end
+function Base.:(==)(A::Eigen, B::Eigen)
+    return A.values == B.values && A.vectors == B.vectors
+end
+function Base.isequal(A::Eigen, B::Eigen)
+    return isequal(A.values, B.values) && isequal(A.vectors, B.vectors)
+end
+
 # Conversion methods
 
 ## Can we determine the source/result is Real?  This is not stored in the type Eigen
diff --git a/stdlib/LinearAlgebra/src/exceptions.jl b/stdlib/LinearAlgebra/src/exceptions.jl
index 6704a9ac6ae4dc..ae29b8bc2f7b92 100644
--- a/stdlib/LinearAlgebra/src/exceptions.jl
+++ b/stdlib/LinearAlgebra/src/exceptions.jl
@@ -59,4 +59,4 @@ struct ZeroPivotException <: Exception
 end
 function Base.showerror(io::IO, ex::ZeroPivotException)
     print(io, "ZeroPivotException: factorization encountered one or more zero pivots. Consider switching to a pivoted LU factorization.")
-end
\ No newline at end of file
+end
diff --git a/stdlib/LinearAlgebra/src/factorization.jl b/stdlib/LinearAlgebra/src/factorization.jl
index 3e335ed391ad69..bfaffd0dccd143 100644
--- a/stdlib/LinearAlgebra/src/factorization.jl
+++ b/stdlib/LinearAlgebra/src/factorization.jl
@@ -16,9 +16,9 @@ size(F::Adjoint{<:Any,<:Factorization}) = reverse(size(parent(F)))
 size(F::Transpose{<:Any,<:Factorization}) = reverse(size(parent(F)))
 
 checkpositivedefinite(info) = info == 0 || throw(PosDefException(info))
-checknonsingular(info, pivoted::Val{true}) = info == 0 || throw(SingularException(info))
-checknonsingular(info, pivoted::Val{false}) = info == 0 || throw(ZeroPivotException(info))
-checknonsingular(info) = checknonsingular(info, Val{true}())
+checknonsingular(info, ::RowMaximum) = info == 0 || throw(SingularException(info))
+checknonsingular(info, ::NoPivot) = info == 0 || throw(ZeroPivotException(info))
+checknonsingular(info) = checknonsingular(info, RowMaximum())
 
 """
     issuccess(F::Factorization)
@@ -59,6 +59,9 @@ convert(::Type{T}, f::Factorization) where {T<:AbstractArray} = T(f)
 
 ### General promotion rules
 Factorization{T}(F::Factorization{T}) where {T} = F
+# This is a bit odd since the return is not a Factorization but it works well in generic code
+Factorization{T}(A::Adjoint{<:Any,<:Factorization}) where {T} =
+    adjoint(Factorization{T}(parent(A)))
 inv(F::Factorization{T}) where {T} = (n = size(F, 1); ldiv!(F, Matrix{T}(I, n, n)))
 
 Base.hash(F::Factorization, h::UInt) = mapreduce(f -> hash(getfield(F, f)), hash, 1:nfields(F); init=h)
@@ -96,42 +99,33 @@ function (/)(B::VecOrMat{Complex{T}}, F::Factorization{T}) where T<:BlasReal
     return copy(reinterpret(Complex{T}, x))
 end
 
-function \(F::Factorization, B::AbstractVecOrMat)
+function \(F::Union{Factorization, Adjoint{<:Any,<:Factorization}}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    BB = similar(B, TFB, size(B))
-    copyto!(BB, B)
-    ldiv!(F, BB)
-end
-function \(adjF::Adjoint{<:Any,<:Factorization}, B::AbstractVecOrMat)
-    require_one_based_indexing(B)
-    F = adjF.parent
-    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    BB = similar(B, TFB, size(B))
-    copyto!(BB, B)
-    ldiv!(adjoint(F), BB)
+    ldiv!(F, copy_similar(B, TFB))
 end
 
-function /(B::AbstractMatrix, F::Factorization)
-    require_one_based_indexing(B)
-    TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    BB = similar(B, TFB, size(B))
-    copyto!(BB, B)
-    rdiv!(BB, F)
-end
-function /(B::AbstractMatrix, adjF::Adjoint{<:Any,<:Factorization})
+function /(B::AbstractMatrix, F::Union{Factorization, Adjoint{<:Any,<:Factorization}})
     require_one_based_indexing(B)
-    F = adjF.parent
     TFB = typeof(oneunit(eltype(B)) / oneunit(eltype(F)))
-    BB = similar(B, TFB, size(B))
-    copyto!(BB, B)
-    rdiv!(BB, adjoint(F))
+    rdiv!(copy_similar(B, TFB), F)
 end
 /(adjB::AdjointAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjB.parent)
 /(B::TransposeAbsVec, adjF::Adjoint{<:Any,<:Factorization}) = adjoint(adjF.parent \ adjoint(B))
 
-# support the same 3-arg idiom as in our other in-place A_*_B functions:
-function ldiv!(Y::AbstractVecOrMat, A::Factorization, B::AbstractVecOrMat)
+
+function ldiv!(Y::AbstractVector, A::Factorization, B::AbstractVector)
+    require_one_based_indexing(Y, B)
+    m, n = size(A, 1), size(A, 2)
+    if m > n
+        Bc = copy(B)
+        ldiv!(A, Bc)
+        return copyto!(Y, 1, Bc, 1, n)
+    else
+        return ldiv!(A, copyto!(Y, B))
+    end
+end
+function ldiv!(Y::AbstractMatrix, A::Factorization, B::AbstractMatrix)
     require_one_based_indexing(Y, B)
     m, n = size(A, 1), size(A, 2)
     if m > n
@@ -139,7 +133,8 @@ function ldiv!(Y::AbstractVecOrMat, A::Factorization, B::AbstractVecOrMat)
         ldiv!(A, Bc)
         return copyto!(Y, view(Bc, 1:n, :))
     else
-        return ldiv!(A, copyto!(Y, view(B, 1:m, :)))
+        copyto!(view(Y, 1:m, :), view(B, 1:m, :))
+        return ldiv!(A, Y)
     end
 end
 
diff --git a/stdlib/LinearAlgebra/src/generic.jl b/stdlib/LinearAlgebra/src/generic.jl
index 12a2040dd326ca..c79849535ad0aa 100644
--- a/stdlib/LinearAlgebra/src/generic.jl
+++ b/stdlib/LinearAlgebra/src/generic.jl
@@ -2,6 +2,14 @@
 
 ## linalg.jl: Some generic Linear Algebra definitions
 
+# Elements of `out` may not be defined (e.g., for `BigFloat`). To make
+# `mul!(out, A, B)` work for such cases, `out .*ₛ beta` short-circuits
+# `out * beta`.  Using `broadcasted` to avoid the multiplication
+# inside this function.
+function *ₛ end
+Broadcast.broadcasted(::typeof(*ₛ), out, beta) =
+    iszero(beta::Number) ? false : broadcasted(*, out, beta)
+
 """
     MulAddMul(alpha, beta)
 
@@ -121,10 +129,22 @@ match the length of the second, $(length(X))."))
     C
 end
 
-@inline mul!(C::AbstractArray, s::Number, X::AbstractArray, alpha::Number, beta::Number) =
-    generic_mul!(C, s, X, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractArray, X::AbstractArray, s::Number, alpha::Number, beta::Number) =
-    generic_mul!(C, X, s, MulAddMul(alpha, beta))
+@inline function mul!(C::AbstractArray, s::Number, X::AbstractArray, alpha::Number, beta::Number)
+    if axes(C) == axes(X)
+        C .= (s .* X) .*ₛ alpha .+ C .*ₛ beta
+    else
+        generic_mul!(C, s, X, MulAddMul(alpha, beta))
+    end
+    return C
+end
+@inline function mul!(C::AbstractArray, X::AbstractArray, s::Number, alpha::Number, beta::Number)
+    if axes(C) == axes(X)
+        C .= (X .* s) .*ₛ alpha .+ C .*ₛ beta
+    else
+        generic_mul!(C, X, s, MulAddMul(alpha, beta))
+    end
+    return C
+end
 
 # For better performance when input and output are the same array
 # See https://github.com/JuliaLang/julia/issues/8415#issuecomment-56608729
@@ -428,45 +448,11 @@ diag(A::AbstractVector) = throw(ArgumentError("use diagm instead of diag to cons
 # Dot products and norms
 
 # special cases of norm; note that they don't need to handle isempty(x)
-function generic_normMinusInf(x)
-    (v, s) = iterate(x)::Tuple
-    minabs = norm(v)
-    while true
-        y = iterate(x, s)
-        y === nothing && break
-        (v, s) = y
-        vnorm = norm(v)
-        minabs = ifelse(isnan(minabs) | (minabs < vnorm), minabs, vnorm)
-    end
-    return float(minabs)
-end
+generic_normMinusInf(x) = float(mapreduce(norm, min, x))
 
-function generic_normInf(x)
-    (v, s) = iterate(x)::Tuple
-    maxabs = norm(v)
-    while true
-        y = iterate(x, s)
-        y === nothing && break
-        (v, s) = y
-        vnorm = norm(v)
-        maxabs = ifelse(isnan(maxabs) | (maxabs > vnorm), maxabs, vnorm)
-    end
-    return float(maxabs)
-end
+generic_normInf(x) = float(mapreduce(norm, max, x))
 
-function generic_norm1(x)
-    (v, s) = iterate(x)::Tuple
-    av = float(norm(v))
-    T = typeof(av)
-    sum::promote_type(Float64, T) = av
-    while true
-        y = iterate(x, s)
-        y === nothing && break
-        (v, s) = y
-        sum += norm(v)
-    end
-    return convert(T, sum)
-end
+generic_norm1(x) = mapreduce(float ∘ norm, +, x)
 
 # faster computation of norm(x)^2, avoiding overflow for integers
 norm_sqr(x) = norm(x)^2
@@ -475,10 +461,10 @@ norm_sqr(x::Union{T,Complex{T},Rational{T}}) where {T<:Integer} = abs2(float(x))
 
 function generic_norm2(x)
     maxabs = normInf(x)
-    (maxabs == 0 || isinf(maxabs)) && return maxabs
+    (iszero(maxabs) || isinf(maxabs)) && return maxabs
     (v, s) = iterate(x)::Tuple
     T = typeof(maxabs)
-    if isfinite(length(x)*maxabs*maxabs) && maxabs*maxabs != 0 # Scaling not necessary
+    if isfinite(length(x)*maxabs*maxabs) && !iszero(maxabs*maxabs) # Scaling not necessary
         sum::promote_type(Float64, T) = norm_sqr(v)
         while true
             y = iterate(x, s)
@@ -505,13 +491,13 @@ function generic_normp(x, p)
     (v, s) = iterate(x)::Tuple
     if p > 1 || p < -1 # might need to rescale to avoid overflow
         maxabs = p > 1 ? normInf(x) : normMinusInf(x)
-        (maxabs == 0 || isinf(maxabs)) && return maxabs
+        (iszero(maxabs) || isinf(maxabs)) && return maxabs
         T = typeof(maxabs)
     else
         T = typeof(float(norm(v)))
     end
     spp::promote_type(Float64, T) = p
-    if -1 <= p <= 1 || (isfinite(length(x)*maxabs^spp) && maxabs^spp != 0) # scaling not necessary
+    if -1 <= p <= 1 || (isfinite(length(x)*maxabs^spp) && !iszero(maxabs^spp)) # scaling not necessary
         sum::promote_type(Float64, T) = norm(v)^spp
         while true
             y = iterate(x, s)
@@ -647,7 +633,7 @@ julia> norm(-2, Inf)
 @inline function norm(x::Number, p::Real=2)
     afx = abs(float(x))
     if p == 0
-        if x == 0
+        if iszero(x)
             return zero(afx)
         elseif !isnan(x)
             return oneunit(afx)
@@ -682,9 +668,10 @@ end
 function opnorm2(A::AbstractMatrix{T}) where T
     require_one_based_indexing(A)
     m,n = size(A)
-    if m == 1 || n == 1 return norm2(A) end
     Tnorm = typeof(float(real(zero(T))))
-    (m == 0 || n == 0) ? zero(Tnorm) : convert(Tnorm, svdvals(A)[1])
+    if m == 0 || n == 0 return zero(Tnorm) end
+    if m == 1 || n == 1 return norm2(A) end
+    return svdvals(A)[1]
 end
 
 function opnormInf(A::AbstractMatrix{T}) where T
@@ -898,6 +885,11 @@ function dot(x::AbstractArray, y::AbstractArray)
     s
 end
 
+function dot(x::Adjoint{<:Union{Real,Complex}}, y::Adjoint{<:Union{Real,Complex}})
+    return conj(dot(parent(x), parent(y)))
+end
+dot(x::Transpose, y::Transpose) = dot(parent(x), parent(y))
+
 """
     dot(x, A, y)
 
@@ -986,7 +978,7 @@ function rank(A::AbstractMatrix; atol::Real = 0.0, rtol::Real = (min(size(A)...)
     tol = max(atol, rtol*s[1])
     count(x -> x > tol, s)
 end
-rank(x::Number) = x == 0 ? 0 : 1
+rank(x::Number) = iszero(x) ? 0 : 1
 
 """
     tr(M)
@@ -1086,6 +1078,8 @@ When `A` is sparse, a similar polyalgorithm is used. For indefinite matrices, th
 factorization does not use pivoting during the numerical factorization and therefore the
 procedure can fail even for invertible matrices.
 
+See also: [`factorize`](@ref), [`pinv`](@ref).
+
 # Examples
 ```jldoctest
 julia> A = [1 0; 1 -2]; B = [32; -4];
@@ -1115,19 +1109,40 @@ function (\)(A::AbstractMatrix, B::AbstractVecOrMat)
         end
         return lu(A) \ B
     end
-    return qr(A,Val(true)) \ B
+    return qr(A, ColumnNorm()) \ B
 end
 
 (\)(a::AbstractVector, b::AbstractArray) = pinv(a) * b
+"""
+    A / B
+
+Matrix right-division: `A / B` is equivalent to `(B' \\ A')'` where [`\\`](@ref) is the left-division operator.
+For square matrices, the result `X` is such that `A == X*B`.
+
+See also: [`rdiv!`](@ref).
+
+# Examples
+```jldoctest
+julia> A = Float64[1 4 5; 3 9 2]; B = Float64[1 4 2; 3 4 2; 8 7 1];
+
+julia> X = A / B
+2×3 Matrix{Float64}:
+ -0.65   3.75  -1.2
+  3.25  -2.75   1.0
+
+julia> isapprox(A, X*B)
+true
+
+julia> isapprox(X, A*pinv(B))
+true
+```
+"""
 function (/)(A::AbstractVecOrMat, B::AbstractVecOrMat)
     size(A,2) != size(B,2) && throw(DimensionMismatch("Both inputs should have the same number of columns"))
     return copy(adjoint(adjoint(B) \ adjoint(A)))
 end
-# \(A::StridedMatrix,x::Number) = inv(A)*x Should be added at some point when the old elementwise version has been deprecated long enough
-# /(x::Number,A::StridedMatrix) = x*inv(A)
-/(x::Number, v::AbstractVector) = x*pinv(v)
 
-cond(x::Number) = x == 0 ? Inf : 1.0
+cond(x::Number) = iszero(x) ? Inf : 1.0
 cond(x::Number, p) = cond(x)
 
 #Skeel condition numbers
@@ -1266,15 +1281,17 @@ false
 """
 function istriu(A::AbstractMatrix, k::Integer = 0)
     require_one_based_indexing(A)
+    return _istriu(A, k)
+end
+istriu(x::Number) = true
+
+@inline function _istriu(A::AbstractMatrix, k)
     m, n = size(A)
     for j in 1:min(n, m + k - 1)
-        for i in max(1, j - k + 1):m
-            iszero(A[i, j]) || return false
-        end
+        all(iszero, view(A, max(1, j - k + 1):m, j)) || return false
     end
     return true
 end
-istriu(x::Number) = true
 
 """
     istril(A::AbstractMatrix, k::Integer = 0) -> Bool
@@ -1308,15 +1325,17 @@ false
 """
 function istril(A::AbstractMatrix, k::Integer = 0)
     require_one_based_indexing(A)
+    return _istril(A, k)
+end
+istril(x::Number) = true
+
+@inline function _istril(A::AbstractMatrix, k)
     m, n = size(A)
     for j in max(1, k + 2):n
-        for i in 1:min(j - k - 1, m)
-            iszero(A[i, j]) || return false
-        end
+        all(iszero, view(A, 1:min(j - k - 1, m), j)) || return false
     end
     return true
 end
-istril(x::Number) = true
 
 """
     isbanded(A::AbstractMatrix, kl::Integer, ku::Integer) -> Bool
@@ -1378,9 +1397,27 @@ true
 isdiag(A::AbstractMatrix) = isbanded(A, 0, 0)
 isdiag(x::Number) = true
 
+"""
+    axpy!(α, x::AbstractArray, y::AbstractArray)
+
+Overwrite `y` with `x * α + y` and return `y`.
+If `x` and `y` have the same axes, it's equivalent with `y .+= x .* a`
 
-# BLAS-like in-place y = x*α+y function (see also the version in blas.jl
-#                                          for BlasFloat Arrays)
+See also [`BLAS.axpy!`](@ref)
+
+# Examples
+```jldoctest
+julia> x = [1; 2; 3];
+
+julia> y = [4; 5; 6];
+
+julia> axpy!(2, x, y)
+3-element Vector{Int64}:
+  6
+  9
+ 12
+```
+"""
 function axpy!(α, x::AbstractArray, y::AbstractArray)
     n = length(x)
     if n != length(y)
@@ -1406,6 +1443,27 @@ function axpy!(α, x::AbstractArray, rx::AbstractArray{<:Integer}, y::AbstractAr
     y
 end
 
+"""
+    axpby!(α, x::AbstractArray, β, y::AbstractArray)
+
+Overwrite `y` with `x * α + y * β` and return `y`.
+If `x` and `y` have the same axes, it's equivalent with `y .= x .* a .+ y .* β`
+
+See also [`BLAS.axpby!`](@ref)
+
+# Examples
+```jldoctest
+julia> x = [1; 2; 3];
+
+julia> y = [4; 5; 6];
+
+julia> axpby!(2, x, 2, y)
+3-element Vector{Int64}:
+ 10
+ 14
+ 18
+```
+"""
 function axpby!(α, x::AbstractArray, β, y::AbstractArray)
     if length(x) != length(y)
         throw(DimensionMismatch("x has length $(length(x)), but y has length $(length(y))"))
@@ -1416,6 +1474,24 @@ function axpby!(α, x::AbstractArray, β, y::AbstractArray)
     y
 end
 
+DenseLike{T} = Union{DenseArray{T}, Base.StridedReshapedArray{T}, Base.StridedReinterpretArray{T}}
+StridedVecLike{T} = Union{DenseLike{T}, Base.FastSubArray{T,<:Any,<:DenseLike{T}}}
+axpy!(α::Number, x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasFloat} = BLAS.axpy!(α, x, y)
+axpby!(α::Number, x::StridedVecLike{T}, β::Number, y::StridedVecLike{T}) where {T<:BlasFloat} = BLAS.axpby!(α, x, β, y)
+function axpy!(α::Number,
+    x::StridedVecLike{T}, rx::AbstractRange{<:Integer},
+    y::StridedVecLike{T}, ry::AbstractRange{<:Integer},
+) where {T<:BlasFloat}
+    if Base.has_offset_axes(rx, ry)
+        return Base.@invoke axpy!(α,
+            x::AbstractArray, rx::AbstractArray{<:Integer},
+            y::AbstractArray, ry::AbstractArray{<:Integer},
+        )
+    end
+    @views BLAS.axpy!(α, x[rx], y[ry])
+    return y
+end
+
 """
     rotate!(x, y, c, s)
 
@@ -1464,20 +1540,17 @@ end
 
 # Elementary reflection similar to LAPACK. The reflector is not Hermitian but
 # ensures that tridiagonalization of Hermitian matrices become real. See lawn72
-@inline function reflector!(x::AbstractVector)
+@inline function reflector!(x::AbstractVector{T}) where {T}
     require_one_based_indexing(x)
     n = length(x)
+    n == 0 && return zero(eltype(x))
     @inbounds begin
         ξ1 = x[1]
-        normu = abs2(ξ1)
-        for i = 2:n
-            normu += abs2(x[i])
-        end
+        normu = norm(x)
         if iszero(normu)
             return zero(ξ1/normu)
         end
-        normu = sqrt(normu)
-        ν = copysign(normu, real(ξ1))
+        ν = T(copysign(normu, real(ξ1)))
         ξ1 += ν
         x[1] = -ν
         for i = 2:n
@@ -1488,28 +1561,18 @@ end
 end
 
 # apply reflector from left
-@inline function reflectorApply!(x::AbstractVector, τ::Number, A::StridedMatrix)
+@inline function reflectorApply!(x::AbstractVector, τ::Number, A::AbstractVecOrMat)
     require_one_based_indexing(x)
-    m, n = size(A)
+    m, n = size(A, 1), size(A, 2)
     if length(x) != m
         throw(DimensionMismatch("reflector has length $(length(x)), which must match the first dimension of matrix A, $m"))
     end
-    @inbounds begin
-        for j = 1:n
-            # dot
-            vAj = A[1, j]
-            for i = 2:m
-                vAj += x[i]'*A[i, j]
-            end
-
-            vAj = conj(τ)*vAj
-
-            # ger
-            A[1, j] -= vAj
-            for i = 2:m
-                A[i, j] -= x[i]*vAj
-            end
-        end
+    m == 0 && return A
+    @inbounds for j = 1:n
+        Aj, xj = view(A, 2:m, j), view(x, 2:m)
+        vAj = conj(τ)*(A[1, j] + dot(xj, Aj))
+        A[1, j] -= vAj
+        axpy!(-vAj, xj, Aj)
     end
     return A
 end
@@ -1519,6 +1582,8 @@ end
 
 Matrix determinant.
 
+See also: [`logdet`](@ref) and [`logabsdet`](@ref).
+
 # Examples
 ```jldoctest
 julia> M = [1 0; 2 2]
@@ -1530,15 +1595,18 @@ julia> det(M)
 2.0
 ```
 """
-function det(A::AbstractMatrix{T}) where T
+function det(A::AbstractMatrix{T}) where {T}
     if istriu(A) || istril(A)
-        S = typeof((one(T)*zero(T) + zero(T))/one(T))
+        S = promote_type(T, typeof((one(T)*zero(T) + zero(T))/one(T)))
         return convert(S, det(UpperTriangular(A)))
     end
     return det(lu(A; check = false))
 end
 det(x::Number) = x
 
+# Resolve Issue #40128
+det(A::AbstractMatrix{BigInt}) = det_bareiss(A)
+
 """
     logabsdet(M)
 
@@ -1572,6 +1640,8 @@ julia> logabsdet(B)
 """
 logabsdet(A::AbstractMatrix) = logabsdet(lu(A, check=false))
 
+logabsdet(a::Number) = log(abs(a)), sign(a)
+
 """
     logdet(M)
 
@@ -1601,6 +1671,55 @@ logdet(A) = log(det(A))
 
 const NumberArray{T<:Number} = AbstractArray{T}
 
+exactdiv(a, b) = a/b
+exactdiv(a::Integer, b::Integer) = div(a, b)
+
+"""
+    det_bareiss!(M)
+
+Calculates the determinant of a matrix using the
+[Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm) using
+inplace operations.
+
+# Examples
+```jldoctest
+julia> M = [1 0; 2 2]
+2×2 Matrix{Int64}:
+ 1  0
+ 2  2
+
+julia> LinearAlgebra.det_bareiss!(M)
+2
+```
+"""
+function det_bareiss!(M)
+    n = checksquare(M)
+    sign, prev = Int8(1), one(eltype(M))
+    for i in 1:n-1
+        if iszero(M[i,i]) # swap with another col to make nonzero
+            swapto = findfirst(!iszero, @view M[i,i+1:end])
+            isnothing(swapto) && return zero(prev)
+            sign = -sign
+            Base.swapcols!(M, i, i + swapto)
+        end
+        for k in i+1:n, j in i+1:n
+            M[j,k] = exactdiv(M[j,k]*M[i,i] - M[j,i]*M[i,k], prev)
+        end
+        prev = M[i,i]
+    end
+    return sign * M[end,end]
+end
+"""
+    LinearAlgebra.det_bareiss(M)
+
+Calculates the determinant of a matrix using the
+[Bareiss Algorithm](https://en.wikipedia.org/wiki/Bareiss_algorithm).
+Also refer to [`det_bareiss!`](@ref).
+"""
+det_bareiss(M) = det_bareiss!(copy(M))
+
+
+
 """
     promote_leaf_eltypes(itr)
 
@@ -1653,7 +1772,7 @@ function normalize!(a::AbstractArray, p::Real=2)
     __normalize!(a, nrm)
 end
 
-@inline function __normalize!(a::AbstractArray, nrm::AbstractFloat)
+@inline function __normalize!(a::AbstractArray, nrm::Real)
     # The largest positive floating point number whose inverse is less than infinity
     δ = inv(prevfloat(typemax(nrm)))
 
@@ -1671,11 +1790,12 @@ end
 end
 
 """
-    normalize(a::AbstractArray, p::Real=2)
+    normalize(a, p::Real=2)
 
-Normalize the array `a` so that its `p`-norm equals unity,
-i.e. `norm(a, p) == 1`.
-See also [`normalize!`](@ref) and [`norm`](@ref).
+Normalize `a` so that its `p`-norm equals unity,
+i.e. `norm(a, p) == 1`. For scalars, this is similar to sign(a),
+except normalize(0) = NaN.
+See also [`normalize!`](@ref), [`norm`](@ref), and [`sign`](@ref).
 
 # Examples
 ```jldoctest
@@ -1712,15 +1832,26 @@ julia> normalize(a)
  0.154303  0.308607  0.617213
  0.154303  0.308607  0.617213
 
+julia> normalize(3, 1)
+1.0
+
+julia> normalize(-8, 1)
+-1.0
+
+julia> normalize(0, 1)
+NaN
 ```
 """
 function normalize(a::AbstractArray, p::Real = 2)
     nrm = norm(a, p)
     if !isempty(a)
-        aa = copy_oftype(a, typeof(first(a)/nrm))
+        aa = copymutable_oftype(a, typeof(first(a)/nrm))
         return __normalize!(aa, nrm)
     else
         T = typeof(zero(eltype(a))/nrm)
         return T[]
     end
 end
+
+normalize(x) = x / norm(x)
+normalize(x, p::Real) = x / norm(x, p)
diff --git a/stdlib/LinearAlgebra/src/givens.jl b/stdlib/LinearAlgebra/src/givens.jl
index 42709940188112..155d8d6f23ce63 100644
--- a/stdlib/LinearAlgebra/src/givens.jl
+++ b/stdlib/LinearAlgebra/src/givens.jl
@@ -8,7 +8,7 @@ transpose(R::AbstractRotation) = error("transpose not implemented for $(typeof(R
 
 function (*)(R::AbstractRotation{T}, A::AbstractVecOrMat{S}) where {T,S}
     TS = typeof(zero(T)*zero(S) + zero(T)*zero(S))
-    lmul!(convert(AbstractRotation{TS}, R), TS == S ? copy(A) : convert(AbstractArray{TS}, A))
+    lmul!(convert(AbstractRotation{TS}, R), copy_similar(A, TS))
 end
 (*)(A::AbstractVector, adjR::Adjoint{<:Any,<:AbstractRotation}) = _absvecormat_mul_adjrot(A, adjR)
 (*)(A::AbstractMatrix, adjR::Adjoint{<:Any,<:AbstractRotation}) = _absvecormat_mul_adjrot(A, adjR)
@@ -31,7 +31,7 @@ conjugated transpose right multiplication `A*G'`. The type doesn't have a `size`
 therefore be multiplied with matrices of arbitrary size as long as `i2<=size(A,2)` for
 `G*A` or `i2<=size(A,1)` for `A*G'`.
 
-See also: [`givens`](@ref)
+See also [`givens`](@ref).
 """
 struct Givens{T} <: AbstractRotation{T}
     i1::Int
@@ -282,7 +282,7 @@ y[i1] = r
 y[i2] = 0
 ```
 
-See also: [`LinearAlgebra.Givens`](@ref)
+See also [`LinearAlgebra.Givens`](@ref).
 """
 function givens(f::T, g::T, i1::Integer, i2::Integer) where T
     if i1 == i2
@@ -308,7 +308,7 @@ B[i1,j] = r
 B[i2,j] = 0
 ```
 
-See also: [`LinearAlgebra.Givens`](@ref)
+See also [`LinearAlgebra.Givens`](@ref).
 """
 givens(A::AbstractMatrix, i1::Integer, i2::Integer, j::Integer) =
     givens(A[i1,j], A[i2,j],i1,i2)
@@ -327,7 +327,7 @@ B[i1] = r
 B[i2] = 0
 ```
 
-See also: [`LinearAlgebra.Givens`](@ref)
+See also [`LinearAlgebra.Givens`](@ref).
 """
 givens(x::AbstractVector, i1::Integer, i2::Integer) =
     givens(x[i1], x[i2], i1, i2)
@@ -403,11 +403,6 @@ end
 *(A::Adjoint{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
 *(A::Transpose{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
 *(A::Transpose{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
-# disambiguation methods: *(Adj/Trans of AbsTri or RealHermSymComplex{Herm|Sym}, Adj of AbstractRotation)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:AbstractRotation}) = copy(A) * B
 # disambiguation methods: *(Diag/AbsTri, Adj of AbstractRotation)
 *(A::Diagonal, B::Adjoint{<:Any,<:AbstractRotation}) = A * copy(B)
 *(A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractRotation}) = A * copy(B)
diff --git a/stdlib/LinearAlgebra/src/hessenberg.jl b/stdlib/LinearAlgebra/src/hessenberg.jl
index 7ffd5d4e792f5d..a95a73dfc8819d 100644
--- a/stdlib/LinearAlgebra/src/hessenberg.jl
+++ b/stdlib/LinearAlgebra/src/hessenberg.jl
@@ -9,6 +9,9 @@
 Construct an `UpperHessenberg` view of the matrix `A`.
 Entries of `A` below the first subdiagonal are ignored.
 
+!!! compat "Julia 1.3"
+    This type was added in Julia 1.3.
+
 Efficient algorithms are implemented for `H \\ b`, `det(H)`, and similar.
 
 See also the [`hessenberg`](@ref) function to factor any matrix into a similar
@@ -60,6 +63,8 @@ parent(H::UpperHessenberg) = H.data
 similar(H::UpperHessenberg, ::Type{T}) where {T} = UpperHessenberg(similar(H.data, T))
 similar(H::UpperHessenberg, ::Type{T}, dims::Dims{N}) where {T,N} = similar(H.data, T, dims)
 
+AbstractMatrix{T}(H::UpperHessenberg) where {T} = UpperHessenberg(AbstractMatrix{T}(H.data))
+
 copy(H::UpperHessenberg) = UpperHessenberg(copy(H.data))
 real(H::UpperHessenberg{<:Real}) = H
 real(H::UpperHessenberg{<:Complex}) = UpperHessenberg(triu!(real(H.data),-1))
@@ -94,17 +99,95 @@ Base.copy(A::Transpose{<:Any,<:UpperHessenberg}) = tril!(transpose!(similar(A.pa
 rmul!(H::UpperHessenberg, x::Number) = (rmul!(H.data, x); H)
 lmul!(x::Number, H::UpperHessenberg) = (lmul!(x, H.data); H)
 
-# (future: we could also have specialized routines for UpperHessenberg * UpperTriangular)
-
 fillstored!(H::UpperHessenberg, x) = (fillband!(H.data, x, -1, size(H,2)-1); H)
 
 +(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data+B.data)
 -(A::UpperHessenberg, B::UpperHessenberg) = UpperHessenberg(A.data-B.data)
-# (future: we could also have specialized routines for UpperHessenberg ± UpperTriangular)
 
-# shift Hessenberg by λI
-+(H::UpperHessenberg, J::UniformScaling) = UpperHessenberg(H.data + J)
--(J::UniformScaling, H::UpperHessenberg) = UpperHessenberg(J - H.data)
+for T = (:UniformScaling, :Diagonal, :Bidiagonal, :Tridiagonal, :SymTridiagonal,
+         :UpperTriangular, :UnitUpperTriangular)
+    for op = (:+, :-)
+        @eval begin
+            $op(H::UpperHessenberg, x::$T) = UpperHessenberg($op(H.data, x))
+            $op(x::$T, H::UpperHessenberg) = UpperHessenberg($op(x, H.data))
+        end
+    end
+end
+
+for T = (:Number, :UniformScaling, :Diagonal)
+    @eval begin
+        *(H::UpperHessenberg, x::$T) = UpperHessenberg(H.data * x)
+        *(x::$T, H::UpperHessenberg) = UpperHessenberg(x * H.data)
+        /(H::UpperHessenberg, x::$T) = UpperHessenberg(H.data / x)
+        \(x::$T, H::UpperHessenberg) = UpperHessenberg(x \ H.data)
+    end
+end
+
+function *(H::UpperHessenberg, U::UpperOrUnitUpperTriangular)
+    T = typeof(oneunit(eltype(H))*oneunit(eltype(U)))
+    HH = copy_similar(H, T)
+    rmul!(HH, U)
+    UpperHessenberg(HH)
+end
+function *(U::UpperOrUnitUpperTriangular, H::UpperHessenberg)
+    T = typeof(oneunit(eltype(H))*oneunit(eltype(U)))
+    HH = copy_similar(H, T)
+    lmul!(U, HH)
+    UpperHessenberg(HH)
+end
+
+function /(H::UpperHessenberg, U::UpperTriangular)
+    T = typeof(oneunit(eltype(H))/oneunit(eltype(U)))
+    HH = copy_similar(H, T)
+    rdiv!(HH, U)
+    UpperHessenberg(HH)
+end
+function /(H::UpperHessenberg, U::UnitUpperTriangular)
+    T = typeof(oneunit(eltype(H))/oneunit(eltype(U)))
+    HH = copy_similar(H, T)
+    rdiv!(HH, U)
+    UpperHessenberg(HH)
+end
+
+function \(U::UpperTriangular, H::UpperHessenberg)
+    T = typeof(oneunit(eltype(U))\oneunit(eltype(H)))
+    HH = copy_similar(H, T)
+    ldiv!(U, HH)
+    UpperHessenberg(HH)
+end
+function \(U::UnitUpperTriangular, H::UpperHessenberg)
+    T = typeof(oneunit(eltype(U))\oneunit(eltype(H)))
+    HH = copy_similar(H, T)
+    ldiv!(U, HH)
+    UpperHessenberg(HH)
+end
+
+function *(H::UpperHessenberg, B::Bidiagonal)
+    TS = promote_op(matprod, eltype(H), eltype(B))
+    A = A_mul_B_td!(zeros(TS, size(H)), H, B)
+    return B.uplo == 'U' ? UpperHessenberg(A) : A
+end
+function *(B::Bidiagonal, H::UpperHessenberg)
+    TS = promote_op(matprod, eltype(B), eltype(H))
+    A = A_mul_B_td!(zeros(TS, size(H)), B, H)
+    return B.uplo == 'U' ? UpperHessenberg(A) : A
+end
+
+/(H::UpperHessenberg, B::Bidiagonal) = _rdiv(H, B)
+/(H::UpperHessenberg{<:Number}, B::Bidiagonal{<:Number}) = _rdiv(H, B)
+function _rdiv(H::UpperHessenberg, B::Bidiagonal)
+    T = typeof(oneunit(eltype(H))/oneunit(eltype(B)))
+    A = _rdiv!(zeros(T, size(H)), H, B)
+    return B.uplo == 'U' ? UpperHessenberg(A) : A
+end
+
+\(B::Bidiagonal{<:Number}, H::UpperHessenberg{<:Number}) = _ldiv(B, H)
+\(B::Bidiagonal, H::UpperHessenberg) = _ldiv(B, H)
+function _ldiv(B::Bidiagonal, H::UpperHessenberg)
+    T = typeof(oneunit(eltype(B))\oneunit(eltype(H)))
+    A = ldiv!(zeros(T, size(H)), B, H)
+    return B.uplo == 'U' ? UpperHessenberg(A) : A
+end
 
 # Solving (H+µI)x = b: we can do this in O(m²) time and O(m) memory
 # (in-place in x) by the RQ algorithm from:
@@ -419,7 +502,7 @@ true
 ```
 """
 hessenberg(A::AbstractMatrix{T}) where T =
-    hessenberg!(copy_oftype(A, eigtype(T)))
+    hessenberg!(copymutable_oftype(A, eigtype(T)))
 
 function show(io::IO, mime::MIME"text/plain", F::Hessenberg)
     summary(io, F)
@@ -457,6 +540,9 @@ function getproperty(F::Hessenberg, d::Symbol)
     return getfield(F, d)
 end
 
+size(Q::HessenbergQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim)
+size(Q::HessenbergQ) = size(Q, 1), size(Q, 2)
+
 Base.propertynames(F::Hessenberg, private::Bool=false) =
     (:Q, :H, :μ, (private ? (:τ, :factors, :uplo) : ())...)
 
@@ -485,28 +571,30 @@ function AbstractMatrix(F::Hessenberg)
     end
 end
 
+# adjoint(Q::HessenbergQ{<:Real})
+
 lmul!(Q::BlasHessenbergQ{T,false}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
     LAPACK.ormhr!('L', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
-rmul!(X::StridedMatrix{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} =
+rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,false}) where {T<:BlasFloat} =
     LAPACK.ormhr!('R', 'N', 1, size(Q.factors, 1), Q.factors, Q.τ, X)
 lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
     (Q = adjQ.parent; LAPACK.ormhr!('L', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
-rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} =
+rmul!(X::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,false}}) where {T<:BlasFloat} =
     (Q = adjQ.parent; LAPACK.ormhr!('R', ifelse(T<:Real, 'T', 'C'), 1, size(Q.factors, 1), Q.factors, Q.τ, X))
 
 lmul!(Q::BlasHessenbergQ{T,true}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
     LAPACK.ormtr!('L', Q.uplo, 'N', Q.factors, Q.τ, X)
-rmul!(X::StridedMatrix{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} =
+rmul!(X::StridedVecOrMat{T}, Q::BlasHessenbergQ{T,true}) where {T<:BlasFloat} =
     LAPACK.ormtr!('R', Q.uplo, 'N', Q.factors, Q.τ, X)
 lmul!(adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}, X::StridedVecOrMat{T}) where {T<:BlasFloat} =
     (Q = adjQ.parent; LAPACK.ormtr!('L', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
-rmul!(X::StridedMatrix{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} =
+rmul!(X::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:BlasHessenbergQ{T,true}}) where {T<:BlasFloat} =
     (Q = adjQ.parent; LAPACK.ormtr!('R', Q.uplo, ifelse(T<:Real, 'T', 'C'), Q.factors, Q.τ, X))
 
 lmul!(Q::HessenbergQ{T}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T} = rmul!(X', Q')'
-rmul!(X::Adjoint{T,<:StridedMatrix{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')'
+rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, Q::HessenbergQ{T}) where {T} = lmul!(Q', X')'
 lmul!(adjQ::Adjoint{<:Any,<:HessenbergQ{T}}, X::Adjoint{T,<:StridedVecOrMat{T}}) where {T}  = rmul!(X', adjQ')'
-rmul!(X::Adjoint{T,<:StridedMatrix{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')'
+rmul!(X::Adjoint{T,<:StridedVecOrMat{T}}, adjQ::Adjoint{<:Any,<:HessenbergQ{T}}) where {T} = lmul!(adjQ', X')'
 
 # multiply x by the entries of M in the upper-k triangle, which contains
 # the entries of the upper-Hessenberg matrix H for k=-1
diff --git a/stdlib/LinearAlgebra/src/lapack.jl b/stdlib/LinearAlgebra/src/lapack.jl
index 046022fa4edf43..cd438f142a793b 100644
--- a/stdlib/LinearAlgebra/src/lapack.jl
+++ b/stdlib/LinearAlgebra/src/lapack.jl
@@ -5,14 +5,10 @@ module LAPACK
 Interfaces to LAPACK subroutines.
 """ LAPACK
 
-const liblapack = Base.liblapack_name
+using ..LinearAlgebra.BLAS: @blasfunc, chkuplo
 
-import ..LinearAlgebra.BLAS.@blasfunc
-
-import ..LinearAlgebra: BlasFloat, BlasInt, LAPACKException,
-    DimensionMismatch, SingularException, PosDefException, chkstride1, checksquare
-
-using ..LinearAlgebra: triu, tril, dot
+using ..LinearAlgebra: libblastrampoline, BlasFloat, BlasInt, LAPACKException, DimensionMismatch,
+    SingularException, PosDefException, chkstride1, checksquare,triu, tril, dot
 
 using Base: iszero, require_one_based_indexing
 
@@ -51,14 +47,6 @@ function chkposdef(ret::BlasInt)
     end
 end
 
-"Check that upper/lower (for special matrices) is correctly specified"
-function chkuplo(uplo::AbstractChar)
-    if !(uplo == 'U' || uplo == 'L')
-        throw(ArgumentError("uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
-    end
-    uplo
-end
-
 "Check that {c}transpose is correctly specified"
 function chktrans(trans::AbstractChar)
     if !(trans == 'N' || trans == 'C' || trans == 'T')
@@ -95,12 +83,30 @@ function chkfinite(A::AbstractMatrix)
     return true
 end
 
+function chkuplofinite(A::AbstractMatrix, uplo::AbstractChar)
+    require_one_based_indexing(A)
+    m, n = size(A)
+    if uplo == 'U'
+        @inbounds for j in 1:n, i in 1:j
+            if !isfinite(A[i,j])
+                throw(ArgumentError("matrix contains Infs or NaNs"))
+            end
+        end
+    else
+        @inbounds for j in 1:n, i in j:m
+            if !isfinite(A[i,j])
+                throw(ArgumentError("matrix contains Infs or NaNs"))
+            end
+        end
+    end
+end
+
 # LAPACK version number
 function version()
     major = Ref{BlasInt}(0)
     minor = Ref{BlasInt}(0)
     patch = Ref{BlasInt}(0)
-    ccall((@blasfunc(ilaver_), liblapack), Cvoid,
+    ccall((@blasfunc(ilaver_), libblastrampoline), Cvoid,
           (Ptr{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
           major, minor, patch)
     return VersionNumber(major[], minor[], patch[])
@@ -126,7 +132,7 @@ for (gbtrf, gbtrs, elty) in
             mnmn = min(m, n)
             ipiv = similar(AB, BlasInt, mnmn)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gbtrf), liblapack), Cvoid,
+            ccall((@blasfunc($gbtrf), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
                   m, n, kl, ku, AB, max(1,stride(AB,2)), ipiv, info)
@@ -152,12 +158,12 @@ for (gbtrf, gbtrs, elty) in
             if m != n || m != size(B,1)
                 throw(DimensionMismatch("matrix AB has dimensions $(size(AB)), but right hand side matrix B has dimensions $(size(B))"))
             end
-            ccall((@blasfunc($gbtrs), liblapack), Cvoid,
+            ccall((@blasfunc($gbtrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}),
+                   Ptr{BlasInt}, Clong),
                   trans, n, kl, ku, size(B,2), AB, max(1,stride(AB,2)), ipiv,
-                  B, max(1,stride(B,2)), info)
+                  B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
         end
@@ -206,10 +212,10 @@ for (gebal, gebak, elty, relty) in
             ilo = Ref{BlasInt}()
             scale = similar(A, $relty, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gebal), liblapack), Cvoid,
+            ccall((@blasfunc($gebal), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{BlasInt}),
-                  job, n, A, max(1,stride(A,2)), ilo, ihi, scale, info)
+                   Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong),
+                  job, n, A, max(1,stride(A,2)), ilo, ihi, scale, info, 1)
             chklapackerror(info[])
             ilo[], ihi[], scale
         end
@@ -229,10 +235,12 @@ for (gebal, gebak, elty, relty) in
             chkfinite(V) # balancing routines don't support NaNs and Infs
             n = checksquare(V)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gebak), liblapack), Cvoid,
+            ccall((@blasfunc($gebak), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  job, side, size(V,1), ilo, ihi, scale, n, V, max(1,stride(V,2)), info)
+                   Ptr{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
+                   Clong, Clong),
+                  job, side, size(V,1), ilo, ihi, scale, n, V, max(1,stride(V,2)), info,
+                  1, 1)
             chklapackerror(info[])
             V
         end
@@ -294,7 +302,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gebrd), liblapack), Cvoid,
+                ccall((@blasfunc($gebrd), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ptr{$elty},
                      Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
@@ -328,7 +336,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             work  = Vector{$elty}(undef, 1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gelqf), liblapack), Cvoid,
+                ccall((@blasfunc($gelqf), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       m, n, A, lda, tau, work, lwork, info)
@@ -359,7 +367,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             work  = Vector{$elty}(undef, 1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($geqlf), liblapack), Cvoid,
+                ccall((@blasfunc($geqlf), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       m, n, A, lda, tau, work, lwork, info)
@@ -401,7 +409,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
                 if cmplx
-                    ccall((@blasfunc($geqp3), liblapack), Cvoid,
+                    ccall((@blasfunc($geqp3), libblastrampoline), Cvoid,
                           (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                            Ptr{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                            Ptr{$relty}, Ptr{BlasInt}),
@@ -409,7 +417,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
                           jpvt, tau, work, lwork,
                           rwork, info)
                 else
-                    ccall((@blasfunc($geqp3), liblapack), Cvoid,
+                    ccall((@blasfunc($geqp3), libblastrampoline), Cvoid,
                           (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                            Ptr{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                            Ptr{BlasInt}),
@@ -439,7 +447,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             work = Vector{$elty}(undef, nb*n)
             if n > 0
                 info = Ref{BlasInt}()
-                ccall((@blasfunc($geqrt), liblapack), Cvoid,
+                ccall((@blasfunc($geqrt), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                      Ptr{BlasInt}),
@@ -465,7 +473,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             end
             if n > 0
                 info = Ref{BlasInt}()
-                ccall((@blasfunc($geqrt3), liblapack), Cvoid,
+                ccall((@blasfunc($geqrt3), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                      m, n, A, max(1, stride(A, 2)),
@@ -492,13 +500,13 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2                # first call returns lwork as work[1]
-                ccall((@blasfunc($geqrf), liblapack), Cvoid,
+                ccall((@blasfunc($geqrf), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       m, n, A, max(1,stride(A,2)), tau, work, lwork, info)
                 chklapackerror(info[])
                 if i == 1
-                    lwork = BlasInt(real(work[1]))
+                    lwork = max(BlasInt(1),BlasInt(real(work[1])))
                     resize!(work, lwork)
                 end
             end
@@ -521,13 +529,13 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             work  = Vector{$elty}(undef, 1)
             info  = Ref{BlasInt}()
             for i = 1:2                # first call returns lwork as work[1]
-                ccall((@blasfunc($gerqf), liblapack), Cvoid,
+                ccall((@blasfunc($gerqf), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       m, n, A, max(1,stride(A,2)), tau, work, lwork, info)
                 chklapackerror(info[])
                 if i == 1
-                    lwork = BlasInt(real(work[1]))
+                    lwork = max(BlasInt(m), BlasInt(real(work[1])))
                     resize!(work, lwork)
                 end
             end
@@ -547,7 +555,7 @@ for (gebrd, gelqf, geqlf, geqrf, geqp3, geqrt, geqrt3, gerqf, getrf, elty, relty
             lda  = max(1,stride(A, 2))
             ipiv = similar(A, BlasInt, min(m,n))
             info = Ref{BlasInt}()
-            ccall((@blasfunc($getrf), liblapack), Cvoid,
+            ccall((@blasfunc($getrf), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                    Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
                   m, n, A, lda, ipiv, info)
@@ -754,7 +762,7 @@ for (larfg, elty) in
             α    = Ref{$elty}(x[1])
             incx = BlasInt(1)
             τ    = Ref{$elty}(0)
-            ccall((@blasfunc($larfg), liblapack), Cvoid,
+            ccall((@blasfunc($larfg), libblastrampoline), Cvoid,
                 (Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}),
                 N, α, pointer(x, 2), incx, τ)
             @inbounds x[1] = one($elty)
@@ -783,7 +791,7 @@ for (larf, elty) in
             ldc = max(1, stride(C, 2))
             l = side == 'L' ? n : m
             incv  = BlasInt(1)
-            ccall((@blasfunc($larf), liblapack), Cvoid,
+            ccall((@blasfunc($larf), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Ref{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Clong),
                 side, m, n, v, incv,
@@ -828,7 +836,7 @@ for (tzrzf, ormrz, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($tzrzf), liblapack), Cvoid,
+                ccall((@blasfunc($tzrzf), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                     m, n, A, lda,
@@ -866,15 +874,15 @@ for (tzrzf, ormrz, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormrz), liblapack), Cvoid,
+                ccall((@blasfunc($ormrz), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                      Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{BlasInt}),
+                     Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
                     side, trans, m, n,
                     k, l, A, lda,
                     tau, C, ldc, work,
-                    lwork, info)
+                    lwork, info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -931,12 +939,12 @@ for (gels, gesv, getrs, getri, elty) in
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gels), liblapack), Cvoid,
+                ccall((@blasfunc($gels), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
                       (btrn ? 'T' : 'N'), m, n, size(B,2), A, max(1,stride(A,2)),
-                      B, max(1,stride(B,2)), work, lwork, info)
+                      B, max(1,stride(B,2)), work, lwork, info, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -972,7 +980,7 @@ for (gels, gesv, getrs, getri, elty) in
             end
             ipiv = similar(A, BlasInt, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gesv), liblapack), Cvoid,
+            ccall((@blasfunc($gesv), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                   n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
@@ -997,10 +1005,10 @@ for (gels, gesv, getrs, getri, elty) in
             end
             nrhs = size(B, 2)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($getrs), liblapack), Cvoid,
+            ccall((@blasfunc($getrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  trans, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  trans, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
         end
@@ -1023,7 +1031,7 @@ for (gels, gesv, getrs, getri, elty) in
             work  = Vector{$elty}(undef, 1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($getri), liblapack), Cvoid,
+                ccall((@blasfunc($getri), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       n, A, lda, ipiv, work, lwork, info)
@@ -1119,14 +1127,14 @@ for (gesvx, elty) in
             iwork = Vector{BlasInt}(undef, n)
             info  = Ref{BlasInt}()
             X = similar(A, $elty, n, nrhs)
-            ccall((@blasfunc($gesvx), liblapack), Cvoid,
+            ccall((@blasfunc($gesvx), libblastrampoline), Cvoid,
               (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
                Ref{UInt8}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-               Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
+               Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong),
               fact, trans, n, nrhs, A, lda, AF, ldaf, ipiv, equed, R, C, B,
-              ldb, X, n, rcond, ferr, berr, work, iwork, info)
+              ldb, X, n, rcond, ferr, berr, work, iwork, info, 1, 1, 1)
             chklapackerror(info[])
             if info[] == n + 1
                 @warn "Matrix is singular to working precision"
@@ -1189,14 +1197,14 @@ for (gesvx, elty, relty) in
             rwork = Vector{$relty}(undef, 2n)
             info  = Ref{BlasInt}()
             X = similar(A, $elty, n, nrhs)
-            ccall((@blasfunc($gesvx), liblapack), Cvoid,
+            ccall((@blasfunc($gesvx), libblastrampoline), Cvoid,
               (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
                Ref{UInt8}, Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
                Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{$relty}, Ptr{$relty},
-               Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}),
+               Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong),
               fact, trans, n, nrhs, A, lda, AF, ldaf, ipiv, equed, R, C, B,
-              ldb, X, n, rcond, ferr, berr, work, rwork, info)
+              ldb, X, n, rcond, ferr, berr, work, rwork, info, 1, 1, 1)
             chklapackerror(info[])
             if info[] == n + 1
                 @warn "Matrix is singular to working precision"
@@ -1285,7 +1293,7 @@ for (gelsd, gelsy, elty) in
             lwork = BlasInt(-1)
             iwork = Vector{BlasInt}(undef, 1)
             for i = 1:2  # first call returns lwork as work[1] and iwork length as iwork[1]
-                ccall((@blasfunc($gelsd), liblapack), Cvoid,
+                ccall((@blasfunc($gelsd), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                        Ptr{$elty}, Ref{$elty}, Ref{BlasInt}, Ptr{$elty},
@@ -1331,7 +1339,7 @@ for (gelsd, gelsy, elty) in
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gelsy), liblapack), Cvoid,
+                ccall((@blasfunc($gelsy), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
                      Ref{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
@@ -1381,7 +1389,7 @@ for (gelsd, gelsy, elty, relty) in
             rwork = Vector{$relty}(undef, 1)
             iwork = Vector{BlasInt}(undef, 1)
             for i = 1:2  # first call returns lwork as work[1], rwork length as rwork[1] and iwork length as iwork[1]
-                ccall((@blasfunc($gelsd), liblapack), Cvoid,
+                ccall((@blasfunc($gelsd), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
                        Ref{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
@@ -1429,7 +1437,7 @@ for (gelsd, gelsy, elty, relty) in
             rwork = Vector{$relty}(undef, 2n)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gelsy), liblapack), Cvoid,
+                ccall((@blasfunc($gelsy), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
                      Ref{$relty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
@@ -1504,7 +1512,7 @@ for (gglse, elty) in ((:dgglse_, :Float64),
             work  = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gglse), liblapack), Cvoid,
+                ccall((@blasfunc($gglse), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                        Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
@@ -1567,21 +1575,21 @@ for (geev, gesvd, gesdd, ggsvd, elty, relty) in
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
                 if cmplx
-                    ccall((@blasfunc($geev), liblapack), Cvoid,
+                    ccall((@blasfunc($geev), libblastrampoline), Cvoid,
                           (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
                            Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                            Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{$relty}, Ptr{BlasInt}),
+                           Ptr{$relty}, Ptr{BlasInt}, Clong, Clong),
                           jobvl, jobvr, n, A, max(1,stride(A,2)), W, VL, n, VR, n,
-                          work, lwork, rwork, info)
+                          work, lwork, rwork, info, 1, 1)
                 else
-                    ccall((@blasfunc($geev), liblapack), Cvoid,
+                    ccall((@blasfunc($geev), libblastrampoline), Cvoid,
                           (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
                            Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
                            Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{BlasInt}),
+                           Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
                           jobvl, jobvr, n, A, max(1,stride(A,2)), WR, WI, VL, n,
-                          VR, n, work, lwork, info)
+                          VR, n, work, lwork, info, 1, 1)
                 end
                 chklapackerror(info[])
                 if i == 1
@@ -1631,21 +1639,21 @@ for (geev, gesvd, gesdd, ggsvd, elty, relty) in
             info   = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
                 if cmplx
-                    ccall((@blasfunc($gesdd), liblapack), Cvoid,
+                    ccall((@blasfunc($gesdd), libblastrampoline), Cvoid,
                           (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                            Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
                            Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{$relty}, Ptr{BlasInt}, Ptr{BlasInt}),
+                           Ptr{$relty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong),
                           job, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)),
-                          work, lwork, rwork, iwork, info)
+                          work, lwork, rwork, iwork, info, 1)
                 else
-                    ccall((@blasfunc($gesdd), liblapack), Cvoid,
+                    ccall((@blasfunc($gesdd), libblastrampoline), Cvoid,
                           (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                            Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                            Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                           Ptr{BlasInt}, Ptr{BlasInt}),
+                           Ptr{BlasInt}, Ptr{BlasInt}, Clong),
                           job, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)),
-                          work, lwork, iwork, info)
+                          work, lwork, iwork, info, 1)
                 end
                 chklapackerror(info[])
                 if i == 1
@@ -1700,21 +1708,21 @@ for (geev, gesvd, gesdd, ggsvd, elty, relty) in
             info   = Ref{BlasInt}()
             for i in 1:2  # first call returns lwork as work[1]
                 if cmplx
-                    ccall((@blasfunc($gesvd), liblapack), Cvoid,
+                    ccall((@blasfunc($gesvd), libblastrampoline), Cvoid,
                           (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                            Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty},
                            Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}),
+                           Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong),
                           jobu, jobvt, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)),
-                          work, lwork, rwork, info)
+                          work, lwork, rwork, info, 1, 1)
                 else
-                    ccall((@blasfunc($gesvd), liblapack), Cvoid,
+                    ccall((@blasfunc($gesvd), libblastrampoline), Cvoid,
                           (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                            Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
                            Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                           Ref{BlasInt}, Ptr{BlasInt}),
+                           Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
                           jobu, jobvt, m, n, A, max(1,stride(A,2)), S, U, max(1,stride(U,2)), VT, max(1,stride(VT,2)),
-                          work, lwork, info)
+                          work, lwork, info, 1, 1)
                 end
                 chklapackerror(info[])
                 if i == 1
@@ -1780,33 +1788,37 @@ for (geev, gesvd, gesdd, ggsvd, elty, relty) in
             iwork = Vector{BlasInt}(undef, n)
             info = Ref{BlasInt}()
             if cmplx
-                ccall((@blasfunc($ggsvd), liblapack), Cvoid,
+                ccall((@blasfunc($ggsvd), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                     Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Ptr{BlasInt}),
+                    Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Ptr{BlasInt},
+                    Clong, Clong, Clong),
                     jobu, jobv, jobq, m,
                     n, p, k, l,
                     A, lda, B, ldb,
                     alpha, beta, U, ldu,
                     V, ldv, Q, ldq,
-                    work, rwork, iwork, info)
+                    work, rwork, iwork, info,
+                    1, 1, 1)
             else
-                ccall((@blasfunc($ggsvd), liblapack), Cvoid,
+                ccall((@blasfunc($ggsvd), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                     Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
+                    Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
+                    Clong, Clong, Clong),
                     jobu, jobv, jobq, m,
                     n, p, k, l,
                     A, lda, B, ldb,
                     alpha, beta, U, ldu,
                     V, ldv, Q, ldq,
-                    work, iwork, info)
+                    work, iwork, info,
+                    1, 1, 1)
             end
             chklapackerror(info[])
             if m - k[1] - l[1] >= 0
@@ -1900,19 +1912,21 @@ for (f, elty) in ((:dggsvd3_, :Float64),
             iwork = Vector{BlasInt}(undef, n)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($f), liblapack), Cvoid,
+                ccall((@blasfunc($f), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                     Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}),
+                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
+                    Clong, Clong, Clong),
                     jobu, jobv, jobq, m,
                     n, p, k, l,
                     A, lda, B, ldb,
                     alpha, beta, U, ldu,
                     V, ldv, Q, ldq,
-                    work, lwork, iwork, info)
+                    work, lwork, iwork, info,
+                    1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(work[1])
@@ -1958,21 +1972,21 @@ for (f, elty, relty) in ((:zggsvd3_, :ComplexF64, :Float64),
             iwork = Vector{BlasInt}(undef, n)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($f), liblapack), Cvoid,
+                ccall((@blasfunc($f), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                     Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt},
-                    Ptr{BlasInt}),
+                    Ptr{BlasInt}, Clong, Clong, Clong),
                     jobu, jobv, jobq, m,
                     n, p, k, l,
                     A, lda, B, ldb,
                     alpha, beta, U, ldu,
                     V, ldv, Q, ldq,
                     work, lwork, rwork, iwork,
-                    info)
+                    info, 1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(work[1])
@@ -2066,19 +2080,21 @@ for (geevx, ggev, elty) in
             iwork = Vector{BlasInt}(undef, iworksize)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($geevx), liblapack), Cvoid,
+                ccall((@blasfunc($geevx), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                        Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                        Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
                        Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                       Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt}),
+                       Ref{BlasInt}, Ptr{BlasInt}, Ptr{BlasInt},
+                       Clong, Clong, Clong, Clong),
                        balanc, jobvl, jobvr, sense,
                        n, A, lda, wr,
                        wi, VL, max(1,ldvl), VR,
                        max(1,ldvr), ilo, ihi, scale,
                        abnrm, rconde, rcondv, work,
-                       lwork, iwork, info)
+                       lwork, iwork, info,
+                       1, 1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(work[1])
@@ -2132,17 +2148,17 @@ for (geevx, ggev, elty) in
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ggev), liblapack), Cvoid,
+                ccall((@blasfunc($ggev), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                      Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{BlasInt}),
+                     Ptr{BlasInt}, Clong, Clong),
                     jobvl, jobvr, n, A,
                     lda, B, ldb, alphar,
                     alphai, beta, vl, ldvl,
                     vr, ldvr, work, lwork,
-                    info)
+                    info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(work[1])
@@ -2212,19 +2228,19 @@ for (geevx, ggev, elty, relty) in
             rwork = Vector{$relty}(undef, 2n)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($geevx), liblapack), Cvoid,
+                ccall((@blasfunc($geevx), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{UInt8},
                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                        Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$relty}, Ptr{$relty},
                        Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$relty}, Ptr{BlasInt}),
+                       Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong, Clong),
                        balanc, jobvl, jobvr, sense,
                        n, A, lda, w,
                        VL, max(1,ldvl), VR, max(1,ldvr),
                        ilo, ihi, scale, abnrm,
                        rconde, rcondv, work, lwork,
-                       rwork, info)
+                       rwork, info, 1, 1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(work[1])
@@ -2279,17 +2295,17 @@ for (geevx, ggev, elty, relty) in
             rwork = Vector{$relty}(undef, 8n)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ggev), liblapack), Cvoid,
+                ccall((@blasfunc($ggev), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
-                     Ptr{BlasInt}),
+                     Ptr{BlasInt}, Clong, Clong),
                     jobvl, jobvr, n, A,
                     lda, B, ldb, alpha,
                     beta, vl, ldvl, vr,
                     ldvr, work, lwork, rwork,
-                    info)
+                    info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(work[1])
@@ -2353,7 +2369,7 @@ for (laic1, elty) in
             sestpr = Vector{$elty}(undef, 1)
             s = Vector{$elty}(undef, 1)
             c = Vector{$elty}(undef, 1)
-            ccall((@blasfunc($laic1), liblapack), Cvoid,
+            ccall((@blasfunc($laic1), libblastrampoline), Cvoid,
                 (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{$elty},
                  Ptr{$elty}, Ref{$elty}, Ptr{$elty}, Ptr{$elty},
                  Ptr{$elty}),
@@ -2387,7 +2403,7 @@ for (laic1, elty, relty) in
             sestpr = Vector{$relty}(undef, 1)
             s = Vector{$elty}(undef, 1)
             c = Vector{$elty}(undef, 1)
-            ccall((@blasfunc($laic1), liblapack), Cvoid,
+            ccall((@blasfunc($laic1), libblastrampoline), Cvoid,
                 (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{$relty},
                  Ptr{$elty}, Ref{$elty}, Ptr{$relty}, Ptr{$elty},
                  Ptr{$elty}),
@@ -2429,7 +2445,7 @@ for (gtsv, gttrf, gttrs, elty) in
                 return B # Early exit if possible
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gtsv), liblapack), Cvoid,
+            ccall((@blasfunc($gtsv), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                   n, size(B,2), dl, d, du, B, max(1,stride(B,2)), info)
@@ -2456,7 +2472,7 @@ for (gtsv, gttrf, gttrs, elty) in
             du2  = similar(d, $elty, n-2)
             ipiv = similar(d, BlasInt, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gttrf), liblapack), Cvoid,
+            ccall((@blasfunc($gttrf), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
                    Ptr{BlasInt}, Ptr{BlasInt}),
                   n, dl, d, du, du2, ipiv, info)
@@ -2488,11 +2504,11 @@ for (gtsv, gttrf, gttrs, elty) in
                 throw(DimensionMismatch("B has leading dimension $(size(B,1)), but should have $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gttrs), liblapack), Cvoid,
+            ccall((@blasfunc($gttrs), libblastrampoline), Cvoid,
                    (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                     Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                    Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                   trans, n, size(B,2), dl, d, du, du2, ipiv, B, max(1,stride(B,2)), info)
+                    Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                   trans, n, size(B,2), dl, d, du, du2, ipiv, B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
          end
@@ -2555,7 +2571,7 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orglq), liblapack), Cvoid,
+                ccall((@blasfunc($orglq), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                        Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       m, n, k, A, max(1,stride(A,2)), tau, work, lwork, info)
@@ -2589,7 +2605,7 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orgqr), liblapack), Cvoid,
+                ccall((@blasfunc($orgqr), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                        Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       m, n, k, A,
@@ -2625,7 +2641,7 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orgql), liblapack), Cvoid,
+                ccall((@blasfunc($orgql), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                        Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       m, n, k, A,
@@ -2663,7 +2679,7 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orgrq), liblapack), Cvoid,
+                ccall((@blasfunc($orgrq), libblastrampoline), Cvoid,
                       (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                        Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                       m, n, k, A,
@@ -2710,12 +2726,12 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormlq), liblapack), Cvoid,
+                ccall((@blasfunc($ormlq), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
                       side, trans, m, n, k, A, max(1,stride(A,2)), tau,
-                      C, max(1,stride(C,2)), work, lwork, info)
+                      C, max(1,stride(C,2)), work, lwork, info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -2757,15 +2773,15 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormqr), liblapack), Cvoid,
+                ccall((@blasfunc($ormqr), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}),
+                       Ptr{BlasInt}, Clong, Clong),
                       side, trans, m, n,
                       k, A, max(1,stride(A,2)), tau,
                       C, max(1, stride(C,2)), work, lwork,
-                      info)
+                      info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -2807,15 +2823,15 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormql), liblapack), Cvoid,
+                ccall((@blasfunc($ormql), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                        Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}),
+                       Ptr{BlasInt}, Clong, Clong),
                       side, trans, m, n,
                       k, A, max(1,stride(A,2)), tau,
                       C, max(1, stride(C,2)), work, lwork,
-                      info)
+                      info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -2857,12 +2873,12 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormrq), liblapack), Cvoid,
+                ccall((@blasfunc($ormrq), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
                       side, trans, m, n, k, A, max(1,stride(A,2)), tau,
-                      C, max(1,stride(C,2)), work, lwork, info)
+                      C, max(1,stride(C,2)), work, lwork, info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -2913,15 +2929,15 @@ for (orglq, orgqr, orgql, orgrq, ormlq, ormqr, ormql, ormrq, gemqrt, elty) in
             ldc = stride(C, 2)
             work = Vector{$elty}(undef, wss)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gemqrt), liblapack), Cvoid,
+            ccall((@blasfunc($gemqrt), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                  Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ptr{BlasInt}),
+                 Ptr{$elty}, Ptr{BlasInt}, Clong, Clong),
                 side, trans, m, n,
                 k, nb, V, ldv,
                 T, max(1,stride(T,2)), C, max(1,ldc),
-                work, info)
+                work, info, 1, 1)
             chklapackerror(info[])
             return C
         end
@@ -3032,10 +3048,10 @@ for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
                 throw(DimensionMismatch("first dimension of B, $(size(B,1)), and size of A, ($n,$n), must match!"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($posv), liblapack), Cvoid,
+            ccall((@blasfunc($posv), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), B, max(1,stride(B,2)), info)
+                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, n, size(B,2), A, max(1,stride(A,2)), B, max(1,stride(B,2)), info, 1)
             chkargsok(info[])
             chkposdef(info[])
             A, B
@@ -3057,9 +3073,9 @@ for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
                 return A, 0
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($potrf), liblapack), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, size(A,1), A, lda, info)
+            ccall((@blasfunc($potrf), libblastrampoline), Cvoid,
+                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, size(A,1), A, lda, info, 1)
             chkargsok(info[])
             #info[] > 0 means the leading minor of order info[] is not positive definite
             #ordinarily, throw Exception here, but return error code here
@@ -3078,9 +3094,9 @@ for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
             chkstride1(A)
             chkuplo(uplo)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($potri), liblapack), Cvoid,
-                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, size(A,1), A, max(1,stride(A,2)), info)
+            ccall((@blasfunc($potri), libblastrampoline), Cvoid,
+                  (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, size(A,1), A, max(1,stride(A,2)), info, 1)
             chkargsok(info[])
             chknonsingular(info[])
             A
@@ -3107,11 +3123,11 @@ for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
             end
             ldb = max(1,stride(B,2))
             info = Ref{BlasInt}()
-            ccall((@blasfunc($potrs), liblapack), Cvoid,
+            ccall((@blasfunc($potrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
-                    Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                    Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
                    uplo, n, nrhs, A,
-                   lda, B, ldb, info)
+                   lda, B, ldb, info, 1)
             chklapackerror(info[])
             return B
         end
@@ -3132,10 +3148,10 @@ for (posv, potrf, potri, potrs, pstrf, elty, rtyp) in
             rank = Vector{BlasInt}(undef, 1)
             work = Vector{$rtyp}(undef, 2n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($pstrf), liblapack), Cvoid,
+            ccall((@blasfunc($pstrf), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                   Ptr{BlasInt}, Ref{$rtyp}, Ptr{$rtyp}, Ptr{BlasInt}),
-                  uplo, n, A, max(1,stride(A,2)), piv, rank, tol, work, info)
+                   Ptr{BlasInt}, Ref{$rtyp}, Ptr{$rtyp}, Ptr{BlasInt}, Clong),
+                  uplo, n, A, max(1,stride(A,2)), piv, rank, tol, work, info, 1)
             chkargsok(info[])
             A, piv, rank[1], info[] #Stored in CholeskyPivoted
         end
@@ -3221,7 +3237,7 @@ for (ptsv, pttrf, elty, relty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($ptsv), liblapack), Cvoid,
+            ccall((@blasfunc($ptsv), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty},
                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                   n, size(B,2), D, E, B, max(1,stride(B,2)), info)
@@ -3242,7 +3258,7 @@ for (ptsv, pttrf, elty, relty) in
                 throw(DimensionMismatch("E has length $(length(E)), but needs $(n - 1)"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($pttrf), liblapack), Cvoid,
+            ccall((@blasfunc($pttrf), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ptr{$relty}, Ptr{$elty}, Ptr{BlasInt}),
                   n, D, E, info)
             chklapackerror(info[])
@@ -3289,7 +3305,7 @@ for (pttrs, elty, relty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($pttrs), liblapack), Cvoid,
+            ccall((@blasfunc($pttrs), libblastrampoline), Cvoid,
                   (Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty},
                    Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
                   n, size(B,2), D, E, B, max(1,stride(B,2)), info)
@@ -3323,10 +3339,10 @@ for (pttrs, elty, relty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($pttrs), liblapack), Cvoid,
+            ccall((@blasfunc($pttrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$relty}, Ptr{$elty},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, n, size(B,2), D, E, B, max(1,stride(B,2)), info)
+                   Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, n, size(B,2), D, E, B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
         end
@@ -3362,10 +3378,10 @@ for (trtri, trtrs, elty) in
             chkdiag(diag)
             lda = max(1,stride(A, 2))
             info = Ref{BlasInt}()
-            ccall((@blasfunc($trtri), liblapack), Cvoid,
+            ccall((@blasfunc($trtri), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}),
-                  uplo, diag, n, A, lda, info)
+                   Ptr{BlasInt}, Clong, Clong),
+                  uplo, diag, n, A, lda, info, 1, 1)
             chklapackerror(info[])
             A
         end
@@ -3388,11 +3404,13 @@ for (trtri, trtrs, elty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)) but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($trtrs), liblapack), Cvoid,
+            ccall((@blasfunc($trtrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                   Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
+                   Clong, Clong, Clong),
                   uplo, trans, diag, n, size(B,2), A, max(1,stride(A,2)),
-                  B, max(1,stride(B,2)), info)
+                  B, max(1,stride(B,2)), info,
+                  1, 1, 1)
             chklapackerror(info[])
             B
         end
@@ -3443,11 +3461,13 @@ for (trcon, trevc, trrfs, elty) in
             work  = Vector{$elty}(undef, 3n)
             iwork = Vector{BlasInt}(undef, n)
             info  = Ref{BlasInt}()
-            ccall((@blasfunc($trcon), liblapack), Cvoid,
+            ccall((@blasfunc($trcon), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
+                   Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
+                   Clong, Clong, Clong),
                   norm, uplo, diag, n,
-                  A, max(1,stride(A,2)), rcond, work, iwork, info)
+                  A, max(1,stride(A,2)), rcond, work, iwork, info,
+                  1, 1, 1)
             chklapackerror(info[])
             rcond[]
         end
@@ -3482,15 +3502,15 @@ for (trcon, trevc, trrfs, elty) in
             work = Vector{$elty}(undef, 3n)
             info = Ref{BlasInt}()
 
-            ccall((@blasfunc($trevc), liblapack), Cvoid,
+            ccall((@blasfunc($trevc), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ref{BlasInt},Ref{BlasInt}, Ptr{BlasInt},
-                 Ptr{$elty}, Ptr{BlasInt}),
+                 Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
+                 Ptr{$elty}, Ptr{BlasInt}, Clong, Clong),
                 side, howmny, select, n,
                 T, ldt, VL, ldvl,
                 VR, ldvr, mm, m,
-                work, info)
+                work, info, 1, 1)
             chklapackerror(info[])
 
             #Decide what exactly to return
@@ -3539,13 +3559,13 @@ for (trcon, trevc, trrfs, elty) in
             work = Vector{$elty}(undef, 3n)
             iwork = Vector{BlasInt}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($trrfs), liblapack), Cvoid,
+            ccall((@blasfunc($trrfs), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
+                 Ptr{$elty}, Ptr{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong),
                 uplo, trans, diag, n,
                 nrhs, A, max(1,stride(A,2)), B, max(1,stride(B,2)), X, max(1,stride(X,2)),
-                Ferr, Berr, work, iwork, info)
+                Ferr, Berr, work, iwork, info, 1, 1, 1)
             chklapackerror(info[])
             Ferr, Berr
         end
@@ -3574,11 +3594,13 @@ for (trcon, trevc, trrfs, elty, relty) in
             work  = Vector{$elty}(undef, 2n)
             rwork = Vector{$relty}(undef, n)
             info  = Ref{BlasInt}()
-            ccall((@blasfunc($trcon), liblapack), Cvoid,
+            ccall((@blasfunc($trcon), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
-                   Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}),
+                   Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt},
+                   Clong, Clong, Clong),
                   norm, uplo, diag, n,
-                  A, max(1,stride(A,2)), rcond, work, rwork, info)
+                  A, max(1,stride(A,2)), rcond, work, rwork, info,
+                  1, 1, 1)
             chklapackerror(info[])
             rcond[]
         end
@@ -3614,15 +3636,15 @@ for (trcon, trevc, trrfs, elty, relty) in
             work = Vector{$elty}(undef, 2n)
             rwork = Vector{$relty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($trevc), liblapack), Cvoid,
+            ccall((@blasfunc($trevc), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
-                 Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}),
+                 Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong),
                 side, howmny, select, n,
                 T, ldt, VL, ldvl,
                 VR, ldvr, mm, m,
-                work, rwork, info)
+                work, rwork, info, 1, 1)
             chklapackerror(info[])
 
             #Decide what exactly to return
@@ -3671,13 +3693,13 @@ for (trcon, trevc, trrfs, elty, relty) in
             work  = Vector{$elty}(undef, 2n)
             rwork = Vector{$relty}(undef, n)
             info  = Ref{BlasInt}()
-            ccall((@blasfunc($trrfs), liblapack), Cvoid,
+            ccall((@blasfunc($trrfs), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}),
+                 Ptr{$relty}, Ptr{$relty}, Ptr{$elty}, Ptr{$relty}, Ptr{BlasInt}, Clong, Clong, Clong),
                 uplo, trans, diag, n,
                 nrhs, A, max(1,stride(A,2)), B, max(1,stride(B,2)), X, max(1,stride(X,2)),
-                Ferr, Berr, work, rwork, info)
+                Ferr, Berr, work, rwork, info, 1, 1, 1)
             chklapackerror(info[])
             Ferr, Berr
         end
@@ -3736,16 +3758,16 @@ for (stev, stebz, stegr, stein, elty) in
             require_one_based_indexing(dv, ev)
             chkstride1(dv, ev)
             n = length(dv)
-            if length(ev) != n - 1
-                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than dv's length, $n)"))
+            if length(ev) != n - 1 && length(ev) != n
+                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than or equal to dv's length, $n)"))
             end
             Zmat = similar(dv, $elty, (n, job != 'N' ? n : 0))
             work = Vector{$elty}(undef, max(1, 2n-2))
             info = Ref{BlasInt}()
-            ccall((@blasfunc($stev), liblapack), Cvoid,
+            ccall((@blasfunc($stev), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
-                   Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  job, n, dv, ev, Zmat, n, work, info)
+                   Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
+                  job, n, dv, ev, Zmat, n, work, info, 1)
             chklapackerror(info[])
             dv, Zmat
         end
@@ -3770,17 +3792,17 @@ for (stev, stebz, stegr, stein, elty) in
             work = Vector{$elty}(undef, 4*n)
             iwork = Vector{BlasInt}(undef, 3*n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($stebz), liblapack), Cvoid,
+            ccall((@blasfunc($stebz), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{$elty},
                 Ref{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ref{$elty},
                 Ptr{$elty}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
                 Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
-                Ptr{BlasInt}, Ptr{BlasInt}),
+                Ptr{BlasInt}, Ptr{BlasInt}, Clong, Clong),
                 range, order, n, vl,
                 vu, il, iu, abstol,
                 dv, ev, m, nsplit,
                 w, iblock, isplit, work,
-                iwork, info)
+                iwork, info, 1, 1)
             chklapackerror(info[])
             w[1:m[]], iblock[1:m[]], isplit[1:nsplit[1]]
         end
@@ -3789,10 +3811,16 @@ for (stev, stebz, stegr, stein, elty) in
             require_one_based_indexing(dv, ev)
             chkstride1(dv, ev)
             n = length(dv)
-            if length(ev) != n - 1
-                throw(DimensionMismatch("ev has length $(length(ev)) but needs one less than dv's length, $n)"))
+            ne = length(ev)
+            if ne == n - 1
+                eev = [ev; zero($elty)]
+            elseif ne == n
+                eev = copy(ev)
+                eev[n] = zero($elty)
+            else
+                throw(DimensionMismatch("ev has length $ne but needs one less than or equal to dv's length, $n)"))
             end
-            eev = [ev; zero($elty)]
+
             abstol = Vector{$elty}(undef, 1)
             m = Ref{BlasInt}()
             w = similar(dv, $elty, n)
@@ -3805,17 +3833,19 @@ for (stev, stebz, stegr, stein, elty) in
             liwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($stegr), liblapack), Cvoid,
+                ccall((@blasfunc($stegr), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
                     Ptr{$elty}, Ref{$elty}, Ref{$elty}, Ref{BlasInt},
                     Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{$elty},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
-                    Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}),
+                    Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
+                    Clong, Clong),
                     jobz, range, n, dv,
                     eev, vl, vu, il,
                     iu, abstol, m, w,
                     Z, ldz, isuppz, work,
-                    lwork, iwork, liwork, info)
+                    lwork, iwork, liwork, info,
+                    1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(work[1])
@@ -3831,10 +3861,15 @@ for (stev, stebz, stegr, stein, elty) in
             require_one_based_indexing(dv, ev_in, w_in, iblock_in, isplit_in)
             chkstride1(dv, ev_in, w_in, iblock_in, isplit_in)
             n = length(dv)
-            if length(ev_in) != n - 1
-                throw(DimensionMismatch("ev_in has length $(length(ev_in)) but needs one less than dv's length, $n)"))
+            ne = length(ev_in)
+            if ne == n - 1
+                ev = [ev_in; zero($elty)]
+            elseif ne == n
+                ev = copy(ev_in)
+                ev[n] = zero($elty)
+            else
+                throw(DimensionMismatch("ev_in has length $ne but needs one less than or equal to dv's length, $n)"))
             end
-            ev = [ev_in; zeros($elty,1)]
             ldz = n #Leading dimension
             #Number of eigenvalues to find
             if !(1 <= length(w_in) <= n)
@@ -3863,7 +3898,7 @@ for (stev, stebz, stegr, stein, elty) in
             iwork = Vector{BlasInt}(undef, n)
             ifail = Vector{BlasInt}(undef, m)
             info  = Ref{BlasInt}()
-            ccall((@blasfunc($stein), liblapack), Cvoid,
+            ccall((@blasfunc($stein), libblastrampoline), Cvoid,
                 (Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                 Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
                 Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
@@ -3951,10 +3986,10 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
             chkuplo(uplo)
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($syconv), liblapack), Cvoid,
+            ccall((@blasfunc($syconv), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  uplo, 'C', n, A, max(1,stride(A,2)), ipiv, work, info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong, Clong),
+                  uplo, 'C', n, A, max(1,stride(A,2)), ipiv, work, info, 1, 1)
             chklapackerror(info[])
             A, work
         end
@@ -3980,11 +4015,11 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sysv), liblapack), Cvoid,
+                ccall((@blasfunc($sysv), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
                       uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info)
+                      work, lwork, info,  1)
                 chkargsok(info[])
                 chknonsingular(info[])
                 if i == 1
@@ -4014,10 +4049,10 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sytrf), liblapack), Cvoid,
+                ccall((@blasfunc($sytrf), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      uplo, n, A, stride(A,2), ipiv, work, lwork, info)
+                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                      uplo, n, A, stride(A,2), ipiv, work, lwork, info, 1)
                 chkargsok(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -4042,10 +4077,10 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
 #             lwork = BlasInt(-1)
 #             info  = Ref{BlasInt}()
 #             for i in 1:2
-#                 ccall((@blasfunc($sytri), liblapack), Cvoid,
+#                 ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
 #                       (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt},
-#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
-#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info)
+#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong),
+#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1)
 #                 @assertargsok
 #                 chknonsingular(info[])
 #                 if lwork < 0
@@ -4069,10 +4104,10 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
             chkuplo(uplo)
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($sytri), liblapack), Cvoid,
+            ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
+                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
             chkargsok(info[])
             chknonsingular(info[])
             A
@@ -4096,10 +4131,10 @@ for (syconv, sysv, sytrf, sytri, sytrs, elty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($sytrs), liblapack), Cvoid,
+            ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
         end
@@ -4132,11 +4167,11 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sysv), liblapack), Cvoid,
+                ccall((@blasfunc($sysv), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
                       uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info)
+                      work, lwork, info, 1)
                 chkargsok(info[])
                 chknonsingular(info[])
                 if i == 1
@@ -4166,10 +4201,10 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sytrf), liblapack), Cvoid,
+                ccall((@blasfunc($sytrf), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      uplo, n, A, stride(A,2), ipiv, work, lwork, info)
+                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                      uplo, n, A, stride(A,2), ipiv, work, lwork, info, 1)
                 chkargsok(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -4192,10 +4227,10 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
             chkuplo(uplo)
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($sytri), liblapack), Cvoid,
+            ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
+                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
             chkargsok(info[])
             chknonsingular(info[])
             A
@@ -4219,10 +4254,10 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($sytrs), liblapack), Cvoid,
+            ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
         end
@@ -4259,11 +4294,13 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty) in
             # allocate
             info = Ref{BlasInt}()
 
-            ccall((@blasfunc($syconvf), liblapack), Cvoid,
+            ccall((@blasfunc($syconvf), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
+                 Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
+                 Clong, Clong),
                 uplo, way, n, A,
-                lda, e, ipiv, info)
+                lda, e, ipiv, info,
+                1, 1)
 
             chklapackerror(info[])
             return A, e
@@ -4292,10 +4329,10 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
             chkuplo(uplo)
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($syconv), liblapack), Cvoid,
+            ccall((@blasfunc($syconv), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  uplo, 'C', n, A, max(1,stride(A,2)), ipiv, work, info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong, Clong),
+                  uplo, 'C', n, A, max(1,stride(A,2)), ipiv, work, info, 1, 1)
             chklapackerror(info[])
             A, work
         end
@@ -4321,11 +4358,11 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hesv), liblapack), Cvoid,
+                ccall((@blasfunc($hesv), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
                       uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info)
+                      work, lwork, info, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -4352,10 +4389,10 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i in 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hetrf), liblapack), Cvoid,
+                ccall((@blasfunc($hetrf), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info)
+                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1)
                 chkargsok(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -4381,10 +4418,10 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
 #             lwork = BlasInt(-1)
 #             info  = Ref{BlasInt}()
 #             for i in 1:2
-#                 ccall((@blasfunc($hetri), liblapack), Cvoid,
+#                 ccall((@blasfunc($hetri), libblastrampoline), Cvoid,
 #                       (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt},
-#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
-#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info)
+#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong),
+#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1)
 #                 chklapackerror(info[])
 #                 if lwork < 0
 #                     lwork = BlasInt(real(work[1]))
@@ -4409,10 +4446,10 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
             chkuplo(uplo)
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($hetri), liblapack), Cvoid,
+            ccall((@blasfunc($hetri), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
+                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
             chklapackerror(info[])
             A
         end
@@ -4434,10 +4471,10 @@ for (syconv, hesv, hetrf, hetri, hetrs, elty, relty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($hetrs), liblapack), Cvoid,
+            ccall((@blasfunc($hetrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
         end
@@ -4469,11 +4506,11 @@ for (hesv, hetrf, hetri, hetrs, elty, relty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hesv), liblapack), Cvoid,
+                ccall((@blasfunc($hesv), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
                       uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info)
+                      work, lwork, info, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -4500,10 +4537,10 @@ for (hesv, hetrf, hetri, hetrs, elty, relty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i in 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hetrf), liblapack), Cvoid,
+                ccall((@blasfunc($hetrf), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info)
+                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1)
                 chkargsok(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -4527,10 +4564,10 @@ for (hesv, hetrf, hetri, hetrs, elty, relty) in
             chkuplo(uplo)
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($hetri), liblapack), Cvoid,
+            ccall((@blasfunc($hetri), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
+                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
             chklapackerror(info[])
             A
         end
@@ -4552,10 +4589,10 @@ for (hesv, hetrf, hetri, hetrs, elty, relty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($hetrs), liblapack), Cvoid,
+            ccall((@blasfunc($hetrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
         end
@@ -4588,11 +4625,11 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sysv), liblapack), Cvoid,
+                ccall((@blasfunc($sysv), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
                       uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info)
+                      work, lwork, info, 1)
                 chkargsok(info[])
                 chknonsingular(info[])
                 if i == 1
@@ -4623,10 +4660,10 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sytrf), liblapack), Cvoid,
+                ccall((@blasfunc($sytrf), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info)
+                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1)
                 chkargsok(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -4652,10 +4689,10 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
 #             lwork = BlasInt(-1)
 #             info  = Ref{BlasInt}()
 #             for i in 1:2
-#                 ccall((@blasfunc($sytri), liblapack), Cvoid,
+#                 ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
 #                       (Ptr{UInt8}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt},
-#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
-#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info)
+#                        Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}, Clong),
+#                       &uplo, &n, A, &max(1,stride(A,2)), ipiv, work, &lwork, info, 1)
 #                 chklapackerror(info[])
 #                 if lwork < 0
 #                     lwork = BlasInt(real(work[1]))
@@ -4679,10 +4716,10 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
             chkuplo(uplo)
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($sytri), liblapack), Cvoid,
+            ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
+                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
             chklapackerror(info[])
             A
         end
@@ -4705,10 +4742,10 @@ for (sysv, sytrf, sytri, sytrs, elty, relty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($sytrs), liblapack), Cvoid,
+            ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info,  1)
             chklapackerror(info[])
             B
         end
@@ -4741,11 +4778,11 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sysv), liblapack), Cvoid,
+                ccall((@blasfunc($sysv), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
                       uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)),
-                      work, lwork, info)
+                      work, lwork, info, 1)
                 chkargsok(info[])
                 chknonsingular(info[])
                 if i == 1
@@ -4776,10 +4813,10 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($sytrf), liblapack), Cvoid,
+                ccall((@blasfunc($sytrf), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info)
+                       Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                      uplo, n, A, max(1,stride(A,2)), ipiv, work, lwork, info, 1)
                 chkargsok(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -4803,10 +4840,10 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
             chkuplo(uplo)
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($sytri), liblapack), Cvoid,
+            ccall((@blasfunc($sytri), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}),
-                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Clong),
+                  uplo, n, A, max(1,stride(A,2)), ipiv, work, info, 1)
             chklapackerror(info[])
             A
         end
@@ -4829,10 +4866,10 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
                 throw(DimensionMismatch("B has first dimension $(size(B,1)), but needs $n"))
             end
             info = Ref{BlasInt}()
-            ccall((@blasfunc($sytrs), liblapack), Cvoid,
+            ccall((@blasfunc($sytrs), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info)
+                   Ptr{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                  uplo, n, size(B,2), A, max(1,stride(A,2)), ipiv, B, max(1,stride(B,2)), info, 1)
             chklapackerror(info[])
             B
         end
@@ -4871,11 +4908,13 @@ for (sysv, sytrf, sytri, sytrs, syconvf, elty, relty) in
             # allocate
             info = Ref{BlasInt}()
 
-            ccall((@blasfunc($syconvf), liblapack), Cvoid,
+            ccall((@blasfunc($syconvf), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
+                 Ref{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
+                 Clong, Clong),
                 uplo, way, n, A,
-                max(1, lda), e, ipiv, info)
+                max(1, lda), e, ipiv, info,
+                1, 1)
 
             chklapackerror(info[])
             return A, e
@@ -5002,10 +5041,10 @@ for (syev, syevr, sygvd, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($syev), liblapack), Cvoid,
+                ccall((@blasfunc($syev), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                      jobz, uplo, n, A, max(1,stride(A,2)), W, work, lwork, info)
+                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
+                      jobz, uplo, n, A, max(1,stride(A,2)), W, work, lwork, info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5030,6 +5069,7 @@ for (syev, syevr, sygvd, elty) in
                         vl::AbstractFloat, vu::AbstractFloat, il::Integer, iu::Integer, abstol::AbstractFloat)
             chkstride1(A)
             n = checksquare(A)
+            chkuplofinite(A, uplo)
             if range == 'I' && !(1 <= il <= iu <= n)
                 throw(ArgumentError("illegal choice of eigenvalue indices (il = $il, iu = $iu), which must be between 1 and n = $n"))
             end
@@ -5052,19 +5092,19 @@ for (syev, syevr, sygvd, elty) in
             liwork = BlasInt(-1)
             info   = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($syevr), liblapack), Cvoid,
+                ccall((@blasfunc($syevr), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                         Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty},
                         Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{BlasInt},
                         Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                        Ptr{BlasInt}),
+                        Ptr{BlasInt}, Clong, Clong, Clong),
                     jobz, range, uplo, n,
                     A, max(1,lda), vl, vu,
                     il, iu, abstol, m,
                     w, Z, max(1,ldz), isuppz,
                     work, lwork, iwork, liwork,
-                    info)
+                    info, 1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5103,15 +5143,15 @@ for (syev, syevr, sygvd, elty) in
             liwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($sygvd), liblapack), Cvoid,
+                ccall((@blasfunc($sygvd), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                      Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
-                     Ref{BlasInt}, Ptr{BlasInt}),
+                     Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
                     itype, jobz, uplo, n,
                     A, lda, B, ldb,
                     w, work, lwork, iwork,
-                    liwork, info)
+                    liwork, info, 1, 1)
                 chkargsok(info[])
                 if i == 1
                     lwork = BlasInt(work[1])
@@ -5147,10 +5187,12 @@ for (syev, syevr, sygvd, elty, relty) in
             rwork = Vector{$relty}(undef, max(1, 3n-2))
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($syev), liblapack), Cvoid,
+                ccall((@blasfunc($syev), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                      Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}),
-                      jobz, uplo, n, A, stride(A,2), W, work, lwork, rwork, info)
+                      Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt},
+                      Clong, Clong),
+                      jobz, uplo, n, A, stride(A,2), W, work, lwork, rwork, info,
+                      1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5202,19 +5244,21 @@ for (syev, syevr, sygvd, elty, relty) in
             liwork = BlasInt(-1)
             info   = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1]
-                ccall((@blasfunc($syevr), liblapack), Cvoid,
+                ccall((@blasfunc($syevr), libblastrampoline), Cvoid,
                       (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                        Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty},
                        Ref{BlasInt}, Ref{BlasInt}, Ref{$elty}, Ptr{BlasInt},
                        Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt},
                        Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ref{BlasInt},
-                       Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}),
+                       Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
+                       Clong, Clong, Clong),
                       jobz, range, uplo, n,
                       A, lda, vl, vu,
                       il, iu, abstol, m,
                       w, Z, ldz, isuppz,
                       work, lwork, rwork, lrwork,
-                      iwork, liwork, info)
+                      iwork, liwork, info,
+                      1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5257,15 +5301,17 @@ for (syev, syevr, sygvd, elty, relty) in
             lrwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1], lrwork as rwork[1] and liwork as iwork[1]
-                ccall((@blasfunc($sygvd), liblapack), Cvoid,
+                ccall((@blasfunc($sygvd), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt},
                      Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$relty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$relty},
-                     Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt}),
+                     Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
+                     Clong, Clong),
                     itype, jobz, uplo, n,
                     A, lda, B, ldb,
                     w, work, lwork, rwork,
-                    lrwork, iwork, liwork, info)
+                    lrwork, iwork, liwork, info,
+                    1, 1)
                 chkargsok(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5356,15 +5402,15 @@ for (bdsqr, relty, elty) in
             # Allocate
             work = Vector{$relty}(undef, 4n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($bdsqr), liblapack), Cvoid,
+            ccall((@blasfunc($bdsqr), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                  Ref{BlasInt}, Ptr{$relty}, Ptr{$relty}, Ptr{$elty},
                  Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                 Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}),
+                 Ref{BlasInt}, Ptr{$relty}, Ptr{BlasInt}, Clong),
                 uplo, n, ncvt, nru,
                 ncc, d, e_, Vt,
                 ldvt, U, ldu, C,
-                ldc, work, info)
+                ldc, work, info, 1)
             chklapackerror(info[])
             d, Vt, U, C #singular values in descending order, P**T * VT, U * Q, Q**T * C
         end
@@ -5427,13 +5473,15 @@ for (bdsdc, elty) in
             work  = Vector{$elty}(undef, lwork)
             iwork = Vector{BlasInt}(undef, 8n)
             info  = Ref{BlasInt}()
-            ccall((@blasfunc($bdsdc), liblapack), Cvoid,
+            ccall((@blasfunc($bdsdc), libblastrampoline), Cvoid,
                (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
                 Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                Ptr{$elty}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt}),
+                Ptr{$elty}, Ptr{BlasInt}, Ptr{$elty}, Ptr{BlasInt}, Ptr{BlasInt},
+                Clong, Clong),
                 uplo, compq, n, d, e_,
                 u, ldu, vt, ldvt,
-                q, iq, work, iwork, info)
+                q, iq, work, iwork, info,
+                1, 1)
             chklapackerror(info[])
             d, e_, u, vt, q, iq
         end
@@ -5477,12 +5525,12 @@ for (gecon, elty) in
             work = Vector{$elty}(undef, 4n)
             iwork = Vector{BlasInt}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gecon), liblapack), Cvoid,
+            ccall((@blasfunc($gecon), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                    Ref{$elty}, Ref{$elty}, Ptr{$elty}, Ptr{BlasInt},
-                   Ptr{BlasInt}),
+                   Ptr{BlasInt}, Clong),
                   normtype, n, A, lda, anorm, rcond, work, iwork,
-                  info)
+                  info, 1)
             chklapackerror(info[])
             rcond[]
         end
@@ -5511,12 +5559,12 @@ for (gecon, elty, relty) in
             work = Vector{$elty}(undef, 2n)
             rwork = Vector{$relty}(undef, 2n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($gecon), liblapack), Cvoid,
+            ccall((@blasfunc($gecon), libblastrampoline), Cvoid,
                   (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                    Ref{$relty}, Ref{$relty}, Ptr{$elty}, Ptr{$relty},
-                   Ptr{BlasInt}),
+                   Ptr{BlasInt}, Clong),
                   normtype, n, A, lda, anorm, rcond, work, rwork,
-                  info)
+                  info, 1)
             chklapackerror(info[])
             rcond[]
         end
@@ -5554,7 +5602,7 @@ for (gehrd, elty) in
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gehrd), liblapack), Cvoid,
+                ccall((@blasfunc($gehrd), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{BlasInt}),
@@ -5605,7 +5653,7 @@ for (orghr, elty) in
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orghr), liblapack), Cvoid,
+                ccall((@blasfunc($orghr), libblastrampoline), Cvoid,
                     (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{BlasInt}),
@@ -5662,15 +5710,15 @@ for (ormhr, elty) in
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormhr), liblapack), Cvoid,
+                ccall((@blasfunc($ormhr), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                      Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{BlasInt}),
+                     Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong),
                     side, trans, mC, nC,
                     ilo, ihi, A, max(1, stride(A, 2)),
                     tau, C, max(1, stride(C, 2)), work,
-                    lwork, info)
+                    lwork, info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5708,11 +5756,11 @@ for (hetrd, elty) in
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($hetrd), liblapack), Cvoid,
+                ccall((@blasfunc($hetrd), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$relty}, Ptr{$relty},
-                    Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}),
-                    uplo, n, A, max(1, stride(A, 2)), d, e, tau, work, lwork, info)
+                    Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Clong),
+                    uplo, n, A, max(1, stride(A, 2)), d, e, tau, work, lwork, info, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5758,13 +5806,13 @@ for (orgtr, elty) in
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($orgtr), liblapack), Cvoid,
+                ccall((@blasfunc($orgtr), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{BlasInt}, Ptr{$elty},
                      Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ref{BlasInt},
-                     Ptr{BlasInt}),
+                     Ptr{BlasInt}, Clong),
                     uplo, n, A,
                     max(1, stride(A, 2)), tau, work, lwork,
-                    info)
+                    info, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5816,15 +5864,15 @@ for (ormtr, elty) in
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($ormtr), liblapack), Cvoid,
+                ccall((@blasfunc($ormtr), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt},
                      Ptr{$elty}, Ref{BlasInt},
                      Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                     Ref{BlasInt}, Ptr{BlasInt}),
+                     Ref{BlasInt}, Ptr{BlasInt}, Clong, Clong, Clong),
                     side, uplo, trans, mC, nC,
                     A, max(1, stride(A, 2)),
                     tau, C, max(1, stride(C, 2)), work,
-                    lwork, info)
+                    lwork, info, 1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5861,15 +5909,15 @@ for (gees, gges, elty) in
             lwork = BlasInt(-1)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gees), liblapack), Cvoid,
+                ccall((@blasfunc($gees), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ptr{$elty},
                         Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
-                        Ref{BlasInt}, Ptr{Cvoid}, Ptr{BlasInt}),
+                        Ref{BlasInt}, Ptr{Cvoid}, Ptr{BlasInt}, Clong, Clong),
                     jobvs, 'N', C_NULL, n,
                         A, max(1, stride(A, 2)), sdim, wr,
                         wi, vs, ldvs, work,
-                        lwork, C_NULL, info)
+                        lwork, C_NULL, info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5898,27 +5946,27 @@ for (gees, gges, elty) in
             alphar = similar(A, $elty, n)
             alphai = similar(A, $elty, n)
             beta = similar(A, $elty, n)
-            ldvsl = jobvsl == 'V' ? n : 1
+            ldvsl = jobvsl == 'V' ? max(1, n) : 1
             vsl = similar(A, $elty, ldvsl, n)
-            ldvsr = jobvsr == 'V' ? n : 1
+            ldvsr = jobvsr == 'V' ? max(1, n) : 1
             vsr = similar(A, $elty, ldvsr, n)
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gges), liblapack), Cvoid,
+                ccall((@blasfunc($gges), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid},
                         Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                         Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
                         Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                         Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{Cvoid},
-                        Ptr{BlasInt}),
+                        Ptr{BlasInt}, Clong, Clong, Clong),
                     jobvsl, jobvsr, 'N', C_NULL,
                     n, A, max(1,stride(A, 2)), B,
                     max(1,stride(B, 2)), sdim, alphar, alphai,
                     beta, vsl, ldvsl, vsr,
                     ldvsr, work, lwork, C_NULL,
-                    info)
+                    info, 1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5956,15 +6004,15 @@ for (gees, gges, elty, relty) in
             rwork = Vector{$relty}(undef, n)
             info  = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gees), liblapack), Cvoid,
+                ccall((@blasfunc($gees), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid}, Ref{BlasInt},
                         Ptr{$elty}, Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                        Ptr{$relty}, Ptr{Cvoid}, Ptr{BlasInt}),
+                        Ptr{$relty}, Ptr{Cvoid}, Ptr{BlasInt}, Clong, Clong),
                     jobvs, sort, C_NULL, n,
                         A, max(1, stride(A, 2)), sdim, w,
                         vs, ldvs, work, lwork,
-                        rwork, C_NULL, info)
+                        rwork, C_NULL, info, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -5993,28 +6041,28 @@ for (gees, gges, elty, relty) in
             sdim = BlasInt(0)
             alpha = similar(A, $elty, n)
             beta = similar(A, $elty, n)
-            ldvsl = jobvsl == 'V' ? n : 1
+            ldvsl = jobvsl == 'V' ? max(1, n) : 1
             vsl = similar(A, $elty, ldvsl, n)
-            ldvsr = jobvsr == 'V' ? n : 1
+            ldvsr = jobvsr == 'V' ? max(1, n) : 1
             vsr = similar(A, $elty, ldvsr, n)
             work = Vector{$elty}(undef, 1)
             lwork = BlasInt(-1)
             rwork = Vector{$relty}(undef, 8n)
             info = Ref{BlasInt}()
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($gges), liblapack), Cvoid,
+                ccall((@blasfunc($gges), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ref{UInt8}, Ptr{Cvoid},
                         Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                         Ref{BlasInt}, Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                         Ptr{$elty}, Ref{BlasInt}, Ptr{$relty}, Ptr{Cvoid},
-                        Ptr{BlasInt}),
+                        Ptr{BlasInt}, Clong, Clong, Clong),
                     jobvsl, jobvsr, 'N', C_NULL,
                     n, A, max(1, stride(A, 2)), B,
                     max(1, stride(B, 2)), sdim, alpha, beta,
                     vsl, ldvsl, vsr, ldvsr,
                     work, lwork, rwork, C_NULL,
-                    info)
+                    info, 1, 1, 1)
                 chklapackerror(info[])
                 if i == 1
                     lwork = BlasInt(real(work[1]))
@@ -6067,15 +6115,15 @@ for (trexc, trsen, tgsen, elty) in
             ldq = max(1, stride(Q, 2))
             work = Vector{$elty}(undef, n)
             info = Ref{BlasInt}()
-            ccall((@blasfunc($trexc), liblapack), Cvoid,
+            ccall((@blasfunc($trexc), libblastrampoline), Cvoid,
                   (Ref{UInt8},  Ref{BlasInt},
                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                    Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{$elty}, Ptr{BlasInt}),
+                   Ptr{$elty}, Ptr{BlasInt}, Clong),
                   compq, n,
                   T, ldt, Q, ldq,
                   ifst, ilst,
-                  work, info)
+                  work, info, 1)
             chklapackerror(info[])
             T, Q
         end
@@ -6109,17 +6157,17 @@ for (trexc, trsen, tgsen, elty) in
             s = Ref{$elty}(zero($elty))
             sep = Ref{$elty}(zero($elty))
             for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($trsen), liblapack), Cvoid,
+                ccall((@blasfunc($trsen), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$elty}, Ptr{$elty}, Ref{BlasInt}, Ref{$elty}, Ref{$elty},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{BlasInt}, Ref{BlasInt},
-                    Ptr{BlasInt}),
+                    Ptr{BlasInt}, Clong, Clong),
                     job, compq, select, n,
                     T, ldt, Q, ldq,
                     wr, wi, m, s, sep,
                     work, lwork, iwork, liwork,
-                    info)
+                    info, 1, 1)
                 chklapackerror(info[])
                 if i == 1 # only estimated optimal lwork, liwork
                     lwork  = BlasInt(real(work[1]))
@@ -6174,7 +6222,7 @@ for (trexc, trsen, tgsen, elty) in
             info = Ref{BlasInt}()
             select = convert(Array{BlasInt}, select)
             for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($tgsen), liblapack), Cvoid,
+                ccall((@blasfunc($tgsen), libblastrampoline), Cvoid,
                        (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
                         Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                         Ref{BlasInt}, Ptr{$elty}, Ptr{$elty}, Ptr{$elty},
@@ -6218,15 +6266,15 @@ for (trexc, trsen, tgsen, elty, relty) in
             ldt = max(1, stride(T, 2))
             ldq = max(1, stride(Q, 2))
             info = Ref{BlasInt}()
-            ccall((@blasfunc($trexc), liblapack), Cvoid,
+            ccall((@blasfunc($trexc), libblastrampoline), Cvoid,
                   (Ref{UInt8},  Ref{BlasInt},
                    Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                    Ref{BlasInt}, Ref{BlasInt},
-                   Ptr{BlasInt}),
+                   Ptr{BlasInt}, Clong),
                   compq, n,
                   T, ldt, Q, ldq,
                   ifst, ilst,
-                  info)
+                  info,  1)
             chklapackerror(info[])
             T, Q
         end
@@ -6256,17 +6304,17 @@ for (trexc, trsen, tgsen, elty, relty) in
             s = Ref{$relty}(zero($relty))
             sep = Ref{$relty}(zero($relty))
             for i = 1:2  # first call returns lwork as work[1]
-                ccall((@blasfunc($trsen), liblapack), Cvoid,
+                ccall((@blasfunc($trsen), libblastrampoline), Cvoid,
                     (Ref{UInt8}, Ref{UInt8}, Ptr{BlasInt}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
                     Ptr{$elty}, Ref{BlasInt}, Ref{$relty}, Ref{$relty},
                     Ptr{$elty}, Ref{BlasInt},
-                    Ptr{BlasInt}),
+                    Ptr{BlasInt}, Clong, Clong),
                     job, compq, select, n,
                     T, ldt, Q, ldq,
                     w, m, s, sep,
                     work, lwork,
-                    info)
+                    info, 1, 1)
                 chklapackerror(info[])
                 if i == 1 # only estimated optimal lwork, liwork
                     lwork  = BlasInt(real(work[1]))
@@ -6318,7 +6366,7 @@ for (trexc, trsen, tgsen, elty, relty) in
             info = Ref{BlasInt}()
             select = convert(Array{BlasInt}, select)
             for i = 1:2  # first call returns lwork as work[1] and liwork as iwork[1]
-                ccall((@blasfunc($tgsen), liblapack), Cvoid,
+                ccall((@blasfunc($tgsen), libblastrampoline), Cvoid,
                        (Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt}, Ptr{BlasInt},
                         Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty},
                         Ref{BlasInt}, Ptr{$elty}, Ptr{$elty},
@@ -6348,15 +6396,18 @@ end
 
 """
     trexc!(compq, ifst, ilst, T, Q) -> (T, Q)
+    trexc!(ifst, ilst, T, Q) -> (T, Q)
 
-Reorder the Schur factorization of a matrix. If `compq = V`, the Schur
-vectors `Q` are reordered. If `compq = N` they are not modified. `ifst`
-and `ilst` specify the reordering of the vectors.
+Reorder the Schur factorization `T` of a matrix, such that the diagonal block
+of `T` with row index `ifst` is moved to row index `ilst`. If `compq = V`, the Schur
+vectors `Q` are reordered. If `compq = N` they are not modified. The 4-arg method
+calls the 5-arg method with `compq = V`.
 """
 trexc!(compq::AbstractChar, ifst::BlasInt, ilst::BlasInt, T::AbstractMatrix, Q::AbstractMatrix)
 
 """
-    trsen!(compq, job, select, T, Q) -> (T, Q, w, s, sep)
+    trsen!(job, compq, select, T, Q) -> (T, Q, w, s, sep)
+    trsen!(select, T, Q) -> (T, Q, w, s, sep)
 
 Reorder the Schur factorization of a matrix and optionally finds reciprocal
 condition numbers. If `job = N`, no condition numbers are found. If `job = E`,
@@ -6365,7 +6416,8 @@ only the condition number for this cluster of eigenvalues is found. If
 If `job = B` then the condition numbers for the cluster and subspace are
 found. If `compq = V` the Schur vectors `Q` are updated. If `compq = N`
 the Schur vectors are not modified. `select` determines which
-eigenvalues are in the cluster.
+eigenvalues are in the cluster. The 3-arg method calls the 5-arg method
+with `job = N` and `compq = V`.
 
 Returns `T`, `Q`, reordered eigenvalues in `w`, the condition number of the
 cluster of eigenvalues `s`, and the condition number of the invariant subspace
@@ -6390,7 +6442,7 @@ for (fn, elty, relty) in ((:dtrsyl_, :Float64, :Float64),
                         B::AbstractMatrix{$elty}, C::AbstractMatrix{$elty}, isgn::Int=1)
             require_one_based_indexing(A, B, C)
             chkstride1(A, B, C)
-            m, n = checksquare(A, B)
+            m, n = checksquare(A), checksquare(B)
             lda = max(1, stride(A, 2))
             ldb = max(1, stride(B, 2))
             m1, n1 = size(C)
@@ -6398,17 +6450,17 @@ for (fn, elty, relty) in ((:dtrsyl_, :Float64, :Float64),
                 throw(DimensionMismatch("dimensions of A, ($m,$n), and C, ($m1,$n1), must match"))
             end
             ldc = max(1, stride(C, 2))
-            scale = Vector{$relty}(undef, 1)
+            scale = Ref{$relty}()
             info  = Ref{BlasInt}()
-            ccall((@blasfunc($fn), liblapack), Cvoid,
+            ccall((@blasfunc($fn), libblastrampoline), Cvoid,
                 (Ref{UInt8}, Ref{UInt8}, Ref{BlasInt}, Ref{BlasInt}, Ref{BlasInt},
                  Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt}, Ptr{$elty}, Ref{BlasInt},
-                 Ptr{$relty}, Ptr{BlasInt}),
+                 Ptr{$relty}, Ptr{BlasInt}, Clong, Clong),
                 transa, transb, isgn, m, n,
                 A, lda, B, ldb, C, ldc,
-                scale, info)
+                scale, info, 1, 1)
             chklapackerror(info[])
-            C, scale[1]
+            C, scale[]
         end
     end
 end
diff --git a/stdlib/LinearAlgebra/src/lbt.jl b/stdlib/LinearAlgebra/src/lbt.jl
new file mode 100644
index 00000000000000..7648157a01a7d4
--- /dev/null
+++ b/stdlib/LinearAlgebra/src/lbt.jl
@@ -0,0 +1,307 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## This file contains libblastrampoline-specific APIs
+
+# Keep these in sync with `src/libblastrampoline_internal.h`
+struct lbt_library_info_t
+    libname::Cstring
+    handle::Ptr{Cvoid}
+    suffix::Cstring
+    active_forwards::Ptr{UInt8}
+    interface::Int32
+    complex_retstyle::Int32
+    f2c::Int32
+    cblas::Int32
+end
+const LBT_INTERFACE_LP64    = 32
+const LBT_INTERFACE_ILP64   = 64
+const LBT_INTERFACE_UNKNOWN = -1
+const LBT_INTERFACE_MAP = Dict(
+    LBT_INTERFACE_LP64    => :lp64,
+    LBT_INTERFACE_ILP64   => :ilp64,
+    LBT_INTERFACE_UNKNOWN => :unknown,
+)
+const LBT_INV_INTERFACE_MAP = Dict(v => k for (k, v) in LBT_INTERFACE_MAP)
+
+const LBT_F2C_PLAIN         =  0
+const LBT_F2C_REQUIRED      =  1
+const LBT_F2C_UNKNOWN       = -1
+const LBT_F2C_MAP = Dict(
+    LBT_F2C_PLAIN    => :plain,
+    LBT_F2C_REQUIRED => :required,
+    LBT_F2C_UNKNOWN  => :unknown,
+)
+const LBT_INV_F2C_MAP = Dict(v => k for (k, v) in LBT_F2C_MAP)
+
+const LBT_COMPLEX_RETSTYLE_NORMAL   =  0
+const LBT_COMPLEX_RETSTYLE_ARGUMENT =  1
+const LBT_COMPLEX_RETSTYLE_UNKNOWN  = -1
+const LBT_COMPLEX_RETSTYLE_MAP = Dict(
+    LBT_COMPLEX_RETSTYLE_NORMAL   => :normal,
+    LBT_COMPLEX_RETSTYLE_ARGUMENT => :argument,
+    LBT_COMPLEX_RETSTYLE_UNKNOWN  => :unknown,
+)
+const LBT_INV_COMPLEX_RETSTYLE_MAP = Dict(v => k for (k, v) in LBT_COMPLEX_RETSTYLE_MAP)
+
+const LBT_CBLAS_CONFORMANT =  0
+const LBT_CBLAS_DIVERGENT  =  1
+const LBT_CBLAS_UNKNOWN    = -1
+const LBT_CBLAS_MAP = Dict(
+    LBT_CBLAS_CONFORMANT => :conformant,
+    LBT_CBLAS_DIVERGENT  => :divergent,
+    LBT_CBLAS_UNKNOWN    => :unknown,
+)
+const LBT_INV_CBLAS_MAP = Dict(v => k for (k, v) in LBT_CBLAS_MAP)
+
+struct LBTLibraryInfo
+    libname::String
+    handle::Ptr{Cvoid}
+    suffix::String
+    active_forwards::Vector{UInt8}
+    interface::Symbol
+    complex_retstyle::Symbol
+    f2c::Symbol
+    cblas::Symbol
+
+    function LBTLibraryInfo(lib_info::lbt_library_info_t, num_exported_symbols::UInt32)
+        return new(
+            unsafe_string(lib_info.libname),
+            lib_info.handle,
+            unsafe_string(lib_info.suffix),
+            unsafe_wrap(Vector{UInt8}, lib_info.active_forwards, div(num_exported_symbols,8)+1),
+            LBT_INTERFACE_MAP[lib_info.interface],
+            LBT_COMPLEX_RETSTYLE_MAP[lib_info.complex_retstyle],
+            LBT_F2C_MAP[lib_info.f2c],
+            LBT_CBLAS_MAP[lib_info.cblas],
+        )
+    end
+end
+
+struct lbt_config_t
+    loaded_libs::Ptr{Ptr{lbt_library_info_t}}
+    build_flags::UInt32
+    exported_symbols::Ptr{Cstring}
+    num_exported_symbols::UInt32
+end
+const LBT_BUILDFLAGS_DEEPBINDLESS = 0x01
+const LBT_BUILDFLAGS_F2C_CAPABLE  = 0x02
+const LBT_BUILDFLAGS_MAP = Dict(
+    LBT_BUILDFLAGS_DEEPBINDLESS => :deepbindless,
+    LBT_BUILDFLAGS_F2C_CAPABLE => :f2c_capable,
+)
+
+struct LBTConfig
+    loaded_libs::Vector{LBTLibraryInfo}
+    build_flags::Vector{Symbol}
+    exported_symbols::Vector{String}
+
+    function LBTConfig(config::lbt_config_t)
+        # Decode OR'ed flags into a list of names
+        build_flag_names = Symbol[]
+        for (flag, name) in LBT_BUILDFLAGS_MAP
+            if config.build_flags & flag != 0x00
+                push!(build_flag_names, name)
+            end
+        end
+
+        # Load all exported symbol names
+        exported_symbols = String[]
+        for sym_idx in 1:config.num_exported_symbols
+            str_ptr = unsafe_load(config.exported_symbols, sym_idx)
+            if str_ptr != C_NULL
+                push!(exported_symbols, unsafe_string(str_ptr))
+            else
+                @error("NULL string in lbt_config.exported_symbols[$(sym_idx)]")
+            end
+        end
+
+        # Unpack library info structures
+        libs = LBTLibraryInfo[]
+        idx = 1
+        lib_ptr = unsafe_load(config.loaded_libs, idx)
+        while lib_ptr != C_NULL
+            push!(libs, LBTLibraryInfo(unsafe_load(lib_ptr), config.num_exported_symbols))
+
+            idx += 1
+            lib_ptr = unsafe_load(config.loaded_libs, idx)
+        end
+        return new(
+            libs,
+            build_flag_names,
+            exported_symbols,
+        )
+    end
+end
+
+Base.show(io::IO, lbt::LBTLibraryInfo) = print(io, "LBTLibraryInfo(", basename(lbt.libname), ", ", lbt.interface, ")")
+function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTLibraryInfo)
+    summary(io, lbt); println(io)
+    println(io, "├ Library: ", basename(lbt.libname))
+    println(io, "├ Interface: ", lbt.interface)
+    println(io, "├ Complex return style: ", lbt.complex_retstyle)
+    println(io, "├ F2C: ", lbt.f2c)
+      print(io, "└ CBLAS: ", lbt.cblas)
+end
+
+function Base.show(io::IO, lbt::LBTConfig)
+    if length(lbt.loaded_libs) <= 3
+        print(io, "LBTConfig(")
+        gen = (string("[", uppercase(string(l.interface)), "] ",
+            basename(l.libname)) for l in lbt.loaded_libs)
+        print(io, join(gen, ", "))
+        print(io, ")")
+    else
+        print(io, "LBTConfig(...)")
+    end
+end
+function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, lbt::LBTConfig)
+    summary(io, lbt); println(io)
+    println(io, "Libraries: ")
+    for (i,l) in enumerate(lbt.loaded_libs)
+        char = i == length(lbt.loaded_libs) ? "└" : "├"
+        interface_str = if l.interface == :ilp64
+            "ILP64"
+        elseif l.interface == :lp64
+            " LP64"
+        else
+            "UNKWN"
+        end
+        print(io, char, " [", interface_str,"] ", basename(l.libname))
+        i !== length(lbt.loaded_libs) && println()
+    end
+end
+
+mutable struct ConfigCache
+    @atomic config::Union{Nothing,LBTConfig}
+    lock::ReentrantLock
+end
+
+# In the event that users want to call `lbt_get_config()` multiple times (e.g. for
+# runtime checks of which BLAS vendor is providing a symbol), let's cache the value
+# and clear it only when someone calls something that would cause it to change.
+const _CACHED_CONFIG = ConfigCache(nothing, ReentrantLock())
+
+function lbt_get_config()
+    config = @atomic :acquire _CACHED_CONFIG.config
+    config === nothing || return config
+    return lock(_CACHED_CONFIG.lock) do
+        local config = @atomic :monotonic _CACHED_CONFIG.config
+        config === nothing || return config
+        config_ptr = ccall((:lbt_get_config, libblastrampoline), Ptr{lbt_config_t}, ())
+        @atomic :release _CACHED_CONFIG.config = LBTConfig(unsafe_load(config_ptr))
+    end
+end
+
+function _clear_config_with(f)
+    lock(_CACHED_CONFIG.lock) do
+        @atomic :release _CACHED_CONFIG.config = nothing
+        f()
+    end
+end
+
+function lbt_get_num_threads()
+    return ccall((:lbt_get_num_threads, libblastrampoline), Int32, ())
+end
+
+function lbt_set_num_threads(nthreads)
+    return ccall((:lbt_set_num_threads, libblastrampoline), Cvoid, (Int32,), nthreads)
+end
+
+function lbt_forward(path; clear::Bool = false, verbose::Bool = false, suffix_hint::Union{String,Nothing} = nothing)
+    _clear_config_with() do
+        return ccall((:lbt_forward, libblastrampoline), Int32, (Cstring, Int32, Int32, Cstring), path, clear ? 1 : 0, verbose ? 1 : 0, something(suffix_hint, C_NULL))
+    end
+end
+
+function lbt_set_default_func(addr)
+    _clear_config_with() do
+        return ccall((:lbt_set_default_func, libblastrampoline), Cvoid, (Ptr{Cvoid},), addr)
+    end
+end
+
+function lbt_get_default_func()
+    return ccall((:lbt_get_default_func, libblastrampoline), Ptr{Cvoid}, ())
+end
+
+"""
+    lbt_find_backing_library(symbol_name, interface; config::LBTConfig = lbt_get_config())
+
+Return the `LBTLibraryInfo` that represents the backing library for the given symbol
+exported from libblastrampoline.  This allows us to discover which library will service
+a particular BLAS call from Julia code.  This method returns `nothing` if either of the
+following conditions are met:
+
+ * No loaded library exports the desired symbol (the default function will be called)
+ * The symbol was set via `lbt_set_forward()`, which does not track library provenance.
+
+If the given `symbol_name` is not contained within the list of exported symbols, an
+`ArgumentError` will be thrown.
+"""
+function lbt_find_backing_library(symbol_name, interface::Symbol;
+                                  config::LBTConfig = lbt_get_config())
+    if interface ∉ (:ilp64, :lp64)
+        throw(Argument("Invalid interface specification: '$(interface)'"))
+    end
+    symbol_idx = findfirst(s -> s == symbol_name, config.exported_symbols)
+    if symbol_idx === nothing
+        throw(ArgumentError("Invalid exported symbol name '$(symbol_name)'"))
+    end
+    # Convert to zero-indexed
+    symbol_idx -= 1
+
+    forward_byte_offset = div(symbol_idx, 8)
+    forward_byte_mask = 1 << mod(symbol_idx, 8)
+    for lib in filter(l -> l.interface == interface, config.loaded_libs)
+        if lib.active_forwards[forward_byte_offset+1] & forward_byte_mask != 0x00
+            return lib
+        end
+    end
+
+    # No backing library was found
+    return nothing
+end
+
+
+## NOTE: Manually setting forwards is referred to as the 'footgun API'.  It allows truly
+## bizarre and complex setups to be created.  If you run into strange errors while using
+## it, the first thing you should ask yourself is whether you've set things up properly.
+function lbt_set_forward(symbol_name, addr, interface,
+                         complex_retstyle = LBT_COMPLEX_RETSTYLE_NORMAL,
+                         f2c = LBT_F2C_PLAIN; verbose::Bool = false)
+    _clear_config_with() do
+        return ccall(
+            (:lbt_set_forward, libblastrampoline),
+            Int32,
+            (Cstring, Ptr{Cvoid}, Int32, Int32, Int32, Int32),
+            string(symbol_name),
+            addr,
+            Int32(interface),
+            Int32(complex_retstyle),
+            Int32(f2c),
+            verbose ? Int32(1) : Int32(0),
+        )
+    end
+end
+function lbt_set_forward(symbol_name, addr, interface::Symbol,
+                         complex_retstyle::Symbol = :normal,
+                         f2c::Symbol = :plain; kwargs...)
+    return lbt_set_forward(symbol_name, addr,
+                           LBT_INV_INTERFACE_MAP[interface],
+                           LBT_INV_COMPLEX_RETSTYLE_MAP[complex_retstyle],
+                           LBT_INV_F2C_MAP[f2c];
+                           kwargs...)
+end
+
+function lbt_get_forward(symbol_name, interface, f2c = LBT_F2C_PLAIN)
+    return ccall(
+        (:lbt_get_forward, libblastrampoline),
+        Ptr{Cvoid},
+        (Cstring, Int32, Int32),
+        string(symbol_name),
+        Int32(interface),
+        Int32(f2c),
+    )
+end
+function lbt_get_forward(symbol_name, interface::Symbol, f2c::Symbol = :plain)
+    return lbt_get_forward(symbol_name, LBT_INV_INTERFACE_MAP[interface], LBT_INV_F2C_MAP[f2c])
+end
diff --git a/stdlib/LinearAlgebra/src/ldlt.jl b/stdlib/LinearAlgebra/src/ldlt.jl
index 8cef5408ecfa32..8c6bfee435186c 100644
--- a/stdlib/LinearAlgebra/src/ldlt.jl
+++ b/stdlib/LinearAlgebra/src/ldlt.jl
@@ -77,6 +77,9 @@ function getproperty(F::LDLt, d::Symbol)
     end
 end
 
+adjoint(F::LDLt{<:Real,<:SymTridiagonal}) = F
+adjoint(F::LDLt) = LDLt(copy(adjoint(F.data)))
+
 function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LDLt)
     summary(io, F); println(io)
     println(io, "L factor:")
@@ -115,7 +118,8 @@ function ldlt!(S::SymTridiagonal{T,V}) where {T,V}
     n = size(S,1)
     d = S.dv
     e = S.ev
-    @inbounds @simd for i = 1:n-1
+    @inbounds for i in 1:n-1
+        iszero(d[i]) && throw(ZeroPivotException(i))
         e[i] /= d[i]
         d[i+1] -= e[i]^2*d[i]
     end
@@ -125,10 +129,12 @@ end
 """
     ldlt(S::SymTridiagonal) -> LDLt
 
-Compute an `LDLt` factorization of the real symmetric tridiagonal matrix `S` such that `S = L*Diagonal(d)*L'`
+Compute an `LDLt` (i.e., ``LDL^T``) factorization of the real symmetric tridiagonal matrix `S` such that `S = L*Diagonal(d)*L'`
 where `L` is a unit lower triangular matrix and `d` is a vector. The main use of an `LDLt`
 factorization `F = ldlt(S)` is to solve the linear system of equations `Sx = b` with `F\\b`.
 
+See also [`bunchkaufman`](@ref) for a similar, but pivoted, factorization of arbitrary symmetric or Hermitian matrices.
+
 # Examples
 ```jldoctest
 julia> S = SymTridiagonal([3., 4., 5.], [1., 2.])
@@ -156,7 +162,7 @@ julia> S \\ b
 """
 function ldlt(M::SymTridiagonal{T}; shift::Number=false) where T
     S = typeof((zero(T)+shift)/one(T))
-    Mₛ = SymTridiagonal{S}(copy_oftype(M.dv, S), copy_oftype(M.ev, S))
+    Mₛ = SymTridiagonal{S}(copymutable_oftype(M.dv, S), copymutable_oftype(M.ev, S))
     if !iszero(shift)
         Mₛ.dv .+= shift
     end
diff --git a/stdlib/LinearAlgebra/src/lq.jl b/stdlib/LinearAlgebra/src/lq.jl
index 21ba6a27ac5805..80933cf3c6f469 100644
--- a/stdlib/LinearAlgebra/src/lq.jl
+++ b/stdlib/LinearAlgebra/src/lq.jl
@@ -22,9 +22,15 @@ julia> A = [5. 7.; -2. -4.]
  -2.0  -4.0
 
 julia> S = lq(A)
-LQ{Float64, Matrix{Float64}} with factors L and Q:
-[-8.60233 0.0; 4.41741 -0.697486]
-[-0.581238 -0.813733; -0.813733 0.581238]
+LQ{Float64, Matrix{Float64}, Vector{Float64}}
+L factor:
+2×2 Matrix{Float64}:
+ -8.60233   0.0
+  4.41741  -0.697486
+Q factor:
+2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}:
+ -0.581238  -0.813733
+ -0.813733   0.581238
 
 julia> S.L * S.Q
 2×2 Matrix{Float64}:
@@ -37,31 +43,32 @@ julia> l == S.L &&  q == S.Q
 true
 ```
 """
-struct LQ{T,S<:AbstractMatrix{T}} <: Factorization{T}
+struct LQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: Factorization{T}
     factors::S
-    τ::Vector{T}
+    τ::C
 
-    function LQ{T,S}(factors, τ) where {T,S<:AbstractMatrix{T}}
+    function LQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}}
         require_one_based_indexing(factors)
-        new{T,S}(factors, τ)
+        new{T,S,C}(factors, τ)
     end
 end
-LQ(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = LQ{T,typeof(factors)}(factors, τ)
-function LQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T}
-    LQ(convert(AbstractMatrix{T}, factors), convert(Vector{T}, τ))
-end
+LQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} =
+    LQ{T,typeof(factors),typeof(τ)}(factors, τ)
+LQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} =
+    LQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ))
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(LQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S},
+           LQ{T,S,typeof(τ)}(factors, τ))
 
 # iteration for destructuring into components
 Base.iterate(S::LQ) = (S.L, Val(:Q))
 Base.iterate(S::LQ, ::Val{:Q}) = (S.Q, Val(:done))
 Base.iterate(S::LQ, ::Val{:done}) = nothing
 
-struct LQPackedQ{T,S<:AbstractMatrix} <: AbstractMatrix{T}
-    factors::Matrix{T}
-    τ::Vector{T}
-    LQPackedQ{T,S}(factors::AbstractMatrix{T}, τ::Vector{T}) where {T,S<:AbstractMatrix} = new(factors, τ)
+struct LQPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractMatrix{T}
+    factors::S
+    τ::C
 end
-LQPackedQ(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = LQPackedQ{T,typeof(factors)}(factors, τ)
 
 
 """
@@ -92,9 +99,15 @@ julia> A = [5. 7.; -2. -4.]
  -2.0  -4.0
 
 julia> S = lq(A)
-LQ{Float64, Matrix{Float64}} with factors L and Q:
-[-8.60233 0.0; 4.41741 -0.697486]
-[-0.581238 -0.813733; -0.813733 0.581238]
+LQ{Float64, Matrix{Float64}, Vector{Float64}}
+L factor:
+2×2 Matrix{Float64}:
+ -8.60233   0.0
+  4.41741  -0.697486
+Q factor:
+2×2 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}:
+ -0.581238  -0.813733
+ -0.813733   0.581238
 
 julia> S.L * S.Q
 2×2 Matrix{Float64}:
@@ -107,14 +120,16 @@ julia> l == S.L &&  q == S.Q
 true
 ```
 """
-lq(A::StridedMatrix{<:BlasFloat})  = lq!(copy(A))
-lq(x::Number) = lq(fill(x,1,1))
+lq(A::AbstractMatrix{T}) where {T}  = lq!(copymutable_oftype(A, lq_eltype(T)))
+lq(x::Number) = lq!(fill(convert(lq_eltype(typeof(x)), x), 1, 1))
+
+lq_eltype(::Type{T}) where {T} = typeof(zero(T) / sqrt(abs2(one(T))))
 
 copy(A::LQ) = LQ(copy(A.factors), copy(A.τ))
 
 LQ{T}(A::LQ) where {T} = LQ(convert(AbstractMatrix{T}, A.factors), convert(Vector{T}, A.τ))
-Factorization{T}(A::LQ{T}) where {T} = A
 Factorization{T}(A::LQ) where {T} = LQ{T}(A)
+
 AbstractMatrix(A::LQ) = A.L*A.Q
 AbstractArray(A::LQ) = AbstractMatrix(A)
 Matrix(A::LQ) = Array(AbstractArray(A))
@@ -122,7 +137,7 @@ Array(A::LQ) = Matrix(A)
 
 adjoint(A::LQ) = Adjoint(A)
 Base.copy(F::Adjoint{T,<:LQ{T}}) where {T} =
-    QR{T,typeof(F.parent.factors)}(copy(adjoint(F.parent.factors)), copy(F.parent.τ))
+    QR{T,typeof(F.parent.factors),typeof(F.parent.τ)}(copy(adjoint(F.parent.factors)), copy(F.parent.τ))
 
 function getproperty(F::LQ, d::Symbol)
     m, n = size(F)
@@ -141,12 +156,12 @@ Base.propertynames(F::LQ, private::Bool=false) =
 getindex(A::LQPackedQ, i::Integer, j::Integer) =
     lmul!(A, setindex!(zeros(eltype(A), size(A, 2)), 1, j))[i]
 
-function show(io::IO, ::MIME"text/plain", C::LQ)
-    println(io, typeof(C), " with factors L and Q:")
-    io = IOContext(io, :compact => true)
-    show(io, C.L)
-    println(io)
-    show(io, C.Q)
+function show(io::IO, mime::MIME{Symbol("text/plain")}, F::LQ)
+    summary(io, F); println(io)
+    println(io, "L factor:")
+    show(io, mime, F.L)
+    println(io, "\nQ factor:")
+    show(io, mime, F.Q)
 end
 
 LQPackedQ{T}(Q::LQPackedQ) where {T} = LQPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(Vector{T}, Q.τ))
@@ -182,7 +197,7 @@ function lmul!(A::LQ, B::StridedVecOrMat)
 end
 function *(A::LQ{TA}, B::StridedVecOrMat{TB}) where {TA,TB}
     TAB = promote_type(TA, TB)
-    _cut_B(lmul!(Factorization{TAB}(A), copy_oftype(B, TAB)), 1:size(A,1))
+    _cut_B(lmul!(convert(Factorization{TAB}, A), copymutable_oftype(B, TAB)), 1:size(A,1))
 end
 
 ## Multiplication by Q
@@ -190,20 +205,20 @@ end
 lmul!(A::LQPackedQ{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} = LAPACK.ormlq!('L','N',A.factors,A.τ,B)
 function (*)(A::LQPackedQ, B::StridedVecOrMat)
     TAB = promote_type(eltype(A), eltype(B))
-    lmul!(AbstractMatrix{TAB}(A), copy_oftype(B, TAB))
+    lmul!(AbstractMatrix{TAB}(A), copymutable_oftype(B, TAB))
 end
 
 ### QcB
 lmul!(adjA::Adjoint{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
-    (A = adjA.parent; LAPACK.ormlq!('L','T',A.factors,A.τ,B))
+    (A = adjA.parent; LAPACK.ormlq!('L', 'T', A.factors, A.τ, B))
 lmul!(adjA::Adjoint{<:Any,<:LQPackedQ{T}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-    (A = adjA.parent; LAPACK.ormlq!('L','C',A.factors,A.τ,B))
+    (A = adjA.parent; LAPACK.ormlq!('L', 'C', A.factors, A.τ, B))
 
 function *(adjA::Adjoint{<:Any,<:LQPackedQ}, B::StridedVecOrMat)
     A = adjA.parent
     TAB = promote_type(eltype(A), eltype(B))
     if size(B,1) == size(A.factors,2)
-        lmul!(adjoint(AbstractMatrix{TAB}(A)), copy_oftype(B, TAB))
+        lmul!(adjoint(AbstractMatrix{TAB}(A)), copymutable_oftype(B, TAB))
     elseif size(B,1) == size(A.factors,1)
         lmul!(adjoint(AbstractMatrix{TAB}(A)), [B; zeros(TAB, size(A.factors, 2) - size(A.factors, 1), size(B, 2))])
     else
@@ -220,11 +235,11 @@ function *(A::LQPackedQ, adjB::Adjoint{<:Any,<:StridedVecOrMat})
     return lmul!(A, BB)
 end
 function *(adjA::Adjoint{<:Any,<:LQPackedQ}, adjB::Adjoint{<:Any,<:StridedVecOrMat})
-    A, B = adjA.parent, adjB.parent
-    TAB = promote_type(eltype(A), eltype(B))
+    B = adjB.parent
+    TAB = promote_type(eltype(adjA.parent), eltype(B))
     BB = similar(B, TAB, (size(B, 2), size(B, 1)))
     adjoint!(BB, B)
-    return lmul!(adjoint(A), BB)
+    return lmul!(adjA, BB)
 end
 
 # in-place right-application of LQPackedQs
@@ -254,7 +269,7 @@ rmul!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:LQPackedQ{T}}) where {T<:BlasCo
 function *(A::StridedVecOrMat, adjQ::Adjoint{<:Any,<:LQPackedQ})
     Q = adjQ.parent
     TR = promote_type(eltype(A), eltype(Q))
-    return rmul!(copy_oftype(A, TR), adjoint(AbstractMatrix{TR}(Q)))
+    return rmul!(copymutable_oftype(A, TR), adjoint(AbstractMatrix{TR}(Q)))
 end
 function *(adjA::Adjoint{<:Any,<:StridedMatrix}, adjQ::Adjoint{<:Any,<:LQPackedQ})
     A, Q = adjA.parent, adjQ.parent
@@ -278,7 +293,7 @@ end
 function *(A::StridedVecOrMat, Q::LQPackedQ)
     TR = promote_type(eltype(A), eltype(Q))
     if size(A, 2) == size(Q.factors, 2)
-        C = copy_oftype(A, TR)
+        C = copymutable_oftype(A, TR)
     elseif size(A, 2) == size(Q.factors, 1)
         C = zeros(TR, size(A, 1), size(Q.factors, 2))
         copyto!(C, 1, A, 1, length(A))
@@ -306,17 +321,6 @@ _rightappdimmismatch(rowsorcols) =
         "or (2) the number of rows of that (LQPackedQ) matrix's internal representation ",
         "(the factorization's originating matrix's number of rows)")))
 
-
-function (\)(A::LQ{TA},B::StridedVecOrMat{TB}) where {TA,TB}
-    S = promote_type(TA,TB)
-    m, n = size(A)
-    m ≤ n || throw(DimensionMismatch("LQ solver does not support overdetermined systems (more rows than columns)"))
-    m == size(B,1) || throw(DimensionMismatch("Both inputs should have the same number of rows"))
-    AA = Factorization{S}(A)
-    X = _zeros(S, B, n)
-    X[1:size(B, 1), :] = B
-    return ldiv!(AA, X)
-end
 # With a real lhs and complex rhs with the same precision, we can reinterpret
 # the complex rhs as a real rhs with twice the number of columns
 function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
@@ -330,12 +334,25 @@ function (\)(F::LQ{T}, B::VecOrMat{Complex{T}}) where T<:BlasReal
 end
 
 
-function ldiv!(A::LQ{T}, B::StridedVecOrMat{T}) where T
+function ldiv!(A::LQ, B::StridedVecOrMat)
     require_one_based_indexing(B)
+    m, n = size(A)
+    m ≤ n || throw(DimensionMismatch("LQ solver does not support overdetermined systems (more rows than columns)"))
+
     ldiv!(LowerTriangular(A.L), view(B, 1:size(A,1), axes(B,2)))
     return lmul!(adjoint(A.Q), B)
 end
 
+function ldiv!(Fadj::Adjoint{<:Any,<:LQ}, B::StridedVecOrMat)
+    require_one_based_indexing(B)
+    m, n = size(Fadj)
+    m >= n || throw(DimensionMismatch("solver does not support underdetermined systems (more columns than rows)"))
+
+    F = parent(Fadj)
+    lmul!(F.Q, B)
+    ldiv!(UpperTriangular(adjoint(F.L)), view(B, 1:size(F,1), axes(B,2)))
+    return B
+end
 
 # In LQ factorization, `Q` is expressed as the product of the adjoint of the
 # reflectors.  Thus, `det` has to be conjugated.
diff --git a/stdlib/LinearAlgebra/src/lu.jl b/stdlib/LinearAlgebra/src/lu.jl
index 853b4a3c12b2c6..72bfb54ad07351 100644
--- a/stdlib/LinearAlgebra/src/lu.jl
+++ b/stdlib/LinearAlgebra/src/lu.jl
@@ -28,7 +28,7 @@ julia> A = [4 3; 6 3]
  6  3
 
 julia> F = lu(A)
-LU{Float64, Matrix{Float64}}
+LU{Float64, Matrix{Float64}, Vector{Int64}}
 L factor:
 2×2 Matrix{Float64}:
  1.0       0.0
@@ -47,24 +47,24 @@ julia> l == F.L && u == F.U && p == F.p
 true
 ```
 """
-struct LU{T,S<:AbstractMatrix{T}} <: Factorization{T}
+struct LU{T,S<:AbstractMatrix{T},P<:AbstractVector{<:Integer}} <: Factorization{T}
     factors::S
-    ipiv::Vector{BlasInt}
+    ipiv::P
     info::BlasInt
 
-    function LU{T,S}(factors, ipiv, info) where {T,S<:AbstractMatrix{T}}
+    function LU{T,S,P}(factors, ipiv, info) where {T, S<:AbstractMatrix{T}, P<:AbstractVector{<:Integer}}
         require_one_based_indexing(factors)
-        new{T,S}(factors, ipiv, info)
+        new{T,S,P}(factors, ipiv, info)
     end
 end
-function LU(factors::AbstractMatrix{T}, ipiv::Vector{BlasInt}, info::BlasInt) where {T}
-    LU{T,typeof(factors)}(factors, ipiv, info)
-end
-function LU{T}(factors::AbstractMatrix, ipiv::AbstractVector{<:Integer}, info::Integer) where {T}
-    LU(convert(AbstractMatrix{T}, factors),
-       convert(Vector{BlasInt}, ipiv),
-       BlasInt(info))
-end
+LU(factors::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer}, info::BlasInt) where {T} =
+    LU{T,typeof(factors),typeof(ipiv)}(factors, ipiv, info)
+LU{T}(factors::AbstractMatrix, ipiv::AbstractVector{<:Integer}, info::Integer) where {T} =
+    LU(convert(AbstractMatrix{T}, factors), ipiv, BlasInt(info))
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(LU{T,S}(factors::AbstractMatrix{T}, ipiv::AbstractVector{<:Integer},
+                   info::BlasInt) where {T,S},
+           LU{T,S,typeof(ipiv)}(factors, ipiv, info))
 
 # iteration for destructuring into components
 Base.iterate(S::LU) = (S.L, Val(:U))
@@ -76,22 +76,27 @@ adjoint(F::LU) = Adjoint(F)
 transpose(F::LU) = Transpose(F)
 
 # StridedMatrix
-function lu!(A::StridedMatrix{T}, pivot::Union{Val{false}, Val{true}} = Val(true);
-             check::Bool = true) where T<:BlasFloat
-    if pivot === Val(false)
-        return generic_lufact!(A, pivot; check = check)
-    end
+lu!(A::StridedMatrix{<:BlasFloat}; check::Bool = true) = lu!(A, RowMaximum(); check=check)
+function lu!(A::StridedMatrix{T}, ::RowMaximum; check::Bool = true) where {T<:BlasFloat}
     lpt = LAPACK.getrf!(A)
     check && checknonsingular(lpt[3])
-    return LU{T,typeof(A)}(lpt[1], lpt[2], lpt[3])
+    return LU{T,typeof(lpt[1]),typeof(lpt[2])}(lpt[1], lpt[2], lpt[3])
 end
-function lu!(A::HermOrSym, pivot::Union{Val{false}, Val{true}} = Val(true); check::Bool = true)
+function lu!(A::StridedMatrix{<:BlasFloat}, pivot::NoPivot; check::Bool = true)
+    return generic_lufact!(A, pivot; check = check)
+end
+
+function lu!(A::HermOrSym, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true)
     copytri!(A.data, A.uplo, isa(A, Hermitian))
     lu!(A.data, pivot; check = check)
 end
+# for backward compatibility
+# TODO: remove towards Julia v2
+@deprecate lu!(A::Union{StridedMatrix,HermOrSym,Tridiagonal}, ::Val{true}; check::Bool = true) lu!(A, RowMaximum(); check=check)
+@deprecate lu!(A::Union{StridedMatrix,HermOrSym,Tridiagonal}, ::Val{false}; check::Bool = true) lu!(A, NoPivot(); check=check)
 
 """
-    lu!(A, pivot=Val(true); check = true) -> LU
+    lu!(A, pivot = RowMaximum(); check = true) -> LU
 
 `lu!` is the same as [`lu`](@ref), but saves space by overwriting the
 input `A`, instead of creating a copy. An [`InexactError`](@ref)
@@ -106,7 +111,7 @@ julia> A = [4. 3.; 6. 3.]
  6.0  3.0
 
 julia> F = lu!(A)
-LU{Float64, Matrix{Float64}}
+LU{Float64, Matrix{Float64}, Vector{Int64}}
 L factor:
 2×2 Matrix{Float64}:
  1.0       0.0
@@ -127,21 +132,24 @@ Stacktrace:
 [...]
 ```
 """
-lu!(A::StridedMatrix, pivot::Union{Val{false}, Val{true}} = Val(true); check::Bool = true) =
+lu!(A::StridedMatrix, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) =
     generic_lufact!(A, pivot; check = check)
-function generic_lufact!(A::StridedMatrix{T}, ::Val{Pivot} = Val(true);
-                         check::Bool = true) where {T,Pivot}
+function generic_lufact!(A::StridedMatrix{T}, pivot::Union{RowMaximum,NoPivot} = RowMaximum();
+                         check::Bool = true) where {T}
+    # Extract values
     m, n = size(A)
     minmn = min(m,n)
+
+    # Initialize variables
     info = 0
     ipiv = Vector{BlasInt}(undef, minmn)
     @inbounds begin
         for k = 1:minmn
             # find index max
             kp = k
-            if Pivot
-                amax = abs(zero(T))
-                for i = k:m
+            if pivot === RowMaximum() && k < m
+                amax = abs(A[k, k])
+                for i = k+1:m
                     absi = abs(A[i,k])
                     if absi > amax
                         kp = i
@@ -175,8 +183,8 @@ function generic_lufact!(A::StridedMatrix{T}, ::Val{Pivot} = Val(true);
             end
         end
     end
-    check && checknonsingular(info, Val{Pivot}())
-    return LU{T,typeof(A)}(A, ipiv, convert(BlasInt, info))
+    check && checknonsingular(info, pivot)
+    return LU{T,typeof(A),typeof(ipiv)}(A, ipiv, convert(BlasInt, info))
 end
 
 function lutype(T::Type)
@@ -200,7 +208,7 @@ end
 
 # for all other types we must promote to a type which is stable under division
 """
-    lu(A, pivot=Val(true); check = true) -> F::LU
+    lu(A, pivot = RowMaximum(); check = true) -> F::LU
 
 Compute the LU factorization of `A`.
 
@@ -211,7 +219,7 @@ validity (via [`issuccess`](@ref)) lies with the user.
 In most cases, if `A` is a subtype `S` of `AbstractMatrix{T}` with an element
 type `T` supporting `+`, `-`, `*` and `/`, the return type is `LU{T,S{T}}`. If
 pivoting is chosen (default) the element type should also support [`abs`](@ref) and
-[`<`](@ref).
+[`<`](@ref). Pivoting can be turned off by passing `pivot = NoPivot()`.
 
 The individual components of the factorization `F` can be accessed via [`getproperty`](@ref):
 
@@ -248,7 +256,7 @@ julia> A = [4 3; 6 3]
  6  3
 
 julia> F = lu(A)
-LU{Float64, Matrix{Float64}}
+LU{Float64, Matrix{Float64}, Vector{Int64}}
 L factor:
 2×2 Matrix{Float64}:
  1.0       0.0
@@ -267,11 +275,16 @@ julia> l == F.L && u == F.U && p == F.p
 true
 ```
 """
-function lu(A::AbstractMatrix{T}, pivot::Union{Val{false}, Val{true}}=Val(true);
-            check::Bool = true) where T
-    S = lutype(T)
-    lu!(copy_oftype(A, S), pivot; check = check)
+function lu(A::AbstractMatrix{T}, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) where {T}
+    lu!(_lucopy(A, lutype(T)), pivot; check = check)
 end
+# TODO: remove for Julia v2.0
+@deprecate lu(A::AbstractMatrix, ::Val{true}; check::Bool = true) lu(A, RowMaximum(); check=check)
+@deprecate lu(A::AbstractMatrix, ::Val{false}; check::Bool = true) lu(A, NoPivot(); check=check)
+
+_lucopy(A::AbstractMatrix, T) = copy_similar(A, T)
+_lucopy(A::HermOrSym, T)      = copymutable_oftype(A, T)
+_lucopy(A::Tridiagonal, T)    = copymutable_oftype(A, T)
 
 lu(S::LU) = S
 function lu(x::Number; check::Bool=true)
@@ -282,13 +295,13 @@ end
 
 function LU{T}(F::LU) where T
     M = convert(AbstractMatrix{T}, F.factors)
-    LU{T,typeof(M)}(M, F.ipiv, F.info)
+    LU{T,typeof(M),typeof(F.ipiv)}(M, F.ipiv, F.info)
 end
-LU{T,S}(F::LU) where {T,S} = LU{T,S}(convert(S, F.factors), F.ipiv, F.info)
+LU{T,S,P}(F::LU) where {T,S,P} = LU{T,S,P}(convert(S, F.factors), convert(P, F.ipiv), F.info)
 Factorization{T}(F::LU{T}) where {T} = F
 Factorization{T}(F::LU) where {T} = LU{T}(F)
 
-copy(A::LU{T,S}) where {T,S} = LU{T,S}(copy(A.factors), copy(A.ipiv), A.info)
+copy(A::LU{T,S,P}) where {T,S,P} = LU{T,S,P}(copy(A.factors), copy(A.ipiv), A.info)
 
 size(A::LU)    = size(getfield(A, :factors))
 size(A::LU, i) = size(getfield(A, :factors), i)
@@ -425,18 +438,18 @@ end
 
 function (/)(A::AbstractMatrix, F::Adjoint{<:Any,<:LU})
     T = promote_type(eltype(A), eltype(F))
-    return adjoint(ldiv!(F.parent, copy_oftype(adjoint(A), T)))
+    return adjoint(ldiv!(F.parent, copymutable_oftype(adjoint(A), T)))
 end
 # To avoid ambiguities with definitions in adjtrans.jl and factorizations.jl
 (/)(adjA::Adjoint{<:Any,<:AbstractVector}, F::Adjoint{<:Any,<:LU}) = adjoint(F.parent \ adjA.parent)
 (/)(adjA::Adjoint{<:Any,<:AbstractMatrix}, F::Adjoint{<:Any,<:LU}) = adjoint(F.parent \ adjA.parent)
 function (/)(trA::Transpose{<:Any,<:AbstractVector}, F::Adjoint{<:Any,<:LU})
     T = promote_type(eltype(trA), eltype(F))
-    return adjoint(ldiv!(F.parent, conj!(copy_oftype(trA.parent, T))))
+    return adjoint(ldiv!(F.parent, conj!(copymutable_oftype(trA.parent, T))))
 end
 function (/)(trA::Transpose{<:Any,<:AbstractMatrix}, F::Adjoint{<:Any,<:LU})
     T = promote_type(eltype(trA), eltype(F))
-    return adjoint(ldiv!(F.parent, conj!(copy_oftype(trA.parent, T))))
+    return adjoint(ldiv!(F.parent, conj!(copymutable_oftype(trA.parent, T))))
 end
 
 function det(F::LU{T}) where T
@@ -481,9 +494,11 @@ inv(A::LU{<:BlasFloat,<:StridedMatrix}) = inv!(copy(A))
 # Tridiagonal
 
 # See dgttrf.f
-function lu!(A::Tridiagonal{T,V}, pivot::Union{Val{false}, Val{true}} = Val(true);
-             check::Bool = true) where {T,V}
+function lu!(A::Tridiagonal{T,V}, pivot::Union{RowMaximum,NoPivot} = RowMaximum(); check::Bool = true) where {T,V}
+    # Extract values
     n = size(A, 1)
+
+    # Initialize variables
     info = 0
     ipiv = Vector{BlasInt}(undef, n)
     dl = A.dl
@@ -500,7 +515,7 @@ function lu!(A::Tridiagonal{T,V}, pivot::Union{Val{false}, Val{true}} = Val(true
         end
         for i = 1:n-2
             # pivot or not?
-            if pivot === Val(false) || abs(d[i]) >= abs(dl[i])
+            if pivot === NoPivot() || abs(d[i]) >= abs(dl[i])
                 # No interchange
                 if d[i] != 0
                     fact = dl[i]/d[i]
@@ -523,7 +538,7 @@ function lu!(A::Tridiagonal{T,V}, pivot::Union{Val{false}, Val{true}} = Val(true
         end
         if n > 1
             i = n-1
-            if pivot === Val(false) || abs(d[i]) >= abs(dl[i])
+            if pivot === NoPivot() || abs(d[i]) >= abs(dl[i])
                 if d[i] != 0
                     fact = dl[i]/d[i]
                     dl[i] = fact
@@ -549,7 +564,7 @@ function lu!(A::Tridiagonal{T,V}, pivot::Union{Val{false}, Val{true}} = Val(true
     end
     B = Tridiagonal{T,V}(dl, d, du, du2)
     check && checknonsingular(info, pivot)
-    return LU{T,Tridiagonal{T,V}}(B, ipiv, convert(BlasInt, info))
+    return LU{T,Tridiagonal{T,V},typeof(ipiv)}(B, ipiv, convert(BlasInt, info))
 end
 
 factorize(A::Tridiagonal) = lu(A)
@@ -649,7 +664,7 @@ function ldiv!(transA::Transpose{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVec
 end
 
 # Ac_ldiv_B!(A::LU{T,Tridiagonal{T}}, B::AbstractVecOrMat) where {T<:Real} = At_ldiv_B!(A,B)
-function ldiv!(adjA::Adjoint{<:Any,LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V}
+function ldiv!(adjA::Adjoint{<:Any,<:LU{T,Tridiagonal{T,V}}}, B::AbstractVecOrMat) where {T,V}
     require_one_based_indexing(B)
     A = adjA.parent
     n = size(A,1)
diff --git a/stdlib/LinearAlgebra/src/matmul.jl b/stdlib/LinearAlgebra/src/matmul.jl
index f2ed2cebf42407..7646aae29d1b9a 100644
--- a/stdlib/LinearAlgebra/src/matmul.jl
+++ b/stdlib/LinearAlgebra/src/matmul.jl
@@ -1,17 +1,22 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+# Matrix-matrix multiplication
+
+AdjOrTransStridedMat{T} = Union{Adjoint{T, <:StridedMatrix}, Transpose{T, <:StridedMatrix}}
+StridedMaybeAdjOrTransMat{T} = Union{StridedMatrix{T}, Adjoint{T, <:StridedMatrix}, Transpose{T, <:StridedMatrix}}
+
 # matmul.jl: Everything to do with dense matrix multiplication
 
 matprod(x, y) = x*y + x*y
 
 # dot products
 
-dot(x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where {T<:BlasReal} = BLAS.dot(x, y)
-dot(x::Union{DenseArray{T},StridedVector{T}}, y::Union{DenseArray{T},StridedVector{T}}) where {T<:BlasComplex} = BLAS.dotc(x, y)
+dot(x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasReal} = BLAS.dot(x, y)
+dot(x::StridedVecLike{T}, y::StridedVecLike{T}) where {T<:BlasComplex} = BLAS.dotc(x, y)
 
-function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector{T}, ry::Union{UnitRange{TI},AbstractRange{TI}}) where {T<:BlasReal,TI<:Integer}
+function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasReal,TI<:Integer}
     if length(rx) != length(ry)
-        throw(DimensionMismatch("length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
+        throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
     end
     if minimum(rx) < 1 || maximum(rx) > length(x)
         throw(BoundsError(x, rx))
@@ -22,9 +27,9 @@ function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector
     GC.@preserve x y BLAS.dot(length(rx), pointer(x)+(first(rx)-1)*sizeof(T), step(rx), pointer(y)+(first(ry)-1)*sizeof(T), step(ry))
 end
 
-function dot(x::Vector{T}, rx::Union{UnitRange{TI},AbstractRange{TI}}, y::Vector{T}, ry::Union{UnitRange{TI},AbstractRange{TI}}) where {T<:BlasComplex,TI<:Integer}
+function dot(x::Vector{T}, rx::AbstractRange{TI}, y::Vector{T}, ry::AbstractRange{TI}) where {T<:BlasComplex,TI<:Integer}
     if length(rx) != length(ry)
-        throw(DimensionMismatch("length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
+        throw(DimensionMismatch(lazy"length of rx, $(length(rx)), does not equal length of ry, $(length(ry))"))
     end
     if minimum(rx) < 1 || maximum(rx) > length(x)
         throw(BoundsError(x, rx))
@@ -52,88 +57,71 @@ function (*)(A::AbstractMatrix{T}, x::AbstractVector{S}) where {T,S}
 end
 
 # these will throw a DimensionMismatch unless B has 1 row (or 1 col for transposed case):
-function *(a::AbstractVector, transB::Transpose{<:Any,<:AbstractMatrix})
-    B = transB.parent
-    reshape(a,length(a),1)*transpose(B)
-end
-function *(a::AbstractVector, adjB::Adjoint{<:Any,<:AbstractMatrix})
-    B = adjB.parent
-    reshape(a,length(a),1)*adjoint(B)
-end
-(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a,length(a),1)*B
+(*)(a::AbstractVector, tB::Transpose{<:Any,<:AbstractMatrix}) = reshape(a, length(a), 1) * tB
+(*)(a::AbstractVector, adjB::Adjoint{<:Any,<:AbstractMatrix}) = reshape(a, length(a), 1) * adjB
+(*)(a::AbstractVector, B::AbstractMatrix) = reshape(a, length(a), 1) * B
 
 @inline mul!(y::StridedVector{T}, A::StridedVecOrMat{T}, x::StridedVector{T},
              alpha::Number, beta::Number) where {T<:BlasFloat} =
     gemv!(y, 'N', A, x, alpha, beta)
-# Complex matrix times real vector. Reinterpret the matrix as a real matrix and do real matvec compuation.
-for elty in (Float32,Float64)
-    @eval begin
-        @inline function mul!(y::StridedVector{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, x::StridedVector{$elty},
-                              alpha::Real, beta::Real)
-            Afl = reinterpret($elty, A)
-            yfl = reinterpret($elty, y)
-            mul!(yfl, Afl, x, alpha, beta)
-            return y
-        end
-    end
-end
+
+# Complex matrix times real vector.
+# Reinterpret the matrix as a real matrix and do real matvec compuation.
+@inline mul!(y::StridedVector{Complex{T}}, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
+        alpha::Number, beta::Number) where {T<:BlasReal} =
+    gemv!(y, 'N', A, x, alpha, beta)
+
+# Real matrix times complex vector.
+# Multiply the matrix with the real and imaginary parts separately
+@inline mul!(y::StridedVector{Complex{T}}, A::StridedMaybeAdjOrTransMat{T}, x::StridedVector{Complex{T}},
+        alpha::Number, beta::Number) where {T<:BlasReal} =
+    gemv!(y, A isa StridedArray ? 'N' : 'T', A isa StridedArray ? A : parent(A), x, alpha, beta)
+
 @inline mul!(y::AbstractVector, A::AbstractVecOrMat, x::AbstractVector,
              alpha::Number, beta::Number) =
     generic_matvecmul!(y, 'N', A, x, MulAddMul(alpha, beta))
 
-function *(transA::Transpose{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S}
-    A = transA.parent
+function *(tA::Transpose{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S}
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,size(A,2)), transpose(A), convert(AbstractVector{TS}, x))
+    mul!(similar(x, TS, size(tA, 1)), tA, convert(AbstractVector{TS}, x))
 end
-function *(transA::Transpose{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S}
-    A = transA.parent
+function *(tA::Transpose{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S}
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,size(A,2)), transpose(A), x)
-end
-@inline function mul!(y::StridedVector{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = transA.parent
-    return gemv!(y, 'T', A, x, alpha, beta)
-end
-@inline function mul!(y::AbstractVector, transA::Transpose{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
-                      alpha::Number, beta::Number)
-    A = transA.parent
-    return generic_matvecmul!(y, 'T', A, x, MulAddMul(alpha, beta))
+    mul!(similar(x, TS, size(tA, 1)), tA, x)
 end
+@inline mul!(y::StridedVector{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
+                      alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemv!(y, 'T', tA.parent, x, alpha, beta)
+@inline mul!(y::AbstractVector, tA::Transpose{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
+                      alpha::Number, beta::Number) =
+    generic_matvecmul!(y, 'T', tA.parent, x, MulAddMul(alpha, beta))
 
 function *(adjA::Adjoint{<:Any,<:StridedMatrix{T}}, x::StridedVector{S}) where {T<:BlasFloat,S}
-    A = adjA.parent
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,size(A,2)), adjoint(A) ,convert(AbstractVector{TS},x))
+    mul!(similar(x, TS, size(adjA, 1)), adjA, convert(AbstractVector{TS}, x))
 end
 function *(adjA::Adjoint{<:Any,<:AbstractMatrix{T}}, x::AbstractVector{S}) where {T,S}
-    A = adjA.parent
     TS = promote_op(matprod, T, S)
-    mul!(similar(x,TS,size(A,2)), adjoint(A), x)
+    mul!(similar(x, TS, size(adjA, 1)), adjA, x)
 end
 
-@inline function mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasReal}
-    A = adjA.parent
-    return mul!(y, transpose(A), x, alpha, beta)
-end
-@inline function mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
-                      alpha::Number, beta::Number) where {T<:BlasComplex}
-    A = adjA.parent
-    return gemv!(y, 'C', A, x, alpha, beta)
-end
-@inline function mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
-                      alpha::Number, beta::Number)
-    A = adjA.parent
-    return generic_matvecmul!(y, 'C', A, x, MulAddMul(alpha, beta))
-end
+@inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
+                      alpha::Number, beta::Number) where {T<:BlasReal} =
+    mul!(y, transpose(adjA.parent), x, alpha, beta)
+@inline mul!(y::StridedVector{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, x::StridedVector{T},
+                      alpha::Number, beta::Number) where {T<:BlasComplex} =
+    gemv!(y, 'C', adjA.parent, x, alpha, beta)
+@inline mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, x::AbstractVector,
+                      alpha::Number, beta::Number) =
+    generic_matvecmul!(y, 'C', adjA.parent, x, MulAddMul(alpha, beta))
 
 # Vector-Matrix multiplication
 (*)(x::AdjointAbsVec,   A::AbstractMatrix) = (A'*x')'
 (*)(x::TransposeAbsVec, A::AbstractMatrix) = transpose(transpose(A)*transpose(x))
 
-# Matrix-matrix multiplication
+_parent(A) = A
+_parent(A::Adjoint) = parent(A)
+_parent(A::Transpose) = parent(A)
 
 """
     *(A::AbstractMatrix, B::AbstractMatrix)
@@ -150,18 +138,22 @@ julia> [1 1; 0 1] * [1 0; 1 1]
 """
 function (*)(A::AbstractMatrix, B::AbstractMatrix)
     TS = promote_op(matprod, eltype(A), eltype(B))
-    mul!(similar(B, TS, (size(A,1), size(B,2))), A, B)
+    mul!(similar(B, TS, (size(A, 1), size(B, 2))), A, B)
 end
 # optimization for dispatching to BLAS, e.g. *(::Matrix{Float32}, ::Matrix{Float64})
 # but avoiding the case *(::Matrix{<:BlasComplex}, ::Matrix{<:BlasReal})
 # which is better handled by reinterpreting rather than promotion
-function (*)(A::StridedMatrix{<:BlasReal}, B::StridedMatrix{<:BlasFloat})
+function (*)(A::StridedMaybeAdjOrTransMat{<:BlasReal}, B::StridedMaybeAdjOrTransMat{<:BlasReal})
     TS = promote_type(eltype(A), eltype(B))
-    mul!(similar(B, TS, (size(A,1), size(B,2))), convert(AbstractArray{TS}, A), convert(AbstractArray{TS}, B))
+    mul!(similar(B, TS, (size(A, 1), size(B, 2))),
+         wrapperop(A)(convert(AbstractArray{TS}, _parent(A))),
+         wrapperop(B)(convert(AbstractArray{TS}, _parent(B))))
 end
-function (*)(A::StridedMatrix{<:BlasComplex}, B::StridedMatrix{<:BlasComplex})
+function (*)(A::StridedMaybeAdjOrTransMat{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasComplex})
     TS = promote_type(eltype(A), eltype(B))
-    mul!(similar(B, TS, (size(A,1), size(B,2))), convert(AbstractArray{TS}, A), convert(AbstractArray{TS}, B))
+    mul!(similar(B, TS, (size(A, 1), size(B, 2))),
+         wrapperop(A)(convert(AbstractArray{TS}, _parent(A))),
+         wrapperop(B)(convert(AbstractArray{TS}, _parent(B))))
 end
 
 @inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, B::StridedVecOrMat{T},
@@ -170,16 +162,92 @@ end
 end
 # Complex Matrix times real matrix: We use that it is generally faster to reinterpret the
 # first matrix as a real matrix and carry out real matrix matrix multiply
-for elty in (Float32,Float64)
-    @eval begin
-        @inline function mul!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, B::StridedVecOrMat{$elty},
-                              alpha::Real, beta::Real)
-            Afl = reinterpret($elty, A)
-            Cfl = reinterpret($elty, C)
-            mul!(Cfl, Afl, B, alpha, beta)
-            return C
-        end
+function (*)(A::StridedMatrix{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal})
+    TS = promote_type(eltype(A), eltype(B))
+    mul!(similar(B, TS, (size(A, 1), size(B, 2))),
+         convert(AbstractArray{TS}, A),
+         wrapperop(B)(convert(AbstractArray{real(TS)}, _parent(B))))
+end
+function (*)(A::AdjOrTransStridedMat{<:BlasComplex}, B::StridedMaybeAdjOrTransMat{<:BlasReal})
+    TS = promote_type(eltype(A), eltype(B))
+    mul!(similar(B, TS, (size(A, 1), size(B, 2))),
+         copymutable_oftype(A, TS), # remove AdjOrTrans to use reinterpret trick below
+         wrapperop(B)(convert(AbstractArray{real(TS)}, _parent(B))))
+end
+# the following case doesn't seem to benefit from the translation A*B = (B' * A')'
+function (*)(A::StridedMatrix{<:BlasReal}, B::StridedMatrix{<:BlasComplex})
+    temp = real(B)
+    R = A * temp
+    temp .= imag.(B)
+    I = A * temp
+    Complex.(R, I)
+end
+(*)(A::AdjOrTransStridedMat{<:BlasReal}, B::StridedMatrix{<:BlasComplex}) = copy(transpose(transpose(B) * parent(A)))
+(*)(A::StridedMaybeAdjOrTransMat{<:BlasReal}, B::AdjOrTransStridedMat{<:BlasComplex}) = copy(wrapperop(B)(parent(B) * transpose(A)))
+
+"""
+    muladd(A, y, z)
+
+Combined multiply-add, `A*y .+ z`, for matrix-matrix or matrix-vector multiplication.
+The result is always the same size as `A*y`, but `z` may be smaller, or a scalar.
+
+!!! compat "Julia 1.6"
+     These methods require Julia 1.6 or later.
+
+# Examples
+```jldoctest
+julia> A=[1.0 2.0; 3.0 4.0]; B=[1.0 1.0; 1.0 1.0]; z=[0, 100];
+
+julia> muladd(A, B, z)
+2×2 Matrix{Float64}:
+   3.0    3.0
+ 107.0  107.0
+```
+"""
+function Base.muladd(A::AbstractMatrix, y::AbstractVecOrMat, z::Union{Number, AbstractArray})
+    Ay = A * y
+    for d in 1:ndims(Ay)
+        # Same error as Ay .+= z would give, to match StridedMatrix method:
+        size(z,d) > size(Ay,d) && throw(DimensionMismatch("array could not be broadcast to match destination"))
+    end
+    for d in ndims(Ay)+1:ndims(z)
+        # Similar error to what Ay + z would give, to match (Any,Any,Any) method:
+        size(z,d) > 1 && throw(DimensionMismatch(string("dimensions must match: z has dims ",
+            axes(z), ", must have singleton at dim ", d)))
+    end
+    Ay .+ z
+end
+
+function Base.muladd(u::AbstractVector, v::AdjOrTransAbsVec, z::Union{Number, AbstractArray})
+    if size(z,1) > length(u) || size(z,2) > length(v)
+        # Same error as (u*v) .+= z:
+        throw(DimensionMismatch("array could not be broadcast to match destination"))
+    end
+    for d in 3:ndims(z)
+        # Similar error to (u*v) + z:
+        size(z,d) > 1 && throw(DimensionMismatch(string("dimensions must match: z has dims ",
+            axes(z), ", must have singleton at dim ", d)))
     end
+    (u .* v) .+ z
+end
+
+Base.muladd(x::AdjointAbsVec, A::AbstractMatrix, z::Union{Number, AbstractVecOrMat}) =
+    muladd(A', x', z')'
+Base.muladd(x::TransposeAbsVec, A::AbstractMatrix, z::Union{Number, AbstractVecOrMat}) =
+    transpose(muladd(transpose(A), transpose(x), transpose(z)))
+
+function Base.muladd(A::StridedMaybeAdjOrTransMat{<:Number}, y::AbstractVector{<:Number}, z::Union{Number, AbstractVector})
+    T = promote_type(eltype(A), eltype(y), eltype(z))
+    C = similar(A, T, axes(A,1))
+    C .= z
+    mul!(C, A, y, true, true)
+end
+
+function Base.muladd(A::StridedMaybeAdjOrTransMat{<:Number}, B::StridedMaybeAdjOrTransMat{<:Number}, z::Union{Number, AbstractVecOrMat})
+    T = promote_type(eltype(A), eltype(B), eltype(z))
+    C = similar(A, T, axes(A,1), axes(B,2))
+    C .= z
+    mul!(C, A, B, true, true)
 end
 
 """
@@ -301,134 +369,104 @@ julia> lmul!(F.Q, B)
 """
 lmul!(A, B)
 
-@inline function mul!(C::StridedMatrix{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
+@inline function mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
                  alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = transA.parent
-    if A===B
-        return syrk_wrapper!(C, 'T', A, alpha, beta)
+    A = tA.parent
+    if A === B
+        return syrk_wrapper!(C, 'T', A, MulAddMul(alpha, beta))
     else
         return gemm_wrapper!(C, 'T', 'N', A, B, MulAddMul(alpha, beta))
     end
 end
-@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
-                 alpha::Number, beta::Number)
-    A = transA.parent
-    return generic_matmatmul!(C, 'T', 'N', A, B, MulAddMul(alpha, beta))
-end
+@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'T', 'N', tA.parent, B, MulAddMul(alpha, beta))
 
-@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, transB::Transpose{<:Any,<:StridedVecOrMat{T}},
+@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
                  alpha::Number, beta::Number) where {T<:BlasFloat}
-    B = transB.parent
-    if A===B
-        return syrk_wrapper!(C, 'N', A, alpha, beta)
+    B = tB.parent
+    if A === B
+        return syrk_wrapper!(C, 'N', A, MulAddMul(alpha, beta))
     else
         return gemm_wrapper!(C, 'N', 'T', A, B, MulAddMul(alpha, beta))
     end
 end
-# Complex matrix times transposed real matrix. Reinterpret the first matrix to real for efficiency.
-for elty in (Float32,Float64)
-    @eval begin
-        @inline function mul!(C::StridedMatrix{Complex{$elty}}, A::StridedVecOrMat{Complex{$elty}}, transB::Transpose{<:Any,<:StridedVecOrMat{$elty}},
-                         alpha::Real, beta::Real)
-            Afl = reinterpret($elty, A)
-            Cfl = reinterpret($elty, C)
-            mul!(Cfl, Afl, transB, alpha, beta)
-            return C
-        end
-    end
-end
+# Complex matrix times (transposed) real matrix. Reinterpret the first matrix to real for efficiency.
+@inline mul!(C::StridedMatrix{Complex{T}}, A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
+                    alpha::Number, beta::Number) where {T<:BlasReal} =
+    gemm_wrapper!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
+@inline mul!(C::StridedMatrix{Complex{T}}, A::StridedVecOrMat{Complex{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
+                    alpha::Number, beta::Number) where {T<:BlasReal} =
+    gemm_wrapper!(C, 'N', 'T', A, parent(tB), MulAddMul(alpha, beta))
+
 # collapsing the following two defs with C::AbstractVecOrMat yields ambiguities
-@inline mul!(C::AbstractVector, A::AbstractVecOrMat, transB::Transpose{<:Any,<:AbstractVecOrMat},
+@inline mul!(C::AbstractVector, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat},
              alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'N', 'T', A, transB.parent, MulAddMul(alpha, beta))
-@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, transB::Transpose{<:Any,<:AbstractVecOrMat},
+    generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, tB::Transpose{<:Any,<:AbstractVecOrMat},
              alpha::Number, beta::Number) =
-    generic_matmatmul!(C, 'N', 'T', A, transB.parent, MulAddMul(alpha, beta))
-
-@inline function mul!(C::StridedMatrix{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, transB::Transpose{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = transA.parent
-    B = transB.parent
-    return gemm_wrapper!(C, 'T', 'T', A, B, MulAddMul(alpha, beta))
-end
-@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, transB::Transpose{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    A = transA.parent
-    B = transB.parent
-    return generic_matmatmul!(C, 'T', 'T', A, B, MulAddMul(alpha, beta))
-end
-
-@inline function mul!(C::StridedMatrix{T}, transA::Transpose{<:Any,<:StridedVecOrMat{T}}, transB::Adjoint{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = transA.parent
-    B = transB.parent
-    return gemm_wrapper!(C, 'T', 'C', A, B, MulAddMul(alpha, beta))
-end
-@inline function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractVecOrMat}, transB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    A = transA.parent
-    B = transB.parent
-    return generic_matmatmul!(C, 'T', 'C', A, B, MulAddMul(alpha, beta))
-end
-
-@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
-                 alpha::Real, beta::Real) where {T<:BlasReal}
-    A = adjA.parent
-    return mul!(C, transpose(A), B, alpha, beta)
-end
+    generic_matmatmul!(C, 'N', 'T', A, tB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemm_wrapper!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'T', 'T', tA.parent, tB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, tA::Transpose{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemm_wrapper!(C, 'T', 'C', tA.parent, adjB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, tA::Transpose{<:Any,<:AbstractVecOrMat}, tB::Adjoint{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'T', 'C', tA.parent, tB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
+                 alpha::Real, beta::Real) where {T<:BlasReal} =
+    mul!(C, transpose(adjA.parent), B, alpha, beta)
 @inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, B::StridedVecOrMat{T},
                  alpha::Number, beta::Number) where {T<:BlasComplex}
     A = adjA.parent
-    if A===B
-        return herk_wrapper!(C, 'C', A, alpha, beta)
+    if A === B
+        return herk_wrapper!(C, 'C', A, MulAddMul(alpha, beta))
     else
         return gemm_wrapper!(C, 'C', 'N', A, B, MulAddMul(alpha, beta))
     end
 end
-@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
-                 alpha::Number, beta::Number)
-    A = adjA.parent
-    return generic_matmatmul!(C, 'C', 'N', A, B, MulAddMul(alpha, beta))
-end
+@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, B::AbstractVecOrMat,
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'C', 'N', adjA.parent, B, MulAddMul(alpha, beta))
 
-@inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{<:BlasReal}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    B = adjB.parent
-    return mul!(C, A, transpose(B), alpha, beta)
-end
+@inline mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{<:BlasReal}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    mul!(C, A, transpose(adjB.parent), alpha, beta)
 @inline function mul!(C::StridedMatrix{T}, A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
                  alpha::Number, beta::Number) where {T<:BlasComplex}
     B = adjB.parent
     if A === B
-        return herk_wrapper!(C, 'N', A, alpha, beta)
+        return herk_wrapper!(C, 'N', A, MulAddMul(alpha, beta))
     else
         return gemm_wrapper!(C, 'N', 'C', A, B, MulAddMul(alpha, beta))
     end
 end
-@inline function mul!(C::AbstractMatrix, A::AbstractVecOrMat, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    B = adjB.parent
-    return generic_matmatmul!(C, 'N', 'C', A, B, MulAddMul(alpha, beta))
-end
+@inline mul!(C::AbstractMatrix, A::AbstractVecOrMat, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'N', 'C', A, adjB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemm_wrapper!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'C', 'C', adjA.parent, adjB.parent, MulAddMul(alpha, beta))
+
+@inline mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, tB::Transpose{<:Any,<:StridedVecOrMat{T}},
+                 alpha::Number, beta::Number) where {T<:BlasFloat} =
+    gemm_wrapper!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta))
+@inline mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, tB::Transpose{<:Any,<:AbstractVecOrMat},
+                 alpha::Number, beta::Number) =
+    generic_matmatmul!(C, 'C', 'T', adjA.parent, tB.parent, MulAddMul(alpha, beta))
 
-@inline function mul!(C::StridedMatrix{T}, adjA::Adjoint{<:Any,<:StridedVecOrMat{T}}, adjB::Adjoint{<:Any,<:StridedVecOrMat{T}},
-                 alpha::Number, beta::Number) where {T<:BlasFloat}
-    A = adjA.parent
-    B = adjB.parent
-    return gemm_wrapper!(C, 'C', 'C', A, B, MulAddMul(alpha, beta))
-end
-@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, adjB::Adjoint{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    A = adjA.parent
-    B = adjB.parent
-    return generic_matmatmul!(C, 'C', 'C', A, B, MulAddMul(alpha, beta))
-end
-@inline function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractVecOrMat}, transB::Transpose{<:Any,<:AbstractVecOrMat},
-                 alpha::Number, beta::Number)
-    A = adjA.parent
-    B = transB.parent
-    return generic_matmatmul!(C, 'C', 'T', A, B, MulAddMul(alpha, beta))
-end
 # Supporting functions for matrix multiplication
 
 # copy transposed(adjoint) of upper(lower) side-digonals. Optionally include diagonal.
@@ -444,7 +482,7 @@ end
             A[i,j] = conjugate ? adjoint(A[j,i]) : transpose(A[j,i])
         end
     else
-        throw(ArgumentError("uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
+        throw(ArgumentError(lazy"uplo argument must be 'U' (upper) or 'L' (lower), got $uplo"))
     end
     A
 end
@@ -452,29 +490,68 @@ end
 function gemv!(y::StridedVector{T}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{T},
                α::Number=true, β::Number=false) where {T<:BlasFloat}
     mA, nA = lapack_size(tA, A)
-    if nA != length(x)
-        throw(DimensionMismatch("second dimension of A, $nA, does not match length of x, $(length(x))"))
-    end
-    if mA != length(y)
-        throw(DimensionMismatch("first dimension of A, $mA, does not match length of y, $(length(y))"))
+    nA != length(x) &&
+        throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))"))
+    mA != length(y) &&
+        throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))"))
+    mA == 0 && return y
+    nA == 0 && return _rmul_or_fill!(y, β)
+    alpha, beta = promote(α, β, zero(T))
+    if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
+        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
+        !iszero(stride(x, 1)) # We only check input's stride here.
+        return BLAS.gemv!(tA, alpha, A, x, beta, y)
+    else
+        return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
     end
-    if mA == 0
+end
+
+function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{Complex{T}}, x::StridedVector{T},
+    α::Number = true, β::Number = false) where {T<:BlasReal}
+    mA, nA = lapack_size(tA, A)
+    nA != length(x) &&
+        throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))"))
+    mA != length(y) &&
+        throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))"))
+    mA == 0 && return y
+    nA == 0 && return _rmul_or_fill!(y, β)
+    alpha, beta = promote(α, β, zero(T))
+    if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
+        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
+        stride(y, 1) == 1 && tA == 'N' && # reinterpret-based optimization is valid only for contiguous `y`
+        !iszero(stride(x, 1))
+        BLAS.gemv!(tA, alpha, reinterpret(T, A), x, beta, reinterpret(T, y))
         return y
+    else
+        return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
     end
-    if nA == 0
-        return _rmul_or_fill!(y, β)
-    end
+end
 
+function gemv!(y::StridedVector{Complex{T}}, tA::AbstractChar, A::StridedVecOrMat{T}, x::StridedVector{Complex{T}},
+    α::Number = true, β::Number = false) where {T<:BlasFloat}
+    mA, nA = lapack_size(tA, A)
+    nA != length(x) &&
+        throw(DimensionMismatch(lazy"second dimension of A, $nA, does not match length of x, $(length(x))"))
+    mA != length(y) &&
+        throw(DimensionMismatch(lazy"first dimension of A, $mA, does not match length of y, $(length(y))"))
+    mA == 0 && return y
+    nA == 0 && return _rmul_or_fill!(y, β)
     alpha, beta = promote(α, β, zero(T))
-    if alpha isa Union{Bool,T} && beta isa Union{Bool,T} && stride(A, 1) == 1 && stride(A, 2) >= size(A, 1)
-        return BLAS.gemv!(tA, alpha, A, x, beta, y)
+    @views if alpha isa Union{Bool,T} && beta isa Union{Bool,T} &&
+        stride(A, 1) == 1 && abs(stride(A, 2)) >= size(A, 1) &&
+        !iszero(stride(x, 1))
+        xfl = reinterpret(reshape, T, x) # Use reshape here.
+        yfl = reinterpret(reshape, T, y)
+        BLAS.gemv!(tA, alpha, A, xfl[1, :], beta, yfl[1, :])
+        BLAS.gemv!(tA, alpha, A, xfl[2, :], beta, yfl[2, :])
+        return y
     else
         return generic_matvecmul!(y, tA, A, x, MulAddMul(α, β))
     end
 end
 
 function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat{T},
-                       α::Number=true, β::Number=false) where {T<:BlasFloat}
+        _add = MulAddMul()) where {T<:BlasFloat}
     nC = checksquare(C)
     if tA == 'T'
         (nA, mA) = size(A,1), size(A,2)
@@ -484,22 +561,22 @@ function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat
         tAt = 'T'
     end
     if nC != mA
-        throw(DimensionMismatch("output matrix has size: $(nC), but should have size $(mA)"))
+        throw(DimensionMismatch(lazy"output matrix has size: $(nC), but should have size $(mA)"))
     end
-    if mA == 0 || nA == 0 || iszero(α)
-        return _rmul_or_fill!(C, β)
+    if mA == 0 || nA == 0 || iszero(_add.alpha)
+        return _rmul_or_fill!(C, _add.beta)
     end
     if mA == 2 && nA == 2
-        return matmul2x2!(C, tA, tAt, A, A, MulAddMul(α, β))
+        return matmul2x2!(C, tA, tAt, A, A, _add)
     end
     if mA == 3 && nA == 3
-        return matmul3x3!(C, tA, tAt, A, A, MulAddMul(α, β))
+        return matmul3x3!(C, tA, tAt, A, A, _add)
     end
 
     # BLAS.syrk! only updates symmetric C
     # alternatively, make non-zero β a show-stopper for BLAS.syrk!
-    if iszero(β) || issymmetric(C)
-        alpha, beta = promote(α, β, zero(T))
+    if iszero(_add.beta) || issymmetric(C)
+        alpha, beta = promote(_add.alpha, _add.beta, zero(T))
         if (alpha isa Union{Bool,T} &&
             beta isa Union{Bool,T} &&
             stride(A, 1) == stride(C, 1) == 1 &&
@@ -508,11 +585,11 @@ function syrk_wrapper!(C::StridedMatrix{T}, tA::AbstractChar, A::StridedVecOrMat
             return copytri!(BLAS.syrk!('U', tA, alpha, A, beta, C), 'U')
         end
     end
-    return gemm_wrapper!(C, tA, tAt, A, A, MulAddMul(α, β))
+    return gemm_wrapper!(C, tA, tAt, A, A, _add)
 end
 
 function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA::AbstractChar, A::Union{StridedVecOrMat{T}, StridedVecOrMat{Complex{T}}},
-                       α::Number=true, β::Number=false) where {T<:BlasReal}
+        _add = MulAddMul()) where {T<:BlasReal}
     nC = checksquare(C)
     if tA == 'C'
         (nA, mA) = size(A,1), size(A,2)
@@ -522,23 +599,23 @@ function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA
         tAt = 'C'
     end
     if nC != mA
-        throw(DimensionMismatch("output matrix has size: $(nC), but should have size $(mA)"))
+        throw(DimensionMismatch(lazy"output matrix has size: $(nC), but should have size $(mA)"))
     end
-    if mA == 0 || nA == 0
-        return _rmul_or_fill!(C, β)
+    if mA == 0 || nA == 0 || iszero(_add.alpha)
+        return _rmul_or_fill!(C, _add.beta)
     end
     if mA == 2 && nA == 2
-        return matmul2x2!(C, tA, tAt, A, A, MulAddMul(α, β))
+        return matmul2x2!(C, tA, tAt, A, A, _add)
     end
     if mA == 3 && nA == 3
-        return matmul3x3!(C, tA, tAt, A, A, MulAddMul(α, β))
+        return matmul3x3!(C, tA, tAt, A, A, _add)
     end
 
     # Result array does not need to be initialized as long as beta==0
     #    C = Matrix{T}(undef, mA, mA)
 
-    if iszero(β) || issymmetric(C)
-        alpha, beta = promote(α, β, zero(T))
+    if iszero(_add.beta) || issymmetric(C)
+        alpha, beta = promote(_add.alpha, _add.beta, zero(T))
         if (alpha isa Union{Bool,T} &&
             beta isa Union{Bool,T} &&
             stride(A, 1) == stride(C, 1) == 1 &&
@@ -547,7 +624,7 @@ function herk_wrapper!(C::Union{StridedMatrix{T}, StridedMatrix{Complex{T}}}, tA
             return copytri!(BLAS.herk!('U', tA, alpha, A, beta, C), 'U', true)
         end
     end
-    return gemm_wrapper!(C, tA, tAt, A, A, MulAddMul(α, β))
+    return gemm_wrapper!(C, tA, tAt, A, A, _add)
 end
 
 function gemm_wrapper(tA::AbstractChar, tB::AbstractChar,
@@ -566,7 +643,7 @@ function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar
     mB, nB = lapack_size(tB, B)
 
     if nA != mB
-        throw(DimensionMismatch("A has dimensions ($mA,$nA) but B has dimensions ($mB,$nB)"))
+        throw(DimensionMismatch(lazy"A has dimensions ($mA,$nA) but B has dimensions ($mB,$nB)"))
     end
 
     if C === A || B === C
@@ -575,7 +652,7 @@ function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar
 
     if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha)
         if size(C) != (mA, nB)
-            throw(DimensionMismatch("C has dimensions $(size(C)), should have ($mA,$nB)"))
+            throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)"))
         end
         return _rmul_or_fill!(C, _add.beta)
     end
@@ -599,27 +676,70 @@ function gemm_wrapper!(C::StridedVecOrMat{T}, tA::AbstractChar, tB::AbstractChar
     generic_matmatmul!(C, tA, tB, A, B, _add)
 end
 
+function gemm_wrapper!(C::StridedVecOrMat{Complex{T}}, tA::AbstractChar, tB::AbstractChar,
+                       A::StridedVecOrMat{Complex{T}}, B::StridedVecOrMat{T},
+                       _add = MulAddMul()) where {T<:BlasReal}
+    mA, nA = lapack_size(tA, A)
+    mB, nB = lapack_size(tB, B)
+
+    if nA != mB
+        throw(DimensionMismatch(lazy"A has dimensions ($mA,$nA) but B has dimensions ($mB,$nB)"))
+    end
+
+    if C === A || B === C
+        throw(ArgumentError("output matrix must not be aliased with input matrix"))
+    end
+
+    if mA == 0 || nA == 0 || nB == 0 || iszero(_add.alpha)
+        if size(C) != (mA, nB)
+            throw(DimensionMismatch(lazy"C has dimensions $(size(C)), should have ($mA,$nB)"))
+        end
+        return _rmul_or_fill!(C, _add.beta)
+    end
+
+    if mA == 2 && nA == 2 && nB == 2
+        return matmul2x2!(C, tA, tB, A, B, _add)
+    end
+    if mA == 3 && nA == 3 && nB == 3
+        return matmul3x3!(C, tA, tB, A, B, _add)
+    end
+
+    alpha, beta = promote(_add.alpha, _add.beta, zero(T))
+
+    # Make-sure reinterpret-based optimization is BLAS-compatible.
+    if (alpha isa Union{Bool,T} &&
+        beta isa Union{Bool,T} &&
+        stride(A, 1) == stride(B, 1) == stride(C, 1) == 1 &&
+        stride(A, 2) >= size(A, 1) &&
+        stride(B, 2) >= size(B, 1) &&
+        stride(C, 2) >= size(C, 1)) && tA == 'N'
+        BLAS.gemm!(tA, tB, alpha, reinterpret(T, A), B, beta, reinterpret(T, C))
+        return C
+    end
+    generic_matmatmul!(C, tA, tB, A, B, _add)
+end
+
 # blas.jl defines matmul for floats; other integer and mixed precision
 # cases are handled here
 
 lapack_size(t::AbstractChar, M::AbstractVecOrMat) = (size(M, t=='N' ? 1 : 2), size(M, t=='N' ? 2 : 1))
 
-function copyto!(B::AbstractVecOrMat, ir_dest::UnitRange{Int}, jr_dest::UnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::UnitRange{Int}, jr_src::UnitRange{Int})
+function copyto!(B::AbstractVecOrMat, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
     if tM == 'N'
         copyto!(B, ir_dest, jr_dest, M, ir_src, jr_src)
     else
         LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, jr_src, ir_src)
-        tM == 'C' && conj!(B)
+        tM == 'C' && conj!(@view B[ir_dest, jr_dest])
     end
     B
 end
 
-function copy_transpose!(B::AbstractMatrix, ir_dest::UnitRange{Int}, jr_dest::UnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::UnitRange{Int}, jr_src::UnitRange{Int})
+function copy_transpose!(B::AbstractMatrix, ir_dest::AbstractUnitRange{Int}, jr_dest::AbstractUnitRange{Int}, tM::AbstractChar, M::AbstractVecOrMat, ir_src::AbstractUnitRange{Int}, jr_src::AbstractUnitRange{Int})
     if tM == 'N'
         LinearAlgebra.copy_transpose!(B, ir_dest, jr_dest, M, ir_src, jr_src)
     else
         copyto!(B, ir_dest, jr_dest, M, jr_src, ir_src)
-        tM == 'C' && conj!(B)
+        tM == 'C' && conj!(@view B[ir_dest, jr_dest])
     end
     B
 end
@@ -636,10 +756,10 @@ function generic_matvecmul!(C::AbstractVector{R}, tA, A::AbstractVecOrMat, B::Ab
     mB = length(B)
     mA, nA = lapack_size(tA, A)
     if mB != nA
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA), vector B has length $mB"))
+        throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA), vector B has length $mB"))
     end
     if mA != length(C)
-        throw(DimensionMismatch("result C has length $(length(C)), needs length $mA"))
+        throw(DimensionMismatch(lazy"result C has length $(length(C)), needs length $mA"))
     end
 
     Astride = size(A, 1)
@@ -705,10 +825,6 @@ function generic_matmatmul(tA, tB, A::AbstractVecOrMat{T}, B::AbstractMatrix{S})
 end
 
 const tilebufsize = 10800  # Approximately 32k/3
-# per-thread arrays of buffers resized by __init__ if needed
-const Abuf = [Vector{UInt8}(undef, tilebufsize)]
-const Bbuf = [Vector{UInt8}(undef, tilebufsize)]
-const Cbuf = [Vector{UInt8}(undef, tilebufsize)]
 
 function generic_matmatmul!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMatrix,
                             _add::MulAddMul=MulAddMul())
@@ -737,10 +853,10 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat
     mA, nA = lapack_size(tA, A)
     mB, nB = lapack_size(tB, B)
     if mB != nA
-        throw(DimensionMismatch("matrix A has dimensions ($mA,$nA), matrix B has dimensions ($mB,$nB)"))
+        throw(DimensionMismatch(lazy"matrix A has dimensions ($mA,$nA), matrix B has dimensions ($mB,$nB)"))
     end
     if size(C,1) != mA || size(C,2) != nB
-        throw(DimensionMismatch("result C has dimensions $(size(C)), needs ($mA,$nB)"))
+        throw(DimensionMismatch(lazy"result C has dimensions $(size(C)), needs ($mA,$nB)"))
     end
 
     if iszero(_add.alpha) || isempty(A) || isempty(B)
@@ -749,14 +865,13 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat
 
     tile_size = 0
     if isbitstype(R) && isbitstype(T) && isbitstype(S) && (tA == 'N' || tB != 'N')
-        tile_size = floor(Int, sqrt(tilebufsize / max(sizeof(R), sizeof(S), sizeof(T))))
+        tile_size = floor(Int, sqrt(tilebufsize / max(sizeof(R), sizeof(S), sizeof(T), 1)))
     end
     @inbounds begin
     if tile_size > 0
         sz = (tile_size, tile_size)
-        # FIXME: This code is completely invalid!!!
-        Atile = unsafe_wrap(Array, convert(Ptr{T}, pointer(Abuf[Threads.threadid()])), sz)
-        Btile = unsafe_wrap(Array, convert(Ptr{S}, pointer(Bbuf[Threads.threadid()])), sz)
+        Atile = Array{T}(undef, sz)
+        Btile = Array{S}(undef, sz)
 
         z1 = zero(A[1, 1]*B[1, 1] + A[1, 1]*B[1, 1])
         z = convert(promote_type(typeof(z1), R), z1)
@@ -776,8 +891,7 @@ function _generic_matmatmul!(C::AbstractVecOrMat{R}, tA, tB, A::AbstractVecOrMat
                 end
             end
         else
-            # FIXME: This code is completely invalid!!!
-            Ctile = unsafe_wrap(Array, convert(Ptr{R}, pointer(Cbuf[Threads.threadid()])), sz)
+            Ctile = Array{R}(undef, sz)
             for jb = 1:tile_size:nB
                 jlim = min(jb+tile_size-1,nB)
                 jlen = jlim-jb+1
@@ -915,7 +1029,7 @@ function matmul2x2!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat
                     _add::MulAddMul = MulAddMul())
     require_one_based_indexing(C, A, B)
     if !(size(A) == size(B) == size(C) == (2,2))
-        throw(DimensionMismatch("A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
+        throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
     end
     @inbounds begin
     if tA == 'T'
@@ -958,7 +1072,7 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat
                     _add::MulAddMul = MulAddMul())
     require_one_based_indexing(C, A, B)
     if !(size(A) == size(B) == size(C) == (3,3))
-        throw(DimensionMismatch("A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
+        throw(DimensionMismatch(lazy"A has size $(size(A)), B has size $(size(B)), C has size $(size(C))"))
     end
     @inbounds begin
     if tA == 'T'
@@ -1007,3 +1121,141 @@ function matmul3x3!(C::AbstractMatrix, tA, tB, A::AbstractMatrix, B::AbstractMat
     end # inbounds
     C
 end
+
+const RealOrComplex = Union{Real,Complex}
+
+# Three-argument *
+"""
+    *(A, B::AbstractMatrix, C)
+    A * B * C * D
+
+Chained multiplication of 3 or 4 matrices is done in the most efficient sequence,
+based on the sizes of the arrays. That is, the number of scalar multiplications needed
+for `(A * B) * C` (with 3 dense matrices) is compared to that for `A * (B * C)`
+to choose which of these to execute.
+
+If the last factor is a vector, or the first a transposed vector, then it is efficient
+to deal with these first. In particular `x' * B * y` means `(x' * B) * y`
+for an ordinary column-major `B::Matrix`. Unlike `dot(x, B, y)`, this
+allocates an intermediate array.
+
+If the first or last factor is a number, this will be fused with the matrix
+multiplication, using 5-arg [`mul!`](@ref).
+
+See also [`muladd`](@ref), [`dot`](@ref).
+
+!!! compat "Julia 1.7"
+    These optimisations require at least Julia 1.7.
+"""
+*(A::AbstractMatrix, B::AbstractMatrix, x::AbstractVector) = A * (B*x)
+
+*(tu::AdjOrTransAbsVec, B::AbstractMatrix, v::AbstractVector) = (tu*B) * v
+*(tu::AdjOrTransAbsVec, B::AdjOrTransAbsMat, v::AbstractVector) = tu * (B*v)
+
+*(A::AbstractMatrix, x::AbstractVector, γ::Number) = mat_vec_scalar(A,x,γ)
+*(A::AbstractMatrix, B::AbstractMatrix, γ::Number) = mat_mat_scalar(A,B,γ)
+*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractVector{<:RealOrComplex}) =
+    mat_vec_scalar(B,C,α)
+*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}) =
+    mat_mat_scalar(B,C,α)
+
+*(α::Number, u::AbstractVector, tv::AdjOrTransAbsVec) = broadcast(*, α, u, tv)
+*(u::AbstractVector, tv::AdjOrTransAbsVec, γ::Number) = broadcast(*, u, tv, γ)
+*(u::AbstractVector, tv::AdjOrTransAbsVec, C::AbstractMatrix) = u * (tv*C)
+
+*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix) = _tri_matmul(A,B,C)
+*(tv::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix) = (tv*B) * C
+
+function _tri_matmul(A,B,C,δ=nothing)
+    n,m = size(A)
+    # m,k == size(B)
+    k,l = size(C)
+    costAB_C = n*m*k + n*k*l  # multiplications, allocations n*k + n*l
+    costA_BC = m*k*l + n*m*l  #                              m*l + n*l
+    if costA_BC < costAB_C
+        isnothing(δ) ? A * (B*C) : A * mat_mat_scalar(B,C,δ)
+    else
+        isnothing(δ) ? (A*B) * C : mat_mat_scalar(A*B, C, δ)
+    end
+end
+
+# Fast path for two arrays * one scalar is opt-in, via mat_vec_scalar and mat_mat_scalar.
+
+mat_vec_scalar(A, x, γ) = A * (x * γ)  # fallback
+mat_vec_scalar(A::StridedMaybeAdjOrTransMat, x::StridedVector, γ) = _mat_vec_scalar(A, x, γ)
+mat_vec_scalar(A::AdjOrTransAbsVec, x::StridedVector, γ) = (A * x) * γ
+
+function _mat_vec_scalar(A, x, γ)
+    T = promote_type(eltype(A), eltype(x), typeof(γ))
+    C = similar(A, T, axes(A,1))
+    mul!(C, A, x, γ, false)
+end
+
+mat_mat_scalar(A, B, γ) = (A*B) * γ # fallback
+mat_mat_scalar(A::StridedMaybeAdjOrTransMat, B::StridedMaybeAdjOrTransMat, γ) =
+    _mat_mat_scalar(A, B, γ)
+
+function _mat_mat_scalar(A, B, γ)
+    T = promote_type(eltype(A), eltype(B), typeof(γ))
+    C = similar(A, T, axes(A,1), axes(B,2))
+    mul!(C, A, B, γ, false)
+end
+
+mat_mat_scalar(A::AdjointAbsVec, B, γ) = (γ' * (A * B)')' # preserving order, adjoint reverses
+mat_mat_scalar(A::AdjointAbsVec{<:RealOrComplex}, B::StridedMaybeAdjOrTransMat{<:RealOrComplex}, γ::RealOrComplex) =
+    mat_vec_scalar(B', A', γ')'
+
+mat_mat_scalar(A::TransposeAbsVec, B, γ) = transpose(γ * transpose(A * B))
+mat_mat_scalar(A::TransposeAbsVec{<:RealOrComplex}, B::StridedMaybeAdjOrTransMat{<:RealOrComplex}, γ::RealOrComplex) =
+    transpose(mat_vec_scalar(transpose(B), transpose(A), γ))
+
+
+# Four-argument *, by type
+*(α::Number, β::Number, C::AbstractMatrix, x::AbstractVector) = (α*β) * C * x
+*(α::Number, β::Number, C::AbstractMatrix, D::AbstractMatrix) = (α*β) * C * D
+*(α::Number, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = α * B * (C*x)
+*(α::Number, vt::AdjOrTransAbsVec, C::AbstractMatrix, x::AbstractVector) = α * (vt*C*x)
+*(α::RealOrComplex, vt::AdjOrTransAbsVec{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}, D::AbstractMatrix{<:RealOrComplex}) =
+    (α*vt*C) * D # solves an ambiguity
+
+*(A::AbstractMatrix, x::AbstractVector, γ::Number, δ::Number) = A * x * (γ*δ)
+*(A::AbstractMatrix, B::AbstractMatrix, γ::Number, δ::Number) = A * B * (γ*δ)
+*(A::AbstractMatrix, B::AbstractMatrix, x::AbstractVector, δ::Number, ) = A * (B*x*δ)
+*(vt::AdjOrTransAbsVec, B::AbstractMatrix, x::AbstractVector, δ::Number) = (vt*B*x) * δ
+*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, δ::Number) = (vt*B) * C * δ
+
+*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = A * B * (C*x)
+*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, D::AbstractMatrix) = (vt*B) * C * D
+*(vt::AdjOrTransAbsVec, B::AbstractMatrix, C::AbstractMatrix, x::AbstractVector) = vt * B * (C*x)
+
+# Four-argument *, by size
+*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, δ::Number) = _tri_matmul(A,B,C,δ)
+*(α::RealOrComplex, B::AbstractMatrix{<:RealOrComplex}, C::AbstractMatrix{<:RealOrComplex}, D::AbstractMatrix{<:RealOrComplex}) =
+    _tri_matmul(B,C,D,α)
+*(A::AbstractMatrix, B::AbstractMatrix, C::AbstractMatrix, D::AbstractMatrix) =
+    _quad_matmul(A,B,C,D)
+
+function _quad_matmul(A,B,C,D)
+    c1 = _mul_cost((A,B),(C,D))
+    c2 = _mul_cost(((A,B),C),D)
+    c3 = _mul_cost(A,(B,(C,D)))
+    c4 = _mul_cost((A,(B,C)),D)
+    c5 = _mul_cost(A,((B,C),D))
+    cmin = min(c1,c2,c3,c4,c5)
+    if c1 == cmin
+        (A*B) * (C*D)
+    elseif c2 == cmin
+        ((A*B) * C) * D
+    elseif c3 == cmin
+        A * (B * (C*D))
+    elseif c4 == cmin
+        (A * (B*C)) * D
+    else
+        A * ((B*C) * D)
+    end
+end
+@inline _mul_cost(A::AbstractMatrix) = 0
+@inline _mul_cost((A,B)::Tuple) = _mul_cost(A,B)
+@inline _mul_cost(A,B) = _mul_cost(A) + _mul_cost(B) + *(_mul_sizes(A)..., last(_mul_sizes(B)))
+@inline _mul_sizes(A::AbstractMatrix) = size(A)
+@inline _mul_sizes((A,B)::Tuple) = first(_mul_sizes(A)), last(_mul_sizes(B))
diff --git a/stdlib/LinearAlgebra/src/qr.jl b/stdlib/LinearAlgebra/src/qr.jl
index 0647d8be2a7ad0..6334c8a3474ef7 100644
--- a/stdlib/LinearAlgebra/src/qr.jl
+++ b/stdlib/LinearAlgebra/src/qr.jl
@@ -34,19 +34,22 @@ The object has two fields:
 * `τ` is a vector  of length `min(m,n)` containing the coefficients ``\tau_i``.
 
 """
-struct QR{T,S<:AbstractMatrix{T}} <: Factorization{T}
+struct QR{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: Factorization{T}
     factors::S
-    τ::Vector{T}
+    τ::C
 
-    function QR{T,S}(factors, τ) where {T,S<:AbstractMatrix{T}}
+    function QR{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}}
         require_one_based_indexing(factors)
-        new{T,S}(factors, τ)
+        new{T,S,C}(factors, τ)
     end
 end
-QR(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = QR{T,typeof(factors)}(factors, τ)
-function QR{T}(factors::AbstractMatrix, τ::AbstractVector) where {T}
-    QR(convert(AbstractMatrix{T}, factors), convert(Vector{T}, τ))
-end
+QR(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} =
+    QR{T,typeof(factors),typeof(τ)}(factors, τ)
+QR{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} =
+    QR(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ))
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(QR{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S},
+           QR{T,S,typeof(τ)}(factors, τ))
 
 # iteration for destructuring into components
 Base.iterate(S::QR) = (S.Q, Val(:R))
@@ -108,25 +111,62 @@ The object has two fields:
 
 [^Schreiber1989]: R Schreiber and C Van Loan, "A storage-efficient WY representation for products of Householder transformations", SIAM J Sci Stat Comput 10 (1989), 53-57. [doi:10.1137/0910005](https://doi.org/10.1137/0910005)
 """
-struct QRCompactWY{S,M<:AbstractMatrix{S}} <: Factorization{S}
+struct QRCompactWY{S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}} <: Factorization{S}
     factors::M
-    T::Matrix{S}
+    T::C
 
-    function QRCompactWY{S,M}(factors, T) where {S,M<:AbstractMatrix{S}}
+    function QRCompactWY{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}}
         require_one_based_indexing(factors)
-        new{S,M}(factors, T)
+        new{S,M,C}(factors, T)
     end
 end
-QRCompactWY(factors::AbstractMatrix{S}, T::Matrix{S}) where {S} = QRCompactWY{S,typeof(factors)}(factors, T)
-function QRCompactWY{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S}
-    QRCompactWY(convert(AbstractMatrix{S}, factors), convert(Matrix{S}, T))
-end
+QRCompactWY(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} =
+    QRCompactWY{S,typeof(factors),typeof(T)}(factors, T)
+QRCompactWY{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} =
+    QRCompactWY(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T))
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(QRCompactWY{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M},
+           QRCompactWY{S,M,typeof(T)}(factors, T))
 
 # iteration for destructuring into components
 Base.iterate(S::QRCompactWY) = (S.Q, Val(:R))
 Base.iterate(S::QRCompactWY, ::Val{:R}) = (S.R, Val(:done))
 Base.iterate(S::QRCompactWY, ::Val{:done}) = nothing
 
+# returns upper triangular views of all non-undef values of `qr(A).T`:
+#
+# julia> sparse(qr(A).T .== qr(A).T)
+# 36×100 SparseMatrixCSC{Bool, Int64} with 1767 stored entries:
+# ⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿
+# ⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿
+# ⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿
+# ⠀⠀⠀⠀⠀⠂⠛⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿
+# ⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⢀⠐⠙⢿⣿⣿⣿⣿
+# ⠀⠀⠐⠀⠀⠀⠀⠀⠀⢀⢙⣿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠁⠀⡀⠀⠙⢿⣿⣿
+# ⠀⠀⠐⠀⠀⠀⠀⠀⠀⠀⠄⠀⠙⢿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⣿⣿⠀⠀⠀⠀⠀⠀⡀⠀⠀⢀⠀⠀⠙⢿
+# ⠀⡀⠀⠀⠀⠀⠀⠀⠂⠒⠒⠀⠀⠀⠙⢿⣿⣿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⣿⣿⠀⠀⠀⠀⠀⠀⠀⢀⠀⠀⠀⡀⠀⠀
+# ⠀⠀⠀⠀⠀⠀⠀⠀⣈⡀⠀⠀⠀⠀⠀⠀⠙⢿⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠀⠙⢿⠀⠀⠀⠀⠀⠀⠀⠀⠀⡀⠂⠀⢀⠀
+#
+function _triuppers_qr(T)
+    blocksize, cols = size(T)
+    return Iterators.map(0:div(cols - 1, blocksize)) do i
+        n = min(blocksize, cols - i * blocksize)
+        return UpperTriangular(view(T, 1:n, (1:n) .+ i * blocksize))
+    end
+end
+
+function Base.hash(F::QRCompactWY, h::UInt)
+    return hash(F.factors, foldr(hash, _triuppers_qr(F.T); init=hash(QRCompactWY, h)))
+end
+function Base.:(==)(A::QRCompactWY, B::QRCompactWY)
+    return A.factors == B.factors && all(splat(==), zip(_triuppers_qr.((A.T, B.T))...))
+end
+function Base.isequal(A::QRCompactWY, B::QRCompactWY)
+    return isequal(A.factors, B.factors) && all(zip(_triuppers_qr.((A.T, B.T))...)) do (a, b)
+        isequal(a, b)::Bool
+    end
+end
+
 """
     QRPivoted <: Factorization
 
@@ -160,23 +200,26 @@ The object has three fields:
 
 * `jpvt` is an integer vector of length `n` corresponding to the permutation ``P``.
 """
-struct QRPivoted{T,S<:AbstractMatrix{T}} <: Factorization{T}
+struct QRPivoted{T,S<:AbstractMatrix{T},C<:AbstractVector{T},P<:AbstractVector{<:Integer}} <: Factorization{T}
     factors::S
-    τ::Vector{T}
-    jpvt::Vector{BlasInt}
+    τ::C
+    jpvt::P
 
-    function QRPivoted{T,S}(factors, τ, jpvt) where {T,S<:AbstractMatrix{T}}
+    function QRPivoted{T,S,C,P}(factors, τ, jpvt) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T},P<:AbstractVector{<:Integer}}
         require_one_based_indexing(factors, τ, jpvt)
-        new{T,S}(factors, τ, jpvt)
+        new{T,S,C,P}(factors, τ, jpvt)
     end
 end
-QRPivoted(factors::AbstractMatrix{T}, τ::Vector{T}, jpvt::Vector{BlasInt}) where {T} =
-    QRPivoted{T,typeof(factors)}(factors, τ, jpvt)
-function QRPivoted{T}(factors::AbstractMatrix, τ::AbstractVector, jpvt::AbstractVector) where {T}
-    QRPivoted(convert(AbstractMatrix{T}, factors),
-              convert(Vector{T}, τ),
-              convert(Vector{BlasInt}, jpvt))
-end
+QRPivoted(factors::AbstractMatrix{T}, τ::AbstractVector{T},
+          jpvt::AbstractVector{<:Integer}) where {T} =
+    QRPivoted{T,typeof(factors),typeof(τ),typeof(jpvt)}(factors, τ, jpvt)
+QRPivoted{T}(factors::AbstractMatrix, τ::AbstractVector,
+             jpvt::AbstractVector{<:Integer}) where {T} =
+    QRPivoted(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ), jpvt)
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(QRPivoted{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T},
+                          jpvt::AbstractVector{<:Integer}) where {T,S},
+           QRPivoted{T,S,typeof(τ),typeof(jpvt)}(factors, τ, jpvt))
 
 # iteration for destructuring into components
 Base.iterate(S::QRPivoted) = (S.Q, Val(:R))
@@ -198,7 +241,7 @@ function qrfactUnblocked!(A::AbstractMatrix{T}) where {T}
 end
 
 # Find index for columns with largest two norm
-function indmaxcolumn(A::StridedMatrix)
+function indmaxcolumn(A::AbstractMatrix)
     mm = norm(view(A, :, 1))
     ii = 1
     for i = 2:size(A, 2)
@@ -211,7 +254,7 @@ function indmaxcolumn(A::StridedMatrix)
     return ii
 end
 
-function qrfactPivotedUnblocked!(A::StridedMatrix)
+function qrfactPivotedUnblocked!(A::AbstractMatrix)
     m, n = size(A)
     piv = Vector(UnitRange{BlasInt}(1,n))
     τ = Vector{eltype(A)}(undef, min(m,n))
@@ -236,27 +279,27 @@ function qrfactPivotedUnblocked!(A::StridedMatrix)
 
         # Compute reflector of columns j
         x = view(A, j:m, j)
-        τj = LinearAlgebra.reflector!(x)
+        τj = reflector!(x)
         τ[j] = τj
 
         # Update trailing submatrix with reflector
-        LinearAlgebra.reflectorApply!(x, τj, view(A, j:m, j+1:n))
+        reflectorApply!(x, τj, view(A, j:m, j+1:n))
     end
-    return LinearAlgebra.QRPivoted{eltype(A), typeof(A)}(A, τ, piv)
+    return QRPivoted{eltype(A), typeof(A), typeof(τ), typeof(piv)}(A, τ, piv)
 end
 
 # LAPACK version
-qr!(A::StridedMatrix{<:BlasFloat}, ::Val{false} = Val(false); blocksize=36) =
+qr!(A::StridedMatrix{<:BlasFloat}, ::NoPivot; blocksize=36) =
     QRCompactWY(LAPACK.geqrt!(A, min(min(size(A)...), blocksize))...)
-qr!(A::StridedMatrix{<:BlasFloat}, ::Val{true}) = QRPivoted(LAPACK.geqp3!(A)...)
+qr!(A::StridedMatrix{<:BlasFloat}, ::ColumnNorm) = QRPivoted(LAPACK.geqp3!(A)...)
 
 # Generic fallbacks
 
 """
-    qr!(A, pivot=Val(false); blocksize)
+    qr!(A, pivot = NoPivot(); blocksize)
 
-`qr!` is the same as [`qr`](@ref) when `A` is a subtype of
-[`StridedMatrix`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
+`qr!` is the same as [`qr`](@ref) when `A` is a subtype of [`StridedMatrix`](@ref),
+but saves space by overwriting the input `A`, instead of creating a copy.
 An [`InexactError`](@ref) exception is thrown if the factorization produces a number not
 representable by the element type of `A`, e.g. for integer types.
 
@@ -271,9 +314,9 @@ julia> a = [1. 2.; 3. 4.]
  3.0  4.0
 
 julia> qr!(a)
-LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}}
+QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}}
 Q factor:
-2×2 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}}:
+2×2 QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}:
  -0.316228  -0.948683
  -0.948683   0.316228
 R factor:
@@ -287,19 +330,22 @@ julia> a = [1 2; 3 4]
  3  4
 
 julia> qr!(a)
-ERROR: InexactError: Int64(-3.1622776601683795)
+ERROR: InexactError: Int64(3.1622776601683795)
 Stacktrace:
 [...]
 ```
 """
-qr!(A::StridedMatrix, ::Val{false}) = qrfactUnblocked!(A)
-qr!(A::StridedMatrix, ::Val{true}) = qrfactPivotedUnblocked!(A)
-qr!(A::StridedMatrix) = qr!(A, Val(false))
+qr!(A::AbstractMatrix, ::NoPivot) = qrfactUnblocked!(A)
+qr!(A::AbstractMatrix, ::ColumnNorm) = qrfactPivotedUnblocked!(A)
+qr!(A::AbstractMatrix) = qr!(A, NoPivot())
+# TODO: Remove in Julia v2.0
+@deprecate qr!(A::AbstractMatrix, ::Val{true})  qr!(A, ColumnNorm())
+@deprecate qr!(A::AbstractMatrix, ::Val{false}) qr!(A, NoPivot())
 
 _qreltype(::Type{T}) where T = typeof(zero(T)/sqrt(abs2(one(T))))
 
 """
-    qr(A, pivot=Val(false); blocksize) -> F
+    qr(A, pivot = NoPivot(); blocksize) -> F
 
 Compute the QR factorization of the matrix `A`: an orthogonal (or unitary if `A` is
 complex-valued) matrix `Q`, and an upper triangular matrix `R` such that
@@ -310,7 +356,7 @@ A = Q R
 
 The returned object `F` stores the factorization in a packed format:
 
- - if `pivot == Val(true)` then `F` is a [`QRPivoted`](@ref) object,
+ - if `pivot == ColumnNorm()` then `F` is a [`QRPivoted`](@ref) object,
 
  - otherwise if the element type of `A` is a BLAS type ([`Float32`](@ref), [`Float64`](@ref),
    `ComplexF32` or `ComplexF64`), then `F` is a [`QRCompactWY`](@ref) object,
@@ -340,7 +386,7 @@ and `F.Q*A` are supported. A `Q` matrix can be converted into a regular matrix w
 orthogonal matrix.
 
 The block size for QR decomposition can be specified by keyword argument
-`blocksize :: Integer` when `pivot == Val(false)` and `A isa StridedMatrix{<:BlasFloat}`.
+`blocksize :: Integer` when `pivot == NoPivot()` and `A isa StridedMatrix{<:BlasFloat}`.
 It is ignored when `blocksize > minimum(size(A))`.  See [`QRCompactWY`](@ref).
 
 !!! compat "Julia 1.4"
@@ -355,9 +401,9 @@ julia> A = [3.0 -6.0; 4.0 -8.0; 0.0 1.0]
  0.0   1.0
 
 julia> F = qr(A)
-LinearAlgebra.QRCompactWY{Float64, Matrix{Float64}}
+QRCompactWY{Float64, Matrix{Float64}, Matrix{Float64}}
 Q factor:
-3×3 LinearAlgebra.QRCompactWYQ{Float64, Matrix{Float64}}:
+3×3 QRCompactWYQ{Float64, Matrix{Float64}, Matrix{Float64}}:
  -0.6   0.0   0.8
  -0.8   0.0  -0.6
   0.0  -1.0   0.0
@@ -378,10 +424,13 @@ true
 """
 function qr(A::AbstractMatrix{T}, arg...; kwargs...) where T
     require_one_based_indexing(A)
-    AA = similar(A, _qreltype(T), size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, _qreltype(T))
     return qr!(AA, arg...; kwargs...)
 end
+# TODO: remove in Julia v2.0
+@deprecate qr(A::AbstractMatrix, ::Val{false}; kwargs...) qr(A, NoPivot(); kwargs...)
+@deprecate qr(A::AbstractMatrix, ::Val{true}; kwargs...)  qr(A, ColumnNorm(); kwargs...)
+
 qr(x::Number) = qr(fill(x,1,1))
 function qr(v::AbstractVector)
     require_one_based_indexing(v)
@@ -465,6 +514,8 @@ end
 Base.propertynames(F::QRPivoted, private::Bool=false) =
     (:R, :Q, :p, :P, (private ? fieldnames(typeof(F)) : ())...)
 
+adjoint(F::Union{QR,QRPivoted,QRCompactWY}) = Adjoint(F)
+
 abstract type AbstractQ{T} <: AbstractMatrix{T} end
 
 inv(Q::AbstractQ) = Q'
@@ -475,19 +526,22 @@ inv(Q::AbstractQ) = Q'
 The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QR`](@ref) or
 [`QRPivoted`](@ref) format.
 """
-struct QRPackedQ{T,S<:AbstractMatrix{T}} <: AbstractQ{T}
+struct QRPackedQ{T,S<:AbstractMatrix{T},C<:AbstractVector{T}} <: AbstractQ{T}
     factors::S
-    τ::Vector{T}
+    τ::C
 
-    function QRPackedQ{T,S}(factors, τ) where {T,S<:AbstractMatrix{T}}
+    function QRPackedQ{T,S,C}(factors, τ) where {T,S<:AbstractMatrix{T},C<:AbstractVector{T}}
         require_one_based_indexing(factors)
-        new{T,S}(factors, τ)
+        new{T,S,C}(factors, τ)
     end
 end
-QRPackedQ(factors::AbstractMatrix{T}, τ::Vector{T}) where {T} = QRPackedQ{T,typeof(factors)}(factors, τ)
-function QRPackedQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T}
-    QRPackedQ(convert(AbstractMatrix{T}, factors), convert(Vector{T}, τ))
-end
+QRPackedQ(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T} =
+    QRPackedQ{T,typeof(factors),typeof(τ)}(factors, τ)
+QRPackedQ{T}(factors::AbstractMatrix, τ::AbstractVector) where {T} =
+    QRPackedQ(convert(AbstractMatrix{T}, factors), convert(AbstractVector{T}, τ))
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(QRPackedQ{T,S}(factors::AbstractMatrix{T}, τ::AbstractVector{T}) where {T,S},
+           QRPackedQ{T,S,typeof(τ)}(factors, τ))
 
 """
     QRCompactWYQ <: AbstractMatrix
@@ -495,19 +549,22 @@ end
 The orthogonal/unitary ``Q`` matrix of a QR factorization stored in [`QRCompactWY`](@ref)
 format.
 """
-struct QRCompactWYQ{S, M<:AbstractMatrix{S}} <: AbstractQ{S}
+struct QRCompactWYQ{S, M<:AbstractMatrix{S}, C<:AbstractMatrix{S}} <: AbstractQ{S}
     factors::M
-    T::Matrix{S}
+    T::C
 
-    function QRCompactWYQ{S,M}(factors, T) where {S,M<:AbstractMatrix{S}}
+    function QRCompactWYQ{S,M,C}(factors, T) where {S,M<:AbstractMatrix{S},C<:AbstractMatrix{S}}
         require_one_based_indexing(factors)
-        new{S,M}(factors, T)
+        new{S,M,C}(factors, T)
     end
 end
-QRCompactWYQ(factors::AbstractMatrix{S}, T::Matrix{S}) where {S} = QRCompactWYQ{S,typeof(factors)}(factors, T)
-function QRCompactWYQ{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S}
-    QRCompactWYQ(convert(AbstractMatrix{S}, factors), convert(Matrix{S}, T))
-end
+QRCompactWYQ(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S} =
+    QRCompactWYQ{S,typeof(factors),typeof(T)}(factors, T)
+QRCompactWYQ{S}(factors::AbstractMatrix, T::AbstractMatrix) where {S} =
+    QRCompactWYQ(convert(AbstractMatrix{S}, factors), convert(AbstractMatrix{S}, T))
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(QRCompactWYQ{S,M}(factors::AbstractMatrix{S}, T::AbstractMatrix{S}) where {S,M},
+           QRCompactWYQ{S,M,typeof(T)}(factors, T))
 
 QRPackedQ{T}(Q::QRPackedQ) where {T} = QRPackedQ(convert(AbstractMatrix{T}, Q.factors), convert(Vector{T}, Q.τ))
 AbstractMatrix{T}(Q::QRPackedQ{T}) where {T} = Q
@@ -522,23 +579,54 @@ Array(Q::AbstractQ) = Matrix(Q)
 
 size(F::Union{QR,QRCompactWY,QRPivoted}, dim::Integer) = size(getfield(F, :factors), dim)
 size(F::Union{QR,QRCompactWY,QRPivoted}) = size(getfield(F, :factors))
-size(Q::AbstractQ, dim::Integer) = size(getfield(Q, :factors), dim == 2 ? 1 : dim)
-size(Q::AbstractQ) = size(Q, 1), size(Q, 2)
+size(Q::Union{QRCompactWYQ,QRPackedQ}, dim::Integer) =
+    size(getfield(Q, :factors), dim == 2 ? 1 : dim)
+size(Q::Union{QRCompactWYQ,QRPackedQ}) = size(Q, 1), size(Q, 2)
+
+copymutable(Q::AbstractQ{T}) where {T} = lmul!(Q, Matrix{T}(I, size(Q)))
+copy(Q::AbstractQ) = copymutable(Q)
+getindex(Q::AbstractQ, inds...) = copymutable(Q)[inds...]
+getindex(Q::AbstractQ, ::Colon, ::Colon) = copy(Q)
 
-function getindex(Q::AbstractQ, i::Integer, j::Integer)
-    x = zeros(eltype(Q), size(Q, 1))
-    x[i] = 1
+function getindex(Q::AbstractQ, ::Colon, j::Int)
     y = zeros(eltype(Q), size(Q, 2))
     y[j] = 1
-    return dot(x, lmul!(Q, y))
+    lmul!(Q, y)
+end
+
+getindex(Q::AbstractQ, i::Int, j::Int) = Q[:, j][i]
+
+# specialization avoiding the fallback using slow `getindex`
+function copyto!(dest::AbstractMatrix, src::AbstractQ)
+    copyto!(dest, I)
+    lmul!(src, dest)
+end
+# needed to resolve method ambiguities
+function copyto!(dest::PermutedDimsArray{T,2,perm}, src::AbstractQ) where {T,perm}
+    if perm == (1, 2)
+        copyto!(parent(dest), src)
+    else
+        @assert perm == (2, 1) # there are no other permutations of two indices
+        if T <: Real
+            copyto!(parent(dest), I)
+            lmul!(src', parent(dest))
+        else
+            # LAPACK does not offer inplace lmul!(transpose(Q), B) for complex Q
+            tmp = similar(parent(dest))
+            copyto!(tmp, I)
+            rmul!(tmp, src)
+            permutedims!(parent(dest), tmp, (2, 1))
+        end
+    end
+    return dest
 end
 
 ## Multiplication by Q
 ### QB
 lmul!(A::QRCompactWYQ{T,S}, B::StridedVecOrMat{T}) where {T<:BlasFloat, S<:StridedMatrix} =
-    LAPACK.gemqrt!('L','N',A.factors,A.T,B)
+    LAPACK.gemqrt!('L', 'N', A.factors, A.T, B)
 lmul!(A::QRPackedQ{T,S}, B::StridedVecOrMat{T}) where {T<:BlasFloat, S<:StridedMatrix} =
-    LAPACK.ormqr!('L','N',A.factors,A.τ,B)
+    LAPACK.ormqr!('L', 'N', A.factors, A.τ, B)
 function lmul!(A::QRPackedQ, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     mA, nA = size(A.factors)
@@ -569,7 +657,7 @@ function (*)(A::AbstractQ, b::StridedVector)
     TAb = promote_type(eltype(A), eltype(b))
     Anew = convert(AbstractMatrix{TAb}, A)
     if size(A.factors, 1) == length(b)
-        bnew = copy_oftype(b, TAb)
+        bnew = copymutable_oftype(b, TAb)
     elseif size(A.factors, 2) == length(b)
         bnew = [b; zeros(TAb, size(A.factors, 1) - length(b))]
     else
@@ -581,7 +669,7 @@ function (*)(A::AbstractQ, B::StridedMatrix)
     TAB = promote_type(eltype(A), eltype(B))
     Anew = convert(AbstractMatrix{TAB}, A)
     if size(A.factors, 1) == size(B, 1)
-        Bnew = copy_oftype(B, TAB)
+        Bnew = copymutable_oftype(B, TAB)
     elseif size(A.factors, 2) == size(B, 1)
         Bnew = [B; zeros(TAB, size(A.factors, 1) - size(B,1), size(B, 2))]
     else
@@ -590,15 +678,22 @@ function (*)(A::AbstractQ, B::StridedMatrix)
     lmul!(Anew, Bnew)
 end
 
+function (*)(A::AbstractQ, b::Number)
+    TAb = promote_type(eltype(A), typeof(b))
+    dest = similar(A, TAb)
+    copyto!(dest, b*I)
+    lmul!(A, dest)
+end
+
 ### QcB
 lmul!(adjA::Adjoint{<:Any,<:QRCompactWYQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasReal,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.gemqrt!('L','T',A.factors,A.T,B))
+    (A = adjA.parent; LAPACK.gemqrt!('L', 'T', A.factors, A.T, B))
 lmul!(adjA::Adjoint{<:Any,<:QRCompactWYQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasComplex,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.gemqrt!('L','C',A.factors,A.T,B))
+    (A = adjA.parent; LAPACK.gemqrt!('L', 'C', A.factors, A.T, B))
 lmul!(adjA::Adjoint{<:Any,<:QRPackedQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasReal,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.ormqr!('L','T',A.factors,A.τ,B))
+    (A = adjA.parent; LAPACK.ormqr!('L', 'T', A.factors, A.τ, B))
 lmul!(adjA::Adjoint{<:Any,<:QRPackedQ{T,S}}, B::StridedVecOrMat{T}) where {T<:BlasComplex,S<:StridedMatrix} =
-    (A = adjA.parent; LAPACK.ormqr!('L','C',A.factors,A.τ,B))
+    (A = adjA.parent; LAPACK.ormqr!('L', 'C', A.factors, A.τ, B))
 function lmul!(adjA::Adjoint{<:Any,<:QRPackedQ}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
     A = adjA.parent
@@ -628,7 +723,7 @@ end
 function *(adjQ::Adjoint{<:Any,<:AbstractQ}, B::StridedVecOrMat)
     Q = adjQ.parent
     TQB = promote_type(eltype(Q), eltype(B))
-    return lmul!(adjoint(convert(AbstractMatrix{TQB}, Q)), copy_oftype(B, TQB))
+    return lmul!(adjoint(convert(AbstractMatrix{TQB}, Q)), copymutable_oftype(B, TQB))
 end
 
 ### QBc/QcBc
@@ -649,7 +744,7 @@ end
 
 ### AQ
 rmul!(A::StridedVecOrMat{T}, B::QRCompactWYQ{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
-    LAPACK.gemqrt!('R','N', B.factors, B.T, A)
+    LAPACK.gemqrt!('R', 'N', B.factors, B.T, A)
 rmul!(A::StridedVecOrMat{T}, B::QRPackedQ{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
     LAPACK.ormqr!('R', 'N', B.factors, B.τ, A)
 function rmul!(A::StridedMatrix,Q::QRPackedQ)
@@ -680,18 +775,25 @@ end
 function (*)(A::StridedMatrix, Q::AbstractQ)
     TAQ = promote_type(eltype(A), eltype(Q))
 
-    return rmul!(copy_oftype(A, TAQ), convert(AbstractMatrix{TAQ}, Q))
+    return rmul!(copymutable_oftype(A, TAQ), convert(AbstractMatrix{TAQ}, Q))
+end
+
+function (*)(a::Number, B::AbstractQ)
+    TaB = promote_type(typeof(a), eltype(B))
+    dest = similar(B, TaB)
+    copyto!(dest, a*I)
+    rmul!(dest, B)
 end
 
 ### AQc
 rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasReal} =
-    (B = adjB.parent; LAPACK.gemqrt!('R','T',B.factors,B.T,A))
+    (B = adjB.parent; LAPACK.gemqrt!('R', 'T', B.factors, B.T, A))
 rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRCompactWYQ{T}}) where {T<:BlasComplex} =
-    (B = adjB.parent; LAPACK.gemqrt!('R','C',B.factors,B.T,A))
+    (B = adjB.parent; LAPACK.gemqrt!('R', 'C', B.factors, B.T, A))
 rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRPackedQ{T}}) where {T<:BlasReal} =
-    (B = adjB.parent; LAPACK.ormqr!('R','T',B.factors,B.τ,A))
+    (B = adjB.parent; LAPACK.ormqr!('R', 'T', B.factors, B.τ, A))
 rmul!(A::StridedVecOrMat{T}, adjB::Adjoint{<:Any,<:QRPackedQ{T}}) where {T<:BlasComplex} =
-    (B = adjB.parent; LAPACK.ormqr!('R','C',B.factors,B.τ,A))
+    (B = adjB.parent; LAPACK.ormqr!('R', 'C', B.factors, B.τ, A))
 function rmul!(A::StridedMatrix, adjQ::Adjoint{<:Any,<:QRPackedQ})
     Q = adjQ.parent
     mQ, nQ = size(Q.factors)
@@ -722,8 +824,7 @@ function *(A::StridedMatrix, adjB::Adjoint{<:Any,<:AbstractQ})
     TAB = promote_type(eltype(A),eltype(B))
     BB = convert(AbstractMatrix{TAB}, B)
     if size(A,2) == size(B.factors, 1)
-        AA = similar(A, TAB, size(A))
-        copyto!(AA, A)
+        AA = copy_similar(A, TAB)
         return rmul!(AA, adjoint(BB))
     elseif size(A,2) == size(B.factors,2)
         return rmul!([A zeros(TAB, size(A, 1), size(B.factors, 1) - size(B.factors, 2))], adjoint(BB))
@@ -751,15 +852,33 @@ function *(adjA::Adjoint{<:Any,<:StridedVecOrMat}, adjQ::Adjoint{<:Any,<:Abstrac
 end
 
 ### mul!
-mul!(C::StridedVecOrMat{T}, Q::AbstractQ{T}, B::StridedVecOrMat{T}) where {T} = lmul!(Q, copyto!(C, B))
+function mul!(C::StridedVecOrMat{T}, Q::AbstractQ{T}, B::StridedVecOrMat{T}) where {T}
+    require_one_based_indexing(C, B)
+    mB = size(B, 1)
+    mC = size(C, 1)
+    if mB < mC
+        inds = CartesianIndices(B)
+        copyto!(C, inds, B, inds)
+        C[CartesianIndices((mB+1:mC, axes(C, 2)))] .= zero(T)
+        return lmul!(Q, C)
+    else
+        return lmul!(Q, copyto!(C, B))
+    end
+end
 mul!(C::StridedVecOrMat{T}, A::StridedVecOrMat{T}, Q::AbstractQ{T}) where {T} = rmul!(copyto!(C, A), Q)
 mul!(C::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:AbstractQ{T}}, B::StridedVecOrMat{T}) where {T} = lmul!(adjQ, copyto!(C, B))
 mul!(C::StridedVecOrMat{T}, A::StridedVecOrMat{T}, adjQ::Adjoint{<:Any,<:AbstractQ{T}}) where {T} = rmul!(copyto!(C, A), adjQ)
 
-ldiv!(A::QRCompactWY{T}, b::StridedVector{T}) where {T<:BlasFloat} =
-    (ldiv!(UpperTriangular(A.R), view(lmul!(adjoint(A.Q), b), 1:size(A, 2))); b)
-ldiv!(A::QRCompactWY{T}, B::StridedMatrix{T}) where {T<:BlasFloat} =
-    (ldiv!(UpperTriangular(A.R), view(lmul!(adjoint(A.Q), B), 1:size(A, 2), 1:size(B, 2))); B)
+function ldiv!(A::QRCompactWY{T}, b::StridedVector{T}) where {T<:BlasFloat}
+    m,n = size(A)
+    ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), b), 1:size(A, 2)))
+    return b
+end
+function ldiv!(A::QRCompactWY{T}, B::StridedMatrix{T}) where {T<:BlasFloat}
+    m,n = size(A)
+    ldiv!(UpperTriangular(view(A.factors, 1:min(m,n), 1:n)), view(lmul!(adjoint(A.Q), B), 1:size(A, 2), 1:size(B, 2)))
+    return B
+end
 
 # Julia implementation similar to xgelsy
 function ldiv!(A::QRPivoted{T}, B::StridedMatrix{T}, rcond::Real) where T<:BlasFloat
@@ -801,12 +920,12 @@ ldiv!(A::QRPivoted{T}, B::StridedVector{T}) where {T<:BlasFloat} =
     vec(ldiv!(A,reshape(B,length(B),1)))
 ldiv!(A::QRPivoted{T}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
     ldiv!(A, B, min(size(A)...)*eps(real(float(one(eltype(B))))))[1]
-function ldiv!(A::QR{T}, B::StridedMatrix{T}) where T
+function _wide_qr_ldiv!(A::QR{T}, B::StridedMatrix{T}) where T
     m, n = size(A)
     minmn = min(m,n)
     mB, nB = size(B)
     lmul!(adjoint(A.Q), view(B, 1:m, :))
-    R = A.R
+    R = A.R # makes a copy, used as a buffer below
     @inbounds begin
         if n > m # minimum norm solution
             τ = zeros(T,m)
@@ -827,7 +946,7 @@ function ldiv!(A::QR{T}, B::StridedMatrix{T}) where T
                 end
             end
         end
-        LinearAlgebra.ldiv!(UpperTriangular(view(R, :, 1:minmn)), view(B, 1:minmn, :))
+        ldiv!(UpperTriangular(view(R, :, 1:minmn)), view(B, 1:minmn, :))
         if n > m # Apply elementary transformation to solution
             B[m + 1:mB,1:nB] .= zero(T)
             for j = 1:nB
@@ -847,10 +966,22 @@ function ldiv!(A::QR{T}, B::StridedMatrix{T}) where T
     end
     return B
 end
+
+
+function ldiv!(A::QR{T}, B::StridedMatrix{T}) where T
+    m, n = size(A)
+    m < n && return _wide_qr_ldiv!(A, B)
+
+    lmul!(adjoint(A.Q), view(B, 1:m, :))
+    R = A.factors
+    ldiv!(UpperTriangular(view(R,1:n,:)), view(B, 1:n, :))
+    return B
+end
 function ldiv!(A::QR, B::StridedVector)
     ldiv!(A, reshape(B, length(B), 1))
-    B
+    return B
 end
+
 function ldiv!(A::QRPivoted, b::StridedVector)
     ldiv!(QR(A.factors,A.τ), b)
     b[1:size(A.factors, 2)] = view(b, 1:size(A.factors, 2))[invperm(A.jpvt)]
@@ -862,28 +993,35 @@ function ldiv!(A::QRPivoted, B::StridedMatrix)
     B
 end
 
-# convenience methods
-## return only the solution of a least squares problem while avoiding promoting
-## vectors to matrices.
-_cut_B(x::AbstractVector, r::UnitRange) = length(x)  > length(r) ? x[r]   : x
-_cut_B(X::AbstractMatrix, r::UnitRange) = size(X, 1) > length(r) ? X[r,:] : X
-
-## append right hand side with zeros if necessary
-_zeros(::Type{T}, b::AbstractVector, n::Integer) where {T} = zeros(T, max(length(b), n))
-_zeros(::Type{T}, B::AbstractMatrix, n::Integer) where {T} = zeros(T, max(size(B, 1), n), size(B, 2))
+function _apply_permutation!(F::QRPivoted, B::AbstractVecOrMat)
+    # Apply permutation but only to the top part of the solution vector since
+    # it's padded with zeros for underdetermined problems
+    B[1:length(F.p), :] = B[F.p, :]
+    return B
+end
+_apply_permutation!(F::Factorization, B::AbstractVecOrMat) = B
 
-function (\)(A::Union{QR{TA},QRCompactWY{TA},QRPivoted{TA}}, B::AbstractVecOrMat{TB}) where {TA,TB}
+function ldiv!(Fadj::Adjoint{<:Any,<:Union{QR,QRCompactWY,QRPivoted}}, B::AbstractVecOrMat)
     require_one_based_indexing(B)
-    S = promote_type(TA,TB)
-    m, n = size(A)
-    m == size(B,1) || throw(DimensionMismatch("Both inputs should have the same number of rows"))
+    m, n = size(Fadj)
 
-    AA = Factorization{S}(A)
+    # We don't allow solutions overdetermined systems
+    if m > n
+        throw(DimensionMismatch("overdetermined systems are not supported"))
+    end
+    if n != size(B, 1)
+        throw(DimensionMismatch("inputs should have the same number of rows"))
+    end
+    F = parent(Fadj)
 
-    X = _zeros(S, B, n)
-    X[1:size(B, 1), :] = B
-    ldiv!(AA, X)
-    return _cut_B(X, 1:n)
+    B = _apply_permutation!(F, B)
+
+    # For underdetermined system, the triangular solve should only be applied to the top
+    # part of B that contains the rhs. For square problems, the view corresponds to B itself
+    ldiv!(LowerTriangular(adjoint(F.R)), view(B, 1:size(F.R, 2), :))
+    lmul!(F.Q, B)
+
+    return B
 end
 
 # With a real lhs and complex rhs with the same precision, we can reinterpret the complex
diff --git a/stdlib/LinearAlgebra/src/schur.jl b/stdlib/LinearAlgebra/src/schur.jl
index 403339d8b1debc..75cef93ee2f4bb 100644
--- a/stdlib/LinearAlgebra/src/schur.jl
+++ b/stdlib/LinearAlgebra/src/schur.jl
@@ -22,7 +22,7 @@ julia> A = [5. 7.; -2. -4.]
  -2.0  -4.0
 
 julia> F = schur(A)
-Schur{Float64, Matrix{Float64}}
+Schur{Float64, Matrix{Float64}, Vector{Float64}}
 T factor:
 2×2 Matrix{Float64}:
  3.0   9.0
@@ -47,13 +47,19 @@ julia> t == F.T && z == F.Z && vals == F.values
 true
 ```
 """
-struct Schur{Ty,S<:AbstractMatrix} <: Factorization{Ty}
+struct Schur{Ty,S<:AbstractMatrix,C<:AbstractVector} <: Factorization{Ty}
     T::S
     Z::S
-    values::Vector
-    Schur{Ty,S}(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}, values::Vector) where {Ty,S} = new(T, Z, values)
+    values::C
+    Schur{Ty,S,C}(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty},
+                  values::AbstractVector) where {Ty,S,C} = new(T, Z, values)
 end
-Schur(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}, values::Vector) where {Ty} = Schur{Ty, typeof(T)}(T, Z, values)
+Schur(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}, values::AbstractVector) where {Ty} =
+    Schur{Ty, typeof(T), typeof(values)}(T, Z, values)
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(Schur{Ty,S}(T::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty},
+                       values::AbstractVector) where {Ty,S},
+           Schur{Ty,S,typeof(values)}(T, Z, values))
 
 # iteration for destructuring into components
 Base.iterate(S::Schur) = (S.T, Val(:Z))
@@ -74,7 +80,7 @@ julia> A = [5. 7.; -2. -4.]
  -2.0  -4.0
 
 julia> F = schur!(A)
-Schur{Float64, Matrix{Float64}}
+Schur{Float64, Matrix{Float64}, Vector{Float64}}
 T factor:
 2×2 Matrix{Float64}:
  3.0   9.0
@@ -97,13 +103,20 @@ julia> A
 schur!(A::StridedMatrix{<:BlasFloat}) = Schur(LinearAlgebra.LAPACK.gees!('V', A)...)
 
 """
-    schur(A::StridedMatrix) -> F::Schur
+    schur(A) -> F::Schur
 
 Computes the Schur factorization of the matrix `A`. The (quasi) triangular Schur factor can
 be obtained from the `Schur` object `F` with either `F.Schur` or `F.T` and the
 orthogonal/unitary Schur vectors can be obtained with `F.vectors` or `F.Z` such that
 `A = F.vectors * F.Schur * F.vectors'`. The eigenvalues of `A` can be obtained with `F.values`.
 
+For real `A`, the Schur factorization is "quasitriangular", which means that it
+is upper-triangular except with 2×2 diagonal blocks for any conjugate pair
+of complex eigenvalues; this allows the factorization to be purely real even
+when there are complex eigenvalues.  To obtain the (complex) purely upper-triangular
+Schur factorization from a real quasitriangular factorization, you can use
+`Schur{Complex}(schur(A))`.
+
 Iterating the decomposition produces the components `F.T`, `F.Z`, and `F.values`.
 
 # Examples
@@ -114,7 +127,7 @@ julia> A = [5. 7.; -2. -4.]
  -2.0  -4.0
 
 julia> F = schur(A)
-Schur{Float64, Matrix{Float64}}
+Schur{Float64, Matrix{Float64}, Vector{Float64}}
 T factor:
 2×2 Matrix{Float64}:
  3.0   9.0
@@ -139,25 +152,20 @@ julia> t == F.T && z == F.Z && vals == F.values
 true
 ```
 """
-schur(A::StridedMatrix{<:BlasFloat}) = schur!(copy(A))
-schur(A::StridedMatrix{T}) where T = schur!(copy_oftype(A, eigtype(T)))
-
-schur(A::AbstractMatrix{T}) where {T} = schur!(copyto!(Matrix{eigtype(T)}(undef, size(A)...), A))
+schur(A::AbstractMatrix{T}) where {T} = schur!(copy_similar(A, eigtype(T)))
 function schur(A::RealHermSymComplexHerm)
     F = eigen(A; sortby=nothing)
     return Schur(typeof(F.vectors)(Diagonal(F.values)), F.vectors, F.values)
 end
 function schur(A::Union{UnitUpperTriangular{T},UpperTriangular{T}}) where {T}
     t = eigtype(T)
-    Z = Matrix{t}(undef, size(A)...)
-    copyto!(Z, A)
+    Z = copy_similar(A, t)
     return Schur(Z, Matrix{t}(I, size(A)), convert(Vector{t}, diag(A)))
 end
 function schur(A::Union{UnitLowerTriangular{T},LowerTriangular{T}}) where {T}
     t = eigtype(T)
     # double flip the matrix A
-    Z = Matrix{t}(undef, size(A)...)
-    copyto!(Z, A)
+    Z = copy_similar(A, t)
     reverse!(reshape(Z, :))
     # construct "reverse" identity
     n = size(A, 1)
@@ -207,6 +215,48 @@ function show(io::IO, mime::MIME{Symbol("text/plain")}, F::Schur)
     show(io, mime, F.values)
 end
 
+# convert a (standard-form) quasi-triangular real Schur factorization into a
+# triangular complex Schur factorization.
+#
+# Based on the "triangularize" function from GenericSchur.jl,
+# released under the MIT "Expat" license by @RalphAS
+function Schur{CT}(S::Schur{<:Real}) where {CT<:Complex}
+    Tr = S.T
+    T = CT.(Tr)
+    Z = CT.(S.Z)
+    n = size(T,1)
+    for j=n:-1:2
+        if !iszero(Tr[j,j-1])
+            # We want a unitary similarity transform from
+            # ┌   ┐      ┌     ┐
+            # │a b│      │w₁  x│
+            # │c a│ into │0  w₂│ where bc < 0 (a,b,c real)
+            # └   ┘      └     ┘
+            # If we write it as
+            # ┌     ┐
+            # │u  v'│
+            # │-v u'│
+            # └     ┘
+            # and make the Ansatz that u is real (so v is imaginary),
+            # we arrive at a Givens rotation:
+            # θ = atan(sqrt(-Tr[j,j-1]/Tr[j-1,j]))
+            # s,c = sin(θ), cos(θ)
+            s = sqrt(abs(Tr[j,j-1]))
+            c = sqrt(abs(Tr[j-1,j]))
+            r = hypot(s,c)
+            G = Givens(j-1,j,complex(c/r),im*(-s/r))
+            lmul!(G,T)
+            rmul!(T,G')
+            rmul!(Z,G')
+        end
+    end
+    return Schur(triu!(T),Z,diag(T))
+end
+
+Schur{Complex}(S::Schur{<:Complex}) = S
+Schur{T}(S::Schur{T}) where {T} = S
+Schur{T}(S::Schur) where {T} = Schur(T.(S.T), T.(S.Z), T <: Real && !(eltype(S.values) <: Real) ? complex(T).(S.values) : T.(S.values))
+
 """
     ordschur!(F::Schur, select::Union{Vector{Bool},BitVector}) -> F::Schur
 
@@ -254,22 +304,29 @@ with `F.α./F.β`.
 Iterating the decomposition produces the components `F.S`, `F.T`, `F.Q`, `F.Z`,
 `F.α`, and `F.β`.
 """
-struct GeneralizedSchur{Ty,M<:AbstractMatrix} <: Factorization{Ty}
+struct GeneralizedSchur{Ty,M<:AbstractMatrix,A<:AbstractVector,B<:AbstractVector{Ty}} <: Factorization{Ty}
     S::M
     T::M
-    α::Vector
-    β::Vector{Ty}
+    α::A
+    β::B
     Q::M
     Z::M
-    function GeneralizedSchur{Ty,M}(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty}, alpha::Vector,
-                                    beta::Vector{Ty}, Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where {Ty,M}
-        new(S, T, alpha, beta, Q, Z)
+    function GeneralizedSchur{Ty,M,A,B}(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty},
+                                        alpha::AbstractVector, beta::AbstractVector{Ty},
+                                        Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where {Ty,M,A,B}
+        new{Ty,M,A,B}(S, T, alpha, beta, Q, Z)
     end
 end
-function GeneralizedSchur(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty}, alpha::Vector,
-                          beta::Vector{Ty}, Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where Ty
-    GeneralizedSchur{Ty, typeof(S)}(S, T, alpha, beta, Q, Z)
+function GeneralizedSchur(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty},
+                          alpha::AbstractVector, beta::AbstractVector{Ty},
+                          Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where Ty
+    GeneralizedSchur{Ty, typeof(S), typeof(alpha), typeof(beta)}(S, T, alpha, beta, Q, Z)
 end
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(GeneralizedSchur{Ty,M}(S::AbstractMatrix{Ty}, T::AbstractMatrix{Ty},
+                                 alpha::AbstractVector, beta::AbstractVector{Ty},
+                                 Q::AbstractMatrix{Ty}, Z::AbstractMatrix{Ty}) where {Ty,M},
+           GeneralizedSchur{Ty,M,typeof(alpha),typeof(beta)}(S, T, alpha, beta, Q, Z))
 
 # iteration for destructuring into components
 Base.iterate(S::GeneralizedSchur) = (S.S, Val(:T))
@@ -289,7 +346,7 @@ schur!(A::StridedMatrix{T}, B::StridedMatrix{T}) where {T<:BlasFloat} =
     GeneralizedSchur(LinearAlgebra.LAPACK.gges!('V', 'V', A, B)...)
 
 """
-    schur(A::StridedMatrix, B::StridedMatrix) -> F::GeneralizedSchur
+    schur(A, B) -> F::GeneralizedSchur
 
 Computes the Generalized Schur (or QZ) factorization of the matrices `A` and `B`. The
 (quasi) triangular Schur factors can be obtained from the `Schur` object `F` with `F.S`
@@ -301,10 +358,9 @@ generalized eigenvalues of `A` and `B` can be obtained with `F.α./F.β`.
 Iterating the decomposition produces the components `F.S`, `F.T`, `F.Q`, `F.Z`,
 `F.α`, and `F.β`.
 """
-schur(A::StridedMatrix{T},B::StridedMatrix{T}) where {T<:BlasFloat} = schur!(copy(A),copy(B))
-function schur(A::StridedMatrix{TA}, B::StridedMatrix{TB}) where {TA,TB}
+function schur(A::AbstractMatrix{TA}, B::AbstractMatrix{TB}) where {TA,TB}
     S = promote_type(eigtype(TA), TB)
-    return schur!(copy_oftype(A, S), copy_oftype(B, S))
+    return schur!(copy_similar(A, S), copy_similar(B, S))
 end
 
 """
diff --git a/stdlib/LinearAlgebra/src/special.jl b/stdlib/LinearAlgebra/src/special.jl
index 636c19159de739..098df785e557a3 100644
--- a/stdlib/LinearAlgebra/src/special.jl
+++ b/stdlib/LinearAlgebra/src/special.jl
@@ -28,6 +28,9 @@ Tridiagonal(A::Bidiagonal) =
 
 # conversions from SymTridiagonal to other special matrix types
 Diagonal(A::SymTridiagonal) = Diagonal(A.dv)
+
+# These can fail when ev has the same length as dv
+# TODO: Revisit when a good solution for #42477 is found
 Bidiagonal(A::SymTridiagonal) =
     iszero(A.ev) ? Bidiagonal(A.dv, A.ev, :U) :
         throw(ArgumentError("matrix cannot be represented as Bidiagonal"))
@@ -47,6 +50,15 @@ Bidiagonal(A::AbstractTriangular) =
     isbanded(A, -1, 0) ? Bidiagonal(diag(A, 0), diag(A, -1), :L) : # is lower bidiagonal
         throw(ArgumentError("matrix cannot be represented as Bidiagonal"))
 
+_lucopy(A::Bidiagonal, T)     = copymutable_oftype(Tridiagonal(A), T)
+_lucopy(A::Diagonal, T)       = copymutable_oftype(Tridiagonal(A), T)
+function _lucopy(A::SymTridiagonal, T)
+    du = copy_similar(_evview(A), T)
+    dl = copy.(transpose.(du))
+    d  = copy_similar(A.dv, T)
+    return Tridiagonal(dl, d, du)
+end
+
 const ConvertibleSpecialMatrix = Union{Diagonal,Bidiagonal,SymTridiagonal,Tridiagonal,AbstractTriangular}
 const PossibleTriangularMatrix = Union{Diagonal, Bidiagonal, AbstractTriangular}
 
@@ -154,10 +166,10 @@ end
 
 # this set doesn't have the aforementioned problem
 
-+(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+B.ev, A.d+B.dv, A.du+B.ev)
--(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-B.ev, A.d-B.dv, A.du-B.ev)
-+(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(A.ev+B.dl, A.dv+B.d, A.ev+B.du)
--(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(A.ev-B.dl, A.dv-B.d, A.ev-B.du)
++(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl+_evview(B), A.d+B.dv, A.du+_evview(B))
+-(A::Tridiagonal, B::SymTridiagonal) = Tridiagonal(A.dl-_evview(B), A.d-B.dv, A.du-_evview(B))
++(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)+B.dl, A.dv+B.d, _evview(A)+B.du)
+-(A::SymTridiagonal, B::Tridiagonal) = Tridiagonal(_evview(A)-B.dl, A.dv-B.d, _evview(A)-B.du)
 
 
 function (+)(A::Diagonal, B::Tridiagonal)
@@ -202,22 +214,22 @@ end
 
 function (+)(A::Bidiagonal, B::SymTridiagonal)
     newdv = A.dv+B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(B.ev), A.dv+B.dv, A.ev+B.ev) : (A.ev+B.ev, A.dv+B.dv, typeof(newdv)(B.ev)))...)
+    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(_evview(B)), A.dv+B.dv, A.ev+_evview(B)) : (A.ev+_evview(B), A.dv+B.dv, typeof(newdv)(_evview(B))))...)
 end
 
 function (-)(A::Bidiagonal, B::SymTridiagonal)
     newdv = A.dv-B.dv
-    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-B.ev), newdv, A.ev-B.ev) : (A.ev-B.ev, newdv, typeof(newdv)(-B.ev)))...)
+    Tridiagonal((A.uplo == 'U' ? (typeof(newdv)(-_evview(B)), newdv, A.ev-_evview(B)) : (A.ev-_evview(B), newdv, typeof(newdv)(-_evview(B))))...)
 end
 
 function (+)(A::SymTridiagonal, B::Bidiagonal)
     newdv = A.dv+B.dv
-    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(A.ev), newdv, A.ev+B.ev) : (A.ev+B.ev, newdv, typeof(newdv)(A.ev)))...)
+    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)+B.ev) : (_evview(A)+B.ev, newdv, typeof(newdv)(_evview(A))))...)
 end
 
 function (-)(A::SymTridiagonal, B::Bidiagonal)
     newdv = A.dv-B.dv
-    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(A.ev), newdv, A.ev-B.ev) : (A.ev-B.ev, newdv, typeof(newdv)(A.ev)))...)
+    Tridiagonal((B.uplo == 'U' ? (typeof(newdv)(_evview(A)), newdv, _evview(A)-B.ev) : (_evview(A)-B.ev, newdv, typeof(newdv)(_evview(A))))...)
 end
 
 # fixing uniform scaling problems from #28994
@@ -280,10 +292,65 @@ function (-)(A::UniformScaling, B::Diagonal{<:Number})
     Diagonal(A.λ .- B.diag)
 end
 
-rmul!(A::AbstractTriangular, adjB::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) =
-    (B = adjB.parent; rmul!(full!(A), adjoint(B)))
-*(A::AbstractTriangular, adjB::Adjoint{<:Any,<:Union{QRCompactWYQ,QRPackedQ}}) =
-    (B = adjB.parent; *(copyto!(similar(parent(A)), A), adjoint(B)))
+lmul!(Q::AbstractQ, B::AbstractTriangular) = lmul!(Q, full!(B))
+lmul!(Q::QRPackedQ, B::AbstractTriangular) = lmul!(Q, full!(B)) # disambiguation
+lmul!(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractTriangular) = lmul!(Q, full!(B))
+lmul!(Q::Adjoint{<:Any,<:QRPackedQ}, B::AbstractTriangular) = lmul!(Q, full!(B)) # disambiguation
+
+function _qlmul(Q::AbstractQ, B)
+    TQB = promote_type(eltype(Q), eltype(B))
+    if size(Q.factors, 1) == size(B, 1)
+        Bnew = Matrix{TQB}(B)
+    elseif size(Q.factors, 2) == size(B, 1)
+        Bnew = [Matrix{TQB}(B); zeros(TQB, size(Q.factors, 1) - size(B,1), size(B, 2))]
+    else
+        throw(DimensionMismatch("first dimension of matrix must have size either $(size(Q.factors, 1)) or $(size(Q.factors, 2))"))
+    end
+    lmul!(convert(AbstractMatrix{TQB}, Q), Bnew)
+end
+function _qlmul(adjQ::Adjoint{<:Any,<:AbstractQ}, B)
+    TQB = promote_type(eltype(adjQ), eltype(B))
+    lmul!(adjoint(convert(AbstractMatrix{TQB}, parent(adjQ))), Matrix{TQB}(B))
+end
+
+*(Q::AbstractQ, B::AbstractTriangular) = _qlmul(Q, B)
+*(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractTriangular) = _qlmul(Q, B)
+*(Q::AbstractQ, B::BiTriSym) = _qlmul(Q, B)
+*(Q::Adjoint{<:Any,<:AbstractQ}, B::BiTriSym) = _qlmul(Q, B)
+*(Q::AbstractQ, B::Diagonal) = _qlmul(Q, B)
+*(Q::Adjoint{<:Any,<:AbstractQ}, B::Diagonal) = _qlmul(Q, B)
+
+rmul!(A::AbstractTriangular, Q::AbstractQ) = rmul!(full!(A), Q)
+rmul!(A::AbstractTriangular, Q::Adjoint{<:Any,<:AbstractQ}) = rmul!(full!(A), Q)
+
+function _qrmul(A, Q::AbstractQ)
+    TAQ = promote_type(eltype(A), eltype(Q))
+    return rmul!(Matrix{TAQ}(A), convert(AbstractMatrix{TAQ}, Q))
+end
+function _qrmul(A, adjQ::Adjoint{<:Any,<:AbstractQ})
+    Q = adjQ.parent
+    TAQ = promote_type(eltype(A), eltype(Q))
+    if size(A,2) == size(Q.factors, 1)
+        Anew = Matrix{TAQ}(A)
+    elseif size(A,2) == size(Q.factors,2)
+        Anew = [Matrix{TAQ}(A) zeros(TAQ, size(A, 1), size(Q.factors, 1) - size(Q.factors, 2))]
+    else
+        throw(DimensionMismatch("matrix A has dimensions $(size(A)) but matrix B has dimensions $(size(Q))"))
+    end
+    return rmul!(Anew, adjoint(convert(AbstractMatrix{TAQ}, Q)))
+end
+
+*(A::AbstractTriangular, Q::AbstractQ) = _qrmul(A, Q)
+*(A::AbstractTriangular, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q)
+*(A::BiTriSym, Q::AbstractQ) = _qrmul(A, Q)
+*(A::BiTriSym, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q)
+*(A::Diagonal, Q::AbstractQ) = _qrmul(A, Q)
+*(A::Diagonal, Q::Adjoint{<:Any,<:AbstractQ}) = _qrmul(A, Q)
+
+*(Q::AbstractQ, B::AbstractQ) = _qlmul(Q, B)
+*(Q::Adjoint{<:Any,<:AbstractQ}, B::AbstractQ) = _qrmul(Q, B)
+*(Q::AbstractQ, B::Adjoint{<:Any,<:AbstractQ}) = _qlmul(Q, B)
+*(Q::Adjoint{<:Any,<:AbstractQ}, B::Adjoint{<:Any,<:AbstractQ}) = _qrmul(Q, B)
 
 # fill[stored]! methods
 fillstored!(A::Diagonal, x) = (fill!(A.diag, x); A)
@@ -302,17 +369,24 @@ function fill!(A::Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}, x)
     not be filled with $x, since some of its entries are constrained."))
 end
 
-one(A::Diagonal{T}) where T = Diagonal(fill!(similar(A.diag, typeof(one(T))), one(T)))
+one(D::Diagonal) = Diagonal(one.(D.diag))
 one(A::Bidiagonal{T}) where T = Bidiagonal(fill!(similar(A.dv, typeof(one(T))), one(T)), fill!(similar(A.ev, typeof(one(T))), zero(one(T))), A.uplo)
 one(A::Tridiagonal{T}) where T = Tridiagonal(fill!(similar(A.du, typeof(one(T))), zero(one(T))), fill!(similar(A.d, typeof(one(T))), one(T)), fill!(similar(A.dl, typeof(one(T))), zero(one(T))))
 one(A::SymTridiagonal{T}) where T = SymTridiagonal(fill!(similar(A.dv, typeof(one(T))), one(T)), fill!(similar(A.ev, typeof(one(T))), zero(one(T))))
+for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular, :UnitUpperTriangular)
+    @eval one(A::$t) = $t(one(parent(A)))
+    @eval oneunit(A::$t) = $t(oneunit(parent(A)))
+end
+
+zero(D::Diagonal) = Diagonal(zero.(D.diag))
+oneunit(D::Diagonal) = Diagonal(oneunit.(D.diag))
+
 # equals and approx equals methods for structured matrices
 # SymTridiagonal == Tridiagonal is already defined in tridiag.jl
 
-# SymTridiagonal and Bidiagonal have the same field names
-==(A::Diagonal, B::Union{SymTridiagonal, Bidiagonal}) = iszero(B.ev) && A.diag == B.dv
+==(A::Diagonal, B::Bidiagonal) = iszero(B.ev) && A.diag == B.dv
+==(A::Diagonal, B::SymTridiagonal) = iszero(_evview(B)) && A.diag == B.dv
 ==(B::Bidiagonal, A::Diagonal) = A == B
-
 ==(A::Diagonal, B::Tridiagonal) = iszero(B.dl) && iszero(B.du) && A.diag == B.d
 ==(B::Tridiagonal, A::Diagonal) = A == B
 
@@ -325,5 +399,36 @@ function ==(A::Bidiagonal, B::Tridiagonal)
 end
 ==(B::Tridiagonal, A::Bidiagonal) = A == B
 
-==(A::Bidiagonal, B::SymTridiagonal) = iszero(B.ev) && iszero(A.ev) && A.dv == B.dv
+==(A::Bidiagonal, B::SymTridiagonal) = iszero(_evview(B)) && iszero(A.ev) && A.dv == B.dv
 ==(B::SymTridiagonal, A::Bidiagonal) = A == B
+
+# concatenation
+const _SpecialArrays = Union{Diagonal, Bidiagonal, Tridiagonal, SymTridiagonal}
+const _Symmetric_DenseArrays{T,A<:Matrix} = Symmetric{T,A}
+const _Hermitian_DenseArrays{T,A<:Matrix} = Hermitian{T,A}
+const _Triangular_DenseArrays{T,A<:Matrix} = AbstractTriangular{T,A}
+const _Annotated_DenseArrays = Union{_SpecialArrays, _Triangular_DenseArrays, _Symmetric_DenseArrays, _Hermitian_DenseArrays}
+const _Annotated_Typed_DenseArrays{T} = Union{_Triangular_DenseArrays{T}, _Symmetric_DenseArrays{T}, _Hermitian_DenseArrays{T}}
+const _DenseConcatGroup = Union{Number, Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _Annotated_DenseArrays}
+const _TypedDenseConcatGroup{T} = Union{Vector{T}, Adjoint{T,Vector{T}}, Transpose{T,Vector{T}}, Matrix{T}, _Annotated_Typed_DenseArrays{T}}
+
+promote_to_array_type(::Tuple{Vararg{Union{_DenseConcatGroup,UniformScaling}}}) = Matrix
+
+Base._cat(dims, xs::_DenseConcatGroup...) = Base._cat_t(dims, promote_eltype(xs...), xs...)
+vcat(A::Vector...) = Base.typed_vcat(promote_eltype(A...), A...)
+vcat(A::_DenseConcatGroup...) = Base.typed_vcat(promote_eltype(A...), A...)
+hcat(A::Vector...) = Base.typed_hcat(promote_eltype(A...), A...)
+hcat(A::_DenseConcatGroup...) = Base.typed_hcat(promote_eltype(A...), A...)
+hvcat(rows::Tuple{Vararg{Int}}, xs::_DenseConcatGroup...) = Base.typed_hvcat(promote_eltype(xs...), rows, xs...)
+# For performance, specially handle the case where the matrices/vectors have homogeneous eltype
+Base._cat(dims, xs::_TypedDenseConcatGroup{T}...) where {T} = Base._cat_t(dims, T, xs...)
+vcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_vcat(T, A...)
+hcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hcat(T, A...)
+hvcat(rows::Tuple{Vararg{Int}}, xs::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hvcat(T, rows, xs...)
+
+# factorizations
+function cholesky(S::RealHermSymComplexHerm{<:Real,<:SymTridiagonal}, ::NoPivot = NoPivot(); check::Bool = true)
+    T = choltype(eltype(S))
+    B = Bidiagonal{T}(diag(S, 0), diag(S, S.uplo == 'U' ? 1 : -1), sym_uplo(S.uplo))
+    cholesky!(Hermitian(B, sym_uplo(S.uplo)), NoPivot(); check = check)
+end
diff --git a/stdlib/LinearAlgebra/src/structuredbroadcast.jl b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
index 57eddaabb8c325..95a18427022913 100644
--- a/stdlib/LinearAlgebra/src/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/src/structuredbroadcast.jl
@@ -70,11 +70,12 @@ find_uplo(a) = nothing
 find_uplo(bc::Broadcasted) = mapreduce(find_uplo, merge_uplos, bc.args, init=nothing)
 
 function structured_broadcast_alloc(bc, ::Type{<:Bidiagonal}, ::Type{ElType}, n) where {ElType}
-    uplo = find_uplo(bc)
+    uplo = n > 0 ? find_uplo(bc) : 'U'
+    n1 = max(n - 1, 0)
     if uplo == 'T'
-        return Tridiagonal(Array{ElType}(undef, n-1), Array{ElType}(undef, n), Array{ElType}(undef, n-1))
+        return Tridiagonal(Array{ElType}(undef, n1), Array{ElType}(undef, n), Array{ElType}(undef, n1))
     end
-    return Bidiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n-1), uplo)
+    return Bidiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n1), uplo)
 end
 structured_broadcast_alloc(bc, ::Type{<:SymTridiagonal}, ::Type{ElType}, n) where {ElType} =
     SymTridiagonal(Array{ElType}(undef, n),Array{ElType}(undef, n-1))
@@ -104,7 +105,29 @@ function isstructurepreserving(::typeof(Base.literal_pow), ::Ref{typeof(^)}, ::S
 end
 isstructurepreserving(f, args...) = false
 
-fzeropreserving(bc) = (v = fzero(bc); !ismissing(v) && iszero(v))
+"""
+    iszerodefined(T::Type)
+
+Return a `Bool` indicating whether `iszero` is well-defined for objects of type
+`T`. By default, this function returns `false` unless `T <: Number`. Note that
+this function may return `true` even if `zero(::T)` is not defined as long as
+`iszero(::T)` has a method that does not requires `zero(::T)`.
+
+This function is used to determine if mapping the elements of an array with
+a specific structure of nonzero elements preserve this structure.
+For instance, it is used to determine whether the output of
+`tuple.(Diagonal([1, 2]))` is `Diagonal([(1,), (2,)])` or
+`[(1,) (0,); (0,) (2,)]`. For this, we need to determine whether `(0,)` is
+considered to be zero. `iszero((0,))` falls back to `(0,) == zero((0,))` which
+fails as `zero(::Tuple{Int})` is not defined. However,
+`iszerodefined(::Tuple{Int})` is `false` hence we falls back to the comparison
+`(0,) == 0` which returns `false` and decides that the correct output is
+`[(1,) (0,); (0,) (2,)]`.
+"""
+iszerodefined(::Type) = false
+iszerodefined(::Type{<:Number}) = true
+
+fzeropreserving(bc) = (v = fzero(bc); !ismissing(v) && (iszerodefined(typeof(v)) ? iszero(v) : v == 0))
 # Like sparse matrices, we assume that the zero-preservation property of a broadcasted
 # expression is stable.  We can test the zero-preservability by applying the function
 # in cases where all other arguments are known scalars against a zero from the structured
diff --git a/stdlib/LinearAlgebra/src/svd.jl b/stdlib/LinearAlgebra/src/svd.jl
index 181bf281e9fba8..d0ac4d957e60d5 100644
--- a/stdlib/LinearAlgebra/src/svd.jl
+++ b/stdlib/LinearAlgebra/src/svd.jl
@@ -23,7 +23,7 @@ julia> A = [1. 0. 0. 0. 2.; 0. 0. 3. 0. 0.; 0. 0. 0. 0. 0.; 0. 2. 0. 0. 0.]
  0.0  2.0  0.0  0.0  0.0
 
 julia> F = svd(A)
-SVD{Float64, Float64, Matrix{Float64}}
+SVD{Float64, Float64, Matrix{Float64}, Vector{Float64}}
 U factor:
 4×4 Matrix{Float64}:
  0.0  1.0  0.0   0.0
@@ -56,22 +56,30 @@ julia> u == F.U && s == F.S && v == F.V
 true
 ```
 """
-struct SVD{T,Tr,M<:AbstractArray{T}} <: Factorization{T}
+struct SVD{T,Tr,M<:AbstractArray{T},C<:AbstractVector{Tr}} <: Factorization{T}
     U::M
-    S::Vector{Tr}
+    S::C
     Vt::M
-    function SVD{T,Tr,M}(U, S, Vt) where {T,Tr,M<:AbstractArray{T}}
+    function SVD{T,Tr,M,C}(U, S, Vt) where {T,Tr,M<:AbstractArray{T},C<:AbstractVector{Tr}}
         require_one_based_indexing(U, S, Vt)
-        new{T,Tr,M}(U, S, Vt)
+        new{T,Tr,M,C}(U, S, Vt)
     end
 end
-SVD(U::AbstractArray{T}, S::Vector{Tr}, Vt::AbstractArray{T}) where {T,Tr} = SVD{T,Tr,typeof(U)}(U, S, Vt)
-function SVD{T}(U::AbstractArray, S::AbstractVector{Tr}, Vt::AbstractArray) where {T,Tr}
+SVD(U::AbstractArray{T}, S::AbstractVector{Tr}, Vt::AbstractArray{T}) where {T,Tr} =
+    SVD{T,Tr,typeof(U),typeof(S)}(U, S, Vt)
+SVD{T}(U::AbstractArray, S::AbstractVector{Tr}, Vt::AbstractArray) where {T,Tr} =
     SVD(convert(AbstractArray{T}, U),
-        convert(Vector{Tr}, S),
+        convert(AbstractVector{Tr}, S),
         convert(AbstractArray{T}, Vt))
-end
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(SVD{T,Tr,M}(U::AbstractArray{T}, S::AbstractVector{Tr}, Vt::AbstractArray{T}) where {T,Tr,M},
+           SVD{T,Tr,M,typeof(S)}(U, S, Vt))
 
+SVD{T}(F::SVD) where {T} = SVD(
+    convert(AbstractMatrix{T}, F.U),
+    convert(AbstractVector{real(T)}, F.S),
+    convert(AbstractMatrix{T}, F.Vt))
+Factorization{T}(F::SVD) where {T} = SVD{T}(F)
 
 # iteration for destructuring into components
 Base.iterate(S::SVD) = (S.U, Val(:S))
@@ -89,22 +97,36 @@ default_svd_alg(A) = DivideAndConquer()
 `svd!` is the same as [`svd`](@ref), but saves space by
 overwriting the input `A`, instead of creating a copy. See documentation of [`svd`](@ref) for details.
 """
-function svd!(A::StridedMatrix{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where T<:BlasFloat
-    m,n = size(A)
+function svd!(A::StridedMatrix{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T<:BlasFloat}
+    m, n = size(A)
     if m == 0 || n == 0
-        u,s,vt = (Matrix{T}(I, m, full ? m : n), real(zeros(T,0)), Matrix{T}(I, n, n))
+        u, s, vt = (Matrix{T}(I, m, full ? m : n), real(zeros(T,0)), Matrix{T}(I, n, n))
     else
-        u,s,vt = _svd!(A,full,alg)
+        u, s, vt = _svd!(A, full, alg)
+    end
+    SVD(u, s, vt)
+end
+function svd!(A::StridedVector{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T<:BlasFloat}
+    m = length(A)
+    normA = norm(A)
+    if iszero(normA)
+        return SVD(Matrix{T}(I, m, full ? m : 1), [normA], ones(T, 1, 1))
+    elseif !full
+        normalize!(A)
+        return SVD(reshape(A, (m, 1)), [normA], ones(T, 1, 1))
+    else
+        u, s, vt = _svd!(reshape(A, (m, 1)), full, alg)
+        return SVD(u, s, vt)
     end
-    SVD(u,s,vt)
 end
 
-
-_svd!(A::StridedMatrix{T}, full::Bool, alg::Algorithm) where T<:BlasFloat = throw(ArgumentError("Unsupported value for `alg` keyword."))
-_svd!(A::StridedMatrix{T}, full::Bool, alg::DivideAndConquer) where T<:BlasFloat = LAPACK.gesdd!(full ? 'A' : 'S', A)
-function _svd!(A::StridedMatrix{T}, full::Bool, alg::QRIteration) where T<:BlasFloat
+_svd!(A::StridedMatrix{T}, full::Bool, alg::Algorithm) where {T<:BlasFloat} =
+    throw(ArgumentError("Unsupported value for `alg` keyword."))
+_svd!(A::StridedMatrix{T}, full::Bool, alg::DivideAndConquer) where {T<:BlasFloat} =
+    LAPACK.gesdd!(full ? 'A' : 'S', A)
+function _svd!(A::StridedMatrix{T}, full::Bool, alg::QRIteration) where {T<:BlasFloat}
     c = full ? 'A' : 'S'
-    u,s,vt = LAPACK.gesvd!(c, c, A)
+    u, s, vt = LAPACK.gesvd!(c, c, A)
 end
 
 
@@ -121,10 +143,10 @@ The singular values in `S` are sorted in descending order.
 
 Iterating the decomposition produces the components `U`, `S`, and `V`.
 
-If `full = false` (default), a "thin" SVD is returned. For a ``M
-\\times N`` matrix `A`, in the full factorization `U` is `M \\times M`
-and `V` is `N \\times N`, while in the thin factorization `U` is `M
-\\times K` and `V` is `N \\times K`, where `K = \\min(M,N)` is the
+If `full = false` (default), a "thin" SVD is returned. For an ``M
+\\times N`` matrix `A`, in the full factorization `U` is ``M \\times M``
+and `V` is ``N \\times N``, while in the thin factorization `U` is ``M
+\\times K`` and `V` is ``N \\times K``, where ``K = \\min(M,N)`` is the
 number of singular values.
 
 If `alg = DivideAndConquer()` a divide-and-conquer algorithm is used to calculate the SVD.
@@ -153,8 +175,12 @@ julia> Uonly == U
 true
 ```
 """
-function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where T
-    svd!(copy_oftype(A, eigtype(T)), full = full, alg = alg)
+function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T}
+    svd!(copymutable_oftype(A, eigtype(T)), full = full, alg = alg)
+end
+function svd(A::StridedVecOrMat{T}; full::Bool = false, alg::Algorithm = default_svd_alg(A)) where {T <: Union{Float16,Complex{Float16}}}
+    A = svd!(copymutable_oftype(A, eigtype(T)), full = full, alg = alg)
+    return SVD{T}(A)
 end
 function svd(x::Number; full::Bool = false, alg::Algorithm = default_svd_alg(x))
     SVD(x == 0 ? fill(one(x), 1, 1) : fill(x/abs(x), 1, 1), [abs(x)], fill(one(x), 1, 1))
@@ -190,7 +216,7 @@ See also [`svdvals`](@ref) and [`svd`](@ref).
 ```
 """
 svdvals!(A::StridedMatrix{T}) where {T<:BlasFloat} = isempty(A) ? zeros(real(T), 0) : LAPACK.gesdd!('N', A)[2]
-svdvals(A::AbstractMatrix{<:BlasFloat}) = svdvals!(copy(A))
+svdvals!(A::StridedVector{T}) where {T<:BlasFloat} = svdvals!(reshape(A, (length(A), 1)))
 
 """
     svdvals(A)
@@ -214,14 +240,19 @@ julia> svdvals(A)
  0.0
 ```
 """
-svdvals(A::AbstractMatrix{T}) where T = svdvals!(copy_oftype(A, eigtype(T)))
+svdvals(A::AbstractMatrix{T}) where {T} = svdvals!(copymutable_oftype(A, eigtype(T)))
+svdvals(A::AbstractVector{T}) where {T} = [convert(eigtype(T), norm(A))]
+svdvals(A::AbstractMatrix{<:BlasFloat}) = svdvals!(copy(A))
+svdvals(A::AbstractVector{<:BlasFloat}) = [norm(A)]
 svdvals(x::Number) = abs(x)
 svdvals(S::SVD{<:Any,T}) where {T} = (S.S)::Vector{T}
 
-# SVD least squares
+### SVD least squares ###
 function ldiv!(A::SVD{T}, B::StridedVecOrMat) where T
+    m, n = size(A)
     k = searchsortedlast(A.S, eps(real(T))*A.S[1], rev=true)
-    view(A.Vt,1:k,:)' * (view(A.S,1:k) .\ (view(A.U,:,1:k)' * B))
+    mul!(view(B, 1:n, :), view(A.Vt, 1:k, :)', view(A.S, 1:k) .\ (view(A.U, :, 1:k)' * _cut_B(B, 1:m)))
+    return B
 end
 
 function inv(F::SVD{T}) where T
@@ -235,7 +266,11 @@ end
 size(A::SVD, dim::Integer) = dim == 1 ? size(A.U, dim) : size(A.Vt, dim)
 size(A::SVD) = (size(A, 1), size(A, 2))
 
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::SVD{<:Any,<:Any,<:AbstractArray})
+function adjoint(F::SVD)
+    return SVD(F.Vt', F.S, F.U')
+end
+
+function show(io::IO, mime::MIME{Symbol("text/plain")}, F::SVD{<:Any,<:Any,<:AbstractArray,<:AbstractVector})
     summary(io, F); println(io)
     println(io, "U factor:")
     show(io, mime, F.U)
@@ -287,7 +322,7 @@ julia> B = [0. 1.; 1. 0.]
  1.0  0.0
 
 julia> F = svd(A, B)
-GeneralizedSVD{Float64, Matrix{Float64}}
+GeneralizedSVD{Float64, Matrix{Float64}, Float64, Vector{Float64}}
 U factor:
 2×2 Matrix{Float64}:
  1.0  0.0
@@ -301,13 +336,13 @@ Q factor:
  1.0  0.0
  0.0  1.0
 D1 factor:
-2×2 SparseArrays.SparseMatrixCSC{Float64, Int64} with 2 stored entries:
- 0.707107   ⋅
-  ⋅        0.707107
+2×2 Matrix{Float64}:
+ 0.707107  0.0
+ 0.0       0.707107
 D2 factor:
-2×2 SparseArrays.SparseMatrixCSC{Float64, Int64} with 2 stored entries:
- 0.707107   ⋅
-  ⋅        0.707107
+2×2 Matrix{Float64}:
+ 0.707107  0.0
+ 0.0       0.707107
 R0 factor:
 2×2 Matrix{Float64}:
  1.41421   0.0
@@ -320,28 +355,30 @@ julia> F.U*F.D1*F.R0*F.Q'
 
 julia> F.V*F.D2*F.R0*F.Q'
 2×2 Matrix{Float64}:
- 0.0  1.0
- 1.0  0.0
+ -0.0  1.0
+  1.0  0.0
 ```
 """
-struct GeneralizedSVD{T,S} <: Factorization{T}
+struct GeneralizedSVD{T,S<:AbstractMatrix,Tr,C<:AbstractVector{Tr}} <: Factorization{T}
     U::S
     V::S
     Q::S
-    a::Vector
-    b::Vector
+    a::C
+    b::C
     k::Int
     l::Int
     R::S
-    function GeneralizedSVD{T,S}(U::AbstractMatrix{T}, V::AbstractMatrix{T}, Q::AbstractMatrix{T},
-                                 a::Vector, b::Vector, k::Int, l::Int, R::AbstractMatrix{T}) where {T,S}
-        new(U, V, Q, a, b, k, l, R)
+    function GeneralizedSVD{T,S,Tr,C}(U, V, Q, a, b, k, l, R) where {T,S<:AbstractMatrix{T},Tr,C<:AbstractVector{Tr}}
+        new{T,S,Tr,C}(U, V, Q, a, b, k, l, R)
     end
 end
-function GeneralizedSVD(U::AbstractMatrix{T}, V::AbstractMatrix{T}, Q::AbstractMatrix{T},
-                        a::Vector, b::Vector, k::Int, l::Int, R::AbstractMatrix{T}) where T
-    GeneralizedSVD{T,typeof(U)}(U, V, Q, a, b, k, l, R)
-end
+GeneralizedSVD(U::AbstractMatrix{T}, V::AbstractMatrix{T}, Q::AbstractMatrix{T},
+              a::AbstractVector{Tr}, b::AbstractVector{Tr}, k::Int, l::Int,
+              R::AbstractMatrix{T}) where {T, Tr} =
+    GeneralizedSVD{T,typeof(U),Tr,typeof(a)}(U, V, Q, a, b, k, l, R)
+# backwards-compatible constructors (remove with Julia 2.0)
+@deprecate(GeneralizedSVD{T,S}(U, V, Q, a, b, k, l, R) where {T, S},
+           GeneralizedSVD{T,S,real(T),typeof(a)}(U, V, Q, a, b, k, l, R))
 
 # iteration for destructuring into components
 Base.iterate(S::GeneralizedSVD) = (S.U, Val(:V))
@@ -422,7 +459,7 @@ true
 """
 function svd(A::StridedMatrix{TA}, B::StridedMatrix{TB}) where {TA,TB}
     S = promote_type(eigtype(TA),TB)
-    return svd!(copy_oftype(A, S), copy_oftype(B, S))
+    return svd!(copymutable_oftype(A, S), copymutable_oftype(B, S))
 end
 # This method can be heavily optimized but it is probably not critical
 # and might introduce bugs or inconsistencies relative to the 1x1 matrix
@@ -494,7 +531,6 @@ end
 Return the generalized singular values from the generalized singular value
 decomposition of `A` and `B`, saving space by overwriting `A` and `B`.
 See also [`svd`](@ref) and [`svdvals`](@ref).
-```
 """
 function svdvals!(A::StridedMatrix{T}, B::StridedMatrix{T}) where T<:BlasFloat
     # xggsvd3 replaced xggsvd in LAPACK 3.6.0
@@ -533,7 +569,7 @@ julia> svdvals(A, B)
 """
 function svdvals(A::StridedMatrix{TA}, B::StridedMatrix{TB}) where {TA,TB}
     S = promote_type(eigtype(TA), TB)
-    return svdvals!(copy_oftype(A, S), copy_oftype(B, S))
+    return svdvals!(copymutable_oftype(A, S), copymutable_oftype(B, S))
 end
 svdvals(x::Number, y::Number) = abs(x/y)
 
diff --git a/stdlib/LinearAlgebra/src/symmetric.jl b/stdlib/LinearAlgebra/src/symmetric.jl
index 9d66cabd38b60b..7347dd6f78639e 100644
--- a/stdlib/LinearAlgebra/src/symmetric.jl
+++ b/stdlib/LinearAlgebra/src/symmetric.jl
@@ -7,6 +7,7 @@ struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
 
     function Symmetric{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}}
         require_one_based_indexing(data)
+        (uplo != 'U' && uplo != 'L') && throw_uplo()
         new{T,S}(data, uplo)
     end
 end
@@ -88,6 +89,7 @@ struct Hermitian{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
 
     function Hermitian{T,S}(data, uplo) where {T,S<:AbstractMatrix{<:T}}
         require_one_based_indexing(data)
+        (uplo != 'U' && uplo != 'L') && throw_uplo()
         new{T,S}(data, uplo)
     end
 end
@@ -236,6 +238,9 @@ function setindex!(A::Hermitian, v, i::Integer, j::Integer)
     end
 end
 
+diag(A::Symmetric) = symmetric.(diag(parent(A)), sym_uplo(A.uplo))
+diag(A::Hermitian) = hermitian.(diag(parent(A)), sym_uplo(A.uplo))
+
 # For A<:Union{Symmetric,Hermitian}, similar(A[, neweltype]) should yield a matrix with the same
 # symmetry type, uplo flag, and underlying storage type as A. The following methods cover these cases.
 similar(A::Symmetric, ::Type{T}) where {T} = Symmetric(similar(parent(A), T), ifelse(A.uplo == 'U', :U, :L))
@@ -353,8 +358,6 @@ real(A::Symmetric) = Symmetric(real(A.data), sym_uplo(A.uplo))
 real(A::Hermitian) = Hermitian(real(A.data), sym_uplo(A.uplo))
 imag(A::Symmetric) = Symmetric(imag(A.data), sym_uplo(A.uplo))
 
-Base.copy(A::Adjoint{<:Any,<:Hermitian}) = copy(A.parent)
-Base.copy(A::Transpose{<:Any,<:Symmetric}) = copy(A.parent)
 Base.copy(A::Adjoint{<:Any,<:Symmetric}) =
     Symmetric(copy(adjoint(A.parent.data)), ifelse(A.parent.uplo == 'U', :L, :U))
 Base.copy(A::Transpose{<:Any,<:Hermitian}) =
@@ -479,6 +482,10 @@ for f in (:+, :-)
     @eval begin
         $f(A::Hermitian, B::Symmetric{<:Real}) = $f(A, Hermitian(parent(B), sym_uplo(B.uplo)))
         $f(A::Symmetric{<:Real}, B::Hermitian) = $f(Hermitian(parent(A), sym_uplo(A.uplo)), B)
+        $f(A::SymTridiagonal, B::Symmetric) = Symmetric($f(A, B.data), sym_uplo(B.uplo))
+        $f(A::Symmetric, B::SymTridiagonal) = Symmetric($f(A.data, B), sym_uplo(A.uplo))
+        $f(A::SymTridiagonal{<:Real}, B::Hermitian) = Hermitian($f(A, B.data), sym_uplo(B.uplo))
+        $f(A::Hermitian, B::SymTridiagonal{<:Real}) = Hermitian($f(A.data, B), sym_uplo(A.uplo))
     end
 end
 
@@ -593,36 +600,6 @@ function dot(x::AbstractVector, A::RealHermSymComplexHerm, y::AbstractVector)
     return r
 end
 
-# Fallbacks to avoid generic_matvecmul!/generic_matmatmul!
-## Symmetric{<:Number} and Hermitian{<:Real} are invariant to transpose; peel off the t
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, B::AbstractVector) = transA.parent * B
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, B::AbstractMatrix) = transA.parent * B
-*(A::AbstractMatrix, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = A * transB.parent
-## Hermitian{<:Number} and Symmetric{<:Real} are invariant to adjoint; peel off the c
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::AbstractVector) = adjA.parent * B
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::AbstractMatrix) = adjA.parent * B
-*(A::AbstractMatrix, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * adjB.parent
-
-# ambiguities with transposed AbstractMatrix methods in linalg/matmul.jl
-*(transA::Transpose{<:Any,<:RealHermSym}, transB::Transpose{<:Any,<:RealHermSym}) = transA * transB.parent
-*(transA::Transpose{<:Any,<:RealHermSym}, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = transA * transB.parent
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = transA.parent * transB.parent
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, transB::Transpose{<:Any,<:RealHermSym}) = transA.parent * transB
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, transB::Transpose{<:Any,<:RealHermSymComplexHerm}) = transA.parent * transB
-*(transA::Transpose{<:Any,<:RealHermSymComplexHerm}, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = transA * transB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSym}, adjB::Adjoint{<:Any,<:RealHermSym}) = adjA * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = adjA.parent * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSym}, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = adjA * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexSym}, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = adjA * adjB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, adjB::Adjoint{<:Any,<:RealHermSym}) = adjA.parent * adjB
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, adjB::Adjoint{<:Any,<:RealHermSymComplexSym}) = adjA.parent * adjB
-
-# ambiguities with AbstractTriangular
-*(transA::Transpose{<:Any,<:RealHermSymComplexSym}, B::AbstractTriangular) = transA.parent * B
-*(A::AbstractTriangular, transB::Transpose{<:Any,<:RealHermSymComplexSym}) = A * transB.parent
-*(adjA::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::AbstractTriangular) = adjA.parent * B
-*(A::AbstractTriangular, adjB::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * adjB.parent
-
 # Scaling with Number
 *(A::Symmetric, x::Number) = Symmetric(A.data*x, sym_uplo(A.uplo))
 *(x::Number, A::Symmetric) = Symmetric(x*A.data, sym_uplo(A.uplo))
@@ -669,175 +646,7 @@ end
 inv(A::Hermitian{<:Any,<:StridedMatrix}) = Hermitian(_inv(A), sym_uplo(A.uplo))
 inv(A::Symmetric{<:Any,<:StridedMatrix}) = Symmetric(_inv(A), sym_uplo(A.uplo))
 
-eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) = Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...)
-
-function eigen(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
-    T = eltype(A)
-    S = eigtype(T)
-    eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), sortby=sortby)
-end
-
-eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) = Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...)
-
-"""
-    eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> Eigen
-
-Computes the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
-which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
-matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
-
-Iterating the decomposition produces the components `F.values` and `F.vectors`.
-
-The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
-
-The [`UnitRange`](@ref) `irange` specifies indices of the sorted eigenvalues to search for.
-
-!!! note
-    If `irange` is not `1:n`, where `n` is the dimension of `A`, then the returned factorization
-    will be a *truncated* factorization.
-"""
-function eigen(A::RealHermSymComplexHerm, irange::UnitRange)
-    T = eltype(A)
-    S = eigtype(T)
-    eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), irange)
-end
-
-eigen!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} =
-    Eigen(LAPACK.syevr!('V', 'V', A.uplo, A.data, convert(T, vl), convert(T, vh), 0, 0, -1.0)...)
-
-"""
-    eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> Eigen
-
-Computes the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
-which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
-matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
-
-Iterating the decomposition produces the components `F.values` and `F.vectors`.
-
-The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
-
-`vl` is the lower bound of the window of eigenvalues to search for, and `vu` is the upper bound.
-
-!!! note
-    If [`vl`, `vu`] does not contain all eigenvalues of `A`, then the returned factorization
-    will be a *truncated* factorization.
-"""
-function eigen(A::RealHermSymComplexHerm, vl::Real, vh::Real)
-    T = eltype(A)
-    S = eigtype(T)
-    eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), vl, vh)
-end
-
-eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}) =
-    LAPACK.syevr!('N', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)[1]
-
-function eigvals(A::RealHermSymComplexHerm)
-    T = eltype(A)
-    S = eigtype(T)
-    eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A))
-end
-
-"""
-    eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values
-
-Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
-`irange` is a range of eigenvalue *indices* to search for - for instance, the 2nd to 8th eigenvalues.
-"""
-eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
-    LAPACK.syevr!('N', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)[1]
-
-"""
-    eigvals(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values
-
-Returns the eigenvalues of `A`. It is possible to calculate only a subset of the
-eigenvalues by specifying a [`UnitRange`](@ref) `irange` covering indices of the sorted eigenvalues,
-e.g. the 2nd to 8th eigenvalues.
-
-# Examples
-```jldoctest
-julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.])
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 1.0  2.0   ⋅
- 2.0  2.0  3.0
-  ⋅   3.0  1.0
-
-julia> eigvals(A, 2:2)
-1-element Vector{Float64}:
- 0.9999999999999996
-
-julia> eigvals(A)
-3-element Vector{Float64}:
- -2.1400549446402604
-  1.0000000000000002
-  5.140054944640259
-```
-"""
-function eigvals(A::RealHermSymComplexHerm, irange::UnitRange)
-    T = eltype(A)
-    S = eigtype(T)
-    eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), irange)
-end
-
-"""
-    eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> values
-
-Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
-`vl` is the lower bound of the interval to search for eigenvalues, and `vu` is the upper bound.
-"""
-eigvals!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} =
-    LAPACK.syevr!('N', 'V', A.uplo, A.data, convert(T, vl), convert(T, vh), 0, 0, -1.0)[1]
-
-"""
-    eigvals(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> values
-
-Returns the eigenvalues of `A`. It is possible to calculate only a subset of the eigenvalues
-by specifying a pair `vl` and `vu` for the lower and upper boundaries of the eigenvalues.
-
-# Examples
-```jldoctest
-julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.])
-3×3 SymTridiagonal{Float64, Vector{Float64}}:
- 1.0  2.0   ⋅
- 2.0  2.0  3.0
-  ⋅   3.0  1.0
-
-julia> eigvals(A, -1, 2)
-1-element Vector{Float64}:
- 1.0000000000000009
-
-julia> eigvals(A)
-3-element Vector{Float64}:
- -2.1400549446402604
-  1.0000000000000002
-  5.140054944640259
-```
-"""
-function eigvals(A::RealHermSymComplexHerm, vl::Real, vh::Real)
-    T = eltype(A)
-    S = eigtype(T)
-    eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), vl, vh)
-end
-
-eigmax(A::RealHermSymComplexHerm{<:Real,<:StridedMatrix}) = eigvals(A, size(A, 1):size(A, 1))[1]
-eigmin(A::RealHermSymComplexHerm{<:Real,<:StridedMatrix}) = eigvals(A, 1:1)[1]
-
-function eigen!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix}
-    vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))
-    GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
-end
-function eigen!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasComplex,S<:StridedMatrix}
-    vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))
-    GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
-end
-
-eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}) where {T<:BlasReal,S<:StridedMatrix} =
-    LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
-eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}) where {T<:BlasComplex,S<:StridedMatrix} =
-    LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
-
-eigvecs(A::HermOrSym) = eigvecs(eigen(A))
-
-function svd(A::RealHermSymComplexHerm, full::Bool=false)
+function svd(A::RealHermSymComplexHerm; full::Bool=false)
     vals, vecs = eigen(A)
     I = sortperm(vals; by=abs, rev=true)
     permute!(vals, I)
@@ -931,6 +740,12 @@ for func in (:exp, :cos, :sin, :tan, :cosh, :sinh, :tanh, :atan, :asinh, :atanh)
     end
 end
 
+function cis(A::Union{RealHermSymComplexHerm,SymTridiagonal{<:Real}})
+    F = eigen(A)
+    # The returned matrix is unitary, and is complex-symmetric for real A
+    return F.vectors .* cis.(F.values') * F.vectors'
+end
+
 for func in (:acos, :asin)
     @eval begin
         function ($func)(A::HermOrSym{<:Real})
@@ -1039,35 +854,3 @@ for func in (:log, :sqrt)
         end
     end
 end
-
-# disambiguation methods: *(Adj of RealHermSymComplexHerm, Trans of RealHermSymComplexSym) and symmetric partner
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A.parent * B.parent
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A.parent * B.parent
-# disambiguation methods: *(Adj/Trans of AbsVec/AbsMat, Adj/Trans of RealHermSymComplex{Herm|Sym})
-*(A::Adjoint{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Adjoint{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Adjoint{<:Any,<:AbstractVector}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Adjoint{<:Any,<:AbstractMatrix}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractVector}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractMatrix}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-# disambiguation methods: *(Adj/Trans of RealHermSymComplex{Herm|Sym}, Adj/Trans of AbsVec/AbsMat)
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Adjoint{<:Any,<:AbstractVector}) = A.parent * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Adjoint{<:Any,<:AbstractMatrix}) = A.parent * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Transpose{<:Any,<:AbstractVector}) = A.parent * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Transpose{<:Any,<:AbstractMatrix}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:AbstractVector}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:AbstractMatrix}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Transpose{<:Any,<:AbstractVector}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Transpose{<:Any,<:AbstractMatrix}) = A.parent * B
-
-# disambiguation methods: *(Adj/Trans of AbsTri or RealHermSymComplex{Herm|Sym}, Adj/Trans of other)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:RealHermSymComplexHerm}) = A * B.parent
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:RealHermSymComplexSym}) = A * B.parent
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Adjoint{<:Any,<:AbstractTriangular}) = A.parent * B
-*(A::Adjoint{<:Any,<:RealHermSymComplexHerm}, B::Transpose{<:Any,<:AbstractTriangular}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Adjoint{<:Any,<:AbstractTriangular}) = A.parent * B
-*(A::Transpose{<:Any,<:RealHermSymComplexSym}, B::Transpose{<:Any,<:AbstractTriangular}) = A.parent * B
diff --git a/stdlib/LinearAlgebra/src/symmetriceigen.jl b/stdlib/LinearAlgebra/src/symmetriceigen.jl
new file mode 100644
index 00000000000000..8d90f370e06b69
--- /dev/null
+++ b/stdlib/LinearAlgebra/src/symmetriceigen.jl
@@ -0,0 +1,281 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Eigensolvers for symmetric and Hermitian matrices
+eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing) =
+    Eigen(sorteig!(LAPACK.syevr!('V', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)..., sortby)...)
+
+function eigen(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
+    T = eltype(A)
+    S = eigtype(T)
+    eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), sortby=sortby)
+end
+
+eigen!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
+    Eigen(LAPACK.syevr!('V', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)...)
+
+"""
+    eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> Eigen
+
+Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
+which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
+matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
+
+Iterating the decomposition produces the components `F.values` and `F.vectors`.
+
+The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
+
+The [`UnitRange`](@ref) `irange` specifies indices of the sorted eigenvalues to search for.
+
+!!! note
+    If `irange` is not `1:n`, where `n` is the dimension of `A`, then the returned factorization
+    will be a *truncated* factorization.
+"""
+function eigen(A::RealHermSymComplexHerm, irange::UnitRange)
+    T = eltype(A)
+    S = eigtype(T)
+    eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), irange)
+end
+
+eigen!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} =
+    Eigen(LAPACK.syevr!('V', 'V', A.uplo, A.data, convert(T, vl), convert(T, vh), 0, 0, -1.0)...)
+
+"""
+    eigen(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> Eigen
+
+Compute the eigenvalue decomposition of `A`, returning an [`Eigen`](@ref) factorization object `F`
+which contains the eigenvalues in `F.values` and the eigenvectors in the columns of the
+matrix `F.vectors`. (The `k`th eigenvector can be obtained from the slice `F.vectors[:, k]`.)
+
+Iterating the decomposition produces the components `F.values` and `F.vectors`.
+
+The following functions are available for `Eigen` objects: [`inv`](@ref), [`det`](@ref), and [`isposdef`](@ref).
+
+`vl` is the lower bound of the window of eigenvalues to search for, and `vu` is the upper bound.
+
+!!! note
+    If [`vl`, `vu`] does not contain all eigenvalues of `A`, then the returned factorization
+    will be a *truncated* factorization.
+"""
+function eigen(A::RealHermSymComplexHerm, vl::Real, vh::Real)
+    T = eltype(A)
+    S = eigtype(T)
+    eigen!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), vl, vh)
+end
+
+function eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}; sortby::Union{Function,Nothing}=nothing)
+    vals = LAPACK.syevr!('N', 'A', A.uplo, A.data, 0.0, 0.0, 0, 0, -1.0)[1]
+    !isnothing(sortby) && sort!(vals, by=sortby)
+    return vals
+end
+
+function eigvals(A::RealHermSymComplexHerm; sortby::Union{Function,Nothing}=nothing)
+    T = eltype(A)
+    S = eigtype(T)
+    eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), sortby=sortby)
+end
+
+"""
+    eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values
+
+Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
+`irange` is a range of eigenvalue *indices* to search for - for instance, the 2nd to 8th eigenvalues.
+"""
+eigvals!(A::RealHermSymComplexHerm{<:BlasReal,<:StridedMatrix}, irange::UnitRange) =
+    LAPACK.syevr!('N', 'I', A.uplo, A.data, 0.0, 0.0, irange.start, irange.stop, -1.0)[1]
+
+"""
+    eigvals(A::Union{SymTridiagonal, Hermitian, Symmetric}, irange::UnitRange) -> values
+
+Return the eigenvalues of `A`. It is possible to calculate only a subset of the
+eigenvalues by specifying a [`UnitRange`](@ref) `irange` covering indices of the sorted eigenvalues,
+e.g. the 2nd to 8th eigenvalues.
+
+# Examples
+```jldoctest
+julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.])
+3×3 SymTridiagonal{Float64, Vector{Float64}}:
+ 1.0  2.0   ⋅
+ 2.0  2.0  3.0
+  ⋅   3.0  1.0
+
+julia> eigvals(A, 2:2)
+1-element Vector{Float64}:
+ 0.9999999999999996
+
+julia> eigvals(A)
+3-element Vector{Float64}:
+ -2.1400549446402604
+  1.0000000000000002
+  5.140054944640259
+```
+"""
+function eigvals(A::RealHermSymComplexHerm, irange::UnitRange)
+    T = eltype(A)
+    S = eigtype(T)
+    eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), irange)
+end
+
+"""
+    eigvals!(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> values
+
+Same as [`eigvals`](@ref), but saves space by overwriting the input `A`, instead of creating a copy.
+`vl` is the lower bound of the interval to search for eigenvalues, and `vu` is the upper bound.
+"""
+eigvals!(A::RealHermSymComplexHerm{T,<:StridedMatrix}, vl::Real, vh::Real) where {T<:BlasReal} =
+    LAPACK.syevr!('N', 'V', A.uplo, A.data, convert(T, vl), convert(T, vh), 0, 0, -1.0)[1]
+
+"""
+    eigvals(A::Union{SymTridiagonal, Hermitian, Symmetric}, vl::Real, vu::Real) -> values
+
+Return the eigenvalues of `A`. It is possible to calculate only a subset of the eigenvalues
+by specifying a pair `vl` and `vu` for the lower and upper boundaries of the eigenvalues.
+
+# Examples
+```jldoctest
+julia> A = SymTridiagonal([1.; 2.; 1.], [2.; 3.])
+3×3 SymTridiagonal{Float64, Vector{Float64}}:
+ 1.0  2.0   ⋅
+ 2.0  2.0  3.0
+  ⋅   3.0  1.0
+
+julia> eigvals(A, -1, 2)
+1-element Vector{Float64}:
+ 1.0000000000000009
+
+julia> eigvals(A)
+3-element Vector{Float64}:
+ -2.1400549446402604
+  1.0000000000000002
+  5.140054944640259
+```
+"""
+function eigvals(A::RealHermSymComplexHerm, vl::Real, vh::Real)
+    T = eltype(A)
+    S = eigtype(T)
+    eigvals!(S != T ? convert(AbstractMatrix{S}, A) : copy(A), vl, vh)
+end
+
+eigmax(A::RealHermSymComplexHerm{<:Real,<:StridedMatrix}) = eigvals(A, size(A, 1):size(A, 1))[1]
+eigmin(A::RealHermSymComplexHerm{<:Real,<:StridedMatrix}) = eigvals(A, 1:1)[1]
+
+function eigen!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix}
+    vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))
+    GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
+end
+function eigen!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasComplex,S<:StridedMatrix}
+    vals, vecs, _ = LAPACK.sygvd!(1, 'V', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))
+    GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
+end
+
+function eigen!(A::RealHermSymComplexHerm{T,S}, B::AbstractMatrix{T}; sortby::Union{Function,Nothing}=nothing) where {T<:Number,S<:StridedMatrix}
+    U = cholesky(B).U
+    vals, w = eigen!(UtiAUi!(A, U))
+    vecs = U \ w
+    GeneralizedEigen(sorteig!(vals, vecs, sortby)...)
+end
+
+# Perform U' \ A / U in-place.
+UtiAUi!(As::Symmetric, Utr::UpperTriangular) = Symmetric(_UtiAsymUi!(As.uplo, parent(As), parent(Utr)), sym_uplo(As.uplo))
+UtiAUi!(As::Hermitian, Utr::UpperTriangular) = Hermitian(_UtiAsymUi!(As.uplo, parent(As), parent(Utr)), sym_uplo(As.uplo))
+UtiAUi!(As::Symmetric, Udi::Diagonal) = Symmetric(_UtiAsymUi_diag!(As.uplo, parent(As), Udi), sym_uplo(As.uplo))
+UtiAUi!(As::Hermitian, Udi::Diagonal) = Hermitian(_UtiAsymUi_diag!(As.uplo, parent(As), Udi), sym_uplo(As.uplo))
+
+# U is upper triangular
+function _UtiAsymUi!(uplo, A, U)
+    n = size(A, 1)
+    μ⁻¹ = 1 / U[1, 1]
+    αμ⁻² = A[1, 1] * μ⁻¹' * μ⁻¹
+
+    # Update (1, 1) element
+    A[1, 1] = αμ⁻²
+    if n > 1
+        Unext = view(U, 2:n, 2:n)
+
+        if uplo === 'U'
+            # Update submatrix
+            for j in 2:n, i in 2:j
+                A[i, j] = (
+                    A[i, j]
+                    - μ⁻¹' * U[1, j] * A[1, i]'
+                    - μ⁻¹ * A[1, j] * U[1, i]'
+                    + αμ⁻² * U[1, j] * U[1, i]'
+                )
+            end
+
+            # Update vector
+            for j in 2:n
+                A[1, j] = A[1, j] * μ⁻¹' - U[1, j] * αμ⁻²
+            end
+            ldiv!(view(A', 2:n, 1), UpperTriangular(Unext)', view(A', 2:n, 1))
+        else
+            # Update submatrix
+            for j in 2:n, i in 2:j
+                A[j, i] = (
+                    A[j, i]
+                    - μ⁻¹ * A[i, 1]' * U[1, j]'
+                    - μ⁻¹' * U[1, i] * A[j, 1]
+                    + αμ⁻² * U[1, i] * U[1, j]'
+                )
+            end
+
+            # Update vector
+            for j in 2:n
+                A[j, 1] = A[j, 1] * μ⁻¹ - U[1, j]' * αμ⁻²
+            end
+            ldiv!(view(A, 2:n, 1), UpperTriangular(Unext)', view(A, 2:n, 1))
+        end
+
+        # Recurse
+        _UtiAsymUi!(uplo, view(A, 2:n, 2:n), Unext)
+    end
+
+    return A
+end
+
+# U is diagonal
+function _UtiAsymUi_diag!(uplo, A, U)
+    n = size(A, 1)
+    μ⁻¹ = 1 / U[1, 1]
+    αμ⁻² = A[1, 1] * μ⁻¹' * μ⁻¹
+
+    # Update (1, 1) element
+    A[1, 1] = αμ⁻²
+    if n > 1
+        Unext = view(U, 2:n, 2:n)
+
+        if uplo === 'U'
+            # No need to update any submatrix when U is diagonal
+
+            # Update vector
+            for j in 2:n
+                A[1, j] = A[1, j] * μ⁻¹'
+            end
+            ldiv!(view(A', 2:n, 1), Diagonal(Unext)', view(A', 2:n, 1))
+        else
+            # No need to update any submatrix when U is diagonal
+
+            # Update vector
+            for j in 2:n
+                A[j, 1] = A[j, 1] * μ⁻¹
+            end
+            ldiv!(view(A, 2:n, 1), Diagonal(Unext)', view(A, 2:n, 1))
+        end
+
+        # Recurse
+        _UtiAsymUi!(uplo, view(A, 2:n, 2:n), Unext)
+    end
+
+    return A
+end
+
+function eigvals!(A::HermOrSym{T,S}, B::HermOrSym{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasReal,S<:StridedMatrix}
+    vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
+    isnothing(sortby) || sort!(vals, by=sortby)
+    return vals
+end
+function eigvals!(A::Hermitian{T,S}, B::Hermitian{T,S}; sortby::Union{Function,Nothing}=nothing) where {T<:BlasComplex,S<:StridedMatrix}
+    vals = LAPACK.sygvd!(1, 'N', A.uplo, A.data, B.uplo == A.uplo ? B.data : copy(B.data'))[1]
+    isnothing(sortby) || sort!(vals, by=sortby)
+    return vals
+end
+eigvecs(A::HermOrSym) = eigvecs(eigen(A))
diff --git a/stdlib/LinearAlgebra/src/transpose.jl b/stdlib/LinearAlgebra/src/transpose.jl
index 89af811265e284..c7ca6339aac6a9 100644
--- a/stdlib/LinearAlgebra/src/transpose.jl
+++ b/stdlib/LinearAlgebra/src/transpose.jl
@@ -163,7 +163,7 @@ julia> A = [1 2im; -3im 4]
  0-3im  4+0im
 
 julia> T = transpose(A)
-2×2 Transpose{Complex{Int64}, Matrix{Complex{Int64}}}:
+2×2 transpose(::Matrix{Complex{Int64}}) with eltype Complex{Int64}:
  1+0im  0-3im
  0+2im  4+0im
 
@@ -181,11 +181,11 @@ Base.copy(A::Adjoint{<:Any,<:AbstractMatrix}) = adjoint!(similar(A.parent, rever
 function copy_transpose!(B::AbstractVecOrMat, ir_dest::AbstractRange{Int}, jr_dest::AbstractRange{Int},
                          A::AbstractVecOrMat, ir_src::AbstractRange{Int}, jr_src::AbstractRange{Int})
     if length(ir_dest) != length(jr_src)
-        throw(ArgumentError(string("source and destination must have same size (got ",
+        throw(ArgumentError(LazyString("source and destination must have same size (got ",
                                    length(jr_src)," and ",length(ir_dest),")")))
     end
     if length(jr_dest) != length(ir_src)
-        throw(ArgumentError(string("source and destination must have same size (got ",
+        throw(ArgumentError(LazyString("source and destination must have same size (got ",
                                    length(ir_src)," and ",length(jr_dest),")")))
     end
     @boundscheck checkbounds(B, ir_dest, jr_dest)
diff --git a/stdlib/LinearAlgebra/src/triangular.jl b/stdlib/LinearAlgebra/src/triangular.jl
index 0ab6164cacf474..d939a5df0da017 100644
--- a/stdlib/LinearAlgebra/src/triangular.jl
+++ b/stdlib/LinearAlgebra/src/triangular.jl
@@ -33,6 +33,8 @@ for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular,
         end
         Matrix(A::$t{T}) where {T} = Matrix{T}(A)
 
+        AbstractMatrix{T}(A::$t) where {T} = $t{T}(A)
+
         size(A::$t, d) = size(A.data, d)
         size(A::$t) = size(A.data)
 
@@ -50,13 +52,13 @@ for t in (:LowerTriangular, :UnitLowerTriangular, :UpperTriangular,
     end
 end
 
-similar(A::Union{Adjoint{Ti,Tv}, Transpose{Ti,Tv}}, ::Type{T}) where {T,Ti,Tv<:LowerTriangular} =
+similar(A::UpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
     UpperTriangular(similar(parent(parent(A)), T))
-similar(A::Union{Adjoint{Ti,Tv}, Transpose{Ti,Tv}}, ::Type{T}) where {T,Ti,Tv<:UnitLowerTriangular} =
+similar(A::UnitUpperTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
     UnitUpperTriangular(similar(parent(parent(A)), T))
-similar(A::Union{Adjoint{Ti,Tv}, Transpose{Ti,Tv}}, ::Type{T}) where {T,Ti,Tv<:UpperTriangular} =
+similar(A::LowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
     LowerTriangular(similar(parent(parent(A)), T))
-similar(A::Union{Adjoint{Ti,Tv}, Transpose{Ti,Tv}}, ::Type{T}) where {T,Ti,Tv<:UnitUpperTriangular} =
+similar(A::UnitLowerTriangular{<:Any,<:Union{Adjoint{Ti}, Transpose{Ti}}}, ::Type{T}) where {T,Ti} =
     UnitLowerTriangular(similar(parent(parent(A)), T))
 
 
@@ -285,23 +287,11 @@ end
 
 function istril(A::Union{LowerTriangular,UnitLowerTriangular}, k::Integer=0)
     k >= 0 && return true
-    m, n = size(A)
-    for j in max(1, k + 2):n
-        for i in 1:min(j - k - 1, m)
-            iszero(A[i, j]) || return false
-        end
-    end
-    return true
+    return _istril(A, k)
 end
 function istriu(A::Union{UpperTriangular,UnitUpperTriangular}, k::Integer=0)
     k <= 0 && return true
-    m, n = size(A)
-    for j in 1:min(n, m + k - 1)
-        for i in max(1, j - k + 1):m
-            iszero(A[i, j]) || return false
-        end
-    end
-    return true
+    return _istriu(A, k)
 end
 istril(A::Adjoint) = istriu(A.parent)
 istril(A::Transpose) = istriu(A.parent)
@@ -393,25 +383,14 @@ function tril!(A::UnitLowerTriangular, k::Integer=0)
     return tril!(LowerTriangular(A.data),k)
 end
 
-# TODO consolidate
-adjoint(A::LowerTriangular) = Adjoint(A)
-adjoint(A::UpperTriangular) = Adjoint(A)
-adjoint(A::UnitLowerTriangular) = Adjoint(A)
-adjoint(A::UnitUpperTriangular) = Adjoint(A)
-transpose(A::LowerTriangular) = Transpose(A)
-transpose(A::UpperTriangular) = Transpose(A)
-transpose(A::UnitLowerTriangular) = Transpose(A)
-transpose(A::UnitUpperTriangular) = Transpose(A)
-
-# TODO consolidate
-Base.copy(A::Adjoint{<:Any,<:LowerTriangular}) = adjoint!(copy(A.parent))
-Base.copy(A::Adjoint{<:Any,<:UpperTriangular}) = adjoint!(copy(A.parent))
-Base.copy(A::Adjoint{<:Any,<:UnitLowerTriangular}) = adjoint!(copy(A.parent))
-Base.copy(A::Adjoint{<:Any,<:UnitUpperTriangular}) = adjoint!(copy(A.parent))
-Base.copy(A::Transpose{<:Any,<:LowerTriangular}) = transpose!(copy(A.parent))
-Base.copy(A::Transpose{<:Any,<:UpperTriangular}) = transpose!(copy(A.parent))
-Base.copy(A::Transpose{<:Any,<:UnitLowerTriangular}) = transpose!(copy(A.parent))
-Base.copy(A::Transpose{<:Any,<:UnitUpperTriangular}) = transpose!(copy(A.parent))
+adjoint(A::LowerTriangular) = UpperTriangular(adjoint(A.data))
+adjoint(A::UpperTriangular) = LowerTriangular(adjoint(A.data))
+adjoint(A::UnitLowerTriangular) = UnitUpperTriangular(adjoint(A.data))
+adjoint(A::UnitUpperTriangular) = UnitLowerTriangular(adjoint(A.data))
+transpose(A::LowerTriangular) = UpperTriangular(transpose(A.data))
+transpose(A::UpperTriangular) = LowerTriangular(transpose(A.data))
+transpose(A::UnitLowerTriangular) = UnitUpperTriangular(transpose(A.data))
+transpose(A::UnitUpperTriangular) = UnitLowerTriangular(transpose(A.data))
 
 transpose!(A::LowerTriangular) = UpperTriangular(copytri!(A.data, 'L', false, true))
 transpose!(A::UnitLowerTriangular) = UnitUpperTriangular(copytri!(A.data, 'L', false, true))
@@ -695,39 +674,42 @@ mul!(C::AbstractMatrix, A::AbstractTriangular, adjB::Adjoint{<:Any,<:AbstractVec
 mul!(C::AbstractVecOrMat, A::AbstractTriangular, adjB::Adjoint{<:Any,<:AbstractVecOrMat}) =
     (B = adjB.parent; lmul!(A, adjoint!(C, B)))
 
-# The three methods for each op are neceesary to avoid ambiguities with definitions in matmul.jl
+# The three methods are neceesary to avoid ambiguities with definitions in matmul.jl
 mul!(C::AbstractVector  , A::AbstractTriangular, B::AbstractVector)   = lmul!(A, copyto!(C, B))
 mul!(C::AbstractMatrix  , A::AbstractTriangular, B::AbstractVecOrMat) = lmul!(A, copyto!(C, B))
 mul!(C::AbstractVecOrMat, A::AbstractTriangular, B::AbstractVecOrMat) = lmul!(A, copyto!(C, B))
-function mul!(C::AbstractVector, adjA::Adjoint{<:Any,<:AbstractTriangular}, B::AbstractVector)
-    return lmul!(adjA, copyto!(C, B))
-end
-function mul!(C::AbstractMatrix, adjA::Adjoint{<:Any,<:AbstractTriangular}, B::AbstractVecOrMat)
-    return lmul!(adjA, copyto!(C, B))
-end
-function mul!(C::AbstractVecOrMat, adjA::Adjoint{<:Any,<:AbstractTriangular}, B::AbstractVecOrMat)
-    return lmul!(adjA, copyto!(C, B))
-end
-function mul!(C::AbstractVector, transA::Transpose{<:Any,<:AbstractTriangular}, B::AbstractVector)
-    return lmul!(transA, copyto!(C, B))
-end
-function mul!(C::AbstractMatrix, transA::Transpose{<:Any,<:AbstractTriangular}, B::AbstractVecOrMat)
-    return lmul!(transA, copyto!(C, B))
-end
-function mul!(C::AbstractVecOrMat, transA::Transpose{<:Any,<:AbstractTriangular}, B::AbstractVecOrMat)
-    return lmul!(transA, copyto!(C, B))
-end
-@inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) =
-    mul!(C, A, copy(B), alpha, beta)
-@inline mul!(C::AbstractMatrix, A::Adjoint{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) =
-    mul!(C, A, copy(B), alpha, beta)
-@inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) =
+
+@inline mul!(C::AbstractMatrix, A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) =
     mul!(C, A, copy(B), alpha, beta)
-@inline mul!(C::AbstractMatrix, A::Transpose{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) =
+@inline mul!(C::AbstractMatrix, A::AbstractTriangular, B::Transpose{<:Any,<:AbstractVecOrMat}, alpha::Number, beta::Number) =
     mul!(C, A, copy(B), alpha, beta)
-mul!(C::AbstractVector, A::Adjoint{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractVecOrMat}) = throw(MethodError(mul!, (C, A, B)))
-mul!(C::AbstractVector, A::Transpose{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractVecOrMat}) = throw(MethodError(mul!, (C, A, B)))
+mul!(C::AbstractVector, A::AbstractTriangular{<:Any,<:Adjoint}, B::Transpose{<:Any,<:AbstractVecOrMat}) = throw(MethodError(mul!, (C, A, B)))
+mul!(C::AbstractVector, A::AbstractTriangular{<:Any,<:Transpose}, B::Transpose{<:Any,<:AbstractVecOrMat}) = throw(MethodError(mul!, (C, A, B)))
+
+# preserve triangular structure in in-place multiplication
+for (cty, aty, bty) in ((:UpperTriangular, :UpperTriangular, :UpperTriangular),
+                        (:UpperTriangular, :UpperTriangular, :UnitUpperTriangular),
+                        (:UpperTriangular, :UnitUpperTriangular, :UpperTriangular),
+                        (:UnitUpperTriangular, :UnitUpperTriangular, :UnitUpperTriangular),
+                        (:LowerTriangular, :LowerTriangular, :LowerTriangular),
+                        (:LowerTriangular, :LowerTriangular, :UnitLowerTriangular),
+                        (:LowerTriangular, :UnitLowerTriangular, :LowerTriangular),
+                        (:UnitLowerTriangular, :UnitLowerTriangular, :UnitLowerTriangular))
+    @eval function mul!(C::$cty, A::$aty, B::$bty)
+        lmul!(A, copyto!(parent(C), B))
+        return C
+    end
+
+    @eval @inline function mul!(C::$cty, A::$aty, B::$bty, alpha::Number, beta::Number)
+        if isone(alpha) && iszero(beta)
+            return mul!(C, A, B)
+        else
+            return generic_matmatmul!(C, 'N', 'N', A, B, MulAddMul(alpha, beta))
+        end
+    end
+end
 
+# direct multiplication/division
 for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
                             (:UnitLowerTriangular, 'L', 'U'),
                             (:UpperTriangular, 'U', 'N'),
@@ -736,12 +718,6 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
         # Vector multiplication
         lmul!(A::$t{T,<:StridedMatrix}, b::StridedVector{T}) where {T<:BlasFloat} =
             BLAS.trmv!($uploc, 'N', $isunitc, A.data, b)
-        lmul!(transA::Transpose{<:Any,<:$t{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasFloat} =
-            (A = transA.parent; BLAS.trmv!($uploc, 'T', $isunitc, A.data, b))
-        lmul!(adjA::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasReal} =
-            (A = adjA.parent; BLAS.trmv!($uploc, 'T', $isunitc, A.data, b))
-        lmul!(adjA::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasComplex} =
-            (A = adjA.parent; BLAS.trmv!($uploc, 'C', $isunitc, A.data, b))
 
         # Matrix multiplication
         lmul!(A::$t{T,<:StridedMatrix}, B::StridedMatrix{T}) where {T<:BlasFloat} =
@@ -749,39 +725,13 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
         rmul!(A::StridedMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} =
             BLAS.trmm!('R', $uploc, 'N', $isunitc, one(T), B.data, A)
 
-        lmul!(transA::Transpose{<:Any,<:$t{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasFloat} =
-            (A = transA.parent; BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), A.data, B))
-        lmul!(adjA::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasComplex} =
-            (A = adjA.parent; BLAS.trmm!('L', $uploc, 'C', $isunitc, one(T), A.data, B))
-        lmul!(adjA::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasReal} =
-            (A = adjA.parent; BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), A.data, B))
-
-        rmul!(A::StridedMatrix{T}, transB::Transpose{<:Any,<:$t{T,<:StridedMatrix}}) where {T<:BlasFloat} =
-            (B = transB.parent; BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), B.data, A))
-        rmul!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}) where {T<:BlasComplex} =
-            (B = adjB.parent; BLAS.trmm!('R', $uploc, 'C', $isunitc, one(T), B.data, A))
-        rmul!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}) where {T<:BlasReal} =
-            (B = adjB.parent; BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), B.data, A))
-
         # Left division
         ldiv!(A::$t{T,<:StridedMatrix}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
             LAPACK.trtrs!($uploc, 'N', $isunitc, A.data, B)
-        ldiv!(transA::Transpose{<:Any,<:$t{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
-            (A = transA.parent; LAPACK.trtrs!($uploc, 'T', $isunitc, A.data, B))
-        ldiv!(adjA::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
-            (A = adjA.parent; LAPACK.trtrs!($uploc, 'T', $isunitc, A.data, B))
-        ldiv!(adjA::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
-            (A = adjA.parent; LAPACK.trtrs!($uploc, 'C', $isunitc, A.data, B))
 
         # Right division
         rdiv!(A::StridedMatrix{T}, B::$t{T,<:StridedMatrix}) where {T<:BlasFloat} =
             BLAS.trsm!('R', $uploc, 'N', $isunitc, one(T), B.data, A)
-        rdiv!(A::StridedMatrix{T}, transB::Transpose{<:Any,<:$t{T,<:StridedMatrix}}) where {T<:BlasFloat} =
-            (B = transB.parent; BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), B.data, A))
-        rdiv!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}) where {T<:BlasReal} =
-            (B = adjB.parent; BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), B.data, A))
-        rdiv!(A::StridedMatrix{T}, adjB::Adjoint{<:Any,<:$t{T,<:StridedMatrix}}) where {T<:BlasComplex} =
-            (B = adjB.parent; BLAS.trsm!('R', $uploc, 'C', $isunitc, one(T), B.data, A))
 
         # Matrix inverse
         inv!(A::$t{T,S}) where {T<:BlasFloat,S<:StridedMatrix} =
@@ -805,6 +755,53 @@ for (t, uploc, isunitc) in ((:LowerTriangular, 'L', 'N'),
     end
 end
 
+# adjoint/transpose multiplication ('uploc' reversed)
+for (t, uploc, isunitc) in ((:LowerTriangular, 'U', 'N'),
+                            (:UnitLowerTriangular, 'U', 'U'),
+                            (:UpperTriangular, 'L', 'N'),
+                            (:UnitUpperTriangular, 'L', 'U'))
+    @eval begin
+        # Vector multiplication
+        lmul!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasFloat} =
+            BLAS.trmv!($uploc, 'T', $isunitc, parent(parent(A)), b)
+        lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasReal} =
+            BLAS.trmv!($uploc, 'T', $isunitc, parent(parent(A)), b)
+        lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, b::StridedVector{T}) where {T<:BlasComplex} =
+            BLAS.trmv!($uploc, 'C', $isunitc, parent(parent(A)), b)
+
+        # Matrix multiplication
+        lmul!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasFloat} =
+            BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), parent(parent(A)), B)
+        lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasComplex} =
+            BLAS.trmm!('L', $uploc, 'C', $isunitc, one(T), parent(parent(A)), B)
+        lmul!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedMatrix{T}) where {T<:BlasReal} =
+            BLAS.trmm!('L', $uploc, 'T', $isunitc, one(T), parent(parent(A)), B)
+
+        rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} =
+            BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A)
+        rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasComplex} =
+            BLAS.trmm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A)
+        rmul!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasReal} =
+            BLAS.trmm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A)
+
+        # Left division
+        ldiv!(A::$t{<:Any,<:Transpose{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasFloat} =
+            LAPACK.trtrs!($uploc, 'T', $isunitc, parent(parent(A)), B)
+        ldiv!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasReal} =
+            LAPACK.trtrs!($uploc, 'T', $isunitc, parent(parent(A)), B)
+        ldiv!(A::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}, B::StridedVecOrMat{T}) where {T<:BlasComplex} =
+            LAPACK.trtrs!($uploc, 'C', $isunitc, parent(parent(A)), B)
+
+        # Right division
+        rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Transpose{T,<:StridedMatrix}}) where {T<:BlasFloat} =
+            BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A)
+        rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasReal} =
+            BLAS.trsm!('R', $uploc, 'T', $isunitc, one(T), parent(parent(B)), A)
+        rdiv!(A::StridedMatrix{T}, B::$t{<:Any,<:Adjoint{T,<:StridedMatrix}}) where {T<:BlasComplex} =
+            BLAS.trsm!('R', $uploc, 'C', $isunitc, one(T), parent(parent(B)), A)
+    end
+end
+
 function inv(A::LowerTriangular{T}) where T
     S = typeof((zero(T)*one(T) + zero(T))/one(T))
     LowerTriangular(ldiv!(convert(AbstractArray{S}, A), Matrix{S}(I, size(A, 1), size(A, 1))))
@@ -963,145 +960,83 @@ function lmul!(A::UnitLowerTriangular, B::StridedVecOrMat)
     B
 end
 
-function lmul!(adjA::Adjoint{<:Any,<:UpperTriangular}, B::StridedVecOrMat)
-    A = adjA.parent
-    m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    for j = 1:n
-        for i = m:-1:1
-            Bij = A.data[i,i]'B[i,j]
-            for k = 1:i - 1
-                Bij += A.data[k,i]'B[k,j]
+for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
+    @eval begin
+        function lmul!(xA::UpperTriangular{<:Any,<:$t}, B::StridedVecOrMat)
+            A = xA.data
+            m, n = size(B, 1), size(B, 2)
+            if m != size(A, 1)
+                throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
             end
-            B[i,j] = Bij
-        end
-    end
-    B
-end
-
-function lmul!(adjA::Adjoint{<:Any,<:UnitUpperTriangular}, B::StridedVecOrMat)
-    A = adjA.parent
-    m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    for j = 1:n
-        for i = m:-1:1
-            Bij = B[i,j]
-            for k = 1:i - 1
-                Bij += A.data[k,i]'B[k,j]
+            pA = parent(A)
+            for j = 1:n
+                for i = 1:m
+                    Bij = $tfun(pA[i,i])*B[i,j]
+                    for k = i + 1:m
+                        Bij += $tfun(pA[k,i])*B[k,j]
+                    end
+                    B[i,j] = Bij
+                end
             end
-            B[i,j] = Bij
+            B
         end
-    end
-    B
-end
 
-function lmul!(adjA::Adjoint{<:Any,<:LowerTriangular}, B::StridedVecOrMat)
-    A = adjA.parent
-    m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    for j = 1:n
-        for i = 1:m
-            Bij = A.data[i,i]'B[i,j]
-            for k = i + 1:m
-                Bij += A.data[k,i]'B[k,j]
+        function lmul!(xA::UnitUpperTriangular{<:Any,<:$t}, B::StridedVecOrMat)
+            A = xA.data
+            m, n = size(B, 1), size(B, 2)
+            if m != size(A, 1)
+                throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
             end
-            B[i,j] = Bij
-        end
-    end
-    B
-end
-function lmul!(adjA::Adjoint{<:Any,<:UnitLowerTriangular}, B::StridedVecOrMat)
-    A = adjA.parent
-    m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    for j = 1:n
-        for i = 1:m
-            Bij = B[i,j]
-            for k = i + 1:m
-                Bij += A.data[k,i]'B[k,j]
+            pA = parent(A)
+            for j = 1:n
+                for i = 1:m
+                    Bij = B[i,j]
+                    for k = i + 1:m
+                        Bij += $tfun(pA[k,i])*B[k,j]
+                    end
+                    B[i,j] = Bij
+                end
             end
-            B[i,j] = Bij
+            B
         end
-    end
-    B
-end
 
-function lmul!(transA::Transpose{<:Any,<:UpperTriangular}, B::StridedVecOrMat)
-    A = transA.parent
-    m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    for j = 1:n
-        for i = m:-1:1
-            Bij = transpose(A.data[i,i]) * B[i,j]
-            for k = 1:i - 1
-                Bij += transpose(A.data[k,i]) * B[k,j]
+        function lmul!(xA::LowerTriangular{<:Any,<:$t}, B::StridedVecOrMat)
+            A = xA.data
+            m, n = size(B, 1), size(B, 2)
+            if m != size(A, 1)
+                throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
             end
-            B[i,j] = Bij
-        end
-    end
-    B
-end
-function lmul!(transA::Transpose{<:Any,<:UnitUpperTriangular}, B::StridedVecOrMat)
-    A = transA.parent
-    m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    for j = 1:n
-        for i = m:-1:1
-            Bij = B[i,j]
-            for k = 1:i - 1
-                Bij += transpose(A.data[k,i]) * B[k,j]
+            pA = parent(A)
+            for j = 1:n
+                for i = m:-1:1
+                    Bij = $tfun(pA[i,i])*B[i,j]
+                    for k = 1:i - 1
+                        Bij += $tfun(pA[k,i])*B[k,j]
+                    end
+                    B[i,j] = Bij
+                end
             end
-            B[i,j] = Bij
+            B
         end
-    end
-    B
-end
-
-function lmul!(transA::Transpose{<:Any,<:LowerTriangular}, B::StridedVecOrMat)
-    A = transA.parent
-    m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    for j = 1:n
-        for i = 1:m
-            Bij = transpose(A.data[i,i]) * B[i,j]
-            for k = i + 1:m
-                Bij += transpose(A.data[k,i]) * B[k,j]
+        function lmul!(xA::UnitLowerTriangular{<:Any,<:$t}, B::StridedVecOrMat)
+            A = xA.data
+            m, n = size(B, 1), size(B, 2)
+            if m != size(A, 1)
+                throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
             end
-            B[i,j] = Bij
-        end
-    end
-    B
-end
-function lmul!(transA::Transpose{<:Any,<:UnitLowerTriangular}, B::StridedVecOrMat)
-    A = transA.parent
-    m, n = size(B, 1), size(B, 2)
-    if m != size(A, 1)
-        throw(DimensionMismatch("right hand side B needs first dimension of size $(size(A,1)), has size $m"))
-    end
-    for j = 1:n
-        for i = 1:m
-            Bij = B[i,j]
-            for k = i + 1:m
-                Bij += transpose(A.data[k,i]) * B[k,j]
+            pA = parent(A)
+            for j = 1:n
+                for i = m:-1:1
+                    Bij = B[i,j]
+                    for k = 1:i - 1
+                        Bij += $tfun(pA[k,i])*B[k,j]
+                    end
+                    B[i,j] = Bij
+                end
             end
-            B[i,j] = Bij
+            B
         end
     end
-    B
 end
 
 function rmul!(A::StridedMatrix, B::UpperTriangular)
@@ -1170,364 +1105,236 @@ function rmul!(A::StridedMatrix, B::UnitLowerTriangular)
     A
 end
 
-function rmul!(A::StridedMatrix, adjB::Adjoint{<:Any,<:UpperTriangular})
-    B = adjB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]*B.data[j,j]'
-            for k = j + 1:n
-                Aij += A[i,k]*B.data[j,k]'
+for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
+    @eval begin
+        function rmul!(A::StridedMatrix, B::UpperTriangular{<:Any,<:$t})
+            m, n = size(A)
+            if size(B, 1) != n
+                throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
             end
-            A[i,j] = Aij
-        end
-    end
-    A
-end
-
-function rmul!(A::StridedMatrix, adjB::Adjoint{<:Any,<:UnitUpperTriangular})
-    B = adjB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]
-            for k = j + 1:n
-                Aij += A[i,k]*B.data[j,k]'
+            pB = parent(parent(B))
+            for i = 1:m
+                for j = n:-1:1
+                    Aij = A[i,j]*$tfun(pB[j,j])
+                    for k = 1:j - 1
+                        Aij += A[i,k]*$tfun(pB[j,k])
+                    end
+                    A[i,j] = Aij
+                end
             end
-            A[i,j] = Aij
+            A
         end
-    end
-    A
-end
 
-function rmul!(A::StridedMatrix, adjB::Adjoint{<:Any,<:LowerTriangular})
-    B = adjB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]*B.data[j,j]'
-            for k = 1:j - 1
-                Aij += A[i,k]*B.data[j,k]'
+        function rmul!(A::StridedMatrix, B::UnitUpperTriangular{<:Any,<:$t})
+            m, n = size(A)
+            if size(B, 1) != n
+                throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
             end
-            A[i,j] = Aij
-        end
-    end
-    A
-end
-
-function rmul!(A::StridedMatrix, adjB::Adjoint{<:Any,<:UnitLowerTriangular})
-    B = adjB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]
-            for k = 1:j - 1
-                Aij += A[i,k]*B.data[j,k]'
+            pB = parent(parent(B))
+            for i = 1:m
+                for j = n:-1:1
+                    Aij = A[i,j]
+                    for k = 1:j - 1
+                        Aij += A[i,k]*$tfun(pB[j,k])
+                    end
+                    A[i,j] = Aij
+                end
             end
-            A[i,j] = Aij
+            A
         end
-    end
-    A
-end
 
-function rmul!(A::StridedMatrix, transB::Transpose{<:Any,<:UpperTriangular})
-    B = transB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = 1:n
-            Aij = A[i,j] * transpose(B.data[j,j])
-            for k = j + 1:n
-                Aij += A[i,k] * transpose(B.data[j,k])
+        function rmul!(A::StridedMatrix, B::LowerTriangular{<:Any,<:$t})
+            m, n = size(A)
+            if size(B, 1) != n
+                throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
             end
-            A[i,j] = Aij
-        end
-    end
-    A
-end
-function rmul!(A::StridedMatrix, transB::Transpose{<:Any,<:UnitUpperTriangular})
-    B = transB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]
-            for k = j + 1:n
-                Aij += A[i,k] * transpose(B.data[j,k])
+            pB = parent(parent(B))
+            for i = 1:m
+                for j = 1:n
+                    Aij = A[i,j]*$tfun(pB[j,j])
+                    for k = j + 1:n
+                        Aij += A[i,k]*$tfun(pB[j,k])
+                    end
+                    A[i,j] = Aij
+                end
             end
-            A[i,j] = Aij
+            A
         end
-    end
-    A
-end
 
-function rmul!(A::StridedMatrix, transB::Transpose{<:Any,<:LowerTriangular})
-    B = transB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j] * transpose(B.data[j,j])
-            for k = 1:j - 1
-                Aij += A[i,k] * transpose(B.data[j,k])
+        function rmul!(A::StridedMatrix, B::UnitLowerTriangular{<:Any,<:$t})
+            m, n = size(A)
+            if size(B, 1) != n
+                throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
             end
-            A[i,j] = Aij
-        end
-    end
-    A
-end
-
-function rmul!(A::StridedMatrix, transB::Transpose{<:Any,<:UnitLowerTriangular})
-    B = transB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]
-            for k = 1:j - 1
-                Aij += A[i,k] * transpose(B.data[j,k])
+            pB = parent(parent(B))
+            for i = 1:m
+                for j = 1:n
+                    Aij = A[i,j]
+                    for k = j + 1:n
+                        Aij += A[i,k]*$tfun(pB[j,k])
+                    end
+                    A[i,j] = Aij
+                end
             end
-            A[i,j] = Aij
+            A
         end
     end
-    A
 end
 
 #Generic solver using naive substitution
-# manually hoisting x[j] significantly improves performance as of Dec 2015
+# manually hoisting b[j] significantly improves performance as of Dec 2015
 # manually eliding bounds checking significantly improves performance as of Dec 2015
 # directly indexing A.data rather than A significantly improves performance as of Dec 2015
 # replacing repeated references to A.data with [Adata = A.data and references to Adata]
 # does not significantly impact performance as of Dec 2015
 # replacing repeated references to A.data[j,j] with [Ajj = A.data[j,j] and references to Ajj]
 # does not significantly impact performance as of Dec 2015
-function naivesub!(A::UpperTriangular, b::AbstractVector, x::AbstractVector = b)
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::UpperTriangular, b::AbstractVector)
+    require_one_based_indexing(A, b)
     n = size(A, 2)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+    if !(n == length(b))
+        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
     end
     @inbounds for j in n:-1:1
         iszero(A.data[j,j]) && throw(SingularException(j))
-        xj = x[j] = A.data[j,j] \ b[j]
+        bj = b[j] = A.data[j,j] \ b[j]
         for i in j-1:-1:1 # counterintuitively 1:j-1 performs slightly better
-            b[i] -= A.data[i,j] * xj
+            b[i] -= A.data[i,j] * bj
         end
     end
-    x
+    return b
 end
-function naivesub!(A::UnitUpperTriangular, b::AbstractVector, x::AbstractVector = b)
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::UnitUpperTriangular, b::AbstractVector)
+    require_one_based_indexing(A, b)
     n = size(A, 2)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+    if !(n == length(b))
+        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
     end
     @inbounds for j in n:-1:1
-        xj = x[j] = b[j]
+        bj = b[j]
         for i in j-1:-1:1 # counterintuitively 1:j-1 performs slightly better
-            b[i] -= A.data[i,j] * xj
+            b[i] -= A.data[i,j] * bj
         end
     end
-    x
+    return b
 end
-function naivesub!(A::LowerTriangular, b::AbstractVector, x::AbstractVector = b)
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::LowerTriangular, b::AbstractVector)
+    require_one_based_indexing(A, b)
     n = size(A, 2)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+    if !(n == length(b))
+        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
     end
     @inbounds for j in 1:n
         iszero(A.data[j,j]) && throw(SingularException(j))
-        xj = x[j] = A.data[j,j] \ b[j]
+        bj = b[j] = A.data[j,j] \ b[j]
         for i in j+1:n
-            b[i] -= A.data[i,j] * xj
+            b[i] -= A.data[i,j] * bj
         end
     end
-    x
+    return b
 end
-function naivesub!(A::UnitLowerTriangular, b::AbstractVector, x::AbstractVector = b)
-    require_one_based_indexing(A, b, x)
+function ldiv!(A::UnitLowerTriangular, b::AbstractVector)
+    require_one_based_indexing(A, b)
     n = size(A, 2)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("second dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+    if !(n == length(b))
+        throw(DimensionMismatch("second dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
     end
     @inbounds for j in 1:n
-        xj = x[j] = b[j]
+        bj = b[j]
         for i in j+1:n
-            b[i] -= A.data[i,j] * xj
+            b[i] -= A.data[i,j] * bj
         end
     end
-    x
+    return b
 end
-# in the following transpose and conjugate transpose naive substitution variants,
-# accumulating in z rather than b[j] significantly improves performance as of Dec 2015
-function ldiv!(transA::Transpose{<:Any,<:LowerTriangular}, b::AbstractVector, x::AbstractVector)
-    require_one_based_indexing(transA, b, x)
-    A = transA.parent
-    n = size(A, 1)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
+function ldiv!(A::AbstractTriangular, B::AbstractMatrix)
+    require_one_based_indexing(A, B)
+    nA, mA = size(A)
+    n = size(B, 1)
+    if nA != n
+        throw(DimensionMismatch("second dimension of left hand side A, $mA, and first dimension of right hand side B, $n, must be equal"))
     end
-    @inbounds for j in n:-1:1
-        z = b[j]
-        for i in n:-1:j+1
-            z -= A.data[i,j] * x[i]
-        end
-        iszero(A.data[j,j]) && throw(SingularException(j))
-        x[j] = A.data[j,j] \ z
+    for b in eachcol(B)
+        ldiv!(A, b)
     end
-    x
+    B
 end
-ldiv!(transA::Transpose{<:Any,<:LowerTriangular}, b::AbstractVector) = ldiv!(transA, b, b)
 
-function ldiv!(transA::Transpose{<:Any,<:UnitLowerTriangular}, b::AbstractVector, x::AbstractVector)
-    require_one_based_indexing(transA, b, x)
-    A = transA.parent
-    n = size(A, 1)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
-    end
-    @inbounds for j in n:-1:1
-        z = b[j]
-        for i in n:-1:j+1
-            z -= A.data[i,j] * x[i]
-        end
-        x[j] = z
-    end
-    x
-end
-ldiv!(transA::Transpose{<:Any,<:UnitLowerTriangular}, b::AbstractVector) = ldiv!(transA, b, b)
-
-function ldiv!(transA::Transpose{<:Any,<:UpperTriangular}, b::AbstractVector, x::AbstractVector)
-    require_one_based_indexing(transA, b, x)
-    A = transA.parent
-    n = size(A, 1)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
-    end
-    @inbounds for j in 1:n
-        z = b[j]
-        for i in 1:j-1
-            z -= A.data[i,j] * x[i]
-        end
-        iszero(A.data[j,j]) && throw(SingularException(j))
-        x[j] = A.data[j,j] \ z
-    end
-    x
-end
-ldiv!(transA::Transpose{<:Any,<:UpperTriangular}, b::AbstractVector) = ldiv!(transA, b, b)
-
-function ldiv!(transA::Transpose{<:Any,<:UnitUpperTriangular}, b::AbstractVector, x::AbstractVector)
-    require_one_based_indexing(transA, b, x)
-    A = transA.parent
-    n = size(A, 1)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
-    end
-    @inbounds for j in 1:n
-        z = b[j]
-        for i in 1:j-1
-            z -= A.data[i,j] * x[i]
-        end
-        x[j] = z
-    end
-    x
-end
-ldiv!(transA::Transpose{<:Any,<:UnitUpperTriangular}, b::AbstractVector) = ldiv!(transA, b, b)
-
-function ldiv!(adjA::Adjoint{<:Any,<:LowerTriangular}, b::AbstractVector, x::AbstractVector)
-    require_one_based_indexing(adjA, b, x)
-    A = adjA.parent
-    n = size(A, 1)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
-    end
-    @inbounds for j in n:-1:1
-        z = b[j]
-        for i in n:-1:j+1
-            z -= A.data[i,j]' * x[i]
+# in the following transpose and conjugate transpose naive substitution variants,
+# accumulating in z rather than b[j,k] significantly improves performance as of Dec 2015
+for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
+    @eval begin
+        function ldiv!(xA::UpperTriangular{<:Any,<:$t}, b::AbstractVector)
+            require_one_based_indexing(xA, b)
+            A = parent(parent(xA))
+            n = size(A, 1)
+            if !(n == length(b))
+                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
+            end
+            @inbounds for j in n:-1:1
+                z = b[j]
+                for i in n:-1:j+1
+                    z -= $tfun(A[i,j]) * b[i]
+                end
+                iszero(A[j,j]) && throw(SingularException(j))
+                b[j] = $tfun(A[j,j]) \ z
+            end
+            return b
         end
-        iszero(A.data[j,j]) && throw(SingularException(j))
-        x[j] = A.data[j,j]' \ z
-    end
-    x
-end
-ldiv!(adjA::Adjoint{<:Any,<:LowerTriangular}, b::AbstractVector) = ldiv!(adjA, b, b)
 
-function ldiv!(adjA::Adjoint{<:Any,<:UnitLowerTriangular}, b::AbstractVector, x::AbstractVector)
-    require_one_based_indexing(adjA, b, x)
-    A = adjA.parent
-    n = size(A, 1)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
-    end
-    @inbounds for j in n:-1:1
-        z = b[j]
-        for i in n:-1:j+1
-            z -= A.data[i,j]' * x[i]
+        function ldiv!(xA::UnitUpperTriangular{<:Any,<:$t}, b::AbstractVector)
+            require_one_based_indexing(xA, b)
+            A = parent(parent(xA))
+            n = size(A, 1)
+            if !(n == length(b))
+                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
+            end
+            @inbounds for j in n:-1:1
+                z = b[j]
+                for i in n:-1:j+1
+                    z -= $tfun(A[i,j]) * b[i]
+                end
+                b[j] = z
+            end
+            return b
         end
-        x[j] = z
-    end
-    x
-end
-ldiv!(adjA::Adjoint{<:Any,<:UnitLowerTriangular}, b::AbstractVector) = ldiv!(adjA, b, b)
 
-function ldiv!(adjA::Adjoint{<:Any,<:UpperTriangular}, b::AbstractVector, x::AbstractVector)
-    require_one_based_indexing(adjA, b, x)
-    A = adjA.parent
-    n = size(A, 1)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
-    end
-    @inbounds for j in 1:n
-        z = b[j]
-        for i in 1:j-1
-            z -= A.data[i,j]' * x[i]
+        function ldiv!(xA::LowerTriangular{<:Any,<:$t}, b::AbstractVector)
+            require_one_based_indexing(xA, b)
+            A = parent(parent(xA))
+            n = size(A, 1)
+            if !(n == length(b))
+                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
+            end
+            @inbounds for j in 1:n
+                z = b[j]
+                for i in 1:j-1
+                    z -= $tfun(A[i,j]) * b[i]
+                end
+                iszero(A[j,j]) && throw(SingularException(j))
+                b[j] = $tfun(A[j,j]) \ z
+            end
+            return b
         end
-        iszero(A.data[j,j]) && throw(SingularException(j))
-        x[j] = A.data[j,j]' \ z
-    end
-    x
-end
-ldiv!(adjA::Adjoint{<:Any,<:UpperTriangular}, b::AbstractVector) = ldiv!(adjA, b, b)
 
-function ldiv!(adjA::Adjoint{<:Any,<:UnitUpperTriangular}, b::AbstractVector, x::AbstractVector)
-    require_one_based_indexing(adjA, b, x)
-    A = adjA.parent
-    n = size(A, 1)
-    if !(n == length(b) == length(x))
-        throw(DimensionMismatch("first dimension of left hand side A, $n, length of output x, $(length(x)), and length of right hand side b, $(length(b)), must be equal"))
-    end
-    @inbounds for j in 1:n
-        z = b[j]
-        for i in 1:j-1
-            z -= A.data[i,j]' * x[i]
+        function ldiv!(xA::UnitLowerTriangular{<:Any,<:$t}, b::AbstractVector)
+            require_one_based_indexing(xA, b)
+            A = parent(parent(xA))
+            n = size(A, 1)
+            if !(n == length(b))
+                throw(DimensionMismatch("first dimension of left hand side A, $n, and length of right hand side b, $(length(b)), must be equal"))
+            end
+            @inbounds for j in 1:n
+                z = b[j]
+                for i in 1:j-1
+                    z -= $tfun(A[i,j]) * b[i]
+                end
+                b[j] = z
+            end
+            return b
         end
-        x[j] = z
     end
-    x
 end
-ldiv!(adjA::Adjoint{<:Any,<:UnitUpperTriangular}, b::AbstractVector) = ldiv!(adjA, b, b)
 
 function rdiv!(A::StridedMatrix, B::UpperTriangular)
     m, n = size(A)
@@ -1595,169 +1402,91 @@ function rdiv!(A::StridedMatrix, B::UnitLowerTriangular)
     A
 end
 
-function rdiv!(A::StridedMatrix, adjB::Adjoint{<:Any,<:UpperTriangular})
-    B = adjB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]
-            for k = j + 1:n
-                Aij -= A[i,k]*B.data[j,k]'
+for (t, tfun) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
+    @eval begin
+        function rdiv!(A::StridedMatrix, xB::LowerTriangular{<:Any,<:$t})
+            B = parent(parent(xB))
+            m, n = size(A)
+            if size(B, 1) != n
+                throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
             end
-            A[i,j] = Aij/B.data[j,j]'
-        end
-    end
-    A
-end
-function rdiv!(A::StridedMatrix, adjB::Adjoint{<:Any,<:UnitUpperTriangular})
-    B = adjB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]
-            for k = j + 1:n
-                Aij -= A[i,k]*B.data[j,k]'
+            for i = 1:m
+                for j = n:-1:1
+                    Aij = A[i,j]
+                    for k = j + 1:n
+                        Aij -= A[i,k]*$tfun(B[j,k])
+                    end
+                    A[i,j] = Aij/$tfun(B[j,j])
+                end
             end
-            A[i,j] = Aij
+            A
         end
-    end
-    A
-end
-
-function rdiv!(A::StridedMatrix, adjB::Adjoint{<:Any,<:LowerTriangular})
-    B = adjB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]
-            for k = 1:j - 1
-                Aij -= A[i,k]*B.data[j,k]'
+        function rdiv!(A::StridedMatrix, xB::UnitLowerTriangular{<:Any,<:$t})
+            B = parent(parent(xB))
+            m, n = size(A)
+            if size(B, 1) != n
+                throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
             end
-            A[i,j] = Aij/B.data[j,j]'
-        end
-    end
-    A
-end
-function rdiv!(A::StridedMatrix, adjB::Adjoint{<:Any,<:UnitLowerTriangular})
-    B = adjB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]
-            for k = 1:j - 1
-                Aij -= A[i,k]*B.data[j,k]'
+            for i = 1:m
+                for j = n:-1:1
+                    Aij = A[i,j]
+                    for k = j + 1:n
+                        Aij -= A[i,k]*$tfun(B[j,k])
+                    end
+                    A[i,j] = Aij
+                end
             end
-            A[i,j] = Aij
+            A
         end
-    end
-    A
-end
 
-function rdiv!(A::StridedMatrix, transB::Transpose{<:Any,<:UpperTriangular})
-    B = transB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]
-            for k = j + 1:n
-                Aij -= A[i,k] * transpose(B.data[j,k])
+        function rdiv!(A::StridedMatrix, xB::UpperTriangular{<:Any,<:$t})
+            B = parent(parent(xB))
+            m, n = size(A)
+            if size(B, 1) != n
+                throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
             end
-            A[i,j] = Aij / transpose(B.data[j,j])
-        end
-    end
-    A
-end
-function rdiv!(A::StridedMatrix, transB::Transpose{<:Any,<:UnitUpperTriangular})
-    B = transB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = n:-1:1
-            Aij = A[i,j]
-            for k = j + 1:n
-                Aij -= A[i,k] * transpose(B.data[j,k])
+            for i = 1:m
+                for j = 1:n
+                    Aij = A[i,j]
+                    for k = 1:j - 1
+                        Aij -= A[i,k]*$tfun(B[j,k])
+                    end
+                    A[i,j] = Aij/$tfun(B[j,j])
+                end
             end
-            A[i,j] = Aij
+            A
         end
-    end
-    A
-end
-
-function rdiv!(A::StridedMatrix, transB::Transpose{<:Any,<:LowerTriangular})
-    B = transB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]
-            for k = 1:j - 1
-                Aij -= A[i,k] * transpose(B.data[j,k])
+        function rdiv!(A::StridedMatrix, xB::UnitUpperTriangular{<:Any,<:$t})
+            B = parent(parent(xB))
+            m, n = size(A)
+            if size(B, 1) != n
+                throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
             end
-            A[i,j] = Aij / transpose(B.data[j,j])
-        end
-    end
-    A
-end
-function rdiv!(A::StridedMatrix, transB::Transpose{<:Any,<:UnitLowerTriangular})
-    B = transB.parent
-    m, n = size(A)
-    if size(B, 1) != n
-        throw(DimensionMismatch("right hand side B needs first dimension of size $n, has size $(size(B,1))"))
-    end
-    for i = 1:m
-        for j = 1:n
-            Aij = A[i,j]
-            for k = 1:j - 1
-                Aij -= A[i,k] * transpose(B.data[j,k])
+            for i = 1:m
+                for j = 1:n
+                    Aij = A[i,j]
+                    for k = 1:j - 1
+                        Aij -= A[i,k]*$tfun(B[j,k])
+                    end
+                    A[i,j] = Aij
+                end
             end
-            A[i,j] = Aij
+            A
         end
     end
-    A
 end
 
-function lmul!(adjA::Adjoint{<:Any,<:Union{LowerTriangular,UnitLowerTriangular}}, B::UpperTriangular)
-    return UpperTriangular(lmul!(adjA, triu!(B.data)))
-end
-function lmul!(adjA::Adjoint{<:Any,<:Union{UpperTriangular,UnitUpperTriangular}}, B::LowerTriangular)
-    return LowerTriangular(lmul!(adjA, tril!(B.data)))
+function lmul!(A::Union{UpperTriangular,UnitUpperTriangular}, B::UpperTriangular)
+    UpperTriangular(lmul!(A, triu!(B.data)))
 end
-function lmul!(transA::Transpose{<:Any,<:Union{LowerTriangular,UnitLowerTriangular}}, B::UpperTriangular)
-    return UpperTriangular(lmul!(transA, triu!(B.data)))
+function lmul!(A::Union{LowerTriangular,UnitLowerTriangular}, B::LowerTriangular)
+    return LowerTriangular(lmul!(A, tril!(B.data)))
 end
-function lmul!(transA::Transpose{<:Any,<:Union{UpperTriangular,UnitUpperTriangular}}, B::LowerTriangular)
-    return LowerTriangular(lmul!(transA, tril!(B.data)))
+function ldiv!(xA::Union{UpperTriangular,UnitUpperTriangular}, B::UpperTriangular)
+    return UpperTriangular(ldiv!(xA, triu!(B.data)))
 end
-function ldiv!(adjA::Adjoint{<:Any,<:Union{LowerTriangular,UnitLowerTriangular}}, B::UpperTriangular)
-    return UpperTriangular(ldiv!(adjA, triu!(B.data)))
-end
-function ldiv!(adjA::Adjoint{<:Any,<:Union{UpperTriangular,UnitUpperTriangular}}, B::LowerTriangular)
-    return LowerTriangular(ldiv!(adjA, tril!(B.data)))
-end
-function ldiv!(transA::Transpose{<:Any,<:Union{LowerTriangular,UnitLowerTriangular}}, B::UpperTriangular)
-    return UpperTriangular(ldiv!(transA, triu!(B.data)))
-end
-function ldiv!(transA::Transpose{<:Any,<:Union{UpperTriangular,UnitUpperTriangular}}, B::LowerTriangular)
-    return LowerTriangular(ldiv!(transA, tril!(B.data)))
+function ldiv!(xA::Union{LowerTriangular,UnitLowerTriangular}, B::LowerTriangular)
+    return LowerTriangular(ldiv!(xA, tril!(B.data)))
 end
 
 function rdiv!(A::UpperTriangular, B::Union{UpperTriangular,UnitUpperTriangular})
@@ -1766,30 +1495,11 @@ end
 function rdiv!(A::LowerTriangular, B::Union{LowerTriangular,UnitLowerTriangular})
     return LowerTriangular(rdiv!(tril!(A.data), B))
 end
-
-function rmul!(A::UpperTriangular, adjB::Adjoint{<:Any,<:Union{LowerTriangular,UnitLowerTriangular}})
-    return UpperTriangular(rmul!(triu!(A.data), adjB))
-end
-function rmul!(A::LowerTriangular, adjB::Adjoint{<:Any,<:Union{UpperTriangular,UnitUpperTriangular}})
-    return LowerTriangular(rmul!(tril!(A.data), adjB))
-end
-function rmul!(A::UpperTriangular, transB::Transpose{<:Any,<:Union{LowerTriangular,UnitLowerTriangular}})
-    return UpperTriangular(rmul!(triu!(A.data), transB))
+function rmul!(A::UpperTriangular, B::Union{UpperTriangular,UnitUpperTriangular})
+    return UpperTriangular(rmul!(triu!(A.data), B))
 end
-function rmul!(A::LowerTriangular, transB::Transpose{<:Any,<:Union{UpperTriangular,UnitUpperTriangular}})
-    return LowerTriangular(rmul!(tril!(A.data), transB))
-end
-function rdiv!(A::UpperTriangular, adjB::Adjoint{<:Any,<:Union{LowerTriangular,UnitLowerTriangular}})
-    return UpperTriangular(rdiv!(triu!(A.data), adjB))
-end
-function rdiv!(A::LowerTriangular, adjB::Adjoint{<:Any,<:Union{UpperTriangular,UnitUpperTriangular}})
-    return LowerTriangular(rdiv!(tril!(A.data), adjB))
-end
-function rdiv!(A::UpperTriangular, transB::Transpose{<:Any,<:Union{LowerTriangular,UnitLowerTriangular}})
-    return UpperTriangular(rdiv!(triu!(A.data), transB))
-end
-function rdiv!(A::LowerTriangular, transB::Transpose{<:Any,<:Union{UpperTriangular,UnitUpperTriangular}})
-    return LowerTriangular(rdiv!(tril!(A.data), transB))
+function rmul!(A::LowerTriangular, B::Union{LowerTriangular,UnitLowerTriangular})
+    return LowerTriangular(rmul!(tril!(A.data), B))
 end
 
 # Promotion
@@ -1807,322 +1517,153 @@ for (f, f2!) in ((:*, :lmul!), (:\, :ldiv!))
         function ($f)(A::LowerTriangular, B::LowerTriangular)
             TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
                          ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+            BB = copy_similar(B, TAB)
             return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
         function $(f)(A::UnitLowerTriangular, B::LowerTriangular)
             TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                          (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
+             BB = copy_similar(B, TAB)
             return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
-        function ($f)(A::UpperTriangular, B::UpperTriangular)
+        function $(f)(A::LowerTriangular, B::UnitLowerTriangular)
             TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
                          ($f)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
+             BB = copy_similar(B, TAB)
+            return LowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
-        function ($f)(A::UnitUpperTriangular, B::UpperTriangular)
+        function $(f)(A::UnitLowerTriangular, B::UnitLowerTriangular)
             TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                          (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
+             BB = copy_similar(B, TAB)
+            return UnitLowerTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
-    end
-end
 
-for (ipop, op, xformtype, xformop) in (
-        (:lmul!, :*, :Adjoint, :adjoint),
-        (:lmul!, :*, :Transpose, :transpose),
-        (:ldiv!, :\, :Adjoint, :adjoint),
-        (:ldiv!, :\, :Transpose, :transpose))
-    @eval begin
-        function ($op)(xformA::($xformtype){<:Any,<:UpperTriangular}, B::LowerTriangular)
-            A = xformA.parent
-            TAB = typeof(($op)($xformop(zero(eltype(A))), zero(eltype(B))) +
-                         ($op)($xformop(zero(eltype(A))), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            return LowerTriangular(($ipop)($xformop(convert(AbstractMatrix{TAB}, A)), BB))
+        function ($f)(A::UpperTriangular, B::UpperTriangular)
+            TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
+                         ($f)(zero(eltype(A)), zero(eltype(B))))
+            BB = copy_similar(B, TAB)
+            return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
-        function ($op)(xformA::($xformtype){<:Any,<:UnitUpperTriangular}, B::LowerTriangular)
-            A = xformA.parent
+        function ($f)(A::UnitUpperTriangular, B::UpperTriangular)
             TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                          (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            return LowerTriangular($ipop($xformop(convert(AbstractMatrix{TAB}, A)), BB))
+            BB = copy_similar(B, TAB)
+            return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
-        function ($op)(xformA::($xformtype){<:Any,<:LowerTriangular}, B::UpperTriangular)
-            A = xformA.parent
-            TAB = typeof(($op)($xformop(zero(eltype(A))), zero(eltype(B))) +
-                         ($op)($xformop(zero(eltype(A))), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            return UpperTriangular($ipop($xformop(convert(AbstractMatrix{TAB}, A)), BB))
+        function ($f)(A::UpperTriangular, B::UnitUpperTriangular)
+            TAB = typeof(($f)(zero(eltype(A)), zero(eltype(B))) +
+                         ($f)(zero(eltype(A)), zero(eltype(B))))
+            BB = copy_similar(B, TAB)
+            return UpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
 
-        function ($op)(xformA::($xformtype){<:Any,<:UnitLowerTriangular}, B::UpperTriangular)
-            A = xformA.parent
+        function ($f)(A::UnitUpperTriangular, B::UnitUpperTriangular)
             TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                          (*)(zero(eltype(A)), zero(eltype(B))))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            return UpperTriangular($ipop($xformop(convert(AbstractMatrix{TAB}, A)), BB))
+            BB = copy_similar(B, TAB)
+            return UnitUpperTriangular($f2!(convert(AbstractMatrix{TAB}, A), BB))
         end
     end
 end
 
 function (/)(A::LowerTriangular, B::LowerTriangular)
-    TAB = typeof((/)(zero(eltype(A)), zero(eltype(B))) +
-                 (/)(zero(eltype(A)), zero(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
+                 (/)(zero(eltype(A)), one(eltype(B))))
+    AA = copy_similar(A, TAB)
+    return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
+end
+function (/)(A::UnitLowerTriangular, B::LowerTriangular)
+    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
+                 (/)(zero(eltype(A)), one(eltype(B))))
+    AA = copy_similar(A, TAB)
     return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::LowerTriangular, B::UnitLowerTriangular)
+    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
+                 (/)(zero(eltype(A)), one(eltype(B))))
+    AA = copy_similar(A, TAB)
+    return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
+end
+function (/)(A::UnitLowerTriangular, B::UnitLowerTriangular)
     TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
                  (*)(zero(eltype(A)), zero(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
-    return LowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
+    AA = copy_similar(A, TAB)
+    return UnitLowerTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::UpperTriangular, B::UpperTriangular)
-    TAB = typeof((/)(zero(eltype(A)), zero(eltype(B))) +
-                 (/)(zero(eltype(A)), zero(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
+                 (/)(zero(eltype(A)), one(eltype(B))))
+    AA = copy_similar(A, TAB)
+    return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
+end
+function (/)(A::UnitUpperTriangular, B::UpperTriangular)
+    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
+                 (/)(zero(eltype(A)), one(eltype(B))))
+    AA = copy_similar(A, TAB)
     return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 function (/)(A::UpperTriangular, B::UnitUpperTriangular)
-    TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                 (*)(zero(eltype(A)), zero(eltype(B))))
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    TAB = typeof((/)(zero(eltype(A)), one(eltype(B))) +
+                 (/)(zero(eltype(A)), one(eltype(B))))
+    AA = copy_similar(A, TAB)
     return UpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
-
-for (ipop, op, xformtype, xformop) in (
-        (:rmul!, :*, :Adjoint, :adjoint),
-        (:rmul!, :*, :Transpose, :transpose),
-        (:rdiv!, :/, :Adjoint, :adjoint),
-        (:rdiv!, :/, :Transpose, :transpose))
-    @eval begin
-        function ($op)(A::LowerTriangular, xformB::($xformtype){<:Any,<:UpperTriangular})
-            B = xformB.parent
-            TAB = typeof(($op)(zero(eltype(A)), $xformop(zero(eltype(B)))) +
-                         ($op)(zero(eltype(A)), $xformop(zero(eltype(B)))))
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            return LowerTriangular($ipop(AA, $xformop(convert(AbstractMatrix{TAB}, B))))
-        end
-
-        function ($op)(A::LowerTriangular, xformB::($xformtype){<:Any,<:UnitUpperTriangular})
-            B = xformB.parent
-            TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                         (*)(zero(eltype(A)), zero(eltype(B))))
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            return LowerTriangular($ipop(AA, $xformop(convert(AbstractMatrix{TAB}, B))))
-        end
-
-        function ($op)(A::UpperTriangular, xformB::($xformtype){<:Any,<:LowerTriangular})
-            B = xformB.parent
-            TAB = typeof(($op)(zero(eltype(A)), $xformop(zero(eltype(B)))) +
-                         ($op)(zero(eltype(A)), $xformop(zero(eltype(B)))))
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            return UpperTriangular($ipop(AA, $xformop(convert(AbstractMatrix{TAB}, B))))
-        end
-
-        function ($op)(A::UpperTriangular, xformB::($xformtype){<:Any,<:UnitLowerTriangular})
-            B = xformB.parent
-            TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
-                         (*)(zero(eltype(A)), zero(eltype(B))))
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            return UpperTriangular($ipop(AA, $xformop(convert(AbstractMatrix{TAB}, B))))
-        end
-    end
+function (/)(A::UnitUpperTriangular, B::UnitUpperTriangular)
+    TAB = typeof((*)(zero(eltype(A)), zero(eltype(B))) +
+                 (*)(zero(eltype(A)), zero(eltype(B))))
+    AA = copy_similar(A, TAB)
+    return UnitUpperTriangular(rdiv!(AA, convert(AbstractMatrix{TAB}, B)))
 end
 
 _inner_type_promotion(A,B) = promote_type(eltype(A), eltype(B), typeof(zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B))))
 ## The general promotion methods
 function *(A::AbstractTriangular, B::AbstractTriangular)
     TAB = _inner_type_promotion(A,B)
-    BB = similar(B, TAB, size(B))
-    copyto!(BB, B)
+    BB = copy_similar(B, TAB)
     lmul!(convert(AbstractArray{TAB}, A), BB)
 end
-function *(adjA::Adjoint{<:Any,<:AbstractTriangular}, B::AbstractTriangular)
-    A = adjA.parent
-    TAB = _inner_type_promotion(A,B)
-    BB = similar(B, TAB, size(B))
-    copyto!(BB, B)
-    lmul!(adjoint(convert(AbstractArray{TAB}, A)), BB)
-end
-function *(transA::Transpose{<:Any,<:AbstractTriangular}, B::AbstractTriangular)
-    A = transA.parent
-    TAB = _inner_type_promotion(A,B)
-    BB = similar(B, TAB, size(B))
-    copyto!(BB, B)
-    lmul!(transpose(convert(AbstractArray{TAB}, A)), BB)
-end
-
-function *(A::AbstractTriangular, adjB::Adjoint{<:Any,<:AbstractTriangular})
-    B = adjB.parent
-    TAB = _inner_type_promotion(A,B)
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
-    rmul!(AA, adjoint(convert(AbstractArray{TAB}, B)))
-end
-function *(A::AbstractTriangular, transB::Transpose{<:Any,<:AbstractTriangular})
-    B = transB.parent
-    TAB = _inner_type_promotion(A,B)
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
-    rmul!(AA, transpose(convert(AbstractArray{TAB}, B)))
-end
 
 for mat in (:AbstractVector, :AbstractMatrix)
     ### Multiplication with triangle to the left and hence rhs cannot be transposed.
-    @eval begin
-        function *(A::AbstractTriangular, B::$mat)
-            require_one_based_indexing(B)
-            TAB = _inner_type_promotion(A,B)
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            lmul!(convert(AbstractArray{TAB}, A), BB)
-        end
-        function *(adjA::Adjoint{<:Any,<:AbstractTriangular}, B::$mat)
-            require_one_based_indexing(B)
-            A = adjA.parent
-            TAB = _inner_type_promotion(A,B)
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            lmul!(adjoint(convert(AbstractArray{TAB}, A)), BB)
-        end
-        function *(transA::Transpose{<:Any,<:AbstractTriangular}, B::$mat)
-            require_one_based_indexing(B)
-            A = transA.parent
-            TAB = _inner_type_promotion(A,B)
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            lmul!(transpose(convert(AbstractArray{TAB}, A)), BB)
-        end
+    @eval function *(A::AbstractTriangular, B::$mat)
+        require_one_based_indexing(B)
+        TAB = _inner_type_promotion(A,B)
+        BB = copy_similar(B, TAB)
+        lmul!(convert(AbstractArray{TAB}, A), BB)
     end
     ### Left division with triangle to the left hence rhs cannot be transposed. No quotients.
-    @eval begin
-        function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat)
-            require_one_based_indexing(B)
-            TAB = _inner_type_promotion(A,B)
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            ldiv!(convert(AbstractArray{TAB}, A), BB)
-        end
-        function \(adjA::Adjoint{<:Any,<:Union{UnitUpperTriangular,UnitLowerTriangular}}, B::$mat)
-            require_one_based_indexing(B)
-            A = adjA.parent
-            TAB = _inner_type_promotion(A,B)
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            ldiv!(adjoint(convert(AbstractArray{TAB}, A)), BB)
-        end
-        function \(transA::Transpose{<:Any,<:Union{UnitUpperTriangular,UnitLowerTriangular}}, B::$mat)
-            require_one_based_indexing(B)
-            A = transA.parent
-            TAB = _inner_type_promotion(A,B)
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            ldiv!(transpose(convert(AbstractArray{TAB}, A)), BB)
-        end
+    @eval function \(A::Union{UnitUpperTriangular,UnitLowerTriangular}, B::$mat)
+        require_one_based_indexing(B)
+        TAB = _inner_type_promotion(A,B)
+        BB = copy_similar(B, TAB)
+        ldiv!(convert(AbstractArray{TAB}, A), BB)
     end
     ### Left division with triangle to the left hence rhs cannot be transposed. Quotients.
-    @eval begin
-        function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat)
-            require_one_based_indexing(B)
-            TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            ldiv!(convert(AbstractArray{TAB}, A), BB)
-        end
-        function \(adjA::Adjoint{<:Any,<:Union{UpperTriangular,LowerTriangular}}, B::$mat)
-            require_one_based_indexing(B)
-            A = adjA.parent
-            TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            ldiv!(adjoint(convert(AbstractArray{TAB}, A)), BB)
-        end
-        function \(transA::Transpose{<:Any,<:Union{UpperTriangular,LowerTriangular}}, B::$mat)
-            require_one_based_indexing(B)
-            A = transA.parent
-            TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-            BB = similar(B, TAB, size(B))
-            copyto!(BB, B)
-            ldiv!(transpose(convert(AbstractArray{TAB}, A)), BB)
-        end
+    @eval function \(A::Union{UpperTriangular,LowerTriangular}, B::$mat)
+        require_one_based_indexing(B)
+        TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
+        BB = copy_similar(B, TAB)
+        ldiv!(convert(AbstractArray{TAB}, A), BB)
     end
     ### Right division with triangle to the right hence lhs cannot be transposed. No quotients.
-    @eval begin
-        function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular})
-            require_one_based_indexing(A)
-            TAB = _inner_type_promotion(A,B)
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            rdiv!(AA, convert(AbstractArray{TAB}, B))
-        end
-        function /(A::$mat, adjB::Adjoint{<:Any,<:Union{UnitUpperTriangular, UnitLowerTriangular}})
-            require_one_based_indexing(A)
-            B = adjB.parent
-            TAB = _inner_type_promotion(A,B)
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            rdiv!(AA, adjoint(convert(AbstractArray{TAB}, B)))
-        end
-        function /(A::$mat, transB::Transpose{<:Any,<:Union{UnitUpperTriangular, UnitLowerTriangular}})
-            require_one_based_indexing(A)
-            B = transB.parent
-            TAB = _inner_type_promotion(A,B)
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            rdiv!(AA, transpose(convert(AbstractArray{TAB}, B)))
-        end
+    @eval function /(A::$mat, B::Union{UnitUpperTriangular, UnitLowerTriangular})
+        require_one_based_indexing(A)
+        TAB = _inner_type_promotion(A,B)
+        AA = copy_similar(A, TAB)
+        rdiv!(AA, convert(AbstractArray{TAB}, B))
     end
     ### Right division with triangle to the right hence lhs cannot be transposed. Quotients.
-    @eval begin
-        function /(A::$mat, B::Union{UpperTriangular,LowerTriangular})
-            require_one_based_indexing(A)
-            TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            rdiv!(AA, convert(AbstractArray{TAB}, B))
-        end
-        function /(A::$mat, adjB::Adjoint{<:Any,<:Union{UpperTriangular,LowerTriangular}})
-            require_one_based_indexing(A)
-            B = adjB.parent
-            TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            rdiv!(AA, adjoint(convert(AbstractArray{TAB}, B)))
-        end
-        function /(A::$mat, transB::Transpose{<:Any,<:Union{UpperTriangular,LowerTriangular}})
-            require_one_based_indexing(A)
-            B = transB.parent
-            TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
-            AA = similar(A, TAB, size(A))
-            copyto!(AA, A)
-            rdiv!(AA, transpose(convert(AbstractArray{TAB}, B)))
-        end
+    @eval function /(A::$mat, B::Union{UpperTriangular,LowerTriangular})
+        require_one_based_indexing(A)
+        TAB = typeof((zero(eltype(A))*zero(eltype(B)) + zero(eltype(A))*zero(eltype(B)))/one(eltype(A)))
+        AA = copy_similar(A, TAB)
+        rdiv!(AA, convert(AbstractArray{TAB}, B))
     end
 end
 ### Multiplication with triangle to the right and hence lhs cannot be transposed.
@@ -2130,32 +1671,12 @@ end
 function *(A::AbstractMatrix, B::AbstractTriangular)
     require_one_based_indexing(A)
     TAB = _inner_type_promotion(A,B)
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
+    AA = copy_similar(A, TAB)
     rmul!(AA, convert(AbstractArray{TAB}, B))
 end
-function *(A::AbstractMatrix, adjB::Adjoint{<:Any,<:AbstractTriangular})
-    require_one_based_indexing(A)
-    B = adjB.parent
-    TAB = _inner_type_promotion(A,B)
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
-    rmul!(AA, adjoint(convert(AbstractArray{TAB}, B)))
-end
-function *(A::AbstractMatrix, transB::Transpose{<:Any,<:AbstractTriangular})
-    require_one_based_indexing(A)
-    B = transB.parent
-    TAB = _inner_type_promotion(A,B)
-    AA = similar(A, TAB, size(A))
-    copyto!(AA, A)
-    rmul!(AA, transpose(convert(AbstractArray{TAB}, B)))
-end
 # ambiguity resolution with definitions in linalg/rowvector.jl
 *(v::AdjointAbsVec, A::AbstractTriangular) = adjoint(adjoint(A) * v.parent)
 *(v::TransposeAbsVec, A::AbstractTriangular) = transpose(transpose(A) * v.parent)
-*(v::AdjointAbsVec, A::Adjoint{<:Any,<:AbstractTriangular}) = adjoint(A.parent * v.parent)
-*(v::TransposeAbsVec, A::Transpose{<:Any,<:AbstractTriangular}) = transpose(A.parent * v.parent)
-
 
 # If these are not defined, they will fallback to the versions in matmul.jl
 # and dispatch to generic_matmatmul! which is very costly to compile. The methods
@@ -2166,12 +1687,6 @@ end
 *(A::Transpose{<:Any,<:AbstractMatrix}, B::AbstractTriangular) = copy(A) * B
 *(A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractMatrix}) = A * copy(B)
 *(A::AbstractTriangular, B::Transpose{<:Any,<:AbstractMatrix}) = A * copy(B)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractTriangular}) = A * copy(B)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractMatrix}) = A * copy(B)
-*(A::Adjoint{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:AbstractTriangular}) = copy(A) * B
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractTriangular}) = A * copy(B)
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractMatrix}) = A * copy(B)
-*(A::Transpose{<:Any,<:AbstractMatrix}, B::Transpose{<:Any,<:AbstractTriangular}) = copy(A) * B
 
 # Complex matrix power for upper triangular factor, see:
 #   Higham and Lin, "A Schur-Padé algorithm for fractional powers of a Matrix",
@@ -2243,7 +1758,7 @@ end
 powm(A::LowerTriangular, p::Real) = copy(transpose(powm!(copy(transpose(A)), p::Real)))
 
 # Complex matrix logarithm for the upper triangular factor, see:
-#   Al-Mohy and Higham, "Improved inverse  scaling and squaring algorithms for
+#   Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for
 #     the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169.
 #   Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix
 #     logarithm and estimating the condition number", SIAM J. Sci. Comput.,
@@ -2252,7 +1767,90 @@ powm(A::LowerTriangular, p::Real) = copy(transpose(powm!(copy(transpose(A)), p::
 # Based on the code available at http://eprints.ma.man.ac.uk/1851/02/logm.zip,
 # Copyright (c) 2011, Awad H. Al-Mohy and Nicholas J. Higham
 # Julia version relicensed with permission from original authors
-function log(A0::UpperTriangular{T}) where T<:BlasFloat
+log(A::UpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A)
+log(A::UnitUpperTriangular{T}) where {T<:BlasFloat} = log_quasitriu(A)
+log(A::LowerTriangular) = copy(transpose(log(copy(transpose(A)))))
+log(A::UnitLowerTriangular) = copy(transpose(log(copy(transpose(A)))))
+
+function log_quasitriu(A0::AbstractMatrix{T}) where T<:BlasFloat
+    # allocate real A if log(A) will be real and complex A otherwise
+    n = checksquare(A0)
+    if isreal(A0) && (!istriu(A0) || !any(x -> real(x) < zero(real(T)), diag(A0)))
+        A = T <: Complex ? real(A0) : copy(A0)
+    else
+        A = T <: Complex ? copy(A0) : complex(A0)
+    end
+    if A0 isa UnitUpperTriangular
+        A = UpperTriangular(parent(A))
+        @inbounds for i in 1:n
+            A[i,i] = 1
+        end
+    end
+    Y0 = _log_quasitriu!(A0, A)
+    # return complex result for complex input
+    Y = T <: Complex ? complex(Y0) : Y0
+
+    if A0 isa UpperTriangular || A0 isa UnitUpperTriangular
+        return UpperTriangular(Y)
+    else
+        return Y
+    end
+end
+# type-stable implementation of log_quasitriu
+# A is a copy of A0 that is overwritten while computing the result. It has the same eltype
+# as the result.
+function _log_quasitriu!(A0, A)
+    # Find Padé degree m and s while replacing A with A^(1/2^s)
+    m, s = _find_params_log_quasitriu!(A)
+
+    # Compute accurate superdiagonal of A
+    _pow_superdiag_quasitriu!(A, A0, 0.5^s)
+
+    # Compute accurate block diagonal of A
+    _sqrt_pow_diag_quasitriu!(A, A0, s)
+
+    # Get the Gauss-Legendre quadrature points and weights
+    R = zeros(Float64, m, m)
+    for i = 1:m - 1
+        R[i,i+1] = i / sqrt((2 * i)^2 - 1)
+        R[i+1,i] = R[i,i+1]
+    end
+    x,V = eigen(R)
+    w = Vector{Float64}(undef, m)
+    for i = 1:m
+        x[i] = (x[i] + 1) / 2
+        w[i] = V[1,i]^2
+    end
+
+    # Compute the Padé approximation
+    t = eltype(A)
+    n = size(A, 1)
+    Y = zeros(t, n, n)
+    B = similar(A)
+    for k = 1:m
+        B .= t(x[k]) .* A
+        @inbounds for i in 1:n
+            B[i,i] += 1
+        end
+        Y .+= t(w[k]) .* rdiv_quasitriu!(A, B)
+    end
+
+    # Scale back
+    lmul!(2.0^s, Y)
+
+    # Compute accurate diagonal and superdiagonal of log(A)
+    _log_diag_quasitriu!(Y, A0)
+
+    return Y
+end
+
+# Auxiliary functions for matrix logarithm and matrix power
+
+# Find Padé degree m and s while replacing A with A^(1/2^s)
+#   Al-Mohy and Higham, "Improved inverse scaling and squaring algorithms for
+#     the matrix logarithm", SIAM J. Sci. Comput., 34(4), (2012), pp. C153–C169.
+#   from Algorithm 4.1
+function _find_params_log_quasitriu!(A)
     maxsqrt = 100
     theta = [1.586970738772063e-005,
          2.313807884242979e-003,
@@ -2262,13 +1860,13 @@ function log(A0::UpperTriangular{T}) where T<:BlasFloat
          2.060962623452836e-001,
          2.879093714241194e-001]
     tmax = size(theta, 1)
-    n = size(A0, 1)
-    A = copy(A0)
+    n = size(A, 1)
     p = 0
     m = 0
 
-    # Compute repeated roots
-    d = complex(diag(A))
+    # Find s0, the smallest s such that the ρ(triu(A)^(1/2^s) - I) ≤ theta[tmax], where ρ(X)
+    # is the spectral radius of X
+    d = complex.(@view(A[diagind(A)]))
     dm1 = d .- 1
     s = 0
     while norm(dm1, Inf) > theta[tmax] && s < maxsqrt
@@ -2277,13 +1875,18 @@ function log(A0::UpperTriangular{T}) where T<:BlasFloat
         s = s + 1
     end
     s0 = s
+
+    # Compute repeated roots
     for k = 1:min(s, maxsqrt)
-        A = sqrt(A)
+        _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A)
     end
 
-    AmI = A - I
-    d2 = sqrt(opnorm(AmI^2, 1))
-    d3 = cbrt(opnorm(AmI^3, 1))
+    # these three never needed at the same time, so reuse the same temporary
+    AmI = AmI4 = AmI5 = A - I
+    AmI2 = AmI * AmI
+    AmI3 = AmI2 * AmI
+    d2 = sqrt(opnorm(AmI2, 1))
+    d3 = cbrt(opnorm(AmI3, 1))
     alpha2 = max(d2, d3)
     foundm = false
     if alpha2 <= theta[2]
@@ -2292,11 +1895,9 @@ function log(A0::UpperTriangular{T}) where T<:BlasFloat
     end
 
     while !foundm
-        more = false
-        if s > s0
-            d3 = cbrt(opnorm(AmI^3, 1))
-        end
-        d4 = opnorm(AmI^4, 1)^(1/4)
+        more_sqrt = false
+        mul!(AmI4, AmI2, AmI2)
+        d4 = opnorm(AmI4, 1)^(1/4)
         alpha3 = max(d3, d4)
         if alpha3 <= theta[tmax]
             local j
@@ -2309,13 +1910,14 @@ function log(A0::UpperTriangular{T}) where T<:BlasFloat
                 m = j
                 break
             elseif alpha3 / 2 <= theta[5] && p < 2
-                more = true
+                more_sqrt = true
                 p = p + 1
            end
         end
 
-        if !more
-            d5 = opnorm(AmI^5, 1)^(1/5)
+        if !more_sqrt
+            mul!(AmI5, AmI3, AmI2)
+            d5 = opnorm(AmI5, 1)^(1/5)
             alpha4 = max(d4, d5)
             eta = min(alpha3, alpha4)
             if eta <= theta[tmax]
@@ -2334,112 +1936,218 @@ function log(A0::UpperTriangular{T}) where T<:BlasFloat
             m = tmax
             break
         end
-        A = sqrt(A)
-        AmI = A - I
+        _sqrt_quasitriu!(A isa UpperTriangular ? parent(A) : A, A)
+        copyto!(AmI, A)
+        for i in 1:n
+            @inbounds AmI[i,i] -= 1
+        end
+        mul!(AmI2, AmI, AmI)
+        mul!(AmI3, AmI2, AmI)
+        d3 = cbrt(opnorm(AmI3, 1))
         s = s + 1
     end
+    return m, s
+end
 
-    # Compute accurate superdiagonal of T
-    blockpower!(A, A0, 0.5^s)
-
-    # Compute accurate diagonal of T
-    for i = 1:n
-        a = A0[i,i]
-        if s == 0
-            A[i,i] = a - 1
-            continue
-        end
-        s0 = s
-        if angle(a) >= pi / 2
-            a = sqrt(a)
-            s0 = s - 1
-        end
-        z0 = a - 1
-        a = sqrt(a)
-        r = 1 + a
-        for j = 1:s0-1
-            a = sqrt(a)
-            r = r * (1 + a)
+# Compute accurate diagonal of A = A0^s - I
+function sqrt_diag!(A0::UpperTriangular, A::UpperTriangular, s)
+    n = checksquare(A0)
+    T = eltype(A)
+    @inbounds for i = 1:n
+        a = complex(A0[i,i])
+        A[i,i] = _sqrt_pow(a, s)
+    end
+end
+# Compute accurate block diagonal of A = A0^s - I for upper quasi-triangular A0 produced
+# by the Schur decomposition. Diagonal is made of 1x1 and 2x2 blocks.
+# 2x2 blocks are real with non-negative conjugate pair eigenvalues
+function _sqrt_pow_diag_quasitriu!(A, A0, s)
+    n = checksquare(A0)
+    t = typeof(sqrt(zero(eltype(A))))
+    i = 1
+    @inbounds while i < n
+        if iszero(A0[i+1,i])  # 1x1 block
+            A[i,i] = _sqrt_pow(t(A0[i,i]), s)
+            i += 1
+        else  # real 2x2 block
+            @views _sqrt_pow_diag_block_2x2!(A[i:i+1,i:i+1], A0[i:i+1,i:i+1], s)
+            i += 2
         end
-        A[i,i] = z0 / r
     end
-
-    # Get the Gauss-Legendre quadrature points and weights
-    R = zeros(Float64, m, m)
-    for i = 1:m - 1
-        R[i,i+1] = i / sqrt((2 * i)^2 - 1)
-        R[i+1,i] = R[i,i+1]
+    if i == n  # last block is 1x1
+        @inbounds A[n,n] = _sqrt_pow(t(A0[n,n]), s)
     end
-    x,V = eigen(R)
-    w = Vector{Float64}(undef, m)
-    for i = 1:m
-        x[i] = (x[i] + 1) / 2
-        w[i] = V[1,i]^2
+    return A
+end
+# compute a^(1/2^s)-1
+#   Al-Mohy, "A more accurate Briggs method for the logarithm",
+#      Numer. Algorithms, 59, (2012), 393–402.
+#   Algorithm 2
+function _sqrt_pow(a::Number, s)
+    T = typeof(sqrt(zero(a)))
+    s == 0 && return T(a) - 1
+    s0 = s
+    if imag(a) >= 0 && real(a) <= 0 && !iszero(a)  # angle(a) ≥ π / 2
+        a = sqrt(a)
+        s0 = s - 1
     end
-
-    # Compute the Padé approximation
-    Y = zeros(T, n, n)
-    for k = 1:m
-        Y = Y + w[k] * (A / (x[k] * A + I))
+    z0 = a - 1
+    a = sqrt(a)
+    r = 1 + a
+    for j = 1:s0-1
+        a = sqrt(a)
+        r = r * (1 + a)
+    end
+    return z0 / r
+end
+# compute A0 = A^(1/2^s)-I for 2x2 real matrices A and A0
+# A has non-negative conjugate pair eigenvalues
+# "Improved Inverse Scaling and Squaring Algorithms for the Matrix Logarithm"
+# SIAM J. Sci. Comput., 34(4), (2012) C153–C169. doi: 10.1137/110852553
+# Algorithm 5.1
+Base.@propagate_inbounds function _sqrt_pow_diag_block_2x2!(A, A0, s)
+    _sqrt_real_2x2!(A, A0)
+    if isone(s)
+        A[1,1] -= 1
+        A[2,2] -= 1
+    else
+        # Z = A - I
+        z11, z21, z12, z22 = A[1,1] - 1, A[2,1], A[1,2], A[2,2] - 1
+        # A = sqrt(A)
+        _sqrt_real_2x2!(A, A)
+        # P = A + I
+        p11, p21, p12, p22 = A[1,1] + 1, A[2,1], A[1,2], A[2,2] + 1
+        for i in 1:(s - 2)
+            # A = sqrt(A)
+            _sqrt_real_2x2!(A, A)
+            a11, a21, a12, a22 = A[1,1], A[2,1], A[1,2], A[2,2]
+            # P += P * A
+            r11 = p11*(1 + a11) + p12*a21
+            r22 = p21*a12 + p22*(1 + a22)
+            p21 = p21*(1 + a11) + p22*a21
+            p12 = p11*a12 + p12*(1 + a22)
+            p11 = r11
+            p22 = r22
+        end
+        # A = Z / P
+        c = inv(p11*p22 - p21*p12)
+        A[1,1] = (p22*z11 - p21*z12) * c
+        A[2,1] = (p22*z21 - p21*z22) * c
+        A[1,2] = (p11*z12 - p12*z11) * c
+        A[2,2] = (p11*z22 - p12*z21) * c
     end
+    return A
+end
+# Compute accurate superdiagonal of A = A0^s - I for upper quasi-triangular A0 produced
+# by a Schur decomposition.
+# Higham and Lin, "A Schur–Padé Algorithm for Fractional Powers of a Matrix"
+# SIAM J. Matrix Anal. Appl., 32(3), (2011), 1056–1078.
+# Equation 5.6
+# see also blockpower for when A0 is upper triangular
+function _pow_superdiag_quasitriu!(A, A0, p)
+    n = checksquare(A0)
+    t = eltype(A)
+    k = 1
+    @inbounds while k < n
+        if !iszero(A[k+1,k])
+            k += 2
+            continue
+        end
+        if !(k == n - 1 || iszero(A[k+2,k+1]))
+            k += 3
+            continue
+        end
+        Ak = t(A0[k,k])
+        Akp1 = t(A0[k+1,k+1])
 
-    # Scale back
-    lmul!(2.0^s, Y)
+        Akp = Ak^p
+        Akp1p = Akp1^p
 
-    # Compute accurate diagonal and superdiagonal of log(T)
-    for k = 1:n-1
-        Ak = A0[k,k]
-        Akp1 = A0[k+1,k+1]
-        logAk = log(Ak)
-        logAkp1 = log(Akp1)
-        Y[k,k] = logAk
-        Y[k+1,k+1] = logAkp1
         if Ak == Akp1
-            Y[k,k+1] = A0[k,k+1] / Ak
+            A[k,k+1] = p * A0[k,k+1] * Ak^(p-1)
         elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
-            Y[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
+            A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
         else
+            logAk = log(Ak)
+            logAkp1 = log(Akp1)
             z = (Akp1 - Ak)/(Akp1 + Ak)
             if abs(z) > 1
-                Y[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
+                A[k,k+1] = A0[k,k+1] * (Akp1p - Akp) / (Akp1 - Ak)
             else
                 w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
-                Y[k,k+1] = 2 * A0[k,k+1] * w / (Akp1 - Ak)
+                dd = 2 * exp(p*(logAk+logAkp1)/2) * sinh(p*w) / (Akp1 - Ak);
+                A[k,k+1] = A0[k,k+1] * dd
             end
         end
+        k += 1
     end
-
-    return UpperTriangular(Y)
 end
-log(A::LowerTriangular) = copy(transpose(log(copy(transpose(A)))))
 
-# Auxiliary functions for matrix logarithm and matrix power
-
-# Compute accurate diagonal of A = A0^s - I
-#   Al-Mohy, "A more accurate Briggs method for the logarithm",
-#      Numer. Algorithms, 59, (2012), 393–402.
-function sqrt_diag!(A0::UpperTriangular, A::UpperTriangular, s)
+# Compute accurate block diagonal and superdiagonal of A = log(A0) for upper
+# quasi-triangular A0 produced by the Schur decomposition.
+function _log_diag_quasitriu!(A, A0)
     n = checksquare(A0)
-    @inbounds for i = 1:n
-        a = complex(A0[i,i])
-        if s == 0
-            A[i,i] = a - 1
-        else
-            s0 = s
-            if imag(a) >= 0 && real(a) <= 0 && a != 0
-                a = sqrt(a)
-                s0 = s - 1
-            end
-            z0 = a - 1
-            a = sqrt(a)
-            r = 1 + a
-            for j = 1:s0-1
-                a = sqrt(a)
-                r = r * (1 + a)
+    t = eltype(A)
+    k = 1
+    @inbounds while k < n
+        if iszero(A0[k+1,k])  # 1x1 block
+            Ak = t(A0[k,k])
+            logAk = log(Ak)
+            A[k,k] = logAk
+            if k < n - 2 && iszero(A0[k+2,k+1])
+                Akp1 = t(A0[k+1,k+1])
+                logAkp1 = log(Akp1)
+                A[k+1,k+1] = logAkp1
+                if Ak == Akp1
+                    A[k,k+1] = A0[k,k+1] / Ak
+                elseif 2 * abs(Ak) < abs(Akp1) || 2 * abs(Akp1) < abs(Ak) || iszero(Akp1 + Ak)
+                    A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
+                else
+                    z = (Akp1 - Ak)/(Akp1 + Ak)
+                    if abs(z) > 1
+                        A[k,k+1] = A0[k,k+1] * (logAkp1 - logAk) / (Akp1 - Ak)
+                    else
+                        w = atanh(z) + im * pi * (unw(logAkp1-logAk) - unw(log1p(z)-log1p(-z)))
+                        A[k,k+1] = 2 * A0[k,k+1] * w / (Akp1 - Ak)
+                    end
+                end
+                k += 2
+            else
+                k += 1
             end
-            A[i,i] = z0 / r
+        else  # real 2x2 block
+            @views _log_diag_block_2x2!(A[k:k+1,k:k+1], A0[k:k+1,k:k+1])
+            k += 2
         end
     end
+    if k == n  # last 1x1 block
+        @inbounds A[n,n] = log(t(A0[n,n]))
+    end
+    return A
+end
+# compute A0 = log(A) for 2x2 real matrices A and A0, where A0 is a diagonal 2x2 block
+# produced by real Schur decomposition.
+# Al-Mohy, Higham and Relton, "Computing the Frechet derivative of the matrix
+# logarithm and estimating the condition number", SIAM J. Sci. Comput.,
+# 35(4), (2013), C394–C410.
+# Eq. 6.1
+Base.@propagate_inbounds function _log_diag_block_2x2!(A, A0)
+    a, b, c = A0[1,1], A0[1,2], A0[2,1]
+    # avoid underflow/overflow for large/small b and c
+    s = sqrt(abs(b)) * sqrt(abs(c))
+    θ = atan(s, a)
+    t = θ / s
+    au = abs(a)
+    if au > s
+        a1 = log1p((s / au)^2) / 2 + log(au)
+    else
+        a1 = log1p((au / s)^2) / 2 + log(s)
+    end
+    A[1,1] = a1
+    A[2,1] = c*t
+    A[1,2] = b*t
+    A[2,2] = a1
+    return A
 end
 
 # Used only by powm at the moment
@@ -2573,48 +2281,24 @@ end
 unw(x::Real) = 0
 unw(x::Number) = ceil((imag(x) - pi) / (2 * pi))
 
-# End of auxiliary functions for matrix logarithm and matrix power
-
-function sqrt(A::UpperTriangular)
-    realmatrix = false
-    if isreal(A)
-        realmatrix = true
-        for i = 1:checksquare(A)
-            x = real(A[i,i])
-            if x < zero(x)
-                realmatrix = false
-                break
-            end
-        end
-    end
-    # Writing an explicit if instead of using Val(realmatrix) below
-    # makes the calls to sqrt(::UpperTriangular,::Val) type stable.
-    if realmatrix
-        return sqrt(A,Val(true))
-    else
-        return sqrt(A,Val(false))
+# compute A / B for upper quasi-triangular B, possibly overwriting B
+function rdiv_quasitriu!(A, B)
+    n = checksquare(A)
+    AG = copy(A)
+    # use Givens rotations to annihilate 2x2 blocks
+    @inbounds for k in 1:(n-1)
+        s = B[k+1,k]
+        iszero(s) && continue  # 1x1 block
+        G = first(givens(B[k+1,k+1], s, k, k+1))
+        rmul!(B, G)
+        rmul!(AG, G)
     end
+    return rdiv!(AG, UpperTriangular(B))
 end
-function sqrt(A::UpperTriangular{T},::Val{realmatrix}) where {T,realmatrix}
-    B = A.data
-    n = checksquare(B)
-    t = realmatrix ? typeof(sqrt(zero(T))) : typeof(sqrt(complex(zero(T))))
-    R = zeros(t, n, n)
-    tt = typeof(zero(t)*zero(t))
-    @inbounds for j = 1:n
-        R[j,j] = realmatrix ? sqrt(B[j,j]) : sqrt(complex(B[j,j]))
-        for i = j-1:-1:1
-            r::tt = B[i,j]
-            @simd for k = i+1:j-1
-                r -= R[i,k]*R[k,j]
-            end
-            if !(iszero(r) || (iszero(R[i,i]) && iszero(R[j,j])))
-                R[i,j] = sylvester(R[i,i],R[j,j],-r)
-            end
-        end
-    end
-    return UpperTriangular(R)
-end
+
+# End of auxiliary functions for matrix logarithm and matrix power
+
+sqrt(A::UpperTriangular) = sqrt_quasitriu(A)
 function sqrt(A::UnitUpperTriangular{T}) where T
     B = A.data
     n = checksquare(B)
@@ -2636,6 +2320,304 @@ end
 sqrt(A::LowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
 sqrt(A::UnitLowerTriangular) = copy(transpose(sqrt(copy(transpose(A)))))
 
+# Auxiliary functions for matrix square root
+
+# square root of upper triangular or real upper quasitriangular matrix
+function sqrt_quasitriu(A0; blockwidth = eltype(A0) <: Complex ? 512 : 256)
+    n = checksquare(A0)
+    T = eltype(A0)
+    Tr = typeof(sqrt(real(zero(T))))
+    Tc = typeof(sqrt(complex(zero(T))))
+    if isreal(A0)
+        is_sqrt_real = true
+        if istriu(A0)
+            for i in 1:n
+                Aii = real(A0[i,i])
+                if Aii < zero(Aii)
+                    is_sqrt_real = false
+                    break
+                end
+            end
+        end
+        if is_sqrt_real
+            R = zeros(Tr, n, n)
+            A = real(A0)
+        else
+            R = zeros(Tc, n, n)
+            A = A0
+        end
+    else
+        A = A0
+        R = zeros(Tc, n, n)
+    end
+    _sqrt_quasitriu!(R, A; blockwidth=blockwidth, n=n)
+    Rc = eltype(A0) <: Real ? R : complex(R)
+    if A0 isa UpperTriangular
+        return UpperTriangular(Rc)
+    elseif A0 isa UnitUpperTriangular
+        return UnitUpperTriangular(Rc)
+    else
+        return Rc
+    end
+end
+
+# in-place recursive sqrt of upper quasi-triangular matrix A from
+# Deadman E., Higham N.J., Ralha R. (2013) Blocked Schur Algorithms for Computing the Matrix
+# Square Root. Applied Parallel and Scientific Computing. PARA 2012. Lecture Notes in
+# Computer Science, vol 7782. https://doi.org/10.1007/978-3-642-36803-5_12
+function _sqrt_quasitriu!(R, A; blockwidth=64, n=checksquare(A))
+    if n ≤ blockwidth || !(eltype(R) <: BlasFloat) # base case, perform "point" algorithm
+        _sqrt_quasitriu_block!(R, A)
+    else  # compute blockwise recursion
+        split = div(n, 2)
+        iszero(A[split+1, split]) || (split += 1) # don't split 2x2 diagonal block
+        r1 = 1:split
+        r2 = (split + 1):n
+        n1, n2 = split, n - split
+        A11, A12, A22 = @views A[r1,r1], A[r1,r2], A[r2,r2]
+        R11, R12, R22 = @views R[r1,r1], R[r1,r2], R[r2,r2]
+        # solve diagonal blocks recursively
+        _sqrt_quasitriu!(R11, A11; blockwidth=blockwidth, n=n1)
+        _sqrt_quasitriu!(R22, A22; blockwidth=blockwidth, n=n2)
+        # solve off-diagonal block
+        R12 .= .- A12
+        _sylvester_quasitriu!(R11, R22, R12; blockwidth=blockwidth, nA=n1, nB=n2, raise=false)
+    end
+    return R
+end
+
+function _sqrt_quasitriu_block!(R, A)
+    _sqrt_quasitriu_diag_block!(R, A)
+    _sqrt_quasitriu_offdiag_block!(R, A)
+    return R
+end
+
+function _sqrt_quasitriu_diag_block!(R, A)
+    n = size(R, 1)
+    ta = eltype(R) <: Complex ? complex(eltype(A)) : eltype(A)
+    i = 1
+    @inbounds while i < n
+        if iszero(A[i + 1, i])
+            R[i, i] = sqrt(ta(A[i, i]))
+            i += 1
+        else
+            # this branch is never reached when A is complex triangular
+            @views _sqrt_real_2x2!(R[i:(i + 1), i:(i + 1)], A[i:(i + 1), i:(i + 1)])
+            i += 2
+        end
+    end
+    if i == n
+        R[n, n] = sqrt(ta(A[n, n]))
+    end
+    return R
+end
+
+function _sqrt_quasitriu_offdiag_block!(R, A)
+    n = size(R, 1)
+    j = 1
+    @inbounds while j ≤ n
+        jsize_is_2 = j < n && !iszero(A[j + 1, j])
+        i = j - 1
+        while i > 0
+            isize_is_2 = i > 1 && !iszero(A[i, i - 1])
+            if isize_is_2
+                if jsize_is_2
+                    _sqrt_quasitriu_offdiag_block_2x2!(R, A, i - 1, j)
+                else
+                    _sqrt_quasitriu_offdiag_block_2x1!(R, A, i - 1, j)
+                end
+                i -= 2
+            else
+                if jsize_is_2
+                    _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j)
+                else
+                    _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
+                end
+                i -= 1
+            end
+        end
+        j += 2 - !jsize_is_2
+    end
+    return R
+end
+
+# real square root of 2x2 diagonal block of quasi-triangular matrix from real Schur
+# decomposition. Eqs 6.8-6.9 and Algorithm 6.5 of
+# Higham, 2008, "Functions of Matrices: Theory and Computation", SIAM.
+Base.@propagate_inbounds function _sqrt_real_2x2!(R, A)
+    # in the real Schur form, A[1, 1] == A[2, 2], and A[2, 1] * A[1, 2] < 0
+    θ, a21, a12 = A[1, 1], A[2, 1], A[1, 2]
+    # avoid overflow/underflow of μ
+    # for real sqrt, |d| ≤ 2 max(|a12|,|a21|)
+    μ = sqrt(abs(a12)) * sqrt(abs(a21))
+    α = _real_sqrt(θ, μ)
+    c = 2α
+    R[1, 1] = α
+    R[2, 1] = a21 / c
+    R[1, 2] = a12 / c
+    R[2, 2] = α
+    return R
+end
+
+# real part of square root of θ+im*μ
+@inline function _real_sqrt(θ, μ)
+    t = sqrt((abs(θ) + hypot(θ, μ)) / 2)
+    return θ ≥ 0 ? t : μ / 2t
+end
+
+Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x1!(R, A, i, j)
+    Rii = R[i, i]
+    Rjj = R[j, j]
+    iszero(Rii) && iszero(Rjj) && return R
+    t = eltype(R)
+    tt = typeof(zero(t)*zero(t))
+    r = tt(-A[i, j])
+    @simd for k in (i + 1):(j - 1)
+        r += R[i, k] * R[k, j]
+    end
+    iszero(r) && return R
+    R[i, j] = sylvester(Rii, Rjj, r)
+    return R
+end
+
+Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_1x2!(R, A, i, j)
+    jrange = j:(j + 1)
+    t = eltype(R)
+    tt = typeof(zero(t)*zero(t))
+    r1 = tt(-A[i, j])
+    r2 = tt(-A[i, j + 1])
+    @simd for k in (i + 1):(j - 1)
+        rik = R[i, k]
+        r1 += rik * R[k, j]
+        r2 += rik * R[k, j + 1]
+    end
+    Rjj = @view R[jrange, jrange]
+    Rij = @view R[i, jrange]
+    Rij[1] = r1
+    Rij[2] = r2
+    _sylvester_1x2!(R[i, i], Rjj, Rij)
+    return R
+end
+
+Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x1!(R, A, i, j)
+    irange = i:(i + 1)
+    t = eltype(R)
+    tt = typeof(zero(t)*zero(t))
+    r1 = tt(-A[i, j])
+    r2 = tt(-A[i + 1, j])
+    @simd for k in (i + 2):(j - 1)
+        rkj = R[k, j]
+        r1 += R[i, k] * rkj
+        r2 += R[i + 1, k] * rkj
+    end
+    Rii = @view R[irange, irange]
+    Rij = @view R[irange, j]
+    Rij[1] = r1
+    Rij[2] = r2
+    @views _sylvester_2x1!(Rii, R[j, j], Rij)
+    return R
+end
+
+Base.@propagate_inbounds function _sqrt_quasitriu_offdiag_block_2x2!(R, A, i, j)
+    irange = i:(i + 1)
+    jrange = j:(j + 1)
+    t = eltype(R)
+    tt = typeof(zero(t)*zero(t))
+    for i′ in irange, j′ in jrange
+        Cij = tt(-A[i′, j′])
+        @simd for k in (i + 2):(j - 1)
+            Cij += R[i′, k] * R[k, j′]
+        end
+        R[i′, j′] = Cij
+    end
+    Rii = @view R[irange, irange]
+    Rjj = @view R[jrange, jrange]
+    Rij = @view R[irange, jrange]
+    if !iszero(Rij) && !all(isnan, Rij)
+        _sylvester_2x2!(Rii, Rjj, Rij)
+    end
+    return R
+end
+
+# solve Sylvester's equation AX + XB = -C using blockwise recursion until the dimension of
+# A and B are no greater than blockwidth, based on Algorithm 1 from
+# Jonsson I, Kågström B. Recursive blocked algorithms for solving triangular systems—
+# Part I: one-sided and coupled Sylvester-type matrix equations. (2002) ACM Trans Math Softw.
+# 28(4), https://doi.org/10.1145/592843.592845.
+# specify raise=false to avoid breaking the recursion if a LAPACKException is thrown when
+# computing one of the blocks.
+function _sylvester_quasitriu!(A, B, C; blockwidth=64, nA=checksquare(A), nB=checksquare(B), raise=true)
+    if 1 ≤ nA ≤ blockwidth && 1 ≤ nB ≤ blockwidth
+        _sylvester_quasitriu_base!(A, B, C; raise=raise)
+    elseif nA ≥ 2nB ≥ 2
+        _sylvester_quasitriu_split1!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
+    elseif nB ≥ 2nA ≥ 2
+        _sylvester_quasitriu_split2!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
+    else
+        _sylvester_quasitriu_splitall!(A, B, C; blockwidth=blockwidth, nA=nA, nB=nB, raise=raise)
+    end
+    return C
+end
+function _sylvester_quasitriu_base!(A, B, C; raise=true)
+    try
+        _, scale = LAPACK.trsyl!('N', 'N', A, B, C)
+        rmul!(C, -inv(scale))
+    catch e
+        if !(e isa LAPACKException) || raise
+            throw(e)
+        end
+    end
+    return C
+end
+function _sylvester_quasitriu_split1!(A, B, C; nA=checksquare(A), kwargs...)
+    iA = div(nA, 2)
+    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
+    rA1, rA2 = 1:iA, (iA + 1):nA
+    nA1, nA2 = iA, nA-iA
+    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
+    C1, C2 = @views C[rA1,:], C[rA2,:]
+    _sylvester_quasitriu!(A22, B, C2; nA=nA2, kwargs...)
+    mul!(C1, A12, C2, true, true)
+    _sylvester_quasitriu!(A11, B, C1; nA=nA1, kwargs...)
+    return C
+end
+function _sylvester_quasitriu_split2!(A, B, C; nB=checksquare(B), kwargs...)
+    iB = div(nB, 2)
+    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
+    rB1, rB2 = 1:iB, (iB + 1):nB
+    nB1, nB2 = iB, nB-iB
+    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
+    C1, C2 = @views C[:,rB1], C[:,rB2]
+    _sylvester_quasitriu!(A, B11, C1; nB=nB1, kwargs...)
+    mul!(C2, C1, B12, true, true)
+    _sylvester_quasitriu!(A, B22, C2; nB=nB2, kwargs...)
+    return C
+end
+function _sylvester_quasitriu_splitall!(A, B, C; nA=checksquare(A), nB=checksquare(B), kwargs...)
+    iA = div(nA, 2)
+    iszero(A[iA + 1, iA]) || (iA += 1)  # don't split 2x2 diagonal block
+    iB = div(nB, 2)
+    iszero(B[iB + 1, iB]) || (iB += 1)  # don't split 2x2 diagonal block
+    rA1, rA2 = 1:iA, (iA + 1):nA
+    nA1, nA2 = iA, nA-iA
+    rB1, rB2 = 1:iB, (iB + 1):nB
+    nB1, nB2 = iB, nB-iB
+    A11, A12, A22 = @views A[rA1,rA1], A[rA1,rA2], A[rA2,rA2]
+    B11, B12, B22 = @views B[rB1,rB1], B[rB1,rB2], B[rB2,rB2]
+    C11, C21, C12, C22 = @views C[rA1,rB1], C[rA2,rB1], C[rA1,rB2], C[rA2,rB2]
+    _sylvester_quasitriu!(A22, B11, C21; nA=nA2, nB=nB1, kwargs...)
+    mul!(C11, A12, C21, true, true)
+    _sylvester_quasitriu!(A11, B11, C11; nA=nA1, nB=nB1, kwargs...)
+    mul!(C22, C21, B12, true, true)
+    _sylvester_quasitriu!(A22, B22, C22; nA=nA2, nB=nB2, kwargs...)
+    mul!(C12, A12, C22, true, true)
+    mul!(C12, C11, B12, true, true)
+    _sylvester_quasitriu!(A11, B22, C12; nA=nA1, nB=nB2, kwargs...)
+    return C
+end
+
+# End of auxiliary functions for matrix square root
+
 # Generic eigensystems
 eigvals(A::AbstractTriangular) = diag(A)
 function eigvecs(A::AbstractTriangular{T}) where T
@@ -2677,37 +2659,20 @@ end
 factorize(A::AbstractTriangular) = A
 
 # disambiguation methods: *(AbstractTriangular, Adj/Trans of AbstractVector)
-*(A::AbstractTriangular, B::Adjoint{<:Any,<:AbstractVector}) = adjoint(adjoint(B) * adjoint(A))
-*(A::AbstractTriangular, B::Transpose{<:Any,<:AbstractVector}) = transpose(transpose(B) * transpose(A))
-# disambiguation methods: *(Adj/Trans of AbstractTriangular, Trans/Ajd of AbstractTriangular)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractTriangular}) = copy(A) * B
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractTriangular}) = copy(A) * B
-# disambiguation methods: *(Adj/Trans of AbstractTriangular, Adj/Trans of AbsVec or AbsMat)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractVector}) = adjoint(adjoint(B) * adjoint(A))
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractMatrix}) = A * copy(B)
-*(A::Adjoint{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractVector}) = transpose(transpose(B) * transpose(A))
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Transpose{<:Any,<:AbstractVector}) = transpose(transpose(B) * transpose(A))
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractVector}) = adjoint(adjoint(B) * adjoint(A))
-*(A::Transpose{<:Any,<:AbstractTriangular}, B::Adjoint{<:Any,<:AbstractMatrix}) = A * copy(B)
-# disambiguation methods: *(Adj/Trans of AbsVec or AbsMat, Adj/Trans of AbstractTriangular)
-*(A::Adjoint{<:Any,<:AbstractVector}, B::Transpose{<:Any,<:AbstractTriangular}) = adjoint(adjoint(B) * adjoint(A))
-*(A::Adjoint{<:Any,<:AbstractMatrix}, B::Transpose{<:Any,<:AbstractTriangular}) = copy(A) * B
-*(A::Transpose{<:Any,<:AbstractVector}, B::Adjoint{<:Any,<:AbstractTriangular}) = transpose(transpose(B) * transpose(A))
-*(A::Transpose{<:Any,<:AbstractMatrix}, B::Adjoint{<:Any,<:AbstractTriangular}) = copy(A) * B
+*(A::AbstractTriangular, B::AdjointAbsVec) = adjoint(adjoint(B) * adjoint(A))
+*(A::AbstractTriangular, B::TransposeAbsVec) = transpose(transpose(B) * transpose(A))
 
 # disambiguation methods: /(Adjoint of AbsVec, <:AbstractTriangular)
 /(u::AdjointAbsVec, A::Union{LowerTriangular,UpperTriangular}) = adjoint(adjoint(A) \ u.parent)
 /(u::AdjointAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = adjoint(adjoint(A) \ u.parent)
-# disambiguation methods: /(Adjoint of AbsVec, Adj/Trans of <:AbstractTriangular)
-/(u::AdjointAbsVec, A::Adjoint{<:Any,<:Union{LowerTriangular,UpperTriangular}}) = adjoint(A.parent \ u.parent)
-/(u::AdjointAbsVec, A::Adjoint{<:Any,<:Union{UnitLowerTriangular,UnitUpperTriangular}}) = adjoint(A.parent \ u.parent)
-/(u::AdjointAbsVec, A::Transpose{<:Any,<:Union{LowerTriangular,UpperTriangular}}) = adjoint(conj(A.parent) \ u.parent)
-/(u::AdjointAbsVec, A::Transpose{<:Any,<:Union{UnitLowerTriangular,UnitUpperTriangular}}) = adjoint(conj(A.parent) \ u.parent)
 # disambiguation methods: /(Transpose of AbsVec, <:AbstractTriangular)
 /(u::TransposeAbsVec, A::Union{LowerTriangular,UpperTriangular}) = transpose(transpose(A) \ u.parent)
 /(u::TransposeAbsVec, A::Union{UnitLowerTriangular,UnitUpperTriangular}) = transpose(transpose(A) \ u.parent)
 # disambiguation methods: /(Transpose of AbsVec, Adj/Trans of <:AbstractTriangular)
-/(u::TransposeAbsVec, A::Adjoint{<:Any,<:Union{LowerTriangular,UpperTriangular}}) = transpose(conj(A.parent) \ u.parent)
-/(u::TransposeAbsVec, A::Adjoint{<:Any,<:Union{UnitLowerTriangular,UnitUpperTriangular}}) = transpose(conj(A.parent) \ u.parent)
-/(u::TransposeAbsVec, A::Transpose{<:Any,<:Union{LowerTriangular,UpperTriangular}}) = transpose(A.parent \ u.parent)
-/(u::TransposeAbsVec, A::Transpose{<:Any,<:Union{UnitLowerTriangular,UnitUpperTriangular}}) = transpose(A.parent \ u.parent)
+for (tritype, comptritype) in ((:LowerTriangular, :UpperTriangular),
+                               (:UnitLowerTriangular, :UnitUpperTriangular),
+                               (:UpperTriangular, :LowerTriangular),
+                               (:UnitUpperTriangular, :UnitLowerTriangular))
+    @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Adjoint}) = transpose($comptritype(conj(parent(parent(A)))) \ u.parent)
+    @eval /(u::TransposeAbsVec, A::$tritype{<:Any,<:Transpose}) = transpose(transpose(A) \ u.parent)
+end
diff --git a/stdlib/LinearAlgebra/src/tridiag.jl b/stdlib/LinearAlgebra/src/tridiag.jl
index dbdc4832c5fc34..a686ab44219543 100644
--- a/stdlib/LinearAlgebra/src/tridiag.jl
+++ b/stdlib/LinearAlgebra/src/tridiag.jl
@@ -125,20 +125,16 @@ AbstractMatrix{T}(S::SymTridiagonal) where {T} =
 function Matrix{T}(M::SymTridiagonal) where T
     n = size(M, 1)
     Mf = zeros(T, n, n)
-    if n == 0
-        return Mf
-    end
-    @inbounds begin
-        @simd for i = 1:n-1
-            Mf[i,i] = M.dv[i]
-            Mf[i+1,i] = M.ev[i]
-            Mf[i,i+1] = M.ev[i]
-        end
-        Mf[n,n] = M.dv[n]
+    n == 0 && return Mf
+    @inbounds for i = 1:n-1
+        Mf[i,i] = symmetric(M.dv[i], :U)
+        Mf[i+1,i] = transpose(M.ev[i])
+        Mf[i,i+1] = M.ev[i]
     end
+    Mf[n,n] = symmetric(M.dv[n], :U)
     return Mf
 end
-Matrix(M::SymTridiagonal{T}) where {T} = Matrix{T}(M)
+Matrix(M::SymTridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M)
 Array(M::SymTridiagonal) = Matrix(M)
 
 size(A::SymTridiagonal) = (length(A.dv), length(A.dv))
@@ -152,12 +148,11 @@ function size(A::SymTridiagonal, d::Integer)
     end
 end
 
-# For S<:SymTridiagonal, similar(S[, neweltype]) should yield a SymTridiagonal matrix.
-# On the other hand, similar(S, [neweltype,] shape...) should yield a sparse matrix.
-# The first method below effects the former, and the second the latter.
 similar(S::SymTridiagonal, ::Type{T}) where {T} = SymTridiagonal(similar(S.dv, T), similar(S.ev, T))
-# The method below is moved to SparseArrays for now
-# similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
+similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = zeros(T, dims...)
+
+copyto!(dest::SymTridiagonal, src::SymTridiagonal) =
+    (copyto!(dest.dv, src.dv); copyto!(dest.ev, _evview(src)); dest)
 
 #Elementary operations
 for func in (:conj, :copy, :real, :imag)
@@ -167,19 +162,26 @@ end
 transpose(S::SymTridiagonal) = S
 adjoint(S::SymTridiagonal{<:Real}) = S
 adjoint(S::SymTridiagonal) = Adjoint(S)
+permutedims(S::SymTridiagonal) = S
+function permutedims(S::SymTridiagonal, perm)
+    Base.checkdims_perm(S, S, perm)
+    NTuple{2}(perm) == (2, 1) ? permutedims(S) : S
+end
 Base.copy(S::Adjoint{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(adjoint.(x)), (S.parent.dv, S.parent.ev))...)
-Base.copy(S::Transpose{<:Any,<:SymTridiagonal}) = SymTridiagonal(map(x -> copy.(transpose.(x)), (S.parent.dv, S.parent.ev))...)
 
-function diag(M::SymTridiagonal{<:Number}, n::Integer=0)
+ishermitian(S::SymTridiagonal) = isreal(S.dv) && isreal(_evview(S))
+issymmetric(S::SymTridiagonal) = true
+
+function diag(M::SymTridiagonal{T}, n::Integer=0) where T<:Number
     # every branch call similar(..., ::Int) to make sure the
     # same vector type is returned independent of n
     absn = abs(n)
     if absn == 0
         return copyto!(similar(M.dv, length(M.dv)), M.dv)
     elseif absn == 1
-        return copyto!(similar(M.ev, length(M.ev)), M.ev)
+        return copyto!(similar(M.ev, length(M.dv)-1), _evview(M))
     elseif absn <= size(M,1)
-        return fill!(similar(M.dv, size(M,1)-absn), 0)
+        return fill!(similar(M.dv, size(M,1)-absn), zero(T))
     else
         throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
             "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
@@ -191,9 +193,9 @@ function diag(M::SymTridiagonal, n::Integer=0)
     if n == 0
         return copyto!(similar(M.dv, length(M.dv)), symmetric.(M.dv, :U))
     elseif n == 1
-        return copyto!(similar(M.ev, length(M.ev)), M.ev)
+        return copyto!(similar(M.ev, length(M.dv)-1), _evview(M))
     elseif n == -1
-        return copyto!(similar(M.ev, length(M.ev)), transpose.(M.ev))
+        return copyto!(similar(M.ev, length(M.dv)-1), transpose.(_evview(M)))
     elseif n <= size(M,1)
         throw(ArgumentError("requested diagonal contains undefined zeros of an array type"))
     else
@@ -202,13 +204,14 @@ function diag(M::SymTridiagonal, n::Integer=0)
     end
 end
 
-+(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv+B.dv, A.ev+B.ev)
--(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv-B.dv, A.ev-B.ev)
++(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv+B.dv, _evview(A)+_evview(B))
+-(A::SymTridiagonal, B::SymTridiagonal) = SymTridiagonal(A.dv-B.dv, _evview(A)-_evview(B))
 -(A::SymTridiagonal) = SymTridiagonal(-A.dv, -A.ev)
 *(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv*B, A.ev*B)
-*(B::Number, A::SymTridiagonal) = A*B
+*(B::Number, A::SymTridiagonal) = SymTridiagonal(B*A.dv, B*A.ev)
 /(A::SymTridiagonal, B::Number) = SymTridiagonal(A.dv/B, A.ev/B)
-==(A::SymTridiagonal, B::SymTridiagonal) = (A.dv==B.dv) && (A.ev==B.ev)
+\(B::Number, A::SymTridiagonal) = SymTridiagonal(B\A.dv, B\A.ev)
+==(A::SymTridiagonal, B::SymTridiagonal) = (A.dv==B.dv) && (_evview(A)==_evview(B))
 
 @inline mul!(A::StridedVecOrMat, B::SymTridiagonal, C::StridedVecOrMat,
              alpha::Number, beta::Number) =
@@ -278,30 +281,30 @@ ldiv!(A::SymTridiagonal, B::AbstractVecOrMat; shift::Number=false) = ldiv!(ldlt(
 rdiv!(B::AbstractVecOrMat, A::SymTridiagonal; shift::Number=false) = rdiv!(B, ldlt(A, shift=shift))
 
 eigen!(A::SymTridiagonal{<:BlasReal}) = Eigen(LAPACK.stegr!('V', A.dv, A.ev)...)
-eigen(A::SymTridiagonal{T}) where T = eigen!(copy_oftype(A, eigtype(T)))
+eigen(A::SymTridiagonal{T}) where T = eigen!(copymutable_oftype(A, eigtype(T)))
 
 eigen!(A::SymTridiagonal{<:BlasReal}, irange::UnitRange) =
     Eigen(LAPACK.stegr!('V', 'I', A.dv, A.ev, 0.0, 0.0, irange.start, irange.stop)...)
 eigen(A::SymTridiagonal{T}, irange::UnitRange) where T =
-    eigen!(copy_oftype(A, eigtype(T)), irange)
+    eigen!(copymutable_oftype(A, eigtype(T)), irange)
 
 eigen!(A::SymTridiagonal{<:BlasReal}, vl::Real, vu::Real) =
     Eigen(LAPACK.stegr!('V', 'V', A.dv, A.ev, vl, vu, 0, 0)...)
 eigen(A::SymTridiagonal{T}, vl::Real, vu::Real) where T =
-    eigen!(copy_oftype(A, eigtype(T)), vl, vu)
+    eigen!(copymutable_oftype(A, eigtype(T)), vl, vu)
 
 eigvals!(A::SymTridiagonal{<:BlasReal}) = LAPACK.stev!('N', A.dv, A.ev)[1]
-eigvals(A::SymTridiagonal{T}) where T = eigvals!(copy_oftype(A, eigtype(T)))
+eigvals(A::SymTridiagonal{T}) where T = eigvals!(copymutable_oftype(A, eigtype(T)))
 
 eigvals!(A::SymTridiagonal{<:BlasReal}, irange::UnitRange) =
     LAPACK.stegr!('N', 'I', A.dv, A.ev, 0.0, 0.0, irange.start, irange.stop)[1]
 eigvals(A::SymTridiagonal{T}, irange::UnitRange) where T =
-    eigvals!(copy_oftype(A, eigtype(T)), irange)
+    eigvals!(copymutable_oftype(A, eigtype(T)), irange)
 
 eigvals!(A::SymTridiagonal{<:BlasReal}, vl::Real, vu::Real) =
     LAPACK.stegr!('N', 'V', A.dv, A.ev, vl, vu, 0, 0)[1]
 eigvals(A::SymTridiagonal{T}, vl::Real, vu::Real) where T =
-    eigvals!(copy_oftype(A, eigtype(T)), vl, vu)
+    eigvals!(copymutable_oftype(A, eigtype(T)), vl, vu)
 
 #Computes largest and smallest eigenvalue
 eigmax(A::SymTridiagonal) = eigvals(A, size(A, 1):size(A, 1))[1]
@@ -353,33 +356,34 @@ function svdvals!(A::SymTridiagonal)
     return sort!(map!(abs, vals, vals); rev=true)
 end
 
-#tril and triu
+# tril and triu
 
 function istriu(M::SymTridiagonal, k::Integer=0)
     if k <= -1
         return true
     elseif k == 0
-        return iszero(M.ev)
+        return iszero(_evview(M))
     else # k >= 1
-        return iszero(M.ev) && iszero(M.dv)
+        return iszero(_evview(M)) && iszero(M.dv)
     end
 end
 istril(M::SymTridiagonal, k::Integer) = istriu(M, -k)
-iszero(M::SymTridiagonal) = iszero(M.ev) && iszero(M.dv)
-isone(M::SymTridiagonal) = iszero(M.ev) && all(isone, M.dv)
-isdiag(M::SymTridiagonal) = iszero(M.ev)
+iszero(M::SymTridiagonal) =  iszero(_evview(M)) && iszero(M.dv)
+isone(M::SymTridiagonal) =  iszero(_evview(M)) && all(isone, M.dv)
+isdiag(M::SymTridiagonal) =  iszero(_evview(M))
+
 
-function tril!(M::SymTridiagonal, k::Integer=0)
+function tril!(M::SymTridiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n - 1 <= k <= n - 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < -1
-        fill!(M.ev,0)
-        fill!(M.dv,0)
+        fill!(M.ev, zero(T))
+        fill!(M.dv, zero(T))
         return Tridiagonal(M.ev,M.dv,copy(M.ev))
     elseif k == -1
-        fill!(M.dv,0)
+        fill!(M.dv, zero(T))
         return Tridiagonal(M.ev,M.dv,zero(M.ev))
     elseif k == 0
         return Tridiagonal(M.ev,M.dv,zero(M.ev))
@@ -388,17 +392,17 @@ function tril!(M::SymTridiagonal, k::Integer=0)
     end
 end
 
-function triu!(M::SymTridiagonal, k::Integer=0)
+function triu!(M::SymTridiagonal{T}, k::Integer=0) where T
     n = length(M.dv)
     if !(-n + 1 <= k <= n + 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif k > 1
-        fill!(M.ev,0)
-        fill!(M.dv,0)
+        fill!(M.ev, zero(T))
+        fill!(M.dv, zero(T))
         return Tridiagonal(M.ev,M.dv,copy(M.ev))
     elseif k == 1
-        fill!(M.dv,0)
+        fill!(M.dv, zero(T))
         return Tridiagonal(zero(M.ev),M.dv,M.ev)
     elseif k == 0
         return Tridiagonal(zero(M.ev),M.dv,M.ev)
@@ -441,22 +445,20 @@ end
 det(A::SymTridiagonal; shift::Number=false) = det_usmani(A.ev, A.dv, A.ev, shift)
 logabsdet(A::SymTridiagonal; shift::Number=false) = logabsdet(ldlt(A; shift=shift))
 
-function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T
-    if !(1 <= i <= size(A,2) && 1 <= j <= size(A,2))
-        throw(BoundsError(A, (i,j)))
-    end
+@inline function getindex(A::SymTridiagonal{T}, i::Integer, j::Integer) where T
+    @boundscheck checkbounds(A, i, j)
     if i == j
-        return symmetric(A.dv[i], :U)::symmetric_type(eltype(A.dv))
+        return symmetric((@inbounds A.dv[i]), :U)::symmetric_type(eltype(A.dv))
     elseif i == j + 1
-        return copy(transpose(A.ev[j])) # materialized for type stability
+        return copy(transpose(@inbounds A.ev[j])) # materialized for type stability
     elseif i + 1 == j
-        return A.ev[i]
+        return @inbounds A.ev[i]
     else
         return zero(T)
     end
 end
 
-function setindex!(A::SymTridiagonal, x, i::Integer, j::Integer)
+@inline function setindex!(A::SymTridiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.dv[i] = x
@@ -569,26 +571,23 @@ function size(M::Tridiagonal, d::Integer)
     end
 end
 
-function Matrix{T}(M::Tridiagonal{T}) where T
+function Matrix{T}(M::Tridiagonal) where {T}
     A = zeros(T, size(M))
-    for i = 1:length(M.d)
+    n = length(M.d)
+    n == 0 && return A
+    for i in 1:n-1
         A[i,i] = M.d[i]
-    end
-    for i = 1:length(M.d)-1
         A[i+1,i] = M.dl[i]
         A[i,i+1] = M.du[i]
     end
+    A[n,n] = M.d[n]
     A
 end
-Matrix(M::Tridiagonal{T}) where {T} = Matrix{T}(M)
+Matrix(M::Tridiagonal{T}) where {T} = Matrix{promote_type(T, typeof(zero(T)))}(M)
 Array(M::Tridiagonal) = Matrix(M)
 
-# For M<:Tridiagonal, similar(M[, neweltype]) should yield a Tridiagonal matrix.
-# On the other hand, similar(M, [neweltype,] shape...) should yield a sparse matrix.
-# The first method below effects the former, and the second the latter.
 similar(M::Tridiagonal, ::Type{T}) where {T} = Tridiagonal(similar(M.dl, T), similar(M.d, T), similar(M.du, T))
-# The method below is moved to SparseArrays for now
-# similar(M::Tridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
+similar(M::Tridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = zeros(T, dims...)
 
 # Operations on Tridiagonal matrices
 copyto!(dest::Tridiagonal, src::Tridiagonal) = (copyto!(dest.dl, src.dl); copyto!(dest.d, src.d); copyto!(dest.du, src.du); dest)
@@ -604,10 +603,18 @@ adjoint(S::Tridiagonal) = Adjoint(S)
 transpose(S::Tridiagonal) = Transpose(S)
 adjoint(S::Tridiagonal{<:Real}) = Tridiagonal(S.du, S.d, S.dl)
 transpose(S::Tridiagonal{<:Number}) = Tridiagonal(S.du, S.d, S.dl)
+permutedims(T::Tridiagonal) = Tridiagonal(T.du, T.d, T.dl)
+function permutedims(T::Tridiagonal, perm)
+    Base.checkdims_perm(T, T, perm)
+    NTuple{2}(perm) == (2, 1) ? permutedims(T) : T
+end
 Base.copy(aS::Adjoint{<:Any,<:Tridiagonal}) = (S = aS.parent; Tridiagonal(map(x -> copy.(adjoint.(x)), (S.du, S.d, S.dl))...))
 Base.copy(tS::Transpose{<:Any,<:Tridiagonal}) = (S = tS.parent; Tridiagonal(map(x -> copy.(transpose.(x)), (S.du, S.d, S.dl))...))
 
-\(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:StridedVecOrMat}) = copy(A) \ copy(B)
+ishermitian(S::Tridiagonal) = all(ishermitian, S.d) && all(Iterators.map((x, y) -> x == y', S.du, S.dl))
+issymmetric(S::Tridiagonal) = all(issymmetric, S.d) && all(Iterators.map((x, y) -> x == transpose(y), S.du, S.dl))
+
+\(A::Adjoint{<:Any,<:Tridiagonal}, B::Adjoint{<:Any,<:StridedVecOrMat}) = copy(A) \ B
 
 function diag(M::Tridiagonal{T}, n::Integer=0) where T
     # every branch call similar(..., ::Int) to make sure the
@@ -619,29 +626,27 @@ function diag(M::Tridiagonal{T}, n::Integer=0) where T
     elseif n == 1
         return copyto!(similar(M.du, length(M.du)), M.du)
     elseif abs(n) <= size(M,1)
-        return fill!(similar(M.d, size(M,1)-abs(n)), 0)
+        return fill!(similar(M.d, size(M,1)-abs(n)), zero(T))
     else
         throw(ArgumentError(string("requested diagonal, $n, must be at least $(-size(M, 1)) ",
             "and at most $(size(M, 2)) for an $(size(M, 1))-by-$(size(M, 2)) matrix")))
     end
 end
 
-function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T
-    if !(1 <= i <= size(A,2) && 1 <= j <= size(A,2))
-        throw(BoundsError(A, (i,j)))
-    end
+@inline function getindex(A::Tridiagonal{T}, i::Integer, j::Integer) where T
+    @boundscheck checkbounds(A, i, j)
     if i == j
-        return A.d[i]
+        return @inbounds A.d[i]
     elseif i == j + 1
-        return A.dl[j]
+        return @inbounds A.dl[j]
     elseif i + 1 == j
-        return A.du[i]
+        return @inbounds A.du[i]
     else
         return zero(T)
     end
 end
 
-function setindex!(A::Tridiagonal, x, i::Integer, j::Integer)
+@inline function setindex!(A::Tridiagonal, x, i::Integer, j::Integer)
     @boundscheck checkbounds(A, i, j)
     if i == j
         @inbounds A.d[i] = x
@@ -690,38 +695,38 @@ function istril(M::Tridiagonal, k::Integer=0)
 end
 isdiag(M::Tridiagonal) = iszero(M.dl) && iszero(M.du)
 
-function tril!(M::Tridiagonal, k::Integer=0)
+function tril!(M::Tridiagonal{T}, k::Integer=0) where T
     n = length(M.d)
     if !(-n - 1 <= k <= n - 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n - 1) and at most $(n - 1) in an $n-by-$n matrix")))
     elseif k < -1
-        fill!(M.dl,0)
-        fill!(M.d,0)
-        fill!(M.du,0)
+        fill!(M.dl, zero(T))
+        fill!(M.d, zero(T))
+        fill!(M.du, zero(T))
     elseif k == -1
-        fill!(M.d,0)
-        fill!(M.du,0)
+        fill!(M.d, zero(T))
+        fill!(M.du, zero(T))
     elseif k == 0
-        fill!(M.du,0)
+        fill!(M.du, zero(T))
     end
     return M
 end
 
-function triu!(M::Tridiagonal, k::Integer=0)
+function triu!(M::Tridiagonal{T}, k::Integer=0) where T
     n = length(M.d)
     if !(-n + 1 <= k <= n + 1)
         throw(ArgumentError(string("the requested diagonal, $k, must be at least ",
             "$(-n + 1) and at most $(n + 1) in an $n-by-$n matrix")))
     elseif k > 1
-        fill!(M.dl,0)
-        fill!(M.d,0)
-        fill!(M.du,0)
+        fill!(M.dl, zero(T))
+        fill!(M.d, zero(T))
+        fill!(M.du, zero(T))
     elseif k == 1
-        fill!(M.dl,0)
-        fill!(M.d,0)
+        fill!(M.dl, zero(T))
+        fill!(M.d, zero(T))
     elseif k == 0
-        fill!(M.dl,0)
+        fill!(M.dl, zero(T))
     end
     return M
 end
@@ -732,20 +737,26 @@ end
 
 +(A::Tridiagonal, B::Tridiagonal) = Tridiagonal(A.dl+B.dl, A.d+B.d, A.du+B.du)
 -(A::Tridiagonal, B::Tridiagonal) = Tridiagonal(A.dl-B.dl, A.d-B.d, A.du-B.du)
+-(A::Tridiagonal) = Tridiagonal(-A.dl, -A.d, -A.du)
 *(A::Tridiagonal, B::Number) = Tridiagonal(A.dl*B, A.d*B, A.du*B)
-*(B::Number, A::Tridiagonal) = A*B
+*(B::Number, A::Tridiagonal) = Tridiagonal(B*A.dl, B*A.d, B*A.du)
 /(A::Tridiagonal, B::Number) = Tridiagonal(A.dl/B, A.d/B, A.du/B)
+\(B::Number, A::Tridiagonal) = Tridiagonal(B\A.dl, B\A.d, B\A.du)
 
 ==(A::Tridiagonal, B::Tridiagonal) = (A.dl==B.dl) && (A.d==B.d) && (A.du==B.du)
-==(A::Tridiagonal, B::SymTridiagonal) = (A.dl==A.du==B.ev) && (A.d==B.dv)
-==(A::SymTridiagonal, B::Tridiagonal) = (B.dl==B.du==A.ev) && (B.d==A.dv)
+function ==(A::Tridiagonal, B::SymTridiagonal)
+    iseq = all(Iterators.map((x, y) -> x == transpose(y), A.du, A.dl))
+    iseq = iseq && A.du == _evview(B)
+    iseq && all(Iterators.map((x, y) -> x == symmetric(y, :U), A.d, B.dv))
+end
+==(A::SymTridiagonal, B::Tridiagonal) = B == A
 
 det(A::Tridiagonal) = det_usmani(A.dl, A.d, A.du)
 
 AbstractMatrix{T}(M::Tridiagonal) where {T} = Tridiagonal{T}(M)
 Tridiagonal{T}(M::SymTridiagonal{T}) where {T} = Tridiagonal(M)
 function SymTridiagonal{T}(M::Tridiagonal) where T
-    if M.dl == M.du
+    if issymmetric(M)
         return SymTridiagonal{T}(convert(AbstractVector{T},M.d), convert(AbstractVector{T},M.dl))
     else
         throw(ArgumentError("Tridiagonal is not symmetric, cannot convert to SymTridiagonal"))
@@ -753,7 +764,10 @@ function SymTridiagonal{T}(M::Tridiagonal) where T
 end
 
 Base._sum(A::Tridiagonal, ::Colon) = sum(A.d) + sum(A.dl) + sum(A.du)
-Base._sum(A::SymTridiagonal, ::Colon) = sum(A.dv) + 2sum(A.ev)
+function Base._sum(A::SymTridiagonal, ::Colon)
+    se = sum(_evview(A))
+    symmetric(sum(A.dv), :U) + se + transpose(se)
+end
 
 function Base._sum(A::Tridiagonal, dims::Integer)
     res = Base.reducedim_initarray(A, dims, zero(eltype(A)))
@@ -800,24 +814,24 @@ function Base._sum(A::SymTridiagonal, dims::Integer)
     end
     @inbounds begin
         if dims == 1
-            res[1] = A.ev[1] + A.dv[1]
+            res[1] = transpose(A.ev[1]) + symmetric(A.dv[1], :U)
             for i = 2:n-1
-                res[i] = A.ev[i] + A.dv[i] + A.ev[i-1]
+                res[i] = transpose(A.ev[i]) + symmetric(A.dv[i], :U) + A.ev[i-1]
             end
-            res[n] = A.dv[n] + A.ev[n-1]
+            res[n] = symmetric(A.dv[n], :U) + A.ev[n-1]
         elseif dims == 2
-            res[1] = A.dv[1] + A.ev[1]
+            res[1] = symmetric(A.dv[1], :U) + A.ev[1]
             for i = 2:n-1
-                res[i] = A.ev[i-1] + A.dv[i] + A.ev[i]
+                res[i] = transpose(A.ev[i-1]) + symmetric(A.dv[i], :U) + A.ev[i]
             end
-            res[n] = A.ev[n-1] + A.dv[n]
+            res[n] = transpose(A.ev[n-1]) + symmetric(A.dv[n], :U)
         elseif dims >= 3
             for i = 1:n-1
                 res[i,i+1] = A.ev[i]
-                res[i,i]   = A.dv[i]
-                res[i+1,i] = A.ev[i]
+                res[i,i]   = symmetric(A.dv[i], :U)
+                res[i+1,i] = transpose(A.ev[i])
             end
-            res[n,n] = A.dv[n]
+            res[n,n] = symmetric(A.dv[n], :U)
         end
     end
     res
@@ -841,3 +855,12 @@ function dot(x::AbstractVector, A::Tridiagonal, y::AbstractVector)
     r += dot(adjoint(du[nx-1])*x₀ + adjoint(d[nx])*x₊, y[nx])
     return r
 end
+
+function cholesky(S::SymTridiagonal, ::NoPivot = NoPivot(); check::Bool = true)
+    if !ishermitian(S)
+        check && checkpositivedefinite(-1)
+        return Cholesky(S, 'U', convert(BlasInt, -1))
+    end
+    T = choltype(eltype(S))
+    cholesky!(Hermitian(Bidiagonal{T}(diag(S, 0), diag(S, 1), :U)), NoPivot(); check = check)
+end
diff --git a/stdlib/LinearAlgebra/src/uniformscaling.jl b/stdlib/LinearAlgebra/src/uniformscaling.jl
index 262876ea410795..661bd28cb8f917 100644
--- a/stdlib/LinearAlgebra/src/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/src/uniformscaling.jl
@@ -88,23 +88,22 @@ ndims(J::UniformScaling) = 2
 Base.has_offset_axes(::UniformScaling) = false
 getindex(J::UniformScaling, i::Integer,j::Integer) = ifelse(i==j,J.λ,zero(J.λ))
 
-getindex(x::UniformScaling, n::Integer, m::AbstractRange{<:Integer}) = getindex(x, m, n)
-function getindex(x::UniformScaling{T}, n::AbstractRange{<:Integer}, m::Integer) where T
-    v = zeros(T, length(n))
-    @inbounds for (i,ii) in enumerate(n)
+getindex(J::UniformScaling, n::Integer, m::AbstractVector{<:Integer}) = getindex(J, m, n)
+function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::Integer) where T
+    v = zeros(T, axes(n))
+    @inbounds for (i,ii) in pairs(n)
         if ii == m
-            v[i] = x.λ
+            v[i] = J.λ
         end
     end
     return v
 end
 
-
-function getindex(x::UniformScaling{T}, n::AbstractRange{<:Integer}, m::AbstractRange{<:Integer}) where T
-    A = zeros(T, length(n), length(m))
-    @inbounds for (j,jj) in enumerate(m), (i,ii) in enumerate(n)
+function getindex(J::UniformScaling{T}, n::AbstractVector{<:Integer}, m::AbstractVector{<:Integer}) where T
+    A = zeros(T, axes(n)..., axes(m)...)
+    @inbounds for (j,jj) in pairs(m), (i,ii) in pairs(n)
         if ii == jj
-            A[i,j] = x.λ
+            A[i,j] = J.λ
         end
     end
     return A
@@ -180,7 +179,7 @@ for (t1, t2) in ((:UnitUpperTriangular, :UpperTriangular),
                  (:UnitLowerTriangular, :LowerTriangular))
     @eval begin
         function (+)(UL::$t1, J::UniformScaling)
-            ULnew = copy_oftype(UL.data, Base._return_type(+, Tuple{eltype(UL), typeof(J)}))
+            ULnew = copymutable_oftype(UL.data, Base._return_type(+, Tuple{eltype(UL), typeof(J)}))
             for i in axes(ULnew, 1)
                 ULnew[i,i] = one(ULnew[i,i]) + J
             end
@@ -195,7 +194,7 @@ end
 # UniformScaling{<:Complex} that happens to be real.
 function (+)(A::Hermitian, J::UniformScaling{<:Complex})
     TS = Base._return_type(+, Tuple{eltype(A), typeof(J)})
-    B = copytri!(copy_oftype(parent(A), TS), A.uplo, true)
+    B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true)
     for i in diagind(B)
         B[i] = A[i] + J
     end
@@ -204,7 +203,7 @@ end
 
 function (-)(J::UniformScaling{<:Complex}, A::Hermitian)
     TS = Base._return_type(+, Tuple{eltype(A), typeof(J)})
-    B = copytri!(copy_oftype(parent(A), TS), A.uplo, true)
+    B = copytri!(copymutable_oftype(parent(A), TS), A.uplo, true)
     B .= .-B
     for i in diagind(B)
         B[i] = J - A[i]
@@ -214,9 +213,9 @@ end
 
 function (+)(A::AbstractMatrix, J::UniformScaling)
     checksquare(A)
-    B = copy_oftype(A, Base._return_type(+, Tuple{eltype(A), typeof(J)}))
-    @inbounds for i in axes(A, 1)
-        B[i,i] += J
+    B = copymutable_oftype(A, Base._return_type(+, Tuple{eltype(A), typeof(J)}))
+    for i in intersect(axes(A,1), axes(A,2))
+        @inbounds B[i,i] += J
     end
     return B
 end
@@ -224,8 +223,8 @@ end
 function (-)(J::UniformScaling, A::AbstractMatrix)
     checksquare(A)
     B = convert(AbstractMatrix{Base._return_type(+, Tuple{eltype(A), typeof(J)})}, -A)
-    @inbounds for i in axes(A, 1)
-        B[i,i] += J
+    for i in intersect(axes(A,1), axes(A,2))
+        @inbounds B[i,i] += J
     end
     return B
 end
@@ -265,7 +264,8 @@ end
 *(J::UniformScaling, x::Number) = UniformScaling(J.λ*x)
 
 /(J1::UniformScaling, J2::UniformScaling) = J2.λ == 0 ? throw(SingularException(1)) : UniformScaling(J1.λ/J2.λ)
-/(J::UniformScaling, A::AbstractMatrix) = lmul!(J.λ, inv(A))
+/(J::UniformScaling, A::AbstractMatrix) =
+    (invA = inv(A); lmul!(J.λ, convert(AbstractMatrix{promote_type(eltype(J),eltype(invA))}, invA)))
 /(A::AbstractMatrix, J::UniformScaling) = J.λ == 0 ? throw(SingularException(1)) : A/J.λ
 /(v::AbstractVector, J::UniformScaling) = reshape(v, length(v), 1) / J
 
@@ -273,7 +273,8 @@ end
 
 \(J1::UniformScaling, J2::UniformScaling) = J1.λ == 0 ? throw(SingularException(1)) : UniformScaling(J1.λ\J2.λ)
 \(J::UniformScaling, A::AbstractVecOrMat) = J.λ == 0 ? throw(SingularException(1)) : J.λ\A
-\(A::AbstractMatrix, J::UniformScaling) = rmul!(inv(A), J.λ)
+\(A::AbstractMatrix, J::UniformScaling) =
+    (invA = inv(A); rmul!(convert(AbstractMatrix{promote_type(eltype(invA),eltype(J))}, invA), J.λ))
 \(F::Factorization, J::UniformScaling) = F \ J(size(F,1))
 
 \(x::Number, J::UniformScaling) = UniformScaling(x\J.λ)
@@ -282,6 +283,24 @@ end
     mul!(C, A, J.λ, alpha, beta)
 @inline mul!(C::AbstractVecOrMat, J::UniformScaling, B::AbstractVecOrMat, alpha::Number, beta::Number) =
     mul!(C, J.λ, B, alpha, beta)
+
+function mul!(out::AbstractMatrix{T}, a::Number, B::UniformScaling, α::Number, β::Number) where {T}
+    checksquare(out)
+    if iszero(β)  # zero contribution of the out matrix
+        fill!(out, zero(T))
+    elseif !isone(β)
+        rmul!(out, β)
+    end
+    s = convert(T, a*B.λ*α)
+    if !iszero(s)
+        @inbounds for i in diagind(out)
+            out[i] += s
+        end
+    end
+    return out
+end
+@inline mul!(out::AbstractMatrix, A::UniformScaling, b::Number, α::Number, β::Number)=
+    mul!(out, A.λ, UniformScaling(b), α, β)
 rmul!(A::AbstractMatrix, J::UniformScaling) = rmul!(A, J.λ)
 lmul!(J::UniformScaling, B::AbstractVecOrMat) = lmul!(J.λ, B)
 rdiv!(A::AbstractMatrix, J::UniformScaling) = rdiv!(A, J.λ)
@@ -324,6 +343,9 @@ function ==(A::StridedMatrix, J::UniformScaling)
     return true
 end
 
+isequal(A::AbstractMatrix, J::UniformScaling) = false
+isequal(J::UniformScaling, A::AbstractMatrix) = false
+
 function isapprox(J1::UniformScaling{T}, J2::UniformScaling{S};
             atol::Real=0, rtol::Real=Base.rtoldefault(T,S,atol), nans::Bool=false) where {T<:Number,S<:Number}
     isapprox(J1.λ, J2.λ, rtol=rtol, atol=atol, nans=nans)
@@ -368,7 +390,8 @@ end
 # in A to matrices of type T and sizes given by n[k:end].  n is an array
 # so that the same promotion code can be used for hvcat.  We pass the type T
 # so that we can re-use this code for sparse-matrix hcat etcetera.
-promote_to_arrays_(n::Int, ::Type{Matrix}, J::UniformScaling{T}) where {T} = copyto!(Matrix{T}(undef, n,n), J)
+promote_to_arrays_(n::Int, ::Type, a::Number) = a
+promote_to_arrays_(n::Int, ::Type{Matrix}, J::UniformScaling{T}) where {T} = Matrix(J, n, n)
 promote_to_arrays_(n::Int, ::Type, A::AbstractVecOrMat) = A
 promote_to_arrays(n,k, ::Type) = ()
 promote_to_arrays(n,k, ::Type{T}, A) where {T} = (promote_to_arrays_(n[k], T, A),)
@@ -378,11 +401,13 @@ promote_to_arrays(n,k, ::Type{T}, A, B, C) where {T} =
     (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays_(n[k+2], T, C))
 promote_to_arrays(n,k, ::Type{T}, A, B, Cs...) where {T} =
     (promote_to_arrays_(n[k], T, A), promote_to_arrays_(n[k+1], T, B), promote_to_arrays(n,k+2, T, Cs...)...)
-promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling}}}) = Matrix
+promote_to_array_type(A::Tuple{Vararg{Union{AbstractVecOrMat,UniformScaling,Number}}}) = Matrix
 
-for (f,dim,name) in ((:hcat,1,"rows"), (:vcat,2,"cols"))
+for (f, _f, dim, name) in ((:hcat, :_hcat, 1, "rows"), (:vcat, :_vcat, 2, "cols"))
     @eval begin
-        function $f(A::Union{AbstractVecOrMat,UniformScaling}...)
+        @inline $f(A::Union{AbstractVecOrMat,UniformScaling}...) = $_f(A...)
+        @inline $f(A::Union{AbstractVecOrMat,UniformScaling,Number}...) = $_f(A...)
+        function $_f(A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A))
             n = -1
             for a in A
                 if !isa(a, UniformScaling)
@@ -395,13 +420,14 @@ for (f,dim,name) in ((:hcat,1,"rows"), (:vcat,2,"cols"))
                 end
             end
             n == -1 && throw(ArgumentError($("$f of only UniformScaling objects cannot determine the matrix size")))
-            return $f(promote_to_arrays(fill(n,length(A)),1, promote_to_array_type(A), A...)...)
+            return cat(promote_to_arrays(fill(n, length(A)), 1, array_type, A...)..., dims=Val(3-$dim))
         end
     end
 end
 
-
-function hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling}...)
+hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling}...) = _hvcat(rows, A...)
+hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...) = _hvcat(rows, A...)
+function _hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScaling,Number}...; array_type = promote_to_array_type(A))
     require_one_based_indexing(A...)
     nr = length(rows)
     sum(rows) == length(A) || throw(ArgumentError("mismatch between row sizes and number of arguments"))
@@ -444,8 +470,8 @@ function hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScalin
         j = 0
         for i = 1:nr
             if rows[i] > 0 && n[j+1] == -1 # this row consists entirely of UniformScalings
-                nci = nc ÷ rows[i]
-                nci * rows[i] != nc && throw(DimensionMismatch("indivisible UniformScaling sizes"))
+                nci, r = divrem(nc, rows[i])
+                r != 0 && throw(DimensionMismatch("indivisible UniformScaling sizes"))
                 for k = 1:rows[i]
                     n[j+k] = nci
                 end
@@ -453,7 +479,17 @@ function hvcat(rows::Tuple{Vararg{Int}}, A::Union{AbstractVecOrMat,UniformScalin
             j += rows[i]
         end
     end
-    return hvcat(rows, promote_to_arrays(n,1, promote_to_array_type(A), A...)...)
+    Amat = promote_to_arrays(n, 1, array_type, A...)
+    # We have two methods for promote_to_array_type, one returning Matrix and
+    # another one returning SparseMatrixCSC (in SparseArrays.jl). In the dense
+    # case, we cannot call hvcat for the promoted UniformScalings because this
+    # causes a stack overflow. In the sparse case, however, we cannot call
+    # typed_hvcat because we need a sparse output.
+    if array_type == Matrix
+        return typed_hvcat(promote_eltype(Amat...), rows, Amat...)
+    else
+        return hvcat(rows, Amat...)
+    end
 end
 
 ## Matrix construction from UniformScaling
@@ -477,6 +513,18 @@ Array(s::UniformScaling, dims::Dims{2}) = Matrix(s, dims)
 Diagonal{T}(s::UniformScaling, m::Integer) where {T} = Diagonal{T}(fill(T(s.λ), m))
 Diagonal(s::UniformScaling, m::Integer) = Diagonal{eltype(s)}(s, m)
 
+dot(A::AbstractMatrix, J::UniformScaling) = dot(tr(A), J.λ)
+dot(J::UniformScaling, A::AbstractMatrix) = dot(J.λ, tr(A))
+
 dot(x::AbstractVector, J::UniformScaling, y::AbstractVector) = dot(x, J.λ, y)
 dot(x::AbstractVector, a::Number, y::AbstractVector) = sum(t -> dot(t[1], a, t[2]), zip(x, y))
 dot(x::AbstractVector, a::Union{Real,Complex}, y::AbstractVector) = a*dot(x, y)
+
+# muladd
+Base.muladd(A::UniformScaling, B::UniformScaling, z::UniformScaling) =
+    UniformScaling(A.λ * B.λ + z.λ)
+Base.muladd(A::Union{Diagonal, UniformScaling}, B::Union{Diagonal, UniformScaling}, z::Union{Diagonal, UniformScaling}) =
+    Diagonal(_diag_or_value(A) .* _diag_or_value(B) .+ _diag_or_value(z))
+
+_diag_or_value(A::Diagonal) = A.diag
+_diag_or_value(A::UniformScaling) = A.λ
diff --git a/stdlib/LinearAlgebra/test/addmul.jl b/stdlib/LinearAlgebra/test/addmul.jl
index 42529f3f4f334a..72fdf687bf5c30 100644
--- a/stdlib/LinearAlgebra/test/addmul.jl
+++ b/stdlib/LinearAlgebra/test/addmul.jl
@@ -6,7 +6,6 @@ using Base: rtoldefault
 using Test
 using LinearAlgebra
 using LinearAlgebra: AbstractTriangular
-using SparseArrays
 using Random
 
 _rand(::Type{T}) where {T <: AbstractFloat} = T(randn())
diff --git a/stdlib/LinearAlgebra/test/adjtrans.jl b/stdlib/LinearAlgebra/test/adjtrans.jl
index 0f95fa71d9baab..7b782d463768dc 100644
--- a/stdlib/LinearAlgebra/test/adjtrans.jl
+++ b/stdlib/LinearAlgebra/test/adjtrans.jl
@@ -2,20 +2,22 @@
 
 module TestAdjointTranspose
 
-using Test, LinearAlgebra, SparseArrays
+using Test, LinearAlgebra
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 
 @testset "Adjoint and Transpose inner constructor basics" begin
     intvec, intmat = [1, 2], [1 2; 3 4]
     # Adjoint/Transpose eltype must match the type of the Adjoint/Transpose of the input eltype
-    @test_throws ErrorException Adjoint{Float64,Vector{Int}}(intvec)
-    @test_throws ErrorException Adjoint{Float64,Matrix{Int}}(intmat)
-    @test_throws ErrorException Transpose{Float64,Vector{Int}}(intvec)
-    @test_throws ErrorException Transpose{Float64,Matrix{Int}}(intmat)
+    @test_throws TypeError Adjoint{Float64,Vector{Int}}(intvec)[1,1]
+    @test_throws TypeError Adjoint{Float64,Matrix{Int}}(intmat)[1,1]
+    @test_throws TypeError Transpose{Float64,Vector{Int}}(intvec)[1,1]
+    @test_throws TypeError Transpose{Float64,Matrix{Int}}(intmat)[1,1]
     # Adjoint/Transpose wrapped array type must match the input array type
-    @test_throws MethodError Adjoint{Int,Vector{Float64}}(intvec)
-    @test_throws MethodError Adjoint{Int,Matrix{Float64}}(intmat)
-    @test_throws MethodError Transpose{Int,Vector{Float64}}(intvec)
-    @test_throws MethodError Transpose{Int,Matrix{Float64}}(intmat)
+    @test_throws TypeError Adjoint{Int,Vector{Float64}}(intvec)[1,1]
+    @test_throws TypeError Adjoint{Int,Matrix{Float64}}(intmat)[1,1]
+    @test_throws TypeError Transpose{Int,Vector{Float64}}(intvec)[1,1]
+    @test_throws TypeError Transpose{Int,Matrix{Float64}}(intmat)[1,1]
     # Adjoint/Transpose inner constructor basic functionality, concrete scalar eltype
     @test (Adjoint{Int,Vector{Int}}(intvec)::Adjoint{Int,Vector{Int}}).parent === intvec
     @test (Adjoint{Int,Matrix{Int}}(intmat)::Adjoint{Int,Matrix{Int}}).parent === intmat
@@ -239,6 +241,25 @@ end
     @test convert(Transpose{Float64,Matrix{Float64}}, Transpose(intmat))::Transpose{Float64,Matrix{Float64}} == Transpose(intmat)
 end
 
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Adjoint and Transpose convert methods to AbstractArray" begin
+    # tests corresponding to #34995
+    intvec, intmat = [1, 2], [1 2 3; 4 5 6]
+    statvec = ImmutableArray(intvec)
+    statmat = ImmutableArray(intmat)
+
+    @test convert(AbstractArray{Float64}, Adjoint(statvec))::Adjoint{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Adjoint(statvec)
+    @test convert(AbstractArray{Float64}, Adjoint(statmat))::Array{Float64,2} == Adjoint(statmat)
+    @test convert(AbstractArray{Float64}, Transpose(statvec))::Transpose{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Transpose(statvec)
+    @test convert(AbstractArray{Float64}, Transpose(statmat))::Array{Float64,2} == Transpose(statmat)
+    @test convert(AbstractMatrix{Float64}, Adjoint(statvec))::Adjoint{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Adjoint(statvec)
+    @test convert(AbstractMatrix{Float64}, Adjoint(statmat))::Array{Float64,2} == Adjoint(statmat)
+    @test convert(AbstractMatrix{Float64}, Transpose(statvec))::Transpose{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Transpose(statvec)
+    @test convert(AbstractMatrix{Float64}, Transpose(statmat))::Array{Float64,2} == Transpose(statmat)
+end
+
 @testset "Adjoint and Transpose similar methods" begin
     intvec, intmat = [1, 2], [1 2 3; 4 5 6]
     # similar with no additional specifications, vector (rewrapping) semantics
@@ -275,6 +296,9 @@ end
     @test vec(Transpose(intvec)) === intvec
     cvec = [1 + 1im]
     @test vec(cvec')[1] == cvec[1]'
+    mvec = [[1 2; 3 4+5im]];
+    @test vec(transpose(mvec))[1] == transpose(mvec[1])
+    @test vec(adjoint(mvec))[1] == adjoint(mvec[1])
 end
 
 @testset "horizontal concatenation of Adjoint/Transpose-wrapped vectors and Numbers" begin
@@ -330,14 +354,6 @@ end
     @test broadcast(+, Transpose(vec), 1, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec .+ 1
     @test broadcast(+, Adjoint(vec), 1im, Adjoint(vec))::Adjoint{Complex{Int},Vector{Complex{Int}}} == avec + avec .+ 1im
     @test broadcast(+, Transpose(vec), 1im, Transpose(vec))::Transpose{Complex{Int},Vector{Complex{Int}}} == tvec + tvec .+ 1im
-    # ascertain inference friendliness, ref. https://github.com/JuliaLang/julia/pull/25083#issuecomment-353031641
-    sparsevec = SparseVector([1.0, 2.0, 3.0])
-    @test map(-, Adjoint(sparsevec), Adjoint(sparsevec)) isa Adjoint{Float64,SparseVector{Float64,Int}}
-    @test map(-, Transpose(sparsevec), Transpose(sparsevec)) isa Transpose{Float64,SparseVector{Float64,Int}}
-    @test broadcast(-, Adjoint(sparsevec), Adjoint(sparsevec)) isa Adjoint{Float64,SparseVector{Float64,Int}}
-    @test broadcast(-, Transpose(sparsevec), Transpose(sparsevec)) isa Transpose{Float64,SparseVector{Float64,Int}}
-    @test broadcast(+, Adjoint(sparsevec), 1.0, Adjoint(sparsevec)) isa Adjoint{Float64,SparseVector{Float64,Int}}
-    @test broadcast(+, Transpose(sparsevec), 1.0, Transpose(sparsevec)) isa Transpose{Float64,SparseVector{Float64,Int}}
 end
 
 @testset "Adjoint/Transpose-wrapped vector multiplication" begin
@@ -483,6 +499,22 @@ end
                   "$t of "*sprint((io, t) -> show(io, MIME"text/plain"(), t), parent(Fop))
 end
 
+@testset "showarg" begin
+    io = IOBuffer()
+
+    A = ones(Float64, 3,3)
+
+    B = Adjoint(A)
+    @test summary(B) == "3×3 adjoint(::Matrix{Float64}) with eltype Float64"
+    @test Base.showarg(io, B, false) === nothing
+    @test String(take!(io)) == "adjoint(::Matrix{Float64})"
+
+    B = Transpose(A)
+    @test summary(B) == "3×3 transpose(::Matrix{Float64}) with eltype Float64"
+    @test Base.showarg(io, B, false) === nothing
+    @test String(take!(io)) == "transpose(::Matrix{Float64})"
+end
+
 @testset "strided transposes" begin
     for t in (Adjoint, Transpose)
         @test strides(t(rand(3))) == (3, 1)
@@ -508,7 +540,6 @@ end
     @test pointer(Transpose(D)) === pointer(D)
 end
 
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
 using .Main.OffsetArrays
 
@@ -557,4 +588,24 @@ end
     @test transpose(Int[]) * Int[] == 0
 end
 
+@testset "reductions: $adjtrans" for adjtrans in [transpose, adjoint]
+    mat = rand(ComplexF64, 3,5)
+    @test sum(adjtrans(mat)) ≈ sum(collect(adjtrans(mat)))
+    @test sum(adjtrans(mat), dims=1) ≈ sum(collect(adjtrans(mat)), dims=1)
+    @test sum(adjtrans(mat), dims=(1,2)) ≈ sum(collect(adjtrans(mat)), dims=(1,2))
+
+    @test sum(imag, adjtrans(mat)) ≈ sum(imag, collect(adjtrans(mat)))
+    @test sum(imag, adjtrans(mat), dims=1) ≈ sum(imag, collect(adjtrans(mat)), dims=1)
+
+    mat = [rand(ComplexF64,2,2) for _ in 1:3, _ in 1:5]
+    @test sum(adjtrans(mat)) ≈ sum(collect(adjtrans(mat)))
+    @test sum(adjtrans(mat), dims=1) ≈ sum(collect(adjtrans(mat)), dims=1)
+    @test sum(adjtrans(mat), dims=(1,2)) ≈ sum(collect(adjtrans(mat)), dims=(1,2))
+
+    @test sum(imag, adjtrans(mat)) ≈ sum(imag, collect(adjtrans(mat)))
+    @test sum(x -> x[1,2], adjtrans(mat)) ≈ sum(x -> x[1,2], collect(adjtrans(mat)))
+    @test sum(imag, adjtrans(mat), dims=1) ≈ sum(imag, collect(adjtrans(mat)), dims=1)
+    @test sum(x -> x[1,2], adjtrans(mat), dims=1) ≈ sum(x -> x[1,2], collect(adjtrans(mat)), dims=1)
+end
+
 end # module TestAdjointTranspose
diff --git a/stdlib/LinearAlgebra/test/ambiguous_exec.jl b/stdlib/LinearAlgebra/test/ambiguous_exec.jl
index 6dce4926f46103..7b89c0a457afbb 100644
--- a/stdlib/LinearAlgebra/test/ambiguous_exec.jl
+++ b/stdlib/LinearAlgebra/test/ambiguous_exec.jl
@@ -1,4 +1,21 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test, LinearAlgebra
-@test detect_ambiguities(LinearAlgebra; imported=true, recursive=true) == []
+let ambig = detect_ambiguities(LinearAlgebra; recursive=true)
+    @test isempty(ambig)
+    ambig = Set{Any}(((m1.sig, m2.sig) for (m1, m2) in ambig))
+    expect = []
+    good = true
+    while !isempty(ambig)
+        sigs = pop!(ambig)
+        i = findfirst(==(sigs), expect)
+        if i === nothing
+            println(stderr, "push!(expect, (", sigs[1], ", ", sigs[2], "))")
+            good = false
+            continue
+        end
+        deleteat!(expect, i)
+    end
+    @test isempty(expect)
+    @test good
+end
diff --git a/stdlib/LinearAlgebra/test/bidiag.jl b/stdlib/LinearAlgebra/test/bidiag.jl
index dda65439409f8a..59bc1a5cb13ecd 100644
--- a/stdlib/LinearAlgebra/test/bidiag.jl
+++ b/stdlib/LinearAlgebra/test/bidiag.jl
@@ -2,9 +2,17 @@
 
 module TestBidiagonal
 
-using Test, LinearAlgebra, SparseArrays, Random
+using Test, LinearAlgebra, Random
 using LinearAlgebra: BlasReal, BlasFloat
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+
+isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
+using .Main.Furlongs
+
+isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
+using .Main.Quaternions
+
 include("testutils.jl") # test_approx_eq_modphase
 
 n = 10 #Size of test matrix
@@ -26,17 +34,20 @@ Random.seed!(1)
             ev += im*convert(Vector{elty}, rand(1:10, n-1))
         end
     end
+    dv0 = zeros(elty, 0)
+    ev0 = zeros(elty, 0)
 
     @testset "Constructors" begin
-        for (x, y) in ((dv, ev), (GenericArray(dv), GenericArray(ev)))
+        for (x, y) in ((dv0, ev0), (dv, ev), (GenericArray(dv), GenericArray(ev)))
             # from vectors
             ubd = Bidiagonal(x, y, :U)
             lbd = Bidiagonal(x, y, :L)
-            @test ubd != lbd
+            @test ubd != lbd || x === dv0
             @test ubd.dv === x
             @test lbd.ev === y
             @test_throws ArgumentError Bidiagonal(x, y, :R)
-            @test_throws DimensionMismatch Bidiagonal(x, x, :U)
+            @test_throws ArgumentError Bidiagonal(x, y, 'R')
+            x == dv0 || @test_throws DimensionMismatch Bidiagonal(x, x, :U)
             @test_throws MethodError Bidiagonal(x, y)
             # from matrix
             @test Bidiagonal(ubd, :U) == Bidiagonal(Matrix(ubd), :U) == ubd
@@ -90,8 +101,8 @@ Random.seed!(1)
         @test similar(ubd).uplo == ubd.uplo
         @test isa(similar(ubd, Int), Bidiagonal{Int})
         @test similar(ubd, Int).uplo == ubd.uplo
-        @test isa(similar(ubd, (3, 2)), SparseMatrixCSC)
-        @test isa(similar(ubd, Int, (3, 2)), SparseMatrixCSC{Int})
+        @test isa(similar(ubd, (3, 2)), Matrix)
+        @test isa(similar(ubd, Int, (3, 2)), Matrix{Int})
 
         # setindex! when off diagonal is zero bug
         Bu = Bidiagonal(rand(elty, 10), zeros(elty, 9), 'U')
@@ -127,6 +138,13 @@ Random.seed!(1)
             @test func(func(T)) == T
         end
 
+        @testset "permutedims(::Bidiagonal)" begin
+            @test permutedims(permutedims(T)) === T
+            @test permutedims(T) == transpose.(transpose(T))
+            @test permutedims(T, [1, 2]) === T
+            @test permutedims(T, (2, 1)) == permutedims(T)
+        end
+
         @testset "triu and tril" begin
             zerosdv = zeros(elty, length(dv))
             zerosev = zeros(elty, length(ev))
@@ -233,9 +251,9 @@ Random.seed!(1)
             @test_throws DimensionMismatch transpose(T) \ offsizemat
             @test_throws DimensionMismatch T' \ offsizemat
 
-            if elty <: BlasReal
-                @test_throws SingularException LinearAlgebra.naivesub!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :U), rand(elty, n))
-                @test_throws SingularException LinearAlgebra.naivesub!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :L), rand(elty, n))
+            if elty <: BigFloat
+                @test_throws SingularException ldiv!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :U), rand(elty, n))
+                @test_throws SingularException ldiv!(Bidiagonal(zeros(elty, n), ones(elty, n-1), :L), rand(elty, n))
             end
             let bb = b, cc = c
                 for atype in ("Array", "SubArray")
@@ -249,11 +267,18 @@ Random.seed!(1)
                 end
                 x = T \ b
                 tx = Tfull \ b
-                @test_throws DimensionMismatch LinearAlgebra.naivesub!(T,Vector{elty}(undef,n+1))
+                @test_throws DimensionMismatch ldiv!(T, Vector{elty}(undef, n+1))
                 @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
                 x = transpose(T) \ b
                 tx = transpose(Tfull) \ b
                 @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
+                x = copy(transpose(b)) / T
+                tx = copy(transpose(b)) / Tfull
+                @test_throws DimensionMismatch rdiv!(Matrix{elty}(undef, 1, n+1), T)
+                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
+                x = copy(transpose(b)) / transpose(T)
+                tx = copy(transpose(b)) / transpose(Tfull)
+                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
                 @testset "Generic Mat-vec ops" begin
                     @test T*b ≈ Tfull*b
                     @test T'*b ≈ Tfull'*b
@@ -262,6 +287,60 @@ Random.seed!(1)
                     end
                 end
             end
+            zdv = Vector{elty}(undef, 0)
+            zev = Vector{elty}(undef, 0)
+            zA  = Bidiagonal(zdv, zev, :U)
+            zb  = Vector{elty}(undef, 0)
+            @test ldiv!(zA, zb) === zb
+            @testset "linear solves with abstract matrices" begin
+                diag = b[:,1]
+                D = Diagonal(diag)
+                x = T \ D
+                tx = Tfull \ D
+                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
+                x = D / T
+                tx = D / Tfull
+                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
+                x = transpose(T) \ D
+                tx = transpose(Tfull) \ D
+                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
+                x = D / transpose(T)
+                tx = D / transpose(Tfull)
+                @test norm(x-tx,Inf) <= 4*condT*max(eps()*norm(tx,Inf), eps(promty)*norm(x,Inf))
+            end
+            @testset "Specialized multiplication/division" begin
+                function _bidiagdivmultest(T,
+                        x,
+                        typemul=T.uplo == 'U' ? UpperTriangular : Matrix,
+                        typediv=T.uplo == 'U' ? UpperTriangular : Matrix,
+                        typediv2=T.uplo == 'U' ? UpperTriangular : Matrix)
+                    TM = Matrix(T)
+                    @test (T*x)::typemul ≈  TM*x #broken=eltype(x) <: Furlong
+                    @test (x*T)::typemul ≈ x*TM #broken=eltype(x) <: Furlong
+                    @test (x\T)::typediv ≈ x\TM #broken=eltype(T) <: Furlong
+                    @test (T/x)::typediv ≈ TM/x #broken=eltype(T) <: Furlong
+                    if !isa(x, Number)
+                        @test (T\x)::typediv2 ≈ TM\x #broken=eltype(x) <: Furlong
+                        @test (x/T)::typediv2 ≈ x/TM #broken=eltype(x) <: Furlong
+                    end
+                    return nothing
+                end
+                A = randn(n,n)
+                d = randn(n)
+                dl = randn(n-1)
+                t = T
+                for t in (T, #=Furlong.(T)=#), (A, d, dl) in ((A, d, dl), #=(Furlong.(A), Furlong.(d), Furlong.(dl))=#)
+                    _bidiagdivmultest(t, 5, Bidiagonal, Bidiagonal)
+                    _bidiagdivmultest(t, 5I, Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular)
+                    _bidiagdivmultest(t, Diagonal(d), Bidiagonal, Bidiagonal, t.uplo == 'U' ? UpperTriangular : LowerTriangular)
+                    _bidiagdivmultest(t, UpperTriangular(A))
+                    _bidiagdivmultest(t, UnitUpperTriangular(A))
+                    _bidiagdivmultest(t, LowerTriangular(A), t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix)
+                    _bidiagdivmultest(t, UnitLowerTriangular(A), t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix, t.uplo == 'L' ? LowerTriangular : Matrix)
+                    _bidiagdivmultest(t, Bidiagonal(d, dl, :U), Matrix, Matrix, Matrix)
+                    _bidiagdivmultest(t, Bidiagonal(d, dl, :L), Matrix, Matrix, Matrix)
+                end
+            end
         end
 
         if elty <: BlasReal
@@ -358,7 +437,7 @@ Random.seed!(1)
             Tridiag = Tridiagonal(rand(elty, 9), rand(elty, 10), rand(elty, 9))
             SymTri = SymTridiagonal(rand(elty, 10), rand(elty, 9))
 
-            mats = [Diag, BidiagU, BidiagL, Tridiag, SymTri]
+            mats = Any[Diag, BidiagU, BidiagL, Tridiag, SymTri]
             for a in mats
                 for b in mats
                     @test a*b ≈ Matrix(a)*Matrix(b)
@@ -419,9 +498,7 @@ using LinearAlgebra: fillstored!, UnitLowerTriangular
         exotic_arrays = Any[Tridiagonal(randn(3), randn(4), randn(3)),
         Bidiagonal(randn(3), randn(2), rand([:U,:L])),
         SymTridiagonal(randn(3), randn(2)),
-        sparse(randn(3,4)),
         Diagonal(randn(5)),
-        sparse(rand(3)),
         # LowerTriangular(randn(3,3)), # AbstractTriangular fill! deprecated, see below
         # UpperTriangular(randn(3,3)) # AbstractTriangular fill! deprecated, see below
         ]
@@ -477,6 +554,19 @@ end
     bb = Any[b[1:3], b[4:6], b[7:9]]
     @test vcat((Alb\bb)...) ≈ LowerTriangular(A)\b
     @test vcat((Aub\bb)...) ≈ UpperTriangular(A)\b
+    Alb = Bidiagonal([tril(A[1:3,1:3]), tril(A[4:6,4:6]), tril(A[7:9,7:9])],
+                     [triu(A[4:6,1:3]), triu(A[7:9,4:6])], 'L')
+    Aub = Bidiagonal([triu(A[1:3,1:3]), triu(A[4:6,4:6]), triu(A[7:9,7:9])],
+                     [tril(A[1:3,4:6]), tril(A[4:6,7:9])], 'U')
+    d = [randn(3,3) for _ in 1:3]
+    dl = [randn(3,3) for _ in 1:2]
+    B = [randn(3,3) for _ in 1:3, _ in 1:3]
+    for W in (UpperTriangular, LowerTriangular), t in (identity, adjoint, transpose)
+        @test Matrix(t(Alb) \ W(B)) ≈ t(Alb) \ Matrix(W(B))
+        @test Matrix(t(Aub) \ W(B)) ≈ t(Aub) \ Matrix(W(B))
+        @test Matrix(W(B) / t(Alb)) ≈ Matrix(W(B)) / t(Alb)
+        @test Matrix(W(B) / t(Aub)) ≈ Matrix(W(B)) / t(Aub)
+    end
 end
 
 @testset "sum, mapreduce" begin
@@ -542,6 +632,14 @@ end
             B = Bidiagonal(dv, ev, uplo)
             @test dot(x, B, y) ≈ dot(B'x, y) ≈ dot(x, Matrix(B), y)
         end
+        dv = Vector{elty}(undef, 0)
+        ev = Vector{elty}(undef, 0)
+        x = Vector{elty}(undef, 0)
+        y = Vector{elty}(undef, 0)
+        for uplo in (:U, :L)
+            B = Bidiagonal(dv, ev, uplo)
+            @test dot(x, B, y) ≈ dot(zero(elty), zero(elty), zero(elty))
+        end
     end
 end
 
@@ -593,4 +691,93 @@ Base.transpose(n::MyNotANumberType) = n
     @test transpose(copy(tB)) == B
 end
 
+@testset "empty bidiagonal matrices" begin
+    dv0 = zeros(0)
+    ev0 = zeros(0)
+    zm = zeros(0, 0)
+    ubd = Bidiagonal(dv0, ev0, :U)
+    lbd = Bidiagonal(dv0, ev0, :L)
+    @test size(ubd) == (0, 0)
+    @test_throws BoundsError getindex(ubd, 1, 1)
+    @test_throws BoundsError setindex!(ubd, 0.0, 1, 1)
+    @test similar(ubd) == ubd
+    @test similar(lbd, Int) == zeros(Int, 0, 0)
+    @test ubd == zm
+    @test lbd == zm
+    @test ubd == lbd
+    @test ubd * ubd == ubd
+    @test lbd + lbd == lbd
+    @test lbd' == ubd
+    @test ubd' == lbd
+    @test triu(ubd, 1) == ubd
+    @test triu(lbd, 1) == ubd
+    @test tril(ubd, -1) == ubd
+    @test tril(lbd, -1) == ubd
+    @test_throws ArgumentError triu(ubd)
+    @test_throws ArgumentError tril(ubd)
+    @test sum(ubd) == 0.0
+    @test reduce(+, ubd) == 0.0
+    @test reduce(+, ubd, dims=1) == zeros(1, 0)
+    @test reduce(+, ubd, dims=2) == zeros(0, 1)
+    @test hcat(ubd, ubd) == zm
+    @test vcat(ubd, lbd) == zm
+    @test hcat(lbd, ones(0, 3)) == ones(0, 3)
+    @test fill!(copy(ubd), 1.0) == ubd
+    @test map(abs, ubd) == zm
+    @test lbd .+ 1 == zm
+    @test lbd + ubd isa Bidiagonal
+    @test lbd .+ ubd isa Bidiagonal
+    @test ubd * 5 == ubd
+    @test ubd .* 3 == ubd
+end
+
+@testset "non-commutative algebra (#39701)" begin
+    A = Bidiagonal(Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4)), :U)
+    c = Quaternion(1,2,3,4)
+    @test A * c ≈ Matrix(A) * c
+    @test A / c ≈ Matrix(A) / c
+    @test c * A ≈ c * Matrix(A)
+    @test c \ A ≈ c \ Matrix(A)
+end
+
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin
+    # tests corresponding to #34995
+    dv = ImmutableArray([1, 2, 3, 4])
+    ev = ImmutableArray([7, 8, 9])
+    Bu = Bidiagonal(dv, ev, :U)
+    Bl = Bidiagonal(dv, ev, :L)
+
+    @test convert(AbstractArray{Float64}, Bu)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bu
+    @test convert(AbstractMatrix{Float64}, Bu)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bu
+    @test convert(AbstractArray{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl
+    @test convert(AbstractMatrix{Float64}, Bl)::Bidiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Bl
+end
+
+@testset "block-bidiagonal matrix indexing" begin
+    dv = [ones(4,3), ones(2,2).*2, ones(2,3).*3, ones(4,4).*4]
+    evu = [ones(4,2), ones(2,3).*2, ones(2,4).*3]
+    evl = [ones(2,3), ones(2,2).*2, ones(4,3).*3]
+    BU = Bidiagonal(dv, evu, :U)
+    BL = Bidiagonal(dv, evl, :L)
+    # check that all the matrices along a column have the same number of columns,
+    # and the matrices along a row have the same number of rows
+    for j in axes(BU, 2), i in 2:size(BU, 1)
+        @test size(BU[i,j], 2) == size(BU[1,j], 2)
+        @test size(BU[i,j], 1) == size(BU[i,1], 1)
+        if j < i || j > i + 1
+            @test iszero(BU[i,j])
+        end
+    end
+    for j in axes(BL, 2), i in 2:size(BL, 1)
+        @test size(BL[i,j], 2) == size(BL[1,j], 2)
+        @test size(BL[i,j], 1) == size(BL[i,1], 1)
+        if j < i-1 || j > i
+            @test iszero(BL[i,j])
+        end
+    end
+end
+
 end # module TestBidiagonal
diff --git a/stdlib/LinearAlgebra/test/blas.jl b/stdlib/LinearAlgebra/test/blas.jl
index 61fd62b6be353d..0a2ac87c8026da 100644
--- a/stdlib/LinearAlgebra/test/blas.jl
+++ b/stdlib/LinearAlgebra/test/blas.jl
@@ -4,20 +4,31 @@ module TestBLAS
 
 using Test, LinearAlgebra, Random
 using LinearAlgebra: BlasReal, BlasComplex
+fabs(x::Real) = abs(x)
+fabs(x::Complex) = abs(real(x)) + abs(imag(x))
+
+# help function to build packed storage
+function pack(A, uplo)
+    AP = eltype(A)[]
+    n = size(A, 1)
+    for j in 1:n, i in (uplo==:L ? (j:n) : (1:j))
+        push!(AP, A[i,j])
+    end
+    return AP
+end
 
+@testset "vec_pointer_stride" begin
+    a = zeros(4,4,4)
+    @test BLAS.asum(view(a,1:2:4,:,:)) == 0 # vector like
+    @test_throws ArgumentError BLAS.asum(view(a,1:3:4,:,:)) # non-vector like
+end
 Random.seed!(100)
 ## BLAS tests - testing the interface code to BLAS routines
 @testset for elty in [Float32, Float64, ComplexF32, ComplexF64]
 
     @testset "syr2k!" begin
-        U = randn(5,2)
-        V = randn(5,2)
-        if elty == ComplexF32 || elty == ComplexF64
-            U = complex.(U, U)
-            V = complex.(V, V)
-        end
-        U = convert(Array{elty, 2}, U)
-        V = convert(Array{elty, 2}, V)
+        U = randn(elty, 5, 2)
+        V = randn(elty, 5, 2)
         @test tril(LinearAlgebra.BLAS.syr2k('L','N',U,V)) ≈ tril(U*transpose(V) + V*transpose(U))
         @test triu(LinearAlgebra.BLAS.syr2k('U','N',U,V)) ≈ triu(U*transpose(V) + V*transpose(U))
         @test tril(LinearAlgebra.BLAS.syr2k('L','T',U,V)) ≈ tril(transpose(U)*V + transpose(V)*U)
@@ -26,12 +37,8 @@ Random.seed!(100)
 
     if elty in (ComplexF32, ComplexF64)
         @testset "her2k!" begin
-            U = randn(5,2)
-            V = randn(5,2)
-            U = complex.(U, U)
-            V = complex.(V, V)
-            U = convert(Array{elty, 2}, U)
-            V = convert(Array{elty, 2}, V)
+            U = randn(elty, 5, 2)
+            V = randn(elty, 5, 2)
             @test tril(LinearAlgebra.BLAS.her2k('L','N',U,V)) ≈ tril(U*V' + V*U')
             @test triu(LinearAlgebra.BLAS.her2k('U','N',U,V)) ≈ triu(U*V' + V*U')
             @test tril(LinearAlgebra.BLAS.her2k('L','C',U,V)) ≈ tril(U'*V + V'*U)
@@ -48,21 +55,21 @@ Random.seed!(100)
     U4 = triu(fill(elty(1), 4,4))
     Z4 = zeros(elty, (4,4))
 
-    elm1 = convert(elty, -1)
-    el2 = convert(elty, 2)
-    v14 = convert(Vector{elty}, [1:4;])
-    v41 = convert(Vector{elty}, [4:-1:1;])
+    elm1 = elty(-1)
+    el2 = elty(2)
+    v14 = elty[1:4;]
+    v41 = elty[4:-1:1;]
 
     let n = 10
         @testset "dot products" begin
             if elty <: Real
-                x1 = convert(Vector{elty}, randn(n))
-                x2 = convert(Vector{elty}, randn(n))
+                x1 = randn(elty, n)
+                x2 = randn(elty, n)
                 @test BLAS.dot(x1,x2) ≈ sum(x1.*x2)
                 @test_throws DimensionMismatch BLAS.dot(x1,rand(elty, n + 1))
             else
-                z1 = convert(Vector{elty}, complex.(randn(n),randn(n)))
-                z2 = convert(Vector{elty}, complex.(randn(n),randn(n)))
+                z1 = randn(elty, n)
+                z2 = randn(elty, n)
                 @test BLAS.dotc(z1,z2) ≈ sum(conj(z1).*z2)
                 @test BLAS.dotu(z1,z2) ≈ sum(z1.*z2)
                 @test_throws DimensionMismatch BLAS.dotc(z1,rand(elty, n + 1))
@@ -70,92 +77,60 @@ Random.seed!(100)
             end
         end
         @testset "iamax" begin
-            if elty <: Real
-                x = convert(Vector{elty}, randn(n))
-                @test BLAS.iamax(x) == argmax(abs.(x))
-            else
-                z = convert(Vector{elty}, complex.(randn(n),randn(n)))
-                @test BLAS.iamax(z) == argmax(map(x -> abs(real(x)) + abs(imag(x)), z))
-            end
+            x = randn(elty, n)
+            @test BLAS.iamax(x) == findmax(fabs, x)[2]
         end
         @testset "rot!" begin
-            if elty <: Real
-                x = convert(Vector{elty}, randn(n))
-                y = convert(Vector{elty}, randn(n))
-                c = rand(elty)
-                s = rand(elty)
+            x = randn(elty, n)
+            y = randn(elty, n)
+            c = rand(real(elty))
+            for sty in unique!([real(elty), elty])
+                s = rand(sty)
                 x2 = copy(x)
                 y2 = copy(y)
                 BLAS.rot!(n, x, 1, y, 1, c, s)
                 @test x ≈ c*x2 + s*y2
-                @test y ≈ -s*x2 + c*y2
-            else
-                x = convert(Vector{elty}, complex.(randn(n),rand(n)))
-                y = convert(Vector{elty}, complex.(randn(n),rand(n)))
-                cty = (elty == ComplexF32) ? Float32 : Float64
-                c = rand(cty)
-                for sty in [cty, elty]
-                    s = rand(sty)
-                    x2 = copy(x)
-                    y2 = copy(y)
-                    BLAS.rot!(n, x, 1, y, 1, c, s)
-                    @test x ≈ c*x2 + s*y2
-                    @test y ≈ -conj(s)*x2 + c*y2
-                end
+                @test y ≈ -conj(s)*x2 + c*y2
             end
         end
         @testset "axp(b)y" begin
-            if elty <: Real
-                x1 = convert(Vector{elty}, randn(n))
-                x2 = convert(Vector{elty}, randn(n))
-                α  = rand(elty)
-                β  = rand(elty)
-                @test BLAS.axpy!(α,copy(x1),copy(x2)) ≈ α*x1 + x2
-                @test BLAS.axpby!(α,copy(x1),β,copy(x2)) ≈ α*x1 + β*x2
-                @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), rand(elty, n + 1))
-                @test_throws DimensionMismatch BLAS.axpby!(α, copy(x1), β, rand(elty, n + 1))
-                @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 1:n)
-                @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 0:div(n,2), copy(x2), 1:(div(n, 2) + 1))
-                @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 0:(div(n, 2) - 1))
-                @test BLAS.axpy!(α,copy(x1),1:n,copy(x2),1:n) ≈ x2 + α*x1
-            else
-                z1 = convert(Vector{elty}, complex.(randn(n), randn(n)))
-                z2 = convert(Vector{elty}, complex.(randn(n), randn(n)))
-                α  = rand(elty)
-                @test BLAS.axpy!(α, copy(z1), copy(z2)) ≈ z2 + α * z1
-                @test_throws DimensionMismatch BLAS.axpy!(α, copy(z1), rand(elty, n + 1))
-                @test_throws DimensionMismatch BLAS.axpy!(α, copy(z1), 1:div(n, 2), copy(z2), 1:(div(n, 2) + 1))
-                @test_throws ArgumentError BLAS.axpy!(α, copy(z1), 0:div(n,2), copy(z2), 1:(div(n, 2) + 1))
-                @test_throws ArgumentError BLAS.axpy!(α, copy(z1), 1:div(n,2), copy(z2), 0:(div(n, 2) - 1))
-                @test BLAS.axpy!(α,copy(z1),1:n,copy(z2),1:n) ≈ z2 + α*z1
+            x1 = randn(elty, n)
+            x2 = randn(elty, n)
+            α  = rand(elty)
+            β  = rand(elty)
+            for X1 in (x1, view(x1,n:-1:1)), X2 in (x2, view(x2, n:-1:1))
+                @test BLAS.axpy!(α,deepcopy(X1),deepcopy(X2)) ≈ α*X1 + X2
+                @test BLAS.axpby!(α,deepcopy(X1),β,deepcopy(X2)) ≈ α*X1 + β*X2
+            end
+            for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1)
+                @test BLAS.axpy!(α,copy(x1),ind1,copy(x2),ind2) ≈ x2 + α*(ind1 == ind2 ? x1 : reverse(x1))
             end
+            @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), rand(elty, n + 1))
+            @test_throws DimensionMismatch BLAS.axpby!(α, copy(x1), β, rand(elty, n + 1))
+            @test_throws DimensionMismatch BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 1:n)
+            @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 0:div(n,2), copy(x2), 1:(div(n, 2) + 1))
+            @test_throws ArgumentError BLAS.axpy!(α, copy(x1), 1:div(n,2), copy(x2), 0:(div(n, 2) - 1))
         end
         @testset "nrm2, iamax, and asum for StridedVectors" begin
             a = rand(elty,n)
-            b = view(a,2:2:n,1)
-            @test BLAS.nrm2(b) ≈ norm(b)
-            if elty <: Real
-                @test BLAS.asum(b) ≈ sum(abs.(b))
-                @test BLAS.iamax(b) ≈ argmax(abs.(b))
-            else
-                @test BLAS.asum(b) ≈ sum(abs.(real(b))) + sum(abs.(imag(b)))
-                @test BLAS.iamax(b) == argmax(map(x -> abs(real(x)) + abs(imag(x)), b))
+            for ind in (2:2:n, n:-2:2)
+                b = view(a, ind, 1)
+                @test BLAS.nrm2(b) ≈ sqrt(sum(abs2, b))
+                @test BLAS.asum(b) ≈ sum(fabs, b)
+                @test BLAS.iamax(b) == findmax(fabs, b)[2] * (step(ind) >= 0)
             end
         end
-        # scal
-        α = rand(elty)
-        a = rand(elty,n)
-        @test BLAS.scal(n,α,a,1) ≈ α * a
-
-        @testset "trsv" begin
-            A = triu(rand(elty,n,n))
-            @testset "Vector and SubVector" for x in (rand(elty, n), view(rand(elty,2n),1:2:2n))
-                @test A\x ≈ BLAS.trsv('U','N','N',A,x)
-                @test_throws DimensionMismatch BLAS.trsv('U','N','N',A,Vector{elty}(undef,n+1))
+        @testset "scal" begin
+            α = rand(elty)
+            a = rand(elty,n)
+            @test BLAS.scal(n,α,a,1) ≈ α * a
+            for v in (a, view(a, n:-1:1))
+                @test BLAS.scal!(α, deepcopy(v)) ≈ α * v
             end
         end
-        @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n)),
-            y in (rand(elty,n), view(rand(elty,3n), 1:3:3n))
+
+        @testset "ger, her, syr" for x in (rand(elty, n), view(rand(elty,2n), 1:2:2n), view(rand(elty,n), n:-1:1)),
+            y in (rand(elty,n), view(rand(elty,3n), 1:3:3n), view(rand(elty,2n), 2n:-2:2))
 
             A = rand(elty,n,n)
             α = rand(elty)
@@ -178,32 +153,66 @@ Random.seed!(100)
             end
         end
         @testset "copy" begin
-            x1 = convert(Vector{elty}, randn(n))
-            x2 = convert(Vector{elty}, randn(n))
-            BLAS.copyto!(x2, 1:n, x1, 1:n)
-            @test x2 == x1
+            x1 = randn(elty, n)
+            x2 = randn(elty, n)
+            for ind1 in (1:n, n:-1:1), ind2 in (1:n, n:-1:1)
+                @test x2 === BLAS.copyto!(x2, ind1, x1, ind2) == (ind1 == ind2 ? x1 : reverse(x1))
+            end
             @test_throws DimensionMismatch BLAS.copyto!(x2, 1:n, x1, 1:(n - 1))
             @test_throws ArgumentError BLAS.copyto!(x1, 0:div(n, 2), x2, 1:(div(n, 2) + 1))
             @test_throws ArgumentError BLAS.copyto!(x1, 1:(div(n, 2) + 1), x2, 0:div(n, 2))
         end
-        # trmv
-        A = triu(rand(elty,n,n))
-        x = rand(elty,n)
-        @test BLAS.trmv('U','N','N',A,x) ≈ A*x
+        @testset "trmv and trsv" begin
+            A = rand(elty,n,n)
+            x = rand(elty,n)
+            xerr = Vector{elty}(undef,n+1)
+            for uplo in ('U', 'L'), diag in ('U','N'), trans in ('N', 'T', 'C')
+                Wrapper = if uplo == 'U'
+                    diag == 'U' ? UnitUpperTriangular : UpperTriangular
+                else
+                    diag == 'U' ? UnitLowerTriangular : LowerTriangular
+                end
+                fun = trans == 'N' ? identity : trans == 'T' ? transpose : adjoint
+                fullA = collect(fun(Wrapper(A)))
+                @testset "trmv" begin
+                    @test BLAS.trmv(uplo,trans,diag,A,x) ≈ fullA * x
+                    @test_throws DimensionMismatch BLAS.trmv(uplo,trans,diag,A,xerr)
+                    for xx in (x, view(x, n:-1:1))
+                        @test BLAS.trmv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA * xx
+                    end
+                end
+                @testset "trsv" begin
+                    @test BLAS.trsv(uplo,trans,diag,A,x) ≈ fullA \ x
+                    @test_throws DimensionMismatch BLAS.trsv(uplo,trans,diag,A,xerr)
+                    for xx in (x, view(x, n:-1:1))
+                        @test BLAS.trsv!(uplo,trans,diag,A,deepcopy(xx)) ≈ fullA \ xx
+                    end
+                end
+            end
+        end
         @testset "symmetric/Hermitian multiplication" begin
             x = rand(elty,n)
             A = rand(elty,n,n)
+            y = rand(elty, n)
+            α = randn(elty)
+            β = randn(elty)
             Aherm = A + A'
             Asymm = A + transpose(A)
-            @testset "symv and hemv" begin
-                @test BLAS.symv('U',Asymm,x) ≈ Asymm*x
-                offsizevec, offsizemat = Array{elty}.(undef,(n+1, (n,n+1)))
-                @test_throws DimensionMismatch BLAS.symv!('U',one(elty),Asymm,x,one(elty),offsizevec)
-                @test_throws DimensionMismatch BLAS.symv('U',offsizemat,x)
+            offsizevec, offsizemat = Array{elty}.(undef,(n+1, (n,n+1)))
+            @testset "symv and hemv" for uplo in ('U', 'L')
+                @test BLAS.symv(uplo,Asymm,x) ≈ Asymm*x
+                for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                    @test BLAS.symv!(uplo,α,Asymm,xx,β,deepcopy(yy)) ≈ α * Asymm * xx + β * yy
+                end
+                @test_throws DimensionMismatch BLAS.symv!(uplo,α,Asymm,x,β,offsizevec)
+                @test_throws DimensionMismatch BLAS.symv(uplo,offsizemat,x)
                 if elty <: BlasComplex
-                    @test BLAS.hemv('U',Aherm,x) ≈ Aherm*x
-                    @test_throws DimensionMismatch BLAS.hemv('U',offsizemat,x)
-                    @test_throws DimensionMismatch BLAS.hemv!('U',one(elty),Aherm,x,one(elty),offsizevec)
+                    @test BLAS.hemv(uplo,Aherm,x) ≈ Aherm*x
+                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                        @test BLAS.hemv!(uplo,α,Aherm,xx,β,deepcopy(yy)) ≈ α * Aherm * xx + β * yy
+                    end
+                    @test_throws DimensionMismatch BLAS.hemv(uplo,offsizemat,x)
+                    @test_throws DimensionMismatch BLAS.hemv!(uplo,one(elty),Aherm,x,one(elty),offsizevec)
                 end
             end
 
@@ -233,40 +242,24 @@ Random.seed!(100)
                 # Both matrix dimensions n coincide, as we have Hermitian matrices.
                 # Define the inputs and outputs of hpmv!, y = α*A*x+β*y
                 α = rand(elty)
-                M = rand(elty, n, n)
-                AL = Hermitian(M, :L)
-                AU = Hermitian(M, :U)
+                A = rand(elty, n, n)
                 x = rand(elty, n)
                 β = rand(elty)
                 y = rand(elty, n)
-
-                y_result_julia_lower = α*AL*x + β*y
-
-                # Create lower triangular packing of AL
-                AP = typeof(AL[1,1])[]
-                for j in 1:n
-                    for i in j:n
-                        push!(AP, AL[i,j])
+                for uplo in (:L, :U)
+                    Cuplo = String(uplo)[1]
+                    AH = Hermitian(A, uplo)
+                    # Create lower/upper triangular packing of AL
+                    AP = pack(AH, uplo)
+                    for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1))
+                        @test BLAS.hpmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AH*xx + β*yy
                     end
+                    AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1))
+                    @test_throws ErrorException BLAS.hpmv!(Cuplo, α, AP′, x, β, y)
+                    AP′ = view(AP, 1:length(AP′) - 1)
+                    @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, y)
+                    @test_throws DimensionMismatch BLAS.hpmv!(Cuplo, α, AP′, x, β, view(y,1:n-1))
                 end
-
-                y_result_blas_lower = copy(y)
-                BLAS.hpmv!('L', α, AP, x, β, y_result_blas_lower)
-                @test y_result_julia_lower ≈ y_result_blas_lower
-
-                y_result_julia_upper = α*AU*x + β*y
-
-                # Create upper triangular packing of AU
-                AP = typeof(AU[1,1])[]
-                for j in 1:n
-                    for i in 1:j
-                        push!(AP, AU[i,j])
-                    end
-                end
-
-                y_result_blas_upper = copy(y)
-                BLAS.hpmv!('U', α, AP, x, β, y_result_blas_upper)
-                @test y_result_julia_upper ≈ y_result_blas_upper
             end
         end
 
@@ -276,41 +269,57 @@ Random.seed!(100)
                 # Both matrix dimensions n coincide, as we have symmetric matrices.
                 # Define the inputs and outputs of spmv!, y = α*A*x+β*y
                 α = rand(elty)
-                M = rand(elty, n, n)
-                AL = Symmetric(M, :L)
-                AU = Symmetric(M, :U)
+                A = rand(elty, n, n)
                 x = rand(elty, n)
                 β = rand(elty)
                 y = rand(elty, n)
-
-                y_result_julia_lower = α*AL*x + β*y
-
-                # Create lower triangular packing of AL
-                AP = typeof(M[1,1])[]
-                for j in 1:n
-                    for i in j:n
-                        push!(AP, AL[i,j])
+                for uplo in (:L, :U)
+                    Cuplo = String(uplo)[1]
+                    AS = Symmetric(A, uplo)
+                    # Create lower/upper triangular packing of AL
+                    AP = pack(AS, uplo)
+                    for xx in (x, view(x,n:-1:1)), yy in (y, view(y,n:-1:1))
+                        @test BLAS.spmv!(Cuplo, α, AP, xx, β, deepcopy(yy)) ≈ α*AS*xx + β*yy
                     end
+                    AP′ = view(zeros(elty, n*(n+1)),1:2:n*(n+1))
+                    @test_throws ErrorException BLAS.spmv!(Cuplo, α, AP′, x, β, y)
+                    AP′ = view(AP, 1:length(AP′) - 1)
+                    @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, y)
+                    @test_throws DimensionMismatch BLAS.spmv!(Cuplo, α, AP′, x, β, view(y,1:n-1))
                 end
+            end
+        end
 
-                y_result_blas_lower = copy(y)
-                BLAS.spmv!('L', α, AP, x, β, y_result_blas_lower)
-                @test y_result_julia_lower ≈ y_result_blas_lower
-
-
-                y_result_julia_upper = α*AU*x + β*y
-
-                # Create upper triangular packing of AU
-                AP = typeof(M[1,1])[]
-                for j in 1:n
-                    for i in 1:j
-                        push!(AP, AU[i,j])
-                    end
+        # spr!
+        if elty in (Float32, Float64)
+            @testset "spr! $elty" begin
+                α = rand(elty)
+                M = rand(elty, n, n)
+                AL = Symmetric(M, :L)
+                AU = Symmetric(M, :U)
+                for x in (rand(elty, n), view(rand(elty, n), n:-1:1))
+                    ALP_result_julia_lower = pack(α*x*x' + AL, :L)
+                    ALP_result_blas_lower = pack(AL, :L)
+                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                    @test ALP_result_julia_lower ≈ ALP_result_blas_lower
+                    ALP_result_blas_lower = append!(pack(AL, :L), ones(elty, 10))
+                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                    @test ALP_result_julia_lower ≈ ALP_result_blas_lower[1:end-10]
+                    ALP_result_blas_lower = reshape(pack(AL, :L), 1, length(ALP_result_julia_lower), 1)
+                    BLAS.spr!('L', α, x, ALP_result_blas_lower)
+                    @test ALP_result_julia_lower ≈ vec(ALP_result_blas_lower)
+
+                    AUP_result_julia_upper = pack(α*x*x' + AU, :U)
+                    AUP_result_blas_upper = pack(AU, :U)
+                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                    @test AUP_result_julia_upper ≈ AUP_result_blas_upper
+                    AUP_result_blas_upper = append!(pack(AU, :U), ones(elty, 10))
+                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                    @test AUP_result_julia_upper ≈ AUP_result_blas_upper[1:end-10]
+                    AUP_result_blas_upper = reshape(pack(AU, :U), 1, length(AUP_result_julia_upper), 1)
+                    BLAS.spr!('U', α, x, AUP_result_blas_upper)
+                    @test AUP_result_julia_upper ≈ vec(AUP_result_blas_upper)
                 end
-
-                y_result_blas_upper = copy(y)
-                BLAS.spmv!('U', α, AP, x, β, y_result_blas_upper)
-                @test y_result_julia_upper ≈ y_result_blas_upper
             end
         end
 
@@ -322,33 +331,51 @@ Random.seed!(100)
         #will work for SymTridiagonal,Tridiagonal,Bidiagonal!
         @testset "banded matrix mv" begin
             @testset "gbmv" begin
-                TD  = Tridiagonal(rand(elty,n-1),rand(elty,n),rand(elty,n-1))
-                x   = rand(elty,n)
+                TD = Tridiagonal(rand(elty,n-1),rand(elty,n),rand(elty,n-1))
+                x  = rand(elty, n)
                 #put TD into the BLAS format!
                 fTD = zeros(elty,3,n)
                 fTD[1,2:n] = TD.du
                 fTD[2,:] = TD.d
                 fTD[3,1:n-1] = TD.dl
                 @test BLAS.gbmv('N',n,1,1,fTD,x) ≈ TD*x
+                y = rand(elty, n)
+                α = randn(elty)
+                β = randn(elty)
+                for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                    @test BLAS.gbmv!('N',n,1,1,α,fTD,xx,β,deepcopy(yy)) ≈ α * TD * xx + β * yy
+                end
             end
             #will work for SymTridiagonal only!
-            @testset "sbmv" begin
+            @testset "sbmv and hbmv" begin
+                x = rand(elty,n)
                 if elty <: BlasReal
                     ST  = SymTridiagonal(rand(elty,n),rand(elty,n-1))
-                    x   = rand(elty,n)
                     #put TD into the BLAS format!
                     fST = zeros(elty,2,n)
                     fST[1,2:n] = ST.ev
                     fST[2,:] = ST.dv
                     @test BLAS.sbmv('U',1,fST,x) ≈ ST*x
+                    y = rand(elty, n)
+                    α = randn(elty)
+                    β = randn(elty)
+                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                        @test BLAS.sbmv!('U',1,α,fST,xx,β,deepcopy(yy)) ≈ α * ST * xx + β * yy
+                    end
                 else
-                    dv = real(rand(elty,n))
+                    dv = rand(real(elty),n)
                     ev = rand(elty,n-1)
                     bH = zeros(elty,2,n)
                     bH[1,2:n] = ev
                     bH[2,:] = dv
                     fullH = diagm(0 => dv, -1 => conj(ev), 1 => ev)
                     @test BLAS.hbmv('U',1,bH,x) ≈ fullH*x
+                    y = rand(elty, n)
+                    α = randn(elty)
+                    β = randn(elty)
+                    for xx in (x, view(x, n:-1:1)), yy in (y, view(y, n:-1:1))
+                        @test BLAS.hbmv!('U',1,α,bH,xx,β,deepcopy(yy)) ≈ α * fullH * xx + β * yy
+                    end
                 end
             end
         end
@@ -370,6 +397,41 @@ Random.seed!(100)
         @test all(o4cp .== z4)
         @test all(BLAS.gemv('N', U4, o4) .== v41)
         @test all(BLAS.gemv('N', U4, o4) .== v41)
+        @testset "non-standard strides" begin
+            A = rand(elty, 3, 4)
+            x = rand(elty, 5)
+            for y = (view(ones(elty, 5), 1:2:5), view(ones(elty, 7), 6:-2:2))
+                ycopy = copy(y)
+                @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(x, 1:3:4), elty(3), y) ≈ 2*A[:,2:2:4]*x[1:3:4] + 3*ycopy
+                ycopy = copy(y)
+                @test BLAS.gemv!('N', elty(2), view(A, :, 4:-2:2), view(x, 1:3:4), elty(3), y) ≈ 2*A[:,4:-2:2]*x[1:3:4] + 3*ycopy
+                ycopy = copy(y)
+                @test BLAS.gemv!('N', elty(2), view(A, :, 2:2:4), view(x, 4:-3:1), elty(3), y) ≈ 2*A[:,2:2:4]*x[4:-3:1] + 3*ycopy
+                ycopy = copy(y)
+                @test BLAS.gemv!('N', elty(2), view(A, :, 4:-2:2), view(x, 4:-3:1), elty(3), y) ≈ 2*A[:,4:-2:2]*x[4:-3:1] + 3*ycopy
+                ycopy = copy(y)
+                @test BLAS.gemv!('N', elty(2), view(A, :, StepRangeLen(1,0,1)), view(x, 1:1), elty(3), y) ≈ 2*A[:,1:1]*x[1:1] + 3*ycopy # stride(A,2) == 0
+            end
+            @test BLAS.gemv!('N', elty(1), zeros(elty, 0, 5), zeros(elty, 5), elty(1), zeros(elty, 0)) == elty[] # empty matrix, stride(A,2) == 0
+            @test BLAS.gemv('N', elty(-1), view(A, 2:3, 1:2:3), view(x, 2:-1:1)) ≈ -1*A[2:3,1:2:3]*x[2:-1:1]
+            @test BLAS.gemv('N', view(A, 2:3, 3:-2:1), view(x, 1:2:3)) ≈ A[2:3,3:-2:1]*x[1:2:3]
+            for (trans, f) = (('T',transpose), ('C',adjoint))
+                for y = (view(ones(elty, 3), 1:2:3), view(ones(elty, 5), 4:-2:2))
+                    ycopy = copy(y)
+                    @test BLAS.gemv!(trans, elty(2), view(A, :, 2:2:4), view(x, 1:2:5), elty(3), y) ≈ 2*f(A[:,2:2:4])*x[1:2:5] + 3*ycopy
+                    ycopy = copy(y)
+                    @test BLAS.gemv!(trans, elty(2), view(A, :, 4:-2:2), view(x, 1:2:5), elty(3), y) ≈ 2*f(A[:,4:-2:2])*x[1:2:5] + 3*ycopy
+                    ycopy = copy(y)
+                    @test BLAS.gemv!(trans, elty(2), view(A, :, 2:2:4), view(x, 5:-2:1), elty(3), y) ≈ 2*f(A[:,2:2:4])*x[5:-2:1] + 3*ycopy
+                    ycopy = copy(y)
+                    @test BLAS.gemv!(trans, elty(2), view(A, :, 4:-2:2), view(x, 5:-2:1), elty(3), y) ≈ 2*f(A[:,4:-2:2])*x[5:-2:1] + 3*ycopy
+                end
+                @test BLAS.gemv!(trans, elty(2), view(A, :, StepRangeLen(1,0,1)), view(x, 1:2:5), elty(3), elty[1]) ≈ 2*f(A[:,1:1])*x[1:2:5] + elty[3] # stride(A,2) == 0
+            end
+            for trans = ('N', 'T', 'C')
+                @test_throws ErrorException BLAS.gemv(trans, view(A, 1:2:3, 1:2), view(x, 1:2)) # stride(A,1) must be 1
+            end
+        end
     end
     @testset "gemm" begin
         @test all(BLAS.gemm('N', 'N', I4, I4) .== I4)
@@ -459,6 +521,7 @@ Base.setindex!(A::WrappedArray{T, N}, v, I::Vararg{Int, N}) where {T, N} = setin
 Base.unsafe_convert(::Type{Ptr{T}}, A::WrappedArray{T}) where T = Base.unsafe_convert(Ptr{T}, A.A)
 
 Base.strides(A::WrappedArray) = strides(A.A)
+Base.elsize(::Type{WrappedArray{T,N}}) where {T,N} = Base.elsize(Array{T,N})
 
 @testset "strided interface adjtrans" begin
     x = WrappedArray([1, 2, 3, 4])
@@ -499,7 +562,7 @@ end
 
 @testset "strided interface blas" begin
     for elty in (Float32, Float64, ComplexF32, ComplexF64)
-    # Level 1
+    # Level 1
         x = WrappedArray(elty[1, 2, 3, 4])
         y = WrappedArray(elty[5, 6, 7, 8])
         BLAS.blascopy!(2, x, 1, y, 2)
@@ -514,6 +577,11 @@ end
         BLAS.axpby!(elty(2), x, elty(3), y)
         @test y == WrappedArray(elty[19, 50, 30, 56])
         @test BLAS.iamax(x) == 2
+
+        M = fill(elty(1.0), 3, 3)
+        @test BLAS.scal!(elty(2), view(M,:,2)) === view(M,:,2)
+        @test BLAS.scal!(elty(3), view(M,3,:)) === view(M,3,:)
+        @test M == elty[1. 2. 1.; 1. 2. 1.; 3. 6. 3.]
     # Level 2
         A = WrappedArray(elty[1 2; 3 4])
         x = WrappedArray(elty[1, 2])
@@ -554,7 +622,7 @@ end
         x = WrappedArray(elty[1, 2, 3, 4])
         y = WrappedArray(elty[5, 6, 7, 8])
         @test BLAS.dot(2, x, 1, y, 2) == elty(19)
-    # Level 2
+    # Level 2
         A = WrappedArray(elty[1 2; 3 4])
         x = WrappedArray(elty[1, 2])
         y = WrappedArray(elty[3, 4])
@@ -598,23 +666,38 @@ end
     @test BLAS.get_num_threads() === 1
     BLAS.set_num_threads(default)
     @test BLAS.get_num_threads() === default
+end
 
-    @test_logs (:warn,) match_mode=:any BLAS._set_num_threads(1, _blas=:unknown)
-    if BLAS.guess_vendor() !== :osxblas
-        # test osxblas which is not covered by CI
-        withenv("VECLIB_MAXIMUM_THREADS" => nothing) do
-            @test @test_logs(
-                (:warn,),
-                (:warn,),
-                match_mode=:any,
-                BLAS._get_num_threads(_blas=:osxblas),
-            ) === nothing
-            @test_logs BLAS._set_num_threads(1, _blas=:osxblas)
-            @test @test_logs(BLAS._get_num_threads(_blas=:osxblas)) === 1
-            @test_logs BLAS._set_num_threads(2, _blas=:osxblas)
-            @test @test_logs(BLAS._get_num_threads(_blas=:osxblas)) === 2
+@testset "test for 0-strides" for elty in (Float32, Float64, ComplexF32, ComplexF64)
+    A = randn(elty, 10, 10);
+    a = view([randn(elty)], 1 .+ 0(1:10))
+    b = view([randn(elty)], 1 .+ 0(1:10))
+    α, β = randn(elty), randn(elty)
+    @testset "dot/dotc/dotu" begin
+        if elty <: Real
+            @test BLAS.dot(a,b) ≈ sum(a.*b)
+        else
+            @test BLAS.dotc(a,b) ≈ sum(conj(a).*b)
+            @test BLAS.dotu(a,b) ≈ sum(a.*b)
         end
     end
+    @testset "axp(b)y!" begin
+        @test BLAS.axpy!(α,a,copy(b)) ≈ α*a + b
+        @test BLAS.axpby!(α,a,β,copy(b)) ≈ α*a + β*b
+        @test_throws "dest" BLAS.axpy!(α,a,b)
+        @test_throws "dest" BLAS.axpby!(α,a,β,b)
+    end
+    @test BLAS.iamax(a) == 0
+    @test_throws "dest" BLAS.scal!(b[1], a)
+    @testset "nrm2/asum" begin # OpenBLAS allways return 0.0
+        @test_throws "input" BLAS.nrm2(a)
+        @test_throws "input" BLAS.asum(a)
+    end
+    # All level2 reject 0-stride array.
+    @testset "gemv!" begin
+        @test_throws "input" BLAS.gemv!('N', true, A, a, false, copy(b))
+        @test_throws "dest" BLAS.gemv!('N', true, A, copy(a), false, b)
+    end
 end
 
 end # module TestBLAS
diff --git a/stdlib/LinearAlgebra/test/bunchkaufman.jl b/stdlib/LinearAlgebra/test/bunchkaufman.jl
index 5098f818f18043..f1da22d8733e26 100644
--- a/stdlib/LinearAlgebra/test/bunchkaufman.jl
+++ b/stdlib/LinearAlgebra/test/bunchkaufman.jl
@@ -12,7 +12,7 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(12343210)
+Random.seed!(12343212)
 
 areal = randn(n,n)/2
 aimg  = randn(n,n)/2
@@ -114,7 +114,8 @@ bimg  = randn(n,2)/2
                         @test logabsdet(bc2)[2] == sign(det(bc2))
                         @test inv(bc2)*apd ≈ Matrix(I, n, n)
                         @test apd*(bc2\b) ≈ b rtol=eps(cond(apd))
-                        @test ishermitian(bc2) == !issymmetric(bc2)
+                        @test ishermitian(bc2)
+                        @test !issymmetric(bc2) || eltya <: Real
                     end
                 end
             end
@@ -171,4 +172,22 @@ end
     end
 end
 
+@testset "adjoint of BunchKaufman" begin
+    Ar = randn(5, 5)
+    Ar = Ar + Ar'
+    Actmp = complex.(randn(5, 5), randn(5, 5))
+    Ac1 = Actmp + Actmp'
+    Ac2 = Actmp + transpose(Actmp)
+    b = ones(size(Ar, 1))
+
+    F = bunchkaufman(Ar)
+    @test F\b == F'\b
+
+    F = bunchkaufman(Ac1)
+    @test F\b == F'\b
+
+    F = bunchkaufman(Ac2)
+    @test_throws ArgumentError("adjoint not implemented for complex symmetric matrices") F'
+end
+
 end # module TestBunchKaufman
diff --git a/stdlib/LinearAlgebra/test/cholesky.jl b/stdlib/LinearAlgebra/test/cholesky.jl
index 86a78a9e954fe5..8e6cac65f7dfb3 100644
--- a/stdlib/LinearAlgebra/test/cholesky.jl
+++ b/stdlib/LinearAlgebra/test/cholesky.jl
@@ -12,6 +12,10 @@ function unary_ops_tests(a, ca, tol; n=size(a, 1))
     @test abs((det(ca) - det(a))/det(ca)) <= tol # Ad hoc, but statistically verified, revisit
     @test logdet(ca) ≈ logdet(a)
     @test logdet(ca) ≈ log(det(ca))  # logdet is less likely to overflow
+    logabsdet_ca = logabsdet(ca)
+    logabsdet_a = logabsdet(a)
+    @test logabsdet_ca[1] ≈ logabsdet_a[1]
+    @test logabsdet_ca[2] ≈ logabsdet_a[2]
     @test isposdef(ca)
     @test_throws ErrorException ca.Z
     @test size(ca) == size(a)
@@ -39,7 +43,7 @@ end
     n1 = div(n, 2)
     n2 = 2*n1
 
-    Random.seed!(12343)
+    Random.seed!(12344)
 
     areal = randn(n,n)/2
     aimg  = randn(n,n)/2
@@ -120,8 +124,15 @@ end
         end
 
         # test cholesky of 2x2 Strang matrix
-        S = Matrix{eltya}(SymTridiagonal([2, 2], [-1]))
+        S = SymTridiagonal{eltya}([2, 2], [-1])
+        for uplo in (:U, :L)
+            @test Matrix(@inferred cholesky(Hermitian(S, uplo))) ≈ S
+            if eltya <: Real
+                @test Matrix(@inferred cholesky(Symmetric(S, uplo))) ≈ S
+            end
+        end
         @test Matrix(cholesky(S).U) ≈ [2 -1; 0 sqrt(eltya(3))] / sqrt(eltya(2))
+        @test Matrix(cholesky(S)) ≈ S
 
         # test extraction of factor and re-creating original matrix
         if eltya <: Real
@@ -132,7 +143,7 @@ end
 
         #pivoted upper Cholesky
         if eltya != BigFloat
-            cpapd = cholesky(apdh, Val(true))
+            cpapd = cholesky(apdh, RowMaximum())
             unary_ops_tests(apdh, cpapd, ε*κ*n)
             @test rank(cpapd) == n
             @test all(diff(diag(real(cpapd.factors))).<=0.) # diagonal should be non-increasing
@@ -163,11 +174,11 @@ end
 
                 if eltya != BigFloat && eltyb != BigFloat # Note! Need to implement pivoted Cholesky decomposition in julia
 
-                    cpapd = cholesky(apdh, Val(true))
+                    cpapd = cholesky(apdh, RowMaximum())
                     @test norm(apd * (cpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit
                     @test norm(apd * (cpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
 
-                    lpapd = cholesky(apdhL, Val(true))
+                    lpapd = cholesky(apdhL, RowMaximum())
                     @test norm(apd * (lpapd\b) - b)/norm(b) <= ε*κ*n # Ad hoc, revisit
                     @test norm(apd * (lpapd\b[1:n]) - b[1:n])/norm(b[1:n]) <= ε*κ*n
                 end
@@ -190,7 +201,7 @@ end
                 @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
                 @test norm(apd * BB - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
                 if eltya != BigFloat
-                    cpapd = cholesky(apdh, Val(true))
+                    cpapd = cholesky(apdh, RowMaximum())
                     BB = copy(B)
                     ldiv!(cpapd, BB)
                     @test norm(apd \ B - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
@@ -221,12 +232,12 @@ end
                 @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
                 @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
                 if eltya != BigFloat
-                    cpapd = cholesky(eltya <: Complex ? apdh : apds, Val(true))
+                    cpapd = cholesky(eltya <: Complex ? apdh : apds, RowMaximum())
                     BB = copy(B)
                     rdiv!(BB, cpapd)
                     @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
                     @test norm(BB * apd - B, 1) / norm(B, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
-                    cpapd = cholesky(eltya <: Complex ? apdhL : apdsL, Val(true))
+                    cpapd = cholesky(eltya <: Complex ? apdhL : apdsL, RowMaximum())
                     BB = copy(B)
                     rdiv!(BB, cpapd)
                     @test norm(B / apd - BB, 1) / norm(BB, 1) <= (3n^2 + n + n^3*ε)*ε/(1-(n+1)*ε)*κ
@@ -262,15 +273,15 @@ end
         @test !LinearAlgebra.issuccess(cholesky!(copy(M); check = false))
     end
     for M in (A, Hermitian(A), B)
-        @test_throws RankDeficientException cholesky(M, Val(true))
-        @test_throws RankDeficientException cholesky!(copy(M), Val(true))
-        @test_throws RankDeficientException cholesky(M, Val(true); check = true)
-        @test_throws RankDeficientException cholesky!(copy(M), Val(true); check = true)
-        @test !LinearAlgebra.issuccess(cholesky(M, Val(true); check = false))
-        @test !LinearAlgebra.issuccess(cholesky!(copy(M), Val(true); check = false))
-        C = cholesky(M, Val(true); check = false)
+        @test_throws RankDeficientException cholesky(M, RowMaximum())
+        @test_throws RankDeficientException cholesky!(copy(M), RowMaximum())
+        @test_throws RankDeficientException cholesky(M, RowMaximum(); check = true)
+        @test_throws RankDeficientException cholesky!(copy(M), RowMaximum(); check = true)
+        @test !LinearAlgebra.issuccess(cholesky(M, RowMaximum(); check = false))
+        @test !LinearAlgebra.issuccess(cholesky!(copy(M), RowMaximum(); check = false))
+        C = cholesky(M, RowMaximum(); check = false)
         @test_throws RankDeficientException chkfullrank(C)
-        C = cholesky!(copy(M), Val(true); check = false)
+        C = cholesky!(copy(M), RowMaximum(); check = false)
         @test_throws RankDeficientException chkfullrank(C)
     end
     @test !isposdef(A)
@@ -287,20 +298,37 @@ end
     @test sum(sum(norm, U'*U - XX)) < eps()
 end
 
+@testset "Non-strided Cholesky solves" begin
+    B = randn(5, 5)
+    v = rand(5)
+    @test cholesky(Diagonal(v)) \ B ≈ Diagonal(v) \ B
+    @test B / cholesky(Diagonal(v)) ≈ B / Diagonal(v)
+end
+
+struct WrappedVector{T} <: AbstractVector{T}
+    data::Vector{T}
+end
+Base.copy(v::WrappedVector) = WrappedVector(copy(v.data))
+Base.size(v::WrappedVector) = size(v.data)
+Base.getindex(v::WrappedVector, i::Integer) = getindex(v.data, i)
+Base.setindex!(v::WrappedVector, val, i::Integer) = setindex!(v.data, val, i)
 
 @testset "cholesky up- and downdates" begin
     A = complex.(randn(10,5), randn(10, 5))
     v = complex.(randn(5), randn(5))
+    w = WrappedVector(v)
     for uplo in (:U, :L)
         AcA = A'*A
         BcB = AcA + v*v'
         BcB = (BcB + BcB')/2
         F = cholesky(Hermitian(AcA, uplo))
         G = cholesky(Hermitian(BcB, uplo))
-        @test Base.getproperty(LinearAlgebra.lowrankupdate(F, v), uplo) ≈ Base.getproperty(G, uplo)
-        @test_throws DimensionMismatch LinearAlgebra.lowrankupdate(F, Vector{eltype(v)}(undef,length(v)+1))
-        @test Base.getproperty(LinearAlgebra.lowrankdowndate(G, v), uplo) ≈ Base.getproperty(F, uplo)
-        @test_throws DimensionMismatch LinearAlgebra.lowrankdowndate(G, Vector{eltype(v)}(undef,length(v)+1))
+        @test getproperty(lowrankupdate(F, v), uplo) ≈ getproperty(G, uplo)
+        @test getproperty(lowrankupdate(F, w), uplo) ≈ getproperty(G, uplo)
+        @test_throws DimensionMismatch lowrankupdate(F, Vector{eltype(v)}(undef,length(v)+1))
+        @test getproperty(lowrankdowndate(G, v), uplo) ≈ getproperty(F, uplo)
+        @test getproperty(lowrankdowndate(G, w), uplo) ≈ getproperty(F, uplo)
+        @test_throws DimensionMismatch lowrankdowndate(G, Vector{eltype(v)}(undef,length(v)+1))
     end
 end
 
@@ -325,7 +353,7 @@ end
         0.25336108035924787 + 0.975317836492159im 0.0628393808469436 - 0.1253397353973715im
         0.11192755545114 - 0.1603741874112385im 0.8439562576196216 + 1.0850814110398734im
         -1.0568488936791578 - 0.06025820467086475im 0.12696236014017806 - 0.09853584666755086im]
-    cholesky(Hermitian(apd, :L), Val(true)) \ b
+    cholesky(Hermitian(apd, :L), RowMaximum()) \ b
     r = factorize(apd).U
     E = abs.(apd - r'*r)
     ε = eps(abs(float(one(ComplexF32))))
@@ -336,7 +364,7 @@ end
 end
 
 @testset "fail for non-BLAS element types" begin
-    @test_throws ArgumentError cholesky!(Hermitian(rand(Float16, 5,5)), Val(true))
+    @test_throws ArgumentError cholesky!(Hermitian(rand(Float16, 5,5)), RowMaximum())
 end
 
 @testset "cholesky Diagonal" begin
@@ -350,6 +378,10 @@ end
     @test D ≈ CD.L * CD.U
     @test CD.info == 0
 
+    F = cholesky(Hermitian(I(3)))
+    @test F isa Cholesky{Float64,<:Diagonal}
+    @test Matrix(F) ≈ I(3)
+
     # real, failing
     @test_throws PosDefException cholesky(Diagonal([1.0, -2.0]))
     Dnpd = cholesky(Diagonal([1.0, -2.0]); check = false)
@@ -384,14 +416,11 @@ end
     @test Cholesky(factors, uplo, Int32(info)) == chol
     @test Cholesky(factors, uplo, Int64(info)) == chol
 
-    cholp = cholesky(x'x, Val(true))
+    cholp = cholesky(x'x, RowMaximum())
 
     factors, uplo, piv, rank, tol, info =
         cholp.factors, cholp.uplo, cholp.piv, cholp.rank, cholp.tol, cholp.info
 
-    @test CholeskyPivoted(factors, uplo, Vector{Int32}(piv), rank, tol, info) == cholp
-    @test CholeskyPivoted(factors, uplo, Vector{Int64}(piv), rank, tol, info) == cholp
-
     @test CholeskyPivoted(factors, uplo, piv, Int32(rank), tol, info) == cholp
     @test CholeskyPivoted(factors, uplo, piv, Int64(rank), tol, info) == cholp
 
@@ -403,25 +432,25 @@ end
 @testset "issue #33704, casting low-rank CholeskyPivoted to Matrix" begin
     A = randn(1,8)
     B = A'A
-    C = cholesky(B, Val(true), check=false)
+    C = cholesky(B, RowMaximum(), check=false)
     @test B ≈ Matrix(C)
 end
 
 @testset "CholeskyPivoted and Factorization" begin
     A = randn(8,8)
     B = A'A
-    C = cholesky(B, Val(true), check=false)
+    C = cholesky(B, RowMaximum(), check=false)
     @test CholeskyPivoted{eltype(C)}(C) === C
     @test Factorization{eltype(C)}(C) === C
-    @test Array(CholeskyPivoted{complex(eltype(C))}(C)) ≈ Array(cholesky(complex(B), Val(true), check=false))
-    @test Array(Factorization{complex(eltype(C))}(C)) ≈ Array(cholesky(complex(B), Val(true), check=false))
+    @test Array(CholeskyPivoted{complex(eltype(C))}(C)) ≈ Array(cholesky(complex(B), RowMaximum(), check=false))
+    @test Array(Factorization{complex(eltype(C))}(C)) ≈ Array(cholesky(complex(B), RowMaximum(), check=false))
     @test eltype(Factorization{complex(eltype(C))}(C)) == complex(eltype(C))
 end
 
 @testset "REPL printing of CholeskyPivoted" begin
     A = randn(8,8)
     B = A'A
-    C = cholesky(B, Val(true), check=false)
+    C = cholesky(B, RowMaximum(), check=false)
     cholstring = sprint((t, s) -> show(t, "text/plain", s), C)
     rankstring = "$(C.uplo) factor with rank $(rank(C)):"
     factorstring = sprint((t, s) -> show(t, "text/plain", s), C.uplo == 'U' ? C.U : C.L)
@@ -430,10 +459,10 @@ end
 end
 
 @testset "destructuring for Cholesky[Pivoted]" begin
-    for val in (true, false)
+    for val in (NoPivot(), RowMaximum())
         A = rand(8, 8)
         B = A'A
-        C = cholesky(B, Val(val), check=false)
+        C = cholesky(B, val, check=false)
         l, u = C
         @test l == C.L
         @test u == C.U
@@ -447,4 +476,66 @@ end
     @test Matrix(cholesky(C)) ≈ C
 end
 
+@testset "constructing a Cholesky factor from a triangular matrix" begin
+    A = [1.0 2.0; 3.0 4.0]
+    let
+        U = UpperTriangular(A)
+        C = Cholesky(U)
+        @test C isa Cholesky{Float64}
+        @test C.U == U
+        @test C.L == U'
+    end
+    let
+        L = LowerTriangular(A)
+        C = Cholesky(L)
+        @test C isa Cholesky{Float64}
+        @test C.L == L
+        @test C.U == L'
+    end
+end
+
+@testset "adjoint of Cholesky" begin
+    A = randn(5, 5)
+    A = A'A
+    F = cholesky(A)
+    b = ones(size(A, 1))
+    @test F\b == F'\b
+end
+
+@testset "Float16" begin
+    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
+    B = cholesky(A)
+    B32 = cholesky(Float32.(A))
+    @test B isa Cholesky{Float16, Matrix{Float16}}
+    @test B.U isa UpperTriangular{Float16, Matrix{Float16}}
+    @test B.L isa LowerTriangular{Float16, Matrix{Float16}}
+    @test B.UL isa UpperTriangular{Float16, Matrix{Float16}}
+    @test B.U ≈ B32.U
+    @test B.L ≈ B32.L
+    @test B.UL ≈ B32.UL
+    @test Matrix(B) ≈ A
+    B = cholesky(A, RowMaximum())
+    B32 = cholesky(Float32.(A), RowMaximum())
+    @test B isa CholeskyPivoted{Float16,Matrix{Float16}}
+    @test B.U isa UpperTriangular{Float16, Matrix{Float16}}
+    @test B.L isa LowerTriangular{Float16, Matrix{Float16}}
+    @test B.U ≈ B32.U
+    @test B.L ≈ B32.L
+    @test Matrix(B) ≈ A
+end
+
+@testset "det and logdet" begin
+    A = [4083 3825 5876 2048 4470 5490;
+         3825 3575 5520 1920 4200 5140;
+         5876 5520 8427 2940 6410 7903;
+         2048 1920 2940 1008 2240 2740;
+         4470 4200 6410 2240 4875 6015;
+         5490 5140 7903 2740 6015 7370]
+    B = cholesky(A, RowMaximum(), check=false)
+    @test det(B)  ==  0.0
+    @test det(B)  ≈  det(A) atol=eps()
+    @test logdet(B)  ==  -Inf
+    @test logabsdet(B)[1] == -Inf
+ end
+
 end # module TestCholesky
diff --git a/stdlib/LinearAlgebra/test/dense.jl b/stdlib/LinearAlgebra/test/dense.jl
index 8ab539e8e3d590..9bdc732d1f67ae 100644
--- a/stdlib/LinearAlgebra/test/dense.jl
+++ b/stdlib/LinearAlgebra/test/dense.jl
@@ -15,30 +15,42 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(1234321)
+Random.seed!(1234323)
 
 @testset "Matrix condition number" begin
-    ainit = rand(n,n)
+    ainit = rand(n, n)
     @testset "for $elty" for elty in (Float32, Float64, ComplexF32, ComplexF64)
         ainit = convert(Matrix{elty}, ainit)
         for a in (copy(ainit), view(ainit, 1:n, 1:n))
-            @test cond(a,1) ≈ 4.837320054554436e+02 atol=0.01
-            @test cond(a,2) ≈ 1.960057871514615e+02 atol=0.01
-            @test cond(a,Inf) ≈ 3.757017682707787e+02 atol=0.01
-            @test cond(a[:,1:5]) ≈ 10.233059337453463 atol=0.01
+            ainv = inv(a)
+            @test cond(a, 1)   == opnorm(a, 1)  *opnorm(ainv, 1)
+            @test cond(a, Inf) == opnorm(a, Inf)*opnorm(ainv, Inf)
+            @test cond(a[:, 1:5]) == (\)(extrema(svdvals(a[:, 1:5]))...)
             @test_throws ArgumentError cond(a,3)
         end
     end
     @testset "Singular matrices" for p in (1, 2, Inf)
         @test cond(zeros(Int, 2, 2), p) == Inf
-        @test cond(zeros(2, 2), p) == Inf
-        @test cond([0 0; 1 1], p) == Inf
-        @test cond([0. 0.; 1. 1.], p) == Inf
+        @test cond(zeros(2, 2), p)      == Inf
+        @test cond([0 0; 1 1], p)       == Inf
+        @test cond([0. 0.; 1. 1.], p)   == Inf
     end
     @testset "Issue #33547, condition number of 2x2 matrix" begin
-        M = [1.0 -2.0; -2.0 -1.5]
+        M = [1.0 -2.0
+            -2.0 -1.5]
         @test cond(M, 1) ≈ 2.227272727272727
     end
+    @testset "Condition numbers of a non-random matrix" begin
+        # To ensure that we detect any regressions in the underlying functions
+        Mars= [11  24   7  20   3
+                4  12  25   8  16
+               17   5  13  21   9
+               10  18   1  14  22
+               23   6  19   2  15]
+        @test cond(Mars, 1)   ≈ 7.1
+        @test cond(Mars, 2)   ≈ 6.181867355918493
+        @test cond(Mars, Inf) ≈ 7.1
+    end
 end
 
 areal = randn(n,n)/2
@@ -88,41 +100,23 @@ bimg  = randn(n,2)/2
                 @test nullspace(zeros(eltya,n)) == Matrix(I, 1, 1)
                 @test nullspace(zeros(eltya,n), 0.1) == Matrix(I, 1, 1)
                 # test empty cases
-                @test nullspace(zeros(n, 0)) == Matrix(I, 0, 0)
-                @test nullspace(zeros(0, n)) == Matrix(I, n, n)
+                @test @inferred(nullspace(zeros(n, 0))) == Matrix(I, 0, 0)
+                @test @inferred(nullspace(zeros(0, n))) == Matrix(I, n, n)
+                # test vector cases
+                @test size(@inferred nullspace(a[:, 1])) == (1, 0)
+                @test size(@inferred nullspace(zero(a[:, 1]))) == (1, 1)
+                @test nullspace(zero(a[:, 1]))[1,1] == 1
+                # test adjortrans vectors, including empty ones
+                @test size(@inferred nullspace(a[:, 1]')) == (n, n - 1)
+                @test @inferred(nullspace(a[1:0, 1]')) == Matrix(I, 0, 0)
+                @test size(@inferred nullspace(b[1, :]')) == (2, 1)
+                @test @inferred(nullspace(b[1, 1:0]')) == Matrix(I, 0, 0)
+                @test size(@inferred nullspace(transpose(a[:, 1]))) == (n, n - 1)
+                @test size(@inferred nullspace(transpose(b[1, :]))) == (2, 1)
             end
         end
     end # for eltyb
 
-@testset "Test diagm for vectors" begin
-    @test diagm(zeros(50)) == diagm(0 => zeros(50))
-    @test diagm(ones(50)) == diagm(0 => ones(50))
-    v = randn(500)
-    @test diagm(v) == diagm(0 => v)
-    @test diagm(500, 501, v) == diagm(500, 501, 0 => v)
-end
-
-@testset "Non-square diagm" begin
-    x = [7, 8]
-    for m=1:4, n=2:4
-        if m < 2 || n < 3
-            @test_throws DimensionMismatch diagm(m,n, 0 => x,  1 => x)
-            @test_throws DimensionMismatch diagm(n,m, 0 => x,  -1 => x)
-        else
-            M = zeros(m,n)
-            M[1:2,1:3] = [7 7 0; 0 8 8]
-            @test diagm(m,n, 0 => x,  1 => x) == M
-            @test diagm(n,m, 0 => x,  -1 => x) == M'
-        end
-    end
-end
-
-@testset "Test pinv (rtol, atol)" begin
-    M = [1 0 0; 0 1 0; 0 0 0]
-    @test pinv(M,atol=1)== zeros(3,3)
-    @test pinv(M,rtol=0.5)== M
-end
-
     for (a, a2) in ((copy(ainit), copy(ainit2)), (view(ainit, 1:n, 1:n), view(ainit2, 1:n, 1:n)))
         @testset "Test pinv" begin
             pinva15 = pinv(a[:,1:n1])
@@ -145,9 +139,21 @@ end
         @testset "Matrix square root" begin
             asq = sqrt(a)
             @test asq*asq ≈ a
+            @test sqrt(transpose(a))*sqrt(transpose(a)) ≈ transpose(a)
+            @test sqrt(adjoint(a))*sqrt(adjoint(a)) ≈ adjoint(a)
             asym = a + a' # symmetric indefinite
             asymsq = sqrt(asym)
             @test asymsq*asymsq ≈ asym
+            @test sqrt(transpose(asym))*sqrt(transpose(asym)) ≈ transpose(asym)
+            @test sqrt(adjoint(asym))*sqrt(adjoint(asym)) ≈ adjoint(asym)
+            if eltype(a) <: Real  # real square root
+                apos = a * a
+                @test sqrt(apos)^2 ≈ apos
+                @test eltype(sqrt(apos)) <: Real
+                # test that real but Complex input produces Complex output
+                @test sqrt(complex(apos)) ≈ sqrt(apos)
+                @test eltype(sqrt(complex(apos))) <: Complex
+            end
         end
 
         @testset "Powers" begin
@@ -185,9 +191,41 @@ end
         @test Matrix(factorize(A)) ≈ Matrix(factorize(Tridiagonal(e2,d,e)))
         A = diagm(0 => d, 1 => e, 2 => f)
         @test factorize(A) == UpperTriangular(A)
+
+        x = rand(eltya)
+        @test factorize(x) == x
     end
 end # for eltya
 
+@testset "Test diagm for vectors" begin
+    @test diagm(zeros(50)) == diagm(0 => zeros(50))
+    @test diagm(ones(50)) == diagm(0 => ones(50))
+    v = randn(500)
+    @test diagm(v) == diagm(0 => v)
+    @test diagm(500, 501, v) == diagm(500, 501, 0 => v)
+end
+
+@testset "Non-square diagm" begin
+    x = [7, 8]
+    for m=1:4, n=2:4
+        if m < 2 || n < 3
+            @test_throws DimensionMismatch diagm(m,n, 0 => x,  1 => x)
+            @test_throws DimensionMismatch diagm(n,m, 0 => x,  -1 => x)
+        else
+            M = zeros(m,n)
+            M[1:2,1:3] = [7 7 0; 0 8 8]
+            @test diagm(m,n, 0 => x,  1 => x) == M
+            @test diagm(n,m, 0 => x,  -1 => x) == M'
+        end
+    end
+end
+
+@testset "Test pinv (rtol, atol)" begin
+    M = [1 0 0; 0 1 0; 0 0 0]
+    @test pinv(M,atol=1)== zeros(3,3)
+    @test pinv(M,rtol=0.5)== M
+end
+
 @testset "test out of bounds triu/tril" begin
     local m, n = 5, 7
     ainit = rand(m, n)
@@ -439,6 +477,11 @@ end
                                      183.765138646367 183.765138646366  163.679601723179;
                                       71.797032399996  91.8825693231832 111.968106246371]')
         @test exp(A1) ≈ eA1
+        @test exp(adjoint(A1)) ≈ adjoint(eA1)
+        @test exp(transpose(A1)) ≈ transpose(eA1)
+        for f in (sin, cos, sinh, cosh, tanh, tan)
+            @test f(adjoint(A1)) ≈ f(copy(adjoint(A1)))
+        end
 
         A2  = convert(Matrix{elty},
                       [29.87942128909879    0.7815750847907159 -2.289519314033932;
@@ -449,26 +492,45 @@ end
                        -18231880972009252.0  60605228702221920.0 101291842930249760.0;
                        -30475770808580480.0 101291842930249728.0 169294411240851968.0])
         @test exp(A2) ≈ eA2
+        @test exp(adjoint(A2)) ≈ adjoint(eA2)
+        @test exp(transpose(A2)) ≈ transpose(eA2)
 
         A3  = convert(Matrix{elty}, [-131 19 18;-390 56 54;-387 57 52])
         eA3 = convert(Matrix{elty}, [-1.50964415879218 -5.6325707998812  -4.934938326092;
                                       0.367879439109187 1.47151775849686  1.10363831732856;
                                       0.135335281175235 0.406005843524598 0.541341126763207]')
         @test exp(A3) ≈ eA3
+        @test exp(adjoint(A3)) ≈ adjoint(eA3)
+        @test exp(transpose(A3)) ≈ transpose(eA3)
 
         A4 = convert(Matrix{elty}, [0.25 0.25; 0 0])
         eA4 = convert(Matrix{elty}, [1.2840254166877416 0.2840254166877415; 0 1])
         @test exp(A4) ≈ eA4
+        @test exp(adjoint(A4)) ≈ adjoint(eA4)
+        @test exp(transpose(A4)) ≈ transpose(eA4)
 
         A5 = convert(Matrix{elty}, [0 0.02; 0 0])
         eA5 = convert(Matrix{elty}, [1 0.02; 0 1])
         @test exp(A5) ≈ eA5
+        @test exp(adjoint(A5)) ≈ adjoint(eA5)
+        @test exp(transpose(A5)) ≈ transpose(eA5)
 
         # Hessenberg
         @test hessenberg(A1).H ≈ convert(Matrix{elty},
                                                  [4.000000000000000  -1.414213562373094  -1.414213562373095
                                                   -1.414213562373095   4.999999999999996  -0.000000000000000
                                                   0  -0.000000000000002   3.000000000000000])
+
+        # cis always returns a complex matrix
+        if elty <: Real
+            eltyim = Complex{elty}
+        else
+            eltyim = elty
+        end
+
+        @test cis(A1) ≈ convert(Matrix{eltyim}, [-0.339938 + 0.000941506im   0.772659  - 0.8469im     0.52745  + 0.566543im;
+                                                  0.650054 - 0.140179im     -0.0762135 + 0.284213im   0.38633  - 0.42345im ;
+                                                  0.650054 - 0.140179im      0.913779  + 0.143093im  -0.603663 - 0.28233im ]) rtol=7e-7
     end
 
     @testset "Additional tests for $elty" for elty in (Float64, ComplexF64)
@@ -477,15 +539,23 @@ end
                                      1/4 1/5 1/6 1/7;
                                      1/5 1/6 1/7 1/8])
         @test exp(log(A4)) ≈ A4
+        @test exp(log(transpose(A4))) ≈ transpose(A4)
+        @test exp(log(adjoint(A4))) ≈ adjoint(A4)
 
         A5  = convert(Matrix{elty}, [1 1 0 1; 0 1 1 0; 0 0 1 1; 1 0 0 1])
         @test exp(log(A5)) ≈ A5
+        @test exp(log(transpose(A5))) ≈ transpose(A5)
+        @test exp(log(adjoint(A5))) ≈ adjoint(A5)
 
         A6  = convert(Matrix{elty}, [-5 2 0 0 ; 1/2 -7 3 0; 0 1/3 -9 4; 0 0 1/4 -11])
         @test exp(log(A6)) ≈ A6
+        @test exp(log(transpose(A6))) ≈ transpose(A6)
+        @test exp(log(adjoint(A6))) ≈ adjoint(A6)
 
         A7  = convert(Matrix{elty}, [1 0 0 1e-8; 0 1 0 0; 0 0 1 0; 0 0 0 1])
         @test exp(log(A7)) ≈ A7
+        @test exp(log(transpose(A7))) ≈ transpose(A7)
+        @test exp(log(adjoint(A7))) ≈ adjoint(A7)
     end
 
     @testset "Integer promotion tests" begin
@@ -552,8 +622,13 @@ end
             @test cos(A) ≈ cos(-A)
             @test sin(A) ≈ -sin(-A)
             @test tan(A) ≈ sin(A) / cos(A)
+
             @test cos(A) ≈ real(exp(im*A))
             @test sin(A) ≈ imag(exp(im*A))
+            @test cos(A) ≈ real(cis(A))
+            @test sin(A) ≈ imag(cis(A))
+            @test cis(A) ≈ cos(A) + im * sin(A)
+
             @test cosh(A) ≈ 0.5 * (exp(A) + exp(-A))
             @test sinh(A) ≈ 0.5 * (exp(A) - exp(-A))
             @test cosh(A) ≈ cosh(-A)
@@ -597,6 +672,9 @@ end
 
         @test cos(A5) ≈ 0.5 * (exp(im*A5) + exp(-im*A5))
         @test sin(A5) ≈ -0.5im * (exp(im*A5) - exp(-im*A5))
+        @test cos(A5) ≈ 0.5 * (cis(A5) + cis(-A5))
+        @test sin(A5) ≈ -0.5im * (cis(A5) - cis(-A5))
+
         @test cosh(A5) ≈ 0.5 * (exp(A5) + exp(-A5))
         @test sinh(A5) ≈ 0.5 * (exp(A5) - exp(-A5))
     end
@@ -708,9 +786,6 @@ end
     A11 = convert(Matrix{elty}, [3 2; -5 -3])
     @test exp(log(A11)) ≈ A11
 
-    A12 = convert(Matrix{elty}, [1 -1; 1 -1])
-    @test typeof(log(A12)) == Array{ComplexF64, 2}
-
     A13 = convert(Matrix{elty}, [2 0; 0 2])
     @test typeof(log(A13)) == Array{elty, 2}
 
@@ -723,6 +798,7 @@ end
                                     0.2310490602 0.1969543025 1.363756107])
     @test log(A1) ≈ logA1
     @test exp(log(A1)) ≈ A1
+    @test typeof(log(A1)) == Matrix{elty}
 
     A4  = convert(Matrix{elty}, [1/2 1/3 1/4 1/5+eps();
                                  1/3 1/4 1/5 1/6;
@@ -734,6 +810,166 @@ end
                                     0.2414170219 0.5865285289 3.318413247 -5.444632124])
     @test log(A4) ≈ logA4
     @test exp(log(A4)) ≈ A4
+    @test typeof(log(A4)) == Matrix{elty}
+
+    # real triu matrix
+    A5  = convert(Matrix{elty}, [1 2 3; 0 4 5; 0 0 6])  # triu
+    logA5 = convert(Matrix{elty}, [0.0 0.9241962407465937 0.5563245488984037;
+                                   0.0 1.3862943611198906 1.0136627702704109;
+                                   0.0 0.0 1.791759469228055])
+    @test log(A5) ≈ logA5
+    @test exp(log(A5)) ≈ A5
+    @test typeof(log(A5)) == Matrix{elty}
+
+    # real quasitriangular schur form with 2 2x2 blocks, 2 1x1 blocks, and all positive eigenvalues
+    A6 = convert(Matrix{elty}, [2 3 2 2 3 1;
+                                1 3 3 2 3 1;
+                                3 3 3 1 1 2;
+                                2 1 2 2 2 2;
+                                1 1 2 2 3 1;
+                                2 2 2 2 1 3])
+    @test exp(log(A6)) ≈ A6
+    @test typeof(log(A6)) == Matrix{elty}
+
+    # real quasitriangular schur form with a negative eigenvalue
+    A7 = convert(Matrix{elty}, [1 3 3 2 2 2;
+                                1 2 1 3 1 2;
+                                3 1 2 3 2 1;
+                                3 1 2 2 2 1;
+                                3 1 3 1 2 1;
+                                1 1 3 1 1 3])
+    @test exp(log(A7)) ≈ A7
+    @test typeof(log(A7)) == Matrix{complex(elty)}
+
+    if elty <: Complex
+        A8 = convert(Matrix{elty}, [1 + 1im 1 + 1im 1 - 1im;
+                                    1 + 1im -1 + 1im 1 + 1im;
+                                    1 - 1im 1 + 1im -1 - 1im])
+        logA8 = convert(
+            Matrix{elty},
+            [0.9478628953131517 + 1.3725201223387407im -0.2547157147532057 + 0.06352318334299434im 0.8560050197863862 - 1.0471975511965979im;
+             -0.2547157147532066 + 0.06352318334299467im -0.16285783922644065 + 0.2617993877991496im 0.2547157147532063 + 2.1579182857361894im;
+             0.8560050197863851 - 1.0471975511965974im 0.25471571475320665 + 2.1579182857361903im 0.9478628953131519 - 0.8489213467404436im],
+        )
+        @test log(A8) ≈ logA8
+        @test exp(log(A8)) ≈ A8
+        @test typeof(log(A8)) == Matrix{elty}
+    end
+end
+
+@testset "matrix logarithm is type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64)
+    A1 = randn(elty, 4, 4)
+    @inferred Union{Matrix{elty},Matrix{complex(elty)}} log(A1)
+end
+
+@testset "Additional matrix square root tests" for elty in (Float64, ComplexF64)
+    A11 = convert(Matrix{elty}, [3 2; -5 -3])
+    @test sqrt(A11)^2 ≈ A11
+
+    A13 = convert(Matrix{elty}, [2 0; 0 2])
+    @test typeof(sqrt(A13)) == Array{elty, 2}
+
+    T = elty == Float64 ? Symmetric : Hermitian
+    @test typeof(sqrt(T(A13))) == T{elty, Array{elty, 2}}
+
+    A1  = convert(Matrix{elty}, [4 2 0; 1 4 1; 1 1 4])
+    sqrtA1 = convert(Matrix{elty}, [1.971197119306979 0.5113118387140085 -0.03301921523780871;
+                                   0.23914631173809942 1.9546875116880718 0.2556559193570036;
+                                   0.23914631173810008 0.22263670411919556 1.9877067269258815])
+    @test sqrt(A1) ≈ sqrtA1
+    @test sqrt(A1)^2 ≈ A1
+    @test typeof(sqrt(A1)) == Matrix{elty}
+
+    A4  = convert(Matrix{elty}, [1/2 1/3 1/4 1/5+eps();
+                                 1/3 1/4 1/5 1/6;
+                                 1/4 1/5 1/6 1/7;
+                                 1/5 1/6 1/7 1/8])
+                                 sqrtA4 = convert(
+        Matrix{elty},
+        [0.590697761556362 0.3055006800405779 0.19525404749300546 0.14007621469988107;
+         0.30550068004057784 0.2825388389385975 0.21857572599211642 0.17048692323164674;
+         0.19525404749300565 0.21857572599211622 0.21155429252242863 0.18976816626246887;
+         0.14007621469988046 0.17048692323164724 0.1897681662624689 0.20075085592778794],
+    )
+    @test sqrt(A4) ≈ sqrtA4
+    @test sqrt(A4)^2 ≈ A4
+    @test typeof(sqrt(A4)) == Matrix{elty}
+
+    # real triu matrix
+    A5  = convert(Matrix{elty}, [1 2 3; 0 4 5; 0 0 6])  # triu
+    sqrtA5 = convert(Matrix{elty}, [1.0 0.6666666666666666 0.6525169217864183;
+                                   0.0 2.0 1.1237243569579454;
+                                   0.0 0.0 2.449489742783178])
+    @test sqrt(A5) ≈ sqrtA5
+    @test sqrt(A5)^2 ≈ A5
+    @test typeof(sqrt(A5)) == Matrix{elty}
+
+    # real quasitriangular schur form with 2 2x2 blocks, 2 1x1 blocks, and all positive eigenvalues
+    A6 = convert(Matrix{elty}, [2 3 2 2 3 1;
+                                1 3 3 2 3 1;
+                                3 3 3 1 1 2;
+                                2 1 2 2 2 2;
+                                1 1 2 2 3 1;
+                                2 2 2 2 1 3])
+    @test sqrt(A6)^2 ≈ A6
+    @test typeof(sqrt(A6)) == Matrix{elty}
+
+    # real quasitriangular schur form with a negative eigenvalue
+    A7 = convert(Matrix{elty}, [1 3 3 2 2 2;
+                                1 2 1 3 1 2;
+                                3 1 2 3 2 1;
+                                3 1 2 2 2 1;
+                                3 1 3 1 2 1;
+                                1 1 3 1 1 3])
+    @test sqrt(A7)^2 ≈ A7
+    @test typeof(sqrt(A7)) == Matrix{complex(elty)}
+
+    if elty <: Complex
+        A8 = convert(Matrix{elty}, [1 + 1im 1 + 1im 1 - 1im;
+                                    1 + 1im -1 + 1im 1 + 1im;
+                                    1 - 1im 1 + 1im -1 - 1im])
+        sqrtA8 = convert(
+            Matrix{elty},
+            [1.2559748527474284 + 0.6741878819930323im 0.20910077991005582 + 0.24969165051825476im 0.591784212275146 - 0.6741878819930327im;
+             0.2091007799100553 + 0.24969165051825515im 0.3320953202361413 + 0.2915044496279425im 0.33209532023614136 + 1.0568713143581219im;
+             0.5917842122751455 - 0.674187881993032im 0.33209532023614147 + 1.0568713143581223im 0.7147787526012315 - 0.6323750828833452im],
+        )
+        @test sqrt(A8) ≈ sqrtA8
+        @test sqrt(A8)^2 ≈ A8
+        @test typeof(sqrt(A8)) == Matrix{elty}
+    end
+end
+
+@testset "issue #40141" begin
+    x = [-1 -eps() 0 0; eps() -1 0 0; 0 0 -1 -eps(); 0 0 eps() -1]
+    @test sqrt(x)^2 ≈ x
+
+    x2 =  [-1 -eps() 0 0; 3eps() -1 0 0; 0 0 -1 -3eps(); 0 0 eps() -1]
+    @test sqrt(x2)^2 ≈ x2
+
+    x3 = [-1 -eps() 0 0; eps() -1 0 0; 0 0 -1 -eps(); 0 0 eps() Inf]
+    @test all(isnan, sqrt(x3))
+
+    # test overflow/underflow handled
+    x4 = [0 -1e200; 1e200 0]
+    @test sqrt(x4)^2 ≈ x4
+
+    x5 = [0 -1e-200; 1e-200 0]
+    @test sqrt(x5)^2 ≈ x5
+
+    x6 = [1.0 1e200; -1e-200 1.0]
+    @test sqrt(x6)^2 ≈ x6
+end
+
+@testset "matrix logarithm block diagonal underflow/overflow" begin
+    x1 = [0 -1e200; 1e200 0]
+    @test exp(log(x1)) ≈ x1
+
+    x2 = [0 -1e-200; 1e-200 0]
+    @test exp(log(x2)) ≈ x2
+
+    x3 = [1.0 1e200; -1e-200 1.0]
+    @test exp(log(x3)) ≈ x3
 end
 
 @testset "issue #7181" begin
@@ -767,6 +1003,10 @@ end
     @test diag(zeros(0,1),2) == []
 end
 
+@testset "issue #39857" begin
+    @test lyap(1.0+2.0im, 3.0+4.0im) == -1.5 - 2.0im
+end
+
 @testset "Matrix to real power" for elty in (Float64, ComplexF64)
 # Tests proposed at Higham, Deadman: Testing Matrix Function Algorithms Using Identities, March 2014
     #Aa : only positive real eigenvalues
@@ -868,12 +1108,12 @@ end
 end
 
 function test_rdiv_pinv_consistency(a, b)
-    @test (a*b)/b ≈ a*(b/b) ≈ (a*b)*pinv(b) ≈ a*(b*pinv(b))
-    @test typeof((a*b)/b) == typeof(a*(b/b)) == typeof((a*b)*pinv(b)) == typeof(a*(b*pinv(b)))
+    @test a*(b/b) ≈ (a*b)*pinv(b) ≈ a*(b*pinv(b))
+    @test typeof(a*(b/b)) == typeof((a*b)*pinv(b)) == typeof(a*(b*pinv(b)))
 end
 function test_ldiv_pinv_consistency(a, b)
-    @test a\(a*b) ≈ (a\a)*b ≈ (pinv(a)*a)*b ≈ pinv(a)*(a*b)
-    @test typeof(a\(a*b)) == typeof((a\a)*b) == typeof((pinv(a)*a)*b) == typeof(pinv(a)*(a*b))
+    @test (a\a)*b ≈ (pinv(a)*a)*b ≈ pinv(a)*(a*b)
+    @test typeof((a\a)*b) == typeof((pinv(a)*a)*b) == typeof(pinv(a)*(a*b))
 end
 function test_div_pinv_consistency(a, b)
     test_rdiv_pinv_consistency(a, b)
diff --git a/stdlib/LinearAlgebra/test/diagonal.jl b/stdlib/LinearAlgebra/test/diagonal.jl
index 440fdbc8337b22..8bc84d93c6348c 100644
--- a/stdlib/LinearAlgebra/test/diagonal.jl
+++ b/stdlib/LinearAlgebra/test/diagonal.jl
@@ -2,8 +2,12 @@
 
 module TestDiagonal
 
-using Test, LinearAlgebra, SparseArrays, Random
-using LinearAlgebra: mul!, mul!, rmul!, lmul!, ldiv!, rdiv!, BlasFloat, BlasComplex, SingularException
+using Test, LinearAlgebra, Random
+using LinearAlgebra: BlasFloat, BlasComplex
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
+using .Main.Furlongs
 
 n=12 #Size of matrix problem to test
 Random.seed!(1)
@@ -88,7 +92,7 @@ Random.seed!(1)
             @test func(D) ≈ func(DM) atol=n^2*eps(relty)*(1+(elty<:Complex))
         end
         if relty <: BlasFloat
-            for func in (exp, sinh, cosh, tanh, sech, csch, coth)
+            for func in (exp, cis, sinh, cosh, tanh, sech, csch, coth)
                 @test func(D) ≈ func(DM) atol=n^3*eps(relty)
             end
             @test log(Diagonal(abs.(D.diag))) ≈ log(abs.(DM)) atol=n^3*eps(relty)
@@ -102,6 +106,10 @@ Random.seed!(1)
         end
     end
 
+    @testset "Two-dimensional Euler formula for Diagonal" begin
+        @test cis(Diagonal([π, π])) ≈ -I
+    end
+
     @testset "Linear solve" begin
         for (v, U) in ((vv, UU), (view(vv, 1:n), view(UU, 1:n, 1:2)))
             @test D*v ≈ DM*v atol=n*eps(relty)*(1+(elty<:Complex))
@@ -143,7 +151,6 @@ Random.seed!(1)
                 @test_throws DimensionMismatch ldiv!(D, fill(elty(1), n + 1))
                 @test_throws SingularException ldiv!(Diagonal(zeros(relty, n)), copy(v))
                 b = rand(elty, n, n)
-                b = sparse(b)
                 @test ldiv!(D, copy(b)) ≈ Array(D)\Array(b)
                 @test_throws SingularException ldiv!(Diagonal(zeros(elty, n)), copy(b))
                 b = view(rand(elty, n), Vector(1:n))
@@ -153,7 +160,6 @@ Random.seed!(1)
                 @test c ≈ d
                 @test_throws SingularException ldiv!(Diagonal(zeros(elty, n)), b)
                 b = rand(elty, n+1, n+1)
-                b = sparse(b)
                 @test_throws DimensionMismatch ldiv!(D, copy(b))
                 b = view(rand(elty, n+1), Vector(1:n+1))
                 @test_throws DimensionMismatch ldiv!(D, b)
@@ -172,12 +178,24 @@ Random.seed!(1)
             @test Array(a*D) ≈ a*DM
             @test Array(D*a) ≈ DM*a
             @test Array(D/a) ≈ DM/a
-            if relty <: BlasFloat
-                for b in (rand(elty,n,n), sparse(rand(elty,n,n)), rand(elty,n), sparse(rand(elty,n)))
-                    @test lmul!(copy(D), copy(b)) ≈ Array(D)*Array(b)
-                    @test lmul!(transpose(copy(D)), copy(b)) ≈ transpose(Array(D))*Array(b)
-                    @test lmul!(adjoint(copy(D)), copy(b)) ≈ Array(D)'*Array(b)
-                end
+            if elty <: Real
+                @test Array(abs.(D)^a) ≈ abs.(DM)^a
+            else
+                @test Array(D^a) ≈ DM^a
+            end
+            @test Diagonal(1:100)^2 == Diagonal((1:100).^2)
+            p = 3
+            @test Diagonal(1:100)^p == Diagonal((1:100).^p)
+            @test Diagonal(1:100)^(-1) == Diagonal(inv.(1:100))
+            @test Diagonal(1:100)^2.0 == Diagonal((1:100).^2.0)
+            @test Diagonal(1:100)^(2.0+0im) == Diagonal((1:100).^(2.0+0im))
+        end
+
+        if relty <: BlasFloat
+            for b in (rand(elty,n,n), rand(elty,n))
+                @test lmul!(copy(D), copy(b)) ≈ Array(D)*Array(b)
+                @test lmul!(transpose(copy(D)), copy(b)) ≈ transpose(Array(D))*Array(b)
+                @test lmul!(adjoint(copy(D)), copy(b)) ≈ Array(D)'*Array(b)
             end
         end
 
@@ -213,14 +231,14 @@ Random.seed!(1)
                 @test Array(op(Dr, Aherm)) ≈ Array(Hermitian(op(Array(Dr), Array(Aherm))))
             end
         end
-        @test Array(D*Transpose(Asym)) ≈ Array(D) * Array(transpose(Asym))
-        @test Array(D*Adjoint(Asym)) ≈ Array(D) * Array(adjoint(Asym))
-        @test Array(D*Transpose(Aherm)) ≈ Array(D) * Array(transpose(Aherm))
-        @test Array(D*Adjoint(Aherm)) ≈ Array(D) * Array(adjoint(Aherm))
-        @test Array(Transpose(Asym)*Transpose(D)) ≈ Array(transpose(Asym)) * Array(transpose(D))
-        @test Array(Transpose(D)*Transpose(Asym)) ≈ Array(transpose(D)) * Array(transpose(Asym))
-        @test Array(Adjoint(Aherm)*Adjoint(D)) ≈ Array(adjoint(Aherm)) * Array(adjoint(D))
-        @test Array(Adjoint(D)*Adjoint(Aherm)) ≈ Array(adjoint(D)) * Array(adjoint(Aherm))
+        @test Array(D*transpose(Asym)) ≈ Array(D) * Array(transpose(Asym))
+        @test Array(D*adjoint(Asym)) ≈ Array(D) * Array(adjoint(Asym))
+        @test Array(D*transpose(Aherm)) ≈ Array(D) * Array(transpose(Aherm))
+        @test Array(D*adjoint(Aherm)) ≈ Array(D) * Array(adjoint(Aherm))
+        @test Array(transpose(Asym)*transpose(D)) ≈ Array(transpose(Asym)) * Array(transpose(D))
+        @test Array(transpose(D)*transpose(Asym)) ≈ Array(transpose(D)) * Array(transpose(Asym))
+        @test Array(adjoint(Aherm)*adjoint(D)) ≈ Array(adjoint(Aherm)) * Array(adjoint(D))
+        @test Array(adjoint(D)*adjoint(Aherm)) ≈ Array(adjoint(D)) * Array(adjoint(Aherm))
 
         # Performance specialisations for A*_mul_B!
         vvv = similar(vv)
@@ -230,7 +248,7 @@ Random.seed!(1)
 
         UUU = similar(UU)
         for transformA in (identity, adjoint, transpose)
-            for transformD in (identity, Adjoint, Transpose, adjoint, transpose)
+            for transformD in (identity, adjoint, transpose)
                 @test mul!(UUU, transformA(UU), transformD(D)) ≈  transformA(UU) * Matrix(transformD(D))
                 @test mul!(UUU, transformD(D), transformA(UU)) ≈  Matrix(transformD(D)) * transformA(UU)
             end
@@ -295,6 +313,10 @@ Random.seed!(1)
         M4 = rand(elty, n÷2, n÷2)
         @test kron(D3, M4) ≈ kron(DM3, M4)
         @test kron(M4, D3) ≈ kron(M4, DM3)
+        X = [ones(1,1) for i in 1:2, j in 1:2]
+        @test kron(I(2), X)[1,3] == zeros(1,1)
+        X = [ones(2,2) for i in 1:2, j in 1:2]
+        @test kron(I(2), X)[1,3] == zeros(2,2)
     end
     @testset "iszero, isone, triu, tril" begin
         Dzero = Diagonal(zeros(elty, 10))
@@ -326,8 +348,12 @@ Random.seed!(1)
 
     @testset "Eigensystem" begin
         eigD = eigen(D)
-        @test Diagonal(eigD.values) ≈ D
+        @test Diagonal(eigD.values) == D
         @test eigD.vectors == Matrix(I, size(D))
+        eigsortD = eigen(D, sortby=LinearAlgebra.eigsortby)
+        @test eigsortD.values !== D.diag
+        @test eigsortD.values == sort(D.diag, by=LinearAlgebra.eigsortby)
+        @test Matrix(eigsortD) == D
     end
 
     @testset "ldiv" begin
@@ -361,13 +387,15 @@ Random.seed!(1)
         d2, s2 = logabsdet(lM)
         @test d1 ≈ d2
         @test s1 == s2
+        @test logdet(Diagonal(relty[-1,-2])) ≈ log(2)
+        @test_throws DomainError logdet(Diagonal(relty[-1,-2,-3]))
     end
 
     @testset "similar" begin
         @test isa(similar(D), Diagonal{elty})
         @test isa(similar(D, Int), Diagonal{Int})
-        @test isa(similar(D, (3,2)), SparseMatrixCSC{elty})
-        @test isa(similar(D, Int, (3,2)), SparseMatrixCSC{Int})
+        @test isa(similar(D, (3,2)), Matrix{elty})
+        @test isa(similar(D, Int, (3,2)), Matrix{Int})
     end
 
     # Issue number 10036
@@ -391,6 +419,44 @@ Random.seed!(1)
         @test svd(D).V == V
     end
 
+    @testset "svd/eigen with Diagonal{Furlong}" begin
+        Du = Furlong.(D)
+        @test Du isa Diagonal{<:Furlong{1}}
+        F = svd(Du)
+        U, s, V = F
+        @test map(x -> x.val, Matrix(F)) ≈ map(x -> x.val, Du)
+        @test svdvals(Du) == s
+        @test U isa AbstractMatrix{<:Furlong{0}}
+        @test V isa AbstractMatrix{<:Furlong{0}}
+        @test s isa AbstractVector{<:Furlong{1}}
+        E = eigen(Du)
+        vals, vecs = E
+        @test Matrix(E) == Du
+        @test vals isa AbstractVector{<:Furlong{1}}
+        @test vecs isa AbstractMatrix{<:Furlong{0}}
+    end
+end
+
+@testset "rdiv! (#40887)" begin
+    @test rdiv!(Matrix(Diagonal([2.0, 3.0])), Diagonal(2:3)) == Diagonal([1.0, 1.0])
+    @test rdiv!(fill(3.0, 3, 3), 3.0I(3)) == ones(3,3)
+end
+
+@testset "kron (issue #40595)" begin
+    # custom array type to test that kron on Diagonal matrices preserves types of the parents if possible
+    struct KronTestArray{T, N, AT} <: AbstractArray{T, N}
+        data::AT
+    end
+    KronTestArray(data::AbstractArray) = KronTestArray{eltype(data), ndims(data), typeof(data)}(data)
+    Base.size(A::KronTestArray) = size(A.data)
+    LinearAlgebra.kron(A::KronTestArray, B::KronTestArray) = KronTestArray(kron(A.data, B.data))
+    Base.getindex(K::KronTestArray{<:Any,N}, i::Vararg{Int,N}) where {N} = K.data[i...]
+
+    A = KronTestArray([1, 2, 3]);
+    @test kron(A, A) isa KronTestArray
+    Ad = Diagonal(A);
+    @test kron(Ad, Ad).diag isa KronTestArray
+    @test kron(Ad, Ad).diag == kron([1, 2, 3], [1, 2, 3])
 end
 
 @testset "svdvals and eigvals (#11120/#11247)" begin
@@ -399,6 +465,13 @@ end
     @test sort([eigvals(D)...;], by=LinearAlgebra.eigsortby) ≈ eigvals([D.diag[1] zeros(3,2); zeros(2,3) D.diag[2]])
 end
 
+@testset "eigvals should return a copy of the diagonal" begin
+    D = Diagonal([1, 2, 3])
+    lam = eigvals(D)
+    D[3,3] = 4 # should not affect lam
+    @test lam == [1, 2, 3]
+end
+
 @testset "eigmin (#27847)" begin
     for _ in 1:100
         d = randn(rand(1:10))
@@ -534,11 +607,46 @@ let D1 = Diagonal(rand(5)), D2 = Diagonal(rand(5))
     @test LinearAlgebra.lmul!(adjoint(D1),copy(D2)) == adjoint(D1)*D2
 end
 
+@testset "multiplication of a Diagonal with a Matrix" begin
+    A = collect(reshape(1:8, 4, 2));
+    B = BigFloat.(A);
+    DL = Diagonal(collect(axes(A, 1)));
+    DR = Diagonal(Float16.(collect(axes(A, 2))));
+
+    @test DL * A == collect(DL) * A
+    @test A * DR == A * collect(DR)
+    @test DL * B == collect(DL) * B
+    @test B * DR == B * collect(DR)
+
+    A = reshape([ones(2,2), ones(2,2)*2, ones(2,2)*3, ones(2,2)*4], 2, 2)
+    Ac = collect(A)
+    D = Diagonal([collect(reshape(1:4, 2, 2)), collect(reshape(5:8, 2, 2))])
+    Dc = collect(D)
+    @test A * D == Ac * Dc
+    @test D * A == Dc * Ac
+    @test D * D == Dc * Dc
+
+    AS = similar(A)
+    mul!(AS, A, D, true, false)
+    @test AS == A * D
+
+    D2 = similar(D)
+    mul!(D2, D, D)
+    @test D2 == D * D
+
+    copyto!(D2, D)
+    lmul!(D, D2)
+    @test D2 == D * D
+    copyto!(D2, D)
+    rmul!(D2, D)
+    @test D2 == D * D
+end
+
 @testset "multiplication of QR Q-factor and Diagonal (#16615 spot test)" begin
     D = Diagonal(randn(5))
     Q = qr(randn(5, 5)).Q
     @test D * Q' == Array(D) * Q'
-    Q = qr(randn(5, 5), Val(true)).Q
+    Q = qr(randn(5, 5), ColumnNorm()).Q
     @test_throws ArgumentError lmul!(Q, D)
 end
 
@@ -566,6 +674,7 @@ end
     @test ishermitian(Dsym) == false
 
     @test exp(D) == Diagonal([exp([1 2; 3 4]), exp([1 2; 3 4])])
+    @test cis(D) == Diagonal([cis([1 2; 3 4]), cis([1 2; 3 4])])
     @test log(D) == Diagonal([log([1 2; 3 4]), log([1 2; 3 4])])
     @test sqrt(D) == Diagonal([sqrt([1 2; 3 4]), sqrt([1 2; 3 4])])
 
@@ -609,15 +718,13 @@ end
         fullBB = copyto!(Matrix{Matrix{T}}(undef, 2, 2), BB)
         for (transform1, transform2) in ((identity,  identity),
                 (identity,  adjoint  ), (adjoint,   identity ), (adjoint,   adjoint  ),
-                (identity,  transpose), (transpose, identity ), (transpose, transpose),
-                (identity,  Adjoint  ), (Adjoint,   identity ), (Adjoint,   Adjoint  ),
-                (identity,  Transpose), (Transpose, identity ), (Transpose, Transpose))
+                (identity,  transpose), (transpose, identity ), (transpose, transpose))
             @test *(transform1(D), transform2(B))::typeof(D) ≈ *(transform1(Matrix(D)), transform2(Matrix(B))) atol=2 * eps()
             @test *(transform1(DD), transform2(BB))::typeof(DD) == *(transform1(fullDD), transform2(fullBB))
         end
         M = randn(T, 5, 5)
         MM = [randn(T, 2, 2) for _ in 1:2, _ in 1:2]
-        for transform in (identity, adjoint, transpose, Adjoint, Transpose)
+        for transform in (identity, adjoint, transpose)
             @test lmul!(transform(D), copy(M)) ≈ *(transform(Matrix(D)), M)
             @test rmul!(copy(M), transform(D)) ≈ *(M, transform(Matrix(D)))
             @test lmul!(transform(DD), copy(MM)) ≈ *(transform(fullDD), MM)
@@ -631,26 +738,51 @@ end
     @test Diagonal(transpose([1, 2, 3])) == Diagonal([1 2 3])
 end
 
-@testset "Multiplication with Adjoint and Transpose vectors (#26863)" begin
+@testset "Multiplication with adjoint and transpose vectors (#26863)" begin
     x = collect(1:2)
     xt = transpose(x)
     A = reshape([[1 2; 3 4], zeros(Int,2,2), zeros(Int, 2, 2), [5 6; 7 8]], 2, 2)
     D = Diagonal(A)
-    @test x'*D == x'*A == copy(x')*D == copy(x')*A
-    @test xt*D == xt*A == copy(xt)*D == copy(xt)*A
+    @test x'*D == x'*A == collect(x')*D == collect(x')*A
+    @test xt*D == xt*A == collect(xt)*D == collect(xt)*A
+    outadjxD = similar(x'*D); outtrxD = similar(xt*D);
+    mul!(outadjxD, x', D)
+    @test outadjxD == x'*D
+    mul!(outtrxD, xt, D)
+    @test outtrxD == xt*D
+
+    D1 = Diagonal([[1 2; 3 4]])
+    @test D1 * x' == D1 * collect(x') == collect(D1) * collect(x')
+    @test D1 * xt == D1 * collect(xt) == collect(D1) * collect(xt)
+    outD1adjx = similar(D1 * x'); outD1trx = similar(D1 * xt);
+    mul!(outadjxD, D1, x')
+    @test outadjxD == D1*x'
+    mul!(outtrxD, D1, xt)
+    @test outtrxD == D1*xt
+
     y = [x, x]
     yt = transpose(y)
     @test y'*D*y == (y'*D)*y == (y'*A)*y
     @test yt*D*y == (yt*D)*y == (yt*A)*y
+    outadjyD = similar(y'*D); outtryD = similar(yt*D);
+    outadjyD2 = similar(collect(y'*D)); outtryD2 = similar(collect(yt*D));
+    mul!(outadjyD, y', D)
+    mul!(outadjyD2, y', D)
+    @test outadjyD == outadjyD2 == y'*D
+    mul!(outtryD, yt, D)
+    mul!(outtryD2, yt, D)
+    @test outtryD == outtryD2 == yt*D
 end
 
-@testset "Multiplication of single element Diagonal (#36746)" begin
+@testset "Multiplication of single element Diagonal (#36746, #40726)" begin
     @test_throws DimensionMismatch Diagonal(randn(1)) * randn(5)
     @test_throws DimensionMismatch Diagonal(randn(1)) * Diagonal(randn(3, 3))
     A = [1 0; 0 2]
     v = [3, 4]
     @test Diagonal(A) * v == A * v
     @test Diagonal(A) * Diagonal(A) == A * A
+    @test_throws DimensionMismatch [1 0;0 1] * Diagonal([2 3])   # Issue #40726
+    @test_throws DimensionMismatch lmul!(Diagonal([1]), [1,2,3]) # nearby
 end
 
 @testset "Triangular division by Diagonal #27989" begin
@@ -666,6 +798,63 @@ end
     end
 end
 
+@testset "(Sym)Tridiagonal division by Diagonal" begin
+    for K in (5, 1), elty in (Float64, ComplexF32), overlength in (1, 0)
+        S = SymTridiagonal(randn(elty, K), randn(elty, K-overlength))
+        T = Tridiagonal(randn(elty, K-1), randn(elty, K), randn(elty, K-1))
+        D = Diagonal(randn(elty, K))
+        D0 = Diagonal(zeros(elty, K))
+        @test (D \ S)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(S))
+        @test (D \ T)::Tridiagonal{elty} == Tridiagonal(Matrix(D) \ Matrix(T))
+        @test (S / D)::Tridiagonal{elty} == Tridiagonal(Matrix(S) / Matrix(D))
+        @test (T / D)::Tridiagonal{elty} == Tridiagonal(Matrix(T) / Matrix(D))
+        @test_throws SingularException D0 \ S
+        @test_throws SingularException D0 \ T
+        @test_throws SingularException S / D0
+        @test_throws SingularException T / D0
+    end
+    # 0-length case
+    S = SymTridiagonal(Float64[], Float64[])
+    T = Tridiagonal(Float64[], Float64[], Float64[])
+    D = Diagonal(Float64[])
+    @test (D \ S)::Tridiagonal{Float64} == T
+    @test (D \ T)::Tridiagonal{Float64} == T
+    @test (S / D)::Tridiagonal{Float64} == T
+    @test (T / D)::Tridiagonal{Float64} == T
+    # matrix eltype case
+    K = 5
+    for elty in (Float64, ComplexF32), overlength in (1, 0)
+        S = SymTridiagonal([rand(elty, 2, 2) for _ in 1:K], [rand(elty, 2, 2) for _ in 1:K-overlength])
+        T = Tridiagonal([rand(elty, 2, 2) for _ in 1:K-1], [rand(elty, 2, 2) for _ in 1:K], [rand(elty, 2, 2) for _ in 1:K-1])
+        D = Diagonal(randn(elty, K))
+        SM = fill(zeros(elty, 2, 2), K, K)
+        TM = copy(SM)
+        SM[1,1] = S[1,1]; TM[1,1] = T[1,1]
+        for j in 2:K
+            SM[j,j-1] = S[j,j-1]; SM[j,j] = S[j,j]; SM[j-1,j] = S[j-1,j]
+            TM[j,j-1] = T[j,j-1]; TM[j,j] = T[j,j]; TM[j-1,j] = T[j-1,j]
+        end
+        for (M, Mm) in ((S, SM), (T, TM))
+            DS = D \ M
+            @test DS isa Tridiagonal
+            DM = D \ Mm
+            for i in -1:1; @test diag(DS, i) ≈ diag(DM, i) end
+            DS = M / D
+            @test DS isa Tridiagonal
+            DM = Mm / D
+            for i in -1:1; @test diag(DS, i) ≈ diag(DM, i) end
+        end
+    end
+    # eltype promotion case
+    S = SymTridiagonal(rand(-20:20, K), rand(-20:20, K-1))
+    T = Tridiagonal(rand(-20:20, K-1), rand(-20:20, K), rand(-20:20, K-1))
+    D = Diagonal(rand(1:20, K))
+    @test (D \ S)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(S))
+    @test (D \ T)::Tridiagonal{Float64} == Tridiagonal(Matrix(D) \ Matrix(T))
+    @test (S / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(S) / Matrix(D))
+    @test (T / D)::Tridiagonal{Float64} == Tridiagonal(Matrix(T) / Matrix(D))
+end
+
 @testset "eigenvalue sorting" begin
     D = Diagonal([0.4, 0.2, -1.3])
     @test eigvals(D) == eigen(D).values == [0.4, 0.2, -1.3] # not sorted by default
@@ -726,4 +915,127 @@ end
     @test dot(zeros(Int32, 0), Diagonal(zeros(Int, 0)), zeros(Int16, 0)) === 0
 end
 
+@testset "Diagonal(undef)" begin
+    d = Diagonal{Float32}(undef, 2)
+    @test length(d.diag) == 2
+end
+
+@testset "permutedims (#39447)" begin
+    for D in (Diagonal(zeros(5)), Diagonal(zeros(5) .+ 1im), Diagonal([[1,2],[3,4]]))
+        @test permutedims(D) === permutedims(D,(1,2)) === permutedims(D,(2,1)) === D
+        @test_throws ArgumentError permutedims(D,(1,3))
+    end
+end
+
+@testset "Inner product" begin
+    A = Diagonal(rand(10) .+ im)
+    B = Diagonal(rand(10) .+ im)
+    @test dot(A, B) ≈ dot(Matrix(A), B)
+    @test dot(A, B) ≈ dot(A, Matrix(B))
+    @test dot(A, B) ≈ dot(Matrix(A), Matrix(B))
+    @test dot(A, B) ≈ conj(dot(B, A))
+end
+
+@testset "eltype relaxation(#41015)" begin
+    A = rand(3,3)
+    for trans in (identity, adjoint, transpose)
+        @test ldiv!(trans(I(3)), A) == A
+        @test rdiv!(A, trans(I(3))) == A
+    end
+end
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin
+    # tests corresponding to #34995
+    d = ImmutableArray([1, 2, 3, 4])
+    D = Diagonal(d)
+
+    @test convert(AbstractArray{Float64}, D)::Diagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == D
+    @test convert(AbstractMatrix{Float64}, D)::Diagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == D
+end
+
+@testset "divisions functionality" for elty in (Int, Float64, ComplexF64)
+    B = Diagonal(rand(elty,5,5))
+    x = rand(elty)
+    @test \(x, B) == /(B, x)
+end
+
+@testset "promotion" begin
+    for (v1, v2) in (([true], [1]), ([zeros(2,2)], [zeros(Int, 2,2)]))
+        T = promote_type(eltype(v1), eltype(v2))
+        V = promote_type(typeof(v1), typeof(v2))
+        d1 = Diagonal(v1)
+        d2 = Diagonal(v2)
+        v = [d1, d2]
+        @test (@inferred eltype(v)) == Diagonal{T, V}
+    end
+    # test for a type for which promote_type doesn't lead to a concrete eltype
+    struct MyArrayWrapper{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
+       a :: A
+    end
+    Base.size(M::MyArrayWrapper) = size(M.a)
+    Base.axes(M::MyArrayWrapper) = axes(M.a)
+    Base.length(M::MyArrayWrapper) = length(M.a)
+    Base.getindex(M::MyArrayWrapper, i::Int...) = M.a[i...]
+    Base.setindex!(M::MyArrayWrapper, v, i::Int...) = M.a[i...] = v
+    d1 = Diagonal(MyArrayWrapper(1:3))
+    d2 = Diagonal(MyArrayWrapper(1.0:3.0))
+    c = [d1, d2]
+    @test c[1] == d1
+    @test c[2] == d2
+end
+
+@testset "zero and one" begin
+    D1 = Diagonal(rand(3))
+    @test D1 + zero(D1) == D1
+    @test D1 * one(D1) == D1
+    @test D1 * oneunit(D1) == D1
+    @test oneunit(D1) isa typeof(D1)
+    D2 = Diagonal([collect(reshape(1:4, 2, 2)), collect(reshape(5:8, 2, 2))])
+    @test D2 + zero(D2) == D2
+    @test D2 * one(D2) == D2
+    @test D2 * oneunit(D2) == D2
+    @test oneunit(D2) isa typeof(D2)
+    D3 = Diagonal([D2, D2]);
+    @test D3 + zero(D3) == D3
+    @test D3 * one(D3) == D3
+    @test D3 * oneunit(D3) == D3
+    @test oneunit(D3) isa typeof(D3)
+end
+
+@testset "AbstractTriangular" for (Tri, UTri) in ((UpperTriangular, UnitUpperTriangular), (LowerTriangular, UnitLowerTriangular))
+    A = randn(4, 4)
+    TriA = Tri(A)
+    UTriA = UTri(A)
+    D = Diagonal(1.0:4.0)
+    DM = Matrix(D)
+    DMF = factorize(DM)
+    outTri = similar(TriA)
+    out = similar(A)
+    # 2 args
+    for fun in (*, rmul!, rdiv!, /)
+        @test fun(copy(TriA), D)::Tri == fun(Matrix(TriA), D)
+        @test fun(copy(UTriA), D)::Tri == fun(Matrix(UTriA), D)
+    end
+    for fun in (*, lmul!, ldiv!, \)
+        @test fun(D, copy(TriA))::Tri == fun(D, Matrix(TriA))
+        @test fun(D, copy(UTriA))::Tri == fun(D, Matrix(UTriA))
+    end
+    # 3 args
+    @test outTri === ldiv!(outTri, D, TriA)::Tri == ldiv!(out, D, Matrix(TriA))
+    @test outTri === ldiv!(outTri, D, UTriA)::Tri == ldiv!(out, D, Matrix(UTriA))
+    @test outTri === mul!(outTri, D, TriA)::Tri == mul!(out, D, Matrix(TriA))
+    @test outTri === mul!(outTri, D, UTriA)::Tri == mul!(out, D, Matrix(UTriA))
+    @test outTri === mul!(outTri, TriA, D)::Tri == mul!(out, Matrix(TriA), D)
+    @test outTri === mul!(outTri, UTriA, D)::Tri == mul!(out, Matrix(UTriA), D)
+    # 5 args
+    @test outTri === mul!(outTri, D, TriA, 2, 1)::Tri == mul!(out, D, Matrix(TriA), 2, 1)
+    @test outTri === mul!(outTri, D, UTriA, 2, 1)::Tri == mul!(out, D, Matrix(UTriA), 2, 1)
+    @test outTri === mul!(outTri, TriA, D, 2, 1)::Tri == mul!(out, Matrix(TriA), D, 2, 1)
+    @test outTri === mul!(outTri, UTriA, D, 2, 1)::Tri == mul!(out, Matrix(UTriA), D, 2, 1)
+end
+
 end # module TestDiagonal
diff --git a/stdlib/LinearAlgebra/test/eigen.jl b/stdlib/LinearAlgebra/test/eigen.jl
index 2095a9304690db..4ee1845ecc385a 100644
--- a/stdlib/LinearAlgebra/test/eigen.jl
+++ b/stdlib/LinearAlgebra/test/eigen.jl
@@ -3,7 +3,7 @@
 module TestEigen
 
 using Test, LinearAlgebra, Random
-using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted
+using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted, UtiAUi!
 
 n = 10
 
@@ -11,7 +11,7 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(1234321)
+Random.seed!(12343219)
 
 areal = randn(n,n)/2
 aimg  = randn(n,n)/2
@@ -73,6 +73,31 @@ aimg  = randn(n,n)/2
             d,v = eigen(asym_sg, a_sg'a_sg)
             @test d == f.values
             @test v == f.vectors
+
+            # solver for in-place U' \ A / U (#14896)
+            if !(eltya <: Integer)
+                for atyp in (eltya <: Real ? (Symmetric, Hermitian) : (Hermitian,))
+                    for utyp in (UpperTriangular, Diagonal)
+                        A = atyp(asym_sg)
+                        U = utyp(a_sg'a_sg)
+                        @test UtiAUi!(copy(A), U) ≈ U' \ A / U
+                    end
+                end
+            end
+
+            # matrices of different types (#14896)
+            if eltya <: Real
+                fs = eigen(Symmetric(asym_sg), a_sg'a_sg)
+                @test fs.values ≈ f.values
+                @test abs.(fs.vectors) ≈ abs.(f.vectors)  # may change sign
+                gs = eigen(Symmetric(asym_sg), Diagonal(a_sg'a_sg))
+                @test Symmetric(asym_sg)*gs.vectors ≈ (Diagonal(a_sg'a_sg)*gs.vectors) * Diagonal(gs.values)
+            end
+            fh = eigen(Hermitian(asym_sg), a_sg'a_sg)
+            @test fh.values ≈ f.values
+            @test abs.(fh.vectors) ≈ abs.(f.vectors)  # may change sign
+            gh = eigen(Hermitian(asym_sg), Diagonal(a_sg'a_sg))
+            @test Hermitian(asym_sg)*gh.vectors ≈ (Diagonal(a_sg'a_sg)*gh.vectors) * Diagonal(gh.values)
         end
         @testset "Non-symmetric generalized eigenproblem" begin
             if isa(a, Array)
@@ -82,14 +107,15 @@ aimg  = randn(n,n)/2
                 a1_nsg = view(a, 1:n1, 1:n1)
                 a2_nsg = view(a, n1+1:n2, n1+1:n2)
             end
-            f = eigen(a1_nsg, a2_nsg)
+            sortfunc = x -> real(x) + imag(x)
+            f = eigen(a1_nsg, a2_nsg; sortby = sortfunc)
             @test a1_nsg*f.vectors ≈ (a2_nsg*f.vectors) * Diagonal(f.values)
-            @test f.values ≈ eigvals(a1_nsg, a2_nsg)
-            @test prod(f.values) ≈ prod(eigvals(a1_nsg/a2_nsg)) atol=50000ε
-            @test eigvecs(a1_nsg, a2_nsg) == f.vectors
+            @test f.values ≈ eigvals(a1_nsg, a2_nsg; sortby = sortfunc)
+            @test prod(f.values) ≈ prod(eigvals(a1_nsg/a2_nsg, sortby = sortfunc)) atol=50000ε
+            @test eigvecs(a1_nsg, a2_nsg; sortby = sortfunc) == f.vectors
             @test_throws ErrorException f.Z
 
-            d,v = eigen(a1_nsg, a2_nsg)
+            d,v = eigen(a1_nsg, a2_nsg; sortby = sortfunc)
             @test d == f.values
             @test v == f.vectors
         end
@@ -101,8 +127,12 @@ end
         @test_throws(ArgumentError, eigen(fill(eltya, 1, 1)))
         @test_throws(ArgumentError, eigen(fill(eltya, 2, 2)))
         test_matrix = rand(typeof(eltya),3,3)
-        test_matrix[2,2] = eltya
+        test_matrix[1,3] = eltya
         @test_throws(ArgumentError, eigen(test_matrix))
+        @test_throws(ArgumentError, eigen(Symmetric(test_matrix)))
+        @test_throws(ArgumentError, eigen(Hermitian(test_matrix)))
+        @test eigen(Symmetric(test_matrix, :L)) isa Eigen
+        @test eigen(Hermitian(test_matrix, :L)) isa Eigen
     end
 end
 
@@ -133,6 +163,7 @@ end
 end
 
 @testset "eigen of an Adjoint" begin
+    Random.seed!(4)
     A = randn(3,3)
     @test eigvals(A') == eigvals(copy(A'))
     @test eigen(A')   == eigen(copy(A'))
@@ -140,5 +171,37 @@ end
     @test eigmax(A') == eigmax(copy(A'))
 end
 
+@testset "equality of eigen factorizations" begin
+    A = randn(3, 3)
+    @test eigen(A) == eigen(A)
+    @test hash(eigen(A)) == hash(eigen(A))
+    @test isequal(eigen(A), eigen(A))
+end
+
+@testset "Float16" begin
+    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
+    B = eigen(A)
+    B32 = eigen(Float32.(A))
+    C = Float16[3 -2; 4 -1]
+    D = eigen(C)
+    D32 = eigen(Float32.(C))
+    F = eigen(complex(C))
+    F32 = eigen(complex(Float32.(C)))
+    @test B isa Eigen{Float16, Float16, Matrix{Float16}, Vector{Float16}}
+    @test B.values isa Vector{Float16}
+    @test B.vectors isa Matrix{Float16}
+    @test B.values ≈ B32.values
+    @test B.vectors ≈ B32.vectors
+    @test D isa Eigen{ComplexF16, ComplexF16, Matrix{ComplexF16}, Vector{ComplexF16}}
+    @test D.values isa Vector{ComplexF16}
+    @test D.vectors isa Matrix{ComplexF16}
+    @test D.values ≈ D32.values
+    @test D.vectors ≈ D32.vectors
+    @test F isa Eigen{ComplexF16, ComplexF16, Matrix{ComplexF16}, Vector{ComplexF16}}
+    @test F.values isa Vector{ComplexF16}
+    @test F.vectors isa Matrix{ComplexF16}
+    @test F.values ≈ F32.values
+    @test F.vectors ≈ F32.vectors
+end
 
 end # module TestEigen
diff --git a/stdlib/LinearAlgebra/test/factorization.jl b/stdlib/LinearAlgebra/test/factorization.jl
new file mode 100644
index 00000000000000..d200eff2f17bf6
--- /dev/null
+++ b/stdlib/LinearAlgebra/test/factorization.jl
@@ -0,0 +1,81 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module TestFactorization
+using Test, LinearAlgebra
+
+@testset "equality for factorizations - $f" for f in Any[
+    bunchkaufman,
+    cholesky,
+    x -> cholesky(x, RowMaximum()),
+    eigen,
+    hessenberg,
+    lq,
+    lu,
+    qr,
+    x -> qr(x, ColumnNorm()),
+    svd,
+    schur,
+]
+    A = randn(3, 3)
+    A = A * A' # ensure A is pos. def. and symmetric
+    F, G = f(A), f(A)
+
+    @test F == G
+    @test isequal(F, G)
+    @test hash(F) == hash(G)
+
+    f === hessenberg && continue
+
+    # change all arrays in F to have eltype Float32
+    F = typeof(F).name.wrapper(Base.mapany(1:nfields(F)) do i
+        x = getfield(F, i)
+        return x isa AbstractArray{Float64} ? Float32.(x) : x
+    end...)
+    # round all arrays in G to the nearest Float64 representable as Float32
+    G = typeof(G).name.wrapper(Base.mapany(1:nfields(G)) do i
+        x = getfield(G, i)
+        return x isa AbstractArray{Float64} ? Float64.(Float32.(x)) : x
+    end...)
+
+    @test F == G broken=!(f === eigen || f === qr)
+    @test isequal(F, G) broken=!(f === eigen || f === qr)
+    @test hash(F) == hash(G)
+end
+
+@testset "size for factorizations - $f" for f in Any[
+    bunchkaufman,
+    cholesky,
+    x -> cholesky(x, RowMaximum()),
+    hessenberg,
+    lq,
+    lu,
+    qr,
+    x -> qr(x, ColumnNorm()),
+    svd,
+]
+    A = randn(3, 3)
+    A = A * A' # ensure A is pos. def. and symmetric
+    F = f(A)
+    tF = Transpose(F)
+    aF = Adjoint(F)
+    @test size(F) == size(A)
+    @test size(tF) == size(Transpose(A))
+    @test size(aF) == size(Adjoint(A))
+end
+
+@testset "equality of QRCompactWY" begin
+    A = rand(100, 100)
+    F, G = qr(A), qr(A)
+
+    @test F == G
+    @test isequal(F, G)
+    @test hash(F) == hash(G)
+
+    G.T[28, 100] = 42
+
+    @test F != G
+    @test !isequal(F, G)
+    @test hash(F) != hash(G)
+end
+
+end
diff --git a/stdlib/LinearAlgebra/test/generic.jl b/stdlib/LinearAlgebra/test/generic.jl
index 2c2c9cc1a59a4a..77668cdb69b62b 100644
--- a/stdlib/LinearAlgebra/test/generic.jl
+++ b/stdlib/LinearAlgebra/test/generic.jl
@@ -70,6 +70,38 @@ n = 5 # should be odd
         else
             @test logabsdet(A)[2] ≈ sign(det(A))
         end
+        # logabsdet for Number"
+        x = A[1, 1] # getting a number of type elty
+        X = fill(x, 1, 1)
+        @test logabsdet(x)[1] ≈ logabsdet(X)[1]
+        @test logabsdet(x)[2] ≈ logabsdet(X)[2]
+    end
+
+    @testset "det with nonstandard Number type" begin
+        struct MyDual{T<:Real} <: Real
+            val::T
+            eps::T
+        end
+        Base.:+(x::MyDual, y::MyDual) = MyDual(x.val + y.val, x.eps + y.eps)
+        Base.:*(x::MyDual, y::MyDual) = MyDual(x.val * y.val, x.eps * y.val + y.eps * x.val)
+        Base.:/(x::MyDual, y::MyDual) = x.val / y.val
+        Base.:(==)(x::MyDual, y::MyDual) = x.val == y.val && x.eps == y.eps
+        Base.zero(::MyDual{T}) where {T} = MyDual(zero(T), zero(T))
+        Base.zero(::Type{MyDual{T}}) where {T} = MyDual(zero(T), zero(T))
+        Base.one(::MyDual{T}) where {T} = MyDual(one(T), zero(T))
+        Base.one(::Type{MyDual{T}}) where {T} = MyDual(one(T), zero(T))
+        # the following line is required for BigFloat, IDK why it doesn't work via
+        # promote_rule like for all other types
+        Base.promote_type(::Type{MyDual{BigFloat}}, ::Type{BigFloat}) = MyDual{BigFloat}
+        Base.promote_rule(::Type{MyDual{T}}, ::Type{S}) where {T,S<:Real} =
+            MyDual{promote_type(T, S)}
+        Base.promote_rule(::Type{MyDual{T}}, ::Type{MyDual{S}}) where {T,S} =
+            MyDual{promote_type(T, S)}
+        Base.convert(::Type{MyDual{T}}, x::MyDual) where {T} =
+            MyDual(convert(T, x.val), convert(T, x.eps))
+        if elty <: Real
+            @test det(triu(MyDual.(A, zero(A)))) isa MyDual
+        end
     end
 end
 
@@ -142,6 +174,10 @@ end
         @testset "Scaling with 5-argument mul!" begin
             @test mul!(copy(a), 5., a, 10, 100) == a*150
             @test mul!(copy(a), a, 5., 10, 100) == a*150
+            @test mul!(vec(copy(a)), 5., a, 10, 100) == vec(a*150)
+            @test mul!(vec(copy(a)), a, 5., 10, 100) == vec(a*150)
+            @test_throws DimensionMismatch mul!([vec(copy(a)); 0], 5., a, 10, 100)
+            @test_throws DimensionMismatch mul!([vec(copy(a)); 0], a, 5., 10, 100)
             @test mul!(copy(a), Diagonal([1.; 2.]), a, 10, 100) == 10a.*[1; 2] .+ 100a
             @test mul!(copy(a), Diagonal([1; 2]), a, 10, 100)   == 10a.*[1; 2] .+ 100a
             @test mul!(copy(a), a, Diagonal(1.:an), 10, 100) == 10a.*Vector(1:an)' .+ 100a
@@ -189,6 +225,7 @@ end
         @test det(a) == a
         @test norm(a) == abs(a)
         @test norm(a, 0) == 1
+        @test norm(0, 0) == 0
     end
 
     @test !issymmetric(NaN16)
@@ -209,6 +246,19 @@ end
 @test norm([2.4e-322, 4.4e-323], 3) ≈ 2.4e-322
 @test_throws ArgumentError opnorm(Matrix{Float64}(undef,5,5),5)
 
+# operator norm for zero-dimensional domain is zero (see #40370)
+@testset "opnorm" begin
+    for m in (0, 1, 2)
+        @test @inferred(opnorm(fill(1,0,m))) == 0.0
+        @test @inferred(opnorm(fill(1,m,0))) == 0.0
+    end
+    for m in (1, 2)
+        @test @inferred(opnorm(fill(1im,1,m))) ≈ sqrt(m)
+        @test @inferred(opnorm(fill(1im,m,1))) ≈ sqrt(m)
+    end
+    @test @inferred(opnorm(fill(1,2,2))) ≈ 2
+end
+
 @testset "generic norm for arrays of arrays" begin
     x = Vector{Int}[[1,2], [3,4]]
     @test @inferred(norm(x)) ≈ sqrt(30)
@@ -228,12 +278,26 @@ end
     rotate!(x, y, c, s)
     @test x ≈ c*x2 + s*y2
     @test y ≈ -conj(s)*x2 + c*y2
+    @test_throws DimensionMismatch rotate!([x; x], y, c, s)
 
     x3 = copy(x)
     y3 = copy(y)
     reflect!(x, y, c, s)
     @test x ≈ c*x3 + s*y3
     @test y ≈ conj(s)*x3 - c*y3
+    @test_throws DimensionMismatch reflect!([x; x], y, c, s)
+end
+
+@testset "LinearAlgebra.reflectorApply!" begin
+    for T in (Float64, ComplexF64)
+        x = rand(T, 6)
+        τ = rand(T)
+        A = rand(T, 6)
+        B = LinearAlgebra.reflectorApply!(x, τ, copy(A))
+        C = LinearAlgebra.reflectorApply!(x, τ, reshape(copy(A), (length(A), 1)))
+        @test B[1] ≈ C[1] ≈ A[1] - conj(τ)*(A[1] + dot(x[2:end], A[2:end]))
+        @test B[2:end] ≈ C[2:end] ≈ A[2:end] - conj(τ)*(A[1] + dot(x[2:end], A[2:end]))*x[2:end]
+    end
 end
 
 @testset "LinearAlgebra.axp(b)y! for element type without commutative multiplication" begin
@@ -257,6 +321,26 @@ end
     ry = [2 8]
     @test LinearAlgebra.axpy!(α, x, rx, y, ry) == [1 1 1 1; 11 1 1 26]
 end
+
+@testset "LinearAlgebra.axp(b)y! for non strides input" begin
+    a = rand(5, 5)
+    @test LinearAlgebra.axpby!(1, Hermitian(a), 1, zeros(size(a))) == Hermitian(a)
+    @test LinearAlgebra.axpby!(1, 1.:5, 1, zeros(5)) == 1.:5
+    @test LinearAlgebra.axpy!(1, Hermitian(a), zeros(size(a))) == Hermitian(a)
+    @test LinearAlgebra.axpy!(1, 1.:5, zeros(5)) == 1.:5
+end
+
+@testset "LinearAlgebra.axp(b)y! for stride-vector like input" begin
+    for T in (Float32, Float64, ComplexF32, ComplexF64)
+        a = rand(T, 5, 5)
+        @test LinearAlgebra.axpby!(1, view(a, :, 1:5), 1, zeros(T, size(a))) == a
+        @test LinearAlgebra.axpy!(1, view(a, :, 1:5), zeros(T, size(a))) == a
+        b = view(a, 25:-2:1)
+        @test LinearAlgebra.axpby!(1, b, 1, zeros(T, size(b))) == b
+        @test LinearAlgebra.axpy!(1, b, zeros(T, size(b))) == b
+    end
+end
+
 @testset "norm and normalize!" begin
     vr = [3.0, 4.0]
     for Tr in (Float32, Float64)
@@ -291,6 +375,13 @@ end
     @test typeof(normalize([1 2 3; 4 5 6])) == Array{Float64,2}
 end
 
+@testset "normalize for scalars" begin
+    @test normalize(8.0) == 1.0
+    @test normalize(-3.0) == -1.0
+    @test normalize(-3.0, 1) == -1.0
+    @test isnan(normalize(0.0))
+end
+
 @testset "Issue #30466" begin
     @test norm([typemin(Int), typemin(Int)], Inf) == -float(typemin(Int))
     @test norm([typemin(Int), typemin(Int)], 1) == -2float(typemin(Int))
@@ -334,6 +425,11 @@ end
     @test [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]] ≈ [[1,2, [3,4]], 5.0, [6im, [7.0, 8.0]]]
 end
 
+@testset "Issue 40128" begin
+    @test det(BigInt[9 1 8 0; 0 0 8 7; 7 6 8 3; 2 9 7 7])::BigInt == -1
+    @test det(BigInt[1 big(2)^65+1; 3 4])::BigInt == (4 - 3*(big(2)^65+1))
+end
+
 # Minimal modulo number type - but not subtyping Number
 struct ModInt{n}
     k
@@ -361,13 +457,13 @@ LinearAlgebra.Transpose(a::ModInt{n}) where {n} = transpose(a)
     A = [ModInt{2}(1) ModInt{2}(0); ModInt{2}(1) ModInt{2}(1)]
     b = [ModInt{2}(1), ModInt{2}(0)]
 
-    @test A*(lu(A, Val(false))\b) == b
+    @test A*(lu(A, NoPivot())\b) == b
 
     # Needed for pivoting:
     Base.abs(a::ModInt{n}) where {n} = a
     Base.:<(a::ModInt{n}, b::ModInt{n}) where {n} = a.k < b.k
 
-    @test A*(lu(A, Val(true))\b) == b
+    @test A*(lu(A, RowMaximum())\b) == b
 end
 
 @testset "Issue 18742" begin
@@ -453,6 +549,25 @@ end
     @test all(!isnan, lmul!(false, Any[NaN]))
 end
 
+@testset "adjtrans dot" begin
+    for t in (transpose, adjoint), T in (ComplexF64, Quaternion{Float64})
+        x, y = t(rand(T, 10)), t(rand(T, 10))
+        X, Y = copy(x), copy(y)
+        @test dot(x, y) ≈ dot(X, Y)
+        x, y = t([rand(T, 2, 2) for _ in 1:5]), t([rand(T, 2, 2) for _ in 1:5])
+        X, Y = copy(x), copy(y)
+        @test dot(x, y) ≈ dot(X, Y)
+        x, y = t(rand(T, 10, 5)), t(rand(T, 10, 5))
+        X, Y = copy(x), copy(y)
+        @test dot(x, y) ≈ dot(X, Y)
+        x = t([rand(T, 2, 2) for _ in 1:5, _ in 1:5])
+        y = t([rand(T, 2, 2) for _ in 1:5, _ in 1:5])
+        X, Y = copy(x), copy(y)
+        @test dot(x, y) ≈ dot(X, Y)
+        x, y = t([rand(T, 2, 2) for _ in 1:5]), t([rand(T, 2, 2) for _ in 1:5])
+    end
+end
+
 @testset "generalized dot #32739" begin
     for elty in (Int, Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat})
         n = 10
diff --git a/stdlib/LinearAlgebra/test/hessenberg.jl b/stdlib/LinearAlgebra/test/hessenberg.jl
index 62cafecb576092..b2b23caac6865d 100644
--- a/stdlib/LinearAlgebra/test/hessenberg.jl
+++ b/stdlib/LinearAlgebra/test/hessenberg.jl
@@ -4,6 +4,10 @@ module TestHessenberg
 
 using Test, LinearAlgebra, Random
 
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
+using .Main.Furlongs
+
 # for tuple tests below
 ≅(x,y) = all(p -> p[1] ≈ p[2], zip(x,y))
 
@@ -55,6 +59,71 @@ let n = 10
         H = UpperHessenberg(Areal)
         @test Array(Hc + H) == Array(Hc) + Array(H)
         @test Array(Hc - H) == Array(Hc) - Array(H)
+        @testset "Preserve UpperHessenberg shape (issue #39388)" begin
+            for H = (UpperHessenberg(Areal), UpperHessenberg(Furlong.(Areal)))
+                if eltype(H) <: Furlong
+                    A = Furlong.(rand(n,n))
+                    d = Furlong.(rand(n))
+                    dl = Furlong.(rand(n-1))
+                    du = Furlong.(rand(n-1))
+                    us = Furlong(1)*I
+                else
+                    A = rand(n,n)
+                    d = rand(n)
+                    dl = rand(n-1)
+                    du = rand(n-1)
+                    us = 1*I
+                end
+                @testset "$op" for op = (+,-)
+                    for x = (us, Diagonal(d), Bidiagonal(d,dl,:U), Bidiagonal(d,dl,:L),
+                             Tridiagonal(dl,d,du), SymTridiagonal(d,dl),
+                             UpperTriangular(A), UnitUpperTriangular(A))
+                        @test op(H,x) == op(Array(H),x)
+                        @test op(x,H) == op(x,Array(H))
+                        @test op(H,x) isa UpperHessenberg
+                        @test op(x,H) isa UpperHessenberg
+                    end
+                end
+            end
+            H = UpperHessenberg(Areal)
+            A = randn(n,n)
+            d = randn(n)
+            dl = randn(n-1)
+            @testset "Multiplication/division" begin
+                for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U),
+                            UpperTriangular(A), UnitUpperTriangular(A))
+                    @test (H*x)::UpperHessenberg == Array(H)*x
+                    @test (x*H)::UpperHessenberg == x*Array(H)
+                    @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa UpperTriangular
+                    @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa UpperTriangular
+                    @test H/x isa UpperHessenberg
+                    @test x\H isa UpperHessenberg
+                end
+                x = Bidiagonal(d, dl, :L)
+                @test H*x == Array(H)*x
+                @test x*H == x*Array(H)
+                @test H/x == Array(H)/x
+                @test x\H == x\Array(H)
+            end
+            H = UpperHessenberg(Furlong.(Areal))
+            for A in (A, Furlong.(A))
+                @testset "Multiplication/division Furlong" begin
+                    for x = (5, 5I, Diagonal(d), Bidiagonal(d,dl,:U))
+                        @test (H*x)::UpperHessenberg == Array(H)*x
+                        @test (x*H)::UpperHessenberg == x*Array(H)
+                        @test H/x == Array(H)/x broken = eltype(H) <: Furlong && x isa UpperTriangular
+                        @test x\H == x\Array(H) broken = eltype(H) <: Furlong && x isa UpperTriangular
+                        @test H/x isa UpperHessenberg
+                        @test x\H isa UpperHessenberg
+                    end
+                    x = Bidiagonal(d, dl, :L)
+                    @test H*x == Array(H)*x
+                    @test x*H == x*Array(H)
+                    @test H/x == Array(H)/x
+                    @test x\H == x\Array(H)
+                end
+            end
+        end
     end
 
     @testset for eltya in (Float32, Float64, ComplexF32, ComplexF64, Int), herm in (false, true)
@@ -133,4 +202,27 @@ end
     @test Base.propertynames(F, true) == (:Q, :H, :μ, :τ, :factors, :uplo)
 end
 
+@testset "adjoint of Hessenberg" begin
+    Ar = randn(5, 5)
+    Ac = complex.(randn(5, 5), randn(5, 5))
+    b = ones(size(Ar, 1))
+
+    for A in (Ar, Ac)
+        F = hessenberg(A)
+        @test A'\b ≈ F'\b
+    end
+end
+
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin
+    # tests corresponding to #34995
+    A = ImmutableArray([1 2 3; 4 5 6; 7 8 9])
+    H = UpperHessenberg(A)
+
+    @test convert(AbstractArray{Float64}, H)::UpperHessenberg{Float64,ImmutableArray{Float64,2,Array{Float64,2}}} == H
+    @test convert(AbstractMatrix{Float64}, H)::UpperHessenberg{Float64,ImmutableArray{Float64,2,Array{Float64,2}}} == H
+end
+
 end # module TestHessenberg
diff --git a/stdlib/LinearAlgebra/test/lapack.jl b/stdlib/LinearAlgebra/test/lapack.jl
index 207d95de17d158..dfcf3c89dac2a8 100644
--- a/stdlib/LinearAlgebra/test/lapack.jl
+++ b/stdlib/LinearAlgebra/test/lapack.jl
@@ -407,10 +407,10 @@ end
     @testset for elty in (Float32, Float64)
         d = rand(elty,10)
         e = rand(elty,9)
-        @test_throws DimensionMismatch LAPACK.stev!('U',d,rand(elty,10))
+        @test_throws DimensionMismatch LAPACK.stev!('U',d,rand(elty,11))
         @test_throws DimensionMismatch LAPACK.stebz!('A','B',zero(elty),zero(elty),0,0,-1.,d,rand(elty,10))
-        @test_throws DimensionMismatch LAPACK.stegr!('N','A',d,rand(elty,10),zero(elty),zero(elty),0,0)
-        @test_throws DimensionMismatch LAPACK.stein!(d,zeros(elty,10),zeros(elty,10),zeros(BlasInt,10),zeros(BlasInt,10))
+        @test_throws DimensionMismatch LAPACK.stegr!('N','A',d,rand(elty,11),zero(elty),zero(elty),0,0)
+        @test_throws DimensionMismatch LAPACK.stein!(d,zeros(elty,11),zeros(elty,10),zeros(BlasInt,10),zeros(BlasInt,10))
         @test_throws DimensionMismatch LAPACK.stein!(d,e,zeros(elty,11),zeros(BlasInt,10),zeros(BlasInt,10))
     end
 end
@@ -702,4 +702,10 @@ let A = [NaN NaN; NaN NaN]
     @test_throws ArgumentError eigen(A)
 end
 
+# Issue #42762 https://github.com/JuliaLang/julia/issues/42762
+# Tests geqrf! and gerqf! with null column dimensions
+a = zeros(2,0), zeros(0)
+@test LinearAlgebra.LAPACK.geqrf!(a...) === a
+@test LinearAlgebra.LAPACK.gerqf!(a...) === a
+
 end # module TestLAPACK
diff --git a/stdlib/LinearAlgebra/test/lq.jl b/stdlib/LinearAlgebra/test/lq.jl
index 6e21b9ac6c1ccd..96f31ded78d6dd 100644
--- a/stdlib/LinearAlgebra/test/lq.jl
+++ b/stdlib/LinearAlgebra/test/lq.jl
@@ -40,7 +40,7 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q)
                 lqa   = lq(a)
                 x = lqa\b
                 l,q   = lqa.L, lqa.Q
-                qra   = qr(a, Val(true))
+                qra   = qr(a, ColumnNorm())
                 @testset "Basic ops" begin
                     @test size(lqa,1) == size(a,1)
                     @test size(lqa,3) == 1
@@ -56,9 +56,6 @@ rectangularQ(Q::LinearAlgebra.LQPackedQ) = convert(Array, Q)
                     @test l*q ≈ a
                     @test Array(lqa) ≈ a
                     @test Array(copy(lqa)) ≈ a
-                    lstring = sprint(show, l, context = :compact=>true)
-                    qstring = sprint(show, q, context = :compact=>true)
-                    @test sprint(show,MIME"text/plain"(),lqa) == "$(typeof(lqa)) with factors L and Q:\n$lstring\n$qstring"
                     @test LinearAlgebra.Factorization{eltya}(lqa) === lqa
                     @test Matrix{eltya}(q) isa Matrix{eltya}
                     # test Array{T}(LQPackedQ{T})
@@ -203,4 +200,42 @@ end
     end
 end
 
+@testset "REPL printing" begin
+    bf = IOBuffer()
+    show(bf, "text/plain", lq(Matrix(I, 4, 4)))
+    seekstart(bf)
+    @test String(take!(bf)) == """
+LinearAlgebra.LQ{Float64, Matrix{Float64}, Vector{Float64}}
+L factor:
+4×4 Matrix{Float64}:
+ 1.0  0.0  0.0  0.0
+ 0.0  1.0  0.0  0.0
+ 0.0  0.0  1.0  0.0
+ 0.0  0.0  0.0  1.0
+Q factor:
+4×4 LinearAlgebra.LQPackedQ{Float64, Matrix{Float64}, Vector{Float64}}:
+ 1.0  0.0  0.0  0.0
+ 0.0  1.0  0.0  0.0
+ 0.0  0.0  1.0  0.0
+ 0.0  0.0  0.0  1.0"""
+end
+
+@testset "adjoint of LQ" begin
+    n = 5
+
+    for b in (ones(n), ones(n, 2), ones(Complex{Float64}, n, 2))
+        for A in (
+            randn(n, n),
+            # Tall problems become least squares problems similarly to QR
+            randn(n - 2, n),
+            complex.(randn(n, n), randn(n, n)))
+
+            F = lq(A)
+            @test A'\b ≈ F'\b
+        end
+        @test_throws DimensionMismatch lq(randn(n, n + 2))'\b
+    end
+
+end
+
 end # module TestLQ
diff --git a/stdlib/LinearAlgebra/test/lu.jl b/stdlib/LinearAlgebra/test/lu.jl
index 55c62e21a68fbb..e86cd583c0904d 100644
--- a/stdlib/LinearAlgebra/test/lu.jl
+++ b/stdlib/LinearAlgebra/test/lu.jl
@@ -11,7 +11,7 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(1234321)
+Random.seed!(1234324)
 
 areal = randn(n,n)/2
 aimg  = randn(n,n)/2
@@ -37,7 +37,7 @@ dimg  = randn(n)/2
     else
         convert(Tridiagonal{eltya}, Tridiagonal(dlreal, dreal, dureal))
     end
-    ε = εa = eps(abs(float(one(eltya))))
+    εa = eps(abs(float(one(eltya))))
 
     if eltya <: BlasFloat
         @testset "LU factorization for Number" begin
@@ -61,7 +61,7 @@ dimg  = randn(n)/2
         lua   = factorize(a)
         @test_throws ErrorException lua.Z
         l,u,p = lua.L, lua.U, lua.p
-        ll,ul,pl = lu(a)
+        ll,ul,pl = @inferred lu(a)
         @test ll * ul ≈ a[pl,:]
         @test l*u ≈ a[p,:]
         @test (l*u)[invperm(p),:] ≈ a
@@ -71,7 +71,7 @@ dimg  = randn(n)/2
             # test conversion of LU factorization's numerical type
             bft = eltya <: Real ? LinearAlgebra.LU{BigFloat} : LinearAlgebra.LU{Complex{BigFloat}}
             bflua = convert(bft, lua)
-            @test bflua.L*bflua.U ≈ big.(a)[p,:] rtol=ε
+            @test bflua.L*bflua.U ≈ big.(a)[p,:] rtol=εa*norm(a)
             @test Factorization{eltya}(lua) === lua
             # test Factorization with different eltype
             if eltya <: BlasReal
@@ -85,9 +85,9 @@ dimg  = randn(n)/2
     end
     κd    = cond(Array(d),1)
     @testset "Tridiagonal LU" begin
-        lud   = lu(d)
+        lud = @inferred lu(d)
         @test LinearAlgebra.issuccess(lud)
-        @test lu(lud) == lud
+        @test @inferred(lu(lud)) == lud
         @test_throws ErrorException lud.Z
         @test lud.L*lud.U ≈ lud.P*Array(d)
         @test lud.L*lud.U ≈ Array(d)[lud.p,:]
@@ -175,7 +175,10 @@ dimg  = randn(n)/2
                         end
                     end
                     if eltya <: Complex
-                        @test norm((lud'\bb) - Array(d')\bb, 1) < ε*κd*n*2 # Two because the right hand side has two columns
+                        dummy_factor = 2.5
+                        # TODO: Remove dummy_factor, this test started failing when the RNG stream changed
+                        # so the factor was added.
+                        @test norm((lud'\bb) - Array(d')\bb, 1) < ε*κd*n*2*dummy_factor # Two because the right hand side has two columns
                     end
                 end
             end
@@ -199,17 +202,26 @@ dimg  = randn(n)/2
             @test lua.L*lua.U ≈ lua.P*a[:,1:n1]
         end
         @testset "Fat LU" begin
-            lua   = lu(a[1:n1,:])
+            lua   = @inferred lu(a[1:n1,:])
             @test lua.L*lua.U ≈ lua.P*a[1:n1,:]
         end
     end
 
     @testset "LU of Symmetric/Hermitian" begin
         for HS in (Hermitian(a'a), Symmetric(a'a))
-            luhs = lu(HS)
+            luhs = @inferred lu(HS)
             @test luhs.L*luhs.U ≈ luhs.P*Matrix(HS)
         end
     end
+
+    @testset "Factorization of symtridiagonal dense matrix with zero ldlt-pivot (#38026)" begin
+        A = [0.0 -1.0 0.0 0.0
+            -1.0 0.0 0.0 0.0
+            0.0 0.0 0.0 -1.0
+            0.0 0.0 -1.0 0.0]
+        F = factorize(A)
+        @test all((!isnan).(Matrix(F)))
+    end
 end
 
 @testset "Singular matrices" for T in (Float64, ComplexF64)
@@ -220,19 +232,19 @@ end
     @test_throws SingularException lu!(copy(A); check = true)
     @test !issuccess(lu(A; check = false))
     @test !issuccess(lu!(copy(A); check = false))
-    @test_throws ZeroPivotException lu(A, Val(false))
-    @test_throws ZeroPivotException lu!(copy(A), Val(false))
-    @test_throws ZeroPivotException lu(A, Val(false); check = true)
-    @test_throws ZeroPivotException lu!(copy(A), Val(false); check = true)
-    @test !issuccess(lu(A, Val(false); check = false))
-    @test !issuccess(lu!(copy(A), Val(false); check = false))
+    @test_throws ZeroPivotException lu(A, NoPivot())
+    @test_throws ZeroPivotException lu!(copy(A), NoPivot())
+    @test_throws ZeroPivotException lu(A, NoPivot(); check = true)
+    @test_throws ZeroPivotException lu!(copy(A), NoPivot(); check = true)
+    @test !issuccess(lu(A, NoPivot(); check = false))
+    @test !issuccess(lu!(copy(A), NoPivot(); check = false))
     F = lu(A; check = false)
     @test sprint((io, x) -> show(io, "text/plain", x), F) ==
         "Failed factorization of type $(typeof(F))"
 end
 
 @testset "conversion" begin
-    Random.seed!(3)
+    Random.seed!(4)
     a = Tridiagonal(rand(9),rand(10),rand(9))
     fa = Array(a)
     falu = lu(fa)
@@ -284,7 +296,7 @@ end
         show(bf, "text/plain", lu(Matrix(I, 4, 4)))
         seekstart(bf)
         @test String(take!(bf)) == """
-LinearAlgebra.LU{Float64, Matrix{Float64}}
+LinearAlgebra.LU{Float64, Matrix{Float64}, Vector{$Int}}
 L factor:
 4×4 Matrix{Float64}:
  1.0  0.0  0.0  0.0
@@ -311,10 +323,26 @@ include("trickyarithmetic.jl")
 @testset "lu with type whose sum is another type" begin
     A = TrickyArithmetic.A[1 2; 3 4]
     ElT = TrickyArithmetic.D{TrickyArithmetic.C,TrickyArithmetic.C}
-    B = lu(A, Val(false))
+    B = lu(A, NoPivot())
     @test B isa LinearAlgebra.LU{ElT,Matrix{ElT}}
 end
 
+# dimensional correctness:
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :Furlongs) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Furlongs.jl"))
+using .Main.Furlongs
+
+@testset "lu factorization with dimension type" begin
+    n = 4
+    A = Matrix(Furlong(1.0) * I, n, n)
+    F = lu(A).factors
+    @test Diagonal(F) == Diagonal(A)
+    # upper triangular part has a unit Furlong{1}
+    @test all(x -> typeof(x) == Furlong{1, Float64}, F[i,j] for j=1:n for i=1:j)
+    # lower triangular part is unitless Furlong{0}
+    @test all(x -> typeof(x) == Furlong{0, Float64}, F[i,j] for j=1:n for i=j+1:n)
+end
+
 @testset "Issue #30917. Determinant of integer matrix" begin
     @test det([1 1 0 0 1 0 0 0
                1 0 1 0 0 1 0 0
@@ -373,4 +401,38 @@ end
         @test a == c
     end
 end
+
+@testset "lu on *diagonal matrices" begin
+    dl = rand(3)
+    d = rand(4)
+    Bl = Bidiagonal(d, dl, :L)
+    Bu = Bidiagonal(d, dl, :U)
+    Tri = Tridiagonal(dl, d, dl)
+    Sym = SymTridiagonal(d, dl)
+    D = Diagonal(d)
+    b = ones(4)
+    B = rand(4,4)
+    for A in (Bl, Bu, Tri, Sym, D), pivot in (NoPivot(), RowMaximum())
+        @test A\b ≈ lu(A, pivot)\b
+        @test B/A ≈ B/lu(A, pivot)
+        @test B/A ≈ B/Matrix(A)
+        @test Matrix(lu(A, pivot)) ≈ A
+        @test @inferred(lu(A)) isa LU
+        if A isa Union{Bidiagonal, Diagonal, Tridiagonal, SymTridiagonal}
+            @test lu(A) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
+            @test lu(A, pivot) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
+            @test lu(A, pivot; check = false) isa LU{Float64, Tridiagonal{Float64, Vector{Float64}}}
+        end
+    end
+end
+
+@testset "can push to vector after 3-arg ldiv! (#43507)" begin
+    u = rand(3)
+    A = rand(3,3)
+    b = rand(3)
+    ldiv!(u,lu(A),b)
+    push!(b,4.0)
+    @test length(b) == 4
+end
+
 end # module TestLU
diff --git a/stdlib/LinearAlgebra/test/matmul.jl b/stdlib/LinearAlgebra/test/matmul.jl
index 766c7be26db426..20744f104936ea 100644
--- a/stdlib/LinearAlgebra/test/matmul.jl
+++ b/stdlib/LinearAlgebra/test/matmul.jl
@@ -10,101 +10,101 @@ using LinearAlgebra: mul!
 
 @testset "matrices with zero dimensions" begin
     for (dimsA, dimsB, dimsC) in (
-            ((0,5), (5,3), (0,3)),
-            ((3,5), (5,0), (3,0)),
-            ((3,0), (0,4), (3,4)),
-            ((0,5), (5,0), (0,0)),
-            ((0,0), (0,4), (0,4)),
-            ((3,0), (0,0), (3,0)),
-            ((0,0), (0,0), (0,0)) )
+        ((0, 5), (5, 3), (0, 3)),
+        ((3, 5), (5, 0), (3, 0)),
+        ((3, 0), (0, 4), (3, 4)),
+        ((0, 5), (5, 0), (0, 0)),
+        ((0, 0), (0, 4), (0, 4)),
+        ((3, 0), (0, 0), (3, 0)),
+        ((0, 0), (0, 0), (0, 0)))
         @test Matrix{Float64}(undef, dimsA) * Matrix{Float64}(undef, dimsB) == zeros(dimsC)
     end
-    @test Matrix{Float64}(undef, 5, 0) |> t -> t't == zeros(0,0)
-    @test Matrix{Float64}(undef, 5, 0) |> t -> t*t' == zeros(5,5)
-    @test Matrix{ComplexF64}(undef, 5, 0) |> t -> t't == zeros(0,0)
-    @test Matrix{ComplexF64}(undef, 5, 0) |> t -> t*t' == zeros(5,5)
+    @test Matrix{Float64}(undef, 5, 0) |> t -> t't == zeros(0, 0)
+    @test Matrix{Float64}(undef, 5, 0) |> t -> t * t' == zeros(5, 5)
+    @test Matrix{ComplexF64}(undef, 5, 0) |> t -> t't == zeros(0, 0)
+    @test Matrix{ComplexF64}(undef, 5, 0) |> t -> t * t' == zeros(5, 5)
 end
 @testset "2x2 matmul" begin
     AA = [1 2; 3 4]
     BB = [5 6; 7 8]
-    AAi = AA+(0.5*im).*BB
-    BBi = BB+(2.5*im).*AA[[2,1],[2,1]]
+    AAi = AA + (0.5 * im) .* BB
+    BBi = BB + (2.5 * im) .* AA[[2, 1], [2, 1]]
     for A in (copy(AA), view(AA, 1:2, 1:2)), B in (copy(BB), view(BB, 1:2, 1:2))
-        @test A*B == [19 22; 43 50]
+        @test A * B == [19 22; 43 50]
         @test *(transpose(A), B) == [26 30; 38 44]
         @test *(A, transpose(B)) == [17 23; 39 53]
         @test *(transpose(A), transpose(B)) == [23 31; 34 46]
     end
     for Ai in (copy(AAi), view(AAi, 1:2, 1:2)), Bi in (copy(BBi), view(BBi, 1:2, 1:2))
-        @test Ai*Bi == [-21+53.5im -4.25+51.5im; -12+95.5im 13.75+85.5im]
+        @test Ai * Bi == [-21+53.5im -4.25+51.5im; -12+95.5im 13.75+85.5im]
         @test *(adjoint(Ai), Bi) == [68.5-12im 57.5-28im; 88-3im 76.5-25im]
         @test *(Ai, adjoint(Bi)) == [64.5+5.5im 43+31.5im; 104-18.5im 80.5+31.5im]
         @test *(adjoint(Ai), adjoint(Bi)) == [-28.25-66im 9.75-58im; -26-89im 21-73im]
         @test_throws DimensionMismatch [1 2; 0 0; 0 0] * [1 2]
     end
-    @test_throws DimensionMismatch mul!(Matrix{Float64}(undef,3,3), AA, BB)
+    @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 3, 3), AA, BB)
 end
 @testset "3x3 matmul" begin
-    AA = [1 2 3; 4 5 6; 7 8 9].-5
+    AA = [1 2 3; 4 5 6; 7 8 9] .- 5
     BB = [1 0 5; 6 -10 3; 2 -4 -1]
-    AAi = AA+(0.5*im).*BB
-    BBi = BB+(2.5*im).*AA[[2,1,3],[2,3,1]]
+    AAi = AA + (0.5 * im) .* BB
+    BBi = BB + (2.5 * im) .* AA[[2, 1, 3], [2, 3, 1]]
     for A in (copy(AA), view(AA, 1:3, 1:3)), B in (copy(BB), view(BB, 1:3, 1:3))
-        @test A*B == [-26 38 -27; 1 -4 -6; 28 -46 15]
+        @test A * B == [-26 38 -27; 1 -4 -6; 28 -46 15]
         @test *(adjoint(A), B) == [-6 2 -25; 3 -12 -18; 12 -26 -11]
         @test *(A, adjoint(B)) == [-14 0 6; 4 -3 -3; 22 -6 -12]
         @test *(adjoint(A), adjoint(B)) == [6 -8 -6; 12 -9 -9; 18 -10 -12]
     end
     for Ai in (copy(AAi), view(AAi, 1:3, 1:3)), Bi in (copy(BBi), view(BBi, 1:3, 1:3))
-        @test Ai*Bi == [-44.75+13im 11.75-25im -38.25+30im; -47.75-16.5im -51.5+51.5im -56+6im; 16.75-4.5im -53.5+52im -15.5im]
+        @test Ai * Bi == [-44.75+13im 11.75-25im -38.25+30im; -47.75-16.5im -51.5+51.5im -56+6im; 16.75-4.5im -53.5+52im -15.5im]
         @test *(adjoint(Ai), Bi) == [-21+2im -1.75+49im -51.25+19.5im; 25.5+56.5im -7-35.5im 22+35.5im; -3+12im -32.25+43im -34.75-2.5im]
         @test *(Ai, adjoint(Bi)) == [-20.25+15.5im -28.75-54.5im 22.25+68.5im; -12.25+13im -15.5+75im -23+27im; 18.25+im 1.5+94.5im -27-54.5im]
         @test *(adjoint(Ai), adjoint(Bi)) == [1+2im 20.75+9im -44.75+42im; 19.5+17.5im -54-36.5im 51-14.5im; 13+7.5im 11.25+31.5im -43.25-14.5im]
         @test_throws DimensionMismatch [1 2 3; 0 0 0; 0 0 0] * [1 2 3]
     end
-    @test_throws DimensionMismatch mul!(Matrix{Float64}(undef,4,4), AA, BB)
+    @test_throws DimensionMismatch mul!(Matrix{Float64}(undef, 4, 4), AA, BB)
 end
 
 # Generic AbstractArrays
 module MyArray15367
-    using Test, Random
+using Test, Random
 
-    struct MyArray{T,N} <: AbstractArray{T,N}
-        data::Array{T,N}
-    end
+struct MyArray{T,N} <: AbstractArray{T,N}
+    data::Array{T,N}
+end
 
-    Base.size(A::MyArray) = size(A.data)
-    Base.getindex(A::MyArray, indices...) = A.data[indices...]
+Base.size(A::MyArray) = size(A.data)
+Base.getindex(A::MyArray, indices...) = A.data[indices...]
 
-    A = MyArray(rand(4,5))
-    b = rand(5)
-    @test A*b ≈ A.data*b
+A = MyArray(rand(4, 5))
+b = rand(5)
+@test A * b ≈ A.data * b
 end
 
 @testset "Generic integer matrix multiplication" begin
     AA = [1 2 3; 4 5 6] .- 3
     BB = [2 -2; 3 -5; -4 7]
     for A in (copy(AA), view(AA, 1:2, 1:3)), B in (copy(BB), view(BB, 1:3, 1:2))
-        @test A*B == [-7 9; -4 9]
+        @test A * B == [-7 9; -4 9]
         @test *(transpose(A), transpose(B)) == [-6 -11 15; -6 -13 18; -6 -15 21]
     end
     AA = fill(1, 2, 100)
     BB = fill(1, 100, 3)
     for A in (copy(AA), view(AA, 1:2, 1:100)), B in (copy(BB), view(BB, 1:100, 1:3))
-        @test A*B == [100 100 100; 100 100 100]
+        @test A * B == [100 100 100; 100 100 100]
     end
     AA = rand(1:20, 5, 5) .- 10
     BB = rand(1:20, 5, 5) .- 10
     CC = Matrix{Int}(undef, size(AA, 1), size(BB, 2))
     for A in (copy(AA), view(AA, 1:5, 1:5)), B in (copy(BB), view(BB, 1:5, 1:5)), C in (copy(CC), view(CC, 1:5, 1:5))
-        @test *(transpose(A), B) == A'*B
-        @test *(A, transpose(B)) == A*B'
+        @test *(transpose(A), B) == A' * B
+        @test *(A, transpose(B)) == A * B'
         # Preallocated
-        @test mul!(C, A, B) == A*B
-        @test mul!(C, transpose(A), B) == A'*B
-        @test mul!(C, A, transpose(B)) == A*B'
-        @test mul!(C, transpose(A), transpose(B)) == A'*B'
-        @test LinearAlgebra.mul!(C, adjoint(A), transpose(B)) == A'*transpose(B)
+        @test mul!(C, A, B) == A * B
+        @test mul!(C, transpose(A), B) == A' * B
+        @test mul!(C, A, transpose(B)) == A * B'
+        @test mul!(C, transpose(A), transpose(B)) == A' * B'
+        @test LinearAlgebra.mul!(C, adjoint(A), transpose(B)) == A' * transpose(B)
 
         # Inplace multiply-add
         α = rand(-10:10)
@@ -113,17 +113,17 @@ end
         βC = β * C
         _C0 = copy(C)
         C0() = (C .= _C0; C)  # reset C but don't change the container type
-        @test mul!(C0(), A, B, α, β) == α*A*B .+ βC
-        @test mul!(C0(), transpose(A), B, α, β) == α*A'*B .+ βC
-        @test mul!(C0(), A, transpose(B), α, β) == α*A*B' .+ βC
-        @test mul!(C0(), transpose(A), transpose(B), α, β) == α*A'*B' .+ βC
-        @test mul!(C0(), adjoint(A), transpose(B), α, β) == α*A'*transpose(B) .+ βC
+        @test mul!(C0(), A, B, α, β) == α * A * B .+ βC
+        @test mul!(C0(), transpose(A), B, α, β) == α * A' * B .+ βC
+        @test mul!(C0(), A, transpose(B), α, β) == α * A * B' .+ βC
+        @test mul!(C0(), transpose(A), transpose(B), α, β) == α * A' * B' .+ βC
+        @test mul!(C0(), adjoint(A), transpose(B), α, β) == α * A' * transpose(B) .+ βC
 
         #test DimensionMismatch for generic_matmatmul
-        @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(A), transpose(fill(1,4,4)))
-        @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(fill(1,4,4)), transpose(B))
+        @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(A), transpose(fill(1, 4, 4)))
+        @test_throws DimensionMismatch LinearAlgebra.mul!(C, adjoint(fill(1, 4, 4)), transpose(B))
     end
-    vv = [1,2]
+    vv = [1, 2]
     CC = Matrix{Int}(undef, 2, 2)
     for v in (copy(vv), view(vv, 1:2)), C in (copy(CC), view(CC, 1:2, 1:2))
         @test @inferred(mul!(C, v, adjoint(v))) == [1 2; 2 4]
@@ -134,36 +134,36 @@ end
 end
 
 @testset "generic_matvecmul" begin
-    AA = rand(5,5)
+    AA = rand(5, 5)
     BB = rand(5)
     for A in (copy(AA), view(AA, 1:5, 1:5)), B in (copy(BB), view(BB, 1:5))
-        @test_throws DimensionMismatch LinearAlgebra.generic_matvecmul!(zeros(6),'N',A,B)
-        @test_throws DimensionMismatch LinearAlgebra.generic_matvecmul!(B,'N',A,zeros(6))
+        @test_throws DimensionMismatch LinearAlgebra.generic_matvecmul!(zeros(6), 'N', A, B)
+        @test_throws DimensionMismatch LinearAlgebra.generic_matvecmul!(B, 'N', A, zeros(6))
     end
-    vv = [1,2,3]
+    vv = [1, 2, 3]
     CC = Matrix{Int}(undef, 3, 3)
     for v in (copy(vv), view(vv, 1:3)), C in (copy(CC), view(CC, 1:3, 1:3))
-        @test mul!(C, v, transpose(v)) == v*v'
+        @test mul!(C, v, transpose(v)) == v * v'
         C .= C0 = rand(-10:10, size(C))
-        @test mul!(C, v, transpose(v), 2, 3) == 2v*v' .+ 3C0
+        @test mul!(C, v, transpose(v), 2, 3) == 2v * v' .+ 3C0
     end
-    vvf = map(Float64,vv)
+    vvf = map(Float64, vv)
     CC = Matrix{Float64}(undef, 3, 3)
     for vf in (copy(vvf), view(vvf, 1:3)), C in (copy(CC), view(CC, 1:3, 1:3))
-        @test mul!(C, vf, transpose(vf)) == vf*vf'
+        @test mul!(C, vf, transpose(vf)) == vf * vf'
         C .= C0 = rand(eltype(C), size(C))
-        @test mul!(C, vf, transpose(vf), 2, 3) == 2vf*vf' .+ 3C0
+        @test mul!(C, vf, transpose(vf), 2, 3) ≈ 2vf * vf' .+ 3C0
     end
 end
 
 @testset "fallbacks & such for BlasFloats" begin
-    AA = rand(Float64,6,6)
-    BB = rand(Float64,6,6)
-    CC = zeros(Float64,6,6)
+    AA = rand(Float64, 6, 6)
+    BB = rand(Float64, 6, 6)
+    CC = zeros(Float64, 6, 6)
     for A in (copy(AA), view(AA, 1:6, 1:6)), B in (copy(BB), view(BB, 1:6, 1:6)), C in (copy(CC), view(CC, 1:6, 1:6))
-        @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A)*transpose(B)
-        @test LinearAlgebra.mul!(C, A, adjoint(B)) == A*transpose(B)
-        @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A)*B
+        @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B)
+        @test LinearAlgebra.mul!(C, A, adjoint(B)) == A * transpose(B)
+        @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A) * B
 
         # Inplace multiply-add
         α = rand(Float64)
@@ -172,98 +172,178 @@ end
         βC = β * C
         _C0 = copy(C)
         C0() = (C .= _C0; C)  # reset C but don't change the container type
-        @test mul!(C0(), transpose(A), transpose(B), α, β) ≈ α*transpose(A)*transpose(B) .+ βC
-        @test mul!(C0(), A, adjoint(B), α, β) ≈ α*A*transpose(B) .+ βC
-        @test mul!(C0(), adjoint(A), B, α, β) ≈ α*transpose(A)*B .+ βC
+        @test mul!(C0(), transpose(A), transpose(B), α, β) ≈ α * transpose(A) * transpose(B) .+ βC
+        @test mul!(C0(), A, adjoint(B), α, β) ≈ α * A * transpose(B) .+ βC
+        @test mul!(C0(), adjoint(A), B, α, β) ≈ α * transpose(A) * B .+ βC
     end
 end
 
 @testset "mixed Blas-non-Blas matmul" begin
-    AA = rand(-10:10,6,6)
-    BB = rand(Float64,6,6)
-    CC = zeros(Float64,6,6)
+    AA = rand(-10:10, 6, 6)
+    BB = rand(Float64, 6, 6)
+    CC = zeros(Float64, 6, 6)
     for A in (copy(AA), view(AA, 1:6, 1:6)), B in (copy(BB), view(BB, 1:6, 1:6)), C in (copy(CC), view(CC, 1:6, 1:6))
-        @test LinearAlgebra.mul!(C, A, B) == A*B
-        @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A)*transpose(B)
-        @test LinearAlgebra.mul!(C, A, adjoint(B)) == A*transpose(B)
-        @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A)*B
+        @test LinearAlgebra.mul!(C, A, B) == A * B
+        @test LinearAlgebra.mul!(C, transpose(A), transpose(B)) == transpose(A) * transpose(B)
+        @test LinearAlgebra.mul!(C, A, adjoint(B)) == A * transpose(B)
+        @test LinearAlgebra.mul!(C, adjoint(A), B) == transpose(A) * B
     end
 end
 
 @testset "matrix algebra with subarrays of floats (stride != 1)" begin
-    A = reshape(map(Float64,1:20),5,4)
-    Aref = A[1:2:end,1:2:end]
+    A = reshape(map(Float64, 1:20), 5, 4)
+    Aref = A[1:2:end, 1:2:end]
     Asub = view(A, 1:2:5, 1:2:4)
-    b = [1.2,-2.5]
-    @test (Aref*b) == (Asub*b)
+    b = [1.2, -2.5]
+    @test (Aref * b) == (Asub * b)
     @test *(transpose(Asub), Asub) == *(transpose(Aref), Aref)
     @test *(Asub, transpose(Asub)) == *(Aref, transpose(Aref))
     Ai = A .+ im
-    Aref = Ai[1:2:end,1:2:end]
+    Aref = Ai[1:2:end, 1:2:end]
     Asub = view(Ai, 1:2:5, 1:2:4)
     @test *(adjoint(Asub), Asub) == *(adjoint(Aref), Aref)
     @test *(Asub, adjoint(Asub)) == *(Aref, adjoint(Aref))
 end
 
-@testset "Complex matrix x real MatOrVec etc (issue #29224)" for T1 in (Float32,Float64)
-    for T2 in (Float32,Float64)
-        for arg1_real in (true,false)
-            @testset "Combination $T1 $T2 $arg1_real $arg2_real" for arg2_real in (true,false)
-                A0 = reshape(Vector{T1}(1:25),5,5) .+
-                   (arg1_real ? 0 : 1im*reshape(Vector{T1}(-3:21),5,5))
-                A = view(A0,1:2,1:2)
-                B = Matrix{T2}([1.0 3.0; -1.0 2.0]).+
-                    (arg2_real ? 0 : 1im*Matrix{T2}([3.0 4; -1 10]))
-                AB_correct = copy(A)*B
-                AB = A*B;  # view times matrix
-                @test AB ≈ AB_correct
-                A1 = view(A0,:,1:2)  # rectangular view times matrix
-                @test A1*B ≈ copy(A1)*B
-                B1 = view(B,1:2,1:2);
-                AB1 = A*B1; # view times view
-                @test AB1 ≈ AB_correct
-                x = Vector{T2}([1.0;10.0]) .+ (arg2_real ? 0 : 1im*Vector{T2}([3;-1]))
-                Ax_exact = copy(A)*x
-                Ax = A*x  # view times vector
-                @test Ax ≈ Ax_exact
-                x1 = view(x,1:2)
-                Ax1 = A*x1  # view times viewed vector
-                @test Ax1 ≈ Ax_exact
-                @test copy(A)*x1 ≈ Ax_exact # matrix times viewed vector
-                # View times transposed matrix
-                Bt = transpose(B);
-                @test A*Bt ≈ A*copy(Bt)
+@testset "matrix x matrix with negative stride" begin
+    M = reshape(map(Float64, 1:77), 7, 11)
+    N = reshape(map(Float64, 1:63), 9, 7)
+    U = view(M, 7:-1:1, 11:-2:1)
+    V = view(N, 7:-1:2, 7:-1:1)
+    @test U * V ≈ Matrix(U) * Matrix(V)
+end
+
+@testset "dot product of subarrays of vectors (floats, negative stride, issue #37767)" begin
+    for T in (Float32, Float64, ComplexF32, ComplexF64)
+        a = Vector{T}(3:2:7)
+        b = Vector{T}(1:10)
+        v = view(b, 7:-2:3)
+        @test dot(a, Vector(v)) ≈ 67.0
+        @test dot(a, v) ≈ 67.0
+        @test dot(v, a) ≈ 67.0
+        @test dot(Vector(v), Vector(v)) ≈ 83.0
+        @test dot(v, v) ≈ 83.0
+    end
+end
+
+@testset "dot product of stride-vector like input" begin
+    for T in (Float32, Float64, ComplexF32, ComplexF64)
+        a = randn(T, 10)
+        b = view(a, 1:10)
+        c = reshape(b, 5, 2)
+        d = view(c, :, 1:2)
+        r = sum(abs2, a)
+        for x in (a,b,c,d), y in (a,b,c,d)
+            @test dot(x, y) ≈ r
+        end
+    end
+end
+
+@testset "Complex matrix x real MatOrVec etc (issue #29224)" for T in (Float32, Float64)
+    A0 = randn(complex(T), 10, 10)
+    B0 = randn(T, 10, 10)
+    @testset "Combination Mat{$(complex(T))} Mat{$T}" for Bax1 in (1:5, 2:2:10), Bax2 in (1:5, 2:2:10)
+        B = view(A0, Bax1, Bax2)
+        tB = transpose(B)
+        Bd, tBd = copy(B), copy(tB)
+        for Aax1 in (1:5, 2:2:10, (:)), Aax2 in (1:5, 2:2:10)
+            A = view(A0, Aax1, Aax2)
+            AB_correct = copy(A) * Bd
+            AtB_correct = copy(A) * tBd
+            @test A*Bd ≈ AB_correct # view times matrix
+            @test A*B ≈ AB_correct # view times view
+            @test A*tBd ≈ AtB_correct # view times transposed matrix
+            @test A*tB ≈ AtB_correct # view times transposed view
+        end
+    end
+    x = randn(T, 10)
+    y0 = similar(A0, 20)
+    @testset "Combination Mat{$(complex(T))} Vec{$T}" for Aax1 in (1:5, 2:2:10, (:)), Aax2 in (1:5, 2:2:10)
+        A = view(A0, Aax1, Aax2)
+        Ad = copy(A)
+        for indx in (1:5, 1:2:10, 6:-1:2)
+            vx = view(x, indx)
+            dx = x[indx]
+            Ax_correct = Ad*dx
+            @test A*vx ≈ A*dx ≈ Ad*vx ≈ Ax_correct # view/matrix times view/vector
+            for indy in (1:2:2size(A,1), size(A,1):-1:1)
+                y = view(y0, indy)
+                @test mul!(y, A, vx) ≈ mul!(y, A, dx) ≈ mul!(y, Ad, vx) ≈
+                    mul!(y, Ad, dx) ≈ Ax_correct   # test for uncontiguous dest
+            end
+        end
+    end
+end
+
+@testset "real matrix x complex vec" begin
+    _matmulres(M, v) = [mapreduce(*, +, row, v) for row in eachrow(M)]
+    testmatmul(M, v) = @test M * v ≈ _matmulres(M, v)
+
+    @testset for T in (Float32, Float64), n = (4, 5)
+        M1 = reshape(Vector{T}(1:n^2), n, n)
+        M2 = reinterpret(reshape, T, [Tuple(T(i + j) for j in 1:n) for i in 1:n])
+        v = convert(Vector{Complex{T}}, (1:n) .+ im .* (4 .+ (1:n)))
+
+        for M in (M1, M2)
+            M_view_cont = @view M[:, :]
+            v_view_cont = @view v[:]
+            for _M in (M, M_view_cont), _v in (v, v_view_cont)
+                testmatmul(_M, _v)
+            end
+
+            # construct a view with strides(M, 1) == 1 and strides(M, 2) != 1
+            ax_noncont = 1:2:n
+            n1 = length(ax_noncont)
+            M_view_noncont = @view M[1:n1, ax_noncont]
+            v_view_noncont = @view v[ax_noncont]
+            testmatmul(M_view_noncont, v_view_noncont)
+
+            @testset for op in (transpose, adjoint)
+                for _M in (M, M_view_cont), _v in (v, v_view_cont)
+                    _M2 = op(_M)
+                    testmatmul(_M2, _v)
+                end
+                _M2 = op(M_view_noncont)
+                testmatmul(_M2, v_view_noncont)
             end
         end
     end
 end
 
+@testset "matrix x vector with negative lda or 0 stride" for T in (Float32, Float64)
+    for TA in (T, complex(T)), TB in (T, complex(T))
+        A = view(randn(TA, 10, 10), 1:10, 10:-1:1) # negative lda
+        v = view([randn(TB)], 1 .+ 0(1:10)) # 0 stride
+        Ad, vd = copy(A), copy(v)
+        @test Ad * vd ≈ A * vd ≈ Ad * v ≈ A * v
+    end
+end
 
 @testset "issue #15286" begin
     A = reshape(map(Float64, 1:20), 5, 4)
     C = zeros(8, 8)
     sC = view(C, 1:2:8, 1:2:8)
-    B = reshape(map(Float64,-9:10),5,4)
-    @test mul!(sC, transpose(A), A) == A'*A
-    @test mul!(sC, transpose(A), B) == A'*B
+    B = reshape(map(Float64, -9:10), 5, 4)
+    @test mul!(sC, transpose(A), A) == A' * A
+    @test mul!(sC, transpose(A), B) == A' * B
 
     Aim = A .- im
-    C = zeros(ComplexF64,8,8)
+    C = zeros(ComplexF64, 8, 8)
     sC = view(C, 1:2:8, 1:2:8)
-    B = reshape(map(Float64,-9:10),5,4) .+ im
-    @test mul!(sC, adjoint(Aim), Aim) == Aim'*Aim
-    @test mul!(sC, adjoint(Aim), B) == Aim'*B
+    B = reshape(map(Float64, -9:10), 5, 4) .+ im
+    @test mul!(sC, adjoint(Aim), Aim) == Aim' * Aim
+    @test mul!(sC, adjoint(Aim), B) == Aim' * B
 end
 
 @testset "syrk & herk" begin
-    AA = reshape(1:1503, 501, 3).-750.0
+    AA = reshape(1:1503, 501, 3) .- 750.0
     res = Float64[135228751 9979252 -115270247; 9979252 10481254 10983256; -115270247 10983256 137236759]
     for A in (copy(AA), view(AA, 1:501, 1:3))
         @test *(transpose(A), A) == res
         @test *(adjoint(A), transpose(copy(A'))) == res
     end
     cutoff = 501
-    A = reshape(1:6*cutoff,2*cutoff,3).-(6*cutoff)/2
+    A = reshape(1:6*cutoff, 2 * cutoff, 3) .- (6 * cutoff) / 2
     Asub = view(A, 1:2:2*cutoff, 1:3)
     Aref = A[1:2:2*cutoff, 1:3]
     @test *(transpose(Asub), Asub) == *(transpose(Aref), Aref)
@@ -273,50 +353,154 @@ end
     @test *(adjoint(Asub), Asub) == *(adjoint(Aref), Aref)
 
     A5x5, A6x5 = Matrix{Float64}.(undef, ((5, 5), (6, 5)))
-    @test_throws DimensionMismatch LinearAlgebra.syrk_wrapper!(A5x5,'N',A6x5)
-    @test_throws DimensionMismatch LinearAlgebra.herk_wrapper!(A5x5,'N',A6x5)
+    @test_throws DimensionMismatch LinearAlgebra.syrk_wrapper!(A5x5, 'N', A6x5)
+    @test_throws DimensionMismatch LinearAlgebra.herk_wrapper!(A5x5, 'N', A6x5)
 end
 
 @testset "matmul for types w/o sizeof (issue #1282)" begin
-    AA = fill(complex(1,1), 10, 10)
+    AA = fill(complex(1, 1), 10, 10)
     for A in (copy(AA), view(AA, 1:10, 1:10))
         A2 = A^2
-        @test A2[1,1] == 20im
+        @test A2[1, 1] == 20im
     end
 end
 
 @testset "mul! (scaling)" begin
-    A5x5, b5, C5x6 = Array{Float64}.(undef,((5,5), 5, (5,6)))
-    for A in (A5x5, view(A5x5, :, :)), b in (b5,  view(b5, :)), C in (C5x6, view(C5x6, :, :))
+    A5x5, b5, C5x6 = Array{Float64}.(undef, ((5, 5), 5, (5, 6)))
+    for A in (A5x5, view(A5x5, :, :)), b in (b5, view(b5, :)), C in (C5x6, view(C5x6, :, :))
         @test_throws DimensionMismatch mul!(A, Diagonal(b), C)
     end
 end
 
+@testset "muladd" begin
+    A23 = reshape(1:6, 2, 3) .+ 0
+    B34 = reshape(1:12, 3, 4) .+ im
+    u2 = [10, 20]
+    v3 = [3, 5, 7] .+ im
+    w4 = [11, 13, 17, 19im]
+
+    @testset "matrix-matrix" begin
+        @test muladd(A23, B34, 0) == A23 * B34
+        @test muladd(A23, B34, 100) == A23 * B34 .+ 100
+        @test muladd(A23, B34, u2) == A23 * B34 .+ u2
+        @test muladd(A23, B34, w4') == A23 * B34 .+ w4'
+        @test_throws DimensionMismatch muladd(B34, A23, 1)
+        @test muladd(ones(1, 3), ones(3, 4), ones(1, 4)) == fill(4.0, 1, 4)
+        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(9, 4))
+
+        # broadcasting fallback method allows trailing dims
+        @test muladd(A23, B34, ones(2, 4, 1)) == A23 * B34 + ones(2, 4, 1)
+        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(9, 4, 1))
+        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3, 4), ones(1, 4, 9))
+        # and catches z::Array{T,0}
+        @test muladd(A23, B34, fill(0)) == A23 * B34
+    end
+    @testset "matrix-vector" begin
+        @test muladd(A23, v3, 0) == A23 * v3
+        @test muladd(A23, v3, 100) == A23 * v3 .+ 100
+        @test muladd(A23, v3, u2) == A23 * v3 .+ u2
+        @test muladd(A23, v3, im) isa Vector{Complex{Int}}
+        @test muladd(ones(1, 3), ones(3), ones(1)) == [4]
+        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(7))
+
+        # fallback
+        @test muladd(A23, v3, ones(2, 1, 1)) == A23 * v3 + ones(2, 1, 1)
+        @test_throws DimensionMismatch muladd(A23, v3, ones(2, 2))
+        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(7, 1))
+        @test_throws DimensionMismatch muladd(ones(1, 3), ones(3), ones(1, 7))
+        @test muladd(A23, v3, fill(0)) == A23 * v3
+    end
+    @testset "adjoint-matrix" begin
+        @test muladd(v3', B34, 0) isa Adjoint
+        @test muladd(v3', B34, 2im) == v3' * B34 .+ 2im
+        @test muladd(v3', B34, w4') == v3' * B34 .+ w4'
+
+        # via fallback
+        @test muladd(v3', B34, ones(1, 4)) == (B34' * v3 + ones(4, 1))'
+        @test_throws DimensionMismatch muladd(v3', B34, ones(7, 4))
+        @test_throws DimensionMismatch muladd(v3', B34, ones(1, 4, 7))
+        @test muladd(v3', B34, fill(0)) == v3' * B34 # does not make an Adjoint
+    end
+    @testset "vector-adjoint" begin
+        @test muladd(u2, v3', 0) isa Matrix
+        @test muladd(u2, v3', 99) == u2 * v3' .+ 99
+        @test muladd(u2, v3', A23) == u2 * v3' .+ A23
+
+        @test muladd(u2, v3', ones(2, 3, 1)) == u2 * v3' + ones(2, 3, 1)
+        @test_throws DimensionMismatch muladd(u2, v3', ones(2, 3, 4))
+        @test_throws DimensionMismatch muladd([1], v3', ones(7, 3))
+        @test muladd(u2, v3', fill(0)) == u2 * v3'
+    end
+    @testset "dot" begin # all use muladd(::Any, ::Any, ::Any)
+        @test muladd(u2', u2, 0) isa Number
+        @test muladd(v3', v3, im) == dot(v3, v3) + im
+        @test muladd(u2', u2, [1]) == [dot(u2, u2) + 1]
+        @test_throws DimensionMismatch muladd(u2', u2, [1, 1]) == [dot(u2, u2) + 1]
+        @test muladd(u2', u2, fill(0)) == dot(u2, u2)
+    end
+    @testset "arrays of arrays" begin
+        vofm = [rand(1:9, 2, 2) for _ in 1:3]
+        Mofm = [rand(1:9, 2, 2) for _ in 1:3, _ in 1:3]
+
+        @test muladd(vofm', vofm, vofm[1]) == vofm' * vofm .+ vofm[1] # inner
+        @test muladd(vofm, vofm', Mofm) == vofm * vofm' .+ Mofm       # outer
+        @test muladd(vofm', Mofm, vofm') == vofm' * Mofm .+ vofm'     # bra-mat
+        @test muladd(Mofm, Mofm, vofm) == Mofm * Mofm .+ vofm         # mat-mat
+        @test muladd(Mofm, vofm, vofm) == Mofm * vofm .+ vofm         # mat-vec
+    end
+end
+
+@testset "muladd & structured matrices" begin
+    A33 = reshape(1:9, 3, 3) .+ im
+    v3 = [3, 5, 7im]
+
+    # no special treatment
+    @test muladd(Symmetric(A33), Symmetric(A33), 1) == Symmetric(A33) * Symmetric(A33) .+ 1
+    @test muladd(Hermitian(A33), Hermitian(A33), v3) == Hermitian(A33) * Hermitian(A33) .+ v3
+    @test muladd(adjoint(A33), transpose(A33), A33) == A33' * transpose(A33) .+ A33
+
+    u1 = muladd(UpperTriangular(A33), UpperTriangular(A33), Diagonal(v3))
+    @test u1 isa UpperTriangular
+    @test u1 == UpperTriangular(A33) * UpperTriangular(A33) + Diagonal(v3)
+
+    # diagonal
+    @test muladd(Diagonal(v3), Diagonal(A33), Diagonal(v3)).diag == ([1, 5, 9] .+ im .+ 1) .* v3
+
+    # uniformscaling
+    @test muladd(Diagonal(v3), I, I).diag == v3 .+ 1
+    @test muladd(2 * I, 3 * I, I).λ == 7
+    @test muladd(A33, A33', I) == A33 * A33' + I
+
+    # https://github.com/JuliaLang/julia/issues/38426
+    @test @evalpoly(A33, 1.0 * I, 1.0 * I) == I + A33
+    @test @evalpoly(A33, 1.0 * I, 1.0 * I, 1.0 * I) == I + A33 + A33^2
+end
+
 # issue #6450
-@test dot(Any[1.0,2.0], Any[3.5,4.5]) === 12.5
+@test dot(Any[1.0, 2.0], Any[3.5, 4.5]) === 12.5
 
 @testset "dot" for elty in (Float32, Float64, ComplexF32, ComplexF64)
-    x = convert(Vector{elty},[1.0, 2.0, 3.0])
-    y = convert(Vector{elty},[3.5, 4.5, 5.5])
+    x = convert(Vector{elty}, [1.0, 2.0, 3.0])
+    y = convert(Vector{elty}, [3.5, 4.5, 5.5])
     @test_throws DimensionMismatch dot(x, 1:2, y, 1:3)
     @test_throws BoundsError dot(x, 1:4, y, 1:4)
     @test_throws BoundsError dot(x, 1:3, y, 2:4)
     @test dot(x, 1:2, y, 1:2) == convert(elty, 12.5)
-    @test transpose(x)*y == convert(elty, 29.0)
-    X = convert(Matrix{elty},[1.0 2.0; 3.0 4.0])
-    Y = convert(Matrix{elty},[1.5 2.5; 3.5 4.5])
+    @test transpose(x) * y == convert(elty, 29.0)
+    X = convert(Matrix{elty}, [1.0 2.0; 3.0 4.0])
+    Y = convert(Matrix{elty}, [1.5 2.5; 3.5 4.5])
     @test dot(X, Y) == convert(elty, 35.0)
-    Z = convert(Vector{Matrix{elty}},[reshape(1:4, 2, 2), fill(1, 2, 2)])
+    Z = Matrix{elty}[reshape(1:4, 2, 2), fill(1, 2, 2)]
     @test dot(Z, Z) == convert(elty, 34.0)
 end
 
-dot1(x,y) = invoke(dot, Tuple{Any,Any}, x,y)
-dot2(x,y) = invoke(dot, Tuple{AbstractArray,AbstractArray}, x,y)
+dot1(x, y) = invoke(dot, Tuple{Any,Any}, x, y)
+dot2(x, y) = invoke(dot, Tuple{AbstractArray,AbstractArray}, x, y)
 @testset "generic dot" begin
     AA = [1+2im 3+4im; 5+6im 7+8im]
     BB = [2+7im 4+1im; 3+8im 6+5im]
     for A in (copy(AA), view(AA, 1:2, 1:2)), B in (copy(BB), view(BB, 1:2, 1:2))
-        @test dot(A,B) == dot(vec(A),vec(B)) == dot1(A,B) == dot2(A,B) == dot(float.(A),float.(B))
+        @test dot(A, B) == dot(vec(A), vec(B)) == dot1(A, B) == dot2(A, B) == dot(float.(A), float.(B))
         @test dot(Int[], Int[]) == 0 == dot1(Int[], Int[]) == dot2(Int[], Int[])
         @test_throws MethodError dot(Any[], Any[])
         @test_throws MethodError dot1(Any[], Any[])
@@ -333,17 +517,17 @@ end
 
 @testset "Issue 11978" begin
     A = Matrix{Matrix{Float64}}(undef, 2, 2)
-    A[1,1] = Matrix(1.0I, 3, 3)
-    A[2,2] = Matrix(1.0I, 2, 2)
-    A[1,2] = Matrix(1.0I, 3, 2)
-    A[2,1] = Matrix(1.0I, 2, 3)
+    A[1, 1] = Matrix(1.0I, 3, 3)
+    A[2, 2] = Matrix(1.0I, 2, 2)
+    A[1, 2] = Matrix(1.0I, 3, 2)
+    A[2, 1] = Matrix(1.0I, 2, 3)
     b = Vector{Vector{Float64}}(undef, 2)
-    b[1] = fill(1., 3)
-    b[2] = fill(1., 2)
-    @test A*b == Vector{Float64}[[2,2,1], [2,2]]
+    b[1] = fill(1.0, 3)
+    b[2] = fill(1.0, 2)
+    @test A * b == Vector{Float64}[[2, 2, 1], [2, 2]]
 end
 
-@test_throws ArgumentError LinearAlgebra.copytri!(Matrix{Float64}(undef,10,10),'Z')
+@test_throws ArgumentError LinearAlgebra.copytri!(Matrix{Float64}(undef, 10, 10), 'Z')
 
 @testset "Issue 30055" begin
     B = [1+im 2+im 3+im; 4+im 5+im 6+im; 7+im 9+im im]
@@ -354,10 +538,10 @@ end
     @test copy(transpose(A)) == transpose(A)
     @test copy(A') == A'
     B = Matrix{Matrix{Complex{Int}}}(undef, 2, 2)
-    B[1,1] = [1+im 2+im; 3+im 4+im]
-    B[2,1] = [1+2im 1+3im;1+3im 1+4im]
-    B[1,2] = [7+im 8+2im; 9+3im 4im]
-    B[2,2] = [9+im 8+im; 7+im 6+im]
+    B[1, 1] = [1+im 2+im; 3+im 4+im]
+    B[2, 1] = [1+2im 1+3im; 1+3im 1+4im]
+    B[1, 2] = [7+im 8+2im; 9+3im 4im]
+    B[2, 2] = [9+im 8+im; 7+im 6+im]
     A = UpperTriangular(B)
     @test copy(transpose(A)) == transpose(A)
     @test copy(A') == A'
@@ -366,27 +550,27 @@ end
     @test copy(A') == A'
 end
 
-@testset "gemv! and gemm_wrapper for $elty" for elty in [Float32,Float64,ComplexF64,ComplexF32]
-    A10x10, x10, x11 = Array{elty}.(undef, ((10,10), 10, 11))
-    @test_throws DimensionMismatch LinearAlgebra.gemv!(x10,'N',A10x10,x11)
-    @test_throws DimensionMismatch LinearAlgebra.gemv!(x11,'N',A10x10,x10)
-    @test LinearAlgebra.gemv!(elty[], 'N', Matrix{elty}(undef,0,0), elty[]) == elty[]
-    @test LinearAlgebra.gemv!(x10, 'N', Matrix{elty}(undef,10,0), elty[]) == zeros(elty,10)
+@testset "gemv! and gemm_wrapper for $elty" for elty in [Float32, Float64, ComplexF64, ComplexF32]
+    A10x10, x10, x11 = Array{elty}.(undef, ((10, 10), 10, 11))
+    @test_throws DimensionMismatch LinearAlgebra.gemv!(x10, 'N', A10x10, x11)
+    @test_throws DimensionMismatch LinearAlgebra.gemv!(x11, 'N', A10x10, x10)
+    @test LinearAlgebra.gemv!(elty[], 'N', Matrix{elty}(undef, 0, 0), elty[]) == elty[]
+    @test LinearAlgebra.gemv!(x10, 'N', Matrix{elty}(undef, 10, 0), elty[]) == zeros(elty, 10)
 
     I0x0 = Matrix{elty}(I, 0, 0)
     I10x10 = Matrix{elty}(I, 10, 10)
     I10x11 = Matrix{elty}(I, 10, 11)
-    @test LinearAlgebra.gemm_wrapper('N','N', I10x10, I10x10) == I10x10
-    @test_throws DimensionMismatch LinearAlgebra.gemm_wrapper!(I10x10,'N','N', I10x11, I10x10)
-    @test_throws DimensionMismatch LinearAlgebra.gemm_wrapper!(I10x10,'N','N', I0x0, I0x0)
+    @test LinearAlgebra.gemm_wrapper('N', 'N', I10x10, I10x10) == I10x10
+    @test_throws DimensionMismatch LinearAlgebra.gemm_wrapper!(I10x10, 'N', 'N', I10x11, I10x10)
+    @test_throws DimensionMismatch LinearAlgebra.gemm_wrapper!(I10x10, 'N', 'N', I0x0, I0x0)
 
-    A = rand(elty,3,3)
-    @test LinearAlgebra.matmul3x3('T','N',A, Matrix{elty}(I, 3, 3)) == transpose(A)
+    A = rand(elty, 3, 3)
+    @test LinearAlgebra.matmul3x3('T', 'N', A, Matrix{elty}(I, 3, 3)) == transpose(A)
 end
 
 @testset "#13593, #13488" begin
-    aa = rand(3,3)
-    bb = rand(3,3)
+    aa = rand(3, 3)
+    bb = rand(3, 3)
     for a in (copy(aa), view(aa, 1:3, 1:3)), b in (copy(bb), view(bb, 1:3, 1:3))
         @test_throws ArgumentError mul!(a, a, b)
         @test_throws ArgumentError mul!(a, b, a)
@@ -407,7 +591,7 @@ struct RootInt
 end
 import Base: *, adjoint, transpose
 import LinearAlgebra: Adjoint, Transpose
-(*)(x::RootInt, y::RootInt) = x.i*y.i
+(*)(x::RootInt, y::RootInt) = x.i * y.i
 adjoint(x::RootInt) = x
 transpose(x::RootInt) = x
 Adjoint(x::RootInt) = x
@@ -425,16 +609,16 @@ Transpose(x::RootInt) = x
     C = [1]
     mul!(C, a, transpose(a), 2, 3)
     @test C[1] == 21
-    a = [RootInt(2),RootInt(10)]
-    @test a*adjoint(a) == [4 20; 20 100]
+    a = [RootInt(2), RootInt(10)]
+    @test a * adjoint(a) == [4 20; 20 100]
     A = [RootInt(3) RootInt(5)]
-    @test A*a == [56]
+    @test A * a == [56]
 end
 
 function test_mul(C, A, B)
     mul!(C, A, B)
     @test Array(A) * Array(B) ≈ C
-    @test A*B ≈ C
+    @test A * B ≈ C
 
     # This is similar to how `isapprox` choose `rtol` (when `atol=0`)
     # but consider all number types involved:
@@ -447,20 +631,20 @@ function test_mul(C, A, B)
     βArrayC = β * Array(C)
     βC = β * C
     mul!(C, A, B, α, β)
-    @test α * Array(A) * Array(B) .+ βArrayC ≈ C  rtol=rtol
-    @test α * A * B .+ βC ≈ C  rtol=rtol
+    @test α * Array(A) * Array(B) .+ βArrayC ≈ C rtol = rtol
+    @test α * A * B .+ βC ≈ C rtol = rtol
 end
 
 @testset "mul! vs * for special types" begin
     eltypes = [Float32, Float64, Int64]
     for k in [3, 4, 10]
         T = rand(eltypes)
-        bi1 = Bidiagonal(rand(T, k), rand(T, k-1), rand([:U, :L]))
-        bi2 = Bidiagonal(rand(T, k), rand(T, k-1), rand([:U, :L]))
-        tri1 = Tridiagonal(rand(T,k-1), rand(T, k), rand(T, k-1))
-        tri2 = Tridiagonal(rand(T,k-1), rand(T, k), rand(T, k-1))
-        stri1 = SymTridiagonal(rand(T, k), rand(T, k-1))
-        stri2 = SymTridiagonal(rand(T, k), rand(T, k-1))
+        bi1 = Bidiagonal(rand(T, k), rand(T, k - 1), rand([:U, :L]))
+        bi2 = Bidiagonal(rand(T, k), rand(T, k - 1), rand([:U, :L]))
+        tri1 = Tridiagonal(rand(T, k - 1), rand(T, k), rand(T, k - 1))
+        tri2 = Tridiagonal(rand(T, k - 1), rand(T, k), rand(T, k - 1))
+        stri1 = SymTridiagonal(rand(T, k), rand(T, k - 1))
+        stri2 = SymTridiagonal(rand(T, k), rand(T, k - 1))
         C = rand(T, k, k)
         specialmatrices = (bi1, bi2, tri1, tri2, stri1, stri2)
         for A in specialmatrices
@@ -480,7 +664,7 @@ end
     for T in eltypes
         A = Bidiagonal(rand(T, 2), rand(T, 1), rand([:U, :L]))
         B = Bidiagonal(rand(T, 2), rand(T, 1), rand([:U, :L]))
-        C = randn(2,2)
+        C = randn(2, 2)
         test_mul(C, A, B)
         B = randn(2, 9)
         C = randn(2, 9)
@@ -503,69 +687,69 @@ end
 
 # #18218
 module TestPR18218
-    using Test
-    import Base.*, Base.+, Base.zero
-    struct TypeA
-        x::Int
-    end
-    Base.convert(::Type{TypeA}, x::Int) = TypeA(x)
-    struct TypeB
-        x::Int
-    end
-    struct TypeC
-        x::Int
-    end
-    Base.convert(::Type{TypeC}, x::Int) = TypeC(x)
-    zero(c::TypeC) = TypeC(0)
-    zero(::Type{TypeC}) = TypeC(0)
-    (*)(x::Int, a::TypeA) = TypeB(x*a.x)
-    (*)(a::TypeA, x::Int) = TypeB(a.x*x)
-    (+)(a::Union{TypeB,TypeC}, b::Union{TypeB,TypeC}) = TypeC(a.x+b.x)
-    A = TypeA[1 2; 3 4]
-    b = [1, 2]
-    d = A * b
-    @test typeof(d) == Vector{TypeC}
-    @test d == TypeC[5, 11]
+using Test
+import Base.*, Base.+, Base.zero
+struct TypeA
+    x::Int
+end
+Base.convert(::Type{TypeA}, x::Int) = TypeA(x)
+struct TypeB
+    x::Int
+end
+struct TypeC
+    x::Int
+end
+Base.convert(::Type{TypeC}, x::Int) = TypeC(x)
+zero(c::TypeC) = TypeC(0)
+zero(::Type{TypeC}) = TypeC(0)
+(*)(x::Int, a::TypeA) = TypeB(x * a.x)
+(*)(a::TypeA, x::Int) = TypeB(a.x * x)
+(+)(a::Union{TypeB,TypeC}, b::Union{TypeB,TypeC}) = TypeC(a.x + b.x)
+A = TypeA[1 2; 3 4]
+b = [1, 2]
+d = A * b
+@test typeof(d) == Vector{TypeC}
+@test d == TypeC[5, 11]
 end
 
 @testset "VecOrMat of Vectors" begin
-    X   = rand(ComplexF64, 3, 3)
-    Xv1 = [X[:,j] for i in 1:1, j in 1:3]
-    Xv2 = [transpose(X[i,:]) for i in 1:3]
-    Xv3 = [transpose(X[i,:]) for i in 1:3, j in 1:1]
-
-    XX   = X*X
-    XtX  = transpose(X)*X
-    XcX  = X'*X
-    XXt  = X*transpose(X)
+    X = rand(ComplexF64, 3, 3)
+    Xv1 = [X[:, j] for i in 1:1, j in 1:3]
+    Xv2 = [transpose(X[i, :]) for i in 1:3]
+    Xv3 = [transpose(X[i, :]) for i in 1:3, j in 1:1]
+
+    XX = X * X
+    XtX = transpose(X) * X
+    XcX = X' * X
+    XXt = X * transpose(X)
     XtXt = transpose(XX)
-    XcXt = X'*transpose(X)
-    XXc  = X*X'
-    XtXc = transpose(X)*X'
-    XcXc = X'*X'
+    XcXt = X' * transpose(X)
+    XXc = X * X'
+    XtXc = transpose(X) * X'
+    XcXc = X' * X'
 
     @test (Xv1*Xv2)[1] ≈ XX
     @test (Xv1*Xv3)[1] ≈ XX
-    @test  transpose(Xv1)*Xv1     ≈ XtX
-    @test  transpose(Xv2)*Xv2     ≈ XtX
+    @test transpose(Xv1) * Xv1 ≈ XtX
+    @test transpose(Xv2) * Xv2 ≈ XtX
     @test (transpose(Xv3)*Xv3)[1] ≈ XtX
-    @test  Xv1'*Xv1     ≈ XcX
-    @test  Xv2'*Xv2     ≈ XcX
+    @test Xv1' * Xv1 ≈ XcX
+    @test Xv2' * Xv2 ≈ XcX
     @test (Xv3'*Xv3)[1] ≈ XcX
     @test (Xv1*transpose(Xv1))[1] ≈ XXt
-    @test  Xv2*transpose(Xv2)     ≈ XXt
-    @test  Xv3*transpose(Xv3)     ≈ XXt
-    @test transpose(Xv1)*transpose(Xv2) ≈ XtXt
-    @test transpose(Xv1)*transpose(Xv3) ≈ XtXt
-    @test  Xv1'*transpose(Xv2) ≈ XcXt
-    @test  Xv1'*transpose(Xv3) ≈ XcXt
+    @test Xv2 * transpose(Xv2) ≈ XXt
+    @test Xv3 * transpose(Xv3) ≈ XXt
+    @test transpose(Xv1) * transpose(Xv2) ≈ XtXt
+    @test transpose(Xv1) * transpose(Xv3) ≈ XtXt
+    @test Xv1' * transpose(Xv2) ≈ XcXt
+    @test Xv1' * transpose(Xv3) ≈ XcXt
     @test (Xv1*Xv1')[1] ≈ XXc
-    @test  Xv2*Xv2'     ≈ XXc
-    @test  Xv3*Xv3'     ≈ XXc
-    @test transpose(Xv1)*Xv2' ≈ XtXc
-    @test transpose(Xv1)*Xv3' ≈ XtXc
-    @test Xv1'*Xv2' ≈ XcXc
-    @test Xv1'*Xv3' ≈ XcXc
+    @test Xv2 * Xv2' ≈ XXc
+    @test Xv3 * Xv3' ≈ XXc
+    @test transpose(Xv1) * Xv2' ≈ XtXc
+    @test transpose(Xv1) * Xv3' ≈ XtXc
+    @test Xv1' * Xv2' ≈ XcXc
+    @test Xv1' * Xv3' ≈ XcXc
 end
 
 @testset "method ambiguity" begin
@@ -573,7 +757,7 @@ end
     # https://github.com/JuliaLang/julia/issues/28804
     script = joinpath(@__DIR__, "ambiguous_exec.jl")
     cmd = `$(Base.julia_cmd()) --startup-file=no $script`
-    @test success(pipeline(cmd; stdout=stdout, stderr=stderr))
+    @test success(pipeline(cmd; stdout = stdout, stderr = stderr))
 end
 
 struct A32092
@@ -616,6 +800,16 @@ end
     @test D ≈ C
 end
 
+@testset "size zero types in matrix mult (see issue 39362)" begin
+    A = [missing missing; missing missing]
+    v = [missing, missing]
+    @test (A * v == v) === missing
+    M = fill(1.0, 2, 2)
+    a = fill(missing, 2, 1)
+    @test (a' * M * a == fill(missing, 1, 1)) === missing
+end
+
+
 @testset "multiplication of empty matrices without calling zero" begin
     r, c = rand(0:9, 2)
     A = collect(Number, rand(r, c))
@@ -631,4 +825,117 @@ end
     @test Matrix{Int}(undef, 2, 0) * Matrix{Int}(undef, 0, 3) == zeros(Int, 2, 3)
 end
 
+struct BrokenInt <: Number
+   i::Int
+end
+Base.:*(::BrokenInt, ::BrokenInt) = BrokenInt(42)
+Base.:+(::BrokenInt, ::BrokenInt) = BrokenInt(42)
+Base.zero(::BrokenInt) = BrokenInt(0)
+Base.conj(b::BrokenInt) = b.i == 42 ? b : error()
+@testset "matmul uninit memory #40481" begin
+    @test fill(BrokenInt(42), 10,100)' * fill(BrokenInt(42), 100,10)' == fill(BrokenInt(42), 100, 100)
+end
+
+@testset "3-arg *, order by type" begin
+    x = [1, 2im]
+    y = [im, 20, 30 + 40im]
+    z = [-1, 200 + im, -3]
+    A = [1 2 3im; 4 5 6+im]
+    B = [-10 -20; -30 -40]
+    a = 3 + im * round(Int, 10^6 * (pi - 3))
+    b = 123
+
+    @test x' * A * y == (x' * A) * y == x' * (A * y)
+    @test y' * A' * x == (y' * A') * x == y' * (A' * x)
+    @test y' * transpose(A) * x == (y' * transpose(A)) * x == y' * (transpose(A) * x)
+
+    @test B * A * y == (B * A) * y == B * (A * y)
+
+    @test a * A * y == (a * A) * y == a * (A * y)
+    @test A * y * a == (A * y) * a == A * (y * a)
+
+    @test a * B * A == (a * B) * A == a * (B * A)
+    @test B * A * a == (B * A) * a == B * (A * a)
+
+    @test a * y' * z == (a * y') * z == a * (y' * z)
+    @test y' * z * a == (y' * z) * a == y' * (z * a)
+
+    @test a * y * z' == (a * y) * z' == a * (y * z')
+    @test y * z' * a == (y * z') * a == y * (z' * a)
+
+    @test a * x' * A == (a * x') * A == a * (x' * A)
+    @test x' * A * a == (x' * A) * a == x' * (A * a)
+    @test a * x' * A isa Adjoint{<:Any,<:Vector}
+
+    @test a * transpose(x) * A == (a * transpose(x)) * A == a * (transpose(x) * A)
+    @test transpose(x) * A * a == (transpose(x) * A) * a == transpose(x) * (A * a)
+    @test a * transpose(x) * A isa Transpose{<:Any,<:Vector}
+
+    @test x' * B * A == (x' * B) * A == x' * (B * A)
+    @test x' * B * A isa Adjoint{<:Any,<:Vector}
+
+    @test y * x' * A == (y * x') * A == y * (x' * A)
+    y31 = reshape(y, 3, 1)
+    @test y31 * x' * A == (y31 * x') * A == y31 * (x' * A)
+
+    vm = [rand(1:9, 2, 2) for _ in 1:3]
+    Mm = [rand(1:9, 2, 2) for _ in 1:3, _ in 1:3]
+
+    @test vm' * Mm * vm == (vm' * Mm) * vm == vm' * (Mm * vm)
+    @test Mm * Mm' * vm == (Mm * Mm') * vm == Mm * (Mm' * vm)
+    @test vm' * Mm * Mm == (vm' * Mm) * Mm == vm' * (Mm * Mm)
+    @test Mm * Mm' * Mm == (Mm * Mm') * Mm == Mm * (Mm' * Mm)
+end
+
+@testset "3-arg *, order by size" begin
+    M44 = randn(4, 4)
+    M24 = randn(2, 4)
+    M42 = randn(4, 2)
+    @test M44 * M44 * M44 ≈ (M44 * M44) * M44 ≈ M44 * (M44 * M44)
+    @test M42 * M24 * M44 ≈ (M42 * M24) * M44 ≈ M42 * (M24 * M44)
+    @test M44 * M42 * M24 ≈ (M44 * M42) * M24 ≈ M44 * (M42 * M24)
+end
+
+@testset "4-arg *, by type" begin
+    y = [im, 20, 30 + 40im]
+    z = [-1, 200 + im, -3]
+    a = 3 + im * round(Int, 10^6 * (pi - 3))
+    b = 123
+    M = rand(vcat(1:9, im .* [1, 2, 3]), 3, 3)
+    N = rand(vcat(1:9, im .* [1, 2, 3]), 3, 3)
+
+    @test a * b * M * y == (a * b) * (M * y)
+    @test a * b * M * N == (a * b) * (M * N)
+    @test a * M * N * y == (a * M) * (N * y)
+    @test a * y' * M * z == (a * y') * (M * z)
+    @test a * y' * M * N == (a * y') * (M * N)
+
+    @test M * y * a * b == (M * y) * (a * b)
+    @test M * N * a * b == (M * N) * (a * b)
+    @test M * N * y * a == (a * M) * (N * y)
+    @test y' * M * z * a == (a * y') * (M * z)
+    @test y' * M * N * a == (a * y') * (M * N)
+
+    @test M * N * conj(M) * y == (M * N) * (conj(M) * y)
+    @test y' * M * N * conj(M) == (y' * M) * (N * conj(M))
+    @test y' * M * N * z == (y' * M) * (N * z)
+end
+
+@testset "4-arg *, by size" begin
+    for shift in 1:5
+        s1, s2, s3, s4, s5 = circshift(3:7, shift)
+        a = randn(s1, s2)
+        b = randn(s2, s3)
+        c = randn(s3, s4)
+        d = randn(s4, s5)
+
+        # _quad_matmul
+        @test *(a, b, c, d) ≈ (a * b) * (c * d)
+
+        # _tri_matmul(A,B,B,δ)
+        @test *(11.1, b, c, d) ≈ (11.1 * b) * (c * d)
+        @test *(a, b, c, 99.9) ≈ (a * b) * (c * 99.9)
+    end
+end
+
 end # module TestMatmul
diff --git a/stdlib/LinearAlgebra/test/pinv.jl b/stdlib/LinearAlgebra/test/pinv.jl
index 9be74730127aa2..c7268865a05053 100644
--- a/stdlib/LinearAlgebra/test/pinv.jl
+++ b/stdlib/LinearAlgebra/test/pinv.jl
@@ -63,39 +63,23 @@ function tridiag(T::Type, m::Integer, n::Integer)
 end
 tridiag(m::Integer, n::Integer) = tridiag(Float64, m::Integer, n::Integer)
 
-function randn_float64(m::Integer, n::Integer)
-    a=randn(m,n)
-    b = Matrix{Float64}(undef, m, n)
-    for i=1:n
-        for j=1:m
-            b[j,i]=convert(Float64,a[j,i])
-        end
-    end
-    return b
-end
-
-function randn_float32(m::Integer, n::Integer)
-    a=randn(m,n)
-    b = Matrix{Float32}(undef, m, n)
-    for i=1:n
-        for j=1:m
-            b[j,i]=convert(Float32,a[j,i])
-        end
-    end
-    return b
-end
-
+function test_pinv(a,tol1,tol2)
+    m,n = size(a)
 
-function test_pinv(a,m,n,tol1,tol2,tol3)
     apinv = @inferred pinv(a)
-
+    @test size(apinv) == (n,m)
     @test norm(a*apinv*a-a)/norm(a) ≈ 0 atol=tol1
-    x0 = randn(n); b = a*x0; x = apinv*b
+    @test norm(apinv*a*apinv-apinv)/norm(apinv) ≈ 0 atol=tol1
+    b = a*randn(n)
+    x = apinv*b
     @test norm(a*x-b)/norm(b) ≈ 0 atol=tol1
-    apinv = pinv(a,sqrt(eps(real(one(eltype(a))))))
 
+    apinv = @inferred pinv(a,sqrt(eps(real(one(eltype(a))))))
+    @test size(apinv) == (n,m)
     @test norm(a*apinv*a-a)/norm(a) ≈ 0 atol=tol2
-    x0 = randn(n); b = a*x0; x = apinv*b
+    @test norm(apinv*a*apinv-apinv)/norm(apinv) ≈ 0 atol=tol2
+    b = a*randn(n)
+    x = apinv*b
     @test norm(a*x-b)/norm(b) ≈ 0 atol=tol2
 end
 
@@ -104,28 +88,25 @@ end
         default_tol = (real(one(eltya))) * max(m,n) * 10
         tol1 = 1e-2
         tol2 = 1e-5
-        tol3 = 1e-5
         if real(eltya) == Float32
             tol1 = 1e0
             tol2 = 1e-2
-            tol3 = 1e-2
         end
         @testset "dense/ill-conditioned matrix" begin
-        ###    a = randn_float64(m,n) * hilb(eltya,n)
             a = hilb(eltya, m, n)
-            test_pinv(a, m, n, tol1, tol2, tol3)
+            test_pinv(a, tol1, tol2)
         end
         @testset "dense/diagonal matrix" begin
             a = onediag(eltya, m, n)
-            test_pinv(a, m, n, default_tol, default_tol, default_tol)
+            test_pinv(a, default_tol, default_tol)
         end
         @testset "dense/tri-diagonal matrix" begin
             a = tridiag(eltya, m, n)
-            test_pinv(a, m, n, default_tol, tol2, default_tol)
+            test_pinv(a, default_tol, tol2)
         end
         @testset "Diagonal matrix" begin
             a = onediag_sparse(eltya, m)
-            test_pinv(a, m, m, default_tol, default_tol, default_tol)
+            test_pinv(a, default_tol, default_tol)
         end
         @testset "Vector" begin
             a = rand(eltya, m)
@@ -158,6 +139,24 @@ end
         @test a.diag[2] ≈ 0.0
     end
 
+    @testset "hermitian matrices" begin
+        Q = ones(2,2)
+        C = pinv(Hermitian(Q))/0.25
+        @test C ≈ ones(2,2)
+    end
+
+    @testset "non-square diagonal matrices" begin
+        A = eltya[1 0 ; 0 1 ; 0 0]
+        B = pinv(A)
+        @test A*B*A ≈ A
+        @test B*A*B ≈ B
+
+        A = eltya[1 0 0 ; 0 1 0]
+        B = pinv(A)
+        @test A*B*A ≈ A
+        @test B*A*B ≈ B
+    end
+
     if eltya <: LinearAlgebra.BlasReal
         @testset "sub-normal numbers/vectors/matrices" begin
             a = pinv(floatmin(eltya)/100)
diff --git a/stdlib/LinearAlgebra/test/qr.jl b/stdlib/LinearAlgebra/test/qr.jl
index f1d24a06a45bf9..b897803074ff94 100644
--- a/stdlib/LinearAlgebra/test/qr.jl
+++ b/stdlib/LinearAlgebra/test/qr.jl
@@ -11,7 +11,7 @@ n = 10
 n1 = div(n, 2)
 n2 = 2*n1
 
-Random.seed!(1234321)
+Random.seed!(1234325)
 
 areal = randn(n,n)/2
 aimg  = randn(n,n)/2
@@ -49,7 +49,6 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
             a_1 = size(a, 1)
             @testset "QR decomposition (without pivoting)" begin
                 qra   = @inferred qr(a)
-                @inferred qr(a)
                 q, r  = qra.Q, qra.R
                 @test_throws ErrorException qra.Z
                 @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
@@ -78,8 +77,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test Base.propertynames(qra)       == (:R, :Q)
             end
             @testset "Thin QR decomposition (without pivoting)" begin
-                qra   = @inferred qr(a[:, 1:n1], Val(false))
-                @inferred qr(a[:, 1:n1], Val(false))
+                qra   = @inferred qr(a[:, 1:n1], NoPivot())
                 q,r   = qra.Q, qra.R
                 @test_throws ErrorException qra.Z
                 @test q'*squareQ(q) ≈ Matrix(I, a_1, a_1)
@@ -104,7 +102,7 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test Base.propertynames(qra)       == (:R, :Q)
             end
             @testset "(Automatic) Fat (pivoted) QR decomposition" begin
-                @inferred qr(a, Val(true))
+                @inferred qr(a, ColumnNorm())
 
                 qrpa  = factorize(a[1:n1,:])
                 q,r = qrpa.Q, qrpa.R
@@ -180,13 +178,17 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
 
                 b = similar(a); rand!(b)
                 c = similar(a)
+                d = similar(a[:,1:n1])
                 @test mul!(c, q, b) ≈ q*b
+                @test mul!(d, q, r) ≈ q*r ≈ a[:,qrpa.p]
                 @test mul!(c, q', b) ≈ q'*b
+                @test mul!(d, q', a[:,qrpa.p])[1:n1,:] ≈ r
+                @test all(x -> abs(x) < ε*norm(a), d[n1+1:end,:])
                 @test mul!(c, b, q) ≈ b*q
                 @test mul!(c, b, q') ≈ b*q'
                 @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b)
 
-                qra = qr(a[:,1:n1], Val(false))
+                qra = qr(a[:,1:n1], NoPivot())
                 q, r = qra.Q, qra.R
                 @test rmul!(copy(squareQ(q)'), q) ≈ Matrix(I, n, n)
                 @test_throws DimensionMismatch rmul!(Matrix{eltya}(I, n+1, n+1),q)
@@ -196,7 +198,10 @@ rectangularQ(Q::LinearAlgebra.AbstractQ) = convert(Array, Q)
                 @test_throws DimensionMismatch q * Matrix{Int8}(I, n+4, n+4)
 
                 @test mul!(c, q, b) ≈ q*b
+                @test mul!(d, q, r) ≈ a[:,1:n1]
                 @test mul!(c, q', b) ≈ q'*b
+                @test mul!(d, q', a[:,1:n1])[1:n1,:] ≈ r
+                @test all(x -> abs(x) < ε*norm(a), d[n1+1:end,:])
                 @test mul!(c, b, q) ≈ b*q
                 @test mul!(c, b, q') ≈ b*q'
                 @test_throws DimensionMismatch mul!(Matrix{eltya}(I, n+1, n), q, b)
@@ -207,11 +212,8 @@ end
 
 @testset "transpose errors" begin
     @test_throws MethodError transpose(qr(randn(3,3)))
-    @test_throws MethodError adjoint(qr(randn(3,3)))
-    @test_throws MethodError transpose(qr(randn(3,3), Val(false)))
-    @test_throws MethodError adjoint(qr(randn(3,3), Val(false)))
+    @test_throws MethodError transpose(qr(randn(3,3), NoPivot()))
     @test_throws MethodError transpose(qr(big.(randn(3,3))))
-    @test_throws MethodError adjoint(qr(big.(randn(3,3))))
 end
 
 @testset "Issue 7304" begin
@@ -249,7 +251,7 @@ end
     A = zeros(1, 2)
     B = zeros(1, 1)
     @test A \ B == zeros(2, 1)
-    @test qr(A, Val(true)) \ B == zeros(2, 1)
+    @test qr(A, ColumnNorm()) \ B == zeros(2, 1)
 end
 
 @testset "Issue 24107" begin
@@ -268,20 +270,39 @@ end
     b  = randn(3)
     b0 = copy(b)
     c  = randn(2)
+    B  = randn(3,3)
+    B0 = copy(B)
+    C  = randn(2,3)
     @test A \b ≈ ldiv!(c, qr(A ), b)
     @test b == b0
+    @test A \B ≈ ldiv!(C, qr(A ), B)
+    @test B == B0
     c0 = copy(c)
-    @test Ac\c ≈ ldiv!(b, qr(Ac, Val(true)), c)
+    C0 = copy(C)
+    @test Ac\c ≈ ldiv!(b, qr(Ac, ColumnNorm()), c)
     @test c0 == c
+    @test Ac\C ≈ ldiv!(B, qr(Ac, ColumnNorm()), C)
+    @test C0 == C
+end
+
+@testset "Issue reflector of zero-length vector" begin
+    a = [2.0]
+    x = view(a,1:0)
+    τ = LinearAlgebra.reflector!(view(x,1:0))
+    @test τ == 0.0
+
+    b = reshape([3.0],1,1)
+    @test isempty(LinearAlgebra.reflectorApply!(x, τ, view(b,1:0,:)))
+    @test b[1] == 3.0
 end
 
 @testset "det(Q::Union{QRCompactWYQ, QRPackedQ})" begin
     # 40 is the number larger than the default block size 36 of QRCompactWY
-    @testset for n in [1:3; 40], m in [1:3; 40], pivot in [false, true]
+    @testset for n in [1:3; 40], m in [1:3; 40], pivot in (NoPivot(), ColumnNorm())
         @testset "real" begin
             @testset for k in 0:min(n, m, 5)
                 A = cat(Array(I(k)), randn(n - k, m - k); dims=(1, 2))
-                Q, = qr(A, Val(pivot))
+                Q, = qr(A, pivot)
                 @test det(Q) ≈ det(collect(Q))
                 @test abs(det(Q)) ≈ 1
             end
@@ -289,7 +310,7 @@ end
         @testset "complex" begin
             @testset for k in 0:min(n, m, 5)
                 A = cat(Array(I(k)), randn(ComplexF64, n - k, m - k); dims=(1, 2))
-                Q, = qr(A, Val(pivot))
+                Q, = qr(A, pivot)
                 @test det(Q) ≈ det(collect(Q))
                 @test abs(det(Q)) ≈ 1
             end
@@ -304,4 +325,145 @@ end
     end
 end
 
+@testset "QR factorization of Q" begin
+    for T in (Float32, Float64, ComplexF32, ComplexF64)
+        Q1, R1 = qr(randn(T,5,5))
+        Q2, R2 = qr(Q1)
+        @test Q1 ≈ Q2
+        @test R2 ≈ I
+    end
+end
+
+@testset "Generation of orthogonal matrices" begin
+    for T in (Float32, Float64)
+        n = 5
+        Q, R = qr(randn(T,n,n))
+        O = Q * Diagonal(sign.(diag(R)))
+        @test O' * O ≈ I
+    end
+end
+
+@testset "Multiplication of Q by special matrices" begin
+    for T in (Float32, Float64, ComplexF32, ComplexF64)
+        n = 5
+        Q, R = qr(randn(T,n,n))
+        Qmat = Matrix(Q)
+        D = Diagonal(randn(T,n))
+        @test Q * D ≈ Qmat * D
+        @test D * Q ≈ D * Qmat
+        J = 2*I
+        @test Q * J ≈ Qmat * J
+        @test J * Q ≈ J * Qmat
+    end
+end
+
+@testset "copyto! for Q" begin
+    for T in (Float32, Float64, ComplexF32, ComplexF64)
+        n = 5
+        Q, R = qr(randn(T,n,n))
+        Qmat = Matrix(Q)
+        dest1 = similar(Q)
+        copyto!(dest1, Q)
+        @test dest1 ≈ Qmat
+        dest2 = PermutedDimsArray(similar(Q), (1, 2))
+        copyto!(dest2, Q)
+        @test dest2 ≈ Qmat
+        dest3 = PermutedDimsArray(similar(Q), (2, 1))
+        copyto!(dest3, Q)
+        @test dest3 ≈ Qmat
+    end
+end
+
+@testset "adjoint of QR" begin
+    n = 5
+    B = randn(5, 2)
+
+    @testset "size(b)=$(size(b))" for b in (B[:, 1], B)
+        @testset "size(A)=$(size(A))" for A in (
+            randn(n, n),
+            # Wide problems become minimum norm (in x) problems similarly to LQ
+            randn(n + 2, n),
+            complex.(randn(n, n), randn(n, n)))
+
+            @testset "QRCompactWY" begin
+                F = qr(A)
+                x = F'\b
+                @test x ≈ A'\b
+                @test length(size(x)) == length(size(b))
+            end
+
+            @testset "QR" begin
+                F = LinearAlgebra.qrfactUnblocked!(copy(A))
+                x = F'\b
+                @test x ≈ A'\b
+                @test length(size(x)) == length(size(b))
+            end
+
+            @testset "QRPivoted" begin
+                F = LinearAlgebra.qr(A, ColumnNorm())
+                x = F'\b
+                @test x ≈ A'\b
+                @test length(size(x)) == length(size(b))
+            end
+        end
+        @test_throws DimensionMismatch("overdetermined systems are not supported")    qr(randn(n - 2, n))'\b
+        @test_throws DimensionMismatch("arguments must have the same number of rows") qr(randn(n, n + 1))'\b
+        @test_throws DimensionMismatch("overdetermined systems are not supported")    LinearAlgebra.qrfactUnblocked!(randn(n - 2, n))'\b
+        @test_throws DimensionMismatch("arguments must have the same number of rows") LinearAlgebra.qrfactUnblocked!(randn(n, n + 1))'\b
+        @test_throws DimensionMismatch("overdetermined systems are not supported")    qr(randn(n - 2, n), ColumnNorm())'\b
+        @test_throws DimensionMismatch("arguments must have the same number of rows") qr(randn(n, n + 1), ColumnNorm())'\b
+    end
+end
+
+@testset "issue #38974" begin
+    A = qr(ones(3, 1))
+    B = I(3)
+    C = B*A.Q'
+    @test C ≈ A.Q
+    @test A.Q' * B ≈ A.Q
+end
+
+@testset "convert between eltypes" begin
+    a = rand(Float64, 10, 5)
+    qra = qr(a)
+    qrwy = LinearAlgebra.QRCompactWY{Float32}(qra.factors, qra.T)
+    @test Array(qrwy) ≈ Array(qr(Float32.(a)))
+    @test eltype(qrwy.factors) == eltype(qrwy.T) == Float32
+    qra = qr(a, ColumnNorm())
+    qrp = QRPivoted{Float32}(qra.factors, qra.τ, qra.jpvt)
+    @test Array(qrp) ≈ Array(qr(Float32.(a), ColumnNorm()))
+    @test eltype(qrp.factors) == eltype(qrp.τ) == Float32
+    a = rand(Float16, 10, 5)
+    qra = qr(a)
+    qrnonblas = QR{ComplexF16}(qra.factors, qra.τ)
+    @test Array(qrnonblas) ≈ Array(qr(ComplexF16.(a)))
+    @test eltype(qrnonblas.factors) == eltype(qrnonblas.τ) == ComplexF16
+end
+
+# We use approximate equals to get MKL.jl tests to pass.
+@testset "optimized getindex for an AbstractQ" begin
+    for T in [Float64, ComplexF64]
+        Q = qr(rand(T, 4, 4))
+        Q2 = Q.Q
+        M = Matrix(Q2)
+        for j in axes(M, 2)
+            @test Q2[:, j] ≈ M[:, j]
+            for i in axes(M, 1)
+                @test Q2[i, :] ≈ M[i, :]
+                @test Q2[i, j] ≈ M[i, j]
+            end
+        end
+        @test Q2[:] ≈ M[:]
+        @test Q2[:, :] ≈ M[:, :]
+        @test Q2[:, :, :] ≈ M[:, :, :]
+    end
+    # Check that getindex works if copy returns itself (#44729)
+    struct MyIdentity{T} <: LinearAlgebra.AbstractQ{T} end
+    Base.size(::MyIdentity, dim::Integer) = dim in (1,2) ? 2 : 1
+    Base.size(::MyIdentity) = (2, 2)
+    Base.copy(J::MyIdentity) = J
+    LinearAlgebra.lmul!(::MyIdentity{T}, M::Array{T}) where {T} = M
+    @test MyIdentity{Float64}()[1,:] == [1.0, 0.0]
+end
+
 end # module TestQR
diff --git a/stdlib/LinearAlgebra/test/schur.jl b/stdlib/LinearAlgebra/test/schur.jl
index feb0ef8513b897..d047ca12abc1f2 100644
--- a/stdlib/LinearAlgebra/test/schur.jl
+++ b/stdlib/LinearAlgebra/test/schur.jl
@@ -132,6 +132,74 @@ aimg  = randn(n,n)/2
         @test Z == A
         @test λ == zeros(0)
     end
+
+    if eltya <: Real
+        @testset "quasitriangular to triangular" begin
+            S = schur(a)
+            SC = Schur{Complex}(S)
+            @test eltype(SC) == complex(eltype(S))
+            @test istriu(SC.T)
+            @test SC.Z*SC.Z' ≈ I
+            @test SC.Z*SC.T*SC.Z' ≈ a
+            @test sort(SC.values,by=LinearAlgebra.eigsortby) ≈ sort(S.values,by=LinearAlgebra.eigsortby)
+            @test Schur{Complex}(SC) === SC === Schur{eltype(SC)}(SC)
+            @test Schur{eltype(S)}(S) === S
+            if eltype(S) === Float32
+                S64 = Schur{Float64}(S)
+                @test eltype(S64) == Float64
+                @test S64.Z == S.Z
+                @test S64.T == S.T
+                @test S64.values == S.values
+            end
+        end
+    end
+
+    @testset "0x0 $eltya matrices" begin
+        A = zeros(eltya, 0, 0)
+        B = zeros(eltya, 0, 0)
+        S = LinearAlgebra.schur(A, B)
+        @test S.S == A
+        @test S.T == A
+        @test S.Q == A
+        @test S.Z == A
+        @test S.alpha == zeros(0)
+        @test S.beta == zeros(0)
+    end
+end
+
+@testset "Generalized Schur convergence" begin
+    # Check for convergence issues, #40279
+    problematic_pencils = [
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 3.7796350217469814 -3.3125635598133054 0.0 0.0 0.0 0.0 0.0 0.0 6.418270043493963 -6.625127119626611 0.0 0.0 0.0 0.0 0.0 -1.0; -3.312563559813306 3.779635021746982 0.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626612 6.418270043493964 -1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 3.7796350217469814 0.0 0.0 -3.3125635598133054 0.0 0.0 0.0 -1.0 6.418270043493963 0.0 0.0 -6.625127119626611 0.0 0.0; 0.0 0.0 0.0 3.779635021746982 -3.312563559813306 0.0 0.0 0.0 0.0 0.0 0.0 6.418270043493964 -6.625127119626612 0.0 -1.0 0.0; 0.0 0.0 0.0 -3.3125635598133054 3.7796350217469814 0.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626611 6.418270043493963 -1.0 0.0 0.0; 0.0 0.0 -3.312563559813306 0.0 0.0 3.779635021746982 0.0 0.0 0.0 0.0 -6.625127119626612 0.0 -1.0 6.418270043493964 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 3.7796350217469814 -3.3125635598133054 0.0 0.0 0.0 -1.0 0.0 0.0 6.418270043493963 -6.625127119626611; 0.0 0.0 0.0 0.0 0.0 0.0 -3.312563559813306 3.779635021746982 -1.0 0.0 0.0 0.0 0.0 0.0 -6.625127119626612 6.418270043493964],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 3.312563559813306 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 -3.779635021746982 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 0.0 0.0 3.312563559813306 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.779635021746982 3.3125635598133054 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.312563559813306 -3.7796350217469814 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 0.0 0.0 -3.779635021746982 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -3.7796350217469814 3.312563559813306; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.3125635598133054 -3.779635021746982]
+        ),
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.62 -1.0 0.0 0.0 0.0 0.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 -1.0 -2.62 0.0 0.0 0.0 0.0 0.0; 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0 0.0; 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62 0.0; 0.0 0.0 0.0 0.0 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0 -1.0 0.0 0.0 0.0 0.0 0.0 -2.62],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0]
+        ),
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 0.33748484079831426 -0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853 0.0 0.0 0.0 0.0 0.0 -1.0; -0.10323794456968927 0.3374848407983142 0.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713 -1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.33748484079831426 0.0 0.0 -0.10323794456968927 0.0 0.0 0.0 -1.0 -2.5940303184033713 0.0 0.0 -0.20647588913937853 0.0 0.0; 0.0 0.0 0.0 0.3374848407983142 -0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853 0.0 -1.0 0.0; 0.0 0.0 0.0 -0.10323794456968927 0.33748484079831426 0.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713 -1.0 0.0 0.0; 0.0 0.0 -0.10323794456968927 0.0 0.0 0.3374848407983142 0.0 0.0 0.0 0.0 -0.20647588913937853 0.0 -1.0 -2.5940303184033713 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.33748484079831426 -0.10323794456968927 0.0 0.0 0.0 -1.0 0.0 0.0 -2.5940303184033713 -0.20647588913937853; 0.0 0.0 0.0 0.0 0.0 0.0 -0.10323794456968927 0.3374848407983142 -1.0 0.0 0.0 0.0 0.0 0.0 -0.20647588913937853 -2.5940303184033713],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.10323794456968927 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.3374848407983142 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.0 0.0 0.10323794456968927 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.3374848407983142 0.10323794456968927 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.33748484079831426 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 0.0 0.0 -0.3374848407983142 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -0.33748484079831426 0.10323794456968927; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.10323794456968927 -0.3374848407983142]
+        ),
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 1.7391668762048442 -1.309613611600033 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.150333752409688 -2.619227223200066 0.0 -1.0 0.0 0.0 0.0 0.0 0.0 0.0; -1.3096136116000332 1.739166876204844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.6192272232000664 2.150333752409688 -1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.739166876204844 0.0 0.0 -1.3096136116000332 0.0 0.0 0.0 0.0 0.0 -1.0 2.150333752409688 0.0 0.0 -2.6192272232000664 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.739166876204844 0.0 0.0 0.0 0.0 -1.3096136116000332 0.0 -1.0 0.0 0.0 2.150333752409688 0.0 0.0 0.0 0.0 -2.6192272232000664 0.0; 0.0 0.0 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 -1.309613611600033 0.0 0.0 0.0 0.0 2.150333752409688 -1.0 0.0 0.0 0.0 -2.619227223200066; 0.0 0.0 -1.309613611600033 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 0.0 0.0 -2.619227223200066 0.0 -1.0 2.150333752409688 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.739166876204844 -1.3096136116000332 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 2.150333752409688 -2.6192272232000664 0.0 -1.0; 0.0 0.0 0.0 0.0 0.0 0.0 -1.309613611600033 1.7391668762048442 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -2.619227223200066 2.150333752409688 -1.0 0.0; 0.0 0.0 0.0 -1.309613611600033 0.0 0.0 0.0 0.0 1.7391668762048442 0.0 0.0 0.0 0.0 -2.619227223200066 0.0 0.0 0.0 -1.0 2.150333752409688 0.0; 0.0 0.0 0.0 0.0 -1.3096136116000332 0.0 0.0 0.0 0.0 1.739166876204844 0.0 0.0 0.0 0.0 -2.6192272232000664 0.0 -1.0 0.0 0.0 2.150333752409688],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.7391668762048442 1.3096136116000332 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.309613611600033 -1.739166876204844 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 0.0 0.0 1.309613611600033 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 0.0 0.0 0.0 0.0 1.309613611600033 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.7391668762048442 0.0 0.0 0.0 0.0 1.3096136116000332; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 0.0 0.0 -1.7391668762048442 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.739166876204844 1.309613611600033 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 -1.7391668762048442 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.3096136116000332 0.0 0.0 0.0 0.0 -1.7391668762048442 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.309613611600033 0.0 0.0 0.0 0.0 -1.739166876204844]
+        ),
+        (   ComplexF64[0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0; 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007; 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769246 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230784 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769246 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230784 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.90076923076925 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -1.0000000000000007 -12.019230769230788; -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 -6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769244 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769244 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.490384615384624 -1.0000000000000007 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -12.019230769230784 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 11.900769230769248],
+            ComplexF64[1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615393 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615393 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384622 0.0 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615392 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384622 0.0 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624 0.0; 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 6.009615384615394 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 -6.490384615384624]
+        )]
+
+    for (A, B) in problematic_pencils
+        f = schur(A, B)
+        @test f.Q*f.S*f.Z' ≈ A
+        @test f.Q*f.T*f.Z' ≈ B
+    end
+end
+
+@testset "adjoint and transpose for schur (#40941)" begin
+    A = rand(3, 3)
+    B = schur(A', A)
+    C = B.left*B.S*B.right'
+    D = schur(transpose(A), A)
+    E = D.left*D.S*D.right'
+    @test A' ≈ C ≈ E
 end
 
 end # module TestSchur
diff --git a/stdlib/LinearAlgebra/test/special.jl b/stdlib/LinearAlgebra/test/special.jl
index c23371f3d072e0..234f9f472557b7 100644
--- a/stdlib/LinearAlgebra/test/special.jl
+++ b/stdlib/LinearAlgebra/test/special.jl
@@ -2,7 +2,7 @@
 
 module TestSpecial
 
-using Test, LinearAlgebra, SparseArrays, Random
+using Test, LinearAlgebra, Random
 using LinearAlgebra: rmul!
 
 n= 10 #Size of matrix to test
@@ -104,6 +104,28 @@ Random.seed!(1)
             @test LowerTriangular(C) == LowerTriangular(Cdense)
         end
     end
+
+    @testset "Matrix constructor for !isa(zero(T), T)" begin
+        # the following models JuMP.jl's VariableRef and AffExpr, resp.
+        struct TypeWithoutZero end
+        struct TypeWithZero end
+        Base.promote_rule(::Type{TypeWithoutZero}, ::Type{TypeWithZero}) = TypeWithZero
+        Base.convert(::Type{TypeWithZero}, ::TypeWithoutZero) = TypeWithZero()
+        Base.zero(::Type{<:Union{TypeWithoutZero, TypeWithZero}}) = TypeWithZero()
+        LinearAlgebra.symmetric(::TypeWithoutZero, ::Symbol) = TypeWithoutZero()
+        Base.transpose(::TypeWithoutZero) = TypeWithoutZero()
+        d  = fill(TypeWithoutZero(), 3)
+        du = fill(TypeWithoutZero(), 2)
+        dl = fill(TypeWithoutZero(), 2)
+        D  = Diagonal(d)
+        Bu = Bidiagonal(d, du, :U)
+        Bl = Bidiagonal(d, dl, :L)
+        Tri = Tridiagonal(dl, d, du)
+        Sym = SymTridiagonal(d, dl)
+        for M in (D, Bu, Bl, Tri, Sym)
+            @test Matrix(M) == zeros(TypeWithZero, 3, 3)
+        end
+    end
 end
 
 @testset "Binary ops among special types" begin
@@ -145,7 +167,7 @@ end
     LoBi = Bidiagonal(rand(20,20), :L)
     Sym = SymTridiagonal(rand(20), rand(19))
     Dense = rand(20, 20)
-    mats = [UpTri, LoTri, Diag, Tridiag, UpBi, LoBi, Sym, Dense]
+    mats = Any[UpTri, LoTri, Diag, Tridiag, UpBi, LoBi, Sym, Dense]
 
     for op in (+,-,*)
         for A in mats
@@ -160,7 +182,7 @@ end
     diag = 1:5
     offdiag = 1:4
     uniformscalingmats = [UniformScaling(3), UniformScaling(1.0), UniformScaling(3//5), UniformScaling(ComplexF64(1.3, 3.5))]
-    mats = [Diagonal(diag), Bidiagonal(diag, offdiag, 'U'), Bidiagonal(diag, offdiag, 'L'), Tridiagonal(offdiag, diag, offdiag), SymTridiagonal(diag, offdiag)]
+    mats = Any[Diagonal(diag), Bidiagonal(diag, offdiag, 'U'), Bidiagonal(diag, offdiag, 'L'), Tridiagonal(offdiag, diag, offdiag), SymTridiagonal(diag, offdiag)]
     for T in [ComplexF64, Int64, Rational{Int64}, Float64]
         push!(mats, Diagonal(Vector{T}(diag)))
         push!(mats, Bidiagonal(Vector{T}(diag), Vector{T}(offdiag), 'U'))
@@ -188,20 +210,24 @@ end
 
 
 @testset "Triangular Types and QR" begin
-    for typ in [UpperTriangular,LowerTriangular,LinearAlgebra.UnitUpperTriangular,LinearAlgebra.UnitLowerTriangular]
+    for typ in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
         a = rand(n,n)
         atri = typ(a)
+        matri = Matrix(atri)
         b = rand(n,n)
-        qrb = qr(b,Val(true))
-        @test *(atri, adjoint(qrb.Q)) ≈ Matrix(atri) * qrb.Q'
-        @test rmul!(copy(atri), adjoint(qrb.Q)) ≈ Matrix(atri) * qrb.Q'
-        qrb = qr(b,Val(false))
-        @test *(atri, adjoint(qrb.Q)) ≈ Matrix(atri) * qrb.Q'
-        @test rmul!(copy(atri), adjoint(qrb.Q)) ≈ Matrix(atri) * qrb.Q'
+        qrb = qr(b, ColumnNorm())
+        @test atri * qrb.Q ≈ matri * qrb.Q ≈ rmul!(copy(atri), qrb.Q)
+        @test atri * qrb.Q' ≈ matri * qrb.Q' ≈ rmul!(copy(atri), qrb.Q')
+        @test qrb.Q * atri ≈ qrb.Q * matri ≈ lmul!(qrb.Q, copy(atri))
+        @test qrb.Q' * atri ≈ qrb.Q' * matri ≈ lmul!(qrb.Q', copy(atri))
+        qrb = qr(b, NoPivot())
+        @test atri * qrb.Q ≈ matri * qrb.Q ≈ rmul!(copy(atri), qrb.Q)
+        @test atri * qrb.Q' ≈ matri * qrb.Q' ≈ rmul!(copy(atri), qrb.Q')
+        @test qrb.Q * atri ≈ qrb.Q * matri ≈ lmul!(qrb.Q, copy(atri))
+        @test qrb.Q' * atri ≈ qrb.Q' * matri ≈ lmul!(qrb.Q', copy(atri))
     end
 end
 
-# should all yield sparse arrays
 @testset "concatenations of combinations of special and other matrix types" begin
     N = 4
     # Test concatenating pairwise combinations of special matrices
@@ -211,123 +237,104 @@ end
     symtridiagmat = SymTridiagonal(1:N, 1:(N-1))
     specialmats = (diagmat, bidiagmat, tridiagmat, symtridiagmat)
     for specialmata in specialmats, specialmatb in specialmats
-        @test issparse(hcat(specialmata, specialmatb))
-        @test issparse(vcat(specialmata, specialmatb))
-        @test issparse(hvcat((1,1), specialmata, specialmatb))
-        @test issparse(cat(specialmata, specialmatb; dims=(1,2)))
+        MA = Matrix(specialmata); MB = Matrix(specialmatb)
+        @test hcat(specialmata, specialmatb) == hcat(MA, MB)
+        @test vcat(specialmata, specialmatb) == vcat(MA, MB)
+        @test hvcat((1,1), specialmata, specialmatb) == hvcat((1,1), MA, MB)
+        @test cat(specialmata, specialmatb; dims=(1,2)) == cat(MA, MB; dims=(1,2))
     end
     # Test concatenating pairwise combinations of special matrices with sparse matrices,
     # dense matrices, or dense vectors
     densevec = fill(1., N)
     densemat = diagm(0 => densevec)
-    spmat = spdiagm(0 => densevec)
     for specialmat in specialmats
+        SM = Matrix(specialmat)
         # --> Tests applicable only to pairs of matrices
-        for othermat in (spmat, densemat)
-            @test issparse(vcat(specialmat, othermat))
-            @test issparse(vcat(othermat, specialmat))
-        end
+        @test vcat(specialmat, densemat) == vcat(SM, densemat)
+        @test vcat(densemat, specialmat) == vcat(densemat, SM)
         # --> Tests applicable also to pairs including vectors
-        for specialmat in specialmats, othermatorvec in (spmat, densemat, densevec)
-            @test issparse(hcat(specialmat, othermatorvec))
-            @test issparse(hcat(othermatorvec, specialmat))
-            @test issparse(hvcat((2,), specialmat, othermatorvec))
-            @test issparse(hvcat((2,), othermatorvec, specialmat))
-            @test issparse(cat(specialmat, othermatorvec; dims=(1,2)))
-            @test issparse(cat(othermatorvec, specialmat; dims=(1,2)))
+        for specialmat in specialmats, othermatorvec in (densemat, densevec)
+            SM = Matrix(specialmat); OM = Array(othermatorvec)
+            @test hcat(specialmat, othermatorvec) == hcat(SM, OM)
+            @test hcat(othermatorvec, specialmat) == hcat(OM, SM)
+            @test hvcat((2,), specialmat, othermatorvec) == hvcat((2,), SM, OM)
+            @test hvcat((2,), othermatorvec, specialmat) == hvcat((2,), OM, SM)
+            @test cat(specialmat, othermatorvec; dims=(1,2)) == cat(SM, OM; dims=(1,2))
+            @test cat(othermatorvec, specialmat; dims=(1,2)) == cat(OM, SM; dims=(1,2))
         end
     end
 end
 
-# Test that concatenations of annotated sparse/special matrix types with other matrix
-# types yield sparse arrays, and that the code which effects that does not make concatenations
-# strictly involving un/annotated dense matrices yield sparse arrays
-#
-# TODO: As with the associated code, these tests should be moved to a more appropriate
-# location, particularly some future equivalent of base/linalg/special.jl dedicated to
-# intereactions between a broader set of matrix types
 @testset "concatenations of annotated types" begin
     N = 4
     # The tested annotation types
     testfull = Bool(parse(Int,(get(ENV, "JULIA_TESTFULL", "0"))))
-    utriannotations = (UpperTriangular, LinearAlgebra.UnitUpperTriangular)
-    ltriannotations = (LowerTriangular, LinearAlgebra.UnitLowerTriangular)
+    utriannotations = (UpperTriangular, UnitUpperTriangular)
+    ltriannotations = (LowerTriangular, UnitLowerTriangular)
     triannotations = (utriannotations..., ltriannotations...)
     symannotations = (Symmetric, Hermitian)
     annotations = testfull ? (triannotations..., symannotations...) : (LowerTriangular, Symmetric)
-    # Concatenations involving these types, un/annotated, should yield sparse arrays
-    spvec = spzeros(N)
-    spmat = sparse(1.0I, N, N)
+    # Concatenations involving these types, un/annotated
     diagmat = Diagonal(1:N)
     bidiagmat = Bidiagonal(1:N, 1:(N-1), :U)
     tridiagmat = Tridiagonal(1:(N-1), 1:N, 1:(N-1))
     symtridiagmat = SymTridiagonal(1:N, 1:(N-1))
-    sparseconcatmats = testfull ? (spmat, diagmat, bidiagmat, tridiagmat, symtridiagmat) : (spmat, diagmat)
-    # Concatenations involving strictly these types, un/annotated, should yield dense arrays
+    specialconcatmats = testfull ? (diagmat, bidiagmat, tridiagmat, symtridiagmat) : (diagmat,)
+    # Concatenations involving strictly these types, un/annotated
     densevec = fill(1., N)
     densemat = fill(1., N, N)
     # Annotated collections
     annodmats = [annot(densemat) for annot in annotations]
-    annospcmats = [annot(spcmat) for annot in annotations, spcmat in sparseconcatmats]
-    # Test that concatenations of pairwise combinations of annotated sparse/special
-    # yield sparse matrices
+    annospcmats = [annot(spcmat) for annot in annotations, spcmat in specialconcatmats]
+    # Test concatenations of pairwise combinations of annotated special matrices
     for annospcmata in annospcmats, annospcmatb in annospcmats
-        @test issparse(vcat(annospcmata, annospcmatb))
-        @test issparse(hcat(annospcmata, annospcmatb))
-        @test issparse(hvcat((2,), annospcmata, annospcmatb))
-        @test issparse(cat(annospcmata, annospcmatb; dims=(1,2)))
+        AM = Array(annospcmata); BM = Array(annospcmatb)
+        @test vcat(annospcmata, annospcmatb) == vcat(AM, BM)
+        @test hcat(annospcmata, annospcmatb) == hcat(AM, BM)
+        @test hvcat((2,), annospcmata, annospcmatb) == hvcat((2,), AM, BM)
+        @test cat(annospcmata, annospcmatb; dims=(1,2)) == cat(AM, BM; dims=(1,2))
     end
-    # Test that concatenations of pairwise combinations of annotated sparse/special
-    # matrices and other matrix/vector types yield sparse matrices
+    # Test concatenations of pairwise combinations of annotated special matrices and other matrix/vector types
     for annospcmat in annospcmats
+        AM = Array(annospcmat)
         # --> Tests applicable to pairs including only matrices
-        for othermat in (densemat, annodmats..., sparseconcatmats...)
-            @test issparse(vcat(annospcmat, othermat))
-            @test issparse(vcat(othermat, annospcmat))
+        for othermat in (densemat, annodmats..., specialconcatmats...)
+            OM = Array(othermat)
+            @test vcat(annospcmat, othermat) == vcat(AM, OM)
+            @test vcat(othermat, annospcmat) == vcat(OM, AM)
         end
         # --> Tests applicable to pairs including other vectors or matrices
-        for other in (spvec, densevec, densemat, annodmats..., sparseconcatmats...)
-            @test issparse(hcat(annospcmat, other))
-            @test issparse(hcat(other, annospcmat))
-            @test issparse(hvcat((2,), annospcmat, other))
-            @test issparse(hvcat((2,), other, annospcmat))
-            @test issparse(cat(annospcmat, other; dims=(1,2)))
-            @test issparse(cat(other, annospcmat; dims=(1,2)))
+        for other in (densevec, densemat, annodmats..., specialconcatmats...)
+            OM = Array(other)
+            @test hcat(annospcmat, other) == hcat(AM, OM)
+            @test hcat(other, annospcmat) == hcat(OM, AM)
+            @test hvcat((2,), annospcmat, other) == hvcat((2,), AM, OM)
+            @test hvcat((2,), other, annospcmat) == hvcat((2,), OM, AM)
+            @test cat(annospcmat, other; dims=(1,2)) == cat(AM, OM; dims=(1,2))
+            @test cat(other, annospcmat; dims=(1,2)) == cat(OM, AM; dims=(1,2))
         end
     end
-    # The preceding tests should cover multi-way combinations of those types, but for good
-    # measure test a few multi-way combinations involving those types
-    @test issparse(vcat(spmat, densemat, annospcmats[1], annodmats[2]))
-    @test issparse(vcat(densemat, spmat, annodmats[1], annospcmats[2]))
-    @test issparse(hcat(spvec, annodmats[1], annospcmats[3], densevec, diagmat))
-    @test issparse(hcat(annodmats[2], annospcmats[4], spvec, densevec, diagmat))
-    @test issparse(hvcat((5,), diagmat, densevec, spvec, annodmats[1], annospcmats[1]))
-    @test issparse(hvcat((5,), spvec, annodmats[2], diagmat, densevec, annospcmats[2]))
-    @test issparse(cat(annodmats[1], diagmat, annospcmats[3], densevec, spvec; dims=(1,2)))
-    @test issparse(cat(spvec, diagmat, densevec, annospcmats[4], annodmats[2]; dims=(1,2)))
-    # Test that concatenations strictly involving un/annotated dense matrices/vectors
-    # yield dense arrays
+    # Test concatenations strictly involving un/annotated dense matrices/vectors
     for densemata in (densemat, annodmats...)
+        AM = Array(densemata)
         # --> Tests applicable to pairs including only matrices
         for densematb in (densemat, annodmats...)
-            @test !issparse(vcat(densemata, densematb))
-            @test !issparse(vcat(densematb, densemata))
+            BM = Array(densematb)
+            @test vcat(densemata, densematb) == vcat(AM, BM)
+            @test vcat(densematb, densemata) == vcat(BM, AM)
         end
         # --> Tests applicable to pairs including vectors or matrices
         for otherdense in (densevec, densemat, annodmats...)
-            @test !issparse(hcat(densemata, otherdense))
-            @test !issparse(hcat(otherdense, densemata))
-            @test !issparse(hvcat((2,), densemata, otherdense))
-            @test !issparse(hvcat((2,), otherdense, densemata))
-            @test !issparse(cat(densemata, otherdense; dims=(1,2)))
-            @test !issparse(cat(otherdense, densemata; dims=(1,2)))
+            OM = Array(otherdense)
+            @test hcat(densemata, otherdense) == hcat(AM, OM)
+            @test hcat(otherdense, densemata) == hcat(OM, AM)
+            @test hvcat((2,), densemata, otherdense) == hvcat((2,), AM, OM)
+            @test hvcat((2,), otherdense, densemata) == hvcat((2,), OM, AM)
+            @test cat(densemata, otherdense; dims=(1,2)) == cat(AM, OM; dims=(1,2))
+            @test cat(otherdense, densemata; dims=(1,2)) == cat(OM, AM; dims=(1,2))
         end
     end
 end
-@testset "vcat of Vectors with SparseVectors should yield SparseVector (#22225)" begin
-    @test isa((@inferred vcat(Float64[], spzeros(1))), SparseVector)
-end
-
 
 # for testing types with a dimension
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
@@ -341,7 +348,7 @@ using .Main.Furlongs
         Bl = Bidiagonal(rand(elty, 10), rand(elty, 9), 'L')
         T = Tridiagonal(rand(elty, 9),rand(elty, 10), rand(elty, 9))
         S = SymTridiagonal(rand(elty, 10), rand(elty, 9))
-        mats = [D, Bu, Bl, T, S]
+        mats = Any[D, Bu, Bl, T, S]
         for A in mats
             @test iszero(zero(A))
             @test isone(one(A))
@@ -395,12 +402,18 @@ using .Main.Furlongs
     @test one(S) isa SymTridiagonal
 
     # eltype with dimensions
-    D = Diagonal{Furlong{2, Int64}}([1, 2, 3, 4])
-    Bu = Bidiagonal{Furlong{2, Int64}}([1, 2, 3, 4], [1, 2, 3], 'U')
-    Bl =  Bidiagonal{Furlong{2, Int64}}([1, 2, 3, 4], [1, 2, 3], 'L')
-    T = Tridiagonal{Furlong{2, Int64}}([1, 2, 3], [1, 2, 3, 4], [1, 2, 3])
-    S = SymTridiagonal{Furlong{2, Int64}}([1, 2, 3, 4], [1, 2, 3])
-    mats = [D, Bu, Bl, T, S]
+    D0 = Diagonal{Furlong{0, Int64}}([1, 2, 3, 4])
+    Bu0 = Bidiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3], 'U')
+    Bl0 =  Bidiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3], 'L')
+    T0 = Tridiagonal{Furlong{0, Int64}}([1, 2, 3], [1, 2, 3, 4], [1, 2, 3])
+    S0 = SymTridiagonal{Furlong{0, Int64}}([1, 2, 3, 4], [1, 2, 3])
+    F2 = Furlongs.Furlong{2}(1)
+    D2 = Diagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2)
+    Bu2 = Bidiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2, 'U')
+    Bl2 =  Bidiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2, 'L')
+    T2 = Tridiagonal{Furlong{2, Int64}}([1, 2, 3].*F2, [1, 2, 3, 4].*F2, [1, 2, 3].*F2)
+    S2 = SymTridiagonal{Furlong{2, Int64}}([1, 2, 3, 4].*F2, [1, 2, 3].*F2)
+    mats = Any[D0, Bu0, Bl0, T0, S0, D2, Bu2, Bl2, T2, S2]
     for A in mats
         @test iszero(zero(A))
         @test isone(one(A))
@@ -434,4 +447,69 @@ end
     end
 end
 
+@testset "BiTriSym*Q' and Q'*BiTriSym" begin
+    dl = [1, 1, 1]
+    d = [1, 1, 1, 1]
+    D = Diagonal(d)
+    Bi = Bidiagonal(d, dl, :L)
+    Tri = Tridiagonal(dl, d, dl)
+    Sym = SymTridiagonal(d, dl)
+    F = qr(ones(4, 1))
+    A = F.Q'
+    for A in (F.Q, F.Q'), B in (D, Bi, Tri, Sym)
+        @test B*A ≈ Matrix(B)*A
+        @test A*B ≈ A*Matrix(B)
+    end
+end
+
+@testset "Ops on SymTridiagonal ev has the same length as dv" begin
+    x = rand(3)
+    y = rand(3)
+    z = rand(2)
+
+    S = SymTridiagonal(x, y)
+    T = Tridiagonal(z, x, z)
+    Bu = Bidiagonal(x, z, :U)
+    Bl = Bidiagonal(x, z, :L)
+
+    Ms = Matrix(S)
+    Mt = Matrix(T)
+    Mbu = Matrix(Bu)
+    Mbl = Matrix(Bl)
+
+    @test S + T ≈ Ms + Mt
+    @test T + S ≈ Mt + Ms
+    @test S + Bu ≈ Ms + Mbu
+    @test Bu + S ≈ Mbu + Ms
+    @test S + Bl ≈ Ms + Mbl
+    @test Bl + S ≈ Mbl + Ms
+end
+
+@testset "Ensure Strided * (Sym)Tridiagonal is Dense" begin
+    x = rand(3)
+    y = rand(3)
+    z = rand(2)
+
+    l = rand(12, 12)
+    # strided but not a Matrix
+    v = @view l[1:4:end, 1:4:end]
+    M_v = Matrix(v)
+    m = rand(3, 3)
+
+    S = SymTridiagonal(x, y)
+    T = Tridiagonal(z, x, z)
+    M_S = Matrix(S)
+    M_T = Matrix(T)
+
+    @test m * T ≈ m * M_T
+    @test m * S ≈ m * M_S
+    @test v * T ≈ M_v * T
+    @test v * S ≈ M_v * S
+
+    @test m * T isa Matrix
+    @test m * S isa Matrix
+    @test v * T isa Matrix
+    @test v * S isa Matrix
+end
+
 end # module TestSpecial
diff --git a/stdlib/LinearAlgebra/test/structuredbroadcast.jl b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
index eed0048991525c..4aeca31a79a03d 100644
--- a/stdlib/LinearAlgebra/test/structuredbroadcast.jl
+++ b/stdlib/LinearAlgebra/test/structuredbroadcast.jl
@@ -206,21 +206,34 @@ end
 
 end
 
-struct Zero end
-Base.iszero(::Zero) = true
-Base.zero(::Type{Zero}) = Zero()
+struct Zero36193 end
+Base.iszero(::Zero36193) = true
+LinearAlgebra.iszerodefined(::Type{Zero36193}) = true
 @testset "PR #36193" begin
-    z = Zero()
-    Z = [z z
-         z z]
-    zz = [z, z]
-    U = UpperTriangular(Z)
-    L = LowerTriangular(Z)
-    D = Diagonal(zz)
-    for a in [U, L, D]
-        @test identity.(a) isa typeof(a)
-        @test map(identity, a) isa typeof(a)
+    f(::Union{Int, Zero36193}) = Zero36193()
+    function test(el)
+        M = [el el
+             el el]
+        v = [el, el]
+        U = UpperTriangular(M)
+        L = LowerTriangular(M)
+        D = Diagonal(v)
+        for (T, A) in [(UpperTriangular, U), (LowerTriangular, L), (Diagonal, D)]
+            @test identity.(A) isa typeof(A)
+            @test map(identity, A) isa typeof(A)
+            @test f.(A) isa T{Zero36193}
+            @test map(f, A) isa T{Zero36193}
+        end
     end
+    # This should not need `zero(::Type{Zero36193})` to be defined
+    test(1)
+    Base.zero(::Type{Zero36193}) = Zero36193()
+    # This should not need `==(::Zero36193, ::Int)` to be defined as `iszerodefined`
+    # returns true.
+    test(Zero36193())
 end
 
+# structured broadcast with function returning non-number type
+@test tuple.(Diagonal([1, 2])) == [(1,) (0,); (0,) (2,)]
+
 end
diff --git a/stdlib/LinearAlgebra/test/svd.jl b/stdlib/LinearAlgebra/test/svd.jl
index d83d2de0f3c880..8bd3edadc911df 100644
--- a/stdlib/LinearAlgebra/test/svd.jl
+++ b/stdlib/LinearAlgebra/test/svd.jl
@@ -8,6 +8,26 @@ using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted
 @testset "Simple svdvals / svd tests" begin
     ≊(x,y) = isapprox(x,y,rtol=1e-15)
 
+    m = [2, 0]
+    @test @inferred(svdvals(m)) ≊ [2]
+    @test @inferred(svdvals!(float(m))) ≊ [2]
+    for sf in (@inferred(svd(m)), @inferred(svd!(float(m))))
+        @test sf.S ≊ [2]
+        @test sf.U'sf.U ≊ [1]
+        @test sf.Vt'sf.Vt ≊ [1]
+        @test sf.U*Diagonal(sf.S)*sf.Vt' ≊ m
+    end
+    F = @inferred svd(m, full=true)
+    @test size(F.U) == (2, 2)
+    @test F.S ≊ [2]
+    @test F.U'F.U ≊ Matrix(I, 2, 2)
+    @test F.Vt'*F.Vt ≊ [1]
+    @test @inferred(svdvals(3:4)) ≊ [5]
+    A = Matrix(1.0I, 2, 2)
+    Z = svd(Hermitian(A); full=true)
+    @test Z.S ≈ ones(2)
+    @test Z.U'Z.U ≈ I(2)
+
     m1 = [2 0; 0 0]
     m2 = [2 -2; 1 1]/sqrt(2)
     m2c = Complex.([2 -2; 1 1]/sqrt(2))
@@ -15,8 +35,8 @@ using LinearAlgebra: BlasComplex, BlasFloat, BlasReal, QRPivoted
     @test @inferred(svdvals(m2))  ≊ [2, 1]
     @test @inferred(svdvals(m2c)) ≊ [2, 1]
 
-    sf1 = svd(m1)
-    sf2 = svd(m2)
+    sf1 = @inferred svd(m1)
+    sf2 = @inferred svd(m2)
     @test sf1.S ≊ [2, 0]
     @test sf2.S ≊ [2, 1]
     # U & Vt are unitary
@@ -201,4 +221,44 @@ end
     @test Uc * diagm(0=>Sc) * transpose(V) ≈ complex.(A) rtol=1e-3
 end
 
+@testset "Issue 40944. ldiV!(SVD) should update rhs" begin
+    F = svd(randn(2, 2))
+    b = randn(2)
+    x = ldiv!(F, b)
+    @test x === b
+end
+
+@testset "adjoint of SVD" begin
+    n = 5
+    B = randn(5, 2)
+
+    @testset "size(b)=$(size(b))" for b in (B[:, 1], B)
+        @testset "size(A)=$(size(A))" for A in (
+            randn(n, n),
+            # Wide problems become minimum norm (in x) problems similarly to LQ
+            randn(n + 2, n),
+            randn(n - 2, n),
+            complex.(randn(n, n), randn(n, n)))
+
+            F = svd(A)
+            x = F'\b
+            @test x ≈ A'\b
+            @test length(size(x)) == length(size(b))
+        end
+    end
+end
+
+@testset "Float16" begin
+    A = Float16[4. 12. -16.; 12. 37. -43.; -16. -43. 98.]
+    B = svd(A)
+    B32 = svd(Float32.(A))
+    @test B isa SVD{Float16, Float16, Matrix{Float16}}
+    @test B.U isa Matrix{Float16}
+    @test B.Vt isa Matrix{Float16}
+    @test B.S isa Vector{Float16}
+    @test B.U ≈ B32.U
+    @test B.Vt ≈ B32.Vt
+    @test B.S ≈ B32.S
+end
+
 end # module TestSVD
diff --git a/stdlib/LinearAlgebra/test/symmetric.jl b/stdlib/LinearAlgebra/test/symmetric.jl
index 9f75d21d46b964..47a36df5e78836 100644
--- a/stdlib/LinearAlgebra/test/symmetric.jl
+++ b/stdlib/LinearAlgebra/test/symmetric.jl
@@ -2,7 +2,7 @@
 
 module TestSymmetric
 
-using Test, LinearAlgebra, SparseArrays, Random
+using Test, LinearAlgebra, Random
 
 Random.seed!(1010)
 
@@ -11,13 +11,19 @@ Random.seed!(1010)
     @test ishermitian(σ)
 end
 
+@testset "Two-dimensional Euler formula for Hermitian" begin
+    @test cis(Hermitian([π 0; 0 π])) ≈ -I
+end
+
 @testset "Hermitian matrix exponential/log" begin
     A1 = randn(4,4) + im*randn(4,4)
     A2 = A1 + A1'
     @test exp(A2) ≈ exp(Hermitian(A2))
+    @test cis(A2) ≈ cis(Hermitian(A2))
     @test log(A2) ≈ log(Hermitian(A2))
     A3 = A1 * A1' # posdef
     @test exp(A3) ≈ exp(Hermitian(A3))
+    @test cis(A3) ≈ cis(Hermitian(A3))
     @test log(A3) ≈ log(Hermitian(A3))
 
     A1 = randn(4,4)
@@ -54,6 +60,10 @@ end
                 @test Hermitian(Hermitian(aherm, :U), :U) === Hermitian(aherm, :U)
                 @test_throws ArgumentError Symmetric(Symmetric(asym, :U), :L)
                 @test_throws ArgumentError Hermitian(Hermitian(aherm, :U), :L)
+
+                @test_throws ArgumentError Symmetric(asym, :R)
+                @test_throws ArgumentError Hermitian(asym, :R)
+
                 # mixed cases with Hermitian/Symmetric
                 if eltya <: Real
                     @test Symmetric(Hermitian(aherm, :U))     === Symmetric(aherm, :U)
@@ -64,6 +74,11 @@ end
                     @test_throws ArgumentError Hermitian(Symmetric(aherm, :U), :L)
                 end
             end
+            @testset "diag" begin
+                D = Diagonal(x)
+                @test diag(Symmetric(D, :U))::Vector == x
+                @test diag(Hermitian(D, :U))::Vector == real(x)
+            end
             @testset "similar" begin
                 @test isa(similar(Symmetric(asym)), Symmetric{eltya})
                 @test isa(similar(Hermitian(aherm)), Hermitian{eltya})
@@ -257,6 +272,7 @@ end
                         @test abs.(eigen(Symmetric(asym), 1:2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
                         @test abs.(eigen(Symmetric(asym), d[1] - 1, (d[2] + d[3])/2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
                         @test eigvals(Symmetric(asym), 1:2) ≈ d[1:2]
+                        @test eigvals(Symmetric(asym), sortby= x -> -x) ≈ eigvals(eigen(Symmetric(asym), sortby = x -> -x))
                         @test eigvals(Symmetric(asym), d[1] - 1, (d[2] + d[3])/2) ≈ d[1:2]
                         # eigen doesn't support Symmetric{Complex}
                         @test Matrix(eigen(asym)) ≈ asym
@@ -270,6 +286,7 @@ end
                     @test abs.(eigen(Hermitian(aherm), 1:2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
                     @test abs.(eigen(Hermitian(aherm), d[1] - 1, (d[2] + d[3])/2).vectors'v[:,1:2]) ≈ Matrix(I, 2, 2)
                     @test eigvals(Hermitian(aherm), 1:2) ≈ d[1:2]
+                    @test eigvals(Hermitian(aherm), sortby= x -> -x) ≈ eigvals(eigen(Hermitian(aherm), sortby = x -> -x))
                     @test eigvals(Hermitian(aherm), d[1] - 1, (d[2] + d[3])/2) ≈ d[1:2]
                     @test Matrix(eigen(aherm)) ≈ aherm
                     @test eigvecs(Hermitian(aherm)) ≈ eigvecs(aherm)
@@ -526,20 +543,26 @@ end
     end
 end
 
-@testset "similar should preserve underlying storage type and uplo flag" begin
-    m, n = 4, 3
-    sparsemat = sprand(m, m, 0.5)
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin
+    # tests corresponding to #34995
+    immutablemat = ImmutableArray([1 2 3; 4 5 6; 7 8 9])
     for SymType in (Symmetric, Hermitian)
-        symsparsemat = SymType(sparsemat)
-        @test isa(similar(symsparsemat), typeof(symsparsemat))
-        @test similar(symsparsemat).uplo == symsparsemat.uplo
-        @test isa(similar(symsparsemat, Float32), SymType{Float32,<:SparseMatrixCSC{Float32}})
-        @test similar(symsparsemat, Float32).uplo == symsparsemat.uplo
-        @test isa(similar(symsparsemat, (n, n)), typeof(sparsemat))
-        @test isa(similar(symsparsemat, Float32, (n, n)), SparseMatrixCSC{Float32})
+        S = Float64
+        symmat = SymType(immutablemat)
+        @test convert(AbstractArray{S}, symmat).data isa ImmutableArray{S}
+        @test convert(AbstractMatrix{S}, symmat).data isa ImmutableArray{S}
+        @test AbstractArray{S}(symmat).data isa ImmutableArray{S}
+        @test AbstractMatrix{S}(symmat).data isa ImmutableArray{S}
+        @test convert(AbstractArray{S}, symmat) == symmat
+        @test convert(AbstractMatrix{S}, symmat) == symmat
     end
 end
 
+
 @testset "#24572: eltype(A::HermOrSym) === eltype(parent(A))" begin
     A = rand(Float32, 3, 3)
     @test_throws TypeError Symmetric{Float64,Matrix{Float32}}(A, 'U')
@@ -678,4 +701,65 @@ end
     end
 end
 
+@testset "Multiplications symmetric/hermitian for $T and $S" for T in
+        (Float16, Float32, Float64, BigFloat), S in (ComplexF16, ComplexF32, ComplexF64)
+    let A = transpose(Symmetric(rand(S, 3, 3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3))
+        @test A * Bv ≈ Matrix(A) * Bv
+        @test A * Bm ≈ Matrix(A) * Bm
+        @test Bm * A ≈ Bm * Matrix(A)
+    end
+    let A = adjoint(Hermitian(rand(S, 3,3))), Bv = Vector(rand(T, 3)), Bm = Matrix(rand(T, 3,3))
+        @test A * Bv ≈ Matrix(A) * Bv
+        @test A * Bm ≈ Matrix(A) * Bm
+        @test Bm * A ≈ Bm * Matrix(A)
+    end
+    let Ahrs = transpose(Hermitian(Symmetric(rand(T, 3, 3)))),
+        Acs = transpose(Symmetric(rand(S, 3, 3))),
+        Ahcs = transpose(Hermitian(Symmetric(rand(S, 3, 3))))
+
+        @test Ahrs * Ahrs ≈ Ahrs * Matrix(Ahrs)
+        @test Ahrs * Acs ≈ Ahrs * Matrix(Acs)
+        @test Acs * Acs ≈ Matrix(Acs) * Matrix(Acs)
+        @test Acs * Ahrs ≈ Matrix(Acs) * Ahrs
+        @test Ahrs * Ahcs ≈ Matrix(Ahrs) * Ahcs
+        @test Ahcs * Ahrs ≈ Ahcs * Matrix(Ahrs)
+    end
+    let Ahrs = adjoint(Hermitian(Symmetric(rand(T, 3, 3)))),
+        Acs = adjoint(Symmetric(rand(S, 3, 3))),
+        Ahcs = adjoint(Hermitian(Symmetric(rand(S, 3, 3))))
+
+        @test Ahrs * Ahrs ≈ Ahrs * Matrix(Ahrs)
+        @test Ahcs * Ahcs ≈ Matrix(Ahcs) * Matrix(Ahcs)
+        @test Ahrs * Ahcs ≈ Ahrs * Matrix(Ahcs)
+        @test Acs * Ahcs ≈ Acs * Matrix(Ahcs)
+        @test Ahcs * Ahrs ≈ Matrix(Ahcs) * Ahrs
+        @test Ahcs * Acs ≈ Matrix(Ahcs) * Acs
+    end
+end
+
+@testset "Addition/subtraction with SymTridiagonal" begin
+    TR = SymTridiagonal(randn(Float64,5), randn(Float64,4))
+    TC = SymTridiagonal(randn(ComplexF64,5), randn(ComplexF64,4))
+    SR = Symmetric(randn(Float64,5,5))
+    SC = Symmetric(randn(ComplexF64,5,5))
+    HR = Hermitian(randn(Float64,5,5))
+    HC = Hermitian(randn(ComplexF64,5,5))
+    for op = (+,-)
+        for T = (TR, TC), S = (SR, SC)
+            @test op(T, S) == op(Array(T), S)
+            @test op(S, T) == op(S, Array(T))
+            @test op(T, S) isa Symmetric
+            @test op(S, T) isa Symmetric
+        end
+        for H = (HR, HC)
+            for T = (TR, TC)
+                @test op(T, H) == op(Array(T), H)
+                @test op(H, T) == op(H, Array(T))
+            end
+            @test op(TR, H) isa Hermitian
+            @test op(H, TR) isa Hermitian
+        end
+    end
+end
+
 end # module TestSymmetric
diff --git a/stdlib/LinearAlgebra/test/testgroups b/stdlib/LinearAlgebra/test/testgroups
index b33dfecaa82eee..de082d8e7dce08 100644
--- a/stdlib/LinearAlgebra/test/testgroups
+++ b/stdlib/LinearAlgebra/test/testgroups
@@ -25,3 +25,4 @@ givens
 structuredbroadcast
 addmul
 ldlt
+factorization
diff --git a/stdlib/LinearAlgebra/test/triangular.jl b/stdlib/LinearAlgebra/test/triangular.jl
index 030b0229d140f6..d3c2817f894639 100644
--- a/stdlib/LinearAlgebra/test/triangular.jl
+++ b/stdlib/LinearAlgebra/test/triangular.jl
@@ -3,8 +3,8 @@
 module TestTriangular
 
 debug = false
-using Test, LinearAlgebra, SparseArrays, Random
-using LinearAlgebra: BlasFloat, errorbounds, full!, naivesub!, transpose!,
+using Test, LinearAlgebra, Random
+using LinearAlgebra: BlasFloat, errorbounds, full!, transpose!,
     UnitUpperTriangular, UnitLowerTriangular,
     mul!, rdiv!, rmul!, lmul!
 
@@ -158,9 +158,11 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
             @test copy(viewA1') == Matrix(viewA1)'
             # transpose!
             @test transpose!(copy(A1)) == transpose(A1)
+            @test typeof(transpose!(copy(A1))).name == typeof(transpose(A1)).name
             @test transpose!(t1(view(copy(A1).data, vrange, vrange))) == transpose(viewA1)
             # adjoint!
             @test adjoint!(copy(A1)) == adjoint(A1)
+            @test typeof(adjoint!(copy(A1))).name == typeof(adjoint(A1)).name
             @test adjoint!(t1(view(copy(A1).data, vrange, vrange))) == adjoint(viewA1)
         end
 
@@ -196,7 +198,7 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
         end
 
         #exp/log
-        if (elty1 == Float64 || elty1 == ComplexF64) && (t1 == UpperTriangular || t1 == LowerTriangular)
+        if elty1 ∈ (Float32,Float64,ComplexF32,ComplexF64)
             @test exp(Matrix(log(A1))) ≈ A1
         end
 
@@ -277,10 +279,10 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
         @test ladb ≈ fladb atol=sqrt(eps(real(float(one(elty1)))))*n*n
 
         # Matrix square root
-        @test sqrt(A1) |> t -> t*t ≈ A1
+        @test sqrt(A1) |> (t -> (t*t)::typeof(t)) ≈ A1
 
         # naivesub errors
-        @test_throws DimensionMismatch naivesub!(A1,Vector{elty1}(undef,n+1))
+        @test_throws DimensionMismatch ldiv!(A1, Vector{elty1}(undef, n+1))
 
         # eigenproblems
         if !(elty1 in (BigFloat, Complex{BigFloat})) # Not handled yet
@@ -304,6 +306,10 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
             svdvals(A1)
         end
 
+        @test ((A1*A1)::t1) ≈ Matrix(A1) * Matrix(A1)
+        @test ((A1/A1)::t1) ≈ Matrix(A1) / Matrix(A1)
+        @test ((A1\A1)::t1) ≈ Matrix(A1) \ Matrix(A1)
+
         # Begin loop for second Triangular matrix
         for elty2 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
             for (t2, uplo2) in ((UpperTriangular, :U),
@@ -326,7 +332,7 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
                 @test A1 + A2 == Matrix(A1) + Matrix(A2)
                 @test A1 - A2 == Matrix(A1) - Matrix(A2)
 
-                # Triangular-Triangualar multiplication and division
+                # Triangular-Triangular multiplication and division
                 @test A1*A2 ≈ Matrix(A1)*Matrix(A2)
                 @test transpose(A1)*A2 ≈ transpose(Matrix(A1))*Matrix(A2)
                 @test transpose(A1)*adjoint(A2) ≈ transpose(Matrix(A1))*adjoint(Matrix(A2))
@@ -338,6 +344,27 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
                 @test A1'A2' ≈ Matrix(A1)'Matrix(A2)'
                 @test A1/A2 ≈ Matrix(A1)/Matrix(A2)
                 @test A1\A2 ≈ Matrix(A1)\Matrix(A2)
+                if uplo1 === :U && uplo2 === :U
+                    if t1 === UnitUpperTriangular && t2 === UnitUpperTriangular
+                        @test A1*A2 isa UnitUpperTriangular
+                        @test A1/A2 isa UnitUpperTriangular
+                        @test A1\A2 isa UnitUpperTriangular
+                    else
+                        @test A1*A2 isa UpperTriangular
+                        @test A1/A2 isa UpperTriangular
+                        @test A1\A2 isa UpperTriangular
+                    end
+                elseif uplo1 === :L && uplo2 === :L
+                    if t1 === UnitLowerTriangular && t2 === UnitLowerTriangular
+                        @test A1*A2 isa UnitLowerTriangular
+                        @test A1/A2 isa UnitLowerTriangular
+                        @test A1\A2 isa UnitLowerTriangular
+                    else
+                        @test A1*A2 isa LowerTriangular
+                        @test A1/A2 isa LowerTriangular
+                        @test A1\A2 isa LowerTriangular
+                    end
+                end
                 offsizeA = Matrix{Float64}(I, n+1, n+1)
                 @test_throws DimensionMismatch offsizeA / A2
                 @test_throws DimensionMismatch offsizeA / transpose(A2)
@@ -349,7 +376,8 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
                 @test_throws DimensionMismatch A2'  * offsizeA
                 @test_throws DimensionMismatch A2   * offsizeA
                 if (uplo1 == uplo2 && elty1 == elty2 != Int && t1 != UnitLowerTriangular && t1 != UnitUpperTriangular)
-                    @test rdiv!(copy(A1), copy(A2)) ≈ A1/A2 ≈ Matrix(A1)/Matrix(A2)
+                    @test rdiv!(copy(A1), copy(A2))::t1 ≈ A1/A2 ≈ Matrix(A1)/Matrix(A2)
+                    @test ldiv!(copy(A2), copy(A1))::t1 ≈ A2\A1 ≈ Matrix(A2)\Matrix(A1)
                 end
                 if (uplo1 != uplo2 && elty1 == elty2 != Int && t2 != UnitLowerTriangular && t2 != UnitUpperTriangular)
                     @test lmul!(adjoint(copy(A1)), copy(A2)) ≈ A1'*A2 ≈ Matrix(A1)'*Matrix(A2)
@@ -449,7 +477,11 @@ for elty1 in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFlo
             @test_throws DimensionMismatch Ann'\bm
             @test_throws DimensionMismatch transpose(Ann)\bm
             if t1 == UpperTriangular || t1 == LowerTriangular
-                @test_throws LinearAlgebra.SingularException naivesub!(t1(zeros(elty1,n,n)),fill(eltyB(1),n))
+                if elty1 === eltyB <: BlasFloat
+                    @test_throws LAPACKException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n))
+                else
+                    @test_throws SingularException ldiv!(t1(zeros(elty1, n, n)), fill(eltyB(1), n))
+                end
             end
             @test B/A1 ≈ B/Matrix(A1)
             @test B/transpose(A1) ≈ B/transpose(Matrix(A1))
@@ -469,10 +501,75 @@ end
 # Matrix square root
 Atn = UpperTriangular([-1 1 2; 0 -2 2; 0 0 -3])
 Atp = UpperTriangular([1 1 2; 0 2 2; 0 0 3])
+Atu = UnitUpperTriangular([1 1 2; 0 1 2; 0 0 1])
 @test sqrt(Atn) |> t->t*t ≈ Atn
+@test sqrt(Atn) isa UpperTriangular
 @test typeof(sqrt(Atn)[1,1]) <: Complex
 @test sqrt(Atp) |> t->t*t ≈ Atp
+@test sqrt(Atp) isa UpperTriangular
 @test typeof(sqrt(Atp)[1,1]) <: Real
+@test typeof(sqrt(complex(Atp))[1,1]) <: Complex
+@test sqrt(Atu) |> t->t*t ≈ Atu
+@test sqrt(Atu) isa UnitUpperTriangular
+@test typeof(sqrt(Atu)[1,1]) <: Real
+@test typeof(sqrt(complex(Atu))[1,1]) <: Complex
+
+@testset "matrix square root quasi-triangular blockwise" begin
+    @testset for T in (Float32, Float64, ComplexF32, ComplexF64)
+        A = schur(rand(T, 100, 100)^2).T
+        @test LinearAlgebra.sqrt_quasitriu(A; blockwidth=16)^2 ≈ A
+    end
+    n = 256
+    A = rand(ComplexF64, n, n)
+    U = schur(A).T
+    Ubig = Complex{BigFloat}.(U)
+    @test LinearAlgebra.sqrt_quasitriu(U; blockwidth=64) ≈ LinearAlgebra.sqrt_quasitriu(Ubig; blockwidth=64)
+end
+
+@testset "sylvester quasi-triangular blockwise" begin
+    @testset for T in (Float32, Float64, ComplexF32, ComplexF64), m in (15, 40), n in (15, 45)
+        A = schur(rand(T, m, m)).T
+        B = schur(rand(T, n, n)).T
+        C = randn(T, m, n)
+        Ccopy = copy(C)
+        X = LinearAlgebra._sylvester_quasitriu!(A, B, C; blockwidth=16)
+        @test X === C
+        @test A * X + X * B ≈ -Ccopy
+
+        @testset "test raise=false does not break recursion" begin
+            Az = zero(A)
+            Bz = zero(B)
+            C2 = copy(Ccopy)
+            @test_throws LAPACKException LinearAlgebra._sylvester_quasitriu!(Az, Bz, C2; blockwidth=16)
+            m == n || @test any(C2 .== Ccopy)  # recursion broken
+            C3 = copy(Ccopy)
+            X3 = LinearAlgebra._sylvester_quasitriu!(Az, Bz, C3; blockwidth=16, raise=false)
+            @test !any(X3 .== Ccopy)  # recursion not broken
+        end
+    end
+end
+
+@testset "check matrix logarithm type-inferrable" for elty in (Float32,Float64,ComplexF32,ComplexF64)
+    A = UpperTriangular(exp(triu(randn(elty, n, n))))
+    @inferred Union{typeof(A),typeof(complex(A))} log(A)
+    @test exp(Matrix(log(A))) ≈ A
+    if elty <: Real
+        @test typeof(log(A)) <: UpperTriangular{elty}
+        @test typeof(log(complex(A))) <: UpperTriangular{complex(elty)}
+        @test isreal(log(complex(A)))
+        @test log(complex(A)) ≈ log(A)
+    end
+
+    Au = UnitUpperTriangular(exp(triu(randn(elty, n, n), 1)))
+    @inferred Union{typeof(A),typeof(complex(A))} log(Au)
+    @test exp(Matrix(log(Au))) ≈ Au
+    if elty <: Real
+        @test typeof(log(Au)) <: UpperTriangular{elty}
+        @test typeof(log(complex(Au))) <: UpperTriangular{complex(elty)}
+        @test isreal(log(complex(Au)))
+        @test log(complex(Au)) ≈ log(Au)
+    end
+end
 
 Areal   = randn(n, n)/2
 Aimg    = randn(n, n)/2
@@ -593,15 +690,63 @@ let A = UpperTriangular([Furlong(1) Furlong(4); Furlong(0) Furlong(1)])
     @test sqrt(A) == Furlong{1//2}.(UpperTriangular([1 2; 0 1]))
 end
 
-@testset "similar should preserve underlying storage type" begin
-    local m, n = 4, 3
-    sparsemat = sprand(m, m, 0.5)
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "AbstractArray constructor should preserve underlying storage type" begin
+    # tests corresponding to #34995
+    local m = 4
+    local T, S = Float32, Float64
+    immutablemat = ImmutableArray(randn(T,m,m))
     for TriType in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
-        trisparsemat = TriType(sparsemat)
-        @test isa(similar(trisparsemat), typeof(trisparsemat))
-        @test isa(similar(trisparsemat, Float32), TriType{Float32,<:SparseMatrixCSC{Float32}})
-        @test isa(similar(trisparsemat, (n, n)), typeof(sparsemat))
-        @test isa(similar(trisparsemat, Float32, (n, n)), SparseMatrixCSC{Float32})
+        trimat = TriType(immutablemat)
+        @test convert(AbstractArray{S}, trimat).data isa ImmutableArray{S}
+        @test convert(AbstractMatrix{S}, trimat).data isa ImmutableArray{S}
+        @test AbstractArray{S}(trimat).data isa ImmutableArray{S}
+        @test AbstractMatrix{S}(trimat).data isa ImmutableArray{S}
+        @test convert(AbstractArray{S}, trimat) == trimat
+        @test convert(AbstractMatrix{S}, trimat) == trimat
+    end
+end
+
+@testset "inplace mul of appropriate types should preserve triagular structure" begin
+    for elty1 in (Float64, ComplexF32), elty2 in (Float64, ComplexF32)
+        T = promote_type(elty1, elty2)
+        M1 = rand(elty1, 5, 5)
+        M2 = rand(elty2, 5, 5)
+        A = UpperTriangular(M1)
+        A2 = UpperTriangular(M2)
+        Au = UnitUpperTriangular(M1)
+        Au2 = UnitUpperTriangular(M2)
+        B = LowerTriangular(M1)
+        B2 = LowerTriangular(M2)
+        Bu = UnitLowerTriangular(M1)
+        Bu2 = UnitLowerTriangular(M2)
+
+        @test mul!(similar(A), A, A)::typeof(A) == A*A
+        @test mul!(similar(A, T), A, A2) ≈ A*A2
+        @test mul!(similar(A, T), A2, A) ≈ A2*A
+        @test mul!(typeof(similar(A, T))(A), A, A2, 2.0, 3.0) ≈ 2.0*A*A2 + 3.0*A
+        @test mul!(typeof(similar(A2, T))(A2), A2, A, 2.0, 3.0) ≈ 2.0*A2*A + 3.0*A2
+
+        @test mul!(similar(A), A, Au)::typeof(A) == A*Au
+        @test mul!(similar(A), Au, A)::typeof(A) == Au*A
+        @test mul!(similar(Au), Au, Au)::typeof(Au) == Au*Au
+        @test mul!(similar(A, T), A, Au2) ≈ A*Au2
+        @test mul!(similar(A, T), Au2, A) ≈ Au2*A
+        @test mul!(similar(Au2), Au2, Au2) == Au2*Au2
+
+        @test mul!(similar(B), B, B)::typeof(B) == B*B
+        @test mul!(similar(B, T), B, B2) ≈ B*B2
+        @test mul!(similar(B, T), B2, B) ≈ B2*B
+        @test mul!(typeof(similar(B, T))(B), B, B2, 2.0, 3.0) ≈ 2.0*B*B2 + 3.0*B
+        @test mul!(typeof(similar(B2, T))(B2), B2, B, 2.0, 3.0) ≈ 2.0*B2*B + 3.0*B2
+
+        @test mul!(similar(B), B, Bu)::typeof(B) == B*Bu
+        @test mul!(similar(B), Bu, B)::typeof(B) == Bu*B
+        @test mul!(similar(Bu), Bu, Bu)::typeof(Bu) == Bu*Bu
+        @test mul!(similar(B, T), B, Bu2) ≈ B*Bu2
+        @test mul!(similar(B, T), Bu2, B) ≈ Bu2*B
     end
 end
 
@@ -656,4 +801,39 @@ let A = [0.9999999999999998 4.649058915617843e-16 -1.3149405273715513e-16 9.9959
     B = [0.09648289218436859 0.023497875751503007 0.0 0.0; 0.023497875751503007 0.045787575150300804 0.0 0.0; 0.0 0.0 0.0 0.0; 0.0 0.0 0.0 0.0]
     @test sqrt(A*B*A')^2 ≈ A*B*A'
 end
+
+@testset "one and oneunit for triangular" begin
+    m = rand(4,4)
+    function test_one_oneunit_triangular(a)
+        b = Matrix(a)
+        @test (@inferred a^1) == b^1
+        @test (@inferred a^-1) == b^-1
+        @test one(a) == one(b)
+        @test one(a)*a == a
+        @test a*one(a) == a
+        @test oneunit(a) == oneunit(b)
+        @test oneunit(a) isa typeof(a)
+    end
+    for T in [UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular]
+        a = T(m)
+        test_one_oneunit_triangular(a)
+    end
+    # more complicated examples
+    b = UpperTriangular(LowerTriangular(m))
+    test_one_oneunit_triangular(b)
+    c = UpperTriangular(Diagonal(rand(2)))
+    test_one_oneunit_triangular(c)
+end
+
+@testset "LowerTriangular(Diagonal(...)) and friends (issue #28869)" begin
+    for elty in (Float32, Float64, BigFloat, ComplexF32, ComplexF64, Complex{BigFloat}, Int)
+        V = elty ≡ Int ? rand(1:10, 5) : elty.(randn(5))
+        D = Diagonal(V)
+        for dty in (UpperTriangular, LowerTriangular)
+            A = dty(D)
+            @test A * A' == D * D'
+        end
+    end
+end
+
 end # module TestTriangular
diff --git a/stdlib/LinearAlgebra/test/tridiag.jl b/stdlib/LinearAlgebra/test/tridiag.jl
index ed6fbebce4f4da..ecdf6b416baa54 100644
--- a/stdlib/LinearAlgebra/test/tridiag.jl
+++ b/stdlib/LinearAlgebra/test/tridiag.jl
@@ -2,7 +2,12 @@
 
 module TestTridiagonal
 
-using Test, LinearAlgebra, SparseArrays, Random
+using Test, LinearAlgebra, Random
+
+const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
+
+isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
+using .Main.Quaternions
 
 include("testutils.jl") # test_approx_eq_modphase
 
@@ -159,6 +164,19 @@ end
         @test !isdiag(Tridiagonal(dl,d,zerosdu))
         @test !isdiag(Tridiagonal(zerosdl,d,du))
         @test !isdiag(Tridiagonal(dl,d,du))
+
+        # Test methods that could fail due to dv and ev having the same length
+        # see #41089
+
+        badev = zero(d)
+        badev[end] = 1
+        S = SymTridiagonal(d, badev)
+
+        @test istriu(S, -2)
+        @test istriu(S, 0)
+        @test !istriu(S, 2)
+
+        @test isdiag(S)
     end
 
     @testset "iszero and isone" begin
@@ -185,6 +203,12 @@ end
         @test isone(Sone)
         @test !iszero(Smix)
         @test !isone(Smix)
+
+        badev = zeros(elty, 3)
+        badev[end] = 1
+
+        @test isone(SymTridiagonal(ones(elty, 3), badev))
+        @test iszero(SymTridiagonal(zeros(elty, 3), badev))
     end
 
     @testset for mat_type in (Tridiagonal, SymTridiagonal)
@@ -193,14 +217,12 @@ end
         @testset "similar, size, and copyto!" begin
             B = similar(A)
             @test size(B) == size(A)
-            if mat_type == Tridiagonal # doesn't work for SymTridiagonal yet
-                copyto!(B, A)
-                @test B == A
-            end
+            copyto!(B, A)
+            @test B == A
             @test isa(similar(A), mat_type{elty})
             @test isa(similar(A, Int), mat_type{Int})
-            @test isa(similar(A, (3, 2)), SparseMatrixCSC)
-            @test isa(similar(A, Int, (3, 2)), SparseMatrixCSC{Int})
+            @test isa(similar(A, (3, 2)), Matrix)
+            @test isa(similar(A, Int, (3, 2)), Matrix{Int})
             @test size(A, 3) == 1
             @test size(A, 1) == n
             @test size(A) == (n, n)
@@ -244,6 +266,12 @@ end
                 @test func(func(A)) == A
             end
         end
+        @testset "permutedims(::[Sym]Tridiagonal)" begin
+            @test permutedims(permutedims(A)) === A
+            @test permutedims(A) == transpose.(transpose(A))
+            @test permutedims(A, [1, 2]) === A
+            @test permutedims(A, (2, 1)) == permutedims(A)
+        end
         if elty != Int
             @testset "Simple unary functions" begin
                 for func in (det, inv)
@@ -378,8 +406,8 @@ end
                     @testset "similar" begin
                         @test isa(similar(Ts), SymTridiagonal{elty})
                         @test isa(similar(Ts, Int), SymTridiagonal{Int})
-                        @test isa(similar(Ts, (3, 2)), SparseMatrixCSC)
-                        @test isa(similar(Ts, Int, (3, 2)), SparseMatrixCSC{Int})
+                        @test isa(similar(Ts, (3, 2)), Matrix)
+                        @test isa(similar(Ts, Int, (3, 2)), Matrix{Int})
                     end
 
                     @test first(logabsdet(Tldlt)) ≈ first(logabsdet(Fs))
@@ -454,13 +482,6 @@ end
     @test SymTridiagonal(ones(0), ones(0)) * ones(0, 2) == ones(0, 2)
 end
 
-@testset "issue #29644" begin
-    F = lu(Tridiagonal(sparse(1.0I, 3, 3)))
-    @test F.L == Matrix(I, 3, 3)
-    @test startswith(sprint(show, MIME("text/plain"), F),
-          "LinearAlgebra.LU{Float64, LinearAlgebra.Tridiagonal{Float64, SparseArrays.SparseVector")
-end
-
 @testset "Issue 29630" begin
     function central_difference_discretization(N; dfunc = x -> 12x^2 - 2N^2,
                                                dufunc = x -> N^2 + 4N*x,
@@ -576,4 +597,132 @@ end
     @test_throws ArgumentError SymTridiagonal{Float32}(T)
 end
 
+# Issue #38765
+@testset "Eigendecomposition with different lengths" begin
+    # length(A.ev) can be either length(A.dv) or length(A.dv) - 1
+    A = SymTridiagonal(fill(1.0, 3), fill(-1.0, 3))
+    F = eigen(A)
+    A2 = SymTridiagonal(fill(1.0, 3), fill(-1.0, 2))
+    F2 = eigen(A2)
+    test_approx_eq_modphase(F.vectors, F2.vectors)
+    @test F.values ≈ F2.values ≈ eigvals(A) ≈ eigvals(A2)
+    @test eigvecs(A) ≈ eigvecs(A2)
+    @test eigvecs(A, eigvals(A)[1:1]) ≈ eigvecs(A2, eigvals(A2)[1:1])
+end
+
+@testset "non-commutative algebra (#39701)" begin
+    for A in (SymTridiagonal(Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4))),
+              Tridiagonal(Quaternion.(randn(4), randn(4), randn(4), randn(4)), Quaternion.(randn(5), randn(5), randn(5), randn(5)), Quaternion.(randn(4), randn(4), randn(4), randn(4))))
+        c = Quaternion(1,2,3,4)
+        @test A * c ≈ Matrix(A) * c
+        @test A / c ≈ Matrix(A) / c
+        @test c * A ≈ c * Matrix(A)
+        @test c \ A ≈ c \ Matrix(A)
+    end
+end
+
+@testset "adjoint of LDLt" begin
+    Sr = SymTridiagonal(randn(5), randn(4))
+    Sc = SymTridiagonal(complex.(randn(5)) .+ 1im, complex.(randn(4), randn(4)))
+    b = ones(size(Sr, 1))
+
+    F = ldlt(Sr)
+    @test F\b == F'\b
+
+    F = ldlt(Sc)
+    @test copy(Sc')\b == F'\b
+end
+
+@testset "symmetric and hermitian tridiagonals" begin
+    A = [im 0; 0 -im]
+    @test issymmetric(A)
+    @test !ishermitian(A)
+
+    # real
+    A = SymTridiagonal(randn(5), randn(4))
+    @test issymmetric(A)
+    @test ishermitian(A)
+
+    A = Tridiagonal(A.ev, A.dv, A.ev .+ 1)
+    @test !issymmetric(A)
+    @test !ishermitian(A)
+
+    # complex
+    # https://github.com/JuliaLang/julia/pull/41037#discussion_r645524081
+    S = SymTridiagonal(randn(5) .+ 0im, randn(5) .+ 0im)
+    S.ev[end] = im
+    @test issymmetric(S)
+    @test ishermitian(S)
+
+    S = SymTridiagonal(randn(5) .+ 1im, randn(4) .+ 1im)
+    @test issymmetric(S)
+    @test !ishermitian(S)
+
+    S = Tridiagonal(S.ev, S.dv, adjoint.(S.ev))
+    @test !issymmetric(S)
+    @test !ishermitian(S)
+
+    S = Tridiagonal(S.dl, real.(S.d) .+ 0im, S.du)
+    @test !issymmetric(S)
+    @test ishermitian(S)
+end
+
+isdefined(Main, :ImmutableArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "ImmutableArrays.jl"))
+using .Main.ImmutableArrays
+
+@testset "Conversion to AbstractArray" begin
+    # tests corresponding to #34995
+    v1 = ImmutableArray([1, 2])
+    v2 = ImmutableArray([3, 4, 5])
+    v3 = ImmutableArray([6, 7])
+    T = Tridiagonal(v1, v2, v3)
+    Tsym = SymTridiagonal(v2, v1)
+
+    @test convert(AbstractArray{Float64}, T)::Tridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == T
+    @test convert(AbstractMatrix{Float64}, T)::Tridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == T
+    @test convert(AbstractArray{Float64}, Tsym)::SymTridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Tsym
+    @test convert(AbstractMatrix{Float64}, Tsym)::SymTridiagonal{Float64,ImmutableArray{Float64,1,Array{Float64,1}}} == Tsym
+end
+
+@testset "dot(x,A,y) for A::Tridiagonal or SymTridiagonal" begin
+    for elty in (Float32, Float64, ComplexF32, ComplexF64, Int)
+        x = fill(convert(elty, 1), 0)
+        T = Tridiagonal(x, x, x)
+        Tsym = SymTridiagonal(x, x)
+        @test dot(x, T, x) == 0.0
+        @test dot(x, Tsym, x) == 0.0
+    end
+end
+
+isdefined(Main, :SizedArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "SizedArrays.jl"))
+using .Main.SizedArrays
+@testset "non-number eltype" begin
+    @testset "sum for SymTridiagonal" begin
+        dv = [SizedArray{(2,2)}(rand(1:2048,2,2)) for i in 1:10]
+        ev = [SizedArray{(2,2)}(rand(1:2048,2,2)) for i in 1:10]
+        S = SymTridiagonal(dv, ev)
+        Sdense = Matrix(S)
+        @test Sdense == collect(S)
+        @test sum(S) == sum(Sdense)
+        @test sum(S, dims = 1) == sum(Sdense, dims = 1)
+        @test sum(S, dims = 2) == sum(Sdense, dims = 2)
+    end
+    @testset "issymmetric/ishermitian for Tridiagonal" begin
+        @test !issymmetric(Tridiagonal([[1 2;3 4]], [[1 2;2 3], [1 2;2 3]], [[1 2;3 4]]))
+        @test !issymmetric(Tridiagonal([[1 3;2 4]], [[1 2;3 4], [1 2;3 4]], [[1 2;3 4]]))
+        @test issymmetric(Tridiagonal([[1 3;2 4]], [[1 2;2 3], [1 2;2 3]], [[1 2;3 4]]))
+
+        @test ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+0im, [1 2;2 3].+0im], [[1 2;3 4].-im]))
+        @test !ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+0im, [1 2;2 3].+0im], [[1 2;3 4].+im]))
+        @test !ishermitian(Tridiagonal([[1 3;2 4].+im], [[1 2;2 3].+im, [1 2;2 3].+0im], [[1 2;3 4].-im]))
+    end
+    @testset "== between Tridiagonal and SymTridiagonal" begin
+        dv = [SizedArray{(2,2)}([1 2;3 4]) for i in 1:4]
+        ev = [SizedArray{(2,2)}([3 4;1 2]) for i in 1:4]
+        S = SymTridiagonal(dv, ev)
+        Sdense = Matrix(S)
+        @test S == Tridiagonal(diag(Sdense, -1), diag(Sdense),  diag(Sdense, 1)) == S
+        @test S !== Tridiagonal(diag(Sdense, 1), diag(Sdense),  diag(Sdense, 1)) !== S
+    end
+end
 end # module TestTridiagonal
diff --git a/stdlib/LinearAlgebra/test/uniformscaling.jl b/stdlib/LinearAlgebra/test/uniformscaling.jl
index 726a7191484f3f..be1b9887d570f5 100644
--- a/stdlib/LinearAlgebra/test/uniformscaling.jl
+++ b/stdlib/LinearAlgebra/test/uniformscaling.jl
@@ -2,13 +2,15 @@
 
 module TestUniformscaling
 
-using Test, LinearAlgebra, Random, SparseArrays
+using Test, LinearAlgebra, Random
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
 using .Main.Quaternions
+isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
+using .Main.OffsetArrays
 
-Random.seed!(123)
+Random.seed!(1234543)
 
 @testset "basic functions" begin
     @test I === I' # transpose
@@ -20,8 +22,6 @@ Random.seed!(123)
     @test one(UniformScaling(rand(ComplexF64))) == one(UniformScaling{ComplexF64})
     @test eltype(one(UniformScaling(rand(ComplexF64)))) == ComplexF64
     @test -one(UniformScaling(2)) == UniformScaling(-1)
-    @test sparse(3I,4,5) == sparse(1:4, 1:4, 3, 4, 5)
-    @test sparse(3I,5,4) == sparse(1:4, 1:4, 3, 5, 4)
     @test opnorm(UniformScaling(1+im)) ≈ sqrt(2)
     @test convert(UniformScaling{Float64}, 2I) === 2.0I
 end
@@ -45,8 +45,19 @@ end
         (2:3, 1:2),
         (2:-1:1, 1:2),
         (1:2:9, 5:2:13),
+        (1, [1,2,5]),
+        (1, [1,10,5,2]),
+        (10, [10]),
+        ([1], 1),
+        ([15,1,5,2], 6),
+        ([2], [2]),
+        ([2,9,8,2,1], [2,8,4,3,1]),
+        ([8,3,5,3], 2:9),
     ]
         @test I[a,b] == J[a,b]
+        ndims(a) == 1 && @test I[OffsetArray(a,-10),b] == J[OffsetArray(a,-10),b]
+        ndims(b) == 1 && @test I[a,OffsetArray(b,-9)] == J[a,OffsetArray(b,-9)]
+        ndims(a) == ndims(b) == 1 && @test I[OffsetArray(a,-7),OffsetArray(b,-8)] == J[OffsetArray(a,-7),OffsetArray(b,-8)]
     end
 end
 
@@ -131,7 +142,7 @@ end
 end
 
 @testset "arithmetic with Number" begin
-    α = randn()
+    α = rand()
     @test α + I == α + 1
     @test I + α == α + 1
     @test α - I == α - 1
@@ -227,97 +238,91 @@ let
         @test B + I == B + Matrix(I, size(B))
         @test I + B == B + Matrix(I, size(B))
         AA = randn(2, 2)
-        for SS in (sprandn(3,3, 0.5), sparse(Int(1)I, 3, 3))
-            for (A, S) in ((AA, SS), (view(AA, 1:2, 1:2), view(SS, 1:3, 1:3)))
-                I22 = Matrix(I, size(A))
-                @test @inferred(A + I) == A + I22
-                @test @inferred(I + A) == A + I22
-                @test @inferred(I - I) === UniformScaling(0)
-                @test @inferred(B - I) == B - I22
-                @test @inferred(I - B) == I22 - B
-                @test @inferred(A - I) == A - I22
-                @test @inferred(I - A) == I22 - A
-                @test @inferred(I*J) === UniformScaling(λ)
-                @test @inferred(B*J) == B*λ
-                @test @inferred(J*B) == B*λ
-                @test @inferred(I*A) !== A # Don't alias
-                @test @inferred(I*S) !== S # Don't alias
-                @test @inferred(A*I) !== A # Don't alias
-                @test @inferred(S*I) !== S # Don't alias
-
-                @test @inferred(S*J) == S*λ
-                @test @inferred(J*S) == S*λ
-                @test @inferred(A*J) == A*λ
-                @test @inferred(J*A) == A*λ
-                @test @inferred(J*fill(1, 3)) == fill(λ, 3)
-                @test @inferred(λ*J) === UniformScaling(λ*J.λ)
-                @test @inferred(J*λ) === UniformScaling(λ*J.λ)
-                @test @inferred(J/I) === J
-                @test @inferred(I/A) == inv(A)
-                @test @inferred(A/I) == A
-                @test @inferred(I/λ) === UniformScaling(1/λ)
-                @test @inferred(I\J) === J
-
-                if isa(A, Array)
-                    T = LowerTriangular(randn(3,3))
-                else
-                    T = LowerTriangular(view(randn(3,3), 1:3, 1:3))
-                end
-                @test @inferred(T + J) == Array(T) + J
-                @test @inferred(J + T) == J + Array(T)
-                @test @inferred(T - J) == Array(T) - J
-                @test @inferred(J - T) == J - Array(T)
-                @test @inferred(T\I) == inv(T)
-
-                if isa(A, Array)
-                    T = LinearAlgebra.UnitLowerTriangular(randn(3,3))
-                else
-                    T = LinearAlgebra.UnitLowerTriangular(view(randn(3,3), 1:3, 1:3))
-                end
-                @test @inferred(T + J) == Array(T) + J
-                @test @inferred(J + T) == J + Array(T)
-                @test @inferred(T - J) == Array(T) - J
-                @test @inferred(J - T) == J - Array(T)
-                @test @inferred(T\I) == inv(T)
-
-                if isa(A, Array)
-                    T = UpperTriangular(randn(3,3))
-                else
-                    T = UpperTriangular(view(randn(3,3), 1:3, 1:3))
-                end
-                @test @inferred(T + J) == Array(T) + J
-                @test @inferred(J + T) == J + Array(T)
-                @test @inferred(T - J) == Array(T) - J
-                @test @inferred(J - T) == J - Array(T)
-                @test @inferred(T\I) == inv(T)
+        for A in (AA, view(AA, 1:2, 1:2))
+            I22 = Matrix(I, size(A))
+            @test @inferred(A + I) == A + I22
+            @test @inferred(I + A) == A + I22
+            @test @inferred(I - I) === UniformScaling(0)
+            @test @inferred(B - I) == B - I22
+            @test @inferred(I - B) == I22 - B
+            @test @inferred(A - I) == A - I22
+            @test @inferred(I - A) == I22 - A
+            @test @inferred(I*J) === UniformScaling(λ)
+            @test @inferred(B*J) == B*λ
+            @test @inferred(J*B) == B*λ
+            @test @inferred(I*A) !== A # Don't alias
+            @test @inferred(A*I) !== A # Don't alias
+
+            @test @inferred(A*J) == A*λ
+            @test @inferred(J*A) == A*λ
+            @test @inferred(J*fill(1, 3)) == fill(λ, 3)
+            @test @inferred(λ*J) === UniformScaling(λ*J.λ)
+            @test @inferred(J*λ) === UniformScaling(λ*J.λ)
+            @test @inferred(J/I) === J
+            @test @inferred(I/A) == inv(A)
+            @test @inferred(A/I) == A
+            @test @inferred(I/λ) === UniformScaling(1/λ)
+            @test @inferred(I\J) === J
+
+            if isa(A, Array)
+                T = LowerTriangular(randn(3,3))
+            else
+                T = LowerTriangular(view(randn(3,3), 1:3, 1:3))
+            end
+            @test @inferred(T + J) == Array(T) + J
+            @test @inferred(J + T) == J + Array(T)
+            @test @inferred(T - J) == Array(T) - J
+            @test @inferred(J - T) == J - Array(T)
+            @test @inferred(T\I) == inv(T)
+
+            if isa(A, Array)
+                T = LinearAlgebra.UnitLowerTriangular(randn(3,3))
+            else
+                T = LinearAlgebra.UnitLowerTriangular(view(randn(3,3), 1:3, 1:3))
+            end
+            @test @inferred(T + J) == Array(T) + J
+            @test @inferred(J + T) == J + Array(T)
+            @test @inferred(T - J) == Array(T) - J
+            @test @inferred(J - T) == J - Array(T)
+            @test @inferred(T\I) == inv(T)
+
+            if isa(A, Array)
+                T = UpperTriangular(randn(3,3))
+            else
+                T = UpperTriangular(view(randn(3,3), 1:3, 1:3))
+            end
+            @test @inferred(T + J) == Array(T) + J
+            @test @inferred(J + T) == J + Array(T)
+            @test @inferred(T - J) == Array(T) - J
+            @test @inferred(J - T) == J - Array(T)
+            @test @inferred(T\I) == inv(T)
+
+            if isa(A, Array)
+                T = LinearAlgebra.UnitUpperTriangular(randn(3,3))
+            else
+                T = LinearAlgebra.UnitUpperTriangular(view(randn(3,3), 1:3, 1:3))
+            end
+            @test @inferred(T + J) == Array(T) + J
+            @test @inferred(J + T) == J + Array(T)
+            @test @inferred(T - J) == Array(T) - J
+            @test @inferred(J - T) == J - Array(T)
+            @test @inferred(T\I) == inv(T)
 
+            for elty in (Float64, ComplexF64)
                 if isa(A, Array)
-                    T = LinearAlgebra.UnitUpperTriangular(randn(3,3))
+                    T = Hermitian(randn(elty, 3,3))
                 else
-                    T = LinearAlgebra.UnitUpperTriangular(view(randn(3,3), 1:3, 1:3))
+                    T = Hermitian(view(randn(elty, 3,3), 1:3, 1:3))
                 end
                 @test @inferred(T + J) == Array(T) + J
                 @test @inferred(J + T) == J + Array(T)
                 @test @inferred(T - J) == Array(T) - J
                 @test @inferred(J - T) == J - Array(T)
-                @test @inferred(T\I) == inv(T)
-
-                for elty in (Float64, ComplexF64)
-                    if isa(A, Array)
-                        T = Hermitian(randn(elty, 3,3))
-                    else
-                        T = Hermitian(view(randn(elty, 3,3), 1:3, 1:3))
-                    end
-                    @test @inferred(T + J) == Array(T) + J
-                    @test @inferred(J + T) == J + Array(T)
-                    @test @inferred(T - J) == Array(T) - J
-                    @test @inferred(J - T) == J - Array(T)
-                end
-
-                @test @inferred(I\A) == A
-                @test @inferred(A\I) == inv(A)
-                @test @inferred(λ\I) === UniformScaling(1/λ)
             end
+
+            @test @inferred(I\A) == A
+            @test @inferred(A\I) == inv(A)
+            @test @inferred(λ\I) === UniformScaling(1/λ)
         end
     end
 end
@@ -328,30 +333,41 @@ end
     @test_throws ArgumentError vcat(I)
     @test_throws ArgumentError [I; I]
     @test_throws ArgumentError [I I; I]
-    for T in (Matrix, SparseMatrixCSC)
-        A = T(rand(3,4))
-        B = T(rand(3,3))
-        C = T(rand(0,3))
-        D = T(rand(2,0))
-        @test (hcat(A, 2I))::T == hcat(A, Matrix(2I, 3, 3))
-        @test (vcat(A, 2I))::T == vcat(A, Matrix(2I, 4, 4))
-        @test (hcat(C, 2I))::T == C
-        @test (vcat(D, 2I))::T == D
-        @test (hcat(I, 3I, A, 2I))::T == hcat(Matrix(I, 3, 3), Matrix(3I, 3, 3), A, Matrix(2I, 3, 3))
-        @test (vcat(I, 3I, A, 2I))::T == vcat(Matrix(I, 4, 4), Matrix(3I, 4, 4), A, Matrix(2I, 4, 4))
-        @test (hvcat((2,1,2), B, 2I, I, 3I, 4I))::T ==
-            hvcat((2,1,2), B, Matrix(2I, 3, 3), Matrix(I, 6, 6), Matrix(3I, 3, 3), Matrix(4I, 3, 3))
-        @test hvcat((3,1), C, C, I, 3I)::T == hvcat((2,1), C, C, Matrix(3I, 6,6))
-        @test hvcat((2,2,2), I, 2I, 3I, 4I, C, C)::T ==
-            hvcat((2,2,2), Matrix(I, 3, 3), Matrix(2I, 3,3 ), Matrix(3I, 3,3), Matrix(4I, 3,3), C, C)
-        @test hvcat((2,2,4), C, C, I, 2I, 3I, 4I, 5I, D)::T ==
-            hvcat((2,2,4), C, C, Matrix(I, 3, 3), Matrix(2I,3,3),
-                Matrix(3I, 2, 2), Matrix(4I, 2, 2), Matrix(5I,2,2), D)
-        @test (hvcat((2,3,2), B, 2I, C, C, I, 3I, 4I))::T ==
-            hvcat((2,2,2), B, Matrix(2I, 3, 3), C, C, Matrix(3I, 3, 3), Matrix(4I, 3, 3))
-        @test hvcat((3,2,1), C, C, I, B ,3I, 2I)::T ==
-            hvcat((2,2,1), C, C, B, Matrix(3I,3,3), Matrix(2I,6,6))
-    end
+
+    A = rand(3,4)
+    B = rand(3,3)
+    C = rand(0,3)
+    D = rand(2,0)
+    E = rand(1,3)
+    F = rand(3,1)
+    α = rand()
+    @test (hcat(A, 2I))::Matrix == hcat(A, Matrix(2I, 3, 3))
+    @test (hcat(E, α))::Matrix == hcat(E, [α])
+    @test (hcat(E, α, 2I))::Matrix == hcat(E, [α], fill(2, 1, 1))
+    @test (vcat(A, 2I))::Matrix == vcat(A, Matrix(2I, 4, 4))
+    @test (vcat(F, α))::Matrix == vcat(F, [α])
+    @test (vcat(F, α, 2I))::Matrix == vcat(F, [α], fill(2, 1, 1))
+    @test (hcat(C, 2I))::Matrix == C
+    @test_throws DimensionMismatch hcat(C, α)
+    @test (vcat(D, 2I))::Matrix == D
+    @test_throws DimensionMismatch vcat(D, α)
+    @test (hcat(I, 3I, A, 2I))::Matrix == hcat(Matrix(I, 3, 3), Matrix(3I, 3, 3), A, Matrix(2I, 3, 3))
+    @test (vcat(I, 3I, A, 2I))::Matrix == vcat(Matrix(I, 4, 4), Matrix(3I, 4, 4), A, Matrix(2I, 4, 4))
+    @test (hvcat((2,1,2), B, 2I, I, 3I, 4I))::Matrix ==
+        hvcat((2,1,2), B, Matrix(2I, 3, 3), Matrix(I, 6, 6), Matrix(3I, 3, 3), Matrix(4I, 3, 3))
+    @test hvcat((3,1), C, C, I, 3I)::Matrix == hvcat((2,1), C, C, Matrix(3I, 6,6))
+    @test hvcat((2,2,2), I, 2I, 3I, 4I, C, C)::Matrix ==
+        hvcat((2,2,2), Matrix(I, 3, 3), Matrix(2I, 3,3 ), Matrix(3I, 3,3), Matrix(4I, 3,3), C, C)
+    @test hvcat((2,2,4), C, C, I, 2I, 3I, 4I, 5I, D)::Matrix ==
+        hvcat((2,2,4), C, C, Matrix(I, 3, 3), Matrix(2I,3,3),
+            Matrix(3I, 2, 2), Matrix(4I, 2, 2), Matrix(5I,2,2), D)
+    @test (hvcat((2,3,2), B, 2I, C, C, I, 3I, 4I))::Matrix ==
+        hvcat((2,2,2), B, Matrix(2I, 3, 3), C, C, Matrix(3I, 3, 3), Matrix(4I, 3, 3))
+    @test hvcat((3,2,1), C, C, I, B ,3I, 2I)::Matrix ==
+        hvcat((2,2,1), C, C, B, Matrix(3I,3,3), Matrix(2I,6,6))
+    @test (hvcat((1,2), A, E, α))::Matrix == hvcat((1,2), A, E, [α]) == hvcat((1,2), A, E, α*I)
+    @test (hvcat((2,2), α, E, F, 3I))::Matrix == hvcat((2,2), [α], E, F, Matrix(3I, 3, 3))
+    @test (hvcat((2,2), 3I, F, E, α))::Matrix == hvcat((2,2), Matrix(3I, 3, 3), F, E, [α])
 end
 
 @testset "Matrix/Array construction from UniformScaling" begin
@@ -400,6 +416,13 @@ end
     @test 0denseI != 2I != 0denseI # test generic path / inequality on diag
     @test alltwos != 2I != alltwos # test generic path / inequality off diag
     @test rdenseI !=  I != rdenseI # test square matrix check
+
+    # isequal
+    @test !isequal(I, I(3))
+    @test !isequal(I(1), I)
+    @test !isequal([1], I)
+    @test isequal(I, 1I)
+    @test !isequal(2I, 3I)
 end
 
 @testset "operations involving I should preserve eltype" begin
@@ -445,6 +468,17 @@ end
     target = J * A * alpha + C * beta
     @test mul!(copy(C), J, A, alpha, beta) ≈ target
     @test mul!(copy(C), A, J, alpha, beta) ≈ target
+
+    a = randn()
+    C = randn(3, 3)
+    target_5mul = a*alpha*J + beta*C
+    @test mul!(copy(C), a, J, alpha, beta) ≈ target_5mul
+    @test mul!(copy(C), J, a, alpha, beta) ≈ target_5mul
+    target_5mul = beta*C # alpha = 0
+    @test mul!(copy(C), a, J, 0, beta) ≈ target_5mul
+    target_5mul = a*alpha*Matrix(J, 3, 3) # beta = 0
+    @test mul!(copy(C), a, J, alpha, 0) ≈ target_5mul
+
 end
 
 @testset "Construct Diagonal from UniformScaling" begin
@@ -453,6 +487,20 @@ end
     @test I(3) == [1 0 0; 0 1 0; 0 0 1]
 end
 
+@testset "dot" begin
+    A = randn(3, 3)
+    λ = randn()
+    J = UniformScaling(λ)
+    @test dot(A, J) ≈ dot(J, A)
+    @test dot(A, J) ≈ tr(A' * J)
+
+    A = rand(ComplexF64, 3, 3)
+    λ = randn() + im * randn()
+    J = UniformScaling(λ)
+    @test dot(A, J) ≈ conj(dot(J, A))
+    @test dot(A, J) ≈ tr(A' * J)
+end
+
 @testset "generalized dot" begin
     x = rand(-10:10, 3)
     y = rand(-10:10, 3)
@@ -466,7 +514,7 @@ end
 
 @testset "Factorization solutions" begin
     J = complex(randn(),randn()) * I
-    qrp = A -> qr(A, Val(true))
+    qrp = A -> qr(A, ColumnNorm())
 
     # thin matrices
     X = randn(3,2)
@@ -497,4 +545,20 @@ end
     end
 end
 
+@testset "offset arrays" begin
+    A = OffsetArray(zeros(4,4), -1:2, 0:3)
+    @test sum(I + A) ≈ 3.0
+    @test sum(A + I) ≈ 3.0
+    @test sum(I - A) ≈ 3.0
+    @test sum(A - I) ≈ -3.0
+end
+
+@testset "type promotion when dividing UniformScaling by matrix" begin
+    A = randn(5,5)
+    cA = complex(A)
+    J = (5+2im)*I
+    @test J/A ≈ J/cA
+    @test A\J ≈ cA\J
+end
+
 end # module TestUniformscaling
diff --git a/stdlib/Logging/docs/src/index.md b/stdlib/Logging/docs/src/index.md
index e1d60223615ef9..9a269ee54571bf 100644
--- a/stdlib/Logging/docs/src/index.md
+++ b/stdlib/Logging/docs/src/index.md
@@ -1,4 +1,4 @@
-# Logging
+# [Logging](@id man-logging)
 
 The [`Logging`](@ref Logging.Logging) module provides a way to record the history and progress of a
 computation as a log of events.  Events are created by inserting a logging
@@ -59,16 +59,16 @@ automatically extracted. Let's examine the user-defined data first:
   filtering. There are several standard levels of type [`LogLevel`](@ref);
   user-defined levels are also possible.
   Each is distinct in purpose:
-  - `Debug` is information intended for the developer of the program.
-  These events are disabled by default.
-  - `Info` is for general information to the user.
-  Think of it as an alternative to using `println` directly.
-  - `Warn` means something is wrong and action is likely required
-  but that for now the program is still working.
-  - `Error` means something is wrong and it is unlikely to be recovered,
-  at least by this part of the code.
-  Often this log-level is unneeded as throwing an exception can convey
-  all the required information.
+  - [`Logging.Debug`](@ref) (log level -1000) is information intended for the developer of
+    the program. These events are disabled by default.
+  - [`Logging.Info`](@ref) (log level 0) is for general information to the user.
+    Think of it as an alternative to using `println` directly.
+  - [`Logging.Warn`](@ref) (log level 1000) means something is wrong and action is likely
+    required but that for now the program is still working.
+  - [`Logging.Error`](@ref) (log level 2000) means something is wrong and it is unlikely to
+    be recovered, at least by this part of the code.
+    Often this log-level is unneeded as throwing an exception can convey
+    all the required information.
 
 * The *message*  is an object describing the event. By convention
   `AbstractString`s passed as messages are assumed to be in markdown format.
@@ -182,8 +182,8 @@ pattern match against the log event stream.
 
 Message filtering can be influenced through the `JULIA_DEBUG` environment
 variable, and serves as an easy way to enable debug logging for a file or
-module. For example, loading julia with `JULIA_DEBUG=loading` will activate
-`@debug` log messages in `loading.jl`:
+module. Loading julia with `JULIA_DEBUG=loading` will activate
+`@debug` log messages in `loading.jl`. For example, in Linux shells:
 
 ```
 $ JULIA_DEBUG=loading julia -e 'using OhMyREPL'
@@ -195,6 +195,9 @@ $ JULIA_DEBUG=loading julia -e 'using OhMyREPL'
 ...
 ```
 
+On windows, the same can be achieved in `CMD` via first running `set JULIA_DEBUG="loading"` and in `Powershell` via
+`$env:JULIA_DEBUG="loading"`.
+
 Similarly, the environment variable can be used to enable debug logging of
 modules, such as `Pkg`, or module roots (see [`Base.moduleroot`](@ref)). To
 enable all debug logging, use the special value `all`.
@@ -217,7 +220,12 @@ julia> foo()
 
 ```
 
-## Writing log events to a file
+Use a comma separator to enable debug for multiple
+modules: `JULIA_DEBUG=loading,Main`.
+
+## Examples
+
+### Example: Writing log events to a file
 
 Sometimes it can be useful to write log events to a file. Here is an example
 of how to use a task-local and global logger to write information to a text
@@ -254,6 +262,25 @@ julia> @info("a global log message")
 julia> close(io)
 ```
 
+### Example: Enable debug-level messages
+
+Here is an example of creating a [`ConsoleLogger`](@ref) that lets through any messages
+with log level higher than, or equal, to [`Logging.Debug`](@ref).
+
+```julia-repl
+julia> using Logging
+
+# Create a ConsoleLogger that prints any log messages with level >= Debug to stderr
+julia> debuglogger = ConsoleLogger(stderr, Logging.Debug)
+
+# Enable debuglogger for a task
+julia> with_logger(debuglogger) do
+           @debug "a context specific log message"
+       end
+
+# Set the global logger
+julia> global_logger(debuglogger)
+```
 
 ## Reference
 
@@ -267,6 +294,10 @@ Logging.Logging
 ```@docs
 Logging.@logmsg
 Logging.LogLevel
+Logging.Debug
+Logging.Info
+Logging.Warn
+Logging.Error
 ```
 
 ### [Processing events with AbstractLogger](@id AbstractLogger-interface)
diff --git a/stdlib/Logging/src/ConsoleLogger.jl b/stdlib/Logging/src/ConsoleLogger.jl
index 4a73f10932dc76..86e3d587eb4520 100644
--- a/stdlib/Logging/src/ConsoleLogger.jl
+++ b/stdlib/Logging/src/ConsoleLogger.jl
@@ -1,7 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 """
-    ConsoleLogger(stream=stderr, min_level=Info; meta_formatter=default_metafmt,
+    ConsoleLogger([stream,] min_level=Info; meta_formatter=default_metafmt,
                   show_limited=true, right_justify=0)
 
 Logger with formatting optimized for readability in a text console, for example
@@ -30,12 +30,19 @@ struct ConsoleLogger <: AbstractLogger
     right_justify::Int
     message_limits::Dict{Any,Int}
 end
-function ConsoleLogger(stream::IO=stderr, min_level=Info;
+function ConsoleLogger(stream::IO, min_level=Info;
                        meta_formatter=default_metafmt, show_limited=true,
                        right_justify=0)
     ConsoleLogger(stream, min_level, meta_formatter,
                   show_limited, right_justify, Dict{Any,Int}())
 end
+function ConsoleLogger(min_level=Info;
+                       meta_formatter=default_metafmt, show_limited=true,
+                       right_justify=0)
+    ConsoleLogger(closed_stream, min_level, meta_formatter,
+                  show_limited, right_justify, Dict{Any,Int}())
+end
+
 
 shouldlog(logger::ConsoleLogger, level, _module, group, id) =
     get(logger.message_limits, id, 1) > 0
@@ -50,22 +57,23 @@ function showvalue(io, e::Tuple{Exception,Any})
 end
 showvalue(io, ex::Exception) = showerror(io, ex)
 
-function default_logcolor(level)
+function default_logcolor(level::LogLevel)
     level < Info  ? Base.debug_color() :
     level < Warn  ? Base.info_color()  :
     level < Error ? Base.warn_color()  :
                     Base.error_color()
 end
 
-function default_metafmt(level, _module, group, id, file, line)
+function default_metafmt(level::LogLevel, _module, group, id, file, line)
+    @nospecialize
     color = default_logcolor(level)
-    prefix = (level == Warn ? "Warning" : string(level))*':'
-    suffix = ""
+    prefix = string(level == Warn ? "Warning" : string(level), ':')
+    suffix::String = ""
     Info <= level < Warn && return color, prefix, suffix
-    _module !== nothing && (suffix *= "$(_module)")
+    _module !== nothing && (suffix *= string(_module)::String)
     if file !== nothing
         _module !== nothing && (suffix *= " ")
-        suffix *= Base.contractuser(file)
+        suffix *= Base.contractuser(file)::String
         if line !== nothing
             suffix *= ":$(isa(line, UnitRange) ? "$(first(line))-$(last(line))" : line)"
         end
@@ -95,52 +103,61 @@ function termlength(str)
     return N
 end
 
-function handle_message(logger::ConsoleLogger, level, message, _module, group, id,
-                        filepath, line; maxlog=nothing, kwargs...)
-    if maxlog !== nothing && maxlog isa Integer
-        remaining = get!(logger.message_limits, id, maxlog)
+function handle_message(logger::ConsoleLogger, level::LogLevel, message, _module, group, id,
+                        filepath, line; kwargs...)
+    @nospecialize
+    hasmaxlog = haskey(kwargs, :maxlog) ? 1 : 0
+    maxlog = get(kwargs, :maxlog, nothing)
+    if maxlog isa Core.BuiltinInts
+        remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
         logger.message_limits[id] = remaining - 1
         remaining > 0 || return
     end
 
     # Generate a text representation of the message and all key value pairs,
     # split into lines.
-    msglines = [(indent=0,msg=l) for l in split(chomp(string(message)), '\n')]
-    dsize = displaysize(logger.stream)::Tuple{Int,Int}
-    if !isempty(kwargs)
+    msglines = [(indent=0, msg=l) for l in split(chomp(convert(String, string(message))::String), '\n')]
+    stream = logger.stream
+    if !isopen(stream)
+        stream = stderr
+    end
+    dsize = displaysize(stream)::Tuple{Int,Int}
+    nkwargs = length(kwargs)::Int
+    if nkwargs > hasmaxlog
         valbuf = IOBuffer()
-        rows_per_value = max(1, dsize[1]÷(length(kwargs)+1))
-        valio = IOContext(IOContext(valbuf, logger.stream),
-                          :displaysize => (rows_per_value,dsize[2]-5),
+        rows_per_value = max(1, dsize[1] ÷ (nkwargs + 1 - hasmaxlog))
+        valio = IOContext(IOContext(valbuf, stream),
+                          :displaysize => (rows_per_value, dsize[2] - 5),
                           :limit => logger.show_limited)
-        for (key,val) in pairs(kwargs)
+        for (key, val) in kwargs
+            key === :maxlog && continue
             showvalue(valio, val)
             vallines = split(String(take!(valbuf)), '\n')
             if length(vallines) == 1
-                push!(msglines, (indent=2,msg=SubString("$key = $(vallines[1])")))
+                push!(msglines, (indent=2, msg=SubString("$key = $(vallines[1])")))
             else
-                push!(msglines, (indent=2,msg=SubString("$key =")))
-                append!(msglines, ((indent=3,msg=line) for line in vallines))
+                push!(msglines, (indent=2, msg=SubString("$key =")))
+                append!(msglines, ((indent=3, msg=line) for line in vallines))
             end
         end
     end
 
     # Format lines as text with appropriate indentation and with a box
     # decoration on the left.
-    color,prefix,suffix = logger.meta_formatter(level, _module, group, id, filepath, line)::Tuple{Union{Symbol,Int},String,String}
+    color, prefix, suffix = logger.meta_formatter(level, _module, group, id, filepath, line)::Tuple{Union{Symbol,Int},String,String}
     minsuffixpad = 2
     buf = IOBuffer()
-    iob = IOContext(buf, logger.stream)
+    iob = IOContext(buf, stream)
     nonpadwidth = 2 + (isempty(prefix) || length(msglines) > 1 ? 0 : length(prefix)+1) +
                   msglines[end].indent + termlength(msglines[end].msg) +
                   (isempty(suffix) ? 0 : length(suffix)+minsuffixpad)
     justify_width = min(logger.right_justify, dsize[2])
     if nonpadwidth > justify_width && !isempty(suffix)
-        push!(msglines, (indent=0,msg=SubString("")))
+        push!(msglines, (indent=0, msg=SubString("")))
         minsuffixpad = 0
         nonpadwidth = 2 + length(suffix)
     end
-    for (i,(indent,msg)) in enumerate(msglines)
+    for (i, (indent, msg)) in enumerate(msglines)
         boxstr = length(msglines) == 1 ? "[ " :
                  i == 1                ? "┌ " :
                  i < length(msglines)  ? "│ " :
@@ -158,6 +175,6 @@ function handle_message(logger::ConsoleLogger, level, message, _module, group, i
         println(iob)
     end
 
-    write(logger.stream, take!(buf))
+    write(stream, take!(buf))
     nothing
 end
diff --git a/stdlib/Logging/src/Logging.jl b/stdlib/Logging/src/Logging.jl
index b44b8ae67473c2..0743c650326cc9 100644
--- a/stdlib/Logging/src/Logging.jl
+++ b/stdlib/Logging/src/Logging.jl
@@ -12,7 +12,7 @@ module Logging
 # Doing it this way (rather than with import) makes these symbols accessible to
 # tab completion.
 for sym in [
-    :LogLevel, :BelowMinLevel, :Debug, :Info, :Warn, :Error, :AboveMaxLevel,
+    :LogLevel, :BelowMinLevel, :AboveMaxLevel,
     :AbstractLogger,
     :NullLogger,
     :handle_message, :shouldlog, :min_enabled_level, :catch_exceptions,
@@ -29,6 +29,35 @@ for sym in [
     @eval const $sym = Base.CoreLogging.$sym
 end
 
+# LogLevel aliases (re-)documented here (JuliaLang/julia#40978)
+"""
+    Debug
+
+Alias for [`LogLevel(-1000)`](@ref LogLevel).
+"""
+const Debug = Base.CoreLogging.Debug
+"""
+    Info
+
+Alias for [`LogLevel(0)`](@ref LogLevel).
+"""
+const Info = Base.CoreLogging.Info
+"""
+    Warn
+
+Alias for [`LogLevel(1000)`](@ref LogLevel).
+"""
+const Warn = Base.CoreLogging.Warn
+"""
+    Error
+
+Alias for [`LogLevel(2000)`](@ref LogLevel).
+"""
+const Error = Base.CoreLogging.Error
+
+using Base.CoreLogging:
+    closed_stream
+
 export
     AbstractLogger,
     LogLevel,
@@ -43,7 +72,13 @@ export
     global_logger,
     disable_logging,
     SimpleLogger,
-    ConsoleLogger
+    ConsoleLogger,
+    BelowMinLevel,
+    Debug,
+    Info,
+    Warn,
+    Error,
+    AboveMaxLevel
 
 include("ConsoleLogger.jl")
 
@@ -56,7 +91,7 @@ include("ConsoleLogger.jl")
 #  handle_message, shouldlog, min_enabled_level, catch_exceptions,
 
 function __init__()
-    global_logger(ConsoleLogger(stderr))
+    global_logger(ConsoleLogger())
 end
 
 end
diff --git a/stdlib/Logging/test/runtests.jl b/stdlib/Logging/test/runtests.jl
index 5c442de32c896e..b6b48139645368 100644
--- a/stdlib/Logging/test/runtests.jl
+++ b/stdlib/Logging/test/runtests.jl
@@ -193,6 +193,9 @@ end
     └ SUFFIX
     """
 
+    # Execute backtrace once before checking formatting, see #3885
+    backtrace()
+
     # Attaching backtraces
     bt = func1()
     @test startswith(genmsg("msg", exception=(DivideError(),bt)),
@@ -256,4 +259,20 @@ end
 
 end
 
+@testset "exported names" begin
+    m = Module(:ExportedLoggingNames)
+    include_string(m, """
+        using Logging
+        function run()
+            BelowMinLevel === Logging.BelowMinLevel &&
+            Debug === Logging.Debug &&
+            Info === Logging.Info &&
+            Warn === Logging.Warn &&
+            Error === Logging.Error &&
+            AboveMaxLevel === Logging.AboveMaxLevel
+        end
+        """)
+    @test m.run()
+end
+
 end
diff --git a/stdlib/MPFR_jll/Project.toml b/stdlib/MPFR_jll/Project.toml
new file mode 100644
index 00000000000000..22aa30d20511b9
--- /dev/null
+++ b/stdlib/MPFR_jll/Project.toml
@@ -0,0 +1,17 @@
+name = "MPFR_jll"
+uuid = "3a97d323-0669-5f0c-9066-3539efd106a3"
+version = "4.1.1+1"
+
+[deps]
+GMP_jll = "781609d7-10c4-51f6-84f2-b8444358ff6d"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/MPFR_jll/src/MPFR_jll.jl b/stdlib/MPFR_jll/src/MPFR_jll.jl
new file mode 100644
index 00000000000000..5b2dbd1e84b247
--- /dev/null
+++ b/stdlib/MPFR_jll/src/MPFR_jll.jl
@@ -0,0 +1,45 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/MPFR_jll.jl
+baremodule MPFR_jll
+using Base, Libdl, GMP_jll
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libmpfr
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libmpfr_handle = C_NULL
+libmpfr_path = ""
+
+if Sys.iswindows()
+    const libmpfr = "libmpfr-6.dll"
+elseif Sys.isapple()
+    const libmpfr = "@rpath/libmpfr.6.dylib"
+else
+    const libmpfr = "libmpfr.so.6"
+end
+
+function __init__()
+    global libmpfr_handle = dlopen(libmpfr)
+    global libmpfr_path = dlpath(libmpfr_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libmpfr_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libmpfr_path() = libmpfr_path
+
+end  # module MPFR_jll
diff --git a/stdlib/MPFR_jll/test/runtests.jl b/stdlib/MPFR_jll/test/runtests.jl
new file mode 100644
index 00000000000000..68bb6d3ec40e42
--- /dev/null
+++ b/stdlib/MPFR_jll/test/runtests.jl
@@ -0,0 +1,8 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, MPFR_jll
+
+@testset "MPFR_jll" begin
+    vn = VersionNumber(unsafe_string(ccall((:mpfr_get_version,libmpfr), Cstring, ())))
+    @test vn == v"4.1.0"
+end
diff --git a/stdlib/Makefile b/stdlib/Makefile
index e4e1b524ac93a0..9c18fa261b985a 100644
--- a/stdlib/Makefile
+++ b/stdlib/Makefile
@@ -6,6 +6,7 @@ SRCCACHE := $(abspath $(SRCDIR)/srccache)
 BUILDDIR := .
 
 include $(JULIAHOME)/Make.inc
+include $(JULIAHOME)/deps/Versions.make
 include $(JULIAHOME)/deps/tools/common.mk
 include $(JULIAHOME)/deps/tools/stdlib-external.mk
 
@@ -14,21 +15,36 @@ VERSDIR := v$(shell cut -d. -f1-2 < $(JULIAHOME)/VERSION)
 $(build_datarootdir)/julia/stdlib/$(VERSDIR):
 	mkdir -p $@
 
+JLLS = DSFMT GMP CURL LIBGIT2 LLVM LIBSSH2 LIBUV MBEDTLS MPFR NGHTTP2 \
+       BLASTRAMPOLINE OPENBLAS OPENLIBM P7ZIP PCRE LIBSUITESPARSE ZLIB \
+       LLVMUNWIND CSL UNWIND
+
+# Initialize this with JLLs that aren't in deps/Versions.make
+JLL_NAMES := MozillaCACerts_jll
+get-MozillaCACerts_jll:
+install-MozillaCACerts_jll:
+
+# Define rule to download `StdlibArtifacts.toml` files for each JLL we bundle.
+define download-artifacts-toml
+JLL_NAMES += $$($(1)_JLL_NAME)_jll
+$(1)_STDLIB_PATH := $$(JULIAHOME)/stdlib/$$($(1)_JLL_NAME)_jll
+$(1)_JLL_VER ?= $$(shell [ -f $$($(1)_STDLIB_PATH)/Project.toml ] && grep "^version" $$($(1)_STDLIB_PATH)/Project.toml | sed -E 's/version[[:space:]]*=[[:space:]]*"?([^"]+)"?/\1/')
+
+$$($(1)_STDLIB_PATH)/StdlibArtifacts.toml:
+	$(JLDOWNLOAD) $$@ https://github.com/JuliaBinaryWrappers/$$($(1)_JLL_NAME)_jll.jl/raw/$$($(1)_JLL_NAME)-v$$($(1)_JLL_VER)/Artifacts.toml
+get-$$($(1)_JLL_NAME)_jll: $$($(1)_STDLIB_PATH)/StdlibArtifacts.toml
+install-$$($(1)_JLL_NAME)_jll: get-$$($(1)_JLL_NAME)_jll
+endef
+$(foreach jll,$(JLLS),$(eval $(call download-artifacts-toml,$(jll))))
+
+
 STDLIBS = Artifacts Base64 CRC32c Dates DelimitedFiles Distributed FileWatching \
-          Future InteractiveUtils Libdl LibGit2 LinearAlgebra Logging \
+          Future InteractiveUtils LazyArtifacts Libdl LibGit2 LinearAlgebra Logging \
           Markdown Mmap Printf Profile Random REPL Serialization SHA \
           SharedArrays Sockets SparseArrays SuiteSparse Test TOML Unicode UUIDs \
-          MozillaCACerts_jll LibCURL_jll
-
-STDLIBS_EXT = Pkg Statistics LibCURL Downloads
-PKG_GIT_URL := git://github.com/JuliaLang/Pkg.jl.git
-PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
-STATISTICS_GIT_URL := git://github.com/JuliaLang/Statistics.jl.git
-STATISTICS_TAR_URL = https://api.github.com/repos/JuliaLang/Statistics.jl/tarball/$1
-LIBCURL_GIT_URL := git://github.com/JuliaWeb/LibCURL.jl.git
-LIBCURL_TAR_URL = https://api.github.com/repos/JuliaWeb/LibCURL.jl/tarball/$1
-DOWNLOADS_GIT_URL := git://github.com/JuliaLang/Downloads.jl.git
-DOWNLOADS_TAR_URL = https://api.github.com/repos/JuliaLang/Downloads.jl/tarball/$1
+          $(JLL_NAMES)
+
+STDLIBS_EXT = Pkg Statistics LibCURL Downloads ArgTools Tar NetworkOptions SuiteSparse SparseArrays SHA
 
 $(foreach module, $(STDLIBS_EXT), $(eval $(call stdlib-external,$(module),$(shell echo $(module) | tr a-z A-Z))))
 
@@ -37,10 +53,17 @@ $(foreach module, $(STDLIBS), $(eval $(call symlink_target,$$(JULIAHOME)/stdlib/
 
 STDLIBS_LINK_TARGETS := $(addprefix $(build_datarootdir)/julia/stdlib/$(VERSDIR)/,$(STDLIBS))
 
-getall get: $(addprefix get-, $(STDLIBS_EXT))
-install: $(addprefix install-, $(STDLIBS_EXT)) $(STDLIBS_LINK_TARGETS)
-clean: $(addprefix clean-, $(STDLIBS_EXT)) $(CLEAN_TARGETS)
+getall get: $(addprefix get-, $(STDLIBS_EXT) $(JLL_NAMES))
+install: version-check $(addprefix install-, $(STDLIBS_EXT) $(JLL_NAMES)) $(STDLIBS_LINK_TARGETS)
+version-check: $(addprefix version-check-, $(STDLIBS_EXT))
+uninstall: $(addprefix uninstall-, $(STDLIBS_EXT))
+extstdlibclean:
+	for module in $(STDLIBS_EXT) ; do \
+		rm -rf $(JULIAHOME)/stdlib/$${module}-*; \
+	done
+clean: $(addprefix clean-, $(STDLIBS_EXT)) $(CLEAN_TARGETS) extstdlibclean
 distclean: $(addprefix distclean-, $(STDLIBS_EXT)) clean
+checksumall: $(addprefix checksum-, $(STDLIBS_EXT))
 
-DEP_LIBS_STAGED := $(STDLIBS_EXT)
+DEP_LIBS_STAGED_ALL := $(STDLIBS_EXT)
 include $(JULIAHOME)/deps/tools/uninstallers.mk
diff --git a/stdlib/Markdown/src/Common/Common.jl b/stdlib/Markdown/src/Common/Common.jl
index 0891765b277ba1..3036f2b4b730b1 100644
--- a/stdlib/Markdown/src/Common/Common.jl
+++ b/stdlib/Markdown/src/Common/Common.jl
@@ -8,4 +8,3 @@ include("inline.jl")
 
                 linebreak, escapes, inline_code,
                 asterisk_bold, underscore_bold, asterisk_italic, underscore_italic, image, footnote_link, link, autolink]
-
diff --git a/stdlib/Markdown/src/Common/inline.jl b/stdlib/Markdown/src/Common/inline.jl
index d2855f27a7addd..fd5134481e113b 100644
--- a/stdlib/Markdown/src/Common/inline.jl
+++ b/stdlib/Markdown/src/Common/inline.jl
@@ -146,13 +146,10 @@ function _is_link(s::AbstractString)
 end
 
 # non-normative regex from the HTML5 spec
-const _email_regex = r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
+const _email_regex = r"^mailto\:[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
 
 function _is_mailto(s::AbstractString)
-    length(s) < 6 && return false
-    # slicing strings is a bit risky, but this equality check is safe
-    lowercase(s[1:6]) == "mailto:" || return false
-    return occursin(_email_regex, s[6:end])
+    return occursin(_email_regex, s)
 end
 
 # –––––––––––
diff --git a/stdlib/Markdown/src/GitHub/GitHub.jl b/stdlib/Markdown/src/GitHub/GitHub.jl
index 493e01b0852586..61807d267511d6 100644
--- a/stdlib/Markdown/src/GitHub/GitHub.jl
+++ b/stdlib/Markdown/src/GitHub/GitHub.jl
@@ -62,4 +62,3 @@ end
 
                 linebreak, escapes, en_dash, inline_code, asterisk_bold,
                 underscore_bold, asterisk_italic, underscore_italic, image, footnote_link, link, autolink]
-
diff --git a/stdlib/Markdown/src/GitHub/table.jl b/stdlib/Markdown/src/GitHub/table.jl
index 08a375cca2fb8e..29f956e9a07107 100644
--- a/stdlib/Markdown/src/GitHub/table.jl
+++ b/stdlib/Markdown/src/GitHub/table.jl
@@ -45,7 +45,6 @@ function github_table(stream::IO, md::MD)
         align = nothing
         while (row = parserow(stream)) !== nothing
             if length(rows) == 0
-                isempty(row[1]) && return false
                 cols = length(row)
             end
             if align === nothing && length(rows) == 1 # Must have a --- row
diff --git a/stdlib/Markdown/src/Julia/Julia.jl b/stdlib/Markdown/src/Julia/Julia.jl
index 7ee049970277a2..3797c5a8a0f791 100644
--- a/stdlib/Markdown/src/Julia/Julia.jl
+++ b/stdlib/Markdown/src/Julia/Julia.jl
@@ -12,4 +12,3 @@ include("interp.jl")
 
                linebreak, escapes, tex, interp, en_dash, inline_code,
                asterisk_bold, underscore_bold, asterisk_italic, underscore_italic, image, footnote_link, link, autolink]
-
diff --git a/stdlib/Markdown/src/parse/parse.jl b/stdlib/Markdown/src/parse/parse.jl
index 9ecd7bdd5307dc..452d90d1176e10 100644
--- a/stdlib/Markdown/src/parse/parse.jl
+++ b/stdlib/Markdown/src/parse/parse.jl
@@ -2,7 +2,7 @@
 
 mutable struct MD
     content::Vector{Any}
-    meta::Dict{Any, Any}
+    meta::Dict{Symbol, Any}
 
     MD(content::AbstractVector, meta::Dict = Dict()) =
         new(content, meta)
diff --git a/stdlib/Markdown/src/render/latex.jl b/stdlib/Markdown/src/render/latex.jl
index ee546be555fa45..d18a2e760ef3df 100644
--- a/stdlib/Markdown/src/render/latex.jl
+++ b/stdlib/Markdown/src/render/latex.jl
@@ -33,8 +33,8 @@ function latex(io::IO, header::Header{l}) where l
 end
 
 function latex(io::IO, code::Code)
+    occursin("\\end{verbatim}", code.code) && error("Cannot include \"\\end{verbatim}\" in a latex code block")
     wrapblock(io, "verbatim") do
-        # TODO latex escape
         println(io, code.code)
     end
 end
diff --git a/stdlib/Markdown/src/render/terminal/formatting.jl b/stdlib/Markdown/src/render/terminal/formatting.jl
index 8c1fbc393d45bf..87022124b9c8a8 100644
--- a/stdlib/Markdown/src/render/terminal/formatting.jl
+++ b/stdlib/Markdown/src/render/terminal/formatting.jl
@@ -9,33 +9,30 @@ end
 words(s) = split(s, " ")
 lines(s) = split(s, "\n")
 
-function _wrapped_lines(s::AbstractString, width, i)
+function wrapped_line(io::IO, s::AbstractString, width, i)
     ws = words(s)
     lines = String[]
     for word in ws
         word_length = ansi_length(word)
-        if i + word_length + 1 > width
+        word_length == 0 && continue
+        if isempty(lines) || i + word_length + 1 > width
             i = word_length
             push!(lines, word)
         else
             i += word_length + 1
-            if isempty(lines)
-                push!(lines, word)
-            else
-                lines[end] *= " " * word   # this could be more efficient
-            end
+            lines[end] *= " " * word   # this could be more efficient
         end
     end
     return i, lines
 end
 
 function wrapped_lines(io::IO, s::AbstractString; width = 80, i = 0)
-    lines = String[]
-    for ss in split(s, "\n")
-        i, line = _wrapped_lines(ss, width, i)
-        append!(lines, line)
+    ls = String[]
+    for ss in lines(s)
+        i, line = wrapped_line(io, ss, width, i)
+        append!(ls, line)
     end
-    return lines
+    return ls
 end
 
 wrapped_lines(io::IO, f::Function, args...; width = 80, i = 0) =
@@ -43,6 +40,7 @@ wrapped_lines(io::IO, f::Function, args...; width = 80, i = 0) =
 
 function print_wrapped(io::IO, s...; width = 80, pre = "", i = 0)
     lines = wrapped_lines(io, s..., width = width, i = i)
+    isempty(lines) && return 0, 0
     print(io, lines[1])
     for line in lines[2:end]
         print(io, '\n', pre, line)
diff --git a/stdlib/Markdown/src/render/terminal/render.jl b/stdlib/Markdown/src/render/terminal/render.jl
index 434b5c3aa5f8d9..3fd274aee2a2e4 100644
--- a/stdlib/Markdown/src/render/terminal/render.jl
+++ b/stdlib/Markdown/src/render/terminal/render.jl
@@ -70,7 +70,7 @@ end
 function term(io::IO, md::List, columns)
     for (i, point) in enumerate(md.items)
         print(io, ' '^2margin, isordered(md) ? "$(i + md.ordered - 1). " : "•  ")
-        print_wrapped(io, width = columns-(4margin+2), pre = ' '^(2margin+2),
+        print_wrapped(io, width = columns-(4margin+2), pre = ' '^(2margin+3),
                           i = 2margin+2) do io
             term(io, point, columns - 10)
         end
diff --git a/stdlib/Markdown/test/runtests.jl b/stdlib/Markdown/test/runtests.jl
index ebc3ba98ef813f..dfe80430a00d6e 100644
--- a/stdlib/Markdown/test/runtests.jl
+++ b/stdlib/Markdown/test/runtests.jl
@@ -272,6 +272,42 @@ end
     | L |
 """) == "  │ Tables in admonitions\n  │\n  │  R\n  │  –\n  │  L"
 
+# Issue #38275
+function test_list_wrap(str, lenmin, lenmax)
+    strs = split(str, '\n')
+    l = length.(strs)
+    for i = 1:length(l)-1
+        if l[i] != 0 && l[i+1] != 0    # the next line isn't blank, so this line should be "full"
+            lenmin <= l[i] <= lenmax || return false
+        else
+            l[i] <= lenmax || return false   # this line isn't too long (but there is no min)
+        end
+    end
+    # Check consistent indentation
+    rngs = findfirst.((". ",), strs)
+    k = last(rngs[1])
+    rex = Regex('^' * " "^k * "\\w")
+    for (i, rng) in enumerate(rngs)
+        isa(rng, AbstractRange) && last(rng) == k && continue  # every numbered line starts the text at the same position
+        rng === nothing && (isempty(strs[i]) || match(rex, strs[i]) !== nothing) && continue  # every unnumbered line is indented to text in numbered lines
+        return false
+    end
+    return true
+end
+
+let doc =
+    md"""
+    1. a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij
+    2. a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij a bc def ghij
+    """
+    str = sprint(term, doc, 50)
+    @test test_list_wrap(str, 40, 50)
+    str = sprint(term, doc, 60)
+    @test test_list_wrap(str, 50, 60)
+    str = sprint(term, doc, 80)
+    @test test_list_wrap(str, 70, 80)
+end
+
 # HTML output
 @test md"foo *bar* baz" |> html == "<p>foo <em>bar</em> baz</p>\n"
 @test md"something ***" |> html == "<p>something ***</p>\n"
@@ -340,7 +376,7 @@ table = md"""
 # mime output
 let out =
     @test sprint(show, "text/plain", book) ==
-        "  Title\n  ≡≡≡≡≡≡≡\n\n  Some discussion\n\n  │  A quote\n\n  Section important\n  ===================\n\n  Some bolded\n\n    •    list1\n\n    •    list2"
+        "  Title\n  ≡≡≡≡≡≡≡\n\n  Some discussion\n\n  │  A quote\n\n  Section important\n  ===================\n\n  Some bolded\n\n    •  list1\n\n    •  list2"
     @test sprint(show, "text/markdown", book) ==
         """
         # Title
@@ -493,6 +529,12 @@ foo()
                                                           ["hgh",Bold("jhj"),"ge"],
                                                           "f"]],
                                                   [:l, :r, :r]))
+@test md"""
+    |   | b |
+    |:--|--:|
+    | 1 |   |""" == MD(Table(Any[[Any[],"b"],
+                                 ["1",Any[]]], [:l, :r]))
+
 @test md"""
 no|table
 no error
@@ -1172,10 +1214,49 @@ end
         """)
 end
 
+@testset "issue 40080: empty list item breaks display()" begin
+    d = TextDisplay(devnull)
+    display(d, md"""
+               1. hello
+               2.
+               """)
+end
+
 @testset "issue #37232: linebreaks" begin
     s = @md_str """
        Misc:\\
        - line\\
        """
-    @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line\n  "
+    @test sprint(show, MIME("text/plain"), s) == "  Misc:\n  - line"
+end
+
+@testset "pullrequest #41552: a code block has \\end{verbatim}" begin
+    s1 = md"""
+         ```tex
+         \begin{document}
+         \end{document}
+         ```
+         """
+    s2 = md"""
+         ```tex
+         \begin{verbatim}
+         \end{verbatim}
+         ```
+         """
+    @test Markdown.latex(s1) == """
+                                \\begin{verbatim}
+                                \\begin{document}
+                                \\end{document}
+                                \\end{verbatim}
+                                """
+    @test_throws ErrorException Markdown.latex(s2)
+end
+
+@testset "issue #42139: autolink" begin
+    # ok
+    @test md"<mailto:foo@bar.com>" |> html == """<p><a href="mailto:foo@bar.com">mailto:foo@bar.com</a></p>\n"""
+    # not ok
+    @test md"<mailto foo@bar.com>" |> html == """<p>&lt;mailto foo@bar.com&gt;</p>\n"""
+    # see issue #42139
+    @test md"<一轮红日初升>" |> html == """<p>&lt;一轮红日初升&gt;</p>\n"""
 end
diff --git a/stdlib/MbedTLS_jll/Project.toml b/stdlib/MbedTLS_jll/Project.toml
new file mode 100644
index 00000000000000..00a6b29426d913
--- /dev/null
+++ b/stdlib/MbedTLS_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "MbedTLS_jll"
+uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
+version = "2.28.0+0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.8"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
new file mode 100644
index 00000000000000..338bec9503c073
--- /dev/null
+++ b/stdlib/MbedTLS_jll/src/MbedTLS_jll.jl
@@ -0,0 +1,62 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/MbedTLS_jll.jl
+
+baremodule MbedTLS_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libmbedcrypto, libmbedtls, libmbedx509
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libmbedcrypto_handle = C_NULL
+libmbedcrypto_path = ""
+libmbedtls_handle = C_NULL
+libmbedtls_path = ""
+libmbedx509_handle = C_NULL
+libmbedx509_path = ""
+
+if Sys.iswindows()
+    const libmbedcrypto = "libmbedcrypto.dll"
+    const libmbedtls = "libmbedtls.dll"
+    const libmbedx509 = "libmbedx509.dll"
+elseif Sys.isapple()
+    const libmbedcrypto = "@rpath/libmbedcrypto.7.dylib"
+    const libmbedtls = "@rpath/libmbedtls.14.dylib"
+    const libmbedx509 = "@rpath/libmbedx509.1.dylib"
+else
+    const libmbedcrypto = "libmbedcrypto.so.7"
+    const libmbedtls = "libmbedtls.so.14"
+    const libmbedx509 = "libmbedx509.so.1"
+end
+
+function __init__()
+    global libmbedcrypto_handle = dlopen(libmbedcrypto)
+    global libmbedcrypto_path = dlpath(libmbedcrypto_handle)
+    global libmbedtls_handle = dlopen(libmbedtls)
+    global libmbedtls_path = dlpath(libmbedtls_handle)
+    global libmbedx509_handle = dlopen(libmbedx509)
+    global libmbedx509_path = dlpath(libmbedx509_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libmbedtls_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libmbedcrypto_path() =libmbedcrypto_path
+get_libmbedtls_path() = libmbedtls_path
+get_libmbedx509_path() = libmbedx509_path
+
+end  # module MbedTLS_jll
diff --git a/stdlib/MbedTLS_jll/test/runtests.jl b/stdlib/MbedTLS_jll/test/runtests.jl
new file mode 100644
index 00000000000000..b731d7f833043d
--- /dev/null
+++ b/stdlib/MbedTLS_jll/test/runtests.jl
@@ -0,0 +1,10 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, MbedTLS_jll
+
+@testset "MbedTLS_jll" begin
+    vstr = zeros(UInt8, 32)
+    ccall((:mbedtls_version_get_string, libmbedcrypto), Cvoid, (Ref{UInt8},), vstr)
+    vn = VersionNumber(unsafe_string(pointer(vstr)))
+    @test vn == v"2.28.0"
+end
diff --git a/stdlib/Mmap/src/Mmap.jl b/stdlib/Mmap/src/Mmap.jl
index 69d48b36568073..629f53e8371edd 100644
--- a/stdlib/Mmap/src/Mmap.jl
+++ b/stdlib/Mmap/src/Mmap.jl
@@ -7,11 +7,13 @@ module Mmap
 
 import Base: OS_HANDLE, INVALID_OS_HANDLE
 
+export mmap
+
 const PAGESIZE = Int(Sys.isunix() ? ccall(:jl_getpagesize, Clong, ()) : ccall(:jl_getallocationgranularity, Clong, ()))
 
 # for mmaps not backed by files
 mutable struct Anonymous <: IO
-    name::AbstractString
+    name::String
     readonly::Bool
     create::Bool
 end
@@ -20,7 +22,7 @@ end
     Mmap.Anonymous(name::AbstractString="", readonly::Bool=false, create::Bool=true)
 
 Create an `IO`-like object for creating zeroed-out mmapped-memory that is not tied to a file
-for use in [`Mmap.mmap`](@ref Mmap.mmap). Used by `SharedArray` for creating shared memory arrays.
+for use in [`mmap`](@ref mmap). Used by `SharedArray` for creating shared memory arrays.
 
 # Examples
 ```jldoctest
@@ -123,8 +125,8 @@ end # os-test
 # core implementation of mmap
 
 """
-    Mmap.mmap(io::Union{IOStream,AbstractString,Mmap.AnonymousMmap}[, type::Type{Array{T,N}}, dims, offset]; grow::Bool=true, shared::Bool=true)
-    Mmap.mmap(type::Type{Array{T,N}}, dims)
+    mmap(io::Union{IOStream,AbstractString,Mmap.AnonymousMmap}[, type::Type{Array{T,N}}, dims, offset]; grow::Bool=true, shared::Bool=true)
+    mmap(type::Type{Array{T,N}}, dims)
 
 Create an `Array` whose values are linked to a file, using memory-mapping. This provides a
 convenient way of working with data too large to fit in the computer's memory.
@@ -172,7 +174,7 @@ close(s)
 s = open("/tmp/mmap.bin")   # default is read-only
 m = read(s, Int)
 n = read(s, Int)
-A2 = Mmap.mmap(s, Matrix{Int}, (m,n))
+A2 = mmap(s, Matrix{Int}, (m,n))
 ```
 
 creates a `m`-by-`n` `Matrix{Int}`, linked to the file associated with stream `s`.
@@ -189,7 +191,11 @@ function mmap(io::IO,
     isopen(io) || throw(ArgumentError("$io must be open to mmap"))
     isbitstype(T)  || throw(ArgumentError("unable to mmap $T; must satisfy isbitstype(T) == true"))
 
-    len = prod(dims) * sizeof(T)
+    len = sizeof(T)
+    for l in dims
+        len, overflow = Base.Checked.mul_with_overflow(promote(len, l)...)
+        overflow && throw(ArgumentError("requested size prod($((sizeof(T), dims...))) too large, would overflow typeof(size(T)) == $(typeof(len))"))
+    end
     len >= 0 || throw(ArgumentError("requested size must be ≥ 0, got $len"))
     len == 0 && return Array{T}(undef, ntuple(x->0,Val(N)))
     len < typemax(Int) - PAGESIZE || throw(ArgumentError("requested size must be < $(typemax(Int)-PAGESIZE), got $len"))
@@ -254,10 +260,10 @@ mmap(::Type{T}, dims::NTuple{N,Integer}; shared::Bool=true) where {T<:Array,N} =
 mmap(::Type{T}, i::Integer...; shared::Bool=true) where {T<:Array} = mmap(Anonymous(), T, convert(Tuple{Vararg{Int}},i), Int64(0); shared=shared)
 
 """
-    Mmap.mmap(io, BitArray, [dims, offset])
+    mmap(io, BitArray, [dims, offset])
 
 Create a [`BitArray`](@ref) whose values are linked to a file, using memory-mapping; it has the same
-purpose, works in the same way, and has the same arguments, as [`mmap`](@ref Mmap.mmap), but
+purpose, works in the same way, and has the same arguments, as [`mmap`](@ref mmap), but
 the byte representation is different.
 
 # Examples
@@ -266,7 +272,7 @@ julia> using Mmap
 
 julia> io = open("mmap.bin", "w+");
 
-julia> B = Mmap.mmap(io, BitArray, (25,30000));
+julia> B = mmap(io, BitArray, (25,30000));
 
 julia> B[3, 4000] = true;
 
@@ -276,7 +282,7 @@ julia> close(io);
 
 julia> io = open("mmap.bin", "r+");
 
-julia> C = Mmap.mmap(io, BitArray, (25,30000));
+julia> C = mmap(io, BitArray, (25,30000));
 
 julia> C[3, 4000]
 true
@@ -335,10 +341,10 @@ const MS_SYNC = 4
 Forces synchronization between the in-memory version of a memory-mapped `Array` or
 [`BitArray`](@ref) and the on-disk version.
 """
-function sync!(m::Array{T}, flags::Integer=MS_SYNC) where T
+function sync!(m::Array, flags::Integer=MS_SYNC)
     offset = rem(UInt(pointer(m)), PAGESIZE)
     ptr = pointer(m) - offset
-    mmaplen = length(m) * sizeof(T) + offset
+    mmaplen = sizeof(m) + offset
     GC.@preserve m @static if Sys.isunix()
         systemerror("msync",
                     ccall(:msync, Cint, (Ptr{Cvoid}, Csize_t, Cint), ptr, mmaplen, flags) != 0)
@@ -349,4 +355,64 @@ function sync!(m::Array{T}, flags::Integer=MS_SYNC) where T
 end
 sync!(B::BitArray, flags::Integer=MS_SYNC) = sync!(B.chunks, flags)
 
+@static if Sys.isunix()
+const MADV_NORMAL = 0
+const MADV_RANDOM = 1
+const MADV_SEQUENTIAL = 2
+const MADV_WILLNEED = 3
+const MADV_DONTNEED = 4
+if Sys.islinux()
+    const MADV_FREE = 8
+    const MADV_REMOVE = 9
+    const MADV_DONTFORK = 10
+    const MADV_DOFORK = 11
+    const MADV_MERGEABLE = 12
+    const MADV_UNMERGEABLE = 13
+    const MADV_HUGEPAGE = 14
+    const MADV_NOHUGEPAGE = 15
+    const MADV_DONTDUMP = 16
+    const MADV_DODUMP = 17
+    const MADV_WIPEONFORK = 18
+    const MADV_KEEPONFORK = 19
+    const MADV_COLD = 20
+    const MADV_PAGEOUT = 21
+    const MADV_HWPOISON = 100
+    const MADV_SOFT_OFFLINE = 101
+elseif Sys.isapple()
+    const MADV_FREE = 5
+elseif Sys.isfreebsd() || Sys.isdragonfly()
+    const MADV_FREE = 5
+    const MADV_NOSYNC = 6
+    const MADV_AUTOSYNC = 7
+    const MADV_NOCORE = 8
+    const MADV_CORE = 9
+    if Sys.isfreebsd()
+        const MADV_PROTECT = 10
+    else
+        const MADV_INVAL = 10
+        const MADV_SETMAP = 11
+    end
+elseif Sys.isopenbsd() || Sys.isnetbsd()
+    const MADV_SPACEAVAIL = 5
+    const MADV_FREE = 6
+end
+
+"""
+    Mmap.madvise!(array, flag::Integer = Mmap.MADV_NORMAL)
+
+Advises the kernel on the intended usage of the memory-mapped `array`, with the intent
+`flag` being one of the available `MADV_*` constants.
+"""
+function madvise!(m::Array, flag::Integer=MADV_NORMAL)
+    offset = rem(UInt(pointer(m)), PAGESIZE)
+    ptr = pointer(m) - offset
+    mmaplen = sizeof(m) + offset
+    GC.@preserve m begin
+        systemerror("madvise",
+                    ccall(:madvise, Cint, (Ptr{Cvoid}, Csize_t, Cint), ptr, mmaplen, flag) != 0)
+    end
+end
+madvise!(B::BitArray, flag::Integer=MADV_NORMAL) = madvise!(B.chunks, flag)
+end # Sys.isunix()
+
 end # module
diff --git a/stdlib/Mmap/test/runtests.jl b/stdlib/Mmap/test/runtests.jl
index 46b08992dc6996..0b3cb0b9f1a426 100644
--- a/stdlib/Mmap/test/runtests.jl
+++ b/stdlib/Mmap/test/runtests.jl
@@ -5,53 +5,53 @@ using Test, Mmap, Random
 file = tempname()
 write(file, "Hello World\n")
 t = b"Hello World"
-@test Mmap.mmap(file, Array{UInt8,3}, (11,1,1)) == reshape(t,(11,1,1))
+@test mmap(file, Array{UInt8,3}, (11,1,1)) == reshape(t,(11,1,1))
 GC.gc(); GC.gc()
-@test Mmap.mmap(file, Array{UInt8,3}, (1,11,1)) == reshape(t,(1,11,1))
+@test mmap(file, Array{UInt8,3}, (1,11,1)) == reshape(t,(1,11,1))
 GC.gc(); GC.gc()
-@test Mmap.mmap(file, Array{UInt8,3}, (1,1,11)) == reshape(t,(1,1,11))
+@test mmap(file, Array{UInt8,3}, (1,1,11)) == reshape(t,(1,1,11))
 GC.gc(); GC.gc()
-@test Mmap.mmap(file, Array{UInt8,3}, (11,0,1)) == Array{UInt8}(undef, (0,0,0))
-@test Mmap.mmap(file, Vector{UInt8}, (11,)) == t
+@test mmap(file, Array{UInt8,3}, (11,0,1)) == Array{UInt8}(undef, (0,0,0))
+@test mmap(file, Vector{UInt8}, (11,)) == t
 GC.gc(); GC.gc()
-@test Mmap.mmap(file, Array{UInt8,2}, (1,11)) == t'
+@test mmap(file, Array{UInt8,2}, (1,11)) == t'
 GC.gc(); GC.gc()
-@test Mmap.mmap(file, Array{UInt8,2}, (0,12)) == Array{UInt8}(undef, (0,0))
-m = Mmap.mmap(file, Array{UInt8,3}, (1,2,1))
+@test mmap(file, Array{UInt8,2}, (0,12)) == Array{UInt8}(undef, (0,0))
+m = mmap(file, Array{UInt8,3}, (1,2,1))
 @test m == reshape(b"He",(1,2,1))
 finalize(m); m=nothing; GC.gc()
 
 # constructors
-@test length(@inferred Mmap.mmap(file)) == 12
-@test length(@inferred Mmap.mmap(file, Vector{Int8})) == 12
-@test length(@inferred Mmap.mmap(file, Matrix{Int8}, (12,1))) == 12
-@test length(@inferred Mmap.mmap(file, Matrix{Int8}, (12,1), 0)) == 12
-@test length(@inferred Mmap.mmap(file, Matrix{Int8}, (12,1), 0; grow=false)) == 12
-@test length(@inferred Mmap.mmap(file, Matrix{Int8}, (12,1), 0; shared=false)) == 12
-@test length(@inferred Mmap.mmap(file, Vector{Int8}, 12)) == 12
-@test length(@inferred Mmap.mmap(file, Vector{Int8}, 12, 0)) == 12
-@test length(@inferred Mmap.mmap(file, Vector{Int8}, 12, 0; grow=false)) == 12
-@test length(@inferred Mmap.mmap(file, Vector{Int8}, 12, 0; shared=false)) == 12
+@test length(@inferred mmap(file)) == 12
+@test length(@inferred mmap(file, Vector{Int8})) == 12
+@test length(@inferred mmap(file, Matrix{Int8}, (12,1))) == 12
+@test length(@inferred mmap(file, Matrix{Int8}, (12,1), 0)) == 12
+@test length(@inferred mmap(file, Matrix{Int8}, (12,1), 0; grow=false)) == 12
+@test length(@inferred mmap(file, Matrix{Int8}, (12,1), 0; shared=false)) == 12
+@test length(@inferred mmap(file, Vector{Int8}, 12)) == 12
+@test length(@inferred mmap(file, Vector{Int8}, 12, 0)) == 12
+@test length(@inferred mmap(file, Vector{Int8}, 12, 0; grow=false)) == 12
+@test length(@inferred mmap(file, Vector{Int8}, 12, 0; shared=false)) == 12
 s = open(file)
-@test length(@inferred Mmap.mmap(s)) == 12
-@test length(@inferred Mmap.mmap(s, Vector{Int8})) == 12
-@test length(@inferred Mmap.mmap(s, Matrix{Int8}, (12,1))) == 12
-@test length(@inferred Mmap.mmap(s, Matrix{Int8}, (12,1), 0)) == 12
-@test length(@inferred Mmap.mmap(s, Matrix{Int8}, (12,1), 0; grow=false)) == 12
-@test length(@inferred Mmap.mmap(s, Matrix{Int8}, (12,1), 0; shared=false)) == 12
-@test length(@inferred Mmap.mmap(s, Vector{Int8}, 12)) == 12
-@test length(@inferred Mmap.mmap(s, Vector{Int8}, 12, 0)) == 12
-@test length(@inferred Mmap.mmap(s, Vector{Int8}, 12, 0; grow=false)) == 12
-@test length(@inferred Mmap.mmap(s, Vector{Int8}, 12, 0; shared=false)) == 12
+@test length(@inferred mmap(s)) == 12
+@test length(@inferred mmap(s, Vector{Int8})) == 12
+@test length(@inferred mmap(s, Matrix{Int8}, (12,1))) == 12
+@test length(@inferred mmap(s, Matrix{Int8}, (12,1), 0)) == 12
+@test length(@inferred mmap(s, Matrix{Int8}, (12,1), 0; grow=false)) == 12
+@test length(@inferred mmap(s, Matrix{Int8}, (12,1), 0; shared=false)) == 12
+@test length(@inferred mmap(s, Vector{Int8}, 12)) == 12
+@test length(@inferred mmap(s, Vector{Int8}, 12, 0)) == 12
+@test length(@inferred mmap(s, Vector{Int8}, 12, 0; grow=false)) == 12
+@test length(@inferred mmap(s, Vector{Int8}, 12, 0; shared=false)) == 12
 close(s)
-@test_throws ErrorException Mmap.mmap(file, Vector{Ref}) # must be bit-type
+@test_throws ErrorException mmap(file, Vector{Ref}) # must be bit-type
 GC.gc(); GC.gc()
 
 s = open(f->f,file,"w")
-@test Mmap.mmap(file) == Vector{UInt8}() # requested len=0 on empty file
-@test Mmap.mmap(file,Vector{UInt8},0) == Vector{UInt8}()
+@test mmap(file) == Vector{UInt8}() # requested len=0 on empty file
+@test mmap(file,Vector{UInt8},0) == Vector{UInt8}()
 s = open(file, "r+")
-m = Mmap.mmap(s,Vector{UInt8},12)
+m = mmap(s,Vector{UInt8},12)
 m[:] = b"Hello World\n"
 Mmap.sync!(m)
 close(s); finalize(m); m=nothing; GC.gc()
@@ -59,55 +59,60 @@ close(s); finalize(m); m=nothing; GC.gc()
 
 s = open(file, "r")
 close(s)
-@test_throws Base.IOError Mmap.mmap(s) # closed IOStream
-@test_throws ArgumentError Mmap.mmap(s,Vector{UInt8},12,0) # closed IOStream
-@test_throws SystemError Mmap.mmap("")
+@test_throws Base.IOError mmap(s) # closed IOStream
+@test_throws ArgumentError mmap(s,Vector{UInt8},12,0) # closed IOStream
+@test_throws SystemError mmap("")
 
 # negative length
-@test_throws ArgumentError Mmap.mmap(file, Vector{UInt8}, -1)
+@test_throws ArgumentError mmap(file, Vector{UInt8}, -1)
 # negative offset
-@test_throws ArgumentError Mmap.mmap(file, Vector{UInt8}, 1, -1)
+@test_throws ArgumentError mmap(file, Vector{UInt8}, 1, -1)
 
 for i = 0x01:0x0c
-    @test length(Mmap.mmap(file, Vector{UInt8}, i)) == Int(i)
+    @test length(mmap(file, Vector{UInt8}, i)) == Int(i)
 end
 GC.gc(); GC.gc()
 
 sz = filesize(file)
 s = open(file, "r+")
-m = Mmap.mmap(s, Vector{UInt8}, sz+1)
+m = mmap(s, Vector{UInt8}, sz+1)
 @test length(m) == sz+1 # test growing
 @test m[end] == 0x00
 close(s); finalize(m); m=nothing; GC.gc()
 sz = filesize(file)
 s = open(file, "r+")
-m = Mmap.mmap(s, Vector{UInt8}, 1, sz)
+m = mmap(s, Vector{UInt8}, 1, sz)
 @test length(m) == 1
 @test m[1] == 0x00
 close(s); finalize(m); m=nothing; GC.gc()
 sz = filesize(file)
 # test where offset is actually > than size of file; file is grown with zeroed bytes
 s = open(file, "r+")
-m = Mmap.mmap(s, Vector{UInt8}, 1, sz+1)
+m = mmap(s, Vector{UInt8}, 1, sz+1)
 @test length(m) == 1
 @test m[1] == 0x00
 close(s); finalize(m); m=nothing; GC.gc()
 
-s = open(file, "r")
-m = Mmap.mmap(s)
-@test_throws ReadOnlyMemoryError m[5] = UInt8('x') # tries to setindex! on read-only array
-finalize(m); m=nothing; GC.gc()
+# See https://github.com/JuliaLang/julia/issues/32155
+# On PPC we receive `SEGV_MAPERR` instead of `SEGV_ACCERR` and
+# can thus not turn the segmentation fault into an exception.
+if !(Sys.ARCH === :powerpc64le || Sys.ARCH === :ppc64le)
+    s = open(file, "r")
+    m = mmap(s)
+    @test_throws ReadOnlyMemoryError m[5] = UInt8('x') # tries to setindex! on read-only array
+    finalize(m); m=nothing; GC.gc()
+end
 
 write(file, "Hello World\n")
 
 s = open(file, "r")
-m = Mmap.mmap(s)
+m = mmap(s)
 close(s)
 finalize(m); m=nothing; GC.gc()
-m = Mmap.mmap(file)
+m = mmap(file)
 s = open(file, "r+")
-c = Mmap.mmap(s)
-d = Mmap.mmap(s)
+c = mmap(s)
+d = mmap(s)
 c[1] = UInt8('J')
 Mmap.sync!(c)
 close(s)
@@ -120,18 +125,19 @@ write(file, "Hello World\n")
 
 s = open(file, "r")
 @test isreadonly(s) == true
-c = Mmap.mmap(s, Vector{UInt8}, (11,))
+c = mmap(s, Vector{UInt8}, (11,))
 @test c == b"Hello World"
 finalize(c); c=nothing; GC.gc()
-c = Mmap.mmap(s, Vector{UInt8}, (UInt16(11),))
+c = mmap(s, Vector{UInt8}, (UInt16(11),))
 @test c == b"Hello World"
 finalize(c); c=nothing; GC.gc()
-@test_throws ArgumentError Mmap.mmap(s, Vector{UInt8}, (Int16(-11),))
-@test_throws ArgumentError Mmap.mmap(s, Vector{UInt8}, (typemax(UInt),))
+@test_throws ArgumentError mmap(s, Vector{UInt8}, (Int16(-11),))
+@test_throws ArgumentError mmap(s, Vector{UInt8}, (typemax(UInt),))
+@test_throws ArgumentError mmap(s, Matrix{UInt8}, (typemax(Int) - Mmap.PAGESIZE - 1, 2)) # overflow
 close(s)
 s = open(file, "r+")
 @test isreadonly(s) == false
-c = Mmap.mmap(s, Vector{UInt8}, (11,))
+c = mmap(s, Vector{UInt8}, (11,))
 c[5] = UInt8('x')
 Mmap.sync!(c)
 close(s)
@@ -141,18 +147,18 @@ close(s)
 @test startswith(str, "Hellx World")
 finalize(c); c=nothing; GC.gc()
 
-c = Mmap.mmap(file)
+c = mmap(file)
 @test c == b"Hellx World\n"
 finalize(c); c=nothing; GC.gc()
-c = Mmap.mmap(file, Vector{UInt8}, 3)
+c = mmap(file, Vector{UInt8}, 3)
 @test c == b"Hel"
 finalize(c); c=nothing; GC.gc()
 s = open(file, "r")
-c = Mmap.mmap(s, Vector{UInt8}, 6)
+c = mmap(s, Vector{UInt8}, 6)
 @test c == b"Hellx "
 close(s)
 finalize(c); c=nothing; GC.gc()
-c = Mmap.mmap(file, Vector{UInt8}, 5, 6)
+c = mmap(file, Vector{UInt8}, 5, 6)
 @test c == b"World"
 finalize(c); c=nothing; GC.gc()
 
@@ -160,8 +166,8 @@ s = open(file, "w")
 write(s, "Hello World\n")
 close(s)
 
-# test Mmap.mmap
-m = Mmap.mmap(file)
+# test mmap
+m = mmap(file)
 tdata = b"Hello World\n"
 for i = 1:12
     @test m[i] == tdata[i]
@@ -169,7 +175,7 @@ end
 @test_throws BoundsError m[13]
 finalize(m); m=nothing; GC.gc()
 
-m = Mmap.mmap(file,Vector{UInt8},6)
+m = mmap(file,Vector{UInt8},6)
 @test m[1] == b"H"[1]
 @test m[2] == b"e"[1]
 @test m[3] == b"l"[1]
@@ -179,7 +185,7 @@ m = Mmap.mmap(file,Vector{UInt8},6)
 @test_throws BoundsError m[7]
 finalize(m); m=nothing; GC.gc()
 
-m = Mmap.mmap(file,Vector{UInt8},2,6)
+m = mmap(file,Vector{UInt8},2,6)
 @test m[1] == b"W"[1]
 @test m[2] == b"o"[1]
 @test_throws BoundsError m[3]
@@ -193,13 +199,13 @@ write(s, [0xffffffffffffffff,
 close(s)
 s = open(file, "r")
 @test isreadonly(s)
-b = @inferred Mmap.mmap(s, BitArray, (17,13))
+b = @inferred mmap(s, BitArray, (17,13))
 @test Test._check_bitarray_consistency(b)
 @test b == trues(17,13)
-@test_throws ArgumentError Mmap.mmap(s, BitArray, (7,3))
+@test_throws ArgumentError mmap(s, BitArray, (7,3))
 close(s)
 s = open(file, "r+")
-b = Mmap.mmap(s, BitArray, (17,19))
+b = mmap(s, BitArray, (17,19))
 @test Test._check_bitarray_consistency(b)
 rand!(b)
 Mmap.sync!(b)
@@ -208,7 +214,7 @@ b0 = copy(b)
 close(s)
 s = open(file, "r")
 @test isreadonly(s)
-b = Mmap.mmap(s, BitArray, (17,19))
+b = mmap(s, BitArray, (17,19))
 @test Test._check_bitarray_consistency(b)
 @test b == b0
 close(s)
@@ -222,29 +228,29 @@ open(file,"w") do f
 end
 @test filesize(file) == 9
 s = open(file, "r+")
-m = Mmap.mmap(s, BitArray, (72,))
+m = mmap(s, BitArray, (72,))
 @test Test._check_bitarray_consistency(m)
 @test length(m) == 72
 close(s); finalize(m); m = nothing; GC.gc()
 
-m = Mmap.mmap(file, BitArray, (72,))
+m = mmap(file, BitArray, (72,))
 @test Test._check_bitarray_consistency(m)
 @test length(m) == 72
 finalize(m); m = nothing; GC.gc()
 
 s = open(file, "r+")
-m = Mmap.mmap(s, BitArray, 72) # len integer instead of dims
+m = mmap(s, BitArray, 72) # len integer instead of dims
 @test Test._check_bitarray_consistency(m)
 @test length(m) == 72
 close(s); finalize(m); m = nothing; GC.gc()
 
-m = Mmap.mmap(file, BitArray, 72) # len integer instead of dims
+m = mmap(file, BitArray, 72) # len integer instead of dims
 @test Test._check_bitarray_consistency(m)
 @test length(m) == 72
 finalize(m); m = nothing; GC.gc()
 rm(file)
 
-# Mmap.mmap with an offset
+# mmap with an offset
 A = rand(1:20, 500, 300)
 fname = tempname()
 s = open(fname, "w+")
@@ -255,12 +261,12 @@ close(s)
 s = open(fname)
 m = read(s, Int)
 n = read(s, Int)
-A2 = Mmap.mmap(s, Matrix{Int}, (m,n))
+A2 = mmap(s, Matrix{Int}, (m,n))
 @test A == A2
 seek(s, 0)
-A3 = Mmap.mmap(s, Matrix{Int}, (m,n), convert(Int64, 2*sizeof(Int)))
+A3 = mmap(s, Matrix{Int}, (m,n), convert(Int64, 2*sizeof(Int)))
 @test A == A3
-A4 = Mmap.mmap(s, Matrix{Int}, (m,150), convert(Int64, (2+150*m)*sizeof(Int)))
+A4 = mmap(s, Matrix{Int}, (m,150), convert(Int64, (2+150*m)*sizeof(Int)))
 @test A[:, 151:end] == A4
 close(s)
 finalize(A2); finalize(A3); finalize(A4)
@@ -277,7 +283,7 @@ m = Mmap.Anonymous()
 @test isreadable(m)
 @test iswritable(m)
 
-m = Mmap.mmap(Vector{UInt8}, 12)
+m = mmap(Vector{UInt8}, 12)
 @test length(m) == 12
 @test all(m .== 0x00)
 @test m[1] === 0x00
@@ -285,16 +291,16 @@ m = Mmap.mmap(Vector{UInt8}, 12)
 m[1] = 0x0a
 Mmap.sync!(m)
 @test m[1] === 0x0a
-m = Mmap.mmap(Vector{UInt8}, 12; shared=false)
-m = Mmap.mmap(Vector{Int}, 12)
+m = mmap(Vector{UInt8}, 12; shared=false)
+m = mmap(Vector{Int}, 12)
 @test length(m) == 12
 @test all(m .== 0)
 @test m[1] === 0
 @test m[end] === 0
-m = Mmap.mmap(Vector{Float64}, 12)
+m = mmap(Vector{Float64}, 12)
 @test length(m) == 12
 @test all(m .== 0.0)
-m = Mmap.mmap(Matrix{Int8}, (12,12))
+m = mmap(Matrix{Int8}, (12,12))
 @test size(m) == (12,12)
 @test all(m == zeros(Int8, (12,12)))
 @test sizeof(m) == prod((12,12))
@@ -307,14 +313,28 @@ n = similar(m, 12)
 @test size(n) == (12,)
 finalize(m); m = nothing; GC.gc()
 
+if Sys.isunix()
+    file = tempname()
+    write(file, rand(Float64, 20))
+    A = mmap(file, Vector{Float64}, 20)
+    @test Mmap.madvise!(A, Mmap.MADV_WILLNEED) === nothing # checking for no error
+    finalize(A); A = nothing; GC.gc()
+
+    write(file, BitArray(rand(Bool, 20)))
+    b = mmap(file, BitArray, 20)
+    @test Mmap.madvise!(b, Mmap.MADV_WILLNEED) === nothing
+    finalize(b); b = nothing; GC.gc()
+    rm(file)
+end
+
 # test #14885
 file = tempname()
 touch(file)
 open(file, "r+") do s
-    A = Mmap.mmap(s, Vector{UInt8}, (10,), 0)
+    A = mmap(s, Vector{UInt8}, (10,), 0)
     Mmap.sync!(A)
     finalize(A); A = nothing; GC.gc()
-    A = Mmap.mmap(s, Vector{UInt8}, (10,), 1)
+    A = mmap(s, Vector{UInt8}, (10,), 1)
     Mmap.sync!(A)
     finalize(A); A = nothing; GC.gc()
 end
diff --git a/stdlib/MozillaCACerts_jll/Project.toml b/stdlib/MozillaCACerts_jll/Project.toml
index bf660f533776f3..0db86a1dd5319e 100644
--- a/stdlib/MozillaCACerts_jll/Project.toml
+++ b/stdlib/MozillaCACerts_jll/Project.toml
@@ -1,5 +1,6 @@
 name = "MozillaCACerts_jll"
 uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
+version = "2022.2.1"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl b/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
index d89ba83689e713..244c1204563d5a 100644
--- a/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
+++ b/stdlib/MozillaCACerts_jll/src/MozillaCACerts_jll.jl
@@ -2,10 +2,30 @@
 
 ## dummy stub for https://github.com/JuliaBinaryWrappers/MozillaCACerts_jll.jl
 
-module MozillaCACerts_jll
+baremodule MozillaCACerts_jll
+using Base
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+global artifact_dir::String = ""
+global cacert::String = ""
 
 function __init__()
-	global cacert = normpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "cert.pem")
+    global artifact_dir = dirname(Sys.BINDIR)
+    global cacert = normpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "cert.pem")
 end
 
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+
 end # module
diff --git a/stdlib/MozillaCACerts_jll/test/runtests.jl b/stdlib/MozillaCACerts_jll/test/runtests.jl
index a34cac7ba6775a..fdf939755d9ab7 100644
--- a/stdlib/MozillaCACerts_jll/test/runtests.jl
+++ b/stdlib/MozillaCACerts_jll/test/runtests.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 using MozillaCACerts_jll
 
diff --git a/stdlib/NetworkOptions.version b/stdlib/NetworkOptions.version
new file mode 100644
index 00000000000000..483d6bd51694b0
--- /dev/null
+++ b/stdlib/NetworkOptions.version
@@ -0,0 +1,4 @@
+NETWORKOPTIONS_BRANCH = master
+NETWORKOPTIONS_SHA1 = 4d3df6445bbca0556a7a9f2abb0e48ba7d774f5a
+NETWORKOPTIONS_GIT_URL := https://github.com/JuliaLang/NetworkOptions.jl.git
+NETWORKOPTIONS_TAR_URL = https://api.github.com/repos/JuliaLang/NetworkOptions.jl/tarball/$1
diff --git a/stdlib/OpenBLAS_jll/Project.toml b/stdlib/OpenBLAS_jll/Project.toml
new file mode 100644
index 00000000000000..21fa9e9f0a0e68
--- /dev/null
+++ b/stdlib/OpenBLAS_jll/Project.toml
@@ -0,0 +1,17 @@
+name = "OpenBLAS_jll"
+uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
+version = "0.3.20+0"
+
+[deps]
+CompilerSupportLibraries_jll = "e66e0078-7015-5450-92f7-15fbd957f2ae"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.7"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
new file mode 100644
index 00000000000000..f656621d957d6c
--- /dev/null
+++ b/stdlib/OpenBLAS_jll/src/OpenBLAS_jll.jl
@@ -0,0 +1,56 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/OpenBLAS_jll.jl
+baremodule OpenBLAS_jll
+using Base, Libdl, CompilerSupportLibraries_jll, Base.BinaryPlatforms
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libopenblas
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libopenblas_handle = C_NULL
+libopenblas_path = ""
+
+if Base.USE_BLAS64
+    const libsuffix = "64_"
+else
+    const libsuffix = ""
+end
+
+if Sys.iswindows()
+    const libopenblas = "libopenblas$(libsuffix).dll"
+elseif Sys.isapple()
+    const libopenblas = "@rpath/libopenblas$(libsuffix).dylib"
+else
+    const libopenblas = "libopenblas$(libsuffix).so"
+end
+
+function __init__()
+    # make sure OpenBLAS does not set CPU affinity (#1070, #9639)
+    if !haskey(ENV, "OPENBLAS_MAIN_FREE")
+        ENV["OPENBLAS_MAIN_FREE"] = "1"
+    end
+
+    global libopenblas_handle = dlopen(libopenblas)
+    global libopenblas_path = dlpath(libopenblas_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libopenblas_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libopenblas_path() = libopenblas_path
+
+end  # module OpenBLAS_jll
diff --git a/stdlib/OpenBLAS_jll/test/runtests.jl b/stdlib/OpenBLAS_jll/test/runtests.jl
new file mode 100644
index 00000000000000..1d944bab8cd67d
--- /dev/null
+++ b/stdlib/OpenBLAS_jll/test/runtests.jl
@@ -0,0 +1,17 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, OpenBLAS_jll
+
+if Base.USE_BLAS64
+    macro blasfunc(x)
+        return Expr(:quote, Symbol(x, "64_"))
+    end
+else
+    macro blasfunc(x)
+        return Expr(:quote, x)
+    end
+end
+
+@testset "OpenBLAS_jll" begin
+    @test dlsym(OpenBLAS_jll.libopenblas_handle, @blasfunc(openblas_set_num_threads); throw_error=false) != nothing
+end
diff --git a/stdlib/OpenLibm_jll/Project.toml b/stdlib/OpenLibm_jll/Project.toml
new file mode 100644
index 00000000000000..7f02fbc81ce1bb
--- /dev/null
+++ b/stdlib/OpenLibm_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "OpenLibm_jll"
+uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
+version = "0.8.1+0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.0"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
new file mode 100644
index 00000000000000..e3536021ad4c94
--- /dev/null
+++ b/stdlib/OpenLibm_jll/src/OpenLibm_jll.jl
@@ -0,0 +1,45 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/OpenLibm_jll.jl
+baremodule OpenLibm_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libopenlibm
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libopenlibm_handle = C_NULL
+libopenlibm_path = ""
+
+if Sys.iswindows()
+    const libopenlibm = "libopenlibm.dll"
+elseif Sys.isapple()
+    const libopenlibm = "@rpath/libopenlibm.4.dylib"
+else
+    const libopenlibm = "libopenlibm.so.4"
+end
+
+function __init__()
+    global libopenlibm_handle = dlopen(libopenlibm)
+    global libopenlibm_path = dlpath(libopenlibm_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libopenlibm_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libopenlibm_path() = libopenlibm_path
+
+end  # module OpenLibm_jll
diff --git a/stdlib/OpenLibm_jll/test/runtests.jl b/stdlib/OpenLibm_jll/test/runtests.jl
new file mode 100644
index 00000000000000..83603a50a52927
--- /dev/null
+++ b/stdlib/OpenLibm_jll/test/runtests.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, OpenLibm_jll
+
+@testset "OpenLibm_jll" begin
+    @test ccall((:isopenlibm, libopenlibm), Cint, ()) == 1
+end
diff --git a/stdlib/PCRE2_jll/Project.toml b/stdlib/PCRE2_jll/Project.toml
new file mode 100644
index 00000000000000..b7718fcf79f480
--- /dev/null
+++ b/stdlib/PCRE2_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "PCRE2_jll"
+uuid = "efcefdf7-47ab-520b-bdef-62a2eaa19f15"
+version = "10.36.0+2"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/PCRE2_jll/src/PCRE2_jll.jl b/stdlib/PCRE2_jll/src/PCRE2_jll.jl
new file mode 100644
index 00000000000000..81048a45998b54
--- /dev/null
+++ b/stdlib/PCRE2_jll/src/PCRE2_jll.jl
@@ -0,0 +1,45 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/PCRE2_jll.jl
+baremodule PCRE2_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libpcre2_8
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libpcre2_8_handle = C_NULL
+libpcre2_8_path = ""
+
+if Sys.iswindows()
+    const libpcre2_8 = "libpcre2-8-0.dll"
+elseif Sys.isapple()
+    const libpcre2_8 = "@rpath/libpcre2-8.0.dylib"
+else
+    const libpcre2_8 = "libpcre2-8.so.0"
+end
+
+function __init__()
+    global libpcre2_8_handle = dlopen(libpcre2_8)
+    global libpcre2_8_path = dlpath(libpcre2_8_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libpcre2_8_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libpcre2_8_path() = libpcre2_8_path
+
+end  # module PCRE2_jll
diff --git a/stdlib/PCRE2_jll/test/runtests.jl b/stdlib/PCRE2_jll/test/runtests.jl
new file mode 100644
index 00000000000000..b2446e7e5caab3
--- /dev/null
+++ b/stdlib/PCRE2_jll/test/runtests.jl
@@ -0,0 +1,10 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, PCRE2_jll
+
+@testset "PCRE2_jll" begin
+    vstr = zeros(UInt8, 32)
+    @test ccall((:pcre2_config_8, libpcre2_8), Cint, (UInt32, Ref{UInt8}), 11, vstr) > 0
+    vn = VersionNumber(split(unsafe_string(pointer(vstr)), " ")[1])
+    @test vn == v"10.36.0"
+end
diff --git a/stdlib/Pkg.version b/stdlib/Pkg.version
index 7853ccff92bc66..1b6a3fa3d0d5d6 100644
--- a/stdlib/Pkg.version
+++ b/stdlib/Pkg.version
@@ -1,2 +1,4 @@
 PKG_BRANCH = master
-PKG_SHA1 = 6a235eb813be335b54c97c5c7d631bdbd1059115
+PKG_SHA1 = 54d5c9e5175e94a05d6c9c9e54ad5b42d068eb17
+PKG_GIT_URL := https://github.com/JuliaLang/Pkg.jl.git
+PKG_TAR_URL = https://api.github.com/repos/JuliaLang/Pkg.jl/tarball/$1
diff --git a/stdlib/Printf/docs/src/index.md b/stdlib/Printf/docs/src/index.md
index 828e527ed0cadf..48e38e2b2ce5b4 100644
--- a/stdlib/Printf/docs/src/index.md
+++ b/stdlib/Printf/docs/src/index.md
@@ -1,4 +1,4 @@
-# Printf
+# [Printf](@id man-printf)
 
 ```@docs
 Printf.@printf
diff --git a/stdlib/Printf/src/Printf.jl b/stdlib/Printf/src/Printf.jl
index 40eb393d25edb4..05e1621dcb795c 100644
--- a/stdlib/Printf/src/Printf.jl
+++ b/stdlib/Printf/src/Printf.jl
@@ -13,6 +13,7 @@ const Chars = Union{Val{'c'}, Val{'C'}}
 const Strings = Union{Val{'s'}, Val{'S'}}
 const Pointer = Val{'p'}
 const HexBases = Union{Val{'x'}, Val{'X'}, Val{'a'}, Val{'A'}}
+const PositionCounter = Val{'n'}
 
 """
 Typed representation of a format specifier.
@@ -56,6 +57,9 @@ formatted string directly to `io`.
 
 For convenience, the `Printf.format"..."` string macro form can be used for building
 a `Printf.Format` object at macro-expansion-time.
+
+!!! compat "Julia 1.6"
+    `Printf.Format` requires Julia 1.6 or later.
 """
 struct Format{S, T}
     str::S # original full format string as CodeUnits
@@ -80,10 +84,19 @@ function Format(f::AbstractString)
     len = length(bytes)
     pos = 1
     b = 0x00
-    while true
+    while pos <= len
         b = bytes[pos]
         pos += 1
-        (pos > len || (b == UInt8('%') && pos <= len && bytes[pos] != UInt8('%'))) && break
+        if b == UInt8('%')
+            pos > len && throw(ArgumentError("invalid format string: '$f'"))
+            if bytes[pos] == UInt8('%')
+                # escaped '%'
+                b = bytes[pos]
+                pos += 1
+            else
+                break
+            end
+        end
     end
     strs = [1:pos - 1 - (b == UInt8('%'))]
     fmts = []
@@ -207,15 +220,17 @@ end
 
 @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Chars}
     leftalign, width = spec.leftalign, spec.width
-    if !leftalign && width > 1
-        for _ = 1:(width - 1)
+    c = Char(first(arg))
+    w = textwidth(c)
+    if !leftalign && width > w
+        for _ = 1:(width - w)
             buf[pos] = UInt8(' ')
             pos += 1
         end
     end
-    pos = writechar(buf, pos, arg isa String ? arg[1] : Char(arg))
-    if leftalign && width > 1
-        for _ = 1:(width - 1)
+    pos = writechar(buf, pos, c)
+    if leftalign && width > w
+        for _ = 1:(width - w)
             buf[pos] = UInt8(' ')
             pos += 1
         end
@@ -227,7 +242,8 @@ end
 @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Strings}
     leftalign, hash, width, prec = spec.leftalign, spec.hash, spec.width, spec.precision
     str = string(arg)
-    op = p = prec == -1 ? (length(str) + (hash ? arg isa AbstractString ? 2 : 1 : 0)) : prec
+    slen = textwidth(str) + (hash ? arg isa AbstractString ? 2 : 1 : 0)
+    op = p = prec == -1 ? slen : min(slen, prec)
     if !leftalign && width > p
         for _ = 1:(width - p)
             buf[pos] = UInt8(' ')
@@ -246,9 +262,9 @@ end
         end
     end
     for c in str
-        p == 0 && break
+        p -= textwidth(c)
+        p < 0 && break
         pos = writechar(buf, pos, c)
-        p -= 1
     end
     if hash && arg isa AbstractString && p > 0
         buf[pos] = UInt8('"')
@@ -276,7 +292,8 @@ fmt(buf, pos, arg::AbstractFloat, spec::Spec{T}) where {T <: Ints} =
     bs = base(T)
     arg2 = toint(arg)
     n = i = ndigits(arg2, base=bs, pad=1)
-    x, neg = arg2 < 0 ? (-arg2, true) : (arg2, false)
+    neg = arg2 < 0
+    x = arg2 isa Base.BitSigned ? unsigned(abs(arg2)) : abs(arg2)
     arglen = n + (neg || (plus | space)) +
         (T == Val{'o'} && hash ? 1 : 0) +
         (T == Val{'x'} && hash ? 2 : 0) + (T == Val{'X'} && hash ? 2 : 0)
@@ -365,21 +382,42 @@ For arbitrary precision numerics, you might extend the method like:
 ```julia
 Printf.tofloat(x::MyArbitraryPrecisionType) = BigFloat(x)
 ```
+
+!!! compat "Julia 1.6"
+    This function requires Julia 1.6 or later.
 """
 tofloat(x) = Float64(x)
 tofloat(x::Base.IEEEFloat) = x
 tofloat(x::BigFloat) = x
 
+_snprintf(ptr, siz, str, arg) =
+    @ccall "libmpfr".mpfr_snprintf(ptr::Ptr{UInt8}, siz::Csize_t, str::Ptr{UInt8};
+                                   arg::Ref{BigFloat})::Cint
+
+const __BIG_FLOAT_MAX__ = 8192
+
 @inline function fmt(buf, pos, arg, spec::Spec{T}) where {T <: Floats}
     leftalign, plus, space, zero, hash, width, prec =
         spec.leftalign, spec.plus, spec.space, spec.zero, spec.hash, spec.width, spec.precision
     x = tofloat(arg)
-    if x isa BigFloat && isfinite(x)
-        ptr = pointer(buf, pos)
-        newpos = @ccall "libmpfr".mpfr_snprintf(ptr::Ptr{UInt8}, (length(buf) - pos + 1)::Csize_t, string(spec; modifier="R")::Ptr{UInt8}; arg::Ref{BigFloat})::Cint
-        newpos > 0 || error("invalid printf formatting for BigFloat")
-        return pos + newpos
-    elseif x isa BigFloat
+    if x isa BigFloat
+        if isfinite(x)
+            GC.@preserve buf begin
+                siz = length(buf) - pos + 1
+                str = string(spec; modifier="R")
+                len = _snprintf(pointer(buf, pos), siz, str, x)
+                if len > siz
+                    maxout = max(__BIG_FLOAT_MAX__,
+                                 ceil(Int, precision(x) * log(2) / log(10)) + 25)
+                    len > maxout &&
+                        error("Over $maxout bytes $len needed to output BigFloat $x")
+                    resize!(buf, len + 1)
+                    len = _snprintf(pointer(buf, pos), len + 1, str, x)
+                end
+                len > 0 || throw(ArgumentError("invalid printf formatting $str for BigFloat"))
+                return pos + len
+            end
+        end
         x = Float64(x)
     end
     if T == Val{'e'} || T == Val{'E'}
@@ -387,11 +425,34 @@ tofloat(x::BigFloat) = x
     elseif T == Val{'f'} || T == Val{'F'}
         newpos = Ryu.writefixed(buf, pos, x, prec, plus, space, hash, UInt8('.'))
     elseif T == Val{'g'} || T == Val{'G'}
-        prec = prec == 0 ? 1 : prec
-        x = round(x, sigdigits=prec)
-        newpos = Ryu.writeshortest(buf, pos, x, plus, space, hash, prec, T == Val{'g'} ? UInt8('e') : UInt8('E'), true, UInt8('.'))
+        if isinf(x) || isnan(x)
+            newpos = Ryu.writeshortest(buf, pos, x, plus, space)
+        else
+            # C11-compliant general format
+            prec = prec == 0 ? 1 : prec
+            # format the value in scientific notation and parse the exponent part
+            exp = let p = Ryu.writeexp(buf, pos, x, prec)
+                b1, b2, b3, b4 = buf[p-4], buf[p-3], buf[p-2], buf[p-1]
+                Z = UInt8('0')
+                if b1 == UInt8('e')
+                    # two-digit exponent
+                    sign = b2 == UInt8('+') ? 1 : -1
+                    exp = 10 * (b3 - Z) + (b4 - Z)
+                else
+                    # three-digit exponent
+                    sign = b1 == UInt8('+') ? 1 : -1
+                    exp = 100 * (b2 - Z) + 10 * (b3 - Z) + (b4 - Z)
+                end
+                flipsign(exp, sign)
+            end
+            if -4 ≤ exp < prec
+                newpos = Ryu.writefixed(buf, pos, x, prec - (exp + 1), plus, space, hash, UInt8('.'), !hash)
+            else
+                newpos = Ryu.writeexp(buf, pos, x, prec - 1, plus, space, hash, T == Val{'g'} ? UInt8('e') : UInt8('E'), UInt8('.'), !hash)
+            end
+        end
     elseif T == Val{'a'} || T == Val{'A'}
-        x, neg = x < 0 ? (-x, true) : (x, false)
+        x, neg = x < 0 || x === -Base.zero(x) ? (-x, true) : (x, false)
         newpos = pos
         if neg
             buf[newpos] = UInt8('-')
@@ -422,6 +483,8 @@ tofloat(x::BigFloat) = x
                 buf[newpos] = UInt8('0')
                 newpos += 1
                 if prec > 0
+                    buf[newpos] = UInt8('.')
+                    newpos += 1
                     while prec > 0
                         buf[newpos] = UInt8('0')
                         newpos += 1
@@ -431,6 +494,7 @@ tofloat(x::BigFloat) = x
                 buf[newpos] = T <: Val{'a'} ? UInt8('p') : UInt8('P')
                 buf[newpos + 1] = UInt8('+')
                 buf[newpos + 2] = UInt8('0')
+                newpos += 3
             else
                 if prec > -1
                     s, p = frexp(x)
@@ -513,7 +577,13 @@ tofloat(x::BigFloat) = x
 end
 
 # pointers
-fmt(buf, pos, arg, spec::Spec{Pointer}) = fmt(buf, pos, Int(arg), ptrfmt(spec, arg))
+fmt(buf, pos, arg, spec::Spec{Pointer}) = fmt(buf, pos, UInt64(arg), ptrfmt(spec, arg))
+
+# position counters
+function fmt(buf, pos, arg::Ref{<:Integer}, ::Spec{PositionCounter})
+    arg[] = pos - 1
+    pos
+end
 
 # old Printf compat
 function fix_dec end
@@ -645,9 +715,16 @@ const UNROLL_UPTO = 16
 # if you have your own buffer + pos, write formatted args directly to it
 @inline function format(buf::Vector{UInt8}, pos::Integer, f::Format, args...)
     # write out first substring
+    escapechar = false
     for i in f.substringranges[1]
-        buf[pos] = f.str[i]
-        pos += 1
+        b = f.str[i]
+        if !escapechar
+            buf[pos] = b
+            pos += 1
+            escapechar = b === UInt8('%')
+        else
+            escapechar = false
+        end
     end
     # for each format, write out arg and next substring
     # unroll up to 16 formats
@@ -656,8 +733,14 @@ const UNROLL_UPTO = 16
         if N >= i
             pos = fmt(buf, pos, args[i], f.formats[i])
             for j in f.substringranges[i + 1]
-                buf[pos] = f.str[j]
-                pos += 1
+                b = f.str[j]
+                if !escapechar
+                    buf[pos] = b
+                    pos += 1
+                    escapechar = b === UInt8('%')
+                else
+                    escapechar = false
+                end
             end
         end
     end
@@ -665,21 +748,32 @@ const UNROLL_UPTO = 16
         for i = 17:length(f.formats)
             pos = fmt(buf, pos, args[i], f.formats[i])
             for j in f.substringranges[i + 1]
-                buf[pos] = f.str[j]
-                pos += 1
+                b = f.str[j]
+                if !escapechar
+                    buf[pos] = b
+                    pos += 1
+                    escapechar = b === UInt8('%')
+                else
+                    escapechar = false
+                end
             end
         end
     end
     return pos
 end
 
-plength(f::Spec{T}, x) where {T <: Chars} = max(f.width, 1) + (ncodeunits(x isa AbstractString ? x[1] : Char(x)) - 1)
+function plength(f::Spec{T}, x) where {T <: Chars}
+    c = Char(first(x))
+    w = textwidth(c)
+    return max(f.width, w) + (ncodeunits(c) - w)
+end
 plength(f::Spec{Pointer}, x) = max(f.width, 2 * sizeof(x) + 2)
 
 function plength(f::Spec{T}, x) where {T <: Strings}
     str = string(x)
-    p = f.precision == -1 ? (length(str) + (f.hash ? (x isa Symbol ? 1 : 2) : 0)) : f.precision
-    return max(f.width, p) + (sizeof(str) - length(str))
+    sw = textwidth(str)
+    p = f.precision == -1 ? (sw + (f.hash ? (x isa Symbol ? 1 : 2) : 0)) : f.precision
+    return max(f.width, p) + (sizeof(str) - sw)
 end
 
 function plength(f::Spec{T}, x) where {T <: Ints}
@@ -691,6 +785,7 @@ plength(f::Spec{T}, x::AbstractFloat) where {T <: Ints} =
     max(f.width, 0 + 309 + 17 + f.hash + 5)
 plength(f::Spec{T}, x) where {T <: Floats} =
     max(f.width, f.precision + 309 + 17 + f.hash + 5)
+plength(::Spec{PositionCounter}, x) = 0
 
 @inline function computelen(substringranges, formats, args)
     len = sum(length, substringranges)
@@ -740,27 +835,61 @@ end
 """
     @printf([io::IO], "%Fmt", args...)
 
-Print `args` using C `printf` style format specification string, with some caveats:
+Print `args` using C `printf` style format specification string.
+Optionally, an `IO` may be passed as the first argument to redirect output.
+
+# Examples
+```jldoctest
+julia> @printf "Hello %s" "world"
+Hello world
+
+julia> @printf "Scientific notation %e" 1.234
+Scientific notation 1.234000e+00
+
+julia> @printf "Scientific notation three digits %.3e" 1.23456
+Scientific notation three digits 1.235e+00
+
+julia> @printf "Decimal two digits %.2f" 1.23456
+Decimal two digits 1.23
+
+julia> @printf "Padded to length 5 %5i" 123
+Padded to length 5   123
+
+julia> @printf "Padded with zeros to length 6 %06i" 123
+Padded with zeros to length 6 000123
+
+julia> @printf "Use shorter of decimal or scientific %g %g" 1.23 12300000.0
+Use shorter of decimal or scientific 1.23 1.23e+07
+```
+
+For a systematic specification of the format, see [here](https://www.cplusplus.com/reference/cstdio/printf/).
+See also [`@sprintf`](@ref).
+
+# Caveats
 `Inf` and `NaN` are printed consistently as `Inf` and `NaN` for flags `%a`, `%A`,
 `%e`, `%E`, `%f`, `%F`, `%g`, and `%G`. Furthermore, if a floating point number is
 equally close to the numeric values of two possible output strings, the output
 string further away from zero is chosen.
-Optionally, an `IO`
-may be passed as the first argument to redirect output.
-See also: [`@sprintf`](@ref)
+
 # Examples
 ```jldoctest
-julia> @printf("%f %F %f %F\\n", Inf, Inf, NaN, NaN)
-Inf Inf NaN NaN\n
-julia> @printf "%.0f %.1f %f\\n" 0.5 0.025 -0.0078125
+julia> @printf("%f %F %f %F", Inf, Inf, NaN, NaN)
+Inf Inf NaN NaN
+
+julia> @printf "%.0f %.1f %f" 0.5 0.025 -0.0078125
 0 0.0 -0.007812
 ```
+
+!!! compat "Julia 1.8"
+    Starting in Julia 1.8, `%s` (string) and `%c` (character) widths are computed
+    using [`textwidth`](@ref), which e.g. ignores zero-width characters
+    (such as combining characters for diacritical marks) and treats certain
+    "wide" characters (e.g. emoji) as width `2`.
 """
 macro printf(io_or_fmt, args...)
     if io_or_fmt isa String
-        io = stdout
         fmt = Format(io_or_fmt)
-        return esc(:($Printf.format($io, $fmt, $(args...))))
+        return esc(:($Printf.format(stdout, $fmt, $(args...))))
     else
         io = io_or_fmt
         isempty(args) && throw(ArgumentError("must provide required format string"))
@@ -772,7 +901,8 @@ end
 """
     @sprintf("%Fmt", args...)
 
-Return `@printf` formatted output as string.
+Return [`@printf`](@ref) formatted output as string.
+
 # Examples
 ```jldoctest
 julia> @sprintf "this is a %s %15.1f" "test" 34.567
diff --git a/stdlib/Printf/test/runtests.jl b/stdlib/Printf/test/runtests.jl
index 603b3593502e8d..e80cbe96268234 100644
--- a/stdlib/Printf/test/runtests.jl
+++ b/stdlib/Printf/test/runtests.jl
@@ -19,11 +19,17 @@ using Test, Printf
         @test (@sprintf "%-20p" C_NULL) == "0x00000000          "
     end
 
+    #40318
+    @test @sprintf("%p", 0xfffffffffffe0000) == "0xfffffffffffe0000"
+
 end
 
 @testset "%a" begin
 
     # hex float
+    @test (Printf.@sprintf "%a" 0.0) == "0x0p+0"
+    @test (Printf.@sprintf "%a" -0.0) == "-0x0p+0"
+    @test (Printf.@sprintf "%.3a" 0.0) == "0x0.000p+0"
     @test (Printf.@sprintf "%a" 1.5) == "0x1.8p+0"
     @test (Printf.@sprintf "%a" 1.5f0) == "0x1.8p+0"
     @test (Printf.@sprintf "%a" big"1.5") == "0x1.8p+0"
@@ -88,6 +94,15 @@ end
     @test Printf.@sprintf("%g", 123456.7) == "123457"
     @test Printf.@sprintf("%g", 1234567.8) == "1.23457e+06"
 
+    # %g regression gh #41631
+    for (val, res) in ((Inf, "Inf"),
+                       (-Inf, "-Inf"),
+                       (NaN, "NaN"),
+                       (-NaN, "NaN"))
+        @test Printf.@sprintf("%g", val) == res
+        @test Printf.@sprintf("%G", val) == res
+    end
+
     # zeros
     @test Printf.@sprintf("%.15g", 0) == "0"
     @test Printf.@sprintf("%#.15g", 0) == "0.00000000000000"
@@ -104,9 +119,9 @@ end
     @test (Printf.@sprintf "%f" -Inf) == "-Inf"
     @test (Printf.@sprintf "%+f" -Inf) == "-Inf"
     @test (Printf.@sprintf "%f" NaN) == "NaN"
-    @test (Printf.@sprintf "%+f" NaN) == "NaN"
-    @test (Printf.@sprintf "% f" NaN) == "NaN"
-    @test (Printf.@sprintf "% #f" NaN) == "NaN"
+    @test (Printf.@sprintf "%+f" NaN) == "+NaN"
+    @test (Printf.@sprintf "% f" NaN) == " NaN"
+    @test (Printf.@sprintf "% #f" NaN) == " NaN"
     @test (Printf.@sprintf "%e" big"Inf") == "Inf"
     @test (Printf.@sprintf "%e" big"NaN") == "NaN"
 
@@ -141,6 +156,10 @@ end
     @test Printf.@sprintf("%+ 09.1f", 1.234) == "+000001.2"
     @test Printf.@sprintf("%+ 09.0f", 1.234) == "+00000001"
     @test Printf.@sprintf("%+ #09.0f", 1.234) == "+0000001."
+
+    #40303
+    @test Printf.@sprintf("%+7.1f", 9.96) == "  +10.0"
+    @test Printf.@sprintf("% 7.1f", 9.96) == "   10.0"
 end
 
 @testset "%e" begin
@@ -153,9 +172,9 @@ end
     @test (Printf.@sprintf "%e" -Inf) == "-Inf"
     @test (Printf.@sprintf "%+e" -Inf) == "-Inf"
     @test (Printf.@sprintf "%e" NaN) == "NaN"
-    @test (Printf.@sprintf "%+e" NaN) == "NaN"
-    @test (Printf.@sprintf "% e" NaN) == "NaN"
-    @test (Printf.@sprintf "% #e" NaN) == "NaN"
+    @test (Printf.@sprintf "%+e" NaN) == "+NaN"
+    @test (Printf.@sprintf "% e" NaN) == " NaN"
+    @test (Printf.@sprintf "% #e" NaN) == " NaN"
     @test (Printf.@sprintf "%e" big"Inf") == "Inf"
     @test (Printf.@sprintf "%e" big"NaN") == "NaN"
 
@@ -202,6 +221,10 @@ end
     @test Printf.@sprintf("%+ 09.1e", 1.234) == "+01.2e+00"
     @test Printf.@sprintf("%+ 09.0e", 1.234) == "+0001e+00"
     @test Printf.@sprintf("%+ #09.0e", 1.234) == "+001.e+00"
+
+    #40303
+    @test Printf.@sprintf("%+9.1e", 9.96) == " +1.0e+01"
+    @test Printf.@sprintf("% 9.1e", 9.96) == "  1.0e+01"
 end
 
 @testset "strings" begin
@@ -247,6 +270,12 @@ end
     @test (Printf.@sprintf "%-.3s" "test") == "tes"
     @test (Printf.@sprintf "%#-.3s" "test") == "\"te"
 
+    # issue #41068
+    @test Printf.@sprintf("%.2s", "föó") == "fö"
+    @test Printf.@sprintf("%5s", "föó") == "  föó"
+    @test Printf.@sprintf("%6s", "😍🍕") == "  😍🍕"
+    @test Printf.@sprintf("%2c", '🍕') == "🍕"
+    @test Printf.@sprintf("%3c", '🍕') == " 🍕"
 end
 
 @testset "chars" begin
@@ -303,6 +332,12 @@ end
 @testset "basics" begin
 
     @test Printf.@sprintf("%%") == "%"
+    @test Printf.@sprintf("1%%") == "1%"
+    @test Printf.@sprintf("%%1") == "%1"
+    @test Printf.@sprintf("1%%2") == "1%2"
+    @test Printf.@sprintf("1%%%d", 2) == "1%2"
+    @test Printf.@sprintf("1%%2%%3") == "1%2%3"
+    @test Printf.@sprintf("GAP[%%]") == "GAP[%]"
     @test Printf.@sprintf("hey there") == "hey there"
     @test_throws ArgumentError Printf.Format("")
     @test_throws ArgumentError Printf.Format("%+")
@@ -405,6 +440,16 @@ end
     # Check bug with trailing nul printing BigFloat
     @test (Printf.@sprintf("%.330f", BigFloat(1)))[end] != '\0'
 
+    # Check bugs with truncated output printing BigFloat
+    @test (Printf.@sprintf("%f", parse(BigFloat, "1e400"))) ==
+           "10000000000000000000000000000000000000000000000000000000000000000000000000000025262527574416492004687051900140830217136998040684679611623086405387447100385714565637522507383770691831689647535911648520404034824470543643098638520633064715221151920028135130764414460468236314621044034960475540018328999334468948008954289495190631358190153259681118693204411689043999084305348398480210026863210192871358464.000000"
+
+    # Check that does not attempt to output incredibly large amounts of digits
+    @test_throws ErrorException Printf.@sprintf("%f", parse(BigFloat, "1e99999"))
+
+    # Check bug with precision > length of string
+    @test Printf.@sprintf("%4.2s", "a") == "   a"
+
     # issue #29662
     @test (Printf.@sprintf "%12.3e" pi*1e100) == "  3.142e+100"
 
@@ -419,9 +464,17 @@ end
     @test Printf.@sprintf("%e", 1) == "1.000000e+00"
     @test Printf.@sprintf("%g", 1) == "1"
 
+    # issue #39748
+    @test Printf.@sprintf("%.16g", 194.4778127560983) == "194.4778127560983"
+    @test Printf.@sprintf("%.17g", 194.4778127560983) == "194.4778127560983"
+    @test Printf.@sprintf("%.18g", 194.4778127560983) == "194.477812756098302"
+    @test Printf.@sprintf("%.1g", 1.7976931348623157e308) == "2e+308"
+    @test Printf.@sprintf("%.2g", 1.7976931348623157e308) == "1.8e+308"
+    @test Printf.@sprintf("%.3g", 1.7976931348623157e308) == "1.8e+308"
+
     # escaped '%'
     @test_throws ArgumentError @sprintf("%s%%%s", "a")
-    @test @sprintf("%s%%%s", "a", "b") == "a%%b"
+    @test @sprintf("%s%%%s", "a", "b") == "a%b"
 
     # print float as %d uses round(x)
     @test @sprintf("%d", 25.5) == "26"
@@ -709,6 +762,24 @@ end
     @test Printf.@sprintf("%20.0X",  UInt(3989525555)) == "            EDCB5433"
     @test Printf.@sprintf("%20.X",  UInt(0)) == "                   0"
 
+    # issue #41971
+    @test Printf.@sprintf("%4d", typemin(Int8)) == "-128"
+    @test Printf.@sprintf("%4d", typemax(Int8)) == " 127"
+    @test Printf.@sprintf("%6d", typemin(Int16)) == "-32768"
+    @test Printf.@sprintf("%6d", typemax(Int16)) == " 32767"
+    @test Printf.@sprintf("%11d", typemin(Int32)) == "-2147483648"
+    @test Printf.@sprintf("%11d", typemax(Int32)) == " 2147483647"
+    @test Printf.@sprintf("%20d", typemin(Int64)) == "-9223372036854775808"
+    @test Printf.@sprintf("%20d", typemax(Int64)) == " 9223372036854775807"
+    @test Printf.@sprintf("%40d", typemin(Int128)) == "-170141183460469231731687303715884105728"
+    @test Printf.@sprintf("%40d", typemax(Int128)) == " 170141183460469231731687303715884105727"
+end
+
+@testset "%n" begin
+    x = Ref{Int}()
+    @test (Printf.@sprintf("%d4%n", 123, x); x[] == 4)
+    @test (Printf.@sprintf("%s%n", "😉", x); x[] == 4)
+    @test (Printf.@sprintf("%s%n", "1234", x); x[] == 4)
 end
 
 end # @testset "Printf"
diff --git a/stdlib/Profile/Project.toml b/stdlib/Profile/Project.toml
index 6aca0601439e33..334d475832b6d0 100644
--- a/stdlib/Profile/Project.toml
+++ b/stdlib/Profile/Project.toml
@@ -5,8 +5,10 @@ uuid = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 
 [extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+Base64 = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Test", "Serialization"]
+test = ["Base64", "Logging", "Serialization", "Test"]
diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md
index ac60bb92cb5ed8..8701dded0d4275 100644
--- a/stdlib/Profile/docs/src/index.md
+++ b/stdlib/Profile/docs/src/index.md
@@ -1,5 +1,80 @@
 # [Profiling](@id lib-profiling)
 
+## CPU Profiling
+
+There are two main approaches to CPU profiling julia code:
+
+## Via `@profile`
+
+Where profiling is enabled for a given call via the `@profile` macro.
+
+```julia-repl
+julia> using Profile
+
+julia> @profile foo()
+
+julia> Profile.print()
+Overhead ╎ [+additional indent] Count File:Line; Function
+=========================================================
+    ╎147  @Base/client.jl:506; _start()
+        ╎ 147  @Base/client.jl:318; exec_options(opts::Base.JLOptions)
+...
+```
+
+## Triggered During Execution
+
+Tasks that are already running can also be profiled for a fixed time period at any user-triggered time.
+
+To trigger the profiling:
+- MacOS & FreeBSD (BSD-based platforms): Use `ctrl-t` or pass a `SIGINFO` signal to the julia process i.e. `% kill -INFO $julia_pid`
+- Linux: Pass a `SIGUSR1` signal to the julia process i.e. `% kill -USR1 $julia_pid`
+- Windows: Not currently supported.
+
+First, a single stack trace at the instant that the signal was thrown is shown, then a 1 second profile is collected,
+followed by the profile report at the next yield point, which may be at task completion for code without yield points
+e.g. tight loops.
+
+```julia-repl
+julia> foo()
+##== the user sends a trigger while foo is running ==##
+load: 2.53  cmd: julia 88903 running 6.16u 0.97s
+
+======================================================================================
+Information request received. A stacktrace will print followed by a 1.0 second profile
+======================================================================================
+
+signal (29): Information request: 29
+__psynch_cvwait at /usr/lib/system/libsystem_kernel.dylib (unknown line)
+_pthread_cond_wait at /usr/lib/system/libsystem_pthread.dylib (unknown line)
+...
+
+======================================================================
+Profile collected. A report will print if the Profile module is loaded
+======================================================================
+
+Overhead ╎ [+additional indent] Count File:Line; Function
+=========================================================
+Thread 1 Task 0x000000011687c010 Total snapshots: 572. Utilization: 100%
+   ╎147 @Base/client.jl:506; _start()
+       ╎ 147 @Base/client.jl:318; exec_options(opts::Base.JLOptions)
+...
+
+Thread 2 Task 0x0000000116960010 Total snapshots: 572. Utilization: 0%
+   ╎572 @Base/task.jl:587; task_done_hook(t::Task)
+      ╎ 572 @Base/task.jl:879; wait()
+...
+```
+
+### Customization
+
+The duration of the profiling can be adjusted via [`Profile.set_peek_duration`](@ref)
+
+The profile report is broken down by thread and task. Pass a no-arg function to `Profile.peek_report[]` to override this.
+i.e. `Profile.peek_report[] = () -> Profile.print()` to remove any grouping. This could also be overridden by an external
+profile data consumer.
+
+## Reference
+
 ```@docs
 Profile.@profile
 ```
@@ -14,4 +89,21 @@ Profile.fetch
 Profile.retrieve
 Profile.callers
 Profile.clear_malloc_data
+Profile.get_peek_duration
+Profile.set_peek_duration
+```
+
+## Memory profiling
+
+```@docs
+Profile.Allocs.@profile
+```
+
+The methods in `Profile.Allocs` are not exported and need to be called e.g. as `Profile.Allocs.fetch()`.
+
+```@docs
+Profile.Allocs.clear
+Profile.Allocs.fetch
+Profile.Allocs.start
+Profile.Allocs.stop
 ```
diff --git a/stdlib/Profile/src/Allocs.jl b/stdlib/Profile/src/Allocs.jl
new file mode 100644
index 00000000000000..26dd90a821e018
--- /dev/null
+++ b/stdlib/Profile/src/Allocs.jl
@@ -0,0 +1,216 @@
+module Allocs
+
+using Base.StackTraces: StackTrace, StackFrame, lookup
+using Base: InterpreterIP
+
+# --- Raw results structs, originally defined in C ---
+
+# The C jl_bt_element_t object contains either an IP pointer (size_t) or a void*.
+const BTElement = Csize_t;
+
+# matches jl_raw_backtrace_t on the C side
+struct RawBacktrace
+    data::Ptr{BTElement} # in C: *jl_bt_element_t
+    size::Csize_t
+end
+
+# matches jl_raw_alloc_t on the C side
+struct RawAlloc
+    type::Ptr{Type}
+    backtrace::RawBacktrace
+    size::Csize_t
+    task::Ptr{Cvoid}
+    timestamp::UInt64
+end
+
+# matches jl_profile_allocs_raw_results_t on the C side
+struct RawResults
+    allocs::Ptr{RawAlloc}
+    num_allocs::Csize_t
+end
+
+"""
+    Profile.Allocs.@profile [sample_rate=0.0001] expr
+
+Profile allocations that happen during `expr`, returning
+both the result and and AllocResults struct.
+
+A sample rate of 1.0 will record everything; 0.0 will record nothing.
+
+```julia
+julia> Profile.Allocs.@profile sample_rate=0.01 peakflops()
+1.03733270279065e11
+
+julia> results = Profile.Allocs.fetch()
+
+julia> last(sort(results.allocs, by=x->x.size))
+Profile.Allocs.Alloc(Vector{Any}, Base.StackTraces.StackFrame[_new_array_ at array.c:127, ...], 5576)
+```
+
+!!! note
+    The current implementation of the Allocations Profiler does not
+    capture types for all allocations. Allocations for which the profiler
+    could not capture the type are represented as having type
+    `Profile.Allocs.UnknownType`.
+
+    You can read more about the missing types and the plan to improve this, here:
+    https://github.com/JuliaLang/julia/issues/43688.
+
+!!! compat "Julia 1.8"
+    The allocation profiler was added in Julia 1.8.
+"""
+macro profile(opts, ex)
+    _prof_expr(ex, opts)
+end
+macro profile(ex)
+    _prof_expr(ex, :(sample_rate=0.0001))
+end
+
+function _prof_expr(expr, opts)
+    quote
+        $start(; $(esc(opts)))
+        try
+            $(esc(expr))
+        finally
+            $stop()
+        end
+    end
+end
+
+"""
+    Profile.Allocs.start(sample_rate::Real)
+
+Begin recording allocations with the given sample rate
+A sample rate of 1.0 will record everything; 0.0 will record nothing.
+"""
+function start(; sample_rate::Real)
+    ccall(:jl_start_alloc_profile, Cvoid, (Cdouble,), Float64(sample_rate))
+end
+
+"""
+    Profile.Allocs.stop()
+
+Stop recording allocations.
+"""
+function stop()
+    ccall(:jl_stop_alloc_profile, Cvoid, ())
+end
+
+"""
+    Profile.Allocs.clear()
+
+Clear all previously profiled allocation information from memory.
+"""
+function clear()
+    ccall(:jl_free_alloc_profile, Cvoid, ())
+    return nothing
+end
+
+"""
+    Profile.Allocs.fetch()
+
+Retrieve the recorded allocations, and decode them into Julia
+objects which can be analyzed.
+"""
+function fetch()
+    raw_results = ccall(:jl_fetch_alloc_profile, RawResults, ())
+    return decode(raw_results)
+end
+
+# decoded results
+
+struct Alloc
+    type::Any
+    stacktrace::StackTrace
+    size::Int
+    task::Ptr{Cvoid} # N.B. unrooted, may not be valid
+    timestamp::UInt64
+end
+
+struct AllocResults
+    allocs::Vector{Alloc}
+end
+
+# Without this, the Alloc's stacktrace prints for lines and lines and lines...
+function Base.show(io::IO, a::Alloc)
+    stacktrace_sample = length(a.stacktrace) >= 1 ? "$(a.stacktrace[1]), ..." : ""
+    print(io, "$Alloc($(a.type), $StackFrame[$stacktrace_sample], $(a.size))")
+end
+
+const BacktraceCache = Dict{BTElement,Vector{StackFrame}}
+
+# copied from julia_internal.h
+const JL_BUFF_TAG = UInt(0x4eadc000)
+const JL_GC_UNKNOWN_TYPE_TAG = UInt(0xdeadaa03)
+
+struct CorruptType end
+struct BufferType end
+struct UnknownType end
+
+function load_type(ptr::Ptr{Type})
+    if UInt(ptr) < UInt(4096)
+        return CorruptType
+    elseif UInt(ptr) == JL_BUFF_TAG
+        return BufferType
+    elseif UInt(ptr) == JL_GC_UNKNOWN_TYPE_TAG
+        return UnknownType
+    end
+    return unsafe_pointer_to_objref(ptr)
+end
+
+function decode_alloc(cache::BacktraceCache, raw_alloc::RawAlloc)::Alloc
+    Alloc(
+        load_type(raw_alloc.type),
+        stacktrace_memoized(cache, load_backtrace(raw_alloc.backtrace)),
+        UInt(raw_alloc.size),
+        raw_alloc.task,
+        raw_alloc.timestamp
+    )
+end
+
+function decode(raw_results::RawResults)::AllocResults
+    cache = BacktraceCache()
+    allocs = [
+        decode_alloc(cache, unsafe_load(raw_results.allocs, i))
+        for i in 1:raw_results.num_allocs
+    ]
+    return AllocResults(allocs)
+end
+
+function load_backtrace(trace::RawBacktrace)::Vector{BTElement}
+    out = Vector{BTElement}()
+    for i in 1:trace.size
+        push!(out, unsafe_load(trace.data, i))
+    end
+
+    return out
+end
+
+function stacktrace_memoized(
+    cache::BacktraceCache,
+    trace::Vector{BTElement},
+    c_funcs::Bool=true
+)::StackTrace
+    stack = StackTrace()
+    for ip in trace
+        frames = get(cache, ip) do
+            res = lookup(ip)
+            cache[ip] = res
+            return res
+        end
+        for frame in frames
+            # Skip frames that come from C calls.
+            if c_funcs || !frame.from_c
+                push!(stack, frame)
+            end
+        end
+    end
+    return stack
+end
+
+# Precompile once for the package cache.
+@assert precompile(start, ())
+@assert precompile(stop, ())
+@assert precompile(fetch, ())
+
+end
diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl
index a0047f0f7767ef..d3d5300c875278 100644
--- a/stdlib/Profile/src/Profile.jl
+++ b/stdlib/Profile/src/Profile.jl
@@ -7,6 +7,8 @@ module Profile
 
 import Base.StackTraces: lookup, UNKNOWN, show_spec_linfo, StackFrame
 
+const nmeta = 4 # number of metadata fields per block (threadid, taskid, cpu_cycle_clock, thread_sleeping)
+
 # deprecated functions: use `getdict` instead
 lookup(ip::UInt) = lookup(convert(Ptr{Cvoid}, ip))
 
@@ -32,43 +34,118 @@ macro profile(ex)
     end
 end
 
+# triggers printing the report after a SIGINFO/SIGUSR1 profile request
+const PROFILE_PRINT_COND = Ref{Base.AsyncCondition}()
+function profile_printing_listener()
+    try
+        while true
+            wait(PROFILE_PRINT_COND[])
+            peek_report[]()
+        end
+    catch ex
+        if !isa(ex, InterruptException)
+            @error "Profile printing listener crashed" exception=ex,catch_backtrace()
+        end
+    end
+end
+
+# An internal function called to show the report after an information request (SIGINFO or SIGUSR1).
+function _peek_report()
+    iob = IOBuffer()
+    ioc = IOContext(IOContext(iob, stdout), :displaysize=>displaysize(stdout))
+    print(ioc, groupby = [:thread, :task])
+    Base.print(stdout, String(take!(iob)))
+end
+# This is a ref so that it can be overridden by other profile info consumers.
+const peek_report = Ref{Function}(_peek_report)
+
+"""
+    get_peek_duration()
+
+Get the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform.
+"""
+get_peek_duration() = ccall(:jl_get_profile_peek_duration, Float64, ())
+"""
+    set_peek_duration(t::Float64)
+
+Set the duration in seconds of the profile "peek" that is triggered via `SIGINFO` or `SIGUSR1`, depending on platform.
+"""
+set_peek_duration(t::Float64) = ccall(:jl_set_profile_peek_duration, Cvoid, (Float64,), t)
+
+precompile_script = """
+import Profile
+Profile.@profile while Profile.len_data() < 1000; rand(10,10) * rand(10,10); end
+Profile.peek_report[]()
+Profile.clear()
+"""
+
 ####
 #### User-level functions
 ####
 
 """
-    init(; n::Integer, delay::Real))
+    init(; n::Integer, delay::Real)
+
+Configure the `delay` between backtraces (measured in seconds), and the number `n` of instruction pointers that may be
+stored per thread. Each instruction pointer corresponds to a single line of code; backtraces generally consist of a long
+list of instruction pointers. Note that 6 spaces for instruction pointers per backtrace are used to store metadata and two
+NULL end markers. Current settings can be obtained by calling this function with no arguments, and each can be set independently
+using keywords or in the order `(n, delay)`.
 
-Configure the `delay` between backtraces (measured in seconds), and the number `n` of
-instruction pointers that may be stored. Each instruction pointer corresponds to a single
-line of code; backtraces generally consist of a long list of instruction pointers. Current
-settings can be obtained by calling this function with no arguments, and each can be set
-independently using keywords or in the order `(n, delay)`.
+!!! compat "Julia 1.8"
+    As of Julia 1.8, this function allocates space for `n` instruction pointers per thread being profiled.
+    Previously this was `n` total.
 """
-function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing)
+function init(; n::Union{Nothing,Integer} = nothing, delay::Union{Nothing,Real} = nothing, limitwarn::Bool = true)
     n_cur = ccall(:jl_profile_maxlen_data, Csize_t, ())
     delay_cur = ccall(:jl_profile_delay_nsec, UInt64, ())/10^9
     if n === nothing && delay === nothing
-        return Int(n_cur), delay_cur
+        nthreads = Sys.iswindows() ? 1 : Threads.nthreads() # windows only profiles the main thread
+        return round(Int, n_cur / nthreads), delay_cur
     end
     nnew = (n === nothing) ? n_cur : n
     delaynew = (delay === nothing) ? delay_cur : delay
-    init(nnew, delaynew)
+    init(nnew, delaynew; limitwarn)
 end
 
-function init(n::Integer, delay::Real)
-    status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), n, round(UInt64,10^9*delay))
+function init(n::Integer, delay::Real; limitwarn::Bool = true)
+    nthreads = Sys.iswindows() ? 1 : Threads.nthreads() # windows only profiles the main thread
+    sample_size_bytes = sizeof(Ptr) # == Sys.WORD_SIZE / 8
+    buffer_samples = n * nthreads
+    buffer_size_bytes = buffer_samples * sample_size_bytes
+    if buffer_size_bytes > 2^29 && Sys.WORD_SIZE == 32
+        buffer_size_bytes_per_thread = floor(Int, 2^29 / nthreads)
+        buffer_samples_per_thread = floor(Int, buffer_size_bytes_per_thread / sample_size_bytes)
+        buffer_samples = buffer_samples_per_thread * nthreads
+        buffer_size_bytes = buffer_samples * sample_size_bytes
+        limitwarn && @warn "Requested profile buffer limited to 512MB (n = $buffer_samples_per_thread per thread) given that this system is 32-bit"
+    end
+    status = ccall(:jl_profile_init, Cint, (Csize_t, UInt64), buffer_samples, round(UInt64,10^9*delay))
     if status == -1
-        error("could not allocate space for ", n, " instruction pointers")
+        error("could not allocate space for ", n, " instruction pointers per thread being profiled ($nthreads threads, $(Base.format_bytes(buffer_size_bytes)) total)")
     end
 end
 
-# init with default values
-# Use a max size of 1M profile samples, and fire timer every 1ms
-if Sys.iswindows()
-    __init__() = init(1_000_000, 0.01)
-else
-    __init__() = init(1_000_000, 0.001)
+function __init__()
+    # init with default values
+    # Use a max size of 10M profile samples, and fire timer every 1ms
+    # (that should typically give around 100 seconds of record)
+    @static if Sys.iswindows() && Sys.WORD_SIZE == 32
+        # The Win32 unwinder is 1000x slower than elsewhere (around 1ms/frame),
+        # so we don't want to slow the program down by quite that much
+        n = 1_000_000
+        delay = 0.01
+    else
+        n = 10_000_000
+        delay = 0.001
+    end
+    init(n, delay, limitwarn = false)
+    @static if !Sys.iswindows()
+        # triggering a profile via signals is not implemented on windows
+        PROFILE_PRINT_COND[] = Base.AsyncCondition()
+        ccall(:jl_set_peek_cond, Cvoid, (Ptr{Cvoid},), PROFILE_PRINT_COND[].handle)
+        errormonitor(Threads.@spawn(profile_printing_listener()))
+    end
 end
 
 """
@@ -101,8 +178,14 @@ struct ProfileFormat
     end
 end
 
+# offsets of the metadata in the data stream
+const META_OFFSET_SLEEPSTATE = 2
+const META_OFFSET_CPUCYCLECLOCK = 3
+const META_OFFSET_TASKID = 4
+const META_OFFSET_THREADID = 5
+
 """
-    print([io::IO = stdout,] [data::Vector]; kwargs...)
+    print([io::IO = stdout,] [data::Vector = fetch()], [lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data)]; kwargs...)
 
 Prints profiling results to `io` (by default, `stdout`). If you do not
 supply a `data` vector, the internal buffer of accumulated backtraces
@@ -123,6 +206,9 @@ The keyword arguments can be any combination of:
     line, `:count` sorts in order of number of collected samples, and `:overhead` sorts by the number of samples
     incurred by each function by itself.
 
+ - `groupby` -- Controls grouping over tasks and threads, or no grouping. Options are `:none` (default), `:thread`, `:task`,
+    `[:thread, :task]`, or `[:task, :thread]` where the last two provide nested grouping.
+
  - `noisefloor` -- Limits frames that exceed the heuristic noise floor of the sample (only applies to format `:tree`).
     A suggested value to try for this is 2.0 (the default is 0). This parameter hides samples for which `n <= noisefloor * √N`,
     where `n` is the number of samples on this line, and `N` is the number of samples for the callee.
@@ -132,6 +218,12 @@ The keyword arguments can be any combination of:
  - `recur` -- Controls the recursion handling in `:tree` format. `:off` (default) prints the tree as normal. `:flat` instead
     compresses any recursion (by ip), showing the approximate effect of converting any self-recursion into an iterator.
     `:flatc` does the same but also includes collapsing of C frames (may do odd things around `jl_apply`).
+
+ - `threads::Union{Int,AbstractVector{Int}}` -- Specify which threads to include snapshots from in the report. Note that
+    this does not control which threads samples are collected on.
+
+ - `tasks::Union{Int,AbstractVector{Int}}` -- Specify which tasks to include snapshots from in the report. Note that this
+    does not control which tasks samples are collected within.
 """
 function print(io::IO,
         data::Vector{<:Unsigned} = fetch(),
@@ -144,30 +236,75 @@ function print(io::IO,
         mincount::Int = 0,
         noisefloor = 0,
         sortedby::Symbol = :filefuncline,
-        recur::Symbol = :off)
-    print(io, data, lidict, ProfileFormat(
-            C = C,
-            combine = combine,
-            maxdepth = maxdepth,
-            mincount = mincount,
-            noisefloor = noisefloor,
-            sortedby = sortedby,
-            recur = recur),
-        format)
-end
-
-function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat, format::Symbol)
-    cols::Int = Base.displaysize(io)[2]
-    data = convert(Vector{UInt64}, data)
-    fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
-    if format === :tree
-        tree(io, data, lidict, cols, fmt)
-    elseif format === :flat
-        fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
-        flat(io, data, lidict, cols, fmt)
+        groupby::Union{Symbol,AbstractVector{Symbol}} = :none,
+        recur::Symbol = :off,
+        threads::Union{Int,AbstractVector{Int}} = 1:Threads.nthreads(),
+        tasks::Union{UInt,AbstractVector{UInt}} = typemin(UInt):typemax(UInt))
+
+    pf = ProfileFormat(;C, combine, maxdepth, mincount, noisefloor, sortedby, recur)
+    if groupby == :none
+        print(io, data, lidict, pf, format, threads, tasks, false)
     else
-        throw(ArgumentError("output format $(repr(format)) not recognized"))
+        if !in(groupby, [:thread, :task, [:task, :thread], [:thread, :task]])
+            error(ArgumentError("Unrecognized groupby option: $groupby. Options are :none (default), :task, :thread, [:task, :thread], or [:thread, :task]"))
+        elseif Sys.iswindows() && in(groupby, [:thread, [:task, :thread], [:thread, :task]])
+            @warn "Profiling on windows is limited to the main thread. Other threads have not been sampled and will not show in the report"
+        end
+        any_nosamples = false
+        println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
+        println(io, "=========================================================")
+        if groupby == [:task, :thread]
+            for taskid in intersect(get_task_ids(data), tasks)
+                threadids = intersect(get_thread_ids(data, taskid), threads)
+                if length(threadids) == 0
+                    any_nosamples = true
+                else
+                    nl = length(threadids) > 1 ? "\n" : ""
+                    printstyled(io, "Task $(Base.repr(taskid))$nl"; bold=true, color=Base.debug_color())
+                    for threadid in threadids
+                        printstyled(io, " Thread $threadid "; bold=true, color=Base.info_color())
+                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples && (any_nosamples = true)
+                        println(io)
+                    end
+                end
+            end
+        elseif groupby == [:thread, :task]
+            for threadid in intersect(get_thread_ids(data), threads)
+                taskids = intersect(get_task_ids(data, threadid), tasks)
+                if length(taskids) == 0
+                    any_nosamples = true
+                else
+                    nl = length(taskids) > 1 ? "\n" : ""
+                    printstyled(io, "Thread $threadid$nl"; bold=true, color=Base.info_color())
+                    for taskid in taskids
+                        printstyled(io, " Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
+                        nosamples = print(io, data, lidict, pf, format, threadid, taskid, true)
+                        nosamples && (any_nosamples = true)
+                        println(io)
+                    end
+                end
+            end
+        elseif groupby == :task
+            threads = 1:typemax(Int)
+            for taskid in intersect(get_task_ids(data), tasks)
+                printstyled(io, "Task $(Base.repr(taskid)) "; bold=true, color=Base.debug_color())
+                nosamples = print(io, data, lidict, pf, format, threads, taskid, true)
+                nosamples && (any_nosamples = true)
+                println(io)
+            end
+        elseif groupby == :thread
+            tasks = 1:typemax(UInt)
+            for threadid in intersect(get_thread_ids(data), threads)
+                printstyled(io, "Thread $threadid "; bold=true, color=Base.info_color())
+                nosamples = print(io, data, lidict, pf, format, threadid, tasks, true)
+                nosamples && (any_nosamples = true)
+                println(io)
+            end
+        end
+        any_nosamples && warning_empty(summary = true)
     end
+    return
 end
 
 """
@@ -182,34 +319,120 @@ See `Profile.print([io], data)` for an explanation of the valid keyword argument
 print(data::Vector{<:Unsigned} = fetch(), lidict::Union{LineInfoDict, LineInfoFlatDict} = getdict(data); kwargs...) =
     print(stdout, data, lidict; kwargs...)
 
+function print(io::IO, data::Vector{<:Unsigned}, lidict::Union{LineInfoDict, LineInfoFlatDict}, fmt::ProfileFormat,
+                format::Symbol, threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}},
+                is_subsection::Bool = false)
+    cols::Int = Base.displaysize(io)[2]
+    data = convert(Vector{UInt64}, data)
+    fmt.recur ∈ (:off, :flat, :flatc) || throw(ArgumentError("recur value not recognized"))
+    if format === :tree
+        nosamples = tree(io, data, lidict, cols, fmt, threads, tasks, is_subsection)
+        return nosamples
+    elseif format === :flat
+        fmt.recur === :off || throw(ArgumentError("format flat only implements recur=:off"))
+        nosamples = flat(io, data, lidict, cols, fmt, threads, tasks, is_subsection)
+        return nosamples
+    else
+        throw(ArgumentError("output format $(repr(format)) not recognized"))
+    end
+end
+
+function get_task_ids(data::Vector{<:Unsigned}, threadid = nothing)
+    taskids = UInt[]
+    for i in length(data):-1:1
+        if is_block_end(data, i)
+            if isnothing(threadid) || data[i - META_OFFSET_THREADID] == threadid
+                taskid = data[i - META_OFFSET_TASKID]
+                !in(taskid, taskids) && push!(taskids, taskid)
+            end
+        end
+    end
+    return taskids
+end
+
+function get_thread_ids(data::Vector{<:Unsigned}, taskid = nothing)
+    threadids = Int[]
+    for i in length(data):-1:1
+        if is_block_end(data, i)
+            if isnothing(taskid) || data[i - META_OFFSET_TASKID] == taskid
+                threadid = data[i - META_OFFSET_THREADID]
+                !in(threadid, threadids) && push!(threadids, threadid)
+            end
+        end
+    end
+    return sort(threadids)
+end
+
+function is_block_end(data, i)
+    i < nmeta + 1 && return false
+    # 32-bit linux has been seen to have rogue NULL ips, so we use two to
+    # indicate block end, where the 2nd is the actual end index.
+    # and we could have (though very unlikely):
+    # 1:<stack><metadata><null><null><NULL><metadata><null><null>:end
+    # and we want to ignore the triple NULL (which is an ip).
+    return data[i] == 0 && data[i - 1] == 0 && data[i - META_OFFSET_SLEEPSTATE] != 0
+end
+
+function has_meta(data)
+    for i in 6:length(data)
+        data[i] == 0 || continue            # first block end null
+        data[i - 1] == 0 || continue        # second block end null
+        data[i - META_OFFSET_SLEEPSTATE] in 1:2 || continue
+        data[i - META_OFFSET_CPUCYCLECLOCK] != 0 || continue
+        data[i - META_OFFSET_TASKID] != 0 || continue
+        data[i - META_OFFSET_THREADID] != 0 || continue
+        return true
+    end
+    return false
+end
+
 """
-    retrieve() -> data, lidict
+    retrieve(; kwargs...) -> data, lidict
 
 "Exports" profiling results in a portable format, returning the set of all backtraces
 (`data`) and a dictionary that maps the (session-specific) instruction pointers in `data` to
 `LineInfo` values that store the file name, function name, and line number. This function
 allows you to save profiling results for future analysis.
 """
-function retrieve()
-    data = fetch()
+function retrieve(; kwargs...)
+    data = fetch(; kwargs...)
     return (data, getdict(data))
 end
 
 function getdict(data::Vector{UInt})
-    # Lookup is expensive, so do it only once per ip.
-    udata = unique(data)
     dict = LineInfoDict()
-    for ip in udata
-        st = lookup(convert(Ptr{Cvoid}, ip))
-        # To correct line numbers for moving code, put it in the form expected by
-        # Base.update_stackframes_callback[]
-        stn = map(x->(x, 1), st)
-        try Base.invokelatest(Base.update_stackframes_callback[], stn) catch end
-        dict[UInt64(ip)] = map(first, stn)
+    return getdict!(dict, data)
+end
+
+function getdict!(dict::LineInfoDict, data::Vector{UInt})
+    # we don't want metadata here as we're just looking up ips
+    unique_ips = unique(has_meta(data) ? strip_meta(data) : data)
+    n_unique_ips = length(unique_ips)
+    n_unique_ips == 0 && return dict
+    iplookups = similar(unique_ips, Vector{StackFrame})
+    @sync for indexes_part in Iterators.partition(eachindex(unique_ips), div(n_unique_ips, Threads.nthreads(), RoundUp))
+        Threads.@spawn begin
+            for i in indexes_part
+                iplookups[i] = _lookup_corrected(unique_ips[i])
+            end
+        end
+    end
+    for i in eachindex(unique_ips)
+        dict[unique_ips[i]] = iplookups[i]
     end
     return dict
 end
 
+function _lookup_corrected(ip::UInt)
+    st = lookup(convert(Ptr{Cvoid}, ip))
+    # To correct line numbers for moving code, put it in the form expected by
+    # Base.update_stackframes_callback[]
+    stn = map(x->(x, 1), st)
+    # Note: Base.update_stackframes_callback[] should be data-race free
+    try Base.invokelatest(Base.update_stackframes_callback[], stn) catch end
+    return map(first, stn)
+end
+
 """
     flatten(btdata::Vector, lidict::LineInfoDict) -> (newdata::Vector{UInt64}, newdict::LineInfoFlatDict)
 
@@ -271,7 +494,7 @@ function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
                     for proj in Base.project_names
                         project_file = joinpath(root, proj)
                         if Base.isfile_casesensitive(project_file)
-                            pkgid = Base.project_file_name_uuid(project_file, "", Base.TOMLCache())
+                            pkgid = Base.project_file_name_uuid(project_file, "")
                             isempty(pkgid.name) && return path # bad Project file
                             # return the joined the module name prefix and path suffix
                             path = path[nextind(path, sizeof(root)):end]
@@ -281,7 +504,7 @@ function short_path(spath::Symbol, filenamecache::Dict{Symbol, String})
                 end
             end
             return path
-        elseif isfile(joinpath(Sys.BINDIR::String, Base.DATAROOTDIR, "julia", "base", path))
+        elseif isfile(joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "base", path))
             # do the same mechanic for Base (or Core/Compiler) files as above,
             # but they start from a relative path
             return joinpath("@Base", normpath(path))
@@ -350,6 +573,8 @@ stop_timer() = ccall(:jl_profile_stop_timer, Cvoid, ())
 
 is_running() = ccall(:jl_profile_is_running, Cint, ())!=0
 
+is_buffer_full() = ccall(:jl_profile_is_buffer_full, Cint, ())!=0
+
 get_data_pointer() = convert(Ptr{UInt}, ccall(:jl_profile_get_data, Ptr{UInt8}, ()))
 
 len_data() = convert(Int, ccall(:jl_profile_len_data, Csize_t, ()))
@@ -364,31 +589,78 @@ error_codes = Dict(
 
 
 """
-    fetch() -> data
+    fetch(;include_meta = true) -> data
 
 Returns a copy of the buffer of profile backtraces. Note that the
 values in `data` have meaning only on this machine in the current session, because it
 depends on the exact memory addresses used in JIT-compiling. This function is primarily for
 internal use; [`retrieve`](@ref) may be a better choice for most users.
+By default metadata such as threadid and taskid is included. Set `include_meta` to `false` to strip metadata.
 """
-function fetch()
+function fetch(;include_meta = true, limitwarn = true)
     maxlen = maxlen_data()
     len = len_data()
-    if (len == maxlen)
+    if limitwarn && is_buffer_full()
         @warn """The profile data buffer is full; profiling probably terminated
                  before your program finished. To profile for longer runs, call
                  `Profile.init()` with a larger buffer and/or larger delay."""
     end
     data = Vector{UInt}(undef, len)
     GC.@preserve data unsafe_copyto!(pointer(data), get_data_pointer(), len)
-    return data
+    if include_meta || isempty(data)
+        return data
+    end
+    return strip_meta(data)
+end
+
+function strip_meta(data)
+    nblocks = count(Base.Fix1(is_block_end, data), eachindex(data))
+    data_stripped = Vector{UInt}(undef, length(data) - (nblocks * (nmeta + 1)))
+    j = length(data_stripped)
+    i = length(data)
+    while i > 0 && j > 0
+        data_stripped[j] = data[i]
+        if is_block_end(data, i)
+            i -= (nmeta + 1) # metadata fields and the extra NULL IP
+        end
+        i -= 1
+        j -= 1
+    end
+    @assert i == j == 0 "metadata stripping failed"
+    return data_stripped
 end
 
+"""
+    Profile.add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0) -> data_with_meta
+
+The converse of `Profile.fetch(;include_meta = false)`; this will add fake metadata, and can be used
+for compatibility and by packages (e.g., FlameGraphs.jl) that would rather not depend on the internal
+details of the metadata format.
+"""
+function add_fake_meta(data; threadid = 1, taskid = 0xf0f0f0f0)
+    threadid == 0 && error("Fake threadid cannot be 0")
+    taskid == 0 && error("Fake taskid cannot be 0")
+    !isempty(data) && has_meta(data) && error("input already has metadata")
+    cpu_clock_cycle = UInt64(99)
+    data_with_meta = similar(data, 0)
+    for i = 1:length(data)
+        val = data[i]
+        if iszero(val)
+            # (threadid, taskid, cpu_cycle_clock, thread_sleeping)
+            push!(data_with_meta, threadid, taskid, cpu_clock_cycle+=1, false+1, 0, 0)
+        else
+            push!(data_with_meta, val)
+        end
+    end
+    return data_with_meta
+end
 
 ## Print as a flat list
 # Counts the number of times each line appears, at any nesting level and at the topmost level
 # Merging multiple equivalent entries and recursive calls
-function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool) where {T}
+function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, C::Bool,
+                    threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}) where {T}
+    !isempty(data) && !has_meta(data) && error("Profile data is missing required metadata")
     lilist = StackFrame[]
     n = Int[]
     m = Int[]
@@ -396,44 +668,71 @@ function parse_flat(::Type{T}, data::Vector{UInt64}, lidict::Union{LineInfoDict,
     recursive = Set{T}()
     first = true
     totalshots = 0
-    for ip in data
-        if ip == 0
+    startframe = length(data)
+    skip = false
+    nsleeping = 0
+    for i in startframe:-1:1
+        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (its read ahead below) and extra block end NULL IP
+        ip = data[i]
+        if is_block_end(data, i)
+            # read metadata
+            thread_sleeping = data[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            # cpu_cycle_clock = data[i - META_OFFSET_CPUCYCLECLOCK]
+            taskid = data[i - META_OFFSET_TASKID]
+            threadid = data[i - META_OFFSET_THREADID]
+            if !in(threadid, threads) || !in(taskid, tasks)
+                skip = true
+                continue
+            end
+            if thread_sleeping == 1
+                nsleeping += 1
+            end
+            skip = false
             totalshots += 1
             empty!(recursive)
             first = true
-            continue
-        end
-        frames = lidict[ip]
-        nframes = (frames isa Vector ? length(frames) : 1)
-        for i = 1:nframes
-            frame = (frames isa Vector ? frames[i] : frames)
-            !C && frame.from_c && continue
-            key = (T === UInt64 ? ip : frame)
-            idx = get!(lilist_idx, key, length(lilist) + 1)
-            if idx > length(lilist)
-                push!(recursive, key)
-                push!(lilist, frame)
-                push!(n, 1)
-                push!(m, 0)
-            elseif !(key in recursive)
-                push!(recursive, key)
-                n[idx] += 1
-            end
-            if first
-                m[idx] += 1
-                first = false
+            startframe = i
+        elseif !skip
+            frames = lidict[ip]
+            nframes = (frames isa Vector ? length(frames) : 1)
+            for j = 1:nframes
+                frame = (frames isa Vector ? frames[j] : frames)
+                !C && frame.from_c && continue
+                key = (T === UInt64 ? ip : frame)
+                idx = get!(lilist_idx, key, length(lilist) + 1)
+                if idx > length(lilist)
+                    push!(recursive, key)
+                    push!(lilist, frame)
+                    push!(n, 1)
+                    push!(m, 0)
+                elseif !(key in recursive)
+                    push!(recursive, key)
+                    n[idx] += 1
+                end
+                if first
+                    m[idx] += 1
+                    first = false
+                end
             end
         end
     end
     @assert length(lilist) == length(n) == length(m) == length(lilist_idx)
-    return (lilist, n, m, totalshots)
+    return (lilist, n, m, totalshots, nsleeping)
 end
 
-function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat)
-    lilist, n, m, totalshots = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C)
+function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfoFlatDict}, cols::Int, fmt::ProfileFormat,
+                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
+    lilist, n, m, totalshots, nsleeping = parse_flat(fmt.combine ? StackFrame : UInt64, data, lidict, fmt.C, threads, tasks)
+    util_perc = (1 - (nsleeping / totalshots)) * 100
     if isempty(lilist)
-        warning_empty()
-        return
+        if is_subsection
+            Base.print(io, "Total snapshots: ")
+            printstyled(io, "$(totalshots)", color=Base.warn_color())
+            Base.println(io, " (", round(Int, util_perc), "% utilization)")
+        else
+            warning_empty()
+        end
+        return true
     end
     if false # optional: drop the "non-interpretable" ones
         keep = map(frame -> frame != UNKNOWN && frame.line != 0, lilist)
@@ -443,8 +742,13 @@ function flat(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoDict, LineInfo
     end
     filenamemap = Dict{Symbol,String}()
     print_flat(io, lilist, n, m, cols, filenamemap, fmt)
-    Base.println(io, "Total snapshots: ", totalshots)
-    nothing
+    Base.print(io, "Total snapshots: ", totalshots, " (", round(Int, util_perc), "% utilization")
+    if is_subsection
+        println(io, ")")
+    else
+        println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task)")
+    end
+    return false
 end
 
 function print_flat(io::IO, lilist::Vector{StackFrame},
@@ -607,14 +911,33 @@ function tree_format(frames::Vector{<:StackFrameTree}, level::Int, cols::Int, ma
 end
 
 # turn a list of backtraces into a tree (implicitly separated by NULL markers)
-function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol) where {T}
+function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, C::Bool, recur::Symbol,
+                threads::Union{Int,AbstractVector{Int},Nothing}=nothing, tasks::Union{UInt,AbstractVector{UInt},Nothing}=nothing) where {T}
+    !isempty(all) && !has_meta(all) && error("Profile data is missing required metadata")
     parent = root
     tops = Vector{StackFrameTree{T}}()
     build = Vector{StackFrameTree{T}}()
     startframe = length(all)
+    skip = false
+    nsleeping = 0
     for i in startframe:-1:1
+        (startframe - 1) >= i >= (startframe - (nmeta + 1)) && continue # skip metadata (it's read ahead below) and extra block end NULL IP
         ip = all[i]
-        if ip == 0
+        if is_block_end(all, i)
+            # read metadata
+            thread_sleeping = all[i - META_OFFSET_SLEEPSTATE] - 1 # subtract 1 as state is incremented to avoid being equal to 0
+            # cpu_cycle_clock = all[i - META_OFFSET_CPUCYCLECLOCK]
+            taskid = all[i - META_OFFSET_TASKID]
+            threadid = all[i - META_OFFSET_THREADID]
+            if (threads !== nothing && !in(threadid, threads)) ||
+               (tasks !== nothing && !in(taskid, tasks))
+                skip = true
+                continue
+            end
+            if thread_sleeping == 1
+                nsleeping += 1
+            end
+            skip = false
             # sentinel value indicates the start of a new backtrace
             empty!(build)
             root.recur = 0
@@ -641,7 +964,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
             parent = root
             root.count += 1
             startframe = i
-        else
+        elseif !skip
             pushfirst!(build, parent)
             if recur === :flat || recur === :flatc
                 # Rewind the `parent` tree back, if this exact ip was already present *higher* in the current tree
@@ -682,6 +1005,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
                 parent = this
                 continue
             end
+
             frames = lidict[ip]
             nframes = (frames isa Vector ? length(frames) : 1)
             this = parent
@@ -716,7 +1040,7 @@ function tree!(root::StackFrameTree{T}, all::Vector{UInt64}, lidict::Union{LineI
         nothing
     end
     cleanup!(root)
-    return root
+    return root, nsleeping
 end
 
 function maxstats(root::StackFrameTree)
@@ -738,12 +1062,14 @@ end
 
 # Print the stack frame tree starting at a particular root. Uses a worklist to
 # avoid stack overflows.
-function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat) where T
+function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat, is_subsection::Bool) where T
     maxes = maxstats(bt)
     filenamemap = Dict{Symbol,String}()
     worklist = [(bt, 0, 0, "")]
-    println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
-    println(io, "=========================================================")
+    if !is_subsection
+        println(io, "Overhead ╎ [+additional indent] Count File:Line; Function")
+        println(io, "=========================================================")
+    end
     while !isempty(worklist)
         (bt, level, noisefloor, str) = popfirst!(worklist)
         isempty(str) || println(io, str)
@@ -777,21 +1103,37 @@ function print_tree(io::IO, bt::StackFrameTree{T}, cols::Int, fmt::ProfileFormat
             pushfirst!(worklist, (down, level + 1, noisefloor_down, str))
         end
     end
+    return
 end
 
-function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat)
+function tree(io::IO, data::Vector{UInt64}, lidict::Union{LineInfoFlatDict, LineInfoDict}, cols::Int, fmt::ProfileFormat,
+                threads::Union{Int,AbstractVector{Int}}, tasks::Union{UInt,AbstractVector{UInt}}, is_subsection::Bool)
     if fmt.combine
-        root = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur)
+        root, nsleeping = tree!(StackFrameTree{StackFrame}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
     else
-        root = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur)
+        root, nsleeping = tree!(StackFrameTree{UInt64}(), data, lidict, fmt.C, fmt.recur, threads, tasks)
     end
+    util_perc = (1 - (nsleeping / root.count)) * 100
+    !is_subsection && print_tree(io, root, cols, fmt, is_subsection)
     if isempty(root.down)
-        warning_empty()
-        return
+        if is_subsection
+            Base.print(io, "Total snapshots: ")
+            printstyled(io, "$(root.count)", color=Base.warn_color())
+            Base.println(io, ". Utilization: ", round(Int, util_perc), "%")
+        else
+            warning_empty()
+        end
+        return true
+    else
+        Base.print(io, "Total snapshots: ", root.count, ". Utilization: ", round(Int, util_perc), "%")
     end
-    print_tree(io, root, cols, fmt)
-    Base.println(io, "Total snapshots: ", root.count)
-    nothing
+    if is_subsection
+        println(io)
+        print_tree(io, root, cols, fmt, is_subsection)
+    else
+        println(io, " across all threads and tasks. Use the `groupby` kwarg to break down by thread and/or task")
+    end
+    return false
 end
 
 function callersf(matchfunc::Function, bt::Vector, lidict::LineInfoFlatDict)
@@ -855,9 +1197,21 @@ function liperm(lilist::Vector{StackFrame})
     return sortperm(lilist, lt = lt)
 end
 
-warning_empty() = @warn """
-            There were no samples collected. Run your program longer (perhaps by
-            running it multiple times), or adjust the delay between samples with
-            `Profile.init()`."""
+function warning_empty(;summary = false)
+    if summary
+        @warn """
+        There were no samples collected in one or more groups.
+        This may be due to idle threads, or you may need to run your
+        program longer (perhaps by running it multiple times),
+        or adjust the delay between samples with `Profile.init()`."""
+    else
+        @warn """
+        There were no samples collected.
+        Run your program longer (perhaps by running it multiple times),
+        or adjust the delay between samples with `Profile.init()`."""
+    end
+end
+
+include("Allocs.jl")
 
 end # module
diff --git a/stdlib/Profile/test/allocs.jl b/stdlib/Profile/test/allocs.jl
new file mode 100644
index 00000000000000..b8d6222d075674
--- /dev/null
+++ b/stdlib/Profile/test/allocs.jl
@@ -0,0 +1,122 @@
+using Test
+using Profile: Allocs
+
+@testset "alloc profiler doesn't segfault" begin
+    res = Allocs.@profile sample_rate=1.0 begin
+        # test the allocations during compilation
+        using Base64
+    end
+    profile = Allocs.fetch()
+
+    @test length(profile.allocs) > 0
+    first_alloc = profile.allocs[1]
+    @test first_alloc.size > 0
+    @test length(first_alloc.stacktrace) > 0
+    @test length(string(first_alloc.type)) > 0
+end
+
+@testset "alloc profiler works when there are multiple tasks on multiple threads" begin
+    NUM_TASKS = 1000
+
+    # This test is only really meaningful if we're running on
+    # multiple threads, but this isn't true on the windows tests,
+    # causing them to fail. So, commenting this assertion out.
+    # @test Threads.nthreads() > 1
+
+    function do_work()
+        ch = Channel{Vector{Float64}}(Inf)
+        @sync for i in 1:NUM_TASKS
+            Threads.@spawn begin
+                # generate garbage
+                put!(ch, zeros(100))
+            end
+        end
+        close(ch)
+    end
+
+    # call once to make sure it's compiled
+    precompile(do_work, ())
+    do_work()
+
+    res = Allocs.@profile sample_rate=1 begin
+        do_work()
+    end
+    profile = Allocs.fetch()
+
+    # expecting at least 2 allocations per task:
+    # 1. the task
+    # 2. the vector
+    @test length(profile.allocs) >= 2*NUM_TASKS
+    first_alloc = profile.allocs[1]
+    @test first_alloc.size > 0
+    @test length(first_alloc.stacktrace) > 0
+    @test length(string(first_alloc.type)) > 0
+
+    @testset for type in (Task, Vector{Float64},)
+        @test length(filter(a->a.type <: type, profile.allocs)) >= NUM_TASKS
+    end
+
+    # TODO: it would be nice to assert that these tasks
+    # were actually scheduled onto multiple threads,
+    # and we see allocs from all threads in the profile
+end
+
+@testset "alloc profiler start stop fetch clear" begin
+    function do_work()
+        # Compiling allocates a lot
+        for f in (gensym() for _ in 1:10)
+            @eval begin
+                $f() = 10
+                $f()
+            end
+        end
+    end
+
+    Allocs.@profile sample_rate=1 do_work()
+    @test length(Allocs.fetch().allocs) > 10
+
+    Allocs.clear()
+    @test length(Allocs.fetch().allocs) == 0
+    Allocs.clear()
+    @test length(Allocs.fetch().allocs) == 0
+
+    Allocs.@profile sample_rate=1 do_work()
+    curr_allocs = length(Allocs.fetch().allocs)
+    @test curr_allocs > 10
+
+    # Do _more_ work, adding into the same profile
+    Allocs.@profile sample_rate=1 do_work()
+    @test length(Allocs.fetch().allocs) > curr_allocs
+
+    Allocs.clear()
+    @test length(Allocs.fetch().allocs) == 0
+
+    # Clear without fetching
+
+    Allocs.@profile sample_rate=1 do_work()
+    Allocs.clear()
+    @test length(Allocs.fetch().allocs) == 0
+
+    # And things still work like normal afterwards
+
+    Allocs.@profile sample_rate=1 do_work()
+    Allocs.@profile sample_rate=1 do_work()
+    Allocs.@profile sample_rate=1 do_work()
+    @test length(Allocs.fetch().allocs) > 10
+
+    Allocs.@profile sample_rate=1 do_work()
+    Allocs.@profile sample_rate=1 do_work()
+    @test length(Allocs.fetch().allocs) > 10
+
+    Allocs.clear()
+end
+
+@testset "alloc profiler catches strings" begin
+    Allocs.@profile sample_rate=1 "$(rand())"
+
+    prof = Allocs.fetch()
+    Allocs.clear()
+
+    @test length(prof.allocs) >= 1
+    @test length([a for a in prof.allocs if a.type == String]) >= 1
+end
diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl
index 007492745ca470..6ad05a6b707cb2 100644
--- a/stdlib/Profile/test/runtests.jl
+++ b/stdlib/Profile/test/runtests.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, Profile, Serialization
+using Test, Profile, Serialization, Logging
+using Base.StackTraces: StackFrame
 
 Profile.clear()
 Profile.init()
@@ -8,7 +9,7 @@ Profile.init()
 let iobuf = IOBuffer()
     for fmt in (:tree, :flat)
         Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, format=fmt, C=true)
-        Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, [0x0000000000000001], Dict(0x0000000000000001 => [Base.StackTraces.UNKNOWN]), format=fmt, C=false)
+        Test.@test_logs (:warn, r"^There were no samples collected\.") Profile.print(iobuf, Profile.add_fake_meta([0x0000000000000001, 0x0000000000000000]), Dict(0x0000000000000001 => [Base.StackTraces.UNKNOWN]), format=fmt, C=false)
     end
 end
 
@@ -59,6 +60,38 @@ let iobuf = IOBuffer()
     truncate(iobuf, 0)
 end
 
+@testset "Profile.print() groupby options" begin
+    iobuf = IOBuffer()
+    with_logger(NullLogger()) do
+        @testset for format in [:flat, :tree]
+            @testset for threads in Any[1:Threads.nthreads(), 1, 1:1, 1:2, [1,2]]
+                @testset for groupby in [:none, :thread, :task, [:thread, :task], [:task, :thread]]
+                    Profile.print(iobuf; groupby, threads, format)
+                    @test !isempty(String(take!(iobuf)))
+                end
+            end
+        end
+    end
+end
+
+@testset "Profile.fetch() with and without meta" begin
+    data_without = Profile.fetch(include_meta = false)
+    data_with = Profile.fetch()
+    @test data_without[1] == data_with[1]
+    @test data_without[end] == data_with[end]
+    nblocks = count(Base.Fix1(Profile.is_block_end, data_with), eachindex(data_with))
+    @test length(data_without) == length(data_with) - nblocks * (Profile.nmeta + 1)
+
+    data_with_fake = Profile.add_fake_meta(data_without)
+    @test_throws "input already has metadata" Profile.add_fake_meta(data_with)
+    data_stripped = Profile.strip_meta(data_with_fake)
+    @test data_stripped == data_without
+    # ideally the test below would be a test for equality, but real sample ips can be nulls, and thus
+    # adding metadata back in can convert those ips to new block ends, and the length is then longer
+    @test length(data_with_fake) >= length(data_with)
+
+end
+
 Profile.clear()
 @test isempty(Profile.fetch())
 
@@ -84,14 +117,34 @@ end
 
 @testset "setting sample count and delay in init" begin
     n_, delay_ = Profile.init()
-    @test n_ == 1_000_000
-    def_delay = Sys.iswindows() ? 0.01 : 0.001
+    n_original = n_
+    nthreads = Sys.iswindows() ? 1 : Threads.nthreads()
+    sample_size_bytes = sizeof(Ptr)
+    def_n = Sys.iswindows() && Sys.WORD_SIZE == 32 ? 1_000_000 : 10_000_000
+    if Sys.WORD_SIZE == 32 && (def_n * nthreads * sample_size_bytes) > 2^29
+        @test n_ * nthreads * sample_size_bytes <= 2^29
+    else
+        @test n_ == def_n
+    end
+
+    def_delay = Sys.iswindows() && Sys.WORD_SIZE == 32 ? 0.01 : 0.001
     @test delay_ == def_delay
     Profile.init(n=1_000_001, delay=0.0005)
     n_, delay_ = Profile.init()
-    @test n_ == 1_000_001
+    if Sys.WORD_SIZE == 32 && (1_000_001 * nthreads * sample_size_bytes) > 2^29
+        @test n_ * nthreads * sample_size_bytes <= 2^29
+    else
+        @test n_ == 1_000_001
+    end
     @test delay_ == 0.0005
-    Profile.init(n=1_000_000, delay=def_delay)
+    Profile.init(n=n_original, delay=def_delay)
+end
+
+@testset "warning for buffer full" begin
+    n_, delay_ = Profile.init()
+    Profile.init(n=17)
+    @test_logs (:warn, r"The profile data buffer is full") Profile.fetch()
+    Profile.init(n=n_, delay=delay_)
 end
 
 @testset "Line number correction" begin
@@ -117,7 +170,11 @@ let cmd = Base.julia_cmd()
     script = """
         using Profile
         f(::Val) = GC.safepoint()
-        @profile for i = 1:10^3; f(Val(i)); end
+        @profile for i = 1:10^3
+            println(i)
+            f(Val(i))
+        end
+        println("done")
         print(Profile.len_data())
         """
     p = open(`$cmd -e $script`)
@@ -131,5 +188,86 @@ let cmd = Base.julia_cmd()
     s = read(p, String)
     close(t)
     @test success(p)
-    @test parse(Int, s) > 1000
+    @test !isempty(s)
+    @test occursin("done", s)
+    @test parse(Int, split(s, '\n')[end]) > 100
+end
+
+if Sys.isbsd() || Sys.islinux()
+    @testset "SIGINFO/SIGUSR1 profile triggering" begin
+        let cmd = Base.julia_cmd()
+            script = """
+                x = rand(1000, 1000)
+                println("started")
+                while true
+                    x * x
+                    yield()
+                end
+                """
+            iob = Base.BufferStream()
+            p = run(pipeline(`$cmd -e $script`, stderr = devnull, stdout = iob), wait = false)
+            t = Timer(120) do t
+                # should be under 10 seconds, so give it 2 minutes then report failure
+                println("KILLING BY PROFILE TEST WATCHDOG\n")
+                kill(p, Base.SIGTERM)
+                sleep(10)
+                kill(p, Base.SIGKILL)
+                close(iob)
+            end
+            try
+                s = readuntil(iob, "started", keep = true)
+                @assert occursin("started", s)
+                @assert process_running(p)
+                for _ in 1:2
+                    sleep(2.5)
+                    if Sys.isbsd()
+                        kill(p, 29) # SIGINFO
+                    elseif Sys.islinux()
+                        kill(p, 10) # SIGUSR1
+                    end
+                    s = readuntil(iob, "Overhead ╎", keep = true)
+                    @test process_running(p)
+                    @test occursin("Overhead ╎", s)
+                end
+            finally
+                kill(p, Base.SIGKILL)
+                close(t)
+            end
+        end
+    end
+end
+
+@testset "FlameGraphs" begin
+    # FlameGraphs makes use of some Profile's internals. Detect possible breakage by mimicking some of its tests.
+    # Breakage is acceptable since these internals are not part of the stable API, but it's better to know, and ideally
+    # should be paired with an issue or PR in FlameGraphs.
+    #
+    # This also improves the thoroughness of our overall Profile tests.
+    stackframe(func, file, line; C=false) = StackFrame(Symbol(func), Symbol(file), line, nothing, C, false, 0)
+
+    backtraces = UInt64[   4, 3, 2, 1,   # order: callees then caller
+                        0, 6, 5, 1,
+                        0, 8, 7,
+                        0, 4, 3, 2, 1,
+                        0]
+    backtraces = Profile.add_fake_meta(backtraces)
+    lidict = Dict{UInt64,StackFrame}(1=>stackframe(:f1, :file1, 1),
+                                     2=>stackframe(:f2, :file1, 5),
+                                     3=>stackframe(:f3, :file2, 1),
+                                     4=>stackframe(:f2, :file1, 15),
+                                     5=>stackframe(:f4, :file1, 20),
+                                     6=>stackframe(:f5, :file3, 1),
+                                     7=>stackframe(:f1, :file1, 2),
+                                     8=>stackframe(:f6, :file3, 10))
+    root = Profile.StackFrameTree{StackFrame}()
+    Profile.tree!(root, backtraces, lidict, #= C =# true, :off)
+    @test length(root.down) == 2
+    for k in keys(root.down)
+        @test k.file == :file1
+        @test k.line ∈ (1, 2)
+    end
+    node = root.down[stackframe(:f1, :file1, 2)]
+    @test only(node.down).first == lidict[8]
 end
+
+include("allocs.jl")
diff --git a/stdlib/REPL/docs/src/index.md b/stdlib/REPL/docs/src/index.md
index ecdb7595fb62e2..af69ed0f67c734 100644
--- a/stdlib/REPL/docs/src/index.md
+++ b/stdlib/REPL/docs/src/index.md
@@ -22,7 +22,7 @@ and a `julia>` prompt.
 
 ### The Julian mode
 
-The REPL has four main modes of operation. The first and most common is the Julian prompt. It
+The REPL has five main modes of operation. The first and most common is the Julian prompt. It
 is the default mode of operation; each new line initially starts with `julia>`. It is here that
 you can enter Julia expressions. Hitting return or enter after a complete expression has been
 entered will evaluate the entry and show the result of the last expression.
@@ -32,7 +32,7 @@ julia> string(1 + 2)
 "3"
 ```
 
-There are a number useful features unique to interactive work. In addition to showing the result,
+There are a number of useful features unique to interactive work. In addition to showing the result,
 the REPL also binds the result to the variable `ans`. A trailing semicolon on the line can be
 used as a flag to suppress showing the result.
 
@@ -135,6 +135,8 @@ REPL.stripmd
 Base.Docs.apropos
 ```
 
+Another feature of help mode is the ability to access extended docstrings. You can do this by typing something like `??Print` rather than `?Print` which will display the `# Extended help` section from the source codes documentation.
+
 Help mode can be exited by pressing backspace at the beginning of the line.
 
 ### [Shell mode](@id man-shell-mode)
@@ -188,6 +190,14 @@ C:\Users\elm>dir
 02/02/2020  08:06    <DIR>          .atom
 ```
 
+### Pkg mode
+
+The Package manager mode accepts specialized commands for loading and updating packages. It is entered
+by pressing the `]` key at the Julian REPL prompt and exited by pressing CTRL-C or pressing the backspace key
+at the beginning of the line. The prompt for this mode is `pkg>`. It supports its own help-mode, which is
+entered by pressing `?` at the beginning  of the line of the `pkg>` prompt. The Package manager mode is
+documented in the Pkg manual, available at [https://julialang.github.io/Pkg.jl/v1/](https://julialang.github.io/Pkg.jl/v1/).
+
 ### Search modes
 
 In all of the above modes, the executed lines get saved to a history file, which can be searched.
@@ -201,6 +211,10 @@ Just as `^R` is a reverse search, `^S` is a forward search, with the prompt ```(
  The two may be used in conjunction with each other to move through the previous or next matching
 results, respectively.
 
+All executed commands in the Julia REPL are logged into `~/.julia/logs/repl_history.jl` along with a timestamp of when it was executed
+and the current REPL mode you were in. Search mode queries this log file in order to find the commands which you previously ran.
+This can be disabled at startup by passing the `--history-file=no` flag to Julia.
+
 ## Key bindings
 
 The Julia REPL makes great use of key bindings. Several control-key bindings were already introduced
@@ -211,7 +225,7 @@ to do so), or pressing Esc and then the key.
 
 | Keybinding          | Description                                                                                                |
 |:------------------- |:---------------------------------------------------------------------------------------------------------- |
-| **Program control** |                                                                                                            |
+| **Program control** |                                                                                                            |
 | `^D`                | Exit (when buffer is empty)                                                                                |
 | `^C`                | Interrupt or cancel                                                                                        |
 | `^L`                | Clear console screen                                                                                       |
@@ -219,7 +233,7 @@ to do so), or pressing Esc and then the key.
 | meta-Return/Enter   | Insert new line without executing it                                                                       |
 | `?` or `;`          | Enter help or shell mode (when at start of a line)                                                         |
 | `^R`, `^S`          | Incremental history search, described above                                                                |
-| **Cursor movement** |                                                                                                            |
+| **Cursor movement** |                                                                                                            |
 | Right arrow, `^F`   | Move right one character                                                                                   |
 | Left arrow, `^B`    | Move left one character                                                                                    |
 | ctrl-Right, `meta-F`| Move right one word                                                                                        |
@@ -237,7 +251,7 @@ to do so), or pressing Esc and then the key.
 | `^-Space ^-Space`   | Set the "mark" in the editing region and make the region "active", i.e. highlighted                        |
 | `^G`                | De-activate the region (i.e. make it not highlighted)                                                      |
 | `^X^X`              | Exchange the current position with the mark                                                                |
-| **Editing**         |                                                                                                            |
+| **Editing**         |                                                                                                            |
 | Backspace, `^H`     | Delete the previous character, or the whole region when it's active                                        |
 | Delete, `^D`        | Forward delete one character (when buffer has text)                                                        |
 | meta-Backspace      | Delete the previous word                                                                                   |
@@ -299,6 +313,27 @@ Users should refer to `LineEdit.jl` to discover the available actions on key inp
 In both the Julian and help modes of the REPL, one can enter the first few characters of a function
 or type and then press the tab key to get a list all matches:
 
+```julia-repl
+julia> x[TAB]
+julia> xor
+```
+
+In some cases it only completes part of the name, up to the next ambiguity:
+
+```julia-repl
+julia> mapf[TAB]
+julia> mapfold
+```
+
+If you hit tab again, then you get the list of things that might complete this:
+
+```julia-repl
+julia> mapfold[TAB]
+mapfoldl mapfoldr
+```
+
+Like other components of the REPL, the search is case-sensitive:
+
 ```julia-repl
 julia> stri[TAB]
 stride     strides     string      strip
@@ -357,6 +392,46 @@ shell> /[TAB]
 .dockerinit bin/         dev/         home/        lib64/       mnt/         proc/        run/         srv/         tmp/         var/
 ```
 
+Dictionary keys can also be tab completed:
+
+```julia-repl
+julia> foo = Dict("qwer1"=>1, "qwer2"=>2, "asdf"=>3)
+Dict{String,Int64} with 3 entries:
+  "qwer2" => 2
+  "asdf"  => 3
+  "qwer1" => 1
+
+julia> foo["q[TAB]
+
+"qwer1" "qwer2"
+julia> foo["qwer
+```
+
+Tab completion can also help completing fields:
+
+```julia-repl
+julia> x = 3 + 4im;
+
+julia> julia> x.[TAB][TAB]
+im re
+
+julia> import UUIDs
+
+julia> UUIDs.uuid[TAB][TAB]
+uuid1        uuid4         uuid5        uuid_version
+```
+
+Fields for output from functions can also be completed:
+
+```julia-repl
+julia> split("","")[1].[TAB]
+lastindex  offset  string
+```
+
+The completion of fields for output from functions uses type inference, and it can only suggest
+fields if the function is type stable.
+
+
 Tab completion can help with investigation of the available methods matching the input arguments:
 
 ```julia-repl
@@ -384,38 +459,54 @@ The completion of the methods uses type inference and can therefore see if the a
 even if the arguments are output from functions. The function needs to be type stable for the
 completion to be able to remove non-matching methods.
 
-Tab completion can also help completing fields:
+If you wonder which methods can be used with particular argument types, use `?` as the function name.
+This shows an example of looking for functions in InteractiveUtils that accept a single string:
 
 ```julia-repl
-julia> import UUIDs
-
-julia> UUIDs.uuid[TAB]
-uuid1        uuid4         uuid_version
+julia> InteractiveUtils.?("somefile")[TAB]
+edit(path::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:197
+less(file::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:266
 ```
 
-Fields for output from functions can also be completed:
+This listed methods in the `InteractiveUtils` module that can be called on a string.
+By default, this excludes methods where all arguments are typed as `Any`,
+but you can see those too by holding down SHIFT-TAB instead of TAB:
 
 ```julia-repl
-julia> split("","")[1].[TAB]
-lastindex  offset  string
+julia> InteractiveUtils.?("somefile")[SHIFT-TAB]
+apropos(string) in REPL at REPL/src/docview.jl:796
+clipboard(x) in InteractiveUtils at InteractiveUtils/src/clipboard.jl:64
+code_llvm(f) in InteractiveUtils at InteractiveUtils/src/codeview.jl:221
+code_native(f) in InteractiveUtils at InteractiveUtils/src/codeview.jl:243
+edit(path::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:197
+edit(f) in InteractiveUtils at InteractiveUtils/src/editless.jl:225
+eval(x) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:3
+include(x) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:3
+less(file::AbstractString) in InteractiveUtils at InteractiveUtils/src/editless.jl:266
+less(f) in InteractiveUtils at InteractiveUtils/src/editless.jl:274
+report_bug(kind) in InteractiveUtils at InteractiveUtils/src/InteractiveUtils.jl:391
+separate_kwargs(args...; kwargs...) in InteractiveUtils at InteractiveUtils/src/macros.jl:7
 ```
 
-The completion of fields for output from functions uses type inference, and it can only suggest
-fields if the function is type stable.
+You can also use ` ?("somefile")[TAB]`  and look across all modules, but the method lists can be long.
 
-Dictionary keys can also be tab completed:
+By omitting the closing parenthesis, you can include functions that might require additional arguments:
 
 ```julia-repl
-julia> foo = Dict("qwer1"=>1, "qwer2"=>2, "asdf"=>3)
-Dict{String,Int64} with 3 entries:
-  "qwer2" => 2
-  "asdf"  => 3
-  "qwer1" => 1
-
-julia> foo["q[TAB]
-
-"qwer1" "qwer2"
-julia> foo["qwer
+julia> using Mmap
+
+help?> Mmap.?("file",[TAB]
+Mmap.Anonymous(name::String, readonly::Bool, create::Bool) in Mmap at Mmap/src/Mmap.jl:16
+mmap(file::AbstractString) in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}) where T<:Array in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}) where {T<:Array, N} in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}, offset::Integer; grow, shared) where {T<:Array, N} in Mmap at Mmap/src/Mmap.jl:245
+mmap(file::AbstractString, ::Type{T}, len::Integer) where T<:Array in Mmap at Mmap/src/Mmap.jl:251
+mmap(file::AbstractString, ::Type{T}, len::Integer, offset::Integer; grow, shared) where T<:Array in Mmap at Mmap/src/Mmap.jl:251
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}) where {T<:BitArray, N} in Mmap at Mmap/src/Mmap.jl:316
+mmap(file::AbstractString, ::Type{T}, dims::Tuple{Vararg{Integer, N}}, offset::Integer; grow, shared) where {T<:BitArray, N} in Mmap at Mmap/src/Mmap.jl:316
+mmap(file::AbstractString, ::Type{T}, len::Integer) where T<:BitArray in Mmap at Mmap/src/Mmap.jl:322
+mmap(file::AbstractString, ::Type{T}, len::Integer, offset::Integer; grow, shared) where T<:BitArray in Mmap at Mmap/src/Mmap.jl:322
 ```
 
 ## Customizing Colors
@@ -542,7 +633,7 @@ Output:
 
 ```
 Select the fruits you like:
-[press: d=done, a=all, n=none]
+[press: Enter=toggle, a=all, n=none, d=done, q=abort]
    [ ] apple
  > [X] orange
    [X] grape
@@ -568,7 +659,7 @@ For instance, the default multiple-selection menu
 julia> menu = MultiSelectMenu(options, pagesize=5);
 
 julia> request(menu) # ASCII is used by default
-[press: d=done, a=all, n=none]
+[press: Enter=toggle, a=all, n=none, d=done, q=abort]
    [ ] apple
    [X] orange
    [ ] grape
@@ -578,11 +669,11 @@ v  [ ] blueberry
 
 can instead be rendered with Unicode selection and navigation characters with
 
-```julia
+```julia-repl
 julia> menu = MultiSelectMenu(options, pagesize=5, charset=:unicode);
 
 julia> request(menu)
-[press: d=done, a=all, n=none]
+[press: Enter=toggle, a=all, n=none, d=done, q=abort]
    ⬚ apple
    ✓ orange
    ⬚ grape
@@ -592,12 +683,12 @@ julia> request(menu)
 
 More fine-grained configuration is also possible:
 
-```julia
+```julia-repl
 julia> menu = MultiSelectMenu(options, pagesize=5, charset=:unicode, checked="YEP!", unchecked="NOPE", cursor='⧐');
 
 julia> request(menu)
 julia> request(menu)
-[press: d=done, a=all, n=none]
+[press: Enter=toggle, a=all, n=none, d=done, q=abort]
    NOPE apple
    YEP! orange
    NOPE grape
diff --git a/stdlib/REPL/src/LineEdit.jl b/stdlib/REPL/src/LineEdit.jl
index 08b11b939bfbbf..89f57383d5e486 100644
--- a/stdlib/REPL/src/LineEdit.jl
+++ b/stdlib/REPL/src/LineEdit.jl
@@ -97,13 +97,20 @@ mutable struct PromptState <: ModeState
     # indentation of lines which do not include the prompt
     # if negative, the width of the prompt is used
     indent::Int
-    refresh_lock::Threads.AbstractLock
+    refresh_lock::Threads.SpinLock
     # this would better be Threads.Atomic{Float64}, but not supported on some platforms
     beeping::Float64
     # this option is to detect when code is pasted in non-"bracketed paste mode" :
     last_newline::Float64 # register when last newline was entered
+    # this option is to speed up output
+    refresh_wait::Union{Timer,Nothing}
 end
 
+struct Modifiers
+    shift::Bool
+end
+Modifiers() = Modifiers(false)
+
 options(s::PromptState) =
     if isdefined(s.p, :repl) && isdefined(s.p.repl, :options)
         # we can't test isa(s.p.repl, LineEditREPL) as LineEditREPL is defined
@@ -180,7 +187,7 @@ function beep(s::PromptState, duration::Real=options(s).beep_duration,
     isinteractive() || return # some tests fail on some platforms
     s.beeping = min(s.beeping + duration, maxduration)
     let colors = Base.copymutable(colors)
-        @async begin
+        errormonitor(@async begin
             trylock(s.refresh_lock) || return
             try
                 orig_prefix = s.p.prompt_prefix
@@ -196,12 +203,10 @@ function beep(s::PromptState, duration::Real=options(s).beep_duration,
                 s.p.prompt_prefix = orig_prefix
                 refresh_multi_line(s, beeping=true)
                 s.beeping = 0.0
-            catch e
-                Base.showerror(stdout, e, catch_backtrace())
             finally
                 unlock(s.refresh_lock)
             end
-        end
+        end)
     end
     nothing
 end
@@ -321,7 +326,7 @@ function show_completions(s::PromptState, completions::Vector{String})
         for col = 0:num_cols
             idx = row + col*entries_per_col
             if idx <= length(completions)
-                cmove_col(terminal(s), (colmax+2)*col)
+                cmove_col(terminal(s), (colmax+2)*col+1)
                 print(terminal(s), completions[idx])
             end
         end
@@ -371,8 +376,17 @@ function complete_line(s::PromptState, repeats::Int)
     return true
 end
 
+function clear_input_area(terminal::AbstractTerminal, s::PromptState)
+    if s.refresh_wait !== nothing
+        close(s.refresh_wait)
+        s.refresh_wait = nothing
+    end
+    _clear_input_area(terminal, s.ias)
+    s.ias = InputAreaState(0, 0)
+end
 clear_input_area(terminal::AbstractTerminal, s::ModeState) = (_clear_input_area(terminal, s.ias); s.ias = InputAreaState(0, 0))
 clear_input_area(s::ModeState) = clear_input_area(s.terminal, s)
+
 function _clear_input_area(terminal::AbstractTerminal, state::InputAreaState)
     # Go to the last line
     if state.curs_row < state.num_rows
@@ -395,6 +409,13 @@ prompt_string(p::Prompt) = prompt_string(p.prompt)
 prompt_string(s::AbstractString) = s
 prompt_string(f::Function) = Base.invokelatest(f)
 
+function refresh_multi_line(s::PromptState; kw...)
+    if s.refresh_wait !== nothing
+        close(s.refresh_wait)
+        s.refresh_wait = nothing
+    end
+    refresh_multi_line(terminal(s), s; kw...)
+end
 refresh_multi_line(s::ModeState; kw...) = refresh_multi_line(terminal(s), s; kw...)
 refresh_multi_line(termbuf::TerminalBuffer, s::ModeState; kw...) = refresh_multi_line(termbuf, terminal(s), s; kw...)
 refresh_multi_line(termbuf::TerminalBuffer, term, s::ModeState; kw...) = (@assert term === terminal(s); refresh_multi_line(termbuf,s; kw...))
@@ -738,7 +759,7 @@ function edit_insert(s::PromptState, c::StringLike)
     buf = s.input_buffer
 
     if ! options(s).auto_indent_bracketed_paste
-        pos=position(buf)
+        pos = position(buf)
         if pos > 0
             if buf.data[pos] != _space && string(c) != " "
                 options(s).auto_indent_tmp_off = false
@@ -757,20 +778,55 @@ function edit_insert(s::PromptState, c::StringLike)
         end
     end
 
+    old_wait = s.refresh_wait !== nothing
+    if old_wait
+        close(s.refresh_wait)
+        s.refresh_wait = nothing
+    end
     str = string(c)
     edit_insert(buf, str)
-    offset = s.ias.curs_row == 1 || s.indent < 0 ?
-        sizeof(prompt_string(s.p.prompt)::String) : s.indent
-    if !('\n' in str) && eof(buf) &&
-        ((position(buf) - beginofline(buf) + # size of current line
-          offset + sizeof(str) - 1) < width(terminal(s)))
-        # Avoid full update when appending characters to the end
-        # and an update of curs_row isn't necessary (conservatively estimated)
-        write(terminal(s), str)
-    else
+    if '\n' in str
         refresh_line(s)
+    else
+        after = options(s).auto_refresh_time_delay
+        termbuf = terminal(s)
+        w = width(termbuf)
+        offset = s.ias.curs_row == 1 || s.indent < 0 ?
+            sizeof(prompt_string(s.p.prompt)::String) : s.indent
+        offset += position(buf) - beginofline(buf) # size of current line
+        spinner = '\0'
+        delayup = !eof(buf) || old_wait
+        if offset + textwidth(str) <= w && !(after == 0 && delayup)
+            # Avoid full update when appending characters to the end
+            # and an update of curs_row isn't necessary (conservatively estimated)
+            write(termbuf, str)
+            spinner = ' ' # temporarily clear under the cursor
+        elseif after == 0
+            refresh_line(s)
+            delayup = false
+        else # render a spinner for each key press
+            if old_wait || length(str) != 1
+                spinner = spin_seq[mod1(position(buf) - w, length(spin_seq))]
+            else
+                spinner = str[end]
+            end
+            delayup = true
+        end
+        if delayup
+            if spinner != '\0'
+                write(termbuf, spinner)
+                cmove_left(termbuf)
+            end
+            s.refresh_wait = Timer(after) do t
+                s.refresh_wait === t || return
+                s.refresh_wait = nothing
+                refresh_line(s)
+            end
+        end
     end
+    nothing
 end
+const spin_seq = ("⋯", "⋱", "⋮", "⋰")
 
 function edit_insert(buf::IOBuffer, c::StringLike)
     if eof(buf)
@@ -804,6 +860,7 @@ function edit_insert_newline(s::PromptState, align::Int = 0 - options(s).auto_in
     if ! options(s).auto_indent_bracketed_paste
         s.last_newline = time()
     end
+    nothing
 end
 
 # align: delete up to 4 spaces to align to a multiple of 4 chars
@@ -1380,7 +1437,7 @@ function normalize_keys(keymap::Union{Dict{Char,Any},AnyDict})
     return ret
 end
 
-function add_nested_key!(keymap::Dict, key, value; override = false)
+function add_nested_key!(keymap::Dict, key::Union{String, Char}, value; override = false)
     y = iterate(key)
     while y !== nothing
         c, i = y
@@ -1559,13 +1616,14 @@ function keymap_merge(target::Dict{Char,Any}, source::Union{Dict{Char,Any},AnyDi
     end
     # then redirected entries
     for key in setdiff(keys(source), keys(direct_keys))
+        key::Union{String, Char}
         # We first resolve redirects in the source
         value = source[key]
         visited = Vector{Any}()
         while isa(value, Union{Char,String})
             value = normalize_key(value)
             if value in visited
-                error("Eager redirection cycle detected for key " * escape_string(key))
+                throw_eager_redirection_cycle(key)
             end
             push!(visited,value)
             if !haskey(source,value)
@@ -1577,7 +1635,7 @@ function keymap_merge(target::Dict{Char,Any}, source::Union{Dict{Char,Any},AnyDi
         if isa(value, Union{Char,String})
             value = getEntry(ret, value)
             if value === nothing
-                error("Could not find redirected value " * escape_string(source[key]))
+                throw_could_not_find_redirected_value(key)
             end
         end
         add_nested_key!(ret, key, value; override = true)
@@ -1585,6 +1643,11 @@ function keymap_merge(target::Dict{Char,Any}, source::Union{Dict{Char,Any},AnyDi
     return ret
 end
 
+throw_eager_redirection_cycle(key::Union{Char, String}) =
+    error("Eager redirection cycle detected for key ", repr(key))
+throw_could_not_find_redirected_value(key::Union{Char, String}) =
+    error("Could not find redirected value ", repr(key))
+
 function keymap_unify(keymaps)
     ret = Dict{Char,Any}()
     for keymap in keymaps
@@ -1849,6 +1912,10 @@ mode(s::PromptState) = s.p          # ::Prompt
 mode(s::SearchState) = @assert false
 mode(s::PrefixSearchState) = s.histprompt.parent_prompt   # ::Prompt
 
+setmodifiers!(s::MIState, m::Modifiers) = setmodifiers!(mode(s), m)
+setmodifiers!(p::Prompt, m::Modifiers) = setmodifiers!(p.complete, m)
+setmodifiers!(c) = nothing
+
 # Search Mode completions
 function complete_line(s::SearchState, repeats)
     completions, partial, should_complete = complete_line(s.histprompt.complete, s)
@@ -1904,7 +1971,7 @@ function enter_prefix_search(s::MIState, p::PrefixHistoryPrompt, backward::Bool)
     parent = mode(s)
 
     transition(s, p) do
-        pss = state(s, p)
+        local pss = state(s, p)
         pss.parent = parent
         pss.histprompt.parent_prompt = parent
         pss.prefix = String(buf.data[1:position(buf)])
@@ -2116,6 +2183,11 @@ function edit_tab(s::MIState, jump_spaces::Bool=false, delete_trailing::Bool=jum
     return refresh_line(s)
 end
 
+function shift_tab_completion(s::MIState)
+    setmodifiers!(s, Modifiers(true))
+    return complete_line(s)
+end
+
 # return true iff the content of the buffer is modified
 # return false when only the position changed
 function edit_insert_tab(buf::IOBuffer, jump_spaces::Bool=false, delete_trailing::Bool=jump_spaces)
@@ -2151,6 +2223,8 @@ const default_keymap =
 AnyDict(
     # Tab
     '\t' => (s::MIState,o...)->edit_tab(s, true),
+    # Shift-tab
+    "\e[Z" => (s::MIState,o...)->shift_tab_completion(s),
     # Enter
     '\r' => (s::MIState,o...)->begin
         if on_enter(s) || (eof(buffer(s)) && s.key_repeats > 1)
@@ -2414,7 +2488,7 @@ run_interface(::Prompt) = nothing
 
 init_state(terminal, prompt::Prompt) =
     PromptState(terminal, prompt, IOBuffer(), :off, IOBuffer[], 1, InputAreaState(1, 1),
-                #=indent(spaces)=# -1, Threads.SpinLock(), 0.0, -Inf)
+                #=indent(spaces)=# -1, Threads.SpinLock(), 0.0, -Inf, nothing)
 
 function init_state(terminal, m::ModalInterface)
     s = MIState(m, m.modes[1], false, IdDict{Any,Any}())
diff --git a/stdlib/REPL/src/REPL.jl b/stdlib/REPL/src/REPL.jl
index 2187b36231be4d..bf3345f158168d 100644
--- a/stdlib/REPL/src/REPL.jl
+++ b/stdlib/REPL/src/REPL.jl
@@ -29,8 +29,7 @@ import Base:
     display,
     show,
     AnyDict,
-    ==,
-    catch_stack
+    ==
 
 _displaysize(io::IO) = displaysize(io)::Tuple{Int,Int}
 
@@ -56,6 +55,7 @@ import ..LineEdit:
     history_last,
     history_search,
     accept_result,
+    setmodifiers!,
     terminal,
     MIState,
     PromptState,
@@ -76,12 +76,15 @@ end
 answer_color(::AbstractREPL) = ""
 
 const JULIA_PROMPT = "julia> "
+const PKG_PROMPT = "pkg> "
+const SHELL_PROMPT = "shell> "
+const HELP_PROMPT = "help?> "
 
 mutable struct REPLBackend
     "channel for AST"
-    repl_channel::Channel
+    repl_channel::Channel{Any}
     "channel for results: (value, iserror)"
-    response_channel::Channel
+    response_channel::Channel{Any}
     "flag indicating the state of this backend"
     in_eval::Bool
     "transformation functions to apply before evaluating expressions"
@@ -123,6 +126,12 @@ const softscope! = softscope
 
 const repl_ast_transforms = Any[softscope] # defaults for new REPL backends
 
+# Allows an external package to add hooks into the code loading.
+# The hook should take a Vector{Symbol} of package names and
+# return true if all packages could be installed, false if not
+# to e.g. install packages on demand
+const install_packages_hooks = Any[]
+
 function eval_user_input(@nospecialize(ast), backend::REPLBackend)
     lasterr = nothing
     Base.sigatomic_begin()
@@ -130,17 +139,19 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend)
         try
             Base.sigatomic_end()
             if lasterr !== nothing
-                put!(backend.response_channel, (lasterr,true))
+                put!(backend.response_channel, Pair{Any, Bool}(lasterr, true))
             else
                 backend.in_eval = true
+                if !isempty(install_packages_hooks)
+                    check_for_missing_packages_and_run_hooks(ast)
+                end
                 for xf in backend.ast_transforms
                     ast = Base.invokelatest(xf, ast)
                 end
                 value = Core.eval(Main, ast)
                 backend.in_eval = false
-                # note: use jl_set_global to make sure value isn't passed through `expand`
-                ccall(:jl_set_global, Cvoid, (Any, Any, Any), Main, :ans, value)
-                put!(backend.response_channel, (value,false))
+                setglobal!(Main, :ans, value)
+                put!(backend.response_channel, Pair{Any, Bool}(value, false))
             end
             break
         catch err
@@ -148,22 +159,57 @@ function eval_user_input(@nospecialize(ast), backend::REPLBackend)
                 println("SYSTEM ERROR: Failed to report error to REPL frontend")
                 println(err)
             end
-            lasterr = catch_stack()
+            lasterr = current_exceptions()
         end
     end
     Base.sigatomic_end()
     nothing
 end
 
+function check_for_missing_packages_and_run_hooks(ast)
+    isa(ast, Expr) || return
+    mods = modules_to_be_loaded(ast)
+    filter!(mod -> isnothing(Base.identify_package(String(mod))), mods) # keep missing modules
+    if !isempty(mods)
+        for f in install_packages_hooks
+            Base.invokelatest(f, mods) && return
+        end
+    end
+end
+
+function modules_to_be_loaded(ast::Expr, mods::Vector{Symbol} = Symbol[])
+    ast.head == :quote && return mods # don't search if it's not going to be run during this eval
+    if ast.head in [:using, :import]
+        for arg in ast.args
+            arg = arg::Expr
+            arg1 = first(arg.args)
+            if arg1 isa Symbol # i.e. `Foo`
+                if arg1 != :. # don't include local imports
+                    push!(mods, arg1)
+                end
+            else # i.e. `Foo: bar`
+                push!(mods, first((arg1::Expr).args))
+            end
+        end
+    end
+    for arg in ast.args
+        if isexpr(arg, (:block, :if, :using, :import))
+            modules_to_be_loaded(arg, mods)
+        end
+    end
+    filter!(mod -> !in(String(mod), ["Base", "Main", "Core"]), mods) # Exclude special non-package modules
+    return unique(mods)
+end
+
 """
-    start_repl_backend(repl_channel::Channel,response_channel::Channel)
+    start_repl_backend(repl_channel::Channel, response_channel::Channel)
 
     Starts loop for REPL backend
     Returns a REPLBackend with backend_task assigned
 
     Deprecated since sync / async behavior cannot be selected
 """
-function start_repl_backend(repl_channel::Channel, response_channel::Channel)
+function start_repl_backend(repl_channel::Channel{Any}, response_channel::Channel{Any})
     # Maintain legacy behavior of asynchronous backend
     backend = REPLBackend(repl_channel, response_channel, false)
     # Assignment will be made twice, but will be immediately available
@@ -209,6 +255,7 @@ end
 ==(a::REPLDisplay, b::REPLDisplay) = a.repl === b.repl
 
 function display(d::REPLDisplay, mime::MIME"text/plain", x)
+    x = Ref{Any}(x)
     with_repl_linfo(d.repl) do io
         io = IOContext(io, :limit => true, :module => Main::Module)
         get(io, :color, false) && write(io, answer_color(d.repl))
@@ -216,14 +263,14 @@ function display(d::REPLDisplay, mime::MIME"text/plain", x)
             # this can override the :limit property set initially
             io = foldl(IOContext, d.repl.options.iocontext, init=io)
         end
-        show(io, mime, x)
+        show(io, mime, x[])
         println(io)
     end
     return nothing
 end
 display(d::REPLDisplay, x) = display(d, MIME("text/plain"), x)
 
-function print_response(repl::AbstractREPL, @nospecialize(response), show_value::Bool, have_color::Bool)
+function print_response(repl::AbstractREPL, response, show_value::Bool, have_color::Bool)
     repl.waserror = response[2]
     with_repl_linfo(repl) do io
         io = IOContext(io, :module => Main::Module)
@@ -231,13 +278,15 @@ function print_response(repl::AbstractREPL, @nospecialize(response), show_value:
     end
     return nothing
 end
-function print_response(errio::IO, @nospecialize(response), show_value::Bool, have_color::Bool, specialdisplay::Union{AbstractDisplay,Nothing}=nothing)
+function print_response(errio::IO, response, show_value::Bool, have_color::Bool, specialdisplay::Union{AbstractDisplay,Nothing}=nothing)
     Base.sigatomic_begin()
     val, iserr = response
     while true
         try
             Base.sigatomic_end()
             if iserr
+                val = Base.scrub_repl_backtrace(val)
+                Base.istrivialerror(val) || setglobal!(Main, :err, val)
                 Base.invokelatest(Base.display_error, errio, val)
             else
                 if val !== nothing && show_value
@@ -254,12 +303,14 @@ function print_response(errio::IO, @nospecialize(response), show_value::Bool, ha
                 end
             end
             break
-        catch
+        catch ex
             if iserr
                 println(errio) # an error during printing is likely to leave us mid-line
                 println(errio, "SYSTEM (REPL): showing an error caused an error")
                 try
-                    Base.invokelatest(Base.display_error, errio, catch_stack())
+                    excs = Base.scrub_repl_backtrace(current_exceptions())
+                    setglobal!(Main, :err, excs)
+                    Base.invokelatest(Base.display_error, errio, excs)
                 catch e
                     # at this point, only print the name of the type as a Symbol to
                     # minimize the possibility of further errors.
@@ -269,7 +320,7 @@ function print_response(errio::IO, @nospecialize(response), show_value::Bool, ha
                 end
                 break
             end
-            val = catch_stack()
+            val = current_exceptions()
             iserr = true
         end
     end
@@ -279,12 +330,12 @@ end
 
 # A reference to a backend that is not mutable
 struct REPLBackendRef
-    repl_channel::Channel
-    response_channel::Channel
+    repl_channel::Channel{Any}
+    response_channel::Channel{Any}
 end
 REPLBackendRef(backend::REPLBackend) = REPLBackendRef(backend.repl_channel, backend.response_channel)
 function destroy(ref::REPLBackendRef, state::Task)
-    if istaskfailed(state) && Base.task_result(state) isa Exception
+    if istaskfailed(state)
         close(ref.repl_channel, TaskFailedException(state))
         close(ref.response_channel, TaskFailedException(state))
     end
@@ -312,10 +363,12 @@ function run_repl(repl::AbstractREPL, @nospecialize(consumer = x -> nothing); ba
         end
     if backend_on_current_task
         t = @async run_frontend(repl, backend_ref)
+        errormonitor(t)
         Base._wait2(t, cleanup)
         start_repl_backend(backend, consumer)
     else
         t = @async start_repl_backend(backend, consumer)
+        errormonitor(t)
         Base._wait2(t, cleanup)
         run_frontend(repl, backend_ref)
     end
@@ -426,16 +479,22 @@ LineEditREPL(t::TextTerminal, hascolor::Bool, envcolors::Bool=false) =
         false, false, false, envcolors
     )
 
-mutable struct REPLCompletionProvider <: CompletionProvider end
+mutable struct REPLCompletionProvider <: CompletionProvider
+    modifiers::LineEdit.Modifiers
+end
+REPLCompletionProvider() = REPLCompletionProvider(LineEdit.Modifiers())
 mutable struct ShellCompletionProvider <: CompletionProvider end
 struct LatexCompletions <: CompletionProvider end
 
+setmodifiers!(c::REPLCompletionProvider, m::LineEdit.Modifiers) = c.modifiers = m
+
 beforecursor(buf::IOBuffer) = String(buf.data[1:buf.ptr-1])
 
 function complete_line(c::REPLCompletionProvider, s::PromptState)
     partial = beforecursor(s.input_buffer)
     full = LineEdit.input_string(s)
-    ret, range, should_complete = completions(full, lastindex(partial))
+    ret, range, should_complete = completions(full, lastindex(partial), Main, c.modifiers.shift)
+    c.modifiers = LineEdit.Modifiers()
     return unique!(map(completion_text, ret)), partial[range], should_complete
 end
 
@@ -447,9 +506,9 @@ function complete_line(c::ShellCompletionProvider, s::PromptState)
     return unique!(map(completion_text, ret)), partial[range], should_complete
 end
 
-function complete_line(c::LatexCompletions, s::PromptState)
+function complete_line(c::LatexCompletions, s)
     partial = beforecursor(LineEdit.buffer(s))
-    full = LineEdit.input_string(s)
+    full = LineEdit.input_string(s)::String
     ret, range, should_complete = bslash_completions(full, lastindex(partial))[2]
     return unique!(map(completion_text, ret)), partial[range], should_complete
 end
@@ -467,6 +526,7 @@ end
 
 mutable struct REPLHistoryProvider <: HistoryProvider
     history::Vector{String}
+    file_path::String
     history_file::Union{Nothing,IO}
     start_idx::Int
     cur_idx::Int
@@ -477,7 +537,7 @@ mutable struct REPLHistoryProvider <: HistoryProvider
     modes::Vector{Symbol}
 end
 REPLHistoryProvider(mode_mapping::Dict{Symbol}) =
-    REPLHistoryProvider(String[], nothing, 0, 0, -1, IOBuffer(),
+    REPLHistoryProvider(String[], "", nothing, 0, 0, -1, IOBuffer(),
                         nothing, mode_mapping, UInt8[])
 
 invalid_history_message(path::String) = """
@@ -490,6 +550,12 @@ munged_history_message(path::String) = """
 Invalid history file ($path) format:
 An editor may have converted tabs to spaces at line """
 
+function hist_open_file(hp::REPLHistoryProvider)
+    f = open(hp.file_path, read=true, write=true, create=true)
+    hp.history_file = f
+    seekend(f)
+end
+
 function hist_from_file(hp::REPLHistoryProvider, path::String)
     getline(lines, i) = i > length(lines) ? "" : lines[i]
     file_lines = readlines(path)
@@ -558,7 +624,14 @@ function add_history(hist::REPLHistoryProvider, s::PromptState)
     $(replace(str, r"^"ms => "\t"))
     """
     # TODO: write-lock history file
-    seekend(hist.history_file)
+    try
+        seekend(hist.history_file)
+    catch err
+        (err isa SystemError) || rethrow()
+        # File handle might get stale after a while, especially under network file systems
+        # If this doesn't fix it (e.g. when file is deleted), we'll end up rethrowing anyway
+        hist_open_file(hist)
+    end
     print(hist.history_file, entry)
     flush(hist.history_file)
     nothing
@@ -791,7 +864,7 @@ function respond(f, repl, main; pass_empty::Bool = false, suppress_on_semicolon:
                 ast = Base.invokelatest(f, line)
                 response = eval_with_backend(ast, backend(repl))
             catch
-                response = (catch_stack(), true)
+                response = Pair{Any, Bool}(current_exceptions(), true)
             end
             hide_output = suppress_on_semicolon && ends_with_semicolon(line)
             print_response(repl, response, !hide_output, hascolor(repl))
@@ -894,19 +967,19 @@ function setup_interface(
         on_enter = return_callback)
 
     # Setup help mode
-    help_mode = Prompt("help?> ",
+    help_mode = Prompt(HELP_PROMPT,
         prompt_prefix = hascolor ? repl.help_color : "",
         prompt_suffix = hascolor ?
             (repl.envcolors ? Base.input_color : repl.input_color) : "",
         repl = repl,
         complete = replc,
         # When we're done transform the entered line into a call to helpmode function
-        on_done = respond(line->helpmode(outstream(repl), line), repl, julia_prompt,
+        on_done = respond(line::String->helpmode(outstream(repl), line), repl, julia_prompt,
                           pass_empty=true, suppress_on_semicolon=false))
 
 
     # Set up shell mode
-    shell_mode = Prompt("shell> ";
+    shell_mode = Prompt(SHELL_PROMPT;
         prompt_prefix = hascolor ? repl.shell_color : "",
         prompt_suffix = hascolor ?
             (repl.envcolors ? Base.input_color : repl.input_color) : "",
@@ -919,7 +992,8 @@ function setup_interface(
             Expr(:call, :(Base.repl_cmd),
                 :(Base.cmd_gen($(Base.shell_parse(line::String)[1]))),
                 outstream(repl))
-        end)
+        end,
+        sticky = true)
 
 
     ################################# Stage II #############################
@@ -933,16 +1007,15 @@ function setup_interface(
         try
             hist_path = find_hist_file()
             mkpath(dirname(hist_path))
-            f = open(hist_path, read=true, write=true, create=true)
-            hp.history_file = f
-            seekend(f)
+            hp.file_path = hist_path
+            hist_open_file(hp)
             finalizer(replc) do replc
-                close(f)
+                close(hp.history_file)
             end
             hist_from_file(hp, hist_path)
         catch
             # use REPL.hascolor to avoid using the local variable with the same name
-            print_response(repl, (catch_stack(),true), true, REPL.hascolor(repl))
+            print_response(repl, Pair{Any, Bool}(current_exceptions(), true), true, REPL.hascolor(repl))
             println(outstream(repl))
             @info "Disabling history file for this session"
             repl.history_file = false
@@ -959,6 +1032,12 @@ function setup_interface(
     search_prompt, skeymap = LineEdit.setup_search_keymap(hp)
     search_prompt.complete = LatexCompletions()
 
+    jl_prompt_len = length(JULIA_PROMPT)
+    pkg_prompt_len = length(PKG_PROMPT)
+    shell_prompt_len = length(SHELL_PROMPT)
+    help_prompt_len = length(HELP_PROMPT)
+    pkg_prompt_regex = r"^(?:\(.+\) )?pkg> "
+
     # Canonicalize user keymap input
     if isa(extra_repl_keymap, Dict)
         extra_repl_keymap = AnyDict[extra_repl_keymap]
@@ -1011,12 +1090,15 @@ function setup_interface(
             oldpos = firstindex(input)
             firstline = true
             isprompt_paste = false
-            jl_prompt_len = 7 # "julia> "
+            curr_prompt_len = 0
+            pasting_help = false
+
             while oldpos <= lastindex(input) # loop until all lines have been executed
                 if JL_PROMPT_PASTE[]
-                    # Check if the next statement starts with "julia> ", in that case
-                    # skip it. But first skip whitespace
-                    while input[oldpos] in ('\n', ' ', '\t')
+                    # Check if the next statement starts with a prompt i.e. "julia> ", in that case
+                    # skip it. But first skip whitespace unless pasting in a docstring which may have
+                    # indented prompt examples that we don't want to execute
+                    while input[oldpos] in (pasting_help ? ('\n') : ('\n', ' ', '\t'))
                         oldpos = nextind(input, oldpos)
                         oldpos >= sizeof(input) && return
                     end
@@ -1024,7 +1106,32 @@ function setup_interface(
                     if (firstline || isprompt_paste) && startswith(SubString(input, oldpos), JULIA_PROMPT)
                         isprompt_paste = true
                         oldpos += jl_prompt_len
-                    # If we are prompt pasting and current statement does not begin with julia> , skip to next line
+                        curr_prompt_len = jl_prompt_len
+                        transition(s, julia_prompt)
+                        pasting_help = false
+                    # Check if input line starts with "pkg> " or "(...) pkg> ", remove it if we are in prompt paste mode and switch mode
+                    elseif (firstline || isprompt_paste) && startswith(SubString(input, oldpos), pkg_prompt_regex)
+                        detected_pkg_prompt = match(pkg_prompt_regex, SubString(input, oldpos)).match
+                        isprompt_paste = true
+                        curr_prompt_len = sizeof(detected_pkg_prompt)
+                        oldpos += curr_prompt_len
+                        Base.active_repl.interface.modes[1].keymap_dict[']'](s, o...)
+                        pasting_help = false
+                    # Check if input line starts with "shell> ", remove it if we are in prompt paste mode and switch mode
+                    elseif (firstline || isprompt_paste) && startswith(SubString(input, oldpos), SHELL_PROMPT)
+                        isprompt_paste = true
+                        oldpos += shell_prompt_len
+                        curr_prompt_len = shell_prompt_len
+                        transition(s, shell_mode)
+                        pasting_help = false
+                    # Check if input line starts with "help?> ", remove it if we are in prompt paste mode and switch mode
+                    elseif (firstline || isprompt_paste) && startswith(SubString(input, oldpos), HELP_PROMPT)
+                        isprompt_paste = true
+                        oldpos += help_prompt_len
+                        curr_prompt_len = help_prompt_len
+                        transition(s, help_mode)
+                        pasting_help = true
+                    # If we are prompt pasting and current statement does not begin with a mode prefix, skip to next line
                     elseif isprompt_paste
                         while input[oldpos] != '\n'
                             oldpos = nextind(input, oldpos)
@@ -1033,11 +1140,35 @@ function setup_interface(
                         continue
                     end
                 end
-                ast, pos = Meta.parse(input, oldpos, raise=false, depwarn=false)
-                if (isa(ast, Expr) && (ast.head === :error || ast.head === :incomplete)) ||
-                        (pos > ncodeunits(input) && !endswith(input, '\n'))
-                    # remaining text is incomplete (an error, or parser ran to the end but didn't stop with a newline):
-                    # Insert all the remaining text as one line (might be empty)
+                dump_tail = false
+                nl_pos = findfirst('\n', input[oldpos:end])
+                if s.current_mode == julia_prompt
+                    ast, pos = Meta.parse(input, oldpos, raise=false, depwarn=false)
+                    if (isa(ast, Expr) && (ast.head === :error || ast.head === :incomplete)) ||
+                            (pos > ncodeunits(input) && !endswith(input, '\n'))
+                        # remaining text is incomplete (an error, or parser ran to the end but didn't stop with a newline):
+                        # Insert all the remaining text as one line (might be empty)
+                        dump_tail = true
+                    end
+                elseif isnothing(nl_pos) # no newline at end, so just dump the tail into the prompt and don't execute
+                    dump_tail = true
+                elseif s.current_mode == shell_mode # handle multiline shell commands
+                    lines = split(input[oldpos:end], '\n')
+                    pos = oldpos + sizeof(lines[1]) + 1
+                    if length(lines) > 1
+                        for line in lines[2:end]
+                            # to be recognized as a multiline shell command, the lines must be indented to the
+                            # same prompt position
+                            if !startswith(line, ' '^curr_prompt_len)
+                                break
+                            end
+                            pos += sizeof(line) + 1
+                        end
+                    end
+                else
+                    pos = oldpos + nl_pos
+                end
+                if dump_tail
                     tail = input[oldpos:end]
                     if !firstline
                         # strip leading whitespace, but only if it was the result of executing something
@@ -1045,7 +1176,7 @@ function setup_interface(
                         tail = lstrip(tail)
                     end
                     if isprompt_paste # remove indentation spaces corresponding to the prompt
-                        tail = replace(tail, r"^"m * ' '^jl_prompt_len => "")
+                        tail = replace(tail, r"^"m * ' '^curr_prompt_len => "")
                     end
                     LineEdit.replace_line(s, tail, true)
                     LineEdit.refresh_line(s)
@@ -1055,7 +1186,7 @@ function setup_interface(
                 line = strip(input[oldpos:prevind(input, pos)])
                 if !isempty(line)
                     if isprompt_paste # remove indentation spaces corresponding to the prompt
-                        line = replace(line, r"^"m * ' '^jl_prompt_len => "")
+                        line = replace(line, r"^"m * ' '^curr_prompt_len => "")
                     end
                     # put the line on the screen and history
                     LineEdit.replace_line(s, line)
@@ -1083,7 +1214,12 @@ function setup_interface(
             if n <= 0 || n > length(linfos) || startswith(linfos[n][1], "REPL[")
                 @goto writeback
             end
-            InteractiveUtils.edit(linfos[n][1], linfos[n][2])
+            try
+                InteractiveUtils.edit(linfos[n][1], linfos[n][2])
+            catch ex
+                ex isa ProcessFailedException || ex isa Base.IOError || ex isa SystemError || rethrow()
+                @info "edit failed" _exception=ex
+            end
             LineEdit.refresh_line(s)
             return
             @label writeback
@@ -1149,57 +1285,50 @@ answer_color(r::StreamREPL) = r.answer_color
 input_color(r::LineEditREPL) = r.envcolors ? Base.input_color() : r.input_color
 input_color(r::StreamREPL) = r.input_color
 
-# heuristic function to decide if the presence of a semicolon
-# at the end of the expression was intended for suppressing output
-function ends_with_semicolon(line::AbstractString)
-    match = findlast(isequal(';'), line)::Union{Nothing,Int}
-    if match !== nothing
-        # state for comment parser, assuming that the `;` isn't in a string or comment
-        # so input like ";#" will still thwart this to give the wrong (anti-conservative) answer
-        comment = false
-        comment_start = false
-        comment_close = false
-        comment_multi = 0
-        for c in line[(match + 1):end]
-            if comment_multi > 0
-                # handle nested multi-line comments
-                if comment_close && c == '#'
-                    comment_close = false
-                    comment_multi -= 1
-                elseif comment_start && c == '='
-                    comment_start = false
-                    comment_multi += 1
-                else
-                    comment_start = (c == '#')
-                    comment_close = (c == '=')
-                end
-            elseif comment
-                # handle line comments
-                if c == '\r' || c == '\n'
-                    comment = false
+let matchend = Dict("\"" => r"\"", "\"\"\"" => r"\"\"\"", "'" => r"'",
+    "`" => r"`", "```" => r"```", "#" => r"$"m, "#=" => r"=#|#=")
+    global _rm_strings_and_comments
+    function _rm_strings_and_comments(code::Union{String,SubString{String}})
+        buf = IOBuffer(sizehint = sizeof(code))
+        pos = 1
+        while true
+            i = findnext(r"\"(?!\"\")|\"\"\"|'|`(?!``)|```|#(?!=)|#=", code, pos)
+            isnothing(i) && break
+            match = SubString(code, i)
+            j = findnext(matchend[match]::Regex, code, nextind(code, last(i)))
+            if match == "#=" # possibly nested
+                nested = 1
+                while j !== nothing
+                    nested += SubString(code, j) == "#=" ? +1 : -1
+                    iszero(nested) && break
+                    j = findnext(r"=#|#=", code, nextind(code, last(j)))
                 end
-            elseif comment_start
-                # see what kind of comment this is
-                comment_start = false
-                if c == '='
-                    comment_multi = 1
-                else
-                    comment = true
+            elseif match[1] != '#' # quote match: check non-escaped
+                while j !== nothing
+                    notbackslash = findprev(!=('\\'), code, prevind(code, first(j)))::Int
+                    isodd(first(j) - notbackslash) && break # not escaped
+                    j = findnext(matchend[match]::Regex, code, nextind(code, first(j)))
                 end
-            elseif c == '#'
-                # start handling for a comment
-                comment_start = true
+            end
+            isnothing(j) && break
+            if match[1] == '#'
+                print(buf, SubString(code, pos, prevind(code, first(i))))
             else
-                # outside of a comment, encountering anything but whitespace
-                # means the semi-colon was internal to the expression
-                isspace(c) || return false
+                print(buf, SubString(code, pos, last(i)), ' ', SubString(code, j))
             end
+            pos = nextind(code, last(j))
         end
-        return true
+        print(buf, SubString(code, pos, lastindex(code)))
+        return String(take!(buf))
     end
-    return false
 end
 
+# heuristic function to decide if the presence of a semicolon
+# at the end of the expression was intended for suppressing output
+ends_with_semicolon(code::AbstractString) = ends_with_semicolon(String(code))
+ends_with_semicolon(code::Union{String,SubString{String}}) =
+    contains(_rm_strings_and_comments(code), r";\s*$")
+
 function run_frontend(repl::StreamREPL, backend::REPLBackendRef)
     have_color = hascolor(repl)
     Base.banner(repl.stream)
@@ -1230,12 +1359,4 @@ function run_frontend(repl::StreamREPL, backend::REPLBackendRef)
     nothing
 end
 
-function start_repl_server(port::Int)
-    return listen(port) do server, status
-        client = accept(server)
-        run_repl(client)
-        nothing
-    end
-end
-
 end # module
diff --git a/stdlib/REPL/src/REPLCompletions.jl b/stdlib/REPL/src/REPLCompletions.jl
index e51b811ed9d6f9..162d1184d18c39 100644
--- a/stdlib/REPL/src/REPLCompletions.jl
+++ b/stdlib/REPL/src/REPLCompletions.jl
@@ -9,6 +9,10 @@ using Base: propertynames, something
 
 abstract type Completion end
 
+struct TextCompletion <: Completion
+    text::String
+end
+
 struct KeywordCompletion <: Completion
     keyword::String
 end
@@ -37,9 +41,9 @@ struct FieldCompletion <: Completion
 end
 
 struct MethodCompletion <: Completion
-    func
-    input_types::Type
+    tt # may be used by an external consumer to infer return type, etc.
     method::Method
+    MethodCompletion(@nospecialize(tt), method::Method) = new(tt, method)
 end
 
 struct BslashCompletion <: Completion
@@ -57,7 +61,9 @@ end
 
 # interface definition
 function Base.getproperty(c::Completion, name::Symbol)
-    if name === :keyword
+    if name === :text
+        return getfield(c, :text)::String
+    elseif name === :keyword
         return getfield(c, :keyword)::String
     elseif name === :path
         return getfield(c, :path)::String
@@ -83,13 +89,14 @@ function Base.getproperty(c::Completion, name::Symbol)
     return getfield(c, name)
 end
 
+_completion_text(c::TextCompletion) = c.text
 _completion_text(c::KeywordCompletion) = c.keyword
 _completion_text(c::PathCompletion) = c.path
 _completion_text(c::ModuleCompletion) = c.mod
 _completion_text(c::PackageCompletion) = c.package
 _completion_text(c::PropertyCompletion) = string(c.property)
 _completion_text(c::FieldCompletion) = string(c.field)
-_completion_text(c::MethodCompletion) = sprint(io -> show(io, c.method))
+_completion_text(c::MethodCompletion) = repr(c.method)
 _completion_text(c::BslashCompletion) = c.bslash
 _completion_text(c::ShellCompletion) = c.text
 _completion_text(c::DictCompletion) = c.key
@@ -124,7 +131,7 @@ function filtered_mod_names(ffunc::Function, mod::Module, name::AbstractString,
 end
 
 # REPL Symbol Completions
-function complete_symbol(sym::String, ffunc, context_module::Module=Main)
+function complete_symbol(sym::String, @nospecialize(ffunc), context_module::Module=Main)
     mod = context_module
     name = sym
 
@@ -143,7 +150,7 @@ function complete_symbol(sym::String, ffunc, context_module::Module=Main)
             if isa(b, Module)
                 mod = b
                 lookup_module = true
-            elseif Base.isstructtype(typeof(b))
+            else
                 lookup_module = false
                 t = typeof(b)
             end
@@ -314,7 +321,7 @@ end
 function should_method_complete(s::AbstractString)
     method_complete = false
     for c in reverse(s)
-        if c in [',', '(']
+        if c in [',', '(', ';']
             method_complete = true
             break
         elseif !(c in whitespace_chars)
@@ -334,9 +341,25 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')')
     in_single_quotes = false
     in_double_quotes = false
     in_back_ticks = false
+    in_comment = 0
     while i <= ncodeunits(r)
         c, i = iterate(r, i)
-        if !in_single_quotes && !in_double_quotes && !in_back_ticks
+        if c == '#' && i <= ncodeunits(r) && iterate(r, i)[1] == '='
+            c, i = iterate(r, i) # consume '='
+            new_comments = 1
+            # handle #=#=#=#, by counting =# pairs
+            while i <= ncodeunits(r) && iterate(r, i)[1] == '#'
+                c, i = iterate(r, i) # consume '#'
+                iterate(r, i)[1] == '=' || break
+                c, i = iterate(r, i) # consume '='
+                new_comments += 1
+            end
+            if c == '='
+                in_comment += new_comments
+            else
+                in_comment -= new_comments
+            end
+        elseif !in_single_quotes && !in_double_quotes && !in_back_ticks && in_comment == 0
             if c == c_start
                 braces += 1
             elseif c == c_end
@@ -349,15 +372,31 @@ function find_start_brace(s::AbstractString; c_start='(', c_end=')')
                 in_back_ticks = true
             end
         else
-            if !in_back_ticks && !in_double_quotes &&
+            if in_single_quotes &&
                 c == '\'' && i <= ncodeunits(r) && iterate(r, i)[1] != '\\'
-                in_single_quotes = !in_single_quotes
-            elseif !in_back_ticks && !in_single_quotes &&
+                in_single_quotes = false
+            elseif in_double_quotes &&
                 c == '"' && i <= ncodeunits(r) && iterate(r, i)[1] != '\\'
-                in_double_quotes = !in_double_quotes
-            elseif !in_single_quotes && !in_double_quotes &&
+                in_double_quotes = false
+            elseif in_back_ticks &&
                 c == '`' && i <= ncodeunits(r) && iterate(r, i)[1] != '\\'
-                in_back_ticks = !in_back_ticks
+                in_back_ticks = false
+            elseif in_comment > 0 &&
+                c == '=' && i <= ncodeunits(r) && iterate(r, i)[1] == '#'
+                # handle =#=#=#=, by counting #= pairs
+                c, i = iterate(r, i) # consume '#'
+                old_comments = 1
+                while i <= ncodeunits(r) && iterate(r, i)[1] == '='
+                    c, i = iterate(r, i) # consume '='
+                    iterate(r, i)[1] == '#' || break
+                    c, i = iterate(r, i) # consume '#'
+                    old_comments += 1
+                end
+                if c == '#'
+                    in_comment -= old_comments
+                else
+                    in_comment += old_comments
+                end
             end
         end
         braces == 1 && break
@@ -374,55 +413,52 @@ end
 # will show it consist of Expr, QuoteNode's and Symbol's which all needs to
 # be handled differently to iterate down to get the value of whitespace_chars.
 function get_value(sym::Expr, fn)
+    if sym.head === :quote || sym.head === :inert
+        return sym.args[1], true
+    end
     sym.head !== :. && return (nothing, false)
     for ex in sym.args
+        ex, found = get_value(ex, fn)
+        !found && return (nothing, false)
         fn, found = get_value(ex, fn)
         !found && return (nothing, false)
     end
     return (fn, true)
 end
 get_value(sym::Symbol, fn) = isdefined(fn, sym) ? (getfield(fn, sym), true) : (nothing, false)
-get_value(sym::QuoteNode, fn) = isdefined(fn, sym.value) ? (getfield(fn, sym.value), true) : (nothing, false)
+get_value(sym::QuoteNode, fn) = (sym.value, true)
+get_value(sym::GlobalRef, fn) = get_value(sym.name, sym.mod)
 get_value(sym, fn) = (sym, true)
 
-# Return the value of a getfield call expression
-function get_value_getfield(ex::Expr, fn)
-    # Example :((top(getfield))(Base,:max))
-    val, found = get_value_getfield(ex.args[2],fn) #Look up Base in Main and returns the module
-    (found && length(ex.args) >= 3) || return (nothing, false)
-    return get_value_getfield(ex.args[3], val) #Look up max in Base and returns the function if found.
-end
-get_value_getfield(sym, fn) = get_value(sym, fn)
-
-# Determines the return type with Base.return_types of a function call using the type information of the arguments.
-function get_type_call(expr::Expr)
+# Return the type of a getfield call expression
+function get_type_getfield(ex::Expr, fn::Module)
+    length(ex.args) == 3 || return Any, false # should never happen, but just for safety
+    fld, found = get_value(ex.args[3], fn)
+    fld isa Symbol || return Any, false
+    obj = ex.args[2]
+    objt, found = get_type(obj, fn)
+    found || return Any, false
+    objt isa DataType || return Any, false
+    hasfield(objt, fld) || return Any, false
+    return fieldtype(objt, fld), true
+end
+
+# Determines the return type with the Compiler of a function call using the type information of the arguments.
+function get_type_call(expr::Expr, fn::Module)
     f_name = expr.args[1]
-    # The if statement should find the f function. How f is found depends on how f is referenced
-    if isa(f_name, GlobalRef) && isconst(f_name.mod,f_name.name) && isdefined(f_name.mod,f_name.name)
-        ft = typeof(eval(f_name))
-        found = true
-    else
-        ft, found = get_type(f_name, Main)
-    end
+    f, found = get_type(f_name, fn)
     found || return (Any, false) # If the function f is not found return Any.
     args = Any[]
-    for ex in expr.args[2:end] # Find the type of the function arguments
-        typ, found = get_type(ex, Main)
+    for i in 2:length(expr.args) # Find the type of the function arguments
+        typ, found = get_type(expr.args[i], fn)
         found ? push!(args, typ) : push!(args, Any)
     end
-    # use _methods_by_ftype as the function is supplied as a type
     world = Base.get_world_counter()
-    matches = Base._methods_by_ftype(Tuple{ft, args...}, -1, world)
-    length(matches) == 1 || return (Any, false)
-    match = first(matches)
-    # Typeinference
-    interp = Core.Compiler.NativeInterpreter()
-    return_type = Core.Compiler.typeinf_type(interp, match.method, match.spec_types, match.sparams)
-    return_type === nothing && return (Any, false)
+    return_type = Core.Compiler.return_type(Tuple{f, args...}, world)
     return (return_type, true)
 end
 
-# Returns the return type. example: get_type(:(Base.strip("", ' ')), Main) returns (String, true)
+# Returns the return type. example: get_type(:(Base.strip("", ' ')), Main) returns (SubString{String}, true)
 function try_get_type(sym::Expr, fn::Module)
     val, found = get_value(sym, fn)
     found && return Core.Typeof(val), found
@@ -430,12 +466,10 @@ function try_get_type(sym::Expr, fn::Module)
         # getfield call is special cased as the evaluation of getfield provides good type information,
         # is inexpensive and it is also performed in the complete_symbol function.
         a1 = sym.args[1]
-        if isa(a1,GlobalRef) && isconst(a1.mod,a1.name) && isdefined(a1.mod,a1.name) &&
-            eval(a1) === Core.getfield
-            val, found = get_value_getfield(sym, Main)
-            return found ? Core.Typeof(val) : Any, found
+        if a1 === :getfield || a1 === GlobalRef(Core, :getfield)
+            return get_type_getfield(sym, fn)
         end
-        return get_type_call(sym)
+        return get_type_call(sym, fn)
     elseif sym.head === :thunk
         thk = sym.args[1]
         rt = ccall(:jl_infer_thunk, Any, (Any, Any), thk::Core.CodeInfo, fn)
@@ -443,7 +477,7 @@ function try_get_type(sym::Expr, fn::Module)
     elseif sym.head === :ref
         # some simple cases of `expand`
         return try_get_type(Expr(:call, GlobalRef(Base, :getindex), sym.args...), fn)
-    elseif sym.head === :.  && sym.args[2] isa QuoteNode # second check catches broadcasting
+    elseif sym.head === :. && sym.args[2] isa QuoteNode # second check catches broadcasting
         return try_get_type(Expr(:call, GlobalRef(Core, :getfield), sym.args...), fn)
     end
     return (Any, false)
@@ -455,7 +489,21 @@ function get_type(sym::Expr, fn::Module)
     # try to analyze nests of calls. if this fails, try using the expanded form.
     val, found = try_get_type(sym, fn)
     found && return val, found
-    return try_get_type(Meta.lower(fn, sym), fn)
+    # https://github.com/JuliaLang/julia/issues/27184
+    if isexpr(sym, :macrocall)
+        _, found = get_type(first(sym.args), fn)
+        found || return Any, false
+    end
+    newsym = try
+        Meta.lower(fn, sym)
+    catch e
+        e isa LoadError && return Any, false
+        # If e is not a LoadError then Meta.lower crashed in an unexpected way.
+        # Since this is not a specific to the user code but an internal error,
+        # rethrow the error to allow reporting it.
+        rethrow()
+    end
+    return try_get_type(newsym, fn)
 end
 
 function get_type(sym, fn::Module)
@@ -463,39 +511,116 @@ function get_type(sym, fn::Module)
     return found ? Core.Typeof(val) : Any, found
 end
 
+function get_type(T, found::Bool, default_any::Bool)
+    return found ? T :
+           default_any ? Any : throw(ArgumentError("argument not found"))
+end
+
 # Method completion on function call expression that look like :(max(1))
+MAX_METHOD_COMPLETIONS::Int = 40
 function complete_methods(ex_org::Expr, context_module::Module=Main)
-    args_ex = Any[]
-    func, found = get_value(ex_org.args[1], context_module)::Tuple{Any,Bool}
-    !found && return Completion[]
+    out = Completion[]
+    funct, found = get_type(ex_org.args[1], context_module)::Tuple{Any,Bool}
+    !found && return out
+
+    args_ex, kwargs_ex = complete_methods_args(ex_org.args[2:end], ex_org, context_module, true, true)
+    push!(args_ex, Vararg{Any})
+    complete_methods!(out, funct, args_ex, kwargs_ex, MAX_METHOD_COMPLETIONS)
 
-    funargs = ex_org.args[2:end]
-    # handle broadcasting, but only handle number of arguments instead of
-    # argument types
-    if ex_org.head === :. && ex_org.args[2] isa Expr
+    return out
+end
+
+MAX_ANY_METHOD_COMPLETIONS::Int = 10
+function complete_any_methods(ex_org::Expr, callee_module::Module, context_module::Module, moreargs::Bool, shift::Bool)
+    out = Completion[]
+    args_ex, kwargs_ex = try
+        # this may throw, since we set default_any to false
+        complete_methods_args(ex_org.args[2:end], ex_org, context_module, false, false)
+    catch ex
+        ex isa ArgumentError || rethrow()
+        return out
+    end
+    moreargs && push!(args_ex, Vararg{Any})
+
+    seen = Base.IdSet()
+    for name in names(callee_module; all=true)
+        if !Base.isdeprecated(callee_module, name) && isdefined(callee_module, name)
+            func = getfield(callee_module, name)
+            if !isa(func, Module)
+                funct = Core.Typeof(func)
+                if !in(funct, seen)
+                    push!(seen, funct)
+                    complete_methods!(out, funct, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS)
+                end
+            elseif callee_module === Main && isa(func, Module)
+                callee_module2 = func
+                for name in names(callee_module2)
+                    if !Base.isdeprecated(callee_module2, name) && isdefined(callee_module2, name)
+                        func = getfield(callee_module, name)
+                        if !isa(func, Module)
+                            funct = Core.Typeof(func)
+                            if !in(funct, seen)
+                                push!(seen, funct)
+                                complete_methods!(out, funct, args_ex, kwargs_ex, MAX_ANY_METHOD_COMPLETIONS)
+                            end
+                        end
+                    end
+                end
+            end
+        end
+    end
+
+    if !shift
+        # Filter out methods where all arguments are `Any`
+        filter!(out) do c
+            isa(c, TextCompletion) && return false
+            isa(c, MethodCompletion) || return true
+            sig = Base.unwrap_unionall(c.method.sig)::DataType
+            return !all(T -> T === Any || T === Vararg{Any}, sig.parameters[2:end])
+        end
+    end
+
+    return out
+end
+
+function complete_methods_args(funargs::Vector{Any}, ex_org::Expr, context_module::Module, default_any::Bool, allow_broadcasting::Bool)
+    args_ex = Any[]
+    kwargs_ex = false
+    if allow_broadcasting && ex_org.head === :. && ex_org.args[2] isa Expr
+        # handle broadcasting, but only handle number of arguments instead of
+        # argument types
         for _ in (ex_org.args[2]::Expr).args
             push!(args_ex, Any)
         end
     else
         for ex in funargs
-            val, found = get_type(ex, context_module)
-            push!(args_ex, val)
+            if isexpr(ex, :parameters)
+                if !isempty(ex.args)
+                    kwargs_ex = true
+                end
+            elseif isexpr(ex, :kw)
+                kwargs_ex = true
+            else
+                push!(args_ex, get_type(get_type(ex, context_module)..., default_any))
+            end
         end
     end
+    return args_ex, kwargs_ex
+end
 
-    out = Completion[]
-    t_in = Tuple{Core.Typeof(func), args_ex...} # Input types
-    na = length(args_ex)+1
-    ml = methods(func)
-    for method in ml
-        ms = method.sig
-
-        # Check if the method's type signature intersects the input types
-        if typeintersect(Base.rewrap_unionall(Tuple{(Base.unwrap_unionall(ms)::DataType).parameters[1 : min(na, end)]...}, ms), t_in) !== Union{}
-            push!(out, MethodCompletion(func, t_in, method))
-        end
+function complete_methods!(out::Vector{Completion}, @nospecialize(funct), args_ex::Vector{Any}, kwargs_ex::Bool, max_method_completions::Int)
+    # Input types and number of arguments
+    t_in = Tuple{funct, args_ex...}
+    m = Base._methods_by_ftype(t_in, nothing, max_method_completions, Base.get_world_counter(),
+        #=ambig=# true, Ref(typemin(UInt)), Ref(typemax(UInt)), Ptr{Int32}(C_NULL))
+    if m === false
+        push!(out, TextCompletion(sprint(Base.show_signature_function, funct) * "( too many methods to show )"))
+    end
+    m isa Vector || return
+    for match in m
+        # TODO: if kwargs_ex, filter out methods without kwargs?
+        push!(out, MethodCompletion(match.spec_types, match.method))
     end
-    return out
 end
 
 include("latex_symbols.jl")
@@ -508,6 +633,11 @@ const whitespace_chars = [" \t\n\r"...]
 # bslash_completions function to try and complete on escaped characters in strings
 const bslash_separators = [whitespace_chars..., "\"'`"...]
 
+const subscripts = Dict(k[3]=>v[1] for (k,v) in latex_symbols if startswith(k, "\\_") && length(k)==3)
+const subscript_regex = Regex("^\\\\_[" * join(isdigit(k) || isletter(k) ? "$k" : "\\$k" for k in keys(subscripts)) * "]+\\z")
+const superscripts = Dict(k[3]=>v[1] for (k,v) in latex_symbols if startswith(k, "\\^") && length(k)==3)
+const superscript_regex = Regex("^\\\\\\^[" * join(isdigit(k) || isletter(k) ? "$k" : "\\$k" for k in keys(superscripts)) * "]+\\z")
+
 # Aux function to detect whether we're right after a
 # using or import keyword
 function afterusing(string::String, startpos::Int)
@@ -521,6 +651,21 @@ function afterusing(string::String, startpos::Int)
     return occursin(r"^\b(using|import)\s*((\w+[.])*\w+\s*,\s*)*$", str[fr:end])
 end
 
+function close_path_completion(str, startpos, r, paths, pos)
+    length(paths) == 1 || return false  # Only close if there's a single choice...
+    _path = str[startpos:prevind(str, first(r))] * (paths[1]::PathCompletion).path
+    path = expanduser(replace(_path, r"\\ " => " "))
+    # ...except if it's a directory...
+    try
+        isdir(path)
+    catch e
+        e isa Base.IOError || rethrow() # `path` cannot be determined to be a file
+    end && return false
+    # ...and except if there's already a " at the cursor.
+    return lastindex(str) <= pos || str[nextind(str, pos)] != '"'
+end
+
+
 function bslash_completions(string::String, pos::Int)
     slashpos = something(findprev(isequal('\\'), string, pos), 0)
     if (something(findprev(in(bslash_separators), string, pos), 0) < slashpos &&
@@ -530,6 +675,12 @@ function bslash_completions(string::String, pos::Int)
         latex = get(latex_symbols, s, "")
         if !isempty(latex) # complete an exact match
             return (true, (Completion[BslashCompletion(latex)], slashpos:pos, true))
+        elseif occursin(subscript_regex, s)
+            sub = map(c -> subscripts[c], s[3:end])
+            return (true, (Completion[BslashCompletion(sub)], slashpos:pos, true))
+        elseif occursin(superscript_regex, s)
+            sup = map(c -> superscripts[c], s[3:end])
+            return (true, (Completion[BslashCompletion(sup)], slashpos:pos, true))
         end
         emoji = get(emoji_symbols, s, "")
         if !isempty(emoji)
@@ -547,7 +698,7 @@ function bslash_completions(string::String, pos::Int)
     return (false, (Completion[], 0:-1, false))
 end
 
-function dict_identifier_key(str::String, tag::Symbol, context_module::Module = Main)
+function dict_identifier_key(str::String, tag::Symbol, context_module::Module=Main)
     if tag === :string
         str_close = str*"\""
     elseif tag === :cmd
@@ -583,24 +734,55 @@ end
 
 function project_deps_get_completion_candidates(pkgstarts::String, project_file::String)
     loading_candidates = String[]
-    p = Base.TOML.Parser()
-    Base.TOML.reinit!(p, read(project_file, String); filepath=project_file)
-    d = Base.TOML.parse(p)
-    pkg = get(d, "name", nothing)
+    d = Base.parsed_toml(project_file)
+    pkg = get(d, "name", nothing)::Union{String, Nothing}
     if pkg !== nothing && startswith(pkg, pkgstarts)
         push!(loading_candidates, pkg)
     end
-    for (pkg, _) in get(d, "deps", [])
-        startswith(pkg, pkgstarts) && push!(loading_candidates, pkg)
+    deps = get(d, "deps", nothing)::Union{Dict{String, Any}, Nothing}
+    if deps !== nothing
+        for (pkg, _) in deps
+            startswith(pkg, pkgstarts) && push!(loading_candidates, pkg)
+        end
     end
     return Completion[PackageCompletion(name) for name in loading_candidates]
 end
 
-function completions(string::String, pos::Int, context_module::Module=Main)
+function completions(string::String, pos::Int, context_module::Module=Main, shift::Bool=true)
     # First parse everything up to the current position
     partial = string[1:pos]
     inc_tag = Base.incomplete_tag(Meta.parse(partial, raise=false, depwarn=false))
 
+    # ?(x, y)TAB lists methods you can call with these objects
+    # ?(x, y TAB lists methods that take these objects as the first two arguments
+    # MyModule.?(x, y)TAB restricts the search to names in MyModule
+    rexm = match(r"(\w+\.|)\?\((.*)$", partial)
+    if rexm !== nothing
+        # Get the module scope
+        if isempty(rexm.captures[1])
+            callee_module = context_module
+        else
+            modname = Symbol(rexm.captures[1][1:end-1])
+            if isdefined(context_module, modname)
+                callee_module = getfield(context_module, modname)
+                if !isa(callee_module, Module)
+                    callee_module = context_module
+                end
+            else
+                callee_module = context_module
+            end
+        end
+        moreargs = !endswith(rexm.captures[2], ')')
+        callstr = "_(" * rexm.captures[2]
+        if moreargs
+            callstr *= ')'
+        end
+        ex_org = Meta.parse(callstr, raise=false, depwarn=false)
+        if isa(ex_org, Expr)
+            return complete_any_methods(ex_org, callee_module::Module, context_module, moreargs, shift), (0:length(rexm.captures[1])+1) .+ rexm.offset, false
+        end
+    end
+
     # if completing a key in a Dict
     identifier, partial_key, loc = dict_identifier_key(partial, inc_tag, context_module)
     if identifier !== nothing
@@ -620,13 +802,8 @@ function completions(string::String, pos::Int, context_module::Module=Main)
 
         paths, r, success = complete_path(replace(string[r], r"\\ " => " "), pos)
 
-        if inc_tag === :string &&
-           length(paths) == 1 &&  # Only close if there's a single choice,
-           !isdir(expanduser(replace(string[startpos:prevind(string, first(r))] * paths[1].path,
-                                     r"\\ " => " "))) &&  # except if it's a directory
-           (lastindex(string) <= pos ||
-            string[nextind(string,pos)] != '"')  # or there's already a " at the cursor.
-            paths[1] = PathCompletion(paths[1].path * "\"")
+        if inc_tag === :string && close_path_completion(string, startpos, r, paths, pos)
+            paths[1] = PathCompletion((paths[1]::PathCompletion).path * "\"")
         end
 
         #Latex symbols can be completed for strings
@@ -710,19 +887,21 @@ function completions(string::String, pos::Int, context_module::Module=Main)
     dotpos < startpos && (dotpos = startpos - 1)
     s = string[startpos:pos]
     comp_keywords && append!(suggestions, complete_keyword(s))
-    # The case where dot and start pos is equal could look like: "(""*"").d","". or  CompletionFoo.test_y_array[1].y
-    # This case can be handled by finding the beginning of the expression. This is done below.
-    if dotpos == startpos
+    # if the start of the string is a `.`, try to consume more input to get back to the beginning of the last expression
+    if 0 < startpos <= lastindex(string) && string[startpos] == '.'
         i = prevind(string, startpos)
         while 0 < i
             c = string[i]
-            if c in [')', ']']
-                if c==')'
-                    c_start='('; c_end=')'
-                elseif c==']'
-                    c_start='['; c_end=']'
+            if c in (')', ']')
+                if c == ')'
+                    c_start = '('
+                    c_end = ')'
+                elseif c == ']'
+                    c_start = '['
+                    c_end = ']'
                 end
                 frange, end_of_identifier = find_start_brace(string[1:prevind(string, i)], c_start=c_start, c_end=c_end)
+                isempty(frange) && break # unbalanced parens
                 startpos = first(frange)
                 i = prevind(string, startpos)
             elseif c in ('\'', '\"', '\`')
@@ -774,4 +953,24 @@ function shell_completions(string, pos)
     return Completion[], 0:-1, false
 end
 
+function UndefVarError_hint(io::IO, ex::UndefVarError)
+    var = ex.var
+    if var === :or
+        print(io, "\nsuggestion: Use `||` for short-circuiting boolean OR.")
+    elseif var === :and
+        print(io, "\nsuggestion: Use `&&` for short-circuiting boolean AND.")
+    elseif var === :help
+        println(io)
+        # Show friendly help message when user types help or help() and help is undefined
+        show(io, MIME("text/plain"), Base.Docs.parsedoc(Base.Docs.keywords[:help]))
+    elseif var === :quit
+        print(io, "\nsuggestion: To exit Julia, use Ctrl-D, or type exit() and press enter.")
+    end
+end
+
+function __init__()
+    Base.Experimental.register_error_hint(UndefVarError_hint, UndefVarError)
+    nothing
+end
+
 end # module
diff --git a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
index c6fc1989b1c39a..127d0cd88a2cfa 100644
--- a/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/AbstractMenu.jl
@@ -64,6 +64,8 @@ end
 # TODO Julia2.0: get rid of parametric intermediate, making it just
 #   abstract type ConfiguredMenu <: AbstractMenu end
 # Or perhaps just make all menus ConfiguredMenus
+# Also consider making `cursor` a mandatory field in the Menu structs
+# instead of going via the RefValue in `request`.
 abstract type _ConfiguredMenu{C} <: AbstractMenu end
 const ConfiguredMenu = _ConfiguredMenu{<:AbstractConfig}
 
@@ -147,6 +149,9 @@ keypress(m::AbstractMenu, i::UInt32) = false
     numoptions(m::AbstractMenu) -> Int
 
 Return the number of options in menu `m`. Defaults to `length(options(m))`.
+
+!!! compat "Julia 1.6"
+    This function requires Julia 1.6 or later.
 """
 numoptions(m::AbstractMenu) = length(options(m))
 
@@ -162,26 +167,32 @@ selected(m::AbstractMenu) = m.selected
     request(m::AbstractMenu; cursor=1)
 
 Display the menu and enter interactive mode. `cursor` indicates the item
-number used for the initial cursor position.
+number used for the initial cursor position. `cursor` can be either an
+`Int` or a `RefValue{Int}`. The latter is useful for observation and
+control of the cursor position from the outside.
 
 Returns `selected(m)`.
+
+!!! compat "Julia 1.6"
+    The `cursor` argument requires Julia 1.6 or later.
 """
 request(m::AbstractMenu; kwargs...) = request(terminal, m; kwargs...)
 
-function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Int=1, suppress_output=false)
-    menu_header = header(m)
-    !suppress_output && !isempty(menu_header) && println(term.out_stream, menu_header)
+function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Union{Int, Base.RefValue{Int}}=1, suppress_output=false)
+    if cursor isa Int
+        cursor = Ref(cursor)
+    end
 
     state = nothing
     if !suppress_output
-        state = printmenu(term.out_stream, m, cursor, init=true)
+        state = printmenu(term.out_stream, m, cursor[], init=true)
     end
 
     raw_mode_enabled = try
         REPL.Terminals.raw!(term, true)
         true
     catch err
-        @warn("TerminalMenus: Unable to enter raw mode: $err")
+        suppress_output || @warn "TerminalMenus: Unable to enter raw mode: " exception=(err, catch_backtrace())
         false
     end
     # hide the cursor
@@ -193,22 +204,22 @@ function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Int=
             c = readkey(term.in_stream)
 
             if c == Int(ARROW_UP)
-                cursor = move_up!(m, cursor, lastoption)
+                cursor[] = move_up!(m, cursor[], lastoption)
             elseif c == Int(ARROW_DOWN)
-                cursor = move_down!(m, cursor, lastoption)
+                cursor[] = move_down!(m, cursor[], lastoption)
             elseif c == Int(PAGE_UP)
-                cursor = page_up!(m, cursor, lastoption)
+                cursor[] = page_up!(m, cursor[], lastoption)
             elseif c == Int(PAGE_DOWN)
-                cursor = page_down!(m, cursor, lastoption)
+                cursor[] = page_down!(m, cursor[], lastoption)
             elseif c == Int(HOME_KEY)
-                cursor = 1
+                cursor[] = 1
                 m.pageoffset = 0
             elseif c == Int(END_KEY)
-                cursor = lastoption
+                cursor[] = lastoption
                 m.pageoffset = lastoption - m.pagesize
             elseif c == 13 # <enter>
                 # will break if pick returns true
-                pick(m, cursor) && break
+                pick(m, cursor[]) && break
             elseif c == UInt32('q')
                 cancel(m)
                 break
@@ -221,7 +232,7 @@ function request(term::REPL.Terminals.TTYTerminal, m::AbstractMenu; cursor::Int=
             end
 
             if !suppress_output
-                state = printmenu(term.out_stream, m, cursor, oldstate=state)
+                state = printmenu(term.out_stream, m, cursor[], oldstate=state)
             end
         end
     finally # always disable raw mode
@@ -295,8 +306,8 @@ end
 """
     printmenu(out, m::AbstractMenu, cursoridx::Int; init::Bool=false, oldstate=nothing) -> newstate
 
-Display the state of a menu. `init=true` causes `m.pageoffset` to be initialized to zero,
-and starts printing at the current cursor location; when `init` is false, the terminal will
+Display the state of a menu. `init=true` causes `m.pageoffset` to be initialized to start printing at
+or just above the current cursor location; when `init` is false, the terminal will
 preserve the current setting of `m.pageoffset` and overwrite the previous display.
 Returns `newstate`, which can be passed in as `oldstate` on the next call to allow accurate
 overwriting of the previous display.
@@ -314,9 +325,23 @@ function printmenu(out::IO, m::AbstractMenu, cursoridx::Int; oldstate=nothing, i
     ncleared = oldstate === nothing ? m.pagesize-1 : oldstate
 
     if init
-        m.pageoffset = 0
+        # like clamp, except this takes the min if max < min
+        m.pageoffset = max(0, min(cursoridx - m.pagesize ÷ 2, lastoption - m.pagesize))
     else
-        print(buf, "\x1b[999D\x1b[$(ncleared)A")   # move left 999 spaces and up `ncleared` lines
+        print(buf, "\r")
+        if ncleared > 0
+            # Move up `ncleared` lines. However, moving up zero lines
+            # is interpreted as one line, so need to do this
+            # conditionally. (More specifically, the `0` value means
+            # to use the default, and for move up this is one.)
+            print(buf, "\x1b[$(ncleared)A")
+        end
+    end
+
+    nheaderlines = 0
+    for headerline in split(header(m), "\n", keepempty=false)
+        print(buf, "\x1b[2K", headerline, "\r\n")
+        nheaderlines += 1
     end
 
     firstline = m.pageoffset+1
@@ -342,10 +367,11 @@ function printmenu(out::IO, m::AbstractMenu, cursoridx::Int; oldstate=nothing, i
         printcursor(buf, m, i == cursoridx)
         writeline(buf, m, i, i == cursoridx)
 
-        (firstline == lastline || i != lastline) && print(buf, "\r\n")
+        (i != lastline) && print(buf, "\r\n")
     end
 
-    newstate = lastline-firstline  # final line doesn't have `\n`
+    newstate = nheaderlines + lastline - firstline  # final line doesn't have `\n`
+
     if newstate < ncleared && oldstate !== nothing
         # we printed fewer lines than last time. Erase the leftovers.
         for i = newstate+1:ncleared
@@ -362,27 +388,27 @@ end
 scroll_wrap(m::ConfiguredMenu) = scroll_wrap(m.config)
 scroll_wrap(c::AbstractConfig) = scroll_wrap(c.config)
 scroll_wrap(c::Config) = c.scroll_wrap
-scroll_wrap(::AbstractMenu) = CONFIG[:scroll_wrap]
+scroll_wrap(::AbstractMenu) = CONFIG[:scroll_wrap]::Bool
 
 ctrl_c_interrupt(m::ConfiguredMenu) = ctrl_c_interrupt(m.config)
 ctrl_c_interrupt(c::AbstractConfig) = ctrl_c_interrupt(c.config)
 ctrl_c_interrupt(c::Config) = c.ctrl_c_interrupt
-ctrl_c_interrupt(::AbstractMenu) = CONFIG[:ctrl_c_interrupt]
+ctrl_c_interrupt(::AbstractMenu) = CONFIG[:ctrl_c_interrupt]::Bool
 
 up_arrow(m::ConfiguredMenu) = up_arrow(m.config)
 up_arrow(c::AbstractConfig) = up_arrow(c.config)
 up_arrow(c::Config) = c.up_arrow
-up_arrow(::AbstractMenu) = CONFIG[:up_arrow]
+up_arrow(::AbstractMenu) = CONFIG[:up_arrow]::Char
 
 down_arrow(m::ConfiguredMenu) = down_arrow(m.config)
 down_arrow(c::AbstractConfig) = down_arrow(c.config)
 down_arrow(c::Config) = c.down_arrow
-down_arrow(::AbstractMenu) = CONFIG[:down_arrow]
+down_arrow(::AbstractMenu) = CONFIG[:down_arrow]::Char
 
 updown_arrow(m::ConfiguredMenu) = updown_arrow(m.config)
 updown_arrow(c::AbstractConfig) = updown_arrow(c.config)
 updown_arrow(c::Config) = c.updown_arrow
-updown_arrow(::AbstractMenu) = CONFIG[:updown_arrow]
+updown_arrow(::AbstractMenu) = CONFIG[:updown_arrow]::Char
 
 printcursor(buf, m::ConfiguredMenu, iscursor::Bool) = print(buf, iscursor ? cursor(m.config) : ' ', ' ')
 cursor(c::AbstractConfig) = cursor(c.config)
diff --git a/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl b/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
index b68255c3ecef29..5c3ecf3808c496 100644
--- a/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/MultiSelectMenu.jl
@@ -8,10 +8,10 @@ A menu that allows a user to select a multiple options from a list.
 
 # Sample Output
 
-```julia
+```julia-repl
 julia> request(MultiSelectMenu(options))
 Select the fruits you like:
-[press: d=done, a=all, n=none]
+[press: Enter=toggle, a=all, n=none, d=done, q=abort]
    [ ] apple
  > [X] orange
    [X] grape
@@ -51,6 +51,9 @@ were selected by the user.
   - `selected=[]`: pre-selected items. `i ∈ selected` means that `options[i]` is preselected.
 
 Any additional keyword arguments will be passed to [`TerminalMenus.MultiSelectConfig`](@ref).
+
+!!! compat "Julia 1.6"
+    The `selected` argument requires Julia 1.6 or later.
 """
 function MultiSelectMenu(options::Array{String,1}; pagesize::Int=10, selected=Int[], warn::Bool=true, kwargs...)
     length(options) < 1 && error("MultiSelectMenu must have at least one option")
@@ -59,7 +62,7 @@ function MultiSelectMenu(options::Array{String,1}; pagesize::Int=10, selected=In
     pagesize = pagesize == -1 ? length(options) : pagesize
     # pagesize shouldn't be bigger than options
     pagesize = min(length(options), pagesize)
-    # after other checks, pagesize must be greater than 2
+    # after other checks, pagesize must be at least 1
     pagesize < 1 && error("pagesize must be >= 1")
 
     pageoffset = 0
@@ -83,7 +86,7 @@ end
 # See AbstractMenu.jl
 #######################################
 
-header(m::MultiSelectMenu) = "[press: d=done, a=all, n=none]"
+header(m::MultiSelectMenu) = "[press: Enter=toggle, a=all, n=none, d=done, q=abort]"
 
 options(m::MultiSelectMenu) = m.options
 
diff --git a/stdlib/REPL/src/TerminalMenus/Pager.jl b/stdlib/REPL/src/TerminalMenus/Pager.jl
new file mode 100644
index 00000000000000..c823a5dedd1ba7
--- /dev/null
+++ b/stdlib/REPL/src/TerminalMenus/Pager.jl
@@ -0,0 +1,42 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+mutable struct Pager{C} <: _ConfiguredMenu{C}
+    lines::Vector{String}
+    pagesize::Int
+    pageoffset::Int
+    selected::Nothing
+    config::C
+end
+
+function Pager(text::AbstractString; pagesize::Int=10, kwargs...)
+    lines = readlines(IOBuffer(text))
+    return Pager(lines, pagesize, 0, nothing, Config(; kwargs...))
+end
+
+function header(p::Pager)
+    total = length(p.lines)
+    current = min(p.pageoffset + p.pagesize, total)
+    percent = round(Int, (current / total) * 100)
+    return "($(lpad(current, ndigits(total))) / $total) $(lpad(percent, 3))%"
+end
+
+options(p::Pager) = p.lines
+
+cancel(::Pager) = nothing
+
+pick(::Pager, ::Int) = true
+
+function writeline(buf::IOBuffer, pager::Pager{Config}, idx::Int, iscursor::Bool)
+    print(buf, pager.lines[idx])
+end
+
+function pager(terminal, object)
+    lines, columns = displaysize(terminal)::Tuple{Int,Int}
+    columns -= 3
+    buffer = IOBuffer()
+    ctx = IOContext(buffer, :color => REPL.Terminals.hascolor(terminal), :displaysize => (lines, columns))
+    show(ctx, "text/plain", object)
+    pager = Pager(String(take!(buffer)); pagesize = div(lines, 2))
+    return request(terminal, pager)
+end
+pager(object) = pager(terminal, object)
diff --git a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
index c8bdc557377b99..32a6373b719d70 100644
--- a/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
+++ b/stdlib/REPL/src/TerminalMenus/RadioMenu.jl
@@ -8,7 +8,7 @@ A menu that allows a user to select a single option from a list.
 
 # Sample Output
 
-```julia
+```julia-repl
 julia> request(RadioMenu(options, pagesize=4))
 Choose your favorite fruit:
 ^  grape
@@ -21,6 +21,7 @@ Your favorite fruit is blueberry!
 """
 mutable struct RadioMenu{C} <: _ConfiguredMenu{C}
     options::Array{String,1}
+    keybindings::Vector{Char}
     pagesize::Int
     pageoffset::Int
     selected::Int
@@ -30,7 +31,9 @@ end
 
 """
 
-    RadioMenu(options::Array{String,1}; pagesize::Int=10, kwargs...)
+    RadioMenu(options::Array{String,1}; pagesize::Int=10,
+                                        keybindings::Vector{Char}=Char[],
+                                        kwargs...)
 
 Create a RadioMenu object. Use `request(menu::RadioMenu)` to get user input.
 `request()` returns an `Int` which is the index of the option selected by the
@@ -40,11 +43,16 @@ user.
 
   - `options::Array{String, 1}`: Options to be displayed
   - `pagesize::Int=10`: The number of options to be displayed at one time, the menu will scroll if length(options) > pagesize
+  - `keybindings::Vector{Char}=Char[]`: Shortcuts to pick corresponding entry from `options`
 
 Any additional keyword arguments will be passed to [`TerminalMenus.Config`](@ref).
+
+!!! compat "Julia 1.8"
+    The `keybindings` argument requires Julia 1.8 or later.
 """
-function RadioMenu(options::Array{String,1}; pagesize::Int=10, warn::Bool=true, kwargs...)
+function RadioMenu(options::Array{String,1}; pagesize::Int=10, warn::Bool=true, keybindings::Vector{Char}=Char[], kwargs...)
     length(options) < 1 && error("RadioMenu must have at least one option")
+    length(keybindings) in [0, length(options)] || error("RadioMenu must have either no keybindings, or one per option")
 
     # if pagesize is -1, use automatic paging
     pagesize = pagesize == -1 ? length(options) : pagesize
@@ -57,10 +65,10 @@ function RadioMenu(options::Array{String,1}; pagesize::Int=10, warn::Bool=true,
     selected = -1 # none
 
     if !isempty(kwargs)
-        RadioMenu(options, pagesize, pageoffset, selected, Config(; kwargs...))
+        RadioMenu(options, keybindings, pagesize, pageoffset, selected, Config(; kwargs...))
     else
         warn && Base.depwarn("Legacy `RadioMenu` interface is deprecated, set a configuration option such as `RadioMenu(options; charset=:ascii)` to trigger the new interface.", :RadioMenu)
-        RadioMenu(options, pagesize, pageoffset, selected, CONFIG)
+        RadioMenu(options, keybindings, pagesize, pageoffset, selected, CONFIG)
     end
 end
 
@@ -83,6 +91,14 @@ function writeline(buf::IOBuffer, menu::RadioMenu{Config}, idx::Int, iscursor::B
     print(buf, replace(menu.options[idx], "\n" => "\\n"))
 end
 
+function keypress(m::RadioMenu, i::UInt32)
+    isempty(m.keybindings) && return false
+    i = findfirst(isequal(i), Int.(m.keybindings))
+    isnothing(i) && return false
+    m.selected = i
+    return true
+end
+
 # Legacy interface
 function writeLine(buf::IOBuffer, menu::RadioMenu{<:Dict}, idx::Int, cursor::Bool)
     # print a ">" on the selected entry
diff --git a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
index d9d3dc8598f7da..87869e84d98388 100644
--- a/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
+++ b/stdlib/REPL/src/TerminalMenus/TerminalMenus.jl
@@ -17,10 +17,12 @@ include("config.jl")
 include("AbstractMenu.jl")
 include("RadioMenu.jl")
 include("MultiSelectMenu.jl")
+include("Pager.jl")
 
 export
     RadioMenu,
     MultiSelectMenu,
+    Pager,
     request
 
 # TODO: remove in Julia 2.0
diff --git a/stdlib/REPL/src/TerminalMenus/config.jl b/stdlib/REPL/src/TerminalMenus/config.jl
index 9c8f001be6c5f0..8ff6fccb0b9bd6 100644
--- a/stdlib/REPL/src/TerminalMenus/config.jl
+++ b/stdlib/REPL/src/TerminalMenus/config.jl
@@ -39,7 +39,7 @@ Configure behavior for selection menus via keyword arguments:
 Subtypes of `ConfiguredMenu` will print `cursor`, `up_arrow`, and `down_arrow` automatically
 as needed, your `writeline` method should not print them.
 
-!!! compat Julia 1.6
+!!! compat "Julia 1.6"
     `Config` is available as of Julia 1.6. On older releases use the global `CONFIG`.
 """
 function Config(;
@@ -81,7 +81,7 @@ All other keyword arguments are as described for [`TerminalMenus.Config`](@ref).
 `checked` and `unchecked` are not printed automatically, and should be printed by
 your `writeline` method.
 
-!!! compat Julia 1.6
+!!! compat "Julia 1.6"
     `MultiSelectConfig` is available as of Julia 1.6. On older releases use the global `CONFIG`.
 """
 function MultiSelectConfig(;
@@ -109,7 +109,7 @@ end
 
 Global menu configuration parameters
 
-!!! compat Julia 1.6
+!!! compat "Julia 1.6"
     `CONFIG` is deprecated, instead configure menus via their constructors.
 """
 const CONFIG = Dict{Symbol,Union{Char,String,Bool}}()
@@ -130,7 +130,7 @@ Keyword-only function to configure global menu parameters
  - `supress_output::Bool=false`: Ignored legacy argument, pass `suppress_output` as a keyword argument to `request` instead.
  - `ctrl_c_interrupt::Bool=true`: If `false`, return empty on ^C, if `true` throw InterruptException() on ^C
 
-!!! compat Julia 1.6
+!!! compat "Julia 1.6"
     As of Julia 1.6, `config` is deprecated. Use `Config` or `MultiSelectConfig` instead.
 """
 function config(;charset::Symbol = :na,
diff --git a/stdlib/REPL/src/TerminalMenus/util.jl b/stdlib/REPL/src/TerminalMenus/util.jl
index 8ad9ec0e4100d3..91e336070d2cfe 100644
--- a/stdlib/REPL/src/TerminalMenus/util.jl
+++ b/stdlib/REPL/src/TerminalMenus/util.jl
@@ -17,24 +17,24 @@ readbyte(stream::IO=stdin) = read(stream, Char)
 # Read the next key from stdin. It is also able to read several bytes for
 #   escaped keys such as the arrow keys, home/end keys, etc.
 # Escaped keys are returned using the `Key` enum.
-readkey(stream::Base.LibuvStream=stdin) = UInt32(_readkey(stream))
-function _readkey(stream::Base.LibuvStream=stdin)
+readkey(stream::IO=stdin) = UInt32(_readkey(stream))
+function _readkey(stream::IO=stdin)
     c = readbyte(stream)
 
     # Escape characters
     if c == '\x1b'
-        stream.buffer.size < 2 && return '\x1b'
+        bytesavailable(stream) < 1 && return '\x1b'
         esc_a = readbyte(stream)
         esc_a == 'v' && return PAGE_UP  # M-v
         esc_a == '<' && return HOME_KEY # M-<
         esc_a == '>' && return END_KEY  # M->
 
-        stream.buffer.size < 3 && return '\x1b'
+        bytesavailable(stream) < 1 && return '\x1b'
         esc_b = readbyte(stream)
 
         if esc_a == '[' || esc_a == 'O'
             if esc_b >= '0' && esc_b <= '9'
-                stream.buffer.size < 4 && return '\x1b'
+                bytesavailable(stream) < 1 && return '\x1b'
                 esc_c = readbyte(stream)
                 if esc_c == '~'
                     esc_b == '1' && return HOME_KEY
diff --git a/stdlib/REPL/src/docview.jl b/stdlib/REPL/src/docview.jl
index 3d4dddc72176c6..fe55ea6b128af7 100644
--- a/stdlib/REPL/src/docview.jl
+++ b/stdlib/REPL/src/docview.jl
@@ -69,10 +69,11 @@ end
 _helpmode(line::AbstractString) = _helpmode(stdout, line)
 
 # Print vertical lines along each docstring if there are multiple docs
-function insert_hlines(io::IO, docs::Markdown.MD)
+function insert_hlines(io::IO, docs)
     if !isa(docs, Markdown.MD) || !haskey(docs.meta, :results) || isempty(docs.meta[:results])
         return docs
     end
+    docs = docs::Markdown.MD
     v = Any[]
     for (n, doc) in enumerate(docs.content)
         push!(v, doc)
@@ -128,13 +129,15 @@ end
 function _trimdocs(md::Markdown.MD, brief::Bool)
     content, trimmed = [], false
     for c in md.content
-        if isa(c, Markdown.Header{1}) && isa(c.text, AbstractArray) &&
-            !isempty(c.text) && isa(c.text[1], AbstractString) &&
-            lowercase(c.text[1]) ∈ ("extended help",
-                                    "extended documentation",
-                                    "extended docs")
-            trimmed = true
-            break
+        if isa(c, Markdown.Header{1}) && isa(c.text, AbstractArray) && !isempty(c.text)
+            item = c.text[1]
+            if isa(item, AbstractString) &&
+                lowercase(item) ∈ ("extended help",
+                                   "extended documentation",
+                                   "extended docs")
+                trimmed = true
+                break
+            end
         end
         c, trm = _trimdocs(c, brief)
         trimmed |= trm
@@ -216,12 +219,14 @@ function lookup_doc(ex)
     end
     if isa(ex, Symbol) && Base.isoperator(ex)
         str = string(ex)
-        if endswith(str, "=") && Base.operator_precedence(ex) == Base.prec_assignment
-            op = str[1:end-1]
-            return Markdown.parse("`x $op= y` is a synonym for `x = x $op y`")
-        elseif startswith(str, ".")
+        isdotted = startswith(str, ".")
+        if endswith(str, "=") && Base.operator_precedence(ex) == Base.prec_assignment && ex !== :(:=)
+            op = chop(str)
+            eq = isdotted ? ".=" : "="
+            return Markdown.parse("`x $op= y` is a synonym for `x $eq x $op y`")
+        elseif isdotted && ex !== :(..)
             op = str[2:end]
-            return Markdown.parse("`x $ex y` is equivalent to `broadcast($op, x, y)`. See [`broadcast`](@ref).")
+            return Markdown.parse("`x $ex y` is akin to `broadcast($op, x, y)`. See [`broadcast`](@ref).")
         end
     end
     binding = esc(bindingexpr(namify(ex)))
@@ -238,11 +243,14 @@ end
 
 function summarize(binding::Binding, sig)
     io = IOBuffer()
-    println(io, "No documentation found.\n")
     if defined(binding)
-        summarize(io, resolve(binding), binding)
+        binding_res = resolve(binding)
+        !isa(binding_res, Module) && println(io, "No documentation found.\n")
+        summarize(io, binding_res, binding)
     else
-        println(io, "Binding `", binding, "` does not exist.")
+        println(io, "No documentation found.\n")
+        quot = any(isspace, sprint(print, binding)) ? "'" : ""
+        println(io, "Binding ", quot, "`", binding, "`", quot, " does not exist.")
     end
     md = Markdown.parse(seekstart(io))
     # Save metadata in the generated markdown.
@@ -258,44 +266,96 @@ function summarize(io::IO, λ::Function, binding::Binding)
     println(io, "```\n", methods(λ), "\n```")
 end
 
-function summarize(io::IO, T::DataType, binding::Binding)
+function summarize(io::IO, TT::Type, binding::Binding)
     println(io, "# Summary")
-    println(io, "```")
-    println(io,
-            T.abstract ? "abstract type" :
-            T.mutable  ? "mutable struct" :
-            Base.isstructtype(T) ? "struct" : "primitive type",
-            " ", T, " <: ", supertype(T)
-            )
-    println(io, "```")
-    if !T.abstract && T.name !== Tuple.name && !isempty(fieldnames(T))
-        println(io, "# Fields")
+    T = Base.unwrap_unionall(TT)
+    if T isa DataType
         println(io, "```")
-        pad = maximum(length(string(f)) for f in fieldnames(T))
-        for (f, t) in zip(fieldnames(T), T.types)
-            println(io, rpad(f, pad), " :: ", t)
-        end
+        print(io,
+            Base.isabstracttype(T) ? "abstract type " :
+            Base.ismutabletype(T)  ? "mutable struct " :
+            Base.isstructtype(T) ? "struct " :
+            "primitive type ")
+        supert = supertype(T)
+        println(io, T)
         println(io, "```")
-    end
-    if !isempty(subtypes(T))
-        println(io, "# Subtypes")
-        println(io, "```")
-        for t in subtypes(T)
-            println(io, t)
+        if !Base.isabstracttype(T) && T.name !== Tuple.name && !isempty(fieldnames(T))
+            println(io, "# Fields")
+            println(io, "```")
+            pad = maximum(length(string(f)) for f in fieldnames(T))
+            for (f, t) in zip(fieldnames(T), fieldtypes(T))
+                println(io, rpad(f, pad), " :: ", t)
+            end
+            println(io, "```")
         end
-        println(io, "```")
-    end
-    if supertype(T) != Any
-        println(io, "# Supertype Hierarchy")
-        println(io, "```")
-        Base.show_supertypes(io, T)
-        println(io)
-        println(io, "```")
+        subt = subtypes(TT)
+        if !isempty(subt)
+            println(io, "# Subtypes")
+            println(io, "```")
+            for t in subt
+                println(io, Base.unwrap_unionall(t))
+            end
+            println(io, "```")
+        end
+        if supert != Any
+            println(io, "# Supertype Hierarchy")
+            println(io, "```")
+            Base.show_supertypes(io, T)
+            println(io)
+            println(io, "```")
+        end
+    elseif T isa Union
+        println(io, "`", binding, "` is of type `", typeof(TT), "`.\n")
+        println(io, "# Union Composed of Types")
+        for T1 in Base.uniontypes(T)
+            println(io, " - `", Base.rewrap_unionall(T1, TT), "`")
+        end
+    else # unreachable?
+        println(io, "`", binding, "` is of type `", typeof(TT), "`.\n")
     end
 end
 
-function summarize(io::IO, m::Module, binding::Binding)
-    println(io, "No docstring found for module `", m, "`.\n")
+function find_readme(m::Module)::Union{String, Nothing}
+    mpath = pathof(m)
+    isnothing(mpath) && return nothing
+    !isfile(mpath) && return nothing # modules in sysimage, where src files are omitted
+    path = dirname(mpath)
+    top_path = pkgdir(m)
+    while true
+        for file in readdir(path; join=true, sort=true)
+            isfile(file) && (basename(lowercase(file)) in ["readme.md", "readme"]) || continue
+            return file
+        end
+        path == top_path && break # go no further than pkgdir
+        path = dirname(path) # work up through nested modules
+    end
+    return nothing
+end
+function summarize(io::IO, m::Module, binding::Binding; nlines::Int = 200)
+    readme_path = find_readme(m)
+    if isnothing(readme_path)
+        println(io, "No docstring or readme file found for module `$m`.\n")
+    else
+        println(io, "No docstring found for module `$m`.")
+    end
+    exports = filter!(!=(nameof(m)), names(m))
+    if isempty(exports)
+        println(io, "Module does not export any names.")
+    else
+        println(io, "# Exported names")
+        print(io, "  `")
+        join(io, exports, "`, `")
+        println(io, "`\n")
+    end
+    if !isnothing(readme_path)
+        readme_lines = readlines(readme_path)
+        isempty(readme_lines) && return  # don't say we are going to print empty file
+        println(io, "# Displaying contents of readme found at `$(readme_path)`")
+        for line in first(readme_lines, nlines)
+            println(io, line)
+        end
+        length(readme_lines) > nlines && println(io, "\n[output truncated to first $nlines lines]")
+    end
 end
 
 function summarize(io::IO, @nospecialize(T), binding::Binding)
@@ -306,17 +366,23 @@ end
 
 # repl search and completions for help
 
+
+quote_spaces(x) = any(isspace, x) ? "'" * x * "'" : x
+
 function repl_search(io::IO, s::Union{Symbol,String})
     pre = "search:"
     print(io, pre)
-    printmatches(io, s, doc_completions(s), cols = _displaysize(io)[2] - length(pre))
+    printmatches(io, s, map(quote_spaces, doc_completions(s)), cols = _displaysize(io)[2] - length(pre))
     println(io, "\n")
 end
 repl_search(s) = repl_search(stdout, s)
 
 function repl_corrections(io::IO, s)
     print(io, "Couldn't find ")
-    printstyled(io, s, '\n', color=:cyan)
+    quot = any(isspace, s) ? "'" : ""
+    print(io, quot)
+    printstyled(io, s, color=:cyan)
+    print(io, quot, '\n')
     print_correction(io, s)
 end
 repl_corrections(s) = repl_corrections(stdout, s)
@@ -329,13 +395,25 @@ function symbol_latex(s::String)
                                         REPLCompletions.emoji_symbols))
             symbols_latex[v] = k
         end
+
+        # Overwrite with canonical mapping when a symbol has several completions (#39148)
+        merge!(symbols_latex, REPLCompletions.symbols_latex_canonical)
     end
+
     return get(symbols_latex, s, "")
 end
-function repl_latex(io::IO, s::String)
-    # decompose NFC-normalized identifier to match tab-completion input
-    s = normalize(s, :NFD)
-    latex = symbol_latex(s)
+function repl_latex(io::IO, s0::String)
+    # This has rampant `Core.Box` problems (#15276). Use the tricks of
+    # https://docs.julialang.org/en/v1/manual/performance-tips/#man-performance-captured
+    # We're changing some of the values so the `let` trick isn't applicable.
+    s::String = s0
+    latex::String = symbol_latex(s)
+    if isempty(latex)
+        # Decompose NFC-normalized identifier to match tab-completion
+        # input if the first search came up empty.
+        s = normalize(s, :NFD)
+        latex = symbol_latex(s)
+    end
     if !isempty(latex)
         print(io, "\"")
         printstyled(io, s, color=:cyan)
@@ -346,15 +424,36 @@ function repl_latex(io::IO, s::String)
         print(io, "\"")
         printstyled(io, s, color=:cyan)
         print(io, "\" can be typed by ")
+        state::Char = '\0'
         with_output_color(:cyan, io) do io
             for c in s
                 cstr = string(c)
                 if haskey(symbols_latex, cstr)
-                    print(io, symbols_latex[cstr], "<tab>")
+                    latex = symbols_latex[cstr]
+                    if length(latex) == 3 && latex[2] in ('^','_')
+                        # coalesce runs of sub/superscripts
+                        if state != latex[2]
+                            '\0' != state && print(io, "<tab>")
+                            print(io, latex[1:2])
+                            state = latex[2]
+                        end
+                        print(io, latex[3])
+                    else
+                        if '\0' != state
+                            print(io, "<tab>")
+                            state = '\0'
+                        end
+                        print(io, latex, "<tab>")
+                    end
                 else
+                    if '\0' != state
+                        print(io, "<tab>")
+                        state = '\0'
+                    end
                     print(io, c)
                 end
             end
+            '\0' != state && print(io, "<tab>")
         end
         println(io, '\n')
     end
@@ -491,8 +590,10 @@ function matchinds(needle, haystack; acronym::Bool = false)
     is = Int[]
     lastc = '\0'
     for (i, char) in enumerate(haystack)
+        while !isempty(chars) && isspace(first(chars))
+            popfirst!(chars) # skip spaces
+        end
         isempty(chars) && break
-        while chars[1] == ' ' popfirst!(chars) end # skip spaces
         if lowercase(char) == lowercase(chars[1]) &&
            (!acronym || !isletter(lastc))
             push!(is, i)
@@ -571,8 +672,6 @@ function printmatch(io::IO, word, match)
     end
 end
 
-printmatch(args...) = printfuzzy(stdout, args...)
-
 function printmatches(io::IO, word, matches; cols::Int = _displaysize(io)[2])
     total = 0
     for match in matches
@@ -599,7 +698,7 @@ end
 print_joined_cols(args...; cols::Int = _displaysize(stdout)[2]) = print_joined_cols(stdout, args...; cols=cols)
 
 function print_correction(io::IO, word::String)
-    cors = levsort(word, accessible(Main))
+    cors = map(quote_spaces, levsort(word, accessible(Main)))
     pre = "Perhaps you meant "
     print(io, pre)
     print_joined_cols(io, cors, ", ", " or "; cols = _displaysize(io)[2] - length(pre))
@@ -621,14 +720,26 @@ accessible(mod::Module) =
            map(names, moduleusings(mod))...;
            collect(keys(Base.Docs.keywords))] |> unique |> filtervalid
 
-doc_completions(name) = fuzzysort(name, accessible(Main))
+function doc_completions(name)
+    res = fuzzysort(name, accessible(Main))
+
+    # to insert an entry like `raw""` for `"@raw_str"` in `res`
+    ms = match.(r"^@(.*?)_str$", res)
+    idxs = findall(!isnothing, ms)
+
+    # avoid messing up the order while inserting
+    for i in reverse(idxs)
+        insert!(res, i, "$(only(ms[i].captures))\"\"")
+    end
+    res
+end
 doc_completions(name::Symbol) = doc_completions(string(name))
 
 
 # Searching and apropos
 
 # Docsearch simply returns true or false if an object contains the given needle
-docsearch(haystack::AbstractString, needle) = findfirst(needle, haystack) !== nothing
+docsearch(haystack::AbstractString, needle) = occursin(needle, haystack)
 docsearch(haystack::Symbol, needle) = docsearch(string(haystack), needle)
 docsearch(::Nothing, needle) = false
 function docsearch(haystack::Array, needle)
@@ -701,6 +812,11 @@ stripmd(x::Markdown.Table) =
 Search available docstrings for entries containing `pattern`.
 
 When `pattern` is a string, case is ignored. Results are printed to `io`.
+
+`apropos` can be called from the help mode in the REPL by wrapping the query in double quotes:
+```
+help?> "pattern"
+```
 """
 apropos(string) = apropos(stdout, string)
 apropos(io::IO, string) = apropos(io, Regex("\\Q$string", "i"))
diff --git a/stdlib/REPL/src/emoji_symbols.jl b/stdlib/REPL/src/emoji_symbols.jl
index 40f943cf246ddf..49a55c97f6564c 100644
--- a/stdlib/REPL/src/emoji_symbols.jl
+++ b/stdlib/REPL/src/emoji_symbols.jl
@@ -1,21 +1,37 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-#=
+#==
+using Pkg: @pkg_str
+pkg"activate --temp"
+pkg"add JSON@0.21"
+
 import JSON
-emojis = JSON.parsefile(download("https://raw.githubusercontent.com/iamcal/emoji-data/0f0cf4ea8845eb52d26df2a48c3c31c3b8cad14e/emoji_pretty.json"))
 
-result = Dict()
-for emj in emojis
-    name = "\\:" * emj["short_name"] * ":"
-    unicode = emj["unified"]
-    if '-' in unicode
-        continue
+function emoji_data(url)
+    emojis = JSON.parsefile(download(url))
+    result = Dict()
+    for emj in emojis
+        name = "\\:" * emj["short_name"] * ":"
+        unicode = emj["unified"]
+        if '-' in unicode
+            continue
+        end
+        result[name] = "$(Char(parse(UInt32, unicode, base = 16)))"
     end
-    result[name] = "$(Char(parse(UInt32, unicode, base = 16)))"
+    return result
 end
 
-skeys = sort(collect(keys(result)))
+# We combine multiple versions as the data changes, and not only by growing.
+result = mapfoldr(emoji_data, merge, [
+    # Newer versions must be added to the bottom list as we want the newer versions to
+    # overwrite the old with names that changed but still keep old ones that were removed
+    "https://raw.githubusercontent.com/iamcal/emoji-data/0f0cf4ea8845eb52d26df2a48c3c31c3b8cad14e/emoji_pretty.json",
+    "https://raw.githubusercontent.com/iamcal/emoji-data/e512953312c012f6bd00e3f2ef6bf152ca3710f8/emoji_pretty.json",
+    ];
+    init=Dict()
+)
 
+skeys = sort(collect(keys(result)))
 open("emoji_symbols.jl", "w") do fh
     println(fh, "const emoji_symbols = Dict(")
     for key in skeys
@@ -34,14 +50,22 @@ const emoji_symbols = Dict(
     "\\:8ball:" => "🎱",
     "\\:a:" => "🅰",
     "\\:ab:" => "🆎",
+    "\\:abacus:" => "🧮",
     "\\:abc:" => "🔤",
     "\\:abcd:" => "🔡",
     "\\:accept:" => "🉑",
+    "\\:accordion:" => "🪗",
+    "\\:adhesive_bandage:" => "🩹",
+    "\\:adult:" => "🧑",
     "\\:aerial_tramway:" => "🚡",
     "\\:airplane:" => "✈",
+    "\\:airplane_arriving:" => "🛬",
+    "\\:airplane_departure:" => "🛫",
     "\\:alarm_clock:" => "⏰",
     "\\:alien:" => "👽",
     "\\:ambulance:" => "🚑",
+    "\\:amphora:" => "🏺",
+    "\\:anatomical_heart:" => "🫀",
     "\\:anchor:" => "⚓",
     "\\:angel:" => "👼",
     "\\:anger:" => "💢",
@@ -76,40 +100,59 @@ const emoji_symbols = Dict(
     "\\:astonished:" => "😲",
     "\\:athletic_shoe:" => "👟",
     "\\:atm:" => "🏧",
+    "\\:auto_rickshaw:" => "🛺",
+    "\\:avocado:" => "🥑",
+    "\\:axe:" => "🪓",
     "\\:b:" => "🅱",
     "\\:baby:" => "👶",
     "\\:baby_bottle:" => "🍼",
     "\\:baby_chick:" => "🐤",
     "\\:baby_symbol:" => "🚼",
     "\\:back:" => "🔙",
+    "\\:bacon:" => "🥓",
+    "\\:badger:" => "🦡",
+    "\\:badminton_racquet_and_shuttlecock:" => "🏸",
+    "\\:bagel:" => "🥯",
     "\\:baggage_claim:" => "🛄",
+    "\\:baguette_bread:" => "🥖",
+    "\\:ballet_shoes:" => "🩰",
     "\\:balloon:" => "🎈",
     "\\:ballot_box_with_check:" => "☑",
     "\\:bamboo:" => "🎍",
     "\\:banana:" => "🍌",
     "\\:bangbang:" => "‼",
+    "\\:banjo:" => "🪕",
     "\\:bank:" => "🏦",
     "\\:bar_chart:" => "📊",
     "\\:barber:" => "💈",
     "\\:baseball:" => "⚾",
+    "\\:basket:" => "🧺",
     "\\:basketball:" => "🏀",
+    "\\:bat:" => "🦇",
     "\\:bath:" => "🛀",
     "\\:bathtub:" => "🛁",
     "\\:battery:" => "🔋",
     "\\:bear:" => "🐻",
+    "\\:bearded_person:" => "🧔",
+    "\\:beaver:" => "🦫",
     "\\:bee:" => "🐝",
     "\\:beer:" => "🍺",
     "\\:beers:" => "🍻",
-    "\\:beetle:" => "🐞",
+    "\\:beetle:" => "🪲",
     "\\:beginner:" => "🔰",
     "\\:bell:" => "🔔",
+    "\\:bell_pepper:" => "🫑",
     "\\:bento:" => "🍱",
+    "\\:beverage_box:" => "🧃",
     "\\:bicyclist:" => "🚴",
     "\\:bike:" => "🚲",
     "\\:bikini:" => "👙",
+    "\\:billed_cap:" => "🧢",
     "\\:bird:" => "🐦",
     "\\:birthday:" => "🎂",
+    "\\:bison:" => "🦬",
     "\\:black_circle:" => "⚫",
+    "\\:black_heart:" => "🖤",
     "\\:black_joker:" => "🃏",
     "\\:black_large_square:" => "⬛",
     "\\:black_medium_small_square:" => "◾",
@@ -122,59 +165,88 @@ const emoji_symbols = Dict(
     "\\:blue_book:" => "📘",
     "\\:blue_car:" => "🚙",
     "\\:blue_heart:" => "💙",
+    "\\:blueberries:" => "🫐",
     "\\:blush:" => "😊",
     "\\:boar:" => "🐗",
     "\\:boat:" => "⛵",
     "\\:bomb:" => "💣",
+    "\\:bone:" => "🦴",
     "\\:book:" => "📖",
     "\\:bookmark:" => "🔖",
     "\\:bookmark_tabs:" => "📑",
     "\\:books:" => "📚",
     "\\:boom:" => "💥",
+    "\\:boomerang:" => "🪃",
     "\\:boot:" => "👢",
     "\\:bouquet:" => "💐",
     "\\:bow:" => "🙇",
+    "\\:bow_and_arrow:" => "🏹",
+    "\\:bowl_with_spoon:" => "🥣",
     "\\:bowling:" => "🎳",
+    "\\:boxing_glove:" => "🥊",
     "\\:boy:" => "👦",
+    "\\:brain:" => "🧠",
     "\\:bread:" => "🍞",
+    "\\:breast-feeding:" => "🤱",
+    "\\:bricks:" => "🧱",
     "\\:bride_with_veil:" => "👰",
     "\\:bridge_at_night:" => "🌉",
     "\\:briefcase:" => "💼",
+    "\\:briefs:" => "🩲",
+    "\\:broccoli:" => "🥦",
     "\\:broken_heart:" => "💔",
+    "\\:broom:" => "🧹",
+    "\\:brown_heart:" => "🤎",
+    "\\:bubble_tea:" => "🧋",
+    "\\:bucket:" => "🪣",
     "\\:bug:" => "🐛",
     "\\:bulb:" => "💡",
     "\\:bullettrain_front:" => "🚅",
     "\\:bullettrain_side:" => "🚄",
+    "\\:burrito:" => "🌯",
     "\\:bus:" => "🚌",
     "\\:busstop:" => "🚏",
     "\\:bust_in_silhouette:" => "👤",
     "\\:busts_in_silhouette:" => "👥",
+    "\\:butter:" => "🧈",
+    "\\:butterfly:" => "🦋",
     "\\:cactus:" => "🌵",
     "\\:cake:" => "🍰",
     "\\:calendar:" => "📆",
+    "\\:call_me_hand:" => "🤙",
     "\\:calling:" => "📲",
     "\\:camel:" => "🐫",
     "\\:camera:" => "📷",
+    "\\:camera_with_flash:" => "📸",
     "\\:cancer:" => "♋",
     "\\:candy:" => "🍬",
+    "\\:canned_food:" => "🥫",
+    "\\:canoe:" => "🛶",
     "\\:capital_abcd:" => "🔠",
     "\\:capricorn:" => "♑",
     "\\:car:" => "🚗",
     "\\:card_index:" => "📇",
     "\\:carousel_horse:" => "🎠",
+    "\\:carpentry_saw:" => "🪚",
+    "\\:carrot:" => "🥕",
     "\\:cat2:" => "🐈",
     "\\:cat:" => "🐱",
     "\\:cd:" => "💿",
+    "\\:chair:" => "🪑",
+    "\\:champagne:" => "🍾",
     "\\:chart:" => "💹",
     "\\:chart_with_downwards_trend:" => "📉",
     "\\:chart_with_upwards_trend:" => "📈",
     "\\:checkered_flag:" => "🏁",
+    "\\:cheese_wedge:" => "🧀",
     "\\:cherries:" => "🍒",
     "\\:cherry_blossom:" => "🌸",
     "\\:chestnut:" => "🌰",
     "\\:chicken:" => "🐔",
+    "\\:child:" => "🧒",
     "\\:children_crossing:" => "🚸",
     "\\:chocolate_bar:" => "🍫",
+    "\\:chopsticks:" => "🥢",
     "\\:christmas_tree:" => "🎄",
     "\\:church:" => "⛪",
     "\\:cinema:" => "🎦",
@@ -184,6 +256,7 @@ const emoji_symbols = Dict(
     "\\:cl:" => "🆑",
     "\\:clap:" => "👏",
     "\\:clapper:" => "🎬",
+    "\\:clinking_glasses:" => "🥂",
     "\\:clipboard:" => "📋",
     "\\:clock1030:" => "🕥",
     "\\:clock10:" => "🕙",
@@ -213,10 +286,17 @@ const emoji_symbols = Dict(
     "\\:closed_lock_with_key:" => "🔐",
     "\\:closed_umbrella:" => "🌂",
     "\\:cloud:" => "☁",
+    "\\:clown_face:" => "🤡",
     "\\:clubs:" => "♣",
+    "\\:coat:" => "🧥",
+    "\\:cockroach:" => "🪳",
     "\\:cocktail:" => "🍸",
+    "\\:coconut:" => "🥥",
     "\\:coffee:" => "☕",
+    "\\:coin:" => "🪙",
+    "\\:cold_face:" => "🥶",
     "\\:cold_sweat:" => "😰",
+    "\\:compass:" => "🧭",
     "\\:computer:" => "💻",
     "\\:confetti_ball:" => "🎊",
     "\\:confounded:" => "😖",
@@ -235,20 +315,30 @@ const emoji_symbols = Dict(
     "\\:couplekiss:" => "💏",
     "\\:cow2:" => "🐄",
     "\\:cow:" => "🐮",
+    "\\:crab:" => "🦀",
     "\\:credit_card:" => "💳",
     "\\:crescent_moon:" => "🌙",
+    "\\:cricket:" => "🦗",
+    "\\:cricket_bat_and_ball:" => "🏏",
     "\\:crocodile:" => "🐊",
+    "\\:croissant:" => "🥐",
+    "\\:crossed_fingers:" => "🤞",
     "\\:crossed_flags:" => "🎌",
     "\\:crown:" => "👑",
     "\\:cry:" => "😢",
     "\\:crying_cat_face:" => "😿",
     "\\:crystal_ball:" => "🔮",
+    "\\:cucumber:" => "🥒",
+    "\\:cup_with_straw:" => "🥤",
+    "\\:cupcake:" => "🧁",
     "\\:cupid:" => "💘",
+    "\\:curling_stone:" => "🥌",
     "\\:curly_loop:" => "➰",
     "\\:currency_exchange:" => "💱",
     "\\:curry:" => "🍛",
     "\\:custard:" => "🍮",
     "\\:customs:" => "🛃",
+    "\\:cut_of_meat:" => "🥩",
     "\\:cyclone:" => "🌀",
     "\\:dancer:" => "💃",
     "\\:dancers:" => "👯",
@@ -256,15 +346,22 @@ const emoji_symbols = Dict(
     "\\:dart:" => "🎯",
     "\\:dash:" => "💨",
     "\\:date:" => "📅",
+    "\\:deaf_person:" => "🧏",
     "\\:deciduous_tree:" => "🌳",
+    "\\:deer:" => "🦌",
     "\\:department_store:" => "🏬",
     "\\:diamond_shape_with_a_dot_inside:" => "💠",
     "\\:diamonds:" => "♦",
     "\\:disappointed:" => "😞",
     "\\:disappointed_relieved:" => "😥",
+    "\\:disguised_face:" => "🥸",
+    "\\:diving_mask:" => "🤿",
+    "\\:diya_lamp:" => "🪔",
     "\\:dizzy:" => "💫",
     "\\:dizzy_face:" => "😵",
+    "\\:dna:" => "🧬",
     "\\:do_not_litter:" => "🚯",
+    "\\:dodo:" => "🦤",
     "\\:dog2:" => "🐕",
     "\\:dog:" => "🐶",
     "\\:dollar:" => "💵",
@@ -276,20 +373,29 @@ const emoji_symbols = Dict(
     "\\:dragon_face:" => "🐲",
     "\\:dress:" => "👗",
     "\\:dromedary_camel:" => "🐪",
+    "\\:drooling_face:" => "🤤",
+    "\\:drop_of_blood:" => "🩸",
     "\\:droplet:" => "💧",
+    "\\:drum_with_drumsticks:" => "🥁",
+    "\\:duck:" => "🦆",
+    "\\:dumpling:" => "🥟",
     "\\:dvd:" => "📀",
     "\\:e-mail:" => "📧",
+    "\\:eagle:" => "🦅",
     "\\:ear:" => "👂",
     "\\:ear_of_rice:" => "🌾",
+    "\\:ear_with_hearing_aid:" => "🦻",
     "\\:earth_africa:" => "🌍",
     "\\:earth_americas:" => "🌎",
     "\\:earth_asia:" => "🌏",
-    "\\:egg:" => "🍳",
+    "\\:egg:" => "🥚",
     "\\:eggplant:" => "🍆",
     "\\:eight_pointed_black_star:" => "✴",
     "\\:eight_spoked_asterisk:" => "✳",
     "\\:electric_plug:" => "🔌",
     "\\:elephant:" => "🐘",
+    "\\:elevator:" => "🛗",
+    "\\:elf:" => "🧝",
     "\\:email:" => "✉",
     "\\:end:" => "🔚",
     "\\:envelope_with_arrow:" => "📩",
@@ -298,22 +404,41 @@ const emoji_symbols = Dict(
     "\\:european_post_office:" => "🏤",
     "\\:evergreen_tree:" => "🌲",
     "\\:exclamation:" => "❗",
+    "\\:exploding_head:" => "🤯",
     "\\:expressionless:" => "😑",
     "\\:eyeglasses:" => "👓",
     "\\:eyes:" => "👀",
+    "\\:face_palm:" => "🤦",
+    "\\:face_vomiting:" => "🤮",
+    "\\:face_with_cowboy_hat:" => "🤠",
+    "\\:face_with_hand_over_mouth:" => "🤭",
+    "\\:face_with_head_bandage:" => "🤕",
+    "\\:face_with_monocle:" => "🧐",
+    "\\:face_with_raised_eyebrow:" => "🤨",
+    "\\:face_with_rolling_eyes:" => "🙄",
+    "\\:face_with_symbols_on_mouth:" => "🤬",
+    "\\:face_with_thermometer:" => "🤒",
     "\\:facepunch:" => "👊",
     "\\:factory:" => "🏭",
+    "\\:fairy:" => "🧚",
+    "\\:falafel:" => "🧆",
     "\\:fallen_leaf:" => "🍂",
     "\\:family:" => "👪",
     "\\:fast_forward:" => "⏩",
     "\\:fax:" => "📠",
     "\\:fearful:" => "😨",
+    "\\:feather:" => "🪶",
     "\\:feet:" => "🐾",
+    "\\:fencer:" => "🤺",
     "\\:ferris_wheel:" => "🎡",
+    "\\:field_hockey_stick_and_ball:" => "🏑",
     "\\:file_folder:" => "📁",
     "\\:fire:" => "🔥",
     "\\:fire_engine:" => "🚒",
+    "\\:fire_extinguisher:" => "🧯",
+    "\\:firecracker:" => "🧨",
     "\\:fireworks:" => "🎆",
+    "\\:first_place_medal:" => "🥇",
     "\\:first_quarter_moon:" => "🌓",
     "\\:first_quarter_moon_with_face:" => "🌛",
     "\\:fish:" => "🐟",
@@ -321,17 +446,27 @@ const emoji_symbols = Dict(
     "\\:fishing_pole_and_fish:" => "🎣",
     "\\:fist:" => "✊",
     "\\:flags:" => "🎏",
+    "\\:flamingo:" => "🦩",
     "\\:flashlight:" => "🔦",
+    "\\:flatbread:" => "🫓",
     "\\:floppy_disk:" => "💾",
     "\\:flower_playing_cards:" => "🎴",
     "\\:flushed:" => "😳",
+    "\\:fly:" => "🪰",
+    "\\:flying_disc:" => "🥏",
+    "\\:flying_saucer:" => "🛸",
     "\\:foggy:" => "🌁",
+    "\\:fondue:" => "🫕",
+    "\\:foot:" => "🦶",
     "\\:football:" => "🏈",
     "\\:footprints:" => "👣",
     "\\:fork_and_knife:" => "🍴",
+    "\\:fortune_cookie:" => "🥠",
     "\\:fountain:" => "⛲",
     "\\:four_leaf_clover:" => "🍀",
+    "\\:fox_face:" => "🦊",
     "\\:free:" => "🆓",
+    "\\:fried_egg:" => "🍳",
     "\\:fried_shrimp:" => "🍤",
     "\\:fries:" => "🍟",
     "\\:frog:" => "🐸",
@@ -340,25 +475,35 @@ const emoji_symbols = Dict(
     "\\:full_moon:" => "🌕",
     "\\:full_moon_with_face:" => "🌝",
     "\\:game_die:" => "🎲",
+    "\\:garlic:" => "🧄",
     "\\:gem:" => "💎",
     "\\:gemini:" => "♊",
+    "\\:genie:" => "🧞",
     "\\:ghost:" => "👻",
     "\\:gift:" => "🎁",
     "\\:gift_heart:" => "💝",
+    "\\:giraffe_face:" => "🦒",
     "\\:girl:" => "👧",
+    "\\:glass_of_milk:" => "🥛",
     "\\:globe_with_meridians:" => "🌐",
+    "\\:gloves:" => "🧤",
+    "\\:goal_net:" => "🥅",
     "\\:goat:" => "🐐",
+    "\\:goggles:" => "🥽",
     "\\:golf:" => "⛳",
+    "\\:gorilla:" => "🦍",
     "\\:grapes:" => "🍇",
     "\\:green_apple:" => "🍏",
     "\\:green_book:" => "📗",
     "\\:green_heart:" => "💚",
+    "\\:green_salad:" => "🥗",
     "\\:grey_exclamation:" => "❕",
     "\\:grey_question:" => "❔",
     "\\:grimacing:" => "😬",
     "\\:grin:" => "😁",
     "\\:grinning:" => "😀",
     "\\:guardsman:" => "💂",
+    "\\:guide_dog:" => "🦮",
     "\\:guitar:" => "🎸",
     "\\:gun:" => "🔫",
     "\\:haircut:" => "💇",
@@ -367,10 +512,13 @@ const emoji_symbols = Dict(
     "\\:hamster:" => "🐹",
     "\\:hand:" => "✋",
     "\\:handbag:" => "👜",
+    "\\:handball:" => "🤾",
+    "\\:handshake:" => "🤝",
     "\\:hankey:" => "💩",
     "\\:hatched_chick:" => "🐥",
     "\\:hatching_chick:" => "🐣",
     "\\:headphones:" => "🎧",
+    "\\:headstone:" => "🪦",
     "\\:hear_no_evil:" => "🙉",
     "\\:heart:" => "❤",
     "\\:heart_decoration:" => "💟",
@@ -385,24 +533,36 @@ const emoji_symbols = Dict(
     "\\:heavy_minus_sign:" => "➖",
     "\\:heavy_multiplication_x:" => "✖",
     "\\:heavy_plus_sign:" => "➕",
+    "\\:hedgehog:" => "🦔",
     "\\:helicopter:" => "🚁",
     "\\:herb:" => "🌿",
     "\\:hibiscus:" => "🌺",
     "\\:high_brightness:" => "🔆",
     "\\:high_heel:" => "👠",
+    "\\:hiking_boot:" => "🥾",
+    "\\:hindu_temple:" => "🛕",
+    "\\:hippopotamus:" => "🦛",
     "\\:hocho:" => "🔪",
     "\\:honey_pot:" => "🍯",
+    "\\:hook:" => "🪝",
     "\\:horse:" => "🐴",
     "\\:horse_racing:" => "🏇",
     "\\:hospital:" => "🏥",
+    "\\:hot_face:" => "🥵",
+    "\\:hotdog:" => "🌭",
     "\\:hotel:" => "🏨",
     "\\:hotsprings:" => "♨",
     "\\:hourglass:" => "⌛",
     "\\:hourglass_flowing_sand:" => "⏳",
     "\\:house:" => "🏠",
     "\\:house_with_garden:" => "🏡",
+    "\\:hugging_face:" => "🤗",
     "\\:hushed:" => "😯",
+    "\\:hut:" => "🛖",
+    "\\:i_love_you_hand_sign:" => "🤟",
     "\\:ice_cream:" => "🍨",
+    "\\:ice_cube:" => "🧊",
+    "\\:ice_hockey_stick_and_puck:" => "🏒",
     "\\:icecream:" => "🍦",
     "\\:id:" => "🆔",
     "\\:ideograph_advantage:" => "🉐",
@@ -421,8 +581,12 @@ const emoji_symbols = Dict(
     "\\:japanese_goblin:" => "👺",
     "\\:japanese_ogre:" => "👹",
     "\\:jeans:" => "👖",
+    "\\:jigsaw:" => "🧩",
     "\\:joy:" => "😂",
     "\\:joy_cat:" => "😹",
+    "\\:juggling:" => "🤹",
+    "\\:kaaba:" => "🕋",
+    "\\:kangaroo:" => "🦘",
     "\\:key:" => "🔑",
     "\\:keycap_ten:" => "🔟",
     "\\:kimono:" => "👘",
@@ -432,78 +596,136 @@ const emoji_symbols = Dict(
     "\\:kissing_closed_eyes:" => "😚",
     "\\:kissing_heart:" => "😘",
     "\\:kissing_smiling_eyes:" => "😙",
+    "\\:kite:" => "🪁",
+    "\\:kiwifruit:" => "🥝",
+    "\\:kneeling_person:" => "🧎",
+    "\\:knot:" => "🪢",
     "\\:koala:" => "🐨",
     "\\:koko:" => "🈁",
+    "\\:lab_coat:" => "🥼",
+    "\\:lacrosse:" => "🥍",
+    "\\:ladder:" => "🪜",
+    "\\:ladybug:" => "🐞",
     "\\:large_blue_circle:" => "🔵",
     "\\:large_blue_diamond:" => "🔷",
+    "\\:large_blue_square:" => "🟦",
+    "\\:large_brown_circle:" => "🟤",
+    "\\:large_brown_square:" => "🟫",
+    "\\:large_green_circle:" => "🟢",
+    "\\:large_green_square:" => "🟩",
+    "\\:large_orange_circle:" => "🟠",
     "\\:large_orange_diamond:" => "🔶",
+    "\\:large_orange_square:" => "🟧",
+    "\\:large_purple_circle:" => "🟣",
+    "\\:large_purple_square:" => "🟪",
+    "\\:large_red_square:" => "🟥",
+    "\\:large_yellow_circle:" => "🟡",
+    "\\:large_yellow_square:" => "🟨",
     "\\:last_quarter_moon:" => "🌗",
     "\\:last_quarter_moon_with_face:" => "🌜",
     "\\:laughing:" => "😆",
+    "\\:leafy_green:" => "🥬",
     "\\:leaves:" => "🍃",
     "\\:ledger:" => "📒",
+    "\\:left-facing_fist:" => "🤛",
     "\\:left_luggage:" => "🛅",
     "\\:left_right_arrow:" => "↔",
     "\\:leftwards_arrow_with_hook:" => "↩",
+    "\\:leg:" => "🦵",
     "\\:lemon:" => "🍋",
     "\\:leo:" => "♌",
     "\\:leopard:" => "🐆",
     "\\:libra:" => "♎",
     "\\:light_rail:" => "🚈",
     "\\:link:" => "🔗",
+    "\\:lion_face:" => "🦁",
     "\\:lips:" => "👄",
     "\\:lipstick:" => "💄",
+    "\\:lizard:" => "🦎",
+    "\\:llama:" => "🦙",
+    "\\:lobster:" => "🦞",
     "\\:lock:" => "🔒",
     "\\:lock_with_ink_pen:" => "🔏",
     "\\:lollipop:" => "🍭",
+    "\\:long_drum:" => "🪘",
     "\\:loop:" => "➿",
+    "\\:lotion_bottle:" => "🧴",
     "\\:loud_sound:" => "🔊",
     "\\:loudspeaker:" => "📢",
     "\\:love_hotel:" => "🏩",
     "\\:love_letter:" => "💌",
     "\\:low_brightness:" => "🔅",
+    "\\:luggage:" => "🧳",
+    "\\:lungs:" => "🫁",
+    "\\:lying_face:" => "🤥",
     "\\:m:" => "Ⓜ",
     "\\:mag:" => "🔍",
     "\\:mag_right:" => "🔎",
+    "\\:mage:" => "🧙",
+    "\\:magic_wand:" => "🪄",
+    "\\:magnet:" => "🧲",
     "\\:mahjong:" => "🀄",
     "\\:mailbox:" => "📫",
     "\\:mailbox_closed:" => "📪",
     "\\:mailbox_with_mail:" => "📬",
     "\\:mailbox_with_no_mail:" => "📭",
+    "\\:mammoth:" => "🦣",
     "\\:man:" => "👨",
+    "\\:man_and_woman_holding_hands:" => "👫",
+    "\\:man_dancing:" => "🕺",
     "\\:man_with_gua_pi_mao:" => "👲",
     "\\:man_with_turban:" => "👳",
+    "\\:mango:" => "🥭",
     "\\:mans_shoe:" => "👞",
+    "\\:manual_wheelchair:" => "🦽",
     "\\:maple_leaf:" => "🍁",
+    "\\:martial_arts_uniform:" => "🥋",
     "\\:mask:" => "😷",
     "\\:massage:" => "💆",
+    "\\:mate_drink:" => "🧉",
     "\\:meat_on_bone:" => "🍖",
+    "\\:mechanical_arm:" => "🦾",
+    "\\:mechanical_leg:" => "🦿",
     "\\:mega:" => "📣",
     "\\:melon:" => "🍈",
     "\\:memo:" => "📝",
+    "\\:menorah_with_nine_branches:" => "🕎",
     "\\:mens:" => "🚹",
+    "\\:merperson:" => "🧜",
     "\\:metro:" => "🚇",
+    "\\:microbe:" => "🦠",
     "\\:microphone:" => "🎤",
     "\\:microscope:" => "🔬",
+    "\\:middle_finger:" => "🖕",
+    "\\:military_helmet:" => "🪖",
     "\\:milky_way:" => "🌌",
     "\\:minibus:" => "🚐",
     "\\:minidisc:" => "💽",
+    "\\:mirror:" => "🪞",
     "\\:mobile_phone_off:" => "📴",
+    "\\:money_mouth_face:" => "🤑",
     "\\:money_with_wings:" => "💸",
     "\\:moneybag:" => "💰",
     "\\:monkey:" => "🐒",
     "\\:monkey_face:" => "🐵",
     "\\:monorail:" => "🚝",
     "\\:moon:" => "🌔",
+    "\\:moon_cake:" => "🥮",
     "\\:mortar_board:" => "🎓",
+    "\\:mosque:" => "🕌",
+    "\\:mosquito:" => "🦟",
+    "\\:motor_scooter:" => "🛵",
+    "\\:motorized_wheelchair:" => "🦼",
     "\\:mount_fuji:" => "🗻",
     "\\:mountain_bicyclist:" => "🚵",
     "\\:mountain_cableway:" => "🚠",
     "\\:mountain_railway:" => "🚞",
     "\\:mouse2:" => "🐁",
     "\\:mouse:" => "🐭",
+    "\\:mouse_trap:" => "🪤",
     "\\:movie_camera:" => "🎥",
     "\\:moyai:" => "🗿",
+    "\\:mrs_claus:" => "🤶",
     "\\:muscle:" => "💪",
     "\\:mushroom:" => "🍄",
     "\\:musical_keyboard:" => "🎹",
@@ -512,8 +734,12 @@ const emoji_symbols = Dict(
     "\\:mute:" => "🔇",
     "\\:nail_care:" => "💅",
     "\\:name_badge:" => "📛",
+    "\\:nauseated_face:" => "🤢",
+    "\\:nazar_amulet:" => "🧿",
     "\\:necktie:" => "👔",
     "\\:negative_squared_cross_mark:" => "❎",
+    "\\:nerd_face:" => "🤓",
+    "\\:nesting_dolls:" => "🪆",
     "\\:neutral_face:" => "😐",
     "\\:new:" => "🆕",
     "\\:new_moon:" => "🌑",
@@ -521,6 +747,7 @@ const emoji_symbols = Dict(
     "\\:newspaper:" => "📰",
     "\\:ng:" => "🆖",
     "\\:night_with_stars:" => "🌃",
+    "\\:ninja:" => "🥷",
     "\\:no_bell:" => "🔕",
     "\\:no_bicycles:" => "🚳",
     "\\:no_entry:" => "⛔",
@@ -539,55 +766,89 @@ const emoji_symbols = Dict(
     "\\:o2:" => "🅾",
     "\\:o:" => "⭕",
     "\\:ocean:" => "🌊",
+    "\\:octagonal_sign:" => "🛑",
     "\\:octopus:" => "🐙",
     "\\:oden:" => "🍢",
     "\\:office:" => "🏢",
     "\\:ok:" => "🆗",
     "\\:ok_hand:" => "👌",
     "\\:ok_woman:" => "🙆",
+    "\\:older_adult:" => "🧓",
     "\\:older_man:" => "👴",
     "\\:older_woman:" => "👵",
+    "\\:olive:" => "🫒",
     "\\:on:" => "🔛",
     "\\:oncoming_automobile:" => "🚘",
     "\\:oncoming_bus:" => "🚍",
     "\\:oncoming_police_car:" => "🚔",
     "\\:oncoming_taxi:" => "🚖",
+    "\\:one-piece_swimsuit:" => "🩱",
+    "\\:onion:" => "🧅",
     "\\:open_file_folder:" => "📂",
     "\\:open_hands:" => "👐",
     "\\:open_mouth:" => "😮",
     "\\:ophiuchus:" => "⛎",
     "\\:orange_book:" => "📙",
+    "\\:orange_heart:" => "🧡",
+    "\\:orangutan:" => "🦧",
+    "\\:otter:" => "🦦",
     "\\:outbox_tray:" => "📤",
+    "\\:owl:" => "🦉",
     "\\:ox:" => "🐂",
+    "\\:oyster:" => "🦪",
     "\\:package:" => "📦",
     "\\:page_facing_up:" => "📄",
     "\\:page_with_curl:" => "📃",
     "\\:pager:" => "📟",
     "\\:palm_tree:" => "🌴",
+    "\\:palms_up_together:" => "🤲",
+    "\\:pancakes:" => "🥞",
     "\\:panda_face:" => "🐼",
     "\\:paperclip:" => "📎",
+    "\\:parachute:" => "🪂",
     "\\:parking:" => "🅿",
+    "\\:parrot:" => "🦜",
     "\\:part_alternation_mark:" => "〽",
     "\\:partly_sunny:" => "⛅",
+    "\\:partying_face:" => "🥳",
     "\\:passport_control:" => "🛂",
     "\\:peach:" => "🍑",
+    "\\:peacock:" => "🦚",
+    "\\:peanuts:" => "🥜",
     "\\:pear:" => "🍐",
     "\\:pencil2:" => "✏",
     "\\:penguin:" => "🐧",
     "\\:pensive:" => "😔",
+    "\\:people_hugging:" => "🫂",
     "\\:performing_arts:" => "🎭",
     "\\:persevere:" => "😣",
+    "\\:person_climbing:" => "🧗",
+    "\\:person_doing_cartwheel:" => "🤸",
     "\\:person_frowning:" => "🙍",
+    "\\:person_in_lotus_position:" => "🧘",
+    "\\:person_in_steamy_room:" => "🧖",
+    "\\:person_in_tuxedo:" => "🤵",
     "\\:person_with_blond_hair:" => "👱",
+    "\\:person_with_headscarf:" => "🧕",
     "\\:person_with_pouting_face:" => "🙎",
+    "\\:petri_dish:" => "🧫",
     "\\:phone:" => "☎",
+    "\\:pickup_truck:" => "🛻",
+    "\\:pie:" => "🥧",
     "\\:pig2:" => "🐖",
     "\\:pig:" => "🐷",
     "\\:pig_nose:" => "🐽",
     "\\:pill:" => "💊",
+    "\\:pinata:" => "🪅",
+    "\\:pinched_fingers:" => "🤌",
+    "\\:pinching_hand:" => "🤏",
     "\\:pineapple:" => "🍍",
     "\\:pisces:" => "♓",
     "\\:pizza:" => "🍕",
+    "\\:placard:" => "🪧",
+    "\\:place_of_worship:" => "🛐",
+    "\\:pleading_face:" => "🥺",
+    "\\:plunger:" => "🪠",
     "\\:point_down:" => "👇",
     "\\:point_left:" => "👈",
     "\\:point_right:" => "👉",
@@ -595,16 +856,24 @@ const emoji_symbols = Dict(
     "\\:point_up_2:" => "👆",
     "\\:police_car:" => "🚓",
     "\\:poodle:" => "🐩",
+    "\\:popcorn:" => "🍿",
     "\\:post_office:" => "🏣",
     "\\:postal_horn:" => "📯",
     "\\:postbox:" => "📮",
     "\\:potable_water:" => "🚰",
+    "\\:potato:" => "🥔",
+    "\\:potted_plant:" => "🪴",
     "\\:pouch:" => "👝",
     "\\:poultry_leg:" => "🍗",
     "\\:pound:" => "💷",
     "\\:pouting_cat:" => "😾",
     "\\:pray:" => "🙏",
+    "\\:prayer_beads:" => "📿",
+    "\\:pregnant_woman:" => "🤰",
+    "\\:pretzel:" => "🥨",
+    "\\:prince:" => "🤴",
     "\\:princess:" => "👸",
+    "\\:probing_cane:" => "🦯",
     "\\:purple_heart:" => "💜",
     "\\:purse:" => "👛",
     "\\:pushpin:" => "📌",
@@ -612,19 +881,24 @@ const emoji_symbols = Dict(
     "\\:question:" => "❓",
     "\\:rabbit2:" => "🐇",
     "\\:rabbit:" => "🐰",
+    "\\:raccoon:" => "🦝",
     "\\:racehorse:" => "🐎",
     "\\:radio:" => "📻",
     "\\:radio_button:" => "🔘",
     "\\:rage:" => "😡",
     "\\:railway_car:" => "🚃",
     "\\:rainbow:" => "🌈",
+    "\\:raised_back_of_hand:" => "🤚",
     "\\:raised_hands:" => "🙌",
     "\\:raising_hand:" => "🙋",
     "\\:ram:" => "🐏",
     "\\:ramen:" => "🍜",
     "\\:rat:" => "🐀",
+    "\\:razor:" => "🪒",
+    "\\:receipt:" => "🧾",
     "\\:recycle:" => "♻",
     "\\:red_circle:" => "🔴",
+    "\\:red_envelope:" => "🧧",
     "\\:registered:" => "®",
     "\\:relaxed:" => "☺",
     "\\:relieved:" => "😌",
@@ -633,14 +907,22 @@ const emoji_symbols = Dict(
     "\\:restroom:" => "🚻",
     "\\:revolving_hearts:" => "💞",
     "\\:rewind:" => "⏪",
+    "\\:rhinoceros:" => "🦏",
     "\\:ribbon:" => "🎀",
     "\\:rice:" => "🍚",
     "\\:rice_ball:" => "🍙",
     "\\:rice_cracker:" => "🍘",
     "\\:rice_scene:" => "🎑",
+    "\\:right-facing_fist:" => "🤜",
     "\\:ring:" => "💍",
+    "\\:ringed_planet:" => "🪐",
+    "\\:robot_face:" => "🤖",
+    "\\:rock:" => "🪨",
     "\\:rocket:" => "🚀",
+    "\\:roll_of_paper:" => "🧻",
     "\\:roller_coaster:" => "🎢",
+    "\\:roller_skate:" => "🛼",
+    "\\:rolling_on_the_floor_laughing:" => "🤣",
     "\\:rooster:" => "🐓",
     "\\:rose:" => "🌹",
     "\\:rotating_light:" => "🚨",
@@ -650,41 +932,70 @@ const emoji_symbols = Dict(
     "\\:runner:" => "🏃",
     "\\:running_shirt_with_sash:" => "🎽",
     "\\:sa:" => "🈂",
+    "\\:safety_pin:" => "🧷",
+    "\\:safety_vest:" => "🦺",
     "\\:sagittarius:" => "♐",
     "\\:sake:" => "🍶",
+    "\\:salt:" => "🧂",
     "\\:sandal:" => "👡",
+    "\\:sandwich:" => "🥪",
     "\\:santa:" => "🎅",
+    "\\:sari:" => "🥻",
     "\\:satellite:" => "📡",
+    "\\:satellite_antenna:" => "📡",
+    "\\:sauropod:" => "🦕",
     "\\:saxophone:" => "🎷",
+    "\\:scarf:" => "🧣",
     "\\:school:" => "🏫",
     "\\:school_satchel:" => "🎒",
     "\\:scissors:" => "✂",
+    "\\:scooter:" => "🛴",
+    "\\:scorpion:" => "🦂",
     "\\:scorpius:" => "♏",
     "\\:scream:" => "😱",
     "\\:scream_cat:" => "🙀",
+    "\\:screwdriver:" => "🪛",
     "\\:scroll:" => "📜",
+    "\\:seal:" => "🦭",
     "\\:seat:" => "💺",
+    "\\:second_place_medal:" => "🥈",
     "\\:secret:" => "㊙",
     "\\:see_no_evil:" => "🙈",
     "\\:seedling:" => "🌱",
+    "\\:selfie:" => "🤳",
+    "\\:sewing_needle:" => "🪡",
+    "\\:shallow_pan_of_food:" => "🥘",
+    "\\:shark:" => "🦈",
     "\\:shaved_ice:" => "🍧",
     "\\:sheep:" => "🐑",
     "\\:shell:" => "🐚",
     "\\:ship:" => "🚢",
     "\\:shirt:" => "👕",
+    "\\:shopping_trolley:" => "🛒",
+    "\\:shorts:" => "🩳",
     "\\:shower:" => "🚿",
+    "\\:shrimp:" => "🦐",
+    "\\:shrug:" => "🤷",
+    "\\:shushing_face:" => "🤫",
     "\\:signal_strength:" => "📶",
     "\\:six_pointed_star:" => "🔯",
+    "\\:skateboard:" => "🛹",
     "\\:ski:" => "🎿",
-    "\\:skin-tone-2:" => "\U1f3fb",
-    "\\:skin-tone-3:" => "\U1f3fc",
-    "\\:skin-tone-4:" => "\U1f3fd",
-    "\\:skin-tone-5:" => "\U1f3fe",
-    "\\:skin-tone-6:" => "\U1f3ff",
+    "\\:skin-tone-2:" => "🏻",
+    "\\:skin-tone-3:" => "🏼",
+    "\\:skin-tone-4:" => "🏽",
+    "\\:skin-tone-5:" => "🏾",
+    "\\:skin-tone-6:" => "🏿",
     "\\:skull:" => "💀",
+    "\\:skunk:" => "🦨",
+    "\\:sled:" => "🛷",
     "\\:sleeping:" => "😴",
+    "\\:sleeping_accommodation:" => "🛌",
     "\\:sleepy:" => "😪",
+    "\\:slightly_frowning_face:" => "🙁",
+    "\\:slightly_smiling_face:" => "🙂",
     "\\:slot_machine:" => "🎰",
+    "\\:sloth:" => "🦥",
     "\\:small_blue_diamond:" => "🔹",
     "\\:small_orange_diamond:" => "🔸",
     "\\:small_red_triangle:" => "🔺",
@@ -693,17 +1004,24 @@ const emoji_symbols = Dict(
     "\\:smile_cat:" => "😸",
     "\\:smiley:" => "😃",
     "\\:smiley_cat:" => "😺",
+    "\\:smiling_face_with_3_hearts:" => "🥰",
+    "\\:smiling_face_with_tear:" => "🥲",
     "\\:smiling_imp:" => "😈",
     "\\:smirk:" => "😏",
     "\\:smirk_cat:" => "😼",
     "\\:smoking:" => "🚬",
     "\\:snail:" => "🐌",
     "\\:snake:" => "🐍",
+    "\\:sneezing_face:" => "🤧",
     "\\:snowboarder:" => "🏂",
     "\\:snowflake:" => "❄",
     "\\:snowman:" => "⛄",
+    "\\:snowman_without_snow:" => "⛄",
+    "\\:soap:" => "🧼",
     "\\:sob:" => "😭",
     "\\:soccer:" => "⚽",
+    "\\:socks:" => "🧦",
+    "\\:softball:" => "🥎",
     "\\:soon:" => "🔜",
     "\\:sos:" => "🆘",
     "\\:sound:" => "🔉",
@@ -718,45 +1036,71 @@ const emoji_symbols = Dict(
     "\\:speaker:" => "🔈",
     "\\:speech_balloon:" => "💬",
     "\\:speedboat:" => "🚤",
+    "\\:spock-hand:" => "🖖",
+    "\\:sponge:" => "🧽",
+    "\\:spoon:" => "🥄",
+    "\\:sports_medal:" => "🏅",
+    "\\:squid:" => "🦑",
+    "\\:standing_person:" => "🧍",
+    "\\:star-struck:" => "🤩",
     "\\:star2:" => "🌟",
     "\\:star:" => "⭐",
     "\\:stars:" => "🌠",
     "\\:station:" => "🚉",
     "\\:statue_of_liberty:" => "🗽",
     "\\:steam_locomotive:" => "🚂",
+    "\\:stethoscope:" => "🩺",
     "\\:stew:" => "🍲",
     "\\:straight_ruler:" => "📏",
     "\\:strawberry:" => "🍓",
     "\\:stuck_out_tongue:" => "😛",
     "\\:stuck_out_tongue_closed_eyes:" => "😝",
     "\\:stuck_out_tongue_winking_eye:" => "😜",
+    "\\:stuffed_flatbread:" => "🥙",
     "\\:sun_with_face:" => "🌞",
     "\\:sunflower:" => "🌻",
     "\\:sunglasses:" => "😎",
     "\\:sunny:" => "☀",
     "\\:sunrise:" => "🌅",
     "\\:sunrise_over_mountains:" => "🌄",
+    "\\:superhero:" => "🦸",
+    "\\:supervillain:" => "🦹",
     "\\:surfer:" => "🏄",
     "\\:sushi:" => "🍣",
     "\\:suspension_railway:" => "🚟",
+    "\\:swan:" => "🦢",
     "\\:sweat:" => "😓",
     "\\:sweat_drops:" => "💦",
     "\\:sweat_smile:" => "😅",
     "\\:sweet_potato:" => "🍠",
     "\\:swimmer:" => "🏊",
     "\\:symbols:" => "🔣",
+    "\\:synagogue:" => "🕍",
     "\\:syringe:" => "💉",
+    "\\:t-rex:" => "🦖",
+    "\\:table_tennis_paddle_and_ball:" => "🏓",
+    "\\:taco:" => "🌮",
     "\\:tada:" => "🎉",
+    "\\:takeout_box:" => "🥡",
+    "\\:tamale:" => "🫔",
     "\\:tanabata_tree:" => "🎋",
     "\\:tangerine:" => "🍊",
     "\\:taurus:" => "♉",
     "\\:taxi:" => "🚕",
     "\\:tea:" => "🍵",
+    "\\:teapot:" => "🫖",
+    "\\:teddy_bear:" => "🧸",
     "\\:telephone_receiver:" => "📞",
     "\\:telescope:" => "🔭",
     "\\:tennis:" => "🎾",
     "\\:tent:" => "⛺",
+    "\\:test_tube:" => "🧪",
+    "\\:the_horns:" => "🤘",
+    "\\:thinking_face:" => "🤔",
+    "\\:third_place_medal:" => "🥉",
+    "\\:thong_sandal:" => "🩴",
     "\\:thought_balloon:" => "💭",
+    "\\:thread:" => "🧵",
     "\\:ticket:" => "🎫",
     "\\:tiger2:" => "🐅",
     "\\:tiger:" => "🐯",
@@ -766,6 +1110,9 @@ const emoji_symbols = Dict(
     "\\:tokyo_tower:" => "🗼",
     "\\:tomato:" => "🍅",
     "\\:tongue:" => "👅",
+    "\\:toolbox:" => "🧰",
+    "\\:tooth:" => "🦷",
+    "\\:toothbrush:" => "🪥",
     "\\:top:" => "🔝",
     "\\:tophat:" => "🎩",
     "\\:tractor:" => "🚜",
@@ -784,6 +1131,8 @@ const emoji_symbols = Dict(
     "\\:truck:" => "🚚",
     "\\:trumpet:" => "🎺",
     "\\:tulip:" => "🌷",
+    "\\:tumbler_glass:" => "🥃",
+    "\\:turkey:" => "🦃",
     "\\:turtle:" => "🐢",
     "\\:tv:" => "📺",
     "\\:twisted_rightwards_arrows:" => "🔀",
@@ -802,11 +1151,15 @@ const emoji_symbols = Dict(
     "\\:u7981:" => "🈲",
     "\\:u7a7a:" => "🈳",
     "\\:umbrella:" => "☔",
+    "\\:umbrella_with_rain_drops:" => "☔",
     "\\:unamused:" => "😒",
     "\\:underage:" => "🔞",
+    "\\:unicorn_face:" => "🦄",
     "\\:unlock:" => "🔓",
     "\\:up:" => "🆙",
+    "\\:upside_down_face:" => "🙃",
     "\\:v:" => "✌",
+    "\\:vampire:" => "🧛",
     "\\:vertical_traffic_light:" => "🚦",
     "\\:vhs:" => "📼",
     "\\:vibration_mode:" => "📳",
@@ -815,15 +1168,19 @@ const emoji_symbols = Dict(
     "\\:violin:" => "🎻",
     "\\:virgo:" => "♍",
     "\\:volcano:" => "🌋",
+    "\\:volleyball:" => "🏐",
     "\\:vs:" => "🆚",
+    "\\:waffle:" => "🧇",
     "\\:walking:" => "🚶",
     "\\:waning_crescent_moon:" => "🌘",
     "\\:waning_gibbous_moon:" => "🌖",
     "\\:warning:" => "⚠",
     "\\:watch:" => "⌚",
     "\\:water_buffalo:" => "🐃",
+    "\\:water_polo:" => "🤽",
     "\\:watermelon:" => "🍉",
     "\\:wave:" => "👋",
+    "\\:waving_black_flag:" => "🏴",
     "\\:wavy_dash:" => "〰",
     "\\:waxing_crescent_moon:" => "🌒",
     "\\:wc:" => "🚾",
@@ -835,25 +1192,40 @@ const emoji_symbols = Dict(
     "\\:white_check_mark:" => "✅",
     "\\:white_circle:" => "⚪",
     "\\:white_flower:" => "💮",
+    "\\:white_heart:" => "🤍",
     "\\:white_large_square:" => "⬜",
     "\\:white_medium_small_square:" => "◽",
     "\\:white_medium_square:" => "◻",
     "\\:white_small_square:" => "▫",
     "\\:white_square_button:" => "🔳",
+    "\\:wilted_flower:" => "🥀",
     "\\:wind_chime:" => "🎐",
+    "\\:window:" => "🪟",
     "\\:wine_glass:" => "🍷",
     "\\:wink:" => "😉",
     "\\:wolf:" => "🐺",
     "\\:woman:" => "👩",
     "\\:womans_clothes:" => "👚",
+    "\\:womans_flat_shoe:" => "🥿",
     "\\:womans_hat:" => "👒",
     "\\:womens:" => "🚺",
+    "\\:wood:" => "🪵",
+    "\\:woozy_face:" => "🥴",
+    "\\:worm:" => "🪱",
     "\\:worried:" => "😟",
     "\\:wrench:" => "🔧",
+    "\\:wrestlers:" => "🤼",
     "\\:x:" => "❌",
+    "\\:yarn:" => "🧶",
+    "\\:yawning_face:" => "🥱",
     "\\:yellow_heart:" => "💛",
     "\\:yen:" => "💴",
+    "\\:yo-yo:" => "🪀",
     "\\:yum:" => "😋",
+    "\\:zany_face:" => "🤪",
     "\\:zap:" => "⚡",
+    "\\:zebra_face:" => "🦓",
+    "\\:zipper_mouth_face:" => "🤐",
+    "\\:zombie:" => "🧟",
     "\\:zzz:" => "💤",
 )
diff --git a/stdlib/REPL/src/latex_symbols.jl b/stdlib/REPL/src/latex_symbols.jl
index 16ed9f5b529826..87a3c289661d96 100644
--- a/stdlib/REPL/src/latex_symbols.jl
+++ b/stdlib/REPL/src/latex_symbols.jl
@@ -76,6 +76,10 @@ end
 # Finally, we also add some symbols manually (at the top) as needed,
 # and edited others for consistency (e.g. #21646 and #14751).
 
+# When a symbol has several completions, a canonical reverse mapping is
+# specified at the bottom of this file. The complete reverse mapping is
+# generated lazily in docview.jl.
+
 # "font" prefixes
 const bold = "\\bf"
 const italic = "\\it"
@@ -106,6 +110,8 @@ const latex_symbols = Dict(
     "\\backpprime" => "‶",
     "\\backppprime" => "‷",
     "\\xor" => "⊻",
+    "\\nand" => "⊼",
+    "\\nor" => "⊽",
     "\\iff" => "⟺",
     "\\implies" => "⟹",
     "\\impliedby" => "⟸",
@@ -182,7 +188,7 @@ const latex_symbols = Dict(
     "\\^iota" => "ᶥ",
     "\\^phi" => "ᵠ",
     "\\^chi" => "ᵡ",
-    "\\^Phi" => "ᶲ",
+    "\\^ltphi" => "ᶲ",
     "\\^uparrow" => "ꜛ",
     "\\^downarrow" => "ꜜ",
     "\\^!" => "ꜝ",
@@ -416,8 +422,8 @@ const latex_symbols = Dict(
     "\\lq" => "‘",
     "\\rq" => "’",
     "\\reapos" => "‛",
-    "\\quotedblleft" => "“",
-    "\\quotedblright" => "”",
+    "\\ldq" => "“",
+    "\\rdq" => "”",
     "\\dagger" => "†",
     "\\ddagger" => "‡",
     "\\bullet" => "•",
@@ -590,6 +596,7 @@ const latex_symbols = Dict(
     "\\triangleq" => "≜",
     "\\questeq" => "≟",
     "\\ne" => "≠",
+    "\\neq" => "≠",
     "\\equiv" => "≡",
     "\\nequiv" => "≢",
     "\\le" => "≤",
@@ -665,8 +672,13 @@ const latex_symbols = Dict(
     "\\dashv" => "⊣",
     "\\top" => "⊤",
     "\\bot" => "⊥",
+    "\\Top" => "⫪",
+    "\\Bot" => "⫫",
+    "\\indep" => "⫫",
     "\\models" => "⊧",
     "\\vDash" => "⊨",
+    "\\downvDash" => "⫪",
+    "\\upvDash" => "⫫",
     "\\Vdash" => "⊩",
     "\\Vvdash" => "⊪",
     "\\VDash" => "⊫",
@@ -714,7 +726,6 @@ const latex_symbols = Dict(
     "\\gtreqless" => "⋛",
     "\\curlyeqprec" => "⋞",
     "\\curlyeqsucc" => "⋟",
-    "\\sqspne" => "⋥",
     "\\lnsim" => "⋦",
     "\\gnsim" => "⋧",
     "\\precnsim" => "⋨",
@@ -1115,6 +1126,7 @@ const latex_symbols = Dict(
     "\\nsqsubseteq" => "⋢",  # not, square subset, equals
     "\\nsqsupseteq" => "⋣",  # not, square superset, equals
     "\\sqsubsetneq" => "⋤",  # square subset, not equals
+    "\\sqsupsetneq" => "⋥",  # square superset, not equals
     "\\disin" => "⋲",  # element of with long horizontal stroke
     "\\varisins" => "⋳",  # element of with vertical bar at end of horizontal stroke
     "\\isins" => "⋴",  # small element of with vertical bar at end of horizontal stroke
@@ -1827,6 +1839,7 @@ const latex_symbols = Dict(
     frak*"E" => "𝔈",  # mathematical fraktur capital e
     frak*"F" => "𝔉",  # mathematical fraktur capital f
     frak*"G" => "𝔊",  # mathematical fraktur capital g
+    frak*"I" => "ℑ",  # black-letter capital i (manual addition)
     frak*"J" => "𝔍",  # mathematical fraktur capital j
     frak*"K" => "𝔎",  # mathematical fraktur capital k
     frak*"L" => "𝔏",  # mathematical fraktur capital l
@@ -1835,6 +1848,7 @@ const latex_symbols = Dict(
     frak*"O" => "𝔒",  # mathematical fraktur capital o
     frak*"P" => "𝔓",  # mathematical fraktur capital p
     frak*"Q" => "𝔔",  # mathematical fraktur capital q
+    frak*"R" => "ℜ",  # black-letter capital r (manual addition)
     frak*"S" => "𝔖",  # mathematical fraktur capital s
     frak*"T" => "𝔗",  # mathematical fraktur capital t
     frak*"U" => "𝔘",  # mathematical fraktur capital u
@@ -2257,7 +2271,7 @@ const latex_symbols = Dict(
     bold*"beta" => "𝛃",  # mathematical bold small beta
     bold*"gamma" => "𝛄",  # mathematical bold small gamma
     bold*"delta" => "𝛅",  # mathematical bold small delta
-    bold*"epsilon" => "𝛆",  # mathematical bold small epsilon
+    bold*"varepsilon" => "𝛆",  # mathematical bold small epsilon
     bold*"zeta" => "𝛇",  # mathematical bold small zeta
     bold*"eta" => "𝛈",  # mathematical bold small eta
     bold*"theta" => "𝛉",  # mathematical bold small theta
@@ -2279,7 +2293,7 @@ const latex_symbols = Dict(
     bold*"psi" => "𝛙",  # mathematical bold small psi
     bold*"omega" => "𝛚",  # mathematical bold small omega
     bold*"partial" => "𝛛",  # mathematical bold partial differential
-    bold*"varepsilon" => "𝛜",  # mathematical bold epsilon symbol
+    bold*"epsilon" => "𝛜",  # mathematical bold epsilon symbol
     bold*"vartheta" => "𝛝",  # mathematical bold theta symbol
     bold*"varkappa" => "𝛞",  # mathematical bold kappa symbol
     bold*"phi" => "𝛟",  # mathematical bold phi symbol
@@ -2315,7 +2329,7 @@ const latex_symbols = Dict(
     italic*"beta" => "𝛽",  # mathematical italic small beta
     italic*"gamma" => "𝛾",  # mathematical italic small gamma
     italic*"delta" => "𝛿",  # mathematical italic small delta
-    italic*"epsilon" => "𝜀",  # mathematical italic small epsilon
+    italic*"varepsilon" => "𝜀",  # mathematical italic small epsilon
     italic*"zeta" => "𝜁",  # mathematical italic small zeta
     italic*"eta" => "𝜂",  # mathematical italic small eta
     italic*"theta" => "𝜃",  # mathematical italic small theta
@@ -2332,15 +2346,15 @@ const latex_symbols = Dict(
     italic*"sigma" => "𝜎",  # mathematical italic small sigma
     italic*"tau" => "𝜏",  # mathematical italic small tau
     italic*"upsilon" => "𝜐",  # mathematical italic small upsilon
-    italic*"phi" => "𝜑",  # mathematical italic small phi
+    italic*"varphi" => "𝜑",  # mathematical italic small phi
     italic*"chi" => "𝜒",  # mathematical italic small chi
     italic*"psi" => "𝜓",  # mathematical italic small psi
     italic*"omega" => "𝜔",  # mathematical italic small omega
     italic*"partial" => "𝜕",  # mathematical italic partial differential
-    italic*"varepsilon" => "𝜖",  # mathematical italic epsilon symbol
+    italic*"epsilon" => "𝜖",  # mathematical italic epsilon symbol
     italic*"vartheta" => "𝜗",  # mathematical italic theta symbol
     italic*"varkappa" => "𝜘",  # mathematical italic kappa symbol
-    italic*"varphi" => "𝜙",  # mathematical italic phi symbol
+    italic*"phi" => "𝜙",  # mathematical italic phi symbol
     italic*"varrho" => "𝜚",  # mathematical italic rho symbol
     italic*"varpi" => "𝜛",  # mathematical italic pi symbol
     bolditalic*"Alpha" => "𝜜",  # mathematical bold italic capital alpha
@@ -2373,7 +2387,7 @@ const latex_symbols = Dict(
     bolditalic*"beta" => "𝜷",  # mathematical bold italic small beta
     bolditalic*"gamma" => "𝜸",  # mathematical bold italic small gamma
     bolditalic*"delta" => "𝜹",  # mathematical bold italic small delta
-    bolditalic*"epsilon" => "𝜺",  # mathematical bold italic small epsilon
+    bolditalic*"varepsilon" => "𝜺",  # mathematical bold italic small epsilon
     bolditalic*"zeta" => "𝜻",  # mathematical bold italic small zeta
     bolditalic*"eta" => "𝜼",  # mathematical bold italic small eta
     bolditalic*"theta" => "𝜽",  # mathematical bold italic small theta
@@ -2390,15 +2404,15 @@ const latex_symbols = Dict(
     bolditalic*"sigma" => "𝝈",  # mathematical bold italic small sigma
     bolditalic*"tau" => "𝝉",  # mathematical bold italic small tau
     bolditalic*"upsilon" => "𝝊",  # mathematical bold italic small upsilon
-    bolditalic*"phi" => "𝝋",  # mathematical bold italic small phi
+    bolditalic*"varphi" => "𝝋",  # mathematical bold italic small phi
     bolditalic*"chi" => "𝝌",  # mathematical bold italic small chi
     bolditalic*"psi" => "𝝍",  # mathematical bold italic small psi
     bolditalic*"omega" => "𝝎",  # mathematical bold italic small omega
     bolditalic*"partial" => "𝝏",  # mathematical bold italic partial differential
-    bolditalic*"varepsilon" => "𝝐",  # mathematical bold italic epsilon symbol
+    bolditalic*"epsilon" => "𝝐",  # mathematical bold italic epsilon symbol
     bolditalic*"vartheta" => "𝝑",  # mathematical bold italic theta symbol
     bolditalic*"varkappa" => "𝝒",  # mathematical bold italic kappa symbol
-    bolditalic*"varphi" => "𝝓",  # mathematical bold italic phi symbol
+    bolditalic*"phi" => "𝝓",  # mathematical bold italic phi symbol
     bolditalic*"varrho" => "𝝔",  # mathematical bold italic rho symbol
     bolditalic*"varpi" => "𝝕",  # mathematical bold italic pi symbol
     boldsans*"Alpha" => "𝝖",  # mathematical sans-serif bold capital alpha
@@ -2431,7 +2445,7 @@ const latex_symbols = Dict(
     boldsans*"beta" => "𝝱",  # mathematical sans-serif bold small beta
     boldsans*"gamma" => "𝝲",  # mathematical sans-serif bold small gamma
     boldsans*"delta" => "𝝳",  # mathematical sans-serif bold small delta
-    boldsans*"epsilon" => "𝝴",  # mathematical sans-serif bold small epsilon
+    boldsans*"varepsilon" => "𝝴",  # mathematical sans-serif bold small epsilon
     boldsans*"zeta" => "𝝵",  # mathematical sans-serif bold small zeta
     boldsans*"eta" => "𝝶",  # mathematical sans-serif bold small eta
     boldsans*"theta" => "𝝷",  # mathematical sans-serif bold small theta
@@ -2448,15 +2462,15 @@ const latex_symbols = Dict(
     boldsans*"sigma" => "𝞂",  # mathematical sans-serif bold small sigma
     boldsans*"tau" => "𝞃",  # mathematical sans-serif bold small tau
     boldsans*"upsilon" => "𝞄",  # mathematical sans-serif bold small upsilon
-    boldsans*"phi" => "𝞅",  # mathematical sans-serif bold small phi
+    boldsans*"varphi" => "𝞅",  # mathematical sans-serif bold small phi
     boldsans*"chi" => "𝞆",  # mathematical sans-serif bold small chi
     boldsans*"psi" => "𝞇",  # mathematical sans-serif bold small psi
     boldsans*"omega" => "𝞈",  # mathematical sans-serif bold small omega
     boldsans*"partial" => "𝞉",  # mathematical sans-serif bold partial differential
-    boldsans*"varepsilon" => "𝞊",  # mathematical sans-serif bold epsilon symbol
+    boldsans*"epsilon" => "𝞊",  # mathematical sans-serif bold epsilon symbol
     boldsans*"vartheta" => "𝞋",  # mathematical sans-serif bold theta symbol
     boldsans*"varkappa" => "𝞌",  # mathematical sans-serif bold kappa symbol
-    boldsans*"varphi" => "𝞍",  # mathematical sans-serif bold phi symbol
+    boldsans*"phi" => "𝞍",  # mathematical sans-serif bold phi symbol
     boldsans*"varrho" => "𝞎",  # mathematical sans-serif bold rho symbol
     boldsans*"varpi" => "𝞏",  # mathematical sans-serif bold pi symbol
     bolditalicsans*"Alpha" => "𝞐",  # mathematical sans-serif bold italic capital alpha
@@ -2489,7 +2503,7 @@ const latex_symbols = Dict(
     bolditalicsans*"beta" => "𝞫",  # mathematical sans-serif bold italic small beta
     bolditalicsans*"gamma" => "𝞬",  # mathematical sans-serif bold italic small gamma
     bolditalicsans*"delta" => "𝞭",  # mathematical sans-serif bold italic small delta
-    bolditalicsans*"epsilon" => "𝞮",  # mathematical sans-serif bold italic small epsilon
+    bolditalicsans*"varepsilon" => "𝞮",  # mathematical sans-serif bold italic small epsilon
     bolditalicsans*"zeta" => "𝞯",  # mathematical sans-serif bold italic small zeta
     bolditalicsans*"eta" => "𝞰",  # mathematical sans-serif bold italic small eta
     bolditalicsans*"theta" => "𝞱",  # mathematical sans-serif bold italic small theta
@@ -2506,15 +2520,15 @@ const latex_symbols = Dict(
     bolditalicsans*"sigma" => "𝞼",  # mathematical sans-serif bold italic small sigma
     bolditalicsans*"tau" => "𝞽",  # mathematical sans-serif bold italic small tau
     bolditalicsans*"upsilon" => "𝞾",  # mathematical sans-serif bold italic small upsilon
-    bolditalicsans*"phi" => "𝞿",  # mathematical sans-serif bold italic small phi
+    bolditalicsans*"varphi" => "𝞿",  # mathematical sans-serif bold italic small phi
     bolditalicsans*"chi" => "𝟀",  # mathematical sans-serif bold italic small chi
     bolditalicsans*"psi" => "𝟁",  # mathematical sans-serif bold italic small psi
     bolditalicsans*"omega" => "𝟂",  # mathematical sans-serif bold italic small omega
     bolditalicsans*"partial" => "𝟃",  # mathematical sans-serif bold italic partial differential
-    bolditalicsans*"varepsilon" => "𝟄",  # mathematical sans-serif bold italic epsilon symbol
+    bolditalicsans*"epsilon" => "𝟄",  # mathematical sans-serif bold italic epsilon symbol
     bolditalicsans*"vartheta" => "𝟅",  # mathematical sans-serif bold italic theta symbol
     bolditalicsans*"varkappa" => "𝟆",  # mathematical sans-serif bold italic kappa symbol
-    bolditalicsans*"varphi" => "𝟇",  # mathematical sans-serif bold italic phi symbol
+    bolditalicsans*"phi" => "𝟇",  # mathematical sans-serif bold italic phi symbol
     bolditalicsans*"varrho" => "𝟈",  # mathematical sans-serif bold italic rho symbol
     bolditalicsans*"varpi" => "𝟉",  # mathematical sans-serif bold italic pi symbol
     bold*"Digamma" => "\U1d7ca",  # mathematical bold capital digamma
@@ -2575,7 +2589,6 @@ const latex_symbols = Dict(
     "\\leftouterjoin" => "⟕",  # left outer join
     "\\rightouterjoin" => "⟖",  # right outer join
     "\\fullouterjoin" => "⟗",  # full outer join
-    "\\Join" => "⨝",  # join
     "\\join" => "⨝",  # join
     "\\underbar" => "̲",  # combining low line
     "\\underleftrightarrow" => "͍",  # underleftrightarrow accent
@@ -2613,3 +2626,42 @@ const latex_symbols = Dict(
     "\\0/3" => "↉", # vulgar fraction zero thirds
     "\\1/4" => "¼", # vulgar fraction one quarter
 )
+
+
+# Canonical reverse mapping for symbols that have several completions (#39148).
+#
+# These duplicate mappings can be investigated with the following commands:
+#=
+ls = REPL.REPLCompletions.latex_symbols; symbols = values(ls)
+duplicates = [v for v in unique(symbols) if count(==(v), symbols) > 1]
+[(v, REPL.symbol_latex(v)) => findall(==(v), ls) for v in duplicates]
+=#
+const symbols_latex_canonical = Dict(
+    "⫫" => "\\Bot",
+    "ð" => "\\dh",
+    "…" => "\\ldots",
+    "∅" => "\\emptyset",
+    "ℯ" => "\\euler",
+    "♀" => "\\female",
+    "≥" => "\\ge",
+    "⟺" => "\\iff",
+    "ℑ" => "\\Im",
+    "⟸" => "\\impliedby",
+    "⟹" => "\\implies",
+    "≤" => "\\le",
+    "⟦" => "\\llbracket",
+    "♂" => "\\male",
+    "∇" => "\\del",
+    "ℎ" => "\\planck",
+    "ℜ" => "\\Re",
+    "⟧" => "\\rrbracket",
+    "√" => "\\sqrt",
+    "̶" => "\\sout",
+    "→" => "\\to",
+    "⫪" => "\\Top",
+    "ε" => "\\varepsilon",
+    "⊻" => "\\xor",
+    "⊼" => "\\nand",
+    "⊽" => "\\nor",
+    "≠" => "\\ne",
+)
diff --git a/stdlib/REPL/src/options.jl b/stdlib/REPL/src/options.jl
index 6f9da0a9c90298..3ce0ab6ff00dc0 100644
--- a/stdlib/REPL/src/options.jl
+++ b/stdlib/REPL/src/options.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ## User Options
 
 mutable struct Options
@@ -23,6 +25,8 @@ mutable struct Options
     auto_indent_bracketed_paste::Bool # set to true if terminal knows paste mode
     # cancel auto-indent when next character is entered within this time frame :
     auto_indent_time_threshold::Float64
+    # refresh after time delay
+    auto_refresh_time_delay::Float64
     # default IOContext settings at the REPL
     iocontext::Dict{Symbol,Any}
 end
@@ -42,6 +46,7 @@ Options(;
         auto_indent_tmp_off = false,
         auto_indent_bracketed_paste = false,
         auto_indent_time_threshold = 0.005,
+        auto_refresh_time_delay = Sys.iswindows() ? 0.05 : 0.0,
         iocontext = Dict{Symbol,Any}()) =
             Options(hascolor, extra_keymap, tabwidth,
                     kill_ring_max, region_animation_duration,
@@ -49,7 +54,7 @@ Options(;
                     beep_colors, beep_use_current,
                     backspace_align, backspace_adjust, confirm_exit,
                     auto_indent, auto_indent_tmp_off, auto_indent_bracketed_paste,
-                    auto_indent_time_threshold,
+                    auto_indent_time_threshold, auto_refresh_time_delay,
                     iocontext)
 
 # for use by REPLs not having an options field
diff --git a/stdlib/REPL/test/TerminalMenus/legacytests/config.jl b/stdlib/REPL/test/TerminalMenus/legacytests/config.jl
index 6654da261cc0e4..dec09f24a25e89 100644
--- a/stdlib/REPL/test/TerminalMenus/legacytests/config.jl
+++ b/stdlib/REPL/test/TerminalMenus/legacytests/config.jl
@@ -1,4 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # This file tests the Julia 1.0-1.5 extension interface of TerminalMenus
 
 # scroll must only accept symbols
diff --git a/stdlib/REPL/test/TerminalMenus/legacytests/old_multiselect_menu.jl b/stdlib/REPL/test/TerminalMenus/legacytests/old_multiselect_menu.jl
index fad96980d53d7d..2a78d18bfa7390 100644
--- a/stdlib/REPL/test/TerminalMenus/legacytests/old_multiselect_menu.jl
+++ b/stdlib/REPL/test/TerminalMenus/legacytests/old_multiselect_menu.jl
@@ -1,4 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # This file tests the legacy Julia 1.0-1.5 extension interface of TerminalMenus
 # They are run with `warn=false` to avoid triggering test failures.
 
@@ -13,7 +14,7 @@
 
 multi_menu = MultiSelectMenu(string.(1:20), warn=false)
 @test TerminalMenus.options(multi_menu) == string.(1:20)
-@test TerminalMenus.header(multi_menu) == "[press: d=done, a=all, n=none]"
+@test TerminalMenus.header(multi_menu) == "[press: Enter=toggle, a=all, n=none, d=done, q=abort]"
 
 # Output
 TerminalMenus.config() # Use default chars
@@ -32,6 +33,6 @@ TerminalMenus.writeLine(buf, multi_menu, 1, true)
 
 # Test SDTIN
 multi_menu = MultiSelectMenu(string.(1:10), warn=false)
-@test simulate_input(Set([1,2]), multi_menu, :enter, :down, :enter, 'd')
+@test simulate_input(multi_menu, :enter, :down, :enter, 'd') == Set([1,2])
 multi_menu = MultiSelectMenu(["single option"], warn=false)
-@test simulate_input(Set([1]), multi_menu, :up, :up, :down, :enter, 'd')
+@test simulate_input(multi_menu, :up, :up, :down, :enter, 'd') == Set([1])
diff --git a/stdlib/REPL/test/TerminalMenus/legacytests/old_radio_menu.jl b/stdlib/REPL/test/TerminalMenus/legacytests/old_radio_menu.jl
index 84f07ebca88129..248d5cd6a31835 100644
--- a/stdlib/REPL/test/TerminalMenus/legacytests/old_radio_menu.jl
+++ b/stdlib/REPL/test/TerminalMenus/legacytests/old_radio_menu.jl
@@ -1,4 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # This file tests the legacy Julia 1.0-1.5 extension interface of TerminalMenus
 # They are run with `warn=false` to avoid triggering test failures.
 
@@ -35,8 +36,8 @@ TerminalMenus.writeLine(buf, radio_menu, 1, true)
 
 # Test using stdin
 radio_menu = RadioMenu(string.(1:10), warn=false)
-@test simulate_input(3, radio_menu, :down, :down, :enter)
+@test simulate_input(radio_menu, :down, :down, :enter) == 3
 radio_menu = RadioMenu(["single option"], warn=false)
-@test simulate_input(1, radio_menu, :up, :up, :down, :up, :enter)
+@test simulate_input(radio_menu, :up, :up, :down, :up, :enter) == 1
 radio_menu = RadioMenu(string.(1:3), pagesize=1, warn=false)
-@test simulate_input(3, radio_menu, :down, :down, :down, :down, :enter)
+@test simulate_input(radio_menu, :down, :down, :down, :down, :enter) == 3
diff --git a/stdlib/REPL/test/TerminalMenus/multiselect_menu.jl b/stdlib/REPL/test/TerminalMenus/multiselect_menu.jl
index d47d6285be3ce3..61d2dba95a0b06 100644
--- a/stdlib/REPL/test/TerminalMenus/multiselect_menu.jl
+++ b/stdlib/REPL/test/TerminalMenus/multiselect_menu.jl
@@ -14,7 +14,7 @@
 
 multi_menu = MultiSelectMenu(string.(1:20), charset=:ascii)
 @test TerminalMenus.options(multi_menu) == string.(1:20)
-@test TerminalMenus.header(multi_menu) == "[press: d=done, a=all, n=none]"
+@test TerminalMenus.header(multi_menu) == "[press: Enter=toggle, a=all, n=none, d=done, q=abort]"
 
 # Output
 for kws in ((charset=:ascii,),
@@ -30,10 +30,10 @@ for kws in ((charset=:ascii,),
     TerminalMenus.writeline(buf, multi_menu, 1, true)
     @test String(take!(buf)) == "$uck 1"
     TerminalMenus.printmenu(buf, multi_menu, 1; init=true)
-    @test startswith(String(take!(buf)), string("\e[2K $cur $uck 1"))
+    @test startswith(String(take!(buf)), string("\e[2K[press: Enter=toggle, a=all, n=none, d=done, q=abort]\r\n\e[2K $cur $uck 1"))
     push!(multi_menu.selected, 1)
     TerminalMenus.printmenu(buf, multi_menu, 2; init=true)
-    @test startswith(String(take!(buf)), string("\e[2K   $chk 1\r\n\e[2K $cur $uck 2"))
+    @test startswith(String(take!(buf)), string("\e[2K[press: Enter=toggle, a=all, n=none, d=done, q=abort]\r\n\e[2K   $chk 1\r\n\e[2K $cur $uck 2"))
 end
 
 # Preselection
@@ -52,6 +52,6 @@ end
 
 # Test SDTIN
 multi_menu = MultiSelectMenu(string.(1:10), charset=:ascii)
-@test simulate_input(Set([1,2]), multi_menu, :enter, :down, :enter, 'd')
+@test simulate_input(multi_menu, :enter, :down, :enter, 'd') == Set([1,2])
 multi_menu = MultiSelectMenu(["single option"], charset=:ascii)
-@test simulate_input(Set([1]), multi_menu, :up, :up, :down, :enter, 'd')
+@test simulate_input(multi_menu, :up, :up, :down, :enter, 'd') == Set([1])
diff --git a/stdlib/REPL/test/TerminalMenus/multiselect_with_skip_menu.jl b/stdlib/REPL/test/TerminalMenus/multiselect_with_skip_menu.jl
new file mode 100644
index 00000000000000..609b168c2ddbac
--- /dev/null
+++ b/stdlib/REPL/test/TerminalMenus/multiselect_with_skip_menu.jl
@@ -0,0 +1,130 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# Like MultiSelect but adds `n`/`p` to move to next/previous
+# unselected item and `N`/`P` to move to next/previous selected item.
+mutable struct MultiSelectWithSkipMenu <: TerminalMenus._ConfiguredMenu{TerminalMenus.Config}
+    options::Array{String,1}
+    pagesize::Int
+    pageoffset::Int
+    selected::Set{Int}
+    cursor::Base.RefValue{Int}
+    config::TerminalMenus.MultiSelectConfig
+end
+
+function MultiSelectWithSkipMenu(options::Array{String,1}; pagesize::Int=10,
+                                 selected=Int[], kwargs...)
+    length(options) < 1 && error("MultiSelectWithSkipMenu must have at least one option")
+
+    pagesize = pagesize == -1 ? length(options) : pagesize
+    pagesize = min(length(options), pagesize)
+    pagesize < 1 && error("pagesize must be >= 1")
+
+    pageoffset = 0
+    _selected = Set{Int}()
+    for item in selected
+        push!(_selected, item)
+    end
+
+    MultiSelectWithSkipMenu(options, pagesize, pageoffset, _selected,
+                            Ref{Int}(1),
+                            TerminalMenus.MultiSelectConfig(; kwargs...))
+end
+
+TerminalMenus.header(m::MultiSelectWithSkipMenu) = "[press: d=done, a=all, c=none, npNP=move with skip, $(length(m.selected)) items selected]"
+
+TerminalMenus.options(m::MultiSelectWithSkipMenu) = m.options
+
+TerminalMenus.cancel(m::MultiSelectWithSkipMenu) = m.selected = Set{Int}()
+
+# Do not exit menu when a user selects one of the options
+function TerminalMenus.pick(menu::MultiSelectWithSkipMenu, cursor::Int)
+    if cursor in menu.selected
+        delete!(menu.selected, cursor)
+    else
+        push!(menu.selected, cursor)
+    end
+
+    return false
+end
+
+function TerminalMenus.writeline(buf::IOBuffer,
+                                 menu::MultiSelectWithSkipMenu,
+                                 idx::Int, iscursor::Bool)
+    if idx in menu.selected
+        print(buf, menu.config.checked, " ")
+    else
+        print(buf, menu.config.unchecked, " ")
+    end
+
+    print(buf, replace(menu.options[idx], "\n" => "\\n"))
+end
+
+# d: Done, return from request
+# a: Select all
+# c: Deselect all
+# n: Move to next unselected
+# p: Move to previous unselected
+# N: Move to next selected
+# P: Move to previous selected
+function TerminalMenus.keypress(menu::MultiSelectWithSkipMenu, key::UInt32)
+    if key == UInt32('d') || key == UInt32('D')
+        return true # break
+    elseif key == UInt32('a') || key == UInt32('A')
+        menu.selected = Set(1:length(menu.options))
+    elseif key == UInt32('c') || key == UInt32('C')
+        menu.selected = Set{Int}()
+    elseif key == UInt32('n')
+        move_cursor!(menu, 1, false)
+    elseif key == UInt32('p')
+        move_cursor!(menu, -1, false)
+    elseif key == UInt32('N')
+        move_cursor!(menu, 1, true)
+    elseif key == UInt32('P')
+        move_cursor!(menu, -1, true)
+    end
+    false # don't break
+end
+
+function move_cursor!(menu, direction, selected)
+    c = menu.cursor[]
+    while true
+        c += direction
+        if !(1 <= c <= length(menu.options))
+            return
+        end
+        if (c in menu.selected) == selected
+            break
+        end
+    end
+    menu.cursor[] = c
+    if menu.pageoffset >= c - 1
+        menu.pageoffset = max(c - 2, 0)
+    end
+    if menu.pageoffset + menu.pagesize <= c
+        menu.pageoffset = min(c + 1, length(menu.options)) - menu.pagesize
+    end
+end
+
+# Intercept the `request` call to insert the cursor field.
+function TerminalMenus.request(term::REPL.Terminals.TTYTerminal,
+                               m::MultiSelectWithSkipMenu;
+                               cursor::Int=1, kwargs...)
+    m.cursor[] = cursor
+    invoke(TerminalMenus.request, Tuple{REPL.Terminals.TTYTerminal,
+                                        TerminalMenus.AbstractMenu},
+           term, m; cursor=m.cursor, kwargs...)
+end
+
+# These tests are specifically designed to verify that a `RefValue`
+# input to the AbstractMenu `request` function works as intended.
+menu = MultiSelectWithSkipMenu(string.(1:5), selected=[2, 3])
+buf = IOBuffer()
+TerminalMenus.printmenu(buf, menu, 1; init=true)
+@test occursin("2 items selected", String(take!(buf)))
+@test simulate_input(menu, 'n', :enter, 'd') == Set([2, 3, 4])
+buf = IOBuffer()
+TerminalMenus.printmenu(buf, menu, 1; init=true)
+@test occursin("3 items selected", String(take!(buf)))
+
+menu = MultiSelectWithSkipMenu(string.(1:5), selected=[2, 3])
+@test simulate_input(menu, 'P', :enter, 'd', cursor=5) == Set([2])
diff --git a/stdlib/REPL/test/TerminalMenus/pager.jl b/stdlib/REPL/test/TerminalMenus/pager.jl
new file mode 100644
index 00000000000000..1d6579b8f5fc99
--- /dev/null
+++ b/stdlib/REPL/test/TerminalMenus/pager.jl
@@ -0,0 +1,39 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+content =
+    """
+    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor
+    incididunt ut labore et dolore magna aliqua. Arcu non sodales neque sodales.
+    Placerat orci nulla pellentesque dignissim enim sit amet venenatis. Mauris
+    augue neque gravida in fermentum et sollicitudin. Amet venenatis urna cursus
+    eget. Enim praesent elementum facilisis leo vel fringilla est. Vitae sapien
+    pellentesque habitant morbi tristique. Ornare lectus sit amet est placerat in.
+    Leo urna molestie at elementum eu facilisis. Aliquam vestibulum morbi blandit
+    cursus risus at ultrices. Id aliquet lectus proin nibh. Facilisi etiam
+    dignissim diam quis enim lobortis scelerisque fermentum. Pretium lectus quam id
+    leo in vitae turpis massa sed. Elementum facilisis leo vel fringilla est.
+    Vulputate ut pharetra sit amet aliquam. Quis enim lobortis scelerisque
+    fermentum dui faucibus in ornare. Cursus turpis massa tincidunt dui ut.
+
+    A arcu cursus vitae congue mauris rhoncus. Tellus rutrum tellus pellentesque
+    eu. Fringilla phasellus faucibus scelerisque eleifend donec pretium. Aliquam
+    etiam erat velit scelerisque. Volutpat lacus laoreet non curabitur gravida.
+    Felis imperdiet proin fermentum leo vel orci. Viverra tellus in hac habitasse
+    platea dictumst vestibulum rhoncus est. Ullamcorper dignissim cras tincidunt
+    lobortis feugiat vivamus. Sit amet luctus venenatis lectus. Odio facilisis
+    mauris sit amet massa vitae tortor condimentum. Purus sit amet volutpat
+    consequat mauris nunc congue. Enim nunc faucibus a pellentesque sit amet. Purus
+    non enim praesent elementum facilisis leo vel fringilla est.
+    """ |> strip
+
+let p = Pager(content)
+    @test p.pagesize == 10
+    @test length(p.lines) == 22
+    @test startswith(content, p.lines[1])
+    @test endswith(content, p.lines[end])
+    buffer = IOBuffer()
+    TerminalMenus.printmenu(buffer, p, 1)
+    str = String(take!(buffer))
+    @test contains(str, "(10 / 22)  45%")
+    @test endswith(str, "leo in vitae turpis massa sed. Elementum facilisis leo vel fringilla est.")
+end
diff --git a/stdlib/REPL/test/TerminalMenus/radio_menu.jl b/stdlib/REPL/test/TerminalMenus/radio_menu.jl
index e0645afe403711..5ca64227174258 100644
--- a/stdlib/REPL/test/TerminalMenus/radio_menu.jl
+++ b/stdlib/REPL/test/TerminalMenus/radio_menu.jl
@@ -34,11 +34,21 @@ for kws in ((charset=:ascii,),
     TerminalMenus.printmenu(buf, radio_menu, 2; init=true)
     @test startswith(String(take!(buf)), string("\e[2K   1\r\n\e[2K $c 2"))
 end
+@testset begin "cursor page"
+    radio_menu = RadioMenu(string.(1:20); charset=:ascii)
+    buf = IOBuffer()
+    TerminalMenus.printmenu(buf, radio_menu, 19; init=true)
+    @test String(take!(buf)) == "\e[2K^  11\r\n\e[2K   12\r\n\e[2K   13\r\n\e[2K   14\r\n\e[2K   15\r\n\e[2K   16\r\n\e[2K   17\r\n\e[2K   18\r\n\e[2K > 19\r\n\e[2K   20"
+    TerminalMenus.printmenu(buf, radio_menu, 8; init=true)
+    @test String(take!(buf)) == "\e[2K^  4\r\n\e[2K   5\r\n\e[2K   6\r\n\e[2K   7\r\n\e[2K > 8\r\n\e[2K   9\r\n\e[2K   10\r\n\e[2K   11\r\n\e[2K   12\r\n\e[2Kv  13"
+end
 
 # Test using stdin
 radio_menu = RadioMenu(string.(1:10); charset=:ascii)
-@test simulate_input(3, radio_menu, :down, :down, :enter)
+@test simulate_input(radio_menu, :down, :down, :enter) == 3
 radio_menu = RadioMenu(["single option"], charset=:ascii)
-@test simulate_input(1, radio_menu, :up, :up, :down, :up, :enter)
+@test simulate_input(radio_menu, :up, :up, :down, :up, :enter) == 1
 radio_menu = RadioMenu(string.(1:3), pagesize=1, charset=:ascii)
-@test simulate_input(3, radio_menu, :down, :down, :down, :down, :enter)
+@test simulate_input(radio_menu, :down, :down, :down, :down, :enter) == 3
+radio_menu = RadioMenu(["apple", "banana", "cherry"]; keybindings=collect('a':'c'), charset=:ascii)
+@test simulate_input(radio_menu, 'b') == 2
diff --git a/stdlib/REPL/test/TerminalMenus/runtests.jl b/stdlib/REPL/test/TerminalMenus/runtests.jl
index d4e1b5c1a83bd7..c594958a366701 100644
--- a/stdlib/REPL/test/TerminalMenus/runtests.jl
+++ b/stdlib/REPL/test/TerminalMenus/runtests.jl
@@ -4,25 +4,29 @@ import REPL
 using REPL.TerminalMenus
 using Test
 
-function simulate_input(expected, menu::TerminalMenus.AbstractMenu, keys...)
+function simulate_input(menu::TerminalMenus.AbstractMenu, keys...; kwargs...)
     keydict =  Dict(:up => "\e[A",
                     :down => "\e[B",
                     :enter => "\r")
 
+    new_stdin = Base.BufferStream()
     for key in keys
         if isa(key, Symbol)
-            write(stdin.buffer, keydict[key])
+            write(new_stdin, keydict[key])
         else
-            write(stdin.buffer, "$key")
+            write(new_stdin, "$key")
         end
     end
+    TerminalMenus.terminal.in_stream = new_stdin
 
-    request(menu; suppress_output=true) == expected
+    return request(menu; suppress_output=true, kwargs...)
 end
 
 include("radio_menu.jl")
 include("multiselect_menu.jl")
 include("dynamic_menu.jl")
+include("multiselect_with_skip_menu.jl")
+include("pager.jl")
 
 # Legacy tests
 include("legacytests/old_radio_menu.jl")
diff --git a/stdlib/REPL/test/docview.jl b/stdlib/REPL/test/docview.jl
index 89e07da952106f..22701ead7883d4 100644
--- a/stdlib/REPL/test/docview.jl
+++ b/stdlib/REPL/test/docview.jl
@@ -1,7 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test
-import REPL
+import REPL, REPL.REPLCompletions
+import Markdown
 
 @testset "symbol completion" begin
     @test startswith(let buf = IOBuffer()
@@ -13,4 +14,54 @@ import REPL
             Core.eval(Main, REPL.helpmode(buf, "🐨"))
             String(take!(buf))
         end, "\"🐨\" can be typed by \\:koala:<tab>\n")
+
+    @test startswith(let buf = IOBuffer()
+            Core.eval(Main, REPL.helpmode(buf, "ᵞ₁₂₃¹²³α"))
+            String(take!(buf))
+        end, "\"ᵞ₁₂₃¹²³α\" can be typed by \\^gamma<tab>\\_123<tab>\\^123<tab>\\alpha<tab>\n")
+
+    # Check that all symbols with several completions have a canonical mapping (#39148)
+    symbols = values(REPLCompletions.latex_symbols)
+    duplicates = [v for v in unique(symbols) if count(==(v), symbols) > 1]
+    @test all(duplicates .∈ Ref(keys(REPLCompletions.symbols_latex_canonical)))
+end
+
+@testset "quoting in doc search" begin
+    str = let buf = IOBuffer()
+        Core.eval(Main, REPL.helpmode(buf, "mutable s"))
+        String(take!(buf))
+    end
+    @test occursin("'mutable struct'", str)
+    @test occursin("Couldn't find 'mutable s'", str)
+end
+
+@testset "Non-Markdown" begin
+    # https://github.com/JuliaLang/julia/issues/37765
+    @test isa(REPL.insert_hlines(IOBuffer(), Markdown.Text("foo")), Markdown.Text)
+    # https://github.com/JuliaLang/julia/issues/37757
+    @test REPL.insert_hlines(IOBuffer(), nothing) === nothing
+end
+
+@testset "Check @var_str also completes to var\"\" in REPL.doc_completions()" begin
+    checks = ["var", "raw", "r"]
+    symbols = "@" .* checks .* "_str"
+    results = checks .* "\"\""
+    for (i,r) in zip(symbols,results)
+        @test r ∈ REPL.doc_completions(i)
+    end
+end
+@testset "fuzzy score" begin
+    # https://github.com/JunoLab/FuzzyCompletions.jl/issues/7
+    # shouldn't throw when there is a space in a middle of query
+    @test (REPL.matchinds("a ", "a file.txt"); true)
+end
+
+@testset "Unicode doc lookup (#41589)" begin
+    @test REPL.lookup_doc(:(÷=)) isa Markdown.MD
+end
+
+@testset "#44009" begin
+    R = Complex{<:Integer}
+    b = REPL.Binding(@__MODULE__, :R)
+    @test REPL.summarize(b, Tuple{}) isa Markdown.MD
 end
diff --git a/stdlib/REPL/test/repl.jl b/stdlib/REPL/test/repl.jl
index ea2268ebbd9877..f34b00a8f05954 100644
--- a/stdlib/REPL/test/repl.jl
+++ b/stdlib/REPL/test/repl.jl
@@ -104,11 +104,23 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
     let cmd = "\"Hello REPL\""
         write(stdin_write, "$(curmod_prefix)inc || wait($(curmod_prefix)b); r = $cmd; notify($(curmod_prefix)c); r\r")
     end
-    inc = true
-    notify(b)
-    wait(c)
+    let t = @async begin
+            inc = true
+            notify(b)
+            wait(c)
+        end
+        while (d = readline(stdout_read)) != ""
+            # first line [optional]: until 80th char of input
+            # second line: until end of input
+            # third line: "Hello REPL"
+            # last line: blank
+            # last+1 line: next prompt
+        end
+        wait(t)
+    end
 
     # Latex completions
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, "\x32\\alpha\t")
     readuntil(stdout_read, "α")
     # Bracketed paste in search mode
@@ -139,6 +151,7 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
             readuntil(stdout_read, "\n")
             readuntil(stdout_read, "\n")
             @test samefile(".", tmpdir)
+            write(stdin_write, "\b")
 
             # Test using `cd` to move to the home directory
             write(stdin_write, ";")
@@ -148,6 +161,7 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
             readuntil(stdout_read, "\n")
             readuntil(stdout_read, "\n")
             @test samefile(".", homedir_pwd)
+            write(stdin_write, "\b")
 
             # Test using `-` to jump backward to tmpdir
             write(stdin_write, ";")
@@ -157,6 +171,7 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
             readuntil(stdout_read, "\n")
             readuntil(stdout_read, "\n")
             @test samefile(".", tmpdir)
+            write(stdin_write, "\b")
 
             # Test using `~` (Base.expanduser) in `cd` commands
             if !Sys.iswindows()
@@ -167,6 +182,7 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
                 readuntil(stdout_read, "\n")
                 readuntil(stdout_read, "\n")
                 @test samefile(".", homedir_pwd)
+                write(stdin_write, "\b")
             end
         finally
             cd(origpwd)
@@ -196,6 +212,7 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
         s = readuntil(stdout_read, "\n\n")
         @test startswith(s, "\e[0mERROR: unterminated single quote\nStacktrace:\n  [1] ") ||
               startswith(s, "\e[0m\e[1m\e[91mERROR: \e[39m\e[22m\e[91munterminated single quote\e[39m\nStacktrace:\n  [1] ")
+        write(stdin_write, "\b")
     end
 
     # issue #27293
@@ -219,6 +236,7 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
             close(proc_stdout)
             # check for the correct, expanded response
             @test occursin(expanduser("~"), fetch(get_stdout))
+            write(stdin_write, "\b")
         end
     end
 
@@ -246,7 +264,7 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
         write(stdin_write, ";")
         readuntil(stdout_read, "shell> ")
         Base.print_shell_escaped(stdin_write, Base.julia_cmd().exec..., special=Base.shell_special)
-        write(stdin_write, """ -e "println(\\"HI\\")\" """)
+        write(stdin_write, """ -e "println(\\"HI\\")\"""")
         readuntil(stdout_read, ")\"")
         proc_stdout_read, proc_stdout = redirect_stdout()
         get_stdout = @async read(proc_stdout_read, String)
@@ -270,6 +288,7 @@ fake_repl(options = REPL.Options(confirm_exit=false,hascolor=true)) do stdin_wri
         end
         close(proc_stdout)
         @test fetch(get_stdout) == "HI\n"
+        write(stdin_write, "\b")
     end
 
     # Issue #7001
@@ -435,6 +454,8 @@ for prompt = ["TestΠ", () -> randstring(rand(1:10))]
         # gets displayed by intercepting the display
         repl.specialdisplay = REPL.REPLDisplay(repl)
 
+        errormonitor(@async write(devnull, stdout_read)) # redirect stdout to devnull so we drain the output pipe
+
         repl.interface = REPL.setup_interface(repl)
         repl_mode = repl.interface.modes[1]
         shell_mode = repl.interface.modes[2]
@@ -729,6 +750,34 @@ fake_repl() do stdin_write, stdout_read, repl
     readuntil(stdout_read, "begin")
     @test readuntil(stdout_read, "end", keep=true) == "\n\r\e[7C    α=1\n\r\e[7C    β=2\n\r\e[7Cend"
 
+    # Test switching repl modes
+    redirect_stdout(devnull) do # to suppress "foo" echoes
+    sendrepl2("""\e[200~
+            julia> A = 1
+            1
+
+            shell> echo foo
+            foo
+
+            shell> echo foo
+                   foo
+            foo foo
+
+            help?> Int
+            Dummy docstring
+
+                Some text
+
+                julia> error("If this error throws, the paste handler has failed to ignore this docstring example")
+
+            julia> B = 2
+            2\e[201~
+             """)
+    wait(c)
+    @test Main.A == 1
+    @test Main.B == 2
+    end # redirect_stdout
+
     # Close repl
     write(stdin_write, '\x04')
     Base.wait(repltask)
@@ -777,12 +826,12 @@ end
 
 Base.exit_on_sigint(true)
 
-let exename = Base.julia_cmd()
+let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # Test REPL in dumb mode
     with_fake_pty() do pts, ptm
         nENV = copy(ENV)
         nENV["TERM"] = "dumb"
-        p = run(detach(setenv(`$exename --startup-file=no -q`, nENV)), pts, pts, pts, wait=false)
+        p = run(detach(setenv(`$exename -q`, nENV)), pts, pts, pts, wait=false)
         Base.close_stdio(pts)
         output = readuntil(ptm, "julia> ", keep=true)
         if ccall(:jl_running_on_valgrind, Cint,()) == 0
@@ -820,7 +869,7 @@ let exename = Base.julia_cmd()
     end
 
     # Test stream mode
-    p = open(`$exename --startup-file=no -q`, "r+")
+    p = open(`$exename -q`, "r+")
     write(p, "1\nexit()\n")
     @test read(p, String) == "1\n"
 end # let exename
@@ -830,7 +879,7 @@ mutable struct Error19864 <: Exception; end
 function test19864()
     @eval Base.showerror(io::IO, e::Error19864) = print(io, "correct19864")
     buf = IOBuffer()
-    fake_response = (Any[(Error19864(), Ptr{Cvoid}[])], true)
+    fake_response = (Base.ExceptionStack([(exception=Error19864(),backtrace=Ptr{Cvoid}[])]),true)
     REPL.print_response(buf, fake_response, false, false, nothing)
     return String(take!(buf))
 end
@@ -838,13 +887,13 @@ end
 
 # Test containers in error messages are limited #18726
 let io = IOBuffer()
-    Base.display_error(io,
-        try
+    Base.display_error(io, Base.ExceptionStack(Any[(exception =
+        (try
             [][trues(6000)]
             @assert false
         catch e
             e
-        end, [])
+        end), backtrace = [])]))
     @test length(String(take!(io))) < 1500
 end
 
@@ -866,16 +915,29 @@ end
 let ends_with_semicolon = REPL.ends_with_semicolon
     @test !ends_with_semicolon("")
     @test ends_with_semicolon(";")
-    @test !ends_with_semicolon("a")
+    @test !ends_with_semicolon("ä")
+    @test !ends_with_semicolon("ä # äsdf ;")
+    @test ends_with_semicolon("""a * "#ä" ;""")
+    @test ends_with_semicolon("a; #=#=# =# =#\n")
     @test ends_with_semicolon("1;")
     @test ends_with_semicolon("1;\n")
     @test ends_with_semicolon("1;\r")
     @test ends_with_semicolon("1;\r\n   \t\f")
-    @test ends_with_semicolon("1;#text\n")
-    @test ends_with_semicolon("a; #=#=# =# =#\n")
+    @test ends_with_semicolon("1;#äsdf\n")
+    @test ends_with_semicolon("""1;\n#äsdf\n""")
+    @test !ends_with_semicolon("\"\\\";\"#\"")
+    @test ends_with_semicolon("\"\\\\\";#\"")
     @test !ends_with_semicolon("begin\na;\nb;\nend")
     @test !ends_with_semicolon("begin\na; #=#=#\n=#b=#\nend")
     @test ends_with_semicolon("\na; #=#=#\n=#b=#\n# test\n#=\nfoobar\n=##bazbax\n")
+    @test ends_with_semicolon("f()= 1; # é ; 2")
+    @test ends_with_semicolon("f()= 1; # é")
+    @test !ends_with_semicolon("f()= 1; \"é\"")
+    @test !ends_with_semicolon("""("f()= 1; # é")""")
+    @test !ends_with_semicolon(""" "f()= 1; # é" """)
+    @test ends_with_semicolon("f()= 1;")
+    # the next result does not matter because this is not legal syntax
+    @test_nowarn ends_with_semicolon("1; #=# 2")
 end
 
 # PR #20794, TTYTerminal with other kinds of streams
@@ -1020,13 +1082,16 @@ fake_repl() do stdin_write, stdout_read, repl
     write(stdin_write, "TestShowTypeREPL.TypeA\n")
     @test endswith(readline(stdout_read), "\r\e[7CTestShowTypeREPL.TypeA\r\e[29C")
     readline(stdout_read)
-    readline(stdout_read)
+    @test readline(stdout_read) == ""
     @eval Main using .TestShowTypeREPL
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, "TypeA\n")
     @test endswith(readline(stdout_read), "\r\e[7CTypeA\r\e[12C")
     readline(stdout_read)
+    @test readline(stdout_read) == ""
 
     # Close REPL ^D
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, '\x04')
     Base.wait(repltask)
 end
@@ -1056,7 +1121,7 @@ for (line, expr) in Pair[
 end
 
 # PR 30754, Issues #22013, #24871, #26933, #29282, #29361, #30348
-for line in ["′", "abstract", "type"]
+for line in ["′", "type"]
     @test occursin("No documentation found.",
         sprint(show, help_result(line)::Union{Markdown.MD,Nothing}))
 end
@@ -1069,6 +1134,12 @@ end
 @test occursin("identical", sprint(show, help_result("===")))
 @test occursin("broadcast", sprint(show, help_result(".<=")))
 
+# Issue 39427
+@test occursin("does not exist", sprint(show, help_result(":=")))
+
+# Issue #40563
+@test occursin("does not exist", sprint(show, help_result("..")))
+
 # Issue #25930
 
 # Brief and extended docs (issue #25930)
@@ -1145,10 +1216,13 @@ fake_repl() do stdin_write, stdout_read, repl
     write(stdin_write, "Expr(:call, GlobalRef(Base.Math, :float), Core.SlotNumber(1))\n")
     readline(stdout_read)
     @test readline(stdout_read) == "\e[0m:(Base.Math.float(_1))"
+    @test readline(stdout_read) == ""
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, "ans\n")
     readline(stdout_read)
-    readline(stdout_read)
     @test readline(stdout_read) == "\e[0m:(Base.Math.float(_1))"
+    @test readline(stdout_read) == ""
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, '\x04')
     Base.wait(repltask)
 end
@@ -1161,10 +1235,15 @@ fake_repl() do stdin_write, stdout_read, repl
     write(stdin_write, "struct Errs end\n")
     readline(stdout_read)
     readline(stdout_read)
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, "Base.show(io::IO, ::Errs) = throw(Errs())\n")
     readline(stdout_read)
     readline(stdout_read)
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, "Errs()\n")
+    readline(stdout_read)
+    readline(stdout_read)
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, '\x04')
     wait(repltask)
     @test istaskdone(repltask)
@@ -1177,7 +1256,8 @@ fake_repl() do stdin_write, stdout_read, repl
     end
     write(stdin_write, "?;\n")
     readline(stdout_read)
-    @test endswith(readline(stdout_read),";")
+    @test endswith(readline(stdout_read), "search: ;")
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, '\x04')
     Base.wait(repltask)
 end
@@ -1190,6 +1270,7 @@ fake_repl() do stdin_write, stdout_read, repl
     write(stdin_write, "global x\n")
     readline(stdout_read)
     @test !occursin("ERROR", readline(stdout_read))
+    readuntil(stdout_read, "julia> ", keep=true)
     write(stdin_write, '\x04')
     Base.wait(repltask)
 end
@@ -1216,15 +1297,15 @@ end
 # AST transformations (softscope, Revise, OhMyREPL, etc.)
 @testset "AST Transformation" begin
     backend = REPL.REPLBackend()
-    @async REPL.start_repl_backend(backend)
+    errormonitor(@async REPL.start_repl_backend(backend))
     put!(backend.repl_channel, (:(1+1), false))
     reply = take!(backend.response_channel)
-    @test reply == (2, false)
+    @test reply == Pair{Any, Bool}(2, false)
     twice(ex) = Expr(:tuple, ex, ex)
     push!(backend.ast_transforms, twice)
     put!(backend.repl_channel, (:(1+1), false))
     reply = take!(backend.response_channel)
-    @test reply == ((2, 2), false)
+    @test reply == Pair{Any, Bool}((2, 2), false)
     put!(backend.repl_channel, (nothing, -1))
     Base.wait(backend.backend_task)
 end
@@ -1236,12 +1317,12 @@ frontend_task = @async begin
         @testset "AST Transformations Async" begin
             put!(backend.repl_channel, (:(1+1), false))
             reply = take!(backend.response_channel)
-            @test reply == (2, false)
+            @test reply == Pair{Any, Bool}(2, false)
             twice(ex) = Expr(:tuple, ex, ex)
             push!(backend.ast_transforms, twice)
             put!(backend.repl_channel, (:(1+1), false))
             reply = take!(backend.response_channel)
-            @test reply == ((2, 2), false)
+            @test reply == Pair{Any, Bool}((2, 2), false)
         end
     catch e
         Base.rethrow(e)
@@ -1255,3 +1336,96 @@ Base.wait(frontend_task)
 macro throw_with_linenumbernode(err)
     Expr(:block, LineNumberNode(42, Symbol("test.jl")), :(() -> throw($err)))
 end
+
+@testset "Install missing packages via hooks" begin
+    @testset "Parse AST for packages" begin
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo, Bar"))
+        @test mods == [:Foo, :Bar]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo, Bar"))
+        @test mods == [:Foo, :Bar]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Foo.bar, Foo.baz"))
+        @test mods == [:Foo]
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo end"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false if false using Foo end end"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo, Bar end"))
+        @test mods == [:Foo, :Bar]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("if false using Foo: bar end"))
+        @test mods == [:Foo]
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("import Foo.bar as baz"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using .Foo"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Base"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Base: nope"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Main"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("using Core"))
+        @test isempty(mods)
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line(":(using Foo)"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("ex = :(using Foo)"))
+        @test isempty(mods)
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("Foo"))
+        @test isempty(mods)
+
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("@eval using Foo"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("begin using Foo; @eval using Bar end"))
+        @test mods == [:Foo]
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("Core.eval(Main,\"using Foo\")"))
+        @test isempty(mods)
+        mods = REPL.modules_to_be_loaded(Base.parse_input_line("begin using Foo; Core.eval(Main,\"using Foo\") end"))
+        @test mods == [:Foo]
+    end
+end
+
+# err should reprint error if deeper than top-level
+fake_repl() do stdin_write, stdout_read, repl
+    repltask = @async begin
+        REPL.run_repl(repl)
+    end
+    # initialize `err` to `nothing`
+    write(stdin_write, "global err = nothing\n")
+    readline(stdout_read)
+    readline(stdout_read) == "\e[0m"
+    readuntil(stdout_read, "julia> ", keep=true)
+    # generate top-level error
+    write(stdin_write, "foobar\n")
+    readline(stdout_read)
+    @test readline(stdout_read) == "\e[0mERROR: UndefVarError: foobar not defined"
+    @test readline(stdout_read) == ""
+    readuntil(stdout_read, "julia> ", keep=true)
+    # check that top-level error did not change `err`
+    write(stdin_write, "err\n")
+    readline(stdout_read)
+    @test readline(stdout_read) == "\e[0m"
+    readuntil(stdout_read, "julia> ", keep=true)
+    # generate deeper error
+    write(stdin_write, "foo() = foobar\n")
+    readline(stdout_read)
+    readuntil(stdout_read, "julia> ", keep=true)
+    write(stdin_write, "foo()\n")
+    readline(stdout_read)
+    @test readline(stdout_read) == "\e[0mERROR: UndefVarError: foobar not defined"
+    readuntil(stdout_read, "julia> ", keep=true)
+    # check that deeper error did set `err`
+    write(stdin_write, "err\n")
+    readline(stdout_read)
+    @test readline(stdout_read) == "\e[0m1-element ExceptionStack:"
+    @test readline(stdout_read) == "UndefVarError: foobar not defined"
+    @test readline(stdout_read) == "Stacktrace:"
+    write(stdin_write, '\x04')
+    Base.wait(repltask)
+end
diff --git a/stdlib/REPL/test/replcompletions.jl b/stdlib/REPL/test/replcompletions.jl
index a0d614edce5c16..f156100b1df471 100644
--- a/stdlib/REPL/test/replcompletions.jl
+++ b/stdlib/REPL/test/replcompletions.jl
@@ -7,7 +7,7 @@ using REPL
     @testset "Check symbols previously not shown by REPL.doc_completions()" begin
     symbols = ["?","=","[]","[","]","{}","{","}",";","","'","&&","||","julia","Julia","new","@var_str"]
         for i in symbols
-            @test REPL.doc_completions(i)[1]==i
+            @test i ∈ REPL.doc_completions(i)
         end
     end
 let ex = quote
@@ -31,6 +31,29 @@ let ex = quote
         macro foobar()
             :()
         end
+        macro barfoo(ex)
+            ex
+        end
+        macro error_expanding()
+            error("cannot expand @error_expanding")
+            :()
+        end
+        macro error_lowering_conditional(a)
+            if isa(a, Number)
+                return a
+            end
+            throw(AssertionError("Not a Number"))
+            :()
+        end
+        macro error_throwing()
+            return quote
+                error("@error_throwing throws an error")
+            end
+        end
+
+        primitive type NonStruct 8 end
+        Base.propertynames(::NonStruct) = (:a, :b, :c)
+        x = reinterpret(NonStruct, 0x00)
 
         # Support non-Dict AbstractDicts, #19441
         mutable struct CustomDict{K, V} <: AbstractDict{K, V}
@@ -64,7 +87,22 @@ let ex = quote
         test6()=[a, a]
         test7() = rand(Bool) ? 1 : 1.0
         test8() = Any[1][1]
+        test9(x::Char) = pass
+        test9(x::Char, i::Int) = pass
+
+        test10(a, x::Int...) = pass
+        test10(a::Integer, b::Integer, c) = pass
+        test10(a, y::Bool...) = pass
+        test10(a, d::Integer, z::Signed...) = pass
+        test10(s::String...) = pass
+
+        test11(a::Integer, b, c) = pass
+        test11(u, v::Integer, w) = pass
+        test11(x::Int, y::Int, z) = pass
+        test11(_, _, s::String) = pass
+
         kwtest(; x=1, y=2, w...) = pass
+        kwtest2(a; x=1, y=2, w...) = pass
 
         array = [1, 1]
         varfloat = 0.1
@@ -96,10 +134,12 @@ function map_completion_text(completions)
     return map(completion_text, c), r, res
 end
 
-test_complete(s) = map_completion_text(@inferred(completions(s,lastindex(s))))
-test_scomplete(s) =  map_completion_text(@inferred(shell_completions(s,lastindex(s))))
-test_bslashcomplete(s) =  map_completion_text(@inferred(bslash_completions(s,lastindex(s)))[2])
-test_complete_context(s) =  map_completion_text(@inferred(completions(s,lastindex(s),Main.CompletionFoo)))
+test_complete(s) = map_completion_text(@inferred(completions(s, lastindex(s))))
+test_scomplete(s) =  map_completion_text(@inferred(shell_completions(s, lastindex(s))))
+test_bslashcomplete(s) =  map_completion_text(@inferred(bslash_completions(s, lastindex(s)))[2])
+test_complete_context(s, m) =  map_completion_text(@inferred(completions(s,lastindex(s), m)))
+test_complete_foo(s) = test_complete_context(s, Main.CompletionFoo)
+test_complete_noshift(s) = map_completion_text(@inferred(completions(s, lastindex(s), Main, false)))
 
 module M32377 end
 test_complete_32377(s) = map_completion_text(completions(s,lastindex(s), M32377))
@@ -116,6 +156,10 @@ let s = "using REP"
     @test count(isequal("REPL"), c) == 1
     # issue #30234
     @test !Base.isbindingresolved(M32377, :tanh)
+    # check what happens if REPL is already imported
+    M32377.eval(:(using REPL))
+    c, r = test_complete_32377(s)
+    @test count(isequal("REPL"), c) == 1
 end
 
 let s = "Comp"
@@ -292,7 +336,7 @@ end
 
 # test latex symbol completion in getindex expressions (#24705)
 let s = "tuple[\\alpha"
-    c, r, res = test_complete_context(s)
+    c, r, res = test_complete_foo(s)
     @test c[1] == "α"
     @test r == 7:12
     @test length(c) == 1
@@ -348,11 +392,14 @@ let
 end
 
 # Test completion of methods with input concrete args and args where typeinference determine their type
-let s = "CompletionFoo.test(1,1, "
+let s = "CompletionFoo.test(1, 1, "
     c, r, res = test_complete(s)
     @test !res
     @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Int, Int})))
-    @test length(c) == 3
+    @test c[2] == string(first(methods(Main.CompletionFoo.test, Tuple{}))) # corresponding to the vararg
+    @test length(c) == 2
+    # In particular, this checks that test(x::Real, y::Real) is not a valid completion
+    # since it is strictly less specific than test(x::T, y::T) where T
     @test r == 1:18
     @test s[r] == "CompletionFoo.test"
 end
@@ -370,6 +417,7 @@ let s = "CompletionFoo.test(1,1,1,"
     c, r, res = test_complete(s)
     @test !res
     @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Any, Any, Any})))
+    @test length(c) == 1
     @test r == 1:18
     @test s[r] == "CompletionFoo.test"
 end
@@ -483,6 +531,7 @@ let s = "CompletionFoo.test3(@time([1, 2] + CompletionFoo.varfloat),"
 end
 #################################################################
 
+# method completions with kwargs
 let s = "CompletionFoo.kwtest( "
     c, r, res = test_complete(s)
     @test !res
@@ -490,6 +539,209 @@ let s = "CompletionFoo.kwtest( "
     @test occursin("x, y, w...", c[1])
 end
 
+for s in ("CompletionFoo.kwtest(;",
+          "CompletionFoo.kwtest(; x=1, ",
+          "CompletionFoo.kwtest(; kw=1, ",
+          )
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 1
+    @test occursin("x, y, w...", c[1])
+end
+
+for s in ("CompletionFoo.kwtest2(1; x=1,",
+          "CompletionFoo.kwtest2(1; kw=1, ",
+          )
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 1
+    @test occursin("a; x, y, w...", c[1])
+end
+
+#################################################################
+
+# method completion with `?` (arbitrary method with given argument types)
+let s = "CompletionFoo.?([1,2,3], 2.0)"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 1
+    @test occursin("test(x::AbstractArray{T}, y) where T<:Real", c[1])
+    # In particular, this checks that test(args...) is not a valid completion
+    # since it is strictly less specific than test(x::AbstractArray{T}, y)
+end
+
+let s = "CompletionFoo.?([1,2,3], 2.0"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test(x::AbstractArray{T}, y) where T<:Real", str), c)
+    @test  any(str->occursin("test(args...)", str), c)
+    @test !any(str->occursin("test3(x::AbstractArray{Int", str), c)
+    @test !any(str->occursin("test4", str), c)
+end
+
+let s = "CompletionFoo.?('c')"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test9(x::Char)", str), c)
+    @test  any(str->occursin("test10(a, ", str), c)
+    @test !any(str->occursin("test9(x::Char, i::Int", str), c)
+end
+
+let s = "CompletionFoo.?('c'"
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test9(x::Char)", str), c)
+    @test  any(str->occursin("test10(a, ", str), c)
+    @test  any(str->occursin("test9(x::Char, i::Int", str), c)
+end
+
+let s = "CompletionFoo.?(false, \"a\", 3, "
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 2
+    @test occursin("test(args...)", c[1])
+    @test occursin("test11(a::Integer, b, c)", c[2])
+end
+
+let s = "CompletionFoo.?(false, \"a\", 3, "
+    c, r, res = test_complete_noshift(s)
+    @test !res
+    @test length(c) == 1
+    @test occursin("test11(a::Integer, b, c)", c[1])
+end
+
+let s = "CompletionFoo.?(\"a\", 3, "
+    c, r, res = test_complete(s)
+    @test !res
+    @test  any(str->occursin("test10(a, x::$Int...)", str), c)
+    @test !any(str->occursin("test10(a, y::Bool...)", str), c)
+    @test !any(str->occursin("test10(s::String...)", str), c)
+end
+
+let s = "CompletionFoo.?()"
+    c, r, res = test_complete(s)
+    @test !res
+    @test any(str->occursin("foo()", str), c)
+    @test any(str->occursin("kwtest(;", str), c)
+    @test any(str->occursin("test(args...)", str), c)
+end
+
+let s = "CompletionFoo.?()"
+    c, r, res = test_complete_noshift(s)
+    @test !res
+    @test length(c) == 1
+    @test occursin("test10(s::String...)", c[1])
+end
+
+#################################################################
+
+# Test method completion with varargs
+let s = "CompletionFoo.test10(z, Integer[]...,"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 5
+    @test all(startswith("test10("), c)
+    @test allunique(c)
+end
+
+let s = "CompletionFoo.test10(3, Integer[]...,"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 4
+    @test all(startswith("test10("), c)
+    @test allunique(c)
+    @test !any(str->occursin("test10(s::String...)", str), c)
+end
+
+let s = "CompletionFoo.test10(3, 4,"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 3
+    @test any(str->occursin("test10(a, x::$Int...)", str), c)
+    @test any(str->occursin("test10(a::Integer, b::Integer, c)", str), c)
+    @test any(str->occursin("test10(a, d::Integer, z::Signed...)", str), c)
+end
+
+let s = "CompletionFoo.test10(3, 4, 5,"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 3
+    @test any(str->occursin("test10(a, x::$Int...)", str), c)
+    @test any(str->occursin("test10(a::Integer, b::Integer, c)", str), c) # show it even though the call would result in an ambiguity error
+    @test any(str->occursin("test10(a, d::Integer, z::Signed...)", str), c)
+    # the last one is not eliminated by specificity since the complete call could be
+    # test10(3, 4, 5, Int8(6)) for instance
+end
+
+let s = "CompletionFoo.test10(z, z, 0, "
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 3
+    @test any(str->occursin("test10(a, x::$Int...)", str), c)
+    @test any(str->occursin("test10(a::Integer, b::Integer, c)", str), c) # show it even though the call would result in an ambiguity error
+    @test any(str->occursin("test10(a, d::Integer, z::Signed...)", str), c)
+end
+
+let s = "CompletionFoo.test10(\"a\", Union{Signed,Bool,String}[3][1], "
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 4
+    @test all(startswith("test10("), c)
+    @test allunique(c)
+    @test !any(str->occursin("test10(a::Integer, b::Integer, c)", str), c)
+end
+
+# Test method completion with ambiguity
+let s = "CompletionFoo.test11(Integer[false][1], Integer[14][1], "
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 4
+    @test all(startswith("test11("), c)
+    @test allunique(c)
+end
+
+let s = "CompletionFoo.test11(Integer[-7][1], Integer[0x6][1], 6,"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 3
+    @test any(str->occursin("test11(a::Integer, b, c)", str), c)
+    @test any(str->occursin("test11(u, v::Integer, w)", str), c)
+    @test any(str->occursin("test11(x::$Int, y::$Int, z)", str), c)
+end
+
+let s = "CompletionFoo.test11(3, 4,"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 4
+    @test any(str->occursin("test11(x::$Int, y::$Int, z)", str), c)
+    @test any(str->occursin("test11(::Any, ::Any, s::String)", str), c)
+end
+
+let s = "CompletionFoo.test11(0x8, 5,"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 3
+    @test any(str->occursin("test11(a::Integer, b, c)", str), c)
+    @test any(str->occursin("test11(u, v::Integer, w)", str), c)
+    @test any(str->occursin("test11(::Any, ::Any, s::String)", str), c)
+end
+
+let s = "CompletionFoo.test11(0x8, 'c',"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 2
+    @test any(str->occursin("test11(a::Integer, b, c)", str), c)
+    @test any(str->occursin("test11(::Any, ::Any, s::String)", str), c)
+end
+
+let s = "CompletionFoo.test11('d', 3,"
+    c, r, res = test_complete(s)
+    @test !res
+    @test length(c) == 2
+    @test any(str->occursin("test11(u, v::Integer, w)", str), c)
+    @test any(str->occursin("test11(::Any, ::Any, s::String)", str), c)
+end
+
 # Test of inference based getfield completion
 let s = "(1+2im)."
     c,r = test_complete(s)
@@ -642,11 +894,14 @@ let s, c, r
     @test s[r] == "tmp"
 
     # This should match things that are inside the tmp directory
-    if !isdir("/tmp/tmp")
-        s = "/tmp/"
+    s = tempdir()
+    if !endswith(s, "/")
+        s = string(s, "/")
+    end
+    if !isdir(joinpath(s, "tmp"))
         c,r = test_scomplete(s)
         @test !("tmp/" in c)
-        @test r === 6:5
+        @test r === length(s) + 1:0
         @test s[r] == ""
     end
 
@@ -678,20 +933,22 @@ let s, c, r
     end
 
     # Tests homedir expansion
-    let path, s, c, r
-        path = homedir()
-        dir = joinpath(path, "tmpfoobar")
-        mkdir(dir)
-        s = "\"" * path * "/tmpfoob"
-        c,r = test_complete(s)
-        @test "tmpfoobar/" in c
-        l = 3 + length(path)
-        @test r == l:l+6
-        @test s[r] == "tmpfoob"
-        s = "\"~"
-        @test "tmpfoobar/" in c
-        c,r = test_complete(s)
-        rm(dir)
+    mktempdir() do tmphome
+        withenv("HOME" => tmphome, "USERPROFILE" => tmphome) do
+            path = homedir()
+            dir = joinpath(path, "tmpfoobar")
+            mkdir(dir)
+            s = "\"" * path * "/tmpfoob"
+            c,r = test_complete(s)
+            @test "tmpfoobar/" in c
+            l = 3 + length(path)
+            @test r == l:l+6
+            @test s[r] == "tmpfoob"
+            s = "\"~"
+            @test "tmpfoobar/" in c
+            c,r = test_complete(s)
+            rm(dir)
+        end
     end
 
     # Tests detecting of files in the env path (in shell mode)
@@ -748,6 +1005,45 @@ let s, c, r
 end
 end
 
+#test that it does not crash on files for which `stat` errors
+let current_dir, forbidden
+    # Issue #36855
+    if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
+        mktempdir() do path
+            selfsymlink = joinpath(path, "selfsymlink")
+            symlink(selfsymlink, selfsymlink)
+            @test try
+                stat(selfsymlink) # should crash with a IOError
+                false
+            catch e
+                e isa Base.IOError && occursin("ELOOP", e.msg)
+            end
+            c, r = test_complete("\"$(joinpath(path, "selfsym"))")
+            @test c == ["selfsymlink"]
+        end
+    end
+
+    # Issue #32797
+    forbidden = Sys.iswindows() ? "C:\\S" : "/root/x"
+    test_complete(forbidden); @test true # simply check that it did not crash
+
+     # Issue #19310
+    if Sys.iswindows()
+        current_dir = pwd()
+        cd("C:\\")
+        test_complete("C"); @test true
+        test_complete("C:"); @test true
+        test_complete("C:\\"); @test true
+        if isdir("D:\\")
+            cd("D:\\")
+            test_complete("C"); @test true
+            test_complete("C:"); @test true
+            test_complete("C:\\"); @test true
+        end
+        cd(current_dir)
+    end
+end
+
 #test that it can auto complete with spaces in file/path
 mktempdir() do path
     space_folder = randstring() * " α"
@@ -860,6 +1156,17 @@ let s = "CompletionFoo.tuple."
     @test isempty(c)
 end
 
+@testset "sub/superscripts" begin
+    @test "⁽¹²³⁾ⁿ" in test_complete("\\^(123)n")[1]
+    @test "ⁿ" in test_complete("\\^n")[1]
+    @test "ᵞ" in test_complete("\\^gamma")[1]
+    @test isempty(test_complete("\\^(123)nq")[1])
+    @test "₍₁₂₃₎ₙ" in test_complete("\\_(123)n")[1]
+    @test "ₙ" in test_complete("\\_n")[1]
+    @test "ᵧ" in test_complete("\\_gamma")[1]
+    @test isempty(test_complete("\\_(123)nq")[1])
+end
+
 # test Dicts
 function test_dict_completion(dict_name)
     s = "$dict_name[\"ab"
@@ -946,13 +1253,13 @@ end
 
 # No CompletionFoo.CompletionFoo
 let s = ""
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test !("CompletionFoo" in c)
 end
 
 # Can see `rand()` after `using Random`
 let s = "r"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "rand" in c
     @test r == 1:1
     @test s[r] == "r"
@@ -960,7 +1267,7 @@ end
 
 # Can see `Test.AbstractTestSet` after `import Test`
 let s = "Test.A"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "AbstractTestSet" in c
     @test r == 6:6
     @test s[r] == "A"
@@ -968,21 +1275,21 @@ end
 
 # Can complete relative import
 let s = "import ..M"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test_broken "Main" in c
     @test r == 10:10
     @test s[r] == "M"
 end
 
 let s = ""
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "bar" in c
     @test r === 1:0
     @test s[r] == ""
 end
 
 let s = "f"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "foo" in c
     @test r == 1:1
     @test s[r] == "f"
@@ -990,7 +1297,7 @@ let s = "f"
 end
 
 let s = "@f"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "@foobar" in c
     @test r == 1:2
     @test s[r] == "@f"
@@ -998,57 +1305,60 @@ let s = "@f"
 end
 
 let s = "type_test.x"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "xx" in c
     @test r == 11:11
     @test s[r] == "x"
 end
 
 let s = "bar.no_val_available"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test length(c)==0
 end
 
 let s = "type_test.xx.y"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "yy" in c
     @test r == 14:14
     @test s[r] == "y"
 end
 
 let s = ":(function foo(::Int) end).args[1].args[2]."
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test c == Any[]
 end
 
 let s = "log(log.(x),"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test !isempty(c)
 end
 
 let s = "Base.return_types(getin"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test "getindex" in c
     @test r == 19:23
     @test s[r] == "getin"
 end
 
 let s = "using Test, Random"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test !("RandomDevice" in c)
 end
 
 let s = "test(1,1, "
-    c, r, res = test_complete_context(s)
+    c, r, res = test_complete_foo(s)
     @test !res
     @test c[1] == string(first(methods(Main.CompletionFoo.test, Tuple{Int, Int})))
-    @test length(c) == 3
+    @test c[2] == string(first(methods(Main.CompletionFoo.test, Tuple{})))  # corresponding to the vararg
+    @test length(c) == 2
+    # In particular, this checks that test(x::Real, y::Real) is not a valid completion
+    # since it is strictly less specific than test(x::T, y::T) where T
     @test r == 1:4
     @test s[r] == "test"
 end
 
 let s = "test.(1,1, "
-    c, r, res = test_complete_context(s)
+    c, r, res = test_complete_foo(s)
     @test !res
     @test length(c) == 4
     @test r == 1:4
@@ -1056,7 +1366,7 @@ let s = "test.(1,1, "
 end
 
 let s = "prevind(\"θ\",1,"
-    c, r, res = test_complete_context(s)
+    c, r, res = test_complete_foo(s)
     @test c[1] == string(first(methods(prevind, Tuple{String, Int})))
     @test r == 1:7
     @test s[r] == "prevind"
@@ -1064,11 +1374,87 @@ end
 
 # Issue #32840
 let s = "typeof(+)."
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test length(c) == length(fieldnames(DataType))
 end
 
 let s = "test_dict[\"ab"
-    c, r = test_complete_context(s)
+    c, r = test_complete_foo(s)
     @test c == Any["\"abc\"", "\"abcd\""]
 end
+
+let s = "CompletionFoo.x."
+    c, r = test_complete(s)
+    @test "a" in c
+end
+
+# https://github.com/JuliaLang/julia/issues/27184
+let
+    (test_complete("@noexist."); @test true)
+    (test_complete("Main.@noexist."); @test true)
+    (test_complete("@Main.noexist."); @test true)
+end
+
+let # Check that completion does not crash on (possibly invalid) macro calls
+    (test_complete("@show."); @test true)
+    (test_complete("@macroexpand."); @test true)
+    (test_complete("@.."); @test true)
+    (test_complete("CompletionFoo.@foobar."); @test true)
+    (test_complete("CompletionFoo.@foobar()."); @test true)
+    (test_complete("CompletionFoo.@foobar(4)."); @test true)
+    (test_complete("CompletionFoo.@barfoo."); @test true)
+    (test_complete("CompletionFoo.@barfoo()."); @test true)
+    (test_complete("CompletionFoo.@barfoo(6)."); @test true)
+    (test_complete("CompletionFoo.@error_expanding."); @test true)
+    (test_complete("CompletionFoo.@error_expanding()."); @test true)
+    (test_complete("CompletionFoo.@error_lowering_conditional."); @test true)
+    (test_complete("CompletionFoo.@error_lowering_conditional()."); @test true)
+    (test_complete("CompletionFoo.@error_lowering_conditional(3)."); @test true)
+    (test_complete("CompletionFoo.@error_lowering_conditional('a')."); @test true)
+    (test_complete("CompletionFoo.@error_throwing."); @test true)
+    (test_complete("CompletionFoo.@error_throwing()."); @test true)
+end
+
+@testset "https://github.com/JuliaLang/julia/issues/40247" begin
+    # getfield type completion can work for complicated expression
+
+    let
+        m = Module()
+        @eval m begin
+            struct Rs
+                rs::Vector{Regex}
+            end
+            var = nothing
+            function foo()
+                global var = 1
+                return Rs([r"foo"])
+            end
+        end
+
+        c, r = test_complete_context("foo(#=#==#=##==#).rs[1].", m)
+        @test m.var === nothing # getfield type completion should never execute `foo()`
+        @test length(c) == fieldcount(Regex)
+    end
+
+    let
+        m = Module()
+        @eval m begin
+            struct R
+                r::Regex
+            end
+            var = nothing
+            function foo()
+                global var = 1
+                return R(r"foo")
+            end
+        end
+
+        c, r = test_complete_context("foo().r.", m)
+        @test m.var === nothing # getfield type completion should never execute `foo()`
+        @test length(c) == fieldcount(Regex)
+
+        c, r = test_complete_context("foo(#=#=# =#= =#).r.", m)
+        @test m.var === nothing # getfield type completion should never execute `foo()`
+        @test length(c) == fieldcount(Regex)
+    end
+end
diff --git a/stdlib/Random/Project.toml b/stdlib/Random/Project.toml
index 6aa9f653745399..199dcab940c868 100644
--- a/stdlib/Random/Project.toml
+++ b/stdlib/Random/Project.toml
@@ -3,12 +3,14 @@ uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 
 [deps]
 Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
 
 [extras]
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Future = "9fa8497b-333b-5362-9e8d-4d0656e87820"
+Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 
 [targets]
-test = ["Test", "SparseArrays", "LinearAlgebra", "Future"]
+test = ["Test", "SparseArrays", "LinearAlgebra", "Future", "Statistics"]
diff --git a/stdlib/Random/docs/src/index.md b/stdlib/Random/docs/src/index.md
index 0de918f10a1af7..0f7636cf2444f5 100644
--- a/stdlib/Random/docs/src/index.md
+++ b/stdlib/Random/docs/src/index.md
@@ -4,18 +4,25 @@
 DocTestSetup = :(using Random)
 ```
 
-Random number generation in Julia uses the [Mersenne Twister library](http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/SFMT/#dSFMT)
-via `MersenneTwister` objects. Julia has a global RNG, which is used by default. Other RNG types
-can be plugged in by inheriting the `AbstractRNG` type; they can then be used to have multiple
-streams of random numbers. Besides `MersenneTwister`, Julia also provides the `RandomDevice` RNG
-type, which is a wrapper over the OS provided entropy.
-
-Most functions related to random generation accept an optional `AbstractRNG` object as first argument,
-which defaults to the global one if not provided. Moreover, some of them accept optionally
-dimension specifications `dims...` (which can be given as a tuple) to generate arrays of random
-values.
-
-A `MersenneTwister` or `RandomDevice` RNG can generate uniformly random numbers of the following types:
+Random number generation in Julia uses the [Xoshiro256++](https://prng.di.unimi.it/) algorithm
+by default, with per-`Task` state.
+Other RNG types can be plugged in by inheriting the `AbstractRNG` type; they can then be used to
+obtain multiple streams of random numbers.
+
+The PRNGs (pseudorandom number generators) exported by the `Random` package are:
+* `TaskLocalRNG`: a token that represents use of the currently active Task-local stream, deterministically seeded from the parent task, or by `RandomDevice` (with system randomness) at program start
+* `Xoshiro`: generates a high-quality stream of random numbers with a small state vector and high performance using the Xoshiro256++ algorithm
+* `RandomDevice`: for OS-provided entropy. This may be used for cryptographically secure random numbers (CS(P)RNG).
+* `MersenneTwister`: an alternate high-quality PRNG which was the default in older versions of Julia, and is also quite fast, but requires much more space to store the state vector and generate a random sequence.
+
+Most functions related to random generation accept an optional `AbstractRNG` object as first argument.
+Some also accept dimension specifications `dims...` (which can also be given as a tuple) to generate
+arrays of random values.
+In a multi-threaded program, you should generally use different RNG objects from different threads
+or tasks in order to be thread-safe. However, the default RNG is thread-safe as of Julia 1.3
+(using a per-thread RNG up to version 1.6, and per-task thereafter).
+
+The provided RNGs can generate uniform random numbers of the following types:
 [`Float16`](@ref), [`Float32`](@ref), [`Float64`](@ref), [`BigFloat`](@ref), [`Bool`](@ref),
 [`Int8`](@ref), [`UInt8`](@ref), [`Int16`](@ref), [`UInt16`](@ref), [`Int32`](@ref),
 [`UInt32`](@ref), [`Int64`](@ref), [`UInt64`](@ref), [`Int128`](@ref), [`UInt128`](@ref),
@@ -63,8 +70,11 @@ Random.shuffle!
 ## Generators (creation and seeding)
 
 ```@docs
+Random.default_rng
 Random.seed!
 Random.AbstractRNG
+Random.TaskLocalRNG
+Random.Xoshiro
 Random.MersenneTwister
 Random.RandomDevice
 ```
@@ -145,22 +155,22 @@ Scalar and array methods for `Die` now work as expected:
 
 ```jldoctest Die; setup = :(Random.seed!(1))
 julia> rand(Die)
-Die(10)
+Die(5)
 
 julia> rand(MersenneTwister(0), Die)
-Die(16)
+Die(11)
 
 julia> rand(Die, 3)
 3-element Vector{Die}:
- Die(5)
- Die(20)
  Die(9)
+ Die(15)
+ Die(14)
 
 julia> a = Vector{Die}(undef, 3); rand!(a)
 3-element Vector{Die}:
- Die(11)
- Die(20)
- Die(10)
+ Die(19)
+ Die(7)
+ Die(17)
 ```
 
 #### A simple sampler without pre-computed data
@@ -173,13 +183,13 @@ In order to define random generation out of objects of type `S`, the following m
 julia> Random.rand(rng::AbstractRNG, d::Random.SamplerTrivial{Die}) = rand(rng, 1:d[].nsides);
 
 julia> rand(Die(4))
-2
+1
 
 julia> rand(Die(4), 3)
 3-element Vector{Any}:
- 1
- 4
  2
+ 3
+ 3
 ```
 
 Given a collection type `S`, it's currently assumed that if `rand(::S)` is defined, an object of type `eltype(S)` will be produced. In the last example, a `Vector{Any}` is produced; the reason is that `eltype(Die) == Any`. The remedy is to define `Base.eltype(::Type{Die}) = Int`.
@@ -332,11 +342,25 @@ DocTestSetup = nothing
 
 # Reproducibility
 
-By using an RNG parameter initialized with a given seed, you can reproduce the same pseudorandom number sequence when running your program multiple times.  However, a minor release of Julia (e.g. 1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed.  (Even if the sequence produced by a low-level function like [`rand`](@ref) does not change, the output of higher-level functions like [`randsubseq`](@ref) may change due to algorithm updates.)   Rationale: guaranteeing that pseudorandom streams never change prohibits many algorithmic improvements.
-
-If you need to guarantee exact reproducibility of random data, it is advisable to simply *save the data* (e.g. as a supplementary attachment in a scientific publication).  (You can also, of course, specify a
-particular Julia version and package manifest, especially if you require bit reproducibility.)
-
-Software tests that rely on *specific* "random" data should also generally save the data or embed it into the test code.  On the other hand, tests that should pass for *most* random data (e.g. testing `A \ (A*x) ≈ x` for a random matrix `A = randn(n,n)`) can use an RNG with a fixed seed to ensure that simply running the test many times does not encounter a failure due to very improbable data (e.g. an extremely ill-conditioned matrix).
-
-The statistical *distribution* from which random samples are drawn *is* guaranteed to be the same across any minor Julia releases.
+By using an RNG parameter initialized with a given seed, you can reproduce the same pseudorandom
+number sequence when running your program multiple times. However, a minor release of Julia (e.g.
+1.3 to 1.4) *may change* the sequence of pseudorandom numbers generated from a specific seed, in
+particular if `MersenneTwister` is used. (Even if the sequence produced by a low-level function like
+[`rand`](@ref) does not change, the output of higher-level functions like [`randsubseq`](@ref) may
+change due to algorithm updates.) Rationale: guaranteeing that pseudorandom streams never change
+prohibits many algorithmic improvements.
+
+If you need to guarantee exact reproducibility of random data, it is advisable to simply *save the
+data* (e.g. as a supplementary attachment in a scientific publication). (You can also, of course,
+specify a particular Julia version and package manifest, especially if you require bit
+reproducibility.)
+
+Software tests that rely on *specific* "random" data should also generally either save the data,
+embed it into the test code, or use third-party packages like
+[StableRNGs.jl](https://github.com/JuliaRandom/StableRNGs.jl). On the other hand, tests that should
+pass for *most* random data (e.g. testing `A \ (A*x) ≈ x` for a random matrix `A = randn(n,n)`) can
+use an RNG with a fixed seed to ensure that simply running the test many times does not encounter a
+failure due to very improbable data (e.g. an extremely ill-conditioned matrix).
+
+The statistical *distribution* from which random samples are drawn *is* guaranteed to be the same
+across any minor Julia releases.
diff --git a/stdlib/Random/src/RNGs.jl b/stdlib/Random/src/RNGs.jl
index 281acad533dad9..115034d3e39882 100644
--- a/stdlib/Random/src/RNGs.jl
+++ b/stdlib/Random/src/RNGs.jl
@@ -2,60 +2,6 @@
 
 ## RandomDevice
 
-# SamplerUnion(X, Y, ...}) == Union{SamplerType{X}, SamplerType{Y}, ...}
-SamplerUnion(U...) = Union{Any[SamplerType{T} for T in U]...}
-const SamplerBoolBitInteger = SamplerUnion(Bool, BitInteger_types...)
-
-if Sys.iswindows()
-    struct RandomDevice <: AbstractRNG
-        buffer::Vector{UInt128}
-
-        RandomDevice() = new(Vector{UInt128}(undef, 1))
-    end
-
-    function rand(rd::RandomDevice, sp::SamplerBoolBitInteger)
-        rand!(rd, rd.buffer)
-        @inbounds return rd.buffer[1] % sp[]
-    end
-else # !windows
-    struct RandomDevice <: AbstractRNG
-        unlimited::Bool
-
-        RandomDevice(; unlimited::Bool=true) = new(unlimited)
-    end
-
-    rand(rd::RandomDevice, sp::SamplerBoolBitInteger) = read(getfile(rd), sp[])
-    rand(rd::RandomDevice, ::SamplerType{Bool}) = read(getfile(rd), UInt8) % Bool
-
-    function getfile(rd::RandomDevice)
-        devrandom = rd.unlimited ? DEV_URANDOM : DEV_RANDOM
-        # TODO: there is a data-race, this can leak up to nthreads() copies of the file descriptors,
-        # so use a "thread-once" utility once available
-        isassigned(devrandom) || (devrandom[] = open(rd.unlimited ? "/dev/urandom" : "/dev/random"))
-        devrandom[]
-    end
-
-    const DEV_RANDOM  = Ref{IOStream}()
-    const DEV_URANDOM = Ref{IOStream}()
-
-end # os-test
-
-# NOTE: this can't be put within the if-else block above
-for T in (Bool, BitInteger_types...)
-    if Sys.iswindows()
-        @eval function rand!(rd::RandomDevice, A::Array{$T}, ::SamplerType{$T})
-            Base.windowserror("SystemFunction036 (RtlGenRandom)", 0 == ccall(
-                (:SystemFunction036, :Advapi32), stdcall, UInt8, (Ptr{Cvoid}, UInt32),
-                  A, sizeof(A)))
-            A
-        end
-    else
-        @eval rand!(rd::RandomDevice, A::Array{$T}, ::SamplerType{$T}) = read!(getfile(rd), A)
-    end
-end
-
-# RandomDevice produces natively UInt64
-rng_native_52(::RandomDevice) = UInt64
 
 """
     RandomDevice()
@@ -64,11 +10,31 @@ Create a `RandomDevice` RNG object.
 Two such objects will always generate different streams of random numbers.
 The entropy is obtained from the operating system.
 """
-RandomDevice
-
-RandomDevice(::Nothing) = RandomDevice()
+struct RandomDevice <: AbstractRNG; end
+RandomDevice(seed::Nothing) = RandomDevice()
 seed!(rng::RandomDevice) = rng
 
+rand(rd::RandomDevice, sp::SamplerBoolBitInteger) = Libc.getrandom!(Ref{sp[]}())[]
+rand(rd::RandomDevice, ::SamplerType{Bool}) = rand(rd, UInt8) % Bool
+function rand!(rd::RandomDevice, A::Array{Bool}, ::SamplerType{Bool})
+    Libc.getrandom!(A)
+    # we need to mask the result so that only the LSB in each byte can be non-zero
+    GC.@preserve A begin
+        p = Ptr{UInt8}(pointer(A))
+        for i = 1:length(A)
+            unsafe_store!(p, unsafe_load(p) & 0x1)
+            p += 1
+        end
+    end
+    return A
+end
+for T in BitInteger_types
+    @eval rand!(rd::RandomDevice, A::Array{$T}, ::SamplerType{$T}) = Libc.getrandom!(A)
+end
+
+# RandomDevice produces natively UInt64
+rng_native_52(::RandomDevice) = UInt64
+
 
 ## MersenneTwister
 
@@ -85,14 +51,22 @@ mutable struct MersenneTwister <: AbstractRNG
     idxF::Int
     idxI::Int
 
-    function MersenneTwister(seed, state, vals, ints, idxF, idxI)
+    # counters for show
+    adv::Int64          # state of advance at the DSFMT_state level
+    adv_jump::BigInt    # number of skipped Float64 values via randjump
+    adv_vals::Int64     # state of advance when vals is filled-up
+    adv_ints::Int64     # state of advance when ints is filled-up
+
+    function MersenneTwister(seed, state, vals, ints, idxF, idxI,
+                             adv, adv_jump, adv_vals, adv_ints)
         length(vals) == MT_CACHE_F && 0 <= idxF <= MT_CACHE_F ||
             throw(DomainError((length(vals), idxF),
                       "`length(vals)` and `idxF` must be consistent with $MT_CACHE_F"))
         length(ints) == MT_CACHE_I >> 4 && 0 <= idxI <= MT_CACHE_I ||
             throw(DomainError((length(ints), idxI),
                       "`length(ints)` and `idxI` must be consistent with $MT_CACHE_I"))
-        new(seed, state, vals, ints, idxF, idxI)
+        new(seed, state, vals, ints, idxF, idxI,
+            adv, adv_jump, adv_vals, adv_ints)
     end
 end
 
@@ -100,7 +74,7 @@ MersenneTwister(seed::Vector{UInt32}, state::DSFMT_state) =
     MersenneTwister(seed, state,
                     Vector{Float64}(undef, MT_CACHE_F),
                     Vector{UInt128}(undef, MT_CACHE_I >> 4),
-                    MT_CACHE_F, 0)
+                    MT_CACHE_F, 0, 0, 0, -1, -1)
 
 """
     MersenneTwister(seed)
@@ -147,12 +121,16 @@ function copy!(dst::MersenneTwister, src::MersenneTwister)
     copyto!(dst.ints, src.ints)
     dst.idxF = src.idxF
     dst.idxI = src.idxI
+    dst.adv = src.adv
+    dst.adv_jump = src.adv_jump
+    dst.adv_vals = src.adv_vals
+    dst.adv_ints = src.adv_ints
     dst
 end
 
 copy(src::MersenneTwister) =
     MersenneTwister(copy(src.seed), copy(src.state), copy(src.vals), copy(src.ints),
-                    src.idxF, src.idxI)
+                    src.idxF, src.idxI, src.adv, src.adv_jump, src.adv_vals, src.adv_ints)
 
 
 ==(r1::MersenneTwister, r2::MersenneTwister) =
@@ -164,17 +142,46 @@ copy(src::MersenneTwister) =
 hash(r::MersenneTwister, h::UInt) =
     foldr(hash, (r.seed, r.state, r.vals, r.ints, r.idxF, r.idxI); init=h)
 
-function fillcache_zeros!(r::MersenneTwister)
-    # the use of this function is not strictly necessary, but it makes
-    # comparing two MersenneTwister RNGs easier
+function show(io::IO, rng::MersenneTwister)
+    # seed
+    seed = from_seed(rng.seed)
+    seed_str = seed <= typemax(Int) ? string(seed) : "0x" * string(seed, base=16) # DWIM
+    if rng.adv_jump == 0 && rng.adv == 0
+        return print(io, MersenneTwister, "(", seed_str, ")")
+    end
+    print(io, MersenneTwister, "(", seed_str, ", (")
+    # state
+    adv = Integer[rng.adv_jump, rng.adv]
+    if rng.adv_vals != -1 || rng.adv_ints != -1
+        if rng.adv_vals == -1
+            @assert rng.idxF == MT_CACHE_F
+            push!(adv, 0, 0) # "(0, 0)" is nicer on the eyes than (-1, 1002)
+        else
+            push!(adv, rng.adv_vals, rng.idxF)
+        end
+    end
+    if rng.adv_ints != -1
+        idxI = (length(rng.ints)*16 - rng.idxI) / 8 # 8 represents one Int64
+        idxI = Int(idxI) # idxI should always be an integer when using public APIs
+        push!(adv, rng.adv_ints, idxI)
+    end
+    join(io, adv, ", ")
+    print(io, "))")
+end
+
+### low level API
+
+function reset_caches!(r::MersenneTwister)
+    # zeroing the caches makes comparing two MersenneTwister RNGs easier
     fill!(r.vals, 0.0)
     fill!(r.ints, zero(UInt128))
+    mt_setempty!(r)
+    mt_setempty!(r, UInt128)
+    r.adv_vals = -1
+    r.adv_ints = -1
     r
 end
 
-
-### low level API
-
 #### floats
 
 mt_avail(r::MersenneTwister) = MT_CACHE_F - r.idxF
@@ -183,8 +190,9 @@ mt_setfull!(r::MersenneTwister) = r.idxF = 0
 mt_setempty!(r::MersenneTwister) = r.idxF = MT_CACHE_F
 mt_pop!(r::MersenneTwister) = @inbounds return r.vals[r.idxF+=1]
 
-function gen_rand(r::MersenneTwister)
-    GC.@preserve r dsfmt_fill_array_close1_open2!(r.state, pointer(r.vals), length(r.vals))
+@noinline function gen_rand(r::MersenneTwister)
+    r.adv_vals = r.adv
+    GC.@preserve r fill_array!(r, pointer(r.vals), length(r.vals), CloseOpen12())
     mt_setfull!(r)
 end
 
@@ -212,7 +220,34 @@ mt_avail(r::MersenneTwister, ::Type{T}) where {T<:BitInteger} =
     r.idxI >> logsizeof(T)
 
 function mt_setfull!(r::MersenneTwister, ::Type{<:BitInteger})
-    rand!(r, r.ints)
+    r.adv_ints = r.adv
+    ints = r.ints
+
+    @assert length(ints) == 501
+    # dSFMT natively randomizes 52 out of 64 bits of each UInt64 words,
+    # i.e. 12 bits are missing;
+    # by generating 5 words == 5*52 == 260 bits, we can fully
+    # randomize 4 UInt64 = 256 bits; IOW, at the array level, we must
+    # randomize ceil(501*1.25) = 627 UInt128 words (with 2*52 bits each),
+    # which we then condense into fully randomized 501 UInt128 words
+
+    len = 501 + 126 # 126 == ceil(501 / 4)
+    resize!(ints, len)
+    p = pointer(ints) # must be *after* resize!
+    GC.@preserve r fill_array!(r, Ptr{Float64}(p), len*2, CloseOpen12_64())
+
+    k = 501
+    n = 0
+    @inbounds while n != 500
+        u = ints[k+=1]
+        ints[n+=1] ⊻= u << 48
+        ints[n+=1] ⊻= u << 36
+        ints[n+=1] ⊻= u << 24
+        ints[n+=1] ⊻= u << 12
+    end
+    @assert k == len - 1
+    @inbounds ints[501] ⊻= ints[len] << 48
+    resize!(ints, 501)
     r.idxI = MT_CACHE_I
 end
 
@@ -232,12 +267,11 @@ function mt_pop!(r::MersenneTwister, ::Type{T}) where T<:BitInteger
     (x128 >> (i128 * (sizeof(T) << 3))) % T
 end
 
-# not necessary, but very slightly more efficient
 function mt_pop!(r::MersenneTwister, ::Type{T}) where {T<:Union{Int128,UInt128}}
     reserve1(r, T)
-    @inbounds res = r.ints[r.idxI >> 4]
-    r.idxI -= 16
-    res % T
+    idx = r.idxI >> 4
+    r.idxI = idx << 4 - 16
+    @inbounds r.ints[idx] % T
 end
 
 
@@ -249,18 +283,10 @@ end
 function make_seed()
     try
         return rand(RandomDevice(), UInt32, 4)
-    catch
-        println(stderr,
-                "Entropy pool not available to seed RNG; using ad-hoc entropy sources.")
-        seed = reinterpret(UInt64, time())
-        seed = hash(seed, UInt64(getpid()))
-        try
-            seed = hash(seed, parse(UInt64,
-                                    read(pipeline(`ifconfig`, `sha1sum`), String)[1:40],
-                                    base = 16))
-        catch
-        end
-        return make_seed(seed)
+    catch ex
+        ex isa IOError || rethrow()
+        @warn "Entropy pool not available to seed RNG; using ad-hoc entropy sources."
+        return make_seed(Libc.rand())
     end
 end
 
@@ -276,64 +302,73 @@ function make_seed(n::Integer)
     end
 end
 
+# inverse of make_seed(::Integer)
+from_seed(a::Vector{UInt32})::BigInt = sum(a[i] * big(2)^(32*(i-1)) for i in 1:length(a))
+
+
 #### seed!()
 
 function seed!(r::MersenneTwister, seed::Vector{UInt32})
     copyto!(resize!(r.seed, length(seed)), seed)
     dsfmt_init_by_array(r.state, r.seed)
-    mt_setempty!(r)
-    mt_setempty!(r, UInt128)
-    fillcache_zeros!(r)
+    reset_caches!(r)
+    r.adv = 0
+    r.adv_jump = 0
     return r
 end
 
-seed!(r::MersenneTwister=default_rng()) = seed!(r, make_seed())
+seed!(r::MersenneTwister) = seed!(r, make_seed())
 seed!(r::MersenneTwister, n::Integer) = seed!(r, make_seed(n))
-seed!(seed::Union{Integer,Vector{UInt32}}) = seed!(default_rng(), seed)
 
 
 ### Global RNG
 
-const THREAD_RNGs = MersenneTwister[]
-@inline default_rng() = default_rng(Threads.threadid())
-@noinline function default_rng(tid::Int)
-    0 < tid <= length(THREAD_RNGs) || _rng_length_assert()
-    if @inbounds isassigned(THREAD_RNGs, tid)
-        @inbounds MT = THREAD_RNGs[tid]
-    else
-        MT = MersenneTwister()
-        @inbounds THREAD_RNGs[tid] = MT
-    end
-    return MT
+struct _GLOBAL_RNG <: AbstractRNG
+    global const GLOBAL_RNG = _GLOBAL_RNG.instance
 end
-@noinline _rng_length_assert() =  @assert false "0 < tid <= length(THREAD_RNGs)"
 
-function __init__()
-    resize!(empty!(THREAD_RNGs), Threads.nthreads()) # ensures that we didn't save a bad object
-end
+# GLOBAL_RNG currently uses TaskLocalRNG
+typeof_rng(::_GLOBAL_RNG) = TaskLocalRNG
 
+"""
+    default_rng() -> rng
 
-struct _GLOBAL_RNG <: AbstractRNG
-    global const GLOBAL_RNG = _GLOBAL_RNG.instance
-end
+Return the default global random number generator (RNG).
 
-# GLOBAL_RNG currently represents a MersenneTwister
-typeof_rng(::_GLOBAL_RNG) = MersenneTwister
+!!! note
+    What the default RNG is is an implementation detail.  Across different versions of
+    Julia, you should not expect the default RNG to be always the same, nor that it will
+    return the same stream of random numbers for a given seed.
 
-copy!(dst::MersenneTwister, ::_GLOBAL_RNG) = copy!(dst, default_rng())
-copy!(::_GLOBAL_RNG, src::MersenneTwister) = copy!(default_rng(), src)
+!!! compat "Julia 1.3"
+    This function was introduced in Julia 1.3.
+"""
+@inline default_rng() = TaskLocalRNG()
+@inline default_rng(tid::Int) = TaskLocalRNG()
+
+copy!(dst::Xoshiro, ::_GLOBAL_RNG) = copy!(dst, default_rng())
+copy!(::_GLOBAL_RNG, src::Xoshiro) = copy!(default_rng(), src)
 copy(::_GLOBAL_RNG) = copy(default_rng())
 
-seed!(::_GLOBAL_RNG, seed::Vector{UInt32}) = seed!(default_rng(), seed)
-seed!(::_GLOBAL_RNG, n::Integer) = seed!(default_rng(), n)
-seed!(::_GLOBAL_RNG, ::Nothing) = seed!(default_rng(), nothing)
-seed!(::_GLOBAL_RNG) = seed!(default_rng(), nothing)
+GLOBAL_SEED = 0
+set_global_seed!(seed) = global GLOBAL_SEED = seed
+
+function seed!(::_GLOBAL_RNG, seed=rand(RandomDevice(), UInt64, 4))
+    global GLOBAL_SEED = seed
+    seed!(default_rng(), seed)
+end
+
+seed!(rng::_GLOBAL_RNG, ::Nothing) = seed!(rng)  # to resolve ambiguity
+
+seed!(seed::Union{Nothing,Integer,Vector{UInt32},Vector{UInt64}}=nothing) =
+    seed!(GLOBAL_RNG, seed)
 
 rng_native_52(::_GLOBAL_RNG) = rng_native_52(default_rng())
 rand(::_GLOBAL_RNG, sp::SamplerBoolBitInteger) = rand(default_rng(), sp)
 for T in (:(SamplerTrivial{UInt52Raw{UInt64}}),
           :(SamplerTrivial{UInt2x52Raw{UInt128}}),
           :(SamplerTrivial{UInt104Raw{UInt128}}),
+          :(SamplerTrivial{CloseOpen01_64}),
           :(SamplerTrivial{CloseOpen12_64}),
           :(SamplerUnion(Int64, UInt64, Int128, UInt128)),
           :(SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)),
@@ -351,6 +386,10 @@ for T in BitInteger_types
     @eval rand!(::_GLOBAL_RNG, A::Array{$T}, I::SamplerType{$T}) = rand!(default_rng(), A, I)
 end
 
+function __init__()
+    seed!(GLOBAL_RNG)
+end
+
 
 ### generation
 
@@ -437,7 +476,7 @@ Base.getindex(a::UnsafeView, i::Int) = unsafe_load(a.ptr, i)
 Base.setindex!(a::UnsafeView, x, i::Int) = unsafe_store!(a.ptr, x, i)
 Base.pointer(a::UnsafeView) = a.ptr
 Base.size(a::UnsafeView) = (a.len,)
-Base.elsize(::UnsafeView{T}) where {T} = sizeof(T)
+Base.elsize(::Type{UnsafeView{T}}) where {T} = sizeof(T)
 
 # this is essentially equivalent to rand!(r, ::AbstractArray{Float64}, I) above, but due to
 # optimizations which can't be done currently when working with pointers, we have to re-order
@@ -465,6 +504,10 @@ function _rand_max383!(r::MersenneTwister, A::UnsafeView{Float64}, I::FloatInter
     A
 end
 
+function fill_array!(rng::MersenneTwister, A::Ptr{Float64}, n::Int, I)
+    rng.adv += n
+    fill_array!(rng.state, A, n, I)
+end
 
 fill_array!(s::DSFMT_state, A::Ptr{Float64}, n::Int, ::CloseOpen01_64) =
     dsfmt_fill_array_close_open!(s, A, n)
@@ -489,10 +532,10 @@ function rand!(r::MersenneTwister, A::UnsafeView{Float64},
     align = Csize_t(pA) % 16
     if align > 0
         pA2 = pA + 16 - align
-        fill_array!(r.state, pA2, n2, I[]) # generate the data in-place, but shifted
+        fill_array!(r, pA2, n2, I[]) # generate the data in-place, but shifted
         unsafe_copyto!(pA, pA2, n2) # move the data to the beginning of the array
     else
-        fill_array!(r.state, pA, n2, I[])
+        fill_array!(r, pA, n2, I[])
     end
     for i=n2+1:n
         A[i] = rand(r, I[])
@@ -654,5 +697,110 @@ end
 
 # Old randjump methods are deprecated, the scalar version is in the Future module.
 
-_randjump(r::MersenneTwister, jumppoly::DSFMT.GF2X) =
-    fillcache_zeros!(MersenneTwister(copy(r.seed), DSFMT.dsfmt_jump(r.state, jumppoly)))
+function _randjump(r::MersenneTwister, jumppoly::DSFMT.GF2X)
+    adv = r.adv
+    adv_jump = r.adv_jump
+    s = MersenneTwister(copy(r.seed), DSFMT.dsfmt_jump(r.state, jumppoly))
+    reset_caches!(s)
+    s.adv = adv
+    s.adv_jump = adv_jump
+    s
+end
+
+# NON-PUBLIC
+function jump(r::MersenneTwister, steps::Integer)
+    iseven(steps) || throw(DomainError(steps, "steps must be even"))
+    # steps >= 0 checked in calc_jump (`steps >> 1 < 0` if `steps < 0`)
+    j = _randjump(r, Random.DSFMT.calc_jump(steps >> 1))
+    j.adv_jump += steps
+    j
+end
+
+# NON-PUBLIC
+jump!(r::MersenneTwister, steps::Integer) = copy!(r, jump(r, steps))
+
+
+### constructors matching show (EXPERIMENTAL)
+
+# parameters in the tuples are:
+# 1: .adv_jump (jump steps)
+# 2: .adv (number of generated floats at the DSFMT_state level since seeding, besides jumps)
+# 3, 4: .adv_vals, .idxF (counters to reconstruct the float chache, optional if 5-6 not shown))
+# 5, 6: .adv_ints, .idxI (counters to reconstruct the integer chache, optional)
+
+Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{6,Integer}) =
+    advance!(MersenneTwister(seed), advance...)
+
+Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{4,Integer}) =
+    MersenneTwister(seed, (advance..., 0, 0))
+
+Random.MersenneTwister(seed::Union{Integer,Vector{UInt32}}, advance::NTuple{2,Integer}) =
+    MersenneTwister(seed, (advance..., 0, 0, 0, 0))
+
+# advances raw state (per fill_array!) of r by n steps (Float64 values)
+function _advance_n!(r::MersenneTwister, n::Int64, work::Vector{Float64})
+    n == 0 && return
+    n < 0 && throw(DomainError(n, "can't advance $r to the specified state"))
+    ms = dsfmt_get_min_array_size() % Int64
+    @assert n >= ms
+    lw = ms + n % ms
+    resize!(work, lw)
+    GC.@preserve work fill_array!(r, pointer(work), lw, CloseOpen12())
+    c::Int64 = lw
+    GC.@preserve work while n > c
+        fill_array!(r, pointer(work), ms, CloseOpen12())
+        c += ms
+    end
+    @assert n == c
+end
+
+function _advance_to!(r::MersenneTwister, adv::Int64, work)
+    _advance_n!(r, adv - r.adv, work)
+    @assert r.adv == adv
+end
+
+function _advance_F!(r::MersenneTwister, adv_vals, idxF, work)
+    _advance_to!(r, adv_vals, work)
+    gen_rand(r)
+    @assert r.adv_vals == adv_vals
+    r.idxF = idxF
+end
+
+function _advance_I!(r::MersenneTwister, adv_ints, idxI, work)
+    _advance_to!(r, adv_ints, work)
+    mt_setfull!(r, Int) # sets r.adv_ints
+    @assert r.adv_ints == adv_ints
+    r.idxI = 16*length(r.ints) - 8*idxI
+end
+
+function advance!(r::MersenneTwister, adv_jump, adv, adv_vals, idxF, adv_ints, idxI)
+    adv_jump = BigInt(adv_jump)
+    adv, adv_vals, adv_ints = Int64.((adv, adv_vals, adv_ints))
+    idxF, idxI = Int.((idxF, idxI))
+
+    ms = dsfmt_get_min_array_size() % Int
+    work = sizehint!(Vector{Float64}(), 2ms)
+
+    adv_jump != 0 && jump!(r, adv_jump)
+    advF = (adv_vals, idxF) != (0, 0)
+    advI = (adv_ints, idxI) != (0, 0)
+
+    if advI && advF
+        @assert adv_vals != adv_ints
+        if adv_vals < adv_ints
+            _advance_F!(r, adv_vals, idxF, work)
+            _advance_I!(r, adv_ints, idxI, work)
+        else
+            _advance_I!(r, adv_ints, idxI, work)
+            _advance_F!(r, adv_vals, idxF, work)
+        end
+    elseif advF
+        _advance_F!(r, adv_vals, idxF, work)
+    elseif advI
+        _advance_I!(r, adv_ints, idxI, work)
+    else
+        @assert adv == 0
+    end
+    _advance_to!(r, adv, work)
+    r
+end
diff --git a/stdlib/Random/src/Random.jl b/stdlib/Random/src/Random.jl
index 5197ac1c34e7b7..4eb7a418734c9f 100644
--- a/stdlib/Random/src/Random.jl
+++ b/stdlib/Random/src/Random.jl
@@ -13,11 +13,12 @@ include("DSFMT.jl")
 using .DSFMT
 using Base.GMP.MPZ
 using Base.GMP: Limb
+import SHA
 
 using Base: BitInteger, BitInteger_types, BitUnsigned, require_one_based_indexing
 
 import Base: copymutable, copy, copy!, ==, hash, convert,
-             rand, randn
+             rand, randn, show
 
 export rand!, randn!,
        randexp, randexp!,
@@ -27,7 +28,7 @@ export rand!, randn!,
        shuffle, shuffle!,
        randperm, randperm!,
        randcycle, randcycle!,
-       AbstractRNG, MersenneTwister, RandomDevice
+       AbstractRNG, MersenneTwister, RandomDevice, TaskLocalRNG, Xoshiro
 
 ## general definitions
 
@@ -142,8 +143,10 @@ Sampler(rng::AbstractRNG, ::Type{X}, r::Repetition=Val(Inf)) where {X} =
 
 typeof_rng(rng::AbstractRNG) = typeof(rng)
 
-Sampler(::Type{<:AbstractRNG}, sp::Sampler, ::Repetition) =
-    throw(ArgumentError("Sampler for this object is not defined"))
+# this method is necessary to prevent rand(rng::AbstractRNG, X) from
+# recursively constructing nested Sampler types.
+Sampler(T::Type{<:AbstractRNG}, sp::Sampler, r::Repetition) =
+    throw(MethodError(Sampler, (T, sp, r)))
 
 # default shortcut for the general case
 Sampler(::Type{RNG}, X) where {RNG<:AbstractRNG} = Sampler(RNG, X, Val(Inf))
@@ -291,16 +294,22 @@ rand(                ::Type{X}, dims::Dims) where {X} = rand(default_rng(), X, d
 rand(r::AbstractRNG, ::Type{X}, d::Integer, dims::Integer...) where {X} = rand(r, X, Dims((d, dims...)))
 rand(                ::Type{X}, d::Integer, dims::Integer...) where {X} = rand(X, Dims((d, dims...)))
 
+# SamplerUnion(X, Y, ...}) == Union{SamplerType{X}, SamplerType{Y}, ...}
+SamplerUnion(U...) = Union{Any[SamplerType{T} for T in U]...}
+const SamplerBoolBitInteger = SamplerUnion(Bool, BitInteger_types...)
 
+
+include("Xoshiro.jl")
 include("RNGs.jl")
 include("generation.jl")
 include("normal.jl")
 include("misc.jl")
+include("XoshiroSimd.jl")
 
 ## rand & rand! & seed! docstrings
 
 """
-    rand([rng=GLOBAL_RNG], [S], [dims...])
+    rand([rng=default_rng()], [S], [dims...])
 
 Pick a random element or array of random elements from the set of values specified by `S`;
 `S` can be
@@ -345,14 +354,14 @@ julia> rand(Float64, (2, 3))
     The complexity of `rand(rng, s::Union{AbstractDict,AbstractSet})`
     is linear in the length of `s`, unless an optimized method with
     constant complexity is available, which is the case for `Dict`,
-    `Set` and `BitSet`. For more than a few calls, use `rand(rng,
+    `Set` and dense `BitSet`s. For more than a few calls, use `rand(rng,
     collect(s))` instead, or either `rand(rng, Dict(s))` or `rand(rng,
     Set(s))` as appropriate.
 """
 rand
 
 """
-    rand!([rng=GLOBAL_RNG], A, [S=eltype(A)])
+    rand!([rng=default_rng()], A, [S=eltype(A)])
 
 Populate the array `A` with random values. If `S` is specified
 (`S` can be a type or a collection, cf. [`rand`](@ref) for details),
@@ -376,8 +385,8 @@ julia> rand!(rng, zeros(5))
 rand!
 
 """
-    seed!([rng=GLOBAL_RNG], seed) -> rng
-    seed!([rng=GLOBAL_RNG]) -> rng
+    seed!([rng=default_rng()], seed) -> rng
+    seed!([rng=default_rng()]) -> rng
 
 Reseed the random number generator: `rng` will give a reproducible
 sequence of numbers if and only if a `seed` is provided. Some RNGs
@@ -386,7 +395,7 @@ After the call to `seed!`, `rng` is equivalent to a newly created
 object initialized with the same seed.
 
 If `rng` is not specified, it defaults to seeding the state of the
-shared thread-local generator.
+shared task-local generator.
 
 # Examples
 ```julia-repl
diff --git a/stdlib/Random/src/Xoshiro.jl b/stdlib/Random/src/Xoshiro.jl
new file mode 100644
index 00000000000000..5b8aa4644d1402
--- /dev/null
+++ b/stdlib/Random/src/Xoshiro.jl
@@ -0,0 +1,211 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## Xoshiro RNG
+# Lots of implementation is shared with TaskLocalRNG
+
+"""
+    Xoshiro(seed)
+    Xoshiro()
+
+Xoshiro256++ is a fast pseudorandom number generator described by David Blackman and
+Sebastiano Vigna in "Scrambled Linear Pseudorandom Number Generators",
+ACM Trans. Math. Softw., 2021. Reference implementation is available
+at http://prng.di.unimi.it
+
+Apart from the high speed, Xoshiro has a small memory footprint, making it suitable for
+applications where many different random states need to be held for long time.
+
+Julia's Xoshiro implementation has a bulk-generation mode; this seeds new virtual PRNGs
+from the parent, and uses SIMD to generate in parallel (i.e. the bulk stream consists of
+multiple interleaved xoshiro instances).
+The virtual PRNGs are discarded once the bulk request has been serviced (and should cause
+no heap allocations).
+
+# Examples
+```jldoctest
+julia> using Random
+
+julia> rng = Xoshiro(1234);
+
+julia> x1 = rand(rng, 2)
+2-element Vector{Float64}:
+ 0.32597672886359486
+ 0.5490511363155669
+
+julia> rng = Xoshiro(1234);
+
+julia> x2 = rand(rng, 2)
+2-element Vector{Float64}:
+ 0.32597672886359486
+ 0.5490511363155669
+
+julia> x1 == x2
+true
+```
+"""
+mutable struct Xoshiro <: AbstractRNG
+    s0::UInt64
+    s1::UInt64
+    s2::UInt64
+    s3::UInt64
+
+    Xoshiro(s0::Integer, s1::Integer, s2::Integer, s3::Integer) = new(s0, s1, s2, s3)
+    Xoshiro(seed=nothing) = seed!(new(), seed)
+end
+
+function setstate!(x::Xoshiro, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64)
+    x.s0 = s0
+    x.s1 = s1
+    x.s2 = s2
+    x.s3 = s3
+    x
+end
+
+copy(rng::Xoshiro) = Xoshiro(rng.s0, rng.s1, rng.s2, rng.s3)
+
+function copy!(dst::Xoshiro, src::Xoshiro)
+    dst.s0, dst.s1, dst.s2, dst.s3 = src.s0, src.s1, src.s2, src.s3
+    dst
+end
+
+function ==(a::Xoshiro, b::Xoshiro)
+    a.s0 == b.s0 && a.s1 == b.s1 && a.s2 == b.s2 && a.s3 == b.s3
+end
+
+rng_native_52(::Xoshiro) = UInt64
+
+@inline function rand(rng::Xoshiro, ::SamplerType{UInt64})
+    s0, s1, s2, s3 = rng.s0, rng.s1, rng.s2, rng.s3
+    tmp = s0 + s3
+    res = ((tmp << 23) | (tmp >> 41)) + s0
+    t = s1 << 17
+    s2 = xor(s2, s0)
+    s3 = xor(s3, s1)
+    s1 = xor(s1, s2)
+    s0 = xor(s0, s3)
+    s2 = xor(s2, t)
+    s3 = s3 << 45 | s3 >> 19
+    rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3
+    res
+end
+
+
+## Task local RNG
+
+"""
+    TaskLocalRNG
+
+The `TaskLocalRNG` has state that is local to its task, not its thread.
+It is seeded upon task creation, from the state of its parent task.
+Therefore, task creation is an event that changes the parent's RNG state.
+
+As an upside, the `TaskLocalRNG` is pretty fast, and permits reproducible
+multithreaded simulations (barring race conditions), independent of scheduler
+decisions. As long as the number of threads is not used to make decisions on
+task creation, simulation results are also independent of the number of available
+threads / CPUs. The random stream should not depend on hardware specifics, up to
+endianness and possibly word size.
+
+Using or seeding the RNG of any other task than the one returned by `current_task()`
+is undefined behavior: it will work most of the time, and may sometimes fail silently.
+"""
+struct TaskLocalRNG <: AbstractRNG end
+TaskLocalRNG(::Nothing) = TaskLocalRNG()
+rng_native_52(::TaskLocalRNG) = UInt64
+
+function setstate!(x::TaskLocalRNG, s0::UInt64, s1::UInt64, s2::UInt64, s3::UInt64)
+    t = current_task()
+    t.rngState0 = s0
+    t.rngState1 = s1
+    t.rngState2 = s2
+    t.rngState3 = s3
+    x
+end
+
+@inline function rand(::TaskLocalRNG, ::SamplerType{UInt64})
+    task = current_task()
+    s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
+    tmp = s0 + s3
+    res = ((tmp << 23) | (tmp >> 41)) + s0
+    t = s1 << 17
+    s2 = xor(s2, s0)
+    s3 = xor(s3, s1)
+    s1 = xor(s1, s2)
+    s0 = xor(s0, s3)
+    s2 = xor(s2, t)
+    s3 = s3 << 45 | s3 >> 19
+    task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
+    res
+end
+
+# Shared implementation between Xoshiro and TaskLocalRNG -- seeding
+
+function seed!(rng::Union{TaskLocalRNG,Xoshiro})
+    # as we get good randomness from RandomDevice, we can skip hashing
+    rd = RandomDevice()
+    setstate!(rng, rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64), rand(rd, UInt64))
+end
+
+function seed!(rng::Union{TaskLocalRNG,Xoshiro}, seed::Union{Vector{UInt32}, Vector{UInt64}})
+    c = SHA.SHA2_256_CTX()
+    SHA.update!(c, reinterpret(UInt8, seed))
+    s0, s1, s2, s3 = reinterpret(UInt64, SHA.digest!(c))
+    setstate!(rng, s0, s1, s2, s3)
+end
+
+seed!(rng::Union{TaskLocalRNG, Xoshiro}, seed::Integer) = seed!(rng, make_seed(seed))
+
+
+@inline function rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{UInt128})
+    first = rand(rng, UInt64)
+    second = rand(rng,UInt64)
+    second + UInt128(first)<<64
+end
+
+@inline rand(rng::Union{TaskLocalRNG, Xoshiro}, ::SamplerType{Int128}) = rand(rng, UInt128) % Int128
+
+@inline function rand(rng::Union{TaskLocalRNG, Xoshiro},
+                      T::SamplerUnion(Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64))
+    S = T[]
+    # use upper bits
+    (rand(rng, UInt64) >>> (64 - 8*sizeof(S))) % S
+end
+
+function copy(rng::TaskLocalRNG)
+    t = current_task()
+    Xoshiro(t.rngState0, t.rngState1, t.rngState2, t.rngState3)
+end
+
+function copy!(dst::TaskLocalRNG, src::Xoshiro)
+    t = current_task()
+    t.rngState0, t.rngState1, t.rngState2, t.rngState3 = src.s0, src.s1, src.s2, src.s3
+    dst
+end
+
+function copy!(dst::Xoshiro, src::TaskLocalRNG)
+    t = current_task()
+    dst.s0, dst.s1, dst.s2, dst.s3 = t.rngState0, t.rngState1, t.rngState2, t.rngState3
+    dst
+end
+
+function ==(a::Xoshiro, b::TaskLocalRNG)
+    t = current_task()
+    a.s0 == t.rngState0 && a.s1 == t.rngState1 && a.s2 == t.rngState2 && a.s3 == t.rngState3
+end
+
+==(a::TaskLocalRNG, b::Xoshiro) = b == a
+
+# for partial words, use upper bits from Xoshiro
+
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52Raw{UInt64}}) = rand(r, UInt64) >>> 12
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt52{UInt64}})    = rand(r, UInt64) >>> 12
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{UInt104{UInt128}})  = rand(r, UInt104Raw())
+
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float16}}) =
+    Float16(Float32(rand(r, UInt16) >>> 5) * Float32(0x1.0p-11))
+
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01{Float32}}) =
+    Float32(rand(r, UInt32) >>> 8) * Float32(0x1.0p-24)
+
+rand(r::Union{TaskLocalRNG, Xoshiro}, ::SamplerTrivial{CloseOpen01_64}) =
+    Float64(rand(r, UInt64) >>> 11) * 0x1.0p-53
diff --git a/stdlib/Random/src/XoshiroSimd.jl b/stdlib/Random/src/XoshiroSimd.jl
new file mode 100644
index 00000000000000..9fb03f95726889
--- /dev/null
+++ b/stdlib/Random/src/XoshiroSimd.jl
@@ -0,0 +1,308 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module XoshiroSimd
+# Getting the xoroshiro RNG to reliably vectorize is somewhat of a hassle without Simd.jl.
+import ..Random: TaskLocalRNG, rand, rand!, Xoshiro, CloseOpen01, UnsafeView,
+                 SamplerType, SamplerTrivial
+using Base: BitInteger_types
+using Core.Intrinsics: llvmcall
+
+# Vector-width. Influences random stream.
+xoshiroWidth() = Val(8)
+# Simd threshold. Influences random stream.
+simdThreshold(::Type{T}) where T = 64
+simdThreshold(::Type{Bool}) = 640
+
+@inline _rotl45(x::UInt64) = (x<<45)|(x>>19)
+@inline _shl17(x::UInt64) = x<<17
+@inline _rotl23(x::UInt64) = (x<<23)|(x>>41)
+@inline _plus(x::UInt64,y::UInt64) = x+y
+@inline _xor(x::UInt64,y::UInt64) = xor(x,y)
+@inline _and(x::UInt64, y::UInt64) = x & y
+@inline _or(x::UInt64, y::UInt64) = x | y
+@inline _lshr(x, y::Int32) = _lshr(x, y % Int64)
+@inline _lshr(x::UInt64, y::Int64) = llvmcall("""
+    %res = lshr i64 %0, %1
+    ret i64 %res
+    """,
+    UInt64,
+    Tuple{UInt64, Int64},
+    x, y)
+
+@inline _bits2float(x::UInt64, ::Type{Float64}) = reinterpret(UInt64, Float64(x >>> 11) * 0x1.0p-53)
+@inline function _bits2float(x::UInt64, ::Type{Float32})
+    #=
+    # this implementation uses more high bits, but is harder to vectorize
+    x = x >>> 16  # discard low 16 bits
+    u = Float32(x >>> 24) * Float32(0x1.0p-24)
+    l = Float32(x & 0x00ffffff) * Float32(0x1.0p-24)
+    =#
+    ui = (x>>>32) % UInt32
+    li = x % UInt32
+    u = Float32(ui >>> 8) * Float32(0x1.0p-24)
+    l = Float32(li >>> 8) * Float32(0x1.0p-24)
+    (UInt64(reinterpret(UInt32, u)) << 32) | UInt64(reinterpret(UInt32, l))
+end
+
+# required operations. These could be written more concisely with `ntuple`, but the compiler
+# sometimes refuses to properly vectorize.
+for N in [4,8,16]
+    let code, s, fshl = "llvm.fshl.v$(N)i64",
+        VT = :(NTuple{$N, VecElement{UInt64}})
+
+        s = ntuple(_->VecElement(UInt64(45)), N)
+        @eval @inline _rotl45(x::$VT) = ccall($fshl, llvmcall, $VT, ($VT, $VT, $VT), x, x, $s)
+
+        s = ntuple(_->VecElement(UInt64(23)), N)
+        @eval @inline _rotl23(x::$VT) = ccall($fshl, llvmcall, $VT, ($VT, $VT, $VT), x, x, $s)
+
+        code = """
+        %lshiftOp = shufflevector <1 x i64> <i64 17>, <1 x i64> undef, <$N x i32> zeroinitializer
+        %res = shl <$N x i64> %0, %lshiftOp
+        ret <$N x i64> %res
+        """
+        @eval @inline _shl17(x::$VT) = llvmcall($code, $VT, Tuple{$VT}, x)
+
+        code = """
+        %res = add <$N x i64> %1, %0
+        ret <$N x i64> %res
+        """
+        @eval @inline _plus(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y)
+
+        code = """
+        %res = xor <$N x i64> %1, %0
+        ret <$N x i64> %res
+        """
+        @eval @inline _xor(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y)
+
+        code = """
+        %res = and <$N x i64> %1, %0
+        ret <$N x i64> %res
+        """
+        @eval @inline _and(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y)
+
+        code = """
+        %res = or <$N x i64> %1, %0
+        ret <$N x i64> %res
+        """
+        @eval @inline _or(x::$VT, y::$VT) = llvmcall($code, $VT, Tuple{$VT, $VT}, x, y)
+
+        code = """
+        %tmp = insertelement <1 x i64> undef, i64 %1, i32 0
+        %shift = shufflevector <1 x i64> %tmp, <1 x i64> %tmp, <$N x i32> zeroinitializer
+        %res = lshr <$N x i64> %0, %shift
+        ret <$N x i64> %res
+        """
+        @eval @inline _lshr(x::$VT, y::Int64) = llvmcall($code, $VT, Tuple{$VT, Int64}, x, y)
+
+        code = """
+        %shiftamt = shufflevector <1 x i64> <i64 11>, <1 x i64> undef, <$N x i32> zeroinitializer
+        %sh = lshr <$N x i64> %0, %shiftamt
+        %f = uitofp <$N x i64> %sh to <$N x double>
+        %scale = shufflevector <1 x double> <double 0x3ca0000000000000>, <1 x double> undef, <$N x i32> zeroinitializer
+        %m = fmul <$N x double> %f, %scale
+        %i = bitcast <$N x double> %m to <$N x i64>
+        ret <$N x i64> %i
+        """
+        @eval @inline _bits2float(x::$VT, ::Type{Float64}) = llvmcall($code, $VT, Tuple{$VT}, x)
+
+        code = """
+        %as32 = bitcast <$N x i64> %0 to <$(2N) x i32>
+        %shiftamt = shufflevector <1 x i32> <i32 8>, <1 x i32> undef, <$(2N) x i32> zeroinitializer
+        %sh = lshr <$(2N) x i32> %as32, %shiftamt
+        %f = uitofp <$(2N) x i32> %sh to <$(2N) x float>
+        %scale = shufflevector <1 x float> <float 0x3e70000000000000>, <1 x float> undef, <$(2N) x i32> zeroinitializer
+        %m = fmul <$(2N) x float> %f, %scale
+        %i = bitcast <$(2N) x float> %m to <$N x i64>
+        ret <$N x i64> %i
+        """
+        @eval @inline _bits2float(x::$VT, ::Type{Float32}) = llvmcall($code, $VT, Tuple{$VT}, x)
+    end
+end
+
+
+function forkRand(rng::Union{TaskLocalRNG, Xoshiro}, ::Val{N}) where N
+    # constants have nothing up their sleeve. For more discussion, cf rng_split in task.c
+    # 0x02011ce34bce797f == hash(UInt(1))|0x01
+    # 0x5a94851fb48a6e05 == hash(UInt(2))|0x01
+    # 0x3688cf5d48899fa7 == hash(UInt(3))|0x01
+    # 0x867b4bb4c42e5661 == hash(UInt(4))|0x01
+    s0 = ntuple(i->VecElement(0x02011ce34bce797f * rand(rng, UInt64)), Val(N))
+    s1 = ntuple(i->VecElement(0x5a94851fb48a6e05 * rand(rng, UInt64)), Val(N))
+    s2 = ntuple(i->VecElement(0x3688cf5d48899fa7 * rand(rng, UInt64)), Val(N))
+    s3 = ntuple(i->VecElement(0x867b4bb4c42e5661 * rand(rng, UInt64)), Val(N))
+    (s0, s1, s2, s3)
+end
+
+_id(x, T) = x
+
+@inline function xoshiro_bulk(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, T::Union{Type{UInt8}, Type{Bool}, Type{Float32}, Type{Float64}}, ::Val{N}, f::F = _id) where {N, F}
+    if len >= simdThreshold(T)
+        written = xoshiro_bulk_simd(rng, dst, len, T, Val(N), f)
+        len -= written
+        dst += written
+    end
+    if len != 0
+        xoshiro_bulk_nosimd(rng, dst, len, T, f)
+    end
+    nothing
+end
+
+@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, f::F) where {T, F}
+    if rng isa TaskLocalRNG
+        task = current_task()
+        s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
+    else
+        (; s0, s1, s2, s3) = rng::Xoshiro
+    end
+
+    i = 0
+    while i+8 <= len
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
+        unsafe_store!(reinterpret(Ptr{UInt64}, dst + i), f(res, T))
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+        i += 8
+    end
+    if i < len
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+        ref = Ref(f(res, T))
+        # TODO: This may make the random-stream dependent on system endianness
+        ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i)
+    end
+    if rng isa TaskLocalRNG
+        task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
+    else
+       rng.s0, rng.s1, rng.s2, rng.s3 =  s0, s1, s2, s3
+    end
+    nothing
+end
+
+@noinline function xoshiro_bulk_nosimd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, f)
+    if rng isa TaskLocalRNG
+        task = current_task()
+        s0, s1, s2, s3 = task.rngState0, task.rngState1, task.rngState2, task.rngState3
+    else
+        (; s0, s1, s2, s3) = rng::Xoshiro
+    end
+
+    i = 0
+    while i+8 <= len
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
+        shift = 0
+        while i+8 <= len && shift < 8
+            resLoc = _and(_lshr(res, shift), 0x0101010101010101)
+            unsafe_store!(reinterpret(Ptr{UInt64}, dst + i), resLoc)
+            i += 8
+            shift += 1
+        end
+
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+    end
+    if i < len
+        # we may overgenerate some bytes here, if len mod 64 <= 56 and len mod 8 != 0
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
+        resLoc = _and(res, 0x0101010101010101)
+        ref = Ref(resLoc)
+        ccall(:memcpy, Ptr{Cvoid}, (Ptr{UInt8}, Ptr{UInt64}, Csize_t), dst+i, ref, len-i)
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+    end
+    if rng isa TaskLocalRNG
+        task.rngState0, task.rngState1, task.rngState2, task.rngState3 = s0, s1, s2, s3
+    else
+        rng.s0, rng.s1, rng.s2, rng.s3 = s0, s1, s2, s3
+    end
+    nothing
+end
+
+
+@noinline function xoshiro_bulk_simd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{T}, ::Val{N}, f::F) where {T,N,F}
+    s0, s1, s2, s3 = forkRand(rng, Val(N))
+
+    i = 0
+    while i + 8*N <= len
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+        unsafe_store!(reinterpret(Ptr{NTuple{N,VecElement{UInt64}}}, dst + i), f(res, T))
+        i += 8*N
+    end
+    return i
+end
+
+@noinline function xoshiro_bulk_simd(rng::Union{TaskLocalRNG, Xoshiro}, dst::Ptr{UInt8}, len::Int, ::Type{Bool}, ::Val{N}, f) where {N}
+    s0, s1, s2, s3 = forkRand(rng, Val(N))
+    msk = ntuple(i->VecElement(0x0101010101010101), Val(N))
+    i = 0
+    while i + 64*N <= len
+        res = _plus(_rotl23(_plus(s0,s3)),s0)
+        t = _shl17(s1)
+        s2 = _xor(s2, s0)
+        s3 = _xor(s3, s1)
+        s1 = _xor(s1, s2)
+        s0 = _xor(s0, s3)
+        s2 = _xor(s2, t)
+        s3 = _rotl45(s3)
+        for k=0:7
+            tmp = _lshr(res, k)
+            toWrite = _and(tmp, msk)
+            unsafe_store!(reinterpret(Ptr{NTuple{N,VecElement{UInt64}}}, dst + i + k*N*8), toWrite)
+        end
+        i += 64*N
+    end
+    return i
+end
+
+
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float32}, ::SamplerTrivial{CloseOpen01{Float32}})
+    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*4, Float32, xoshiroWidth(), _bits2float)
+    dst
+end
+
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Float64}, ::SamplerTrivial{CloseOpen01{Float64}})
+    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*8, Float64, xoshiroWidth(), _bits2float)
+    dst
+end
+
+for T in BitInteger_types
+    @eval function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Union{Array{$T}, UnsafeView{$T}}, ::SamplerType{$T})
+        GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst)*sizeof($T), UInt8, xoshiroWidth())
+        dst
+    end
+end
+
+function rand!(rng::Union{TaskLocalRNG, Xoshiro}, dst::Array{Bool}, ::SamplerType{Bool})
+    GC.@preserve dst xoshiro_bulk(rng, convert(Ptr{UInt8}, pointer(dst)), length(dst), Bool, xoshiroWidth())
+    dst
+end
+
+end # module
diff --git a/stdlib/Random/src/generation.jl b/stdlib/Random/src/generation.jl
index 48eed781ce0e9d..ddbf6dce98bec3 100644
--- a/stdlib/Random/src/generation.jl
+++ b/stdlib/Random/src/generation.jl
@@ -56,8 +56,8 @@ end
 Sampler(::Type{<:AbstractRNG}, I::FloatInterval{BigFloat}, ::Repetition) =
     SamplerBigFloat{typeof(I)}(precision(BigFloat))
 
-function _rand(rng::AbstractRNG, sp::SamplerBigFloat)
-    z = BigFloat()
+function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat)
+    precision(z) == sp.prec || throw(ArgumentError("incompatible BigFloat precision"))
     limbs = sp.limbs
     rand!(rng, limbs)
     @inbounds begin
@@ -67,17 +67,17 @@ function _rand(rng::AbstractRNG, sp::SamplerBigFloat)
     end
     z.sign = 1
     GC.@preserve limbs unsafe_copyto!(z.d, pointer(limbs), sp.nlimbs)
-    (z, randbool)
+    randbool
 end
 
-function _rand(rng::AbstractRNG, sp::SamplerBigFloat, ::CloseOpen12{BigFloat})
-    z = _rand(rng, sp)[1]
+function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat, ::CloseOpen12{BigFloat})
+    _rand!(rng, z, sp)
     z.exp = 1
     z
 end
 
-function _rand(rng::AbstractRNG, sp::SamplerBigFloat, ::CloseOpen01{BigFloat})
-    z, randbool = _rand(rng, sp)
+function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat, ::CloseOpen01{BigFloat})
+    randbool = _rand!(rng, z, sp)
     z.exp = 0
     randbool &&
         ccall((:mpfr_sub_d, :libmpfr), Int32,
@@ -88,15 +88,21 @@ end
 
 # alternative, with 1 bit less of precision
 # TODO: make an API for requesting full or not-full precision
-function _rand(rng::AbstractRNG, sp::SamplerBigFloat, ::CloseOpen01{BigFloat}, ::Nothing)
-    z = _rand(rng, sp, CloseOpen12(BigFloat))
+function _rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat, ::CloseOpen01{BigFloat},
+                ::Nothing)
+    _rand!(rng, z, sp, CloseOpen12(BigFloat))
     ccall((:mpfr_sub_ui, :libmpfr), Int32, (Ref{BigFloat}, Ref{BigFloat}, Culong, Base.MPFR.MPFRRoundingMode),
           z, z, 1, Base.MPFR.ROUNDING_MODE[])
     z
 end
 
+rand!(rng::AbstractRNG, z::BigFloat, sp::SamplerBigFloat{T}
+      ) where {T<:FloatInterval{BigFloat}} =
+          _rand!(rng, z, sp, T())
+
 rand(rng::AbstractRNG, sp::SamplerBigFloat{T}) where {T<:FloatInterval{BigFloat}} =
-    _rand(rng, sp, T())
+    rand!(rng, BigFloat(; precision=sp.prec), sp)
+
 
 ### random integers
 
@@ -166,10 +172,10 @@ end
 
 ### BitInteger
 
-# there are three implemented samplers for unit ranges, the two first of which
-# assume that Float64 (i.e. 52 random bits) is the native type for the RNG:
-# 1) "Fast" (SamplerRangeFast), which is most efficient when the underlying RNG produces
-#    rand(Float64) "fast enough".
+# there are three implemented samplers for unit ranges, the second one
+# assumes that Float64 (i.e. 52 random bits) is the native type for the RNG:
+# 1) "Fast" (SamplerRangeFast), which is most efficient when the range length is close
+#    (or equal) to a power of 2 from below.
 #    The tradeoff is faster creation of the sampler, but more consumption of entropy bits.
 # 2) "Slow" (SamplerRangeInt) which tries to use as few entropy bits as possible, at the
 #    cost of a bigger upfront price associated with the creation of the sampler.
@@ -218,20 +224,32 @@ function rand(rng::AbstractRNG, sp::SamplerRangeFast{UInt32,T}) where T
     (x + a % UInt32) % T
 end
 
+has_fast_64(rng::AbstractRNG) = rng_native_52(rng) != Float64
+# for MersenneTwister, both options have very similar performance
+
 function rand(rng::AbstractRNG, sp::SamplerRangeFast{UInt64,T}) where T
     a, bw, m, mask = sp.a, sp.bw, sp.m, sp.mask
-    x = bw <= 52 ? rand(rng, LessThan(m, Masked(mask, UInt52Raw()))) :
-                   rand(rng, LessThan(m, Masked(mask, uniform(UInt64))))
+    if !has_fast_64(rng) && bw <= 52
+        x = rand(rng, LessThan(m, Masked(mask, UInt52Raw())))
+    else
+        x = rand(rng, LessThan(m, Masked(mask, uniform(UInt64))))
+    end
     (x + a % UInt64) % T
 end
 
 function rand(rng::AbstractRNG, sp::SamplerRangeFast{UInt128,T}) where T
     a, bw, m, mask = sp.a, sp.bw, sp.m, sp.mask
-    x = bw <= 52  ?
-        rand(rng, LessThan(m % UInt64, Masked(mask % UInt64, UInt52Raw()))) % UInt128 :
-    bw <= 104 ?
-        rand(rng, LessThan(m, Masked(mask, UInt104Raw()))) :
-        rand(rng, LessThan(m, Masked(mask, uniform(UInt128))))
+    if has_fast_64(rng)
+        x = bw <= 64 ?
+            rand(rng, LessThan(m % UInt64, Masked(mask % UInt64, uniform(UInt64)))) % UInt128 :
+            rand(rng, LessThan(m, Masked(mask, uniform(UInt128))))
+    else
+        x = bw <= 52  ?
+            rand(rng, LessThan(m % UInt64, Masked(mask % UInt64, UInt52Raw()))) % UInt128 :
+        bw <= 104 ?
+            rand(rng, LessThan(m, Masked(mask, UInt104Raw()))) :
+            rand(rng, LessThan(m, Masked(mask, uniform(UInt128))))
+    end
     x % T + a
 end
 
@@ -340,40 +358,56 @@ end
 
 ### BigInt
 
-struct SamplerBigInt <: Sampler{BigInt}
+struct SamplerBigInt{SP<:Sampler{Limb}} <: Sampler{BigInt}
     a::BigInt         # first
     m::BigInt         # range length - 1
     nlimbs::Int       # number of limbs in generated BigInt's (z ∈ [0, m])
     nlimbsmax::Int    # max number of limbs for z+a
-    mask::Limb        # applied to the highest limb
+    highsp::SP        # sampler for the highest limb of z
 end
 
-function Sampler(::Type{<:AbstractRNG}, r::AbstractUnitRange{BigInt}, ::Repetition)
+function SamplerBigInt(::Type{RNG}, r::AbstractUnitRange{BigInt}, N::Repetition=Val(Inf)
+                       ) where {RNG<:AbstractRNG}
     m = last(r) - first(r)
-    m < 0 && throw(ArgumentError("range must be non-empty"))
-    nd = ndigits(m, base=2)
-    nlimbs, highbits = divrem(nd, 8*sizeof(Limb))
-    highbits > 0 && (nlimbs += 1)
-    mask = highbits == 0 ? ~zero(Limb) : one(Limb)<<highbits - one(Limb)
+    m.size < 0 && throw(ArgumentError("range must be non-empty"))
+    nlimbs = Int(m.size)
+    hm = nlimbs == 0 ? Limb(0) : GC.@preserve m unsafe_load(m.d, nlimbs)
+    highsp = Sampler(RNG, Limb(0):hm, N)
     nlimbsmax = max(nlimbs, abs(last(r).size), abs(first(r).size))
-    return SamplerBigInt(first(r), m, nlimbs, nlimbsmax, mask)
+    return SamplerBigInt(first(r), m, nlimbs, nlimbsmax, highsp)
 end
 
-function rand(rng::AbstractRNG, sp::SamplerBigInt)
-    x = MPZ.realloc2(sp.nlimbsmax*8*sizeof(Limb))
+Sampler(::Type{RNG}, r::AbstractUnitRange{BigInt}, N::Repetition) where {RNG<:AbstractRNG} =
+    SamplerBigInt(RNG, r, N)
+
+rand(rng::AbstractRNG, sp::SamplerBigInt) =
+    rand!(rng, BigInt(nbits = sp.nlimbsmax*8*sizeof(Limb)), sp)
+
+function rand!(rng::AbstractRNG, x::BigInt, sp::SamplerBigInt)
+    nlimbs = sp.nlimbs
+    nlimbs == 0 && return MPZ.set!(x, sp.a)
+    MPZ.realloc2!(x, sp.nlimbsmax*8*sizeof(Limb))
+    @assert x.alloc >= nlimbs
+    # we randomize x ∈ [0, m] with rejection sampling:
+    # 1. the first nlimbs-1 limbs of x are uniformly randomized
+    # 2. the high limb hx of x is sampled from 0:hm where hm is the
+    #    high limb of m
+    # We repeat 1. and 2. until x <= m
+    hm = GC.@preserve sp unsafe_load(sp.m.d, nlimbs)
     GC.@preserve x begin
-        limbs = UnsafeView(x.d, sp.nlimbs)
+        limbs = UnsafeView(x.d, nlimbs-1)
         while true
             rand!(rng, limbs)
-            limbs[end] &= sp.mask
-            MPZ.mpn_cmp(x, sp.m, sp.nlimbs) <= 0 && break
+            hx = limbs[nlimbs] = rand(rng, sp.highsp)
+            hx < hm && break # avoid calling mpn_cmp most of the time
+            MPZ.mpn_cmp(x, sp.m, nlimbs) <= 0 && break
         end
         # adjust x.size (normally done by mpz_limbs_finish, in GMP version >= 6)
-        x.size = sp.nlimbs
-        while x.size > 0
-            limbs[x.size] != 0 && break
-            x.size -= 1
+        while nlimbs > 0
+            limbs[nlimbs] != 0 && break
+            nlimbs -= 1
         end
+        x.size = nlimbs
     end
     MPZ.add!(x, sp.a)
 end
diff --git a/stdlib/Random/src/misc.jl b/stdlib/Random/src/misc.jl
index 465ba27b393322..ab6c796e5f5399 100644
--- a/stdlib/Random/src/misc.jl
+++ b/stdlib/Random/src/misc.jl
@@ -11,7 +11,7 @@ function rand!(rng::AbstractRNG, B::BitArray, ::SamplerType{Bool})
 end
 
 """
-    bitrand([rng=GLOBAL_RNG], [dims...])
+    bitrand([rng=default_rng()], [dims...])
 
 Generate a `BitArray` of random boolean values.
 
@@ -22,14 +22,14 @@ julia> rng = MersenneTwister(1234);
 julia> bitrand(rng, 10)
 10-element BitVector:
  0
- 1
- 1
- 1
- 1
+ 0
+ 0
  0
  1
  0
  0
+ 0
+ 1
  1
 ```
 """
@@ -43,7 +43,7 @@ bitrand(dims::Integer...) = rand!(BitArray(undef, convert(Dims, dims)))
 ## randstring (often useful for temporary filenames/dirnames)
 
 """
-    randstring([rng=GLOBAL_RNG], [chars], [len=8])
+    randstring([rng=default_rng()], [chars], [len=8])
 
 Create a random string of length `len`, consisting of characters from
 `chars`, which defaults to the set of upper- and lower-case letters
@@ -53,13 +53,13 @@ number generator, see [Random Numbers](@ref).
 # Examples
 ```jldoctest
 julia> Random.seed!(3); randstring()
-"4zSHdXlw"
+"Lxz5hUwn"
 
 julia> randstring(MersenneTwister(3), 'a':'z', 6)
-"bzlhqn"
+"ocucay"
 
 julia> randstring("ACGT")
-"AGGACATT"
+"TGCTCCTC"
 ```
 
 !!! note
@@ -71,7 +71,12 @@ function randstring end
 
 let b = UInt8['0':'9';'A':'Z';'a':'z']
     global randstring
-    randstring(r::AbstractRNG, chars=b, n::Integer=8) = String(rand(r, chars, n))
+    function randstring(r::AbstractRNG, chars=b, n::Integer=8)
+        T = eltype(chars)
+        v = T === UInt8 ? Base.StringVector(n) : Vector{T}(undef, n)
+        rand!(r, v, chars)
+        return String(v)
+    end
     randstring(r::AbstractRNG, n::Integer) = randstring(r, b, n)
     randstring(chars=b, n::Integer=8) = randstring(default_rng(), chars, n)
     randstring(n::Integer) = randstring(default_rng(), b, n)
@@ -121,7 +126,7 @@ function randsubseq!(r::AbstractRNG, S::AbstractArray, A::AbstractArray, p::Real
 end
 
 """
-    randsubseq!([rng=GLOBAL_RNG,] S, A, p)
+    randsubseq!([rng=default_rng(),] S, A, p)
 
 Like [`randsubseq`](@ref), but the results are stored in `S`
 (which is resized as needed).
@@ -149,7 +154,7 @@ randsubseq(r::AbstractRNG, A::AbstractArray{T}, p::Real) where {T} =
     randsubseq!(r, T[], A, p)
 
 """
-    randsubseq([rng=GLOBAL_RNG,] A, p) -> Vector
+    randsubseq([rng=default_rng(),] A, p) -> Vector
 
 Return a vector consisting of a random subsequence of the given array `A`, where each
 element of `A` is included (in order) with independent probability `p`. (Complexity is
@@ -177,7 +182,7 @@ ltm52(n::Int, mask::Int=nextpow(2, n)-1) = LessThan(n-1, Masked(mask, UInt52Raw(
 ## shuffle & shuffle!
 
 """
-    shuffle!([rng=GLOBAL_RNG,] v::AbstractArray)
+    shuffle!([rng=default_rng(),] v::AbstractArray)
 
 In-place version of [`shuffle`](@ref): randomly permute `v` in-place,
 optionally supplying the random-number generator `rng`.
@@ -223,7 +228,7 @@ end
 shuffle!(a::AbstractArray) = shuffle!(default_rng(), a)
 
 """
-    shuffle([rng=GLOBAL_RNG,] v::AbstractArray)
+    shuffle([rng=default_rng(),] v::AbstractArray)
 
 Return a randomly permuted copy of `v`. The optional `rng` argument specifies a random
 number generator (see [Random Numbers](@ref)).
@@ -255,7 +260,7 @@ shuffle(a::AbstractArray) = shuffle(default_rng(), a)
 ## randperm & randperm!
 
 """
-    randperm([rng=GLOBAL_RNG,] n::Integer)
+    randperm([rng=default_rng(),] n::Integer)
 
 Construct a random permutation of length `n`. The optional `rng`
 argument specifies a random number generator (see [Random
@@ -283,7 +288,7 @@ randperm(r::AbstractRNG, n::T) where {T <: Integer} = randperm!(r, Vector{T}(und
 randperm(n::Integer) = randperm(default_rng(), n)
 
 """
-    randperm!([rng=GLOBAL_RNG,] A::Array{<:Integer})
+    randperm!([rng=default_rng(),] A::Array{<:Integer})
 
 Construct in `A` a random permutation of length `length(A)`. The
 optional `rng` argument specifies a random number generator (see
@@ -323,7 +328,7 @@ randperm!(a::Array{<:Integer}) = randperm!(default_rng(), a)
 ## randcycle & randcycle!
 
 """
-    randcycle([rng=GLOBAL_RNG,] n::Integer)
+    randcycle([rng=default_rng(),] n::Integer)
 
 Construct a random cyclic permutation of length `n`. The optional `rng`
 argument specifies a random number generator, see [Random Numbers](@ref).
@@ -349,7 +354,7 @@ randcycle(r::AbstractRNG, n::T) where {T <: Integer} = randcycle!(r, Vector{T}(u
 randcycle(n::Integer) = randcycle(default_rng(), n)
 
 """
-    randcycle!([rng=GLOBAL_RNG,] A::Array{<:Integer})
+    randcycle!([rng=default_rng(),] A::Array{<:Integer})
 
 Construct in `A` a random cyclic permutation of length `length(A)`.
 The optional `rng` argument specifies a random number generator, see
diff --git a/stdlib/Random/src/normal.jl b/stdlib/Random/src/normal.jl
index dc5ac5101e39d3..9d0f1595f052fa 100644
--- a/stdlib/Random/src/normal.jl
+++ b/stdlib/Random/src/normal.jl
@@ -10,7 +10,7 @@
 ## randn
 
 """
-    randn([rng=GLOBAL_RNG], [T=Float64], [dims...])
+    randn([rng=default_rng()], [T=Float64], [dims...])
 
 Generate a normally-distributed random number of type `T`
 with mean 0 and standard deviation 1.
@@ -35,7 +35,25 @@ julia> randn(rng, ComplexF32, (2, 3))
   0.611224+1.56403im   0.355204-0.365563im  0.0905552+1.31012im
 ```
 """
-@inline randn(rng::AbstractRNG=default_rng()) = _randn(rng, rand(rng, UInt52Raw()))
+@inline function randn(rng::AbstractRNG=default_rng())
+    #=
+    When defining
+    `@inline randn(rng::AbstractRNG=default_rng()) = _randn(rng, rand(rng, UInt52Raw()))`
+    the function call to `_randn` is currently not inlined, resulting in slightly worse
+    performance for scalar random normal numbers than repeating the code of `_randn`
+    inside the following function.
+    =#
+    @inbounds begin
+        r = rand(rng, UInt52())
+
+        # the following code is identical to the one in `_randn(rng::AbstractRNG, r::UInt64)`
+        rabs = Int64(r>>1) # One bit for the sign
+        idx = rabs & 0xFF
+        x = ifelse(r % Bool, -rabs, rabs)*wi[idx+1]
+        rabs < ki[idx+1] && return x # 99.3% of the time we return here 1st try
+        return randn_unlikely(rng, idx, rabs, x)
+    end
+end
 
 @inline function _randn(rng::AbstractRNG, r::UInt64)
     @inbounds begin
@@ -72,10 +90,19 @@ randn(rng::AbstractRNG, ::Type{Complex{T}}) where {T<:AbstractFloat} =
     Complex{T}(SQRT_HALF * randn(rng, T), SQRT_HALF * randn(rng, T))
 
 
+### fallback randn for float types defining rand:
+function randn(rng::AbstractRNG, ::Type{T}) where {T<:AbstractFloat}
+    # Marsaglia polar variant of Box–Muller transform:
+    while true
+        x, y = 2rand(rng, T)-1, 2rand(rng, T)-1
+        0 < (s = x^2 + y^2) < 1 && return x * sqrt(-2log(s)/s)
+    end
+end
+
 ## randexp
 
 """
-    randexp([rng=GLOBAL_RNG], [T=Float64], [dims...])
+    randexp([rng=default_rng()], [T=Float64], [dims...])
 
 Generate a random number of type `T` according to the
 exponential distribution with scale 1.
@@ -119,11 +146,14 @@ end
     end
 end
 
+### fallback randexp for float types defining rand:
+randexp(rng::AbstractRNG, ::Type{T}) where {T<:AbstractFloat} =
+    -log1p(-rand(rng, T))
 
 ## arrays & other scalar methods
 
 """
-    randn!([rng=GLOBAL_RNG], A::AbstractArray) -> A
+    randn!([rng=default_rng()], A::AbstractArray) -> A
 
 Fill the array `A` with normally-distributed (mean 0, standard deviation 1) random numbers.
 Also see the [`rand`](@ref) function.
@@ -144,7 +174,7 @@ julia> randn!(rng, zeros(5))
 function randn! end
 
 """
-    randexp!([rng=GLOBAL_RNG], A::AbstractArray) -> A
+    randexp!([rng=default_rng()], A::AbstractArray) -> A
 
 Fill the array `A` with random numbers following the exponential distribution
 (with scale 1).
@@ -195,6 +225,22 @@ for randfun in [:randn, :randexp]
             A
         end
 
+        # optimization for Xoshiro, which randomizes natively Array{UInt64}
+        function $randfun!(rng::Union{Xoshiro, TaskLocalRNG}, A::Array{Float64})
+            if length(A) < 7
+                for i in eachindex(A)
+                    @inbounds A[i] = $randfun(rng, Float64)
+                end
+            else
+                GC.@preserve A rand!(rng, UnsafeView{UInt64}(pointer(A), length(A)))
+
+                for i in eachindex(A)
+                    @inbounds A[i] = $_randfun(rng, reinterpret(UInt64, A[i]) >>> 12)
+                end
+            end
+            A
+        end
+
         $randfun!(A::AbstractArray) = $randfun!(default_rng(), A)
 
         # generating arrays
diff --git a/stdlib/Random/test/runtests.jl b/stdlib/Random/test/runtests.jl
index b78e3ae4b8a1f0..a396cfa9e727dc 100644
--- a/stdlib/Random/test/runtests.jl
+++ b/stdlib/Random/test/runtests.jl
@@ -2,6 +2,7 @@
 
 using Test, SparseArrays
 using Test: guardseed
+using Statistics: mean
 
 const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
 isdefined(Main, :OffsetArrays) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "OffsetArrays.jl"))
@@ -46,8 +47,8 @@ let A = zeros(2, 2)
                 0.9103565379264364  0.17732884646626457]
 end
 let A = zeros(2, 2)
-    @test_throws ArgumentError rand!(MersenneTwister(0), A, 5)
-    @test rand(MersenneTwister(0), Int64, 1) == [2118291759721269919]
+    @test_throws MethodError rand!(MersenneTwister(0), A, 5)
+    @test rand(MersenneTwister(0), Int64, 1) == [-3433174948434291912]
 end
 let A = zeros(Int64, 2, 2)
     rand!(MersenneTwister(0), A)
@@ -253,16 +254,20 @@ let mt = MersenneTwister(0)
     end
 
     Random.seed!(mt, 0)
+    Aend = Any[]
+    Bend = Any[]
     for (i,T) in enumerate([Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Int128, Float16, Float32])
         A = Vector{T}(undef, 16)
         B = Vector{T}(undef, 31)
         rand!(mt, A)
         rand!(mt, B)
-        @test A[end] == Any[21, 0x7b, 17385, 0x3086, -1574090021, 0xadcb4460, 6797283068698303107, 0xc8e6453e139271f3,
-                            69855512850528774484795047199183096941, Float16(0.16895), 0.21086597f0][i]
-        @test B[end] == Any[49, 0x65, -3725, 0x719d, 814246081, 0xdf61843a, 2120308604158549401, 0xcb28c236e9c0f608,
-                            61881313582466480231846019869039259750, Float16(0.38672), 0.20027375f0][i]
+        push!(Aend, A[end])
+        push!(Bend, B[end])
     end
+    @test Aend == Any[21, 0x7b, 17385, 0x3086, -1574090021, 0xadcb4460, 6797283068698303107, 0x68a9f9865393cfd6,
+                      33687499368208574024854346399216845930, Float16(0.7744), 0.97259974f0]
+    @test Bend == Any[49, 0x65, -3725, 0x719d, 814246081, 0xdf61843a, -3433174948434291912, 0xd461716f27c91500,
+                      -85900088726243933988214632401750448432, Float16(0.10645), 0.13879478f0]
 
     Random.seed!(mt, 0)
     AF64 = Vector{Float64}(undef, Random.dsfmt_get_min_array_size()-1)
@@ -302,9 +307,32 @@ let a = [rand(RandomDevice(), UInt128) for i=1:10]
     @test reduce(|, a)>>>64 != 0
 end
 
+# wrapper around Float64 to check fallback random generators
+struct FakeFloat64 <: AbstractFloat
+    x::Float64
+end
+Base.rand(rng::AbstractRNG, ::Random.SamplerTrivial{Random.CloseOpen01{FakeFloat64}}) = FakeFloat64(rand(rng))
+for f in (:sqrt, :log, :log1p, :one, :zero, :abs, :+, :-)
+    @eval Base.$f(x::FakeFloat64) = FakeFloat64($f(x.x))
+end
+for f in (:+, :-, :*, :/)
+    @eval begin
+        Base.$f(x::FakeFloat64, y::FakeFloat64) = FakeFloat64($f(x.x,y.x))
+        Base.$f(x::FakeFloat64, y::Real) = FakeFloat64($f(x.x,y))
+        Base.$f(x::Real, y::FakeFloat64) = FakeFloat64($f(x,y.x))
+    end
+end
+for f in (:<, :<=, :>, :>=, :(==), :(!=))
+    @eval begin
+        Base.$f(x::FakeFloat64, y::FakeFloat64) = $f(x.x,y.x)
+        Base.$f(x::FakeFloat64, y::Real) = $f(x.x,y)
+        Base.$f(x::Real, y::FakeFloat64) = $f(x,y.x)
+    end
+end
+
 # test all rand APIs
-for rng in ([], [MersenneTwister(0)], [RandomDevice()])
-    ftypes = [Float16, Float32, Float64]
+for rng in ([], [MersenneTwister(0)], [RandomDevice()], [Xoshiro()])
+    ftypes = [Float16, Float32, Float64, FakeFloat64, BigFloat]
     cftypes = [ComplexF16, ComplexF32, ComplexF64, ftypes...]
     types = [Bool, Char, BigFloat, Base.BitInteger_types..., ftypes...]
     randset = Set(rand(Int, 20))
@@ -401,15 +429,12 @@ for rng in ([], [MersenneTwister(0)], [RandomDevice()])
     rand!(rng..., BitMatrix(undef, 2, 3))  ::BitArray{2}
 
     # Test that you cannot call randn or randexp with non-Float types.
-    for r in [randn, randexp, randn!, randexp!]
-        local r
+    for r in [randn, randexp]
         @test_throws MethodError r(Int)
         @test_throws MethodError r(Int32)
         @test_throws MethodError r(Bool)
         @test_throws MethodError r(String)
         @test_throws MethodError r(AbstractFloat)
-        # TODO(#17627): Consider adding support for randn(BigFloat) and removing this test.
-        @test_throws MethodError r(BigFloat)
 
         @test_throws MethodError r(Int64, (2,3))
         @test_throws MethodError r(String, 1)
@@ -428,7 +453,7 @@ function hist(X, n)
 end
 
 # test uniform distribution of floats
-for rng in [MersenneTwister(), RandomDevice()],
+for rng in [MersenneTwister(), RandomDevice(), Xoshiro()],
     T in [Float16, Float32, Float64, BigFloat],
         prec in (T == BigFloat ? [3, 53, 64, 100, 256, 1000] : [256])
     setprecision(BigFloat, prec) do
@@ -444,12 +469,13 @@ end
 @testset "rand(Bool) uniform distribution" begin
     for n in [rand(1:8), rand(9:16), rand(17:64)]
         a = zeros(Bool, n)
+        a8 = unsafe_wrap(Array, Ptr{UInt8}(pointer(a)), length(a); own=false) # unsafely observe the actual bit patterns in `a`
         as = zeros(Int, n)
         # we will test statistical properties for each position of a,
         # but also for 3 linear combinations of positions (for the array version)
         lcs = unique!.([rand(1:n, 2), rand(1:n, 3), rand(1:n, 5)])
         aslcs = zeros(Int, 3)
-        for rng = (MersenneTwister(), RandomDevice())
+        for rng = (MersenneTwister(), RandomDevice(), Xoshiro())
             for scalar = [false, true]
                 fill!(a, 0)
                 fill!(as, 0)
@@ -461,6 +487,7 @@ end
                         end
                     else
                         as .+= rand!(rng, a)
+                        @test all(x -> x === 0x00 || x === 0x01, a8)
                         aslcs .+= [xor(getindex.(Ref(a), lcs[i])...) for i in 1:3]
                     end
                 end
@@ -473,8 +500,8 @@ end
     end
 end
 
-# test reproducility of methods
-let mta = MersenneTwister(42), mtb = MersenneTwister(42)
+@testset "reproducility of methods for $RNG" for RNG=(MersenneTwister,Xoshiro)
+    mta, mtb = RNG(42), RNG(42)
 
     @test rand(mta) == rand(mtb)
     @test rand(mta,10) == rand(mtb,10)
@@ -592,16 +619,16 @@ end
 @test_throws DomainError DSFMT.DSFMT_state(zeros(Int32, rand(0:DSFMT.JN32-1)))
 
 @test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
-                                         zeros(Float64, 10), zeros(UInt128, MT_CACHE_I>>4), 0, 0)
+                                         zeros(Float64, 10), zeros(UInt128, MT_CACHE_I>>4), 0, 0, 0, 0, -1, -1)
 
 @test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
-                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>4), -1, 0)
+                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>4), -1, 0, 0, 0, -1, -1)
 
 @test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
-                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>3), 0, 0)
+                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>3), 0, 0, 0, 0, -1, -1)
 
 @test_throws DomainError MersenneTwister(zeros(UInt32, 1), DSFMT.DSFMT_state(),
-                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>4), 0, -1)
+                                         zeros(Float64, MT_CACHE_F), zeros(UInt128, MT_CACHE_I>>4), 0, -1, 0, 0, -1, -1)
 
 # seed is private to MersenneTwister
 let seed = rand(UInt32, 10)
@@ -623,7 +650,7 @@ guardseed() do
     m = MersenneTwister(0)
     @test Random.seed!() === g
     @test Random.seed!(rand(UInt)) === g
-    @test Random.seed!(rand(UInt32, rand(1:10))) === g
+    @test Random.seed!(rand(UInt32, rand(1:8))) === g
     @test Random.seed!(m) === m
     @test Random.seed!(m, rand(UInt)) === m
     @test Random.seed!(m, rand(UInt32, rand(1:10))) === m
@@ -657,9 +684,9 @@ let b = ['0':'9';'A':'Z';'a':'z']
 end
 
 # this shouldn't crash (#22403)
-@test_throws ArgumentError rand!(Union{UInt,Int}[1, 2, 3])
+@test_throws MethodError rand!(Union{UInt,Int}[1, 2, 3])
 
-@testset "$RNG() & Random.seed!(rng::$RNG) initializes randomly" for RNG in (MersenneTwister, RandomDevice)
+@testset "$RNG() & Random.seed!(rng::$RNG) initializes randomly" for RNG in (MersenneTwister, RandomDevice, Xoshiro)
     m = RNG()
     a = rand(m, Int)
     m = RNG()
@@ -680,20 +707,60 @@ end
     @test rand(m, Int) ∉ (a, b, c, d)
 end
 
-@testset "MersenneTwister($seed) & Random.seed!(m::MersenneTwister, $seed) produce the same stream" for seed in [0:5; 10000:10005]
-    m = MersenneTwister(seed)
-    a = [rand(m) for _=1:100]
-    Random.seed!(m, seed)
-    @test a == [rand(m) for _=1:100]
+@testset "$RNG(seed) & Random.seed!(m::$RNG, seed) produce the same stream" for RNG=(MersenneTwister,Xoshiro)
+    seeds = Any[0, 1, 2, 10000, 10001, rand(UInt32, 8), rand(UInt128, 3)...]
+    if RNG == Xoshiro
+        push!(seeds, rand(UInt64, rand(1:4)))
+    end
+    for seed=seeds
+        m = RNG(seed)
+        a = [rand(m) for _=1:100]
+        Random.seed!(m, seed)
+        @test a == [rand(m) for _=1:100]
+    end
+end
+
+@testset "Random.seed!(seed) sets Random.GLOBAL_SEED" begin
+    seeds = Any[0, rand(UInt128), rand(UInt64, 4)]
+
+    for seed=seeds
+        Random.seed!(seed)
+        @test Random.GLOBAL_SEED === seed
+    end
+    # two separate loops as otherwise we are no sure that the second call (with GLOBAL_RNG)
+    # actually sets GLOBAL_SEED
+    for seed=seeds
+        Random.seed!(Random.GLOBAL_RNG, seed)
+        @test Random.GLOBAL_SEED === seed
+    end
+
+    Random.seed!(nothing)
+    seed1 = Random.GLOBAL_SEED
+    @test seed1 isa Vector{UInt64} # could change, but must not be nothing
+
+    Random.seed!(Random.GLOBAL_RNG, nothing)
+    seed2 = Random.GLOBAL_SEED
+    @test seed2 isa Vector{UInt64}
+    @test seed2 != seed1
+
+    Random.seed!()
+    seed3 = Random.GLOBAL_SEED
+    @test seed3 isa Vector{UInt64}
+    @test seed3 != seed2
+
+    Random.seed!(Random.GLOBAL_RNG)
+    seed4 = Random.GLOBAL_SEED
+    @test seed4 isa Vector{UInt64}
+    @test seed4 != seed3
 end
 
 struct RandomStruct23964 end
 @testset "error message when rand not defined for a type" begin
-    @test_throws ArgumentError rand(nothing)
-    @test_throws ArgumentError rand(RandomStruct23964())
+    @test_throws MethodError rand(nothing)
+    @test_throws MethodError rand(RandomStruct23964())
 end
 
-@testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)), RandomDevice()),
+@testset "rand(::$(typeof(RNG)), ::UnitRange{$T}" for RNG ∈ (MersenneTwister(rand(UInt128)), RandomDevice(), Xoshiro()),
                                                         T ∈ (Int8, Int16, Int32, UInt32, Int64, Int128, UInt128)
     for S in (SamplerRangeInt, SamplerRangeFast, SamplerRangeNDL)
         S == SamplerRangeNDL && sizeof(T) > 8 && continue
@@ -747,28 +814,26 @@ end
     @test Random.seed!(GLOBAL_RNG, 0) === LOCAL_RNG
     @test Random.seed!(GLOBAL_RNG) === LOCAL_RNG
 
-    mt = MersenneTwister(1)
-    @test copy!(mt, GLOBAL_RNG) === mt
-    @test mt == LOCAL_RNG
-    Random.seed!(mt, 2)
-    @test mt != LOCAL_RNG
-    @test copy!(GLOBAL_RNG, mt) === LOCAL_RNG
-    @test mt == LOCAL_RNG
-    mt2 = copy(GLOBAL_RNG)
-    @test mt2 isa typeof(LOCAL_RNG)
-    @test mt2 !== LOCAL_RNG
-    @test mt2 == LOCAL_RNG
+    xo = Xoshiro()
+    @test copy!(xo, GLOBAL_RNG) === xo
+    @test xo == LOCAL_RNG
+    Random.seed!(xo, 2)
+    @test xo != LOCAL_RNG
+    @test copy!(GLOBAL_RNG, xo) === LOCAL_RNG
+    @test xo == LOCAL_RNG
+    xo2 = copy(GLOBAL_RNG)
+    @test xo2 !== LOCAL_RNG
+    @test xo2 == LOCAL_RNG
 
     for T in (Random.UInt52Raw{UInt64},
-              Random.UInt2x52Raw{UInt128},
               Random.UInt104Raw{UInt128},
               Random.CloseOpen12_64)
         x = Random.SamplerTrivial(T())
-        @test rand(GLOBAL_RNG, x) === rand(mt, x)
+        @test rand(GLOBAL_RNG, x) === rand(xo, x)
     end
     for T in (Int64, UInt64, Int128, UInt128, Bool, Int8, UInt8, Int16, UInt16, Int32, UInt32)
         x = Random.SamplerType{T}()
-        @test rand(GLOBAL_RNG, x) === rand(mt, x)
+        @test rand(GLOBAL_RNG, x) === rand(xo, x)
     end
 
     A = fill(0.0, 100, 100)
@@ -777,27 +842,156 @@ end
     vB = view(B, :, :)
     I1 = Random.SamplerTrivial(Random.CloseOpen01{Float64}())
     I2 = Random.SamplerTrivial(Random.CloseOpen12{Float64}())
-    @test rand!(GLOBAL_RNG, A, I1) === A == rand!(mt, B, I1) === B
+    @test rand!(GLOBAL_RNG, A, I1) === A == rand!(xo, B, I1) === B
     B = fill!(B, 1.0)
     @test rand!(GLOBAL_RNG, vA, I1) === vA
-    rand!(mt, vB, I1)
+    rand!(xo, vB, I1)
     @test A == B
     for T in (Float16, Float32)
         B = fill!(B, 1.0)
-        @test rand!(GLOBAL_RNG, A, I2) === A == rand!(mt, B, I2) === B
+        @test rand!(GLOBAL_RNG, A, I2) === A == rand!(xo, B, I2) === B
         B = fill!(B, 1.0)
-        @test rand!(GLOBAL_RNG, A, I1) === A == rand!(mt, B, I1) === B
+        @test rand!(GLOBAL_RNG, A, I1) === A == rand!(xo, B, I1) === B
     end
     for T in Base.BitInteger_types
         x = Random.SamplerType{T}()
         B = fill!(B, 1.0)
-        @test rand!(GLOBAL_RNG, A, x) === A == rand!(mt, B, x) === B
+        @test rand!(GLOBAL_RNG, A, x) === A == rand!(xo, B, x) === B
     end
     # issue #33170
     @test Sampler(GLOBAL_RNG, 2:4, Val(1)) isa SamplerRangeNDL
     @test Sampler(GLOBAL_RNG, 2:4, Val(Inf)) isa SamplerRangeNDL
+
+    rng = copy(GLOBAL_RNG)
+    # make sure _GLOBAL_RNG and the underlying implementation use the same code path
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
+    @test rand(rng) == rand(GLOBAL_RNG)
 end
 
 @testset "RNGs broadcast as scalars: T" for T in (MersenneTwister, RandomDevice)
     @test length.(rand.(T(), 1:3)) == 1:3
 end
+
+@testset "generated scalar integers do not overlap" begin
+    m = MersenneTwister()
+    xs = reinterpret(UInt64, m.ints)
+    x = rand(m, UInt128)  # m.idxI % 16 == 0
+    @test x % UInt64 == xs[end-1]
+    x = rand(m, UInt64)
+    @test x == xs[end-2]
+    x = rand(m, UInt64)
+    @test x == xs[end-3]
+    x = rand(m, UInt64)
+    @test x == xs[end-4]
+    x = rand(m, UInt128) # m.idxI % 16 == 8
+    @test (x >> 64) % UInt64 == xs[end-6]
+    @test x % UInt64 == xs[end-7]
+    x = rand(m, UInt64)
+    @test x == xs[end-8] # should not be == xs[end-7]
+
+    s = Set{UInt64}()
+    n = 0
+    for _=1:2000
+        x = rand(m, rand((UInt64, UInt128, Int64, Int128)))
+        if sizeof(x) == 8
+            push!(s, x % UInt64)
+            n += 1
+        else
+            push!(s, x % UInt64, (x >> 64) % UInt64)
+            n += 2
+        end
+    end
+    @test length(s) == n
+end
+
+@testset "show" begin
+    @testset "MersenneTwister" begin
+        m = MersenneTwister(123)
+        @test string(m) == "MersenneTwister(123)"
+        Random.jump!(m, 2*big(10)^20)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 0))"
+        @test m == MersenneTwister(123, (200000000000000000000, 0))
+        rand(m)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 1002, 0, 1))"
+
+        @test m == MersenneTwister(123, (200000000000000000000, 1002, 0, 1))
+        rand(m, Int64)
+        @test string(m) == "MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))"
+        @test m == MersenneTwister(123, (200000000000000000000, 2256, 0, 1, 1002, 1))
+
+        m = MersenneTwister(0x0ecfd77f89dcd508caa37a17ebb7556b)
+        @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b)"
+        rand(m, Int64)
+        @test string(m) == "MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))"
+        @test m == MersenneTwister(0xecfd77f89dcd508caa37a17ebb7556b, (0, 1254, 0, 0, 0, 1))
+
+        m = MersenneTwister(0); rand(m, Int64); rand(m)
+        @test string(m) == "MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))"
+        @test m == MersenneTwister(0, (0, 2256, 1254, 1, 0, 1))
+    end
+
+    @testset "RandomDevice" begin
+        @test string(RandomDevice()) == "$RandomDevice()"
+    end
+end
+
+@testset "rand[!] for BigInt/BigFloat" begin
+    rng = MersenneTwister()
+    s = Random.SamplerBigInt(MersenneTwister, 1:big(9))
+    x = rand(s)
+    @test x isa BigInt
+    y = rand!(rng, x, s)
+    @test y === x
+    @test x in 1:9
+
+    for t = BigInt[0, 10, big(2)^100]
+        s = Random.Sampler(rng, t:t) # s.nlimbs == 0
+        @test rand(rng, s) == t
+        @test x === rand!(rng, x, s) == t
+
+        s = Random.Sampler(rng, big(-1):t) # s.nlimbs != 0
+        @test rand(rng, s) ∈ -1:t
+        @test x === rand!(rng, x, s) ∈ -1:t
+
+    end
+
+    s = Random.Sampler(MersenneTwister, Random.CloseOpen01(BigFloat))
+    x = rand(s)
+    @test x isa BigFloat
+    y = rand!(rng, x, s)
+    @test y === x
+    @test 0 <= x < 1
+    s = Random.Sampler(MersenneTwister, Random.CloseOpen12(BigFloat))
+    y = rand!(rng, x, s)
+    @test y === x
+    @test 1 <= x < 2
+
+    old_prec = precision(BigFloat)
+    setprecision(100) do
+        x = rand(s) # should use precision of s
+        @test precision(x) == old_prec
+        x = BigFloat()
+        @test_throws ArgumentError rand!(rng, x, s) # incompatible precision
+    end
+    s = setprecision(100) do
+        Random.Sampler(MersenneTwister, Random.CloseOpen01(BigFloat))
+    end
+    x = rand(s) # should use precision of s
+    @test precision(x) == 100
+    x = BigFloat()
+    @test_throws ArgumentError rand!(rng, x, s) # incompatible precision
+end
+
+@testset "shuffle! for BitArray" begin
+    # Test that shuffle! is uniformly random on BitArrays
+    rng = MersenneTwister(123)
+    a = (reshape(1:(4*5), 4, 5) .<= 2) # 4x5 BitMatrix whose first two elements are true, rest are false
+    m = mean(1:50_000) do _
+        shuffle!(rng, a)
+    end # mean result of shuffle!-ing a 50_000 times. If the shuffle! is uniform, then each index has a
+    # 10% chance of having a true in it, so each value should converge to 0.1.
+    @test minimum(m) >= 0.094
+    @test maximum(m) <= 0.106
+end
diff --git a/stdlib/SHA.version b/stdlib/SHA.version
new file mode 100644
index 00000000000000..f2242a336c6fe1
--- /dev/null
+++ b/stdlib/SHA.version
@@ -0,0 +1,4 @@
+SHA_BRANCH = master
+SHA_SHA1 = 2d1f84e6f8417a1a368de48318640d948b023e7a
+SHA_GIT_URL := https://github.com/JuliaCrypto/SHA.jl.git
+SHA_TAR_URL = https://api.github.com/repos/JuliaCrypto/SHA.jl/tarball/$1
diff --git a/stdlib/SHA/LICENSE.md b/stdlib/SHA/LICENSE.md
deleted file mode 100644
index eec075ce6f2ffe..00000000000000
--- a/stdlib/SHA/LICENSE.md
+++ /dev/null
@@ -1,58 +0,0 @@
-The SHA.jl package is licensed under the MIT "Expat" License:
-
-> Copyright (c) 2014: Elliot Saba.
->
-> Permission is hereby granted, free of charge, to any person obtaining
-> a copy of this software and associated documentation files (the
-> "Software"), to deal in the Software without restriction, including
-> without limitation the rights to use, copy, modify, merge, publish,
-> distribute, sublicense, and/or sell copies of the Software, and to
-> permit persons to whom the Software is furnished to do so, subject to
-> the following conditions:
->
-> The above copyright notice and this permission notice shall be
-> included in all copies or substantial portions of the Software.
->
-> THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-> EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-> MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-> IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-> CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-> TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-> SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-This package was inspired by the SHA2 [source code from Minix](https://github.com/minix3/minix/blob/b6cbf7203b080219de306404f8022a65b7884f33/common/lib/libc/hash/sha2/sha2.c), itself released under the BSD license:
-
-> sha2.c
->
-> Version 1.0.0beta1
->
-> Written by Aaron D. Gifford <me@aarongifford.com>
->
-> Copyright 2000 Aaron D. Gifford.  All rights reserved.
->
-> Redistribution and use in source and binary forms, with or without
-> modification, are permitted provided that the following conditions
-> are met:
->
-> 1. Redistributions of source code must retain the above copyright
->    notice, this list of conditions and the following disclaimer.
->
-> 2. Redistributions in binary form must reproduce the above copyright
->    notice, this list of conditions and the following disclaimer in the
->    documentation and/or other materials provided with the distribution.
->
-> 3. Neither the name of the copyright holder nor the names of contributors
->    may be used to endorse or promote products derived from this software
->    without specific prior written permission.
->
-> THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) AND CONTRIBUTOR(S) ``AS IS'' AND
-> ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-> IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-> ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR(S) OR CONTRIBUTOR(S) BE LIABLE
-> FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-> DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-> OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-> HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-> LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-> OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
diff --git a/stdlib/SHA/Project.toml b/stdlib/SHA/Project.toml
deleted file mode 100644
index 7fe8ff0d5c1921..00000000000000
--- a/stdlib/SHA/Project.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-name = "SHA"
-uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
-
-[extras]
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test"]
diff --git a/stdlib/SHA/docs/src/index.md b/stdlib/SHA/docs/src/index.md
deleted file mode 100644
index e49c0c9e3e8cc2..00000000000000
--- a/stdlib/SHA/docs/src/index.md
+++ /dev/null
@@ -1,75 +0,0 @@
-# SHA
-
-
-Usage is very straightforward:
-```julia
-julia> using SHA
-
-julia> bytes2hex(sha256("test"))
-"9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
-```
-
-Each exported function (at the time of this writing, SHA-1, SHA-2 224, 256, 384 and 512, and SHA-3 224, 256, 384 and 512 functions are implemented) takes in either an `Array{UInt8}`, a `ByteString` or an `IO` object.  This makes it trivial to checksum a file:
-
-```julia
-shell> cat /tmp/test.txt
-test
-julia> using SHA
-
-julia> open("/tmp/test.txt") do f
-           sha2_256(f)
-       end
-32-element Array{UInt8,1}:
- 0x9f
- 0x86
- 0xd0
- 0x81
- 0x88
- 0x4c
- 0x7d
- 0x65
-    ⋮
- 0x5d
- 0x6c
- 0x15
- 0xb0
- 0xf0
- 0x0a
- 0x08
-```
-
-Due to the colloquial usage of `sha256` to refer to `sha2_256`, convenience functions are provided, mapping `shaxxx()` function calls to `sha2_xxx()`.  For SHA-3, no such colloquialisms exist and the user must use the full `sha3_xxx()` names.
-
-`shaxxx()` takes `AbstractString` and array-like objects (`NTuple` and `Array`) with elements of type `UInt8`.
-
-To create a hash from multiple items the `SHAX_XXX_CTX()` types can be used to create a stateful hash object that
-is updated with `update!` and finalized with `digest!`
-
-```julia
-julia> ctx = SHA2_256_CTX()
-SHA2 256-bit hash state
-
-julia> update!(ctx, b"some data")
-0x0000000000000009
-
-julia> update!(ctx, b"some more data")
-0x0000000000000017
-
-julia> digest!(ctx)
-32-element Vector{UInt8}:
- 0xbe
- 0xcf
- 0x23
- 0xda
- 0xaf
- 0x02
-    ⋮
- 0x25
- 0x52
- 0x19
- 0xa0
- 0x8b
- 0xc5
-```
-
-Note that, at the time of this writing, the SHA3 code is not optimized, and as such is roughly an order of magnitude slower than SHA2.
diff --git a/stdlib/SHA/src/SHA.jl b/stdlib/SHA/src/SHA.jl
deleted file mode 100644
index 7edaab403156a6..00000000000000
--- a/stdlib/SHA/src/SHA.jl
+++ /dev/null
@@ -1,137 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-    SHA
-
-The SHA module provides hashing functionality for SHA1, SHA2 and SHA3 algorithms.
-
-They are implemented as both pure functions for hashing single pieces of data,
-or a stateful context which can be updated with the `update!` function and
-finalized with `digest!`.
-
-```julia-repl
-julia> sha1(b"some data")
-20-element Vector{UInt8}:
- 0xba
- 0xf3
-    ⋮
- 0xe3
- 0x56
-
-
-julia> ctx = SHA1_CTX()
-SHA1 hash state
-
-julia> update!(ctx, b"some data")
-0x0000000000000009
-
-julia> digest!(ctx)
-20-element Vector{UInt8}:
- 0xba
- 0xf3
-    ⋮
- 0xe3
- 0x56
-"""
-module SHA
-
-# Export convenience functions, context types, update!() and digest!() functions
-export sha1, SHA1_CTX, update!, digest!
-export sha224, sha256, sha384, sha512
-export sha2_224, sha2_256, sha2_384, sha2_512
-export sha3_224, sha3_256, sha3_384, sha3_512
-export SHA224_CTX, SHA256_CTX, SHA384_CTX, SHA512_CTX
-export SHA2_224_CTX, SHA2_256_CTX, SHA2_384_CTX, SHA2_512_CTX
-export SHA3_224_CTX, SHA3_256_CTX, SHA3_384_CTX, SHA3_512_CTX
-export HMAC_CTX, hmac_sha1
-export hmac_sha224, hmac_sha256, hmac_sha384, hmac_sha512
-export hmac_sha2_224, hmac_sha2_256, hmac_sha2_384, hmac_sha2_512
-export hmac_sha3_224, hmac_sha3_256, hmac_sha3_384, hmac_sha3_512
-
-# data to be hashed:
-const AbstractBytes = Union{AbstractVector{UInt8},NTuple{N,UInt8} where N}
-
-include("constants.jl")
-include("types.jl")
-include("base_functions.jl")
-include("sha1.jl")
-include("sha2.jl")
-include("sha3.jl")
-include("common.jl")
-include("hmac.jl")
-
-# Create data types and convenience functions for each hash implemented
-for (f, ctx) in [(:sha1, :SHA1_CTX),
-                 (:sha224, :SHA224_CTX),
-                 (:sha256, :SHA256_CTX),
-                 (:sha384, :SHA384_CTX),
-                 (:sha512, :SHA512_CTX),
-                 (:sha2_224, :SHA2_224_CTX),
-                 (:sha2_256, :SHA2_256_CTX),
-                 (:sha2_384, :SHA2_384_CTX),
-                 (:sha2_512, :SHA2_512_CTX),
-                 (:sha3_224, :SHA3_224_CTX),
-                 (:sha3_256, :SHA3_256_CTX),
-                 (:sha3_384, :SHA3_384_CTX),
-                 (:sha3_512, :SHA3_512_CTX),]
-    g = Symbol(:hmac_, f)
-
-    @eval begin
-        # Our basic function is to process arrays of bytes
-        """
-            $($f)(data)
-
-        Hash data using the $($f) algorithm and return the resulting digest.
-        See also [`$($ctx)`](@ref).
-        """
-        function $f(data::AbstractBytes)
-            ctx = $ctx()
-            update!(ctx, data)
-            return digest!(ctx)
-
-        """
-            $($g)(key, data)
-
-        Hash data using the $($f) algorithm using the passed key
-        See also [`HMAC_CTX`](@ref).
-        """
-        end
-        function $g(key::Vector{UInt8}, data::AbstractBytes)
-            ctx = HMAC_CTX($ctx(), key)
-            update!(ctx, data)
-            return digest!(ctx)
-        end
-
-        # AbstractStrings are a pretty handy thing to be able to crunch through
-        $f(str::AbstractString) = $f(String(str)) # always crunch UTF-8 repr
-        $f(str::String) = $f(codeunits(str))
-        $g(key::Vector{UInt8}, str::AbstractString) = $g(key, String(str))
-        $g(key::Vector{UInt8}, str::String) = $g(key, codeunits(str))
-
-        """
-            $($f)(io::IO)
-
-        Hash data from io using $($f) algorithm from io.
-        """
-        function $f(io::IO, chunk_size=4*1024)
-            ctx = $ctx()
-            buff = Vector{UInt8}(undef, chunk_size)
-            while !eof(io)
-                num_read = readbytes!(io, buff)
-                update!(ctx, buff, num_read)
-            end
-            return digest!(ctx)
-        end
-        function $g(key::Vector{UInt8}, io::IO, chunk_size=4*1024)
-            ctx = HMAC_CTX($ctx(), key)
-            buff = Vector{UInt8}(chunk_size)
-            while !eof(io)
-                num_read = readbytes!(io, buff)
-                update!(ctx, buff, num_read)
-            end
-            return digest!(ctx)
-        end
-    end
-end
-
-end #module SHA
diff --git a/stdlib/SHA/src/base_functions.jl b/stdlib/SHA/src/base_functions.jl
deleted file mode 100644
index 0b6216fdbdf18d..00000000000000
--- a/stdlib/SHA/src/base_functions.jl
+++ /dev/null
@@ -1,42 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# THE SIX LOGICAL FUNCTIONS
-#
-# Bit shifting and rotation (used by the six SHA-XYZ logical functions:
-#
-#   NOTE:  The naming of R and S appears backwards here (R is a SHIFT and
-#   S is a ROTATION) because the SHA2-256/384/512 description document
-#   (see http://csrc.nist.gov/cryptval/shs/sha256-384-512.pdf) uses this
-#   same "backwards" definition.
-
-# 32-bit Rotate-right (equivalent to S32 in SHA-256) and rotate-left
-rrot(b,x,width) = ((x >> b) | (x << (width - b)))
-lrot(b,x,width) = ((x << b) | (x >> (width - b)))
-
-# Shift-right (used in SHA-256, SHA-384, and SHA-512):
-R(b,x)   = (x >> b)
-# 32-bit Rotate-right (used in SHA-256):
-S32(b,x) = rrot(b,x,32)
-# 64-bit Rotate-right (used in SHA-384 and SHA-512):
-S64(b,x) = rrot(b,x,64)
-# 64-bit Rotate-left (used in SHA3)
-L64(b,x) = lrot(b,x,64)
-
-# Two of six logical functions used in SHA-256, SHA-384, and SHA-512:
-Ch(x,y,z)  = ((x & y) ⊻ (~x & z))
-Maj(x,y,z) = ((x & y) ⊻ (x & z) ⊻ (y & z))
-
-# Four of six logical functions used in SHA-256:
-Sigma0_256(x) = (S32(2,  UInt32(x)) ⊻ S32(13, UInt32(x)) ⊻ S32(22, UInt32(x)))
-Sigma1_256(x) = (S32(6,  UInt32(x)) ⊻ S32(11, UInt32(x)) ⊻ S32(25, UInt32(x)))
-sigma0_256(x) = (S32(7,  UInt32(x)) ⊻ S32(18, UInt32(x)) ⊻ R(3 ,   UInt32(x)))
-sigma1_256(x) = (S32(17, UInt32(x)) ⊻ S32(19, UInt32(x)) ⊻ R(10,   UInt32(x)))
-
-# Four of six logical functions used in SHA-384 and SHA-512:
-Sigma0_512(x) = (S64(28, UInt64(x)) ⊻ S64(34, UInt64(x)) ⊻ S64(39, UInt64(x)))
-Sigma1_512(x) = (S64(14, UInt64(x)) ⊻ S64(18, UInt64(x)) ⊻ S64(41, UInt64(x)))
-sigma0_512(x) = (S64( 1, UInt64(x)) ⊻ S64( 8, UInt64(x)) ⊻ R( 7,   UInt64(x)))
-sigma1_512(x) = (S64(19, UInt64(x)) ⊻ S64(61, UInt64(x)) ⊻ R( 6,   UInt64(x)))
-
-# Let's be able to bswap arrays of these types as well
-bswap!(x::Vector{<:Integer}) = map!(bswap, x, x)
diff --git a/stdlib/SHA/src/common.jl b/stdlib/SHA/src/common.jl
deleted file mode 100644
index 5500a372f5fa23..00000000000000
--- a/stdlib/SHA/src/common.jl
+++ /dev/null
@@ -1,116 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Common update and digest functions which work across SHA1 and SHA2
-
-# update! takes in variable-length data, buffering it into blocklen()-sized pieces,
-# calling transform!() when necessary to update the internal hash state.
-"""
-    update!(context, data[, datalen])
-
-Update the SHA context with the bytes in data. See also [`digest!`](@ref) for
-finalizing the hash.
-
-# Examples
-```julia-repl
-julia> ctx = SHA1_CTX()
-SHA1 hash state
-
-julia> update!(ctx, b"data to to be hashed")
-```
-"""
-function update!(context::T, data::U, datalen=length(data)) where {T<:SHA_CTX, U<:AbstractBytes}
-    # We need to do all our arithmetic in the proper bitwidth
-    UIntXXX = typeof(context.bytecount)
-
-    # Process as many complete blocks as possible
-    0 ≤ datalen ≤ length(data) || throw(BoundsError(data, firstindex(data)+datalen-1))
-    len = convert(UIntXXX, datalen)
-    data_idx = convert(UIntXXX, firstindex(data)-1)
-    usedspace = context.bytecount % blocklen(T)
-    while len - data_idx + usedspace >= blocklen(T)
-        # Fill up as much of the buffer as we can with the data given us
-        copyto!(context.buffer, usedspace + 1, data, data_idx + 1, blocklen(T) - usedspace)
-
-        transform!(context)
-        context.bytecount += blocklen(T) - usedspace
-        data_idx += blocklen(T) - usedspace
-        usedspace = convert(UIntXXX, 0)
-    end
-
-    # There is less than a complete block left, but we need to save the leftovers into context.buffer:
-    if len > data_idx
-        copyto!(context.buffer, usedspace + 1, data, data_idx + 1, len - data_idx)
-        context.bytecount += len - data_idx
-    end
-end
-
-# Pad the remainder leaving space for the bitcount
-function pad_remainder!(context::T) where T<:SHA_CTX
-    usedspace = context.bytecount % blocklen(T)
-    # If we have anything in the buffer still, pad and transform that data
-    if usedspace > 0
-        # Begin padding with a 1 bit:
-        context.buffer[usedspace+1] = 0x80
-        usedspace += 1
-
-        # If we have room for the bitcount, then pad up to the short blocklen
-        if usedspace <= short_blocklen(T)
-            for i = 1:(short_blocklen(T) - usedspace)
-                context.buffer[usedspace + i] = 0x0
-            end
-        else
-            # Otherwise, pad out this entire block, transform it, then pad up to short blocklen
-            for i = 1:(blocklen(T) - usedspace)
-                context.buffer[usedspace + i] = 0x0
-            end
-            transform!(context)
-            for i = 1:short_blocklen(T)
-                context.buffer[i] = 0x0
-            end
-        end
-    else
-        # If we don't have anything in the buffer, pad an entire shortbuffer
-        context.buffer[1] = 0x80
-        for i = 2:short_blocklen(T)
-            context.buffer[i] = 0x0
-        end
-    end
-end
-
-
-# Clear out any saved data in the buffer, append total bitlength, and return our precious hash!
-# Note: SHA3_CTX has a more specialised method
-"""
-    digest!(context)
-
-Finalize the SHA context and return the hash as array of bytes (Array{Uint8, 1}).
-
-# Examples
-```julia-repl
-julia> ctx = SHA1_CTX()
-SHA1 hash state
-
-julia> update!(ctx, b"data to to be hashed")
-
-julia> digest!(ctx)
-20-element Array{UInt8,1}:
- 0x83
- 0xe4
- ⋮
- 0x89
- 0xf5
-```
-"""
-function digest!(context::T) where T<:SHA_CTX
-    pad_remainder!(context)
-    # Store the length of the input data (in bits) at the end of the padding
-    bitcount_idx = div(short_blocklen(T), sizeof(context.bytecount)) + 1
-    pbuf = Ptr{typeof(context.bytecount)}(pointer(context.buffer))
-    unsafe_store!(pbuf, bswap(context.bytecount * 8), bitcount_idx)
-
-    # Final transform:
-    transform!(context)
-
-    # Return the digest
-    return reinterpret(UInt8, bswap!(context.state))[1:digestlen(T)]
-end
diff --git a/stdlib/SHA/src/constants.jl b/stdlib/SHA/src/constants.jl
deleted file mode 100644
index 3c5fde92d3863e..00000000000000
--- a/stdlib/SHA/src/constants.jl
+++ /dev/null
@@ -1,131 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# SHA initial hash values and constants
-
-# Hash constant words K for SHA1
-const K1 = UInt32[
-    0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6
-]
-
-# Initial hash value H for SHA1
-const SHA1_initial_hash_value = UInt32[
-    0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
-]
-
-
-
-# Hash constant words K for SHA-256:
-const K256 = UInt32[
-    0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
-    0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
-    0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
-    0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
-    0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
-    0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
-    0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
-    0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
-    0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
-    0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
-    0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
-    0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
-    0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
-    0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
-    0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
-    0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
-]
-
-# Initial hash value H for SHA-224:
-const SHA2_224_initial_hash_value = UInt32[
-    0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
-    0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
-]
-
-
-const SHA2_256_initial_hash_value = UInt32[
-    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
-    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
-]
-
-# Hash constant words K for SHA-384 and SHA-512:
-const K512 = UInt64[
-    0x428a2f98d728ae22, 0x7137449123ef65cd,
-    0xb5c0fbcfec4d3b2f, 0xe9b5dba58189dbbc,
-    0x3956c25bf348b538, 0x59f111f1b605d019,
-    0x923f82a4af194f9b, 0xab1c5ed5da6d8118,
-    0xd807aa98a3030242, 0x12835b0145706fbe,
-    0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2,
-    0x72be5d74f27b896f, 0x80deb1fe3b1696b1,
-    0x9bdc06a725c71235, 0xc19bf174cf692694,
-    0xe49b69c19ef14ad2, 0xefbe4786384f25e3,
-    0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65,
-    0x2de92c6f592b0275, 0x4a7484aa6ea6e483,
-    0x5cb0a9dcbd41fbd4, 0x76f988da831153b5,
-    0x983e5152ee66dfab, 0xa831c66d2db43210,
-    0xb00327c898fb213f, 0xbf597fc7beef0ee4,
-    0xc6e00bf33da88fc2, 0xd5a79147930aa725,
-    0x06ca6351e003826f, 0x142929670a0e6e70,
-    0x27b70a8546d22ffc, 0x2e1b21385c26c926,
-    0x4d2c6dfc5ac42aed, 0x53380d139d95b3df,
-    0x650a73548baf63de, 0x766a0abb3c77b2a8,
-    0x81c2c92e47edaee6, 0x92722c851482353b,
-    0xa2bfe8a14cf10364, 0xa81a664bbc423001,
-    0xc24b8b70d0f89791, 0xc76c51a30654be30,
-    0xd192e819d6ef5218, 0xd69906245565a910,
-    0xf40e35855771202a, 0x106aa07032bbd1b8,
-    0x19a4c116b8d2d0c8, 0x1e376c085141ab53,
-    0x2748774cdf8eeb99, 0x34b0bcb5e19b48a8,
-    0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb,
-    0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3,
-    0x748f82ee5defb2fc, 0x78a5636f43172f60,
-    0x84c87814a1f0ab72, 0x8cc702081a6439ec,
-    0x90befffa23631e28, 0xa4506cebde82bde9,
-    0xbef9a3f7b2c67915, 0xc67178f2e372532b,
-    0xca273eceea26619c, 0xd186b8c721c0c207,
-    0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178,
-    0x06f067aa72176fba, 0x0a637dc5a2c898a6,
-    0x113f9804bef90dae, 0x1b710b35131c471b,
-    0x28db77f523047d84, 0x32caab7b40c72493,
-    0x3c9ebe0a15c9bebc, 0x431d67c49c100d4c,
-    0x4cc5d4becb3e42b6, 0x597f299cfc657e2a,
-    0x5fcb6fab3ad6faec, 0x6c44198c4a475817
-]
-
-# Initial hash value H for SHA-384
-const SHA2_384_initial_hash_value = UInt64[
-    0xcbbb9d5dc1059ed8, 0x629a292a367cd507,
-    0x9159015a3070dd17, 0x152fecd8f70e5939,
-    0x67332667ffc00b31, 0x8eb44a8768581511,
-    0xdb0c2e0d64f98fa7, 0x47b5481dbefa4fa4
-]
-
-# Initial hash value H for SHA-512
-const SHA2_512_initial_hash_value = UInt64[
-    0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
-    0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1,
-    0x510e527fade682d1, 0x9b05688c2b3e6c1f,
-    0x1f83d9abfb41bd6b, 0x5be0cd19137e2179
-]
-
-# Round constants for SHA3 rounds
-const SHA3_ROUND_CONSTS = UInt64[
-    0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
-    0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
-    0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
-    0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
-    0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
-    0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
-    0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
-    0x8000000000008080, 0x0000000080000001, 0x8000000080008008
-]
-
-# Rotation constants for SHA3 rounds
-const SHA3_ROTC = UInt64[
-    1,  3,  6,  10, 15, 21, 28, 36, 45, 55, 2,  14,
-    27, 41, 56, 8,  25, 43, 62, 18, 39, 61, 20, 44
-]
-
-# Permutation indices for SHA3 rounds (+1'ed so as to work with julia's 1-based indexing)
-const SHA3_PILN = Int[
-    11, 8,  12, 18, 19, 4, 6,  17, 9,  22, 25, 5,
-    16, 24, 20, 14, 13, 3, 21, 15, 23, 10,  7,  2
-]
diff --git a/stdlib/SHA/src/hmac.jl b/stdlib/SHA/src/hmac.jl
deleted file mode 100644
index 1ba9b95c6109df..00000000000000
--- a/stdlib/SHA/src/hmac.jl
+++ /dev/null
@@ -1,35 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-struct HMAC_CTX{CTX<:SHA_CTX}
-    context::CTX
-    outer::Vector{UInt8}
-
-    function HMAC_CTX(ctx::CTX, key::Vector{UInt8}, blocksize::Integer=blocklen(CTX)) where CTX
-        if length(key) > blocksize
-            _ctx = CTX()
-            update!(_ctx, key)
-            key = digest!(_ctx)
-        end
-
-        pad = blocksize - length(key)
-
-        if pad > 0
-            key = [key; fill(0x00, pad)]
-        end
-
-        update!(ctx, key .⊻ 0x36)
-        new{CTX}(ctx, key .⊻ 0x5c)
-    end
-end
-
-function update!(ctx::HMAC_CTX, data, datalen=length(data))
-    update!(ctx.context, data, datalen)
-end
-
-function digest!(ctx::HMAC_CTX{CTX}) where CTX
-    digest = digest!(ctx.context)
-    _ctx = CTX()
-    update!(_ctx, ctx.outer)
-    update!(_ctx, digest)
-    digest!(_ctx)
-end
diff --git a/stdlib/SHA/src/sha1.jl b/stdlib/SHA/src/sha1.jl
deleted file mode 100644
index 71fd55e20fc1e2..00000000000000
--- a/stdlib/SHA/src/sha1.jl
+++ /dev/null
@@ -1,95 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Nonlinear functions, in order to encourage inlining, these sadly are not an array of lambdas
-function Round0(b,c,d)
-    return UInt32((b & c) | (~b & d))
-end
-
-function Round1And3(b,c,d)
-    return UInt32(b ⊻ c ⊻ d)
-end
-
-function Round2(b,c,d)
-    return UInt32((b & c) | (b & d) | (c & d))
-end
-
-function transform!(context::SHA1_CTX)
-    # Buffer is 16 elements long, we expand to 80
-    pbuf = buffer_pointer(context)
-    for i in 1:16
-        context.W[i] = bswap(unsafe_load(pbuf, i))
-    end
-
-    # First round of expansions
-    for i in 17:32
-        @inbounds begin
-            context.W[i] = lrot(1, context.W[i-3] ⊻ context.W[i-8] ⊻ context.W[i-14] ⊻ context.W[i-16], 32)
-        end
-    end
-
-    # Second round of expansions (possibly 4-way SIMD-able)
-    for i in 33:80
-        @inbounds begin
-            context.W[i] = lrot(2, context.W[i-6] ⊻ context.W[i-16] ⊻ context.W[i-28] ⊻ context.W[i-32], 32)
-        end
-    end
-
-    # Initialize registers with the previous intermediate values (our state)
-    a = context.state[1]
-    b = context.state[2]
-    c = context.state[3]
-    d = context.state[4]
-    e = context.state[5]
-
-    # Run our rounds, manually separated into the four rounds, unfortunately using an array of lambdas
-    # really kills performance and causes a huge number of allocations, so we make it easy on the compiler
-    for i = 1:20
-        @inbounds begin
-            temp = UInt32(lrot(5, a, 32) + Round0(b,c,d) + e + context.W[i] + K1[1])
-            e = d
-            d = c
-            c = lrot(30, b, 32)
-            b = a
-            a = temp
-        end
-    end
-
-    for i = 21:40
-        @inbounds begin
-            temp = UInt32(lrot(5, a, 32) + Round1And3(b,c,d) + e + context.W[i] + K1[2])
-            e = d
-            d = c
-            c = lrot(30, b, 32)
-            b = a
-            a = temp
-        end
-    end
-
-    for i = 41:60
-        @inbounds begin
-            temp = UInt32(lrot(5, a, 32) + Round2(b,c,d) + e + context.W[i] + K1[3])
-            e = d
-            d = c
-            c = lrot(30, b, 32)
-            b = a
-            a = temp
-        end
-    end
-
-    for i = 61:80
-        @inbounds begin
-            temp = UInt32(lrot(5, a, 32) + Round1And3(b,c,d) + e + context.W[i] + K1[4])
-            e = d
-            d = c
-            c = lrot(30, b, 32)
-            b = a
-            a = temp
-        end
-    end
-
-    context.state[1] += a
-    context.state[2] += b
-    context.state[3] += c
-    context.state[4] += d
-    context.state[5] += e
-end
diff --git a/stdlib/SHA/src/sha2.jl b/stdlib/SHA/src/sha2.jl
deleted file mode 100644
index 5cc4363786e390..00000000000000
--- a/stdlib/SHA/src/sha2.jl
+++ /dev/null
@@ -1,136 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-function transform!(context::T) where {T<:Union{SHA2_224_CTX,SHA2_256_CTX}}
-    pbuf = buffer_pointer(context)
-    # Initialize registers with the previous intermediate values (our state)
-    a = context.state[1]
-    b = context.state[2]
-    c = context.state[3]
-    d = context.state[4]
-    e = context.state[5]
-    f = context.state[6]
-    g = context.state[7]
-    h = context.state[8]
-
-    # Run initial rounds
-    for j = 1:16
-        @inbounds begin
-            # We bitswap every input byte
-            v = bswap(unsafe_load(pbuf, j))
-            unsafe_store!(pbuf, v, j)
-
-            # Apply the SHA-256 compression function to update a..h
-            T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + v
-            T2 = Sigma0_256(a) + Maj(a, b, c)
-            h = g
-            g = f
-            f = e
-            e = UInt32(d + T1)
-            d = c
-            c = b
-            b = a
-            a = UInt32(T1 + T2)
-        end
-    end
-
-    for j = 17:64
-        @inbounds begin
-            # Implicit message block expansion:
-            s0 = unsafe_load(pbuf, mod1(j + 1, 16))
-            s0 = sigma0_256(s0)
-            s1 = unsafe_load(pbuf, mod1(j + 14, 16))
-            s1 = sigma1_256(s1)
-
-            # Apply the SHA-256 compression function to update a..h
-            v = unsafe_load(pbuf, mod1(j, 16)) + s1 + unsafe_load(pbuf, mod1(j + 9, 16)) + s0
-            unsafe_store!(pbuf, v, mod1(j, 16))
-            T1 = h + Sigma1_256(e) + Ch(e, f, g) + K256[j] + v
-            T2 = Sigma0_256(a) + Maj(a, b, c)
-            h = g
-            g = f
-            f = e
-            e = UInt32(d + T1)
-            d = c
-            c = b
-            b = a
-            a = UInt32(T1 + T2)
-        end
-    end
-
-    # Compute the current intermediate hash value
-    context.state[1] += a
-    context.state[2] += b
-    context.state[3] += c
-    context.state[4] += d
-    context.state[5] += e
-    context.state[6] += f
-    context.state[7] += g
-    context.state[8] += h
-end
-
-
-function transform!(context::Union{SHA2_384_CTX,SHA2_512_CTX})
-    pbuf = buffer_pointer(context)
-    # Initialize registers with the prev. intermediate value
-    a = context.state[1]
-    b = context.state[2]
-    c = context.state[3]
-    d = context.state[4]
-    e = context.state[5]
-    f = context.state[6]
-    g = context.state[7]
-    h = context.state[8]
-
-    for j = 1:16
-        @inbounds begin
-            v = bswap(unsafe_load(pbuf, j))
-            unsafe_store!(pbuf, v, j)
-
-            # Apply the SHA-512 compression function to update a..h
-            T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + v
-            T2 = Sigma0_512(a) + Maj(a, b, c)
-            h = g
-            g = f
-            f = e
-            e = d + T1
-            d = c
-            c = b
-            b = a
-            a = T1 + T2
-        end
-    end
-
-    for j = 17:80
-        @inbounds begin
-            # Implicit message block expansion:
-            s0 = unsafe_load(pbuf, mod1(j + 1, 16))
-            s0 = sigma0_512(s0)
-            s1 = unsafe_load(pbuf, mod1(j + 14, 16))
-            s1 = sigma1_512(s1)
-
-            # Apply the SHA-512 compression function to update a..h
-            v = unsafe_load(pbuf, mod1(j, 16)) + s1 + unsafe_load(pbuf, mod1(j + 9, 16)) + s0
-            unsafe_store!(pbuf, v, mod1(j, 16))
-            T1 = h + Sigma1_512(e) + Ch(e, f, g) + K512[j] + v
-            T2 = Sigma0_512(a) + Maj(a, b, c)
-            h = g
-            g = f
-            f = e
-            e = d + T1
-            d = c
-            c = b
-            b = a
-            a = T1 + T2
-        end
-    end
-
-    # Compute the current intermediate hash value
-    context.state[1] += a
-    context.state[2] += b
-    context.state[3] += c
-    context.state[4] += d
-    context.state[5] += e
-    context.state[6] += f
-    context.state[7] += g
-    context.state[8] += h
-end
diff --git a/stdlib/SHA/src/sha3.jl b/stdlib/SHA/src/sha3.jl
deleted file mode 100644
index 6f94495630742a..00000000000000
--- a/stdlib/SHA/src/sha3.jl
+++ /dev/null
@@ -1,83 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-function transform!(context::T) where {T<:SHA3_CTX}
-    # First, update state with buffer
-    pbuf = Ptr{eltype(context.state)}(pointer(context.buffer))
-    for idx in 1:div(blocklen(T),8)
-        context.state[idx] = context.state[idx] ⊻ unsafe_load(pbuf, idx)
-    end
-    bc = context.bc
-    state = context.state
-
-    # We always assume 24 rounds
-    @inbounds for round in 0:23
-        # Theta function
-        for i in 1:5
-            bc[i] = state[i] ⊻ state[i + 5] ⊻ state[i + 10] ⊻ state[i + 15] ⊻ state[i + 20]
-        end
-
-        for i in 0:4
-            temp = bc[rem(i + 4, 5) + 1] ⊻ L64(1, bc[rem(i + 1, 5) + 1])
-            j = 0
-            while j <= 20
-                state[Int(i + j + 1)] = state[i + j + 1] ⊻ temp
-                j += 5
-            end
-        end
-
-        # Rho Pi
-        temp = state[2]
-        for i in 1:24
-            j = SHA3_PILN[i]
-            bc[1] = state[j]
-            state[j] = L64(SHA3_ROTC[i], temp)
-            temp = bc[1]
-        end
-
-        # Chi
-        j = 0
-        while j <= 20
-            for i in 1:5
-                bc[i] = state[i + j]
-            end
-            for i in 0:4
-                state[j + i + 1] = state[j + i + 1] ⊻ (~bc[rem(i + 1, 5) + 1] & bc[rem(i + 2, 5) + 1])
-            end
-            j += 5
-        end
-
-        # Iota
-        state[1] = state[1] ⊻ SHA3_ROUND_CONSTS[round+1]
-    end
-
-    return context.state
-end
-
-
-
-# Finalize data in the buffer, append total bitlength, and return our precious hash!
-function digest!(context::T) where {T<:SHA3_CTX}
-    usedspace = context.bytecount % blocklen(T)
-    # If we have anything in the buffer still, pad and transform that data
-    if usedspace < blocklen(T) - 1
-        # Begin padding with a 0x06
-        context.buffer[usedspace+1] = 0x06
-        # Fill with zeros up until the last byte
-        context.buffer[usedspace+2:end-1] .= 0x00
-        # Finish it off with a 0x80
-        context.buffer[end] = 0x80
-    else
-        # Otherwise, we have to add on a whole new buffer just for the zeros and 0x80
-        context.buffer[end] = 0x06
-        transform!(context)
-
-        context.buffer[1:end-1] = 0x0
-        context.buffer[end] = 0x80
-    end
-
-    # Final transform:
-    transform!(context)
-
-    # Return the digest
-    return reinterpret(UInt8, context.state)[1:digestlen(T)]
-end
diff --git a/stdlib/SHA/src/types.jl b/stdlib/SHA/src/types.jl
deleted file mode 100644
index 3534be4fafc96d..00000000000000
--- a/stdlib/SHA/src/types.jl
+++ /dev/null
@@ -1,230 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Type hierarchy to aid in splitting up of SHA2 algorithms
-# as SHA224/256 are similar, and SHA-384/512 are similar
-abstract type SHA_CTX end
-abstract type SHA2_CTX <: SHA_CTX end
-abstract type SHA3_CTX <: SHA_CTX end
-import Base: copy
-
-# We derive SHA1_CTX straight from SHA_CTX since it doesn't have a
-# family of types like SHA2 or SHA3 do
-mutable struct SHA1_CTX <: SHA_CTX
-    state::Array{UInt32,1}
-    bytecount::UInt64
-    buffer::Array{UInt8,1}
-    W::Array{UInt32,1}
-end
-
-# SHA2 224/256/384/512-bit Context Structures
-mutable struct SHA2_224_CTX <: SHA2_CTX
-    state::Array{UInt32,1}
-    bytecount::UInt64
-    buffer::Array{UInt8,1}
-end
-
-mutable struct SHA2_256_CTX <: SHA2_CTX
-    state::Array{UInt32,1}
-    bytecount::UInt64
-    buffer::Array{UInt8,1}
-end
-
-mutable struct SHA2_384_CTX <: SHA2_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-end
-
-mutable struct SHA2_512_CTX <: SHA2_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-end
-
-function Base.getproperty(ctx::SHA2_CTX, fieldname::Symbol)
-    if fieldname === :state
-        return getfield(ctx, :state)::Union{Vector{UInt32},Vector{UInt64}}
-    elseif fieldname === :bytecount
-        return getfield(ctx, :bytecount)::Union{UInt64,UInt128}
-    elseif fieldname === :buffer
-        return getfield(ctx, :buffer)::Vector{UInt8}
-    elseif fieldname === :W
-        return getfield(ctx, :W)::Vector{UInt32}
-    else
-        error("SHA2_CTX has no field ", fieldname)
-    end
-end
-
-
-# Typealias common nicknames for SHA2 family of functions
-const SHA224_CTX = SHA2_224_CTX
-const SHA256_CTX = SHA2_256_CTX
-const SHA384_CTX = SHA2_384_CTX
-const SHA512_CTX = SHA2_512_CTX
-
-
-# SHA3 224/256/384/512-bit context structures
-mutable struct SHA3_224_CTX <: SHA3_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-    bc::Array{UInt64,1}
-end
-mutable struct SHA3_256_CTX <: SHA3_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-    bc::Array{UInt64,1}
-end
-mutable struct SHA3_384_CTX <: SHA3_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-    bc::Array{UInt64,1}
-end
-mutable struct SHA3_512_CTX <: SHA3_CTX
-    state::Array{UInt64,1}
-    bytecount::UInt128
-    buffer::Array{UInt8,1}
-    bc::Array{UInt64,1}
-end
-
-function Base.getproperty(ctx::SHA3_CTX, fieldname::Symbol)
-    if fieldname === :state
-        return getfield(ctx, :state)::Vector{UInt64}
-    elseif fieldname === :bytecount
-        return getfield(ctx, :bytecount)::UInt128
-    elseif fieldname === :buffer
-        return getfield(ctx, :buffer)::Vector{UInt8}
-    elseif fieldname === :bc
-        return getfield(ctx, :bc)::Vector{UInt64}
-    else
-        error("type ", typeof(ctx), " has no field ", fieldname)
-    end
-end
-
-# Define constants via functions so as not to bloat context objects.  Yay dispatch!
-
-# Digest lengths for SHA1, SHA2 and SHA3.  This is easy to figure out from the typename
-digestlen(::Type{SHA1_CTX}) = 20
-digestlen(::Type{SHA2_224_CTX}) = 28
-digestlen(::Type{SHA3_224_CTX}) = 28
-digestlen(::Type{SHA2_256_CTX}) = 32
-digestlen(::Type{SHA3_256_CTX}) = 32
-digestlen(::Type{SHA2_384_CTX}) = 48
-digestlen(::Type{SHA3_384_CTX}) = 48
-digestlen(::Type{SHA2_512_CTX}) = 64
-digestlen(::Type{SHA3_512_CTX}) = 64
-
-# SHA1 and SHA2 have differing element types for the internal state objects
-state_type(::Type{SHA1_CTX}) = UInt32
-state_type(::Type{SHA2_224_CTX}) = UInt32
-state_type(::Type{SHA2_256_CTX}) = UInt32
-state_type(::Type{SHA2_384_CTX}) = UInt64
-state_type(::Type{SHA2_512_CTX}) = UInt64
-state_type(::Type{SHA3_CTX}) = UInt64
-
-# blocklen is the number of bytes of data processed by the transform!() function at once
-blocklen(::Type{SHA1_CTX}) = UInt64(64)
-blocklen(::Type{SHA2_224_CTX}) = UInt64(64)
-blocklen(::Type{SHA2_256_CTX}) = UInt64(64)
-blocklen(::Type{SHA2_384_CTX}) = UInt64(128)
-blocklen(::Type{SHA2_512_CTX}) = UInt64(128)
-
-blocklen(::Type{SHA3_224_CTX}) = UInt64(25*8 - 2*digestlen(SHA3_224_CTX))
-blocklen(::Type{SHA3_256_CTX}) = UInt64(25*8 - 2*digestlen(SHA3_256_CTX))
-blocklen(::Type{SHA3_384_CTX}) = UInt64(25*8 - 2*digestlen(SHA3_384_CTX))
-blocklen(::Type{SHA3_512_CTX}) = UInt64(25*8 - 2*digestlen(SHA3_512_CTX))
-
-
-# short_blocklen is the size of a block minus the width of bytecount
-short_blocklen(::Type{T}) where {T<:SHA_CTX} = blocklen(T) - 2*sizeof(state_type(T))
-
-# Once the "blocklen" methods are defined, we can define our outer constructors for SHA types:
-
-"""
-    SHA2_224_CTX()
-
-Construct an empty SHA2_224 context.
-"""
-SHA2_224_CTX() = SHA2_224_CTX(copy(SHA2_224_initial_hash_value), 0, zeros(UInt8, blocklen(SHA2_224_CTX)))
-"""
-    SHA2_256_CTX()
-
-Construct an empty SHA2_256 context.
-"""
-SHA2_256_CTX() = SHA2_256_CTX(copy(SHA2_256_initial_hash_value), 0, zeros(UInt8, blocklen(SHA2_256_CTX)))
-"""
-    SHA2_384()
-
-Construct an empty SHA2_384 context.
-"""
-SHA2_384_CTX() = SHA2_384_CTX(copy(SHA2_384_initial_hash_value), 0, zeros(UInt8, blocklen(SHA2_384_CTX)))
-"""
-    SHA2_512_CTX()
-
-Construct an empty SHA2_512 context.
-"""
-SHA2_512_CTX() = SHA2_512_CTX(copy(SHA2_512_initial_hash_value), 0, zeros(UInt8, blocklen(SHA2_512_CTX)))
-
-"""
-    SHA3_224_CTX()
-
-Construct an empty SHA3_224 context.
-"""
-SHA3_224_CTX() = SHA3_224_CTX(zeros(UInt64, 25), 0, zeros(UInt8, blocklen(SHA3_224_CTX)), Vector{UInt64}(undef, 5))
-"""
-    SHA3_256_CTX()
-
-Construct an empty SHA3_256 context.
-"""
-SHA3_256_CTX() = SHA3_256_CTX(zeros(UInt64, 25), 0, zeros(UInt8, blocklen(SHA3_256_CTX)), Vector{UInt64}(undef, 5))
-"""
-    SHA3_384_CTX()
-
-Construct an empty SHA3_384 context.
-"""
-SHA3_384_CTX() = SHA3_384_CTX(zeros(UInt64, 25), 0, zeros(UInt8, blocklen(SHA3_384_CTX)), Vector{UInt64}(undef, 5))
-"""
-    SHA3_512_CTX()
-
-Construct an empty SHA3_512 context.
-"""
-SHA3_512_CTX() = SHA3_512_CTX(zeros(UInt64, 25), 0, zeros(UInt8, blocklen(SHA3_512_CTX)), Vector{UInt64}(undef, 5))
-
-# Nickname'd outer constructor methods for SHA2
-const SHA224_CTX = SHA2_224_CTX
-const SHA256_CTX = SHA2_256_CTX
-const SHA384_CTX = SHA2_384_CTX
-const SHA512_CTX = SHA2_512_CTX
-
-# SHA1 is special; he needs extra workspace
-"""
-    SHA1_CTX()
-
-Construct an empty SHA1 context.
-"""
-SHA1_CTX() = SHA1_CTX(copy(SHA1_initial_hash_value), 0, zeros(UInt8, blocklen(SHA1_CTX)), Vector{UInt32}(undef, 80))
-
-
-# Copy functions
-copy(ctx::T) where {T<:SHA1_CTX} = T(copy(ctx.state), ctx.bytecount, copy(ctx.buffer), copy(ctx.W))
-copy(ctx::T) where {T<:SHA2_CTX} = T(copy(ctx.state), ctx.bytecount, copy(ctx.buffer))
-copy(ctx::T) where {T<:SHA3_CTX} = T(copy(ctx.state), ctx.bytecount, copy(ctx.buffer), Vector{UInt64}(undef, 5))
-
-
-# Make printing these types a little friendlier
-import Base.show
-show(io::IO, ::SHA1_CTX) = print(io, "SHA1 hash state")
-show(io::IO, ::SHA2_224_CTX) = print(io, "SHA2 224-bit hash state")
-show(io::IO, ::SHA2_256_CTX) = print(io, "SHA2 256-bit hash state")
-show(io::IO, ::SHA2_384_CTX) = print(io, "SHA2 384-bit hash state")
-show(io::IO, ::SHA2_512_CTX) = print(io, "SHA2 512-bit hash state")
-show(io::IO, ::SHA3_224_CTX) = print(io, "SHA3 224-bit hash state")
-show(io::IO, ::SHA3_256_CTX) = print(io, "SHA3 256-bit hash state")
-show(io::IO, ::SHA3_384_CTX) = print(io, "SHA3 384-bit hash state")
-show(io::IO, ::SHA3_512_CTX) = print(io, "SHA3 512-bit hash state")
-
-
-# use our types to define a method to get a pointer to the state buffer
-buffer_pointer(ctx::T) where {T<:SHA_CTX} = Ptr{state_type(T)}(pointer(ctx.buffer))
diff --git a/stdlib/SHA/test/perf.jl b/stdlib/SHA/test/perf.jl
deleted file mode 100644
index 08dbbe9b1cb96f..00000000000000
--- a/stdlib/SHA/test/perf.jl
+++ /dev/null
@@ -1,47 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SHA
-
-if isempty(ARGS)
-    error("need file to test sha perf")
-elseif !isfile(ARGS[1])
-    error("file $(ARGS[1]) does not exist")
-end
-
-
-function do_tests(filepath)
-    # test performance
-    print("read:    ")
-    @time begin
-        fh = open(filepath, "r")
-        bytes = read(fh)
-    end
-    GC.gc()
-
-    print("SHA-1:   ")
-    sha1(bytes)
-    GC.gc()
-    @time sha1(bytes)
-
-    print("SHA2-256: ")
-    sha256(bytes)
-    GC.gc()
-    @time sha256(bytes)
-
-    print("SHA2-512: ")
-    sha512(bytes)
-    GC.gc()
-    @time sha512(bytes)
-
-    print("SHA3-256: ")
-    sha3_256(bytes)
-    GC.gc()
-    @time sha3_256(bytes)
-
-    print("SHA3-512: ")
-    sha3_512(bytes)
-    GC.gc()
-    @time sha3_512(bytes)
-end
-
-do_tests(ARGS[1])
diff --git a/stdlib/SHA/test/runtests.jl b/stdlib/SHA/test/runtests.jl
deleted file mode 100644
index 2430f8d0b1089f..00000000000000
--- a/stdlib/SHA/test/runtests.jl
+++ /dev/null
@@ -1,302 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SHA
-using Test
-
-const VERBOSE = false
-
-# Define some data we will run our tests on
-lorem = "Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."
-so_many_as_array = repeat([0x61], 1000000)
-so_many_as_tuple = ntuple((i) -> 0x61, 1000000)
-tempdir = mktempdir()
-file = joinpath(tempdir, ".sha")
-fIO = open(file, "w")
-write(fIO, '\0')
-close(fIO)
-data = Any["", "test", lorem, file, so_many_as_array, so_many_as_tuple]
-
-# Descriptions of the data, the SHA functions we'll run on the data, etc...
-data_desc = ["the empty string", "the string \"test\"", "lorem ipsum",
-             "0 file", "one million a's Array", "one million a's Tuple"]
-sha_types = Dict(sha1 => SHA.SHA1_CTX,
-            sha2_224 => SHA.SHA2_224_CTX, sha2_256 => SHA.SHA2_256_CTX, sha2_384 => SHA.SHA2_384_CTX, sha2_512 => SHA.SHA2_512_CTX,
-            sha3_224 => SHA.SHA3_224_CTX, sha3_256 => SHA.SHA3_256_CTX, sha3_384 => SHA.SHA3_384_CTX, sha3_512 => SHA.SHA3_512_CTX)
-sha_funcs = [sha1,
-             sha2_224, sha2_256, sha2_384, sha2_512,
-             sha3_224, sha3_256, sha3_384, sha3_512]
-ctxs = [SHA1_CTX,
-        SHA2_224_CTX, SHA2_256_CTX, SHA2_384_CTX, SHA2_512_CTX,
-        SHA3_224_CTX, SHA3_256_CTX, SHA3_384_CTX, SHA3_512_CTX]
-shws = ["SHA1 hash state",
-        "SHA2 224-bit hash state", "SHA2 256-bit hash state", "SHA2 384-bit hash state", "SHA2 512-bit hash state",
-        "SHA3 224-bit hash state", "SHA3 256-bit hash state", "SHA3 384-bit hash state", "SHA3 512-bit hash state"]
-
-answers = Dict(
-sha1 => [
-"da39a3ee5e6b4b0d3255bfef95601890afd80709",
-"a94a8fe5ccb19ba61c4c0873d391e987982fbbd3",
-"19afa2a4a37462c7b940a6c4c61363d49c3a35f4",
-"5ba93c9db0cff93f52b521d7420e43f6eda2784f",
-"34aa973cd4c4daa4f61eeb2bdbad27316534016f",
-"34aa973cd4c4daa4f61eeb2bdbad27316534016f"
-],
-sha2_224 => [
-"d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f",
-"90a3ed9e32b2aaf4c61c410eb925426119e1a9dc53d4286ade99a809",
-"6a0644abcf1e2cecbec2814443dab5f24b7ad8ebb66c75667ab67959",
-"fff9292b4201617bdc4d3053fce02734166a683d7d858a7f5f59b073",
-"20794655980c91d8bbb4c1ea97618a4bf03f42581948b2ee4ee7ad67",
-"20794655980c91d8bbb4c1ea97618a4bf03f42581948b2ee4ee7ad67"
-],
-sha2_256 => [
-"e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855",
-"9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08",
-"2c7c3d5f244f1a40069a32224215e0cf9b42485c99d80f357d76f006359c7a18",
-"6e340b9cffb37a989ca544e6bb780a2c78901d3fb33738768511a30617afa01d",
-"cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0",
-"cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"
-],
-sha2_384 => [
-"38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b",
-"768412320f7b0aa5812fce428dc4706b3cae50e02a64caa16a782249bfe8efc4b7ef1ccb126255d196047dfedf17a0a9",
-"63980fd0425cd2c3d8a400ee0f2671ef135db03b947ec1af21b6e28f19c16ca272036469541f4d8e336ac6d1da50580f",
-"bec021b4f368e3069134e012c2b4307083d3a9bdd206e24e5f0d86e13d6636655933ec2b413465966817a9c208a11717",
-"9d0e1809716474cb086e834e310a4a1ced149e9c00f248527972cec5704c2a5b07b8b3dc38ecc4ebae97ddd87f3d8985",
-"9d0e1809716474cb086e834e310a4a1ced149e9c00f248527972cec5704c2a5b07b8b3dc38ecc4ebae97ddd87f3d8985"
-],
-sha2_512 => [
-"cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e",
-"ee26b0dd4af7e749aa1a8ee3c10ae9923f618980772e473f8819a5d4940e0db27ac185f8a0e1d5f84f88bc887fd67b143732c304cc5fa9ad8e6f57f50028a8ff",
-"f41d92bc9fc1157a0d1387e67f3d0893b70f7039d3d46d8115b5079d45ad601159398c79c281681e2da09bf7d9f8c23b41d1a0a3c5b528a7f2735933a4353194",
-"b8244d028981d693af7b456af8efa4cad63d282e19ff14942c246e50d9351d22704a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee",
-"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973ebde0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b",
-"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973ebde0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b"
-],
-sha3_224 => [
-"6b4e03423667dbb73b6e15454f0eb1abd4597f9a1b078e3f5b5a6bc7",
-"3797bf0afbbfca4a7bbba7602a2b552746876517a7f9b7ce2db0ae7b",
-"ea5395370949ad8c7d2ca3e7c045ef3306fe3a3f4740de452ef87a28",
-"bdd5167212d2dc69665f5a8875ab87f23d5ce7849132f56371a19096",
-"d69335b93325192e516a912e6d19a15cb51c6ed5c15243e7a7fd653c",
-"d69335b93325192e516a912e6d19a15cb51c6ed5c15243e7a7fd653c"
-],
-sha3_256 => [
-"a7ffc6f8bf1ed76651c14756a061d662f580ff4de43b49fa82d80a4b80f8434a",
-"36f028580bb02cc8272a9a020f4200e346e276ae664e45ee80745574e2f5ab80",
-"8c8142d2ca964ab307ace567ddd5764f17ebb76eb8ff25543ab54c14fe2ab139",
-"5d53469f20fef4f8eab52b88044ede69c77a6a68a60728609fc4a65ff531e7d0",
-"5c8875ae474a3634ba4fd55ec85bffd661f32aca75c6d699d0cdcb6c115891c1",
-"5c8875ae474a3634ba4fd55ec85bffd661f32aca75c6d699d0cdcb6c115891c1",
-],
-sha3_384 => [
-"0c63a75b845e4f7d01107d852e4c2485c51a50aaaa94fc61995e71bbee983a2ac3713831264adb47fb6bd1e058d5f004",
-"e516dabb23b6e30026863543282780a3ae0dccf05551cf0295178d7ff0f1b41eecb9db3ff219007c4e097260d58621bd",
-"eb9fbba3eb916a4efe384b3125f5d03ceb9c5c1b94431ac30fa86c54408b92701ca5d2628cd7113aa5541177ec3ccd1d",
-"127677f8b66725bbcb7c3eae9698351ca41e0eb6d66c784bd28dcdb3b5fb12d0c8e840342db03ad1ae180b92e3504933",
-"eee9e24d78c1855337983451df97c8ad9eedf256c6334f8e948d252d5e0e76847aa0774ddb90a842190d2c558b4b8340",
-"eee9e24d78c1855337983451df97c8ad9eedf256c6334f8e948d252d5e0e76847aa0774ddb90a842190d2c558b4b8340",
-],
-sha3_512 => [
-"a69f73cca23a9ac5c8b567dc185a756e97c982164fe25859e0d1dcc1475c80a615b2123af1f5f94c11e3e9402c3ac558f500199d95b6d3e301758586281dcd26",
-"9ece086e9bac491fac5c1d1046ca11d737b92a2b2ebd93f005d7b710110c0a678288166e7fbe796883a4f2e9b3ca9f484f521d0ce464345cc1aec96779149c14",
-"3a4318353396a12dfd20442cfce1d8ad4d7e732e85cc56b01b4cf9057a41c8827c0a03c70812e76ace68d776759225c213b4f581aac0dba5dd43b785b1a33fe5",
-"7127aab211f82a18d06cf7578ff49d5089017944139aa60d8bee057811a15fb55a53887600a3eceba004de51105139f32506fe5b53e1913bfa6b32e716fe97da",
-"3c3a876da14034ab60627c077bb98f7e120a2a5370212dffb3385a18d4f38859ed311d0a9d5141ce9cc5c66ee689b266a8aa18ace8282a0e0db596c90b0a7b87",
-"3c3a876da14034ab60627c077bb98f7e120a2a5370212dffb3385a18d4f38859ed311d0a9d5141ce9cc5c66ee689b266a8aa18ace8282a0e0db596c90b0a7b87",
-]
-)
-
-function describe_hash(T::Type{S}) where {S <: SHA.SHA_CTX}
-    if T <: SHA.SHA1_CTX return "SHA1" end
-    if T <: SHA.SHA2_CTX return "SHA2-$(SHA.digestlen(T)*8)" end
-    if T <: SHA.SHA3_CTX return "SHA3-$(SHA.digestlen(T)*8)" end
-end
-
-VERBOSE && println("Loaded hash types: $(join(sort([describe_hash(t[2]) for t in sha_types]), ", ", " and "))")
-
-# First, test processing the data in one go
-nerrors = 0
-for idx in 1:length(data)
-    global nerrors
-
-    desc = data_desc[idx]
-    VERBOSE && print("Testing on $desc$(join(["." for z in 1:(34-length(desc))]))")
-    nerrors_old = nerrors
-    for sha_idx in 1:length(sha_funcs)
-        sha_func = sha_funcs[sha_idx]
-
-        if idx == 4
-            open(data[idx]) do f
-                hash = bytes2hex(sha_func(f))
-            end
-        else
-            hash = bytes2hex(sha_func(data[idx]))
-        end
-
-        if hash != answers[sha_func][idx]
-            print("\n")
-            @warn(
-            """
-            For $(describe_hash(sha_types[sha_func])) expected:
-                $(answers[sha_func][idx])
-            Calculated:
-                $(hash)
-            """)
-            nerrors += 1
-        else
-            VERBOSE && print(".")
-        end
-    end
-    VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-end
-
-# Do another test on the "so many a's" data where we chunk up the data into
-# two chunks, (sized appropriately to AVOID overflow from one update to another)
-# in order to test multiple update!() calls
-VERBOSE && print("Testing on one million a's (chunked properly)")
-nerrors_old = nerrors
-for sha_idx in 1:length(sha_funcs)
-    global nerrors
-
-    ctx = sha_types[sha_funcs[sha_idx]]()
-    SHA.update!(ctx, so_many_as_array[1:2*SHA.blocklen(typeof(ctx))])
-    SHA.update!(ctx, so_many_as_array[2*SHA.blocklen(typeof(ctx))+1:end])
-    hash = bytes2hex(SHA.digest!(ctx))
-    if hash != answers[sha_funcs[sha_idx]][end]
-        print("\n")
-        @warn(
-        """
-        For $(describe_hash(sha_types[sha_funcs[sha_idx]])) expected:
-            $(answers[sha_funcs[sha_idx]][end-1])
-        Calculated:
-            $(hash)
-        """)
-        nerrors += 1
-    else
-        VERBOSE && print(".")
-    end
-end
-VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-
-# Do another test on the "so many a's" data where we chunk up the data into
-# three chunks, (sized appropriately to CAUSE overflow from one update to another)
-# in order to test multiple update!() calls as well as the overflow codepaths
-VERBOSE && print("Testing on one million a's (chunked clumsily)")
-nerrors_old = nerrors
-for sha_idx in 1:length(sha_funcs)
-    global nerrors
-    ctx = sha_types[sha_funcs[sha_idx]]()
-
-    # Get indices awkwardly placed for the blocklength of this hash type
-    idx0 = round(Int, 0.3*SHA.blocklen(typeof(ctx)))
-    idx1 = round(Int, 1.7*SHA.blocklen(typeof(ctx)))
-    idx2 = round(Int, 2.6*SHA.blocklen(typeof(ctx)))
-
-    # Feed data in according to our dastardly blocking scheme
-    SHA.update!(ctx, so_many_as_array[0      + 1:1*idx0])
-    SHA.update!(ctx, so_many_as_array[1*idx0 + 1:2*idx0])
-    SHA.update!(ctx, so_many_as_array[2*idx0 + 1:3*idx0])
-    SHA.update!(ctx, so_many_as_array[3*idx0 + 1:4*idx0])
-    SHA.update!(ctx, so_many_as_array[4*idx0 + 1:idx1])
-    SHA.update!(ctx, so_many_as_array[idx1 + 1:idx2])
-    SHA.update!(ctx, so_many_as_array[idx2 + 1:end])
-
-    # Ensure the hash is the appropriate one
-    hash = bytes2hex(SHA.digest!(ctx))
-    if hash != answers[sha_funcs[sha_idx]][end]
-        print("\n")
-        @warn(
-        """
-        For $(describe_hash(sha_types[sha_funcs[sha_idx]])) expected:
-            $(answers[sha_funcs[sha_idx]][end-1])
-        Calculated:
-            $(hash)
-        """)
-        nerrors += 1
-    else
-        VERBOSE && print(".")
-    end
-end
-VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-
-# test hmac correctness using the examples on [wiki](https://en.wikipedia.org/wiki/Hash-based_message_authentication_code#Examples)
-VERBOSE && print("Testing on the hmac functions")
-nerrors_old = nerrors
-for (key, msg, fun, hash) in (
-    (b"", b"", hmac_sha1, "fbdb1d1b18aa6c08324b7d64b71fb76370690e1d"),
-    (b"", b"", hmac_sha256, "b613679a0814d9ec772f95d778c35fc5ff1697c493715653c6c712144292c5ad"),
-    (b"key", b"The quick brown fox jumps over the lazy dog", hmac_sha1, "de7c9b85b8b78aa6bc8a7a36f70a90701c9db4d9"),
-    (b"key", b"The quick brown fox jumps over the lazy dog", hmac_sha256, "f7bc83f430538424b13298e6aa6fb143ef4d59a14946175997479dbc2d1a3cd8"),
-)
-    global nerrors
-    digest = bytes2hex(fun(Vector(key), Vector(msg)))
-    if digest != hash
-        print("\n")
-        @warn(
-        """
-        For $fun($(String(key)), $(String(msg))) expected:
-            $hash
-        Calculated:
-            $digest
-        """)
-        nerrors += 1
-    else
-        VERBOSE && print(".")
-    end
-end
-VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-
-replstr(x) = sprint((io, x) -> show(IOContext(io, :limit => true), MIME("text/plain"), x), x)
-
-for idx in 1:length(ctxs)
-    global nerrors
-    # Part #1: copy
-    VERBOSE && print("Testing copy function @ $(ctxs[idx]) ...")
-    try
-        copy(ctxs[idx]())
-    catch
-        print("\n")
-        @warn("Some weird copy error happened with $(ctxs[idx])")
-        nerrors += 1
-    end
-    VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-
-    # Part #2: show
-    VERBOSE && print("Testing show function @ $(ctxs[idx]) ...")
-    if replstr(ctxs[idx]()) != shws[idx]
-        print("\n")
-        @warn("Some weird show error happened with $(ctxs[idx])")
-        nerrors += 1
-    end
-    VERBOSE && println("Done! [$(nerrors - nerrors_old) errors]")
-end
-
-# test error if eltype of input is not UInt8
-for f in sha_funcs
-    global nerrors
-    local data = UInt32[0x23467, 0x324775]
-    try
-        f(data)
-    catch ex
-        if ex isa MethodError &&
-            ex.f === f &&
-            ex.args === (data,)
-            continue
-        end
-        rethrow()
-    end
-    @warn("Non-UInt8 Arrays should fail")
-    nerrors += 1
-end
-
-# Clean up the I/O mess
-rm(file)
-rm(tempdir)
-
-if nerrors == 0
-    VERBOSE && println("ALL OK")
-else
-    println("Failed with $nerrors failures")
-end
-@test nerrors == 0
diff --git a/stdlib/Serialization/src/Serialization.jl b/stdlib/Serialization/src/Serialization.jl
index 9476e984583858..f86fb1b858b051 100644
--- a/stdlib/Serialization/src/Serialization.jl
+++ b/stdlib/Serialization/src/Serialization.jl
@@ -22,7 +22,8 @@ mutable struct Serializer{I<:IO} <: AbstractSerializer
     table::IdDict{Any,Any}
     pending_refs::Vector{Int}
     known_object_data::Dict{UInt64,Any}
-    Serializer{I}(io::I) where I<:IO = new(io, 0, IdDict(), Int[], Dict{UInt64,Any}())
+    version::Int
+    Serializer{I}(io::I) where I<:IO = new(io, 0, IdDict(), Int[], Dict{UInt64,Any}(), ser_version)
 end
 
 Serializer(io::IO) = Serializer{typeof(io)}(io)
@@ -78,7 +79,10 @@ const TAGS = Any[
 
 @assert length(TAGS) == 255
 
-const ser_version = 12 # do not make changes without bumping the version #!
+const ser_version = 19 # do not make changes without bumping the version #!
+
+format_version(::AbstractSerializer) = ser_version
+format_version(s::Serializer) = s.version
 
 const NTAGS = length(TAGS)
 
@@ -413,24 +417,38 @@ function serialize(s::AbstractSerializer, meth::Method)
     serialize(s, meth.slot_syms)
     serialize(s, meth.nargs)
     serialize(s, meth.isva)
+    serialize(s, meth.is_for_opaque_closure)
+    serialize(s, meth.constprop)
+    serialize(s, meth.purity)
     if isdefined(meth, :source)
         serialize(s, Base._uncompressed_ast(meth, meth.source))
     else
         serialize(s, nothing)
     end
     if isdefined(meth, :generator)
-        serialize(s, Base._uncompressed_ast(meth, meth.generator.inferred)) # XXX: what was this supposed to do?
+        serialize(s, meth.generator)
     else
         serialize(s, nothing)
     end
+    if isdefined(meth, :recursion_relation)
+        serialize(s, method.recursion_relation)
+    else
+        serialize(s, nothing)
+    end
+    if isdefined(meth, :external_mt)
+        error("cannot serialize Method objects with external method tables")
+    end
     nothing
 end
 
 function serialize(s::AbstractSerializer, linfo::Core.MethodInstance)
     serialize_cycle(s, linfo) && return
-    isa(linfo.def, Module) || error("can only serialize toplevel MethodInstance objects")
     writetag(s.io, METHODINSTANCE_TAG)
-    serialize(s, linfo.uninferred)
+    if isdefined(linfo, :uninferred)
+        serialize(s, linfo.uninferred)
+    else
+        writetag(s.io, UNDEFREF_TAG)
+    end
     serialize(s, nothing)  # for backwards compat
     serialize(s, linfo.sparam_vals)
     serialize(s, Any)  # for backwards compat
@@ -444,18 +462,26 @@ function serialize(s::AbstractSerializer, t::Task)
     if istaskstarted(t) && !istaskdone(t)
         error("cannot serialize a running Task")
     end
-    state = [t.code, t.storage, t.state, t.result, t.exception]
     writetag(s.io, TASK_TAG)
-    for fld in state
-        serialize(s, fld)
+    serialize(s, t.code)
+    serialize(s, t.storage)
+    serialize(s, t.state)
+    if t._isexception && (stk = Base.current_exceptions(t); !isempty(stk))
+        # the exception stack field is hidden inside the task, so if there
+        # is any information there make a CapturedException from it instead.
+        # TODO: Handle full exception chain, not just the first one.
+        serialize(s, CapturedException(stk[1].exception, stk[1].backtrace))
+    else
+        serialize(s, t.result)
     end
+    serialize(s, t._isexception)
 end
 
 function serialize(s::AbstractSerializer, g::GlobalRef)
     if (g.mod === __deserialized_types__ ) ||
         (g.mod === Main && isdefined(g.mod, g.name) && isconst(g.mod, g.name))
 
-        v = getfield(g.mod, g.name)
+        v = getglobal(g.mod, g.name)
         unw = unwrap_unionall(v)
         if isa(unw,DataType) && v === unw.name.wrapper && should_send_whole_type(s, unw)
             # handle references to types in Main by sending the whole type.
@@ -485,9 +511,10 @@ function serialize_typename(s::AbstractSerializer, t::Core.TypeName)
     serialize(s, primary.parameters)
     serialize(s, primary.types)
     serialize(s, isdefined(primary, :instance))
-    serialize(s, primary.abstract)
-    serialize(s, primary.mutable)
-    serialize(s, primary.ninitialized)
+    serialize(s, t.flags & 0x1 == 0x1) # .abstract
+    serialize(s, t.flags & 0x2 == 0x2) # .mutable
+    serialize(s, Int32(length(primary.types) - t.n_uninitialized))
+    serialize(s, t.max_methods)
     if isdefined(t, :mt) && t.mt !== Symbol.name.mt
         serialize(s, t.mt.name)
         serialize(s, collect(Base.MethodList(t.mt)))
@@ -514,7 +541,7 @@ function should_send_whole_type(s, t::DataType)
         isanonfunction = mod === Main && # only Main
             t.super === Function && # only Functions
             unsafe_load(unsafe_convert(Ptr{UInt8}, tn.name)) == UInt8('#') && # hidden type
-            (!isdefined(mod, name) || t != typeof(getfield(mod, name))) # XXX: 95% accurate test for this being an inner function
+            (!isdefined(mod, name) || t != typeof(getglobal(mod, name))) # XXX: 95% accurate test for this being an inner function
             # TODO: more accurate test? (tn.name !== "#" name)
         #TODO: iskw = startswith(tn.name, "#kw#") && ???
         #TODO: iskw && return send-as-kwftype
@@ -638,7 +665,7 @@ function serialize_any(s::AbstractSerializer, @nospecialize(x))
         serialize_type(s, t)
         write(s.io, x)
     else
-        if t.mutable
+        if ismutable(x)
             serialize_cycle(s, x) && return
             serialize_type(s, t, true)
         else
@@ -716,20 +743,33 @@ function readheader(s::AbstractSerializer)
         error("""Cannot read stream serialized with a newer version of Julia.
                  Got data version $version > current version $ser_version""")
     end
+    s.version = version
+    return
 end
 
 """
     serialize(stream::IO, value)
 
 Write an arbitrary value to a stream in an opaque format, such that it can be read back by
-[`deserialize`](@ref). The read-back value will be as identical as possible to the original.
-In general, this process will not work if the reading and writing are done by different
-versions of Julia, or an instance of Julia with a different system image. `Ptr` values are
-serialized as all-zero bit patterns (`NULL`).
+[`deserialize`](@ref). The read-back value will be as identical as possible to the original,
+but note that `Ptr` values are serialized as all-zero bit patterns (`NULL`).
 
 An 8-byte identifying header is written to the stream first. To avoid writing the header,
 construct a `Serializer` and use it as the first argument to `serialize` instead.
 See also [`Serialization.writeheader`](@ref).
+
+The data format can change in minor (1.x) Julia releases, but files written by prior 1.x
+versions will remain readable. The main exception to this is when the definition of a
+type in an external package changes. If that occurs, it may be necessary to specify
+an explicit compatible version of the affected package in your environment.
+Renaming functions, even private functions, inside packages can also put existing files
+out of sync. Anonymous functions require special care: because their names are automatically
+generated, minor code changes can cause them to be renamed.
+Serializing anonymous functions should be avoided in files intended for long-term storage.
+
+In some cases, the word size (32- or 64-bit) of the reading and writing machines must match.
+In rarer cases the OS or architecture must also match, for example when using packages
+that contain platform-dependent code.
 """
 function serialize(s::IO, x)
     ss = Serializer(s)
@@ -754,8 +794,8 @@ serialize(filename::AbstractString, x) = open(io->serialize(io, x), filename, "w
 
 Read a value written by [`serialize`](@ref). `deserialize` assumes the binary data read from
 `stream` is correct and has been serialized by a compatible implementation of [`serialize`](@ref).
-It has been designed with simplicity and performance as a goal and does not validate
-the data read. Malformed data can result in process termination. The caller has to ensure
+`deserialize` is designed for simplicity and performance, and so does not validate
+the data read. Malformed data can result in process termination. The caller must ensure
 the integrity and correctness of data read from `stream`.
 """
 deserialize(s::IO) = deserialize(Serializer(s))
@@ -913,7 +953,7 @@ function handle_deserialize(s::AbstractSerializer, b::Int32)
         return deserialize_dict(s, t)
     end
     t = desertag(b)::DataType
-    if t.mutable && length(t.types) > 0  # manual specialization of fieldcount
+    if ismutabletype(t) && length(t.types) > 0  # manual specialization of fieldcount
         slot = s.counter; s.counter += 1
         push!(s.pending_refs, slot)
     end
@@ -946,7 +986,7 @@ function deserialize_module(s::AbstractSerializer)
         end
         m = Base.root_module(mkey[1])
         for i = 2:length(mkey)
-            m = getfield(m, mkey[i])::Module
+            m = getglobal(m, mkey[i])::Module
         end
     else
         name = String(deserialize(s)::Symbol)
@@ -954,7 +994,7 @@ function deserialize_module(s::AbstractSerializer)
         m = Base.root_module(pkg)
         mname = deserialize(s)
         while mname !== ()
-            m = getfield(m, mname)::Module
+            m = getglobal(m, mname)::Module
             mname = deserialize(s)
         end
     end
@@ -986,8 +1026,27 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
     end
     nargs = deserialize(s)::Int32
     isva = deserialize(s)::Bool
-    template = deserialize(s)
+    is_for_opaque_closure = false
+    constprop = 0x00
+    purity = 0x00
+    template_or_is_opaque = deserialize(s)
+    if isa(template_or_is_opaque, Bool)
+        is_for_opaque_closure = template_or_is_opaque
+        if format_version(s) >= 14
+            constprop = deserialize(s)::UInt8
+        end
+        if format_version(s) >= 17
+            purity = deserialize(s)::UInt8
+        end
+        template = deserialize(s)
+    else
+        template = template_or_is_opaque
+    end
     generator = deserialize(s)
+    recursion_relation = nothing
+    if format_version(s) >= 15
+        recursion_relation = deserialize(s)
+    end
     if makenew
         meth.module = mod
         meth.name = name
@@ -996,6 +1055,9 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
         meth.sig = sig
         meth.nargs = nargs
         meth.isva = isva
+        meth.is_for_opaque_closure = is_for_opaque_closure
+        meth.constprop = constprop
+        meth.purity = purity
         if template !== nothing
             # TODO: compress template
             meth.source = template::CodeInfo
@@ -1006,15 +1068,16 @@ function deserialize(s::AbstractSerializer, ::Type{Method})
         end
         meth.slot_syms = slot_syms
         if generator !== nothing
-            linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, ())
-            linfo.specTypes = Tuple
-            linfo.inferred = generator
-            linfo.def = meth
-            meth.generator = linfo
+            meth.generator = generator
         end
-        mt = ccall(:jl_method_table_for, Any, (Any,), sig)
-        if mt !== nothing && nothing === ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), mt, sig, typemax(UInt))
-            ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, meth, C_NULL)
+        if recursion_relation !== nothing
+            meth.recursion_relation = recursion_relation
+        end
+        if !is_for_opaque_closure
+            mt = ccall(:jl_method_table_for, Any, (Any,), sig)
+            if mt !== nothing && nothing === ccall(:jl_methtable_lookup, Any, (Any, Any, UInt), mt, sig, typemax(UInt))
+                ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, meth, C_NULL)
+            end
         end
         remember_object(s, meth, lnumber)
     end
@@ -1024,7 +1087,10 @@ end
 function deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance})
     linfo = ccall(:jl_new_method_instance_uninit, Ref{Core.MethodInstance}, (Ptr{Cvoid},), C_NULL)
     deserialize_cycle(s, linfo)
-    linfo.uninferred = deserialize(s)::CodeInfo
+    tag = Int32(read(s.io, UInt8)::UInt8)
+    if tag != UNDEFREF_TAG
+        linfo.uninferred = handle_deserialize(s, tag)::CodeInfo
+    end
     tag = Int32(read(s.io, UInt8)::UInt8)
     if tag != UNDEFREF_TAG
         # for reading files prior to v1.2
@@ -1033,7 +1099,7 @@ function deserialize(s::AbstractSerializer, ::Type{Core.MethodInstance})
     linfo.sparam_vals = deserialize(s)::SimpleVector
     _rettype = deserialize(s)  # for backwards compat
     linfo.specTypes = deserialize(s)
-    linfo.def = deserialize(s)::Module
+    linfo.def = deserialize(s)
     return linfo
 end
 
@@ -1046,7 +1112,7 @@ function deserialize(s::AbstractSerializer, ::Type{Core.LineInfoNode})
         method = mod
         mod = Main
     end
-    return Core.LineInfoNode(mod, method, deserialize(s)::Symbol, deserialize(s)::Int, deserialize(s)::Int)
+    return Core.LineInfoNode(mod, method, deserialize(s)::Symbol, Int32(deserialize(s)::Union{Int32, Int}), Int32(deserialize(s)::Union{Int32, Int}))
 end
 
 function deserialize(s::AbstractSerializer, ::Type{PhiNode})
@@ -1087,7 +1153,13 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
         ci.ssavaluetypes = deserialize(s)
         ci.linetable = deserialize(s)
     end
-    ci.ssaflags = deserialize(s)
+    ssaflags = deserialize(s)
+    if length(ssaflags) ≠ length(code)
+        # make sure the length of `ssaflags` matches that of `code`
+        # so that the latest inference doesn't throw on IRs serialized from old versions
+        ssaflags = UInt8[0x00 for _ in 1:length(code)]
+    end
+    ci.ssaflags = ssaflags
     if pre_12
         ci.slotflags = deserialize(s)
     else
@@ -1114,6 +1186,12 @@ function deserialize(s::AbstractSerializer, ::Type{CodeInfo})
     ci.inlineable = deserialize(s)
     ci.propagate_inbounds = deserialize(s)
     ci.pure = deserialize(s)
+    if format_version(s) >= 14
+        ci.constprop = deserialize(s)::UInt8
+    end
+    if format_version(s) >= 17
+        ci.purity = deserialize(s)::UInt8
+    end
     return ci
 end
 
@@ -1205,10 +1283,11 @@ function deserialize_typename(s::AbstractSerializer, number)
     else
         # reuse the same name for the type, if possible, for nicer debugging
         tn_name = isdefined(__deserialized_types__, name) ? gensym() : name
-        tn = ccall(:jl_new_typename_in, Ref{Core.TypeName}, (Any, Any),
-                   tn_name, __deserialized_types__)
+        tn = ccall(:jl_new_typename_in, Any, (Any, Any, Cint, Cint),
+                   tn_name, __deserialized_types__, false, false)
         makenew = true
     end
+    tn = tn::Core.TypeName
     remember_object(s, tn, number)
     deserialize_cycle(s, tn)
 
@@ -1216,25 +1295,31 @@ function deserialize_typename(s::AbstractSerializer, number)
     super = deserialize(s)::Type
     parameters = deserialize(s)::SimpleVector
     types = deserialize(s)::SimpleVector
+    attrs = Core.svec()
     has_instance = deserialize(s)::Bool
     abstr = deserialize(s)::Bool
     mutabl = deserialize(s)::Bool
     ninitialized = deserialize(s)::Int32
+    maxm = format_version(s) >= 18 ? deserialize(s)::UInt8 : UInt8(0)
 
     if makenew
-        tn.names = names
         # TODO: there's an unhanded cycle in the dependency graph at this point:
         # while deserializing super and/or types, we may have encountered
         # tn.wrapper and throw UndefRefException before we get to this point
-        ndt = ccall(:jl_new_datatype, Any, (Any, Any, Any, Any, Any, Any, Cint, Cint, Cint),
-                    tn, tn.module, super, parameters, names, types,
+        ndt = ccall(:jl_new_datatype, Any, (Any, Any, Any, Any, Any, Any, Any, Cint, Cint, Cint),
+                    tn, tn.module, super, parameters, names, types, attrs,
                     abstr, mutabl, ninitialized)
-        tn.wrapper = ndt.name.wrapper
+        @assert tn == ndt.name
         ccall(:jl_set_const, Cvoid, (Any, Any, Any), tn.module, tn.name, tn.wrapper)
         ty = tn.wrapper
-        if has_instance && !isdefined(ty, :instance)
-            # use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty
-            Core.setfield!(ty, :instance, ccall(:jl_new_struct, Any, (Any, Any...), ty))
+        tn.max_methods = maxm
+        if has_instance
+            ty = ty::DataType
+            if !isdefined(ty, :instance)
+                singleton = ccall(:jl_new_struct, Any, (Any, Any...), ty)
+                # use setfield! directly to avoid `fieldtype` lowering expecting to see a Singleton object already on ty
+                ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), ty, Base.fieldindex(DataType, :instance)-1, singleton)
+            end
         end
     end
 
@@ -1244,15 +1329,16 @@ function deserialize_typename(s::AbstractSerializer, number)
         defs = deserialize(s)
         maxa = deserialize(s)::Int
         if makenew
-            tn.mt = ccall(:jl_new_method_table, Any, (Any, Any), name, tn.module)
+            mt = ccall(:jl_new_method_table, Any, (Any, Any), name, tn.module)
             if !isempty(parameters)
-                tn.mt.offs = 0
+                mt.offs = 0
             end
-            tn.mt.name = mtname
-            tn.mt.max_args = maxa
+            mt.name = mtname
+            mt.max_args = maxa
+            ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), tn, Base.fieldindex(Core.TypeName, :mt)-1, mt)
             for def in defs
                 if isdefined(def, :sig)
-                    ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), tn.mt, def, C_NULL)
+                    ccall(:jl_method_table_insert, Cvoid, (Any, Any, Ptr{Cvoid}), mt, def, C_NULL)
                 end
             end
         end
@@ -1263,8 +1349,11 @@ function deserialize_typename(s::AbstractSerializer, number)
                 tn.mt.kwsorter = kws
             end
         end
+    elseif makenew
+        mt = Symbol.name.mt
+        ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), tn, Base.fieldindex(Core.TypeName, :mt)-1, mt)
     end
-    return tn::Core.TypeName
+    return tn
 end
 
 function deserialize_datatype(s::AbstractSerializer, full::Bool)
@@ -1275,7 +1364,7 @@ function deserialize_datatype(s::AbstractSerializer, full::Bool)
     else
         name = deserialize(s)::Symbol
         mod = deserialize(s)::Module
-        ty = getfield(mod,name)
+        ty = getglobal(mod, name)
     end
     if isa(ty,DataType) && isempty(ty.parameters)
         t = ty
@@ -1348,7 +1437,15 @@ function deserialize(s::AbstractSerializer, ::Type{Task})
         @assert false
     end
     t.result = deserialize(s)
-    t.exception = deserialize(s)
+    exc = deserialize(s)
+    if exc === nothing
+        t._isexception = false
+    elseif exc isa Bool
+        t._isexception = exc
+    else
+        t._isexception = true
+        t.result = exc
+    end
     t
 end
 
@@ -1364,7 +1461,7 @@ function deserialize(s::AbstractSerializer, t::DataType)
     if nf == 0 && t.size > 0
         # bits type
         return read(s.io, t)
-    elseif t.mutable
+    elseif ismutabletype(t)
         x = ccall(:jl_new_struct_uninit, Any, (Any,), t)
         deserialize_cycle(s, x)
         for i in 1:nf
@@ -1443,4 +1540,32 @@ function deserialize(s::AbstractSerializer, ::Type{Base.StackTraces.StackFrame})
     return Base.StackTraces.StackFrame(func, file, line, nothing, from_c, inlined, pointer)
 end
 
+function serialize(s::AbstractSerializer, lock::Base.AbstractLock)
+    # assert_havelock(lock)
+    serialize_cycle_header(s, lock)
+    nothing
+end
+
+function deserialize(s::AbstractSerializer, ::Type{T}) where T<:Base.AbstractLock
+    lock = T()
+    deserialize_cycle(s, lock)
+    return lock
+end
+
+function serialize(s::AbstractSerializer, cond::Base.GenericCondition)
+    serialize_cycle_header(s, cond) && return
+    serialize(s, cond.lock)
+    nothing
+end
+
+function deserialize(s::AbstractSerializer, ::Type{T}) where T<:Base.GenericCondition
+    lock = deserialize(s)
+    cond = T(lock)
+    deserialize_cycle(s, cond)
+    return cond
+end
+
+serialize(s::AbstractSerializer, l::LazyString) =
+    invoke(serialize, Tuple{AbstractSerializer,Any}, s, Base._LazyString((), string(l)))
+
 end
diff --git a/stdlib/Serialization/test/runtests.jl b/stdlib/Serialization/test/runtests.jl
index 24bd08145b2618..104b3e97d61181 100644
--- a/stdlib/Serialization/test/runtests.jl
+++ b/stdlib/Serialization/test/runtests.jl
@@ -609,3 +609,44 @@ let s = join(rand('a':'z', 1024)), io = IOBuffer()
     s2 = deserialize(io)
     @test Base.summarysize(s2) < 2*sizeof(s)
 end
+
+# issue #39895
+@eval Main begin
+    using Test, Serialization
+    let g = gensym(:g)
+        closure = eval(:(f -> $g(x) = f(x)))
+        inc(x) = x + 1
+        b = IOBuffer()
+        serialize(b, closure(inc))
+        seekstart(b)
+        f = deserialize(b)
+        # this should not crash
+        @test_broken f(1) == 2
+    end
+end
+
+let c1 = Threads.Condition()
+    c2 = Threads.Condition(c1.lock)
+    lock(c2)
+    t = @task nothing
+    Base._wait2(c1, t)
+    c3, c4 = deserialize(IOBuffer(sprint(serialize, [c1, c2])))::Vector{Threads.Condition}
+    @test c3.lock === c4.lock
+    @test islocked(c1)
+    @test !islocked(c3)
+    @test !isempty(c1.waitq)
+    @test isempty(c2.waitq)
+    @test isempty(c3.waitq)
+    @test isempty(c4.waitq)
+    notify(c1)
+    unlock(c2)
+    wait(t)
+end
+
+@testset "LazyString" begin
+    l1 = lazy"a $1 b $2"
+    l2 = deserialize(IOBuffer(sprint(serialize, l1)))
+    @test l2.str === l1.str
+    @test l2 == l1
+    @test l2.parts === ()
+end
diff --git a/stdlib/SharedArrays/src/SharedArrays.jl b/stdlib/SharedArrays/src/SharedArrays.jl
index c80a74e6172272..a961be4e534b36 100644
--- a/stdlib/SharedArrays/src/SharedArrays.jl
+++ b/stdlib/SharedArrays/src/SharedArrays.jl
@@ -7,7 +7,7 @@ module SharedArrays
 
 using Mmap, Distributed, Random
 
-import Base: length, size, ndims, IndexStyle, reshape, convert, deepcopy_internal,
+import Base: length, size, elsize, ndims, IndexStyle, reshape, convert, deepcopy_internal,
              show, getindex, setindex!, fill!, similar, reduce, map!, copyto!, unsafe_convert
 import Random
 using Serialization
@@ -206,7 +206,7 @@ function SharedArray{T,N}(filename::AbstractString, dims::NTuple{N,Int}, offset:
     # Create the file if it doesn't exist, map it if it does
     refs = Vector{Future}(undef, length(pids))
     func_mmap = mode -> open(filename, mode) do io
-        Mmap.mmap(io, Array{T,N}, dims, offset; shared=true)
+        mmap(io, Array{T,N}, dims, offset; shared=true)
     end
     s = Array{T}(undef, ntuple(d->0,N))
     if onlocalhost
@@ -292,16 +292,24 @@ SharedVector(A::Vector) = SharedArray(A)
 SharedMatrix(A::Matrix) = SharedArray(A)
 
 size(S::SharedArray) = S.dims
+elsize(::Type{SharedArray{T,N}}) where {T,N} = elsize(Array{T,N}) # aka fieldtype(T, :s)
 IndexStyle(::Type{<:SharedArray}) = IndexLinear()
 
+function local_array_by_id(refid)
+    if isa(refid, Future)
+        refid = remoteref_id(refid)
+    end
+    fetch(channel_from_id(refid))
+end
+
 function reshape(a::SharedArray{T}, dims::NTuple{N,Int}) where {T,N}
     if length(a) != prod(dims)
         throw(DimensionMismatch("dimensions must be consistent with array size"))
     end
     refs = Vector{Future}(undef, length(a.pids))
     for (i, p) in enumerate(a.pids)
-        refs[i] = remotecall(p, a.refs[i], dims) do r,d
-            reshape(fetch(r),d)
+        refs[i] = remotecall(p, a.refs[i], dims) do r, d
+            reshape(local_array_by_id(r), d)
         end
     end
 
@@ -381,7 +389,7 @@ function shared_pids(pids)
         # only use workers on the current host
         pids = procs(myid())
         if length(pids) > 1
-            pids = filter(x -> x != 1, pids)
+            pids = filter(!=(1), pids)
         end
 
         onlocalhost = true
@@ -418,13 +426,7 @@ sub_1dim(S::SharedArray, pidx) = view(S.s, range_1dim(S, pidx))
 function init_loc_flds(S::SharedArray{T,N}, empty_local=false) where T where N
     if myid() in S.pids
         S.pidx = findfirst(isequal(myid()), S.pids)
-        if isa(S.refs[1], Future)
-            refid = remoteref_id(S.refs[S.pidx])
-        else
-            refid = S.refs[S.pidx]
-        end
-        c = channel_from_id(refid)
-        S.s = fetch(c)
+        S.s = local_array_by_id(S.refs[S.pidx])
         S.loc_subarr_1d = sub_1dim(S, S.pidx)
     else
         S.pidx = 0
@@ -505,9 +507,9 @@ end
 Array(S::SharedArray) = S.s
 
 # pass through getindex and setindex! - unlike DArrays, these always work on the complete array
-getindex(S::SharedArray, i::Real) = getindex(S.s, i)
+Base.@propagate_inbounds getindex(S::SharedArray, i::Real) = getindex(S.s, i)
 
-setindex!(S::SharedArray, x, i::Real) = setindex!(S.s, x, i)
+Base.@propagate_inbounds setindex!(S::SharedArray, x, i::Real) = setindex!(S.s, x, i)
 
 function fill!(S::SharedArray, v)
     vT = convert(eltype(S), v)
@@ -667,7 +669,7 @@ function _shm_mmap_array(T, dims, shm_seg_name, mode)
     readonly = !((mode & JL_O_RDWR) == JL_O_RDWR)
     create = (mode & JL_O_CREAT) == JL_O_CREAT
     s = Mmap.Anonymous(shm_seg_name, readonly, create)
-    Mmap.mmap(s, Array{T,length(dims)}, dims, zero(Int64))
+    mmap(s, Array{T,length(dims)}, dims, zero(Int64))
 end
 
 # no-op in windows
@@ -687,13 +689,19 @@ function _shm_mmap_array(T, dims, shm_seg_name, mode)
         systemerror("ftruncate() failed for shm segment " * shm_seg_name, rc != 0)
     end
 
-    Mmap.mmap(s, Array{T,length(dims)}, dims, zero(Int64); grow=false)
+    mmap(s, Array{T,length(dims)}, dims, zero(Int64); grow=false)
 end
 
 shm_unlink(shm_seg_name) = ccall(:shm_unlink, Cint, (Cstring,), shm_seg_name)
-shm_open(shm_seg_name, oflags, permissions) = ccall(:shm_open, Cint,
-    (Cstring, Cint, Base.Cmode_t), shm_seg_name, oflags, permissions)
-
+function shm_open(shm_seg_name, oflags, permissions)
+    # On macOS, `shm_open()` is a variadic function, so to properly match
+    # calling ABI, we must declare our arguments as variadic as well.
+    @static if Sys.isapple()
+        return ccall(:shm_open, Cint, (Cstring, Cint, Base.Cmode_t...), shm_seg_name, oflags, permissions)
+    else
+        return ccall(:shm_open, Cint, (Cstring, Cint, Base.Cmode_t), shm_seg_name, oflags, permissions)
+    end
+end
 end # os-test
 
 end # module
diff --git a/stdlib/SharedArrays/test/runtests.jl b/stdlib/SharedArrays/test/runtests.jl
index 58a4e9d2a672f1..7f1bbb6891ce06 100644
--- a/stdlib/SharedArrays/test/runtests.jl
+++ b/stdlib/SharedArrays/test/runtests.jl
@@ -3,7 +3,9 @@
 using Test, Distributed, SharedArrays, Random
 include(joinpath(Sys.BINDIR, "..", "share", "julia", "test", "testenv.jl"))
 
-addprocs_with_testenv(4)
+# These processes explicitly want to share memory, we can't have
+# them in separate rr sessions
+addprocs_with_testenv(4; rr_allowed=false)
 @test nprocs() == 5
 
 @everywhere using Test, SharedArrays
@@ -142,6 +144,7 @@ read!(fn3, filedata)
 @test all(filedata[1:4] .== 0x01)
 @test all(filedata[5:end] .== 0x02)
 finalize(S)
+@test Base.elsize(S) == Base.elsize(typeof(S)) == Base.elsize(Vector{UInt8})
 
 # call gc 3 times to avoid unlink: operation not permitted (EPERM) on Windows
 S = nothing
@@ -165,6 +168,7 @@ S = @inferred(SharedArray{Int}(1,2))
 S = @inferred(SharedArray{Int}(1,2,3))
 @test size(S) == (1,2,3)
 @test typeof(S) <: SharedArray{Int}
+@test Base.elsize(S) == Base.elsize(typeof(S)) == Base.elsize(Vector{Int})
 
 # reshape
 
@@ -172,6 +176,12 @@ d = SharedArrays.shmem_fill(1.0, (10,10,10))
 @test fill(1., 100, 10) == reshape(d,(100,10))
 d = SharedArrays.shmem_fill(1.0, (10,10,10))
 @test_throws DimensionMismatch reshape(d,(50,))
+# issue #40249, reshaping on another process
+let m = SharedArray{ComplexF64}(10, 20, 30)
+    m2 = remotecall_fetch(() -> reshape(m, (100, :)), id_other)
+    @test size(m2) == (100, 60)
+    @test m2 isa SharedArray
+end
 
 # rand, randn
 d = SharedArrays.shmem_rand(dims)
diff --git a/stdlib/Sockets/src/PipeServer.jl b/stdlib/Sockets/src/PipeServer.jl
index bc203f5f314362..4a8965c8f04629 100644
--- a/stdlib/Sockets/src/PipeServer.jl
+++ b/stdlib/Sockets/src/PipeServer.jl
@@ -70,6 +70,9 @@ end
     listen(path::AbstractString) -> PipeServer
 
 Create and listen on a named pipe / UNIX domain socket.
+
+!!! note
+    Path length on Unix is limited to somewhere between 92 and 108 bytes (cf. `man unix`).
 """
 function listen(path::AbstractString)
     sock = PipeServer()
@@ -93,5 +96,8 @@ end
     connect(path::AbstractString) -> PipeEndpoint
 
 Connect to the named pipe / UNIX domain socket at `path`.
+
+!!! note
+    Path length on Unix is limited to somewhere between 92 and 108 bytes (cf. `man unix`).
 """
 connect(path::AbstractString) = connect(PipeEndpoint(), path)
diff --git a/stdlib/Sockets/src/Sockets.jl b/stdlib/Sockets/src/Sockets.jl
index 6af8c19a2bbb3c..82dedb72e6eccd 100644
--- a/stdlib/Sockets/src/Sockets.jl
+++ b/stdlib/Sockets/src/Sockets.jl
@@ -97,7 +97,7 @@ end
 function TCPSocket(fd::OS_HANDLE)
     tcp = TCPSocket()
     iolock_begin()
-    err = ccall(:uv_tcp_open, Int32, (Ptr{Cvoid}, OS_HANDLE), pipe.handle, fd)
+    err = ccall(:uv_tcp_open, Int32, (Ptr{Cvoid}, OS_HANDLE), tcp.handle, fd)
     uv_error("tcp_open", err)
     tcp.status = StatusOpen
     iolock_end()
@@ -139,9 +139,6 @@ function TCPServer(; delay=true)
     return tcp
 end
 
-isreadable(io::TCPSocket) = isopen(io) || bytesavailable(io) > 0
-iswritable(io::TCPSocket) = isopen(io) && io.status != StatusClosing
-
 """
     accept(server[, client])
 
@@ -203,7 +200,6 @@ end
 show(io::IO, stream::UDPSocket) = print(io, typeof(stream), "(", uv_status_string(stream), ")")
 
 function _uv_hook_close(sock::UDPSocket)
-    sock.handle = C_NULL
     lock(sock.cond)
     try
         sock.status = StatusClosed
@@ -367,7 +363,7 @@ function recvfrom(sock::UDPSocket)
     end
 end
 
-alloc_buf_hook(sock::UDPSocket, size::UInt) = (Libc.malloc(size), size) # size is always 64k from libuv
+alloc_buf_hook(sock::UDPSocket, size::UInt) = (Libc.malloc(size), Int(size)) # size is always 64k from libuv
 
 function uv_recvcb(handle::Ptr{Cvoid}, nread::Cssize_t, buf::Ptr{Cvoid}, addr::Ptr{Cvoid}, flags::Cuint)
     sock = @handle_as handle UDPSocket
@@ -572,14 +568,17 @@ end
     nagle(socket::Union{TCPServer, TCPSocket}, enable::Bool)
 
 Enables or disables Nagle's algorithm on a given TCP server or socket.
+
+!!! compat "Julia 1.3"
+    This function requires Julia 1.3 or later.
 """
 function nagle(sock::Union{TCPServer, TCPSocket}, enable::Bool)
     # disable or enable Nagle's algorithm on all OSes
-    Sockets.iolock_begin()
-    Sockets.check_open(sock)
+    iolock_begin()
+    check_open(sock)
     err = ccall(:uv_tcp_nodelay, Cint, (Ptr{Cvoid}, Cint), sock.handle, Cint(!enable))
     # TODO: check err
-    Sockets.iolock_end()
+    iolock_end()
     return err
 end
 
@@ -589,15 +588,15 @@ end
 On Linux systems, the TCP_QUICKACK is disabled or enabled on `socket`.
 """
 function quickack(sock::Union{TCPServer, TCPSocket}, enable::Bool)
-    Sockets.iolock_begin()
-    Sockets.check_open(sock)
+    iolock_begin()
+    check_open(sock)
     @static if Sys.islinux()
         # tcp_quickack is a linux only option
         if ccall(:jl_tcp_quickack, Cint, (Ptr{Cvoid}, Cint), sock.handle, Cint(enable)) < 0
             @warn "Networking unoptimized ( Error enabling TCP_QUICKACK : $(Libc.strerror(Libc.errno())) )" maxlog=1
         end
     end
-    Sockets.iolock_end()
+    iolock_end()
     nothing
 end
 
@@ -803,6 +802,7 @@ socket is connected to. Valid only for connected TCP sockets.
 getpeername(sock::TCPSocket) = _sockname(sock, false)
 
 function _sockname(sock, self=true)
+    sock.status == StatusInit || check_open(sock)
     rport = Ref{Cushort}(0)
     raddress = zeros(UInt8, 16)
     rfamily = Ref{Cuint}(0)
diff --git a/stdlib/Sockets/src/addrinfo.jl b/stdlib/Sockets/src/addrinfo.jl
index a4eed4d16d6fa7..586463ba0fa21e 100644
--- a/stdlib/Sockets/src/addrinfo.jl
+++ b/stdlib/Sockets/src/addrinfo.jl
@@ -260,7 +260,7 @@ julia> getipaddr(IPv6)
 ip"fe80::9731:35af:e1c5:6e49"
 ```
 
-See also: [`getipaddrs`](@ref)
+See also [`getipaddrs`](@ref).
 """
 function getipaddr(addr_type::Type{T}) where T<:IPAddr
     addrs = getipaddrs(addr_type)
@@ -305,7 +305,7 @@ julia> getipaddrs(IPv6)
  ip"fe80::445e:5fff:fe5d:5500"
 ```
 
-See also: [`islinklocaladdr`](@ref), `split(ENV["SSH_CONNECTION"], ' ')[3]`
+See also [`islinklocaladdr`](@ref).
 """
 function getipaddrs(addr_type::Type{T}=IPAddr; loopback::Bool=false) where T<:IPAddr
     addresses = T[]
diff --git a/stdlib/Sockets/test/runtests.jl b/stdlib/Sockets/test/runtests.jl
index e8dad4d95ee4bb..a27bb89408f1d9 100644
--- a/stdlib/Sockets/test/runtests.jl
+++ b/stdlib/Sockets/test/runtests.jl
@@ -16,12 +16,12 @@ function killjob(d)
     end
     if @isdefined(SIGINFO)
         ccall(:uv_kill, Cint, (Cint, Cint), getpid(), SIGINFO)
-        sleep(1)
+        sleep(5) # Allow time for profile to collect and print before killing
     end
     ccall(:uv_kill, Cint, (Cint, Cint), getpid(), Base.SIGTERM)
     nothing
 end
-Timer(t -> killjob("KILLING BY SOCKETS TEST WATCHDOG\n"), 600)
+sockets_watchdog_timer = Timer(t -> killjob("KILLING BY SOCKETS TEST WATCHDOG\n"), 600)
 
 @testset "parsing" begin
     @test ip"127.0.0.1" == IPv4(127,0,0,1)
@@ -196,6 +196,31 @@ end
 
 
 @testset "getnameinfo on some unroutable IP addresses (RFC 5737)" begin
+    try
+        getnameinfo(ip"192.0.2.1")
+        getnameinfo(ip"198.51.100.1")
+        getnameinfo(ip"203.0.113.1")
+        getnameinfo(ip"0.1.1.1")
+        getnameinfo(ip"::ffff:0.1.1.1")
+        getnameinfo(ip"::ffff:192.0.2.1")
+        getnameinfo(ip"2001:db8::1")
+    catch
+        # NOTE: Default Ubuntu installations contain a faulty DNS configuration
+        # that returns `EAI_AGAIN` instead of `EAI_NONAME`.  To fix this, try
+        # installing `libnss-resolve`, which installs the `systemd-resolve`
+        # backend for NSS, which should fix it.
+        #
+        # If you are running tests inside Docker, you'll need to install
+        # `libnss-resolve` both outside Docker (i.e. on the host machine) and
+        # inside the Docker container.
+        if Sys.islinux()
+            error_msg = string(
+                "`getnameinfo` failed on an unroutable IP address. ",
+                "If your DNS setup seems to be working, try installing libnss-resolve",
+            )
+            @error(error_msg)
+        end
+    end
     @test getnameinfo(ip"192.0.2.1") == "192.0.2.1"
     @test getnameinfo(ip"198.51.100.1") == "198.51.100.1"
     @test getnameinfo(ip"203.0.113.1") == "203.0.113.1"
@@ -526,17 +551,54 @@ end
         r = @async close(s)
         @test_throws Base._UVError("connect", Base.UV_ECANCELED) Sockets.wait_connected(s)
         fetch(r)
+        close(srv)
     end
 end
 
 @testset "iswritable" begin
     let addr = Sockets.InetAddr(ip"127.0.0.1", 4445)
         srv = listen(addr)
-        s = Sockets.TCPSocket()
-        Sockets.connect!(s, addr)
-        @test iswritable(s)
-        close(s)
-        @test !iswritable(s)
+        let s = Sockets.TCPSocket()
+            Sockets.connect!(s, addr)
+            @test iswritable(s) broken=Sys.iswindows()
+            close(s)
+            @test !iswritable(s)
+        end
+        let s = Sockets.connect(addr)
+            @test iswritable(s)
+            closewrite(s)
+            @test !iswritable(s)
+            close(s)
+        end
+        close(srv)
+        srv = listen(addr)
+        let s = Sockets.connect(addr)
+            let c = accept(srv)
+                Base.errormonitor(@async try; write(c, c); finally; close(c); end)
+            end
+            @test iswritable(s)
+            write(s, "hello world\n")
+            closewrite(s)
+            @test !iswritable(s)
+            @test isreadable(s)
+            @test read(s, String) == "hello world\n"
+            @test !isreadable(s)
+            @test !isopen(s)
+            close(s)
+        end
+        close(srv)
+    end
+end
+
+@testset "TCPSocket RawFD constructor" begin
+    if Sys.islinux()
+        let fd = ccall(:socket, Int32, (Int32, Int32, Int32),
+                       2, # AF_INET
+                       1, # SOCK_STREAM
+                       0)
+            s = Sockets.TCPSocket(RawFD(fd))
+            close(s)
+        end
     end
 end
 
@@ -617,3 +679,6 @@ end
         end
     end
 end
+
+
+close(sockets_watchdog_timer)
diff --git a/stdlib/SparseArrays.version b/stdlib/SparseArrays.version
new file mode 100644
index 00000000000000..343462a534a2f6
--- /dev/null
+++ b/stdlib/SparseArrays.version
@@ -0,0 +1,4 @@
+SPARSEARRAYS_BRANCH = main
+SPARSEARRAYS_SHA1 = 96820d3aba22dad0fbd2b4877e6a1f0f7af76721
+SPARSEARRAYS_GIT_URL := https://github.com/JuliaSparse/SparseArrays.jl.git
+SPARSEARRAYS_TAR_URL = https://api.github.com/repos/JuliaSparse/SparseArrays.jl/tarball/$1
diff --git a/stdlib/SparseArrays/Project.toml b/stdlib/SparseArrays/Project.toml
deleted file mode 100644
index 53d4a9f064ad3d..00000000000000
--- a/stdlib/SparseArrays/Project.toml
+++ /dev/null
@@ -1,14 +0,0 @@
-name = "SparseArrays"
-uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-
-[deps]
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-
-[extras]
-Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
-InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Dates", "Test", "InteractiveUtils"]
diff --git a/stdlib/SparseArrays/docs/src/index.md b/stdlib/SparseArrays/docs/src/index.md
deleted file mode 100644
index 61b521940b90a2..00000000000000
--- a/stdlib/SparseArrays/docs/src/index.md
+++ /dev/null
@@ -1,232 +0,0 @@
-# Sparse Arrays
-
-```@meta
-DocTestSetup = :(using SparseArrays, LinearAlgebra)
-```
-
-Julia has support for sparse vectors and [sparse matrices](https://en.wikipedia.org/wiki/Sparse_matrix)
-in the `SparseArrays` stdlib module. Sparse arrays are arrays that contain enough zeros
-that storing them in a special data structure leads to savings in space and execution time,
-compared to dense arrays.
-
-## [Compressed Sparse Column (CSC) Sparse Matrix Storage](@id man-csc)
-
-In Julia, sparse matrices are stored in the [Compressed Sparse Column (CSC) format](https://en.wikipedia.org/wiki/Sparse_matrix#Compressed_sparse_column_.28CSC_or_CCS.29).
-Julia sparse matrices have the type [`SparseMatrixCSC{Tv,Ti}`](@ref), where `Tv` is the
-type of the stored values, and `Ti` is the integer type for storing column pointers and
-row indices. The internal representation of `SparseMatrixCSC` is as follows:
-
-```julia
-struct SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti}
-    m::Int                  # Number of rows
-    n::Int                  # Number of columns
-    colptr::Vector{Ti}      # Column j is in colptr[j]:(colptr[j+1]-1)
-    rowval::Vector{Ti}      # Row indices of stored values
-    nzval::Vector{Tv}       # Stored values, typically nonzeros
-end
-```
-
-The compressed sparse column storage makes it easy and quick to access the elements in the column
-of a sparse matrix, whereas accessing the sparse matrix by rows is considerably slower. Operations
-such as insertion of previously unstored entries one at a time in the CSC structure tend to be slow. This is
-because all elements of the sparse matrix that are beyond the point of insertion have to be moved
-one place over.
-
-All operations on sparse matrices are carefully implemented to exploit the CSC data structure
-for performance, and to avoid expensive operations.
-
-If you have data in CSC format from a different application or library, and wish to import it
-in Julia, make sure that you use 1-based indexing. The row indices in every column need to be
-sorted. If your `SparseMatrixCSC` object contains unsorted row indices, one quick way to sort
-them is by doing a double transpose.
-
-In some applications, it is convenient to store explicit zero values in a `SparseMatrixCSC`. These
-*are* accepted by functions in `Base` (but there is no guarantee that they will be preserved in
-mutating operations). Such explicitly stored zeros are treated as structural nonzeros by many
-routines. The [`nnz`](@ref) function returns the number of elements explicitly stored in the
-sparse data structure, including non-structural zeros. In order to count the exact number of
-numerical nonzeros, use [`count(!iszero, x)`](@ref), which inspects every stored element of a sparse
-matrix. [`dropzeros`](@ref), and the in-place [`dropzeros!`](@ref), can be used to
-remove stored zeros from the sparse matrix.
-
-```jldoctest
-julia> A = sparse([1, 1, 2, 3], [1, 3, 2, 3], [0, 1, 2, 0])
-3×3 SparseMatrixCSC{Int64, Int64} with 4 stored entries:
- 0  ⋅  1
- ⋅  2  ⋅
- ⋅  ⋅  0
-
-julia> dropzeros(A)
-3×3 SparseMatrixCSC{Int64, Int64} with 2 stored entries:
- ⋅  ⋅  1
- ⋅  2  ⋅
- ⋅  ⋅  ⋅
-```
-
-## Sparse Vector Storage
-
-Sparse vectors are stored in a close analog to compressed sparse column format for sparse
-matrices. In Julia, sparse vectors have the type [`SparseVector{Tv,Ti}`](@ref) where `Tv`
-is the type of the stored values and `Ti` the integer type for the indices. The internal
-representation is as follows:
-
-```julia
-struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
-    n::Int              # Length of the sparse vector
-    nzind::Vector{Ti}   # Indices of stored values
-    nzval::Vector{Tv}   # Stored values, typically nonzeros
-end
-```
-
-As for [`SparseMatrixCSC`](@ref), the `SparseVector` type can also contain explicitly
-stored zeros. (See [Sparse Matrix Storage](@ref man-csc).).
-
-## Sparse Vector and Matrix Constructors
-
-The simplest way to create a sparse array is to use a function equivalent to the [`zeros`](@ref)
-function that Julia provides for working with dense arrays. To produce a
-sparse array instead, you can use the same name with an `sp` prefix:
-
-```jldoctest
-julia> spzeros(3)
-3-element SparseVector{Float64, Int64} with 0 stored entries
-```
-
-The [`sparse`](@ref) function is often a handy way to construct sparse arrays. For
-example, to construct a sparse matrix we can input a vector `I` of row indices, a vector
-`J` of column indices, and a vector `V` of stored values (this is also known as the
-[COO (coordinate) format](https://en.wikipedia.org/wiki/Sparse_matrix#Coordinate_list_.28COO.29)).
-`sparse(I,J,V)` then constructs a sparse matrix such that `S[I[k], J[k]] = V[k]`. The
-equivalent sparse vector constructor is [`sparsevec`](@ref), which takes the (row) index
-vector `I` and the vector `V` with the stored values and constructs a sparse vector `R`
-such that `R[I[k]] = V[k]`.
-
-```jldoctest sparse_function
-julia> I = [1, 4, 3, 5]; J = [4, 7, 18, 9]; V = [1, 2, -5, 3];
-
-julia> S = sparse(I,J,V)
-5×18 SparseMatrixCSC{Int64, Int64} with 4 stored entries:
-⠀⠈⠀⡀⠀⠀⠀⠀⠠
-⠀⠀⠀⠀⠁⠀⠀⠀⠀
-
-julia> R = sparsevec(I,V)
-5-element SparseVector{Int64, Int64} with 4 stored entries:
-  [1]  =  1
-  [3]  =  -5
-  [4]  =  2
-  [5]  =  3
-```
-
-The inverse of the [`sparse`](@ref) and [`sparsevec`](@ref) functions is
-[`findnz`](@ref), which retrieves the inputs used to create the sparse array.
-[`findall(!iszero, x)`](@ref) returns the cartesian indices of non-zero entries in `x`
-(including stored entries equal to zero).
-
-```jldoctest sparse_function
-julia> findnz(S)
-([1, 4, 5, 3], [4, 7, 9, 18], [1, 2, 3, -5])
-
-julia> findall(!iszero, S)
-4-element Vector{CartesianIndex{2}}:
- CartesianIndex(1, 4)
- CartesianIndex(4, 7)
- CartesianIndex(5, 9)
- CartesianIndex(3, 18)
-
-julia> findnz(R)
-([1, 3, 4, 5], [1, -5, 2, 3])
-
-julia> findall(!iszero, R)
-4-element Vector{Int64}:
- 1
- 3
- 4
- 5
-```
-
-Another way to create a sparse array is to convert a dense array into a sparse array using
-the [`sparse`](@ref) function:
-
-```jldoctest
-julia> sparse(Matrix(1.0I, 5, 5))
-5×5 SparseMatrixCSC{Float64, Int64} with 5 stored entries:
- 1.0   ⋅    ⋅    ⋅    ⋅
-  ⋅   1.0   ⋅    ⋅    ⋅
-  ⋅    ⋅   1.0   ⋅    ⋅
-  ⋅    ⋅    ⋅   1.0   ⋅
-  ⋅    ⋅    ⋅    ⋅   1.0
-
-julia> sparse([1.0, 0.0, 1.0])
-3-element SparseVector{Float64, Int64} with 2 stored entries:
-  [1]  =  1.0
-  [3]  =  1.0
-```
-
-You can go in the other direction using the [`Array`](@ref) constructor. The [`issparse`](@ref)
-function can be used to query if a matrix is sparse.
-
-```jldoctest
-julia> issparse(spzeros(5))
-true
-```
-
-## Sparse matrix operations
-
-Arithmetic operations on sparse matrices also work as they do on dense matrices. Indexing of,
-assignment into, and concatenation of sparse matrices work in the same way as dense matrices.
-Indexing operations, especially assignment, are expensive, when carried out one element at a time.
-In many cases it may be better to convert the sparse matrix into `(I,J,V)` format using [`findnz`](@ref),
-manipulate the values or the structure in the dense vectors `(I,J,V)`, and then reconstruct
-the sparse matrix.
-
-## Correspondence of dense and sparse methods
-
-The following table gives a correspondence between built-in methods on sparse matrices and their
-corresponding methods on dense matrix types. In general, methods that generate sparse matrices
-differ from their dense counterparts in that the resulting matrix follows the same sparsity pattern
-as a given sparse matrix `S`, or that the resulting sparse matrix has density `d`, i.e. each matrix
-element has a probability `d` of being non-zero.
-
-Details can be found in the [Sparse Vectors and Matrices](@ref stdlib-sparse-arrays)
-section of the standard library reference.
-
-| Sparse                     | Dense                  | Description                                                                                                                                                           |
-|:-------------------------- |:---------------------- |:--------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| [`spzeros(m,n)`](@ref)     | [`zeros(m,n)`](@ref)   | Creates a *m*-by-*n* matrix of zeros. ([`spzeros(m,n)`](@ref) is empty.)                                                                                              |
-| [`sparse(I,n,n)`](@ref)  | [`Matrix(I,n,n)`](@ref)| Creates a *n*-by-*n* identity matrix.                                                                                                                                 |
-| [`sparse(A)`](@ref)        | [`Array(S)`](@ref)   | Interconverts between dense and sparse formats.                                                                                                                       |
-| [`sprand(m,n,d)`](@ref)    | [`rand(m,n)`](@ref)    | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements distributed uniformly on the half-open interval ``[0, 1)``.                            |
-| [`sprandn(m,n,d)`](@ref)   | [`randn(m,n)`](@ref)   | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements distributed according to the standard normal (Gaussian) distribution.                  |
-| [`sprandn(rng,m,n,d)`](@ref) | [`randn(rng,m,n)`](@ref) | Creates a *m*-by-*n* random matrix (of density *d*) with iid non-zero elements generated with the `rng` random number generator                                   |
-
-# [Sparse Arrays](@id stdlib-sparse-arrays)
-
-```@docs
-SparseArrays.AbstractSparseArray
-SparseArrays.AbstractSparseVector
-SparseArrays.AbstractSparseMatrix
-SparseArrays.SparseVector
-SparseArrays.SparseMatrixCSC
-SparseArrays.sparse
-SparseArrays.sparsevec
-SparseArrays.issparse
-SparseArrays.nnz
-SparseArrays.findnz
-SparseArrays.spzeros
-SparseArrays.spdiagm
-SparseArrays.blockdiag
-SparseArrays.sprand
-SparseArrays.sprandn
-SparseArrays.nonzeros
-SparseArrays.rowvals
-SparseArrays.nzrange
-SparseArrays.droptol!
-SparseArrays.dropzeros!
-SparseArrays.dropzeros
-SparseArrays.permute
-permute!{Tv, Ti, Tp <: Integer, Tq <: Integer}(::SparseMatrixCSC{Tv,Ti}, ::SparseMatrixCSC{Tv,Ti}, ::AbstractArray{Tp,1}, ::AbstractArray{Tq,1})
-```
-
-```@meta
-DocTestSetup = nothing
-```
diff --git a/stdlib/SparseArrays/src/SparseArrays.jl b/stdlib/SparseArrays/src/SparseArrays.jl
deleted file mode 100644
index 0616763205696e..00000000000000
--- a/stdlib/SparseArrays/src/SparseArrays.jl
+++ /dev/null
@@ -1,63 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-Support for sparse arrays. Provides `AbstractSparseArray` and subtypes.
-"""
-module SparseArrays
-
-using Base: ReshapedArray, promote_op, setindex_shape_check, to_shape, tail,
-    require_one_based_indexing
-using Base.Sort: Forward
-using LinearAlgebra
-
-import Base: +, -, *, \, /, &, |, xor, ==, zero
-import LinearAlgebra: mul!, ldiv!, rdiv!, cholesky, adjoint!, diag, eigen, dot,
-    issymmetric, istril, istriu, lu, tr, transpose!, tril!, triu!,
-    cond, diagm, factorize, ishermitian, norm, opnorm, lmul!, rmul!, tril, triu, matprod
-
-import Base: acos, acosd, acot, acotd, acsch, asech, asin, asind, asinh,
-    atan, atand, atanh, broadcast!, conj!, cos, cosc, cosd, cosh, cospi, cot,
-    cotd, coth, count, csc, cscd, csch,
-    exp10, exp2, findprev, findnext, floor, hash, argmin, inv,
-    log10, log2, sec, secd, sech, show,
-    sin, sinc, sind, sinh, sinpi, dropdims, sum, summary, tan,
-    tand, tanh, trunc, abs, abs2,
-    broadcast, ceil, complex, conj, convert, copy, copyto!, adjoint,
-    exp, expm1, findall, findmax, findmin, float, getindex,
-    vcat, hcat, hvcat, cat, imag, argmax, kron, kron!, length, log, log1p, max, min,
-    maximum, minimum, one, promote_eltype, real, reshape, rot180,
-    rotl90, rotr90, round, setindex!, similar, size, transpose,
-    vec, permute!, map, map!, Array, diff, circshift!, circshift
-
-using Random: default_rng, AbstractRNG, randsubseq, randsubseq!
-
-export AbstractSparseArray, AbstractSparseMatrix, AbstractSparseVector,
-    SparseMatrixCSC, SparseVector, blockdiag, droptol!, dropzeros!, dropzeros,
-    issparse, nonzeros, nzrange, rowvals, sparse, sparsevec, spdiagm,
-    sprand, sprandn, spzeros, nnz, permute, findnz
-
-include("abstractsparse.jl")
-include("sparsematrix.jl")
-include("sparseconvert.jl")
-include("sparsevector.jl")
-include("higherorderfns.jl")
-include("linalg.jl")
-include("deprecated.jl")
-
-
-# temporarily moved here and commented out from from base/linalg/diagonal.jl, base/linalg/tridiag.jl
-# and base/linalg/bidiag.jl due to their usage of spzeros
-similar(B::Bidiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
-similar(D::Diagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
-similar(S::SymTridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
-similar(M::Tridiagonal, ::Type{T}, dims::Union{Dims{1},Dims{2}}) where {T} = spzeros(T, dims...)
-
-zero(a::AbstractSparseArray) = spzeros(eltype(a), size(a)...)
-
-const BiTriSym = Union{Bidiagonal,SymTridiagonal,Tridiagonal}
-function *(A::BiTriSym, B::BiTriSym)
-    TS = promote_op(matprod, eltype(A), eltype(B))
-    mul!(similar(A, TS, size(A)...), A, B)
-end
-
-end
diff --git a/stdlib/SparseArrays/src/abstractsparse.jl b/stdlib/SparseArrays/src/abstractsparse.jl
deleted file mode 100644
index 3886fa001c93ec..00000000000000
--- a/stdlib/SparseArrays/src/abstractsparse.jl
+++ /dev/null
@@ -1,123 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-"""
-    AbstractSparseArray{Tv,Ti,N}
-
-Supertype for `N`-dimensional sparse arrays (or array-like types) with elements
-of type `Tv` and index type `Ti`. [`SparseMatrixCSC`](@ref), [`SparseVector`](@ref)
-and `SuiteSparse.CHOLMOD.Sparse` are subtypes of this.
-"""
-abstract type AbstractSparseArray{Tv,Ti,N} <: AbstractArray{Tv,N} end
-
-"""
-    AbstractSparseVector{Tv,Ti}
-
-Supertype for one-dimensional sparse arrays (or array-like types) with elements
-of type `Tv` and index type `Ti`. Alias for `AbstractSparseArray{Tv,Ti,1}`.
-"""
-const AbstractSparseVector{Tv,Ti} = AbstractSparseArray{Tv,Ti,1}
-"""
-    AbstractSparseMatrix{Tv,Ti}
-
-Supertype for two-dimensional sparse arrays (or array-like types) with elements
-of type `Tv` and index type `Ti`. Alias for `AbstractSparseArray{Tv,Ti,2}`.
-"""
-const AbstractSparseMatrix{Tv,Ti} = AbstractSparseArray{Tv,Ti,2}
-
-"""
-    AbstractSparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti}
-
-Supertype for matrix with compressed sparse column (CSC).
-"""
-abstract type AbstractSparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrix{Tv,Ti} end
-
-"""
-    issparse(S)
-
-Returns `true` if `S` is sparse, and `false` otherwise.
-
-# Examples
-```jldoctest
-julia> sv = sparsevec([1, 4], [2.3, 2.2], 10)
-10-element SparseVector{Float64, Int64} with 2 stored entries:
-  [1 ]  =  2.3
-  [4 ]  =  2.2
-
-julia> issparse(sv)
-true
-
-julia> issparse(Array(sv))
-false
-```
-"""
-issparse(A::AbstractArray) = false
-issparse(S::AbstractSparseArray) = true
-issparse(S::LinearAlgebra.Adjoint{<:Any,<:AbstractSparseArray}) = true
-issparse(S::LinearAlgebra.Transpose{<:Any,<:AbstractSparseArray}) = true
-
-issparse(S::LinearAlgebra.Symmetric{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.Hermitian{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.LowerTriangular{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.UnitLowerTriangular{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.UpperTriangular{<:Any,<:AbstractSparseMatrix}) = true
-issparse(S::LinearAlgebra.UnitUpperTriangular{<:Any,<:AbstractSparseMatrix}) = true
-
-indtype(S::AbstractSparseArray{<:Any,Ti}) where {Ti} = Ti
-
-function Base.reinterpret(::Type, A::AbstractSparseArray)
-    error("""
-          `reinterpret` on sparse arrays is discontinued.
-          Try reinterpreting the value itself instead.
-          """)
-end
-
-# The following two methods should be overloaded by concrete types to avoid
-# allocating the I = findall(...)
-_sparse_findnextnz(v::AbstractSparseArray, i) = (I = findall(!iszero, v); n = searchsortedfirst(I, i); n<=length(I) ? I[n] : nothing)
-_sparse_findprevnz(v::AbstractSparseArray, i) = (I = findall(!iszero, v); n = searchsortedlast(I, i);  !iszero(n)   ? I[n] : nothing)
-
-function findnext(f::Function, v::AbstractSparseArray, i)
-    # short-circuit the case f == !iszero because that avoids
-    # allocating e.g. zero(BigInt) for the f(zero(...)) test.
-    if nnz(v) == length(v) || (f != (!iszero) && f(zero(eltype(v))))
-        return invoke(findnext, Tuple{Function,Any,Any}, f, v, i)
-    end
-    j = _sparse_findnextnz(v, i)
-    while j !== nothing && !f(v[j])
-        j = _sparse_findnextnz(v, nextind(v, j))
-    end
-    return j
-end
-
-function findprev(f::Function, v::AbstractSparseArray, i)
-    # short-circuit the case f == !iszero because that avoids
-    # allocating e.g. zero(BigInt) for the f(zero(...)) test.
-    if nnz(v) == length(v) || (f != (!iszero) && f(zero(eltype(v))))
-        return invoke(findprev, Tuple{Function,Any,Any}, f, v, i)
-    end
-    j = _sparse_findprevnz(v, i)
-    while j !== nothing && !f(v[j])
-        j = _sparse_findprevnz(v, prevind(v, j))
-    end
-    return j
-end
-
-"""
-    findnz(A::SparseMatrixCSC)
-
-Return a tuple `(I, J, V)` where `I` and `J` are the row and column indices of the stored
-("structurally non-zero") values in sparse matrix `A`, and `V` is a vector of the values.
-
-# Examples
-```jldoctest
-julia> A = sparse([1 2 0; 0 0 3; 0 4 0])
-3×3 SparseMatrixCSC{Int64, Int64} with 4 stored entries:
- 1  2  ⋅
- ⋅  ⋅  3
- ⋅  4  ⋅
-
-julia> findnz(A)
-([1, 1, 3, 2], [1, 2, 2, 3], [1, 2, 4, 3])
-```
-"""
-function findnz end
diff --git a/stdlib/SparseArrays/src/deprecated.jl b/stdlib/SparseArrays/src/deprecated.jl
deleted file mode 100644
index db9cc06564214c..00000000000000
--- a/stdlib/SparseArrays/src/deprecated.jl
+++ /dev/null
@@ -1,5 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# BEGIN 1.0 deprecations
-
-# END 1.0 deprecations
diff --git a/stdlib/SparseArrays/src/higherorderfns.jl b/stdlib/SparseArrays/src/higherorderfns.jl
deleted file mode 100644
index e01be3697097cc..00000000000000
--- a/stdlib/SparseArrays/src/higherorderfns.jl
+++ /dev/null
@@ -1,1157 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module HigherOrderFns
-
-# This module provides higher order functions specialized for sparse arrays,
-# particularly map[!]/broadcast[!] for SparseVectors and SparseMatrixCSCs at present.
-import Base: map, map!, broadcast, copy, copyto!
-
-using Base: front, tail, to_shape
-using ..SparseArrays: SparseVector, SparseMatrixCSC, AbstractSparseVector, AbstractSparseMatrixCSC,
-                      AbstractSparseMatrix, AbstractSparseArray, indtype, nnz, nzrange,
-                      SparseVectorUnion, AdjOrTransSparseVectorUnion, nonzeroinds, nonzeros, rowvals, getcolptr
-using Base.Broadcast: BroadcastStyle, Broadcasted, flatten
-using LinearAlgebra
-
-# This module is organized as follows:
-# (0) Define BroadcastStyle rules and convenience types for dispatch
-# (1) Define a common interface to SparseVectors and SparseMatrixCSCs sufficient for
-#       map[!]/broadcast[!]'s purposes. The methods below are written against this interface.
-# (2) Define entry points for map[!] (short children of _map_[not]zeropres!).
-# (3) Define entry points for broadcast[!] (short children of _broadcast_[not]zeropres!).
-# (4) Define _map_[not]zeropres! specialized for a single (input) sparse vector/matrix.
-# (5) Define _map_[not]zeropres! specialized for a pair of (input) sparse vectors/matrices.
-# (6) Define general _map_[not]zeropres! capable of handling >2 (input) sparse vectors/matrices.
-# (7) Define _broadcast_[not]zeropres! specialized for a single (input) sparse vector/matrix.
-# (8) Define _broadcast_[not]zeropres! specialized for a pair of (input) sparse vectors/matrices.
-# (9) Define general _broadcast_[not]zeropres! capable of handling >2 (input) sparse vectors/matrices.
-# (10) Define broadcast methods handling combinations of broadcast scalars and sparse vectors/matrices.
-# (11) Define broadcast[!] methods handling combinations of scalars, sparse vectors/matrices,
-#       structured matrices, and one- and two-dimensional Arrays.
-# (12) Define map[!] methods handling combinations of sparse and structured matrices.
-
-
-# (0) BroadcastStyle rules and convenience types for dispatch
-
-SparseVecOrMat = Union{SparseVector,AbstractSparseMatrixCSC}
-
-# broadcast container type promotion for combinations of sparse arrays and other types
-struct SparseVecStyle <: Broadcast.AbstractArrayStyle{1} end
-struct SparseMatStyle <: Broadcast.AbstractArrayStyle{2} end
-Broadcast.BroadcastStyle(::Type{<:SparseVector}) = SparseVecStyle()
-Broadcast.BroadcastStyle(::Type{<:AbstractSparseMatrixCSC}) = SparseMatStyle()
-const SPVM = Union{SparseVecStyle,SparseMatStyle}
-
-# SparseVecStyle handles 0-1 dimensions, SparseMatStyle 0-2 dimensions.
-# SparseVecStyle promotes to SparseMatStyle for 2 dimensions.
-# Fall back to DefaultArrayStyle for higher dimensionality.
-SparseVecStyle(::Val{0}) = SparseVecStyle()
-SparseVecStyle(::Val{1}) = SparseVecStyle()
-SparseVecStyle(::Val{2}) = SparseMatStyle()
-SparseVecStyle(::Val{N}) where N = Broadcast.DefaultArrayStyle{N}()
-SparseMatStyle(::Val{0}) = SparseMatStyle()
-SparseMatStyle(::Val{1}) = SparseMatStyle()
-SparseMatStyle(::Val{2}) = SparseMatStyle()
-SparseMatStyle(::Val{N}) where N = Broadcast.DefaultArrayStyle{N}()
-
-Broadcast.BroadcastStyle(::SparseMatStyle, ::SparseVecStyle) = SparseMatStyle()
-
-# Tuples promote to dense
-Broadcast.BroadcastStyle(::SparseVecStyle, ::Broadcast.Style{Tuple}) = Broadcast.DefaultArrayStyle{1}()
-Broadcast.BroadcastStyle(::SparseMatStyle, ::Broadcast.Style{Tuple}) = Broadcast.DefaultArrayStyle{2}()
-
-struct PromoteToSparse <: Broadcast.AbstractArrayStyle{2} end
-PromoteToSparse(::Val{0}) = PromoteToSparse()
-PromoteToSparse(::Val{1}) = PromoteToSparse()
-PromoteToSparse(::Val{2}) = PromoteToSparse()
-PromoteToSparse(::Val{N}) where N = Broadcast.DefaultArrayStyle{N}()
-
-const StructuredMatrix = Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal}
-Broadcast.BroadcastStyle(::Type{<:Adjoint{T,<:Union{SparseVector,SparseMatrixCSC}} where T}) = PromoteToSparse()
-Broadcast.BroadcastStyle(::Type{<:Transpose{T,<:Union{SparseVector,SparseMatrixCSC}} where T}) = PromoteToSparse()
-
-Broadcast.BroadcastStyle(s::SPVM, ::Broadcast.AbstractArrayStyle{0}) = s
-Broadcast.BroadcastStyle(s::SPVM, ::Broadcast.DefaultArrayStyle{0}) = s
-Broadcast.BroadcastStyle(::SPVM, ::Broadcast.DefaultArrayStyle{1}) = PromoteToSparse()
-Broadcast.BroadcastStyle(::SPVM, ::Broadcast.DefaultArrayStyle{2}) = PromoteToSparse()
-
-Broadcast.BroadcastStyle(::SPVM, ::LinearAlgebra.StructuredMatrixStyle{<:StructuredMatrix}) = PromoteToSparse()
-Broadcast.BroadcastStyle(::PromoteToSparse, ::LinearAlgebra.StructuredMatrixStyle{<:StructuredMatrix}) = PromoteToSparse()
-
-Broadcast.BroadcastStyle(::PromoteToSparse, ::SPVM) = PromoteToSparse()
-Broadcast.BroadcastStyle(::PromoteToSparse, ::Broadcast.Style{Tuple}) = Broadcast.DefaultArrayStyle{2}()
-
-# FIXME: currently sparse broadcasts are only well-tested on known array types, while any AbstractArray
-# could report itself as a DefaultArrayStyle().
-# See https://github.com/JuliaLang/julia/pull/23939#pullrequestreview-72075382 for more details
-is_supported_sparse_broadcast() = true
-is_supported_sparse_broadcast(::AbstractArray, rest...) = false
-is_supported_sparse_broadcast(::AbstractSparseArray, rest...) = is_supported_sparse_broadcast(rest...)
-is_supported_sparse_broadcast(::StructuredMatrix, rest...) = is_supported_sparse_broadcast(rest...)
-is_supported_sparse_broadcast(::Array, rest...) = is_supported_sparse_broadcast(rest...)
-is_supported_sparse_broadcast(t::Union{Transpose, Adjoint}, rest...) = is_supported_sparse_broadcast(t.parent, rest...)
-is_supported_sparse_broadcast(x, rest...) = axes(x) === () && is_supported_sparse_broadcast(rest...)
-is_supported_sparse_broadcast(x::Ref, rest...) = is_supported_sparse_broadcast(rest...)
-
-can_skip_sparsification(f, rest...) = false
-can_skip_sparsification(::typeof(*), ::SparseVectorUnion, ::AdjOrTransSparseVectorUnion) = true
-
-# Dispatch on broadcast operations by number of arguments
-const Broadcasted0{Style<:Union{Nothing,BroadcastStyle},Axes,F} =
-    Broadcasted{Style,Axes,F,Tuple{}}
-const SpBroadcasted1{Style<:SPVM,Axes,F,Args<:Tuple{SparseVecOrMat}} =
-    Broadcasted{Style,Axes,F,Args}
-const SpBroadcasted2{Style<:SPVM,Axes,F,Args<:Tuple{SparseVecOrMat,SparseVecOrMat}} =
-    Broadcasted{Style,Axes,F,Args}
-
-# (1) The definitions below provide a common interface to sparse vectors and matrices
-# sufficient for the purposes of map[!]/broadcast[!]. This interface treats sparse vectors
-# as n-by-one sparse matrices which, though technically incorrect, is how broacast[!] views
-# sparse vectors in practice.
-@inline numrows(A::SparseVector) = length(A)
-@inline numrows(A::AbstractSparseMatrixCSC) = size(A, 1)
-@inline numcols(A::SparseVector) = 1
-@inline numcols(A::AbstractSparseMatrixCSC) = size(A, 2)
-# numrows and numcols respectively yield size(A, 1) and size(A, 2), but avoid a branch
-@inline columns(A::SparseVector) = 1
-@inline columns(A::AbstractSparseMatrixCSC) = 1:size(A, 2)
-@inline colrange(A::SparseVector, j) = 1:length(nonzeroinds(A))
-@inline colrange(A::AbstractSparseMatrixCSC, j) = nzrange(A, j)
-@inline colstartind(A::SparseVector, j) = one(indtype(A))
-@inline colboundind(A::SparseVector, j) = convert(indtype(A), length(nonzeroinds(A)) + 1)
-@inline colstartind(A::AbstractSparseMatrixCSC, j) = getcolptr(A)[j]
-@inline colboundind(A::AbstractSparseMatrixCSC, j) = getcolptr(A)[j + 1]
-@inline storedinds(A::SparseVector) = nonzeroinds(A)
-@inline storedinds(A::AbstractSparseMatrixCSC) = rowvals(A)
-@inline storedvals(A::SparseVecOrMat) = nonzeros(A)
-@inline setcolptr!(A::SparseVector, j, val) = val
-@inline setcolptr!(A::AbstractSparseMatrixCSC, j, val) = getcolptr(A)[j] = val
-function trimstorage!(A::SparseVecOrMat, maxstored)
-    resize!(storedinds(A), maxstored)
-    resize!(storedvals(A), maxstored)
-    return maxstored
-end
-function expandstorage!(A::SparseVecOrMat, maxstored)
-    length(storedinds(A)) < maxstored && resize!(storedinds(A), maxstored)
-    length(storedvals(A)) < maxstored && resize!(storedvals(A), maxstored)
-    return maxstored
-end
-
-
-# (2) map[!] entry points
-map(f::Tf, A::SparseVector) where {Tf} = _noshapecheck_map(f, A)
-map(f::Tf, A::AbstractSparseMatrixCSC) where {Tf} = _noshapecheck_map(f, A)
-map(f::Tf, A::AbstractSparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) where {Tf,N} =
-    (_checksameshape(A, Bs...); _noshapecheck_map(f, A, Bs...))
-map(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N} =
-    (_checksameshape(A, Bs...); _noshapecheck_map(f, A, Bs...))
-map!(f::Tf, C::AbstractSparseMatrixCSC, A::AbstractSparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) where {Tf,N} =
-    (_checksameshape(C, A, Bs...); _noshapecheck_map!(f, C, A, Bs...))
-map!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N} =
-    (_checksameshape(C, A, Bs...); _noshapecheck_map!(f, C, A, Bs...))
-function _noshapecheck_map!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    fofzeros = f(_zeros_eltypes(A, Bs...)...)
-    fpreszeros = _iszero(fofzeros)
-    return fpreszeros ? _map_zeropres!(f, C, A, Bs...) :
-                        _map_notzeropres!(f, fofzeros, C, A, Bs...)
-end
-function _noshapecheck_map(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    fofzeros = f(_zeros_eltypes(A, Bs...)...)
-    fpreszeros = _iszero(fofzeros)
-    maxnnzC = fpreszeros ? min(length(A), _sumnnzs(A, Bs...)) : length(A)
-    entrytypeC = Base.Broadcast.combine_eltypes(f, (A, Bs...))
-    indextypeC = _promote_indtype(A, Bs...)
-    C = _allocres(size(A), indextypeC, entrytypeC, maxnnzC)
-    return fpreszeros ? _map_zeropres!(f, C, A, Bs...) :
-                        _map_notzeropres!(f, fofzeros, C, A, Bs...)
-end
-# (3) broadcast[!] entry points
-copy(bc::SpBroadcasted1) = _noshapecheck_map(bc.f, bc.args[1])
-
-@inline function copyto!(C::SparseVecOrMat, bc::Broadcasted0{Nothing})
-    isempty(C) && return _finishempty!(C)
-    f = bc.f
-    fofnoargs = f()
-    if _iszero(fofnoargs) # f() is zero, so empty C
-        trimstorage!(C, 0)
-        _finishempty!(C)
-    else # f() is nonzero, so densify C and fill with independent calls to f()
-        _densestructure!(C)
-        storedvals(C)[1] = fofnoargs
-        broadcast!(f, view(storedvals(C), 2:length(storedvals(C))))
-    end
-    return C
-end
-
-function _diffshape_broadcast(f::Tf, A::SparseVecOrMat, Bs::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    fofzeros = f(_zeros_eltypes(A, Bs...)...)
-    fpreszeros = _iszero(fofzeros)
-    indextypeC = _promote_indtype(A, Bs...)
-    entrytypeC = Base.Broadcast.combine_eltypes(f, (A, Bs...))
-    shapeC = to_shape(Base.Broadcast.combine_axes(A, Bs...))
-    maxnnzC = fpreszeros ? _checked_maxnnzbcres(shapeC, A, Bs...) : _densennz(shapeC)
-    C = _allocres(shapeC, indextypeC, entrytypeC, maxnnzC)
-    return fpreszeros ? _broadcast_zeropres!(f, C, A, Bs...) :
-                        _broadcast_notzeropres!(f, fofzeros, C, A, Bs...)
-end
-# helper functions for map[!]/broadcast[!] entry points (and related methods below)
-@inline _sumnnzs(A) = nnz(A)
-@inline _sumnnzs(A, Bs...) = nnz(A) + _sumnnzs(Bs...)
-@inline _iszero(x) = x == 0
-@inline _iszero(x::Number) = Base.iszero(x)
-@inline _iszero(x::AbstractArray) = Base.iszero(x)
-@inline _zeros_eltypes(A) = (zero(eltype(A)),)
-@inline _zeros_eltypes(A, Bs...) = (zero(eltype(A)), _zeros_eltypes(Bs...)...)
-@inline _promote_indtype(A) = indtype(A)
-@inline _promote_indtype(A, Bs...) = promote_type(indtype(A), _promote_indtype(Bs...))
-@inline _aresameshape(A) = true
-@inline _aresameshape(A, B) = size(A) == size(B)
-@inline _aresameshape(A, B, Cs...) = _aresameshape(A, B) ? _aresameshape(B, Cs...) : false
-@inline _checksameshape(As...) = _aresameshape(As...) || throw(DimensionMismatch("argument shapes must match"))
-@inline _all_args_isa(t::Tuple{Any}, ::Type{T}) where T = isa(t[1], T)
-@inline _all_args_isa(t::Tuple{Any,Vararg{Any}}, ::Type{T}) where T = isa(t[1], T) & _all_args_isa(tail(t), T)
-@inline _all_args_isa(t::Tuple{Broadcasted}, ::Type{T}) where T = _all_args_isa(t[1].args, T)
-@inline _all_args_isa(t::Tuple{Broadcasted,Vararg{Any}}, ::Type{T}) where T = _all_args_isa(t[1].args, T) & _all_args_isa(tail(t), T)
-@inline _densennz(shape::NTuple{1}) = shape[1]
-@inline _densennz(shape::NTuple{2}) = shape[1] * shape[2]
-_maxnnzfrom(shape::NTuple{1}, A::SparseVector) = nnz(A) * div(shape[1], length(A))
-_maxnnzfrom(shape::NTuple{2}, A::SparseVector) = nnz(A) * div(shape[1], length(A)) * shape[2]
-_maxnnzfrom(shape::NTuple{2}, A::AbstractSparseMatrixCSC) = nnz(A) * div(shape[1], size(A, 1)) * div(shape[2], size(A, 2))
-@inline _maxnnzfrom_each(shape, ::Tuple{}) = ()
-@inline _maxnnzfrom_each(shape, As) = (_maxnnzfrom(shape, first(As)), _maxnnzfrom_each(shape, tail(As))...)
-@inline _unchecked_maxnnzbcres(shape, As::Tuple) = min(_densennz(shape), sum(_maxnnzfrom_each(shape, As)))
-@inline _unchecked_maxnnzbcres(shape, As...) = _unchecked_maxnnzbcres(shape, As)
-@inline _checked_maxnnzbcres(shape::NTuple{1}, As...) = shape[1] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
-@inline _checked_maxnnzbcres(shape::NTuple{2}, As...) = shape[1] != 0 && shape[2] != 0 ? _unchecked_maxnnzbcres(shape, As) : 0
-@inline function _allocres(shape::NTuple{1}, indextype, entrytype, maxnnz)
-    storedinds = Vector{indextype}(undef, maxnnz)
-    storedvals = Vector{entrytype}(undef, maxnnz)
-    return SparseVector(shape..., storedinds, storedvals)
-end
-@inline function _allocres(shape::NTuple{2}, indextype, entrytype, maxnnz)
-    pointers = ones(indextype, shape[2] + 1)
-    storedinds = Vector{indextype}(undef, maxnnz)
-    storedvals = Vector{entrytype}(undef, maxnnz)
-    return SparseMatrixCSC(shape..., pointers, storedinds, storedvals)
-end
-
-# (4) _map_zeropres!/_map_notzeropres! specialized for a single sparse vector/matrix
-"Stores only the nonzero entries of `map(f, Array(A))` in `C`."
-function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
-    Ck = 1
-    @inbounds for j in columns(C)
-        setcolptr!(C, j, Ck)
-        for Ak in colrange(A, j)
-            Cx = f(storedvals(A)[Ak])
-            if !_iszero(Cx)
-                Ck > spaceC && (spaceC = expandstorage!(C, Ck + nnz(A) - (Ak - 1)))
-                storedinds(C)[Ck] = storedinds(A)[Ak]
-                storedvals(C)[Ck] = Cx
-                Ck += 1
-            end
-        end
-    end
-    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
-    trimstorage!(C, Ck - 1)
-    return C
-end
-"""
-Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and
-`f(A[i])`/`f(A[i,j])` in place of each stored entry `A[i]`/`A[i,j]` in `A`.
-"""
-function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
-    # Build dense matrix structure in C, expanding storage if necessary
-    _densestructure!(C)
-    # Populate values
-    fill!(storedvals(C), fillvalue)
-    @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-        for Ak in colrange(A, j)
-            Cx = f(storedvals(A)[Ak])
-            Cx != fillvalue && (storedvals(C)[jo + storedinds(A)[Ak]] = Cx)
-        end
-    end
-    # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over /
-    # nonsequential access of storedvals(C) does not appear to improve performance.
-    return C
-end
-# helper functions for these methods and some of those below
-@inline _densecoloffsets(A::SparseVector) = 0
-@inline _densecoloffsets(A::AbstractSparseMatrixCSC) = 0:size(A, 1):(size(A, 1)*(size(A, 2) - 1))
-function _densestructure!(A::SparseVector)
-    expandstorage!(A, length(A))
-    copyto!(nonzeroinds(A), 1:length(A))
-    return A
-end
-function _densestructure!(A::AbstractSparseMatrixCSC)
-    nnzA = size(A, 1) * size(A, 2)
-    expandstorage!(A, nnzA)
-    copyto!(getcolptr(A), 1:size(A, 1):(nnzA + 1))
-    for k in _densecoloffsets(A)
-        copyto!(rowvals(A), k + 1, 1:size(A, 1))
-    end
-    return A
-end
-
-
-# (5) _map_zeropres!/_map_notzeropres! specialized for a pair of sparse vectors/matrices
-function _map_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
-    rowsentinelA = convert(indtype(A), numrows(C) + 1)
-    rowsentinelB = convert(indtype(B), numrows(C) + 1)
-    Ck = 1
-    @inbounds for j in columns(C)
-        setcolptr!(C, j, Ck)
-        Ak, stopAk = colstartind(A, j), colboundind(A, j)
-        Bk, stopBk = colstartind(B, j), colboundind(B, j)
-        Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-        Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-        while true
-            if Ai == Bi
-                Ai == rowsentinelA && break # column complete
-                Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai
-                Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-            elseif Ai < Bi
-                Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai
-                Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-            else # Bi < Ai
-                Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi
-                Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-            end
-            # NOTE: The ordering of the conditional chain above impacts which matrices this
-            # method performs best for. In the map situation (arguments have same shape, and
-            # likely same or similar stored entry pattern), the Ai == Bi and termination
-            # cases are equally or more likely than the Ai < Bi and Bi < Ai cases. Hence
-            # the ordering of the conditional chain above differs from that in the
-            # corresponding broadcast code (below).
-            if !_iszero(Cx)
-                Ck > spaceC && (spaceC = expandstorage!(C, Ck + (nnz(A) - (Ak - 1)) + (nnz(B) - (Bk - 1))))
-                storedinds(C)[Ck] = Ci
-                storedvals(C)[Ck] = Cx
-                Ck += 1
-            end
-        end
-    end
-    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
-    trimstorage!(C, Ck - 1)
-    return C
-end
-function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
-    # Build dense matrix structure in C, expanding storage if necessary
-    _densestructure!(C)
-    # Populate values
-    fill!(storedvals(C), fillvalue)
-    # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over /
-    # nonsequential access of storedvals(C) does not appear to improve performance.
-    rowsentinelA = convert(indtype(A), numrows(A) + 1)
-    rowsentinelB = convert(indtype(B), numrows(B) + 1)
-    @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-        Ak, stopAk = colstartind(A, j), colboundind(A, j)
-        Bk, stopBk = colstartind(B, j), colboundind(B, j)
-        Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-        Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-        while true
-            if Ai == Bi
-                Ai == rowsentinelA && break # column complete
-                Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai
-                Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-            elseif Ai < Bi
-                Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai
-                Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-            else # Bi < Ai
-                Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi
-                Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-            end
-            Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
-        end
-    end
-    return C
-end
-
-
-# (6) _map_zeropres!/_map_notzeropres! for more than two sparse matrices / vectors
-function _map_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
-    rowsentinel = numrows(C) + 1
-    Ck = 1
-    stopks = _colstartind_all(1, As)
-    @inbounds for j in columns(C)
-        setcolptr!(C, j, Ck)
-        ks = stopks
-        stopks = _colboundind_all(j, As)
-        rows = _rowforind_all(rowsentinel, ks, stopks, As)
-        activerow = min(rows...)
-        while activerow < rowsentinel
-            vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As)
-            Cx = f(vals...)
-            if !_iszero(Cx)
-                Ck > spaceC && (spaceC = expandstorage!(C, min(length(C), Ck + _sumnnzs(As...) - (sum(ks) - N))))
-                storedinds(C)[Ck] = activerow
-                storedvals(C)[Ck] = Cx
-                Ck += 1
-            end
-            activerow = min(rows...)
-        end
-    end
-    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
-    trimstorage!(C, Ck - 1)
-    return C
-end
-function _map_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    # Build dense matrix structure in C, expanding storage if necessary
-    _densestructure!(C)
-    # Populate values
-    fill!(storedvals(C), fillvalue)
-    # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over /
-    # nonsequential access of nonzeros(C) does not appear to improve performance.
-    rowsentinel = numrows(C) + 1
-    stopks = _colstartind_all(1, As)
-    @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-        ks = stopks
-        stopks = _colboundind_all(j, As)
-        rows = _rowforind_all(rowsentinel, ks, stopks, As)
-        activerow = min(rows...)
-        while activerow < rowsentinel
-            vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As)
-            Cx = f(vals...)
-            Cx != fillvalue && (storedvals(C)[jo + activerow] = Cx)
-            activerow = min(rows...)
-        end
-    end
-    return C
-end
-
-# helper methods for map/map! methods just above
-@inline _colstartind(j, A) = colstartind(A, j)
-@inline _colstartind_all(j, ::Tuple{}) = ()
-@inline _colstartind_all(j, As) = (
-    _colstartind(j, first(As)),
-    _colstartind_all(j, tail(As))...)
-@inline _colboundind(j, A) = colboundind(A, j)
-@inline _colboundind_all(j, ::Tuple{}) = ()
-@inline _colboundind_all(j, As) = (
-    _colboundind(j, first(As)),
-    _colboundind_all(j, tail(As))...)
-@inline _rowforind(rowsentinel, k, stopk, A) =
-    k < stopk ? storedinds(A)[k] : convert(indtype(A), rowsentinel)
-@inline _rowforind_all(rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-@inline _rowforind_all(rowsentinel, ks, stopks, As) = (
-    _rowforind(rowsentinel, first(ks), first(stopks), first(As)),
-    _rowforind_all(rowsentinel, tail(ks), tail(stopks), tail(As))...)
-
-@inline function _fusedupdate(rowsentinel, activerow, row, k, stopk, A)
-    # returns (val, nextk, nextrow)
-    if row == activerow
-        nextk = k + oneunit(k)
-        (storedvals(A)[k], nextk, (nextk < stopk ? storedinds(A)[nextk] : oftype(row, rowsentinel)))
-    else
-        (zero(eltype(A)), k, row)
-    end
-end
-@inline _fusedupdate_all(rowsentinel, activerow, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ((#=vals=#), (#=nextks=#), (#=nextrows=#))
-@inline function _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As)
-    val, nextk, nextrow = _fusedupdate(rowsentinel, activerow, first(rows), first(ks), first(stopks), first(As))
-    vals, nextks, nextrows = _fusedupdate_all(rowsentinel, activerow, tail(rows), tail(ks), tail(stopks), tail(As))
-    return ((val, vals...), (nextk, nextks...), (nextrow, nextrows...))
-end
-
-
-# (7) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a single (input) sparse vector/matrix
-function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
-    isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
-    # C and A cannot have the same shape, as we directed that case to map in broadcast's
-    # entry point; here we need efficiently handle only heterogeneous C-A combinations where
-    # one or both of C and A has at least one singleton dimension.
-    #
-    # We first divide the cases into two groups: those in which the input argument does not
-    # expand vertically, and those in which the input argument expands vertically.
-    #
-    # Cases without vertical expansion
-    Ck = 1
-    if numrows(A) == numrows(C)
-        @inbounds for j in columns(C)
-            setcolptr!(C, j, Ck)
-            bccolrangejA = numcols(A) == 1 ? colrange(A, 1) : colrange(A, j)
-            for Ak in bccolrangejA
-                Cx = f(storedvals(A)[Ak])
-                if !_iszero(Cx)
-                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A)))
-                    storedinds(C)[Ck] = storedinds(A)[Ak]
-                    storedvals(C)[Ck] = Cx
-                    Ck += 1
-                end
-            end
-        end
-    # Cases with vertical expansion
-    else # numrows(A) != numrows(C) (=> numrows(A) == 1)
-        @inbounds for j in columns(C)
-            setcolptr!(C, j, Ck)
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
-            fofAx = f(Ax)
-            # if fofAx is zero, then either A's jth column is empty, or A's jth column
-            # contains a nonzero value x but f(Ax) is nonetheless zero, so we need store
-            # nothing in C's jth column. if to the contrary fofAx is nonzero, then we must
-            # densely populate C's jth column with fofAx.
-            if !_iszero(fofAx)
-                for Ci::indtype(C) in 1:numrows(C)
-                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A)))
-                    storedinds(C)[Ck] = Ci
-                    storedvals(C)[Ck] = fofAx
-                    Ck += 1
-                end
-            end
-        end
-    end
-    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
-    trimstorage!(C, Ck - 1)
-    return C
-end
-function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat) where Tf
-    # For information on this code, see comments in similar code in _broadcast_zeropres! above
-    # Build dense matrix structure in C, expanding storage if necessary
-    _densestructure!(C)
-    # Populate values
-    fill!(storedvals(C), fillvalue)
-    # Cases without vertical expansion
-    if numrows(A) == numrows(C)
-        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-            bccolrangejA = numcols(A) == 1 ? colrange(A, 1) : colrange(A, j)
-            for Ak in bccolrangejA
-                Cx, Ci = f(storedvals(A)[Ak]), storedinds(A)[Ak]
-                Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
-            end
-        end
-    # Cases with vertical expansion
-    else # numrows(A) != numrows(C) (=> numrows(A) == 1)
-        svA, svC = storedvals(A), storedvals(C)
-        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Ax = Ak < stopAk ? svA[Ak] : zero(eltype(A))
-            fofAx = f(Ax)
-            if fofAx != fillvalue
-                for i in (jo + 1):(jo + numrows(C))
-                    svC[i] = fofAx
-                end
-            end
-        end
-    end
-    return C
-end
-
-
-# (8) _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse vectors/matrices
-function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
-    isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
-    rowsentinelA = convert(indtype(A), numrows(C) + 1)
-    rowsentinelB = convert(indtype(B), numrows(C) + 1)
-    # C, A, and B cannot all have the same shape, as we directed that case to map in broadcast's
-    # entry point; here we need efficiently handle only heterogeneous combinations of mats/vecs
-    # with no singleton dimensions, one singleton dimension, and two singleton dimensions.
-    # Cases involving objects with two singleton dimensions should be rare and optimizing
-    # that case complicates the code appreciably, so we largely ignore that case's
-    # performance below.
-    #
-    # We first divide the cases into two groups: those in which neither input argument
-    # expands vertically, and those in which at least one argument expands vertically.
-    #
-    # NOTE: Placing the loops over columns outside the conditional chain segregating
-    # argument shape combinations eliminates some code replication but unfortunately
-    # hurts performance appreciably in some cases.
-    #
-    # Cases without vertical expansion
-    Ck = 1
-    if numrows(A) == numrows(B) == numrows(C)
-        @inbounds for j in columns(C)
-            setcolptr!(C, j, Ck)
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
-            # Restructuring this k/stopk code to avoid unnecessary colptr retrievals does
-            # not improve performance signicantly. Leave in this less complex form.
-            Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-            Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-            while true
-                if Ai != Bi
-                    if Ai < Bi
-                        Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai
-                        Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                    else # Ai > Bi
-                        Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi
-                        Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-                    end
-                elseif #= Ai == Bi && =# Ai == rowsentinelA
-                    break # column complete
-                else #= Ai == Bi != rowsentinel =#
-                    Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai
-                    Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                    Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-                end
-                # NOTE: The ordering of the conditional chain above impacts which matrices
-                # this method perform best for. In contrast to the map situation (arguments
-                # have same shape, and likely same or similar stored entry pattern), where
-                # the Ai == Bi and termination cases are equally or more likely than the
-                # Ai < Bi and Bi < Ai cases, in the broadcast situation (arguments have
-                # different shape, and likely largely disjoint expanded stored entry
-                # pattern) the Ai < Bi and Bi < Ai cases are equally or more likely than the
-                # Ai == Bi and termination cases. Hence the ordering of the conditional
-                # chain above differs from that in the corresponding map code.
-                if !_iszero(Cx)
-                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
-                    storedinds(C)[Ck] = Ci
-                    storedvals(C)[Ck] = Cx
-                    Ck += 1
-                end
-            end
-        end
-    # Cases with vertical expansion
-    elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B
-        @inbounds for j in columns(C)
-            setcolptr!(C, j, Ck)
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
-            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
-            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
-            Cx = f(Ax, Bx)
-            if !_iszero(Cx)
-                for Ci::indtype(C) in 1:numrows(C)
-                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
-                    storedinds(C)[Ck] = Ci
-                    storedvals(C)[Ck] = Cx
-                    Ck += 1
-                end
-            end
-        end
-    elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1 , vertically expand only A
-        @inbounds for j in columns(C)
-            setcolptr!(C, j, Ck)
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
-            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
-            fvAzB = f(Ax, zero(eltype(B)))
-            if _iszero(fvAzB)
-                # either A's jth column is empty, or A's jth column contains a nonzero value
-                # Ax but f(Ax, zero(eltype(B))) is nonetheless zero, so we can scan through
-                # B's jth column without storing every entry in C's jth column
-                while Bk < stopBk
-                    Cx = f(Ax, storedvals(B)[Bk])
-                    if !_iszero(Cx)
-                        Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
-                        storedinds(C)[Ck] = storedinds(B)[Bk]
-                        storedvals(C)[Ck] = Cx
-                        Ck += 1
-                    end
-                    Bk += oneunit(Bk)
-                end
-            else
-                # A's jth column is nonempty and f(Ax, zero(eltype(B))) is not zero, so
-                # we must store (likely) every entry in C's jth column
-                Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-                for Ci::indtype(C) in 1:numrows(C)
-                    if Bi == Ci
-                        Cx = f(Ax, storedvals(B)[Bk])
-                        Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-                    else
-                        Cx = fvAzB
-                    end
-                    if !_iszero(Cx)
-                        Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
-                        storedinds(C)[Ck] = Ci
-                        storedvals(C)[Ck] = Cx
-                        Ck += 1
-                    end
-                end
-            end
-        end
-    else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B
-        @inbounds for j in columns(C)
-            setcolptr!(C, j, Ck)
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
-            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
-            fzAvB = f(zero(eltype(A)), Bx)
-            if _iszero(fzAvB)
-                # either B's jth column is empty, or B's jth column contains a nonzero value
-                # Bx but f(zero(eltype(A)), Bx) is nonetheless zero, so we can scan through
-                # A's jth column without storing every entry in C's jth column
-                while Ak < stopAk
-                    Cx = f(storedvals(A)[Ak], Bx)
-                    if !_iszero(Cx)
-                        Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
-                        storedinds(C)[Ck] = storedinds(A)[Ak]
-                        storedvals(C)[Ck] = Cx
-                        Ck += 1
-                    end
-                    Ak += oneunit(Ak)
-                end
-            else
-                # B's jth column is nonempty and f(zero(eltype(A)), Bx) is not zero, so
-                # we must store (likely) every entry in C's jth column
-                Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                for Ci::indtype(C) in 1:numrows(C)
-                    if Ai == Ci
-                        Cx = f(storedvals(A)[Ak], Bx)
-                        Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                    else
-                        Cx = fzAvB
-                    end
-                    if !_iszero(Cx)
-                        Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), A, B)))
-                        storedinds(C)[Ck] = Ci
-                        storedvals(C)[Ck] = Cx
-                        Ck += 1
-                    end
-                end
-            end
-        end
-    end
-    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
-    trimstorage!(C, Ck - 1)
-    return C
-end
-function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, A::SparseVecOrMat, B::SparseVecOrMat) where Tf
-    # For information on this code, see comments in similar code in _broadcast_zeropres! above
-    # Build dense matrix structure in C, expanding storage if necessary
-    _densestructure!(C)
-    # Populate values
-    fill!(storedvals(C), fillvalue)
-    rowsentinelA = convert(indtype(A), numrows(C) + 1)
-    rowsentinelB = convert(indtype(B), numrows(C) + 1)
-    # Cases without vertical expansion
-    if numrows(A) == numrows(B) == numrows(C)
-        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
-            Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-            Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-            while true
-                if Ai < Bi
-                    Cx, Ci = f(storedvals(A)[Ak], zero(eltype(B))), Ai
-                    Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                elseif Ai > Bi
-                    Cx, Ci = f(zero(eltype(A)), storedvals(B)[Bk]), Bi
-                    Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-                elseif #= Ai == Bi && =# Ai == rowsentinelA
-                    break # column complete
-                else #= Ai == Bi != rowsentinel =#
-                    Cx, Ci::indtype(C) = f(storedvals(A)[Ak], storedvals(B)[Bk]), Ai
-                    Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                    Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-                end
-                Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
-            end
-        end
-    # Cases with vertical expansion
-    elseif numrows(A) == numrows(B) == 1 # && numrows(C) != 1, vertically expand both A and B
-        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
-            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
-            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
-            Cx = f(Ax, Bx)
-            if Cx != fillvalue
-                for Ck::Int in (jo + 1):(jo + numrows(C))
-                    storedvals(C)[Ck] = Cx
-                end
-            end
-        end
-    elseif numrows(A) == 1 # && numrows(B) == numrows(C) != 1, vertically expand only A
-        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
-            Ax = Ak < stopAk ? storedvals(A)[Ak] : zero(eltype(A))
-            fvAzB = f(Ax, zero(eltype(B)))
-            if fvAzB == fillvalue
-                while Bk < stopBk
-                    Cx = f(Ax, storedvals(B)[Bk])
-                    Cx != fillvalue && (storedvals(C)[jo + storedinds(B)[Bk]] = Cx)
-                    Bk += oneunit(Bk)
-                end
-            else
-                Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-                for Ci::indtype(C) in 1:numrows(C)
-                    if Bi == Ci
-                        Cx = f(Ax, storedvals(B)[Bk])
-                        Bk += oneunit(Bk); Bi = Bk < stopBk ? storedinds(B)[Bk] : rowsentinelB
-                    else
-                        Cx = fvAzB
-                    end
-                    Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
-                end
-            end
-        end
-    else # numrows(B) == 1 && numrows(A) == numrows(C) != 1, vertically expand only B
-        @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-            Ak, stopAk = numcols(A) == 1 ? (colstartind(A, 1), colboundind(A, 1)) : (colstartind(A, j), colboundind(A, j))
-            Bk, stopBk = numcols(B) == 1 ? (colstartind(B, 1), colboundind(B, 1)) : (colstartind(B, j), colboundind(B, j))
-            Bx = Bk < stopBk ? storedvals(B)[Bk] : zero(eltype(B))
-            fzAvB = f(zero(eltype(A)), Bx)
-            if fzAvB == fillvalue
-                while Ak < stopAk
-                    Cx = f(storedvals(A)[Ak], Bx)
-                    Cx != fillvalue && (storedvals(C)[jo + storedinds(A)[Ak]] = Cx)
-                    Ak += oneunit(Ak)
-                end
-            else
-                Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                for Ci::indtype(C) in 1:numrows(C)
-                    if Ai == Ci
-                        Cx = f(storedvals(A)[Ak], Bx)
-                        Ak += oneunit(Ak); Ai = Ak < stopAk ? storedinds(A)[Ak] : rowsentinelA
-                    else
-                        Cx = fzAvB
-                    end
-                    Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
-                end
-            end
-        end
-    end
-    return C
-end
-_finishempty!(C::SparseVector) = C
-_finishempty!(C::AbstractSparseMatrixCSC) = (fill!(getcolptr(C), 1); C)
-
-# special case - vector outer product
-_copy(f::typeof(*), x::SparseVectorUnion, y::AdjOrTransSparseVectorUnion) = _outer(x, y)
-@inline _outer(x::SparseVectorUnion, y::Adjoint) = return _outer(conj, x, parent(y))
-@inline _outer(x::SparseVectorUnion, y::Transpose) = return _outer(identity, x, parent(y))
-function _outer(trans::Tf, x, y) where Tf
-    nx = length(x)
-    ny = length(y)
-    rowvalx = nonzeroinds(x)
-    rowvaly = nonzeroinds(y)
-    nzvalsx = nonzeros(x)
-    nzvalsy = nonzeros(y)
-    nnzx = length(nzvalsx)
-    nnzy = length(nzvalsy)
-
-    nnzC = nnzx * nnzy
-    Tv = typeof(oneunit(eltype(x)) * oneunit(eltype(y)))
-    Ti = promote_type(indtype(x), indtype(y))
-    colptrC = zeros(Ti, ny + 1)
-    rowvalC = Vector{Ti}(undef, nnzC)
-    nzvalsC = Vector{Tv}(undef, nnzC)
-
-    idx = 0
-    @inbounds colptrC[1] = 1
-    @inbounds for jj = 1:nnzy
-        yval = nzvalsy[jj]
-        iszero(yval) && continue
-        col = rowvaly[jj]
-        yval = trans(yval)
-
-        for ii = 1:nnzx
-            xval = nzvalsx[ii]
-            iszero(xval) && continue
-            idx += 1
-            colptrC[col+1] += 1
-            rowvalC[idx] = rowvalx[ii]
-            nzvalsC[idx] = xval * yval
-        end
-    end
-    cumsum!(colptrC, colptrC)
-
-    return SparseMatrixCSC(nx, ny, colptrC, rowvalC, nzvalsC)
-end
-
-# (9) _broadcast_zeropres!/_broadcast_notzeropres! for more than two (input) sparse vectors/matrices
-function _broadcast_zeropres!(f::Tf, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    isempty(C) && return _finishempty!(C)
-    spaceC::Int = min(length(storedinds(C)), length(storedvals(C)))
-    expandsverts = _expandsvert_all(C, As)
-    expandshorzs = _expandshorz_all(C, As)
-    rowsentinel = numrows(C) + 1
-    Ck = 1
-    @inbounds for j in columns(C)
-        setcolptr!(C, j, Ck)
-        ks = _startindforbccol_all(j, expandshorzs, As)
-        stopks = _stopindforbccol_all(j, expandshorzs, As)
-        # Neither fusing ks and stopks construction, nor restructuring them to avoid repeated
-        # colptr lookups, improves performance significantly. So keep the less complex approach here.
-        isemptys = _isemptycol_all(ks, stopks)
-        defargs = _defargforcol_all(j, isemptys, expandsverts, ks, As)
-        rows = _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As)
-        defaultCx = f(defargs...)
-        activerow = min(rows...)
-        if _iszero(defaultCx) # zero-preserving column scan
-            while activerow < rowsentinel
-                args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-                Cx = f(args...)
-                if !_iszero(Cx)
-                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), As)))
-                    storedinds(C)[Ck] = activerow
-                    storedvals(C)[Ck] = Cx
-                    Ck += 1
-                end
-                activerow = min(rows...)
-            end
-        else # zero-non-preserving column scan
-            for Ci in 1:numrows(C)
-                if Ci == activerow
-                    args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-                    Cx = f(args...)
-                    activerow = min(rows...)
-                else
-                    Cx = defaultCx
-                end
-                if !_iszero(Cx)
-                    Ck > spaceC && (spaceC = expandstorage!(C, _unchecked_maxnnzbcres(size(C), As)))
-                    storedinds(C)[Ck] = Ci
-                    storedvals(C)[Ck] = Cx
-                    Ck += 1
-                end
-            end
-        end
-    end
-    @inbounds setcolptr!(C, numcols(C) + 1, Ck)
-    trimstorage!(C, Ck - 1)
-    return C
-end
-function _broadcast_notzeropres!(f::Tf, fillvalue, C::SparseVecOrMat, As::Vararg{SparseVecOrMat,N}) where {Tf,N}
-    isempty(C) && return _finishempty!(C)
-    # Build dense matrix structure in C, expanding storage if necessary
-    _densestructure!(C)
-    # Populate values
-    fill!(storedvals(C), fillvalue)
-    expandsverts = _expandsvert_all(C, As)
-    expandshorzs = _expandshorz_all(C, As)
-    rowsentinel = numrows(C) + 1
-    @inbounds for (j, jo) in zip(columns(C), _densecoloffsets(C))
-        ks = _startindforbccol_all(j, expandshorzs, As)
-        stopks = _stopindforbccol_all(j, expandshorzs, As)
-        # Neither fusing ks and stopks construction, nor restructuring them to avoid repeated
-        # colptr lookups, improves performance significantly. So keep the less complex approach here.
-        isemptys = _isemptycol_all(ks, stopks)
-        defargs = _defargforcol_all(j, isemptys, expandsverts, ks, As)
-        rows = _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As)
-        defaultCx = f(defargs...)
-        activerow = min(rows...)
-        if defaultCx == fillvalue # fillvalue-preserving column scan
-            while activerow < rowsentinel
-                args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-                Cx = f(args...)
-                Cx != fillvalue && (storedvals(C)[jo + activerow] = Cx)
-                activerow = min(rows...)
-            end
-        else # fillvalue-non-preserving column scan
-            for Ci in 1:numrows(C)
-                if Ci == activerow
-                    args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-                    Cx = f(args...)
-                    activerow = min(rows...)
-                else
-                    Cx = defaultCx
-                end
-                Cx != fillvalue && (storedvals(C)[jo + Ci] = Cx)
-            end
-        end
-    end
-    return C
-end
-
-# helper method for broadcast/broadcast! methods just above
-@inline _expandsvert(C, A) = numrows(A) != numrows(C)
-@inline _expandsvert_all(C, ::Tuple{}) = ()
-@inline _expandsvert_all(C, As) = (_expandsvert(C, first(As)), _expandsvert_all(C, tail(As))...)
-@inline _expandshorz(C, A) = numcols(A) != numcols(C)
-@inline _expandshorz_all(C, ::Tuple{}) = ()
-@inline _expandshorz_all(C, As) = (_expandshorz(C, first(As)), _expandshorz_all(C, tail(As))...)
-@inline _startindforbccol(j, expandshorz, A) = expandshorz ? colstartind(A, 1) : colstartind(A, j)
-@inline _startindforbccol_all(j, ::Tuple{}, ::Tuple{}) = ()
-@inline _startindforbccol_all(j, expandshorzs, As) = (
-    _startindforbccol(j, first(expandshorzs), first(As)),
-    _startindforbccol_all(j, tail(expandshorzs), tail(As))...)
-@inline _stopindforbccol(j, expandshorz, A) = expandshorz ? colboundind(A, 1) : colboundind(A, j)
-@inline _stopindforbccol_all(j, ::Tuple{}, ::Tuple{}) = ()
-@inline _stopindforbccol_all(j, expandshorzs, As) = (
-    _stopindforbccol(j, first(expandshorzs), first(As)),
-    _stopindforbccol_all(j, tail(expandshorzs), tail(As))...)
-@inline _isemptycol(k, stopk) = k == stopk
-@inline _isemptycol_all(::Tuple{}, ::Tuple{}) = ()
-@inline _isemptycol_all(ks, stopks) = (
-    _isemptycol(first(ks), first(stopks)),
-    _isemptycol_all(tail(ks), tail(stopks))...)
-@inline _initrowforcol(j, rowsentinel, isempty, expandsvert, k, A) =
-    expandsvert || isempty ? convert(indtype(A), rowsentinel) : storedinds(A)[k]
-@inline _initrowforcol_all(j, rowsentinel, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-@inline _initrowforcol_all(j, rowsentinel, isemptys, expandsverts, ks, As) = (
-    _initrowforcol(j, rowsentinel, first(isemptys), first(expandsverts), first(ks), first(As)),
-    _initrowforcol_all(j, rowsentinel, tail(isemptys), tail(expandsverts), tail(ks), tail(As))...)
-@inline _defargforcol(j, isempty, expandsvert, k, A) =
-    expandsvert && !isempty ? storedvals(A)[k] : zero(eltype(A))
-@inline _defargforcol_all(j, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ()
-@inline _defargforcol_all(j, isemptys, expandsverts, ks, As) = (
-    _defargforcol(j, first(isemptys), first(expandsverts), first(ks), first(As)),
-    _defargforcol_all(j, tail(isemptys), tail(expandsverts), tail(ks), tail(As))...)
-@inline function _fusedupdatebc(rowsentinel, activerow, row, defarg, k, stopk, A)
-    # returns (val, nextk, nextrow)
-    if row == activerow
-        nextk = k + oneunit(k)
-        (storedvals(A)[k], nextk, (nextk < stopk ? storedinds(A)[nextk] : oftype(row, rowsentinel)))
-    else
-        (defarg, k, row)
-    end
-end
-@inline _fusedupdatebc_all(rowsent, activerow, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}, ::Tuple{}) = ((#=vals=#), (#=nextks=#), (#=nextrows=#))
-@inline function _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As)
-    val, nextk, nextrow = _fusedupdatebc(rowsentinel, activerow, first(rows), first(defargs), first(ks), first(stopks), first(As))
-    vals, nextks, nextrows = _fusedupdatebc_all(rowsentinel, activerow, tail(rows), tail(defargs), tail(ks), tail(stopks), tail(As))
-    return ((val, vals...), (nextk, nextks...), (nextrow, nextrows...))
-end
-
-
-# (10) broadcast over combinations of broadcast scalars and sparse vectors/matrices
-
-# broadcast entry points for combinations of sparse arrays and other (scalar) types
-@inline function copy(bc::Broadcasted{<:SPVM})
-    bcf = flatten(bc)
-    return _copy(bcf.f, bcf.args...)
-end
-
-_copy(f, args::SparseVector...) = _shapecheckbc(f, args...)
-_copy(f, args::AbstractSparseMatrixCSC...) = _shapecheckbc(f, args...)
-_copy(f, args::SparseVecOrMat...) = _diffshape_broadcast(f, args...)
-# Otherwise, we incorporate scalars into the function and re-dispatch
-function _copy(f, args...)
-    parevalf, passedargstup = capturescalars(f, args)
-    return _copy(parevalf, passedargstup...)
-end
-_copy(f) = throw(MethodError(_copy, (f,)))  # avoid method ambiguity
-
-function _shapecheckbc(f, args...)
-    _aresameshape(args...) ? _noshapecheck_map(f, args...) : _diffshape_broadcast(f, args...)
-end
-
-
-@inline function copyto!(dest::SparseVecOrMat, bc::Broadcasted{<:SPVM})
-    if bc.f === identity && bc isa SpBroadcasted1 && Base.axes(dest) == (A = bc.args[1]; Base.axes(A))
-        return copyto!(dest, A)
-    end
-    bcf = flatten(bc)
-    As = map(arg->Base.unalias(dest, arg), bcf.args)
-    return _copyto!(bcf.f, dest, As...)
-end
-
-@inline function _copyto!(f, dest, As::SparseVecOrMat...)
-    _aresameshape(dest, As...) && return _noshapecheck_map!(f, dest, As...)
-    Base.Broadcast.check_broadcast_axes(axes(dest), As...)
-    fofzeros = f(_zeros_eltypes(As...)...)
-    if _iszero(fofzeros)
-        return _broadcast_zeropres!(f, dest, As...)
-    else
-        return _broadcast_notzeropres!(f, fofzeros, dest, As...)
-    end
-end
-
-@inline function _copyto!(f, dest, args...)
-    # args contains nothing but SparseVecOrMat and scalars
-    # See below for capturescalars
-    parevalf, passedsrcargstup = capturescalars(f, args)
-    _copyto!(parevalf, dest, passedsrcargstup...)
-end
-
-# capturescalars takes a function (f) and a tuple of mixed sparse vectors/matrices and
-# broadcast scalar arguments (mixedargs), and returns a function (parevalf, i.e. partially
-# evaluated f) and a reduced argument tuple (passedargstup) containing only the sparse
-# vectors/matrices in mixedargs in their original order, and such that the result of
-# broadcast(parevalf, passedargstup...) is broadcast(f, mixedargs...)
-@inline function capturescalars(f, mixedargs)
-    let (passedsrcargstup, makeargs) = _capturescalars(mixedargs...)
-        parevalf = (passed...) -> f(makeargs(passed...)...)
-        return (parevalf, passedsrcargstup)
-    end
-end
-# Work around losing Type{T}s as DataTypes within the tuple that makeargs creates
-@inline capturescalars(f, mixedargs::Tuple{Ref{Type{T}}, Vararg{Any}}) where {T} =
-    capturescalars((args...)->f(T, args...), Base.tail(mixedargs))
-@inline capturescalars(f, mixedargs::Tuple{Ref{Type{T}}, Ref{Type{S}}, Vararg{Any}}) where {T, S} =
-    # This definition is identical to the one above and necessary only for
-    # avoiding method ambiguity.
-    capturescalars((args...)->f(T, args...), Base.tail(mixedargs))
-@inline capturescalars(f, mixedargs::Tuple{SparseVecOrMat, Ref{Type{T}}, Vararg{Any}}) where {T} =
-    capturescalars((a1, args...)->f(a1, T, args...), (mixedargs[1], Base.tail(Base.tail(mixedargs))...))
-@inline capturescalars(f, mixedargs::Tuple{Union{Ref,AbstractArray{<:Any,0}}, Ref{Type{T}}, Vararg{Any}}) where {T} =
-    capturescalars((args...)->f(mixedargs[1], T, args...), Base.tail(Base.tail(mixedargs)))
-
-nonscalararg(::SparseVecOrMat) = true
-nonscalararg(::Any) = false
-scalarwrappedarg(::Union{AbstractArray{<:Any,0},Ref}) = true
-scalarwrappedarg(::Any) = false
-
-@inline function _capturescalars()
-    return (), () -> ()
-end
-@inline function _capturescalars(arg, mixedargs...)
-    let (rest, f) = _capturescalars(mixedargs...)
-        if nonscalararg(arg)
-            return (arg, rest...), @inline function(head, tail...)
-                (head, f(tail...)...)
-            end # pass-through to broadcast
-        elseif scalarwrappedarg(arg)
-            return rest, @inline function(tail...)
-                (arg[], f(tail...)...) # TODO: This can put a Type{T} in a tuple
-            end # unwrap and add back scalararg after (in makeargs)
-        else
-            return rest, @inline function(tail...)
-                (arg, f(tail...)...)
-            end # add back scalararg after (in makeargs)
-        end
-    end
-end
-@inline function _capturescalars(arg) # this definition is just an optimization (to bottom out the recursion slightly sooner)
-    if nonscalararg(arg)
-        return (arg,), (head,) -> (head,) # pass-through
-    elseif scalarwrappedarg(arg)
-        return (), () -> (arg[],) # unwrap
-    else
-        return (), () -> (arg,) # add scalararg
-    end
-end
-
-# NOTE: The following two method definitions work around #19096.
-broadcast(f::Tf, ::Type{T}, A::AbstractSparseMatrixCSC) where {Tf,T} = broadcast(y -> f(T, y), A)
-broadcast(f::Tf, A::AbstractSparseMatrixCSC, ::Type{T}) where {Tf,T} = broadcast(x -> f(x, T), A)
-
-
-# (11) broadcast[!] over combinations of scalars, sparse vectors/matrices, structured matrices,
-# and one- and two-dimensional Arrays (via promotion of structured matrices and Arrays)
-#
-# for combinations involving only scalars, sparse arrays, structured matrices, and dense
-# vectors/matrices, promote all structured matrices and dense vectors/matrices to sparse
-# and rebroadcast. otherwise, divert to generic AbstractArray broadcast code.
-
-function copy(bc::Broadcasted{PromoteToSparse})
-    bcf = flatten(bc)
-    if can_skip_sparsification(bcf.f, bcf.args...)
-        return _copy(bcf.f, bcf.args...)
-    elseif is_supported_sparse_broadcast(bcf.args...)
-        return _copy(bcf.f, map(_sparsifystructured, bcf.args)...)
-    else
-        return copy(convert(Broadcasted{Broadcast.DefaultArrayStyle{length(axes(bc))}}, bc))
-    end
-end
-
-@inline function copyto!(dest::SparseVecOrMat, bc::Broadcasted{PromoteToSparse})
-    bcf = flatten(bc)
-    broadcast!(bcf.f, dest, map(_sparsifystructured, bcf.args)...)
-end
-
-_sparsifystructured(M::AbstractMatrix) = SparseMatrixCSC(M)
-_sparsifystructured(V::AbstractVector) = SparseVector(V)
-_sparsifystructured(M::AbstractSparseMatrix) = SparseMatrixCSC(M)
-_sparsifystructured(V::AbstractSparseVector) = SparseVector(V)
-_sparsifystructured(S::SparseVecOrMat) = S
-_sparsifystructured(x) = x
-
-
-# (12) map[!] over combinations of sparse and structured matrices
-SparseOrStructuredMatrix = Union{SparseMatrixCSC,LinearAlgebra.StructuredMatrix}
-map(f::Tf, A::SparseOrStructuredMatrix, Bs::Vararg{SparseOrStructuredMatrix,N}) where {Tf,N} =
-    (_checksameshape(A, Bs...); _noshapecheck_map(f, _sparsifystructured(A), map(_sparsifystructured, Bs)...))
-map!(f::Tf, C::AbstractSparseMatrixCSC, A::SparseOrStructuredMatrix, Bs::Vararg{SparseOrStructuredMatrix,N}) where {Tf,N} =
-    (_checksameshape(C, A, Bs...); _noshapecheck_map!(f, C, _sparsifystructured(A), map(_sparsifystructured, Bs)...))
-
-end
diff --git a/stdlib/SparseArrays/src/linalg.jl b/stdlib/SparseArrays/src/linalg.jl
deleted file mode 100644
index 37d95b12a89783..00000000000000
--- a/stdlib/SparseArrays/src/linalg.jl
+++ /dev/null
@@ -1,1629 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-import LinearAlgebra: checksquare, sym_uplo
-using Random: rand!
-
-# In matrix-vector multiplication, the correct orientation of the vector is assumed.
-const StridedOrTriangularMatrix{T} = Union{StridedMatrix{T}, LowerTriangular{T}, UnitLowerTriangular{T}, UpperTriangular{T}, UnitUpperTriangular{T}}
-const AdjOrTransStridedOrTriangularMatrix{T} = Union{StridedOrTriangularMatrix{T},Adjoint{<:Any,<:StridedOrTriangularMatrix{T}},Transpose{<:Any,<:StridedOrTriangularMatrix{T}}}
-
-for op ∈ (:+, :-), Wrapper ∈ (:Hermitian, :Symmetric)
-    @eval begin
-        $op(A::AbstractSparseMatrix, B::$Wrapper{<:Any,<:AbstractSparseMatrix}) = $op(A, sparse(B))
-        $op(A::$Wrapper{<:Any,<:AbstractSparseMatrix}, B::AbstractSparseMatrix) = $op(sparse(A), B)
-
-        $op(A::AbstractSparseMatrix, B::$Wrapper) = $op(A, collect(B))
-        $op(A::$Wrapper, B::AbstractSparseMatrix) = $op(collect(A), B)
-    end
-end
-for op ∈ (:+, :-)
-    @eval begin
-        $op(A::Symmetric{<:Any,  <:AbstractSparseMatrix}, B::Hermitian{<:Any,  <:AbstractSparseMatrix}) = $op(sparse(A), sparse(B))
-        $op(A::Hermitian{<:Any,  <:AbstractSparseMatrix}, B::Symmetric{<:Any,  <:AbstractSparseMatrix}) = $op(sparse(A), sparse(B))
-        $op(A::Symmetric{<:Real, <:AbstractSparseMatrix}, B::Hermitian{<:Any,  <:AbstractSparseMatrix}) = $op(Hermitian(parent(A), sym_uplo(A.uplo)), B)
-        $op(A::Hermitian{<:Any,  <:AbstractSparseMatrix}, B::Symmetric{<:Real, <:AbstractSparseMatrix}) = $op(A, Hermitian(parent(B), sym_uplo(B.uplo)))
-    end
-end
-
-function mul!(C::StridedVecOrMat, A::AbstractSparseMatrixCSC, B::Union{StridedVector,AdjOrTransStridedOrTriangularMatrix}, α::Number, β::Number)
-    size(A, 2) == size(B, 1) || throw(DimensionMismatch())
-    size(A, 1) == size(C, 1) || throw(DimensionMismatch())
-    size(B, 2) == size(C, 2) || throw(DimensionMismatch())
-    nzv = nonzeros(A)
-    rv = rowvals(A)
-    if β != 1
-        β != 0 ? rmul!(C, β) : fill!(C, zero(eltype(C)))
-    end
-    for k = 1:size(C, 2)
-        @inbounds for col = 1:size(A, 2)
-            αxj = B[col,k] * α
-            for j = getcolptr(A)[col]:(getcolptr(A)[col + 1] - 1)
-                C[rv[j], k] += nzv[j]*αxj
-            end
-        end
-    end
-    C
-end
-*(A::SparseMatrixCSCUnion{TA}, x::StridedVector{Tx}) where {TA,Tx} =
-    (T = promote_op(matprod, TA, Tx); mul!(similar(x, T, size(A, 1)), A, x, true, false))
-*(A::SparseMatrixCSCUnion{TA}, B::AdjOrTransStridedOrTriangularMatrix{Tx}) where {TA,Tx} =
-    (T = promote_op(matprod, TA, Tx); mul!(similar(B, T, (size(A, 1), size(B, 2))), A, B, true, false))
-
-function mul!(C::StridedVecOrMat, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, B::Union{StridedVector,AdjOrTransStridedOrTriangularMatrix}, α::Number, β::Number)
-    A = adjA.parent
-    size(A, 2) == size(C, 1) || throw(DimensionMismatch())
-    size(A, 1) == size(B, 1) || throw(DimensionMismatch())
-    size(B, 2) == size(C, 2) || throw(DimensionMismatch())
-    nzv = nonzeros(A)
-    rv = rowvals(A)
-    if β != 1
-        β != 0 ? rmul!(C, β) : fill!(C, zero(eltype(C)))
-    end
-    for k = 1:size(C, 2)
-        @inbounds for col = 1:size(A, 2)
-            tmp = zero(eltype(C))
-            for j = getcolptr(A)[col]:(getcolptr(A)[col + 1] - 1)
-                tmp += adjoint(nzv[j])*B[rv[j],k]
-            end
-            C[col,k] += tmp * α
-        end
-    end
-    C
-end
-*(adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, x::StridedVector{Tx}) where {Tx} =
-    (T = promote_op(matprod, eltype(adjA), Tx); mul!(similar(x, T, size(adjA, 1)), adjA, x, true, false))
-*(adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, B::AdjOrTransStridedOrTriangularMatrix) =
-    (T = promote_op(matprod, eltype(adjA), eltype(B)); mul!(similar(B, T, (size(adjA, 1), size(B, 2))), adjA, B, true, false))
-
-function mul!(C::StridedVecOrMat, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, B::Union{StridedVector,AdjOrTransStridedOrTriangularMatrix}, α::Number, β::Number)
-    A = transA.parent
-    size(A, 2) == size(C, 1) || throw(DimensionMismatch())
-    size(A, 1) == size(B, 1) || throw(DimensionMismatch())
-    size(B, 2) == size(C, 2) || throw(DimensionMismatch())
-    nzv = nonzeros(A)
-    rv = rowvals(A)
-    if β != 1
-        β != 0 ? rmul!(C, β) : fill!(C, zero(eltype(C)))
-    end
-    for k = 1:size(C, 2)
-        @inbounds for col = 1:size(A, 2)
-            tmp = zero(eltype(C))
-            for j = getcolptr(A)[col]:(getcolptr(A)[col + 1] - 1)
-                tmp += transpose(nzv[j])*B[rv[j],k]
-            end
-            C[col,k] += tmp * α
-        end
-    end
-    C
-end
-*(transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::StridedVector{Tx}) where {Tx} =
-    (T = promote_op(matprod, eltype(transA), Tx); mul!(similar(x, T, size(transA, 1)), transA, x, true, false))
-*(transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, B::AdjOrTransStridedOrTriangularMatrix) =
-    (T = promote_op(matprod, eltype(transA), eltype(B)); mul!(similar(B, T, (size(transA, 1), size(B, 2))), transA, B, true, false))
-
-# For compatibility with dense multiplication API. Should be deleted when dense multiplication
-# API is updated to follow BLAS API.
-mul!(C::StridedVecOrMat, A::AbstractSparseMatrixCSC, B::Union{StridedVector,AdjOrTransStridedOrTriangularMatrix}) =
-    mul!(C, A, B, true, false)
-mul!(C::StridedVecOrMat, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, B::Union{StridedVector,AdjOrTransStridedOrTriangularMatrix}) =
-    mul!(C, adjA, B, true, false)
-mul!(C::StridedVecOrMat, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, B::Union{StridedVector,AdjOrTransStridedOrTriangularMatrix}) =
-    mul!(C, transA, B, true, false)
-
-function mul!(C::StridedVecOrMat, X::AdjOrTransStridedOrTriangularMatrix, A::AbstractSparseMatrixCSC, α::Number, β::Number)
-    mX, nX = size(X)
-    nX == size(A, 1) || throw(DimensionMismatch())
-    mX == size(C, 1) || throw(DimensionMismatch())
-    size(A, 2) == size(C, 2) || throw(DimensionMismatch())
-    rv = rowvals(A)
-    nzv = nonzeros(A)
-    if β != 1
-        β != 0 ? rmul!(C, β) : fill!(C, zero(eltype(C)))
-    end
-    @inbounds for multivec_row=1:mX, col = 1:size(A, 2), k=getcolptr(A)[col]:(getcolptr(A)[col+1]-1)
-        C[multivec_row, col] += α * X[multivec_row, rv[k]] * nzv[k] # perhaps suboptimal position of α?
-    end
-    C
-end
-*(X::AdjOrTransStridedOrTriangularMatrix, A::SparseMatrixCSCUnion{TvA}) where {TvA} =
-    (T = promote_op(matprod, eltype(X), TvA); mul!(similar(X, T, (size(X, 1), size(A, 2))), X, A, true, false))
-
-function mul!(C::StridedVecOrMat, X::AdjOrTransStridedOrTriangularMatrix, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, α::Number, β::Number)
-    A = adjA.parent
-    mX, nX = size(X)
-    nX == size(A, 2) || throw(DimensionMismatch())
-    mX == size(C, 1) || throw(DimensionMismatch())
-    size(A, 1) == size(C, 2) || throw(DimensionMismatch())
-    rv = rowvals(A)
-    nzv = nonzeros(A)
-    if β != 1
-        β != 0 ? rmul!(C, β) : fill!(C, zero(eltype(C)))
-    end
-    @inbounds for col = 1:size(A, 2), k=getcolptr(A)[col]:(getcolptr(A)[col+1]-1), multivec_col=1:mX
-        C[multivec_col, rv[k]] += α * X[multivec_col, col] * adjoint(nzv[k]) # perhaps suboptimal position of α?
-    end
-    C
-end
-*(X::AdjOrTransStridedOrTriangularMatrix, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) =
-    (T = promote_op(matprod, eltype(X), eltype(adjA)); mul!(similar(X, T, (size(X, 1), size(adjA, 2))), X, adjA, true, false))
-
-function mul!(C::StridedVecOrMat, X::AdjOrTransStridedOrTriangularMatrix, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, α::Number, β::Number)
-    A = transA.parent
-    mX, nX = size(X)
-    nX == size(A, 2) || throw(DimensionMismatch())
-    mX == size(C, 1) || throw(DimensionMismatch())
-    size(A, 1) == size(C, 2) || throw(DimensionMismatch())
-    rv = rowvals(A)
-    nzv = nonzeros(A)
-    if β != 1
-        β != 0 ? rmul!(C, β) : fill!(C, zero(eltype(C)))
-    end
-    @inbounds for col = 1:size(A, 2), k=getcolptr(A)[col]:(getcolptr(A)[col+1]-1), multivec_col=1:mX
-        C[multivec_col, rv[k]] += α * X[multivec_col, col] * transpose(nzv[k]) # perhaps suboptimal position of α?
-    end
-    C
-end
-*(X::AdjOrTransStridedOrTriangularMatrix, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}) =
-    (T = promote_op(matprod, eltype(X), eltype(transA)); mul!(similar(X, T, (size(X, 1), size(transA, 2))), X, transA, true, false))
-
-function (*)(D::Diagonal, A::AbstractSparseMatrixCSC)
-    T = Base.promote_op(*, eltype(D), eltype(A))
-    mul!(LinearAlgebra.copy_oftype(A, T), D, A)
-end
-function (*)(A::AbstractSparseMatrixCSC, D::Diagonal)
-    T = Base.promote_op(*, eltype(D), eltype(A))
-    mul!(LinearAlgebra.copy_oftype(A, T), A, D)
-end
-function (/)(A::AbstractSparseMatrixCSC, D::Diagonal)
-    T = typeof(oneunit(eltype(A))/oneunit(eltype(D)))
-    rdiv!(LinearAlgebra.copy_oftype(A, T), D)
-end
-
-# Sparse matrix multiplication as described in [Gustavson, 1978]:
-# http://dl.acm.org/citation.cfm?id=355796
-
-const SparseTriangular{Tv,Ti} = Union{UpperTriangular{Tv,<:SparseMatrixCSCUnion{Tv,Ti}},LowerTriangular{Tv,<:SparseMatrixCSCUnion{Tv,Ti}}}
-const SparseOrTri{Tv,Ti} = Union{SparseMatrixCSCUnion{Tv,Ti},SparseTriangular{Tv,Ti}}
-
-*(A::SparseOrTri, B::AbstractSparseVector) = spmatmulv(A, B)
-*(A::SparseOrTri, B::SparseColumnView) = spmatmulv(A, B)
-*(A::SparseOrTri, B::SparseVectorView) = spmatmulv(A, B)
-*(A::SparseMatrixCSCUnion, B::SparseMatrixCSCUnion) = spmatmul(A,B)
-*(A::SparseTriangular, B::SparseMatrixCSCUnion) = spmatmul(A,B)
-*(A::SparseMatrixCSCUnion, B::SparseTriangular) = spmatmul(A,B)
-*(A::SparseTriangular, B::SparseTriangular) = spmatmul1(A,B)
-*(A::SparseOrTri, B::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) = spmatmul(A, copy(B))
-*(A::SparseOrTri, B::Transpose{<:Any,<:AbstractSparseMatrixCSC}) = spmatmul(A, copy(B))
-*(A::Transpose{<:Any,<:AbstractSparseMatrixCSC}, B::SparseOrTri) = spmatmul(copy(A), B)
-*(A::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, B::SparseOrTri) = spmatmul(copy(A), B)
-*(A::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, B::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) = spmatmul(copy(A), copy(B))
-*(A::Transpose{<:Any,<:AbstractSparseMatrixCSC}, B::Transpose{<:Any,<:AbstractSparseMatrixCSC}) = spmatmul(copy(A), copy(B))
-
-# Gustavson's matrix multiplication algorithm revisited.
-# The result rowval vector is already sorted by construction.
-# The auxiliary Vector{Ti} xb is replaced by a Vector{Bool} of same length.
-# The optional argument controlling a sorting algorithm is obsolete.
-# depending on expected execution speed the sorting of the result column is
-# done by a quicksort of the row indices or by a full scan of the dense result vector.
-# The last is faster, if more than ≈ 1/32 of the result column is nonzero.
-# TODO: extend to SparseMatrixCSCUnion to allow for SubArrays (view(X, :, r)).
-function spmatmul(A::SparseOrTri{TvA,TiA}, B::Union{SparseOrTri{TvB,TiB},SparseVectorUnion{TvB,TiB},SubArray{TvB,<:Any,<:AbstractSparseArray{TvB,TiB}}}) where {TvA,TiA,TvB,TiB}
-
-    Tv = promote_op(matprod, TvA, TvB)
-    Ti = promote_type(TiA, TiB)
-    mA, nA = size(A)
-    nB = size(B, 2)
-    nA == size(B, 1) || throw(DimensionMismatch())
-
-    nnzC = min(estimate_mulsize(mA, nnz(A), nA, nnz(B), nB) * 11 ÷ 10 + mA, mA*nB)
-    colptrC = Vector{Ti}(undef, nB+1)
-    rowvalC = Vector{Ti}(undef, nnzC)
-    nzvalC = Vector{Tv}(undef, nnzC)
-
-    @inbounds begin
-        ip = 1
-        xb = fill(false, mA)
-        for i in 1:nB
-            if ip + mA - 1 > nnzC
-                nnzC += max(mA, nnzC>>2)
-                resize!(rowvalC, nnzC)
-                resize!(nzvalC, nnzC)
-            end
-            colptrC[i] = ip
-            ip = spcolmul!(rowvalC, nzvalC, xb, i, ip, A, B)
-        end
-        colptrC[nB+1] = ip
-    end
-
-    resize!(rowvalC, ip - 1)
-    resize!(nzvalC, ip - 1)
-
-    # This modification of Gustavson algorithm has sorted row indices
-    C = SparseMatrixCSC(mA, nB, colptrC, rowvalC, nzvalC)
-    return C
-end
-
-# process single rhs column
-function spcolmul!(rowvalC, nzvalC, xb, i, ip, A, B)
-    rowvalA = rowvals(A); nzvalA = nonzeros(A)
-    rowvalB = rowvals(B); nzvalB = nonzeros(B)
-    mA = size(A, 1)
-    ip0 = ip
-    k0 = ip - 1
-    @inbounds begin
-        for jp in nzrange(B, i)
-            nzB = nzvalB[jp]
-            j = rowvalB[jp]
-            for kp in nzrange(A, j)
-                nzC = nzvalA[kp] * nzB
-                k = rowvalA[kp]
-                if xb[k]
-                    nzvalC[k+k0] += nzC
-                else
-                    nzvalC[k+k0] = nzC
-                    xb[k] = true
-                    rowvalC[ip] = k
-                    ip += 1
-                end
-            end
-        end
-        if ip > ip0
-            if prefer_sort(ip-k0, mA)
-                # in-place sort of indices. Effort: O(nnz*ln(nnz)).
-                sort!(rowvalC, ip0, ip-1, QuickSort, Base.Order.Forward)
-                for vp = ip0:ip-1
-                    k = rowvalC[vp]
-                    xb[k] = false
-                    nzvalC[vp] = nzvalC[k+k0]
-                end
-            else
-                # scan result vector (effort O(mA))
-                for k = 1:mA
-                    if xb[k]
-                        xb[k] = false
-                        rowvalC[ip0] = k
-                        nzvalC[ip0] = nzvalC[k+k0]
-                        ip0 += 1
-                    end
-                end
-            end
-        end
-    end
-    return ip
-end
-
-# special cases of same twin Upper/LowerTriangular
-spmatmul1(A, B) = spmatmul(A, B)
-function spmatmul1(A::UpperTriangular, B::UpperTriangular)
-    UpperTriangular(spmatmul(A, B))
-end
-function spmatmul1(A::LowerTriangular, B::LowerTriangular)
-    LowerTriangular(spmatmul(A, B))
-end
-# exploit spmatmul for sparse vectors and column views
-function spmatmulv(A, B)
-    spmatmul(A, B)[:,1]
-end
-
-# estimated number of non-zeros in matrix product
-# it is assumed, that the non-zero indices are distributed independently and uniformly
-# in both matrices. Over-estimation is possible if that is not the case.
-function estimate_mulsize(m::Integer, nnzA::Integer, n::Integer, nnzB::Integer, k::Integer)
-    p = (nnzA / (m * n)) * (nnzB / (n * k))
-    p >= 1 ? m*k : p > 0 ? Int(ceil(-expm1(log1p(-p) * n)*m*k)) : 0 # (1-(1-p)^n)*m*k
-end
-
-# determine if sort! shall be used or the whole column be scanned
-# based on empirical data on i7-3610QM CPU
-# measuring runtimes of the scanning and sorting loops of the algorithm.
-# The parameters 6 and 3 might be modified for different architectures.
-prefer_sort(nz::Integer, m::Integer) = m > 6 && 3 * ilog2(nz) * nz < m
-
-# minimal number of bits required to represent integer; ilog2(n) >= log2(n)
-ilog2(n::Integer) = sizeof(n)<<3 - leading_zeros(n)
-
-# Frobenius dot/inner product: trace(A'B)
-function dot(A::AbstractSparseMatrixCSC{T1,S1},B::AbstractSparseMatrixCSC{T2,S2}) where {T1,T2,S1,S2}
-    m, n = size(A)
-    size(B) == (m,n) || throw(DimensionMismatch("matrices must have the same dimensions"))
-    r = dot(zero(T1), zero(T2))
-    @inbounds for j = 1:n
-        ia = getcolptr(A)[j]; ia_nxt = getcolptr(A)[j+1]
-        ib = getcolptr(B)[j]; ib_nxt = getcolptr(B)[j+1]
-        if ia < ia_nxt && ib < ib_nxt
-            ra = rowvals(A)[ia]; rb = rowvals(B)[ib]
-            while true
-                if ra < rb
-                    ia += oneunit(S1)
-                    ia < ia_nxt || break
-                    ra = rowvals(A)[ia]
-                elseif ra > rb
-                    ib += oneunit(S2)
-                    ib < ib_nxt || break
-                    rb = rowvals(B)[ib]
-                else # ra == rb
-                    r += dot(nonzeros(A)[ia], nonzeros(B)[ib])
-                    ia += oneunit(S1); ib += oneunit(S2)
-                    ia < ia_nxt && ib < ib_nxt || break
-                    ra = rowvals(A)[ia]; rb = rowvals(B)[ib]
-                end
-            end
-        end
-    end
-    return r
-end
-
-function dot(x::AbstractVector, A::AbstractSparseMatrixCSC, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    m, n = size(A)
-    (length(x) == m && n == length(y)) || throw(DimensionMismatch())
-    if iszero(m) || iszero(n)
-        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
-    end
-    T = promote_type(eltype(x), eltype(A), eltype(y))
-    r = zero(T)
-    rvals = getrowval(A)
-    nzvals = getnzval(A)
-    @inbounds for col in 1:n
-        ycol = y[col]
-        if !iszero(ycol)
-            temp = zero(T)
-            for k in nzrange(A, col)
-                temp += adjoint(x[rvals[k]]) * nzvals[k]
-            end
-            r += temp * ycol
-        end
-    end
-    return r
-end
-function dot(x::SparseVector, A::AbstractSparseMatrixCSC, y::SparseVector)
-    m, n = size(A)
-    length(x) == m && n == length(y) || throw(DimensionMismatch())
-    if iszero(m) || iszero(n)
-        return dot(zero(eltype(x)), zero(eltype(A)), zero(eltype(y)))
-    end
-    r = zero(promote_type(eltype(x), eltype(A), eltype(y)))
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    ynzind = nonzeroinds(y)
-    ynzval = nonzeros(y)
-    Acolptr = getcolptr(A)
-    Arowval = getrowval(A)
-    Anzval = getnzval(A)
-    for (yi, yv) in zip(ynzind, ynzval)
-        A_ptr_lo = Acolptr[yi]
-        A_ptr_hi = Acolptr[yi+1] - 1
-        if A_ptr_lo <= A_ptr_hi
-            r += _spdot(dot, 1, length(xnzind), xnzind, xnzval,
-                                            A_ptr_lo, A_ptr_hi, Arowval, Anzval) * yv
-        end
-    end
-    r
-end
-
-## triangular sparse handling
-
-possible_adjoint(adj::Bool, a::Real) = a
-possible_adjoint(adj::Bool, a) = adj ? adjoint(a) : a
-
-const UnitDiagonalTriangular = Union{UnitUpperTriangular,UnitLowerTriangular}
-
-const LowerTriangularPlain{T} = Union{
-            LowerTriangular{T,<:SparseMatrixCSCUnion{T}},
-            UnitLowerTriangular{T,<:SparseMatrixCSCUnion{T}}}
-
-const LowerTriangularWrapped{T} = Union{
-            Adjoint{T,<:UpperTriangular{T,<:SparseMatrixCSCUnion{T}}},
-            Adjoint{T,<:UnitUpperTriangular{T,<:SparseMatrixCSCUnion{T}}},
-            Transpose{T,<:UpperTriangular{T,<:SparseMatrixCSCUnion{T}}},
-            Transpose{T,<:UnitUpperTriangular{T,<:SparseMatrixCSCUnion{T}}}} where T
-
-const UpperTriangularPlain{T} = Union{
-            UpperTriangular{T,<:SparseMatrixCSCUnion{T}},
-            UnitUpperTriangular{T,<:SparseMatrixCSCUnion{T}}}
-
-const UpperTriangularWrapped{T} = Union{
-            Adjoint{T,<:LowerTriangular{T,<:SparseMatrixCSCUnion{T}}},
-            Adjoint{T,<:UnitLowerTriangular{T,<:SparseMatrixCSCUnion{T}}},
-            Transpose{T,<:LowerTriangular{T,<:SparseMatrixCSCUnion{T}}},
-            Transpose{T,<:UnitLowerTriangular{T,<:SparseMatrixCSCUnion{T}}}} where T
-
-const UpperTriangularSparse{T} = Union{
-            UpperTriangularWrapped{T}, UpperTriangularPlain{T}} where T
-
-const LowerTriangularSparse{T} = Union{
-            LowerTriangularWrapped{T}, LowerTriangularPlain{T}} where T
-
-const TriangularSparse{T} = Union{
-            LowerTriangularSparse{T}, UpperTriangularSparse{T}} where T
-
-## triangular multipliers
-function lmul!(A::TriangularSparse{T}, B::StridedVecOrMat{T}) where T
-    require_one_based_indexing(A, B)
-    nrowB, ncolB  = size(B, 1), size(B, 2)
-    ncol = LinearAlgebra.checksquare(A)
-    if nrowB != ncol
-        throw(DimensionMismatch("A is $(ncol) columns and B has $(nrowB) rows"))
-    end
-    _lmul!(A, B)
-end
-
-# forward multiplication for UpperTriangular SparseCSC matrices
-function _lmul!(U::UpperTriangularPlain, B::StridedVecOrMat)
-    A = U.data
-    unit = U isa UnitDiagonalTriangular
-
-    nrowB, ncolB  = size(B, 1), size(B, 2)
-    aa = getnzval(A)
-    ja = getrowval(A)
-    ia = getcolptr(A)
-
-    joff = 0
-    for k = 1:ncolB
-        for j = 1:nrowB
-            i1 = ia[j]
-            i2 = ia[j + 1] - 1
-            done = unit
-
-            bj = B[joff + j]
-            for ii = i1:i2
-                jai = ja[ii]
-                aii = aa[ii]
-                if jai < j
-                    B[joff + jai] += aii * bj
-                elseif jai == j
-                    if !unit
-                        B[joff + j] *= aii
-                        done = true
-                    end
-                else
-                    break
-                end
-            end
-            if !done
-                B[joff + j] -= B[joff + j]
-            end
-        end
-        joff += nrowB
-    end
-    B
-end
-
-# backward multiplication for LowerTriangular SparseCSC matrices
-function _lmul!(L::LowerTriangularPlain, B::StridedVecOrMat)
-    A = L.data
-    unit = L isa UnitDiagonalTriangular
-
-    nrowB, ncolB = size(B, 1), size(B, 2)
-    aa = getnzval(A)
-    ja = getrowval(A)
-    ia = getcolptr(A)
-
-    joff = 0
-    for k = 1:ncolB
-        for j = nrowB:-1:1
-            i1 = ia[j]
-            i2 = ia[j + 1] - 1
-            done = unit
-
-            bj = B[joff + j]
-            for ii = i2:-1:i1
-                jai = ja[ii]
-                aii = aa[ii]
-                if jai > j
-                    B[joff + jai] += aii * bj
-                elseif jai == j
-                    if !unit
-                        B[joff + j] *= aii
-                        done = true
-                    end
-                else
-                    break
-                end
-            end
-            if !done
-                B[joff + j] -= B[joff + j]
-            end
-        end
-        joff += nrowB
-    end
-    B
-end
-
-# forward multiplication for adjoint and transpose of LowerTriangular CSC matrices
-function _lmul!(U::UpperTriangularWrapped, B::StridedVecOrMat)
-    A = U.parent.data
-    unit = U.parent isa UnitDiagonalTriangular
-    adj = U isa Adjoint
-
-    nrowB, ncolB  = size(B, 1), size(B, 2)
-    aa = getnzval(A)
-    ja = getrowval(A)
-    ia = getcolptr(A)
-    Z = zero(eltype(A))
-
-    joff = 0
-    for k = 1:ncolB
-        for j = 1:nrowB
-            i1 = ia[j]
-            i2 = ia[j + 1] - 1
-            akku = Z
-            j0 = !unit ? j : j + 1
-
-            # loop through column j of A - only structural non-zeros
-            for ii = i2:-1:i1
-                jai = ja[ii]
-                if jai >= j0
-                    aai = possible_adjoint(adj, aa[ii])
-                    akku += B[joff + jai] * aai
-                else
-                    break
-                end
-            end
-            if unit
-                akku += B[joff + j]
-            end
-            B[joff + j] = akku
-        end
-        joff += nrowB
-    end
-    B
-end
-
-# backward multiplication with adjoint and transpose of LowerTriangular CSC matrices
-function _lmul!(L::LowerTriangularWrapped, B::StridedVecOrMat)
-    A = L.parent.data
-    unit = L.parent isa UnitDiagonalTriangular
-    adj = L isa Adjoint
-
-    nrowB, ncolB  = size(B, 1), size(B, 2)
-    aa = getnzval(A)
-    ja = getrowval(A)
-    ia = getcolptr(A)
-    Z = zero(eltype(A))
-
-    joff = 0
-    for k = 1:ncolB
-        for j = nrowB:-1:1
-            i1 = ia[j]
-            i2 = ia[j + 1] - 1
-            akku = Z
-            j0 = !unit ? j : j - 1
-
-            # loop through column j of A - only structural non-zeros
-            for ii = i1:i2
-                jai = ja[ii]
-                if jai <= j0
-                    aai = possible_adjoint(adj, aa[ii])
-                    akku += B[joff + jai] * aai
-                else
-                    break
-                end
-            end
-            if unit
-                akku += B[joff + j]
-            end
-            B[joff + j] = akku
-        end
-        joff += nrowB
-    end
-    B
-end
-
-## triangular solvers
-function ldiv!(A::TriangularSparse{T}, B::StridedVecOrMat{T}) where T
-    require_one_based_indexing(A, B)
-    nrowB, ncolB  = size(B, 1), size(B, 2)
-    ncol = LinearAlgebra.checksquare(A)
-    if nrowB != ncol
-        throw(DimensionMismatch("A is $(ncol) columns and B has $(nrowB) rows"))
-    end
-    _ldiv!(A, B)
-end
-
-# forward substitution for LowerTriangular CSC matrices
-function _ldiv!(L::LowerTriangularPlain, B::StridedVecOrMat)
-    A = L.data
-    unit = L isa UnitDiagonalTriangular
-
-    nrowB, ncolB  = size(B, 1), size(B, 2)
-    aa = getnzval(A)
-    ja = getrowval(A)
-    ia = getcolptr(A)
-
-    joff = 0
-    for k = 1:ncolB
-        for j = 1:nrowB
-            i1 = ia[j]
-            i2 = ia[j + 1] - 1
-
-            # find diagonal element
-            ii = searchsortedfirst(ja, j, i1, i2, Base.Order.Forward)
-            jai = ii > i2 ? zero(eltype(ja)) : ja[ii]
-
-            bj = B[joff + j]
-            # check for zero pivot and divide with pivot
-            if jai == j
-                if !unit
-                    bj /= aa[ii]
-                    B[joff + j] = bj
-                end
-                ii += 1
-            elseif !unit
-                throw(LinearAlgebra.SingularException(j))
-            end
-
-            # update remaining part
-            for i = ii:i2
-                B[joff + ja[i]] -= bj * aa[i]
-            end
-        end
-        joff += nrowB
-    end
-    B
-end
-
-# backward substitution for UpperTriangular CSC matrices
-function _ldiv!(U::UpperTriangularPlain, B::StridedVecOrMat)
-    A = U.data
-    unit = U isa UnitDiagonalTriangular
-
-    nrowB, ncolB = size(B, 1), size(B, 2)
-    aa = getnzval(A)
-    ja = getrowval(A)
-    ia = getcolptr(A)
-
-    joff = 0
-    for k = 1:ncolB
-        for j = nrowB:-1:1
-            i1 = ia[j]
-            i2 = ia[j + 1] - 1
-
-            # find diagonal element
-            ii = searchsortedlast(ja, j, i1, i2, Base.Order.Forward)
-            jai = ii < i1 ? zero(eltype(ja)) : ja[ii]
-
-            bj = B[joff + j]
-            # check for zero pivot and divide with pivot
-            if jai == j
-                if !unit
-                    bj /= aa[ii]
-                    B[joff + j] = bj
-                end
-                ii -= 1
-            elseif !unit
-                throw(LinearAlgebra.SingularException(j))
-            end
-
-            # update remaining part
-            for i = ii:-1:i1
-                B[joff + ja[i]] -= bj * aa[i]
-            end
-        end
-        joff += nrowB
-    end
-    B
-end
-
-# forward substitution for adjoint and transpose of UpperTriangular CSC matrices
-function _ldiv!(L::LowerTriangularWrapped, B::StridedVecOrMat)
-    A = L.parent.data
-    unit = L.parent isa UnitDiagonalTriangular
-    adj = L isa Adjoint
-
-    nrowB, ncolB  = size(B, 1), size(B, 2)
-    aa = getnzval(A)
-    ja = getrowval(A)
-    ia = getcolptr(A)
-
-    joff = 0
-    for k = 1:ncolB
-        for j = 1:nrowB
-            i1 = ia[j]
-            i2 = ia[j + 1] - 1
-            akku = B[joff + j]
-            done = false
-
-            # loop through column j of A - only structural non-zeros
-            for ii = i1:i2
-                jai = ja[ii]
-                if jai < j
-                    aai = possible_adjoint(adj, aa[ii])
-                    akku -= B[joff + jai] * aai
-                elseif jai == j
-                    if !unit
-                        aai = possible_adjoint(adj, aa[ii])
-                        akku /= aai
-                    end
-                    done = true
-                    break
-                else
-                    break
-                end
-            end
-            if !done && !unit
-                throw(LinearAlgebra.SingularException(j))
-            end
-            B[joff + j] = akku
-        end
-        joff += nrowB
-    end
-    B
-end
-
-# backward substitution for adjoint and transpose of LowerTriangular CSC matrices
-function _ldiv!(U::UpperTriangularWrapped, B::StridedVecOrMat)
-    A = U.parent.data
-    unit = U.parent isa UnitDiagonalTriangular
-    adj = U isa Adjoint
-
-    nrowB, ncolB = size(B, 1), size(B, 2)
-    aa = getnzval(A)
-    ja = getrowval(A)
-    ia = getcolptr(A)
-
-    joff = 0
-    for k = 1:ncolB
-        for j = nrowB:-1:1
-            i1 = ia[j]
-            i2 = ia[j + 1] - 1
-            akku = B[joff + j]
-            done = false
-
-            # loop through column j of A - only structural non-zeros
-            for ii = i2:-1:i1
-                jai = ja[ii]
-                if jai > j
-                    aai = possible_adjoint(adj, aa[ii])
-                    akku -= B[joff + jai] * aai
-                elseif jai == j
-                    if !unit
-                        aai = possible_adjoint(adj, aa[ii])
-                        akku /= aai
-                    end
-                    done = true
-                    break
-                else
-                    break
-                end
-            end
-            if !done && !unit
-                throw(LinearAlgebra.SingularException(j))
-            end
-            B[joff + j] = akku
-        end
-        joff += nrowB
-    end
-    B
-end
-
-(\)(L::TriangularSparse, B::AbstractSparseMatrixCSC) = ldiv!(L, Array(B))
-#(*)(L::TriangularSparse, B::AbstractSparseMatrixCSC) = lmul!(L, Array(B))
-
-## end of triangular
-
-# y .= A * x
-mul!(y::StridedVecOrMat, A::SparseMatrixCSCSymmHerm, x::StridedVecOrMat) = mul!(y,A,x,1,0)
-
-# C .= α * A * B + β * C
-function mul!(C::StridedVecOrMat{T}, sA::SparseMatrixCSCSymmHerm, B::StridedVecOrMat,
-              α::Number, β::Number) where T
-    fuplo = sA.uplo == 'U' ? nzrangeup : nzrangelo
-    _mul!(fuplo, C, sA, B, T(α), T(β))
-end
-
-function _mul!(nzrang::Function, C::StridedVecOrMat{T}, sA, B, α, β) where T
-    A = sA.data
-    n = size(A, 2)
-    m = size(B, 2)
-    n == size(B, 1) == size(C, 1) && m == size(C, 2) || throw(DimensionMismatch())
-    rv = rowvals(A)
-    nzv = nonzeros(A)
-    let z = T(0), sumcol=z, αxj=z, aarc=z, α = α
-        if β != 1
-            β != 0 ? rmul!(C, β) : fill!(C, z)
-        end
-        @inbounds for k = 1:m
-            for col = 1:n
-                αxj = B[col,k] * α
-                sumcol = z
-                for j = nzrang(A, col)
-                    row = rv[j]
-                    aarc = nzv[j]
-                    if row == col
-                        sumcol += (sA isa Hermitian ? real : identity)(aarc) * B[row,k]
-                    else
-                        C[row,k] += aarc * αxj
-                        sumcol += (sA isa Hermitian ? adjoint : transpose)(aarc) * B[row,k]
-                    end
-                end
-                C[col,k] += α * sumcol
-            end
-        end
-    end
-    C
-end
-
-# row range up to and including diagonal
-function nzrangeup(A, i)
-    r = nzrange(A, i); r1 = r.start; r2 = r.stop
-    rv = rowvals(A)
-    @inbounds r2 < r1 || rv[r2] <= i ? r : r1:searchsortedlast(rv, i, r1, r2, Forward)
-end
-# row range from diagonal (included) to end
-function nzrangelo(A, i)
-    r = nzrange(A, i); r1 = r.start; r2 = r.stop
-    rv = rowvals(A)
-    @inbounds r2 < r1 || rv[r1] >= i ? r : searchsortedfirst(rv, i, r1, r2, Forward):r2
-end
-## end of symmetric/Hermitian
-
-\(A::Transpose{<:Real,<:Hermitian{<:Real,<:AbstractSparseMatrixCSC}}, B::Vector) = A.parent \ B
-\(A::Transpose{<:Complex,<:Hermitian{<:Complex,<:AbstractSparseMatrixCSC}}, B::Vector) = copy(A) \ B
-\(A::Transpose{<:Number,<:Symmetric{<:Number,<:AbstractSparseMatrixCSC}}, B::Vector) = A.parent \ B
-
-function rdiv!(A::AbstractSparseMatrixCSC{T}, D::Diagonal{T}) where T
-    dd = D.diag
-    if (k = length(dd)) ≠ size(A, 2)
-        throw(DimensionMismatch("size(A, 2)=$(size(A, 2)) should be size(D, 1)=$k"))
-    end
-    nonz = nonzeros(A)
-    @inbounds for j in 1:k
-        ddj = dd[j]
-        if iszero(ddj)
-            throw(LinearAlgebra.SingularException(j))
-        end
-        for i in nzrange(A, j)
-            nonz[i] /= ddj
-        end
-    end
-    A
-end
-
-rdiv!(A::AbstractSparseMatrixCSC{T}, adjD::Adjoint{<:Any,<:Diagonal{T}}) where {T} =
-    (D = adjD.parent; rdiv!(A, conj(D)))
-rdiv!(A::AbstractSparseMatrixCSC{T}, transD::Transpose{<:Any,<:Diagonal{T}}) where {T} =
-    (D = transD.parent; rdiv!(A, D))
-
-function ldiv!(D::Diagonal{T}, A::AbstractSparseMatrixCSC{T}) where {T}
-    # require_one_based_indexing(A)
-    if size(A, 1) != length(D.diag)
-        throw(DimensionMismatch("diagonal matrix is $(length(D.diag)) by $(length(D.diag)) but right hand side has $(size(A, 1)) rows"))
-    end
-    nonz = nonzeros(A)
-    Arowval = rowvals(A)
-    b = D.diag
-    for i=1:length(b)
-        iszero(b[i]) && throw(SingularException(i))
-    end
-    @inbounds for col = 1:size(A, 2), p = getcolptr(A)[col]:(getcolptr(A)[col + 1] - 1)
-        nonz[p] = b[Arowval[p]] \ nonz[p]
-    end
-    A
-end
-ldiv!(adjD::Adjoint{<:Any,<:Diagonal{T}}, A::AbstractSparseMatrixCSC{T}) where {T} =
-    (D = adjD.parent; ldiv!(conj(D), A))
-ldiv!(transD::Transpose{<:Any,<:Diagonal{T}}, A::AbstractSparseMatrixCSC{T}) where {T} =
-    (D = transD.parent; ldiv!(D, A))
-
-## triu, tril
-
-function triu(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
-    m,n = size(S)
-    colptr = Vector{Ti}(undef, n+1)
-    nnz = 0
-    for col = 1 : min(max(k+1,1), n+1)
-        colptr[col] = 1
-    end
-    for col = max(k+1,1) : n
-        for c1 = getcolptr(S)[col] : getcolptr(S)[col+1]-1
-            rowvals(S)[c1] > col - k && break
-            nnz += 1
-        end
-        colptr[col+1] = nnz+1
-    end
-    rowval = Vector{Ti}(undef, nnz)
-    nzval = Vector{Tv}(undef, nnz)
-    A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
-    for col = max(k+1,1) : n
-        c1 = getcolptr(S)[col]
-        for c2 = getcolptr(A)[col] : getcolptr(A)[col+1]-1
-            rowvals(A)[c2] = rowvals(S)[c1]
-            nonzeros(A)[c2] = nonzeros(S)[c1]
-            c1 += 1
-        end
-    end
-    A
-end
-
-function tril(S::AbstractSparseMatrixCSC{Tv,Ti}, k::Integer=0) where {Tv,Ti}
-    m,n = size(S)
-    colptr = Vector{Ti}(undef, n+1)
-    nnz = 0
-    colptr[1] = 1
-    for col = 1 : min(n, m+k)
-        l1 = getcolptr(S)[col+1]-1
-        for c1 = 0 : (l1 - getcolptr(S)[col])
-            rowvals(S)[l1 - c1] < col - k && break
-            nnz += 1
-        end
-        colptr[col+1] = nnz+1
-    end
-    for col = max(min(n, m+k)+2,1) : n+1
-        colptr[col] = nnz+1
-    end
-    rowval = Vector{Ti}(undef, nnz)
-    nzval = Vector{Tv}(undef, nnz)
-    A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
-    for col = 1 : min(n, m+k)
-        c1 = getcolptr(S)[col+1]-1
-        l2 = getcolptr(A)[col+1]-1
-        for c2 = 0 : l2 - getcolptr(A)[col]
-            rowvals(A)[l2 - c2] = rowvals(S)[c1]
-            nonzeros(A)[l2 - c2] = nonzeros(S)[c1]
-            c1 -= 1
-        end
-    end
-    A
-end
-
-## diff
-
-function sparse_diff1(S::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    m,n = size(S)
-    m > 1 || return SparseMatrixCSC(0, n, fill(one(Ti),n+1), Ti[], Tv[])
-    colptr = Vector{Ti}(undef, n+1)
-    numnz = 2 * nnz(S) # upper bound; will shrink later
-    rowval = Vector{Ti}(undef, numnz)
-    nzval = Vector{Tv}(undef, numnz)
-    numnz = 0
-    colptr[1] = 1
-    for col = 1 : n
-        last_row = 0
-        last_val = 0
-        for k = getcolptr(S)[col] : getcolptr(S)[col+1]-1
-            row = rowvals(S)[k]
-            val = nonzeros(S)[k]
-            if row > 1
-                if row == last_row + 1
-                    nzval[numnz] += val
-                    nzval[numnz]==zero(Tv) && (numnz -= 1)
-                else
-                    numnz += 1
-                    rowval[numnz] = row - 1
-                    nzval[numnz] = val
-                end
-            end
-            if row < m
-                numnz += 1
-                rowval[numnz] = row
-                nzval[numnz] = -val
-            end
-            last_row = row
-            last_val = val
-        end
-        colptr[col+1] = numnz+1
-    end
-    deleteat!(rowval, numnz+1:length(rowval))
-    deleteat!(nzval, numnz+1:length(nzval))
-    return SparseMatrixCSC(m-1, n, colptr, rowval, nzval)
-end
-
-function sparse_diff2(a::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    m,n = size(a)
-    colptr = Vector{Ti}(undef, max(n,1))
-    numnz = 2 * nnz(a) # upper bound; will shrink later
-    rowval = Vector{Ti}(undef, numnz)
-    nzval = Vector{Tv}(undef, numnz)
-
-    z = zero(Tv)
-
-    colptr_a = getcolptr(a)
-    rowval_a = rowvals(a)
-    nzval_a = nonzeros(a)
-
-    ptrS = 1
-    colptr[1] = 1
-
-    n == 0 && return SparseMatrixCSC(m, n, colptr, rowval, nzval)
-
-    startA = colptr_a[1]
-    stopA = colptr_a[2]
-
-    rA = startA : stopA - 1
-    rowvalA = rowval_a[rA]
-    nzvalA = nzval_a[rA]
-    lA = stopA - startA
-
-    for col = 1:n-1
-        startB, stopB = startA, stopA
-        startA = colptr_a[col+1]
-        stopA = colptr_a[col+2]
-
-        rowvalB = rowvalA
-        nzvalB = nzvalA
-        lB = lA
-
-        rA = startA : stopA - 1
-        rowvalA = rowval_a[rA]
-        nzvalA = nzval_a[rA]
-        lA = stopA - startA
-
-        ptrB = 1
-        ptrA = 1
-
-        while ptrA <= lA && ptrB <= lB
-            rowA = rowvalA[ptrA]
-            rowB = rowvalB[ptrB]
-            if rowA < rowB
-                rowval[ptrS] = rowA
-                nzval[ptrS] = nzvalA[ptrA]
-                ptrS += 1
-                ptrA += 1
-            elseif rowB < rowA
-                rowval[ptrS] = rowB
-                nzval[ptrS] = -nzvalB[ptrB]
-                ptrS += 1
-                ptrB += 1
-            else
-                res = nzvalA[ptrA] - nzvalB[ptrB]
-                if res != z
-                    rowval[ptrS] = rowA
-                    nzval[ptrS] = res
-                    ptrS += 1
-                end
-                ptrA += 1
-                ptrB += 1
-            end
-        end
-
-        while ptrA <= lA
-            rowval[ptrS] = rowvalA[ptrA]
-            nzval[ptrS] = nzvalA[ptrA]
-            ptrS += 1
-            ptrA += 1
-        end
-
-        while ptrB <= lB
-            rowval[ptrS] = rowvalB[ptrB]
-            nzval[ptrS] = -nzvalB[ptrB]
-            ptrS += 1
-            ptrB += 1
-        end
-
-        colptr[col+1] = ptrS
-    end
-    deleteat!(rowval, ptrS:length(rowval))
-    deleteat!(nzval, ptrS:length(nzval))
-    return SparseMatrixCSC(m, n-1, colptr, rowval, nzval)
-end
-
-diff(a::AbstractSparseMatrixCSC; dims::Integer) = dims==1 ? sparse_diff1(a) : sparse_diff2(a)
-
-## norm and rank
-norm(A::AbstractSparseMatrixCSC, p::Real=2) = norm(view(nonzeros(A), 1:nnz(A)), p)
-
-function opnorm(A::AbstractSparseMatrixCSC, p::Real=2)
-    m, n = size(A)
-    if m == 0 || n == 0 || isempty(A)
-        return float(real(zero(eltype(A))))
-    elseif m == 1
-        if p == 1
-            return norm(nzvalview(A), Inf)
-        elseif p == 2
-            return norm(nzvalview(A), 2)
-        elseif p == Inf
-            return norm(nzvalview(A), 1)
-        end
-    elseif n == 1 && p in (1, 2, Inf)
-        return norm(nzvalview(A), p)
-    else
-        Tnorm = typeof(float(real(zero(eltype(A)))))
-        Tsum = promote_type(Float64,Tnorm)
-        if p==1
-            nA::Tsum = 0
-            for j=1:n
-                colSum::Tsum = 0
-                for i = getcolptr(A)[j]:getcolptr(A)[j+1]-1
-                    colSum += abs(nonzeros(A)[i])
-                end
-                nA = max(nA, colSum)
-            end
-            return convert(Tnorm, nA)
-        elseif p==2
-            throw(ArgumentError("2-norm not yet implemented for sparse matrices. Try opnorm(Array(A)) or opnorm(A, p) where p=1 or Inf."))
-        elseif p==Inf
-            rowSum = zeros(Tsum,m)
-            for i=1:length(nonzeros(A))
-                rowSum[rowvals(A)[i]] += abs(nonzeros(A)[i])
-            end
-            return convert(Tnorm, maximum(rowSum))
-        end
-    end
-    throw(ArgumentError("invalid operator p-norm p=$p. Valid: 1, Inf"))
-end
-
-# TODO rank
-
-# cond
-function cond(A::AbstractSparseMatrixCSC, p::Real=2)
-    if p == 1
-        normAinv = opnormestinv(A)
-        normA = opnorm(A, 1)
-        return normA * normAinv
-    elseif p == Inf
-        normAinv = opnormestinv(copy(A'))
-        normA = opnorm(A, Inf)
-        return normA * normAinv
-    elseif p == 2
-        throw(ArgumentError("2-norm condition number is not implemented for sparse matrices, try cond(Array(A), 2) instead"))
-    else
-        throw(ArgumentError("second argument must be either 1 or Inf, got $p"))
-    end
-end
-
-function opnormestinv(A::AbstractSparseMatrixCSC{T}, t::Integer = min(2,maximum(size(A)))) where T
-    maxiter = 5
-    # Check the input
-    n = checksquare(A)
-    F = factorize(A)
-    if t <= 0
-        throw(ArgumentError("number of blocks must be a positive integer"))
-    end
-    if t > n
-        throw(ArgumentError("number of blocks must not be greater than $n"))
-    end
-    ind = Vector{Int64}(undef, n)
-    ind_hist = Vector{Int64}(undef, maxiter * t)
-
-    Ti = typeof(float(zero(T)))
-
-    S = zeros(T <: Real ? Int : Ti, n, t)
-
-    function _any_abs_eq(v,n::Int)
-        for vv in v
-            if abs(vv)==n
-                return true
-            end
-        end
-        return false
-    end
-
-    # Generate the block matrix
-    X = Matrix{Ti}(undef, n, t)
-    X[1:n,1] .= 1
-    for j = 2:t
-        while true
-            rand!(view(X,1:n,j), (-1, 1))
-            yaux = X[1:n,j]' * X[1:n,1:j-1]
-            if !_any_abs_eq(yaux,n)
-                break
-            end
-        end
-    end
-    rmul!(X, inv(n))
-
-    iter = 0
-    local est
-    local est_old
-    est_ind = 0
-    while iter < maxiter
-        iter += 1
-        Y = F \ X
-        est = zero(real(eltype(Y)))
-        est_ind = 0
-        for i = 1:t
-            y = norm(Y[1:n,i], 1)
-            if y > est
-                est = y
-                est_ind = i
-            end
-        end
-        if iter == 1
-            est_old = est
-        end
-        if est > est_old || iter == 2
-            ind_best = est_ind
-        end
-        if iter >= 2 && est <= est_old
-            est = est_old
-            break
-        end
-        est_old = est
-        S_old = copy(S)
-        for j = 1:t
-            for i = 1:n
-                S[i,j] = Y[i,j]==0 ? one(Y[i,j]) : sign(Y[i,j])
-            end
-        end
-
-        if T <: Real
-            # Check whether cols of S are parallel to cols of S or S_old
-            for j = 1:t
-                while true
-                    repeated = false
-                    if j > 1
-                        saux = S[1:n,j]' * S[1:n,1:j-1]
-                        if _any_abs_eq(saux,n)
-                            repeated = true
-                        end
-                    end
-                    if !repeated
-                        saux2 = S[1:n,j]' * S_old[1:n,1:t]
-                        if _any_abs_eq(saux2,n)
-                            repeated = true
-                        end
-                    end
-                    if repeated
-                        rand!(view(S,1:n,j), (-1, 1))
-                    else
-                        break
-                    end
-                end
-            end
-        end
-
-        # Use the conjugate transpose
-        Z = F' \ S
-        h_max = zero(real(eltype(Z)))
-        h = zeros(real(eltype(Z)), n)
-        h_ind = 0
-        for i = 1:n
-            h[i] = norm(Z[i,1:t], Inf)
-            if h[i] > h_max
-                h_max = h[i]
-                h_ind = i
-            end
-            ind[i] = i
-        end
-        if iter >=2 && ind_best == h_ind
-            break
-        end
-        p = sortperm(h, rev=true)
-        h = h[p]
-        permute!(ind, p)
-        if t > 1
-            addcounter = t
-            elemcounter = 0
-            while addcounter > 0 && elemcounter < n
-                elemcounter = elemcounter + 1
-                current_element = ind[elemcounter]
-                found = false
-                for i = 1:t * (iter - 1)
-                    if current_element == ind_hist[i]
-                        found = true
-                        break
-                    end
-                end
-                if !found
-                    addcounter = addcounter - 1
-                    for i = 1:current_element - 1
-                        X[i,t-addcounter] = 0
-                    end
-                    X[current_element,t-addcounter] = 1
-                    for i = current_element + 1:n
-                        X[i,t-addcounter] = 0
-                    end
-                    ind_hist[iter * t - addcounter] = current_element
-                else
-                    if elemcounter == t && addcounter == t
-                        break
-                    end
-                end
-            end
-        else
-            ind_hist[1:t] = ind[1:t]
-            for j = 1:t
-                for i = 1:ind[j] - 1
-                    X[i,j] = 0
-                end
-                X[ind[j],j] = 1
-                for i = ind[j] + 1:n
-                    X[i,j] = 0
-                end
-            end
-        end
-    end
-    return est
-end
-
-## kron
-@inline function kron!(C::SparseMatrixCSC, A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC)
-    nnzC = nnz(A)*nnz(B)
-    mA, nA = size(A); mB, nB = size(B)
-    mC, nC = mA*mB, nA*nB
-
-    rowvalC = rowvals(C)
-    nzvalC = nonzeros(C)
-    colptrC = getcolptr(C)
-
-    @boundscheck begin
-        length(colptrC) == nC+1 || throw(DimensionMismatch("expect C to be preallocated with $(nC+1) colptrs "))
-        length(rowvalC) == nnzC || throw(DimensionMismatch("expect C to be preallocated with $(nnzC) rowvals"))
-        length(nzvalC) == nnzC || throw(DimensionMismatch("expect C to be preallocated with $(nnzC) nzvals"))
-    end
-
-    col = 1
-    @inbounds for j = 1:nA
-        startA = getcolptr(A)[j]
-        stopA = getcolptr(A)[j+1] - 1
-        lA = stopA - startA + 1
-        for i = 1:nB
-            startB = getcolptr(B)[i]
-            stopB = getcolptr(B)[i+1] - 1
-            lB = stopB - startB + 1
-            ptr_range = (1:lB) .+ (colptrC[col]-1)
-            colptrC[col+1] = colptrC[col] + lA*lB
-            col += 1
-            for ptrA = startA : stopA
-                ptrB = startB
-                for ptr = ptr_range
-                    rowvalC[ptr] = (rowvals(A)[ptrA]-1)*mB + rowvals(B)[ptrB]
-                    nzvalC[ptr] = nonzeros(A)[ptrA] * nonzeros(B)[ptrB]
-                    ptrB += 1
-                end
-                ptr_range = ptr_range .+ lB
-            end
-        end
-    end
-    return C
-end
-
-@inline function kron!(z::SparseVector, x::SparseVector, y::SparseVector)
-    nnzx = nnz(x); nnzy = nnz(y); nnzz = nnz(z);
-    nzind = nonzeroinds(z)
-    nzval = nonzeros(z)
-
-    @boundscheck begin
-        nnzval = length(nzval); nnzind = length(nzind)
-        nnzz = nnzx*nnzy
-        nnzval == nnzz || throw(DimensionMismatch("expect z to be preallocated with $nnzz nonzeros"))
-        nnzind == nnzz || throw(DimensionMismatch("expect z to be preallocated with $nnzz nonzeros"))
-    end
-
-    @inbounds for i = 1:nnzx, j = 1:nnzy
-        this_ind = (i-1)*nnzy+j
-        nzind[this_ind] = (nonzeroinds(x)[i]-1)*length(y) + nonzeroinds(y)[j]
-        nzval[this_ind] = nonzeros(x)[i] * nonzeros(y)[j]
-    end
-    return z
-end
-
-# sparse matrix ⊗ sparse matrix
-function kron(A::AbstractSparseMatrixCSC{T1,S1}, B::AbstractSparseMatrixCSC{T2,S2}) where {T1,S1,T2,S2}
-    nnzC = nnz(A)*nnz(B)
-    mA, nA = size(A); mB, nB = size(B)
-    mC, nC = mA*mB, nA*nB
-    Tv = typeof(one(T1)*one(T2))
-    Ti = promote_type(S1,S2)
-    colptrC = Vector{Ti}(undef, nC+1)
-    rowvalC = Vector{Ti}(undef, nnzC)
-    nzvalC  = Vector{Tv}(undef, nnzC)
-    colptrC[1] = 1
-    # skip sparse_check
-    C = SparseMatrixCSC{Tv, Ti}(mC, nC, colptrC, rowvalC, nzvalC)
-    return @inbounds kron!(C, A, B)
-end
-
-# sparse vector ⊗ sparse vector
-function kron(x::SparseVector{T1,S1}, y::SparseVector{T2,S2}) where {T1,S1,T2,S2}
-    nnzx = nnz(x); nnzy = nnz(y)
-    nnzz = nnzx*nnzy # number of nonzeros in new vector
-    nzind = Vector{promote_type(S1,S2)}(undef, nnzz) # the indices of nonzeros
-    nzval = Vector{typeof(one(T1)*one(T2))}(undef, nnzz) # the values of nonzeros
-    z = SparseVector(length(x)*length(y), nzind, nzval)
-    return @inbounds kron!(z, x, y)
-end
-
-# sparse matrix ⊗ sparse vector & vice versa
-Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::AbstractSparseMatrixCSC, x::SparseVector) = kron!(C, A, SparseMatrixCSC(x))
-Base.@propagate_inbounds kron!(C::SparseMatrixCSC, x::SparseVector, A::AbstractSparseMatrixCSC) = kron!(C, SparseMatrixCSC(x), A)
-
-kron(A::AbstractSparseMatrixCSC, x::SparseVector) = kron(A, SparseMatrixCSC(x))
-kron(x::SparseVector, A::AbstractSparseMatrixCSC) = kron(SparseMatrixCSC(x), A)
-
-# sparse vec/mat ⊗ vec/mat and vice versa
-Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::Union{SparseVector,AbstractSparseMatrixCSC}, B::VecOrMat) = kron!(C, A, sparse(B))
-Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::VecOrMat, B::Union{SparseVector,AbstractSparseMatrixCSC}) = kron!(C, sparse(A), B)
-
-kron(A::Union{SparseVector,AbstractSparseMatrixCSC}, B::VecOrMat) = kron(A, sparse(B))
-kron(A::VecOrMat, B::Union{SparseVector,AbstractSparseMatrixCSC}) = kron(sparse(A), B)
-
-# sparse vec/mat ⊗ Diagonal and vice versa
-Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::Diagonal{T}, B::Union{SparseVector{S}, AbstractSparseMatrixCSC{S}}) where {T<:Number, S<:Number} = kron!(C, sparse(A), B)
-Base.@propagate_inbounds kron!(C::SparseMatrixCSC, A::Union{SparseVector{T}, AbstractSparseMatrixCSC{T}}, B::Diagonal{S}) where {T<:Number, S<:Number} = kron!(C, A, sparse(B))
-
-kron(A::Diagonal{T}, B::Union{SparseVector{S}, AbstractSparseMatrixCSC{S}}) where {T<:Number, S<:Number} = kron(sparse(A), B)
-kron(A::Union{SparseVector{T}, AbstractSparseMatrixCSC{T}}, B::Diagonal{S}) where {T<:Number, S<:Number} = kron(A, sparse(B))
-
-# sparse outer product
-kron!(C::SparseMatrixCSC, A::SparseVectorUnion, B::AdjOrTransSparseVectorUnion) = broadcast!(*, C, A, B)
-kron(A::SparseVectorUnion, B::AdjOrTransSparseVectorUnion) = A .* B
-
-## det, inv, cond
-
-inv(A::AbstractSparseMatrixCSC) = error("The inverse of a sparse matrix can often be dense and can cause the computer to run out of memory. If you are sure you have enough memory, please convert your matrix to a dense matrix, e.g. by calling `Matrix`.")
-
-# TODO
-
-## scale methods
-
-# Copy colptr and rowval from one sparse matrix to another
-function copyinds!(C::AbstractSparseMatrixCSC, A::AbstractSparseMatrixCSC)
-    if getcolptr(C) !== getcolptr(A)
-        resize!(getcolptr(C), length(getcolptr(A)))
-        copyto!(getcolptr(C), getcolptr(A))
-    end
-    if rowvals(C) !== rowvals(A)
-        resize!(rowvals(C), length(rowvals(A)))
-        copyto!(rowvals(C), rowvals(A))
-    end
-end
-
-# multiply by diagonal matrix as vector
-function mul!(C::AbstractSparseMatrixCSC, A::AbstractSparseMatrixCSC, D::Diagonal)
-    m, n = size(A)
-    b    = D.diag
-    (n==length(b) && size(A)==size(C)) || throw(DimensionMismatch())
-    copyinds!(C, A)
-    Cnzval = nonzeros(C)
-    Anzval = nonzeros(A)
-    resize!(Cnzval, length(Anzval))
-    for col = 1:n, p = getcolptr(A)[col]:(getcolptr(A)[col+1]-1)
-        @inbounds Cnzval[p] = Anzval[p] * b[col]
-    end
-    C
-end
-
-function mul!(C::AbstractSparseMatrixCSC, D::Diagonal, A::AbstractSparseMatrixCSC)
-    m, n = size(A)
-    b    = D.diag
-    (m==length(b) && size(A)==size(C)) || throw(DimensionMismatch())
-    copyinds!(C, A)
-    Cnzval = nonzeros(C)
-    Anzval = nonzeros(A)
-    Arowval = rowvals(A)
-    resize!(Cnzval, length(Anzval))
-    for col = 1:n, p = getcolptr(A)[col]:(getcolptr(A)[col+1]-1)
-        @inbounds Cnzval[p] = b[Arowval[p]] * Anzval[p]
-    end
-    C
-end
-
-function mul!(C::AbstractSparseMatrixCSC, A::AbstractSparseMatrixCSC, b::Number)
-    size(A)==size(C) || throw(DimensionMismatch())
-    copyinds!(C, A)
-    resize!(nonzeros(C), length(nonzeros(A)))
-    mul!(nonzeros(C), nonzeros(A), b)
-    C
-end
-
-function mul!(C::AbstractSparseMatrixCSC, b::Number, A::AbstractSparseMatrixCSC)
-    size(A)==size(C) || throw(DimensionMismatch())
-    copyinds!(C, A)
-    resize!(nonzeros(C), length(nonzeros(A)))
-    mul!(nonzeros(C), b, nonzeros(A))
-    C
-end
-
-function rmul!(A::AbstractSparseMatrixCSC, b::Number)
-    rmul!(nonzeros(A), b)
-    return A
-end
-
-function lmul!(b::Number, A::AbstractSparseMatrixCSC)
-    lmul!(b, nonzeros(A))
-    return A
-end
-
-function rmul!(A::AbstractSparseMatrixCSC, D::Diagonal)
-    m, n = size(A)
-    (n == size(D, 1)) || throw(DimensionMismatch())
-    Anzval = nonzeros(A)
-    @inbounds for col = 1:n, p = getcolptr(A)[col]:(getcolptr(A)[col + 1] - 1)
-         Anzval[p] = Anzval[p] * D.diag[col]
-    end
-    return A
-end
-
-function lmul!(D::Diagonal, A::AbstractSparseMatrixCSC)
-    m, n = size(A)
-    (m == size(D, 2)) || throw(DimensionMismatch())
-    Anzval = nonzeros(A)
-    Arowval = rowvals(A)
-    @inbounds for col = 1:n, p = getcolptr(A)[col]:(getcolptr(A)[col + 1] - 1)
-        Anzval[p] = D.diag[Arowval[p]] * Anzval[p]
-    end
-    return A
-end
-
-function \(A::AbstractSparseMatrixCSC, B::AbstractVecOrMat)
-    require_one_based_indexing(A, B)
-    m, n = size(A)
-    if m == n
-        if istril(A)
-            if istriu(A)
-                return \(Diagonal(Vector(diag(A))), B)
-            else
-                return \(LowerTriangular(A), B)
-            end
-        elseif istriu(A)
-            return \(UpperTriangular(A), B)
-        end
-        if ishermitian(A)
-            return \(Hermitian(A), B)
-        end
-        return \(lu(A), B)
-    else
-        return \(qr(A), B)
-    end
-end
-for (xformtype, xformop) in ((:Adjoint, :adjoint), (:Transpose, :transpose))
-    @eval begin
-        function \(xformA::($xformtype){<:Any,<:AbstractSparseMatrixCSC}, B::AbstractVecOrMat)
-            A = xformA.parent
-            require_one_based_indexing(A, B)
-            m, n = size(A)
-            if m == n
-                if istril(A)
-                    if istriu(A)
-                        return \($xformop(Diagonal(Vector(diag(A)))), B)
-                    else
-                        return \($xformop(LowerTriangular(A)), B)
-                    end
-                elseif istriu(A)
-                    return \($xformop(UpperTriangular(A)), B)
-                end
-                if ishermitian(A)
-                    return \($xformop(Hermitian(A)), B)
-                end
-                return \($xformop(lu(A)), B)
-            else
-                return \($xformop(qr(A)), B)
-            end
-        end
-    end
-end
-
-function factorize(A::AbstractSparseMatrixCSC)
-    m, n = size(A)
-    if m == n
-        if istril(A)
-            if istriu(A)
-                return Diagonal(A)
-            else
-                return LowerTriangular(A)
-            end
-        elseif istriu(A)
-            return UpperTriangular(A)
-        end
-        if ishermitian(A)
-            return factorize(Hermitian(A))
-        end
-        return lu(A)
-    else
-        return qr(A)
-    end
-end
-
-# function factorize(A::Symmetric{Float64,AbstractSparseMatrixCSC{Float64,Ti}}) where Ti
-#     F = cholesky(A)
-#     if LinearAlgebra.issuccess(F)
-#         return F
-#     else
-#         ldlt!(F, A)
-#         return F
-#     end
-# end
-function factorize(A::LinearAlgebra.RealHermSymComplexHerm{Float64,<:AbstractSparseMatrixCSC})
-    F = cholesky(A; check = false)
-    if LinearAlgebra.issuccess(F)
-        return F
-    else
-        ldlt!(F, A)
-        return F
-    end
-end
-
-eigen(A::AbstractSparseMatrixCSC) =
-    error("eigen(A) not supported for sparse matrices. Use for example eigs(A) from the Arpack package instead.")
diff --git a/stdlib/SparseArrays/src/sparseconvert.jl b/stdlib/SparseArrays/src/sparseconvert.jl
deleted file mode 100644
index e235d332ac291e..00000000000000
--- a/stdlib/SparseArrays/src/sparseconvert.jl
+++ /dev/null
@@ -1,286 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-import LinearAlgebra: AbstractTriangular
-
-"""
-    SparseMatrixCSCSymmHerm
-
-`Symmetric` or `Hermitian` of a `SparseMatrixCSC` or `SparseMatrixCSCView`.
-"""
-const SparseMatrixCSCSymmHerm{Tv,Ti} = Union{Symmetric{Tv,<:SparseMatrixCSCUnion{Tv,Ti}},
-                                            Hermitian{Tv,<:SparseMatrixCSCUnion{Tv,Ti}}}
-
-const AbstractTriangularSparse{Tv,Ti} = AbstractTriangular{Tv,<:SparseMatrixCSCUnion{Tv,Ti}}
-
-# converting Symmetric/Hermitian/AbstractTriangular/SubArray of SparseMatrixCSC
-# and Transpose/Adjoint of AbstractTriangular of SparseMatrixCSC to SparseMatrixCSC
-for wr in (Symmetric, Hermitian, Transpose, Adjoint,
-           UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular,
-           SubArray)
-
-    @eval SparseMatrixCSC(A::$wr) = _sparsem(A)
-    @eval SparseMatrixCSC{Tv}(A::$wr{Tv}) where Tv = _sparsem(A)
-    @eval SparseMatrixCSC{Tv}(A::$wr) where Tv = SparseMatrixCSC{Tv}(_sparsem(A))
-    @eval SparseMatrixCSC{Tv,Ti}(A::$wr) where {Tv,Ti} = SparseMatrixCSC{Tv,Ti}(_sparsem(A))
-end
-
-"""
-    iswrsparse(::S)
-    iswrsparse(::Type{S})
-
-Returns `true` if type `S` is backed by a sparse array, and `false` otherwise.
-"""
-iswrsparse(::T) where T<:AbstractArray = iswrsparse(T)
-iswrsparse(::Type) = false
-iswrsparse(::Type{T}) where T<:AbstractSparseArray = true
-
-"""
-    depth(::Type{S})
-
-Returns 0 for unwrapped S, and nesting depth for wrapped (nested) abstract arrays.
-"""
-depth(::T) where T = depth(T)
-depth(::Type{T}) where T<:AbstractArray = 0
-
-for wr in (Symmetric, Hermitian,
-           LowerTriangular, UnitLowerTriangular, UpperTriangular, UnitUpperTriangular,
-           Transpose, Adjoint, SubArray,
-           Diagonal, Bidiagonal, Tridiagonal, SymTridiagonal)
-
-    pl = wr === SubArray ? :($wr{<:Any,<:Any,T}) : :($wr{<:Any,T})
-    @eval iswrsparse(::Type{<:$pl}) where T = iswrsparse(T)
-    @eval depth(::Type{<:$pl}) where T = depth(T) + 1
-end
-
-# convert parent and re-wrap in same wrapper
-_sparsewrap(A::Symmetric) = Symmetric(_sparsem(parent(A)), A.uplo == 'U' ? :U : :L)
-_sparsewrap(A::Hermitian) = Hermitian(_sparsem(parent(A)), A.uplo == 'U' ? :U : :L)
-_sparsewrap(A::SubArray) = SubArray(_sparsem(parent(A)), A.indices)
-for ty in ( LowerTriangular, UnitLowerTriangular,
-            UpperTriangular, UnitUpperTriangular,
-            Transpose, Adjoint)
-
-    @eval _sparsewrap(A::$ty) = $ty(_sparsem(parent(A)))
-end
-function _sparsewrap(A::Union{Diagonal,Bidiagonal,Tridiagonal,SymTridiagonal})
-    dropzeros!(sparse(A))
-end
-
-"""
-    unwrap(A::AbstractMatrix)
-
-In case A is a wrapper type (`SubArray, Symmetric, Adjoint, SubArray, Triangular, Tridiagonal`, etc.)
-convert to `Matrix` or `SparseMatrixCSC`, depending on final storage type of A.
-For other types return A itself.
-"""
-unwrap(A::Any) = A
-unwrap(A::AbstractMatrix) = iswrsparse(A) ? convert(SparseMatrixCSC, A) : convert(Array, A)
-
-# For pure sparse matrices and vectors return A.
-# For wrapped sparse matrices or vectors convert to SparseMatrixCSC.
-# Handle nested wrappers properly.
-# Use abstract matrix fallback if A is not sparse.
-function _sparsem(@nospecialize A::AbstractArray{Tv}) where Tv
-    if iswrsparse(A)
-        if depth(A) >= 1
-            _sparsem(_sparsewrap(A))
-        else
-            A
-        end
-    else
-        # explicitly call abstract matrix fallback using getindex(A,...)
-        invoke(SparseMatrixCSC{Tv,Int}, Tuple{AbstractMatrix}, A)
-    end
-end
-
-_sparsem(A::AbstractSparseMatrix) = A
-_sparsem(A::AbstractSparseVector) = A
-
-# Transpose/Adjoint of sparse vector (returning sparse matrix)
-function _sparsem(A::Union{Transpose{<:Any,<:AbstractSparseVector},Adjoint{<:Any,<:AbstractSparseVector}})
-    B = parent(A)
-    n = length(B)
-    Ti = eltype(nonzeroinds(B))
-    fadj = A isa Transpose ? transpose : adjoint
-    colptr = fill!(Vector{Ti}(undef, n + 1), 0)
-    colptr[1] = 1
-    colptr[nonzeroinds(B) .+ 1] .= 1
-    cumsum!(colptr, colptr)
-    rowval = fill!(similar(nonzeroinds(B)), 1)
-    nzval = fadj.(nonzeros(B))
-    SparseMatrixCSC(1, n, colptr, rowval, nzval)
-end
-
-function _sparsem(A::Union{Transpose{<:Any,<:AbstractSparseMatrixCSC},Adjoint{<:Any,<:AbstractSparseMatrixCSC}})
-    ftranspose(parent(A), A isa Transpose ? transpose : adjoint)
-end
-
-# Symmetric/Hermitian of sparse matrix
-_sparsem(A::SparseMatrixCSCSymmHerm) = _sparsem(A.uplo == 'U' ? nzrangeup : nzrangelo, A)
-# Triangular of sparse matrix
-_sparsem(A::UpperTriangular{T,<:AbstractSparseMatrix}) where T = triu(A.data)
-_sparsem(A::LowerTriangular{T,<:AbstractSparseMatrix}) where T = tril(A.data)
-# view of sparse matrix
-_sparsem(S::SubArray{<:Any,2,<:AbstractSparseMatrixCSC}) = getindex(S.parent,S.indices...)
-
-# 4 cases: (Symmetric|Hermitian) variants (:U|:L)
-function _sparsem(fnzrange::Function, sA::SparseMatrixCSCSymmHerm{Tv}) where {Tv}
-    A = sA.data
-    rowval = rowvals(A)
-    nzval = nonzeros(A)
-    m, n = size(A)
-    Ti = eltype(rowval)
-    fadj = sA isa Symmetric ? transpose : adjoint
-    newcolptr = Vector{Ti}(undef, n+1)
-    diagmap = fadj == transpose ? identity : real
-
-    newcolptr[1] = 1
-    colrange = fnzrange === nzrangeup ? (1:n) : (n:-1:1)
-    @inbounds for j = colrange
-        r = fnzrange(A, j); r1 = r.start; r2 = r.stop
-        newcolptr[j+1] = r2 - r1 + 1
-        for k = r1:r2
-            row = rowval[k]
-            if row != j
-                newcolptr[row+1] += 1
-            end
-        end
-    end
-    cumsum!(newcolptr, newcolptr)
-    nz = newcolptr[n+1] - 1
-    newrowval = Vector{Ti}(undef, nz)
-    newnzval = Vector{Tv}(undef, nz)
-    @inbounds for j = 1:n
-        newk = newcolptr[j]
-        for k = fnzrange(A, j)
-            i = rowval[k]
-            nzv = nzval[k]
-            if i != j
-                newrowval[newk] = i
-                newnzval[newk] = nzv
-                newk += 1
-                ni = newcolptr[i]
-                newrowval[ni] = j
-                newnzval[ni] = fadj(nzv)
-                newcolptr[i] = ni + 1
-            else
-                newrowval[newk] = i
-                newnzval[newk] = diagmap(nzv)
-                newk += 1
-            end
-        end
-        newcolptr[j] = newk
-    end
-    _sparse_gen(m, n, newcolptr, newrowval, newnzval)
-end
-
-# 2 cases: Unit(Upper|Lower)Triangular{Tv,AbstractSparseMatrixCSC}
-function _sparsem(A::AbstractTriangularSparse{Tv}) where Tv
-    S = A.data
-    rowval = rowvals(S)
-    nzval = nonzeros(S)
-    m, n = size(S)
-    Ti = eltype(rowval)
-    fnzrange = A isa Union{UpperTriangular,UnitUpperTriangular} ? nzrangeup : nzrangelo
-    unit = A isa Union{UnitUpperTriangular,UnitLowerTriangular}
-    nz = nnz(S) + n * unit
-    newcolptr = Vector{Ti}(undef, n+1)
-    newrowval = Vector{Ti}(undef, nz)
-    newnzval = Vector{Tv}(undef, nz)
-    newcolptr[1] = 1
-    uplo = fnzrange == nzrangeup
-    newk = 1
-    @inbounds for j = 1:n
-        newkk = newk
-        if unit
-            newk += !uplo
-        end
-        r = fnzrange(S, j); r1 = r.start; r2 = r.stop
-        for k = r1:r2
-            i = rowval[k]
-            if i != j || i == j && !unit
-                newrowval[newk] = i
-                newnzval[newk] = nzval[k]
-                newk += 1
-            end
-        end
-        if unit
-            uplo && (newkk = newk)
-            newrowval[newkk] = j
-            newnzval[newkk] = one(Tv)
-            newk += uplo
-        end
-        newcolptr[j+1] = newk
-    end
-    nz = newcolptr[n+1] - 1
-    resize!(newrowval, nz)
-    resize!(newnzval, nz)
-    SparseMatrixCSC(m, n, newcolptr, newrowval, newnzval)
-end
-
-# 8 cases: (Transpose|Adjoint){Tv,[Unit](Upper|Lower)Triangular}
-function _sparsem(taA::Union{Transpose{Tv,<:AbstractTriangularSparse},
-                             Adjoint{Tv,<:AbstractTriangularSparse}}) where {Tv}
-
-    sA = taA.parent
-    A = sA.data
-    rowval = rowvals(A)
-    nzval = nonzeros(A)
-    m, n = size(A)
-    Ti = eltype(rowval)
-    fnzrange = sA isa Union{UpperTriangular,UnitUpperTriangular} ? nzrangeup : nzrangelo
-    fadj = taA isa Transpose ? transpose : adjoint
-    unit = sA isa Union{UnitUpperTriangular,UnitLowerTriangular}
-    uplo = A isa Union{UpperTriangular,UnitUpperTriangular}
-
-    newcolptr = Vector{Ti}(undef, n+1)
-    fill!(newcolptr, unit)
-    newcolptr[1] = 1
-    @inbounds for j = 1:n
-        for k = fnzrange(A, j)
-            i = rowval[k]
-            if i != j || i == j && !unit
-                newcolptr[i+1] += 1
-            end
-        end
-    end
-    cumsum!(newcolptr, newcolptr)
-    nz = newcolptr[n+1] - 1
-    newrowval = Vector{Ti}(undef, nz)
-    newnzval = Vector{Tv}(undef, nz)
-
-    @inbounds for j = 1:n
-        if !uplo && unit
-            ni = newcolptr[j]
-            newrowval[ni] = j
-            newnzval[ni] = fadj(one(Tv))
-            newcolptr[j] = ni + 1
-        end
-        for k = fnzrange(A, j)
-            i = rowval[k]
-            nzv = nzval[k]
-            if i != j || i == j && !unit
-                ni = newcolptr[i]
-                newrowval[ni] = j
-                newnzval[ni] = fadj(nzv)
-                newcolptr[i] = ni + 1
-            end
-        end
-        if uplo && unit
-            ni = newcolptr[j]
-            newrowval[ni] = j
-            newnzval[ni] = fadj(one(Tv))
-            newcolptr[j] = ni + 1
-        end
-    end
-    _sparse_gen(n, m, newcolptr, newrowval, newnzval)
-end
-
-function _sparse_gen(m, n, newcolptr, newrowval, newnzval)
-    @inbounds for j = n:-1:1
-        newcolptr[j+1] = newcolptr[j]
-    end
-    newcolptr[1] = 1
-    SparseMatrixCSC(m, n, newcolptr, newrowval, newnzval)
-end
-
diff --git a/stdlib/SparseArrays/src/sparsematrix.jl b/stdlib/SparseArrays/src/sparsematrix.jl
deleted file mode 100644
index fbb2adf3b66a1a..00000000000000
--- a/stdlib/SparseArrays/src/sparsematrix.jl
+++ /dev/null
@@ -1,3744 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Compressed sparse columns data structure
-# No assumptions about stored zeros in the data structure
-# Assumes that row values in rowval for each column are sorted
-#      issorted(rowval[colptr[i]:(colptr[i+1]-1)]) == true
-# Assumes that 1 <= colptr[i] <= colptr[i+1] for i in 1..n
-# Assumes that nnz <= length(rowval) < typemax(Ti)
-# Assumes that 0   <= length(nzval) < typemax(Ti)
-
-"""
-    SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrixCSC{Tv,Ti}
-
-Matrix type for storing sparse matrices in the
-[Compressed Sparse Column](@ref man-csc) format. The standard way
-of constructing SparseMatrixCSC is through the [`sparse`](@ref) function.
-See also [`spzeros`](@ref), [`spdiagm`](@ref) and [`sprand`](@ref).
-"""
-struct SparseMatrixCSC{Tv,Ti<:Integer} <: AbstractSparseMatrixCSC{Tv,Ti}
-    m::Int                  # Number of rows
-    n::Int                  # Number of columns
-    colptr::Vector{Ti}      # Column i is in colptr[i]:(colptr[i+1]-1)
-    rowval::Vector{Ti}      # Row indices of stored values
-    nzval::Vector{Tv}       # Stored values, typically nonzeros
-
-    function SparseMatrixCSC{Tv,Ti}(m::Integer, n::Integer, colptr::Vector{Ti},
-                            rowval::Vector{Ti}, nzval::Vector{Tv}) where {Tv,Ti<:Integer}
-        @noinline throwsz(str, lbl, k) =
-            throw(ArgumentError("number of $str ($lbl) must be ≥ 0, got $k"))
-        m < 0 && throwsz("rows", 'm', m)
-        n < 0 && throwsz("columns", 'n', n)
-        new(Int(m), Int(n), colptr, rowval, nzval)
-    end
-end
-function SparseMatrixCSC(m::Integer, n::Integer, colptr::Vector, rowval::Vector, nzval::Vector)
-    Tv = eltype(nzval)
-    Ti = promote_type(eltype(colptr), eltype(rowval))
-    sparse_check_Ti(m, n, Ti)
-    sparse_check(n, colptr, rowval, nzval)
-    # silently shorten rowval and nzval to usable index positions.
-    maxlen = abs(widemul(m, n))
-    isbitstype(Ti) && (maxlen = min(maxlen, typemax(Ti) - 1))
-    length(rowval) > maxlen && resize!(rowval, maxlen)
-    length(nzval) > maxlen && resize!(nzval, maxlen)
-    SparseMatrixCSC{Tv,Ti}(m, n, colptr, rowval, nzval)
-end
-
-function sparse_check_Ti(m::Integer, n::Integer, Ti::Type)
-        @noinline throwTi(str, lbl, k) =
-            throw(ArgumentError("$str ($lbl = $k) does not fit in Ti = $(Ti)"))
-        0 ≤ m && (!isbitstype(Ti) || m ≤ typemax(Ti)) || throwTi("number of rows", "m", m)
-        0 ≤ n && (!isbitstype(Ti) || n ≤ typemax(Ti)) || throwTi("number of columns", "n", n)
-end
-
-function sparse_check(n::Integer, colptr::Vector{Ti}, rowval, nzval) where Ti
-    sparse_check_length("colptr", colptr, n+1, String) # don't check upper bound
-    ckp = Ti(1)
-    ckp == colptr[1] || throw(ArgumentError("$ckp == colptr[1] != 1"))
-    @inbounds for k = 2:n+1
-        ck = colptr[k]
-        ckp <= ck || throw(ArgumentError("$ckp == colptr[$(k-1)] > colptr[$k] == $ck"))
-        ckp = ck
-    end
-    sparse_check_length("rowval", rowval, ckp-1, Ti)
-    sparse_check_length("nzval", nzval, 0, Ti) # we allow empty nzval !!!
-end
-function sparse_check_length(rowstr, rowval, minlen, Ti)
-    len = length(rowval)
-    len >= minlen || throw(ArgumentError("$len == length($rowstr) < $minlen"))
-    !isbitstype(Ti) || len < typemax(Ti) ||
-        throw(ArgumentError("$len == length($rowstr) >= $(typemax(Ti))"))
-end
-
-size(S::SparseMatrixCSC) = (getfield(S, :m), getfield(S, :n))
-
-# Define an alias for views of a SparseMatrixCSC which include all rows and a unit range of the columns.
-# Also define a union of SparseMatrixCSC and this view since many methods can be defined efficiently for
-# this union by extracting the fields via the get function: getcolptr, getrowval, and getnzval. The key
-# insight is that getcolptr on a SparseMatrixCSCView returns an offset view of the colptr of the
-# underlying SparseMatrixCSC
-const SparseMatrixCSCView{Tv,Ti} =
-    SubArray{Tv,2,<:AbstractSparseMatrixCSC{Tv,Ti},
-        Tuple{Base.Slice{Base.OneTo{Int}},I}} where {I<:AbstractUnitRange}
-const SparseMatrixCSCUnion{Tv,Ti} = Union{AbstractSparseMatrixCSC{Tv,Ti}, SparseMatrixCSCView{Tv,Ti}}
-
-getcolptr(S::SparseMatrixCSC)     = getfield(S, :colptr)
-getcolptr(S::SparseMatrixCSCView) = view(getcolptr(parent(S)), first(axes(S, 2)):(last(axes(S, 2)) + 1))
-getrowval(S::AbstractSparseMatrixCSC) = rowvals(S)
-getrowval(S::SparseMatrixCSCView) = rowvals(parent(S))
-getnzval( S::AbstractSparseMatrixCSC) = nonzeros(S)
-getnzval( S::SparseMatrixCSCView) = nonzeros(parent(S))
-nzvalview(S::AbstractSparseMatrixCSC) = view(nonzeros(S), 1:nnz(S))
-
-"""
-    nnz(A)
-
-Returns the number of stored (filled) elements in a sparse array.
-
-# Examples
-```jldoctest
-julia> A = sparse(2I, 3, 3)
-3×3 SparseMatrixCSC{Int64, Int64} with 3 stored entries:
- 2  ⋅  ⋅
- ⋅  2  ⋅
- ⋅  ⋅  2
-
-julia> nnz(A)
-3
-```
-"""
-nnz(S::AbstractSparseMatrixCSC) = Int(getcolptr(S)[size(S, 2) + 1] - 1)
-nnz(S::ReshapedArray{<:Any,1,<:AbstractSparseMatrixCSC}) = nnz(parent(S))
-nnz(S::UpperTriangular{<:Any,<:AbstractSparseMatrixCSC}) = nnz1(S)
-nnz(S::LowerTriangular{<:Any,<:AbstractSparseMatrixCSC}) = nnz1(S)
-nnz(S::SparseMatrixCSCView) = nnz1(S)
-nnz1(S) = sum(length.(nzrange.(Ref(S), axes(S, 2))))
-
-function count(pred, S::AbstractSparseMatrixCSC)
-    count(pred, nzvalview(S)) + pred(zero(eltype(S)))*(prod(size(S)) - nnz(S))
-end
-
-"""
-    nonzeros(A)
-
-Return a vector of the structural nonzero values in sparse array `A`. This
-includes zeros that are explicitly stored in the sparse array. The returned
-vector points directly to the internal nonzero storage of `A`, and any
-modifications to the returned vector will mutate `A` as well. See
-[`rowvals`](@ref) and [`nzrange`](@ref).
-
-# Examples
-```jldoctest
-julia> A = sparse(2I, 3, 3)
-3×3 SparseMatrixCSC{Int64, Int64} with 3 stored entries:
- 2  ⋅  ⋅
- ⋅  2  ⋅
- ⋅  ⋅  2
-
-julia> nonzeros(A)
-3-element Vector{Int64}:
- 2
- 2
- 2
-```
-"""
-nonzeros(S::SparseMatrixCSC) = getfield(S, :nzval)
-nonzeros(S::SparseMatrixCSCView)  = nonzeros(S.parent)
-nonzeros(S::UpperTriangular{<:Any,<:SparseMatrixCSCUnion}) = nonzeros(S.data)
-nonzeros(S::LowerTriangular{<:Any,<:SparseMatrixCSCUnion}) = nonzeros(S.data)
-
-"""
-    rowvals(A::AbstractSparseMatrixCSC)
-
-Return a vector of the row indices of `A`. Any modifications to the returned
-vector will mutate `A` as well. Providing access to how the row indices are
-stored internally can be useful in conjunction with iterating over structural
-nonzero values. See also [`nonzeros`](@ref) and [`nzrange`](@ref).
-
-# Examples
-```jldoctest
-julia> A = sparse(2I, 3, 3)
-3×3 SparseMatrixCSC{Int64, Int64} with 3 stored entries:
- 2  ⋅  ⋅
- ⋅  2  ⋅
- ⋅  ⋅  2
-
-julia> rowvals(A)
-3-element Vector{Int64}:
- 1
- 2
- 3
-```
-"""
-rowvals(S::SparseMatrixCSC) = getfield(S, :rowval)
-rowvals(S::SparseMatrixCSCView) = rowvals(S.parent)
-rowvals(S::UpperTriangular{<:Any,<:SparseMatrixCSCUnion}) = rowvals(S.data)
-rowvals(S::LowerTriangular{<:Any,<:SparseMatrixCSCUnion}) = rowvals(S.data)
-
-"""
-    nzrange(A::AbstractSparseMatrixCSC, col::Integer)
-
-Return the range of indices to the structural nonzero values of a sparse matrix
-column. In conjunction with [`nonzeros`](@ref) and
-[`rowvals`](@ref), this allows for convenient iterating over a sparse matrix :
-
-    A = sparse(I,J,V)
-    rows = rowvals(A)
-    vals = nonzeros(A)
-    m, n = size(A)
-    for j = 1:n
-       for i in nzrange(A, j)
-          row = rows[i]
-          val = vals[i]
-          # perform sparse wizardry...
-       end
-    end
-"""
-nzrange(S::AbstractSparseMatrixCSC, col::Integer) = getcolptr(S)[col]:(getcolptr(S)[col+1]-1)
-nzrange(S::SparseMatrixCSCView, col::Integer) = nzrange(S.parent, S.indices[2][col])
-nzrange(S::UpperTriangular{<:Any,<:SparseMatrixCSCUnion}, i::Integer) = nzrangeup(S.data, i)
-nzrange(S::LowerTriangular{<:Any,<:SparseMatrixCSCUnion}, i::Integer) = nzrangelo(S.data, i)
-
-function Base.isstored(A::AbstractSparseMatrixCSC, i::Integer, j::Integer)
-    @boundscheck checkbounds(A, i, j)
-    rows = rowvals(A)
-    for istored in nzrange(A, j) # could do binary search if the row indices are sorted?
-        i == rows[istored] && return true
-    end
-    return false
-end
-
-Base.replace_in_print_matrix(A::AbstractSparseMatrix, i::Integer, j::Integer, s::AbstractString) =
-    Base.isstored(A, i, j) ? s : Base.replace_with_centered_mark(s)
-
-function Base.show(io::IO, ::MIME"text/plain", S::AbstractSparseMatrixCSC)
-    xnnz = nnz(S)
-    m, n = size(S)
-    print(io, m, "×", n, " ", typeof(S), " with ", xnnz, " stored ",
-              xnnz == 1 ? "entry" : "entries")
-    if !(m == 0 || n == 0)
-        print(io, ":")
-        show(IOContext(io, :typeinfo => eltype(S)), S)
-    end
-end
-
-Base.show(io::IO, S::AbstractSparseMatrixCSC) = Base.show(convert(IOContext, io), S::AbstractSparseMatrixCSC)
-
-const brailleBlocks = UInt16['⠁', '⠂', '⠄', '⡀', '⠈', '⠐', '⠠', '⢀']
-function _show_with_braille_patterns(io::IOContext, S::AbstractSparseMatrixCSC)
-    m, n = size(S)
-    (m == 0 || n == 0) && return show(io, MIME("text/plain"), S)
-
-    # The maximal number of characters we allow to display the matrix
-    local maxHeight::Int, maxWidth::Int
-    maxHeight = displaysize(io)[1] - 4 # -4 from [Prompt, header, newline after elements, new prompt]
-    maxWidth = displaysize(io)[2] ÷ 2
-
-    # In the process of generating the braille pattern to display the nonzero
-    # structure of `S`, we need to be able to scale the matrix `S` to a
-    # smaller matrix with the same aspect ratio as `S`, but fits on the
-    # available screen space. The size of that smaller matrix is stored
-    # in the variables `scaleHeight` and `scaleWidth`. If no scaling is needed,
-    # we can use the size `m × n` of `S` directly.
-    # We determine if scaling is needed and set the scaling factors
-    # `scaleHeight` and `scaleWidth` accordingly. Note that each available
-    # character can contain up to 4 braille dots in its height (⡇) and up to
-    # 2 braille dots in its width (⠉).
-    if get(io, :limit, true) && (m > 4maxHeight || n > 2maxWidth)
-        s = min(2maxWidth / n, 4maxHeight / m)
-        scaleHeight = floor(Int, s * m)
-        scaleWidth = floor(Int, s * n)
-    else
-        scaleHeight = m
-        scaleWidth = n
-    end
-
-    # `brailleGrid` is used to store the needed braille characters for
-    # the matrix `S`. Each row of the braille pattern to print is stored
-    # in a column of `brailleGrid`.
-    brailleGrid = fill(UInt16(10240), (scaleWidth - 1) ÷ 2 + 2, (scaleHeight - 1) ÷ 4 + 1)
-    brailleGrid[end, :] .= '\n'
-
-    rvals = rowvals(S)
-    rowscale = max(1, scaleHeight - 1) / max(1, m - 1)
-    colscale = max(1, scaleWidth - 1) / max(1, n - 1)
-    @inbounds for j = 1:n
-        # Scale the column index `j` to the best matching column index
-        # of a matrix of size `scaleHeight × scaleWidth`
-        sj = round(Int, (j - 1) * colscale + 1)
-        for x in nzrange(S, j)
-            # Scale the row index `i` to the best matching row index
-            # of a matrix of size `scaleHeight × scaleWidth`
-            si = round(Int, (rvals[x] - 1) * rowscale + 1)
-
-            # Given the index pair `(si, sj)` of the scaled matrix,
-            # calculate the corresponding triple `(k, l, p)` such that the
-            # element at `(si, sj)` can be found at position `(k, l)` in the
-            # braille grid `brailleGrid` and corresponds to the 1-dot braille
-            # character `brailleBlocks[p]`
-            k = (sj - 1) ÷ 2 + 1
-            l = (si - 1) ÷ 4 + 1
-            p = ((sj - 1) % 2) * 4 + ((si - 1) % 4 + 1)
-
-            brailleGrid[k, l] |= brailleBlocks[p]
-        end
-    end
-    foreach(c -> print(io, Char(c)), @view brailleGrid[1:end-1])
-end
-
-function Base.show(io::IOContext, S::AbstractSparseMatrixCSC)
-    if max(size(S)...) < 16 && !(get(io, :compact, false)::Bool)
-        ioc = IOContext(io, :compact => true)
-        println(ioc)
-        Base.print_matrix(ioc, S)
-        return
-    end
-    println(io)
-    _show_with_braille_patterns(io, S)
-end
-
-## Reshape
-
-function sparse_compute_reshaped_colptr_and_rowval(colptrS::Vector{Ti}, rowvalS::Vector{Ti},
-                                                   mS::Int, nS::Int, colptrA::Vector{Ti},
-                                                   rowvalA::Vector{Ti}, mA::Int, nA::Int) where Ti
-    lrowvalA = length(rowvalA)
-    maxrowvalA = (lrowvalA > 0) ? maximum(rowvalA) : zero(Ti)
-    ((length(colptrA) == (nA+1)) && (maximum(colptrA) <= (lrowvalA+1)) && (maxrowvalA <= mA)) || throw(BoundsError())
-
-    colptrS[1] = 1
-    colA = 1
-    colS = 1
-    ptr = 1
-
-    @inbounds while colA <= nA
-        offsetA = (colA - 1) * mA
-        while ptr <= colptrA[colA+1]-1
-            rowA = rowvalA[ptr]
-            i = offsetA + rowA - 1
-            colSn = div(i, mS) + 1
-            rowS = mod(i, mS) + 1
-            while colS < colSn
-                colptrS[colS+1] = ptr
-                colS += 1
-            end
-            rowvalS[ptr] = rowS
-            ptr += 1
-        end
-        colA += 1
-    end
-    @inbounds while colS <= nS
-        colptrS[colS+1] = ptr
-        colS += 1
-    end
-end
-
-function copy(ra::ReshapedArray{<:Any,2,<:AbstractSparseMatrixCSC})
-    mS,nS = size(ra)
-    a = parent(ra)
-    mA,nA = size(a)
-    numnz = nnz(a)
-    colptr = similar(getcolptr(a), nS+1)
-    rowval = similar(rowvals(a))
-    nzval = copy(nonzeros(a))
-
-    sparse_compute_reshaped_colptr_and_rowval(colptr, rowval, mS, nS, getcolptr(a), rowvals(a), mA, nA)
-
-    return SparseMatrixCSC(mS, nS, colptr, rowval, nzval)
-end
-
-## Alias detection and prevention
-using Base: dataids, unaliascopy
-Base.dataids(S::AbstractSparseMatrixCSC) = (dataids(getcolptr(S))..., dataids(rowvals(S))..., dataids(nonzeros(S))...)
-Base.unaliascopy(S::AbstractSparseMatrixCSC) = typeof(S)(size(S, 1), size(S, 2), unaliascopy(getcolptr(S)), unaliascopy(rowvals(S)), unaliascopy(nonzeros(S)))
-
-## Constructors
-
-copy(S::AbstractSparseMatrixCSC) =
-    SparseMatrixCSC(size(S, 1), size(S, 2), copy(getcolptr(S)), copy(rowvals(S)), copy(nonzeros(S)))
-
-function copyto!(A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC)
-    # If the two matrices have the same length then all the
-    # elements in A will be overwritten.
-    if length(A) == length(B)
-        resize!(nonzeros(A), length(nonzeros(B)))
-        resize!(rowvals(A), length(rowvals(B)))
-        if size(A) == size(B)
-            # Simple case: we can simply copy the internal fields of B to A.
-            copyto!(getcolptr(A), getcolptr(B))
-            copyto!(rowvals(A), rowvals(B))
-        else
-            # This is like a "reshape B into A".
-            sparse_compute_reshaped_colptr_and_rowval(getcolptr(A), rowvals(A), size(A, 1), size(A, 2), getcolptr(B), rowvals(B), size(B, 1), size(B, 2))
-        end
-    else
-        length(A) >= length(B) || throw(BoundsError())
-        lB = length(B)
-        nnzA = nnz(A)
-        nnzB = nnz(B)
-        # Up to which col, row, and ptr in rowval/nzval will A be overwritten?
-        lastmodcolA = div(lB - 1, size(A, 1)) + 1
-        lastmodrowA = mod(lB - 1, size(A, 1)) + 1
-        lastmodptrA = getcolptr(A)[lastmodcolA]
-        while lastmodptrA < getcolptr(A)[lastmodcolA+1] && rowvals(A)[lastmodptrA] <= lastmodrowA
-            lastmodptrA += 1
-        end
-        lastmodptrA -= 1
-        if lastmodptrA >= nnzB
-            # A will have fewer non-zero elements; unmodified elements are kept at the end.
-            deleteat!(rowvals(A), nnzB+1:lastmodptrA)
-            deleteat!(nonzeros(A), nnzB+1:lastmodptrA)
-        else
-            # A will have more non-zero elements; unmodified elements are kept at the end.
-            resize!(rowvals(A), nnzB + nnzA - lastmodptrA)
-            resize!(nonzeros(A), nnzB + nnzA - lastmodptrA)
-            copyto!(rowvals(A), nnzB+1, rowvals(A), lastmodptrA+1, nnzA-lastmodptrA)
-            copyto!(nonzeros(A), nnzB+1, nonzeros(A), lastmodptrA+1, nnzA-lastmodptrA)
-        end
-        # Adjust colptr accordingly.
-        @inbounds for i in 2:length(getcolptr(A))
-            getcolptr(A)[i] += nnzB - lastmodptrA
-        end
-        sparse_compute_reshaped_colptr_and_rowval(getcolptr(A), rowvals(A), size(A, 1), lastmodcolA-1, getcolptr(B), rowvals(B), size(B, 1), size(B, 2))
-    end
-    copyto!(nonzeros(A), nonzeros(B))
-    return A
-end
-
-copyto!(A::AbstractMatrix, B::AbstractSparseMatrixCSC) = _sparse_copyto!(A, B)
-# Ambiguity resolution
-copyto!(A::PermutedDimsArray, B::AbstractSparseMatrixCSC) = _sparse_copyto!(A, B)
-
-function _sparse_copyto!(dest::AbstractMatrix, src::AbstractSparseMatrixCSC)
-    (dest === src || isempty(src)) && return dest
-    z = convert(eltype(dest), zero(eltype(src))) # should throw if not possible
-    isrc = LinearIndices(src)
-    checkbounds(dest, isrc)
-    # If src is not dense, zero out the portion of dest spanned by isrc
-    if length(src) > nnz(src)
-        for i in isrc
-            @inbounds dest[i] = z
-        end
-    end
-    @inbounds for col in axes(src, 2), ptr in nzrange(src, col)
-        row = rowvals(src)[ptr]
-        val = nonzeros(src)[ptr]
-        dest[isrc[row, col]] = val
-    end
-    return dest
-end
-
-function copyto!(dest::AbstractMatrix, Rdest::CartesianIndices{2},
-                 src::AbstractSparseMatrixCSC{T}, Rsrc::CartesianIndices{2}) where {T}
-    isempty(Rdest) && return dest
-    if size(Rdest) != size(Rsrc)
-        throw(ArgumentError("source and destination must have same size (got $(size(Rsrc)) and $(size(Rdest)))"))
-    end
-    checkbounds(dest, Rdest)
-    checkbounds(src, Rsrc)
-    src′ = Base.unalias(dest, src)
-    for I in Rdest
-        @inbounds dest[I] = zero(T) # implicitly convert to eltype(dest), throw if not possible
-    end
-    rows, cols = Rsrc.indices
-    lin = LinearIndices(Base.IdentityUnitRange.(Rsrc.indices))
-    @inbounds for col in cols, ptr in nzrange(src′, col)
-        row = rowvals(src′)[ptr]
-        if row in rows
-            val = nonzeros(src′)[ptr]
-            I = Rdest[lin[row, col]]
-            dest[I] = val
-        end
-    end
-    return dest
-end
-
-## similar
-#
-# parent method for similar that preserves stored-entry structure (for when new and old dims match)
-function _sparsesimilar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}) where {TvNew,TiNew}
-    newcolptr = copyto!(similar(getcolptr(S), TiNew), getcolptr(S))
-    newrowval = copyto!(similar(rowvals(S), TiNew), rowvals(S))
-    return SparseMatrixCSC(size(S, 1), size(S, 2), newcolptr, newrowval, similar(nonzeros(S), TvNew))
-end
-# parent methods for similar that preserves only storage space (for when new and old dims differ)
-_sparsesimilar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{2}) where {TvNew,TiNew} =
-    SparseMatrixCSC(dims..., fill(one(TiNew), last(dims)+1), similar(rowvals(S), TiNew), similar(nonzeros(S), TvNew))
-# parent method for similar that allocates an empty sparse vector (when new dims are single)
-_sparsesimilar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{1}) where {TvNew,TiNew} =
-    SparseVector(dims..., similar(rowvals(S), TiNew, 0), similar(nonzeros(S), TvNew, 0))
-#
-# The following methods hook into the AbstractArray similar hierarchy. The first method
-# covers similar(A[, Tv]) calls, which preserve stored-entry structure, and the latter
-# methods cover similar(A[, Tv], shape...) calls, which preserve storage space when the shape
-# calls for a two-dimensional result.
-similar(S::AbstractSparseMatrixCSC{<:Any,Ti}, ::Type{TvNew}) where {Ti,TvNew} = _sparsesimilar(S, TvNew, Ti)
-similar(S::AbstractSparseMatrixCSC{<:Any,Ti}, ::Type{TvNew}, dims::Union{Dims{1},Dims{2}}) where {Ti,TvNew} =
-    _sparsesimilar(S, TvNew, Ti, dims)
-# The following methods cover similar(A, Tv, Ti[, shape...]) calls, which specify the
-# result's index type in addition to its entry type, and aren't covered by the hooks above.
-# The calls without shape again preserve stored-entry structure, whereas those with shape
-# preserve storage space when the shape calls for a two-dimensional result.
-similar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}) where{TvNew,TiNew} =
-    _sparsesimilar(S, TvNew, TiNew)
-similar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, dims::Union{Dims{1},Dims{2}}) where {TvNew,TiNew} =
-    _sparsesimilar(S, TvNew, TiNew, dims)
-similar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, m::Integer) where {TvNew,TiNew} =
-    _sparsesimilar(S, TvNew, TiNew, (m,))
-similar(S::AbstractSparseMatrixCSC, ::Type{TvNew}, ::Type{TiNew}, m::Integer, n::Integer) where {TvNew,TiNew} =
-    _sparsesimilar(S, TvNew, TiNew, (m, n))
-
-
-# converting between SparseMatrixCSC types
-SparseMatrixCSC(S::AbstractSparseMatrixCSC) = copy(S)
-AbstractMatrix{Tv}(A::AbstractSparseMatrixCSC) where {Tv} = SparseMatrixCSC{Tv}(A)
-SparseMatrixCSC{Tv}(S::AbstractSparseMatrixCSC{Tv}) where {Tv} = copy(S)
-SparseMatrixCSC{Tv}(S::AbstractSparseMatrixCSC) where {Tv} = SparseMatrixCSC{Tv,eltype(getcolptr(S))}(S)
-SparseMatrixCSC{Tv,Ti}(S::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = copy(S)
-function SparseMatrixCSC{Tv,Ti}(S::AbstractSparseMatrixCSC) where {Tv,Ti}
-    eltypeTicolptr = Vector{Ti}(getcolptr(S))
-    eltypeTirowval = Vector{Ti}(rowvals(S))
-    eltypeTvnzval = Vector{Tv}(nonzeros(S))
-    return SparseMatrixCSC(size(S, 1), size(S, 2), eltypeTicolptr, eltypeTirowval, eltypeTvnzval)
-end
-
-# converting from other matrix types to SparseMatrixCSC (also see sparse())
-SparseMatrixCSC(M::Matrix) = sparse(M)
-function SparseMatrixCSC(T::Tridiagonal{Tv}) where Tv
-    m = length(T.d)
-
-    colptr = Vector{Int}(undef, m+1)
-    colptr[1] = 1
-    @inbounds for i=1:m-1
-        colptr[i+1] = 3i
-    end
-    colptr[end] = 3m-1
-
-    rowval = Vector{Int}(undef, 3m-2)
-    rowval[1] = 1
-    rowval[2] = 2
-    @inbounds for i=2:m-1, j=-1:1
-        rowval[3i+j-2] = i+j
-    end
-    rowval[end-1] = m - 1
-    rowval[end] = m
-
-    nzval = Vector{Tv}(undef, 3m-2)
-    @inbounds for i=1:(m-1)
-        nzval[3i-2] = T.d[i]
-        nzval[3i-1] = T.dl[i]
-        nzval[3i]   = T.du[i]
-    end
-    nzval[end] = T.d[end]
-
-    return SparseMatrixCSC(m, m, colptr, rowval, nzval)
-end
-function SparseMatrixCSC(T::SymTridiagonal{Tv}) where Tv
-    m = length(T.dv)
-
-    colptr = Vector{Int}(undef, m+1)
-    colptr[1] = 1
-    @inbounds for i=1:m-1
-        colptr[i+1] = 3i
-    end
-    colptr[end] = 3m-1
-
-    rowval = Vector{Int}(undef, 3m-2)
-    rowval[1] = 1
-    rowval[2] = 2
-    @inbounds for i=2:m-1, j=-1:1
-        rowval[3i+j-2] = i+j
-    end
-    rowval[end-1] = m - 1
-    rowval[end] = m
-
-    nzval = Vector{Tv}(undef, 3m-2)
-    @inbounds for i=1:(m-1)
-        nzval[3i-2] = T.dv[i]
-        nzval[3i-1] = T.ev[i]
-        nzval[3i]   = T.ev[i]
-    end
-    nzval[end] = T.dv[end]
-
-    return SparseMatrixCSC(m, m, colptr, rowval, nzval)
-end
-function SparseMatrixCSC(B::Bidiagonal{Tv}) where Tv
-    m = length(B.dv)
-
-    colptr = Vector{Int}(undef, m+1)
-    colptr[1] = 1
-    @inbounds for i=1:m-1
-        colptr[i+1] = B.uplo == 'U' ? 2i : 2i+1
-    end
-    colptr[end] = 2m
-
-    rowval = Vector{Int}(undef, 2m-1)
-    @inbounds for i=1:m-1
-        rowval[2i-1] = i
-        rowval[2i]   = B.uplo == 'U' ? i : i+1
-    end
-    rowval[end] = m
-
-    nzval = Vector{Tv}(undef, 2m-1)
-    nzval[1] = B.dv[1]
-    @inbounds for i=1:m-1
-        nzval[2i-1] = B.dv[i]
-        nzval[2i]   = B.ev[i]
-    end
-    nzval[end] = B.dv[end]
-
-    return SparseMatrixCSC(m, m, colptr, rowval, nzval)
-end
-function SparseMatrixCSC(D::Diagonal{T}) where T
-    m = length(D.diag)
-    return SparseMatrixCSC(m, m, Vector(1:(m+1)), Vector(1:m), Vector{T}(D.diag))
-end
-SparseMatrixCSC(M::AbstractMatrix{Tv}) where {Tv} = SparseMatrixCSC{Tv,Int}(M)
-SparseMatrixCSC{Tv}(M::AbstractMatrix{Tv}) where {Tv} = SparseMatrixCSC{Tv,Int}(M)
-function SparseMatrixCSC{Tv,Ti}(M::AbstractMatrix) where {Tv,Ti}
-    require_one_based_indexing(M)
-    I = Ti[]
-    V = Tv[]
-    i = 0
-    for v in M
-        i += 1
-        if !iszero(v)
-            push!(I, i)
-            push!(V, v)
-        end
-    end
-    return sparse_sortedlinearindices!(I, V, size(M)...)
-end
-
-function SparseMatrixCSC{Tv,Ti}(M::StridedMatrix) where {Tv,Ti}
-    nz = count(!iszero, M)
-    colptr = zeros(Ti, size(M, 2) + 1)
-    nzval = Vector{Tv}(undef, nz)
-    rowval = Vector{Ti}(undef, nz)
-    colptr[1] = 1
-    cnt = 1
-    @inbounds for j in 1:size(M, 2)
-        for i in 1:size(M, 1)
-            v = M[i, j]
-            if !iszero(v)
-                rowval[cnt] = i
-                nzval[cnt] = v
-                cnt += 1
-            end
-        end
-        colptr[j+1] = cnt
-    end
-    return SparseMatrixCSC(size(M, 1), size(M, 2), colptr, rowval, nzval)
-end
-SparseMatrixCSC(M::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) = copy(M)
-SparseMatrixCSC(M::Transpose{<:Any,<:AbstractSparseMatrixCSC}) = copy(M)
-SparseMatrixCSC{Tv}(M::Adjoint{Tv,<:AbstractSparseMatrixCSC{Tv}}) where {Tv} = copy(M)
-SparseMatrixCSC{Tv}(M::Transpose{Tv,<:AbstractSparseMatrixCSC{Tv}}) where {Tv} = copy(M)
-SparseMatrixCSC{Tv,Ti}(M::Adjoint{Tv,<:AbstractSparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} = copy(M)
-SparseMatrixCSC{Tv,Ti}(M::Transpose{Tv,<:AbstractSparseMatrixCSC{Tv,Ti}}) where {Tv,Ti} = copy(M)
-
-# converting from adjoint or transpose sparse matrices to sparse matrices with different eltype
-SparseMatrixCSC{Tv}(M::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) where {Tv} = SparseMatrixCSC{Tv}(copy(M))
-SparseMatrixCSC{Tv}(M::Transpose{<:Any,<:AbstractSparseMatrixCSC}) where {Tv} = SparseMatrixCSC{Tv}(copy(M))
-SparseMatrixCSC{Tv,Ti}(M::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) where {Tv,Ti} = SparseMatrixCSC{Tv,Ti}(copy(M))
-SparseMatrixCSC{Tv,Ti}(M::Transpose{<:Any,<:AbstractSparseMatrixCSC}) where {Tv,Ti} = SparseMatrixCSC{Tv,Ti}(copy(M))
-
-# converting from SparseMatrixCSC to other matrix types
-function Matrix(S::AbstractSparseMatrixCSC{Tv}) where Tv
-    A = Matrix{Tv}(undef, size(S, 1), size(S, 2))
-    copyto!(A, S)
-    return A
-end
-Array(S::AbstractSparseMatrixCSC) = Matrix(S)
-
-convert(T::Type{<:AbstractSparseMatrixCSC}, m::AbstractMatrix) = m isa T ? m : T(m)
-
-float(S::SparseMatrixCSC) = SparseMatrixCSC(size(S, 1), size(S, 2), copy(getcolptr(S)), copy(rowvals(S)), float.(nonzeros(S)))
-complex(S::SparseMatrixCSC) = SparseMatrixCSC(size(S, 1), size(S, 2), copy(getcolptr(S)), copy(rowvals(S)), complex(copy(nonzeros(S))))
-
-"""
-    sparse(A)
-
-Convert an AbstractMatrix `A` into a sparse matrix.
-
-# Examples
-```jldoctest
-julia> A = Matrix(1.0I, 3, 3)
-3×3 Matrix{Float64}:
- 1.0  0.0  0.0
- 0.0  1.0  0.0
- 0.0  0.0  1.0
-
-julia> sparse(A)
-3×3 SparseMatrixCSC{Float64, Int64} with 3 stored entries:
- 1.0   ⋅    ⋅
-  ⋅   1.0   ⋅
-  ⋅    ⋅   1.0
-```
-"""
-sparse(A::AbstractMatrix{Tv}) where {Tv} = convert(SparseMatrixCSC{Tv,Int}, A)
-
-sparse(S::AbstractSparseMatrixCSC) = copy(S)
-
-sparse(T::SymTridiagonal) = SparseMatrixCSC(T)
-
-sparse(T::Tridiagonal) = SparseMatrixCSC(T)
-
-sparse(B::Bidiagonal) = SparseMatrixCSC(B)
-
-sparse(D::Diagonal) = SparseMatrixCSC(D)
-
-"""
-    sparse(I, J, V,[ m, n, combine])
-
-Create a sparse matrix `S` of dimensions `m x n` such that `S[I[k], J[k]] = V[k]`. The
-`combine` function is used to combine duplicates. If `m` and `n` are not specified, they
-are set to `maximum(I)` and `maximum(J)` respectively. If the `combine` function is not
-supplied, `combine` defaults to `+` unless the elements of `V` are Booleans in which case
-`combine` defaults to `|`. All elements of `I` must satisfy `1 <= I[k] <= m`, and all
-elements of `J` must satisfy `1 <= J[k] <= n`. Numerical zeros in (`I`, `J`, `V`) are
-retained as structural nonzeros; to drop numerical zeros, use [`dropzeros!`](@ref).
-
-For additional documentation and an expert driver, see `SparseArrays.sparse!`.
-
-# Examples
-```jldoctest
-julia> Is = [1; 2; 3];
-
-julia> Js = [1; 2; 3];
-
-julia> Vs = [1; 2; 3];
-
-julia> sparse(Is, Js, Vs)
-3×3 SparseMatrixCSC{Int64, Int64} with 3 stored entries:
- 1  ⋅  ⋅
- ⋅  2  ⋅
- ⋅  ⋅  3
-```
-"""
-function sparse(I::AbstractVector{Ti}, J::AbstractVector{Ti}, V::AbstractVector{Tv}, m::Integer, n::Integer, combine) where {Tv,Ti<:Integer}
-    require_one_based_indexing(I, J, V)
-    coolen = length(I)
-    if length(J) != coolen || length(V) != coolen
-        throw(ArgumentError(string("the first three arguments' lengths must match, ",
-              "length(I) (=$(length(I))) == length(J) (= $(length(J))) == length(V) (= ",
-              "$(length(V)))")))
-    end
-    if Base.hastypemax(Ti) && coolen >= typemax(Ti)
-        throw(ArgumentError("the index type $Ti cannot hold $coolen elements; use a larger index type"))
-    end
-    if m == 0 || n == 0 || coolen == 0
-        if coolen != 0
-            if n == 0
-                throw(ArgumentError("column indices J[k] must satisfy 1 <= J[k] <= n"))
-            elseif m == 0
-                throw(ArgumentError("row indices I[k] must satisfy 1 <= I[k] <= m"))
-            end
-        end
-        SparseMatrixCSC(m, n, fill(one(Ti), n+1), Vector{Ti}(), Vector{Tv}())
-    else
-        # Allocate storage for CSR form
-        csrrowptr = Vector{Ti}(undef, m+1)
-        csrcolval = Vector{Ti}(undef, coolen)
-        csrnzval = Vector{Tv}(undef, coolen)
-
-        # Allocate storage for the CSC form's column pointers and a necessary workspace
-        csccolptr = Vector{Ti}(undef, n+1)
-        klasttouch = Vector{Ti}(undef, n)
-
-        # Allocate empty arrays for the CSC form's row and nonzero value arrays
-        # The parent method called below automagically resizes these arrays
-        cscrowval = Vector{Ti}()
-        cscnzval = Vector{Tv}()
-
-        sparse!(I, J, V, m, n, combine, klasttouch,
-                csrrowptr, csrcolval, csrnzval,
-                csccolptr, cscrowval, cscnzval)
-    end
-end
-
-sparse(I::AbstractVector, J::AbstractVector, V::AbstractVector, m::Integer, n::Integer, combine) =
-    sparse(AbstractVector{Int}(I), AbstractVector{Int}(J), V, m, n, combine)
-
-"""
-    sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti}, V::AbstractVector{Tv},
-            m::Integer, n::Integer, combine, klasttouch::Vector{Ti},
-            csrrowptr::Vector{Ti}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
-            [csccolptr::Vector{Ti}], [cscrowval::Vector{Ti}, cscnzval::Vector{Tv}] ) where {Tv,Ti<:Integer}
-
-Parent of and expert driver for [`sparse`](@ref);
-see [`sparse`](@ref) for basic usage. This method
-allows the user to provide preallocated storage for `sparse`'s intermediate objects and
-result as described below. This capability enables more efficient successive construction
-of [`SparseMatrixCSC`](@ref)s from coordinate representations, and also enables extraction
-of an unsorted-column representation of the result's transpose at no additional cost.
-
-This method consists of three major steps: (1) Counting-sort the provided coordinate
-representation into an unsorted-row CSR form including repeated entries. (2) Sweep through
-the CSR form, simultaneously calculating the desired CSC form's column-pointer array,
-detecting repeated entries, and repacking the CSR form with repeated entries combined;
-this stage yields an unsorted-row CSR form with no repeated entries. (3) Counting-sort the
-preceding CSR form into a fully-sorted CSC form with no repeated entries.
-
-Input arrays `csrrowptr`, `csrcolval`, and `csrnzval` constitute storage for the
-intermediate CSR forms and require `length(csrrowptr) >= m + 1`,
-`length(csrcolval) >= length(I)`, and `length(csrnzval >= length(I))`. Input
-array `klasttouch`, workspace for the second stage, requires `length(klasttouch) >= n`.
-Optional input arrays `csccolptr`, `cscrowval`, and `cscnzval` constitute storage for the
-returned CSC form `S`. `csccolptr` requires `length(csccolptr) >= n + 1`. If necessary,
-`cscrowval` and `cscnzval` are automatically resized to satisfy
-`length(cscrowval) >= nnz(S)` and `length(cscnzval) >= nnz(S)`; hence, if `nnz(S)` is
-unknown at the outset, passing in empty vectors of the appropriate type (`Vector{Ti}()`
-and `Vector{Tv}()` respectively) suffices, or calling the `sparse!` method
-neglecting `cscrowval` and `cscnzval`.
-
-On return, `csrrowptr`, `csrcolval`, and `csrnzval` contain an unsorted-column
-representation of the result's transpose.
-
-You may reuse the input arrays' storage (`I`, `J`, `V`) for the output arrays
-(`csccolptr`, `cscrowval`, `cscnzval`). For example, you may call
-`sparse!(I, J, V, csrrowptr, csrcolval, csrnzval, I, J, V)`.
-
-For the sake of efficiency, this method performs no argument checking beyond
-`1 <= I[k] <= m` and `1 <= J[k] <= n`. Use with care. Testing with `--check-bounds=yes`
-is wise.
-
-This method runs in `O(m, n, length(I))` time. The HALFPERM algorithm described in
-F. Gustavson, "Two fast algorithms for sparse matrices: multiplication and permuted
-transposition," ACM TOMS 4(3), 250-269 (1978) inspired this method's use of a pair of
-counting sorts.
-"""
-function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
-        V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
-        csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
-        csccolptr::Vector{Ti}, cscrowval::Vector{Ti}, cscnzval::Vector{Tv}) where {Tv,Ti<:Integer,Tj<:Integer}
-
-    require_one_based_indexing(I, J, V)
-    sparse_check_Ti(m, n, Ti)
-    sparse_check_length("I", I, 0, Tj)
-    # Compute the CSR form's row counts and store them shifted forward by one in csrrowptr
-    fill!(csrrowptr, Tj(0))
-    coolen = length(I)
-    min(length(J), length(V)) >= coolen || throw(ArgumentError("J and V need length >= length(I) = $coolen"))
-    @inbounds for k in 1:coolen
-        Ik = I[k]
-        if 1 > Ik || m < Ik
-            throw(ArgumentError("row indices I[k] must satisfy 1 <= I[k] <= m"))
-        end
-        csrrowptr[Ik+1] += Tj(1)
-    end
-
-    # Compute the CSR form's rowptrs and store them shifted forward by one in csrrowptr
-    countsum = Tj(1)
-    csrrowptr[1] = Tj(1)
-    @inbounds for i in 2:(m+1)
-        overwritten = csrrowptr[i]
-        csrrowptr[i] = countsum
-        countsum += overwritten
-    end
-
-    # Counting-sort the column and nonzero values from J and V into csrcolval and csrnzval
-    # Tracking write positions in csrrowptr corrects the row pointers
-    @inbounds for k in 1:coolen
-        Ik, Jk = I[k], J[k]
-        if Ti(1) > Jk || Ti(n) < Jk
-            throw(ArgumentError("column indices J[k] must satisfy 1 <= J[k] <= n"))
-        end
-        csrk = csrrowptr[Ik+1]
-        @assert csrk >= Tj(1) "index into csrcolval exceeds typemax(Ti)"
-        csrrowptr[Ik+1] = csrk + Tj(1)
-        csrcolval[csrk] = Jk
-        csrnzval[csrk] = V[k]
-    end
-    # This completes the unsorted-row, has-repeats CSR form's construction
-
-    # Sweep through the CSR form, simultaneously (1) calculating the CSC form's column
-    # counts and storing them shifted forward by one in csccolptr; (2) detecting repeated
-    # entries; and (3) repacking the CSR form with the repeated entries combined.
-    #
-    # Minimizing extraneous communication and nonlocality of reference, primarily by using
-    # only a single auxiliary array in this step, is the key to this method's performance.
-    fill!(csccolptr, Ti(0))
-    fill!(klasttouch, Tj(0))
-    writek = Tj(1)
-    newcsrrowptri = Ti(1)
-    origcsrrowptri = Tj(1)
-    origcsrrowptrip1 = csrrowptr[2]
-    @inbounds for i in 1:m
-        for readk in origcsrrowptri:(origcsrrowptrip1-Tj(1))
-            j = csrcolval[readk]
-            if klasttouch[j] < newcsrrowptri
-                klasttouch[j] = writek
-                if writek != readk
-                    csrcolval[writek] = j
-                    csrnzval[writek] = csrnzval[readk]
-                end
-                writek += Tj(1)
-                csccolptr[j+1] += Ti(1)
-            else
-                klt = klasttouch[j]
-                csrnzval[klt] = combine(csrnzval[klt], csrnzval[readk])
-            end
-        end
-        newcsrrowptri = writek
-        origcsrrowptri = origcsrrowptrip1
-        origcsrrowptrip1 != writek && (csrrowptr[i+1] = writek)
-        i < m && (origcsrrowptrip1 = csrrowptr[i+2])
-    end
-
-    # Compute the CSC form's colptrs and store them shifted forward by one in csccolptr
-    countsum = Tj(1)
-    csccolptr[1] = Ti(1)
-    @inbounds for j in 2:(n+1)
-        overwritten = csccolptr[j]
-        csccolptr[j] = countsum
-        countsum += overwritten
-        Base.hastypemax(Ti) && (countsum <= typemax(Ti) || throw(ArgumentError("more than typemax(Ti)-1 == $(typemax(Ti)-1) entries")))
-    end
-
-    # Now knowing the CSC form's entry count, resize cscrowval and cscnzval if necessary
-    cscnnz = countsum - Tj(1)
-    length(cscrowval) < cscnnz && resize!(cscrowval, cscnnz)
-    length(cscnzval) < cscnnz && resize!(cscnzval, cscnnz)
-
-    # Finally counting-sort the row and nonzero values from the CSR form into cscrowval and
-    # cscnzval. Tracking write positions in csccolptr corrects the column pointers.
-    @inbounds for i in 1:m
-        for csrk in csrrowptr[i]:(csrrowptr[i+1]-Tj(1))
-            j = csrcolval[csrk]
-            x = csrnzval[csrk]
-            csck = csccolptr[j+1]
-            csccolptr[j+1] = csck + Ti(1)
-            cscrowval[csck] = i
-            cscnzval[csck] = x
-        end
-    end
-
-    SparseMatrixCSC(m, n, csccolptr, cscrowval, cscnzval)
-end
-function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
-        V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
-        csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv},
-        csccolptr::Vector{Ti}) where {Tv,Ti<:Integer,Tj<:Integer}
-    sparse!(I, J, V, m, n, combine, klasttouch,
-            csrrowptr, csrcolval, csrnzval,
-            csccolptr, Vector{Ti}(), Vector{Tv}())
-end
-function sparse!(I::AbstractVector{Ti}, J::AbstractVector{Ti},
-        V::AbstractVector{Tv}, m::Integer, n::Integer, combine, klasttouch::Vector{Tj},
-        csrrowptr::Vector{Tj}, csrcolval::Vector{Ti}, csrnzval::Vector{Tv}) where {Tv,Ti<:Integer,Tj<:Integer}
-    sparse!(I, J, V, m, n, combine, klasttouch,
-            csrrowptr, csrcolval, csrnzval,
-            Vector{Ti}(undef, n+1), Vector{Ti}(), Vector{Tv}())
-end
-
-dimlub(I) = isempty(I) ? 0 : Int(maximum(I)) #least upper bound on required sparse matrix dimension
-
-sparse(I,J,v::Number) = sparse(I, J, fill(v,length(I)))
-
-sparse(I,J,V::AbstractVector) = sparse(I, J, V, dimlub(I), dimlub(J))
-
-sparse(I,J,v::Number,m,n) = sparse(I, J, fill(v,length(I)), Int(m), Int(n))
-
-sparse(I,J,V::AbstractVector,m,n) = sparse(I, J, V, Int(m), Int(n), +)
-
-sparse(I,J,V::AbstractVector{Bool},m,n) = sparse(I, J, V, Int(m), Int(n), |)
-
-sparse(I,J,v::Number,m,n,combine::Function) = sparse(I, J, fill(v,length(I)), Int(m), Int(n), combine)
-
-## Transposition and permutation methods
-
-"""
-    halfperm!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{TvA,Ti},
-              q::AbstractVector{<:Integer}, f::Function = identity) where {Tv,TvA,Ti}
-
-Column-permute and transpose `A`, simultaneously applying `f` to each entry of `A`, storing
-the result `(f(A)Q)^T` (`map(f, transpose(A[:,q]))`) in `X`.
-
-Element type `Tv` of `X` must match `f(::TvA)`, where `TvA` is the element type of `A`.
-`X`'s dimensions must match those of `transpose(A)` (`size(X, 1) == size(A, 2)` and
-`size(X, 2) == size(A, 1)`), and `X` must have enough storage to accommodate all allocated
-entries in `A` (`length(rowvals(X)) >= nnz(A)` and `length(nonzeros(X)) >= nnz(A)`).
-Column-permutation `q`'s length must match `A`'s column count (`length(q) == size(A, 2)`).
-
-This method is the parent of several methods performing transposition and permutation
-operations on [`SparseMatrixCSC`](@ref)s. As this method performs no argument checking,
-prefer the safer child methods (`[c]transpose[!]`, `permute[!]`) to direct use.
-
-This method implements the `HALFPERM` algorithm described in F. Gustavson, "Two fast
-algorithms for sparse matrices: multiplication and permuted transposition," ACM TOMS 4(3),
-250-269 (1978). The algorithm runs in `O(size(A, 1), size(A, 2), nnz(A))` time and requires no space
-beyond that passed in.
-"""
-function halfperm!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{TvA,Ti},
-        q::AbstractVector{<:Integer}, f::Function = identity) where {Tv,TvA,Ti}
-    _computecolptrs_halfperm!(X, A)
-    _distributevals_halfperm!(X, A, q, f)
-    return X
-end
-"""
-Helper method for `halfperm!`. Computes `transpose(A[:,q])`'s column pointers, storing them
-shifted one position forward in `getcolptr(X)`; `_distributevals_halfperm!` fixes this shift.
-"""
-function _computecolptrs_halfperm!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{TvA,Ti}) where {Tv,TvA,Ti}
-    # Compute `transpose(A[:,q])`'s column counts. Store shifted forward one position in getcolptr(X).
-    fill!(getcolptr(X), 0)
-    @inbounds for k in 1:nnz(A)
-        getcolptr(X)[rowvals(A)[k] + 1] += 1
-    end
-    # Compute `transpose(A[:,q])`'s column pointers. Store shifted forward one position in getcolptr(X).
-    getcolptr(X)[1] = 1
-    countsum = 1
-    @inbounds for k in 2:(size(A, 1) + 1)
-        overwritten = getcolptr(X)[k]
-        getcolptr(X)[k] = countsum
-        countsum += overwritten
-    end
-end
-"""
-Helper method for `halfperm!`. With `transpose(A[:,q])`'s column pointers shifted one
-position forward in `getcolptr(X)`, computes `map(f, transpose(A[:,q]))` by appropriately
-distributing `rowvals(A)` and `f`-transformed `nonzeros(A)` into `rowvals(X)` and `nonzeros(X)`
-respectively. Simultaneously fixes the one-position-forward shift in `getcolptr(X)`.
-"""
-@noinline function _distributevals_halfperm!(X::AbstractSparseMatrixCSC{Tv,Ti},
-        A::AbstractSparseMatrixCSC{TvA,Ti}, q::AbstractVector{<:Integer}, f::Function) where {Tv,TvA,Ti}
-    @inbounds for Xi in 1:size(A, 2)
-        Aj = q[Xi]
-        for Ak in nzrange(A, Aj)
-            Ai = rowvals(A)[Ak]
-            Xk = getcolptr(X)[Ai + 1]
-            rowvals(X)[Xk] = Xi
-            nonzeros(X)[Xk] = f(nonzeros(A)[Ak])
-            getcolptr(X)[Ai + 1] += 1
-        end
-    end
-    return # kill potential type instability
-end
-
-function ftranspose!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti}, f::Function) where {Tv,Ti}
-    # Check compatibility of source argument A and destination argument X
-    if size(X, 2) != size(A, 1)
-        throw(DimensionMismatch(string("destination argument `X`'s column count, ",
-            "`size(X, 2) (= $(size(X, 2)))`, must match source argument `A`'s row count, `size(A, 1) (= $(size(A, 1)))`")))
-    elseif size(X, 1) != size(A, 2)
-        throw(DimensionMismatch(string("destination argument `X`'s row count,
-            `size(X, 1) (= $(size(X, 1)))`, must match source argument `A`'s column count, `size(A, 2) (= $(size(A, 2)))`")))
-    elseif length(rowvals(X)) < nnz(A)
-        throw(ArgumentError(string("the length of destination argument `X`'s `rowval` ",
-            "array, `length(rowvals(X)) (= $(length(rowvals(X))))`, must be greater than or ",
-            "equal to source argument `A`'s allocated entry count, `nnz(A) (= $(nnz(A)))`")))
-    elseif length(nonzeros(X)) < nnz(A)
-        throw(ArgumentError(string("the length of destination argument `X`'s `nzval` ",
-            "array, `length(nonzeros(X)) (= $(length(nonzeros(X))))`, must be greater than or ",
-            "equal to source argument `A`'s allocated entry count, `nnz(A) (= $(nnz(A)))`")))
-    end
-    halfperm!(X, A, 1:size(A, 2), f)
-end
-transpose!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = ftranspose!(X, A, identity)
-adjoint!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = ftranspose!(X, A, conj)
-
-# manually specifying eltype allows to avoid calling return_type of f on TvA
-function ftranspose(A::AbstractSparseMatrixCSC{TvA,Ti}, f::Function, eltype::Type{Tv} = TvA) where {Tv,TvA,Ti}
-    X = SparseMatrixCSC(size(A, 2), size(A, 1),
-                        ones(Ti, size(A, 1)+1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
-    halfperm!(X, A, 1:size(A, 2), f)
-end
-adjoint(A::AbstractSparseMatrixCSC) = Adjoint(A)
-transpose(A::AbstractSparseMatrixCSC) = Transpose(A)
-Base.copy(A::Adjoint{<:Any,<:AbstractSparseMatrixCSC}) =
-    ftranspose(A.parent, x -> adjoint(copy(x)), eltype(A))
-Base.copy(A::Transpose{<:Any,<:AbstractSparseMatrixCSC}) =
-    ftranspose(A.parent, x -> transpose(copy(x)), eltype(A))
-function Base.permutedims(A::AbstractSparseMatrixCSC, (a,b))
-    (a, b) == (2, 1) && return ftranspose(A, identity)
-    (a, b) == (1, 2) && return copy(A)
-    throw(ArgumentError("no valid permutation of dimensions"))
-end
-
-"""
-    unchecked_noalias_permute!(X::AbstractSparseMatrixCSC{Tv,Ti},
-        A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
-        q::AbstractVector{<:Integer}, C::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-
-See [`permute!`](@ref) for basic usage. Parent of `permute[!]`
-methods operating on `SparseMatrixCSC`s that assume none of `X`, `A`, and `C` alias each
-other. As this method performs no argument checking, prefer the safer child methods
-(`permute[!]`) to direct use.
-
-This method consists of two major steps: (1) Column-permute (`Q`,`I[:,q]`) and transpose `A`
-to generate intermediate result `(AQ)^T` (`transpose(A[:,q])`) in `C`. (2) Column-permute
-(`P^T`, I[:,p]) and transpose intermediate result `(AQ)^T` to generate result
-`((AQ)^T P^T)^T = PAQ` (`A[p,q]`) in `X`.
-
-The first step is a call to `halfperm!`, and the second is a variant on `halfperm!` that
-avoids an unnecessary length-`nnz(A)` array-sweep and associated recomputation of column
-pointers. See [`halfperm!`](:func:SparseArrays.halfperm!) for additional algorithmic
-information.
-
-See also: `unchecked_aliasing_permute!`
-"""
-function unchecked_noalias_permute!(X::AbstractSparseMatrixCSC{Tv,Ti},
-        A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
-        q::AbstractVector{<:Integer}, C::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    halfperm!(C, A, q)
-    _computecolptrs_permute!(X, A, q, getcolptr(X))
-    _distributevals_halfperm!(X, C, p, identity)
-    return X
-end
-"""
-    unchecked_aliasing_permute!(A::AbstractSparseMatrixCSC{Tv,Ti},
-        p::AbstractVector{<:Integer}, q::AbstractVector{<:Integer},
-        C::AbstractSparseMatrixCSC{Tv,Ti}, workcolptr::Vector{Ti}) where {Tv,Ti}
-
-See [`permute!`](@ref) for basic usage. Parent of `permute!`
-methods operating on [`SparseMatrixCSC`](@ref)s where the source and destination matrices
-are the same. See `unchecked_noalias_permute!`
-for additional information; these methods are identical but for this method's requirement of
-the additional `workcolptr`, `length(workcolptr) >= size(A, 2) + 1`, which enables efficient
-handling of the source-destination aliasing.
-"""
-function unchecked_aliasing_permute!(A::AbstractSparseMatrixCSC{Tv,Ti},
-        p::AbstractVector{<:Integer}, q::AbstractVector{<:Integer},
-        C::AbstractSparseMatrixCSC{Tv,Ti}, workcolptr::Vector{Ti}) where {Tv,Ti}
-    halfperm!(C, A, q)
-    _computecolptrs_permute!(A, A, q, workcolptr)
-    _distributevals_halfperm!(A, C, p, identity)
-    return A
-end
-"""
-Helper method for `unchecked_noalias_permute!` and `unchecked_aliasing_permute!`.
-Computes `PAQ`'s column pointers, storing them shifted one position forward in `getcolptr(X)`;
-`_distributevals_halfperm!` fixes this shift. Saves some work relative to
-`_computecolptrs_halfperm!` as described in `uncheckednoalias_permute!`'s documentation.
-"""
-function _computecolptrs_permute!(X::AbstractSparseMatrixCSC{Tv,Ti},
-        A::AbstractSparseMatrixCSC{Tv,Ti}, q::AbstractVector{<:Integer}, workcolptr::Vector{Ti}) where {Tv,Ti}
-    # Compute `A[p,q]`'s column counts. Store shifted forward one position in workcolptr.
-    @inbounds for k in 1:size(A, 2)
-        workcolptr[k+1] = getcolptr(A)[q[k] + 1] - getcolptr(A)[q[k]]
-    end
-    # Compute `A[p,q]`'s column pointers. Store shifted forward one position in getcolptr(X).
-    getcolptr(X)[1] = 1
-    countsum = 1
-    @inbounds for k in 2:(size(X, 2) + 1)
-        overwritten = workcolptr[k]
-        getcolptr(X)[k] = countsum
-        countsum += overwritten
-    end
-end
-
-"""
-Helper method for `permute` and `permute!` methods operating on `SparseMatrixCSC`s.
-Checks compatibility of source argument `A`, row-permutation argument `p`, and
-column-permutation argument `q`.
-"""
-function _checkargs_sourcecompatperms_permute!(A::AbstractSparseMatrixCSC,
-        p::AbstractVector{<:Integer}, q::AbstractVector{<:Integer})
-    require_one_based_indexing(p, q)
-    if length(q) != size(A, 2)
-         throw(DimensionMismatch(string("the length of column-permutation argument `q`, ",
-             "`length(q) (= $(length(q)))`, must match source argument `A`'s column ",
-             "count, `size(A, 2) (= $(size(A, 2)))`")))
-     elseif length(p) != size(A, 1)
-         throw(DimensionMismatch(string("the length of row-permutation argument `p`, ",
-             "`length(p) (= $(length(p)))`, must match source argument `A`'s row count, ",
-             "`size(A, 1) (= $(size(A, 1)))`")))
-     end
-end
-"""
-Helper method for `permute` and `permute!` methods operating on `SparseMatrixCSC`s.
-Checks whether row- and column- permutation arguments `p` and `q` are valid permutations.
-"""
-function _checkargs_permutationsvalid_permute!(
-        p::AbstractVector{<:Integer}, pcheckspace::Vector{Ti},
-        q::AbstractVector{<:Integer}, qcheckspace::Vector{Ti}) where Ti<:Integer
-    if !_ispermutationvalid_permute!(p, pcheckspace)
-        throw(ArgumentError("row-permutation argument `p` must be a valid permutation"))
-    elseif !_ispermutationvalid_permute!(q, qcheckspace)
-        throw(ArgumentError("column-permutation argument `q` must be a valid permutation"))
-    end
-end
-function _ispermutationvalid_permute!(perm::AbstractVector{<:Integer},
-        checkspace::Vector{<:Integer})
-    require_one_based_indexing(perm)
-    n = length(perm)
-    checkspace[1:n] .= 0
-    for k in perm
-        (0 < k ≤ n) && ((checkspace[k] ⊻= 1) == 1) || return false
-    end
-    return true
-end
-"""
-Helper method for `permute` and `permute!` methods operating on `SparseMatrixCSC`s.
-Checks compatibility of source argument `A` and destination argument `X`.
-"""
-function _checkargs_sourcecompatdest_permute!(A::AbstractSparseMatrixCSC{Tv,Ti},
-        X::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    if size(X, 1) != size(A, 1)
-        throw(DimensionMismatch(string("destination argument `X`'s row count, ",
-            "`size(X, 1) (= $(size(X, 1)))`, must match source argument `A`'s row count, `size(A, 1) (= $(size(A, 1)))`")))
-    elseif size(X, 2) != size(A, 2)
-        throw(DimensionMismatch(string("destination argument `X`'s column count, ",
-            "`size(X, 2) (= $(size(X, 2)))`, must match source argument `A`'s column count, `size(A, 2) (= $(size(A, 2)))`")))
-    elseif length(rowvals(X)) < nnz(A)
-        throw(ArgumentError(string("the length of destination argument `X`'s `rowval` ",
-            "array, `length(rowvals(X)) (= $(length(rowvals(X))))`, must be greater than or ",
-            "equal to source argument `A`'s allocated entry count, `nnz(A) (= $(nnz(A)))`")))
-    elseif length(nonzeros(X)) < nnz(A)
-        throw(ArgumentError(string("the length of destination argument `X`'s `nzval` ",
-            "array, `length(nonzeros(X)) (= $(length(nonzeros(X))))`, must be greater than or ",
-            "equal to source argument `A`'s allocated entry count, `nnz(A) (= $(nnz(A)))`")))
-    end
-end
-"""
-Helper method for `permute` and `permute!` methods operating on `SparseMatrixCSC`s.
-Checks compatibility of source argument `A` and intermediate result argument `C`.
-"""
-function _checkargs_sourcecompatworkmat_permute!(A::AbstractSparseMatrixCSC{Tv,Ti},
-        C::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    if size(C, 2) != size(A, 1)
-        throw(DimensionMismatch(string("intermediate result argument `C`'s column count, ",
-            "`size(C, 2) (= $(size(C, 2)))`, must match source argument `A`'s row count, `size(A, 1) (= $(size(A, 1)))`")))
-    elseif size(C, 1) != size(A, 2)
-        throw(DimensionMismatch(string("intermediate result argument `C`'s row count, ",
-            "`size(C, 1) (= $(size(C, 1)))`, must match source argument `A`'s column count, `size(A, 2) (= $(size(A, 2)))`")))
-    elseif length(rowvals(C)) < nnz(A)
-        throw(ArgumentError(string("the length of intermediate result argument `C`'s ",
-            "`rowval` array, `length(rowvals(C)) (= $(length(rowvals(C))))`, must be greater than ",
-            "or equal to source argument `A`'s allocated entry count, `nnz(A) (= $(nnz(A)))`")))
-    elseif length(nonzeros(C)) < nnz(A)
-        throw(ArgumentError(string("the length of intermediate result argument `C`'s ",
-            "`rowval` array, `length(nonzeros(C)) (= $(length(nonzeros(C))))`, must be greater than ",
-            "or equal to source argument `A`'s allocated entry count, `nnz(A)` (= $(nnz(A)))")))
-    end
-end
-"""
-Helper method for `permute` and `permute!` methods operating on `SparseMatrixCSC`s.
-Checks compatibility of source argument `A` and workspace argument `workcolptr`.
-"""
-function _checkargs_sourcecompatworkcolptr_permute!(A::AbstractSparseMatrixCSC{Tv,Ti},
-        workcolptr::Vector{Ti}) where {Tv,Ti}
-    if length(workcolptr) <= size(A, 2)
-        throw(DimensionMismatch(string("argument `workcolptr`'s length, ",
-            "`length(workcolptr) (= $(length(workcolptr)))`, must exceed source argument ",
-            "`A`'s column count, `size(A, 2) (= $(size(A, 2)))`")))
-    end
-end
-"""
-    permute!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti},
-             p::AbstractVector{<:Integer}, q::AbstractVector{<:Integer},
-             [C::AbstractSparseMatrixCSC{Tv,Ti}]) where {Tv,Ti}
-
-Bilaterally permute `A`, storing result `PAQ` (`A[p,q]`) in `X`. Stores intermediate result
-`(AQ)^T` (`transpose(A[:,q])`) in optional argument `C` if present. Requires that none of
-`X`, `A`, and, if present, `C` alias each other; to store result `PAQ` back into `A`, use
-the following method lacking `X`:
-
-    permute!(A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
-             q::AbstractVector{<:Integer}[, C::AbstractSparseMatrixCSC{Tv,Ti},
-             [workcolptr::Vector{Ti}]]) where {Tv,Ti}
-
-`X`'s dimensions must match those of `A` (`size(X, 1) == size(A, 1)` and `size(X, 2) == size(A, 2)`), and `X` must
-have enough storage to accommodate all allocated entries in `A` (`length(rowvals(X)) >= nnz(A)`
-and `length(nonzeros(X)) >= nnz(A)`). Column-permutation `q`'s length must match `A`'s column
-count (`length(q) == size(A, 2)`). Row-permutation `p`'s length must match `A`'s row count
-(`length(p) == size(A, 1)`).
-
-`C`'s dimensions must match those of `transpose(A)` (`size(C, 1) == size(A, 2)` and `size(C, 2) == size(A, 1)`), and `C`
-must have enough storage to accommodate all allocated entries in `A` (`length(rowvals(C)) >= nnz(A)`
-and `length(nonzeros(C)) >= nnz(A)`).
-
-For additional (algorithmic) information, and for versions of these methods that forgo
-argument checking, see (unexported) parent methods `unchecked_noalias_permute!`
-and `unchecked_aliasing_permute!`.
-
-See also: [`permute`](@ref).
-"""
-function permute!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti},
-        p::AbstractVector{<:Integer}, q::AbstractVector{<:Integer}) where {Tv,Ti}
-    _checkargs_sourcecompatdest_permute!(A, X)
-    _checkargs_sourcecompatperms_permute!(A, p, q)
-    C = SparseMatrixCSC(size(A, 2), size(A, 1),
-                        ones(Ti, size(A, 1) + 1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
-    _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, getcolptr(X))
-    unchecked_noalias_permute!(X, A, p, q, C)
-end
-function permute!(X::AbstractSparseMatrixCSC{Tv,Ti}, A::AbstractSparseMatrixCSC{Tv,Ti},
-        p::AbstractVector{<:Integer}, q::AbstractVector{<:Integer},
-        C::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    _checkargs_sourcecompatdest_permute!(A, X)
-    _checkargs_sourcecompatperms_permute!(A, p, q)
-    _checkargs_sourcecompatworkmat_permute!(A, C)
-    _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, getcolptr(X))
-    unchecked_noalias_permute!(X, A, p, q, C)
-end
-function permute!(A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
-        q::AbstractVector{<:Integer}) where {Tv,Ti}
-    _checkargs_sourcecompatperms_permute!(A, p, q)
-    C = SparseMatrixCSC(size(A, 2), size(A, 1),
-                        ones(Ti, size(A, 1) + 1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
-    workcolptr = Vector{Ti}(undef, size(A, 2) + 1)
-    _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, workcolptr)
-    unchecked_aliasing_permute!(A, p, q, C, workcolptr)
-end
-function permute!(A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
-        q::AbstractVector{<:Integer}, C::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    _checkargs_sourcecompatperms_permute!(A, p, q)
-    _checkargs_sourcecompatworkmat_permute!(A, C)
-    workcolptr = Vector{Ti}(undef, size(A, 2) + 1)
-    _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, workcolptr)
-    unchecked_aliasing_permute!(A, p, q, C, workcolptr)
-end
-function permute!(A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
-        q::AbstractVector{<:Integer}, C::AbstractSparseMatrixCSC{Tv,Ti},
-        workcolptr::Vector{Ti}) where {Tv,Ti}
-    _checkargs_sourcecompatperms_permute!(A, p, q)
-    _checkargs_sourcecompatworkmat_permute!(A, C)
-    _checkargs_sourcecompatworkcolptr_permute!(A, workcolptr)
-    _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, workcolptr)
-    unchecked_aliasing_permute!(A, p, q, C, workcolptr)
-end
-"""
-    permute(A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
-            q::AbstractVector{<:Integer}) where {Tv,Ti}
-
-Bilaterally permute `A`, returning `PAQ` (`A[p,q]`). Column-permutation `q`'s length must
-match `A`'s column count (`length(q) == size(A, 2)`). Row-permutation `p`'s length must match `A`'s
-row count (`length(p) == size(A, 1)`).
-
-For expert drivers and additional information, see [`permute!`](@ref).
-
-# Examples
-```jldoctest
-julia> A = spdiagm(0 => [1, 2, 3, 4], 1 => [5, 6, 7])
-4×4 SparseMatrixCSC{Int64, Int64} with 7 stored entries:
- 1  5  ⋅  ⋅
- ⋅  2  6  ⋅
- ⋅  ⋅  3  7
- ⋅  ⋅  ⋅  4
-
-julia> permute(A, [4, 3, 2, 1], [1, 2, 3, 4])
-4×4 SparseMatrixCSC{Int64, Int64} with 7 stored entries:
- ⋅  ⋅  ⋅  4
- ⋅  ⋅  3  7
- ⋅  2  6  ⋅
- 1  5  ⋅  ⋅
-
-julia> permute(A, [1, 2, 3, 4], [4, 3, 2, 1])
-4×4 SparseMatrixCSC{Int64, Int64} with 7 stored entries:
- ⋅  ⋅  5  1
- ⋅  6  2  ⋅
- 7  3  ⋅  ⋅
- 4  ⋅  ⋅  ⋅
-```
-"""
-function permute(A::AbstractSparseMatrixCSC{Tv,Ti}, p::AbstractVector{<:Integer},
-        q::AbstractVector{<:Integer}) where {Tv,Ti}
-    _checkargs_sourcecompatperms_permute!(A, p, q)
-    X = SparseMatrixCSC(size(A, 1), size(A, 2),
-                        ones(Ti, size(A, 2) + 1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
-    C = SparseMatrixCSC(size(A, 2), size(A, 1),
-                        ones(Ti, size(A, 1) + 1),
-                        Vector{Ti}(undef, nnz(A)),
-                        Vector{Tv}(undef, nnz(A)))
-    _checkargs_permutationsvalid_permute!(p, getcolptr(C), q, getcolptr(X))
-    unchecked_noalias_permute!(X, A, p, q, C)
-end
-
-## fkeep! and children tril!, triu!, droptol!, dropzeros[!]
-
-"""
-    fkeep!(A::AbstractSparseArray, f)
-
-Keep elements of `A` for which test `f` returns `true`. `f`'s signature should be
-
-    f(i::Integer, [j::Integer,] x) -> Bool
-
-where `i` and `j` are an element's row and column indices and `x` is the element's
-value. This method makes a single sweep
-through `A`, requiring `O(size(A, 2), nnz(A))`-time for matrices and `O(nnz(A))`-time for vectors
-and no space beyond that passed in.
-
-# Examples
-```jldoctest
-julia> A = sparse(Diagonal([1, 2, 3, 4]))
-4×4 SparseMatrixCSC{Int64, Int64} with 4 stored entries:
- 1  ⋅  ⋅  ⋅
- ⋅  2  ⋅  ⋅
- ⋅  ⋅  3  ⋅
- ⋅  ⋅  ⋅  4
-
-julia> SparseArrays.fkeep!(A, (i, j, v) -> isodd(v))
-4×4 SparseMatrixCSC{Int64, Int64} with 2 stored entries:
- 1  ⋅  ⋅  ⋅
- ⋅  ⋅  ⋅  ⋅
- ⋅  ⋅  3  ⋅
- ⋅  ⋅  ⋅  ⋅
-```
-"""
-function fkeep!(A::AbstractSparseMatrixCSC, f, trim::Bool = true)
-    An = size(A, 2)
-    Acolptr = getcolptr(A)
-    Arowval = rowvals(A)
-    Anzval = nonzeros(A)
-
-    # Sweep through columns, rewriting kept elements in their new positions
-    # and updating the column pointers accordingly as we go.
-    Awritepos = 1
-    oldAcolptrAj = 1
-    @inbounds for Aj in 1:An
-        for Ak in oldAcolptrAj:(Acolptr[Aj+1]-1)
-            Ai = Arowval[Ak]
-            Ax = Anzval[Ak]
-            # If this element should be kept, rewrite in new position
-            if f(Ai, Aj, Ax)
-                if Awritepos != Ak
-                    Arowval[Awritepos] = Ai
-                    Anzval[Awritepos] = Ax
-                end
-                Awritepos += 1
-            end
-        end
-        oldAcolptrAj = Acolptr[Aj+1]
-        Acolptr[Aj+1] = Awritepos
-    end
-
-    # Trim A's storage if necessary
-    Annz = Acolptr[end] - 1
-    resize!(Arowval, Annz)
-    resize!(Anzval, Annz)
-
-    return A
-end
-
-tril!(A::AbstractSparseMatrixCSC, k::Integer = 0) =
-    fkeep!(A, (i, j, x) -> i + k >= j)
-triu!(A::AbstractSparseMatrixCSC, k::Integer = 0) =
-    fkeep!(A, (i, j, x) -> j >= i + k)
-
-"""
-    droptol!(A::AbstractSparseMatrixCSC, tol)
-
-Removes stored values from `A` whose absolute value is less than or equal to `tol`.
-"""
-droptol!(A::AbstractSparseMatrixCSC, tol) =
-    fkeep!(A, (i, j, x) -> abs(x) > tol)
-
-"""
-    dropzeros!(A::AbstractSparseMatrixCSC;)
-
-Removes stored numerical zeros from `A`.
-
-For an out-of-place version, see [`dropzeros`](@ref). For
-algorithmic information, see `fkeep!`.
-"""
-dropzeros!(A::AbstractSparseMatrixCSC) = fkeep!(A, (i, j, x) -> !iszero(x))
-"""
-    dropzeros(A::AbstractSparseMatrixCSC;)
-
-Generates a copy of `A` and removes stored numerical zeros from that copy.
-
-For an in-place version and algorithmic information, see [`dropzeros!`](@ref).
-
-# Examples
-```jldoctest
-julia> A = sparse([1, 2, 3], [1, 2, 3], [1.0, 0.0, 1.0])
-3×3 SparseMatrixCSC{Float64, Int64} with 3 stored entries:
- 1.0   ⋅    ⋅
-  ⋅   0.0   ⋅
-  ⋅    ⋅   1.0
-
-julia> dropzeros(A)
-3×3 SparseMatrixCSC{Float64, Int64} with 2 stored entries:
- 1.0   ⋅    ⋅
-  ⋅    ⋅    ⋅
-  ⋅    ⋅   1.0
-```
-"""
-dropzeros(A::AbstractSparseMatrixCSC) = dropzeros!(copy(A))
-
-## Find methods
-
-function findall(S::AbstractSparseMatrixCSC)
-    return findall(identity, S)
-end
-
-function findall(p::Function, S::AbstractSparseMatrixCSC)
-    if p(zero(eltype(S)))
-        return invoke(findall, Tuple{Function, Any}, p, S)
-    end
-
-    numnz = nnz(S)
-    inds = Vector{CartesianIndex{2}}(undef, numnz)
-
-    count = 0
-    @inbounds for col = 1 : size(S, 2), k = getcolptr(S)[col] : (getcolptr(S)[col+1]-1)
-        if p(nonzeros(S)[k])
-            count += 1
-            inds[count] = CartesianIndex(rowvals(S)[k], col)
-        end
-    end
-
-    resize!(inds, count)
-
-    return inds
-end
-findall(p::Base.Fix2{typeof(in)}, x::AbstractSparseMatrixCSC) =
-    invoke(findall, Tuple{Base.Fix2{typeof(in)}, AbstractArray}, p, x)
-
-function findnz(S::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    numnz = nnz(S)
-    I = Vector{Ti}(undef, numnz)
-    J = Vector{Ti}(undef, numnz)
-    V = Vector{Tv}(undef, numnz)
-
-    count = 1
-    @inbounds for col = 1 : size(S, 2), k = getcolptr(S)[col] : (getcolptr(S)[col+1]-1)
-        I[count] = rowvals(S)[k]
-        J[count] = col
-        V[count] = nonzeros(S)[k]
-        count += 1
-    end
-
-    return (I, J, V)
-end
-
-function _sparse_findnextnz(m::AbstractSparseMatrixCSC, ij::CartesianIndex{2})
-    row, col = Tuple(ij)
-    col > size(m, 2) && return nothing
-
-    lo, hi = getcolptr(m)[col], getcolptr(m)[col+1]
-    n = searchsortedfirst(rowvals(m), row, lo, hi-1, Base.Order.Forward)
-    if lo <= n <= hi-1
-        return CartesianIndex(rowvals(m)[n], col)
-    end
-    nextcol = searchsortedfirst(getcolptr(m), hi + 1, col + 1, length(getcolptr(m)), Base.Order.Forward)
-    nextcol > length(getcolptr(m)) && return nothing
-    nextlo = getcolptr(m)[nextcol-1]
-    return CartesianIndex(rowvals(m)[nextlo], nextcol - 1)
-end
-
-function _sparse_findprevnz(m::AbstractSparseMatrixCSC, ij::CartesianIndex{2})
-    row, col = Tuple(ij)
-    iszero(col) && return nothing
-
-    lo, hi = getcolptr(m)[col], getcolptr(m)[col+1]
-    n = searchsortedlast(rowvals(m), row, lo, hi-1, Base.Order.Forward)
-    if lo <= n <= hi-1
-        return CartesianIndex(rowvals(m)[n], col)
-    end
-    prevcol = searchsortedlast(getcolptr(m), lo - 1, 1, col - 1, Base.Order.Forward)
-    prevcol < 1 && return nothing
-    prevhi = getcolptr(m)[prevcol+1]
-    return CartesianIndex(rowvals(m)[prevhi-1], prevcol)
-end
-
-
-function sparse_sortedlinearindices!(I::Vector{Ti}, V::Vector, m::Int, n::Int) where Ti
-    length(I) == length(V) || throw(ArgumentError("I and V should have the same length"))
-    nnz = length(V)
-    colptr = Vector{Ti}(undef, n + 1)
-    j, colm = 1, 0
-    @inbounds for col = 1:n+1
-        colptr[col] = j
-        while j <= nnz && (I[j] -= colm) <= m
-            j += 1
-        end
-        j <= nnz && (I[j] += colm)
-        colm += m
-    end
-    return SparseMatrixCSC(m, n, colptr, I, V)
-end
-
-"""
-    sprand([rng],[type],m,[n],p::AbstractFloat,[rfn])
-
-Create a random length `m` sparse vector or `m` by `n` sparse matrix, in
-which the probability of any element being nonzero is independently given by
-`p` (and hence the mean density of nonzeros is also exactly `p`). Nonzero
-values are sampled from the distribution specified by `rfn` and have the type `type`. The uniform
-distribution is used in case `rfn` is not specified. The optional `rng`
-argument specifies a random number generator, see [Random Numbers](@ref).
-
-# Examples
-```jldoctest; setup = :(using Random; Random.seed!(1234))
-julia> sprand(Bool, 2, 2, 0.5)
-2×2 SparseMatrixCSC{Bool, Int64} with 1 stored entry:
- ⋅  ⋅
- ⋅  1
-
-julia> sprand(Float64, 3, 0.75)
-3-element SparseVector{Float64, Int64} with 1 stored entry:
-  [3]  =  0.298614
-```
-"""
-function sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat, rfn::Function, ::Type{T}=eltype(rfn(r, 1))) where T
-    m, n = Int(m), Int(n)
-    (m < 0 || n < 0) && throw(ArgumentError("invalid Array dimensions"))
-    0 <= density <= 1 || throw(ArgumentError("$density not in [0,1]"))
-    I = randsubseq(r, 1:(m*n), density)
-    return sparse_sortedlinearindices!(I, convert(Vector{T}, rfn(r,length(I))), m, n)
-end
-
-sprand(m::Integer, n::Integer, density::AbstractFloat, rfn::Function, ::Type{T} = eltype(rfn(1))) where {T} =
-    sprand(default_rng(), m, n, density, (r, i) -> rfn(i))
-
-truebools(r::AbstractRNG, n::Integer) = fill(true, n)
-
-sprand(m::Integer, n::Integer, density::AbstractFloat) = sprand(default_rng(), m, n, density)
-
-sprand(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) =
-    sprand(r, m, n, density, rand, Float64)
-sprand(r::AbstractRNG, ::Type{T}, m::Integer, n::Integer, density::AbstractFloat) where {T} =
-    sprand(r, m, n, density, (r, i) -> rand(r, T, i), T)
-sprand(r::AbstractRNG, ::Type{Bool}, m::Integer, n::Integer, density::AbstractFloat) =
-    sprand(r, m, n, density, truebools, Bool)
-sprand(::Type{T}, m::Integer, n::Integer, density::AbstractFloat) where {T} =
-    sprand(default_rng(), T, m, n, density)
-
-"""
-    sprandn([rng][,Type],m[,n],p::AbstractFloat)
-
-Create a random sparse vector of length `m` or sparse matrix of size `m` by `n`
-with the specified (independent) probability `p` of any entry being nonzero,
-where nonzero values are sampled from the normal distribution. The optional `rng`
-argument specifies a random number generator, see [Random Numbers](@ref).
-
-!!! compat "Julia 1.1"
-    Specifying the output element type `Type` requires at least Julia 1.1.
-
-# Examples
-```jldoctest; setup = :(using Random; Random.seed!(0))
-julia> sprandn(2, 2, 0.75)
-2×2 SparseMatrixCSC{Float64, Int64} with 2 stored entries:
-  ⋅   0.586617
-  ⋅   0.297336
-```
-"""
-sprandn(r::AbstractRNG, m::Integer, n::Integer, density::AbstractFloat) =
-    sprand(r, m, n, density, randn, Float64)
-sprandn(m::Integer, n::Integer, density::AbstractFloat) =
-    sprandn(default_rng(), m, n, density)
-sprandn(r::AbstractRNG, ::Type{T}, m::Integer, n::Integer, density::AbstractFloat) where {T} =
-    sprand(r, m, n, density, (r, i) -> randn(r, T, i), T)
-sprandn(::Type{T}, m::Integer, n::Integer, density::AbstractFloat) where {T} =
-    sprandn(default_rng(), T, m, n, density)
-
-LinearAlgebra.fillstored!(S::AbstractSparseMatrixCSC, x) = (fill!(nzvalview(S), x); S)
-
-"""
-    spzeros([type,]m[,n])
-
-Create a sparse vector of length `m` or sparse matrix of size `m x n`. This
-sparse array will not contain any nonzero values. No storage will be allocated
-for nonzero values during construction. The type defaults to [`Float64`](@ref) if not
-specified.
-
-# Examples
-```jldoctest
-julia> spzeros(3, 3)
-3×3 SparseMatrixCSC{Float64, Int64} with 0 stored entries:
-  ⋅    ⋅    ⋅
-  ⋅    ⋅    ⋅
-  ⋅    ⋅    ⋅
-
-julia> spzeros(Float32, 4)
-4-element SparseVector{Float32, Int64} with 0 stored entries
-```
-"""
-spzeros(m::Integer, n::Integer) = spzeros(Float64, m, n)
-spzeros(::Type{Tv}, m::Integer, n::Integer) where {Tv} = spzeros(Tv, Int, m, n)
-function spzeros(::Type{Tv}, ::Type{Ti}, m::Integer, n::Integer) where {Tv, Ti}
-    ((m < 0) || (n < 0)) && throw(ArgumentError("invalid Array dimensions"))
-    SparseMatrixCSC(m, n, fill(one(Ti), n+1), Vector{Ti}(), Vector{Tv}())
-end
-# de-splatting variant
-function spzeros(::Type{Tv}, ::Type{Ti}, sz::Tuple{Integer,Integer}) where {Tv, Ti}
-    spzeros(Tv, Ti, sz[1], sz[2])
-end
-
-import Base._one
-function Base._one(unit::T, S::AbstractSparseMatrixCSC) where T
-    size(S, 1) == size(S, 2) || throw(DimensionMismatch("multiplicative identity only defined for square matrices"))
-    return SparseMatrixCSC{T}(I, size(S, 1), size(S, 2))
-end
-
-## SparseMatrixCSC construction from UniformScaling
-SparseMatrixCSC{Tv,Ti}(s::UniformScaling, m::Integer, n::Integer) where {Tv,Ti} = SparseMatrixCSC{Tv,Ti}(s, Dims((m, n)))
-SparseMatrixCSC{Tv}(s::UniformScaling, m::Integer, n::Integer) where {Tv} = SparseMatrixCSC{Tv}(s, Dims((m, n)))
-SparseMatrixCSC(s::UniformScaling, m::Integer, n::Integer) = SparseMatrixCSC(s, Dims((m, n)))
-SparseMatrixCSC{Tv}(s::UniformScaling, dims::Dims{2}) where {Tv} = SparseMatrixCSC{Tv,Int}(s, dims)
-SparseMatrixCSC(s::UniformScaling, dims::Dims{2}) = SparseMatrixCSC{eltype(s)}(s, dims)
-function SparseMatrixCSC{Tv,Ti}(s::UniformScaling, dims::Dims{2}) where {Tv,Ti}
-    @boundscheck first(dims) < 0 && throw(ArgumentError("first dimension invalid ($(first(dims)) < 0)"))
-    @boundscheck last(dims) < 0 && throw(ArgumentError("second dimension invalid ($(last(dims)) < 0)"))
-    iszero(s.λ) && return spzeros(Tv, Ti, dims...)
-    m, n, k = dims..., min(dims...)
-    nzval = fill!(Vector{Tv}(undef, k), Tv(s.λ))
-    rowval = copyto!(Vector{Ti}(undef, k), 1:k)
-    colptr = copyto!(Vector{Ti}(undef, n + 1), 1:(k + 1))
-    for i in (k + 2):(n + 1) colptr[i] = (k + 1) end
-    SparseMatrixCSC{Tv,Ti}(dims..., colptr, rowval, nzval)
-end
-
-Base.iszero(A::AbstractSparseMatrixCSC) = iszero(nzvalview(A))
-
-function Base.isone(A::AbstractSparseMatrixCSC)
-    m, n = size(A)
-    m == n && getcolptr(A)[n+1] >= n+1 || return false
-    for j in 1:n, k in getcolptr(A)[j]:(getcolptr(A)[j+1] - 1)
-        i, x = rowvals(A)[k], nonzeros(A)[k]
-        ifelse(i == j, isone(x), iszero(x)) || return false
-    end
-    return true
-end
-
-sparse(s::UniformScaling, dims::Dims{2}) = SparseMatrixCSC(s, dims)
-sparse(s::UniformScaling, m::Integer, n::Integer) = sparse(s, Dims((m, n)))
-
-# TODO: More appropriate location?
-function conj!(A::AbstractSparseMatrixCSC)
-    map!(conj, nzvalview(A), nzvalview(A))
-    return A
-end
-function (-)(A::AbstractSparseMatrixCSC)
-    nzval = similar(nonzeros(A), typeof(-zero(eltype(A))))
-    map!(-, view(nzval, 1:nnz(A)), nzvalview(A))
-    return SparseMatrixCSC(size(A, 1), size(A, 2), copy(getcolptr(A)), copy(rowvals(A)), nzval)
-end
-
-# the rest of real, conj, imag are handled correctly via AbstractArray methods
-function conj(A::AbstractSparseMatrixCSC{<:Complex})
-    nzval = similar(nonzeros(A))
-    map!(conj, view(nzval, 1:nnz(A)), nzvalview(A))
-    return SparseMatrixCSC(size(A, 1), size(A, 2), copy(getcolptr(A)), copy(rowvals(A)), nzval)
-end
-imag(A::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv<:Real,Ti} = spzeros(Tv, Ti, size(A, 1), size(A, 2))
-
-## Binary arithmetic and boolean operators
-(+)(A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC) = map(+, A, B)
-(-)(A::AbstractSparseMatrixCSC, B::AbstractSparseMatrixCSC) = map(-, A, B)
-
-(+)(A::AbstractSparseMatrixCSC, B::Array) = Array(A) + B
-(+)(A::Array, B::AbstractSparseMatrixCSC) = A + Array(B)
-(-)(A::AbstractSparseMatrixCSC, B::Array) = Array(A) - B
-(-)(A::Array, B::AbstractSparseMatrixCSC) = A - Array(B)
-
-## full equality
-function ==(A1::AbstractSparseMatrixCSC, A2::AbstractSparseMatrixCSC)
-    size(A1) != size(A2) && return false
-    vals1, vals2 = nonzeros(A1), nonzeros(A2)
-    rows1, rows2 = rowvals(A1), rowvals(A2)
-    m, n = size(A1)
-    @inbounds for i = 1:n
-        nz1,nz2 = nzrange(A1,i), nzrange(A2,i)
-        j1,j2 = first(nz1), first(nz2)
-        # step through the rows of both matrices at once:
-        while j1 <= last(nz1) && j2 <= last(nz2)
-            r1,r2 = rows1[j1], rows2[j2]
-            if r1==r2
-                vals1[j1]!=vals2[j2] && return false
-                j1+=1
-                j2+=1
-            else
-                if r1<r2
-                    vals1[j1]!=0 && return false
-                    j1+=1
-                else
-                    vals2[j2]!=0 && return false
-                    j2+=1
-                end
-            end
-        end
-        # finish off any left-overs:
-        for j = j1:last(nz1)
-            vals1[j]!=0 && return false
-        end
-        for j = j2:last(nz2)
-            vals2[j]!=0 && return false
-        end
-    end
-    return true
-end
-
-## Reductions
-
-# In general, output of sparse matrix reductions will not be sparse,
-# and computing reductions along columns into SparseMatrixCSC is
-# non-trivial, so use Arrays for output. Array element type is given by `R`.
-function Base.reducedim_initarray(A::AbstractSparseMatrixCSC, region, v0, ::Type{R}) where {R}
-    fill!(Array{R}(undef, Base.to_shape(Base.reduced_indices(A, region))), v0)
-end
-
-# General mapreduce
-function _mapreducezeros(f, op, ::Type{T}, nzeros::Integer, v0) where T
-    nzeros == 0 && return v0
-
-    # Reduce over first zero
-    zeroval = f(zero(T))
-    v = op(v0, zeroval)
-    isequal(v, v0) && return v
-
-    # Reduce over remaining zeros
-    for i = 2:nzeros
-        lastv = v
-        v = op(v, zeroval)
-        # Bail out early if we reach a fixed point
-        isequal(v, lastv) && break
-    end
-
-    v
-end
-
-function Base._mapreduce(f, op, ::Base.IndexCartesian, A::AbstractSparseMatrixCSC{T}) where T
-    z = nnz(A)
-    n = length(A)
-    if z == 0
-        if n == 0
-            Base.mapreduce_empty(f, op, T)
-        else
-            _mapreducezeros(f, op, T, n-z-1, f(zero(T)))
-        end
-    else
-        _mapreducezeros(f, op, T, n-z, Base._mapreduce(f, op, nzvalview(A)))
-    end
-end
-
-# Specialized mapreduce for +/*
-_mapreducezeros(f, ::typeof(+), ::Type{T}, nzeros::Integer, v0) where {T} =
-    nzeros == 0 ? v0 : f(zero(T))*nzeros + v0
-_mapreducezeros(f, ::typeof(*), ::Type{T}, nzeros::Integer, v0) where {T} =
-    nzeros == 0 ? v0 : f(zero(T))^nzeros * v0
-
-function Base._mapreduce(f, op::typeof(*), A::AbstractSparseMatrixCSC{T}) where T
-    nzeros = length(A)-nnz(A)
-    if nzeros == 0
-        # No zeros, so don't compute f(0) since it might throw
-        Base._mapreduce(f, op, nzvalview(A))
-    else
-        v = f(zero(T))^(nzeros)
-        # Bail out early if initial reduction value is zero
-        v == zero(T) ? v : v*Base._mapreduce(f, op, nzvalview(A))
-    end
-end
-
-# General mapreducedim
-function _mapreducerows!(f, op, R::AbstractArray, A::AbstractSparseMatrixCSC{T}) where T
-    require_one_based_indexing(A, R)
-    colptr = getcolptr(A)
-    rowval = rowvals(A)
-    nzval = nonzeros(A)
-    m, n = size(A)
-    @inbounds for col = 1:n
-        r = R[1, col]
-        @simd for j = colptr[col]:colptr[col+1]-1
-            r = op(r, f(nzval[j]))
-        end
-        R[1, col] = _mapreducezeros(f, op, T, m-(colptr[col+1]-colptr[col]), r)
-    end
-    R
-end
-
-function _mapreducecols!(f, op, R::AbstractArray, A::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    require_one_based_indexing(A, R)
-    colptr = getcolptr(A)
-    rowval = rowvals(A)
-    nzval = nonzeros(A)
-    m, n = size(A)
-    rownz = fill(convert(Ti, n), m)
-    @inbounds for col = 1:n
-        @simd for j = colptr[col]:colptr[col+1]-1
-            row = rowval[j]
-            R[row, 1] = op(R[row, 1], f(nzval[j]))
-            rownz[row] -= 1
-        end
-    end
-    @inbounds for i = 1:m
-        R[i, 1] = _mapreducezeros(f, op, Tv, Int(rownz[i]), R[i, 1])
-    end
-    R
-end
-
-function Base._mapreducedim!(f, op, R::AbstractArray, A::AbstractSparseMatrixCSC{T}) where T
-    require_one_based_indexing(A, R)
-    lsiz = Base.check_reducedims(R,A)
-    isempty(A) && return R
-
-    if size(R, 1) == size(R, 2) == 1
-        # Reduction along both columns and rows
-        R[1, 1] = mapreduce(f, op, A)
-    elseif size(R, 1) == 1
-        # Reduction along rows
-        _mapreducerows!(f, op, R, A)
-    elseif size(R, 2) == 1
-        # Reduction along columns
-        _mapreducecols!(f, op, R, A)
-    else
-        # Reduction along a dimension > 2
-        # Compute op(R, f(A))
-        m, n = size(A)
-        nzval = nonzeros(A)
-        if length(nzval) == m*n
-            # No zeros, so don't compute f(0) since it might throw
-            for col = 1:n
-                @simd for row = 1:size(A, 1)
-                    @inbounds R[row, col] = op(R[row, col], f(nzval[(col-1)*m+row]))
-                end
-            end
-        else
-            colptr = getcolptr(A)
-            rowval = rowvals(A)
-            zeroval = f(zero(T))
-            @inbounds for col = 1:n
-                lastrow = 0
-                for j = colptr[col]:colptr[col+1]-1
-                    row = rowval[j]
-                    @simd for i = lastrow+1:row-1 # Zeros before this nonzero
-                        R[i, col] = op(R[i, col], zeroval)
-                    end
-                    R[row, col] = op(R[row, col], f(nzval[j]))
-                    lastrow = row
-                end
-                @simd for i = lastrow+1:m         # Zeros at end
-                    R[i, col] = op(R[i, col], zeroval)
-                end
-            end
-        end
-    end
-    R
-end
-
-# Specialized mapreducedim for + cols to avoid allocating a
-# temporary array when f(0) == 0
-function _mapreducecols!(f, op::typeof(+), R::AbstractArray, A::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti}
-    require_one_based_indexing(A, R)
-    nzval = nonzeros(A)
-    m, n = size(A)
-    if length(nzval) == m*n
-        # No zeros, so don't compute f(0) since it might throw
-        for col = 1:n
-            @simd for row = 1:size(A, 1)
-                @inbounds R[row, 1] = op(R[row, 1], f(nzval[(col-1)*m+row]))
-            end
-        end
-    else
-        colptr = getcolptr(A)
-        rowval = rowvals(A)
-        zeroval = f(zero(Tv))
-        if isequal(zeroval, zero(Tv))
-            # Case where f(0) == 0
-            @inbounds for col = 1:size(A, 2)
-                @simd for j = colptr[col]:colptr[col+1]-1
-                    R[rowval[j], 1] += f(nzval[j])
-                end
-            end
-        else
-            # Case where f(0) != 0
-            rownz = fill(convert(Ti, n), m)
-            @inbounds for col = 1:size(A, 2)
-                @simd for j = colptr[col]:colptr[col+1]-1
-                    row = rowval[j]
-                    R[row, 1] += f(nzval[j])
-                    rownz[row] -= 1
-                end
-            end
-            for i = 1:m
-                R[i, 1] += rownz[i]*zeroval
-            end
-        end
-    end
-    R
-end
-
-# findmax/min and argmax/min methods
-# find first zero value in sparse matrix - return linear index in full matrix
-# non-structural zeros are identified by x == 0 in line with the sparse constructors.
-function _findz(A::AbstractSparseMatrixCSC{Tv,Ti}, rows=1:size(A, 1), cols=1:size(A, 2)) where {Tv,Ti}
-    colptr = getcolptr(A); rowval = rowvals(A); nzval = nonzeros(A)
-    zval = 0
-    row = 0
-    rowmin = rows[1]; rowmax = rows[end]
-    allrows = (rows == 1:size(A, 1))
-    @inbounds for col in cols
-        r1::Int = colptr[col]
-        r2::Int = colptr[col+1] - 1
-        if !allrows && (r1 <= r2)
-            r1 = searchsortedfirst(rowval, rowmin, r1, r2, Forward)
-            (r1 <= r2 ) && (r2 = searchsortedlast(rowval, rowmax, r1, r2, Forward))
-        end
-        row = rowmin
-        while (r1 <= r2) && (row == rowval[r1]) && (nzval[r1] != zval)
-            r1 += 1
-            row += 1
-        end
-        (row <= rowmax) && (return CartesianIndex(row, col))
-    end
-    return CartesianIndex(0, 0)
-end
-
-function _findr(op, A, region, Tv)
-    require_one_based_indexing(A)
-    Ti = eltype(keys(A))
-    i1 = first(keys(A))
-    N = nnz(A)
-    L = length(A)
-    if L == 0
-        if prod(map(length, Base.reduced_indices(A, region))) != 0
-            throw(ArgumentError("array slices must be non-empty"))
-        else
-            ri = Base.reduced_indices0(A, region)
-            return (similar(A, ri), zeros(Ti, ri))
-        end
-    end
-
-    colptr = getcolptr(A); rowval = rowvals(A); nzval = nonzeros(A); m = size(A, 1); n = size(A, 2)
-    zval = zero(Tv)
-    szA = size(A)
-
-    if region == 1 || region == (1,)
-        (N == 0) && (return (fill(zval,1,n), fill(i1,1,n)))
-        S = Vector{Tv}(undef, n); I = Vector{Ti}(undef, n)
-        @inbounds for i = 1 : n
-            Sc = zval; Ic = _findz(A, 1:m, i:i)
-            if Ic == CartesianIndex(0, 0)
-                j = colptr[i]
-                Ic = CartesianIndex(rowval[j], i)
-                Sc = nzval[j]
-            end
-            for j = colptr[i] : colptr[i+1]-1
-                if op(nzval[j], Sc)
-                    Sc = nzval[j]
-                    Ic = CartesianIndex(rowval[j], i)
-                end
-            end
-            S[i] = Sc; I[i] = Ic
-        end
-        return(reshape(S,1,n), reshape(I,1,n))
-    elseif region == 2 || region == (2,)
-        (N == 0) && (return (fill(zval,m,1), fill(i1,m,1)))
-        S = Vector{Tv}(undef, m)
-        I = Vector{Ti}(undef, m)
-        @inbounds for row in 1:m
-            S[row] = zval; I[row] = _findz(A, row:row, 1:n)
-            if I[row] == CartesianIndex(0, 0)
-                I[row] = CartesianIndex(row, 1)
-                S[row] = A[row,1]
-            end
-        end
-        @inbounds for i = 1 : n, j = colptr[i] : colptr[i+1]-1
-            row = rowval[j]
-            if op(nzval[j], S[row])
-                S[row] = nzval[j]
-                I[row] = CartesianIndex(row, i)
-            end
-        end
-        return (reshape(S,m,1), reshape(I,m,1))
-    elseif region == (1,2)
-        (N == 0) && (return (fill(zval,1,1), fill(i1,1,1)))
-        hasz = nnz(A) != length(A)
-        Sv = hasz ? zval : nzval[1]
-        Iv::(Ti) = hasz ? _findz(A) : i1
-        @inbounds for i = 1 : size(A, 2), j = colptr[i] : (colptr[i+1]-1)
-            if op(nzval[j], Sv)
-                Sv = nzval[j]
-                Iv = CartesianIndex(rowval[j], i)
-            end
-        end
-        return (fill(Sv,1,1), fill(Iv,1,1))
-    else
-        throw(ArgumentError("invalid value for region; must be 1, 2, or (1,2)"))
-    end
-end
-
-_isless_fm(a, b)    =  b == b && ( a != a || isless(a, b) )
-_isgreater_fm(a, b) =  b == b && ( a != a || isless(b, a) )
-
-findmin(A::AbstractSparseMatrixCSC{Tv,Ti}, region) where {Tv,Ti} = _findr(_isless_fm, A, region, Tv)
-findmax(A::AbstractSparseMatrixCSC{Tv,Ti}, region) where {Tv,Ti} = _findr(_isgreater_fm, A, region, Tv)
-findmin(A::AbstractSparseMatrixCSC) = (r=findmin(A,(1,2)); (r[1][1], r[2][1]))
-findmax(A::AbstractSparseMatrixCSC) = (r=findmax(A,(1,2)); (r[1][1], r[2][1]))
-
-argmin(A::AbstractSparseMatrixCSC) = findmin(A)[2]
-argmax(A::AbstractSparseMatrixCSC) = findmax(A)[2]
-
-## getindex
-function rangesearch(haystack::AbstractRange, needle)
-    (i,rem) = divrem(needle - first(haystack), step(haystack))
-    (rem==0 && 1<=i+1<=length(haystack)) ? i+1 : 0
-end
-
-getindex(A::AbstractSparseMatrixCSC, I::Tuple{Integer,Integer}) = getindex(A, I[1], I[2])
-
-function getindex(A::AbstractSparseMatrixCSC{T}, i0::Integer, i1::Integer) where T
-    if !(1 <= i0 <= size(A, 1) && 1 <= i1 <= size(A, 2)); throw(BoundsError()); end
-    r1 = Int(getcolptr(A)[i1])
-    r2 = Int(getcolptr(A)[i1+1]-1)
-    (r1 > r2) && return zero(T)
-    r1 = searchsortedfirst(rowvals(A), i0, r1, r2, Forward)
-    ((r1 > r2) || (rowvals(A)[r1] != i0)) ? zero(T) : nonzeros(A)[r1]
-end
-
-# Colon translation
-getindex(A::AbstractSparseMatrixCSC, ::Colon, ::Colon) = copy(A)
-getindex(A::AbstractSparseMatrixCSC, i, ::Colon)       = getindex(A, i, 1:size(A, 2))
-getindex(A::AbstractSparseMatrixCSC, ::Colon, i)       = getindex(A, 1:size(A, 1), i)
-
-function getindex_cols(A::AbstractSparseMatrixCSC{Tv,Ti}, J::AbstractVector) where {Tv,Ti}
-    require_one_based_indexing(A, J)
-    # for indexing whole columns
-    (m, n) = size(A)
-    nJ = length(J)
-
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-
-    colptrS = Vector{Ti}(undef, nJ+1)
-    colptrS[1] = 1
-    nnzS = 0
-
-    @inbounds for j = 1:nJ
-        col = J[j]
-        1 <= col <= n || throw(BoundsError())
-        nnzS += colptrA[col+1] - colptrA[col]
-        colptrS[j+1] = nnzS + 1
-    end
-
-    rowvalS = Vector{Ti}(undef, nnzS)
-    nzvalS  = Vector{Tv}(undef, nnzS)
-    ptrS = 0
-
-    @inbounds for j = 1:nJ
-        col = J[j]
-        for k = colptrA[col]:colptrA[col+1]-1
-            ptrS += 1
-            rowvalS[ptrS] = rowvalA[k]
-            nzvalS[ptrS] = nzvalA[k]
-        end
-    end
-    return SparseMatrixCSC(m, nJ, colptrS, rowvalS, nzvalS)
-end
-
-getindex_traverse_col(::AbstractUnitRange, lo::Integer, hi::Integer) = lo:hi
-getindex_traverse_col(I::StepRange, lo::Integer, hi::Integer) = step(I) > 0 ? (lo:1:hi) : (hi:-1:lo)
-
-function getindex(A::AbstractSparseMatrixCSC{Tv,Ti}, I::AbstractRange, J::AbstractVector) where {Tv,Ti<:Integer}
-    require_one_based_indexing(A, I, J)
-    # Ranges for indexing rows
-    (m, n) = size(A)
-    # whole columns:
-    if I == 1:m
-        return getindex_cols(A, J)
-    end
-
-    nI = length(I)
-    nI == 0 || (minimum(I) >= 1 && maximum(I) <= m) || throw(BoundsError())
-    nJ = length(J)
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-    colptrS = Vector{Ti}(undef, nJ+1)
-    colptrS[1] = 1
-    nnzS = 0
-
-    # Form the structure of the result and compute space
-    @inbounds for j = 1:nJ
-        col = J[j]
-        1 <= col <= n || throw(BoundsError())
-        @simd for k in colptrA[col]:colptrA[col+1]-1
-            nnzS += rowvalA[k] in I # `in` is fast for ranges
-        end
-        colptrS[j+1] = nnzS+1
-    end
-
-    # Populate the values in the result
-    rowvalS = Vector{Ti}(undef, nnzS)
-    nzvalS  = Vector{Tv}(undef, nnzS)
-    ptrS    = 1
-
-    @inbounds for j = 1:nJ
-        col = J[j]
-        for k = getindex_traverse_col(I, colptrA[col], colptrA[col+1]-1)
-            rowA = rowvalA[k]
-            i = rangesearch(I, rowA)
-            if i > 0
-                rowvalS[ptrS] = i
-                nzvalS[ptrS] = nzvalA[k]
-                ptrS += 1
-            end
-        end
-    end
-
-    return SparseMatrixCSC(nI, nJ, colptrS, rowvalS, nzvalS)
-end
-
-function getindex_I_sorted(A::AbstractSparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::AbstractVector) where {Tv,Ti}
-    require_one_based_indexing(A, I, J)
-    # Sorted vectors for indexing rows.
-    # Similar to getindex_general but without the transpose trick.
-    (m, n) = size(A)
-
-    nI   = length(I)
-    nzA  = nnz(A)
-    avgM = div(nzA,n)
-    # Heuristics based on experiments discussed in:
-    # https://github.com/JuliaLang/julia/issues/12860
-    # https://github.com/JuliaLang/julia/pull/12934
-    alg = ((m > nzA) && (m > nI)) ? 0 :
-          ((nI - avgM) > 2^8) ? 1 :
-          ((avgM - nI) > 2^10) ? 0 : 2
-
-    (alg == 0) ? getindex_I_sorted_bsearch_A(A, I, J) :
-    (alg == 1) ? getindex_I_sorted_bsearch_I(A, I, J) :
-    getindex_I_sorted_linear(A, I, J)
-end
-
-function getindex_I_sorted_bsearch_A(A::AbstractSparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::AbstractVector) where {Tv,Ti}
-    require_one_based_indexing(A, I, J)
-    nI = length(I)
-    nJ = length(J)
-
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-    colptrS = Vector{Ti}(undef, nJ+1)
-    colptrS[1] = 1
-
-    ptrS = 1
-    # determine result size
-    @inbounds for j = 1:nJ
-        col = J[j]
-        ptrI::Int = 1 # runs through I
-        ptrA::Int = colptrA[col]
-        stopA::Int = colptrA[col+1]-1
-        if ptrA <= stopA
-            while ptrI <= nI
-                rowI = I[ptrI]
-                ptrI += 1
-                (rowvalA[ptrA] > rowI) && continue
-                ptrA = searchsortedfirst(rowvalA, rowI, ptrA, stopA, Base.Order.Forward)
-                (ptrA <= stopA) || break
-                if rowvalA[ptrA] == rowI
-                    ptrS += 1
-                end
-            end
-        end
-        colptrS[j+1] = ptrS
-    end
-
-    rowvalS = Vector{Ti}(undef, ptrS-1)
-    nzvalS  = Vector{Tv}(undef, ptrS-1)
-
-    # fill the values
-    ptrS = 1
-    @inbounds for j = 1:nJ
-        col = J[j]
-        ptrI::Int = 1 # runs through I
-        ptrA::Int = colptrA[col]
-        stopA::Int = colptrA[col+1]-1
-        if ptrA <= stopA
-            while ptrI <= nI
-                rowI = I[ptrI]
-                if rowvalA[ptrA] <= rowI
-                    ptrA = searchsortedfirst(rowvalA, rowI, ptrA, stopA, Base.Order.Forward)
-                    (ptrA <= stopA) || break
-                    if rowvalA[ptrA] == rowI
-                        rowvalS[ptrS] = ptrI
-                        nzvalS[ptrS] = nzvalA[ptrA]
-                        ptrS += 1
-                    end
-                end
-                ptrI += 1
-            end
-        end
-    end
-    return SparseMatrixCSC(nI, nJ, colptrS, rowvalS, nzvalS)
-end
-
-function getindex_I_sorted_linear(A::AbstractSparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::AbstractVector) where {Tv,Ti}
-    require_one_based_indexing(A, I, J)
-    nI = length(I)
-    nJ = length(J)
-
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-    colptrS = Vector{Ti}(undef, nJ+1)
-    colptrS[1] = 1
-    cacheI = zeros(Int, size(A, 1))
-
-    ptrS   = 1
-    # build the cache and determine result size
-    @inbounds for j = 1:nJ
-        col = J[j]
-        ptrI::Int = 1 # runs through I
-        ptrA::Int = colptrA[col]
-        stopA::Int = colptrA[col+1]
-        while ptrI <= nI && ptrA < stopA
-            rowA = rowvalA[ptrA]
-            rowI = I[ptrI]
-
-            if rowI > rowA
-                ptrA += 1
-            elseif rowI < rowA
-                ptrI += 1
-            else
-                (cacheI[rowA] == 0) && (cacheI[rowA] = ptrI)
-                ptrS += 1
-                ptrI += 1
-            end
-        end
-        colptrS[j+1] = ptrS
-    end
-
-    rowvalS = Vector{Ti}(undef, ptrS-1)
-    nzvalS  = Vector{Tv}(undef, ptrS-1)
-
-    # fill the values
-    ptrS = 1
-    @inbounds for j = 1:nJ
-        col = J[j]
-        ptrA::Int = colptrA[col]
-        stopA::Int = colptrA[col+1]
-        while ptrA < stopA
-            rowA = rowvalA[ptrA]
-            ptrI = cacheI[rowA]
-            if ptrI > 0
-                while ptrI <= nI && I[ptrI] == rowA
-                    rowvalS[ptrS] = ptrI
-                    nzvalS[ptrS] = nzvalA[ptrA]
-                    ptrS += 1
-                    ptrI += 1
-                end
-            end
-            ptrA += 1
-        end
-    end
-    return SparseMatrixCSC(nI, nJ, colptrS, rowvalS, nzvalS)
-end
-
-function getindex_I_sorted_bsearch_I(A::AbstractSparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::AbstractVector) where {Tv,Ti}
-    require_one_based_indexing(A, I, J)
-    nI = length(I)
-    nJ = length(J)
-
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-    colptrS = Vector{Ti}(undef, nJ+1)
-    colptrS[1] = 1
-
-    m = size(A, 1)
-
-    # cacheI is used first to store num occurrences of each row in columns of interest
-    # and later to store position of first occurrence of each row in I
-    cacheI = zeros(Int, m)
-
-    # count rows
-    @inbounds for j = 1:nJ
-        col = J[j]
-        for ptrA in colptrA[col]:(colptrA[col+1]-1)
-            cacheI[rowvalA[ptrA]] += 1
-        end
-    end
-
-    # fill cache and count nnz
-    ptrS::Int = 0
-    ptrI::Int = 1
-    @inbounds for j = 1:m
-        cval = cacheI[j]
-        (cval == 0) && continue
-        ptrI = searchsortedfirst(I, j, ptrI, nI, Base.Order.Forward)
-        cacheI[j] = ptrI
-        while ptrI <= nI && I[ptrI] == j
-            ptrS += cval
-            ptrI += 1
-        end
-        if ptrI > nI
-            @simd for i=(j+1):m; @inbounds cacheI[i]=ptrI; end
-            break
-        end
-    end
-    rowvalS = Vector{Ti}(undef, ptrS)
-    nzvalS  = Vector{Tv}(undef, ptrS)
-    colptrS[nJ+1] = ptrS+1
-
-    # fill the values
-    ptrS = 1
-    @inbounds for j = 1:nJ
-        col = J[j]
-        ptrA::Int = colptrA[col]
-        stopA::Int = colptrA[col+1]
-        while ptrA < stopA
-            rowA = rowvalA[ptrA]
-            ptrI = cacheI[rowA]
-            (ptrI > nI) && break
-            if ptrI > 0
-                while I[ptrI] == rowA
-                    rowvalS[ptrS] = ptrI
-                    nzvalS[ptrS] = nzvalA[ptrA]
-                    ptrS += 1
-                    ptrI += 1
-                    (ptrI > nI) && break
-                end
-            end
-            ptrA += 1
-        end
-        colptrS[j+1] = ptrS
-    end
-    return SparseMatrixCSC(nI, nJ, colptrS, rowvalS, nzvalS)
-end
-
-function permute_rows!(S::AbstractSparseMatrixCSC{Tv,Ti}, pI::Vector{Int}) where {Tv,Ti}
-    (m, n) = size(S)
-    colptrS = getcolptr(S); rowvalS = rowvals(S); nzvalS = nonzeros(S)
-    # preallocate temporary sort space
-    nr = min(nnz(S), m)
-
-    rowperm = Vector{Int}(undef, nr)
-    rowval_temp = Vector{Ti}(undef, nr)
-    rnzval_temp = Vector{Tv}(undef, nr)
-    perm = Base.Perm(Base.ord(isless, identity, false, Base.Order.Forward), rowval_temp)
-
-    @inbounds for j in 1:n
-        rowrange = nzrange(S, j)
-        nr = length(rowrange)
-        resize!(rowperm, nr)
-        resize!(rowval_temp, nr)
-        (nr > 0) || continue
-        k = 1
-        for i in rowrange
-            rowA = rowvalS[i]
-            rowval_temp[k] = pI[rowA]
-            rnzval_temp[k] = nzvalS[i]
-            k += 1
-        end
-
-        if nr <= 16
-            alg = Base.Sort.InsertionSort
-        else
-            alg = Base.Sort.QuickSort
-        end
-
-        # Reset permutation
-        rowperm .= 1:nr
-        sort!(rowperm, alg, perm)
-
-        k = 1
-        for i in rowrange
-            kperm = rowperm[k]
-            rowvalS[i] = rowval_temp[kperm]
-            nzvalS[i] = rnzval_temp[kperm]
-            k += 1
-        end
-    end
-    S
-end
-
-function getindex_general(A::AbstractSparseMatrixCSC, I::AbstractVector, J::AbstractVector)
-    require_one_based_indexing(A, I, J)
-    pI = sortperm(I)
-    @inbounds Is = I[pI]
-    permute_rows!(getindex_I_sorted(A, Is, J), pI)
-end
-
-# the general case:
-function getindex(A::AbstractSparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::AbstractVector) where {Tv,Ti}
-    require_one_based_indexing(A, I, J)
-    (m, n) = size(A)
-
-    if !isempty(J)
-        minj, maxj = extrema(J)
-        ((minj < 1) || (maxj > n)) && throw(BoundsError())
-    end
-
-    if !isempty(I)
-        mini, maxi = extrema(I)
-        ((mini < 1) || (maxi > m)) && throw(BoundsError())
-    end
-
-    if isempty(I) || isempty(J) || (0 == nnz(A))
-        return spzeros(Tv, Ti, length(I), length(J))
-    end
-
-    if issorted(I)
-        return getindex_I_sorted(A, I, J)
-    else
-        return getindex_general(A, I, J)
-    end
-end
-
-function getindex(A::AbstractSparseMatrixCSC{Tv,Ti}, I::AbstractArray) where {Tv,Ti}
-    require_one_based_indexing(A, I)
-    szA = size(A)
-    nA = szA[1]*szA[2]
-    colptrA = getcolptr(A)
-    rowvalA = rowvals(A)
-    nzvalA = nonzeros(A)
-
-    n = length(I)
-    outm = size(I,1)
-    outn = size(I,2)
-    szB = (outm, outn)
-    colptrB = zeros(Ti, outn+1)
-    rowvalB = Vector{Ti}(undef, n)
-    nzvalB = Vector{Tv}(undef, n)
-
-    colB = 1
-    rowB = 1
-    colptrB[colB] = 1
-    idxB = 1
-
-    for i in 1:n
-        ((I[i] < 1) | (I[i] > nA)) && throw(BoundsError())
-        row,col = Base._ind2sub(szA, I[i])
-        for r in colptrA[col]:(colptrA[col+1]-1)
-            @inbounds if rowvalA[r] == row
-                rowB,colB = Base._ind2sub(szB, i)
-                colptrB[colB+1] += 1
-                rowvalB[idxB] = rowB
-                nzvalB[idxB] = nzvalA[r]
-                idxB += 1
-                break
-            end
-        end
-    end
-    cumsum!(colptrB,colptrB)
-    if n > (idxB-1)
-        deleteat!(nzvalB, idxB:n)
-        deleteat!(rowvalB, idxB:n)
-    end
-    SparseMatrixCSC(outm, outn, colptrB, rowvalB, nzvalB)
-end
-
-# logical getindex
-getindex(A::AbstractSparseMatrixCSC{<:Any,<:Integer}, I::AbstractRange{Bool}, J::AbstractVector{Bool}) = error("Cannot index with AbstractRange{Bool}")
-getindex(A::AbstractSparseMatrixCSC{<:Any,<:Integer}, I::AbstractRange{Bool}, J::AbstractVector{<:Integer}) = error("Cannot index with AbstractRange{Bool}")
-
-getindex(A::AbstractSparseMatrixCSC, I::AbstractRange{<:Integer}, J::AbstractVector{Bool}) = A[I,findall(J)]
-getindex(A::AbstractSparseMatrixCSC, I::Integer, J::AbstractVector{Bool}) = A[I,findall(J)]
-getindex(A::AbstractSparseMatrixCSC, I::AbstractVector{Bool}, J::Integer) = A[findall(I),J]
-getindex(A::AbstractSparseMatrixCSC, I::AbstractVector{Bool}, J::AbstractVector{Bool}) = A[findall(I),findall(J)]
-getindex(A::AbstractSparseMatrixCSC, I::AbstractVector{<:Integer}, J::AbstractVector{Bool}) = A[I,findall(J)]
-getindex(A::AbstractSparseMatrixCSC, I::AbstractVector{Bool}, J::AbstractVector{<:Integer}) = A[findall(I),J]
-
-## setindex!
-
-# dispatch helper for #29034
-setindex!(A::AbstractSparseMatrixCSC, _v, _i::Integer, _j::Integer) = _setindex_scalar!(A, _v, _i, _j)
-
-function _setindex_scalar!(A::AbstractSparseMatrixCSC{Tv,Ti}, _v, _i::Integer, _j::Integer) where {Tv,Ti<:Integer}
-    v = convert(Tv, _v)
-    i = convert(Ti, _i)
-    j = convert(Ti, _j)
-    if !((1 <= i <= size(A, 1)) & (1 <= j <= size(A, 2)))
-        throw(BoundsError(A, (i,j)))
-    end
-    coljfirstk = Int(getcolptr(A)[j])
-    coljlastk = Int(getcolptr(A)[j+1] - 1)
-    searchk = searchsortedfirst(rowvals(A), i, coljfirstk, coljlastk, Base.Order.Forward)
-    if searchk <= coljlastk && rowvals(A)[searchk] == i
-        # Column j contains entry A[i,j]. Update and return
-        nonzeros(A)[searchk] = v
-        return A
-    end
-    # Column j does not contain entry A[i,j]. If v is nonzero, insert entry A[i,j] = v
-    # and return. If to the contrary v is zero, then simply return.
-    if !iszero(v)
-        nz = getcolptr(A)[size(A, 2)+1]
-        # throw exception before state is partially modified
-        !isbitstype(Ti) || nz < typemax(Ti) ||
-            throw(ArgumentError("nnz(A) going to exceed typemax(Ti) = $(typemax(Ti))"))
-
-        # if nnz(A) < length(rowval/nzval): no need to grow rowval and preserve values
-        _insert!(rowvals(A), searchk, i, nz)
-        _insert!(nonzeros(A), searchk, v, nz)
-        @simd for m in (j + 1):(size(A, 2) + 1)
-            @inbounds getcolptr(A)[m] += Ti(1)
-        end
-    end
-    return A
-end
-
-# insert item at position pos, shifting only from pos+1 to nz
-function _insert!(v::Vector, pos::Integer, item, nz::Integer)
-    if nz > length(v)
-        insert!(v, pos, item)
-    else # nz < length(v)
-        Base.unsafe_copyto!(v, pos+1, v, pos, nz - pos)
-        v[pos] = item
-        v
-    end
-end
-
-function Base.fill!(V::SubArray{Tv, <:Any, <:AbstractSparseMatrixCSC, Tuple{Vararg{Union{Integer, AbstractVector{<:Integer}},2}}}, x) where Tv
-    A = V.parent
-    I, J = V.indices
-    if isempty(I) || isempty(J); return A; end
-    # lt=≤ to check for strict sorting
-    if !issorted(I, lt=≤); I = sort!(unique(I)); end
-    if !issorted(J, lt=≤); J = sort!(unique(J)); end
-    if (I[1] < 1 || I[end] > size(A, 1)) || (J[1] < 1 || J[end] > size(A, 2))
-        throw(BoundsError(A, (I, J)))
-    end
-    if x == 0
-        _spsetz_setindex!(A, I, J)
-    else
-        _spsetnz_setindex!(A, convert(Tv, x), I, J)
-    end
-end
-"""
-Helper method for immediately preceding setindex! method. For all (i,j) such that i in I and
-j in J, assigns zero to A[i,j] if A[i,j] is a presently-stored entry, and otherwise does nothing.
-"""
-function _spsetz_setindex!(A::AbstractSparseMatrixCSC,
-        I::Union{Integer, AbstractVector{<:Integer}}, J::Union{Integer, AbstractVector{<:Integer}})
-    require_one_based_indexing(A, I, J)
-    lengthI = length(I)
-    for j in J
-        coljAfirstk = getcolptr(A)[j]
-        coljAlastk = getcolptr(A)[j+1] - 1
-        coljAfirstk > coljAlastk && continue
-        kA = coljAfirstk
-        kI = 1
-        entrykArow = rowvals(A)[kA]
-        entrykIrow = I[kI]
-        while true
-            if entrykArow < entrykIrow
-                kA += 1
-                kA > coljAlastk && break
-                entrykArow = rowvals(A)[kA]
-            elseif entrykArow > entrykIrow
-                kI += 1
-                kI > lengthI && break
-                entrykIrow = I[kI]
-            else # entrykArow == entrykIrow
-                nonzeros(A)[kA] = 0
-                kA += 1
-                kI += 1
-                (kA > coljAlastk || kI > lengthI) && break
-                entrykArow = rowvals(A)[kA]
-                entrykIrow = I[kI]
-            end
-        end
-    end
-end
-"""
-Helper method for immediately preceding setindex! method. For all (i,j) such that i in I
-and j in J, assigns x to A[i,j] if A[i,j] is a presently-stored entry, and allocates and
-assigns x to A[i,j] if A[i,j] is not presently stored.
-"""
-function _spsetnz_setindex!(A::AbstractSparseMatrixCSC{Tv}, x::Tv,
-        I::Union{Integer, AbstractVector{<:Integer}}, J::Union{Integer, AbstractVector{<:Integer}}) where Tv
-    require_one_based_indexing(A, I, J)
-    m, n = size(A)
-    lenI = length(I)
-
-    nnzA = nnz(A) + lenI * length(J)
-
-    rowvalA = rowval = rowvals(A)
-    nzvalA = nzval = nonzeros(A)
-
-    rowidx = 1
-    nadd = 0
-    @inbounds for col in 1:n
-        rrange = nzrange(A, col)
-        if nadd > 0
-            getcolptr(A)[col] = getcolptr(A)[col] + nadd
-        end
-
-        if col in J
-            if isempty(rrange) # set new vals only
-                nincl = lenI
-                if nadd == 0
-                    rowval = copy(rowvalA)
-                    nzval = copy(nzvalA)
-                    resize!(rowvalA, nnzA)
-                    resize!(nzvalA, nnzA)
-                end
-                r = rowidx:(rowidx+nincl-1)
-                rowvalA[r] = I
-                nzvalA[r] = x
-                rowidx += nincl
-                nadd += nincl
-            else # set old + new vals
-                old_ptr = rrange[1]
-                old_stop = rrange[end]
-                new_ptr = 1
-                new_stop = lenI
-
-                while true
-                    old_row = rowval[old_ptr]
-                    new_row = I[new_ptr]
-                    if old_row < new_row
-                        rowvalA[rowidx] = old_row
-                        nzvalA[rowidx] = nzval[old_ptr]
-                        rowidx += 1
-                        old_ptr += 1
-                    else
-                        if old_row == new_row
-                            old_ptr += 1
-                        else
-                            if nadd == 0
-                                rowval = copy(rowvalA)
-                                nzval = copy(nzvalA)
-                                resize!(rowvalA, nnzA)
-                                resize!(nzvalA, nnzA)
-                            end
-                            nadd += 1
-                        end
-                        rowvalA[rowidx] = new_row
-                        nzvalA[rowidx] = x
-                        rowidx += 1
-                        new_ptr += 1
-                    end
-
-                    if old_ptr > old_stop
-                        if new_ptr <= new_stop
-                            if nadd == 0
-                                rowval = copy(rowvalA)
-                                nzval = copy(nzvalA)
-                                resize!(rowvalA, nnzA)
-                                resize!(nzvalA, nnzA)
-                            end
-                            r = rowidx:(rowidx+(new_stop-new_ptr))
-                            rowvalA[r] = I[new_ptr:new_stop]
-                            nzvalA[r] = x
-                            rowidx += length(r)
-                            nadd += length(r)
-                        end
-                        break
-                    end
-
-                    if new_ptr > new_stop
-                        nincl = old_stop-old_ptr+1
-                        copyto!(rowvalA, rowidx, rowval, old_ptr, nincl)
-                        copyto!(nzvalA, rowidx, nzval, old_ptr, nincl)
-                        rowidx += nincl
-                        break
-                    end
-                end
-            end
-        elseif !isempty(rrange) # set old vals only
-            nincl = length(rrange)
-            copyto!(rowvalA, rowidx, rowval, rrange[1], nincl)
-            copyto!(nzvalA, rowidx, nzval, rrange[1], nincl)
-            rowidx += nincl
-        end
-    end
-
-    if nadd > 0
-        getcolptr(A)[n+1] = rowidx
-        deleteat!(rowvalA, rowidx:nnzA)
-        deleteat!(nzvalA, rowidx:nnzA)
-    end
-    return A
-end
-
-# Nonscalar A[I,J] = B: Convert B to a SparseMatrixCSC of the appropriate shape first
-_to_same_csc(::AbstractSparseMatrixCSC{Tv, Ti}, V::AbstractMatrix, I...) where {Tv,Ti} = convert(SparseMatrixCSC{Tv,Ti}, V)
-_to_same_csc(::AbstractSparseMatrixCSC{Tv, Ti}, V::AbstractVector, I...) where {Tv,Ti} = convert(SparseMatrixCSC{Tv,Ti}, reshape(V, map(length, I)))
-
-setindex!(A::AbstractSparseMatrixCSC{Tv}, B::AbstractVecOrMat, I::Integer, J::Integer) where {Tv} = _setindex_scalar!(A, B, I, J)
-
-function setindex!(A::AbstractSparseMatrixCSC{Tv,Ti}, V::AbstractVecOrMat, Ix::Union{Integer, AbstractVector{<:Integer}, Colon}, Jx::Union{Integer, AbstractVector{<:Integer}, Colon}) where {Tv,Ti<:Integer}
-    require_one_based_indexing(A, V, Ix, Jx)
-    (I, J) = Base.ensure_indexable(to_indices(A, (Ix, Jx)))
-    checkbounds(A, I, J)
-    Base.setindex_shape_check(V, length(I), length(J))
-    B = _to_same_csc(A, V, I, J)
-
-    issortedI = issorted(I)
-    issortedJ = issorted(J)
-
-    if !issortedI && !issortedJ
-        pI = sortperm(I); @inbounds I = I[pI]
-        pJ = sortperm(J); @inbounds J = J[pJ]
-        B = B[pI, pJ]
-    elseif !issortedI
-        pI = sortperm(I); @inbounds I = I[pI]
-        B = B[pI,:]
-    elseif !issortedJ
-        pJ = sortperm(J); @inbounds J = J[pJ]
-        B = B[:, pJ]
-    end
-
-    m, n = size(A)
-    mB, nB = size(B)
-
-    if (!isempty(I) && (I[1] < 1 || I[end] > m)) || (!isempty(J) && (J[1] < 1 || J[end] > n))
-        throw(BoundsError(A, (I, J)))
-    end
-
-    if isempty(I) || isempty(J)
-        return A
-    end
-
-    nI = length(I)
-    nJ = length(J)
-
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-    colptrB = getcolptr(B); rowvalB = rowvals(B); nzvalB = nonzeros(B)
-
-    nnzS = nnz(A) + nnz(B)
-
-    colptrS = copy(getcolptr(A))
-    rowvalS = copy(rowvals(A))
-    nzvalS = copy(nonzeros(A))
-
-    resize!(rowvalA, nnzS)
-    resize!(nzvalA, nnzS)
-
-    colB = 1
-    asgn_col = J[colB]
-
-    I_asgn = falses(m)
-    fill!(view(I_asgn, I), true)
-
-    ptrS = 1
-
-    @inbounds for col = 1:n
-
-        # Copy column of A if it is not being assigned into
-        if colB > nJ || col != J[colB]
-            colptrA[col+1] = colptrA[col] + (colptrS[col+1]-colptrS[col])
-
-            for k = colptrS[col]:colptrS[col+1]-1
-                rowvalA[ptrS] = rowvalS[k]
-                nzvalA[ptrS] = nzvalS[k]
-                ptrS += 1
-            end
-            continue
-        end
-
-        ptrA::Int  = colptrS[col]
-        stopA::Int = colptrS[col+1]
-        ptrB::Int  = colptrB[colB]
-        stopB::Int = colptrB[colB+1]
-
-        while ptrA < stopA && ptrB < stopB
-            rowA = rowvalS[ptrA]
-            rowB = I[rowvalB[ptrB]]
-            if rowA < rowB
-                rowvalA[ptrS] = rowA
-                nzvalA[ptrS] = I_asgn[rowA] ? zero(Tv) : nzvalS[ptrA]
-                ptrS += 1
-                ptrA += 1
-            elseif rowB < rowA
-                if nzvalB[ptrB] != zero(Tv)
-                    rowvalA[ptrS] = rowB
-                    nzvalA[ptrS] = nzvalB[ptrB]
-                    ptrS += 1
-                end
-                ptrB += 1
-            else
-                rowvalA[ptrS] = rowB
-                nzvalA[ptrS] = nzvalB[ptrB]
-                ptrS += 1
-                ptrB += 1
-                ptrA += 1
-            end
-        end
-
-        while ptrA < stopA
-            rowA = rowvalS[ptrA]
-            rowvalA[ptrS] = rowA
-            nzvalA[ptrS] = I_asgn[rowA] ? zero(Tv) : nzvalS[ptrA]
-            ptrS += 1
-            ptrA += 1
-        end
-
-        while ptrB < stopB
-            rowB = I[rowvalB[ptrB]]
-            if nzvalB[ptrB] != zero(Tv)
-                rowvalA[ptrS] = rowB
-                nzvalA[ptrS] = nzvalB[ptrB]
-                ptrS += 1
-            end
-            ptrB += 1
-        end
-
-        colptrA[col+1] = ptrS
-        colB += 1
-    end
-
-    deleteat!(rowvalA, colptrA[end]:length(rowvalA))
-    deleteat!(nzvalA, colptrA[end]:length(nzvalA))
-
-    return A
-end
-
-# Logical setindex!
-
-setindex!(A::Matrix, x::AbstractSparseMatrixCSC, I::Integer, J::AbstractVector{Bool}) = setindex!(A, Array(x), I, findall(J))
-setindex!(A::Matrix, x::AbstractSparseMatrixCSC, I::AbstractVector{Bool}, J::Integer) = setindex!(A, Array(x), findall(I), J)
-setindex!(A::Matrix, x::AbstractSparseMatrixCSC, I::AbstractVector{Bool}, J::AbstractVector{Bool}) = setindex!(A, Array(x), findall(I), findall(J))
-setindex!(A::Matrix, x::AbstractSparseMatrixCSC, I::AbstractVector{<:Integer}, J::AbstractVector{Bool}) = setindex!(A, Array(x), I, findall(J))
-setindex!(A::Matrix, x::AbstractSparseMatrixCSC, I::AbstractVector{Bool}, J::AbstractVector{<:Integer}) = setindex!(A, Array(x), findall(I), J)
-
-function setindex!(A::AbstractSparseMatrixCSC, x::AbstractArray, I::AbstractMatrix{Bool})
-    require_one_based_indexing(A, x, I)
-    checkbounds(A, I)
-    n = sum(I)
-    (n == 0) && (return A)
-
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-    colptrB = colptrA; rowvalB = rowvalA; nzvalB = nzvalA
-    nadd = 0
-    bidx = xidx = 1
-    r1 = r2 = 0
-
-    @inbounds for col in 1:size(A, 2)
-        r1 = Int(colptrA[col])
-        r2 = Int(colptrA[col+1]-1)
-
-        for row in 1:size(A, 1)
-            if I[row, col]
-                v = x[xidx]
-                xidx += 1
-
-                if r1 <= r2
-                    copylen = searchsortedfirst(rowvalA, row, r1, r2, Forward) - r1
-                    if (copylen > 0)
-                        if (nadd > 0)
-                            copyto!(rowvalB, bidx, rowvalA, r1, copylen)
-                            copyto!(nzvalB, bidx, nzvalA, r1, copylen)
-                        end
-                        bidx += copylen
-                        r1 += copylen
-                    end
-                end
-
-                # 0: no change, 1: update, 2: add new
-                mode = ((r1 <= r2) && (rowvalA[r1] == row)) ? 1 : ((v == 0) ? 0 : 2)
-
-                if (mode > 1) && (nadd == 0)
-                    # copy storage to take changes
-                    colptrA = copy(colptrB)
-                    memreq = (x == 0) ? 0 : n
-                    # this x == 0 check and approach doesn't jive with use of v above
-                    # and may not make sense generally, as scalar x == 0 probably
-                    # means this section should never be called. also may not be generic.
-                    # TODO: clean this up, maybe separate scalar and array X cases
-                    rowvalA = copy(rowvalB)
-                    nzvalA = copy(nzvalB)
-                    resize!(rowvalB, length(rowvalA)+memreq)
-                    resize!(nzvalB, length(rowvalA)+memreq)
-                end
-                if mode == 1
-                    rowvalB[bidx] = row
-                    nzvalB[bidx] = v
-                    bidx += 1
-                    r1 += 1
-                elseif mode == 2
-                    rowvalB[bidx] = row
-                    nzvalB[bidx] = v
-                    bidx += 1
-                    nadd += 1
-                end
-                (xidx > n) && break
-            end # if I[row, col]
-        end # for row in 1:size(A, 1)
-
-        if (nadd != 0)
-            l = r2-r1+1
-            if l > 0
-                copyto!(rowvalB, bidx, rowvalA, r1, l)
-                copyto!(nzvalB, bidx, nzvalA, r1, l)
-                bidx += l
-            end
-            colptrB[col+1] = bidx
-
-            if (xidx > n) && (length(colptrB) > (col+1))
-                diff = nadd
-                colptrB[(col+2):end] = colptrA[(col+2):end] .+ diff
-                r1 = colptrA[col+1]
-                r2 = colptrA[end]-1
-                l = r2-r1+1
-                if l > 0
-                    copyto!(rowvalB, bidx, rowvalA, r1, l)
-                    copyto!(nzvalB, bidx, nzvalA, r1, l)
-                    bidx += l
-                end
-            end
-        else
-            bidx = colptrA[col+1]
-        end
-        (xidx > n) && break
-    end # for col in 1:size(A, 2)
-
-    if (nadd != 0)
-        n = length(nzvalB)
-        if n > (bidx-1)
-            deleteat!(nzvalB, bidx:n)
-            deleteat!(rowvalB, bidx:n)
-        end
-    end
-    A
-end
-
-function setindex!(A::AbstractSparseMatrixCSC, x::AbstractArray, Ix::AbstractVector{<:Integer})
-    require_one_based_indexing(A, x, Ix)
-    (I,) = Base.ensure_indexable(to_indices(A, (Ix,)))
-    # We check bounds after sorting I
-    n = length(I)
-    (n == 0) && (return A)
-
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A); szA = size(A)
-    colptrB = colptrA; rowvalB = rowvalA; nzvalB = nzvalA
-    nadd = 0
-    bidx = aidx = 1
-
-    S = issorted(I) ? (1:n) : sortperm(I)
-    sxidx = r1 = r2 = 0
-
-    if (!isempty(I) && (I[S[1]] < 1 || I[S[end]] > length(A)))
-        throw(BoundsError(A, I))
-    end
-
-    isa(x, AbstractArray) && setindex_shape_check(x, length(I))
-
-    lastcol = 0
-    (nrowA, ncolA) = szA
-    @inbounds for xidx in 1:n
-        sxidx = S[xidx]
-        (sxidx < n) && (I[sxidx] == I[sxidx+1]) && continue
-
-        row,col = Base._ind2sub(szA, I[sxidx])
-        v = x[sxidx]
-
-        if col > lastcol
-            r1 = Int(colptrA[col])
-            r2 = Int(colptrA[col+1] - 1)
-
-            # copy from last position till current column
-            if (nadd > 0)
-                colptrB[(lastcol+1):col] = colptrA[(lastcol+1):col] .+ nadd
-                copylen = r1 - aidx
-                if copylen > 0
-                    copyto!(rowvalB, bidx, rowvalA, aidx, copylen)
-                    copyto!(nzvalB, bidx, nzvalA, aidx, copylen)
-                    aidx += copylen
-                    bidx += copylen
-                end
-            else
-                aidx = bidx = r1
-            end
-            lastcol = col
-        end
-
-        if r1 <= r2
-            copylen = searchsortedfirst(rowvalA, row, r1, r2, Forward) - r1
-            if (copylen > 0)
-                if (nadd > 0)
-                    copyto!(rowvalB, bidx, rowvalA, r1, copylen)
-                    copyto!(nzvalB, bidx, nzvalA, r1, copylen)
-                end
-                bidx += copylen
-                r1 += copylen
-                aidx += copylen
-            end
-        end
-
-        # 0: no change, 1: update, 2: add new
-        mode = ((r1 <= r2) && (rowvalA[r1] == row)) ? 1 : ((v == 0) ? 0 : 2)
-
-        if (mode > 1) && (nadd == 0)
-            # copy storage to take changes
-            colptrA = copy(colptrB)
-            memreq = (x == 0) ? 0 : n
-            # see comment/TODO for same statement in preceding logical setindex! method
-            rowvalA = copy(rowvalB)
-            nzvalA = copy(nzvalB)
-            resize!(rowvalB, length(rowvalA)+memreq)
-            resize!(nzvalB, length(rowvalA)+memreq)
-        end
-        if mode == 1
-            rowvalB[bidx] = row
-            nzvalB[bidx] = v
-            bidx += 1
-            aidx += 1
-            r1 += 1
-        elseif mode == 2
-            rowvalB[bidx] = row
-            nzvalB[bidx] = v
-            bidx += 1
-            nadd += 1
-        end
-    end
-
-    # copy the rest
-    @inbounds if (nadd > 0)
-        colptrB[(lastcol+1):end] = colptrA[(lastcol+1):end] .+ nadd
-        r1 = colptrA[end]-1
-        copylen = r1 - aidx + 1
-        if copylen > 0
-            copyto!(rowvalB, bidx, rowvalA, aidx, copylen)
-            copyto!(nzvalB, bidx, nzvalA, aidx, copylen)
-            aidx += copylen
-            bidx += copylen
-        end
-
-        n = length(nzvalB)
-        if n > (bidx-1)
-            deleteat!(nzvalB, bidx:n)
-            deleteat!(rowvalB, bidx:n)
-        end
-    end
-    A
-end
-
-## dropstored! methods
-"""
-    dropstored!(A::AbstractSparseMatrixCSC, i::Integer, j::Integer)
-
-Drop entry `A[i,j]` from `A` if `A[i,j]` is stored, and otherwise do nothing.
-
-```jldoctest
-julia> A = sparse([1 2; 0 0])
-2×2 SparseMatrixCSC{Int64, Int64} with 2 stored entries:
- 1  2
- ⋅  ⋅
-
-julia> SparseArrays.dropstored!(A, 1, 2); A
-2×2 SparseMatrixCSC{Int64, Int64} with 1 stored entry:
- 1  ⋅
- ⋅  ⋅
-```
-"""
-function dropstored!(A::AbstractSparseMatrixCSC, i::Integer, j::Integer)
-    if !((1 <= i <= size(A, 1)) & (1 <= j <= size(A, 2)))
-        throw(BoundsError(A, (i,j)))
-    end
-    coljfirstk = Int(getcolptr(A)[j])
-    coljlastk = Int(getcolptr(A)[j+1] - 1)
-    searchk = searchsortedfirst(rowvals(A), i, coljfirstk, coljlastk, Base.Order.Forward)
-    if searchk <= coljlastk && rowvals(A)[searchk] == i
-        # Entry A[i,j] is stored. Drop and return.
-        deleteat!(rowvals(A), searchk)
-        deleteat!(nonzeros(A), searchk)
-        @simd for m in (j+1):(size(A, 2) + 1)
-            @inbounds getcolptr(A)[m] -= 1
-        end
-    end
-    return A
-end
-"""
-    dropstored!(A::AbstractSparseMatrixCSC, I::AbstractVector{<:Integer}, J::AbstractVector{<:Integer})
-
-For each `(i,j)` where `i in I` and `j in J`, drop entry `A[i,j]` from `A` if `A[i,j]` is
-stored and otherwise do nothing. Derivative forms:
-
-    dropstored!(A::AbstractSparseMatrixCSC, i::Integer, J::AbstractVector{<:Integer})
-    dropstored!(A::AbstractSparseMatrixCSC, I::AbstractVector{<:Integer}, j::Integer)
-
-# Examples
-```jldoctest
-julia> A = sparse(Diagonal([1, 2, 3, 4]))
-4×4 SparseMatrixCSC{Int64, Int64} with 4 stored entries:
- 1  ⋅  ⋅  ⋅
- ⋅  2  ⋅  ⋅
- ⋅  ⋅  3  ⋅
- ⋅  ⋅  ⋅  4
-
-julia> SparseArrays.dropstored!(A, [1, 2], [1, 1])
-4×4 SparseMatrixCSC{Int64, Int64} with 3 stored entries:
- ⋅  ⋅  ⋅  ⋅
- ⋅  2  ⋅  ⋅
- ⋅  ⋅  3  ⋅
- ⋅  ⋅  ⋅  4
-```
-"""
-function dropstored!(A::AbstractSparseMatrixCSC,
-        I::AbstractVector{<:Integer}, J::AbstractVector{<:Integer})
-    require_one_based_indexing(A, I, J)
-    m, n = size(A)
-    nnzA = nnz(A)
-    (nnzA == 0) && (return A)
-
-    !issorted(I) && (I = sort(I))
-    !issorted(J) && (J = sort(J))
-
-    if (!isempty(I) && (I[1] < 1 || I[end] > m)) || (!isempty(J) && (J[1] < 1 || J[end] > n))
-        throw(BoundsError(A, (I, J)))
-    end
-
-    if isempty(I) || isempty(J)
-        return A
-    end
-
-    rowval = rowvalA = rowvals(A)
-    nzval = nzvalA = nonzeros(A)
-    rowidx = 1
-    ndel = 0
-    @inbounds for col in 1:n
-        rrange = nzrange(A, col)
-        if ndel > 0
-            getcolptr(A)[col] = getcolptr(A)[col] - ndel
-        end
-
-        if isempty(rrange) || !(col in J)
-            nincl = length(rrange)
-            if(ndel > 0) && !isempty(rrange)
-                copyto!(rowvalA, rowidx, rowval, rrange[1], nincl)
-                copyto!(nzvalA, rowidx, nzval, rrange[1], nincl)
-            end
-            rowidx += nincl
-        else
-            for ridx in rrange
-                if rowval[ridx] in I
-                    if ndel == 0
-                        rowval = copy(rowvalA)
-                        nzval = copy(nzvalA)
-                    end
-                    ndel += 1
-                else
-                    if ndel > 0
-                        rowvalA[rowidx] = rowval[ridx]
-                        nzvalA[rowidx] = nzval[ridx]
-                    end
-                    rowidx += 1
-                end
-            end
-        end
-    end
-
-    if ndel > 0
-        getcolptr(A)[n+1] = rowidx
-        deleteat!(rowvalA, rowidx:nnzA)
-        deleteat!(nzvalA, rowidx:nnzA)
-    end
-    return A
-end
-dropstored!(A::AbstractSparseMatrixCSC, i::Integer, J::AbstractVector{<:Integer}) = dropstored!(A, [i], J)
-dropstored!(A::AbstractSparseMatrixCSC, I::AbstractVector{<:Integer}, j::Integer) = dropstored!(A, I, [j])
-dropstored!(A::AbstractSparseMatrixCSC, ::Colon, j::Union{Integer,AbstractVector}) = dropstored!(A, 1:size(A,1), j)
-dropstored!(A::AbstractSparseMatrixCSC, i::Union{Integer,AbstractVector}, ::Colon) = dropstored!(A, i, 1:size(A,2))
-dropstored!(A::AbstractSparseMatrixCSC, ::Colon, ::Colon) = dropstored!(A, 1:size(A,1), 1:size(A,2))
-dropstored!(A::AbstractSparseMatrixCSC, ::Colon) = dropstored!(A, :, :)
-# TODO: Several of the preceding methods are optimization candidates.
-# TODO: Implement linear indexing methods for dropstored! ?
-# TODO: Implement logical indexing methods for dropstored! ?
-
-# Sparse concatenation
-
-function vcat(X::AbstractSparseMatrixCSC...)
-    num = length(X)
-    mX = Int[ size(x, 1) for x in X ]
-    nX = Int[ size(x, 2) for x in X ]
-    m = sum(mX)
-    n = nX[1]
-
-    for i = 2 : num
-        if nX[i] != n
-            throw(DimensionMismatch("All inputs to vcat should have the same number of columns"))
-        end
-    end
-
-    Tv = promote_eltype(X...)
-    Ti = promote_eltype(map(x->rowvals(x), X)...)
-
-    nnzX = Int[ nnz(x) for x in X ]
-    nnz_res = sum(nnzX)
-    colptr = Vector{Ti}(undef, n+1)
-    rowval = Vector{Ti}(undef, nnz_res)
-    nzval  = Vector{Tv}(undef, nnz_res)
-
-    colptr[1] = 1
-    for c = 1:n
-        mX_sofar = 0
-        ptr_res = colptr[c]
-        for i = 1 : num
-            colptrXi = getcolptr(X[i])
-            col_length = (colptrXi[c + 1] - 1) - colptrXi[c]
-            ptr_Xi = colptrXi[c]
-
-            stuffcol!(X[i], colptr, rowval, nzval,
-                      ptr_res, ptr_Xi, col_length, mX_sofar)
-
-            ptr_res += col_length + 1
-            mX_sofar += mX[i]
-        end
-        colptr[c + 1] = ptr_res
-    end
-    SparseMatrixCSC(m, n, colptr, rowval, nzval)
-end
-
-@inline function stuffcol!(Xi::AbstractSparseMatrixCSC, colptr, rowval, nzval,
-                           ptr_res, ptr_Xi, col_length, mX_sofar)
-    colptrXi = getcolptr(Xi)
-    rowvalXi = rowvals(Xi)
-    nzvalXi  = nonzeros(Xi)
-
-    for k=ptr_res:(ptr_res + col_length)
-        @inbounds rowval[k] = rowvalXi[ptr_Xi] + mX_sofar
-        @inbounds nzval[k]  = nzvalXi[ptr_Xi]
-        ptr_Xi += 1
-    end
-end
-
-function hcat(X::AbstractSparseMatrixCSC...)
-    num = length(X)
-    mX = Int[ size(x, 1) for x in X ]
-    nX = Int[ size(x, 2) for x in X ]
-    m = mX[1]
-    for i = 2 : num
-        if mX[i] != m; throw(DimensionMismatch("")); end
-    end
-    n = sum(nX)
-
-    Tv = promote_eltype(X...)
-    Ti = promote_eltype(map(x->rowvals(x), X)...)
-
-    colptr = Vector{Ti}(undef, n+1)
-    nnzX = Int[ nnz(x) for x in X ]
-    nnz_res = sum(nnzX)
-    rowval = Vector{Ti}(undef, nnz_res)
-    nzval = Vector{Tv}(undef, nnz_res)
-
-    nnz_sofar = 0
-    nX_sofar = 0
-    @inbounds for i = 1 : num
-        XI = X[i]
-        colptr[(1 : nX[i] + 1) .+ nX_sofar] = getcolptr(XI) .+ nnz_sofar
-        if nnzX[i] == length(rowvals(XI))
-            rowval[(1 : nnzX[i]) .+ nnz_sofar] = rowvals(XI)
-            nzval[(1 : nnzX[i]) .+ nnz_sofar] = nonzeros(XI)
-        else
-            rowval[(1 : nnzX[i]) .+ nnz_sofar] = rowvals(XI)[1:nnzX[i]]
-            nzval[(1 : nnzX[i]) .+ nnz_sofar] = nonzeros(XI)[1:nnzX[i]]
-        end
-        nnz_sofar += nnzX[i]
-        nX_sofar += nX[i]
-    end
-
-    SparseMatrixCSC(m, n, colptr, rowval, nzval)
-end
-
-"""
-    blockdiag(A...)
-
-Concatenate matrices block-diagonally. Currently only implemented for sparse matrices.
-
-# Examples
-```jldoctest
-julia> blockdiag(sparse(2I, 3, 3), sparse(4I, 2, 2))
-5×5 SparseMatrixCSC{Int64, Int64} with 5 stored entries:
- 2  ⋅  ⋅  ⋅  ⋅
- ⋅  2  ⋅  ⋅  ⋅
- ⋅  ⋅  2  ⋅  ⋅
- ⋅  ⋅  ⋅  4  ⋅
- ⋅  ⋅  ⋅  ⋅  4
-```
-"""
-blockdiag() = spzeros(promote_type(), Int, 0, 0)
-
-function blockdiag(X::AbstractSparseMatrixCSC{Tv, Ti}...) where {Tv, Ti <: Integer}
-    _blockdiag(Tv, Ti, X...)
-end
-
-function blockdiag(X::AbstractSparseMatrixCSC...)
-    Tv = promote_type(map(x->eltype(nonzeros(x)), X)...)
-    Ti = promote_type(map(x->eltype(rowvals(x)), X)...)
-    _blockdiag(Tv, Ti, X...)
-end
-
-function _blockdiag(::Type{Tv}, ::Type{Ti}, X::AbstractSparseMatrixCSC...) where {Tv, Ti <: Integer}
-    num = length(X)
-    mX = Int[ size(x, 1) for x in X ]
-    nX = Int[ size(x, 2) for x in X ]
-    m = sum(mX)
-    n = sum(nX)
-
-    colptr = Vector{Ti}(undef, n+1)
-    nnzX = Int[ nnz(x) for x in X ]
-    nnz_res = sum(nnzX)
-    rowval = Vector{Ti}(undef, nnz_res)
-    nzval = Vector{Tv}(undef, nnz_res)
-
-    nnz_sofar = 0
-    nX_sofar = 0
-    mX_sofar = 0
-    for i = 1 : num
-        colptr[(1 : nX[i] + 1) .+ nX_sofar] = getcolptr(X[i]) .+ nnz_sofar
-        rowval[(1 : nnzX[i]) .+ nnz_sofar] = rowvals(X[i]) .+ mX_sofar
-        nzval[(1 : nnzX[i]) .+ nnz_sofar] = nonzeros(X[i])
-        nnz_sofar += nnzX[i]
-        nX_sofar += nX[i]
-        mX_sofar += mX[i]
-    end
-    colptr[n+1] = nnz_sofar + 1
-
-    SparseMatrixCSC(m, n, colptr, rowval, nzval)
-end
-
-## Structure query functions
-issymmetric(A::AbstractSparseMatrixCSC) = is_hermsym(A, identity)
-
-ishermitian(A::AbstractSparseMatrixCSC) = is_hermsym(A, conj)
-
-function is_hermsym(A::AbstractSparseMatrixCSC, check::Function)
-    m, n = size(A)
-    if m != n; return false; end
-
-    colptr = getcolptr(A)
-    rowval = rowvals(A)
-    nzval = nonzeros(A)
-    tracker = copy(getcolptr(A))
-    for col = 1:size(A, 2)
-        # `tracker` is updated such that, for symmetric matrices,
-        # the loop below starts from an element at or below the
-        # diagonal element of column `col`"
-        for p = tracker[col]:colptr[col+1]-1
-            val = nzval[p]
-            row = rowval[p]
-
-            # Ignore stored zeros
-            if val == 0
-                continue
-            end
-
-            # If the matrix was symmetric we should have updated
-            # the tracker to start at the diagonal or below. Here
-            # we are above the diagonal so the matrix can't be symmetric.
-            if row < col
-                return false
-            end
-
-            # Diagonal element
-            if row == col
-                if val != check(val)
-                    return false
-                end
-            else
-                offset = tracker[row]
-
-                # If the matrix is unsymmetric, there might not exist
-                # a rowval[offset]
-                if offset > length(rowval)
-                    return false
-                end
-
-                row2 = rowval[offset]
-
-                # row2 can be less than col if the tracker didn't
-                # get updated due to stored zeros in previous elements.
-                # We therefore "catch up" here while making sure that
-                # the elements are actually zero.
-                while row2 < col
-                    if !iszero(nzval[offset])
-                        return false
-                    end
-                    offset += 1
-                    row2 = rowval[offset]
-                    tracker[row] += 1
-                end
-
-                # Non zero A[i,j] exists but A[j,i] does not exist
-                if row2 > col
-                    return false
-                end
-
-                # A[i,j] and A[j,i] exists
-                if row2 == col
-                    if val != check(nzval[offset])
-                        return false
-                    end
-                    tracker[row] += 1
-                end
-            end
-        end
-    end
-    return true
-end
-
-function istriu(A::AbstractSparseMatrixCSC)
-    m, n = size(A)
-    colptr = getcolptr(A)
-    rowval = rowvals(A)
-    nzval  = nonzeros(A)
-
-    for col = 1:min(n, m-1)
-        l1 = colptr[col+1]-1
-        for i = 0 : (l1 - colptr[col])
-            if rowval[l1-i] <= col
-                break
-            end
-            if !iszero(nzval[l1-i])
-                return false
-            end
-        end
-    end
-    return true
-end
-
-function istril(A::AbstractSparseMatrixCSC)
-    m, n = size(A)
-    colptr = getcolptr(A)
-    rowval = rowvals(A)
-    nzval  = nonzeros(A)
-
-    for col = 2:n
-        for i = colptr[col] : (colptr[col+1]-1)
-            if rowval[i] >= col
-                break
-            end
-            if !iszero(nzval[i])
-                return false
-            end
-        end
-    end
-    return true
-end
-
-
-function spdiagm_internal(kv::Pair{<:Integer,<:AbstractVector}...)
-    ncoeffs = 0
-    for p in kv
-        ncoeffs += length(p.second)
-    end
-    I = Vector{Int}(undef, ncoeffs)
-    J = Vector{Int}(undef, ncoeffs)
-    V = Vector{promote_type(map(x -> eltype(x.second), kv)...)}(undef, ncoeffs)
-    i = 0
-    for p in kv
-        dia = p.first
-        vect = p.second
-        numel = length(vect)
-        if dia < 0
-            row = -dia
-            col = 0
-        elseif dia > 0
-            row = 0
-            col = dia
-        else
-            row = 0
-            col = 0
-        end
-        r = 1+i:numel+i
-        I[r] = row+1:row+numel
-        J[r] = col+1:col+numel
-        copyto!(view(V, r), vect)
-        i += numel
-    end
-    return I, J, V
-end
-
-"""
-    spdiagm(kv::Pair{<:Integer,<:AbstractVector}...)
-    spdiagm(m::Integer, n::Integer, kv::Pair{<:Integer,<:AbstractVector}...)
-
-Construct a sparse diagonal matrix from `Pair`s of vectors and diagonals.
-Each vector `kv.second` will be placed on the `kv.first` diagonal.  By
-default, the matrix is square and its size is inferred
-from `kv`, but a non-square size `m`×`n` (padded with zeros as needed)
-can be specified by passing `m,n` as the first arguments.
-
-# Examples
-```jldoctest
-julia> spdiagm(-1 => [1,2,3,4], 1 => [4,3,2,1])
-5×5 SparseMatrixCSC{Int64, Int64} with 8 stored entries:
- ⋅  4  ⋅  ⋅  ⋅
- 1  ⋅  3  ⋅  ⋅
- ⋅  2  ⋅  2  ⋅
- ⋅  ⋅  3  ⋅  1
- ⋅  ⋅  ⋅  4  ⋅
-```
-"""
-spdiagm(kv::Pair{<:Integer,<:AbstractVector}...) = _spdiagm(nothing, kv...)
-spdiagm(m::Integer, n::Integer, kv::Pair{<:Integer,<:AbstractVector}...) = _spdiagm((Int(m),Int(n)), kv...)
-function _spdiagm(size, kv::Pair{<:Integer,<:AbstractVector}...)
-    I, J, V = spdiagm_internal(kv...)
-    mmax, nmax = dimlub(I), dimlub(J)
-    mnmax = max(mmax, nmax)
-    m, n = something(size, (mnmax,mnmax))
-    (m ≥ mmax && n ≥ nmax) || throw(DimensionMismatch("invalid size=$size"))
-    return sparse(I, J, V, m, n)
-end
-
-## expand a colptr or rowptr into a dense index vector
-function expandptr(V::Vector{<:Integer})
-    if V[1] != 1 throw(ArgumentError("first index must be one")) end
-    res = similar(V, (Int64(V[end]-1),))
-    for i in 1:(length(V)-1), j in V[i]:(V[i+1] - 1); res[j] = i end
-    res
-end
-
-
-function diag(A::AbstractSparseMatrixCSC{Tv,Ti}, d::Integer=0) where {Tv,Ti}
-    m, n = size(A)
-    k = Int(d)
-    l = k < 0 ? min(m+k,n) : min(n-k,m)
-    r, c = k <= 0 ? (-k, 0) : (0, k) # start row/col -1
-    ind = Vector{Ti}()
-    val = Vector{Tv}()
-    for i in 1:l
-        r += 1; c += 1
-        r1 = Int(getcolptr(A)[c])
-        r2 = Int(getcolptr(A)[c+1]-1)
-        r1 > r2 && continue
-        r1 = searchsortedfirst(rowvals(A), r, r1, r2, Forward)
-        ((r1 > r2) || (rowvals(A)[r1] != r)) && continue
-        push!(ind, i)
-        push!(val, nonzeros(A)[r1])
-    end
-    return SparseVector{Tv,Ti}(l, ind, val)
-end
-
-function tr(A::AbstractSparseMatrixCSC{Tv}) where Tv
-    n = checksquare(A)
-    s = zero(Tv)
-    for i in 1:n
-        s += A[i,i]
-    end
-    return s
-end
-
-
-# Sort all the indices in each column of a CSC sparse matrix
-# sortSparseMatrixCSC!(A, sortindices = :sortcols)        # Sort each column with sort()
-# sortSparseMatrixCSC!(A, sortindices = :doubletranspose) # Sort with a double transpose
-function sortSparseMatrixCSC!(A::AbstractSparseMatrixCSC{Tv,Ti}; sortindices::Symbol = :sortcols) where {Tv,Ti}
-    if sortindices === :doubletranspose
-        nB, mB = size(A)
-        B = SparseMatrixCSC(mB, nB, Vector{Ti}(undef, nB+1), similar(rowvals(A)), similar(nonzeros(A)))
-        transpose!(B, A)
-        transpose!(A, B)
-        return A
-    end
-
-    m, n = size(A)
-    colptr = getcolptr(A); rowval = rowvals(A); nzval = nonzeros(A)
-
-    index = zeros(Ti, m)
-    row = zeros(Ti, m)
-    val = zeros(Tv, m)
-
-    perm = Base.Perm(Base.ord(isless, identity, false, Base.Order.Forward), row)
-
-    @inbounds for i = 1:n
-        nzr = nzrange(A, i)
-        numrows = length(nzr)
-        if numrows <= 1
-            continue
-        elseif numrows == 2
-            f = first(nzr)
-            s = f+1
-            if rowval[f] > rowval[s]
-                rowval[f], rowval[s] = rowval[s], rowval[f]
-                nzval[f],  nzval[s]  = nzval[s],  nzval[f]
-            end
-            continue
-        end
-        resize!(row, numrows)
-        resize!(index, numrows)
-
-        jj = 1
-        @simd for j = nzr
-            row[jj] = rowval[j]
-            val[jj] = nzval[j]
-            jj += 1
-        end
-
-        if numrows <= 16
-            alg = Base.Sort.InsertionSort
-        else
-            alg = Base.Sort.QuickSort
-        end
-
-        # Reset permutation
-        index .= 1:numrows
-
-        sort!(index, alg, perm)
-
-        jj = 1
-        @simd for j = nzr
-            rowval[j] = row[index[jj]]
-            nzval[j] = val[index[jj]]
-            jj += 1
-        end
-    end
-
-    return A
-end
-
-## rotations
-
-function rot180(A::AbstractSparseMatrixCSC)
-    I,J,V = findnz(A)
-    m,n = size(A)
-    for i=1:length(I)
-        I[i] = m - I[i] + 1
-        J[i] = n - J[i] + 1
-    end
-    return sparse(I,J,V,m,n)
-end
-
-function rotr90(A::AbstractSparseMatrixCSC)
-    I,J,V = findnz(A)
-    m,n = size(A)
-    #old col inds are new row inds
-    for i=1:length(I)
-        I[i] = m - I[i] + 1
-    end
-    return sparse(J, I, V, n, m)
-end
-
-function rotl90(A::AbstractSparseMatrixCSC)
-    I,J,V = findnz(A)
-    m,n = size(A)
-    #old row inds are new col inds
-    for i=1:length(J)
-        J[i] = n - J[i] + 1
-    end
-    return sparse(J, I, V, n, m)
-end
-
-## Uniform matrix arithmetic
-
-(+)(A::AbstractSparseMatrixCSC, J::UniformScaling) = A + sparse(J, size(A)...)
-(-)(A::AbstractSparseMatrixCSC, J::UniformScaling) = A - sparse(J, size(A)...)
-(-)(J::UniformScaling, A::AbstractSparseMatrixCSC) = sparse(J, size(A)...) - A
-
-## circular shift
-
-function circshift!(O::AbstractSparseMatrixCSC, X::AbstractSparseMatrixCSC, (r,c)::Base.DimsInteger{2})
-    nnz = length(nonzeros(X))
-
-    iszero(nnz) && return copy!(O, X)
-
-    ##### column shift
-    c = mod(c, size(X, 2))
-    if iszero(c)
-        copy!(O, X)
-    else
-        ##### readjust output
-        resize!(getcolptr(O), size(X, 2) + 1)
-        resize!(rowvals(O), nnz)
-        resize!(nonzeros(O), nnz)
-        getcolptr(O)[size(X, 2) + 1] = nnz + 1
-
-        # exchange left and right blocks
-        nleft = getcolptr(X)[size(X, 2) - c + 1] - 1
-        nright = nnz - nleft
-        @inbounds for i=c+1:size(X, 2)
-            getcolptr(O)[i] = getcolptr(X)[i-c] + nright
-        end
-        @inbounds for i=1:c
-            getcolptr(O)[i] = getcolptr(X)[size(X, 2) - c + i] - nleft
-        end
-        # rotate rowval and nzval by the right number of elements
-        circshift!(rowvals(O), rowvals(X), (nright,))
-        circshift!(nonzeros(O), nonzeros(X), (nright,))
-    end
-    ##### row shift
-    r = mod(r, size(X, 1))
-    iszero(r) && return O
-    @inbounds for i=1:size(O, 2)
-        subvector_shifter!(rowvals(O), nonzeros(O), getcolptr(O)[i], getcolptr(O)[i+1]-1, size(O, 1), r)
-    end
-    return O
-end
-
-circshift!(O::AbstractSparseMatrixCSC, X::AbstractSparseMatrixCSC, (r,)::Base.DimsInteger{1}) = circshift!(O, X, (r,0))
-circshift!(O::AbstractSparseMatrixCSC, X::AbstractSparseMatrixCSC, r::Real) = circshift!(O, X, (Integer(r),0))
diff --git a/stdlib/SparseArrays/src/sparsevector.jl b/stdlib/SparseArrays/src/sparsevector.jl
deleted file mode 100644
index 3990ff6162d276..00000000000000
--- a/stdlib/SparseArrays/src/sparsevector.jl
+++ /dev/null
@@ -1,2124 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-### Common definitions
-
-import Base: sort, findall, copy!
-import LinearAlgebra: promote_to_array_type, promote_to_arrays_
-
-### The SparseVector
-
-### Types
-
-"""
-    SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
-
-Vector type for storing sparse vectors.
-"""
-struct SparseVector{Tv,Ti<:Integer} <: AbstractSparseVector{Tv,Ti}
-    n::Int              # Length of the sparse vector
-    nzind::Vector{Ti}   # Indices of stored values
-    nzval::Vector{Tv}   # Stored values, typically nonzeros
-
-    function SparseVector{Tv,Ti}(n::Integer, nzind::Vector{Ti}, nzval::Vector{Tv}) where {Tv,Ti<:Integer}
-        n >= 0 || throw(ArgumentError("The number of elements must be non-negative."))
-        length(nzind) == length(nzval) ||
-            throw(ArgumentError("index and value vectors must be the same length"))
-        new(convert(Int, n), nzind, nzval)
-    end
-end
-
-SparseVector(n::Integer, nzind::Vector{Ti}, nzval::Vector{Tv}) where {Tv,Ti} =
-    SparseVector{Tv,Ti}(n, nzind, nzval)
-
-# Define an alias for a view of a whole column of a SparseMatrixCSC. Many methods can be written for the
-# union of such a view and a SparseVector so we define an alias for such a union as well
-const SparseColumnView{Tv,Ti}  = SubArray{Tv,1,<:AbstractSparseMatrixCSC{Tv,Ti},Tuple{Base.Slice{Base.OneTo{Int}},Int},false}
-const SparseVectorView{Tv,Ti}  = SubArray{Tv,1,<:AbstractSparseVector{Tv,Ti},Tuple{Base.Slice{Base.OneTo{Int}}},false}
-const SparseVectorUnion{Tv,Ti} = Union{SparseVector{Tv,Ti}, SparseColumnView{Tv,Ti}, SparseVectorView{Tv,Ti}}
-const AdjOrTransSparseVectorUnion{Tv,Ti} = LinearAlgebra.AdjOrTrans{Tv, <:SparseVectorUnion{Tv,Ti}}
-
-### Basic properties
-
-size(x::SparseVector)     = (getfield(x, :n),)
-count(f, x::SparseVector) = count(f, nonzeros(x)) + f(zero(eltype(x)))*(length(x) - nnz(x))
-
-# implement the nnz - nzrange - nonzeros - rowvals interface for sparse vectors
-
-nnz(x::SparseVector)      = length(nonzeros(x))
-function nnz(x::SparseColumnView)
-    rowidx, colidx = parentindices(x)
-    return length(nzrange(parent(x), colidx))
-end
-nnz(x::SparseVectorView) = nnz(x.parent)
-
-"""
-    nzrange(x::SparseVectorUnion, col)
-
-Give the range of indices to the structural nonzero values of a sparse vector.
-The column index `col` is ignored (assumed to be `1`).
-"""
-function nzrange(x::SparseVectorUnion, j::Integer)
-    j == 1 ? (1:nnz(x)) : throw(BoundsError(x, (":", j)))
-end
-
-nonzeros(x::SparseVector) = getfield(x, :nzval)
-function nonzeros(x::SparseColumnView)
-    rowidx, colidx = parentindices(x)
-    A = parent(x)
-    @inbounds y = view(nonzeros(A), nzrange(A, colidx))
-    return y
-end
-nonzeros(x::SparseVectorView) = nonzeros(parent(x))
-
-nonzeroinds(x::SparseVector) = getfield(x, :nzind)
-function nonzeroinds(x::SparseColumnView)
-    rowidx, colidx = parentindices(x)
-    A = parent(x)
-    @inbounds y = view(rowvals(A), nzrange(A, colidx))
-    return y
-end
-nonzeroinds(x::SparseVectorView) = nonzeroinds(parent(x))
-
-rowvals(x::SparseVectorUnion) = nonzeroinds(x)
-
-indtype(x::SparseColumnView) = indtype(parent(x))
-indtype(x::SparseVectorView) = indtype(parent(x))
-
-## similar
-#
-# parent method for similar that preserves stored-entry structure (for when new and old dims match)
-_sparsesimilar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}) where {TvNew,TiNew} =
-    SparseVector(length(S), copyto!(similar(nonzeroinds(S), TiNew), nonzeroinds(S)), similar(nonzeros(S), TvNew))
-# parent method for similar that preserves nothing (for when old and new dims differ, and new is 1d)
-_sparsesimilar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{1}) where {TvNew,TiNew} =
-    SparseVector(dims..., similar(nonzeroinds(S), TiNew, 0), similar(nonzeros(S), TvNew, 0))
-# parent method for similar that preserves storage space (for old and new dims differ, and new is 2d)
-_sparsesimilar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, dims::Dims{2}) where {TvNew,TiNew} =
-    SparseMatrixCSC(dims..., fill(one(TiNew), last(dims)+1), similar(nonzeroinds(S), TiNew), similar(nonzeros(S), TvNew))
-# The following methods hook into the AbstractArray similar hierarchy. The first method
-# covers similar(A[, Tv]) calls, which preserve stored-entry structure, and the latter
-# methods cover similar(A[, Tv], shape...) calls, which preserve nothing if the dims
-# specify a SparseVector result and storage space if the dims specify a SparseMatrixCSC result.
-similar(S::SparseVector{<:Any,Ti}, ::Type{TvNew}) where {Ti,TvNew} =
-    _sparsesimilar(S, TvNew, Ti)
-similar(S::SparseVector{<:Any,Ti}, ::Type{TvNew}, dims::Union{Dims{1},Dims{2}}) where {Ti,TvNew} =
-    _sparsesimilar(S, TvNew, Ti, dims)
-# The following methods cover similar(A, Tv, Ti[, shape...]) calls, which specify the
-# result's index type in addition to its entry type, and aren't covered by the hooks above.
-# The calls without shape again preserve stored-entry structure, whereas those with
-# one-dimensional shape preserve nothing, and those with two-dimensional shape
-# preserve storage space.
-similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}) where{TvNew,TiNew} =
-    _sparsesimilar(S, TvNew, TiNew)
-similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, dims::Union{Dims{1},Dims{2}}) where {TvNew,TiNew} =
-    _sparsesimilar(S, TvNew, TiNew, dims)
-similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer) where {TvNew,TiNew} =
-    _sparsesimilar(S, TvNew, TiNew, (m,))
-similar(S::SparseVector, ::Type{TvNew}, ::Type{TiNew}, m::Integer, n::Integer) where {TvNew,TiNew} =
-    _sparsesimilar(S, TvNew, TiNew, (m, n))
-
-## Alias detection and prevention
-using Base: dataids, unaliascopy
-Base.dataids(S::SparseVector) = (dataids(nonzeroinds(S))..., dataids(nonzeros(S))...)
-Base.unaliascopy(S::SparseVector) = typeof(S)(length(S), unaliascopy(nonzeroinds(S)), unaliascopy(nonzeros(S)))
-
-### Construct empty sparse vector
-
-spzeros(len::Integer) = spzeros(Float64, len)
-spzeros(::Type{T}, len::Integer) where {T} = SparseVector(len, Int[], T[])
-spzeros(::Type{Tv}, ::Type{Ti}, len::Integer) where {Tv,Ti<:Integer} = SparseVector(len, Ti[], Tv[])
-
-LinearAlgebra.fillstored!(x::SparseVector, y) = (fill!(nonzeros(x), y); x)
-
-### Construction from lists of indices and values
-
-function _sparsevector!(I::Vector{<:Integer}, V::Vector, len::Integer)
-    # pre-condition: no duplicate indices in I
-    if !isempty(I)
-        p = sortperm(I)
-        permute!(I, p)
-        permute!(V, p)
-    end
-    SparseVector(len, I, V)
-end
-
-function _sparsevector!(I::Vector{<:Integer}, V::Vector, len::Integer, combine::Function)
-    if !isempty(I)
-        p = sortperm(I)
-        permute!(I, p)
-        permute!(V, p)
-        m = length(I)
-        r = 1
-        l = 1       # length of processed part
-        i = I[r]    # row-index of current element
-
-        # main loop
-        while r < m
-            r += 1
-            i2 = I[r]
-            if i2 == i  # accumulate r-th to the l-th entry
-                V[l] = combine(V[l], V[r])
-            else  # advance l, and move r-th to l-th
-                pv = V[l]
-                l += 1
-                i = i2
-                if l < r
-                    I[l] = i; V[l] = V[r]
-                end
-            end
-        end
-        if l < m
-            resize!(I, l)
-            resize!(V, l)
-        end
-    end
-    SparseVector(len, I, V)
-end
-
-"""
-    sparsevec(I, V, [m, combine])
-
-Create a sparse vector `S` of length `m` such that `S[I[k]] = V[k]`.
-Duplicates are combined using the `combine` function, which defaults to
-`+` if no `combine` argument is provided, unless the elements of `V` are Booleans
-in which case `combine` defaults to `|`.
-
-# Examples
-```jldoctest
-julia> II = [1, 3, 3, 5]; V = [0.1, 0.2, 0.3, 0.2];
-
-julia> sparsevec(II, V)
-5-element SparseVector{Float64, Int64} with 3 stored entries:
-  [1]  =  0.1
-  [3]  =  0.5
-  [5]  =  0.2
-
-julia> sparsevec(II, V, 8, -)
-8-element SparseVector{Float64, Int64} with 3 stored entries:
-  [1]  =  0.1
-  [3]  =  -0.1
-  [5]  =  0.2
-
-julia> sparsevec([1, 3, 1, 2, 2], [true, true, false, false, false])
-3-element SparseVector{Bool, Int64} with 3 stored entries:
-  [1]  =  1
-  [2]  =  0
-  [3]  =  1
-```
-"""
-function sparsevec(I::AbstractVector{<:Integer}, V::AbstractVector, combine::Function)
-    require_one_based_indexing(I, V)
-    length(I) == length(V) ||
-        throw(ArgumentError("index and value vectors must be the same length"))
-    len = 0
-    for i in I
-        i >= 1 || error("Index must be positive.")
-        if i > len
-            len = i
-        end
-    end
-    _sparsevector!(Vector(I), Vector(V), len, combine)
-end
-
-function sparsevec(I::AbstractVector{<:Integer}, V::AbstractVector, len::Integer, combine::Function)
-    require_one_based_indexing(I, V)
-    length(I) == length(V) ||
-        throw(ArgumentError("index and value vectors must be the same length"))
-    for i in I
-        1 <= i <= len || throw(ArgumentError("An index is out of bound."))
-    end
-    _sparsevector!(Vector(I), Vector(V), len, combine)
-end
-
-sparsevec(I::AbstractVector, V::Union{Number, AbstractVector}, args...) =
-    sparsevec(Vector{Int}(I), V, args...)
-
-sparsevec(I::AbstractVector, V::Union{Number, AbstractVector}) =
-    sparsevec(I, V, +)
-
-sparsevec(I::AbstractVector, V::Union{Number, AbstractVector}, len::Integer) =
-    sparsevec(I, V, len, +)
-
-sparsevec(I::AbstractVector, V::Union{Bool, AbstractVector{Bool}}) =
-    sparsevec(I, V, |)
-
-sparsevec(I::AbstractVector, V::Union{Bool, AbstractVector{Bool}}, len::Integer) =
-    sparsevec(I, V, len, |)
-
-sparsevec(I::AbstractVector, v::Number, combine::Function) =
-    sparsevec(I, fill(v, length(I)), combine)
-
-sparsevec(I::AbstractVector, v::Number, len::Integer, combine::Function) =
-    sparsevec(I, fill(v, length(I)), len, combine)
-
-
-### Construction from dictionary
-"""
-    sparsevec(d::Dict, [m])
-
-Create a sparse vector of length `m` where the nonzero indices are keys from
-the dictionary, and the nonzero values are the values from the dictionary.
-
-# Examples
-```jldoctest
-julia> sparsevec(Dict(1 => 3, 2 => 2))
-2-element SparseVector{Int64, Int64} with 2 stored entries:
-  [1]  =  3
-  [2]  =  2
-```
-"""
-function sparsevec(dict::AbstractDict{Ti,Tv}) where {Tv,Ti<:Integer}
-    m = length(dict)
-    nzind = Vector{Ti}(undef, m)
-    nzval = Vector{Tv}(undef, m)
-
-    cnt = 0
-    len = zero(Ti)
-    for (k, v) in dict
-        k >= 1 || throw(ArgumentError("index must be positive."))
-        if k > len
-            len = k
-        end
-        cnt += 1
-        @inbounds nzind[cnt] = k
-        @inbounds nzval[cnt] = v
-    end
-    resize!(nzind, cnt)
-    resize!(nzval, cnt)
-    _sparsevector!(nzind, nzval, len)
-end
-
-function sparsevec(dict::AbstractDict{Ti,Tv}, len::Integer) where {Tv,Ti<:Integer}
-    m = length(dict)
-    nzind = Vector{Ti}(undef, m)
-    nzval = Vector{Tv}(undef, m)
-
-    cnt = 0
-    maxk = convert(Ti, len)
-    for (k, v) in dict
-        1 <= k <= maxk || throw(ArgumentError("an index (key) is out of bound."))
-        cnt += 1
-        @inbounds nzind[cnt] = k
-        @inbounds nzval[cnt] = v
-    end
-    resize!(nzind, cnt)
-    resize!(nzval, cnt)
-    _sparsevector!(nzind, nzval, len)
-end
-
-
-### Element access
-
-function setindex!(x::SparseVector{Tv,Ti}, v::Tv, i::Ti) where {Tv,Ti<:Integer}
-    checkbounds(x, i)
-    nzind = nonzeroinds(x)
-    nzval = nonzeros(x)
-
-    m = length(nzind)
-    k = searchsortedfirst(nzind, i)
-    if 1 <= k <= m && nzind[k] == i  # i found
-        nzval[k] = v
-    else  # i not found
-        if !iszero(v)
-            insert!(nzind, k, i)
-            insert!(nzval, k, v)
-        end
-    end
-    x
-end
-
-setindex!(x::SparseVector{Tv,Ti}, v, i::Integer) where {Tv,Ti<:Integer} =
-    setindex!(x, convert(Tv, v), convert(Ti, i))
-
-
-### dropstored!
-"""
-    dropstored!(x::SparseVector, i::Integer)
-
-Drop entry `x[i]` from `x` if `x[i]` is stored and otherwise do nothing.
-
-# Examples
-```jldoctest
-julia> x = sparsevec([1, 3], [1.0, 2.0])
-3-element SparseVector{Float64, Int64} with 2 stored entries:
-  [1]  =  1.0
-  [3]  =  2.0
-
-julia> SparseArrays.dropstored!(x, 3)
-3-element SparseVector{Float64, Int64} with 1 stored entry:
-  [1]  =  1.0
-
-julia> SparseArrays.dropstored!(x, 2)
-3-element SparseVector{Float64, Int64} with 1 stored entry:
-  [1]  =  1.0
-```
-"""
-function dropstored!(x::SparseVector, i::Integer)
-    if !(1 <= i <= length(x::SparseVector))
-        throw(BoundsError(x, i))
-    end
-    searchk = searchsortedfirst(nonzeroinds(x), i)
-    if searchk <= length(nonzeroinds(x)) && nonzeroinds(x)[searchk] == i
-        # Entry x[i] is stored. Drop and return.
-        deleteat!(nonzeroinds(x), searchk)
-        deleteat!(nonzeros(x), searchk)
-    end
-    return x
-end
-# TODO: Implement linear collection indexing methods for dropstored! ?
-# TODO: Implement logical indexing methods for dropstored! ?
-
-
-### Conversion
-
-# convert SparseMatrixCSC to SparseVector
-function SparseVector{Tv,Ti}(s::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti<:Integer}
-    size(s, 2) == 1 || throw(ArgumentError("The input argument must have a single-column."))
-    SparseVector(size(s, 1), rowvals(s), nonzeros(s))
-end
-
-SparseVector{Tv}(s::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = SparseVector{Tv,Ti}(s)
-
-SparseVector(s::AbstractSparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = SparseVector{Tv,Ti}(s)
-
-# convert Vector to SparseVector
-
-"""
-    sparsevec(A)
-
-Convert a vector `A` into a sparse vector of length `m`.
-
-# Examples
-```jldoctest
-julia> sparsevec([1.0, 2.0, 0.0, 0.0, 3.0, 0.0])
-6-element SparseVector{Float64, Int64} with 3 stored entries:
-  [1]  =  1.0
-  [2]  =  2.0
-  [5]  =  3.0
-```
-"""
-sparsevec(a::AbstractVector{T}) where {T} = SparseVector{T, Int}(a)
-sparsevec(a::AbstractArray) = sparsevec(vec(a))
-sparsevec(a::AbstractSparseArray) = vec(a)
-sparsevec(a::AbstractSparseVector) = vec(a)
-sparse(a::AbstractVector) = sparsevec(a)
-
-function _dense2indval!(nzind::Vector{Ti}, nzval::Vector{Tv}, s::AbstractArray{Tv}) where {Tv,Ti}
-    require_one_based_indexing(s)
-    cap = length(nzind);
-    @assert cap == length(nzval)
-    n = length(s)
-    c = 0
-    @inbounds for i = 1:n
-        v = s[i]
-        if !iszero(v)
-            if c >= cap
-                cap *= 2
-                resize!(nzind, cap)
-                resize!(nzval, cap)
-            end
-            c += 1
-            nzind[c] = i
-            nzval[c] = v
-        end
-    end
-    if c < cap
-        resize!(nzind, c)
-        resize!(nzval, c)
-    end
-    return (nzind, nzval)
-end
-
-function _dense2sparsevec(s::AbstractArray{Tv}, initcap::Ti) where {Tv,Ti}
-    nzind, nzval = _dense2indval!(Vector{Ti}(undef, initcap), Vector{Tv}(undef, initcap), s)
-    SparseVector(length(s), nzind, nzval)
-end
-
-SparseVector{Tv,Ti}(s::AbstractVector{Tv}) where {Tv,Ti} =
-    _dense2sparsevec(s, convert(Ti, max(8, div(length(s), 8))))
-
-SparseVector{Tv}(s::AbstractVector{Tv}) where {Tv} = SparseVector{Tv,Int}(s)
-
-SparseVector(s::AbstractVector{Tv}) where {Tv} = SparseVector{Tv,Int}(s)
-
-# copy-constructors
-SparseVector(s::SparseVector{Tv,Ti}) where {Tv,Ti} = SparseVector{Tv,Ti}(s)
-SparseVector{Tv}(s::SparseVector{<:Any,Ti}) where {Tv,Ti} = SparseVector{Tv,Ti}(s)
-function SparseVector{Tv,Ti}(s::SparseVector) where {Tv,Ti}
-    copyind = Vector{Ti}(nonzeroinds(s))
-    copynz = Vector{Tv}(nonzeros(s))
-    SparseVector{Tv,Ti}(length(s), copyind, copynz)
-end
-
-# convert between different types of SparseVector
-convert(T::Type{<:SparseVector}, m::AbstractVector) = m isa T ? m : T(m)
-convert(T::Type{<:SparseVector}, m::AbstractSparseMatrixCSC) = T(m)
-convert(T::Type{<:AbstractSparseMatrixCSC}, v::SparseVector) = T(v)
-
-### copying
-function prep_sparsevec_copy_dest!(A::SparseVector, lB, nnzB)
-    lA = length(A)
-    lA >= lB || throw(BoundsError())
-    # If the two vectors have the same length then all the elements in A will be overwritten.
-    if length(A) == lB
-        resize!(nonzeros(A), nnzB)
-        resize!(nonzeroinds(A), nnzB)
-    else
-        nnzA = nnz(A)
-
-        lastmodindA = searchsortedlast(nonzeroinds(A), lB)
-        if lastmodindA >= nnzB
-            # A will have fewer non-zero elements; unmodified elements are kept at the end.
-            deleteat!(nonzeroinds(A), nnzB+1:lastmodindA)
-            deleteat!(nonzeros(A), nnzB+1:lastmodindA)
-        else
-            # A will have more non-zero elements; unmodified elements are kept at the end.
-            resize!(nonzeroinds(A), nnzB + nnzA - lastmodindA)
-            resize!(nonzeros(A), nnzB + nnzA - lastmodindA)
-            copyto!(nonzeroinds(A), nnzB+1, nonzeroinds(A), lastmodindA+1, nnzA-lastmodindA)
-            copyto!(nonzeros(A), nnzB+1, nonzeros(A), lastmodindA+1, nnzA-lastmodindA)
-        end
-    end
-end
-
-function copyto!(A::SparseVector, B::SparseVector)
-    prep_sparsevec_copy_dest!(A, length(B), nnz(B))
-    copyto!(nonzeroinds(A), nonzeroinds(B))
-    copyto!(nonzeros(A), nonzeros(B))
-    return A
-end
-
-copyto!(A::SparseVector, B::AbstractVector) = copyto!(A, sparsevec(B))
-
-function copyto!(A::SparseVector, B::AbstractSparseMatrixCSC)
-    prep_sparsevec_copy_dest!(A, length(B), nnz(B))
-
-    ptr = 1
-    @assert length(nonzeroinds(A)) >= length(rowvals(B))
-    maximum(getcolptr(B))-1 <= length(rowvals(B)) || throw(BoundsError())
-    @inbounds for col=1:length(getcolptr(B))-1
-        offsetA = (col - 1) * size(B, 1)
-        while ptr <= getcolptr(B)[col+1]-1
-            nonzeroinds(A)[ptr] = rowvals(B)[ptr] + offsetA
-            ptr += 1
-        end
-    end
-    copyto!(nonzeros(A), nonzeros(B))
-    return A
-end
-
-copyto!(A::AbstractSparseMatrixCSC, B::SparseVector{TvB,TiB}) where {TvB,TiB} =
-    copyto!(A, SparseMatrixCSC{TvB,TiB}(length(B), 1, TiB[1, length(nonzeroinds(B))+1], nonzeroinds(B), nonzeros(B)))
-
-
-### Rand Construction
-sprand(n::Integer, p::AbstractFloat, rfn::Function, ::Type{T}) where {T} = sprand(default_rng(), n, p, rfn, T)
-function sprand(r::AbstractRNG, n::Integer, p::AbstractFloat, rfn::Function, ::Type{T}) where T
-    I = randsubseq(r, 1:convert(Int, n), p)
-    V = rfn(r, T, length(I))
-    SparseVector(n, I, V)
-end
-
-sprand(n::Integer, p::AbstractFloat, rfn::Function) = sprand(default_rng(), n, p, rfn)
-function sprand(r::AbstractRNG, n::Integer, p::AbstractFloat, rfn::Function)
-    I = randsubseq(r, 1:convert(Int, n), p)
-    V = rfn(r, length(I))
-    SparseVector(n, I, V)
-end
-
-sprand(n::Integer, p::AbstractFloat) = sprand(default_rng(), n, p, rand)
-
-sprand(r::AbstractRNG, n::Integer, p::AbstractFloat) = sprand(r, n, p, rand)
-sprand(r::AbstractRNG, ::Type{T}, n::Integer, p::AbstractFloat) where {T} = sprand(r, n, p, (r, i) -> rand(r, T, i))
-sprand(r::AbstractRNG, ::Type{Bool}, n::Integer, p::AbstractFloat) = sprand(r, n, p, truebools)
-sprand(::Type{T}, n::Integer, p::AbstractFloat) where {T} = sprand(default_rng(), T, n, p)
-
-sprandn(n::Integer, p::AbstractFloat) = sprand(default_rng(), n, p, randn)
-sprandn(r::AbstractRNG, n::Integer, p::AbstractFloat) = sprand(r, n, p, randn)
-sprandn(::Type{T}, n::Integer, p::AbstractFloat) where T = sprand(default_rng(), n, p, (r, i) -> randn(r, T, i))
-sprandn(r::AbstractRNG, ::Type{T}, n::Integer, p::AbstractFloat) where T = sprand(r, n, p, (r, i) -> randn(r, T, i))
-
-## Indexing into Matrices can return SparseVectors
-
-# Column slices
-function getindex(x::AbstractSparseMatrixCSC, ::Colon, j::Integer)
-    checkbounds(x, :, j)
-    r1 = convert(Int, getcolptr(x)[j])
-    r2 = convert(Int, getcolptr(x)[j+1]) - 1
-    SparseVector(size(x, 1), rowvals(x)[r1:r2], nonzeros(x)[r1:r2])
-end
-
-function getindex(x::AbstractSparseMatrixCSC, I::AbstractUnitRange, j::Integer)
-    checkbounds(x, I, j)
-    # Get the selected column
-    c1 = convert(Int, getcolptr(x)[j])
-    c2 = convert(Int, getcolptr(x)[j+1]) - 1
-    # Restrict to the selected rows
-    r1 = searchsortedfirst(rowvals(x), first(I), c1, c2, Forward)
-    r2 = searchsortedlast(rowvals(x), last(I), c1, c2, Forward)
-    SparseVector(length(I), [rowvals(x)[i] - first(I) + 1 for i = r1:r2], nonzeros(x)[r1:r2])
-end
-
-# In the general case, we piggy back upon SparseMatrixCSC's optimized solution
-@inline function getindex(A::AbstractSparseMatrixCSC, I::AbstractVector, J::Integer)
-    M = A[I, [J]]
-    SparseVector(size(M, 1), rowvals(M), nonzeros(M))
-end
-
-# Row slices
-getindex(A::AbstractSparseMatrixCSC, i::Integer, ::Colon) = A[i, 1:end]
-function Base.getindex(A::AbstractSparseMatrixCSC{Tv,Ti}, i::Integer, J::AbstractVector) where {Tv,Ti}
-    require_one_based_indexing(A, J)
-    checkbounds(A, i, J)
-    nJ = length(J)
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-
-    nzinds = Vector{Ti}()
-    nzvals = Vector{Tv}()
-
-    # adapted from SparseMatrixCSC's sorted_bsearch_A
-    ptrI = 1
-    @inbounds for j = 1:nJ
-        col = J[j]
-        rowI = i
-        ptrA = Int(colptrA[col])
-        stopA = Int(colptrA[col+1]-1)
-        if ptrA <= stopA
-            if rowvalA[ptrA] <= rowI
-                ptrA = searchsortedfirst(rowvalA, rowI, ptrA, stopA, Base.Order.Forward)
-                if ptrA <= stopA && rowvalA[ptrA] == rowI
-                    push!(nzinds, j)
-                    push!(nzvals, nzvalA[ptrA])
-                end
-            end
-            ptrI += 1
-        end
-    end
-    return SparseVector(nJ, nzinds, nzvals)
-end
-
-
-# Logical and linear indexing into SparseMatrices
-getindex(A::AbstractSparseMatrixCSC, I::AbstractVector{Bool}) = _logical_index(A, I) # Ambiguities
-getindex(A::AbstractSparseMatrixCSC, I::AbstractArray{Bool}) = _logical_index(A, I)
-function _logical_index(A::AbstractSparseMatrixCSC{Tv}, I::AbstractArray{Bool}) where Tv
-    require_one_based_indexing(A, I)
-    checkbounds(A, I)
-    n = sum(I)
-    nnzB = min(n, nnz(A))
-
-    colptrA = getcolptr(A); rowvalA = rowvals(A); nzvalA = nonzeros(A)
-    rowvalB = Vector{Int}(undef, nnzB)
-    nzvalB = Vector{Tv}(undef, nnzB)
-    c = 1
-    rowB = 1
-
-    @inbounds for col in 1:size(A, 2)
-        r1 = colptrA[col]
-        r2 = colptrA[col+1]-1
-
-        for row in 1:size(A, 1)
-            if I[row, col]
-                while (r1 <= r2) && (rowvalA[r1] < row)
-                    r1 += 1
-                end
-                if (r1 <= r2) && (rowvalA[r1] == row)
-                    nzvalB[c] = nzvalA[r1]
-                    rowvalB[c] = rowB
-                    c += 1
-                end
-                rowB += 1
-                (rowB > n) && break
-            end
-        end
-        (rowB > n) && break
-    end
-    if nnzB > (c-1)
-        deleteat!(nzvalB, c:nnzB)
-        deleteat!(rowvalB, c:nnzB)
-    end
-    SparseVector(n, rowvalB, nzvalB)
-end
-
-# TODO: further optimizations are available for ::Colon and other types of AbstractRange
-getindex(A::AbstractSparseMatrixCSC, ::Colon) = A[1:end]
-
-function getindex(A::AbstractSparseMatrixCSC{Tv}, I::AbstractUnitRange) where Tv
-    require_one_based_indexing(A, I)
-    checkbounds(A, I)
-    szA = size(A)
-    nA = szA[1]*szA[2]
-    colptrA = getcolptr(A)
-    rowvalA = rowvals(A)
-    nzvalA = nonzeros(A)
-
-    n = length(I)
-    nnzB = min(n, nnz(A))
-    rowvalB = Vector{Int}(undef, nnzB)
-    nzvalB = Vector{Tv}(undef, nnzB)
-
-    rowstart,colstart = Base._ind2sub(szA, first(I))
-    rowend,colend = Base._ind2sub(szA, last(I))
-
-    idxB = 1
-    @inbounds for col in colstart:colend
-        minrow = (col == colstart ? rowstart : 1)
-        maxrow = (col == colend ? rowend : szA[1])
-        for r in colptrA[col]:(colptrA[col+1]-1)
-            rowA = rowvalA[r]
-            if minrow <= rowA <= maxrow
-                rowvalB[idxB] = Base._sub2ind(szA, rowA, col) - first(I) + 1
-                nzvalB[idxB] = nzvalA[r]
-                idxB += 1
-            end
-        end
-    end
-    if nnzB > (idxB-1)
-        deleteat!(nzvalB, idxB:nnzB)
-        deleteat!(rowvalB, idxB:nnzB)
-    end
-    SparseVector(n, rowvalB, nzvalB)
-end
-
-function getindex(A::AbstractSparseMatrixCSC{Tv,Ti}, I::AbstractVector) where {Tv,Ti}
-    require_one_based_indexing(A, I)
-    @boundscheck checkbounds(A, I)
-    szA = size(A)
-    nA = szA[1]*szA[2]
-    colptrA = getcolptr(A)
-    rowvalA = rowvals(A)
-    nzvalA = nonzeros(A)
-
-    n = length(I)
-    nnzB = min(n, nnz(A))
-    rowvalB = Vector{Ti}(undef, nnzB)
-    nzvalB = Vector{Tv}(undef, nnzB)
-
-    idxB = 1
-    for i in 1:n
-        row,col = Base._ind2sub(szA, I[i])
-        for r in colptrA[col]:(colptrA[col+1]-1)
-            @inbounds if rowvalA[r] == row
-                if idxB <= nnzB
-                    rowvalB[idxB] = i
-                    nzvalB[idxB] = nzvalA[r]
-                    idxB += 1
-                else # this can happen if there are repeated indices in I
-                    push!(rowvalB, i)
-                    push!(nzvalB, nzvalA[r])
-                end
-                break
-            end
-        end
-    end
-    if nnzB > (idxB-1)
-        deleteat!(nzvalB, idxB:nnzB)
-        deleteat!(rowvalB, idxB:nnzB)
-    end
-    SparseVector(n, rowvalB, nzvalB)
-end
-
-Base.copy(a::SubArray{<:Any,<:Any,<:Union{SparseVector, AbstractSparseMatrixCSC}}) = a.parent[a.indices...]
-
-function findall(x::SparseVector)
-    return findall(identity, x)
-end
-
-function findall(p::Function, x::SparseVector{<:Any,Ti}) where Ti
-    if p(zero(eltype(x)))
-        return invoke(findall, Tuple{Function, Any}, p, x)
-    end
-    numnz = nnz(x)
-    I = Vector{Ti}(undef, numnz)
-
-    nzind = nonzeroinds(x)
-    nzval = nonzeros(x)
-
-    count = 1
-    @inbounds for i = 1 : numnz
-        if p(nzval[i])
-            I[count] = nzind[i]
-            count += 1
-        end
-    end
-
-    count -= 1
-    if numnz != count
-        deleteat!(I, (count+1):numnz)
-    end
-
-    return I
-end
-findall(p::Base.Fix2{typeof(in)}, x::SparseVector{<:Any,Ti}) where {Ti} =
-    invoke(findall, Tuple{Base.Fix2{typeof(in)}, AbstractArray}, p, x)
-
-"""
-    findnz(x::SparseVector)
-
-Return a tuple `(I, V)`  where `I` is the indices of the stored ("structurally non-zero")
-values in sparse vector `x` and `V` is a vector of the values.
-
-# Examples
-```jldoctest
-julia> x = sparsevec([1 2 0; 0 0 3; 0 4 0])
-9-element SparseVector{Int64, Int64} with 4 stored entries:
-  [1]  =  1
-  [4]  =  2
-  [6]  =  4
-  [8]  =  3
-
-julia> findnz(x)
-([1, 4, 6, 8], [1, 2, 4, 3])
-```
-"""
-function findnz(x::SparseVector{Tv,Ti}) where {Tv,Ti}
-    numnz = nnz(x)
-
-    I = Vector{Ti}(undef, numnz)
-    V = Vector{Tv}(undef, numnz)
-
-    nzind = nonzeroinds(x)
-    nzval = nonzeros(x)
-
-    @inbounds for i = 1 : numnz
-        I[i] = nzind[i]
-        V[i] = nzval[i]
-    end
-
-    return (I, V)
-end
-
-function _sparse_findnextnz(v::SparseVector, i::Integer)
-    n = searchsortedfirst(nonzeroinds(v), i)
-    if n > length(nonzeroinds(v))
-        return nothing
-    else
-        return nonzeroinds(v)[n]
-    end
-end
-
-function _sparse_findprevnz(v::SparseVector, i::Integer)
-    n = searchsortedlast(nonzeroinds(v), i)
-    if iszero(n)
-        return nothing
-    else
-        return nonzeroinds(v)[n]
-    end
-end
-
-### Generic functions operating on AbstractSparseVector
-
-### getindex
-
-function _spgetindex(m::Int, nzind::AbstractVector{Ti}, nzval::AbstractVector{Tv}, i::Integer) where {Tv,Ti}
-    ii = searchsortedfirst(nzind, convert(Ti, i))
-    (ii <= m && nzind[ii] == i) ? nzval[ii] : zero(Tv)
-end
-
-function getindex(x::AbstractSparseVector, i::Integer)
-    checkbounds(x, i)
-    _spgetindex(nnz(x), nonzeroinds(x), nonzeros(x), i)
-end
-
-function getindex(x::AbstractSparseVector{Tv,Ti}, I::AbstractUnitRange) where {Tv,Ti}
-    checkbounds(x, I)
-    xlen = length(x)
-    i0 = first(I)
-    i1 = last(I)
-
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    m = length(xnzind)
-
-    # locate the first j0, s.t. xnzind[j0] >= i0
-    j0 = searchsortedfirst(xnzind, i0)
-    # locate the last j1, s.t. xnzind[j1] <= i1
-    j1 = searchsortedlast(xnzind, i1, j0, m, Forward)
-
-    # compute the number of non-zeros
-    jrgn = j0:j1
-    mr = length(jrgn)
-    rind = Vector{Ti}(undef, mr)
-    rval = Vector{Tv}(undef, mr)
-    if mr > 0
-        c = 0
-        for j in jrgn
-            c += 1
-            rind[c] = convert(Ti, xnzind[j] - i0 + 1)
-            rval[c] = xnzval[j]
-        end
-    end
-    SparseVector(length(I), rind, rval)
-end
-
-getindex(x::AbstractSparseVector, I::AbstractVector{Bool}) = x[findall(I)]
-getindex(x::AbstractSparseVector, I::AbstractArray{Bool}) = x[LinearIndices(I)[findall(I)]]
-@inline function getindex(x::AbstractSparseVector{Tv,Ti}, I::AbstractVector) where {Tv,Ti}
-    # SparseMatrixCSC has a nicely optimized routine for this; punt
-    S = SparseMatrixCSC(length(x::SparseVector), 1, Ti[1,length(nonzeroinds(x))+1], nonzeroinds(x), nonzeros(x))
-    S[I, 1]
-end
-
-function getindex(x::AbstractSparseVector{Tv,Ti}, I::AbstractArray) where {Tv,Ti}
-    # punt to SparseMatrixCSC
-    S = SparseMatrixCSC(length(x::SparseVector), 1, Ti[1,length(nonzeroinds(x))+1], nonzeroinds(x), nonzeros(x))
-    S[I]
-end
-
-getindex(x::AbstractSparseVector, ::Colon) = copy(x)
-
-function Base.isstored(x::AbstractSparseVector, i::Integer)
-    @boundscheck checkbounds(x, i)
-    return i in nonzeroinds(x)
-end
-
-### show and friends
-
-function show(io::IO, ::MIME"text/plain", x::AbstractSparseVector)
-    xnnz = length(nonzeros(x))
-    print(io, length(x), "-element ", typeof(x), " with ", xnnz,
-           " stored ", xnnz == 1 ? "entry" : "entries")
-    if xnnz != 0
-        println(io, ":")
-        show(IOContext(io, :typeinfo => eltype(x)), x)
-    end
-end
-
-show(io::IO, x::AbstractSparseVector) = show(convert(IOContext, io), x)
-function show(io::IOContext, x::AbstractSparseVector)
-    # TODO: make this a one-line form
-    n = length(x)
-    nzind = nonzeroinds(x)
-    nzval = nonzeros(x)
-    if isempty(nzind)
-        return show(io, MIME("text/plain"), x)
-    end
-    limit = get(io, :limit, false)::Bool
-    half_screen_rows = limit ? div(displaysize(io)[1] - 8, 2) : typemax(Int)
-    pad = ndigits(n)
-    if !haskey(io, :compact)
-        io = IOContext(io, :compact => true)
-    end
-    for k = eachindex(nzind)
-        if k < half_screen_rows || k > length(nzind) - half_screen_rows
-            print(io, "  ", '[', rpad(nzind[k], pad), "]  =  ")
-            if isassigned(nzval, Int(k))
-                show(io, nzval[k])
-            else
-                print(io, Base.undef_ref_str)
-            end
-            k != length(nzind) && println(io)
-        elseif k == half_screen_rows
-            println(io, "   ", " "^pad, "   \u22ee")
-        end
-    end
-end
-
-### Conversion to matrix
-
-function SparseMatrixCSC{Tv,Ti}(x::AbstractSparseVector) where {Tv,Ti}
-    require_one_based_indexing(x)
-    n = length(x)
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    m = length(xnzind)
-    colptr = Ti[1, m+1]
-    # Note that this *cannot* share data like normal array conversions, since
-    # modifying one would put the other in an inconsistent state
-    rowval = Vector{Ti}(xnzind)
-    nzval = Vector{Tv}(xnzval)
-    SparseMatrixCSC(n, 1, colptr, rowval, nzval)
-end
-
-SparseMatrixCSC{Tv}(x::AbstractSparseVector{<:Any,Ti}) where {Tv,Ti} = SparseMatrixCSC{Tv,Ti}(x)
-
-SparseMatrixCSC(x::AbstractSparseVector{Tv,Ti}) where {Tv,Ti} = SparseMatrixCSC{Tv,Ti}(x)
-
-function Vector(x::AbstractSparseVector{Tv}) where Tv
-    require_one_based_indexing(x)
-    n = length(x)
-    n == 0 && return Vector{Tv}()
-    nzind = nonzeroinds(x)
-    nzval = nonzeros(x)
-    r = zeros(Tv, n)
-    for k in 1:nnz(x)
-        i = nzind[k]
-        v = nzval[k]
-        r[i] = v
-    end
-    return r
-end
-Array(x::AbstractSparseVector) = Vector(x)
-
-### Array manipulation
-
-vec(x::AbstractSparseVector) = x
-copy(x::AbstractSparseVector) =
-    SparseVector(length(x), copy(nonzeroinds(x)), copy(nonzeros(x)))
-
-float(x::AbstractSparseVector{<:AbstractFloat}) = x
-float(x::AbstractSparseVector) =
-    SparseVector(length(x), copy(nonzeroinds(x)), float(nonzeros(x)))
-
-complex(x::AbstractSparseVector{<:Complex}) = x
-complex(x::AbstractSparseVector) =
-    SparseVector(length(x), copy(nonzeroinds(x)), complex(nonzeros(x)))
-
-
-### Concatenation
-
-# Without the first of these methods, horizontal concatenations of SparseVectors fall
-# back to the horizontal concatenation method that ensures that combinations of
-# sparse/special/dense matrix/vector types concatenate to SparseMatrixCSCs, instead
-# of _absspvec_hcat below. The <:Integer qualifications are necessary for correct dispatch.
-hcat(X::SparseVector{Tv,Ti}...) where {Tv,Ti<:Integer} = _absspvec_hcat(X...)
-hcat(X::AbstractSparseVector{Tv,Ti}...) where {Tv,Ti<:Integer} = _absspvec_hcat(X...)
-function _absspvec_hcat(X::AbstractSparseVector{Tv,Ti}...) where {Tv,Ti}
-    # check sizes
-    n = length(X)
-    m = length(X[1])
-    tnnz = nnz(X[1])
-    for j = 2:n
-        length(X[j]) == m ||
-            throw(DimensionMismatch("Inconsistent column lengths."))
-        tnnz += nnz(X[j])
-    end
-
-    # construction
-    colptr = Vector{Ti}(undef, n+1)
-    nzrow = Vector{Ti}(undef, tnnz)
-    nzval = Vector{Tv}(undef, tnnz)
-    roff = 1
-    @inbounds for j = 1:n
-        xj = X[j]
-        xnzind = nonzeroinds(xj)
-        xnzval = nonzeros(xj)
-        colptr[j] = roff
-        copyto!(nzrow, roff, xnzind)
-        copyto!(nzval, roff, xnzval)
-        roff += length(xnzind)
-    end
-    colptr[n+1] = roff
-    SparseMatrixCSC{Tv,Ti}(m, n, colptr, nzrow, nzval)
-end
-
-# Without the first of these methods, vertical concatenations of SparseVectors fall
-# back to the vertical concatenation method that ensures that combinations of
-# sparse/special/dense matrix/vector types concatenate to SparseMatrixCSCs, instead
-# of _absspvec_vcat below. The <:Integer qualifications are necessary for correct dispatch.
-vcat(X::SparseVector{Tv,Ti}...) where {Tv,Ti<:Integer} = _absspvec_vcat(X...)
-vcat(X::AbstractSparseVector{Tv,Ti}...) where {Tv,Ti<:Integer} = _absspvec_vcat(X...)
-function vcat(X::SparseVector...)
-    commeltype = promote_type(map(eltype, X)...)
-    commindtype = promote_type(map(indtype, X)...)
-    vcat(map(x -> SparseVector{commeltype,commindtype}(x), X)...)
-end
-function _absspvec_vcat(X::AbstractSparseVector{Tv,Ti}...) where {Tv,Ti}
-    # check sizes
-    n = length(X)
-    tnnz = 0
-    for j = 1:n
-        tnnz += nnz(X[j])
-    end
-
-    # construction
-    rnzind = Vector{Ti}(undef, tnnz)
-    rnzval = Vector{Tv}(undef, tnnz)
-    ir = 0
-    len = 0
-    @inbounds for j = 1:n
-        xj = X[j]
-        xnzind = nonzeroinds(xj)
-        xnzval = nonzeros(xj)
-        xnnz = length(xnzind)
-        for i = 1:xnnz
-            rnzind[ir + i] = xnzind[i] + len
-        end
-        copyto!(rnzval, ir+1, xnzval)
-        ir += xnnz
-        len += length(xj)
-    end
-    SparseVector(len, rnzind, rnzval)
-end
-
-hcat(Xin::Union{Vector, AbstractSparseVector}...) = hcat(map(sparse, Xin)...)
-vcat(Xin::Union{Vector, AbstractSparseVector}...) = vcat(map(sparse, Xin)...)
-# Without the following method, vertical concatenations of SparseVectors with Vectors
-# fall back to the vertical concatenation method that ensures that combinations of
-# sparse/special/dense matrix/vector types concatenate to SparseMatrixCSCs (because
-# the vcat method immediately above is less specific, being defined in AbstractSparseVector
-# rather than SparseVector).
-vcat(X::Union{Vector,SparseVector}...) = vcat(map(sparse, X)...)
-
-
-### Concatenation of un/annotated sparse/special/dense vectors/matrices
-
-# TODO: These methods and definitions should be moved to a more appropriate location,
-# particularly some future equivalent of base/linalg/special.jl dedicated to interactions
-# between a broader set of matrix types.
-
-# TODO: A definition similar to the third exists in base/linalg/bidiag.jl. These definitions
-# should be consolidated in a more appropriate location, e.g. base/linalg/special.jl.
-const _SparseArrays = Union{SparseVector, AbstractSparseMatrixCSC, Adjoint{<:Any,<:SparseVector}, Transpose{<:Any,<:SparseVector}}
-const _SpecialArrays = Union{Diagonal, Bidiagonal, Tridiagonal, SymTridiagonal}
-const _SparseConcatArrays = Union{_SpecialArrays, _SparseArrays}
-
-const _Symmetric_SparseConcatArrays{T,A<:_SparseConcatArrays} = Symmetric{T,A}
-const _Hermitian_SparseConcatArrays{T,A<:_SparseConcatArrays} = Hermitian{T,A}
-const _Triangular_SparseConcatArrays{T,A<:_SparseConcatArrays} = LinearAlgebra.AbstractTriangular{T,A}
-const _Annotated_SparseConcatArrays = Union{_Triangular_SparseConcatArrays, _Symmetric_SparseConcatArrays, _Hermitian_SparseConcatArrays}
-
-const _Symmetric_DenseArrays{T,A<:Matrix} = Symmetric{T,A}
-const _Hermitian_DenseArrays{T,A<:Matrix} = Hermitian{T,A}
-const _Triangular_DenseArrays{T,A<:Matrix} = LinearAlgebra.AbstractTriangular{T,A}
-const _Annotated_DenseArrays = Union{_Triangular_DenseArrays, _Symmetric_DenseArrays, _Hermitian_DenseArrays}
-const _Annotated_Typed_DenseArrays{T} = Union{_Triangular_DenseArrays{T}, _Symmetric_DenseArrays{T}, _Hermitian_DenseArrays{T}}
-
-const _SparseConcatGroup = Union{Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _SparseConcatArrays, _Annotated_SparseConcatArrays, _Annotated_DenseArrays}
-const _DenseConcatGroup = Union{Vector, Adjoint{<:Any,<:Vector}, Transpose{<:Any,<:Vector}, Matrix, _Annotated_DenseArrays}
-const _TypedDenseConcatGroup{T} = Union{Vector{T}, Adjoint{T,Vector{T}}, Transpose{T,Vector{T}}, Matrix{T}, _Annotated_Typed_DenseArrays{T}}
-
-# Concatenations involving un/annotated sparse/special matrices/vectors should yield sparse arrays
-function Base._cat(dims, Xin::_SparseConcatGroup...)
-    X = map(x -> SparseMatrixCSC(issparse(x) ? x : sparse(x)), Xin)
-    T = promote_eltype(Xin...)
-    Base.cat_t(T, X...; dims=dims)
-end
-function hcat(Xin::_SparseConcatGroup...)
-    X = map(x -> SparseMatrixCSC(issparse(x) ? x : sparse(x)), Xin)
-    hcat(X...)
-end
-function vcat(Xin::_SparseConcatGroup...)
-    X = map(x -> SparseMatrixCSC(issparse(x) ? x : sparse(x)), Xin)
-    vcat(X...)
-end
-function hvcat(rows::Tuple{Vararg{Int}}, X::_SparseConcatGroup...)
-    nbr = length(rows)  # number of block rows
-
-    tmp_rows = Vector{SparseMatrixCSC}(undef, nbr)
-    k = 0
-    @inbounds for i = 1 : nbr
-        tmp_rows[i] = hcat(X[(1 : rows[i]) .+ k]...)
-        k += rows[i]
-    end
-    vcat(tmp_rows...)
-end
-
-# make sure UniformScaling objects are converted to sparse matrices for concatenation
-promote_to_array_type(A::Tuple{Vararg{Union{_SparseConcatGroup,UniformScaling}}}) = SparseMatrixCSC
-promote_to_array_type(A::Tuple{Vararg{Union{_DenseConcatGroup,UniformScaling}}}) = Matrix
-promote_to_arrays_(n::Int, ::Type{SparseMatrixCSC}, J::UniformScaling) = sparse(J, n, n)
-
-# Concatenations strictly involving un/annotated dense matrices/vectors should yield dense arrays
-Base._cat(dims, xs::_DenseConcatGroup...) = Base.cat_t(promote_eltype(xs...), xs...; dims=dims)
-vcat(A::Vector...) = Base.typed_vcat(promote_eltype(A...), A...)
-vcat(A::_DenseConcatGroup...) = Base.typed_vcat(promote_eltype(A...), A...)
-hcat(A::Vector...) = Base.typed_hcat(promote_eltype(A...), A...)
-hcat(A::_DenseConcatGroup...) = Base.typed_hcat(promote_eltype(A...), A...)
-hvcat(rows::Tuple{Vararg{Int}}, xs::_DenseConcatGroup...) = Base.typed_hvcat(promote_eltype(xs...), rows, xs...)
-# For performance, specially handle the case where the matrices/vectors have homogeneous eltype
-Base._cat(dims, xs::_TypedDenseConcatGroup{T}...) where {T} = Base.cat_t(T, xs...; dims=dims)
-vcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_vcat(T, A...)
-hcat(A::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hcat(T, A...)
-hvcat(rows::Tuple{Vararg{Int}}, xs::_TypedDenseConcatGroup{T}...) where {T} = Base.typed_hvcat(T, rows, xs...)
-
-
-### math functions
-
-### Unary Map
-
-# zero-preserving functions (z->z, nz->nz)
--(x::SparseVector) = SparseVector(length(x), copy(nonzeroinds(x)), -(nonzeros(x)))
-
-# functions f, such that
-#   f(x) can be zero or non-zero when x != 0
-#   f(x) = 0 when x == 0
-#
-macro unarymap_nz2z_z2z(op, TF)
-    esc(quote
-        function $(op)(x::AbstractSparseVector{Tv,Ti}) where Tv<:$(TF) where Ti<:Integer
-            require_one_based_indexing(x)
-            R = typeof($(op)(zero(Tv)))
-            xnzind = nonzeroinds(x)
-            xnzval = nonzeros(x)
-            m = length(xnzind)
-
-            ynzind = Vector{Ti}(undef, m)
-            ynzval = Vector{R}(undef, m)
-            ir = 0
-            @inbounds for j = 1:m
-                i = xnzind[j]
-                v = $(op)(xnzval[j])
-                if v != zero(v)
-                    ir += 1
-                    ynzind[ir] = i
-                    ynzval[ir] = v
-                end
-            end
-            resize!(ynzind, ir)
-            resize!(ynzval, ir)
-            SparseVector(length(x), ynzind, ynzval)
-        end
-    end)
-end
-
-# the rest of real, conj, imag are handled correctly via AbstractArray methods
-@unarymap_nz2z_z2z real Complex
-conj(x::SparseVector{<:Complex}) = SparseVector(length(x), copy(nonzeroinds(x)), conj(nonzeros(x)))
-imag(x::AbstractSparseVector{Tv,Ti}) where {Tv<:Real,Ti<:Integer} = SparseVector(length(x), Ti[], Tv[])
-@unarymap_nz2z_z2z imag Complex
-
-# function that does not preserve zeros
-
-macro unarymap_z2nz(op, TF)
-    esc(quote
-        function $(op)(x::AbstractSparseVector{Tv,<:Integer}) where Tv<:$(TF)
-            require_one_based_indexing(x)
-            v0 = $(op)(zero(Tv))
-            R = typeof(v0)
-            xnzind = nonzeroinds(x)
-            xnzval = nonzeros(x)
-            n = length(x)
-            m = length(xnzind)
-            y = fill(v0, n)
-            @inbounds for j = 1:m
-                y[xnzind[j]] = $(op)(xnzval[j])
-            end
-            y
-        end
-    end)
-end
-
-### Binary Map
-
-# mode:
-# 0: f(nz, nz) -> nz, f(z, nz) -> z, f(nz, z) ->  z
-# 1: f(nz, nz) -> z/nz, f(z, nz) -> nz, f(nz, z) -> nz
-# 2: f(nz, nz) -> z/nz, f(z, nz) -> z/nz, f(nz, z) -> z/nz
-
-function _binarymap(f::Function,
-                    x::AbstractSparseVector{Tx},
-                    y::AbstractSparseVector{Ty},
-                    mode::Int) where {Tx,Ty}
-    0 <= mode <= 2 || throw(ArgumentError("Incorrect mode $mode."))
-    R = typeof(f(zero(Tx), zero(Ty)))
-    n = length(x)
-    length(y) == n || throw(DimensionMismatch())
-
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    ynzind = nonzeroinds(y)
-    ynzval = nonzeros(y)
-    mx = length(xnzind)
-    my = length(ynzind)
-    cap = (mode == 0 ? min(mx, my) : mx + my)::Int
-
-    rind = Vector{Int}(undef, cap)
-    rval = Vector{R}(undef, cap)
-    ir = 0
-    ix = 1
-    iy = 1
-
-    ir = (
-        mode == 0 ? _binarymap_mode_0!(f, mx, my,
-            xnzind, xnzval, ynzind, ynzval, rind, rval) :
-        mode == 1 ? _binarymap_mode_1!(f, mx, my,
-            xnzind, xnzval, ynzind, ynzval, rind, rval) :
-        _binarymap_mode_2!(f, mx, my,
-            xnzind, xnzval, ynzind, ynzval, rind, rval)
-    )::Int
-
-    resize!(rind, ir)
-    resize!(rval, ir)
-    return SparseVector(n, rind, rval)
-end
-
-function _binarymap_mode_0!(f::Function, mx::Int, my::Int,
-                            xnzind, xnzval, ynzind, ynzval, rind, rval)
-    # f(nz, nz) -> nz, f(z, nz) -> z, f(nz, z) ->  z
-    ir = 0; ix = 1; iy = 1
-    @inbounds while ix <= mx && iy <= my
-        jx = xnzind[ix]
-        jy = ynzind[iy]
-        if jx == jy
-            v = f(xnzval[ix], ynzval[iy])
-            ir += 1; rind[ir] = jx; rval[ir] = v
-            ix += 1; iy += 1
-        elseif jx < jy
-            ix += 1
-        else
-            iy += 1
-        end
-    end
-    return ir
-end
-
-function _binarymap_mode_1!(f::Function, mx::Int, my::Int,
-                            xnzind, xnzval::AbstractVector{Tx},
-                            ynzind, ynzval::AbstractVector{Ty},
-                            rind, rval) where {Tx,Ty}
-    # f(nz, nz) -> z/nz, f(z, nz) -> nz, f(nz, z) -> nz
-    ir = 0; ix = 1; iy = 1
-    @inbounds while ix <= mx && iy <= my
-        jx = xnzind[ix]
-        jy = ynzind[iy]
-        if jx == jy
-            v = f(xnzval[ix], ynzval[iy])
-            if v != zero(v)
-                ir += 1; rind[ir] = jx; rval[ir] = v
-            end
-            ix += 1; iy += 1
-        elseif jx < jy
-            v = f(xnzval[ix], zero(Ty))
-            ir += 1; rind[ir] = jx; rval[ir] = v
-            ix += 1
-        else
-            v = f(zero(Tx), ynzval[iy])
-            ir += 1; rind[ir] = jy; rval[ir] = v
-            iy += 1
-        end
-    end
-    @inbounds while ix <= mx
-        v = f(xnzval[ix], zero(Ty))
-        ir += 1; rind[ir] = xnzind[ix]; rval[ir] = v
-        ix += 1
-    end
-    @inbounds while iy <= my
-        v = f(zero(Tx), ynzval[iy])
-        ir += 1; rind[ir] = ynzind[iy]; rval[ir] = v
-        iy += 1
-    end
-    return ir
-end
-
-function _binarymap_mode_2!(f::Function, mx::Int, my::Int,
-                            xnzind, xnzval::AbstractVector{Tx},
-                            ynzind, ynzval::AbstractVector{Ty},
-                            rind, rval) where {Tx,Ty}
-    # f(nz, nz) -> z/nz, f(z, nz) -> z/nz, f(nz, z) -> z/nz
-    ir = 0; ix = 1; iy = 1
-    @inbounds while ix <= mx && iy <= my
-        jx = xnzind[ix]
-        jy = ynzind[iy]
-        if jx == jy
-            v = f(xnzval[ix], ynzval[iy])
-            if v != zero(v)
-                ir += 1; rind[ir] = jx; rval[ir] = v
-            end
-            ix += 1; iy += 1
-        elseif jx < jy
-            v = f(xnzval[ix], zero(Ty))
-            if v != zero(v)
-                ir += 1; rind[ir] = jx; rval[ir] = v
-            end
-            ix += 1
-        else
-            v = f(zero(Tx), ynzval[iy])
-            if v != zero(v)
-                ir += 1; rind[ir] = jy; rval[ir] = v
-            end
-            iy += 1
-        end
-    end
-    @inbounds while ix <= mx
-        v = f(xnzval[ix], zero(Ty))
-        if v != zero(v)
-            ir += 1; rind[ir] = xnzind[ix]; rval[ir] = v
-        end
-        ix += 1
-    end
-    @inbounds while iy <= my
-        v = f(zero(Tx), ynzval[iy])
-        if v != zero(v)
-            ir += 1; rind[ir] = ynzind[iy]; rval[ir] = v
-        end
-        iy += 1
-    end
-    return ir
-end
-
-# definition of a few known broadcasted/mapped binary functions — all others defer to HigherOrderFunctions
-
-_bcast_binary_map(f, x, y, mode) = length(x) == length(y) ? _binarymap(f, x, y, mode) : HigherOrderFns._diffshape_broadcast(f, x, y)
-for (fun, mode) in [(:+, 1), (:-, 1), (:*, 0), (:min, 2), (:max, 2)]
-    fun in (:+, :-) && @eval begin
-        # Addition and subtraction can be defined directly on the arrays (without map/broadcast)
-        $(fun)(x::AbstractSparseVector, y::AbstractSparseVector) = _binarymap($(fun), x, y, $mode)
-    end
-    @eval begin
-        map(::typeof($fun), x::AbstractSparseVector, y::AbstractSparseVector) = _binarymap($fun, x, y, $mode)
-        map(::typeof($fun), x::SparseVector, y::SparseVector) = _binarymap($fun, x, y, $mode)
-        broadcast(::typeof($fun), x::AbstractSparseVector, y::AbstractSparseVector) = _bcast_binary_map($fun, x, y, $mode)
-        broadcast(::typeof($fun), x::SparseVector, y::SparseVector) = _bcast_binary_map($fun, x, y, $mode)
-    end
-end
-
-### Reduction
-
-sum(x::AbstractSparseVector) = sum(nonzeros(x))
-
-function maximum(x::AbstractSparseVector{T}) where T<:Real
-    n = length(x)
-    n > 0 || throw(ArgumentError("maximum over empty array is not allowed."))
-    m = nnz(x)
-    (m == 0 ? zero(T) :
-     m == n ? maximum(nonzeros(x)) :
-     max(zero(T), maximum(nonzeros(x))))::T
-end
-
-function minimum(x::AbstractSparseVector{T}) where T<:Real
-    n = length(x)
-    n > 0 || throw(ArgumentError("minimum over empty array is not allowed."))
-    m = nnz(x)
-    (m == 0 ? zero(T) :
-     m == n ? minimum(nonzeros(x)) :
-     min(zero(T), minimum(nonzeros(x))))::T
-end
-
-for f in [:sum, :maximum, :minimum], op in [:abs, :abs2]
-    SV = :AbstractSparseVector
-    if f === :minimum
-        @eval ($f)(::typeof($op), x::$SV{T}) where {T<:Number} = nnz(x) < length(x) ? ($op)(zero(T)) : ($f)($op, nonzeros(x))
-    else
-        @eval ($f)(::typeof($op), x::$SV) = ($f)($op, nonzeros(x))
-    end
-end
-
-norm(x::SparseVectorUnion, p::Real=2) = norm(nonzeros(x), p)
-
-### linalg.jl
-
-# Transpose
-# (The only sparse matrix structure in base is CSC, so a one-row sparse matrix is worse than dense)
-transpose(sv::SparseVector) = Transpose(sv)
-adjoint(sv::SparseVector) = Adjoint(sv)
-
-### BLAS Level-1
-
-# axpy
-
-function LinearAlgebra.axpy!(a::Number, x::SparseVectorUnion, y::AbstractVector)
-    require_one_based_indexing(x, y)
-    length(x) == length(y) || throw(DimensionMismatch())
-    nzind = nonzeroinds(x)
-    nzval = nonzeros(x)
-    m = length(nzind)
-
-    if a == oneunit(a)
-        for i = 1:m
-            @inbounds ii = nzind[i]
-            @inbounds v = nzval[i]
-            y[ii] += v
-        end
-    elseif a == -oneunit(a)
-        for i = 1:m
-            @inbounds ii = nzind[i]
-            @inbounds v = nzval[i]
-            y[ii] -= v
-        end
-    else
-        for i = 1:m
-            @inbounds ii = nzind[i]
-            @inbounds v = nzval[i]
-            y[ii] += a * v
-        end
-    end
-    return y
-end
-
-
-# scaling
-
-function rmul!(x::SparseVectorUnion, a::Real)
-    rmul!(nonzeros(x), a)
-    return x
-end
-function rmul!(x::SparseVectorUnion, a::Complex)
-    rmul!(nonzeros(x), a)
-    return x
-end
-function lmul!(a::Real, x::SparseVectorUnion)
-    rmul!(nonzeros(x), a)
-    return x
-end
-function lmul!(a::Complex, x::SparseVectorUnion)
-    rmul!(nonzeros(x), a)
-    return x
-end
-
-(*)(x::SparseVectorUnion, a::Number) = SparseVector(length(x), copy(nonzeroinds(x)), nonzeros(x) * a)
-(*)(a::Number, x::SparseVectorUnion) = SparseVector(length(x), copy(nonzeroinds(x)), a * nonzeros(x))
-(/)(x::SparseVectorUnion, a::Number) = SparseVector(length(x), copy(nonzeroinds(x)), nonzeros(x) / a)
-
-# dot
-function dot(x::AbstractVector{Tx}, y::SparseVectorUnion{Ty}) where {Tx<:Number,Ty<:Number}
-    require_one_based_indexing(x, y)
-    n = length(x)
-    length(y) == n || throw(DimensionMismatch())
-    nzind = nonzeroinds(y)
-    nzval = nonzeros(y)
-    s = dot(zero(Tx), zero(Ty))
-    @inbounds for i = 1:length(nzind)
-        s += dot(x[nzind[i]], nzval[i])
-    end
-    return s
-end
-
-function dot(x::SparseVectorUnion{Tx}, y::AbstractVector{Ty}) where {Tx<:Number,Ty<:Number}
-    require_one_based_indexing(x, y)
-    n = length(y)
-    length(x) == n || throw(DimensionMismatch())
-    nzind = nonzeroinds(x)
-    nzval = nonzeros(x)
-    s = dot(zero(Tx), zero(Ty))
-    @inbounds for i = 1:length(nzind)
-        s += dot(nzval[i], y[nzind[i]])
-    end
-    return s
-end
-
-function _spdot(f::Function,
-                xj::Int, xj_last::Int, xnzind, xnzval,
-                yj::Int, yj_last::Int, ynzind, ynzval)
-    # dot product between ranges of non-zeros,
-    s = f(zero(eltype(xnzval)), zero(eltype(ynzval)))
-    @inbounds while xj <= xj_last && yj <= yj_last
-        ix = xnzind[xj]
-        iy = ynzind[yj]
-        if ix == iy
-            s += f(xnzval[xj], ynzval[yj])
-            xj += 1
-            yj += 1
-        elseif ix < iy
-            xj += 1
-        else
-            yj += 1
-        end
-    end
-    s
-end
-
-function dot(x::SparseVectorUnion{<:Number}, y::SparseVectorUnion{<:Number})
-    x === y && return sum(abs2, x)
-    n = length(x)
-    length(y) == n || throw(DimensionMismatch())
-
-    xnzind = nonzeroinds(x)
-    ynzind = nonzeroinds(y)
-    xnzval = nonzeros(x)
-    ynzval = nonzeros(y)
-
-    _spdot(dot,
-           1, length(xnzind), xnzind, xnzval,
-           1, length(ynzind), ynzind, ynzval)
-end
-
-
-### BLAS-2 / dense A * sparse x -> dense y
-
-# lowrankupdate (BLAS.ger! like)
-function LinearAlgebra.lowrankupdate!(A::StridedMatrix, x::AbstractVector, y::SparseVectorUnion, α::Number = 1)
-    require_one_based_indexing(A, x, y)
-    nzi = nonzeroinds(y)
-    nzv = nonzeros(y)
-    @inbounds for (j,v) in zip(nzi,nzv)
-        αv = α*conj(v)
-        for i in axes(x, 1)
-            A[i,j] += x[i]*αv
-        end
-    end
-    return A
-end
-
-# * and mul!
-
-const _StridedOrTriangularMatrix{T} = Union{StridedMatrix{T}, LowerTriangular{T}, UnitLowerTriangular{T}, UpperTriangular{T}, UnitUpperTriangular{T}}
-
-function (*)(A::_StridedOrTriangularMatrix{Ta}, x::AbstractSparseVector{Tx}) where {Ta,Tx}
-    require_one_based_indexing(A, x)
-    m, n = size(A)
-    length(x) == n || throw(DimensionMismatch())
-    Ty = promote_op(matprod, eltype(A), eltype(x))
-    y = Vector{Ty}(undef, m)
-    mul!(y, A, x)
-end
-
-mul!(y::AbstractVector{Ty}, A::_StridedOrTriangularMatrix, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    mul!(y, A, x, true, false)
-
-function mul!(y::AbstractVector, A::_StridedOrTriangularMatrix, x::AbstractSparseVector, α::Number, β::Number)
-    require_one_based_indexing(y, A, x)
-    m, n = size(A)
-    length(x) == n && length(y) == m || throw(DimensionMismatch())
-    m == 0 && return y
-    if β != one(β)
-        β == zero(β) ? fill!(y, zero(eltype(y))) : rmul!(y, β)
-    end
-    α == zero(α) && return y
-
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    @inbounds for i = 1:length(xnzind)
-        v = xnzval[i]
-        if v != zero(v)
-            j = xnzind[i]
-            αv = v * α
-            for r = 1:m
-                y[r] += A[r,j] * αv
-            end
-        end
-    end
-    return y
-end
-
-# * and mul!(C, transpose(A), B)
-
-function *(transA::Transpose{<:Any,<:_StridedOrTriangularMatrix{Ta}}, x::AbstractSparseVector{Tx}) where {Ta,Tx}
-    require_one_based_indexing(transA, x)
-    m, n = size(transA)
-    length(x) == n || throw(DimensionMismatch())
-    Ty = promote_op(matprod, eltype(transA), eltype(x))
-    y = Vector{Ty}(undef, m)
-    mul!(y, transA, x)
-end
-
-mul!(y::AbstractVector{Ty}, transA::Transpose{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    mul!(y, transA, x, true, false)
-
-function mul!(y::AbstractVector, transA::Transpose{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector, α::Number, β::Number)
-    require_one_based_indexing(y, transA, x)
-    m, n = size(transA)
-    length(x) == n && length(y) == m || throw(DimensionMismatch())
-    m == 0 && return y
-    if β != one(β)
-        β == zero(β) ? fill!(y, zero(eltype(y))) : rmul!(y, β)
-    end
-    α == zero(α) && return y
-
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    _nnz = length(xnzind)
-    _nnz == 0 && return y
-
-    A = transA.parent
-    Ty = promote_op(matprod, eltype(A), eltype(x))
-    @inbounds for j = 1:m
-        s = zero(Ty)
-        for i = 1:_nnz
-            s += transpose(A[xnzind[i], j]) * xnzval[i]
-        end
-        y[j] += s * α
-    end
-    return y
-end
-
-# * and mul!(C, adjoint(A), B)
-
-function *(adjA::Adjoint{<:Any,<:_StridedOrTriangularMatrix{Ta}}, x::AbstractSparseVector{Tx}) where {Ta,Tx}
-    require_one_based_indexing(adjA, x)
-    m, n = size(adjA)
-    length(x) == n || throw(DimensionMismatch())
-    Ty = promote_op(matprod, eltype(adjA), eltype(x))
-    y = Vector{Ty}(undef, m)
-    mul!(y, adjA, x)
-end
-
-mul!(y::AbstractVector{Ty}, adjA::Adjoint{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    mul!(y, adjA, x, true, false)
-
-function mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:_StridedOrTriangularMatrix}, x::AbstractSparseVector, α::Number, β::Number)
-    require_one_based_indexing(y, adjA, x)
-    m, n = size(adjA)
-    length(x) == n && length(y) == m || throw(DimensionMismatch())
-    m == 0 && return y
-    if β != one(β)
-        β == zero(β) ? fill!(y, zero(eltype(y))) : rmul!(y, β)
-    end
-    α == zero(α) && return y
-
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    _nnz = length(xnzind)
-    _nnz == 0 && return y
-
-    A = adjA.parent
-    Ty = promote_op(matprod, eltype(A), eltype(x))
-    @inbounds for j = 1:m
-        s = zero(Ty)
-        for i = 1:_nnz
-            s += adjoint(A[xnzind[i], j]) * xnzval[i]
-        end
-        y[j] += s * α
-    end
-    return y
-end
-
-
-### BLAS-2 / sparse A * sparse x -> dense y
-
-function densemv(A::AbstractSparseMatrixCSC, x::AbstractSparseVector; trans::AbstractChar='N')
-    local xlen::Int, ylen::Int
-    require_one_based_indexing(A, x)
-    m, n = size(A)
-    if trans == 'N' || trans == 'n'
-        xlen = n; ylen = m
-    elseif trans == 'T' || trans == 't' || trans == 'C' || trans == 'c'
-        xlen = m; ylen = n
-    else
-        throw(ArgumentError("Invalid trans character $trans"))
-    end
-    xlen == length(x) || throw(DimensionMismatch())
-    T = promote_op(matprod, eltype(A), eltype(x))
-    y = Vector{T}(undef, ylen)
-    if trans == 'N' || trans == 'n'
-        mul!(y, A, x)
-    elseif trans == 'T' || trans == 't'
-        mul!(y, transpose(A), x)
-    elseif trans == 'C' || trans == 'c'
-        mul!(y, adjoint(A), x)
-    else
-        throw(ArgumentError("Invalid trans character $trans"))
-    end
-    y
-end
-
-# * and mul!
-
-mul!(y::AbstractVector{Ty}, A::AbstractSparseMatrixCSC, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    mul!(y, A, x, true, false)
-
-function mul!(y::AbstractVector, A::AbstractSparseMatrixCSC, x::AbstractSparseVector, α::Number, β::Number)
-    require_one_based_indexing(y, A, x)
-    m, n = size(A)
-    length(x) == n && length(y) == m || throw(DimensionMismatch())
-    m == 0 && return y
-    if β != one(β)
-        β == zero(β) ? fill!(y, zero(eltype(y))) : rmul!(y, β)
-    end
-    α == zero(α) && return y
-
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    Acolptr = getcolptr(A)
-    Arowval = rowvals(A)
-    Anzval = nonzeros(A)
-
-    @inbounds for i = 1:length(xnzind)
-        v = xnzval[i]
-        if v != zero(v)
-            αv = v * α
-            j = xnzind[i]
-            for r = Acolptr[j]:(Acolptr[j+1]-1)
-                y[Arowval[r]] += Anzval[r] * αv
-            end
-        end
-    end
-    return y
-end
-
-# * and *(Tranpose(A), B)
-
-mul!(y::AbstractVector{Ty}, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    (A = transA.parent; mul!(y, transpose(A), x, true, false))
-
-mul!(y::AbstractVector, transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector, α::Number, β::Number) =
-    (A = transA.parent; _At_or_Ac_mul_B!((a,b) -> transpose(a) * b, y, A, x, α, β))
-
-mul!(y::AbstractVector{Ty}, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector{Tx}) where {Tx,Ty} =
-    (A = adjA.parent; mul!(y, adjoint(A), x, true, false))
-
-mul!(y::AbstractVector, adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector, α::Number, β::Number) =
-    (A = adjA.parent; _At_or_Ac_mul_B!((a,b) -> adjoint(a) * b, y, A, x, α, β))
-
-function _At_or_Ac_mul_B!(tfun::Function,
-                          y::AbstractVector, A::AbstractSparseMatrixCSC, x::AbstractSparseVector,
-                          α::Number, β::Number)
-    require_one_based_indexing(y, A, x)
-    m, n = size(A)
-    length(x) == m && length(y) == n || throw(DimensionMismatch())
-    n == 0 && return y
-    if β != one(β)
-        β == zero(β) ? fill!(y, zero(eltype(y))) : rmul!(y, β)
-    end
-    α == zero(α) && return y
-
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    Acolptr = getcolptr(A)
-    Arowval = rowvals(A)
-    Anzval = nonzeros(A)
-    mx = length(xnzind)
-
-    for j = 1:n
-        # s <- dot(A[:,j], x)
-        s = _spdot(tfun, Acolptr[j], Acolptr[j+1]-1, Arowval, Anzval,
-                   1, mx, xnzind, xnzval)
-        @inbounds y[j] += s * α
-    end
-    return y
-end
-
-
-### BLAS-2 / sparse A * sparse x -> dense y
-
-function *(A::AbstractSparseMatrixCSC, x::AbstractSparseVector)
-    require_one_based_indexing(A, x)
-    y = densemv(A, x)
-    initcap = min(nnz(A), size(A,1))
-    _dense2sparsevec(y, initcap)
-end
-
-*(transA::Transpose{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector) =
-    (A = transA.parent; _At_or_Ac_mul_B((a,b) -> transpose(a) * b, A, x, promote_op(matprod, eltype(transA), eltype(x))))
-
-*(adjA::Adjoint{<:Any,<:AbstractSparseMatrixCSC}, x::AbstractSparseVector) =
-    (A = adjA.parent; _At_or_Ac_mul_B((a,b) -> adjoint(a) * b, A, x, promote_op(matprod, eltype(adjA), eltype(x))))
-
-function _At_or_Ac_mul_B(tfun::Function, A::AbstractSparseMatrixCSC{TvA,TiA}, x::AbstractSparseVector{TvX,TiX},
-                         Tv = promote_op(matprod, TvA, TvX)) where {TvA,TiA,TvX,TiX}
-    require_one_based_indexing(A, x)
-    m, n = size(A)
-    length(x) == m || throw(DimensionMismatch())
-    Ti = promote_type(TiA, TiX)
-
-    xnzind = nonzeroinds(x)
-    xnzval = nonzeros(x)
-    Acolptr = getcolptr(A)
-    Arowval = rowvals(A)
-    Anzval = nonzeros(A)
-    mx = length(xnzind)
-
-    ynzind = Vector{Ti}(undef, n)
-    ynzval = Vector{Tv}(undef, n)
-
-    jr = 0
-    for j = 1:n
-        s = _spdot(tfun, Acolptr[j], Acolptr[j+1]-1, Arowval, Anzval,
-                   1, mx, xnzind, xnzval)
-        if s != zero(s)
-            jr += 1
-            ynzind[jr] = j
-            ynzval[jr] = s
-        end
-    end
-    if jr < n
-        resize!(ynzind, jr)
-        resize!(ynzval, jr)
-    end
-    SparseVector(n, ynzind, ynzval)
-end
-
-
-# define matrix division operations involving triangular matrices and sparse vectors
-# the valid left-division operations are A[t|c]_ldiv_B[!] and \
-# the valid right-division operations are A(t|c)_rdiv_B[t|c][!]
-# see issue #14005 for discussion of these methods
-for isunittri in (true, false), islowertri in (true, false)
-    unitstr = isunittri ? "Unit" : ""
-    halfstr = islowertri ? "Lower" : "Upper"
-    tritype = :(LinearAlgebra.$(Symbol(unitstr, halfstr, "Triangular")))
-
-    # build out-of-place left-division operations
-    for (istrans, applyxform, xformtype, xformop) in (
-            (false, false, :identity,  :identity),
-            (true,  true,  :Transpose, :transpose),
-            (true,  true,  :Adjoint,   :adjoint) )
-
-        # broad method where elements are Numbers
-        xformtritype = applyxform ? :($xformtype{<:TA,<:$tritype{<:Any,<:AbstractMatrix}}) :
-                                    :($tritype{<:TA,<:AbstractMatrix})
-        @eval function \(xformA::$xformtritype, b::SparseVector{Tb}) where {TA<:Number,Tb<:Number}
-            A = $(applyxform ? :(xformA.parent) : :(xformA) )
-            TAb = $(isunittri ?
-                :(typeof(zero(TA)*zero(Tb) + zero(TA)*zero(Tb))) :
-                :(typeof((zero(TA)*zero(Tb) + zero(TA)*zero(Tb))/one(TA))) )
-            LinearAlgebra.ldiv!($xformop(convert(AbstractArray{TAb}, A)), convert(Array{TAb}, b))
-        end
-
-        # faster method requiring good view support of the
-        # triangular matrix type. hence the StridedMatrix restriction.
-        xformtritype = applyxform ? :($xformtype{<:TA,<:$tritype{<:Any,<:StridedMatrix}}) :
-                                    :($tritype{<:TA,<:StridedMatrix})
-        @eval function \(xformA::$xformtritype, b::SparseVector{Tb}) where {TA<:Number,Tb<:Number}
-            A = $(applyxform ? :(xformA.parent) : :(xformA) )
-            TAb = $(isunittri ?
-                :(typeof(zero(TA)*zero(Tb) + zero(TA)*zero(Tb))) :
-                :(typeof((zero(TA)*zero(Tb) + zero(TA)*zero(Tb))/one(TA))) )
-            r = convert(Array{TAb}, b)
-            # If b has no nonzero entries, then r is necessarily zero. If b has nonzero
-            # entries, then the operation involves only b[nzrange], so we extract and
-            # operate on solely b[nzrange] for efficiency.
-            if nnz(b) != 0
-                nzrange = $( (islowertri && !istrans) || (!islowertri && istrans) ?
-                    :(nonzeroinds(b)[1]:length(b::SparseVector)) :
-                    :(1:nonzeroinds(b)[end]) )
-                nzrangeviewr = view(r, nzrange)
-                nzrangeviewA = $tritype(view(A.data, nzrange, nzrange))
-                LinearAlgebra.ldiv!($xformop(convert(AbstractArray{TAb}, nzrangeviewA)), nzrangeviewr)
-            end
-            r
-        end
-
-        # fallback where elements are not Numbers
-        xformtritype = applyxform ? :($xformtype{<:Any,<:$tritype}) : :($tritype)
-        @eval function \(xformA::$xformtritype, b::SparseVector)
-            A = $(applyxform ? :(xformA.parent) : :(xformA) )
-            LinearAlgebra.ldiv!($xformop(A), copy(b))
-        end
-    end
-
-    # build in-place left-division operations
-    for (istrans, applyxform, xformtype, xformop) in (
-            (false, false, :identity,  :identity),
-            (true,  true,  :Transpose, :transpose),
-            (true,  true,  :Adjoint,   :adjoint) )
-        xformtritype = applyxform ? :($xformtype{<:Any,<:$tritype{<:Any,<:StridedMatrix}}) :
-                                    :($tritype{<:Any,<:StridedMatrix})
-
-        # the generic in-place left-division methods handle these cases, but
-        # we can achieve greater efficiency where the triangular matrix provides
-        # good view support. hence the StridedMatrix restriction.
-        @eval function ldiv!(xformA::$xformtritype, b::SparseVector)
-            A = $(applyxform ? :(xformA.parent) : :(xformA) )
-            # If b has no nonzero entries, the result is necessarily zero and this call
-            # reduces to a no-op. If b has nonzero entries, then...
-            if nnz(b) != 0
-                # densify the relevant part of b in one shot rather
-                # than potentially repeatedly reallocating during the solve
-                $( (islowertri && !istrans) || (!islowertri && istrans) ?
-                    :(_densifyfirstnztoend!(b)) :
-                    :(_densifystarttolastnz!(b)) )
-                # this operation involves only the densified section, so
-                # for efficiency we extract and operate on solely that section
-                # furthermore we operate on that section as a dense vector
-                # such that dispatch has a chance to exploit, e.g., tuned BLAS
-                nzrange = $( (islowertri && !istrans) || (!islowertri && istrans) ?
-                    :(nonzeroinds(b)[1]:length(b::SparseVector)) :
-                    :(1:nonzeroinds(b)[end]) )
-                nzrangeviewbnz = view(nonzeros(b), nzrange .- (nonzeroinds(b)[1] - 1))
-                nzrangeviewA = $tritype(view(A.data, nzrange, nzrange))
-                LinearAlgebra.ldiv!($xformop(nzrangeviewA), nzrangeviewbnz)
-            end
-            b
-        end
-    end
-end
-
-# helper functions for in-place matrix division operations defined above
-"Densifies `x::SparseVector` from its first nonzero (`x[nonzeroinds(x)[1]]`) through its end (`x[length(x::SparseVector)]`)."
-function _densifyfirstnztoend!(x::SparseVector)
-    # lengthen containers
-    oldnnz = nnz(x)
-    newnnz = length(x::SparseVector) - nonzeroinds(x)[1] + 1
-    resize!(nonzeros(x), newnnz)
-    resize!(nonzeroinds(x), newnnz)
-    # redistribute nonzero values over lengthened container
-    # initialize now-allocated zero values simultaneously
-    nextpos = newnnz
-    @inbounds for oldpos in oldnnz:-1:1
-        nzi = nonzeroinds(x)[oldpos]
-        nzv = nonzeros(x)[oldpos]
-        newpos = nzi - nonzeroinds(x)[1] + 1
-        newpos < nextpos && (nonzeros(x)[newpos+1:nextpos] .= 0)
-        newpos == oldpos && break
-        nonzeros(x)[newpos] = nzv
-        nextpos = newpos - 1
-    end
-    # finally update lengthened nzinds
-    nonzeroinds(x)[2:end] = (nonzeroinds(x)[1]+1):length(x::SparseVector)
-    x
-end
-"Densifies `x::SparseVector` from its beginning (`x[1]`) through its last nonzero (`x[nonzeroinds(x)[end]]`)."
-function _densifystarttolastnz!(x::SparseVector)
-    # lengthen containers
-    oldnnz = nnz(x)
-    newnnz = nonzeroinds(x)[end]
-    resize!(nonzeros(x), newnnz)
-    resize!(nonzeroinds(x), newnnz)
-    # redistribute nonzero values over lengthened container
-    # initialize now-allocated zero values simultaneously
-    nextpos = newnnz
-    @inbounds for oldpos in oldnnz:-1:1
-        nzi = nonzeroinds(x)[oldpos]
-        nzv = nonzeros(x)[oldpos]
-        nzi < nextpos && (nonzeros(x)[nzi+1:nextpos] .= 0)
-        nzi == oldpos && (nextpos = 0; break)
-        nonzeros(x)[nzi] = nzv
-        nextpos = nzi - 1
-    end
-    nextpos > 0 && (nonzeros(x)[1:nextpos] .= 0)
-    # finally update lengthened nzinds
-    nonzeroinds(x)[1:newnnz] = 1:newnnz
-    x
-end
-
-#sorting
-function sort(x::SparseVector{Tv,Ti}; kws...) where {Tv,Ti}
-    allvals = push!(copy(nonzeros(x)),zero(Tv))
-    sinds = sortperm(allvals;kws...)
-    n,k = length(x),length(allvals)
-    z = findfirst(isequal(k),sinds)::Int
-    newnzind = Vector{Ti}(1:k-1)
-    newnzind[z:end] .+= n-k+1
-    newnzvals = allvals[deleteat!(sinds[1:k],z)]
-    SparseVector(n,newnzind,newnzvals)
-end
-
-function fkeep!(x::SparseVector, f)
-    n = length(x::SparseVector)
-    nzind = nonzeroinds(x)
-    nzval = nonzeros(x)
-
-    x_writepos = 1
-    @inbounds for xk in 1:nnz(x)
-        xi = nzind[xk]
-        xv = nzval[xk]
-        # If this element should be kept, rewrite in new position
-        if f(xi, xv)
-            if x_writepos != xk
-                nzind[x_writepos] = xi
-                nzval[x_writepos] = xv
-            end
-            x_writepos += 1
-        end
-    end
-
-    # Trim x's storage if necessary
-    x_nnz = x_writepos - 1
-    resize!(nzval, x_nnz)
-    resize!(nzind, x_nnz)
-
-    return x
-end
-
-"""
-    droptol!(x::SparseVector, tol)
-
-Removes stored values from `x` whose absolute value is less than or equal to `tol`.
-"""
-droptol!(x::SparseVector, tol) = fkeep!(x, (i, x) -> abs(x) > tol)
-
-"""
-    dropzeros!(x::SparseVector)
-
-Removes stored numerical zeros from `x`.
-
-For an out-of-place version, see [`dropzeros`](@ref). For
-algorithmic information, see `fkeep!`.
-"""
-dropzeros!(x::SparseVector) = fkeep!(x, (i, x) -> !iszero(x))
-
-"""
-    dropzeros(x::SparseVector)
-
-Generates a copy of `x` and removes numerical zeros from that copy.
-
-For an in-place version and algorithmic information, see [`dropzeros!`](@ref).
-
-# Examples
-```jldoctest
-julia> A = sparsevec([1, 2, 3], [1.0, 0.0, 1.0])
-3-element SparseVector{Float64, Int64} with 3 stored entries:
-  [1]  =  1.0
-  [2]  =  0.0
-  [3]  =  1.0
-
-julia> dropzeros(A)
-3-element SparseVector{Float64, Int64} with 2 stored entries:
-  [1]  =  1.0
-  [3]  =  1.0
-```
-"""
-dropzeros(x::SparseVector) = dropzeros!(copy(x))
-
-function copy!(dst::SparseVector, src::SparseVector)
-    length(dst::SparseVector) == length(src::SparseVector) || throw(ArgumentError("Sparse vectors should have the same length for copy!"))
-    copy!(nonzeros(dst), nonzeros(src))
-    copy!(nonzeroinds(dst), nonzeroinds(src))
-    return dst
-end
-
-function copy!(dst::SparseVector, src::AbstractVector)
-    length(dst::SparseVector) == length(src) || throw(ArgumentError("Sparse vector should have the same length as source for copy!"))
-    _dense2indval!(nonzeroinds(dst), nonzeros(dst), src)
-    return dst
-end
-
-function _fillnonzero!(arr::AbstractSparseMatrixCSC{Tv, Ti}, val) where {Tv,Ti}
-    m, n = size(arr)
-    resize!(getcolptr(arr), n+1)
-    resize!(rowvals(arr), m*n)
-    resize!(nonzeros(arr), m*n)
-    copyto!(getcolptr(arr), 1:m:n*m+1)
-    fill!(nonzeros(arr), val)
-    index = 1
-    @inbounds for _ in 1:n
-        for i in 1:m
-            rowvals(arr)[index] = Ti(i)
-            index += 1
-        end
-    end
-    arr
-end
-
-function _fillnonzero!(arr::SparseVector{Tv,Ti}, val) where {Tv,Ti}
-    n = length(arr::SparseVector)
-    resize!(nonzeroinds(arr), n)
-    resize!(nonzeros(arr), n)
-    @inbounds for i in 1:n
-        nonzeroinds(arr)[i] = Ti(i)
-    end
-    fill!(nonzeros(arr), val)
-    arr
-end
-
-import Base.fill!
-function fill!(A::Union{SparseVector, AbstractSparseMatrixCSC}, x)
-    T = eltype(A)
-    xT = convert(T, x)
-    if xT == zero(T)
-        fill!(nonzeros(A), xT)
-    else
-        _fillnonzero!(A, xT)
-    end
-    return A
-end
-
-
-
-# in-place swaps (dense) blocks start:split and split+1:fin in col
-function _swap!(col::AbstractVector, start::Integer, fin::Integer, split::Integer)
-    split == fin && return
-    reverse!(col, start, split)
-    reverse!(col, split + 1, fin)
-    reverse!(col, start, fin)
-    return
-end
-
-
-# in-place shifts a sparse subvector by r. Used also by sparsematrix.jl
-function subvector_shifter!(R::AbstractVector, V::AbstractVector, start::Integer, fin::Integer, m::Integer, r::Integer)
-    split = fin
-    @inbounds for j = start:fin
-        # shift positions ...
-        R[j] += r
-        if R[j] <= m
-            split = j
-        else
-            R[j] -= m
-        end
-    end
-    # ...but rowval should be sorted within columns
-    _swap!(R, start, fin, split)
-    _swap!(V, start, fin, split)
-end
-
-
-function circshift!(O::SparseVector, X::SparseVector, (r,)::Base.DimsInteger{1})
-    copy!(O, X)
-    subvector_shifter!(nonzeroinds(O), nonzeros(O), 1, length(nonzeroinds(O)), length(O), mod(r, length(X)))
-    return O
-end
-
-
-circshift!(O::SparseVector, X::SparseVector, r::Real,) = circshift!(O, X, (Integer(r),))
diff --git a/stdlib/SparseArrays/test/ambiguous_exec.jl b/stdlib/SparseArrays/test/ambiguous_exec.jl
deleted file mode 100644
index a466f2534794a6..00000000000000
--- a/stdlib/SparseArrays/test/ambiguous_exec.jl
+++ /dev/null
@@ -1,4 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test, SparseArrays
-@test detect_ambiguities(SparseArrays; imported=true, recursive=true) == []
diff --git a/stdlib/SparseArrays/test/forbidproperties.jl b/stdlib/SparseArrays/test/forbidproperties.jl
deleted file mode 100644
index fd182cf065f1a5..00000000000000
--- a/stdlib/SparseArrays/test/forbidproperties.jl
+++ /dev/null
@@ -1,5 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SparseArrays
-Base.getproperty(S::SparseMatrixCSC, ::Symbol) = error("use accessor function")
-Base.getproperty(S::SparseVector, ::Symbol) = error("use accessor function")
diff --git a/stdlib/SparseArrays/test/higherorderfns.jl b/stdlib/SparseArrays/test/higherorderfns.jl
deleted file mode 100644
index cd77fca6951a58..00000000000000
--- a/stdlib/SparseArrays/test/higherorderfns.jl
+++ /dev/null
@@ -1,690 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# These tests cover the higher order functions specialized for sparse arrays defined in
-# base/sparse/higherorderfns.jl, particularly map[!]/broadcast[!] for SparseVectors and
-# SparseMatrixCSCs at present.
-
-module HigherOrderFnsTests
-
-using Test
-using SparseArrays
-using LinearAlgebra
-using Random
-include("forbidproperties.jl")
-
-@testset "map[!] implementation specialized for a single (input) sparse vector/matrix" begin
-    N, M = 10, 12
-    for shapeA in ((N,), (N, M))
-        A = sprand(shapeA..., 0.4); fA = Array(A)
-        # --> test map entry point
-        @test map(sin, A) == sparse(map(sin, fA))
-        @test map(cos, A) == sparse(map(cos, fA))
-        # --> test map! entry point
-        fX = copy(fA); X = sparse(fX)
-        map!(sin, X, A); X = sparse(fX) # warmup for @allocated
-        @test (@allocated map!(sin, X, A)) == 0
-        @test map!(sin, X, A) == sparse(map!(sin, fX, fA))
-        @test map!(cos, X, A) == sparse(map!(cos, fX, fA))
-        @test_throws DimensionMismatch map!(sin, X, spzeros((shapeA .- 1)...))
-    end
-end
-
-@testset "map[!] implementation specialized for a pair of (input) sparse vectors/matrices" begin
-    N, M = 10, 12
-    f(x, y) = x + y + 1
-    for shapeA in ((N,), (N, M))
-        A, Bo = sprand(shapeA..., 0.3), sprand(shapeA..., 0.3)
-        B = ndims(Bo) == 1 ? SparseVector{Float32, Int32}(Bo) : SparseMatrixCSC{Float32,Int32}(Bo)
-        # use different types to check internal type stability via allocation tests below
-        fA, fB = map(Array, (A, B))
-        # --> test map entry point
-        @test map(+, A, B) == sparse(map(+, fA, fB))
-        @test map(*, A, B) == sparse(map(*, fA, fB))
-        @test map(f, A, B) == sparse(map(f, fA, fB))
-        @test_throws DimensionMismatch map(+, A, spzeros((shapeA .- 1)...))
-        # --> test map! entry point
-        fX = map(+, fA, fB); X = sparse(fX)
-        map!(+, X, A, B); X = sparse(fX) # warmup for @allocated
-        @test (@allocated map!(+, X, A, B)) < 300
-        @test map!(+, X, A, B) == sparse(map!(+, fX, fA, fB))
-        fX = map(*, fA, fB); X = sparse(fX)
-        map!(*, X, A, B); X = sparse(fX) # warmup for @allocated
-        @test (@allocated map!(*, X, A, B)) < 300
-        @test map!(*, X, A, B) == sparse(map!(*, fX, fA, fB))
-        @test map!(f, X, A, B) == sparse(map!(f, fX, fA, fB))
-        @test_throws DimensionMismatch map!(f, X, A, spzeros((shapeA .- 1)...))
-    end
-end
-
-@testset "map[!] implementation capable of handling >2 (input) sparse vectors/matrices" begin
-    N, M = 10, 12
-    f(x, y, z) = x + y + z + 1
-    for shapeA in ((N,), (N, M))
-        A, B, Co = sprand(shapeA..., 0.2), sprand(shapeA..., 0.2), sprand(shapeA..., 0.2)
-        C = ndims(Co) == 1 ? SparseVector{Float32,Int32}(Co) : SparseMatrixCSC{Float32,Int32}(Co)
-        # use different types to check internal type stability via allocation tests below
-        fA, fB, fC = map(Array, (A, B, C))
-        # --> test map entry point
-        @test map(+, A, B, C) == sparse(map(+, fA, fB, fC))
-        @test map(*, A, B, C) == sparse(map(*, fA, fB, fC))
-        @test map(f, A, B, C) == sparse(map(f, fA, fB, fC))
-        @test_throws DimensionMismatch map(+, A, B, spzeros(N, M - 1))
-        # --> test map! entry point
-        fX = map(+, fA, fB, fC); X = sparse(fX)
-        map!(+, X, A, B, C); X = sparse(fX) # warmup for @allocated
-        @test (@allocated map!(+, X, A, B, C)) < 300
-        @test map!(+, X, A, B, C) == sparse(map!(+, fX, fA, fB, fC))
-        fX = map(*, fA, fB, fC); X = sparse(fX)
-        map!(*, X, A, B, C); X = sparse(fX) # warmup for @allocated
-        @test (@allocated map!(*, X, A, B, C)) < 300
-        @test map!(*, X, A, B, C) == sparse(map!(*, fX, fA, fB, fC))
-        @test map!(f, X, A, B, C) == sparse(map!(f, fX, fA, fB, fC))
-        @test_throws DimensionMismatch map!(f, X, A, B, spzeros((shapeA .- 1)...))
-    end
-end
-
-@testset "broadcast! implementation specialized for solely an output sparse vector/matrix (no inputs)" begin
-    N, M, p = 10, 12, 0.4
-    V, C = sprand(N, p), sprand(N, M, p)
-    fV, fC = Array(V), Array(C)
-    @test broadcast!(() -> 0, V) == sparse(broadcast!(() -> 0, fV))
-    @test broadcast!(() -> 0, C) == sparse(broadcast!(() -> 0, fC))
-    @test let z = 0, fz = 0; broadcast!(() -> z += 1, V) == broadcast!(() -> fz += 1, fV); end
-    @test let z = 0, fz = 0; broadcast!(() -> z += 1, C) == broadcast!(() -> fz += 1, fC); end
-end
-
-@testset "broadcast implementation specialized for a single (input) sparse vector/matrix" begin
-    # broadcast for a single (input) sparse vector/matrix falls back to map, tested
-    # extensively above. here we simply lightly exercise the relevant broadcast entry
-    # point.
-    N, M, p = 10, 12, 0.4
-    a, A = sprand(N, p), sprand(N, M, p)
-    fa, fA = Array(a), Array(A)
-    @test broadcast(sin, a) == sparse(broadcast(sin, fa))
-    @test broadcast(sin, A) == sparse(broadcast(sin, fA))
-    # also test the typed broadcast
-    @test broadcast(convert, Float32, A) == sparse(broadcast(convert, Float32, fA))
-end
-
-@testset "broadcast! implementation specialized for a single (input) sparse vector/matrix" begin
-    N, M, p = 10, 12, 0.3
-    f(x, y) = x + y + 1
-    mats = (sprand(N, M, p), sprand(N, 1, p), sprand(1, M, p), sprand(1, 1, 1.0), spzeros(1, 1))
-    vecs = (sprand(N, p), sprand(1, 1.0), spzeros(1))
-    # --> test with matrix destination (Z/fZ)
-    fZ = Array(first(mats))
-    for Xo in (mats..., vecs...)
-        X = ndims(Xo) == 1 ? SparseVector{Float32,Int32}(Xo) : SparseMatrixCSC{Float32,Int32}(Xo)
-        shapeX, fX = size(X), Array(X)
-        # --> test broadcast! entry point / zero-preserving op
-        broadcast!(sin, fZ, fX); Z = sparse(fZ)
-        broadcast!(sin, Z, X); Z = sparse(fZ) # warmup for @allocated
-        @test (@allocated broadcast!(sin, Z, X)) < 300
-        @test broadcast!(sin, Z, X) == sparse(broadcast!(sin, fZ, fX))
-        # --> test broadcast! entry point / not-zero-preserving op
-        broadcast!(cos, fZ, fX); Z = sparse(fZ)
-        broadcast!(cos, Z, X); Z = sparse(fZ) # warmup for @allocated
-        @test (@allocated broadcast!(cos, Z, X)) < 300
-        @test broadcast!(cos, Z, X) == sparse(broadcast!(cos, fZ, fX))
-        # --> test shape checks for broadcast! entry point
-        # TODO strengthen this test, avoiding dependence on checking whether
-        # check_broadcast_axes throws to determine whether sparse broadcast should throw
-        try
-            Base.Broadcast.check_broadcast_axes(axes(Z), spzeros((shapeX .- 1)...))
-        catch
-            @test_throws DimensionMismatch broadcast!(sin, Z, spzeros((shapeX .- 1)...))
-        end
-    end
-    # --> test with vector destination (V/fV)
-    fV = Array(first(vecs))
-    for Xo in vecs # vector target
-        X = SparseVector{Float32,Int32}(Xo)
-        shapeX, fX = size(X), Array(X)
-        # --> test broadcast! entry point / zero-preserving op
-        broadcast!(sin, fV, fX); V = sparse(fV)
-        broadcast!(sin, V, X); V = sparse(fV) # warmup for @allocated
-        @test (@allocated broadcast!(sin, V, X)) < 300
-        @test broadcast!(sin, V, X) == sparse(broadcast!(sin, fV, fX))
-        # --> test broadcast! entry point / not-zero-preserving
-        broadcast!(cos, fV, fX); V = sparse(fV)
-        broadcast!(cos, V, X); V = sparse(fV) # warmup for @allocated
-        @test (@allocated broadcast!(cos, V, X)) < 300
-        @test broadcast!(cos, V, X) == sparse(broadcast!(cos, fV, fX))
-        # --> test shape checks for broadcast! entry point
-        # TODO strengthen this test, avoiding dependence on checking whether
-        # check_broadcast_axes throws to determine whether sparse broadcast should throw
-        try
-            Base.Broadcast.check_broadcast_axes(axes(V), spzeros((shapeX .- 1)...))
-        catch
-            @test_throws DimensionMismatch broadcast!(sin, V, spzeros((shapeX .- 1)...))
-        end
-    end
-    # Tests specific to #19895, i.e. for broadcast!(identity, C, A) specializations
-    Z = copy(first(mats)); fZ = Array(Z)
-    V = copy(first(vecs)); fV = Array(V)
-    for X in (mats..., vecs...)
-        @test broadcast!(identity, Z, X) == sparse(broadcast!(identity, fZ, Array(X)))
-        X isa SparseVector && @test broadcast!(identity, V, X) == sparse(broadcast!(identity, fV, Array(X)))
-    end
-end
-
-@testset "broadcast[!] implementation specialized for pairs of (input) sparse vectors/matrices" begin
-    N, M, p = 10, 12, 0.3
-    f(x, y) = x + y + 1
-    mats = (sprand(N, M, p), sprand(N, 1, p), sprand(1, M, p), sprand(1, 1, 1.0), spzeros(1, 1))
-    vecs = (sprand(N, p), sprand(1, 1.0), spzeros(1))
-    tens = (mats..., vecs...)
-    fZ = Array(first(mats))
-    for Xo in tens
-        X = ndims(Xo) == 1 ? SparseVector{Float32,Int32}(Xo) : SparseMatrixCSC{Float32,Int32}(Xo)
-        # use different types to check internal type stability via allocation tests below
-        shapeX, fX = size(X), Array(X)
-        for Y in tens
-            fY = Array(Y)
-            # --> test broadcast entry point
-            @test broadcast(+, X, Y) == sparse(broadcast(+, fX, fY))
-            @test broadcast(-, X, Y) == sparse(broadcast(-, fX, fY))
-            @test broadcast(*, X, Y) == sparse(broadcast(*, fX, fY))
-            @test broadcast(f, X, Y) == sparse(broadcast(f, fX, fY))
-            # TODO strengthen this test, avoiding dependence on checking whether
-            # check_broadcast_axes throws to determine whether sparse broadcast should throw
-            try
-                Base.Broadcast.combine_axes(spzeros((shapeX .- 1)...), Y)
-            catch
-                @test_throws DimensionMismatch broadcast(+, spzeros((shapeX .- 1)...), Y)
-            end
-            # --> test broadcast! entry point / +-like zero-preserving op
-            broadcast!(+, fZ, fX, fY); Z = sparse(fZ)
-            broadcast!(+, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
-            @test (@allocated broadcast!(+, Z, X, Y)) < 300
-            @test broadcast!(+, Z, X, Y) == sparse(broadcast!(+, fZ, fX, fY))
-            # --> test broadcast! entry point / *-like zero-preserving op
-            broadcast!(*, fZ, fX, fY); Z = sparse(fZ)
-            broadcast!(*, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
-            @test (@allocated broadcast!(*, Z, X, Y)) < 300
-            @test broadcast!(*, Z, X, Y) == sparse(broadcast!(*, fZ, fX, fY))
-            # --> test broadcast! entry point / not zero-preserving op
-            broadcast!(f, fZ, fX, fY); Z = sparse(fZ)
-            broadcast!(f, Z, X, Y); Z = sparse(fZ) # warmup for @allocated
-            @test (@allocated broadcast!(f, Z, X, Y)) < 300
-            @test broadcast!(f, Z, X, Y) == sparse(broadcast!(f, fZ, fX, fY))
-            # --> test shape checks for both broadcast and broadcast! entry points
-            # TODO strengthen this test, avoiding dependence on checking whether
-            # check_broadcast_axes throws to determine whether sparse broadcast should throw
-            try
-                Base.Broadcast.check_broadcast_axes(axes(Z), spzeros((shapeX .- 1)...), Y)
-            catch
-                @test_throws DimensionMismatch broadcast!(f, Z, spzeros((shapeX .- 1)...), Y)
-            end
-        end
-    end
-
-    # fix#23857
-    @test sparse([1; 0]) ./ [1] == sparse([1.0; 0.0])
-    @test isequal(sparse([1 2; 1 0]) ./ [1; 0], sparse([1.0 2; Inf NaN]))
-    @test sparse([1  0]) ./ [1] == sparse([1.0 0.0])
-    @test isequal(sparse([1 2; 1 0]) ./ [1 0], sparse([1.0 Inf; 1 NaN]))
-
-    @test sparse([1]) .\ sparse([1; 0]) == sparse([1.0; 0.0])
-    @test isequal(sparse([1; 0]) .\ sparse([1 2; 1 0]), sparse([1.0 2; Inf NaN]))
-    @test sparse([1]) .\ sparse([1  0]) == sparse([1.0 0.0])
-    @test isequal(sparse([1 0]) .\ sparse([1 2; 1 0]), sparse([1.0 Inf; 1 NaN]))
-
-end
-
-
-@testset "broadcast[!] implementation capable of handling >2 (input) sparse vectors/matrices" begin
-    N, M, p = 10, 12, 0.3
-    f(x, y, z) = x + y + z + 1
-    mats = (sprand(N, M, p), sprand(N, 1, p), sprand(1, M, p), sprand(1, 1, 1.0), spzeros(1, 1))
-    vecs = (sprand(N, p), sprand(1, 1.0), spzeros(1))
-    tens = (mats..., vecs...)
-    for Xo in tens
-        X = ndims(Xo) == 1 ? SparseVector{Float32,Int32}(Xo) : SparseMatrixCSC{Float32,Int32}(Xo)
-        # use different types to check internal type stability via allocation tests below
-        shapeX, fX = size(X), Array(X)
-        for Y in tens, Z in tens
-            fY, fZ = Array(Y), Array(Z)
-            # --> test broadcast entry point
-            @test broadcast(+, X, Y, Z) == sparse(broadcast(+, fX, fY, fZ))
-            @test broadcast(*, X, Y, Z) == sparse(broadcast(*, fX, fY, fZ))
-            @test broadcast(f, X, Y, Z) == sparse(broadcast(f, fX, fY, fZ))
-            # TODO strengthen this test, avoiding dependence on checking whether
-            # check_broadcast_axes throws to determine whether sparse broadcast should throw
-            try
-                Base.Broadcast.combine_axes(spzeros((shapeX .- 1)...), Y, Z)
-            catch
-                @test_throws DimensionMismatch broadcast(+, spzeros((shapeX .- 1)...), Y, Z)
-            end
-            # --> test broadcast! entry point / +-like zero-preserving op
-            fQ = broadcast(+, fX, fY, fZ); Q = sparse(fQ)
-            broadcast!(+, Q, X, Y, Z); Q = sparse(fQ) # warmup for @allocated
-            @test (@allocated broadcast!(+, Q, X, Y, Z)) < 300
-            @test broadcast!(+, Q, X, Y, Z) == sparse(broadcast!(+, fQ, fX, fY, fZ))
-            # --> test broadcast! entry point / *-like zero-preserving op
-            fQ = broadcast(*, fX, fY, fZ); Q = sparse(fQ)
-            broadcast!(*, Q, X, Y, Z); Q = sparse(fQ) # warmup for @allocated
-            @test (@allocated broadcast!(*, Q, X, Y, Z)) < 300
-            @test broadcast!(*, Q, X, Y, Z) == sparse(broadcast!(*, fQ, fX, fY, fZ))
-            # --> test broadcast! entry point / not zero-preserving op
-            fQ = broadcast(f, fX, fY, fZ); Q = sparse(fQ)
-            broadcast!(f, Q, X, Y, Z); Q = sparse(fQ) # warmup for @allocated
-            @test (@allocated broadcast!(f, Q, X, Y, Z)) < 300
-            @test broadcast!(f, Q, X, Y, Z) == sparse(broadcast!(f, fQ, fX, fY, fZ))
-            # --> test shape checks for both broadcast and broadcast! entry points
-            # TODO strengthen this test, avoiding dependence on checking whether
-            # check_broadcast_axes throws to determine whether sparse broadcast should throw
-            try
-                Base.Broadcast.check_broadcast_axes(axes(Q), spzeros((shapeX .- 1)...), Y, Z)
-            catch
-                @test_throws DimensionMismatch broadcast!(f, Q, spzeros((shapeX .- 1)...), Y, Z)
-            end
-        end
-    end
-end
-
-@testset "sparse map/broadcast with result eltype not a concrete subtype of Number (#19561/#19589)" begin
-    N = 4
-    A, fA = sparse(1.0I, N, N), Matrix(1.0I, N, N)
-    B, fB = spzeros(1, N), zeros(1, N)
-    intorfloat_zeropres(xs...) = all(iszero, xs) ? zero(Float64) : Int(1)
-    intorfloat_notzeropres(xs...) = all(iszero, xs) ? Int(1) : zero(Float64)
-    for fn in (intorfloat_zeropres, intorfloat_notzeropres)
-        @test map(fn, A) == sparse(map(fn, fA))
-        @test broadcast(fn, A) == sparse(broadcast(fn, fA))
-        @test broadcast(fn, A, B) == sparse(broadcast(fn, fA, fB))
-        @test broadcast(fn, B, A) == sparse(broadcast(fn, fB, fA))
-    end
-    for fn in (intorfloat_zeropres,)
-        @test broadcast(fn, A, B, A) == sparse(broadcast(fn, fA, fB, fA))
-    end
-end
-
-@testset "broadcast[!] over combinations of scalars and sparse vectors/matrices" begin
-    N, M, p = 10, 12, 0.5
-    elT = Float64
-    s = Float32(2.0)
-    V = sprand(elT, N, p)
-    Vᵀ = transpose(sprand(elT, 1, N, p))
-    A = sprand(elT, N, M, p)
-    Aᵀ = transpose(sprand(elT, M, N, p))
-    fV, fA, fVᵀ, fAᵀ = Array(V), Array(A), Array(Vᵀ), Array(Aᵀ)
-    # test combinations involving one to three scalars and one to five sparse vectors/matrices
-    spargseq, dargseq = Iterators.cycle((A, V, Aᵀ, Vᵀ)), Iterators.cycle((fA, fV, fAᵀ, fVᵀ))
-    for nargs in 1:5 # number of tensor arguments
-        nargsl = cld(nargs, 2) # number in "left half" of tensor arguments
-        nargsr = fld(nargs, 2) # number in "right half" of tensor arguments
-        spargsl = tuple(Iterators.take(spargseq, nargsl)...) # "left half" of tensor args
-        spargsr = tuple(Iterators.take(spargseq, nargsr)...) # "right half" of tensor args
-        dargsl = tuple(Iterators.take(dargseq, nargsl)...) # "left half" of tensor args, densified
-        dargsr = tuple(Iterators.take(dargseq, nargsr)...) # "right half" of tensor args, densified
-        for (sparseargs, denseargs) in ( # argument combinations including scalars
-                # a few combinations involving one scalar
-                ((s, spargsl..., spargsr...), (s, dargsl..., dargsr...)),
-                ((spargsl..., s, spargsr...), (dargsl..., s, dargsr...)),
-                ((spargsl..., spargsr..., s), (dargsl..., dargsr..., s)),
-                # a few combinations involving two scalars
-                ((s, spargsl..., s, spargsr...), (s, dargsl..., s, dargsr...)),
-                ((s, spargsl..., spargsr..., s), (s, dargsl..., dargsr..., s)),
-                ((spargsl..., s, spargsr..., s), (dargsl..., s, dargsr..., s)),
-                ((s, s, spargsl..., spargsr...), (s, s, dargsl..., dargsr...)),
-                ((spargsl..., s, s, spargsr...), (dargsl..., s, s, dargsr...)),
-                ((spargsl..., spargsr..., s, s), (dargsl..., dargsr..., s, s)),
-                # a few combinations involving three scalars
-                ((s, spargsl..., s, spargsr..., s), (s, dargsl..., s, dargsr..., s)),
-                ((s, spargsl..., s, s, spargsr...), (s, dargsl..., s, s, dargsr...)),
-                ((spargsl..., s, s, spargsr..., s), (dargsl..., s, s, dargsr..., s)),
-                ((spargsl..., s, s, s, spargsr...), (dargsl..., s, s, s, dargsr...)), )
-            # test broadcast entry point
-            @test broadcast(*, sparseargs...) == sparse(broadcast(*, denseargs...))
-            @test isa(@inferred(broadcast(*, sparseargs...)), SparseMatrixCSC{elT})
-            # test broadcast! entry point
-            fX = broadcast(*, sparseargs...); X = sparse(fX)
-            @test broadcast!(*, X, sparseargs...) == sparse(broadcast!(*, fX, denseargs...))
-            @test isa(@inferred(broadcast!(*, X, sparseargs...)), SparseMatrixCSC{elT})
-            X = sparse(fX) # reset / warmup for @allocated test
-            # And broadcasting over Transposes currently requires making a CSC copy, so we must account for that in the bounds
-            @test (@allocated broadcast!(*, X, sparseargs...)) <= (sum(x->isa(x, Transpose) ? @allocated(SparseMatrixCSC(x)) + 128 : 0, sparseargs) + 128 + 900) # about zero to 3k bytes
-        end
-    end
-    # test combinations at the limit of inference (eight arguments net)
-    for (sparseargs, denseargs) in (
-            ((s, s, s, A, s, s, s, s), (s, s, s, fA, s, s, s, s)), # seven scalars, one sparse matrix
-            ((s, s, V, s, s, A, s, s), (s, s, fV, s, s, fA, s, s)), # six scalars, two sparse vectors/matrices
-            ((s, s, V, s, A, s, V, s), (s, s, fV, s, fA, s, fV, s)), # five scalars, three sparse vectors/matrices
-            ((s, V, s, A, s, V, s, A), (s, fV, s, fA, s, fV, s, fA)), # four scalars, four sparse vectors/matrices
-            ((s, V, A, s, V, A, s, A), (s, fV, fA, s, fV, fA, s, fA)), # three scalars, five sparse vectors/matrices
-            ((V, A, V, s, A, V, A, s), (fV, fA, fV, s, fA, fV, fA, s)), # two scalars, six sparse vectors/matrices
-            ((V, A, V, A, s, V, A, V), (fV, fA, fV, fA, s, fV, fA, fV)) ) # one scalar, seven sparse vectors/matrices
-        # test broadcast entry point
-        @test broadcast(*, sparseargs...) == sparse(broadcast(*, denseargs...))
-        @test isa(@inferred(broadcast(*, sparseargs...)), SparseMatrixCSC{elT})
-        # test broadcast! entry point
-        fX = broadcast(*, sparseargs...); X = sparse(fX)
-        @test broadcast!(*, X, sparseargs...) == sparse(broadcast!(*, fX, denseargs...))
-        @test isa(@inferred(broadcast!(*, X, sparseargs...)), SparseMatrixCSC{elT})
-        X = sparse(fX) # reset / warmup for @allocated test
-        @test (@allocated broadcast!(*, X, sparseargs...)) <= 900
-    end
-end
-
-@testset "broadcast[!] over combinations of scalars, sparse arrays, structured matrices, and dense vectors/matrices" begin
-    N, p = 10, 0.4
-    s = rand()
-    V = sprand(N, p)
-    A = sprand(N, N, p)
-    Z = copy(A)
-    sparsearrays = (V, A)
-    fV, fA = map(Array, sparsearrays)
-    D = Diagonal(rand(N))
-    B = Bidiagonal(rand(N), rand(N - 1), :U)
-    T = Tridiagonal(rand(N - 1), rand(N), rand(N - 1))
-    S = SymTridiagonal(rand(N), rand(N - 1))
-    structuredarrays = (D, B, T, S)
-    fstructuredarrays = map(Array, structuredarrays)
-    for (X, fX) in zip(structuredarrays, fstructuredarrays)
-        @test (Q = broadcast(+, V, A, X); Q isa SparseMatrixCSC && Q == sparse(broadcast(+, fV, fA, fX)))
-        @test broadcast!(+, Z, V, A, X) == sparse(broadcast(+, fV, fA, fX))
-        @test (Q = broadcast(*, s, V, A, X); Q isa SparseMatrixCSC && Q == sparse(broadcast(*, s, fV, fA, fX)))
-        @test broadcast!(*, Z, s, V, A, X) == sparse(broadcast(*, s, fV, fA, fX))
-        for (Y, fY) in zip(structuredarrays, fstructuredarrays)
-            @test broadcast!(+, Z, X, Y) == sparse(broadcast(+, fX, fY))
-            @test broadcast!(*, Z, X, Y) == sparse(broadcast(*, fX, fY))
-        end
-    end
-    C = Array(sprand(N, 0.4))
-    M = Array(sprand(N, N, 0.4))
-    densearrays = (C, M)
-    fD, fB = Array(D), Array(B)
-    for X in densearrays
-        @test broadcast!(+, Z, D, X) == sparse(broadcast(+, fD, X))
-        @test broadcast!(*, Z, s, B, X) == sparse(broadcast(*, s, fB, X))
-        @test broadcast(+, V, B, X)::SparseMatrixCSC == sparse(broadcast(+, fV, fB, X))
-        @test broadcast!(+, Z, V, B, X) == sparse(broadcast(+, fV, fB, X))
-        @test broadcast(+, V, A, X)::SparseMatrixCSC == sparse(broadcast(+, fV, fA, X))
-        @test broadcast!(+, Z, V, A, X) == sparse(broadcast(+, fV, fA, X))
-        @test broadcast(*, s, V, A, X)::SparseMatrixCSC == sparse(broadcast(*, s, fV, fA, X))
-        @test broadcast!(*, Z, s, V, A, X) == sparse(broadcast(*, s, fV, fA, X))
-        # Issue #20954 combinations of sparse arrays and Adjoint/Transpose vectors
-        if X isa Vector
-            @test broadcast(+, A, X')::SparseMatrixCSC == sparse(broadcast(+, fA, X'))
-            @test broadcast(*, V, X')::SparseMatrixCSC == sparse(broadcast(*, fV, X'))
-        end
-    end
-    @test V .+ ntuple(identity, N) isa Vector
-    @test A .+ ntuple(identity, N) isa Matrix
-end
-
-@testset "map[!] over combinations of sparse and structured matrices" begin
-    N, p = 10, 0.4
-    A = sprand(N, N, p)
-    Z, fA = copy(A), Array(A)
-    D = Diagonal(rand(N))
-    B = Bidiagonal(rand(N), rand(N - 1), :U)
-    T = Tridiagonal(rand(N - 1), rand(N), rand(N - 1))
-    S = SymTridiagonal(rand(N), rand(N - 1))
-    structuredarrays = (D, B, T, S)
-    fstructuredarrays = map(Array, structuredarrays)
-    for (X, fX) in zip(structuredarrays, fstructuredarrays)
-        @test map!(sin, Z, X) == sparse(map(sin, fX))
-        @test map!(cos, Z, X) == sparse(map(cos, fX))
-        @test (Q = map(+, A, X); Q isa SparseMatrixCSC && Q == sparse(map(+, fA, fX)))
-        @test map!(+, Z, A, X) == sparse(map(+, fA, fX))
-        for (Y, fY) in zip(structuredarrays, fstructuredarrays)
-            @test map!(+, Z, X, Y) == sparse(map(+, fX, fY))
-            @test map!(*, Z, X, Y) == sparse(map(*, fX, fY))
-            @test (Q = map(+, X, A, Y); Q isa SparseMatrixCSC && Q == sparse(map(+, fX, fA, fY)))
-            @test map!(+, Z, X, A, Y) == sparse(map(+, fX, fA, fY))
-        end
-    end
-end
-
-# Older tests of sparse broadcast, now largely covered by the tests above
-@testset "assorted tests of sparse broadcast over two input arguments" begin
-    N, p = 10, 0.3
-    A, B, CF = sprand(N, N, p), sprand(N, N, p), rand(N, N)
-    AF, BF, C = Array(A), Array(B), sparse(CF)
-
-    @test A .* B == AF .* BF
-    @test A[1,:] .* B == AF[1,:] .* BF
-    @test A[:,1] .* B == AF[:,1] .* BF
-    @test A .* B[1,:] == AF .*  BF[1,:]
-    @test A .* B[:,1] == AF .*  BF[:,1]
-
-    @test A .* B == AF .* BF
-    @test A[1,:] .* BF == AF[1,:] .* BF
-    @test A[:,1] .* BF == AF[:,1] .* BF
-    @test A .* BF[1,:] == AF .*  BF[1,:]
-    @test A .* BF[:,1] == AF .*  BF[:,1]
-
-    @test A .* B == AF .* BF
-    @test AF[1,:] .* B == AF[1,:] .* BF
-    @test AF[:,1] .* B == AF[:,1] .* BF
-    @test AF .* B[1,:] == AF .*  BF[1,:]
-    @test AF .* B[:,1] == AF .*  BF[:,1]
-
-    @test A .* B == AF .* BF
-    @test A[1,:] .* B == AF[1,:] .* BF
-    @test A[:,1] .* B == AF[:,1] .* BF
-    @test A .* B[1,:] == AF .*  BF[1,:]
-    @test A .* B[:,1] == AF .*  BF[:,1]
-
-    @test A .* 3 == AF .* 3
-    @test 3 .* A == 3 .* AF
-    @test A[1,:] .* 3 == AF[1,:] .* 3
-    @test A[:,1] .* 3 == AF[:,1] .* 3
-
-    @test A .- 3 == AF .- 3
-    @test 3 .- A == 3 .- AF
-    @test A .- B == AF .- BF
-    @test A - AF == zeros(size(AF))
-    @test AF - A == zeros(size(AF))
-    @test A[1,:] .- B == AF[1,:] .- BF
-    @test A[:,1] .- B == AF[:,1] .- BF
-    @test A .- B[1,:] == AF .-  BF[1,:]
-    @test A .- B[:,1] == AF .-  BF[:,1]
-
-    @test A .+ 3 == AF .+ 3
-    @test 3 .+ A == 3 .+ AF
-    @test A .+ B == AF .+ BF
-    @test A + AF == AF + A
-    @test (A .< B) == (AF .< BF)
-    @test (A .!= B) == (AF .!= BF)
-
-    @test A ./ 3 == AF ./ 3
-    @test A .\ 3 == AF .\ 3
-    @test 3 ./ A == 3 ./ AF
-    @test 3 .\ A == 3 .\ AF
-    @test A .\ C == AF .\ CF
-    @test A ./ C == AF ./ CF
-    @test A ./ CF[:,1] == AF ./ CF[:,1]
-    @test A .\ CF[:,1] == AF .\ CF[:,1]
-    @test BF ./ C == BF ./ CF
-    @test BF .\ C == BF .\ CF
-
-    @test A .^ 3 == AF .^ 3
-    @test 3 .^ A == 3 .^ AF
-    @test A .^ BF[:,1] == AF .^ BF[:,1]
-    @test BF[:,1] .^ A == BF[:,1] .^ AF
-
-    @test spzeros(0,0)  + spzeros(0,0) == zeros(0,0)
-    @test spzeros(0,0)  * spzeros(0,0) == zeros(0,0)
-    @test spzeros(1,0) .+ spzeros(2,1) == zeros(2,0)
-    @test spzeros(1,0) .* spzeros(2,1) == zeros(2,0)
-    @test spzeros(1,2) .+ spzeros(0,1) == zeros(0,2)
-    @test spzeros(1,2) .* spzeros(0,1) == zeros(0,2)
-end
-
-@testset "sparse vector broadcast of two arguments" begin
-    sv1, sv5 = sprand(1, 1.), sprand(5, 1.)
-    for (sa, sb) in ((sv1, sv1), (sv1, sv5), (sv5, sv1), (sv5, sv5))
-        fa, fb = Vector(sa), Vector(sb)
-        for f in (+, -, *, min, max)
-            @test @inferred(broadcast(f, sa, sb))::SparseVector == broadcast(f, fa, fb)
-            @test @inferred(broadcast(f, Vector(sa), sb))::SparseVector == broadcast(f, fa, fb)
-            @test @inferred(broadcast(f, sa, Vector(sb)))::SparseVector == broadcast(f, fa, fb)
-            @test @inferred(broadcast(f, SparseMatrixCSC(sa), sb))::SparseMatrixCSC == broadcast(f, reshape(fa, Val(2)), fb)
-            @test @inferred(broadcast(f, sa, SparseMatrixCSC(sb)))::SparseMatrixCSC == broadcast(f, fa, reshape(fb, Val(2)))
-            if length(fa) == length(fb)
-                @test @inferred(map(f, sa, sb))::SparseVector == broadcast(f, fa, fb)
-            end
-        end
-        if length(fa) == length(fb)
-            for f in (+, -)
-                @test @inferred(f(sa, sb))::SparseVector == f(fa, fb)
-                @test @inferred(f(Vector(sa), sb))::SparseVector == f(fa, fb)
-                @test @inferred(f(sa, Vector(sb)))::SparseVector == f(fa, fb)
-            end
-        end
-    end
-end
-
-@testset "aliasing and indexed assignment or broadcast!" begin
-    A = sparsevec([0, 0, 1, 1])
-    B = sparsevec([1, 1, 0, 0])
-    A .+= B
-    @test A == sparse([1,1,1,1])
-
-    A = sprandn(10, 10, 0.1)
-    fA = Array(A)
-    b = randn(10);
-    broadcast!(/, A, A, b)
-    @test A == fA ./ Array(b)
-
-    a = sparse([1,3,5])
-    b = sparse([3,1,2])
-    a[b] = a
-    @test a == [3,5,1]
-    a = sparse([3,2,1])
-    a[a] = [4,5,6]
-    @test a == [6,5,4]
-
-    A = sparse([1,2,3,4])
-    V = view(A, A)
-    @test V == A
-    V[1] = 2
-    @test V == A == [2,2,3,4]
-    V[1] = 2^30
-    @test V == A == [2^30, 2, 3, 4]
-
-    A = sparse([2,1,4,3])
-    V = view(A, :)
-    A[V] = (1:4) .+ 2^30
-    @test A == [2,1,4,3] .+ 2^30
-
-    A = sparse([2,1,4,3])
-    R = reshape(view(A, :), 2, 2)
-    A[R] = (1:4) .+ 2^30
-    @test A == [2,1,4,3] .+ 2^30
-
-    A = sparse([2,1,4,3])
-    R = reshape(A, 2, 2)
-    A[R] = (1:4) .+ 2^30
-    @test A == [2,1,4,3] .+ 2^30
-
-    # And broadcasting
-    a = sparse([1,3,5])
-    b = sparse([3,1,2])
-    a[b] .= a
-    @test a == [3,5,1]
-    a = sparse([3,2,1])
-    a[a] .= [4,5,6]
-    @test a == [6,5,4]
-
-    A = sparse([2,1,4,3])
-    V = view(A, :)
-    A[V] .= (1:4) .+ 2^30
-    @test A == [2,1,4,3] .+ 2^30
-
-    A = sparse([2,1,4,3])
-    R = reshape(view(A, :), 2, 2)
-    A[R] .= reshape((1:4) .+ 2^30, 2, 2)
-    @test A == [2,1,4,3] .+ 2^30
-
-    A = sparse([2,1,4,3])
-    R = reshape(A, 2, 2)
-    A[R] .= reshape((1:4) .+ 2^30, 2, 2)
-    @test A == [2,1,4,3] .+ 2^30
-end
-
-@testset "1-dimensional 'opt-out' (non) sparse broadcasting" begin
-    # SparseArrays intentionally only promotes to sparse for limited array types
-    # More support may be added in the future, but for now let's make sure that
-    # broadcast still performs as expected (issue #26977)
-    A = spzeros(5)
-    @test A .+ (1:5) == 1:5
-    @test A .* 2 .+ view(collect(1:10), 1:5) == 1:5
-    @test 2 .* A .+ view(1:10, 1:5) == 1:5
-    @test (A .+ (1:5)) .* 2 == 2:2:10
-    @test ((1:5) .+ A) .* 2 == 2:2:10
-    @test 2 .* ((1:5) .+ A) == 2:2:10
-    @test 2 .* (A .+ (1:5)) == 2:2:10
-
-    @test Diagonal(spzeros(5)) \ view(rand(10), 1:5) == [Inf,Inf,Inf,Inf,Inf]
-end
-
-@testset "Issue #27836" begin
-    @test minimum(sparse([1, 2], [1, 2], ones(Int32, 2)), dims = 1) isa Matrix
-end
-
-@testset "Issue #30118" begin
-    @test ((_, x) -> x).(Int, spzeros(3)) == spzeros(3)
-    @test ((_, _, x) -> x).(Int, Int, spzeros(3)) == spzeros(3)
-    @test ((_, _, _, x) -> x).(Int, Int, Int, spzeros(3)) == spzeros(3)
-    @test_broken ((_, _, _, _, x) -> x).(Int, Int, Int, Int, spzeros(3)) == spzeros(3)
-end
-
-using SparseArrays.HigherOrderFns: SparseVecStyle, SparseMatStyle
-
-@testset "Issue #30120: method ambiguity" begin
-    # HigherOrderFns._copy(f) was ambiguous.  It may be impossible to
-    # invoke this from dot notation and it is an error anyway.  But
-    # when someone invokes it by accident, we want it to produce a
-    # meaningful error.
-    err = try
-        copy(Broadcast.Broadcasted{SparseVecStyle}(rand, ()))
-    catch err
-        err
-    end
-    @test err isa MethodError
-    @test !occursin("is ambiguous", sprint(showerror, err))
-    @test occursin("no method matching _copy(::typeof(rand))", sprint(showerror, err))
-end
-
-@testset "Sparse outer product, for type $T and vector $op" for
-         op in (transpose, adjoint),
-         T in (Float64, ComplexF64)
-    m, n, p = 100, 250, 0.1
-    A = sprand(T, m, n, p)
-    a, b = view(A, :, 1), sprand(T, m, p)
-    av, bv = Vector(a), Vector(b)
-    v = @inferred a .* op(b)
-    w = @inferred b .* op(a)
-    @test issparse(v)
-    @test issparse(w)
-    @test v == av .* op(bv)
-    @test w == bv .* op(av)
-end
-
-@testset "issue #31758: out of bounds write in _map_zeropres!" begin
-    y = sparsevec([2,7], [1., 2.], 10)
-    x1 = sparsevec(fill(1.0, 10))
-    x2 = sparsevec([2,7], [1., 2.], 10)
-    x3 = sparsevec(fill(1.0, 10))
-    f(x, y, z) = x == y == z == 0 ? 0.0 : NaN
-    y .= f.(x1, x2, x3)
-    @test all(isnan, y)
-end
-
-@testset "Vec/Mat Style" begin
-    @test SparseVecStyle(Val(0)) == SparseVecStyle()
-    @test SparseVecStyle(Val(1)) == SparseVecStyle()
-    @test SparseVecStyle(Val(2)) == SparseMatStyle()
-    @test SparseVecStyle(Val(3)) == Broadcast.DefaultArrayStyle{3}()
-    @test SparseMatStyle(Val(0)) == SparseMatStyle()
-    @test SparseMatStyle(Val(1)) == SparseMatStyle()
-    @test SparseMatStyle(Val(2)) == SparseMatStyle()
-    @test SparseMatStyle(Val(3)) == Broadcast.DefaultArrayStyle{3}()
-end
-
-end # module
diff --git a/stdlib/SparseArrays/test/runtests.jl b/stdlib/SparseArrays/test/runtests.jl
deleted file mode 100644
index 29581313c18d5c..00000000000000
--- a/stdlib/SparseArrays/test/runtests.jl
+++ /dev/null
@@ -1,5 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-for file in readlines(joinpath(@__DIR__, "testgroups"))
-    include(file * ".jl")
-end
diff --git a/stdlib/SparseArrays/test/simplesmatrix.jl b/stdlib/SparseArrays/test/simplesmatrix.jl
deleted file mode 100644
index 04c971246ea504..00000000000000
--- a/stdlib/SparseArrays/test/simplesmatrix.jl
+++ /dev/null
@@ -1,52 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-struct SimpleSMatrix{N,M,T} <: AbstractMatrix{T}
-    m::Matrix{T}
-end
-
-SimpleSMatrix{N,M}(m::AbstractMatrix{T}) where {N,M,T} =
-    size(m) == (N, M) ? SimpleSMatrix{N,M,T}(m) : throw(error("Wrong matrix size"))
-
-Base.:*(a::SimpleSMatrix{N,O}, b::SimpleSMatrix{O,M}) where {N,O,M} =
-    SimpleSMatrix{N,M}(a.m * b.m)
-
-Base.:*(a::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix{O,N}}, b::SimpleSMatrix{O,M}) where {N,O,M} =
-    SimpleSMatrix{N,M}(adjoint(a.parent.m) * b.m)
-
-Base.:*(a::SimpleSMatrix{N,O}, b::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix{M,O}}) where {N,O,M} =
-    SimpleSMatrix{N,M}(a.m * adjoint(b.parent.m))
-
-Base.:+(a::SimpleSMatrix{N,M}, b::SimpleSMatrix{N,M}) where {N,M} =
-    SimpleSMatrix{N,M}(a.m + b.m)
-
-Base.:+(a::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix{M,N}}, b::SimpleSMatrix{N,M}) where {N,M} =
-    SimpleSMatrix{N,M}(adjoint(a.parent.m) + b.m)
-
-Base.:+(a::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix}, b::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix}) =
-    (a' + b')'
-
-Base.:+(a::SimpleSMatrix{N,M}, b::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix{M,N}}) where {N,M} =
-    SimpleSMatrix{N,M}(a.m + adjoint(b.parent.m))
-
-Base.:-(a::SimpleSMatrix{N,M}, b::SimpleSMatrix{N,M}) where {N,M} =
-    SimpleSMatrix{N,M}(a.m - b.m)
-
-Base.:-(a::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix{M,N}}, b::SimpleSMatrix{N,M}) where {N,M} =
-    SimpleSMatrix{N,M}(adjoint(a.parent.m) - b.m)
-
-Base.:-(a::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix}, b::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix}) =
-    (a' - b')'
-
-Base.:-(a::SimpleSMatrix{N,M}, b::LinearAlgebra.Adjoint{<:Any, <:SimpleSMatrix{M,N}}) where {N,M} =
-    SimpleSMatrix{N,M}(a.m - adjoint(b.parent.m))
-
-Base.size(a::SimpleSMatrix{N,M}) where {N,M} = (N, M)
-
-Base.length(a::SimpleSMatrix{N,M}) where {N,M} = N * M
-
-Base.zero(::Type{S}) where {N,M,T,S<:SimpleSMatrix{N,M,T}} = SimpleSMatrix{N,M}(zeros(T, N, M))
-
-Base.getindex(s::SimpleSMatrix, inds...) = getindex(s.m, inds...)
-
-Base.convert(::Type{S}, value::Matrix) where {N,M,T,S<:SimpleSMatrix{N,M,T}} =
-    SimpleSMatrix{N,M}(T.(value))
diff --git a/stdlib/SparseArrays/test/sparse.jl b/stdlib/SparseArrays/test/sparse.jl
deleted file mode 100644
index 79e5a98d6bc228..00000000000000
--- a/stdlib/SparseArrays/test/sparse.jl
+++ /dev/null
@@ -1,3085 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module SparseTests
-
-using Test
-using SparseArrays
-using SparseArrays: getcolptr, nonzeroinds, _show_with_braille_patterns
-using LinearAlgebra
-using Printf: @printf # for debug
-using Random
-using Test: guardseed
-using InteractiveUtils: @which
-using Dates
-include("forbidproperties.jl")
-include("simplesmatrix.jl")
-
-@testset "Issue #33169" begin
-    m21 = sparse([1, 2], [2, 2], SimpleSMatrix{2,1}.([rand(2, 1), rand(2, 1)]), 2, 2)
-    m12 = sparse([1, 2], [2, 2], SimpleSMatrix{1,2}.([rand(1, 2), rand(1, 2)]), 2, 2)
-    m22 = sparse([1, 2], [2, 2], SimpleSMatrix{2,2}.([rand(2, 2), rand(2, 2)]), 2, 2)
-    m23 = sparse([1, 2], [2, 2], SimpleSMatrix{2,3}.([rand(2, 3), rand(2, 3)]), 2, 2)
-    v12 = sparsevec([2], SimpleSMatrix{1,2}.([rand(1, 2)]))
-    v21 = sparsevec([2], SimpleSMatrix{2,1}.([rand(2, 1)]))
-    @test m22 * m21 ≈ Matrix(m22) * Matrix(m21)
-    @test m22' * m21 ≈ Matrix(m22') * Matrix(m21)
-    @test m21' * m22 ≈ Matrix(m21') * Matrix(m22)
-    @test m23' * m22 * m21 ≈ Matrix(m23') * Matrix(m22) * Matrix(m21)
-    @test m21 * v12 ≈ Matrix(m21) * Vector(v12)
-    @test m12' * v12 ≈ Matrix(m12') * Vector(v12)
-    @test v21' * m22 ≈ Vector(v21)' * Matrix(m22)
-    @test v12' * m21' ≈ Vector(v12)' * Matrix(m21)'
-    @test v21' * v21 ≈ Vector(v21)' * Vector(v21)
-    @test v21' * m22 * v21 ≈ Vector(v21)' * Matrix(m22) * Vector(v21)
-end
-
-@testset "issparse" begin
-    @test issparse(sparse(fill(1,5,5)))
-    @test !issparse(fill(1,5,5))
-    @test nnz(zero(sparse(fill(1,5,5)))) == 0
-end
-
-@testset "iszero specialization for SparseMatrixCSC" begin
-    @test !iszero(sparse(I, 3, 3))                  # test failure
-    @test iszero(spzeros(3, 3))                     # test success with no stored entries
-    S = sparse(I, 3, 3)
-    S[:] .= 0
-    @test iszero(S)  # test success with stored zeros via broadcasting
-    S = sparse(I, 3, 3)
-    fill!(S, 0)
-    @test iszero(S)  # test success with stored zeros via fill!
-    @test iszero(SparseMatrixCSC(2, 2, [1,2,3], [1,2], [0,0,1])) # test success with nonzeros beyond data range
-end
-@testset "isone specialization for SparseMatrixCSC" begin
-    @test isone(sparse(I, 3, 3))    # test success
-    @test !isone(sparse(I, 3, 4))   # test failure for non-square matrix
-    @test !isone(spzeros(3, 3))     # test failure for too few stored entries
-    @test !isone(sparse(2I, 3, 3))  # test failure for non-one diagonal entries
-    @test !isone(sparse(Bidiagonal(fill(1, 3), fill(1, 2), :U))) # test failure for non-zero off-diag entries
-end
-
-@testset "indtype" begin
-    @test SparseArrays.indtype(sparse(Int8[1,1],Int8[1,1],[1,1])) == Int8
-end
-
-@testset "spzeros de-splatting" begin
-    @test spzeros(Float64, Int64, (2, 2)) == spzeros(Float64, Int64, 2, 2)
-end
-
-@testset "conversion to AbstractMatrix/SparseMatrix of same eltype" begin
-    a = sprand(5, 5, 0.2)
-    @test AbstractMatrix{eltype(a)}(a) == a
-    @test SparseMatrixCSC{eltype(a)}(a) == a
-    @test SparseMatrixCSC{eltype(a), Int}(a) == a
-    @test SparseMatrixCSC{eltype(a)}(Array(a)) == a
-    @test Array(SparseMatrixCSC{eltype(a), Int8}(a)) == Array(a)
-end
-
-@testset "sparse matrix construction" begin
-    @test (A = fill(1.0+im,5,5); isequal(Array(sparse(A)), A))
-    @test_throws ArgumentError sparse([1,2,3], [1,2], [1,2,3], 3, 3)
-    @test_throws ArgumentError sparse([1,2,3], [1,2,3], [1,2], 3, 3)
-    @test_throws ArgumentError sparse([1,2,3], [1,2,3], [1,2,3], 0, 1)
-    @test_throws ArgumentError sparse([1,2,3], [1,2,3], [1,2,3], 1, 0)
-    @test_throws ArgumentError sparse([1,2,4], [1,2,3], [1,2,3], 3, 3)
-    @test_throws ArgumentError sparse([1,2,3], [1,2,4], [1,2,3], 3, 3)
-    @test isequal(sparse(Int[], Int[], Int[], 0, 0), SparseMatrixCSC(0, 0, Int[1], Int[], Int[]))
-    @test isequal(sparse(big.([1,1,1,2,2,3,4,5]),big.([1,2,3,2,3,3,4,5]),big.([1,2,4,3,5,6,7,8]), 6, 6),
-        SparseMatrixCSC(6, 6, big.([1,2,4,7,8,9,9]), big.([1,1,2,1,2,3,4,5]), big.([1,2,3,4,5,6,7,8])))
-    @test sparse(Any[1,2,3], Any[1,2,3], Any[1,1,1]) == sparse([1,2,3], [1,2,3], [1,1,1])
-    @test sparse(Any[1,2,3], Any[1,2,3], Any[1,1,1], 5, 4) == sparse([1,2,3], [1,2,3], [1,1,1], 5, 4)
-    # with combine
-    @test sparse([1, 1, 2, 2, 2], [1, 2, 1, 2, 2], 1.0, 2, 2, +) == sparse([1, 1, 2, 2], [1, 2, 1, 2], [1.0, 1.0, 1.0, 2.0], 2, 2)
-    @test sparse([1, 1, 2, 2, 2], [1, 2, 1, 2, 2], -1.0, 2, 2, *) == sparse([1, 1, 2, 2], [1, 2, 1, 2], [-1.0, -1.0, -1.0, 1.0], 2, 2)
-end
-
-@testset "SparseMatrixCSC construction from UniformScaling" begin
-    @test_throws ArgumentError SparseMatrixCSC(I, -1, 3)
-    @test_throws ArgumentError SparseMatrixCSC(I, 3, -1)
-    @test SparseMatrixCSC(2I, 3, 3)::SparseMatrixCSC{Int,Int} == Matrix(2I, 3, 3)
-    @test SparseMatrixCSC(2I, 3, 4)::SparseMatrixCSC{Int,Int} == Matrix(2I, 3, 4)
-    @test SparseMatrixCSC(2I, 4, 3)::SparseMatrixCSC{Int,Int} == Matrix(2I, 4, 3)
-    @test SparseMatrixCSC(2.0I, 3, 3)::SparseMatrixCSC{Float64,Int} == Matrix(2I, 3, 3)
-    @test SparseMatrixCSC{Real}(2I, 3, 3)::SparseMatrixCSC{Real,Int} == Matrix(2I, 3, 3)
-    @test SparseMatrixCSC{Float64}(2I, 3, 3)::SparseMatrixCSC{Float64,Int} == Matrix(2I, 3, 3)
-    @test SparseMatrixCSC{Float64,Int32}(2I, 3, 3)::SparseMatrixCSC{Float64,Int32} == Matrix(2I, 3, 3)
-    @test SparseMatrixCSC{Float64,Int32}(0I, 3, 3)::SparseMatrixCSC{Float64,Int32} == Matrix(0I, 3, 3)
-end
-@testset "sparse(S::UniformScaling, shape...) convenience constructors" begin
-    # we exercise these methods only lightly as these methods call the SparseMatrixCSC
-    # constructor methods well-exercised by the immediately preceding testset
-    @test sparse(2I, 3, 4)::SparseMatrixCSC{Int,Int} == Matrix(2I, 3, 4)
-    @test sparse(2I, (3, 4))::SparseMatrixCSC{Int,Int} == Matrix(2I, 3, 4)
-end
-
-se33 = SparseMatrixCSC{Float64}(I, 3, 3)
-do33 = fill(1.,3)
-
-@testset "sparse binary operations" begin
-    @test isequal(se33 * se33, se33)
-
-    @test Array(se33 + convert(SparseMatrixCSC{Float32,Int32}, se33)) == Matrix(2I, 3, 3)
-    @test Array(se33 * convert(SparseMatrixCSC{Float32,Int32}, se33)) == Matrix(I, 3, 3)
-
-    @testset "shape checks for sparse elementwise binary operations equivalent to map" begin
-        sqrfloatmat, colfloatmat = sprand(4, 4, 0.5), sprand(4, 1, 0.5)
-        @test_throws DimensionMismatch (+)(sqrfloatmat, colfloatmat)
-        @test_throws DimensionMismatch (-)(sqrfloatmat, colfloatmat)
-        @test_throws DimensionMismatch map(min, sqrfloatmat, colfloatmat)
-        @test_throws DimensionMismatch map(max, sqrfloatmat, colfloatmat)
-        sqrboolmat, colboolmat = sprand(Bool, 4, 4, 0.5), sprand(Bool, 4, 1, 0.5)
-        @test_throws DimensionMismatch map(&, sqrboolmat, colboolmat)
-        @test_throws DimensionMismatch map(|, sqrboolmat, colboolmat)
-        @test_throws DimensionMismatch map(xor, sqrboolmat, colboolmat)
-    end
-end
-
-@testset "Issue #30006" begin
-    A = SparseMatrixCSC{Float64,Int32}(spzeros(3,3))
-    A[:, 1] = [1, 2, 3]
-    @test nnz(A) == 3
-    @test nonzeros(A) == [1, 2, 3]
-end
-
-@testset "concatenation tests" begin
-    sp33 = sparse(1.0I, 3, 3)
-
-    @testset "horizontal concatenation" begin
-        @test [se33 se33] == [Array(se33) Array(se33)]
-        @test length(nonzeros([sp33 0I])) == 3
-    end
-
-    @testset "vertical concatenation" begin
-        @test [se33; se33] == [Array(se33); Array(se33)]
-        se33_32bit = convert(SparseMatrixCSC{Float32,Int32}, se33)
-        @test [se33; se33_32bit] == [Array(se33); Array(se33_32bit)]
-        @test length(nonzeros([sp33; 0I])) == 3
-    end
-
-    se44 = sparse(1.0I, 4, 4)
-    sz42 = spzeros(4, 2)
-    sz41 = spzeros(4, 1)
-    sz34 = spzeros(3, 4)
-    se77 = sparse(1.0I, 7, 7)
-    @testset "h+v concatenation" begin
-        @test [se44 sz42 sz41; sz34 se33] == se77
-        @test length(nonzeros([sp33 0I; 1I 0I])) == 6
-    end
-
-    @testset "blockdiag concatenation" begin
-        @test blockdiag(se33, se33) == sparse(1:6,1:6,fill(1.,6))
-        @test blockdiag() == spzeros(0, 0)
-        @test nnz(blockdiag()) == 0
-    end
-
-    @testset "concatenation promotion" begin
-        sz41_f32 = spzeros(Float32, 4, 1)
-        se33_i32 = sparse(Int32(1)I, 3, 3)
-        @test [se44 sz42 sz41_f32; sz34 se33_i32] == se77
-    end
-
-    @testset "mixed sparse-dense concatenation" begin
-        sz33 = spzeros(3, 3)
-        de33 = Matrix(1.0I, 3, 3)
-        @test [se33 de33; sz33 se33] == Array([se33 se33; sz33 se33 ])
-    end
-
-    # check splicing + concatenation on random instances, with nested vcat and also side-checks sparse ref
-    @testset "splicing + concatenation on random instances" begin
-        for i = 1 : 10
-            a = sprand(5, 4, 0.5)
-            @test [a[1:2,1:2] a[1:2,3:4]; a[3:5,1] [a[3:4,2:4]; a[5:5,2:4]]] == a
-        end
-    end
-end
-
-let
-    a116 = copy(reshape(1:16, 4, 4))
-    s116 = sparse(a116)
-
-    @testset "sparse ref" begin
-        p = [4, 1, 2, 3, 2]
-        @test Array(s116[p,:]) == a116[p,:]
-        @test Array(s116[:,p]) == a116[:,p]
-        @test Array(s116[p,p]) == a116[p,p]
-    end
-
-    @testset "sparse assignment" begin
-        p = [4, 1, 3]
-        a116[p, p] .= -1
-        s116[p, p] .= -1
-        @test a116 == s116
-
-        p = [2, 1, 4]
-        a116[p, p] = reshape(1:9, 3, 3)
-        s116[p, p] = reshape(1:9, 3, 3)
-        @test a116 == s116
-    end
-end
-
-@testset "dropdims" begin
-    for i = 1:5
-        am = sprand(20, 1, 0.2)
-        av = dropdims(am, dims=2)
-        @test ndims(av) == 1
-        @test all(av.==am)
-        am = sprand(1, 20, 0.2)
-        av = dropdims(am, dims=1)
-        @test ndims(av) == 1
-        @test all(av' .== am)
-    end
-end
-
-@testset "Issue #28963" begin
-    @test_throws DimensionMismatch (spzeros(10,10)[:, :] = sprand(10,20,0.5))
-end
-
-@testset "matrix-vector multiplication (non-square)" begin
-    for i = 1:5
-        a = sprand(10, 5, 0.5)
-        b = rand(5)
-        @test maximum(abs.(a*b - Array(a)*b)) < 100*eps()
-    end
-end
-
-@testset "sparse matrix * BitArray" begin
-    A = sprand(5,5,0.2)
-    B = trues(5)
-    @test A*B ≈ Array(A)*B
-    B = trues(5,5)
-    @test A*B ≈ Array(A)*B
-    @test B*A ≈ B*Array(A)
-end
-
-@testset "complex matrix-vector multiplication and left-division" begin
-    if Base.USE_GPL_LIBS
-    for i = 1:5
-        a = I + 0.1*sprandn(5, 5, 0.2)
-        b = randn(5,3) + im*randn(5,3)
-        c = randn(5) + im*randn(5)
-        d = randn(5) + im*randn(5)
-        α = rand(ComplexF64)
-        β = rand(ComplexF64)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(mul!(similar(b), a, b) - Array(a)*b)) < 100*eps()) # for compatibility with present matmul API. Should go away eventually.
-        @test (maximum(abs.(mul!(similar(c), a, c) - Array(a)*c)) < 100*eps()) # for compatibility with present matmul API. Should go away eventually.
-        @test (maximum(abs.(mul!(similar(b), transpose(a), b) - transpose(Array(a))*b)) < 100*eps()) # for compatibility with present matmul API. Should go away eventually.
-        @test (maximum(abs.(mul!(similar(c), transpose(a), c) - transpose(Array(a))*c)) < 100*eps()) # for compatibility with present matmul API. Should go away eventually.
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-        @test (maximum(abs.((a'*c + d) - (Array(a)'*c + d))) < 1000*eps())
-        @test (maximum(abs.((α*transpose(a)*c + β*d) - (α*transpose(Array(a))*c + β*d))) < 1000*eps())
-        @test (maximum(abs.((transpose(a)*c + d) - (transpose(Array(a))*c + d))) < 1000*eps())
-        c = randn(6) + im*randn(6)
-        @test_throws DimensionMismatch α*transpose(a)*c + β*c
-        @test_throws DimensionMismatch α*transpose(a)*fill(1.,5) + β*c
-
-        a = I + 0.1*sprandn(5, 5, 0.2) + 0.1*im*sprandn(5, 5, 0.2)
-        b = randn(5,3)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-
-        a = I + tril(0.1*sprandn(5, 5, 0.2))
-        b = randn(5,3) + im*randn(5,3)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-
-        a = I + tril(0.1*sprandn(5, 5, 0.2) + 0.1*im*sprandn(5, 5, 0.2))
-        b = randn(5,3)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-
-        a = I + triu(0.1*sprandn(5, 5, 0.2))
-        b = randn(5,3) + im*randn(5,3)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-
-        a = I + triu(0.1*sprandn(5, 5, 0.2) + 0.1*im*sprandn(5, 5, 0.2))
-        b = randn(5,3)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-
-        a = I + triu(0.1*sprandn(5, 5, 0.2))
-        b = randn(5,3) + im*randn(5,3)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-
-        # UpperTriangular/LowerTriangular solve
-        a = UpperTriangular(I + triu(0.1*sprandn(5, 5, 0.2)))
-        b = sprandn(5, 5, 0.2)
-        @test (maximum(abs.(a\b - Array(a)\Array(b))) < 1000*eps())
-        # test error throwing for bwdTrisolve
-        @test_throws DimensionMismatch a\Matrix{Float64}(I, 6, 6)
-        a = LowerTriangular(I + tril(0.1*sprandn(5, 5, 0.2)))
-        b = sprandn(5, 5, 0.2)
-        @test (maximum(abs.(a\b - Array(a)\Array(b))) < 1000*eps())
-        # test error throwing for fwdTrisolve
-        @test_throws DimensionMismatch a\Matrix{Float64}(I, 6, 6)
-
-
-
-        a = sparse(Diagonal(randn(5) + im*randn(5)))
-        b = randn(5,3)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-
-        b = randn(5,3) + im*randn(5,3)
-        @test (maximum(abs.(a*b - Array(a)*b)) < 100*eps())
-        @test (maximum(abs.(a'b - Array(a)'b)) < 100*eps())
-        @test (maximum(abs.(transpose(a)*b - transpose(Array(a))*b)) < 100*eps())
-        @test (maximum(abs.(a\b - Array(a)\b)) < 1000*eps())
-        @test (maximum(abs.(a'\b - Array(a')\b)) < 1000*eps())
-        @test (maximum(abs.(transpose(a)\b - Array(transpose(a))\b)) < 1000*eps())
-    end
-    end
-end
-
-@testset "matrix multiplication" begin
-    for (m, p, n, q, k) in (
-                            (10, 0.7, 5, 0.3, 15),
-                            (100, 0.01, 100, 0.01, 20),
-                            (100, 0.1, 100, 0.2, 100),
-                           )
-        a = sprand(m, n, p)
-        b = sprand(n, k, q)
-        as = sparse(a')
-        bs = sparse(b')
-        ab = a * b
-        aab = Array(a) * Array(b)
-        @test maximum(abs.(ab - aab)) < 100*eps()
-        @test a*bs' == ab
-        @test as'*b == ab
-        @test as'*bs' == ab
-        f = Diagonal(rand(n))
-        @test Array(a*f) == Array(a)*f
-        @test Array(f*b) == f*Array(b)
-        A = rand(2n, 2n)
-        sA = view(A, 1:2:2n, 1:2:2n)
-        @test Array(sA*b) ≈ Array(sA)*Array(b)
-        @test Array(a*sA) ≈ Array(a)*Array(sA)
-        c = sprandn(ComplexF32, n, n, q)
-        @test Array(sA*c') ≈ Array(sA)*Array(c)'
-        @test Array(c'*sA) ≈ Array(c)'*Array(sA)
-    end
-end
-
-@testset "multiplication of sparse matrix and triangular matrix" begin
-    _sparse_test_matrix(n, T) =  T == Int ? sparse(rand(0:4, n, n)) : sprandn(T, n, n, 0.6)
-    _triangular_test_matrix(n, TA, T) = T == Int ? TA(rand(0:9, n, n)) : TA(randn(T, n, n))
-
-    n = 5
-    for T1 in (Int, Float64, ComplexF32)
-        S = _sparse_test_matrix(n, T1)
-        MS = Matrix(S)
-        for T2 in (Int, Float64, ComplexF32)
-            for TM in (LowerTriangular, UnitLowerTriangular, UpperTriangular, UnitLowerTriangular)
-                T = _triangular_test_matrix(n, TM, T2)
-                MT = Matrix(T)
-                @test isa(T * S, DenseMatrix)
-                @test isa(S * T, DenseMatrix)
-                for transT in (identity, adjoint, transpose), transS in (identity, adjoint, transpose)
-                    @test transT(T) * transS(S) ≈ transT(MT) * transS(MS)
-                    @test transS(S) * transT(T) ≈ transS(MS) * transT(MT)
-                end
-            end
-        end
-    end
-end
-
-@testset "Issue #30502" begin
-    @test nnz(sprand(UInt8(16), UInt8(16), 1.0)) == 256
-    @test nnz(sprand(UInt8(16), UInt8(16), 1.0, ones)) == 256
-end
-
-@testset "kronecker product" begin
-    for (m,n) in ((5,10), (13,8), (14,10))
-        a = sprand(m, 5, 0.4); a_d = Matrix(a)
-        b = sprand(n, 6, 0.3); b_d = Matrix(b)
-        v = view(a, :, 1); v_d = Vector(v)
-        x = sprand(m, 0.4); x_d = Vector(x)
-        y = sprand(n, 0.3); y_d = Vector(y)
-        c_di = Diagonal(rand(m)); c = sparse(c_di); c_d = Array(c_di)
-        d_di = Diagonal(rand(n)); d = sparse(d_di); d_d = Array(d_di)
-        # mat ⊗ mat
-        @test Array(kron(a, b)) == kron(a_d, b_d)
-        @test Array(kron(a_d, b)) == kron(a_d, b_d)
-        @test Array(kron(a, b_d)) == kron(a_d, b_d)
-        @test issparse(kron(c, d_di))
-        @test Array(kron(c, d_di)) == kron(c_d, d_d)
-        @test issparse(kron(c_di, d))
-        @test Array(kron(c_di, d)) == kron(c_d, d_d)
-        @test issparse(kron(c_di, y))
-        @test Array(kron(c_di, y)) == kron(c_di, y_d)
-        @test issparse(kron(x, d_di))
-        @test Array(kron(x, d_di)) == kron(x_d, d_di)
-        # vec ⊗ vec
-        @test Vector(kron(x, y)) == kron(x_d, y_d)
-        @test Vector(kron(x_d, y)) == kron(x_d, y_d)
-        @test Vector(kron(x, y_d)) == kron(x_d, y_d)
-        # mat ⊗ vec
-        @test Array(kron(a, y)) == kron(a_d, y_d)
-        @test Array(kron(a_d, y)) == kron(a_d, y_d)
-        @test Array(kron(a, y_d)) == kron(a_d, y_d)
-        # vec ⊗ mat
-        @test Array(kron(x, b)) == kron(x_d, b_d)
-        @test Array(kron(x_d, b)) == kron(x_d, b_d)
-        @test Array(kron(x, b_d)) == kron(x_d, b_d)
-        # vec ⊗ vec'
-        @test issparse(kron(v, y'))
-        @test issparse(kron(x, y'))
-        @test Array(kron(v, y')) == kron(v_d, y_d')
-        @test Array(kron(x, y')) == kron(x_d, y_d')
-        # test different types
-        z = convert(SparseVector{Float16, Int8}, y); z_d = Vector(z)
-        @test Vector(kron(x, z)) == kron(x_d, z_d)
-        @test Array(kron(a, z)) == kron(a_d, z_d)
-        @test Array(kron(z, b)) == kron(z_d, b_d)
-    end
-end
-
-@testset "sparse Frobenius dot/inner product" begin
-    for i = 1:5
-        A = sprand(ComplexF64,10,15,0.4)
-        B = sprand(ComplexF64,10,15,0.5)
-        @test dot(A,B) ≈ dot(Matrix(A),Matrix(B))
-    end
-    @test_throws DimensionMismatch dot(sprand(5,5,0.2),sprand(5,6,0.2))
-end
-
-@testset "generalized dot product" begin
-    for i = 1:5
-        A = sprand(ComplexF64, 10, 15, 0.4)
-        Av = view(A, :, :)
-        x = sprand(ComplexF64, 10, 0.5)
-        y = sprand(ComplexF64, 15, 0.5)
-        @test dot(x, A, y) ≈ dot(Vector(x), A, Vector(y)) ≈ (Vector(x)' * Matrix(A)) * Vector(y)
-        @test dot(x, A, y) ≈ dot(x, Av, y)
-    end
-end
-
-const BASE_TEST_PATH = joinpath(Sys.BINDIR, "..", "share", "julia", "test")
-isdefined(Main, :Quaternions) || @eval Main include(joinpath($(BASE_TEST_PATH), "testhelpers", "Quaternions.jl"))
-using .Main.Quaternions
-
-sA = sprandn(3, 7, 0.5)
-sC = similar(sA)
-dA = Array(sA)
-
-@testset "scaling with * and mul!, rmul!, and lmul!" begin
-    b = randn(7)
-    @test dA * Diagonal(b) == sA * Diagonal(b)
-    @test dA * Diagonal(b) == mul!(sC, sA, Diagonal(b))
-    @test dA * Diagonal(b) == rmul!(copy(sA), Diagonal(b))
-    b = randn(3)
-    @test Diagonal(b) * dA == Diagonal(b) * sA
-    @test Diagonal(b) * dA == mul!(sC, Diagonal(b), sA)
-    @test Diagonal(b) * dA == lmul!(Diagonal(b), copy(sA))
-
-    @test dA * 0.5            == sA * 0.5
-    @test dA * 0.5            == mul!(sC, sA, 0.5)
-    @test dA * 0.5            == rmul!(copy(sA), 0.5)
-    @test 0.5 * dA            == 0.5 * sA
-    @test 0.5 * dA            == mul!(sC, sA, 0.5)
-    @test 0.5 * dA            == lmul!(0.5, copy(sA))
-    @test mul!(sC, 0.5, sA)   == mul!(sC, sA, 0.5)
-
-    @testset "inverse scaling with mul!" begin
-        bi = inv.(b)
-        @test lmul!(Diagonal(bi), copy(dA)) ≈ ldiv!(Diagonal(b), copy(sA))
-        @test lmul!(Diagonal(bi), copy(dA)) ≈ ldiv!(transpose(Diagonal(b)), copy(sA))
-        @test lmul!(Diagonal(conj(bi)), copy(dA)) ≈ ldiv!(adjoint(Diagonal(b)), copy(sA))
-        Aob = Diagonal(b) \ sA
-        @test Aob == ldiv!(Diagonal(b), copy(sA))
-        @test issparse(Aob)
-        @test_throws DimensionMismatch ldiv!(Diagonal(fill(1., length(b)+1)), copy(sA))
-        @test_throws LinearAlgebra.SingularException ldiv!(Diagonal(zeros(length(b))), copy(sA))
-
-        dAt = copy(transpose(dA))
-        sAt = copy(transpose(sA))
-        @test rmul!(copy(dAt), Diagonal(bi)) ≈ rdiv!(copy(sAt), Diagonal(b))
-        @test rmul!(copy(dAt), Diagonal(bi)) ≈ rdiv!(copy(sAt), transpose(Diagonal(b)))
-        @test rmul!(copy(dAt), Diagonal(conj(bi))) ≈ rdiv!(copy(sAt), adjoint(Diagonal(b)))
-        Atob = sAt / Diagonal(b)
-        @test Atob == rdiv!(copy(dAt), Diagonal(b))
-        @test issparse(Atob)
-        @test_throws DimensionMismatch rdiv!(copy(sAt), Diagonal(fill(1., length(b)+1)))
-        @test_throws LinearAlgebra.SingularException rdiv!(copy(sAt), Diagonal(zeros(length(b))))
-    end
-
-    @testset "non-commutative multiplication" begin
-        # non-commutative multiplication
-        Avals = Quaternion.(randn(10), randn(10), randn(10), randn(10))
-        sA = sparse(rand(1:3, 10), rand(1:7, 10), Avals, 3, 7)
-        sC = copy(sA)
-        dA = Array(sA)
-
-        b = Quaternion.(randn(7), randn(7), randn(7), randn(7))
-        D = Diagonal(b)
-        @test Array(sA * D) ≈ dA * D
-        @test rmul!(copy(sA), D) ≈ dA * D
-        @test mul!(sC, copy(sA), D) ≈ dA * D
-
-        b = Quaternion.(randn(3), randn(3), randn(3), randn(3))
-        D = Diagonal(b)
-        @test Array(D * sA) ≈ D * dA
-        @test lmul!(D, copy(sA)) ≈ D * dA
-        @test mul!(sC, D, copy(sA)) ≈ D * dA
-    end
-end
-
-@testset "copyto!" begin
-    A = sprand(5, 5, 0.2)
-    B = sprand(5, 5, 0.2)
-    copyto!(A, B)
-    @test A == B
-    @test pointer(nonzeros(A)) != pointer(nonzeros(B))
-    @test pointer(rowvals(A)) != pointer(rowvals(B))
-    @test pointer(getcolptr(A)) != pointer(getcolptr(B))
-    # Test size(A) != size(B), but length(A) == length(B)
-    B = sprand(25, 1, 0.2)
-    copyto!(A, B)
-    @test A[:] == B[:]
-    # Test various size(A) / size(B) combinations
-    for mA in [5, 10, 20], nA in [5, 10, 20], mB in [5, 10, 20], nB in [5, 10, 20]
-        A = sprand(mA,nA,0.4)
-        Aorig = copy(A)
-        B = sprand(mB,nB,0.4)
-        if mA*nA >= mB*nB
-            copyto!(A,B)
-            @assert(A[1:length(B)] == B[:])
-            @assert(A[length(B)+1:end] == Aorig[length(B)+1:end])
-        else
-            @test_throws BoundsError copyto!(A,B)
-        end
-    end
-    # Test eltype(A) != eltype(B), size(A) != size(B)
-    A = sprand(5, 5, 0.2)
-    Aorig = copy(A)
-    B = sparse(rand(Float32, 3, 3))
-    copyto!(A, B)
-    @test A[1:9] == B[:]
-    @test A[10:end] == Aorig[10:end]
-    # Test eltype(A) != eltype(B), size(A) == size(B)
-    A = sparse(rand(Float64, 3, 3))
-    B = sparse(rand(Float32, 3, 3))
-    copyto!(A, B)
-    @test A == B
-    # Test copyto!(dense, sparse)
-    B = sprand(5, 5, 1.0)
-    A = rand(5,5)
-    A´ = similar(A)
-    @test copyto!(A, B) == copyto!(A´, Matrix(B))
-    # Test copyto!(dense, Rdest, sparse, Rsrc)
-    A = rand(5,5)
-    A´ = similar(A)
-    Rsrc = CartesianIndices((3:4, 2:3))
-    Rdest = CartesianIndices((2:3, 1:2))
-    copyto!(A, Rdest, B, Rsrc)
-    copyto!(A´, Rdest, Matrix(B), Rsrc)
-    @test A[Rdest] == A´[Rdest] == Matrix(B)[Rsrc]
-    # Test unaliasing of B´
-    B´ = copy(B)
-    copyto!(B´, Rdest, B´, Rsrc)
-    @test Matrix(B´)[Rdest] == Matrix(B)[Rsrc]
-    # Test that only elements at overlapping linear indices are overwritten
-    A = sprand(3, 3, 1.0); B = ones(4, 4)
-    copyto!(B, A)
-    @test B[4, :] != B[:, 4] == ones(4)
-    # Allow no-op copyto! with empty source even for incompatible eltypes
-    A = sparse(fill("", 0, 0))
-    @test copyto!(B, A) == B
-end
-
-@testset "conj" begin
-    cA = sprandn(5,5,0.2) + im*sprandn(5,5,0.2)
-    @test Array(conj.(cA)) == conj(Array(cA))
-    @test Array(conj!(copy(cA))) == conj(Array(cA))
-end
-
-@testset "SparseMatrixCSC [c]transpose[!] and permute[!]" begin
-    smalldim = 5
-    largedim = 10
-    nzprob = 0.4
-    (m, n) = (smalldim, smalldim)
-    A = sprand(m, n, nzprob)
-    X = similar(A)
-    C = copy(transpose(A))
-    p = randperm(m)
-    q = randperm(n)
-    @testset "common error checking of [c]transpose! methods (ftranspose!)" begin
-        @test_throws DimensionMismatch transpose!(A[:, 1:(smalldim - 1)], A)
-        @test_throws DimensionMismatch transpose!(A[1:(smalldim - 1), 1], A)
-        @test_throws ArgumentError transpose!((B = similar(A); resize!(rowvals(B), nnz(A) - 1); B), A)
-        @test_throws ArgumentError transpose!((B = similar(A); resize!(nonzeros(B), nnz(A) - 1); B), A)
-    end
-    @testset "common error checking of permute[!] methods / source-perm compat" begin
-        @test_throws DimensionMismatch permute(A, p[1:(end - 1)], q)
-        @test_throws DimensionMismatch permute(A, p, q[1:(end - 1)])
-    end
-    @testset "common error checking of permute[!] methods / source-dest compat" begin
-        @test_throws DimensionMismatch permute!(A[1:(m - 1), :], A, p, q)
-        @test_throws DimensionMismatch permute!(A[:, 1:(m - 1)], A, p, q)
-        @test_throws ArgumentError permute!((Y = copy(X); resize!(rowvals(Y), nnz(A) - 1); Y), A, p, q)
-        @test_throws ArgumentError permute!((Y = copy(X); resize!(nonzeros(Y), nnz(A) - 1); Y), A, p, q)
-    end
-    @testset "common error checking of permute[!] methods / source-workmat compat" begin
-        @test_throws DimensionMismatch permute!(X, A, p, q, C[1:(m - 1), :])
-        @test_throws DimensionMismatch permute!(X, A, p, q, C[:, 1:(m - 1)])
-        @test_throws ArgumentError permute!(X, A, p, q, (D = copy(C); resize!(rowvals(D), nnz(A) - 1); D))
-        @test_throws ArgumentError permute!(X, A, p, q, (D = copy(C); resize!(nonzeros(D), nnz(A) - 1); D))
-    end
-    @testset "common error checking of permute[!] methods / source-workcolptr compat" begin
-        @test_throws DimensionMismatch permute!(A, p, q, C, Vector{eltype(rowvals(A))}(undef, length(getcolptr(A)) - 1))
-    end
-    @testset "common error checking of permute[!] methods / permutation validity" begin
-        @test_throws ArgumentError permute!(A, (r = copy(p); r[2] = r[1]; r), q)
-        @test_throws ArgumentError permute!(A, (r = copy(p); r[2] = m + 1; r), q)
-        @test_throws ArgumentError permute!(A, p, (r = copy(q); r[2] = r[1]; r))
-        @test_throws ArgumentError permute!(A, p, (r = copy(q); r[2] = n + 1; r))
-    end
-    @testset "overall functionality of [c]transpose[!] and permute[!]" begin
-        for (m, n) in ((smalldim, smalldim), (smalldim, largedim), (largedim, smalldim))
-            A = sprand(m, n, nzprob)
-            At = copy(transpose(A))
-            # transpose[!]
-            fullAt = Array(transpose(A))
-            @test copy(transpose(A)) == fullAt
-            @test transpose!(similar(At), A) == fullAt
-            # adjoint[!]
-            C = A + im*A/2
-            fullCh = Array(C')
-            @test copy(C') == fullCh
-            @test adjoint!(similar(sparse(fullCh)), C) == fullCh
-            # permute[!]
-            p = randperm(m)
-            q = randperm(n)
-            fullPAQ = Array(A)[p,q]
-            @test permute(A, p, q) == sparse(Array(A[p,q]))
-            @test permute!(similar(A), A, p, q) == fullPAQ
-            @test permute!(similar(A), A, p, q, similar(At)) == fullPAQ
-            @test permute!(copy(A), p, q) == fullPAQ
-            @test permute!(copy(A), p, q, similar(At)) == fullPAQ
-            @test permute!(copy(A), p, q, similar(At), similar(getcolptr(A))) == fullPAQ
-        end
-    end
-end
-
-@testset "transpose of SubArrays" begin
-    A = view(sprandn(10, 10, 0.3), 1:4, 1:4)
-    @test copy(transpose(Array(A))) == Array(transpose(A))
-    @test copy(adjoint(Array(A))) == Array(adjoint(A))
-end
-
-@testset "exp" begin
-    A = sprandn(5,5,0.2)
-    @test ℯ.^A ≈ ℯ.^Array(A)
-end
-
-@testset "reductions" begin
-    pA = sparse(rand(3, 7))
-    p28227 = sparse(Real[0 0.5])
-
-    for arr in (se33, sA, pA, p28227)
-        for f in (sum, prod, minimum, maximum)
-            farr = Array(arr)
-            @test f(arr) ≈ f(farr)
-            @test f(arr, dims=1) ≈ f(farr, dims=1)
-            @test f(arr, dims=2) ≈ f(farr, dims=2)
-            @test f(arr, dims=(1, 2)) ≈ [f(farr)]
-            @test isequal(f(arr, dims=3), f(farr, dims=3))
-        end
-    end
-
-    for f in (sum, prod, minimum, maximum)
-        # Test with a map function that maps to non-zero
-        for arr in (se33, sA, pA)
-            @test f(x->x+1, arr) ≈ f(arr .+ 1)
-        end
-
-        # case where f(0) would throw
-        @test f(x->sqrt(x-1), pA .+ 1) ≈ f(sqrt.(pA))
-        # these actually throw due to #10533
-        # @test f(x->sqrt(x-1), pA .+ 1, dims=1) ≈ f(sqrt(pA), dims=1)
-        # @test f(x->sqrt(x-1), pA .+ 1, dims=2) ≈ f(sqrt(pA), dims=2)
-        # @test f(x->sqrt(x-1), pA .+ 1, dims=3) ≈ f(pA)
-    end
-
-    @testset "empty cases" begin
-        @test sum(sparse(Int[])) === 0
-        @test prod(sparse(Int[])) === 1
-        @test_throws ArgumentError minimum(sparse(Int[]))
-        @test_throws ArgumentError maximum(sparse(Int[]))
-
-        for f in (sum, prod)
-            @test isequal(f(spzeros(0, 1), dims=1), f(Matrix{Int}(I, 0, 1), dims=1))
-            @test isequal(f(spzeros(0, 1), dims=2), f(Matrix{Int}(I, 0, 1), dims=2))
-            @test isequal(f(spzeros(0, 1), dims=(1, 2)), f(Matrix{Int}(I, 0, 1), dims=(1, 2)))
-            @test isequal(f(spzeros(0, 1), dims=3), f(Matrix{Int}(I, 0, 1), dims=3))
-        end
-        for f in (minimum, maximum, findmin, findmax)
-            @test_throws ArgumentError f(spzeros(0, 1), dims=1)
-            @test isequal(f(spzeros(0, 1), dims=2), f(Matrix{Int}(I, 0, 1), dims=2))
-            @test_throws ArgumentError f(spzeros(0, 1), dims=(1, 2))
-            @test isequal(f(spzeros(0, 1), dims=3), f(Matrix{Int}(I, 0, 1), dims=3))
-        end
-    end
-end
-
-@testset "issue #5190" begin
-    @test_throws ArgumentError sparsevec([3,5,7],[0.1,0.0,3.2],4)
-end
-
-@testset "what used to be issue #5386" begin
-    K,J,V = findnz(SparseMatrixCSC(2,1,[1,3],[1,2],[1.0,0.0]))
-    @test length(K) == length(J) == length(V) == 2
-end
-
-@testset "findall" begin
-    # issue described in https://groups.google.com/d/msg/julia-users/Yq4dh8NOWBQ/GU57L90FZ3EJ
-    A = sparse(I, 5, 5)
-    @test findall(A) == findall(x -> x == true, A) == findall(Array(A))
-    # Non-stored entries are true
-    @test findall(x -> x == false, A) == findall(x -> x == false, Array(A))
-
-    # Not all stored entries are true
-    @test findall(sparse([true false])) == [CartesianIndex(1, 1)]
-    @test findall(x -> x > 1, sparse([1 2])) == [CartesianIndex(1, 2)]
-end
-
-@testset "issue #5824" begin
-    @test sprand(4,5,0.5).^0 == sparse(fill(1,4,5))
-end
-
-@testset "issue #5985" begin
-    @test sprand(Bool, 4, 5, 0.0) == sparse(zeros(Bool, 4, 5))
-    @test sprand(Bool, 4, 5, 1.00) == sparse(fill(true, 4, 5))
-    sprb45nnzs = zeros(5)
-    for i=1:5
-        sprb45 = sprand(Bool, 4, 5, 0.5)
-        @test length(sprb45) == 20
-        sprb45nnzs[i] = sum(sprb45)[1]
-    end
-    @test 4 <= sum(sprb45nnzs)/length(sprb45nnzs) <= 16
-end
-
-@testset "issue #5853, sparse diff" begin
-    for i=1:2, a=Any[[1 2 3], reshape([1, 2, 3],(3,1)), Matrix(1.0I, 3, 3)]
-        @test diff(sparse(a),dims=i) == diff(a,dims=i)
-    end
-end
-
-@testset "access to undefined error types that initially allocate elements as #undef" begin
-    @test sparse(1:2, 1:2, Number[1,2])^2 == sparse(1:2, 1:2, [1,4])
-    sd1 = diff(sparse([1,1,1], [1,2,3], Number[1,2,3]), dims=1)
-end
-
-@testset "issue #6036" begin
-    P = spzeros(Float64, 3, 3)
-    for i = 1:3
-        P[i,i] = i
-    end
-
-    @test minimum(P) === 0.0
-    @test maximum(P) === 3.0
-    @test minimum(-P) === -3.0
-    @test maximum(-P) === 0.0
-
-    @test maximum(P, dims=(1,)) == [1.0 2.0 3.0]
-    @test maximum(P, dims=(2,)) == reshape([1.0,2.0,3.0],3,1)
-    @test maximum(P, dims=(1,2)) == reshape([3.0],1,1)
-
-    @test maximum(sparse(fill(-1,3,3))) == -1
-    @test minimum(sparse(fill(1,3,3))) == 1
-end
-
-@testset "unary functions" begin
-    A = sprand(5, 15, 0.5)
-    C = A + im*A
-    Afull = Array(A)
-    Cfull = Array(C)
-    # Test representatives of [unary functions that map zeros to zeros and may map nonzeros to zeros]
-    @test sin.(Afull) == Array(sin.(A))
-    @test tan.(Afull) == Array(tan.(A)) # should be redundant with sin test
-    @test ceil.(Afull) == Array(ceil.(A))
-    @test floor.(Afull) == Array(floor.(A)) # should be redundant with ceil test
-    @test real.(Afull) == Array(real.(A)) == Array(real(A))
-    @test imag.(Afull) == Array(imag.(A)) == Array(imag(A))
-    @test conj.(Afull) == Array(conj.(A)) == Array(conj(A))
-    @test real.(Cfull) == Array(real.(C)) == Array(real(C))
-    @test imag.(Cfull) == Array(imag.(C)) == Array(imag(C))
-    @test conj.(Cfull) == Array(conj.(C)) == Array(conj(C))
-    # Test representatives of [unary functions that map zeros to zeros and nonzeros to nonzeros]
-    @test expm1.(Afull) == Array(expm1.(A))
-    @test abs.(Afull) == Array(abs.(A))
-    @test abs2.(Afull) == Array(abs2.(A))
-    @test abs.(Cfull) == Array(abs.(C))
-    @test abs2.(Cfull) == Array(abs2.(C))
-    # Test representatives of [unary functions that map both zeros and nonzeros to nonzeros]
-    @test cos.(Afull) == Array(cos.(A))
-    # Test representatives of remaining vectorized-nonbroadcast unary functions
-    @test ceil.(Int, Afull) == Array(ceil.(Int, A))
-    @test floor.(Int, Afull) == Array(floor.(Int, A))
-    # Tests of real, imag, abs, and abs2 for SparseMatrixCSC{Int,X}s previously elsewhere
-    for T in (Int, Float16, Float32, Float64, BigInt, BigFloat)
-        R = rand(T[1:100;], 2, 2)
-        I = rand(T[1:100;], 2, 2)
-        D = R + I*im
-        S = sparse(D)
-        spR = sparse(R)
-
-        @test R == real.(S) == real(S)
-        @test I == imag.(S) == imag(S)
-        @test conj(Array(S)) == conj.(S) == conj(S)
-        @test real.(spR) == R
-        @test nnz(imag.(spR)) == nnz(imag(spR)) == 0
-        @test abs.(S) == abs.(D)
-        @test abs2.(S) == abs2.(D)
-
-        # test aliasing of real and conj of real valued matrix
-        @test real(spR) === spR
-        @test conj(spR) === spR
-    end
-end
-
-@testset "getindex" begin
-    ni = 23
-    nj = 32
-    a116 = reshape(1:(ni*nj), ni, nj)
-    s116 = sparse(a116)
-
-    ad116 = diagm(0 => diag(a116))
-    sd116 = sparse(ad116)
-
-    for (aa116, ss116) in [(a116, s116), (ad116, sd116)]
-        ij=11; i=3; j=2
-        @test ss116[ij] == aa116[ij]
-        @test ss116[(i,j)] == aa116[i,j]
-        @test ss116[i,j] == aa116[i,j]
-        @test ss116[i-1,j] == aa116[i-1,j]
-        ss116[i,j] = 0
-        @test ss116[i,j] == 0
-        ss116 = sparse(aa116)
-
-        @test ss116[:,:] == copy(ss116)
-
-        @test convert(SparseMatrixCSC{Float32,Int32}, sd116)[2:5,:] == convert(SparseMatrixCSC{Float32,Int32}, sd116[2:5,:])
-
-        # range indexing
-        @test Array(ss116[i,:]) == aa116[i,:]
-        @test Array(ss116[:,j]) == aa116[:,j]
-        @test Array(ss116[i,1:2:end]) == aa116[i,1:2:end]
-        @test Array(ss116[1:2:end,j]) == aa116[1:2:end,j]
-        @test Array(ss116[i,end:-2:1]) == aa116[i,end:-2:1]
-        @test Array(ss116[end:-2:1,j]) == aa116[end:-2:1,j]
-        # float-range indexing is not supported
-
-        # sorted vector indexing
-        @test Array(ss116[i,[3:2:end-3;]]) == aa116[i,[3:2:end-3;]]
-        @test Array(ss116[[3:2:end-3;],j]) == aa116[[3:2:end-3;],j]
-        @test Array(ss116[i,[end-3:-2:1;]]) == aa116[i,[end-3:-2:1;]]
-        @test Array(ss116[[end-3:-2:1;],j]) == aa116[[end-3:-2:1;],j]
-
-        # unsorted vector indexing with repetition
-        p = [4, 1, 2, 3, 2, 6]
-        @test Array(ss116[p,:]) == aa116[p,:]
-        @test Array(ss116[:,p]) == aa116[:,p]
-        @test Array(ss116[p,p]) == aa116[p,p]
-
-        # bool indexing
-        li = bitrand(size(aa116,1))
-        lj = bitrand(size(aa116,2))
-        @test Array(ss116[li,j]) == aa116[li,j]
-        @test Array(ss116[li,:]) == aa116[li,:]
-        @test Array(ss116[i,lj]) == aa116[i,lj]
-        @test Array(ss116[:,lj]) == aa116[:,lj]
-        @test Array(ss116[li,lj]) == aa116[li,lj]
-
-        # empty indices
-        for empty in (1:0, Int[])
-            @test Array(ss116[empty,:]) == aa116[empty,:]
-            @test Array(ss116[:,empty]) == aa116[:,empty]
-            @test Array(ss116[empty,lj]) == aa116[empty,lj]
-            @test Array(ss116[li,empty]) == aa116[li,empty]
-            @test Array(ss116[empty,empty]) == aa116[empty,empty]
-        end
-
-        # out of bounds indexing
-        @test_throws BoundsError ss116[0, 1]
-        @test_throws BoundsError ss116[end+1, 1]
-        @test_throws BoundsError ss116[1, 0]
-        @test_throws BoundsError ss116[1, end+1]
-        for j in (1, 1:size(s116,2), 1:1, Int[1], trues(size(s116, 2)), 1:0, Int[])
-            @test_throws BoundsError ss116[0:1, j]
-            @test_throws BoundsError ss116[[0, 1], j]
-            @test_throws BoundsError ss116[end:end+1, j]
-            @test_throws BoundsError ss116[[end, end+1], j]
-        end
-        for i in (1, 1:size(s116,1), 1:1, Int[1], trues(size(s116, 1)), 1:0, Int[])
-            @test_throws BoundsError ss116[i, 0:1]
-            @test_throws BoundsError ss116[i, [0, 1]]
-            @test_throws BoundsError ss116[i, end:end+1]
-            @test_throws BoundsError ss116[i, [end, end+1]]
-        end
-    end
-
-    # indexing by array of CartesianIndex (issue #30981)
-    S = sprand(10, 10, 0.4)
-    inds_sparse = S[findall(S .> 0.2)]
-    M = Matrix(S)
-    inds_dense = M[findall(M .> 0.2)]
-    @test Array(inds_sparse) == inds_dense
-    inds_out = Array([CartesianIndex(1, 1), CartesianIndex(0, 1)])
-    @test_throws BoundsError S[inds_out]
-    pop!(inds_out); push!(inds_out, CartesianIndex(1, 0))
-    @test_throws BoundsError S[inds_out]
-    pop!(inds_out); push!(inds_out, CartesianIndex(11, 1))
-    @test_throws BoundsError S[inds_out]
-    pop!(inds_out); push!(inds_out, CartesianIndex(1, 11))
-    @test_throws BoundsError S[inds_out]
-
-    # workaround issue #7197: comment out let-block
-    #let S = SparseMatrixCSC(3, 3, UInt8[1,1,1,1], UInt8[], Int64[])
-    S1290 = SparseMatrixCSC(3, 3, UInt8[1,1,1,1], UInt8[], Int64[])
-        S1290[1,1] = 1
-        S1290[5] = 2
-        S1290[end] = 3
-        @test S1290[end] == (S1290[1] + S1290[2,2])
-        @test 6 == sum(diag(S1290))
-        @test Array(S1290)[[3,1],1] == Array(S1290[[3,1],1])
-
-        # check that indexing with an abstract array returns matrix
-        # with same colptr and rowval eltypes as input. Tests PR 24548
-        r1 = S1290[[5,9]]
-        r2 = S1290[[1 2;5 9]]
-        @test isa(r1, SparseVector{Int64,UInt8})
-        @test isa(r2, SparseMatrixCSC{Int64,UInt8})
-    # end
-end
-
-@testset "setindex" begin
-    a = spzeros(Int, 10, 10)
-    @test count(!iszero, a) == 0
-    a[1,:] .= 1
-    @test count(!iszero, a) == 10
-    @test a[1,:] == sparse(fill(1,10))
-    a[:,2] .= 2
-    @test count(!iszero, a) == 19
-    @test a[:,2] == sparse(fill(2,10))
-    b = copy(a)
-
-    # Zero-assignment behavior of setindex!(A, v, i, j)
-    a[1,3] = 0
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 18
-    a[2,1] = 0
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 18
-
-    # Zero-assignment behavior of setindex!(A, v, I, J)
-    a[1,:] .= 0
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 9
-    a[2,:] .= 0
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 8
-    a[:,1] .= 0
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 8
-    a[:,2] .= 0
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 0
-    a = copy(b)
-    a[:,:] .= 0
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 0
-
-    # Zero-assignment behavior of setindex!(A, B::SparseMatrixCSC, I, J)
-    a = copy(b)
-    a[1:2,:] = spzeros(2, 10)
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 8
-    a[1:2,1:3] = sparse([1 0 1; 0 0 1])
-    @test nnz(a) == 20
-    @test count(!iszero, a) == 11
-    a = copy(b)
-    a[1:2,:] = let c = sparse(fill(1,2,10)); fill!(nonzeros(c), 0); c; end
-    @test nnz(a) == 19
-    @test count(!iszero, a) == 8
-    a[1:2,1:3] = let c = sparse(fill(1,2,3)); c[1,2] = c[2,1] = c[2,2] = 0; c; end
-    @test nnz(a) == 20
-    @test count(!iszero, a) == 11
-
-    a[1,:] = 1:10
-    @test a[1,:] == sparse([1:10;])
-    a[:,2] = 1:10
-    @test a[:,2] == sparse([1:10;])
-
-    a[1,1:0] = []
-    @test a[1,:] == sparse([1; 1; 3:10])
-    a[1:0,2] = []
-    @test a[:,2] == sparse([1:10;])
-    a[1,1:0] .= 0
-    @test a[1,:] == sparse([1; 1; 3:10])
-    a[1:0,2] .= 0
-    @test a[:,2] == sparse([1:10;])
-    a[1,1:0] .= 1
-    @test a[1,:] == sparse([1; 1; 3:10])
-    a[1:0,2] .= 1
-    @test a[:,2] == sparse([1:10;])
-
-    @test_throws BoundsError a[:,11] = spzeros(10,1)
-    @test_throws BoundsError a[11,:] = spzeros(1,10)
-    @test_throws BoundsError a[:,-1] = spzeros(10,1)
-    @test_throws BoundsError a[-1,:] = spzeros(1,10)
-    @test_throws BoundsError a[0:9] = spzeros(1,10)
-    @test_throws BoundsError (a[:,11] .= 0; a)
-    @test_throws BoundsError (a[11,:] .= 0; a)
-    @test_throws BoundsError (a[:,-1] .= 0; a)
-    @test_throws BoundsError (a[-1,:] .= 0; a)
-    @test_throws BoundsError (a[0:9] .= 0; a)
-    @test_throws BoundsError (a[:,11] .= 1; a)
-    @test_throws BoundsError (a[11,:] .= 1; a)
-    @test_throws BoundsError (a[:,-1] .= 1; a)
-    @test_throws BoundsError (a[-1,:] .= 1; a)
-    @test_throws BoundsError (a[0:9] .= 1; a)
-
-    @test_throws DimensionMismatch a[1:2,1:2] = 1:3
-    @test_throws DimensionMismatch a[1:2,1] = 1:3
-    @test_throws DimensionMismatch a[1,1:2] = 1:3
-    @test_throws DimensionMismatch a[1:2] = 1:3
-
-    A = spzeros(Int, 10, 20)
-    A[1:5,1:10] .= 10
-    A[1:5,1:10] .= 10
-    @test count(!iszero, A) == 50
-    @test A[1:5,1:10] == fill(10, 5, 10)
-    A[6:10,11:20] .= 0
-    @test count(!iszero, A) == 50
-    A[6:10,11:20] .= 20
-    @test count(!iszero, A) == 100
-    @test A[6:10,11:20] == fill(20, 5, 10)
-    A[4:8,8:16] .= 15
-    @test count(!iszero, A) == 121
-    @test A[4:8,8:16] == fill(15, 5, 9)
-
-    ASZ = 1000
-    TSZ = 800
-    A = sprand(ASZ, 2*ASZ, 0.0001)
-    B = copy(A)
-    nA = count(!iszero, A)
-    x = A[1:TSZ, 1:(2*TSZ)]
-    nx = count(!iszero, x)
-    A[1:TSZ, 1:(2*TSZ)] .= 0
-    nB = count(!iszero, A)
-    @test nB == (nA - nx)
-    A[1:TSZ, 1:(2*TSZ)] = x
-    @test count(!iszero, A) == nA
-    @test A == B
-    A[1:TSZ, 1:(2*TSZ)] .= 10
-    @test count(!iszero, A) == nB + 2*TSZ*TSZ
-    A[1:TSZ, 1:(2*TSZ)] = x
-    @test count(!iszero, A) == nA
-    @test A == B
-
-    A = sparse(1I, 5, 5)
-    lininds = 1:10
-    X=reshape([trues(10); falses(15)],5,5)
-    @test A[lininds] == A[X] == [1,0,0,0,0,0,1,0,0,0]
-    A[lininds] = [1:10;]
-    @test A[lininds] == A[X] == 1:10
-    A[lininds] = zeros(Int, 10)
-    @test nnz(A) == 13
-    @test count(!iszero, A) == 3
-    @test A[lininds] == A[X] == zeros(Int, 10)
-    c = Vector(11:20); c[1] = c[3] = 0
-    A[lininds] = c
-    @test nnz(A) == 13
-    @test count(!iszero, A) == 11
-    @test A[lininds] == A[X] == c
-    A = sparse(1I, 5, 5)
-    A[lininds] = c
-    @test nnz(A) == 12
-    @test count(!iszero, A) == 11
-    @test A[lininds] == A[X] == c
-
-    let # prevent assignment to I from overwriting UniformSampling in enclosing scope
-        S = sprand(50, 30, 0.5, x -> round.(Int, rand(x) * 100))
-        I = sprand(Bool, 50, 30, 0.2)
-        FS = Array(S)
-        FI = Array(I)
-        @test sparse(FS[FI]) == S[I] == S[FI]
-        @test sum(S[FI]) + sum(S[.!FI]) == sum(S)
-        @test count(!iszero, I) == count(I)
-
-        sumS1 = sum(S)
-        sumFI = sum(S[FI])
-        nnzS1 = nnz(S)
-        S[FI] .= 0
-        sumS2 = sum(S)
-        cnzS2 = count(!iszero, S)
-        @test sum(S[FI]) == 0
-        @test nnz(S) == nnzS1
-        @test (sum(S) + sumFI) == sumS1
-
-        S[FI] .= 10
-        nnzS3 = nnz(S)
-        @test sum(S) == sumS2 + 10*sum(FI)
-        S[FI] .= 0
-        @test sum(S) == sumS2
-        @test nnz(S) == nnzS3
-        @test count(!iszero, S) == cnzS2
-
-        S[FI] .= [1:sum(FI);]
-        @test sum(S) == sumS2 + sum(1:sum(FI))
-
-        S = sprand(50, 30, 0.5, x -> round.(Int, rand(x) * 100))
-        N = length(S) >> 2
-        I = randperm(N) .* 4
-        J = randperm(N)
-        sumS1 = sum(S)
-        sumS2 = sum(S[I])
-        S[I] .= 0
-        @test sum(S) == (sumS1 - sumS2)
-        S[I] .= J
-        @test sum(S) == (sumS1 - sumS2 + sum(J))
-    end
-
-    # setindex with a Matrix{Bool}
-    Is = fill(false, 10, 10)
-    Is[1, 1] = true
-    Is[10, 10] = true
-    A = sprand(10, 10, 0.2)
-    A[Is] = [0.1, 0.5]
-    @test A[1, 1] == 0.1
-    @test A[10, 10] == 0.5
-    A = spzeros(10, 10)
-    A[Is] = [0.1, 0.5]
-    @test nnz(A) == 2
-end
-
-@testset "dropstored!" begin
-    A = spzeros(Int, 10, 10)
-    # Introduce nonzeros in row and column two
-    A[1,:] .= 1
-    A[:,2] .= 2
-    @test nnz(A) == 19
-
-    # Test argument bounds checking for dropstored!(A, i, j)
-    @test_throws BoundsError SparseArrays.dropstored!(A, 0, 1)
-    @test_throws BoundsError SparseArrays.dropstored!(A, 1, 0)
-    @test_throws BoundsError SparseArrays.dropstored!(A, 1, 11)
-    @test_throws BoundsError SparseArrays.dropstored!(A, 11, 1)
-
-    # Test argument bounds checking for dropstored!(A, I, J)
-    @test_throws BoundsError SparseArrays.dropstored!(A, 0:1, 1:1)
-    @test_throws BoundsError SparseArrays.dropstored!(A, 1:1, 0:1)
-    @test_throws BoundsError SparseArrays.dropstored!(A, 10:11, 1:1)
-    @test_throws BoundsError SparseArrays.dropstored!(A, 1:1, 10:11)
-
-    # Test behavior of dropstored!(A, i, j)
-    # --> Test dropping a single stored entry
-    SparseArrays.dropstored!(A, 1, 2)
-    @test nnz(A) == 18
-    # --> Test dropping a single nonstored entry
-    SparseArrays.dropstored!(A, 2, 1)
-    @test nnz(A) == 18
-
-    # Test behavior of dropstored!(A, I, J) and derivs.
-    # --> Test dropping a single row including stored and nonstored entries
-    SparseArrays.dropstored!(A, 1, :)
-    @test nnz(A) == 9
-    # --> Test dropping a single column including stored and nonstored entries
-    SparseArrays.dropstored!(A, :, 2)
-    @test nnz(A) == 0
-    # --> Introduce nonzeros in rows one and two and columns two and three
-    A[1:2,:] .= 1
-    A[:,2:3] .= 2
-    @test nnz(A) == 36
-    # --> Test dropping multiple rows containing stored and nonstored entries
-    SparseArrays.dropstored!(A, 1:3, :)
-    @test nnz(A) == 14
-    # --> Test dropping multiple columns containing stored and nonstored entries
-    SparseArrays.dropstored!(A, :, 2:4)
-    @test nnz(A) == 0
-    # --> Introduce nonzeros in every other row
-    A[1:2:9, :] .= 1
-    @test nnz(A) == 50
-    # --> Test dropping a block of the matrix towards the upper left
-    SparseArrays.dropstored!(A, 2:5, 2:5)
-    @test nnz(A) == 42
-    # --> Test dropping all elements
-    SparseArrays.dropstored!(A, :)
-    @test nnz(A) == 0
-    A[1:2:9, :] .= 1
-    @test nnz(A) == 50
-    SparseArrays.dropstored!(A, :, :)
-    @test nnz(A) == 0
-end
-
-@testset "issue #7507" begin
-    @test (i7507=sparsevec(Dict{Int64, Float64}(), 10))==spzeros(10)
-end
-
-@testset "issue #7650" begin
-    S = spzeros(3, 3)
-    @test size(reshape(S, 9, 1)) == (9,1)
-end
-
-@testset "sparsevec from matrices" begin
-    X = Matrix(1.0I, 5, 5)
-    M = rand(5,4)
-    C = spzeros(3,3)
-    SX = sparse(X); SM = sparse(M)
-    VX = vec(X); VSX = vec(SX)
-    VM = vec(M); VSM1 = vec(SM); VSM2 = sparsevec(M)
-    VC = vec(C)
-    @test VX == VSX
-    @test VM == VSM1
-    @test VM == VSM2
-    @test size(VC) == (9,)
-    @test nnz(VC) == 0
-    @test nnz(VSX) == 5
-end
-
-@testset "issue #7677" begin
-    A = sprand(5,5,0.5,(n)->rand(Float64,n))
-    ACPY = copy(A)
-    B = reshape(A,25,1)
-    @test A == ACPY
-end
-
-@testset "issue #8225" begin
-    @test_throws ArgumentError sparse([0],[-1],[1.0],2,2)
-end
-
-@testset "issue #8363" begin
-    @test_throws ArgumentError sparsevec(Dict(-1=>1,1=>2))
-end
-
-@testset "issue #8976" begin
-    @test conj.(sparse([1im])) == sparse(conj([1im]))
-    @test conj!(sparse([1im])) == sparse(conj!([1im]))
-end
-
-@testset "issue #9525" begin
-    @test_throws ArgumentError sparse([3], [5], 1.0, 3, 3)
-end
-
-@testset "argmax, argmin, findmax, findmin" begin
-    S = sprand(100,80, 0.5)
-    A = Array(S)
-    @test argmax(S) == argmax(A)
-    @test argmin(S) == argmin(A)
-    @test findmin(S) == findmin(A)
-    @test findmax(S) == findmax(A)
-    for region in [(1,), (2,), (1,2)], m in [findmax, findmin]
-        @test m(S, dims=region) == m(A, dims=region)
-    end
-
-    S = spzeros(10,8)
-    A = Array(S)
-    @test argmax(S) == argmax(A) == CartesianIndex(1,1)
-    @test argmin(S) == argmin(A) == CartesianIndex(1,1)
-
-    A = Matrix{Int}(I, 0, 0)
-    S = sparse(A)
-    iA = try argmax(A); catch; end
-    iS = try argmax(S); catch; end
-    @test iA === iS === nothing
-    iA = try argmin(A); catch; end
-    iS = try argmin(S); catch; end
-    @test iA === iS === nothing
-end
-
-@testset "findmin/findmax/minimum/maximum" begin
-    A = sparse([1.0 5.0 6.0;
-                5.0 2.0 4.0])
-    for (tup, rval, rind) in [((1,), [1.0 2.0 4.0], [CartesianIndex(1,1) CartesianIndex(2,2) CartesianIndex(2,3)]),
-                              ((2,), reshape([1.0,2.0], 2, 1), reshape([CartesianIndex(1,1),CartesianIndex(2,2)], 2, 1)),
-                              ((1,2), fill(1.0,1,1),fill(CartesianIndex(1,1),1,1))]
-        @test findmin(A, tup) == (rval, rind)
-    end
-
-    for (tup, rval, rind) in [((1,), [5.0 5.0 6.0], [CartesianIndex(2,1) CartesianIndex(1,2) CartesianIndex(1,3)]),
-                              ((2,), reshape([6.0,5.0], 2, 1), reshape([CartesianIndex(1,3),CartesianIndex(2,1)], 2, 1)),
-                              ((1,2), fill(6.0,1,1),fill(CartesianIndex(1,3),1,1))]
-        @test findmax(A, tup) == (rval, rind)
-    end
-
-    #issue 23209
-
-    A = sparse([1.0 5.0 6.0;
-                NaN 2.0 4.0])
-    for (tup, rval, rind) in [((1,), [NaN 2.0 4.0], [CartesianIndex(2,1) CartesianIndex(2,2) CartesianIndex(2,3)]),
-                              ((2,), reshape([1.0, NaN], 2, 1), reshape([CartesianIndex(1,1),CartesianIndex(2,1)], 2, 1)),
-                              ((1,2), fill(NaN,1,1),fill(CartesianIndex(2,1),1,1))]
-        @test isequal(findmin(A, tup), (rval, rind))
-    end
-
-    for (tup, rval, rind) in [((1,), [NaN 5.0 6.0], [CartesianIndex(2,1) CartesianIndex(1,2) CartesianIndex(1,3)]),
-                              ((2,), reshape([6.0, NaN], 2, 1), reshape([CartesianIndex(1,3),CartesianIndex(2,1)], 2, 1)),
-                              ((1,2), fill(NaN,1,1),fill(CartesianIndex(2,1),1,1))]
-        @test isequal(findmax(A, tup), (rval, rind))
-    end
-
-    A = sparse([1.0 NaN 6.0;
-                NaN 2.0 4.0])
-    for (tup, rval, rind) in [((1,), [NaN NaN 4.0], [CartesianIndex(2,1) CartesianIndex(1,2) CartesianIndex(2,3)]),
-                              ((2,), reshape([NaN, NaN], 2, 1), reshape([CartesianIndex(1,2),CartesianIndex(2,1)], 2, 1)),
-                              ((1,2), fill(NaN,1,1),fill(CartesianIndex(2,1),1,1))]
-        @test isequal(findmin(A, tup), (rval, rind))
-    end
-
-    for (tup, rval, rind) in [((1,), [NaN NaN 6.0], [CartesianIndex(2,1) CartesianIndex(1,2) CartesianIndex(1,3)]),
-                              ((2,), reshape([NaN, NaN], 2, 1), reshape([CartesianIndex(1,2),CartesianIndex(2,1)], 2, 1)),
-                              ((1,2), fill(NaN,1,1),fill(CartesianIndex(2,1),1,1))]
-        @test isequal(findmax(A, tup), (rval, rind))
-    end
-
-    A = sparse([Inf -Inf Inf  -Inf;
-                Inf  Inf -Inf -Inf])
-    for (tup, rval, rind) in [((1,), [Inf -Inf -Inf -Inf], [CartesianIndex(1,1) CartesianIndex(1,2) CartesianIndex(2,3) CartesianIndex(1,4)]),
-                              ((2,), reshape([-Inf -Inf], 2, 1), reshape([CartesianIndex(1,2),CartesianIndex(2,3)], 2, 1)),
-                              ((1,2), fill(-Inf,1,1),fill(CartesianIndex(1,2),1,1))]
-        @test isequal(findmin(A, tup), (rval, rind))
-    end
-
-    for (tup, rval, rind) in [((1,), [Inf Inf Inf -Inf], [CartesianIndex(1,1) CartesianIndex(2,2) CartesianIndex(1,3) CartesianIndex(1,4)]),
-                              ((2,), reshape([Inf Inf], 2, 1), reshape([CartesianIndex(1,1),CartesianIndex(2,1)], 2, 1)),
-                              ((1,2), fill(Inf,1,1),fill(CartesianIndex(1,1),1,1))]
-        @test isequal(findmax(A, tup), (rval, rind))
-    end
-
-    A = sparse([BigInt(10)])
-    for (tup, rval, rind) in [((2,), [BigInt(10)], [1])]
-        @test isequal(findmin(A, dims=tup), (rval, rind))
-    end
-
-    for (tup, rval, rind) in [((2,), [BigInt(10)], [1])]
-        @test isequal(findmax(A, dims=tup), (rval, rind))
-    end
-
-    A = sparse([BigInt(-10)])
-    for (tup, rval, rind) in [((2,), [BigInt(-10)], [1])]
-        @test isequal(findmin(A, dims=tup), (rval, rind))
-    end
-
-    for (tup, rval, rind) in [((2,), [BigInt(-10)], [1])]
-        @test isequal(findmax(A, dims=tup), (rval, rind))
-    end
-
-    A = sparse([BigInt(10) BigInt(-10)])
-    for (tup, rval, rind) in [((2,), reshape([BigInt(-10)], 1, 1), reshape([CartesianIndex(1,2)], 1, 1))]
-        @test isequal(findmin(A, dims=tup), (rval, rind))
-    end
-
-    for (tup, rval, rind) in [((2,), reshape([BigInt(10)], 1, 1), reshape([CartesianIndex(1,1)], 1, 1))]
-        @test isequal(findmax(A, dims=tup), (rval, rind))
-    end
-
-    # sparse arrays of types without zero(T) are forbidden
-    @test_throws MethodError sparse(["a", "b"])
-end
-
-# Support the case when user defined `zero` and `isless` for non-numerical type
-struct CustomType
-    x::String
-end
-Base.zero(::Type{CustomType}) = CustomType("")
-Base.zero(x::CustomType) = zero(CustomType)
-Base.isless(x::CustomType, y::CustomType) = isless(x.x, y.x)
-@testset "findmin/findmax for non-numerical type" begin
-    A = sparse([CustomType("a"), CustomType("b")])
-
-    for (tup, rval, rind) in [((1,), [CustomType("a")], [1])]
-        @test isequal(findmin(A, dims=tup), (rval, rind))
-    end
-
-    for (tup, rval, rind) in [((1,), [CustomType("b")], [2])]
-        @test isequal(findmax(A, dims=tup), (rval, rind))
-    end
-end
-
-@testset "rotations" begin
-    a = sparse( [1,1,2,3], [1,3,4,1], [1,2,3,4] )
-
-    @test rot180(a,2) == a
-    @test rot180(a,1) == sparse( [3,3,2,1], [4,2,1,4], [1,2,3,4] )
-    @test rotr90(a,1) == sparse( [1,3,4,1], [3,3,2,1], [1,2,3,4] )
-    @test rotl90(a,1) == sparse( [4,2,1,4], [1,1,2,3], [1,2,3,4] )
-    @test rotl90(a,2) == rot180(a)
-    @test rotr90(a,2) == rot180(a)
-    @test rotl90(a,3) == rotr90(a)
-    @test rotr90(a,3) == rotl90(a)
-
-    #ensure we have preserved the correct dimensions!
-
-    a = sparse(1.0I, 3, 5)
-    @test size(rot180(a)) == (3,5)
-    @test size(rotr90(a)) == (5,3)
-    @test size(rotl90(a)) == (5,3)
-end
-
-function test_getindex_algs(A::SparseMatrixCSC{Tv,Ti}, I::AbstractVector, J::AbstractVector, alg::Int) where {Tv,Ti}
-    # Sorted vectors for indexing rows.
-    # Similar to getindex_general but without the transpose trick.
-    (m, n) = size(A)
-    !isempty(I) && ((I[1] < 1) || (I[end] > m)) && BoundsError()
-    if !isempty(J)
-        minj, maxj = extrema(J)
-        ((minj < 1) || (maxj > n)) && BoundsError()
-    end
-
-    (alg == 0) ? SparseArrays.getindex_I_sorted_bsearch_A(A, I, J) :
-    (alg == 1) ? SparseArrays.getindex_I_sorted_bsearch_I(A, I, J) :
-    SparseArrays.getindex_I_sorted_linear(A, I, J)
-end
-
-@testset "test_getindex_algs" begin
-    M=2^14
-    N=2^4
-    Irand = randperm(M)
-    Jrand = randperm(N)
-    SA = [sprand(M, N, d) for d in [1., 0.1, 0.01, 0.001, 0.0001, 0.]]
-    IA = [sort(Irand[1:round(Int,n)]) for n in [M, M*0.1, M*0.01, M*0.001, M*0.0001, 0.]]
-    debug = false
-
-    if debug
-        println("row sizes: $([round(Int,nnz(S)/size(S, 2)) for S in SA])")
-        println("I sizes: $([length(I) for I in IA])")
-        @printf("    S    |    I    | binary S | binary I |  linear  | best\n")
-    end
-
-    J = Jrand
-    for I in IA
-        for S in SA
-            res = Any[1,2,3]
-            times = Float64[0,0,0]
-            best = [typemax(Float64), 0]
-            for searchtype in [0, 1, 2]
-                GC.gc()
-                tres = @timed test_getindex_algs(S, I, J, searchtype)
-                res[searchtype+1] = tres[1]
-                times[searchtype+1] = tres[2]
-                if best[1] > tres[2]
-                    best[1] = tres[2]
-                    best[2] = searchtype
-                end
-            end
-
-            if debug
-                @printf(" %7d | %7d | %4.2e | %4.2e | %4.2e | %s\n", round(Int,nnz(S)/size(S, 2)), length(I), times[1], times[2], times[3],
-                            (0 == best[2]) ? "binary S" : (1 == best[2]) ? "binary I" : "linear")
-            end
-            if res[1] != res[2]
-                println("1 and 2")
-            elseif res[2] != res[3]
-                println("2, 3")
-            end
-            @test res[1] == res[2] == res[3]
-        end
-    end
-
-    M = 2^8
-    N=2^3
-    Irand = randperm(M)
-    Jrand = randperm(N)
-    II = sort([Irand; Irand; Irand])
-    J = [Jrand; Jrand]
-
-    SA = [sprand(M, N, d) for d in [1., 0.1, 0.01, 0.001, 0.0001, 0.]]
-    for S in SA
-        res = Any[1,2,3]
-        for searchtype in [0, 1, 2]
-            res[searchtype+1] = test_getindex_algs(S, II, J, searchtype)
-        end
-
-        @test res[1] == res[2] == res[3]
-    end
-
-    M = 2^14
-    N=2^4
-    II = randperm(M)
-    J = randperm(N)
-    Jsorted = sort(J)
-
-    SA = [sprand(M, N, d) for d in [1., 0.1, 0.01, 0.001, 0.0001, 0.]]
-    IA = [II[1:round(Int,n)] for n in [M, M*0.1, M*0.01, M*0.001, M*0.0001, 0.]]
-    debug = false
-    if debug
-        @printf("         |         |         |        times        |        memory       |\n")
-        @printf("    S    |    I    |    J    |  sorted  | unsorted |  sorted  | unsorted |\n")
-    end
-    for I in IA
-        Isorted = sort(I)
-        for S in SA
-            GC.gc()
-            ru = @timed S[I, J]
-            GC.gc()
-            rs = @timed S[Isorted, Jsorted]
-            if debug
-                @printf(" %7d | %7d | %7d | %4.2e | %4.2e | %4.2e | %4.2e |\n", round(Int,nnz(S)/size(S, 2)), length(I), length(J), rs[2], ru[2], rs[3], ru[3])
-            end
-        end
-    end
-end
-
-@testset "getindex bounds checking" begin
-    S = sprand(10, 10, 0.1)
-    @test_throws BoundsError S[[0,1,2], [1,2]]
-    @test_throws BoundsError S[[1,2], [0,1,2]]
-    @test_throws BoundsError S[[0,2,1], [1,2]]
-    @test_throws BoundsError S[[2,1], [0,1,2]]
-end
-
-@testset "test that sparse / sparsevec constructors work for AbstractMatrix subtypes" begin
-    D = Diagonal(fill(1,10))
-    sm = sparse(D)
-    sv = sparsevec(D)
-
-    @test count(!iszero, sm) == 10
-    @test count(!iszero, sv) == 10
-
-    @test count(!iszero, sparse(Diagonal(Int[]))) == 0
-    @test count(!iszero, sparsevec(Diagonal(Int[]))) == 0
-end
-
-@testset "explicit zeros" begin
-    if Base.USE_GPL_LIBS
-        a = SparseMatrixCSC(2, 2, [1, 3, 5], [1, 2, 1, 2], [1.0, 0.0, 0.0, 1.0])
-        @test lu(a)\[2.0, 3.0] ≈ [2.0, 3.0]
-        @test cholesky(a)\[2.0, 3.0] ≈ [2.0, 3.0]
-    end
-end
-
-@testset "issue #9917" begin
-    @test sparse([]') == reshape(sparse([]), 1, 0)
-    @test Array(sparse([])) == zeros(0)
-    @test_throws BoundsError sparse([])[1]
-    @test_throws BoundsError sparse([])[1] = 1
-    x = sparse(1.0I, 100, 100)
-    @test_throws BoundsError x[-10:10]
-end
-
-@testset "issue #10407" begin
-    @test maximum(spzeros(5, 5)) == 0.0
-    @test minimum(spzeros(5, 5)) == 0.0
-end
-
-@testset "issue #10411" begin
-    for (m,n) in ((2,-2),(-2,2),(-2,-2))
-        @test_throws ArgumentError spzeros(m,n)
-        @test_throws ArgumentError sparse(1.0I, m, n)
-        @test_throws ArgumentError sprand(m,n,0.2)
-    end
-end
-
-@testset "issues #10837 & #32466, sparse constructors from special matrices" begin
-    T = Tridiagonal(randn(4),randn(5),randn(4))
-    S = sparse(T)
-    S2 = SparseMatrixCSC(T)
-    @test Array(T) == Array(S) == Array(S2)
-    @test S == S2
-    T = SymTridiagonal(randn(5),rand(4))
-    S = sparse(T)
-    S2 = SparseMatrixCSC(T)
-    @test Array(T) == Array(S) == Array(S2)
-    @test S == S2
-    B = Bidiagonal(randn(5),randn(4),:U)
-    S = sparse(B)
-    S2 = SparseMatrixCSC(B)
-    @test Array(B) == Array(S) == Array(S2)
-    @test S == S2
-    B = Bidiagonal(randn(5),randn(4),:L)
-    S = sparse(B)
-    S2 = SparseMatrixCSC(B)
-    @test Array(B) == Array(S) == Array(S2)
-    @test S == S2
-    D = Diagonal(randn(5))
-    S = sparse(D)
-    S2 = SparseMatrixCSC(D)
-    @test Array(D) == Array(S) == Array(S2)
-    @test S == S2
-end
-
-@testset "error conditions for reshape, and dropdims" begin
-    local A = sprand(Bool, 5, 5, 0.2)
-    @test_throws DimensionMismatch reshape(A,(20, 2))
-    @test_throws ArgumentError dropdims(A,dims=(1, 1))
-end
-
-@testset "float" begin
-    local A
-    A = sprand(Bool, 5, 5, 0.0)
-    @test eltype(float(A)) == Float64  # issue #11658
-    A = sprand(Bool, 5, 5, 0.2)
-    @test float(A) == float(Array(A))
-end
-
-@testset "complex" begin
-    A = sprand(Bool, 5, 5, 0.0)
-    @test eltype(complex(A)) == Complex{Bool}
-    A = sprand(Bool, 5, 5, 0.2)
-    @test complex(A) == complex(Array(A))
-end
-
-@testset "sparsevec" begin
-    local A = sparse(fill(1, 5, 5))
-    @test sparsevec(A) == fill(1, 25)
-    @test sparsevec([1:5;], 1) == fill(1, 5)
-    @test_throws ArgumentError sparsevec([1:5;], [1:4;])
-end
-
-@testset "sparse" begin
-    local A = sparse(fill(1, 5, 5))
-    @test sparse(A) == A
-    @test sparse([1:5;], [1:5;], 1) == sparse(1.0I, 5, 5)
-end
-
-@testset "one(A::SparseMatrixCSC)" begin
-    @test_throws DimensionMismatch one(sparse([1 1 1; 1 1 1]))
-    @test one(sparse([1 1; 1 1]))::SparseMatrixCSC == [1 0; 0 1]
-end
-
-@testset "istriu/istril" begin
-    local A = fill(1, 5, 5)
-    @test istriu(sparse(triu(A)))
-    @test !istriu(sparse(A))
-    @test istril(sparse(tril(A)))
-    @test !istril(sparse(A))
-end
-
-@testset "droptol" begin
-    local A = guardseed(1234321) do
-        triu(sprand(10, 10, 0.2))
-    end
-    @test getcolptr(SparseArrays.droptol!(A, 0.01)) == [1, 2, 2, 3, 4, 5, 5, 6, 8, 10, 13]
-    @test isequal(SparseArrays.droptol!(sparse([1], [1], [1]), 1), SparseMatrixCSC(1, 1, Int[1, 1], Int[], Int[]))
-end
-
-@testset "dropzeros[!]" begin
-    smalldim = 5
-    largedim = 10
-    nzprob = 0.4
-    targetnumposzeros = 5
-    targetnumnegzeros = 5
-    for (m, n) in ((largedim, largedim), (smalldim, largedim), (largedim, smalldim))
-        local A = sprand(m, n, nzprob)
-        struczerosA = findall(x -> x == 0, A)
-        poszerosinds = unique(rand(struczerosA, targetnumposzeros))
-        negzerosinds = unique(rand(struczerosA, targetnumnegzeros))
-        Aposzeros = copy(A)
-        Aposzeros[poszerosinds] .= 2
-        Anegzeros = copy(A)
-        Anegzeros[negzerosinds] .= -2
-        Abothsigns = copy(Aposzeros)
-        Abothsigns[negzerosinds] .= -2
-        map!(x -> x == 2 ? 0.0 : x, nonzeros(Aposzeros), nonzeros(Aposzeros))
-        map!(x -> x == -2 ? -0.0 : x, nonzeros(Anegzeros), nonzeros(Anegzeros))
-        map!(x -> x == 2 ? 0.0 : x == -2 ? -0.0 : x, nonzeros(Abothsigns), nonzeros(Abothsigns))
-        for Awithzeros in (Aposzeros, Anegzeros, Abothsigns)
-            # Basic functionality / dropzeros!
-            @test dropzeros!(copy(Awithzeros)) == A
-            # Basic functionality / dropzeros
-            @test dropzeros(Awithzeros) == A
-            # Check trimming works as expected
-            @test length(nonzeros(dropzeros!(copy(Awithzeros)))) == length(nonzeros(A))
-            @test length(rowvals(dropzeros!(copy(Awithzeros)))) == length(rowvals(A))
-        end
-    end
-    # original lone dropzeros test
-    local A = sparse([1 2 3; 4 5 6; 7 8 9])
-    nonzeros(A)[2] = nonzeros(A)[6] = nonzeros(A)[7] = 0
-    @test getcolptr(dropzeros!(A)) == [1, 3, 5, 7]
-    # test for issue #5169, modified for new behavior following #15242/#14798
-    @test nnz(sparse([1, 1], [1, 2], [0.0, -0.0])) == 2
-    @test nnz(dropzeros!(sparse([1, 1], [1, 2], [0.0, -0.0]))) == 0
-    # test for issue #5437, modified for new behavior following #15242/#14798
-    @test nnz(sparse([1, 2, 3], [1, 2, 3], [0.0, 1.0, 2.0])) == 3
-    @test nnz(dropzeros!(sparse([1, 2, 3],[1, 2, 3],[0.0, 1.0, 2.0]))) == 2
-end
-
-@testset "trace" begin
-    @test_throws DimensionMismatch tr(spzeros(5,6))
-    @test tr(sparse(1.0I, 5, 5)) == 5
-end
-
-@testset "spdiagm" begin
-    x = fill(1, 2)
-    @test spdiagm(0 => x, -1 => x) == [1 0 0; 1 1 0; 0 1 0]
-    @test spdiagm(0 => x,  1 => x) == [1 1 0; 0 1 1; 0 0 0]
-
-    for (x, y) in ((rand(5), rand(4)),(sparse(rand(5)), sparse(rand(4))))
-        @test spdiagm(-1 => x)::SparseMatrixCSC         == diagm(-1 => x)
-        @test spdiagm( 0 => x)::SparseMatrixCSC         == diagm( 0 => x) == sparse(Diagonal(x))
-        @test spdiagm(-1 => x)::SparseMatrixCSC         == diagm(-1 => x)
-        @test spdiagm(0 => x, -1 => y)::SparseMatrixCSC == diagm(0 => x, -1 => y)
-        @test spdiagm(0 => x,  1 => y)::SparseMatrixCSC == diagm(0 => x,  1 => y)
-    end
-    # promotion
-    @test spdiagm(0 => [1,2], 1 => [3.5], -1 => [4+5im]) == [1 3.5; 4+5im 2]
-
-    # non-square:
-    for m=1:4, n=2:4
-        if m < 2 || n < 3
-            @test_throws DimensionMismatch spdiagm(m,n, 0 => x,  1 => x)
-        else
-            M = zeros(m,n)
-            M[1:2,1:3] = [1 1 0; 0 1 1]
-            @test spdiagm(m,n, 0 => x,  1 => x) == M
-        end
-    end
-end
-
-@testset "diag" begin
-    for T in (Float64, ComplexF64)
-        S1 = sprand(T,  5,  5, 0.5)
-        S2 = sprand(T, 10,  5, 0.5)
-        S3 = sprand(T,  5, 10, 0.5)
-        for S in (S1, S2, S3)
-            local A = Matrix(S)
-            @test diag(S)::SparseVector{T,Int} == diag(A)
-            for k in -size(S,1):size(S,2)
-                @test diag(S, k)::SparseVector{T,Int} == diag(A, k)
-            end
-            @test_throws ArgumentError diag(S, -size(S,1)-1)
-            @test_throws ArgumentError diag(S,  size(S,2)+1)
-        end
-    end
-    # test that stored zeros are still stored zeros in the diagonal
-    S = sparse([1,3],[1,3],[0.0,0.0]); V = diag(S)
-    @test nonzeroinds(V) == [1,3]
-    @test nonzeros(V) == [0.0,0.0]
-end
-
-@testset "expandptr" begin
-    local A = sparse(1.0I, 5, 5)
-    @test SparseArrays.expandptr(getcolptr(A)) == 1:5
-    A[1,2] = 1
-    @test SparseArrays.expandptr(getcolptr(A)) == [1; 2; 2; 3; 4; 5]
-    @test_throws ArgumentError SparseArrays.expandptr([2; 3])
-end
-
-@testset "triu/tril" begin
-    n = 5
-    local A = sprand(n, n, 0.2)
-    AF = Array(A)
-    @test Array(triu(A,1)) == triu(AF,1)
-    @test Array(tril(A,1)) == tril(AF,1)
-    @test Array(triu!(copy(A), 2)) == triu(AF,2)
-    @test Array(tril!(copy(A), 2)) == tril(AF,2)
-    @test tril(A, -n - 2) == zero(A)
-    @test tril(A, n) == A
-    @test triu(A, -n) == A
-    @test triu(A, n + 2) == zero(A)
-
-    # fkeep trim option
-    @test isequal(length(rowvals(tril!(sparse([1,2,3], [1,2,3], [1,2,3], 3, 4), -1))), 0)
-end
-
-@testset "norm" begin
-    local A
-    A = sparse(Int[],Int[],Float64[],0,0)
-    @test norm(A) == zero(eltype(A))
-    A = sparse([1.0])
-    @test norm(A) == 1.0
-    @test_throws ArgumentError opnorm(sprand(5,5,0.2),3)
-    @test_throws ArgumentError opnorm(sprand(5,5,0.2),2)
-end
-
-@testset "ishermitian/issymmetric" begin
-    local A
-    # real matrices
-    A = sparse(1.0I, 5, 5)
-    @test ishermitian(A) == true
-    @test issymmetric(A) == true
-    A[1,3] = 1.0
-    @test ishermitian(A) == false
-    @test issymmetric(A) == false
-    A[3,1] = 1.0
-    @test ishermitian(A) == true
-    @test issymmetric(A) == true
-
-    # complex matrices
-    A = sparse((1.0 + 1.0im)I, 5, 5)
-    @test ishermitian(A) == false
-    @test issymmetric(A) == true
-    A[1,4] = 1.0 + im
-    @test ishermitian(A) == false
-    @test issymmetric(A) == false
-
-    A = sparse(ComplexF64(1)I, 5, 5)
-    A[3,2] = 1.0 + im
-    @test ishermitian(A) == false
-    @test issymmetric(A) == false
-    A[2,3] = 1.0 - im
-    @test ishermitian(A) == true
-    @test issymmetric(A) == false
-
-    A = sparse(zeros(5,5))
-    @test ishermitian(A) == true
-    @test issymmetric(A) == true
-
-    # explicit zeros
-    A = sparse(ComplexF64(1)I, 5, 5)
-    A[3,1] = 2
-    nonzeros(A)[2] = 0.0
-    @test ishermitian(A) == true
-    @test issymmetric(A) == true
-
-    # 15504
-    m = n = 5
-    colptr = [1, 5, 9, 13, 13, 17]
-    rowval = [1, 2, 3, 5, 1, 2, 3, 5, 1, 2, 3, 5, 1, 2, 3, 5]
-    nzval = [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 1.0]
-    A = SparseMatrixCSC(m, n, colptr, rowval, nzval)
-    @test issymmetric(A) == true
-    nonzeros(A)[end - 3]  = 2.0
-    @test issymmetric(A) == false
-
-    # 16521
-    @test issymmetric(sparse([0 0; 1 0])) == false
-    @test issymmetric(sparse([0 1; 0 0])) == false
-    @test issymmetric(sparse([0 0; 1 1])) == false
-    @test issymmetric(sparse([1 0; 1 0])) == false
-    @test issymmetric(sparse([0 1; 1 0])) == true
-    @test issymmetric(sparse([1 1; 1 0])) == true
-end
-
-@testset "equality ==" begin
-    A1 = sparse(1.0I, 10, 10)
-    A2 = sparse(1.0I, 10, 10)
-    nonzeros(A1)[end]=0
-    @test A1!=A2
-    nonzeros(A1)[end]=1
-    @test A1==A2
-    A1[1:4,end] .= 1
-    @test A1!=A2
-    nonzeros(A1)[end-4:end-1].=0
-    @test A1==A2
-    A2[1:4,end-1] .= 1
-    @test A1!=A2
-    nonzeros(A2)[end-5:end-2].=0
-    @test A1==A2
-    A2[2:3,1] .= 1
-    @test A1!=A2
-    nonzeros(A2)[2:3].=0
-    @test A1==A2
-    A1[2:5,1] .= 1
-    @test A1!=A2
-    nonzeros(A1)[2:5].=0
-    @test A1==A2
-    @test sparse([1,1,0])!=sparse([0,1,1])
-end
-
-@testset "UniformScaling" begin
-    local A = sprandn(10, 10, 0.5)
-    @test A + I == Array(A) + I
-    @test I + A == I + Array(A)
-    @test A - I == Array(A) - I
-    @test I - A == I - Array(A)
-end
-
-@testset "issue #12177, error path if triplet vectors are not all the same length" begin
-    @test_throws ArgumentError sparse([1,2,3], [1,2], [1,2,3], 3, 3)
-    @test_throws ArgumentError sparse([1,2,3], [1,2,3], [1,2], 3, 3)
-end
-
-@testset "issue #12118: sparse matrices are closed under +, -, min, max" begin
-    A12118 = sparse([1,2,3,4,5], [1,2,3,4,5], [1,2,3,4,5])
-    B12118 = sparse([1,2,4,5],   [1,2,3,5],   [2,1,-1,-2])
-
-    @test A12118 + B12118 == sparse([1,2,3,4,4,5], [1,2,3,3,4,5], [3,3,3,-1,4,3])
-    @test typeof(A12118 + B12118) == SparseMatrixCSC{Int,Int}
-
-    @test A12118 - B12118 == sparse([1,2,3,4,4,5], [1,2,3,3,4,5], [-1,1,3,1,4,7])
-    @test typeof(A12118 - B12118) == SparseMatrixCSC{Int,Int}
-
-    @test max.(A12118, B12118) == sparse([1,2,3,4,5], [1,2,3,4,5], [2,2,3,4,5])
-    @test typeof(max.(A12118, B12118)) == SparseMatrixCSC{Int,Int}
-
-    @test min.(A12118, B12118) == sparse([1,2,4,5], [1,2,3,5], [1,1,-1,-2])
-    @test typeof(min.(A12118, B12118)) == SparseMatrixCSC{Int,Int}
-end
-
-@testset "unary minus for SparseMatrixCSC{Bool}" begin
-    A = sparse([1,3], [1,3], [true, true])
-    B = sparse([1,3], [1,3], [-1, -1])
-    @test -A == B
-end
-
-@testset "sparse matrix norms" begin
-    Ac = sprandn(10,10,.1) + im* sprandn(10,10,.1)
-    Ar = sprandn(10,10,.1)
-    Ai = ceil.(Int,Ar*100)
-    @test opnorm(Ac,1) ≈ opnorm(Array(Ac),1)
-    @test opnorm(Ac,Inf) ≈ opnorm(Array(Ac),Inf)
-    @test norm(Ac) ≈ norm(Array(Ac))
-    @test opnorm(Ar,1) ≈ opnorm(Array(Ar),1)
-    @test opnorm(Ar,Inf) ≈ opnorm(Array(Ar),Inf)
-    @test norm(Ar) ≈ norm(Array(Ar))
-    @test opnorm(Ai,1) ≈ opnorm(Array(Ai),1)
-    @test opnorm(Ai,Inf) ≈ opnorm(Array(Ai),Inf)
-    @test norm(Ai) ≈ norm(Array(Ai))
-    Ai = trunc.(Int, Ar*100)
-    @test opnorm(Ai,1) ≈ opnorm(Array(Ai),1)
-    @test opnorm(Ai,Inf) ≈ opnorm(Array(Ai),Inf)
-    @test norm(Ai) ≈ norm(Array(Ai))
-    Ai = round.(Int, Ar*100)
-    @test opnorm(Ai,1) ≈ opnorm(Array(Ai),1)
-    @test opnorm(Ai,Inf) ≈ opnorm(Array(Ai),Inf)
-    @test norm(Ai) ≈ norm(Array(Ai))
-    # make certain entries in nzval beyond
-    # the range specified in colptr do not
-    # impact norm of a sparse matrix
-    foo = sparse(1.0I, 4, 4)
-    resize!(nonzeros(foo), 5)
-    setindex!(nonzeros(foo), NaN, 5)
-    @test norm(foo) == 2.0
-
-    # Test (m x 1) sparse matrix
-    colM = sprandn(10, 1, 0.6)
-    @test opnorm(colM, 1) ≈ opnorm(Array(colM), 1)
-    @test opnorm(colM) ≈ opnorm(Array(colM))
-    @test opnorm(colM, Inf) ≈ opnorm(Array(colM), Inf)
-    @test_throws ArgumentError opnorm(colM, 3)
-
-    # Test (1 x n) sparse matrix
-    rowM = sprandn(1, 10, 0.6)
-    @test opnorm(rowM, 1) ≈ opnorm(Array(rowM), 1)
-    @test opnorm(rowM) ≈ opnorm(Array(rowM))
-    @test opnorm(rowM, Inf) ≈ opnorm(Array(rowM), Inf)
-    @test_throws ArgumentError opnorm(rowM, 3)
-end
-
-@testset "sparse matrix cond" begin
-    local A = sparse(reshape([1.0], 1, 1))
-    Ac = sprandn(20, 20,.5) + im*sprandn(20, 20,.5)
-    Ar = sprandn(20, 20,.5) + eps()*I
-    @test cond(A, 1) == 1.0
-    # For a discussion of the tolerance, see #14778
-    if Base.USE_GPL_LIBS
-        @test 0.99 <= cond(Ar, 1) \ opnorm(Ar, 1) * opnorm(inv(Array(Ar)), 1) < 3
-        @test 0.99 <= cond(Ac, 1) \ opnorm(Ac, 1) * opnorm(inv(Array(Ac)), 1) < 3
-        @test 0.99 <= cond(Ar, Inf) \ opnorm(Ar, Inf) * opnorm(inv(Array(Ar)), Inf) < 3
-        @test 0.99 <= cond(Ac, Inf) \ opnorm(Ac, Inf) * opnorm(inv(Array(Ac)), Inf) < 3
-    end
-    @test_throws ArgumentError cond(A,2)
-    @test_throws ArgumentError cond(A,3)
-    Arect = spzeros(10, 6)
-    @test_throws DimensionMismatch cond(Arect, 1)
-    @test_throws ArgumentError cond(Arect,2)
-    @test_throws DimensionMismatch cond(Arect, Inf)
-end
-
-@testset "sparse matrix opnormestinv" begin
-    Random.seed!(1234)
-    Ac = sprandn(20,20,.5) + im* sprandn(20,20,.5)
-    Aci = ceil.(Int64, 100*sprand(20,20,.5)) + im*ceil.(Int64, sprand(20,20,.5))
-    Ar = sprandn(20,20,.5)
-    Ari = ceil.(Int64, 100*Ar)
-    if Base.USE_GPL_LIBS
-        # NOTE: opnormestinv is probabilistic, so requires a fixed seed (set above in Random.seed!(1234))
-        @test SparseArrays.opnormestinv(Ac,3) ≈ opnorm(inv(Array(Ac)),1) atol=1e-4
-        @test SparseArrays.opnormestinv(Aci,3) ≈ opnorm(inv(Array(Aci)),1) atol=1e-4
-        @test SparseArrays.opnormestinv(Ar) ≈ opnorm(inv(Array(Ar)),1) atol=1e-4
-        @test_throws ArgumentError SparseArrays.opnormestinv(Ac,0)
-        @test_throws ArgumentError SparseArrays.opnormestinv(Ac,21)
-    end
-    @test_throws DimensionMismatch SparseArrays.opnormestinv(sprand(3,5,.9))
-end
-
-@testset "issue #13008" begin
-    @test_throws ArgumentError sparse(Vector(1:100), Vector(1:100), fill(5,100), 5, 5)
-    @test_throws ArgumentError sparse(Int[], Vector(1:5), Vector(1:5))
-end
-
-@testset "issue #13024" begin
-    A13024 = sparse([1,2,3,4,5], [1,2,3,4,5], fill(true,5))
-    B13024 = sparse([1,2,4,5],   [1,2,3,5],   fill(true,4))
-
-    @test broadcast(&, A13024, B13024) == sparse([1,2,5], [1,2,5], fill(true,3))
-    @test typeof(broadcast(&, A13024, B13024)) == SparseMatrixCSC{Bool,Int}
-
-    @test broadcast(|, A13024, B13024) == sparse([1,2,3,4,4,5], [1,2,3,3,4,5], fill(true,6))
-    @test typeof(broadcast(|, A13024, B13024)) == SparseMatrixCSC{Bool,Int}
-
-    @test broadcast(⊻, A13024, B13024) == sparse([3,4,4], [3,3,4], fill(true,3), 5, 5)
-    @test typeof(broadcast(⊻, A13024, B13024)) == SparseMatrixCSC{Bool,Int}
-
-    @test broadcast(max, A13024, B13024) == sparse([1,2,3,4,4,5], [1,2,3,3,4,5], fill(true,6))
-    @test typeof(broadcast(max, A13024, B13024)) == SparseMatrixCSC{Bool,Int}
-
-    @test broadcast(min, A13024, B13024) == sparse([1,2,5], [1,2,5], fill(true,3))
-    @test typeof(broadcast(min, A13024, B13024)) == SparseMatrixCSC{Bool,Int}
-
-    for op in (+, -)
-        @test op(A13024, B13024) == op(Array(A13024), Array(B13024))
-    end
-    for op in (max, min, &, |, xor)
-        @test op.(A13024, B13024) == op.(Array(A13024), Array(B13024))
-    end
-end
-
-@testset "fillstored!" begin
-    @test LinearAlgebra.fillstored!(sparse(2.0I, 5, 5), 1) == Matrix(I, 5, 5)
-end
-
-@testset "factorization" begin
-    Random.seed!(123)
-    local A
-    A = sparse(Diagonal(rand(5))) + sprandn(5, 5, 0.2) + im*sprandn(5, 5, 0.2)
-    A = A + copy(A')
-    @test !Base.USE_GPL_LIBS || abs(det(factorize(Hermitian(A)))) ≈ abs(det(factorize(Array(A))))
-    A = sparse(Diagonal(rand(5))) + sprandn(5, 5, 0.2) + im*sprandn(5, 5, 0.2)
-    A = A*A'
-    @test !Base.USE_GPL_LIBS || abs(det(factorize(Hermitian(A)))) ≈ abs(det(factorize(Array(A))))
-    A = sparse(Diagonal(rand(5))) + sprandn(5, 5, 0.2)
-    A = A + copy(transpose(A))
-    @test !Base.USE_GPL_LIBS || abs(det(factorize(Symmetric(A)))) ≈ abs(det(factorize(Array(A))))
-    A = sparse(Diagonal(rand(5))) + sprandn(5, 5, 0.2)
-    A = A*transpose(A)
-    @test !Base.USE_GPL_LIBS || abs(det(factorize(Symmetric(A)))) ≈ abs(det(factorize(Array(A))))
-    @test factorize(triu(A)) == triu(A)
-    @test isa(factorize(triu(A)), UpperTriangular{Float64, SparseMatrixCSC{Float64, Int}})
-    @test factorize(tril(A)) == tril(A)
-    @test isa(factorize(tril(A)), LowerTriangular{Float64, SparseMatrixCSC{Float64, Int}})
-    C, b = A[:, 1:4], fill(1., size(A, 1))
-    @test !Base.USE_GPL_LIBS || factorize(C)\b ≈ Array(C)\b
-    @test_throws ErrorException eigen(A)
-    @test_throws ErrorException inv(A)
-end
-
-@testset "issue #13792, use sparse triangular solvers for sparse triangular solves" begin
-    local A, n, x
-    n = 100
-    A, b = sprandn(n, n, 0.5) + sqrt(n)*I, fill(1., n)
-    @test LowerTriangular(A)\(LowerTriangular(A)*b) ≈ b
-    @test UpperTriangular(A)\(UpperTriangular(A)*b) ≈ b
-    A[2,2] = 0
-    dropzeros!(A)
-    @test_throws LinearAlgebra.SingularException LowerTriangular(A)\b
-    @test_throws LinearAlgebra.SingularException UpperTriangular(A)\b
-end
-
-@testset "issue described in https://groups.google.com/forum/#!topic/julia-dev/QT7qpIpgOaA" begin
-    @test sparse([1,1], [1,1], [true, true]) == sparse([1,1], [1,1], [true, true], 1, 1) == fill(true, 1, 1)
-    @test sparsevec([1,1], [true, true]) == sparsevec([1,1], [true, true], 1) == fill(true, 1)
-end
-
-@testset "issparse for specialized matrix types" begin
-    m = sprand(10, 10, 0.1)
-    @test issparse(Symmetric(m))
-    @test issparse(Hermitian(m))
-    @test issparse(LowerTriangular(m))
-    @test issparse(LinearAlgebra.UnitLowerTriangular(m))
-    @test issparse(UpperTriangular(m))
-    @test issparse(LinearAlgebra.UnitUpperTriangular(m))
-    @test issparse(adjoint(m))
-    @test issparse(transpose(m))
-    @test issparse(Symmetric(Array(m))) == false
-    @test issparse(Hermitian(Array(m))) == false
-    @test issparse(LowerTriangular(Array(m))) == false
-    @test issparse(LinearAlgebra.UnitLowerTriangular(Array(m))) == false
-    @test issparse(UpperTriangular(Array(m))) == false
-    @test issparse(LinearAlgebra.UnitUpperTriangular(Array(m))) == false
-end
-
-@testset "issparse for sparse vectors #34253" begin
-    v = sprand(10, 0.5)
-    @test issparse(v)
-    @test issparse(v')
-    @test issparse(transpose(v))
-end
-
-@testset "test created type of sprand{T}(::Type{T}, m::Integer, n::Integer, density::AbstractFloat)" begin
-    m = sprand(Float32, 10, 10, 0.1)
-    @test eltype(m) == Float32
-    m = sprand(Float64, 10, 10, 0.1)
-    @test eltype(m) == Float64
-    m = sprand(Int32, 10, 10, 0.1)
-    @test eltype(m) == Int32
-end
-
-@testset "issue #16073" begin
-    @inferred sprand(1, 1, 1.0)
-    @inferred sprand(1, 1, 1.0, rand, Float64)
-    @inferred sprand(1, 1, 1.0, x -> round.(Int, rand(x) * 100))
-end
-
-# Test that concatenations of combinations of sparse matrices with sparse matrices or dense
-# matrices/vectors yield sparse arrays
-@testset "sparse and dense concatenations" begin
-    N = 4
-    densevec = fill(1., N)
-    densemat = diagm(0 => densevec)
-    spmat = spdiagm(0 => densevec)
-    # Test that concatenations of pairs of sparse matrices yield sparse arrays
-    @test issparse(vcat(spmat, spmat))
-    @test issparse(hcat(spmat, spmat))
-    @test issparse(hvcat((2,), spmat, spmat))
-    @test issparse(cat(spmat, spmat; dims=(1,2)))
-    # Test that concatenations of a sparse matrice with a dense matrix/vector yield sparse arrays
-    @test issparse(vcat(spmat, densemat))
-    @test issparse(vcat(densemat, spmat))
-    for densearg in (densevec, densemat)
-        @test issparse(hcat(spmat, densearg))
-        @test issparse(hcat(densearg, spmat))
-        @test issparse(hvcat((2,), spmat, densearg))
-        @test issparse(hvcat((2,), densearg, spmat))
-        @test issparse(cat(spmat, densearg; dims=(1,2)))
-        @test issparse(cat(densearg, spmat; dims=(1,2)))
-    end
-end
-
-@testset "issue #14816" begin
-    m = 5
-    intmat = fill(1, m, m)
-    ltintmat = LowerTriangular(rand(1:5, m, m))
-    @test \(transpose(ltintmat), sparse(intmat)) ≈ \(transpose(ltintmat), intmat)
-end
-
-# Test temporary fix for issue #16548 in PR #16979. Somewhat brittle. Expect to remove with `\` revisions.
-@testset "issue #16548" begin
-    ms = methods(\, (SparseMatrixCSC, AbstractVecOrMat)).ms
-    @test all(m -> m.module == SparseArrays, ms)
-end
-
-@testset "row indexing a SparseMatrixCSC with non-Int integer type" begin
-    local A = sparse(UInt32[1,2,3], UInt32[1,2,3], [1.0,2.0,3.0])
-    @test A[1,1:3] == A[1,:] == [1,0,0]
-end
-
-# Check that `broadcast` methods specialized for unary operations over `SparseMatrixCSC`s
-# are called. (Issue #18705.) EDIT: #19239 unified broadcast over a single sparse matrix,
-# eliminating the former operation classes.
-@testset "issue #18705" begin
-    S = sparse(Diagonal(1.0:5.0))
-    @test isa(sin.(S), SparseMatrixCSC)
-end
-
-@testset "issue #19225" begin
-    X = sparse([1 -1; -1 1])
-    for T in (Symmetric, Hermitian)
-        Y = T(copy(X))
-        _Y = similar(Y)
-        copyto!(_Y, Y)
-        @test _Y == Y
-
-        W = T(copy(X), :L)
-        copyto!(W, Y)
-        @test W.data == Y.data
-        @test W.uplo != Y.uplo
-
-        W[1,1] = 4
-        @test W == T(sparse([4 -1; -1 1]))
-        @test_throws ArgumentError (W[1,2] = 2)
-
-        @test Y + I == T(sparse([2 -1; -1 2]))
-        @test Y - I == T(sparse([0 -1; -1 0]))
-        @test Y * I == Y
-
-        @test Y .+ 1 == T(sparse([2 0; 0 2]))
-        @test Y .- 1 == T(sparse([0 -2; -2 0]))
-        @test Y * 2 == T(sparse([2 -2; -2 2]))
-        @test Y / 1 == Y
-    end
-end
-
-@testset "issue #19304" begin
-    @inferred hcat(sparse(rand(2,1)), I)
-    @inferred hcat(sparse(rand(2,1)), 1.0I)
-    @inferred hcat(sparse(rand(2,1)), Matrix(I, 2, 2))
-    @inferred hcat(sparse(rand(2,1)), Matrix(1.0I, 2, 2))
-end
-
-# Check that `broadcast` methods specialized for unary operations over
-# `SparseMatrixCSC`s determine a reasonable return type.
-@testset "issue #18974" begin
-    S = sparse(Diagonal(Int64(1):Int64(4)))
-    @test eltype(sin.(S)) == Float64
-end
-
-# Check calling of unary minus method specialized for SparseMatrixCSCs
-@testset "issue #19503" begin
-    @test which(-, (SparseMatrixCSC,)).module == SparseArrays
-end
-
-@testset "issue #14398" begin
-    @test collect(view(sparse(I, 10, 10), 1:5, 1:5)') ≈ Matrix(I, 5, 5)
-end
-
-@testset "dropstored issue #20513" begin
-    x = sparse(rand(3,3))
-    SparseArrays.dropstored!(x, 1, 1)
-    @test x[1, 1] == 0.0
-    @test getcolptr(x) == [1, 3, 6, 9]
-    SparseArrays.dropstored!(x, 2, 1)
-    @test getcolptr(x) == [1, 2, 5, 8]
-    @test x[2, 1] == 0.0
-    SparseArrays.dropstored!(x, 2, 2)
-    @test getcolptr(x) == [1, 2, 4, 7]
-    @test x[2, 2] == 0.0
-    SparseArrays.dropstored!(x, 2, 3)
-    @test getcolptr(x) == [1, 2, 4, 6]
-    @test x[2, 3] == 0.0
-end
-
-@testset "setindex issue #20657" begin
-    local A = spzeros(3, 3)
-    I = [1, 1, 1]; J = [1, 1, 1]
-    A[I, 1] .= 1
-    @test nnz(A) == 1
-    A[1, J] .= 1
-    @test nnz(A) == 1
-    A[I, J] .= 1
-    @test nnz(A) == 1
-end
-
-@testset "setindex with vector eltype (#29034)" begin
-    A = sparse([1], [1], [Vector{Float64}(undef, 3)], 3, 3)
-    A[1,1] = [1.0, 2.0, 3.0]
-    @test A[1,1] == [1.0, 2.0, 3.0]
-end
-
-@testset "isstored" begin
-    m = 5
-    n = 4
-    I = [1, 2, 5, 3]
-    J = [2, 3, 4, 2]
-    A = sparse(I, J, [1, 2, 3, 4], m, n)
-    stored_indices = [CartesianIndex(i, j) for (i, j) in zip(I, J)]
-    unstored_indices = [c for c in CartesianIndices((m, n)) if !(c in stored_indices)]
-    for c in stored_indices
-        @test Base.isstored(A, c[1], c[2]) == true
-    end
-    for c in unstored_indices
-        @test Base.isstored(A, c[1], c[2]) == false
-    end
-end
-
-@testset "show" begin
-    io = IOBuffer()
-    show(io, MIME"text/plain"(), spzeros(Float64, Int64, 0, 0))
-    @test String(take!(io)) == "0×0 SparseArrays.SparseMatrixCSC{Float64, Int64} with 0 stored entries"
-    show(io, MIME"text/plain"(), sparse(Int64[1], Int64[1], [1.0]))
-    @test String(take!(io)) == "1×1 SparseArrays.SparseMatrixCSC{Float64, Int64} with 1 stored entry:\n 1.0"
-    show(io, MIME"text/plain"(), spzeros(Float32, Int64, 2, 2))
-    @test String(take!(io)) == "2×2 SparseArrays.SparseMatrixCSC{Float32, Int64} with 0 stored entries:\n  ⋅    ⋅ \n  ⋅    ⋅ "
-
-    A = sparse(Int64[1, 1], Int64[1, 2], [1.0, 2.0])
-    show(io, MIME"text/plain"(), A)
-    @test String(take!(io)) == "1×2 SparseArrays.SparseMatrixCSC{Float64, Int64} with 2 stored entries:\n 1.0  2.0"
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    @test String(take!(io)) == "⠉"
-
-    # every 1-dot braille pattern
-    for (i, b) in enumerate(split("⠁⠂⠄⡀⠈⠐⠠⢀", ""))
-        A = spzeros(Int64, Int64, 4, 2)
-        A[i] = 1
-        _show_with_braille_patterns(convert(IOContext, io), A)
-        @test String(take!(io)) == b
-    end
-
-    # empty braille pattern Char(10240)
-    A = spzeros(Int64, Int64, 4, 2)
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    @test String(take!(io)) == "" * Char(10240)
-
-    A = sparse(Int64[1, 2, 4, 2, 3], Int64[1, 1, 1, 2, 2], Int64[1, 1, 1, 1, 1], 4, 2)
-    show(io, MIME"text/plain"(), A)
-    @test String(take!(io)) == "4×2 SparseArrays.SparseMatrixCSC{Int64, Int64} with 5 stored entries:\n 1  ⋅\n 1  1\n ⋅  1\n 1  ⋅"
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    @test String(take!(io)) == "⡳"
-
-    A = sparse(Int64[1, 3, 2, 4], Int64[1, 1, 2, 2], Int64[1, 1, 1, 1], 7, 3)
-    show(io, MIME"text/plain"(), A)
-    @test String(take!(io)) == "7×3 SparseArrays.SparseMatrixCSC{Int64, Int64} with 4 stored entries:\n 1  ⋅  ⋅\n ⋅  1  ⋅\n 1  ⋅  ⋅\n ⋅  1  ⋅\n ⋅  ⋅  ⋅\n ⋅  ⋅  ⋅\n ⋅  ⋅  ⋅"
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    @test String(take!(io)) == "⢕" * Char(10240) * "\n" * Char(10240)^2
-
-    A = sparse(Int64[1:10;], Int64[1:10;], fill(Float64(1), 10))
-    _show_with_braille_patterns(convert(IOContext, io), A)
-    brailleString = "⠑⢄" * Char(10240)^3 * "\n" * Char(10240)^2 * "⠑⢄" * Char(10240) * "\n" * Char(10240)^4 * "⠑"
-    @test String(take!(io)) == brailleString
-
-    # Issue #30589
-    @test repr("text/plain", sparse([true true])) == "1×2 SparseArrays.SparseMatrixCSC{Bool, $Int} with 2 stored entries:\n 1  1"
-
-    function _filled_sparse(m::Integer, n::Integer)
-        C = CartesianIndices((m, n))[:]
-        Is = [Int64(x[1]) for x in C]
-        Js = [Int64(x[2]) for x in C]
-        return sparse(Is, Js, true, m, n)
-    end
-
-    # vertical scaling
-    ioc = IOContext(io, :displaysize => (5, 80), :limit => true)
-    _show_with_braille_patterns(ioc, _filled_sparse(10, 10))
-    @test String(take!(io)) == "⣿⣿"
-
-    _show_with_braille_patterns(ioc, _filled_sparse(20, 10))
-    @test String(take!(io)) == "⣿"
-
-    # horizontal scaling
-    ioc = IOContext(io, :displaysize => (80, 4), :limit => true)
-    _show_with_braille_patterns(ioc, _filled_sparse(8, 8))
-    @test String(take!(io)) == "⣿⣿"
-
-    _show_with_braille_patterns(ioc, _filled_sparse(8, 16))
-    @test String(take!(io)) == "⠛⠛"
-end
-
-@testset "check buffers" for n in 1:3
-    local A
-    rowval = [1,2,3]
-    nzval1  = Int[]
-    nzval2  = [1,1,1]
-    A = SparseMatrixCSC(n, n, [1:n+1;], rowval, nzval1)
-    @test nnz(A) == n
-    @test_throws BoundsError A[n,n]
-    A = SparseMatrixCSC(n, n, [1:n+1;], rowval, nzval2)
-    @test nnz(A) == n
-    @test A      == Matrix(I, n, n)
-end
-
-@testset "reverse search direction if step < 0 #21986" begin
-    local A, B
-    A = guardseed(1234) do
-        sprand(5, 5, 1/5)
-    end
-    A = max.(A, copy(A'))
-    LinearAlgebra.fillstored!(A, 1)
-    B = A[5:-1:1, 5:-1:1]
-    @test issymmetric(B)
-end
-
-@testset "similar should not alias the input sparse array" begin
-    a = sparse(rand(3,3) .+ 0.1)
-    b = similar(a, Float32, Int32)
-    c = similar(b, Float32, Int32)
-    SparseArrays.dropstored!(b, 1, 1)
-    @test length(rowvals(c)) == 9
-    @test length(nonzeros(c)) == 9
-end
-
-@testset "similar with type conversion" begin
-    local A = sparse(1.0I, 5, 5)
-    @test size(similar(A, ComplexF64, Int)) == (5, 5)
-    @test typeof(similar(A, ComplexF64, Int)) == SparseMatrixCSC{ComplexF64, Int}
-    @test size(similar(A, ComplexF64, Int8)) == (5, 5)
-    @test typeof(similar(A, ComplexF64, Int8)) == SparseMatrixCSC{ComplexF64, Int8}
-    @test similar(A, ComplexF64,(6, 6)) == spzeros(ComplexF64, 6, 6)
-    @test convert(Matrix, A) == Array(A) # lolwut, are you lost, test?
-end
-
-@testset "similar for SparseMatrixCSC" begin
-    local A = sparse(1.0I, 5, 5)
-    # test similar without specifications (preserves stored-entry structure)
-    simA = similar(A)
-    @test typeof(simA) == typeof(A)
-    @test size(simA) == size(A)
-    @test getcolptr(simA) == getcolptr(A)
-    @test rowvals(simA) == rowvals(A)
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry type specification (preserves stored-entry structure)
-    simA = similar(A, Float32)
-    @test typeof(simA) == SparseMatrixCSC{Float32,eltype(getcolptr(A))}
-    @test size(simA) == size(A)
-    @test getcolptr(simA) == getcolptr(A)
-    @test rowvals(simA) == rowvals(A)
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry and index type specification (preserves stored-entry structure)
-    simA = similar(A, Float32, Int8)
-    @test typeof(simA) == SparseMatrixCSC{Float32,Int8}
-    @test size(simA) == size(A)
-    @test getcolptr(simA) == getcolptr(A)
-    @test rowvals(simA) == rowvals(A)
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with Dims{2} specification (preserves storage space only, not stored-entry structure)
-    simA = similar(A, (6,6))
-    @test typeof(simA) == typeof(A)
-    @test size(simA) == (6,6)
-    @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(rowvals(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry type and Dims{2} specification (preserves storage space only)
-    simA = similar(A, Float32, (6,6))
-    @test typeof(simA) == SparseMatrixCSC{Float32,eltype(getcolptr(A))}
-    @test size(simA) == (6,6)
-    @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(rowvals(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry type, index type, and Dims{2} specification (preserves storage space only)
-    simA = similar(A, Float32, Int8, (6,6))
-    @test typeof(simA) == SparseMatrixCSC{Float32, Int8}
-    @test size(simA) == (6,6)
-    @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(rowvals(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with Dims{1} specification (preserves nothing)
-    simA = similar(A, (6,))
-    @test typeof(simA) == SparseVector{eltype(nonzeros(A)),eltype(getcolptr(A))}
-    @test size(simA) == (6,)
-    @test length(nonzeroinds(simA)) == 0
-    @test length(nonzeros(simA)) == 0
-    # test similar with entry type and Dims{1} specification (preserves nothing)
-    simA = similar(A, Float32, (6,))
-    @test typeof(simA) == SparseVector{Float32,eltype(getcolptr(A))}
-    @test size(simA) == (6,)
-    @test length(nonzeroinds(simA)) == 0
-    @test length(nonzeros(simA)) == 0
-    # test similar with entry type, index type, and Dims{1} specification (preserves nothing)
-    simA = similar(A, Float32, Int8, (6,))
-    @test typeof(simA) == SparseVector{Float32,Int8}
-    @test size(simA) == (6,)
-    @test length(nonzeroinds(simA)) == 0
-    @test length(nonzeros(simA)) == 0
-    # test entry points to similar with entry type, index type, and non-Dims shape specification
-    @test similar(A, Float32, Int8, 6, 6) == similar(A, Float32, Int8, (6, 6))
-    @test similar(A, Float32, Int8, 6) == similar(A, Float32, Int8, (6,))
-end
-
-@testset "count specializations" begin
-    # count should throw for sparse arrays for which zero(eltype) does not exist
-    @test_throws MethodError count(SparseMatrixCSC(2, 2, Int[1, 2, 3], Int[1, 2], Any[true, true]))
-    @test_throws MethodError count(SparseVector(2, Int[1], Any[true]))
-    # count should run only over nonzeros(S)[1:nnz(S)], not nonzeros(S) in full
-    @test count(SparseMatrixCSC(2, 2, Int[1, 2, 3], Int[1, 2], Bool[true, true, true])) == 2
-end
-
-@testset "sparse findprev/findnext operations" begin
-
-    x = [0,0,0,0,1,0,1,0,1,1,0]
-    x_sp = sparse(x)
-
-    for i=1:length(x)
-        @test findnext(!iszero, x,i) == findnext(!iszero, x_sp,i)
-        @test findprev(!iszero, x,i) == findprev(!iszero, x_sp,i)
-    end
-
-    y = [7 0 0 0 0;
-         1 0 1 0 0;
-         1 7 0 7 1;
-         0 0 1 0 0;
-         1 0 1 1 0.0]
-    y_sp = [x == 7 ? -0.0 : x for x in sparse(y)]
-    y = Array(y_sp)
-    @test isequal(y_sp[1,1], -0.0)
-
-    for i in keys(y)
-        @test findnext(!iszero, y,i) == findnext(!iszero, y_sp,i)
-        @test findprev(!iszero, y,i) == findprev(!iszero, y_sp,i)
-        @test findnext(iszero, y,i) == findnext(iszero, y_sp,i)
-        @test findprev(iszero, y,i) == findprev(iszero, y_sp,i)
-    end
-
-    z_sp = sparsevec(Dict(1=>1, 5=>1, 8=>0, 10=>1))
-    z = collect(z_sp)
-
-    for i in keys(z)
-        @test findnext(!iszero, z,i) == findnext(!iszero, z_sp,i)
-        @test findprev(!iszero, z,i) == findprev(!iszero, z_sp,i)
-    end
-
-    # issue 32568
-    for T = (UInt, BigInt)
-        @test findnext(!iszero, x_sp, T(4)) isa keytype(x_sp)
-        @test findnext(!iszero, x_sp, T(5)) isa keytype(x_sp)
-        @test findprev(!iszero, x_sp, T(5)) isa keytype(x_sp)
-        @test findprev(!iszero, x_sp, T(6)) isa keytype(x_sp)
-        @test findnext(iseven, x_sp, T(4)) isa keytype(x_sp)
-        @test findnext(iseven, x_sp, T(5)) isa keytype(x_sp)
-        @test findprev(iseven, x_sp, T(4)) isa keytype(x_sp)
-        @test findprev(iseven, x_sp, T(5)) isa keytype(x_sp)
-        @test findnext(!iszero, z_sp, T(4)) isa keytype(z_sp)
-        @test findnext(!iszero, z_sp, T(5)) isa keytype(z_sp)
-        @test findprev(!iszero, z_sp, T(4)) isa keytype(z_sp)
-        @test findprev(!iszero, z_sp, T(5)) isa keytype(z_sp)
-    end
-end
-
-# #20711
-@testset "vec returns a view" begin
-    local A = sparse(Matrix(1.0I, 3, 3))
-    local v = vec(A)
-    v[1] = 2
-    @test A[1,1] == 2
-end
-
-# #25943
-@testset "operations on Integer subtypes" begin
-    s = sparse(UInt8[1, 2, 3], UInt8[1, 2, 3], UInt8[1, 2, 3])
-    @test sum(s, dims=2) == reshape([1, 2, 3], 3, 1)
-end
-
-@testset "mapreduce of sparse matrices with trailing elements in nzval #26534" begin
-    B = SparseMatrixCSC{Int,Int}(2, 3,
-        [1, 3, 4, 5],
-        [1, 2, 1, 2, 999, 999, 999, 999],
-        [1, 2, 3, 6, 999, 999, 999, 999]
-    )
-    @test maximum(B) == 6
-end
-
-_length_or_count_or_five(::Colon) = 5
-_length_or_count_or_five(x::AbstractVector{Bool}) = count(x)
-_length_or_count_or_five(x) = length(x)
-@testset "nonscalar setindex!" begin
-    for I in (1:4, :, 5:-1:2, [], trues(5), setindex!(falses(5), true, 2), 3),
-        J in (2:4, :, 4:-1:1, [], setindex!(trues(5), false, 3), falses(5), 4)
-        V = sparse(1 .+ zeros(_length_or_count_or_five(I)*_length_or_count_or_five(J)))
-        M = sparse(1 .+ zeros(_length_or_count_or_five(I), _length_or_count_or_five(J)))
-        if I isa Integer && J isa Integer
-            @test_throws MethodError spzeros(5,5)[I, J] = V
-            @test_throws MethodError spzeros(5,5)[I, J] = M
-            continue
-        end
-        @test setindex!(spzeros(5, 5), V, I, J) == setindex!(zeros(5,5), V, I, J)
-        @test setindex!(spzeros(5, 5), M, I, J) == setindex!(zeros(5,5), M, I, J)
-        @test setindex!(spzeros(5, 5), Array(M), I, J) == setindex!(zeros(5,5), M, I, J)
-        @test setindex!(spzeros(5, 5), Array(V), I, J) == setindex!(zeros(5,5), V, I, J)
-    end
-    @test setindex!(spzeros(5, 5), 1:25, :) == setindex!(zeros(5,5), 1:25, :) == reshape(1:25, 5, 5)
-    @test setindex!(spzeros(5, 5), (25:-1:1).+spzeros(25), :) == setindex!(zeros(5,5), (25:-1:1).+spzeros(25), :) == reshape(25:-1:1, 5, 5)
-    for X in (1:20, sparse(1:20), reshape(sparse(1:20), 20, 1), (1:20) .+ spzeros(20, 1), collect(1:20), collect(reshape(1:20, 20, 1)))
-        @test setindex!(spzeros(5, 5), X, 6:25) == setindex!(zeros(5,5), 1:20, 6:25)
-        @test setindex!(spzeros(5, 5), X, 21:-1:2) == setindex!(zeros(5,5), 1:20, 21:-1:2)
-        b = trues(25)
-        b[[6, 8, 13, 15, 23]] .= false
-        @test setindex!(spzeros(5, 5), X, b) == setindex!(zeros(5, 5), X, b)
-    end
-end
-
-@testset "sparse transpose adjoint" begin
-    A = sprand(10, 10, 0.75)
-    @test A' == SparseMatrixCSC(A')
-    @test SparseMatrixCSC(A') isa SparseMatrixCSC
-    @test transpose(A) == SparseMatrixCSC(transpose(A))
-    @test SparseMatrixCSC(transpose(A)) isa SparseMatrixCSC
-    @test SparseMatrixCSC{eltype(A)}(transpose(A)) == transpose(A)
-    @test SparseMatrixCSC{eltype(A), Int}(transpose(A)) == transpose(A)
-    @test SparseMatrixCSC{Float16}(transpose(A)) == transpose(SparseMatrixCSC{Float16}(A))
-    @test SparseMatrixCSC{Float16, Int}(transpose(A)) == transpose(SparseMatrixCSC{Float16}(A))
-    B = sprand(ComplexF64, 10, 10, 0.75)
-    @test SparseMatrixCSC{eltype(B)}(adjoint(B)) == adjoint(B)
-    @test SparseMatrixCSC{eltype(B), Int}(adjoint(B)) == adjoint(B)
-    @test SparseMatrixCSC{ComplexF16}(adjoint(B)) == adjoint(SparseMatrixCSC{ComplexF16}(B))
-    @test SparseMatrixCSC{ComplexF16, Int8}(adjoint(B)) == adjoint(SparseMatrixCSC{ComplexF16, Int8}(B))
-end
-
-# PR 28242
-@testset "forward and backward solving of transpose/adjoint triangular matrices" begin
-    rng = MersenneTwister(20180730)
-    n = 10
-    A = sprandn(rng, n, n, 0.8); A += Diagonal((1:n) - diag(A))
-    B = ones(n, 2)
-    for (Ttri, triul ) in ((UpperTriangular, triu), (LowerTriangular, tril))
-        for trop in (adjoint, transpose)
-            AT = Ttri(A)           # ...Triangular wrapped
-            AC = triul(A)          # copied part of A
-            ATa = trop(AT)         # wrapped Adjoint
-            ACa = sparse(trop(AC)) # copied and adjoint
-            @test AT \ B ≈ AC \ B
-            @test ATa \ B ≈ ACa \ B
-            @test ATa \ sparse(B) == ATa \ B
-            @test Matrix(ATa) \ B ≈ ATa \ B
-            @test ATa * ( ATa \ B ) ≈ B
-        end
-    end
-end
-
-@testset "Issue #28369" begin
-    M = reshape([[1 2; 3 4], [9 10; 11 12], [5 6; 7 8], [13 14; 15 16]], (2,2))
-    MP = reshape([[1 2; 3 4], [5 6; 7 8], [9 10; 11 12], [13 14; 15 16]], (2,2))
-    S = sparse(M)
-    SP = sparse(MP)
-    @test isa(transpose(S), Transpose)
-    @test transpose(S) == copy(transpose(S))
-    @test Array(transpose(S)) == copy(transpose(M))
-    @test permutedims(S) == SP
-    @test permutedims(S, (2,1)) == SP
-    @test permutedims(S, (1,2)) == S
-    @test permutedims(S, (1,2)) !== S
-    @test_throws ArgumentError permutedims(S, (1,3))
-    MC = reshape([[(1+im) 2; 3 4], [9 10; 11 12], [(5 + 2im) 6; 7 8], [13 14; 15 16]], (2,2))
-    SC = sparse(MC)
-    @test isa(adjoint(SC), Adjoint)
-    @test adjoint(SC) == copy(adjoint(SC))
-    @test adjoint(MC) == copy(adjoint(SC))
-end
-
-begin
-    rng = Random.MersenneTwister(0)
-    n = 1000
-    B = ones(n)
-    A = sprand(rng, n, n, 0.01)
-    MA = Matrix(A)
-    lA = sprand(rng, n, n+10, 0.01)
-    @testset "triangular multiply with $tr($wr)" for tr in (identity, adjoint, transpose),
-    wr in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
-        AW = tr(wr(A))
-        MAW = tr(wr(MA))
-        @test AW * B ≈ MAW * B
-        # and for SparseMatrixCSCView - a view of all rows and unit range of cols
-        vAW = tr(wr(view(A, :, 1:n)))
-        vMAW = tr(wr(view(MA, :, 1:n)))
-        @test vAW * B ≈ vMAW * B
-    end
-    A = A - Diagonal(diag(A)) + 2I # avoid rounding errors by division
-    MA = Matrix(A)
-    @testset "triangular solver for $tr($wr)" for tr in (identity, adjoint, transpose),
-    wr in (UpperTriangular, LowerTriangular, UnitUpperTriangular, UnitLowerTriangular)
-        AW = tr(wr(A))
-        MAW = tr(wr(MA))
-        @test AW \ B ≈ MAW \ B
-    end
-    @testset "triangular singular exceptions" begin
-        A = LowerTriangular(sparse([0 2.0;0 1]))
-        @test_throws SingularException(1) A \ ones(2)
-        A = UpperTriangular(sparse([1.0 0;0 0]))
-        @test_throws SingularException(2) A \ ones(2)
-    end
-end
-
-@testset "Issue #28634" begin
-    a = SparseMatrixCSC{Int8, Int16}([1 2; 3 4])
-    na = SparseMatrixCSC(a)
-    @test typeof(a) === typeof(na)
-end
-
-#PR #29045
-@testset "Issue #28934" begin
-    A = sprand(5,5,0.5)
-    D = Diagonal(rand(5))
-    C = copy(A)
-    m1 = @which mul!(C,A,D)
-    m2 = @which mul!(C,D,A)
-    @test m1.module == SparseArrays
-    @test m2.module == SparseArrays
-end
-
-@testset "Symmetric of sparse matrix mul! dense vector" begin
-    rng = Random.MersenneTwister(1)
-    n = 1000
-    p = 0.02
-    q = 1 - sqrt(1-p)
-    Areal = sprandn(rng, n, n, p)
-    Breal = randn(rng, n)
-    Acomplex = sprandn(rng, n, n, q) + sprandn(rng, n, n, q) * im
-    Bcomplex = Breal + randn(rng, n) * im
-    @testset "symmetric/Hermitian sparse multiply with $S($U)" for S in (Symmetric, Hermitian), U in (:U, :L), (A, B) in ((Areal,Breal), (Acomplex,Bcomplex))
-        Asym = S(A, U)
-        As = sparse(Asym) # takes most time
-        @test which(mul!, (typeof(B), typeof(Asym), typeof(B))).module == SparseArrays
-        @test norm(Asym * B - As * B, Inf) <= eps() * n * p * 10
-    end
-end
-
-@testset "Symmetric of view of sparse matrix mul! dense vector" begin
-    rng = Random.MersenneTwister(1)
-    n = 1000
-    p = 0.02
-    q = 1 - sqrt(1-p)
-    Areal = view(sprandn(rng, n, n+10, p), :, 6:n+5)
-    Breal = randn(rng, n)
-    Acomplex = view(sprandn(rng, n, n+10, q) + sprandn(rng, n, n+10, q) * im, :, 6:n+5)
-    Bcomplex = Breal + randn(rng, n) * im
-    @testset "symmetric/Hermitian sparseview multiply with $S($U)" for S in (Symmetric, Hermitian), U in (:U, :L), (A, B) in ((Areal,Breal), (Acomplex,Bcomplex))
-        Asym = S(A, U)
-        As = sparse(Asym) # takes most time
-        @test which(mul!, (typeof(B), typeof(Asym), typeof(B))).module == SparseArrays
-        @test norm(Asym * B - As * B, Inf) <= eps() * n * p * 10
-    end
-end
-
-@testset "sprand" begin
-    p=0.3; m=1000; n=2000;
-    for s in 1:10
-        # build a (dense) random matrix with randsubset + rand
-        Random.seed!(s);
-        v = randsubseq(1:m*n,p);
-        x = zeros(m,n);
-        x[v] .= rand(length(v));
-        # redo the same with sprand
-        Random.seed!(s);
-        a = sprand(m,n,p);
-        @test x == a
-    end
-end
-
-@testset "sprandn with type $T" for T in (Float64, Float32, Float16, ComplexF64, ComplexF32, ComplexF16)
-    @test sprandn(T, 5, 5, 0.5) isa AbstractSparseMatrix{T}
-end
-@testset "sprandn with invalid type $T" for T in (AbstractFloat, BigFloat, Complex)
-    @test_throws MethodError sprandn(T, 5, 5, 0.5)
-end
-
-@testset "method ambiguity" begin
-    # Ambiguity test is run inside a clean process.
-    # https://github.com/JuliaLang/julia/issues/28804
-    script = joinpath(@__DIR__, "ambiguous_exec.jl")
-    cmd = `$(Base.julia_cmd()) --startup-file=no $script`
-    @test success(pipeline(cmd; stdout=stdout, stderr=stderr))
-end
-
-@testset "oneunit of sparse matrix" begin
-    A = sparse([Second(0) Second(0); Second(0) Second(0)])
-    @test oneunit(sprand(2, 2, 0.5)) isa SparseMatrixCSC{Float64}
-    @test oneunit(A) isa SparseMatrixCSC{Second}
-    @test one(sprand(2, 2, 0.5)) isa SparseMatrixCSC{Float64}
-    @test one(A) isa SparseMatrixCSC{Int}
-end
-
-@testset "circshift" begin
-    m,n = 17,15
-    A = sprand(m, n, 0.5)
-    for rshift in (-1, 0, 1, 10), cshift in (-1, 0, 1, 10)
-        shifts = (rshift, cshift)
-        # using dense circshift to compare
-        B = circshift(Matrix(A), shifts)
-        # sparse circshift
-        C = circshift(A, shifts)
-        @test C == B
-        # sparse circshift should not add structural zeros
-        @test nnz(C) == nnz(A)
-        # test circshift!
-        D = similar(A)
-        circshift!(D, A, shifts)
-        @test D == B
-        @test nnz(D) == nnz(A)
-        # test different in/out types
-        A2 = floor.(100A)
-        E1 = spzeros(Int64, m, n)
-        E2 = spzeros(Int64, m, n)
-        circshift!(E1, A2, shifts)
-        circshift!(E2, Matrix(A2), shifts)
-        @test E1 == E2
-    end
-end
-
-@testset "wrappers of sparse" begin
-    m = n = 10
-    A = spzeros(ComplexF64, m, n)
-    A[:,1] = 1:m
-    A[:,2] = [1 3 0 0 0 0 0 0 0 0]'
-    A[:,3] = [2 4 0 0 0 0 0 0 0 0]'
-    A[:,4] = [0 0 0 0 5 3 0 0 0 0]'
-    A[:,5] = [0 0 0 0 6 2 0 0 0 0]'
-    A[:,6] = [0 0 0 0 7 4 0 0 0 0]'
-    A[:,7:n] = rand(ComplexF64, m, n-6)
-    B = Matrix(A)
-    dowrap(wr, A) = wr(A)
-    dowrap(wr::Tuple, A) = (wr[1])(A, wr[2:end]...)
-
-    @testset "sparse($wr(A))" for wr in (
-                        Symmetric, (Symmetric, :L), Hermitian, (Hermitian, :L),
-                        Transpose, Adjoint,
-                        UpperTriangular, LowerTriangular,
-                        UnitUpperTriangular, UnitLowerTriangular,
-                        (view, 3:6, 2:5))
-
-        @test SparseMatrixCSC(dowrap(wr, A)) == Matrix(dowrap(wr, B))
-    end
-
-    @testset "sparse($at($wr))" for at = (Transpose, Adjoint), wr =
-        (UpperTriangular, LowerTriangular,
-         UnitUpperTriangular, UnitLowerTriangular)
-
-        @test SparseMatrixCSC(at(wr(A))) == Matrix(at(wr(B)))
-    end
-
-    @test sparse([1,2,3,4,5]') == SparseMatrixCSC([1 2 3 4 5])
-    @test sparse(UpperTriangular(A')) == UpperTriangular(B')
-    @test sparse(Adjoint(UpperTriangular(A'))) == Adjoint(UpperTriangular(B'))
-    @test sparse(UnitUpperTriangular(spzeros(5,5))) == I
-    deepwrap(A) = (Adjoint(LowerTriangular(view(Symmetric(A), 5:7, 4:6))))
-    @test sparse(deepwrap(A)) == Matrix(deepwrap(B))
-end
-
-@testset "unary operations on matrices where length(nzval)>nnz" begin
-    # this should create a sparse matrix with length(nzval)>nnz
-    A = SparseMatrixCSC(Complex{BigInt}[1+im 2+2im]')'[1:1, 2:2]
-    # ...ensure it does! If necessary, the test needs to be updated to use
-    # another mechanism to create a suitable A.
-    resize!(nonzeros(A), 2)
-    @assert length(nonzeros(A)) > nnz(A)
-    @test -A == fill(-2-2im, 1, 1)
-    @test conj(A) == fill(2-2im, 1, 1)
-    conj!(A)
-    @test A == fill(2-2im, 1, 1)
-end
-
-@testset "issue #31453" for T in [UInt8, Int8, UInt16, Int16, UInt32, Int32]
-    i = Int[1, 2]
-    j = Int[2, 1]
-    i2 = T.(i)
-    j2 = T.(j)
-    v = [500, 600]
-    x1 = sparse(i, j, v)
-    x2 = sparse(i2, j2, v)
-    @test sum(x1) == sum(x2) == 1100
-    @test sum(x1, dims=1) == sum(x2, dims=1)
-    @test sum(x1, dims=2) == sum(x2, dims=2)
-end
-
-@testset "Ti cannot store all potential values #31024" begin
-    # m * n >= typemax(Ti) but nnz < typemax(Ti)
-    A = SparseMatrixCSC(12, 12, fill(Int8(1),13), Int8[], Int[])
-    @test size(A) == (12,12) && nnz(A) == 0
-    I1 = [Int8(i) for i in 1:20 for _ in 1:20]
-    J1 = [Int8(i) for _ in 1:20 for i in 1:20]
-    # m * n >= typemax(Ti) and nnz >= typemax(Ti)
-    @test_throws ArgumentError sparse(I1, J1, ones(length(I1)))
-    I1 = Int8.(rand(1:10, 500))
-    J1 = Int8.(rand(1:10, 500))
-    V1 = ones(500)
-    # m * n < typemax(Ti) and length(I) >= typemax(Ti) - combining values
-    @test_throws ArgumentError sparse(I1, J1, V1, 10, 10)
-    # m * n >= typemax(Ti) and length(I) >= typemax(Ti)
-    @test_throws ArgumentError sparse(I1, J1, V1, 12, 13)
-    I1 = Int8.(rand(1:10, 126))
-    J1 = Int8.(rand(1:10, 126))
-    V1 = ones(126)
-    # m * n >= typemax(Ti) and length(I) < typemax(Ti)
-    @test size(sparse(I1, J1, V1, 100, 100)) == (100,100)
-end
-
-@testset "Typecheck too strict #31435" begin
-    A = SparseMatrixCSC{Int,Int8}(70, 2, fill(Int8(1), 3), Int8[], Int[])
-    A[5:67,1:2] .= ones(Int, 63, 2)
-    @test nnz(A) == 126
-    # nnz >= typemax
-    @test_throws ArgumentError A[2,1] = 42
-    # colptr short
-    @test_throws ArgumentError SparseMatrixCSC(1, 1, Int[], Int[], Float64[])
-    # colptr[1] must be 1
-    @test_throws ArgumentError SparseMatrixCSC(10, 3, [0,1,1,1], Int[], Float64[])
-    # colptr not ascending
-    @test_throws ArgumentError SparseMatrixCSC(10, 3, [1,2,1,2], Int[], Float64[])
-    # rowwal (and nzval) short
-    @test_throws ArgumentError SparseMatrixCSC(10, 3, [1,2,2,4], [1,2], Float64[])
-    # nzval short
-    @test SparseMatrixCSC(10, 3, [1,2,2,4], [1,2,3], Float64[]) !== nothing
-    # length(rowval) >= typemax
-    @test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], fill(Int8(1),127), Int[1,2,3])
-    @test SparseMatrixCSC{Int,Int8}(5, 1, Int8[1,2], fill(Int8(1),127), Int[1,2,3]) != 0
-    # length(nzval) >= typemax
-    @test_throws ArgumentError SparseMatrixCSC(5, 1, Int8[1,2], Int8[1], fill(7, 127))
-    @test SparseMatrixCSC{Int,Int8}(5, 1, Int8[1,2], Int8[1], fill(7, 127)) != 0
-
-    # length(I) >= typemax
-    @test_throws ArgumentError sparse(UInt8.(1:255), fill(UInt8(1), 255), fill(1, 255))
-    # m > typemax
-    @test_throws ArgumentError sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 256, 1)
-    # n > typemax
-    @test_throws ArgumentError sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 255, 256)
-    # n, m maximal
-    @test sparse(UInt8.(1:254), fill(UInt8(1), 254), fill(1, 254), 255, 255) !== nothing
-end
-
-@testset "Sparse promotion in sparse matmul" begin
-    A = SparseMatrixCSC{Float32, Int8}(2, 2, Int8[1, 2, 3], Int8[1, 2], Float32[1., 2.])
-    B = SparseMatrixCSC{ComplexF32, Int32}(2, 2, Int32[1, 2, 3], Int32[1, 2], ComplexF32[1. + im, 2. - im])
-    @test A*transpose(B)                  ≈ Array(A) * transpose(Array(B))
-    @test A*adjoint(B)                    ≈ Array(A) * adjoint(Array(B))
-    @test transpose(A)*B                  ≈ transpose(Array(A)) * Array(B)
-    @test transpose(A)*transpose(B)       ≈ transpose(Array(A)) * transpose(Array(B))
-    @test adjoint(B)*A                    ≈ adjoint(Array(B)) * Array(A)
-    @test adjoint(B)*adjoint(complex.(A)) ≈ adjoint(Array(B)) * adjoint(Array(complex.(A)))
-end
-
-@testset "copy a ReshapedArray of SparseMatrixCSC" begin
-    A = sprand(20, 10, 0.2)
-    rA = reshape(A, 10, 20)
-    crA = copy(rA)
-    @test reshape(crA, 20, 10) == A
-end
-
-@testset "avoid aliasing of fields during constructing $T (issue #34630)" for T in
-    (SparseMatrixCSC, SparseMatrixCSC{Float64}, SparseMatrixCSC{Float64,Int16})
-
-    A = sparse([1 1; 1 0])
-    B = T(A)
-    @test A == B
-    A[2,2] = 1
-    @test A != B
-    @test getcolptr(A) !== getcolptr(B)
-    @test rowvals(A) !== rowvals(B)
-    @test nonzeros(A) !== nonzeros(B)
-end
-
-@testset "SparseMatrixCSCView" begin
-    A  = sprand(10, 10, 0.2)
-    vA = view(A, :, 1:5) # a CSCView contains all rows and a UnitRange of the columns
-    @test SparseArrays.getnzval(vA)  == SparseArrays.getnzval(A)
-    @test SparseArrays.getrowval(vA) == SparseArrays.getrowval(A)
-    @test SparseArrays.getcolptr(vA) == SparseArrays.getcolptr(A[:, 1:5])
-end
-
-@testset "mapreducecols" begin
-    n = 20
-    m = 10
-    A = sprand(n, m, 0.2)
-    B = mapreduce(identity, +, A, dims=2)
-    for row in 1:n
-        @test B[row] ≈ sum(A[row, :])
-    end
-    @test B ≈ mapreduce(identity, +, Matrix(A), dims=2)
-    # case when f(0) =\= 0
-    B = mapreduce(x->x+1, +, A, dims=2)
-    for row in 1:n
-        @test B[row] ≈ sum(A[row, :] .+ 1)
-    end
-    @test B ≈ mapreduce(x->x+1, +, Matrix(A), dims=2)
-    # case when there are no zeros in the sparse matrix
-    A = sparse(rand(n, m))
-    B = mapreduce(identity, +, A, dims=2)
-    for row in 1:n
-        @test B[row] ≈ sum(A[row, :])
-    end
-    @test B ≈ mapreduce(identity, +, Matrix(A), dims=2)
-end
-
-@testset "Symmetric and Hermitian #35325" begin
-    A = sprandn(ComplexF64, 10, 10, 0.1)
-    B = sprandn(ComplexF64, 10, 10, 0.1)
-
-    @test Symmetric(real(A)) + Hermitian(B) isa Hermitian{ComplexF64, <:SparseMatrixCSC}
-    @test Hermitian(A) + Symmetric(real(B)) isa Hermitian{ComplexF64, <:SparseMatrixCSC}
-    @test Hermitian(A) + Symmetric(B) isa SparseMatrixCSC
-    @testset "$Wrapper $op" for op ∈ (+, -), Wrapper ∈ (Hermitian, Symmetric)
-        AWU = Wrapper(A, :U)
-        AWL = Wrapper(A, :L)
-        BWU = Wrapper(B, :U)
-        BWL = Wrapper(B, :L)
-
-        @test op(AWU, B) isa SparseMatrixCSC
-        @test op(A, BWL) isa SparseMatrixCSC
-
-        @test op(AWU, B) ≈ op(collect(AWU), B)
-        @test op(AWL, B) ≈ op(collect(AWL), B)
-        @test op(A, BWU) ≈ op(A, collect(BWU))
-        @test op(A, BWL) ≈ op(A, collect(BWL))
-
-        @test op(AWU, BWL) isa Wrapper{ComplexF64, <:SparseMatrixCSC}
-
-        @test op(AWU, BWU) ≈ op(collect(AWU), collect(BWU))
-        @test op(AWU, BWL) ≈ op(collect(AWU), collect(BWL))
-        @test op(AWL, BWU) ≈ op(collect(AWL), collect(BWU))
-        @test op(AWL, BWL) ≈ op(collect(AWL), collect(BWL))
-    end
-end
-
-@testset "Multiplying with triangular sparse matrices #35609 #35610" begin
-    n = 10
-    A = sprand(n, n, 5/n)
-    U = UpperTriangular(A)
-    L = LowerTriangular(A)
-    AM = Matrix(A)
-    UM = Matrix(U)
-    LM = Matrix(L)
-    Y = A * U
-    @test Y ≈ AM * UM
-    @test typeof(Y) == typeof(A)
-    Y = A * L
-    @test Y ≈ AM * LM
-    @test typeof(Y) == typeof(A)
-    Y = U * A
-    @test Y ≈ UM * AM
-    @test typeof(Y) == typeof(A)
-    Y = L * A
-    @test Y ≈ LM * AM
-    @test typeof(Y) == typeof(A)
-    Y = U * U
-    @test Y ≈ UM * UM
-    @test typeof(Y) == typeof(U)
-    Y = L * L
-    @test Y ≈ LM * LM
-    @test typeof(Y) == typeof(L)
-    Y = L * U
-    @test Y ≈ LM * UM
-    @test typeof(Y) == typeof(A)
-    Y = U * L
-    @test Y ≈ UM * LM
-    @test typeof(Y) == typeof(A)
-end
-
-#testing the sparse matrix/vector access functions nnz, nzrange, rowvals, nonzeros
-@testset "generic sparse matrix access functions" begin
-    I = [1,3,4,5, 1,3,4,5, 1,3,4,5];
-    J = [4,4,4,4, 5,5,5,5, 6,6,6,6];
-    V = [14,34,44,54, 15,35,45,55, 16,36,46,56];
-    A = sparse(I, J, V, 9, 9);
-    AU = UpperTriangular(A)
-    AL = LowerTriangular(A)
-    b = SparseVector(9, I[1:4], V[1:4])
-    c = view(A, :, 5)
-    d = view(b, :)
-
-    @testset "nnz $n" for (n, M, nz) in (("A", A, 12), ("AU", AU, 11), ("AL", AL, 3),
-                                         ("b", b, 4), ("c", c, 4), ("d", d, 4))
-        @test nnz(M) == nz
-        @test_throws BoundsError nzrange(M, 0)
-        @test_throws BoundsError nzrange(M, size(M, 2) + 1)
-    end
-    @testset "nzrange(A, $i)" for (i, nzr) in ((1,1:0),(4,1:4),(5,5:8),(6,9:12),(9,13:12))
-        @test nzrange(A, i) == nzr
-    end
-    @testset "nzrange(AU, $i)" for (i, nzr) in ((2,1:0),(4,1:3),(5,5:8),(6,9:12),(8,13:12))
-        @test nzrange(AU, i) == nzr
-    end
-    @testset "nzrange(AL, $i)" for (i, nzr) in ((3,1:0),(4,3:4),(5,8:8),(6,13:12),(7,13:12))
-        @test nzrange(AL, i) == nzr
-    end
-    @test nzrange(b, 1) == 1:4
-    @test nzrange(c, 1) == 1:4
-    @test nzrange(d, 1) == 1:4
-
-    @test rowvals(A) == I
-    @test rowvals(AL) == I
-    @test rowvals(AL) == I
-    @test rowvals(b) == I[1:4]
-    @test rowvals(c) == I[5:8]
-    @test rowvals(d) == I[1:4]
-
-    @test nonzeros(A) == V
-    @test nonzeros(AU) == V
-    @test nonzeros(AL) == V
-    @test nonzeros(b) == V[1:4]
-    @test nonzeros(c) == V[5:8]
-    @test nonzeros(d) == V[1:4]
-end
-
-end # module
diff --git a/stdlib/SparseArrays/test/sparsevector.jl b/stdlib/SparseArrays/test/sparsevector.jl
deleted file mode 100644
index 5e433c8c12eb74..00000000000000
--- a/stdlib/SparseArrays/test/sparsevector.jl
+++ /dev/null
@@ -1,1460 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module SparseVectorTests
-
-using Test
-using SparseArrays
-using SparseArrays: nonzeroinds, getcolptr
-using LinearAlgebra
-using Random
-include("forbidproperties.jl")
-
-### Data
-
-spv_x1 = SparseVector(8, [2, 5, 6], [1.25, -0.75, 3.5])
-
-@test isa(spv_x1, SparseVector{Float64,Int})
-
-x1_full = zeros(length(spv_x1))
-x1_full[SparseArrays.nonzeroinds(spv_x1)] = nonzeros(spv_x1)
-
-@testset "basic properties" begin
-    x = spv_x1
-    @test eltype(x) == Float64
-    @test ndims(x) == 1
-    @test length(x) == 8
-    @test size(x) == (8,)
-    @test size(x,1) == 8
-    @test size(x,2) == 1
-    @test !isempty(x)
-
-    @test count(!iszero, x) == 3
-    @test nnz(x) == 3
-    @test SparseArrays.nonzeroinds(x) == [2, 5, 6]
-    @test nonzeros(x) == [1.25, -0.75, 3.5]
-    @test count(SparseVector(8, [2, 5, 6], [true,false,true])) == 2
-end
-
-@testset "isstored" begin
-    x = spv_x1
-    stored_inds = [2, 5, 6]
-    nonstored_inds = [1, 3, 4, 7, 8]
-    for i in stored_inds
-        @test Base.isstored(x, i) == true
-    end
-    for i in nonstored_inds
-        @test Base.isstored(x, i) == false
-    end
-end
-
-@testset "conversion to dense Array" begin
-    for (x, xf) in [(spv_x1, x1_full)]
-        @test isa(Array(x), Vector{Float64})
-        @test Array(x) == xf
-        @test Vector(x) == xf
-    end
-end
-@testset "show" begin
-    @test occursin("1.25", string(spv_x1))
-    @test occursin("-0.75", string(spv_x1))
-    @test occursin("3.5", string(spv_x1))
-
-    # issue #30589
-    @test repr("text/plain", sparse([true])) == "1-element SparseArrays.SparseVector{Bool, $Int} with 1 stored entry:\n  [1]  =  1"
-end
-
-### Comparison helper to ensure exact equality with internal structure
-function exact_equal(x::AbstractSparseVector, y::AbstractSparseVector)
-    eltype(x) == eltype(y) &&
-    eltype(SparseArrays.nonzeroinds(x)) == eltype(SparseArrays.nonzeroinds(y)) &&
-    length(x) == length(y) &&
-    SparseArrays.nonzeroinds(x) == SparseArrays.nonzeroinds(y) &&
-    nonzeros(x) == nonzeros(y)
-end
-
-@testset "other constructors" begin
-    # construct empty sparse vector
-
-    @test exact_equal(spzeros(Float64, 8), SparseVector(8, Int[], Float64[]))
-
-    @testset "from list of indices and values" begin
-        @test exact_equal(
-            sparsevec(Int[], Float64[], 8),
-            SparseVector(8, Int[], Float64[]))
-
-        @test exact_equal(
-            sparsevec(Int[], Float64[]),
-            SparseVector(0, Int[], Float64[]))
-
-        @test exact_equal(
-            sparsevec([3, 3], [5.0, -5.0], 8),
-            SparseVector(8, [3], [0.0]))
-
-        @test exact_equal(
-            sparsevec([2, 3, 6], [12.0, 18.0, 25.0]),
-            SparseVector(6, [2, 3, 6], [12.0, 18.0, 25.0]))
-
-        let x0 = SparseVector(8, [2, 3, 6], [12.0, 18.0, 25.0])
-            @test exact_equal(
-                sparsevec([2, 3, 6], [12.0, 18.0, 25.0], 8), x0)
-
-            @test exact_equal(
-                sparsevec([3, 6, 2], [18.0, 25.0, 12.0], 8), x0)
-
-            @test exact_equal(
-                sparsevec([2, 3, 4, 4, 6], [12.0, 18.0, 5.0, -5.0, 25.0], 8),
-                SparseVector(8, [2, 3, 4, 6], [12.0, 18.0, 0.0, 25.0]))
-
-            @test exact_equal(
-                sparsevec([1, 1, 1, 2, 3, 3, 6], [2.0, 3.0, -5.0, 12.0, 10.0, 8.0, 25.0], 8),
-                SparseVector(8, [1, 2, 3, 6], [0.0, 12.0, 18.0, 25.0]))
-
-            @test exact_equal(
-                sparsevec([2, 3, 6, 7, 7], [12.0, 18.0, 25.0, 5.0, -5.0], 8),
-                SparseVector(8, [2, 3, 6, 7], [12.0, 18.0, 25.0, 0.0]))
-        end
-
-        @test exact_equal(
-            sparsevec(Any[1, 3], [1, 1]),
-            sparsevec([1, 3], [1, 1]))
-
-        @test exact_equal(
-            sparsevec(Any[1, 3], [1, 1], 5),
-            sparsevec([1, 3], [1, 1], 5))
-    end
-    @testset "from dictionary" begin
-        function my_intmap(x)
-            a = Dict{Int,eltype(x)}()
-            for i in SparseArrays.nonzeroinds(x)
-                a[i] = x[i]
-            end
-            return a
-        end
-
-        let x = spv_x1
-            a = my_intmap(x)
-            xc = sparsevec(a, 8)
-            @test exact_equal(x, xc)
-
-            xc = sparsevec(a)
-            @test exact_equal(xc, SparseVector(6, [2, 5, 6], [1.25, -0.75, 3.5]))
-
-            d = Dict{Int, Float64}((1 => 0.0, 2 => 1.0, 3 => 2.0))
-            @test exact_equal(sparsevec(d), SparseVector(3, [1, 2, 3], [0.0, 1.0, 2.0]))
-        end
-    end
-    @testset "fillstored!" begin
-        x = SparseVector(8, [2, 3, 6], [12.0, 18.0, 25.0])
-        y = LinearAlgebra.fillstored!(copy(x), 1)
-        @test (x .!= 0) == (y .!= 0)
-        @test y == SparseVector(8, [2, 3, 6], [1.0, 1.0, 1.0])
-    end
-
-    @testset "sprand & sprandn" begin
-        let xr = sprand(1000, 0.9)
-            @test isa(xr, SparseVector{Float64,Int})
-            @test length(xr) == 1000
-            @test all(nonzeros(xr) .>= 0.0)
-        end
-
-        let xr = sprand(Float32, 1000, 0.9)
-            @test isa(xr, SparseVector{Float32,Int})
-            @test length(xr) == 1000
-            @test all(nonzeros(xr) .>= 0.0)
-        end
-
-        let xr = sprandn(1000, 0.9)
-            @test isa(xr, SparseVector{Float64,Int})
-            @test length(xr) == 1000
-            if !isempty(nonzeros(xr))
-                @test any(nonzeros(xr) .> 0.0) && any(nonzeros(xr) .< 0.0)
-            end
-        end
-
-        let xr = sprandn(ComplexF64, 1000, 0.9)
-            @test isa(xr, SparseVector{ComplexF64,Int})
-            @test length(xr) == 1000
-        end
-
-        let xr = sprand(Bool, 1000, 0.9)
-            @test isa(xr, SparseVector{Bool,Int})
-            @test length(xr) == 1000
-            @test all(nonzeros(xr))
-        end
-
-        let r1 = MersenneTwister(0), r2 = MersenneTwister(0)
-            @test sprand(r1, 100, .9) == sprand(r2, 100, .9)
-            @test sprandn(r1, 100, .9) == sprandn(r2, 100, .9)
-            @test sprand(r1, Bool, 100, .9) == sprand(r2,  Bool, 100, .9)
-            @test sprandn(r1, Float16, 100, .9) == sprandn(r2,  Float16, 100, .9)
-        end
-
-        # test sprand with function inputs
-        let xr = sprand(1000, 0.9, rand)
-            @test isa(xr, SparseVector{Float64,Int})
-            @test length(xr) == 1000
-            if !isempty(nonzeros(xr))
-                @test all(nonzeros(xr) .> 0.0)
-            end
-        end
-        let xr = sprand(1000, 0.9, rand, Float32)
-            @test isa(xr, SparseVector{Float32,Int})
-            @test length(xr) == 1000
-            if !isempty(nonzeros(xr))
-                @test all(nonzeros(xr) .> 0.0)
-            end
-        end
-    end
-end
-### Element access
-
-@testset "getindex" begin
-    @testset "single integer index" begin
-        for (x, xf) in [(spv_x1, x1_full)]
-            for i = 1:length(x)
-                @test x[i] == xf[i]
-            end
-        end
-    end
-    @testset "generic array index" begin
-        let x = sprand(100, 0.5)
-            I = rand(1:length(x), 20)
-            r = x[I]
-            @test isa(r, SparseVector{Float64,Int})
-            @test all(!iszero, nonzeros(r))
-            @test Array(r) == Array(x)[I]
-        end
-
-        # issue 24534
-        let x = convert(SparseVector{Float64,UInt32},sprandn(100,0.5))
-            I = rand(1:length(x), 20)
-            r = x[I]
-            @test isa(r, SparseVector{Float64,UInt32})
-            @test all(!iszero, nonzeros(r))
-            @test Array(r) == Array(x)[I]
-        end
-
-        # issue 24534
-        let x = convert(SparseVector{Float64,UInt32},sprandn(100,0.5))
-            I = rand(1:length(x), 20,1)
-            r = x[I]
-            @test isa(r, SparseMatrixCSC{Float64,UInt32})
-            @test all(!iszero, nonzeros(r))
-            @test Array(r) == Array(x)[I]
-        end
-    end
-    @testset "boolean array index" begin
-        let x = sprand(10, 10, 0.5)
-            I = rand(1:size(x, 2), 10)
-            bI = falses(size(x, 2))
-            bI[I] .= true
-            r = x[1,bI]
-            @test isa(r, SparseVector{Float64,Int})
-            @test all(!iszero, nonzeros(r))
-            @test Array(r) == Array(x)[1,bI]
-        end
-
-        let x = sprand(10, 0.5)
-            I = rand(1:length(x), 5)
-            bI = falses(length(x))
-            bI[I] .= true
-            r = x[bI]
-            @test isa(r, SparseVector{Float64,Int})
-            @test all(!iszero, nonzeros(r))
-            @test Array(r) == Array(x)[bI]
-            bI = falses(length(x), 1) # AbstractArray rather than AbstractVector
-            bI[I, 1] .= true
-            r = x[bI]
-            @test isa(r, SparseVector{Float64,Int})
-            @test all(!iszero, nonzeros(r))
-            bIv = falses(length(x))
-            bIv[I] .= true
-            @test Array(r) == Array(x)[bIv]
-        end
-    end
-end
-@testset "setindex" begin
-    let xc = spzeros(Float64, 8)
-        xc[3] = 2.0
-        @test exact_equal(xc, SparseVector(8, [3], [2.0]))
-    end
-
-    let xc = copy(spv_x1)
-        xc[5] = 2.0
-        @test exact_equal(xc, SparseVector(8, [2, 5, 6], [1.25, 2.0, 3.5]))
-    end
-
-    let xc = copy(spv_x1)
-        xc[3] = 4.0
-        @test exact_equal(xc, SparseVector(8, [2, 3, 5, 6], [1.25, 4.0, -0.75, 3.5]))
-
-        xc[1] = 6.0
-        @test exact_equal(xc, SparseVector(8, [1, 2, 3, 5, 6], [6.0, 1.25, 4.0, -0.75, 3.5]))
-
-        xc[8] = -1.5
-        @test exact_equal(xc, SparseVector(8, [1, 2, 3, 5, 6, 8], [6.0, 1.25, 4.0, -0.75, 3.5, -1.5]))
-    end
-
-    let xc = copy(spv_x1)
-        xc[5] = 0.0
-        @test exact_equal(xc, SparseVector(8, [2, 5, 6], [1.25, 0.0, 3.5]))
-
-        xc[6] = 0.0
-        @test exact_equal(xc, SparseVector(8, [2, 5, 6], [1.25, 0.0, 0.0]))
-
-        xc[2] = 0.0
-        @test exact_equal(xc, SparseVector(8, [2, 5, 6], [0.0, 0.0, 0.0]))
-
-        xc[1] = 0.0
-        @test exact_equal(xc, SparseVector(8, [2, 5, 6], [0.0, 0.0, 0.0]))
-    end
-end
-@testset "dropstored!" begin
-    x = SparseVector(10, [2, 7, 9], [2.0, 7.0, 9.0])
-    # Test argument bounds checking for dropstored!(x, i)
-    @test_throws BoundsError SparseArrays.dropstored!(x, 0)
-    @test_throws BoundsError SparseArrays.dropstored!(x, 11)
-    # Test behavior of dropstored!(x, i)
-    # --> Test dropping a single stored entry
-    @test SparseArrays.dropstored!(x, 2) == SparseVector(10, [7, 9], [7.0, 9.0])
-    # --> Test dropping a single nonstored entry
-    @test SparseArrays.dropstored!(x, 5) == SparseVector(10, [7, 9], [7.0, 9.0])
-end
-
-@testset "findall and findnz" begin
-    @test findall(!iszero, spv_x1) == findall(!iszero, x1_full)
-    @test findall(spv_x1 .> 1) == findall(x1_full .> 1)
-    @test findall(x->x>1, spv_x1) == findall(x->x>1, x1_full)
-    @test findnz(spv_x1) == (findall(!iszero, x1_full), filter(x->x!=0, x1_full))
-    let xc = SparseVector(8, [2, 3, 5], [1.25, 0, -0.75]), fc = Array(xc)
-        @test findall(!iszero, xc) == findall(!iszero, fc)
-        @test findnz(xc) == ([2, 3, 5], [1.25, 0, -0.75])
-    end
-end
-### Array manipulation
-
-@testset "copy[!]" begin
-
-    let x = spv_x1
-        xc = copy(x)
-        @test isa(xc, SparseVector{Float64,Int})
-        @test nonzeroinds(x) !== nonzeros(xc)
-        @test nonzeros(x) !== nonzeros(xc)
-        @test exact_equal(x, xc)
-    end
-
-    let x1 = SparseVector(8, [2, 5, 6], [12.2, 1.4, 5.0])
-        x2 = SparseVector(8, [3, 4], [1.2, 3.4])
-        copyto!(x2, x1)
-        @test x2 == x1
-        x2 = SparseVector(8, [2, 4, 8], [10.3, 7.4, 3.1])
-        copyto!(x2, x1)
-        @test x2 == x1
-        x2 = SparseVector(8, [1, 3, 4, 7], [0.3, 1.2, 3.4, 0.1])
-        copyto!(x2, x1)
-        @test x2 == x1
-        x2 = SparseVector(10, [3, 4], [1.2, 3.4])
-        copyto!(x2, x1)
-        @test x2[1:8] == x1
-        @test x2[9:10] == spzeros(2)
-        x2 = SparseVector(10, [3, 4, 9], [1.2, 3.4, 17.8])
-        copyto!(x2, x1)
-        @test x2[1:8] == x1
-        @test x2[9] == 17.8
-        @test x2[10] == 0
-        x2 = SparseVector(10, [3, 4, 5, 6, 9], [8.3, 7.2, 1.2, 3.4, 17.8])
-        copyto!(x2, x1)
-        @test x2[1:8] == x1
-        @test x2[9] == 17.8
-        @test x2[10] == 0
-        x2 = SparseVector(6, [3, 4], [1.2, 3.4])
-        @test_throws BoundsError copyto!(x2, x1)
-    end
-
-    let x1 = sparse([2, 1, 2], [1, 3, 3], [12.2, 1.4, 5.0], 2, 4)
-        x2 = SparseVector(8, [3, 4], [1.2, 3.4])
-        copyto!(x2, x1)
-        @test x2[:] == x1[:]
-        x2 = SparseVector(8, [2, 4, 8], [10.3, 7.4, 3.1])
-        copyto!(x2, x1)
-        @test x2[:] == x1[:]
-        x2 = SparseVector(8, [1, 3, 4, 7], [0.3, 1.2, 3.4, 0.1])
-        copyto!(x2, x1)
-        @test x2[:] == x1[:]
-        x2 = SparseVector(10, [3, 4], [1.2, 3.4])
-        copyto!(x2, x1)
-        @test x2[1:8] == x1[:]
-        @test x2[9:10] == spzeros(2)
-        x2 = SparseVector(10, [3, 4, 9], [1.2, 3.4, 17.8])
-        copyto!(x2, x1)
-        @test x2[1:8] == x1[:]
-        @test x2[9] == 17.8
-        @test x2[10] == 0
-        x2 = SparseVector(10, [3, 4, 5, 6, 9], [8.3, 7.2, 1.2, 3.4, 17.8])
-        copyto!(x2, x1)
-        @test x2[1:8] == x1[:]
-        @test x2[9] == 17.8
-        @test x2[10] == 0
-        x2 = SparseVector(6, [3, 4], [1.2, 3.4])
-        @test_throws BoundsError copyto!(x2, x1)
-    end
-
-    let x1 = SparseVector(8, [2, 5, 6], [12.2, 1.4, 5.0])
-        x2 = sparse([1, 2], [2, 2], [1.2, 3.4], 2, 4)
-        copyto!(x2, x1)
-        @test x2[:] == x1[:]
-        x2 = sparse([2, 2, 2], [1, 3, 4], [10.3, 7.4, 3.1], 2, 4)
-        copyto!(x2, x1)
-        @test x2[:] == x1[:]
-        x2 = sparse([1, 1, 2, 1], [1, 2, 2, 4], [0.3, 1.2, 3.4, 0.1], 2, 4)
-        copyto!(x2, x1)
-        @test x2[:] == x1[:]
-        x2 = sparse([1, 2], [2, 2], [1.2, 3.4], 2, 5)
-        copyto!(x2, x1)
-        @test x2[1:8] == x1
-        @test x2[9:10] == spzeros(2)
-        x2 = sparse([1, 2, 1], [2, 2, 5], [1.2, 3.4, 17.8], 2, 5)
-        copyto!(x2, x1)
-        @test x2[1:8] == x1
-        @test x2[9] == 17.8
-        @test x2[10] == 0
-        x2 = sparse([1, 2, 1, 2, 1], [2, 2, 3, 3, 5], [8.3, 7.2, 1.2, 3.4, 17.8], 2, 5)
-        copyto!(x2, x1)
-        @test x2[1:8] == x1
-        @test x2[9] == 17.8
-        @test x2[10] == 0
-        x2 = sparse([1, 2], [2, 2], [1.2, 3.4], 2, 3)
-        @test_throws BoundsError copyto!(x2, x1)
-    end
-    let x = 1:6
-        x2 = spzeros(length(x))
-        copyto!(x2, x) # copyto!(SparseVector, AbstractVector)
-        @test Vector(x2) == collect(x)
-    end
-end
-@testset "vec/reinterpret/float/complex" begin
-    a = SparseVector(8, [2, 5, 6], Int32[12, 35, 72])
-    # vec
-    @test vec(a) == a
-
-    # float
-    af = float(a)
-    @test float(af) == af
-    @test isa(af, SparseVector{Float64,Int})
-    @test exact_equal(af, SparseVector(8, [2, 5, 6], [12., 35., 72.]))
-    @test sparsevec(transpose(transpose(af))) == af
-
-    # complex
-    acp = complex(af)
-    @test complex(acp) == acp
-    @test isa(acp, SparseVector{ComplexF64,Int})
-    @test exact_equal(acp, SparseVector(8, [2, 5, 6], complex([12., 35., 72.])))
-    @test sparsevec((acp')') == acp
-end
-
-@testset "Type conversion" begin
-    let x = convert(SparseVector, sparse([2, 5, 6], [1, 1, 1], [1.25, -0.75, 3.5], 8, 1))
-        @test isa(x, SparseVector{Float64,Int})
-        @test exact_equal(x, spv_x1)
-    end
-
-    let x = spv_x1, xf = x1_full
-        xc = convert(SparseVector, xf)
-        @test isa(xc, SparseVector{Float64,Int})
-        @test exact_equal(xc, x)
-
-        xc = convert(SparseVector{Float32,Int}, x)
-        xf32 = SparseVector(8, [2, 5, 6], [1.25f0, -0.75f0, 3.5f0])
-        @test isa(xc, SparseVector{Float32,Int})
-        @test exact_equal(xc, xf32)
-
-        xc = convert(SparseVector{Float32}, x)
-        @test isa(xc, SparseVector{Float32,Int})
-        @test exact_equal(xc, xf32)
-
-        xm = convert(SparseMatrixCSC, x)
-        @test isa(xm, SparseMatrixCSC{Float64,Int})
-        @test Array(xm) == reshape(xf, 8, 1)
-
-        xm = convert(SparseMatrixCSC{Float32}, x)
-        @test isa(xm, SparseMatrixCSC{Float32,Int})
-        @test Array(xm) == reshape(convert(Vector{Float32}, xf), 8, 1)
-    end
-end
-
-@testset "Concatenation" begin
-    let m = 80, n = 100
-        A = Vector{SparseVector{Float64,Int}}(undef, n)
-        tnnz = 0
-        for i = 1:length(A)
-            A[i] = sprand(m, 0.3)
-            tnnz += nnz(A[i])
-        end
-
-        H = hcat(A...)
-        @test isa(H, SparseMatrixCSC{Float64,Int})
-        @test size(H) == (m, n)
-        @test nnz(H) == tnnz
-        Hr = zeros(m, n)
-        for j = 1:n
-            Hr[:,j] = Array(A[j])
-        end
-        @test Array(H) == Hr
-
-        V = vcat(A...)
-        @test isa(V, SparseVector{Float64,Int})
-        @test length(V) == m * n
-        Vr = vec(Hr)
-        @test Array(V) == Vr
-    end
-
-    @testset "concatenation of sparse vectors with other types" begin
-        # Test that concatenations of combinations of sparse vectors with various other
-        # matrix/vector types yield sparse arrays
-        let N = 4
-            spvec = spzeros(N)
-            spmat = spzeros(N, 1)
-            densevec = fill(1., N)
-            densemat = fill(1., N, 1)
-            diagmat = Diagonal(densevec)
-            # Test that concatenations of pairwise combinations of sparse vectors with dense
-            # vectors/matrices, sparse matrices, or special matrices yield sparse arrays
-            for othervecormat in (densevec, densemat, spmat)
-                @test issparse(vcat(spvec, othervecormat))
-                @test issparse(vcat(othervecormat, spvec))
-            end
-            for othervecormat in (densevec, densemat, spmat, diagmat)
-                @test issparse(hcat(spvec, othervecormat))
-                @test issparse(hcat(othervecormat, spvec))
-                @test issparse(hvcat((2,), spvec, othervecormat))
-                @test issparse(hvcat((2,), othervecormat, spvec))
-                @test issparse(cat(spvec, othervecormat; dims=(1,2)))
-                @test issparse(cat(othervecormat, spvec; dims=(1,2)))
-            end
-            # The preceding tests should cover multi-way combinations of those types, but for good
-            # measure test a few multi-way combinations involving those types
-            @test issparse(vcat(spvec, densevec, spmat, densemat))
-            @test issparse(vcat(densevec, spvec, densemat, spmat))
-            @test issparse(hcat(spvec, densemat, spmat, densevec, diagmat))
-            @test issparse(hcat(densemat, spmat, spvec, densevec, diagmat))
-            @test issparse(hvcat((5,), diagmat, densevec, spvec, densemat, spmat))
-            @test issparse(hvcat((5,), spvec, densemat, diagmat, densevec, spmat))
-            @test issparse(cat(densemat, diagmat, spmat, densevec, spvec; dims=(1,2)))
-            @test issparse(cat(spvec, diagmat, densevec, spmat, densemat; dims=(1,2)))
-        end
-        @testset "vertical concatenation of SparseVectors with different el- and ind-type (#22225)" begin
-            spv6464 = SparseVector(0, Int64[], Int64[])
-            @test isa(vcat(spv6464, SparseVector(0, Int64[], Int32[])), SparseVector{Int64,Int64})
-            @test isa(vcat(spv6464, SparseVector(0, Int32[], Int64[])), SparseVector{Int64,Int64})
-            @test isa(vcat(spv6464, SparseVector(0, Int32[], Int32[])), SparseVector{Int64,Int64})
-        end
-    end
-end
-@testset "sparsemat: combinations with sparse matrix" begin
-    let S = sprand(4, 8, 0.5)
-        Sf = Array(S)
-        @assert isa(Sf, Matrix{Float64})
-
-        # get a single column
-        for j = 1:size(S,2)
-            col = S[:, j]
-            @test isa(col, SparseVector{Float64,Int})
-            @test length(col) == size(S,1)
-            @test Array(col) == Sf[:,j]
-        end
-
-        # Get a reshaped vector
-        v = S[:]
-        @test isa(v, SparseVector{Float64,Int})
-        @test length(v) == length(S)
-        @test Array(v) == Sf[:]
-
-        # Get a linear subset
-        for i=0:length(S)
-            v = S[1:i]
-            @test isa(v, SparseVector{Float64,Int})
-            @test length(v) == i
-            @test Array(v) == Sf[1:i]
-        end
-        for i=1:length(S)+1
-            v = S[i:end]
-            @test isa(v, SparseVector{Float64,Int})
-            @test length(v) == length(S) - i + 1
-            @test Array(v) == Sf[i:end]
-        end
-        for i=0:div(length(S),2)
-            v = S[1+i:end-i]
-            @test isa(v, SparseVector{Float64,Int})
-            @test length(v) == length(S) - 2i
-            @test Array(v) == Sf[1+i:end-i]
-        end
-    end
-
-    let r = [1,10], S = sparse(r, r, r)
-        Sf = Array(S)
-        @assert isa(Sf, Matrix{Int})
-
-        inds = [1,1,1,1,1,1]
-        v = S[inds]
-        @test isa(v, SparseVector{Int,Int})
-        @test length(v) == length(inds)
-        @test Array(v) == Sf[inds]
-
-        inds = [2,2,2,2,2,2]
-        v = S[inds]
-        @test isa(v, SparseVector{Int,Int})
-        @test length(v) == length(inds)
-        @test Array(v) == Sf[inds]
-
-        # get a single column
-        for j = 1:size(S,2)
-            col = S[:, j]
-            @test isa(col, SparseVector{Int,Int})
-            @test length(col) == size(S,1)
-            @test Array(col) == Sf[:,j]
-        end
-
-        # Get a reshaped vector
-        v = S[:]
-        @test isa(v, SparseVector{Int,Int})
-        @test length(v) == length(S)
-        @test Array(v) == Sf[:]
-
-        # Get a linear subset
-        for i=0:length(S)
-            v = S[1:i]
-            @test isa(v, SparseVector{Int,Int})
-            @test length(v) == i
-            @test Array(v) == Sf[1:i]
-        end
-        for i=1:length(S)+1
-            v = S[i:end]
-            @test isa(v, SparseVector{Int,Int})
-            @test length(v) == length(S) - i + 1
-            @test Array(v) == Sf[i:end]
-        end
-        for i=0:div(length(S),2)
-            v = S[1+i:end-i]
-            @test isa(v, SparseVector{Int,Int})
-            @test length(v) == length(S) - 2i
-            @test Array(v) == Sf[1+i:end-i]
-        end
-    end
-end
-## math
-
-### Data
-
-rnd_x0 = sprand(50, 0.6)
-rnd_x0f = Array(rnd_x0)
-
-rnd_x1 = sprand(50, 0.7) * 4.0
-rnd_x1f = Array(rnd_x1)
-
-spv_x1 = SparseVector(8, [2, 5, 6], [1.25, -0.75, 3.5])
-spv_x2 = SparseVector(8, [1, 2, 6, 7], [3.25, 4.0, -5.5, -6.0])
-
-@testset "Arithmetic operations" begin
-
-    let x = spv_x1, x2 = spv_x2
-        # negate
-        @test exact_equal(-x, SparseVector(8, [2, 5, 6], [-1.25, 0.75, -3.5]))
-
-        # abs and abs2
-        @test exact_equal(abs.(x), SparseVector(8, [2, 5, 6], abs.([1.25, -0.75, 3.5])))
-        @test exact_equal(abs2.(x), SparseVector(8, [2, 5, 6], abs2.([1.25, -0.75, 3.5])))
-
-        # plus and minus
-        xa = SparseVector(8, [1,2,5,6,7], [3.25,5.25,-0.75,-2.0,-6.0])
-
-        @test exact_equal(x + x, x * 2)
-        @test exact_equal(x + x2, xa)
-        @test exact_equal(x2 + x, xa)
-
-        xb = SparseVector(8, [1,2,5,6,7], [-3.25,-2.75,-0.75,9.0,6.0])
-        @test exact_equal(x - x, SparseVector(8, Int[], Float64[]))
-        @test exact_equal(x - x2, xb)
-        @test exact_equal(x2 - x, -xb)
-
-        @test Array(x) + x2 == Array(xa)
-        @test Array(x) - x2 == Array(xb)
-        @test x + Array(x2) == Array(xa)
-        @test x - Array(x2) == Array(xb)
-
-        # multiplies
-        xm = SparseVector(8, [2, 6], [5.0, -19.25])
-        @test exact_equal(x .* x, abs2.(x))
-        @test exact_equal(x .* x2, xm)
-        @test exact_equal(x2 .* x, xm)
-
-        @test Array(x) .* x2 == Array(xm)
-        @test x .* Array(x2) == Array(xm)
-
-        # max & min
-        @test exact_equal(max.(x, x), x)
-        @test exact_equal(min.(x, x), x)
-        @test exact_equal(max.(x, x2),
-            SparseVector(8, Int[1, 2, 6], Float64[3.25, 4.0, 3.5]))
-        @test exact_equal(min.(x, x2),
-            SparseVector(8, Int[2, 5, 6, 7], Float64[1.25, -0.75, -5.5, -6.0]))
-    end
-
-    ### Complex
-
-    let x = spv_x1, x2 = spv_x2
-        # complex
-        @test exact_equal(complex.(x, x),
-            SparseVector(8, [2,5,6], [1.25+1.25im, -0.75-0.75im, 3.5+3.5im]))
-        @test exact_equal(complex.(x, x2),
-            SparseVector(8, [1,2,5,6,7], [3.25im, 1.25+4.0im, -0.75+0.0im, 3.5-5.5im, -6.0im]))
-        @test exact_equal(complex.(x2, x),
-            SparseVector(8, [1,2,5,6,7], [3.25+0.0im, 4.0+1.25im, -0.75im, -5.5+3.5im, -6.0+0.0im]))
-
-        # real, imag and conj
-
-        @test real(x) === x
-        @test exact_equal(imag(x), spzeros(Float64, length(x)))
-        @test conj(x) === x
-
-        xcp = complex.(x, x2)
-        @test exact_equal(real(xcp), x)
-        @test exact_equal(imag(xcp), x2)
-        @test exact_equal(conj(xcp), complex.(x, -x2))
-    end
-end
-@testset "Zero-preserving math functions: sparse -> sparse" begin
-    x1operations = (floor, ceil, trunc, round)
-    x0operations = (log1p,  expm1,  sinpi,
-                    sin,    tan,    sind,   tand,
-                    asin,   atan,   asind,  atand,
-                    sinh,   tanh,   asinh,  atanh)
-
-    for (spvec, densevec, operations) in (
-            (rnd_x0, rnd_x0f, x0operations),
-            (rnd_x1, rnd_x1f, x1operations) )
-        for op in operations
-            spresvec = op.(spvec)
-            @test spresvec == op.(densevec)
-            @test all(!iszero, nonzeros(spresvec))
-            resvaltype = typeof(op(zero(eltype(spvec))))
-            resindtype = SparseArrays.indtype(spvec)
-            @test isa(spresvec, SparseVector{resvaltype,resindtype})
-        end
-    end
-end
-@testset "Non-zero-preserving math functions: sparse -> dense" begin
-    for op in (exp, exp2, exp10, log, log2, log10,
-            cos, cosd, acos, cosh, cospi,
-            csc, cscd, acot, csch, acsch,
-            cot, cotd, acosd, coth,
-            sec, secd, acotd, sech, asech)
-        spvec = rnd_x0
-        densevec = rnd_x0f
-        spresvec = op.(spvec)
-        @test spresvec == op.(densevec)
-        resvaltype = typeof(op(zero(eltype(spvec))))
-        resindtype = SparseArrays.indtype(spvec)
-        @test isa(spresvec, SparseVector{resvaltype,resindtype})
-    end
-end
-
-### Reduction
-
-@testset "sum, norm" begin
-    x = spv_x1
-    @test sum(x) == 4.0
-    @test sum(abs, x) == 5.5
-    @test sum(abs2, x) == 14.375
-
-    @test norm(x) == sqrt(14.375)
-    @test norm(x, 1) == 5.5
-    @test norm(x, 2) == sqrt(14.375)
-    @test norm(x, Inf) == 3.5
-end
-
-@testset "maximum, minimum" begin
-    let x = spv_x1
-        @test maximum(x) == 3.5
-        @test minimum(x) == -0.75
-        @test maximum(abs, x) == 3.5
-        @test minimum(abs, x) == 0.0
-    end
-
-    let x = abs.(spv_x1)
-        @test maximum(x) == 3.5
-        @test minimum(x) == 0.0
-    end
-
-    let x = -abs.(spv_x1)
-        @test maximum(x) == 0.0
-        @test minimum(x) == -3.5
-    end
-
-    let x = SparseVector(3, [1, 2, 3], [-4.5, 2.5, 3.5])
-        @test maximum(x) == 3.5
-        @test minimum(x) == -4.5
-        @test maximum(abs, x) == 4.5
-        @test minimum(abs, x) == 2.5
-    end
-
-    let x = spzeros(Float64, 8)
-        @test maximum(x) == 0.0
-        @test minimum(x) == 0.0
-        @test maximum(abs, x) == 0.0
-        @test minimum(abs, x) == 0.0
-    end
-end
-
-### linalg
-
-@testset "BLAS Level-1" begin
-
-    let x = sprand(16, 0.5), x2 = sprand(16, 0.4)
-        xf = Array(x)
-        xf2 = Array(x2)
-
-        @testset "axpy!" begin
-            for c in [1.0, -1.0, 2.0, -2.0]
-                y = Array(x)
-                @test LinearAlgebra.axpy!(c, x2, y) === y
-                @test y == Array(x2 * c + x)
-            end
-        end
-        @testset "scale" begin
-            α = 2.5
-            sx = SparseVector(length(x::SparseVector), nonzeroinds(x), nonzeros(x) * α)
-            @test exact_equal(x * α, sx)
-            @test exact_equal(x * (α + 0.0*im), complex(sx))
-            @test exact_equal(α * x, sx)
-            @test exact_equal((α + 0.0*im) * x, complex(sx))
-            @test exact_equal(x * α, sx)
-            @test exact_equal(α * x, sx)
-            @test exact_equal(x .* α, sx)
-            @test exact_equal(α .* x, sx)
-            @test exact_equal(x / α, SparseVector(length(x::SparseVector), nonzeroinds(x), nonzeros(x) / α))
-
-            xc = copy(x)
-            @test rmul!(xc, α) === xc
-            @test exact_equal(xc, sx)
-            xc = copy(x)
-            @test lmul!(α, xc) === xc
-            @test exact_equal(xc, sx)
-            xc = copy(x)
-            @test rmul!(xc, complex(α, 0.0)) === xc
-            @test exact_equal(xc, sx)
-            xc = copy(x)
-            @test lmul!(complex(α, 0.0), xc) === xc
-            @test exact_equal(xc, sx)
-        end
-
-        @testset "dot" begin
-            dv = dot(xf, xf2)
-            @test dot(x, x) == sum(abs2, x)
-            @test dot(x2, x2) == sum(abs2, x2)
-            @test dot(x, x2) ≈ dv
-            @test dot(x2, x) ≈ dv
-            @test dot(Array(x), x2) ≈ dv
-            @test dot(x, Array(x2)) ≈ dv
-        end
-    end
-
-    let x = complex.(sprand(32, 0.6), sprand(32, 0.6)),
-        y = complex.(sprand(32, 0.6), sprand(32, 0.6))
-        xf = Array(x)::Vector{ComplexF64}
-        yf = Array(y)::Vector{ComplexF64}
-        @test dot(x, x) ≈ dot(xf, xf)
-        @test dot(x, y) ≈ dot(xf, yf)
-    end
-end
-
-@testset "BLAS Level-2" begin
-    @testset "dense A * sparse x -> dense y" begin
-        for TA in (Float64, ComplexF64), Tx in (Float64, ComplexF64)
-            T = Base.promote_op(LinearAlgebra.matprod, TA, Tx)
-            let A = randn(TA, 9, 16), x = sprand(Tx, 16, 0.7)
-                xf = Array(x)
-                for α in [0.0, 1.0, 2.0], β in [0.0, 0.5, 1.0]
-                    y = rand(T, 9)
-                    rr = α*A*xf + β*y
-                    @test mul!(y, A, x, α, β) === y
-                    @test y ≈ rr
-                end
-                y = A*x
-                @test isa(y, Vector{T})
-                @test A*x ≈ A*xf
-            end
-
-            let A = randn(TA, 16, 9), x = sprand(Tx, 16, 0.7)
-                xf = Array(x)
-                for α in [0.0, 1.0, 2.0], β in [0.0, 0.5, 1.0]
-                    y = rand(T, 9)
-                    rr = α*transpose(A)*xf + β*y
-                    @test mul!(y, transpose(A), x, α, β) === y
-                    @test y ≈ rr
-                end
-                y = *(transpose(A), x)
-                @test isa(y, Vector{T})
-                @test y ≈ *(transpose(A), xf)
-            end
-
-            let A = randn(TA, 16, 9), x = sprand(Tx, 16, 0.7)
-                xf = Array(x)
-                for α in [0.0, 1.0, 2.0], β in [0.0, 0.5, 1.0]
-                    y = rand(T, 9)
-                    rr = α*A'xf + β*y
-                    @test mul!(y, adjoint(A), x, α, β) === y
-                    @test y ≈ rr
-                end
-                y = *(adjoint(A), x)
-                @test isa(y, Vector{T})
-                @test y ≈ *(adjoint(A), xf)
-            end
-        end
-    end
-    @testset "sparse A * sparse x -> dense y" begin
-        let A = sprandn(9, 16, 0.5), x = sprand(16, 0.7)
-            Af = Array(A)
-            xf = Array(x)
-            for α in [0.0, 1.0, 2.0], β in [0.0, 0.5, 1.0]
-                y = rand(9)
-                rr = α*Af*xf + β*y
-                @test mul!(y, A, x, α, β) === y
-                @test y ≈ rr
-            end
-            y = SparseArrays.densemv(A, x)
-            @test isa(y, Vector{Float64})
-            @test y ≈ Af*xf
-        end
-
-        let A = sprandn(16, 9, 0.5), x = sprand(16, 0.7)
-            Af = Array(A)
-            xf = Array(x)
-            for α in [0.0, 1.0, 2.0], β in [0.0, 0.5, 1.0]
-                y = rand(9)
-                rr = α*Af'xf + β*y
-                @test mul!(y, transpose(A), x, α, β) === y
-                @test y ≈ rr
-            end
-            y = SparseArrays.densemv(A, x; trans='T')
-            @test isa(y, Vector{Float64})
-            @test y ≈ *(transpose(Af), xf)
-        end
-
-        let A = complex.(sprandn(7, 8, 0.5), sprandn(7, 8, 0.5)),
-            x = complex.(sprandn(8, 0.6), sprandn(8, 0.6)),
-            x2 = complex.(sprandn(7, 0.75), sprandn(7, 0.75))
-            Af = Array(A)
-            xf = Array(x)
-            x2f = Array(x2)
-            @test SparseArrays.densemv(A, x; trans='N') ≈ Af * xf
-            @test SparseArrays.densemv(A, x2; trans='T') ≈ transpose(Af) * x2f
-            @test SparseArrays.densemv(A, x2; trans='C') ≈ Af'x2f
-            @test_throws ArgumentError SparseArrays.densemv(A, x; trans='D')
-        end
-
-        let A = sparse(bitrand(9, 16)), x = sparse(bitrand(16))
-            Af = Array(A)
-            xf = Array(x)
-            y = SparseArrays.densemv(A, x)
-            @test isa(y, Vector{Int})
-            @test y == Af*xf
-        end
-    end
-    @testset "sparse A * sparse x -> sparse y" begin
-        let A = sprandn(9, 16, 0.5), x = sprand(16, 0.7), x2 = sprand(9, 0.7)
-            Af = Array(A)
-            xf = Array(x)
-            x2f = Array(x2)
-
-            y = A*x
-            @test isa(y, SparseVector{Float64,Int})
-            @test all(nonzeros(y) .!= 0.0)
-            @test Array(y) ≈ Af * xf
-
-            y = *(transpose(A), x2)
-            @test isa(y, SparseVector{Float64,Int})
-            @test all(nonzeros(y) .!= 0.0)
-            @test Array(y) ≈ Af'x2f
-        end
-
-        let A = complex.(sprandn(7, 8, 0.5), sprandn(7, 8, 0.5)),
-            x = complex.(sprandn(8, 0.6), sprandn(8, 0.6)),
-            x2 = complex.(sprandn(7, 0.75), sprandn(7, 0.75))
-            Af = Array(A)
-            xf = Array(x)
-            x2f = Array(x2)
-
-            y = A*x
-            @test isa(y, SparseVector{ComplexF64,Int})
-            @test Array(y) ≈ Af * xf
-
-            y = *(transpose(A), x2)
-            @test isa(y, SparseVector{ComplexF64,Int})
-            @test Array(y) ≈ transpose(Af) * x2f
-
-            y = *(adjoint(A), x2)
-            @test isa(y, SparseVector{ComplexF64,Int})
-            @test Array(y) ≈ Af'x2f
-        end
-
-        let A = sparse(bitrand(9, 16)), x = sparse(bitrand(16)), x2 = sparse(bitrand(9))
-            Af = Array(A)
-            xf = Array(x)
-            x2f = Array(x2)
-
-            y = A*x
-            @test isa(y, SparseVector{Int, Int})
-            @test Array(y) == Af*xf
-
-            y = A'*x2
-            @test isa(y, SparseVector{Int, Int})
-            @test Array(y) == Af'x2f
-        end
-    end
-    @testset "sparse A * dense x -> dense y" begin
-        let A = sparse(bitrand(9, 16)), x = Vector(bitrand(16)), x2 = Vector(bitrand(9))
-            Af = Array(A)
-            xf = Array(x)
-            x2f = Array(x2)
-
-            y = A*x
-            @test isa(y, Vector{Int})
-            @test y == Af*xf
-
-            y = A'*x2
-            @test isa(y, Vector{Int})
-            @test y == Af'x2f
-        end
-    end
-    @testset "ldiv ops with triangular matrices and sparse vecs (#14005)" begin
-        m = 10
-        sparsefloatvecs = SparseVector[sprand(m, 0.4) for k in 1:3]
-        sparseintvecs = SparseVector[SparseVector(m, nonzeroinds(sprvec), round.(Int, nonzeros(sprvec)*10)) for sprvec in sparsefloatvecs]
-        sparsecomplexvecs = SparseVector[SparseVector(m, nonzeroinds(sprvec), complex.(nonzeros(sprvec), nonzeros(sprvec))) for sprvec in sparsefloatvecs]
-
-        sprmat = sprand(m, m, 0.2)
-        sparsefloatmat = I + sprmat/(2m)
-        sparsecomplexmat = I + SparseMatrixCSC(m, m, getcolptr(sprmat), rowvals(sprmat), complex.(nonzeros(sprmat), nonzeros(sprmat))/(4m))
-        sparseintmat = 10m*I + SparseMatrixCSC(m, m, getcolptr(sprmat), rowvals(sprmat), round.(Int, nonzeros(sprmat)*10))
-
-        denseintmat = I*10m + rand(1:m, m, m)
-        densefloatmat = I + randn(m, m)/(2m)
-        densecomplexmat = I + randn(ComplexF64, m, m)/(4m)
-
-        inttypes = (Int32, Int64, BigInt)
-        floattypes = (Float32, Float64, BigFloat)
-        complextypes = (ComplexF32, ComplexF64)
-        eltypes = (inttypes..., floattypes..., complextypes...)
-
-        for eltypemat in eltypes
-            (densemat, sparsemat) = eltypemat in inttypes ? (denseintmat, sparseintmat) :
-                                    eltypemat in floattypes ? (densefloatmat, sparsefloatmat) :
-                                    eltypemat in complextypes && (densecomplexmat, sparsecomplexmat)
-            densemat = convert(Matrix{eltypemat}, densemat)
-            sparsemat = convert(SparseMatrixCSC{eltypemat}, sparsemat)
-            trimats = (LowerTriangular(densemat), UpperTriangular(densemat),
-                       LowerTriangular(sparsemat), UpperTriangular(sparsemat) )
-            unittrimats = (LinearAlgebra.UnitLowerTriangular(densemat), LinearAlgebra.UnitUpperTriangular(densemat),
-                           LinearAlgebra.UnitLowerTriangular(sparsemat), LinearAlgebra.UnitUpperTriangular(sparsemat) )
-
-            for eltypevec in eltypes
-                spvecs = eltypevec in inttypes ? sparseintvecs :
-                         eltypevec in floattypes ? sparsefloatvecs :
-                         eltypevec in complextypes && sparsecomplexvecs
-                spvecs = SparseVector[SparseVector(m, nonzeroinds(spvec), convert(Vector{eltypevec}, nonzeros(spvec))) for spvec in spvecs]
-
-                for spvec in spvecs
-                    fspvec = convert(Array, spvec)
-                    # test out-of-place left-division methods
-                    for mat in (trimats..., unittrimats...)
-                        @test \(mat, spvec)            ≈ \(mat, fspvec)
-                        @test \(adjoint(mat), spvec)   ≈ \(adjoint(mat), fspvec)
-                        @test \(transpose(mat), spvec) ≈ \(transpose(mat), fspvec)
-                    end
-                    # test in-place left-division methods not involving quotients
-                    if eltypevec == typeof(zero(eltypemat)*zero(eltypevec) + zero(eltypemat)*zero(eltypevec))
-                        for mat in unittrimats
-                            @test ldiv!(mat, copy(spvec)) ≈ ldiv!(mat, copy(fspvec))
-                            @test ldiv!(adjoint(mat), copy(spvec)) ≈ ldiv!(adjoint(mat), copy(fspvec))
-                            @test ldiv!(transpose(mat), copy(spvec)) ≈ ldiv!(transpose(mat), copy(fspvec))
-                        end
-                    end
-                    # test in-place left-division methods involving quotients
-                    if eltypevec == typeof((zero(eltypemat)*zero(eltypevec) + zero(eltypemat)*zero(eltypevec))/one(eltypemat))
-                        for mat in trimats
-                            @test ldiv!(mat, copy(spvec)) ≈ ldiv!(mat, copy(fspvec))
-                            @test ldiv!(adjoint(mat), copy(spvec)) ≈ ldiv!(adjoint(mat), copy(fspvec))
-                            @test ldiv!(transpose(mat), copy(spvec)) ≈ ldiv!(transpose(mat), copy(fspvec))
-                        end
-                    end
-                end
-            end
-        end
-    end
-    @testset "#16716" begin
-        # The preceding tests miss the edge case where the sparse vector is empty
-        origmat = [-1.5 -0.7; 0.0 1.0]
-        transmat = copy(origmat')
-        utmat = UpperTriangular(origmat)
-        ltmat = LowerTriangular(transmat)
-        uutmat = LinearAlgebra.UnitUpperTriangular(origmat)
-        ultmat = LinearAlgebra.UnitLowerTriangular(transmat)
-
-        zerospvec = spzeros(Float64, 2)
-        zerodvec = zeros(Float64, 2)
-
-        for mat in (utmat, ltmat, uutmat, ultmat)
-            @test isequal(\(mat, zerospvec), zerodvec)
-            @test isequal(\(adjoint(mat), zerospvec), zerodvec)
-            @test isequal(\(transpose(mat), zerospvec), zerodvec)
-            @test isequal(ldiv!(mat, copy(zerospvec)), zerospvec)
-            @test isequal(ldiv!(adjoint(mat), copy(zerospvec)), zerospvec)
-            @test isequal(ldiv!(transpose(mat), copy(zerospvec)), zerospvec)
-        end
-    end
-    @testset "Triangular and SparseVector multiplications" begin
-        n = 10
-        types = (Int, Float64, ComplexF64)
-        tritypes = (LowerTriangular, UnitUpperTriangular)
-        for ta in types
-            for tri in tritypes
-                if ta == Int
-                    T = tri(rand(1:9, n, n))
-                else
-                    T = tri(randn(ta, n, n))
-                end
-                for tb in types
-                    if tb == Int
-                        x = sparse(rand(0:4, n))
-                    else
-                        x = sprandn(tb, n, 0.6)
-                    end
-                    @test T * x ≈ Array(T) * Array(x)
-                    @test T' * x ≈ Array(T)' * Array(x)
-                    @test transpose(T) * x ≈ transpose(Array(T)) * Array(x)
-                    @test x' * T ≈ Array(x)' * Array(T)
-                    @test x' * T' ≈ Array(x)' * Array(T)'
-                    @test x' * transpose(T) ≈ Array(x)' * transpose(Array(T))
-                end
-            end
-        end
-
-        # 0-dimensional case
-        x = sparse(zeros(0))
-        for tri in tritypes
-            T = tri(zeros(0, 0))
-            @test T*x == Array(T) * Array(x)
-            @test T' * x == Array(T)' * Array(x)
-            @test transpose(T) * x == transpose(Array(T)) * Array(x)
-            @test x' * T == Array(x)' * Array(T)
-            @test x' * T' == Array(x)' * Array(T)'
-            @test x' * transpose(T) == Array(x)' * transpose(Array(T))
-        end
-    end
-end
-
-@testset "fkeep!" begin
-    x = sparsevec(1:7, [3., 2., -1., 1., -2., -3., 3.], 7)
-    # droptol
-    xdrop = SparseArrays.droptol!(copy(x), 1.5)
-    @test exact_equal(xdrop, SparseVector(7, [1, 2, 5, 6, 7], [3., 2., -2., -3., 3.]))
-    SparseArrays.droptol!(xdrop, 2.5)
-    @test exact_equal(xdrop, SparseVector(7, [1, 6, 7], [3., -3., 3.]))
-    SparseArrays.droptol!(xdrop, 3.)
-    @test exact_equal(xdrop, SparseVector(7, Int[], Float64[]))
-
-    xdrop = copy(x)
-    # This will keep index 1, 3, 4, 7 in xdrop
-    f_drop(i, x) = (abs(x) == 1.) || (i in [1, 7])
-    SparseArrays.fkeep!(xdrop, f_drop)
-    @test exact_equal(xdrop, SparseVector(7, [1, 3, 4, 7], [3., -1., 1., 3.]))
-end
-
-@testset "dropzeros[!] with length=$m" for m in (10, 20, 30)
-    Random.seed!(123)
-    nzprob, targetnumposzeros, targetnumnegzeros = 0.4, 5, 5
-    v = sprand(m, nzprob)
-    struczerosv = findall(x -> x == 0, v)
-    poszerosinds = unique(rand(struczerosv, targetnumposzeros))
-    negzerosinds = unique(rand(struczerosv, targetnumnegzeros))
-    vposzeros = copy(v)
-    vposzeros[poszerosinds] .= 2
-    vnegzeros = copy(v)
-    vnegzeros[negzerosinds] .= -2
-    vbothsigns = copy(vposzeros)
-    vbothsigns[negzerosinds] .= -2
-    map!(x -> x == 2 ? 0.0 : x, nonzeros(vposzeros), nonzeros(vposzeros))
-    map!(x -> x == -2 ? -0.0 : x, nonzeros(vnegzeros), nonzeros(vnegzeros))
-    map!(x -> x == 2 ? 0.0 : x == -2 ? -0.0 : x, nonzeros(vbothsigns), nonzeros(vbothsigns))
-    for vwithzeros in (vposzeros, vnegzeros, vbothsigns)
-        # Basic functionality / dropzeros!
-        @test dropzeros!(copy(vwithzeros)) == v
-        # Basic functionality / dropzeros
-        @test dropzeros(vwithzeros) == v
-        # Check trimming works as expected
-        @test length(nonzeros(dropzeros!(copy(vwithzeros)))) == length(nonzeros(v))
-        @test length(nonzeroinds(dropzeros!(copy(vwithzeros)))) == length(nonzeroinds(v))
-    end
-end
-
-@testset "original dropzeros! test" begin
-    xdrop = sparsevec(1:7, [3., 2., -1., 1., -2., -3., 3.], 7)
-    nonzeros(xdrop)[[2, 4, 6]] .= 0.0
-    SparseArrays.dropzeros!(xdrop)
-    @test exact_equal(xdrop, SparseVector(7, [1, 3, 5, 7], [3, -1., -2., 3.]))
-end
-
-# It's tempting to share data between a SparseVector and a SparseMatrix,
-# but if that's done, then modifications to one or the other will cause
-# an inconsistent state:
-sv = sparse(1:10)
-sm = convert(SparseMatrixCSC, sv)
-sv[1] = 0
-@test Array(sm)[2:end] == 2:10
-
-# Ensure that sparsevec with all-zero values returns an array of zeros
-@test sparsevec([1,2,3],[0,0,0]) == [0,0,0]
-
-@testset "stored zero semantics" begin
-    # Compare stored zero semantics between SparseVector and SparseMatrixCSC
-    let S = SparseMatrixCSC(10,1,[1,6],[1,3,5,6,7],[0,1,2,0,3]), x = SparseVector(10,[1,3,5,6,7],[0,1,2,0,3])
-        @test nnz(S) == nnz(x) == 5
-        for I = (:, 1:10, Vector(1:10))
-            @test S[I,1] == S[I] == x[I] == x
-            @test nnz(S[I,1]) == nnz(S[I]) == nnz(x[I]) == nnz(x)
-        end
-        for I = (2:9, 1:2, 9:10, [3,6,1], [10,9,8], [])
-            @test S[I,1] == S[I] == x[I]
-            @test nnz(S[I,1]) == nnz(S[I]) == nnz(x[I])
-        end
-        @test S[[1 3 5; 2 4 6]] == x[[1 3 5; 2 4 6]]
-        @test nnz(S[[1 3 5; 2 4 6]]) == nnz(x[[1 3 5; 2 4 6]])
-    end
-end
-
-@testset "Issue 14013" begin
-    s14013 = sparse([10.0 0.0 30.0; 0.0 1.0 0.0])
-    a14013 = [10.0 0.0 30.0; 0.0 1.0 0.0]
-    @test s14013 == a14013
-    @test vec(s14013) == s14013[:] == a14013[:]
-    @test Array(s14013)[1,:] == s14013[1,:] == a14013[1,:] == [10.0, 0.0, 30.0]
-    @test Array(s14013)[2,:] == s14013[2,:] == a14013[2,:] == [0.0, 1.0, 0.0]
-end
-@testset "Issue 14046" begin
-    s14046 = sprand(5, 1.0)
-    @test spzeros(5) + s14046 == s14046
-    @test 2*s14046 == s14046 + s14046
-end
-@testset "Issue 14589" begin
-    # test vectors with no zero elements
-    let x = sparsevec(1:7, [3., 2., -1., 1., -2., -3., 3.], 7)
-        @test Vector(sort(x)) == sort(Vector(x))
-    end
-    # test vectors with all zero elements
-    let x = sparsevec(Int64[], Float64[], 7)
-        @test Vector(sort(x)) == sort(Vector(x))
-    end
-    # test vector with sparsity approx 1/2
-    let x = sparsevec(1:7, [3., 2., -1., 1., -2., -3., 3.], 15)
-        @test Vector(sort(x)) == sort(Vector(x))
-        # apply three distinct transformations where zeros sort into start/middle/end
-        @test Vector(sort(x, by=abs)) == sort(Vector(x), by=abs)
-        @test Vector(sort(x, by=sign)) == sort(Vector(x), by=sign)
-        @test Vector(sort(x, by=inv)) == sort(Vector(x), by=inv)
-    end
-end
-@testset "fill!" begin
-    for Tv in [Float32, Float64, Int64, Int32, ComplexF64]
-        for Ti in [Int16, Int32, Int64, BigInt]
-            sptypes = (SparseMatrixCSC{Tv, Ti}, SparseVector{Tv, Ti})
-            sizes = [(3, 4), (3,)]
-            for (siz, Sp) in zip(sizes, sptypes)
-                arr = rand(Tv, siz...)
-                sparr = Sp(arr)
-                x = rand(Tv)
-                @test fill!(sparr, x) == fill(x, siz)
-                @test fill!(sparr, 0) == fill(0, siz)
-            end
-        end
-    end
-end
-
-@testset "13130 and 16661" begin
-    @test issparse([sprand(10,10,.1) sprand(10,.1)])
-    @test issparse([sprand(10,1,.1); sprand(10,.1)])
-
-    @test issparse([sprand(10,10,.1) rand(10)])
-    @test issparse([sprand(10,1,.1)  rand(10)])
-    @test issparse([sprand(10,2,.1) sprand(10,1,.1) rand(10)])
-    @test issparse([sprand(10,1,.1); rand(10)])
-
-    @test issparse([sprand(10,.1)  rand(10)])
-    @test issparse([sprand(10,.1); rand(10)])
-end
-
-mutable struct t20488 end
-
-@testset "show" begin
-    io = IOBuffer()
-    show(io, MIME"text/plain"(), sparsevec(Int64[1], [1.0]))
-    @test String(take!(io)) == "1-element SparseArrays.SparseVector{Float64, Int64} with 1 stored entry:\n  [1]  =  1.0"
-    show(io, MIME"text/plain"(),  spzeros(Float64, Int64, 2))
-    @test String(take!(io)) == "2-element SparseArrays.SparseVector{Float64, Int64} with 0 stored entries"
-    show(io, similar(sparsevec(rand(3) .+ 0.1), t20488))
-    @test String(take!(io)) == "  [1]  =  #undef\n  [2]  =  #undef\n  [3]  =  #undef"
-end
-
-@testset "spzeros with index type" begin
-    @test typeof(spzeros(Float32, Int16, 3)) == SparseVector{Float32,Int16}
-end
-
-@testset "corner cases of broadcast arithmetic operations with scalars (#21515)" begin
-    # test both scalar literals and variables
-    areequal(a, b, c) = isequal(a, b) && isequal(b, c)
-    inf, zeroh, zv, spzv = Inf, 0.0, zeros(1), spzeros(1)
-    @test areequal(spzv .* Inf,  spzv .* inf,    sparsevec(zv .* Inf))
-    @test areequal(Inf .* spzv,  inf .* spzv,    sparsevec(Inf .* zv))
-    @test areequal(spzv ./ 0.0,  spzv ./ zeroh,  sparsevec(zv ./ 0.0))
-    @test areequal(0.0 .\ spzv,  zeroh .\ spzv,  sparsevec(0.0 .\ zv))
-end
-
-@testset "similar for SparseVector" begin
-    A = SparseVector(10, Int[1, 3, 5, 7], Float64[1.0, 3.0, 5.0, 7.0])
-    # test similar without specifications (preserves stored-entry structure)
-    simA = similar(A)
-    @test typeof(simA) == typeof(A)
-    @test size(simA) == size(A)
-    @test nonzeroinds(simA) == nonzeroinds(A)
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry type specification (preserves stored-entry structure)
-    simA = similar(A, Float32)
-    @test typeof(simA) == SparseVector{Float32,eltype(nonzeroinds(A))}
-    @test size(simA) == size(A)
-    @test nonzeroinds(simA) == nonzeroinds(A)
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry and index type specification (preserves stored-entry structure)
-    simA = similar(A, Float32, Int8)
-    @test typeof(simA) == SparseVector{Float32,Int8}
-    @test size(simA) == size(A)
-    @test nonzeroinds(simA) == nonzeroinds(A)
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with Dims{1} specification (preserves nothing)
-    simA = similar(A, (6,))
-    @test typeof(simA) == typeof(A)
-    @test size(simA) == (6,)
-    @test length(nonzeroinds(simA)) == 0
-    @test length(nonzeros(simA)) == 0
-    # test similar with entry type and Dims{1} specification (preserves nothing)
-    simA = similar(A, Float32, (6,))
-    @test typeof(simA) == SparseVector{Float32,eltype(nonzeroinds(A))}
-    @test size(simA) == (6,)
-    @test length(nonzeroinds(simA)) == 0
-    @test length(nonzeros(simA)) == 0
-    # test similar with entry type, index type, and Dims{1} specification (preserves nothing)
-    simA = similar(A, Float32, Int8, (6,))
-    @test typeof(simA) == SparseVector{Float32,Int8}
-    @test size(simA) == (6,)
-    @test length(nonzeroinds(simA)) == 0
-    @test length(nonzeros(simA)) == 0
-    # test entry points to similar with entry type, index type, and non-Dims shape specification
-    @test similar(A, Float32, Int8, 6, 6) == similar(A, Float32, Int8, (6, 6))
-    @test similar(A, Float32, Int8, 6) == similar(A, Float32, Int8, (6,))
-    # test similar with Dims{2} specification (preserves storage space only, not stored-entry structure)
-    simA = similar(A, (6,6))
-    @test typeof(simA) == SparseMatrixCSC{eltype(nonzeros(A)),eltype(nonzeroinds(A))}
-    @test size(simA) == (6,6)
-    @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(nonzeroinds(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry type and Dims{2} specification (preserves storage space only)
-    simA = similar(A, Float32, (6,6))
-    @test typeof(simA) == SparseMatrixCSC{Float32,eltype(nonzeroinds(A))}
-    @test size(simA) == (6,6)
-    @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(nonzeroinds(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-    # test similar with entry type, index type, and Dims{2} specification (preserves storage space only)
-    simA = similar(A, Float32, Int8, (6,6))
-    @test typeof(simA) == SparseMatrixCSC{Float32, Int8}
-    @test size(simA) == (6,6)
-    @test getcolptr(simA) == fill(1, 6+1)
-    @test length(rowvals(simA)) == length(nonzeroinds(A))
-    @test length(nonzeros(simA)) == length(nonzeros(A))
-end
-
-@testset "Fast operations on full column views" begin
-    n = 1000
-    A = sprandn(n, n, 0.01)
-    for j in 1:5:n
-        Aj, Ajview = A[:, j], view(A, :, j)
-        @test norm(Aj)          == norm(Ajview)
-        @test dot(Aj, copy(Aj)) == dot(Ajview, Aj) # don't alias since it takes a different code path
-        @test rmul!(Aj, 0.1)    == rmul!(Ajview, 0.1)
-        @test Aj*0.1            == Ajview*0.1
-        @test 0.1*Aj            == 0.1*Ajview
-        @test Aj/0.1            == Ajview/0.1
-        @test LinearAlgebra.axpy!(1.0, Aj,     sparse(fill(1., n))) ==
-              LinearAlgebra.axpy!(1.0, Ajview, sparse(fill(1., n)))
-        @test LinearAlgebra.lowrankupdate!(Matrix(1.0*I, n, n), fill(1.0, n), Aj) ==
-              LinearAlgebra.lowrankupdate!(Matrix(1.0*I, n, n), fill(1.0, n), Ajview)
-    end
-end
-
-@testset "SparseVector circshift" begin
-    n = 100
-    v = sprand(n, 0.5)
-    for shift in (0,-1,1,5,-7,n+10)
-        x = circshift(Vector(v), shift)
-        w = circshift(v, shift)
-        @test nnz(v) == nnz(w)
-        @test w == x
-        # test circshift!
-        v1 = similar(v)
-        circshift!(v1, v, shift)
-        @test v1 == x
-        # test different in/out types
-        y1 = spzeros(Int64, n)
-        y2 = spzeros(Int64, n)
-        v2 = floor.(100v)
-        circshift!(y1, v2, shift)
-        circshift!(y2, Vector(v2), shift)
-        @test y1 == y2
-    end
-end
-
-@testset "SparseColumnView properties" begin
-    n = 10
-    A = sprand(n, n, 0.5)
-    scv = view(A, :, 1)
-    @test SparseArrays.indtype(scv) == SparseArrays.indtype(A)
-    @test nnz(scv) == nnz(A[:, 1])
-end
-
-@testset "avoid aliasing of fields during constructing $T (issue #34630)" for T in
-    (SparseVector, SparseVector{Float64}, SparseVector{Float64,Int16})
-
-    A = sparse([1; 0])
-    B = T(A)
-    @test A == B
-    A[2] = 1
-    @test A != B
-    @test nonzeroinds(A) !== nonzeroinds(B)
-    @test nonzeros(A) !== nonzeros(B)
-end
-
-@testset "multiplication of Triangular sparse matrices with sparse vectors #35642" begin
-    n = 10
-    A = sprand(n, n, 5/n)
-    U = UpperTriangular(A)
-    L = LowerTriangular(A)
-    x = sprand(n, 5/n)
-    y = view(A, :, 6)
-    z = view(x, :)
-    ty = typeof
-    @testset "matvec multiplication $(ty(X)) * $(ty(v))" for X in (U, L), v in (x, y, z)
-        @test X * v ≈ Matrix(X) * Vector(v)
-        @test typeof(X * v) == typeof(x)
-    end
-end
-
-end # module
diff --git a/stdlib/SparseArrays/test/testgroups b/stdlib/SparseArrays/test/testgroups
deleted file mode 100644
index e49039d94946c6..00000000000000
--- a/stdlib/SparseArrays/test/testgroups
+++ /dev/null
@@ -1,3 +0,0 @@
-higherorderfns
-sparse
-sparsevector
diff --git a/stdlib/Statistics.version b/stdlib/Statistics.version
index 8f4c6c96bd0a0f..7ad39f00f4cbe1 100644
--- a/stdlib/Statistics.version
+++ b/stdlib/Statistics.version
@@ -1,2 +1,4 @@
 STATISTICS_BRANCH = master
-STATISTICS_SHA1 = b384104d35ff0e7cf311485607b177223ed72b9a
+STATISTICS_SHA1 = 61a021bcb330e6c52f2435f2abaffc77875ab6f2
+STATISTICS_GIT_URL := https://github.com/JuliaLang/Statistics.jl.git
+STATISTICS_TAR_URL = https://api.github.com/repos/JuliaLang/Statistics.jl/tarball/$1
diff --git a/stdlib/SuiteSparse.version b/stdlib/SuiteSparse.version
new file mode 100644
index 00000000000000..27e835befbc38b
--- /dev/null
+++ b/stdlib/SuiteSparse.version
@@ -0,0 +1,4 @@
+SUITESPARSE_BRANCH = master
+SUITESPARSE_SHA1 = f63732c1c6adecb277d8f2981cc8c1883c321bcc
+SUITESPARSE_GIT_URL := https://github.com/JuliaSparse/SuiteSparse.jl.git
+SUITESPARSE_TAR_URL = https://api.github.com/repos/JuliaSparse/SuiteSparse.jl/tarball/$1
diff --git a/stdlib/SuiteSparse/Project.toml b/stdlib/SuiteSparse/Project.toml
deleted file mode 100644
index bacb81664bc3fc..00000000000000
--- a/stdlib/SuiteSparse/Project.toml
+++ /dev/null
@@ -1,18 +0,0 @@
-name = "SuiteSparse"
-uuid = "4607b0f0-06f3-5cda-b6b1-a6196a1729e9"
-
-[deps]
-Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
-LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
-Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
-
-[extras]
-DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
-Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
-Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
-Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
-Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
-
-[targets]
-test = ["Test", "Printf", "Random", "DelimitedFiles", "Serialization"]
diff --git a/stdlib/SuiteSparse/docs/src/index.md b/stdlib/SuiteSparse/docs/src/index.md
deleted file mode 100644
index e8654caf943b40..00000000000000
--- a/stdlib/SuiteSparse/docs/src/index.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Sparse Linear Algebra
-
-```@meta
-DocTestSetup = :(using LinearAlgebra, SparseArrays, SuiteSparse)
-```
-
-Sparse matrix solvers call functions from [SuiteSparse](http://suitesparse.com). The following factorizations are available:
-
-| Type                              | Description                                   |
-|:--------------------------------- |:--------------------------------------------- |
-| `SuiteSparse.CHOLMOD.Factor`      | Cholesky factorization                        |
-| `SuiteSparse.UMFPACK.UmfpackLU`   | LU factorization                              |
-| `SuiteSparse.SPQR.QRSparse`       | QR factorization                              |
-
-Other solvers such as [Pardiso.jl](https://github.com/JuliaSparse/Pardiso.jl/) are as external packages. [Arpack.jl](https://julialinearalgebra.github.io/Arpack.jl/stable/) provides `eigs` and `svds` for iterative solution of eigensystems and singular value decompositions.
-
-These factorizations are described in the [`Linear Algebra`](@ref man-linalg) section of the manual:
-1. [`cholesky`](@ref)
-2. [`ldlt`](@ref)
-3. [`lu`](@ref)
-4. [`qr`](@ref)
-
-```@docs
-SuiteSparse.CHOLMOD.lowrankupdate
-SuiteSparse.CHOLMOD.lowrankupdate!
-SuiteSparse.CHOLMOD.lowrankdowndate
-SuiteSparse.CHOLMOD.lowrankdowndate!
-SuiteSparse.CHOLMOD.lowrankupdowndate!
-```
-
-
-```@meta
-DocTestSetup = nothing
-```
diff --git a/stdlib/SuiteSparse/src/SuiteSparse.jl b/stdlib/SuiteSparse/src/SuiteSparse.jl
deleted file mode 100644
index e07e6aaea1c5ae..00000000000000
--- a/stdlib/SuiteSparse/src/SuiteSparse.jl
+++ /dev/null
@@ -1,31 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module SuiteSparse
-
-import Base: \
-import LinearAlgebra: ldiv!, rdiv!
-
-## Functions to switch to 0-based indexing to call external sparse solvers
-
-# Convert from 1-based to 0-based indices
-function decrement!(A::AbstractArray{T}) where T<:Integer
-    for i in eachindex(A); A[i] -= oneunit(T) end
-    A
-end
-decrement(A::AbstractArray{<:Integer}) = decrement!(copy(A))
-
-# Convert from 0-based to 1-based indices
-function increment!(A::AbstractArray{T}) where T<:Integer
-    for i in eachindex(A); A[i] += oneunit(T) end
-    A
-end
-increment(A::AbstractArray{<:Integer}) = increment!(copy(A))
-
-if Base.USE_GPL_LIBS
-    include("umfpack.jl")
-    include("cholmod.jl")
-    include("spqr.jl")
-    include("deprecated.jl")
-end
-
-end # module SuiteSparse
diff --git a/stdlib/SuiteSparse/src/cholmod.jl b/stdlib/SuiteSparse/src/cholmod.jl
deleted file mode 100644
index 4b0935e55f7a0f..00000000000000
--- a/stdlib/SuiteSparse/src/cholmod.jl
+++ /dev/null
@@ -1,1871 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# Theoretically CHOLMOD supports both Int32 and Int64 indices on 64-bit.
-# However experience suggests that using both in the same session causes memory
-# leaks, so we restrict indices to be SuiteSparse_long (see cholmod_h.jl).
-# Ref: https://github.com/JuliaLang/julia/issues/12664
-
-# Additionally, only Float64/ComplexF64 are supported in practice.
-# Ref: https://github.com/JuliaLang/julia/issues/25986
-
-module CHOLMOD
-
-import Base: (*), convert, copy, eltype, getindex, getproperty, show, size,
-             IndexStyle, IndexLinear, IndexCartesian, adjoint, axes
-using Base: require_one_based_indexing
-
-using LinearAlgebra
-import LinearAlgebra: (\),
-                 cholesky, cholesky!, det, diag, ishermitian, isposdef,
-                 issuccess, issymmetric, ldlt, ldlt!, logdet
-
-using SparseArrays
-using SparseArrays: getcolptr
-import Libdl
-
-export
-    Dense,
-    Factor,
-    Sparse
-
-import SparseArrays: AbstractSparseMatrix, SparseMatrixCSC, indtype, sparse, spzeros, nnz
-
-import ..increment, ..increment!, ..decrement, ..decrement!
-
-#########
-# Setup #
-#########
-
-include("cholmod_h.jl")
-
-const CHOLMOD_MIN_VERSION = v"2.1.1"
-
-const common_struct = Vector{Vector{UInt8}}()
-
-const common_supernodal = Vector{Ptr{Cint}}()
-const common_final_ll   = Vector{Ptr{Cint}}()
-const common_print      = Vector{Ptr{Cint}}()
-const common_itype      = Vector{Ptr{Cint}}()
-const common_dtype      = Vector{Ptr{Cint}}()
-const common_nmethods   = Vector{Ptr{Cint}}()
-const common_postorder  = Vector{Ptr{Cint}}()
-
-### These offsets are defined in SuiteSparse_wrapper.c
-const common_size = ccall((:jl_cholmod_common_size,:libsuitesparse_wrapper),Int,())
-
-const cholmod_com_offsets = Vector{Csize_t}(undef, 19)
-ccall((:jl_cholmod_common_offsets, :libsuitesparse_wrapper),
-    Nothing, (Ptr{Csize_t},), cholmod_com_offsets)
-
-## macro to generate the name of the C function according to the integer type
-macro cholmod_name(nm)
-    string("cholmod_l_", nm)
-end
-
-function start(a::Vector{UInt8})
-    @isok ccall((@cholmod_name("start"), :libcholmod),
-        Cint, (Ptr{UInt8},), a)
-    return a
-end
-
-function finish(a::Vector{UInt8})
-    @isok ccall((@cholmod_name("finish"), :libcholmod),
-        Cint, (Ptr{UInt8},), a)
-    return a
-end
-
-function defaults(a::Vector{UInt8})
-    @isok ccall((@cholmod_name("defaults"), :libcholmod),
-        Cint, (Ptr{UInt8},), a)
-    return a
-end
-
-const build_version_array = Vector{Cint}(undef, 3)
-ccall((:jl_cholmod_version, :libsuitesparse_wrapper), Cint, (Ptr{Cint},), build_version_array)
-const build_version = VersionNumber(build_version_array...)
-
-function __init__()
-    try
-        ### Check if the linked library is compatible with the Julia code
-        if Libdl.dlsym_e(Libdl.dlopen("libcholmod"), :cholmod_version) != C_NULL
-            current_version_array = Vector{Cint}(undef, 3)
-            ccall((:cholmod_version, :libcholmod), Cint, (Ptr{Cint},), current_version_array)
-            current_version = VersionNumber(current_version_array...)
-        else # CHOLMOD < 2.1.1 does not include cholmod_version()
-            current_version = v"0.0.0"
-        end
-
-
-        if current_version < CHOLMOD_MIN_VERSION
-            @warn """
-                CHOLMOD version incompatibility
-
-                Julia was compiled with CHOLMOD version $build_version. It is
-                currently linked with a version older than
-                $(CHOLMOD_MIN_VERSION). This might cause Julia to
-                terminate when working with sparse matrix factorizations,
-                e.g. solving systems of equations with \\.
-
-                It is recommended that you use Julia with a recent version
-                of CHOLMOD, or download the generic binaries
-                from www.julialang.org, which ship with the correct
-                versions of all dependencies.
-                """
-        elseif build_version_array[1] != current_version_array[1]
-            @warn """
-                CHOLMOD version incompatibility
-
-                Julia was compiled with CHOLMOD version $build_version. It is
-                currently linked with version $current_version.
-                This might cause Julia to terminate when working with
-                sparse matrix factorizations, e.g. solving systems of
-                equations with \\.
-
-                It is recommended that you use Julia with the same major
-                version of CHOLMOD as the one used during the build, or
-                download the generic binaries from www.julialang.org,
-                which ship with the correct versions of all dependencies.
-                """
-        end
-
-        intsize = Int(ccall((:jl_cholmod_sizeof_long,:libsuitesparse_wrapper),Csize_t,()))
-        if intsize != 4length(IndexTypes)
-            @error """
-                 CHOLMOD integer size incompatibility
-
-                 Julia was compiled with a version of CHOLMOD that
-                 supported $(32length(IndexTypes)) bit integers. It is
-                 currently linked with version that supports $(8intsize)
-                 integers. This might cause Julia to terminate when
-                 working with sparse matrix factorizations, e.g. solving
-                 systems of equations with \\.
-
-                 This problem can be fixed by modifying the Julia build
-                 configuration or by downloading the OS X or generic
-                 Linux binary from www.julialang.org, which include
-                 the correct versions of all dependencies.
-                 """
-        end
-
-        ### Initiate CHOLMOD
-        ### common_struct controls the type of factorization and keeps pointers
-        ### to temporary memory. We need to manage a copy for each thread.
-        nt = Threads.nthreads()
-        resize!(common_struct    , nt)
-        resize!(common_supernodal, nt)
-        resize!(common_final_ll  , nt)
-        resize!(common_print     , nt)
-        resize!(common_itype     , nt)
-        resize!(common_dtype     , nt)
-        resize!(common_nmethods  , nt)
-        resize!(common_postorder , nt)
-        for i in 1:nt
-            common_struct[i] = fill(0xff, common_size)
-
-            common_supernodal[i] = pointer(common_struct[i], cholmod_com_offsets[4] + 1)
-            common_final_ll[i]   = pointer(common_struct[i], cholmod_com_offsets[7] + 1)
-            common_print[i]      = pointer(common_struct[i], cholmod_com_offsets[13] + 1)
-            common_itype[i]      = pointer(common_struct[i], cholmod_com_offsets[18] + 1)
-            common_dtype[i]      = pointer(common_struct[i], cholmod_com_offsets[19] + 1)
-            common_nmethods[i]   = pointer(common_struct[i], cholmod_com_offsets[15] + 1)
-            common_postorder[i]  = pointer(common_struct[i], cholmod_com_offsets[17] + 1)
-
-            start(common_struct[i])              # initializes CHOLMOD
-            set_print_level(common_struct[i], 0) # no printing from CHOLMOD by default
-        end
-
-        # Register gc tracked allocator if CHOLMOD is new enough
-        if current_version >= v"3.0.0"
-            cnfg = cglobal((:SuiteSparse_config, :libsuitesparseconfig), Ptr{Cvoid})
-            unsafe_store!(cnfg, cglobal(:jl_malloc, Ptr{Cvoid}), 1)
-            unsafe_store!(cnfg, cglobal(:jl_calloc, Ptr{Cvoid}), 2)
-            unsafe_store!(cnfg, cglobal(:jl_realloc, Ptr{Cvoid}), 3)
-            unsafe_store!(cnfg, cglobal(:jl_free, Ptr{Cvoid}), 4)
-        end
-
-    catch ex
-        @error "Error during initialization of module CHOLMOD" exception=ex,catch_backtrace()
-    end
-end
-
-function set_print_level(cm::Vector{UInt8}, lev::Integer)
-    unsafe_store!(common_print[Threads.threadid()], lev)
-end
-
-####################
-# Type definitions #
-####################
-
-abstract type SuiteSparseStruct end
-
-# The three core data types for CHOLMOD: Dense, Sparse and Factor.
-# CHOLMOD manages the memory, so the Julia versions only wrap a
-# pointer to a struct.  Therefore finalizers should be registered each
-# time a pointer is returned from CHOLMOD.
-
-# Dense
-struct C_Dense{T<:VTypes} <: SuiteSparseStruct
-    nrow::Csize_t
-    ncol::Csize_t
-    nzmax::Csize_t
-    d::Csize_t
-    x::Ptr{T}
-    z::Ptr{Cvoid}
-    xtype::Cint
-    dtype::Cint
-end
-
-mutable struct Dense{Tv<:VTypes} <: DenseMatrix{Tv}
-    ptr::Ptr{C_Dense{Tv}}
-    function Dense{Tv}(ptr::Ptr{C_Dense{Tv}}) where Tv<:VTypes
-        if ptr == C_NULL
-            throw(ArgumentError("dense matrix construction failed for " *
-                "unknown reasons. Please submit a bug report."))
-        end
-        s = unsafe_load(ptr)
-        if s.xtype != xtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("xtype=$(s.xtype) not supported"))
-        elseif s.dtype != dtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("dtype=$(s.dtype) not supported"))
-        end
-        A = new(ptr)
-        finalizer(free!, A)
-        return A
-    end
-end
-Dense(p::Ptr{C_Dense{Tv}}) where {Tv<:VTypes} = Dense{Tv}(p)
-
-# Sparse
-# allow Cvoid pointer for reading matrices of unknown type from files as in
-# cholmod_read_sparse
-struct C_Sparse{Tv<:Union{Cvoid, VTypes}} <: SuiteSparseStruct
-    nrow::Csize_t
-    ncol::Csize_t
-    nzmax::Csize_t
-    p::Ptr{SuiteSparse_long}
-    i::Ptr{SuiteSparse_long}
-    nz::Ptr{SuiteSparse_long}
-    x::Ptr{Tv}
-    z::Ptr{Cvoid}
-    stype::Cint
-    itype::Cint
-    xtype::Cint
-    dtype::Cint
-    sorted::Cint
-    packed::Cint
-end
-
-mutable struct Sparse{Tv<:VTypes} <: AbstractSparseMatrix{Tv,SuiteSparse_long}
-    ptr::Ptr{C_Sparse{Tv}}
-    function Sparse{Tv}(ptr::Ptr{C_Sparse{Tv}}) where Tv<:VTypes
-        if ptr == C_NULL
-            throw(ArgumentError("sparse matrix construction failed for " *
-                "unknown reasons. Please submit a bug report."))
-        end
-        s = unsafe_load(ptr)
-        if s.itype != ityp(SuiteSparse_long)
-            free!(ptr)
-            throw(CHOLMODException("itype=$(s.itype) not supported"))
-        elseif s.xtype != xtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("xtype=$(s.xtype) not supported"))
-        elseif s.dtype != dtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("dtype=$(s.dtype) not supported"))
-        end
-        A = new(ptr)
-        finalizer(free!, A)
-        return A
-    end
-end
-Sparse(p::Ptr{C_Sparse{Tv}}) where {Tv<:VTypes} = Sparse{Tv}(p)
-
-# Useful when reading in files, but not type stable
-function Sparse(p::Ptr{C_Sparse{Cvoid}})
-    if p == C_NULL
-        throw(ArgumentError("sparse matrix construction failed for " *
-                            "unknown reasons. Please submit a bug report."))
-    end
-    s = unsafe_load(p)
-    Tv = s.xtype == REAL ? Float64 : ComplexF64
-    Sparse(convert(Ptr{C_Sparse{Tv}}, p))
-end
-
-Base.unsafe_convert(::Type{Ptr{Tv}}, A::Sparse{Tv}) where {Tv} = getfield(A, :ptr)
-
-# Factor
-# Cvoid is used for pattern-only factors
-struct C_Factor{Tv<:VTypes} <: SuiteSparseStruct
-    n::Csize_t
-    minor::Csize_t
-    Perm::Ptr{SuiteSparse_long}
-    ColCount::Ptr{SuiteSparse_long}
-    @static if build_version >= v"2.1.0"
-        IPerm::Ptr{SuiteSparse_long}  # this pointer was added in version 2.1.0
-    end
-    nzmax::Csize_t
-    p::Ptr{SuiteSparse_long}
-    i::Ptr{SuiteSparse_long}
-    x::Ptr{Tv}
-    z::Ptr{Cvoid}
-    nz::Ptr{SuiteSparse_long}
-    next::Ptr{SuiteSparse_long}
-    prev::Ptr{SuiteSparse_long}
-    nsuper::Csize_t
-    ssize::Csize_t
-    xsize::Csize_t
-    maxcsize::Csize_t
-    maxesize::Csize_t
-    super::Ptr{SuiteSparse_long}
-    pi::Ptr{SuiteSparse_long}
-    px::Ptr{SuiteSparse_long}
-    s::Ptr{SuiteSparse_long}
-    ordering::Cint
-    is_ll::Cint
-    is_super::Cint
-    is_monotonic::Cint
-    itype::Cint
-    xtype::Cint
-    dtype::Cint
-end
-
-mutable struct Factor{Tv<:VTypes} <: Factorization{Tv}
-    ptr::Ptr{C_Factor{Tv}}
-    function Factor{Tv}(ptr::Ptr{C_Factor{Tv}}, register_finalizer = true) where Tv
-        if ptr == C_NULL
-            throw(ArgumentError("factorization construction failed for " *
-                "unknown reasons. Please submit a bug report."))
-        end
-        s = unsafe_load(ptr)
-        if s.itype != ityp(SuiteSparse_long)
-            free!(ptr)
-            throw(CHOLMODException("itype=$(s.itype) not supported"))
-        elseif s.xtype != xtyp(Tv) && s.xtype != PATTERN
-            free!(ptr)
-            throw(CHOLMODException("xtype=$(s.xtype) not supported"))
-        elseif s.dtype != dtyp(Tv)
-            free!(ptr)
-            throw(CHOLMODException("dtype=$(s.dtype) not supported"))
-        end
-        F = new(ptr)
-        if register_finalizer
-            finalizer(free!, F)
-        end
-        return F
-    end
-end
-Factor(ptr::Ptr{C_Factor{Tv}}) where {Tv<:VTypes} = Factor{Tv}(ptr)
-Factor(x::Factor) = x
-
-Base.adjoint(F::Factor) = Adjoint(F)
-Base.transpose(F::Factor) = Transpose(F)
-
-# All pointer loads should be checked to make sure that SuiteSparse is not called with
-# a C_NULL pointer which could cause a segfault. Pointers are set to null
-# when serialized so this can happen when multiple processes are in use.
-function Base.unsafe_convert(::Type{Ptr{T}}, x::Union{Dense,Sparse,Factor}) where T<:SuiteSparseStruct
-    xp = getfield(x, :ptr)
-    if xp == C_NULL
-        throw(ArgumentError("pointer to the $T object is null. This can " *
-            "happen if the object has been serialized."))
-    else
-        return xp
-    end
-end
-Base.pointer(x::Dense{Tv}) where {Tv}  = Base.unsafe_convert(Ptr{C_Dense{Tv}}, x)
-Base.pointer(x::Sparse{Tv}) where {Tv} = Base.unsafe_convert(Ptr{C_Sparse{Tv}}, x)
-Base.pointer(x::Factor{Tv}) where {Tv} = Base.unsafe_convert(Ptr{C_Factor{Tv}}, x)
-
-# FactorComponent, for encoding particular factors from a factorization
-mutable struct FactorComponent{Tv,S} <: AbstractMatrix{Tv}
-    F::Factor{Tv}
-
-    function FactorComponent{Tv,S}(F::Factor{Tv}) where {Tv,S}
-        s = unsafe_load(pointer(F))
-        if s.is_ll != 0
-            if !(S === :L || S === :U || S === :PtL || S === :UP)
-                throw(CHOLMODException(string(S, " not supported for sparse ",
-                    "LLt matrices; try :L, :U, :PtL, or :UP")))
-            end
-        elseif !(S === :L || S === :U || S === :PtL || S === :UP ||
-                S === :D || S === :LD || S === :DU || S === :PtLD || S === :DUP)
-            throw(CHOLMODException(string(S, " not supported for sparse LDLt ",
-                "matrices; try :L, :U, :PtL, :UP, :D, :LD, :DU, :PtLD, or :DUP")))
-        end
-        new(F)
-    end
-end
-function FactorComponent(F::Factor{Tv}, sym::Symbol) where Tv
-    FactorComponent{Tv,sym}(F)
-end
-
-Factor(FC::FactorComponent) = Factor(FC.F)
-
-#################
-# Thin wrappers #
-#################
-
-# Dense wrappers
-
-### cholmod_core_h ###
-function allocate_dense(m::Integer, n::Integer, d::Integer, ::Type{Tv}) where {Tv<:VTypes}
-    Dense(ccall((@cholmod_name("allocate_dense"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Csize_t, Csize_t, Csize_t, Cint, Ptr{Cvoid}),
-                m, n, d, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-
-function free!(p::Ptr{C_Dense{Tv}}) where {Tv<:VTypes}
-    @isok ccall((@cholmod_name("free_dense"), :libcholmod), Cint,
-                (Ref{Ptr{C_Dense{Tv}}}, Ptr{Cvoid}),
-                p, common_struct[Threads.threadid()])
-end
-function zeros(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("zeros"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Csize_t, Csize_t, Cint, Ptr{UInt8}),
-                m, n, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-zeros(m::Integer, n::Integer) = zeros(m, n, Float64)
-
-function ones(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("ones"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Csize_t, Csize_t, Cint, Ptr{UInt8}),
-                m, n, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-ones(m::Integer, n::Integer) = ones(m, n, Float64)
-
-function eye(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("eye"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Csize_t, Csize_t, Cint, Ptr{UInt8}),
-                m, n, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-eye(m::Integer, n::Integer) = eye(m, n, Float64)
-eye(n::Integer) = eye(n, n, Float64)
-
-function copy(A::Dense{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("copy_dense"), :libcholmod), Ptr{C_Dense{Tv}},
-                (Ptr{C_Dense{Tv}}, Ptr{UInt8}),
-                A, common_struct[Threads.threadid()]))
-end
-
-function sort!(S::Sparse{Tv}) where Tv<:VTypes
-    @isok ccall((@cholmod_name("sort"), :libcholmod), Cint,
-                (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                S, common_struct[Threads.threadid()])
-    return S
-end
-
-### cholmod_matrixops.h ###
-function norm_dense(D::Dense{Tv}, p::Integer) where Tv<:VTypes
-    s = unsafe_load(pointer(D))
-    if p == 2
-        if s.ncol > 1
-            throw(ArgumentError("2 norm only supported when matrix has one column"))
-        end
-    elseif p != 0 && p != 1
-        throw(ArgumentError("second argument must be either 0 (Inf norm), 1, or 2"))
-    end
-    ccall((@cholmod_name("norm_dense"), :libcholmod), Cdouble,
-        (Ptr{C_Dense{Tv}}, Cint, Ptr{UInt8}),
-          D, p, common_struct[Threads.threadid()])
-end
-
-### cholmod_check.h ###
-function check_dense(A::Dense{Tv}) where Tv<:VTypes
-    ccall((@cholmod_name("check_dense"), :libcholmod), Cint,
-          (Ptr{C_Dense{Tv}}, Ptr{UInt8}),
-          pointer(A), common_struct[Threads.threadid()]) != 0
-end
-
-# Non-Dense wrappers
-### cholmod_core.h ###
-function allocate_sparse(nrow::Integer, ncol::Integer, nzmax::Integer,
-        sorted::Bool, packed::Bool, stype::Integer, ::Type{Tv}) where {Tv<:VTypes}
-    Sparse(ccall((@cholmod_name("allocate_sparse"), :libcholmod),
-            Ptr{C_Sparse{Tv}},
-                (Csize_t, Csize_t, Csize_t, Cint,
-                 Cint, Cint, Cint, Ptr{Cvoid}),
-                nrow, ncol, nzmax, sorted,
-                packed, stype, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-
-function free!(ptr::Ptr{C_Sparse{Tv}}) where Tv<:VTypes
-    @isok ccall((@cholmod_name("free_sparse"), :libcholmod), Cint,
-            (Ref{Ptr{C_Sparse{Tv}}}, Ptr{UInt8}),
-                ptr, common_struct[Threads.threadid()])
-end
-
-function free!(ptr::Ptr{C_Factor{Tv}}) where Tv<:VTypes
-    # Warning! Important that finalizer doesn't modify the global Common struct.
-    @isok ccall((@cholmod_name("free_factor"), :libcholmod), Cint,
-            (Ref{Ptr{C_Factor{Tv}}}, Ptr{Cvoid}),
-                ptr, common_struct[Threads.threadid()])
-end
-
-function aat(A::Sparse{Tv}, fset::Vector{SuiteSparse_long}, mode::Integer) where Tv<:VRealTypes
-    Sparse(ccall((@cholmod_name("aat"), :libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{SuiteSparse_long}, Csize_t, Cint, Ptr{UInt8}),
-                A, fset, length(fset), mode, common_struct[Threads.threadid()]))
-end
-
-function sparse_to_dense(A::Sparse{Tv}) where Tv<:VTypes
-    Dense(ccall((@cholmod_name("sparse_to_dense"),:libcholmod),
-        Ptr{C_Dense{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                A, common_struct[Threads.threadid()]))
-end
-function dense_to_sparse(D::Dense{Tv}, ::Type{SuiteSparse_long}) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("dense_to_sparse"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Dense{Tv}}, Cint, Ptr{UInt8}),
-                D, true, common_struct[Threads.threadid()]))
-end
-
-function factor_to_sparse!(F::Factor{Tv}) where Tv<:VTypes
-    ss = unsafe_load(pointer(F))
-    ss.xtype == PATTERN && throw(CHOLMODException("only numeric factors are supported"))
-    Sparse(ccall((@cholmod_name("factor_to_sparse"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-                F, common_struct[Threads.threadid()]))
-end
-
-function change_factor!(F::Factor{Tv}, to_ll::Bool, to_super::Bool, to_packed::Bool,
-                        to_monotonic::Bool) where Tv<:VTypes
-    @isok ccall((@cholmod_name("change_factor"),:libcholmod), Cint,
-            (Cint, Cint, Cint, Cint, Cint, Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-                xtyp(Tv), to_ll, to_super, to_packed, to_monotonic, F, common_struct[Threads.threadid()])
-end
-
-function check_sparse(A::Sparse{Tv}) where Tv<:VTypes
-    ccall((@cholmod_name("check_sparse"),:libcholmod), Cint,
-          (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-           A, common_struct[Threads.threadid()]) != 0
-end
-
-function check_factor(F::Factor{Tv}) where Tv<:VTypes
-    ccall((@cholmod_name("check_factor"),:libcholmod), Cint,
-          (Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-           F, common_struct[Threads.threadid()]) != 0
-end
-
-function nnz(A::Sparse{Tv}) where Tv<:VTypes
-    ccall((@cholmod_name("nnz"),:libcholmod), Int,
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                A, common_struct[Threads.threadid()])
-end
-
-function speye(m::Integer, n::Integer, ::Type{Tv}) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("speye"), :libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Csize_t, Csize_t, Cint, Ptr{UInt8}),
-                m, n, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-
-function spzeros(m::Integer, n::Integer, nzmax::Integer, ::Type{Tv}) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("spzeros"), :libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Csize_t, Csize_t, Csize_t, Cint, Ptr{UInt8}),
-             m, n, nzmax, xtyp(Tv), common_struct[Threads.threadid()]))
-end
-
-function transpose_(A::Sparse{Tv}, values::Integer) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("transpose"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Cint, Ptr{UInt8}),
-                A, values, common_struct[Threads.threadid()]))
-end
-
-function copy(F::Factor{Tv}) where Tv<:VTypes
-    Factor(ccall((@cholmod_name("copy_factor"),:libcholmod),
-        Ptr{C_Factor{Tv}},
-            (Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-                F, common_struct[Threads.threadid()]))
-end
-function copy(A::Sparse{Tv}) where Tv<:VTypes
-    Sparse(ccall((@cholmod_name("copy_sparse"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                A, common_struct[Threads.threadid()]))
-end
-function copy(A::Sparse{Tv}, stype::Integer, mode::Integer) where Tv<:VRealTypes
-    Sparse(ccall((@cholmod_name("copy"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Cint, Cint, Ptr{UInt8}),
-                A, stype, mode, common_struct[Threads.threadid()]))
-end
-
-### cholmod_check.h ###
-function print_sparse(A::Sparse{Tv}, name::String) where Tv<:VTypes
-    isascii(name) || error("non-ASCII name: $name")
-    set_print_level(common_struct[Threads.threadid()], 3)
-    @isok ccall((@cholmod_name("print_sparse"),:libcholmod), Cint,
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}, Ptr{UInt8}),
-                 A, name, common_struct[Threads.threadid()])
-    nothing
-end
-function print_factor(F::Factor{Tv}, name::String) where Tv<:VTypes
-    set_print_level(common_struct[Threads.threadid()], 3)
-    @isok ccall((@cholmod_name("print_factor"),:libcholmod), Cint,
-            (Ptr{C_Factor{Tv}}, Ptr{UInt8}, Ptr{UInt8}),
-                F, name, common_struct[Threads.threadid()])
-    nothing
-end
-
-### cholmod_matrixops.h ###
-function ssmult(A::Sparse{Tv}, B::Sparse{Tv}, stype::Integer,
-        values::Bool, sorted::Bool) where Tv<:VRealTypes
-    lA = unsafe_load(pointer(A))
-    lB = unsafe_load(pointer(B))
-    if lA.ncol != lB.nrow
-        throw(DimensionMismatch("inner matrix dimensions do not fit"))
-    end
-    Sparse(ccall((@cholmod_name("ssmult"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{C_Sparse{Tv}}, Cint, Cint,
-                Cint, Ptr{UInt8}),
-             A, B, stype, values,
-                sorted, common_struct[Threads.threadid()]))
-end
-
-function norm_sparse(A::Sparse{Tv}, norm::Integer) where Tv<:VTypes
-    if norm != 0 && norm != 1
-        throw(ArgumentError("norm argument must be either 0 or 1"))
-    end
-    ccall((@cholmod_name("norm_sparse"), :libcholmod), Cdouble,
-            (Ptr{C_Sparse{Tv}}, Cint, Ptr{UInt8}),
-                A, norm, common_struct[Threads.threadid()])
-end
-
-function horzcat(A::Sparse{Tv}, B::Sparse{Tv}, values::Bool) where Tv<:VRealTypes
-    Sparse(ccall((@cholmod_name("horzcat"), :libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{C_Sparse{Tv}}, Cint, Ptr{UInt8}),
-             A, B, values, common_struct[Threads.threadid()]))
-end
-
-function scale!(S::Dense{Tv}, scale::Integer, A::Sparse{Tv}) where Tv<:VRealTypes
-    sS = unsafe_load(pointer(S))
-    sA = unsafe_load(pointer(A))
-    if sS.ncol != 1 && sS.nrow != 1
-        throw(DimensionMismatch("first argument must be a vector"))
-    end
-    if scale == SCALAR && sS.nrow != 1
-        throw(DimensionMismatch("scaling argument must have length one"))
-    elseif scale == ROW && sS.nrow*sS.ncol != sA.nrow
-        throw(DimensionMismatch("scaling vector has length $(sS.nrow*sS.ncol), " *
-            "but matrix has $(sA.nrow) rows."))
-    elseif scale == COL && sS.nrow*sS.ncol != sA.ncol
-        throw(DimensionMismatch("scaling vector has length $(sS.nrow*sS.ncol), " *
-            "but matrix has $(sA.ncol) columns"))
-    elseif scale == SYM
-        if sA.nrow != sA.ncol
-            throw(DimensionMismatch("matrix must be square"))
-        elseif sS.nrow*sS.ncol != sA.nrow
-            throw(DimensionMismatch("scaling vector has length $(sS.nrow*sS.ncol), " *
-                "but matrix has $(sA.ncol) columns and rows"))
-        end
-    end
-
-    sA = unsafe_load(pointer(A))
-    @isok ccall((@cholmod_name("scale"),:libcholmod), Cint,
-            (Ptr{C_Dense{Tv}}, Cint, Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                S, scale, A, common_struct[Threads.threadid()])
-    A
-end
-
-function sdmult!(A::Sparse{Tv}, transpose::Bool,
-        α::Number, β::Number, X::Dense{Tv}, Y::Dense{Tv}) where Tv<:VTypes
-    m, n = size(A)
-    nc = transpose ? m : n
-    nr = transpose ? n : m
-    if nc != size(X, 1)
-        throw(DimensionMismatch("incompatible dimensions, $nc and $(size(X,1))"))
-    end
-    @isok ccall((@cholmod_name("sdmult"),:libcholmod), Cint,
-            (Ptr{C_Sparse{Tv}}, Cint,
-             Ref{ComplexF64}, Ref{ComplexF64},
-             Ptr{C_Dense{Tv}}, Ptr{C_Dense{Tv}}, Ptr{UInt8}),
-                A, transpose, α, β, X, Y, common_struct[Threads.threadid()])
-    Y
-end
-
-function vertcat(A::Sparse{Tv}, B::Sparse{Tv}, values::Bool) where Tv<:VRealTypes
-    Sparse(ccall((@cholmod_name("vertcat"), :libcholmod),
-            Ptr{C_Sparse{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{C_Sparse{Tv}}, Cint, Ptr{UInt8}),
-                A, B, values, common_struct[Threads.threadid()]))
-end
-
-function symmetry(A::Sparse{Tv}, option::Integer) where Tv<:VTypes
-    xmatched = Ref{SuiteSparse_long}()
-    pmatched = Ref{SuiteSparse_long}()
-    nzoffdiag = Ref{SuiteSparse_long}()
-    nzdiag = Ref{SuiteSparse_long}()
-    rv = ccall((@cholmod_name("symmetry"), :libcholmod), Cint,
-            (Ptr{C_Sparse{Tv}}, Cint, Ptr{SuiteSparse_long}, Ptr{SuiteSparse_long},
-                Ptr{SuiteSparse_long}, Ptr{SuiteSparse_long}, Ptr{UInt8}),
-                    A, option, xmatched, pmatched,
-                        nzoffdiag, nzdiag, common_struct[Threads.threadid()])
-    rv, xmatched[], pmatched[], nzoffdiag[], nzdiag[]
-end
-
-# cholmod_cholesky.h
-# For analyze, analyze_p, and factorize_p!, the Common argument must be
-# supplied in order to control if the factorization is LLt or LDLt
-function analyze(A::Sparse{Tv}, cmmn::Vector{UInt8}) where Tv<:VTypes
-    Factor(ccall((@cholmod_name("analyze"),:libcholmod),
-        Ptr{C_Factor{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                A, cmmn))
-end
-function analyze_p(A::Sparse{Tv}, perm::Vector{SuiteSparse_long},
-                   cmmn::Vector{UInt8}) where Tv<:VTypes
-    length(perm) != size(A,1) && throw(BoundsError())
-    Factor(ccall((@cholmod_name("analyze_p"),:libcholmod),
-            Ptr{C_Factor{Tv}},
-            (Ptr{C_Sparse{Tv}}, Ptr{SuiteSparse_long}, Ptr{SuiteSparse_long},
-                Csize_t, Ptr{UInt8}),
-                A, perm, C_NULL, 0, cmmn))
-end
-function factorize!(A::Sparse{Tv}, F::Factor{Tv}, cmmn::Vector{UInt8}) where Tv<:VTypes
-    @isok ccall((@cholmod_name("factorize"),:libcholmod), Cint,
-        (Ptr{C_Sparse{Tv}}, Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-            A, F, cmmn)
-    F
-end
-function factorize_p!(A::Sparse{Tv}, β::Real, F::Factor{Tv}, cmmn::Vector{UInt8}) where Tv<:VTypes
-    # note that β is passed as a complex number (double beta[2]),
-    # but the CHOLMOD manual says that only beta[0] (real part) is used
-    @isok ccall((@cholmod_name("factorize_p"),:libcholmod), Cint,
-        (Ptr{C_Sparse{Tv}}, Ref{ComplexF64}, Ptr{SuiteSparse_long}, Csize_t,
-         Ptr{C_Factor{Tv}}, Ptr{UInt8}),
-            A, β, C_NULL, 0, F, cmmn)
-    F
-end
-
-function solve(sys::Integer, F::Factor{Tv}, B::Dense{Tv}) where Tv<:VTypes
-    if size(F,1) != size(B,1)
-        throw(DimensionMismatch("LHS and RHS should have the same number of rows. " *
-            "LHS has $(size(F,1)) rows, but RHS has $(size(B,1)) rows."))
-    end
-    if !issuccess(F)
-        s = unsafe_load(pointer(F))
-        if s.is_ll == 1
-            throw(LinearAlgebra.PosDefException(s.minor))
-        else
-            throw(LinearAlgebra.ZeroPivotException(s.minor))
-        end
-    end
-    Dense(ccall((@cholmod_name("solve"),:libcholmod), Ptr{C_Dense{Tv}},
-            (Cint, Ptr{C_Factor{Tv}}, Ptr{C_Dense{Tv}}, Ptr{UInt8}),
-                sys, F, B, common_struct[Threads.threadid()]))
-end
-
-function spsolve(sys::Integer, F::Factor{Tv}, B::Sparse{Tv}) where Tv<:VTypes
-    if size(F,1) != size(B,1)
-        throw(DimensionMismatch("LHS and RHS should have the same number of rows. " *
-            "LHS has $(size(F,1)) rows, but RHS has $(size(B,1)) rows."))
-    end
-    Sparse(ccall((@cholmod_name("spsolve"),:libcholmod),
-        Ptr{C_Sparse{Tv}},
-            (Cint, Ptr{C_Factor{Tv}}, Ptr{C_Sparse{Tv}}, Ptr{UInt8}),
-                sys, F, B, common_struct[Threads.threadid()]))
-end
-
-# Autodetects the types
-function read_sparse(file::Libc.FILE, ::Type{SuiteSparse_long})
-    ptr = ccall((@cholmod_name("read_sparse"), :libcholmod),
-        Ptr{C_Sparse{Cvoid}},
-            (Ptr{Cvoid}, Ptr{UInt8}),
-                file.ptr, common_struct[Threads.threadid()])
-    if ptr == C_NULL
-        throw(ArgumentError("sparse matrix construction failed. Check that input file is valid."))
-    end
-    Sparse(ptr)
-end
-
-function read_sparse(file::IO, T)
-    cfile = Libc.FILE(file)
-    try return read_sparse(cfile, T)
-    finally close(cfile)
-    end
-end
-
-function get_perm(F::Factor)
-    s = unsafe_load(pointer(F))
-    p = unsafe_wrap(Array, s.Perm, s.n, own = false)
-    p .+ 1
-end
-get_perm(FC::FactorComponent) = get_perm(Factor(FC))
-
-#########################
-# High level interfaces #
-#########################
-
-# Conversion/construction
-function Dense{T}(A::StridedVecOrMat) where T<:VTypes
-    d = allocate_dense(size(A, 1), size(A, 2), stride(A, 2), T)
-    GC.@preserve d begin
-        s = unsafe_load(pointer(d))
-        for (i, c) in enumerate(eachindex(A))
-            unsafe_store!(s.x, A[c], i)
-        end
-    end
-    d
-end
-function Dense{T}(A::Union{Adjoint{<:Any, <:StridedVecOrMat}, Transpose{<:Any, <:StridedVecOrMat}}) where T<:VTypes
-    d = allocate_dense(size(A, 1), size(A, 2), size(A, 1), T)
-    GC.@preserve d begin
-        s = unsafe_load(pointer(d))
-        for (i, c) in enumerate(eachindex(A))
-            unsafe_store!(s.x, A[c], i)
-        end
-    end
-    d
-end
-function Dense(A::Union{StridedVecOrMat, Adjoint{<:Any, <:StridedVecOrMat}, Transpose{<:Any, <:StridedVecOrMat}})
-    T = promote_type(eltype(A), Float64)
-    return Dense{T}(A)
-end
-Dense(A::Sparse) = sparse_to_dense(A)
-
-# This constructior assumes zero based colptr and rowval
-function Sparse(m::Integer, n::Integer,
-        colptr0::Vector{SuiteSparse_long}, rowval0::Vector{SuiteSparse_long},
-        nzval::Vector{Tv}, stype) where Tv<:VTypes
-    # checks
-    ## length of input
-    if length(colptr0) <= n
-        throw(ArgumentError("length of colptr0 must be at least n + 1 = $(n + 1) but was $(length(colptr0))"))
-    end
-    if colptr0[n + 1] > length(rowval0)
-        throw(ArgumentError("length of rowval0 is $(length(rowval0)) but value of colptr0 requires length to be at least $(colptr0[n + 1])"))
-    end
-    if colptr0[n + 1] > length(nzval)
-        throw(ArgumentError("length of nzval is $(length(nzval)) but value of colptr0 requires length to be at least $(colptr0[n + 1])"))
-    end
-    ## columns are sorted
-    iss = true
-    for i = 2:length(colptr0)
-        if !issorted(view(rowval0, colptr0[i - 1] + 1:colptr0[i]))
-            iss = false
-            break
-        end
-    end
-
-    o = allocate_sparse(m, n, colptr0[n + 1], iss, true, stype, Tv)
-    s = unsafe_load(pointer(o))
-
-    unsafe_copyto!(s.p, pointer(colptr0), n + 1)
-    unsafe_copyto!(s.i, pointer(rowval0), colptr0[n + 1])
-    unsafe_copyto!(s.x, pointer(nzval) , colptr0[n + 1])
-
-    @isok check_sparse(o)
-
-    return o
-end
-
-function Sparse(m::Integer, n::Integer,
-        colptr0::Vector{SuiteSparse_long},
-        rowval0::Vector{SuiteSparse_long},
-        nzval::Vector{<:VTypes})
-    o = Sparse(m, n, colptr0, rowval0, nzval, 0)
-
-    # sort indices
-    sort!(o)
-
-    # check if array is symmetric and change stype if it is
-    if ishermitian(o)
-        change_stype!(o, -1)
-    end
-    o
-end
-
-function Sparse{Tv}(A::SparseMatrixCSC, stype::Integer) where Tv<:VTypes
-    ## Check length of input. This should never fail but see #20024
-    if length(getcolptr(A)) <= size(A, 2)
-        throw(ArgumentError("length of colptr must be at least size(A,2) + 1 = $(size(A, 2) + 1) but was $(length(getcolptr(A)))"))
-    end
-    if nnz(A) > length(rowvals(A))
-        throw(ArgumentError("length of rowval is $(length(rowvals(A))) but value of colptr requires length to be at least $(nnz(A))"))
-    end
-    if nnz(A) > length(nonzeros(A))
-        throw(ArgumentError("length of nzval is $(length(nonzeros(A))) but value of colptr requires length to be at least $(nnz(A))"))
-    end
-
-    o = allocate_sparse(size(A, 1), size(A, 2), nnz(A), true, true, stype, Tv)
-    s = unsafe_load(pointer(o))
-    for i = 1:(size(A, 2) + 1)
-        unsafe_store!(s.p, getcolptr(A)[i] - 1, i)
-    end
-    for i = 1:nnz(A)
-        unsafe_store!(s.i, rowvals(A)[i] - 1, i)
-    end
-    if Tv <: Complex && stype != 0
-        # Need to remove any non real elements in the diagonal because, in contrast to
-        # BLAS/LAPACK these are not ignored by CHOLMOD. If even tiny imaginary parts are
-        # present CHOLMOD will fail with a non-positive definite/zero pivot error.
-        for j = 1:size(A, 2)
-            for ip = getcolptr(A)[j]:getcolptr(A)[j + 1] - 1
-                v = nonzeros(A)[ip]
-                unsafe_store!(s.x, rowvals(A)[ip] == j ? Complex(real(v)) : v, ip)
-            end
-        end
-    elseif Tv == eltype(nonzeros(A))
-        unsafe_copyto!(s.x, pointer(nonzeros(A)), nnz(A))
-    else
-        for i = 1:nnz(A)
-            unsafe_store!(s.x, nonzeros(A)[i], i)
-        end
-    end
-
-    @isok check_sparse(o)
-
-    return o
-end
-
-# handle promotion
-function Sparse(A::SparseMatrixCSC{Tv,SuiteSparse_long}, stype::Integer) where {Tv}
-    T = promote_type(Tv, Float64)
-    return Sparse{T}(A, stype)
-end
-
-# convert SparseVectors into CHOLMOD Sparse types through a mx1 CSC matrix
-Sparse(A::SparseVector) = Sparse(SparseMatrixCSC(A))
-function Sparse(A::SparseMatrixCSC)
-    o = Sparse(A, 0)
-    # check if array is symmetric and change stype if it is
-    if ishermitian(o)
-        change_stype!(o, -1)
-    end
-    o
-end
-
-Sparse(A::Symmetric{Tv, SparseMatrixCSC{Tv,Ti}}) where {Tv<:Real, Ti} =
-    Sparse(A.data, A.uplo == 'L' ? -1 : 1)
-Sparse(A::Hermitian{Tv,SparseMatrixCSC{Tv,Ti}}) where {Tv, Ti} =
-    Sparse(A.data, A.uplo == 'L' ? -1 : 1)
-
-Sparse(A::Dense) = dense_to_sparse(A, SuiteSparse_long)
-Sparse(L::Factor) = factor_to_sparse!(copy(L))
-function Sparse(filename::String)
-    open(filename) do f
-        return read_sparse(f, SuiteSparse_long)
-    end
-end
-
-## conversion back to base Julia types
-function Matrix{T}(D::Dense{T}) where T
-    s = unsafe_load(pointer(D))
-    a = Matrix{T}(undef, s.nrow, s.ncol)
-    copyto!(a, D)
-end
-
-Base.copyto!(dest::Base.PermutedDimsArrays.PermutedDimsArray, src::Dense) = _copy!(dest, src) # ambig
-Base.copyto!(dest::Dense{T}, D::Dense{T}) where {T<:VTypes} = _copy!(dest, D)
-Base.copyto!(dest::AbstractArray{T}, D::Dense{T}) where {T<:VTypes} = _copy!(dest, D)
-Base.copyto!(dest::AbstractArray{T,2}, D::Dense{T}) where {T<:VTypes} = _copy!(dest, D)
-Base.copyto!(dest::AbstractArray, D::Dense) = _copy!(dest, D)
-
-function _copy!(dest::AbstractArray, D::Dense)
-    require_one_based_indexing(dest)
-    s = unsafe_load(pointer(D))
-    n = s.nrow*s.ncol
-    n <= length(dest) || throw(BoundsError(dest, n))
-    if s.d == s.nrow && isa(dest, Array)
-        unsafe_copyto!(pointer(dest), s.x, s.d*s.ncol)
-    else
-        k = 0
-        for j = 1:s.ncol
-            for i = 1:s.nrow
-                dest[k+=1] = unsafe_load(s.x, i + (j - 1)*s.d)
-            end
-        end
-    end
-    dest
-end
-Matrix(D::Dense{T}) where {T} = Matrix{T}(D)
-function Vector{T}(D::Dense{T}) where T
-    if size(D, 2) > 1
-        throw(DimensionMismatch("input must be a vector but had $(size(D, 2)) columns"))
-    end
-    copyto!(Vector{T}(undef, size(D, 1)), D)
-end
-Vector(D::Dense{T}) where {T} = Vector{T}(D)
-
-function SparseMatrixCSC{Tv,SuiteSparse_long}(A::Sparse{Tv}) where Tv
-    s = unsafe_load(pointer(A))
-    if s.stype != 0
-        throw(ArgumentError("matrix has stype != 0. Convert to matrix " *
-            "with stype == 0 before converting to SparseMatrixCSC"))
-    end
-
-    B = SparseMatrixCSC(s.nrow, s.ncol,
-        increment(unsafe_wrap(Array, s.p, (s.ncol + 1,), own = false)),
-        increment(unsafe_wrap(Array, s.i, (s.nzmax,), own = false)),
-        copy(unsafe_wrap(Array, s.x, (s.nzmax,), own = false)))
-
-    if s.sorted == 0
-        return SparseArrays.sortSparseMatrixCSC!(B)
-    else
-        return B
-    end
-end
-
-function Symmetric{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}}(A::Sparse{Float64})
-    s = unsafe_load(pointer(A))
-    if !issymmetric(A)
-        throw(ArgumentError("matrix is not symmetric"))
-    end
-
-    B = Symmetric(SparseMatrixCSC(s.nrow, s.ncol,
-        increment(unsafe_wrap(Array, s.p, (s.ncol + 1,), own = false)),
-        increment(unsafe_wrap(Array, s.i, (s.nzmax,), own = false)),
-        copy(unsafe_wrap(Array, s.x, (s.nzmax,), own = false))), s.stype > 0 ? :U : :L)
-
-    if s.sorted == 0
-        return SparseArrays.sortSparseMatrixCSC!(B.data)
-    else
-        return B
-    end
-end
-convert(T::Type{Symmetric{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}}}, A::Sparse{Float64}) = T(A)
-
-function Hermitian{Tv,SparseMatrixCSC{Tv,SuiteSparse_long}}(A::Sparse{Tv}) where Tv<:VTypes
-    s = unsafe_load(pointer(A))
-    if !ishermitian(A)
-        throw(ArgumentError("matrix is not Hermitian"))
-    end
-
-    B = Hermitian(SparseMatrixCSC(s.nrow, s.ncol,
-        increment(unsafe_wrap(Array, s.p, (s.ncol + 1,), own = false)),
-        increment(unsafe_wrap(Array, s.i, (s.nzmax,), own = false)),
-        copy(unsafe_wrap(Array, s.x, (s.nzmax,), own = false))), s.stype > 0 ? :U : :L)
-
-    if s.sorted == 0
-        return SparseArrays.sortSparseMatrixCSC!(B.data)
-    else
-        return B
-    end
-end
-convert(T::Type{Hermitian{Tv,SparseMatrixCSC{Tv,SuiteSparse_long}}}, A::Sparse{Tv}) where {Tv<:VTypes} = T(A)
-
-function sparse(A::Sparse{Float64}) # Notice! Cannot be type stable because of stype
-    s = unsafe_load(pointer(A))
-    if s.stype == 0
-        return SparseMatrixCSC{Float64,SuiteSparse_long}(A)
-    end
-    return Symmetric{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}}(A)
-end
-function sparse(A::Sparse{ComplexF64}) # Notice! Cannot be type stable because of stype
-    s = unsafe_load(pointer(A))
-    if s.stype == 0
-        return SparseMatrixCSC{ComplexF64,SuiteSparse_long}(A)
-    end
-    return Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,SuiteSparse_long}}(A)
-end
-function sparse(F::Factor)
-    s = unsafe_load(pointer(F))
-    if s.is_ll != 0
-        L = Sparse(F)
-        A = sparse(L*L')
-    else
-        LD = sparse(F.LD)
-        L, d = getLd!(LD)
-        A = (L * Diagonal(d)) * L'
-    end
-    SparseArrays.sortSparseMatrixCSC!(A)
-    p = get_perm(F)
-    if p != [1:s.n;]
-        pinv = Vector{Int}(undef, length(p))
-        for k = 1:length(p)
-            pinv[p[k]] = k
-        end
-        A = A[pinv,pinv]
-    end
-    A
-end
-
-sparse(D::Dense) = sparse(Sparse(D))
-
-function sparse(FC::FactorComponent{Tv,:L}) where Tv
-    F = Factor(FC)
-    s = unsafe_load(pointer(F))
-    if s.is_ll == 0
-        throw(CHOLMODException("sparse: supported only for :LD on LDLt factorizations"))
-    end
-    sparse(Sparse(F))
-end
-sparse(FC::FactorComponent{Tv,:LD}) where {Tv} = sparse(Sparse(Factor(FC)))
-
-# Calculate the offset into the stype field of the cholmod_sparse_struct and
-# change the value
-let offset = fieldoffset(C_Sparse{Float64}, findfirst(name -> name === :stype, fieldnames(C_Sparse{Float64}))::Int)
-    global change_stype!
-    function change_stype!(A::Sparse, i::Integer)
-        unsafe_store!(convert(Ptr{Cint}, pointer(A)), i, div(offset, 4) + 1)
-        return A
-    end
-end
-
-free!(A::Dense)  = free!(pointer(A))
-free!(A::Sparse) = free!(pointer(A))
-free!(F::Factor) = free!(pointer(F))
-
-eltype(::Type{Dense{T}}) where {T<:VTypes} = T
-eltype(::Type{Factor{T}}) where {T<:VTypes} = T
-eltype(::Type{Sparse{T}}) where {T<:VTypes} = T
-
-nnz(F::Factor) = nnz(Sparse(F))
-
-function show(io::IO, F::Factor)
-    println(io, typeof(F))
-    showfactor(io, F)
-end
-
-function show(io::IO, FC::FactorComponent)
-    println(io, typeof(FC))
-    showfactor(io, Factor(FC))
-end
-
-function showfactor(io::IO, F::Factor)
-    s = unsafe_load(pointer(F))
-    print(io, """
-        type:    $(s.is_ll!=0 ? "LLt" : "LDLt")
-        method:  $(s.is_super!=0 ? "supernodal" : "simplicial")
-        maxnnz:  $(Int(s.nzmax))
-        nnz:     $(nnz(F))
-        success: $(s.minor == size(F, 1))
-        """)
-end
-
-# getindex not defined for these, so don't use the normal array printer
-show(io::IO, ::MIME"text/plain", FC::FactorComponent) = show(io, FC)
-show(io::IO, ::MIME"text/plain", F::Factor) = show(io, F)
-
-isvalid(A::Dense) = check_dense(A)
-isvalid(A::Sparse) = check_sparse(A)
-isvalid(A::Factor) = check_factor(A)
-
-function size(A::Union{Dense,Sparse})
-    s = unsafe_load(pointer(A))
-    return (Int(s.nrow), Int(s.ncol))
-end
-function size(F::Factor, i::Integer)
-    if i < 1
-        throw(ArgumentError("dimension must be positive"))
-    end
-    s = unsafe_load(pointer(F))
-    if i <= 2
-        return Int(s.n)
-    end
-    return 1
-end
-size(F::Factor) = (size(F, 1), size(F, 2))
-axes(A::Union{Dense,Sparse,Factor}) = map(Base.OneTo, size(A))
-
-IndexStyle(::Dense) = IndexLinear()
-
-size(FC::FactorComponent, i::Integer) = size(FC.F, i)
-size(FC::FactorComponent) = size(FC.F)
-
-adjoint(FC::FactorComponent{Tv,:L}) where {Tv} = FactorComponent{Tv,:U}(FC.F)
-adjoint(FC::FactorComponent{Tv,:U}) where {Tv} = FactorComponent{Tv,:L}(FC.F)
-adjoint(FC::FactorComponent{Tv,:PtL}) where {Tv} = FactorComponent{Tv,:UP}(FC.F)
-adjoint(FC::FactorComponent{Tv,:UP}) where {Tv} = FactorComponent{Tv,:PtL}(FC.F)
-adjoint(FC::FactorComponent{Tv,:D}) where {Tv} = FC
-adjoint(FC::FactorComponent{Tv,:LD}) where {Tv} = FactorComponent{Tv,:DU}(FC.F)
-adjoint(FC::FactorComponent{Tv,:DU}) where {Tv} = FactorComponent{Tv,:LD}(FC.F)
-adjoint(FC::FactorComponent{Tv,:PtLD}) where {Tv} = FactorComponent{Tv,:DUP}(FC.F)
-adjoint(FC::FactorComponent{Tv,:DUP}) where {Tv} = FactorComponent{Tv,:PtLD}(FC.F)
-
-function getindex(A::Dense, i::Integer)
-    s = unsafe_load(pointer(A))
-    0 < i <= s.nrow*s.ncol || throw(BoundsError())
-    unsafe_load(s.x, i)
-end
-
-IndexStyle(::Sparse) = IndexCartesian()
-function getindex(A::Sparse{T}, i0::Integer, i1::Integer) where T
-    s = unsafe_load(pointer(A))
-    !(1 <= i0 <= s.nrow && 1 <= i1 <= s.ncol) && throw(BoundsError())
-    s.stype < 0 && i0 < i1 && return conj(A[i1,i0])
-    s.stype > 0 && i0 > i1 && return conj(A[i1,i0])
-
-    r1 = Int(unsafe_load(s.p, i1) + 1)
-    r2 = Int(unsafe_load(s.p, i1 + 1))
-    (r1 > r2) && return zero(T)
-    r1 = Int(searchsortedfirst(unsafe_wrap(Array, s.i, (s.nzmax,), own = false),
-        i0 - 1, r1, r2, Base.Order.Forward))
-    ((r1 > r2) || (unsafe_load(s.i, r1) + 1 != i0)) ? zero(T) : unsafe_load(s.x, r1)
-end
-
-@inline function getproperty(F::Factor, sym::Symbol)
-    if sym === :p
-        return get_perm(F)
-    elseif sym === :ptr
-        return getfield(F, :ptr)
-    else
-        return FactorComponent(F, sym)
-    end
-end
-
-function getLd!(S::SparseMatrixCSC)
-    d = Vector{eltype(S)}(undef, size(S, 1))
-    fill!(d, 0)
-    col = 1
-    for k = 1:nnz(S)
-        while k >= getcolptr(S)[col+1]
-            col += 1
-        end
-        if rowvals(S)[k] == col
-            d[col] = nonzeros(S)[k]
-            nonzeros(S)[k] = 1
-        end
-    end
-    S, d
-end
-
-## Multiplication
-(*)(A::Sparse, B::Sparse) = ssmult(A, B, 0, true, true)
-(*)(A::Sparse, B::Dense) = sdmult!(A, false, 1., 0., B, zeros(size(A, 1), size(B, 2)))
-(*)(A::Sparse, B::VecOrMat) = (*)(A, Dense(B))
-
-function *(A::Sparse{Tv}, adjB::Adjoint{Tv,Sparse{Tv}}) where Tv<:VRealTypes
-    B = adjB.parent
-    if A !== B
-        aa1 = transpose_(B, 2)
-        ## result of ssmult will have stype==0, contain numerical values and be sorted
-        return ssmult(A, aa1, 0, true, true)
-    end
-
-    ## The A*A' case is handled by cholmod_aat. This routine requires
-    ## A->stype == 0 (storage of upper and lower parts). If necessary
-    ## the matrix A is first converted to stype == 0
-    s = unsafe_load(pointer(A))
-    fset = s.ncol == 0 ? SuiteSparse_long[] : SuiteSparse_long[0:s.ncol-1;]
-    if s.stype != 0
-        aa1 = copy(A, 0, 1)
-        return aat(aa1, fset, 1)
-    else
-        return aat(A, fset, 1)
-    end
-end
-
-function *(adjA::Adjoint{<:Any,<:Sparse}, B::Sparse)
-    A = adjA.parent
-    aa1 = transpose_(A, 2)
-    if A === B
-        return *(aa1, adjoint(aa1))
-    end
-    ## result of ssmult will have stype==0, contain numerical values and be sorted
-    return ssmult(aa1, B, 0, true, true)
-end
-
-*(adjA::Adjoint{<:Any,<:Sparse}, B::Dense) =
-    (A = adjA.parent; sdmult!(A, true, 1., 0., B, zeros(size(A, 2), size(B, 2))))
-*(adjA::Adjoint{<:Any,<:Sparse}, B::VecOrMat) =
-    (A = adjA.parent; *(adjoint(A), Dense(B)))
-
-
-## Factorization methods
-
-## Compute that symbolic factorization only
-function fact_(A::Sparse{<:VTypes}, cm::Array{UInt8};
-    perm::Union{Nothing,AbstractVector{SuiteSparse_long}}=nothing,
-    postorder::Bool=true, userperm_only::Bool=true)
-
-    sA = unsafe_load(pointer(A))
-    sA.stype == 0 && throw(ArgumentError("sparse matrix is not symmetric/Hermitian"))
-
-    if !postorder
-        unsafe_store!(common_postorder[Threads.threadid()], 0)
-    end
-
-    if perm === nothing || isempty(perm) # TODO: deprecate empty perm
-        F = analyze(A, cm)
-    else # user permutation provided
-        if userperm_only # use perm even if it is worse than AMD
-            unsafe_store!(common_nmethods[Threads.threadid()], 1)
-        end
-        F = analyze_p(A, SuiteSparse_long[p-1 for p in perm], cm)
-    end
-
-    return F
-end
-
-function cholesky!(F::Factor{Tv}, A::Sparse{Tv};
-                   shift::Real=0.0, check::Bool = true) where Tv
-    # Makes it an LLt
-    unsafe_store!(common_final_ll[Threads.threadid()], 1)
-
-    # Compute the numerical factorization
-    factorize_p!(A, shift, F, common_struct[Threads.threadid()])
-
-    check && (issuccess(F) || throw(LinearAlgebra.PosDefException(1)))
-    return F
-end
-
-"""
-    cholesky!(F::CHOLMOD.Factor, A::SparseMatrixCSC; shift = 0.0, check = true) -> CHOLMOD.Factor
-
-Compute the Cholesky (``LL'``) factorization of `A`, reusing the symbolic
-factorization `F`. `A` must be a [`SparseMatrixCSC`](@ref) or a [`Symmetric`](@ref)/
-[`Hermitian`](@ref) view of a `SparseMatrixCSC`. Note that even if `A` doesn't
-have the type tag, it must still be symmetric or Hermitian.
-
-See also [`cholesky`](@ref).
-
-!!! note
-    This method uses the CHOLMOD library from SuiteSparse, which only supports
-    doubles or complex doubles. Input matrices not of those element types will
-    be converted to `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}`
-    as appropriate.
-"""
-cholesky!(F::Factor, A::Union{SparseMatrixCSC{T},
-          SparseMatrixCSC{Complex{T}},
-          Symmetric{T,SparseMatrixCSC{T,SuiteSparse_long}},
-          Hermitian{Complex{T},SparseMatrixCSC{Complex{T},SuiteSparse_long}},
-          Hermitian{T,SparseMatrixCSC{T,SuiteSparse_long}}};
-          shift = 0.0, check::Bool = true) where {T<:Real} =
-    cholesky!(F, Sparse(A); shift = shift, check = check)
-
-function cholesky(A::Sparse; shift::Real=0.0, check::Bool = true,
-    perm::Union{Nothing,AbstractVector{SuiteSparse_long}}=nothing)
-
-    cm = defaults(common_struct[Threads.threadid()])
-    set_print_level(cm, 0)
-
-    # Compute the symbolic factorization
-    F = fact_(A, cm; perm = perm)
-
-    # Compute the numerical factorization
-    cholesky!(F, A; shift = shift, check = check)
-
-    return F
-end
-
-"""
-    cholesky(A::SparseMatrixCSC; shift = 0.0, check = true, perm = nothing) -> CHOLMOD.Factor
-
-Compute the Cholesky factorization of a sparse positive definite matrix `A`.
-`A` must be a [`SparseMatrixCSC`](@ref) or a [`Symmetric`](@ref)/[`Hermitian`](@ref)
-view of a `SparseMatrixCSC`. Note that even if `A` doesn't
-have the type tag, it must still be symmetric or Hermitian.
-If `perm` is not given, a fill-reducing permutation is used.
-`F = cholesky(A)` is most frequently used to solve systems of equations with `F\\b`,
-but also the methods [`diag`](@ref), [`det`](@ref), and
-[`logdet`](@ref) are defined for `F`.
-You can also extract individual factors from `F`, using `F.L`.
-However, since pivoting is on by default, the factorization is internally
-represented as `A == P'*L*L'*P` with a permutation matrix `P`;
-using just `L` without accounting for `P` will give incorrect answers.
-To include the effects of permutation,
-it's typically preferable to extract "combined" factors like `PtL = F.PtL`
-(the equivalent of `P'*L`) and `LtP = F.UP` (the equivalent of `L'*P`).
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-Setting the optional `shift` keyword argument computes the factorization of
-`A+shift*I` instead of `A`. If the `perm` argument is provided,
-it should be a permutation of `1:size(A,1)` giving the ordering to use
-(instead of CHOLMOD's default AMD ordering).
-
-# Examples
-
-In the following example, the fill-reducing permutation used is `[3, 2, 1]`.
-If `perm` is set to `1:3` to enforce no permutation, the number of nonzero
-elements in the factor is 6.
-```jldoctest
-julia> A = [2 1 1; 1 2 0; 1 0 2]
-3×3 Matrix{Int64}:
- 2  1  1
- 1  2  0
- 1  0  2
-
-julia> C = cholesky(sparse(A))
-SuiteSparse.CHOLMOD.Factor{Float64}
-type:    LLt
-method:  simplicial
-maxnnz:  5
-nnz:     5
-success: true
-
-julia> C.p
-3-element Vector{Int64}:
- 3
- 2
- 1
-
-julia> L = sparse(C.L);
-
-julia> Matrix(L)
-3×3 Matrix{Float64}:
- 1.41421   0.0       0.0
- 0.0       1.41421   0.0
- 0.707107  0.707107  1.0
-
-julia> L * L' ≈ A[C.p, C.p]
-true
-
-julia> P = sparse(1:3, C.p, ones(3))
-3×3 SparseMatrixCSC{Float64, Int64} with 3 stored entries:
-  ⋅    ⋅   1.0
-  ⋅   1.0   ⋅
- 1.0   ⋅    ⋅
-
-julia> P' * L * L' * P ≈ A
-true
-
-julia> C = cholesky(sparse(A), perm=1:3)
-SuiteSparse.CHOLMOD.Factor{Float64}
-type:    LLt
-method:  simplicial
-maxnnz:  6
-nnz:     6
-success: true
-
-julia> L = sparse(C.L);
-
-julia> Matrix(L)
-3×3 Matrix{Float64}:
- 1.41421    0.0       0.0
- 0.707107   1.22474   0.0
- 0.707107  -0.408248  1.1547
-
-julia> L * L' ≈ A
-true
-```
-
-!!! note
-    This method uses the CHOLMOD library from SuiteSparse, which only supports
-    doubles or complex doubles. Input matrices not of those element types will
-    be converted to `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}`
-    as appropriate.
-
-    Many other functions from CHOLMOD are wrapped but not exported from the
-    `Base.SparseArrays.CHOLMOD` module.
-"""
-cholesky(A::Union{SparseMatrixCSC{T}, SparseMatrixCSC{Complex{T}},
-    Symmetric{T,SparseMatrixCSC{T,SuiteSparse_long}},
-    Hermitian{Complex{T},SparseMatrixCSC{Complex{T},SuiteSparse_long}},
-    Hermitian{T,SparseMatrixCSC{T,SuiteSparse_long}}};
-    kws...) where {T<:Real} = cholesky(Sparse(A); kws...)
-
-
-function ldlt!(F::Factor{Tv}, A::Sparse{Tv};
-               shift::Real=0.0, check::Bool = true) where Tv
-    cm = defaults(common_struct[Threads.threadid()])
-    set_print_level(cm, 0)
-
-    # Makes it an LDLt
-    change_factor!(F, false, false, true, false)
-
-    # Compute the numerical factorization
-    factorize_p!(A, shift, F, cm)
-
-    check && (issuccess(F) || throw(LinearAlgebra.ZeroPivotException(1)))
-    return F
-end
-
-"""
-    ldlt!(F::CHOLMOD.Factor, A::SparseMatrixCSC; shift = 0.0, check = true) -> CHOLMOD.Factor
-
-Compute the ``LDL'`` factorization of `A`, reusing the symbolic factorization `F`.
-`A` must be a [`SparseMatrixCSC`](@ref) or a [`Symmetric`](@ref)/[`Hermitian`](@ref)
-view of a `SparseMatrixCSC`. Note that even if `A` doesn't
-have the type tag, it must still be symmetric or Hermitian.
-
-See also [`ldlt`](@ref).
-
-!!! note
-    This method uses the CHOLMOD library from SuiteSparse, which only supports
-    doubles or complex doubles. Input matrices not of those element types will
-    be converted to `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}`
-    as appropriate.
-"""
-ldlt!(F::Factor, A::Union{SparseMatrixCSC{T},
-    SparseMatrixCSC{Complex{T}},
-    Symmetric{T,SparseMatrixCSC{T,SuiteSparse_long}},
-    Hermitian{Complex{T},SparseMatrixCSC{Complex{T},SuiteSparse_long}},
-    Hermitian{T,SparseMatrixCSC{T,SuiteSparse_long}}};
-    shift = 0.0, check::Bool = true) where {T<:Real} =
-    ldlt!(F, Sparse(A), shift = shift, check = check)
-
-function ldlt(A::Sparse; shift::Real=0.0, check::Bool = true,
-    perm::Union{Nothing,AbstractVector{SuiteSparse_long}}=nothing)
-
-    cm = defaults(common_struct[Threads.threadid()])
-    set_print_level(cm, 0)
-
-    # Makes it an LDLt
-    unsafe_store!(common_final_ll[Threads.threadid()], 0)
-    # Really make sure it's an LDLt by avoiding supernodal factorization
-    unsafe_store!(common_supernodal[Threads.threadid()], 0)
-
-    # Compute the symbolic factorization
-    F = fact_(A, cm; perm = perm)
-
-    # Compute the numerical factorization
-    ldlt!(F, A; shift = shift, check = check)
-
-    return F
-end
-
-"""
-    ldlt(A::SparseMatrixCSC; shift = 0.0, check = true, perm=nothing) -> CHOLMOD.Factor
-
-Compute the ``LDL'`` factorization of a sparse matrix `A`.
-`A` must be a [`SparseMatrixCSC`](@ref) or a [`Symmetric`](@ref)/[`Hermitian`](@ref)
-view of a `SparseMatrixCSC`. Note that even if `A` doesn't
-have the type tag, it must still be symmetric or Hermitian.
-A fill-reducing permutation is used. `F = ldlt(A)` is most frequently
-used to solve systems of equations `A*x = b` with `F\\b`. The returned
-factorization object `F` also supports the methods [`diag`](@ref),
-[`det`](@ref), [`logdet`](@ref), and [`inv`](@ref).
-You can extract individual factors from `F` using `F.L`.
-However, since pivoting is on by default, the factorization is internally
-represented as `A == P'*L*D*L'*P` with a permutation matrix `P`;
-using just `L` without accounting for `P` will give incorrect answers.
-To include the effects of permutation, it is typically preferable to extract
-"combined" factors like `PtL = F.PtL` (the equivalent of
-`P'*L`) and `LtP = F.UP` (the equivalent of `L'*P`).
-The complete list of supported factors is `:L, :PtL, :D, :UP, :U, :LD, :DU, :PtLD, :DUP`.
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-Setting the optional `shift` keyword argument computes the factorization of
-`A+shift*I` instead of `A`. If the `perm` argument is provided,
-it should be a permutation of `1:size(A,1)` giving the ordering to use
-(instead of CHOLMOD's default AMD ordering).
-
-!!! note
-    This method uses the CHOLMOD library from SuiteSparse, which only supports
-    doubles or complex doubles. Input matrices not of those element types will
-    be converted to `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}`
-    as appropriate.
-
-    Many other functions from CHOLMOD are wrapped but not exported from the
-    `Base.SparseArrays.CHOLMOD` module.
-"""
-ldlt(A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}},
-    Symmetric{T,SparseMatrixCSC{T,SuiteSparse_long}},
-    Hermitian{Complex{T},SparseMatrixCSC{Complex{T},SuiteSparse_long}},
-    Hermitian{T,SparseMatrixCSC{T,SuiteSparse_long}}};
-    kws...) where {T<:Real} = ldlt(Sparse(A); kws...)
-
-## Rank updates
-
-"""
-    lowrankupdowndate!(F::CHOLMOD.Factor, C::Sparse, update::Cint)
-
-Update an `LDLt` or `LLt` Factorization `F` of `A` to a factorization of `A ± C*C'`.
-
-If sparsity preserving factorization is used, i.e. `L*L' == P*A*P'` then the new
-factor will be `L*L' == P*A*P' + C'*C`
-
-`update`: `Cint(1)` for `A + CC'`, `Cint(0)` for `A - CC'`
-"""
-function lowrankupdowndate!(F::Factor{Tv}, C::Sparse{Tv}, update::Cint) where Tv<:VTypes
-    lF = unsafe_load(pointer(F))
-    lC = unsafe_load(pointer(C))
-    if lF.n != lC.nrow
-        throw(DimensionMismatch("matrix dimensions do not fit"))
-    end
-    @isok ccall((@cholmod_name("updown"), :libcholmod), Cint,
-        (Cint, Ptr{C_Sparse{Tv}}, Ptr{C_Factor{Tv}}, Ptr{Cvoid}),
-        update, C, F, common_struct[Threads.threadid()])
-    F
-end
-
-#Helper functions for rank updates
-lowrank_reorder(V::AbstractArray,p) = Sparse(sparse(V[p,:]))
-lowrank_reorder(V::AbstractSparseArray,p) = Sparse(V[p,:])
-
-"""
-    lowrankupdate!(F::CHOLMOD.Factor, C::AbstractArray)
-
-Update an `LDLt` or `LLt` Factorization `F` of `A` to a factorization of `A + C*C'`.
-
-`LLt` factorizations are converted to `LDLt`.
-
-See also [`lowrankupdate`](@ref), [`lowrankdowndate`](@ref), [`lowrankdowndate!`](@ref).
-"""
-function lowrankupdate!(F::Factor{Tv}, V::AbstractArray{Tv}) where Tv<:VTypes
-    #Reorder and copy V to account for permutation
-    C = lowrank_reorder(V, get_perm(F))
-    lowrankupdowndate!(F, C, Cint(1))
-end
-
-"""
-    lowrankdowndate!(F::CHOLMOD.Factor, C::AbstractArray)
-
-Update an `LDLt` or `LLt` Factorization `F` of `A` to a factorization of `A - C*C'`.
-
-`LLt` factorizations are converted to `LDLt`.
-
-See also [`lowrankdowndate`](@ref), [`lowrankupdate`](@ref), [`lowrankupdate!`](@ref).
-"""
-function lowrankdowndate!(F::Factor{Tv}, V::AbstractArray{Tv}) where Tv<:VTypes
-    #Reorder and copy V to account for permutation
-    C = lowrank_reorder(V, get_perm(F))
-    lowrankupdowndate!(F, C, Cint(0))
-end
-
-"""
-    lowrankupdate(F::CHOLMOD.Factor, C::AbstractArray) -> FF::CHOLMOD.Factor
-
-Get an `LDLt` Factorization of `A + C*C'` given an `LDLt` or `LLt` factorization `F` of `A`.
-
-The returned factor is always an `LDLt` factorization.
-
-See also [`lowrankupdate!`](@ref), [`lowrankdowndate`](@ref), [`lowrankdowndate!`](@ref).
-"""
-lowrankupdate(F::Factor{Tv}, V::AbstractArray{Tv}) where {Tv<:VTypes} =
-    lowrankupdate!(copy(F), V)
-
-"""
-    lowrankupdate(F::CHOLMOD.Factor, C::AbstractArray) -> FF::CHOLMOD.Factor
-
-Get an `LDLt` Factorization of `A + C*C'` given an `LDLt` or `LLt` factorization `F` of `A`.
-
-The returned factor is always an `LDLt` factorization.
-
-See also [`lowrankdowndate!`](@ref), [`lowrankupdate`](@ref), [`lowrankupdate!`](@ref).
-"""
-lowrankdowndate(F::Factor{Tv}, V::AbstractArray{Tv}) where {Tv<:VTypes} =
-    lowrankdowndate!(copy(F), V)
-
-## Solvers
-
-for (T, f) in ((:Dense, :solve), (:Sparse, :spsolve))
-    @eval begin
-        # Solve Lx = b and L'x=b where A = L*L'
-        function (\)(L::FactorComponent{T,:L}, B::$T) where T
-            ($f)(CHOLMOD_L, Factor(L), B)
-        end
-        function (\)(L::FactorComponent{T,:U}, B::$T) where T
-            ($f)(CHOLMOD_Lt, Factor(L), B)
-        end
-        # Solve PLx = b and L'P'x=b where A = P*L*L'*P'
-        function (\)(L::FactorComponent{T,:PtL}, B::$T) where T
-            F = Factor(L)
-            ($f)(CHOLMOD_L, F, ($f)(CHOLMOD_P, F, B)) # Confusingly, CHOLMOD_P solves P'x = b
-        end
-        function (\)(L::FactorComponent{T,:UP}, B::$T) where T
-            F = Factor(L)
-            ($f)(CHOLMOD_Pt, F, ($f)(CHOLMOD_Lt, F, B))
-        end
-        # Solve various equations for A = L*D*L' and A = P*L*D*L'*P'
-        function (\)(L::FactorComponent{T,:D}, B::$T) where T
-            ($f)(CHOLMOD_D, Factor(L), B)
-        end
-        function (\)(L::FactorComponent{T,:LD}, B::$T) where T
-            ($f)(CHOLMOD_LD, Factor(L), B)
-        end
-        function (\)(L::FactorComponent{T,:DU}, B::$T) where T
-            ($f)(CHOLMOD_DLt, Factor(L), B)
-        end
-        function (\)(L::FactorComponent{T,:PtLD}, B::$T) where T
-            F = Factor(L)
-            ($f)(CHOLMOD_LD, F, ($f)(CHOLMOD_P, F, B))
-        end
-        function (\)(L::FactorComponent{T,:DUP}, B::$T) where T
-            F = Factor(L)
-            ($f)(CHOLMOD_Pt, F, ($f)(CHOLMOD_DLt, F, B))
-        end
-    end
-end
-
-SparseVecOrMat{Tv,Ti} = Union{SparseVector{Tv,Ti}, SparseMatrixCSC{Tv,Ti}}
-
-function (\)(L::FactorComponent, b::Vector)
-    reshape(Matrix(L\Dense(b)), length(b))
-end
-function (\)(L::FactorComponent, B::Matrix)
-    Matrix(L\Dense(B))
-end
-function (\)(L::FactorComponent, B::SparseVecOrMat)
-    sparse(L\Sparse(B,0))
-end
-
-\(adjL::Adjoint{<:Any,<:FactorComponent}, B::Union{VecOrMat,SparseVecOrMat}) = (L = adjL.parent; adjoint(L)\B)
-
-(\)(L::Factor{T}, B::Dense{T}) where {T<:VTypes} = solve(CHOLMOD_A, L, B)
-# Explicit typevars are necessary to avoid ambiguities with defs in linalg/factorizations.jl
-# Likewise the two following explicit Vector and Matrix defs (rather than a single VecOrMat)
-(\)(L::Factor{T}, B::Vector{Complex{T}}) where {T<:Float64} = complex.(L\real(B), L\imag(B))
-(\)(L::Factor{T}, B::Matrix{Complex{T}}) where {T<:Float64} = complex.(L\real(B), L\imag(B))
-(\)(L::Factor{T}, B::Adjoint{<:Any, <:Matrix{Complex{T}}}) where {T<:Float64} = complex.(L\real(B), L\imag(B))
-(\)(L::Factor{T}, B::Transpose{<:Any, <:Matrix{Complex{T}}}) where {T<:Float64} = complex.(L\real(B), L\imag(B))
-
-(\)(L::Factor{T}, b::StridedVector) where {T<:VTypes} = Vector(L\Dense{T}(b))
-(\)(L::Factor{T}, B::StridedMatrix) where {T<:VTypes} = Matrix(L\Dense{T}(B))
-(\)(L::Factor{T}, B::Adjoint{<:Any, <:StridedMatrix}) where {T<:VTypes} = Matrix(L\Dense{T}(B))
-(\)(L::Factor{T}, B::Transpose{<:Any, <:StridedMatrix}) where {T<:VTypes} = Matrix(L\Dense{T}(B))
-
-(\)(L::Factor, B::Sparse) = spsolve(CHOLMOD_A, L, B)
-# When right hand side is sparse, we have to ensure that the rhs is not marked as symmetric.
-(\)(L::Factor, B::SparseMatrixCSC) = sparse(spsolve(CHOLMOD_A, L, Sparse(B, 0)))
-(\)(L::Factor, B::SparseVector) = sparse(spsolve(CHOLMOD_A, L, Sparse(B)))
-
-\(adjL::Adjoint{<:Any,<:Factor}, B::Dense) = (L = adjL.parent; solve(CHOLMOD_A, L, B))
-\(adjL::Adjoint{<:Any,<:Factor}, B::Sparse) = (L = adjL.parent; spsolve(CHOLMOD_A, L, B))
-\(adjL::Adjoint{<:Any,<:Factor}, B::SparseVecOrMat) = (L = adjL.parent; \(adjoint(L), Sparse(B)))
-
-function \(adjL::Adjoint{<:Any,<:Factor}, b::StridedVector)
-    L = adjL.parent
-    return Vector(solve(CHOLMOD_A, L, Dense(b)))
-end
-function \(adjL::Adjoint{<:Any,<:Factor}, B::StridedMatrix)
-    L = adjL.parent
-    return Matrix(solve(CHOLMOD_A, L, Dense(B)))
-end
-
-const RealHermSymComplexHermF64SSL = Union{
-    Symmetric{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}},
-    Hermitian{Float64,SparseMatrixCSC{Float64,SuiteSparse_long}},
-    Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,SuiteSparse_long}}}
-const StridedVecOrMatInclAdjAndTrans = Union{StridedVecOrMat, Adjoint{<:Any, <:StridedVecOrMat}, Transpose{<:Any, <:StridedVecOrMat}}
-function \(A::RealHermSymComplexHermF64SSL, B::StridedVecOrMatInclAdjAndTrans)
-    F = cholesky(A; check = false)
-    if issuccess(F)
-        return \(F, B)
-    else
-        ldlt!(F, A; check = false)
-        if issuccess(F)
-            return \(F, B)
-        else
-            return \(lu(SparseMatrixCSC{eltype(A), SuiteSparse_long}(A)), B)
-        end
-    end
-end
-function \(adjA::Adjoint{<:Any,<:RealHermSymComplexHermF64SSL}, B::StridedVecOrMatInclAdjAndTrans)
-    A = adjA.parent
-    F = cholesky(A; check = false)
-    if issuccess(F)
-        return \(adjoint(F), B)
-    else
-        ldlt!(F, A; check = false)
-        if issuccess(F)
-            return \(adjoint(F), B)
-        else
-            return \(adjoint(lu(SparseMatrixCSC{eltype(A), SuiteSparse_long}(A))), B)
-        end
-    end
-end
-
-## Other convenience methods
-function diag(F::Factor{Tv}) where Tv
-    f = unsafe_load(pointer(F))
-    fsuper = f.super
-    fpi = f.pi
-    res = Base.zeros(Tv, Int(f.n))
-    xv  = f.x
-    if f.is_super!=0
-        px = f.px
-        pos = 1
-        for i in 1:f.nsuper
-            base = unsafe_load(px, i) + 1
-            res[pos] = unsafe_load(xv, base)
-            pos += 1
-            for j in 1:unsafe_load(fsuper, i + 1) - unsafe_load(fsuper, i) - 1
-                res[pos] = unsafe_load(xv, base + j*(unsafe_load(fpi, i + 1) -
-                    unsafe_load(fpi, i) + 1))
-                pos += 1
-            end
-        end
-    else
-        c0 = f.p
-        r0 = f.i
-        xv = f.x
-        for j in 1:f.n
-            jj = unsafe_load(c0, j) + 1
-            @assert(unsafe_load(r0, jj) == j - 1)
-            res[j] = unsafe_load(xv, jj)
-        end
-    end
-    res
-end
-
-function logdet(F::Factor{Tv}) where Tv<:VTypes
-    f = unsafe_load(pointer(F))
-    res = zero(Tv)
-    for d in diag(F); res += log(abs(d)) end
-    f.is_ll != 0 ? 2res : res
-end
-
-det(L::Factor) = exp(logdet(L))
-
-function issuccess(F::Factor)
-    s = unsafe_load(pointer(F))
-    return s.minor == size(F, 1)
-end
-
-function isposdef(F::Factor)
-    if issuccess(F)
-        s = unsafe_load(pointer(F))
-        if s.is_ll == 1
-            return true
-        else
-            # try conversion to LLt
-            change_factor!(F, true, s.is_super, true, s.is_monotonic)
-            b = issuccess(F)
-            # convert back
-            change_factor!(F, false, s.is_super, true, s.is_monotonic)
-            return b
-        end
-    else
-        return false
-    end
-end
-
-function ishermitian(A::Sparse{Float64})
-    s = unsafe_load(pointer(A))
-    if s.stype != 0
-        return true
-    else
-        i = symmetry(A, 1)[1]
-        if i < 0
-            throw(CHOLMODException("negative value returned from CHOLMOD's symmetry function. This
-                is either because the indices are not sorted or because of a memory error"))
-        end
-        return i == MM_SYMMETRIC || i == MM_SYMMETRIC_POSDIAG
-    end
-end
-function ishermitian(A::Sparse{ComplexF64})
-    s = unsafe_load(pointer(A))
-    if s.stype != 0
-        return true
-    else
-        i = symmetry(A, 1)[1]
-        if i < 0
-            throw(CHOLMODException("negative value returned from CHOLMOD's symmetry function. This
-                is either because the indices are not sorted or because of a memory error"))
-        end
-        return i == MM_HERMITIAN || i == MM_HERMITIAN_POSDIAG
-    end
-end
-
-(*)(A::Symmetric{Float64,SparseMatrixCSC{Float64,Ti}},
-    B::SparseVecOrMat{Float64,Ti}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-(*)(A::Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,Ti}},
-    B::SparseVecOrMat{ComplexF64,Ti}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-(*)(A::Hermitian{Float64,SparseMatrixCSC{Float64,Ti}},
-    B::SparseVecOrMat{Float64,Ti}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-
-(*)(A::SparseVecOrMat{Float64,Ti},
-    B::Symmetric{Float64,SparseMatrixCSC{Float64,Ti}}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-(*)(A::SparseVecOrMat{ComplexF64,Ti},
-    B::Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,Ti}}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-(*)(A::SparseVecOrMat{Float64,Ti},
-    B::Hermitian{Float64,SparseMatrixCSC{Float64,Ti}}) where {Ti} = sparse(Sparse(A)*Sparse(B))
-
-end #module
diff --git a/stdlib/SuiteSparse/src/cholmod_h.jl b/stdlib/SuiteSparse/src/cholmod_h.jl
deleted file mode 100644
index 26bb046b45dfca..00000000000000
--- a/stdlib/SuiteSparse/src/cholmod_h.jl
+++ /dev/null
@@ -1,79 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## CHOLMOD
-const TRUE  = Int32(1)
-const FALSE = Int32(0)
-
-## itype defines the types of integer used:
-const INT     = Int32(0)  # all integer arrays are int
-const INTLONG = Int32(1)  # most are int, some are SuiteSparse_long
-const LONG    = Int32(2)  # all integer arrays are SuiteSparse_long
-
-## dtype defines what the numerical type is (double or float):
-const DOUBLE = Int32(0)        # all numerical values are double
-const SINGLE = Int32(1)        # all numerical values are float
-dtyp(::Type{Float32}) = SINGLE
-dtyp(::Type{Float64}) = DOUBLE
-dtyp(::Type{ComplexF32}) = SINGLE
-dtyp(::Type{ComplexF64}) = DOUBLE
-
-## xtype defines the kind of numerical values used:
-const PATTERN = Int32(0)       # pattern only, no numerical values
-const REAL    = Int32(1)       # a real matrix
-const COMPLEX = Int32(2)       # a complex matrix (ANSI C99 compatible)
-const ZOMPLEX = Int32(3)       # a complex matrix (MATLAB compatible)
-xtyp(::Type{Float32})    = REAL
-xtyp(::Type{Float64})    = REAL
-xtyp(::Type{ComplexF32}) = COMPLEX
-xtyp(::Type{ComplexF64}) = COMPLEX
-
-## Scaling modes, selected by the scale input parameter:
-const SCALAR = Int32(0)        # A = s*A
-const ROW    = Int32(1)        # A = diag(s)*A
-const COL    = Int32(2)        # A = A*diag(s)
-const SYM    = Int32(3)        # A = diag(s)*A*diag(s)
-
-## Types of systems to solve
-const CHOLMOD_A    = Int32(0)          # solve Ax=b
-const CHOLMOD_LDLt = Int32(1)          # solve LDL'x=b
-const CHOLMOD_LD   = Int32(2)          # solve LDx=b
-const CHOLMOD_DLt  = Int32(3)          # solve DL'x=b
-const CHOLMOD_L    = Int32(4)          # solve Lx=b
-const CHOLMOD_Lt   = Int32(5)          # solve L'x=b
-const CHOLMOD_D    = Int32(6)          # solve Dx=b
-const CHOLMOD_P    = Int32(7)          # permute x=Px
-const CHOLMOD_Pt   = Int32(8)          # permute x=P'x
-
-# Symmetry types
-const EMPTY                 =-1
-const MM_RECTANGULAR        = 1
-const MM_UNSYMMETRIC        = 2
-const MM_SYMMETRIC          = 3
-const MM_HERMITIAN          = 4
-const MM_SKEW_SYMMETRIC     = 5
-const MM_SYMMETRIC_POSDIAG  = 6
-const MM_HERMITIAN_POSDIAG  = 7
-
-# check the size of SuiteSparse_long
-if Int(ccall((:jl_cholmod_sizeof_long, :libsuitesparse_wrapper),Csize_t,())) == 4
-    const SuiteSparse_long = Int32
-    const IndexTypes = (:Int32,)
-    const ITypes = Union{Int32}
-else
-    const SuiteSparse_long = Int64
-    const IndexTypes = (:Int32, :Int64)
-    const ITypes = Union{Int32, Int64}
-end
-ityp(::Type{SuiteSparse_long}) = LONG
-
-
-const VTypes = Union{ComplexF64, Float64}
-const VRealTypes = Union{Float64}
-
-struct CHOLMODException <: Exception
-    msg::AbstractString
-end
-
-macro isok(A)
-    :($(esc(A)) == TRUE || throw(CHOLMODException("")))
-end
diff --git a/stdlib/SuiteSparse/src/deprecated.jl b/stdlib/SuiteSparse/src/deprecated.jl
deleted file mode 100644
index ee28d60dc44066..00000000000000
--- a/stdlib/SuiteSparse/src/deprecated.jl
+++ /dev/null
@@ -1 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
diff --git a/stdlib/SuiteSparse/src/spqr.jl b/stdlib/SuiteSparse/src/spqr.jl
deleted file mode 100644
index 79897b6c0de765..00000000000000
--- a/stdlib/SuiteSparse/src/spqr.jl
+++ /dev/null
@@ -1,458 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module SPQR
-
-import Base: \
-using Base: require_one_based_indexing
-using LinearAlgebra
-
-# ordering options */
-const ORDERING_FIXED   = Int32(0)
-const ORDERING_NATURAL = Int32(1)
-const ORDERING_COLAMD  = Int32(2)
-const ORDERING_GIVEN   = Int32(3) # only used for C/C++ interface
-const ORDERING_CHOLMOD = Int32(4) # CHOLMOD best-effort (COLAMD, METIS,...)
-const ORDERING_AMD     = Int32(5) # AMD(A'*A)
-const ORDERING_METIS   = Int32(6) # metis(A'*A)
-const ORDERING_DEFAULT = Int32(7) # SuiteSparseQR default ordering
-const ORDERING_BEST    = Int32(8) # try COLAMD, AMD, and METIS; pick best
-const ORDERING_BESTAMD = Int32(9) # try COLAMD and AMD; pick best#
-const ORDERINGS = [ORDERING_FIXED, ORDERING_NATURAL, ORDERING_COLAMD, ORDERING_CHOLMOD,
-                   ORDERING_AMD, ORDERING_METIS, ORDERING_DEFAULT, ORDERING_BEST,
-                   ORDERING_BESTAMD]
-
-# Let [m n] = size of the matrix after pruning singletons.  The default
-# ordering strategy is to use COLAMD if m <= 2*n.  Otherwise, AMD(A'A) is
-# tried.  If there is a high fill-in with AMD then try METIS(A'A) and take
-# the best of AMD and METIS. METIS is not tried if it isn't installed.
-
-using SparseArrays
-using SparseArrays: getcolptr
-using ..SuiteSparse.CHOLMOD
-using ..SuiteSparse.CHOLMOD: change_stype!, free!
-
-function _qr!(ordering::Integer, tol::Real, econ::Integer, getCTX::Integer,
-        A::Sparse{Tv},
-        Bsparse::Union{Sparse{Tv}                      , Ptr{Cvoid}} = C_NULL,
-        Bdense::Union{Dense{Tv}                        , Ptr{Cvoid}} = C_NULL,
-        Zsparse::Union{Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}  , Ptr{Cvoid}} = C_NULL,
-        Zdense::Union{Ref{Ptr{CHOLMOD.C_Dense{Tv}}}    , Ptr{Cvoid}} = C_NULL,
-        R::Union{Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}        , Ptr{Cvoid}} = C_NULL,
-        E::Union{Ref{Ptr{CHOLMOD.SuiteSparse_long}}    , Ptr{Cvoid}} = C_NULL,
-        H::Union{Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}        , Ptr{Cvoid}} = C_NULL,
-        HPinv::Union{Ref{Ptr{CHOLMOD.SuiteSparse_long}}, Ptr{Cvoid}} = C_NULL,
-        HTau::Union{Ref{Ptr{CHOLMOD.C_Dense{Tv}}}      , Ptr{Cvoid}} = C_NULL) where {Tv<:CHOLMOD.VTypes}
-
-    ordering ∈ ORDERINGS || error("unknown ordering $ordering")
-
-    AA   = unsafe_load(pointer(A))
-    m, n = AA.nrow, AA.ncol
-    rnk  = ccall((:SuiteSparseQR_C, :libspqr), CHOLMOD.SuiteSparse_long,
-        (Cint, Cdouble, CHOLMOD.SuiteSparse_long, Cint,
-         Ptr{CHOLMOD.C_Sparse{Tv}}, Ptr{CHOLMOD.C_Sparse{Tv}}, Ptr{CHOLMOD.C_Dense{Tv}},
-         Ptr{Ptr{CHOLMOD.C_Sparse{Tv}}}, Ptr{Ptr{CHOLMOD.C_Dense{Tv}}}, Ptr{Ptr{CHOLMOD.C_Sparse{Tv}}},
-         Ptr{Ptr{CHOLMOD.SuiteSparse_long}}, Ptr{Ptr{CHOLMOD.C_Sparse{Tv}}}, Ptr{Ptr{CHOLMOD.SuiteSparse_long}},
-         Ptr{Ptr{CHOLMOD.C_Dense{Tv}}}, Ptr{Cvoid}),
-        ordering,       # all, except 3:given treated as 0:fixed
-        tol,            # columns with 2-norm <= tol treated as 0
-        econ,           # e = max(min(m,econ),rank(A))
-        getCTX,         # 0: Z=C (e-by-k), 1: Z=C', 2: Z=X (e-by-k)
-        A,              # m-by-n sparse matrix to factorize
-        Bsparse,        # sparse m-by-k B
-        Bdense,         # dense  m-by-k B
-        # /* outputs: */
-        Zsparse,        # sparse Z
-        Zdense,         # dense Z
-        R,              # e-by-n sparse matrix */
-        E,              # size n column perm, NULL if identity */
-        H,              # m-by-nh Householder vectors
-        HPinv,          # size m row permutation
-        HTau,           # 1-by-nh Householder coefficients
-        CHOLMOD.common_struct[Threads.threadid()]) # /* workspace and parameters */
-
-    if rnk < 0
-        error("Sparse QR factorization failed")
-    end
-
-    e = E[]
-    if e == C_NULL
-        _E = Vector{CHOLMOD.SuiteSparse_long}()
-    else
-        _E = Vector{CHOLMOD.SuiteSparse_long}(undef, n)
-        for i in 1:n
-            @inbounds _E[i] = unsafe_load(e, i) + 1
-        end
-        # Free memory allocated by SPQR. This call will make sure that the
-        # correct deallocator function is called and that the memory count in
-        # the common struct is updated
-        ccall((:cholmod_l_free, :libcholmod), Cvoid,
-            (Csize_t, Cint, Ptr{CHOLMOD.SuiteSparse_long}, Ptr{Cvoid}),
-            n, sizeof(CHOLMOD.SuiteSparse_long), e, CHOLMOD.common_struct[Threads.threadid()])
-    end
-    hpinv = HPinv[]
-    if hpinv == C_NULL
-        _HPinv = Vector{CHOLMOD.SuiteSparse_long}()
-    else
-        _HPinv = Vector{CHOLMOD.SuiteSparse_long}(undef, m)
-        for i in 1:m
-            @inbounds _HPinv[i] = unsafe_load(hpinv, i) + 1
-        end
-        # Free memory allocated by SPQR. This call will make sure that the
-        # correct deallocator function is called and that the memory count in
-        # the common struct is updated
-        ccall((:cholmod_l_free, :libcholmod), Cvoid,
-            (Csize_t, Cint, Ptr{CHOLMOD.SuiteSparse_long}, Ptr{Cvoid}),
-            m, sizeof(CHOLMOD.SuiteSparse_long), hpinv, CHOLMOD.common_struct[Threads.threadid()])
-    end
-
-    return rnk, _E, _HPinv
-end
-
-# Struct for storing sparse QR from SPQR such that
-# A[invperm(rpivinv), cpiv] = (I - factors[:,1]*τ[1]*factors[:,1]')*...*(I - factors[:,k]*τ[k]*factors[:,k]')*R
-# with k = size(factors, 2).
-struct QRSparse{Tv,Ti} <: LinearAlgebra.Factorization{Tv}
-    factors::SparseMatrixCSC{Tv,Ti}
-    τ::Vector{Tv}
-    R::SparseMatrixCSC{Tv,Ti}
-    cpiv::Vector{Ti}
-    rpivinv::Vector{Ti}
-end
-
-Base.size(F::QRSparse) = (size(F.factors, 1), size(F.R, 2))
-function Base.size(F::QRSparse, i::Integer)
-    if i == 1
-        return size(F.factors, 1)
-    elseif i == 2
-        return size(F.R, 2)
-    elseif i > 2
-        return 1
-    else
-        throw(ArgumentError("second argument must be positive"))
-    end
-end
-Base.axes(F::QRSparse) = map(Base.OneTo, size(F))
-
-struct QRSparseQ{Tv<:CHOLMOD.VTypes,Ti<:Integer} <: LinearAlgebra.AbstractQ{Tv}
-    factors::SparseMatrixCSC{Tv,Ti}
-    τ::Vector{Tv}
-    n::Int # Number of columns in original matrix
-end
-
-Base.size(Q::QRSparseQ) = (size(Q.factors, 1), size(Q.factors, 1))
-Base.axes(Q::QRSparseQ) = map(Base.OneTo, size(Q))
-
-Matrix{T}(Q::QRSparseQ) where {T} = lmul!(Q, Matrix{T}(I, size(Q, 1), min(size(Q, 1), Q.n)))
-
-# From SPQR manual p. 6
-_default_tol(A::SparseMatrixCSC) =
-    20*sum(size(A))*eps(real(eltype(A)))*maximum(norm(view(A, :, i)) for i in 1:size(A, 2))
-
-"""
-    qr(A::SparseMatrixCSC; tol=_default_tol(A), ordering=ORDERING_DEFAULT) -> QRSparse
-
-Compute the `QR` factorization of a sparse matrix `A`. Fill-reducing row and column permutations
-are used such that `F.R = F.Q'*A[F.prow,F.pcol]`. The main application of this type is to
-solve least squares or underdetermined problems with [`\\`](@ref). The function calls the C library SPQR.
-
-!!! note
-    `qr(A::SparseMatrixCSC)` uses the SPQR library that is part of SuiteSparse.
-    As this library only supports sparse matrices with [`Float64`](@ref) or
-    `ComplexF64` elements, as of Julia v1.4 `qr` converts `A` into a copy that is
-    of type `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}` as appropriate.
-
-# Examples
-```jldoctest
-julia> A = sparse([1,2,3,4], [1,1,2,2], [1.0,1.0,1.0,1.0])
-4×2 SparseMatrixCSC{Float64, Int64} with 4 stored entries:
- 1.0   ⋅
- 1.0   ⋅
-  ⋅   1.0
-  ⋅   1.0
-
-julia> qr(A)
-SuiteSparse.SPQR.QRSparse{Float64, Int64}
-Q factor:
-4×4 SuiteSparse.SPQR.QRSparseQ{Float64, Int64}:
- -0.707107   0.0        0.0       -0.707107
-  0.0       -0.707107  -0.707107   0.0
-  0.0       -0.707107   0.707107   0.0
- -0.707107   0.0        0.0        0.707107
-R factor:
-2×2 SparseMatrixCSC{Float64, Int64} with 2 stored entries:
- -1.41421    ⋅
-   ⋅       -1.41421
-Row permutation:
-4-element Vector{Int64}:
- 1
- 3
- 4
- 2
-Column permutation:
-2-element Vector{Int64}:
- 1
- 2
-```
-"""
-function LinearAlgebra.qr(A::SparseMatrixCSC{Tv}; tol=_default_tol(A), ordering=ORDERING_DEFAULT) where {Tv <: CHOLMOD.VTypes}
-    R     = Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}()
-    E     = Ref{Ptr{CHOLMOD.SuiteSparse_long}}()
-    H     = Ref{Ptr{CHOLMOD.C_Sparse{Tv}}}()
-    HPinv = Ref{Ptr{CHOLMOD.SuiteSparse_long}}()
-    HTau  = Ref{Ptr{CHOLMOD.C_Dense{Tv}}}(C_NULL)
-
-    # SPQR doesn't accept symmetric matrices so we explicitly set the stype
-    r, p, hpinv = _qr!(ordering, tol, 0, 0, Sparse(A, 0),
-        C_NULL, C_NULL, C_NULL, C_NULL,
-        R, E, H, HPinv, HTau)
-
-    R_ = SparseMatrixCSC(Sparse(R[]))
-    return QRSparse(SparseMatrixCSC(Sparse(H[])),
-                    vec(Array(CHOLMOD.Dense(HTau[]))),
-                    SparseMatrixCSC(min(size(A)...),
-                                    size(R_, 2),
-                                    getcolptr(R_),
-                                    rowvals(R_),
-                                    nonzeros(R_)),
-                    p, hpinv)
-end
-LinearAlgebra.qr(A::SparseMatrixCSC{<:Union{Float16,Float32}}; tol=_default_tol(A)) =
-    qr(convert(SparseMatrixCSC{Float64}, A); tol=tol)
-LinearAlgebra.qr(A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32}}; tol=_default_tol(A)) =
-    qr(convert(SparseMatrixCSC{ComplexF64}, A); tol=tol)
-LinearAlgebra.qr(A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}};
-   tol=_default_tol(A)) where {T<:AbstractFloat} =
-    throw(ArgumentError(string("matrix type ", typeof(A), "not supported. ",
-    "Try qr(convert(SparseMatrixCSC{Float64/ComplexF64, Int}, A)) for ",
-    "sparse floating point QR using SPQR or qr(Array(A)) for generic ",
-    "dense QR.")))
-LinearAlgebra.qr(A::SparseMatrixCSC; tol=_default_tol(A)) = qr(float(A); tol=tol)
-
-function LinearAlgebra.lmul!(Q::QRSparseQ, A::StridedVecOrMat)
-    if size(A, 1) != size(Q, 1)
-        throw(DimensionMismatch("size(Q) = $(size(Q)) but size(A) = $(size(A))"))
-    end
-    for l in size(Q.factors, 2):-1:1
-        τl = -Q.τ[l]
-        h = view(Q.factors, :, l)
-        for j in 1:size(A, 2)
-            a = view(A, :, j)
-            LinearAlgebra.axpy!(τl*dot(h, a), h, a)
-        end
-    end
-    return A
-end
-
-function LinearAlgebra.rmul!(A::StridedMatrix, Q::QRSparseQ)
-    if size(A, 2) != size(Q, 1)
-        throw(DimensionMismatch("size(Q) = $(size(Q)) but size(A) = $(size(A))"))
-    end
-    tmp = similar(A, size(A, 1))
-    for l in 1:size(Q.factors, 2)
-        τl = -Q.τ[l]
-        h = view(Q.factors, :, l)
-        LinearAlgebra.mul!(tmp, A, h)
-        LinearAlgebra.lowrankupdate!(A, tmp, h, τl)
-    end
-    return A
-end
-
-function LinearAlgebra.lmul!(adjQ::Adjoint{<:Any,<:QRSparseQ}, A::StridedVecOrMat)
-    Q = adjQ.parent
-    if size(A, 1) != size(Q, 1)
-        throw(DimensionMismatch("size(Q) = $(size(Q)) but size(A) = $(size(A))"))
-    end
-    for l in 1:size(Q.factors, 2)
-        τl = -Q.τ[l]
-        h = view(Q.factors, :, l)
-        for j in 1:size(A, 2)
-            a = view(A, :, j)
-            LinearAlgebra.axpy!(τl'*dot(h, a), h, a)
-        end
-    end
-    return A
-end
-
-function LinearAlgebra.rmul!(A::StridedMatrix, adjQ::Adjoint{<:Any,<:QRSparseQ})
-    Q = adjQ.parent
-    if size(A, 2) != size(Q, 1)
-        throw(DimensionMismatch("size(Q) = $(size(Q)) but size(A) = $(size(A))"))
-    end
-    tmp = similar(A, size(A, 1))
-    for l in size(Q.factors, 2):-1:1
-        τl = -Q.τ[l]
-        h = view(Q.factors, :, l)
-        LinearAlgebra.mul!(tmp, A, h)
-        LinearAlgebra.lowrankupdate!(A, tmp, h, τl')
-    end
-    return A
-end
-
-"""
-    getproperty(F::QRSparse, d::Symbol)
-
-Extract factors of a QRSparse factorization. Possible values of `d` are
-- `Q` : `QRSparseQ` matrix of the ``Q`` factor in Householder form
-- `R` : `UpperTriangular` ``R`` factor
-- `prow` : Vector of the row permutations applied to the factorized matrix
-- `pcol` : Vector of the column permutations applied to the factorized matrix
-
-# Examples
-```jldoctest
-julia> F = qr(sparse([1,3,2,3,4], [1,1,2,3,4], [1.0,2.0,3.0,4.0,5.0]));
-
-julia> F.Q
-4×4 SuiteSparse.SPQR.QRSparseQ{Float64, Int64}:
- 1.0  0.0  0.0  0.0
- 0.0  1.0  0.0  0.0
- 0.0  0.0  1.0  0.0
- 0.0  0.0  0.0  1.0
-
-julia> F.R
-4×4 SparseMatrixCSC{Float64, Int64} with 5 stored entries:
- 3.0   ⋅    ⋅    ⋅
-  ⋅   4.0   ⋅   2.0
-  ⋅    ⋅   5.0   ⋅
-  ⋅    ⋅    ⋅   1.0
-
-julia> F.prow
-4-element Vector{Int64}:
- 2
- 3
- 4
- 1
-
-julia> F.pcol
-4-element Vector{Int64}:
- 2
- 3
- 4
- 1
-```
-"""
-@inline function Base.getproperty(F::QRSparse, d::Symbol)
-    if d === :Q
-        return QRSparseQ(F.factors, F.τ, size(F, 2))
-    elseif d === :prow
-        return invperm(F.rpivinv)
-    elseif d === :pcol
-        return F.cpiv
-    else
-        getfield(F, d)
-    end
-end
-
-function Base.propertynames(F::QRSparse, private::Bool=false)
-    public = (:R, :Q, :prow, :pcol)
-    private ? ((public ∪ fieldnames(typeof(F)))...,) : public
-end
-
-function Base.show(io::IO, mime::MIME{Symbol("text/plain")}, F::QRSparse)
-    summary(io, F); println(io)
-    println(io, "Q factor:")
-    show(io, mime, F.Q)
-    println(io, "\nR factor:")
-    show(io, mime, F.R)
-    println(io, "\nRow permutation:")
-    show(io, mime, F.prow)
-    println(io, "\nColumn permutation:")
-    show(io, mime, F.pcol)
-end
-
-# With a real lhs and complex rhs with the same precision, we can reinterpret
-# the complex rhs as a real rhs with twice the number of columns
-#
-# This definition is similar to the definition in factorization.jl except that
-# here we have to use \ instead of ldiv! because of limitations in SPQR
-
-## Two helper methods
-_ret_size(F::QRSparse, b::AbstractVector) = (size(F, 2),)
-_ret_size(F::QRSparse, B::AbstractMatrix) = (size(F, 2), size(B, 2))
-
-LinearAlgebra.rank(F::QRSparse) = reduce(max, view(rowvals(F.R), 1:nnz(F.R)), init = eltype(rowvals(F.R))(0))
-LinearAlgebra.rank(S::SparseMatrixCSC) = rank(qr(S))
-
-function (\)(F::QRSparse{T}, B::VecOrMat{Complex{T}}) where T<:LinearAlgebra.BlasReal
-# |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
-# |z2|z4|      ->       |y1|y2|y3|y4|     ->      |x2|y2|     ->    |x2|y2|x4|y4|
-#                                                 |x3|y3|
-#                                                 |x4|y4|
-    require_one_based_indexing(F, B)
-    c2r = reshape(copy(transpose(reinterpret(T, reshape(B, (1, length(B)))))), size(B, 1), 2*size(B, 2))
-    x = F\c2r
-
-# |z1|z3|  reinterpret  |x1|x2|x3|x4|  transpose  |x1|y1|  reshape  |x1|y1|x3|y3|
-# |z2|z4|      <-       |y1|y2|y3|y4|     <-      |x2|y2|     <-    |x2|y2|x4|y4|
-#                                                 |x3|y3|
-#                                                 |x4|y4|
-    return collect(reshape(reinterpret(Complex{T}, copy(transpose(reshape(x, (length(x) >> 1), 2)))), _ret_size(F, B)))
-end
-
-function _ldiv_basic(F::QRSparse, B::StridedVecOrMat)
-    if size(F, 1) != size(B, 1)
-        throw(DimensionMismatch("size(F) = $(size(F)) but size(B) = $(size(B))"))
-    end
-
-    # The rank of F equal might be reduced
-    rnk = rank(F)
-
-    # allocate an array for the return value large enough to hold B and X
-    # For overdetermined problem, B is larger than X and vice versa
-    X   = similar(B, ntuple(i -> i == 1 ? max(size(F, 2), size(B, 1)) : size(B, 2), Val(ndims(B))))
-
-    # Fill will zeros. These will eventually become the zeros in the basic solution
-    # fill!(X, 0)
-    # Apply left permutation to the solution and store in X
-    for j in 1:size(B, 2)
-        for i in 1:length(F.rpivinv)
-            @inbounds X[F.rpivinv[i], j] = B[i, j]
-        end
-    end
-
-    # Make a view into X corresponding to the size of B
-    X0 = view(X, 1:size(B, 1), :)
-
-    # Apply Q' to B
-    LinearAlgebra.lmul!(adjoint(F.Q), X0)
-
-    # Zero out to get basic solution
-    X[rnk + 1:end, :] .= 0
-
-    # Solve R*X = B
-    LinearAlgebra.ldiv!(UpperTriangular(F.R[Base.OneTo(rnk), Base.OneTo(rnk)]),
-                        view(X0, Base.OneTo(rnk), :))
-
-    # Apply right permutation and extract solution from X
-    # NB: cpiv == [] if SPQR was called with ORDERING_FIXED
-    if length(F.cpiv) == 0
-      return getindex(X, ntuple(i -> i == 1 ? (1:size(F,2)) : :, Val(ndims(B)))...)
-    end
-    return getindex(X, ntuple(i -> i == 1 ? invperm(F.cpiv) : :, Val(ndims(B)))...)
-end
-
-(\)(F::QRSparse{T}, B::StridedVecOrMat{T}) where {T} = _ldiv_basic(F, B)
-"""
-    (\\)(F::QRSparse, B::StridedVecOrMat)
-
-Solve the least squares problem ``\\min\\|Ax - b\\|^2`` or the linear system of equations
-``Ax=b`` when `F` is the sparse QR factorization of ``A``. A basic solution is returned
-when the problem is underdetermined.
-
-# Examples
-```jldoctest
-julia> A = sparse([1,2,4], [1,1,1], [1.0,1.0,1.0], 4, 2)
-4×2 SparseMatrixCSC{Float64, Int64} with 3 stored entries:
- 1.0   ⋅
- 1.0   ⋅
-  ⋅    ⋅
- 1.0   ⋅
-
-julia> qr(A)\\fill(1.0, 4)
-2-element Vector{Float64}:
- 1.0
- 0.0
-```
-"""
-(\)(F::QRSparse, B::StridedVecOrMat) = F\convert(AbstractArray{eltype(F)}, B)
-
-end # module
diff --git a/stdlib/SuiteSparse/src/umfpack.jl b/stdlib/SuiteSparse/src/umfpack.jl
deleted file mode 100644
index a6e0cf54d4b23a..00000000000000
--- a/stdlib/SuiteSparse/src/umfpack.jl
+++ /dev/null
@@ -1,643 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-module UMFPACK
-
-export UmfpackLU
-
-import Base: (\), getproperty, show, size
-using LinearAlgebra
-import LinearAlgebra: Factorization, det, lu, lu!, ldiv!
-
-using SparseArrays
-using SparseArrays: getcolptr
-import SparseArrays: nnz
-
-import Serialization: AbstractSerializer, deserialize
-
-import ..increment, ..increment!, ..decrement, ..decrement!
-
-include("umfpack_h.jl")
-struct MatrixIllConditionedException <: Exception
-    msg::AbstractString
-end
-
-function umferror(status::Integer)
-    if status==UMFPACK_OK
-        return
-    elseif status==UMFPACK_WARNING_singular_matrix
-        throw(LinearAlgebra.SingularException(0))
-    elseif status==UMFPACK_WARNING_determinant_underflow
-        throw(MatrixIllConditionedException("the determinant is nonzero but underflowed"))
-    elseif status==UMFPACK_WARNING_determinant_overflow
-        throw(MatrixIllConditionedException("the determinant overflowed"))
-    elseif status==UMFPACK_ERROR_out_of_memory
-        throw(OutOfMemoryError())
-    elseif status==UMFPACK_ERROR_invalid_Numeric_object
-        throw(ArgumentError("invalid UMFPack numeric object"))
-    elseif status==UMFPACK_ERROR_invalid_Symbolic_object
-        throw(ArgumentError("invalid UMFPack symbolic object"))
-    elseif status==UMFPACK_ERROR_argument_missing
-        throw(ArgumentError("a required argument to UMFPack is missing"))
-    elseif status==UMFPACK_ERROR_n_nonpositive
-        throw(ArgumentError("the number of rows or columns of the matrix must be greater than zero"))
-    elseif status==UMFPACK_ERROR_invalid_matrix
-        throw(ArgumentError("invalid matrix"))
-    elseif status==UMFPACK_ERROR_different_pattern
-        throw(ArgumentError("pattern of the matrix changed"))
-    elseif status==UMFPACK_ERROR_invalid_system
-        throw(ArgumentError("invalid sys argument provided to UMFPack solver"))
-    elseif status==UMFPACK_ERROR_invalid_permutation
-        throw(ArgumentError("invalid permutation"))
-    elseif status==UMFPACK_ERROR_file_IO
-        throw(ErrorException("error saving / loading UMFPack decomposition"))
-    elseif status==UMFPACK_ERROR_ordering_failed
-        throw(ErrorException("the ordering method failed"))
-    elseif status==UMFPACK_ERROR_internal_error
-        throw(ErrorException("an internal error has occurred, of unknown cause"))
-    else
-        throw(ErrorException("unknown UMFPack error code: $status"))
-    end
-end
-
-macro isok(A)
-    :(umferror($(esc(A))))
-end
-
-# check the size of SuiteSparse_long
-if Int(ccall((:jl_cholmod_sizeof_long,:libsuitesparse_wrapper),Csize_t,())) == 4
-    const UmfpackIndexTypes = (:Int32,)
-    const UMFITypes = Int32
-else
-    const UmfpackIndexTypes = (:Int32, :Int64)
-    const UMFITypes = Union{Int32, Int64}
-end
-
-const UMFVTypes = Union{Float64,ComplexF64}
-
-## UMFPACK
-
-# the control and info arrays
-const umf_ctrl = Vector{Float64}(undef, UMFPACK_CONTROL)
-ccall((:umfpack_dl_defaults,:libumfpack), Cvoid, (Ptr{Float64},), umf_ctrl)
-const umf_info = Vector{Float64}(undef, UMFPACK_INFO)
-
-function show_umf_ctrl(level::Real = 2.0)
-    old_prt::Float64 = umf_ctrl[1]
-    umf_ctrl[1] = Float64(level)
-    ccall((:umfpack_dl_report_control, :libumfpack), Cvoid, (Ptr{Float64},), umf_ctrl)
-    umf_ctrl[1] = old_prt
-end
-
-function show_umf_info(level::Real = 2.0)
-    old_prt::Float64 = umf_ctrl[1]
-    umf_ctrl[1] = Float64(level)
-    ccall((:umfpack_dl_report_info, :libumfpack), Cvoid,
-          (Ptr{Float64}, Ptr{Float64}), umf_ctrl, umf_info)
-    umf_ctrl[1] = old_prt
-end
-
-## Should this type be immutable?
-mutable struct UmfpackLU{Tv<:UMFVTypes,Ti<:UMFITypes} <: Factorization{Tv}
-    symbolic::Ptr{Cvoid}
-    numeric::Ptr{Cvoid}
-    m::Int
-    n::Int
-    colptr::Vector{Ti}                  # 0-based column pointers
-    rowval::Vector{Ti}                  # 0-based row indices
-    nzval::Vector{Tv}
-    status::Int
-end
-
-Base.adjoint(F::UmfpackLU) = Adjoint(F)
-Base.transpose(F::UmfpackLU) = Transpose(F)
-
-"""
-    lu(A::SparseMatrixCSC; check = true) -> F::UmfpackLU
-
-Compute the LU factorization of a sparse matrix `A`.
-
-For sparse `A` with real or complex element type, the return type of `F` is
-`UmfpackLU{Tv, Ti}`, with `Tv` = [`Float64`](@ref) or `ComplexF64` respectively and
-`Ti` is an integer type ([`Int32`](@ref) or [`Int64`](@ref)).
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-The individual components of the factorization `F` can be accessed by indexing:
-
-| Component | Description                         |
-|:----------|:------------------------------------|
-| `L`       | `L` (lower triangular) part of `LU` |
-| `U`       | `U` (upper triangular) part of `LU` |
-| `p`       | right permutation `Vector`          |
-| `q`       | left permutation `Vector`           |
-| `Rs`      | `Vector` of scaling factors         |
-| `:`       | `(L,U,p,q,Rs)` components           |
-
-The relation between `F` and `A` is
-
-`F.L*F.U == (F.Rs .* A)[F.p, F.q]`
-
-`F` further supports the following functions:
-
-- [`\\`](@ref)
-- [`cond`](@ref)
-- [`det`](@ref)
-
-!!! note
-    `lu(A::SparseMatrixCSC)` uses the UMFPACK library that is part of
-    SuiteSparse. As this library only supports sparse matrices with [`Float64`](@ref) or
-    `ComplexF64` elements, `lu` converts `A` into a copy that is of type
-    `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}` as appropriate.
-"""
-function lu(S::SparseMatrixCSC{<:UMFVTypes,<:UMFITypes}; check::Bool = true)
-    zerobased = getcolptr(S)[1] == 0
-    res = UmfpackLU(C_NULL, C_NULL, size(S, 1), size(S, 2),
-                    zerobased ? copy(getcolptr(S)) : decrement(getcolptr(S)),
-                    zerobased ? copy(rowvals(S)) : decrement(rowvals(S)),
-                    copy(nonzeros(S)), 0)
-    finalizer(umfpack_free_symbolic, res)
-    umfpack_numeric!(res)
-    check && (issuccess(res) || throw(LinearAlgebra.SingularException(0)))
-    return res
-end
-lu(A::SparseMatrixCSC{<:Union{Float16,Float32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu(convert(SparseMatrixCSC{Float64,Ti}, A); check = check)
-lu(A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu(convert(SparseMatrixCSC{ComplexF64,Ti}, A); check = check)
-lu(A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}};
-   check::Bool = true) where {T<:AbstractFloat} =
-    throw(ArgumentError(string("matrix type ", typeof(A), "not supported. ",
-    "Try lu(convert(SparseMatrixCSC{Float64/ComplexF64,Int}, A)) for ",
-    "sparse floating point LU using UMFPACK or lu(Array(A)) for generic ",
-    "dense LU.")))
-lu(A::SparseMatrixCSC; check::Bool = true) = lu(float(A); check = check)
-
-"""
-    lu!(F::UmfpackLU, A::SparseMatrixCSC; check=true) -> F::UmfpackLU
-
-Compute the LU factorization of a sparse matrix `A`, reusing the symbolic
-factorization of an already existing LU factorization stored in `F`. The
-sparse matrix `A` must have an identical nonzero pattern as the matrix used
-to create the LU factorization `F`, otherwise an error is thrown.
-
-When `check = true`, an error is thrown if the decomposition fails.
-When `check = false`, responsibility for checking the decomposition's
-validity (via [`issuccess`](@ref)) lies with the user.
-
-!!! note
-    `lu!(F::UmfpackLU, A::SparseMatrixCSC)` uses the UMFPACK library that is part of
-    SuiteSparse. As this library only supports sparse matrices with [`Float64`](@ref) or
-    `ComplexF64` elements, `lu!` converts `A` into a copy that is of type
-    `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}` as appropriate.
-
-!!! compat "Julia 1.5"
-    `lu!` for `UmfpackLU` requires at least Julia 1.5.
-
-# Examples
-```jldoctest
-julia> A = sparse(Float64[1.0 2.0; 0.0 3.0]);
-
-julia> F = lu(A);
-
-julia> B = sparse(Float64[1.0 1.0; 0.0 1.0]);
-
-julia> lu!(F, B);
-
-julia> F \\ ones(2)
-2-element Vector{Float64}:
- 0.0
- 1.0
-```
-"""
-function lu!(F::UmfpackLU, S::SparseMatrixCSC{<:UMFVTypes,<:UMFITypes}; check::Bool=true)
-    zerobased = getcolptr(S)[1] == 0
-    F.m = size(S, 1)
-    F.n = size(S, 2)
-    F.colptr = zerobased ? copy(getcolptr(S)) : decrement(getcolptr(S))
-    F.rowval = zerobased ? copy(rowvals(S)) : decrement(rowvals(S))
-    F.nzval = copy(nonzeros(S))
-
-    umfpack_numeric!(F, reuse_numeric = false)
-    check && (issuccess(F) || throw(LinearAlgebra.SingularException(0)))
-    return F
-end
-lu!(F::UmfpackLU, A::SparseMatrixCSC{<:Union{Float16,Float32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu!(F, convert(SparseMatrixCSC{Float64,Ti}, A); check = check)
-lu!(F::UmfpackLU, A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32},Ti};
-   check::Bool = true) where {Ti<:UMFITypes} =
-    lu!(F, convert(SparseMatrixCSC{ComplexF64,Ti}, A); check = check)
-lu!(F::UmfpackLU, A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}};
-   check::Bool = true) where {T<:AbstractFloat} =
-    throw(ArgumentError(string("matrix type ", typeof(A), "not supported.")))
-lu!(F::UmfpackLU, A::SparseMatrixCSC; check::Bool = true) = lu!(F, float(A); check = check)
-
-size(F::UmfpackLU) = (F.m, F.n)
-function size(F::UmfpackLU, dim::Integer)
-    if dim < 1
-        throw(ArgumentError("size: dimension $dim out of range"))
-    elseif dim == 1
-        return Int(F.m)
-    elseif dim == 2
-        return Int(F.n)
-    else
-        return 1
-    end
-end
-
-function show(io::IO, mime::MIME{Symbol("text/plain")}, F::UmfpackLU)
-    if F.numeric != C_NULL
-        if issuccess(F)
-            summary(io, F); println(io)
-            println(io, "L factor:")
-            show(io, mime, F.L)
-            println(io, "\nU factor:")
-            show(io, mime, F.U)
-        else
-            print(io, "Failed factorization of type $(typeof(F))")
-        end
-    end
-end
-
-function deserialize(s::AbstractSerializer, t::Type{UmfpackLU{Tv,Ti}}) where {Tv,Ti}
-    symbolic = deserialize(s)
-    numeric  = deserialize(s)
-    m        = deserialize(s)
-    n        = deserialize(s)
-    colptr   = deserialize(s)
-    rowval   = deserialize(s)
-    nzval    = deserialize(s)
-    status   = deserialize(s)
-    obj      = UmfpackLU{Tv,Ti}(symbolic, numeric, m, n, colptr, rowval, nzval, status)
-
-    finalizer(umfpack_free_symbolic, obj)
-
-    return obj
-end
-
-## Wrappers for UMFPACK functions
-
-# generate the name of the C function according to the value and integer types
-umf_nm(nm,Tv,Ti) = "umfpack_" * (Tv === :Float64 ? "d" : "z") * (Ti === :Int64 ? "l_" : "i_") * nm
-
-for itype in UmfpackIndexTypes
-    sym_r = umf_nm("symbolic", :Float64, itype)
-    sym_c = umf_nm("symbolic", :ComplexF64, itype)
-    num_r = umf_nm("numeric", :Float64, itype)
-    num_c = umf_nm("numeric", :ComplexF64, itype)
-    sol_r = umf_nm("solve", :Float64, itype)
-    sol_c = umf_nm("solve", :ComplexF64, itype)
-    det_r = umf_nm("get_determinant", :Float64, itype)
-    det_z = umf_nm("get_determinant", :ComplexF64, itype)
-    lunz_r = umf_nm("get_lunz", :Float64, itype)
-    lunz_z = umf_nm("get_lunz", :ComplexF64, itype)
-    get_num_r = umf_nm("get_numeric", :Float64, itype)
-    get_num_z = umf_nm("get_numeric", :ComplexF64, itype)
-    @eval begin
-        function umfpack_symbolic!(U::UmfpackLU{Float64,$itype})
-            if U.symbolic != C_NULL return U end
-            tmp = Vector{Ptr{Cvoid}}(undef, 1)
-            @isok ccall(($sym_r, :libumfpack), $itype,
-                        ($itype, $itype, Ptr{$itype}, Ptr{$itype}, Ptr{Float64}, Ptr{Cvoid},
-                         Ptr{Float64}, Ptr{Float64}),
-                        U.m, U.n, U.colptr, U.rowval, U.nzval, tmp,
-                        umf_ctrl, umf_info)
-            U.symbolic = tmp[1]
-            return U
-        end
-        function umfpack_symbolic!(U::UmfpackLU{ComplexF64,$itype})
-            if U.symbolic != C_NULL return U end
-            tmp = Vector{Ptr{Cvoid}}(undef, 1)
-            @isok ccall(($sym_c, :libumfpack), $itype,
-                        ($itype, $itype, Ptr{$itype}, Ptr{$itype}, Ptr{Float64}, Ptr{Float64}, Ptr{Cvoid},
-                         Ptr{Float64}, Ptr{Float64}),
-                        U.m, U.n, U.colptr, U.rowval, real(U.nzval), imag(U.nzval), tmp,
-                        umf_ctrl, umf_info)
-            U.symbolic = tmp[1]
-            return U
-        end
-        function umfpack_numeric!(U::UmfpackLU{Float64,$itype}; reuse_numeric = true)
-            if (reuse_numeric && U.numeric != C_NULL) return U end
-            if U.symbolic == C_NULL umfpack_symbolic!(U) end
-            tmp = Vector{Ptr{Cvoid}}(undef, 1)
-            status = ccall(($num_r, :libumfpack), $itype,
-                           (Ptr{$itype}, Ptr{$itype}, Ptr{Float64}, Ptr{Cvoid}, Ptr{Cvoid},
-                            Ptr{Float64}, Ptr{Float64}),
-                           U.colptr, U.rowval, U.nzval, U.symbolic, tmp,
-                           umf_ctrl, umf_info)
-            U.status = status
-            if status != UMFPACK_WARNING_singular_matrix
-                umferror(status)
-            end
-            U.numeric != C_NULL && umfpack_free_numeric(U)
-            U.numeric = tmp[1]
-            return U
-        end
-        function umfpack_numeric!(U::UmfpackLU{ComplexF64,$itype}; reuse_numeric = true)
-            if (reuse_numeric && U.numeric != C_NULL) return U end
-            if U.symbolic == C_NULL umfpack_symbolic!(U) end
-            tmp = Vector{Ptr{Cvoid}}(undef, 1)
-            status = ccall(($num_c, :libumfpack), $itype,
-                           (Ptr{$itype}, Ptr{$itype}, Ptr{Float64}, Ptr{Float64}, Ptr{Cvoid}, Ptr{Cvoid},
-                            Ptr{Float64}, Ptr{Float64}),
-                           U.colptr, U.rowval, real(U.nzval), imag(U.nzval), U.symbolic, tmp,
-                           umf_ctrl, umf_info)
-            U.status = status
-            if status != UMFPACK_WARNING_singular_matrix
-                umferror(status)
-            end
-            U.numeric != C_NULL && umfpack_free_numeric(U)
-            U.numeric = tmp[1]
-            return U
-        end
-        function solve!(x::StridedVector{Float64}, lu::UmfpackLU{Float64,$itype}, b::StridedVector{Float64}, typ::Integer)
-            if x === b
-                throw(ArgumentError("output array must not be aliased with input array"))
-            end
-            if stride(x, 1) != 1 || stride(b, 1) != 1
-                throw(ArgumentError("in and output vectors must have unit strides"))
-            end
-            umfpack_numeric!(lu)
-            (size(b,1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
-            @isok ccall(($sol_r, :libumfpack), $itype,
-                ($itype, Ptr{$itype}, Ptr{$itype}, Ptr{Float64},
-                 Ptr{Float64}, Ptr{Float64}, Ptr{Cvoid}, Ptr{Float64},
-                 Ptr{Float64}),
-                typ, lu.colptr, lu.rowval, lu.nzval,
-                x, b, lu.numeric, umf_ctrl,
-                umf_info)
-            return x
-        end
-        function solve!(x::StridedVector{ComplexF64}, lu::UmfpackLU{ComplexF64,$itype}, b::StridedVector{ComplexF64}, typ::Integer)
-            if x === b
-                throw(ArgumentError("output array must not be aliased with input array"))
-            end
-            if stride(x, 1) != 1 || stride(b, 1) != 1
-                throw(ArgumentError("in and output vectors must have unit strides"))
-            end
-            umfpack_numeric!(lu)
-            (size(b, 1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch())
-            n = size(b, 1)
-            @isok ccall(($sol_c, :libumfpack), $itype,
-                        ($itype, Ptr{$itype}, Ptr{$itype}, Ptr{Float64},
-                         Ptr{Float64}, Ptr{Float64}, Ptr{Float64}, Ptr{Float64},
-                         Ptr{Float64}, Ptr{Cvoid}, Ptr{Float64}, Ptr{Float64}),
-                        typ, lu.colptr, lu.rowval, lu.nzval,
-                        C_NULL, x, C_NULL, b,
-                        C_NULL, lu.numeric, umf_ctrl, umf_info)
-            return x
-        end
-        function det(lu::UmfpackLU{Float64,$itype})
-            mx = Ref{Float64}()
-            @isok ccall(($det_r,:libumfpack), $itype,
-                           (Ptr{Float64},Ptr{Float64},Ptr{Cvoid},Ptr{Float64}),
-                           mx, C_NULL, lu.numeric, umf_info)
-            mx[]
-        end
-        function det(lu::UmfpackLU{ComplexF64,$itype})
-            mx = Ref{Float64}()
-            mz = Ref{Float64}()
-            @isok ccall(($det_z,:libumfpack), $itype,
-                        (Ptr{Float64},Ptr{Float64},Ptr{Float64},Ptr{Cvoid},Ptr{Float64}),
-                        mx, mz, C_NULL, lu.numeric, umf_info)
-            complex(mx[], mz[])
-        end
-        function umf_lunz(lu::UmfpackLU{Float64,$itype})
-            lnz = Ref{$itype}()
-            unz = Ref{$itype}()
-            n_row = Ref{$itype}()
-            n_col = Ref{$itype}()
-            nz_diag = Ref{$itype}()
-            @isok ccall(($lunz_r,:libumfpack), $itype,
-                           (Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{Cvoid}),
-                           lnz, unz, n_row, n_col, nz_diag, lu.numeric)
-            (lnz[], unz[], n_row[], n_col[], nz_diag[])
-        end
-        function umf_lunz(lu::UmfpackLU{ComplexF64,$itype})
-            lnz = Ref{$itype}()
-            unz = Ref{$itype}()
-            n_row = Ref{$itype}()
-            n_col = Ref{$itype}()
-            nz_diag = Ref{$itype}()
-            @isok ccall(($lunz_z,:libumfpack), $itype,
-                           (Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{$itype},Ptr{Cvoid}),
-                           lnz, unz, n_row, n_col, nz_diag, lu.numeric)
-            (lnz[], unz[], n_row[], n_col[], nz_diag[])
-        end
-        function umf_extract(lu::UmfpackLU{Float64,$itype})
-            umfpack_numeric!(lu)        # ensure the numeric decomposition exists
-            (lnz, unz, n_row, n_col, nz_diag) = umf_lunz(lu)
-            Lp = Vector{$itype}(undef, n_row + 1)
-            Lj = Vector{$itype}(undef, lnz) # L is returned in CSR (compressed sparse row) format
-            Lx = Vector{Float64}(undef, lnz)
-            Up = Vector{$itype}(undef, n_col + 1)
-            Ui = Vector{$itype}(undef, unz)
-            Ux = Vector{Float64}(undef, unz)
-            P  = Vector{$itype}(undef, n_row)
-            Q  = Vector{$itype}(undef, n_col)
-            Rs = Vector{Float64}(undef, n_row)
-            @isok ccall(($get_num_r,:libumfpack), $itype,
-                        (Ptr{$itype},Ptr{$itype},Ptr{Float64},
-                         Ptr{$itype},Ptr{$itype},Ptr{Float64},
-                         Ptr{$itype},Ptr{$itype},Ptr{Cvoid},
-                         Ref{$itype},Ptr{Float64},Ptr{Cvoid}),
-                        Lp,Lj,Lx,
-                        Up,Ui,Ux,
-                        P, Q, C_NULL,
-                        0, Rs, lu.numeric)
-            (copy(transpose(SparseMatrixCSC(min(n_row, n_col), n_row, increment!(Lp), increment!(Lj), Lx))),
-             SparseMatrixCSC(min(n_row, n_col), n_col, increment!(Up), increment!(Ui), Ux),
-             increment!(P), increment!(Q), Rs)
-        end
-        function umf_extract(lu::UmfpackLU{ComplexF64,$itype})
-            umfpack_numeric!(lu)        # ensure the numeric decomposition exists
-            (lnz, unz, n_row, n_col, nz_diag) = umf_lunz(lu)
-            Lp = Vector{$itype}(undef, n_row + 1)
-            Lj = Vector{$itype}(undef, lnz) # L is returned in CSR (compressed sparse row) format
-            Lx = Vector{Float64}(undef, lnz)
-            Lz = Vector{Float64}(undef, lnz)
-            Up = Vector{$itype}(undef, n_col + 1)
-            Ui = Vector{$itype}(undef, unz)
-            Ux = Vector{Float64}(undef, unz)
-            Uz = Vector{Float64}(undef, unz)
-            P  = Vector{$itype}(undef, n_row)
-            Q  = Vector{$itype}(undef, n_col)
-            Rs = Vector{Float64}(undef, n_row)
-            @isok ccall(($get_num_z,:libumfpack), $itype,
-                        (Ptr{$itype},Ptr{$itype},Ptr{Float64},Ptr{Float64},
-                         Ptr{$itype},Ptr{$itype},Ptr{Float64},Ptr{Float64},
-                         Ptr{$itype},Ptr{$itype},Ptr{Cvoid}, Ptr{Cvoid},
-                         Ref{$itype},Ptr{Float64},Ptr{Cvoid}),
-                        Lp,Lj,Lx,Lz,
-                        Up,Ui,Ux,Uz,
-                        P, Q, C_NULL, C_NULL,
-                        0, Rs, lu.numeric)
-            (copy(transpose(SparseMatrixCSC(min(n_row, n_col), n_row, increment!(Lp), increment!(Lj), complex.(Lx, Lz)))),
-             SparseMatrixCSC(min(n_row, n_col), n_col, increment!(Up), increment!(Ui), complex.(Ux, Uz)),
-             increment!(P), increment!(Q), Rs)
-        end
-    end
-end
-
-function nnz(lu::UmfpackLU)
-    lnz, unz, = umf_lunz(lu)
-    return Int(lnz + unz)
-end
-
-LinearAlgebra.issuccess(lu::UmfpackLU) = lu.status == UMFPACK_OK
-
-### Solve with Factorization
-
-import LinearAlgebra.ldiv!
-
-ldiv!(lu::UmfpackLU{T}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    ldiv!(B, lu, copy(B))
-ldiv!(translu::Transpose{T,<:UmfpackLU{T}}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    (lu = translu.parent; ldiv!(B, transpose(lu), copy(B)))
-ldiv!(adjlu::Adjoint{T,<:UmfpackLU{T}}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    (lu = adjlu.parent; ldiv!(B, adjoint(lu), copy(B)))
-ldiv!(lu::UmfpackLU{Float64}, B::StridedVecOrMat{<:Complex}) =
-    ldiv!(B, lu, copy(B))
-ldiv!(translu::Transpose{Float64,<:UmfpackLU{Float64}}, B::StridedVecOrMat{<:Complex}) =
-    (lu = translu.parent; ldiv!(B, transpose(lu), copy(B)))
-ldiv!(adjlu::Adjoint{Float64,<:UmfpackLU{Float64}}, B::StridedVecOrMat{<:Complex}) =
-    (lu = adjlu.parent; ldiv!(B, adjoint(lu), copy(B)))
-
-ldiv!(X::StridedVecOrMat{T}, lu::UmfpackLU{T}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    _Aq_ldiv_B!(X, lu, B, UMFPACK_A)
-ldiv!(X::StridedVecOrMat{T}, translu::Transpose{T,<:UmfpackLU{T}}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    (lu = translu.parent; _Aq_ldiv_B!(X, lu, B, UMFPACK_Aat))
-ldiv!(X::StridedVecOrMat{T}, adjlu::Adjoint{T,<:UmfpackLU{T}}, B::StridedVecOrMat{T}) where {T<:UMFVTypes} =
-    (lu = adjlu.parent; _Aq_ldiv_B!(X, lu, B, UMFPACK_At))
-ldiv!(X::StridedVecOrMat{Tb}, lu::UmfpackLU{Float64}, B::StridedVecOrMat{Tb}) where {Tb<:Complex} =
-    _Aq_ldiv_B!(X, lu, B, UMFPACK_A)
-ldiv!(X::StridedVecOrMat{Tb}, translu::Transpose{Float64,<:UmfpackLU{Float64}}, B::StridedVecOrMat{Tb}) where {Tb<:Complex} =
-    (lu = translu.parent; _Aq_ldiv_B!(X, lu, B, UMFPACK_Aat))
-ldiv!(X::StridedVecOrMat{Tb}, adjlu::Adjoint{Float64,<:UmfpackLU{Float64}}, B::StridedVecOrMat{Tb}) where {Tb<:Complex} =
-    (lu = adjlu.parent; _Aq_ldiv_B!(X, lu, B, UMFPACK_At))
-
-function _Aq_ldiv_B!(X::StridedVecOrMat, lu::UmfpackLU, B::StridedVecOrMat, transposeoptype)
-    if size(X, 2) != size(B, 2)
-        throw(DimensionMismatch("input and output arrays must have same number of columns"))
-    end
-    _AqldivB_kernel!(X, lu, B, transposeoptype)
-    return X
-end
-function _AqldivB_kernel!(x::StridedVector{T}, lu::UmfpackLU{T},
-                          b::StridedVector{T}, transposeoptype) where T<:UMFVTypes
-    solve!(x, lu, b, transposeoptype)
-end
-function _AqldivB_kernel!(X::StridedMatrix{T}, lu::UmfpackLU{T},
-                          B::StridedMatrix{T}, transposeoptype) where T<:UMFVTypes
-    for col in 1:size(X, 2)
-        solve!(view(X, :, col), lu, view(B, :, col), transposeoptype)
-    end
-end
-function _AqldivB_kernel!(x::StridedVector{Tb}, lu::UmfpackLU{Float64},
-                          b::StridedVector{Tb}, transposeoptype) where Tb<:Complex
-    r, i = similar(b, Float64), similar(b, Float64)
-    solve!(r, lu, Vector{Float64}(real(b)), transposeoptype)
-    solve!(i, lu, Vector{Float64}(imag(b)), transposeoptype)
-    map!(complex, x, r, i)
-end
-function _AqldivB_kernel!(X::StridedMatrix{Tb}, lu::UmfpackLU{Float64},
-                          B::StridedMatrix{Tb}, transposeoptype) where Tb<:Complex
-    r = similar(B, Float64, size(B, 1))
-    i = similar(B, Float64, size(B, 1))
-    for j in 1:size(B, 2)
-        solve!(r, lu, Vector{Float64}(real(view(B, :, j))), transposeoptype)
-        solve!(i, lu, Vector{Float64}(imag(view(B, :, j))), transposeoptype)
-        map!(complex, view(X, :, j), r, i)
-    end
-end
-
-
-@inline function getproperty(lu::UmfpackLU, d::Symbol)
-    if d === :L || d === :U || d === :p || d === :q || d === :Rs || d === :(:)
-        # Guard the call to umf_extract behaind a branch to avoid infinite recursion
-        L, U, p, q, Rs = umf_extract(lu)
-        if d === :L
-            return L
-        elseif d === :U
-            return U
-        elseif d === :p
-            return p
-        elseif d === :q
-            return q
-        elseif d === :Rs
-            return Rs
-        elseif d === :(:)
-            return (L, U, p, q, Rs)
-        end
-    else
-        getfield(lu, d)
-    end
-end
-
-for Tv in (:Float64, :ComplexF64), Ti in UmfpackIndexTypes
-    f = Symbol(umf_nm("free_symbolic", Tv, Ti))
-    @eval begin
-        function ($f)(symb::Ptr{Cvoid})
-            tmp = [symb]
-            ccall(($(string(f)), :libumfpack), Cvoid, (Ptr{Cvoid},), tmp)
-        end
-
-        function umfpack_free_symbolic(lu::UmfpackLU{$Tv,$Ti})
-            if lu.symbolic == C_NULL return lu end
-            umfpack_free_numeric(lu)
-            ($f)(lu.symbolic)
-            lu.symbolic = C_NULL
-            return lu
-        end
-    end
-
-    f = Symbol(umf_nm("free_numeric", Tv, Ti))
-    @eval begin
-        function ($f)(num::Ptr{Cvoid})
-            tmp = [num]
-            ccall(($(string(f)), :libumfpack), Cvoid, (Ptr{Cvoid},), tmp)
-        end
-        function umfpack_free_numeric(lu::UmfpackLU{$Tv,$Ti})
-            if lu.numeric == C_NULL return lu end
-            ($f)(lu.numeric)
-            lu.numeric = C_NULL
-            return lu
-        end
-    end
-end
-
-function umfpack_report_symbolic(symb::Ptr{Cvoid}, level::Real)
-    old_prl::Float64 = umf_ctrl[UMFPACK_PRL]
-    umf_ctrl[UMFPACK_PRL] = Float64(level)
-    @isok ccall((:umfpack_dl_report_symbolic, :libumfpack), Int,
-                (Ptr{Cvoid}, Ptr{Float64}), symb, umf_ctrl)
-    umf_ctrl[UMFPACK_PRL] = old_prl
-end
-
-umfpack_report_symbolic(symb::Ptr{Cvoid}) = umfpack_report_symbolic(symb, 4.)
-
-function umfpack_report_symbolic(lu::UmfpackLU, level::Real)
-    umfpack_report_symbolic(umfpack_symbolic!(lu).symbolic, level)
-end
-
-umfpack_report_symbolic(lu::UmfpackLU) = umfpack_report_symbolic(lu.symbolic,4.)
-function umfpack_report_numeric(num::Ptr{Cvoid}, level::Real)
-    old_prl::Float64 = umf_ctrl[UMFPACK_PRL]
-    umf_ctrl[UMFPACK_PRL] = Float64(level)
-    @isok ccall((:umfpack_dl_report_numeric, :libumfpack), Int,
-                (Ptr{Cvoid}, Ptr{Float64}), num, umf_ctrl)
-    umf_ctrl[UMFPACK_PRL] = old_prl
-end
-
-umfpack_report_numeric(num::Ptr{Cvoid}) = umfpack_report_numeric(num, 4.)
-function umfpack_report_numeric(lu::UmfpackLU, level::Real)
-    umfpack_report_numeric(umfpack_numeric!(lu).numeric, level)
-end
-
-umfpack_report_numeric(lu::UmfpackLU) = umfpack_report_numeric(lu,4.)
-
-end # UMFPACK module
diff --git a/stdlib/SuiteSparse/src/umfpack_h.jl b/stdlib/SuiteSparse/src/umfpack_h.jl
deleted file mode 100644
index 985f9387fcc756..00000000000000
--- a/stdlib/SuiteSparse/src/umfpack_h.jl
+++ /dev/null
@@ -1,43 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-## UMFPACK
-
-## Type of solve
-const UMFPACK_A     =  0     # Ax=b
-const UMFPACK_At    =  1     # adjoint(A)x=b
-const UMFPACK_Aat   =  2     # transpose(A)x=b
-const UMFPACK_Pt_L  =  3     # adjoint(P)Lx=b
-const UMFPACK_L     =  4     # Lx=b
-const UMFPACK_Lt_P  =  5     # adjoint(L)Px=b
-const UMFPACK_Lat_P =  6     # transpose(L)Px=b
-const UMFPACK_Lt    =  7     # adjoint(L)x=b
-const UMFPACK_Lat   =  8     # transpose(L)x=b
-const UMFPACK_U_Qt  =  9     # U*adjoint(Q)x=b
-const UMFPACK_U     =  10    # Ux=b
-const UMFPACK_Q_Ut  =  11    # Q*adjoint(U)x=b
-const UMFPACK_Q_Uat =  12    # Q*transpose(U)x=b
-const UMFPACK_Ut    =  13    # adjoint(U)x=b
-const UMFPACK_Uat   =  14    # transpose(U)x=b
-
-## Sizes of Control and Info arrays for returning information from solver
-const UMFPACK_INFO = 90
-const UMFPACK_CONTROL = 20
-const UMFPACK_PRL = 1
-
-## Status codes
-const UMFPACK_OK = 0
-const UMFPACK_WARNING_singular_matrix       = 1
-const UMFPACK_WARNING_determinant_underflow = 2
-const UMFPACK_WARNING_determinant_overflow  = 3
-const UMFPACK_ERROR_out_of_memory           = -1
-const UMFPACK_ERROR_invalid_Numeric_object  = -3
-const UMFPACK_ERROR_invalid_Symbolic_object = -4
-const UMFPACK_ERROR_argument_missing        = -5
-const UMFPACK_ERROR_n_nonpositive           = -6
-const UMFPACK_ERROR_invalid_matrix          = -8
-const UMFPACK_ERROR_different_pattern       = -11
-const UMFPACK_ERROR_invalid_system          = -13
-const UMFPACK_ERROR_invalid_permutation     = -15
-const UMFPACK_ERROR_internal_error          = -911
-const UMFPACK_ERROR_file_IO                 = -17
-const UMFPACK_ERROR_ordering_failed         = -18
diff --git a/stdlib/SuiteSparse/test/cholmod.jl b/stdlib/SuiteSparse/test/cholmod.jl
deleted file mode 100644
index 4ca98cabf2b3e6..00000000000000
--- a/stdlib/SuiteSparse/test/cholmod.jl
+++ /dev/null
@@ -1,887 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SuiteSparse.CHOLMOD
-using DelimitedFiles
-using Test
-using Random
-using Serialization
-using LinearAlgebra: issuccess, PosDefException, ZeroPivotException
-using SparseArrays
-using SparseArrays: getcolptr
-
-# CHOLMOD tests
-Random.seed!(123)
-
-@testset "based on deps/SuiteSparse-4.0.2/CHOLMOD/Demo/" begin
-
-# chm_rdsp(joinpath(Sys.BINDIR, "../../deps/SuiteSparse-4.0.2/CHOLMOD/Demo/Matrix/bcsstk01.tri"))
-# because the file may not exist in binary distributions and when a system suitesparse library
-# is used
-
-## Result from C program
-## ---------------------------------- cholmod_demo:
-## norm (A,inf) = 3.57095e+09
-## norm (A,1)   = 3.57095e+09
-## CHOLMOD sparse:  A:  48-by-48, nz 224, upper.  OK
-## CHOLMOD dense:   B:  48-by-1,   OK
-## bnorm 1.97917
-## Analyze: flop 6009 lnz 489
-## Factorizing A
-## CHOLMOD factor:  L:  48-by-48  simplicial, LDL'. nzmax 489.  nz 489  OK
-## Ordering: AMD     fl/lnz       12.3  lnz/anz        2.2
-## ints in L: 782, doubles in L: 489
-## factor flops 6009 nnz(L)             489 (w/no amalgamation)
-## nnz(A*A'):             224
-## flops / nnz(L):      12.3
-## nnz(L) / nnz(A):      2.2
-## analyze cputime:        0.0000
-## factor  cputime:         0.0000 mflop:      0.0
-## solve   cputime:         0.0000 mflop:      0.0
-## overall cputime:         0.0000 mflop:      0.0
-## peak memory usage:            0 (MB)
-## residual  2.5e-19 (|Ax-b|/(|A||x|+|b|))
-## residual  1.3e-19 (|Ax-b|/(|A||x|+|b|)) after iterative refinement
-## rcond     9.5e-06
-
-    n = 48
-    A = CHOLMOD.Sparse(n, n,
-        CHOLMOD.SuiteSparse_long[0,1,2,3,6,9,12,15,18,20,25,30,34,36,39,43,47,52,58,
-        62,67,71,77,84,90,93,95,98,103,106,110,115,119,123,130,136,142,146,150,155,
-        161,167,174,182,189,197,207,215,224], # zero-based column pointers
-        CHOLMOD.SuiteSparse_long[0,1,2,1,2,3,0,2,4,0,1,5,0,4,6,1,3,7,2,8,1,3,7,8,9,
-        0,4,6,8,10,5,6,7,11,6,12,7,11,13,8,10,13,14,9,13,14,15,8,10,12,14,16,7,11,
-        12,13,16,17,0,12,16,18,1,5,13,15,19,2,4,14,20,3,13,15,19,20,21,2,4,12,16,18,
-        20,22,1,5,17,18,19,23,0,5,24,1,25,2,3,26,2,3,25,26,27,4,24,28,0,5,24,29,6,
-        11,24,28,30,7,25,27,31,8,9,26,32,8,9,25,27,31,32,33,10,24,28,30,32,34,6,11,
-        29,30,31,35,12,17,30,36,13,31,35,37,14,15,32,34,38,14,15,33,37,38,39,16,32,
-        34,36,38,40,12,17,31,35,36,37,41,12,16,17,18,23,36,40,42,13,14,15,19,37,39,
-        43,13,14,15,20,21,38,43,44,13,14,15,20,21,37,39,43,44,45,12,16,17,22,36,40,
-        42,46,12,16,17,18,23,41,42,46,47],
-        [2.83226851852e6,1.63544753086e6,1.72436728395e6,-2.0e6,-2.08333333333e6,
-        1.00333333333e9,1.0e6,-2.77777777778e6,1.0675e9,2.08333333333e6,
-        5.55555555555e6,1.53533333333e9,-3333.33333333,-1.0e6,2.83226851852e6,
-        -6666.66666667,2.0e6,1.63544753086e6,-1.68e6,1.72436728395e6,-2.0e6,4.0e8,
-        2.0e6,-2.08333333333e6,1.00333333333e9,1.0e6,2.0e8,-1.0e6,-2.77777777778e6,
-        1.0675e9,-2.0e6,2.08333333333e6,5.55555555555e6,1.53533333333e9,-2.8e6,
-        2.8360994695e6,-30864.1975309,-5.55555555555e6,1.76741074446e6,
-        -15432.0987654,2.77777777778e6,517922.131816,3.89003806848e6,
-        -3.33333333333e6,4.29857058902e6,-2.6349902747e6,1.97572063531e9,
-        -2.77777777778e6,3.33333333333e8,-2.14928529451e6,2.77777777778e6,
-        1.52734651547e9,5.55555555555e6,6.66666666667e8,2.35916180402e6,
-        -5.55555555555e6,-1.09779731332e8,1.56411143711e9,-2.8e6,-3333.33333333,
-        1.0e6,2.83226851852e6,-30864.1975309,-5.55555555555e6,-6666.66666667,
-        -2.0e6,1.63544753086e6,-15432.0987654,2.77777777778e6,-1.68e6,
-        1.72436728395e6,-3.33333333333e6,2.0e6,4.0e8,-2.0e6,-2.08333333333e6,
-        1.00333333333e9,-2.77777777778e6,3.33333333333e8,-1.0e6,2.0e8,1.0e6,
-        2.77777777778e6,1.0675e9,5.55555555555e6,6.66666666667e8,-2.0e6,
-        2.08333333333e6,-5.55555555555e6,1.53533333333e9,-28935.1851852,
-        -2.08333333333e6,60879.6296296,-1.59791666667e6,3.37291666667e6,
-        -28935.1851852,2.08333333333e6,2.41171296296e6,-2.08333333333e6,
-        1.0e8,-2.5e6,-416666.666667,1.5e9,-833333.333333,1.25e6,5.01833333333e8,
-        2.08333333333e6,1.0e8,416666.666667,5.025e8,-28935.1851852,
-        -2.08333333333e6,-4166.66666667,-1.25e6,3.98587962963e6,-1.59791666667e6,
-        -8333.33333333,2.5e6,3.41149691358e6,-28935.1851852,2.08333333333e6,
-        -2.355e6,2.43100308642e6,-2.08333333333e6,1.0e8,-2.5e6,5.0e8,2.5e6,
-        -416666.666667,1.50416666667e9,-833333.333333,1.25e6,2.5e8,-1.25e6,
-        -3.47222222222e6,1.33516666667e9,2.08333333333e6,1.0e8,-2.5e6,
-        416666.666667,6.94444444444e6,2.16916666667e9,-28935.1851852,
-        -2.08333333333e6,-3.925e6,3.98587962963e6,-1.59791666667e6,
-        -38580.2469136,-6.94444444444e6,3.41149691358e6,-28935.1851852,
-        2.08333333333e6,-19290.1234568,3.47222222222e6,2.43100308642e6,
-        -2.08333333333e6,1.0e8,-4.16666666667e6,2.5e6,-416666.666667,
-        1.50416666667e9,-833333.333333,-3.47222222222e6,4.16666666667e8,
-        -1.25e6,3.47222222222e6,1.33516666667e9,2.08333333333e6,1.0e8,
-        6.94444444445e6,8.33333333333e8,416666.666667,-6.94444444445e6,
-        2.16916666667e9,-3830.95098171,1.14928529451e6,-275828.470683,
-        -28935.1851852,-2.08333333333e6,-4166.66666667,1.25e6,64710.5806113,
-        -131963.213599,-517922.131816,-2.29857058902e6,-1.59791666667e6,
-        -8333.33333333,-2.5e6,3.50487988027e6,-517922.131816,-2.16567078453e6,
-        551656.941366,-28935.1851852,2.08333333333e6,-2.355e6,517922.131816,
-        4.57738374749e6,2.29857058902e6,-551656.941367,4.8619365099e8,
-        -2.08333333333e6,1.0e8,2.5e6,5.0e8,-4.79857058902e6,134990.2747,
-        2.47238730198e9,-1.14928529451e6,2.29724661236e8,-5.57173510779e7,
-        -833333.333333,-1.25e6,2.5e8,2.39928529451e6,9.61679848804e8,275828.470683,
-        -5.57173510779e7,1.09411960038e7,2.08333333333e6,1.0e8,-2.5e6,
-        140838.195984,-1.09779731332e8,5.31278103775e8], 1)
-    @test CHOLMOD.norm_sparse(A, 0) ≈ 3.570948074697437e9
-    @test CHOLMOD.norm_sparse(A, 1) ≈ 3.570948074697437e9
-    @test_throws ArgumentError CHOLMOD.norm_sparse(A, 2)
-    @test CHOLMOD.isvalid(A)
-
-    x = fill(1., n)
-    b = A*x
-
-    chma = ldlt(A)                      # LDL' form
-    @test CHOLMOD.isvalid(chma)
-    @test unsafe_load(pointer(chma)).is_ll == 0    # check that it is in fact an LDLt
-    @test chma\b ≈ x
-    @test nnz(ldlt(A, perm=1:size(A,1))) > nnz(chma)
-    @test size(chma) == size(A)
-    chmal = CHOLMOD.FactorComponent(chma, :L)
-    @test size(chmal) == size(A)
-    @test size(chmal, 1) == size(A, 1)
-
-    chma = cholesky(A)                      # LL' form
-    @test CHOLMOD.isvalid(chma)
-    @test unsafe_load(pointer(chma)).is_ll == 1    # check that it is in fact an LLt
-    @test chma\b ≈ x
-    @test nnz(chma) == 489
-    @test nnz(cholesky(A, perm=1:size(A,1))) > nnz(chma)
-    @test size(chma) == size(A)
-    chmal = CHOLMOD.FactorComponent(chma, :L)
-    @test size(chmal) == size(A)
-    @test size(chmal, 1) == size(A, 1)
-
-    @testset "eltype" begin
-        @test eltype(Dense(fill(1., 3))) == Float64
-        @test eltype(A) == Float64
-        @test eltype(chma) == Float64
-    end
-end
-
-@testset "lp_afiro example" begin
-    afiro = CHOLMOD.Sparse(27, 51,
-        CHOLMOD.SuiteSparse_long[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
-        23,25,27,29,33,37,41,45,47,49,51,53,55,57,59,63,65,67,69,71,75,79,83,87,89,
-        91,93,95,97,99,101,102],
-        CHOLMOD.SuiteSparse_long[2,3,6,7,8,9,12,13,16,17,18,19,20,21,22,23,24,25,26,
-        0,1,2,23,0,3,0,21,1,25,4,5,6,24,4,5,7,24,4,5,8,24,4,5,9,24,6,20,7,20,8,20,9,
-        20,3,4,4,22,5,26,10,11,12,21,10,13,10,23,10,20,11,25,14,15,16,22,14,15,17,
-        22,14,15,18,22,14,15,19,22,16,20,17,20,18,20,19,20,13,15,15,24,14,26,15],
-        [1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,
-        1.0,-1.0,-1.06,1.0,0.301,1.0,-1.0,1.0,-1.0,1.0,1.0,-1.0,-1.06,1.0,0.301,
-        -1.0,-1.06,1.0,0.313,-1.0,-0.96,1.0,0.313,-1.0,-0.86,1.0,0.326,-1.0,2.364,
-        -1.0,2.386,-1.0,2.408,-1.0,2.429,1.4,1.0,1.0,-1.0,1.0,1.0,-1.0,-0.43,1.0,
-        0.109,1.0,-1.0,1.0,-1.0,1.0,-1.0,1.0,1.0,-0.43,1.0,1.0,0.109,-0.43,1.0,1.0,
-        0.108,-0.39,1.0,1.0,0.108,-0.37,1.0,1.0,0.107,-1.0,2.191,-1.0,2.219,-1.0,
-        2.249,-1.0,2.279,1.4,-1.0,1.0,-1.0,1.0,1.0,1.0], 0)
-    afiro2 = CHOLMOD.aat(afiro, CHOLMOD.SuiteSparse_long[0:50;], CHOLMOD.SuiteSparse_long(1))
-    CHOLMOD.change_stype!(afiro2, -1)
-    chmaf = cholesky(afiro2)
-    y = afiro'*fill(1., size(afiro,1))
-    sol = chmaf\(afiro*y) # least squares solution
-    @test CHOLMOD.isvalid(sol)
-    pred = afiro'*sol
-    @test norm(afiro * (convert(Matrix, y) - convert(Matrix, pred))) < 1e-8
-end
-
-@testset "Issue 9160" begin
-    local A, B
-    A = sprand(10, 10, 0.1)
-    A = convert(SparseMatrixCSC{Float64,CHOLMOD.SuiteSparse_long}, A)
-    cmA = CHOLMOD.Sparse(A)
-
-    B = sprand(10, 10, 0.1)
-    B = convert(SparseMatrixCSC{Float64,CHOLMOD.SuiteSparse_long}, B)
-    cmB = CHOLMOD.Sparse(B)
-
-    # Ac_mul_B
-    @test sparse(cmA'*cmB) ≈ A'*B
-
-    # A_mul_Bc
-    @test sparse(cmA*cmB') ≈ A*B'
-
-    # A_mul_Ac
-    @test sparse(cmA*cmA') ≈ A*A'
-
-    # Ac_mul_A
-    @test sparse(cmA'*cmA) ≈ A'*A
-
-    # A_mul_Ac for symmetric A
-    A = 0.5*(A + copy(A'))
-    cmA = CHOLMOD.Sparse(A)
-    @test sparse(cmA*cmA') ≈ A*A'
-end
-
-@testset "Issue #9915" begin
-    sparseI = sparse(1.0I, 2, 2)
-    @test sparseI \ sparseI == sparseI
-end
-
-@testset "test Sparse constructor Symmetric and Hermitian input (and issymmetric and ishermitian)" begin
-    ACSC = sprandn(10, 10, 0.3) + I
-    @test issymmetric(Sparse(Symmetric(ACSC, :L)))
-    @test issymmetric(Sparse(Symmetric(ACSC, :U)))
-    @test ishermitian(Sparse(Hermitian(complex(ACSC), :L)))
-    @test ishermitian(Sparse(Hermitian(complex(ACSC), :U)))
-end
-
-@testset "test Sparse constructor for C_Sparse{Cvoid} (and read_sparse)" begin
-    mktempdir() do temp_dir
-        testfile = joinpath(temp_dir, "tmp.mtx")
-
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate real symmetric","3 3 4","1 1 1","2 2 1","3 2 0.5","3 3 1"])
-        @test sparse(CHOLMOD.Sparse(testfile)) == [1 0 0;0 1 0.5;0 0.5 1]
-        rm(testfile)
-
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate complex Hermitian",
-                        "3 3 4","1 1 1.0 0.0","2 2 1.0 0.0","3 2 0.5 0.5","3 3 1.0 0.0"])
-        @test sparse(CHOLMOD.Sparse(testfile)) == [1 0 0;0 1 0.5-0.5im;0 0.5+0.5im 1]
-        rm(testfile)
-
-        writedlm(testfile, ["%%MatrixMarket matrix coordinate real symmetric","%3 3 4","1 1 1","2 2 1","3 2 0.5","3 3 1"])
-        @test_throws ArgumentError sparse(CHOLMOD.Sparse(testfile))
-        rm(testfile)
-    end
-end
-
-@testset "test that Sparse(Ptr) constructor throws the right places" begin
-    @test_throws ArgumentError CHOLMOD.Sparse(convert(Ptr{CHOLMOD.C_Sparse{Float64}}, C_NULL))
-    @test_throws ArgumentError CHOLMOD.Sparse(convert(Ptr{CHOLMOD.C_Sparse{Cvoid}}, C_NULL))
-end
-
-## The struct pointer must be constructed by the library constructor and then modified afterwards to checks that the method throws
-@testset "illegal dtype (for now but should be supported at some point)" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint, CHOLMOD.SINGLE, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 4)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-@testset "illegal dtype" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint, 5, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 4)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-@testset "illegal xtype" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint, 3, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 3)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-@testset "illegal itype I" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint, CHOLMOD.INTLONG, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 2)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-@testset "illegal itype II" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Cvoid}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    puint = convert(Ptr{UInt32}, p)
-    unsafe_store!(puint,  5, 3*div(sizeof(Csize_t), 4) + 5*div(sizeof(Ptr{Cvoid}), 4) + 2)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.Sparse(p)
-end
-
-# Test Dense wrappers (only Float64 supported a present)
-
-@testset "High level interface" for elty in (Float64, ComplexF64)
-    local A, b
-    if elty == Float64
-        A = randn(5, 5)
-        b = randn(5)
-    else
-        A = complex.(randn(5, 5), randn(5, 5))
-        b = complex.(randn(5), randn(5))
-    end
-    ADense = CHOLMOD.Dense(A)
-    bDense = CHOLMOD.Dense(b)
-
-    @test_throws BoundsError ADense[6, 1]
-    @test_throws BoundsError ADense[1, 6]
-    @test copy(ADense) == ADense
-    @test CHOLMOD.norm_dense(ADense, 1) ≈ opnorm(A, 1)
-    @test CHOLMOD.norm_dense(ADense, 0) ≈ opnorm(A, Inf)
-    @test_throws ArgumentError CHOLMOD.norm_dense(ADense, 2)
-    @test_throws ArgumentError CHOLMOD.norm_dense(ADense, 3)
-
-    @test CHOLMOD.norm_dense(bDense, 2) ≈ norm(b)
-    @test CHOLMOD.check_dense(bDense)
-
-    AA = CHOLMOD.eye(3)
-    unsafe_store!(convert(Ptr{Csize_t}, pointer(AA)), 2, 1) # change size, but not stride, of Dense
-    @test convert(Matrix, AA) == Matrix(I, 2, 3)
-end
-
-@testset "Low level interface" begin
-    @test isa(CHOLMOD.zeros(3, 3, Float64), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.zeros(3, 3), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.zeros(3, 3, Float64), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.ones(3, 3), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.eye(3, 4, Float64), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.eye(3, 4), CHOLMOD.Dense{Float64})
-    @test isa(CHOLMOD.eye(3), CHOLMOD.Dense{Float64})
-    @test isa(copy(CHOLMOD.eye(3)), CHOLMOD.Dense{Float64})
-end
-
-# Test Sparse and Factor
-@testset "test free!" begin
-    p = ccall((:cholmod_l_allocate_sparse, :libcholmod), Ptr{CHOLMOD.C_Sparse{Float64}},
-        (Csize_t, Csize_t, Csize_t, Cint, Cint, Cint, Cint, Ptr{Cvoid}),
-        1, 1, 1, true, true, 0, CHOLMOD.REAL, CHOLMOD.common_struct[Threads.threadid()])
-    @test CHOLMOD.free!(p)
-end
-
-@testset "Core functionality" for elty in (Float64, ComplexF64)
-    A1 = sparse([1:5; 1], [1:5; 2], elty == Float64 ? randn(6) : complex.(randn(6), randn(6)))
-    A2 = sparse([1:5; 1], [1:5; 2], elty == Float64 ? randn(6) : complex.(randn(6), randn(6)))
-    A1pd = A1'A1
-    A1Sparse = CHOLMOD.Sparse(A1)
-    A2Sparse = CHOLMOD.Sparse(A2)
-    A1pdSparse = CHOLMOD.Sparse(
-        size(A1pd, 1),
-        size(A1pd, 2),
-        SuiteSparse.decrement(getcolptr(A1pd)),
-        SuiteSparse.decrement(rowvals(A1pd)),
-        nonzeros(A1pd))
-
-    ## High level interface
-    @test isa(CHOLMOD.Sparse(3, 3, [0,1,3,4], [0,2,1,2], fill(1., 4)), CHOLMOD.Sparse) # Sparse doesn't require columns to be sorted
-    @test_throws BoundsError A1Sparse[6, 1]
-    @test_throws BoundsError A1Sparse[1, 6]
-    @test sparse(A1Sparse) == A1
-    for i = 1:size(A1, 1)
-        A1[i, i] = real(A1[i, i])
-    end #Construct Hermitian matrix properly
-    @test CHOLMOD.sparse(CHOLMOD.Sparse(Hermitian(A1, :L))) == Hermitian(A1, :L)
-    @test CHOLMOD.sparse(CHOLMOD.Sparse(Hermitian(A1, :U))) == Hermitian(A1, :U)
-    @test_throws ArgumentError convert(SparseMatrixCSC{elty,Int}, A1pdSparse)
-    if elty <: Real
-        @test_throws ArgumentError convert(Symmetric{Float64,SparseMatrixCSC{Float64,Int}}, A1Sparse)
-    else
-        @test_throws ArgumentError convert(Hermitian{ComplexF64,SparseMatrixCSC{ComplexF64,Int}}, A1Sparse)
-    end
-    @test copy(A1Sparse) == A1Sparse
-    @test size(A1Sparse, 3) == 1
-    if elty <: Real # multiplication only defined for real matrices in CHOLMOD
-        @test A1Sparse*A2Sparse ≈ A1*A2
-        @test_throws DimensionMismatch CHOLMOD.Sparse(A1[:,1:4])*A2Sparse
-        @test A1Sparse'A2Sparse ≈ A1'A2
-        @test A1Sparse*A2Sparse' ≈ A1*A2'
-
-        @test A1Sparse*A1Sparse ≈ A1*A1
-        @test A1Sparse'A1Sparse ≈ A1'A1
-        @test A1Sparse*A1Sparse' ≈ A1*A1'
-
-        @test A1pdSparse*A1pdSparse ≈ A1pd*A1pd
-        @test A1pdSparse'A1pdSparse ≈ A1pd'A1pd
-        @test A1pdSparse*A1pdSparse' ≈ A1pd*A1pd'
-
-        @test_throws DimensionMismatch A1Sparse*CHOLMOD.eye(4, 5, elty)
-    end
-
-    # Factor
-    @test_throws ArgumentError cholesky(A1)
-    @test_throws ArgumentError cholesky(A1)
-    @test_throws ArgumentError cholesky(A1, shift=1.0)
-    @test_throws ArgumentError ldlt(A1)
-    @test_throws ArgumentError ldlt(A1, shift=1.0)
-    C = A1 + copy(adjoint(A1))
-    λmaxC = eigmax(Array(C))
-    b = fill(1., size(A1, 1))
-    @test_throws PosDefException cholesky(C - 2λmaxC*I)
-    @test_throws PosDefException cholesky(C, shift=-2λmaxC)
-    @test_throws ZeroPivotException ldlt(C - C[1,1]*I)
-    @test_throws ZeroPivotException ldlt(C, shift=-real(C[1,1]))
-    @test !isposdef(cholesky(C - 2λmaxC*I; check = false))
-    @test !isposdef(cholesky(C, shift=-2λmaxC; check = false))
-    @test !issuccess(ldlt(C - C[1,1]*I; check = false))
-    @test !issuccess(ldlt(C, shift=-real(C[1,1]); check = false))
-    F = cholesky(A1pd)
-    tmp = IOBuffer()
-    show(tmp, F)
-    @test tmp.size > 0
-    @test isa(CHOLMOD.Sparse(F), CHOLMOD.Sparse{elty})
-    @test_throws DimensionMismatch F\CHOLMOD.Dense(fill(elty(1), 4))
-    @test_throws DimensionMismatch F\CHOLMOD.Sparse(sparse(fill(elty(1), 4)))
-    b = fill(1., 5)
-    bT = fill(elty(1), 5)
-    @test F'\bT ≈ Array(A1pd)'\b
-    @test F'\sparse(bT) ≈ Array(A1pd)'\b
-    @test transpose(F)\bT ≈ conj(A1pd)'\bT
-    @test F\CHOLMOD.Sparse(sparse(bT)) ≈ A1pd\b
-    @test logdet(F) ≈ logdet(Array(A1pd))
-    @test det(F) == exp(logdet(F))
-    let # to test supernodal, we must use a larger matrix
-        Ftmp = sprandn(100, 100, 0.1)
-        Ftmp = Ftmp'Ftmp + I
-        @test logdet(cholesky(Ftmp)) ≈ logdet(Array(Ftmp))
-    end
-    @test logdet(ldlt(A1pd)) ≈ logdet(Array(A1pd))
-    @test isposdef(A1pd)
-    @test !isposdef(A1)
-    @test !isposdef(A1 + copy(A1') |> t -> t - 2eigmax(Array(t))*I)
-
-    if elty <: Real
-        @test CHOLMOD.issymmetric(Sparse(A1pd, 0))
-        @test CHOLMOD.Sparse(cholesky(Symmetric(A1pd, :L))) == CHOLMOD.Sparse(cholesky(A1pd))
-        F1 = CHOLMOD.Sparse(cholesky(Symmetric(A1pd, :L), shift=2))
-        F2 = CHOLMOD.Sparse(cholesky(A1pd, shift=2))
-        @test F1 == F2
-        @test CHOLMOD.Sparse(ldlt(Symmetric(A1pd, :L))) == CHOLMOD.Sparse(ldlt(A1pd))
-        F1 = CHOLMOD.Sparse(ldlt(Symmetric(A1pd, :L), shift=2))
-        F2 = CHOLMOD.Sparse(ldlt(A1pd, shift=2))
-        @test F1 == F2
-    else
-        @test !CHOLMOD.issymmetric(Sparse(A1pd, 0))
-        @test CHOLMOD.ishermitian(Sparse(A1pd, 0))
-        @test CHOLMOD.Sparse(cholesky(Hermitian(A1pd, :L))) == CHOLMOD.Sparse(cholesky(A1pd))
-        F1 = CHOLMOD.Sparse(cholesky(Hermitian(A1pd, :L), shift=2))
-        F2 = CHOLMOD.Sparse(cholesky(A1pd, shift=2))
-        @test F1 == F2
-        @test CHOLMOD.Sparse(ldlt(Hermitian(A1pd, :L))) == CHOLMOD.Sparse(ldlt(A1pd))
-        F1 = CHOLMOD.Sparse(ldlt(Hermitian(A1pd, :L), shift=2))
-        F2 = CHOLMOD.Sparse(ldlt(A1pd, shift=2))
-        @test F1 == F2
-    end
-
-    ### cholesky!/ldlt!
-    F = cholesky(A1pd)
-    CHOLMOD.change_factor!(F, false, false, true, true)
-    @test unsafe_load(pointer(F)).is_ll == 0
-    CHOLMOD.change_factor!(F, true, false, true, true)
-    @test CHOLMOD.Sparse(cholesky!(copy(F), A1pd)) ≈ CHOLMOD.Sparse(F) # surprisingly, this can cause small ulp size changes so we cannot test exact equality
-    @test size(F, 2) == 5
-    @test size(F, 3) == 1
-    @test_throws ArgumentError size(F, 0)
-
-    F = cholesky(A1pdSparse, shift=2)
-    @test isa(CHOLMOD.Sparse(F), CHOLMOD.Sparse{elty})
-    @test CHOLMOD.Sparse(cholesky!(copy(F), A1pd, shift=2.0)) ≈ CHOLMOD.Sparse(F) # surprisingly, this can cause small ulp size changes so we cannot test exact equality
-
-    F = ldlt(A1pd)
-    @test isa(CHOLMOD.Sparse(F), CHOLMOD.Sparse{elty})
-    @test CHOLMOD.Sparse(ldlt!(copy(F), A1pd)) ≈ CHOLMOD.Sparse(F) # surprisingly, this can cause small ulp size changes so we cannot test exact equality
-
-    F = ldlt(A1pdSparse, shift=2)
-    @test isa(CHOLMOD.Sparse(F), CHOLMOD.Sparse{elty})
-    @test CHOLMOD.Sparse(ldlt!(copy(F), A1pd, shift=2.0)) ≈ CHOLMOD.Sparse(F) # surprisingly, this can cause small ulp size changes so we cannot test exact equality
-
-    @test isa(CHOLMOD.factor_to_sparse!(F), CHOLMOD.Sparse)
-    @test_throws CHOLMOD.CHOLMODException CHOLMOD.factor_to_sparse!(F)
-
-    ## Low level interface
-    @test CHOLMOD.nnz(A1Sparse) == nnz(A1)
-    @test CHOLMOD.speye(5, 5, elty) == Matrix(I, 5, 5)
-    @test CHOLMOD.spzeros(5, 5, 5, elty) == zeros(elty, 5, 5)
-    if elty <: Real
-        @test CHOLMOD.copy(A1Sparse, 0, 1) == A1Sparse
-        @test CHOLMOD.horzcat(A1Sparse, A2Sparse, true) == [A1 A2]
-        @test CHOLMOD.vertcat(A1Sparse, A2Sparse, true) == [A1; A2]
-        svec = fill(elty(1), 1)
-        @test CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.SCALAR, A1Sparse) == A1Sparse
-        svec = fill(elty(1), 5)
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.SCALAR, A1Sparse)
-        @test CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.ROW, A1Sparse) == A1Sparse
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense([svec; 1]), CHOLMOD.ROW, A1Sparse)
-        @test CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.COL, A1Sparse) == A1Sparse
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense([svec; 1]), CHOLMOD.COL, A1Sparse)
-        @test CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.SYM, A1Sparse) == A1Sparse
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense([svec; 1]), CHOLMOD.SYM, A1Sparse)
-        @test_throws DimensionMismatch CHOLMOD.scale!(CHOLMOD.Dense(svec), CHOLMOD.SYM, CHOLMOD.Sparse(A1[:,1:4]))
-    else
-        @test_throws MethodError CHOLMOD.copy(A1Sparse, 0, 1) == A1Sparse
-        @test_throws MethodError CHOLMOD.horzcat(A1Sparse, A2Sparse, true) == [A1 A2]
-        @test_throws MethodError CHOLMOD.vertcat(A1Sparse, A2Sparse, true) == [A1; A2]
-    end
-
-    if elty <: Real
-        @test CHOLMOD.ssmult(A1Sparse, A2Sparse, 0, true, true) ≈ A1*A2
-        @test CHOLMOD.aat(A1Sparse, [0:size(A1,2)-1;], 1) ≈ A1*A1'
-        @test CHOLMOD.aat(A1Sparse, [0:1;], 1) ≈ A1[:,1:2]*A1[:,1:2]'
-        @test CHOLMOD.copy(A1Sparse, 0, 1) == A1Sparse
-    end
-
-    @test CHOLMOD.Sparse(CHOLMOD.Dense(A1Sparse)) == A1Sparse
-end
-
-@testset "extract factors" begin
-    Af = float([4 12 -16; 12 37 -43; -16 -43 98])
-    As = sparse(Af)
-    Lf = float([2 0 0; 6 1 0; -8 5 3])
-    LDf = float([4 0 0; 3 1 0; -4 5 9])  # D is stored along the diagonal
-    L_f = float([1 0 0; 3 1 0; -4 5 1])  # L by itself in LDLt of Af
-    D_f = float([4 0 0; 0 1 0; 0 0 9])
-    p = [2,3,1]
-    p_inv = [3,1,2]
-
-    @testset "cholesky, no permutation" begin
-        Fs = cholesky(As, perm=[1:3;])
-        @test Fs.p == [1:3;]
-        @test sparse(Fs.L) ≈ Lf
-        @test sparse(Fs) ≈ As
-        b = rand(3)
-        @test Fs\b ≈ Af\b
-        @test Fs.UP\(Fs.PtL\b) ≈ Af\b
-        @test Fs.L\b ≈ Lf\b
-        @test Fs.U\b ≈ Lf'\b
-        @test Fs.L'\b ≈ Lf'\b
-        @test Fs.U'\b ≈ Lf\b
-        @test Fs.PtL\b ≈ Lf\b
-        @test Fs.UP\b ≈ Lf'\b
-        @test Fs.PtL'\b ≈ Lf'\b
-        @test Fs.UP'\b ≈ Lf\b
-        @test_throws CHOLMOD.CHOLMODException Fs.D
-        @test_throws CHOLMOD.CHOLMODException Fs.LD
-        @test_throws CHOLMOD.CHOLMODException Fs.DU
-        @test_throws CHOLMOD.CHOLMODException Fs.PLD
-        @test_throws CHOLMOD.CHOLMODException Fs.DUPt
-    end
-
-    @testset "cholesky, with permutation" begin
-        Fs = cholesky(As, perm=p)
-        @test Fs.p == p
-        Afp = Af[p,p]
-        Lfp = cholesky(Afp).L
-        Ls = sparse(Fs.L)
-        @test Ls ≈ Lfp
-        @test Ls * Ls' ≈ Afp
-        P = sparse(1:3, Fs.p, ones(3))
-        @test P' * Ls * Ls' * P ≈ As
-        @test sparse(Fs) ≈ As
-        b = rand(3)
-        @test Fs\b ≈ Af\b
-        @test Fs.UP\(Fs.PtL\b) ≈ Af\b
-        @test Fs.L\b ≈ Lfp\b
-        @test Fs.U'\b ≈ Lfp\b
-        @test Fs.U\b ≈ Lfp'\b
-        @test Fs.L'\b ≈ Lfp'\b
-        @test Fs.PtL\b ≈ Lfp\b[p]
-        @test Fs.UP\b ≈ (Lfp'\b)[p_inv]
-        @test Fs.PtL'\b ≈ (Lfp'\b)[p_inv]
-        @test Fs.UP'\b ≈ Lfp\b[p]
-        @test_throws CHOLMOD.CHOLMODException Fs.PL
-        @test_throws CHOLMOD.CHOLMODException Fs.UPt
-        @test_throws CHOLMOD.CHOLMODException Fs.D
-        @test_throws CHOLMOD.CHOLMODException Fs.LD
-        @test_throws CHOLMOD.CHOLMODException Fs.DU
-        @test_throws CHOLMOD.CHOLMODException Fs.PLD
-        @test_throws CHOLMOD.CHOLMODException Fs.DUPt
-    end
-
-    @testset "ldlt, no permutation" begin
-        Fs = ldlt(As, perm=[1:3;])
-        @test Fs.p == [1:3;]
-        @test sparse(Fs.LD) ≈ LDf
-        @test sparse(Fs) ≈ As
-        b = rand(3)
-        @test Fs\b ≈ Af\b
-        @test Fs.UP\(Fs.PtLD\b) ≈ Af\b
-        @test Fs.DUP\(Fs.PtL\b) ≈ Af\b
-        @test Fs.L\b ≈ L_f\b
-        @test Fs.U\b ≈ L_f'\b
-        @test Fs.L'\b ≈ L_f'\b
-        @test Fs.U'\b ≈ L_f\b
-        @test Fs.PtL\b ≈ L_f\b
-        @test Fs.UP\b ≈ L_f'\b
-        @test Fs.PtL'\b ≈ L_f'\b
-        @test Fs.UP'\b ≈ L_f\b
-        @test Fs.D\b ≈ D_f\b
-        @test Fs.D'\b ≈ D_f\b
-        @test Fs.LD\b ≈ D_f\(L_f\b)
-        @test Fs.DU'\b ≈ D_f\(L_f\b)
-        @test Fs.LD'\b ≈ L_f'\(D_f\b)
-        @test Fs.DU\b ≈ L_f'\(D_f\b)
-        @test Fs.PtLD\b ≈ D_f\(L_f\b)
-        @test Fs.DUP'\b ≈ D_f\(L_f\b)
-        @test Fs.PtLD'\b ≈ L_f'\(D_f\b)
-        @test Fs.DUP\b ≈ L_f'\(D_f\b)
-    end
-
-    @testset "ldlt, with permutation" begin
-        Fs = ldlt(As, perm=p)
-        @test Fs.p == p
-        @test sparse(Fs) ≈ As
-        b = rand(3)
-        Asp = As[p,p]
-        LDp = sparse(ldlt(Asp, perm=[1,2,3]).LD)
-        # LDp = sparse(Fs.LD)
-        Lp, dp = SuiteSparse.CHOLMOD.getLd!(copy(LDp))
-        Dp = sparse(Diagonal(dp))
-        @test Fs\b ≈ Af\b
-        @test Fs.UP\(Fs.PtLD\b) ≈ Af\b
-        @test Fs.DUP\(Fs.PtL\b) ≈ Af\b
-        @test Fs.L\b ≈ Lp\b
-        @test Fs.U\b ≈ Lp'\b
-        @test Fs.L'\b ≈ Lp'\b
-        @test Fs.U'\b ≈ Lp\b
-        @test Fs.PtL\b ≈ Lp\b[p]
-        @test Fs.UP\b ≈ (Lp'\b)[p_inv]
-        @test Fs.PtL'\b ≈ (Lp'\b)[p_inv]
-        @test Fs.UP'\b ≈ Lp\b[p]
-        @test Fs.LD\b ≈ Dp\(Lp\b)
-        @test Fs.DU'\b ≈ Dp\(Lp\b)
-        @test Fs.LD'\b ≈ Lp'\(Dp\b)
-        @test Fs.DU\b ≈ Lp'\(Dp\b)
-        @test Fs.PtLD\b ≈ Dp\(Lp\b[p])
-        @test Fs.DUP'\b ≈ Dp\(Lp\b[p])
-        @test Fs.PtLD'\b ≈ (Lp'\(Dp\b))[p_inv]
-        @test Fs.DUP\b ≈ (Lp'\(Dp\b))[p_inv]
-        @test_throws CHOLMOD.CHOLMODException Fs.DUPt
-        @test_throws CHOLMOD.CHOLMODException Fs.PLD
-    end
-
-    @testset "Element promotion and type inference" begin
-        @inferred cholesky(As)\fill(1, size(As, 1))
-        @inferred ldlt(As)\fill(1, size(As, 1))
-    end
-end
-
-@testset "Issue 11745 - row and column pointers were not sorted in sparse(Factor)" begin
-    A = Float64[10 1 1 1; 1 10 0 0; 1 0 10 0; 1 0 0 10]
-    @test sparse(cholesky(sparse(A))) ≈ A
-end
-GC.gc()
-
-@testset "Issue 11747 - Wrong show method defined for FactorComponent" begin
-    v = cholesky(sparse(Float64[ 10 1 1 1; 1 10 0 0; 1 0 10 0; 1 0 0 10])).L
-    for s in (sprint(show, MIME("text/plain"), v), sprint(show, v))
-        @test occursin("method:  simplicial", s)
-        @test !occursin("#undef", s)
-    end
-end
-
-@testset "Issue 14076" begin
-    @test cholesky(sparse([1,2,3,4], [1,2,3,4], Float32[1,4,16,64]))\[1,4,16,64] == fill(1, 4)
-end
-
-@testset "Issue 29367" begin
-    if Int != Int32
-        @test_throws MethodError cholesky(sparse(Int32[1,2,3,4], Int32[1,2,3,4], Float64[1,4,16,64]))
-        @test_throws MethodError cholesky(sparse(Int32[1,2,3,4], Int32[1,2,3,4], Float32[1,4,16,64]))
-        @test_throws MethodError ldlt(sparse(Int32[1,2,3,4], Int32[1,2,3,4], Float64[1,4,16,64]))
-        @test_throws MethodError ldlt(sparse(Int32[1,2,3,4], Int32[1,2,3,4], Float32[1,4,16,64]))
-    end
-end
-
-@testset "Issue 14134" begin
-    A = CHOLMOD.Sparse(sprandn(10,5,0.1) + I |> t -> t't)
-    b = IOBuffer()
-    serialize(b, A)
-    seekstart(b)
-    Anew = deserialize(b)
-    @test_throws ArgumentError show(Anew)
-    @test_throws ArgumentError size(Anew)
-    @test_throws ArgumentError Anew[1]
-    @test_throws ArgumentError Anew[2,1]
-    F = cholesky(A)
-    serialize(b, F)
-    seekstart(b)
-    Fnew = deserialize(b)
-    @test_throws ArgumentError Fnew\fill(1., 5)
-    @test_throws ArgumentError show(Fnew)
-    @test_throws ArgumentError size(Fnew)
-    @test_throws ArgumentError diag(Fnew)
-    @test_throws ArgumentError logdet(Fnew)
-end
-
-@testset "Issue #28985" begin
-    @test typeof(cholesky(sparse(I, 4, 4))'\rand(4)) == Array{Float64, 1}
-    @test typeof(cholesky(sparse(I, 4, 4))'\rand(4,1)) == Array{Float64, 2}
-end
-
-@testset "Issue with promotion during conversion to CHOLMOD.Dense" begin
-    @test CHOLMOD.Dense(fill(1, 5)) == fill(1, 5, 1)
-    @test CHOLMOD.Dense(fill(1f0, 5)) == fill(1, 5, 1)
-    @test CHOLMOD.Dense(fill(1f0 + 0im, 5, 2)) == fill(1, 5, 2)
-end
-
-@testset "Further issue with promotion #14894" begin
-    x = fill(1., 5)
-    @test cholesky(sparse(Float16(1)I, 5, 5))\x == x
-    @test cholesky(Symmetric(sparse(Float16(1)I, 5, 5)))\x == x
-    @test cholesky(Hermitian(sparse(Complex{Float16}(1)I, 5, 5)))\x == x
-    @test_throws TypeError cholesky(sparse(BigFloat(1)I, 5, 5))
-    @test_throws TypeError cholesky(Symmetric(sparse(BigFloat(1)I, 5, 5)))
-    @test_throws TypeError cholesky(Hermitian(sparse(Complex{BigFloat}(1)I, 5, 5)))
-end
-
-@testset "test \\ for Factor and StridedVecOrMat" begin
-    x = rand(5)
-    A = cholesky(sparse(Diagonal(x.\1)))
-    @test A\view(fill(1.,10),1:2:10) ≈ x
-    @test A\view(Matrix(1.0I, 5, 5), :, :) ≈ Matrix(Diagonal(x))
-end
-
-@testset "Test \\ for Factor and SparseVecOrMat" begin
-    sparseI = sparse(1.0I, 100, 100)
-    sparseb = sprandn(100, 0.5)
-    sparseB = sprandn(100, 100, 0.5)
-    chI = cholesky(sparseI)
-    @test chI \ sparseb ≈ sparseb
-    @test chI \ sparseB ≈ sparseB
-    @test chI \ sparseI ≈ sparseI
-end
-
-@testset "Real factorization and complex rhs" begin
-    A = sprandn(5, 5, 0.4) |> t -> t't + I
-    B = complex.(randn(5, 2), randn(5, 2))
-    @test cholesky(A)\B ≈ A\B
-end
-
-@testset "Make sure that ldlt performs an LDLt (Issue #19032)" begin
-    m, n = 400, 500
-    A = sprandn(m, n, .2)
-    M = [I copy(A'); A -I]
-    b = M * fill(1., m+n)
-    F = ldlt(M)
-    s = unsafe_load(pointer(F))
-    @test s.is_super == 0
-    @test F\b ≈ fill(1., m+n)
-    F2 = cholesky(M; check = false)
-    @test !issuccess(F2)
-    ldlt!(F2, M)
-    @test issuccess(F2)
-    @test F2\b ≈ fill(1., m+n)
-end
-
-@testset "Test that imaginary parts in Hermitian{T,SparseMatrixCSC{T}} are ignored" begin
-    A = sparse([1,2,3,4,1], [1,2,3,4,2], [complex(2.0,1),2,2,2,1])
-    Fs = cholesky(Hermitian(A))
-    Fd = cholesky(Hermitian(Array(A)))
-    @test sparse(Fs) ≈ Hermitian(A)
-    @test Fs\fill(1., 4) ≈ Fd\fill(1., 4)
-end
-
-@testset "\\ '\\ and transpose(...)\\" begin
-    # Test that \ and '\ and transpose(...)\ work for Symmetric and Hermitian. This is just
-    # a dispatch exercise so it doesn't matter that the complex matrix has
-    # zero imaginary parts
-    Apre = sprandn(10, 10, 0.2) - I
-    for A in (Symmetric(Apre), Hermitian(Apre),
-              Symmetric(Apre + 10I), Hermitian(Apre + 10I),
-              Hermitian(complex(Apre)), Hermitian(complex(Apre) + 10I))
-        local A, x, b
-        x = fill(1., 10)
-        b = A*x
-        @test x ≈ A\b
-        @test transpose(A)\b ≈ A'\b
-    end
-end
-
-@testset "Check that Symmetric{SparseMatrixCSC} can be constructed from CHOLMOD.Sparse" begin
-    Int === Int32 && Random.seed!(124)
-    A = sprandn(10, 10, 0.1)
-    B = CHOLMOD.Sparse(A)
-    C = B'B
-    # Change internal representation to symmetric (upper/lower)
-    o = fieldoffset(CHOLMOD.C_Sparse{eltype(C)}, findall(fieldnames(CHOLMOD.C_Sparse{eltype(C)}) .== :stype)[1])
-    for uplo in (1, -1)
-        unsafe_store!(Ptr{Int8}(pointer(C)), uplo, Int(o) + 1)
-        @test convert(Symmetric{Float64,SparseMatrixCSC{Float64,Int}}, C) ≈ Symmetric(A'A)
-    end
-end
-
-@testset "Check inputs to Sparse. Related to #20024" for A_ in (
-    SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1,2], Float64[]),
-    SparseMatrixCSC(2, 2, [1, 2, 3], CHOLMOD.SuiteSparse_long[1,2], Float64[1.0]))
-    args = (size(A_)..., getcolptr(A_) .- 1, rowvals(A_) .- 1, nonzeros(A_))
-    @test_throws ArgumentError CHOLMOD.Sparse(args...)
-    @test_throws ArgumentError CHOLMOD.Sparse(A_)
-end
-
-@testset "sparse right multiplication of Symmetric and Hermitian matrices #21431" begin
-    S = sparse(1.0I, 2, 2)
-    @test issparse(S*S*S)
-    for T in (Symmetric, Hermitian)
-        @test issparse(S*T(S)*S)
-        @test issparse(S*(T(S)*S))
-        @test issparse((S*T(S))*S)
-    end
-end
-
-@testset "Test sparse low rank update for cholesky decomposion" begin
-    A = SparseMatrixCSC{Float64,CHOLMOD.SuiteSparse_long}(10, 5, [1,3,6,8,10,13], [6,7,1,2,9,3,5,1,7,6,7,9],
-        [-0.138843, 2.99571, -0.556814, 0.669704, -1.39252, 1.33814,
-        1.02371, -0.502384, 1.10686, 0.262229, -1.6935, 0.525239])
-    AtA = A'*A
-    C0 = [1., 2., 0, 0, 0]
-    # Test both cholesky and LDLt with and without automatic permutations
-    for F in (cholesky(AtA), cholesky(AtA, perm=1:5), ldlt(AtA), ldlt(AtA, perm=1:5))
-        local F
-        x0 = F\(b = fill(1., 5))
-        #Test both sparse/dense and vectors/matrices
-        for Ctest in (C0, sparse(C0), [C0 2*C0], sparse([C0 2*C0]))
-            local x, C, F1
-            C = copy(Ctest)
-            F1 = copy(F)
-            x = (AtA+C*C')\b
-
-            #Test update
-            F11 = CHOLMOD.lowrankupdate(F1, C)
-            @test Array(sparse(F11)) ≈ AtA+C*C'
-            @test F11\b ≈ x
-            #Make sure we get back the same factor again
-            F10 = CHOLMOD.lowrankdowndate(F11, C)
-            @test Array(sparse(F10)) ≈ AtA
-            @test F10\b ≈ x0
-
-            #Test in-place update
-            CHOLMOD.lowrankupdate!(F1, C)
-            @test Array(sparse(F1)) ≈ AtA+C*C'
-            @test F1\b ≈ x
-            #Test in-place downdate
-            CHOLMOD.lowrankdowndate!(F1, C)
-            @test Array(sparse(F1)) ≈ AtA
-            @test F1\b ≈ x0
-
-            @test C == Ctest    #Make sure C didn't change
-        end
-    end
-end
-
-@testset "Issue #22335" begin
-    local A, F
-    A = sparse(1.0I, 3, 3)
-    @test issuccess(cholesky(A))
-    A[3, 3] = -1
-    F = cholesky(A; check = false)
-    @test !issuccess(F)
-    @test issuccess(ldlt!(F, A))
-    A[3, 3] = 1
-    @test A[:, 3:-1:1]\fill(1., 3) == [1, 1, 1]
-end
-
-@testset "Non-positive definite matrices" begin
-    A = sparse(Float64[1 2; 2 1])
-    B = sparse(ComplexF64[1 2; 2 1])
-    for M in (A, B, Symmetric(A), Hermitian(B))
-        F = cholesky(M; check = false)
-        @test_throws PosDefException cholesky(M)
-        @test_throws PosDefException cholesky!(F, M)
-        @test !issuccess(cholesky(M; check = false))
-        @test !issuccess(cholesky!(F, M; check = false))
-    end
-    A = sparse(Float64[0 0; 0 0])
-    B = sparse(ComplexF64[0 0; 0 0])
-    for M in (A, B, Symmetric(A), Hermitian(B))
-        F = ldlt(M; check = false)
-        @test_throws ZeroPivotException ldlt(M)
-        @test_throws ZeroPivotException ldlt!(F, M)
-        @test !issuccess(ldlt(M; check = false))
-        @test !issuccess(ldlt!(F, M; check = false))
-    end
-end
-
-@testset "Issue #27860" begin
-    for typeA in (Float64, ComplexF64), typeB in (Float64, ComplexF64), transform in (adjoint, transpose)
-        A = sparse(typeA[2.0 0.1; 0.1 2.0])
-        B = randn(typeB, 2, 2)
-        @test A \ transform(B) ≈ cholesky(A) \ transform(B) ≈ Matrix(A) \ transform(B)
-    end
-end
-
-@testset "Issue #33365" begin
-    A = Sparse(spzeros(0, 0))
-    @test A * A' == A
-    @test A' * A == A
-    B = Sparse(spzeros(0, 4))
-    @test B * B' == Sparse(spzeros(0, 0))
-    @test B' * B == Sparse(spzeros(4, 4))
-    C = Sparse(spzeros(3, 0))
-    @test C * C' == Sparse(spzeros(3, 3))
-    @test C' * C == Sparse(spzeros(0, 0))
-end
diff --git a/stdlib/SuiteSparse/test/runtests.jl b/stdlib/SuiteSparse/test/runtests.jl
deleted file mode 100644
index cde54e9488818b..00000000000000
--- a/stdlib/SuiteSparse/test/runtests.jl
+++ /dev/null
@@ -1,30 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test, Random
-using SuiteSparse, LinearAlgebra, SparseArrays
-
-if Base.USE_GPL_LIBS
-    include("umfpack.jl")
-    include("cholmod.jl")
-    include("spqr.jl")
-
-    # Test multithreaded execution
-    let p, cmd = `$(Base.julia_cmd()) --depwarn=error --startup-file=no threads.jl`
-        # test both nthreads==1 and nthreads>1. spawn a process to test whichever
-        # case we are not running currently.
-        other_nthreads = Threads.nthreads() == 1 ? 4 : 1
-        p = run(
-                pipeline(
-                    setenv(
-                        cmd,
-                        "JULIA_NUM_THREADS" => other_nthreads,
-                        dir=@__DIR__()),
-                    stdout = stdout,
-                    stderr = stderr),
-                wait = false)
-        include("threads.jl")
-        if !success(p)
-            error("SuiteSparse threads test failed with nthreads == $other_nthreads")
-        end
-    end
-end
diff --git a/stdlib/SuiteSparse/test/spqr.jl b/stdlib/SuiteSparse/test/spqr.jl
deleted file mode 100644
index d008bd58201b41..00000000000000
--- a/stdlib/SuiteSparse/test/spqr.jl
+++ /dev/null
@@ -1,134 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SuiteSparse.SPQR
-using SuiteSparse.CHOLMOD
-using LinearAlgebra: rmul!, lmul!, Adjoint, Transpose
-
-@testset "Sparse QR" begin
-m, n = 100, 10
-nn = 100
-
-@test size(qr(sprandn(m, n, 0.1)).Q) == (m, m)
-
-@testset "element type of A: $eltyA" for eltyA in (Float64, ComplexF64)
-    if eltyA <: Real
-        A = sparse([1:n; rand(1:m, nn - n)], [1:n; rand(1:n, nn - n)], randn(nn), m, n)
-    else
-        A = sparse([1:n; rand(1:m, nn - n)], [1:n; rand(1:n, nn - n)], complex.(randn(nn), randn(nn)), m, n)
-    end
-
-    F = qr(A)
-    @test size(F) == (m,n)
-    @test size(F, 1) == m
-    @test size(F, 2) == n
-    @test size(F, 3) == 1
-    @test_throws ArgumentError size(F, 0)
-
-    @testset "getindex" begin
-        @test istriu(F.R)
-        @test isperm(F.pcol)
-        @test isperm(F.prow)
-        @test_throws ErrorException F.T
-    end
-
-    @testset "apply Q" begin
-        Q = F.Q
-        Imm = Matrix{Float64}(I, m, m)
-        @test Q' * (Q*Imm) ≈ Imm
-        @test (Imm*Q) * Q' ≈ Imm
-
-        # test that Q'Pl*A*Pr = R
-        R0 = Q'*Array(A[F.prow, F.pcol])
-        @test R0[1:n, :] ≈ F.R
-        @test norm(R0[n + 1:end, :], 1) < 1e-12
-
-        offsizeA = Matrix{Float64}(I, m+1, m+1)
-        @test_throws DimensionMismatch lmul!(Q, offsizeA)
-        @test_throws DimensionMismatch lmul!(adjoint(Q), offsizeA)
-        @test_throws DimensionMismatch rmul!(offsizeA, Q)
-        @test_throws DimensionMismatch rmul!(offsizeA, adjoint(Q))
-    end
-
-    @testset "element type of B: $eltyB" for eltyB in (Int, Float64, ComplexF64)
-        if eltyB == Int
-            B = rand(1:10, m, 2)
-        elseif eltyB <: Real
-            B = randn(m, 2)
-        else
-            B = complex.(randn(m, 2), randn(m, 2))
-        end
-
-        @inferred A\B
-        @test A\B[:,1] ≈ Array(A)\B[:,1]
-        @test A\B ≈ Array(A)\B
-        @test_throws DimensionMismatch A\B[1:m-1,:]
-        C, x = A[1:9, :], fill(eltyB(1), 9)
-        @test C*(C\x) ≈ x # Underdetermined system
-    end
-
-    # Make sure that conversion to Sparse doesn't use SuiteSparse's symmetric flag
-    @test qr(SparseMatrixCSC{eltyA}(I, 5, 5)) \ fill(eltyA(1), 5) == fill(1, 5)
-end
-
-@testset "basic solution of rank deficient ls" begin
-    A = sprandn(m, 5, 0.9)*sprandn(5, n, 0.9)
-    b = randn(m)
-    xs = A\b
-    xd = Array(A)\b
-
-    # check that basic solution has more zeros
-    @test count(!iszero, xs) < count(!iszero, xd)
-    @test A*xs ≈ A*xd
-end
-
-@testset "Issue 26367" begin
-    A = sparse([0.0 1 0 0; 0 0 0 0])
-    @test Matrix(qr(A).Q) == Matrix(qr(Matrix(A)).Q) == Matrix(I, 2, 2)
-end
-
-@testset "Issue 26368" begin
-    A = sparse([0.0 1 0 0; 0 0 0 0])
-    F = qr(A)
-    @test F.Q*F.R == A[F.prow,F.pcol]
-end
-
-@testset "select ordering overdetermined" begin
-     A = sparse([1:n; rand(1:m, nn - n)], [1:n; rand(1:n, nn - n)], randn(nn), m, n)
-     b = randn(m)
-     xref = Array(A) \ b
-     for ordering ∈ SuiteSparse.SPQR.ORDERINGS
-         QR = qr(A, ordering=ordering)
-         x = QR \ b
-         @test x ≈ xref
-     end
-     @test_throws ErrorException qr(A, ordering=Int32(10))
-end
-
-@testset "select ordering underdetermined" begin
-     A = sparse([1:n; rand(1:n, nn - n)], [1:n; rand(1:m, nn - n)], randn(nn), n, m)
-     b = A * ones(m)
-     for ordering ∈ SuiteSparse.SPQR.ORDERINGS
-         QR = qr(A, ordering=ordering)
-         x = QR \ b
-         # x ≂̸ Array(A) \ b; LAPACK returns a min-norm x while SPQR returns a basic x
-         @test A * x ≈ b
-     end
-     @test_throws ErrorException qr(A, ordering=Int32(10))
-end
-
-@testset "propertynames of QRSparse" begin
-    A = sparse([0.0 1 0 0; 0 0 0 0])
-    F = qr(A)
-    @test propertynames(F) == (:R, :Q, :prow, :pcol)
-    @test propertynames(F, true) == (:R, :Q, :prow, :pcol, :factors, :τ, :cpiv, :rpivinv)
-end
-
-@testset "rank" begin
-    S = sprandn(10, 5, 1.0)*sprandn(5, 10, 1.0)
-    @test rank(qr(S)) == 5
-    @test rank(S) == 5
-    @test all(iszero, (rank(qr(spzeros(10, i))) for i in 1:10))
-    @test all(iszero, (rank(spzeros(10, i)) for i in 1:10))
-end
-
-end
diff --git a/stdlib/SuiteSparse/test/threads.jl b/stdlib/SuiteSparse/test/threads.jl
deleted file mode 100644
index 76b4acd5407799..00000000000000
--- a/stdlib/SuiteSparse/test/threads.jl
+++ /dev/null
@@ -1,20 +0,0 @@
-using Test, LinearAlgebra, SparseArrays
-
-@testset "threaded SuiteSparse tests" begin
-    A = sprandn(200, 200, 0.2)
-    b = rand(200)
-
-    function test(n::Integer)
-        _A = A[1:n, 1:n]
-        _b = b[1:n]
-        x = qr(_A) \ _b
-        return norm(x)
-    end
-
-    res_threads = zeros(100)
-    Threads.@threads for i in 1:100
-        res_threads[i] = test(i + 100)
-    end
-
-    @test res_threads ≈ [test(i + 100) for i in 1:100]
-end
diff --git a/stdlib/SuiteSparse/test/umfpack.jl b/stdlib/SuiteSparse/test/umfpack.jl
deleted file mode 100644
index dc17d71a727e02..00000000000000
--- a/stdlib/SuiteSparse/test/umfpack.jl
+++ /dev/null
@@ -1,236 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using SuiteSparse: increment!
-using Serialization
-using LinearAlgebra: Adjoint, Transpose, SingularException
-
-@testset "UMFPACK wrappers" begin
-    se33 = sparse(1.0I, 3, 3)
-    do33 = fill(1., 3)
-    @test isequal(se33 \ do33, do33)
-
-    # based on deps/Suitesparse-4.0.2/UMFPACK/Demo/umfpack_di_demo.c
-
-    A0 = sparse(increment!([0,4,1,1,2,2,0,1,2,3,4,4]),
-                increment!([0,4,0,2,1,2,1,4,3,2,1,2]),
-                [2.,1.,3.,4.,-1.,-3.,3.,6.,2.,1.,4.,2.], 5, 5)
-
-    @testset "Core functionality for $Tv elements" for Tv in (Float64, ComplexF64)
-        # We might be able to support two index sizes one day
-        for Ti in Base.uniontypes(SuiteSparse.UMFPACK.UMFITypes)
-            A = convert(SparseMatrixCSC{Tv,Ti}, A0)
-            lua = lu(A)
-            @test nnz(lua) == 18
-            @test_throws ErrorException lua.Z
-            L,U,p,q,Rs = lua.:(:)
-            @test (Diagonal(Rs) * A)[p,q] ≈ L * U
-
-            det(lua) ≈ det(Array(A))
-
-            b = [8., 45., -3., 3., 19.]
-            x = lua\b
-            @test x ≈ float([1:5;])
-
-            @test A*x ≈ b
-            z = complex.(b)
-            x = LinearAlgebra.ldiv!(lua, z)
-            @test x ≈ float([1:5;])
-            @test z === x
-            y = similar(z)
-            LinearAlgebra.ldiv!(y, lua, complex.(b))
-            @test y ≈ x
-
-            @test A*x ≈ b
-
-            b = [8., 20., 13., 6., 17.]
-            x = lua'\b
-            @test x ≈ float([1:5;])
-
-            @test A'*x ≈ b
-            z = complex.(b)
-            x = LinearAlgebra.ldiv!(adjoint(lua), z)
-            @test x ≈ float([1:5;])
-            @test x === z
-            y = similar(x)
-            LinearAlgebra.ldiv!(y, adjoint(lua), complex.(b))
-            @test y ≈ x
-
-            @test A'*x ≈ b
-            x = transpose(lua) \ b
-            @test x ≈ float([1:5;])
-
-            @test transpose(A) * x ≈ b
-            x = LinearAlgebra.ldiv!(transpose(lua), complex.(b))
-            @test x ≈ float([1:5;])
-            y = similar(x)
-            LinearAlgebra.ldiv!(y, transpose(lua), complex.(b))
-            @test y ≈ x
-
-            @test transpose(A) * x ≈ b
-
-            # Element promotion and type inference
-            @inferred lua\fill(1, size(A, 2))
-        end
-    end
-
-    @testset "More tests for complex cases" begin
-        Ac0 = complex.(A0,A0)
-        for Ti in Base.uniontypes(SuiteSparse.UMFPACK.UMFITypes)
-            Ac = convert(SparseMatrixCSC{ComplexF64,Ti}, Ac0)
-            x  = fill(1.0 + im, size(Ac,1))
-            lua = lu(Ac)
-            L,U,p,q,Rs = lua.:(:)
-            @test (Diagonal(Rs) * Ac)[p,q] ≈ L * U
-            b  = Ac*x
-            @test Ac\b ≈ x
-            b  = Ac'*x
-            @test Ac'\b ≈ x
-            b  = transpose(Ac)*x
-            @test transpose(Ac)\b ≈ x
-        end
-    end
-
-    @testset "Rectangular cases. elty=$elty, m=$m, n=$n" for
-        elty in (Float64, ComplexF64),
-            (m, n) in ((10,5), (5, 10))
-
-        Random.seed!(30072018)
-        A = sparse([1:min(m,n); rand(1:m, 10)], [1:min(m,n); rand(1:n, 10)], elty == Float64 ? randn(min(m, n) + 10) : complex.(randn(min(m, n) + 10), randn(min(m, n) + 10)))
-        F = lu(A)
-        L, U, p, q, Rs = F.:(:)
-        @test (Diagonal(Rs) * A)[p,q] ≈ L * U
-    end
-
-    @testset "Issue #4523 - complex sparse \\" begin
-        A, b = sparse((1.0 + im)I, 2, 2), fill(1., 2)
-        @test A * (lu(A)\b) ≈ b
-
-        @test det(sparse([1,3,3,1], [1,1,3,3], [1,1,1,1])) == 0
-    end
-
-    @testset "UMFPACK_ERROR_n_nonpositive" begin
-        @test_throws ArgumentError lu(sparse(Int[], Int[], Float64[], 5, 0))
-    end
-
-    @testset "Issue #15099" for (Tin, Tout) in (
-            (ComplexF16, ComplexF64),
-            (ComplexF32, ComplexF64),
-            (ComplexF64, ComplexF64),
-            (Float16, Float64),
-            (Float32, Float64),
-            (Float64, Float64),
-            (Int, Float64),
-        )
-
-        F = lu(sparse(fill(Tin(1), 1, 1)))
-        L = sparse(fill(Tout(1), 1, 1))
-        @test F.p == F.q == [1]
-        @test F.Rs == [1.0]
-        @test F.L == F.U == L
-        @test F.:(:) == (L, L, [1], [1], [1.0])
-    end
-
-    @testset "BigFloat not supported" for T in (BigFloat, Complex{BigFloat})
-        @test_throws ArgumentError lu(sparse(fill(T(1), 1, 1)))
-    end
-
-    @testset "size(::UmfpackLU)" begin
-        m = n = 1
-        F = lu(sparse(fill(1., m, n)))
-        @test size(F) == (m, n)
-        @test size(F, 1) == m
-        @test size(F, 2) == n
-        @test size(F, 3) == 1
-        @test_throws ArgumentError size(F,-1)
-    end
-
-    @testset "Test aliasing" begin
-        a = rand(5)
-        @test_throws ArgumentError SuiteSparse.UMFPACK.solve!(a, lu(sparse(1.0I, 5, 5)), a, SuiteSparse.UMFPACK.UMFPACK_A)
-        aa = complex(a)
-        @test_throws ArgumentError SuiteSparse.UMFPACK.solve!(aa, lu(sparse((1.0im)I, 5, 5)), aa, SuiteSparse.UMFPACK.UMFPACK_A)
-    end
-
-    @testset "Issues #18246,18244 - lu sparse pivot" begin
-        A = sparse(1.0I, 4, 4)
-        A[1:2,1:2] = [-.01 -200; 200 .001]
-        F = lu(A)
-        @test F.p == [3 ; 4 ; 2 ; 1]
-    end
-
-    @testset "Test that A[c|t]_ldiv_B!{T<:Complex}(X::StridedMatrix{T}, lu::UmfpackLU{Float64}, B::StridedMatrix{T}) works as expected." begin
-        N = 10
-        p = 0.5
-        A = N*I + sprand(N, N, p)
-        X = zeros(ComplexF64, N, N)
-        B = complex.(rand(N, N), rand(N, N))
-        luA, lufA = lu(A), lu(Array(A))
-        @test LinearAlgebra.ldiv!(copy(X), luA, B) ≈ LinearAlgebra.ldiv!(copy(X), lufA, B)
-        @test LinearAlgebra.ldiv!(copy(X), adjoint(luA), B) ≈ LinearAlgebra.ldiv!(copy(X), adjoint(lufA), B)
-        @test LinearAlgebra.ldiv!(copy(X), transpose(luA), B) ≈ LinearAlgebra.ldiv!(copy(X), transpose(lufA), B)
-    end
-
-    @testset "singular matrix" begin
-        for A in sparse.((Float64[1 2; 0 0], ComplexF64[1 2; 0 0]))
-            @test_throws SingularException lu(A)
-            @test !issuccess(lu(A; check = false))
-        end
-    end
-
-    @testset "deserialization" begin
-        A  = 10*I + sprandn(10, 10, 0.4)
-        F1 = lu(A)
-        b  = IOBuffer()
-        serialize(b, F1)
-        seekstart(b)
-        F2 = deserialize(b)
-        for nm in (:colptr, :m, :n, :nzval, :rowval, :status)
-            @test getfield(F1, nm) == getfield(F2, nm)
-        end
-    end
-
-    @testset "Reuse symbolic LU factorization" begin
-        A1 = sparse(increment!([0,4,1,1,2,2,0,1,2,3,4,4]),
-                    increment!([0,4,0,2,1,2,1,4,3,2,1,2]),
-                    [2.,1.,3.,4.,-1.,-3.,3.,9.,2.,1.,4.,2.], 5, 5)
-        for Tv in (Float64, ComplexF64, Float16, Float32, ComplexF16, ComplexF32)
-            for Ti in Base.uniontypes(SuiteSparse.UMFPACK.UMFITypes)
-                A = convert(SparseMatrixCSC{Tv,Ti}, A0)
-                B = convert(SparseMatrixCSC{Tv,Ti}, A1)
-                b = Tv[8., 45., -3., 3., 19.]
-                F = lu(A)
-                lu!(F, B)
-                @test F\b ≈ B\b ≈ Matrix(B)\b
-
-                # singular matrix
-                C = copy(B)
-                C[4, 3] = Tv(0)
-                F = lu(A)
-                @test_throws SingularException lu!(F, C)
-
-                # change of nonzero pattern
-                D = copy(B)
-                D[5, 1] = Tv(1.0)
-                F = lu(A)
-                @test_throws ArgumentError lu!(F, D)
-            end
-        end
-    end
-
-end
-
-@testset "REPL printing of UmfpackLU" begin
-    # regular matrix
-    A = sparse([1, 2], [1, 2], Float64[1.0, 1.0])
-    F = lu(A)
-    facstring = sprint((t, s) -> show(t, "text/plain", s), F)
-    lstring = sprint((t, s) -> show(t, "text/plain", s), F.L)
-    ustring = sprint((t, s) -> show(t, "text/plain", s), F.U)
-    @test facstring == "$(summary(F))\nL factor:\n$lstring\nU factor:\n$ustring"
-
-    # singular matrix
-    B = sparse(zeros(Float64, 2, 2))
-    F = lu(B; check=false)
-    facstring = sprint((t, s) -> show(t, "text/plain", s), F)
-    @test facstring == "Failed factorization of type $(summary(F))"
-end
diff --git a/stdlib/SuiteSparse_jll/Project.toml b/stdlib/SuiteSparse_jll/Project.toml
new file mode 100644
index 00000000000000..f36ce756c834c5
--- /dev/null
+++ b/stdlib/SuiteSparse_jll/Project.toml
@@ -0,0 +1,18 @@
+name = "SuiteSparse_jll"
+uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
+version = "5.10.1+0"
+
+[deps]
+libblastrampoline_jll = "8e850b90-86db-534c-a0d3-1478176c7d93"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.7"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
new file mode 100644
index 00000000000000..2940970ceff9fe
--- /dev/null
+++ b/stdlib/SuiteSparse_jll/src/SuiteSparse_jll.jl
@@ -0,0 +1,132 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/SuiteSparse_jll.jl
+baremodule SuiteSparse_jll
+using Base, Libdl, libblastrampoline_jll
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libamd, libbtf, libcamd, libccolamd, libcholmod, libcolamd, libklu, libldl, librbio, libspqr, libsuitesparseconfig, libumfpack
+
+# These get calculated in __init__()
+# Man I can't wait until these are automatically handled by an in-Base JLLWrappers clone.
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libamd_handle = C_NULL
+libamd_path = ""
+libbtf_handle = C_NULL
+libbtf_path = ""
+libcamd_handle = C_NULL
+libcamd_path = ""
+libccolamd_handle = C_NULL
+libccolamd_path = ""
+libcholmod_handle = C_NULL
+libcholmod_path = ""
+libcolamd_handle = C_NULL
+libcolamd_path = ""
+libklu_handle = C_NULL
+libklu_path = ""
+libldl_handle = C_NULL
+libldl_path = ""
+librbio_handle = C_NULL
+librbio_path = ""
+libspqr_handle = C_NULL
+libspqr_path = ""
+libsuitesparseconfig_handle = C_NULL
+libsuitesparseconfig_path = ""
+libumfpack_handle = C_NULL
+libumfpack_path = ""
+
+if Sys.iswindows()
+    const libamd = "libamd.dll"
+    const libbtf = "libbtf.dll"
+    const libcamd = "libcamd.dll"
+    const libccolamd = "libccolamd.dll"
+    const libcholmod = "libcholmod.dll"
+    const libcolamd = "libcolamd.dll"
+    const libklu = "libklu.dll"
+    const libldl = "libldl.dll"
+    const librbio = "librbio.dll"
+    const libspqr = "libspqr.dll"
+    const libsuitesparseconfig = "libsuitesparseconfig.dll"
+    const libumfpack = "libumfpack.dll"
+elseif Sys.isapple()
+    const libamd = "@rpath/libamd.2.dylib"
+    const libbtf = "@rpath/libbtf.1.dylib"
+    const libcamd = "@rpath/libcamd.2.dylib"
+    const libccolamd = "@rpath/libccolamd.2.dylib"
+    const libcholmod = "@rpath/libcholmod.3.dylib"
+    const libcolamd = "@rpath/libcolamd.2.dylib"
+    const libklu = "@rpath/libklu.1.dylib"
+    const libldl = "@rpath/libldl.2.dylib"
+    const librbio = "@rpath/librbio.2.dylib"
+    const libspqr = "@rpath/libspqr.2.dylib"
+    const libsuitesparseconfig = "@rpath/libsuitesparseconfig.5.dylib"
+    const libumfpack = "@rpath/libumfpack.5.dylib"
+else
+    const libamd = "libamd.so.2"
+    const libbtf = "libbtf.so.1"
+    const libcamd = "libcamd.so.2"
+    const libccolamd = "libccolamd.so.2"
+    const libcholmod = "libcholmod.so.3"
+    const libcolamd = "libcolamd.so.2"
+    const libklu = "libklu.so.1"
+    const libldl = "libldl.so.2"
+    const librbio = "librbio.so.2"
+    const libspqr = "libspqr.so.2"
+    const libsuitesparseconfig = "libsuitesparseconfig.so.5"
+    const libumfpack = "libumfpack.so.5"
+end
+
+function __init__()
+    global libamd_handle = dlopen(libamd)
+    global libamd_path = dlpath(libamd_handle)
+    global libbtf_handle = dlopen(libbtf)
+    global libbtf_path = dlpath(libbtf_handle)
+    global libcamd_handle = dlopen(libcamd)
+    global libcamd_path = dlpath(libcamd_handle)
+    global libccolamd_handle = dlopen(libccolamd)
+    global libccolamd_path = dlpath(libccolamd_handle)
+    global libcholmod_handle = dlopen(libcholmod)
+    global libcholmod_path = dlpath(libcholmod_handle)
+    global libcolamd_handle = dlopen(libcolamd)
+    global libcolamd_path = dlpath(libcolamd_handle)
+    global libklu_handle = dlopen(libklu)
+    global libklu_path = dlpath(libklu_handle)
+    global libldl_handle = dlopen(libldl)
+    global libldl_path = dlpath(libldl_handle)
+    global librbio_handle = dlopen(librbio)
+    global librbio_path = dlpath(librbio_handle)
+    global libspqr_handle = dlopen(libspqr)
+    global libspqr_path = dlpath(libspqr_handle)
+    global libsuitesparseconfig_handle = dlopen(libsuitesparseconfig)
+    global libsuitesparseconfig_path = dlpath(libsuitesparseconfig_handle)
+    global libumfpack_handle = dlopen(libumfpack)
+    global libumfpack_path = dlpath(libumfpack_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libamd_path() = libamd_path
+get_libbtf_path() = libbtf_path
+get_libcamd_path() = libcamd_path
+get_libccolamd_path() = libccolamd_path
+get_libcholmod_path() = libcholmod_path
+get_libcolamd_path() = libcolamd_path
+get_libklu_path() = libklu_path
+get_libldl_path() = libldl_path
+get_librbio_path() = librbio_path
+get_libspqr_path() = libspqr_path
+get_libsuitesparseconfig_path() = libsuitesparseconfig_path
+get_libumfpack_path() = libumfpack_path
+
+end  # module SuiteSparse_jll
diff --git a/stdlib/SuiteSparse_jll/test/runtests.jl b/stdlib/SuiteSparse_jll/test/runtests.jl
new file mode 100644
index 00000000000000..ca356951f99e22
--- /dev/null
+++ b/stdlib/SuiteSparse_jll/test/runtests.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, SuiteSparse_jll
+
+@testset "SuiteSparse_jll" begin
+    @test ccall((:SuiteSparse_version, libsuitesparseconfig), Cint, (Ptr{Cint},), C_NULL) == 5010
+end
diff --git a/stdlib/TOML/Project.toml b/stdlib/TOML/Project.toml
index d63450a94ede2c..48bf828a370c9a 100644
--- a/stdlib/TOML/Project.toml
+++ b/stdlib/TOML/Project.toml
@@ -1,6 +1,6 @@
 name = "TOML"
 uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
-version = "0.1.0"
+version = "1.0.0"
 
 [deps]
 Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
diff --git a/stdlib/TOML/benchmark/benchmarks.jl b/stdlib/TOML/benchmark/benchmarks.jl
index 91f21d8eb80070..5710aee198b52e 100644
--- a/stdlib/TOML/benchmark/benchmarks.jl
+++ b/stdlib/TOML/benchmark/benchmarks.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using BenchmarkTools
 import TOML
 import Random
diff --git a/stdlib/TOML/benchmark/tune.json b/stdlib/TOML/benchmark/tune.json
index d8d7ca2ebf8897..f1b12c393587f3 100644
--- a/stdlib/TOML/benchmark/tune.json
+++ b/stdlib/TOML/benchmark/tune.json
@@ -1 +1 @@
-[{"Julia":"1.5.0","BenchmarkTools":"0.4.3"},[["BenchmarkGroup",{"data":{"strings":["BenchmarkGroup",{"data":{"long":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"short":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"numbers":["BenchmarkGroup",{"data":{"integers":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"floats":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"registry":["BenchmarkGroup",{"data":{"Registry.toml":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"Compat.toml":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"arrays":["BenchmarkGroup",{"data":{"heterogeneous":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"homogeneous":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"array of tables":["BenchmarkGroup",{"data":{"empty":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"parse empty":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":340,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}]]]
\ No newline at end of file
+[{"Julia":"1.5.0","BenchmarkTools":"0.4.3"},[["BenchmarkGroup",{"data":{"strings":["BenchmarkGroup",{"data":{"long":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"short":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"numbers":["BenchmarkGroup",{"data":{"integers":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"floats":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"registry":["BenchmarkGroup",{"data":{"Registry.toml":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"Compat.toml":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"arrays":["BenchmarkGroup",{"data":{"heterogeneous":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}],"homogeneous":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"array of tables":["BenchmarkGroup",{"data":{"empty":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":1,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}],"parse empty":["BenchmarkTools.Parameters",{"gctrial":true,"time_tolerance":0.05,"samples":10000,"evals":340,"gcsample":false,"seconds":5.0,"overhead":0.0,"memory_tolerance":0.01}]},"tags":[]}]]]
diff --git a/stdlib/TOML/docs/make.jl b/stdlib/TOML/docs/make.jl
index 81d4cec69a5688..3abe55fb74c52a 100644
--- a/stdlib/TOML/docs/make.jl
+++ b/stdlib/TOML/docs/make.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Documenter
 import TOML
 
diff --git a/stdlib/TOML/docs/src/index.md b/stdlib/TOML/docs/src/index.md
index e34bb8d85f2f1a..36e8ec62481083 100644
--- a/stdlib/TOML/docs/src/index.md
+++ b/stdlib/TOML/docs/src/index.md
@@ -65,8 +65,6 @@ format.
 ```jldoctest
 julia> using TOML
 
-julia> fname = tempname();
-
 julia> data = Dict(
           "names" => ["Julia", "Julio"],
           "age" => [10, 20],
@@ -75,6 +73,17 @@ julia> data = Dict(
 julia> TOML.print(data)
 names = ["Julia", "Julio"]
 age = [10, 20]
+
+julia> fname = tempname();
+
+julia> open(fname, "w") do io
+           TOML.print(io, data)
+       end
+
+julia> TOML.parsefile(fname)
+Dict{String, Any} with 2 entries:
+  "names" => ["Julia", "Julio"]
+  "age"   => [10, 20]
 ```
 
 Keys can be sorted according to some value
diff --git a/stdlib/TOML/src/TOML.jl b/stdlib/TOML/src/TOML.jl
index 08d7080707d6fa..4765a05c05f527 100644
--- a/stdlib/TOML/src/TOML.jl
+++ b/stdlib/TOML/src/TOML.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 module TOML
 
 module Internals
@@ -15,6 +17,9 @@ module Internals
     end
 end
 
+# https://github.com/JuliaLang/julia/issues/36605
+readstring(f::AbstractString) = isfile(f) ? read(f, String) : error(repr(f), ": No such file")
+
 """
     Parser()
 
@@ -33,12 +38,12 @@ const Parser = Internals.Parser
 Parse file `f` and return the resulting table (dictionary). Throw a
 [`ParserError`](@ref) upon failure.
 
-See also: [`TOML.tryparsefile`](@ref)
+See also [`TOML.tryparsefile`](@ref).
 """
 parsefile(f::AbstractString) =
-    Internals.parse(Parser(read(f, String); filepath=abspath(f)))
+    Internals.parse(Parser(readstring(f); filepath=abspath(f)))
 parsefile(p::Parser, f::AbstractString) =
-    Internals.parse(Internals.reinit!(p, read(f, String); filepath=abspath(f)))
+    Internals.parse(Internals.reinit!(p, readstring(f); filepath=abspath(f)))
 
 """
     tryparsefile(f::AbstractString)
@@ -47,12 +52,12 @@ parsefile(p::Parser, f::AbstractString) =
 Parse file `f` and return the resulting table (dictionary). Return a
 [`ParserError`](@ref) upon failure.
 
-See also: [`TOML.parsefile`](@ref)
+See also [`TOML.parsefile`](@ref).
 """
 tryparsefile(f::AbstractString) =
-    Internals.tryparse(Parser(read(f, String); filepath=abspath(f)))
+    Internals.tryparse(Parser(readstring(f); filepath=abspath(f)))
 tryparsefile(p::Parser, f::AbstractString) =
-    Internals.tryparse(Internals.reinit!(p, read(f, String); filepath=abspath(f)))
+    Internals.tryparse(Internals.reinit!(p, readstring(f); filepath=abspath(f)))
 
 """
     parse(x::Union{AbstractString, IO})
@@ -61,7 +66,7 @@ tryparsefile(p::Parser, f::AbstractString) =
 Parse the string  or stream `x`, and return the resulting table (dictionary).
 Throw a [`ParserError`](@ref) upon failure.
 
-See also: [`TOML.tryparse`](@ref)
+See also [`TOML.tryparse`](@ref).
 """
 parse(str::AbstractString) =
     Internals.parse(Parser(String(str)))
@@ -77,7 +82,7 @@ parse(p::Parser, io::IO) = parse(p, read(io, String))
 Parse the string or stream `x`, and return the resulting table (dictionary).
 Return a [`ParserError`](@ref) upon failure.
 
-See also: [`TOML.parse`](@ref)
+See also [`TOML.parse`](@ref).
 """
 tryparse(str::AbstractString) =
     Internals.tryparse(Parser(String(str)))
diff --git a/stdlib/TOML/src/print.jl b/stdlib/TOML/src/print.jl
index 244ef7f0e986c4..059414152f7271 100644
--- a/stdlib/TOML/src/print.jl
+++ b/stdlib/TOML/src/print.jl
@@ -2,8 +2,37 @@
 
 import Dates
 
+import Base: @invokelatest
 import ..isvalid_barekey_char
 
+function print_toml_escaped(io::IO, s::AbstractString)
+    for c::AbstractChar in s
+        if !isvalid(c)
+            error("TOML print: invalid character $(repr(c)) encountered when printing string")
+        end
+        if c == '\b'
+            Base.print(io, '\\', 'b')
+        elseif c == '\t'
+            Base.print(io, '\\', 't')
+        elseif c == '\n'
+            Base.print(io, '\\', 'n')
+        elseif c == '\f'
+            Base.print(io, '\\', 'f')
+        elseif c == '\r'
+            Base.print(io, '\\', 'r')
+        elseif c == '"'
+            Base.print(io, '\\', '"')
+        elseif c == '\\'
+            Base.print(io, "\\", '\\')
+        elseif Base.iscntrl(c)
+            Base.print(io, "\\u")
+            Base.print(io, string(UInt32(c), base=16, pad=4))
+        else
+            Base.print(io, c)
+        end
+    end
+end
+
 function printkey(io::IO, keys::Vector{String})
     for (i, k) in enumerate(keys)
         i != 1 && Base.print(io, ".")
@@ -12,7 +41,9 @@ function printkey(io::IO, keys::Vector{String})
             Base.print(io, "\"\"")
         elseif any(!isvalid_barekey_char, k)
             # quoted key
-            Base.print(io, "\"", escape_string(k) ,"\"")
+            Base.print(io, "\"")
+            print_toml_escaped(io, k)
+            Base.print(io, "\"")
         else
             Base.print(io, k)
         end
@@ -20,46 +51,40 @@ function printkey(io::IO, keys::Vector{String})
 end
 
 const MbyFunc = Union{Function, Nothing}
-const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, String}
-function printvalue(f::MbyFunc, io::IO, value::AbstractVector; sorted=false)
+const TOMLValue = Union{AbstractVector, AbstractDict, Dates.DateTime, Dates.Time, Dates.Date, Bool, Integer, AbstractFloat, AbstractString}
+function printvalue(f::MbyFunc, io::IO, value::AbstractVector; sorted=false, by=identity)
     Base.print(io, "[")
     for (i, x) in enumerate(value)
         i != 1 && Base.print(io, ", ")
         if isa(x, AbstractDict)
-            _print(f, io, x; sorted)
+            _print(f, io, x; sorted, by)
         else
-            printvalue(f, io, x; sorted)
+            printvalue(f, io, x; sorted, by)
         end
     end
     Base.print(io, "]")
 end
-function printvalue(f::MbyFunc, io::IO, value; sorted)
-    if f === nothing
-        error("type `$(typeof(value))` is not a valid TOML type, pass a conversion function to `TOML.print`")
-    end
-    toml_value = f(value)
-    if !(toml_value isa TOMLValue)
-        error("TOML syntax function for type `$(typeof(value))` did not return a valid TOML type but a `$(typeof(toml_value))`")
-    end
-    Base.invokelatest(printvalue, f, io, toml_value; sorted)
-end
-printvalue(f::MbyFunc, io::IO, value::AbstractDict; sorted) =
-    _print(f, io, value; sorted)
-printvalue(f::MbyFunc, io::IO, value::Dates.DateTime; sorted) =
+printvalue(f::MbyFunc, io::IO, value::AbstractDict; sorted=false, by=identity) =
+    _print(f, io, value; sorted, by)
+printvalue(f::MbyFunc, io::IO, value::Dates.DateTime; _...) =
     Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd\THH:MM:SS.sss\Z"))
-printvalue(f::MbyFunc, io::IO, value::Dates.Time; sorted) =
+printvalue(f::MbyFunc, io::IO, value::Dates.Time; _...) =
     Base.print(io, Dates.format(value, Dates.dateformat"HH:MM:SS.sss"))
-printvalue(f::MbyFunc, io::IO, value::Dates.Date; sorted) =
+printvalue(f::MbyFunc, io::IO, value::Dates.Date; _...) =
     Base.print(io, Dates.format(value, Dates.dateformat"YYYY-mm-dd"))
-printvalue(f::MbyFunc, io::IO, value::Bool; sorted) =
+printvalue(f::MbyFunc, io::IO, value::Bool; _...) =
     Base.print(io, value ? "true" : "false")
-printvalue(f::MbyFunc, io::IO, value::Integer; sorted) =
+printvalue(f::MbyFunc, io::IO, value::Integer; _...) =
     Base.print(io, Int64(value))  # TOML specifies 64-bit signed long range for integer
-printvalue(f::MbyFunc, io::IO, value::AbstractFloat; sorted) =
+printvalue(f::MbyFunc, io::IO, value::AbstractFloat; _...) =
     Base.print(io, isnan(value) ? "nan" :
                    isinf(value) ? string(value > 0 ? "+" : "-", "inf") :
                    Float64(value))  # TOML specifies IEEE 754 binary64 for float
-printvalue(f::MbyFunc, io::IO, value::AbstractString; sorted) = Base.print(io, "\"", escape_string(value), "\"")
+function printvalue(f::MbyFunc, io::IO, value::AbstractString; _...)
+    Base.print(io, "\"")
+    print_toml_escaped(io, value)
+    Base.print(io, "\"")
+end
 
 is_table(value)           = isa(value, AbstractDict)
 is_array_of_tables(value) = isa(value, AbstractArray) &&
@@ -70,8 +95,8 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
     ks::Vector{String} = String[];
     indent::Int = 0,
     first_block::Bool = true,
-    sorted::Bool,
-    by::Function,
+    sorted::Bool = false,
+    by::Function = identity,
 )
     akeys = keys(a)
     if sorted
@@ -82,11 +107,25 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
     for key in akeys
         value = a[key]
         is_tabular(value) && continue
-        Base.print(io, ' '^4max(0,indent-1))
-        printkey(io, [String(key)])
-        Base.print(io, " = ") # print separator
-        printvalue(f, io, value; sorted)
-        Base.print(io, "\n")  # new line?
+        if !isa(value, TOMLValue)
+            if f === nothing
+                error("type `$(typeof(value))` is not a valid TOML type, pass a conversion function to `TOML.print`")
+            end
+            toml_value = f(value)
+            if !(toml_value isa TOMLValue)
+                error("TOML syntax function for type `$(typeof(value))` did not return a valid TOML type but a `$(typeof(toml_value))`")
+            end
+            value = toml_value
+        end
+        if is_tabular(value)
+            _print(f, io, Dict(key => value); indent, first_block, sorted, by)
+        else
+            Base.print(io, ' '^4max(0,indent-1))
+            printkey(io, [String(key)])
+            Base.print(io, " = ") # print separator
+            printvalue(f, io, value; sorted, by)
+            Base.print(io, "\n")  # new line?
+        end
         first_block = false
     end
 
@@ -94,7 +133,7 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
         value = a[key]
         if is_table(value)
             push!(ks, String(key))
-            header = !all(is_tabular(v) for v in values(value))::Bool
+            header = isempty(value) || !all(is_tabular(v) for v in values(value))::Bool
             if header
                 # print table
                 first_block || println(io)
@@ -105,7 +144,7 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
                 Base.print(io,"]\n")
             end
             # Use runtime dispatch here since the type of value seems not to be enforced other than as AbstractDict
-            Base.invokelatest(_print, f, io, value, ks; indent = indent + header, first_block = header, sorted, by)
+            @invokelatest _print(f, io, value, ks; indent = indent + header, first_block = header, sorted, by)
             pop!(ks)
         elseif is_array_of_tables(value)
             # print array of tables
@@ -119,7 +158,7 @@ function _print(f::MbyFunc, io::IO, a::AbstractDict,
                 Base.print(io,"]]\n")
                 # TODO, nicer error here
                 !isa(v, AbstractDict) && error("array should contain only tables")
-                Base.invokelatest(_print, f, io, v, ks; indent = indent + 1, sorted, by)
+                @invokelatest _print(f, io, v, ks; indent = indent + 1, sorted, by)
             end
             pop!(ks)
         end
diff --git a/stdlib/TOML/test/error_printing.jl b/stdlib/TOML/test/error_printing.jl
index 3149127118f8aa..4842bd4d1373f8 100644
--- a/stdlib/TOML/test/error_printing.jl
+++ b/stdlib/TOML/test/error_printing.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 import TOML: tryparsefile
 
diff --git a/stdlib/TOML/test/invalids.jl b/stdlib/TOML/test/invalids.jl
index fdd2777236ceae..2f1cb3b63a7a2f 100644
--- a/stdlib/TOML/test/invalids.jl
+++ b/stdlib/TOML/test/invalids.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 @testset "errors" begin
 
 str = """
diff --git a/stdlib/TOML/test/parse.jl b/stdlib/TOML/test/parse.jl
index 5e9d8304c76076..12f68acbdb5bf0 100644
--- a/stdlib/TOML/test/parse.jl
+++ b/stdlib/TOML/test/parse.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using TOML, Test
 using TOML: ParserError
 
@@ -36,6 +38,8 @@ using TOML: ParserError
     @test_throws ParserError TOML.parsefile(SubString(invalid_path))
     @test_throws ParserError TOML.parsefile(p, invalid_path)
     @test_throws ParserError TOML.parsefile(p, SubString(invalid_path))
+    @test_throws ErrorException TOML.parsefile(homedir())
+    @test_throws ErrorException TOML.parsefile(p, homedir())
     # TOML.tryparsefile
     @test TOML.tryparsefile(path) == TOML.tryparsefile(SubString(path)) ==
           TOML.tryparsefile(p, path) == TOML.tryparsefile(p, SubString(path)) == dict
@@ -43,4 +47,6 @@ using TOML: ParserError
     @test TOML.tryparsefile(SubString(invalid_path)) isa ParserError
     @test TOML.tryparsefile(p, invalid_path) isa ParserError
     @test TOML.tryparsefile(p, SubString(invalid_path)) isa ParserError
+    @test_throws ErrorException TOML.tryparsefile(homedir())
+    @test_throws ErrorException TOML.tryparsefile(p, homedir())
 end
diff --git a/stdlib/TOML/test/print.jl b/stdlib/TOML/test/print.jl
index f36d9ab885c09a..4ab5e2d8d066d4 100644
--- a/stdlib/TOML/test/print.jl
+++ b/stdlib/TOML/test/print.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 toml_str(a; kwargs...) = sprint(io -> TOML.print(io, a; kwargs...))
 toml_str(f, a; kwargs...) = sprint(io -> TOML.print(f, io, a; kwargs...))
 
@@ -19,10 +21,53 @@ struct MyStruct
     a::Int
 end
 @test_throws ErrorException toml_str(Dict("foo" => MyStruct(1)))
+# simple value
 @test toml_str(Dict("foo" => MyStruct(1))) do x
         x isa MyStruct && return x.a
     end == """
         foo = 1
         """
 
+# tabular values
+@test toml_str(Dict("foo" => MyStruct(1)); sorted=true) do x
+         x isa MyStruct && return [x.a]
+     end == """
+         foo = [1]
+         """
+@test toml_str(Dict("foo" => MyStruct(1)); sorted=true) do x
+        x isa MyStruct && return Dict(:bar => x.a)
+    end == """
+        [foo]
+        bar = 1
+        """
+
+# validation against the usual case
+@test toml_str(Dict("foo" => MyStruct(1)); sorted=true) do x
+         x isa MyStruct && return [x.a]
+     end == toml_str(Dict("foo" => [1]); sorted=true)
+@test toml_str(Dict("foo" => MyStruct(1)); sorted=true) do x
+        x isa MyStruct && return Dict(:bar => x.a)
+    end == toml_str(Dict("foo" => Dict(:bar => 1)); sorted=true)
+
 @test toml_str(Dict("b" => SubString("foo"))) == "b = \"foo\"\n"
+
+@testset "empty dict print" begin
+    s = """
+    user = "me"
+    [julia]
+    [option]
+    """
+    d = TOML.parse(s)
+    @test toml_str(d) == "user = \"me\"\n\n[julia]\n\n[option]\n"
+end
+
+@testset "special characters" begin
+    s = """
+    "\U1f355 \0 \x0 \x1 \t \b" = "\U1f355 \0 \x0 \x1 \t \b"
+    "\x7f" = "\x7f"
+    """
+    @test roundtrip(s)
+
+    d = Dict("str" => string(Char(0xd800)))
+    @test_throws ErrorException TOML.print(devnull, d)
+end
diff --git a/stdlib/TOML/test/readme.jl b/stdlib/TOML/test/readme.jl
index e981bbf7ddf44e..21961cc6f7ec81 100644
--- a/stdlib/TOML/test/readme.jl
+++ b/stdlib/TOML/test/readme.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # This test stuff in the TOML README at https://github.com/toml-lang/toml
 @testset "README" begin
 
@@ -669,7 +671,7 @@ str = """
 [ g .  h  . i ]    # same as [g.h.i]
 [ j . "ʞ" . 'l' ]  # same as [j."ʞ".'l']
 """
-@test_broken roundtrip(str) # Printer removes empty tables right now
+@test roundtrip(str)
 d = parse(str)
 @test d == Dict(
   "a" => Dict("b" => Dict("c" => Dict())),
@@ -687,7 +689,7 @@ str = """
 
 [x] # defining a super-table afterward is ok
 """
-@test_broken roundtrip(str) # Printer removes empty tables right now
+@test roundtrip(str)
 d = parse(str)
 @test d == Dict("x" => Dict("y" => Dict("z" => Dict("w" => Dict()))))
 
@@ -745,7 +747,7 @@ str = """
 [animal]
 [fruit.orange]
 """
-@test_broken roundtrip(str) # Printer removes empty tables right now
+@test roundtrip(str)
 d = parse(str)
 @test d == Dict(
   "fruit" => Dict("apple" => Dict(), "orange" => Dict()),
@@ -758,7 +760,7 @@ str = """
 [fruit.orange]
 [animal]
 """
-@test_broken roundtrip(str) # Printer removes empty tables right now
+@test roundtrip(str)
 @test d == Dict(
   "fruit" => Dict("apple" => Dict(), "orange" => Dict()),
   "animal" => Dict()
diff --git a/stdlib/TOML/test/runtests.jl b/stdlib/TOML/test/runtests.jl
index a886d210a99802..6228b3c2fc11cb 100644
--- a/stdlib/TOML/test/runtests.jl
+++ b/stdlib/TOML/test/runtests.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 using Dates
 
diff --git a/stdlib/TOML/test/testfiles/invalid/key-single-open-bracket.toml b/stdlib/TOML/test/testfiles/invalid/key-single-open-bracket.toml
index 8e2f0bef135ba8..558ed37d93c5c3 100644
--- a/stdlib/TOML/test/testfiles/invalid/key-single-open-bracket.toml
+++ b/stdlib/TOML/test/testfiles/invalid/key-single-open-bracket.toml
@@ -1 +1 @@
-[
\ No newline at end of file
+[
diff --git a/stdlib/TOML/test/testfiles/invalid/key-space.toml b/stdlib/TOML/test/testfiles/invalid/key-space.toml
index 201806d2801328..7c22703e888e74 100644
--- a/stdlib/TOML/test/testfiles/invalid/key-space.toml
+++ b/stdlib/TOML/test/testfiles/invalid/key-space.toml
@@ -1 +1 @@
-a b = 1
\ No newline at end of file
+a b = 1
diff --git a/stdlib/TOML/test/testfiles/invalid/multi-line-inline-table.toml b/stdlib/TOML/test/testfiles/invalid/multi-line-inline-table.toml
index a195e1b5dcd84c..3f34e15c072169 100644
--- a/stdlib/TOML/test/testfiles/invalid/multi-line-inline-table.toml
+++ b/stdlib/TOML/test/testfiles/invalid/multi-line-inline-table.toml
@@ -1,4 +1,4 @@
 json_like = {
           first = "Tom",
           last = "Preston-Werner"
-}
\ No newline at end of file
+}
diff --git a/stdlib/TOML/test/testfiles/invalid/string-bad-codepoint.toml b/stdlib/TOML/test/testfiles/invalid/string-bad-codepoint.toml
index aa81356dc94dca..592db75bb0c34c 100644
--- a/stdlib/TOML/test/testfiles/invalid/string-bad-codepoint.toml
+++ b/stdlib/TOML/test/testfiles/invalid/string-bad-codepoint.toml
@@ -1 +1 @@
-invalid-codepoint = "This string contains a non scalar unicode codepoint \uD801"
\ No newline at end of file
+invalid-codepoint = "This string contains a non scalar unicode codepoint \uD801"
diff --git a/stdlib/TOML/test/testfiles/invalid/table-whitespace.toml b/stdlib/TOML/test/testfiles/invalid/table-whitespace.toml
index 79bbcb1e298324..0a6a6a69725c4a 100644
--- a/stdlib/TOML/test/testfiles/invalid/table-whitespace.toml
+++ b/stdlib/TOML/test/testfiles/invalid/table-whitespace.toml
@@ -1 +1 @@
-[invalid key]
\ No newline at end of file
+[invalid key]
diff --git a/stdlib/TOML/test/testfiles/invalid/table-with-pound.toml b/stdlib/TOML/test/testfiles/invalid/table-with-pound.toml
index 0d8edb524fe1af..e7b777ecfb305f 100644
--- a/stdlib/TOML/test/testfiles/invalid/table-with-pound.toml
+++ b/stdlib/TOML/test/testfiles/invalid/table-with-pound.toml
@@ -1,2 +1,2 @@
 [key#group]
-answer = 42
\ No newline at end of file
+answer = 42
diff --git a/stdlib/TOML/test/testfiles/valid/array-empty.jl b/stdlib/TOML/test/testfiles/valid/array-empty.jl
index 78a2489844b1a7..da5f04f7da1a82 100644
--- a/stdlib/TOML/test/testfiles/valid/array-empty.jl
+++ b/stdlib/TOML/test/testfiles/valid/array-empty.jl
@@ -1 +1 @@
-Dict{String,Any}("thevoid" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[],"type" => "array")],"type" => "array")],"type" => "array")],"type" => "array")],"type" => "array"))
\ No newline at end of file
+Dict{String,Any}("thevoid" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[],"type" => "array")],"type" => "array")],"type" => "array")],"type" => "array")],"type" => "array"))
diff --git a/stdlib/TOML/test/testfiles/valid/array-nospaces.jl b/stdlib/TOML/test/testfiles/valid/array-nospaces.jl
index e5b8c98f00f3e9..3f8b61a2880d48 100644
--- a/stdlib/TOML/test/testfiles/valid/array-nospaces.jl
+++ b/stdlib/TOML/test/testfiles/valid/array-nospaces.jl
@@ -1 +1 @@
-Dict{String,Any}("ints" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer"), Dict{String,Any}("value" => "3","type" => "integer")],"type" => "array"))
\ No newline at end of file
+Dict{String,Any}("ints" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer"), Dict{String,Any}("value" => "3","type" => "integer")],"type" => "array"))
diff --git a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.jl b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.jl
index 0c11baa1b27bf0..6e6862dc300800 100644
--- a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.jl
+++ b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma-2.jl
@@ -1 +1 @@
-Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => " \", ","type" => "string")],"type" => "array"))
\ No newline at end of file
+Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => " \", ","type" => "string")],"type" => "array"))
diff --git a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.jl b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.jl
index c291fb0b2b51fc..d570f5e2a433a6 100644
--- a/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.jl
+++ b/stdlib/TOML/test/testfiles/valid/array-string-quote-comma.jl
@@ -1 +1 @@
-Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "Client: \"XXXX\", Job: XXXX","type" => "string"), Dict{String,Any}("value" => "Code: XXXX","type" => "string")],"type" => "array"))
\ No newline at end of file
+Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "Client: \"XXXX\", Job: XXXX","type" => "string"), Dict{String,Any}("value" => "Code: XXXX","type" => "string")],"type" => "array"))
diff --git a/stdlib/TOML/test/testfiles/valid/array-string-with-comma.jl b/stdlib/TOML/test/testfiles/valid/array-string-with-comma.jl
index fac0d3f5098bd4..83727c9f05954f 100644
--- a/stdlib/TOML/test/testfiles/valid/array-string-with-comma.jl
+++ b/stdlib/TOML/test/testfiles/valid/array-string-with-comma.jl
@@ -1 +1 @@
-Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "Client: XXXX, Job: XXXX","type" => "string"), Dict{String,Any}("value" => "Code: XXXX","type" => "string")],"type" => "array"))
\ No newline at end of file
+Dict{String,Any}("title" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "Client: XXXX, Job: XXXX","type" => "string"), Dict{String,Any}("value" => "Code: XXXX","type" => "string")],"type" => "array"))
diff --git a/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.jl b/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.jl
index 2350b3cd70cba9..0c0ad7fe793bb6 100644
--- a/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.jl
+++ b/stdlib/TOML/test/testfiles/valid/array-table-array-string-backslash.jl
@@ -1 +1 @@
-Dict{String,Any}("foo" => Any[Dict{String,Any}("bar" => Dict{String,Any}("value" => "\"{{baz}}\"","type" => "string"))])
\ No newline at end of file
+Dict{String,Any}("foo" => Any[Dict{String,Any}("bar" => Dict{String,Any}("value" => "\"{{baz}}\"","type" => "string"))])
diff --git a/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.jl b/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.jl
index dc143c8f8e685e..7f66b6052096a2 100644
--- a/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.jl
+++ b/stdlib/TOML/test/testfiles/valid/arrays-hetergeneous.jl
@@ -1 +1 @@
-Dict{String,Any}("mixed" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string"), Dict{String,Any}("value" => "b","type" => "string")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1.1","type" => "float"), Dict{String,Any}("value" => "2.1","type" => "float")],"type" => "array")],"type" => "array"))
\ No newline at end of file
+Dict{String,Any}("mixed" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string"), Dict{String,Any}("value" => "b","type" => "string")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1.1","type" => "float"), Dict{String,Any}("value" => "2.1","type" => "float")],"type" => "array")],"type" => "array"))
diff --git a/stdlib/TOML/test/testfiles/valid/arrays-nested.jl b/stdlib/TOML/test/testfiles/valid/arrays-nested.jl
index 69e925e4e36f86..4f3280552e9da2 100644
--- a/stdlib/TOML/test/testfiles/valid/arrays-nested.jl
+++ b/stdlib/TOML/test/testfiles/valid/arrays-nested.jl
@@ -1 +1 @@
-Dict{String,Any}("nest" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "b","type" => "string")],"type" => "array")],"type" => "array"))
\ No newline at end of file
+Dict{String,Any}("nest" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string")],"type" => "array"), Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "b","type" => "string")],"type" => "array")],"type" => "array"))
diff --git a/stdlib/TOML/test/testfiles/valid/arrays.jl b/stdlib/TOML/test/testfiles/valid/arrays.jl
index e00d308bf577e5..dc0ccdfc4f4141 100644
--- a/stdlib/TOML/test/testfiles/valid/arrays.jl
+++ b/stdlib/TOML/test/testfiles/valid/arrays.jl
@@ -1 +1 @@
-Dict{String,Any}("strings" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string"), Dict{String,Any}("value" => "b","type" => "string"), Dict{String,Any}("value" => "c","type" => "string")],"type" => "array"),"ints" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer"), Dict{String,Any}("value" => "3","type" => "integer")],"type" => "array"),"dates" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"), Dict{String,Any}("value" => "1979-05-27T07:32:00Z","type" => "datetime"), Dict{String,Any}("value" => "2006-06-01T11:00:00Z","type" => "datetime")],"type" => "array"),"comments" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer")],"type" => "array"),"floats" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1.1","type" => "float"), Dict{String,Any}("value" => "2.1","type" => "float"), Dict{String,Any}("value" => "3.1","type" => "float")],"type" => "array"))
\ No newline at end of file
+Dict{String,Any}("strings" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "a","type" => "string"), Dict{String,Any}("value" => "b","type" => "string"), Dict{String,Any}("value" => "c","type" => "string")],"type" => "array"),"ints" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer"), Dict{String,Any}("value" => "3","type" => "integer")],"type" => "array"),"dates" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"), Dict{String,Any}("value" => "1979-05-27T07:32:00Z","type" => "datetime"), Dict{String,Any}("value" => "2006-06-01T11:00:00Z","type" => "datetime")],"type" => "array"),"comments" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1","type" => "integer"), Dict{String,Any}("value" => "2","type" => "integer")],"type" => "array"),"floats" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "1.1","type" => "float"), Dict{String,Any}("value" => "2.1","type" => "float"), Dict{String,Any}("value" => "3.1","type" => "float")],"type" => "array"))
diff --git a/stdlib/TOML/test/testfiles/valid/bool.jl b/stdlib/TOML/test/testfiles/valid/bool.jl
index aaa55c790e4098..5ce242aae39159 100644
--- a/stdlib/TOML/test/testfiles/valid/bool.jl
+++ b/stdlib/TOML/test/testfiles/valid/bool.jl
@@ -1 +1 @@
-Dict{String,Any}("f" => Dict{String,Any}("value" => "false","type" => "bool"),"t" => Dict{String,Any}("value" => "true","type" => "bool"))
\ No newline at end of file
+Dict{String,Any}("f" => Dict{String,Any}("value" => "false","type" => "bool"),"t" => Dict{String,Any}("value" => "true","type" => "bool"))
diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof.jl b/stdlib/TOML/test/testfiles/valid/comments-at-eof.jl
index 230bf448a5740b..45392c32b0ba1b 100644
--- a/stdlib/TOML/test/testfiles/valid/comments-at-eof.jl
+++ b/stdlib/TOML/test/testfiles/valid/comments-at-eof.jl
@@ -1 +1 @@
-Dict{String,Any}("key" => Dict{String,Any}("value" => "value","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("key" => Dict{String,Any}("value" => "value","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.jl b/stdlib/TOML/test/testfiles/valid/comments-at-eof2.jl
index 230bf448a5740b..45392c32b0ba1b 100644
--- a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.jl
+++ b/stdlib/TOML/test/testfiles/valid/comments-at-eof2.jl
@@ -1 +1 @@
-Dict{String,Any}("key" => Dict{String,Any}("value" => "value","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("key" => Dict{String,Any}("value" => "value","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.toml b/stdlib/TOML/test/testfiles/valid/comments-at-eof2.toml
index 026c93a8b8d782..090b474834610c 100644
--- a/stdlib/TOML/test/testfiles/valid/comments-at-eof2.toml
+++ b/stdlib/TOML/test/testfiles/valid/comments-at-eof2.toml
@@ -1,2 +1,2 @@
 # This is a full-line comment
-key = "value" # This is a comment at the end of a line
\ No newline at end of file
+key = "value" # This is a comment at the end of a line
diff --git a/stdlib/TOML/test/testfiles/valid/comments-everywhere.jl b/stdlib/TOML/test/testfiles/valid/comments-everywhere.jl
index 3a0cc4b062facb..dd43fd70576e97 100644
--- a/stdlib/TOML/test/testfiles/valid/comments-everywhere.jl
+++ b/stdlib/TOML/test/testfiles/valid/comments-everywhere.jl
@@ -1 +1 @@
-Dict{String,Any}("group" => Dict{String,Any}("more" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "42","type" => "integer"), Dict{String,Any}("value" => "42","type" => "integer")],"type" => "array"),"answer" => Dict{String,Any}("value" => "42","type" => "integer")))
\ No newline at end of file
+Dict{String,Any}("group" => Dict{String,Any}("more" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "42","type" => "integer"), Dict{String,Any}("value" => "42","type" => "integer")],"type" => "array"),"answer" => Dict{String,Any}("value" => "42","type" => "integer")))
diff --git a/stdlib/TOML/test/testfiles/valid/datetime-timezone.jl b/stdlib/TOML/test/testfiles/valid/datetime-timezone.jl
index 7741e94a33b345..1759fd10e086c8 100644
--- a/stdlib/TOML/test/testfiles/valid/datetime-timezone.jl
+++ b/stdlib/TOML/test/testfiles/valid/datetime-timezone.jl
@@ -1 +1 @@
-Dict{String,Any}("bestdayever" => Dict{String,Any}("value" => "2017-06-06T12:34:56-05:00","type" => "datetime"))
\ No newline at end of file
+Dict{String,Any}("bestdayever" => Dict{String,Any}("value" => "2017-06-06T12:34:56-05:00","type" => "datetime"))
diff --git a/stdlib/TOML/test/testfiles/valid/datetime.jl b/stdlib/TOML/test/testfiles/valid/datetime.jl
index a64b34c1e22473..8d6c630023e3f1 100644
--- a/stdlib/TOML/test/testfiles/valid/datetime.jl
+++ b/stdlib/TOML/test/testfiles/valid/datetime.jl
@@ -1 +1 @@
-Dict{String,Any}("milliseconds" => Dict{String,Any}("value" => "1977-12-21T03:32:00.555+00:00","type" => "datetime"),"bestdayever" => Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"),"numoffset" => Dict{String,Any}("value" => "1977-06-28T12:32:00Z","type" => "datetime"))
\ No newline at end of file
+Dict{String,Any}("milliseconds" => Dict{String,Any}("value" => "1977-12-21T03:32:00.555+00:00","type" => "datetime"),"bestdayever" => Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"),"numoffset" => Dict{String,Any}("value" => "1977-06-28T12:32:00Z","type" => "datetime"))
diff --git a/stdlib/TOML/test/testfiles/valid/double-quote-escape.jl b/stdlib/TOML/test/testfiles/valid/double-quote-escape.jl
index fccbb9e75005ce..934675aacf2195 100644
--- a/stdlib/TOML/test/testfiles/valid/double-quote-escape.jl
+++ b/stdlib/TOML/test/testfiles/valid/double-quote-escape.jl
@@ -1 +1 @@
-Dict{String,Any}("test" => Dict{String,Any}("value" => "\"one\"","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("test" => Dict{String,Any}("value" => "\"one\"","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/empty.jl b/stdlib/TOML/test/testfiles/valid/empty.jl
index edc491b03230c1..1adb380ba335bb 100644
--- a/stdlib/TOML/test/testfiles/valid/empty.jl
+++ b/stdlib/TOML/test/testfiles/valid/empty.jl
@@ -1 +1 @@
-Dict{String,Any}()
\ No newline at end of file
+Dict{String,Any}()
diff --git a/stdlib/TOML/test/testfiles/valid/escaped-escape.jl b/stdlib/TOML/test/testfiles/valid/escaped-escape.jl
index 97c80799c4290f..ed710ff1b4ff64 100644
--- a/stdlib/TOML/test/testfiles/valid/escaped-escape.jl
+++ b/stdlib/TOML/test/testfiles/valid/escaped-escape.jl
@@ -1 +1 @@
-Dict{String,Any}("answer" => Dict{String,Any}("value" => "\\x64","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("answer" => Dict{String,Any}("value" => "\\x64","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/example.jl b/stdlib/TOML/test/testfiles/valid/example.jl
index 83071332172635..b5b2bb86c53630 100644
--- a/stdlib/TOML/test/testfiles/valid/example.jl
+++ b/stdlib/TOML/test/testfiles/valid/example.jl
@@ -1 +1 @@
-Dict{String,Any}("best-day-ever" => Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"),"numtheory" => Dict{String,Any}("perfection" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "6","type" => "integer"), Dict{String,Any}("value" => "28","type" => "integer"), Dict{String,Any}("value" => "496","type" => "integer")],"type" => "array"),"boring" => Dict{String,Any}("value" => "false","type" => "bool")))
\ No newline at end of file
+Dict{String,Any}("best-day-ever" => Dict{String,Any}("value" => "1987-07-05T17:45:00Z","type" => "datetime"),"numtheory" => Dict{String,Any}("perfection" => Dict{String,Any}("value" => Any[Dict{String,Any}("value" => "6","type" => "integer"), Dict{String,Any}("value" => "28","type" => "integer"), Dict{String,Any}("value" => "496","type" => "integer")],"type" => "array"),"boring" => Dict{String,Any}("value" => "false","type" => "bool")))
diff --git a/stdlib/TOML/test/testfiles/valid/exponent-part-float.jl b/stdlib/TOML/test/testfiles/valid/exponent-part-float.jl
index 5446e515ed2cb1..34ed0bebb2fc02 100644
--- a/stdlib/TOML/test/testfiles/valid/exponent-part-float.jl
+++ b/stdlib/TOML/test/testfiles/valid/exponent-part-float.jl
@@ -1 +1 @@
-Dict{String,Any}("million" => Dict{String,Any}("value" => "1000000","type" => "float"),"minustenth" => Dict{String,Any}("value" => "-0.1","type" => "float"),"beast" => Dict{String,Any}("value" => "666","type" => "float"))
\ No newline at end of file
+Dict{String,Any}("million" => Dict{String,Any}("value" => "1000000","type" => "float"),"minustenth" => Dict{String,Any}("value" => "-0.1","type" => "float"),"beast" => Dict{String,Any}("value" => "666","type" => "float"))
diff --git a/stdlib/TOML/test/testfiles/valid/float-exponent.jl b/stdlib/TOML/test/testfiles/valid/float-exponent.jl
index b35991f2467fab..e64817ce85e92d 100644
--- a/stdlib/TOML/test/testfiles/valid/float-exponent.jl
+++ b/stdlib/TOML/test/testfiles/valid/float-exponent.jl
@@ -1 +1 @@
-Dict{String,Any}("neg" => Dict{String,Any}("value" => "0.03","type" => "float"),"zero" => Dict{String,Any}("value" => "3.0","type" => "float"),"pointupper" => Dict{String,Any}("value" => "310.0","type" => "float"),"lower" => Dict{String,Any}("value" => "300.0","type" => "float"),"upper" => Dict{String,Any}("value" => "300.0","type" => "float"),"pos" => Dict{String,Any}("value" => "300.0","type" => "float"),"pointlower" => Dict{String,Any}("value" => "310.0","type" => "float"))
\ No newline at end of file
+Dict{String,Any}("neg" => Dict{String,Any}("value" => "0.03","type" => "float"),"zero" => Dict{String,Any}("value" => "3.0","type" => "float"),"pointupper" => Dict{String,Any}("value" => "310.0","type" => "float"),"lower" => Dict{String,Any}("value" => "300.0","type" => "float"),"upper" => Dict{String,Any}("value" => "300.0","type" => "float"),"pos" => Dict{String,Any}("value" => "300.0","type" => "float"),"pointlower" => Dict{String,Any}("value" => "310.0","type" => "float"))
diff --git a/stdlib/TOML/test/testfiles/valid/float-underscore.jl b/stdlib/TOML/test/testfiles/valid/float-underscore.jl
index c48c5ed7aadf61..e175c937f4d5b2 100644
--- a/stdlib/TOML/test/testfiles/valid/float-underscore.jl
+++ b/stdlib/TOML/test/testfiles/valid/float-underscore.jl
@@ -1 +1 @@
-Dict{String,Any}("after" => Dict{String,Any}("value" => "3141.5927","type" => "float"),"exponent" => Dict{String,Any}("value" => "3e14","type" => "float"),"before" => Dict{String,Any}("value" => "3141.5927","type" => "float"))
\ No newline at end of file
+Dict{String,Any}("after" => Dict{String,Any}("value" => "3141.5927","type" => "float"),"exponent" => Dict{String,Any}("value" => "3e14","type" => "float"),"before" => Dict{String,Any}("value" => "3141.5927","type" => "float"))
diff --git a/stdlib/TOML/test/testfiles/valid/float.jl b/stdlib/TOML/test/testfiles/valid/float.jl
index 45a52e3af16751..d36893db363a30 100644
--- a/stdlib/TOML/test/testfiles/valid/float.jl
+++ b/stdlib/TOML/test/testfiles/valid/float.jl
@@ -1 +1 @@
-Dict{String,Any}("negpi" => Dict{String,Any}("value" => "-3.14","type" => "float"),"pospi" => Dict{String,Any}("value" => "3.14","type" => "float"),"pi" => Dict{String,Any}("value" => "3.14","type" => "float"),"zero-intpart" => Dict{String,Any}("value" => "0.123","type" => "float"))
\ No newline at end of file
+Dict{String,Any}("negpi" => Dict{String,Any}("value" => "-3.14","type" => "float"),"pospi" => Dict{String,Any}("value" => "3.14","type" => "float"),"pi" => Dict{String,Any}("value" => "3.14","type" => "float"),"zero-intpart" => Dict{String,Any}("value" => "0.123","type" => "float"))
diff --git a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.jl b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.jl
index f1ebc4aa65af0d..376f0b95cf7e8d 100644
--- a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.jl
+++ b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-after.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))),"better" => Dict{String,Any}("value" => "43","type" => "integer")))
\ No newline at end of file
+Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))),"better" => Dict{String,Any}("value" => "43","type" => "integer")))
diff --git a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.jl b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.jl
index f1ebc4aa65af0d..376f0b95cf7e8d 100644
--- a/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.jl
+++ b/stdlib/TOML/test/testfiles/valid/implicit-and-explicit-before.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))),"better" => Dict{String,Any}("value" => "43","type" => "integer")))
\ No newline at end of file
+Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))),"better" => Dict{String,Any}("value" => "43","type" => "integer")))
diff --git a/stdlib/TOML/test/testfiles/valid/implicit-groups.jl b/stdlib/TOML/test/testfiles/valid/implicit-groups.jl
index 2fa2c2156bb676..5481705ddbc4e0 100644
--- a/stdlib/TOML/test/testfiles/valid/implicit-groups.jl
+++ b/stdlib/TOML/test/testfiles/valid/implicit-groups.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")))))
\ No newline at end of file
+Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")))))
diff --git a/stdlib/TOML/test/testfiles/valid/inline-table-array.jl b/stdlib/TOML/test/testfiles/valid/inline-table-array.jl
index 7e9f0ede913687..c9b1c336003d2d 100644
--- a/stdlib/TOML/test/testfiles/valid/inline-table-array.jl
+++ b/stdlib/TOML/test/testfiles/valid/inline-table-array.jl
@@ -1 +1 @@
-Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Eric","type" => "string"),"last_name" => Dict{String,Any}("value" => "Clapton","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bob","type" => "string"),"last_name" => Dict{String,Any}("value" => "Seger","type" => "string"))])
\ No newline at end of file
+Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Eric","type" => "string"),"last_name" => Dict{String,Any}("value" => "Clapton","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bob","type" => "string"),"last_name" => Dict{String,Any}("value" => "Seger","type" => "string"))])
diff --git a/stdlib/TOML/test/testfiles/valid/inline-table.jl b/stdlib/TOML/test/testfiles/valid/inline-table.jl
index 39f9f52be24f35..ecbaec3304cadc 100644
--- a/stdlib/TOML/test/testfiles/valid/inline-table.jl
+++ b/stdlib/TOML/test/testfiles/valid/inline-table.jl
@@ -1 +1 @@
-Dict{String,Any}("point" => Dict{String,Any}("x" => Dict{String,Any}("value" => "1","type" => "integer"),"y" => Dict{String,Any}("value" => "2","type" => "integer")),"name" => Dict{String,Any}("first" => Dict{String,Any}("value" => "Tom","type" => "string"),"last" => Dict{String,Any}("value" => "Preston-Werner","type" => "string")),"str-key" => Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")),"simple" => Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")),"table-array" => Any[Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")), Dict{String,Any}("b" => Dict{String,Any}("value" => "2","type" => "integer"))])
\ No newline at end of file
+Dict{String,Any}("point" => Dict{String,Any}("x" => Dict{String,Any}("value" => "1","type" => "integer"),"y" => Dict{String,Any}("value" => "2","type" => "integer")),"name" => Dict{String,Any}("first" => Dict{String,Any}("value" => "Tom","type" => "string"),"last" => Dict{String,Any}("value" => "Preston-Werner","type" => "string")),"str-key" => Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")),"simple" => Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")),"table-array" => Any[Dict{String,Any}("a" => Dict{String,Any}("value" => "1","type" => "integer")), Dict{String,Any}("b" => Dict{String,Any}("value" => "2","type" => "integer"))])
diff --git a/stdlib/TOML/test/testfiles/valid/integer-underscore.jl b/stdlib/TOML/test/testfiles/valid/integer-underscore.jl
index 47a91e29343b47..84b2dfa1ad44e0 100644
--- a/stdlib/TOML/test/testfiles/valid/integer-underscore.jl
+++ b/stdlib/TOML/test/testfiles/valid/integer-underscore.jl
@@ -1 +1 @@
-Dict{String,Any}("kilo" => Dict{String,Any}("value" => "1000","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("kilo" => Dict{String,Any}("value" => "1000","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/integer.jl b/stdlib/TOML/test/testfiles/valid/integer.jl
index ad8a94c4ccd9a8..7150736c814152 100644
--- a/stdlib/TOML/test/testfiles/valid/integer.jl
+++ b/stdlib/TOML/test/testfiles/valid/integer.jl
@@ -1 +1 @@
-Dict{String,Any}("zero" => Dict{String,Any}("value" => "0","type" => "integer"),"posanswer" => Dict{String,Any}("value" => "42","type" => "integer"),"answer" => Dict{String,Any}("value" => "42","type" => "integer"),"neganswer" => Dict{String,Any}("value" => "-42","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("zero" => Dict{String,Any}("value" => "0","type" => "integer"),"posanswer" => Dict{String,Any}("value" => "42","type" => "integer"),"answer" => Dict{String,Any}("value" => "42","type" => "integer"),"neganswer" => Dict{String,Any}("value" => "-42","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/key-equals-nospace.jl b/stdlib/TOML/test/testfiles/valid/key-equals-nospace.jl
index 8b553e2655481f..b88a68c41a2c1e 100644
--- a/stdlib/TOML/test/testfiles/valid/key-equals-nospace.jl
+++ b/stdlib/TOML/test/testfiles/valid/key-equals-nospace.jl
@@ -1 +1 @@
-Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/key-numeric.jl b/stdlib/TOML/test/testfiles/valid/key-numeric.jl
index 10cd8e0e807820..b6d00e0041bbeb 100644
--- a/stdlib/TOML/test/testfiles/valid/key-numeric.jl
+++ b/stdlib/TOML/test/testfiles/valid/key-numeric.jl
@@ -1 +1 @@
-Dict{String,Any}("1" => Dict{String,Any}("value" => "1","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("1" => Dict{String,Any}("value" => "1","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/key-space.jl b/stdlib/TOML/test/testfiles/valid/key-space.jl
index 97439fcfccdd09..c43b2619a1c91d 100644
--- a/stdlib/TOML/test/testfiles/valid/key-space.jl
+++ b/stdlib/TOML/test/testfiles/valid/key-space.jl
@@ -1 +1 @@
-Dict{String,Any}("a b" => Dict{String,Any}("value" => "1","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("a b" => Dict{String,Any}("value" => "1","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/key-special-chars.jl b/stdlib/TOML/test/testfiles/valid/key-special-chars.jl
index 90d934f45741eb..31b05979dbf195 100644
--- a/stdlib/TOML/test/testfiles/valid/key-special-chars.jl
+++ b/stdlib/TOML/test/testfiles/valid/key-special-chars.jl
@@ -1 +1 @@
-Dict{String,Any}("~!@\$^&*()_+-`1234567890[]|/?><.,;:'" => Dict{String,Any}("value" => "1","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("~!@\$^&*()_+-`1234567890[]|/?><.,;:'" => Dict{String,Any}("value" => "1","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/keys-with-dots.jl b/stdlib/TOML/test/testfiles/valid/keys-with-dots.jl
index 52607b35b91eae..2d700e6e091ec8 100644
--- a/stdlib/TOML/test/testfiles/valid/keys-with-dots.jl
+++ b/stdlib/TOML/test/testfiles/valid/keys-with-dots.jl
@@ -1 +1 @@
-Dict{String,Any}("with.dot" => Dict{String,Any}("value" => "2","type" => "integer"),"plain_table" => Dict{String,Any}("with.dot" => Dict{String,Any}("value" => "4","type" => "integer"),"plain" => Dict{String,Any}("value" => "3","type" => "integer")),"table" => Dict{String,Any}("withdot" => Dict{String,Any}("key.with.dots" => Dict{String,Any}("value" => "6","type" => "integer"),"plain" => Dict{String,Any}("value" => "5","type" => "integer"))),"plain" => Dict{String,Any}("value" => "1","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("with.dot" => Dict{String,Any}("value" => "2","type" => "integer"),"plain_table" => Dict{String,Any}("with.dot" => Dict{String,Any}("value" => "4","type" => "integer"),"plain" => Dict{String,Any}("value" => "3","type" => "integer")),"table" => Dict{String,Any}("withdot" => Dict{String,Any}("key.with.dots" => Dict{String,Any}("value" => "6","type" => "integer"),"plain" => Dict{String,Any}("value" => "5","type" => "integer"))),"plain" => Dict{String,Any}("value" => "1","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/keys-with-dots.json b/stdlib/TOML/test/testfiles/valid/keys-with-dots.json
index d2ee0021f63025..6dd7b28e636e20 100644
--- a/stdlib/TOML/test/testfiles/valid/keys-with-dots.json
+++ b/stdlib/TOML/test/testfiles/valid/keys-with-dots.json
@@ -11,4 +11,4 @@
       "key.with.dots": {"type": "integer", "value": "6"}
     }
   }
-}
\ No newline at end of file
+}
diff --git a/stdlib/TOML/test/testfiles/valid/keys-with-dots.toml b/stdlib/TOML/test/testfiles/valid/keys-with-dots.toml
index 24905929b22f34..65fcddf96a491e 100644
--- a/stdlib/TOML/test/testfiles/valid/keys-with-dots.toml
+++ b/stdlib/TOML/test/testfiles/valid/keys-with-dots.toml
@@ -7,4 +7,4 @@ plain = 3
 
 [table.withdot]
 plain = 5
-"key.with.dots" = 6
\ No newline at end of file
+"key.with.dots" = 6
diff --git a/stdlib/TOML/test/testfiles/valid/long-float.jl b/stdlib/TOML/test/testfiles/valid/long-float.jl
index b960a20d97605a..d59e96f1cc0193 100644
--- a/stdlib/TOML/test/testfiles/valid/long-float.jl
+++ b/stdlib/TOML/test/testfiles/valid/long-float.jl
@@ -1 +1 @@
-Dict{String,Any}("longpi" => Dict{String,Any}("value" => "3.141592653589793","type" => "float"),"neglongpi" => Dict{String,Any}("value" => "-3.141592653589793","type" => "float"))
\ No newline at end of file
+Dict{String,Any}("longpi" => Dict{String,Any}("value" => "3.141592653589793","type" => "float"),"neglongpi" => Dict{String,Any}("value" => "-3.141592653589793","type" => "float"))
diff --git a/stdlib/TOML/test/testfiles/valid/long-integer.jl b/stdlib/TOML/test/testfiles/valid/long-integer.jl
index 051da8e7c940b2..63ae15b3d84c5b 100644
--- a/stdlib/TOML/test/testfiles/valid/long-integer.jl
+++ b/stdlib/TOML/test/testfiles/valid/long-integer.jl
@@ -1 +1 @@
-Dict{String,Any}("answer" => Dict{String,Any}("value" => "9223372036854775807","type" => "integer"),"neganswer" => Dict{String,Any}("value" => "-9223372036854775808","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("answer" => Dict{String,Any}("value" => "9223372036854775807","type" => "integer"),"neganswer" => Dict{String,Any}("value" => "-9223372036854775808","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/multiline-string.jl b/stdlib/TOML/test/testfiles/valid/multiline-string.jl
index ba1eb06c418682..dad787a454c56f 100644
--- a/stdlib/TOML/test/testfiles/valid/multiline-string.jl
+++ b/stdlib/TOML/test/testfiles/valid/multiline-string.jl
@@ -1 +1 @@
-Dict{String,Any}("equivalent_two" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy dog.","type" => "string"),"multiline_empty_four" => Dict{String,Any}("value" => "","type" => "string"),"multiline_empty_one" => Dict{String,Any}("value" => "","type" => "string"),"equivalent_three" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy dog.","type" => "string"),"equivalent_one" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy dog.","type" => "string"),"multiline_empty_two" => Dict{String,Any}("value" => "","type" => "string"),"multiline_empty_three" => Dict{String,Any}("value" => "","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("equivalent_two" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy dog.","type" => "string"),"multiline_empty_four" => Dict{String,Any}("value" => "","type" => "string"),"multiline_empty_one" => Dict{String,Any}("value" => "","type" => "string"),"equivalent_three" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy dog.","type" => "string"),"equivalent_one" => Dict{String,Any}("value" => "The quick brown fox jumps over the lazy dog.","type" => "string"),"multiline_empty_two" => Dict{String,Any}("value" => "","type" => "string"),"multiline_empty_three" => Dict{String,Any}("value" => "","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.jl b/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.jl
index 32520dc02ae1b1..0bc1d39c166089 100644
--- a/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.jl
+++ b/stdlib/TOML/test/testfiles/valid/nested-inline-table-array.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Any[Dict{String,Any}("b" => Dict{String,Any}())])
\ No newline at end of file
+Dict{String,Any}("a" => Any[Dict{String,Any}("b" => Dict{String,Any}())])
diff --git a/stdlib/TOML/test/testfiles/valid/newline-crlf.jl b/stdlib/TOML/test/testfiles/valid/newline-crlf.jl
index 489a35df0ccf09..1bb4161f1a2a2b 100644
--- a/stdlib/TOML/test/testfiles/valid/newline-crlf.jl
+++ b/stdlib/TOML/test/testfiles/valid/newline-crlf.jl
@@ -1 +1 @@
-Dict{String,Any}("newline" => Dict{String,Any}("value" => "crlf","type" => "string"),"os" => Dict{String,Any}("value" => "DOS","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("newline" => Dict{String,Any}("value" => "crlf","type" => "string"),"os" => Dict{String,Any}("value" => "DOS","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/newline-lf.jl b/stdlib/TOML/test/testfiles/valid/newline-lf.jl
index f422b1a0014a58..e9bb103ab934d3 100644
--- a/stdlib/TOML/test/testfiles/valid/newline-lf.jl
+++ b/stdlib/TOML/test/testfiles/valid/newline-lf.jl
@@ -1 +1 @@
-Dict{String,Any}("newline" => Dict{String,Any}("value" => "lf","type" => "string"),"os" => Dict{String,Any}("value" => "unix","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("newline" => Dict{String,Any}("value" => "lf","type" => "string"),"os" => Dict{String,Any}("value" => "unix","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.jl b/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.jl
index 7b5dcfb55251ac..054b671ad564dd 100644
--- a/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.jl
+++ b/stdlib/TOML/test/testfiles/valid/raw-multiline-string-win.jl
@@ -1 +1 @@
-Dict{String,Any}("multiline" => Dict{String,Any}("value" => "This string\r\nhas ' a quote character\r\nand more than\r\none newline\r\nin it.","type" => "string"),"firstnl" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"),"oneline" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("multiline" => Dict{String,Any}("value" => "This string\r\nhas ' a quote character\r\nand more than\r\none newline\r\nin it.","type" => "string"),"firstnl" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"),"oneline" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/raw-multiline-string.jl b/stdlib/TOML/test/testfiles/valid/raw-multiline-string.jl
index 308070b558fa44..e05360e1fcd848 100644
--- a/stdlib/TOML/test/testfiles/valid/raw-multiline-string.jl
+++ b/stdlib/TOML/test/testfiles/valid/raw-multiline-string.jl
@@ -1 +1 @@
-Dict{String,Any}("multiline" => Dict{String,Any}("value" => "This string\nhas ' a quote character\nand more than\none newline\nin it.","type" => "string"),"firstnl" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"),"oneline" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("multiline" => Dict{String,Any}("value" => "This string\nhas ' a quote character\nand more than\none newline\nin it.","type" => "string"),"firstnl" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"),"oneline" => Dict{String,Any}("value" => "This string has a ' quote character.","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/raw-string.jl b/stdlib/TOML/test/testfiles/valid/raw-string.jl
index c7e01501bb8f18..58a1929689bd3e 100644
--- a/stdlib/TOML/test/testfiles/valid/raw-string.jl
+++ b/stdlib/TOML/test/testfiles/valid/raw-string.jl
@@ -1 +1 @@
-Dict{String,Any}("slash" => Dict{String,Any}("value" => "This string has a \\/ slash character.","type" => "string"),"formfeed" => Dict{String,Any}("value" => "This string has a \\f form feed character.","type" => "string"),"backslash" => Dict{String,Any}("value" => "This string has a \\\\ backslash character.","type" => "string"),"newline" => Dict{String,Any}("value" => "This string has a \\n new line character.","type" => "string"),"carriage" => Dict{String,Any}("value" => "This string has a \\r carriage return character.","type" => "string"),"backspace" => Dict{String,Any}("value" => "This string has a \\b backspace character.","type" => "string"),"tab" => Dict{String,Any}("value" => "This string has a \\t tab character.","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("slash" => Dict{String,Any}("value" => "This string has a \\/ slash character.","type" => "string"),"formfeed" => Dict{String,Any}("value" => "This string has a \\f form feed character.","type" => "string"),"backslash" => Dict{String,Any}("value" => "This string has a \\\\ backslash character.","type" => "string"),"newline" => Dict{String,Any}("value" => "This string has a \\n new line character.","type" => "string"),"carriage" => Dict{String,Any}("value" => "This string has a \\r carriage return character.","type" => "string"),"backspace" => Dict{String,Any}("value" => "This string has a \\b backspace character.","type" => "string"),"tab" => Dict{String,Any}("value" => "This string has a \\t tab character.","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.jl b/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.jl
index 38c34b74ac9379..25393187ca54d0 100644
--- a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.jl
+++ b/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.jl
@@ -1 +1 @@
-Dict{String,Any}("black" => Dict{String,Any}("allow_prereleases" => Dict{String,Any}("value" => "true","type" => "bool"),"python" => Dict{String,Any}("value" => ">3.6","type" => "string"),"version" => Dict{String,Any}("value" => ">=18.9b0","type" => "string")))
\ No newline at end of file
+Dict{String,Any}("black" => Dict{String,Any}("allow_prereleases" => Dict{String,Any}("value" => "true","type" => "bool"),"python" => Dict{String,Any}("value" => ">3.6","type" => "string"),"version" => Dict{String,Any}("value" => ">=18.9b0","type" => "string")))
diff --git a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.json b/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.json
index a6c11ea86eea8f..7fc7d6dafff068 100644
--- a/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.json
+++ b/stdlib/TOML/test/testfiles/valid/right-curly-brace-after-boolean.json
@@ -13,4 +13,4 @@
           "value":">=18.9b0"
        }
     }
- }
\ No newline at end of file
+ }
diff --git a/stdlib/TOML/test/testfiles/valid/string-empty.jl b/stdlib/TOML/test/testfiles/valid/string-empty.jl
index 42004373795c9b..4adba9eed74f94 100644
--- a/stdlib/TOML/test/testfiles/valid/string-empty.jl
+++ b/stdlib/TOML/test/testfiles/valid/string-empty.jl
@@ -1 +1 @@
-Dict{String,Any}("answer" => Dict{String,Any}("value" => "","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("answer" => Dict{String,Any}("value" => "","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/string-escapes.jl b/stdlib/TOML/test/testfiles/valid/string-escapes.jl
index 2f7c117d161317..d153276492df3d 100644
--- a/stdlib/TOML/test/testfiles/valid/string-escapes.jl
+++ b/stdlib/TOML/test/testfiles/valid/string-escapes.jl
@@ -1 +1 @@
-Dict{String,Any}("formfeed" => Dict{String,Any}("value" => "This string has a \f form feed character.","type" => "string"),"notunicode2" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"backslash" => Dict{String,Any}("value" => "This string has a \\ backslash character.","type" => "string"),"notunicode3" => Dict{String,Any}("value" => "This string does not have a unicode \\u0075 escape.","type" => "string"),"notunicode4" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"tab" => Dict{String,Any}("value" => "This string has a \t tab character.","type" => "string"),"carriage" => Dict{String,Any}("value" => "This string has a \r carriage return character.","type" => "string"),"quote" => Dict{String,Any}("value" => "This string has a \" quote character.","type" => "string"),"newline" => Dict{String,Any}("value" => "This string has a \n new line character.","type" => "string"),"notunicode1" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"backspace" => Dict{String,Any}("value" => "This string has a \b backspace character.","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("formfeed" => Dict{String,Any}("value" => "This string has a \f form feed character.","type" => "string"),"notunicode2" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"backslash" => Dict{String,Any}("value" => "This string has a \\ backslash character.","type" => "string"),"notunicode3" => Dict{String,Any}("value" => "This string does not have a unicode \\u0075 escape.","type" => "string"),"notunicode4" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"tab" => Dict{String,Any}("value" => "This string has a \t tab character.","type" => "string"),"carriage" => Dict{String,Any}("value" => "This string has a \r carriage return character.","type" => "string"),"quote" => Dict{String,Any}("value" => "This string has a \" quote character.","type" => "string"),"newline" => Dict{String,Any}("value" => "This string has a \n new line character.","type" => "string"),"notunicode1" => Dict{String,Any}("value" => "This string does not have a unicode \\u escape.","type" => "string"),"backspace" => Dict{String,Any}("value" => "This string has a \b backspace character.","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/string-nl.jl b/stdlib/TOML/test/testfiles/valid/string-nl.jl
index 839d5d29a887c4..1d60e431ee1bb7 100644
--- a/stdlib/TOML/test/testfiles/valid/string-nl.jl
+++ b/stdlib/TOML/test/testfiles/valid/string-nl.jl
@@ -1 +1 @@
-Dict{String,Any}("nl_end" => Dict{String,Any}("value" => "value\n","type" => "string"),"lit_nl_mid" => Dict{String,Any}("value" => "val\\nue","type" => "string"),"nl_mid" => Dict{String,Any}("value" => "val\nue","type" => "string"),"lit_nl_uni" => Dict{String,Any}("value" => "val\\ue","type" => "string"),"lit_nl_end" => Dict{String,Any}("value" => "value\\n","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("nl_end" => Dict{String,Any}("value" => "value\n","type" => "string"),"lit_nl_mid" => Dict{String,Any}("value" => "val\\nue","type" => "string"),"nl_mid" => Dict{String,Any}("value" => "val\nue","type" => "string"),"lit_nl_uni" => Dict{String,Any}("value" => "val\\ue","type" => "string"),"lit_nl_end" => Dict{String,Any}("value" => "value\\n","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/string-simple.jl b/stdlib/TOML/test/testfiles/valid/string-simple.jl
index ea78bcb43c4b39..dbee3f00e38d97 100644
--- a/stdlib/TOML/test/testfiles/valid/string-simple.jl
+++ b/stdlib/TOML/test/testfiles/valid/string-simple.jl
@@ -1 +1 @@
-Dict{String,Any}("answer" => Dict{String,Any}("value" => "You are not drinking enough whisky.","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("answer" => Dict{String,Any}("value" => "You are not drinking enough whisky.","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/string-with-pound.jl b/stdlib/TOML/test/testfiles/valid/string-with-pound.jl
index d8f25e780830fc..0acceceab61607 100644
--- a/stdlib/TOML/test/testfiles/valid/string-with-pound.jl
+++ b/stdlib/TOML/test/testfiles/valid/string-with-pound.jl
@@ -1 +1 @@
-Dict{String,Any}("pound" => Dict{String,Any}("value" => "We see no # comments here.","type" => "string"),"poundcomment" => Dict{String,Any}("value" => "But there are # some comments here.","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("pound" => Dict{String,Any}("value" => "We see no # comments here.","type" => "string"),"poundcomment" => Dict{String,Any}("value" => "But there are # some comments here.","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/table-array-implicit.jl b/stdlib/TOML/test/testfiles/valid/table-array-implicit.jl
index e255197c60b5cd..fc8c932d672e7b 100644
--- a/stdlib/TOML/test/testfiles/valid/table-array-implicit.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-array-implicit.jl
@@ -1 +1 @@
-Dict{String,Any}("albums" => Dict{String,Any}("songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Glory Days","type" => "string"))]))
\ No newline at end of file
+Dict{String,Any}("albums" => Dict{String,Any}("songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Glory Days","type" => "string"))]))
diff --git a/stdlib/TOML/test/testfiles/valid/table-array-many.jl b/stdlib/TOML/test/testfiles/valid/table-array-many.jl
index 7e9f0ede913687..c9b1c336003d2d 100644
--- a/stdlib/TOML/test/testfiles/valid/table-array-many.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-array-many.jl
@@ -1 +1 @@
-Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Eric","type" => "string"),"last_name" => Dict{String,Any}("value" => "Clapton","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bob","type" => "string"),"last_name" => Dict{String,Any}("value" => "Seger","type" => "string"))])
\ No newline at end of file
+Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Eric","type" => "string"),"last_name" => Dict{String,Any}("value" => "Clapton","type" => "string")), Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bob","type" => "string"),"last_name" => Dict{String,Any}("value" => "Seger","type" => "string"))])
diff --git a/stdlib/TOML/test/testfiles/valid/table-array-nest.jl b/stdlib/TOML/test/testfiles/valid/table-array-nest.jl
index f9fbb34b6a39c9..68ef1c97f41a44 100644
--- a/stdlib/TOML/test/testfiles/valid/table-array-nest.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-array-nest.jl
@@ -1 +1 @@
-Dict{String,Any}("albums" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Born to Run","type" => "string"),"songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Jungleland","type" => "string")), Dict{String,Any}("name" => Dict{String,Any}("value" => "Meeting Across the River","type" => "string"))]), Dict{String,Any}("name" => Dict{String,Any}("value" => "Born in the USA","type" => "string"),"songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Glory Days","type" => "string")), Dict{String,Any}("name" => Dict{String,Any}("value" => "Dancing in the Dark","type" => "string"))])])
\ No newline at end of file
+Dict{String,Any}("albums" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Born to Run","type" => "string"),"songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Jungleland","type" => "string")), Dict{String,Any}("name" => Dict{String,Any}("value" => "Meeting Across the River","type" => "string"))]), Dict{String,Any}("name" => Dict{String,Any}("value" => "Born in the USA","type" => "string"),"songs" => Any[Dict{String,Any}("name" => Dict{String,Any}("value" => "Glory Days","type" => "string")), Dict{String,Any}("name" => Dict{String,Any}("value" => "Dancing in the Dark","type" => "string"))])])
diff --git a/stdlib/TOML/test/testfiles/valid/table-array-one.jl b/stdlib/TOML/test/testfiles/valid/table-array-one.jl
index 26f1597e0696bb..830e3af323fc76 100644
--- a/stdlib/TOML/test/testfiles/valid/table-array-one.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-array-one.jl
@@ -1 +1 @@
-Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string"))])
\ No newline at end of file
+Dict{String,Any}("people" => Any[Dict{String,Any}("first_name" => Dict{String,Any}("value" => "Bruce","type" => "string"),"last_name" => Dict{String,Any}("value" => "Springsteen","type" => "string"))])
diff --git a/stdlib/TOML/test/testfiles/valid/table-array-table-array.jl b/stdlib/TOML/test/testfiles/valid/table-array-table-array.jl
index 536330b3afe5e1..d379c1d3daca78 100644
--- a/stdlib/TOML/test/testfiles/valid/table-array-table-array.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-array-table-array.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Any[Dict{String,Any}("b" => Any[Dict{String,Any}("c" => Dict{String,Any}("d" => Dict{String,Any}("value" => "val0","type" => "string"))), Dict{String,Any}("c" => Dict{String,Any}("d" => Dict{String,Any}("value" => "val1","type" => "string")))])])
\ No newline at end of file
+Dict{String,Any}("a" => Any[Dict{String,Any}("b" => Any[Dict{String,Any}("c" => Dict{String,Any}("d" => Dict{String,Any}("value" => "val0","type" => "string"))), Dict{String,Any}("c" => Dict{String,Any}("d" => Dict{String,Any}("value" => "val1","type" => "string")))])])
diff --git a/stdlib/TOML/test/testfiles/valid/table-empty.jl b/stdlib/TOML/test/testfiles/valid/table-empty.jl
index 8ed753e5f5e577..a62b1dc36cdf34 100644
--- a/stdlib/TOML/test/testfiles/valid/table-empty.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-empty.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Dict{String,Any}())
\ No newline at end of file
+Dict{String,Any}("a" => Dict{String,Any}())
diff --git a/stdlib/TOML/test/testfiles/valid/table-no-eol.jl b/stdlib/TOML/test/testfiles/valid/table-no-eol.jl
index e9014a1e55ee84..4a103a5e13f549 100644
--- a/stdlib/TOML/test/testfiles/valid/table-no-eol.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-no-eol.jl
@@ -1 +1 @@
-Dict{String,Any}("table" => Dict{String,Any}())
\ No newline at end of file
+Dict{String,Any}("table" => Dict{String,Any}())
diff --git a/stdlib/TOML/test/testfiles/valid/table-no-eol.toml b/stdlib/TOML/test/testfiles/valid/table-no-eol.toml
index 741b2d1c2056a7..f1098fdacaa271 100644
--- a/stdlib/TOML/test/testfiles/valid/table-no-eol.toml
+++ b/stdlib/TOML/test/testfiles/valid/table-no-eol.toml
@@ -1 +1 @@
-[table]
\ No newline at end of file
+[table]
diff --git a/stdlib/TOML/test/testfiles/valid/table-sub-empty.jl b/stdlib/TOML/test/testfiles/valid/table-sub-empty.jl
index ced2225a6cd909..448cd9237d7d0a 100644
--- a/stdlib/TOML/test/testfiles/valid/table-sub-empty.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-sub-empty.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}()))
\ No newline at end of file
+Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}()))
diff --git a/stdlib/TOML/test/testfiles/valid/table-whitespace.jl b/stdlib/TOML/test/testfiles/valid/table-whitespace.jl
index 7a74b1b6b0fa65..1af4cc9cb98e80 100644
--- a/stdlib/TOML/test/testfiles/valid/table-whitespace.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-whitespace.jl
@@ -1 +1 @@
-Dict{String,Any}("valid key" => Dict{String,Any}())
\ No newline at end of file
+Dict{String,Any}("valid key" => Dict{String,Any}())
diff --git a/stdlib/TOML/test/testfiles/valid/table-with-literal-string.jl b/stdlib/TOML/test/testfiles/valid/table-with-literal-string.jl
index b4ea19cf15d48a..7157a1b75e6ea6 100644
--- a/stdlib/TOML/test/testfiles/valid/table-with-literal-string.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-with-literal-string.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Dict{String,Any}("\"b\"" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")))))
\ No newline at end of file
+Dict{String,Any}("a" => Dict{String,Any}("\"b\"" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")))))
diff --git a/stdlib/TOML/test/testfiles/valid/table-with-pound.jl b/stdlib/TOML/test/testfiles/valid/table-with-pound.jl
index d95d29b2e7eaed..d1c99bb09e8ab0 100644
--- a/stdlib/TOML/test/testfiles/valid/table-with-pound.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-with-pound.jl
@@ -1 +1 @@
-Dict{String,Any}("key#group" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")))
\ No newline at end of file
+Dict{String,Any}("key#group" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")))
diff --git a/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.jl b/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.jl
index 2fa2c2156bb676..5481705ddbc4e0 100644
--- a/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.jl
+++ b/stdlib/TOML/test/testfiles/valid/table-with-single-quotes.jl
@@ -1 +1 @@
-Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")))))
\ No newline at end of file
+Dict{String,Any}("a" => Dict{String,Any}("b" => Dict{String,Any}("c" => Dict{String,Any}("answer" => Dict{String,Any}("value" => "42","type" => "integer")))))
diff --git a/stdlib/TOML/test/testfiles/valid/underscored-float.jl b/stdlib/TOML/test/testfiles/valid/underscored-float.jl
index 7ee220ca0cf8b0..420cefd96e481f 100644
--- a/stdlib/TOML/test/testfiles/valid/underscored-float.jl
+++ b/stdlib/TOML/test/testfiles/valid/underscored-float.jl
@@ -1 +1 @@
-Dict{String,Any}("electron_mass" => Dict{String,Any}("value" => "9.109109383e-31","type" => "float"))
\ No newline at end of file
+Dict{String,Any}("electron_mass" => Dict{String,Any}("value" => "9.109109383e-31","type" => "float"))
diff --git a/stdlib/TOML/test/testfiles/valid/underscored-integer.jl b/stdlib/TOML/test/testfiles/valid/underscored-integer.jl
index 0aa27784aba48c..4fb9d43398a9ca 100644
--- a/stdlib/TOML/test/testfiles/valid/underscored-integer.jl
+++ b/stdlib/TOML/test/testfiles/valid/underscored-integer.jl
@@ -1 +1 @@
-Dict{String,Any}("million" => Dict{String,Any}("value" => "1000000","type" => "integer"))
\ No newline at end of file
+Dict{String,Any}("million" => Dict{String,Any}("value" => "1000000","type" => "integer"))
diff --git a/stdlib/TOML/test/testfiles/valid/unicode-escape.jl b/stdlib/TOML/test/testfiles/valid/unicode-escape.jl
index a2e66db0d51e99..d773bc04b9ce55 100644
--- a/stdlib/TOML/test/testfiles/valid/unicode-escape.jl
+++ b/stdlib/TOML/test/testfiles/valid/unicode-escape.jl
@@ -1 +1 @@
-Dict{String,Any}("answer8" => Dict{String,Any}("value" => "δ","type" => "string"),"answer4" => Dict{String,Any}("value" => "δ","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("answer8" => Dict{String,Any}("value" => "δ","type" => "string"),"answer4" => Dict{String,Any}("value" => "δ","type" => "string"))
diff --git a/stdlib/TOML/test/testfiles/valid/unicode-literal.jl b/stdlib/TOML/test/testfiles/valid/unicode-literal.jl
index bdcb5d4cf0ea0c..675b94774c3439 100644
--- a/stdlib/TOML/test/testfiles/valid/unicode-literal.jl
+++ b/stdlib/TOML/test/testfiles/valid/unicode-literal.jl
@@ -1 +1 @@
-Dict{String,Any}("answer" => Dict{String,Any}("value" => "δ","type" => "string"))
\ No newline at end of file
+Dict{String,Any}("answer" => Dict{String,Any}("value" => "δ","type" => "string"))
diff --git a/stdlib/TOML/test/toml_test.jl b/stdlib/TOML/test/toml_test.jl
index 3a8d5062ce6fc7..45fbd20dbcdaba 100644
--- a/stdlib/TOML/test/toml_test.jl
+++ b/stdlib/TOML/test/toml_test.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using TOML
 
 using Test
diff --git a/stdlib/TOML/test/utils/convert_json_to_jl.jl b/stdlib/TOML/test/utils/convert_json_to_jl.jl
index d3614ca453658d..00d4fac69084b6 100644
--- a/stdlib/TOML/test/utils/convert_json_to_jl.jl
+++ b/stdlib/TOML/test/utils/convert_json_to_jl.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # This converts the ground-truth JSON files to the Julia repr format so
 # we can use that without requiring a JSON parser during testing.
 
@@ -14,4 +16,4 @@ function convert_json_files()
             write(splitext(file)[1] * ".jl", d_jl)
         end
     end
-end
\ No newline at end of file
+end
diff --git a/stdlib/TOML/test/values.jl b/stdlib/TOML/test/values.jl
index 00d19bb2311e78..8337bb5a547148 100644
--- a/stdlib/TOML/test/values.jl
+++ b/stdlib/TOML/test/values.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 using TOML
 using TOML: Internals
@@ -37,6 +39,7 @@ end
     @test testval("1.0e0"       , 1.0)
     @test testval("1.0e+0"      , 1.0)
     @test testval("1.0e-0"      , 1.0)
+    @test testval("0e-3"        , 0.0)
     @test testval("1.001e-0"    , 1.001)
     @test testval("2e10"        , 2e10)
     @test testval("2e+10"       , 2e10)
@@ -51,8 +54,8 @@ end
     @test testval("+1_000" , 1000  |> Int64)
     @test testval("-1_000" , -1000 |> Int64)
 
-    @test failval("0_"     , Internals.ErrLeadingZeroNotAllowedInteger)
-    @test failval("0__0"   , Internals.ErrLeadingZeroNotAllowedInteger)
+    @test failval("0_"     , Internals.ErrUnderscoreNotSurroundedByDigits)
+    @test failval("0__0"   , Internals.ErrUnderscoreNotSurroundedByDigits)
     @test failval("__0"    , Internals.ErrUnexpectedStartOfValue)
     @test failval("1_0_"   , Internals.ErrTrailingUnderscoreNumber)
     @test failval("1_0__0" , Internals.ErrUnderscoreNotSurroundedByDigits)
diff --git a/stdlib/Tar.version b/stdlib/Tar.version
new file mode 100644
index 00000000000000..7ba08fd461f885
--- /dev/null
+++ b/stdlib/Tar.version
@@ -0,0 +1,4 @@
+TAR_BRANCH = master
+TAR_SHA1 = 56062695b92920c8b75e997fb0c8c3b015d04b78
+TAR_GIT_URL := https://github.com/JuliaIO/Tar.jl.git
+TAR_TAR_URL = https://api.github.com/repos/JuliaIO/Tar.jl/tarball/$1
diff --git a/stdlib/Test/Project.toml b/stdlib/Test/Project.toml
index fcb95bcfc99525..ee1ae15fd71547 100644
--- a/stdlib/Test/Project.toml
+++ b/stdlib/Test/Project.toml
@@ -2,7 +2,13 @@ name = "Test"
 uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [deps]
-Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
 InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
 Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+Serialization = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
+
+[extras]
+Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b"
+
+[targets]
+test = ["Distributed"]
diff --git a/stdlib/Test/docs/src/index.md b/stdlib/Test/docs/src/index.md
index 8c22c3ead20dc1..077d3505547753 100644
--- a/stdlib/Test/docs/src/index.md
+++ b/stdlib/Test/docs/src/index.md
@@ -19,7 +19,8 @@ Base.runtests
 The `Test` module provides simple *unit testing* functionality. Unit testing is a way to
 see if your code is correct by checking that the results are what you expect. It can be helpful
 to ensure your code still works after you make changes, and can be used when developing as a way
-of specifying the behaviors your code should have when complete.
+of specifying the behaviors your code should have when complete. You may also want to look at the
+documentation for [adding tests to your Julia Package](https://pkgdocs.julialang.org/dev/creating-packages/#Adding-tests-to-the-package).
 
 Simple unit testing can be performed with the `@test` and `@test_throws` macros:
 
@@ -102,23 +103,24 @@ or could not be evaluated due to an error, the test set will then throw a `TestS
 
 ```@docs
 Test.@testset
+Test.TestSetException
 ```
 
 We can put our tests for the `foo(x)` function in a test set:
 
-```jldoctest testfoo
+```jldoctest testfoo; filter = r"[0-9\.]+s"
 julia> @testset "Foo Tests" begin
            @test foo("a")   == 1
            @test foo("ab")  == 4
            @test foo("abc") == 9
        end;
-Test Summary: | Pass  Total
-Foo Tests     |    3      3
+Test Summary: | Pass  Total  Time
+Foo Tests     |    3      3  0.0s
 ```
 
 Test sets can also be nested:
 
-```jldoctest testfoo
+```jldoctest testfoo; filter = r"[0-9\.]+s"
 julia> @testset "Foo Tests" begin
            @testset "Animals" begin
                @test foo("cat") == 9
@@ -129,14 +131,49 @@ julia> @testset "Foo Tests" begin
                @test foo(fill(1.0, i)) == i^2
            end
        end;
-Test Summary: | Pass  Total
-Foo Tests     |    8      8
+Test Summary: | Pass  Total  Time
+Foo Tests     |    8      8  0.0s
 ```
 
+As well as call functions:
+
+```jldoctest testfoo; filter = r"[0-9\.]+s"
+julia> f(x) = @test isone(x)
+f (generic function with 1 method)
+
+julia> @testset f(1);
+Test Summary: | Pass  Total  Time
+f             |    1      1  0.0s
+```
+
+This can be used to allow for factorization of test sets, making it easier to run individual
+test sets by running the associated functions instead.
+Note that in the case of functions, the test set will be given the name of the called function.
 In the event that a nested test set has no failures, as happened here, it will be hidden in the
-summary. If we do have a test failure, only the details for the failed test sets will be shown:
+summary, unless the `verbose=true` option is passed:
 
-```julia-repl
+```jldoctest testfoo; filter = r"[0-9\.]+s"
+julia> @testset verbose = true "Foo Tests" begin
+           @testset "Animals" begin
+               @test foo("cat") == 9
+               @test foo("dog") == foo("cat")
+           end
+           @testset "Arrays $i" for i in 1:3
+               @test foo(zeros(i)) == i^2
+               @test foo(fill(1.0, i)) == i^2
+           end
+       end;
+Test Summary: | Pass  Total  Time
+Foo Tests     |    8      8  0.0s
+  Animals     |    2      2  0.0s
+  Arrays 1    |    2      2  0.0s
+  Arrays 2    |    2      2  0.0s
+  Arrays 3    |    2      2  0.0s
+```
+
+If we do have a test failure, only the details for the failed test sets will be shown:
+
+```julia-repl; filter = r"[0-9\.]+s"
 julia> @testset "Foo Tests" begin
            @testset "Animals" begin
                @testset "Felines" begin
@@ -156,13 +193,23 @@ Arrays: Test Failed
   Expression: foo(fill(1.0, 4)) == 15
    Evaluated: 16 == 15
 [...]
-Test Summary: | Pass  Fail  Total
-Foo Tests     |    3     1      4
-  Animals     |    2            2
-  Arrays      |    1     1      2
+Test Summary: | Pass  Fail  Total  Time
+Foo Tests     |    3     1      4  0.0s
+  Animals     |    2            2  0.0s
+  Arrays      |    1     1      2  0.0s
 ERROR: Some tests did not pass: 3 passed, 1 failed, 0 errored, 0 broken.
 ```
 
+## Testing Log Statements
+
+One can use the [`@test_logs`](@ref) macro to test log statements, or use a [`TestLogger`](@ref).
+
+```@docs
+Test.@test_logs
+Test.TestLogger
+Test.LogRecord
+```
+
 ## Other Test Macros
 
 As calculations on floating-point values can be imprecise, you can perform approximate equality
@@ -179,10 +226,16 @@ Test Failed at none:1
    Evaluated: 1 ≈ 0.999999
 ERROR: There was an error during testing
 ```
+You can specify relative and absolute tolerances by setting the `rtol` and `atol` keyword arguments of `isapprox`, respectively,
+after the `≈` comparison:
+```jldoctest
+julia> @test 1 ≈ 0.999999  rtol=1e-5
+Test Passed
+```
+Note that this is not a specific feature of the `≈` but rather a general feature of the `@test` macro: `@test a <op> b key=val` is transformed by the macro into `@test op(a, b, key=val)`. It is, however, particularly useful for `≈` tests.
 
 ```@docs
 Test.@inferred
-Test.@test_logs
 Test.@test_deprecated
 Test.@test_warn
 Test.@test_nowarn
@@ -267,6 +320,18 @@ And using that testset looks like:
 end
 ```
 
+## Test utilities
+
+```@docs
+Test.GenericArray
+Test.GenericDict
+Test.GenericOrder
+Test.GenericSet
+Test.GenericString
+Test.detect_ambiguities
+Test.detect_unbound_args
+```
+
 ```@meta
 DocTestSetup = nothing
 ```
diff --git a/stdlib/Test/src/Test.jl b/stdlib/Test/src/Test.jl
index 7518a8aeb10127..18082aa5038572 100644
--- a/stdlib/Test/src/Test.jl
+++ b/stdlib/Test/src/Test.jl
@@ -22,13 +22,13 @@ export @inferred
 export detect_ambiguities, detect_unbound_args
 export GenericString, GenericSet, GenericDict, GenericArray, GenericOrder
 export TestSetException
-
-import Distributed: myid
+export TestLogger, LogRecord
 
 using Random
 using Random: AbstractRNG, default_rng
 using InteractiveUtils: gen_call_with_extracted_types
-using Core.Compiler: typesubtract
+using Base: typesplit
+using Serialization: Serialization
 
 const DISPLAY_FAILED = (
     :isequal,
@@ -38,6 +38,7 @@ const DISPLAY_FAILED = (
     :startswith,
     :endswith,
     :isempty,
+    :contains
 )
 
 #-----------------------------------------------------------------------
@@ -60,7 +61,7 @@ function scrub_backtrace(bt)
 end
 
 function scrub_exc_stack(stack)
-    return Any[ (x[1], scrub_backtrace(x[2])) for x in stack ]
+    return Any[ (x[1], scrub_backtrace(x[2]::Vector{Union{Ptr{Nothing},Base.InterpreterIP}})) for x in stack ]
 end
 
 # define most of the test infrastructure without type specialization
@@ -85,19 +86,22 @@ struct Pass <: Result
     orig_expr
     data
     value
+    source::Union{Nothing,LineNumberNode}
+    message_only::Bool
+    function Pass(test_type::Symbol, orig_expr, data, thrown, source::Union{Nothing,LineNumberNode}=nothing, message_only::Bool=false)
+        return new(test_type, orig_expr, data, thrown, source, message_only)
+    end
 end
+
 function Base.show(io::IO, t::Pass)
     printstyled(io, "Test Passed"; bold = true, color=:green)
-    if !(t.orig_expr === nothing)
-        print(io, "\n  Expression: ", t.orig_expr)
-    end
     if t.test_type === :test_throws
         # The correct type of exception was thrown
-        print(io, "\n      Thrown: ", typeof(t.value))
-    elseif t.test_type === :test && t.data !== nothing
-        # The test was an expression, so display the term-by-term
-        # evaluated version as well
-        print(io, "\n   Evaluated: ", t.data)
+        if t.message_only
+            print(io, "\n     Message: ", t.value)
+        else
+            print(io, "\n      Thrown: ", typeof(t.value))
+        end
     end
 end
 
@@ -107,30 +111,46 @@ end
 The test condition was false, i.e. the expression evaluated to false or
 the correct exception was not thrown.
 """
-mutable struct Fail <: Result
+struct Fail <: Result
     test_type::Symbol
-    orig_expr
-    data
-    value
+    orig_expr::String
+    data::Union{Nothing, String}
+    value::String
     source::LineNumberNode
+    message_only::Bool
+    function Fail(test_type::Symbol, orig_expr, data, value, source::LineNumberNode, message_only::Bool=false)
+        return new(test_type,
+            string(orig_expr),
+            data === nothing ? nothing : string(data),
+            string(isa(data, Type) ? typeof(value) : value),
+            source,
+            message_only)
+    end
 end
+
 function Base.show(io::IO, t::Fail)
     printstyled(io, "Test Failed"; bold=true, color=Base.error_color())
     print(io, " at ")
     printstyled(io, something(t.source.file, :none), ":", t.source.line, "\n"; bold=true, color=:default)
     print(io, "  Expression: ", t.orig_expr)
+    value, data = t.value, t.data
     if t.test_type === :test_throws_wrong
         # An exception was thrown, but it was of the wrong type
-        print(io, "\n    Expected: ", t.data)
-        print(io, "\n      Thrown: ", isa(t.data, Type) ? typeof(t.value) : t.value)
+        if t.message_only
+            print(io, "\n    Expected: ", data)
+            print(io, "\n     Message: ", value)
+        else
+            print(io, "\n    Expected: ", data)
+            print(io, "\n      Thrown: ", value)
+        end
     elseif t.test_type === :test_throws_nothing
         # An exception was expected, but no exception was thrown
-        print(io, "\n    Expected: ", t.data)
+        print(io, "\n    Expected: ", data)
         print(io, "\n  No exception thrown")
-    elseif t.test_type === :test && t.data !== nothing
+    elseif t.test_type === :test && data !== nothing
         # The test was an expression, so display the term-by-term
         # evaluated version as well
-        print(io, "\n   Evaluated: ", t.data)
+        print(io, "\n   Evaluated: ", data)
     end
 end
 
@@ -142,29 +162,49 @@ it evaluated to something other than a [`Bool`](@ref).
 In the case of `@test_broken` it is used to indicate that an
 unexpected `Pass` `Result` occurred.
 """
-mutable struct Error <: Result
+struct Error <: Result
     test_type::Symbol
-    orig_expr
-    value
-    backtrace
+    orig_expr::String
+    value::String
+    backtrace::String
     source::LineNumberNode
 
-    function Error(test_type, orig_expr, value, bt, source)
+    function Error(test_type::Symbol, orig_expr, value, bt, source::LineNumberNode)
         if test_type === :test_error
             bt = scrub_exc_stack(bt)
         end
         if test_type === :test_error || test_type === :nontest_error
-            bt_str = sprint(Base.show_exception_stack, bt; context=stdout)
+            bt_str = try # try the latest world for this, since we might have eval'd new code for show
+                    Base.invokelatest(sprint, Base.show_exception_stack, bt; context=stdout)
+                catch ex
+                    "#=ERROR showing exception stack=# " *
+                        try
+                            sprint(Base.showerror, ex, catch_backtrace(); context=stdout)
+                        catch
+                            "of type " * string(typeof(ex))
+                        end
+                end
         else
             bt_str = ""
         end
-        new(test_type,
-            orig_expr,
-            sprint(show, value, context = :limit => true),
+        value = try # try the latest world for this, since we might have eval'd new code for show
+                Base.invokelatest(sprint, show, value, context = :limit => true)
+            catch ex
+                "#=ERROR showing error of type " * string(typeof(value)) * "=# " *
+                    try
+                        sprint(Base.showerror, ex, catch_backtrace(); context=stdout)
+                    catch
+                        "of type " * string(typeof(ex))
+                    end
+            end
+        return new(test_type,
+            string(orig_expr),
+            value,
             bt_str,
             source)
     end
 end
+
 function Base.show(io::IO, t::Error)
     if t.test_type === :test_interrupted
         printstyled(io, "Interrupted", color=Base.error_color())
@@ -201,10 +241,11 @@ end
 The test condition is the expected (failed) result of a broken test,
 or was explicitly skipped with `@test_skip`.
 """
-mutable struct Broken <: Result
+struct Broken <: Result
     test_type::Symbol
     orig_expr
 end
+
 function Base.show(io::IO, t::Broken)
     printstyled(io, "Test Broken\n"; bold=true, color=Base.warn_color())
     if t.test_type === :skipped && !(t.orig_expr === nothing)
@@ -214,6 +255,27 @@ function Base.show(io::IO, t::Broken)
     end
 end
 
+# Types that appear in TestSetException.errors_and_fails we convert eagerly into strings
+# other types we convert lazily
+function Serialization.serialize(s::Serialization.AbstractSerializer, t::Pass)
+    Serialization.serialize_type(s, typeof(t))
+    Serialization.serialize(s, t.test_type)
+    Serialization.serialize(s, t.orig_expr === nothing ? nothing : string(t.orig_expr))
+    Serialization.serialize(s, t.data === nothing ? nothing : string(t.data))
+    Serialization.serialize(s, string(t.value))
+    Serialization.serialize(s, t.source === nothing ? nothing : t.source)
+    Serialization.serialize(s, t.message_only)
+    nothing
+end
+
+function Serialization.serialize(s::Serialization.AbstractSerializer, t::Broken)
+    Serialization.serialize_type(s, typeof(t))
+    Serialization.serialize(s, t.test_type)
+    Serialization.serialize(s, t.orig_expr === nothing ? nothing : string(t.orig_expr))
+    nothing
+end
+
+
 #-----------------------------------------------------------------------
 
 abstract type ExecutionResult end
@@ -226,23 +288,23 @@ end
 
 struct Threw <: ExecutionResult
     exception
-    backtrace
+    backtrace::Union{Nothing,Vector{Any}}
     source::LineNumberNode
 end
 
 function eval_test(evaluated::Expr, quoted::Expr, source::LineNumberNode, negate::Bool=false)
-    res = true
-    i = 1
     evaled_args = evaluated.args
     quoted_args = quoted.args
     n = length(evaled_args)
     kw_suffix = ""
     if evaluated.head === :comparison
         args = evaled_args
+        res = true
+        i = 1
         while i < n
             a, op, b = args[i], args[i+1], args[i+2]
             if res
-                res = op(a, b) === true  # Keep `res` type stable
+                res = op(a, b)
             end
             quoted_args[i] = a
             quoted_args[i+2] = b
@@ -251,14 +313,14 @@ function eval_test(evaluated::Expr, quoted::Expr, source::LineNumberNode, negate
 
     elseif evaluated.head === :call
         op = evaled_args[1]
-        kwargs = evaled_args[2].args  # Keyword arguments from `Expr(:parameters, ...)`
+        kwargs = (evaled_args[2]::Expr).args  # Keyword arguments from `Expr(:parameters, ...)`
         args = evaled_args[3:n]
 
-        res = op(args...; kwargs...) === true
+        res = op(args...; kwargs...)
 
         # Create "Evaluated" expression which looks like the original call but has all of
         # the arguments evaluated
-        func_sym = quoted_args[1]
+        func_sym = quoted_args[1]::Union{Symbol,Expr}
         if isempty(kwargs)
             quoted = Expr(:call, func_sym, args...)
         elseif func_sym === :≈ && !res
@@ -279,7 +341,7 @@ function eval_test(evaluated::Expr, quoted::Expr, source::LineNumberNode, negate
 
     Returned(res,
              # stringify arguments in case of failure, for easy remote printing
-             res ? quoted : sprint(io->print(IOContext(io, :limit => true), quoted))*kw_suffix,
+             res === true ? quoted : sprint(print, quoted, context=(:limit => true)) * kw_suffix,
              source)
 end
 
@@ -309,10 +371,13 @@ end
 """
     @test ex
     @test f(args...) key=val ...
+    @test ex broken=true
+    @test ex skip=true
 
-Tests that the expression `ex` evaluates to `true`.
-Returns a `Pass` `Result` if it does, a `Fail` `Result` if it is
+Test that the expression `ex` evaluates to `true`.
+If executed inside a `@testset`, return a `Pass` `Result` if it does, a `Fail` `Result` if it is
 `false`, and an `Error` `Result` if it could not be evaluated.
+If executed outside a `@testset`, throw an exception instead of returning `Fail` or `Error`.
 
 # Examples
 ```jldoctest
@@ -335,12 +400,71 @@ Test Passed
 This is equivalent to the uglier test `@test ≈(π, 3.14, atol=0.01)`.
 It is an error to supply more than one expression unless the first
 is a call expression and the rest are assignments (`k=v`).
+
+You can use any key for the `key=val` arguments, except for `broken` and `skip`,
+which have special meanings in the context of `@test`:
+
+* `broken=cond` indicates a test that should pass but currently consistently
+  fails when `cond==true`.  Tests that the expression `ex` evaluates to `false`
+  or causes an exception.  Returns a `Broken` `Result` if it does, or an `Error`
+  `Result` if the expression evaluates to `true`.  Regular `@test ex` is
+  evaluated when `cond==false`.
+* `skip=cond` marks a test that should not be executed but should be included in
+  test summary reporting as `Broken`, when `cond==true`.  This can be useful for
+  tests that intermittently fail, or tests of not-yet-implemented functionality.
+  Regular `@test ex` is evaluated when `cond==false`.
+
+# Examples
+
+```jldoctest
+julia> @test 2 + 2 ≈ 6 atol=1 broken=true
+Test Broken
+  Expression: ≈(2 + 2, 6, atol = 1)
+
+julia> @test 2 + 2 ≈ 5 atol=1 broken=false
+Test Passed
+
+julia> @test 2 + 2 == 5 skip=true
+Test Broken
+  Skipped: 2 + 2 == 5
+
+julia> @test 2 + 2 == 4 skip=false
+Test Passed
+```
+
+!!! compat "Julia 1.7"
+     The `broken` and `skip` keyword arguments require at least Julia 1.7.
 """
 macro test(ex, kws...)
+    # Collect the broken/skip keywords and remove them from the rest of keywords
+    broken = [kw.args[2] for kw in kws if kw.args[1] === :broken]
+    skip = [kw.args[2] for kw in kws if kw.args[1] === :skip]
+    kws = filter(kw -> kw.args[1] ∉ (:skip, :broken), kws)
+    # Validation of broken/skip keywords
+    for (kw, name) in ((broken, :broken), (skip, :skip))
+        if length(kw) > 1
+            error("invalid test macro call: cannot set $(name) keyword multiple times")
+        end
+    end
+    if length(skip) > 0 && length(broken) > 0
+        error("invalid test macro call: cannot set both skip and broken keywords")
+    end
+
+    # Build the test expression
     test_expr!("@test", ex, kws...)
     orig_ex = Expr(:inert, ex)
+
     result = get_test_result(ex, __source__)
-    :(do_test($result, $orig_ex))
+
+    return quote
+        if $(length(skip) > 0 && esc(skip[1]))
+            record(get_testset(), Broken(:skipped, $orig_ex))
+        else
+            let _do = $(length(broken) > 0 && esc(broken[1])) ? do_broken_test : do_test
+                _do($result, $orig_ex)
+            end
+        end
+    end
 end
 
 """
@@ -350,7 +474,8 @@ end
 Indicates a test that should pass but currently consistently fails.
 Tests that the expression `ex` evaluates to `false` or causes an
 exception. Returns a `Broken` `Result` if it does, or an `Error` `Result`
-if the expression evaluates to `true`.
+if the expression evaluates to `true`.  This is equivalent to
+[`@test ex broken=true`](@ref @test).
 
 The `@test_broken f(args...) key=val...` form works as for the `@test` macro.
 
@@ -379,7 +504,8 @@ end
 
 Marks a test that should not be executed but should be included in test
 summary reporting as `Broken`. This can be useful for tests that intermittently
-fail, or tests of not-yet-implemented functionality.
+fail, or tests of not-yet-implemented functionality.  This is equivalent to
+[`@test ex skip=true`](@ref @test).
 
 The `@test_skip f(args...) key=val...` form works as for the `@test` macro.
 
@@ -420,6 +546,12 @@ function get_test_result(ex, source)
         first(string(ex.args[1])) != '.' && !is_splat(ex.args[2]) && !is_splat(ex.args[3]) &&
         (ex.args[1] === :(==) || Base.operator_precedence(ex.args[1]) == comparison_prec)
         ex = Expr(:comparison, ex.args[2], ex.args[1], ex.args[3])
+
+    # Mark <: and >: as :comparison expressions
+    elseif isa(ex, Expr) && length(ex.args) == 2 &&
+        !is_splat(ex.args[1]) && !is_splat(ex.args[2]) &&
+        Base.operator_precedence(ex.head) == comparison_prec
+        ex = Expr(:comparison, ex.args[1], ex.head, ex.args[2])
     end
     if isa(ex, Expr) && ex.head === :comparison
         # pass all terms of the comparison to `eval_comparison`, as an Expr
@@ -454,6 +586,10 @@ function get_test_result(ex, source)
                     push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(a.args[1]), esc(a.args[2])))
                 elseif isa(a, Expr) && a.head === :...
                     push!(escaped_kwargs, Expr(:..., esc(a.args[1])))
+                elseif isa(a, Expr) && a.head === :.
+                    push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(a.args[2].value), esc(Expr(:., a.args[1], QuoteNode(a.args[2].value)))))
+                elseif isa(a, Symbol)
+                    push!(escaped_kwargs, Expr(:call, :(=>), QuoteNode(a), esc(a)))
                 end
             end
         end
@@ -483,7 +619,7 @@ function get_test_result(ex, source)
             $testret
         catch _e
             _e isa InterruptException && rethrow()
-            Threw(_e, Base.catch_stack(), $(QuoteNode(source)))
+            Threw(_e, Base.current_exceptions(), $(QuoteNode(source)))
         end
     end
     Base.remove_linenums!(result)
@@ -504,7 +640,7 @@ function do_test(result::ExecutionResult, orig_expr)
         value = result.value
         testres = if isa(value, Bool)
             # a true value Passes
-            value ? Pass(:test, nothing, nothing, value) :
+            value ? Pass(:test, orig_expr, result.data, value, result.source) :
                     Fail(:test, orig_expr, result.data, value, result.source)
         else
             # If the result is non-Boolean, this counts as an Error
@@ -514,8 +650,9 @@ function do_test(result::ExecutionResult, orig_expr)
         # The predicate couldn't be evaluated without throwing an
         # exception, so that is an Error and not a Fail
         @assert isa(result, Threw)
-        testres = Error(:test_error, orig_expr, result.exception, result.backtrace, result.source)
+        testres = Error(:test_error, orig_expr, result.exception, result.backtrace::Vector{Any}, result.source)
     end
+    isa(testres, Pass) || trigger_test_failure_break(result)
     record(get_testset(), testres)
 end
 
@@ -538,6 +675,8 @@ end
 
 Tests that the expression `expr` throws `exception`.
 The exception may specify either a type,
+a string, regular expression, or list of strings occurring in the displayed error message,
+a matching function,
 or a value (which will be tested for equality by comparing fields).
 Note that `@test_throws` does not support a trailing keyword form.
 
@@ -550,7 +689,17 @@ Test Passed
 julia> @test_throws DimensionMismatch [1, 2, 3] + [1, 2]
 Test Passed
       Thrown: DimensionMismatch
+
+julia> @test_throws "Try sqrt(Complex" sqrt(-1)
+Test Passed
+     Message: "DomainError with -1.0:\\nsqrt will only return a complex result if called with a complex argument. Try sqrt(Complex(x))."
 ```
+
+In the final example, instead of matching a single string it could alternatively have been performed with:
+
+- `["Try", "Complex"]` (a list of strings)
+- `r"Try sqrt\\([Cc]omplex"` (a regular expression)
+- `str -> occursin("complex", str)` (a matching function)
 """
 macro test_throws(extype, ex)
     orig_ex = Expr(:inert, ex)
@@ -568,16 +717,34 @@ macro test_throws(extype, ex)
     :(do_test_throws($result, $orig_ex, $(esc(extype))))
 end
 
+const MACROEXPAND_LIKE = Symbol.(("@macroexpand", "@macroexpand1", "macroexpand"))
+
 # An internal function, called by the code generated by @test_throws
 # to evaluate and catch the thrown exception - if it exists
 function do_test_throws(result::ExecutionResult, orig_expr, extype)
     if isa(result, Threw)
         # Check that the right type of exception was thrown
         success = false
+        message_only = false
         exc = result.exception
+        # NB: Throwing LoadError from macroexpands is deprecated, but in order to limit
+        # the breakage in package tests we add extra logic here.
+        from_macroexpand =
+            orig_expr isa Expr &&
+            orig_expr.head in (:call, :macrocall) &&
+            orig_expr.args[1] in MACROEXPAND_LIKE
         if isa(extype, Type)
-            success = isa(exc, extype)
-        else
+            success =
+                if from_macroexpand && extype == LoadError && exc isa Exception
+                    Base.depwarn("macroexpand no longer throws a LoadError so `@test_throws LoadError ...` is deprecated and passed without checking the error type!", :do_test_throws)
+                    true
+                else
+                    isa(exc, extype)
+                end
+        elseif isa(extype, Exception) || !isa(exc, Exception)
+            if extype isa LoadError && !(exc isa LoadError) && typeof(extype.error) == typeof(exc)
+                extype = extype.error # deprecated
+            end
             if isa(exc, typeof(extype))
                 success = true
                 for fld in 1:nfields(extype)
@@ -587,11 +754,21 @@ function do_test_throws(result::ExecutionResult, orig_expr, extype)
                     end
                 end
             end
+        else
+            message_only = true
+            exc = sprint(showerror, exc)
+            success = contains_warn(exc, extype)
+            exc = repr(exc)
+            if isa(extype, AbstractString)
+                extype = repr(extype)
+            elseif isa(extype, Function)
+                extype = "< match function >"
+            end
         end
         if success
-            testres = Pass(:test_throws, nothing, nothing, exc)
+            testres = Pass(:test_throws, orig_expr, extype, exc, result.source, message_only)
         else
-            testres = Fail(:test_throws_wrong, orig_expr, extype, exc, result.source)
+            testres = Fail(:test_throws_wrong, orig_expr, extype, exc, result.source, message_only)
         end
     else
         testres = Fail(:test_throws_nothing, orig_expr, extype, nothing, result.source)
@@ -652,7 +829,26 @@ with this macro. Use [`@test_logs`](@ref) instead.
 """
 macro test_nowarn(expr)
     quote
-        @test_warn r"^(?!.)"s $(esc(expr))
+        # Duplicate some code from `@test_warn` to allow printing the content of
+        # `stderr` again to `stderr` here while suppressing it for `@test_warn`.
+        # If that shouldn't be used, it would be possible to just use
+        #     @test_warn isempty $(esc(expr))
+        # here.
+        let fname = tempname()
+            try
+                ret = open(fname, "w") do f
+                    redirect_stderr(f) do
+                        $(esc(expr))
+                    end
+                end
+                stderr_content = read(fname, String)
+                print(stderr, stderr_content) # this is helpful for debugging
+                @test isempty(stderr_content)
+                ret
+            finally
+                rm(fname, force=true)
+            end
+        end
     end
 end
 
@@ -679,9 +875,20 @@ function record end
     finish(ts::AbstractTestSet)
 
 Do any final processing necessary for the given testset. This is called by the
-`@testset` infrastructure after a test block executes. One common use for this
-function is to record the testset to the parent's results list, using
-`get_testset`.
+`@testset` infrastructure after a test block executes.
+
+Custom `AbstractTestSet` subtypes should call `record` on their parent (if there
+is one) to add themselves to the tree of test results. This might be implemented
+as:
+
+```julia
+if get_testset_depth() != 0
+    # Attach this test set to the parent test set
+    parent_ts = get_testset()
+    record(parent_ts, self)
+    return self
+end
+```
 """
 function finish end
 
@@ -721,7 +928,7 @@ struct FallbackTestSet <: AbstractTestSet end
 fallback_testset = FallbackTestSet()
 
 struct FallbackTestSetException <: Exception
-    msg::AbstractString
+    msg::String
 end
 
 function Base.showerror(io::IO, ex::FallbackTestSetException, bt; backtrace=true)
@@ -730,8 +937,8 @@ end
 
 # Records nothing, and throws an error immediately whenever a Fail or
 # Error occurs. Takes no action in the event of a Pass or Broken result
-record(ts::FallbackTestSet, t::Union{Pass,Broken}) = t
-function record(ts::FallbackTestSet, t::Union{Fail,Error})
+record(ts::FallbackTestSet, t::Union{Pass, Broken}) = t
+function record(ts::FallbackTestSet, t::Union{Fail, Error})
     println(t)
     throw(FallbackTestSetException("There was an error during testing"))
 end
@@ -748,12 +955,16 @@ are any `Fail`s or `Error`s, an exception will be thrown only at the end,
 along with a summary of the test results.
 """
 mutable struct DefaultTestSet <: AbstractTestSet
-    description::AbstractString
-    results::Vector
+    description::String
+    results::Vector{Any}
     n_passed::Int
     anynonpass::Bool
+    verbose::Bool
+    showtiming::Bool
+    time_start::Float64
+    time_end::Union{Float64,Nothing}
 end
-DefaultTestSet(desc) = DefaultTestSet(desc, [], 0, false)
+DefaultTestSet(desc::AbstractString; verbose::Bool = false, showtiming::Bool = true) = DefaultTestSet(String(desc)::String, [], 0, false, verbose, showtiming, time(), nothing)
 
 # For a broken result, simply store the result
 record(ts::DefaultTestSet, t::Broken) = (push!(ts.results, t); t)
@@ -763,8 +974,8 @@ record(ts::DefaultTestSet, t::Pass) = (ts.n_passed += 1; t)
 # For the other result types, immediately print the error message
 # but do not terminate. Print a backtrace.
 function record(ts::DefaultTestSet, t::Union{Fail, Error})
-    if myid() == 1
-        printstyled(ts.description, ": ", color=:white)
+    if TESTSET_PRINT_ENABLE[]
+        print(ts.description, ": ")
         # don't print for interrupted tests
         if !(t isa Error) || t.test_type !== :test_interrupted
             print(t)
@@ -775,7 +986,6 @@ function record(ts::DefaultTestSet, t::Union{Fail, Error})
         end
     end
     push!(ts.results, t)
-    isa(t, Error) || backtrace()
     return t
 end
 
@@ -788,9 +998,9 @@ record(ts::DefaultTestSet, t::AbstractTestSet) = push!(ts.results, t)
 
 function print_test_errors(ts::DefaultTestSet)
     for t in ts.results
-        if (isa(t, Error) || isa(t, Fail)) && myid() == 1
+        if isa(t, Error) || isa(t, Fail)
             println("Error in testset $(ts.description):")
-            Base.show(stdout,t)
+            show(t)
             println()
         elseif isa(t, DefaultTestSet)
             print_test_errors(t)
@@ -801,7 +1011,7 @@ end
 function print_test_results(ts::DefaultTestSet, depth_pad=0)
     # Calculate the overall number for each type so each of
     # the test result types are aligned
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = get_test_counts(ts)
+    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
     total_pass   = passes + c_passes
     total_fail   = fails  + c_fails
     total_error  = errors + c_errors
@@ -819,12 +1029,13 @@ function print_test_results(ts::DefaultTestSet, depth_pad=0)
     error_width  = dig_error  > 0 ? max(length("Error"),  dig_error)  : 0
     broken_width = dig_broken > 0 ? max(length("Broken"), dig_broken) : 0
     total_width  = dig_total  > 0 ? max(length("Total"),  dig_total)  : 0
+    duration_width = max(length("Time"), length(duration))
     # Calculate the alignment of the test result counts by
     # recursively walking the tree of test sets
     align = max(get_alignment(ts, 0), length("Test Summary:"))
     # Print the outer test set header once
     pad = total == 0 ? "" : " "
-    printstyled(rpad("Test Summary:", align, " "), " |", pad; bold=true, color=:white)
+    printstyled(rpad("Test Summary:", align, " "), " |", pad; bold=true)
     if pass_width > 0
         printstyled(lpad("Pass", pass_width, " "), "  "; bold=true, color=:green)
     end
@@ -838,11 +1049,14 @@ function print_test_results(ts::DefaultTestSet, depth_pad=0)
         printstyled(lpad("Broken", broken_width, " "), "  "; bold=true, color=Base.warn_color())
     end
     if total_width > 0
-        printstyled(lpad("Total", total_width, " "); bold=true, color=Base.info_color())
+        printstyled(lpad("Total", total_width, " "), "  "; bold=true, color=Base.info_color())
+    end
+    if ts.showtiming
+        printstyled(lpad("Time", duration_width, " "); bold=true)
     end
     println()
     # Recursively print a summary at every level
-    print_counts(ts, depth_pad, align, pass_width, fail_width, error_width, broken_width, total_width)
+    print_counts(ts, depth_pad, align, pass_width, fail_width, error_width, broken_width, total_width, duration_width, ts.showtiming)
 end
 
 
@@ -851,6 +1065,7 @@ const TESTSET_PRINT_ENABLE = Ref(true)
 # Called at the end of a @testset, behaviour depends on whether
 # this is a child of another testset, or the "root" testset
 function finish(ts::DefaultTestSet)
+    ts.time_end = time()
     # If we are a nested test set, do not print a full summary
     # now - let the parent test set do the printing
     if get_testset_depth() != 0
@@ -859,7 +1074,7 @@ function finish(ts::DefaultTestSet)
         record(parent_ts, ts)
         return ts
     end
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = get_test_counts(ts)
+    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
     total_pass   = passes + c_passes
     total_fail   = fails  + c_fails
     total_error  = errors + c_errors
@@ -874,7 +1089,7 @@ function finish(ts::DefaultTestSet)
     if total != total_pass + total_broken
         # Get all the error/failures and bring them along for the ride
         efs = filter_errors(ts)
-        throw(TestSetException(total_pass,total_fail,total_error, total_broken, efs))
+        throw(TestSetException(total_pass, total_fail, total_error, total_broken, efs))
     end
 
     # return the testset so it is returned from the @testset macro
@@ -889,8 +1104,8 @@ end
 function get_alignment(ts::DefaultTestSet, depth::Int)
     # The minimum width at this depth is
     ts_width = 2*depth + length(ts.description)
-    # If all passing, no need to look at children
-    !ts.anynonpass && return ts_width
+    # If not verbose and all passing, no need to look at children
+    !ts.verbose && !ts.anynonpass && return ts_width
     # Return the maximum of this width and the minimum width
     # for all children (if they exist)
     isempty(ts.results) && return ts_width
@@ -923,7 +1138,7 @@ function get_test_counts(ts::DefaultTestSet)
         isa(t, Error)  && (errors += 1)
         isa(t, Broken) && (broken += 1)
         if isa(t, DefaultTestSet)
-            np, nf, ne, nb, ncp, ncf, nce , ncb = get_test_counts(t)
+            np, nf, ne, nb, ncp, ncf, nce , ncb, duration = get_test_counts(t)
             c_passes += np + ncp
             c_fails  += nf + ncf
             c_errors += ne + nce
@@ -931,16 +1146,28 @@ function get_test_counts(ts::DefaultTestSet)
         end
     end
     ts.anynonpass = (fails + errors + c_fails + c_errors > 0)
-    return passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken
+    duration = if isnothing(ts.time_end)
+        ""
+    else
+        dur_s = ts.time_end - ts.time_start
+        if dur_s < 60
+            string(round(dur_s, digits = 1), "s")
+        else
+            m, s = divrem(dur_s, 60)
+            s = lpad(string(round(s, digits = 1)), 4, "0")
+            string(round(Int, m), "m", s, "s")
+        end
+    end
+    return passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration
 end
 
 # Recursive function that prints out the results at each level of
 # the tree of test sets
 function print_counts(ts::DefaultTestSet, depth, align,
-                      pass_width, fail_width, error_width, broken_width, total_width)
+                      pass_width, fail_width, error_width, broken_width, total_width, duration_width, showtiming)
     # Count results by each type at this level, and recursively
     # through any child test sets
-    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = get_test_counts(ts)
+    passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken, duration = get_test_counts(ts)
     subtotal = passes + fails + errors + broken + c_passes + c_fails + c_errors + c_broken
     # Print test set header, with an alignment that ensures all
     # the test results appear above each other
@@ -979,18 +1206,23 @@ function print_counts(ts::DefaultTestSet, depth, align,
     end
 
     if np == 0 && nf == 0 && ne == 0 && nb == 0
-        printstyled("No tests", color=Base.info_color())
+        printstyled(lpad("None", total_width, " "), "  ", color=Base.info_color())
     else
-        printstyled(lpad(string(subtotal), total_width, " "), color=Base.info_color())
+        printstyled(lpad(string(subtotal), total_width, " "), "  ", color=Base.info_color())
+    end
+
+    if showtiming
+        printstyled(lpad(string(duration), duration_width, " "))
     end
     println()
 
-    # Only print results at lower levels if we had failures
-    if np + nb != subtotal
+    # Only print results at lower levels if we had failures or if the user
+    # wants.
+    if (np + nb != subtotal) || (ts.verbose)
         for t in ts.results
             if isa(t, DefaultTestSet)
                 print_counts(t, depth + 1, align,
-                    pass_width, fail_width, error_width, broken_width, total_width)
+                    pass_width, fail_width, error_width, broken_width, total_width, duration_width, ts.showtiming)
             end
         end
     end
@@ -1018,6 +1250,7 @@ end
     @testset [CustomTestSet] [option=val  ...] ["description"] begin ... end
     @testset [CustomTestSet] [option=val  ...] ["description \$v"] for v in (...) ... end
     @testset [CustomTestSet] [option=val  ...] ["description \$v, \$w"] for v in (...), w in (...) ... end
+    @testset [CustomTestSet] [option=val  ...] ["description \$v, \$w"] foo()
 
 Starts a new test set, or multiple test sets if a `for` loop is provided.
 
@@ -1028,11 +1261,16 @@ along with a summary of the test results.
 
 Any custom testset type (subtype of `AbstractTestSet`) can be given and it will
 also be used for any nested `@testset` invocations. The given options are only
-applied to the test set where they are given. The default test set type does
-not take any options.
+applied to the test set where they are given. The default test set type
+accepts two boolean options:
+- `verbose`: if `true`, the result summary of the nested testsets is shown even
+when they all pass (the default is `false`).
+- `showtiming`: if `true`, the duration of each displayed testset is shown
+(the default is `true`).
 
 The description string accepts interpolation from the loop indices.
 If no description is provided, one is constructed based on the variables.
+If a function call is provided, its name will be used. Explicit description strings override this behavior.
 
 By default the `@testset` macro will return the testset object itself, though
 this behavior can be customized in other testset types. If a `for` loop is used
@@ -1049,7 +1287,7 @@ re-arrangements of `@testset`s regardless of their side-effect on the
 global RNG state.
 
 # Examples
-```jldoctest
+```jldoctest; filter = r"trigonometric identities |    4      4  [0-9\\.]+s"
 julia> @testset "trigonometric identities" begin
            θ = 2/3*π
            @test sin(-θ) ≈ -sin(θ)
@@ -1057,8 +1295,8 @@ julia> @testset "trigonometric identities" begin
            @test sin(2θ) ≈ 2*sin(θ)*cos(θ)
            @test cos(2θ) ≈ cos(θ)^2 - sin(θ)^2
        end;
-Test Summary:            | Pass  Total
-trigonometric identities |    4      4
+Test Summary:            | Pass  Total  Time
+trigonometric identities |    4      4  0.2s
 ```
 """
 macro testset(args...)
@@ -1067,24 +1305,32 @@ macro testset(args...)
     tests = args[end]
 
     # Determine if a single block or for-loop style
-    if !isa(tests,Expr) || (tests.head !== :for && tests.head !== :block)
-        error("Expected begin/end block or for loop as argument to @testset")
+    if !isa(tests,Expr) || (tests.head !== :for && tests.head !== :block && tests.head != :call)
+
+        error("Expected function call, begin/end block or for loop as argument to @testset")
     end
 
     if tests.head === :for
         return testset_forloop(args, tests, __source__)
     else
-        return testset_beginend(args, tests, __source__)
+        return testset_beginend_call(args, tests, __source__)
     end
 end
 
+trigger_test_failure_break(@nospecialize(err)) =
+    ccall(:jl_test_failure_breakpoint, Cvoid, (Any,), err)
+
 """
-Generate the code for a `@testset` with a `begin`/`end` argument
+Generate the code for a `@testset` with a function call or `begin`/`end` argument
 """
-function testset_beginend(args, tests, source)
+function testset_beginend_call(args, tests, source)
     desc, testsettype, options = parse_testset_args(args[1:end-1])
     if desc === nothing
-        desc = "test set"
+        if tests.head === :call
+            desc = string(tests.args[1]) # use the function name as test name
+        else
+            desc = "test set"
+        end
     end
     # If we're at the top level we'll default to DefaultTestSet. Otherwise
     # default to the type of the parent testset
@@ -1106,9 +1352,10 @@ function testset_beginend(args, tests, source)
         # by wrapping the body in a function
         local RNG = default_rng()
         local oldrng = copy(RNG)
+        local oldseed = Random.GLOBAL_SEED
         try
             # RNG is re-seeded with its own seed to ease reproduce a failed test
-            Random.seed!(RNG.seed)
+            Random.seed!(Random.GLOBAL_SEED)
             let
                 $(esc(tests))
             end
@@ -1116,9 +1363,11 @@ function testset_beginend(args, tests, source)
             err isa InterruptException && rethrow()
             # something in the test block threw an error. Count that as an
             # error in this test set
-            record(ts, Error(:nontest_error, Expr(:tuple), err, Base.catch_stack(), $(QuoteNode(source))))
+            trigger_test_failure_break(err)
+            record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source))))
         finally
             copy!(RNG, oldrng)
+            Random.set_global_seed!(oldseed)
             pop_testset()
             ret = finish(ts)
         end
@@ -1190,7 +1439,8 @@ function testset_forloop(args, testloop, source)
             err isa InterruptException && rethrow()
             # Something in the test block threw an error. Count that as an
             # error in this test set
-            record(ts, Error(:nontest_error, Expr(:tuple), err, Base.catch_stack(), $(QuoteNode(source))))
+            trigger_test_failure_break(err)
+            record(ts, Error(:nontest_error, Expr(:tuple), err, Base.current_exceptions(), $(QuoteNode(source))))
         end
     end
     quote
@@ -1199,7 +1449,8 @@ function testset_forloop(args, testloop, source)
         local ts
         local RNG = default_rng()
         local oldrng = copy(RNG)
-        Random.seed!(RNG.seed)
+        local oldseed = Random.GLOBAL_SEED
+        Random.seed!(Random.GLOBAL_SEED)
         local tmprng = copy(RNG)
         try
             let
@@ -1212,6 +1463,7 @@ function testset_forloop(args, testloop, source)
                 push!(arr, finish(ts))
             end
             copy!(RNG, oldrng)
+            Random.set_global_seed!(oldseed)
         end
         arr
     end
@@ -1237,7 +1489,7 @@ function parse_testset_args(args)
         elseif isa(arg, Expr) && arg.head === :(=)
             # we're building up a Dict literal here
             key = Expr(:quote, arg.args[1])
-            push!(options.args, Expr(:call, :(=>), key, arg.args[2]))
+            push!(options.args, Expr(:call, :(=>), key, esc(arg.args[2])))
         else
             error("Unexpected argument $arg to @testset")
         end
@@ -1263,7 +1515,7 @@ end
 """
     push_testset(ts::AbstractTestSet)
 
-Adds the test set to the task_local_storage.
+Adds the test set to the `task_local_storage`.
 """
 function push_testset(ts::AbstractTestSet)
     testsets = get(task_local_storage(), :__BASETESTNEXT__, AbstractTestSet[])
@@ -1274,7 +1526,7 @@ end
 """
     pop_testset()
 
-Pops the last test set added to the task_local_storage. If there are no
+Pops the last test set added to the `task_local_storage`. If there are no
 active test sets, returns the fallback default test set.
 """
 function pop_testset()
@@ -1319,10 +1571,11 @@ julia> typeof(f(2))
 Int64
 
 julia> @code_warntype f(2)
-Variables
+MethodInstance for f(::Int64)
+  from f(a) in Main at none:1
+Arguments
   #self#::Core.Const(f)
   a::Int64
-
 Body::UNION{FLOAT64, INT64}
 1 ─ %1 = (a > 1)::Bool
 └──      goto #3 if not %1
@@ -1366,56 +1619,77 @@ function _inferred(ex, mod, allow = :(Union{}))
     end
     Meta.isexpr(ex, :call)|| error("@inferred requires a call expression")
     farg = ex.args[1]
-    if isa(farg, Symbol) && first(string(farg)) == '.'
+    if isa(farg, Symbol) && farg !== :.. && first(string(farg)) == '.'
         farg = Symbol(string(farg)[2:end])
         ex = Expr(:call, GlobalRef(Test, :_materialize_broadcasted),
             farg, ex.args[2:end]...)
     end
-    Base.remove_linenums!(quote
-        let
-            allow = $(esc(allow))
-            allow isa Type || throw(ArgumentError("@inferred requires a type as second argument"))
-            $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
-                # Has keywords
-                args = gensym()
-                kwargs = gensym()
-                quote
-                    $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
-                    inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...))))
-                end
-            else
-                # No keywords
-                quote
-                    args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),)
-                    result = $(esc(ex.args[1]))(args...)
-                    inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...))
-                end
-            end)
-            @assert length(inftypes) == 1
-            rettype = result isa Type ? Type{result} : typeof(result)
-            rettype <: allow || rettype == typesubtract(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])")
-            result
+    Base.remove_linenums!(let ex = ex;
+        quote
+            let
+                allow = $(esc(allow))
+                allow isa Type || throw(ArgumentError("@inferred requires a type as second argument"))
+                $(if any(a->(Meta.isexpr(a, :kw) || Meta.isexpr(a, :parameters)), ex.args)
+                    # Has keywords
+                    args = gensym()
+                    kwargs = gensym()
+                    quote
+                        $(esc(args)), $(esc(kwargs)), result = $(esc(Expr(:call, _args_and_call, ex.args[2:end]..., ex.args[1])))
+                        inftypes = $(gen_call_with_extracted_types(mod, Base.return_types, :($(ex.args[1])($(args)...; $(kwargs)...))))
+                    end
+                else
+                    # No keywords
+                    quote
+                        args = ($([esc(ex.args[i]) for i = 2:length(ex.args)]...),)
+                        result = $(esc(ex.args[1]))(args...)
+                        inftypes = Base.return_types($(esc(ex.args[1])), Base.typesof(args...))
+                    end
+                end)
+                @assert length(inftypes) == 1
+                rettype = result isa Type ? Type{result} : typeof(result)
+                rettype <: allow || rettype == typesplit(inftypes[1], allow) || error("return type $rettype does not match inferred return type $(inftypes[1])")
+                result
+            end
         end
     end)
 end
 
+function is_in_mods(m::Module, recursive::Bool, mods)
+    while true
+        m in mods && return true
+        recursive || return false
+        p = parentmodule(m)
+        p === m && return false
+        m = p
+    end
+end
+
 """
-    detect_ambiguities(mod1, mod2...; imported=false, recursive=false, ambiguous_bottom=false)
+    detect_ambiguities(mod1, mod2...; recursive=false,
+                                      ambiguous_bottom=false,
+                                      allowed_undefineds=nothing)
 
 Returns a vector of `(Method,Method)` pairs of ambiguous methods
 defined in the specified modules.
-Use `imported=true` if you wish to also test functions that were
-imported into these modules from elsewhere.
 Use `recursive=true` to test in all submodules.
 
 `ambiguous_bottom` controls whether ambiguities triggered only by
 `Union{}` type parameters are included; in most cases you probably
 want to set this to `false`. See [`Base.isambiguous`](@ref).
+
+See [`Test.detect_unbound_args`](@ref) for an explanation of
+`allowed_undefineds`.
+
+!!! compat "Julia 1.8"
+    `allowed_undefineds` requires at least Julia 1.8.
 """
-function detect_ambiguities(mods...;
-                            imported::Bool = false,
+function detect_ambiguities(mods::Module...;
                             recursive::Bool = false,
-                            ambiguous_bottom::Bool = false)
+                            ambiguous_bottom::Bool = false,
+                            allowed_undefineds = nothing)
+    @nospecialize
+    ambs = Set{Tuple{Method,Method}}()
+    mods = collect(mods)::Vector{Module}
     function sortdefs(m1::Method, m2::Method)
         ord12 = m1.file < m2.file
         if !ord12 && (m1.file == m2.file)
@@ -1423,102 +1697,120 @@ function detect_ambiguities(mods...;
         end
         return ord12 ? (m1, m2) : (m2, m1)
     end
-    ambs = Set{Tuple{Method,Method}}()
-    for mod in mods
-        for n in names(mod, all = true, imported = imported)
-            Base.isdeprecated(mod, n) && continue
-            if !isdefined(mod, n)
-                println("Skipping ", mod, '.', n)  # typically stale exports
-                continue
-            end
-            f = Base.unwrap_unionall(getfield(mod, n))
-            if recursive && isa(f, Module) && f !== mod && parentmodule(f) === mod && nameof(f) === n
-                subambs = detect_ambiguities(f,
-                    imported=imported, recursive=recursive, ambiguous_bottom=ambiguous_bottom)
-                union!(ambs, subambs)
-            elseif isa(f, DataType) && isdefined(f.name, :mt) && f.name.mt !== Symbol.name.mt
-                mt = Base.MethodList(f.name.mt)
-                for m in mt
-                    ambig = Int32[0]
-                    for match2 in Base._methods_by_ftype(m.sig, -1, typemax(UInt), true, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
-                        ambig[1] == 0 && break
-                        m2 = match2.method
-                        if Base.isambiguous(m, m2, ambiguous_bottom=ambiguous_bottom)
-                            push!(ambs, sortdefs(m, m2))
-                        end
+    function examine(mt::Core.MethodTable)
+        for m in Base.MethodList(mt)
+            m.sig == Tuple && continue # ignore Builtins
+            is_in_mods(m.module, recursive, mods) || continue
+            world = Base.get_world_counter()
+            ambig = Ref{Int32}(0)
+            ms = Base._methods_by_ftype(m.sig, nothing, -1, world, true, Ref(typemin(UInt)), Ref(typemax(UInt)), ambig)::Vector
+            ambig[] == 0 && continue
+            for match2 in ms
+                match2 = match2::Core.MethodMatch
+                m2 = match2.method
+                 if !(m === m2 || Base.morespecific(m2.sig, m.sig))
+                    if Base.isambiguous(m, m2; ambiguous_bottom)
+                        push!(ambs, sortdefs(m, m2))
                     end
                 end
             end
         end
     end
-    function is_in_mods(m::Module)
-        while true
-            m in mods && return true
-            recursive || return false
-            p = parentmodule(m)
-            p === m && return false
-            m = p
-        end
-    end
-    let mt = Base.MethodList(Symbol.name.mt)
-        for m in mt
-            if is_in_mods(m.module)
-                ambig = Int32[0]
-                for match2 in Base._methods_by_ftype(m.sig, -1, typemax(UInt), true, UInt[typemin(UInt)], UInt[typemax(UInt)], ambig)
-                    ambig[1] == 0 && break
-                    m2 = match2.method
-                    if Base.isambiguous(m, m2, ambiguous_bottom=ambiguous_bottom)
-                        push!(ambs, sortdefs(m, m2))
+    work = Base.loaded_modules_array()
+    filter!(mod -> mod === parentmodule(mod), work) # some items in loaded_modules_array are not top modules (really just Base)
+    while !isempty(work)
+        mod = pop!(work)
+        for n in names(mod, all = true)
+            Base.isdeprecated(mod, n) && continue
+            if !isdefined(mod, n)
+                if is_in_mods(mod, recursive, mods)
+                    if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds
+                        println("Skipping ", mod, '.', n)  # typically stale exports
                     end
                 end
+                continue
+            end
+            f = Base.unwrap_unionall(getfield(mod, n))
+            if isa(f, Module) && f !== mod && parentmodule(f) === mod && nameof(f) === n
+                push!(work, f)
+            elseif isa(f, DataType) && isdefined(f.name, :mt) && f.name.module === mod && f.name.name === n && f.name.mt !== Symbol.name.mt && f.name.mt !== DataType.name.mt
+                examine(f.name.mt)
             end
         end
     end
+    examine(Symbol.name.mt)
+    examine(DataType.name.mt)
     return collect(ambs)
 end
 
 """
-    detect_unbound_args(mod1, mod2...; imported=false, recursive=false)
+    detect_unbound_args(mod1, mod2...; recursive=false, allowed_undefineds=nothing)
 
 Returns a vector of `Method`s which may have unbound type parameters.
-Use `imported=true` if you wish to also test functions that were
-imported into these modules from elsewhere.
 Use `recursive=true` to test in all submodules.
+
+By default, any undefined symbols trigger a warning. This warning can
+be suppressed by supplying a collection of `GlobalRef`s for which
+the warning can be skipped. For example, setting
+
+```
+allow_undefineds = Set([GlobalRef(Base, :active_repl),
+                        GlobalRef(Base, :active_repl_backend)])
+```
+
+would suppress warnings about `Base.active_repl` and
+`Base.active_repl_backend`.
+
+!!! compat "Julia 1.8"
+    `allowed_undefineds` requires at least Julia 1.8.
 """
 function detect_unbound_args(mods...;
-                             imported::Bool = false,
-                             recursive::Bool = false)
+                             recursive::Bool = false,
+                             allowed_undefineds=nothing)
+    @nospecialize mods
     ambs = Set{Method}()
-    for mod in mods
-        for n in names(mod, all = true, imported = imported)
+    mods = collect(mods)::Vector{Module}
+    function examine(mt::Core.MethodTable)
+        for m in Base.MethodList(mt)
+            is_in_mods(m.module, recursive, mods) || continue
+            has_unbound_vars(m.sig) || continue
+            tuple_sig = Base.unwrap_unionall(m.sig)::DataType
+            if Base.isvatuple(tuple_sig)
+                params = tuple_sig.parameters[1:(end - 1)]
+                tuple_sig = Base.rewrap_unionall(Tuple{params...}, m.sig)
+                world = Base.get_world_counter()
+                mf = ccall(:jl_gf_invoke_lookup, Any, (Any, Any, UInt), tuple_sig, nothing, world)
+                if mf !== nothing && mf !== m && mf.sig <: tuple_sig
+                    continue
+                end
+            end
+            push!(ambs, m)
+        end
+    end
+    work = Base.loaded_modules_array()
+    filter!(mod -> mod === parentmodule(mod), work) # some items in loaded_modules_array are not top modules (really just Base)
+    while !isempty(work)
+        mod = pop!(work)
+        for n in names(mod, all = true)
             Base.isdeprecated(mod, n) && continue
             if !isdefined(mod, n)
-                println("Skipping ", mod, '.', n)  # typically stale exports
+                if is_in_mods(mod, recursive, mods)
+                    if allowed_undefineds === nothing || GlobalRef(mod, n) ∉ allowed_undefineds
+                        println("Skipping ", mod, '.', n)  # typically stale exports
+                    end
+                end
                 continue
             end
             f = Base.unwrap_unionall(getfield(mod, n))
-            if recursive && isa(f, Module) && f !== mod && parentmodule(f) === mod && nameof(f) === n
-                subambs = detect_unbound_args(f, imported=imported, recursive=recursive)
-                union!(ambs, subambs)
-            elseif isa(f, DataType) && isdefined(f.name, :mt)
-                mt = Base.MethodList(f.name.mt)
-                for m in mt
-                    if has_unbound_vars(m.sig)
-                        tuple_sig = Base.unwrap_unionall(m.sig)::DataType
-                        if Base.isvatuple(tuple_sig)
-                            params = tuple_sig.parameters[1:(end - 1)]
-                            tuple_sig = Base.rewrap_unionall(Tuple{params...}, m.sig)
-                            mf = ccall(:jl_gf_invoke_lookup, Any, (Any, UInt), tuple_sig, typemax(UInt))
-                            if mf !== nothing && mf !== m && mf.sig <: tuple_sig
-                                continue
-                            end
-                        end
-                        push!(ambs, m)
-                    end
-                end
+            if isa(f, Module) && f !== mod && parentmodule(f) === mod && nameof(f) === n
+                push!(work, f)
+            elseif isa(f, DataType) && isdefined(f.name, :mt) && f.name.module === mod && f.name.name === n && f.name.mt !== Symbol.name.mt && f.name.mt !== DataType.name.mt
+                examine(f.name.mt)
             end
         end
     end
+    examine(Symbol.name.mt)
+    examine(DataType.name.mt)
     return collect(ambs)
 end
 
@@ -1548,9 +1840,8 @@ function constrains_param(var::TypeVar, @nospecialize(typ), covariant::Bool)
                 end
                 lastp = typ.parameters[fc]
                 vararg = Base.unwrap_unionall(lastp)
-                if vararg isa DataType && vararg.name === Base._va_typename
-                    N = vararg.parameters[2]
-                    constrains_param(var, N, covariant) && return true
+                if vararg isa Core.TypeofVararg && isdefined(vararg, :N)
+                    constrains_param(var, vararg.N, covariant) && return true
                     # T = vararg.parameters[1] doesn't constrain var
                 else
                     constrains_param(var, lastp, covariant) && return true
@@ -1670,7 +1961,7 @@ end
 
 "`guardseed(f, seed)` is equivalent to running `Random.seed!(seed); f()` and
 then restoring the state of the global RNG as it was before."
-guardseed(f::Function, seed::Union{Vector{UInt32},Integer}) = guardseed() do
+guardseed(f::Function, seed::Union{Vector{UInt64},Vector{UInt32},Integer,NTuple{4,UInt64}}) = guardseed() do
     Random.seed!(seed)
     f()
 end
diff --git a/stdlib/Test/src/logging.jl b/stdlib/Test/src/logging.jl
index 7a254a80b90385..d7146b121d47db 100644
--- a/stdlib/Test/src/logging.jl
+++ b/stdlib/Test/src/logging.jl
@@ -1,12 +1,23 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Logging
-import Logging: Info,
-    shouldlog, handle_message, min_enabled_level, catch_exceptions
+using Logging: Logging, AbstractLogger, LogLevel, Info, with_logger
 import Base: occursin
 
 #-------------------------------------------------------------------------------
-# Log records
+"""
+    LogRecord
+
+Stores the results of a single log event. Fields:
+
+* `level`: the [`LogLevel`](@ref) of the log message
+* `message`: the textual content of the log message
+* `_module`: the module of the log event
+* `group`: the logging group (by default, the name of the file containing the log event)
+* `id`: the ID of the log event
+* `file`: the file containing the log event
+* `line`: the line within the file of the log event
+* `kwargs`: any keyword arguments passed to the log event
+"""
 struct LogRecord
     level
     message
@@ -28,23 +39,75 @@ mutable struct TestLogger <: AbstractLogger
     min_level::LogLevel
     catch_exceptions::Bool
     shouldlog_args
+    message_limits::Dict{Any,Int}
+    respect_maxlog::Bool
 end
 
-TestLogger(; min_level=Info, catch_exceptions=false) = TestLogger(LogRecord[], min_level, catch_exceptions, nothing)
-min_enabled_level(logger::TestLogger) = logger.min_level
+"""
+    TestLogger(; min_level=Info, catch_exceptions=false)
+
+Create a `TestLogger` which captures logged messages in its `logs::Vector{LogRecord}` field.
+
+Set `min_level` to control the `LogLevel`, `catch_exceptions` for whether or not exceptions
+thrown as part of log event generation should be caught, and `respect_maxlog` for whether
+or not to follow the convention of logging messages with `maxlog=n` for some integer `n` at
+most `n` times.
+
+See also: [`LogRecord`](@ref).
+
+## Example
+
+```jldoctest
+julia> using Test, Logging
+
+julia> f() = @info "Hi" number=5;
+
+julia> test_logger = TestLogger();
+
+julia> with_logger(test_logger) do
+           f()
+           @info "Bye!"
+       end
 
-function shouldlog(logger::TestLogger, level, _module, group, id)
-    logger.shouldlog_args = (level, _module, group, id)
-    true
+julia> @test test_logger.logs[1].message == "Hi"
+Test Passed
+
+julia> @test test_logger.logs[1].kwargs[:number] == 5
+Test Passed
+
+julia> @test test_logger.logs[2].message == "Bye!"
+Test Passed
+```
+"""
+TestLogger(; min_level=Info, catch_exceptions=false, respect_maxlog=true) =
+    TestLogger(LogRecord[], min_level, catch_exceptions, nothing, Dict{Any, Int}(), respect_maxlog)
+Logging.min_enabled_level(logger::TestLogger) = logger.min_level
+
+function Logging.shouldlog(logger::TestLogger, level, _module, group, id)
+    if get(logger.message_limits, id, 1) > 0
+        logger.shouldlog_args = (level, _module, group, id)
+        true
+    else
+        false
+    end
 end
 
-function handle_message(logger::TestLogger, level, msg, _module,
-                        group, id, file, line; kwargs...)
+function Logging.handle_message(logger::TestLogger, level, msg, _module,
+                                group, id, file, line; kwargs...)
+    @nospecialize
+    if logger.respect_maxlog
+        maxlog = get(kwargs, :maxlog, nothing)
+        if maxlog isa Core.BuiltinInts
+            remaining = get!(logger.message_limits, id, Int(maxlog)::Int)
+            logger.message_limits[id] = remaining - 1
+            remaining > 0 || return
+        end
+    end
     push!(logger.logs, LogRecord(level, msg, _module, group, id, file, line, kwargs))
 end
 
 # Catch exceptions for the test logger only if specified
-catch_exceptions(logger::TestLogger) = logger.catch_exceptions
+Logging.catch_exceptions(logger::TestLogger) = logger.catch_exceptions
 
 function collect_test_logs(f; kwargs...)
     logger = TestLogger(; kwargs...)
@@ -83,7 +146,7 @@ function record(::FallbackTestSet, t::LogTestFailure)
 end
 
 function record(ts::DefaultTestSet, t::LogTestFailure)
-    if myid() == 1
+    if TESTSET_PRINT_ENABLE[]
         printstyled(ts.description, ": ", color=:white)
         print(t)
         Base.show_backtrace(stdout, scrub_backtrace(backtrace()))
@@ -134,17 +197,28 @@ We can test the info message using
 If we also wanted to test the debug messages, these need to be enabled with the
 `min_level` keyword:
 
-    @test_logs (:info,"Doing foo with n=2") (:debug,"Iteration 1") (:debug,"Iteration 2") min_level=Debug foo(2)
+    using Logging
+    @test_logs (:info,"Doing foo with n=2") (:debug,"Iteration 1") (:debug,"Iteration 2") min_level=Logging.Debug foo(2)
 
 If you want to test that some particular messages are generated while ignoring the rest,
 you can set the keyword `match_mode=:any`:
 
-    @test_logs (:info,) (:debug,"Iteration 42") min_level=Debug match_mode=:any foo(100)
+    using Logging
+    @test_logs (:info,) (:debug,"Iteration 42") min_level=Logging.Debug match_mode=:any foo(100)
 
 The macro may be chained with `@test` to also test the returned value:
 
     @test (@test_logs (:info,"Doing foo with n=2") foo(2)) == 42
 
+If you want to test for the absence of warnings, you can omit specifying log
+patterns and set the `min_level` accordingly:
+
+    # test that the expression logs no messages when the logger level is warn:
+    @test_logs min_level=Logging.Warn @info("Some information") # passes
+    @test_logs min_level=Logging.Warn @warn("Some information") # fails
+
+If you want to test the absence of warnings (or error messages) in
+[`stderr`](@ref) which are not generated by `@warn`, see [`@test_nowarn`](@ref).
 """
 macro test_logs(exs...)
     length(exs) >= 1 || throw(ArgumentError("""`@test_logs` needs at least one arguments.
@@ -168,13 +242,13 @@ macro test_logs(exs...)
                     $(esc(expression))
                 end
                 if didmatch
-                    testres = Pass(:test, nothing, nothing, value)
+                    testres = Pass(:test, $orig_expr, nothing, value, $sourceloc)
                 else
                     testres = LogTestFailure($orig_expr, $sourceloc,
                                              $(QuoteNode(exs[1:end-1])), logs)
                 end
             catch e
-                testres = Error(:test_error, $orig_expr, e, Base.catch_stack(), $sourceloc)
+                testres = Error(:test_error, $orig_expr, e, Base.current_exceptions(), $sourceloc)
             end
             Test.record(Test.get_testset(), testres)
             value
@@ -260,4 +334,3 @@ macro test_deprecated(exs...)
     res.args[4].args[3].args[2].args[2].args[2] = __source__
     res
 end
-
diff --git a/stdlib/Test/test/nothrow_testset.jl b/stdlib/Test/test/nothrow_testset.jl
new file mode 100644
index 00000000000000..5b5c775960a9a3
--- /dev/null
+++ b/stdlib/Test/test/nothrow_testset.jl
@@ -0,0 +1,8 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+mutable struct NoThrowTestSet <: Test.AbstractTestSet
+    results::Vector
+    NoThrowTestSet(desc) = new([])
+end
+Test.record(ts::NoThrowTestSet, t::Test.Result) = (push!(ts.results, t); t)
+Test.finish(ts::NoThrowTestSet) = ts.results
diff --git a/stdlib/Test/test/runtests.jl b/stdlib/Test/test/runtests.jl
index 1f94d0e0f8e551..579b81cd5ace96 100644
--- a/stdlib/Test/test/runtests.jl
+++ b/stdlib/Test/test/runtests.jl
@@ -1,11 +1,15 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Test, Distributed, Random
+using Test, Random
 using Test: guardseed
+using Serialization
+using Distributed: RemoteException
 
-import Logging: Debug, Info, Warn
+import Logging: Debug, Info, Warn, with_logger
 
 @testset "@test" begin
+    atol = 1
+    a = (; atol=2)
     @test true
     @test 1 == 1
     @test 1 != 2
@@ -18,11 +22,28 @@ import Logging: Debug, Info, Warn
     @test isapprox(1, 1, atol=0.1)
     @test isapprox(1, 1; atol=0.1)
     @test isapprox(1, 1; [(:atol, 0)]...)
+    @test isapprox(1, 2; atol)
+    @test isapprox(1, 3; a.atol)
+end
+@testset "@test with skip/broken kwargs" begin
+    # Make sure the local variables can be used in conditions
+    a = 1
+    @test 2 + 2 == 4 broken=false
+    @test error() broken=true
+    @test !Sys.iswindows() broken=Sys.iswindows()
+    @test 1 ≈ 2 atol=1 broken=a==2
+    @test false skip=true
+    @test true skip=false
+    @test Grogu skip=isone(a)
+    @test 41 ≈ 42 rtol=1 skip=false
 end
 @testset "@test keyword precedence" begin
+    atol = 2
     # post-semicolon keyword, suffix keyword, pre-semicolon keyword
     @test isapprox(1, 2, atol=0) atol=1
     @test isapprox(1, 3, atol=0; atol=2) atol=1
+    @test isapprox(1, 2, atol=0; atol)
+    @test isapprox(1, 3, atol=0; atol) atol=1
 end
 @testset "@test should only evaluate the arguments once" begin
     g = Int[]
@@ -75,14 +96,20 @@ end
                    "Thrown: ErrorException")
     @test endswith(sprint(show, @test_throws ErrorException("test") error("test")),
                    "Thrown: ErrorException")
+    @test endswith(sprint(show, @test_throws "a test" error("a test")),
+                   "Message: \"a test\"")
+    @test occursin("Message: \"DomainError",
+                   sprint(show, @test_throws r"sqrt\([Cc]omplex" sqrt(-1)))
+    @test endswith(sprint(show, @test_throws str->occursin("a t", str) error("a test")),
+                   "Message: \"a test\"")
+    @test endswith(sprint(show, @test_throws ["BoundsError", "access", "1-element", "at index [2]"] [1][2]),
+                   "Message: \"BoundsError: attempt to access 1-element Vector{$Int} at index [2]\"")
+    @test_throws "\"" throw("\"")
+    @test_throws Returns(false) throw(Returns(false))
 end
 # Test printing of Fail results
-mutable struct NoThrowTestSet <: Test.AbstractTestSet
-    results::Vector
-    NoThrowTestSet(desc) = new([])
-end
-Test.record(ts::NoThrowTestSet, t::Test.Result) = (push!(ts.results, t); t)
-Test.finish(ts::NoThrowTestSet) = ts.results
+include("nothrow_testset.jl")
+
 let fails = @testset NoThrowTestSet begin
         # 1 - Fail - wrong exception
         @test_throws OverflowError error()
@@ -127,6 +154,15 @@ let fails = @testset NoThrowTestSet begin
         @test startswith(str1, str2)
         # 20 - Fail - endswith
         @test endswith(str1, str2)
+        # 21 - Fail - contains
+        @test contains(str1, str2)
+        # 22 - Fail - Type Comparison
+        @test typeof(1) <: typeof("julia")
+        # 23 - 26 - Fail - wrong message
+        @test_throws "A test" error("a test")
+        @test_throws r"sqrt\([Cc]omplx" sqrt(-1)
+        @test_throws str->occursin("a T", str) error("a test")
+        @test_throws ["BoundsError", "acess", "1-element", "at index [2]"] [1][2]
     end
     for fail in fails
         @test fail isa Test.Fail
@@ -231,13 +267,50 @@ let fails = @testset NoThrowTestSet begin
         @test occursin("Expression: endswith(str1, str2)", str)
         @test occursin("Evaluated: endswith(\"Hello\", \"World\")", str)
     end
+
+    let str = sprint(show, fails[21])
+        @test occursin("Expression: contains(str1, str2)", str)
+        @test occursin("Evaluated: contains(\"Hello\", \"World\")", str)
+    end
+
+    let str = sprint(show, fails[22])
+        @test occursin("Expression: typeof(1) <: typeof(\"julia\")", str)
+        @test occursin("Evaluated: $(typeof(1)) <: $(typeof("julia"))", str)
+    end
+
+    let str = sprint(show, fails[23])
+        @test occursin("Expected: \"A test\"", str)
+        @test occursin("Message: \"a test\"", str)
+    end
+
+    let str = sprint(show, fails[24])
+        @test occursin("Expected: r\"sqrt\\([Cc]omplx\"", str)
+        @test occursin(r"Message: .*Try sqrt\(Complex", str)
+    end
+
+    let str = sprint(show, fails[25])
+        @test occursin("Expected: < match function >", str)
+        @test occursin("Message: \"a test\"", str)
+    end
+
+    let str = sprint(show, fails[26])
+        @test occursin("Expected: [\"BoundsError\", \"acess\", \"1-element\", \"at index [2]\"]", str)
+        @test occursin(r"Message: \"BoundsError.* 1-element.*at index \[2\]", str)
+    end
+
 end
 
+struct BadError <: Exception end
+Base.show(io::IO, ::BadError) = throw("I am a bad error")
 let errors = @testset NoThrowTestSet begin
         # 1 - Error - unexpected pass
         @test_broken true
         # 2 - Error - converting a call into a comparison
         @test ==(1, 1:2...)
+        # 3 - Error - objects with broken show
+        @test throw(BadError())
+        @test BadError()
+        throw(BadError())
     end
 
     for err in errors
@@ -253,11 +326,23 @@ let errors = @testset NoThrowTestSet begin
         @test occursin("Expression: ==(1, 1:2...)", str)
         @test occursin("MethodError: no method matching ==(::$Int, ::$Int, ::$Int)", str)
     end
+
+    let str = sprint(show, errors[3])
+        @test occursin("Expression: throw(BadError())\n  #=ERROR showing exception stack=# \"I am a bad error\"\n  Stacktrace:\n", str)
+    end
+
+    let str = sprint(show, errors[4])
+        @test occursin("Expression: BadError()\n       Value: #=ERROR showing error of type $BadError=# \"I am a bad error\"\nStacktrace:\n", str)
+    end
+
+    let str = sprint(show, errors[5])
+        @test occursin("Got exception outside of a @test\n  #=ERROR showing exception stack=# \"I am a bad error\"\n  Stacktrace:\n", str)
+    end
 end
 
 let retval_tests = @testset NoThrowTestSet begin
         ts = Test.DefaultTestSet("Mock for testing retval of record(::DefaultTestSet, ::T <: Result) methods")
-        pass_mock = Test.Pass(:test, 1, 2, LineNumberNode(0, "A Pass Mock"))
+        pass_mock = Test.Pass(:test, 1, 2, 3, LineNumberNode(0, "A Pass Mock"))
         @test Test.record(ts, pass_mock) isa Test.Pass
         error_mock = Test.Error(:test, 1, 2, 3, LineNumberNode(0, "An Error Mock"))
         @test Test.record(ts, error_mock) isa Test.Error
@@ -394,7 +479,7 @@ end
         @test total_broken == 0
     end
     ts.anynonpass = false
-    deleteat!(Test.get_testset().results,1)
+    deleteat!(Test.get_testset().results, 1)
 end
 
 @test .1+.1+.1 ≈ .3
@@ -479,7 +564,7 @@ import Test: record, finish
 using Test: get_testset_depth, get_testset
 using Test: AbstractTestSet, Result, Pass, Fail, Error
 struct CustomTestSet <: Test.AbstractTestSet
-    description::AbstractString
+    description::String
     foo::Int
     results::Vector
     # constructor takes a description string and options keyword arguments
@@ -566,6 +651,13 @@ for i in 1:6
     @test typeof(tss[i].results[4].results[1]) == (iseven(i) ? Pass : Fail)
 end
 
+# test that second argument is escaped correctly
+foo = 3
+tss = @testset CustomTestSet foo=foo "custom testset - escaping" begin
+    @test true
+end
+@test tss.foo == 3
+
 # test @inferred
 uninferrable_function(i) = (1, "1")[i]
 uninferrable_small_union(i) = (1, nothing)[i]
@@ -668,13 +760,13 @@ let msg = read(pipeline(ignorestatus(`$(Base.julia_cmd()) --startup-file=no --co
                 @test foo(fill(1., 4)) == 15
             end
         end'`), stderr=devnull), String)
-    @test occursin("""
-        Test Summary: | Pass  Fail  Total
-        Foo Tests     |    2     2      4
-          Animals     |    1     1      2
-            Felines   |    1            1
-            Canines   |          1      1
-          Arrays      |    1     1      2
+    @test occursin(r"""
+        Test Summary: | Pass  Fail  Total  Time
+        Foo Tests     |    2     2      4  \s*\d*.\ds
+          Animals     |    1     1      2  \s*\d*.\ds
+            Felines   |    1            1  \s*\d*.\ds
+            Canines   |          1      1  \s*\d*.\ds
+          Arrays      |    1     1      2  \s*\d*.\ds
         """, msg)
 end
 
@@ -745,6 +837,8 @@ end
     @test occursin("Evaluated: 0.9 ≈ 0.1 (nans=true, atol=0.01)", msg)
 end
 
+erronce() = @error "an error" maxlog=1
+
 @testset "@test_logs" begin
     function foo(n)
         @info "Doing foo with n=$n"
@@ -773,6 +867,17 @@ end
 
     @test_logs (Debug,"Iteration 5") min_level=Debug match_mode=:any foo(10)
 
+    # Respect `maxlog` (#41625). We check we only find one logging message.
+    @test_logs (:error, "an error") (erronce(); erronce())
+
+    # Test `respect_maxlog=false`:
+    test_logger = Test.TestLogger(; respect_maxlog=false)
+    with_logger(test_logger) do
+        erronce()
+        erronce()
+    end
+    @test length(test_logger.logs) == 2
+
     # Test failures
     fails = @testset NoThrowTestSet "check that @test_logs detects bad input" begin
         @test_logs (Warn,) foo(1)
@@ -788,33 +893,34 @@ end
     @test startswith(fails[4].value, "ErrorException")
 end
 
-function newfunc()
-    42
-end
-@deprecate oldfunc newfunc
+let code = quote
+        function newfunc()
+            42
+        end
+        @deprecate oldfunc newfunc
 
-@testset "@test_deprecated" begin
-    @test_deprecated oldfunc()
+        @testset "@test_deprecated" begin
+            @test_deprecated oldfunc()
+            @test Base.JLOptions().depwarn == 1
 
-    # Expression passthrough
-    if Base.JLOptions().depwarn != 2
-        @test (@test_deprecated oldfunc()) == 42
+            @test (@test_deprecated oldfunc()) == 42
 
-        fails = @testset NoThrowTestSet "check that @test_deprecated detects bad input" begin
-            @test_deprecated newfunc()
-            @test_deprecated r"Not found in message" oldfunc()
+            fails = @testset NoThrowTestSet "check that @test_deprecated detects bad input" begin
+                @test_deprecated newfunc()
+                @test_deprecated r"Not found in message" oldfunc()
+            end
+            @test length(fails) == 2
+            @test fails[1] isa Test.LogTestFailure
+            @test fails[2] isa Test.LogTestFailure
         end
-        @test length(fails) == 2
-        @test fails[1] isa Test.LogTestFailure
-        @test fails[2] isa Test.LogTestFailure
-    else
-        @warn """Omitting `@test_deprecated` tests which can't yet
-                 be tested in --depwarn=error mode"""
     end
+    incl = "include($(repr(joinpath(@__DIR__, "nothrow_testset.jl"))))"
+    cmd = `$(Base.julia_cmd()) --startup-file=no --depwarn=yes -e 'using Test' -e $incl -e $code`
+    @test success(pipeline(cmd))
 end
 
 @testset "@testset preserves GLOBAL_RNG's state, and re-seeds it" begin
-    # i.e. it behaves as if it was wrapped in a `guardseed(GLOBAL_RNG.seed)` block
+    # i.e. it behaves as if it was wrapped in a `guardseed(GLOBAL_SEED)` block
     seed = rand(UInt128)
     Random.seed!(seed)
     a = rand()
@@ -830,6 +936,29 @@ end
     Random.seed!(seed)
     @test a == rand()
     @test b == rand()
+
+    # Even when seed!() is called within a testset A, subsequent testsets
+    # should start with the same "global RNG state" as what A started with,
+    # such that the test `refvalue == rand(Int)` below succeeds.
+    # Currently, this means that Random.GLOBAL_SEED has to be restored,
+    # in addition to the state of Random.default_rng().
+    GLOBAL_SEED_orig = Random.GLOBAL_SEED
+    local refvalue
+    @testset "GLOBAL_SEED is also preserved (setup)" begin
+        @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
+        refvalue = rand(Int)
+        Random.seed!()
+        @test GLOBAL_SEED_orig != Random.GLOBAL_SEED
+    end
+    @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
+    @testset "GLOBAL_SEED is also preserved (forloop)" for _=1:3
+        @test refvalue == rand(Int)
+        Random.seed!()
+    end
+    @test GLOBAL_SEED_orig == Random.GLOBAL_SEED
+    @testset "GLOBAL_SEED is also preserved (beginend)" begin
+        @test refvalue == rand(Int)
+    end
 end
 
 @testset "InterruptExceptions #21043" begin
@@ -946,3 +1075,244 @@ end
     end
     @test ok
 end
+
+let ex = :(something_complex + [1, 2, 3])
+    b = PipeBuffer()
+    let t = Test.Pass(:test, (ex, 1), (ex, 2), (ex, 3), LineNumberNode(@__LINE__, @__FILE__))
+        serialize(b, t)
+        @test string(t) == string(deserialize(b))
+        @test eof(b)
+    end
+    let t = Test.Broken(:test, ex)
+        serialize(b, t)
+        @test string(t) == string(deserialize(b))
+        @test eof(b)
+    end
+end
+
+@testset "verbose option" begin
+    expected = r"""
+    Test Summary:             | Pass  Total  Time
+    Parent                    |    9      9  \s*\d*.\ds
+      Child 1                 |    3      3  \s*\d*.\ds
+        Child 1.1 (long name) |    1      1  \s*\d*.\ds
+        Child 1.2             |    1      1  \s*\d*.\ds
+        Child 1.3             |    1      1  \s*\d*.\ds
+      Child 2                 |    3      3  \s*\d*.\ds
+      Child 3                 |    3      3  \s*\d*.\ds
+        Child 3.1             |    1      1  \s*\d*.\ds
+        Child 3.2             |    1      1  \s*\d*.\ds
+        Child 3.3             |    1      1  \s*\d*.\ds
+    """
+
+    mktemp() do f, _
+        write(f,
+        """
+        using Test
+
+        @testset "Parent" verbose = true begin
+            @testset "Child 1" verbose = true begin
+                @testset "Child 1.1 (long name)" begin
+                    @test 1 == 1
+                end
+
+                @testset "Child 1.2" begin
+                    @test 1 == 1
+                end
+
+                @testset "Child 1.3" begin
+                    @test 1 == 1
+                end
+            end
+
+            @testset "Child 2" begin
+                @testset "Child 2.1" begin
+                    @test 1 == 1
+                end
+
+                @testset "Child 2.2" begin
+                    @test 1 == 1
+                end
+
+                @testset "Child 2.3" begin
+                    @test 1 == 1
+                end
+            end
+
+            @testset "Child 3" verbose = true begin
+                @testset "Child 3.1" begin
+                    @test 1 == 1
+                end
+
+                @testset "Child 3.2" begin
+                    @test 1 == 1
+                end
+
+                @testset "Child 3.3" begin
+                    @test 1 == 1
+                end
+            end
+        end
+        """)
+        cmd    = `$(Base.julia_cmd()) --startup-file=no --color=no $f`
+        result = read(pipeline(ignorestatus(cmd), stderr=devnull), String)
+        @test occursin(expected, result)
+    end
+end
+
+# Non-booleans in @test (#35888)
+struct T35888 end
+Base.isequal(::T35888, ::T35888) = T35888()
+Base.:!(::T35888) = missing
+let errors = @testset NoThrowTestSet begin
+        # 1 - evaluates to non-Boolean
+        @test missing
+        # 2 - evaluates to non-Boolean
+        @test !missing
+        # 3 - evaluates to non-Boolean
+        @test isequal(5)
+        # 4 - evaluates to non-Boolean
+        @test !isequal(5)
+        # 5 - evaluates to non-Boolean
+        @test isequal(T35888(), T35888())
+        # 6 - evaluates to non-Boolean
+        @test !isequal(T35888(), T35888())
+        # 7 - evaluates to non-Boolean
+        @test 1 < 2 < missing
+        # 8 - evaluates to non-Boolean
+        @test !(1 < 2 < missing)
+        # 9 - TypeError in chained comparison
+        @test 1 < 2 < missing < 4
+        # 10 - TypeError in chained comparison
+        @test !(1 < 2 < missing < 4)
+    end
+
+    for err in errors
+        @test err isa Test.Error
+    end
+
+    let str = sprint(show, errors[1])
+        @test occursin("Expression evaluated to non-Boolean", str)
+        @test occursin("Expression: missing", str)
+        @test occursin("Value: missing", str)
+    end
+
+    let str = sprint(show, errors[2])
+        @test occursin("Expression evaluated to non-Boolean", str)
+        @test occursin("Expression: !missing", str)
+        @test occursin("Value: missing", str)
+    end
+
+    let str = sprint(show, errors[3])
+        @test occursin("Expression evaluated to non-Boolean", str)
+        @test occursin("Expression: isequal(5)", str)
+    end
+
+    let str = sprint(show, errors[4])
+        @test occursin("Expression evaluated to non-Boolean", str)
+        @test occursin("Expression: !(isequal(5))", str)
+    end
+
+    let str = sprint(show, errors[5])
+        @test occursin("Expression evaluated to non-Boolean", str)
+        @test occursin("Expression: isequal(T35888(), T35888())", str)
+        @test occursin("Value: $T35888()", str)
+    end
+
+    let str = sprint(show, errors[6])
+        @test occursin("Expression evaluated to non-Boolean", str)
+        @test occursin("Expression: !(isequal(T35888(), T35888()))", str)
+        @test occursin("Value: missing", str)
+    end
+
+    let str = sprint(show, errors[7])
+        @test occursin("Expression evaluated to non-Boolean", str)
+        @test occursin("Expression: 1 < 2 < missing", str)
+        @test occursin("Value: missing", str)
+    end
+
+    let str = sprint(show, errors[8])
+        @test occursin("Expression evaluated to non-Boolean", str)
+        @test occursin("Expression: !(1 < 2 < missing)", str)
+        @test occursin("Value: missing", str)
+    end
+
+    let str = sprint(show, errors[9])
+        @test occursin("TypeError: non-boolean (Missing) used in boolean context", str)
+        @test occursin("Expression: 1 < 2 < missing < 4", str)
+    end
+
+    let str = sprint(show, errors[10])
+        @test occursin("TypeError: non-boolean (Missing) used in boolean context", str)
+        @test occursin("Expression: !(1 < 2 < missing < 4)", str)
+    end
+end
+
+macro test_macro_throw_1()
+    throw(ErrorException("Real error"))
+end
+macro test_macro_throw_2()
+    throw(LoadError("file", 111, ErrorException("Real error")))
+end
+
+@testset "Soft deprecation of @test_throws LoadError [@]macroexpand[1]" begin
+    # If a macroexpand was detected, undecorated LoadErrors can stand in for any error.
+    # This will throw a deprecation warning.
+    @test_deprecated (@test_throws LoadError macroexpand(@__MODULE__, :(@test_macro_throw_1)))
+    @test_deprecated (@test_throws LoadError @macroexpand @test_macro_throw_1)
+    # Decorated LoadErrors are unwrapped if the actual exception matches the inner, but not the outer, exception, regardless of whether or not a macroexpand is detected.
+    # This will not throw a deprecation warning.
+    @test_throws LoadError("file", 111, ErrorException("Real error")) macroexpand(@__MODULE__, :(@test_macro_throw_1))
+    @test_throws LoadError("file", 111, ErrorException("Real error")) @macroexpand @test_macro_throw_1
+    # Decorated LoadErrors are not unwrapped if a LoadError was thrown.
+    @test_throws LoadError("file", 111, ErrorException("Real error")) @macroexpand @test_macro_throw_2
+end
+
+# Issue 25483
+mutable struct PassInformationTestSet <: Test.AbstractTestSet
+    results::Vector
+    PassInformationTestSet(desc) = new([])
+end
+Test.record(ts::PassInformationTestSet, t::Test.Result) = (push!(ts.results, t); t)
+Test.finish(ts::PassInformationTestSet) = ts
+@testset "Information in Pass result (Issue 25483)" begin
+    ts = @testset PassInformationTestSet begin
+        @test 1 == 1
+        @test_throws ErrorException throw(ErrorException("Msg"))
+    end
+    test_line_number = (@__LINE__) - 3
+    test_throws_line_number =  (@__LINE__) - 3
+    @test ts.results[1].test_type == :test
+    @test ts.results[1].orig_expr == :(1 == 1)
+    @test ts.results[1].data == Expr(:comparison, 1, :(==), 1)
+    @test ts.results[1].value == true
+    @test ts.results[1].source == LineNumberNode(test_line_number, @__FILE__)
+    @test ts.results[2].test_type == :test_throws
+    @test ts.results[2].orig_expr == :(throw(ErrorException("Msg")))
+    @test ts.results[2].data == ErrorException
+    @test ts.results[2].value == ErrorException("Msg")
+    @test ts.results[2].source == LineNumberNode(test_throws_line_number, @__FILE__)
+end
+
+let
+    f(x) = @test isone(x)
+    function h(x)
+        @testset f(x)
+        @testset "success" begin @test true end
+        @testset for i in 1:3
+            @test !iszero(i)
+        end
+    end
+    tret = @testset h(1)
+    tdesc = @testset "description" h(1)
+    @testset "Function calls" begin
+        @test tret.description == "h"
+        @test tdesc.description == "description"
+        @test length(tret.results) == 5
+        @test tret.results[1].description == "f"
+        @test tret.results[2].description == "success"
+        for i in 1:3
+            @test tret.results[2+i].description == "i = $i"
+        end
+    end
+end
diff --git a/stdlib/Test/test/test_pop_testset_exec.jl b/stdlib/Test/test/test_pop_testset_exec.jl
index 3c5fde63f4ad42..858e9f28d024eb 100644
--- a/stdlib/Test/test/test_pop_testset_exec.jl
+++ b/stdlib/Test/test/test_pop_testset_exec.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 
 @testset begin
diff --git a/stdlib/UUIDs/src/UUIDs.jl b/stdlib/UUIDs/src/UUIDs.jl
index 7fb83a393be004..41d5319fec24d1 100644
--- a/stdlib/UUIDs/src/UUIDs.jl
+++ b/stdlib/UUIDs/src/UUIDs.jl
@@ -102,7 +102,7 @@ detail that may change in the future.
 julia> rng = MersenneTwister(1234);
 
 julia> uuid4(rng)
-UUID("196f2941-2d58-45ba-9f13-43a2532b2fa8")
+UUID("7a052949-c101-4ca3-9a7e-43a2532b2fa8")
 ```
 """
 function uuid4(rng::AbstractRNG=Random.RandomDevice())
@@ -126,10 +126,10 @@ as specified by RFC 4122.
 julia> rng = MersenneTwister(1234);
 
 julia> u4 = uuid4(rng)
-UUID("196f2941-2d58-45ba-9f13-43a2532b2fa8")
+UUID("7a052949-c101-4ca3-9a7e-43a2532b2fa8")
 
 julia> u5 = uuid5(u4, "julia")
-UUID("b37756f8-b0c0-54cd-a466-19b3d25683bc")
+UUID("086cc5bb-2461-57d8-8068-0aed7f5b5cd1")
 ```
 """
 function uuid5(ns::UUID, name::String)
diff --git a/stdlib/Unicode/docs/src/index.md b/stdlib/Unicode/docs/src/index.md
index aba9d80c3e8b54..2771c8a9f01cce 100644
--- a/stdlib/Unicode/docs/src/index.md
+++ b/stdlib/Unicode/docs/src/index.md
@@ -1,7 +1,9 @@
 # Unicode
 
 ```@docs
+Unicode.julia_chartransform
 Unicode.isassigned
+Unicode.isequal_normalized
 Unicode.normalize
 Unicode.graphemes
 ```
diff --git a/stdlib/Unicode/src/Unicode.jl b/stdlib/Unicode/src/Unicode.jl
index 7ac3a9f9b1d4bd..0467a8d50aa6bc 100644
--- a/stdlib/Unicode/src/Unicode.jl
+++ b/stdlib/Unicode/src/Unicode.jl
@@ -2,7 +2,51 @@
 
 module Unicode
 
-export graphemes
+export graphemes, isequal_normalized
+
+"""
+    Unicode.julia_chartransform(c::Union{Char,Integer})
+
+Map the Unicode character (`Char`) or codepoint (`Integer`) `c` to the corresponding
+"equivalent" character or codepoint, respectively, according to the custom equivalence
+used within the Julia parser (in addition to NFC normalization).
+
+For example, `'µ'` (U+00B5 micro) is treated as equivalent to `'μ'` (U+03BC mu) by
+Julia's parser, so `julia_chartransform` performs this transformation while leaving
+other characters unchanged:
+```jldoctest
+julia> Unicode.julia_chartransform('\u00B5')
+'μ': Unicode U+03BC (category Ll: Letter, lowercase)
+
+julia> Unicode.julia_chartransform('x')
+'x': ASCII/Unicode U+0078 (category Ll: Letter, lowercase)
+```
+
+`julia_chartransform` is mainly useful for passing to the [`Unicode.normalize`](@ref)
+function in order to mimic the normalization used by the Julia parser:
+```jldoctest
+julia> s = "\u00B5o\u0308"
+"µö"
+
+julia> s2 = Unicode.normalize(s, compose=true, stable=true, chartransform=Unicode.julia_chartransform)
+"μö"
+
+julia> collect(s2)
+2-element Vector{Char}:
+ 'μ': Unicode U+03BC (category Ll: Letter, lowercase)
+ 'ö': Unicode U+00F6 (category Ll: Letter, lowercase)
+
+julia> s2 == string(Meta.parse(s))
+true
+```
+
+!!! compat "Julia 1.8"
+    This function was introduced in Julia 1.8.
+"""
+function julia_chartransform end
+julia_chartransform(codepoint::UInt32) = get(Base.Unicode._julia_charmap, codepoint, codepoint)
+julia_chartransform(codepoint::Integer) = julia_chartransform(UInt32(codepoint))
+julia_chartransform(char::Char) = Char(julia_chartransform(UInt32(char)))
 
 """
     Unicode.normalize(s::AbstractString; keywords...)
@@ -42,6 +86,13 @@ options (which all default to `false` except for `compose`) are specified:
 * `rejectna=true`: throw an error if unassigned code points are found
 * `stable=true`: enforce Unicode versioning stability (never introduce characters missing from earlier Unicode versions)
 
+You can also use the `chartransform` keyword (which defaults to `identity`) to pass an arbitrary
+*function* mapping `Integer` codepoints to codepoints, which is is called on each
+character in `s` as it is processed, in order to perform arbitrary additional normalizations.
+For example, by passing `chartransform=Unicode.julia_chartransform`, you can apply a few Julia-specific
+character normalizations that are performed by Julia when parsing identifiers (in addition to
+NFC normalization: `compose=true, stable=true`).
+
 For example, NFKC corresponds to the options `compose=true, compat=true, stable=true`.
 
 # Examples
@@ -58,6 +109,9 @@ julia> Unicode.normalize("JuLiA", casefold=true)
 julia> Unicode.normalize("JúLiA", stripmark=true)
 "JuLiA"
 ```
+
+!!! compat "Julia 1.8"
+    The `chartransform` keyword argument requires Julia 1.8.
 """
 function normalize end
 normalize(s::AbstractString, nf::Symbol) = Base.Unicode.normalize(s, nf)
@@ -89,4 +143,141 @@ letter combined with an accent mark is a single grapheme.)
 """
 graphemes(s::AbstractString) = Base.Unicode.GraphemeIterator{typeof(s)}(s)
 
+"""
+    graphemes(s::AbstractString, m:n) -> SubString
+
+Returns a [`SubString`](@ref) of `s` consisting of the `m`-th
+through `n`-th graphemes of the string `s`, where the second
+argument `m:n` is an integer-valued [`AbstractUnitRange`](@ref).
+
+Loosely speaking, this corresponds to the `m:n`-th user-perceived
+"characters" in the string.  For example:
+
+```jldoctest
+julia> s = graphemes("exposé", 3:6)
+"posé"
+
+julia> collect(s)
+5-element Vector{Char}:
+ 'p': ASCII/Unicode U+0070 (category Ll: Letter, lowercase)
+ 'o': ASCII/Unicode U+006F (category Ll: Letter, lowercase)
+ 's': ASCII/Unicode U+0073 (category Ll: Letter, lowercase)
+ 'e': ASCII/Unicode U+0065 (category Ll: Letter, lowercase)
+ '́': Unicode U+0301 (category Mn: Mark, nonspacing)
+```
+This consists of the 3rd to *7th* codepoints ([`Char`](@ref)s) in `"exposé"`,
+because the grapheme `"é"` is actually *two* Unicode codepoints
+(an `'e'` followed by an acute-accent combining character U+0301).
+
+Because finding grapheme boundaries requires iteration over the
+string contents, the `graphemes(s, m:n)` function requires time
+proportional to the length of the string (number of codepoints)
+before the end of the substring.
+
+!!! compat "Julia 1.9"
+    The `m:n` argument of `graphemes` requires Julia 1.9.
+"""
+function graphemes(s::AbstractString, r::AbstractUnitRange{<:Integer})
+    m, n = Int(first(r)), Int(last(r))
+    m > 0 || throw(ArgumentError("starting index $m is not ≥ 1"))
+    n < m && return @view s[1:0]
+    c0 = eltype(s)(0x00000000)
+    state = Ref{Int32}(0)
+    count = 0
+    i, iprev, ilast = 1, 1, lastindex(s)
+    # find the start of the m-th grapheme
+    while i ≤ ilast && count < m
+        @inbounds c = s[i]
+        count += Base.Unicode.isgraphemebreak!(state, c0, c)
+        c0 = c
+        i, iprev = nextind(s, i), i
+    end
+    start = iprev
+    count < m && throw(BoundsError(s, i))
+    # find the end of the n-th grapheme
+    while i ≤ ilast
+        @inbounds c = s[i]
+        count += Base.Unicode.isgraphemebreak!(state, c0, c)
+        count > n && break
+        c0 = c
+        i, iprev = nextind(s, i), i
+    end
+    count < n && throw(BoundsError(s, i))
+    return @view s[start:iprev]
+end
+
+using Base.Unicode: utf8proc_error, UTF8PROC_DECOMPOSE, UTF8PROC_CASEFOLD, UTF8PROC_STRIPMARK
+
+function _decompose_char!(codepoint::Union{Integer,Char}, dest::Vector{UInt32}, options::Integer)
+    ret = @ccall utf8proc_decompose_char(codepoint::UInt32, dest::Ptr{UInt32}, length(dest)::Int, options::Cint, C_NULL::Ptr{Cint})::Int
+    ret < 0 && utf8proc_error(ret)
+    return ret
+end
+
+"""
+    isequal_normalized(s1::AbstractString, s2::AbstractString; casefold=false, stripmark=false, chartransform=identity)
+
+Return whether `s1` and `s2` are canonically equivalent Unicode strings.   If `casefold=true`,
+ignores case (performs Unicode case-folding); if `stripmark=true`, strips diacritical marks
+and other combining characters.
+
+As with [`Unicode.normalize`](@ref), you can also pass an arbitrary
+function via the `chartransform` keyword (mapping `Integer` codepoints to codepoints)
+to perform custom normalizations, such as [`Unicode.julia_chartransform`](@ref).
+
+# Examples
+
+For example, the string `"noël"` can be constructed in two canonically equivalent ways
+in Unicode, depending on whether `"ë"` is formed from a single codepoint U+00EB or
+from the ASCII character `'o'` followed by the U+0308 combining-diaeresis character.
+
+```jldoctest
+julia> s1 = "no\u00EBl"
+"noël"
+
+julia> s2 = "noe\u0308l"
+"noël"
+
+julia> s1 == s2
+false
+
+julia> isequal_normalized(s1, s2)
+true
+
+julia> isequal_normalized(s1, "noel", stripmark=true)
+true
+
+julia> isequal_normalized(s1, "NOËL", casefold=true)
+true
+```
+"""
+function isequal_normalized(s1::AbstractString, s2::AbstractString; casefold::Bool=false, stripmark::Bool=false, chartransform=identity)
+    function decompose_next_char!(c, state, d, options, s)
+        n = _decompose_char!(c, d, options)
+        if n > length(d) # may be possible in future Unicode versions?
+            n = _decompose_char!(c, resize!(d, n), options)
+        end
+        return 1, n, iterate(s, state)
+    end
+    options = UTF8PROC_DECOMPOSE
+    casefold && (options |= UTF8PROC_CASEFOLD)
+    stripmark && (options |= UTF8PROC_STRIPMARK)
+    i1,i2 = iterate(s1),iterate(s2)
+    d1,d2 = Vector{UInt32}(undef, 4), Vector{UInt32}(undef, 4) # codepoint buffers
+    n1 = n2 = 0 # lengths of codepoint buffers
+    j1 = j2 = 1 # indices in d1, d2
+    while true
+        if j1 > n1
+            i1 === nothing && return i2 === nothing && j2 > n2
+            j1, n1, i1 = decompose_next_char!(chartransform(UInt32(i1[1])), i1[2], d1, options, s1)
+        end
+        if j2 > n2
+            i2 === nothing && return false
+            j2, n2, i2 = decompose_next_char!(chartransform(UInt32(i2[1])), i2[2], d2, options, s2)
+        end
+        d1[j1] == d2[j2] || return false
+        j1 += 1; j2 += 1
+    end
+end
+
 end
diff --git a/stdlib/Unicode/test/runtests.jl b/stdlib/Unicode/test/runtests.jl
index aa4fe6e01e5750..5c5a75b33e3635 100644
--- a/stdlib/Unicode/test/runtests.jl
+++ b/stdlib/Unicode/test/runtests.jl
@@ -2,11 +2,11 @@
 
 using Test
 using Unicode
-using Unicode: normalize, isassigned
+using Unicode: normalize, isassigned, julia_chartransform
 
 @testset "string normalization" begin
     # normalize (Unicode normalization etc.):
-    @test normalize("\u006e\u0303", :NFC) == "\u00f1"
+    @test normalize("\u006e\u0303", :NFC) == "\u00f1" == normalize(SubString("ab\u006e\u0303cd",3,4), :NFC)
     @test "\u006e\u0303" == normalize("\u00f1", :NFD)
     @test normalize("\ufb00", :NFC) != "ff"
     @test normalize("\ufb00", :NFKC) == "ff"
@@ -25,6 +25,11 @@ using Unicode: normalize, isassigned
     @test normalize("\t\r", stripcc=true) == "  "
     @test normalize("\t\r", stripcc=true, newline2ls=true) == " \u2028"
     @test normalize("\u0072\u0307\u0323", :NFC) == "\u1E5B\u0307" #26917
+
+    # julia_chartransform identifier normalization
+    @test normalize("julia\u025B\u00B5\u00B7\u0387\u2212", chartransform=julia_chartransform) ==
+        "julia\u03B5\u03BC\u22C5\u22C5\u002D"
+    @test julia_chartransform('\u00B5') === '\u03BC'
 end
 
 @testset "unicode sa#15" begin
@@ -93,7 +98,7 @@ end
 @testset "#5939 uft8proc character predicates" begin
     alower=['a', 'd', 'j', 'y', 'z']
     ulower=['α', 'β', 'γ', 'δ', 'ф', 'я']
-    for c in vcat(alower,ulower)
+    for c in vcat(alower,ulower,['ª'])
         @test islowercase(c) == true
         @test isuppercase(c) == false
         @test isdigit(c) == false
@@ -101,17 +106,20 @@ end
     end
 
     aupper=['A', 'D', 'J', 'Y', 'Z']
-    uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'ǅ', 'Ж', 'Д']
+    uupper= ['Δ', 'Γ', 'Π', 'Ψ', 'Ж', 'Д']
 
-    for c in vcat(aupper,uupper)
+    for c in vcat(aupper,uupper,['Ⓐ'])
         @test islowercase(c) == false
         @test isuppercase(c) == true
         @test isdigit(c) == false
         @test isnumeric(c) == false
     end
 
+    @test !isuppercase('ǅ') # titlecase is not uppercase
+    @test Base.Unicode.iscased('ǅ') # but is "cased"
+
     nocase=['א','ﺵ']
-    alphas=vcat(alower,ulower,aupper,uupper,nocase)
+    alphas=vcat(alower,ulower,aupper,uupper,nocase,['ǅ'])
 
     for c in alphas
         @test isletter(c) == true
@@ -260,6 +268,19 @@ end
             end
         end
     end
+
+    @test Base.Unicode.isgraphemebreak('α', 'β')
+    @test !Base.Unicode.isgraphemebreak('α', '\u0302')
+
+    for pre in ("","ä"), post in ("","x̂")
+        prelen = length(graphemes(pre))
+        @test graphemes(pre * "öü" * post, (1:2) .+ prelen) == "öü"
+        @test graphemes(pre * "ö" * post, (1:1) .+ prelen) == "ö"
+    end
+    @test graphemes("äöüx", 6:5)::SubString{String} == ""
+    @test_throws BoundsError graphemes("äöüx", 2:5)
+    @test_throws BoundsError graphemes("äöüx", 5:5)
+    @test_throws ArgumentError graphemes("äöüx", 0:1)
 end
 
 @testset "#3721, #6939 up-to-date character widths" begin
@@ -330,6 +351,12 @@ end
     @test collect(g) == ["1","2","3","α","5"]
 end
 
+@testset "#37680: initial graphemes" begin
+    @test collect(graphemes("🤦🏼‍♂️")) == ["🤦🏼‍♂️"]
+    @test collect(graphemes("👨🏻‍🤝‍👨🏽")) == ["👨🏻‍🤝‍👨🏽"]
+    @test collect(graphemes("🇸🇪🇸🇪")) == ["🇸🇪","🇸🇪"]
+end
+
 @testset "uppercasefirst/lowercasefirst" begin
     @test uppercasefirst("Hola")=="Hola"
     @test uppercasefirst("hola")=="Hola"
@@ -379,6 +406,7 @@ end
         @test titlecase("abc-def")                     == "Abc-Def"
         @test titlecase("abc-def", wordsep = !Base.Unicode.iscased) == "Abc-Def"
         @test titlecase("abc-def", wordsep = isspace)  == "Abc-def"
+        @test titlecase("bôrked") == "Bôrked"
     end
 end
 
@@ -404,3 +432,38 @@ end
     @test prod(["*" for i in 1:3]) == "***"
     @test prod(["*" for i in 1:0]) == ""
 end
+
+@testset "Grapheme breaks and iterator" begin
+    u1 = reinterpret(Char, UInt32(0xc0) << 24)
+    u2 = reinterpret(Char, UInt32(0xc1) << 24)
+
+    overlong_uint =  UInt32(0xc0) << 24
+    overlong_char = reinterpret(Char, overlong_uint)
+
+    state = Ref(Int32(1))
+    @test Base.Unicode.isgraphemebreak(u1, u2)
+    @test Base.Unicode.isgraphemebreak!(state, u1, u2)
+    @test state[] == 0
+
+    @test_throws(
+        ErrorException("An unknown error occurred while processing UTF-8 data."),
+        Base.Unicode.utf8proc_error(2)
+    )
+    gi = Base.Unicode.graphemes("This is a string")
+    @test gi isa Base.Unicode.GraphemeIterator{String}
+    @test Base.Unicode.isvalid(Char, 'c')
+    @test !Base.Unicode.isvalid(Char, overlong_char)
+end
+
+@testset "Unicode equivalence" begin
+    @test isequal_normalized("no\u00EBl", "noe\u0308l")
+    @test !isequal_normalized("no\u00EBl", "noe\u0308l ")
+    @test isequal_normalized("", "")
+    @test !isequal_normalized("", " ")
+    @test !isequal_normalized("no\u00EBl", "NOËL")
+    @test isequal_normalized("no\u00EBl", "NOËL", casefold=true)
+    @test !isequal_normalized("no\u00EBl", "noel")
+    @test isequal_normalized("no\u00EBl", "noel", stripmark=true)
+    @test isequal_normalized("no\u00EBl", "NOEL", stripmark=true, casefold=true)
+    @test isequal_normalized("\u00B5\u0302m", "\u03BC\u0302m", chartransform=julia_chartransform)
+end
diff --git a/stdlib/Zlib_jll/Project.toml b/stdlib/Zlib_jll/Project.toml
new file mode 100644
index 00000000000000..77e1da5f9c22e8
--- /dev/null
+++ b/stdlib/Zlib_jll/Project.toml
@@ -0,0 +1,15 @@
+name = "Zlib_jll"
+uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
+version = "1.2.12+3"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/Zlib_jll/src/Zlib_jll.jl b/stdlib/Zlib_jll/src/Zlib_jll.jl
new file mode 100644
index 00000000000000..c05e26c4c6993f
--- /dev/null
+++ b/stdlib/Zlib_jll/src/Zlib_jll.jl
@@ -0,0 +1,45 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/Zlib_jll.jl
+baremodule Zlib_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libz
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libz_handle = C_NULL
+libz_path = ""
+
+if Sys.iswindows()
+    const libz = "libz.dll"
+elseif Sys.isapple()
+    const libz = "@rpath/libz.1.dylib"
+else
+    const libz = "libz.so.1"
+end
+
+function __init__()
+    global libz_handle = dlopen(libz)
+    global libz_path = dlpath(libz_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libz_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libz_path() = libz_path
+
+end  # module Zlib_jll
diff --git a/stdlib/Zlib_jll/test/runtests.jl b/stdlib/Zlib_jll/test/runtests.jl
new file mode 100644
index 00000000000000..cc9e64188a0aa6
--- /dev/null
+++ b/stdlib/Zlib_jll/test/runtests.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Zlib_jll
+
+@testset "Zlib_jll" begin
+    @test VersionNumber(unsafe_string(ccall((:zlibVersion, libz), Cstring, ()))) == v"1.2.12"
+end
diff --git a/stdlib/dSFMT_jll/Project.toml b/stdlib/dSFMT_jll/Project.toml
new file mode 100644
index 00000000000000..4e3e80f918f0b8
--- /dev/null
+++ b/stdlib/dSFMT_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "dSFMT_jll"
+uuid = "05ff407c-b0c1-5878-9df8-858cc2e60c36"
+version = "2.2.4+1"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/dSFMT_jll/src/dSFMT_jll.jl b/stdlib/dSFMT_jll/src/dSFMT_jll.jl
new file mode 100644
index 00000000000000..f1d6d019faf592
--- /dev/null
+++ b/stdlib/dSFMT_jll/src/dSFMT_jll.jl
@@ -0,0 +1,46 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/dSFMT_jll.jl
+
+baremodule dSFMT_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libdSFMT
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libdSFMT_handle = C_NULL
+libdSFMT_path = ""
+
+if Sys.iswindows()
+    const libdSFMT = "libdSFMT.dll"
+elseif Sys.isapple()
+    const libdSFMT = "@rpath/libdSFMT.dylib"
+else
+    const libdSFMT = "libdSFMT.so"
+end
+
+function __init__()
+    global libdSFMT_handle = dlopen(libdSFMT)
+    global libdSFMT_path = dlpath(libdSFMT_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libdSFMT_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libdSFMT_path() = libdSFMT_path
+
+end  # module dSFMT_jll
diff --git a/stdlib/dSFMT_jll/test/runtests.jl b/stdlib/dSFMT_jll/test/runtests.jl
new file mode 100644
index 00000000000000..be311ec1a25250
--- /dev/null
+++ b/stdlib/dSFMT_jll/test/runtests.jl
@@ -0,0 +1,8 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, dSFMT_jll
+
+@testset "dSFMT_jll" begin
+    idstring = ccall((:dsfmt_get_idstring, libdSFMT), Ptr{UInt8}, ())
+    @test startswith(unsafe_string(idstring), "dSFMT2-")
+end
diff --git a/stdlib/libLLVM_jll/Project.toml b/stdlib/libLLVM_jll/Project.toml
new file mode 100644
index 00000000000000..64de5adc434bae
--- /dev/null
+++ b/stdlib/libLLVM_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "libLLVM_jll"
+uuid = "8f36deef-c2a5-5394-99ed-8e07531fb29a"
+version = "13.0.1+0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.8"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/libLLVM_jll/src/libLLVM_jll.jl b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
new file mode 100644
index 00000000000000..09e01207ec9d63
--- /dev/null
+++ b/stdlib/libLLVM_jll/src/libLLVM_jll.jl
@@ -0,0 +1,46 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/libLLVM_jll.jl
+
+baremodule libLLVM_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libLLVM
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libLLVM_handle = C_NULL
+libLLVM_path = ""
+
+if Sys.iswindows()
+    const libLLVM = "libLLVM-13jl.dll"
+elseif Sys.isapple()
+    const libLLVM = "@rpath/libLLVM.dylib"
+else
+    const libLLVM = "libLLVM-13jl.so"
+end
+
+function __init__()
+    global libLLVM_handle = dlopen(libLLVM)
+    global libLLVM_path = dlpath(libLLVM_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libLLVM_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libLLVM_path() = libLLVM_path
+
+end  # module libLLVM_jll
diff --git a/stdlib/libLLVM_jll/test/runtests.jl b/stdlib/libLLVM_jll/test/runtests.jl
new file mode 100644
index 00000000000000..ea678108ae012a
--- /dev/null
+++ b/stdlib/libLLVM_jll/test/runtests.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, libLLVM_jll
+
+@testset "libLLVM_jll" begin
+    @test dlsym(libLLVM_jll.libLLVM_handle, :LLVMInitializeTarget; throw_error=false) !== nothing
+end
diff --git a/stdlib/libblastrampoline_jll/Project.toml b/stdlib/libblastrampoline_jll/Project.toml
new file mode 100644
index 00000000000000..44dd330f000a6f
--- /dev/null
+++ b/stdlib/libblastrampoline_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "libblastrampoline_jll"
+uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
+version = "5.1.0+0"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.8"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
new file mode 100644
index 00000000000000..77882067ed633f
--- /dev/null
+++ b/stdlib/libblastrampoline_jll/src/libblastrampoline_jll.jl
@@ -0,0 +1,46 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/libblastrampoline_jll.jl
+
+baremodule libblastrampoline_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libblastrampoline
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libblastrampoline_handle = C_NULL
+libblastrampoline_path = ""
+
+const libblastrampoline = if Sys.iswindows()
+    "libblastrampoline.dll"
+elseif Sys.isapple()
+    "@rpath/libblastrampoline.dylib"
+else
+    "libblastrampoline.so"
+end
+
+function __init__()
+    global libblastrampoline_handle = dlopen(libblastrampoline)
+    global libblastrampoline_path = dlpath(libblastrampoline_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libblastrampoline_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libblastrampoline_path() = libblastrampoline_path
+
+end  # module libblastrampoline_jll
diff --git a/stdlib/libblastrampoline_jll/test/runtests.jl b/stdlib/libblastrampoline_jll/test/runtests.jl
new file mode 100644
index 00000000000000..80095e70f0c76c
--- /dev/null
+++ b/stdlib/libblastrampoline_jll/test/runtests.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, libblastrampoline_jll
+
+@testset "libblastrampoline_jll" begin
+    @test isa(Libdl.dlsym(Libdl.dlopen(:libblastrampoline), :dgemm_64_), Ptr{Nothing})
+end
diff --git a/stdlib/nghttp2_jll/Project.toml b/stdlib/nghttp2_jll/Project.toml
new file mode 100644
index 00000000000000..3051afe57d23a9
--- /dev/null
+++ b/stdlib/nghttp2_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "nghttp2_jll"
+uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
+version = "1.41.0+1"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/nghttp2_jll/src/nghttp2_jll.jl b/stdlib/nghttp2_jll/src/nghttp2_jll.jl
new file mode 100644
index 00000000000000..09af3506369430
--- /dev/null
+++ b/stdlib/nghttp2_jll/src/nghttp2_jll.jl
@@ -0,0 +1,45 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/nghttp2_jll.jl
+baremodule nghttp2_jll
+using Base, Libdl
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export libnghttp2
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+libnghttp2_handle = C_NULL
+libnghttp2_path = ""
+
+if Sys.iswindows()
+    const libnghttp2 = "libnghttp2-14.dll"
+elseif Sys.isapple()
+    const libnghttp2 = "@rpath/libnghttp2.14.dylib"
+else
+    const libnghttp2 = "libnghttp2.so.14"
+end
+
+function __init__()
+    global libnghttp2_handle = dlopen(libnghttp2)
+    global libnghttp2_path = dlpath(libnghttp2_handle)
+    global artifact_dir = dirname(Sys.BINDIR)
+    LIBPATH[] = dirname(libnghttp2_path)
+    push!(LIBPATH_list, LIBPATH[])
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+get_libnghttp2_path() = libnghttp2_path
+
+end  # module nghttp2_jll
diff --git a/stdlib/nghttp2_jll/test/runtests.jl b/stdlib/nghttp2_jll/test/runtests.jl
new file mode 100644
index 00000000000000..07e0a3b8c7730c
--- /dev/null
+++ b/stdlib/nghttp2_jll/test/runtests.jl
@@ -0,0 +1,15 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, nghttp2_jll
+
+struct nghttp2_info
+    age::Cint
+    version_num::Cint
+    version_str::Cstring
+    proto_str::Cstring
+end
+
+@testset "nghttp2_jll" begin
+    info = unsafe_load(ccall((:nghttp2_version,libnghttp2), Ptr{nghttp2_info}, (Cint,), 0))
+    @test VersionNumber(unsafe_string(info.version_str)) == v"1.41.0"
+end
diff --git a/stdlib/p7zip_jll/Project.toml b/stdlib/p7zip_jll/Project.toml
new file mode 100644
index 00000000000000..75e04b6362fdf9
--- /dev/null
+++ b/stdlib/p7zip_jll/Project.toml
@@ -0,0 +1,16 @@
+name = "p7zip_jll"
+uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
+version = "16.2.1+1"
+
+[deps]
+Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
+Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
+
+[compat]
+julia = "1.6"
+
+[extras]
+Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+
+[targets]
+test = ["Test"]
diff --git a/stdlib/p7zip_jll/src/p7zip_jll.jl b/stdlib/p7zip_jll/src/p7zip_jll.jl
new file mode 100644
index 00000000000000..4320003b282f71
--- /dev/null
+++ b/stdlib/p7zip_jll/src/p7zip_jll.jl
@@ -0,0 +1,100 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+## dummy stub for https://github.com/JuliaBinaryWrappers/p7zip_jll.jl
+baremodule p7zip_jll
+using Base
+Base.Experimental.@compiler_options compile=min optimize=0 infer=false
+
+const PATH_list = String[]
+const LIBPATH_list = String[]
+
+export p7zip
+
+# These get calculated in __init__()
+const PATH = Ref("")
+const LIBPATH = Ref("")
+artifact_dir = ""
+p7zip_path = ""
+if Sys.iswindows()
+    const p7zip_exe = "7z.exe"
+else
+    const p7zip_exe = "7z"
+end
+
+if Sys.iswindows()
+    const LIBPATH_env = "PATH"
+    const LIBPATH_default = ""
+    const pathsep = ';'
+elseif Sys.isapple()
+    const LIBPATH_env = "DYLD_FALLBACK_LIBRARY_PATH"
+    const LIBPATH_default = "~/lib:/usr/local/lib:/lib:/usr/lib"
+    const pathsep = ':'
+else
+    const LIBPATH_env = "LD_LIBRARY_PATH"
+    const LIBPATH_default = ""
+    const pathsep = ':'
+end
+
+function adjust_ENV!(env::Dict{keytype(Base.EnvDict),valtype(Base.EnvDict)}, PATH::String, LIBPATH::String, adjust_PATH::Bool, adjust_LIBPATH::Bool)
+    if adjust_LIBPATH
+        LIBPATH_base = get(env, LIBPATH_env, expanduser(LIBPATH_default))
+        if !isempty(LIBPATH_base)
+            env[LIBPATH_env] = string(LIBPATH, pathsep, LIBPATH_base)
+        else
+            env[LIBPATH_env] = LIBPATH
+        end
+    end
+    if adjust_PATH && (LIBPATH_env != "PATH" || !adjust_LIBPATH)
+        if adjust_PATH
+            if !isempty(get(env, "PATH", ""))
+                env["PATH"] = string(PATH, pathsep, env["PATH"])
+            else
+                env["PATH"] = PATH
+            end
+        end
+    end
+    return env
+end
+
+function p7zip(f::Function; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true)
+    env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH)
+    withenv(env...) do
+        return f(p7zip_path)
+    end
+end
+function p7zip(; adjust_PATH::Bool = true, adjust_LIBPATH::Bool = true)
+    env = adjust_ENV!(copy(ENV), PATH[], LIBPATH[], adjust_PATH, adjust_LIBPATH)
+    return Cmd(Cmd([p7zip_path]); env)
+end
+
+function init_p7zip_path()
+    # Prefer our own bundled p7zip, but if we don't have one, pick it up off of the PATH
+    # If this is an in-tree build, `7z` will live in `bin`.  Otherwise, it'll be in `libexec`
+    for bundled_p7zip_path in (joinpath(Sys.BINDIR, Base.LIBEXECDIR, p7zip_exe),
+                               joinpath(Sys.BINDIR, p7zip_exe))
+        if isfile(bundled_p7zip_path)
+            global p7zip_path = abspath(bundled_p7zip_path)
+            return
+        end
+    end
+    global p7zip_path = something(Sys.which(p7zip_exe), p7zip_exe)
+end
+
+function __init__()
+    global artifact_dir = dirname(Sys.BINDIR)
+    init_p7zip_path()
+    PATH[] = dirname(p7zip_path)
+    push!(PATH_list, PATH[])
+    append!(LIBPATH_list, [joinpath(Sys.BINDIR, Base.LIBDIR, "julia"), joinpath(Sys.BINDIR, Base.LIBDIR)])
+    LIBPATH[] = join(LIBPATH_list, pathsep)
+end
+
+# JLLWrappers API compatibility shims.  Note that not all of these will really make sense.
+# For instance, `find_artifact_dir()` won't actually be the artifact directory, because
+# there isn't one.  It instead returns the overall Julia prefix.
+is_available() = true
+find_artifact_dir() = artifact_dir
+dev_jll() = error("stdlib JLLs cannot be dev'ed")
+best_wrapper = nothing
+
+end  # module p7zip_jll
diff --git a/stdlib/p7zip_jll/test/runtests.jl b/stdlib/p7zip_jll/test/runtests.jl
new file mode 100644
index 00000000000000..4497e85b74d891
--- /dev/null
+++ b/stdlib/p7zip_jll/test/runtests.jl
@@ -0,0 +1,7 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Libdl, p7zip_jll
+
+@testset "p7zip_jll" begin
+    @test isfile(p7zip_jll.p7zip_path)
+end
diff --git a/sysimage.mk b/sysimage.mk
index 1b78b247a408e3..2d154672d81306 100644
--- a/sysimage.mk
+++ b/sysimage.mk
@@ -15,7 +15,7 @@ VERSDIR := v`cut -d. -f1-2 < $(JULIAHOME)/VERSION`
 $(build_private_libdir)/%.$(SHLIB_EXT): $(build_private_libdir)/%-o.a
 	@$(call PRINT_LINK, $(CXX) $(LDFLAGS) -shared $(fPIC) -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -o $@ \
 		$(WHOLE_ARCHIVE) $< $(NO_WHOLE_ARCHIVE) \
-		$(if $(findstring -debug,$(notdir $@)),-ljulia-debug,-ljulia) \
+		$(if $(findstring -debug,$(notdir $@)),-ljulia-internal-debug -ljulia-debug,-ljulia-internal -ljulia) \
 		$$([ $(OS) = WINNT ] && echo '' -lssp))
 	@$(INSTALL_NAME_CMD)$(notdir $@) $@
 	@$(DSYMUTIL) $@
@@ -60,12 +60,13 @@ RELBUILDROOT := $(call rel_path,$(JULIAHOME)/base,$(BUILDROOT)/base)/ # <-- make
 $(build_private_libdir)/corecompiler.ji: $(COMPILER_SRCS)
 	@$(call PRINT_JULIA, cd $(JULIAHOME)/base && \
 	$(call spawn,$(JULIA_EXECUTABLE)) -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp \
-		--startup-file=no --warn-overwrite=yes -g0 -O0 compiler/compiler.jl)
+		--startup-file=no --warn-overwrite=yes -g$(BOOTSTRAP_DEBUG_LEVEL) -O0 compiler/compiler.jl)
 	@mv $@.tmp $@
 
 $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAHOME)/VERSION $(BASE_SRCS) $(STDLIB_SRCS)
 	@$(call PRINT_JULIA, cd $(JULIAHOME)/base && \
-	if ! JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) $(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \
+	if ! JULIA_BINDIR=$(call cygpath_w,$(build_bindir)) WINEPATH="$(call cygpath_w,$(build_bindir));$$WINEPATH" \
+			$(call spawn, $(JULIA_EXECUTABLE)) -g1 -O0 -C "$(JULIA_CPU_TARGET)" --output-ji $(call cygpath_w,$@).tmp $(JULIA_SYSIMG_BUILD_FLAGS) \
 			--startup-file=no --warn-overwrite=yes --sysimage $(call cygpath_w,$<) sysimg.jl $(RELBUILDROOT); then \
 		echo '*** This error might be fixed by running `make clean`. If the error persists$(COMMA) try `make cleanall`. ***'; \
 		false; \
@@ -75,8 +76,10 @@ $(build_private_libdir)/sys.ji: $(build_private_libdir)/corecompiler.ji $(JULIAH
 define sysimg_builder
 $$(build_private_libdir)/sys$1-o.a $$(build_private_libdir)/sys$1-bc.a : $$(build_private_libdir)/sys$1-%.a : $$(build_private_libdir)/sys.ji
 	@$$(call PRINT_JULIA, cd $$(JULIAHOME)/base && \
-	if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) $$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \
-		--startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \
+	if ! JULIA_BINDIR=$$(call cygpath_w,$(build_bindir)) WINEPATH="$$(call cygpath_w,$$(build_bindir));$$$$WINEPATH" \
+			JULIA_NUM_THREADS=1 \
+			$$(call spawn, $3) $2 -C "$$(JULIA_CPU_TARGET)" --output-$$* $$(call cygpath_w,$$@).tmp $$(JULIA_SYSIMG_BUILD_FLAGS) \
+			--startup-file=no --warn-overwrite=yes --sysimage $$(call cygpath_w,$$<) $$(call cygpath_w,$$(JULIAHOME)/contrib/generate_precompile.jl) $(JULIA_PRECOMPILE); then \
 		echo '*** This error is usually fixed by running `make clean`. If the error persists$$(COMMA) try `make cleanall`. ***'; \
 		false; \
 	fi )
diff --git a/test/Makefile b/test/Makefile
index 77a71c6d684d8f..24e137a5b14925 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -7,7 +7,7 @@ STDLIBDIR := $(build_datarootdir)/julia/stdlib/$(VERSDIR)
 # TODO: this Makefile ignores BUILDDIR, except for computing JULIA_EXECUTABLE
 
 TESTGROUPS = unicode strings compiler
-TESTS = all stdlib $(TESTGROUPS) \
+TESTS = all default stdlib $(TESTGROUPS) \
 		$(patsubst $(STDLIBDIR)/%/,%,$(dir $(wildcard $(STDLIBDIR)/*/.))) \
 		$(filter-out runtests testdefs, \
 			$(patsubst $(SRCDIR)/%.jl,%,$(wildcard $(SRCDIR)/*.jl))) \
@@ -19,15 +19,15 @@ EMBEDDING_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/embedding" "CC=$(CC
 
 GCEXT_ARGS := "JULIA=$(JULIA_EXECUTABLE)" "BIN=$(SRCDIR)/gcext" "CC=$(CC)"
 
-default: all
+default:
 
 $(TESTS):
 	@cd $(SRCDIR) && \
-	$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no ./runtests.jl $@)
+	$(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl $@)
 
 $(addprefix revise-, $(TESTS)): revise-% :
 	@cd $(SRCDIR) && \
-    $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no ./runtests.jl --revise $*)
+    $(call PRINT_JULIA, $(call spawn,$(JULIA_EXECUTABLE)) --check-bounds=yes --startup-file=no --depwarn=error ./runtests.jl --revise $*)
 
 embedding:
 	@$(MAKE) -C $(SRCDIR)/$@ check $(EMBEDDING_ARGS)
@@ -35,8 +35,11 @@ embedding:
 gcext:
 	@$(MAKE) -C $(SRCDIR)/$@ check $(GCEXT_ARGS)
 
+clangsa:
+	@$(MAKE) -C $(SRCDIR)/$@
+
 clean:
 	@$(MAKE) -C embedding $@ $(EMBEDDING_ARGS)
 	@$(MAKE) -C gcext $@ $(GCEXT_ARGS)
 
-.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) embedding gcext clean
+.PHONY: $(TESTS) $(addprefix revise-, $(TESTS)) embedding gcext clangsa clean
diff --git a/test/TestPkg/Project.toml b/test/TestPkg/Project.toml
index 0786722612bf30..0dfe48c3e9acb5 100644
--- a/test/TestPkg/Project.toml
+++ b/test/TestPkg/Project.toml
@@ -1,6 +1,6 @@
 name = "TestPkg"
 uuid = "69145d58-7df6-11e8-0660-cf7622583916"
-
+version = "1.2.3"
 
 [deps]
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
diff --git a/test/abstractarray.jl b/test/abstractarray.jl
index 2db1638c8c950a..df2dbe1c198b90 100644
--- a/test/abstractarray.jl
+++ b/test/abstractarray.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using Random, LinearAlgebra, SparseArrays
+using Random, LinearAlgebra
 
 A = rand(5,4,3)
 @testset "Bounds checking" begin
@@ -93,6 +93,11 @@ end
     @test checkbounds(Bool, A, trues(1, 5), trues(1, 4, 1), trues(1, 1, 2)) == false
     @test checkbounds(Bool, A, trues(1, 5), trues(1, 5, 1), trues(1, 1, 3)) == false
     @test checkbounds(Bool, A, trues(1, 5), :, 2) == false
+    @test checkbounds(Bool, A, trues(5, 4), trues(3)) == true
+    @test checkbounds(Bool, A, trues(4, 4), trues(3)) == true
+    @test checkbounds(Bool, A, trues(5, 4), trues(2)) == false
+    @test checkbounds(Bool, A, trues(6, 4), trues(3)) == false
+    @test checkbounds(Bool, A, trues(5, 4), trues(4)) == false
 end
 
 @testset "array of CartesianIndex" begin
@@ -126,6 +131,9 @@ end
             @test CartesianIndices(i)[1] == CartesianIndex()
             @test_throws BoundsError CartesianIndices(i)[2]
             @test_throws BoundsError CartesianIndices(i)[1:2]
+            io = IOBuffer()
+            show(io, CartesianIndices(i))
+            @test String(take!(io)) == "CartesianIndices(())"
         end
     end
 
@@ -155,7 +163,7 @@ end
         @test last(li)  == li[3] == 3
         io = IOBuffer()
         show(io, ci)
-        @test String(take!(io)) == "CartesianIndex{1}[CartesianIndex(2,), CartesianIndex(3,), CartesianIndex(4,)]"
+        @test String(take!(io)) == "CartesianIndices((2:4,))"
     end
 
     @testset "2-dimensional" begin
@@ -181,6 +189,9 @@ end
         @test linear[2:3] === 2:3
         @test linear[3:-1:1] === 3:-1:1
         @test_throws BoundsError linear[4:13]
+        io = IOBuffer()
+        show(io, cartesian)
+        @test String(take!(io)) == "CartesianIndices((4, 3))"
     end
 
     @testset "3-dimensional" begin
@@ -225,6 +236,58 @@ end
     end
 end
 
+@testset "LinearIndices" begin
+    @testset "constructors" begin
+        for oinds in [
+            (2, 3),
+            (UInt8(2), 3),
+            (2, UInt8(3)),
+            (2, 1:3),
+            (Base.OneTo(2), 1:3)
+        ]
+            R = LinearIndices(oinds)
+            @test size(R) == (2, 3)
+            @test axes(R) == (Base.OneTo(2), Base.OneTo(3))
+            @test R[begin] == 1
+            @test R[end] == 6
+        end
+
+        for oinds in [(2, ), (2, 3), (2, 3, 4)]
+            R = CartesianIndices(oinds)
+            @test size(R) == oinds
+        end
+    end
+
+    @testset "IdentityUnitRange" begin
+        function _collect(A)
+            rst = eltype(A)[]
+            for i in A
+                push!(rst, i)
+            end
+            rst
+        end
+        function _simd_collect(A)
+            rst = eltype(A)[]
+            @simd for i in A
+                push!(rst, i)
+            end
+            rst
+        end
+
+        for oinds in [
+            (Base.IdentityUnitRange(0:1),),
+            (Base.IdentityUnitRange(0:1), Base.IdentityUnitRange(0:2)),
+            (Base.IdentityUnitRange(0:1), Base.OneTo(3)),
+        ]
+            R = LinearIndices(oinds)
+            @test axes(R) === oinds
+            @test _collect(R) == _simd_collect(R) == vec(collect(R))
+        end
+        R = LinearIndices((Base.IdentityUnitRange(0:1), 0:1))
+        @test axes(R) == (Base.IdentityUnitRange(0:1), Base.OneTo(2))
+    end
+end
+
 # token type on which to dispatch testing methods in order to avoid potential
 # name conflicts elsewhere in the base test suite
 mutable struct TestAbstractArray end
@@ -242,10 +305,10 @@ function test_scalar_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where
     B = T(A)
     @test A == B
     # Test indexing up to 5 dimensions
-    trailing5 = CartesianIndex(ntuple(x->1, max(ndims(B)-5, 0)))
-    trailing4 = CartesianIndex(ntuple(x->1, max(ndims(B)-4, 0)))
-    trailing3 = CartesianIndex(ntuple(x->1, max(ndims(B)-3, 0)))
-    trailing2 = CartesianIndex(ntuple(x->1, max(ndims(B)-2, 0)))
+    trailing5 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-5, 0)))
+    trailing4 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-4, 0)))
+    trailing3 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-3, 0)))
+    trailing2 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-2, 0)))
     i=0
     for i5 = 1:size(B, 5)
         for i4 = 1:size(B, 4)
@@ -362,10 +425,10 @@ function test_vector_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where
         N = prod(shape)
         A = reshape(Vector(1:N), shape)
         B = T(A)
-        trailing5 = CartesianIndex(ntuple(x->1, max(ndims(B)-5, 0)))
-        trailing4 = CartesianIndex(ntuple(x->1, max(ndims(B)-4, 0)))
-        trailing3 = CartesianIndex(ntuple(x->1, max(ndims(B)-3, 0)))
-        trailing2 = CartesianIndex(ntuple(x->1, max(ndims(B)-2, 0)))
+        trailing5 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-5, 0)))
+        trailing4 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-4, 0)))
+        trailing3 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-3, 0)))
+        trailing2 = CartesianIndex(ntuple(Returns(1), max(ndims(B)-2, 0)))
         idxs = rand(1:N, 3, 3, 3)
         @test B[idxs] == A[idxs] == idxs
         @test B[vec(idxs)] == A[vec(idxs)] == vec(idxs)
@@ -401,6 +464,13 @@ function test_vector_indexing(::Type{T}, shape, ::Type{TestAbstractArray}) where
             @test B[mask1, mask2, trailing2] == A[mask1, mask2, trailing2] ==
                 B[LinearIndices(mask1)[findall(mask1)], LinearIndices(mask2)[findall(mask2)], trailing2]
             @test B[mask1, 1, trailing2] == A[mask1, 1, trailing2] == LinearIndices(mask)[findall(mask1)]
+
+            if ndims(B) > 1
+                maskfront = bitrand(shape[1:end-1])
+                Bslice = B[ntuple(i->(:), ndims(B)-1)..., 1]
+                @test B[maskfront,1] == Bslice[maskfront]
+                @test size(B[maskfront, 1:1]) == (sum(maskfront), 1)
+            end
         end
     end
 end
@@ -450,13 +520,16 @@ function test_primitives(::Type{T}, shape, ::Type{TestAbstractArray}) where T
     @test convert(Matrix, Y) == Y
     @test convert(Matrix, view(Y, 1:2, 1:2)) == Y
     @test_throws MethodError convert(Matrix, X)
+
+    # convert(::Type{Union{}}, A::AbstractMatrix)
+    @test_throws MethodError convert(Union{}, X)
 end
 
 mutable struct TestThrowNoGetindex{T} <: AbstractVector{T} end
 @testset "ErrorException if getindex is not defined" begin
     Base.length(::TestThrowNoGetindex) = 2
     Base.size(::TestThrowNoGetindex) = (2,)
-    @test_throws ErrorException isassigned(TestThrowNoGetindex{Float64}(), 1)
+    @test_throws Base.CanonicalIndexError isassigned(TestThrowNoGetindex{Float64}(), 1)
 end
 
 function test_in_bounds(::Type{TestAbstractArray})
@@ -492,10 +565,10 @@ end
 function test_getindex_internals(::Type{TestAbstractArray})
     U = UnimplementedFastArray{Int, 2}()
     V = UnimplementedSlowArray{Int, 2}()
-    @test_throws ErrorException getindex(U, 1)
-    @test_throws ErrorException Base.unsafe_getindex(U, 1)
-    @test_throws ErrorException getindex(V, 1, 1)
-    @test_throws ErrorException Base.unsafe_getindex(V, 1, 1)
+    @test_throws Base.CanonicalIndexError getindex(U, 1)
+    @test_throws Base.CanonicalIndexError Base.unsafe_getindex(U, 1)
+    @test_throws Base.CanonicalIndexError getindex(V, 1, 1)
+    @test_throws Base.CanonicalIndexError Base.unsafe_getindex(V, 1, 1)
 end
 
 function test_setindex!_internals(::Type{T}, shape, ::Type{TestAbstractArray}) where T
@@ -510,10 +583,10 @@ end
 function test_setindex!_internals(::Type{TestAbstractArray})
     U = UnimplementedFastArray{Int, 2}()
     V = UnimplementedSlowArray{Int, 2}()
-    @test_throws ErrorException setindex!(U, 0, 1)
-    @test_throws ErrorException Base.unsafe_setindex!(U, 0, 1)
-    @test_throws ErrorException setindex!(V, 0, 1, 1)
-    @test_throws ErrorException Base.unsafe_setindex!(V, 0, 1, 1)
+    @test_throws Base.CanonicalIndexError setindex!(U, 0, 1)
+    @test_throws Base.CanonicalIndexError Base.unsafe_setindex!(U, 0, 1)
+    @test_throws Base.CanonicalIndexError setindex!(V, 0, 1, 1)
+    @test_throws Base.CanonicalIndexError Base.unsafe_setindex!(V, 0, 1, 1)
 end
 
 function test_get(::Type{TestAbstractArray})
@@ -534,6 +607,31 @@ function test_get(::Type{TestAbstractArray})
     @test get(TSlow([]), (), 0) == 0
     @test get(TSlow([1]), (), 0) == 1
     @test get(TSlow(fill(1)), (), 0) == 1
+
+    global c = 0
+    f() = (global c = c+1; 0)
+    @test get(f, A, ()) == 0
+    @test c == 1
+    @test get(f, B, ()) == 0
+    @test c == 2
+    @test get(f, A, (1,)) == get(f, A, 1) == A[1] == 1
+    @test c == 2
+    @test get(f, B, (1,)) == get(f, B, 1) == B[1] == 1
+    @test c == 2
+    @test get(f, A, (25,)) == get(f, A, 25) == 0
+    @test c == 4
+    @test get(f, B, (25,)) == get(f, B, 25) == 0
+    @test c == 6
+    @test get(f, A, (1,1,1)) == A[1,1,1] == 1
+    @test get(f, B, (1,1,1)) == B[1,1,1] == 1
+    @test get(f, A, (1,1,3)) == 0
+    @test c == 7
+    @test get(f, B, (1,1,3)) == 0
+    @test c == 8
+    @test get(f, TSlow([]), ()) == 0
+    @test c == 9
+    @test get(f, TSlow([1]), ()) == 1
+    @test get(f, TSlow(fill(1)), ()) == 1
 end
 
 function test_cat(::Type{TestAbstractArray})
@@ -541,16 +639,24 @@ function test_cat(::Type{TestAbstractArray})
     b_int = reshape([1:27...], 3, 3, 3)
     b_float = reshape(Float64[1:27...], 3, 3, 3)
     b2hcat = Array{Float64}(undef, 3, 6, 3)
+    b2vcat = Array{Float64}(undef, 6, 3, 3)
     b1 = reshape([1:9...], 3, 3)
     b2 = reshape([10:18...], 3, 3)
     b3 = reshape([19:27...], 3, 3)
     b2hcat[:, :, 1] = hcat(b1, b1)
     b2hcat[:, :, 2] = hcat(b2, b2)
     b2hcat[:, :, 3] = hcat(b3, b3)
+    b2vcat[:, :, 1] = vcat(b1, b1)
+    b2vcat[:, :, 2] = vcat(b2, b2)
+    b2vcat[:, :, 3] = vcat(b3, b3)
     b3hcat = Array{Float64}(undef, 3, 9, 3)
     b3hcat[:, :, 1] = hcat(b1, b1, b1)
     b3hcat[:, :, 2] = hcat(b2, b2, b2)
     b3hcat[:, :, 3] = hcat(b3, b3, b3)
+    b3vcat = Array{Float64}(undef, 9, 3, 3)
+    b3vcat[:, :, 1] = vcat(b1, b1, b1)
+    b3vcat[:, :, 2] = vcat(b2, b2, b2)
+    b3vcat[:, :, 3] = vcat(b3, b3, b3)
     B = TSlow(b_int)
     B1 = TSlow([1:24...])
     B2 = TSlow([1:25...])
@@ -561,14 +667,20 @@ function test_cat(::Type{TestAbstractArray})
     i = rand(1:10)
 
     @test cat(;dims=i) == Any[]
+    @test Base.typed_hcat(Float64) == Vector{Float64}()
+    @test Base.typed_vcat(Float64) == Vector{Float64}()
     @test vcat() == Any[]
     @test hcat() == Any[]
+    @test vcat(1, 1.0, 3, 3.0) == [1.0, 1.0, 3.0, 3.0]
     @test hcat(1, 1.0, 3, 3.0) == [1.0 1.0 3.0 3.0]
     @test_throws ArgumentError hcat(B1, B2)
     @test_throws ArgumentError vcat(C1, C2)
 
     @test vcat(B) == B
     @test hcat(B) == B
+    @test Base.typed_vcat(Float64, B) == TSlow(b_float)
+    @test Base.typed_vcat(Float64, B, B) == TSlow(b2vcat)
+    @test Base.typed_vcat(Float64, B, B, B) == TSlow(b3vcat)
     @test Base.typed_hcat(Float64, B) == TSlow(b_float)
     @test Base.typed_hcat(Float64, B, B) == TSlow(b2hcat)
     @test Base.typed_hcat(Float64, B, B, B) == TSlow(b3hcat)
@@ -614,6 +726,17 @@ function test_cat(::Type{TestAbstractArray})
     # 36041
     @test_throws MethodError cat(["a"], ["b"], dims=[1, 2])
     @test cat([1], [1], dims=[1, 2]) == I(2)
+
+    # inferrability
+    As = [zeros(2, 2) for _ = 1:2]
+    @test @inferred(cat(As...; dims=Val(3))) == zeros(2, 2, 2)
+    cat3v(As) = cat(As...; dims=Val(3))
+    @test @inferred(cat3v(As)) == zeros(2, 2, 2)
+    @test @inferred(cat(As...; dims=Val((1,2)))) == zeros(4, 4)
+
+    r = rand(Float32, 56, 56, 64, 1);
+    f(r) = cat(r, r, dims=(3,))
+    @inferred f(r);
 end
 
 function test_ind2sub(::Type{TestAbstractArray})
@@ -714,24 +837,6 @@ A = TSlowNIndexes(rand(2,2))
     @test @inferred(axes(rand(3,2), 3)) == 1:1
 end
 
-@testset "#17088" begin
-    n = 10
-    M = rand(n, n)
-    @testset "vector of vectors" begin
-        v = [[M]; [M]] # using vcat
-        @test size(v) == (2,)
-        @test !issparse(v)
-    end
-    @testset "matrix of vectors" begin
-        m1 = [[M] [M]] # using hcat
-        m2 = [[M] [M];] # using hvcat
-        @test m1 == m2
-        @test size(m1) == (1,2)
-        @test !issparse(m1)
-        @test !issparse(m2)
-    end
-end
-
 @testset "isinteger and isreal" begin
     @test all(isinteger, Diagonal(rand(1:5,5)))
     @test isreal(Diagonal(rand(5)))
@@ -751,6 +856,11 @@ end
 @testset "ndims and friends" begin
     @test ndims(Diagonal(rand(1:5,5))) == 2
     @test ndims(Diagonal{Float64}) == 2
+    @test ndims(Diagonal) == 2
+    @test ndims(Vector) == 1
+    @test ndims(Matrix) == 2
+    @test ndims(Array{<:Any, 0}) == 0
+    @test_throws MethodError ndims(Array)
 end
 
 @testset "Issue #17811" begin
@@ -773,6 +883,18 @@ end
 @testset "to_shape" begin
     @test Base.to_shape(()) === ()
     @test Base.to_shape(1) === 1
+    @test Base.to_shape(big(1)) === Base.to_shape(1)
+    @test Base.to_shape(Int8(1)) === Base.to_shape(1)
+end
+
+@testset "issue #39923: similar" begin
+    for ax in [(big(2), big(3)), (big(2), 3), (UInt64(2), 3), (2, UInt32(3)),
+        (big(2), Base.OneTo(3)), (Base.OneTo(2), Base.OneTo(big(3)))]
+
+        A = similar(ones(), Int, ax)
+        @test axes(A) === (Base.OneTo(2), Base.OneTo(3))
+        @test eltype(A) === Int
+    end
 end
 
 @testset "issue #19267" begin
@@ -786,6 +908,13 @@ end
     @test ndims((1:3)[:,:,1:1,:,:,[1]]) == 6
 end
 
+@testset "issue #38192" begin
+    img = cat([1 2; 3 4], [1 5; 6 7]; dims=3)
+    mask = img[:,:,1] .== img[:,:,2]
+    img[mask,2] .= 0
+    @test img == cat([1 2; 3 4], [0 5; 6 7]; dims=3)
+end
+
 @testset "dispatch loop introduced in #19305" begin
     Z22, O33 = fill(0, 2, 2), fill(1, 3, 3)
     @test [(1:2) Z22; O33] == [[1,2] Z22; O33] == [[1 2]' Z22; O33]
@@ -880,7 +1009,6 @@ end
         s = Vector([1, 2])
         for a = ([1], UInt[1], [3, 4, 5], UInt[3, 4, 5])
             @test s === copy!(s, Vector(a)) == Vector(a)
-            @test s === copy!(s, SparseVector(a)) == Vector(a)
         end
         # issue #35649
         s = [1, 2, 3, 4]
@@ -1141,3 +1269,344 @@ end
     @test last(itr, 1) == [itr[end]]
     @test_throws ArgumentError last(itr, -6)
 end
+
+@testset "Base.rest" begin
+    a = reshape(1:4, 2, 2)'
+    @test Base.rest(a) == a[:]
+    _, st = iterate(a)
+    @test Base.rest(a, st) == [3, 2, 4]
+end
+
+@testset "issue #37741, non-int cat" begin
+    @test [1; 1:BigInt(5)] == [1; 1:5]
+    @test [1:BigInt(5); 1] == [1:5; 1]
+end
+
+@testset "Base.isstored" begin
+    a = rand(3, 4, 5)
+    @test Base.isstored(a, 1, 2, 3)
+    @test_throws BoundsError Base.isstored(a, 4, 4, 5)
+    @test_throws BoundsError Base.isstored(a, 3, 5, 5)
+    @test_throws BoundsError Base.isstored(a, 3, 4, 6)
+end
+
+mutable struct TestPushArray{T, N} <: AbstractArray{T, N}
+    data::Array{T}
+end
+Base.push!(tpa::TestPushArray{T}, a::T) where T = push!(tpa.data, a)
+Base.pushfirst!(tpa::TestPushArray{T}, a::T) where T = pushfirst!(tpa.data, a)
+
+@testset "push! and pushfirst!" begin
+    a_orig = [1]
+    tpa = TestPushArray{Int, 2}(a_orig)
+    push!(tpa, 2, 3, 4, 5, 6)
+    @test tpa.data == collect(1:6)
+    a_orig = [1]
+    tpa = TestPushArray{Int, 2}(a_orig)
+    pushfirst!(tpa, 6, 5, 4, 3, 2)
+    @test tpa.data == reverse(collect(1:6))
+end
+
+@testset "splatting into hvcat" begin
+    t = (1, 2)
+    @test [t...; 3 4] == [1 2; 3 4]
+    @test [0 t...; t... 0] == [0 1 2; 1 2 0]
+    @test_throws ArgumentError [t...; 3 4 5]
+
+    @test Int[t...; 3 4] == [1 2; 3 4]
+    @test Int[0 t...; t... 0] == [0 1 2; 1 2 0]
+    @test_throws ArgumentError Int[t...; 3 4 5]
+end
+
+@testset "issue #39896, modified getindex " begin
+    for arr = ([1:10;], reshape([1.0:16.0;],4,4), reshape(['a':'h';],2,2,2))
+        for inds = (2:5, Base.OneTo(5), BigInt(3):BigInt(5), UInt(4):UInt(3),
+            Base.IdentityUnitRange(Base.OneTo(4)))
+            @test arr[inds] == arr[collect(inds)]
+            @test arr[inds] isa AbstractVector{eltype(arr)}
+        end
+    end
+    # Test that ranges and arrays behave identically for indices with 1-based axes
+    for r in (1:10, 1:1:10, Base.OneTo(10),
+        Base.IdentityUnitRange(Base.OneTo(10)), Base.IdentityUnitRange(1:10))
+        for inds = (2:5, Base.OneTo(5), BigInt(3):BigInt(5), UInt(4):UInt(3),
+            Base.IdentityUnitRange(Base.OneTo(4)))
+            @test r[inds] == r[collect(inds)] == collect(r)[inds] == collect(r)[collect(inds)]
+        end
+    end
+    for arr = ([1], reshape([1.0],1,1), reshape(['a'],1,1,1))
+        @test arr[true:true] == [arr[1]]
+        @test arr[true:true] isa AbstractVector{eltype(arr)}
+        @test arr[false:false] == []
+        @test arr[false:false] isa AbstractVector{eltype(arr)}
+    end
+    for arr = ([1:10;], reshape([1.0:16.0;],4,4), reshape(['a':'h';],2,2,2))
+        @test_throws BoundsError arr[true:true]
+        @test_throws BoundsError arr[false:false]
+    end
+end
+
+using Base: typed_hvncat
+@testset "hvncat" begin
+    a = fill(1, (2,3,2,4,5))
+    b = fill(2, (1,1,2,4,5))
+    c = fill(3, (1,2,2,4,5))
+    d = fill(4, (1,1,1,4,5))
+    e = fill(5, (1,1,1,4,5))
+    f = fill(6, (1,1,1,4,5))
+    g = fill(7, (2,3,1,4,5))
+    h = fill(8, (3,3,3,1,2))
+    i = fill(9, (3,2,3,3,2))
+    j = fill(10, (3,1,3,3,2))
+
+    result = [a; b c ;;; d e f ; g ;;;;; h ;;;; i j]
+    @test size(result) == (3,3,3,4,7)
+    @test result == [a; [b ;; c] ;;; [d e f] ; g ;;;;; h ;;;; i ;; j]
+    @test result == cat(cat([a ; b c], [d e f ; g], dims = 3), cat(h, [i j], dims = 4), dims = 5)
+
+    # terminating semicolons extend dimensions
+    @test [1;] == [1]
+    @test [1;;] == fill(1, (1,1))
+
+    for v in (1, fill(1), fill(1,1,1), fill(1, 1, 1, 1))
+        @test_throws ArgumentError [v; v;; v]
+        @test_throws ArgumentError [v; v;; v; v; v]
+        @test_throws ArgumentError [v; v; v;; v; v]
+        @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v;; v; v]
+        @test_throws ArgumentError [v; v;; v; v;;; v; v]
+        @test_throws ArgumentError [v; v;; v; v;;; v; v; v;; v; v]
+        @test_throws ArgumentError [v; v;; v; v;;; v; v;; v; v; v]
+        # ensure a wrong shape with the right number of elements doesn't pass through
+        @test_throws ArgumentError [v; v;; v; v;;; v; v; v; v]
+
+        @test [v; v;; v; v] == fill(1, ndims(v) == 3 ? (2, 2, 1) : (2,2))
+        @test [v; v;; v; v;;;] == fill(1, 2, 2, 1)
+        @test [v; v;; v; v] == fill(1, ndims(v) == 3 ? (2, 2, 1) : (2,2))
+        @test [v v; v v;;;] == fill(1, 2, 2, 1)
+        @test [v; v;; v; v;;; v; v;; v; v;;] == fill(1, 2, 2, 2)
+        @test [v; v; v;; v; v; v;;; v; v; v;; v; v; v;;] == fill(1, 3, 2, 2)
+        @test [v v; v v;;; v v; v v] == fill(1, 2, 2, 2)
+        @test [v v v; v v v;;; v v v; v v v] == fill(1, 2, 3, 2)
+    end
+
+    # mixed scalars and arrays work, for numbers and strings
+    for v = (1, "test")
+        @test [v v;;; fill(v, 1, 2)] == fill(v, 1, 2, 2)
+    end
+
+    # output dimensions are maximum of input dimensions and concatenation dimension
+    begin
+        v1 = fill(1, 1, 1)
+        v2 = fill(1, 1, 1, 1, 1)
+        v3 = fill(1, 1, 2, 1, 1)
+        @test [v1 ;;; v2] == [1 ;;; 1 ;;;;]
+        @test [v2 ;;; v1] == [1 ;;; 1 ;;;;]
+        @test [v3 ;;; v1 v1] == [1 1 ;;; 1 1 ;;;;]
+        @test [v1 v1 ;;; v3] == [1 1 ;;; 1 1 ;;;;]
+        @test [v2 v1 ;;; v1 v1] == [1 1 ;;; 1 1 ;;;;]
+        @test [v1 v1 ;;; v1 v2] == [1 1 ;;; 1 1 ;;;;]
+        @test [v2 ;;; 1] == [1 ;;; 1 ;;;;]
+        @test [1 ;;; v2] == [1 ;;; 1 ;;;;]
+        @test [v3 ;;; 1 v1] == [1 1 ;;; 1 1 ;;;;]
+        @test [v1 1 ;;; v3] == [1 1 ;;; 1 1 ;;;;]
+        @test [v2 1 ;;; v1 v1] == [1 1 ;;; 1 1 ;;;;]
+        @test [v1 1 ;;; v1 v2] == [1 1 ;;; 1 1 ;;;;]
+    end
+
+    # dims form
+    for v ∈ ((), (1,), ([1],), (1, [1]), ([1], 1), ([1], [1]))
+        # reject dimension < 0
+        @test_throws ArgumentError hvncat(-1, v...)
+
+        # reject shape tuple with no elements
+        @test_throws ArgumentError hvncat(((),), true, v...)
+    end
+
+    # reject dims or shape with negative or zero values
+    for v1 ∈ (-1, 0, 1)
+        for v2 ∈ (-1, 0, 1)
+            v1 == v2 == 1 && continue
+            for v3 ∈ ((), (1,), ([1],), (1, [1]), ([1], 1), ([1], [1]))
+                @test_throws ArgumentError hvncat((v1, v2), true, v3...)
+                @test_throws str->(occursin("`shape` argument must consist of positive integers", str) ||
+                                   occursin("reducing over an empty collection is not allowed", str)) hvncat(((v1,), (v2,)), true, v3...)
+            end
+        end
+    end
+
+    for v ∈ ((1, [1]), ([1], 1), ([1], [1]))
+        # reject shape with more than one end value
+        @test_throws ArgumentError hvncat(((1, 1),), true, v...)
+    end
+
+    for v ∈ ((1, 2, 3), (1, 2, [3]), ([1], [2], [3]))
+        # reject shape with more values in later level
+        @test_throws ArgumentError hvncat(((2, 1), (1, 1, 1)), true, v...)
+    end
+
+    # reject shapes that don't nest evenly between levels (e.g. 1 + 2 does not fit into 2)
+    @test_throws ArgumentError hvncat(((1, 2, 1), (2, 2), (4,)), true, [1 2], [3], [4], [1 2; 3 4])
+
+    # zero-length arrays are handled appropriately
+    @test [zeros(Int, 1, 2, 0) ;;; 1 3] == [1 3;;;]
+    @test [[] ;;; [] ;;; []] == Array{Any}(undef, 0, 1, 3)
+    @test [[] ; 1 ;;; 2 ; []] == [1 ;;; 2]
+    @test [[] ; [] ;;; [] ; []] == Array{Any}(undef, 0, 1, 2)
+    @test [[] ; 1 ;;; 2] == [1 ;;; 2]
+    @test [[] ; [] ;;; [] ;;; []] == Array{Any}(undef, 0, 1, 3)
+    z = zeros(Int, 0, 0, 0)
+    [z z ; z ;;; z ;;; z] == Array{Int}(undef, 0, 0, 0)
+
+    for v1 ∈ (zeros(Int, 0, 0), zeros(Int, 0, 0, 0, 0), zeros(Int, 0, 0, 0, 0, 0, 0, 0))
+        for v2 ∈ (1, [1])
+            for v3 ∈ (2, [2])
+                @test_throws ArgumentError [v1 ;;; v2]
+                @test_throws ArgumentError [v1 ;;; v2 v3]
+                @test_throws ArgumentError [v1 v1 ;;; v2 v3]
+            end
+        end
+    end
+    v1 = zeros(Int, 0, 0, 0)
+    for v2 ∈ (1, [1])
+        for v3 ∈ (2, [2])
+            # current behavior, not potentially dangerous.
+            # should throw error like above loop
+            @test [v1 ;;; v2 v3] == [v2 v3;;;]
+            @test_throws ArgumentError [v1 ;;; v2]
+            @test_throws ArgumentError [v1 v1 ;;; v2 v3]
+        end
+    end
+
+    # 0-dimension behaviors
+    # exactly one argument, placed in an array
+    # if already an array, copy, with type conversion as necessary
+    @test_throws ArgumentError hvncat(0)
+    @test hvncat(0, 1) == fill(1)
+    @test hvncat(0, [1]) == [1]
+    @test_throws ArgumentError hvncat(0, 1, 1)
+    @test_throws ArgumentError typed_hvncat(Float64, 0)
+    @test typed_hvncat(Float64, 0, 1) == fill(1.0)
+    @test typed_hvncat(Float64, 0, [1]) == Float64[1.0]
+    @test_throws ArgumentError typed_hvncat(Float64, 0, 1, 1)
+    @test_throws ArgumentError hvncat((), true) == []
+    @test hvncat((), true, 1) == fill(1)
+    @test hvncat((), true, [1]) == [1]
+    @test_throws ArgumentError hvncat((), true, 1, 1)
+    @test_throws ArgumentError typed_hvncat(Float64, (), true) == Float64[]
+    @test typed_hvncat(Float64, (), true, 1) == fill(1.0)
+    @test typed_hvncat(Float64, (), true, [1]) == [1.0]
+    @test_throws ArgumentError typed_hvncat(Float64, (), true, 1, 1)
+
+    # 1-dimension behaviors
+    # int form
+    @test hvncat(1) == []
+    @test hvncat(1, 1) == [1]
+    @test hvncat(1, [1]) == [1]
+    @test hvncat(1, [1 2; 3 4]) == [1 2; 3 4]
+    @test hvncat(1, 1, 1) == [1 ; 1]
+    @test typed_hvncat(Float64, 1) == Float64[]
+    @test typed_hvncat(Float64, 1, 1) == Float64[1.0]
+    @test typed_hvncat(Float64, 1, [1]) == Float64[1.0]
+    @test typed_hvncat(Float64, 1, 1, 1) == Float64[1.0 ; 1.0]
+    # dims form
+    @test_throws ArgumentError hvncat((1,), true)
+    @test hvncat((2,), true, 1, 1) == [1; 1]
+    @test hvncat((2,), true, [1], [1]) == [1; 1]
+    @test_throws ArgumentError hvncat((2,), true, 1)
+    @test typed_hvncat(Float64, (2,), true, 1, 1) == Float64[1.0; 1.0]
+    @test typed_hvncat(Float64, (2,), true, [1], [1]) == Float64[1.0; 1.0]
+    @test_throws ArgumentError typed_hvncat(Float64, (2,), true, 1)
+    # row_first has no effect with just one dimension of the dims form
+    @test hvncat((2,), false, 1, 1) == [1; 1]
+    @test typed_hvncat(Float64, (2,), false, 1, 1) == Float64[1.0; 1.0]
+    # shape form
+    @test hvncat(((2,),), true, 1, 1) == [1 1]
+    @test hvncat(((2,),), true, [1], [1]) == [1 1]
+    @test_throws ArgumentError hvncat(((2,),), true, 1)
+    @test hvncat(((2,),), false, 1, 1) == [1; 1]
+    @test hvncat(((2,),), false, [1], [1]) == [1; 1]
+    @test typed_hvncat(Float64, ((2,),), true, 1, 1) == Float64[1.0 1.0]
+    @test typed_hvncat(Float64, ((2,),), true, [1], [1]) == Float64[1.0 1.0]
+    @test_throws ArgumentError typed_hvncat(Float64, ((2,),), true, 1)
+    @test typed_hvncat(Float64, ((2,),), false, 1, 1) == Float64[1.0; 1.0]
+    @test typed_hvncat(Float64, ((2,),), false, [1], [1]) == Float64[1.0; 1.0]
+
+    # zero-value behaviors for int form above dimension zero
+    # e.g. [;;], [;;;], though that isn't valid syntax
+    @test [] == hvncat(1) isa Array{Any, 1}
+    @test Array{Any, 2}(undef, 0, 0) == hvncat(2) isa Array{Any, 2}
+    @test Array{Any, 3}(undef, 0, 0, 0) == hvncat(3) isa Array{Any, 3}
+    @test Int[] == typed_hvncat(Int, 1) isa Array{Int, 1}
+    @test Array{Int, 2}(undef, 0, 0) == typed_hvncat(Int, 2) isa Array{Int, 2}
+    @test Array{Int, 3}(undef, 0, 0, 0) == typed_hvncat(Int, 3) isa Array{Int, 3}
+
+    # Issue 43933 - semicolon precedence mistake should produce an error
+    @test_throws ArgumentError [[1 1]; 2 ;; 3 ; [3 4]]
+    @test_throws ArgumentError [[1 ;;; 1]; 2 ;;; 3 ; [3 ;;; 4]]
+end
+
+@testset "keepat!" begin
+    a = [1:6;]
+    @test a === keepat!(a, 1:5)
+    @test a == 1:5
+    @test keepat!(a, [2, 4]) == [2, 4]
+    @test isempty(keepat!(a, []))
+
+    a = [1:6;]
+    @test_throws BoundsError keepat!(a, 1:10) # make sure this is not a no-op
+    @test_throws BoundsError keepat!(a, 2:10)
+    @test_throws ArgumentError keepat!(a, [2, 4, 3])
+
+    b = BitVector([1, 1, 1, 0, 0])
+    @test b === keepat!(b, 1:5)
+    @test b == [1, 1, 1, 0, 0]
+    @test keepat!(b, 2:4) == [1, 1, 0]
+    @test_throws BoundsError keepat!(a, -1:10)
+    @test_throws ArgumentError keepat!(a, [2, 1])
+    @test isempty(keepat!(a, []))
+end
+
+@testset "reshape methods for AbstractVectors" begin
+    r = Base.IdentityUnitRange(3:4)
+    @test reshape(r, :) === reshape(r, (:,)) === r
+end
+
+@testset "strides for ReshapedArray" begin
+    # Type-based contiguous check is tested in test/compiler/inline.jl
+    # General contiguous check
+    a = view(rand(10,10), 1:10, 1:10)
+    @test strides(vec(a)) == (1,)
+    b = view(parent(a), 1:9, 1:10)
+    @test_throws "Parent must be contiguous." strides(vec(b))
+    # StridedVector parent
+    for n in 1:3
+        a = view(collect(1:60n), 1:n:60n)
+        @test strides(reshape(a, 3, 4, 5)) == (n, 3n, 12n)
+        @test strides(reshape(a, 5, 6, 2)) == (n, 5n, 30n)
+        b = view(parent(a), 60n:-n:1)
+        @test strides(reshape(b, 3, 4, 5)) == (-n, -3n, -12n)
+        @test strides(reshape(b, 5, 6, 2)) == (-n, -5n, -30n)
+    end
+end
+
+@testset "stride for 0 dims array #44087" begin
+    struct Fill44087 <: AbstractArray{Int,0}
+        a::Int
+    end
+    # `stride` shouldn't work if `strides` is not defined.
+    @test_throws MethodError stride(Fill44087(1), 1)
+    # It is intentionally to only check the return type. (The value is somehow arbitrary)
+    @test stride(fill(1), 1) isa Int
+    @test stride(reinterpret(Float64, fill(Int64(1))), 1) isa Int
+    @test stride(reinterpret(reshape, Float64, fill(Int64(1))), 1) isa Int
+    @test stride(Base.ReshapedArray(fill(1), (), ()), 1) isa Int
+end
+
+@testset "to_indices inference (issue #42001 #44059)" begin
+    @test (@inferred to_indices([], ntuple(Returns(CartesianIndex(1)), 32))) == ntuple(Returns(1), 32)
+    @test (@inferred to_indices([], ntuple(Returns(CartesianIndices(1:1)), 32))) == ntuple(Returns(Base.OneTo(1)), 32)
+    @test (@inferred to_indices([], (CartesianIndex(),1,CartesianIndex(1,1,1)))) == ntuple(Returns(1), 4)
+    A = randn(2,2,2,2,2,2);
+    i = CartesianIndex((1,1))
+    @test (@inferred A[i,i,i]) === A[1]
+end
diff --git a/test/ambiguous.jl b/test/ambiguous.jl
index 9c9927ece4b452..8d8c3efab53b9b 100644
--- a/test/ambiguous.jl
+++ b/test/ambiguous.jl
@@ -11,8 +11,6 @@ ambig(x::Int, y::Int) = 4
 ambig(x::Number, y) = 5
 # END OF LINE NUMBER SENSITIVITY
 
-using LinearAlgebra, SparseArrays
-
 # For curmod_*
 include("testenv.jl")
 
@@ -68,7 +66,7 @@ end
 ## Other ways of accessing functions
 # Test that non-ambiguous cases work
 let io = IOBuffer()
-    @test precompile(ambig, (Int, Int)) == true
+    @test precompile(ambig, (Int, Int))
     cf = @eval @cfunction(ambig, Int, (Int, Int))
     @test ccall(cf, Int, (Int, Int), 1, 2) == 4
     @test length(code_lowered(ambig, (Int, Int))) == 1
@@ -77,7 +75,7 @@ end
 
 # Test that ambiguous cases fail appropriately
 let io = IOBuffer()
-    @test precompile(ambig, (UInt8, Int)) == false
+    @test !precompile(ambig, (UInt8, Int))
     cf = @eval @cfunction(ambig, Int, (UInt8, Int))  # test for a crash (doesn't throw an error)
     @test_throws(MethodError(ambig, (UInt8(1), Int(2)), get_world_counter()),
                  ccall(cf, Int, (UInt8, Int), 1, 2))
@@ -102,6 +100,24 @@ ambig(x::Union{Char, Int16}) = 's'
 @test ambig(Int16(1)) == 's'
 
 # Automatic detection of ambiguities
+
+const allowed_undefineds = Set([
+    GlobalRef(Base, :active_repl),
+    GlobalRef(Base, :active_repl_backend),
+    GlobalRef(Base.Filesystem, :JL_O_TEMPORARY),
+    GlobalRef(Base.Filesystem, :JL_O_SHORT_LIVED),
+    GlobalRef(Base.Filesystem, :JL_O_SEQUENTIAL),
+    GlobalRef(Base.Filesystem, :JL_O_RANDOM),
+])
+
+let Distributed = get(Base.loaded_modules,
+                      Base.PkgId(Base.UUID("8ba89e20-285c-5b6f-9357-94700520ee1b"), "Distributed"),
+                      nothing)
+    if Distributed !== nothing
+        push!(allowed_undefineds, GlobalRef(Distributed, :cluster_manager))
+    end
+end
+
 module Ambig1
 ambig(x, y) = 1
 ambig(x::Integer, y) = 2
@@ -149,21 +165,36 @@ end
 ambs = detect_ambiguities(Ambig5)
 @test length(ambs) == 2
 
+
+using LinearAlgebra, SparseArrays, SuiteSparse
+
 # Test that Core and Base are free of ambiguities
 # not using isempty so this prints more information when it fails
-@test detect_ambiguities(Core, Base; imported=true, recursive=true, ambiguous_bottom=false) == []
-# some ambiguities involving Union{} type parameters are expected, but not required
-@test !isempty(detect_ambiguities(Core, Base; imported=true, ambiguous_bottom=true))
+@testset "detect_ambiguities" begin
+    let ambig = Set{Any}(((m1.sig, m2.sig) for (m1, m2) in detect_ambiguities(Core, Base; recursive=true, ambiguous_bottom=false, allowed_undefineds)))
+        good = true
+        for (sig1, sig2) in ambig
+            @test sig1 === sig2 # print this ambiguity
+            good = false
+        end
+        @test good
+    end
 
-module AmbigStdlib
-using Test
+    # some ambiguities involving Union{} type parameters are expected, but not required
+    let ambig = Set(detect_ambiguities(Core; recursive=true, ambiguous_bottom=true))
+        @test !isempty(ambig)
+    end
 
-# List standard libraries.  Exclude modules such as Main.
-modules = [mod for (pkg, mod) in Base.loaded_modules if pkg.uuid !== nothing]
+    STDLIB_DIR = Sys.STDLIB
+    STDLIBS = filter!(x -> x != "LinearAlgebra" && x != "SparseArrays" && # Some packages run this test themselves
+                           isfile(joinpath(STDLIB_DIR, x, "src", "$(x).jl")),
+                      readdir(STDLIB_DIR))
 
-# not using isempty so this prints more information when it fails
-@test detect_ambiguities(modules...; imported=true, recursive=true) == []
-end  # module
+    # List standard libraries. Exclude modules such as Main, Base, and Core.
+    let modules = [mod for (pkg, mod) in Base.loaded_modules if pkg.uuid !== nothing && String(pkg.name) in STDLIBS]
+        @test isempty(detect_ambiguities(modules...; recursive=true, allowed_undefineds))
+    end
+end
 
 amb_1(::Int8, ::Int) = 1
 amb_1(::Integer, x) = 2
@@ -252,7 +283,7 @@ end
 for f in (Ambig8.f, Ambig8.g)
     @test length(methods(f, (Integer,))) == 2 # 1 is also acceptable
     @test length(methods(f, (Signed,))) == 1 # 2 is also acceptable
-    @test length(Base.methods_including_ambiguous(f, (Signed,))) == 3
+    @test length(Base.methods_including_ambiguous(f, (Signed,))) == 2
     @test f(0x00) == 1
     @test f(Ambig8.Irrational2()) == 2
     @test f(MathConstants.γ) == 3
@@ -291,15 +322,16 @@ end
         @test need_to_handle_undef_sparam == Set()
     end
     let need_to_handle_undef_sparam =
-            Set{Method}(detect_unbound_args(Base; recursive=true))
+            Set{Method}(detect_unbound_args(Base; recursive=true, allowed_undefineds))
         pop!(need_to_handle_undef_sparam, which(Base._totuple, (Type{Tuple{Vararg{E}}} where E, Any, Any)))
         pop!(need_to_handle_undef_sparam, which(Base.eltype, Tuple{Type{Tuple{Any}}}))
         pop!(need_to_handle_undef_sparam, first(methods(Base.same_names)))
         @test_broken need_to_handle_undef_sparam == Set()
         pop!(need_to_handle_undef_sparam, which(Base._cat, Tuple{Any, AbstractArray}))
         pop!(need_to_handle_undef_sparam, which(Base.byteenv, (Union{AbstractArray{Pair{T,V}, 1}, Tuple{Vararg{Pair{T,V}}}} where {T<:AbstractString,V},)))
-        pop!(need_to_handle_undef_sparam, which(Base._cat, (Any, SparseArrays._TypedDenseConcatGroup{T} where T)))
         pop!(need_to_handle_undef_sparam, which(Base.float, Tuple{AbstractArray{Union{Missing, T},N} where {T, N}}))
+        pop!(need_to_handle_undef_sparam, which(Base.float, Tuple{Type{Union{Missing, T}} where T}))
+        pop!(need_to_handle_undef_sparam, which(Base.complex, Tuple{Type{Union{Missing, T}} where T}))
         pop!(need_to_handle_undef_sparam, which(Base.zero, Tuple{Type{Union{Missing, T}} where T}))
         pop!(need_to_handle_undef_sparam, which(Base.one, Tuple{Type{Union{Missing, T}} where T}))
         pop!(need_to_handle_undef_sparam, which(Base.oneunit, Tuple{Type{Union{Missing, T}} where T}))
@@ -324,8 +356,51 @@ f35983(::Type, ::Type) = 2
 @test first(Base.methods_including_ambiguous(f35983, (Any, Any))).sig == Tuple{typeof(f35983), Type, Type}
 @test length(Base.methods(f35983, (Any, Any))) == 2
 @test first(Base.methods(f35983, (Any, Any))).sig == Tuple{typeof(f35983), Type, Type}
+let ambig = Ref{Int32}(0)
+    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, typemax(UInt), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+    @test length(ms) == 1
+    @test ambig[] == 0
+end
 f35983(::Type{Int16}, ::Any) = 3
-@test length(Base.methods_including_ambiguous(f35983, (Type, Type))) == 3
+@test length(Base.methods_including_ambiguous(f35983, (Type, Type))) == 2
 @test length(Base.methods(f35983, (Type, Type))) == 2
+let ambig = Ref{Int32}(0)
+    ms = Base._methods_by_ftype(Tuple{typeof(f35983), Type, Type}, nothing, -1, typemax(UInt), true, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+    @test length(ms) == 2
+    @test ambig[] == 1
+end
+
+struct B38280 <: Real; val; end
+let ambig = Ref{Int32}(0)
+    ms = Base._methods_by_ftype(Tuple{Type{B38280}, Any}, nothing, 1, typemax(UInt), false, Ref{UInt}(typemin(UInt)), Ref{UInt}(typemax(UInt)), ambig)
+    @test ms isa Vector
+    @test length(ms) == 1
+    @test ambig[] == 1
+end
+
+# issue #11407
+f11407(::Dict{K,V}, ::Dict{Any,V}) where {K,V} = 1
+f11407(::Dict{K,V}, ::Dict{K,Any}) where {K,V} = 2
+@test_throws MethodError f11407(Dict{Any,Any}(), Dict{Any,Any}()) # ambiguous
+@test f11407(Dict{Any,Int}(), Dict{Any,Int}()) == 1
+f11407(::Dict{Any,Any}, ::Dict{Any,Any}) where {K,V} = 3
+@test f11407(Dict{Any,Any}(), Dict{Any,Any}()) == 3
+
+# issue #12814
+abstract type A12814{N, T} end
+struct B12814{N, T} <: A12814{N, T}
+    x::NTuple{N, T}
+end
+(::Type{T})(x::X) where {T <: A12814, X <: Array} = 1
+@test_throws MethodError B12814{3, Float64}([1, 2, 3]) # ambiguous
+@test B12814{3,Float64}((1, 2, 3)).x === (1.0, 2.0, 3.0)
+
+# issue #43040
+module M43040
+   struct C end
+   stripType(::Type{C}) where {T} = C # where {T} is intentionally incorrect
+end
+
+@test isempty(detect_ambiguities(M43040; recursive=true))
 
-nothing # don't return a module from the remote include
+nothing
diff --git a/test/arrayops.jl b/test/arrayops.jl
index f44a6999013ea0..b2badb66ce93da 100644
--- a/test/arrayops.jl
+++ b/test/arrayops.jl
@@ -6,8 +6,6 @@ using .Main.OffsetArrays
 
 isdefined(@__MODULE__, :T24Linear) || include("testhelpers/arrayindexingtypes.jl")
 
-using SparseArrays
-
 using Random, LinearAlgebra
 using Dates
 
@@ -477,6 +475,7 @@ end
     @test a == [2, 3, 4]
     @test popat!(a, 2) == 3
     @test a == [2, 4]
+    @test popat!(a, 1, "default") == 2
     badpop() = @inbounds popat!([1], 2)
     @test_throws BoundsError badpop()
 end
@@ -553,6 +552,7 @@ end
     @test findfirst(a.==0) == 1
     @test findfirst(a.==5) == nothing
     @test findfirst(Dict(1=>false, 2=>true)) == 2
+    @test findfirst(Dict(1=>false)) == nothing
     @test findfirst(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findfirst(!isequal(1), [1,2,4,1,2,3,4]) == 2
     @test findfirst(isodd, [2,4,6,3,9,2,0]) == 4
@@ -565,6 +565,7 @@ end
     @test findlast(!iszero, a) == 8
     @test findlast(a.==0) == 5
     @test findlast(a.==5) == nothing
+    @test findlast(false) == nothing # test non-AbstractArray findlast
     @test findlast(isequal(3), [1,2,4,1,2,3,4]) == 6
     @test findlast(isodd, [2,4,6,3,9,2,0]) == 5
     @test findlast(isodd, [2,4,6,2,0]) == nothing
@@ -591,6 +592,10 @@ end
         @test findprev(b, T(1)) isa keytype(b)
         @test findprev(b, T(2)) isa keytype(b)
     end
+
+    @testset "issue 43078" begin
+        @test_throws TypeError findall([1])
+    end
 end
 @testset "find with Matrix" begin
     A = [1 2 0; 3 4 0]
@@ -699,6 +704,10 @@ end
         perm = randperm(4)
         @test isequal(A,permutedims(permutedims(A,perm),invperm(perm)))
         @test isequal(A,permutedims(permutedims(A,invperm(perm)),perm))
+
+        @test sum(permutedims(A,perm)) ≈ sum(PermutedDimsArray(A,perm))
+        @test sum(permutedims(A,perm), dims=2) ≈ sum(PermutedDimsArray(A,perm), dims=2)
+        @test sum(permutedims(A,perm), dims=(2,4)) ≈ sum(PermutedDimsArray(A,perm), dims=(2,4))
     end
 
     m = [1 2; 3 4]
@@ -742,6 +751,12 @@ end
     @test res === dst == [5 6 4; 2 3 1]
     res = circshift!(dst, src, (3.0, 2.0))
     @test res === dst == [5 6 4; 2 3 1]
+
+    # https://github.com/JuliaLang/julia/issues/41402
+    src = Float64[]
+    @test circshift(src, 1) == src
+    src = zeros(Bool, (4,0))
+    @test circshift(src, 1) == src
 end
 
 @testset "circcopy" begin
@@ -778,6 +793,10 @@ let A, B, C, D
 
     # With hash collisions
     @test map(x -> x.x, unique(map(HashCollision, B), dims=1)) == C
+
+    # With NaNs:
+    E = [1 NaN 3; 1 NaN 3; 1 NaN 3];
+    @test isequal(unique(E, dims=1), [1  NaN  3])
 end
 
 @testset "large matrices transpose" begin
@@ -1100,6 +1119,11 @@ end
     @test isequal(intersect([1,2,3], Float64[]), Float64[])
     @test isequal(intersect(Int64[], [1,2,3]), Int64[])
     @test isequal(intersect(Int64[]), Int64[])
+    @test isequal(intersect([1, 3], 1:typemax(Int)), [1, 3])
+    @test isequal(intersect(1:typemax(Int), [1, 3]), [1, 3])
+    @test isequal(intersect([1, 2, 3], 2:0.1:5), [2., 3.])
+    @test isequal(intersect([1.0, 2.0, 3.0], 2:5), [2., 3.])
+
     @test isequal(setdiff([1,2,3,4], [2,5,4]), [1,3])
     @test isequal(setdiff([1,2,3,4], [7,8,9]), [1,2,3,4])
     @test isequal(setdiff([1,2,3,4], Int64[]), Int64[1,2,3,4])
@@ -1151,17 +1175,17 @@ end
     # issue #5177
 
     c = fill(1,2,3,4)
-    m1 = mapslices(x-> fill(1,2,3), c, dims=[1,2])
-    m2 = mapslices(x-> fill(1,2,4), c, dims=[1,3])
-    m3 = mapslices(x-> fill(1,3,4), c, dims=[2,3])
+    m1 = mapslices(_ -> fill(1,2,3), c, dims=[1,2])
+    m2 = mapslices(_ -> fill(1,2,4), c, dims=[1,3])
+    m3 = mapslices(_ -> fill(1,3,4), c, dims=[2,3])
     @test size(m1) == size(m2) == size(m3) == size(c)
 
-    n1 = mapslices(x-> fill(1,6), c, dims=[1,2])
-    n2 = mapslices(x-> fill(1,6), c, dims=[1,3])
-    n3 = mapslices(x-> fill(1,6), c, dims=[2,3])
-    n1a = mapslices(x-> fill(1,1,6), c, dims=[1,2])
-    n2a = mapslices(x-> fill(1,1,6), c, dims=[1,3])
-    n3a = mapslices(x-> fill(1,1,6), c, dims=[2,3])
+    n1 =  mapslices(_ -> fill(1,6)  , c, dims=[1,2])
+    n2 =  mapslices(_ -> fill(1,6)  , c, dims=[1,3])
+    n3 =  mapslices(_ -> fill(1,6)  , c, dims=[2,3])
+    n1a = mapslices(_ -> fill(1,1,6), c, dims=[1,2])
+    n2a = mapslices(_ -> fill(1,1,6), c, dims=[1,3])
+    n3a = mapslices(_ -> fill(1,1,6), c, dims=[2,3])
     @test size(n1a) == (1,6,4) && size(n2a) == (1,3,6)  && size(n3a) == (2,1,6)
     @test size(n1) == (6,1,4) && size(n2) == (6,3,1)  && size(n3) == (2,6,1)
 
@@ -1175,9 +1199,6 @@ end
     m = mapslices(x->tuple(x), [1 2; 3 4], dims=1)
     @test m[1,1] == ([1,3],)
     @test m[1,2] == ([2,4],)
-
-    # issue #21123
-    @test mapslices(nnz, sparse(1.0I, 3, 3), dims=1) == [1 1 1]
 end
 
 @testset "single multidimensional index" begin
@@ -1295,6 +1316,9 @@ end
     @test cmp([1, 2], [1, 1]) == 1
     @test cmp([1], [1, 1]) == -1
     @test cmp([1, 1], [1]) == 1
+    @test cmp([UInt8(1), UInt8(0)], [UInt8(0), UInt8(0)]) == 1
+    @test cmp([UInt8(1), UInt8(0)], [UInt8(1), UInt8(0)]) == 0
+    @test cmp([UInt8(0), UInt8(0)], [UInt8(1), UInt8(1)]) == -1
 end
 
 @testset "sort on arrays" begin
@@ -1428,8 +1452,8 @@ end
 
     # non-1-indexed array
     oa = OffsetArray(Vector(1:10), -5)
-    filter!(x -> x > 5, oa)
-    @test oa == OffsetArray(Vector(6:10), -5)
+    oa = oa[oa.>5] # deleteat! is not supported for OffsetArrays
+    @test oa == Vector(6:10)
 
     # empty non-1-indexed array
     eoa = OffsetArray([], -5)
@@ -1437,6 +1461,26 @@ end
     @test isempty(eoa)
 end
 
+@testset "logical keepat!" begin
+    # Vector
+    a = Vector(1:10)
+    keepat!(a, [falses(5); trues(5)])
+    @test a == 6:10
+    @test_throws BoundsError keepat!(a, trues(1))
+    @test_throws BoundsError keepat!(a, trues(11))
+
+    # BitVector
+    ba = rand(10) .> 0.5
+    @test isa(ba, BitArray)
+    keepat!(ba, ba)
+    @test all(ba)
+
+    # empty array
+    ea = []
+    keepat!(ea, Bool[])
+    @test isempty(ea)
+end
+
 @testset "deleteat!" begin
     for idx in Any[1, 2, 5, 9, 10, 1:0, 2:1, 1:1, 2:2, 1:2, 2:4, 9:8, 10:9, 9:9, 10:10,
                    8:9, 9:10, 6:9, 7:10]
@@ -1472,6 +1516,11 @@ end
     @test_throws BoundsError deleteat!([], [2])
     @test deleteat!([], []) == []
     @test deleteat!([], Bool[]) == []
+    let a = Vector{Any}(undef, 2)
+        a[1] = 1
+        @test isassigned(deleteat!(copy(a), [2]), 1)
+        @test !isassigned(deleteat!(copy(a), [1]), 1)
+    end
 end
 
 @testset "comprehensions" begin
@@ -1532,9 +1581,16 @@ end
     @test reverse!([1:10;],6,10) == [1,2,3,4,5,10,9,8,7,6]
     @test reverse!([1:10;], 11) == [1:10;]
     @test_throws BoundsError reverse!([1:10;], 1, 11)
+    @test_throws BoundsError reverse!([1:10;], 0, 10)
     @test reverse!(Any[]) == Any[]
 end
 
+@testset "reverseind" begin
+    @test reverseind([1, 2, 3], 2) == 2
+    @test reverseind([1, 2, 3], 0) == 4
+    @test reverseind([1, 2, 3], 3) == 1
+end
+
 @testset "reverse dim" begin
     @test isequal(reverse([2,3,1], dims=1), [1,3,2])
     @test_throws ArgumentError reverse([2,3,1], dims=2)
@@ -1613,6 +1669,14 @@ end
     @test append!([1,2], g) == [1,2] == push!([1,2], g...)
     @test prepend!([1,2], g) == [1,2] == pushfirst!([1,2], g...)
 
+    # multiple items
+    A = [1]
+    @test append!(A, [2, 3], [4], [5, 6]) === A
+    @test A == [1, 2, 3, 4, 5, 6]
+    A = [1]
+    @test prepend!(A, [2, 3], [4], [5, 6]) === A
+    @test A == [2, 3, 4, 5, 6, 1]
+
     # offset array
     @test append!([1,2], OffsetArray([9,8], (-3,))) == [1,2,9,8]
     @test prepend!([1,2], OffsetArray([9,8], (-3,))) == [9,8,1,2]
@@ -1645,7 +1709,7 @@ end
 Nmax = 3 # TODO: go up to CARTESIAN_DIMS+2 (currently this exposes problems)
 for N = 1:Nmax
     #indexing with (UnitRange, UnitRange, UnitRange)
-    args = ntuple(d->UnitRange{Int}, N)
+    args = ntuple(Returns(UnitRange{Int}), N)
     @test Base.return_types(getindex, Tuple{Array{Float32, N}, args...}) == [Array{Float32, N}]
     @test Base.return_types(getindex, Tuple{BitArray{N}, args...}) == Any[BitArray{N}]
     @test Base.return_types(setindex!, Tuple{Array{Float32, N}, Array{Int, 1}, args...}) == [Array{Float32, N}]
@@ -1772,7 +1836,7 @@ end
         @test mdsum(A) == 15
         @test mdsum2(A) == 15
         AA = reshape(aa, tuple(2, shp...))
-        B = view(AA, 1:1, ntuple(i->Colon(), i)...)
+        B = view(AA, 1:1, ntuple(Returns(:), i)...)
         @test isa(Base.IndexStyle(B), Base.IteratorsMD.IndexCartesian)
         @test mdsum(B) == 15
         @test mdsum2(B) == 15
@@ -1785,7 +1849,7 @@ end
         A = reshape(a, tuple(shp...))
         @test mdsum(A) == 55
         @test mdsum2(A) == 55
-        B = view(A, ntuple(i->Colon(), i)...)
+        B = view(A, ntuple(Returns(:), i)...)
         @test mdsum(B) == 55
         @test mdsum2(B) == 55
         insert!(shp, 2, 1)
@@ -1890,13 +1954,6 @@ end
     @test isless(CartesianIndex((2,1)), CartesianIndex((1,2)))
     @test !isless(CartesianIndex((1,2)), CartesianIndex((2,1)))
 
-    a = spzeros(2,3)
-    @test CartesianIndices(size(a)) == eachindex(a)
-    a[CartesianIndex{2}(2,3)] = 5
-    @test a[2,3] == 5
-    b = view(a, 1:2, 2:3)
-    b[CartesianIndex{2}(1,1)] = 7
-    @test a[1,2] == 7
     @test 2*CartesianIndex{3}(1,2,3) == CartesianIndex{3}(2,4,6)
     @test CartesianIndex{3}(1,2,3)*2 == CartesianIndex{3}(2,4,6)
     @test_throws ErrorException iterate(CartesianIndex{3}(1,2,3))
@@ -1949,16 +2006,6 @@ end
     y = iterate(itr, y[2])
     @test y === nothing
     @test r[val] == 3
-    r = sparse(2:3:8)
-    itr = eachindex(r)
-    y = iterate(itr)
-    @test y !== nothing
-    y = iterate(itr, y[2])
-    y = iterate(itr, y[2])
-    @test y !== nothing
-    val, state = y
-    @test r[val] == 8
-    @test iterate(itr, state) == nothing
 end
 
 R = CartesianIndices((1,3))
@@ -2271,10 +2318,12 @@ let A = zeros(Int, 2, 2), B = zeros(Float64, 2, 2)
     f40() = Float64[A A]
     f41() = [A B]
     f42() = Int[A B]
+    f43() = Int[A...]
+    f44() = Float64[A..., B...]
 
     for f in [f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, f16,
               f17, f18, f19, f20, f21, f22, f23, f24, f25, f26, f27, f28, f29, f30,
-              f31, f32, f33, f34, f35, f36, f37, f38, f39, f40, f41, f42]
+              f31, f32, f33, f34, f35, f36, f37, f38, f39, f40, f41, f42, f43, f44]
         @test isconcretetype(Base.return_types(f, ())[1])
     end
 end
@@ -2524,6 +2573,14 @@ end
     arr = randn(4)
     @test accumulate(*, arr; init=1) ≈ accumulate(*, arr)
 
+    # bad kwarg
+    arr_B = similar(arr)
+    @test_throws ArgumentError accumulate(*, arr; bad_init=1)
+    @test_throws ArgumentError accumulate!(*, arr_B, arr; bad_init=1)
+    # must provide dims
+    md_arr = randn(4, 5)
+    @test_throws ArgumentError accumulate!(*, similar(md_arr), md_arr)
+
     N = 5
     for arr in [rand(Float64, N), rand(Bool, N), rand(-2:2, N)]
         for (op, cumop) in [(+, cumsum), (*, cumprod)]
@@ -2634,7 +2691,7 @@ let TT = Union{UInt8, Int8}
     resize!(b, 1)
     @assert pointer(a) == pa
     @assert pointer(b) == pb
-    unsafe_store!(pa, 0x1, 2) # reset a[2] to 1
+    unsafe_store!(Ptr{UInt8}(pa), 0x1, 2) # reset a[2] to 1
     @test length(a) == length(b) == 1
     @test a[1] == b[1] == 0x0
     @test a == b
@@ -2792,7 +2849,7 @@ end
     @test setindex!(zeros(2), ones(2), CI0, :, CI0) == ones(2)
     @test setindex!(zeros(2), ones(2), CI0, CI0, :) == ones(2)
     @test setindex!([fill(0.0)], fill(1.0), 1) == [fill(1.0)]
-    # 0-dimensional assigment into ≥1-dimensional arrays
+    # 0-dimensional assignment into ≥1-dimensional arrays
     @test setindex!(zeros(2), fill(1.0), 1, CI0) == [1.0, 0.0]
     @test setindex!(zeros(2), fill(1.0), CI0, 1) == [1.0, 0.0]
     @test setindex!(zeros(2,2), fill(1.0), 1, 1, CI0) == [1.0 0.0; 0.0 0.0]
@@ -2834,7 +2891,7 @@ end
     b = IOBuffer()
     showerror(b, err)
     @test String(take!(b)) ==
-        "BoundsError: attempt to access 2×2 Matrix{Float64} at index [10, Bool[1, 1]]"
+        "BoundsError: attempt to access 2×2 Matrix{Float64} at index [10, 2-element BitVector]"
 
     # Also test : directly for custom types for which it may appear as-is
     err = BoundsError(x, (10, :))
@@ -2852,3 +2909,43 @@ end
     @test String(take!(b)) ==
         "BoundsError: attempt to access 2×2 Matrix{Float64} at index [10, \"bad index\"]"
 end
+
+@testset "inference of Union{T,Nothing} arrays 26771" begin
+    f(a) = (v = [1, nothing]; [v[x] for x in a])
+    @test only(Base.return_types(f, (Int,))) === Union{Array{Int,0}, Array{Nothing,0}}
+    @test only(Base.return_types(f, (UnitRange{Int},))) <: Vector
+end
+
+@testset "hcat error checking" begin
+    a = [2 for i in 1:4]
+    b = [2 for i in 1:5]
+    @test_throws DimensionMismatch hcat(a, b)
+end
+
+@testset "similar(::ReshapedArray)" begin
+    a = reshape(TSlow(rand(Float64, 4, 4)), 2, :)
+
+    as = similar(a)
+    @test as isa TSlow{Float64,2}
+    @test size(as) == (2, 8)
+
+    as = similar(a, Int, (3, 5, 1))
+    @test as isa TSlow{Int,3}
+    @test size(as) == (3, 5, 1)
+end
+
+@testset "0-dimensional shape checking #39608" begin
+    @test [fill(1); [2; 2]] == [1; 2; 2]
+    @test [fill(1); fill(2, (2,1,1))] == reshape([1; 2; 2], (3, 1, 1))
+    @test_throws DimensionMismatch [fill(1); rand(2, 2, 2)]
+end
+
+@testset "eltype of zero for arrays (issue #41348)" begin
+    for a in Any[[DateTime(2020), DateTime(2021)], [Date(2000), Date(2001)], [Time(1), Time(2)]]
+        @test a + zero(a) == a
+        b = reshape(a, :, 1)
+        @test b + zero(b) == b
+        c = view(b, 1:1, 1:1)
+        @test c + zero(c) == c
+    end
+end
diff --git a/test/asyncmap.jl b/test/asyncmap.jl
index 04c215af7bb603..ec49230dbce144 100644
--- a/test/asyncmap.jl
+++ b/test/asyncmap.jl
@@ -54,6 +54,19 @@ len_only_iterable = (1,2,3,4,5)
 @test_throws ArgumentError asyncmap(identity, 1:10; batch_size="10")
 @test_throws ArgumentError asyncmap(identity, 1:10; ntasks="10")
 
+# Check that we throw a `CapturedException` holding the stacktrace if `f` throws
+f42105(i) = i == 5 ? error("captured") :  i
+let
+    e = try
+        asyncmap(f42105, 1:5)
+    catch e
+        e
+    end
+    @test e isa CapturedException
+    @test e.ex == ErrorException("captured")
+    @test e.processed_bt[2][1].func == :f42105
+end
+
 include("generic_map_tests.jl")
 generic_map_tests(asyncmap, asyncmap!)
 
diff --git a/test/atomics.jl b/test/atomics.jl
new file mode 100644
index 00000000000000..15ffd84a2c0a23
--- /dev/null
+++ b/test/atomics.jl
@@ -0,0 +1,383 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Base.Threads
+using Core: ConcurrencyViolationError
+import Base: copy
+
+const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+
+mutable struct ARefxy{T}
+    @atomic x::T
+    y::T
+    ARefxy(x::T, y::T) where {T} = new{T}(x, y)
+    ARefxy{T}(x, y) where {T} = new{T}(x, y)
+    ARefxy{T}() where {T} = new{T}()
+end
+
+mutable struct Refxy{T}
+    x::T
+    y::T
+    Refxy(x::T, y::T) where {T} = new{T}(x, y)
+    Refxy{T}(x, y) where {T} = new{T}(x, y)
+    Refxy{T}() where {T} = new() # unused, but sets ninitialized to 0
+end
+
+@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T}
+    @atomic x::T
+    @atomic y::T
+end
+@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T}
+    x::T
+    y::T
+end
+@test_throws ErrorException("invalid redefinition of constant ARefxy") @eval mutable struct ARefxy{T}
+    x::T
+    @atomic y::T
+end
+@test_throws ErrorException("invalid redefinition of constant Refxy") @eval mutable struct Refxy{T}
+    x::T
+    @atomic y::T
+end
+
+copy(r::Union{Refxy,ARefxy}) = typeof(r)(r.x, r.y)
+function add(x::T, y)::T where {T}; x + y; end
+swap(x, y) = y
+
+let T1 = Refxy{NTuple{3,UInt8}},
+    T2 = ARefxy{NTuple{3,UInt8}}
+    @test sizeof(T1) == 6
+    @test sizeof(T2) == 8
+    @test fieldoffset(T1, 1) == 0
+    @test fieldoffset(T2, 1) == 0
+    @test fieldoffset(T1, 2) == 3
+    @test fieldoffset(T2, 2) == 4
+    @test !Base.datatype_haspadding(T1)
+    @test Base.datatype_haspadding(T2)
+    @test Base.datatype_alignment(T1) == 1
+    @test Base.datatype_alignment(T2) == 4
+end
+
+# check that very large types are getting locks
+let (x, y) = (Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+    ar = ARefxy(x, y)
+    r = Refxy(x, y)
+    @test 64 == sizeof(r) < sizeof(ar)
+    @test sizeof(r) == sizeof(ar) - Int(fieldoffset(typeof(ar), 1))
+end
+
+struct PadIntA <: Number # internal padding
+    a::Int8
+    b::Int16
+    PadIntA(x) = new(82, x)
+end
+struct PadIntB <: Number # external padding
+    a::UInt8
+    b::UInt8
+    c::UInt8
+    PadIntB(x) = new(x & 0xff, (x >> 8) & 0xff, (x >> 16) & 0xff)
+end
+primitive type Int24 <: Signed 24 end # integral padding
+Int24(x::Int) = Core.Intrinsics.trunc_int(Int24, x)
+Base.Int(x::PadIntB) = x.a + (Int(x.b) << 8) + (Int(x.c) << 16)
+Base.:(+)(x::PadIntA, b::Int) = PadIntA(x.b + b)
+Base.:(+)(x::PadIntB, b::Int) = PadIntB(Int(x) + b)
+Base.:(+)(x::Int24, b::Int) = Core.Intrinsics.add_int(x, Int24(b))
+Base.show(io::IO, x::PadIntA) = print(io, "PadIntA(", x.b, ")")
+Base.show(io::IO, x::PadIntB) = print(io, "PadIntB(", Int(x), ")")
+Base.show(io::IO, x::Int24) = print(io, "Int24(", Core.Intrinsics.zext_int(Int, x), ")")
+
+@noinline function _test_field_operators(r)
+    r = r[]
+    TT = fieldtype(typeof(r), :x)
+    T = typeof(getfield(r, :x))
+    @test getfield(r, :x, :sequentially_consistent) === T(123_10)
+    @test setfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_1)
+    @test getfield(r, :x, :sequentially_consistent) === T(123_1)
+    @test replacefield!(r, :x, 123_1 % UInt, T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), false))
+    @test replacefield!(r, :x, T(123_1), T(123_30), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_1), true))
+    @test getfield(r, :x, :sequentially_consistent) === T(123_30)
+    @test replacefield!(r, :x, T(123_1), T(123_1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(123_30), false))
+    @test getfield(r, :x, :sequentially_consistent) === T(123_30)
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_30), T(123_31))
+    @test modifyfield!(r, :x, add, 1, :sequentially_consistent) === Pair{TT,TT}(T(123_31), T(123_32))
+    @test getfield(r, :x, :sequentially_consistent) === T(123_32)
+    @test swapfield!(r, :x, T(123_1), :sequentially_consistent) === T(123_32)
+    @test getfield(r, :x, :sequentially_consistent) === T(123_1)
+    nothing
+end
+@noinline function test_field_operators(r)
+    _test_field_operators(Ref(copy(r)))
+    _test_field_operators(Ref{Any}(copy(r)))
+    nothing
+end
+test_field_operators(ARefxy{Int}(123_10, 123_20))
+test_field_operators(ARefxy{Any}(123_10, 123_20))
+test_field_operators(ARefxy{Union{Nothing,Int}}(123_10, nothing))
+test_field_operators(ARefxy{Complex{Int32}}(123_10, 123_20))
+test_field_operators(ARefxy{Complex{Int128}}(123_10, 123_20))
+test_field_operators(ARefxy{PadIntA}(123_10, 123_20))
+test_field_operators(ARefxy{PadIntB}(123_10, 123_20))
+#FIXME: test_field_operators(ARefxy{Int24}(123_10, 123_20))
+test_field_operators(ARefxy{Float64}(123_10, 123_20))
+
+@noinline function _test_field_orderings(r, x, y)
+    @nospecialize x y
+    r = r[]
+    TT = fieldtype(typeof(r), :x)
+
+    @test getfield(r, :x) === x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :x, :u)
+    @test_throws ConcurrencyViolationError("getfield: atomic field cannot be accessed non-atomically") getfield(r, :x, :not_atomic)
+    @test getfield(r, :x, :unordered) === x
+    @test getfield(r, :x, :monotonic) === x
+    @test getfield(r, :x, :acquire) === x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :x, :release) === x
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :x, :acquire_release) === x
+    @test getfield(r, :x, :sequentially_consistent) === x
+    @test isdefined(r, :x)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :x, :u)
+    @test_throws ConcurrencyViolationError("isdefined: atomic field cannot be accessed non-atomically") isdefined(r, :x, :not_atomic)
+    @test isdefined(r, :x, :unordered)
+    @test isdefined(r, :x, :monotonic)
+    @test isdefined(r, :x, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :x, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :x, :acquire_release)
+    @test isdefined(r, :x, :sequentially_consistent)
+
+    @test getfield(r, :y) === y
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :y, :u)
+    @test getfield(r, :y, :not_atomic) === y
+    @test_throws ConcurrencyViolationError("getfield: non-atomic field cannot be accessed atomically") getfield(r, :y, :unordered)
+    @test_throws ConcurrencyViolationError("getfield: non-atomic field cannot be accessed atomically") getfield(r, :y, :monotonic)
+    @test_throws ConcurrencyViolationError("getfield: non-atomic field cannot be accessed atomically") getfield(r, :y, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :y, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") getfield(r, :y, :acquire_release)
+    @test_throws ConcurrencyViolationError("getfield: non-atomic field cannot be accessed atomically") getfield(r, :y, :sequentially_consistent)
+    @test isdefined(r, :y)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :y, :u)
+    @test isdefined(r, :y, :not_atomic)
+    @test_throws ConcurrencyViolationError("isdefined: non-atomic field cannot be accessed atomically") isdefined(r, :y, :unordered)
+    @test_throws ConcurrencyViolationError("isdefined: non-atomic field cannot be accessed atomically") isdefined(r, :y, :monotonic)
+    @test_throws ConcurrencyViolationError("isdefined: non-atomic field cannot be accessed atomically") isdefined(r, :y, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :y, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") isdefined(r, :y, :acquire_release)
+    @test_throws ConcurrencyViolationError("isdefined: non-atomic field cannot be accessed atomically") isdefined(r, :y, :sequentially_consistent)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :x, y, :u)
+    @test_throws ConcurrencyViolationError("setfield!: atomic field cannot be written non-atomically") setfield!(r, :x, y)
+    @test_throws ConcurrencyViolationError("setfield!: atomic field cannot be written non-atomically") setfield!(r, :x, y, :not_atomic)
+    @test getfield(r, :x) === x
+    @test setfield!(r, :x, y, :unordered) === y
+    @test setfield!(r, :x, y, :monotonic) === y
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :x, y, :acquire) === y
+    @test setfield!(r, :x, y, :release) === y
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :x, y, :acquire_release) === y
+    @test setfield!(r, :x, y, :sequentially_consistent) === y
+    @test getfield(r, :x) === y
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :y, x, :u)
+    @test_throws ConcurrencyViolationError("setfield!: non-atomic field cannot be written atomically") setfield!(r, :y, x, :unordered)
+    @test_throws ConcurrencyViolationError("setfield!: non-atomic field cannot be written atomically") setfield!(r, :y, x, :monotonic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :y, x, :acquire)
+    @test_throws ConcurrencyViolationError("setfield!: non-atomic field cannot be written atomically") setfield!(r, :y, x, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") setfield!(r, :y, x, :acquire_release)
+    @test_throws ConcurrencyViolationError("setfield!: non-atomic field cannot be written atomically") setfield!(r, :y, x, :sequentially_consistent)
+    @test getfield(r, :y) === y
+    @test setfield!(r, :y, x) === x
+    @test setfield!(r, :y, x, :not_atomic) === x
+    @test getfield(r, :y) === x
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :y, y, :u)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :y, y, :unordered)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :monotonic)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :acquire)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :release)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :acquire_release)
+    @test_throws ConcurrencyViolationError("swapfield!: non-atomic field cannot be written atomically") swapfield!(r, :y, y, :sequentially_consistent)
+    @test swapfield!(r, :y, y, :not_atomic) === x
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :y, swap, y, :u)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :y, swap, y, :unordered)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :monotonic)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :acquire)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :release)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :acquire_release)
+    @test_throws ConcurrencyViolationError("modifyfield!: non-atomic field cannot be written atomically") modifyfield!(r, :y, swap, y, :sequentially_consistent)
+    @test modifyfield!(r, :y, swap, x, :not_atomic) === Pair{TT,TT}(y, x)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :u, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :unordered, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :monotonic, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :acquire, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :release, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :acquire_release, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: non-atomic field cannot be written atomically") replacefield!(r, :y, y, y, :sequentially_consistent, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :u)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :unordered)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :monotonic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :acquire_release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :y, y, y, :not_atomic, :sequentially_consistent)
+    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === ReplaceType{TT}((x, true))
+    @test replacefield!(r, :y, x, y, :not_atomic, :not_atomic) === ReplaceType{TT}((y, x === y))
+    @test replacefield!(r, :y, y, y, :not_atomic) === ReplaceType{TT}((y, true))
+    @test replacefield!(r, :y, y, y) === ReplaceType{TT}((y, true))
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :x, x, :u)
+    @test_throws ConcurrencyViolationError("swapfield!: atomic field cannot be written non-atomically") swapfield!(r, :x, x, :not_atomic)
+    @test_throws ConcurrencyViolationError("swapfield!: atomic field cannot be written non-atomically") swapfield!(r, :x, x)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") swapfield!(r, :x, x, :unordered) === y
+    @test swapfield!(r, :x, x, :monotonic) === y
+    @test swapfield!(r, :x, x, :acquire) === x
+    @test swapfield!(r, :x, x, :release) === x
+    @test swapfield!(r, :x, x, :acquire_release) === x
+    @test swapfield!(r, :x, x, :sequentially_consistent) === x
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :x, swap, x, :u)
+    @test_throws ConcurrencyViolationError("modifyfield!: atomic field cannot be written non-atomically") modifyfield!(r, :x, swap, x, :not_atomic)
+    @test_throws ConcurrencyViolationError("modifyfield!: atomic field cannot be written non-atomically") modifyfield!(r, :x, swap, x)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") modifyfield!(r, :x, swap, x, :unordered)
+    @test modifyfield!(r, :x, swap, x, :monotonic) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :acquire) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :release) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :acquire_release) === Pair{TT,TT}(x, x)
+    @test modifyfield!(r, :x, swap, x, :sequentially_consistent) === Pair{TT,TT}(x, x)
+
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :u, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be written non-atomically") replacefield!(r, :x, x, x)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be written non-atomically") replacefield!(r, :x, y, x, :not_atomic, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :unordered, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :monotonic, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :acquire, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :release, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :acquire_release, :not_atomic)
+    @test_throws ConcurrencyViolationError("replacefield!: atomic field cannot be accessed non-atomically") replacefield!(r, :x, x, x, :sequentially_consistent, :not_atomic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :u)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :unordered)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :monotonic)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :acquire)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :acquire_release)
+    @test_throws ConcurrencyViolationError("invalid atomic ordering") replacefield!(r, :x, x, x, :not_atomic, :sequentially_consistent)
+    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((x, true))
+    @test replacefield!(r, :x, x, y, :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((y, x === y))
+    @test replacefield!(r, :x, y, x, :sequentially_consistent) === ReplaceType{TT}((y, true))
+    nothing
+end
+@noinline function test_field_orderings(r, x, y)
+    _test_field_orderings(Ref(copy(r)), x, y)
+    _test_field_orderings(Ref{Any}(copy(r)), x, y)
+    nothing
+end
+@noinline test_field_orderings(x, y) = (@nospecialize; test_field_orderings(ARefxy(x, y), x, y))
+test_field_orderings(10, 20)
+test_field_orderings(true, false)
+test_field_orderings("hi", "bye")
+test_field_orderings(:hi, :bye)
+test_field_orderings(nothing, nothing)
+test_field_orderings(ARefxy{Any}(123_10, 123_20), 123_10, 123_20)
+test_field_orderings(ARefxy{Any}(true, false), true, false)
+test_field_orderings(ARefxy{Union{Nothing,Missing}}(nothing, missing), nothing, missing)
+test_field_orderings(ARefxy{Union{Nothing,Int}}(nothing, 123_1), nothing, 123_1)
+test_field_orderings(Complex{Int128}(10, 30), Complex{Int128}(20, 40))
+test_field_orderings(10.0, 20.0)
+test_field_orderings(NaN, Inf)
+
+struct UndefComplex{T}
+    re::T
+    im::T
+    UndefComplex{T}() where {T} = new{T}()
+end
+Base.convert(T::Type{<:UndefComplex}, S) = T()
+@noinline function _test_field_undef(r)
+    r = r[]
+    TT = fieldtype(typeof(r), :x)
+    x = convert(TT, 12345_10)
+    @test_throws UndefRefError getfield(r, :x)
+    @test_throws UndefRefError getfield(r, :x, :sequentially_consistent)
+    @test_throws UndefRefError modifyfield!(r, :x, add, 1, :sequentially_consistent)
+    @test_throws (TT === Any ? UndefRefError : TypeError) replacefield!(r, :x, 1, 1.0, :sequentially_consistent)
+    @test_throws UndefRefError replacefield!(r, :x, 1, x, :sequentially_consistent)
+    @test_throws UndefRefError getfield(r, :x, :sequentially_consistent)
+    @test_throws UndefRefError swapfield!(r, :x, x, :sequentially_consistent)
+    @test getfield(r, :x, :sequentially_consistent) === x === getfield(r, :x)
+    nothing
+end
+@noinline function test_field_undef(TT)
+    _test_field_undef(Ref(TT()))
+    _test_field_undef(Ref{Any}(TT()))
+    nothing
+end
+test_field_undef(ARefxy{BigInt})
+test_field_undef(ARefxy{Any})
+test_field_undef(ARefxy{Union{Nothing,Integer}})
+test_field_undef(ARefxy{UndefComplex{Any}})
+test_field_undef(ARefxy{UndefComplex{UndefComplex{Any}}})
+
+@test_throws ErrorException @macroexpand @atomic foo()
+@test_throws ErrorException @macroexpand @atomic foo += bar
+@test_throws ErrorException @macroexpand @atomic foo += bar
+@test_throws ErrorException @macroexpand @atomic foo = bar
+@test_throws ErrorException @macroexpand @atomic foo()
+@test_throws ErrorException @macroexpand @atomic foo(bar)
+@test_throws ErrorException @macroexpand @atomic foo(bar, baz)
+@test_throws ErrorException @macroexpand @atomic foo(bar, baz, bax)
+@test_throws ErrorException @macroexpand @atomicreplace foo bar
+
+# test macroexpansions
+let a = ARefxy(1, -1)
+    @test 1 === @atomic a.x
+    @test 2 === @atomic :sequentially_consistent a.x = 2
+    @test 3 === @atomic :monotonic a.x = 3
+    local four = 4
+    @test 4 === @atomic :monotonic a.x = four
+    @test 3 === @atomic :monotonic a.x = four - 1
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x = 2
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x += 1
+
+    @test 3 === @atomic :monotonic a.x
+    @test 5 === @atomic a.x += 2
+    @test 4 === @atomic :monotonic a.x -= 1
+    @test 12 === @atomic :monotonic a.x *= 3
+
+    @test 12 === @atomic a.x
+    @test (12 => 13) === @atomic a.x + 1
+    @test (13 => 15) === @atomic :monotonic a.x + 2
+    @test (15 => 19) === @atomic a.x max 19
+    @test (19 => 20) === @atomic :monotonic a.x max 20
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x + 1
+    @test_throws ConcurrencyViolationError @atomic :not_atomic a.x max 30
+
+    @test 20 === @atomic a.x
+    @test 20 === @atomicswap a.x = 1
+    @test 1 === @atomicswap :monotonic a.x = 2
+    @test_throws ConcurrencyViolationError @atomicswap :not_atomic a.x = 1
+
+    @test 2 === @atomic a.x
+    @test ReplaceType{Int}((2, true)) === @atomicreplace a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic a.x 2 => 1
+    @test ReplaceType{Int}((1, false)) === @atomicreplace :monotonic :monotonic a.x 2 => 1
+    @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x 1 => 2
+    @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x 1 => 2
+
+    @test 1 === @atomic a.x
+    xchg = 1 => 2
+    @test ReplaceType{Int}((1, true)) === @atomicreplace a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :monotonic a.x xchg
+    @test ReplaceType{Int}((2, false)) === @atomicreplace :acquire_release :monotonic a.x xchg
+    @test_throws ConcurrencyViolationError @atomicreplace :not_atomic a.x xchg
+    @test_throws ConcurrencyViolationError @atomicreplace :monotonic :acquire a.x xchg
+end
+
+# atomic getfield with boundcheck
+# via codegen
+getx(a, boundcheck) = getfield(a, :x, :sequentially_consistent, boundcheck)
+@test getx(ARefxy{Any}(42, 42), true) == 42
+@test getx(ARefxy{Any}(42, 42), false) == 42
+# via interpreter
+ans = getfield(ARefxy{Any}(42, 42), :x, :sequentially_consistent, true)
+@test ans == 42
+ans = getfield(ARefxy{Any}(42, 42), :x, :sequentially_consistent, false)
+@test ans == 42
diff --git a/test/backtrace.jl b/test/backtrace.jl
index 8b6ca94c779703..35b607137a5c26 100644
--- a/test/backtrace.jl
+++ b/test/backtrace.jl
@@ -184,7 +184,7 @@ end
 
 # issue 28618
 let bt, found = false
-    @info ""
+    @debug ""
     bt = backtrace()
     for frame in map(lookup, bt)
         if frame[1].line == @__LINE__() - 2 && frame[1].file == Symbol(@__FILE__)
@@ -254,7 +254,92 @@ let code = """
     """
 
     bt_str = read(`$(Base.julia_cmd()) --startup-file=no --compile=min -e $code`, String)
-    @test occursin("InterpreterIP in MethodInstance for foo", bt_str)
+    @test occursin(r"InterpreterIP in MethodInstance for .*A\.foo", bt_str)
     @test occursin("InterpreterIP in top-level CodeInfo for Main.A", bt_str)
 end
 
+"""
+    _reformat_sp(bt_data...) -> sp::Vector{Ptr{Cvoid}}
+
+Convert the output `bt_data` of `jl_backtrace_from_here` with `returnsp` flag set to a
+vector of valid stack pointers `sp`; i.e., `sp` is a subset of `bt_data[3]`.
+
+See also `Base._reformat_bt`.
+"""
+function _reformat_sp(
+    bt_raw::Array{Ptr{Cvoid},1},
+    bt2::Array{Any,1},
+    sp_raw::Array{Ptr{Cvoid},1},
+)
+    bt = Base._reformat_bt(bt_raw, bt2)
+    sp = empty!(similar(sp_raw))
+    i = j = 0
+    while true
+        # Advance `i` such that `bt[i] isa Ptr{Cvoid}` (native pointer).
+        local ip
+        while true
+            if i == lastindex(bt)
+                return sp
+            end
+            i += 1
+            x = bt[i]
+            if x isa Ptr{Cvoid}
+                ip = x
+                break
+            end
+        end
+        # Advance `j` such that `bt_raw[j] == bt[i]` to find a valid stack pointer.
+        while true
+            if j == lastindex(bt_raw)
+                return sp
+            end
+            j += 1
+            if bt_raw[j] == ip
+                push!(sp, sp_raw[j])
+                break
+            end
+        end
+    end
+end
+
+"""
+    withframeaddress(f)
+
+Call function `f` with an address `ptr::Ptr{Cvoid}` of an independent frame
+immediately outer to `f`.
+"""
+withframeaddress
+@eval @noinline function withframeaddress(f)
+    sp = Core.Intrinsics.llvmcall(
+        ($"""
+        declare i8* @llvm.frameaddress(i32)
+        define private i$(Sys.WORD_SIZE) @frameaddr() {
+            %1 = call i8* @llvm.frameaddress(i32 0)
+            %2 = ptrtoint i8* %1 to i$(Sys.WORD_SIZE)
+            ret i$(Sys.WORD_SIZE) %2
+        }""", "frameaddr"),
+        UInt,
+        Tuple{},
+    )
+    @noinline f(Ptr{Cvoid}(sp))
+end
+
+function sandwiched_backtrace()
+    local ptr1, ptr2, bt
+    withframeaddress() do p1
+        ptr1 = p1
+        bt = ccall(:jl_backtrace_from_here, Ref{Base.SimpleVector}, (Cint, Cint), true, 0)
+        withframeaddress() do p2
+            ptr2 = p2
+        end
+    end
+    return ptr1, ptr2, bt
+end
+
+@testset "stack pointers" begin
+    ptr1, ptr2, bt_data = sandwiched_backtrace()
+    sp = _reformat_sp(bt_data...)
+    @test ptr2 < sp[2]
+    @test sp[1] < ptr1
+    @test all(diff(Int128.(UInt.(sp))) .> 0)
+end
diff --git a/test/binaryplatforms.jl b/test/binaryplatforms.jl
index f2d9d5e10f9004..793a9b1f06a410 100644
--- a/test/binaryplatforms.jl
+++ b/test/binaryplatforms.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test, Base.BinaryPlatforms, Base.BinaryPlatforms.CPUID
 
 @testset "CPUID" begin
@@ -106,12 +108,18 @@ end
     @test triplet(P("x86_64", "linux")) == "x86_64-linux-gnu"
     @test triplet(P("armv6l", "linux")) == "armv6l-linux-gnueabihf"
     @test triplet(P("x86_64", "macos")) == "x86_64-apple-darwin"
+    @test triplet(P("x86_64", "macos"; os_version=v"16")) == "x86_64-apple-darwin16"
     @test triplet(P("x86_64", "freebsd")) == "x86_64-unknown-freebsd"
     @test triplet(P("i686", "freebsd")) == "i686-unknown-freebsd"
 
     # Now test libgfortran/cxxstring ABIs
     @test triplet(P("x86_64", "linux"; libgfortran_version=v"3", cxxstring_abi="cxx11")) == "x86_64-linux-gnu-libgfortran3-cxx11"
     @test triplet(P("armv7l", "linux"; libc="musl", cxxstring_abi="cxx03")) == "armv7l-linux-musleabihf-cxx03"
+    if !isnothing(detect_libgfortran_version())
+        # When `libgfortran` can be detected at runtime, make sure
+        # `HostPlatform` has the appropriate key.
+        @test tags(HostPlatform())["libgfortran_version"] == string(detect_libgfortran_version())
+    end
 
     # Test tags()
     t = tags(P("x86_64", "linux"))
@@ -130,6 +138,16 @@ end
 
     # Test that trying to set illegal tags fails
     @test_throws ArgumentError p["os"] = "a+b"
+
+    # Test that our `hash()` is stable
+    @test hash(HostPlatform()) == hash(HostPlatform())
+
+    # Test that round-tripping through `triplet` for a does not
+    # maintain equality, as we end up losing the `compare_strategies`:
+    p = Platform("x86_64", "linux"; cuda = v"11")
+    Base.BinaryPlatforms.set_compare_strategy!(p, "cuda", Base.BinaryPlatforms.compare_version_cap)
+    q = parse(Platform, triplet(p))
+    @test q != p
 end
 
 @testset "Triplet parsing" begin
@@ -176,14 +194,16 @@ end
     # Test extended attributes
     @test R("x86_64-linux-gnu-march+avx2") == P("x86_64", "linux"; march="avx2")
     @test R("x86_64-linux-gnu-march+x86_64-cuda+10.1") == P("x86_64", "linux"; march="x86_64", cuda="10.1")
-    @test_throws ArgumentError R("x86_64-linux-gnu-march+generic";)
-    @test_throws ArgumentError R("x86_64-linux-gnu-cuda+version")
 
-    # Round-trip our little homie through `triplet()`
-    @test parse(Platform, triplet(HostPlatform())) == HostPlatform()
+    # Round-trip our little homie through `triplet()`, with some bending
+    # of the rules for MacOS and FreeBSD, who have incomplete `os_version`
+    # numbers embedded within their triplets.
+    p = Platform("x86_64", "linux")
+    @test parse(Platform, triplet(p)) == p
 
     # Also test round-tripping through `repr()`:
-    @test eval(Meta.parse(repr(HostPlatform()))) == HostPlatform()
+    p = Platform("aarch64", "macos"; os_version=v"20", march="armv8_4_crypto_sve")
+    @test eval(Meta.parse(repr(p))) == p
 end
 
 @testset "platforms_match()" begin
@@ -243,21 +263,25 @@ end
     @test parse_dl_name_version("libgfortran.so.3.4", "linux") == ("libgfortran", v"3.4")
     @test_throws ArgumentError parse_dl_name_version("libgfortran.so.3.4a", "linux")
     @test_throws ArgumentError parse_dl_name_version("libgfortran", "linux")
+    @test_throws ArgumentError parse_dl_name_version("libgfortranso", "linux")
     @test parse_dl_name_version("libgfortran.so", "freebsd") == ("libgfortran", nothing)
     @test parse_dl_name_version("libgfortran.so.3", "freebsd") == ("libgfortran", v"3")
     @test parse_dl_name_version("libgfortran.so.3.4", "freebsd") == ("libgfortran", v"3.4")
     @test_throws ArgumentError parse_dl_name_version("libgfortran.so.3.4a", "freebsd")
     @test_throws ArgumentError parse_dl_name_version("libgfortran", "freebsd")
+    @test_throws ArgumentError parse_dl_name_version("libgfortranso", "freebsd")
     @test parse_dl_name_version("libgfortran.dylib", "macos") == ("libgfortran", nothing)
     @test parse_dl_name_version("libgfortran.3.dylib", "macos") == ("libgfortran", v"3")
     @test parse_dl_name_version("libgfortran.3.4.dylib", "macos") == ("libgfortran", v"3.4")
     @test parse_dl_name_version("libgfortran.3.4a.dylib", "macos") == ("libgfortran.3.4a", nothing)
     @test_throws ArgumentError parse_dl_name_version("libgfortran", "macos")
+    @test_throws ArgumentError parse_dl_name_version("libgfortrandylib", "macos")
     @test parse_dl_name_version("libgfortran.dll", "windows") == ("libgfortran", nothing)
     @test parse_dl_name_version("libgfortran-3.dll", "windows") == ("libgfortran", v"3")
     @test parse_dl_name_version("libgfortran-3.4.dll", "windows") == ("libgfortran", v"3.4")
     @test parse_dl_name_version("libgfortran-3.4a.dll", "windows") == ("libgfortran-3.4a", nothing)
     @test_throws ArgumentError parse_dl_name_version("libgfortran", "windows")
+    @test_throws ArgumentError parse_dl_name_version("libgfortrandll", "windows")
 end
 
 @testset "Sys.is* overloading" begin
@@ -344,7 +368,7 @@ end
 
 
     # Next, an asymmetric comparison strategy.  We'll create a "less than or equal to" constraint
-    # that uses the `{a,b}_requested` paramters to determine which number represents the limit.
+    # that uses the `{a,b}_requested` parameters to determine which number represents the limit.
     function less_than_constraint(a::String, b::String, a_requested::Bool, b_requested::Bool)
         a = parse(Int, a)
         b = parse(Int, b)
diff --git a/test/bitarray.jl b/test/bitarray.jl
index 06cc99e5257479..9ce3775a5d4095 100644
--- a/test/bitarray.jl
+++ b/test/bitarray.jl
@@ -98,6 +98,20 @@ end
 
 timesofar("conversions")
 
+@testset "Promotions for size $sz" for (sz, T) in allsizes
+    @test isequal(promote(falses(sz...), zeros(sz...)),
+                 (zeros(sz...), zeros(sz...)))
+    @test isequal(promote(trues(sz...), ones(sz...)),
+                 (ones(sz...), ones(sz...)))
+    ae = falses(1, sz...)
+    ex = (@test_throws ErrorException promote(ae, ones(sz...))).value
+    @test startswith(ex.msg, "promotion of types Bit")
+    ex = (@test_throws ErrorException promote(ae, falses(sz...))).value
+    @test startswith(ex.msg, "promotion of types Bit")
+end
+
+timesofar("promotions")
+
 @testset "utility functions" begin
     b1 = bitrand(v1)
     @test isequal(fill!(b1, true), trues(size(b1)))
@@ -198,6 +212,9 @@ timesofar("utils")
                   ((x+y)%5==2 for x = 1:n1 for y = 1:n2))
             @test BitArray(g) == BitArray(collect(g))
         end
+        @test_throws DimensionMismatch BitVector(false)
+        @test_throws DimensionMismatch BitVector((iszero(i%4) for i in 1:n1, j in 1:n2))
+        @test_throws DimensionMismatch BitMatrix((isodd(i) for i in 1:3))
     end
 
     @testset "constructor from NTuple" begin
@@ -829,6 +846,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b1, b2)  BitMatrix
         @check_bit_operation broadcast(|, b1, b2)  BitMatrix
         @check_bit_operation broadcast(xor, b1, b2)  BitMatrix
+        @check_bit_operation broadcast(nand, b1, b2)  BitMatrix
+        @check_bit_operation broadcast(nor, b1, b2)  BitMatrix
         @check_bit_operation (+)(b1, b2)  Matrix{Int}
         @check_bit_operation (-)(b1, b2)  Matrix{Int}
         @check_bit_operation broadcast(*, b1, b2) BitMatrix
@@ -858,6 +877,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b0, b0)  BitVector
         @check_bit_operation broadcast(|, b0, b0)  BitVector
         @check_bit_operation broadcast(xor, b0, b0)  BitVector
+        @check_bit_operation broadcast(nand, b0, b0)  BitVector
+        @check_bit_operation broadcast(nor, b0, b0)  BitVector
         @check_bit_operation broadcast(*, b0, b0) BitVector
         @check_bit_operation (*)(b0, b0') BitMatrix
     end
@@ -868,6 +889,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(|, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(xor, b1, i2)  Matrix{Int}
+        @check_bit_operation broadcast(nand, b1, i2)  Matrix{Int}
+        @check_bit_operation broadcast(nor, b1, i2)  Matrix{Int}
         @check_bit_operation (+)(b1, i2)  Matrix{Int}
         @check_bit_operation (-)(b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(*, b1, i2) Matrix{Int}
@@ -899,6 +922,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(|, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(xor, i1, b2)  Matrix{Int}
+        @check_bit_operation broadcast(nand, i1, b2)  Matrix{Int}
+        @check_bit_operation broadcast(nor, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(+, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(-, i1, b2)  Matrix{Int}
         @check_bit_operation broadcast(*, i1, b2) Matrix{Int}
@@ -906,6 +931,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(|, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(xor, u1, b2)  Matrix{UInt8}
+        @check_bit_operation broadcast(nand, u1, b2)  Matrix{UInt8}
+        @check_bit_operation broadcast(nor, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(+, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(-, u1, b2)  Matrix{UInt8}
         @check_bit_operation broadcast(*, u1, b2) Matrix{UInt8}
@@ -983,6 +1010,14 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(xor, b1, false)  BitMatrix
         @check_bit_operation broadcast(xor, true, b1)   BitMatrix
         @check_bit_operation broadcast(xor, false, b1)  BitMatrix
+        @check_bit_operation broadcast(nand, b1, true)   BitMatrix
+        @check_bit_operation broadcast(nand, b1, false)  BitMatrix
+        @check_bit_operation broadcast(nand, true, b1)   BitMatrix
+        @check_bit_operation broadcast(nand, false, b1)  BitMatrix
+        @check_bit_operation broadcast(nor, b1, true)   BitMatrix
+        @check_bit_operation broadcast(nor, b1, false)  BitMatrix
+        @check_bit_operation broadcast(nor, true, b1)   BitMatrix
+        @check_bit_operation broadcast(nor, false, b1)  BitMatrix
         @check_bit_operation broadcast(+, b1, true)   Matrix{Int}
         @check_bit_operation broadcast(+, b1, false)  Matrix{Int}
         @check_bit_operation broadcast(-, b1, true)   Matrix{Int}
@@ -999,12 +1034,18 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b1, b2)  BitMatrix
         @check_bit_operation broadcast(|, b1, b2)  BitMatrix
         @check_bit_operation broadcast(xor, b1, b2)  BitMatrix
+        @check_bit_operation broadcast(nand, b1, b2)  BitMatrix
+        @check_bit_operation broadcast(nor, b1, b2)  BitMatrix
         @check_bit_operation broadcast(&, b2, b1)  BitMatrix
         @check_bit_operation broadcast(|, b2, b1)  BitMatrix
         @check_bit_operation broadcast(xor, b2, b1)  BitMatrix
+        @check_bit_operation broadcast(nand, b2, b1)  BitMatrix
+        @check_bit_operation broadcast(nor, b2, b1)  BitMatrix
         @check_bit_operation broadcast(&, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(|, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(xor, b1, i2)  Matrix{Int}
+        @check_bit_operation broadcast(nand, b1, i2)  Matrix{Int}
+        @check_bit_operation broadcast(nor, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(+, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(-, b1, i2)  Matrix{Int}
         @check_bit_operation broadcast(*, b1, i2) Matrix{Int}
@@ -1015,6 +1056,8 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(&, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(|, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(xor, b1, u2)  Matrix{UInt8}
+        @check_bit_operation broadcast(nand, b1, u2)  Matrix{UInt8}
+        @check_bit_operation broadcast(nor, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(+, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(-, b1, u2)  Matrix{UInt8}
         @check_bit_operation broadcast(*, b1, u2) Matrix{UInt8}
@@ -1083,6 +1126,14 @@ timesofar("unary arithmetic")
         @check_bit_operation broadcast(xor, b1, transpose(b3))  BitMatrix
         @check_bit_operation broadcast(xor, b2, b1)             BitMatrix
         @check_bit_operation broadcast(xor, transpose(b3), b1)  BitMatrix
+        @check_bit_operation broadcast(nand, b1, b2)             BitMatrix
+        @check_bit_operation broadcast(nand, b1, transpose(b3))  BitMatrix
+        @check_bit_operation broadcast(nand, b2, b1)             BitMatrix
+        @check_bit_operation broadcast(nand, transpose(b3), b1)  BitMatrix
+        @check_bit_operation broadcast(nor, b1, b2)             BitMatrix
+        @check_bit_operation broadcast(nor, b1, transpose(b3))  BitMatrix
+        @check_bit_operation broadcast(nor, b2, b1)             BitMatrix
+        @check_bit_operation broadcast(nor, transpose(b3), b1)  BitMatrix
         @check_bit_operation broadcast(+, b1, b2)             Matrix{Int}
         @check_bit_operation broadcast(+, b1, transpose(b3))  Matrix{Int}
         @check_bit_operation broadcast(+, b2, b1)             Matrix{Int}
@@ -1177,8 +1228,8 @@ timesofar("datamove")
 
         @check_bit_operation findfirst(x->x, b1)     Union{Int,Nothing}
         @check_bit_operation findfirst(x->!x, b1)    Union{Int,Nothing}
-        @check_bit_operation findfirst(x->true, b1)  Union{Int,Nothing}
-        @check_bit_operation findfirst(x->false, b1) Union{Int,Nothing}
+        @check_bit_operation findfirst(Returns(true ), b1)  Union{Int,Nothing}
+        @check_bit_operation findfirst(Returns(false), b1) Union{Int,Nothing}
 
         @check_bit_operation findall(b1) Vector{Int}
     end
@@ -1219,6 +1270,9 @@ timesofar("datamove")
         @check_bit_operation findall(falses(t)) ret_type
         @check_bit_operation findall(bitrand(t)) ret_type
     end
+
+    @test count(trues(2, 2), init=0x03) === 0x07
+    @test count(trues(2, 2, 2), dims=2) == fill(2, 2, 1, 2)
 end
 
 timesofar("find")
@@ -1269,49 +1323,51 @@ timesofar("find")
     @test_throws BoundsError findprevnot(b2, 1001)
     @test_throws BoundsError findprev(!, b2, 1001)
     @test_throws BoundsError findprev(identity, b1, 1001)
-    @test_throws BoundsError findprev(x->false, b1, 1001)
-    @test_throws BoundsError findprev(x->true, b1, 1001)
+    @test_throws BoundsError findprev(Returns(false), b1, 1001)
+    @test_throws BoundsError findprev(Returns(true ), b1, 1001)
     @test findprev(b1, 1000) == findprevnot(b2, 1000) == findprev(!, b2, 1000) == 777
     @test findprev(b1, 777)  == findprevnot(b2, 777)  == findprev(!, b2, 777)  == 777
     @test findprev(b1, 776)  == findprevnot(b2, 776)  == findprev(!, b2, 776)  == 77
     @test findprev(b1, 77)   == findprevnot(b2, 77)   == findprev(!, b2, 77)   == 77
     @test findprev(b1, 76)   == findprevnot(b2, 76)   == findprev(!, b2, 76)   == nothing
     @test findprev(b1, -1)   == findprevnot(b2, -1)   == findprev(!, b2, -1)   == nothing
-    @test findprev(identity, b1, -1) == findprev(x->false, b1, -1) == findprev(x->true, b1, -1) == nothing
+    @test findprev(identity, b1, -1) == nothing
+    @test findprev(Returns(false), b1, -1) == nothing
+    @test findprev(Returns(true), b1, -1) == nothing
     @test_throws BoundsError findnext(b1, -1)
     @test_throws BoundsError findnextnot(b2, -1)
     @test_throws BoundsError findnext(!, b2, -1)
     @test_throws BoundsError findnext(identity, b1, -1)
-    @test_throws BoundsError findnext(x->false, b1, -1)
-    @test_throws BoundsError findnext(x->true, b1, -1)
+    @test_throws BoundsError findnext(Returns(false), b1, -1)
+    @test_throws BoundsError findnext(Returns(true), b1, -1)
     @test findnext(b1, 1)    == findnextnot(b2, 1)    == findnext(!, b2, 1)    == 77
     @test findnext(b1, 77)   == findnextnot(b2, 77)   == findnext(!, b2, 77)   == 77
     @test findnext(b1, 78)   == findnextnot(b2, 78)   == findnext(!, b2, 78)   == 777
     @test findnext(b1, 777)  == findnextnot(b2, 777)  == findnext(!, b2, 777)  == 777
     @test findnext(b1, 778)  == findnextnot(b2, 778)  == findnext(!, b2, 778)  == nothing
     @test findnext(b1, 1001) == findnextnot(b2, 1001) == findnext(!, b2, 1001) == nothing
-    @test findnext(identity, b1, 1001) == findnext(x->false, b1, 1001) == findnext(x->true, b1, 1001) == nothing
+    @test findnext(identity, b1, 1001) == findnext(Returns(false), b1, 1001) == findnext(Returns(true), b1, 1001) == nothing
 
     @test findlast(b1) == Base.findlastnot(b2) == 777
     @test findfirst(b1) == Base.findfirstnot(b2) == 77
 
     b0 = BitVector()
-    @test findprev(x->true, b0, -1) == nothing
-    @test_throws BoundsError findprev(x->true, b0, 1)
-    @test_throws BoundsError findnext(x->true, b0, -1)
-    @test findnext(x->true, b0, 1) == nothing
+    @test findprev(Returns(true), b0, -1) == nothing
+    @test_throws BoundsError findprev(Returns(true), b0, 1)
+    @test_throws BoundsError findnext(Returns(true), b0, -1)
+    @test findnext(Returns(true), b0, 1) == nothing
 
     b1 = falses(10)
-    @test findprev(x->true, b1, 5) == 5
-    @test findnext(x->true, b1, 5) == 5
-    @test findprev(x->true, b1, -1) == nothing
-    @test findnext(x->true, b1, 11) == nothing
-    @test findprev(x->false, b1, 5) == nothing
-    @test findnext(x->false, b1, 5) == nothing
-    @test findprev(x->false, b1, -1) == nothing
-    @test findnext(x->false, b1, 11) == nothing
-    @test_throws BoundsError findprev(x->true, b1, 11)
-    @test_throws BoundsError findnext(x->true, b1, -1)
+    @test findprev(Returns(true), b1, 5) == 5
+    @test findnext(Returns(true), b1, 5) == 5
+    @test findprev(Returns(true), b1, -1) == nothing
+    @test findnext(Returns(true), b1, 11) == nothing
+    @test findprev(Returns(false), b1, 5) == nothing
+    @test findnext(Returns(false), b1, 5) == nothing
+    @test findprev(Returns(false), b1, -1) == nothing
+    @test findnext(Returns(false), b1, 11) == nothing
+    @test_throws BoundsError findprev(Returns(true), b1, 11)
+    @test_throws BoundsError findnext(Returns(true), b1, -1)
 
     @testset "issue 32568" for T = (UInt, BigInt)
         for x = (1, 2)
@@ -1376,12 +1432,14 @@ timesofar("reductions")
         b2 = bitrand(l)
         @test map(~, b1) == map(x->~x, b1) == broadcast(~, b1)
         @test map(identity, b1) == map(x->x, b1) == b1
-        @test map(zero, b1) == map(x->false, b1) == falses(l)
-        @test map(one, b1) == map(x->true, b1) == trues(l)
+        @test map(zero, b1) == map(Returns(false), b1) == falses(l)
+        @test map(one, b1) == map(Returns(true), b1) == trues(l)
 
         @test map(&, b1, b2) == map((x,y)->x&y, b1, b2) == broadcast(&, b1, b2)
         @test map(|, b1, b2) == map((x,y)->x|y, b1, b2) == broadcast(|, b1, b2)
         @test map(⊻, b1, b2) == map((x,y)->x⊻y, b1, b2) == broadcast(⊻, b1, b2) == broadcast(xor, b1, b2)
+        @test map(⊼, b1, b2) == map((x,y)->x⊼y, b1, b2) == broadcast(⊼, b1, b2) == broadcast(nand, b1, b2)
+        @test map(⊽, b1, b2) == map((x,y)->x⊽y, b1, b2) == broadcast(⊽, b1, b2) == broadcast(nor, b1, b2)
 
         @test map(^, b1, b2) == map((x,y)->x^y, b1, b2) == b1 .^ b2
         @test map(*, b1, b2) == map((x,y)->x*y, b1, b2) == b1 .* b2
@@ -1401,8 +1459,8 @@ timesofar("reductions")
             @test map!(~, b, b1) == map!(x->~x, b, b1) == broadcast(~, b1) == b
             @test map!(!, b, b1) == map!(x->!x, b, b1) == broadcast(~, b1) == b
             @test map!(identity, b, b1) == map!(x->x, b, b1) == b1 == b
-            @test map!(zero, b, b1) == map!(x->false, b, b1) == falses(l) == b
-            @test map!(one, b, b1) == map!(x->true, b, b1) == trues(l) == b
+            @test map!(zero, b, b1) == map!(Returns(false), b, b1) == falses(l) == b
+            @test map!(one, b, b1) == map!(Returns(true), b, b1) == trues(l) == b
 
             @test map!(&, b, b1, b2) == map!((x,y)->x&y, b, b1, b2) == broadcast(&, b1, b2) == b
             @test map!(|, b, b1, b2) == map!((x,y)->x|y, b, b1, b2) == broadcast(|, b1, b2) == b
@@ -1660,3 +1718,67 @@ end
     @check_bit_operation all!(falses(100), trues(100, 100))
     @check_bit_operation all!(falses(1000), trues(1000, 100))
 end
+
+@testset "multidimensional concatenation returns BitArrays" begin
+    a = BitVector(ones(5))
+    @test typeof([a ;;; a]) <: BitArray
+    @test typeof([a a ;;; a a]) <: BitArray
+    @test typeof([a a ;;; [a a]]) <: BitArray
+end
+
+@testset "deleteat! additional tests" begin
+    for v in ([1, 2, 3], [true, true, true], trues(3))
+        @test_throws BoundsError deleteat!(v, true:true)
+    end
+
+    for v in ([1], [true], trues(1))
+        @test length(deleteat!(v, false:false)) == 1
+        @test isempty(deleteat!(v, true:true))
+    end
+
+    x = trues(3)
+    x[3] = false
+    @test deleteat!(x, [UInt8(2)]) == [true, false]
+    @test_throws ArgumentError deleteat!(x, Any[true])
+    @test_throws ArgumentError deleteat!(x, Any[1, true])
+    @test_throws ArgumentError deleteat!(x, Any[2, 1])
+    @test_throws BoundsError deleteat!(x, Any[4])
+    @test_throws BoundsError deleteat!(x, Any[2, 4])
+
+    function test_equivalence(n::Int)
+        x1 = rand(Bool, n)
+        x2 = BitVector(x1)
+        inds1 = rand(Bool, n)
+        inds2 = BitVector(inds1)
+        return deleteat!(copy(x1), findall(inds1)) ==
+               deleteat!(copy(x1), inds1) ==
+               deleteat!(copy(x2), inds1) ==
+               deleteat!(copy(x1), inds2) ==
+               deleteat!(copy(x2), inds2)
+    end
+
+    Random.seed!(1234)
+    for n in 1:20, _ in 1:100
+        @test test_equivalence(n)
+    end
+end
+
+@testset "fill! for BitArray with contiguous view (#42795)" begin
+    # change values in range `rangein`, `rangeout` should stay unchanged
+    for (rangein, rangeout) in ((1:5, 6:10), (5:10, 1:4))
+        bitvector = trues(10)
+        bitarray  = trues(10, 10)
+        viewvector = view(bitvector, rangein)
+        viewarray  = view(bitarray, rangein, rangein)
+        @test which(fill!, (typeof(viewvector), Bool)).sig == Tuple{typeof(fill!), SubArray{Bool, <:Any, <:BitArray, <:Tuple{AbstractUnitRange{Int}}}, Any}
+        @test which(fill!, (typeof(viewarray), Bool)).sig == Tuple{typeof(fill!), SubArray{Bool, <:Any, <:BitArray, <:Tuple{AbstractUnitRange{Int}, Vararg{Union{Int,AbstractUnitRange{Int}}}}}, Any}
+        fill!(viewvector, false)
+        fill!(viewarray, false)
+        @test all(bitvector[rangein] .== false)
+        @test all(bitvector[rangeout] .== true)
+        @test all(bitarray[rangein, rangein] .== false)
+        @test all(bitarray[rangeout, rangeout] .== true)
+        @test all(bitarray[rangeout, rangein] .== true)
+        @test all(bitarray[rangein, rangeout] .== true)
+    end
+end
diff --git a/test/bitset.jl b/test/bitset.jl
index f227bbe180080b..1919da4f3702a3 100644
--- a/test/bitset.jl
+++ b/test/bitset.jl
@@ -196,7 +196,7 @@ end
 
     @test intersect(BitSet([1,2,3])) == BitSet([1,2,3])
     @test intersect(BitSet(1:7), BitSet(3:10)) ==
-    	  intersect(BitSet(3:10), BitSet(1:7)) == BitSet(3:7)
+          intersect(BitSet(3:10), BitSet(1:7)) == BitSet(3:7)
     @test intersect(BitSet(1:10), BitSet(1:4), 1:5, [2,3,10]) == BitSet([2,3])
 end
 
diff --git a/test/boundscheck_exec.jl b/test/boundscheck_exec.jl
index 62a20921bd44ea..715700e00378fc 100644
--- a/test/boundscheck_exec.jl
+++ b/test/boundscheck_exec.jl
@@ -251,5 +251,48 @@ if bc_opt == bc_default || bc_opt == bc_off
     @test occursin("vector.body", sprint(code_llvm, g27079, Tuple{Vector{Int}}))
 end
 
+# Boundschecking removal of indices with different type, see #40281
+getindex_40281(v, a, b, c) = @inbounds getindex(v, a, b, c)
+typed_40281 = sprint((io, args...) -> code_warntype(io, args...; optimize=true), getindex_40281, Tuple{Array{Float64, 3}, Int, UInt8, Int})
+if bc_opt == bc_default || bc_opt == bc_off
+    @test occursin("arrayref(false", typed_40281)
+    @test !occursin("arrayref(true", typed_40281)
+end
+
+# Given this is a sub-processed test file, not using @testsets avoids
+# leaking the report print into the Base test runner report
+begin # Pass inbounds meta to getindex on CartesianIndices (#42115)
+    @inline getindex_42115(r, i) = @inbounds getindex(r, i)
+    @inline getindex_42115(r, i, j) = @inbounds getindex(r, i, j)
+
+    R = CartesianIndices((5, 5))
+    if bc_opt == bc_on
+        @test_throws BoundsError getindex_42115(R, -1, -1)
+        @test_throws BoundsError getindex_42115(R, 1, -1)
+    else
+        @test getindex_42115(R, -1, -1) == CartesianIndex(-1, -1)
+        @test getindex_42115(R, 1, -1) == CartesianIndex(1, -1)
+    end
+
+    if bc_opt == bc_on
+        @test_throws BoundsError getindex_42115(R, CartesianIndices((6, 6)))
+        @test_throws BoundsError getindex_42115(R, -1:3, :)
+    else
+        @test getindex_42115(R, CartesianIndices((6, 6))) == CartesianIndices((6, 6))
+        @test getindex_42115(R, -1:3, :) == CartesianIndices((-1:3, 1:5))
+    end
+end
+
+
+# Test that --check-bounds=off doesn't permit const prop of indices into
+# function that are not dynamically reachable (the same test for @inbounds
+# is in the compiler tests).
+function f_boundscheck_elim(n)
+    # Inbounds here assumes that this is only ever called with n==0, but of
+    # course the compiler has no way of knowing that, so it must not attempt
+    # to run the @inbounds `getfield(sin, 1)`` that ntuple generates.
+    ntuple(x->getfield(sin, x), n)
+end
+@test Tuple{} <: code_typed(f_boundscheck_elim, Tuple{Int})[1][2]
 
 end
diff --git a/test/broadcast.jl b/test/broadcast.jl
index 5f4b7f853c5ddd..113614505ba742 100644
--- a/test/broadcast.jl
+++ b/test/broadcast.jl
@@ -365,7 +365,7 @@ end
 let f17314 = x -> x < 0 ? false : x
     @test eltype(broadcast(f17314, 1:3)) === Int
     @test eltype(broadcast(f17314, -1:1)) === Integer
-    @test eltype(broadcast(f17314, Int[])) == Union{Bool,Int}
+    @test eltype(broadcast(f17314, Int[])) === Integer
 end
 let io = IOBuffer()
     broadcast(x->print(io,x), 1:5) # broadcast with side effects
@@ -516,7 +516,7 @@ Base.BroadcastStyle(::Type{T}) where {T<:AD2Dim} = AD2DimStyle()
     @test a .+ 1 .* 2  == @inferred(fadd2(aa))
     @test a .* a' == @inferred(fprod(aa))
     @test isequal(a .+ [missing; 1:9], fadd3(aa))
-    @test_broken Core.Compiler.return_type(fadd3, (typeof(aa),)) <: Array19745{<:Union{Float64, Missing}}
+    @test Core.Compiler.return_type(fadd3, Tuple{typeof(aa),}) <: Array19745{<:Union{Float64, Missing}}
     @test isa(aa .+ 1, Array19745)
     @test isa(aa .+ 1 .* 2, Array19745)
     @test isa(aa .* aa', Array19745)
@@ -691,16 +691,19 @@ end
     @test a == [1 1; 2 2; 3 3]
 end
 
-@testset "scalar .=" begin
-    A = [[1,2,3],4:5,6]
+@testset "scalar .= and promotion" begin
+    A = [[1, 2, 3], 4:5, 6]
+    @test A isa Vector{Any}
     A[1] .= 0
-    @test A[1] == [0,0,0]
-    @test_throws ErrorException A[2] .= 0
+    @test A[1] == [0, 0, 0]
+    @test_throws Base.CanonicalIndexError A[2] .= 0
     @test_throws MethodError A[3] .= 0
-    A = [[1,2,3],4:5]
+    A = [[1, 2, 3], 4:5]
+    @test A isa Vector{Vector{Int}}
     A[1] .= 0
-    @test A[1] == [0,0,0]
-    @test_throws ErrorException A[2] .= 0
+    A[2] .= 0
+    @test A[1] == [0, 0, 0]
+    @test A[2] == [0, 0]
 end
 
 # Issue #22180
@@ -914,6 +917,12 @@ end
     # hit the `foldl` branch:
     @test IndexStyle(bcraw) == IndexCartesian()
     @test reduce(paren, bcraw) == foldl(paren, xs)
+
+    # issue #41055
+    bc = Broadcast.instantiate(Broadcast.broadcasted(Base.literal_pow, Ref(^), [1,2], Ref(Val(2))))
+    @test sum(bc, dims=1, init=0) == [5]
+    bc = Broadcast.instantiate(Broadcast.broadcasted(*, ['a','b'], 'c'))
+    @test prod(bc, dims=1, init="") == ["acbc"]
 end
 
 # treat Pair as scalar:
@@ -944,3 +953,137 @@ p = rand(4,4); r = rand(2,4);
 p0 = copy(p)
 @views @. p[1:2, :] += r
 @test p[1:2, :] ≈ p0[1:2, :] + r
+
+@test identity(.+) == Broadcast.BroadcastFunction(+)
+@test identity.(.*) == Broadcast.BroadcastFunction(*)
+@test map(.+, [[1,2], [3,4]], [5, 6]) == [[6,7], [9,10]]
+@test repr(.!) == "Base.Broadcast.BroadcastFunction(!)"
+@test eval(:(.+)) == Base.BroadcastFunction(+)
+
+@testset "Issue #5187: Broadcasting of short-circuiting ops" begin
+    ex = Meta.parse("A .< 1 .|| A .> 2")
+    @test ex == :((A .< 1) .|| (A .> 2))
+    @test ex.head == :.||
+    ex = Meta.parse("A .< 1 .&& A .> 2")
+    @test ex == :((A .< 1) .&& (A .> 2))
+    @test ex.head == :.&&
+
+    A = -1:4
+    @test (A .< 1 .|| A .> 2) == [true, true, false, false, true, true]
+    @test (A .>= 1 .&& A .<= 2) == [false, false, true, true, false, false]
+
+    mutable struct F5187; x; end
+    (f::F5187)(x) = (f.x += x)
+    @test (iseven.(1:4) .&& (F5187(0)).(ones(4))) == [false, 1, false, 2]
+    @test (iseven.(1:4) .|| (F5187(0)).(ones(4))) == [1, true, 2, true]
+    r = 1:4; o = ones(4); f = F5187(0);
+    @test (@. iseven(r) && f(o)) == [false, 1, false, 2]
+    @test (@. iseven(r) || f(o)) == [3, true, 4, true]
+
+    @test (iseven.(1:8) .&& iseven.((F5187(0)).(ones(8))) .&& (F5187(0)).(ones(8))) == [false,false,false,1,false,false,false,2]
+    @test (iseven.(1:8) .|| iseven.((F5187(0)).(ones(8))) .|| (F5187(0)).(ones(8))) == [1,true,true,true,2,true,true,true]
+    r = 1:8; o = ones(8); f1 = F5187(0); f2 = F5187(0)
+    @test (@. iseven(r) && iseven(f1(o)) && f2(o)) == [false,false,false,1,false,false,false,2]
+    @test (@. iseven(r) || iseven(f1(o)) || f2(o)) == [3,true,true,true,4,true,true,true]
+    @test (iseven.(1:8) .&& iseven.((F5187(0)).(ones(8))) .&& (F5187(0)).(ones(8))) == [false,false,false,1,false,false,false,2]
+    @test (iseven.(1:8) .|| iseven.((F5187(0)).(ones(8))) .|| (F5187(0)).(ones(8))) == [1,true,true,true,2,true,true,true]
+end
+
+@testset "Issue #28382: inferrability of broadcast with Union eltype" begin
+    @test isequal([1, 2] .+ [3.0, missing], [4.0, missing])
+    @test Core.Compiler.return_type(broadcast, Tuple{typeof(+), Vector{Int},
+                                                     Vector{Union{Float64, Missing}}}) ==
+        Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
+    @test Core.Compiler.return_type(+, Tuple{Vector{Int},
+                                             Vector{Union{Float64, Missing}}}) ==
+        Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
+    @test isequal(tuple.([1, 2], [3.0, missing]), [(1, 3.0), (2, missing)])
+    @test Core.Compiler.return_type(broadcast, Tuple{typeof(tuple), Vector{Int},
+                                                     Vector{Union{Float64, Missing}}}) ==
+        Union{Vector{Tuple{Int, Missing}}, Vector{Tuple{Int, Any}}, Vector{Tuple{Int, Float64}}}
+    # Check that corner cases do not throw an error
+    @test isequal(broadcast(x -> x === 1 ? nothing : x, [1, 2, missing]),
+                  [nothing, 2, missing])
+    @test isequal(broadcast(x -> x === 1 ? nothing : x, Any[1, 2, 3.0, missing]),
+                  [nothing, 2, 3, missing])
+    @test broadcast((x,y)->(x==1 ? 1.0 : x, y), [1 2 3], ["a", "b", "c"]) ==
+        [(1.0, "a") (2, "a") (3, "a")
+         (1.0, "b") (2, "b") (3, "b")
+         (1.0, "c") (2, "c") (3, "c")]
+    @test typeof.([iszero, isdigit]) == [typeof(iszero), typeof(isdigit)]
+    @test typeof.([iszero, iszero]) == [typeof(iszero), typeof(iszero)]
+    @test isequal(identity.(Vector{<:Union{Int, Missing}}[[1, 2],[missing, 1]]),
+                  [[1, 2],[missing, 1]])
+    @test broadcast(i -> ((x=i, y=(i==1 ? 1 : "a")), 3), 1:4) isa
+        Vector{Tuple{NamedTuple{(:x, :y)}, Int}}
+end
+
+@testset "Issue #28382: eltype inconsistent with getindex" begin
+    struct Cyclotomic <: Number
+    end
+
+    Base.eltype(::Type{<:Cyclotomic}) = Tuple{Int,Int}
+
+    Base.:*(c::T, x::Cyclotomic) where {T<:Real} = [1, 2]
+    Base.:*(x::Cyclotomic, c::T) where {T<:Real} = [1, 2]
+
+    @test Cyclotomic() .* [2, 3] == [[1, 2], [1, 2]]
+end
+
+@testset "inplace broadcast with trailing singleton dims" begin
+    for (a, b, c) in (([1, 2], reshape([3 4], :, 1), reshape([5, 6], :, 1, 1)),
+            ([1 2; 3 4], reshape([5 6; 7 8], 2, 2, 1), reshape([9 10; 11 12], 2, 2, 1, 1)))
+
+        a_ = copy(a)
+        a_ .= b
+        @test a_ == dropdims(b, dims=(findall(==(1), size(b))...,))
+
+        a_ = copy(a)
+        a_ .= b
+        @test a_ == dropdims(b, dims=(findall(==(1), size(b))...,))
+
+        a_ = copy(a)
+        a_ .= b .+ c
+        @test a_ == dropdims(b .+ c, dims=(findall(==(1), size(c))...,))
+
+        a_ = copy(a)
+        a_ .*= c
+        @test a_ == dropdims(a .* c, dims=(findall(==(1), size(c))...,))
+    end
+end
+
+@testset "Issue #40309: still gives a range after #40320" begin
+    @test Base.broadcasted_kwsyntax(+, [1], [2]) isa Broadcast.Broadcasted{<:Any, <:Any, typeof(+)}
+    @test Broadcast.BroadcastFunction(+)(2:3, 2:3) == 4:2:6
+    @test Broadcast.BroadcastFunction(+)(2:3, 2:3) isa AbstractRange
+end
+
+@testset "#42063" begin
+    buf = IOBuffer()
+    @test println.(buf, [1,2,3]) == [nothing, nothing, nothing]
+    @test String(take!(buf)) == "1\n2\n3\n"
+end
+
+@testset "Memory allocation inconsistency in broadcasting #41565" begin
+    function test(y)
+        y .= 0 .- y ./ (y.^2) # extra allocation
+        return y
+    end
+    arr = rand(1000)
+    @allocated test(arr)
+    @test (@allocated test(arr)) == 0
+end
+
+@testset "Fix type unstable .&& #43470" begin
+    function test(x, y)
+        return (x .> 0.0) .&& (y .> 0.0)
+    end
+    x = randn(2)
+    y = randn(2)
+    @inferred(test(x, y)) == [0, 0]
+end
+
+# test that `Broadcast` definition is defined as total and eligible for concrete evaluation
+import Base.Broadcast: BroadcastStyle, DefaultArrayStyle
+@test Base.infer_effects(BroadcastStyle, (DefaultArrayStyle{1},DefaultArrayStyle{2},)) |>
+    Core.Compiler.is_concrete_eval_eligible
diff --git a/test/cartesian.jl b/test/cartesian.jl
index e769d03ae80358..b3cb8315decad7 100644
--- a/test/cartesian.jl
+++ b/test/cartesian.jl
@@ -7,6 +7,379 @@ ex = Base.Cartesian.exprresolve(:(if 5 > 4; :x; else :y; end))
 @test Base.Cartesian.lreplace!("val_col", Base.Cartesian.LReplace{String}(:col, "col", 1)) == "val_1"
 @test Base.setindex(CartesianIndex(1,5,4),3,2) == CartesianIndex(1, 3, 4)
 
+@testset "CartesianIndices constructions" begin
+    @testset "AbstractUnitRange" begin
+        for oinds in [
+            (2, 3),
+            (UInt8(2), 3),
+            (2, UInt8(3)),
+            (2, 1:3),
+            (Base.OneTo(2), 1:3)
+        ]
+            R = CartesianIndices(oinds)
+            @test size(R) == (2, 3)
+            @test axes(R) == (Base.OneTo(2), Base.OneTo(3))
+            @test step.(R.indices) == (1, 1)
+            @test step(R) == CartesianIndex(1, 1)
+
+            @test R[begin] == CartesianIndex(1, 1)
+            @test R[2] == CartesianIndex(2, 1)
+            @test R[1, 2] == CartesianIndex(1, 2)
+            @test R[end] == CartesianIndex(2, 3)
+        end
+        @test CartesianIndices((2, 3)) == CartesianIndex(1, 1):CartesianIndex(2, 3)
+
+        R = CartesianIndices((0:5, 0:5))
+        @test R[begin] == R[1] == first(R) == CartesianIndex(0, 0)
+        @test R[2, 1] == R[2] == CartesianIndex(1, 0)
+        @test R[1, 2] == R[7] == CartesianIndex(0, 1)
+        @test R[end] == R[length(R)] == last(R) == CartesianIndex(5, 5)
+
+        for oinds in [(2, ), (2, 3), (2, 3, 4)]
+            R = CartesianIndices(oinds)
+            @test eltype(R) == CartesianIndex{length(oinds)}
+            @test ndims(R) == length(oinds)
+            @test size(R) == oinds
+        end
+
+        # generic iterators doesn't have axes interface
+        iter = Iterators.repeated([1 2], 4)
+        @test_throws MethodError CartesianIndices(iter)
+    end
+
+    @testset "Step Range" begin
+        for oinds in [
+            (2, 1:2:6),
+            (Base.OneTo(2), 1:2:6),
+            (UInt8(2), 1:2:6),
+            (2, UInt8(1):UInt8(2):UInt8(6))
+        ]
+            R = CartesianIndices(oinds)
+            @test size(R) == (2, 3)
+            @test axes(R) == (Base.OneTo(2), Base.OneTo(3))
+            @test step.(R.indices) == (1, 2)
+            @test step(R) == CartesianIndex(1, 2)
+
+            @test R[begin] == CartesianIndex(1, 1)
+            @test R[2] == CartesianIndex(2, 1)
+            @test R[1, 2] == CartesianIndex(1, 3)
+            @test R[end] == CartesianIndex(2, 5)
+        end
+
+        @test CartesianIndices((1:2:5, 1:3:7)) == CartesianIndex(1, 1):CartesianIndex(2,3):CartesianIndex(5,7)
+
+        R = CartesianIndex(0, 0):CartesianIndex(2, 3):CartesianIndex(5, 7)
+        @test R[begin] == R[1] == first(R) == CartesianIndex(0, 0)
+        @test R[2, 1] == R[2] == CartesianIndex(2, 0)
+        @test R[1, 2] == R[4] == CartesianIndex(0, 3)
+        @test R[end] == R[length(R)] == last(R) == CartesianIndex(4, 6)
+
+        for oinds in [(1:2:5, ), (1:2:5, 1:3:7), (1:2:5, 1:3:7, 1:4:11)]
+            R = CartesianIndices(oinds)
+            @test eltype(R) == CartesianIndex{length(oinds)}
+            @test ndims(R) == length(oinds)
+            @test size(R) == length.(oinds)
+        end
+
+        R = CartesianIndices((1:2:5, 7:-3:1))
+        @test R == CartesianIndex(1, 7):CartesianIndex(2,-3):CartesianIndex(5, 1)
+        @test step.(R.indices) == (2, -3)
+        @test R[begin] == R[1] == first(R) == CartesianIndex(1, 7)
+        @test R[2, 1] == R[2] == CartesianIndex(3, 7)
+        @test R[1, 2] == R[4] == CartesianIndex(1, 4)
+        @test R[end] == R[length(R)] == last(R) == CartesianIndex(5, 1)
+    end
+
+    @testset "IdentityUnitRange" begin
+        function _collect(A)
+            rst = eltype(A)[]
+            for i in A
+                push!(rst, i)
+            end
+            rst
+        end
+        function _simd_collect(A)
+            rst = eltype(A)[]
+            @simd for i in A
+                push!(rst, i)
+            end
+            rst
+        end
+
+        for oinds in [
+            (Base.IdentityUnitRange(0:1),),
+            (Base.IdentityUnitRange(0:1), Base.IdentityUnitRange(0:2)),
+            (Base.IdentityUnitRange(0:1), Base.OneTo(3)),
+        ]
+            R = CartesianIndices(oinds)
+            @test axes(R) === oinds
+            @test _collect(R) == _simd_collect(R) == vec(collect(R))
+        end
+        R = CartesianIndices((Base.IdentityUnitRange(0:1), 0:1))
+        @test axes(R) == (Base.IdentityUnitRange(0:1), Base.OneTo(2))
+
+    end
+
+    for oinds in [(2, 3), (0:1, 0:2), (0:1:1, 0:1:2), (Base.IdentityUnitRange(0:1), Base.IdentityUnitRange(0:2)) ]
+        R = CartesianIndices(oinds)
+        @test vec(LinearIndices(R)) == 1:6
+    end
+    # TODO: non-1 steps are not supported yet, but may change in the future
+    @test_throws ArgumentError LinearIndices(CartesianIndices((1:2:5, )))
+    @test_throws ArgumentError LinearIndices(CartesianIndices((1:1:5, 1:2:5)))
+end
+
+module TestOffsetArray
+    isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+    using .Main.OffsetArrays
+    using Test
+
+    A = OffsetArray(rand(2, 3), -1, -1)
+    R = CartesianIndices(A)
+    @test R == CartesianIndices((0:1, 0:2))
+    @test axes(R) == (0:1, 0:2)
+    for i in eachindex(A)
+        @test A[i] == A[R[i]]
+    end
+    for i in R
+        @test A[i] == A[Tuple(i)...]
+    end
+end
+
+@testset "CartesianIndices getindex" begin
+    @testset "0D array" begin
+        a = zeros()
+        c = CartesianIndices(a)
+        @test a[c] == a
+        @test c[c] === c
+        @test c[] == CartesianIndex()
+    end
+
+    @testset "AbstractUnitRange" begin
+        for oinds in [(2, ), (2, 3), (2, 3, 4)]
+            A = rand(1:10, oinds)
+            R = CartesianIndices(A)
+            @test R == CartesianIndices(oinds)
+
+            @test A[R] == A
+            @test axes(A) == axes(R)
+            @test all(i->A[i]==A[R[i]], eachindex(A))
+            @test all(i->A[i]==A[R[i]], R)
+            @test all(i->A[i]==A[R[i]], collect(R))
+            @test all(i->i in R, collect(R))
+
+            # Indexing a CartesianIndices with another CartesianIndices having the same ndims
+            # forwards the indexing to the component ranges and retains the wrapper
+            @test R[R] === R
+
+            R_array = collect(R)
+
+            all_onetoone = ntuple(x -> 1:1, Val(ndims(R)))
+            R2 = R[all_onetoone...]
+            @test R2 isa CartesianIndices{ndims(R)}
+
+            all_one = ntuple(x -> 1, Val(ndims(R)))
+            @test R2[all_one...] == R_array[all_one...]
+
+            @test R2 == R_array[all_onetoone...]
+
+            R3 = R[ntuple(x -> Colon(), Val(ndims(R)))...]
+            @test R3 === R
+
+            # test a mix of Colons and ranges
+            # up to two leading axes are colons, while the rest are UnitRanges
+            indstrailing = (1:1 for _ in min(ndims(R), 2)+1:ndims(R))
+            R4 = R[(Colon() for _ in 1:min(ndims(R), 2))..., indstrailing...]
+            @test R4 isa CartesianIndices{ndims(R)}
+            indsleading = CartesianIndices(axes(A)[1:min(ndims(A), 2)])
+            for I in indsleading
+                @test R4[I, indstrailing...] == R_array[I, indstrailing...]
+            end
+        end
+    end
+
+    @testset "StepRange" begin
+        for oinds in [(1:2:5, ), (1:2:5, 1:3:7), (1:2:5, 1:3:7, 1:4:11)]
+            A = rand(1:10, last.(oinds))
+            R = CartesianIndices(A)
+
+            SR = CartesianIndex(first.(oinds)):CartesianIndex(step.(oinds)):CartesianIndex(last.(oinds))
+            @test A[oinds...] == A[SR]
+            @test A[SR] == A[R[SR]]
+
+            # TODO: A[SR] == A[Linearindices(SR)] should hold for StepRange CartesianIndices
+            @test_broken A[SR] == A[LinearIndices(SR)]
+
+            # Create a CartesianIndices with StepRange indices to test indexing into it
+            R = CartesianIndices(oinds)
+            R_array = collect(R)
+
+            all_onetoone = ntuple(x -> 1:1, Val(ndims(R)))
+            R2 = R[all_onetoone...]
+            @test R2 isa CartesianIndices{ndims(R)}
+
+            all_one = ntuple(x -> 1, Val(ndims(R)))
+            @test R2[all_one...] == R_array[all_one...]
+            @test R2 == R_array[all_onetoone...]
+
+            R3 = R[ntuple(x -> Colon(), Val(ndims(R)))...]
+            @test R3 === R
+
+            # test a mix of Colons and ranges
+            # up to two leading axes are colons, while the rest are UnitRanges
+            indstrailing = (1:1 for _ in min(ndims(R), 2)+1:ndims(R))
+            R4 = R[(Colon() for _ in 1:min(ndims(R), 2))..., indstrailing...]
+            @test R4 isa CartesianIndices{ndims(R)}
+            indsleading = CartesianIndices(axes(R)[1:min(ndims(R), 2)])
+            for I in indsleading
+                @test R4[I, indstrailing...] == R_array[I, indstrailing...]
+            end
+        end
+
+        # CartesianIndices whole indices have a unit step may be their own axes
+        for oinds in [(1:1:4, ), (1:1:4, 1:1:5), (1:1:4, 1:1:5, 1:1:3)]
+            R = CartesianIndices(oinds)
+            @test R[R] === R
+            # test a mix of UnitRanges and StepRanges
+            R = CartesianIndices((oinds..., 1:3))
+            @test R[R] === R
+            R = CartesianIndices((1:3, oinds...))
+            @test R[R] === R
+        end
+    end
+
+    @testset "logical indexing of CartesianIndices with ranges" begin
+        c = CartesianIndices((1:0, 1:2))
+        c2 = c[true:false, 1:2]
+        @test c2 == c
+
+        for (inds, r) in Any[(1:2, false:true), (1:2, false:true:true),
+            (1:2:3, false:true), (1:2:3, false:true:true)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test c2[1, :] == c[2, :]
+        end
+
+        for (inds, r) in Any[(1:1, true:true), (1:1, true:true:true),
+            (1:1:1, true:true), (1:1:1, true:true:true)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test c2[1, :] == c[1, :]
+        end
+
+        for (inds, r) in Any[(1:1, false:false), (1:1, false:true:false),
+            (1:1:1, false:false), (1:1:1, false:true:false)]
+
+            c = CartesianIndices((inds, 1:2))
+            c2 = c[r, 1:2]
+            @test c2 isa CartesianIndices{ndims(c)}
+            @test size(c2, 1) == 0
+        end
+    end
+end
+
+@testset "range interface" begin
+    for (I, i, i_next) in [
+        (CartesianIndices((1:2:5, )), CartesianIndex(2, ), CartesianIndex(4, )),
+        (1:2:5, 2, 4),
+    ]
+        # consistent with ranges behavior
+        @test !(i in I)
+        @test iterate(I, i) == (i_next, i_next)
+    end
+
+    # check iteration behavior on boundary
+    R = CartesianIndex(1, 1):CartesianIndex(2, 3):CartesianIndex(4, 5)
+    @test R.indices == (1:2:3, 1:3:4)
+    i = CartesianIndex(4, 1)
+    i_next = CartesianIndex(1, 4)
+    @test !(i in R) && iterate(R, i) == (i_next, i_next)
+
+    for R in [
+        CartesianIndices((1:-1:-1, 1:2:5)),
+        CartesianIndices((2, 3)),
+        CartesianIndex(1, 2) .- CartesianIndices((1:-1:-1, 1:2:5)),
+        CartesianIndex(1, 2) .- CartesianIndices((2, 3)),
+    ]
+        Rc = collect(R)
+        @test all(map(==, R, Rc))
+    end
+end
+
+@testset "Cartesian simd/broadcasting" begin
+    @testset "AbstractUnitRange" begin
+        A = rand(-5:5, 64, 64)
+        @test abs.(A) == map(abs, A)
+
+        function test_simd(f, @nospecialize(A); init=zero(eltype(A)))
+            val_simd = init
+            @simd for i in CartesianIndices(A)
+                val_simd = f(val_simd, A[i])
+            end
+
+            val_iter = init
+            for i in CartesianIndices(A)
+                val_iter = f(val_iter, A[i])
+            end
+
+            @test val_iter == reduce(f, A, init=init)
+            @test val_iter ≈ val_simd
+        end
+
+        test_simd(+, A)
+    end
+
+    R = CartesianIndex(-1, -1):CartesianIndex(6, 7)
+    @test R .+ CartesianIndex(1, 2) == CartesianIndex(0, 1):CartesianIndex(7, 9)
+    @test R .- CartesianIndex(1, 2) == CartesianIndex(-2, -3):CartesianIndex(5, 5)
+    # 37867: collect is needed
+    @test collect(CartesianIndex(1, 2) .- R) == CartesianIndex(2, 3):CartesianIndex(-1, -1):CartesianIndex(-5, -5)
+
+    R = CartesianIndex(-1, -1):CartesianIndex(2, 3):CartesianIndex(6, 7)
+    @test R .+ CartesianIndex(2, 2) == CartesianIndex(1, 1):CartesianIndex(2, 3):CartesianIndex(8, 9)
+    @test R .- CartesianIndex(2, 2) == CartesianIndex(-3, -3):CartesianIndex(2, 3):CartesianIndex(4, 5)
+    # 37867: collect is needed
+    @test collect(CartesianIndex(1, 1) .- R) == CartesianIndex(2, 2):CartesianIndex(-2, -3):CartesianIndex(-4, -4)
+end
+
+@testset "Iterators" begin
+    @testset "Reverse" begin
+        R = CartesianIndices((0:5, 0:5))
+        RR = Iterators.Reverse(R)
+        rR = reverse(R)
+        @test rR == collect(RR)
+        @test rR.indices == (5:-1:0, 5:-1:0)
+
+        @test eltype(RR) == CartesianIndex{2}
+        @test size(RR) == size(R)
+        @test axes(RR) == axes(R)
+
+        @test first(RR) == last(R) == CartesianIndex(5, 5)
+        @test last(RR) == first(R) == CartesianIndex(0, 0)
+        RRR = collect(Iterators.Reverse(collect(RR)))
+        @test R == RRR
+    end
+
+    @testset "collect" begin
+        for oinds in [(0:5, ), (2:2:7, ), (2:-1:0, ),
+                      (0:5, 2:8), (2:2:7, 3:3:10), (2:-1:0, 2:7),]
+            R = CartesianIndices(oinds)
+            @test collect(R) == R
+        end
+    end
+end
+
+@testset "set operations" begin
+    R1 = CartesianIndices((3, 4, 5))
+    R2 = CartesianIndices((-2:2, -3:3, -4:4))
+    R = CartesianIndices((2, 3, 4))
+    @test intersect(R1, R2) == R
+end
+
 # test conversions for CartesianIndex
 
 @testset "CartesianIndex Conversions" begin
@@ -19,25 +392,89 @@ ex = Base.Cartesian.exprresolve(:(if 5 > 4; :x; else :y; end))
 end
 
 @testset "CartesianIndices overflow" begin
-    I = CartesianIndices((1:typemax(Int),))
-    i = last(I)
-    @test iterate(I, i) === nothing
+    @testset "incremental steps" begin
+        I = CartesianIndices((1:typemax(Int),))
+        i = last(I)
+        @test iterate(I, i) === nothing
+
+        I = CartesianIndices((1:2:typemax(Int), ))
+        i = CartesianIndex(typemax(Int)-1)
+        @test iterate(I, i) === nothing
+
+        I = CartesianIndices((1:(typemax(Int)-1),))
+        i = CartesianIndex(typemax(Int))
+        @test iterate(I, i) === nothing
 
-    I = CartesianIndices((1:(typemax(Int)-1),))
-    i = CartesianIndex(typemax(Int))
-    @test iterate(I, i) === nothing
+        I = CartesianIndices((1:2:typemax(Int)-1, ))
+        i = CartesianIndex(typemax(Int)-1)
+        @test iterate(I, i) === nothing
 
-    I = CartesianIndices((1:typemax(Int), 1:typemax(Int)))
-    i = last(I)
-    @test iterate(I, i) === nothing
+        I = CartesianIndices((1:typemax(Int), 1:typemax(Int)))
+        i = last(I)
+        @test iterate(I, i) === nothing
+
+        I = CartesianIndices((1:2:typemax(Int), 1:2:typemax(Int)))
+        i = CartesianIndex(typemax(Int)-1, typemax(Int)-1)
+        @test iterate(I, i) === nothing
+
+        I = CartesianIndices((1:typemax(Int), 1:typemax(Int)))
+        i = CartesianIndex(typemax(Int), 1)
+        @test iterate(I, i) === (CartesianIndex(1, 2), CartesianIndex(1,2))
+
+        I = CartesianIndices((1:2:typemax(Int), 1:2:typemax(Int)))
+        i = CartesianIndex(typemax(Int)-1, 1)
+        @test iterate(I, i) === (CartesianIndex(1, 3), CartesianIndex(1, 3))
+
+        I = CartesianIndices((typemin(Int):(typemin(Int)+3),))
+        i = last(I)
+        @test iterate(I, i) === nothing
+
+        I = CartesianIndices(((typemin(Int):2:typemin(Int)+3), ))
+        i = CartesianIndex(typemin(Int)+2)
+        @test iterate(I, i) === nothing
+    end
 
-    i = CartesianIndex(typemax(Int), 1)
-    @test iterate(I, i) === (CartesianIndex(1, 2), CartesianIndex(1,2))
+    @testset "decremental steps" begin
+        I = Iterators.Reverse(CartesianIndices((typemin(Int):typemin(Int)+10, )))
+        i = last(I)
+        @test iterate(I, i) === nothing
 
-    # reverse cartesian indices
-    I = CartesianIndices((typemin(Int):(typemin(Int)+3),))
-    i = last(I)
-    @test iterate(I, i) === nothing
+        I = Iterators.Reverse(CartesianIndices((typemin(Int):2:typemin(Int)+10, )))
+        i = last(I)
+        @test iterate(I, i) === nothing
+
+        I = Iterators.Reverse(CartesianIndices((typemin(Int):typemin(Int)+10, )))
+        i = CartesianIndex(typemin(Int))
+        @test iterate(I, i) === nothing
+
+        I = Iterators.Reverse(CartesianIndices((typemin(Int):2:typemin(Int)+10, )))
+        i = CartesianIndex(typemin(Int))
+        @test iterate(I, i) === nothing
+
+        I = Iterators.Reverse(CartesianIndices((typemin(Int):typemin(Int)+10, typemin(Int):typemin(Int)+10)))
+        i = last(I)
+        @test iterate(I, i) === nothing
+
+        I = Iterators.Reverse(CartesianIndices((typemin(Int):2:typemin(Int)+10, typemin(Int):2:typemin(Int)+10)))
+        i = CartesianIndex(typemin(Int), typemin(Int))
+        @test iterate(I, i) === nothing
+
+        I = Iterators.Reverse(CartesianIndices((typemin(Int):typemin(Int)+10, typemin(Int):typemin(Int)+10)))
+        i = CartesianIndex(typemin(Int), typemin(Int)+1)
+        @test iterate(I, i) === (CartesianIndex(typemin(Int)+10, typemin(Int)), CartesianIndex(typemin(Int)+10, typemin(Int)))
+
+        I = Iterators.Reverse(CartesianIndices((typemin(Int):2:typemin(Int)+10, typemin(Int):2:typemin(Int)+10)))
+        i = CartesianIndex(typemin(Int), typemin(Int)+2)
+        @test iterate(I, i) === (CartesianIndex(typemin(Int)+10, typemin(Int)), CartesianIndex(typemin(Int)+10, typemin(Int)))
+
+        I = CartesianIndices((typemax(Int):-1:typemax(Int)-10, ))
+        i = last(I)
+        @test iterate(I, i) === nothing
+
+        I = CartesianIndices((typemax(Int):-2:typemax(Int)-10, ))
+        i = last(I)
+        @test iterate(I, i) === nothing
+    end
 end
 
 @testset "CartesianIndices iteration" begin
@@ -73,3 +510,14 @@ end
 
     @test @inferred(intersect(I, J)) == CartesianIndices((2:3, 4:5))
 end
+
+# issue #39705
+f39705() = Base.Cartesian.@nany 0 _ -> true
+@test f39705() === false
+
+@testset "CartesianIndices with Bool" begin
+    @test @inferred(CartesianIndices((true,))) == CartesianIndices((1,))
+    @test @inferred(CartesianIndices((false,))) == CartesianIndices((0,))
+    @test @inferred(CartesianIndices((true, false))) == CartesianIndices((1, 0))
+    @test @inferred(CartesianIndices((false, true))) == CartesianIndices((0, 1))
+end
diff --git a/test/ccall.jl b/test/ccall.jl
index 424fe80368855b..3a1b6ff3db7338 100644
--- a/test/ccall.jl
+++ b/test/ccall.jl
@@ -8,6 +8,9 @@ import Libdl
 
 # for cfunction_closure
 include("testenv.jl")
+# for cfunction error
+isdefined(Main, :MacroCalls) || @eval Main include("testhelpers/MacroCalls.jl")
+using Main.MacroCalls
 
 const libccalltest = "libccalltest"
 
@@ -839,7 +842,7 @@ function check_code_trampoline(f, t, n::Int)
     @nospecialize(f, t)
     @test Base.return_types(f, t) == Any[Any]
     llvm = sprint(code_llvm, f, t)
-    @test count(x -> true, eachmatch(r"@jl_get_cfunction_trampoline\(", llvm)) == n
+    @test count(Returns(true), eachmatch(r"@jl_get_cfunction_trampoline\(", llvm)) == n
 end
 check_code_trampoline(testclosure, (Any, Any, Bool, Type), 2)
 check_code_trampoline(testclosure, (Any, Int, Bool, Type{Int}), 2)
@@ -903,7 +906,7 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
         global function $fname(s::$t)
             verbose && println("B: ", s)
             @test s == $v
-            if($(t).mutable)
+            if ismutable(s)
                 @test !(s === $a)
             end
             global c = s
@@ -921,64 +924,100 @@ for (t, v) in ((Complex{Int32}, :ci32), (Complex{Int64}, :ci64),
         @test b === c
         let cf = @cfunction($fname1, Ref{$t}, (Ref{$t},))
             b = ccall(cf, Ref{$t}, (Ref{$t},), a)
+            verbose && println("C: ", b)
+            @test b == $v
+            @test b === a
+            @test b === c
         end
-        verbose && println("C: ", b)
-        @test b == $v
-        @test b === a
-        @test b === c
         let cf = @cfunction($fname, $t, ($t,))
             b = ccall(cf, $t, ($t,), a)
-        end
-        verbose && println("C: ",b)
-        @test b == $v
-        if ($(t).mutable)
-            @test !(b === c)
-            @test !(b === a)
+            verbose && println("C: ",b)
+            @test b == $v
+            if ismutable($v)
+                @test !(b === c)
+                @test !(b === a)
+            end
         end
         let cf = @cfunction($fname1, $t, (Ref{$t},))
             b = ccall(cf, $t, (Ref{$t},), a)
-        end
-        verbose && println("C: ",b)
-        @test b == $v
-        if ($(t).mutable)
-            @test !(b === c)
-            @test !(b === a)
+            verbose && println("C: ",b)
+            @test b == $v
+            if ismutable($v)
+                @test !(b === c)
+                @test !(b === a)
+            end
         end
         let cf = @cfunction($fname, Ref{$t}, ($t,))
             b = ccall(cf, Ref{$t}, ($t,), a)
-        end
-        verbose && println("C: ",b)
-        @test b == $v
-        @test b === c
-        if ($(t).mutable)
-            @test !(b === a)
+            verbose && println("C: ",b)
+            @test b == $v
+            @test b === c
+            if ismutable($v)
+                @test !(b === a)
+            end
         end
         let cf = @cfunction($fname, Any, (Ref{$t},))
             b = ccall(cf, Any, (Ref{$t},), $v)
-        end
-        verbose && println("C: ",b)
-        @test b == $v
-        @test b === c
-        if ($(t).mutable)
-            @test !(b === a)
+            verbose && println("C: ",b)
+            @test b == $v
+            @test b === c
+            if ismutable($v)
+                @test !(b === a)
+            end
         end
         let cf = @cfunction($fname, Any, (Ref{Any},))
             b = ccall(cf, Any, (Ref{Any},), $v)
+            @test b == $v
+            @test b === c
+            if ismutable($v)
+                @test !(b === a)
+            end
         end
-        @test b == $v
-        @test b === c
-        if ($(t).mutable)
-            @test !(b === a)
+        a isa Complex && let cf = @cfunction($fname, Ref{Complex}, (Ref{Any},))
+            b = ccall(cf, Ref{Complex}, (Ref{Any},), $v)
+            @test b == $v
+            @test b === c
+            if ismutable($v)
+                @test !(b === a)
+            end
         end
         let cf = @cfunction($fname, Ref{AbstractString}, (Ref{Any},))
             @test_throws TypeError ccall(cf, Any, (Ref{Any},), $v)
         end
-        let cf = @cfunction($fname, AbstractString, (Ref{Any},))
+        let cf = @cfunction($fname, Ref{Ptr}, (Ref{Any},))
+            @test_throws TypeError ccall(cf, Any, (Ref{Any},), $v)
+        end
+        let cf = @cfunction($fname, Ref{Complex{UInt32}}, (Ref{Any},))
             @test_throws TypeError ccall(cf, Any, (Ref{Any},), $v)
         end
+        if a isa Complex
+            local mkcb(::Type{T}) where {T} = @cfunction($fname, Ref{Complex{T}}, (Ref{Any},))
+            @test ccall(mkcb(typeof(a.re)), Any, (Ref{Any},), $v) === $v
+        end
+        #FIXME: @test_throws TypeError ccall(mkcb(UInt32), Any, (Ref{Any},), $v)
     end
 end
 
+
+#issue 40164
+@testset "llvm parameter attributes on cfunction closures" begin
+    struct Struct40164
+        x::Cdouble
+        y::Cdouble
+        z::Cdouble
+    end
+
+    function test_40164()
+        ret = Struct40164[]
+        f = x::Struct40164 -> (push!(ret, x); nothing)
+        f_c = @cfunction($f, Cvoid, (Struct40164,))
+        ccall(f_c.ptr, Ptr{Cvoid}, (Struct40164,), Struct40164(0, 1, 2))
+        ret
+    end
+
+    @test test_40164() == [Struct40164(0, 1, 2)]
+end
+
 else
 
 @test_broken "cfunction: no support for closures on this platform"
@@ -1000,6 +1039,12 @@ unstable26078(x) = x > 0 ? x : "foo"
 handle26078 = @cfunction(unstable26078, Int32, (Int32,))
 @test ccall(handle26078, Int32, (Int32,), 1) == 1
 
+# issue #39804
+let f = @cfunction(Base.last, String, (Tuple{Int,String},))
+    # String inside a struct is a pointer even though String.size == 0
+    @test ccall(f, Ref{String}, (Tuple{Int,String},), (1, "a string?")) === "a string?"
+end
+
 # issue 17219
 function ccall_reassigned_ptr(ptr::Ptr{Cvoid})
     ptr = Libdl.dlsym(Libdl.dlopen(libccalltest), "test_echo_p")
@@ -1325,6 +1370,29 @@ for i in 1:3
     ccall((:test_echo_p, libccalltest), Ptr{Cvoid}, (Any,), f17413())
 end
 
+let r = Ref{Any}(10)
+    @GC.preserve r begin
+        pa = Base.unsafe_convert(Ptr{Any}, r) # pointer to value
+        pv = Base.unsafe_convert(Ptr{Cvoid}, r) # pointer to data
+        @test Ptr{Cvoid}(pa) != pv
+        @test unsafe_load(pa) === 10
+        @test unsafe_load(Ptr{Ptr{Cvoid}}(pa)) === pv
+        @test unsafe_load(Ptr{Int}(pv)) === 10
+    end
+end
+
+let r = Ref{Any}("123456789")
+    @GC.preserve r begin
+        pa = Base.unsafe_convert(Ptr{Any}, r) # pointer to value
+        pv = Base.unsafe_convert(Ptr{Cvoid}, r) # pointer to data
+        @test Ptr{Cvoid}(pa) != pv
+        @test unsafe_load(pa) === r[]
+        @test unsafe_load(Ptr{Ptr{Cvoid}}(pa)) === pv
+        @test unsafe_load(Ptr{Int}(pv)) === length(r[])
+    end
+end
+
+
 struct SpillPint
     a::Ptr{Cint}
     b::Ptr{Cint}
@@ -1411,15 +1479,42 @@ end
              eval(:(f20835(x) = ccall(:fn, Cvoid, (Ptr{typeof(x)},), x))))
 @test_throws(UndefVarError(:Something_not_defined_20835),
              eval(:(f20835(x) = ccall(:fn, Something_not_defined_20835, (Ptr{typeof(x)},), x))))
-
-@noinline f21104at(::Type{T}) where {T} = ccall(:fn, Cvoid, (Some{T},), Some(0))
-@noinline f21104rt(::Type{T}) where {T} = ccall(:fn, Some{T}, ())
-@test code_llvm(devnull, f21104at, (Type{Float64},)) === nothing
-@test code_llvm(devnull, f21104rt, (Type{Float64},)) === nothing
-@test_throws(ErrorException("ccall argument 1 doesn't correspond to a C type"),
-             f21104at(Float64))
-@test_throws(ErrorException("ccall return type doesn't correspond to a C type"),
-             f21104rt(Float64))
+@test isempty(methods(f20835))
+
+@test_throws(ErrorException("ccall method definition: argument 1 type doesn't correspond to a C type"),
+             @eval f21104(::Type{T}) where {T} = ccall(:fn, Cvoid, (Some{T},), Some(0)))
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval f21104(::Type{T}) where {T} = ccall(:fn, Some{T}, ()))
+@test isempty(methods(f21104))
+@test_throws(ErrorException("ccall method definition: argument 1 type doesn't correspond to a C type"),
+             @eval if false; ccall(:fn, Cvoid, (Some.body,), Some(0)); end)
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval if false; ccall(:fn, Some.body, ()); end)
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval if false; ccall(:fn, Tuple, ()); end)
+## TODO: lowering is broken on this (throws "syntax: ssavalue with no def")
+#@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+#             @eval if false; ccall(:fn, Tuple{Val{T}} where T, ()); end)
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval if false; ccall(:fn, Tuple{Val}, ()); end)
+@test_throws(TypeError, @eval if false; ccall(:fn, Some.var, ()); end)
+@test_throws(TypeError, @eval if false; ccall(:fn, Cvoid, (Some.var,), Some(0)); end)
+@test_throws(ErrorException("ccall method definition: Vararg not allowed for argument list"),
+             @eval ccall(:fn, Int, (Vararg{Int},), 1))
+@test_throws(ErrorException("ccall method definition: argument 1 type doesn't correspond to a C type"),
+             @eval ccall(:fn, Int, (Integer,), 1))
+@test_throws(ErrorException("ccall method definition: argument 1 type doesn't correspond to a C type"),
+             @eval ccall(:fn, Int, (Ptr,), C_NULL))
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval ccall(:fn, Integer, (Integer,), 1))
+@test_throws(ErrorException("ccall method definition: return type doesn't correspond to a C type"),
+             @eval ccall(:fn, Ptr, ()))
+# This is hard to test: @test_throws(ErrorException("ccall argument 1 type doesn't correspond to a C type"),
+#                                    @eval ccall(:fn, Int, (Union{},), 1))
+@test_throws(ErrorException("ccall argument 1 type doesn't correspond to a C type"),
+             @eval ccall(:fn, Int, (Nothing,), nothing))
+@test_throws(ErrorException("ccall return type struct fields cannot contain a reference"),
+             @eval ccall(:fn, typeof(Ref("")), ()))
 
 # test for malformed syntax errors
 @test Expr(:error, "more arguments than types for ccall") == Meta.lower(@__MODULE__, :(ccall(:fn, A, (), x)))
@@ -1450,21 +1545,32 @@ end
 
 evalf_callback_19805(ci::callinfos_19805{FUNC_FT}) where {FUNC_FT} = ci.f(0.5)::Float64
 
-evalf_callback_c_19805(ci::callinfos_19805{FUNC_FT}) where {FUNC_FT} = @cfunction(
-    evalf_callback_19805, Float64, (callinfos_19805{FUNC_FT},))
-
-@test_throws(ErrorException("cfunction argument 1 doesn't correspond to a C type"),
-             evalf_callback_c_19805( callinfos_19805(sin) ))
-@test_throws(ErrorException("cfunction argument 2 doesn't correspond to a C type"),
-             @cfunction(+, Int, (Int, Nothing)))
-@test_throws(ErrorException("cfunction: Vararg syntax not allowed for argument list"),
-             @cfunction(+, Int, (Vararg{Int},)))
+@test_throws(ErrorException("cfunction method definition: argument 1 type doesn't correspond to a C type"),
+             @eval evalf_callback_c_19805(ci::callinfos_19805{FUNC_FT}) where {FUNC_FT} =
+                 @cfunction(evalf_callback_19805, Float64, (callinfos_19805{FUNC_FT},)))
+@test isempty(methods(evalf_callback_c_19805))
+@test_throws(ErrorException("cfunction method definition: Vararg not allowed for argument list"),
+             @eval if false; @cfunction(+, Int, (Vararg{Int},)); end)
 @test_throws(ErrorException("could not evaluate cfunction argument type (it might depend on a local variable)"),
              @eval () -> @cfunction(+, Int, (Ref{T}, Ref{T})) where T)
 @test_throws(ErrorException("could not evaluate cfunction return type (it might depend on a local variable)"),
              @eval () -> @cfunction(+, Ref{T}, (Int, Int)) where T)
+@test_throws(ErrorException("cfunction argument 2 type doesn't correspond to a C type"),
+             @eval @cfunction(+, Int, (Int, Nothing)))
+@test_throws(ErrorException("cfunction argument 2 type doesn't correspond to a C type"),
+             @eval @cfunction(+, Int, (Int, Union{})))
 @test_throws(ErrorException("cfunction return type Ref{Any} is invalid. Use Any or Ptr{Any} instead."),
-             @cfunction(+, Ref{Any}, (Int, Int)))
+             @eval @cfunction(+, Ref{Any}, (Int, Int)))
+@test_throws(ErrorException("cfunction method definition: argument 1 type doesn't correspond to a C type"),
+             @eval @cfunction(+, Int, (Integer, Integer)))
+@test_throws(ErrorException("cfunction method definition: argument 1 type doesn't correspond to a C type"),
+             @eval @cfunction(+, Int, (Ptr,)))
+@test_throws(ErrorException("cfunction method definition: return type doesn't correspond to a C type"),
+             @eval @cfunction(+, Integer, (Int, Int)))
+@test_throws(ErrorException("cfunction method definition: return type doesn't correspond to a C type"),
+             @eval @cfunction(+, Ptr, (Int, Int)))
+@test_throws(ErrorException("cfunction return type struct fields cannot contain a reference"),
+             @eval @cfunction(+, typeof(Ref("")), ()))
 
 # test Ref{abstract_type} calling parameter passes a heap box
 abstract type Abstract22734 end
@@ -1553,9 +1659,23 @@ let
     @test arr[1] == '0'
 end
 
+# issue #38751
+let
+    function f38751!(dest::Vector{UInt8}, src::Vector{UInt8}, n::UInt)
+        d, s = pointer(dest), pointer(src)
+        GC.@preserve dest src ccall(:memcpy, Cvoid, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), d, s, n)
+        return dest
+    end
+    dest = zeros(UInt8, 8)
+    @test f38751!(dest, collect(0x1:0x8), UInt(8)) == 0x1:0x8
+    llvm = sprint(code_llvm, f38751!, (Vector{UInt8}, Vector{UInt8}, UInt))
+    @test !occursin("call void inttoptr", llvm)
+end
+
 # issue #34061
 let o_file = tempname(), err = Base.PipeEndpoint()
-    run(pipeline(Cmd(`$(Base.julia_cmd()) --output-o=$o_file -e 'Base.reinit_stdio();
+    run(pipeline(Cmd(`$(Base.julia_cmd()) --color=no --output-o=$o_file -e '
+        Base.reinit_stdio();
         f() = ccall((:dne, :does_not_exist), Cvoid, ());
         f()'`; ignorestatus=true), stderr=err), wait=false)
     output = read(err, String)
@@ -1653,6 +1773,10 @@ end
     @test_throws ArgumentError("interpolated function `PROGRAM_FILE` was not a Ptr{Cvoid}, but String") @ccall $PROGRAM_FILE("foo"::Cstring)::Cvoid
 end
 
+@testset "check error path for @cfunction" begin
+    @test_throws ArgumentError("@cfunction argument types must be a literal tuple") @macrocall(@cfunction(identity, Cstring, Cstring))
+end
+
 # call some c functions
 @testset "run @ccall with C standard library functions" begin
     @test @ccall(sqrt(4.0::Cdouble)::Cdouble) == 2.0
@@ -1687,15 +1811,15 @@ end
     @test str == "hi+1-2-3-4-5-6-7-8-9-10-11-12-13-14-15-1.1-2.2-3.3-4.4-5.5-6.6-7.7-8.8-9.9\n"
 end
 
+
 @testset "Cwstring" begin
-    n = 100
-    buffer = Array{Cwchar_t}(undef, n)
-    if Sys.iswindows()
-        # sprintf throws an error on Windows, see https://github.com/JuliaLang/julia/pull/36040#issuecomment-634774055
-        len = @ccall swprintf_s(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
-    else
-        len = @ccall swprintf(buffer::Ptr{Cwchar_t}, n::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
-    end
+    buffer = Array{Cwchar_t}(undef, 100)
+    len = @static if Sys.iswindows()
+            @ccall swprintf_s(buffer::Ptr{Cwchar_t}, length(buffer)::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
+        else
+            @ccall swprintf(buffer::Ptr{Cwchar_t}, length(buffer)::Csize_t, "α+%ls=%hhd"::Cwstring; "β"::Cwstring, 0xf::UInt8)::Cint
+        end
+    Libc.systemerror("swprintf", len < 0)
     str = GC.@preserve buffer unsafe_string(pointer(buffer), len)
     @test str == "α+β=15"
     str = GC.@preserve buffer unsafe_string(Cwstring(pointer(buffer)))
@@ -1709,3 +1833,69 @@ ccall_lazy_lib_name(x) = ccall((:testUcharX, compute_lib_name()), Int32, (UInt8,
 @test ccall_lazy_lib_name(3) == 1
 ccall_with_undefined_lib() = ccall((:time, xx_nOt_DeFiNeD_xx), Cint, (Ptr{Cvoid},), C_NULL)
 @test_throws UndefVarError(:xx_nOt_DeFiNeD_xx) ccall_with_undefined_lib()
+
+@testset "transcode for UInt8 and UInt16" begin
+    a   = [UInt8(1), UInt8(2), UInt8(3)]
+    a16 = transcode(UInt16, a)
+    a8  = transcode(UInt8, a16)
+    @test a8 == a
+    b   = [UInt16(1), UInt16(2), UInt16(3)]
+    b8  = transcode(UInt8, b)
+    b16 = transcode(UInt16, b8)
+    @test b16 == b
+end
+
+@testset "transcode String to String" begin
+    a = "Julia strings and things"
+    @test transcode(String, a) === a
+end
+
+# issue 33413
+@testset "cglobal lowering" begin
+    # crash in cglobal33413_ptrinline[_notype]() specifically requires the library pointer be
+    # retrieved inside the function; using global pointer variable doesn't trigger the crash
+    function cglobal33413_ptrvar()
+        libh = Libdl.dlopen(libccalltest)
+        sym = Libdl.dlsym(libh, :global_var)
+        return cglobal(sym, Cint)
+    end
+    function cglobal33413_ptrvar_notype()
+        libh = Libdl.dlopen(libccalltest)
+        sym = Libdl.dlsym(libh, :global_var)
+        return cglobal(sym)
+    end
+    function cglobal33413_ptrinline()
+        libh = Libdl.dlopen(libccalltest)
+        return cglobal(Libdl.dlsym(libh, :global_var), Cint)
+    end
+    function cglobal33413_ptrinline_notype()
+        libh = Libdl.dlopen(libccalltest)
+        return cglobal(Libdl.dlsym(libh, :global_var))
+    end
+    function cglobal33413_tupleliteral()
+        return cglobal((:global_var, libccalltest), Cint)
+    end
+    function cglobal33413_tupleliteral_notype()
+        return cglobal((:global_var, libccalltest))
+    end
+    function cglobal33413_literal()
+        return cglobal(:sin, Cint)
+    end
+    function cglobal33413_literal_notype()
+        return cglobal(:sin)
+    end
+    @test unsafe_load(cglobal33413_ptrvar()) == 1
+    @test unsafe_load(cglobal33413_ptrinline()) == 1
+    @test unsafe_load(cglobal33413_tupleliteral()) == 1
+    @test unsafe_load(convert(Ptr{Cint}, cglobal33413_ptrvar_notype())) == 1
+    @test unsafe_load(convert(Ptr{Cint}, cglobal33413_ptrinline_notype())) == 1
+    @test unsafe_load(convert(Ptr{Cint}, cglobal33413_tupleliteral_notype())) == 1
+    @test cglobal33413_literal() != C_NULL
+    @test cglobal33413_literal_notype() != C_NULL
+end
+
+@testset "ccall_effects" begin
+    ctest_total(x) = @Base.assume_effects :total @ccall libccalltest.ctest(x::Complex{Int})::Complex{Int}
+    ctest_total_const() = Val{ctest_total(1 + 2im)}()
+    Core.Compiler.return_type(ctest_total_const, Tuple{}) == Val{2 + 0im}
+end
diff --git a/test/channels.jl b/test/channels.jl
index aae89fb009dcad..1b7f96ad528bf8 100644
--- a/test/channels.jl
+++ b/test/channels.jl
@@ -2,6 +2,7 @@
 
 using Random
 using Base: Experimental
+using Base: n_avail
 
 @testset "single-threaded Condition usage" begin
     a = Condition()
@@ -255,12 +256,14 @@ using Distributed
 end
 
 @testset "timedwait" begin
-    @test timedwait(() -> true, 0) === :ok
-    @test timedwait(() -> false, 0) === :timed_out
-    @test_throws ArgumentError timedwait(() -> true, 0; pollint=0)
+    alwaystrue() = true
+    alwaysfalse() = false
+    @test timedwait(alwaystrue, 0) === :ok
+    @test timedwait(alwaysfalse, 0) === :timed_out
+    @test_throws ArgumentError timedwait(alwaystrue, 0; pollint=0)
 
     # Allowing a smaller positive `pollint` results in `timewait` hanging
-    @test_throws ArgumentError timedwait(() -> true, 0, pollint=1e-4)
+    @test_throws ArgumentError timedwait(alwaystrue, 0, pollint=1e-4)
 
     # Callback passed in raises an exception
     failure_cb = function (fail_on_call=1)
@@ -272,27 +275,16 @@ end
         end
     end
 
-    try
-        timedwait(failure_cb(1), 0)
-        @test false
-    catch e
-        @test e isa CapturedException
-        @test e.ex isa ErrorException
-    end
-
-    try
-        timedwait(failure_cb(2), 0)
-        @test false
-    catch e
-        @test e isa CapturedException
-        @test e.ex isa ErrorException
-    end
+    @test_throws ErrorException("callback failed") timedwait(failure_cb(1), 0)
+    @test_throws ErrorException("callback failed") timedwait(failure_cb(2), 0)
 
-    duration = @elapsed timedwait(() -> false, 1)  # Using default pollint of 0.1
-    @test duration ≈ 1 atol=0.4
+    # Validate that `timedwait` actually waits. Ideally we should also test that `timedwait`
+    # doesn't exceed a maximum duration but that would require guarantees from the OS.
+    duration = @elapsed timedwait(alwaysfalse, 1)  # Using default pollint of 0.1
+    @test duration >= 1
 
-    duration = @elapsed timedwait(() -> false, 0; pollint=1)
-    @test duration ≈ 1 atol=0.4
+    duration = @elapsed timedwait(alwaysfalse, 0; pollint=1)
+    @test duration >= 1
 end
 
 @testset "timedwait on multiple channels" begin
@@ -329,7 +321,7 @@ end
     # interpreting the calling function.
     @noinline garbage_finalizer(f) = (finalizer(f, "gar" * "bage"); nothing)
     run = Ref(0)
-    garbage_finalizer(x -> nothing) # warmup
+    garbage_finalizer(Returns(nothing)) # warmup
     @test GC.enable(false)
     # test for finalizers trying to yield leading to failed attempts to context switch
     garbage_finalizer((x) -> (run[] += 1; sleep(1)))
@@ -353,11 +345,9 @@ end
     @test istaskdone(t)
     @test fetch(t)
     @test run[] == 3
-    @test fetch(errstream) == """
-        error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")
-        error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")
-        error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")
-        """
+    output = fetch(errstream)
+    @test 3 == length(findall(
+        """error in running finalizer: ErrorException("task switch not allowed from inside gc finalizer")""", output))
     # test for invalid state in Workqueue during yield
     t = @async nothing
     t._state = 66
@@ -390,6 +380,7 @@ end
         t = Timer(0) do t
             tc[] += 1
         end
+        cb = first(t.cond.waitq)
         Libc.systemsleep(0.005)
         @test isopen(t)
         Base.process_events()
@@ -397,29 +388,35 @@ end
         @test tc[] == 0
         yield()
         @test tc[] == 1
+        @test istaskdone(cb)
     end
 
     let tc = Ref(0)
         t = Timer(0) do t
             tc[] += 1
         end
+        cb = first(t.cond.waitq)
         Libc.systemsleep(0.005)
         @test isopen(t)
         close(t)
         @test !isopen(t)
-        sleep(0.1)
+        wait(cb)
         @test tc[] == 0
+        @test t.handle === C_NULL
     end
 
     let tc = Ref(0)
         async = Base.AsyncCondition() do async
             tc[] += 1
         end
+        cb = first(async.cond.waitq)
         @test isopen(async)
         ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
         ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
+        @test isempty(Base.Workqueue)
         Base.process_events() # schedule event
         Sys.iswindows() && Base.process_events() # schedule event (windows?)
+        @test length(Base.Workqueue) == 1
         ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
         @test tc[] == 0
         yield() # consume event
@@ -440,13 +437,16 @@ end
         yield() # consume event & then close
         @test tc[] == 3
         sleep(0.1) # no further events
+        wait(cb)
         @test tc[] == 3
+        @test async.handle === C_NULL
     end
 
     let tc = Ref(0)
         async = Base.AsyncCondition() do async
             tc[] += 1
         end
+        cb = first(async.cond.waitq)
         @test isopen(async)
         ccall(:uv_async_send, Cvoid, (Ptr{Cvoid},), async)
         Base.process_events() # schedule event
@@ -457,8 +457,10 @@ end
         @test tc[] == 0
         yield() # consume event & then close
         @test tc[] == 1
-        sleep(0.1)
+        sleep(0.1) # no further events
+        wait(cb)
         @test tc[] == 1
+        @test async.handle === C_NULL
     end
 end
 
@@ -517,6 +519,13 @@ let a = []
     @test a == [1]
 end
 
+# make sure that we don't accidentally create a one-shot timer
+let
+    t = Timer(Returns(nothing), 10, interval=0.00001)
+    @test ccall(:uv_timer_get_repeat, UInt64, (Ptr{Cvoid},), t) == 1
+    close(t)
+end
+
 # make sure repeating timers work
 @noinline function make_unrooted_timer(a)
     t = Timer(0.0, interval = 0.1)
@@ -557,3 +566,43 @@ let c = Channel(3)
     close(c)
     @test repr(MIME("text/plain"), c) == "Channel{Any}(3) (closed)"
 end
+
+# PR #41833: data races in Channel
+@testset "n_avail(::Channel)" begin
+    # Buffered: n_avail() = buffer length + number of waiting tasks
+    let c = Channel(2)
+        @test n_avail(c) == 0;   put!(c, 0)
+        @test n_avail(c) == 1;   put!(c, 0)
+        @test n_avail(c) == 2;   t1 = @task put!(c, 0); yield(t1)
+        @test n_avail(c) == 3;   t2 = @task put!(c, 0); yield(t2)
+        @test n_avail(c) == 4
+        # Test n_avail(c) after interrupting a task waiting on the channel
+                                t3 = @task put!(c, 0)
+                                yield(t3)
+        @test n_avail(c) == 5
+                                @async Base.throwto(t3, ErrorException("Exit put!"))
+                                try wait(t3) catch end
+        @test n_avail(c) == 4
+                                close(c)
+                                try wait(t1) catch end
+                                try wait(t2) catch end
+        @test n_avail(c) == 2    # Already-buffered items remain
+    end
+    # Unbuffered: n_avail() = number of waiting tasks
+    let c = Channel()
+        @test n_avail(c) == 0;   t1 = @task put!(c, 0); yield(t1)
+        @test n_avail(c) == 1;   t2 = @task put!(c, 0); yield(t2)
+        @test n_avail(c) == 2
+        # Test n_avail(c) after interrupting a task waiting on the channel
+                                t3 = @task put!(c, 0)
+                                yield(t3)
+        @test n_avail(c) == 3
+                                @async Base.throwto(t3, ErrorException("Exit put!"))
+                                try wait(t3) catch end
+        @test n_avail(c) == 2
+                                close(c)
+                                try wait(t1) catch end
+                                try wait(t2) catch end
+        @test n_avail(c) == 0
+    end
+end
diff --git a/test/char.jl b/test/char.jl
index 325f97098b0378..1639c62ec819de 100644
--- a/test/char.jl
+++ b/test/char.jl
@@ -2,7 +2,9 @@
 
 @testset "basic properties" begin
 
+    @test typemax(Char) == reinterpret(Char, typemax(UInt32))
     @test typemin(Char) == Char(0)
+    @test typemax(Char) == reinterpret(Char, 0xffffffff)
     @test ndims(Char) == 0
     @test getindex('a', 1) == 'a'
     @test_throws BoundsError getindex('a', 2)
@@ -19,7 +21,7 @@
 
     @test widen('a') === 'a'
     # just check this works
-    @test_throws Base.CodePointError Base.code_point_err(UInt32(1))
+    @test_throws Base.CodePointError Base.throw_code_point_err(UInt32(1))
 end
 
 @testset "ASCII conversion to/from Integer" begin
@@ -99,6 +101,7 @@ end
     #getindex(c::Char) = c
     for x in testarrays
         @test getindex(x) == x
+        @test getindex(x, CartesianIndex()) == x
     end
 
     #first(c::Char) = c
@@ -247,6 +250,7 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c)
 
 @testset "abstractchar" begin
     @test AbstractChar('x') === AbstractChar(UInt32('x')) === 'x'
+    @test convert(AbstractChar, 2.0) == Char(2)
 
     @test isascii(ASCIIChar('x'))
     @test ASCIIChar('x') < 'y'
@@ -254,6 +258,9 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c)
     @test ASCIIChar('x')^3 == "xxx"
     @test repr(ASCIIChar('x')) == "'x'"
     @test string(ASCIIChar('x')) == "x"
+    @test length(ASCIIChar('x')) == 1
+    @test !isempty(ASCIIChar('x'))
+    @test eltype(ASCIIChar) == ASCIIChar
     @test_throws MethodError write(IOBuffer(), ASCIIChar('x'))
     @test_throws MethodError read(IOBuffer('x'), ASCIIChar)
 end
@@ -290,6 +297,7 @@ end
 
 @testset "broadcasting of Char" begin
     @test identity.('a') == 'a'
+    @test 'a' .* ['b', 'c'] == ["ab", "ac"]
 end
 
 @testset "code point format of U+ syntax (PR 33291)" begin
@@ -300,3 +308,27 @@ end
     @test repr("text/plain", '\U001f428') == "'🐨': Unicode U+1F428 (category So: Symbol, other)"
     @test repr("text/plain", '\U010f321') == "'\\U10f321': Unicode U+10F321 (category Co: Other, private use)"
 end
+
+@testset "malformed chars" begin
+    u1 = UInt32(0xc0) << 24
+    u2 = UInt32(0xc1) << 24
+    u3 = UInt32(0x0704) << 21
+    u4 = UInt32(0x0f08) << 20
+
+    overlong_uints = [u1, u2, u3, u4]
+    overlong_chars = reinterpret.(Char, overlong_uints)
+    @test all(Base.is_overlong_enc, overlong_uints)
+    @test all(Base.isoverlong, overlong_chars)
+    @test all(Base.ismalformed, overlong_chars)
+    @test repr("text/plain", overlong_chars[1]) ==
+        "'\\xc0': Malformed UTF-8 (category Ma: Malformed, bad data)"
+end
+
+@testset "More fallback tests" begin
+    @test length(ASCIIChar('x')) == 1
+    @test firstindex(ASCIIChar('x')) == 1
+    @test !isempty(ASCIIChar('x'))
+    @test hash(ASCIIChar('x'), UInt(10)) == hash('x', UInt(10))
+    @test Base.IteratorSize(Char) == Base.HasShape{0}()
+    @test convert(ASCIIChar, 1) == Char(1)
+end
diff --git a/test/checked.jl b/test/checked.jl
index bd1467b6425f97..bacda3db75deca 100644
--- a/test/checked.jl
+++ b/test/checked.jl
@@ -302,6 +302,14 @@ end
     @test checked_abs(BigInt(1)) == BigInt(1)
     @test checked_neg(BigInt(-1)) == BigInt(1)
     @test checked_neg(BigInt(1)) == BigInt(-1)
+    @test checked_add(BigInt(1), BigInt(1)) == BigInt(2)
+    @test checked_sub(BigInt(1), BigInt(2)) == BigInt(-1)
+    @test checked_mul(BigInt(2), BigInt(10)) == BigInt(20)
+    @test checked_div(BigInt(10), BigInt(2)) == BigInt(5)
+    @test checked_rem(BigInt(9), BigInt(4)) == BigInt(1)
+    @test checked_fld(BigInt(10), BigInt(3)) == BigInt(3)
+    @test checked_mod(BigInt(9), BigInt(4)) == BigInt(1)
+    @test checked_cld(BigInt(10), BigInt(3)) == BigInt(4)
 end
 
 @testset "Additional tests" begin
@@ -334,3 +342,19 @@ end
     @test_throws OverflowError checked_mul(UInt128(2)^127, UInt128(2))
 
 end
+
+@testset "Multiple arguments for add, mul" begin
+    @test checked_add(1, 2, 3) === 6
+    @test checked_add(1, 2, 3, 4) === 10
+    @test checked_add(1, 2, 3, 4, 5) === 15
+    @test checked_add(1, 2, 3, 4, 5, 6) === 21
+    @test checked_add(1, 2, 3, 4, 5, 6, 7) === 28
+    @test checked_add(1, 2, 3, 4, 5, 6, 7, 8) === 36
+
+    @test checked_mul(1, 2, 3) === 6
+    @test checked_mul(1, 2, 3, 4) === 24
+    @test checked_mul(1, 2, 3, 4, 5) === 120
+    @test checked_mul(1, 2, 3, 4, 5, 6) === 720
+    @test checked_mul(1, 2, 3, 4, 5, 6, 7) === 5040
+    @test checked_mul(1, 2, 3, 4, 5, 6, 7, 8) === 40320
+end
diff --git a/test/choosetests.jl b/test/choosetests.jl
index 9ca97a543123c8..099dfa18a71c56 100644
--- a/test/choosetests.jl
+++ b/test/choosetests.jl
@@ -5,9 +5,34 @@ using Random, Sockets
 const STDLIB_DIR = Sys.STDLIB
 const STDLIBS = filter!(x -> isfile(joinpath(STDLIB_DIR, x, "src", "$(x).jl")), readdir(STDLIB_DIR))
 
-"""
+const TESTNAMES = [
+        "subarray", "core", "compiler", "worlds", "atomics",
+        "keywordargs", "numbers", "subtype",
+        "char", "strings", "triplequote", "unicode", "intrinsics",
+        "dict", "hashing", "iobuffer", "staged", "offsetarray",
+        "arrayops", "tuple", "reduce", "reducedim", "abstractarray",
+        "intfuncs", "simdloop", "vecelement", "rational",
+        "bitarray", "copy", "math", "fastmath", "functional", "iterators",
+        "operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
+        "sorting", "spawn", "backtrace", "exceptions",
+        "file", "read", "version", "namedtuple",
+        "mpfr", "broadcast", "complex",
+        "floatapprox", "stdlib", "reflection", "regex", "float16",
+        "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi",
+        "euler", "show", "client",
+        "errorshow", "sets", "goto", "llvmcall", "llvmcall2", "ryu",
+        "some", "meta", "stacktraces", "docs",
+        "misc", "threads", "stress", "binaryplatforms", "atexit",
+        "enums", "cmdlineargs", "int", "interpreter",
+        "checked", "bitset", "floatfuncs", "precompile",
+        "boundscheck", "error", "ambiguous", "cartesian", "osutils",
+        "channels", "iostream", "secretbuffer", "specificity",
+        "reinterpretarray", "syntax", "corelogging", "missing", "asyncmap",
+        "smallarrayshrink", "opaque_closure", "filesystem", "download",
+]
 
-`tests, net_on, exit_on_error, seed = choosetests(choices)` selects a set of tests to be
+"""
+`(; tests, net_on, exit_on_error, seed) = choosetests(choices)` selects a set of tests to be
 run. `choices` should be a vector of test names; if empty or set to
 `["all"]`, all tests are selected.
 
@@ -15,7 +40,7 @@ This function also supports "test collections": specifically, "linalg"
  refers to collections of tests in the correspondingly-named
 directories.
 
-Upon return:
+The function returns a named tuple with the following elements:
   - `tests` is a vector of fully-expanded test names,
   - `net_on` is true if networking is available (required for some tests),
   - `exit_on_error` is true if an error in one test should cancel
@@ -23,75 +48,93 @@ Upon return:
   - `seed` is a seed which will be used to initialize the global RNG for each
     test to be run.
 
-Three options can be passed to `choosetests` by including a special token
+Several options can be passed to `choosetests` by including a special token
 in the `choices` argument:
    - "--skip", which makes all tests coming after be skipped,
    - "--exit-on-error" which sets the value of `exit_on_error`,
    - "--seed=SEED", which sets the value of `seed` to `SEED`
      (parsed as an `UInt128`); `seed` is otherwise initialized randomly.
      This option can be used to reproduce failed tests.
+   - "--help", which prints a help message and then skips all tests.
+   - "--help-list", which prints the options computed without running them.
 """
 function choosetests(choices = [])
-    testnames = [
-        "subarray", "core", "compiler", "worlds",
-        "keywordargs", "numbers", "subtype",
-        "char", "strings", "triplequote", "unicode", "intrinsics",
-        "dict", "hashing", "iobuffer", "staged", "offsetarray",
-        "arrayops", "tuple", "reduce", "reducedim", "abstractarray",
-        "intfuncs", "simdloop", "vecelement", "rational",
-        "bitarray", "copy", "math", "fastmath", "functional", "iterators",
-        "operators", "ordering", "path", "ccall", "parse", "loading", "gmp",
-        "sorting", "spawn", "backtrace", "exceptions",
-        "file", "read", "version", "namedtuple",
-        "mpfr", "broadcast", "complex",
-        "floatapprox", "stdlib", "reflection", "regex", "float16",
-        "combinatorics", "sysinfo", "env", "rounding", "ranges", "mod2pi",
-        "euler", "show", "client",
-        "errorshow", "sets", "goto", "llvmcall", "llvmcall2", "ryu",
-        "some", "meta", "stacktraces", "docs",
-        "misc", "threads", "stress", "binaryplatforms",
-        "enums", "cmdlineargs", "int", "interpreter",
-        "checked", "bitset", "floatfuncs", "precompile",
-        "boundscheck", "error", "ambiguous", "cartesian", "osutils",
-        "channels", "iostream", "secretbuffer", "specificity",
-        "reinterpretarray", "syntax", "logging", "missing", "asyncmap", "atexit"
-    ]
-
     tests = []
-    skip_tests = []
+    skip_tests = Set()
     exit_on_error = false
     use_revise = false
     seed = rand(RandomDevice(), UInt128)
+    ci_option_passed = false
+    dryrun = false
 
     for (i, t) in enumerate(choices)
         if t == "--skip"
-            skip_tests = choices[i + 1:end]
+            union!(skip_tests, choices[i + 1:end])
             break
         elseif t == "--exit-on-error"
             exit_on_error = true
         elseif t == "--revise"
             use_revise = true
         elseif startswith(t, "--seed=")
-            seed = parse(UInt128, t[8:end])
+            seed = parse(UInt128, t[(length("--seed=") + 1):end])
+        elseif t == "--ci"
+            ci_option_passed = true
+        elseif t == "--help-list"
+            dryrun = true
+        elseif t == "--help"
+            println("""
+                USAGE: ./julia runtests.jl [options] [tests]
+                OPTIONS:
+                  --exit-on-error      : stop tests immediately when a test group fails
+                  --help               : prints this help message
+                  --help-list          : prints the options computed without running them
+                  --revise             : load Revise
+                  --seed=<SEED>        : set the initial seed for all testgroups (parsed as a UInt128)
+                  --skip <NAMES>...    : skip test or collection tagged with <NAMES>
+                TESTS:
+                  Can be special tokens, such as "all", "unicode", "stdlib", the names of stdlib \
+                  modules, or the names of any file in the TESTNAMES array (defaults to "all").
+
+                  Or prefix a name with `-` (such as `-core`) to skip a particular test.
+                """)
+            return (; tests = [],
+                      net_on = false,
+                      exit_on_error = false,
+                      use_revise = false,
+                      seed = UInt128(0))
+        elseif startswith(t, "--")
+            error("unknown option: $t")
+        elseif startswith(t, "-")
+            push!(skip_tests, t[2:end])
         else
             push!(tests, t)
         end
     end
 
-    if tests == ["all"] || isempty(tests)
-        tests = testnames
+    unhandled = copy(skip_tests)
+
+    requested_all     = "all"     in tests
+    requested_default = "default" in tests
+    if isempty(tests) || requested_all || requested_default
+        append!(tests, TESTNAMES)
     end
+    filter!(x -> x != "all",     tests)
+    filter!(x -> x != "default", tests)
 
     function filtertests!(tests, name, files=[name])
        flt = x -> (x != name && !(x in files))
        if name in skip_tests
            filter!(flt, tests)
+           pop!(unhandled, name)
        elseif name in tests
            filter!(flt, tests)
            prepend!(tests, files)
        end
     end
 
+    explicit_pkg            = "Pkg"            in tests
+    explicit_libgit2_online = "LibGit2/online" in tests
+
     filtertests!(tests, "unicode", ["unicode/utf8"])
     filtertests!(tests, "strings", ["strings/basic", "strings/search", "strings/util",
                    "strings/io", "strings/types"])
@@ -99,7 +142,10 @@ function choosetests(choices = [])
     filtertests!(tests, "subarray")
     filtertests!(tests, "compiler", ["compiler/inference", "compiler/validation",
         "compiler/ssair", "compiler/irpasses", "compiler/codegen",
-        "compiler/inline", "compiler/contextual"])
+        "compiler/inline", "compiler/contextual", "compiler/AbstractInterpreter",
+        "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"])
+    filtertests!(tests, "compiler/EscapeAnalysis", [
+        "compiler/EscapeAnalysis/local", "compiler/EscapeAnalysis/interprocedural"])
     filtertests!(tests, "stdlib", STDLIBS)
     # do ambiguous first to avoid failing if ambiguities are introduced by other tests
     filtertests!(tests, "ambiguous")
@@ -107,21 +153,40 @@ function choosetests(choices = [])
     if startswith(string(Sys.ARCH), "arm")
         # Remove profile from default tests on ARM since it currently segfaults
         # Allow explicitly adding it for testing
-        @warn "Skipping Profile tests"
+        @warn "Skipping Profile tests because the architecture is ARM"
         filter!(x -> (x != "Profile"), tests)
     end
 
-    net_required_for = ["Sockets", "LibGit2", "LibCURL", "Downloads"]
+    net_required_for = [
+        "Artifacts",
+        "Downloads",
+        "LazyArtifacts",
+        "LibCURL",
+        "LibGit2",
+        "Sockets",
+        "download",
+    ]
     net_on = true
-    try
-        ipa = getipaddr()
-    catch
-        @warn "Networking unavailable: Skipping tests [" * join(net_required_for, ", ") * "]"
-        net_on = false
-    end
-
-    if !net_on
-        filter!(!in(net_required_for), tests)
+    JULIA_TEST_NETWORKING_AVAILABLE = get(ENV, "JULIA_TEST_NETWORKING_AVAILABLE", "") |>
+                                      strip |>
+                                      lowercase |>
+                                      s -> tryparse(Bool, s) |>
+                                      x -> x === true
+    # If the `JULIA_TEST_NETWORKING_AVAILABLE` environment variable is set to `true`, we
+    # always set `net_on` to `true`.
+    # Otherwise, we set `net_on` to true if and only if networking is actually available.
+    if !JULIA_TEST_NETWORKING_AVAILABLE
+        try
+            ipa = getipaddr()
+        catch
+            if ci_option_passed
+                @error("Networking unavailable, but `--ci` was passed")
+                rethrow()
+            end
+            net_on = false
+            @warn "Networking unavailable: Skipping tests [" * join(net_required_for, ", ") * "]"
+            filter!(!in(net_required_for), tests)
+        end
     end
 
     if ccall(:jl_running_on_valgrind,Cint,()) != 0 && "rounding" in tests
@@ -129,10 +194,9 @@ function choosetests(choices = [])
         filter!(x -> x != "rounding", tests)
     end
 
+    filter!(!in(tests), unhandled)
     filter!(!in(skip_tests), tests)
 
-    explicit_pkg3    =  "Pkg/pkg"       in tests
-    explicit_libgit2 =  "LibGit2/online" in tests
     new_tests = String[]
     for test in tests
         if test in STDLIBS
@@ -148,11 +212,31 @@ function choosetests(choices = [])
     end
     filter!(x -> (x != "stdlib" && !(x in STDLIBS)) , tests)
     append!(tests, new_tests)
-    explicit_pkg3    || filter!(x -> x != "Pkg/pkg",       tests)
-    explicit_libgit2 || filter!(x -> x != "LibGit2/online", tests)
+
+    requested_all || explicit_pkg            || filter!(x -> x != "Pkg",            tests)
+    requested_all || explicit_libgit2_online || filter!(x -> x != "LibGit2/online", tests)
 
     # Filter out tests from the test groups in the stdlibs
+    filter!(!in(tests), unhandled)
     filter!(!in(skip_tests), tests)
 
-    tests, net_on, exit_on_error, use_revise, seed
+    if !isempty(unhandled)
+        @warn "Not skipping tests: $(join(unhandled, ", "))"
+    end
+
+    if dryrun
+        print("Tests enabled to run:")
+        foreach(t -> print("\n  ", t), tests)
+        if !isempty(skip_tests)
+            print("\n\nTests skipped:")
+            foreach(t -> print("\n  ", t), skip_tests)
+        end
+        print("\n")
+        exit_on_error && (print("\nwith option "); printstyled("exit_on_error", bold=true))
+        use_revise && (print("\nwith option "); printstyled("use_revise", bold=true); print(" (Revise.jl)"))
+        print("\n\n")
+        empty!(tests)
+    end
+
+    return (; tests, net_on, exit_on_error, use_revise, seed)
 end
diff --git a/test/clangsa/GCPushPop.cpp b/test/clangsa/GCPushPop.cpp
index a992630291bb5e..f8dcfdafa5aa99 100644
--- a/test/clangsa/GCPushPop.cpp
+++ b/test/clangsa/GCPushPop.cpp
@@ -1,6 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker -x c++ %s
+// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -Xclang -verify -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -x c++ %s
 
 #include "julia.h"
 
diff --git a/test/clangsa/ImplicitAtomicsTest.c b/test/clangsa/ImplicitAtomicsTest.c
new file mode 100644
index 00000000000000..87154347d97570
--- /dev/null
+++ b/test/clangsa/ImplicitAtomicsTest.c
@@ -0,0 +1,112 @@
+// This file is a part of Julia. License is MIT: https://julialang.org/license
+
+// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -x c -std=c11 | FileCheck --check-prefixes=CHECK,CHECK-C %s
+// RUN: clang-tidy %s --checks=-*,concurrency-implicit-atomics -load libImplicitAtomicsPlugin%shlibext -- -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} ${CXXFLAGS} -x c++ -std=c++11 | FileCheck --check-prefixes=CHECK,CHECK-CXX %s
+
+#include "julia_atomics.h"
+
+_Atomic(int) x, *px;
+struct Atomic_xy_t {
+    _Atomic(int) x;
+    _Atomic(int) *px;
+    int y;
+} y, *py;
+_Atomic(int) z[2];
+
+
+// jwn: add tests for casts, and *py = y;
+
+void hiddenAtomics(void) {
+    px = &x; // CHECK-NOT: [[@LINE]]
+    py = &y; // CHECK-NOT: [[@LINE]]
+    y.px = &y.x; // CHECK-NOT: [[@LINE]]
+    ++x; // CHECK: [[@LINE]]:7: warning: Implicit Atomic seq_cst synchronization
+    --x; // CHECK: [[@LINE]]:7: warning: Implicit Atomic seq_cst synchronization
+    x++; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    x--; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    x += 2; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    x -= 2; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+#ifndef __cplusplus // invalid C++ code
+    // CHECK-CXX-NOT: [[@LINE+1]]
+    x *= 2; // CHECK-C: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    x = // CHECK-C: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+        x; // CHECK-C: [[@LINE]]:9: warning: Implicit Atomic seq_cst synchronization
+#endif
+    x = 2; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    x + 2; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+
+    ++*px; // CHECK: [[@LINE]]:8: warning: Implicit Atomic seq_cst synchronization
+    --*px; // CHECK: [[@LINE]]:8: warning: Implicit Atomic seq_cst synchronization
+    px++; // CHECK-NOT: [[@LINE]]
+    px--; // CHECK-NOT: [[@LINE]]
+    1 + *px++; // CHECK: [[@LINE]]:10: warning: Implicit Atomic seq_cst synchronization
+    1 + *px--; // CHECK: [[@LINE]]:10: warning: Implicit Atomic seq_cst synchronization
+    (*px)++; // CHECK: [[@LINE]]:7: warning: Implicit Atomic seq_cst synchronization
+    (*px)--; // CHECK: [[@LINE]]:7: warning: Implicit Atomic seq_cst synchronization
+    *px += 2; // CHECK: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px -= 2; // CHECK: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+#ifndef __cplusplus // invalid C++ code
+    // CHECK-CXX-NOT: [[@LINE+1]]
+    *px *= 2; // CHECK-C: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px = // CHECK-C: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+        x; // CHECK-C: [[@LINE]]:9: warning: Implicit Atomic seq_cst synchronization
+    x = // CHECK-C: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+        *px; // CHECK-C: [[@LINE]]:10: warning: Implicit Atomic seq_cst synchronization
+#endif
+    *px = 2; // CHECK: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+    *px + 2; // CHECK: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+
+    *(int*)&x = 3; // CHECK-NOT: [[@LINE]]
+    *(int*)px = 3; // CHECK-NOT: [[@LINE]]
+
+    y.y = 2; // CHECK-NOT: [[@LINE]]
+    py->y = 2; // CHECK-NOT: [[@LINE]]
+#ifndef __cplusplus // invalid C++ code
+    // CHECK-CXX-NOT: [[@LINE+1]]
+    *py = // TODO
+        y; // TODO
+    y = // TODO
+       *py; // TODO
+#endif
+    *(_Atomic(int)*)&y.y = 2; // CHECK: [[@LINE]]:22: warning: Implicit Atomic seq_cst synchronization
+    *(_Atomic(int)*)&py->y = 2; // CHECK: [[@LINE]]:22: warning: Implicit Atomic seq_cst synchronization
+
+    y.x = 1; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    *y.px = 1; // CHECK: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+
+#ifndef __cplusplus // invalid C++ code
+    // CHECK-CXX-NOT: [[@LINE+1]]
+    x = // CHECK-C: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+        py->x; // CHECK-C: [[@LINE]]:9: warning: Implicit Atomic seq_cst synchronization
+    x = // CHECK-C: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+        *py->px; // CHECK-C: [[@LINE]]:10: warning: Implicit Atomic seq_cst synchronization
+#endif
+    py->x = 1; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    *py->px = 1; // CHECK: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+
+    z[1] = 1; // CHECK: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    *z = 1; // CHECK: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+    *z += 1; // CHECK: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+
+#ifdef __cplusplus // check initialization / finalization
+    // CHECK-NOT: [[@LINE+1]]
+    _Atomic(int) lx{2};
+    lx = 3; // CHECK-CXX: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+    lx += 1; // CHECK-CXX: [[@LINE]]:5: warning: Implicit Atomic seq_cst synchronization
+
+    // CHECK-NOT: [[@LINE+1]]
+    struct large_type { int x[16]; };
+    // CHECK-NOT: [[@LINE+1]]
+    auto *ly = new std::atomic<struct large_type>();
+    *ly = // CHECK-CXX: [[@LINE]]:6: warning: Implicit Atomic seq_cst synchronization
+        ly->load(); // CHECK-NOT: [[@LINE]]
+    struct large_type a = *ly; // CHECK-CXX: [[@LINE]]:28: warning: Implicit Atomic seq_cst synchronization
+    delete ly; // CHECK-NOT: [[@LINE]]
+
+#if 0 // enable for C++2a
+    std::atomic_ref<int> lz(*(int*)px);
+    lz = 3;
+    lz += 1;
+#endif
+#endif
+}
diff --git a/test/clangsa/Makefile b/test/clangsa/Makefile
index 850f9ea76985a1..3bebd45c9a5a6c 100644
--- a/test/clangsa/Makefile
+++ b/test/clangsa/Makefile
@@ -3,11 +3,11 @@ JULIAHOME := $(abspath $(SRCDIR)/../..)
 BUILDDIR := .
 include $(JULIAHOME)/Make.inc
 
-check: $(SRCDIR)
+check: .
 
 TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.c) $(wildcard $(SRCDIR)/*.cpp))
 
-$(SRCDIR) $(TESTS):
+. $(TESTS):
 	@$(MAKE) -C $(BUILDDIR)/../../src $(build_includedir)/julia/julia_version.h
 	@$(MAKE) -C $(BUILDDIR)/../../src clangsa
 	PATH=$(build_bindir):$(build_depsbindir):$$PATH \
@@ -17,6 +17,6 @@ $(SRCDIR) $(TESTS):
 	CPPFLAGS_FLAGS="${CPPFLAGS_FLAGS}" \
 	CFLAGS_FLAGS="${CFLAGS_FLAGS}" \
 	CXXFLAGS_FLAGS="${CXXFLAGS_FLAGS}" \
-	$(build_depsbindir)/lit/lit.py -v $@
+	$(build_depsbindir)/lit/lit.py -v $(addprefix $(SRCDIR)/,$@)
 
-.PHONY: $(TESTS) $(SRCDIR) check all
+.PHONY: $(TESTS) check all .
diff --git a/test/clangsa/MissingRoots.c b/test/clangsa/MissingRoots.c
index 10c28e6bdbe9e3..f0b32c54bc7b83 100644
--- a/test/clangsa/MissingRoots.c
+++ b/test/clangsa/MissingRoots.c
@@ -1,6 +1,6 @@
 // This file is a part of Julia. License is MIT: https://julialang.org/license
 
-// RUN: clang --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s
+// RUN: clang -D__clang_gcanalyzer__ --analyze -Xanalyzer -analyzer-output=text -Xclang -load -Xclang libGCCheckerPlugin%shlibext -I%julia_home/src -I%julia_home/src/support -I%julia_home/usr/include ${CLANGSA_FLAGS} ${CPPFLAGS} ${CFLAGS} -Xclang -analyzer-checker=core,julia.GCChecker --analyzer-no-default-checks -Xclang -verify -x c %s
 
 #include "julia.h"
 #include "julia_internal.h"
@@ -329,7 +329,7 @@ jl_module_t *propagation(jl_module_t *m JL_PROPAGATES_ROOT);
 void module_member(jl_module_t *m)
 {
     for(int i=(int)m->usings.len-1; i >= 0; --i) {
-      jl_module_t *imp = (jl_module_t*)m->usings.items[i];
+      jl_module_t *imp = propagation(m);
       jl_gc_safepoint();
       look_at_value((jl_value_t*)imp);
       jl_module_t *prop = propagation(imp);
@@ -409,14 +409,6 @@ void stack_rooted(jl_value_t *lb JL_MAYBE_UNROOTED, jl_value_t *ub JL_MAYBE_UNRO
     JL_GC_POP();
 }
 
-void JL_NORETURN throw_internal(jl_value_t *e JL_MAYBE_UNROOTED)
-{
-    jl_ptls_t ptls = jl_get_ptls_states();
-    ptls->sig_exception = e;
-    jl_gc_unsafe_enter(ptls);
-    look_at_value(e);
-}
-
 JL_DLLEXPORT jl_value_t *jl_totally_used_function(int i)
 {
     jl_value_t *v = jl_box_int32(i); // expected-note{{Started tracking value here}}
diff --git a/test/clangsa/lit.cfg.py b/test/clangsa/lit.cfg.py
index 5790eab812e9ce..bb48f0b891acff 100644
--- a/test/clangsa/lit.cfg.py
+++ b/test/clangsa/lit.cfg.py
@@ -14,8 +14,6 @@
     platform.system() == 'Windows' else '.so'))
 config.substitutions.append(("%julia_home", os.path.join(os.path.dirname(__file__), "../..")))
 
-path = os.path.pathsep.join((os.path.join(os.path.dirname(__file__),"../../usr/tools"), os.path.join(os.path.dirname(__file__),"../../usr/bin"), config.environment['PATH']))
-config.environment['PATH'] = path
 config.environment['HOME'] = "/tmp"
 config.environment['CLANGSA_FLAGS'] = os.environ.get('CLANGSA_FLAGS', "")
 config.environment['CLANGSA_CXXFLAGS'] = os.environ.get('CLANGSA_CXXFLAGS', "")
diff --git a/test/client.jl b/test/client.jl
index 497cc54b135342..195743b1d62083 100644
--- a/test/client.jl
+++ b/test/client.jl
@@ -18,14 +18,22 @@ nested_error_pattern = r"""
 
 @testset "display_error" begin
     # Display of errors which cause more than one entry on the exception stack
-    err_str = try
+    excs = try
         eval(nested_error_expr)
     catch
-        excs = Base.catch_stack()
-        @test typeof.(first.(excs)) == [UndefVarError, DivideError]
-        sprint(Base.display_error, excs)
+        Base.current_exceptions()
     end
-    @test occursin(nested_error_pattern, err_str)
+    @test typeof.(first.(excs)) == [UndefVarError, DivideError]
+    @test occursin(nested_error_pattern, sprint(Base.display_error, excs))
+
+    @test occursin(r"""
+        2-element ExceptionStack:
+        DivideError: integer division error
+        Stacktrace:.*
+
+        caused by: UndefVarError: __not_a_binding__ not defined
+        Stacktrace:.*
+        """s, sprint(show, excs))
 end
 
 @testset "Fallback REPL" begin
@@ -35,3 +43,12 @@ end
     err_str = String(take!(errio))
     @test occursin(nested_error_pattern, err_str)
 end
+
+@testset "display_error(io, er, bt) works" begin
+    errio = IOBuffer()
+    Base.display_error(errio, ErrorException, [])
+    err_str = String(take!(errio))
+    @test occursin(r"""
+        ERROR: ErrorException
+        """s, err_str)
+end
diff --git a/test/cmdlineargs.jl b/test/cmdlineargs.jl
index 2a85330a051e61..d9fb7192a63715 100644
--- a/test/cmdlineargs.jl
+++ b/test/cmdlineargs.jl
@@ -50,7 +50,7 @@ let
     @test format_filename("%a%%b") == "a%b"
 end
 
-let exename = `$(Base.julia_cmd()) --startup-file=no`
+let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # tests for handling of ENV errors
     let v = writereadpipeline("println(\"REPL: \", @which(less), @isdefined(InteractiveUtils))",
                 setenv(`$exename -i -E 'empty!(LOAD_PATH); @isdefined InteractiveUtils'`,
@@ -71,6 +71,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
     end
     let v = readchomperrors(`$exename -i -e '
             empty!(LOAD_PATH)
+            @eval Sys STDLIB=mktempdir()
             Base.unreference_module(Base.PkgId(Base.UUID(0xb77e0a4c_d291_57a0_90e8_8db25a27a240), "InteractiveUtils"))
             '`)
         # simulate not having a working version of InteractiveUtils,
@@ -92,9 +93,29 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
         @test v[2] == "1"
         @test isempty(v[3])
     end
+
+    let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options", "HOME" => homedir()))
+        @test v[1]
+        @test contains(v[2], r"print-options + = 1")
+        @test contains(v[2], r"combiner-store-merge-dependence-limit + = 4")
+        @test contains(v[2], r"enable-tail-merge + = 2")
+        @test isempty(v[3])
+    end
+    let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -combiner-store-merge-dependence-limit=6", "HOME" => homedir()))
+        @test v[1]
+        @test contains(v[2], r"print-options + = 1")
+        @test contains(v[2], r"combiner-store-merge-dependence-limit + = 6")
+        @test contains(v[2], r"enable-tail-merge + = 1")
+        @test isempty(v[3])
+    end
+    let v = readchomperrors(setenv(`$exename -e 0`, "JULIA_LLVM_ARGS" => "-print-options -enable-tail-merge=1 -enable-tail-merge=1", "HOME" => homedir()))
+        @test !v[1]
+        @test isempty(v[2])
+        @test v[3] == "julia: for the --enable-tail-merge option: may only occur zero or one times!"
+    end
 end
 
-let exename = `$(Base.julia_cmd()) --startup-file=no`
+let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     # --version
     let v = split(read(`$exename -v`, String), "julia version ")[end]
         @test Base.VERSION_STRING == chomp(v)
@@ -102,16 +123,23 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
     @test read(`$exename -v`, String) == read(`$exename --version`, String)
 
     # --help
-    let header = "julia [switches] -- [programfile] [args...]"
+    let header = "\n    julia [switches] -- [programfile] [args...]"
         @test startswith(read(`$exename -h`, String), header)
         @test startswith(read(`$exename --help`, String), header)
     end
 
     # ~ expansion in --project and JULIA_PROJECT
     if !Sys.iswindows()
-        expanded = abspath(expanduser("~/foo"))
-        @test occursin(expanded, readchomp(`$exename --project='~/foo' -E 'Base.active_project()'`))
-        @test occursin(expanded, readchomp(setenv(`$exename -E 'Base.active_project()'`, "JULIA_PROJECT" => "~/foo", "HOME" => homedir())))
+        let expanded = abspath(expanduser("~/foo/Project.toml"))
+            @test expanded == readchomp(`$exename --project='~/foo' -e 'println(Base.active_project())'`)
+            @test expanded == readchomp(setenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "~/foo", "HOME" => homedir()))
+        end
+    end
+
+    # handling of @projectname in --project and JULIA_PROJECT
+    let expanded = abspath(Base.load_path_expand("@foo"))
+        @test expanded == readchomp(`$exename --project='@foo' -e 'println(Base.active_project())'`)
+        @test expanded == readchomp(addenv(`$exename -e 'println(Base.active_project())'`, "JULIA_PROJECT" => "@foo", "HOME" => homedir()))
     end
 
     # --quiet, --banner
@@ -190,8 +218,8 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
 
     # -t, --threads
     code = "print(Threads.nthreads())"
-    cpu_threads = string(ccall(:jl_cpu_threads, Int32, ()))
-    @test cpu_threads ==
+    cpu_threads = ccall(:jl_effective_threads, Int32, ())
+    @test string(cpu_threads) ==
           read(`$exename --threads auto -e $code`, String) ==
           read(`$exename --threads=auto -e $code`, String) ==
           read(`$exename -tauto -e $code`, String) ==
@@ -202,10 +230,15 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
                   read(`$exename -t 2 -e $code`, String) == "2"
         end
     end
-    cpu_threads *= "0"
-    @test read(`$exename -t $cpu_threads -e $code`, String) == cpu_threads
-    withenv("JULIA_NUM_THREADS" => cpu_threads) do
-        @test read(`$exename -e $code`, String) == cpu_threads
+    # We want to test oversubscription, but on manycore machines, this can
+    # actually exhaust limited PID spaces
+    cpu_threads = max(2*cpu_threads, min(50, 10*cpu_threads))
+    if Sys.WORD_SIZE == 32
+        cpu_threads = min(cpu_threads, 50)
+    end
+    @test read(`$exename -t $cpu_threads -e $code`, String) == string(cpu_threads)
+    withenv("JULIA_NUM_THREADS" => string(cpu_threads)) do
+        @test read(`$exename -e $code`, String) == string(cpu_threads)
     end
     @test !success(`$exename -t 0`)
     @test !success(`$exename -t -1`)
@@ -293,6 +326,35 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
         rm(covfile)
         @test occursin(expected, got) || (expected, got)
         @test_broken occursin(expected_good, got)
+
+        # Ask for coverage in specific file
+        tfile = realpath(inputfile)
+        @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
+            --code-coverage=$covfile --code-coverage=@$tfile`) == "(3, $(repr(tfile)))"
+        @test isfile(covfile)
+        got = read(covfile, String)
+        rm(covfile)
+        @test occursin(expected, got) || (expected, got)
+        @test_broken occursin(expected_good, got)
+
+        # Ask for coverage in directory
+        tdir = dirname(realpath(inputfile))
+        @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
+            --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(tdir)))"
+        @test isfile(covfile)
+        got = read(covfile, String)
+        rm(covfile)
+        @test occursin(expected, got) || (expected, got)
+        @test_broken occursin(expected_good, got)
+
+        # Ask for coverage in a different directory
+        tdir = mktempdir() # a dir that contains no code
+        @test readchomp(`$exename -E "(Base.JLOptions().code_coverage, unsafe_string(Base.JLOptions().tracked_path))" -L $inputfile
+            --code-coverage=$covfile --code-coverage=@$tdir`) == "(3, $(repr(tdir)))"
+        @test isfile(covfile)
+        got = read(covfile, String)
+        @test isempty(got)
+        rm(covfile)
     end
 
     # --track-allocation
@@ -303,27 +365,32 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
     @test readchomp(`$exename -E "Base.JLOptions().malloc_log != 0" --track-allocation=user`) == "true"
     mktempdir() do dir
         helperdir = joinpath(@__DIR__, "testhelpers")
-        inputfile = joinpath(helperdir, "allocation_file.jl")
+        inputfile = joinpath(dir, "allocation_file.jl")
+        cp(joinpath(helperdir,"allocation_file.jl"), inputfile)
         pid = readchomp(`$exename -E "getpid()" -L $inputfile --track-allocation=user`)
         memfile = "$inputfile.$pid.mem"
         got = readlines(memfile)
         rm(memfile)
         @test popfirst!(got) == "        0 g(x) = x + 123456"
         @test popfirst!(got) == "        - function f(x)"
-        @test popfirst!(got) == "       80     []"
+        @test popfirst!(got) == "        -     []"
         if Sys.WORD_SIZE == 64
             # P64 pools with 64 bit tags
-            @test popfirst!(got) == "       32     Base.invokelatest(g, 0)"
-            @test popfirst!(got) == "       48     Base.invokelatest(g, x)"
+            @test popfirst!(got) == "       16     Base.invokelatest(g, 0)"
+            @test popfirst!(got) == "       32     Base.invokelatest(g, x)"
         elseif 12 == (() -> @allocated ccall(:jl_gc_allocobj, Ptr{Cvoid}, (Csize_t,), 8))()
             # See if we have a 12-byte pool with 32 bit tags (MAX_ALIGN = 4)
-            @test popfirst!(got) == "       24     Base.invokelatest(g, 0)"
-            @test popfirst!(got) == "       36     Base.invokelatest(g, x)"
+            @test popfirst!(got) == "       12     Base.invokelatest(g, 0)"
+            @test popfirst!(got) == "       24     Base.invokelatest(g, x)"
         else # MAX_ALIGN >= 8
-            @test popfirst!(got) == "       16     Base.invokelatest(g, 0)"
-            @test popfirst!(got) == "       48     Base.invokelatest(g, x)"
+            @test popfirst!(got) == "        8     Base.invokelatest(g, 0)"
+            @test popfirst!(got) == "       32     Base.invokelatest(g, x)"
+        end
+        if Sys.WORD_SIZE == 64
+            @test popfirst!(got) == "       48     []"
+        else
+            @test popfirst!(got) == "       32     []"
         end
-        @test popfirst!(got) == "       80     []"
         @test popfirst!(got) == "        - end"
         @test popfirst!(got) == "        - f(1.23)"
         @test isempty(got) || got
@@ -336,6 +403,9 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
     @test readchomp(`$exename -E "Base.JLOptions().opt_level" --optimize`) == "3"
     @test readchomp(`$exename -E "Base.JLOptions().opt_level" -O0`) == "0"
 
+    @test readchomp(`$exename -E "Base.JLOptions().opt_level_min"`) == "0"
+    @test readchomp(`$exename -E "Base.JLOptions().opt_level_min" --min-optlevel=2`) == "2"
+
     # -g
     @test readchomp(`$exename -E "Base.JLOptions().debug_level" -g`) == "2"
     let code = writereadpipeline("code_llvm(stdout, +, (Int64, Int64), raw=true, dump_module=true)", `$exename -g0`)
@@ -371,6 +441,8 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
         filter!(a -> !startswith(a, "--check-bounds="), exename_default_checkbounds.exec)
         @test parse(Int, readchomp(`$exename_default_checkbounds -E "Int(Base.JLOptions().check_bounds)"`)) ==
             JL_OPTIONS_CHECK_BOUNDS_DEFAULT
+        @test parse(Int, readchomp(`$exename -E "Int(Base.JLOptions().check_bounds)"
+            --check-bounds=auto`)) == JL_OPTIONS_CHECK_BOUNDS_DEFAULT
         @test parse(Int, readchomp(`$exename -E "Int(Base.JLOptions().check_bounds)"
             --check-bounds=yes`)) == JL_OPTIONS_CHECK_BOUNDS_ON
         @test parse(Int, readchomp(`$exename -E "Int(Base.JLOptions().check_bounds)"
@@ -438,7 +510,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
         @test parse(Int,readchomp(`$exename --math-mode=ieee -E
             "Int(Base.JLOptions().fast_math)"`)) == JL_OPTIONS_FAST_MATH_OFF
         @test parse(Int,readchomp(`$exename --math-mode=fast -E
-            "Int(Base.JLOptions().fast_math)"`)) == JL_OPTIONS_FAST_MATH_ON
+            "Int(Base.JLOptions().fast_math)"`)) == JL_OPTIONS_FAST_MATH_DEFAULT
     end
 
     # --worker takes default / custom as argument (default/custom arguments
@@ -453,10 +525,10 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
             println(ARGS)
             """)
         close(io)
-        mkpath(joinpath(dir, ".julia", "config"))
-        cp(testfile, joinpath(dir, ".julia", "config", "startup.jl"))
+        mkpath(joinpath(dir, "config"))
+        cp(testfile, joinpath(dir, "config", "startup.jl"))
 
-        withenv((Sys.iswindows() ? "USERPROFILE" : "HOME") => dir) do
+        withenv("JULIA_DEPOT_PATH" => dir) do
             output = "[\"foo\", \"-bar\", \"--baz\"]"
             @test readchomp(`$exename $testfile foo -bar --baz`) == output
             @test readchomp(`$exename $testfile -- foo -bar --baz`) == output
@@ -482,7 +554,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
 
         a = joinpath(dir, "a.jl")
         b = joinpath(dir, "b.jl")
-        c = joinpath(dir, ".julia", "config", "startup.jl")
+        c = joinpath(dir, "config", "startup.jl")
 
         write(a, """
             println(@__FILE__)
@@ -498,7 +570,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
 
         readsplit(cmd) = split(readchomp(cmd), '\n')
 
-        withenv((Sys.iswindows() ? "USERPROFILE" : "HOME") => dir) do
+        withenv("JULIA_DEPOT_PATH" => dir) do
             @test readsplit(`$exename $a`) ==
                 [a, a,
                  b, a]
@@ -572,7 +644,7 @@ end
 
 
 # test error handling code paths of running --sysimage
-let exename = Base.julia_cmd()
+let exename = `$(Base.julia_cmd().exec[1]) -t 1`
     sysname = unsafe_string(Base.JLOptions().image_file)
     for nonexist_image in (
             joinpath(@__DIR__, "nonexistent"),
@@ -607,9 +679,9 @@ let exename = Base.julia_cmd()
     # --startup-file
     let JL_OPTIONS_STARTUPFILE_ON = 1,
         JL_OPTIONS_STARTUPFILE_OFF = 2
-        # `HOME=$tmpdir` to avoid errors in the user startup.jl, which hangs the tests. Issue #17642
+        # `JULIA_DEPOT_PATH=$tmpdir` to avoid errors in the user startup.jl, which hangs the tests. Issue #17642
         mktempdir() do tmpdir
-            withenv("HOME"=>tmpdir) do
+            withenv("JULIA_DEPOT_PATH"=>tmpdir) do
                 @test parse(Int,readchomp(`$exename -E "Base.JLOptions().startupfile" --startup-file=yes`)) == JL_OPTIONS_STARTUPFILE_ON
             end
         end
@@ -670,7 +742,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
 end
 
 # issue #6310
-let exename = `$(Base.julia_cmd()) --startup-file=no`
+let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     @test writereadpipeline("2+2", exename) == ("4\n", true)
     @test writereadpipeline("2+2\n3+3\n4+4", exename) == ("4\n6\n8\n", true)
     @test writereadpipeline("", exename) == ("", true)
@@ -695,7 +767,7 @@ let exename = `$(Base.julia_cmd()) --startup-file=no`
 end
 
 # incomplete inputs to stream REPL
-let exename = `$(Base.julia_cmd()) --startup-file=no`
+let exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
     in = Pipe(); out = Pipe(); err = Pipe()
     proc = run(pipeline(exename, stdin = in, stdout = out, stderr = err), wait=false)
     write(in, "f(\n")
@@ -707,7 +779,7 @@ end
 
 # Issue #29855
 for yn in ("no", "yes")
-    exename = `$(Base.julia_cmd()) --inline=no --startup-file=no --inline=$yn`
+    exename = `$(Base.julia_cmd()) --inline=no --startup-file=no --color=no --inline=$yn`
     v = writereadpipeline("Base.julia_cmd()", exename)
     if yn == "no"
         @test occursin(r" --inline=no", v[1])
@@ -716,3 +788,25 @@ for yn in ("no", "yes")
     end
     @test v[2]
 end
+
+# issue #39259, shadowing `ARGS`
+@test success(`$(Base.julia_cmd()) --startup-file=no -e 'ARGS=1'`)
+
+@testset "- as program file reads from stdin" begin
+    for args in (`- foo bar`, `-- - foo bar`)
+        cmd = `$(Base.julia_cmd()) --startup-file=no $(args)`
+        io = IOBuffer()
+        open(cmd, io; write=true) do proc
+            write(proc, """
+                println(PROGRAM_FILE)
+                println(@__FILE__)
+                foreach(println, ARGS)
+            """)
+        end
+        lines = collect(eachline(seekstart(io)))
+        @test lines[1] == "-"
+        @test lines[2] == "stdin"
+        @test lines[3] == "foo"
+        @test lines[4] == "bar"
+    end
+end
diff --git a/test/compiler/AbstractInterpreter.jl b/test/compiler/AbstractInterpreter.jl
new file mode 100644
index 00000000000000..9d1be42891042f
--- /dev/null
+++ b/test/compiler/AbstractInterpreter.jl
@@ -0,0 +1,111 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+const CC = Core.Compiler
+import Core: MethodInstance, CodeInstance
+import .CC: WorldRange, WorldView
+
+# define new `AbstractInterpreter` that satisfies the minimum interface requirements
+# while managing its cache independently
+macro newinterp(name)
+    cachename = gensym(string(name, "Cache"))
+    name = esc(name)
+    quote
+        struct $cachename
+            dict::IdDict{MethodInstance,CodeInstance}
+        end
+        struct $name <: CC.AbstractInterpreter
+            interp::CC.NativeInterpreter
+            cache::$cachename
+            $name(world = Base.get_world_counter();
+                interp = CC.NativeInterpreter(world),
+                cache = $cachename(IdDict{MethodInstance,CodeInstance}())
+                ) = new(interp, cache)
+        end
+        CC.InferenceParams(interp::$name) = CC.InferenceParams(interp.interp)
+        CC.OptimizationParams(interp::$name) = CC.OptimizationParams(interp.interp)
+        CC.get_world_counter(interp::$name) = CC.get_world_counter(interp.interp)
+        CC.get_inference_cache(interp::$name) = CC.get_inference_cache(interp.interp)
+        CC.code_cache(interp::$name) = WorldView(interp.cache, WorldRange(CC.get_world_counter(interp)))
+        CC.get(wvc::WorldView{<:$cachename}, mi::MethodInstance, default) = get(wvc.cache.dict, mi, default)
+        CC.getindex(wvc::WorldView{<:$cachename}, mi::MethodInstance) = getindex(wvc.cache.dict, mi)
+        CC.haskey(wvc::WorldView{<:$cachename}, mi::MethodInstance) = haskey(wvc.cache.dict, mi)
+        CC.setindex!(wvc::WorldView{<:$cachename}, ci::CodeInstance, mi::MethodInstance) = setindex!(wvc.cache.dict, ci, mi)
+    end
+end
+
+# `OverlayMethodTable`
+# --------------------
+import Base.Experimental: @MethodTable, @overlay
+
+@newinterp MTOverlayInterp
+@MethodTable(OverlayedMT)
+CC.method_table(interp::MTOverlayInterp) = CC.OverlayMethodTable(CC.get_world_counter(interp), OverlayedMT)
+
+strangesin(x) = sin(x)
+@overlay OverlayedMT strangesin(x::Float64) = iszero(x) ? nothing : cos(x)
+
+# inference should use the overlayed method table
+@test Base.return_types((Float64,); interp=MTOverlayInterp()) do x
+    strangesin(x)
+end |> only === Union{Float64,Nothing}
+@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
+    Base.@invoke strangesin(x::Float64)
+end |> only === Union{Float64,Nothing}
+
+# effect analysis should figure out that the overlayed method is used
+@test Base.infer_effects((Float64,); interp=MTOverlayInterp()) do x
+    strangesin(x)
+end |> !Core.Compiler.is_nonoverlayed
+@test Base.infer_effects((Any,); interp=MTOverlayInterp()) do x
+    Base.@invoke strangesin(x::Float64)
+end |> !Core.Compiler.is_nonoverlayed
+
+# but it should never apply for the native compilation
+@test Base.infer_effects((Float64,)) do x
+    strangesin(x)
+end |> Core.Compiler.is_nonoverlayed
+@test Base.infer_effects((Any,)) do x
+    Base.@invoke strangesin(x::Float64)
+end |> Core.Compiler.is_nonoverlayed
+
+# fallback to the internal method table
+@test Base.return_types((Int,); interp=MTOverlayInterp()) do x
+    cos(x)
+end |> only === Float64
+@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
+    Base.@invoke cos(x::Float64)
+end |> only === Float64
+
+# not fully covered overlay method match
+overlay_match(::Any) = nothing
+@overlay OverlayedMT overlay_match(::Int) = missing
+@test Base.return_types((Any,); interp=MTOverlayInterp()) do x
+    overlay_match(x)
+end |> only === Union{Nothing,Missing}
+
+# partial pure/concrete evaluation
+@test Base.return_types(; interp=MTOverlayInterp()) do
+    isbitstype(Int) ? nothing : missing
+end |> only === Nothing
+Base.@assume_effects :terminates_globally function issue41694(x)
+    res = 1
+    1 < x < 20 || throw("bad")
+    while x > 1
+        res *= x
+        x -= 1
+    end
+    return res
+end
+@test Base.return_types(; interp=MTOverlayInterp()) do
+    issue41694(3) == 6 ? nothing : missing
+end |> only === Nothing
+
+# disable partial pure/concrete evaluation when tainted by any overlayed call
+Base.@assume_effects :total totalcall(f, args...) = f(args...)
+@test Base.return_types(; interp=MTOverlayInterp()) do
+    if totalcall(strangesin, 1.0) == cos(1.0)
+        return nothing
+    else
+        return missing
+    end
+end |> only === Nothing
diff --git a/test/compiler/EscapeAnalysis/EAUtils.jl b/test/compiler/EscapeAnalysis/EAUtils.jl
new file mode 100644
index 00000000000000..3ae9b41a0ddac4
--- /dev/null
+++ b/test/compiler/EscapeAnalysis/EAUtils.jl
@@ -0,0 +1,385 @@
+module EAUtils
+
+export code_escapes, @code_escapes, __clear_cache!
+
+const CC = Core.Compiler
+const EA = CC.EscapeAnalysis
+
+# entries
+# -------
+
+import Base: unwrap_unionall, rewrap_unionall
+import InteractiveUtils: gen_call_with_extracted_types_and_kwargs
+
+"""
+    @code_escapes [options...] f(args...)
+
+Evaluates the arguments to the function call, determines its types, and then calls
+[`code_escapes`](@ref) on the resulting expression.
+As with `@code_typed` and its family, any of `code_escapes` keyword arguments can be given
+as the optional arguments like `@code_escapes optimize=false myfunc(myargs...)`.
+"""
+macro code_escapes(ex0...)
+    return gen_call_with_extracted_types_and_kwargs(__module__, :code_escapes, ex0)
+end
+
+"""
+    code_escapes(f, argtypes=Tuple{}; [debuginfo::Symbol = :none], [optimize::Bool = true]) -> result::EscapeResult
+
+Runs the escape analysis on optimized IR of a generic function call with the given type signature.
+
+# Keyword Arguments
+
+- `optimize::Bool = true`:
+  if `true` returns escape information of post-inlining IR (used for local optimization),
+  otherwise returns escape information of pre-inlining IR (used for interprocedural escape information generation)
+- `debuginfo::Symbol = :none`:
+  controls the amount of code metadata present in the output, possible options are `:none` or `:source`.
+"""
+function code_escapes(@nospecialize(f), @nospecialize(types=Base.default_tt(f));
+                      world::UInt = get_world_counter(),
+                      interp::Core.Compiler.AbstractInterpreter = Core.Compiler.NativeInterpreter(world),
+                      debuginfo::Symbol = :none,
+                      optimize::Bool = true)
+    ft = Core.Typeof(f)
+    if isa(types, Type)
+        u = unwrap_unionall(types)
+        tt = rewrap_unionall(Tuple{ft, u.parameters...}, types)
+    else
+        tt = Tuple{ft, types...}
+    end
+    interp = EscapeAnalyzer(interp, tt, optimize)
+    results = Base.code_typed_by_type(tt; optimize=true, world, interp)
+    isone(length(results)) || throw(ArgumentError("`code_escapes` only supports single analysis result"))
+    return EscapeResult(interp.ir, interp.state, interp.linfo, debuginfo===:source)
+end
+
+# in order to run a whole analysis from ground zero (e.g. for benchmarking, etc.)
+__clear_cache!() = empty!(GLOBAL_CODE_CACHE)
+
+# AbstractInterpreter
+# -------------------
+
+# imports
+import .CC:
+    AbstractInterpreter, NativeInterpreter, WorldView, WorldRange,
+    InferenceParams, OptimizationParams, get_world_counter, get_inference_cache, code_cache,
+    lock_mi_inference, unlock_mi_inference, add_remark!,
+    may_optimize, may_compress, may_discard_trees, verbose_stmt_info
+# usings
+import Core:
+    CodeInstance, MethodInstance, CodeInfo
+import .CC:
+    InferenceResult, OptimizationState, IRCode, copy as cccopy,
+    @timeit, convert_to_ircode, slot2reg, compact!, ssa_inlining_pass!, sroa_pass!,
+    adce_pass!, type_lift_pass!, JLOptions, verify_ir, verify_linetable
+import .EA: analyze_escapes, ArgEscapeCache, EscapeInfo, EscapeState, is_ipo_profitable
+
+# when working outside of Core.Compiler,
+# cache entire escape state for later inspection and debugging
+struct EscapeCache
+    cache::ArgEscapeCache
+    state::EscapeState # preserved just for debugging purpose
+    ir::IRCode         # preserved just for debugging purpose
+end
+
+mutable struct EscapeAnalyzer{State} <: AbstractInterpreter
+    native::NativeInterpreter
+    cache::IdDict{InferenceResult,EscapeCache}
+    entry_tt
+    optimize::Bool
+    ir::IRCode
+    state::State
+    linfo::MethodInstance
+    EscapeAnalyzer(native::NativeInterpreter, @nospecialize(tt), optimize::Bool) =
+        new{EscapeState}(native, IdDict{InferenceResult,EscapeCache}(), tt, optimize)
+end
+
+CC.InferenceParams(interp::EscapeAnalyzer)    = InferenceParams(interp.native)
+CC.OptimizationParams(interp::EscapeAnalyzer) = OptimizationParams(interp.native)
+CC.get_world_counter(interp::EscapeAnalyzer)  = get_world_counter(interp.native)
+
+CC.lock_mi_inference(::EscapeAnalyzer,   ::MethodInstance) = nothing
+CC.unlock_mi_inference(::EscapeAnalyzer, ::MethodInstance) = nothing
+
+CC.add_remark!(interp::EscapeAnalyzer, sv, s) = add_remark!(interp.native, sv, s)
+
+CC.may_optimize(interp::EscapeAnalyzer)      = may_optimize(interp.native)
+CC.may_compress(interp::EscapeAnalyzer)      = may_compress(interp.native)
+CC.may_discard_trees(interp::EscapeAnalyzer) = may_discard_trees(interp.native)
+CC.verbose_stmt_info(interp::EscapeAnalyzer) = verbose_stmt_info(interp.native)
+
+CC.get_inference_cache(interp::EscapeAnalyzer) = get_inference_cache(interp.native)
+
+const GLOBAL_CODE_CACHE = IdDict{MethodInstance,CodeInstance}()
+
+function CC.code_cache(interp::EscapeAnalyzer)
+    worlds = WorldRange(get_world_counter(interp))
+    return WorldView(GlobalCache(), worlds)
+end
+
+struct GlobalCache end
+
+CC.haskey(wvc::WorldView{GlobalCache}, mi::MethodInstance) = haskey(GLOBAL_CODE_CACHE, mi)
+
+CC.get(wvc::WorldView{GlobalCache}, mi::MethodInstance, default) = get(GLOBAL_CODE_CACHE, mi, default)
+
+CC.getindex(wvc::WorldView{GlobalCache}, mi::MethodInstance) = getindex(GLOBAL_CODE_CACHE, mi)
+
+function CC.setindex!(wvc::WorldView{GlobalCache}, ci::CodeInstance, mi::MethodInstance)
+    GLOBAL_CODE_CACHE[mi] = ci
+    add_callback!(mi) # register the callback on invalidation
+    return nothing
+end
+
+function add_callback!(linfo)
+    if !isdefined(linfo, :callbacks)
+        linfo.callbacks = Any[invalidate_cache!]
+    else
+        if !any(@nospecialize(cb)->cb===invalidate_cache!, linfo.callbacks)
+            push!(linfo.callbacks, invalidate_cache!)
+        end
+    end
+    return nothing
+end
+
+function invalidate_cache!(replaced, max_world, depth = 0)
+    delete!(GLOBAL_CODE_CACHE, replaced)
+
+    if isdefined(replaced, :backedges)
+        for mi in replaced.backedges
+            mi = mi::MethodInstance
+            if !haskey(GLOBAL_CODE_CACHE, mi)
+                continue # otherwise fall into infinite loop
+            end
+            invalidate_cache!(mi, max_world, depth+1)
+        end
+    end
+    return nothing
+end
+
+function CC.optimize(interp::EscapeAnalyzer,
+    opt::OptimizationState, params::OptimizationParams, caller::InferenceResult)
+    ir = run_passes_with_ea(interp, opt.src, opt, caller)
+    return CC.finish(interp, opt, params, ir, caller)
+end
+
+function CC.cache_result!(interp::EscapeAnalyzer, caller::InferenceResult)
+    if haskey(interp.cache, caller)
+        GLOBAL_ESCAPE_CACHE[caller.linfo] = interp.cache[caller]
+    end
+    return Base.@invoke CC.cache_result!(interp::AbstractInterpreter, caller::InferenceResult)
+end
+
+const GLOBAL_ESCAPE_CACHE = IdDict{MethodInstance,EscapeCache}()
+
+"""
+    cache_escapes!(caller::InferenceResult, estate::EscapeState, cacheir::IRCode)
+
+Transforms escape information of call arguments of `caller`,
+and then caches it into a global cache for later interprocedural propagation.
+"""
+function cache_escapes!(interp::EscapeAnalyzer,
+    caller::InferenceResult, estate::EscapeState, cacheir::IRCode)
+    cache = ArgEscapeCache(estate)
+    ecache = EscapeCache(cache, estate, cacheir)
+    interp.cache[caller] = ecache
+    return cache
+end
+
+function get_escape_cache(interp::EscapeAnalyzer)
+    return function (linfo::Union{InferenceResult,MethodInstance})
+        if isa(linfo, InferenceResult)
+            ecache = get(interp.cache, linfo, nothing)
+        else
+            ecache = get(GLOBAL_ESCAPE_CACHE, linfo, nothing)
+        end
+        return ecache !== nothing ? ecache.cache : nothing
+    end
+end
+
+function run_passes_with_ea(interp::EscapeAnalyzer, ci::CodeInfo, sv::OptimizationState,
+    caller::InferenceResult)
+    @timeit "convert"   ir = convert_to_ircode(ci, sv)
+    @timeit "slot2reg"  ir = slot2reg(ir, ci, sv)
+    # TODO: Domsorting can produce an updated domtree - no need to recompute here
+    @timeit "compact 1" ir = compact!(ir)
+    nargs = let def = sv.linfo.def; isa(def, Method) ? Int(def.nargs) : 0; end
+    local state
+    if is_ipo_profitable(ir, nargs) || caller.linfo.specTypes === interp.entry_tt
+        try
+            @timeit "[IPO EA]" begin
+                state = analyze_escapes(ir, nargs, false, get_escape_cache(interp))
+                cache_escapes!(interp, caller, state, cccopy(ir))
+            end
+        catch err
+            @error "error happened within [IPO EA], insepct `Main.ir` and `Main.nargs`"
+            @eval Main (ir = $ir; nargs = $nargs)
+            rethrow(err)
+        end
+    end
+    if caller.linfo.specTypes === interp.entry_tt && !interp.optimize
+        # return back the result
+        interp.ir = cccopy(ir)
+        interp.state = state
+        interp.linfo = sv.linfo
+    end
+    @timeit "Inlining"  ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds)
+    # @timeit "verify 2" verify_ir(ir)
+    @timeit "compact 2" ir = compact!(ir)
+    if caller.linfo.specTypes === interp.entry_tt && interp.optimize
+        try
+            @timeit "[Local EA]" state = analyze_escapes(ir, nargs, true, get_escape_cache(interp))
+        catch err
+            @error "error happened within [Local EA], insepct `Main.ir` and `Main.nargs`"
+            @eval Main (ir = $ir; nargs = $nargs)
+            rethrow(err)
+        end
+        # return back the result
+        interp.ir = cccopy(ir)
+        interp.state = state
+        interp.linfo = sv.linfo
+    end
+    @timeit "SROA"      ir = sroa_pass!(ir)
+    @timeit "ADCE"      ir = adce_pass!(ir)
+    @timeit "type lift" ir = type_lift_pass!(ir)
+    @timeit "compact 3" ir = compact!(ir)
+    if JLOptions().debug_level == 2
+        @timeit "verify 3" (verify_ir(ir); verify_linetable(ir.linetable))
+    end
+    return ir
+end
+
+# printing
+# --------
+
+import Core: Argument, SSAValue
+import .CC: widenconst, singleton_type
+
+Base.getindex(estate::EscapeState, @nospecialize(x)) = CC.getindex(estate, x)
+
+function get_name_color(x::EscapeInfo, symbol::Bool = false)
+    getname(x) = string(nameof(x))
+    if x === EA.⊥
+        name, color = (getname(EA.NotAnalyzed), "◌"), :plain
+    elseif EA.has_no_escape(EA.ignore_argescape(x))
+        if EA.has_arg_escape(x)
+            name, color = (getname(EA.ArgEscape), "✓"), :cyan
+        else
+            name, color = (getname(EA.NoEscape), "✓"), :green
+        end
+    elseif EA.has_all_escape(x)
+        name, color = (getname(EA.AllEscape), "X"), :red
+    elseif EA.has_return_escape(x)
+        name = (getname(EA.ReturnEscape), "↑")
+        color = EA.has_thrown_escape(x) ? :yellow : :blue
+    else
+        name = (nothing, "*")
+        color = EA.has_thrown_escape(x) ? :yellow : :bold
+    end
+    name = symbol ? last(name) : first(name)
+    if name !== nothing && !isa(x.AliasInfo, Bool)
+        name = string(name, "′")
+    end
+    return name, color
+end
+
+# pcs = sprint(show, collect(x.EscapeSites); context=:limit=>true)
+function Base.show(io::IO, x::EscapeInfo)
+    name, color = get_name_color(x)
+    if isnothing(name)
+        Base.@invoke show(io::IO, x::Any)
+    else
+        printstyled(io, name; color)
+    end
+end
+function Base.show(io::IO, ::MIME"application/prs.juno.inline", x::EscapeInfo)
+    name, color = get_name_color(x)
+    if isnothing(name)
+        return x # use fancy tree-view
+    else
+        printstyled(io, name; color)
+    end
+end
+
+struct EscapeResult
+    ir::IRCode
+    state::EscapeState
+    linfo::Union{Nothing,MethodInstance}
+    source::Bool
+    function EscapeResult(ir::IRCode, state::EscapeState,
+        linfo::Union{Nothing,MethodInstance} = nothing,
+        source::Bool=false)
+        return new(ir, state, linfo, source)
+    end
+end
+Base.show(io::IO, result::EscapeResult) = print_with_info(io, result)
+@eval Base.iterate(res::EscapeResult, state=1) =
+    return state > $(fieldcount(EscapeResult)) ? nothing : (getfield(res, state), state+1)
+
+Base.show(io::IO, cached::EscapeCache) = show(io, EscapeResult(cached.ir, cached.state, nothing))
+
+# adapted from https://github.com/JuliaDebug/LoweredCodeUtils.jl/blob/4612349432447e868cf9285f647108f43bd0a11c/src/codeedges.jl#L881-L897
+function print_with_info(io::IO, (; ir, state, linfo, source)::EscapeResult)
+    # print escape information on SSA values
+    function preprint(io::IO)
+        ft = ir.argtypes[1]
+        f = singleton_type(ft)
+        if f === nothing
+            f = widenconst(ft)
+        end
+        print(io, f, '(')
+        for i in 1:state.nargs
+            arg = state[Argument(i)]
+            i == 1 && continue
+            c, color = get_name_color(arg, true)
+            printstyled(io, c, ' ', '_', i, "::", ir.argtypes[i]; color)
+            i ≠ state.nargs && print(io, ", ")
+        end
+        print(io, ')')
+        if !isnothing(linfo)
+            def = linfo.def
+            printstyled(io, " in ", (isa(def, Module) ? (def,) : (def.module, " at ", def.file, ':', def.line))...; color=:bold)
+        end
+        println(io)
+    end
+
+    # print escape information on SSA values
+    # nd = ndigits(length(ssavalues))
+    function preprint(io::IO, idx::Int)
+        c, color = get_name_color(state[SSAValue(idx)], true)
+        # printstyled(io, lpad(idx, nd), ' ', c, ' '; color)
+        printstyled(io, rpad(c, 2), ' '; color)
+    end
+
+    print_with_info(preprint, (args...)->nothing, io, ir, source)
+end
+
+function print_with_info(preprint, postprint, io::IO, ir::IRCode, source::Bool)
+    io = IOContext(io, :displaysize=>displaysize(io))
+    used = Base.IRShow.stmts_used(io, ir)
+    if source
+        line_info_preprinter = function (io::IO, indent::String, idx::Int)
+            r = Base.IRShow.inline_linfo_printer(ir)(io, indent, idx)
+            idx ≠ 0 && preprint(io, idx)
+            return r
+        end
+    else
+        line_info_preprinter = Base.IRShow.lineinfo_disabled
+    end
+    line_info_postprinter = Base.IRShow.default_expr_type_printer
+    preprint(io)
+    bb_idx_prev = bb_idx = 1
+    for idx = 1:length(ir.stmts)
+        preprint(io, idx)
+        bb_idx = Base.IRShow.show_ir_stmt(io, ir, idx, line_info_preprinter, line_info_postprinter, used, ir.cfg, bb_idx)
+        postprint(io, idx, bb_idx != bb_idx_prev)
+        bb_idx_prev = bb_idx
+    end
+    max_bb_idx_size = ndigits(length(ir.cfg.blocks))
+    line_info_preprinter(io, " "^(max_bb_idx_size + 2), 0)
+    postprint(io)
+    return nothing
+end
+
+end # module EAUtils
diff --git a/test/compiler/EscapeAnalysis/interprocedural.jl b/test/compiler/EscapeAnalysis/interprocedural.jl
new file mode 100644
index 00000000000000..42a2505e03c087
--- /dev/null
+++ b/test/compiler/EscapeAnalysis/interprocedural.jl
@@ -0,0 +1,262 @@
+# IPO EA Test
+# ===========
+# EA works on pre-inlining IR
+
+include(normpath(@__DIR__, "setup.jl"))
+
+# callsites
+# ---------
+
+noescape(a) = nothing
+noescape(a, b) = nothing
+function global_escape!(x)
+    GR[] = x
+    return nothing
+end
+union_escape!(x) = global_escape!(x)
+union_escape!(x::SafeRef) = nothing
+union_escape!(x::SafeRefs) = nothing
+Base.@constprop :aggressive function conditional_escape!(cnd, x)
+    cnd && global_escape!(x)
+    return nothing
+end
+
+# MethodMatchInfo -- global cache
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        return noescape(x)
+    end
+    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+end
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        identity(x)
+        return nothing
+    end
+    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+end
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        return identity(x)
+    end
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_return_escape(result.state[Argument(2)], r)
+end
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        return Ref(x)
+    end
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_return_escape(result.state[Argument(2)], r)
+end
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        r = Ref{SafeRef{String}}()
+        r[] = x
+        return r
+    end
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_return_escape(result.state[Argument(2)], r)
+end
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        global_escape!(x)
+    end
+    @test has_all_escape(result.state[Argument(2)])
+end
+# UnionSplitInfo
+let result = code_escapes((Bool,Vector{Any}); optimize=false) do c, s
+        x = c ? s : SafeRef(s)
+        union_escape!(x)
+    end
+    @test has_all_escape(result.state[Argument(3)]) # s
+end
+let result = code_escapes((Bool,Vector{Any}); optimize=false) do c, s
+        x = c ? SafeRef(s) : SafeRefs(s, s)
+        union_escape!(x)
+    end
+    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+end
+# ConstCallInfo -- local cache
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        return conditional_escape!(false, x)
+    end
+    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+end
+# InvokeCallInfo
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        return Base.@invoke noescape(x::Any)
+    end
+    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+end
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        return Base.@invoke conditional_escape!(false::Any, x::Any)
+    end
+    @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+end
+
+# MethodError
+# -----------
+# accounts for ThrownEscape via potential MethodError
+
+# no method error
+identity_if_string(x::SafeRef) = nothing
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        identity_if_string(x)
+    end
+    i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test !has_thrown_escape(result.state[Argument(2)], i)
+    @test !has_return_escape(result.state[Argument(2)], r)
+end
+let result = code_escapes((Union{SafeRef{String},Vector{String}},); optimize=false) do x
+        identity_if_string(x)
+    end
+    i = only(findall(iscall((result.ir, identity_if_string)), result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_thrown_escape(result.state[Argument(2)], i)
+    @test !has_return_escape(result.state[Argument(2)], r)
+end
+let result = code_escapes((SafeRef{String},); optimize=false) do x
+        try
+            identity_if_string(x)
+        catch err
+            global GV = err
+        end
+        return nothing
+    end
+    @test !has_all_escape(result.state[Argument(2)])
+end
+let result = code_escapes((Union{SafeRef{String},Vector{String}},); optimize=false) do x
+        try
+            identity_if_string(x)
+        catch err
+            global GV = err
+        end
+        return nothing
+    end
+    @test has_all_escape(result.state[Argument(2)])
+end
+# method ambiguity error
+ambig_error_test(a::SafeRef, b) = nothing
+ambig_error_test(a, b::SafeRef) = nothing
+ambig_error_test(a, b) = nothing
+let result = code_escapes((SafeRef{String},Any); optimize=false) do x, y
+        ambig_error_test(x, y)
+    end
+    i = only(findall(iscall((result.ir, ambig_error_test)), result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_thrown_escape(result.state[Argument(2)], i)  # x
+    @test has_thrown_escape(result.state[Argument(3)], i)  # y
+    @test !has_return_escape(result.state[Argument(2)], r)  # x
+    @test !has_return_escape(result.state[Argument(3)], r)  # y
+end
+let result = code_escapes((SafeRef{String},Any); optimize=false) do x, y
+        try
+            ambig_error_test(x, y)
+        catch err
+            global GV = err
+        end
+    end
+    @test has_all_escape(result.state[Argument(2)])  # x
+    @test has_all_escape(result.state[Argument(3)])  # y
+end
+
+# Local EA integration
+# --------------------
+
+# propagate escapes imposed on call arguments
+
+# FIXME handle _apply_iterate
+# FIXME currently we can't prove the effect-freeness of `getfield(RefValue{String}, :x)`
+# because of this check https://github.com/JuliaLang/julia/blob/94b9d66b10e8e3ebdb268e4be5f7e1f43079ad4e/base/compiler/tfuncs.jl#L745
+# and thus it leads to the following two broken tests
+
+@noinline broadcast_noescape1(a) = (broadcast(identity, a); nothing)
+let result = code_escapes() do
+        broadcast_noescape1(Ref("Hi"))
+    end
+    i = only(findall(isnew, result.ir.stmts.inst))
+    @test_broken !has_return_escape(result.state[SSAValue(i)])
+    @test_broken !has_thrown_escape(result.state[SSAValue(i)])
+end
+@noinline broadcast_noescape2(b) = broadcast(identity, b)
+let result = code_escapes() do
+        broadcast_noescape2(Ref("Hi"))
+    end
+    i = only(findall(isnew, result.ir.stmts.inst))
+    @test_broken !has_return_escape(result.state[SSAValue(i)])
+    @test_broken !has_thrown_escape(result.state[SSAValue(i)])
+end
+@noinline allescape_argument(a) = (global GV = a) # obvious escape
+let result = code_escapes() do
+        allescape_argument(Ref("Hi"))
+    end
+    i = only(findall(isnew, result.ir.stmts.inst))
+    @test has_all_escape(result.state[SSAValue(i)])
+end
+# if we can't determine the matching method statically, we should be conservative
+let result = code_escapes((Ref{Any},)) do a
+        may_exist(a)
+    end
+    @test has_all_escape(result.state[Argument(2)])
+end
+let result = code_escapes((Ref{Any},)) do a
+        Base.@invokelatest broadcast_noescape1(a)
+    end
+    @test has_all_escape(result.state[Argument(2)])
+end
+
+# handling of simple union-split (just exploit the inliner's effort)
+@noinline unionsplit_noescape(a)      = string(nothing)
+@noinline unionsplit_noescape(a::Int) = a + 10
+let result = code_escapes((Union{Int,Nothing},)) do x
+        s = SafeRef{Union{Int,Nothing}}(x)
+        unionsplit_noescape(s[])
+        return nothing
+    end
+    inds = findall(isnew, result.ir.stmts.inst) # find allocation statement
+    @assert !isempty(inds)
+    for i in inds
+        @test has_no_escape(result.state[SSAValue(i)])
+    end
+end
+
+@noinline function unused_argument(a)
+    println("prevent inlining")
+    return Base.inferencebarrier(nothing)
+end
+let result = code_escapes() do
+        a = Ref("foo") # shouldn't be "return escape"
+        b = unused_argument(a)
+        nothing
+    end
+    i = only(findall(isnew, result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test !has_return_escape(result.state[SSAValue(i)], r)
+
+    result = code_escapes() do
+        a = Ref("foo") # still should be "return escape"
+        b = unused_argument(a)
+        return a
+    end
+    i = only(findall(isnew, result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_return_escape(result.state[SSAValue(i)], r)
+end
+
+# should propagate escape information imposed on return value to the aliased call argument
+@noinline returnescape_argument(a) = (println("prevent inlining"); a)
+let result = code_escapes() do
+        obj = Ref("foo")           # should be "return escape"
+        ret = returnescape_argument(obj)
+        return ret                 # alias of `obj`
+    end
+    i = only(findall(isnew, result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_return_escape(result.state[SSAValue(i)], r)
+end
+@noinline noreturnescape_argument(a) = (println("prevent inlining"); identity("hi"))
+let result = code_escapes() do
+        obj = Ref("foo")              # better to not be "return escape"
+        ret = noreturnescape_argument(obj)
+        return ret                    # must not alias to `obj`
+    end
+    i = only(findall(isnew, result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test !has_return_escape(result.state[SSAValue(i)], r)
+end
diff --git a/test/compiler/EscapeAnalysis/local.jl b/test/compiler/EscapeAnalysis/local.jl
new file mode 100644
index 00000000000000..e5d8f1bf2c9407
--- /dev/null
+++ b/test/compiler/EscapeAnalysis/local.jl
@@ -0,0 +1,2206 @@
+# Local EA Test
+# =============
+# EA works on post-inlining IR
+
+include(normpath(@__DIR__, "setup.jl"))
+
+@testset "basics" begin
+    let # arg return
+        result = code_escapes((Any,)) do a # return to caller
+            return nothing
+        end
+        @test has_arg_escape(result.state[Argument(2)])
+        # return
+        result = code_escapes((Any,)) do a
+            return a
+        end
+        i = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_arg_escape(result.state[Argument(1)]) # self
+        @test !has_return_escape(result.state[Argument(1)], i) # self
+        @test has_arg_escape(result.state[Argument(2)]) # a
+        @test has_return_escape(result.state[Argument(2)], i) # a
+    end
+    let # global store
+        result = code_escapes((Any,)) do a
+            global GV = a
+            nothing
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let # global load
+        result = code_escapes() do
+            global GV
+            return GV
+        end
+        i = only(findall(has_return_escape, map(i->result.state[SSAValue(i)], 1:length(result.ir.stmts))))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # global store / load (https://github.com/aviatesk/EscapeAnalysis.jl/issues/56)
+        result = code_escapes((Any,)) do s
+            global GV
+            GV = s
+            return GV
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+    end
+    let # :gc_preserve_begin / :gc_preserve_end
+        result = code_escapes((String,)) do s
+            m = SafeRef(s)
+            GC.@preserve m begin
+                return nothing
+            end
+        end
+        i = findfirst(isT(SafeRef{String}), result.ir.stmts.type) # find allocation statement
+        @test !isnothing(i)
+        @test has_no_escape(result.state[SSAValue(i)])
+    end
+    let # :isdefined
+        result = code_escapes((String, Bool, )) do a, b
+            if b
+                s = Ref(a)
+            end
+            return @isdefined(s)
+        end
+        i = findfirst(isT(Base.RefValue{String}), result.ir.stmts.type) # find allocation statement
+        @test !isnothing(i)
+        @test has_no_escape(result.state[SSAValue(i)])
+    end
+    let # ϕ-node
+        result = code_escapes((Bool,Any,Any)) do cond, a, b
+            c = cond ? a : b # ϕ(a, b)
+            return c
+        end
+        @assert any(@nospecialize(x)->isa(x, Core.PhiNode), result.ir.stmts.inst)
+        i = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(3)], i) # a
+        @test has_return_escape(result.state[Argument(4)], i) # b
+    end
+    let # π-node
+        result = code_escapes((Any,)) do a
+            if isa(a, Regex) # a::π(Regex)
+                return a
+            end
+            return nothing
+        end
+        @assert any(@nospecialize(x)->isa(x, Core.PiNode), result.ir.stmts.inst)
+        @test any(findall(isreturn, result.ir.stmts.inst)) do i
+            has_return_escape(result.state[Argument(2)], i)
+        end
+    end
+    let # φᶜ-node / ϒ-node
+        result = code_escapes((Any,String)) do a, b
+            local x::String
+            try
+                x = a
+            catch err
+                x = b
+            end
+            return x
+        end
+        @assert any(@nospecialize(x)->isa(x, Core.PhiCNode), result.ir.stmts.inst)
+        @assert any(@nospecialize(x)->isa(x, Core.UpsilonNode), result.ir.stmts.inst)
+        i = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], i)
+        @test has_return_escape(result.state[Argument(3)], i)
+    end
+    let # branching
+        result = code_escapes((Any,Bool,)) do a, c
+            if c
+                return nothing # a doesn't escape in this branch
+            else
+                return a # a escapes to a caller
+            end
+        end
+        @test has_return_escape(result.state[Argument(2)])
+    end
+    let # loop
+        result = code_escapes((Int,)) do n
+            c = SafeRef{Bool}(false)
+            while n > 0
+                rand(Bool) && return c
+            end
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)])
+    end
+    let # try/catch
+        result = code_escapes((Any,)) do a
+            try
+                nothing
+            catch err
+                return a # return escape
+            end
+        end
+        @test has_return_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do a
+            try
+                nothing
+            finally
+                return a # return escape
+            end
+        end
+        @test has_return_escape(result.state[Argument(2)])
+    end
+    let # :foreigncall
+        result = code_escapes((Any,)) do x
+            ccall(:some_ccall, Any, (Any,), x)
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+end
+
+let # simple allocation
+    result = code_escapes((Bool,)) do c
+        mm = SafeRef{Bool}(c) # just allocated, never escapes
+        return mm[] ? nothing : 1
+    end
+    i = only(findall(isnew, result.ir.stmts.inst))
+    @test has_no_escape(result.state[SSAValue(i)])
+end
+
+@testset "builtins" begin
+    let # throw
+        r = code_escapes((Any,)) do a
+            throw(a)
+        end
+        @test has_thrown_escape(r.state[Argument(2)])
+    end
+
+    let # implicit throws
+        r = code_escapes((Any,)) do a
+            getfield(a, :may_not_field)
+        end
+        @test has_thrown_escape(r.state[Argument(2)])
+
+        r = code_escapes((Any,)) do a
+            sizeof(a)
+        end
+        @test has_thrown_escape(r.state[Argument(2)])
+    end
+
+    let # :===
+        result = code_escapes((Bool, SafeRef{String})) do cond, s
+            m = cond ? s : nothing
+            c = m === nothing
+            return c
+        end
+        @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+    end
+
+    let # sizeof
+        result = code_escapes((Vector{Any},)) do xs
+            sizeof(xs)
+        end
+        @test has_no_escape(ignore_argescape(result.state[Argument(2)]))
+    end
+
+    let # ifelse
+        result = code_escapes((Bool,)) do c
+            r = ifelse(c, Ref("yes"), Ref("no"))
+            return r
+        end
+        inds = findall(isnew, result.ir.stmts.inst)
+        @assert !isempty(inds)
+        for i in inds
+            @test has_return_escape(result.state[SSAValue(i)])
+        end
+    end
+    let # ifelse (with constant condition)
+        result = code_escapes() do
+            r = ifelse(true, Ref("yes"), Ref(nothing))
+            return r
+        end
+        for i in 1:length(result.ir.stmts)
+            if isnew(result.ir.stmts.inst[i]) && isT(Base.RefValue{String})(result.ir.stmts.type[i])
+                @test has_return_escape(result.state[SSAValue(i)])
+            elseif isnew(result.ir.stmts.inst[i]) && isT(Base.RefValue{Nothing})(result.ir.stmts.type[i])
+                @test has_no_escape(result.state[SSAValue(i)])
+            end
+        end
+    end
+
+    let # typeassert
+        result = code_escapes((Any,)) do x
+            y = x::String
+            return y
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test !has_all_escape(result.state[Argument(2)])
+    end
+
+    let # isdefined
+        result = code_escapes((Any,)) do x
+            isdefined(x, :foo) ? x : throw("undefined")
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test !has_all_escape(result.state[Argument(2)])
+
+        result = code_escapes((Module,)) do m
+            isdefined(m, 10) # throws
+        end
+        @test has_thrown_escape(result.state[Argument(2)])
+    end
+end
+
+@testset "flow-sensitivity" begin
+    # ReturnEscape
+    let result = code_escapes((Bool,)) do cond
+            r = Ref("foo")
+            if cond
+                return cond
+            end
+            return r
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        rts = findall(isreturn, result.ir.stmts.inst)
+        @assert length(rts) == 2
+        @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 1
+    end
+    let result = code_escapes((Bool,)) do cond
+            r = Ref("foo")
+            cnt = 0
+            while rand(Bool)
+                cnt += 1
+                rand(Bool) && return r
+            end
+            rand(Bool) && return r
+            return cnt
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        rts = findall(isreturn, result.ir.stmts.inst) # return statement
+        @assert length(rts) == 3
+        @test count(rt->has_return_escape(result.state[SSAValue(i)], rt), rts) == 2
+    end
+end
+
+@testset "escape through exceptions" begin
+    M = @eval Module() begin
+        unsafeget(x) = isassigned(x) ? x[] : throw(x)
+        @noinline function escape_rethrow!()
+            try
+                rethrow()
+            catch err
+                GR[] = err
+            end
+        end
+        @noinline function escape_current_exceptions!()
+            excs = Base.current_exceptions()
+            GR[] = excs
+        end
+        const GR = Ref{Any}()
+        @__MODULE__
+    end
+
+    let # simple: return escape
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch err
+                ret = err
+            end
+            return ret
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)])
+    end
+
+    let # simple: global escape
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret # prevent DCE
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch err
+                global GV = err
+            end
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+
+    let # account for possible escapes via nested throws
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            try
+                try
+                    unsafeget(r)
+                catch err1
+                    throw(err1)
+                end
+            catch err2
+                GR[] = err2
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `rethrow`
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            try
+                try
+                    unsafeget(r)
+                catch err1
+                    rethrow(err1)
+                end
+            catch err2
+                GR[] = err2
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `rethrow`
+        result = @eval M $code_escapes() do
+            try
+                r = Ref{String}()
+                unsafeget(r)
+            catch
+                escape_rethrow!()
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `rethrow`
+        result = @eval M $code_escapes() do
+            local t
+            try
+                r = Ref{String}()
+                t = unsafeget(r)
+            catch err
+                t = typeof(err)
+                escape_rethrow!()
+            end
+            return t
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `Base.current_exceptions`
+        result = @eval M $code_escapes() do
+            try
+                r = Ref{String}()
+                unsafeget(r)
+            catch
+                GR[] = Base.current_exceptions()
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+    let # account for possible escapes via `Base.current_exceptions`
+        result = @eval M $code_escapes() do
+            try
+                r = Ref{String}()
+                unsafeget(r)
+            catch
+                escape_current_exceptions!()
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+    end
+
+    let # contextual: escape information imposed on `err` shouldn't propagate to `r2`, but only to `r1`
+        result = @eval M $code_escapes() do
+            r1 = Ref{String}()
+            r2 = Ref{String}()
+            local ret
+            try
+                s1 = unsafeget(r1)
+                ret = sizeof(s1)
+            catch err
+                global GV = err
+            end
+            s2 = unsafeget(r2)
+            return s2, r2
+        end
+        is = findall(isnew, result.ir.stmts.inst)
+        @test length(is) == 2
+        i1, i2 = is
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i1)])
+        @test !has_all_escape(result.state[SSAValue(i2)])
+        @test has_return_escape(result.state[SSAValue(i2)], r)
+    end
+
+    # XXX test cases below are currently broken because of the technical reason described in `escape_exception!`
+
+    let # limited propagation: exception is caught within a frame => doesn't escape to a caller
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch
+                ret = nothing
+            end
+            return ret
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test_broken !has_return_escape(result.state[SSAValue(i)], r)
+    end
+    let # sequential: escape information imposed on `err1` and `err2 should propagate separately
+        result = @eval M $code_escapes() do
+            r1 = Ref{String}()
+            r2 = Ref{String}()
+            local ret
+            try
+                s1 = unsafeget(r1)
+                ret = sizeof(s1)
+            catch err1
+                global GV = err1
+            end
+            try
+                s2 = unsafeget(r2)
+                ret = sizeof(s2)
+            catch err2
+                ret = err2
+            end
+            return ret
+        end
+        is = findall(isnew, result.ir.stmts.inst)
+        @test length(is) == 2
+        i1, i2 = is
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i1)])
+        @test has_return_escape(result.state[SSAValue(i2)], r)
+        @test_broken !has_all_escape(result.state[SSAValue(i2)])
+    end
+    let # nested: escape information imposed on `inner` shouldn't propagate to `s`
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                try
+                    ret = sizeof(s)
+                catch inner
+                    return inner
+                end
+            catch outer
+                ret = nothing
+            end
+            return ret
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test_broken !has_return_escape(result.state[SSAValue(i)])
+    end
+    let # merge: escape information imposed on `err1` and `err2 should be merged
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch err1
+                return err1
+            end
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            catch err2
+                return err2
+            end
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        rs = findall(isreturn, result.ir.stmts.inst)
+        @test_broken !has_all_escape(result.state[SSAValue(i)])
+        for r in rs
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let # no exception handling: should keep propagating the escape
+        result = @eval M $code_escapes() do
+            r = Ref{String}()
+            local ret
+            try
+                s = unsafeget(r)
+                ret = sizeof(s)
+            finally
+                if !@isdefined(ret)
+                    ret = 42
+                end
+            end
+            return ret
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test_broken !has_return_escape(result.state[SSAValue(i)], r)
+    end
+end
+
+@testset "field analysis / alias analysis" begin
+    # escaped allocations
+    # -------------------
+
+    # escaped object should escape its fields as well
+    let result = code_escapes((Any,)) do a
+            global GV = SafeRef{Any}(a)
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do a
+            global GV = (a,)
+            nothing
+        end
+        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do a
+            o0 = SafeRef{Any}(a)
+            global GV = SafeRef(o0)
+            nothing
+        end
+        is = findall(isnew, result.ir.stmts.inst)
+        @test length(is) == 2
+        i0, i1 = is
+        @test has_all_escape(result.state[SSAValue(i0)])
+        @test has_all_escape(result.state[SSAValue(i1)])
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do a
+            t0 = (a,)
+            global GV = (t0,)
+            nothing
+        end
+        inds = findall(iscall((result.ir, tuple)), result.ir.stmts.inst)
+        @assert length(inds) == 2
+        for i in inds; @test has_all_escape(result.state[SSAValue(i)]); end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    # global escape through `setfield!`
+    let result = code_escapes((Any,)) do a
+            r = SafeRef{Any}(:init)
+            global GV = r
+            r[] = a
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,Any)) do a, b
+            r = SafeRef{Any}(a)
+            global GV = r
+            r[] = b
+            nothing
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test has_all_escape(result.state[SSAValue(i)])
+        @test has_all_escape(result.state[Argument(2)]) # a
+        @test has_all_escape(result.state[Argument(3)]) # b
+    end
+    let result = @eval EATModule() begin
+            const Rx = SafeRef{String}("Rx")
+            $code_escapes((String,)) do s
+                Rx[] = s
+                Core.sizeof(Rx[])
+            end
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let result = @eval EATModule() begin
+            const Rx = SafeRef{String}("Rx")
+            $code_escapes((String,)) do s
+                setfield!(Rx, :x, s)
+                Core.sizeof(Rx[])
+            end
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    let M = EATModule()
+        @eval M module ___xxx___
+            import ..SafeRef
+            const Rx = SafeRef("Rx")
+        end
+        result = @eval M begin
+            $code_escapes((String,)) do s
+                rx = getfield(___xxx___, :Rx)
+                rx[] = s
+                nothing
+            end
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+
+    # field escape
+    # ------------
+
+    # field escape should propagate to :new arguments
+    let result = code_escapes((String,)) do a
+            o = SafeRef(a)
+            f = o[]
+            return f
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    let result = code_escapes((String,)) do a
+            t = SafeRef((a,))
+            f = t[][1]
+            return f
+        end
+        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+        result.state[SSAValue(i)].AliasInfo
+    end
+    let result = code_escapes((String, String)) do a, b
+            obj = SafeRefs(a, b)
+            fld1 = obj[1]
+            fld2 = obj[2]
+            return (fld1, fld2)
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # field escape should propagate to `setfield!` argument
+    let result = code_escapes((String,)) do a
+            o = SafeRef("foo")
+            o[] = a
+            f = o[]
+            return f
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    # propagate escape information imposed on return value of `setfield!` call
+    let result = code_escapes((String,)) do a
+            obj = SafeRef("foo")
+            return (obj[] = a)
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # nested allocations
+    let result = code_escapes((String,)) do a
+            o1 = SafeRef(a)
+            o2 = SafeRef(o1)
+            return o2[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        for i in 1:length(result.ir.stmts)
+            if isnew(result.ir.stmts.inst[i]) && isT(SafeRef{String})(result.ir.stmts.type[i])
+                @test has_return_escape(result.state[SSAValue(i)], r)
+            elseif isnew(result.ir.stmts.inst[i]) && isT(SafeRef{SafeRef{String}})(result.ir.stmts.type[i])
+                @test is_load_forwardable(result.state[SSAValue(i)])
+            end
+        end
+    end
+    let result = code_escapes((String,)) do a
+            o1 = (a,)
+            o2 = (o1,)
+            return o2[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        for i in 1:length(result.ir.stmts)
+            if isnew(result.ir.stmts.inst[i]) && isT(Tuple{String})(result.ir.stmts.type[i])
+                @test has_return_escape(result.state[SSAValue(i)], r)
+            elseif isnew(result.ir.stmts.inst[i]) && isT(Tuple{Tuple{String}})(result.ir.stmts.type[i])
+                @test is_load_forwardable(result.state[SSAValue(i)])
+            end
+        end
+    end
+    let result = code_escapes((String,)) do a
+            o1  = SafeRef(a)
+            o2  = SafeRef(o1)
+            o1′ = o2[]
+            a′  = o1′[]
+            return a′
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+    let result = code_escapes() do
+            o1 = SafeRef("foo")
+            o2 = SafeRef(o1)
+            return o2
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let result = code_escapes() do
+            o1   = SafeRef("foo")
+            o2′  = SafeRef(nothing)
+            o2   = SafeRef{SafeRef}(o2′)
+            o2[] = o1
+            return o2
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        findall(1:length(result.ir.stmts)) do i
+            if isnew(result.ir.stmts[i][:inst])
+                t = result.ir.stmts[i][:type]
+                return t === SafeRef{String}  || # o1
+                       t === SafeRef{SafeRef}    # o2
+            end
+            return false
+        end |> x->foreach(x) do i
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let result = code_escapes((String,)) do x
+            broadcast(identity, Ref(x))
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # ϕ-node allocations
+    let result = code_escapes((Bool,Any,Any)) do cond, x, y
+            if cond
+                ϕ = SafeRef{Any}(x)
+            else
+                ϕ = SafeRef{Any}(y)
+            end
+            return ϕ[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(3)], r) # x
+        @test has_return_escape(result.state[Argument(4)], r) # y
+        i = only(findall(isϕ, result.ir.stmts.inst))
+        @test is_load_forwardable(result.state[SSAValue(i)])
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+    let result = code_escapes((Bool,Any,Any)) do cond, x, y
+            if cond
+                ϕ2 = ϕ1 = SafeRef{Any}(x)
+            else
+                ϕ2 = ϕ1 = SafeRef{Any}(y)
+            end
+            return ϕ1[], ϕ2[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(3)], r) # x
+        @test has_return_escape(result.state[Argument(4)], r) # y
+        for i in findall(isϕ, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+    # when ϕ-node merges values with different types
+    let result = code_escapes((Bool,String,String,String)) do cond, x, y, z
+            local out
+            if cond
+                ϕ = SafeRef(x)
+                out = ϕ[]
+            else
+                ϕ = SafeRefs(z, y)
+            end
+            return @isdefined(out) ? out : throw(ϕ)
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        t = only(findall(iscall((result.ir, throw)), result.ir.stmts.inst))
+        ϕ = only(findall(isT(Union{SafeRef{String},SafeRefs{String,String}}), result.ir.stmts.type))
+        @test has_return_escape(result.state[Argument(3)], r) # x
+        @test !has_return_escape(result.state[Argument(4)], r) # y
+        @test has_return_escape(result.state[Argument(5)], r) # z
+        @test has_thrown_escape(result.state[SSAValue(ϕ)], t)
+    end
+
+    # alias analysis
+    # --------------
+
+    # alias via getfield & Expr(:new)
+    let result = code_escapes((String,)) do s
+            r = SafeRef(s)
+            return r[]
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        @test !isaliased(Argument(2), SSAValue(i), result.state)
+    end
+    let result = code_escapes((String,)) do s
+            r1 = SafeRef(s)
+            r2 = SafeRef(r1)
+            return r2[]
+        end
+        i1, i2 = findall(isnew, result.ir.stmts.inst)
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test !isaliased(SSAValue(i1), SSAValue(i2), result.state)
+        @test isaliased(SSAValue(i1), val, result.state)
+        @test !isaliased(SSAValue(i2), val, result.state)
+    end
+    let result = code_escapes((String,)) do s
+            r1 = SafeRef(s)
+            r2 = SafeRef(r1)
+            return r2[][]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test !isaliased(SSAValue(i), val, result.state)
+        end
+    end
+    let result = @eval EATModule() begin
+            const Rx = SafeRef("Rx")
+            $code_escapes((String,)) do s
+                r = SafeRef(Rx)
+                rx = r[] # rx aliased to Rx
+                rx[] = s
+                nothing
+            end
+        end
+        i = findfirst(isnew, result.ir.stmts.inst)
+        @test has_all_escape(result.state[Argument(2)])
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    # alias via getfield & setfield!
+    let result = code_escapes((String,)) do s
+            r = Ref{String}()
+            r[] = s
+            return r[]
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        @test !isaliased(Argument(2), SSAValue(i), result.state)
+    end
+    let result = code_escapes((String,)) do s
+            r1 = Ref(s)
+            r2 = Ref{Base.RefValue{String}}()
+            r2[] = r1
+            return r2[]
+        end
+        i1, i2 = findall(isnew, result.ir.stmts.inst)
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test !isaliased(SSAValue(i1), SSAValue(i2), result.state)
+        @test isaliased(SSAValue(i1), val, result.state)
+        @test !isaliased(SSAValue(i2), val, result.state)
+    end
+    let result = code_escapes((String,)) do s
+            r1 = Ref{String}()
+            r2 = Ref{Base.RefValue{String}}()
+            r2[] = r1
+            r1[] = s
+            return r2[][]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test !isaliased(SSAValue(i), val, result.state)
+        end
+        result = code_escapes((String,)) do s
+            r1 = Ref{String}()
+            r2 = Ref{Base.RefValue{String}}()
+            r1[] = s
+            r2[] = r1
+            return r2[][]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test isaliased(Argument(2), val, result.state)
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test !isaliased(SSAValue(i), val, result.state)
+        end
+    end
+    let result = @eval EATModule() begin
+            const Rx = SafeRef("Rx")
+            $code_escapes((SafeRef{String}, String,)) do _rx, s
+                r = SafeRef(_rx)
+                r[] = Rx
+                rx = r[] # rx aliased to Rx
+                rx[] = s
+                nothing
+            end
+        end
+        i = findfirst(isnew, result.ir.stmts.inst)
+        @test has_all_escape(result.state[Argument(3)])
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    # alias via typeassert
+    let result = code_escapes((Any,)) do a
+            r = a::String
+            return r
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test isaliased(Argument(2), val, result.state)       # a <-> r
+    end
+    let result = code_escapes((Any,)) do a
+            global GV
+            (g::SafeRef{Any})[] = a
+            nothing
+        end
+        @test has_all_escape(result.state[Argument(2)])
+    end
+    # alias via ifelse
+    let result = code_escapes((Bool,Any,Any)) do c, a, b
+            r = ifelse(c, a, b)
+            return r
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(3)], r) # a
+        @test has_return_escape(result.state[Argument(4)], r) # b
+        @test !isaliased(Argument(2), val, result.state)      # c <!-> r
+        @test isaliased(Argument(3), val, result.state)       # a <-> r
+        @test isaliased(Argument(4), val, result.state)       # b <-> r
+    end
+    let result = @eval EATModule() begin
+            const Lx, Rx = SafeRef("Lx"), SafeRef("Rx")
+            $code_escapes((Bool,String,)) do c, a
+                r = ifelse(c, Lx, Rx)
+                r[] = a
+                nothing
+            end
+        end
+        @test has_all_escape(result.state[Argument(3)]) # a
+    end
+    # alias via ϕ-node
+    let result = code_escapes((Bool,String)) do cond, x
+            if cond
+                ϕ2 = ϕ1 = SafeRef("foo")
+            else
+                ϕ2 = ϕ1 = SafeRef("bar")
+            end
+            ϕ2[] = x
+            return ϕ1[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(3)], r) # x
+        @test isaliased(Argument(3), val, result.state) # x
+        for i in findall(isϕ, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+    let result = code_escapes((Bool,Bool,String)) do cond1, cond2, x
+            if cond1
+                ϕ2 = ϕ1 = SafeRef("foo")
+            else
+                ϕ2 = ϕ1 = SafeRef("bar")
+            end
+            cond2 && (ϕ2[] = x)
+            return ϕ1[]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        val = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(4)], r) # x
+        @test isaliased(Argument(4), val, result.state) # x
+        for i in findall(isϕ, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+    # alias via π-node
+    let result = code_escapes((Any,)) do x
+            if isa(x, String)
+                return x
+            end
+            throw("error!")
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        rval = (result.ir.stmts.inst[r]::ReturnNode).val::SSAValue
+        @test has_return_escape(result.state[Argument(2)], r) # x
+        @test isaliased(Argument(2), rval, result.state)
+    end
+    let result = code_escapes((String,)) do x
+            global GV
+            l = g
+            if isa(l, SafeRef{String})
+                l[] = x
+            end
+            nothing
+        end
+        @test has_all_escape(result.state[Argument(2)]) # x
+    end
+    # circular reference
+    let result = code_escapes() do
+            x = Ref{Any}()
+            x[] = x
+            return x[]
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+    end
+    let result = @eval Module() begin
+            const Rx = Ref{Any}()
+            Rx[] = Rx
+            $code_escapes() do
+                r = Rx[]::Base.RefValue{Any}
+                return r[]
+            end
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        for i in findall(iscall((result.ir, getfield)), result.ir.stmts.inst)
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let result = @eval Module() begin
+            @noinline function genr()
+                r = Ref{Any}()
+                r[] = r
+                return r
+            end
+            $code_escapes() do
+                x = genr()
+                return x[]
+            end
+        end
+        i = only(findall(isinvoke(:genr), result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+    end
+
+    # dynamic semantics
+    # -----------------
+
+    # conservatively handle untyped objects
+    let result = @eval code_escapes((Any,Any,)) do T, x
+            obj = $(Expr(:new, :T, :x))
+        end
+        t = only(findall(isnew, result.ir.stmts.inst))
+        @test #=T=# has_thrown_escape(result.state[Argument(2)], t) # T
+        @test #=x=# has_thrown_escape(result.state[Argument(3)], t) # x
+    end
+    let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z
+            obj = $(Expr(:new, :T, :x, :y))
+            return getfield(obj, :x)
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test #=x=# has_return_escape(result.state[Argument(3)], r)
+        @test #=y=# has_return_escape(result.state[Argument(4)], r)
+        @test #=z=# !has_return_escape(result.state[Argument(5)], r)
+    end
+    let result = @eval code_escapes((Any,Any,Any,Any)) do T, x, y, z
+            obj = $(Expr(:new, :T, :x))
+            setfield!(obj, :x, y)
+            return getfield(obj, :x)
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test #=x=# has_return_escape(result.state[Argument(3)], r)
+        @test #=y=# has_return_escape(result.state[Argument(4)], r)
+        @test #=z=# !has_return_escape(result.state[Argument(5)], r)
+    end
+
+    # conservatively handle unknown field:
+    # all fields should be escaped, but the allocation itself doesn't need to be escaped
+    let result = code_escapes((String, Symbol)) do a, fld
+            obj = SafeRef(a)
+            return getfield(obj, fld)
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((String, String, Symbol)) do a, b, fld
+            obj = SafeRefs(a, b)
+            return getfield(obj, fld) # should escape both `a` and `b`
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((String, String, Int)) do a, b, idx
+            obj = SafeRefs(a, b)
+            return obj[idx] # should escape both `a` and `b`
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((String, String, Symbol)) do a, b, fld
+            obj = SafeRefs("a", "b")
+            setfield!(obj, fld, a)
+            return obj[2] # should escape `a`
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test !has_return_escape(result.state[Argument(3)], r) # b
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((String, Symbol)) do a, fld
+            obj = SafeRefs("a", "b")
+            setfield!(obj, fld, a)
+            return obj[1] # this should escape `a`
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+    let result = code_escapes((String, String, Int)) do a, b, idx
+            obj = SafeRefs("a", "b")
+            obj[idx] = a
+            return obj[2] # should escape `a`
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test !has_return_escape(result.state[Argument(3)], r) # b
+        @test !is_load_forwardable(result.state[SSAValue(i)]) # obj
+    end
+
+    # interprocedural
+    # ---------------
+
+    let result = @eval EATModule() begin
+            @noinline getx(obj) = obj[]
+            $code_escapes((String,)) do a
+                obj = SafeRef(a)
+                fld = getx(obj)
+                return fld
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)
+        # NOTE we can't scalar replace `obj`, but still we may want to stack allocate it
+        @test_broken is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # TODO interprocedural alias analysis
+    let result = code_escapes((SafeRef{String},)) do s
+            s[] = "bar"
+            global GV = s[]
+            nothing
+        end
+        @test_broken !has_all_escape(result.state[Argument(2)])
+    end
+
+    # aliasing between arguments
+    let result = @eval EATModule() begin
+            @noinline setxy!(x, y) = x[] = y
+            $code_escapes((String,)) do y
+                x = SafeRef("init")
+                setxy!(x, y)
+                return x
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+        @test has_return_escape(result.state[Argument(2)], r) # y
+    end
+    let result = @eval EATModule() begin
+            @noinline setxy!(x, y) = x[] = y
+            $code_escapes((String,)) do y
+                x1 = SafeRef("init")
+                x2 = SafeRef(y)
+                setxy!(x1, x2[])
+                return x1
+            end
+        end
+        i1, i2 = findall(isnew, result.ir.stmts.inst)
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i1)], r)
+        @test !has_return_escape(result.state[SSAValue(i2)], r)
+        @test has_return_escape(result.state[Argument(2)], r) # y
+    end
+    let result = @eval EATModule() begin
+            @noinline mysetindex!(x, a) = x[1] = a
+            const Ax = Vector{Any}(undef, 1)
+            $code_escapes((String,)) do s
+                mysetindex!(Ax, s)
+            end
+        end
+        @test has_all_escape(result.state[Argument(2)]) # s
+    end
+
+    # TODO flow-sensitivity?
+    # ----------------------
+
+    let result = code_escapes((Any,Any)) do a, b
+            r = SafeRef{Any}(a)
+            r[] = b
+            return r[]
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test_broken !has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    let result = code_escapes((Any,Any)) do a, b
+            r = SafeRef{Any}(:init)
+            r[] = a
+            r[] = b
+            return r[]
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test_broken !has_return_escape(result.state[Argument(2)], r) # a
+        @test has_return_escape(result.state[Argument(3)], r) # b
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+    let result = code_escapes((Any,Any,Bool)) do a, b, cond
+            r = SafeRef{Any}(:init)
+            if cond
+                r[] = a
+                return r[]
+            else
+                r[] = b
+                return nothing
+            end
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        @test is_load_forwardable(result.state[SSAValue(i)])
+        r = only(findall(result.ir.stmts.inst) do @nospecialize x
+            isreturn(x) && isa(x.val, Core.SSAValue)
+        end)
+        @test has_return_escape(result.state[Argument(2)], r) # a
+        @test_broken !has_return_escape(result.state[Argument(3)], r) # b
+    end
+
+    # handle conflicting field information correctly
+    let result = code_escapes((Bool,String,String,)) do cnd, baz, qux
+            if cnd
+                o = SafeRef("foo")
+            else
+                o = SafeRefs("bar", baz)
+                r = getfield(o, 2)
+            end
+            if cnd
+                o = o::SafeRef
+                setfield!(o, 1, qux)
+                r = getfield(o, 1)
+            end
+            r
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(3)], r) # baz
+        @test has_return_escape(result.state[Argument(4)], r) # qux
+        for new in findall(isnew, result.ir.stmts.inst)
+            @test is_load_forwardable(result.state[SSAValue(new)])
+        end
+    end
+    let result = code_escapes((Bool,String,String,)) do cnd, baz, qux
+            if cnd
+                o = SafeRefs("foo", "bar")
+                r = setfield!(o, 2, baz)
+            else
+                o = SafeRef(qux)
+            end
+            if !cnd
+                o = o::SafeRef
+                r = getfield(o, 1)
+            end
+            r
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(3)], r) # baz
+        @test has_return_escape(result.state[Argument(4)], r) # qux
+    end
+
+    # foreigncall should disable field analysis
+    let result = code_escapes((Any,Nothing,Int,UInt)) do t, mt, lim, world
+            ambig = false
+            min = Ref{UInt}(typemin(UInt))
+            max = Ref{UInt}(typemax(UInt))
+            has_ambig = Ref{Int32}(0)
+            mt = ccall(:jl_matching_methods, Any,
+                (Any, Any, Cint, Cint, UInt, Ptr{UInt}, Ptr{UInt}, Ref{Int32}),
+                t, mt, lim, ambig, world, min, max, has_ambig)::Union{Array{Any,1}, Bool}
+            return mt, has_ambig[]
+        end
+        for i in findall(isnew, result.ir.stmts.inst)
+            @test !is_load_forwardable(result.state[SSAValue(i)])
+        end
+    end
+end
+
+# demonstrate the power of our field / alias analysis with a realistic end to end example
+abstract type AbstractPoint{T} end
+mutable struct MPoint{T} <: AbstractPoint{T}
+    x::T
+    y::T
+end
+add(a::P, b::P) where P<:AbstractPoint = P(a.x + b.x, a.y + b.y)
+function compute(T, ax, ay, bx, by)
+    a = T(ax, ay)
+    b = T(bx, by)
+    for i in 0:(100000000-1)
+        c = add(a, b) # replaceable
+        a = add(c, b) # replaceable
+    end
+    a.x, a.y
+end
+let result = @code_escapes compute(MPoint, 1+.5im, 2+.5im, 2+.25im, 4+.75im)
+    for i in findall(1:length(result.ir.stmts)) do idx
+                 inst = EscapeAnalysis.getinst(result.ir, idx)
+                 stmt = inst[:inst]
+                 return (isnew(stmt) || isϕ(stmt)) && inst[:type] <: MPoint
+             end
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+end
+function compute(a, b)
+    for i in 0:(100000000-1)
+        c = add(a, b) # replaceable
+        a = add(c, b) # unreplaceable (aliased to the call argument `a`)
+    end
+    a.x, a.y
+end
+let result = @code_escapes compute(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im))
+    idxs = findall(1:length(result.ir.stmts)) do idx
+        inst = EscapeAnalysis.getinst(result.ir, idx)
+        stmt = inst[:inst]
+        return isnew(stmt) && inst[:type] <: MPoint
+    end
+    @assert length(idxs) == 2
+    @test count(i->is_load_forwardable(result.state[SSAValue(i)]), idxs) == 1
+end
+function compute!(a, b)
+    for i in 0:(100000000-1)
+        c = add(a, b)  # replaceable
+        a′ = add(c, b) # replaceable
+        a.x = a′.x
+        a.y = a′.y
+    end
+end
+let result = @code_escapes compute!(MPoint(1+.5im, 2+.5im), MPoint(2+.25im, 4+.75im))
+    for i in findall(1:length(result.ir.stmts)) do idx
+                 inst = EscapeAnalysis.getinst(result.ir, idx)
+                 stmt = inst[:inst]
+                 return isnew(stmt) && inst[:type] <: MPoint
+             end
+        @test is_load_forwardable(result.state[SSAValue(i)])
+    end
+end
+
+@testset "array primitives" begin
+    inbounds = Base.JLOptions().check_bounds == 0
+
+    # arrayref
+    let result = code_escapes((Vector{String},Int)) do xs, i
+            s = Base.arrayref(true, xs, i)
+            return s
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)   # xs
+        @test has_thrown_escape(result.state[Argument(2)])      # xs
+        @test !has_return_escape(result.state[Argument(3)], r)  # i
+    end
+    let result = code_escapes((Vector{String},Int)) do xs, i
+            s = Base.arrayref(false, xs, i)
+            return s
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)   # xs
+        @test !has_thrown_escape(result.state[Argument(2)])     # xs
+        @test !has_return_escape(result.state[Argument(3)], r)  # i
+    end
+    inbounds && let result = code_escapes((Vector{String},Int)) do xs, i
+            s = @inbounds xs[i]
+            return s
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r)   # xs
+        @test !has_thrown_escape(result.state[Argument(2)])     # xs
+        @test !has_return_escape(result.state[Argument(3)], r)  # i
+    end
+    let result = code_escapes((Vector{String},Bool)) do xs, i
+            c = Base.arrayref(true, xs, i) # TypeError will happen here
+            return c
+        end
+        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+    end
+    let result = code_escapes((String,Int)) do xs, i
+            c = Base.arrayref(true, xs, i) # TypeError will happen here
+            return c
+        end
+        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+    end
+    let result = code_escapes((AbstractVector{String},Int)) do xs, i
+            c = Base.arrayref(true, xs, i) # TypeError may happen here
+            return c
+        end
+        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+    end
+    let result = code_escapes((Vector{String},Any)) do xs, i
+            c = Base.arrayref(true, xs, i) # TypeError may happen here
+            return c
+        end
+        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+    end
+
+    # arrayset
+    let result = code_escapes((Vector{String},String,Int,)) do xs, x, i
+            Base.arrayset(true, xs, x, i)
+            return xs
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # xs
+        @test has_thrown_escape(result.state[Argument(2)])    # xs
+        @test has_return_escape(result.state[Argument(3)], r) # x
+    end
+    let result = code_escapes((Vector{String},String,Int,)) do xs, x, i
+            Base.arrayset(false, xs, x, i)
+            return xs
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # xs
+        @test !has_thrown_escape(result.state[Argument(2)])    # xs
+        @test has_return_escape(result.state[Argument(3)], r) # x
+    end
+    inbounds && let result = code_escapes((Vector{String},String,Int,)) do xs, x, i
+            @inbounds xs[i] = x
+            return xs
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[Argument(2)], r) # xs
+        @test !has_thrown_escape(result.state[Argument(2)])    # xs
+        @test has_return_escape(result.state[Argument(3)], r) # x
+    end
+    let result = code_escapes((String,String,String,)) do s, t, u
+            xs = Vector{String}(undef, 3)
+            Base.arrayset(true, xs, s, 1)
+            Base.arrayset(true, xs, t, 2)
+            Base.arrayset(true, xs, u, 3)
+            return xs
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+        for i in 2:result.state.nargs
+            @test has_return_escape(result.state[Argument(i)], r)
+        end
+    end
+    let result = code_escapes((Vector{String},String,Bool,)) do xs, x, i
+            Base.arrayset(true, xs, x, i) # TypeError will happen here
+            return xs
+        end
+        t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+        @test has_thrown_escape(result.state[Argument(3)], t) # x
+    end
+    let result = code_escapes((String,String,Int,)) do xs, x, i
+            Base.arrayset(true, xs, x, i) # TypeError will happen here
+            return xs
+        end
+        t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs::String
+        @test has_thrown_escape(result.state[Argument(3)], t) # x::String
+    end
+    let result = code_escapes((AbstractVector{String},String,Int,)) do xs, x, i
+            Base.arrayset(true, xs, x, i) # TypeError may happen here
+            return xs
+        end
+        t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+        @test has_thrown_escape(result.state[Argument(3)], t) # x
+    end
+    let result = code_escapes((Vector{String},AbstractString,Int,)) do xs, x, i
+            Base.arrayset(true, xs, x, i) # TypeError may happen here
+            return xs
+        end
+        t = only(findall(iscall((result.ir, Base.arrayset)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+        @test has_thrown_escape(result.state[Argument(3)], t) # x
+    end
+
+    # arrayref and arrayset
+    let result = code_escapes() do
+            a = Vector{Vector{Any}}(undef, 1)
+            b = Any[]
+            a[1] = b
+            return a[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        ai = only(findall(result.ir.stmts.inst) do @nospecialize x
+            isarrayalloc(x) && x.args[2] === Vector{Vector{Any}}
+        end)
+        bi = only(findall(result.ir.stmts.inst) do @nospecialize x
+            isarrayalloc(x) && x.args[2] === Vector{Any}
+        end)
+        @test !has_return_escape(result.state[SSAValue(ai)], r)
+        @test has_return_escape(result.state[SSAValue(bi)], r)
+    end
+    let result = code_escapes() do
+            a = Vector{Vector{Any}}(undef, 1)
+            b = Any[]
+            a[1] = b
+            return a
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        ai = only(findall(result.ir.stmts.inst) do @nospecialize x
+            isarrayalloc(x) && x.args[2] === Vector{Vector{Any}}
+        end)
+        bi = only(findall(result.ir.stmts.inst) do @nospecialize x
+            isarrayalloc(x) && x.args[2] === Vector{Any}
+        end)
+        @test has_return_escape(result.state[SSAValue(ai)], r)
+        @test has_return_escape(result.state[SSAValue(bi)], r)
+    end
+    let result = code_escapes((Vector{Any},String,Int,Int)) do xs, s, i, j
+            x = SafeRef(s)
+            xs[i] = x
+            xs[j] # potential error
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        t = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(3)], t) # s
+        @test has_thrown_escape(result.state[SSAValue(i)], t) # x
+    end
+
+    # arraysize
+    let result = code_escapes((Vector{Any},)) do xs
+            Core.arraysize(xs, 1)
+        end
+        t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst))
+        @test !has_thrown_escape(result.state[Argument(2)], t)
+    end
+    let result = code_escapes((Vector{Any},Int,)) do xs, dim
+            Core.arraysize(xs, dim)
+        end
+        t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst))
+        @test !has_thrown_escape(result.state[Argument(2)], t)
+    end
+    let result = code_escapes((Any,)) do xs
+            Core.arraysize(xs, 1)
+        end
+        t = only(findall(iscall((result.ir, Core.arraysize)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t)
+    end
+
+    # arraylen
+    let result = code_escapes((Vector{Any},)) do xs
+            Base.arraylen(xs)
+        end
+        t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst))
+        @test !has_thrown_escape(result.state[Argument(2)], t) # xs
+    end
+    let result = code_escapes((String,)) do xs
+            Base.arraylen(xs)
+        end
+        t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+    end
+    let result = code_escapes((Vector{Any},)) do xs
+            Base.arraylen(xs, 1)
+        end
+        t = only(findall(iscall((result.ir, Base.arraylen)), result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[Argument(2)], t) # xs
+    end
+
+    # array resizing
+    # without BoundsErrors
+    let result = code_escapes((Vector{Any},String)) do xs, x
+            @ccall jl_array_grow_beg(xs::Any, 2::UInt)::Cvoid
+            xs[1] = x
+            xs
+        end
+        t = only(findall(isarrayresize, result.ir.stmts.inst))
+        @test !has_thrown_escape(result.state[Argument(2)], t) # xs
+        @test !has_thrown_escape(result.state[Argument(3)], t) # x
+    end
+    let result = code_escapes((Vector{Any},String)) do xs, x
+            @ccall jl_array_grow_end(xs::Any, 2::UInt)::Cvoid
+            xs[1] = x
+            xs
+        end
+        t = only(findall(isarrayresize, result.ir.stmts.inst))
+        @test !has_thrown_escape(result.state[Argument(2)], t) # xs
+        @test !has_thrown_escape(result.state[Argument(3)], t) # x
+    end
+    # with possible BoundsErrors
+    let result = code_escapes((String,)) do x
+            xs = Any[1,2,3]
+            xs[3] = x
+            @ccall jl_array_del_beg(xs::Any, 2::UInt)::Cvoid # can potentially throw
+            xs
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        t = only(findall(isarrayresize, result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
+        @test has_thrown_escape(result.state[Argument(2)], t) # x
+    end
+    let result = code_escapes((String,)) do x
+            xs = Any[1,2,3]
+            xs[1] = x
+            @ccall jl_array_del_end(xs::Any, 2::UInt)::Cvoid # can potentially throw
+            xs
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        t = only(findall(isarrayresize, result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
+        @test has_thrown_escape(result.state[Argument(2)], t) # x
+    end
+    let result = code_escapes((String,)) do x
+            xs = Any[x]
+            @ccall jl_array_grow_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        t = only(findall(isarrayresize, result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
+        @test has_thrown_escape(result.state[Argument(2)], t) # x
+    end
+    let result = code_escapes((String,)) do x
+            xs = Any[x]
+            @ccall jl_array_del_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        t = only(findall(isarrayresize, result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
+        @test has_thrown_escape(result.state[Argument(2)], t) # x
+    end
+    inbounds && let result = code_escapes((String,)) do x
+            xs = @inbounds Any[x]
+            @ccall jl_array_del_at(xs::Any, 1::UInt, 2::UInt)::Cvoid # can potentially throw
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        t = only(findall(isarrayresize, result.ir.stmts.inst))
+        @test has_thrown_escape(result.state[SSAValue(i)], t) # xs
+        @test has_thrown_escape(result.state[Argument(2)], t) # x
+    end
+
+    # array copy
+    let result = code_escapes((Vector{Any},)) do xs
+            return copy(xs)
+        end
+        i = only(findall(isarraycopy, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+        @test_broken !has_return_escape(result.state[Argument(2)], r)
+    end
+    let result = code_escapes((String,)) do s
+            xs = String[s]
+            xs′ = copy(xs)
+            return xs′[1]
+        end
+        i1 = only(findall(isarrayalloc, result.ir.stmts.inst))
+        i2 = only(findall(isarraycopy, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i1)])
+        @test !has_return_escape(result.state[SSAValue(i2)])
+        @test has_return_escape(result.state[Argument(2)], r) # s
+    end
+    let result = code_escapes((Vector{Any},)) do xs
+            xs′ = copy(xs)
+            return xs′[1] # may potentially throw BoundsError, should escape `xs` conservatively (i.e. escape its elements)
+        end
+        i = only(findall(isarraycopy, result.ir.stmts.inst))
+        ref = only(findall(iscall((result.ir, Base.arrayref)), result.ir.stmts.inst))
+        ret = only(findall(isreturn, result.ir.stmts.inst))
+        @test_broken !has_thrown_escape(result.state[SSAValue(i)], ref)
+        @test_broken !has_return_escape(result.state[SSAValue(i)], ret)
+        @test has_thrown_escape(result.state[Argument(2)], ref)
+        @test has_return_escape(result.state[Argument(2)], ret)
+    end
+    let result = code_escapes((String,)) do s
+            xs = Vector{String}(undef, 1)
+            xs[1] = s
+            xs′ = copy(xs)
+            length(xs′) > 2 && throw(xs′)
+            return xs′
+        end
+        i1 = only(findall(isarrayalloc, result.ir.stmts.inst))
+        i2 = only(findall(isarraycopy, result.ir.stmts.inst))
+        t = only(findall(iscall((result.ir, throw)), result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test_broken !has_thrown_escape(result.state[SSAValue(i1)], t)
+        @test_broken !has_return_escape(result.state[SSAValue(i1)], r)
+        @test has_thrown_escape(result.state[SSAValue(i2)], t)
+        @test has_return_escape(result.state[SSAValue(i2)], r)
+        @test has_thrown_escape(result.state[Argument(2)], t)
+        @test has_return_escape(result.state[Argument(2)], r)
+    end
+
+    # isassigned
+    let result = code_escapes((Vector{Any},Int)) do xs, i
+            return isassigned(xs, i)
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[Argument(2)], r)
+        @test !has_thrown_escape(result.state[Argument(2)])
+    end
+
+    # indexing analysis
+    # -----------------
+
+    # safe case
+    let result = code_escapes((String,String)) do s, t
+            a = Vector{Any}(undef, 2)
+            a[1] = s
+            a[2] = t
+            return a[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+        @test has_return_escape(result.state[Argument(2)], r) # s
+        @test !has_return_escape(result.state[Argument(3)], r) # t
+    end
+    let result = code_escapes((String,String)) do s, t
+            a = Matrix{Any}(undef, 1, 2)
+            a[1, 1] = s
+            a[1, 2] = t
+            return a[1, 1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+        @test is_load_forwardable(result.state[SSAValue(i)])
+        @test has_return_escape(result.state[Argument(2)], r) # s
+        @test !has_return_escape(result.state[Argument(3)], r) # t
+    end
+    let result = code_escapes((Bool,String,String,String)) do c, s, t, u
+            a = Vector{Any}(undef, 2)
+            if c
+                a[1] = s
+                a[2] = u
+            else
+                a[1] = t
+                a[2] = u
+            end
+            return a[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test is_load_forwardable(result.state[SSAValue(i)])
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+        @test has_return_escape(result.state[Argument(3)], r) # s
+        @test has_return_escape(result.state[Argument(4)], r) # t
+        @test !has_return_escape(result.state[Argument(5)], r) # u
+    end
+    let result = code_escapes((Bool,String,String,String)) do c, s, t, u
+            a = Any[nothing, nothing] # TODO how to deal with loop indexing?
+            if c
+                a[1] = s
+                a[2] = u
+            else
+                a[1] = t
+                a[2] = u
+            end
+            return a[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+        @test_broken is_load_forwardable(result.state[SSAValue(i)])
+        @test has_return_escape(result.state[Argument(3)], r) # s
+        @test has_return_escape(result.state[Argument(4)], r) # t
+        @test_broken !has_return_escape(result.state[Argument(5)], r) # u
+    end
+    let result = code_escapes((String,)) do s
+            a = Vector{Vector{Any}}(undef, 1)
+            b = Any[s]
+            a[1] = b
+            return a[1][1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        is = findall(isarrayalloc, result.ir.stmts.inst)
+        @assert length(is) == 2
+        ia, ib = is
+        @test !has_return_escape(result.state[SSAValue(ia)], r)
+        @test is_load_forwardable(result.state[SSAValue(ia)])
+        @test !has_return_escape(result.state[SSAValue(ib)], r)
+        @test_broken is_load_forwardable(result.state[SSAValue(ib)])
+        @test has_return_escape(result.state[Argument(2)], r) # s
+    end
+    let result = code_escapes((Bool,String,String,Regex,Regex,)) do c, s1, s2, t1, t2
+            if c
+                a = Vector{String}(undef, 2)
+                a[1] = s1
+                a[2] = s2
+            else
+                a = Vector{Regex}(undef, 2)
+                a[1] = t1
+                a[2] = t2
+            end
+            return a[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        for i in findall(isarrayalloc, result.ir.stmts.inst)
+            @test !has_return_escape(result.state[SSAValue(i)], r)
+            @test is_load_forwardable(result.state[SSAValue(i)])
+        end
+        @test has_return_escape(result.state[Argument(3)], r) # s1
+        @test !has_return_escape(result.state[Argument(4)], r) # s2
+        @test has_return_escape(result.state[Argument(5)], r) # t1
+        @test !has_return_escape(result.state[Argument(6)], r) # t2
+    end
+    let result = code_escapes((String,String,Int)) do s, t, i
+            a = Any[s]
+            push!(a, t)
+            return a[2]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+        @test_broken is_load_forwardable(result.state[SSAValue(i)])
+        @test_broken !has_return_escape(result.state[Argument(2)], r) # s
+        @test has_return_escape(result.state[Argument(3)], r) # t
+    end
+    # unsafe cases
+    let result = code_escapes((String,String,Int)) do s, t, i
+            a = Vector{Any}(undef, 2)
+            a[1] = s
+            a[2] = t
+            return a[i]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+        @test !is_load_forwardable(result.state[SSAValue(i)])
+        @test has_return_escape(result.state[Argument(2)], r) # s
+        @test has_return_escape(result.state[Argument(3)], r) # t
+    end
+    let result = code_escapes((String,String,Int)) do s, t, i
+            a = Vector{Any}(undef, 2)
+            a[1] = s
+            a[i] = t
+            return a[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+        @test !is_load_forwardable(result.state[SSAValue(i)])
+        @test has_return_escape(result.state[Argument(2)], r) # s
+        @test has_return_escape(result.state[Argument(3)], r) # t
+    end
+    let result = code_escapes((String,String,Int,Int,Int)) do s, t, i, j, k
+            a = Vector{Any}(undef, 2)
+            a[3] = s # BoundsError
+            a[1] = t
+            return a[1]
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+        @test !is_load_forwardable(result.state[SSAValue(i)])
+    end
+    let result = @eval Module() begin
+            @noinline some_resize!(a) = pushfirst!(a, nothing)
+            $code_escapes((String,String,Int)) do s, t, i
+                a = Vector{Any}(undef, 2)
+                a[1] = s
+                some_resize!(a)
+                return a[2]
+            end
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test_broken !has_return_escape(result.state[SSAValue(i)], r)
+        @test !is_load_forwardable(result.state[SSAValue(i)])
+    end
+
+    # circular reference
+    let result = code_escapes() do
+            xs = Vector{Any}(undef, 1)
+            xs[1] = xs
+            return xs[1]
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+    end
+    let result = @eval Module() begin
+            const Ax = Vector{Any}(undef, 1)
+            Ax[1] = Ax
+            $code_escapes() do
+                xs = Ax[1]::Vector{Any}
+                return xs[1]
+            end
+        end
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        for i in findall(iscall((result.ir, Core.arrayref)), result.ir.stmts.inst)
+            @test has_return_escape(result.state[SSAValue(i)], r)
+        end
+    end
+    let result = @eval Module() begin
+            @noinline function genxs()
+                xs = Vector{Any}(undef, 1)
+                xs[1] = xs
+                return xs
+            end
+            $code_escapes() do
+                xs = genxs()
+                return xs[1]
+            end
+        end
+        i = only(findall(isinvoke(:genxs), result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test has_return_escape(result.state[SSAValue(i)], r)
+    end
+end
+
+# demonstrate array primitive support with a realistic end to end example
+let result = code_escapes((Int,String,)) do n,s
+        xs = String[]
+        for i in 1:n
+            push!(xs, s)
+        end
+        xs
+    end
+    i = only(findall(isarrayalloc, result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_return_escape(result.state[SSAValue(i)], r)
+    Base.JLOptions().check_bounds ≠ 0 && @test has_thrown_escape(result.state[SSAValue(i)])
+    @test has_return_escape(result.state[Argument(3)], r) # s
+    Base.JLOptions().check_bounds ≠ 0 && @test has_thrown_escape(result.state[Argument(3)])    # s
+end
+let result = code_escapes((Int,String,)) do n,s
+        xs = String[]
+        for i in 1:n
+            pushfirst!(xs, s)
+        end
+        xs
+    end
+    i = only(findall(isarrayalloc, result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_return_escape(result.state[SSAValue(i)], r) # xs
+    @test has_thrown_escape(result.state[SSAValue(i)])    # xs
+    @test has_return_escape(result.state[Argument(3)], r) # s
+    @test has_thrown_escape(result.state[Argument(3)])    # s
+end
+let result = code_escapes((String,String,String)) do s, t, u
+        xs = String[]
+        resize!(xs, 3)
+        xs[1] = s
+        xs[1] = t
+        xs[1] = u
+        xs
+    end
+    i = only(findall(isarrayalloc, result.ir.stmts.inst))
+    r = only(findall(isreturn, result.ir.stmts.inst))
+    @test has_return_escape(result.state[SSAValue(i)], r)
+    @test has_thrown_escape(result.state[SSAValue(i)])    # xs
+    @test has_return_escape(result.state[Argument(2)], r) # s
+    @test has_return_escape(result.state[Argument(3)], r) # t
+    @test has_return_escape(result.state[Argument(4)], r) # u
+end
+
+@static if isdefined(Core, :ImmutableArray)
+
+import Core: ImmutableArray, arrayfreeze, mutating_arrayfreeze, arraythaw
+
+@testset "ImmutableArray" begin
+    # arrayfreeze
+    let result = code_escapes((Vector{Any},)) do xs
+            arrayfreeze(xs)
+        end
+        @test !has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Vector,)) do xs
+            arrayfreeze(xs)
+        end
+        @test !has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do xs
+            arrayfreeze(xs)
+        end
+        @test has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((ImmutableArray{Any,1},)) do xs
+            arrayfreeze(xs)
+        end
+        @test has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes() do
+            xs = Any[]
+            arrayfreeze(xs)
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test has_no_escape(result.state[SSAValue(1)])
+    end
+
+    # mutating_arrayfreeze
+    let result = code_escapes((Vector{Any},)) do xs
+            mutating_arrayfreeze(xs)
+        end
+        @test !has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Vector,)) do xs
+            mutating_arrayfreeze(xs)
+        end
+        @test !has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do xs
+            mutating_arrayfreeze(xs)
+        end
+        @test has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((ImmutableArray{Any,1},)) do xs
+            mutating_arrayfreeze(xs)
+        end
+        @test has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes() do
+            xs = Any[]
+            mutating_arrayfreeze(xs)
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test has_no_escape(result.state[SSAValue(1)])
+    end
+
+    # arraythaw
+    let result = code_escapes((ImmutableArray{Any,1},)) do xs
+            arraythaw(xs)
+        end
+        @test !has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((ImmutableArray,)) do xs
+            arraythaw(xs)
+        end
+        @test !has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Any,)) do xs
+            arraythaw(xs)
+        end
+        @test has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes((Vector{Any},)) do xs
+            arraythaw(xs)
+        end
+        @test has_thrown_escape(result.state[Argument(2)])
+    end
+    let result = code_escapes() do
+            xs = ImmutableArray(Any[])
+            arraythaw(xs)
+        end
+        i = only(findall(isarrayalloc, result.ir.stmts.inst))
+        @test has_no_escape(result.state[SSAValue(1)])
+    end
+end
+
+# demonstrate some arrayfreeze optimizations
+# !has_return_escape(ary) means ary is eligible for arrayfreeze to mutating_arrayfreeze optimization
+let result = code_escapes((Int,)) do n
+        xs = collect(1:n)
+        ImmutableArray(xs)
+    end
+    i = only(findall(isarrayalloc, result.ir.stmts.inst))
+    @test !has_return_escape(result.state[SSAValue(i)])
+end
+let result = code_escapes((Vector{Float64},)) do xs
+        ys = sin.(xs)
+        ImmutableArray(ys)
+    end
+    i = only(findall(isarrayalloc, result.ir.stmts.inst))
+    @test !has_return_escape(result.state[SSAValue(i)])
+end
+let result = code_escapes((Vector{Pair{Int,String}},)) do xs
+        n = maximum(first, xs)
+        ys = Vector{String}(undef, n)
+        for (i, s) in xs
+            ys[i] = s
+        end
+        ImmutableArray(xs)
+    end
+    i = only(findall(isarrayalloc, result.ir.stmts.inst))
+    @test !has_return_escape(result.state[SSAValue(i)])
+end
+
+end # @static if isdefined(Core, :ImmutableArray)
+
+# demonstrate a simple type level analysis can sometimes improve the analysis accuracy
+# by compensating the lack of yet unimplemented analyses
+@testset "special-casing bitstype" begin
+    let result = code_escapes((Nothing,)) do a
+            global GV = a
+        end
+        @test !(has_all_escape(result.state[Argument(2)]))
+    end
+
+    let result = code_escapes((Int,)) do a
+            o = SafeRef(a)
+            f = o[]
+            return f
+        end
+        i = only(findall(isnew, result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[SSAValue(i)], r)
+    end
+
+    # an escaped tuple stmt will not propagate to its Int argument (since `Int` is of bitstype)
+    let result = code_escapes((Int,Any,)) do a, b
+            t = tuple(a, b)
+            return t
+        end
+        i = only(findall(iscall((result.ir, tuple)), result.ir.stmts.inst))
+        r = only(findall(isreturn, result.ir.stmts.inst))
+        @test !has_return_escape(result.state[Argument(2)], r)
+        @test has_return_escape(result.state[Argument(3)], r)
+    end
+end
+
+# # TODO implement a finalizer elision pass
+# mutable struct WithFinalizer
+#     v
+#     function WithFinalizer(v)
+#         x = new(v)
+#         f(t) = @async println("Finalizing $t.")
+#         return finalizer(x, x)
+#     end
+# end
+# make_m(v = 10) = MyMutable(v)
+# function simple(cond)
+#     m = make_m()
+#     if cond
+#         # println(m.v)
+#         return nothing # <= insert `finalize` call here
+#     end
+#     return m
+# end
diff --git a/test/compiler/EscapeAnalysis/setup.jl b/test/compiler/EscapeAnalysis/setup.jl
new file mode 100644
index 00000000000000..4e7d6fb5159aae
--- /dev/null
+++ b/test/compiler/EscapeAnalysis/setup.jl
@@ -0,0 +1,72 @@
+include(normpath(@__DIR__, "EAUtils.jl"))
+using Test, Core.Compiler.EscapeAnalysis, .EAUtils
+import Core: Argument, SSAValue, ReturnNode
+const EA = Core.Compiler.EscapeAnalysis
+import .EA: ignore_argescape
+
+isT(T) = (@nospecialize x) -> x === T
+isreturn(@nospecialize x) = isa(x, Core.ReturnNode) && isdefined(x, :val)
+isthrow(@nospecialize x) = Meta.isexpr(x, :call) && Core.Compiler.is_throw_call(x)
+isnew(@nospecialize x) = Meta.isexpr(x, :new)
+isϕ(@nospecialize x) = isa(x, Core.PhiNode)
+function with_normalized_name(@nospecialize(f), @nospecialize(x))
+    if Meta.isexpr(x, :foreigncall)
+        name = x.args[1]
+        nn = EA.normalize(name)
+        return isa(nn, Symbol) && f(nn)
+    end
+    return false
+end
+isarrayalloc(@nospecialize x) = with_normalized_name(nn->!isnothing(Core.Compiler.alloc_array_ndims(nn)), x)
+isarrayresize(@nospecialize x) = with_normalized_name(nn->!isnothing(EA.array_resize_info(nn)), x)
+isarraycopy(@nospecialize x) = with_normalized_name(nn->EA.is_array_copy(nn), x)
+import Core.Compiler: argextype, singleton_type
+iscall(y) = @nospecialize(x) -> iscall(y, x)
+function iscall((ir, f), @nospecialize(x))
+    return iscall(x) do @nospecialize x
+        singleton_type(Core.Compiler.argextype(x, ir, Any[])) === f
+    end
+end
+iscall(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :call) && pred(x.args[1])
+
+# check if `x` is a statically-resolved call of a function whose name is `sym`
+isinvoke(y) = @nospecialize(x) -> isinvoke(y, x)
+isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x)
+isinvoke(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :invoke) && pred(x.args[1]::Core.MethodInstance)
+
+"""
+    is_load_forwardable(x::EscapeInfo) -> Bool
+
+Queries if `x` is elibigle for store-to-load forwarding optimization.
+"""
+function is_load_forwardable(x::EA.EscapeInfo)
+    AliasInfo = x.AliasInfo
+    # NOTE technically we also need to check `!has_thrown_escape(x)` here as well,
+    # but we can also do equivalent check during forwarding
+    return isa(AliasInfo, EA.IndexableFields) || isa(AliasInfo, EA.IndexableElements)
+end
+
+let setup_ex = quote
+        mutable struct SafeRef{T}
+            x::T
+        end
+        Base.getindex(s::SafeRef) = getfield(s, 1)
+        Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x)
+
+        mutable struct SafeRefs{S,T}
+            x1::S
+            x2::T
+        end
+        Base.getindex(s::SafeRefs, idx::Int) = getfield(s, idx)
+        Base.setindex!(s::SafeRefs, x, idx::Int) = setfield!(s, idx, x)
+
+        global GV::Any
+        const global GR = Ref{Any}()
+    end
+    global function EATModule(setup_ex = setup_ex)
+        M = Module()
+        Core.eval(M, setup_ex)
+        return M
+    end
+    Core.eval(@__MODULE__, setup_ex)
+end
diff --git a/test/compiler/codegen.jl b/test/compiler/codegen.jl
index ba846d4903dd2d..9724091637f978 100644
--- a/test/compiler/codegen.jl
+++ b/test/compiler/codegen.jl
@@ -4,6 +4,7 @@
 
 using Random
 using InteractiveUtils
+using Libdl
 
 const opt_level = Base.JLOptions().opt_level
 const coverage = (Base.JLOptions().code_coverage > 0) || (Base.JLOptions().malloc_log > 0)
@@ -50,38 +51,57 @@ end
 
 # This function tests if functions are output when compiled if jl_dump_compiles is enabled.
 # Have to go through pains with recursive function (eval probably not required) to make sure
-# that inlining won't happen.
+# that inlining won't happen. (Tests SnoopCompile.jl's @snoopc.)
 function test_jl_dump_compiles()
-    tfile = tempname()
-    io = open(tfile, "w")
-    @eval(test_jl_dump_compiles_internal(x) = x)
-    ccall(:jl_dump_compiles, Cvoid, (Ptr{Cvoid},), io.handle)
-    @eval test_jl_dump_compiles_internal(1)
-    ccall(:jl_dump_compiles, Cvoid, (Ptr{Cvoid},), C_NULL)
-    close(io)
-    tstats = stat(tfile)
-    tempty = tstats.size == 0
-    rm(tfile)
-    @test tempty == false
+    mktemp() do tfile, io
+        @eval(test_jl_dump_compiles_internal(x) = x)
+        ccall(:jl_dump_compiles, Cvoid, (Ptr{Cvoid},), io.handle)
+        @eval test_jl_dump_compiles_internal(1)
+        ccall(:jl_dump_compiles, Cvoid, (Ptr{Cvoid},), C_NULL)
+        close(io)
+        tstats = stat(tfile)
+        tempty = tstats.size == 0
+        @test tempty == false
+    end
 end
 
 # This function tests if a toplevel thunk is output if jl_dump_compiles is enabled.
-# The eval statement creates the toplevel thunk.
+# The eval statement creates the toplevel thunk. (Tests SnoopCompile.jl's @snoopc.)
 function test_jl_dump_compiles_toplevel_thunks()
-    tfile = tempname()
-    io = open(tfile, "w")
-    # Make sure to cause compilation of the eval function
-    # before calling it below.
-    Core.eval(Main, Any[:(nothing)][1])
-    topthunk = Meta.lower(Main, :(for i in 1:10; end))
-    ccall(:jl_dump_compiles, Cvoid, (Ptr{Cvoid},), io.handle)
-    Core.eval(Main, topthunk)
-    ccall(:jl_dump_compiles, Cvoid, (Ptr{Cvoid},), C_NULL)
-    close(io)
-    tstats = stat(tfile)
-    tempty = tstats.size == 0
-    rm(tfile)
-    @test tempty == true
+    mktemp() do tfile, io
+        # Make sure to cause compilation of the eval function
+        # before calling it below.
+        Core.eval(Main, Any[:(nothing)][1])
+        GC.enable(false)  # avoid finalizers to be compiled
+        topthunk = Meta.lower(Main, :(for i in 1:10; end))
+        ccall(:jl_dump_compiles, Cvoid, (Ptr{Cvoid},), io.handle)
+        Core.eval(Main, topthunk)
+        ccall(:jl_dump_compiles, Cvoid, (Ptr{Cvoid},), C_NULL)
+        close(io)
+        GC.enable(true)
+        tstats = stat(tfile)
+        tempty = tstats.size == 0
+        @test tempty == true
+    end
+end
+
+# This function tests if LLVM optimization info is dumped when enabled (Tests
+# SnoopCompile.jl's @snoopl.)
+function test_jl_dump_llvm_opt()
+    mktemp() do func_file, func_io
+        mktemp() do llvm_file, llvm_io
+            @eval(test_jl_dump_compiles_internal(x) = x)
+            ccall(:jl_dump_emitted_mi_name, Cvoid, (Ptr{Cvoid},), func_io.handle)
+            ccall(:jl_dump_llvm_opt, Cvoid, (Ptr{Cvoid},), llvm_io.handle)
+            @eval test_jl_dump_compiles_internal(1)
+            ccall(:jl_dump_emitted_mi_name, Cvoid, (Ptr{Cvoid},), C_NULL)
+            ccall(:jl_dump_llvm_opt, Cvoid, (Ptr{Cvoid},), C_NULL)
+            close(func_io)
+            close(llvm_io)
+            @test stat(func_file).size !== 0
+            @test stat(llvm_file).size !== 0
+        end
+    end
 end
 
 if opt_level > 0
@@ -105,6 +125,7 @@ if opt_level > 0
 
     test_jl_dump_compiles()
     test_jl_dump_compiles_toplevel_thunks()
+    test_jl_dump_llvm_opt()
 end
 
 # Make sure we will not elide the allocation
@@ -343,16 +364,16 @@ macro aliasscope(body)
     end)
 end
 
-struct Const{T<:Array}
+struct ConstAliasScope{T<:Array}
     a::T
 end
 
-@eval Base.getindex(A::Const, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1)
-@eval Base.getindex(A::Const, i1::Int, i2::Int, I::Int...) =  (Base.@_inline_meta; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
+@eval Base.getindex(A::ConstAliasScope, i1::Int) = Core.const_arrayref($(Expr(:boundscheck)), A.a, i1)
+@eval Base.getindex(A::ConstAliasScope, i1::Int, i2::Int, I::Int...) =  (@inline; Core.const_arrayref($(Expr(:boundscheck)), A.a, i1, i2, I...))
 
 function foo31018!(a, b)
     @aliasscope for i in eachindex(a, b)
-        a[i] = Const(b)[i]
+        a[i] = ConstAliasScope(b)[i]
     end
 end
 io = IOBuffer()
@@ -473,3 +494,243 @@ end
     @test contains(llvmstr, str) || llvmstr
     @test f37262(Base.inferencebarrier(true)) === nothing
 end
+
+# issue #37671
+let d = Dict((:a,) => 1, (:a, :b) => 2)
+    @test d[(:a,)] == 1
+    @test d[(:a, :b)] == 2
+end
+
+# issue #37880
+primitive type Has256Bits 256 end
+let x = reinterpret(Has256Bits, [0xfcdac822cac89d82de4f9b3326da8294, 0x6ebac4d5982880ca703c57e37657f1ee])[]
+    shifted = [0xeefcdac822cac89d82de4f9b3326da82, 0x006ebac4d5982880ca703c57e37657f1]
+    f(x) = Base.lshr_int(x, 0x8)
+    @test reinterpret(UInt128, [f(x)]) == shifted
+    @test reinterpret(UInt128, [Base.lshr_int(x, 0x8)]) == shifted
+    g(x) = Base.ashr_int(x, 0x8)
+    @test reinterpret(UInt128, [g(x)]) == shifted
+    @test reinterpret(UInt128, [Base.ashr_int(x, 0x8)]) == shifted
+    lshifted = [0xdac822cac89d82de4f9b3326da829400, 0xbac4d5982880ca703c57e37657f1eefc]
+    h(x) = Base.shl_int(x, 0x8)
+    @test reinterpret(UInt128, [h(x)]) == lshifted
+    @test reinterpret(UInt128, [Base.shl_int(x, 0x8)]) == lshifted
+end
+
+# issue #37872
+let f(@nospecialize(x)) = x===Base.ImmutableDict(Int128=>:big)
+    @test !f(Dict(Int=>Int))
+end
+
+# issue #37974
+primitive type UInt24 24 end
+let a = Core.Intrinsics.trunc_int(UInt24, 3),
+    f(t) = t[2]
+    @test f((a, true)) === true
+    @test f((a, false)) === false
+    @test sizeof(Tuple{UInt24,Bool}) == 8
+    @test sizeof(UInt24) == 3
+    @test sizeof(Union{UInt8,UInt24}) == 3
+    @test sizeof(Base.RefValue{Union{UInt8,UInt24}}) == 8
+end
+
+# issue #39232
+function f39232(a)
+    z = Any[]
+    for (i, ai) in enumerate(a)
+        push!(z, ai)
+    end
+    return z
+end
+@test f39232((+, -)) == Any[+, -]
+
+@testset "GC.@preserve" begin
+    # main use case
+    function f1(cond)
+        val = [1]
+        GC.@preserve val begin end
+    end
+    @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f1, Tuple{Bool}, true, false, false))
+
+    # stack allocated objects (JuliaLang/julia#34241)
+    function f3(cond)
+        val = ([1],)
+        GC.@preserve val begin end
+    end
+    @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f3, Tuple{Bool}, true, false, false))
+
+    # unions of immutables (JuliaLang/julia#39501)
+    function f2(cond)
+        val = cond ? 1 : 1f0
+        GC.@preserve val begin end
+    end
+    @test !occursin("llvm.julia.gc_preserve_begin", get_llvm(f2, Tuple{Bool}, true, false, false))
+    # make sure the fix for the above doesn't regress #34241
+    function f4(cond)
+        val = cond ? ([1],) : ([1f0],)
+        GC.@preserve val begin end
+    end
+    @test occursin("llvm.julia.gc_preserve_begin", get_llvm(f4, Tuple{Bool}, true, false, false))
+end
+
+# issue #32843
+function f32843(vals0, v)
+    (length(vals0) > 1) && (vals = v[1])
+    (length(vals0) == 1 && vals0[1]==1) && (vals = 1:2)
+    vals
+end
+@test_throws UndefVarError f32843([6], Vector[[1]])
+
+# issue #40855, struct constants with union fields
+@enum T40855 X40855
+struct A40855
+    d::Union{Nothing, T40855}
+    b::Union{Nothing, Int}
+end
+g() = string(A40855(X40855, 1))
+let mod_prefix = (@__MODULE__) == Core.Main ? "" : "$(@__MODULE__)."
+    @test g() == "$(mod_prefix)A40855($(mod_prefix)X40855, 1)"
+end
+
+# issue #40612
+f40612(a, b) = a|b === a|b
+g40612(a, b) = a[]|a[] === b[]|b[]
+@test f40612(true, missing)
+@test !g40612(Union{Bool,Missing}[missing], Union{Bool,Missing}[true])
+@test !g40612(Union{Bool,Missing}[false], Union{Bool,Missing}[true])
+@test g40612(Union{Bool,Missing}[missing], Union{Bool,Missing}[missing])
+@test g40612(Union{Bool,Missing}[true], Union{Bool,Missing}[true])
+@test g40612(Union{Bool,Missing}[false], Union{Bool,Missing}[false])
+
+# issue #41438
+struct A41438{T}
+  x::Ptr{T}
+end
+struct B41438{T}
+  x::T
+end
+f41438(y) = y[].x
+@test A41438.body.layout != C_NULL
+@test B41438.body.layout === C_NULL
+@test f41438(Ref{A41438}(A41438(C_NULL))) === C_NULL
+@test f41438(Ref{B41438}(B41438(C_NULL))) === C_NULL
+
+const S41438 = Pair{Any, Ptr{T}} where T
+g41438() = Array{S41438,1}(undef,1)[1].first
+get_llvm(g41438, ()); # cause allocation of layout
+@test S41438.body.layout != C_NULL
+@test !Base.datatype_pointerfree(S41438.body)
+@test S41438{Int}.layout != C_NULL
+@test !Base.datatype_pointerfree(S41438{Int})
+
+
+# issue #43303
+struct A43303{T}
+    x::Pair{Ptr{T},Ptr{T}}
+end
+@test A43303.body.layout != C_NULL
+@test isbitstype(A43303{Int})
+@test A43303.body.types[1].layout != C_NULL
+
+# issue #41157
+f41157(a, b) = a[1] = b[1]
+@test_throws BoundsError f41157(Tuple{Int}[], Tuple{Union{}}[])
+
+# issue #41096
+struct Modulate41096{M<:Union{Function, Val{true}, Val{false}}, id}
+    modulate::M
+    Modulate41096(id::Symbol, modulate::Function) = new{typeof(modulate), id}(modulate)
+    Modulate41096(id::Symbol, modulate::Bool=true) = new{Val{modulate}, id}(modulate|>Val)
+end
+@inline ismodulatable41096(modulate::Modulate41096) = ismodulatable41096(typeof(modulate))
+@inline ismodulatable41096(::Type{<:Modulate41096{Val{B}}}) where B = B
+@inline ismodulatable41096(::Type{<:Modulate41096{<:Function}}) = true
+
+mutable struct Term41096{I, M<:Modulate41096}
+    modulate::M
+    Term41096{I}(modulate::Modulate41096) where I = new{I, typeof(modulate)}(modulate)
+end
+@inline ismodulatable41096(term::Term41096) = ismodulatable41096(typeof(term))
+@inline ismodulatable41096(::Type{<:Term41096{I, M} where I}) where M = ismodulatable41096(M)
+
+function newexpand41096(gen, name::Symbol)
+    flag = ismodulatable41096(getfield(gen, name))
+    if flag
+        return true
+    else
+        return false
+    end
+end
+
+t41096 = Term41096{:t}(Modulate41096(:t, false))
+μ41096 = Term41096{:μ}(Modulate41096(:μ, false))
+U41096 = Term41096{:U}(Modulate41096(:U, false))
+
+@test !newexpand41096((t=t41096, μ=μ41096, U=U41096), :U)
+
+# test that we can start julia with libjulia-codegen removed; PR #41936
+mktempdir() do pfx
+    cp(dirname(Sys.BINDIR), pfx; force=true)
+    libpath = relpath(dirname(dlpath("libjulia-codegen")), dirname(Sys.BINDIR))
+    libs_deleted = 0
+    for f in filter(f -> startswith(f, "libjulia-codegen"), readdir(joinpath(pfx, libpath)))
+        rm(joinpath(pfx, libpath, f); force=true, recursive=true)
+        libs_deleted += 1
+    end
+    @test libs_deleted > 0
+    @test readchomp(`$pfx/bin/$(Base.julia_exename()) -e 'print("no codegen!\n")'`) == "no codegen!"
+end
+
+# issue #42645
+mutable struct A42645{T}
+    x::Bool
+    function A42645(a::Vector{T}) where T
+        r = new{T}()
+        r.x = false
+        return r
+    end
+end
+mutable struct B42645{T}
+  y::A42645{T}
+end
+x42645 = 1
+function f42645()
+  res = B42645(A42645([x42645]))
+  res.y = A42645([x42645])
+  res.y.x = true
+  res
+end
+@test ((f42645()::B42645).y::A42645{Int}).x
+
+struct A44921{T}
+    x::T
+end
+function f44921(a)
+    if a == :x
+        A44921(_f) # _f purposefully undefined
+    elseif a == :p
+        g44921(a)
+    end
+end
+function g44921(a)
+    if !@isdefined _f # just needs to be some non constprop-able condition
+        A44921(())
+    end
+end
+@test f44921(:p) isa A44921
+
+# issue #43123
+@noinline cmp43123(a::Some, b::Some) = something(a) === something(b)
+@noinline cmp43123(a, b) = a[] === b[]
+@test cmp43123(Some{Function}(+), Some{Union{typeof(+), typeof(-)}}(+))
+@test !cmp43123(Some{Function}(+), Some{Union{typeof(+), typeof(-)}}(-))
+@test cmp43123(Ref{Function}(+), Ref{Union{typeof(+), typeof(-)}}(+))
+@test !cmp43123(Ref{Function}(+), Ref{Union{typeof(+), typeof(-)}}(-))
+@test cmp43123(Function[+], Union{typeof(+), typeof(-)}[+])
+@test !cmp43123(Function[+], Union{typeof(+), typeof(-)}[-])
+
+# Test that donotdelete survives through to LLVM time
+f_donotdelete_input(x) = Base.donotdelete(x+1)
+f_donotdelete_const() = Base.donotdelete(1+1)
+@test occursin("call void (...) @jl_f_donotdelete(i64", get_llvm(f_donotdelete_input, Tuple{Int64}, true, false, false))
+@test occursin("call void (...) @jl_f_donotdelete()", get_llvm(f_donotdelete_const, Tuple{}, true, false, false))
diff --git a/test/compiler/contextual.jl b/test/compiler/contextual.jl
index 9386c66cfbf00a..e89b56e4bf6de8 100644
--- a/test/compiler/contextual.jl
+++ b/test/compiler/contextual.jl
@@ -46,6 +46,7 @@ module MiniCassette
         # Insert one SSAValue for every argument statement
         prepend!(code, [Expr(:call, getfield, SlotNumber(4), i) for i = 1:nargs])
         prepend!(ci.codelocs, [0 for i = 1:nargs])
+        prepend!(ci.ssaflags, [0x00 for i = 1:nargs])
         ci.ssavaluetypes += nargs
         function map_slot_number(slot)
             if slot == 1
@@ -71,11 +72,8 @@ module MiniCassette
         end
 
         tt = Tuple{f, args...}
-        mthds = _methods_by_ftype(tt, -1, typemax(UInt))
-        @assert length(mthds) == 1
-        match = mthds[1]
-        mi = ccall(:jl_specializations_get_linfo, Ref{MethodInstance},
-            (Any, Any, Any), match.method, match.spec_types, match.sparams)
+        match = Base._which(tt, typemax(UInt))
+        mi = Core.Compiler.specialize_method(match)
         # Unsupported in this mini-cassette
         @assert !mi.def.isva
         code_info = retrieve_code_info(mi)
@@ -119,7 +117,7 @@ f() = 2
 @test overdub(Ctx(), gcd, 10, 20) === gcd(10, 20)
 
 # Test that pure propagates for Cassette
-Base.@pure isbitstype(T) = T.isbitstype
+Base.@pure isbitstype(T) = Base.isbitstype(T)
 f31012(T) = Val(isbitstype(T))
 @test @inferred(overdub(Ctx(), f31012, Int64)) == Val(true)
 
@@ -138,3 +136,82 @@ let method = which(func2, ())
 end
 func3() = func2()
 @test_throws UndefVarError func3()
+
+
+
+## overlay method tables
+
+module OverlayModule
+
+using Base.Experimental: @MethodTable, @overlay
+
+@MethodTable(mt)
+
+@overlay mt function sin(x::Float64)
+    1
+end
+
+# short function def
+@overlay mt cos(x::Float64) = 2
+
+# parametric function def
+@overlay mt tan(x::T) where {T} = 3
+
+end
+
+methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, nothing, 1, Base.get_world_counter())
+@test only(methods).method.module === Base.Math
+
+methods = Base._methods_by_ftype(Tuple{typeof(sin), Float64}, OverlayModule.mt, 1, Base.get_world_counter())
+@test only(methods).method.module === OverlayModule
+
+methods = Base._methods_by_ftype(Tuple{typeof(sin), Int}, OverlayModule.mt, 1, Base.get_world_counter())
+@test isempty(methods)
+
+# precompilation
+
+load_path = mktempdir()
+depot_path = mktempdir()
+try
+    pushfirst!(LOAD_PATH, load_path)
+    pushfirst!(DEPOT_PATH, depot_path)
+
+    write(joinpath(load_path, "Foo.jl"),
+          """
+          module Foo
+          Base.Experimental.@MethodTable(mt)
+          Base.Experimental.@overlay mt sin(x::Int) = 1
+          end
+          """)
+
+     # precompiling Foo serializes the overlay method through the `mt` binding in the module
+     Foo = Base.require(Main, :Foo)
+     @test length(Foo.mt) == 1
+
+    write(joinpath(load_path, "Bar.jl"),
+          """
+          module Bar
+          Base.Experimental.@MethodTable(mt)
+          end
+          """)
+
+    write(joinpath(load_path, "Baz.jl"),
+          """
+          module Baz
+          using Bar
+          Base.Experimental.@overlay Bar.mt sin(x::Int) = 1
+          end
+          """)
+
+     # when referring an method table in another module,
+     # the overlay method needs to be discovered explicitly
+     Bar = Base.require(Main, :Bar)
+     @test length(Bar.mt) == 0
+     Baz = Base.require(Main, :Baz)
+     @test length(Bar.mt) == 1
+finally
+    rm(load_path, recursive=true, force=true)
+    rm(depot_path, recursive=true, force=true)
+    filter!((≠)(load_path), LOAD_PATH)
+    filter!((≠)(depot_path), DEPOT_PATH)
+end
diff --git a/test/compiler/inference.jl b/test/compiler/inference.jl
index 50eb5e9734b353..b400f17cb1fb3a 100644
--- a/test/compiler/inference.jl
+++ b/test/compiler/inference.jl
@@ -7,6 +7,16 @@ isdispatchelem(@nospecialize x) = !isa(x, Type) || Core.Compiler.isdispatchelem(
 using Random, Core.IR
 using InteractiveUtils: code_llvm
 
+include("irutils.jl")
+
+f39082(x::Vararg{T}) where {T <: Number} = x[1]
+let ast = only(code_typed(f39082, Tuple{Vararg{Rational}}))[1]
+    @test ast.slottypes == Any[Const(f39082), Tuple{Vararg{Rational}}]
+end
+let ast = only(code_typed(f39082, Tuple{Rational, Vararg{Rational}}))[1]
+    @test ast.slottypes == Any[Const(f39082), Tuple{Rational, Vararg{Rational}}]
+end
+
 # demonstrate some of the type-size limits
 @test Core.Compiler.limit_type_size(Ref{Complex{T} where T}, Ref, Ref, 100, 0) == Ref
 @test Core.Compiler.limit_type_size(Ref{Complex{T} where T}, Ref{Complex{T} where T}, Ref, 100, 0) == Ref{Complex{T} where T}
@@ -36,6 +46,52 @@ let t = Tuple{Ref{T},T,T} where T, c = Tuple{Ref, T, T} where T # #36407
     @test t <: Core.Compiler.limit_type_size(t, c, Union{}, 1, 100)
 end
 
+# obtain Vararg with 2 undefined fields
+let va = ccall(:jl_type_intersection_with_env, Any, (Any, Any), Tuple{Tuple}, Tuple{Tuple{Vararg{Any, N}}} where N)[2][1]
+    @test Core.Compiler.__limit_type_size(Tuple, va, Core.svec(va, Union{}), 2, 2) === Tuple
+end
+
+# issue #42835
+@test !Core.Compiler.type_more_complex(Int, Any, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Int, Type{Int}, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{Int}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Type{Int}, Core.svec(Type{Int}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Int, Core.svec(Type{Int}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Int}}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{Int}}}, Type{Type{Int}}, Core.svec(Type{Type{Int}}), 1, 1, 1)
+
+@test  Core.Compiler.type_more_complex(ComplexF32, Any, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(ComplexF32, Any, Core.svec(Type{ComplexF32}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(ComplexF32, Type{ComplexF32}, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(Type{Type{ComplexF32}}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{ComplexF32}, Type{Type{ComplexF32}}, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{ComplexF32}, ComplexF32, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{ComplexF32}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, Type{ComplexF32}, Core.svec(Type{ComplexF32}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{ComplexF32}}, ComplexF32, Core.svec(ComplexF32), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{ComplexF32}}}, Type{Type{ComplexF32}}, Core.svec(Type{ComplexF32}), 1, 1, 1)
+
+# n.b. Type{Type{Union{}} === Type{Core.TypeofBottom}
+@test !Core.Compiler.type_more_complex(Type{Union{}}, Any, Core.svec(), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{Type{Union{}}}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Any, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{Union{}}}}, Type{Type{Union{}}}, Core.svec(Type{Type{Union{}}}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Type{Type{Type{Union{}}}}}, Type{Type{Type{Union{}}}}, Core.svec(Type{Type{Type{Union{}}}}), 1, 1, 1)
+
+@test !Core.Compiler.type_more_complex(Type{1}, Type{2}, Core.svec(), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test !Core.Compiler.type_more_complex(Type{Union{Float32,Float64}}, Union{Float32,Float64}, Core.svec(Union{Float32,Float64}), 0, 1, 1)
+@test_broken Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Type{Union{Float32,Float64}}, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+@test  Core.Compiler.type_more_complex(Type{<:Union{Float32,Float64}}, Any, Core.svec(Union{Float32,Float64}), 1, 1, 1)
+
+
+let # 40336
+    t = Type{Type{Int}}
+    c = Type{Int}
+    r = Core.Compiler.limit_type_size(t, c, c, 100, 100)
+    @test t !== r && t <: r
+end
+
 @test Core.Compiler.unionlen(Union{}) == 1
 @test Core.Compiler.unionlen(Int8) == 1
 @test Core.Compiler.unionlen(Union{Int8, Int16}) == 2
@@ -95,12 +151,19 @@ tmerge_test(Tuple{ComplexF64, ComplexF64, ComplexF32}, Tuple{Vararg{Union{Comple
 tmerge_test(Tuple{}, Tuple{Complex, Vararg{Union{ComplexF32, ComplexF64}}},
     Tuple{Vararg{Complex}})
 @test Core.Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
+    Union{Nothing, Tuple{}, Tuple{ComplexF32, ComplexF32}}
+@test Core.Compiler.tmerge(Tuple{}, Union{Nothing, Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF32}}) ==
     Union{Nothing, Tuple{Vararg{ComplexF32}}}
 @test Core.Compiler.tmerge(Union{Nothing, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
+    Union{Nothing, Tuple{ComplexF32}, Tuple{ComplexF32, ComplexF32}}
+@test Core.Compiler.tmerge(Union{Nothing, Tuple{}, Tuple{ComplexF32}}, Union{Nothing, Tuple{ComplexF32, ComplexF32}}) ==
     Union{Nothing, Tuple{Vararg{ComplexF32}}}
-@test Core.Compiler.tmerge(Vector{Int}, Core.Compiler.tmerge(Vector{String}, Vector{Bool})) == Vector
+@test Core.Compiler.tmerge(Vector{Int}, Core.Compiler.tmerge(Vector{String}, Vector{Bool})) ==
+    Union{Vector{Bool}, Vector{Int}, Vector{String}}
+@test Core.Compiler.tmerge(Vector{Int}, Core.Compiler.tmerge(Vector{String}, Union{Vector{Bool}, Vector{Symbol}})) == Vector
 @test Core.Compiler.tmerge(Base.BitIntegerType, Union{}) === Base.BitIntegerType
 @test Core.Compiler.tmerge(Union{}, Base.BitIntegerType) === Base.BitIntegerType
+@test Core.Compiler.tmerge(Core.Compiler.InterConditional(1, Int, Union{}), Core.Compiler.InterConditional(2, String, Union{})) === Core.Compiler.Const(true)
 
 struct SomethingBits
     x::Base.BitIntegerType
@@ -214,6 +277,15 @@ barTuple2() = fooTuple{tuple(:y)}()
           Dict{Int64,Tuple{UnitRange{Int64},UnitRange{Int64}}},
           Core.Compiler.Const(:vals)) == Array{Tuple{UnitRange{Int64},UnitRange{Int64}},1}
 
+# assert robustness of `getfield_tfunc`
+struct GetfieldRobustness
+    field::String
+end
+@test Base.return_types((GetfieldRobustness,String,)) do obj, s
+    t = (10, s) # to form `PartialStruct`
+    getfield(obj, t)
+end |> only === Union{}
+
 # issue #12476
 function f12476(a)
     (k, v) = a
@@ -284,7 +356,7 @@ code_llvm(devnull, invoke_g10878, ())
 @test isa(code_typed(promote,(Any,Any,Vararg{Any})), Array)
 find_tvar10930(sig::Type{T}) where {T<:Tuple} = 1
 function find_tvar10930(arg)
-    if arg<:Tuple
+    if isa(arg, Type) && arg<:Tuple
         find_tvar10930(arg[random_var_name])
     end
     return 1
@@ -627,7 +699,7 @@ function maybe_vararg_tuple_1()
 end
 @test Type{Tuple{Vararg{Int}}} <: Base.return_types(maybe_vararg_tuple_1, ())[1]
 function maybe_vararg_tuple_2()
-    x = Type[Vararg{Int}][1]
+    x = [Vararg{Int}][1]
     Tuple{x}
 end
 @test Type{Tuple{Vararg{Int}}} <: Base.return_types(maybe_vararg_tuple_2, ())[1]
@@ -653,10 +725,10 @@ let fieldtype_tfunc = Core.Compiler.fieldtype_tfunc,
     @test fieldtype_tfunc(Union{Type{Base.RefValue{T}}, Type{Int32}} where {T<:Real}, Const(:x)) == Type{<:Real}
     @test fieldtype_tfunc(Union{Type{Base.RefValue{<:Array}}, Type{Int32}}, Const(:x)) == Type{Array}
     @test fieldtype_tfunc(Union{Type{Base.RefValue{<:Real}}, Type{Int32}}, Const(:x)) == Const(Real)
-    @test fieldtype_tfunc(Const(Union{Base.RefValue{<:Real}, Type{Int32}}), Const(:x)) == Type
-    @test fieldtype_tfunc(Type{Union{Base.RefValue{T}, Type{Int32}}} where {T<:Real}, Const(:x)) == Type
-    @test fieldtype_tfunc(Type{<:Tuple}, Const(1)) == Type
-    @test fieldtype_tfunc(Type{<:Tuple}, Any) == Type
+    @test fieldtype_tfunc(Const(Union{Base.RefValue{<:Real}, Type{Int32}}), Const(:x)) == Const(Real)
+    @test fieldtype_tfunc(Type{Union{Base.RefValue{T}, Type{Int32}}} where {T<:Real}, Const(:x)) == Type{<:Real}
+    @test fieldtype_tfunc(Type{<:Tuple}, Const(1)) == Any
+    @test fieldtype_tfunc(Type{<:Tuple}, Any) == Any
     @test fieldtype_nothrow(Type{Base.RefValue{<:Real}}, Const(:x))
     @test !fieldtype_nothrow(Type{Union{}}, Const(:x))
     @test !fieldtype_nothrow(Union{Type{Base.RefValue{T}}, Int32} where {T<:Real}, Const(:x))
@@ -673,6 +745,7 @@ let fieldtype_tfunc = Core.Compiler.fieldtype_tfunc,
     @test fieldtype_nothrow(Type{Tuple{Vararg{Int}}}, Const(2))
     @test fieldtype_nothrow(Type{Tuple{Vararg{Int}}}, Const(42))
     @test !fieldtype_nothrow(Type{<:Tuple{Vararg{Int}}}, Const(1))
+    @test TypeVar <: fieldtype_tfunc(Any, Any)
 end
 
 # issue #11480
@@ -769,20 +842,23 @@ g11015(::Type{Bool}, ::Bool) = 2.0
 
 # better inference of apply (#20343)
 f20343(::String, ::Int) = 1
-f20343(::Int, ::String, ::Int, ::Int) = 1
-f20343(::Int, ::Int, ::String, ::Int, ::Int, ::Int) = 1
-f20343(::Union{Int,String}...) = Int8(1)
+f20343(::Int, ::String, ::Int, ::Int) = 2
+f20343(::Int, ::Int, ::String, ::Int, ::Int, ::Int) = 3
+f20343(::Int, ::Int, ::Int, ::String, ::Int, ::Int, ::Int, ::Int, ::Int, ::Int, ::Int, ::Int) = 4
+f20343(::Union{Int,String}...) = Int8(5)
 f20343(::Any...) = "no"
 function g20343()
     n = rand(1:3)
-    i = ntuple(i->n==i ? "" : 0, 2n)::Union{Tuple{String,Int},Tuple{Int,String,Int,Int},Tuple{Int,Int,String,Int,Int,Int}}
+    T = Union{Tuple{String, Int}, Tuple{Int, String, Int, Int}, Tuple{Int, Int, String, Int, Int, Int}}
+    i = ntuple(i -> n == i ? "" : 0, 2n)::T
     f20343(i...)
 end
 @test Base.return_types(g20343, ()) == [Int]
 function h20343()
     n = rand(1:3)
-    i = ntuple(i->n==i ? "" : 0, 3)::Union{Tuple{String,Int,Int},Tuple{Int,String,Int},Tuple{Int,Int,String}}
-    f20343(i..., i...)
+    T = Union{Tuple{String, Int, Int}, Tuple{Int, String, Int}, Tuple{Int, Int, String}}
+    i = ntuple(i -> n == i ? "" : 0, 3)::T
+    f20343(i..., i..., i..., i...)
 end
 @test Base.return_types(h20343, ()) == [Union{Int8, Int}]
 function i20343()
@@ -851,7 +927,7 @@ end
 aa20704(x) = x(nothing)
 @test code_typed(aa20704, (typeof(a20704),))[1][1].pure
 
-#issue #21065, elision of _apply when splatted expression is not effect_free
+#issue #21065, elision of _apply_iterate when splatted expression is not effect_free
 function f21065(x,y)
     println("x=$x, y=$y")
     return x, y
@@ -861,7 +937,7 @@ function test_no_apply(expr::Expr)
     return all(test_no_apply, expr.args)
 end
 function test_no_apply(ref::GlobalRef)
-    return ref.mod != Core || ref.name !== :_apply
+    return ref.mod != Core || ref.name !== :_apply_iterate
 end
 test_no_apply(::Any) = true
 @test all(test_no_apply, code_typed(g21065, Tuple{Int,Int})[1].first.code)
@@ -1069,6 +1145,21 @@ end
 @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(1)) === Const(true)
 @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(2)) === Bool
 @test isdefined_tfunc(Tuple{Any,Vararg{Any}}, Const(3)) === Bool
+@testset "isdefined check for `NamedTuple`s" begin
+    # concrete `NamedTuple`s
+    @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:x)) === Const(true)
+    @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:y)) === Const(true)
+    @test isdefined_tfunc(NamedTuple{(:x,:y),Tuple{Int,Int}}, Const(:z)) === Const(false)
+    # non-concrete `NamedTuple`s
+    @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:x)) === Const(true)
+    @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:y)) === Const(true)
+    @test isdefined_tfunc(NamedTuple{(:x,:y),<:Tuple{Int,Any}}, Const(:z)) === Const(false)
+end
+struct UnionIsdefinedA; x; end
+struct UnionIsdefinedB; x; end
+@test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:x)) === Const(true)
+@test isdefined_tfunc(Union{UnionIsdefinedA,UnionIsdefinedB}, Const(:y)) === Const(false)
+@test isdefined_tfunc(Union{UnionIsdefinedA,Nothing}, Const(:x)) === Bool
 
 @noinline map3_22347(f, t::Tuple{}) = ()
 @noinline map3_22347(f, t::Tuple) = (f(t[1]), map3_22347(f, Base.tail(t))...)
@@ -1083,10 +1174,10 @@ end
 
 # issue #22875
 
-typeargs = (Type{Int},)
+typeargs = Tuple{Type{Int},}
 @test Base.Core.Compiler.return_type((args...) -> one(args...), typeargs) === Int
 
-typeargs = (Type{Int},Type{Int},Type{Int},Type{Int},Type{Int},Type{Int})
+typeargs = Tuple{Type{Int},Type{Int},Type{Int},Type{Int},Type{Int},Type{Int}}
 @test Base.Core.Compiler.return_type(promote_type, typeargs) === Type{Int}
 
 # demonstrate that inference must converge
@@ -1143,14 +1234,9 @@ function get_linfo(@nospecialize(f), @nospecialize(t))
         throw(ArgumentError("argument is not a generic function"))
     end
     # get the MethodInstance for the method match
-    meth = which(f, t)
-    t = Base.to_tuple_type(t)
-    ft = isa(f, Type) ? Type{f} : typeof(f)
-    tt = Tuple{ft, t.parameters...}
-    precompile(tt) # does inference (calls jl_type_infer) on this signature
-    (ti, env) = ccall(:jl_type_intersection_with_env, Ref{Core.SimpleVector}, (Any, Any), tt, meth.sig)
-    return ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance},
-                 (Any, Any, Any), meth, tt, env)
+    match = Base._which(Base.signature_type(f, t), Base.get_world_counter())
+    precompile(match.spec_types)
+    return Core.Compiler.specialize_method(match)
 end
 
 function test_const_return(@nospecialize(f), @nospecialize(t), @nospecialize(val))
@@ -1238,7 +1324,8 @@ end
 push!(constvec, 10)
 @test @inferred(sizeof_constvec()) == sizeof(Int) * 4
 
-test_const_return((x)->isdefined(x, :re), Tuple{ComplexF64}, true)
+test_const_return(x->isdefined(x, :re), Tuple{ComplexF64}, true)
+
 isdefined_f3(x) = isdefined(x, 3)
 @test @inferred(isdefined_f3(())) == false
 @test find_call(first(code_typed(isdefined_f3, Tuple{Tuple{Vararg{Int}}})[1]), isdefined, 3)
@@ -1449,6 +1536,45 @@ using Core.Compiler: typeof_tfunc
 f_typeof_tfunc(x) = typeof(x)
 @test Base.return_types(f_typeof_tfunc, (Union{<:T, Int} where T<:Complex,)) == Any[Union{Type{Int}, Type{Complex{T}} where T<:Real}]
 
+# arrayref / arrayset / arraysize
+import Core.Compiler: Const, arrayref_tfunc, arrayset_tfunc, arraysize_tfunc
+@test arrayref_tfunc(Const(true), Vector{Int}, Int) === Int
+@test arrayref_tfunc(Const(true), Vector{<:Integer}, Int) === Integer
+@test arrayref_tfunc(Const(true), Vector, Int) === Any
+@test arrayref_tfunc(Const(true), Vector{Int}, Int, Vararg{Int}) === Int
+@test arrayref_tfunc(Const(true), Vector{Int}, Vararg{Int}) === Int
+@test arrayref_tfunc(Const(true), Vector{Int}) === Union{}
+@test arrayref_tfunc(Const(true), String, Int) === Union{}
+@test arrayref_tfunc(Const(true), Vector{Int}, Float64) === Union{}
+@test arrayref_tfunc(Int, Vector{Int}, Int) === Union{}
+@test arrayset_tfunc(Const(true), Vector{Int}, Int, Int) === Vector{Int}
+let ua = Vector{<:Integer}
+    @test arrayset_tfunc(Const(true), ua, Int, Int) === ua
+end
+@test arrayset_tfunc(Const(true), Vector, Int, Int) === Vector
+@test arrayset_tfunc(Const(true), Any, Int, Int) === Any
+@test arrayset_tfunc(Const(true), Vector{String}, String, Int, Vararg{Int}) === Vector{String}
+@test arrayset_tfunc(Const(true), Vector{String}, String, Vararg{Int}) === Vector{String}
+@test arrayset_tfunc(Const(true), Vector{String}, String) === Union{}
+@test arrayset_tfunc(Const(true), String, Char, Int) === Union{}
+@test arrayset_tfunc(Const(true), Vector{Int}, Int, Float64) === Union{}
+@test arrayset_tfunc(Int, Vector{Int}, Int, Int) === Union{}
+@test arrayset_tfunc(Const(true), Vector{Int}, Float64, Int) === Union{}
+@test arraysize_tfunc(Vector, Int) === Int
+@test arraysize_tfunc(Vector, Float64) === Union{}
+@test arraysize_tfunc(String, Int) === Union{}
+
+let tuple_tfunc
+    function tuple_tfunc(@nospecialize xs...)
+        return Core.Compiler.tuple_tfunc(Any[xs...])
+    end
+    @test Core.Compiler.widenconst(tuple_tfunc(Type{Int})) === Tuple{DataType}
+    # https://github.com/JuliaLang/julia/issues/44705
+    @test tuple_tfunc(Union{Type{Int32},Type{Int64}}) === Tuple{Type}
+    @test tuple_tfunc(DataType) === Tuple{DataType}
+    @test tuple_tfunc(UnionAll) === Tuple{UnionAll}
+end
+
 function f23024(::Type{T}, ::Int) where T
     1 + 1
 end
@@ -1492,7 +1618,6 @@ let linfo = get_linfo(Base.convert, Tuple{Type{Int64}, Int32}),
     @test opt.src.ssavaluetypes isa Vector{Any}
     @test !opt.src.inferred
     @test opt.mod === Base
-    @test opt.nargs == 3
 end
 
 # approximate static parameters due to unions
@@ -1519,12 +1644,12 @@ f_pure_add() = (1 + 1 == 2) ? true : "FAIL"
 @test @inferred f_pure_add()
 
 # inference of `T.mutable`
-@test Core.Compiler.getfield_tfunc(Const(Int), Const(:mutable)) == Const(false)
-@test Core.Compiler.getfield_tfunc(Const(Vector{Int}), Const(:mutable)) == Const(true)
-@test Core.Compiler.getfield_tfunc(DataType, Const(:mutable)) == Bool
+@test Core.Compiler.getfield_tfunc(Const(Int.name), Const(:flags)) == Const(0x4)
+@test Core.Compiler.getfield_tfunc(Const(Vector{Int}.name), Const(:flags)) == Const(0x2)
+@test Core.Compiler.getfield_tfunc(Core.TypeName, Const(:flags)) == UInt8
 
 # getfield on abstract named tuples. issue #32698
-import Core.Compiler.getfield_tfunc
+import Core.Compiler: getfield_tfunc, Const
 @test getfield_tfunc(NamedTuple{(:id, :y), T} where {T <: Tuple{Int, Union{Float64, Missing}}},
                      Const(:y)) == Union{Missing, Float64}
 @test getfield_tfunc(NamedTuple{(:id, :y), T} where {T <: Tuple{Int, Union{Float64, Missing}}},
@@ -1538,6 +1663,130 @@ import Core.Compiler.getfield_tfunc
 @test getfield_tfunc(NamedTuple{<:Any, T} where {T <: Tuple{Int, Union{Float64, Missing}}},
                      Const(:x)) == Union{Missing, Float64, Int}
 
+mutable struct ARef{T}
+    @atomic x::T
+end
+@test getfield_tfunc(ARef{Int},Const(:x),Symbol) === Int
+@test getfield_tfunc(ARef{Int},Const(:x),Bool) === Int
+@test getfield_tfunc(ARef{Int},Const(:x),Symbol,Bool) === Int
+@test getfield_tfunc(ARef{Int},Const(:x),Symbol,Vararg{Symbol}) === Int # `Vararg{Symbol}` might be empty
+@test getfield_tfunc(ARef{Int},Const(:x),Vararg{Symbol}) === Int
+@test getfield_tfunc(ARef{Int},Const(:x),Any,) === Int
+@test getfield_tfunc(ARef{Int},Const(:x),Any,Any) === Int
+@test getfield_tfunc(ARef{Int},Const(:x),Any,Vararg{Any}) === Int
+@test getfield_tfunc(ARef{Int},Const(:x),Vararg{Any}) === Int
+@test getfield_tfunc(ARef{Int},Const(:x),Int) === Union{}
+@test getfield_tfunc(ARef{Int},Const(:x),Bool,Symbol) === Union{}
+@test getfield_tfunc(ARef{Int},Const(:x),Symbol,Symbol) === Union{}
+@test getfield_tfunc(ARef{Int},Const(:x),Bool,Bool) === Union{}
+
+import Core.Compiler: setfield!_tfunc, setfield!_nothrow, Const
+mutable struct XY{X,Y}
+    x::X
+    y::Y
+end
+mutable struct ABCDconst
+    const a
+    const b::Int
+    c
+    const d::Union{Int,Nothing}
+end
+@test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int) === Int
+@test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Int, Symbol) === Int
+@test setfield!_tfunc(Base.RefValue{Int}, Const(1), Int) === Int
+@test setfield!_tfunc(Base.RefValue{Int}, Const(1), Int, Symbol) === Int
+@test setfield!_tfunc(Base.RefValue{Int}, Int, Int) === Int
+@test setfield!_tfunc(Base.RefValue{Any}, Const(:x), Int) === Int
+@test setfield!_tfunc(Base.RefValue{Any}, Const(:x), Int, Symbol) === Int
+@test setfield!_tfunc(Base.RefValue{Any}, Const(1), Int) === Int
+@test setfield!_tfunc(Base.RefValue{Any}, Const(1), Int, Symbol) === Int
+@test setfield!_tfunc(Base.RefValue{Any}, Int, Int) === Int
+@test setfield!_tfunc(XY{Any,Any}, Const(1), Int) === Int
+@test setfield!_tfunc(XY{Any,Any}, Const(2), Float64) === Float64
+@test setfield!_tfunc(XY{Int,Float64}, Const(1), Int) === Int
+@test setfield!_tfunc(XY{Int,Float64}, Const(2), Float64) === Float64
+@test setfield!_tfunc(ABCDconst, Const(:c), Any) === Any
+@test setfield!_tfunc(ABCDconst, Const(3), Any) === Any
+@test setfield!_tfunc(ABCDconst, Symbol, Any) === Any
+@test setfield!_tfunc(ABCDconst, Int, Any) === Any
+@test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Const(:x), Int) === Int
+@test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Const(:x), Int) === Int
+@test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Const(1), Int) === Int
+@test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Const(1), Int) === Int
+@test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Symbol, Int) === Int
+@test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Symbol, Int) === Int
+@test setfield!_tfunc(Union{Base.RefValue{Any},Some{Any}}, Int, Int) === Int
+@test setfield!_tfunc(Union{Base.RefValue,Some{Any}}, Int, Int) === Int
+@test setfield!_tfunc(Any, Symbol, Int) === Int
+@test setfield!_tfunc(Any, Int, Int) === Int
+@test setfield!_tfunc(Any, Any, Int) === Int
+@test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Float64) === Union{}
+@test setfield!_tfunc(Base.RefValue{Int}, Const(:x), Float64, Symbol) === Union{}
+@test setfield!_tfunc(Base.RefValue{Int}, Const(1), Float64) === Union{}
+@test setfield!_tfunc(Base.RefValue{Int}, Const(1), Float64, Symbol) === Union{}
+@test setfield!_tfunc(Base.RefValue{Int}, Int, Float64) === Union{}
+@test setfield!_tfunc(Base.RefValue{Any}, Const(:y), Int) === Union{}
+@test setfield!_tfunc(Base.RefValue{Any}, Const(:y), Int, Bool) === Union{}
+@test setfield!_tfunc(Base.RefValue{Any}, Const(2), Int) === Union{}
+@test setfield!_tfunc(Base.RefValue{Any}, Const(2), Int, Bool) === Union{}
+@test setfield!_tfunc(Base.RefValue{Any}, String, Int) === Union{}
+@test setfield!_tfunc(Some{Any}, Const(:value), Int) === Union{}
+@test setfield!_tfunc(Some, Const(:value), Int) === Union{}
+@test setfield!_tfunc(Some{Any}, Const(1), Int) === Union{}
+@test setfield!_tfunc(Some, Const(1), Int) === Union{}
+@test setfield!_tfunc(Some{Any}, Symbol, Int) === Union{}
+@test setfield!_tfunc(Some, Symbol, Int) === Union{}
+@test setfield!_tfunc(Some{Any}, Int, Int) === Union{}
+@test setfield!_tfunc(Some, Int, Int) === Union{}
+@test setfield!_tfunc(Const(@__MODULE__), Const(:v), Int) === Union{}
+@test setfield!_tfunc(Const(@__MODULE__), Int, Int) === Union{}
+@test setfield!_tfunc(Module, Const(:v), Int) === Union{}
+@test setfield!_tfunc(Union{Module,Base.RefValue{Any}}, Const(:v), Int) === Union{}
+@test setfield!_tfunc(ABCDconst, Const(:a), Any) === Union{}
+@test setfield!_tfunc(ABCDconst, Const(:b), Any) === Union{}
+@test setfield!_tfunc(ABCDconst, Const(:d), Any) === Union{}
+@test setfield!_tfunc(ABCDconst, Const(1), Any) === Union{}
+@test setfield!_tfunc(ABCDconst, Const(2), Any) === Union{}
+@test setfield!_tfunc(ABCDconst, Const(4), Any) === Union{}
+@test setfield!_nothrow(Base.RefValue{Int}, Const(:x), Int)
+@test setfield!_nothrow(Base.RefValue{Int}, Const(1), Int)
+@test setfield!_nothrow(Base.RefValue{Any}, Const(:x), Int)
+@test setfield!_nothrow(Base.RefValue{Any}, Const(1), Int)
+@test setfield!_nothrow(XY{Any,Any}, Const(:x), Int)
+@test setfield!_nothrow(XY{Any,Any}, Const(:x), Any)
+@test setfield!_nothrow(XY{Int,Float64}, Const(:x), Int)
+@test setfield!_nothrow(ABCDconst, Const(:c), Any)
+@test setfield!_nothrow(ABCDconst, Const(3), Any)
+@test !setfield!_nothrow(XY{Int,Float64}, Symbol, Any)
+@test !setfield!_nothrow(XY{Int,Float64}, Int, Any)
+@test !setfield!_nothrow(Base.RefValue{Int}, Const(:x), Any)
+@test !setfield!_nothrow(Base.RefValue{Int}, Const(1), Any)
+@test !setfield!_nothrow(Any[Base.RefValue{Any}, Const(:x), Int, Symbol])
+@test !setfield!_nothrow(Base.RefValue{Any}, Symbol, Int)
+@test !setfield!_nothrow(Base.RefValue{Any}, Int, Int)
+@test !setfield!_nothrow(XY{Int,Float64}, Const(:y), Int)
+@test !setfield!_nothrow(XY{Int,Float64}, Symbol, Int)
+@test !setfield!_nothrow(XY{Int,Float64}, Int, Int)
+@test !setfield!_nothrow(ABCDconst, Const(:a), Any)
+@test !setfield!_nothrow(ABCDconst, Const(:b), Any)
+@test !setfield!_nothrow(ABCDconst, Const(:d), Any)
+@test !setfield!_nothrow(ABCDconst, Symbol, Any)
+@test !setfield!_nothrow(ABCDconst, Const(1), Any)
+@test !setfield!_nothrow(ABCDconst, Const(2), Any)
+@test !setfield!_nothrow(ABCDconst, Const(4), Any)
+@test !setfield!_nothrow(ABCDconst, Int, Any)
+@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Const(:x), Int)
+@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Const(:x), Int)
+@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Const(1), Int)
+@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Const(1), Int)
+@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Symbol, Int)
+@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Symbol, Int)
+@test !setfield!_nothrow(Union{Base.RefValue{Any},Some{Any}}, Int, Int)
+@test !setfield!_nothrow(Union{Base.RefValue,Some{Any}}, Int, Int)
+@test !setfield!_nothrow(Any, Symbol, Int)
+@test !setfield!_nothrow(Any, Int, Int)
+@test !setfield!_nothrow(Any, Any, Int)
+
 struct Foo_22708
     x::Ptr{Foo_22708}
 end
@@ -1705,6 +1954,201 @@ for expr25261 in opt25261[i:end]
 end
 @test foundslot
 
+@testset "inter-procedural conditional constraint propagation" begin
+    # simple cases
+    isaint(a) = isa(a, Int)
+    @test Base.return_types((Any,)) do a
+        isaint(a) && return a # a::Int
+        return 0
+    end == Any[Int]
+    eqnothing(a) = a === nothing
+    @test Base.return_types((Union{Nothing,Int},)) do a
+        eqnothing(a) && return 0
+        return a # a::Int
+    end == Any[Int]
+
+    # more complicated cases
+    ispositive(a) = isa(a, Int) && a > 0
+    @test Base.return_types((Any,)) do a
+        ispositive(a) && return a # a::Int
+        return 0
+    end == Any[Int]
+    global isaint2
+    isaint2(a::Int)           = true
+    isaint2(@nospecialize(_)) = false
+    @test Base.return_types((Any,)) do a
+        isaint2(a) && return a # a::Int
+        return 0
+    end == Any[Int]
+    global ispositive2
+    ispositive2(a::Int)           = a > 0
+    ispositive2(@nospecialize(_)) = false
+    @test Base.return_types((Any,)) do a
+        ispositive2(a) && return a # a::Int
+        return 0
+    end == Any[Int]
+
+    # type constraints from multiple constant boolean return types
+    function f(x)
+        isa(x, Int) && return true
+        isa(x, Symbol) && return true
+        return false
+    end
+    @test Base.return_types((Any,)) do x
+        f(x) && return x # x::Union{Int,Symbol}
+        return nothing
+    end == Any[Union{Int,Symbol,Nothing}]
+
+    # constraint on non-vararg argument of `isva` method
+    isaint_isvapositive(a, va...) = isa(a, Int) && sum(va) > 0
+    @test Base.return_types((Any,Int,Int)) do a, b, c
+        isaint_isvapositive(a, b, c) && return a # a::Int
+        0
+    end == Any[Int]
+
+    # slot as SSA
+    isaT(x, T) = isa(x, T)
+    @test Base.return_types((Any,Int)) do a, b
+        c = a
+        if isaT(c, typeof(b))
+            return c # c::Int
+        end
+        return 0
+    end |> only === Int
+
+    # with Base functions
+    @test Base.return_types((Any,)) do a
+        Base.Fix2(isa, Int)(a) && return a # a::Int
+        return 0
+    end == Any[Int]
+    @test Base.return_types((Union{Nothing,Int},)) do a
+        isnothing(a) && return 0
+        return a # a::Int
+    end == Any[Int]
+    @test Base.return_types((Union{Missing,Int},)) do a
+        ismissing(a) && return 0
+        return a # a::Int
+    end == Any[Int]
+    @test Base.return_types((Any,)) do x
+        Meta.isexpr(x, :call) && return x # x::Expr
+        return nothing
+    end == Any[Union{Nothing,Expr}]
+
+    # handle the edge case
+    let ts = @eval Module() begin
+            edgecase(_) = $(Core.Compiler.InterConditional(2, Int, Any))
+            # create cache
+            Base.return_types(edgecase, (Any,))
+            Base.return_types((Any,)) do x
+                edgecase(x) ? x : nothing # ::Any
+            end
+        end
+        @test ts == Any[Any]
+    end
+
+    # a tricky case: if constant inference derives `Const` while non-constant inference has
+    # derived `InterConditional`, we should not discard that constant information
+    iszero_simple(x) = x === 0
+    @test Base.return_types() do
+        iszero_simple(0) ? nothing : missing
+    end |> only === Nothing
+end
+
+@testset "branching on conditional object" begin
+    # simple
+    @test Base.return_types((Union{Nothing,Int},)) do a
+        b = a === nothing
+        return b ? 0 : a # ::Int
+    end == Any[Int]
+
+    # can use multiple times (as far as the subject of condition hasn't changed)
+    @test Base.return_types((Union{Nothing,Int},)) do a
+        b = a === nothing
+        c = b ? 0 : a # c::Int
+        d = !b ? a : 0 # d::Int
+        return c, d # ::Tuple{Int,Int}
+    end == Any[Tuple{Int,Int}]
+
+    # should invalidate old constraint when the subject of condition has changed
+    @test Base.return_types((Union{Nothing,Int},)) do a
+        cond = a === nothing
+        r1 = cond ? 0 : a # r1::Int
+        a = 0
+        r2 = cond ? a : 1 # r2::Int, not r2::Union{Nothing,Int}
+        return r1, r2 # ::Tuple{Int,Int}
+    end == Any[Tuple{Int,Int}]
+end
+
+# https://github.com/JuliaLang/julia/issues/42090#issuecomment-911824851
+# `PartialStruct` shoudln't wrap `Conditional`
+let M = Module()
+    @eval M begin
+        struct BePartialStruct
+            val::Int
+            cond
+        end
+    end
+
+    rt = @eval M begin
+        Base.return_types((Union{Nothing,Int},)) do a
+            cond = a === nothing
+            obj = $(Expr(:new, M.BePartialStruct, 42, :cond))
+            r1 = getfield(obj, :cond) ? 0 : a # r1::Union{Nothing,Int}, not r1::Int (because PartialStruct doesn't wrap Conditional)
+            a = $(gensym(:anyvar))::Any
+            r2 = getfield(obj, :cond) ? a : nothing # r2::Any, not r2::Const(nothing) (we don't need to worry about constraint invalidation here)
+            return r1, r2 # ::Tuple{Union{Nothing,Int},Any}
+        end |> only
+    end
+    @test rt == Tuple{Union{Nothing,Int},Any}
+end
+
+@testset "conditional constraint propagation from non-`Conditional` object" begin
+    @test Base.return_types((Bool,)) do b
+        if b
+            return !b ? nothing : 1 # ::Int
+        else
+            return 0
+        end
+    end == Any[Int]
+
+    @test Base.return_types((Any,)) do b
+        if b
+            return b # ::Bool
+        else
+            return nothing
+        end
+    end == Any[Union{Bool,Nothing}]
+end
+
+@testset "`from_interprocedural!`: translate inter-procedural information" begin
+    # TODO come up with a test case to check the functionality of `collect_limitations!`
+    # one heavy test case would be to use https://github.com/aviatesk/JET.jl and
+    # check `julia /path/to/JET/jet /path/to/JET/src/JET.jl` doesn't result in errors
+    # because of nested `LimitedAccuracy`es
+
+    # `InterConditional` handling: `abstract_invoke`
+    ispositive(a) = isa(a, Int) && a > 0
+    @test Base.return_types((Any,)) do a
+        if Base.@invoke ispositive(a::Any)
+            return a
+        end
+        return 0
+    end |> only == Int
+    # the `fargs = nothing` edge case
+    @test Base.return_types((Any,)) do a
+        Core.Compiler.return_type(invoke, Tuple{typeof(ispositive), Type{Tuple{Any}}, Any})
+    end |> only == Type{Bool}
+
+    # `InterConditional` handling: `abstract_call_opaque_closure`
+    @test Base.return_types((Any,)) do a
+        f = Base.Experimental.@opaque a -> isa(a, Int) && a > 0
+        if f(a)
+            return a
+        end
+        return 0
+    end |> only === Int
+end
+
 function f25579(g)
     h = g[]
     t = (h === nothing)
@@ -1806,6 +2250,67 @@ function _g_ifelse_isa_()
 end
 @test Base.return_types(_g_ifelse_isa_, ()) == [Int]
 
+@testset "Conditional forwarding" begin
+    # forward `Conditional` if it conveys a constraint on any other argument
+    ifelselike(cnd, x, y) = cnd ? x : y
+
+    @test Base.return_types((Any,Int,)) do x, y
+        ifelselike(isa(x, Int), x, y)
+    end |> only == Int
+
+    # should work nicely with union-split
+    @test Base.return_types((Union{Int,Nothing},)) do x
+        ifelselike(isa(x, Int), x, 0)
+    end |> only == Int
+
+    @test Base.return_types((Any,Int)) do x, y
+        ifelselike(!isa(x, Int), y, x)
+    end |> only == Int
+
+    @test Base.return_types((Any,Int)) do x, y
+        a = ifelselike(x === 0, x, 0) # ::Const(0)
+        if a == 0
+            return y
+        else
+            return nothing # dead branch
+        end
+    end |> only == Int
+
+    # pick up the first if there are multiple constrained arguments
+    @test Base.return_types((Any,)) do x
+        ifelselike(isa(x, Int), x, x)
+    end |> only == Any
+
+    # just propagate multiple constraints
+    ifelselike2(cnd1, cnd2, x, y, z) = cnd1 ? x : cnd2 ? y : z
+    @test Base.return_types((Any,Any)) do x, y
+        ifelselike2(isa(x, Int), isa(y, Int), x, y, 0)
+    end |> only == Int
+
+    # work with `invoke`
+    @test Base.return_types((Any,Any)) do x, y
+        Base.@invoke ifelselike(isa(x, Int), x, y::Int)
+    end |> only == Int
+
+    # don't be confused with vararg method
+    vacond(cnd, va...) = cnd ? va : 0
+    @test Base.return_types((Any,)) do x
+        # at runtime we will see `va::Tuple{Tuple{Int,Int}, Tuple{Int,Int}}`
+        vacond(isa(x, Tuple{Int,Int}), x, x)
+    end |> only == Union{Int,Tuple{Any,Any}}
+
+    # demonstrate extra constraint propagation for Base.ifelse
+    @test Base.return_types((Any,Int,)) do x, y
+        ifelse(isa(x, Int), x, y)
+    end |> only == Int
+
+    # slot as SSA
+    @test Base.return_types((Any,Vector{Any})) do x, y
+        z = x
+        ifelselike(isa(z, Int), z, length(y))
+    end |> only === Int
+end
+
 # Equivalence of Const(T.instance) and T for singleton types
 @test Const(nothing) ⊑ Nothing && Nothing ⊑ Const(nothing)
 
@@ -1815,8 +2320,8 @@ end
 @test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Any,Any}) == Type
 @test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Int}) == Union{}
 @test Core.Compiler.return_type(Core.apply_type, Tuple{Type{Union},Any,Int}) == Union{}
-@test Core.Compiler.return_type(Core.apply_type, Tuple{Any}) == Type
-@test Core.Compiler.return_type(Core.apply_type, Tuple{Any,Any}) == Type
+@test Core.Compiler.return_type(Core.apply_type, Tuple{Any}) == Any
+@test Core.Compiler.return_type(Core.apply_type, Tuple{Any,Any}) == Any
 
 # PR 27351, make sure optimized type intersection for method invalidation handles typevars
 
@@ -2027,6 +2532,7 @@ T27078 = Vector{Vector{T}} where T
 # issue #28070
 g28070(f, args...) = f(args...)
 @test @inferred g28070(Core._apply, Base.:/, (1.0, 1.0)) == 1.0
+@test @inferred g28070(Core._apply_iterate, Base.iterate, Base.:/, (1.0, 1.0)) == 1.0
 
 # issue #28079
 struct Foo28079 end
@@ -2063,12 +2569,10 @@ code28279 = code_lowered(f28279, (Bool,))[1].code
 oldcode28279 = deepcopy(code28279)
 ssachangemap = fill(0, length(code28279))
 labelchangemap = fill(0, length(code28279))
-worklist = Int[]
 let i
     for i in 1:length(code28279)
         stmt = code28279[i]
         if isa(stmt, GotoIfNot)
-            push!(worklist, i)
             ssachangemap[i] = 1
             if i < length(code28279)
                 labelchangemap[i + 1] = 1
@@ -2099,7 +2603,11 @@ end
 # issue #28356
 # unit test to make sure countunionsplit overflows gracefully
 # we don't care what number is returned as long as it's large
-@test Core.Compiler.countunionsplit(Any[Union{Int32,Int64} for i=1:80]) > 100000
+@test Core.Compiler.unionsplitcost(Any[Union{Int32, Int64} for i=1:80]) > 100000
+@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}]) == 2
+@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32, Int64}, Int8]) == 8
+@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32, Int64}, Union{Int8, Int16, Int32}, Int8]) == 6
+@test Core.Compiler.unionsplitcost(Any[Union{Int8, Int16, Int32}, Union{Int8, Int16, Int32, Int64}, Int8]) == 6
 
 # make sure compiler doesn't hang in union splitting
 
@@ -2280,9 +2788,9 @@ end
 
 @test @inferred(g28955((1,), 1.0)) === Bool
 
-# Test that inlining can look through repeated _applys
+# Test that inlining can look through repeated _apply_iterates
 foo_inlining_apply(args...) = ccall(:jl_, Nothing, (Any,), args[1])
-bar_inlining_apply() = Core._apply(Core._apply, (foo_inlining_apply,), ((1,),))
+bar_inlining_apply() = Core._apply_iterate(iterate, Core._apply_iterate, (iterate,), (foo_inlining_apply,), ((1,),))
 let ci = code_typed(bar_inlining_apply, Tuple{})[1].first
     @test length(ci.code) == 2
     @test ci.code[1].head == :foreigncall
@@ -2521,7 +3029,7 @@ const DenseIdx = Union{IntRange,Integer}
     foo_26724((result..., length(r)), I...)
 @test @inferred(foo_26724((), 1:4, 1:5, 1:6)) === (4, 5, 6)
 
-# Non uniformity in expresions with PartialTypeVar
+# Non uniformity in expressions with PartialTypeVar
 @test Core.Compiler.:⊑(Core.Compiler.PartialTypeVar(TypeVar(:N), true, true), TypeVar)
 let N = TypeVar(:N)
     @test Core.Compiler.apply_type_nothrow([Core.Compiler.Const(NTuple),
@@ -2623,7 +3131,23 @@ end
 
 f() = _foldl_iter(step, (Missing[],), [0.0], 1)
 end
-@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}) == Tuple{Int}
+@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 0) == Tuple{Int}
+@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 1) == Tuple{Int}
+@test Core.Compiler.typesubtract(Tuple{Union{Int,Char}}, Tuple{Char}, 2) == Tuple{Int}
+@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 0) ==
+        Tuple{Int, Union{Char, Int}, Union{Char, Int}}
+@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, Tuple{Char, Any, Any}, 10) ==
+        Union{Tuple{Int, Char, Char}, Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}}
+@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 0) ==
+        NTuple{3, Union{Int, Char}}
+@test Core.Compiler.typesubtract(NTuple{3, Union{Int, Char}}, NTuple{3, Char}, 10) ==
+        Union{Tuple{Char, Char, Int}, Tuple{Char, Int, Char}, Tuple{Char, Int, Int}, Tuple{Int, Char, Char},
+              Tuple{Int, Char, Int}, Tuple{Int, Int, Char}, Tuple{Int, Int, Int}}
+# Test that these don't throw
+@test Core.Compiler.typesubtract(Tuple{Vararg{Int}}, Tuple{Vararg{Char}}, 0) == Tuple{Vararg{Int}}
+@test Core.Compiler.typesubtract(NTuple{3, Real}, NTuple{3, Char}, 0) == NTuple{3, Real}
+@test Core.Compiler.typesubtract(NTuple{3, Union{Real, Char}}, NTuple{2, Char}, 0) == NTuple{3, Union{Real, Char}}
+
 @test Base.return_types(Issue35566.f) == [Val{:expected}]
 
 # constant prop through keyword arguments
@@ -2633,11 +3157,12 @@ _use_unstable_kw_2() = _unstable_kw(x = 2, y = rand())
 @test Base.return_types(_use_unstable_kw_1) == Any[String]
 @test Base.return_types(_use_unstable_kw_2) == Any[String]
 @eval struct StructWithSplatNew
-    x::Int
+    x::String
     StructWithSplatNew(t) = $(Expr(:splatnew, :StructWithSplatNew, :t))
 end
 _construct_structwithsplatnew() = StructWithSplatNew(("",))
 @test Base.return_types(_construct_structwithsplatnew) == Any[StructWithSplatNew]
+@test isa(_construct_structwithsplatnew(), StructWithSplatNew)
 
 # case where a call cycle can be broken by constant propagation
 struct NotQRSparse
@@ -2697,6 +3222,21 @@ function symcmp36230(vec)
 end
 @test Base.return_types(symcmp36230, (Vector{Any},)) == Any[Bool]
 
+function foo42190(r::Union{Nothing,Int}, n::Int)
+    while r !== nothing && r < n
+        return r # `r::Int`
+    end
+    return n
+end
+@test Base.return_types(foo42190, (Union{Nothing, Int}, Int)) == Any[Int]
+function bar42190(r::Union{Nothing,Int}, n::Int)
+    while r === nothing || r < n
+        return n
+    end
+    return r # `r::Int`
+end
+@test Base.return_types(bar42190, (Union{Nothing, Int}, Int)) == Any[Int]
+
 # Issue #36531, double varargs in abstract_iteration
 f36531(args...) = tuple((args...)...)
 @test @inferred(f36531(1,2,3)) == (1,2,3)
@@ -2708,9 +3248,24 @@ partial_return_2(x) = Val{partial_return_1(x)[2]}
 
 @test Base.return_types(partial_return_2, (Int,)) == Any[Type{Val{1}}]
 
-# Precision of abstract_iteration
+# Soundness and precision of abstract_iteration
+f41839() = (1:100...,)
+@test NTuple{100,Int} <: only(Base.return_types(f41839, ())) <: Tuple{Vararg{Int}}
 f_splat(x) = (x...,)
 @test Base.return_types(f_splat, (Pair{Int,Int},)) == Any[Tuple{Int, Int}]
+@test Base.return_types(f_splat, (UnitRange{Int},)) == Any[Tuple{Vararg{Int}}]
+struct Itr41839_1 end # empty or infinite
+Base.iterate(::Itr41839_1) = rand(Bool) ? (nothing, nothing) : nothing
+Base.iterate(::Itr41839_1, ::Nothing) = (nothing, nothing)
+@test Base.return_types(f_splat, (Itr41839_1,)) == Any[Tuple{}]
+struct Itr41839_2 end # empty or failing
+Base.iterate(::Itr41839_2) = rand(Bool) ? (nothing, nothing) : nothing
+Base.iterate(::Itr41839_2, ::Nothing) = error()
+@test Base.return_types(f_splat, (Itr41839_2,)) == Any[Tuple{}]
+struct Itr41839_3 end
+Base.iterate(::Itr41839_3 ) = rand(Bool) ? nothing : (nothing, 1)
+Base.iterate(::Itr41839_3 , i) = i < 16 ? (i, i + 1) : nothing
+@test only(Base.return_types(f_splat, (Itr41839_3,))) <: Tuple{Vararg{Union{Nothing, Int}}}
 
 # issue #32699
 f32699(a) = (id = a[1],).id
@@ -2782,11 +3337,12 @@ end
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(===), Any, Vararg}) == Bool
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(===), Any, Any, Vararg}) == Bool
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(===), Any, Any, Any, Vararg}) == Union{}
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Vararg{Symbol}}) == Symbol
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Vararg{Symbol}}) == Union{}
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Vararg{Symbol}}) == Symbol
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Vararg{Integer}}) == Integer
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Vararg}) == Integer
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Vararg}) == Union{}
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Vararg}) == Integer
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(setfield!), Any, Symbol, Integer, Any, Any, Vararg}) == Union{}
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Vararg}) == Expr
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Any, Vararg}) == Expr
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(Core._expr), Any, Any, Vararg}) == Expr
@@ -2797,17 +3353,783 @@ end
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Vararg}) == Int
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Vararg}) == Int
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Any, Vararg}) == Int
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Any, Any, Any, Any, Vararg}) == Union{}
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Vararg}) == Type
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Vararg}) == Type
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Vararg}) == Type
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Any, Vararg}) == Type
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Tuple{Int}, Any, Any, Any, Vararg}) == Int
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(getfield), Any, Any, Any, Any, Any, Vararg}) == Union{}
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Vararg}) == Any
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Vararg}) == Any
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Vararg}) == Any
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Any, Vararg}) == Any
 @test Core.Compiler.return_type(apply26826, Tuple{typeof(fieldtype), Any, Any, Any, Any, Vararg}) == Union{}
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Vararg}) == Type
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Any, Vararg}) == Type
-@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Any, Any, Vararg}) == Type
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Vararg}) == Any
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Any, Vararg}) == Any
+@test Core.Compiler.return_type(apply26826, Tuple{typeof(Core.apply_type), Any, Any, Vararg}) == Any
 f_apply_cglobal(args...) = cglobal(args...)
 @test Core.Compiler.return_type(f_apply_cglobal, Tuple{Vararg{Type{Int}}}) == Ptr
 @test Core.Compiler.return_type(f_apply_cglobal, Tuple{Any, Vararg{Type{Int}}}) == Ptr
 @test Core.Compiler.return_type(f_apply_cglobal, Tuple{Any, Type{Int}, Vararg{Type{Int}}}) == Ptr{Int}
 @test Core.Compiler.return_type(f_apply_cglobal, Tuple{Any, Type{Int}, Type{Int}, Vararg{Type{Int}}}) == Union{}
+
+# issue #37532
+@test Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{Int}}, Int])
+@test Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr{T}} where T, Ptr])
+@test !Core.Compiler.intrinsic_nothrow(Core.bitcast, Any[Type{Ptr}, Ptr])
+f37532(T, x) = (Core.bitcast(Ptr{T}, x); x)
+@test Base.return_types(f37532, Tuple{Any, Int}) == Any[Int]
+
+# PR #37749
+# Helper functions for Core.Compiler.Timings. These are normally accessed via a package -
+# usually (SnoopCompileCore).
+function time_inference(f)
+    Core.Compiler.Timings.reset_timings()
+    Core.Compiler.__set_measure_typeinf(true)
+    f()
+    Core.Compiler.__set_measure_typeinf(false)
+    Core.Compiler.Timings.close_current_timer()
+    return Core.Compiler.Timings._timings[1]
+end
+function depth(t::Core.Compiler.Timings.Timing)
+    maximum(depth.(t.children), init=0) + 1
+end
+function flatten_times(t::Core.Compiler.Timings.Timing)
+    collect(Iterators.flatten([(t.time => t.mi_info,), flatten_times.(t.children)...]))
+end
+# Some very limited testing of timing the type inference (#37749).
+@testset "Core.Compiler.Timings" begin
+    # Functions that call each other
+    @eval module M1
+        i(x) = x+5
+        i2(x) = x+2
+        h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
+        g(y::Integer, x) = h(Any[y]) + Int(x)
+    end
+    timing1 = time_inference() do
+        @eval M1.g(2, 3.0)
+    end
+    @test occursin(r"Core.Compiler.Timings.Timing\(InferenceFrameInfo for Core.Compiler.Timings.ROOT\(\)\) with \d+ children", sprint(show, timing1))
+    # The last two functions to be inferred should be `i` and `i2`, inferred at runtime with
+    # their concrete types.
+    @test sort([mi_info.mi.def.name for (time,mi_info) in flatten_times(timing1)[end-1:end]]) == [:i, :i2]
+    @test all(child->isa(child.bt, Vector), timing1.children)
+    @test all(child->child.bt===nothing, timing1.children[1].children)
+    # Test the stacktrace
+    @test isa(stacktrace(timing1.children[1].bt), Vector{Base.StackTraces.StackFrame})
+    # Test that inference has cached some of the Method Instances
+    timing2 = time_inference() do
+        @eval M1.g(2, 3.0)
+    end
+    @test length(flatten_times(timing2)) < length(flatten_times(timing1))
+    # Printing of InferenceFrameInfo for mi.def isa Module
+    @eval module M2
+        i(x) = x+5
+        i2(x) = x+2
+        h(a::Array) = i2(a[1]::Integer) + i(a[1]::Integer) + 2
+        g(y::Integer, x) = h(Any[y]) + Int(x)
+    end
+    # BEGIN LINE NUMBER SENSITIVITY (adjust the line offset below as needed)
+    timingmod = time_inference() do
+        @eval @testset "Outer" begin
+            @testset "Inner" begin
+                for i = 1:2 M2.g(2, 3.0) end
+            end
+        end
+    end
+    @test occursin("thunk from $(@__MODULE__) starting at $(@__FILE__):$((@__LINE__) - 5)", string(timingmod.children))
+    # END LINE NUMBER SENSITIVITY
+
+    # Recursive function
+    @eval module _Recursive f(n::Integer) = n == 0 ? 0 : f(n-1) + 1 end
+    timing = time_inference() do
+        @eval _Recursive.f(Base.inferencebarrier(5))
+    end
+    @test 2 <= depth(timing) <= 3  # root -> f (-> +)
+    @test 2 <= length(flatten_times(timing)) <= 3  # root, f, +
+
+    # Functions inferred with multiple constants
+    @eval module C
+        i(x) = x === 0 ? 0 : 1 / x
+        a(x) = i(0) * i(x)
+        b() = i(0) * i(1) * i(0)
+        function loopc(n)
+            s = 0
+            for i = 1:n
+                s += i
+            end
+            return s
+        end
+        call_loopc() = loopc(5)
+        myfloor(::Type{T}, x) where T = floor(T, x)
+        d(x) = myfloor(Int16, x)
+    end
+    timing = time_inference() do
+        @eval C.a(2)
+        @eval C.b()
+        @eval C.call_loopc()
+        @eval C.d(3.2)
+    end
+    ft = flatten_times(timing)
+    @test !isempty(ft)
+    str = sprint(show, ft)
+    @test occursin("InferenceFrameInfo for /(1::$Int, ::$Int)", str)  # inference constants
+    @test occursin("InferenceFrameInfo for Core.Compiler.Timings.ROOT()", str) # qualified
+    # loopc has internal slots, check constant printing in this case
+    sel = filter(ti -> ti.second.mi.def.name === :loopc, ft)
+    ifi = sel[end].second
+    @test length(ifi.slottypes) > ifi.nargs
+    str = sprint(show, sel)
+    @test occursin("InferenceFrameInfo for $(@__MODULE__).C.loopc(5::$Int)", str)
+    # check that types aren't double-printed as `T::Type{T}`
+    sel = filter(ti -> ti.second.mi.def.name === :myfloor, ft)
+    str = sprint(show, sel)
+    @test occursin("InferenceFrameInfo for $(@__MODULE__).C.myfloor(::Type{Int16}, ::Float64)", str)
+end
+
+# issue #37638
+@test isa(Core.Compiler.return_type(() -> (nothing, Any[]...)[2], Tuple{}), Type)
+
+# Issue #37943
+f37943(x::Any, i::Int) = getfield((x::Pair{false, Int}), i)
+g37943(i::Int) = fieldtype(Pair{false, T} where T, i)
+@test only(Base.return_types(f37943, Tuple{Any, Int})) === Union{}
+@test only(Base.return_types(g37943, Tuple{Int})) === Union{Type{Union{}}, Type{Any}}
+
+# Don't let PartialStruct prevent const prop
+f_partial_struct_constprop(a, b) = (a[1]+b[1], nothing)
+g_partial_struct_constprop() = Val{f_partial_struct_constprop((1,), (1,))[1]}()
+@test only(Base.return_types(g_partial_struct_constprop, Tuple{})) === Val{2}
+
+# N parameter of Vararg is known to be Int
+gVarargInt(x::Int) = 1
+gVarargInt(x) = 2
+fVarargInt(::Tuple{Vararg{Int, N}}) where {N} = Val{gVarargInt(N)}()
+@test only(Base.return_types(fVarargInt, Tuple{Tuple{Vararg{Int}}})) == Val{1}
+
+# issue #38888
+struct S38888{T}
+    S38888(x::S) where {S<:Int} = new{S}()
+    S38888(x::S, y) where {S2<:Int,S<:S2} = new{S}()
+end
+f38888() = S38888(Base.inferencebarrier(3))
+@test f38888() isa S38888
+g38888() = S38888(Base.inferencebarrier(3), nothing)
+@test g38888() isa S38888
+
+f_inf_error_bottom(x::Vector) = isempty(x) ? error(x[1]) : x
+@test Core.Compiler.return_type(f_inf_error_bottom, Tuple{Vector{Any}}) == Vector{Any}
+
+# @constprop :aggressive
+@noinline g_nonaggressive(y, x) = Val{x}()
+@noinline Base.@constprop :aggressive g_aggressive(y, x) = Val{x}()
+
+f_nonaggressive(x) = g_nonaggressive(x, 1)
+f_aggressive(x) = g_aggressive(x, 1)
+
+# The first test just makes sure that improvements to the compiler don't
+# render the annotation effectless.
+@test Base.return_types(f_nonaggressive, Tuple{Int})[1] == Val
+@test Base.return_types(f_aggressive, Tuple{Int})[1] == Val{1}
+
+# @constprop :none
+@noinline Base.@constprop :none g_noaggressive(flag::Bool) = flag ? 1 : 1.0
+ftrue_noaggressive() = g_noaggressive(true)
+@test only(Base.return_types(ftrue_noaggressive, Tuple{})) == Union{Int,Float64}
+
+
+function splat_lotta_unions()
+    a = Union{Tuple{Int},Tuple{String,Vararg{Int}},Tuple{Int,Vararg{Int}}}[(2,)][1]
+    b = Union{Int8,Int16,Int32,Int64,Int128}[1][1]
+    c = Union{Int8,Int16,Int32,Int64,Int128}[1][1]
+    (a...,b...,c...)
+end
+@test Core.Compiler.return_type(splat_lotta_unions, Tuple{}) >: Tuple{Int,Int,Int}
+
+# Bare Core.Argument in IR
+@eval f_bare_argument(x) = $(Core.Argument(2))
+@test Base.return_types(f_bare_argument, (Int,))[1] == Int
+
+# issue #39611
+@test Base.return_types((Union{Int,Nothing},)) do x
+    if x === nothing || x < 0
+        return 0
+    end
+    x
+end == [Int]
+
+# issue #29100
+let f() = Val(fieldnames(Complex{Int}))
+    @test @inferred(f()) === Val((:re,:im))
+end
+
+@testset "switchtupleunion" begin
+    # signature tuple
+    let
+        tunion = Core.Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Nothing})
+        @test Tuple{Int32, Nothing} in tunion
+        @test Tuple{Int64, Nothing} in tunion
+    end
+    let
+        tunion = Core.Compiler.switchtupleunion(Tuple{Union{Int32,Int64}, Union{Float32,Float64}, Nothing})
+        @test Tuple{Int32, Float32, Nothing} in tunion
+        @test Tuple{Int32, Float64, Nothing} in tunion
+        @test Tuple{Int64, Float32, Nothing} in tunion
+        @test Tuple{Int64, Float64, Nothing} in tunion
+    end
+
+    # argtypes
+    let
+        tunion = Core.Compiler.switchtupleunion(Any[Union{Int32,Int64}, Core.Const(nothing)])
+        @test length(tunion) == 2
+        @test Any[Int32, Core.Const(nothing)] in tunion
+        @test Any[Int64, Core.Const(nothing)] in tunion
+    end
+    let
+        tunion = Core.Compiler.switchtupleunion(Any[Union{Int32,Int64}, Union{Float32,Float64}, Core.Const(nothing)])
+        @test length(tunion) == 4
+        @test Any[Int32, Float32, Core.Const(nothing)] in tunion
+        @test Any[Int32, Float64, Core.Const(nothing)] in tunion
+        @test Any[Int64, Float32, Core.Const(nothing)] in tunion
+        @test Any[Int64, Float64, Core.Const(nothing)] in tunion
+    end
+end
+
+@testset "constant prop' for union split signature" begin
+    # indexing into tuples really relies on constant prop', and we will get looser result
+    # (`Union{Int,String,Char}`) if constant prop' doesn't happen for splitunion signatures
+    tt = (Union{Tuple{Int,String},Tuple{Int,Char}},)
+    @test Base.return_types(tt) do t
+        getindex(t, 1)
+    end == Any[Int]
+    @test Base.return_types(tt) do t
+        getindex(t, 2)
+    end == Any[Union{String,Char}]
+    @test Base.return_types(tt) do t
+        a, b = t
+        a
+    end == Any[Int]
+    @test Base.return_types(tt) do t
+        a, b = t
+        b
+    end == Any[Union{String,Char}]
+
+    @test (@eval Module() begin
+        struct F32
+            val::Float32
+            _v::Int
+        end
+        struct F64
+            val::Float64
+            _v::Int
+        end
+        Base.return_types((Union{F32,F64},)) do f
+            f.val
+        end
+    end) == Any[Union{Float32,Float64}]
+
+    @test (@eval Module() begin
+        struct F32
+            val::Float32
+            _v
+        end
+        struct F64
+            val::Float64
+            _v
+        end
+        Base.return_types((Union{F32,F64},)) do f
+            f.val
+        end
+    end) == Any[Union{Float32,Float64}]
+
+    @test Base.return_types((Union{Tuple{Nothing,Any,Any},Tuple{Nothing,Any}},)) do t
+        getindex(t, 1)
+    end == Any[Nothing]
+
+    # issue #37610
+    @test Base.return_types((typeof(("foo" => "bar", "baz" => nothing)), Int)) do a, i
+        y = iterate(a, i)
+        if y !== nothing
+            (k, v), st = y
+            return k, v
+        end
+        return y
+    end == Any[Union{Nothing, Tuple{String, Union{Nothing, String}}}]
+end
+
+@test Base.return_types((Int,)) do x
+    if x === 0
+        Some(0.0)
+    elseif x == 1
+        Some(1)
+    else
+        Some(0x2)
+    end
+end == [Union{Some{Float64}, Some{Int}, Some{UInt8}}]
+
+# https://github.com/JuliaLang/julia/issues/40336
+@testset "make sure a call with signatures with recursively nested Types terminates" begin
+    @test @eval Module() begin
+        f(@nospecialize(t)) = f(Type{t})
+
+        code_typed() do
+            f(Int)
+        end
+        true
+    end
+
+    @test @eval Module() begin
+        f(@nospecialize(t)) = tdepth(t) == 10 ? t : f(Type{t})
+        tdepth(@nospecialize(t)) = isempty(t.parameters) ? 1 : 1+tdepth(t.parameters[1])
+
+        code_typed() do
+            f(Int)
+        end
+        true
+    end
+end
+
+# Make sure that const prop doesn't fall into cycles that aren't problematic
+# in the type domain
+f_recurse(x) = x > 1000000 ? x : f_recurse(x+1)
+@test Base.return_types() do
+    f_recurse(1)
+end |> first === Int
+
+# issue #39915
+function f33915(a_tuple, which_ones)
+    rest = f33915(Base.tail(a_tuple), Base.tail(which_ones))
+    if first(which_ones)
+        (first(a_tuple), rest...)
+    else
+        rest
+    end
+end
+f33915(a_tuple::Tuple{}, which_ones::Tuple{}) = ()
+g39915(a_tuple) = f33915(a_tuple, (true, false, true, false))
+@test Base.return_types() do
+    g39915((1, 1.0, "a", :a))
+end |> first === Tuple{Int, String}
+
+# issue #40742
+@test Base.return_types(string, (Vector{Tuple{:x}},)) == Any[String]
+
+# issue #40804
+@test Base.return_types(()) do; ===(); end == Any[Union{}]
+@test Base.return_types(()) do; typeassert(); end == Any[Union{}]
+
+primitive type UInt24ish 24 end
+f34288(x) = Core.Intrinsics.checked_sdiv_int(x, Core.Intrinsics.trunc_int(UInt24ish, 0))
+@test Base.return_types(f34288, (UInt24ish,)) == Any[UInt24ish]
+
+# Inference of PhiNode showing up in lowered AST
+function f_convert_me_to_ir(b, x)
+    a = b ? sin(x) : cos(x)
+    return a
+end
+
+let
+    # Test the presence of PhiNodes in lowered IR by taking the above function,
+    # running it through SSA conversion and then putting it into an opaque
+    # closure.
+    mi = Core.Compiler.specialize_method(first(methods(f_convert_me_to_ir)),
+        Tuple{Bool, Float64}, Core.svec())
+    ci = Base.uncompressed_ast(mi.def)
+    ci.ssavaluetypes = Any[Any for i = 1:ci.ssavaluetypes]
+    sv = Core.Compiler.OptimizationState(mi, Core.Compiler.OptimizationParams(),
+        Core.Compiler.NativeInterpreter())
+    ir = Core.Compiler.convert_to_ircode(ci, sv)
+    ir = Core.Compiler.slot2reg(ir, ci, sv)
+    ir = Core.Compiler.compact!(ir)
+    Core.Compiler.replace_code_newstyle!(ci, ir, 4)
+    ci.ssavaluetypes = length(ci.code)
+    @test any(x->isa(x, Core.PhiNode), ci.code)
+    oc = @eval b->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any,
+        Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(b, 1.0)
+    @test Base.return_types(oc, Tuple{Bool}) == Any[Float64]
+
+    oc = @eval ()->$(Expr(:new_opaque_closure, Tuple{Bool, Float64}, Any, Any,
+        Expr(:opaque_closure_method, nothing, 2, false, LineNumberNode(0, nothing), ci)))(true, 1.0)
+    @test Base.return_types(oc, Tuple{}) == Any[Float64]
+end
+
+@testset "constant prop' on `invoke` calls" begin
+    m = Module()
+
+    # simple cases
+    @eval m begin
+        f(a::Any,    sym::Bool) = sym ? Any : :any
+        f(a::Number, sym::Bool) = sym ? Number : :number
+    end
+    @test (@eval m Base.return_types((Any,)) do a
+        Base.@invoke f(a::Any, true::Bool)
+    end) == Any[Type{Any}]
+    @test (@eval m Base.return_types((Any,)) do a
+        Base.@invoke f(a::Number, true::Bool)
+    end) == Any[Type{Number}]
+    @test (@eval m Base.return_types((Any,)) do a
+        Base.@invoke f(a::Any, false::Bool)
+    end) == Any[Symbol]
+    @test (@eval m Base.return_types((Any,)) do a
+        Base.@invoke f(a::Number, false::Bool)
+    end) == Any[Symbol]
+
+    # https://github.com/JuliaLang/julia/issues/41024
+    @eval m begin
+        # mixin, which expects common field `x::Int`
+        abstract type AbstractInterface end
+        Base.getproperty(x::AbstractInterface, sym::Symbol) =
+            sym === :x ? getfield(x, sym)::Int :
+            return getfield(x, sym) # fallback
+
+        # extended mixin, which expects additional field `y::Rational{Int}`
+        abstract type AbstractInterfaceExtended <: AbstractInterface end
+        Base.getproperty(x::AbstractInterfaceExtended, sym::Symbol) =
+            sym === :y ? getfield(x, sym)::Rational{Int} :
+            return Base.@invoke getproperty(x::AbstractInterface, sym::Symbol)
+    end
+    @test (@eval m Base.return_types((AbstractInterfaceExtended,)) do x
+        x.x
+    end) == Any[Int]
+end
+
+@testset "fieldtype for unions" begin # e.g. issue #40177
+    f40177(::Type{T}) where {T} = fieldtype(T, 1)
+    for T in [
+        Union{Tuple{Val}, Tuple{Tuple}},
+        Union{Base.RefValue{T}, Type{Int32}} where T<:Real,
+        Union{Tuple{Vararg{Symbol}}, Tuple{Float64, Vararg{Float32}}},
+    ]
+        @test @inferred(f40177(T)) == fieldtype(T, 1)
+    end
+end
+
+# issue #41908
+f41908(x::Complex{T}) where {String<:T<:String} = 1
+g41908() = f41908(Any[1][1])
+@test only(Base.return_types(g41908, ())) <: Int
+
+# issue #42022
+let x = Tuple{Int,Any}[
+        #= 1=# (0, Expr(:(=), Core.SlotNumber(3), 1))
+        #= 2=# (0, Expr(:enter, 18))
+        #= 3=# (2, Expr(:(=), Core.SlotNumber(3), 2.0))
+        #= 4=# (2, Expr(:enter, 12))
+        #= 5=# (4, Expr(:(=), Core.SlotNumber(3), '3'))
+        #= 6=# (4, Core.GotoIfNot(Core.SlotNumber(2), 9))
+        #= 7=# (4, Expr(:leave, 2))
+        #= 8=# (0, Core.ReturnNode(1))
+        #= 9=# (4, Expr(:call, GlobalRef(Main, :throw)))
+        #=10=# (4, Expr(:leave, 1))
+        #=11=# (2, Core.GotoNode(16))
+        #=12=# (4, Expr(:leave, 1))
+        #=13=# (2, Expr(:(=), Core.SlotNumber(4), Expr(:the_exception)))
+        #=14=# (2, Expr(:call, GlobalRef(Main, :rethrow)))
+        #=15=# (2, Expr(:pop_exception, Core.SSAValue(4)))
+        #=16=# (2, Expr(:leave, 1))
+        #=17=# (0, Core.GotoNode(22))
+        #=18=# (2, Expr(:leave, 1))
+        #=19=# (0, Expr(:(=), Core.SlotNumber(5), Expr(:the_exception)))
+        #=20=# (0, nothing)
+        #=21=# (0, Expr(:pop_exception, Core.SSAValue(2)))
+        #=22=# (0, Core.ReturnNode(Core.SlotNumber(3)))
+    ]
+    handler_at = Core.Compiler.compute_trycatch(last.(x), Core.Compiler.BitSet())
+    @test handler_at == first.(x)
+end
+
+@test only(Base.return_types((Bool,)) do y
+        x = 1
+        try
+            x = 2.0
+            try
+                x = '3'
+                y ? (return 1) : throw()
+            catch ex1
+                rethrow()
+            end
+        catch ex2
+            nothing
+        end
+        return x
+    end) === Union{Int, Float64, Char}
+
+# issue #42097
+struct Foo42097{F} end
+Foo42097(f::F, args) where {F} = Foo42097{F}()
+Foo42097(A) = Foo42097(Base.inferencebarrier(+), Base.inferencebarrier(1)...)
+foo42097() = Foo42097([1]...)
+@test foo42097() isa Foo42097{typeof(+)}
+
+# eliminate unbound `TypeVar`s on `argtypes` construction
+let
+    a0(a01, a02, a03, a04, a05, a06, a07, a08, a09, a10, va...) = nothing
+    method = only(methods(a0))
+    unbound = TypeVar(:Unbound, Integer)
+    specTypes = Tuple{typeof(a0),
+               # TypeVar
+        #=01=# Bound,                  # => Integer
+        #=02=# unbound,                # => Integer (invalid `TypeVar` widened beforehand)
+               # DataType
+        #=03=# Type{Bound},            # => Type{Bound} where Bound<:Integer
+        #=04=# Type{unbound},          # => Type
+        #=05=# Vector{Bound},          # => Vector{Bound} where Bound<:Integer
+        #=06=# Vector{unbound},        # => Any
+               # UnionAll
+        #=07=# Type{<:Bound},          # => Type{<:Bound} where Bound<:Integer
+        #=08=# Type{<:unbound},        # => Any
+               # Union
+        #=09=# Union{Nothing,Bound},   # => Union{Nothing,Bound} where Bound<:Integer
+        #=10=# Union{Nothing,unbound}, # => Any
+               # Vararg
+        #=va=# Bound, unbound,         # => Tuple{Integer,Integer} (invalid `TypeVar` widened beforehand)
+        } where Bound<:Integer
+    argtypes = Core.Compiler.most_general_argtypes(method, specTypes, true)
+    popfirst!(argtypes)
+    @test argtypes[1] == Integer
+    @test argtypes[2] == Integer
+    @test argtypes[3] == Type{Bound} where Bound<:Integer
+    @test argtypes[4] == Type
+    @test argtypes[5] == Vector{Bound} where Bound<:Integer
+    @test argtypes[6] == Any
+    @test argtypes[7] == Type{<:Bound} where Bound<:Integer
+    @test argtypes[8] == Any
+    @test argtypes[9] == Union{Nothing,Bound} where Bound<:Integer
+    @test argtypes[10] == Any
+    @test argtypes[11] == Tuple{Integer,Integer}
+end
+
+# make sure not to call `widenconst` on `TypeofVararg` objects
+@testset "unhandled Vararg" begin
+    struct UnhandledVarargCond
+        val::Bool
+    end
+    function Base.:+(a::UnhandledVarargCond, xs...)
+        if a.val
+            return nothing
+        else
+            s = 0
+            for x in xs
+                s += x
+            end
+            return s
+        end
+    end
+    @test Base.return_types((Vector{Int},)) do xs
+        +(UnhandledVarargCond(false), xs...)
+    end |> only === Int
+
+    @test (Base.return_types((Vector{Any},)) do xs
+        Core.kwfunc(xs...)
+    end; true)
+
+    @test Base.return_types((Vector{Vector{Int}},)) do xs
+        Tuple(xs...)
+    end |> only === Tuple{Vararg{Int}}
+end
+
+# issue #42646
+@test only(Base.return_types(getindex, (Array{undef}, Int))) >: Union{} # check that it does not throw
+
+# form PartialStruct for extra type information propagation
+struct FieldTypeRefinement{S,T}
+    s::S
+    t::T
+end
+@test Base.return_types((Int,)) do s
+    o = FieldTypeRefinement{Any,Int}(s, s)
+    o.s
+end |> only == Int
+@test Base.return_types((Int,)) do s
+    o = FieldTypeRefinement{Int,Any}(s, s)
+    o.t
+end |> only == Int
+@test Base.return_types((Int,)) do s
+    o = FieldTypeRefinement{Any,Any}(s, s)
+    o.s, o.t
+end |> only == Tuple{Int,Int}
+@test Base.return_types((Int,)) do a
+    s1 = Some{Any}(a)
+    s2 = Some{Any}(s1)
+    s2.value.value
+end |> only == Int
+
+# issue #42986
+@testset "narrow down `Union` using `isdefined` checks" begin
+    # basic functionality
+    @test Base.return_types((Union{Nothing,Core.CodeInstance},)) do x
+        if isdefined(x, :inferred)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Core.CodeInstance
+
+    @test Base.return_types((Union{Nothing,Core.CodeInstance},)) do x
+        if isdefined(x, :not_exist)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Union{}
+
+    # even when isdefined is malformed, we can filter out types with no fields
+    @test Base.return_types((Union{Nothing, Core.CodeInstance},)) do x
+        if isdefined(x, 5)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Core.CodeInstance
+
+    struct UnionNarrowingByIsdefinedA; x; end
+    struct UnionNarrowingByIsdefinedB; x; end
+    struct UnionNarrowingByIsdefinedC; x; end
+
+    # > 2 types in the union
+    @test  Base.return_types((Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedB, UnionNarrowingByIsdefinedC},)) do x
+        if isdefined(x, :x)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedB, UnionNarrowingByIsdefinedC}
+
+    # > 2 types in the union and some aren't defined
+    @test  Base.return_types((Union{UnionNarrowingByIsdefinedA, Core.CodeInstance, UnionNarrowingByIsdefinedC},)) do x
+        if isdefined(x, :x)
+            return x
+        else
+            throw("invalid")
+        end
+    end |> only === Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedC}
+
+    # should respect `Const` information still
+    @test Base.return_types((Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedB},)) do x
+        if isdefined(x, :x)
+            return x
+        else
+            return nothing # dead branch
+        end
+    end |> only === Union{UnionNarrowingByIsdefinedA, UnionNarrowingByIsdefinedB}
+end
+
+# issue #43784
+@testset "issue #43784" begin
+    init = Base.ImmutableDict{Any,Any}()
+    a = Const(init)
+    b = Core.PartialStruct(typeof(init), Any[Const(init), Any, Any])
+    c = Core.Compiler.tmerge(a, b)
+    @test ⊑(a, c)
+    @test ⊑(b, c)
+
+    init = Base.ImmutableDict{Number,Number}()
+    a = Const(init)
+    b = Core.PartialStruct(typeof(init), Any[Const(init), Any, ComplexF64])
+    c = Core.Compiler.tmerge(a, b)
+    @test ⊑(a, c) && ⊑(b, c)
+    @test c === typeof(init)
+
+    a = Core.PartialStruct(typeof(init), Any[Const(init), ComplexF64, ComplexF64])
+    c = Core.Compiler.tmerge(a, b)
+    @test ⊑(a, c) && ⊑(b, c)
+    @test c.fields[2] === Any # or Number
+    @test c.fields[3] === ComplexF64
+
+    b = Core.PartialStruct(typeof(init), Any[Const(init), ComplexF32, Union{ComplexF32,ComplexF64}])
+    c = Core.Compiler.tmerge(a, b)
+    @test ⊑(a, c)
+    @test ⊑(b, c)
+    @test c.fields[2] === Complex
+    @test c.fields[3] === Complex
+
+    global const ginit43784 = Base.ImmutableDict{Any,Any}()
+    @test Base.return_types() do
+            g = ginit43784
+            while true
+                g = Base.ImmutableDict(g, 1=>2)
+            end
+        end |> only === Union{}
+end
+
+# Test that purity modeling doesn't accidentally introduce new world age issues
+f_redefine_me(x) = x+1
+f_call_redefine() = f_redefine_me(0)
+f_mk_opaque() = @Base.Experimental.opaque ()->Base.inferencebarrier(f_call_redefine)()
+const op_capture_world = f_mk_opaque()
+f_redefine_me(x) = x+2
+@test op_capture_world() == 1
+@test f_mk_opaque()() == 2
+
+# Test that purity doesn't try to accidentally run unreachable code due to
+# boundscheck elimination
+function f_boundscheck_elim(n)
+    # Inbounds here assumes that this is only ever called with n==0, but of
+    # course the compiler has no way of knowing that, so it must not attempt
+    # to run the @inbounds `getfield(sin, 1)`` that ntuple generates.
+    ntuple(x->(@inbounds getfield(sin, x)), n)
+end
+@test Tuple{} <: code_typed(f_boundscheck_elim, Tuple{Int})[1][2]
+
+@test !Core.Compiler.builtin_nothrow(Core.get_binding_type, Any[Rational{Int}, Core.Const(:foo)], Any)
+
+# Test that max_methods works as expected
+@Base.Experimental.max_methods 1 function f_max_methods end
+f_max_methods(x::Int) = 1
+f_max_methods(x::Float64) = 2
+g_max_methods(x) = f_max_methods(x)
+@test Core.Compiler.return_type(g_max_methods, Tuple{Int}) === Int
+@test Core.Compiler.return_type(g_max_methods, Tuple{Any}) === Any
+
+# Unit tests for BitSetBoundedMinPrioritySet
+let bsbmp = Core.Compiler.BitSetBoundedMinPrioritySet(5)
+    Core.Compiler.push!(bsbmp, 2)
+    Core.Compiler.push!(bsbmp, 2)
+    @test Core.Compiler.popfirst!(bsbmp) == 2
+    Core.Compiler.push!(bsbmp, 1)
+    @test Core.Compiler.popfirst!(bsbmp) == 1
+    @test Core.Compiler.isempty(bsbmp)
+end
+
+# Make sure return_type_tfunc doesn't accidentally cause bad inference if used
+# at top level.
+@test let
+    Base.Experimental.@force_compile
+    Core.Compiler.return_type(+, NTuple{2, Rational})
+end == Rational
+
+# https://github.com/JuliaLang/julia/issues/44965
+let t = Core.Compiler.tuple_tfunc(Any[Core.Const(42), Vararg{Any}])
+    @test Core.Compiler.issimplertype(t, t)
+end
+
+# https://github.com/JuliaLang/julia/issues/44763
+global x44763::Int = 0
+increase_x44763!(n) = (global x44763; x44763 += n)
+invoke44763(x) = Base.@invoke increase_x44763!(x)
+@test Base.return_types() do
+    invoke44763(42)
+end |> only === Int
+@test x44763 == 0
+
+# backedge insertion for Any-typed, effect-free frame
+const CONST_DICT = let d = Dict()
+    for c in 'A':'z'
+        push!(d, c => Int(c))
+    end
+    d
+end
+Base.@assume_effects :total_may_throw getcharid(c) = CONST_DICT[c]
+@noinline callf(f, args...) = f(args...)
+function entry_to_be_invalidated(c)
+    return callf(getcharid, c)
+end
+@test Base.infer_effects((Char,)) do x
+    entry_to_be_invalidated(x)
+end |> Core.Compiler.is_concrete_eval_eligible
+@test fully_eliminated(; retval=97) do
+    entry_to_be_invalidated('a')
+end
+getcharid(c) = CONST_DICT[c] # now this is not eligible for concrete evaluation
+@test Base.infer_effects((Char,)) do x
+    entry_to_be_invalidated(x)
+end |> !Core.Compiler.is_concrete_eval_eligible
+@test !fully_eliminated() do
+    entry_to_be_invalidated('a')
+end
diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl
index 85ac8fd6a0f4c3..06cbfbb3ce2279 100644
--- a/test/compiler/inline.jl
+++ b/test/compiler/inline.jl
@@ -4,6 +4,8 @@ using Test
 using Base.Meta
 using Core: ReturnNode
 
+include(normpath(@__DIR__, "irutils.jl"))
+
 """
 Helper to walk the AST and call a function on every node.
 """
@@ -150,9 +152,9 @@ end
     @test !any(x -> x isa Expr && x.head === :invoke, src.code)
 end
 
-# check that type.mutable can be fully eliminated
-f_mutable_nothrow(s::String) = Val{typeof(s).mutable}
-@test length(code_typed(f_mutable_nothrow, (String,))[1][1].code) == 1
+# check that ismutabletype(type) can be fully eliminated
+f_mutable_nothrow(s::String) = Val{typeof(s).name.flags}
+@test fully_eliminated(f_mutable_nothrow, (String,))
 
 # check that ifelse can be fully eliminated
 function f_ifelse(x)
@@ -160,10 +162,9 @@ function f_ifelse(x)
     b = ifelse(a, true, false)
     return b ? x + 1 : x
 end
-# 2 for now because the compiler leaves a GotoNode around
-@test_broken length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
+@test length(code_typed(f_ifelse, (String,))[1][1].code) <= 2
 
-# Test that inlining of _apply properly hits the inference cache
+# Test that inlining of _apply_iterate properly hits the inference cache
 @noinline cprop_inline_foo1() = (1, 1)
 @noinline cprop_inline_foo2() = (2, 2)
 function cprop_inline_bar(x...)
@@ -193,7 +194,7 @@ end
 function cprop_inline_baz1()
     return cprop_inline_bar(cprop_inline_foo1()..., cprop_inline_foo1()...)
 end
-@test length(code_typed(cprop_inline_baz1, ())[1][1].code) == 1
+@test fully_eliminated(cprop_inline_baz1, ())
 
 function cprop_inline_baz2()
     return cprop_inline_bar(cprop_inline_foo2()..., cprop_inline_foo2()...)
@@ -205,14 +206,14 @@ function f_apply_typevar(T)
     NTuple{N, T} where N
     return T
 end
-@test length(code_typed(f_apply_typevar, (Type{Any},))[1][1].code) == 1
+@test fully_eliminated(f_apply_typevar, (Type{Any},))
 
 # check that div can be fully eliminated
 function f_div(x)
-	div(x, 1)
-	return x
+    div(x, 1)
+    return x
 end
-@test length(code_typed(f_div, (Int,))[1][1].code) == 1
+@test fully_eliminated(f_div, (Int,); retval=Core.Argument(2))
 # ...unless we div by an unknown amount
 function f_div(x, y)
     div(x, y)
@@ -221,12 +222,12 @@ end
 @test length(code_typed(f_div, (Int, Int))[1][1].code) > 1
 
 f_identity_splat(t) = (t...,)
-@test length(code_typed(f_identity_splat, (Tuple{Int,Int},))[1][1].code) == 1
+@test fully_eliminated(f_identity_splat, (Tuple{Int,Int},))
 
 # splatting one tuple into (,) plus zero or more empties should reduce
 # this pattern appears for example in `fill_to_length`
 f_splat_with_empties(t) = (()..., t..., ()..., ()...)
-@test length(code_typed(f_splat_with_empties, (NTuple{200,UInt8},))[1][1].code) == 1
+@test fully_eliminated(f_splat_with_empties, (NTuple{200,UInt8},))
 
 # check that <: can be fully eliminated
 struct SomeArbitraryStruct; end
@@ -234,10 +235,7 @@ function f_subtype()
     T = SomeArbitraryStruct
     T <: Bool
 end
-let code = code_typed(f_subtype, Tuple{})[1][1].code
-    @test length(code) == 1
-    @test code[1] == ReturnNode(false)
-end
+@test fully_eliminated(f_subtype, Tuple{}; retval=false)
 
 # check that pointerref gets deleted if unused
 f_pointerref(T::Type{S}) where S = Val(length(T.parameters))
@@ -261,9 +259,7 @@ function foo_apply_apply_type_svec()
     B = Tuple{Float32, Float32}
     Core.apply_type(A..., B.types...)
 end
-let ci = code_typed(foo_apply_apply_type_svec, Tuple{})[1].first
-    @test length(ci.code) == 1 && ci.code[1] == ReturnNode(NTuple{3, Float32})
-end
+@test fully_eliminated(foo_apply_apply_type_svec, Tuple{}; retval=NTuple{3, Float32})
 
 # The that inlining doesn't drop ambiguity errors (#30118)
 c30118(::Tuple{Ref{<:Type}, Vararg}) = nothing
@@ -277,16 +273,17 @@ b30118(x...) = c30118(x)
 f34900(x::Int, y) = x
 f34900(x, y::Int) = y
 f34900(x::Int, y::Int) = invoke(f34900, Tuple{Int, Any}, x, y)
-let ci = code_typed(f34900, Tuple{Int, Int})[1].first
-    @test length(ci.code) == 1 && isa(ci.code[1], ReturnNode) &&
-        ci.code[1].val.n == 2
-end
+@test fully_eliminated(f34900, Tuple{Int, Int}; retval=Core.Argument(2))
 
 @testset "check jl_ir_flag_inlineable for inline macro" begin
-    @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), first(methods(@inline x -> x)).source)
-    @test !ccall(:jl_ir_flag_inlineable, Bool, (Any,), first(methods( x -> x)).source)
-    @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), first(methods(@inline function f(x) x end)).source)
-    @test !ccall(:jl_ir_flag_inlineable, Bool, (Any,), first(methods(function f(x) x end)).source)
+    @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(@inline x -> x)).source)
+    @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(x -> (@inline; x))).source)
+    @test !ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(x -> x)).source)
+    @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(@inline function f(x) x end)).source)
+    @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(function f(x) @inline; x end)).source)
+    @test !ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods(function f(x) x end)).source)
+    @test ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods() do x @inline; x end).source)
+    @test !ccall(:jl_ir_flag_inlineable, Bool, (Any,), only(methods() do x x end).source)
 end
 
 const _a_global_array = [1]
@@ -307,3 +304,958 @@ let ci = code_typed(f_29115, Tuple{Pair{Int64, Int64}})[1].first
     @test length(ci.code) == 4 && isexpr(ci.code[1], :call) &&
         ci.code[end-1].args[1] === GlobalRef(Core, :tuple)
 end
+
+# Issue #37182 & #37555 - Inlining of pending nodes
+function f37555(x::Int; kwargs...)
+    @assert x < 10
+    +(x, kwargs...)
+end
+@test f37555(1) == 1
+
+# Test that we can inline small constants even if they are not isbits
+struct NonIsBitsDims
+    dims::NTuple{N, Int} where N
+end
+NonIsBitsDims() = NonIsBitsDims(())
+@test fully_eliminated(NonIsBitsDims, (); retval=QuoteNode(NonIsBitsDims()))
+
+struct NonIsBitsDimsUndef
+    dims::NTuple{N, Int} where N
+    NonIsBitsDimsUndef() = new()
+end
+@test Core.Compiler.is_inlineable_constant(NonIsBitsDimsUndef())
+@test !Core.Compiler.is_inlineable_constant((("a"^1000, "b"^1000), nothing))
+
+# More nothrow modeling for apply_type
+f_apply_type_typeof(x) = (Ref{typeof(x)}; nothing)
+@test fully_eliminated(f_apply_type_typeof, Tuple{Any})
+@test fully_eliminated(f_apply_type_typeof, Tuple{Vector})
+@test fully_eliminated(x->(Val{x}; nothing), Tuple{Int})
+@test fully_eliminated(x->(Val{x}; nothing), Tuple{Symbol})
+@test fully_eliminated(x->(Val{x}; nothing), Tuple{Tuple{Int, Int}})
+@test !fully_eliminated(x->(Val{x}; nothing), Tuple{String})
+@test !fully_eliminated(x->(Val{x}; nothing), Tuple{Any})
+@test !fully_eliminated(x->(Val{x}; nothing), Tuple{Tuple{Int, String}})
+
+struct RealConstrained{T <: Real}; end
+@test !fully_eliminated(x->(RealConstrained{x}; nothing), Tuple{Int})
+@test !fully_eliminated(x->(RealConstrained{x}; nothing), Tuple{Type{Vector{T}} where T})
+
+# Check that pure functions with non-inlineable results still get deleted
+struct Big
+    x::NTuple{1024, Int}
+end
+@Base.pure Big() = Big(ntuple(identity, 1024))
+function pure_elim_full()
+    Big()
+    nothing
+end
+
+@test fully_eliminated(pure_elim_full, Tuple{})
+
+# Union splitting of convert
+f_convert_missing(x) = convert(Int64, x)
+let ci = code_typed(f_convert_missing, Tuple{Union{Int64, Missing}})[1][1],
+    ci_unopt = code_typed(f_convert_missing, Tuple{Union{Int64, Missing}}; optimize=false)[1][1]
+    # We want to check that inlining was able to union split this, but we don't
+    # want to make the test too specific to the exact structure that inlining
+    # generates, so instead, we just check that the compiler made it bigger.
+    # There are performance tests that are also sensitive to union splitting
+    # here, so a non-obvious regression
+    @test length(ci.code) >
+        length(ci_unopt.code)
+end
+
+# OC getfield elim
+using Base.Experimental: @opaque
+f_oc_getfield(x) = (@opaque ()->x)()
+@test fully_eliminated(f_oc_getfield, Tuple{Int})
+
+import Core.Compiler: argextype, singleton_type
+const EMPTY_SPTYPES = Any[]
+
+code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::Core.CodeInfo
+get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code
+
+# check if `x` is a dynamic call of a given function
+iscall(y) = @nospecialize(x) -> iscall(y, x)
+function iscall((src, f)::Tuple{Core.CodeInfo,Base.Callable}, @nospecialize(x))
+    return iscall(x) do @nospecialize x
+        singleton_type(argextype(x, src, EMPTY_SPTYPES)) === f
+    end
+end
+iscall(pred::Base.Callable, @nospecialize(x)) = Meta.isexpr(x, :call) && pred(x.args[1])
+
+# check if `x` is a statically-resolved call of a function whose name is `sym`
+isinvoke(y) = @nospecialize(x) -> isinvoke(y, x)
+isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x)
+isinvoke(pred::Function, @nospecialize(x)) = Meta.isexpr(x, :invoke) && pred(x.args[1]::Core.MethodInstance)
+
+@testset "@inline/@noinline annotation before definition" begin
+    M = Module()
+    @eval M begin
+        @inline function _def_inline(x)
+            # this call won't be resolved and thus will prevent inlining to happen if we don't
+            # annotate `@inline` at the top of this function body
+            return unresolved_call(x)
+        end
+        def_inline(x) = _def_inline(x)
+        @noinline _def_noinline(x) = x # obviously will be inlined otherwise
+        def_noinline(x) = _def_noinline(x)
+
+        # test that they don't conflict with other "before-definition" macros
+        @inline Base.@constprop :aggressive function _def_inline_noconflict(x)
+            # this call won't be resolved and thus will prevent inlining to happen if we don't
+            # annotate `@inline` at the top of this function body
+            return unresolved_call(x)
+        end
+        def_inline_noconflict(x) = _def_inline_noconflict(x)
+        @noinline Base.@constprop :aggressive _def_noinline_noconflict(x) = x # obviously will be inlined otherwise
+        def_noinline_noconflict(x) = _def_noinline_noconflict(x)
+    end
+
+    let code = get_code(M.def_inline, (Int,))
+        @test all(!isinvoke(:_def_inline), code)
+    end
+    let code = get_code(M.def_noinline, (Int,))
+        @test any(isinvoke(:_def_noinline), code)
+    end
+    # test that they don't conflict with other "before-definition" macros
+    let code = get_code(M.def_inline_noconflict, (Int,))
+        @test all(!isinvoke(:_def_inline_noconflict), code)
+    end
+    let code = get_code(M.def_noinline_noconflict, (Int,))
+        @test any(isinvoke(:_def_noinline_noconflict), code)
+    end
+end
+
+@testset "@inline/@noinline annotation within a function body" begin
+    M = Module()
+    @eval M begin
+        function _body_inline(x)
+            @inline
+            # this call won't be resolved and thus will prevent inlining to happen if we don't
+            # annotate `@inline` at the top of this function body
+            return unresolved_call(x)
+        end
+        body_inline(x) = _body_inline(x)
+        function _body_noinline(x)
+            @noinline
+            return x # obviously will be inlined otherwise
+        end
+        body_noinline(x) = _body_noinline(x)
+
+        # test annotations for `do` blocks
+        @inline simple_caller(a) = a()
+        function do_inline(x)
+            simple_caller() do
+                @inline
+                # this call won't be resolved and thus will prevent inlining to happen if we don't
+                # annotate `@inline` at the top of this anonymous function body
+                return unresolved_call(x)
+            end
+        end
+        function do_noinline(x)
+            simple_caller() do
+                @noinline
+                return x # obviously will be inlined otherwise
+            end
+        end
+    end
+
+    let code = get_code(M.body_inline, (Int,))
+        @test all(!isinvoke(:_body_inline), code)
+    end
+    let code = get_code(M.body_noinline, (Int,))
+        @test any(isinvoke(:_body_noinline), code)
+    end
+    # test annotations for `do` blocks
+    let code = get_code(M.do_inline, (Int,))
+        # what we test here is that both `simple_caller` and the anonymous function that the
+        # `do` block creates should inlined away, and as a result there is only the unresolved call
+        @test all(code) do @nospecialize x
+            !isinvoke(:simple_caller, x) &&
+            !isinvoke(x) do mi
+                startswith(string(mi.def.name), '#')
+            end
+        end
+    end
+    let code = get_code(M.do_noinline, (Int,))
+        # the anonymous function that the `do` block created shouldn't be inlined here
+        @test any(code) do @nospecialize x
+            isinvoke(x) do mi
+                startswith(string(mi.def.name), '#')
+            end
+        end
+    end
+end
+
+@testset "callsite @inline/@noinline annotations" begin
+    M = Module()
+    @eval M begin
+        # this global variable prevents inference to fold everything as constant, and/or the optimizer to inline the call accessing to this
+        g = 0
+
+        @noinline noinlined_explicit(x) = x
+        force_inline_explicit(x)        = @inline noinlined_explicit(x)
+        force_inline_block_explicit(x)  = @inline noinlined_explicit(x) + noinlined_explicit(x)
+        noinlined_implicit(x)          = g
+        force_inline_implicit(x)       = @inline noinlined_implicit(x)
+        force_inline_block_implicit(x) = @inline noinlined_implicit(x) + noinlined_implicit(x)
+
+        @inline inlined_explicit(x)      = x
+        force_noinline_explicit(x)       = @noinline inlined_explicit(x)
+        force_noinline_block_explicit(x) = @noinline inlined_explicit(x) + inlined_explicit(x)
+        inlined_implicit(x)              = x
+        force_noinline_implicit(x)       = @noinline inlined_implicit(x)
+        force_noinline_block_implicit(x) = @noinline inlined_implicit(x) + inlined_implicit(x)
+
+        # test callsite annotations for constant-prop'ed calls
+
+        @noinline Base.@constprop :aggressive noinlined_constprop_explicit(a) = a+g
+        force_inline_constprop_explicit() = @inline noinlined_constprop_explicit(0)
+        Base.@constprop :aggressive noinlined_constprop_implicit(a) = a+g
+        force_inline_constprop_implicit() = @inline noinlined_constprop_implicit(0)
+
+        @inline Base.@constprop :aggressive inlined_constprop_explicit(a) = a+g
+        force_noinline_constprop_explicit() = @noinline inlined_constprop_explicit(0)
+        @inline Base.@constprop :aggressive inlined_constprop_implicit(a) = a+g
+        force_noinline_constprop_implicit() = @noinline inlined_constprop_implicit(0)
+
+        @noinline notinlined(a) = a
+        function nested(a0, b0)
+            @noinline begin
+                a = @inline notinlined(a0) # this call should be inlined
+                b = notinlined(b0) # this call should NOT be inlined
+                return a, b
+            end
+        end
+    end
+
+    let code = get_code(M.force_inline_explicit, (Int,))
+        @test all(!isinvoke(:noinlined_explicit), code)
+    end
+    let code = get_code(M.force_inline_block_explicit, (Int,))
+        @test all(code) do @nospecialize x
+            !isinvoke(:noinlined_explicit, x) &&
+            !isinvoke(:(+), x)
+        end
+    end
+    let code = get_code(M.force_inline_implicit, (Int,))
+        @test all(!isinvoke(:noinlined_implicit), code)
+    end
+    let code = get_code(M.force_inline_block_implicit, (Int,))
+        @test all(!isinvoke(:noinlined_explicit), code)
+    end
+
+    let code = get_code(M.force_noinline_explicit, (Int,))
+        @test any(isinvoke(:inlined_explicit), code)
+    end
+    let code = get_code(M.force_noinline_block_explicit, (Int,))
+        @test count(isinvoke(:inlined_explicit), code) == 2
+    end
+    let code = get_code(M.force_noinline_implicit, (Int,))
+        @test any(isinvoke(:inlined_implicit), code)
+    end
+    let code = get_code(M.force_noinline_block_implicit, (Int,))
+        @test count(isinvoke(:inlined_implicit), code) == 2
+    end
+
+    let code = get_code(M.force_inline_constprop_explicit)
+        @test all(!isinvoke(:noinlined_constprop_explicit), code)
+    end
+    let code = get_code(M.force_inline_constprop_implicit)
+        @test all(!isinvoke(:noinlined_constprop_implicit), code)
+    end
+
+    let code = get_code(M.force_noinline_constprop_explicit)
+        @test any(isinvoke(:inlined_constprop_explicit), code)
+    end
+    let code = get_code(M.force_noinline_constprop_implicit)
+        @test any(isinvoke(:inlined_constprop_implicit), code)
+    end
+
+    let code = get_code(M.nested, (Int,Int))
+        @test count(isinvoke(:notinlined), code) == 1
+    end
+end
+
+# force constant-prop' for `setproperty!`
+# https://github.com/JuliaLang/julia/pull/41882
+let code = @eval Module() begin
+        # if we don't force constant-prop', `T = fieldtype(Foo, ::Symbol)` will be union-split to
+        # `Union{Type{Any},Type{Int}` and it will make `convert(T, nothing)` too costly
+        # and it leads to inlining failure
+        mutable struct Foo
+            val
+            _::Int
+        end
+
+        function setter(xs)
+            for x in xs
+                x.val = nothing
+            end
+        end
+
+        $get_code(setter, (Vector{Foo},))
+    end
+
+    @test !any(isinvoke(:setproperty!), code)
+end
+
+# Issue #41299 - inlining deletes error check in :>
+g41299(f::Tf, args::Vararg{Any,N}) where {Tf,N} = f(args...)
+@test_throws TypeError g41299(>:, 1, 2)
+
+# https://github.com/JuliaLang/julia/issues/42078
+# idempotency of callsite inling
+function getcache(mi::Core.MethodInstance)
+    cache = Core.Compiler.code_cache(Core.Compiler.NativeInterpreter())
+    codeinf = Core.Compiler.get(cache, mi, nothing)
+    return isnothing(codeinf) ? nothing : codeinf
+end
+@noinline f42078(a) = sum(sincos(a))
+let
+    ninlined = let
+        code = get_code((Int,)) do a
+            @inline f42078(a)
+        end
+        @test all(!isinvoke(:f42078), code)
+        length(code)
+    end
+
+    let # codegen will discard the source because it's not supposed to be inlined in general context
+        a = 42
+        f42078(a)
+    end
+    let # make sure to discard the inferred source
+        specs = collect(only(methods(f42078)).specializations)
+        mi = specs[findfirst(!isnothing, specs)]::Core.MethodInstance
+        codeinf = getcache(mi)::Core.CodeInstance
+        codeinf.inferred = nothing
+    end
+
+    let # inference should re-infer `f42078(::Int)` and we should get the same code
+        code = get_code((Int,)) do a
+            @inline f42078(a)
+        end
+        @test all(!isinvoke(:f42078), code)
+        @test ninlined == length(code)
+    end
+end
+
+begin
+    # more idempotency of callsite inling
+    # -----------------------------------
+    # this test case requires forced constant propagation for callsite inlined function call,
+    # particularly, in the following example, the inlinear will look up `+ₚ(::Point, ::Const(Point(2.25, 4.75)))`
+    # and the callsite inlining needs the corresponding constant result to exist in the local cache
+
+    struct Point
+        x::Float64
+        y::Float64
+    end
+    @noinline a::Point +ₚ b::Point = Point(a.x + b.x, a.y + b.y)
+
+    function compute_idem_n(n)
+        a = Point(1.5, 2.5)
+        b = Point(2.25, 4.75)
+        for i in 0:(n-1)
+            a = @inline (a +ₚ b) +ₚ b
+        end
+        return a.x, a.y
+    end
+    let src = code_typed1(compute_idem_n, (Int,))
+        @test count(isinvoke(:+ₚ), src.code) == 0 # successful inlining
+    end
+
+    function compute_idem_n(n)
+        a = Point(1.5, 2.5)
+        b = Point(2.25, 4.75)
+        for i in 0:(n-1)
+            a = (a +ₚ b) +ₚ b
+        end
+        return a.x, a.y
+    end
+    let src = code_typed1(compute_idem_n, (Int,))
+        @test count(isinvoke(:+ₚ), src.code) == 2 # no inlining
+    end
+
+    compute_idem_n(42) # this execution should discard the cache of `+ₚ` since it's declared as `@noinline`
+
+    function compute_idem_n(n)
+        a = Point(1.5, 2.5)
+        b = Point(2.25, 4.75)
+        for i in 0:(n-1)
+            @inline a = (a +ₚ b) +ₚ b
+        end
+        return a.x, a.y
+    end
+    let src = code_typed1(compute_idem_n, (Int,))
+        @test count(isinvoke(:+ₚ), src.code) == 0 # no inlining !?
+    end
+end
+
+# https://github.com/JuliaLang/julia/issues/42246
+@test mktempdir() do dir
+    cd(dir) do
+        code = quote
+            issue42246() = @noinline IOBuffer("a")
+            let
+                ci, rt = only(code_typed(issue42246))
+                if any(ci.code) do stmt
+                       Meta.isexpr(stmt, :invoke) &&
+                       stmt.args[1].def.name === nameof(IOBuffer)
+                   end
+                    exit(0)
+                else
+                    exit(1)
+               end
+            end
+        end |> string
+        cmd = `$(Base.julia_cmd()) --code-coverage=tmp.info -e $code`
+        success(pipeline(Cmd(cmd); stdout=stdout, stderr=stderr))
+    end
+end
+
+# Issue #42264 - crash on certain union splits
+let f(x) = (x...,)
+    # Test splatting with a Union of non-{Tuple, SimpleVector} types that require creating new `iterate` calls
+    # in inlining. For this particular case, we're relying on `iterate(::CaretesianIndex)` throwing an error, such
+    # the the original apply call is not union-split, but the inserted `iterate` call is.
+    @test code_typed(f, Tuple{Union{Int64, CartesianIndex{1}, CartesianIndex{3}}})[1][2] == Tuple{Int64}
+end
+
+# https://github.com/JuliaLang/julia/issues/42754
+# inline union-split constant-prop'ed results
+mutable struct X42754
+    # NOTE in order to confuse `fieldtype_tfunc`, we need to have at least two fields with different types
+    a::Union{Nothing, Int}
+    b::Symbol
+end
+let src = code_typed1((X42754, Union{Nothing,Int})) do x, a
+        # this `setproperty` call would be union-split and constant-prop will happen for
+        # each signature: inlining would fail if we don't use constant-prop'ed source
+        # since the approximate inlining cost of `convert(fieldtype(X, sym), a)` would
+        # end up very high if we don't propagate `sym::Const(:a)`
+        x.a = a
+        x
+    end
+    @test all(src.code) do @nospecialize x
+        !(isinvoke(:setproperty!, x) || iscall((src, setproperty!), x))
+    end
+end
+
+import Base: @constprop
+
+# test union-split callsite with successful and unsuccessful constant-prop' results
+# (also for https://github.com/JuliaLang/julia/issues/43287)
+@constprop :aggressive @inline f42840(cond::Bool, xs::Tuple, a::Int) =  # should be successful, and inlined with constant prop' result
+    cond ? xs[a] : @noinline(length(xs))
+@constprop :none @noinline f42840(::Bool, xs::AbstractVector, a::Int) = # should be unsuccessful, but still statically resolved
+    xs[a]
+let src = code_typed((Union{Tuple{Int,Int,Int}, Vector{Int}},)) do xs
+             f42840(true, xs, 2)
+         end |> only |> first
+    # `f43287(true, xs::Tuple{Int,Int,Int}, 2)` => `getfield(xs, 2)`
+    # `f43287(true, xs::Vector{Int}, 2)` => `:invoke f43287(true, xs, 2)`
+    @test count(iscall((src, getfield)), src.code) == 1
+    @test count(isinvoke(:length), src.code) == 0
+    @test count(isinvoke(:f42840), src.code) == 1
+end
+# a bit weird, but should handle this kind of case as well
+@constprop :aggressive @noinline g42840(xs, a::Int) = xs[a]         # should be successful, but only statically resolved
+@constprop :none @inline g42840(xs::AbstractVector, a::Int) = xs[a] # should be unsuccessful, still inlined
+let src = code_typed((Union{Tuple{Int,Int,Int}, Vector{Int}},)) do xs
+        g42840(xs, 2)
+    end |> only |> first
+    # `(xs::Vector{Int})[a::Const(2)]` => `Base.arrayref(true, xs, 2)`
+    @test count(iscall((src, Base.arrayref)), src.code) == 1
+    @test count(isinvoke(:g42840), src.code) == 1
+end
+
+# test single, non-dispatchtuple callsite inlining
+
+@constprop :none @inline test_single_nondispatchtuple(@nospecialize(t)) =
+    isa(t, DataType) && t.name === Type.body.name
+let
+    src = code_typed1((Any,)) do x
+        test_single_nondispatchtuple(x)
+    end
+    @test all(src.code) do @nospecialize x
+        !(isinvoke(:test_single_nondispatchtuple, x) || iscall((src, test_single_nondispatchtuple), x))
+    end
+end
+
+@constprop :aggressive @inline test_single_nondispatchtuple(c, @nospecialize(t)) =
+    c && isa(t, DataType) && t.name === Type.body.name
+let
+    src = code_typed1((Any,)) do x
+        test_single_nondispatchtuple(true, x)
+    end
+    @test all(src.code) do @nospecialize(x)
+        !(isinvoke(:test_single_nondispatchtuple, x) || iscall((src, test_single_nondispatchtuple), x))
+    end
+end
+
+# validate inlining processing
+
+@constprop :none @inline validate_unionsplit_inlining(@nospecialize(t)) = throw("invalid inlining processing detected")
+@constprop :none @noinline validate_unionsplit_inlining(i::Integer) = (println(IOBuffer(), "prevent inlining"); false)
+let
+    invoke(xs) = validate_unionsplit_inlining(xs[1])
+    @test invoke(Any[10]) === false
+end
+
+@constprop :aggressive @inline validate_unionsplit_inlining(c, @nospecialize(t)) = c && throw("invalid inlining processing detected")
+@constprop :aggressive @noinline validate_unionsplit_inlining(c, i::Integer) = c && (println(IOBuffer(), "prevent inlining"); false)
+let
+    invoke(xs) = validate_unionsplit_inlining(true, xs[1])
+    @test invoke(Any[10]) === false
+end
+
+# test union-split, non-dispatchtuple callsite inlining
+
+@constprop :none @noinline abstract_unionsplit(@nospecialize x::Any) = Base.inferencebarrier(:Any)
+@constprop :none @noinline abstract_unionsplit(@nospecialize x::Number) = Base.inferencebarrier(:Number)
+let src = code_typed1((Any,)) do x
+        abstract_unionsplit(x)
+    end
+    @test count(isinvoke(:abstract_unionsplit), src.code) == 2
+    @test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
+end
+let src = code_typed1((Union{Type,Number},)) do x
+        abstract_unionsplit(x)
+    end
+    @test count(isinvoke(:abstract_unionsplit), src.code) == 2
+    @test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
+end
+
+@constprop :none @noinline abstract_unionsplit_fallback(@nospecialize x::Type) = Base.inferencebarrier(:Any)
+@constprop :none @noinline abstract_unionsplit_fallback(@nospecialize x::Number) = Base.inferencebarrier(:Number)
+let src = code_typed1((Any,)) do x
+        abstract_unionsplit_fallback(x)
+    end
+    @test count(isinvoke(:abstract_unionsplit_fallback), src.code) == 2
+    @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch
+end
+let src = code_typed1((Union{Type,Number},)) do x
+        abstract_unionsplit_fallback(x)
+    end
+    @test count(isinvoke(:abstract_unionsplit_fallback), src.code) == 2
+    @test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
+end
+
+@constprop :aggressive @inline abstract_unionsplit(c, @nospecialize x::Any) = (c && println("erase me"); typeof(x))
+@constprop :aggressive @inline abstract_unionsplit(c, @nospecialize x::Number) = (c && println("erase me"); typeof(x))
+let src = code_typed1((Any,)) do x
+        abstract_unionsplit(false, x)
+    end
+    @test count(iscall((src, typeof)), src.code) == 2
+    @test count(isinvoke(:println), src.code) == 0
+    @test count(iscall((src, println)), src.code) == 0
+    @test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
+end
+let src = code_typed1((Union{Type,Number},)) do x
+        abstract_unionsplit(false, x)
+    end
+    @test count(iscall((src, typeof)), src.code) == 2
+    @test count(isinvoke(:println), src.code) == 0
+    @test count(iscall((src, println)), src.code) == 0
+    @test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
+end
+
+@constprop :aggressive @inline abstract_unionsplit_fallback(c, @nospecialize x::Type) = (c && println("erase me"); typeof(x))
+@constprop :aggressive @inline abstract_unionsplit_fallback(c, @nospecialize x::Number) = (c && println("erase me"); typeof(x))
+let src = code_typed1((Any,)) do x
+        abstract_unionsplit_fallback(false, x)
+    end
+    @test count(iscall((src, typeof)), src.code) == 2
+    @test count(isinvoke(:println), src.code) == 0
+    @test count(iscall((src, println)), src.code) == 0
+    @test count(iscall((src, abstract_unionsplit_fallback)), src.code) == 1 # fallback dispatch
+end
+let src = code_typed1((Union{Type,Number},)) do x
+        abstract_unionsplit_fallback(false, x)
+    end
+    @test count(iscall((src, typeof)), src.code) == 2
+    @test count(isinvoke(:println), src.code) == 0
+    @test count(iscall((src, println)), src.code) == 0
+    @test count(iscall((src, abstract_unionsplit)), src.code) == 0 # no fallback dispatch
+end
+
+abstract_diagonal_dispatch(x::Int, y::Int) = 1
+abstract_diagonal_dispatch(x::Real, y::Int) = 2
+abstract_diagonal_dispatch(x::Int, y::Real) = 3
+function test_abstract_diagonal_dispatch(xs)
+    @test abstract_diagonal_dispatch(xs[1], xs[2]) == 1
+    @test abstract_diagonal_dispatch(xs[3], xs[4]) == 3
+    @test abstract_diagonal_dispatch(xs[5], xs[6]) == 2
+    @test_throws MethodError abstract_diagonal_dispatch(xs[7], xs[8])
+end
+test_abstract_diagonal_dispatch(Any[
+    1, 1,    # => 1
+    1, 1.0,  # => 3
+    1.0, 1,  # => 2
+    1.0, 1.0 # => MethodError
+])
+
+constrained_dispatch(x::T, y::T) where T<:Real = 0
+let src = code_typed1((Real,Real,)) do x, y
+        constrained_dispatch(x, y)
+    end
+    @test any(iscall((src, constrained_dispatch)), src.code) # should account for MethodError
+end
+@test_throws MethodError let
+    x, y = 1.0, 1
+    constrained_dispatch(x, y)
+end
+
+# issue 43104
+
+@inline isGoodType(@nospecialize x::Type) =
+    x !== Any && !(@noinline Base.has_free_typevars(x))
+let # aggressive inlining of single, abstract method match
+    src = code_typed((Type, Any,)) do x, y
+        isGoodType(x), isGoodType(y)
+    end |> only |> first
+    # both callsites should be inlined
+    @test count(isinvoke(:has_free_typevars), src.code) == 2
+    # `isGoodType(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
+    @test count(iscall((src,isGoodType)), src.code) == 1
+end
+
+@inline isGoodType2(cnd, @nospecialize x::Type) =
+    x !== Any && !(@noinline (cnd ? Core.Compiler.isType : Base.has_free_typevars)(x))
+let # aggressive inlining of single, abstract method match (with constant-prop'ed)
+    src = code_typed((Type, Any,)) do x, y
+        isGoodType2(true, x), isGoodType2(true, y)
+    end |> only |> first
+    # both callsite should be inlined with constant-prop'ed result
+    @test count(isinvoke(:isType), src.code) == 2
+    @test count(isinvoke(:has_free_typevars), src.code) == 0
+    # `isGoodType(y::Any)` isn't fully convered, thus a runtime type check and fallback dynamic dispatch should be inserted
+    @test count(iscall((src,isGoodType2)), src.code) == 1
+end
+
+@noinline function checkBadType!(@nospecialize x::Type)
+    if x === Any || Base.has_free_typevars(x)
+        println(x)
+    end
+    return nothing
+end
+let # aggressive static dispatch of single, abstract method match
+    src = code_typed((Type, Any,)) do x, y
+        checkBadType!(x), checkBadType!(y)
+    end |> only |> first
+    # both callsites should be resolved statically
+    @test count(isinvoke(:checkBadType!), src.code) == 2
+    # `checkBadType!(y::Any)` isn't fully covered, thus a runtime type check and fallback dynamic dispatch should be inserted
+    @test count(iscall((src,checkBadType!)), src.code) == 1
+end
+
+@testset "late_inline_special_case!" begin
+    let src = code_typed((Symbol,Any,Any)) do a, b, c
+            TypeVar(a, b, c)
+        end |> only |> first
+        @test count(iscall((src,TypeVar)), src.code) == 0
+        @test count(iscall((src,Core._typevar)), src.code) == 1
+    end
+    let src = code_typed((TypeVar,Any)) do a, b
+            UnionAll(a, b)
+        end |> only |> first
+        @test count(iscall((src,UnionAll)), src.code) == 0
+    end
+end
+
+# have_fma elimination inside ^
+f_pow() = ^(2.0, -1.0)
+@test fully_eliminated(f_pow, Tuple{})
+
+# bug where Conditional wasn't being properly marked as ConstAPI
+let
+    @noinline fcond(a, b) = a === b
+    ftest(a) = (fcond(a, nothing); a)
+    @test fully_eliminated(ftest, Tuple{Bool})
+end
+
+# sqrt not considered volatile
+f_sqrt() = sqrt(2)
+@test fully_eliminated(f_sqrt, Tuple{})
+
+# use constant prop' result even when the return type doesn't get refined
+const Gx = Ref{Any}()
+Base.@constprop :aggressive function conditional_escape!(cnd, x)
+    if cnd
+        Gx[] = x
+    end
+    return nothing
+end
+@test fully_eliminated((String,)) do x
+    Base.@invoke conditional_escape!(false::Any, x::Any)
+end
+
+@testset "strides for ReshapedArray (PR#44027)" begin
+    # Type-based contiguous check
+    a = vec(reinterpret(reshape,Int16,reshape(view(reinterpret(Int32,randn(10)),2:11),5,:)))
+    f(a) = only(strides(a));
+    @test fully_eliminated(f, Tuple{typeof(a)}) && f(a) == 1
+end
+
+@testset "elimination of `get_binding_type`" begin
+    m = Module()
+    @eval m begin
+        global x::Int
+        f() = Core.get_binding_type($m, :x)
+        g() = Core.get_binding_type($m, :y)
+    end
+
+    @test fully_eliminated(m.f, Tuple{}; retval=Int)
+    src = code_typed(m.g, ())[][1]
+    @test count(iscall((src, Core.get_binding_type)), src.code) == 1
+    @test m.g() === Any
+end
+
+# have_fma elimination inside ^
+f_pow() = ^(2.0, -1.0)
+@test fully_eliminated(f_pow, Tuple{})
+
+# unused total, noinline function
+@noinline function f_total_noinline(x)
+    return x + 1.0
+end
+@noinline function f_voltatile_escape(ptr)
+    unsafe_store!(ptr, 0)
+end
+function f_call_total_noinline_unused(x)
+    f_total_noinline(x)
+    return x
+end
+function f_call_volatile_escape(ptr)
+    f_voltatile_escape(ptr)
+    return ptr
+end
+
+@test fully_eliminated(f_call_total_noinline_unused, Tuple{Float64})
+@test !fully_eliminated(f_call_volatile_escape, Tuple{Ptr{Int}})
+
+let b = Expr(:block, (:(y += sin($x)) for x in randn(1000))...)
+    @eval function f_sin_perf()
+        y = 0.0
+        $b
+        y
+    end
+end
+@test fully_eliminated(f_sin_perf, Tuple{})
+
+# Test that we inline the constructor of something that is not const-inlineable
+const THE_REF_NULL = Ref{Int}()
+const THE_REF = Ref{Int}(0)
+struct FooTheRef
+    x::Ref
+    FooTheRef(v) = new(v === nothing ? THE_REF_NULL : THE_REF)
+end
+let src = code_typed1() do
+        FooTheRef(nothing)
+    end
+    @test count(isnew, src.code) == 1
+end
+let src = code_typed1() do
+        FooTheRef(0)
+    end
+    @test count(isnew, src.code) == 1
+end
+let src = code_typed1() do
+        Base.@invoke FooTheRef(nothing::Any)
+    end
+    @test count(isnew, src.code) == 1
+end
+let src = code_typed1() do
+        Base.@invoke FooTheRef(0::Any)
+    end
+    @test count(isnew, src.code) == 1
+end
+@test fully_eliminated() do
+    FooTheRef(nothing)
+    nothing
+end
+@test fully_eliminated() do
+    FooTheRef(0)
+    nothing
+end
+@test fully_eliminated() do
+    Base.@invoke FooTheRef(nothing::Any)
+    nothing
+end
+@test fully_eliminated() do
+    Base.@invoke FooTheRef(0::Any)
+    nothing
+end
+
+# Test that the Core._apply_iterate bail path taints effects
+function f_apply_bail(f)
+    f(()...)
+    return nothing
+end
+f_call_apply_bail(f) = f_apply_bail(f)
+@test !fully_eliminated(f_call_apply_bail, Tuple{Function})
+
+# Test that arraysize has proper effect modeling
+@test fully_eliminated(M->(size(M, 2); nothing), Tuple{Matrix{Float64}})
+
+# DCE of non-inlined callees
+@noinline noninlined_dce_simple(a) = identity(a)
+@test fully_eliminated((String,)) do s
+    noninlined_dce_simple(s)
+    nothing
+end
+@noinline noninlined_dce_new(a::String) = Some(a)
+@test fully_eliminated((String,)) do s
+    noninlined_dce_new(s)
+    nothing
+end
+mutable struct SafeRef{T}
+    x::T
+end
+Base.getindex(s::SafeRef) = getfield(s, 1)
+Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x)
+@noinline noninlined_dce_new(a::Symbol) = SafeRef(a)
+@test fully_eliminated((Symbol,)) do s
+    noninlined_dce_new(s)
+    nothing
+end
+# should be resolved once we merge https://github.com/JuliaLang/julia/pull/43923
+@test_broken fully_eliminated((Union{Symbol,String},)) do s
+    noninlined_dce_new(s)
+    nothing
+end
+
+# Test that ambigous calls don't accidentally get nothrow effect
+ambig_effect_test(a::Int, b) = 1
+ambig_effect_test(a, b::Int) = 1
+ambig_effect_test(a, b) = 1
+global ambig_unknown_type_global=1
+@noinline function conditionally_call_ambig(b::Bool, a)
+    if b
+        ambig_effect_test(a, ambig_unknown_type_global)
+    end
+    return 0
+end
+function call_call_ambig(b::Bool)
+    conditionally_call_ambig(b, 1)
+    return 1
+end
+@test !fully_eliminated(call_call_ambig, Tuple{Bool})
+
+# Test that a missing methtable identification gets tainted
+# appropriately
+struct FCallback; f::Union{Nothing, Function}; end
+f_invoke_callback(fc) = let f=fc.f; (f !== nothing && f(); nothing); end
+function f_call_invoke_callback(f::FCallback)
+    f_invoke_callback(f)
+    return nothing
+end
+@test !fully_eliminated(f_call_invoke_callback, Tuple{FCallback})
+
+# https://github.com/JuliaLang/julia/issues/41694
+Base.@assume_effects :terminates_globally function issue41694(x)
+    res = 1
+    1 < x < 20 || throw("bad")
+    while x > 1
+        res *= x
+        x -= 1
+    end
+    return res
+end
+@test fully_eliminated() do
+    issue41694(2)
+end
+
+Base.@assume_effects :terminates_globally function recur_termination1(x)
+    x == 1 && return 1
+    1 < x < 20 || throw("bad")
+    return x * recur_termination1(x-1)
+end
+@test fully_eliminated() do
+    recur_termination1(12)
+end
+Base.@assume_effects :terminates_globally function recur_termination21(x)
+    x == 1 && return 1
+    1 < x < 20 || throw("bad")
+    return recur_termination22(x)
+end
+recur_termination22(x) = x * recur_termination21(x-1)
+@test fully_eliminated() do
+    recur_termination21(12) + recur_termination22(12)
+end
+
+const ___CONST_DICT___ = Dict{Any,Any}(Symbol(c) => i for (i, c) in enumerate('a':'z'))
+Base.@assume_effects :total_may_throw concrete_eval(
+    f, args...; kwargs...) = f(args...; kwargs...)
+@test fully_eliminated() do
+    concrete_eval(getindex, ___CONST_DICT___, :a)
+end
+
+# https://github.com/JuliaLang/julia/issues/44732
+struct Component44732
+    v
+end
+struct Container44732
+    x::Union{Nothing,Component44732}
+end
+
+# NOTE make sure to prevent inference bail out
+validate44732(::Component44732) = nothing
+validate44732(::Any) = error("don't erase this error!")
+
+function issue44732(c::Container44732)
+    validate44732(c.x)
+    return nothing
+end
+
+let src = code_typed1(issue44732, (Container44732,))
+    @test any(isinvoke(:validate44732), src.code)
+end
+@test_throws ErrorException("don't erase this error!") issue44732(Container44732(nothing))
+
+global x44200::Int = 0
+function f44200()
+    global x44200 = 0
+    while x44200 < 10
+        x44200 += 1
+    end
+    x44200
+end
+let src = code_typed1(f44200)
+    @test count(x -> isa(x, Core.PiNode), src.code) == 0
+end
+
+# Test that peeling off one case from (::Any) doesn't introduce
+# a dynamic dispatch.
+@noinline f_peel(x::Int) = Base.inferencebarrier(1)
+@noinline f_peel(@nospecialize(x::Any)) = Base.inferencebarrier(2)
+g_call_peel(x) = f_peel(x)
+let src = code_typed1(g_call_peel, Tuple{Any})
+    @test count(isinvoke(:f_peel), src.code) == 2
+end
+
+const my_defined_var = 42
+@test fully_eliminated((); retval=42) do
+    getglobal(@__MODULE__, :my_defined_var, :monotonic)
+end
+@test !fully_eliminated() do
+    getglobal(@__MODULE__, :my_defined_var, :foo)
+end
+
+# Test for deletion of value-dependent control flow that is apparent
+# at inference time, but hard to delete later.
+function maybe_error_int(x::Int)
+    if x > 2
+        Base.donotdelete(Base.inferencebarrier(x))
+        error()
+    end
+    return 1
+end
+@test fully_eliminated() do
+    return maybe_error_int(1)
+end
diff --git a/test/compiler/irpasses.jl b/test/compiler/irpasses.jl
index 26bdced75983ee..48682b9af3b952 100644
--- a/test/compiler/irpasses.jl
+++ b/test/compiler/irpasses.jl
@@ -2,14 +2,19 @@
 
 using Test
 using Base.Meta
-using Core: PhiNode, SSAValue, GotoNode, PiNode, QuoteNode, ReturnNode, GotoIfNot
+import Core:
+    CodeInfo, Argument, SSAValue, GotoNode, GotoIfNot, PiNode, PhiNode,
+    QuoteNode, ReturnNode
 
-# Tests for domsort
+include(normpath(@__DIR__, "irutils.jl"))
+
+# domsort
+# =======
 
 ## Test that domsort doesn't mangle single-argument phis (#29262)
 let m = Meta.@lower 1 + 1
     @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
+    src = m.args[1]::CodeInfo
     src.code = Any[
         # block 1
         Expr(:call, :opaque),
@@ -34,7 +39,7 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src)
     Core.Compiler.verify_ir(ir)
-    domtree = Core.Compiler.construct_domtree(ir.cfg)
+    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
     ir = Core.Compiler.domsort_ssa!(ir, domtree)
     Core.Compiler.verify_ir(ir)
     phi = ir.stmts.inst[3]
@@ -44,7 +49,7 @@ end
 # test that we don't stack-overflow in SNCA with large functions.
 let m = Meta.@lower 1 + 1
     @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
+    src = m.args[1]::CodeInfo
     code = Any[]
     N = 2^15
     for i in 1:2:N
@@ -62,12 +67,525 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src)
     Core.Compiler.verify_ir(ir)
-    domtree = Core.Compiler.construct_domtree(ir.cfg)
+    domtree = Core.Compiler.construct_domtree(ir.cfg.blocks)
     ir = Core.Compiler.domsort_ssa!(ir, domtree)
     Core.Compiler.verify_ir(ir)
 end
 
-# Tests for SROA
+# SROA
+# ====
+
+import Core.Compiler: widenconst
+
+is_load_forwarded(src::CodeInfo) = !any(iscall((src, getfield)), src.code)
+is_scalar_replaced(src::CodeInfo) =
+    is_load_forwarded(src) && !any(iscall((src, setfield!)), src.code) && !any(isnew, src.code)
+
+function is_load_forwarded(@nospecialize(T), src::CodeInfo)
+    for i in 1:length(src.code)
+        x = src.code[i]
+        if iscall((src, getfield), x)
+            widenconst(argextype(x.args[1], src)) <: T && return false
+        end
+    end
+    return true
+end
+function is_scalar_replaced(@nospecialize(T), src::CodeInfo)
+    is_load_forwarded(T, src) || return false
+    for i in 1:length(src.code)
+        x = src.code[i]
+        if iscall((src, setfield!), x)
+            widenconst(argextype(x.args[1], src)) <: T && return false
+        elseif isnew(x)
+            widenconst(argextype(SSAValue(i), src)) <: T && return false
+        end
+    end
+    return true
+end
+
+struct ImmutableXYZ; x; y; z; end
+mutable struct MutableXYZ; x; y; z; end
+struct ImmutableOuter{T}; x::T; y::T; z::T; end
+mutable struct MutableOuter{T}; x::T; y::T; z::T; end
+struct ImmutableRef{T}; x::T; end
+Base.getindex(r::ImmutableRef) = r.x
+mutable struct SafeRef{T}; x::T; end
+Base.getindex(s::SafeRef) = getfield(s, 1)
+Base.setindex!(s::SafeRef, x) = setfield!(s, 1, x)
+
+# simple immutability
+# -------------------
+
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = ImmutableXYZ(x, y, z)
+        xyz.x, xyz.y, xyz.z
+    end
+    @test is_scalar_replaced(src)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=z=# Core.Argument(4)]
+    end
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = (x, y, z)
+        xyz[1], xyz[2], xyz[3]
+    end
+    @test is_scalar_replaced(src)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=z=# Core.Argument(4)]
+    end
+end
+
+# simple mutability
+# -----------------
+
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        xyz.x, xyz.y, xyz.z
+    end
+    @test is_scalar_replaced(src)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=z=# Core.Argument(4)]
+    end
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        xyz.y = 42
+        xyz.x, xyz.y, xyz.z
+    end
+    @test is_scalar_replaced(src)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), 42, #=x=# Core.Argument(4)]
+    end
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        xyz.x, xyz.z = xyz.z, xyz.x
+        xyz.x, xyz.y, xyz.z
+    end
+    @test is_scalar_replaced(src)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=z=# Core.Argument(4), #=y=# Core.Argument(3), #=x=# Core.Argument(2)]
+    end
+end
+
+# uninitialized fields
+# --------------------
+
+# safe cases
+let src = code_typed1() do
+        r = Ref{Any}()
+        r[] = 42
+        return r[]
+    end
+    @test is_scalar_replaced(src)
+end
+let src = code_typed1((Bool,)) do cond
+        r = Ref{Any}()
+        if cond
+            r[] = 42
+            return r[]
+        else
+            r[] = 32
+            return r[]
+        end
+    end
+    @test is_scalar_replaced(src)
+end
+let src = code_typed1((Bool,)) do cond
+        r = Ref{Any}()
+        if cond
+            r[] = 42
+        else
+            r[] = 32
+        end
+        return r[]
+    end
+    @test is_scalar_replaced(src)
+end
+let src = code_typed1((Bool,Bool,Any,Any,Any)) do c1, c2, x, y, z
+        r = Ref{Any}()
+        if c1
+            if c2
+                r[] = x
+            else
+                r[] = y
+            end
+        else
+            r[] = z
+        end
+        return r[]
+    end
+    @test is_scalar_replaced(src)
+end
+
+# unsafe cases
+let src = code_typed1() do
+        r = Ref{Any}()
+        return r[]
+    end
+    @test count(isnew, src.code) == 1
+    @test count(iscall((src, getfield)), src.code) == 1
+end
+let src = code_typed1((Bool,)) do cond
+        r = Ref{Any}()
+        if cond
+            r[] = 42
+        end
+        return r[]
+    end
+    # N.B. `r` should be allocated since `cond` might be `false` and then it will be thrown
+    @test count(isnew, src.code) == 1
+    @test count(iscall((src, setfield!)), src.code) == 1
+    @test count(iscall((src, getfield)), src.code) == 1
+end
+let src = code_typed1((Bool,Bool,Any,Any)) do c1, c2, x, y
+        r = Ref{Any}()
+        if c1
+            if c2
+                r[] = x
+            end
+        else
+            r[] = y
+        end
+        return r[]
+    end
+    # N.B. `r` should be allocated since `c2` might be `false` and then it will be thrown
+    @test count(isnew, src.code) == 1
+    @test count(iscall((src, setfield!)), src.code) == 2
+    @test count(iscall((src, getfield)), src.code) == 1
+end
+
+# aliased load forwarding
+# -----------------------
+# TODO fix broken examples with EscapeAnalysis
+
+# OK: immutable(immutable(...)) case
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = ImmutableXYZ(x, y, z)
+        outer = ImmutableOuter(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test !any(src.code) do @nospecialize x
+        Meta.isexpr(x, :new)
+    end
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)]
+    end
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = ImmutableXYZ(x, y, z)
+        # #42831 forms ::PartialStruct(ImmutableOuter{Any}, Any[ImmutableXYZ, ImmutableXYZ, ImmutableXYZ])
+        # so the succeeding `getproperty`s are type stable and inlined
+        outer = ImmutableOuter{Any}(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test !any(isnew, src.code)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)]
+    end
+end
+
+# OK (mostly): immutable(mutable(...)) case
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        t   = (xyz,)
+        v = t[1].x
+        v, v, v
+    end
+    @test is_scalar_replaced(src)
+end
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        outer = ImmutableOuter(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test is_scalar_replaced(src)
+    @test any(src.code) do @nospecialize x
+        iscall((src, tuple), x) &&
+        x.args[2:end] == Any[#=x=# Core.Argument(2), #=y=# Core.Argument(3), #=y=# Core.Argument(4)]
+    end
+end
+let # this is a simple end to end test case, which demonstrates allocation elimination
+    # by handling `mutable[RefValue{String}](immutable[Tuple](...))` case correctly
+    # NOTE this test case isn't so robust and might be subject to future changes of the broadcasting implementation,
+    # in that case you don't really need to stick to keeping this test case around
+    simple_sroa(s) = broadcast(identity, Ref(s))
+    let src = code_typed1(simple_sroa, (String,))
+        @test is_scalar_replaced(src)
+    end
+    s = Base.inferencebarrier("julia")::String
+    simple_sroa(s)
+    # NOTE don't hard-code `"julia"` in `@allocated` clause and make sure to execute the
+    # compiled code for `simple_sroa`, otherwise everything can be folded even without SROA
+    @test @allocated(simple_sroa(s)) == 0
+end
+let # FIXME: some nested example
+    src = code_typed1((Int,)) do x
+        Ref(Ref(x))[][]
+    end
+    @test_broken is_scalar_replaced(src)
+
+    src = code_typed1((Int,)) do x
+        Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref(Ref((x)))))))))))[][][][][][][][][][]
+    end
+    @test_broken is_scalar_replaced(src)
+end
+
+# FIXME: immutable(mutable(...)) case
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = ImmutableXYZ(x, y, z)
+        outer = MutableOuter(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test_broken !any(isnew, src.code)
+end
+# FIXME: mutable(mutable(...)) case
+let src = code_typed1((Any,Any,Any)) do x, y, z
+        xyz = MutableXYZ(x, y, z)
+        outer = MutableOuter(xyz, xyz, xyz)
+        outer.x.x, outer.y.y, outer.z.z
+    end
+    @test_broken !any(isnew, src.code)
+end
+
+let # should work with constant globals
+    # immutable case
+    # --------------
+    src = @eval Module() begin
+        const REF_FLD = :x
+        struct ImmutableRef{T}
+            x::T
+        end
+
+        code_typed((Int,)) do x
+            r = ImmutableRef{Int}(x) # should be eliminated
+            x = getfield(r, REF_FLD) # should be eliminated
+            return sin(x)
+        end |> only |> first
+    end
+    @test count(iscall((src, getfield)), src.code) == 0
+    @test count(isnew, src.code) == 0
+
+    # mutable case
+    # ------------
+    src = @eval Module() begin
+        const REF_FLD = :x
+        code_typed() do
+            r = Ref{Int}(42) # should be eliminated
+            x = getfield(r, REF_FLD) # should be eliminated
+            return sin(x)
+        end |> only |> first
+    end
+    @test count(iscall((src, getfield)), src.code) == 0
+    @test count(isnew, src.code) == 0
+end
+
+# should work nicely with inlining to optimize away a complicated case
+# adapted from http://wiki.luajit.org/Allocation-Sinking-Optimization#implementation%5B
+struct Point
+    x::Float64
+    y::Float64
+end
+#=@inline=# add(a::Point, b::Point) = Point(a.x + b.x, a.y + b.y)
+function compute_points()
+    a = Point(1.5, 2.5)
+    b = Point(2.25, 4.75)
+    for i in 0:(100000000-1)
+        a = add(add(a, b), b)
+    end
+    a.x, a.y
+end
+let src = code_typed1(compute_points)
+    @test !any(isnew, src.code)
+end
+
+# preserve elimination
+# --------------------
+
+function ispreserved(@nospecialize(x))
+    return function (@nospecialize(stmt),)
+        if Meta.isexpr(stmt, :foreigncall)
+            nccallargs = length(stmt.args[3]::Core.SimpleVector)
+            for pidx = (6+nccallargs):length(stmt.args)
+                if stmt.args[pidx] === x
+                    return true
+                end
+            end
+        end
+        return false
+    end
+end
+
+let src = code_typed1((String,)) do s
+        ccall(:some_ccall, Cint, (Ptr{String},), Ref(s))
+    end
+    @test count(isnew, src.code) == 0
+    @test any(ispreserved(#=s=#Core.Argument(2)), src.code)
+end
+
+# if the mutable struct is directly used, we shouldn't eliminate it
+let src = code_typed1() do
+        a = MutableXYZ(-512275808,882558299,-2133022131)
+        b = Int32(42)
+        ccall(:some_ccall, Cvoid, (MutableXYZ, Int32), a, b)
+        return a.x
+    end
+    @test count(isnew, src.code) == 1
+end
+
+# should eliminate allocation whose address isn't taked even if it has unintialized field(s)
+mutable struct BadRef
+    x::String
+    y::String
+    BadRef(x) = new(x)
+end
+Base.cconvert(::Type{Ptr{BadRef}}, a::String) = BadRef(a)
+Base.unsafe_convert(::Type{Ptr{BadRef}}, ar::BadRef) = Ptr{BadRef}(pointer_from_objref(ar.x))
+let src = code_typed1((String,)) do s
+        ccall(:jl_breakpoint, Cvoid, (Ptr{BadRef},), s)
+    end
+    @test count(isnew, src.code) == 0
+    @test any(ispreserved(#=s=#Core.Argument(2)), src.code)
+end
+
+# isdefined elimination
+# ---------------------
+
+let src = code_typed1((Any,)) do a
+        r = Ref{Any}()
+        r[] = a
+        if isassigned(r)
+            return r[]
+        end
+        return nothing
+    end
+    @test is_scalar_replaced(src)
+end
+
+let src = code_typed1((Bool, Any,)) do cnd, a
+        r = Ref{Any}()
+        if cnd
+            r[] = a # this `setfield!` shouldn't be eliminated
+        end
+        return isassigned(r)
+    end
+    @test count(isnew, src.code) == 1
+    @test count(iscall((src, setfield!)), src.code) == 1
+end
+
+callit(f, args...) = f(args...)
+function isdefined_elim()
+    local arr::Vector{Any}
+    callit() do
+        arr = Any[]
+    end
+    return arr
+end
+let src = code_typed1(isdefined_elim)
+    @test is_scalar_replaced(src)
+end
+@test isdefined_elim() == Any[]
+
+function abmult(r::Int, x0)
+    if r < 0
+        r = -r
+    end
+    f = x -> x * r
+    return @inline f(x0)
+end
+let src = code_typed1(abmult, (Int,Int))
+    @test is_scalar_replaced(src)
+end
+@test abmult(-3, 3) == 9
+
+function abmult2(r0::Int, x0)
+    r::Int = r0
+    if r < 0
+        r = -r
+    end
+    f = x -> x * r
+    return f(x0)
+end
+let src = code_typed1(abmult2, (Int,Int))
+    @test is_scalar_replaced(src)
+end
+@test abmult2(-3, 3) == 9
+
+# comparison lifting
+# ==================
+
+let # lifting `===`
+    src = code_typed1((Bool,Int,)) do c, x
+        y = c ? x : nothing
+        y === nothing # => ϕ(false, true)
+    end
+    @test count(iscall((src, ===)), src.code) == 0
+
+    # should optimize away the iteration protocol
+    src = code_typed1((Int,)) do n
+        s = 0
+        for i in 1:n
+            s += i
+        end
+        s
+    end
+    @test !any(src.code) do @nospecialize x
+        iscall((src, ===), x) && argextype(x.args[2], src) isa Union
+    end
+end
+
+let # lifting `isa`
+    src = code_typed1((Bool,Int,)) do c, x
+        y = c ? x : nothing
+        isa(y, Int) # => ϕ(true, false)
+    end
+    @test count(iscall((src, isa)), src.code) == 0
+
+    src = code_typed1((Int,)) do n
+        s = 0
+        itr = 1:n
+        st = iterate(itr)
+        while !isa(st, Nothing)
+            i, st = itr
+            s += i
+            st = iterate(itr, st)
+        end
+        s
+    end
+    @test !any(src.code) do @nospecialize x
+        iscall((src, isa), x) && argextype(x.args[2], src) isa Union
+    end
+end
+
+let # lifting `isdefined`
+    src = code_typed1((Bool,Some{Int},)) do c, x
+        y = c ? x : nothing
+        isdefined(y, 1) # => ϕ(true, false)
+    end
+    @test count(iscall((src, isdefined)), src.code) == 0
+
+    src = code_typed1((Int,)) do n
+        s = 0
+        itr = 1:n
+        st = iterate(itr)
+        while isdefined(st, 2)
+            i, st = itr
+            s += i
+            st = iterate(itr, st)
+        end
+        s
+    end
+    @test !any(src.code) do @nospecialize x
+        iscall((src, isdefined), x) && argextype(x.args[2], src) isa Union
+    end
+end
 
 mutable struct Foo30594; x::Float64; end
 Base.copy(x::Foo30594) = Foo30594(x.x)
@@ -128,21 +646,19 @@ let nt = (a=1, b=2)
     @test_throws ArgumentError blah31139(nt)
 end
 
-# Expr(:new) annoted as PartialStruct
+# Expr(:new) annotated as PartialStruct
 struct FooPartial
     x
     y
     global f_partial
     f_partial(x) = new(x, 2).x
 end
-let ci = code_typed(f_partial, Tuple{Float64})[1].first
-    @test length(ci.code) == 1 && isa(ci.code[1], ReturnNode)
-end
+@test fully_eliminated(f_partial, Tuple{Float64})
 
 # A SSAValue after the compaction line
 let m = Meta.@lower 1 + 1
     @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
+    src = m.args[1]::CodeInfo
     src.code = Any[
         # block 1
         nothing,
@@ -180,7 +696,7 @@ let m = Meta.@lower 1 + 1
     src.ssaflags = fill(Int32(0), nstmts)
     ir = Core.Compiler.inflate_ir(src, Any[], Any[Any, Any])
     @test Core.Compiler.verify_ir(ir) === nothing
-    ir = @test_nowarn Core.Compiler.getfield_elim_pass!(ir)
+    ir = @test_nowarn Core.Compiler.sroa_pass!(ir)
     @test Core.Compiler.verify_ir(ir) === nothing
 end
 
@@ -205,7 +721,7 @@ end
 let m = Meta.@lower 1 + 1
     # Test that CFG simplify combines redundant basic blocks
     @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
+    src = m.args[1]::CodeInfo
     src.code = Any[
         Core.Compiler.GotoNode(2),
         Core.Compiler.GotoNode(3),
@@ -230,7 +746,7 @@ end
 let m = Meta.@lower 1 + 1
     # Test that CFG simplify doesn't mess up when chaining past return blocks
     @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
+    src = m.args[1]::CodeInfo
     src.code = Any[
         Core.Compiler.GotoIfNot(Core.Compiler.Argument(2), 3),
         Core.Compiler.GotoNode(4),
@@ -260,7 +776,7 @@ let m = Meta.@lower 1 + 1
     # Test that CFG simplify doesn't try to merge every block in a loop into
     # its predecessor
     @assert Meta.isexpr(m, :thunk)
-    src = m.args[1]::Core.CodeInfo
+    src = m.args[1]::CodeInfo
     src.code = Any[
         # Block 1
         Core.Compiler.GotoNode(2),
@@ -315,6 +831,12 @@ let K = rand(2,2)
     @test test_29253(K) == 2
 end
 
+function no_op_refint(r)
+    r[]
+    return
+end
+@test fully_eliminated(no_op_refint,Tuple{Base.RefValue{Int}}; retval=nothing)
+
 # check getfield elim handling of GlobalRef
 const _some_coeffs = (1,[2],3,4)
 splat_from_globalref(x) = (x, _some_coeffs...,)
@@ -342,3 +864,200 @@ let code = code_typed(pi_on_argument, Tuple{Any})[1].first.code,
     @test nisa == 1
     @test found_pi
 end
+
+# issue #38936
+# check that getfield elim can handle unions of tuple types
+mutable struct S38936{T} content::T end
+struct PrintAll{T} <: Function
+    parts::T
+end
+function (f::PrintAll)(io::IO)
+    for x in f.parts
+        print(io, x)
+    end
+end
+let f = PrintAll((S38936("<span>"), "data", S38936("</span")))
+    @test !any(code_typed(f, (IOBuffer,))[1][1].code) do stmt
+        stmt isa Expr && stmt.head === :call && stmt.args[1] === GlobalRef(Core, :tuple)
+    end
+end
+
+exc39508 = ErrorException("expected")
+@noinline function test39508()
+    local err
+    try
+        err = exc39508::Exception
+        throw(err)
+        false
+    catch ex
+        @test ex === err
+    end
+    return err
+end
+@test test39508() === exc39508
+
+let
+    # `typeassert` elimination after SROA
+    # NOTE we can remove this optimization once inference is able to reason about memory-effects
+    src = @eval Module() begin
+        mutable struct Foo; x; end
+
+        code_typed((Int,)) do a
+            x1 = Foo(a)
+            x2 = Foo(x1)
+            return typeassert(x2.x, Foo).x
+        end |> only |> first
+    end
+    # eliminate `typeassert(x2.x, Foo)`
+    @test count(iscall((src, typeassert)), src.code) == 0
+end
+
+let
+    # Test for https://github.com/JuliaLang/julia/issues/43402
+    # Ensure that structs required not used outside of the ccall,
+    # still get listed in the ccall_preserves
+
+    src = @eval Module() begin
+        @inline function effectful()
+            s1 = Ref{Csize_t}()
+            s2 = Ref{Csize_t}()
+            ccall(:some_ccall, Cvoid,
+                  (Ref{Csize_t},Ref{Csize_t}),
+                  s1, s2)
+            return s1[], s2[]
+        end
+
+        code_typed() do
+            s1, s2 = effectful()
+            return s1
+        end |> only |> first
+    end
+
+    refs = map(Core.SSAValue, findall(x->x isa Expr && x.head == :new, src.code))
+    some_ccall = findfirst(x -> x isa Expr && x.head == :foreigncall && x.args[1] == :(:some_ccall), src.code)
+    @assert some_ccall !== nothing
+    stmt = src.code[some_ccall]
+    nccallargs = length(stmt.args[3]::Core.SimpleVector)
+    preserves = stmt.args[6+nccallargs:end]
+    @test length(refs) == 2
+    @test length(preserves) == 2
+    @test all(alloc -> alloc in preserves, refs)
+end
+
+# test `stmt_effect_free` and DCE
+# ===============================
+
+let # effect-freeness computation for array allocation
+
+    # should eliminate dead allocations
+    good_dims = (0, 2)
+    for dim in good_dims, N in 0:10
+        dims = ntuple(i->dim, N)
+        @eval @test fully_eliminated(()) do
+            Array{Int,$N}(undef, $(dims...))
+            nothing
+        end
+    end
+
+    # shouldn't eliminate errorneous dead allocations
+    bad_dims = [-1,           # should keep "invalid Array dimensions"
+                typemax(Int)] # should keep "invalid Array size"
+    for dim in bad_dims, N in 1:10
+        dims = ntuple(i->dim, N)
+        @eval @test !fully_eliminated(()) do
+            Array{Int,$N}(undef, $(dims...))
+            nothing
+        end
+    end
+
+    # some high-level examples
+    @test fully_eliminated(()) do
+        Int[]
+        nothing
+    end
+    @test fully_eliminated(()) do
+        Matrix{Tuple{String,String}}(undef, 4, 4)
+        nothing
+    end
+    @test fully_eliminated(()) do
+        IdDict{Any,Any}()
+        nothing
+    end
+end
+
+# allow branch folding to look at type information
+let ci = code_typed1(optimize=false) do
+        cond = 1 + 1 == 2
+        if !cond
+            gcd(24, 36)
+        else
+            gcd(64, 128)
+        end
+    end
+    ir = Core.Compiler.inflate_ir(ci)
+    @test count(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.inst) == 1
+    ir = Core.Compiler.compact!(ir, true)
+    @test count(@nospecialize(stmt)->isa(stmt, Core.GotoIfNot), ir.stmts.inst) == 0
+end
+
+# Test that adce_pass! can drop phi node uses that can be concluded unused
+# from PiNode analysis.
+let src = @eval Module() begin
+        @noinline mkfloat() = rand(Float64)
+        @noinline use(a::Float64) = ccall(:jl_, Cvoid, (Any,), a)
+        dispatch(a::Float64) = use(a)
+        dispatch(a::Tuple) = nothing
+        function foo(b)
+            a = mkfloat()
+            a = b ? (a, 2.0) : a
+            dispatch(a)
+        end
+        code_typed(foo, Tuple{Bool})[1][1]
+    end
+    @test count(iscall((src, Core.tuple)), src.code) == 0
+end
+
+# Test that cfg_simplify can converging control flow through empty blocks
+function foo_cfg_empty(b)
+    if b
+        @goto x
+    end
+    @label x
+    return 1
+end
+let ci = code_typed(foo_cfg_empty, Tuple{Bool}, optimize=true)[1][1]
+    ir = Core.Compiler.inflate_ir(ci)
+    @test length(ir.stmts) == 3
+    @test length(ir.cfg.blocks) == 3
+    Core.Compiler.verify_ir(ir)
+    ir = Core.Compiler.cfg_simplify!(ir)
+    Core.Compiler.verify_ir(ir)
+    @test length(ir.cfg.blocks) <= 2
+    @test isa(ir.stmts[length(ir.stmts)][:inst], ReturnNode)
+end
+
+@test Core.Compiler.is_effect_free(Base.infer_effects(getfield, (Complex{Int}, Symbol)))
+@test Core.Compiler.is_effect_free(Base.infer_effects(getglobal, (Module, Symbol)))
+
+# Test that UseRefIterator gets SROA'd inside of new_to_regular (#44557)
+# expression and new_to_regular offset are arbitrary here, we just want to see the UseRefIterator erased
+let e = Expr(:call, Core.GlobalRef(Base, :arrayset), false, Core.SSAValue(4), Core.SSAValue(9), Core.SSAValue(8))
+    new_to_reg(expr) = Core.Compiler.new_to_regular(expr, 1)
+    @allocated new_to_reg(e) # warmup call
+    @test (@allocated new_to_reg(e)) == 0
+end
+
+# Test that SROA doesn't try to forward a previous iteration's SSA value
+let sroa_no_forward() = begin
+    res = (0, 0)
+    for i in 1:5
+        a = first(res)
+        a == 5 && error()
+        if i == 1
+            res = (i, 2.0)
+        end
+    end
+    return res
+    end
+    @test sroa_no_forward() == (1, 2.0)
+end
diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl
new file mode 100644
index 00000000000000..06d261720bdf82
--- /dev/null
+++ b/test/compiler/irutils.jl
@@ -0,0 +1,34 @@
+import Core: CodeInfo, ReturnNode, MethodInstance
+import Core.Compiler: argextype, singleton_type
+import Base.Meta: isexpr
+
+argextype(@nospecialize args...) = argextype(args..., Any[])
+code_typed1(args...; kwargs...) = first(only(code_typed(args...; kwargs...)))::CodeInfo
+get_code(args...; kwargs...) = code_typed1(args...; kwargs...).code
+
+# check if `x` is a statement with a given `head`
+isnew(@nospecialize x) = isexpr(x, :new)
+isreturn(@nospecialize x) = isa(x, ReturnNode)
+
+# check if `x` is a dynamic call of a given function
+iscall(y) = @nospecialize(x) -> iscall(y, x)
+function iscall((src, f)::Tuple{CodeInfo,Base.Callable}, @nospecialize(x))
+    return iscall(x) do @nospecialize x
+        singleton_type(argextype(x, src)) === f
+    end
+end
+iscall(pred::Base.Callable, @nospecialize(x)) = isexpr(x, :call) && pred(x.args[1])
+
+# check if `x` is a statically-resolved call of a function whose name is `sym`
+isinvoke(y) = @nospecialize(x) -> isinvoke(y, x)
+isinvoke(sym::Symbol, @nospecialize(x)) = isinvoke(mi->mi.def.name===sym, x)
+isinvoke(pred::Function, @nospecialize(x)) = isexpr(x, :invoke) && pred(x.args[1]::MethodInstance)
+
+function fully_eliminated(@nospecialize args...; retval=(@__FILE__), kwargs...)
+    code = code_typed1(args...; kwargs...).code
+    if retval !== (@__FILE__)
+        return length(code) == 1 && isreturn(code[1]) && code[1].val == retval
+    else
+        return length(code) == 1 && isreturn(code[1])
+    end
+end
diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl
index ae8e86c2c9a5ed..f1bd442e7f0937 100644
--- a/test/compiler/ssair.jl
+++ b/test/compiler/ssair.jl
@@ -36,7 +36,7 @@ end
 #        false, false, false, false
 #    ))
 #
-#    NullLineInfo = Core.LineInfoNode(Main, Symbol(""), Symbol(""), 0, 0)
+#    NullLineInfo = Core.LineInfoNode(Main, Symbol(""), Symbol(""), Int32(0), Int32(0))
 #    Compiler.run_passes(ci, 1, [NullLineInfo])
 #    # XXX: missing @test
 #end
@@ -67,10 +67,10 @@ let cfg = CFG(BasicBlock[
     make_bb([2, 3] , [5]   ),
     make_bb([2, 4] , []    ),
 ], Int[])
-    dfs = Compiler.DFS(cfg, Compiler.BBNumber(1))
-    @test dfs.numbering[dfs.parents[dfs.reverse[5]]] == 4
-    let correct_idoms = Compiler.naive_idoms(cfg)
-        @test Compiler.SNCA(cfg) == correct_idoms
+    dfs = Compiler.DFS(cfg.blocks)
+    @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4
+    let correct_idoms = Compiler.naive_idoms(cfg.blocks)
+        @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms
         # For completeness, reverse the order of pred/succ in the CFG and verify
         # the answer doesn't change (it does change the which node is chosen
         # as the semi-dominator, since it changes the DFS numbering).
@@ -81,7 +81,7 @@ let cfg = CFG(BasicBlock[
                 c && (blocks[4] = make_bb(reverse(blocks[4].preds), blocks[4].succs))
                 d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs))
                 cfg′ = CFG(blocks, cfg.index)
-                @test Compiler.SNCA(cfg′) == correct_idoms
+                @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms
             end
         end
     end
@@ -121,7 +121,7 @@ let cfg = CFG(BasicBlock[
     make_bb([2, 3]    , []    ),
 ], Int[])
     insts = Compiler.InstructionStream([], [], Any[], Int32[], UInt8[])
-    code = Compiler.IRCode(insts, cfg, LineInfoNode[], [], [], [])
+    code = Compiler.IRCode(insts, cfg, LineInfoNode[], [], Expr[], [])
     compact = Compiler.IncrementalCompact(code, true)
     @test length(compact.result_bbs) == 4 && 0 in compact.result_bbs[3].preds
 end
@@ -172,15 +172,10 @@ let ci = make_ci([
 end
 
 # Test that GlobalRef in value position is non-canonical
-let ci = (Meta.@lower 1 + 1).args[1]
-    ci.code = [
+let ci = make_ci([
         Expr(:call, GlobalRef(Main, :something_not_defined_please))
         ReturnNode(SSAValue(1))
-    ]
-    nstmts = length(ci.code)
-    ci.ssavaluetypes = nstmts
-    ci.codelocs = fill(Int32(1), nstmts)
-    ci.ssaflags = fill(Int32(0), nstmts)
+    ])
     ir = Core.Compiler.inflate_ir(ci)
     ir = Core.Compiler.compact!(ir, true)
     @test_throws ErrorException Core.Compiler.verify_ir(ir, false)
@@ -233,3 +228,109 @@ let ci = make_ci([
     ir = Core.Compiler.compact!(ir, true)
     @test Core.Compiler.verify_ir(ir) == nothing
 end
+
+# issue #37919
+let ci = code_lowered(()->@isdefined(_not_def_37919_), ())[1]
+    ir = Core.Compiler.inflate_ir(ci)
+    @test Core.Compiler.verify_ir(ir) === nothing
+end
+
+# Test dynamic update of domtree with edge insertions and deletions in the
+# following CFG:
+#
+#     1,1
+#     |  \
+#     |   \
+#     |    3,4 <
+#     |    |    \
+#     2,2  4,5   |
+#     |    |    /
+#     |    6,6 /
+#     |   /
+#     |  /
+#     5,3
+#
+# Nodes indicate BB number, preorder number
+# Edges point down, except the arrow that points up
+let cfg = CFG(BasicBlock[
+        make_bb([],     [3, 2]), # the order of the successors is deliberate
+        make_bb([1],    [5]),    # and is to determine the preorder numbers
+        make_bb([1, 6], [4]),
+        make_bb([3],    [6]),
+        make_bb([2, 6], []),
+        make_bb([4],    [5, 3]),
+    ], Int[])
+    domtree = Compiler.construct_domtree(cfg.blocks)
+    @test domtree.dfs_tree.to_pre == [1, 2, 4, 5, 3, 6]
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+
+    # Test removal of edge between a parent and child in the DFS tree, which
+    # should trigger complete recomputation of domtree (first case in algorithm
+    # for removing edge from domtree dynamically)
+    Compiler.cfg_delete_edge!(cfg, 2, 5)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 2, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 6, 4]
+    # Add edge back (testing first case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 2, 5)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 2, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+
+    # Test second case in algorithm for removing edges from domtree, in which
+    # `from` is on a semidominator path from the semidominator of `to` to `to`
+    Compiler.cfg_delete_edge!(cfg, 6, 5)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 2, 4]
+    # Add edge back (testing second case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 6, 5)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 5)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+
+    # Test last case for removing edges, in which edge does not satisfy either
+    # of the above conditions
+    Compiler.cfg_delete_edge!(cfg, 6, 3)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 6, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+    # Add edge back (testing second case for insertion)
+    Compiler.cfg_insert_edge!(cfg, 6, 3)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 6, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+
+    # Try removing all edges from root
+    Compiler.cfg_delete_edge!(cfg, 1, 2)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 2)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 1, 3, 6, 4]
+    Compiler.cfg_delete_edge!(cfg, 1, 3)
+    Compiler.domtree_delete_edge!(domtree, cfg.blocks, 1, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 0, 0, 0, 0, 0]
+    # Add edges back
+    Compiler.cfg_insert_edge!(cfg, 1, 2)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 2)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 0, 0, 2, 0]
+    Compiler.cfg_insert_edge!(cfg, 1, 3)
+    Compiler.domtree_insert_edge!(domtree, cfg.blocks, 1, 3)
+    @test domtree.idoms_bb == Compiler.naive_idoms(cfg.blocks) == [0, 1, 1, 3, 1, 4]
+end
+
+# Issue #41975 - SSA conversion drops type check
+f_if_typecheck() = (if nothing; end; unsafe_load(Ptr{Int}(0)))
+@test_throws TypeError f_if_typecheck()
+
+@test let # https://github.com/JuliaLang/julia/issues/42258
+    code = quote
+        function foo()
+            a = @noinline rand(rand(0:10))
+            if isempty(a)
+                err = BoundsError(a)
+                throw(err)
+                return nothing
+            end
+            return a
+        end
+        code_typed(foo; optimize=true)
+
+        code_typed(Core.Compiler.setindex!, (Core.Compiler.UseRef,Core.Compiler.NewSSAValue); optimize=true)
+    end |> string
+    cmd = `$(Base.julia_cmd()) -g 2 -e $code`
+    stderr = IOBuffer()
+    success(pipeline(Cmd(cmd); stdout=stdout, stderr=stderr)) && isempty(String(take!(stderr)))
+end
diff --git a/test/compiler/validation.jl b/test/compiler/validation.jl
index d07007069b5c8b..ffa79ed1c823d4 100644
--- a/test/compiler/validation.jl
+++ b/test/compiler/validation.jl
@@ -19,9 +19,9 @@ function f22938(a, b, x...)
 end
 
 msig = Tuple{typeof(f22938),Int,Int,Int,Int}
-world = typemax(UInt)
+world = Base.get_world_counter()
 match = Base._methods_by_ftype(msig, -1, world)[]
-mi = Core.Compiler.specialize_method(match, false)
+mi = Core.Compiler.specialize_method(match)
 c0 = Core.Compiler.retrieve_code_info(mi)
 
 @test isempty(Core.Compiler.validate_code(mi))
@@ -105,6 +105,14 @@ end
     @test errors[1].kind === Core.Compiler.SSAVALUETYPES_MISMATCH_UNINFERRED
 end
 
+@testset "SSAFLAGS_MISMATCH" begin
+    c = copy(c0)
+    empty!(c.ssaflags)
+    errors = Core.Compiler.validate_code(c)
+    @test length(errors) == 1
+    @test errors[1].kind === Core.Compiler.SSAFLAGS_MISMATCH
+end
+
 @testset "SIGNATURE_NARGS_MISMATCH" begin
     old_sig = mi.def.sig
     mi.def.sig = Tuple{1,2}
diff --git a/test/complex.jl b/test/complex.jl
index 07183d7fca3545..20470dd5617e7d 100644
--- a/test/complex.jl
+++ b/test/complex.jl
@@ -123,6 +123,8 @@ end
             @test atanh(x) ≈ atanh(big(x))
             @test cis(real(x)) ≈ cis(real(big(x)))
             @test cis(x) ≈ cis(big(x))
+            @test cispi(real(x)) ≈ cispi(real(big(x)))
+            @test cispi(x) ≈ cispi(big(x))
             @test cos(x) ≈ cos(big(x))
             @test cosh(x) ≈ cosh(big(x))
             @test exp(x) ≈ exp(big(x))
@@ -918,6 +920,21 @@ end
     @test cis(1.0+0.0im) ≈ 0.54030230586813971740093660744297660373231042061+0.84147098480789650665250232163029899962256306079im
     @test cis(pi) ≈ -1.0+0.0im
     @test cis(pi/2) ≈ 0.0+1.0im
+    @test cispi(false) == 1
+    @test cispi(true) == -1
+    @test cispi(-1) == -1
+    @test cispi(0) == 1
+    @test cispi(1) == -1
+    @test cispi(2) == 1
+    @test cispi(0.0) == cispi(0)
+    @test cispi(1.0) == cispi(1)
+    @test cispi(2.0) == cispi(2)
+    @test cispi(0.5) == im
+    @test cispi(1.5) == -im
+    @test cispi(0.25) ≈ cis(π/4)
+    @test cispi(0.0+0.0im) == cispi(0)
+    @test cispi(1.0+0.0im) == cispi(1)
+    @test cispi(2.0+0.0im) == cispi(2)
 end
 
 @testset "exp2" begin
@@ -1022,7 +1039,7 @@ end
 @testset "corner cases of division, issue #22983" begin
     # These results abide by ISO/IEC 10967-3:2006(E) and
     # mathematical definition of division of complex numbers.
-    for T in (Float32, Float64, BigFloat)
+    for T in (Float16, Float32, Float64, BigFloat)
         @test isequal(one(T) / zero(Complex{T}), one(Complex{T}) / zero(Complex{T}))
         @test isequal(one(T) / zero(Complex{T}), Complex{T}(NaN, NaN))
         @test isequal(one(Complex{T}) / zero(T), Complex{T}(Inf, NaN))
@@ -1033,7 +1050,7 @@ end
 end
 
 @testset "division by Inf, issue#23134" begin
-    @testset "$T" for T in (Float32, Float64, BigFloat)
+    @testset "$T" for T in (Float16, Float32, Float64, BigFloat)
         @test isequal(one(T) / complex(T(Inf)),         complex(zero(T), -zero(T)))
         @test isequal(one(T) / complex(T(Inf), one(T)), complex(zero(T), -zero(T)))
         @test isequal(one(T) / complex(T(Inf), T(NaN)), complex(zero(T), -zero(T)))
@@ -1071,16 +1088,10 @@ end
         @test isequal(one(T) / complex(T(-NaN),  T(-Inf)), complex(-zero(T), zero(T)))
 
         # divide complex by complex Inf
-        if T == Float64
-            @test_broken isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
-            @test_broken isequal(complex(one(T)) / complex(T(-Inf), T(Inf)), complex(-zero(T), -zero(T)))
-        elseif T == Float32
-            @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
-            @test_broken isequal(complex(one(T)) / complex(T(-Inf), T(Inf)), complex(-zero(T), -zero(T)))
-        else
-            @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
-            @test isequal(complex(one(T)) / complex(T(-Inf), T(Inf)), complex(-zero(T), -zero(T)))
-        end
+        @test isequal(complex(one(T)) / complex(T(Inf), T(-Inf)), complex(zero(T), zero(T)))
+        @test isequal(complex(one(T)) / complex(T(-Inf), T(Inf)), complex(-zero(T), -zero(T)))
+        @test isequal(complex(T(Inf)) / complex(T(Inf), T(-Inf)), complex(T(NaN), T(NaN)))
+        @test isequal(complex(T(NaN)) / complex(T(-Inf), T(Inf)), complex(T(NaN), T(NaN)))
     end
 end
 
@@ -1187,3 +1198,13 @@ end
 
 # complex with non-concrete eltype
 @test_throws ErrorException complex(Union{Complex{Int}, Nothing}[])
+
+@testset "ispow2 and iseven/isodd" begin
+    @test ispow2(4+0im)
+    @test ispow2(0.25+0im)
+    @test !ispow2(4+5im)
+    @test !ispow2(7+0im)
+    @test iseven(6+0im) && !isodd(6+0im)
+    @test !iseven(7+0im) && isodd(7+0im)
+    @test !iseven(6+1im) && !isodd(7+1im)
+end
diff --git a/test/copy.jl b/test/copy.jl
index 34d1c20c5f4fa7..28d34e4756a6b1 100644
--- a/test/copy.jl
+++ b/test/copy.jl
@@ -233,4 +233,8 @@ end
     @test copyto!(s, view(Int[],Int[])) == [1, 2]
     @test copyto!(s, Float64[]) == [1, 2]
     @test copyto!(s, String[]) == [1, 2] # No error
-end
\ No newline at end of file
+end
+
+@testset "deepcopy_internal arrays" begin
+    @test (@inferred Base.deepcopy_internal(zeros(), IdDict())) == zeros()
+end
diff --git a/test/core.jl b/test/core.jl
index 5d5a66be209f68..8362a7a27bec60 100644
--- a/test/core.jl
+++ b/test/core.jl
@@ -2,14 +2,111 @@
 
 # test core language features
 
-using Random, SparseArrays, InteractiveUtils
+using Random, InteractiveUtils
 
 const Bottom = Union{}
 
-
 # For curmod_*
 include("testenv.jl")
 
+## tests that `const` field declarations
+
+# sanity tests that our built-in types are marked correctly for const fields
+for (T, c) in (
+        (Core.CodeInfo, []),
+        (Core.CodeInstance, [:def]),
+        (Core.Method, [#=:name, :module, :file, :line, :primary_world, :sig, :slot_syms, :external_mt, :nargs, :called, :nospecialize, :nkw, :isva, :pure, :is_for_opaque_closure, :constprop=#]),
+        (Core.MethodInstance, [#=:def, :specTypes, :sparam_vals]=#]),
+        (Core.MethodTable, [:module]),
+        (Core.TypeMapEntry, [:sig, :simplesig, :guardsigs, :min_world, :max_world, :func, :isleafsig, :issimplesig, :va]),
+        (Core.TypeMapLevel, []),
+        (Core.TypeName, [:name, :module, :names, :atomicfields, :constfields, :wrapper, :mt, :hash, :n_uninitialized, :flags]),
+        (DataType, [:name, :super, :parameters, :instance, :hash]),
+    )
+    @test Set((fieldname(T, i) for i in 1:fieldcount(T) if isconst(T, i))) == Set(c)
+end
+
+@test_throws(ErrorException("setfield!: const field .name of type DataType cannot be changed"),
+    setfield!(Int, :name, Int.name))
+@test_throws(ErrorException("setfield!: const field .name of type DataType cannot be changed"),
+    (Base.Experimental.@force_compile; setfield!(Int, :name, Int.name)))
+
+@test_throws(ErrorException("invalid field attribute const for immutable struct"),
+    @eval struct ABCDconst
+        const abcd
+    end)
+mutable struct ABCDconst
+    const a
+    const b::Int
+    c
+    const d::Union{Int,Nothing}
+end
+@test_throws(ErrorException("invalid redefinition of constant ABCDconst"),
+    mutable struct ABCDconst
+        const a
+        const b::Int
+        c
+        d::Union{Int,Nothing}
+    end)
+@test_throws(ErrorException("invalid redefinition of constant ABCDconst"),
+    mutable struct ABCDconst
+        a
+        b::Int
+        c
+        d::Union{Int,Nothing}
+    end)
+let abcd = ABCDconst(1, 2, 3, 4)
+    @test (1, 2, 3, 4) === (abcd.a, abcd.b, abcd.c, abcd.d)
+    @test_throws(ErrorException("setfield!: const field .a of type ABCDconst cannot be changed"),
+        abcd.a = 0)
+    @test_throws(ErrorException("replacefield!: const field .a of type ABCDconst cannot be changed"),
+        replacefield!(abcd, :a, 1, 0))
+    @test_throws(ErrorException("modifyfield!: const field .a of type ABCDconst cannot be changed"),
+        modifyfield!(abcd, :a, +, 1))
+    @test_throws(ErrorException("swapfield!: const field .a of type ABCDconst cannot be changed"),
+        swapfield!(abcd, :a, 0))
+    @test_throws(ErrorException("setfield!: const field .b of type ABCDconst cannot be changed"),
+        abcd.b = 0)
+    abcd.c = "not constant"
+    @test_throws(ErrorException("setfield!: const field .d of type ABCDconst cannot be changed"),
+        abcd.d = nothing)
+    @test (1, 2, "not constant", 4) === (abcd.a, abcd.b, abcd.c, abcd.d)
+end
+# repeat with the compiler
+let abcd = ABCDconst(1, 2, 3, 4)
+    Base.Experimental.@force_compile
+    @test (1, 2, 3, 4) === (abcd.a, abcd.b, abcd.c, abcd.d)
+    @test_throws(ErrorException("setfield!: const field .a of type ABCDconst cannot be changed"),
+        abcd.a = 0)
+    @test_throws(ErrorException("replacefield!: const field .a of type ABCDconst cannot be changed"),
+        replacefield!(abcd, :a, 1, 0))
+    @test_throws(ErrorException("modifyfield!: const field .a of type ABCDconst cannot be changed"),
+        modifyfield!(abcd, :a, +, 1))
+    @test_throws(ErrorException("swapfield!: const field .a of type ABCDconst cannot be changed"),
+        swapfield!(abcd, :a, 0))
+    @test_throws(ErrorException("setfield!: const field .b of type ABCDconst cannot be changed"),
+        abcd.b = 0)
+    abcd.c = "not constant"
+    @test_throws(ErrorException("setfield!: const field .d of type ABCDconst cannot be changed"),
+        abcd.d = nothing)
+    @test (1, 2, "not constant", 4) === (abcd.a, abcd.b, abcd.c, abcd.d)
+end
+
+# test `===` handling null pointer in struct #44712
+struct N44712
+    a::Some{Any}
+    b::Int
+    N44712() = new()
+end
+let a  = Int[0, 1], b = Int[0, 2]
+    GC.@preserve a b begin
+        @test unsafe_load(Ptr{N44712}(pointer(a))) !== unsafe_load(Ptr{N44712}(pointer(b)))
+    end
+end
+
+# another possible issue in #44712
+@test (("", 0),) !== (("", 1),)
+
 f47(x::Vector{Vector{T}}) where {T} = 0
 @test_throws MethodError f47(Vector{Vector}())
 @test f47(Vector{Vector{Int}}()) == 0
@@ -240,6 +337,15 @@ end
 #struct S22624{A,B,C} <: Ref{S22624{Int64,A}}; end
 @test_broken @isdefined S22624
 
+# issue #42297
+mutable struct Node42297{T, V}
+    value::V
+    next::Union{Node42297{T, T}, Node42297{T, Val{T}}, Nothing}
+    Node42297{T}(value) where {T} = new{T, typeof(value)}(value, nothing)
+end
+@test fieldtype(Node42297{Int,Val{Int}}, 1) === Val{Int}
+@test fieldtype(Node42297{Int,Int}, 1) === Int
+
 # issue #3890
 mutable struct A3890{T1}
     x::Matrix{Complex{T1}}
@@ -322,9 +428,6 @@ function typeassert_instead_of_decl()
 end
 @test_throws TypeError typeassert_instead_of_decl()
 
-# type declarations on globals not implemented yet
-@test_throws ErrorException eval(Meta.parse("global x20327::Int"))
-
 y20327 = 1
 @test_throws TypeError y20327::Float64
 
@@ -411,7 +514,7 @@ function foo23996(xs...)
     bar(::AbstractFloat) = push!(rets, 2)
     bar(::Bool) = foobar()
     for x in xs
-	bar(x)
+        bar(x)
     end
     rets
 end
@@ -998,6 +1101,8 @@ end
 # Module() constructor
 @test names(Module(:anonymous), all = true, imported = true) == [:anonymous]
 @test names(Module(:anonymous, false), all = true, imported = true) == [:anonymous]
+@test Module(:anonymous, false, true).Core == Core
+@test_throws UndefVarError Module(:anonymous, false, false).Core
 
 # exception from __init__()
 let didthrow =
@@ -1100,9 +1205,9 @@ end
 let strct = LoadError("yofile", 0, "bad")
     @test nfields(strct) == 3 # sanity test
     @test_throws BoundsError(strct, 10) getfield(strct, 10)
-    @test_throws ErrorException("setfield! immutable struct of type LoadError cannot be changed") setfield!(strct, 0, "")
-    @test_throws ErrorException("setfield! immutable struct of type LoadError cannot be changed") setfield!(strct, 4, "")
-    @test_throws ErrorException("setfield! immutable struct of type LoadError cannot be changed") setfield!(strct, :line, 0)
+    @test_throws ErrorException("setfield!: immutable struct of type LoadError cannot be changed") setfield!(strct, 0, "")
+    @test_throws ErrorException("setfield!: immutable struct of type LoadError cannot be changed") setfield!(strct, 4, "")
+    @test_throws ErrorException("setfield!: immutable struct of type LoadError cannot be changed") setfield!(strct, :line, 0)
     @test strct.file == "yofile"
     @test strct.line === 0
     @test strct.error == "bad"
@@ -1124,7 +1229,7 @@ let mstrct = TestMutable("melm", 1, nothing)
     @test_throws BoundsError(mstrct, 4) setfield!(mstrct, 4, "")
 end
 let strct = LoadError("yofile", 0, "bad")
-    @test_throws(ErrorException("setfield! immutable struct of type LoadError cannot be changed"),
+    @test_throws(ErrorException("setfield!: immutable struct of type LoadError cannot be changed"),
                  ccall(:jl_set_nth_field, Cvoid, (Any, Csize_t, Any), strct, 0, ""))
 end
 let mstrct = TestMutable("melm", 1, nothing)
@@ -1219,7 +1324,7 @@ end
 # issue #22842
 f22842(x::UnionAll) = UnionAll
 f22842(x::DataType) = length(x.parameters)
-@test f22842(Tuple{Vararg{Int64,N} where N}) == 1
+@test f22842(Tuple{Vararg{Int64}}) == 1
 @test f22842(Tuple{Vararg{Int64,N}} where N) === UnionAll
 
 # issue #1153
@@ -1442,6 +1547,12 @@ let
     @test invoke(i2169, Tuple{Array}, Int8[1]) === Int8(-128)
 end
 
+# issue #44227
+struct F{T} end
+F{Int32}(; y=1) = 1
+F{Int64}(; y=1) = invoke(F{Int32}, Tuple{}; y)
+@test F{Int64}() === 1
+
 # issue #2365
 mutable struct B2365{T}
      v::Union{T, Nothing}
@@ -2081,7 +2192,7 @@ mutable struct A6142 <: AbstractMatrix{Float64}; end
 +(x::A6142, y::AbstractRange) = "AbstractRange method called" #16324 ambiguity
 
 # issue #6175
-function g6175(); print(""); (); end
+function g6175(); GC.safepoint(); (); end
 g6175(i::Real, I...) = g6175(I...)
 g6175(i, I...) = tuple(length(i), g6175(I...)...)
 @test g6175(1:5) === (5,)
@@ -2221,7 +2332,7 @@ day_in(obj6387)
 function segfault6793(;gamma=1)
     A = 1
     B = 1
-    print()
+    GC.safepoint()
     return
     -gamma
     nothing
@@ -2390,19 +2501,14 @@ let ex = Expr(:(=), :(f8338(x;y=4)), :(x*y))
 end
 
 # call overloading (#2403)
-(x::Int)(y::Int) = x + 3y
 issue2403func(f) = f(7)
-let x = 10
-    @test x(3) == 19
-    @test x((3,)...) == 19
-    @test issue2403func(x) == 31
-end
 mutable struct Issue2403
     x
 end
 (i::Issue2403)(y) = i.x + 2y
 let x = Issue2403(20)
     @test x(3) == 26
+    @test x((3,)...) == 26
     @test issue2403func(x) == 34
 end
 
@@ -3327,7 +3433,7 @@ function f11065()
         if i == 1
             z = "z is defined"
         elseif i == 2
-            print(z)
+            print(z) # z is undefined
         end
     end
 end
@@ -3522,9 +3628,10 @@ end
 @test_throws TypeError Union{Int, 1}
 
 @test_throws ErrorException Vararg{Any,-2}
-@test_throws ErrorException Vararg{Int, N} where N<:T where T
-@test_throws ErrorException Vararg{Int, N} where N<:Integer
-@test_throws ErrorException Vararg{Int, N} where N>:Integer
+# Disabled due to #39698, see src/jltypes.c
+#@test_throws ErrorException Vararg{Int, N} where N<:T where T
+#@test_throws ErrorException Vararg{Int, N} where N<:Integer
+#@test_throws ErrorException Vararg{Int, N} where N>:Integer
 
 mutable struct FooNTuple{N}
     z::Tuple{Integer, Vararg{Int, N}}
@@ -3578,7 +3685,7 @@ f12092(x::Int, y::Int...) = 2
 # NOTE: should have > MAX_TUPLETYPE_LEN arguments
 f12063(tt, g, p, c, b, v, cu::T, d::AbstractArray{T, 2}, ve) where {T} = 1
 f12063(args...) = 2
-g12063() = f12063(0, 0, 0, 0, 0, 0, 0.0, spzeros(0,0), Int[])
+g12063() = f12063(0, 0, 0, 0, 0, 0, 0.0, zeros(0,0), Int[])
 @test g12063() == 1
 
 # issue #11587
@@ -3649,7 +3756,7 @@ end
 
 end
 
-# don't allow redefining types if ninitialized changes
+# don't allow redefining types if n_uninitialized changes
 struct NInitializedTestType
     a
 end
@@ -4125,15 +4232,6 @@ let ex = quote
     @test ex.args[2] == :test
 end
 
-# issue #25652
-x25652 = 1
-x25652_2 = let (x25652, _) = (x25652, nothing)
-    x25652 = x25652 + 1
-    x25652
-end
-@test x25652_2 == 2
-@test x25652 == 1
-
 # issue #15180
 function f15180(x::T) where T
     X = Vector{T}(undef, 1)
@@ -4244,7 +4342,10 @@ end
 end
 # disable GC to make sure no collection/promotion happens
 # when we are constructing the objects
+get_finalizers_inhibited() = ccall(:jl_gc_get_finalizers_inhibited, Int32, (Ptr{Cvoid},), C_NULL)
 let gc_enabled13995 = GC.enable(false)
+    @assert gc_enabled13995
+    @assert get_finalizers_inhibited() == 0
     finalized13995 = [false, false, false, false]
     create_dead_object13995(finalized13995)
     GC.enable(true)
@@ -4256,6 +4357,30 @@ let gc_enabled13995 = GC.enable(false)
     GC.enable(gc_enabled13995)
 end
 
+# Ensure an independent GC frame
+@noinline outlined(f) = f()
+
+@testset "finalizers must not change the sticky flag" begin
+    GC.enable(false)
+    try
+        outlined() do
+            local obj = Ref(0)
+            finalizer(obj) do _
+                @async nothing
+            end
+            Base.donotdelete(obj)
+        end
+        task = Threads.@spawn begin
+            GC.enable(true)
+            GC.gc()
+        end
+        wait(task)
+        @test !task.sticky
+    finally
+        GC.enable(true)
+    end
+end
+
 # issue #15283
 j15283 = 0
 let
@@ -4998,7 +5123,8 @@ gVararg(a::fVararg(Int)) = length(a)
     false
 catch e
     (e::ErrorException).msg
-end == "The function body AST defined by this @generated function is not pure. This likely means it contains a closure or comprehension."
+end == "The function body AST defined by this @generated function is not pure. " *
+       "This likely means it contains a closure, a comprehension or a generator."
 
 let x = 1
     global g18444
@@ -5188,6 +5314,30 @@ end
 @test let_Box5()() == 46
 @test let_noBox()() == 21
 
+# issue #37690
+function foo37690()
+    local f
+    local x
+    for k = 1:2
+        x = k
+        if k == 1
+            f = () -> x
+        end
+    end
+    f
+end
+@test foo37690()() == 2
+
+function g37690()
+    local x
+    local f
+    for k = 1:2
+    end
+    x = 0
+    ()->x
+end
+@test g37690().x === 0
+
 function _assigns_and_captures_arg(a)
     a = a
     return ()->a
@@ -5281,6 +5431,16 @@ if Sys.WORD_SIZE == 64
     @test_nowarn tester20360()
 end
 
+# issue #39717
+let a = Base.StringVector(2^17)
+    b = String(a)
+    c = String(a)
+    GC.gc()
+    @test sizeof(a) == 0
+    @test sizeof(b) == 2^17
+    @test sizeof(c) == 0
+end
+
 @test_throws ArgumentError eltype(Bottom)
 
 # issue #16424, re-evaluating type definitions
@@ -5623,11 +5783,9 @@ f_isdefined_unionvar(y, t) = (t > 0 && (x = (t == 1 ? 1 : y)); @isdefined x)
 @test !f_isdefined_unionvar(1, 0)
 f_isdefined_splat(x...) = @isdefined x
 @test f_isdefined_splat(1, 2, 3)
-let err = try; @macroexpand @isdefined :x; false; catch ex; ex; end,
+let e = try; @macroexpand @isdefined :x; false; catch ex; ex; end,
     __source__ = LineNumberNode(@__LINE__() - 1, Symbol(@__FILE__))
-    @test err.file === string(__source__.file)
-    @test err.line === __source__.line
-    e = err.error::MethodError
+    e::MethodError
     @test e.f === getfield(@__MODULE__, Symbol("@isdefined"))
     @test e.args === (__source__, @__MODULE__, :(:x))
 end
@@ -5944,11 +6102,11 @@ end
 for U in boxedunions
     local U
     for N in (1, 2, 3, 4)
-        A = Array{U}(undef, ntuple(x->0, N)...)
+        A = Array{U}(undef, ntuple(Returns(0), N)...)
         @test isempty(A)
         @test sizeof(A) == 0
 
-        A = Array{U}(undef, ntuple(x->10, N)...)
+        A = Array{U}(undef, ntuple(Returns(10), N)...)
         @test length(A) == 10^N
         @test sizeof(A) == sizeof(Int) * (10^N)
         @test !isassigned(A, 1)
@@ -6029,11 +6187,11 @@ using Serialization
 for U in unboxedunions
     local U
     for N in (1, 2, 3, 4)
-        A = Array{U}(undef, ntuple(x->0, N)...)
+        A = Array{U}(undef, ntuple(Returns(0), N)...)
         @test isempty(A)
         @test sizeof(A) == 0
 
-        len = ntuple(x->10, N)
+        len = ntuple(Returns(10), N)
         mxsz = maximum(sizeof, Base.uniontypes(U))
         A = Array{U}(undef, len)
         @test length(A) == prod(len)
@@ -6983,7 +7141,6 @@ end
 @test_throws ArgumentError Array{Int, 2}(undef, -1, -1)
 
 # issue #28812
-@test Tuple{Vararg{Array{T},3} where T} === Tuple{Array,Array,Array}
 @test Tuple{Vararg{Array{T} where T,3}} === Tuple{Array,Array,Array}
 
 # issue #29145
@@ -7197,8 +7354,57 @@ end
 struct B33954
     x::Q33954{B33954}
 end
-@test_broken isbitstype(Tuple{B33954})
-@test_broken isbitstype(B33954)
+@test isbitstype(Tuple{B33954})
+@test isbitstype(B33954)
+
+struct A41503{d}
+    e::d
+end
+struct B41503{j,k} <: AbstractArray{A41503{B41503{Any,k}},Any}
+    l::k
+end
+@test !isbitstype(B41503{Any,Any})
+@test_broken isbitstype(B41503{Any,Int})
+
+struct B40050 <: Ref{Tuple{B40050}}
+end
+@test string((B40050(),)) == "($B40050(),)"
+@test_broken isbitstype(Tuple{B40050})
+
+# issue #41654
+struct X41654 <: Ref{X41654}
+end
+@test isbitstype(X41654)
+@test ('a'=>X41654(),)[1][2] isa X41654
+
+# issue #43411
+struct A43411{S, T}
+    x::NamedTuple{S, T}
+end
+@test isbitstype(A43411{(:a,), Tuple{Int}})
+
+# issue #44614
+struct T44614_1{T}
+    m::T
+end
+struct T44614_2{L}
+    tuple::NTuple{3, Int64}
+    T44614_2{L}(t::NTuple{3, Int64}) where {L} = new{sum(t)}(t)
+end
+struct T44614_3{L, N}
+    a::Tuple{T44614_2{L}}
+    param::NTuple{N, T44614_1}
+    T44614_3(a::Tuple{T44614_2{L}}, pars::NTuple{N, T44614_1}) where {L, N} = new{L, N}(a, pars)
+end
+@test sizeof((T44614_2{L} where L).body) == 24
+let T = T44614_3{L,2} where L
+    # these values are computable, but we currently don't know how to compute them properly
+    ex = ErrorException("Argument is an incomplete T44614_3 type and does not have a definite size.")
+    @test_throws ex sizeof(T.body)
+    @test_throws ex sizeof(T)
+    @test_throws BoundsError fieldoffset(T.body, 2)
+    @test fieldoffset(T{1}, 2) == 24
+end
 
 # Issue #34206/34207
 function mre34206(a, n)
@@ -7262,12 +7468,28 @@ end
 
 # issue #36104
 module M36104
+using Test
 struct T36104
     v::Vector{M36104.T36104}
 end
 struct T36104   # check that redefining it works, issue #21816
     v::Vector{T36104}
 end
+# with a gensymmed unionall
+struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
+    data::S
+    uplo::Char
+end
+struct Symmetric{T,S<:AbstractMatrix{<:T}} <: AbstractMatrix{T}
+    data::S
+    uplo::Char
+end
+@test_throws ErrorException begin
+    struct Symmetric{T,S<:AbstractMatrix{T}} <: AbstractMatrix{T}
+        data::S
+        uplo::Char
+    end
+end
 end
 @test fieldtypes(M36104.T36104) == (Vector{M36104.T36104},)
 @test_throws ErrorException("expected") @eval(struct X36104; x::error("expected"); end)
@@ -7484,3 +7706,97 @@ let array = Int[]
 end
 @test compare_union37557(Ref{Union{Int,Vector{Int}}}(1),
                          Ref{Union{Int,Vector{Int}}}(1))
+
+# issue #38224
+struct S38224
+    i::Union{Int,Missing}
+end
+@test S38224.flags & 0x10 == 0x10 # .zeroinit
+for _ in 1:5
+    let a = Vector{S38224}(undef, 1000000)
+        @test all(x->ismissing(x.i), a)
+    end
+end
+
+# Redefining types with Vararg
+abstract type RedefineVararg; end
+const RedefineVarargN{N} = Tuple{Vararg{RedefineVararg, N}}
+const RedefineVarargN{N} = Tuple{Vararg{RedefineVararg, N}}
+
+# NTuples with non-types
+@test NTuple{3, 2} == Tuple{2, 2, 2}
+
+# issue #18621
+function f18621()
+   g = (k(i) for i in 1:5)
+   k = identity
+   return collect(g)
+end
+@test f18621() == 1:5
+@test [_ for _ in 1:5] == 1:5
+
+# issue #35130
+const T35130 = Tuple{Vector{Int}, <:Any}
+@eval struct A35130
+    x::Vector{Tuple{Vector{Int}, Any}}
+    A35130(x) = $(Expr(:new, :A35130, :x))
+end
+h35130(x) = A35130(Any[x][1]::Vector{T35130})
+@test h35130(T35130[([1],1)]) isa A35130
+
+# issue #41503
+let S = Tuple{Tuple{Tuple{K, UInt128} where K<:Tuple{Int64}, Int64}},
+    T = Tuple{Tuple{Tuple{Tuple{Int64}, UInt128}, Int64}}
+    @test pointer_from_objref(T) === pointer_from_objref(S)
+    @test isbitstype(T)
+end
+
+# avoid impossible normalization (don't try to form Tuple{Complex{String}} here)
+@test Tuple{Complex{T} where String<:T<:String} == Tuple{Complex{T} where String<:T<:String}
+
+# control over compilation/interpreter
+@testset "Experimental.@force_compile" begin
+    function trim_after_eval(str::AbstractString)
+        rng = findfirst("eval(", str)
+        @test !isempty(rng)
+        return str[1:first(rng)-1]
+    end
+    btc = eval(quote
+        Base.Experimental.@force_compile
+        backtrace()
+    end)
+    bti = eval(quote
+        backtrace()
+    end)
+    @test !occursin(r"(interpreter|do_call)", trim_after_eval(string(stacktrace(btc, true))))
+    @test  occursin(r"(interpreter|do_call)", trim_after_eval(string(stacktrace(bti, true))))
+end
+
+@testset "rest(svec, ...)" begin
+    x = Core.svec(1, 2, 3)
+    a..., = x
+    @test a == Core.svec(1, 2, 3)
+    a, b... = x
+    @test a == 1
+    @test b == Core.svec(2, 3)
+end
+
+@testset "setproperty! on modules" begin
+    m = Module()
+    @eval m global x::Int
+
+    setglobal!(m, :x, 1)
+    @test m.x === 1
+    setglobal!(m, :x, 2, :release)
+    @test m.x === 2
+    @test_throws ConcurrencyViolationError setglobal!(m, :x, 3, :not_atomic)
+    @test_throws ErrorException setglobal!(m, :x, 4., :release)
+
+    m.x = 1
+    @test m.x === 1
+    setproperty!(m, :x, 2, :release)
+    @test m.x === 2
+    @test_throws ConcurrencyViolationError setproperty!(m, :x, 3, :not_atomic)
+    m.x = 4.
+    @test m.x === 4
+end
diff --git a/test/corelogging.jl b/test/corelogging.jl
new file mode 100644
index 00000000000000..1b1254e78b3d62
--- /dev/null
+++ b/test/corelogging.jl
@@ -0,0 +1,456 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test, Base.CoreLogging
+import Base.CoreLogging: BelowMinLevel, Debug, Info, Warn, Error,
+    handle_message, shouldlog, min_enabled_level, catch_exceptions
+
+import Test: collect_test_logs, TestLogger
+using Printf: @sprintf
+
+isdefined(Main, :MacroCalls) || @eval Main include("testhelpers/MacroCalls.jl")
+using Main.MacroCalls
+
+#-------------------------------------------------------------------------------
+@testset "Logging" begin
+
+@testset "Basic logging" begin
+    @test_logs (Debug, "a") min_level=Debug @debug "a"
+    @test_logs (Info,  "a") @info  "a"
+    @test_logs (Warn,  "a") @warn  "a"
+    @test_logs (Error, "a") @error "a"
+end
+
+#-------------------------------------------------------------------------------
+# Front end
+
+@testset "Log message formatting" begin
+    @test_logs (Info, "sum(A) = 16.0") @info begin
+        A = fill(1.0, 4, 4)
+        "sum(A) = $(sum(A))"
+    end
+    x = 10.50
+    @test_logs (Info, "10.5") @info "$x"
+    @test_logs (Info, "10.500") @info @sprintf("%.3f", x)
+end
+
+@testset "Programmatically defined levels" begin
+    level = Info
+    @test_logs (Info, "X") @logmsg level "X"
+    level = Warn
+    @test_logs (Warn, "X") @logmsg level "X"
+end
+
+@testset "Structured logging with key value pairs" begin
+    foo_val = 10
+    bar_val = 100
+    logs,_ = collect_test_logs() do
+        @info "test"  bar_val  progress=0.1  foo=foo_val  2*3  bar(x)=1.2  real_line=(@__LINE__)
+        @info begin
+            value_in_msg_block = 1000.0
+            "test2"
+        end value_in_msg_block
+        test_splatting(;kws...) = @info "test3" kws...
+        test_splatting(a=1,b=2.0)
+    end
+    @test length(logs) == 3
+
+    record = logs[1]
+    kwargs = record.kwargs
+
+    # Builtin metadata
+    @test record._module == @__MODULE__
+    @test record.file == Base.source_path()
+    @test record.line == kwargs[:real_line]
+    @test record.id isa Symbol
+    @test occursin(r"^.*logging_[[:xdigit:]]{8}$", String(record.id))
+
+    # User-defined metadata
+    @test kwargs[:bar_val] === bar_val
+    @test kwargs[:progress] == 0.1
+    @test kwargs[:foo] === foo_val
+    @test kwargs[Symbol(:(2*3))] === 6
+    @test kwargs[Symbol(:(bar(x)))] === 1.2
+
+    # Keyword values accessible from message block
+    record2 = logs[2]
+    @test occursin((Info, "test2"), record2)
+    kwargs = record2.kwargs
+    @test kwargs[:value_in_msg_block] === 1000.0
+
+    # Splatting of keywords
+    record3 = logs[3]
+    @test occursin((Info, "test3"), record3)
+    kwargs = record3.kwargs
+    @test sort(collect(keys(kwargs))) == [:a, :b]
+    @test kwargs[:a] === 1
+    @test kwargs[:b] === 2.0
+end
+
+@testset "Log message exception handling" begin
+    # Exceptions in message creation are caught by default
+    @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true @info "foo $(1÷0)"
+    # Exceptions propagate if explicitly disabled for the logger (by default
+    # for the test logger)
+    @test_throws DivideError collect_test_logs() do
+        @info  "foo $(1÷0)"
+    end
+    # trivial expressions create the errors explicitly instead of throwing them (to avoid try/catch)
+    for i in 1:2
+        local msg, x, y
+        logmsg = (function() @info msg x=y end,
+                  function() @info msg x=y z=1+1 end)[i]
+        @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true logmsg()
+        @test_throws UndefVarError(:msg) collect_test_logs(logmsg)
+        @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:msg)
+        msg = "the msg"
+        @test_logs (Error, Test.Ignored(), Test.Ignored(), :logevent_error) catch_exceptions=true logmsg()
+        @test_throws UndefVarError(:y) collect_test_logs(logmsg)
+        @test (only(collect_test_logs(logmsg, catch_exceptions=true)[1]).kwargs[:exception]::Tuple{UndefVarError, Vector})[1] === UndefVarError(:y)
+        y = "the y"
+        @test_logs (Info,"the msg") logmsg()
+        @test only(collect_test_logs(logmsg)[1]).kwargs[:x] === "the y"
+    end
+end
+
+@testset "Special keywords" begin
+    logger = TestLogger()
+    with_logger(logger) do
+        @info "foo" _module=Base.Core _id=:asdf _group=:somegroup _file="/a/file" _line=-10
+    end
+    @test length(logger.logs) == 1
+    record = logger.logs[1]
+    @test record._module == Base.Core
+    @test record.group == :somegroup
+    @test record.id == :asdf
+    @test record.file == "/a/file"
+    @test record.line == -10
+    # Test consistency with shouldlog() function arguments
+    @test record.level   == logger.shouldlog_args[1]
+    @test record._module == logger.shouldlog_args[2]
+    @test record.group   == logger.shouldlog_args[3]
+    @test record.id      == logger.shouldlog_args[4]
+
+    # handling of nothing
+    logger = TestLogger()
+    with_logger(logger) do
+        @info "foo" _module = nothing _file = nothing _line = nothing
+    end
+    @test length(logger.logs) == 1
+    record = logger.logs[1]
+    @test record._module == nothing
+    @test record.file == nothing
+    @test record.line == nothing
+end
+
+# PR #28209
+@testset "0-arg MethodErrors" begin
+    @test_throws MethodError @macrocall(@logmsg :Notice)
+    @test_throws MethodError @macrocall(@debug)
+    @test_throws MethodError @macrocall(@info)
+    @test_throws MethodError @macrocall(@warn)
+    @test_throws MethodError @macrocall(@error)
+end
+
+@testset "Any type" begin
+    @test_logs (:info, sum) @info sum
+    # TODO: make this work (here we want `@test_logs` to fail)
+    # @test_fails @test_logs (:info, "sum") @info sum   # `sum` works, `"sum"` does not
+
+    # check that the message delivered to the user works
+    mktempdir() do dir
+        path_stdout = joinpath(dir, "stdout.txt")
+        path_stderr = joinpath(dir, "stderr.txt")
+        redirect_stdio(stdout=path_stdout, stderr=path_stderr) do
+            @info sum
+        end
+        @test occursin("Info: sum", read(path_stderr, String))
+    end
+end
+
+#-------------------------------------------------------------------------------
+# Early log level filtering
+
+@testset "Early log filtering" begin
+    @testset "Log filtering, per task logger" begin
+        @test_logs (Warn, "c") min_level=Warn begin
+            @info "b"
+            @warn "c"
+        end
+    end
+
+    @testset "Log level filtering - global flag" begin
+        # Test utility: Log once at each standard level
+        function log_each_level()
+            @debug "a"
+            @info  "b"
+            @warn  "c"
+            @error "d"
+        end
+
+        disable_logging(BelowMinLevel)
+        @test_logs (Debug, "a") (Info, "b") (Warn, "c") (Error, "d") min_level=Debug  log_each_level()
+
+        disable_logging(Debug)
+        @test_logs (Info, "b") (Warn, "c") (Error, "d") min_level=Debug  log_each_level()
+
+        disable_logging(Info)
+        @test_logs (Warn, "c") (Error, "d") min_level=Debug  log_each_level()
+
+        disable_logging(Warn)
+        @test_logs (Error, "d") min_level=Debug  log_each_level()
+
+        disable_logging(Error)
+        @test_logs log_each_level()
+
+        # Reset to default
+        disable_logging(BelowMinLevel)
+    end
+
+    @testset "Log level filtering - ENV" begin
+        logger = TestLogger()
+        with_logger(logger) do
+            for (e, r) in (("", false),
+                            (",,,,", false),
+                            ("al", false),
+                            ("all", true),
+                            ("a,b,all,c", true),
+                            ("a,b,,c", false),
+                            ("Mainb", false),
+                            ("aMain", false),
+                            ("Main", true),
+                            ("a,b,Main,c", true),
+                            ("Base", true),
+                            ("a,b,Base,c", true),
+                            ("Filesystem", true),
+                            ("a,b,Filesystem,c", true),
+                            ("a,b,Base.Filesystem,c", false),
+                            ("!al", true),
+                            ("all,!al", true),
+                            ("all,!al,!all", false),
+                            ("!all,Main", true),
+                            ("!all,!Main", false),
+                            ("!all,a,b,!Main,c", false),
+                            ("!all,Filesystem", true),
+                            ("!all,Base.Filesystem", false),
+                            ("a,b,all,!all,c", false),
+                            ("!Main", false),
+                            ("a,b,!Main,c", false),
+                            ("!Base", false),
+                            ("all,!Base", false),
+                            ("!all,Base", true),
+                            ("!all,!Base", false),
+                            ("a,b,!Base,c", false),
+                            ("all,a,b,!Base,c", false),
+                            ("!all,a,b,Base,c", true),
+                            ("!all,a,b,!Base,c", false),
+                            ("!Filesystem", false),
+                            ("all,!Filesystem", false),
+                            ("!all,Filesystem", true),
+                            ("!all,!Filesystem", false),
+                            ("a,b,!Filesystem,c", false),
+                            ("all,a,b,!Filesystem,c", false),
+                            ("!all,a,b,Filesystem,c", true),
+                            ("!all,a,b,!Filesystem,c", false),
+                            ("a,b,!Base.Filesystem,c", true),
+                            ("all,a,b,!Base.Filesystem,c", true),
+                            ("!all,a,b,Base.Filesystem,c", false),
+                            ("!all,a,b,!Base.Filesystem,c", false),
+                           )
+                ENV["JULIA_DEBUG"] = e
+                @test CoreLogging.env_override_minlevel(:Main, Base.Filesystem) === r
+                @test CoreLogging.current_logger_for_env(BelowMinLevel, :Main, Base.Filesystem) === (r ? logger : nothing)
+                @test CoreLogging.current_logger_for_env(Info, :Main, Base.Filesystem) === logger
+            end
+        end
+    end
+    ENV["JULIA_DEBUG"] = ""
+end
+
+#-------------------------------------------------------------------------------
+
+@eval module LogModuleTest
+    function a()
+        @info  "a"
+    end
+
+    module Submodule
+        function b()
+            @info  "b"
+        end
+    end
+end
+
+@testset "Capture of module information" begin
+    @test_logs(
+        (Info, "a", LogModuleTest),
+        (Info, "b", LogModuleTest.Submodule),
+        begin
+            LogModuleTest.a()
+            LogModuleTest.Submodule.b()
+        end
+    )
+end
+
+
+#-------------------------------------------------------------------------------
+@testset "Logger installation and access" begin
+    @testset "Global logger" begin
+        logger1 = global_logger()
+        logger2 = TestLogger()
+        # global_logger() returns the previously installed logger
+        @test logger1 === global_logger(logger2)
+        # current logger looks up global logger by default.
+        @test current_logger() === logger2
+        global_logger(logger1) # Restore global logger
+    end
+end
+
+#-------------------------------------------------------------------------------
+
+# Custom log levels
+
+@eval module LogLevelTest
+    using Base.CoreLogging
+
+    struct MyLevel
+        level::Int
+    end
+
+    Base.convert(::Type{LogLevel}, l::MyLevel) = LogLevel(l.level)
+
+    const critical = MyLevel(10000)
+    const debug_verbose = MyLevel(-10000)
+end
+
+@testset "Custom log levels" begin
+    @test_logs (LogLevelTest.critical, "blah") @logmsg LogLevelTest.critical "blah"
+    logs,_ = collect_test_logs(min_level=Debug) do
+        @logmsg LogLevelTest.debug_verbose "blah"
+    end
+    @test length(logs) == 0
+end
+
+
+#-------------------------------------------------------------------------------
+
+@testset "SimpleLogger" begin
+    # Log level limiting
+    @test min_enabled_level(SimpleLogger(devnull, Debug)) == Debug
+    @test min_enabled_level(SimpleLogger(devnull, Error)) == Error
+
+    # Log limiting
+    logger = SimpleLogger(devnull)
+    @test shouldlog(logger, Info, Base, :group, :asdf) === true
+    handle_message(logger, Info, "msg", Base, :group, :asdf, "somefile", 1, maxlog=2)
+    @test shouldlog(logger, Info, Base, :group, :asdf) === true
+    handle_message(logger, Info, "msg", Base, :group, :asdf, "somefile", 1, maxlog=2)
+    @test shouldlog(logger, Info, Base, :group, :asdf) === false
+    @test catch_exceptions(logger) === false
+
+    # Log formatting
+    function genmsg(level, message, _module, filepath, line; kws...)
+        io = IOBuffer()
+        logger = SimpleLogger(io, Debug)
+        handle_message(logger, level, message, _module, :group, :id,
+                       filepath, line; kws...)
+        String(take!(io))
+    end
+
+    function genmsg_err(level, message, _module, filepath, line; kws...)
+        fname = tempname()
+        f = open(fname, "w")
+        logger = SimpleLogger()
+        redirect_stderr(f) do
+            handle_message(logger, level, message, _module, :group, :id,
+                           filepath, line; kws...)
+        end
+        close(f)
+        buf = read(fname)
+        rm(fname)
+        String(buf)
+    end
+
+    # Simple
+    @test genmsg_err(Info, "msg", Main, "some/path.jl", 101) ==
+    """
+    ┌ Info: msg
+    └ @ Main some/path.jl:101
+    """
+
+    # Multiline message
+    @test genmsg_err(Warn, "line1\nline2", Main, "some/path.jl", 101) ==
+    """
+    ┌ Warning: line1
+    │ line2
+    └ @ Main some/path.jl:101
+    """
+
+    # Keywords
+    @test genmsg(Error, "msg", Base, "other.jl", 101, a=1, b="asdf") ==
+    """
+    ┌ Error: msg
+    │   a = 1
+    │   b = asdf
+    └ @ Base other.jl:101
+    """
+
+    # nothing values
+    @test genmsg(Warn, "msg", nothing, nothing, nothing) ==
+    """
+    ┌ Warning: msg
+    └ @ nothing nothing:nothing
+    """
+end
+
+# Issue #26273
+let m = Module(:Bare26273i, false)
+    Core.eval(m, :(import Base: @error))
+    @test_logs (:error, "Hello") Core.eval(m, quote
+        @error "Hello"
+    end)
+end
+
+@testset "#26335: _module and _file kwargs" begin
+    ignored = Test.Ignored()
+    @test_logs (:warn, "a", ignored, ignored, ignored, "foo.jl") (@warn "a" _file="foo.jl")
+    @test_logs (:warn, "a", Base) (@warn "a" _module=Base)
+end
+
+# Issue #28786
+@testset "ID generation" begin
+    logs,_ = collect_test_logs() do
+        for i in 1:2
+            @info "test"
+            @info "test"
+        end
+    end
+    @test length(logs) == 4
+    @test logs[1].id == logs[3].id
+    @test logs[2].id == logs[4].id
+    @test logs[1].id != logs[2].id
+end
+
+# Issue #34485
+@testset "`_group` must be a `Symbol`" begin
+    (record,), _ = collect_test_logs() do
+        @info "test"
+    end
+    @test record.group == :corelogging  # name of this file
+end
+
+@testset "complicated kwargs logging macro" begin
+    @test_logs (:warn, "foo")  @warn "foo" argvals=:((DoNotCare{$(Expr(:escape, :Any))}(),))
+end
+
+@testset "stdlib path" begin
+    logger = TestLogger()
+    with_logger(logger) do
+        @info "foo" _file=joinpath(Sys.BUILD_STDLIB_PATH, "InteractiveUtils", "src", "InteractiveUtils.jl")
+    end
+    logs = logger.logs
+    @test length(logs) == 1
+    record = logs[1]
+    @test isfile(record.file)
+end
+
+end
diff --git a/test/deprecation_exec.jl b/test/deprecation_exec.jl
index 8d86626ead0dd8..194632279397c0 100644
--- a/test/deprecation_exec.jl
+++ b/test/deprecation_exec.jl
@@ -43,7 +43,12 @@ struct T21972
     end
 end
 
-@testset "@deprecate" begin
+# Create a consistent call frame for nowarn tests
+@noinline call(f, args...) = @noinline f(args...)
+
+# Given this is a sub-processed test file, not using @testsets avoids
+# leaking the report print into the Base test runner report
+begin # @deprecate
     using .DeprecationTests
     using .Foo1234
     @test foo1234(3) == 4
@@ -55,26 +60,35 @@ end
     @test_warn "importing deprecated binding" eval(ex)
     @test @test_nowarn(DeprecationTests.bar(4)) == 7
 
-    # enable when issue #22043 is fixed
-    # @test @test_warn "f1 is deprecated, use f instead." f1()
-    # @test @test_nowarn f1()
+    @test @test_warn "`f1` is deprecated, use `f` instead." f1()
+
+    @test_throws UndefVarError f2() # not exported
+    @test @test_warn "`f2` is deprecated, use `f` instead." DeprecationTests.f2()
+
+    @test @test_warn "`f3()` is deprecated, use `f()` instead." f3()
 
-    # @test_throws UndefVarError f2() # not exported
-    # @test @test_warn "f2 is deprecated, use f instead." DeprecationTests.f2()
-    # @test @test_nowarn DeprecationTests.f2()
+    @test_throws UndefVarError f4() # not exported
+    @test @test_warn "`f4()` is deprecated, use `f()` instead." DeprecationTests.f4()
 
-    # @test @test_warn "f3() is deprecated, use f() instead." f3()
-    # @test @test_nowarn f3()
+    @test @test_warn "`f5(x::T) where T` is deprecated, use `f()` instead." f5(1)
 
-    # @test_throws UndefVarError f4() # not exported
-    # @test @test_warn "f4() is deprecated, use f() instead." DeprecationTests.f4()
-    # @test @test_nowarn DeprecationTests.f4()
+    @test @test_warn "`A{T}(x::S) where {T, S}` is deprecated, use `f()` instead." A{Int}(1.)
 
-    # @test @test_warn "f5(x::T) where T is deprecated, use f() instead." f5(1)
-    # @test @test_nowarn f5(1)
+    redirect_stderr(devnull) do
+        @test call(f1)
+        @test call(DeprecationTests.f2)
+        @test call(f3)
+        @test call(DeprecationTests.f4)
+        @test call(f5, 1)
+        @test call(A{Int}, 1.)
+    end
 
-    # @test @test_warn "A{T}(x::S) where {T, S} is deprecated, use f() instead." A{Int}(1.)
-    # @test @test_nowarn A{Int}(1.)
+    @test @test_nowarn call(f1)
+    @test @test_nowarn call(DeprecationTests.f2)
+    @test @test_nowarn call(f3)
+    @test @test_nowarn call(DeprecationTests.f4)
+    @test @test_nowarn call(f5, 1)
+    @test @test_nowarn call(A{Int}, 1.)
 
     # issue #21972
     @noinline function f21972()
@@ -87,7 +101,7 @@ f24658() = depwarn24658()
 
 depwarn24658() = Base.firstcaller(backtrace(), :_func_not_found_)
 
-@testset "firstcaller" begin
+begin # firstcaller
     # issue #24658
     @test eval(:(if true; f24658(); end)) == (Ptr{Cvoid}(0),StackTraces.UNKNOWN)
 end
@@ -113,10 +127,19 @@ global_logger(prev_logger)
 #-------------------------------------------------------------------------------
 # BEGIN 0.7 deprecations
 
-@testset "parser syntax deprecations" begin
+begin # parser syntax deprecations
     # #15524
     # @test (@test_deprecated Meta.parse("for a=b f() end")) == :(for a=b; f() end)
     @test_broken length(Test.collect_test_logs(()->Meta.parse("for a=b f() end"))[1]) > 0
 end
 
 # END 0.7 deprecations
+
+begin # tuple indexed by float deprecation
+    @test_deprecated getindex((1,), 1.0) === 1
+    @test_deprecated getindex((1,2), 2.0) === 2
+    @test Base.JLOptions().depwarn == 1
+    @test_throws Exception @test_warn r"`getindex(t::Tuple, i::Real)` is deprecated" getindex((), 1.0)
+    @test_throws Exception @test_warn r"`getindex(t::Tuple, i::Real)` is deprecated" getindex((1,2), 0.0)
+    @test_throws Exception @test_warn r"`getindex(t::Tuple, i::Real)` is deprecated" getindex((1,2), -1.0)
+end
diff --git a/test/dict.jl b/test/dict.jl
index 57e3e00cc81bfc..3cf5e92ea42512 100644
--- a/test/dict.jl
+++ b/test/dict.jl
@@ -159,6 +159,14 @@ end
     d = Dict(i==1 ? (1=>2) : (2.0=>3.0) for i=1:2)
     @test isa(d, Dict{Real,Real})
     @test d == Dict{Real,Real}(2.0=>3.0, 1=>2)
+
+    # issue #39117
+    @test Dict(t[1]=>t[2] for t in zip((1,"2"), (2,"2"))) == Dict{Any,Any}(1=>2, "2"=>"2")
+end
+
+@testset "empty tuple ctor" begin
+    h = Dict(())
+    @test length(h) == 0
 end
 
 @testset "type of Dict constructed from varargs of Pairs" begin
@@ -578,6 +586,13 @@ end
     @test length(d.ht) >= 10^4
     @test d === Base.rehash!(d, 123452) # number needs to be even
 
+    # filter!
+    d = IdDict(1=>1, 2=>3, 3=>2)
+    filter!(x->isodd(x[2]), d)
+    @test d[1] == 1
+    @test d[2] == 3
+    @test !haskey(d, 3)
+
     # not an iterator of tuples or pairs
     @test_throws ArgumentError IdDict([1, 2, 3, 4])
     # test rethrow of error in ctor
@@ -674,6 +689,7 @@ import Base.ImmutableDict
     d4 = ImmutableDict(d3, k2 => v1)
     dnan = ImmutableDict{String, Float64}(k2, NaN)
     dnum = ImmutableDict(dnan, k2 => 1)
+    f(x) = x^2
 
     @test isempty(collect(d))
     @test !isempty(collect(d1))
@@ -719,6 +735,18 @@ import Base.ImmutableDict
     @test get(d4, "key1", :default) === v2
     @test get(d4, "foo", :default) === :default
     @test get(d, k1, :default) === :default
+    @test get(d1, "key1") do
+        f(2)
+    end === v1
+    @test get(d4, "key1") do
+        f(4)
+    end === v2
+    @test get(d4, "foo") do
+        f(6)
+    end === 36
+    @test get(d, k1) do
+        f(8)
+    end === 64
     @test d1["key1"] === v1
     @test d4["key1"] === v2
     @test empty(d3) === d
@@ -893,15 +921,40 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep
 
     # issue #26939
     d26939 = WeakKeyDict()
-    d26939[big"1.0" + 1.1] = 1
-    GC.gc() # make sure this doesn't segfault
+    (@noinline d -> d[big"1.0" + 1.1] = 1)(d26939)
+    GC.gc() # primarily to make sure this doesn't segfault
+    @test count(d26939) == 0
+    @test length(d26939.ht) == 1
+    @test length(d26939) == 0
+    @test isempty(d26939)
+    empty!(d26939)
+    for i in 1:8
+        (@noinline (d, i) -> d[big(i + 12345)] = 1)(d26939, i)
+    end
+    lock(GC.gc, d26939)
+    @test length(d26939.ht) == 8
+    @test count(d26939) == 0
+    @test !haskey(d26939, nothing)
+    @test_throws KeyError(nothing) d26939[nothing]
+    @test_throws KeyError(nothing) get(d26939, nothing, 1)
+    @test_throws KeyError(nothing) get(() -> 1, d26939, nothing)
+    @test_throws KeyError(nothing) pop!(d26939, nothing)
+    @test getkey(d26939, nothing, 321) === 321
+    @test pop!(d26939, nothing, 321) === 321
+    @test delete!(d26939, nothing) === d26939
+    @test length(d26939.ht) == 8
+    @test_throws ArgumentError d26939[nothing] = 1
+    @test_throws ArgumentError get!(d26939, nothing, 1)
+    @test_throws ArgumentError get!(() -> 1, d26939, nothing)
+    @test isempty(d26939)
+    @test length(d26939.ht) == 0
+    @test length(d26939) == 0
 
     # WeakKeyDict does not convert keys on setting
     @test_throws ArgumentError WeakKeyDict{Vector{Int},Any}([5.0]=>1)
     wkd = WeakKeyDict(A=>2)
     @test_throws ArgumentError get!(wkd, [2.0], 2)
-    @test_throws ArgumentError get!(wkd, [1.0], 2) # get! fails even if the key is only
-                                                   # used for getting and not setting
+    @test get!(wkd, [1.0], 2) === 2
 
     # WeakKeyDict does convert on getting
     wkd = WeakKeyDict(A=>2)
@@ -913,16 +966,18 @@ Dict(1 => rand(2,3), 'c' => "asdf") # just make sure this does not trigger a dep
 
     # map! on values of WKD
     wkd = WeakKeyDict(A=>2, B=>3)
-    map!(v->v-1, values(wkd))
+    map!(v -> v-1, values(wkd))
     @test wkd == WeakKeyDict(A=>1, B=>2)
 
     # get!
     wkd = WeakKeyDict(A=>2)
-    get!(wkd, B, 3)
+    @test get!(wkd, B, 3) == 3
     @test wkd == WeakKeyDict(A=>2, B=>3)
-    get!(()->4, wkd, C)
+    @test get!(()->4, wkd, C) == 4
     @test wkd == WeakKeyDict(A=>2, B=>3, C=>4)
-    @test_throws ArgumentError get!(()->5, wkd, [1.0])
+    @test get!(()->5, wkd, [1.0]) == 2
+
+    GC.@preserve A B C D nothing
 end
 
 @testset "issue #19995, hash of dicts" begin
@@ -1020,6 +1075,26 @@ end
     check_merge([Dict(3=>4), Dict(:a=>5)], Dict(:a => 5, 3 => 4))
 end
 
+@testset "AbstractDict mergewith!" begin
+# we use IdDict to test the mergewith! implementation for AbstractDict
+    d1 = IdDict(1 => 1, 2 => 2)
+    d2 = IdDict(2 => 3, 3 => 4)
+    d3 = IdDict{Int, Float64}(1 => 5, 3 => 6)
+    d = copy(d1)
+    @inferred mergewith!(-, d, d2)
+    @test d == IdDict(1 => 1, 2 => -1, 3 => 4)
+    d = copy(d1)
+    @inferred mergewith!(-, d, d3)
+    @test d == IdDict(1 => -4, 2 => 2, 3 => 6)
+    d = copy(d1)
+    @inferred mergewith!(+, d, d2, d3)
+    @test d == IdDict(1 => 6, 2 => 5, 3 => 10)
+    @inferred mergewith(+, d1, d2, d3)
+    d = mergewith(+, d1, d2, d3)
+    @test d isa Dict{Int, Float64}
+    @test d == Dict(1 => 6, 2 => 5, 3 => 10)
+end
+
 @testset "misc error/io" begin
     d = Dict('a'=>1, 'b'=>1, 'c'=> 3)
     @test_throws ErrorException 'a' in d
@@ -1109,6 +1184,8 @@ end
             @test s === copy!(s, Base.ImmutableDict(a[])) == Dict(a[])
         end
     end
+    s2 = copy(s)
+    @test copy!(s, s) == s2
 end
 
 @testset "map!(f, values(dict))" begin
@@ -1127,6 +1204,7 @@ end
         map!(v->v-1, values(testdict))
         @test testdict[:a] == 0
         @test testdict[:b] == 1
+        @test sizehint!(testdict, 1) === testdict
     end
     @testset "Dict" begin
         testdict = Dict(:a=>1, :b=>2)
@@ -1135,3 +1213,49 @@ end
         @test testdict[:b] == 1
     end
 end
+
+# WeakKeyDict soundness (#38727)
+mutable struct ComparesWithGC38727
+    i::Int
+end
+const armed = Ref{Bool}(true)
+@noinline fwdab38727(a, b) = invoke(Base.isequal, Tuple{Any, WeakRef}, a, b)
+function Base.isequal(a::ComparesWithGC38727, b::WeakRef)
+    # This GC.gc() here simulates a GC during compilation in the original issue
+    armed[] && GC.gc()
+    armed[] = false
+    fwdab38727(a, b)
+end
+Base.isequal(a::WeakRef, b::ComparesWithGC38727) = isequal(b, a)
+Base.:(==)(a::ComparesWithGC38727, b::ComparesWithGC38727) = a.i == b.i
+Base.hash(a::ComparesWithGC38727, u::UInt) = Base.hash(a.i, u)
+function make_cwgc38727(wkd, i)
+    f = ComparesWithGC38727(i)
+    function fin(f)
+        f.i = -1
+    end
+    finalizer(fin, f)
+    f
+end
+@noinline mk38727(wkd) = wkd[make_cwgc38727(wkd, 1)] = nothing
+function bar()
+    wkd = WeakKeyDict{Any, Nothing}()
+    mk38727(wkd)
+    armed[] = true
+    z = getkey(wkd, ComparesWithGC38727(1), missing)
+end
+# Run this twice, in case compilation the first time around
+# masks something.
+let c = bar()
+    @test c === missing || c == ComparesWithGC38727(1)
+end
+let c = bar()
+    @test c === missing || c == ComparesWithGC38727(1)
+end
+
+@testset "shrinking" begin
+    d = Dict(i => i for i = 1:1000)
+    filter!(x -> x.first < 10, d)
+    sizehint!(d, 10)
+    @test length(d.slots) < 100
+end
diff --git a/test/docs.jl b/test/docs.jl
index c1c79fd9d53f4e..762a481ee4801d 100644
--- a/test/docs.jl
+++ b/test/docs.jl
@@ -17,7 +17,22 @@ function docstrings_equal(d1, d2)
     io2 = IOBuffer()
     show(io1, MIME"text/markdown"(), d1)
     show(io2, MIME"text/markdown"(), d2)
-    String(take!(io1)) == String(take!(io2))
+    s1 = String(take!(io1))
+    s2 = String(take!(io2))
+    #if s1 != s2 # for debugging
+    #    e1 = eachline(IOBuffer(s1))
+    #    e2 = eachline(IOBuffer(s2))
+    #    for (l1, l2) in zip(e1, e2)
+    #        l1 == l2 || println(l1, "\n", l2, "\n")
+    #    end
+    #    for l1 in e1
+    #        println(l1, "\n[missing]\n")
+    #    end
+    #    for l2 in e2
+    #        println("[missing]\n", l2, "\n")
+    #    end
+    #end
+    return s1 == s2
 end
 docstrings_equal(d1::DocStr, d2) = docstrings_equal(parsedoc(d1), d2)
 
@@ -73,6 +88,11 @@ end
 @test docstrings_equal(@doc(ModuleMacroDoc), doc"I am a module")
 @test docstrings_equal(@doc(ModuleMacroDoc.@m), doc"I am a macro")
 
+# issue #38819
+
+module NoDocStrings end
+@test meta(NoDocStrings) === getfield(NoDocStrings, Base.Docs.META)
+
 # General tests for docstrings.
 
 const LINE_NUMBER = @__LINE__() + 1
@@ -773,14 +793,7 @@ end
 
 # Issue #13905.
 let err = try; @macroexpand(@doc "" f() = @x); false; catch ex; ex; end
-    __source__ = LineNumberNode(@__LINE__() -  1, Symbol(@__FILE__))
-    err::LoadError
-    @test err.file === string(__source__.file)
-    @test err.line === __source__.line
-    err = err.error::LoadError
-    @test err.file === string(__source__.file)
-    @test err.line === __source__.line
-    err = err.error::UndefVarError
+    err::UndefVarError
     @test err.var == Symbol("@x")
  end
 
@@ -789,6 +802,8 @@ let err = try; @macroexpand(@doc "" f() = @x); false; catch ex; ex; end
 
 module Undocumented
 
+export A, B, C, at0, pt2
+
 abstract type A end
 abstract type B <: A end
 
@@ -800,13 +815,54 @@ struct D <: B
     three::Float64
 end
 
+abstract type at0{T<:Number,N} end
+abstract type at1{T>:Integer,N} <:at0{T,N} end
+
+const at_ = at0{Int64}
+
+primitive type pt2{T<:Number,N,A>:Integer} <:at0{T,N} 32 end
+
+struct st3{T<:Integer,N} <: at0{T,N}
+    a::NTuple{N,T}
+    b::Array{Int64,N}
+    c::Int64
+end
+
+struct st4{T,N} <: at0{T,N}
+    a::T
+    b::NTuple{N,T}
+end
+
+struct st5{T>:Int64,N} <:at1{T,N}
+    c::st3{T,N}
+end
+
+mutable struct mt6{T<:Integer,N} <:at1{T,N}
+    d::st5{T,N}
+end
+
+const ut7 = Union{st5, mt6}
+
+const ut8 = Union{at1, pt2, st3, st4}
+
+const ut9{T} = Union{at1{T}, pt2{T}, st3{T}, st4{T}}
+
 f = () -> nothing
 
 undocumented() = 1
 undocumented(x) = 2
 undocumented(x,y) = 3
 
-end
+end # module
+
+doc_str = Markdown.parse("""
+No docstring or readme file found for module `$(curmod_prefix)Undocumented`.
+
+# Exported names
+
+`A`, `B`, `C`, `at0`, `pt2`
+""")
+@test docstrings_equal(@doc(Undocumented), doc"$doc_str")
 
 doc_str = Markdown.parse("""
 No documentation found.
@@ -820,7 +876,7 @@ No documentation found.
 
 # Summary
 ```
-abstract type $(curmod_prefix)Undocumented.A <: Any
+abstract type $(curmod_prefix)Undocumented.A
 ```
 
 # Subtypes
@@ -836,7 +892,7 @@ No documentation found.
 
 # Summary
 ```
-abstract type $(curmod_prefix)Undocumented.B <: $(curmod_prefix)Undocumented.A
+abstract type $(curmod_prefix)Undocumented.B
 ```
 
 # Subtypes
@@ -856,7 +912,7 @@ No documentation found.
 
 # Summary
 ```
-mutable struct $(curmod_prefix)Undocumented.C <: $(curmod_prefix)Undocumented.A
+mutable struct $(curmod_prefix)Undocumented.C
 ```
 
 # Supertype Hierarchy
@@ -871,7 +927,7 @@ No documentation found.
 
 # Summary
 ```
-struct $(curmod_prefix)Undocumented.D <: $(curmod_prefix)Undocumented.B
+struct $(curmod_prefix)Undocumented.D
 ```
 
 # Fields
@@ -888,6 +944,217 @@ $(curmod_prefix)Undocumented.D <: $(curmod_prefix)Undocumented.B <: $(curmod_pre
 """)
 @test docstrings_equal(@doc(Undocumented.D), doc"$doc_str")
 
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+```
+abstract type $(curmod_prefix)Undocumented.at0{T<:Number, N}
+```
+
+# Subtypes
+
+```
+$(curmod_prefix)Undocumented.at1{Integer<:T<:Number, N}
+$(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer}
+$(curmod_prefix)Undocumented.st3{T<:Integer, N}
+$(curmod_prefix)Undocumented.st4{T<:Number, N}
+```
+""")
+@test docstrings_equal(@doc(Undocumented.at0), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+```
+abstract type $(curmod_prefix)Undocumented.at1{T>:Integer, N}
+```
+
+# Subtypes
+
+```
+$(curmod_prefix)Undocumented.mt6{Integer, N}
+```
+
+# Supertype Hierarchy
+```
+$(curmod_prefix)Undocumented.at1{T>:Integer, N} <: $(curmod_prefix)Undocumented.at0{T>:Integer, N} <: Any
+```
+""")
+@test docstrings_equal(@doc(Undocumented.at1), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+```
+abstract type $(curmod_prefix)Undocumented.at0{Int64, N}
+```
+
+# Subtypes
+
+```
+$(curmod_prefix)Undocumented.pt2{Int64, N, A>:Integer}
+$(curmod_prefix)Undocumented.st3{Int64, N}
+$(curmod_prefix)Undocumented.st4{Int64, N}
+```
+""")
+@test docstrings_equal(@doc(Undocumented.at_), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+```
+primitive type $(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer}
+```
+
+# Supertype Hierarchy
+
+```
+$(curmod_prefix)Undocumented.pt2{T<:Number, N, A>:Integer} <: $(curmod_prefix)Undocumented.at0{T<:Number, N} <: Any
+```
+""")
+@test docstrings_equal(@doc(Undocumented.pt2), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+```
+struct $(curmod_prefix)Undocumented.st3{T<:Integer, N}
+```
+
+# Fields
+```
+a :: Tuple{Vararg{T<:Integer, N}}
+b :: Array{Int64, N}
+c :: Int64
+```
+
+# Supertype Hierarchy
+```
+$(curmod_prefix)Undocumented.st3{T<:Integer, N} <: $(curmod_prefix)Undocumented.at0{T<:Integer, N} <: Any
+```
+""")
+@test docstrings_equal(@doc(Undocumented.st3), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+```
+struct $(curmod_prefix)Undocumented.st4{T, N}
+```
+
+# Fields
+```
+a :: T
+b :: Tuple{Vararg{T, N}}
+```
+
+# Supertype Hierarchy
+```
+$(curmod_prefix)Undocumented.st4{T, N} <: $(curmod_prefix)Undocumented.at0{T, N} <: Any
+```
+""")
+@test docstrings_equal(@doc(Undocumented.st4), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+```
+struct $(curmod_prefix)Undocumented.st5{T>:Int64, N}
+```
+
+# Fields
+```
+c :: $(curmod_prefix)Undocumented.st3{T>:Int64, N}
+```
+
+# Supertype Hierarchy
+```
+$(curmod_prefix)Undocumented.st5{T>:Int64, N} <: $(curmod_prefix)Undocumented.at1{T>:Int64, N} <: $(curmod_prefix)Undocumented.at0{T>:Int64, N} <: Any
+```
+""")
+@test docstrings_equal(@doc(Undocumented.st5), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+```
+mutable struct $(curmod_prefix)Undocumented.mt6{T<:Integer, N}
+```
+
+# Fields
+```
+d :: $(curmod_prefix)Undocumented.st5{T<:Integer, N}
+```
+
+# Supertype Hierarchy
+```
+$(curmod_prefix)Undocumented.mt6{T<:Integer, N} <: $(curmod_prefix)Undocumented.at1{T<:Integer, N} <: $(curmod_prefix)Undocumented.at0{T<:Integer, N} <: Any
+```
+""")
+@test docstrings_equal(@doc(Undocumented.mt6), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+`$(curmod_prefix)Undocumented.ut7` is of type `Union`.
+
+# Union Composed of Types
+
+ - `$(curmod_prefix)Undocumented.mt6`
+ - `$(curmod_prefix)Undocumented.st5`
+""")
+@test docstrings_equal(@doc(Undocumented.ut7), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+`$(curmod_prefix)Undocumented.ut8` is of type `Union`.
+
+# Union Composed of Types
+
+ - `$(curmod_prefix)Undocumented.at1`
+ - `$(curmod_prefix)Undocumented.pt2`
+ - `$(curmod_prefix)Undocumented.st3`
+ - `$(curmod_prefix)Undocumented.st4`
+""")
+@test docstrings_equal(@doc(Undocumented.ut8), doc"$doc_str")
+
+doc_str = Markdown.parse("""
+No documentation found.
+
+# Summary
+
+`$(curmod_prefix)Undocumented.ut9` is of type `UnionAll`.
+
+# Union Composed of Types
+
+ - `$(curmod_prefix)Undocumented.at1{T} where T`
+ - `$(curmod_prefix)Undocumented.pt2{T} where T`
+ - `$(curmod_prefix)Undocumented.st3{T} where T`
+ - `$(curmod_prefix)Undocumented.st4`
+""")
+@test docstrings_equal(@doc(Undocumented.ut9), doc"$doc_str")
+
 let d = @doc(Undocumented.f)
     io = IOBuffer()
     show(io, MIME"text/markdown"(), d)
@@ -1004,6 +1271,8 @@ end
 
 # issue #36378 (\u1e8b and x\u307 are the fully composed and decomposed forms of ẋ, respectively)
 @test sprint(repl_latex, "\u1e8b") == "\"x\u307\" can be typed by x\\dot<tab>\n\n"
+# issue 39814
+@test sprint(repl_latex, "\u2209") == "\"\u2209\" can be typed by \\notin<tab>\n\n"
 
 # issue #15684
 begin
@@ -1231,3 +1500,16 @@ Test.collect_test_logs() do                          # suppress printing of any
     eval(quote "Second docstring" Module29432 end)   # requires toplevel
 end
 @test docstrings_equal(@doc(Module29432), doc"Second docstring")
+
+# Issue #13109
+eval(Expr(:block, Expr(:macrocall, GlobalRef(Core, Symbol("@doc")), nothing, "...", Expr(:module, false, :MBareModuleEmpty, Expr(:block)))))
+@test docstrings_equal(@doc(MBareModuleEmpty), doc"...")
+
+# issue #41727
+"struct docstring"
+struct S41727
+    "x is $(2*2)"
+    x
+end
+@test S41727(1) isa S41727
+@test string(@repl S41727.x) == "x is 4\n"
diff --git a/test/download.jl b/test/download.jl
index 11d33a91fa1fc4..a37afae231a2a5 100644
--- a/test/download.jl
+++ b/test/download.jl
@@ -1,50 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# Test that `Base.download_url()` is altered by `Base.DOWNLOAD_HOOKS`.
-let urls = ["http://httpbin.julialang.org/ip", "https://httpbin.julialang.org/ip"]
-    for url in urls
-        @test Base.download_url(url) == url
-    end
-    push!(Base.DOWNLOAD_HOOKS, url->replace(url, r"^http://" => "https://"))
-    for url in urls
-        @test Base.download_url(url) == urls[end]
-    end
-    pop!(Base.DOWNLOAD_HOOKS)
-    for url in urls
-        @test Base.download_url(url) == url
-    end
-end
-
-mktempdir() do temp_dir
-    # Download a file
-    file = joinpath(temp_dir, "ip")
-    @test download("https://httpbin.julialang.org/ip", file) == file
-    @test isfile(file)
-    @test !isempty(read(file))
-    ip = read(file, String)
-
-    # Test download rewrite hook
-    push!(Base.DOWNLOAD_HOOKS, url->replace(url, r"/status/404$" => "/ip"))
-    @test download("https://httpbin.julialang.org/status/404", file) == file
-    @test isfile(file)
-    @test !isempty(read(file))
-    @test ip == read(file, String)
-    pop!(Base.DOWNLOAD_HOOKS)
-
-    # Download an empty file
-    empty_file = joinpath(temp_dir, "empty")
-    @test download("https://httpbin.julialang.org/status/200", empty_file) == empty_file
-
-    # Windows and older versions of curl do not create the empty file (https://github.com/curl/curl/issues/183)
-    @test !isfile(empty_file) || isempty(read(empty_file))
-
-    # Make sure that failed downloads do not leave files around
-    missing_file = joinpath(temp_dir, "missing")
-    @test_throws ErrorException download("https://httpbin.julialang.org/status/404", missing_file)
-    @test !isfile(missing_file)
-
-    # Use a TEST-NET (192.0.2.0/24) address which shouldn't be bound
-    invalid_host_file = joinpath(temp_dir, "invalid_host")
-    @test_throws ErrorException download("http://192.0.2.1", invalid_host_file)
-    @test !isfile(invalid_host_file)
+cmd = `$(Base.julia_cmd()) --depwarn=no --startup-file=no download_exec.jl`
+if !success(pipeline(cmd; stdout=stdout, stderr=stderr))
+    error("download test failed, cmd : $cmd")
 end
diff --git a/test/download_exec.jl b/test/download_exec.jl
new file mode 100644
index 00000000000000..777fb6773c463e
--- /dev/null
+++ b/test/download_exec.jl
@@ -0,0 +1,32 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+module TestDownload
+
+using Test
+
+mktempdir() do temp_dir
+    # Download a file
+    file = joinpath(temp_dir, "ip")
+    @test download("https://httpbin.julialang.org/ip", file) == file
+    @test isfile(file)
+    @test !isempty(read(file))
+    ip = read(file, String)
+
+    # Download an empty file
+    empty_file = joinpath(temp_dir, "empty")
+    @test download("https://httpbin.julialang.org/status/200", empty_file) == empty_file
+    @test isfile(empty_file)
+    @test isempty(read(empty_file))
+
+    # Make sure that failed downloads do not leave files around
+    missing_file = joinpath(temp_dir, "missing")
+    @test_throws Exception download("https://httpbin.julialang.org/status/404", missing_file)
+    @test !isfile(missing_file)
+
+    # Use a TEST-NET (192.0.2.0/24) address which shouldn't be bound
+    invalid_host_file = joinpath(temp_dir, "invalid_host")
+    @test_throws Exception download("http://192.0.2.1", invalid_host_file)
+    @test !isfile(invalid_host_file)
+end
+
+end # module
diff --git a/test/embedding/embedding-test.jl b/test/embedding/embedding-test.jl
index f358ff2a74cdf6..797f6dabd9a89f 100644
--- a/test/embedding/embedding-test.jl
+++ b/test/embedding/embedding-test.jl
@@ -20,8 +20,7 @@ end
     close(out.in)
     close(err.in)
     out_task = @async readlines(out)
-    err = read(err, String)
-    @test err == "MethodError: no method matching this_function_has_no_methods()\n"
+    @test readline(err) == "MethodError: no method matching this_function_has_no_methods()"
     @test success(p)
     lines = fetch(out_task)
     @test length(lines) == 10
@@ -29,4 +28,5 @@ end
     @test lines[8] == "called bar"
     @test lines[9] == "calling new bar"
     @test lines[10] == "      From worker 2:\tTaking over the world..."
+    @test readline(err) == "exception caught from C"
 end
diff --git a/test/embedding/embedding.c b/test/embedding/embedding.c
index 463d45da51a78d..d082366c908de4 100644
--- a/test/embedding/embedding.c
+++ b/test/embedding/embedding.c
@@ -4,7 +4,7 @@
 #include <stdio.h>
 #include <math.h>
 
-JULIA_DEFINE_FAST_TLS() // only define this once, in an executable
+JULIA_DEFINE_FAST_TLS // only define this once, in an executable
 
 #ifdef _OS_WINDOWS_
 __declspec(dllexport) __cdecl
@@ -32,6 +32,9 @@ jl_value_t *checked_eval_string(const char* code)
 
 int main()
 {
+    // check that setting options works
+    jl_options.opt_level = 1;
+
     jl_init();
 
     {
@@ -40,6 +43,12 @@ int main()
         checked_eval_string("println(sqrt(2.0))");
     }
 
+    if (jl_options.opt_level != 1) {
+        jl_printf(jl_stderr_stream(), "setting jl_options didn't work\n");
+        jl_atexit_hook(1);
+        exit(1);
+    }
+
     {
         // Accessing the return value
 
@@ -161,7 +170,7 @@ int main()
         // disable the package manager
         "    ENV[\"JULIA_PKGDIR\"] = joinpath(dir, \"disabled\")\n"
         // locate files relative to the "embedding" executable
-        "    stdlib = filter(env -> startswith(Base.find_package(Base, \"Distributed\"), env), Base.load_path())[end]\n"
+        "    stdlib = filter(env -> startswith(Base.find_package(\"Distributed\"), env), Base.load_path())[end]\n"
         "    push!(empty!(LOAD_PATH), dir, stdlib)\n"
         "end"
         );
@@ -175,6 +184,13 @@ int main()
         checked_eval_string("f28825()");
     }
 
+    JL_TRY {
+        jl_error("exception thrown");
+    }
+    JL_CATCH {
+        jl_printf(jl_stderr_stream(), "exception caught from C\n");
+    }
+
     int ret = 0;
     jl_atexit_hook(ret);
     return ret;
diff --git a/test/enums.jl b/test/enums.jl
index d3c585678c572f..5a83e1b4dfa424 100644
--- a/test/enums.jl
+++ b/test/enums.jl
@@ -143,6 +143,10 @@ let io = IOBuffer()
     @test String(take!(io)) == sprint(print, Fruit)
 end
 
+# Test printing of invalid enums
+@test repr("text/plain", reinterpret(Fruit, Int32(11))) == "<invalid #11>::Fruit = 11"
+@test repr("text/plain", reinterpret(Fruit, Int32(-5))) == "<invalid #-5>::Fruit = -5"
+
 @enum LogLevel DEBUG INFO WARN ERROR CRITICAL
 @test DEBUG < CRITICAL
 
@@ -160,6 +164,9 @@ end
 @test repr("text/plain", sevn)  == "$(string(sevn))::UI8 = 0x07"
 @test repr("text/plain", fiftn) == "$(string(fiftn))::UI8 = 0xf0"
 
+@test repr("text/plain", reinterpret(UI8, 0x01)) == "<invalid #1>::UI8 = 0x01"
+@test repr("text/plain", reinterpret(UI8, 0xff)) == "<invalid #255>::UI8 = 0xff"
+
 # test block form
 @enum BritishFood begin
     blackpudding = 1
diff --git a/test/env.jl b/test/env.jl
index 176074201f23e9..644d956af8fd47 100644
--- a/test/env.jl
+++ b/test/env.jl
@@ -27,6 +27,16 @@ end
         @test isempty(ENV) || first(ENV) in c
     end
 end
+
+# issue #43486
+struct Obj43486 end
+(::Obj43486)() = ENV["KEY"] == "VALUE"
+let
+    f = Obj43486()
+    @test !(f isa Function)
+    @test withenv(f, "KEY" => "VALUE")
+end
+
 @testset "non-existent keys" begin
     key = randstring(25)
     @test !haskey(ENV,key)
diff --git a/test/error.jl b/test/error.jl
index bb97a0e66ed0bd..1dae62fb91e585 100644
--- a/test/error.jl
+++ b/test/error.jl
@@ -6,11 +6,11 @@
     @test maximum(ExponentialBackOff(n=10, max_delay=0.06)) == 0.06
     ratio(x) = x[2:end]./x[1:end-1]
     @test all(x->x ≈ 10.0, ratio(collect(ExponentialBackOff(n=10, max_delay=Inf, factor=10, jitter=0.0))))
-    Test.guardseed(12345) do
-        x = ratio(collect(ExponentialBackOff(n=100, max_delay=Inf, factor=1, jitter=0.1)))
-        xm = sum(x) / length(x)
-        @test (xm - 1.0) < 1e-4
-    end
+    Libc.srand(12345)
+    x = ratio(collect(ExponentialBackOff(n=100, max_delay=Inf, factor=1, jitter=0.1)))
+    xm = sum(x) / length(x)
+    @test abs(xm - 1.0) < 0.01
+    Libc.srand()
 end
 @testset "retrying after errors" begin
     function foo_error(c, n)
@@ -81,3 +81,18 @@ end
     # non-Functions
     @test retry(Float64)(1) === 1.0
 end
+
+@testset "SystemError initialization" begin
+    e = SystemError("fail")
+    @test e.extrainfo === nothing
+end
+
+@testset "MethodError for methods without line numbers" begin
+    try
+        eval(Expr(:function, :(f44319()), 0))
+        f44319(1)
+    catch e
+        s = sprint(showerror, e)
+        @test s == "MethodError: no method matching f44319(::Int$(Sys.WORD_SIZE))\nClosest candidates are:\n  f44319() at none:0"
+    end
+end
diff --git a/test/errorshow.jl b/test/errorshow.jl
index 2d370d7f05246f..72a2ebb1e9cbea 100644
--- a/test/errorshow.jl
+++ b/test/errorshow.jl
@@ -48,8 +48,9 @@ include("testenv.jl")
     end
 end
 
-
-cfile = " at $(@__FILE__):"
+file = @__FILE__
+Base.stacktrace_contract_userdir() && (file = Base.contractuser(file))
+cfile = " at $file:"
 c1line = @__LINE__() + 1
 method_c1(x::Float64, s::AbstractString...) = true
 
@@ -124,7 +125,7 @@ PR16155line2 = @__LINE__() + 1
 (::Type{T})(arg::Any) where {T<:PR16155} = "replace call-to-convert method from sysimg"
 
 Base.show_method_candidates(buf, MethodError(PR16155,(1.0, 2.0, Int64(3))))
-@test String(take!(buf)) == "\nClosest candidates are:\n  $(curmod_prefix)PR16155(::Any, ::Any)$cfile$PR16155line\n  (::Type{T})(::Any) where T<:$(curmod_prefix)PR16155$cfile$PR16155line2\n  $(curmod_prefix)PR16155(!Matched::Int64, ::Any)$cfile$PR16155line"
+@test String(take!(buf)) == "\nClosest candidates are:\n  $(curmod_prefix)PR16155(::Any, ::Any)$cfile$PR16155line\n  $(curmod_prefix)PR16155(!Matched::Int64, ::Any)$cfile$PR16155line\n  (::Type{T})(::Any) where T<:$(curmod_prefix)PR16155$cfile$PR16155line2"
 
 Base.show_method_candidates(buf, MethodError(PR16155,(Int64(3), 2.0, Int64(3))))
 @test String(take!(buf)) == "\nClosest candidates are:\n  $(curmod_prefix)PR16155(::Int64, ::Any)$cfile$PR16155line\n  $(curmod_prefix)PR16155(::Any, ::Any)$cfile$PR16155line\n  (::Type{T})(::Any) where T<:$(curmod_prefix)PR16155$cfile$PR16155line2"
@@ -184,6 +185,11 @@ addConstraint_15639(c::Int64; uncset=nothing) = addConstraint_15639(Int32(c), un
 Base.show_method_candidates(buf, MethodError(addConstraint_15639, (Int32(1),)), pairs((uncset = nothing,)))
 @test String(take!(buf)) == "\nClosest candidates are:\n  addConstraint_15639(::Int32)$cfile$(ac15639line + 1) got unsupported keyword argument \"uncset\"\n  addConstraint_15639(!Matched::Int64; uncset)$cfile$(ac15639line + 2)"
 
+# Busted Vararg method definitions
+bad_vararg_decl(x::Int, y::Vararg) = 1   # don't do this, instead use (x::Int, y...)
+Base.show_method_candidates(buf, try bad_vararg_decl("hello", 3) catch e e end)
+@test occursin("bad_vararg_decl(!Matched::$Int, ::Any...)", String(take!(buf)))
+
 macro except_str(expr, err_type)
     return quote
         let err = nothing
@@ -271,7 +277,7 @@ let
     @test occursin("column vector", err_str)
 end
 
-struct TypeWithIntParam{T <: Integer} end
+struct TypeWithIntParam{T<:Integer, Vector{T}<:A<:AbstractArray{T}} end
 struct Bounded  # not an AbstractArray
     bound::Int
 end
@@ -297,6 +303,8 @@ let undefvar
     @test err_str == "BoundsError: attempt to access 3-element Vector{$Int} at index [-2, 1]"
     err_str = @except_str [5, 4, 3][1:5] BoundsError
     @test err_str == "BoundsError: attempt to access 3-element Vector{$Int} at index [1:5]"
+    err_str = @except_str [5, 4, 3][trues(6,7)] BoundsError
+    @test err_str == "BoundsError: attempt to access 3-element Vector{$Int} at index [6×7 BitMatrix]"
 
     err_str = @except_str Bounded(2)[3] BoundsError
     @test err_str == "BoundsError: attempt to access 2-size Bounded at index [3]"
@@ -315,8 +323,14 @@ let undefvar
     @test err_str == "TypeError: in Type, in parameter, expected Type, got a value of type String"
     err_str = @except_str TypeWithIntParam{Any} TypeError
     @test err_str == "TypeError: in TypeWithIntParam, in T, expected T<:Integer, got Type{Any}"
+    err_str = @except_str TypeWithIntParam{Int64,Vector{Float64}} TypeError
+    @test err_str == "TypeError: in TypeWithIntParam, in A, expected Vector{Int64}<:A<:(AbstractArray{Int64}), got Type{Vector{Float64}}"
+    err_str = @except_str TypeWithIntParam{Int64}{Vector{Float64}} TypeError
+    @test err_str == "TypeError: in TypeWithIntParam, in A, expected Vector{Int64}<:A<:(AbstractArray{Int64}), got Type{Vector{Float64}}"
     err_str = @except_str Type{Vararg} TypeError
     @test err_str == "TypeError: in Type, in parameter, expected Type, got Vararg"
+    err_str = @except_str Ref{Vararg} TypeError
+    @test err_str == "TypeError: in Type, in parameter, expected Type, got Vararg"
 
     err_str = @except_str mod(1,0) DivideError
     @test err_str == "DivideError: integer division error"
@@ -426,7 +440,7 @@ let err_str,
     @test startswith(sprint(show, which(Complex{Int}, Tuple{Int})),
                      "Complex{T}(")
     @test startswith(sprint(show, which(getfield(Base, Symbol("@doc")), Tuple{LineNumberNode, Module, Vararg{Any}})),
-                     "@doc(__source__::LineNumberNode, __module__::Module, x...) in Core at boot.jl:")
+                     "var\"@doc\"(__source__::LineNumberNode, __module__::Module, x...) in Core at boot.jl:")
     @test startswith(sprint(show, which(FunctionLike(), Tuple{})),
                      "(::$(curmod_prefix)FunctionLike)() in $curmod_str at $sp:$(method_defs_lineno + 7)")
     @test startswith(sprint(show, which(StructWithUnionAllMethodDefs{<:Integer}, (Any,))),
@@ -473,12 +487,6 @@ let
     @test (@macroexpand @fastmath +      ) == :(Base.FastMath.add_fast)
     @test (@macroexpand @fastmath min(1) ) == :(Base.FastMath.min_fast(1))
     let err = try; @macroexpand @doc "" f() = @x; catch ex; ex; end
-        file, line = @__FILE__, @__LINE__() - 1
-        err = err::LoadError
-        @test err.file == file && err.line == line
-        err = err.error::LoadError
-        @test err.file == file && err.line == line
-        err = err.error::UndefVarError
         @test err == UndefVarError(Symbol("@x"))
     end
     @test (@macroexpand @seven_dollar $bar) == 7
@@ -632,6 +640,18 @@ catch ex
 end
 pop!(Base.Experimental._hint_handlers[DomainError])  # order is undefined, don't copy this
 
+struct ANumber <: Number end
+let err_str
+    err_str = @except_str ANumber()(3 + 4) MethodError
+    @test occursin("objects of type $(curmod_prefix)ANumber are not callable", err_str)
+    @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
+    # issue 40478
+    err_str = @except_str ANumber()(3 + 4) MethodError
+    @test count(==("Maybe you forgot to use an operator such as *, ^, %, / etc. ?"), split(err_str, '\n')) == 1
+end
+
+# Execute backtrace once before checking formatting, see #38858
+backtrace()
 
 # issue #28442
 @testset "Long stacktrace printing" begin
@@ -645,13 +665,8 @@ pop!(Base.Experimental._hint_handlers[DomainError])  # order is undefined, don't
     @test occursin("g28442", output[3])
     @test lstrip(output[5])[1:3] == "[2]"
     @test occursin("f28442", output[5])
-    # Issue #30233
-    # Note that we can't use @test_broken on FreeBSD here, because the tests actually do
-    # pass with some compilation options, e.g. with assertions enabled
-    if !Sys.isfreebsd()
-        @test occursin("the last 2 lines are repeated 5000 more times", output[7])
-        @test lstrip(output[8])[1:7] == "[10003]"
-    end
+    @test occursin("the last 2 lines are repeated 5000 more times", output[7])
+    @test lstrip(output[8])[1:7] == "[10003]"
 end
 
 @testset "Line number correction" begin
@@ -674,6 +689,7 @@ end
     @test getline(outputc) == getline(output0) + 2
 end
 
+
 # issue #30633
 @test_throws ArgumentError("invalid index: \"foo\" of type String") [1]["foo"]
 @test_throws ArgumentError("invalid index: nothing of type Nothing") [1][nothing]
@@ -719,7 +735,7 @@ end
 
 # Test that implementation detail of include() is hidden from the user by default
 let bt = try
-        include("testhelpers/include_error.jl")
+        @noinline include("testhelpers/include_error.jl")
     catch
         catch_backtrace()
     end
@@ -731,7 +747,7 @@ end
 # Test backtrace printing
 module B
     module C
-        f(x; y=2.0) = error()
+        @noinline f(x; y=2.0) = error()
     end
     module D
         import ..C: f
@@ -740,7 +756,8 @@ module B
 end
 
 @testset "backtrace" begin
-    bt = try B.D.g()
+    bt = try
+        B.D.g()
     catch
         catch_backtrace()
     end
@@ -750,3 +767,139 @@ end
     @test contains(bt_str, "@ $m.B.C")
     @test contains(bt_str, "@ $m.B.D")
 end
+# 1d/2d error shouldn't appear in unsupported keywords arg #36325
+let err = nothing
+    try
+        identity([1 1]; bad_kwards = :julia)
+    catch err
+        err_str = sprint(showerror, err)
+        @test !occursin("2d", err_str)
+    end
+end
+
+# issue #37587
+# TODO: enable on more platforms
+if (Sys.isapple() || Sys.islinux()) && Sys.ARCH === :x86_64
+    single_repeater() = single_repeater()
+    pair_repeater_a() = pair_repeater_b()
+    pair_repeater_b() = pair_repeater_a()
+
+    @testset "repeated stack frames" begin
+        let bt = try
+                single_repeater()
+            catch
+                catch_backtrace()
+            end
+            bt_str = sprint(Base.show_backtrace, bt)
+            @test occursin(r"repeats \d+ times", bt_str)
+        end
+
+        let bt = try
+                pair_repeater_a()
+            catch
+                catch_backtrace()
+            end
+            bt_str = sprint(Base.show_backtrace, bt)
+            @test occursin(r"the last 2 lines are repeated \d+ more times", bt_str)
+        end
+    end
+end
+
+@testset "ScheduledAfterSyncException" begin
+    t = :DummyTask
+    msg = sprint(showerror, Base.ScheduledAfterSyncException(Any[t]))
+    @test occursin(":DummyTask is registered after the end of a `@sync` block", msg)
+    msg = sprint(showerror, Base.ScheduledAfterSyncException(Any[t, t]))
+    @test occursin(
+        ":DummyTask and one more Symbol are registered after the end of a `@sync` block",
+        msg,
+    )
+    msg = sprint(showerror, Base.ScheduledAfterSyncException(Any[t, t, t]))
+    @test occursin(
+        ":DummyTask and 2 more objects are registered after the end of a `@sync` block",
+        msg,
+    )
+end
+
+@testset "error message hints relative modules #40959" begin
+    m = Module()
+    expr = :(module Foo
+        module Bar
+        end
+
+        using Bar
+    end)
+    try
+        Base.eval(m, expr)
+    catch err
+        err_str = sprint(showerror, err)
+        @test contains(err_str, "maybe you meant `import/using .Bar`")
+    end
+
+    m = Module()
+    expr = :(module Foo
+        Bar = 3
+
+        using Bar
+    end)
+    try
+        Base.eval(m, expr)
+    catch err
+        err_str = sprint(showerror, err)
+        @test !contains(err_str, "maybe you meant `import/using .Bar`")
+    end
+
+    m = Module()
+    expr = :(module Foo
+        using Bar
+    end)
+    try
+        Base.eval(m, expr)
+    catch err
+        err_str = sprint(showerror, err)
+        @test !contains(err_str, "maybe you meant `import/using .Bar`")
+    end
+
+    m = Module()
+    expr = :(module Foo
+        module Bar end
+        module Buzz
+            using Bar
+        end
+    end)
+    try
+        Base.eval(m, expr)
+    catch err
+        err_str = sprint(showerror, err)
+        @test contains(err_str, "maybe you meant `import/using ..Bar`")
+    end
+
+    m = Module()
+    expr = :(module Foo
+        Bar = 3
+        module Buzz
+            using Bar
+        end
+    end)
+    try
+        Base.eval(m, expr)
+    catch err
+        err_str = sprint(showerror, err)
+        @test !contains(err_str, "maybe you meant `import/using ..Bar`")
+    end
+
+    m = Module()
+    expr = :(module Foo
+        module Bar end
+        module Buzz
+            module Bar end
+            using Bar
+        end
+    end)
+    try
+        Base.eval(m, expr)
+    catch err
+        err_str = sprint(showerror, err)
+        @test contains(err_str, "maybe you meant `import/using .Bar`")
+    end
+end
diff --git a/test/exceptions.jl b/test/exceptions.jl
index 7b8a54da2c6ebd..d8d1e7b45b8b53 100644
--- a/test/exceptions.jl
+++ b/test/exceptions.jl
@@ -1,52 +1,51 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 using Test
-using Base: catch_stack
 
 @testset "Basic exception stack handling" begin
     # Exiting the catch block normally pops the exception
     try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # Exiting via a finally block does not pop the exception
     try
         try
             error("A")
         finally
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
         end
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
     end
     # The combined try-catch-finally form obeys the same rules as above
     try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
     finally
-        @test length(catch_stack()) == 0
+        @test length(current_exceptions()) == 0
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # Errors are pushed onto the stack according to catch block nesting
     try
         error("RootCause")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         try
             error("B")
         catch
-            stack = catch_stack()
+            stack = current_exceptions()
             @test length(stack) == 2
-            @test stack[1][1].msg == "RootCause"
-            @test stack[2][1].msg == "B"
+            @test stack[1].exception.msg == "RootCause"
+            @test stack[2].exception.msg == "B"
         end
         # Stack pops correctly
-        stack = catch_stack()
+        stack = current_exceptions()
         @test length(stack) == 1
-        @test stack[1][1].msg == "RootCause"
+        @test stack[1].exception.msg == "RootCause"
     end
 end
 
@@ -55,7 +54,7 @@ end
     val = try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         1
     end
     @test val == 1
@@ -64,11 +63,11 @@ end
         try
             error("A")
         catch
-            length(catch_stack())
+            length(current_exceptions())
         end
     end
     @test test_exc_stack_tailpos() == 1
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 end
 
 @testset "Exception stacks - early exit from try or catch" begin
@@ -78,7 +77,7 @@ end
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             return
         end
     end
@@ -88,7 +87,7 @@ end
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             break
         end
     end
@@ -97,19 +96,19 @@ end
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             break
         finally
-            @test length(catch_stack()) == 0
+            @test length(current_exceptions()) == 0
         end
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 
     for i=1:1
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             continue
         end
     end
@@ -117,38 +116,38 @@ end
         try
             error("A")
         catch
-            @test length(catch_stack()) == 1
+            @test length(current_exceptions()) == 1
             continue
         finally
-            @test length(catch_stack()) == 0
+            @test length(current_exceptions()) == 0
         end
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 
     try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         @goto outofcatch
     end
     @label outofcatch
     try
         error("A")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         @goto outofcatch2
     finally
-        @test length(catch_stack()) == 0
+        @test length(current_exceptions()) == 0
     end
     @label outofcatch2
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 
     # Exiting from a try block in various ways should not affect the exception
     # stack state.
     try
         error("ExceptionInOuterTry")
     catch
-        @test length(catch_stack()) == 1
+        @test length(current_exceptions()) == 1
         function test_exc_stack_try_return()
             try
                 return
@@ -173,8 +172,8 @@ end
         catch
         end
         @label outoftry
-        @test length(catch_stack()) == 1
-        @test catch_stack()[1][1] == ErrorException("ExceptionInOuterTry")
+        @test length(current_exceptions()) == 1
+        @test current_exceptions()[1].exception == ErrorException("ExceptionInOuterTry")
     end
 end
 
@@ -195,10 +194,10 @@ end
             # Explicit return => exception should be popped before finally block
             return
         finally
-            @test length(Base.catch_stack()) == 0
+            @test length(Base.current_exceptions()) == 0
         end
     end)()
-    @test length(Base.catch_stack()) == 0
+    @test length(Base.current_exceptions()) == 0
 
     while true
         try
@@ -209,11 +208,11 @@ end
                 # exception should not be popped inside finally block
                 break
             finally
-                @test length(Base.catch_stack()) == 1
+                @test length(Base.current_exceptions()) == 1
             end
         end
     end
-    @test length(Base.catch_stack()) == 0
+    @test length(Base.current_exceptions()) == 0
 
     # Nested finally handling with `return`: each finally block should observe
     # only the active exceptions as according to its nesting depth.
@@ -232,16 +231,16 @@ end
                     end
                 finally
                     # At this point err2 is dealt with
-                    @test length(Base.catch_stack()) == 1
-                    @test Base.catch_stack()[1][1] == ErrorException("err1")
+                    @test length(Base.current_exceptions()) == 1
+                    @test Base.current_exceptions()[1].exception == ErrorException("err1")
                 end
             end
         finally
             # At this point err1 is dealt with
-            @test length(Base.catch_stack()) == 0
+            @test length(Base.current_exceptions()) == 0
         end
     end)()
-    @test length(Base.catch_stack()) == 0
+    @test length(Base.current_exceptions()) == 0
 end
 
 @testset "Deep exception stacks" begin
@@ -260,10 +259,10 @@ end
     @test try
         test_exc_stack_deep(100)
     catch
-        @test catch_stack()[1][1] == ErrorException("RootCause")
-        length(catch_stack())
+        @test current_exceptions()[1].exception == ErrorException("RootCause")
+        length(current_exceptions())
     end == 100
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
 end
 
 @testset "Exception stacks and Tasks" begin
@@ -280,10 +279,10 @@ end
         @test t.state == :done
         @test t.result == ErrorException("B")
         # Task exception state is preserved around task switches
-        @test length(catch_stack()) == 1
-        @test catch_stack()[1][1] == ErrorException("A")
+        @test length(current_exceptions()) == 1
+        @test current_exceptions()[1].exception == ErrorException("A")
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # test rethrow() rethrows correct state
     bt = []
     try
@@ -306,7 +305,7 @@ end
         @test exc == ErrorException("A")
         @test bt == catch_backtrace()
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # test rethrow with argument
     bt = []
     try
@@ -328,7 +327,7 @@ end
         @test exc == ErrorException("C")
         @test bt == catch_backtrace()
     end
-    @test length(catch_stack()) == 0
+    @test length(current_exceptions()) == 0
     # Exception stacks on other tasks
     t = @task try
         error("A")
@@ -338,7 +337,10 @@ end
     yield(t)
     @test t.state == :failed
     @test t.result == ErrorException("B")
-    @test catch_stack(t, include_bt=false) == [ErrorException("A"), ErrorException("B")]
+    @test current_exceptions(t, backtrace=false) == [
+        (exception=ErrorException("A"),backtrace=nothing),
+        (exception=ErrorException("B"),backtrace=nothing)
+    ]
     # Exception stacks for tasks which never get the chance to start
     t = @task nothing
     @test (try
@@ -347,12 +349,12 @@ end
     catch e
         e
     end).task.exception == ErrorException("expected")
-    @test length(catch_stack(t)) == 1
-    @test length(catch_stack(t)[1][2]) > 0 # backtrace is nonempty
+    @test length(current_exceptions(t)) == 1
+    @test length(current_exceptions(t)[1].backtrace) > 0 # backtrace is nonempty
     # Exception stacks should not be accessed on concurrently running tasks
     t = @task ()->nothing
     @test_throws ErrorException("Inspecting the exception stack of a task which might "*
-                                "be running concurrently isn't allowed.") catch_stack(t)
+                                "be running concurrently isn't allowed.") current_exceptions(t)
 end
 
 @testset "rethrow" begin
@@ -396,5 +398,5 @@ end
     undef_var_in_catch()
     []
 catch
-    catch_stack()
+    current_exceptions()
 end) == 2
diff --git a/test/fastmath.jl b/test/fastmath.jl
index bce043d7b32b02..e93fb93330b4f5 100644
--- a/test/fastmath.jl
+++ b/test/fastmath.jl
@@ -60,9 +60,6 @@ fm_fast_64_upd(x) = @fastmath (r=x; r+=eps64_2; r+=eps64_2)
         @test @fastmath(cmp(two,two)) == cmp(two,two)
         @test @fastmath(cmp(two,three)) == cmp(two,three)
         @test @fastmath(cmp(three,two)) == cmp(three,two)
-        @test @fastmath(one/zero) == convert(T,Inf)
-        @test @fastmath(-one/zero) == -convert(T,Inf)
-        @test isnan(@fastmath(zero/zero)) # must not throw
 
         for x in (zero, two, convert(T, Inf), convert(T, NaN))
             @test @fastmath(isfinite(x))
@@ -252,3 +249,13 @@ end
     @test (@fastmath "a" * "b") == "ab"
     @test (@fastmath "a" ^ 2) == "aa"
 end
+
+
+@testset "exp overflow and underflow" begin
+    for T in (Float32,Float64)
+        for func in (@fastmath exp2,exp,exp10)
+            @test func(T(2000)) == T(Inf)
+            @test func(T(-2000)) == T(0)
+        end
+    end
+end
diff --git a/test/file.jl b/test/file.jl
index c8dbdde1c4f855..2c09b3edaed2e8 100644
--- a/test/file.jl
+++ b/test/file.jl
@@ -15,37 +15,78 @@ subdir = joinpath(dir, "adir")
 mkdir(subdir)
 subdir2 = joinpath(dir, "adir2")
 mkdir(subdir2)
-@test_throws Base._UVError("mkdir", Base.UV_EEXIST) mkdir(file)
+@test_throws Base._UVError("mkdir($(repr(file)); mode=0o777)", Base.UV_EEXIST) mkdir(file)
 let err = nothing
     try
         mkdir(file)
     catch err
         io = IOBuffer()
         showerror(io, err)
-        @test startswith(String(take!(io)), "IOError: mkdir: file already exists (EEXIST)")
+        @test startswith(String(take!(io)), "IOError: mkdir") && err.code == Base.UV_EEXIST
     end
 end
 
 if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
     dirlink = joinpath(dir, "dirlink")
     symlink(subdir, dirlink)
+    @test stat(dirlink) == stat(subdir)
+    @test readdir(dirlink) == readdir(subdir)
+
     # relative link
-    cd(subdir)
     relsubdirlink = joinpath(subdir, "rel_subdirlink")
     reldir = joinpath("..", "adir2")
     symlink(reldir, relsubdirlink)
-    cd(pwd_)
+    @test stat(relsubdirlink) == stat(subdir2)
+    @test readdir(relsubdirlink) == readdir(subdir2)
+
+    # creation of symlink to directory that does not yet exist
+    new_dir = joinpath(subdir, "new_dir")
+    foo_file = joinpath(subdir, "new_dir", "foo")
+    nedlink = joinpath(subdir, "non_existant_dirlink")
+    symlink("new_dir", nedlink; dir_target=true)
+    try
+        readdir(nedlink)
+        @test false
+    catch e
+        @test isa(e, Base.IOError)
+        # It's surprisingly difficult to know what numeric value this will be across platforms
+        # so we'll just check the string representation instead. :(
+        @test endswith(e.msg, "(ENOENT)")
+    end
+    mkdir(new_dir)
+    touch(foo_file)
+    @test readdir(new_dir) == readdir(nedlink)
+
+    rm(foo_file)
+    rm(new_dir)
+    rm(nedlink)
 end
 
-if !Sys.iswindows()
-    link = joinpath(dir, "afilelink.txt")
+if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
+    link = joinpath(dir, "afilesymlink.txt")
     symlink(file, link)
+    @test stat(file) == stat(link)
+
     # relative link
-    cd(subdir)
-    rellink = joinpath(subdir, "rel_afilelink.txt")
+    rellink = joinpath(subdir, "rel_afilesymlink.txt")
     relfile = joinpath("..", "afile.txt")
     symlink(relfile, rellink)
-    cd(pwd_)
+    @test stat(rellink) == stat(file)
+end
+
+@testset "hardlink" begin
+    link = joinpath(dir, "afilehardlink.txt")
+    hardlink(file, link)
+    @test stat(file) == stat(link)
+
+    # when the destination exists
+    @test_throws Base.IOError hardlink(file, link)
+
+    rm(link)
+
+    # the source file does not exist
+    missing_file = joinpath(dir, "for-sure-missing-file.txt")
+    @test_throws Base.IOError hardlink(missing_file, link)
 end
 
 using Random
@@ -56,10 +97,12 @@ using Random
     temps = map(1:100) do _
         path, io = mktemp(cleanup=false)
         close(io)
-        rm(path, force=true)
         return path
     end
     @test allunique(temps)
+    foreach(temps) do path
+        rm(path, force=true)
+    end
 end
 
 @testset "tempname with parent" begin
@@ -70,6 +113,16 @@ end
         @test dirname(t) == d
     end
     @test_throws ArgumentError tempname(randstring())
+
+    # 38873: check that `TMPDIR` being set does not
+    # override the parent argument to `tempname`.
+    mktempdir() do d
+        withenv("TMPDIR"=>tempdir()) do
+            t = tempname(d)
+            @test dirname(t) == d
+        end
+    end
+    @test_throws ArgumentError tempname(randstring())
 end
 
 child_eval(code::String) = eval(Meta.parse(readchomp(`$(Base.julia_cmd()) -E $code`)))
@@ -140,7 +193,7 @@ end
             t = i % 2 == 0 ? mktempfile() : mktempdir()
             push!(temps, t)
             @test ispath(t)
-            @test length(TEMP_CLEANUP) == i 
+            @test length(TEMP_CLEANUP) == i
             @test TEMP_CLEANUP_MAX[] == n
             # delete 1/3 of the temp paths
             i % 3 == 0 && rm(t, recursive=true, force=true)
@@ -362,9 +415,8 @@ function test_stat_error(stat::Function, pth)
     end
     ex = try; stat(pth); false; catch ex; ex; end::Base.IOError
     @test ex.code == (pth isa AbstractString ? Base.UV_EACCES : Base.UV_EBADF)
-    @test startswith(ex.msg, "stat: ")
     pth isa AbstractString || (pth = Base.INVALID_OS_HANDLE)
-    @test endswith(ex.msg, repr(pth))
+    @test startswith(ex.msg, "stat($(repr(pth)))")
     nothing
 end
 @testset "stat errors" begin # PR 32031
@@ -443,8 +495,8 @@ cp(newfile, c_file)
 
 @test isdir(c_subdir)
 @test isfile(c_file)
-@test_throws SystemError rm(c_tmpdir)
-@test_throws SystemError rm(c_tmpdir, force=true)
+@test_throws Base.IOError rm(c_tmpdir)
+@test_throws Base.IOError rm(c_tmpdir, force=true)
 
 # create temp dir in specific directory
 d_tmpdir = mktempdir(c_tmpdir)
@@ -459,14 +511,40 @@ close(f)
 
 rm(c_tmpdir, recursive=true)
 @test !isdir(c_tmpdir)
-@test_throws Base._UVError(Sys.iswindows() ? "chmod" : "unlink", Base.UV_ENOENT) rm(c_tmpdir)
+@test_throws Base._UVError("unlink($(repr(c_tmpdir)))", Base.UV_ENOENT) rm(c_tmpdir)
 @test rm(c_tmpdir, force=true) === nothing
-@test_throws Base._UVError(Sys.iswindows() ? "chmod" : "unlink", Base.UV_ENOENT) rm(c_tmpdir, recursive=true)
+@test_throws Base._UVError("unlink($(repr(c_tmpdir)))", Base.UV_ENOENT) rm(c_tmpdir, recursive=true)
 @test rm(c_tmpdir, force=true, recursive=true) === nothing
 
+# Some operations can return multiple different error codes depending on the system environment.
+function throws_matching_exception(f::Function, acceptable_exceptions::AbstractVector)
+    try
+        f()
+        @error "No exception was thrown."
+        return false
+    catch ex
+        if ex in acceptable_exceptions
+            return true
+        else
+            @error "The thrown exception is not in the list of acceptable exceptions" acceptable_exceptions exception=(ex, catch_backtrace())
+            return false
+        end
+    end
+end
+function throws_matching_uv_error(f::Function, pfx::AbstractString, codes::AbstractVector{<:Integer})
+    acceptable_exceptions = multiple_uv_errors(pfx, codes)
+    return throws_matching_exception(f, acceptable_exceptions)
+end
+function multiple_uv_errors(pfx::AbstractString, codes::AbstractVector{<:Integer})
+    return [Base._UVError(pfx, code) for code in codes]
+end
+
 if !Sys.iswindows()
     # chown will give an error if the user does not have permissions to change files
-    if get(ENV, "USER", "") == "root" || get(ENV, "HOME", "") == "/root"
+    uid = Libc.geteuid()
+    @test stat(file).uid == uid
+    @test uid == Libc.getuid()
+    if uid == 0 # root user
         chown(file, -2, -1)  # Change the file owner to nobody
         @test stat(file).uid != 0
         chown(file, 0, -2)  # Change the file group to nogroup (and owner back to root)
@@ -476,8 +554,12 @@ if !Sys.iswindows()
         @test stat(file).gid == 0
         @test stat(file).uid == 0
     else
-        @test_throws Base._UVError("chown", Base.UV_EPERM) chown(file, -2, -1)  # Non-root user cannot change ownership to another user
-        @test_throws Base._UVError("chown", Base.UV_EPERM) chown(file, -1, -2)  # Non-root user cannot change group to a group they are not a member of (eg: nogroup)
+        @test throws_matching_uv_error("chown($(repr(file)), -2, -1)", [Base.UV_EPERM, Base.UV_EINVAL]) do
+            chown(file, -2, -1)  # Non-root user cannot change ownership to another user
+        end
+        @test throws_matching_uv_error("chown($(repr(file)), -1, -2)", [Base.UV_EPERM, Base.UV_EINVAL]) do
+            chown(file, -1, -2)  # Non-root user cannot change group to a group they are not a member of (eg: nogroup)
+        end
     end
 else
     # test that chown doesn't cause any errors for Windows
@@ -541,24 +623,11 @@ end
     # NOTE: not the actual max path on UNIX, but true in the Windows case for this function.
     # NOTE: we subtract 9 to account for i = 0:9.
     MAX_PATH = (Sys.iswindows() ? 260 - length(PATH_PREFIX) : 255)  - 9
-    for i = 0:8
+    for i = 0:9
         local tmp = joinpath(PATH_PREFIX, "x"^MAX_PATH * "123456789"[1:i])
         @test withenv(var => tmp) do
             tempdir()
-        end == (tmp)
-    end
-    for i = 9
-        local tmp = joinpath(PATH_PREFIX, "x"^MAX_PATH * "123456789"[1:i])
-        if Sys.iswindows()
-            # libuv bug
-            @test_broken withenv(var => tmp) do
-                tempdir()
-            end == tmp
-        else
-            @test withenv(var => tmp) do
-                tempdir()
-            end == tmp
-        end
+        end == tmp
     end
 end
 
@@ -688,7 +757,7 @@ let
     @test a_stat.size == b_stat.size
     @test a_stat.size == c_stat.size
 
-    @test parse(Int, match(r"mode=(.*),", sprint(show, a_stat)).captures[1]) == a_stat.mode
+    @test parse(Int, split(sprint(show, a_stat),"mode: ")[2][1:8]) == a_stat.mode
 
     close(af)
     rm(afile)
@@ -939,10 +1008,10 @@ if !Sys.iswindows() || Sys.windows_version() >= Sys.WINDOWS_VISTA_VER
         @test_throws(ArgumentError("'$nonexisting_src' is not a directory. Use `cp(src, dst)`"),
                      Base.cptree(nonexisting_src, dst; force=true, follow_symlinks=true))
         # cp
-        @test_throws Base._UVError("open", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=false)
-        @test_throws Base._UVError("open", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=true)
+        @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=false)
+        @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) cp(nonexisting_src, dst; force=true, follow_symlinks=true)
         # mv
-        @test_throws Base._UVError("open", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true)
+        @test_throws Base._UVError("open($(repr(nonexisting_src)), $(Base.JL_O_RDONLY), 0)", Base.UV_ENOENT) mv(nonexisting_src, dst; force=true)
     end
 end
 
@@ -1205,8 +1274,9 @@ if !Sys.iswindows() || (Sys.windows_version() >= Sys.WINDOWS_VISTA_VER)
 else
     @test_throws ErrorException symlink(file, "ba\0d")
 end
+using Downloads: download
 @test_throws ArgumentError download("good", "ba\0d")
-@test_throws ArgumentError download("ba\0d", "good")
+@test_throws ArgumentError download("ba\0d", tempname())
 
 ###################
 #     walkdir     #
@@ -1314,7 +1384,7 @@ cd(dirwalk) do
     @test files == ["file1", "file2"]
 
     rm(joinpath("sub_dir1"), recursive=true)
-    @test_throws(Base._UVError("readdir", Base.UV_ENOENT, "with ", repr(joinpath(".", "sub_dir1"))),
+    @test_throws(Base._UVError("readdir($(repr(joinpath(".", "sub_dir1"))))", Base.UV_ENOENT),
                  take!(chnl_error)) # throws an error because sub_dir1 do not exist
 
     root, dirs, files = take!(chnl_noerror)
@@ -1334,7 +1404,7 @@ cd(dirwalk) do
         symlink("foo", foo)
 
         let files = walkdir(joinpath(".", "sub_dir3"); follow_symlinks=true)
-            @test_throws Base._UVError("stat", Base.UV_ELOOP, "for file ", repr(foo)) take!(files)
+            @test_throws Base._UVError("stat($(repr(foo)))", Base.UV_ELOOP)  take!(files)
         end
         root, dirs, files = take!(walkdir(joinpath(".", "sub_dir3"); follow_symlinks=false))
         @test root == joinpath(".", "sub_dir3")
@@ -1361,11 +1431,9 @@ end
 ############
 # Clean up #
 ############
-if !Sys.iswindows()
+if !Sys.iswindows() || (Sys.windows_version() >= Sys.WINDOWS_VISTA_VER)
     rm(link)
     rm(rellink)
-end
-if !Sys.iswindows() || (Sys.windows_version() >= Sys.WINDOWS_VISTA_VER)
     rm(dirlink)
     rm(relsubdirlink)
 end
@@ -1438,6 +1506,31 @@ let n = tempname()
     rm(n)
 end
 
+# PR #39906
+if !Sys.iswindows()
+    @testset "rm empty directories without read permissions" begin
+        mktempdir() do d
+            mkdir(joinpath(d, "nonempty"))
+            touch(joinpath(d, "nonempty", "a"))
+            mkdir(joinpath(d, "empty_outer"))
+            mkdir(joinpath(d, "empty_outer", "empty_inner"))
+
+            chmod(joinpath(d, "nonempty"), 0o333)
+            chmod(joinpath(d, "empty_outer", "empty_inner"), 0o333)
+
+            # Test that an empty directory, even when we can't read its contents, is deletable
+            rm(joinpath(d, "empty_outer"); recursive=true, force=true)
+            @test !isdir(joinpath(d, "empty_outer"))
+
+            # But a non-empty directory is not
+            @test_throws Base.IOError rm(joinpath(d, "nonempty"); recursive=true, force=true)
+            chmod(joinpath(d, "nonempty"), 0o777)
+            rm(joinpath(d, "nonempty"); recursive=true, force=true)
+            @test !isdir(joinpath(d, "nonempty"))
+        end
+    end
+end
+
 @test_throws ArgumentError mkpath("fakepath", mode = -1)
 
 @testset "mktempdir 'prefix' argument" begin
@@ -1524,8 +1617,8 @@ end
                 rm(d, recursive=true)
                 @test !ispath(d)
                 @test isempty(readdir())
-                @test_throws Base._UVError("readdir", Base.UV_ENOENT, "with ", repr(d)) readdir(d)
-                @test_throws Base._UVError("cwd", Base.UV_ENOENT) readdir(join=true)
+                @test_throws Base._UVError("readdir($(repr(d)))", Base.UV_ENOENT) readdir(d)
+                @test_throws Base._UVError("pwd()", Base.UV_ENOENT) readdir(join=true)
             end
         end
     end
@@ -1554,3 +1647,75 @@ end
         chmod(dir, 0o777; recursive=true)
     end
 end
+
+if Sys.iswindows()
+@testset "mkdir/rm permissions" begin
+    # test delete permission in system folders (i.e. impliclty test chmod permissions)
+    # issue #38433
+    @test withenv("TMP" => "C:\\") do
+        mktempdir() do dir end
+    end === nothing
+    # same as above, but test rm explicitly
+    tmp = mkdir(tempname("C:\\"))
+    @test rm(tmp) === nothing
+end
+end
+
+@testset "StatStruct show's extended details" begin
+    f, io = mktemp()
+    s = stat(f)
+    stat_show_str = sprint(show, s)
+    stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+    @test startswith(stat_show_str, "StatStruct(")
+    @test endswith(stat_show_str, ")")
+    @test startswith(stat_show_str_multi, "StatStruct for ")
+    @test rstrip(stat_show_str_multi) == stat_show_str_multi # no trailing \n
+    @test occursin(repr(f), stat_show_str)
+    @test occursin(repr(f), stat_show_str_multi)
+    if Sys.iswindows()
+        @test occursin("mode: 0o100666 (-rw-rw-rw-)", stat_show_str)
+        @test occursin("mode: 0o100666 (-rw-rw-rw-)\n", stat_show_str_multi)
+    else
+        @test occursin("mode: 0o100600 (-rw-------)", stat_show_str)
+        @test occursin("mode: 0o100600 (-rw-------)\n", stat_show_str_multi)
+    end
+    if Sys.iswindows() == false
+        @test !isnothing(Base.Filesystem.getusername(s.uid))
+        @test !isnothing(Base.Filesystem.getgroupname(s.gid))
+    end
+    d = mktempdir()
+    s = stat(d)
+    stat_show_str = sprint(show, s)
+    stat_show_str_multi = sprint(show, MIME("text/plain"), s)
+    @test startswith(stat_show_str, "StatStruct(")
+    @test endswith(stat_show_str, ")")
+    @test startswith(stat_show_str_multi, "StatStruct for ")
+    @test rstrip(stat_show_str_multi) == stat_show_str_multi # no trailing \n
+    @test occursin(repr(d), stat_show_str)
+    @test occursin(repr(d), stat_show_str_multi)
+    if Sys.iswindows()
+        @test occursin("mode: 0o040666 (drw-rw-rw-)", stat_show_str)
+        @test occursin("mode: 0o040666 (drw-rw-rw-)\n", stat_show_str_multi)
+    else
+        @test occursin("mode: 0o040700 (drwx------)", stat_show_str)
+        @test occursin("mode: 0o040700 (drwx------)\n", stat_show_str_multi)
+    end
+    if Sys.iswindows() == false
+        @test !isnothing(Base.Filesystem.getusername(s.uid))
+        @test !isnothing(Base.Filesystem.getgroupname(s.gid))
+    end
+end
+
+@testset "diskstat() works" begin
+    # Sanity check assuming disk is smaller than 32PB
+    PB = Int64(2)^44
+
+    dstat = diskstat()
+    @test dstat.total < 32PB
+    @test dstat.used + dstat.available == dstat.total
+    @test occursin(r"^DiskStat\(total=\d+, used=\d+, available=\d+\)$", sprint(show, dstat))
+    # Test diskstat(::AbstractString)
+    dstat = diskstat(pwd())
+    @test dstat.total < 32PB
+    @test dstat.used + dstat.available == dstat.total
+end
diff --git a/test/filesystem.jl b/test/filesystem.jl
index b739b1ecdbf0ab..79beea9f66ac11 100644
--- a/test/filesystem.jl
+++ b/test/filesystem.jl
@@ -10,6 +10,7 @@ mktempdir() do dir
   # test filesystem truncate (shorten)
   file = Base.Filesystem.open(filename, Base.Filesystem.JL_O_RDWR)
   Base.Filesystem.truncate(file, 2)
+  text = text[1:2]
   @test length(read(file)) == 2
   close(file)
 
@@ -33,3 +34,9 @@ mktempdir() do dir
   close(file)
 
 end
+
+import Base.Filesystem: S_IRUSR, S_IRGRP, S_IROTH
+@testset "types of permission mask constants" begin
+  @test S_IRUSR & ~S_IRGRP == S_IRUSR
+  @test typeof(S_IRUSR) == typeof(S_IRGRP) == typeof(S_IROTH)
+end
diff --git a/test/float16.jl b/test/float16.jl
index 4dd581f76538f4..75f9b55b6d51c2 100644
--- a/test/float16.jl
+++ b/test/float16.jl
@@ -21,6 +21,20 @@ g = Float16(1.)
     @test isequal(Float16(0.0), Float16(0.0))
     @test !isequal(Float16(-0.0), Float16(0.0))
     @test !isequal(Float16(0.0), Float16(-0.0))
+
+    for T = Base.BitInteger_types
+        @test -Inf16 < typemin(T)
+        @test -Inf16 <= typemin(T)
+        @test typemin(T) > -Inf16
+        @test typemin(T) >= -Inf16
+        @test typemin(T) != -Inf16
+
+        @test Inf16 > typemax(T)
+        @test Inf16 >= typemax(T)
+        @test typemax(T) < Inf16
+        @test typemax(T) <= Inf16
+        @test typemax(T) != Inf16
+    end
 end
 
 @testset "convert" begin
@@ -76,9 +90,13 @@ end
     @test Float16(0.5f0)^2 ≈ Float16(0.5f0^2)
     @test sin(f) ≈ sin(2f0)
     @test log10(Float16(100)) == Float16(2.0)
+    @test sin(ComplexF16(f)) ≈ sin(complex(2f0))
 
     # no domain error is thrown for negative values
     @test cbrt(Float16(-1.0)) == -1.0
+    # test zero and Inf
+    @test cbrt(Float16(0.0)) == Float16(0.0)
+    @test cbrt(Inf16) == Inf16
 end
 @testset "binary ops" begin
     @test f+g === Float16(3f0)
@@ -156,6 +174,10 @@ end
     # halfway between and last bit is 0
     ff = reinterpret(Float32,                           0b00111110101010100001000000000000)
     @test Float32(Float16(ff)) === reinterpret(Float32, 0b00111110101010100000000000000000)
+
+    for x = (typemin(Int64), typemin(Int128)), R = (RoundUp, RoundToZero)
+        @test Float16(x, R) == nextfloat(-Inf16)
+    end
 end
 
 # issue #5948
@@ -180,3 +202,25 @@ const minsubf16_32 = Float32(minsubf16)
 
 # issues #33076
 @test Float16(1f5) == Inf16
+
+@testset "conversion to Float16 from" begin
+    for T in (Float32, Float64, BigFloat)
+        @testset "conversion from $T" begin
+            for i in 1:2^16
+                f = reinterpret(Float16, UInt16(i-1))
+                isfinite(f) || continue
+                if f < 0
+                    epsdown = T(eps(f))/2
+                    epsup   = issubnormal(f) ? epsdown : T(eps(nextfloat(f)))/2
+                else
+                    epsup   = T(eps(f))/2
+                    epsdown = issubnormal(f) ? epsup : T(eps(prevfloat(f)))/2
+                end
+                @test isequal(f*(-1)^(f === Float16(0)),  Float16(nextfloat(T(f) - epsdown)))
+                @test isequal(f*(-1)^(f === -Float16(0)), Float16(prevfloat(T(f) + epsup)))
+                @test isequal(prevfloat(f), Float16(prevfloat(T(f) - epsdown)))
+                @test isequal(nextfloat(f), Float16(nextfloat(T(f) + epsup)))
+            end
+        end
+    end
+end
diff --git a/test/floatfuncs.jl b/test/floatfuncs.jl
index ff105f425ea81c..7e9d8021ac5df4 100644
--- a/test/floatfuncs.jl
+++ b/test/floatfuncs.jl
@@ -40,6 +40,26 @@ end
     end
 end
 
+@testset "ispow2 and iseven/isodd" begin
+    for T in (Float16,Float32,Float64,BigFloat)
+        for x in (0.25, 1.0, 4.0, exp2(T(exponent(floatmax(T)))), exp2(T(exponent(floatmin(T)))))
+            @test ispow2(T(x))
+        end
+        for x in (1.5, 0.0, 7.0, NaN, Inf)
+            @test !ispow2(T(x))
+        end
+        for x in (0, 134)
+            @test iseven(T(x)) && iseven(T(-x))
+            @test isodd(T(x+1)) && isodd(T(-x-1))
+        end
+        let x = maxintfloat(T) * π
+            @test iseven(x) && iseven(-x)
+            @test !isodd(x) && !isodd(-x)
+        end
+        @test !iseven(0.5) && !isodd(0.5)
+    end
+end
+
 @testset "round" begin
     for elty in (Float32, Float64)
         x = rand(elty)
@@ -99,6 +119,23 @@ end
     @test round(Float32(1.2), sigdigits=5) === Float32(1.2)
     @test round(Float16(0.6), sigdigits=2) === Float16(0.6)
     @test round(Float16(1.1), sigdigits=70) === Float16(1.1)
+
+    # issue 37171
+    @test round(9.87654321e-308, sigdigits = 1) ≈ 1.0e-307
+    @test round(9.87654321e-308, sigdigits = 2) ≈ 9.9e-308
+    @test round(9.87654321e-308, sigdigits = 3) ≈ 9.88e-308
+    @test round(9.87654321e-308, sigdigits = 4) ≈ 9.877e-308
+    @test round(9.87654321e-308, sigdigits = 5) ≈ 9.8765e-308
+    @test round(9.87654321e-308, sigdigits = 6) ≈ 9.87654e-308
+    @test round(9.87654321e-308, sigdigits = 7) ≈ 9.876543e-308
+    @test round(9.87654321e-308, sigdigits = 8) ≈ 9.8765432e-308
+    @test round(9.87654321e-308, sigdigits = 9) ≈ 9.87654321e-308
+    @test round(9.87654321e-308, sigdigits = 10) ≈ 9.87654321e-308
+    @test round(9.87654321e-308, sigdigits = 11) ≈ 9.87654321e-308
+
+    @inferred round(Float16(1.), sigdigits=2)
+    @inferred round(Float32(1.), sigdigits=2)
+    @inferred round(Float64(1.), sigdigits=2)
 end
 
 @testset "literal pow matches runtime pow matches optimized pow" begin
@@ -167,3 +204,8 @@ end
     @test ≈(1.0; atol=1).(1.0:3.0) == [true, true, false]
 
 end
+
+@testset "isnan for Number" begin
+    struct CustomNumber <: Number end
+    @test !isnan(CustomNumber())
+end
diff --git a/test/functional.jl b/test/functional.jl
index f2d6d4a898ede5..c9b0b270baeb77 100644
--- a/test/functional.jl
+++ b/test/functional.jl
@@ -134,6 +134,11 @@ let gen = ((x,y) for x in 1:10, y in 1:10 if x % 2 == 0 && y % 2 == 0),
     @test collect(gen) == collect(gen2)
 end
 
+# keys of a generator for find* and arg* (see #34678)
+@test keys(x^2 for x in -1:0.5:1) == 1:5
+@test findall(!iszero, x^2 for x in -1:0.5:1) == [1, 2, 4, 5]
+@test argmin(x^2 for x in -1:0.5:1) == 3
+
 # inference on vararg generator of a type (see #22907 comments)
 let f(x) = collect(Base.Generator(=>, x, x))
     @test @inferred(f((1,2))) == [1=>1, 2=>2]
diff --git a/test/gcext/gcext.c b/test/gcext/gcext.c
index 6b3ecd0726004e..b66f21bb660ee2 100644
--- a/test/gcext/gcext.c
+++ b/test/gcext/gcext.c
@@ -478,37 +478,29 @@ static int stack_grows_down(void) {
 
 void task_scanner(jl_task_t *task, int root_task)
 {
+    int var_on_frame;
+
     // The task scanner is not necessary for liveness, as the
     // corresponding task stack is already part of the stack.
     // Its purpose is simply to test that the task scanner
     // doing actual work does not trigger a problem.
-    size_t size;
-    int tid;
-    void *stack = jl_task_stack_buffer(task, &size, &tid);
-    if (tid >= 0) {
-        // this is the live stack of a thread. Is it ours?
-        if (stack && tid == jl_threadid()) {
-            // only scan the live portion of the stack.
-            char *end_stack = (char *) stack + size;
-            if (lt_ptr(stack, &size) && lt_ptr(&size, (char *)stack + size)) {
-                if (stack_grows_down()) {
-                    size = end_stack - (char *)&size;
-                    stack = (void *)&size;
-                }
-                else {
-                    size = (char *) end_stack - (char *) &size;
-                }
-            } else {
-                // error, current stack frame must be on the live stack.
-                jl_error("stack frame not part of the current task");
-            }
+    char *start_stack;
+    char *end_stack;
+    char *total_start_stack;
+    char *total_end_stack;
+    jl_active_task_stack(task, &start_stack, &end_stack, &total_start_stack, &total_end_stack);
+
+    // this is the live stack of a thread. Is it ours?
+    if (start_stack && task == (jl_task_t *)jl_get_current_task()) {
+        if (!(lt_ptr(start_stack, &var_on_frame) && lt_ptr(&var_on_frame, end_stack))) {
+            // error, current stack frame must be on the live stack.
+            jl_error("stack frame not part of the current task");
         }
-        else
-            stack = NULL;
     }
-    if (stack) {
-        void **start = (void **) stack;
-        void **end = start + size / sizeof(void *);
+
+    if (start_stack) {
+        void **start = (void **)start_stack;
+        void **end = (void **)end_stack;
         while (start < end) {
             void *p = *start++;
             void *q = jl_gc_internal_obj_base_ptr(p);
@@ -569,8 +561,10 @@ void sweep_stack_data(jl_value_t *p)
 {
     obj_sweeps++;
     dynstack_t *stk = (dynstack_t *)p;
-    if (stk->size > stk->capacity)
-        jl_error("internal error during sweeping");
+    if (stk->size > stk->capacity) {
+        assert(0 && "internal error during sweeping");
+        abort();
+    }
 }
 
 // Safely execute Julia code
@@ -655,10 +649,6 @@ int main()
             "let dir = dirname(unsafe_string(Base.JLOptions().julia_bin))\n"
             // disable the package manager
             "    ENV[\"JULIA_PKGDIR\"] = joinpath(dir, \"disabled\")\n"
-            // locate files relative to the "embedding" executable
-            "    stdlib = filter(env -> startswith(Base.find_package(Base, "
-            "\"Distributed\"), env), Base.load_path())[end]\n"
-            "    push!(empty!(LOAD_PATH), dir, stdlib)\n"
             "end");
 
     checked_eval_string(
diff --git a/test/generic_map_tests.jl b/test/generic_map_tests.jl
index 8fde731770bf38..b155370dd64650 100644
--- a/test/generic_map_tests.jl
+++ b/test/generic_map_tests.jl
@@ -53,6 +53,30 @@ function generic_map_tests(mapf, inplace_mapf=nothing)
         @test A == map(x->x*x*x, Float64[1:10...])
         @test A === B
     end
+
+    # Issue #28382: inferrability of map with Union eltype
+    @test isequal(map(+, [1, 2], [3.0, missing]), [4.0, missing])
+    @test Core.Compiler.return_type(map, Tuple{typeof(+), Vector{Int},
+                                               Vector{Union{Float64, Missing}}}) ==
+        Union{Vector{Missing}, Vector{Union{Missing, Float64}}, Vector{Float64}}
+    @test isequal(map(tuple, [1, 2], [3.0, missing]), [(1, 3.0), (2, missing)])
+    @test Core.Compiler.return_type(map, Tuple{typeof(tuple), Vector{Int},
+                                               Vector{Union{Float64, Missing}}}) ==
+        Vector{<:Tuple{Int, Any}}
+    # Check that corner cases do not throw an error
+    @test isequal(map(x -> x === 1 ? nothing : x, [1, 2, missing]),
+                  [nothing, 2, missing])
+    @test isequal(map(x -> x === 1 ? nothing : x, Any[1, 2, 3.0, missing]),
+                  [nothing, 2, 3, missing])
+    @test map((x,y)->(x==1 ? 1.0 : x, y), [1, 2, 3], ["a", "b", "c"]) ==
+        [(1.0, "a"), (2, "b"), (3, "c")]
+    @test map(typeof, [iszero, isdigit]) == [typeof(iszero), typeof(isdigit)]
+    @test map(typeof, [iszero, iszero]) == [typeof(iszero), typeof(iszero)]
+    @test isequal(map(identity, Vector{<:Union{Int, Missing}}[[1, 2],[missing, 1]]),
+                  [[1, 2],[missing, 1]])
+    @test map(x -> x < 0 ? false : x, Int[]) isa Vector{Integer}
+    @test map(i -> ((x=i, y=(i==1 ? 1 : "a")), 3), 1:4) isa
+        Vector{Tuple{NamedTuple{(:x, :y)}, Int}}
 end
 
 function testmap_equivalence(mapf, f, c...)
@@ -76,6 +100,6 @@ function run_map_equivalence_tests(mapf)
     testmap_equivalence(mapf, identity, (1,2,3,4))
     testmap_equivalence(mapf, (x,y,z)->x+y+z, 1,2,3)
     testmap_equivalence(mapf, x->x ? false : true, BitMatrix(undef, 10,10))
-    testmap_equivalence(mapf, x->"foobar", BitMatrix(undef, 10,10))
+    testmap_equivalence(mapf, Returns("foobar"), BitMatrix(undef, 10,10))
     testmap_equivalence(mapf, (x,y,z)->string(x,y,z), BitVector(undef, 10), fill(1.0, 10), "1234567890")
 end
diff --git a/test/gmp.jl b/test/gmp.jl
index 96ffdedb7a93c5..1125f57b195b34 100644
--- a/test/gmp.jl
+++ b/test/gmp.jl
@@ -69,6 +69,18 @@ ee = typemax(Int64)
             @test big(typeof(complex(x, x))) == typeof(big(complex(x, x)))
         end
     end
+    @testset "division" begin
+        oz = big(1 // 0)
+        zo = big(0 // 1)
+
+        @test_throws DivideError() oz / oz
+        @test oz == oz / one(oz)
+        @test -oz == oz / (-one(oz))
+        @test zero(oz) == one(oz) / oz
+        @test_throws DivideError() zo / zo
+        @test one(zo) / zo == big(1//0)
+        @test -one(zo) / zo == big(-1//0)
+    end
 end
 @testset "div, fld, mod, rem" begin
     for i = -10:10, j = [-10:-1; 1:10]
@@ -212,6 +224,9 @@ let a, b
     a = rand(1:100, 10000)
     b = map(BigInt, a)
     @test sum(a) == sum(b)
+    @test 0 == sum(BigInt[]) isa BigInt
+    @test prod(b) == foldl(*, b)
+    @test 1 == prod(BigInt[]) isa BigInt
 end
 
 @testset "Iterated arithmetic" begin
@@ -224,10 +239,15 @@ end
     g = parse(BigInt,"-1")
 
     @test +(a, b) == parse(BigInt,"327547")
+    @test 327547 == sum((a, b)) isa BigInt
     @test +(a, b, c) == parse(BigInt,"3426495623485904783805894")
+    @test 3426495623485904783805894 == sum((a, b, c)) isa BigInt
     @test +(a, b, c, d) == parse(BigInt,"3426495623485903384821764")
+    @test 3426495623485903384821764 == sum((a, b, c, d)) isa BigInt
     @test +(a, b, c, d, f) == parse(BigInt,"2413804710837418037418307081437318690130968843290370569228")
+    @test 2413804710837418037418307081437318690130968843290370569228 == sum((a, b, c, d, f)) isa BigInt
     @test +(a, b, c, d, f, g) == parse(BigInt,"2413804710837418037418307081437318690130968843290370569227")
+    @test 2413804710837418037418307081437318690130968843290370569227 == sum((a, b, c, d, f, g)) isa BigInt
 
     @test *(a, b) == parse(BigInt,"3911455620")
     @test *(a, b, c) == parse(BigInt,"13402585563389346256121263521460140")
@@ -241,6 +261,12 @@ end
     @test xor(a, b, c, d, f) == parse(BigInt,"-2413804710837418037418307081437316711364709261074607933698")
     @test xor(a, b, c, d, f, g) == parse(BigInt,"2413804710837418037418307081437316711364709261074607933697")
 
+    @test nand(a, b) == parse(BigInt,"-125")
+    @test ⊼(a, b) == parse(BigInt,"-125")
+
+    @test nor(a, b) == parse(BigInt,"-327424")
+    @test ⊽(a, b) == parse(BigInt,"-327424")
+
     @test (&)(a, b) == parse(BigInt,"124")
     @test (&)(a, b, c) == parse(BigInt,"72")
     @test (&)(a, b, c, d) == parse(BigInt,"8")
@@ -252,10 +278,23 @@ end
     @test (|)(a, b, c, d) == parse(BigInt,"-1396834561")
     @test (|)(a, b, c, d, f) == parse(BigInt,"-1358954753")
     @test (|)(a, b, c, d, f, g) == parse(BigInt,"-1")
+end
 
-    @test trailing_ones(a) == 8
-    @test trailing_zeros(b) == 2
-    @test count_ones(a) == 14
+@testset "bit operations" begin
+    for x in (315135, 12412, 3426495623485904783478347)
+        @test trailing_ones(big(x)) == trailing_ones(x)
+        @test trailing_zeros(big(x)) == trailing_zeros(x)
+        @test count_ones(big(x)) == count_ones(x)
+        @test count_zeros(-big(x)) == count_zeros(-x)
+    end
+
+    @test_throws DomainError trailing_zeros(big(0))
+    @test_throws DomainError trailing_ones(big(-1)) # -1 is all ones
+
+    @test_throws DomainError count_zeros(big(0))
+    @test_throws DomainError count_zeros(big(rand(UInt)))
+    @test_throws DomainError count_ones(big(-1))
+    @test_throws DomainError count_ones(-big(rand(UInt))-1)
 end
 
 # Large Fibonacci to exercise BigInt
@@ -340,28 +379,24 @@ end
     @test_throws InexactError convert(BigInt, 2.1)
     @test_throws InexactError convert(BigInt, big(2.1))
 end
-@testset "issue #13367" begin
-    @test trunc(BigInt,2.1) == 2
-    @test round(BigInt,2.1) == 2
-    @test floor(BigInt,2.1) == 2
-    @test ceil(BigInt,2.1) == 3
-
-    @test trunc(BigInt,2.1f0) == 2
-    @test round(BigInt,2.1f0) == 2
-    @test floor(BigInt,2.1f0) == 2
-    @test ceil(BigInt,2.1f0) == 3
-
-    @test_throws InexactError trunc(BigInt,Inf)
-    @test_throws InexactError round(BigInt,Inf)
-    @test_throws InexactError floor(BigInt,Inf)
-    @test_throws InexactError ceil(BigInt,Inf)
-
-    @test string(big(3), base = 2) == "11"
-    @test string(big(9), base = 8) == "11"
-    @test string(-big(9), base = 8) == "-11"
-    @test string(big(12), base = 16) == "c"
+@testset "truncation" begin
+    # cf. issue #13367
+    for T = (Float16, Float32, Float64)
+        @test trunc(BigInt, T(2.1)) == 2
+        @test unsafe_trunc(BigInt, T(2.1)) == 2
+        @test round(BigInt, T(2.1)) == 2
+        @test floor(BigInt, T(2.1)) == 2
+        @test ceil(BigInt, T(2.1)) == 3
+
+        @test_throws InexactError trunc(BigInt, T(Inf))
+        @test_throws InexactError round(BigInt, T(Inf))
+        @test_throws InexactError floor(BigInt, T(Inf))
+        @test_throws InexactError ceil(BigInt, T(Inf))
+    end
 end
-@testset "Issue #18849" begin
+
+@testset "string(::BigInt)" begin
+    # cf. issue #18849"
     # bin, oct, dec, hex should not call sizeof on BigInts
     # when padding is desired
     padding = 4
@@ -386,14 +421,19 @@ end
     @test string(-high, pad = padding, base = 8) == "-4000000"
     @test string(-high, pad = padding, base = 10) == "-1048576"
     @test string(-high, pad = padding, base = 16) == "-100000"
-end
 
-# respect 0-padding on big(0)
-for base in (2, 8, 10, 16)
-    local base
-    @test string(big(0), base=base, pad=0) == ""
+    # cf. issue #13367
+    @test string(big(3), base = 2) == "11"
+    @test string(big(9), base = 8) == "11"
+    @test string(-big(9), base = 8) == "-11"
+    @test string(big(12), base = 16) == "c"
+
+    # respect 0-padding on big(0)
+    for base in (2, 8, 10, 16)
+        @test string(big(0), base=base, pad=0) == ""
+    end
+    @test string(big(0), base = rand(2:62), pad = 0) == ""
 end
-@test string(big(0), base = rand(2:62), pad = 0) == ""
 
 @test isqrt(big(4)) == 2
 @test isqrt(big(5)) == 2
@@ -502,3 +542,164 @@ end
         @test T(big"2"^(n+1) - big"2"^(n-precision(T)) - 1) === floatmax(T)
     end
 end
+
+a = Rational{BigInt}(12345678901234567890123456789, 987654321987654320)
+b = Rational{BigInt}(12345678902222222212111111109, 987654321987654320)
+c = Rational{BigInt}(24691357802469135780246913578, 987654321987654320)
+d = Rational{BigInt}(- 12345678901234567890123456789, 493827160993827160)
+e = Rational{BigInt}(12345678901234567890123456789, 12345678902222222212111111109)
+@testset "big rational basics" begin
+    @test a+BigInt(1) == b
+    @test typeof(a+1) == Rational{BigInt}
+    @test a+1 == b
+    @test isequal(a+1, b)
+    @test b == a+1
+    @test !(b == a)
+    @test b > a
+    @test b >= a
+    @test !(b < a)
+    @test !(b <= a)
+
+    @test typeof(a * 2) == Rational{BigInt}
+    @test a*2 == c
+    @test c-a == a
+    @test c == a + a
+    @test c+1 == a+b
+
+    @test typeof(d) == Rational{BigInt}
+    @test d == -c
+
+
+    @test e == a // b
+
+    @testset "gmp cmp" begin
+        @test Base.GMP.MPQ.cmp(b, a) ==  1
+        @test Base.GMP.MPQ.cmp(a, b) == -1
+        @test Base.GMP.MPQ.cmp(a, a) ==  0
+    end
+
+    @testset "division errors" begin
+        oz = Rational{BigInt}(0, 1)
+        zo = Rational{BigInt}(1, 0)
+
+        @test oz + oz == 3 * oz == oz
+        @test oz // zo == oz
+        @test zo // oz == zo
+
+        @test_throws DivideError() zo - zo
+        @test_throws DivideError() zo + (-zo)
+        @test_throws DivideError() zo * oz
+        @test_throws DivideError() oz // oz
+        @test_throws DivideError() zo // zo
+    end
+
+    @testset "big infinities" begin
+        oz   = Rational{BigInt}(1, 0)
+        zo   = Rational{BigInt}(0, 1)
+        o    = Rational{BigInt}(1, 1)
+
+        @test oz + zo    == oz
+        @test zo - oz    == -oz
+        @test zo + (-oz) == -oz
+        @test -oz + zo   == -oz
+
+        @test (-oz) * (-oz) == oz
+        @test (-oz) * oz    == -oz
+
+        @test o // zo       == oz
+        @test (-o) // zo    == -oz
+
+        @test Rational{BigInt}(-1, 0) == -1//0
+        @test Rational{BigInt}(1, 0) == 1//0
+    end
+end
+
+
+aa = 1//2
+bb = -1//3
+cc = 3//2
+a = Rational{BigInt}(aa)
+b = Rational{BigInt}(bb)
+c = Rational{BigInt}(cc)
+t = Rational{BigInt}(0, 1)
+@testset "big rational inplace" begin
+    @test Base.GMP.MPQ.add!(t, a, b) == 1//6
+    @test t == 1//6
+    @test Base.GMP.MPQ.add!(t, t) == 1//3
+    @test t == 1//3
+
+    @test iszero(Base.GMP.MPQ.sub!(t, t))
+    @test iszero(t)
+    @test Base.GMP.MPQ.sub!(t, b, c) == -11//6
+    @test t == -11//6
+
+    @test Base.GMP.MPQ.mul!(t, a, b) == -1//6
+    @test t == -1//6
+    @test Base.GMP.MPQ.mul!(t, t) == 1//36
+    @test t == 1//36
+    @test iszero(Base.GMP.MPQ.mul!(t, Rational{BigInt}(0)))
+
+    @test Base.GMP.MPQ.div!(t, a, b) == -3//2
+    @test t == -3//2
+    @test Base.GMP.MPQ.div!(t, a) == -3//1
+    @test t == -3//1
+
+    @test aa == a && bb == b && cc == c
+
+    @testset "set" begin
+        @test Base.GMP.MPQ.set!(a, b) == b
+        @test a == b == bb
+
+        Base.GMP.MPQ.add!(a, b, c)
+        @test b == bb
+
+        @test Base.GMP.MPQ.set_z!(a, BigInt(0)) == 0
+        @test iszero(a)
+        @test Base.GMP.MPQ.set_z!(a, BigInt(3)) == 3
+        @test a == BigInt(3)
+
+        @test Base.GMP.MPQ.set_ui(1, 2)      == 1//2
+        @test Base.GMP.MPQ.set_ui(0, 1)      == 0//1
+        @test Base.GMP.MPQ.set_ui!(a, 1, 2)  == 1//2
+        @test a == 1//2
+
+        @test Base.GMP.MPQ.set_si(1, 2)      == 1//2
+        @test Base.GMP.MPQ.set_si(-1, 2)     == -1//2
+        @test Base.GMP.MPQ.set_si!(a, -1, 2) == -1//2
+        @test a == -1//2
+    end
+
+    @testset "infinities" begin
+        oz   = Rational{BigInt}(1, 0)
+        zo   = Rational{BigInt}(0, 1)
+        oo   = Rational{BigInt}(1, 1)
+
+        @test Base.GMP.MPQ.add!(zo, oz) == oz
+        @test zo == oz
+        zo = Rational{BigInt}(0, 1)
+
+        @test Base.GMP.MPQ.sub!(zo, oz) == -oz
+        @test zo == -oz
+        zo = Rational{BigInt}(0, 1)
+
+        @test Base.GMP.MPQ.add!(zo, -oz) == -oz
+        @test zo == -oz
+        zo = Rational{BigInt}(0, 1)
+
+        @test Base.GMP.MPQ.sub!(zo, -oz) == oz
+        @test zo == oz
+        zo = Rational{BigInt}(0, 1)
+
+        @test Base.GMP.MPQ.mul!(-oz, -oz) == oz
+        @test Base.GMP.MPQ.mul!(-oz, oz)  == -oz
+        @test Base.GMP.MPQ.mul!(oz, -oz)  == -1//0
+        @test oz == -1//0
+        oz = Rational{BigInt}(1, 0)
+
+        @test Base.GMP.MPQ.div!(oo, zo) == oz
+        @test oo == oz
+        oo = Rational{BigInt}(1, 1)
+
+        @test Base.GMP.MPQ.div!(-oo, zo) == -oz
+    end
+end
diff --git a/test/hashing.jl b/test/hashing.jl
index c2b3ed27f6a51a..9bd076554962f0 100644
--- a/test/hashing.jl
+++ b/test/hashing.jl
@@ -32,28 +32,29 @@ function coerce(T::Type, x)
     end
 end
 
-for T = types[2:end],
-    x = vals,
+for T = types[2:end], x = vals
     a = coerce(T, x)
-    @test hash(a,zero(UInt)) == invoke(hash, Tuple{Real, UInt}, a, zero(UInt))
-    @test hash(a,one(UInt)) == invoke(hash, Tuple{Real, UInt}, a, one(UInt))
+    @test hash(a, zero(UInt)) == invoke(hash, Tuple{Real, UInt}, a, zero(UInt))
+    @test hash(a, one(UInt)) == invoke(hash, Tuple{Real, UInt}, a, one(UInt))
 end
 
-for T = types,
-    S = types,
-    x = vals,
-    a = coerce(T, x),
-    b = coerce(S, x)
-    #println("$(typeof(a)) $a")
-    #println("$(typeof(b)) $b")
-    @test isequal(a,b) == (hash(a)==hash(b))
-    # for y=vals
-    #     println("T=$T; S=$S; x=$x; y=$y")
-    #     c = convert(T,x//y)
-    #     d = convert(S,x//y)
-    #     @test !isequal(a,b) || hash(a)==hash(b)
-    # end
+let collides = 0
+    for T = types, S = types, x = vals
+        a = coerce(T, x)
+        b = coerce(S, x)
+        eq = hash(a) == hash(b)
+        #println("$(typeof(a)) $a")
+        #println("$(typeof(b)) $b")
+        if isequal(a, b)
+            @test eq
+        else
+            collides += eq
+        end
+    end
+    # each pair of types has one collision for these values
+    @test collides <= (length(types) - 1)^2
 end
+@test hash(0.0) != hash(-0.0)
 
 # issue #8619
 @test hash(nextfloat(2.0^63)) == hash(UInt64(nextfloat(2.0^63)))
@@ -256,3 +257,30 @@ let p1 = Ptr{Int8}(1), p2 = Ptr{Int32}(1), p3 = Ptr{Int8}(2)
     @test isless(p1, p3)
     @test_throws MethodError isless(p1, p2)
 end
+
+# PR #40083
+@test hash(1:1000) == hash(collect(1:1000))
+
+@testset "test the other core data hashing functions" begin
+    @testset "hash_64_32" begin
+        vals = vcat(
+            typemin(UInt64) .+ UInt64[1:4;],
+            typemax(UInt64) .- UInt64[4:-1:0;]
+        )
+
+        for a in vals, b in vals
+            @test isequal(a, b) == (Base.hash_64_32(a) == Base.hash_64_32(b))
+        end
+    end
+
+    @testset "hash_32_32" begin
+        vals = vcat(
+            typemin(UInt32) .+ UInt32[1:4;],
+            typemax(UInt32) .- UInt32[4:-1:0;]
+        )
+
+        for a in vals, b in vals
+            @test isequal(a, b) == (Base.hash_32_32(a) == Base.hash_32_32(b))
+        end
+    end
+end
diff --git a/test/int.jl b/test/int.jl
index 160e42d04e6d9c..d7b79fb6c1e0c5 100644
--- a/test/int.jl
+++ b/test/int.jl
@@ -55,37 +55,37 @@ using Random
     end
 end
 @testset "signed and unsigned" begin
-    @test signed(3) == 3
-    @test signed(UInt(3)) == 3
+    @test signed(3) === 3
+    @test signed(UInt(3)) === 3
     @test isa(signed(UInt(3)), Int)
-    @test signed(UInt(0) - 1) == -1
+    @test signed(UInt(0) - 1) === -1
     @test_throws InexactError signed(UInt(-3))
-    @test signed(true) == 1
+    @test signed(true) === 1
     @test unsigned(true) isa Unsigned
-    @test unsigned(true) == unsigned(1)
+    @test unsigned(true) === unsigned(1)
 
-    @test signed(Bool) == Int
-    @test signed(Bool) == typeof(signed(true))
-    @test unsigned(Bool) == UInt
-    @test unsigned(Bool) == typeof(unsigned(true))
+    @test signed(Bool) === Int
+    @test signed(Bool) === typeof(signed(true))
+    @test unsigned(Bool) === UInt
+    @test unsigned(Bool) === typeof(unsigned(true))
 end
 @testset "bswap" begin
     @test bswap(Int8(3)) == 3
-    @test bswap(UInt8(3)) == 3
+    @test bswap(UInt8(3)) === 0x3
     @test bswap(Int16(3)) == 256*3
     @test bswap(Int16(256)) == 1
     @test bswap(Int16(257)) == 257
     @test bswap(Int32(1)) == 2^(3*8)
     @test bswap(Int32(2)^(3*8)) == 1
-    @test bswap(Int64(1)) == Int64(2)^(7*8)
+    @test bswap(Int64(1)) === Int64(2)^(7*8)
     @test bswap(Int64(2)^(7*8)) == 1
-    @test bswap(Int128(1)) == Int128(2)^(15*8)
-    @test bswap(Int128(2)^(15*8)) == Int128(1)
-    @test bswap(UInt128(2)^(15*8)) == UInt128(1)
+    @test bswap(Int128(1)) === Int128(2)^(15*8)
+    @test bswap(Int128(2)^(15*8)) === Int128(1)
+    @test bswap(UInt128(2)^(15*8)) === UInt128(1)
 end
 @testset "count_zeros" begin
-    @test count_zeros(10) == Sys.WORD_SIZE - 2
-    @test count_zeros(UInt8(10)) == 6
+    @test count_zeros(10) === Sys.WORD_SIZE - 2
+    @test count_zeros(UInt8(10)) === 6
 end
 @testset "Conversions" begin
     @test convert(Signed, UInt128(3)) === Int128(3)
@@ -104,11 +104,11 @@ end
 end
 
 @testset "trunc, floor, ceil" begin
-    @test trunc(3) == 3
-    @test trunc(Integer, 3) == 3
+    @test trunc(3) === 3
+    @test trunc(Integer, 3) === 3
 
-    @test floor(3) == 3
-    @test ceil(3) == 3
+    @test floor(3) === 3
+    @test ceil(3) === 3
 end
 
 @testset "big" begin
@@ -120,10 +120,11 @@ end
 end
 
 @test round(UInt8, 123) == 123
-@test mod(123, UInt8) == 0x7b
+@test mod(123, UInt8) === 0x7b
 
-primitive type MyBitsType <: Integer 8 end
+primitive type MyBitsType <: Signed 8 end
 @test_throws MethodError ~reinterpret(MyBitsType, 0x7b)
+@test signed(MyBitsType) === MyBitsType
 
 UItypes = Base.BitUnsigned_types
 SItypes = Base.BitSigned_types
@@ -141,6 +142,10 @@ SItypes = Base.BitSigned_types
         R = sizeof(S) < sizeof(Int) ? Int : S
         @test promote(R(3), T(3)) === (sizeof(R) < sizeof(T) ? (T(3), T(3)) : (R(3), R(3)))
     end
+
+    for i in 1:length(UItypes)
+        @test promote(UItypes[i](3), SItypes[i](3)) === (UItypes[i](3), UItypes[i](3))
+    end
 end
 @testset "limiting conversions" begin
     for T in (Int8, Int16, Int32, Int64)
@@ -207,8 +212,8 @@ end
     end
 
     val2 = 0xabcd
-    @test 0x5e6d == bitrotate(val2, 3)
-    @test 0xb579 == bitrotate(val2, -3)
+    @test 0x5e6d === bitrotate(val2, 3)
+    @test 0xb579 === bitrotate(val2, -3)
 end
 
 @testset "widen/widemul" begin
@@ -236,12 +241,12 @@ end
     @test typeof(widen(Int64(-3))) == Int128
     @test typeof(widen(Int128(-3))) == BigInt
 
-    @test widemul(false, false) == false
-    @test widemul(false, 3) == 0
-    @test widemul(3, true) == widemul(true, 3) == 3
+    @test widemul(false, false) === false
+    @test widemul(false, 3) === 0
+    @test widemul(3, true) === widemul(true, 3) === 3
 
     let i=Int64(2)^63-1, k=widemul(i,i)
-        @test widemul(i,i)==85070591730234615847396907784232501249
+        @test widemul(i,i)===85070591730234615847396907784232501249
         j=div(k,2)
         @test div(k,j)==2
         j=div(k,5)
@@ -320,10 +325,16 @@ end
     end
 end
 
-@testset "issue #21092" begin
+@testset "Underscores in big_str" begin
     @test big"1_0_0_0" == BigInt(1000)
     @test_throws ArgumentError big"1_0_0_0_"
     @test_throws ArgumentError big"_1_0_0_0"
+
+    @test big"1_0.2_5" == BigFloat(10.25)
+    @test_throws ArgumentError big"_1_0.2_5"
+    @test_throws ArgumentError big"1_0.2_5_"
+    @test_throws ArgumentError big"1_0_.2_5"
+    @test_throws ArgumentError big"1_0._2_5"
 end
 
 # issue #26779
@@ -341,25 +352,28 @@ end
 @testset "rounding division" begin
     for x = -100:100
         for y = 1:100
-            for rnd in (RoundNearest, RoundNearestTiesAway, RoundNearestTiesUp)
+            for rnd in (RoundNearest, RoundNearestTiesAway, RoundNearestTiesUp, RoundFromZero)
                 @test div(x,y,rnd) == round(x/y,rnd)
                 @test div(x,-y,rnd) == round(x/-y,rnd)
             end
+            @test divrem(x,y,RoundFromZero) == (div(x,y,RoundFromZero), rem(x,y,RoundFromZero))
+            @test divrem(x,-y,RoundFromZero) == (div(x,-y,RoundFromZero), rem(x,-y,RoundFromZero))
         end
     end
-    for (a, b, nearest, away, up) in (
-            (3, 2, 2, 2, 2),
-            (5, 3, 2, 2, 2),
-            (-3, 2, -2, -2, -1),
-            (5, 2, 2, 3, 3),
-            (-5, 2, -2, -3, -2),
-            (-5, 3, -2, -2, -2),
-            (5, -3, -2, -2, -2))
+    for (a, b, nearest, away, up, from_zero) in (
+            (3, 2, 2, 2, 2, 2),
+            (5, 3, 2, 2, 2, 2),
+            (-3, 2, -2, -2, -1, -2),
+            (5, 2, 2, 3, 3, 3),
+            (-5, 2, -2, -3, -2, -3),
+            (-5, 3, -2, -2, -2, -2),
+            (5, -3, -2, -2, -2, -2))
         for sign in (+1, -1)
             (a, b) = (a*sign, b*sign)
-            @test div(a, b, RoundNearest) == nearest
-            @test div(a, b, RoundNearestTiesAway) == away
-            @test div(a, b, RoundNearestTiesUp) == up
+            @test div(a, b, RoundNearest) === nearest
+            @test div(a, b, RoundNearestTiesAway) === away
+            @test div(a, b, RoundNearestTiesUp) === up
+            @test div(a, b, RoundFromZero) === from_zero
         end
     end
 
@@ -367,10 +381,10 @@ end
     @test div(-typemax(Int64), typemax(Int64)-1, RoundNearest) == -1
     @test div(typemax(Int64), 2, RoundNearest) == 4611686018427387904
     @test div(-typemax(Int64), 2, RoundNearestTiesUp) == -4611686018427387903
-    @test div(typemax(Int)-2, typemax(Int), RoundNearest) == 1
+    @test div(typemax(Int)-2, typemax(Int), RoundNearest) === 1
 
     # Exhaustively test (U)Int8 to catch any overflow-style issues
-    for r in (RoundNearest, RoundNearestTiesAway, RoundNearestTiesUp)
+    for r in (RoundNearest, RoundNearestTiesAway, RoundNearestTiesUp, RoundFromZero)
         for T in (UInt8, Int8)
             for x in typemin(T):typemax(T)
                 for y in typemin(T):typemax(T)
@@ -395,3 +409,34 @@ end
     @test bitreverse(Int64(456618293)) === Int64(-6012608040035942400)
     @test bitreverse(Int32(456618293)) === Int32(-1399919400)
 end
+
+@testset "min/max of datatype" begin
+    @test typemin(Int8) === Int8(-128)
+    @test typemin(UInt8) === UInt8(0)
+    @test typemin(Int16) === Int16(-32768)
+    @test typemin(UInt16) === UInt16(0)
+    @test typemin(Int32) === Int32(-2147483648)
+    @test typemin(UInt32) === UInt32(0)
+    @test typemin(Int64) === Int64(-9223372036854775808)
+    @test typemin(UInt64) === UInt64(0)
+    @test typemin(Int128) === Int128(-170141183460469231731687303715884105728)
+    @test typemin(UInt128) === UInt128(0)
+
+    @test typemax(Int8) === Int8(127)
+    @test typemax(UInt8) === UInt8(255)
+    @test typemax(Int16) === Int16(32767)
+    @test typemax(UInt16) === UInt16(65535)
+    @test typemax(Int32) === Int32(2147483647)
+    @test typemax(UInt32) === UInt32(4294967295)
+    @test typemax(Int64) === Int64(9223372036854775807)
+    @test typemax(UInt64) === UInt64(0xffffffffffffffff)
+    @test typemax(Int128) === Int128(170141183460469231731687303715884105727)
+    @test typemax(UInt128) === UInt128(0xffffffffffffffffffffffffffffffff)
+end
+
+@testset "BitIntegerType" begin
+    @test Int isa Base.BitIntegerType
+    @test Base.BitIntegerType === Union{
+        Type{ Int8}, Type{ Int16}, Type{ Int32}, Type{ Int64}, Type{ Int128},
+        Type{UInt8}, Type{UInt16}, Type{UInt32}, Type{UInt64}, Type{UInt128}}
+end
diff --git a/test/intfuncs.jl b/test/intfuncs.jl
index bf992d9f888772..cf7ae89ea1dd7e 100644
--- a/test/intfuncs.jl
+++ b/test/intfuncs.jl
@@ -2,6 +2,8 @@
 
 using Random
 
+is_effect_free(args...) = Core.Compiler.is_effect_free(Base.infer_effects(args...))
+
 @testset "gcd/lcm" begin
     # All Integer data types take different code paths -- test all
     # TODO: Test gcd and lcm for BigInt.
@@ -12,6 +14,7 @@ using Random
         @test gcd(T(0), T(15)) === T(15)
         @test gcd(T(15), T(0)) === T(15)
         if T <: Signed
+            @test gcd(T(-12)) === T(12)
             @test gcd(T(0), T(-15)) === T(15)
             @test gcd(T(-15), T(0)) === T(15)
             @test gcd(T(3), T(-15)) === T(3)
@@ -78,6 +81,7 @@ using Random
         @test lcm(T(0), T(3)) === T(0)
         @test lcm(T(0), T(0)) === T(0)
         if T <: Signed
+            @test lcm(T(-12)) === T(12)
             @test lcm(T(0), T(-4)) === T(0)
             @test lcm(T(-4), T(0)) === T(0)
             @test lcm(T(4), T(-6)) === T(12)
@@ -144,6 +148,11 @@ using Random
     @test gcd(0xf, 20) == 5
     @test gcd(UInt32(6), Int8(-50)) == 2
     @test gcd(typemax(UInt), -16) == 1
+
+    @testset "effects" begin
+        @test is_effect_free(gcd, Tuple{Int,Int})
+        @test is_effect_free(lcm, Tuple{Int,Int})
+    end
 end
 
 @testset "gcd/lcm for arrays" begin
@@ -154,6 +163,7 @@ end
         @test gcd(T[3, 15]) === T(3)
         @test gcd(T[0, 15]) === T(15)
         if T <: Signed
+            @test gcd(T[-12]) === T(12)
             @test gcd(T[3,-15]) === T(3)
             @test gcd(T[-3,-15]) === T(3)
         end
@@ -163,12 +173,12 @@ end
         @test gcd(T[2, 4, 3, 5]) === T(1)
 
         @test lcm(T[]) === T(1)
-        @test lcm(T[2]) === T(2)
         @test lcm(T[2, 3]) === T(6)
         @test lcm(T[4, 6]) === T(12)
         @test lcm(T[3, 0]) === T(0)
         @test lcm(T[0, 0]) === T(0)
         if T <: Signed
+            @test lcm(T[-2]) === T(2)
             @test lcm(T[4, -6]) === T(12)
             @test lcm(T[-4, -6]) === T(12)
         end
@@ -204,14 +214,38 @@ end
 end
 
 @testset "invmod" begin
-    @test invmod(6, 31) == 26
-    @test invmod(-1, 3) == 2
-    @test invmod(1, -3) == -2
-    @test invmod(-1, -3) == -1
-    @test invmod(0x2, 0x3) == 2
-    @test invmod(2, 0x3) == 2
-    @test invmod(0x8, -3) == -1
+    @test invmod(6, 31) === 26
+    @test invmod(-1, 3) === 2
+    @test invmod(1, -3) === -2
+    @test invmod(-1, -3) === -1
+    @test invmod(0x2, 0x3) === 0x2
+    @test invmod(2, 0x3) === UInt(2)
+    @test invmod(0x8, -3) === -1
     @test_throws DomainError invmod(0, 3)
+
+    # For issue 29971
+    @test invmod(UInt8(1), typemax(UInt8))  === 0x01
+    @test invmod(UInt16(1), typemax(UInt16)) === 0x0001
+    @test invmod(UInt32(1), typemax(UInt32)) === 0x0000_0001
+    @test invmod(UInt64(1), typemax(UInt64)) === 0x0000_0000_0000_0001
+
+    for T in (UInt8, UInt16, UInt32, UInt64, UInt128, Int8, Int16, Int32, Int64, Int128, BigInt)
+        @test invmod(T(3), T(124))::T == 83
+    end
+
+    for T in (Int8, UInt8)
+        for x in typemin(T):typemax(T)
+            for m in typemin(T):typemax(T)
+                if m != 0 && try gcdx(x, m)[1] == 1 catch _ true end
+                    y = invmod(x, m)
+                    @test mod(widemul(y, x), m) == mod(1, m)
+                    @test div(y, m) == 0
+                else
+                    @test_throws DomainError invmod(x, m)
+                end
+            end
+        end
+    end
 end
 
 @testset "powermod" begin
@@ -237,6 +271,9 @@ end
     @test prevpow(2, 3) == 2
     @test prevpow(2, 4) == 4
     @test prevpow(2, 5) == 4
+    @test prevpow(Int64(10), Int64(1234567890123456789)) === Int64(1000000000000000000)
+    @test prevpow(10, 101.0) === 100
+    @test prevpow(10.0, 101) === 100.0
     @test_throws DomainError prevpow(0, 3)
     @test_throws DomainError prevpow(0, 3)
 end
@@ -298,12 +335,15 @@ end
 
 end
 
+primitive type BitString128 128 end
+
 @testset "bin/oct/dec/hex/bits" begin
     @test string(UInt32('3'), base = 2) == "110011"
     @test string(UInt32('3'), pad = 7, base = 2) == "0110011"
     @test string(3, base = 2) == "11"
     @test string(3, pad = 2, base = 2) == "11"
     @test string(3, pad = Int32(2), base = Int32(2)) == "11"
+    @test string(3, pad = typemin(Int128) + 3, base = 0x2) == "11"
     @test string(3, pad = 3, base = 2) == "011"
     @test string(-3, base = 2) == "-11"
     @test string(-3, pad = 3, base = 2) == "-011"
@@ -328,6 +368,7 @@ end
     @test bitstring(1035) == (Int == Int32 ? "00000000000000000000010000001011" :
         "0000000000000000000000000000000000000000000000000000010000001011")
     @test bitstring(Int128(3)) == "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011"
+    @test bitstring(reinterpret(BitString128, Int128(3))) == "00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000011"
 end
 
 @testset "digits/base" begin
@@ -338,6 +379,8 @@ end
     @test digits(-3, base = 2) == -[1, 1]
     @test digits(-42, base = 4) == -[2, 2, 2]
 
+    @test_throws DomainError string(5, base = typemin(Int128) + 10)
+
     @testset "digits/base with bases powers of 2" begin
         @test digits(4, base = 2) == [0, 0, 1]
         @test digits(5, base = Int32(2), pad=Int32(3)) == [1, 0, 1]
@@ -446,3 +489,25 @@ end
 for b in [-100:-2; 2:100;]
     @test Base.ndigits0z(0, b) == 0
 end
+
+@testset "constant prop in gcd" begin
+    ci = code_typed(() -> gcd(14, 21))[][1]
+    @test ci.code == Any[Core.ReturnNode(7)]
+
+    ci = code_typed(() -> 14 // 21)[][1]
+    @test ci.code == Any[Core.ReturnNode(2 // 3)]
+end
+@testset "binomial" begin
+    for T in (Int8, Int16, Int32, Int64)
+        for x in rand(-isqrt(typemax(T)):isqrt(typemax(T)), 1000)
+            @test binomial(x,T(1)) == x
+            x>=0 && @test binomial(x,x-T(1)) == x
+            @test binomial(x,T(2)) == div(x*(x-1), 2)
+            x>=0 && @test binomial(x,x-T(2)) == div(x*(x-1), 2)
+        end
+        @test @inferred(binomial(one(T),one(T))) isa T
+    end
+    for x in ((false,false), (false,true), (true,false), (true,true))
+        @test binomial(x...) == (x != (false,true))
+    end
+end
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index a5f3308c68639c..2f2ef0cd505d54 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -13,8 +13,7 @@ include("testenv.jl")
 @testset "runtime intrinsics" begin
     @test Core.Intrinsics.add_int(1, 1) == 2
     @test Core.Intrinsics.sub_int(1, 1) == 0
-    @test_throws ErrorException("fpext: output bitsize must be > input bitsize")    Core.Intrinsics.fpext(Int32, 0x0000_0000)
-    @test_throws ErrorException("fpext: output bitsize must be > input bitsize")    Core.Intrinsics.fpext(Int32, 0x0000_0000_0000_0000)
+    @test_throws ErrorException("fpext: output bitsize must be >= input bitsize")    Core.Intrinsics.fpext(Int32, 0x0000_0000_0000_0000)
     @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Int32, 0x0000_0000)
     @test_throws ErrorException("fptrunc: output bitsize must be < input bitsize")  Core.Intrinsics.fptrunc(Int64, 0x0000_0000)
     @test_throws ErrorException("ZExt: output bitsize must be > input bitsize")     Core.Intrinsics.zext_int(Int8, 0x00)
@@ -101,8 +100,188 @@ let f = Core.Intrinsics.ashr_int
     @test f(Int32(2), -1) == 0
 end
 
+const ReplaceType = ccall(:jl_apply_cmpswap_type, Any, (Any,), T) where T
+
 # issue #29929
-@test unsafe_store!(Ptr{Nothing}(C_NULL), nothing) === Ptr{Nothing}(0)
-@test unsafe_load(Ptr{Nothing}(0)) === nothing
+let p = Ptr{Nothing}(0)
+    @test unsafe_store!(p, nothing) === C_NULL
+    @test unsafe_load(p) === nothing
+    @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === nothing
+    @test Core.Intrinsics.atomic_pointerset(p, nothing, :sequentially_consistent) === p
+    @test Core.Intrinsics.atomic_pointerswap(p, nothing, :sequentially_consistent) === nothing
+    @test Core.Intrinsics.atomic_pointermodify(p, (i, j) -> j, nothing, :sequentially_consistent) === Pair(nothing, nothing)
+    @test Core.Intrinsics.atomic_pointerreplace(p, nothing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, true))
+    @test Core.Intrinsics.atomic_pointerreplace(p, missing, nothing, :sequentially_consistent, :sequentially_consistent) === ReplaceType{Nothing}((nothing, false))
+end
+
 struct GhostStruct end
 @test unsafe_load(Ptr{GhostStruct}(rand(Int))) === GhostStruct()
+
+# macro to verify and compare the compiled output of an intrinsic with its runtime version
+macro test_intrinsic(intr, args...)
+    output = args[end]
+    inputs = args[1:end-1]
+    quote
+        function f()
+            $intr($(inputs...))
+        end
+        @test f() === Base.invokelatest($intr, $(inputs...))
+        @test f() == $output
+    end
+end
+
+@testset "Float16 intrinsics" begin
+    # unary
+    @test_intrinsic Core.Intrinsics.neg_float Float16(3.3) Float16(-3.3)
+    @test_intrinsic Core.Intrinsics.fpext Float32 Float16(3.3) 3.3007812f0
+    @test_intrinsic Core.Intrinsics.fpext Float64 Float16(3.3) 3.30078125
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 Float32(3.3) Float16(3.3)
+    @test_intrinsic Core.Intrinsics.fptrunc Float16 Float64(3.3) Float16(3.3)
+
+    # binary
+    @test_intrinsic Core.Intrinsics.add_float Float16(3.3) Float16(2) Float16(5.3)
+    @test_intrinsic Core.Intrinsics.sub_float Float16(3.3) Float16(2) Float16(1.301)
+    @test_intrinsic Core.Intrinsics.mul_float Float16(3.3) Float16(2) Float16(6.6)
+    @test_intrinsic Core.Intrinsics.div_float Float16(3.3) Float16(2) Float16(1.65)
+    @test_intrinsic Core.Intrinsics.rem_float Float16(3.3) Float16(2) Float16(1.301)
+
+    # ternary
+    @test_intrinsic Core.Intrinsics.fma_float Float16(3.3) Float16(4.4) Float16(5.5) Float16(20.02)
+    @test_intrinsic Core.Intrinsics.muladd_float Float16(3.3) Float16(4.4) Float16(5.5) Float16(20.02)
+
+    # boolean
+    @test_intrinsic Core.Intrinsics.eq_float Float16(3.3) Float16(3.3) true
+    @test_intrinsic Core.Intrinsics.eq_float Float16(3.3) Float16(2) false
+    @test_intrinsic Core.Intrinsics.ne_float Float16(3.3) Float16(3.3) false
+    @test_intrinsic Core.Intrinsics.ne_float Float16(3.3) Float16(2) true
+    @test_intrinsic Core.Intrinsics.le_float Float16(3.3) Float16(3.3) true
+    @test_intrinsic Core.Intrinsics.le_float Float16(3.3) Float16(2) false
+
+    # conversions
+    @test_intrinsic Core.Intrinsics.sitofp Float16 3 Float16(3f0)
+    @test_intrinsic Core.Intrinsics.uitofp Float16 UInt(3) Float16(3f0)
+    @test_intrinsic Core.Intrinsics.fptosi Int Float16(3.3) 3
+    @test_intrinsic Core.Intrinsics.fptoui UInt Float16(3.3) UInt(3)
+end
+
+using Base.Experimental: @force_compile
+@test_throws ConcurrencyViolationError("invalid atomic ordering") (@force_compile; Core.Intrinsics.atomic_fence(:u)) === nothing
+@test_throws ConcurrencyViolationError("invalid atomic ordering") (@force_compile; Core.Intrinsics.atomic_fence(Symbol("u", "x"))) === nothing
+@test_throws ConcurrencyViolationError("invalid atomic ordering") Core.Intrinsics.atomic_fence(Symbol("u", "x")) === nothing
+for order in (:not_atomic, :monotonic, :acquire, :release, :acquire_release, :sequentially_consistent)
+    @test Core.Intrinsics.atomic_fence(order) === nothing
+    @test (order -> Core.Intrinsics.atomic_fence(order))(order) === nothing
+    @test Base.invokelatest(@eval () -> Core.Intrinsics.atomic_fence($(QuoteNode(order)))) === nothing
+end
+@test Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent) == nothing
+@test (@force_compile; Core.Intrinsics.atomic_pointerref(C_NULL, :sequentially_consistent)) == nothing
+
+primitive type Int256 <: Signed 256 end
+Int256(i::Int) = Core.Intrinsics.sext_int(Int256, i)
+primitive type Int512 <: Signed 512 end
+Int512(i::Int) = Core.Intrinsics.sext_int(Int512, i)
+function add(i::T, j)::T where {T}; return i + j; end
+swap(i, j) = j
+
+for TT in (Int8, Int16, Int32, Int64, Int128, Int256, Int512, Complex{Int32}, Complex{Int512}, Any)
+    r = Ref{TT}(10)
+    GC.@preserve r begin
+        (function (::Type{TT}) where TT
+            p = Base.unsafe_convert(Ptr{TT}, r)
+            T(x) = convert(TT, x)
+            S = UInt32
+            if TT !== Any
+                @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent)
+                @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent)
+                @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent)
+            end
+            @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[]
+            if sizeof(r) > 8
+                @test_throws ErrorException("atomic_pointerref: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointerset: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointerswap: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerswap(p, T(100), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointermodify: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, T(100), T(2), :sequentially_consistent, :sequentially_consistent)
+                @test_throws ErrorException("atomic_pointerreplace: invalid pointer for atomic operation") Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent)
+                @test Core.Intrinsics.pointerref(p, 1, 1) === T(10) === r[]
+            else
+                TT !== Any && @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(10)
+                @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(1)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(1), true))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(100), false))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(100)
+                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(100), T(101))
+                @test Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent) === Pair{TT,TT}(T(101), T(102))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(102)
+                @test Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent) === T(102)
+                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((T(103), false))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(103)
+            end
+            if TT === Any
+                @test Core.Intrinsics.atomic_pointermodify(p, swap, S(103), :sequentially_consistent) === Pair{TT,TT}(T(103), S(103))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === S(103)
+                @test Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent) === p
+                @test Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent) === S(1)
+                @test Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), false))
+                @test Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent) === ReplaceType{TT}((S(100), true))
+                @test Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent) === T(2)
+            end
+        end)(TT,)
+    end
+end
+
+mutable struct IntWrap <: Signed
+    x::Int
+end
+Base.:(+)(a::IntWrap, b::Int) = IntWrap(a.x + b)
+Base.:(+)(a::IntWrap, b::IntWrap) = IntWrap(a.x + b.x)
+Base.show(io::IO, a::IntWrap) = print(io, "IntWrap(", a.x, ")")
+(function()
+    TT = IntWrap
+    T(x) = convert(TT, x)
+    r = Ref{TT}(10)
+    p = Base.unsafe_convert(Ptr{TT}, r)
+    GC.@preserve r begin
+        S = UInt32
+        @test_throws TypeError Core.Intrinsics.atomic_pointerset(p, S(1), :sequentially_consistent)
+        @test_throws TypeError Core.Intrinsics.atomic_pointerswap(p, S(100), :sequentially_consistent)
+        @test_throws TypeError Core.Intrinsics.atomic_pointerreplace(p, T(100), S(2), :sequentially_consistent, :sequentially_consistent)
+        r2 = Core.Intrinsics.pointerref(p, 1, 1)
+        @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[]
+        @test_throws TypeError Core.Intrinsics.atomic_pointermodify(p, swap, S(1), :sequentially_consistent)
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 10 === r[].x && r2 !== r[]
+        @test Core.Intrinsics.atomic_pointerset(p, T(1), :sequentially_consistent) === p
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 1 === r[].x && r2 !== r[]
+        r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(100), :sequentially_consistent, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 1 && r[].x === 100 && r2 !== r[]
+        @test succ
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[]
+        r2, succ = Core.Intrinsics.atomic_pointerreplace(p, T(1), T(1), :sequentially_consistent, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[]
+        @test !succ
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 100 === r[].x && r2 !== r[]
+        r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 100 !== r[].x && r2 !== r[]
+        @test r3 isa IntWrap && r3.x === 101 === r[].x && r3 !== r[]
+        r2, r3 = Core.Intrinsics.atomic_pointermodify(p, add, T(1), :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 101 !== r[].x && r2 !== r[]
+        @test r3 isa IntWrap && r3.x === 102 === r[].x && r3 !== r[]
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 102 === r[].x && r2 !== r[]
+        r2 = Core.Intrinsics.atomic_pointerswap(p, T(103), :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 102 !== r[].x && r[].x == 103 && r2 !== r[]
+        r2, succ = Core.Intrinsics.atomic_pointerreplace(p, S(100), T(2), :sequentially_consistent, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[]
+        @test !succ
+        r2 = Core.Intrinsics.atomic_pointerref(p, :sequentially_consistent)
+        @test r2 isa IntWrap && r2.x === 103 === r[].x && r2 !== r[]
+    end
+end)()
diff --git a/test/iobuffer.jl b/test/iobuffer.jl
index 1d781d0a1b8744..d8211aa7086b34 100644
--- a/test/iobuffer.jl
+++ b/test/iobuffer.jl
@@ -9,7 +9,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
 @testset "Read/write empty IOBuffer" begin
     io = IOBuffer()
     @test eof(io)
-    @test_throws EOFError read(io,UInt8)
+    @test_throws EOFError read(io, UInt8)
     @test write(io,"abc") === 3
     @test isreadable(io)
     @test iswritable(io)
@@ -18,7 +18,7 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
     @test position(io) == 3
     @test eof(io)
     seek(io, 0)
-    @test read(io,UInt8) == convert(UInt8, 'a')
+    @test read(io, UInt8) == convert(UInt8, 'a')
     a = Vector{UInt8}(undef, 2)
     @test read!(io, a) == a
     @test a == UInt8['b','c']
@@ -34,22 +34,24 @@ bufcontents(io::Base.GenericIOBuffer) = unsafe_string(pointer(io.data), io.size)
     truncate(io, 10)
     @test position(io) == 0
     @test all(io.data .== 0)
-    @test write(io,Int16[1,2,3,4,5,6]) === 12
+    @test write(io, Int16[1, 2, 3, 4, 5, 6]) === 12
     seek(io, 2)
     truncate(io, 10)
     @test ioslength(io) == 10
     io.readable = false
-    @test_throws ArgumentError read!(io,UInt8[0])
+    @test_throws ArgumentError read!(io, UInt8[0])
     truncate(io, 0)
     @test write(io,"boston\ncambridge\n") > 0
     @test String(take!(io)) == "boston\ncambridge\n"
     @test String(take!(io)) == ""
     @test write(io, ComplexF64(0)) === 16
     @test write(io, Rational{Int64}(1//2)) === 16
-    close(io)
-    @test_throws ArgumentError write(io,UInt8[0])
-    @test_throws ArgumentError seek(io,0)
+    @test closewrite(io) === nothing
+    @test_throws ArgumentError write(io, UInt8[0])
     @test eof(io)
+    @test close(io) === nothing
+    @test_throws ArgumentError write(io, UInt8[0])
+    @test_throws ArgumentError seek(io, 0)
 end
 
 @testset "Read/write readonly IOBuffer" begin
@@ -237,7 +239,7 @@ end
     @test isreadable(bstream)
     @test iswritable(bstream)
     @test bytesavailable(bstream) == 0
-    @test sprint(show, bstream) == "BufferStream() bytes waiting:$(bytesavailable(bstream.buffer)), isopen:true"
+    @test sprint(show, bstream) == "BufferStream(bytes waiting=$(bytesavailable(bstream.buffer)), isopen=true)"
     a = rand(UInt8,10)
     write(bstream,a)
     @test !eof(bstream)
@@ -251,9 +253,10 @@ end
     @test !eof(bstream)
     read!(bstream,c)
     @test c == a[3:10]
-    @test close(bstream) === nothing
+    @test closewrite(bstream) === nothing
     @test eof(bstream)
     @test bytesavailable(bstream) == 0
+    @test close(bstream) === nothing
     flag = Ref{Bool}(false)
     event = Base.Event()
     bstream = Base.BufferStream()
@@ -341,3 +344,7 @@ end
     @test peek(io, Int32) == -476872221
     close(io)
 end
+
+@testset "bytesavailable devnull" begin
+    @test bytesavailable(devnull) == 0
+end
diff --git a/test/iostream.jl b/test/iostream.jl
index e49386913a1205..bc4751fb1fca7f 100644
--- a/test/iostream.jl
+++ b/test/iostream.jl
@@ -93,6 +93,7 @@ end
         #  with resizing of b
         b = view(UInt8[0, 0, 0], 1:0)
         @test_throws MethodError readbytes!(file, b, 2)
+        @test !islocked(file.lock) # Issue #37218
         @test isempty(b)
     end
 end
diff --git a/test/iterators.jl b/test/iterators.jl
index b9bec84bf9a581..554e120d94fd61 100644
--- a/test/iterators.jl
+++ b/test/iterators.jl
@@ -2,6 +2,8 @@
 
 using Base.Iterators
 using Random
+using Base: IdentityUnitRange
+using Dates: Date, Day
 
 @test Base.IteratorSize(Any) isa Base.SizeUnknown
 
@@ -100,6 +102,7 @@ end
 @test length(zip(cycle(1:3), 1:7, cycle(1:3))) == 7
 @test length(zip(1:3,product(1:7,cycle(1:3)))) == 3
 @test length(zip(1:3,product(1:7,cycle(1:3)),8)) == 1
+@test_throws ArgumentError length(zip()) # length of zip of empty tuple
 
 # map
 # ----
@@ -122,7 +125,7 @@ end
 
 # countfrom
 # ---------
-let i = 0, k = 1
+let i = 0, k = 1, l = 0
     for j = countfrom(0, 2)
         @test j == i*2
         i += 1
@@ -133,6 +136,15 @@ let i = 0, k = 1
         k += 1
         k <= 10 || break
     end
+    # test that `start` promotes to `typeof(start+step)`
+    for j = countfrom(Int[0, 0], Float64[1.0, 2.0])
+        @test j isa Vector{Float64}
+        @test j == l*[1, 2]
+        l += 1
+        l <= 10 || break
+    end
+    # test with `start` and `step` having different types
+    @test collect(take(countfrom(Date(2020,12,25), Day(1)), 12)) == range(Date(2020,12,25), step=Day(1), length=12)
 end
 
 # take
@@ -177,6 +189,13 @@ end
 @test length(drop(1:3,typemax(Int))) == 0
 @test Base.IteratorSize(drop(countfrom(1),3)) == Base.IsInfinite()
 @test_throws MethodError length(drop(countfrom(1), 3))
+@test Base.IteratorSize(Iterators.drop(Iterators.filter(i -> i>0, 1:10), 2)) == Base.SizeUnknown()
+
+let x = Iterators.drop(Iterators.Stateful("abc"), 2)
+    @test !Base.isdone(x, nothing)
+    iterate(x)
+    @test Base.isdone(x, nothing)
+end
 
 # double take
 # and take/drop canonicalization
@@ -198,9 +217,10 @@ end
     @test collect(takewhile(<(4),1:10)) == [1,2,3]
     @test collect(takewhile(<(4),Iterators.countfrom(1))) == [1,2,3]
     @test collect(takewhile(<(4),5:10)) == []
-    @test collect(takewhile(_->true,5:10)) == 5:10
+    @test collect(takewhile(Returns(true),5:10)) == 5:10
     @test collect(takewhile(isodd,[1,1,2,3])) == [1,1]
     @test collect(takewhile(<(2), takewhile(<(3), [1,1,2,3]))) == [1,1]
+    @test Base.IteratorEltype(typeof(takewhile(<(4),Iterators.map(identity, 1:10)))) isa Base.EltypeUnknown
 end
 
 # dropwhile
@@ -209,10 +229,11 @@ end
     @test collect(dropwhile(<(4), 1:10)) == 4:10
     @test collect(dropwhile(<(4), 1:10)) isa Vector{Int}
     @test isempty(dropwhile(<(4), []))
-    @test collect(dropwhile(_->false,1:3)) == 1:3
-    @test isempty(dropwhile(_->true, 1:3))
+    @test collect(dropwhile(Returns(false),1:3)) == 1:3
+    @test isempty(dropwhile(Returns(true), 1:3))
     @test collect(dropwhile(isodd,[1,1,2,3])) == [2,3]
     @test collect(dropwhile(iseven,dropwhile(isodd,[1,1,2,3]))) == [3]
+    @test Base.IteratorEltype(typeof(dropwhile(<(4),Iterators.map(identity, 1:10)))) isa Base.EltypeUnknown
 end
 
 # cycle
@@ -223,6 +244,8 @@ let i = 0
         i += 1
         i <= 10 || break
     end
+    @test Base.isdone(cycle(0:3)) === Base.isdone(0:3) === missing
+    @test !Base.isdone(cycle(0:3), 1)
 end
 
 # repeated
@@ -291,6 +314,18 @@ let (a, b) = (1:3, [4 6;
     end
 end
 
+# collect stateful iterator
+let itr
+    itr = Iterators.Stateful(Iterators.map(identity, 1:5))
+    @test collect(itr) == 1:5
+    @test collect(itr) == Int[] # Stateful do not preserve shape
+    itr = (i+1 for i in Base.Stateful([1, 2, 3]))
+    @test collect(itr) == [2, 3, 4]
+    @test collect(itr) == Int[] # Stateful do not preserve shape
+    itr = (i-1 for i in Base.Stateful(zeros(Int, 0, 0)))
+    @test collect(itr) == Int[] # Stateful do not preserve shape
+end
+
 # with 1D inputs
 let a = 1:2,
     b = 1.0:10.0,
@@ -381,7 +416,7 @@ let a = 1:2,
     end
 
     # size infinite or unknown raises an error
-    for itr in Any[countfrom(1), Iterators.filter(i->0, 1:10)]
+    for itr in Any[countfrom(1), Iterators.filter(Returns(0), 1:10)]
         @test_throws ArgumentError length(product(itr))
         @test_throws ArgumentError   size(product(itr))
         @test_throws ArgumentError  ndims(product(itr))
@@ -405,6 +440,10 @@ end
 @test Base.IteratorSize(product(take(1:2, 1), take(1:2, 1))) == Base.HasShape{2}()
 @test Base.IteratorSize(product(take(1:2, 2)))               == Base.HasShape{1}()
 @test Base.IteratorSize(product([1 2; 3 4]))                 == Base.HasShape{2}()
+@test Base.IteratorSize(product((1,2,3,4), (5, 6, 7, 8)))    == Base.HasShape{2}()  # product of ::HasLength and ::HasLength
+@test Base.IteratorSize(product(1:2, 3:5, 5:6))              == Base.HasShape{3}()  # product of 3 iterators
+@test Base.IteratorSize(product([1 2; 3 4], 1:4))            == Base.HasShape{3}()  # product of ::HasShape{2} with ::HasShape{1}
+@test Base.IteratorSize(product([1 2; 3 4], (1,2)))          == Base.HasShape{3}()  # product of ::HasShape{2} with ::HasLength
 
 # IteratorEltype trait business
 let f1 = Iterators.filter(i->i>0, 1:10)
@@ -422,12 +461,20 @@ end
 @test Base.IteratorEltype(product(take(1:2, 1), take(1:2, 1))) == Base.HasEltype()
 @test Base.IteratorEltype(product(take(1:2, 2)))               == Base.HasEltype()
 @test Base.IteratorEltype(product([1 2; 3 4]))                 == Base.HasEltype()
+@test Base.IteratorEltype(product())                           == Base.HasEltype()
 
 @test collect(product(1:2,3:4)) == [(1,3) (1,4); (2,3) (2,4)]
 @test isempty(collect(product(1:0,1:2)))
 @test length(product(1:2,1:10,4:6)) == 60
 @test Base.IteratorSize(product(1:2, countfrom(1))) == Base.IsInfinite()
 
+@test Base.iterate(product()) == ((), true)
+@test Base.iterate(product(), 1) == nothing
+
+# intersection
+@test intersect(product(1:3, 4:6), product(2:4, 3:5)) == Iterators.ProductIterator((2:3, 4:5))
+@test intersect(product(1:3, [4 5 ; 6 7]), product(2:4, [7 6 ; 5 4])).iterators == (2:3, [4, 6, 5, 7])
+
 # flatten
 # -------
 @test collect(flatten(Any[1:2, 4:5])) == Any[1,2,4,5]
@@ -443,17 +490,46 @@ end
 @test_throws ArgumentError length(flatten(NTuple[(1,), ()])) # #16680
 @test_throws ArgumentError length(flatten([[1], [1]]))
 
+@testset "IteratorSize trait for flatten" begin
+    @test Base.IteratorSize(Base.Flatten((i for i=1:2) for j=1:1)) == Base.SizeUnknown()
+    @test Base.IteratorSize(Base.Flatten((1,2))) == Base.HasLength()
+    @test Base.IteratorSize(Base.Flatten(1:2:4)) == Base.HasLength()
+end
+
 @test Base.IteratorEltype(Base.Flatten((i for i=1:2) for j=1:1)) == Base.EltypeUnknown()
 # see #29112, #29464, #29548
 @test Base.return_types(Base.IteratorEltype, Tuple{Array}) == [Base.HasEltype]
 
+# flatmap
+# -------
+@test flatmap(1:3) do j flatmap(1:3) do k
+    j!=k ? ((j,k),) : ()
+end end |> collect == [(j,k) for j in 1:3 for k in 1:3 if j!=k]
+# Test inspired by the monad associativity law
+fmf(x) = x<0 ? () : (x^2,)
+fmg(x) = x<1 ? () : (x/2,)
+fmdata = -2:0.75:2
+fmv1 = flatmap(tuple.(fmdata)) do h
+    flatmap(h) do x
+        gx = fmg(x)
+        flatmap(gx) do x
+            fmf(x)
+        end
+    end
+end
+fmv2 = flatmap(tuple.(fmdata)) do h
+    gh = flatmap(h) do x fmg(x) end
+    flatmap(gh) do x fmf(x) end
+end
+@test all(fmv1 .== fmv2)
+
 # partition(c, n)
 let v = collect(partition([1,2,3,4,5], 1))
     @test all(i->v[i][1] == i, v)
 end
 
 let v1 = collect(partition([1,2,3,4,5], 2)),
-    v2 = collect(partition(flatten([[1,2],[3,4],5]), 2)) # collecting partition with SizeUnkown
+    v2 = collect(partition(flatten([[1,2],[3,4],5]), 2)) # collecting partition with SizeUnknown
     @test v1[1] == v2[1] == [1,2]
     @test v1[2] == v2[2] == [3,4]
     @test v1[3] == v2[3] == [5]
@@ -525,12 +601,15 @@ end
                                                          (1,1), (8,8), (11, 13),
                                                          (1,1,1), (8, 4, 2), (11, 13, 17)),
                                                 part in (1, 7, 8, 11, 63, 64, 65, 142, 143, 144)
-    P = partition(CartesianIndices(dims), part)
-    for I in P
-        @test length(I) == iterate_length(I) == simd_iterate_length(I) == simd_trip_count(I)
-        @test collect(I) == iterate_elements(I) == simd_iterate_elements(I) == index_elements(I)
+    for fun in (i -> 1:i, i -> 1:2:2i, i -> Base.IdentityUnitRange(-i:i))
+        iter = CartesianIndices(map(fun, dims))
+        P = partition(iter, part)
+        for I in P
+            @test length(I) == iterate_length(I) == simd_iterate_length(I) == simd_trip_count(I)
+            @test collect(I) == iterate_elements(I) == simd_iterate_elements(I) == index_elements(I)
+        end
+        @test all(Base.splat(==), zip(Iterators.flatten(map(collect, P)), iter))
     end
-    @test all(Base.splat(==), zip(Iterators.flatten(map(collect, P)), CartesianIndices(dims)))
 end
 @testset "empty/invalid partitions" begin
     @test_throws ArgumentError partition(1:10, 0)
@@ -592,7 +671,7 @@ end
 end
 
 @testset "filter empty iterable #16704" begin
-    arr = filter(n -> true, 1:0)
+    arr = filter(Returns(true), 1:0)
     @test length(arr) == 0
     @test eltype(arr) == Int
 end
@@ -618,7 +697,7 @@ end
         @test isempty(d) || haskey(d, first(keys(d)))
         @test collect(v for (k, v) in d) == collect(A)
         if A isa NamedTuple
-            K = isempty(d) ? Union{} : Symbol
+            K = Symbol
             V = isempty(d) ? Union{} : Float64
             @test isempty(d) || haskey(d, :a)
             @test !haskey(d, :abc)
@@ -662,17 +741,26 @@ end
     for itr in (2:10, "∀ϵ>0", 1:0, "", (2,3,5,7,11), [2,3,5,7,11], rand(5,6), Z, 3, true, 'x', 4=>5,
                 eachindex("∀ϵ>0"), view(Z), view(rand(5,6),2:4,2:6), (x^2 for x in 1:10),
                 Iterators.Filter(isodd, 1:10), flatten((1:10, 50:60)), enumerate("foo"),
-                pairs(50:60), zip(1:10,21:30,51:60), product(1:3, 10:12), repeated(3.14159, 5))
-        @test squash(collect(Iterators.reverse(itr))) == reverse(squash(collect(itr)))
+                pairs(50:60), zip(1:10,21:30,51:60), product(1:3, 10:12), repeated(3.14159, 5),
+                (a=2, b=3, c=5, d=7, e=11))
+        arr = reverse(squash(collect(itr)))
+        itr = Iterators.reverse(itr)
+        @test squash(collect(itr)) == arr
+        if !isempty(arr)
+            @test first(itr) == first(arr)
+            @test last(itr) == last(arr)
+        end
     end
     @test collect(take(Iterators.reverse(cycle(1:3)), 7)) == collect(take(cycle(3:-1:1), 7))
     let r = repeated(3.14159)
         @test Iterators.reverse(r) === r
+        @test last(r) === 3.14159
     end
-    let t = (2,3,5,7,11)
+    for t in [(1,), (2, 3, 5, 7, 11), (a=1,), (a=2, b=3, c=5, d=7, e=11)]
         @test Iterators.reverse(Iterators.reverse(t)) === t
         @test first(Iterators.reverse(t)) === last(t)
         @test last(Iterators.reverse(t)) === first(t)
+        @test collect(Iterators.reverse(t)) == reverse(collect(t))
     end
 end
 
@@ -682,6 +770,7 @@ end
         @test popfirst!(a) == 'a'
         @test collect(Iterators.take(a, 3)) == ['b','c','d']
         @test collect(a) == ['e', 'f']
+        @test_throws EOFError popfirst!(a) # trying to pop from an empty stateful iterator.
     end
     let a = @inferred(Iterators.Stateful([1, 1, 1, 2, 3, 4]))
         for x in a; x == 1 || break; end
@@ -691,10 +780,10 @@ end
     @test eltype(Iterators.Stateful("a")) == Char
     # Interaction of zip/Stateful
     let a = Iterators.Stateful("a"), b = ""
-	@test isempty(collect(zip(a,b)))
-	@test !isempty(a)
-	@test isempty(collect(zip(b,a)))
-	@test !isempty(a)
+    @test isempty(collect(zip(a,b)))
+    @test !isempty(a)
+    @test isempty(collect(zip(b,a)))
+    @test !isempty(a)
     end
     let a = Iterators.Stateful("a"), b = "", c = Iterators.Stateful("c")
         @test isempty(collect(zip(a,b,c)))
@@ -796,6 +885,8 @@ end
     @test_throws ArgumentError only([])
     @test_throws ArgumentError only([3, 2])
 
+    @test only(fill(42)) === 42 # zero dimensional array containing a single value.
+
     @test @inferred(only((3,))) === 3
     @test_throws ArgumentError only(())
     @test_throws ArgumentError only((3, 2))
@@ -848,3 +939,45 @@ end
     @test cumprod(x + 1 for x in 1:3) == [2, 6, 24]
     @test accumulate(+, (x^2 for x in 1:3); init=100) == [101, 105, 114]
 end
+
+
+@testset "Iterators.tail_if_any" begin
+    @test Iterators.tail_if_any(()) == ()
+    @test Iterators.tail_if_any((1, 2)) == (2,)
+    @test Iterators.tail_if_any((1,)) == ()
+end
+
+@testset "IteratorSize trait for zip" begin
+    @test Base.IteratorSize(zip()) == Base.IsInfinite()                     # for zip of empty tuple
+    @test Base.IteratorSize(zip((1,2,3), repeated(0))) == Base.HasLength()  # for zip of ::HasLength and ::IsInfinite
+    @test Base.IteratorSize(zip( 1:5, repeated(0) )) == Base.HasLength()    # for zip of ::HasShape and ::IsInfinite
+    @test Base.IteratorSize(zip(repeated(0), (1,2,3))) == Base.HasLength()  # for zip of ::IsInfinite and ::HasLength
+    @test Base.IteratorSize(zip(repeated(0), 1:5 )) == Base.HasLength()     # for zip of ::IsInfinite and ::HasShape
+    @test Base.IteratorSize(zip((1,2,3), 1:5) ) == Base.HasLength()         # for zip of ::HasLength and ::HasShape
+    @test Base.IteratorSize(zip(1:5, (1,2,3)) ) == Base.HasLength()         # for zip of ::HasShape and ::HasLength
+end
+
+@testset "proper patition for non-1-indexed vector" begin
+    @test partition(IdentityUnitRange(11:19), 5) |> collect == [11:15,16:19] # IdentityUnitRange
+end
+
+@testset "Iterators.peel" begin
+    @test Iterators.peel([]) == nothing
+    @test Iterators.peel(1:10)[1] == 1
+    @test Iterators.peel(1:10)[2] |> collect == 2:10
+    @test Iterators.peel(x^2 for x in 2:4)[1] == 4
+    @test Iterators.peel(x^2 for x in 2:4)[2] |> collect == [9, 16]
+end
+
+@testset "last for iterators" begin
+    @test last(Iterators.map(identity, 1:3)) == 3
+    @test last(Iterators.filter(iseven, (Iterators.map(identity, 1:3)))) == 2
+end
+
+@testset "isempty and isdone for Generators" begin
+    itr = eachline(IOBuffer("foo\n"))
+    gen = (x for x in itr)
+    @test !isempty(gen)
+    @test !Base.isdone(gen)
+    @test collect(gen) == ["foo"]
+end
diff --git a/test/keywordargs.jl b/test/keywordargs.jl
index f9be8edd80dc09..9cbae2b1a0b19d 100644
--- a/test/keywordargs.jl
+++ b/test/keywordargs.jl
@@ -374,3 +374,16 @@ using InteractiveUtils
 no_kw_args(x::Int) = 0
 @test_throws MethodError no_kw_args(1, k=1)
 @test_throws MethodError no_kw_args("", k=1)
+
+# issue #40964
+f40964(xs::Int...=1; k = 2) = (xs, k)
+@test f40964() === ((1,), 2)
+@test f40964(7, 8) === ((7,8), 2)
+@test f40964(7, 8, k=0) === ((7,8), 0)
+# issue #41416
+@test f40964(; k = 1) === ((1,), 1)
+f41416(a...="a"; b=true) = (b, a)
+@test f41416()           === (true, ("a",))
+@test f41416(;b=false)   === (false, ("a",))
+@test f41416(33)         === (true, (33,))
+@test f41416(3; b=false) === (false, (3,))
diff --git a/test/llvmcall.jl b/test/llvmcall.jl
index 0fab2e2f848027..b7f78205ec8564 100644
--- a/test/llvmcall.jl
+++ b/test/llvmcall.jl
@@ -152,8 +152,12 @@ module ObjLoadTest
         didcall = true
         nothing
     end
+    @test_throws(ErrorException("@ccallable was already defined for this method name"),
+                 @eval @ccallable Cvoid jl_the_callback(not_the_method::Int) = "other")
     # Make sure everything up until here gets compiled
-    jl_the_callback(); didcall = false
+    @test jl_the_callback() === nothing
+    @test jl_the_callback(1) == "other"
+    didcall = false
     function do_the_call()
         llvmcall(
             ("""declare void @jl_the_callback()
@@ -233,3 +237,15 @@ end
 # issue 34166
 f34166(x) = Base.llvmcall("ret i$(Sys.WORD_SIZE) %0", Int, (Int,), x)
 @test_throws ErrorException f34166(1)
+
+# Test that codegen can construct constant LLVMPtr #38864
+struct MyStruct
+    kern::UInt64
+    ptr::Core.LLVMPtr{UInt8,1}
+end
+MyStruct(kern) = MyStruct(kern, reinterpret(Core.LLVMPtr{UInt8,1}, 0))
+MyStruct() = MyStruct(0)
+s = MyStruct()
+
+@test s.kern == 0
+@test reinterpret(Int, s.ptr) == 0
diff --git a/test/llvmcall2.jl b/test/llvmcall2.jl
index cfd20d210bfd7e..8926b962a35c6b 100644
--- a/test/llvmcall2.jl
+++ b/test/llvmcall2.jl
@@ -37,10 +37,26 @@ function ceilfloor(x::Float64)
 end
 @test ceilfloor(7.4) == 8.0
 
-# support for calling external functions
-begin
-    f() = ccall("time", llvmcall, Cvoid, (Ptr{Cvoid},), C_NULL)
-    @test_throws ErrorException f()
+let err = ErrorException("llvmcall only supports intrinsic calls")
+    # support for calling external functions
+    @test_throws err @eval ccall("time", llvmcall, Cvoid, (Ptr{Cvoid},), C_NULL)
     g() = ccall("extern time", llvmcall, Cvoid, (Ptr{Cvoid},), C_NULL)
     g()
+    @test_throws err @eval ccall("extern llvm.floor", llvmcall, Float64, (Float64,), 0.0)
+
+    # support for mangling
+    @test (@eval ccall("llvm.floor.f64", llvmcall, Float64, (Float64,), 0.0)) === 0.0
+    @test (@eval ccall("llvm.floor", llvmcall, Float64, (Float64,), 0.0),
+                 ccall("llvm.floor", llvmcall, Float32, (Float32,), 0.0)) === (0.0, 0.0f0)
+    @test_throws err @eval ccall("llvm.floor.f64", llvmcall, Float32, (Float64,), 0.0)
+    @test_throws err @eval ccall("llvm.floor.f64", llvmcall, Float32, (Float32,), 0.0f0)
+    @test_throws err @eval ccall("llvm.floor.f64", llvmcall, Float64, (Float32,), 0.0f0)
+    @test_throws err @eval ccall("llvm.floor.f64", llvmcall, Float64, (Int,), 0)
+    @test_throws err @eval ccall("llvm.floor.f64", llvmcall, Int, (Int,), 0)
+    @test_throws err @eval ccall("llvm.floor", llvmcall, Float64, (Float32,), 0.0f0)
+    @test_throws err @eval ccall("llvm.floor", llvmcall, Float64, (Int,), 0)
+    @test_throws err @eval ccall("llvm.floor", llvmcall, Int, (Int,), 0)
+
+    @test_throws err (@eval ccall("llvm.floor.f64", llvmcall, Float64, (Float64, Float64...,), 0.0)) === 0.0
+    @test_throws err (@eval ccall("llvm.floor", llvmcall, Float64, (Float64, Float64...,), 0.0)) === 0.0
 end
diff --git a/test/llvmpasses/Makefile b/test/llvmpasses/Makefile
index ef583fd451f076..a0b9cf977ede8c 100644
--- a/test/llvmpasses/Makefile
+++ b/test/llvmpasses/Makefile
@@ -2,13 +2,13 @@ SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
 JULIAHOME := $(abspath $(SRCDIR)/../..)
 include $(JULIAHOME)/Make.inc
 
-check: $(SRCDIR)
+check: .
 
 TESTS = $(patsubst $(SRCDIR)/%,%,$(wildcard $(SRCDIR)/*.jl $(SRCDIR)/*.ll))
 
-$(SRCDIR) $(TESTS):
+. $(TESTS):
 	PATH=$(build_bindir):$(build_depsbindir):$$PATH \
 	LD_LIBRARY_PATH=${build_libdir}:$$LD_LIBRARY_PATH \
-	$(build_depsbindir)/lit/lit.py -v $@
+	$(build_depsbindir)/lit/lit.py -v $(addprefix $(SRCDIR)/,$@)
 
-.PHONY: $(TESTS) $(SRCDIR) check all
+.PHONY: $(TESTS) check all .
diff --git a/test/llvmpasses/aliasscopes.jl b/test/llvmpasses/aliasscopes.jl
index 31b78cae922b50..5c0fe48091ade7 100644
--- a/test/llvmpasses/aliasscopes.jl
+++ b/test/llvmpasses/aliasscopes.jl
@@ -58,4 +58,3 @@ end
 emit(simple, Vector{Float64}, Vector{Float64})
 emit(constargs, Vector{Float64}, Const{Float64, 1})
 emit(micro_ker!, Matrix{Float64}, Vector{Float64}, Vector{Float64}, Int64, Int64, Int64)
-
diff --git a/test/llvmpasses/alloc-opt-gcframe.jl b/test/llvmpasses/alloc-opt-gcframe.jl
new file mode 100644
index 00000000000000..3b5fc3a51a606f
--- /dev/null
+++ b/test/llvmpasses/alloc-opt-gcframe.jl
@@ -0,0 +1,281 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s
+# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt,LateLowerGCFrame),FinalLowerGC' -S - | FileCheck %s
+
+isz = sizeof(UInt) == 8 ? "i64" : "i32"
+
+println("""
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@tag = external addrspace(10) global {}
+""")
+
+# CHECK-LABEL: @return_obj
+# CHECK-NOT: @julia.gc_alloc_obj
+# CHECK: %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+# CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15
+# CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
+# CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
+# CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
+# CHECK-NEXT: %v = call noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8* [[ptls_i8]], i32 [[SIZE_T:[0-9]+]], i32 16)
+# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
+println("""
+define {} addrspace(10)* @return_obj() {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
+  ret {} addrspace(10)* %v
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @return_load
+# CHECK: alloca i64
+# CHECK-NOT: @julia.gc_alloc_obj
+# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
+# CHECK-NOT: @tag
+# CHECK-NOT: @llvm.lifetime.end
+println("""
+define i64 @return_load(i64 %i) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
+  %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
+  %v64a11 = addrspacecast i64 addrspace(10)* %v64 to i64 addrspace(11)*
+  store i64 %i, i64 addrspace(11)* %v64a11, align 16, !tbaa !4
+  call void @external_function()
+  %l = load i64, i64 addrspace(11)* %v64a11, align 16, !tbaa !4
+  ret i64 %l
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @ccall_obj
+# CHECK: call {}*** @julia.get_pgcstack()
+# CHECK-NOT: @julia.gc_alloc_obj
+# CHECK: @ijl_gc_pool_alloc
+# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
+println("""
+define void @ccall_obj(i8* %fptr) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
+  %f = bitcast i8* %fptr to void ({} addrspace(10)*)*
+  call void %f({} addrspace(10)* %v)
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @ccall_ptr
+# CHECK: alloca i64
+# CHECK: call {}*** @julia.get_pgcstack()
+# CHECK-NOT: @julia.gc_alloc_obj
+# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
+# CHECK: %f = bitcast i8* %fptr to void (i8*)*
+# Currently the GC frame lowering pass strips away all operand bundles
+# CHECK-NEXT: call void %f(i8*
+# CHECK-NEXT: ret void
+println("""
+define void @ccall_ptr(i8* %fptr) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
+  %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
+  %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
+  %ptr = bitcast {}* %ptrj to i8*
+  %f = bitcast i8* %fptr to void (i8*)*
+  call void %f(i8* %ptr) [ "jl_roots"({} addrspace(10)* %v), "unknown_bundle"(i8* %ptr) ]
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @ccall_unknown_bundle
+# CHECK: call {}*** @julia.get_pgcstack()
+# CHECK-NOT: @julia.gc_alloc_obj
+# CHECK: @ijl_gc_pool_alloc
+# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !4
+println("""
+define void @ccall_unknown_bundle(i8* %fptr) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
+  %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
+  %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
+  %ptr = bitcast {}* %ptrj to i8*
+  %f = bitcast i8* %fptr to void (i8*)*
+  call void %f(i8* %ptr) [ "jl_not_jl_roots"({} addrspace(10)* %v) ]
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @lifetime_branches
+# CHECK: alloca i64
+# CHECK: call {}*** @julia.get_pgcstack()
+# CHECK: L1:
+# CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8,
+# CHECK: %f = bitcast i8* %fptr to void (i8*)*
+# CHECK-NEXT: call void %f(i8*
+# CHECK-NEXT: br i1 %b2, label %L2, label %L3
+
+# CHECK: L2:
+# CHECK-NEXT: %f2 = bitcast i8* %fptr to void ({}*)*
+# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8,
+# CHECK-NEXT: call void %f2({}* null)
+
+# CHECK: L3:
+# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8,
+println("""
+define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  br i1 %b, label %L1, label %L3
+
+L1:
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
+  %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
+  %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
+  %ptr = bitcast {}* %ptrj to i8*
+  %f = bitcast i8* %fptr to void (i8*)*
+  call void %f(i8* %ptr) [ "jl_roots"({} addrspace(10)* %v) ]
+  br i1 %b2, label %L2, label %L3
+
+L2:
+  %f2 = bitcast i8* %fptr to void ({}*)*
+  call void %f2({}* null)
+  br label %L3
+
+L3:
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @object_field
+# CHECK: call {}*** @julia.get_pgcstack()
+# CHECK-NOT: @julia.gc_alloc_obj
+# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !4
+println("""
+define void @object_field({} addrspace(10)* %field) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
+  %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
+  %vab = bitcast {} addrspace(11)* %va to {} addrspace(10)* addrspace(11)*
+  store {} addrspace(10)* %field, {} addrspace(10)* addrspace(11)* %vab, align 8
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @memcpy_opt
+# CHECK: alloca [16 x i8], align 16
+# CHECK: call {}*** @julia.get_pgcstack()
+# CHECK-NOT: @julia.gc_alloc_obj
+# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
+println("""
+define void @memcpy_opt(i8* %v22) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag)
+  %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)*
+  %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)*
+  call void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* %v21, i8* %v22, i64 16, i32 8, i1 false)
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @preserve_opt
+# CHECK: call {}*** @julia.get_pgcstack()
+# CHECK-NOT: @julia.gc_alloc_obj
+# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK-NOT: @llvm.lifetime.end
+# CHECK: @external_function
+println("""
+define void @preserve_opt(i8* %v22) {
+top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 16, {} addrspace(10)* @tag)
+  %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)*
+  %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)*
+  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v19)
+  call void @external_function()
+  call void @llvm.julia.gc_preserve_end(token %tok)
+  call void @external_function()
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @preserve_branches
+# CHECK: call {}*** @julia.get_pgcstack()
+# CHECK: L1:
+# CHECK-NEXT: @external_function()
+# CHECK-NEXT: br i1 %b2, label %L2, label %L3
+
+# CHECK: L2:
+# CHECK: @external_function()
+# CHECK-NEXT: br label %L3
+
+# CHECK: L3:
+println("""
+define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %gcstack = bitcast {}*** %pgcstack to {}**
+  %current_task = getelementptr inbounds {}*, {}** %gcstack, i64 -12
+  br i1 %b, label %L1, label %L3
+
+L1:
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, $isz 8, {} addrspace(10)* @tag)
+  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v)
+  call void @external_function()
+  br i1 %b2, label %L2, label %L3
+
+L2:
+  call void @external_function()
+  br label %L3
+
+L3:
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*,
+# CHECK: declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*,
+println("""
+declare void @external_function()
+declare {}*** @julia.get_pgcstack()
+declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, $isz, {} addrspace(10)*)
+declare {}* @julia.pointer_from_objref({} addrspace(11)*)
+declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+declare token @llvm.julia.gc_preserve_begin(...)
+declare void @llvm.julia.gc_preserve_end(token)
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"jtbaa_tag", !2, i64 0}
+!2 = !{!"jtbaa_data", !3, i64 0}
+!3 = !{!"jtbaa"}
+!4 = !{!5, !5, i64 0}
+!5 = !{!"jtbaa_mutab", !6, i64 0}
+!6 = !{!"jtbaa_value", !2, i64 0}
+""")
diff --git a/test/llvmpasses/alloc-opt-pass.jl b/test/llvmpasses/alloc-opt-pass.jl
new file mode 100644
index 00000000000000..4912a1dc261941
--- /dev/null
+++ b/test/llvmpasses/alloc-opt-pass.jl
@@ -0,0 +1,146 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -AllocOpt -S - | FileCheck %s
+# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(AllocOpt)' -S - | FileCheck %s
+
+isz = sizeof(UInt) == 8 ? "i64" : "i32"
+
+println("""
+@tag = external addrspace(10) global {}
+""")
+
+# Test that the gc_preserve intrinsics are deleted directly.
+
+# CHECK-LABEL: @preserve_branches
+# CHECK: call {}*** @julia.ptls_states()
+# CHECK: L1:
+# CHECK-NOT: @llvm.julia.gc_preserve_begin
+# CHECK-NEXT: @external_function()
+# CHECK-NEXT: br i1 %b2, label %L2, label %L3
+
+# CHECK: L2:
+# CHECK: @external_function()
+# CHECK-NEXT: br label %L3
+
+# CHECK: L3:
+println("""
+define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %ptls = call {}*** @julia.ptls_states()
+  %ptls_i8 = bitcast {}*** %ptls to i8*
+  br i1 %b, label %L1, label %L3
+
+L1:
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v)
+  call void @external_function()
+  br i1 %b2, label %L2, label %L3
+
+L2:
+  call void @external_function()
+  br label %L3
+
+L3:
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @preserve_branches2
+# CHECK: call {}*** @julia.ptls_states()
+# CHECK: L1:
+# CHECK-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2
+# CHECK-NEXT: @external_function()
+# CHECK-NEXT: br i1 %b2, label %L2, label %L3
+
+# CHECK: L2:
+# CHECK: @external_function()
+# CHECK-NEXT: br label %L3
+
+# CHECK: L3:
+println("""
+define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %ptls = call {}*** @julia.ptls_states()
+  %ptls_i8 = bitcast {}*** %ptls to i8*
+  %v2 = call {} addrspace(10)* @external_function2()
+  br i1 %b, label %L1, label %L3
+
+L1:
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v, {} addrspace(10)* %v2)
+  call void @external_function()
+  br i1 %b2, label %L2, label %L3
+
+L2:
+  call void @external_function()
+  br label %L3
+
+L3:
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+# CHECK-LABEL: @legal_int_types
+# CHECK: alloca [12 x i8]
+# CHECK-NOT: alloca i96
+# CHECK: ret void
+println("""
+define void @legal_int_types() {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %ptls = call {}*** @julia.ptls_states()
+  %ptls_i8 = bitcast {}*** %ptls to i8*
+  %var1 = call {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 12, {} addrspace(10)* @tag)
+  %var2 = addrspacecast {} addrspace(10)* %var1 to {} addrspace(11)*
+  %var3 = call {}* @julia.pointer_from_objref({} addrspace(11)* %var2)
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
+
+
+
+println("""
+declare void @external_function()
+declare {} addrspace(10)* @external_function2()
+declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
+declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*)
+declare {}* @julia.pointer_from_objref({} addrspace(11)*)
+declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
+declare token @llvm.julia.gc_preserve_begin(...)
+declare void @llvm.julia.gc_preserve_end(token)
+""")
+
+# CHECK-LABEL: @memref_collision
+# CHECK: call {}*** @julia.ptls_states()
+# CHECK-NOT: store {}
+# CHECK: store i
+# CHECK-NOT: store {}
+# CHECK: L1:
+# CHECK: load {}
+# CHECK: L2:
+# CHECK: load i
+println("""
+define void @memref_collision($isz %x) {
+  %pgcstack = call {}*** @julia.get_pgcstack()
+  %ptls = call {}*** @julia.ptls_states()
+  %ptls_i8 = bitcast {}*** %ptls to i8*
+  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
+  %v_p = bitcast {} addrspace(10)* %v to $isz addrspace(10)*
+  store $isz %x, $isz addrspace(10)* %v_p
+  br i1 0, label %L1, label %L2
+
+L1:
+  %v1 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
+  %v1_x = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %v1
+  ret void
+
+L2:
+  %v2 = bitcast {} addrspace(10)* %v to $isz addrspace(10)*
+  %v2_x = load i64, i64 addrspace(10)* %v2
+  ret void
+}
+""")
+# CHECK-LABEL: }{{$}}
diff --git a/test/llvmpasses/alloc-opt-pipeline.jl b/test/llvmpasses/alloc-opt-pipeline.jl
new file mode 100644
index 00000000000000..9437913e4054b8
--- /dev/null
+++ b/test/llvmpasses/alloc-opt-pipeline.jl
@@ -0,0 +1,23 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# RUN: julia --startup-file=no %s %t -O
+# RUN: cat %t/* | FileCheck %s
+
+include(joinpath("..", "testhelpers", "llvmpasses.jl"))
+
+# JuliaLang/julia#38922
+function haszerolayout(x::NTuple{32, VecElement{UInt8}})
+    rx = Ref(x)
+    GC.@preserve rx begin
+        lower = iszero(unsafe_load(Ptr{UInt128}(pointer_from_objref(rx)), 1))
+        upper = iszero(unsafe_load(Ptr{UInt128}(pointer_from_objref(rx)), 2))
+        lower & upper
+    end
+end
+
+# CHECK-LABEL: @julia_haszerolayout
+# CHECK: top:
+# CHECK-NOT: @jl_gc_pool_alloc
+# CHECK: extractelement
+# CHECK: ret i8
+emit(haszerolayout, NTuple{32,VecElement{UInt8}})
diff --git a/test/llvmpasses/alloc-opt.jl b/test/llvmpasses/alloc-opt.jl
deleted file mode 100644
index 4a41fb7a5539fe..00000000000000
--- a/test/llvmpasses/alloc-opt.jl
+++ /dev/null
@@ -1,263 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# RUN: julia --startup-file=no %s | opt -load libjulia%shlibext -AllocOpt -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s
-
-isz = sizeof(UInt) == 8 ? "i64" : "i32"
-
-println("""
-@tag = external addrspace(10) global {}
-""")
-
-# CHECK-LABEL: @return_obj
-# CHECK-NOT: @julia.gc_alloc_obj
-# CHECK: %v = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc
-# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
-println("""
-define {} addrspace(10)* @return_obj() {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  ret {} addrspace(10)* %v
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @return_load
-# CHECK: alloca i64
-# CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
-# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
-# CHECK-NOT: @tag
-# CHECK-NOT: @llvm.lifetime.end
-println("""
-define i64 @return_load(i64 %i) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-  %v64a11 = addrspacecast i64 addrspace(10)* %v64 to i64 addrspace(11)*
-  store i64 %i, i64 addrspace(11)* %v64a11, align 16, !tbaa !4
-  call void @external_function()
-  %l = load i64, i64 addrspace(11)* %v64a11, align 16, !tbaa !4
-  ret i64 %l
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @ccall_obj
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK-NOT: @julia.gc_alloc_obj
-# CHECK: @jl_gc_pool_alloc
-# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
-println("""
-define void @ccall_obj(i8* %fptr) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %f = bitcast i8* %fptr to void ({} addrspace(10)*)*
-  call void %f({} addrspace(10)* %v)
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @ccall_ptr
-# CHECK: alloca i64
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
-# CHECK: call void @llvm.lifetime.start{{.*}}(i64 8, i8*
-# CHECK: %f = bitcast i8* %fptr to void (i8*)*
-# Currently the GC frame lowering pass strips away all operand bundles
-# CHECK-NEXT: call void %f(i8*
-# CHECK-NEXT: ret void
-println("""
-define void @ccall_ptr(i8* %fptr) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
-  %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
-  %ptr = bitcast {}* %ptrj to i8*
-  %f = bitcast i8* %fptr to void (i8*)*
-  call void %f(i8* %ptr) [ "jl_roots"({} addrspace(10)* %v), "unknown_bundle"(i8* %ptr) ]
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @ccall_unknown_bundle
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK-NOT: @julia.gc_alloc_obj
-# CHECK: @jl_gc_pool_alloc
-# CHECK: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}} unordered, align 8, !tbaa !0
-println("""
-define void @ccall_unknown_bundle(i8* %fptr) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
-  %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
-  %ptr = bitcast {}* %ptrj to i8*
-  %f = bitcast i8* %fptr to void (i8*)*
-  call void %f(i8* %ptr) [ "jl_not_jl_roots"({} addrspace(10)* %v) ]
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @lifetime_branches
-# CHECK: alloca i64
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK: L1:
-# CHECK-NEXT: call void @llvm.lifetime.start{{.*}}(i64 8,
-# CHECK: %f = bitcast i8* %fptr to void (i8*)*
-# CHECK-NEXT: call void %f(i8*
-# CHECK-NEXT: br i1 %b2, label %L2, label %L3
-
-# CHECK: L2:
-# CHECK-NEXT: %f2 = bitcast i8* %fptr to void ({}*)*
-# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8,
-# CHECK-NEXT: call void %f2({}* null)
-
-# CHECK: L3:
-# CHECK-NEXT: call void @llvm.lifetime.end{{.*}}(i64 8,
-println("""
-define void @lifetime_branches(i8* %fptr, i1 %b, i1 %b2) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  br i1 %b, label %L1, label %L3
-
-L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
-  %ptrj = call {}* @julia.pointer_from_objref({} addrspace(11)* %va)
-  %ptr = bitcast {}* %ptrj to i8*
-  %f = bitcast i8* %fptr to void (i8*)*
-  call void %f(i8* %ptr) [ "jl_roots"({} addrspace(10)* %v) ]
-  br i1 %b2, label %L2, label %L3
-
-L2:
-  %f2 = bitcast i8* %fptr to void ({}*)*
-  call void %f2({}* null)
-  br label %L3
-
-L3:
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @object_field
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
-# CHECK-NOT: store {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* {{.*}}, align 8, !tbaa !0
-println("""
-define void @object_field({} addrspace(10)* %field) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %va = addrspacecast {} addrspace(10)* %v to {} addrspace(11)*
-  %vab = bitcast {} addrspace(11)* %va to {} addrspace(10)* addrspace(11)*
-  store {} addrspace(10)* %field, {} addrspace(10)* addrspace(11)* %vab, align 8
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @memcpy_opt
-# CHECK: alloca i128, align 16
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
-# CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
-println("""
-define void @memcpy_opt(i8* %v22) {
-top:
-  %v6 = call {}*** @julia.ptls_states()
-  %v18 = bitcast {}*** %v6 to i8*
-  %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %v18, $isz 16, {} addrspace(10)* @tag)
-  %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)*
-  %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)*
-  call void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* %v21, i8* %v22, i64 16, i32 8, i1 false)
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @preserve_opt
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK-NOT: @julia.gc_alloc_obj
-# CHECK-NOT: @jl_gc_pool_alloc
-# CHECK-NOT: @llvm.lifetime.end
-# CHECK: @external_function
-println("""
-define void @preserve_opt(i8* %v22) {
-top:
-  %v6 = call {}*** @julia.ptls_states()
-  %v18 = bitcast {}*** %v6 to i8*
-  %v19 = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %v18, $isz 16, {} addrspace(10)* @tag)
-  %v20 = bitcast {} addrspace(10)* %v19 to i8 addrspace(10)*
-  %v21 = addrspacecast i8 addrspace(10)* %v20 to i8 addrspace(11)*
-  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v19)
-  call void @external_function()
-  call void @llvm.julia.gc_preserve_end(token %tok)
-  call void @external_function()
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @preserve_branches
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK: L1:
-# CHECK-NEXT: @external_function()
-# CHECK-NEXT: br i1 %b2, label %L2, label %L3
-
-# CHECK: L2:
-# CHECK: @external_function()
-# CHECK-NEXT: br label %L3
-
-# CHECK: L3:
-println("""
-define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  br i1 %b, label %L1, label %L3
-
-L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v)
-  call void @external_function()
-  br i1 %b2, label %L2, label %L3
-
-L2:
-  call void @external_function()
-  br label %L3
-
-L3:
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK: declare noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc(i8*,
-# CHECK: declare noalias nonnull {} addrspace(10)* @jl_gc_big_alloc(i8*,
-println("""
-declare void @external_function()
-declare {}*** @julia.ptls_states()
-declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*)
-declare {}* @julia.pointer_from_objref({} addrspace(11)*)
-declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-declare token @llvm.julia.gc_preserve_begin(...)
-declare void @llvm.julia.gc_preserve_end(token)
-
-!0 = !{!1, !1, i64 0}
-!1 = !{!"jtbaa_tag", !2, i64 0}
-!2 = !{!"jtbaa_data", !3, i64 0}
-!3 = !{!"jtbaa"}
-!4 = !{!5, !5, i64 0}
-!5 = !{!"jtbaa_mutab", !6, i64 0}
-!6 = !{!"jtbaa_value", !2, i64 0}
-""")
diff --git a/test/llvmpasses/alloc-opt2.jl b/test/llvmpasses/alloc-opt2.jl
deleted file mode 100644
index b50c6f03caa5a0..00000000000000
--- a/test/llvmpasses/alloc-opt2.jl
+++ /dev/null
@@ -1,91 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-# RUN: julia --startup-file=no %s | opt -load libjulia%shlibext -AllocOpt -S - | FileCheck %s
-
-isz = sizeof(UInt) == 8 ? "i64" : "i32"
-
-println("""
-@tag = external addrspace(10) global {}
-""")
-
-# Test that the gc_preserve intrinsics are deleted directly.
-
-# CHECK-LABEL: @preserve_branches
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK: L1:
-# CHECK-NOT: @llvm.julia.gc_preserve_begin
-# CHECK-NEXT: @external_function()
-# CHECK-NEXT: br i1 %b2, label %L2, label %L3
-
-# CHECK: L2:
-# CHECK: @external_function()
-# CHECK-NEXT: br label %L3
-
-# CHECK: L3:
-println("""
-define void @preserve_branches(i8* %fptr, i1 %b, i1 %b2) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  br i1 %b, label %L1, label %L3
-
-L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v)
-  call void @external_function()
-  br i1 %b2, label %L2, label %L3
-
-L2:
-  call void @external_function()
-  br label %L3
-
-L3:
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-# CHECK-LABEL: @preserve_branches2
-# CHECK: call {}*** @julia.ptls_states()
-# CHECK: L1:
-# CHECK-NEXT: @llvm.julia.gc_preserve_begin{{.*}}{} addrspace(10)* %v2
-# CHECK-NEXT: @external_function()
-# CHECK-NEXT: br i1 %b2, label %L2, label %L3
-
-# CHECK: L2:
-# CHECK: @external_function()
-# CHECK-NEXT: br label %L3
-
-# CHECK: L3:
-println("""
-define void @preserve_branches2(i8* %fptr, i1 %b, i1 %b2) {
-  %ptls = call {}*** @julia.ptls_states()
-  %ptls_i8 = bitcast {}*** %ptls to i8*
-  %v2 = call {} addrspace(10)* @external_function2()
-  br i1 %b, label %L1, label %L3
-
-L1:
-  %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, $isz 8, {} addrspace(10)* @tag)
-  %tok = call token (...) @llvm.julia.gc_preserve_begin({} addrspace(10)* %v, {} addrspace(10)* %v2)
-  call void @external_function()
-  br i1 %b2, label %L2, label %L3
-
-L2:
-  call void @external_function()
-  br label %L3
-
-L3:
-  ret void
-}
-""")
-# CHECK-LABEL: }{{$}}
-
-println("""
-declare void @external_function()
-declare {} addrspace(10)* @external_function2()
-declare {}*** @julia.ptls_states()
-declare noalias {} addrspace(10)* @julia.gc_alloc_obj(i8*, $isz, {} addrspace(10)*)
-declare i64 @julia.pointer_from_objref({} addrspace(11)*)
-declare void @llvm.memcpy.p11i8.p0i8.i64(i8 addrspace(11)* nocapture writeonly, i8* nocapture readonly, i64, i32, i1)
-declare token @llvm.julia.gc_preserve_begin(...)
-declare void @llvm.julia.gc_preserve_end(token)
-""")
diff --git a/test/llvmpasses/cpu-features.ll b/test/llvmpasses/cpu-features.ll
new file mode 100644
index 00000000000000..ccb8cc69f0f66e
--- /dev/null
+++ b/test/llvmpasses/cpu-features.ll
@@ -0,0 +1,43 @@
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CPUFeatures -simplifycfg -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CPUFeatures,simplifycfg' -S %s | FileCheck %s
+
+declare i1 @julia.cpu.have_fma.f64()
+declare double @with_fma(double %0, double %1, double %2)
+declare double @without_fma(double %0, double %1, double %2)
+
+; CHECK: @fma1
+define double @fma1(double %0, double %1, double %2) #0 {
+top:
+  %3 = call i1 @julia.cpu.have_fma.f64()
+  br i1 %3, label %L1, label %L2
+
+; CHECK-NOT: @julia.cpu.have_fma
+; CHECK: @with_fma
+L1:                                               ; preds = %top
+  %4 = call double @with_fma(double %0, double %1, double %2)
+  ret double %4
+
+L2:                                               ; preds = %top
+  %5 = call double @without_fma(double %0, double %1, double %2)
+  ret double %5
+}
+
+; CHECK: @fma2
+define double @fma2(double %0, double %1, double %2) #1 {
+top:
+  %3 = call i1 @julia.cpu.have_fma.f64()
+  br i1 %3, label %L1, label %L2
+
+; CHECK-NOT: @julia.cpu.have_fma
+; CHECK: @without_fma
+L1:                                               ; preds = %top
+  %4 = call double @with_fma(double %0, double %1, double %2)
+  ret double %4
+
+L2:                                               ; preds = %top
+  %5 = call double @without_fma(double %0, double %1, double %2)
+  ret double %5
+}
+
+attributes #0 = { "target-features"="+fma" }
+attributes #1 = { "target-features"="-fma" }
diff --git a/test/llvmpasses/fastmath.jl b/test/llvmpasses/fastmath.jl
index 4e0d35e300d5c0..76b048c19a2a02 100644
--- a/test/llvmpasses/fastmath.jl
+++ b/test/llvmpasses/fastmath.jl
@@ -16,3 +16,19 @@ import Base.FastMath
 
 # CHECK: call fast float @llvm.sqrt.f32(float %0)
 emit(FastMath.sqrt_fast, Float32)
+
+
+# Float16 operations should be performed as Float32, unless @fastmath is specified
+# TODO: this is not true for platforms that natively support Float16
+
+foo(x::T,y::T) where T = x-y == zero(T)
+# LOWER: fsub half %0, %1
+# FINAL: %2 = fpext half %0 to float
+# FINAL: %3 = fpext half %1 to float
+# FINAL: fsub half %2, %3
+emit(foo, Float16, Float16)
+
+@fastmath foo(x::T,y::T) where T = x-y == zero(T)
+# LOWER: fsub fast half %0, %1
+# FINAL: fsub fast half %0, %1
+emit(foo, Float16, Float16)
diff --git a/test/llvmpasses/final-lower-gc.ll b/test/llvmpasses/final-lower-gc.ll
index ca15a60472550b..176b695ba918b1 100644
--- a/test/llvmpasses/final-lower-gc.ll
+++ b/test/llvmpasses/final-lower-gc.ll
@@ -1,12 +1,13 @@
-; RUN: opt -load libjulia%shlibext -FinalLowerGC -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -FinalLowerGC -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='FinalLowerGC' -S %s | FileCheck %s
+
 
 @tag = external addrspace(10) global {}
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
-declare {} addrspace(10)* @jl_box_int64(i64)
+declare {} addrspace(10)* @ijl_box_int64(i64)
 declare {}*** @julia.ptls_states()
-declare void @jl_safepoint()
-declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
+declare {}*** @julia.get_pgcstack()
 
 declare noalias nonnull {} addrspace(10)** @julia.new_gc_frame(i32)
 declare void @julia.push_gc_frame({} addrspace(10)**, i32)
@@ -21,12 +22,11 @@ top:
 ; CHECK-LABEL: @gc_frame_lowering
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
   %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-; CHECK: %ptls = call {}*** @julia.ptls_states()
-  %ptls = call {}*** @julia.ptls_states()
+; CHECK:  [[GCFRAME_SLOT:%.*]] = call {}*** @julia.get_pgcstack()
+  %pgcstack = call {}*** @julia.get_pgcstack()
 ; CHECK-DAG: [[GCFRAME_SIZE_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 0
 ; CHECK-DAG: [[GCFRAME_SIZE_PTR2:%.*]] = bitcast {} addrspace(10)** [[GCFRAME_SIZE_PTR]] to i64*
 ; CHECK-DAG: store i64 8, i64* [[GCFRAME_SIZE_PTR2]], align 8, !tbaa !0
-; CHECK-DAG: [[GCFRAME_SLOT:%.*]] = getelementptr inbounds {}**, {}*** %ptls, i32 0
 ; CHECK-DAG: [[PREV_GCFRAME_PTR:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1
 ; CHECK-DAG: [[PREV_GCFRAME_PTR2:%.*]] = bitcast {} addrspace(10)** [[PREV_GCFRAME_PTR]] to {}***
 ; CHECK-DAG: [[PREV_GCFRAME:%.*]] = load {}**, {}*** [[GCFRAME_SLOT]], align 8
@@ -34,11 +34,11 @@ top:
 ; CHECK-DAG: [[GCFRAME_SLOT2:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)***
 ; CHECK-NEXT: store {} addrspace(10)** %gcframe, {} addrspace(10)*** [[GCFRAME_SLOT2]], align 8
   call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
-  %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
+  %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
 ; CHECK: %frame_slot_1 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 3
   %frame_slot_1 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 1)
   store {} addrspace(10)* %aboxed, {} addrspace(10)** %frame_slot_1, align 8
-  %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
+  %bboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %b)
 ; CHECK: %frame_slot_2 = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
   %frame_slot_2 = call {} addrspace(10)** @julia.get_gc_frame_slot({} addrspace(10)** %gcframe, i32 0)
   store {} addrspace(10)* %bboxed, {} addrspace(10)** %frame_slot_2, align 8
@@ -46,8 +46,7 @@ top:
   call void @boxed_simple({} addrspace(10)* %aboxed, {} addrspace(10)* %bboxed)
 ; CHECK-NEXT: [[PREV_GCFRAME_PTR3:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 1
 ; CHECK-NEXT: [[PREV_GCFRAME_PTR4:%.*]] = load {} addrspace(10)*, {} addrspace(10)** [[PREV_GCFRAME_PTR3]], align 8, !tbaa !0
-; CHECK-NEXT: [[GCFRAME_SLOT3:%.*]] = getelementptr inbounds {}**, {}*** %ptls, i32 0
-; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT3]] to {} addrspace(10)**
+; CHECK-NEXT: [[GCFRAME_SLOT4:%.*]] = bitcast {}*** [[GCFRAME_SLOT]] to {} addrspace(10)**
 ; CHECK-NEXT: store {} addrspace(10)* [[PREV_GCFRAME_PTR4]], {} addrspace(10)** [[GCFRAME_SLOT4]], align 8, !tbaa !0
   call void @julia.pop_gc_frame({} addrspace(10)** %gcframe)
 ; CHECK-NEXT: ret void
@@ -57,9 +56,10 @@ top:
 define {} addrspace(10)* @gc_alloc_lowering() {
 top:
 ; CHECK-LABEL: @gc_alloc_lowering
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %ptls_i8 = bitcast {}*** %ptls to i8*
-; CHECK: %v = call noalias nonnull {} addrspace(10)* @jl_gc_pool_alloc
+; CHECK: %v = call noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc
   %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, i64 8)
   %0 = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
   %1 = getelementptr {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %0, i64 -1
diff --git a/test/llvmpasses/fmf.jl b/test/llvmpasses/fmf.jl
index 71c560b4ace7fa..3a792550fe1ea0 100644
--- a/test/llvmpasses/fmf.jl
+++ b/test/llvmpasses/fmf.jl
@@ -1,4 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+
 # REQUIRES: x86_64
 # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll
 # RUN: cat %t/module.ll | llc - -mtriple=x86_64-- -mattr=fma | FileCheck %s
diff --git a/test/llvmpasses/gcroots.ll b/test/llvmpasses/gcroots.ll
index 15bdaf433ee343..84f120712734b0 100644
--- a/test/llvmpasses/gcroots.ll
+++ b/test/llvmpasses/gcroots.ll
@@ -1,15 +1,18 @@
-; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s
 
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
 
 define void @simple(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @simple
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 ; CHECK: call {} addrspace(10)* @jl_box_int64
@@ -33,6 +36,7 @@ define void @leftover_alloca({} addrspace(10)* %a) {
 ; relying on mem2reg to catch simple cases such as this earlier
 ; CHECK-LABEL: @leftover_alloca
 ; CHECK: %var = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %var = alloca {} addrspace(10)*
     store {} addrspace(10)* %a, {} addrspace(10)** %var
@@ -47,6 +51,7 @@ declare void @union_arg({{} addrspace(10)*, i8})
 
 define void @simple_union() {
 ; CHECK-LABEL: @simple_union
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 ; CHECK: %a = call { {} addrspace(10)*, i8 } @union_ret()
     %a = call { {} addrspace(10)*, i8 } @union_ret()
@@ -61,6 +66,7 @@ declare void @one_arg_boxed({} addrspace(10)*)
 
 define void @select_simple(i64 %a, i64 %b) {
 ; CHECK-LABEL: @select_simple
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %bboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %b)
@@ -74,6 +80,7 @@ define void @phi_simple(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_simple
 ; CHECK:   %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %cmp = icmp eq i64 %a, %b
     br i1 %cmp, label %alabel, label %blabel
@@ -96,6 +103,7 @@ declare void @one_arg_decayed(i64 addrspace(12)*)
 define void @select_lift(i64 %a, i64 %b) {
 ; CHECK-LABEL: @select_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
@@ -112,6 +120,7 @@ define void @phi_lift(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_lift
 ; CHECK: %gclift = phi {} addrspace(10)* [ %aboxed, %alabel ], [ %bboxed, %blabel ], [ %gclift, %common ]
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %cmp = icmp eq i64 %a, %b
     br i1 %cmp, label %alabel, label %blabel
@@ -133,6 +142,7 @@ common:
 define void @phi_lift_union(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @phi_lift_union
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %cmp = icmp eq i64 %a, %b
     br i1 %cmp, label %alabel, label %blabel
@@ -158,6 +168,7 @@ define void @live_if_live_out(i64 %a, i64 %b) {
 ; CHECK-LABEL: @live_if_live_out
 top:
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 ; The failure case is failing to realize that `aboxed` is live across the first
 ; one_arg_boxed safepoint and putting bboxed in the same root slot
@@ -175,6 +186,7 @@ succ:
 define {} addrspace(10)* @ret_use(i64 %a, i64 %b) {
 ; CHECK-LABEL: @ret_use
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -185,6 +197,7 @@ define {} addrspace(10)* @ret_use(i64 %a, i64 %b) {
 define {{} addrspace(10)*, i8} @ret_use_struct() {
 ; CHECK-LABEL: @ret_use_struct
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 ; CHECK: %aunion = call { {} addrspace(10)*, i8 } @union_ret()
     %aunion = call { {} addrspace(10)*, i8 } @union_ret()
@@ -201,6 +214,7 @@ define i8 @nosafepoint({} addrspace(10)* dereferenceable(16)) {
 ; CHECK-LABEL: @nosafepoint
 ; CHECK-NOT: %gcframe
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %1 = call {}*** @julia.ptls_states()
   %2 = bitcast {}*** %1 to {} addrspace(10)**
   %3 = getelementptr {} addrspace(10)*, {} addrspace(10)** %2, i64 3
@@ -219,6 +233,7 @@ top:
 define void @global_ref() {
 ; CHECK-LABEL: @global_ref
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load {} addrspace(10)*, {} addrspace(10)** getelementptr ({} addrspace(10)*, {} addrspace(10)** inttoptr (i64 140540744325952 to {} addrspace(10)**), i64 1)
 ; CHECK: store {} addrspace(10)* %loaded, {} addrspace(10)**
@@ -230,6 +245,7 @@ define {} addrspace(10)* @no_redundant_rerooting(i64 %a, i1 %cond) {
 ; CHECK-LABEL: @no_redundant_rerooting
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -254,6 +270,7 @@ define void @memcpy_use(i64 %a, i64 *%aptr) {
 ; CHECK-LABEL: @memcpy_use
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -270,6 +287,7 @@ define void @gc_preserve(i64 %a) {
 ; CHECK-LABEL: @gc_preserve
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -291,6 +309,7 @@ define void @gc_preserve_vec([2 x <2 x {} addrspace(10)*>] addrspace(11)* nocapt
 ; CHECK-LABEL: @gc_preserve_vec
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 6
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %v = load [2 x <2 x {} addrspace(10)*>], [2 x <2 x {} addrspace(10)*>] addrspace(11)* %0, align 8
 ; CHECK-DAG: [[EXTRACT11:%.*]] = extractvalue [2 x <2 x {} addrspace(10)*>] %v, 0
@@ -318,6 +337,7 @@ define {} addrspace(10)* @gv_const() {
 ; CHECK-LABEL: @gv_const
 ; CHECK-NOT: %gcframe
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %v10 = load {}*, {}** @gv1, !tbaa !2
     %v1 = addrspacecast {}* %v10 to {} addrspace(10)*
@@ -331,6 +351,7 @@ top:
 define {} addrspace(10)* @vec_jlcallarg({} addrspace(10)*, {} addrspace(10)**, i32) {
 ; CHECK-LABEL: @vec_jlcallarg
 ; CHECK-NOT: %gcframe
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
   %v5 = bitcast {} addrspace(10)** %1 to <2 x {} addrspace(10)*>*
   %v6 = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*>* %v5, align 8
@@ -343,6 +364,7 @@ declare {} addrspace(10) *@alloc()
 define {} addrspace(10)* @vec_loadobj() {
 ; CHECK-LABEL: @vec_loadobj
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
   %obj = call {} addrspace(10) *@alloc()
   %v1 = bitcast {} addrspace(10) * %obj to {} addrspace(10)* addrspace(10)*
@@ -356,6 +378,7 @@ define {} addrspace(10)* @vec_loadobj() {
 define {} addrspace(10)* @vec_gep() {
 ; CHECK-LABEL: @vec_gep
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %v4 = call {}*** @julia.ptls_states()
   %obj = call {} addrspace(10) *@alloc()
   %obj1 = bitcast {} addrspace(10) * %obj to {} addrspace(10)* addrspace(10)*
@@ -371,6 +394,7 @@ define void @loopyness(i1 %cond1, {} addrspace(10) *%arg) {
 ; CHECK-LABEL: @loopyness
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     br label %header
 
@@ -402,6 +426,7 @@ define {} addrspace(10)* @phi_union(i1 %cond) {
 ; CHECK-LABEL: @phi_union
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   br i1 %cond, label %a, label %b
 
@@ -426,6 +451,7 @@ define {} addrspace(10)* @select_union(i1 %cond) {
 ; CHECK-LABEL: @select_union
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %obj = call {} addrspace(10) *@alloc()
   %aobj = insertvalue {{} addrspace(10)*, i8} undef, {} addrspace(10)* %obj, 0
@@ -441,6 +467,7 @@ define i8 @simple_arrayptr() {
 ; CHECK-LABEL: @simple_arrayptr
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %obj2 = call {} addrspace(10) *@alloc()
@@ -457,6 +484,7 @@ define {} addrspace(10)* @vecstoreload(<2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecstoreload
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     call void @jl_safepoint()
@@ -470,6 +498,7 @@ define void @vecphi(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecphi
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     br i1 %cond, label %A, label %B
 
@@ -495,6 +524,7 @@ define i8 @phi_arrayptr(i1 %cond) {
 ; CHECK-LABEL: @phi_arrayptr
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     br i1 %cond, label %A, label %B
 
@@ -533,6 +563,7 @@ define void @vecselect(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecselect
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     call void @jl_safepoint()
@@ -548,6 +579,7 @@ top:
 define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecselect_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*>
@@ -565,6 +597,7 @@ define void @vecselect_lift(i1 %cond, <2 x {} addrspace(10)*> *%arg) {
 define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) {
 ; CHECK-LABEL: @vecvecselect_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %loaded = load <2 x {} addrspace(10)*>, <2 x {} addrspace(10)*> *%arg
     %decayed = addrspacecast <2 x {} addrspace(10)*> %loaded to <2 x i64 addrspace(12)*>
@@ -582,6 +615,7 @@ define void @vecvecselect_lift(<2 x i1> %cond, <2 x {} addrspace(10)*> *%arg) {
 define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
 ; CHECK-LABEL: @vecscalarselect_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
@@ -600,6 +634,7 @@ define void @vecscalarselect_lift(<2 x i1> %cond, i64 %a) {
 define void @scalarvecselect_lift(i1 %cond, i64 %a) {
 ; CHECK-LABEL: @scalarvecselect_lift
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
     %adecayed = addrspacecast {} addrspace(10)* %aboxed to i64 addrspace(12)*
@@ -619,6 +654,7 @@ define i8 @select_arrayptr(i1 %cond) {
 ; CHECK-LABEL: @select_arrayptr
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %obj1 = call {} addrspace(10) *@alloc()
     %obj2 = call {} addrspace(10) *@alloc()
@@ -648,6 +684,7 @@ define i8 @vector_arrayptrs() {
 ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 ;
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11) *
@@ -669,6 +706,7 @@ define i8 @masked_arrayptrs() {
 ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 ;
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11) *
@@ -690,6 +728,7 @@ define i8 @gather_arrayptrs() {
 ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 ;
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)*
@@ -710,6 +749,7 @@ define i8 @gather_arrayptrs_alltrue() {
 ; CHECK: store {} addrspace(10)* %obj1, {} addrspace(10)** [[GEP0]]
 ;
 top:
+   %pgcstack = call {}*** @julia.get_pgcstack()
    %ptls = call {}*** @julia.ptls_states()
    %obj1 = call {} addrspace(10) *@alloc()
    %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)*
@@ -728,6 +768,7 @@ define i8 @lost_select_decayed(i1 %arg1) {
 ; CHECK: [[GEP0:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)** %gcframe, i32 2
 ; CHECK: store {} addrspace(10)* [[SOMETHING:%.*]], {} addrspace(10)** [[GEP0]]
 top:
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %obj1 = call {} addrspace(10) *@alloc()
     %decayed = addrspacecast {} addrspace(10) *%obj1 to {} addrspace(11)*
diff --git a/test/llvmpasses/julia-licm.ll b/test/llvmpasses/julia-licm.ll
new file mode 100644
index 00000000000000..0fff844e3affc2
--- /dev/null
+++ b/test/llvmpasses/julia-licm.ll
@@ -0,0 +1,76 @@
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -JuliaLICM -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='JuliaLICM' -S %s | FileCheck %s
+
+@tag = external addrspace(10) global {}, align 16
+
+declare void @julia.write_barrier({}*, ...)
+
+declare {}*** @julia.get_pgcstack()
+
+define nonnull {} addrspace(10)* @julia_allocation_hoist(i64 signext %0) #0 {
+top:
+  %1 = call {}*** @julia.get_pgcstack()
+  %2 = icmp sgt i64 %0, 0
+  br i1 %2, label %L4, label %L3
+
+L3.loopexit:                                      ; preds = %L22
+  %.lcssa = phi {} addrspace(10)* [ %3, %L22 ]
+  br label %L3
+
+L3:                                               ; preds = %L3.loopexit, %top
+  %merge = phi {} addrspace(10)* [ addrspacecast ({}* inttoptr (i64 139952239804424 to {}*) to {} addrspace(10)*), %top ], [ %.lcssa, %L3.loopexit ]
+  ret {} addrspace(10)* %merge
+
+L4:                                               ; preds = %top
+  %current_task112 = getelementptr inbounds {}**, {}*** %1, i64 -12
+  %current_task1 = bitcast {}*** %current_task112 to {}**
+  ; CHECK: %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag)
+  ; CHECK-NEXT: br label %L22
+  br label %L22
+
+L22:                                              ; preds = %L4, %L22
+  %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ]
+  ; CHECK: %value_phi5 = phi i64 [ 1, %L4 ], [ %5, %L22 ]
+  ; CHECK-NEXT %4 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)*
+  %3 = call noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}** nonnull %current_task1, i64 8, {} addrspace(10)* @tag) #2
+  %4 = bitcast {} addrspace(10)* %3 to i64 addrspace(10)*
+  store i64 %value_phi5, i64 addrspace(10)* %4, align 8, !tbaa !2
+  %.not = icmp eq i64 %value_phi5, %0
+  %5 = add i64 %value_phi5, 1
+  br i1 %.not, label %L3.loopexit, label %L22
+}
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*) #2
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
+
+; Function Attrs: argmemonly nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
+
+; Function Attrs: inaccessiblemem_or_argmemonly
+declare void @ijl_gc_queue_root({} addrspace(10)*) #4
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @ijl_gc_pool_alloc(i8*, i32, i32) #2
+
+; Function Attrs: allocsize(1)
+declare noalias nonnull {} addrspace(10)* @ijl_gc_big_alloc(i8*, i64) #2
+
+attributes #0 = { "probe-stack"="inline-asm" }
+attributes #1 = { "probe-stack"="inline-asm" "thunk" }
+attributes #2 = { allocsize(1) }
+attributes #3 = { argmemonly nofree nosync nounwind willreturn }
+attributes #4 = { inaccessiblemem_or_argmemonly }
+
+!llvm.module.flags = !{!0, !1}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = !{!3, !3, i64 0}
+!3 = !{!"jtbaa_mutab", !4, i64 0}
+!4 = !{!"jtbaa_value", !5, i64 0}
+!5 = !{!"jtbaa_data", !6, i64 0}
+!6 = !{!"jtbaa", !7, i64 0}
+!7 = !{!"jtbaa"}
diff --git a/test/llvmpasses/late-lower-gc.ll b/test/llvmpasses/late-lower-gc.ll
index 115d703f65f92c..c2b67f70111eaa 100644
--- a/test/llvmpasses/late-lower-gc.ll
+++ b/test/llvmpasses/late-lower-gc.ll
@@ -1,21 +1,22 @@
-; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame)' -S %s | FileCheck %s
 
 @tag = external addrspace(10) global {}, align 16
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
-declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare {} addrspace(10)* @jl_apply_generic({} addrspace(10)*, {} addrspace(10)**, i32)
-declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj(i8*, i64, {} addrspace(10)*)
+declare noalias nonnull {} addrspace(10)* @julia.gc_alloc_obj({}**, i64, {} addrspace(10)*)
 declare i32 @rooting_callee({} addrspace(12)*, {} addrspace(12)*)
 
 define void @gc_frame_lowering(i64 %a, i64 %b) {
 top:
 ; CHECK-LABEL: @gc_frame_lowering
 ; CHECK: %gcframe = call {} addrspace(10)** @julia.new_gc_frame(i32 2)
-    %ptls = call {}*** @julia.ptls_states()
-; CHECK: %ptls = call {}*** @julia.ptls_states()
+; CHECK:  %pgcstack = call {}*** @julia.get_pgcstack()
+    %pgcstack = call {}*** @julia.get_pgcstack()
 ; CHECK-NEXT: call void @julia.push_gc_frame({} addrspace(10)** %gcframe, i32 2)
 ; CHECK-NEXT: call {} addrspace(10)* @jl_box_int64
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
@@ -37,13 +38,19 @@ top:
 define {} addrspace(10)* @gc_alloc_lowering() {
 top:
 ; CHECK-LABEL: @gc_alloc_lowering
-    %ptls = call {}*** @julia.ptls_states()
-    %ptls_i8 = bitcast {}*** %ptls to i8*
-; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8)
+    %pgcstack = call {}*** @julia.get_pgcstack()
+    %0 = bitcast {}*** %pgcstack to {}**
+    %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
+; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15
+; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
+; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
+; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8)
 ; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
 ; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !0
-    %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
+; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
+    %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
 ; CHECK-NEXT: ret {} addrspace(10)* %v
     ret {} addrspace(10)* %v
 }
@@ -56,20 +63,26 @@ top:
 define void @gc_drop_aliasing() {
 top:
 ; CHECK-LABEL: @gc_drop_aliasing
-    %ptls = call {}*** @julia.ptls_states()
-    %ptls_i8 = bitcast {}*** %ptls to i8*
-; CHECK: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* %ptls_i8, [[SIZE_T:i.[0-9]+]] 8)
+    %pgcstack = call {}*** @julia.get_pgcstack()
+    %0 = bitcast {}*** %pgcstack to {}**
+    %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
+; CHECK: %current_task = getelementptr inbounds {}*, {}** %0, i64 -12
+; CHECK-NEXT: [[ptls_field:%.*]] = getelementptr inbounds {}*, {}** %current_task, i64 15
+; CHECK-NEXT: [[ptls_load:%.*]] = load {}*, {}** [[ptls_field]], align 8, !tbaa !0
+; CHECK-NEXT: [[ppjl_ptls:%.*]] = bitcast {}* [[ptls_load]] to {}**
+; CHECK-NEXT: [[ptls_i8:%.*]] = bitcast {}** [[ppjl_ptls]] to i8*
+; CHECK-NEXT: %v = call {} addrspace(10)* @julia.gc_alloc_bytes(i8* [[ptls_i8]], [[SIZE_T:i.[0-9]+]] 8)
 ; CHECK-NEXT: [[V2:%.*]] = bitcast {} addrspace(10)* %v to {} addrspace(10)* addrspace(10)*
 ; CHECK-NEXT: [[V_HEADROOM:%.*]] = getelementptr inbounds {} addrspace(10)*, {} addrspace(10)* addrspace(10)* [[V2]], i64 -1
-; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !0
-    %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj(i8* %ptls_i8, i64 8, {} addrspace(10)* @tag)
+; CHECK-NEXT: store atomic {} addrspace(10)* @tag, {} addrspace(10)* addrspace(10)* [[V_HEADROOM]] unordered, align 8, !tbaa !4
+    %v = call noalias {} addrspace(10)* @julia.gc_alloc_obj({}** %current_task, i64 8, {} addrspace(10)* @tag)
 ; CHECK-NEXT: %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
     %v64 = bitcast {} addrspace(10)* %v to i64 addrspace(10)*
-; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !5
+; CHECK-NEXT: %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !7
     %loadedval = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !invariant.load !1
-; CHECK-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !6
+; CHECK-NEXT: store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !8
     store i64 %loadedval, i64 addrspace(10)* %v64, align 8, !noalias !2
-; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !7, !range !5
+; CHECK-NEXT: %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !tbaa !9, !range !7
     %lv2 = load i64, i64 addrspace(10)* %v64, align 8, !range !0, !tbaa !4
 ; CHECK-NEXT: ret void
     ret void
@@ -79,7 +92,7 @@ define i32 @callee_root({} addrspace(10)* %v0, {} addrspace(10)* %v1) {
 top:
 ; CHECK-LABEL: @callee_root
 ; CHECK-NOT: @julia.new_gc_frame
-  %v2 = call {}*** @julia.ptls_states()
+  %v2 = call {}*** @julia.get_pgcstack()
   %v3 = bitcast {} addrspace(10)* %v0 to {} addrspace(10)* addrspace(10)*
   %v4 = addrspacecast {} addrspace(10)* addrspace(10)* %v3 to {} addrspace(10)* addrspace(11)*
   %v5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v4 unordered, align 8
@@ -93,6 +106,25 @@ top:
 ; CHECK: ret i32
 }
 
+define i32 @freeze({} addrspace(10)* %v0, {} addrspace(10)* %v1) {
+top:
+; CHECK-LABEL: @freeze
+; CHECK-NOT: @julia.new_gc_frame
+  %v2 = call {}*** @julia.get_pgcstack()
+  %v3 = bitcast {} addrspace(10)* %v0 to {} addrspace(10)* addrspace(10)*
+  %v4 = addrspacecast {} addrspace(10)* addrspace(10)* %v3 to {} addrspace(10)* addrspace(11)*
+  %v5 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v4 unordered, align 8
+  %v6 = bitcast {} addrspace(10)* %v1 to {} addrspace(10)* addrspace(10)*
+  %v7 = addrspacecast {} addrspace(10)* addrspace(10)* %v6 to {} addrspace(10)* addrspace(11)*
+  %v8 = load atomic {} addrspace(10)*, {} addrspace(10)* addrspace(11)* %v7 unordered, align 8
+  %fv8 = freeze {} addrspace(10)* %v8
+  %v9 = addrspacecast {} addrspace(10)* %v5 to {} addrspace(12)*
+  %v10 = addrspacecast {} addrspace(10)* %fv8 to {} addrspace(12)*
+  %v11 = call i32 @rooting_callee({} addrspace(12)* %v9, {} addrspace(12)* %v10)
+  ret i32 %v11
+; CHECK: ret i32
+}
+
 !0 = !{i64 0, i64 23}
 !1 = !{}
 !2 = distinct !{!2}
@@ -101,12 +133,11 @@ top:
 !5 = !{!"jtbaa"}
 
 ; CHECK:      !0 = !{!1, !1, i64 0}
-; CHECK-NEXT: !1 = !{!"jtbaa_tag", !2, i64 0}
-; CHECK-NEXT: !2 = !{!"jtbaa_data", !3, i64 0}
-; CHECK-NEXT: !3 = !{!"jtbaa", !4, i64 0}
-; CHECK-NEXT: !4 = !{!"jtbaa"}
-; CHECK-NEXT: !5 = !{i64 0, i64 23}
-; CHECK-NEXT: !6 = distinct !{!6}
-; CHECK-NEXT: !7 = !{!8, !8, i64 0}
-; CHECK-NEXT: !8 = !{!"jtbaa_const", !9}
-; CHECK-NEXT: !9 = !{!"jtbaa"}
+; CHECK-NEXT: !1 = !{!"jtbaa_gcframe", !2, i64 0}
+; CHECK-NEXT: !2 = !{!"jtbaa", !3, i64 0}
+; CHECK-NEXT: !3 = !{!"jtbaa"}
+; CHECK-NEXT: !4 = !{!5, !5, i64 0}
+; CHECK-NEXT: !5 = !{!"jtbaa_tag", !6, i64 0}
+; CHECK-NEXT: !6 = !{!"jtbaa_data", !2, i64 0}
+; CHECK-NEXT: !7 = !{i64 0, i64 23}
+; CHECK-NEXT: !8 = distinct !{!8}
diff --git a/test/llvmpasses/lit.cfg.py b/test/llvmpasses/lit.cfg.py
index 2054876ed9a5d3..f53854faf2559e 100644
--- a/test/llvmpasses/lit.cfg.py
+++ b/test/llvmpasses/lit.cfg.py
@@ -13,8 +13,6 @@
 config.substitutions.append(('%shlibext', '.dylib' if platform.system() == 'Darwin' else '.dll' if
     platform.system() == 'Windows' else '.so'))
 
-path = os.path.pathsep.join((os.path.join(os.path.dirname(__file__),"../../usr/tools"), os.path.join(os.path.dirname(__file__),"../../usr/bin"), config.environment['PATH']))
-config.environment['PATH'] = path
 config.environment['HOME'] = "/tmp"
 
 if platform.machine() == "x86_64":
diff --git a/test/llvmpasses/llvmcall.jl b/test/llvmpasses/llvmcall.jl
index c9cdf4db1fc38e..687abe0a8cd461 100644
--- a/test/llvmpasses/llvmcall.jl
+++ b/test/llvmpasses/llvmcall.jl
@@ -13,7 +13,7 @@ end
 @generated foo(x)=:(ccall("extern foo", llvmcall, $x, ($x,), x))
 bar(x) = ntuple(i -> VecElement{Float16}(x[i]), 2)
 
-# CHECK: call half @foo(half zeroext %{{[0-9]+}})
+# CHECK: call half @foo(half %{{[0-9]+}})
 emit(foo, Float16)
 
 # CHECK: call [2 x half] @foo([2 x half] %{{[0-9]+}})
@@ -28,5 +28,5 @@ emit(foo, Core.LLVMPtr{Float32, 3})
 # CHECK: call { i32, i32 } @foo({ i32, i32 } %{{[0-9]+}})
 emit(foo, Foo)
 
-# CHECK: define <2 x i16> @julia_bar_{{[0-9]+}}([2 x i16]
+# CHECK: define <2 x half> @julia_bar_{{[0-9]+}}([2 x half]
 emit(bar, NTuple{2, Float16})
diff --git a/test/llvmpasses/loopinfo.jl b/test/llvmpasses/loopinfo.jl
index 67dadbf118e41b..412bee7015c3e1 100644
--- a/test/llvmpasses/loopinfo.jl
+++ b/test/llvmpasses/loopinfo.jl
@@ -2,7 +2,8 @@
 
 # RUN: julia --startup-file=no %s %t && llvm-link -S %t/* -o %t/module.ll
 # RUN: cat %t/module.ll | FileCheck %s
-# RUN: cat %t/module.ll | opt -load libjulia%shlibext -LowerSIMDLoop -S - | FileCheck %s -check-prefix=LOWER
+# RUN: cat %t/module.ll | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S - | FileCheck %s -check-prefix=LOWER
+# RUN: cat %t/module.ll | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S - | FileCheck %s -check-prefix=LOWER
 # RUN: julia --startup-file=no %s %t -O && llvm-link -S %t/* -o %t/module.ll
 # RUN: cat %t/module.ll | FileCheck %s -check-prefix=FINAL
 
@@ -104,7 +105,7 @@ end
 end
 
 # FINAL-LABEL: @julia_notunroll
-function notunroll(J, I)
+@eval function notunroll(J, I)
     for i in 1:10
         for j in J
             1 <= j <= I && continue
@@ -113,6 +114,7 @@ function notunroll(J, I)
 # FINAL: call void @j_iteration
 # FINAL-NOT: call void @j_iteration
         end
+        $(Expr(:loopinfo, (Symbol("llvm.loop.unroll.disable"),)))
     end
 end
 
diff --git a/test/llvmpasses/lower-handlers.ll b/test/llvmpasses/lower-handlers.ll
index daeb60261d69c5..01bc1ae728f15e 100644
--- a/test/llvmpasses/lower-handlers.ll
+++ b/test/llvmpasses/lower-handlers.ll
@@ -1,15 +1,17 @@
-; RUN: opt -load libjulia%shlibext -LowerExcHandlers -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerExcHandlers -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LowerExcHandlers)' -S %s | FileCheck %s
 
 attributes #1 = { returns_twice }
 declare i32 @julia.except_enter() #1
-declare void @jl_pop_handler(i32)
+declare void @ijl_pop_handler(i32)
 declare i8**** @julia.ptls_states()
+declare i8**** @julia.get_pgcstack()
 
 define void @simple() {
 top:
-    %ptls = call i8**** @julia.ptls_states()
+    %pgcstack = call i8**** @julia.get_pgcstack()
 ; CHECK: call void @llvm.lifetime.start
-; CHECK: call void @jl_enter_handler
+; CHECK: call void @ijl_enter_handler
 ; CHECK: setjmp
     %r = call i32 @julia.except_enter()
     %cmp = icmp eq i32 %r, 0
@@ -19,7 +21,7 @@ try:
 catch:
     br label %after
 after:
-    call void @jl_pop_handler(i32 1)
+    call void @ijl_pop_handler(i32 1)
 ; CHECK: llvm.lifetime.end
     ret void
 }
diff --git a/test/llvmpasses/muladd.ll b/test/llvmpasses/muladd.ll
index 83f5955a1c21eb..2eddb62cef3ecb 100644
--- a/test/llvmpasses/muladd.ll
+++ b/test/llvmpasses/muladd.ll
@@ -1,4 +1,6 @@
-; RUN: opt -load libjulia%shlibext -CombineMulAdd -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -CombineMulAdd -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='CombineMulAdd' -S %s | FileCheck %s
+
 
 define double @fast_muladd1(double %a, double %b, double %c) {
 top:
diff --git a/test/llvmpasses/noinline.jl b/test/llvmpasses/noinline.jl
index f542968b219794..c4aa22bf80a71b 100644
--- a/test/llvmpasses/noinline.jl
+++ b/test/llvmpasses/noinline.jl
@@ -17,5 +17,5 @@ include(joinpath("..", "testhelpers", "llvmpasses.jl"))
     return A + B
 end
 
-# CHECK: attributes #{{[0-9]+}} = {{{([a-z]+ )*}} noinline {{([a-z]+ )*}}}
+# CHECK: attributes #{{[0-9]+}} = {{{[^}]*}} noinline {{[^}]*}}}
 emit(simple_noinline, Float64, Float64)
diff --git a/test/llvmpasses/propagate-addrspace.ll b/test/llvmpasses/propagate-addrspace.ll
index 4df6f073c67bc5..84ad33310ab3f6 100644
--- a/test/llvmpasses/propagate-addrspace.ll
+++ b/test/llvmpasses/propagate-addrspace.ll
@@ -1,4 +1,5 @@
-; RUN: opt -load libjulia%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -PropagateJuliaAddrspaces -dce -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='PropagateJuliaAddrspaces,dce' -S %s | FileCheck %s
 
 define i64 @simple() {
 ; CHECK-LABEL: @simple
diff --git a/test/llvmpasses/refinements.ll b/test/llvmpasses/refinements.ll
index 2b23516d8818bc..cb2dea816c56b7 100644
--- a/test/llvmpasses/refinements.ll
+++ b/test/llvmpasses/refinements.ll
@@ -1,14 +1,17 @@
-; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s
 
 
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare void @jl_safepoint()
 declare void @one_arg_boxed({} addrspace(10)*)
-declare {} addrspace(10)* @jl_box_int64(i64)
+declare {} addrspace(10)* @ijl_box_int64(i64)
 
 define void @argument_refinement({} addrspace(10)* %a) {
 ; CHECK-LABEL: @argument_refinement
 ; CHECK-NOT: %gcframe
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %casted1 = bitcast {} addrspace(10)* %a to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
@@ -22,8 +25,9 @@ define void @argument_refinement({} addrspace(10)* %a) {
 define void @heap_refinement1(i64 %a) {
 ; CHECK-LABEL: @heap_refinement1
 ; CHECK:   %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-    %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
+    %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
 ; CHECK: store {} addrspace(10)* %aboxed
@@ -38,8 +42,9 @@ define void @heap_refinement1(i64 %a) {
 define void @heap_refinement2(i64 %a) {
 ; CHECK-LABEL: @heap_refinement2
 ; CHECK:   %gcframe = alloca {} addrspace(10)*, i32 3
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
-    %aboxed = call {} addrspace(10)* @jl_box_int64(i64 signext %a)
+    %aboxed = call {} addrspace(10)* @ijl_box_int64(i64 signext %a)
     %casted1 = bitcast {} addrspace(10)* %aboxed to {} addrspace(10)* addrspace(10)*
     %loaded1 = load {} addrspace(10)*, {} addrspace(10)* addrspace(10)* %casted1, !tbaa !1
 ; CHECK: store {} addrspace(10)* %loaded1
@@ -55,6 +60,7 @@ declare {} addrspace(10)* @allocate_some_value()
 define void @issue22770() {
 ; CHECK-LABEL: @issue22770
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
     %y = call {} addrspace(10)* @allocate_some_value()
     %casted1 = bitcast {} addrspace(10)* %y to {} addrspace(10)* addrspace(10)*
@@ -80,6 +86,7 @@ define void @refine_select_phi({} addrspace(10)* %x, {} addrspace(10)* %y, i1 %b
 ; CHECK-LABEL: @refine_select_phi
 ; CHECK-NOT: %gcframe
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %s = select i1 %b, {} addrspace(10)* %x, {} addrspace(10)* %y
   br i1 %b, label %L1, label %L2
@@ -101,6 +108,7 @@ define void @dont_refine_loop({} addrspace(10)* %x) {
 ; CHECK-LABEL: @dont_refine_loop
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 4
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   br label %L1
 
@@ -122,6 +130,7 @@ define void @refine_loop_const({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_const
 ; CHECK-NOT: %gcframe
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   br label %L1
 
@@ -142,6 +151,7 @@ define void @refine_loop_indirect({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_indirect
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %a = call {} addrspace(10)* @allocate_some_value()
   br label %L1
@@ -166,6 +176,7 @@ define void @refine_loop_indirect2({} addrspace(10)* %x) {
 ; CHECK-LABEL: @refine_loop_indirect2
 ; CHECK: %gcframe = alloca {} addrspace(10)*, i32 3
 top:
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %a = call {} addrspace(10)* @allocate_some_value()
   br label %L1
@@ -189,6 +200,7 @@ declare {} addrspace(10)* @julia.typeof({} addrspace(10)*) #0
 define {} addrspace(10)* @typeof({} addrspace(10)* %x) {
 ; CHECK-LABEL: @typeof(
 ; CHECK-NOT: %gcframe
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %v = call {} addrspace(10)* @julia.typeof({} addrspace(10)* %x)
   call void @one_arg_boxed({} addrspace(10)* %v)
@@ -200,7 +212,8 @@ declare void @julia.write_barrier({} addrspace(10)*, {} addrspace(10)*) #1
 define {} addrspace(10)* @setfield({} addrspace(10)* %p) {
 ; CHECK-LABEL: @setfield(
 ; CHECK-NOT: %gcframe
-; CHECK: call void @jl_gc_queue_root
+; CHECK: call void @ijl_gc_queue_root
+  %pgcstack = call {}*** @julia.get_pgcstack()
   %ptls = call {}*** @julia.ptls_states()
   %c = call {} addrspace(10)* @allocate_some_value()
   %fp = bitcast {} addrspace(10)* %p to {} addrspace(10)* addrspace(10)*
diff --git a/test/llvmpasses/remove-addrspaces.ll b/test/llvmpasses/remove-addrspaces.ll
index 97d4e17607570b..77a8a5e8150574 100644
--- a/test/llvmpasses/remove-addrspaces.ll
+++ b/test/llvmpasses/remove-addrspaces.ll
@@ -1,4 +1,5 @@
-; RUN: opt -load libjulia%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -RemoveJuliaAddrspaces -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='RemoveJuliaAddrspaces' -S %s | FileCheck %s
 
 
 define i64 @getindex({} addrspace(10)* nonnull align 16 dereferenceable(40)) {
@@ -41,3 +42,70 @@ top:
   store i64 %0, i64 addrspace(13)* %4, align 8
   ret {} addrspace(10)* %1
 }
+
+; COM: A type used for testing that remove-addrspaces can handle recursive types.
+%list = type { i64, %list* }
+
+; COM: There's nothing to remove in this function; but remove-addrspaces shouldn't crash.
+define i64 @sum.linked.list() #0 {
+; CHECK-LABEL: @sum.linked.list
+top:
+  %a = alloca %list
+  %b = alloca %list
+  %c = alloca %list
+  %a.car = getelementptr %list, %list* %a, i32 0, i32 0
+  %a.cdr = getelementptr %list, %list* %a, i32 0, i32 1
+  %b.car = getelementptr %list, %list* %b, i32 0, i32 0
+  %b.cdr = getelementptr %list, %list* %b, i32 0, i32 1
+  %c.car = getelementptr %list, %list* %c, i32 0, i32 0
+  %c.cdr = getelementptr %list, %list* %c, i32 0, i32 1
+; COM: Allow remove-addrspaces to rename the type but expect it to use the same prefix.
+; CHECK: getelementptr %list
+; CHECK-SAME: %list
+; CHECK-SAME: * %a
+; CHECK: getelementptr %list
+; CHECK-SAME: %list
+; CHECK-SAME: * %a
+; CHECK: getelementptr %list
+; CHECK-SAME: %list
+; CHECK-SAME: * %b
+; CHECK: getelementptr %list
+; CHECK-SAME: %list
+; CHECK-SAME: * %b
+; CHECK: getelementptr %list
+; CHECK-SAME: %list
+; CHECK-SAME: * %c
+; CHECK: getelementptr %list
+; CHECK-SAME: %list
+; CHECK-SAME: * %c
+  store i64 111, i64* %a.car
+  store i64 222, i64* %b.car
+  store i64 333, i64* %c.car
+  store %list* %b, %list** %a.cdr
+  store %list* %c, %list** %b.cdr
+  store %list* null, %list** %c.cdr
+  br label %loop
+
+loop:
+  %x = phi %list* [ %a, %top ], [ %x.cdr.value, %loop ]
+  %sum.prev = phi i64 [ 0, %top ], [ %sum, %loop ]
+  %x.car = getelementptr %list, %list* %x, i32 0, i32 0
+  %x.cdr = getelementptr %list, %list* %x, i32 0, i32 1
+  %x.car.value = load i64, i64* %x.car
+  %x.cdr.value = load %list*, %list** %x.cdr
+  %sum = add i64 %sum.prev, %x.car.value
+  %null.int = ptrtoint %list* null to i64
+  %x.cdr.value.int = ptrtoint %list* %x.cdr.value to i64
+  %cond = icmp eq i64 %x.cdr.value.int, %null.int
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret i64 %sum
+}
+
+
+; COM: check that address spaces in byval types are processed correctly
+define void @byval_type([1 x {} addrspace(10)*] addrspace(11)* byval([1 x {} addrspace(10)*]) %0) {
+; CHECK: define void @byval_type([1 x {}*]* byval([1 x {}*]) %0)
+  ret void
+}
diff --git a/test/llvmpasses/returnstwicegc.ll b/test/llvmpasses/returnstwicegc.ll
index 0c72143ed1836a..17791d630d61a9 100644
--- a/test/llvmpasses/returnstwicegc.ll
+++ b/test/llvmpasses/returnstwicegc.ll
@@ -1,9 +1,11 @@
-; RUN: opt -load libjulia%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S %s | FileCheck %s
 
 
 declare void @boxed_simple({} addrspace(10)*, {} addrspace(10)*)
 declare {} addrspace(10)* @jl_box_int64(i64)
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 declare i32 @sigsetjmp(i8*, i32) returns_twice
 declare void @one_arg_boxed({} addrspace(10)*)
 
@@ -14,6 +16,7 @@ define void @try_catch(i64 %a, i64 %b)
 top:
     %sigframe = alloca [208 x i8], align 16
     %sigframe.sub = getelementptr inbounds [208 x i8], [208 x i8]* %sigframe, i64 0, i64 0
+    call {}*** @julia.get_pgcstack()
     call {}*** @julia.ptls_states()
     %aboxed = call {} addrspace(10)* @jl_box_int64(i64 %a)
     %val = call i32 @sigsetjmp(i8 *%sigframe.sub, i32 0) returns_twice
diff --git a/test/llvmpasses/safepoint_stress.jl b/test/llvmpasses/safepoint_stress.jl
index 4d00542069079e..dc6752e76d5954 100644
--- a/test/llvmpasses/safepoint_stress.jl
+++ b/test/llvmpasses/safepoint_stress.jl
@@ -1,13 +1,17 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-# RUN: julia --startup-file=no %s | opt -load libjulia%shlibext -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s
+# RUN: julia --startup-file=no %s | opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LateLowerGCFrame -FinalLowerGC -S - | FileCheck %s
+# RUN: julia --startup-file=no %s | opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='function(LateLowerGCFrame),FinalLowerGC' -S - | FileCheck %s
+
 
 println("""
 declare {} addrspace(10)* @alloc()
 declare void @one_arg_boxed({} addrspace(10)*)
 declare {}*** @julia.ptls_states()
+declare {}*** @julia.get_pgcstack()
 
 define void @stress(i64 %a, i64 %b) {
+    %pgcstack = call {}*** @julia.get_pgcstack()
     %ptls = call {}*** @julia.ptls_states()
 """)
 
diff --git a/test/llvmpasses/simdloop.ll b/test/llvmpasses/simdloop.ll
index 0438ee8cdf8fbd..894d3a1428a5cd 100644
--- a/test/llvmpasses/simdloop.ll
+++ b/test/llvmpasses/simdloop.ll
@@ -1,4 +1,5 @@
-; RUN: opt -load libjulia%shlibext -LowerSIMDLoop -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=0 -load libjulia-codegen%shlibext -LowerSIMDLoop -S %s | FileCheck %s
+; RUN: opt -enable-new-pm=1 --load-pass-plugin=libjulia-codegen%shlibext -passes='LowerSIMDLoop' -S %s | FileCheck %s
 
 declare void @julia.loopinfo_marker()
 
diff --git a/test/loading.jl b/test/loading.jl
index 8c9a200b3daa40..39e4790eee9d33 100644
--- a/test/loading.jl
+++ b/test/loading.jl
@@ -46,7 +46,7 @@ include_string_test_func = include_string(@__MODULE__, "include_string_test() =
 @test isdir(@__DIR__)
 @test @__DIR__() == dirname(@__FILE__)
 @test !endswith(@__DIR__, Base.Filesystem.path_separator)
-let exename = `$(Base.julia_cmd()) --compiled-modules=yes --startup-file=no`,
+let exename = `$(Base.julia_cmd()) --compiled-modules=yes --startup-file=no --color=no`,
     wd = sprint(show, pwd())
     s_dir = sprint(show, realpath(tempdir()))
     @test wd != s_dir
@@ -57,6 +57,15 @@ let exename = `$(Base.julia_cmd()) --compiled-modules=yes --startup-file=no`,
     @test !endswith(s_dir, Base.Filesystem.path_separator)
 end
 
+@test Base.in_sysimage(Base.PkgId(Base.UUID("cf7118a7-6976-5b1a-9a39-7adc72f591a4"), "UUIDs"))
+@test Base.in_sysimage(Base.PkgId(Base.UUID("3a7fdc7e-7467-41b4-9f64-ea033d046d5b"), "NotAPackage")) == false
+
+## Unit tests for safe file operations ##
+
+@test Base.isaccessiblefile("/root/path/doesn't/exist") == false
+@test Base.isaccessiblepath("/root/path/doesn't/exist") == false
+@test Base.isaccessibledir("/root/path/doesn't/exist") == false
+
 # Issue #5789 and PR #13542:
 mktempdir() do dir
     cd(dir) do
@@ -115,6 +124,7 @@ let uuidstr = "ab"^4 * "-" * "ab"^2 * "-" * "ab"^2 * "-" * "ab"^2 * "-" * "ab"^6
     @test string(uuid) == uuidstr == sprint(print, uuid)
     @test "check $uuid" == "check $uuidstr"
     @test UUID(UInt128(uuid)) == uuid
+    @test UUID(uuid) === uuid
     @test UUID(convert(NTuple{2, UInt64}, uuid)) == uuid
     @test UUID(convert(NTuple{4, UInt32}, uuid)) == uuid
 
@@ -129,19 +139,54 @@ end
 @test_throws ArgumentError parse(UUID, "not a UUID")
 @test tryparse(UUID, "either is this") === nothing
 
-function subset(v::Vector{T}, m::Int) where T
-    T[v[j] for j = 1:length(v) if ((m >>> (j - 1)) & 1) == 1]
-end
+@testset "explicit_project_deps_get" begin
+    mktempdir() do dir
+        project_file = joinpath(dir, "Project.toml")
+        touch(project_file) # dummy_uuid calls realpath
+        # various UUIDs to work with
+        proj_uuid = dummy_uuid(project_file)
+        root_uuid = uuid4()
+        this_uuid = uuid4()
 
-function perm(p::Vector, i::Int)
-    for j = length(p):-1:1
-        i, k = divrem(i, j)
-        p[j], p[k+1] = p[k+1], p[j]
+        old_load_path = copy(LOAD_PATH)
+        try
+            copy!(LOAD_PATH, [project_file])
+            write(project_file, """
+            name = "Root"
+            uuid = "$root_uuid"
+            [deps]
+            This = "$this_uuid"
+            """)
+            # look up various packages by name
+            root = Base.identify_package("Root")
+            this = Base.identify_package("This")
+            that = Base.identify_package("That")
+
+            @test root.uuid == root_uuid
+            @test this.uuid == this_uuid
+            @test that == nothing
+
+            write(project_file, """
+            name = "Root"
+            This = "$this_uuid"
+            [deps]
+            """)
+            # look up various packages by name
+            root = Base.identify_package("Root")
+            this = Base.identify_package("This")
+            that = Base.identify_package("That")
+
+            @test root.uuid == proj_uuid
+            @test this == nothing
+            @test that == nothing
+        finally
+            copy!(LOAD_PATH, old_load_path)
+        end
     end
-    return p
 end
 
-@testset "explicit_project_deps_get" begin
+# extras
+@testset "extras" begin
     mktempdir() do dir
         project_file = joinpath(dir, "Project.toml")
         touch(project_file) # dummy_uuid calls realpath
@@ -149,66 +194,60 @@ end
         proj_uuid = dummy_uuid(project_file)
         root_uuid = uuid4()
         this_uuid = uuid4()
-        # project file to subset/permute
-        lines = split("""
-        name = "Root"
-        uuid = "$root_uuid"
-        [deps]
-        This = "$this_uuid"
-        """, '\n')
-        N = length(lines)
-        # test every permutation of every subset of lines
-        for m = 0:2^N-1
-            s = subset(lines, m) # each subset of lines
-            for i = 1:factorial(count_ones(m))
-                p = perm(s, i) # each permutation of the subset
-                open(project_file, write=true) do io
-                    for line in p
-                        println(io, line)
-                    end
-                end
-                # look at lines and their order
-                n = findfirst(line -> startswith(line, "name"), p)
-                u = findfirst(line -> startswith(line, "uuid"), p)
-                d = findfirst(line -> line == "[deps]", p)
-                t = findfirst(line -> startswith(line, "This"), p)
-                # look up various packages by name
-                cache = Base.TOMLCache()
-                root = Base.explicit_project_deps_get(project_file, "Root", cache)
-                this = Base.explicit_project_deps_get(project_file, "This", cache)
-                that = Base.explicit_project_deps_get(project_file, "That", cache)
-                # test that the correct answers are given
-                @test root == (something(n, N+1) ≥ something(d, N+1) ? nothing :
-                               something(u, N+1) < something(d, N+1) ? root_uuid : proj_uuid)
-                @test this == (something(d, N+1) < something(t, N+1) ≤ N ? this_uuid : nothing)
-                @test that == nothing
-            end
+
+        old_load_path = copy(LOAD_PATH)
+        try
+            copy!(LOAD_PATH, [project_file])
+            write(project_file, """
+            name = "Root"
+            uuid = "$root_uuid"
+            [extras]
+            This = "$this_uuid"
+            """)
+            # look up various packages by name
+            root = Base.identify_package("Root")
+            this = Base.identify_package("This")
+            that = Base.identify_package("That")
+
+            @test root.uuid == root_uuid
+            @test this == nothing
+            @test that == nothing
+
+            @test Base.get_uuid_name(project_file, this_uuid) == "This"
+        finally
+            copy!(LOAD_PATH, old_load_path)
         end
     end
 end
 
+
 ## functional testing of package identification, location & loading ##
 
 saved_load_path = copy(LOAD_PATH)
 saved_depot_path = copy(DEPOT_PATH)
 saved_active_project = Base.ACTIVE_PROJECT[]
+watcher_counter = Ref(0)
+push!(Base.active_project_callbacks, () -> watcher_counter[] += 1)
+push!(Base.active_project_callbacks, () -> error("broken"))
 
-push!(empty!(LOAD_PATH), "project")
-push!(empty!(DEPOT_PATH), "depot")
-Base.ACTIVE_PROJECT[] = nothing
-
-@test load_path() == [abspath("project","Project.toml")]
+push!(empty!(LOAD_PATH), joinpath(@__DIR__, "project"))
+append!(empty!(DEPOT_PATH), [mktempdir(), joinpath(@__DIR__, "depot")])
+@test watcher_counter[] == 0
+@test_logs (:error, r"active project callback .* failed") Base.set_active_project(nothing)
+@test watcher_counter[] == 1
+pop!(Base.active_project_callbacks)
 
+@test load_path() == [joinpath(@__DIR__, "project", "Project.toml")]
 
 # locate `tail(names)` package by following the search path graph through `names` starting from `where`
 function recurse_package(where::PkgId, name::String, names::String...)
-    pkg = identify_package(where, name, Base.TOMLCache())
+    pkg = identify_package(where, name)
     pkg === nothing && return nothing
     return recurse_package(pkg, names...)
 end
 
 recurse_package(pkg::String) = identify_package(pkg)
-recurse_package(where::PkgId, pkg::String) = identify_package(where, pkg, Base.TOMLCache())
+recurse_package(where::PkgId, pkg::String) = identify_package(where, pkg)
 
 function recurse_package(name::String, names::String...)
     pkg = identify_package(name)
@@ -229,6 +268,7 @@ end
         pkg = recurse_package(n...)
         @test pkg == PkgId(UUID(uuid), n[end])
         @test joinpath(@__DIR__, normpath(path)) == locate_package(pkg)
+        @test Base.compilecache_path(pkg, UInt64(0)) == Base.compilecache_path(pkg, UInt64(0))
     end
     @test identify_package("Baz") == nothing
     @test identify_package("Qux") == nothing
@@ -312,6 +352,11 @@ module NotPkgModule; end
         @test pkgdir(Foo.SubFoo1) == normpath(abspath(@__DIR__, "project/deps/Foo1"))
         @test pkgdir(Foo.SubFoo2) == normpath(abspath(@__DIR__, "project/deps/Foo1"))
         @test pkgdir(NotPkgModule) === nothing
+
+        @test pkgdir(Foo, "src") == normpath(abspath(@__DIR__, "project/deps/Foo1/src"))
+        @test pkgdir(Foo.SubFoo1, "src") == normpath(abspath(@__DIR__, "project/deps/Foo1/src"))
+        @test pkgdir(Foo.SubFoo2, "src") == normpath(abspath(@__DIR__, "project/deps/Foo1/src"))
+        @test pkgdir(NotPkgModule, "src") === nothing
     end
 
 end
@@ -602,12 +647,13 @@ end == "opening file $(repr(joinpath(@__DIR__, "notarealfile.jl")))"
 old_act_proj = Base.ACTIVE_PROJECT[]
 pushfirst!(LOAD_PATH, "@")
 try
-    Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg")
+    Base.set_active_project(joinpath(@__DIR__, "TestPkg"))
     @eval using TestPkg
 finally
-    Base.ACTIVE_PROJECT[] = old_act_proj
+    Base.set_active_project(old_act_proj)
     popfirst!(LOAD_PATH)
 end
+@test Base.pkgorigins[Base.PkgId(UUID("69145d58-7df6-11e8-0660-cf7622583916"), "TestPkg")].version == v"1.2.3"
 
 @testset "--project and JULIA_PROJECT paths should be absolutified" begin
     mktempdir() do dir; cd(dir) do
@@ -688,12 +734,14 @@ end
 
 append!(empty!(LOAD_PATH), saved_load_path)
 append!(empty!(DEPOT_PATH), saved_depot_path)
-Base.ACTIVE_PROJECT[] = saved_active_project
+pop!(Base.active_project_callbacks)
+Base.set_active_project(saved_active_project)
+@test watcher_counter[] == 3
 
 # issue #28190
-module Foo; import Libdl; end
-import .Foo.Libdl; import Libdl
-@test Foo.Libdl === Libdl
+module Foo28190; import Libdl; end
+import .Foo28190.Libdl; import Libdl
+@test Foo28190.Libdl === Libdl
 
 @testset "include with mapexpr" begin
     let exprs = Any[]
@@ -716,3 +764,152 @@ import .Foo.Libdl; import Libdl
         end
     end
 end
+
+@testset "`Base.project_names` and friends" begin
+    # Some functions in Pkg assumes that these tuples have the same length
+    n = length(Base.project_names)
+    @test length(Base.manifest_names) == n
+    @test length(Base.preferences_names) == n
+end
+
+@testset "Manifest formats" begin
+    deps = Dict{String,Any}(
+        "Serialization" => Any[Dict{String, Any}("uuid"=>"9e88b42a-f829-5b0c-bbe9-9e923198166b")],
+        "Random"        => Any[Dict{String, Any}("deps"=>["Serialization"], "uuid"=>"9a3f8284-a2c9-5f02-9a11-845980a1fd5c")],
+        "Logging"       => Any[Dict{String, Any}("uuid"=>"56ddb016-857b-54e1-b83d-db4d58db5568")]
+    )
+
+    @testset "v1.0" begin
+        env_dir = joinpath(@__DIR__, "manifest", "v1.0")
+        manifest_file = joinpath(env_dir, "Manifest.toml")
+        isfile(manifest_file) || error("Reference manifest is missing")
+        raw_manifest = Base.parsed_toml(manifest_file)
+        @test Base.is_v1_format_manifest(raw_manifest)
+        @test Base.get_deps(raw_manifest) == deps
+    end
+
+    @testset "v2.0" begin
+        env_dir = joinpath(@__DIR__, "manifest", "v2.0")
+        manifest_file = joinpath(env_dir, "Manifest.toml")
+        isfile(manifest_file) || error("Reference manifest is missing")
+        raw_manifest = Base.parsed_toml(manifest_file)
+        @test Base.is_v1_format_manifest(raw_manifest) == false
+        @test Base.get_deps(raw_manifest) == deps
+    end
+end
+
+@testset "error message loading pkg bad module name" begin
+    mktempdir() do tmp
+        old_loadpath = copy(LOAD_PATH)
+        try
+            push!(LOAD_PATH, tmp)
+            write(joinpath(tmp, "BadCase.jl"), "module badcase end")
+            @test_throws ErrorException("package `BadCase` did not define the expected module `BadCase`, \
+                                        check for typos in package module name") (@eval using BadCase)
+        finally
+            copy!(LOAD_PATH, old_loadpath)
+        end
+    end
+end
+
+@testset "Preferences loading" begin
+    mktempdir() do dir
+        this_uuid = uuid4()
+        that_uuid = uuid4()
+
+        # First, create outer environment with exported preferences
+        mkpath(joinpath(dir, "outer_env"))
+        open(joinpath(dir, "outer_env", "Project.toml"), write=true) do io
+            write(io, """
+            [deps]
+            This = "$(this_uuid)"
+            That = "$(that_uuid)"
+
+            [preferences.This]
+            pref1 = "outer-project"
+            pref2 = "outer-project"
+            pref3 = "outer-project"
+            pref4 = "outer-project"
+            pref5 = "outer-project"
+            pref6 = "outer-project"
+
+            [preferences.That]
+            pref1 = "outer-project"
+            """)
+        end
+
+        # Override some of those preferences above here:
+        open(joinpath(dir, "outer_env", "JuliaLocalPreferences.toml"), write=true) do io
+            write(io, """
+            [This]
+            pref2 = "outer-jlp"
+            """)
+        end
+
+        # Ensure that a `JuliaLocalPreferences.toml` disables `LocalPreferences.toml`
+        # We test that both overriding `pref2` and trying to clear `pref5` are ignored
+        open(joinpath(dir, "outer_env", "LocalPreferences.toml"), write=true) do io
+            write(io, """
+            [This]
+            pref2 = "outer-lp"
+            __clear__ = ["pref5"]
+            """)
+        end
+
+        # Next, set up an inner environment that will override some of the preferences
+        # set by the outer environment, even clearing `pref6`.
+        mkpath(joinpath(dir, "inner_env"))
+        open(joinpath(dir, "inner_env", "Project.toml"), write=true) do io
+            write(io, """
+            name = "Root"
+            uuid = "$(uuid4())"
+
+            [extras]
+            This = "$(this_uuid)"
+
+            [preferences.This]
+            pref3 = "inner-project"
+            pref4 = "inner-project"
+            __clear__ = ["pref6"]
+            """)
+        end
+
+        # And have an override here as well, this time only LocalPreferences.toml
+        open(joinpath(dir, "inner_env", "LocalPreferences.toml"), write=true) do io
+            write(io, """
+            [This]
+            pref4 = "inner-lp"
+            """)
+        end
+
+        # Finally, we load preferences with a stacked environment, and ensure that
+        # we get the appropriate outputs:
+        old_load_path = copy(LOAD_PATH)
+        try
+            copy!(LOAD_PATH, [joinpath(dir, "inner_env", "Project.toml"), joinpath(dir, "outer_env", "Project.toml")])
+
+            function test_this_prefs(this_prefs)
+                @test this_prefs["pref1"] == "outer-project"
+                @test this_prefs["pref2"] == "outer-jlp"
+                @test this_prefs["pref3"] == "inner-project"
+                @test this_prefs["pref4"] == "inner-lp"
+                @test this_prefs["pref5"] == "outer-project"
+                @test !haskey(this_prefs, "pref6")
+            end
+
+            # Test directly loading the UUID we're interested in
+            test_this_prefs(Base.get_preferences(this_uuid))
+
+            # Also test loading _all_ preferences
+            all_prefs = Base.get_preferences()
+            @test haskey(all_prefs, "This")
+            @test haskey(all_prefs, "That")
+            @test all_prefs["That"]["pref1"] == "outer-project"
+
+            # Ensure that the sub-tree of `This` still satisfies our tests
+            test_this_prefs(all_prefs["This"])
+        finally
+            copy!(LOAD_PATH, old_load_path)
+        end
+    end
+end
diff --git a/test/logging.jl b/test/logging.jl
deleted file mode 100644
index 0109664dff834b..00000000000000
--- a/test/logging.jl
+++ /dev/null
@@ -1,396 +0,0 @@
-# This file is a part of Julia. License is MIT: https://julialang.org/license
-
-using Test, Base.CoreLogging
-import Base.CoreLogging: BelowMinLevel, Debug, Info, Warn, Error,
-    handle_message, shouldlog, min_enabled_level, catch_exceptions
-
-import Test: collect_test_logs, TestLogger
-using Printf: @sprintf
-
-isdefined(Main, :MacroCalls) || @eval Main include("testhelpers/MacroCalls.jl")
-using Main.MacroCalls
-
-#-------------------------------------------------------------------------------
-@testset "Logging" begin
-
-@testset "Basic logging" begin
-    @test_logs (Debug, "a") min_level=Debug @debug "a"
-    @test_logs (Info,  "a") @info  "a"
-    @test_logs (Warn,  "a") @warn  "a"
-    @test_logs (Error, "a") @error "a"
-end
-
-#-------------------------------------------------------------------------------
-# Front end
-
-@testset "Log message formatting" begin
-    @test_logs (Info, "sum(A) = 16.0") @info begin
-        A = fill(1.0, 4, 4)
-        "sum(A) = $(sum(A))"
-    end
-    x = 10.50
-    @test_logs (Info, "10.5") @info "$x"
-    @test_logs (Info, "10.500") @info @sprintf("%.3f", x)
-end
-
-@testset "Programmatically defined levels" begin
-    level = Info
-    @test_logs (Info, "X") @logmsg level "X"
-    level = Warn
-    @test_logs (Warn, "X") @logmsg level "X"
-end
-
-@testset "Structured logging with key value pairs" begin
-    foo_val = 10
-    bar_val = 100
-    logs,_ = collect_test_logs() do
-        @info "test"  bar_val  progress=0.1  foo=foo_val  2*3  bar(x)=1.2  real_line=(@__LINE__)
-        @info begin
-            value_in_msg_block = 1000.0
-            "test2"
-        end value_in_msg_block
-        test_splatting(;kws...) = @info "test3" kws...
-        test_splatting(a=1,b=2.0)
-    end
-    @test length(logs) == 3
-
-    record = logs[1]
-    kwargs = record.kwargs
-
-    # Builtin metadata
-    @test record._module == @__MODULE__
-    @test record.file == Base.source_path()
-    @test record.line == kwargs[:real_line]
-    @test record.id isa Symbol
-    @test occursin(r"^.*logging_[[:xdigit:]]{8}$", String(record.id))
-
-    # User-defined metadata
-    @test kwargs[:bar_val] === bar_val
-    @test kwargs[:progress] == 0.1
-    @test kwargs[:foo] === foo_val
-    @test kwargs[Symbol(:(2*3))] === 6
-    @test kwargs[Symbol(:(bar(x)))] === 1.2
-
-    # Keyword values accessible from message block
-    record2 = logs[2]
-    @test occursin((Info, "test2"), record2)
-    kwargs = record2.kwargs
-    @test kwargs[:value_in_msg_block] === 1000.0
-
-    # Splatting of keywords
-    record3 = logs[3]
-    @test occursin((Info, "test3"), record3)
-    kwargs = record3.kwargs
-    @test sort(collect(keys(kwargs))) == [:a, :b]
-    @test kwargs[:a] === 1
-    @test kwargs[:b] === 2.0
-end
-
-@testset "Log message exception handling" begin
-    # Exceptions in message creation are caught by default
-    @test_logs (Error,Test.Ignored(),Test.Ignored(),:logevent_error) catch_exceptions=true  @info "foo $(1÷0)"
-    # Exceptions propagate if explicitly disabled for the logger (by default
-    # for the test logger)
-    @test_throws DivideError collect_test_logs() do
-        @info  "foo $(1÷0)"
-    end
-end
-
-@testset "Special keywords" begin
-    logger = TestLogger()
-    with_logger(logger) do
-        @info "foo" _module=Base.Core _id=:asdf _group=:somegroup _file="/a/file" _line=-10
-    end
-    @test length(logger.logs) == 1
-    record = logger.logs[1]
-    @test record._module == Base.Core
-    @test record.group == :somegroup
-    @test record.id == :asdf
-    @test record.file == "/a/file"
-    @test record.line == -10
-    # Test consistency with shouldlog() function arguments
-    @test record.level   == logger.shouldlog_args[1]
-    @test record._module == logger.shouldlog_args[2]
-    @test record.group   == logger.shouldlog_args[3]
-    @test record.id      == logger.shouldlog_args[4]
-
-    # handling of nothing
-    logger = TestLogger()
-    with_logger(logger) do
-        @info "foo" _module = nothing _file = nothing _line = nothing
-    end
-    @test length(logger.logs) == 1
-    record = logger.logs[1]
-    @test record._module == nothing
-    @test record.file == nothing
-    @test record.line == nothing
-end
-
-# PR #28209
-@testset "0-arg MethodErrors" begin
-    @test_throws MethodError @macrocall(@logmsg :Notice)
-    @test_throws MethodError @macrocall(@debug)
-    @test_throws MethodError @macrocall(@info)
-    @test_throws MethodError @macrocall(@warn)
-    @test_throws MethodError @macrocall(@error)
-end
-
-
-#-------------------------------------------------------------------------------
-# Early log level filtering
-
-@testset "Early log filtering" begin
-    @testset "Log filtering, per task logger" begin
-        @test_logs (Warn, "c") min_level=Warn begin
-            @info "b"
-            @warn "c"
-        end
-    end
-
-    @testset "Log level filtering - global flag" begin
-        # Test utility: Log once at each standard level
-        function log_each_level()
-            @debug "a"
-            @info  "b"
-            @warn  "c"
-            @error "d"
-        end
-
-        disable_logging(BelowMinLevel)
-        @test_logs (Debug, "a") (Info, "b") (Warn, "c") (Error, "d") min_level=Debug  log_each_level()
-
-        disable_logging(Debug)
-        @test_logs (Info, "b") (Warn, "c") (Error, "d") min_level=Debug  log_each_level()
-
-        disable_logging(Info)
-        @test_logs (Warn, "c") (Error, "d") min_level=Debug  log_each_level()
-
-        disable_logging(Warn)
-        @test_logs (Error, "d") min_level=Debug  log_each_level()
-
-        disable_logging(Error)
-        @test_logs log_each_level()
-
-        # Reset to default
-        disable_logging(BelowMinLevel)
-    end
-
-    @testset "Log level filtering - ENV" begin
-        logger = TestLogger()
-        with_logger(logger) do
-            for (e, r) in (("", false),
-                            (",,,,", false),
-                            ("al", false),
-                            ("all", true),
-                            ("a,b,all,c", true),
-                            ("a,b,,c", false),
-                            ("Mainb", false),
-                            ("aMain", false),
-                            ("Main", true),
-                            ("a,b,Main,c", true),
-                            ("Base", true),
-                            ("a,b,Base,c", true),
-                            ("Filesystem", true),
-                            ("a,b,Filesystem,c", true),
-                            ("a,b,Base.Filesystem,c", false),
-                            ("!al", true),
-                            ("all,!al", true),
-                            ("all,!al,!all", false),
-                            ("!all,Main", true),
-                            ("!all,!Main", false),
-                            ("!all,a,b,!Main,c", false),
-                            ("!all,Filesystem", true),
-                            ("!all,Base.Filesystem", false),
-                            ("a,b,all,!all,c", false),
-                            ("!Main", false),
-                            ("a,b,!Main,c", false),
-                            ("!Base", false),
-                            ("all,!Base", false),
-                            ("!all,Base", true),
-                            ("!all,!Base", false),
-                            ("a,b,!Base,c", false),
-                            ("all,a,b,!Base,c", false),
-                            ("!all,a,b,Base,c", true),
-                            ("!all,a,b,!Base,c", false),
-                            ("!Filesystem", false),
-                            ("all,!Filesystem", false),
-                            ("!all,Filesystem", true),
-                            ("!all,!Filesystem", false),
-                            ("a,b,!Filesystem,c", false),
-                            ("all,a,b,!Filesystem,c", false),
-                            ("!all,a,b,Filesystem,c", true),
-                            ("!all,a,b,!Filesystem,c", false),
-                            ("a,b,!Base.Filesystem,c", true),
-                            ("all,a,b,!Base.Filesystem,c", true),
-                            ("!all,a,b,Base.Filesystem,c", false),
-                            ("!all,a,b,!Base.Filesystem,c", false),
-                           )
-                ENV["JULIA_DEBUG"] = e
-                @test CoreLogging.env_override_minlevel(:Main, Base.Filesystem) === r
-                @test CoreLogging.current_logger_for_env(BelowMinLevel, :Main, Base.Filesystem) === (r ? logger : nothing)
-                @test CoreLogging.current_logger_for_env(Info, :Main, Base.Filesystem) === logger
-            end
-        end
-    end
-    ENV["JULIA_DEBUG"] = ""
-end
-
-#-------------------------------------------------------------------------------
-
-@eval module LogModuleTest
-    function a()
-        @info  "a"
-    end
-
-    module Submodule
-        function b()
-            @info  "b"
-        end
-    end
-end
-
-@testset "Capture of module information" begin
-    @test_logs(
-        (Info, "a", LogModuleTest),
-        (Info, "b", LogModuleTest.Submodule),
-        begin
-            LogModuleTest.a()
-            LogModuleTest.Submodule.b()
-        end
-    )
-end
-
-
-#-------------------------------------------------------------------------------
-@testset "Logger installation and access" begin
-    @testset "Global logger" begin
-        logger1 = global_logger()
-        logger2 = TestLogger()
-        # global_logger() returns the previously installed logger
-        @test logger1 === global_logger(logger2)
-        # current logger looks up global logger by default.
-        @test current_logger() === logger2
-        global_logger(logger1) # Restore global logger
-    end
-end
-
-#-------------------------------------------------------------------------------
-
-# Custom log levels
-
-@eval module LogLevelTest
-    using Base.CoreLogging
-
-    struct MyLevel
-        level::Int
-    end
-
-    Base.convert(::Type{LogLevel}, l::MyLevel) = LogLevel(l.level)
-
-    const critical = MyLevel(10000)
-    const debug_verbose = MyLevel(-10000)
-end
-
-@testset "Custom log levels" begin
-    @test_logs (LogLevelTest.critical, "blah") @logmsg LogLevelTest.critical "blah"
-    logs,_ = collect_test_logs(min_level=Debug) do
-        @logmsg LogLevelTest.debug_verbose "blah"
-    end
-    @test length(logs) == 0
-end
-
-
-#-------------------------------------------------------------------------------
-
-@testset "SimpleLogger" begin
-    # Log level limiting
-    @test min_enabled_level(SimpleLogger(devnull, Debug)) == Debug
-    @test min_enabled_level(SimpleLogger(devnull, Error)) == Error
-
-    # Log limiting
-    logger = SimpleLogger(devnull)
-    @test shouldlog(logger, Info, Base, :group, :asdf) === true
-    handle_message(logger, Info, "msg", Base, :group, :asdf, "somefile", 1, maxlog=2)
-    @test shouldlog(logger, Info, Base, :group, :asdf) === true
-    handle_message(logger, Info, "msg", Base, :group, :asdf, "somefile", 1, maxlog=2)
-    @test shouldlog(logger, Info, Base, :group, :asdf) === false
-    @test catch_exceptions(logger) === false
-
-    # Log formatting
-    function genmsg(level, message, _module, filepath, line; kws...)
-        io = IOBuffer()
-        logger = SimpleLogger(io, Debug)
-        handle_message(logger, level, message, _module, :group, :id,
-                       filepath, line; kws...)
-        String(take!(io))
-    end
-
-    # Simple
-    @test genmsg(Info, "msg", Main, "some/path.jl", 101) ==
-    """
-    ┌ Info: msg
-    └ @ Main some/path.jl:101
-    """
-
-    # Multiline message
-    @test genmsg(Warn, "line1\nline2", Main, "some/path.jl", 101) ==
-    """
-    ┌ Warning: line1
-    │ line2
-    └ @ Main some/path.jl:101
-    """
-
-    # Keywords
-    @test genmsg(Error, "msg", Base, "other.jl", 101, a=1, b="asdf") ==
-    """
-    ┌ Error: msg
-    │   a = 1
-    │   b = asdf
-    └ @ Base other.jl:101
-    """
-
-    # nothing values
-    @test genmsg(Warn, "msg", nothing, nothing, nothing) ==
-    """
-    ┌ Warning: msg
-    └ @ nothing nothing:nothing
-    """
-end
-
-# Issue #26273
-let m = Module(:Bare26273i, false)
-    Core.eval(m, :(import Base: @error))
-    @test_logs (:error, "Hello") Core.eval(m, quote
-        @error "Hello"
-    end)
-end
-
-@testset "#26335: _module and _file kwargs" begin
-    ignored = Test.Ignored()
-    @test_logs (:warn, "a", ignored, ignored, ignored, "foo.jl") (@warn "a" _file="foo.jl")
-    @test_logs (:warn, "a", Base) (@warn "a" _module=Base)
-end
-
-# Issue #28786
-@testset "ID generation" begin
-    logs,_ = collect_test_logs() do
-        for i in 1:2
-            @info "test"
-            @info "test"
-        end
-    end
-    @test length(logs) == 4
-    @test logs[1].id == logs[3].id
-    @test logs[2].id == logs[4].id
-    @test logs[1].id != logs[2].id
-end
-
-# Issue #34485
-@testset "`_group` must be a `Symbol`" begin
-    (record,), _ = collect_test_logs() do
-        @info "test"
-    end
-    @test record.group == :logging  # name of this file
-end
-
-end
diff --git a/test/manifest/v1.0/Manifest.toml b/test/manifest/v1.0/Manifest.toml
new file mode 100644
index 00000000000000..758314a2f5f6a4
--- /dev/null
+++ b/test/manifest/v1.0/Manifest.toml
@@ -0,0 +1,11 @@
+# This file is machine-generated - editing it directly is not advised
+
+[[Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[Random]]
+deps = ["Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
diff --git a/test/manifest/v2.0/Manifest.toml b/test/manifest/v2.0/Manifest.toml
new file mode 100644
index 00000000000000..f999fd6efb1c4b
--- /dev/null
+++ b/test/manifest/v2.0/Manifest.toml
@@ -0,0 +1,14 @@
+# This file is machine-generated - editing it directly is not advised
+
+julia_version = "1.7.0-DEV.1199"
+manifest_format = "2.0"
+
+[[deps.Logging]]
+uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
+
+[[deps.Random]]
+deps = ["Serialization"]
+uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
+
+[[deps.Serialization]]
+uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
diff --git a/test/math.jl b/test/math.jl
index 35e341801a73b2..00847ee2832571 100644
--- a/test/math.jl
+++ b/test/math.jl
@@ -2,6 +2,7 @@
 
 using Random
 using LinearAlgebra
+using Base.Experimental: @force_compile
 
 function isnan_type(::Type{T}, x) where T
     isa(x, T) && isnan(x)
@@ -53,6 +54,11 @@ end
     @test occursin("3.14159", sprint(show, MIME"text/plain"(), π))
     @test repr(Any[pi ℯ; ℯ pi]) == "Any[π ℯ; ℯ π]"
     @test string(pi) == "π"
+
+    @test sin(π) === sinpi(1) == tan(π) == sinpi(1 // 1) == 0
+    @test cos(π) === cospi(1) == sec(π) == cospi(1 // 1) == -1
+    @test csc(π) == 1/0 && cot(π) == -1/0
+    @test sincos(π) === sincospi(1) == (0, -1)
 end
 
 @testset "frexp,ldexp,significand,exponent" begin
@@ -141,14 +147,16 @@ end
 # We compare to BigFloat instead of hard-coding
 # values, assuming that BigFloat has an independently tested implementation.
 @testset "basic math functions" begin
-    @testset "$T" for T in (Float32, Float64)
+    @testset "$T" for T in (Float16, Float32, Float64)
         x = T(1//3)
         y = T(1//2)
         yi = 4
         @testset "Random values" begin
-            @test x^y ≈ big(x)^big(y)
+            @test x^y === T(big(x)^big(y))
             @test x^1 === x
-            @test x^yi ≈ big(x)^yi
+            @test x^yi === T(big(x)^yi)
+            @test (-x)^yi == x^yi
+            @test (-x)^(yi+1) == -(x^(yi+1))
             @test acos(x) ≈ acos(big(x))
             @test acosh(1+x) ≈ acosh(big(1+x))
             @test asin(x) ≈ asin(big(x))
@@ -192,15 +200,17 @@ end
             @test atan(T(1),T(1)) ≈ T(pi)/4 atol=eps(T)
             @test isequal(cbrt(T(0)), T(0))
             @test isequal(cbrt(T(1)), T(1))
-            @test isequal(cbrt(T(1000000000)), T(1000))
+            @test isequal(cbrt(T(1000000000))^3, T(1000)^3)
             @test isequal(cos(T(0)), T(1))
             @test cos(T(pi)/2) ≈ T(0) atol=eps(T)
             @test isequal(cos(T(pi)), T(-1))
-            @test exp(T(1)) ≈ T(ℯ) atol=10*eps(T)
+            @test exp(T(1)) ≈ T(ℯ) atol=2*eps(T)
             @test isequal(exp10(T(1)), T(10))
             @test isequal(exp2(T(1)), T(2))
             @test isequal(expm1(T(0)), T(0))
-            @test expm1(T(1)) ≈ T(ℯ)-1 atol=10*eps(T)
+            @test isequal(expm1(-floatmax(T)), -one(T))
+            @test isequal(expm1(floatmax(T)), T(Inf))
+            @test expm1(T(1)) ≈ T(ℯ)-1 atol=2*eps(T)
             @test isequal(hypot(T(3),T(4)), T(5))
             @test isequal(hypot(floatmax(T),T(1)),floatmax(T))
             @test isequal(hypot(floatmin(T)*sqrt(eps(T)),T(0)),floatmin(T)*sqrt(eps(T)))
@@ -219,7 +229,7 @@ end
             @test sin(T(pi)) ≈ T(0) atol=eps(T)
             @test isequal(sqrt(T(0)), T(0))
             @test isequal(sqrt(T(1)), T(1))
-            @test isequal(sqrt(T(100000000)), T(10000))
+            @test isequal(sqrt(T(100000000))^2, T(10000)^2)
             @test isequal(tan(T(0)), T(0))
             @test tan(T(pi)/4) ≈ T(1) atol=eps(T)
             @test isequal(sec(T(pi)), -one(T))
@@ -228,6 +238,14 @@ end
             @test isequal(cscd(T(90)), one(T))
             @test isequal(sech(log(one(T))), one(T))
             @test isequal(csch(zero(T)), T(Inf))
+            @test zero(T)^y === zero(T)
+            @test zero(T)^zero(T) === one(T)
+            @test zero(T)^(-y) === T(Inf)
+            @test zero(T)^T(NaN) === T(NaN)
+            @test one(T)^y === one(T)
+            @test one(T)^zero(T) === one(T)
+            @test one(T)^T(NaN) === one(T)
+            @test isnan(T(NaN)^T(-.5))
         end
         @testset "Inverses" begin
             @test acos(cos(x)) ≈ x
@@ -283,51 +301,44 @@ end
             @test hypot(T(Inf), T(x)) === T(Inf)
             @test hypot(T(Inf), T(NaN)) === T(Inf)
             @test isnan_type(T, hypot(T(x), T(NaN)))
+            @test tanh(T(Inf)) === T(1)
         end
     end
-end
-
-@testset "exp function" for T in (Float64, Float32)
-    @testset "$T accuracy" begin
-        X = map(T, vcat(-10:0.0002:10, -80:0.001:80, 2.0^-27, 2.0^-28, 2.0^-14, 2.0^-13))
-        for x in X
-            y, yb = exp(x), exp(big(x))
-            @test abs(y-yb) <= 1.0*eps(T(yb))
-        end
-    end
-    @testset "$T edge cases" begin
-        @test isnan_type(T, exp(T(NaN)))
-        @test exp(T(-Inf)) === T(0.0)
-        @test exp(T(Inf)) === T(Inf)
-        @test exp(T(0.0)) === T(1.0) # exact
-        @test exp(T(5000.0)) === T(Inf)
-        @test exp(T(-5000.0)) === T(0.0)
+    @testset "Float16 expm1" begin
+        T=Float16
+        @test isequal(expm1(T(0)), T(0))
+        @test isequal(expm1(-floatmax(T)), -one(T))
+        @test isequal(expm1(floatmax(T)), T(Inf))
+        @test expm1(T(1)) ≈ T(ℯ)-1 atol=2*eps(T)
     end
 end
 
-@testset "exp10 function" begin
-    @testset "accuracy" begin
-        X = map(Float64, vcat(-10:0.00021:10, -35:0.0023:100, -300:0.001:300))
-        for x in X
-            y, yb = exp10(x), exp10(big(x))
-            @test abs(y-yb) <= 1.2*eps(Float64(yb))
+@testset "exponential functions" for T in (Float64, Float32, Float16)
+    for (func, invfunc) in ((exp2, log2), (exp, log), (exp10, log10))
+        @testset "$T $func accuracy" begin
+            minval, maxval = invfunc(floatmin(T)),prevfloat(invfunc(floatmax(T)))
+            # Test range and extensively test numbers near 0.
+            X = Iterators.flatten((minval:T(.1):maxval,
+                                   minval/100:T(.0021):maxval/100,
+                                   minval/10000:T(.000021):maxval/10000,
+                                   nextfloat(zero(T)),
+                                   T(-100):T(1):T(100) ))
+            for x in X
+                y, yb = func(x), func(widen(x))
+                if isfinite(eps(T(yb)))
+                    @test abs(y-yb) <= 1.2*eps(T(yb))
+                end
+            end
         end
-        X = map(Float32, vcat(-10:0.00021:10, -35:0.0023:35, -35:0.001:35))
-        for x in X
-            y, yb = exp10(x), exp10(big(x))
-            @test abs(y-yb) <= 1.2*eps(Float32(yb))
+        @testset "$T $func edge cases" begin
+            @test func(T(-Inf)) === T(0.0)
+            @test func(T(Inf)) === T(Inf)
+            @test func(T(NaN)) === T(NaN)
+            @test func(T(0.0)) === T(1.0) # exact
+            @test func(T(5000.0)) === T(Inf)
+            @test func(T(-5000.0)) === T(0.0)
         end
     end
-    @testset "$T edge cases" for T in (Float64, Float32)
-        @test isnan_type(T, exp10(T(NaN)))
-        @test exp10(T(-Inf)) === T(0.0)
-        @test exp10(T(Inf)) === T(Inf)
-        @test exp10(T(0.0)) === T(1.0) # exact
-        @test exp10(T(1.0)) === T(10.0)
-        @test exp10(T(3.0)) === T(1000.0)
-        @test exp10(T(5000.0)) === T(Inf)
-        @test exp10(T(-5000.0)) === T(0.0)
-    end
 end
 
 @testset "test abstractarray trig functions" begin
@@ -340,6 +351,35 @@ end
     @test Array(acosh.(STAA)) == acosh.(TAA)
     @test Array(acsch.(STAA)) == acsch.(TAA)
     @test Array(acoth.(STAA)) == acoth.(TAA)
+    @test sind(TAA) == sin(deg2rad.(TAA))
+    @test cosd(TAA) == cos(deg2rad.(TAA))
+    @test tand(TAA) == tan(deg2rad.(TAA))
+    @test asind(TAA) == rad2deg.(asin(TAA))
+    @test acosd(TAA) == rad2deg.(acos(TAA))
+    @test atand(TAA) == rad2deg.(atan(TAA))
+    @test asecd(TAA) == rad2deg.(asec(TAA))
+    @test acscd(TAA) == rad2deg.(acsc(TAA))
+    @test acotd(TAA) == rad2deg.(acot(TAA))
+
+    m = rand(3,2) # not square matrix
+    ex = @test_throws DimensionMismatch sind(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch cosd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch tand(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch asind(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch acosd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch atand(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch asecd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch acscd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
+    ex = @test_throws DimensionMismatch acotd(m)
+    @test startswith(ex.value.msg, "matrix is not square")
 end
 
 @testset "check exp2(::Integer) matches exp2(::Float)" begin
@@ -390,12 +430,18 @@ end
             T != Rational{Int} && @test sind(convert(T,-0.0))::fT === -zero(fT)
             @test sind(convert(T,-180.0))::fT === -zero(fT)
             @test sind(convert(T,-360.0))::fT === -zero(fT)
+            if T <: AbstractFloat
+                @test isnan(sind(T(NaN)))
+            end
         end
         @testset "cosd" begin
             @test cosd(convert(T,90))::fT === zero(fT)
             @test cosd(convert(T,270))::fT === zero(fT)
             @test cosd(convert(T,-90))::fT === zero(fT)
             @test cosd(convert(T,-270))::fT === zero(fT)
+            if T <: AbstractFloat
+                @test isnan(cosd(T(NaN)))
+            end
         end
         @testset "sincosd" begin
             @test sincosd(convert(T,-360))::fTsc === ( -zero(fT),  one(fT) )
@@ -406,6 +452,10 @@ end
             @test sincosd(convert(T,  90))::fTsc === (   one(fT), zero(fT) )
             @test sincosd(convert(T, 180))::fTsc === (  zero(fT), -one(fT) )
             @test sincosd(convert(T, 270))::fTsc === (  -one(fT), zero(fT) )
+            if T <: AbstractFloat
+                @test_throws DomainError sincosd(T(Inf))
+                @test all(isnan.(sincosd(T(NaN))))
+            end
         end
 
         @testset "$name" for (name, (sinpi, cospi)) in (
@@ -521,6 +571,27 @@ end
     end
 end
 
+@testset "half-integer and nan/infs for sincospi,sinpi,cospi" begin
+    @testset for T in (ComplexF32, ComplexF64)
+        @test sincospi(T(0.5, 0.0)) == (T(1.0,0.0), T(0.0, -0.0))
+        @test sincospi(T(1.5, 0.0)) == (T(-1.0,0.0), T(0.0, 0.0))
+        @test sinpi(T(1.5, 1.5)) ≈ T(-cosh(3*π/2), 0.0)
+        @test cospi(T(0.5, 0.5)) ≈ T(0.0, -sinh(π/2))
+        s, c = sincospi(T(Inf64, 0.0))
+        @test isnan(real(s)) && imag(s) == zero(real(T))
+        @test isnan(real(c)) && imag(c) == -zero(real(T))
+        s, c = sincospi(T(NaN, 0.0))
+        @test isnan(real(s)) && imag(s) == zero(real(T))
+        @test isnan(real(c)) && imag(c) == zero(real(T))
+        s, c = sincospi(T(NaN, Inf64))
+        @test isnan(real(s)) && isinf(imag(s))
+        @test isinf(real(c)) && isnan(imag(c))
+        s, c = sincospi(T(NaN, 2))
+        @test isnan(real(s)) && isnan(imag(s))
+        @test isnan(real(c)) && isnan(imag(c))
+    end
+end
+
 @testset "trig function type stability" begin
     @testset "$T $f" for T = (Float32,Float64,BigFloat,Rational{Int16},Complex{Int32},ComplexF16), f = (sind,cosd,sinpi,cospi)
         @test Base.return_types(f,Tuple{T}) == [float(T)]
@@ -582,14 +653,14 @@ end
 end
 
 @testset "modf" begin
-    @testset "$elty" for elty in (Float16, Float32, Float64)
-        @test modf( convert(elty,1.2) )[1] ≈ convert(elty,0.2)
-        @test modf( convert(elty,1.2) )[2] ≈ convert(elty,1.0)
-        @test modf( convert(elty,1.0) )[1] ≈ convert(elty,0.0)
-        @test modf( convert(elty,1.0) )[2] ≈ convert(elty,1.0)
-        @test isequal(modf( convert(elty,-Inf) ), (-0.0, -Inf))
-        @test isequal(modf( convert(elty,Inf) ), (0.0, Inf))
-        @test isequal(modf( convert(elty,NaN) ), (NaN, NaN))
+    @testset "$T" for T in (Float16, Float32, Float64)
+        @test modf(T(1.25)) === (T(0.25), T(1.0))
+        @test modf(T(1.0))  === (T(0.0), T(1.0))
+        @test modf(T(-Inf)) === (T(-0.0), T(-Inf))
+        @test modf(T(Inf))  === (T(0.0), T(Inf))
+        @test modf(T(NaN))  === (T(NaN), T(NaN))
+        @test modf(T(-0.0)) === (T(-0.0), T(-0.0))
+        @test modf(T(-1.0)) === (T(-0.0), T(-1.0))
     end
 end
 
@@ -646,6 +717,17 @@ end
         @test isnan_type(T, log1p(T(NaN)))
         @test_throws DomainError log1p(-2*one(T))
     end
+    @testset "log of subnormals" begin
+        # checked results with WolframAlpha
+        for (T, lr) in ((Float32, LinRange(2.f0^(-129), 2.f0^(-128), 1000)),
+                        (Float64, LinRange(2.0^(-1025), 2.0^(-1024), 1000)))
+            for x in lr
+                @test log(x)   ≈ T(log(widen(x))) rtol=2eps(T)
+                @test log2(x)  ≈ T(log2(widen(x))) rtol=2eps(T)
+                @test log10(x) ≈ T(log10(widen(x))) rtol=2eps(T)
+            end
+        end
+    end
 end
 
 @testset "vectorization of 2-arg functions" begin
@@ -709,6 +791,8 @@ end
     @test sincos(big(1.0)) == (sin(big(1.0)), cos(big(1.0)))
     @test sincos(NaN) === (NaN, NaN)
     @test sincos(NaN32) === (NaN32, NaN32)
+    @test_throws DomainError sincos(Inf32)
+    @test_throws DomainError sincos(Inf64)
 end
 
 @testset "test fallback definitions" begin
@@ -991,9 +1075,11 @@ end
         @test isnan_type(T, tanh(T(NaN)))
         for x in Iterators.flatten(pcnfloat.([H_SMALL_X(T), T(1.0), H_MEDIUM_X(T)]))
             @test tanh(x) ≈ tanh(big(x)) rtol=eps(T)
-            @test tanh(-x) ≈ tanh(big(-x)) rtol=eps(T)
+            @test tanh(-x) ≈ -tanh(big(x)) rtol=eps(T)
         end
     end
+    @test tanh(18.0) ≈ tanh(big(18.0)) rtol=eps(Float64)
+    @test tanh(8.0) ≈ tanh(big(8.0)) rtol=eps(Float32)
 end
 
 @testset "asinh" begin
@@ -1076,6 +1162,19 @@ float(x::FloatWrapper) = x
     @test isa(cos(z), Complex)
 end
 
+# Define simple wrapper of a Float type:
+struct FloatWrapper2 <: Real
+    x::Float64
+end
+
+float(x::FloatWrapper2) = x.x
+@testset "inverse hyperbolic trig functions of non-standard float" begin
+    x = FloatWrapper2(3.1)
+    @test asinh(sinh(x)) == asinh(sinh(3.1))
+    @test acosh(cosh(x)) == acosh(cosh(3.1))
+    @test atanh(tanh(x)) == atanh(tanh(3.1))
+end
+
 @testset "cbrt" begin
     for T in (Float32, Float64)
         @test cbrt(zero(T)) === zero(T)
@@ -1107,8 +1206,234 @@ end
 
     isdefined(Main, :Furlongs) || @eval Main include("testhelpers/Furlongs.jl")
     using .Main.Furlongs
-    @test hypot(Furlong(0), Furlong(0)) == Furlong(0.0)
-    @test hypot(Furlong(3), Furlong(4)) == Furlong(5.0)
-    @test hypot(Complex(3), Complex(4)) === 5.0
-    @test hypot(Complex(6, 8), Complex(8, 6)) === 10.0*sqrt(2)
+    @test (@inferred hypot(Furlong(0), Furlong(0))) == Furlong(0.0)
+    @test (@inferred hypot(Furlong(3), Furlong(4))) == Furlong(5.0)
+    @test (@inferred hypot(Furlong(NaN), Furlong(Inf))) == Furlong(Inf)
+    @test (@inferred hypot(Furlong(Inf), Furlong(NaN))) == Furlong(Inf)
+    @test (@inferred hypot(Furlong(0), Furlong(0), Furlong(0))) == Furlong(0.0)
+    @test (@inferred hypot(Furlong(Inf), Furlong(Inf))) == Furlong(Inf)
+    @test (@inferred hypot(Furlong(1), Furlong(1), Furlong(1))) == Furlong(sqrt(3))
+    @test (@inferred hypot(Furlong(Inf), Furlong(NaN), Furlong(0))) == Furlong(Inf)
+    @test (@inferred hypot(Furlong(Inf), Furlong(Inf), Furlong(Inf))) == Furlong(Inf)
+    @test isnan(hypot(Furlong(NaN), Furlong(0), Furlong(1)))
+    ex = @test_throws ErrorException hypot(Furlong(1), 1)
+    @test startswith(ex.value.msg, "promotion of types ")
+
+    @test_throws MethodError hypot()
+    @test (@inferred hypot(floatmax())) == floatmax()
+    @test (@inferred hypot(floatmax(), floatmax())) == Inf
+    @test (@inferred hypot(floatmin(), floatmin())) == √2floatmin()
+    @test (@inferred hypot(floatmin(), floatmin(), floatmin())) == √3floatmin()
+    @test (@inferred hypot(1e-162)) ≈ 1e-162
+    @test (@inferred hypot(2e-162, 1e-162, 1e-162)) ≈ hypot(2, 1, 1)*1e-162
+    @test (@inferred hypot(1e162)) ≈ 1e162
+    @test hypot(-2) === 2.0
+    @test hypot(-2, 0) === 2.0
+    let i = typemax(Int)
+        @test (@inferred hypot(i, i)) ≈ i * √2
+        @test (@inferred hypot(i, i, i)) ≈ i * √3
+        @test (@inferred hypot(i, i, i, i)) ≈ 2.0i
+        @test (@inferred hypot(i//1, 1//i, 1//i)) ≈ i
+    end
+    let i = typemin(Int)
+        @test (@inferred hypot(i, i)) ≈ -√2i
+        @test (@inferred hypot(i, i, i)) ≈ -√3i
+        @test (@inferred hypot(i, i, i, i)) ≈ -2.0i
+    end
+    @testset "$T" for T in (Float32, Float64)
+        @test (@inferred hypot(T(Inf), T(NaN))) == T(Inf) # IEEE754 says so
+        @test (@inferred hypot(T(Inf), T(3//2), T(NaN))) == T(Inf)
+        @test (@inferred hypot(T(1e10), T(1e10), T(1e10), T(1e10))) ≈ 2e10
+        @test isnan_type(T, hypot(T(3), T(3//4), T(NaN)))
+        @test hypot(T(1), T(0)) === T(1)
+        @test hypot(T(1), T(0), T(0)) === T(1)
+        @test (@inferred hypot(T(Inf), T(Inf), T(Inf))) == T(Inf)
+        for s in (zero(T), floatmin(T)*1e3, floatmax(T)*1e-3, T(Inf))
+            @test hypot(1s, 2s)     ≈ s * hypot(1, 2)   rtol=8eps(T)
+            @test hypot(1s, 2s, 3s) ≈ s * hypot(1, 2, 3) rtol=8eps(T)
+        end
+    end
+    @testset "$T" for T in (Float16, Float32, Float64, BigFloat)
+        let x = 1.1sqrt(floatmin(T))
+            @test (@inferred hypot(x, x/4)) ≈ x * sqrt(17/BigFloat(16))
+            @test (@inferred hypot(x, x/4, x/4)) ≈ x * sqrt(9/BigFloat(8))
+        end
+        let x = 2sqrt(nextfloat(zero(T)))
+            @test (@inferred hypot(x, x/4)) ≈ x * sqrt(17/BigFloat(16))
+            @test (@inferred hypot(x, x/4, x/4)) ≈ x * sqrt(9/BigFloat(8))
+        end
+        let x = sqrt(nextfloat(zero(T))/eps(T))/8, f = sqrt(4eps(T))
+            @test hypot(x, x*f) ≈ x * hypot(one(f), f) rtol=eps(T)
+            @test hypot(x, x*f, x*f) ≈ x * hypot(one(f), f, f) rtol=eps(T)
+        end
+        let x = floatmax(T)/2
+            @test (@inferred hypot(x, x/4)) ≈ x * sqrt(17/BigFloat(16))
+            @test (@inferred hypot(x, x/4, x/4)) ≈ x * sqrt(9/BigFloat(8))
+        end
+    end
+    # hypot on Complex returns Real
+    @test (@inferred hypot(3, 4im)) === 5.0
+    @test (@inferred hypot(3, 4im, 12)) === 13.0
+end
+
+struct BadFloatWrapper <: AbstractFloat
+    x::Float64
+end
+
+@testset "not impelemented errors" begin
+    x = BadFloatWrapper(1.9)
+    for f in (sin, cos, tan, sinh, cosh, tanh, atan, acos, asin, asinh, acosh, atanh, exp, log1p, expm1, log) #exp2, exp10 broken for now
+        @test_throws MethodError f(x)
+    end
+end
+
+@testset "fma" begin
+    fma_list = (fma, Base.fma_emulated)
+    if !(Sys.islinux() && Int == Int32) # test runtime fma (skip linux32)
+        fma_list = (fma_list..., Base.fma_float)
+    end
+    for func in fma_list
+        @test func(nextfloat(1.),nextfloat(1.),-1.0) === 4.440892098500626e-16
+        @test func(nextfloat(1f0),nextfloat(1f0),-1f0) === 2.3841858f-7
+        @testset "$T" for T in (Float32, Float64)
+            @test func(floatmax(T), T(2), -floatmax(T)) === floatmax(T)
+            @test func(floatmax(T), T(1), eps(floatmax((T)))) === T(Inf)
+            @test func(T(Inf), T(Inf), T(Inf)) === T(Inf)
+            @test func(floatmax(T), floatmax(T), -T(Inf)) === -T(Inf)
+            @test func(floatmax(T), -floatmax(T), T(Inf)) === T(Inf)
+            @test isnan_type(T, func(T(Inf), T(1), -T(Inf)))
+            @test isnan_type(T, func(T(Inf), T(0), -T(0)))
+            @test func(-zero(T), zero(T), -zero(T)) === -zero(T)
+            for _ in 1:2^18
+                a, b, c = reinterpret.(T, rand(Base.uinttype(T), 3))
+                @test isequal(func(a, b, c), fma(a, b, c)) || (a,b,c)
+            end
+        end
+        @test func(floatmax(Float64), nextfloat(1.0), -floatmax(Float64)) === 3.991680619069439e292
+        @test func(floatmax(Float32), nextfloat(1f0), -floatmax(Float32)) === 4.0564817f31
+        @test func(1.6341681540852291e308, -2., floatmax(Float64)) == -1.4706431733081426e308 # case where inv(a)*c*a == Inf
+        @test func(-2., 1.6341681540852291e308, floatmax(Float64)) == -1.4706431733081426e308 # case where inv(b)*c*b == Inf
+        @test func(-1.9369631f13, 2.1513551f-7, -1.7354427f-24) == -4.1670958f6
+    end
+end
+
+@testset "pow" begin
+    for T in (Float16, Float32, Float64)
+        for x in (0.0, -0.0, 1.0, 10.0, 2.0, Inf, NaN, -Inf, -NaN)
+            for y in (0.0, -0.0, 1.0, -3.0,-10.0 , Inf, NaN, -Inf, -NaN)
+                got, expected = T(x)^T(y), T(big(x))^T(y)
+                @test isnan_type(T, got) && isnan_type(T, expected) || (got === expected)
+            end
+        end
+        for _ in 1:2^16
+            x=rand(T)*100; y=rand(T)*200-100
+            got, expected = x^y, widen(x)^y
+            if isfinite(eps(T(expected)))
+                @test abs(expected-got) <= 1.3*eps(T(expected)) || (x,y)
+            end
+        end
+        for _ in 1:2^10
+            x=rand(T)*floatmin(T); y=rand(T)*2-1
+            got, expected = x^y, widen(x)^y
+            if isfinite(eps(T(expected)))
+                @test abs(expected-got) <= 1.3*eps(T(expected)) || (x,y)
+            end
+        end
+        # test (-x)^y for y larger than typemax(Int)
+        @test T(-1)^floatmax(T) === T(1)
+        @test prevfloat(T(-1))^floatmax(T) === T(Inf)
+        @test nextfloat(T(-1))^floatmax(T) === T(0.0)
+    end
+    # test for large negative exponent where error compensation matters
+    @test 0.9999999955206014^-1.0e8 == 1.565084574870928
+end
+
+# Test that sqrt behaves correctly and doesn't exhibit fp80 double rounding.
+# This happened on old glibc versions.
+# Test case from https://sourceware.org/bugzilla/show_bug.cgi?id=14032.
+@testset "sqrt double rounding" begin
+    testdata = [
+        (0x1.fffffffffffffp+1023, 0x1.fffffffffffffp+511),
+        (0x1.ffffffffffffbp+1023, 0x1.ffffffffffffdp+511),
+        (0x1.ffffffffffff7p+1023, 0x1.ffffffffffffbp+511),
+        (0x1.ffffffffffff3p+1023, 0x1.ffffffffffff9p+511),
+        (0x1.fffffffffffefp+1023, 0x1.ffffffffffff7p+511),
+        (0x1.fffffffffffebp+1023, 0x1.ffffffffffff5p+511),
+        (0x1.fffffffffffe7p+1023, 0x1.ffffffffffff3p+511),
+        (0x1.fffffffffffe3p+1023, 0x1.ffffffffffff1p+511),
+        (0x1.fffffffffffdfp+1023, 0x1.fffffffffffefp+511),
+        (0x1.fffffffffffdbp+1023, 0x1.fffffffffffedp+511),
+        (0x1.fffffffffffd7p+1023, 0x1.fffffffffffebp+511),
+        (0x1.0000000000003p-1022, 0x1.0000000000001p-511),
+        (0x1.0000000000007p-1022, 0x1.0000000000003p-511),
+        (0x1.000000000000bp-1022, 0x1.0000000000005p-511),
+        (0x1.000000000000fp-1022, 0x1.0000000000007p-511),
+        (0x1.0000000000013p-1022, 0x1.0000000000009p-511),
+        (0x1.0000000000017p-1022, 0x1.000000000000bp-511),
+        (0x1.000000000001bp-1022, 0x1.000000000000dp-511),
+        (0x1.000000000001fp-1022, 0x1.000000000000fp-511),
+        (0x1.0000000000023p-1022, 0x1.0000000000011p-511),
+        (0x1.0000000000027p-1022, 0x1.0000000000013p-511),
+        (0x1.000000000002bp-1022, 0x1.0000000000015p-511),
+        (0x1.000000000002fp-1022, 0x1.0000000000017p-511),
+        (0x1.0000000000033p-1022, 0x1.0000000000019p-511),
+        (0x1.0000000000037p-1022, 0x1.000000000001bp-511),
+        (0x1.7167bc36eaa3bp+6, 0x1.3384c7db650cdp+3),
+        (0x1.7570994273ad7p+6, 0x1.353186e89b8ffp+3),
+        (0x1.7dae969442fe6p+6, 0x1.389640fb18b75p+3),
+        (0x1.7f8444fcf67e5p+6, 0x1.395659e94669fp+3),
+        (0x1.8364650e63a54p+6, 0x1.3aea9efe1a3d7p+3),
+        (0x1.85bedd274edd8p+6, 0x1.3bdf20c867057p+3),
+        (0x1.8609cf496ab77p+6, 0x1.3bfd7e14b5eabp+3),
+        (0x1.873849c70a375p+6, 0x1.3c77ed341d27fp+3),
+        (0x1.8919c962cbaaep+6, 0x1.3d3a7113ee82fp+3),
+        (0x1.8de4493e22dc6p+6, 0x1.3f27d448220c3p+3),
+        (0x1.924829a17a288p+6, 0x1.40e9552eec28fp+3),
+        (0x1.92702cd992f12p+6, 0x1.40f94a6fdfddfp+3),
+        (0x1.92b763a8311fdp+6, 0x1.4115af614695fp+3),
+        (0x1.947da013c7293p+6, 0x1.41ca91102940fp+3),
+        (0x1.9536091c494d2p+6, 0x1.4213e334c77adp+3),
+        (0x1.61b04c6p-1019, 0x1.a98b88f18b46dp-510),
+        (0x1.93789f1p-1018, 0x1.4162ae43d5821p-509),
+        (0x1.a1989b4p-1018, 0x1.46f6736eb44bbp-509),
+        (0x1.f93bc9p-1018, 0x1.67a36ec403bafp-509),
+        (0x1.2f675e3p-1017, 0x1.8a22ab6dcfee1p-509),
+        (0x1.a158508p-1017, 0x1.ce418a96cf589p-509),
+        (0x1.cd31f078p-1017, 0x1.e5ef1c65dccebp-509),
+        (0x1.33b43b08p-1016, 0x1.18a9f607e1701p-508),
+        (0x1.6e66a858p-1016, 0x1.324402a00b45fp-508),
+        (0x1.8661cbf8p-1016, 0x1.3c212046bfdffp-508),
+        (0x1.bbb221b4p-1016, 0x1.510681b939931p-508),
+        (0x1.c4942f3cp-1016, 0x1.5461e59227ab5p-508),
+        (0x1.dbb258c8p-1016, 0x1.5cf7b0f78d3afp-508),
+        (0x1.57103ea4p-1015, 0x1.a31ab946d340bp-508),
+        (0x1.9b294f88p-1015, 0x1.cad197e28e85bp-508),
+        (0x1.0000000000001p+0, 0x1p+0),
+        (0x1.fffffffffffffp-1, 0x1.fffffffffffffp-1),
+    ]
+    for (x,y) in testdata
+        # Runtime version
+        @test sqrt(x) === y
+        # Interpreter compile-time version
+        @test Base.invokelatest((@eval ()->sqrt(Base.inferencebarrier($x)))) == y
+        # Inference const-prop version
+        @test Base.invokelatest((@eval ()->sqrt($x))) == y
+        # LLVM constant folding version
+        @test Base.invokelatest((@eval ()->(@force_compile; sqrt(Base.inferencebarrier($x))))) == y
+    end
+end
+
+# Test inference of x^0.0 (tested here because
+# it requires annotations in the math code. If
+# the compiler ever gets good enough to figure
+# that out by itself, move this to inference).
+@test code_typed(x->Val{x^0.0}(), Tuple{Float64})[1][2] == Val{1.0}
+
+function f44336()
+    as = ntuple(_ -> rand(), Val(32))
+    @inline hypot(as...)
+end
+@testset "Issue #44336" begin
+    f44336()
+    @test (@allocated f44336()) == 0
 end
diff --git a/test/meta.jl b/test/meta.jl
index eafa1dd04b1628..5bdb988f41b6da 100644
--- a/test/meta.jl
+++ b/test/meta.jl
@@ -123,6 +123,16 @@ using Base.Meta
 @test isexpr(:(1+1),(:call,))
 @test isexpr(1,:call)==false
 @test isexpr(:(1+1),:call,3)
+
+let
+    fakeline = LineNumberNode(100000,"A")
+    # Interop with __LINE__
+    @test macroexpand(@__MODULE__, replace_sourceloc!(fakeline, :(@__LINE__))) == fakeline.line
+    # replace_sourceloc! should recurse:
+    @test replace_sourceloc!(fakeline, :((@a) + 1)).args[2].args[2] == fakeline
+    @test replace_sourceloc!(fakeline, :(@a @b)).args[3].args[2] == fakeline
+end
+
 ioB = IOBuffer()
 show_sexpr(ioB,:(1+1))
 
@@ -210,3 +220,50 @@ let a = 1
     @test !macroexpand(@__MODULE__, :(@is_dollar_expr $a))
     @test @macroexpand @is_dollar_expr $a
 end
+
+@test Meta.parseatom("@foo", 1, filename=:bar)[1].args[2].file == :bar
+@test Meta.parseall("@foo", filename=:bar).args[1].file == :bar
+
+_lower(m::Module, ex, world::UInt) = ccall(:jl_expand_in_world, Any, (Any, Ref{Module}, Cstring, Cint, Csize_t), ex, m, "none", 0, world)
+
+module TestExpandInWorldModule
+macro m() 1 end
+wa = Base.get_world_counter()
+macro m() 2 end
+end
+
+@test _lower(TestExpandInWorldModule, :(@m), TestExpandInWorldModule.wa) == 1
+
+f(::T) where {T} = T
+ci = code_lowered(f, Tuple{Int})[1]
+@test Meta.partially_inline!(ci.code, [], Tuple{typeof(f),Int}, Any[Int], 0, 0, :propagate) ==
+    Any[Core.ReturnNode(QuoteNode(Int))]
+
+g(::Val{x}) where {x} = x ? 1 : 0
+ci = code_lowered(g, Tuple{Val{true}})[1]
+@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 0, :propagate)[1] ==
+   Core.GotoIfNot(QuoteNode(true), 3)
+@test Meta.partially_inline!(ci.code, [], Tuple{typeof(g),Val{true}}, Any[true], 0, 2, :propagate)[1] ==
+   Core.GotoIfNot(QuoteNode(true), 5)
+
+@testset "inlining with isdefined" begin
+    isdefined_slot(x) = @isdefined(x)
+    ci = code_lowered(isdefined_slot, Tuple{Int})[1]
+    @test Meta.partially_inline!(copy(ci.code), [], Tuple{typeof(isdefined_slot), Int},
+                                 [], 0, 0, :propagate)[1] == Expr(:isdefined, Core.SlotNumber(2))
+    @test Meta.partially_inline!(copy(ci.code), [isdefined_slot, 1], Tuple{typeof(isdefined_slot), Int},
+                                 [], 0, 0, :propagate)[1] == true
+
+    isdefined_sparam(::T) where {T} = @isdefined(T)
+    ci = code_lowered(isdefined_sparam, Tuple{Int})[1]
+    @test Meta.partially_inline!(copy(ci.code), [], Tuple{typeof(isdefined_sparam), Int},
+                                 Any[Int], 0, 0, :propagate)[1] == true
+    @test Meta.partially_inline!(copy(ci.code), [], Tuple{typeof(isdefined_sparam), Int},
+                                 [], 0, 0, :propagate)[1] == Expr(:isdefined, Expr(:static_parameter, 1))
+
+    @eval isdefined_globalref(x) = $(Expr(:isdefined, GlobalRef(Base, :foo)))
+    ci = code_lowered(isdefined_globalref, Tuple{Int})[1]
+    @test Meta.partially_inline!(copy(ci.code), Any[isdefined_globalref, 1], Tuple{typeof(isdefined_globalref), Int},
+                                 [], 0, 0, :propagate)[1] == Expr(:isdefined, GlobalRef(Base, :foo))
+
+end
diff --git a/test/misc.jl b/test/misc.jl
index e5fc2f461643c5..7e1ab36a841f50 100644
--- a/test/misc.jl
+++ b/test/misc.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 isdefined(Main, :FakePTYs) || @eval Main include("testhelpers/FakePTYs.jl")
+include("testhelpers/withlocales.jl")
 
 # Tests that do not really go anywhere else
 
@@ -99,9 +100,12 @@ let
         """
 end
 
+# Debugging tool: return the current state of the enable_finalizers counter.
+get_finalizers_inhibited() = ccall(:jl_gc_get_finalizers_inhibited, Int32, (Ptr{Cvoid},), C_NULL)
+
 # lock / unlock
 let l = ReentrantLock()
-    lock(l)
+    @test lock(l) === nothing
     @test islocked(l)
     success = Ref(false)
     @test trylock(l) do
@@ -114,14 +118,87 @@ let l = ReentrantLock()
     @test success[]
     t = @async begin
         @test trylock(l) do
-            @test false
+            error("unreachable")
         end === false
     end
+    @test get_finalizers_inhibited() == 1
     Base.wait(t)
-    unlock(l)
+    @test get_finalizers_inhibited() == 1
+    @test unlock(l) === nothing
+    @test get_finalizers_inhibited() == 0
     @test_throws ErrorException unlock(l)
 end
 
+for l in (Threads.SpinLock(), ReentrantLock())
+    @test get_finalizers_inhibited() == 0
+    @test lock(get_finalizers_inhibited, l) == 1
+    @test get_finalizers_inhibited() == 0
+    try
+        GC.enable_finalizers(false)
+        GC.enable_finalizers(false)
+        @test get_finalizers_inhibited() == 2
+        GC.enable_finalizers(true)
+        @test get_finalizers_inhibited() == 1
+    finally
+        @test get_finalizers_inhibited() == 1
+        GC.enable_finalizers(false)
+        @test get_finalizers_inhibited() == 2
+    end
+    @test get_finalizers_inhibited() == 2
+    GC.enable_finalizers(true)
+    @test get_finalizers_inhibited() == 1
+    GC.enable_finalizers(true)
+    @test get_finalizers_inhibited() == 0
+    if ccall(:jl_is_debugbuild, Cint, ()) != 0
+        # Note this warning only exists in debug builds
+        @test_warn "WARNING: GC finalizers already enabled on this thread." GC.enable_finalizers(true)
+    end
+
+    @test lock(l) === nothing
+    @test try unlock(l) finally end === nothing
+end
+
+@testset "Semaphore" begin
+    sem_size = 2
+    n = 100
+    s = Base.Semaphore(sem_size)
+
+    # explicit acquire-release form
+    clock = Threads.Atomic{Int}(1)
+    occupied = Threads.Atomic{Int}(0)
+    history = fill!(Vector{Int}(undef, 2n), -1)
+    @sync for _ in 1:n
+        @async begin
+            Base.acquire(s)
+            history[Threads.atomic_add!(clock, 1)] = Threads.atomic_add!(occupied, 1) + 1
+            sleep(rand(0:0.01:0.1))
+            history[Threads.atomic_add!(clock, 1)] = Threads.atomic_sub!(occupied, 1) - 1
+            Base.release(s)
+        end
+    end
+    @test all(<=(sem_size), history)
+    @test all(>=(0), history)
+    @test history[end] == 0
+
+    # do-block syntax
+    clock = Threads.Atomic{Int}(1)
+    occupied = Threads.Atomic{Int}(0)
+    history = fill!(Vector{Int}(undef, 2n), -1)
+    @sync for _ in 1:n
+        @async begin
+            @test Base.acquire(s) do
+                history[Threads.atomic_add!(clock, 1)] = Threads.atomic_add!(occupied, 1) + 1
+                sleep(rand(0:0.01:0.1))
+                history[Threads.atomic_add!(clock, 1)] = Threads.atomic_sub!(occupied, 1) - 1
+                return :resultvalue
+            end == :resultvalue
+        end
+    end
+    @test all(<=(sem_size), history)
+    @test all(>=(0), history)
+    @test history[end] == 0
+end
+
 # task switching
 
 @noinline function f6597(c)
@@ -146,6 +223,17 @@ end
 
 @test_throws ErrorException("deadlock detected: cannot wait on current task") wait(current_task())
 
+# issue #41347
+let t = @async 1
+    wait(t)
+    @test_throws ErrorException yield(t)
+end
+
+let t = @async error(42)
+    Base._wait(t)
+    @test_throws ErrorException("42") yieldto(t)
+end
+
 # test that @sync is lexical (PR #27164)
 
 const x27164 = Ref(0)
@@ -195,6 +283,77 @@ v11801, t11801 = @timed sin(1)
 
 @test names(@__MODULE__, all = true) == names_before_timing
 
+redirect_stdout(devnull) do # suppress time prints
+# Accepted @time argument formats
+@test @time true
+@test @time "message" true
+let msg = "message"
+    @test @time msg true
+end
+let foo() = "message"
+    @test @time foo() true
+end
+
+# Accepted @timev argument formats
+@test @timev true
+@test @timev "message" true
+let msg = "message"
+    @test @timev msg true
+end
+let foo() = "message"
+    @test @timev foo() true
+end
+
+# @showtime
+@test @showtime true
+let foo() = true
+    @test @showtime foo()
+end
+let foo() = false
+    @test (@showtime foo()) == false
+end
+
+# PR #39133, ensure that @time evaluates in the same scope
+function time_macro_scope()
+    try # try/throw/catch bypasses printing
+        @time (time_macro_local_var = 1; throw("expected"))
+        return time_macro_local_var
+    catch ex
+        ex === "expected" || rethrow()
+    end
+end
+@test time_macro_scope() == 1
+
+function timev_macro_scope()
+    try # try/throw/catch bypasses printing
+        @timev (time_macro_local_var = 1; throw("expected"))
+        return time_macro_local_var
+    catch ex
+        ex === "expected" || rethrow()
+    end
+end
+@test timev_macro_scope() == 1
+
+before_comp, before_recomp = Base.cumulative_compile_time_ns() # no need to turn timing on, @time will do that
+
+# exercise concurrent calls to `@time` for reentrant compilation time measurement.
+@sync begin
+    t1 = @async @time begin
+        sleep(2)
+        @eval module M ; f(x,y) = x+y ; end
+        @eval M.f(2,3)
+    end
+    t2 = @async begin
+        sleep(1)
+        @time 2 + 2
+    end
+end
+
+after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn timing off, @time will do that
+@test after_comp >= before_comp;
+
+end # redirect_stdout
+
 # interactive utilities
 
 struct ambigconvert; end # inject a problematic `convert` method to ensure it still works
@@ -234,16 +393,14 @@ let vec = vcat(missing, ones(100000))
     @test length(unique(summarysize(vec) for i = 1:20)) == 1
 end
 
-# issue #13021
-let ex = try
-    Main.x13021 = 0
-    nothing
-catch ex
-    ex
-end
-    @test isa(ex, ErrorException) && ex.msg == "cannot assign variables in other modules"
+# issue #40773
+let s = Set(1:100)
+    @test summarysize([s]) > summarysize(s)
 end
 
+# issue #44780
+@test summarysize(BigInt(2)^1000) > summarysize(BigInt(2))
+
 ## test conversion from UTF-8 to UTF-16 (for Windows APIs)
 
 # empty arrays
@@ -274,10 +431,10 @@ V8 = [
     ([0xe1,0x88,0xb4],[0x1234])
     ([0xea,0xaf,0x8d],[0xabcd])
     ([0xed,0x9f,0xbf],[0xd7ff])
-    ([0xed,0xa0,0x80],[0xd800]) # invalid code point – high surrogate
-    ([0xed,0xaf,0xbf],[0xdbff]) # invalid code point – high surrogate
-    ([0xed,0xb0,0x80],[0xdc00]) # invalid code point – low surrogate
-    ([0xed,0xbf,0xbf],[0xdfff]) # invalid code point – low surrogate
+    ([0xed,0xa0,0x80],[0xd800]) # invalid code point – high surrogate
+    ([0xed,0xaf,0xbf],[0xdbff]) # invalid code point – high surrogate
+    ([0xed,0xb0,0x80],[0xdc00]) # invalid code point – low surrogate
+    ([0xed,0xbf,0xbf],[0xdfff]) # invalid code point – low surrogate
     ([0xee,0x80,0x80],[0xe000])
     ([0xef,0xbf,0xbf],[0xffff])
     # 4-byte
@@ -427,7 +584,7 @@ end
 
 let optstring = repr("text/plain", Base.JLOptions())
     @test startswith(optstring, "JLOptions(\n")
-    @test !occursin("Ptr", optstring)
+    @test !occursin("Ptr{UInt8}", optstring)
     @test endswith(optstring, "\n)")
     @test occursin(" = \"", optstring)
 end
@@ -435,7 +592,7 @@ let optstring = repr(Base.JLOptions())
     @test startswith(optstring, "JLOptions(")
     @test endswith(optstring, ")")
     @test !occursin("\n", optstring)
-    @test !occursin("Ptr", optstring)
+    @test !occursin("Ptr{UInt8}", optstring)
     @test occursin(" = \"", optstring)
 end
 
@@ -564,6 +721,26 @@ let buf = IOBuffer()
     # Check that boldness is turned off
     printstyled(buf_color, "foo"; bold=true, color=:red)
     @test String(take!(buf)) == "\e[31m\e[1mfoo\e[22m\e[39m"
+
+    # Check that underline is turned off
+    printstyled(buf_color, "foo"; color = :red, underline = true)
+    @test String(take!(buf)) == "\e[31m\e[4mfoo\e[24m\e[39m"
+
+    # Check that blink is turned off
+    printstyled(buf_color, "foo"; color = :red, blink = true)
+    @test String(take!(buf)) == "\e[31m\e[5mfoo\e[25m\e[39m"
+
+    # Check that reverse is turned off
+    printstyled(buf_color, "foo"; color = :red, reverse = true)
+    @test String(take!(buf)) == "\e[31m\e[7mfoo\e[27m\e[39m"
+
+    # Check that hidden is turned off
+    printstyled(buf_color, "foo"; color = :red, hidden = true)
+    @test String(take!(buf)) == "\e[31m\e[8mfoo\e[28m\e[39m"
+
+    # Check that all options can be turned on simultaneously
+    printstyled(buf_color, "foo"; color = :red, bold = true, underline = true, blink = true, reverse = true, hidden = true)
+    @test String(take!(buf)) == "\e[31m\e[1m\e[4m\e[5m\e[7m\e[8mfoo\e[28m\e[27m\e[25m\e[24m\e[22m\e[39m"
 end
 
 abstract type DA_19281{T, N} <: AbstractArray{T, N} end
@@ -646,6 +823,71 @@ let foo() = begin
     @test foo() == 1
 end
 
+module atinvokelatest
+f(x) = 1
+g(x, y; z=0) = x * y + z
+end
+
+let foo() = begin
+        @eval atinvokelatest.f(x::Int) = 3
+        return Base.@invokelatest atinvokelatest.f(0)
+    end
+    @test foo() == 3
+end
+
+let foo() = begin
+        @eval atinvokelatest.f(x::Int) = 3
+        return Base.@invokelatest atinvokelatest.f(0)
+    end
+    @test foo() == 3
+
+    bar() = begin
+        @eval atinvokelatest.g(x::Int, y::Int; z=3) = z
+        return Base.@invokelatest atinvokelatest.g(2, 3; z=1)
+    end
+    @test bar() == 1
+end
+
+@testset "@invoke macro" begin
+    # test against `invoke` doc example
+    let
+        f(x::Real) = x^2
+        f(x::Integer) = 1 + Base.@invoke f(x::Real)
+        @test f(2) == 5
+    end
+
+    let
+        f1(::Integer) = Integer
+        f1(::Real) = Real;
+        f2(x::Real) = _f2(x)
+        _f2(::Integer) = Integer
+        _f2(_) = Real
+        @test f1(1) === Integer
+        @test f2(1) === Integer
+        @test Base.@invoke(f1(1::Real)) === Real
+        @test Base.@invoke(f2(1::Real)) === Integer
+    end
+
+    # when argment's type annotation is omitted, it should be specified as `Any`
+    let
+        f(_) = Any
+        f(x::Integer) = Integer
+        @test f(1) === Integer
+        @test Base.@invoke(f(1::Any)) === Any
+        @test Base.@invoke(f(1)) === Any
+    end
+
+    # handle keyword arguments correctly
+    let
+        f(a; kw1 = nothing, kw2 = nothing) = a + max(kw1, kw2)
+        f(::Integer; kwargs...) = error("don't call me")
+
+        @test_throws Exception f(1; kw1 = 1, kw2 = 2)
+        @test 3 == Base.@invoke f(1::Any; kw1 = 1, kw2 = 2)
+        @test 3 == Base.@invoke f(1; kw1 = 1, kw2 = 2)
+    end
+end
+
 # Endian tests
 # For now, we only support little endian.
 # Add an `Sys.ARCH` test for big endian when/if we add support for that.
@@ -697,36 +939,19 @@ end
 
 # issue #27239
 @testset "strftime tests issue #27239" begin
-
-    # save current locales
-    locales = Dict()
-    for cat in 0:9999
-        cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, C_NULL)
-        if cstr != C_NULL
-            locales[cat] = unsafe_string(cstr)
-        end
-    end
-
     # change to non-Unicode Korean
-    for (cat, _) in locales
-        korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"]
-        for lc in korloc
-            cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
-        end
-    end
-
-    # system dependent formats
-    timestr_c = Libc.strftime(0.0)
-    timestr_aAbBpZ = Libc.strftime("%a %A %b %B %p %Z", 0)
-
-    # recover locales
-    for (cat, lc) in locales
-        cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
+    korloc = ["ko_KR.EUC-KR", "ko_KR.CP949", "ko_KR.949", "Korean_Korea.949"]
+    timestrs = String[]
+    withlocales(korloc) do
+        # system dependent formats
+        push!(timestrs, Libc.strftime(0.0))
+        push!(timestrs, Libc.strftime("%a %A %b %B %p %Z", 0))
     end
-
     # tests
-    @test isvalid(timestr_c)
-    @test isvalid(timestr_aAbBpZ)
+    isempty(timestrs) && @warn "skipping stftime tests: no locale found for testing"
+    for s in timestrs
+        @test isvalid(s)
+    end
 end
 
 
@@ -794,10 +1019,11 @@ end
 
 @testset "exports of modules" begin
     for (_, mod) in Base.loaded_modules
-       for v in names(mod)
-           @test isdefined(mod, v)
-       end
-   end
+        mod === Main && continue # Main exports everything
+        for v in names(mod)
+            @test isdefined(mod, v)
+        end
+    end
 end
 
 @testset "ordering UUIDs" begin
@@ -837,13 +1063,55 @@ end
     GC.gc(true); GC.gc(false)
 
     GC.safepoint()
+
+    mktemp() do tmppath, _
+        open(tmppath, "w") do tmpio
+            redirect_stderr(tmpio) do
+                GC.enable_logging(true)
+                GC.gc()
+                GC.enable_logging(false)
+            end
+        end
+        @test occursin("GC: pause", read(tmppath, String))
+    end
 end
 
 @testset "fieldtypes Module" begin
-    @test fieldtypes(Module) isa Tuple
+    @test fieldtypes(Module) === ()
 end
 
 
 @testset "issue #28188" begin
     @test `$(@__FILE__)` == let file = @__FILE__; `$file` end
 end
+
+# Test that read fault on a prot-none region does not incorrectly give
+# ReadOnlyMemoryEror, but rather crashes the program
+const MAP_ANONYMOUS_PRIVATE = Sys.isbsd() ? 0x1002 : 0x22
+let script = :(
+        let ptr = Ptr{Cint}(ccall(:jl_mmap, Ptr{Cvoid},
+                                  (Ptr{Cvoid}, Csize_t, Cint, Cint, Cint, Int),
+                                  C_NULL, 16*1024, 0, $MAP_ANONYMOUS_PRIVATE, -1, 0))
+            try
+                unsafe_load(ptr)
+            catch e
+                println(e)
+            end
+        end
+    )
+    cmd = if Sys.isunix()
+        # Set the maximum core dump size to 0 to keep this expected crash from
+        # producing a (and potentially overwriting an existing) core dump file
+        `sh -c "ulimit -c 0; $(Base.shell_escape(Base.julia_cmd())) -e '$script'"`
+    else
+        `$(Base.julia_cmd()) -e '$script'`
+    end
+    @test !success(cmd)
+end
+
+# issue #41656
+@test success(`$(Base.julia_cmd()) -e 'isempty(x) = true'`)
+
+@testset "Base/timing.jl" begin
+    @test Base.jit_total_bytes() >= 0
+end
diff --git a/test/missing.jl b/test/missing.jl
index 8e1850e122542f..13ed684f1fc05a 100644
--- a/test/missing.jl
+++ b/test/missing.jl
@@ -1,5 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays
+
 @testset "MissingException" begin
     @test sprint(showerror, MissingException("test")) == "MissingException: test"
 end
@@ -83,7 +86,7 @@ end
     arithmetic_operators = [+, -, *, /, ^, Base.div, Base.mod, Base.fld, Base.rem]
 
     # All unary operators return missing when evaluating missing
-    for f in [!, ~, +, -]
+    for f in [!, ~, +, -, *, &, |, xor, nand, nor]
         @test ismissing(f(missing))
     end
 
@@ -106,6 +109,19 @@ end
     end
 end
 
+@testset "two-argument functions" begin
+    two_argument_functions = [atan, hypot, log]
+
+    # All two-argument functions return missing when operating on two missing's
+    # All two-argument functions return missing when operating on a scalar and an missing
+    # All two-argument functions return missing when operating on an missing and a scalar
+    for f in two_argument_functions
+        @test ismissing(f(missing, missing))
+        @test ismissing(f(1, missing))
+        @test ismissing(f(missing, 1))
+    end
+end
+
 @testset "bit operators" begin
     bit_operators = [&, |, ⊻]
 
@@ -128,6 +144,22 @@ end
     @test ismissing(xor(true, missing))
     @test ismissing(xor(missing, false))
     @test ismissing(xor(false, missing))
+    @test ismissing(nand(missing, true))
+    @test ismissing(nand(true, missing))
+    @test nand(missing, false) == true
+    @test nand(false, missing) == true
+    @test ismissing(⊼(missing, true))
+    @test ismissing(⊼(true, missing))
+    @test ⊼(missing, false) == true
+    @test ⊼(false, missing) == true
+    @test nor(missing, true) == false
+    @test nor(true, missing) == false
+    @test ismissing(nor(missing, false))
+    @test ismissing(nor(false, missing))
+    @test ⊽(missing, true) == false
+    @test ⊽(true, missing) == false
+    @test ismissing(⊽(missing, false))
+    @test ismissing(⊽(false, missing))
 
     @test ismissing(missing & 1)
     @test ismissing(1 & missing)
@@ -135,11 +167,21 @@ end
     @test ismissing(1 | missing)
     @test ismissing(xor(missing, 1))
     @test ismissing(xor(1, missing))
+    @test ismissing(nand(missing, 1))
+    @test ismissing(nand(1, missing))
+    @test ismissing(⊼(missing, 1))
+    @test ismissing(⊼(1, missing))
+    @test ismissing(nor(missing, 1))
+    @test ismissing(nor(1, missing))
+    @test ismissing(⊽(missing, 1))
+    @test ismissing(⊽(1, missing))
 end
 
-@testset "* string concatenation" begin
+@testset "* string/char concatenation" begin
     @test ismissing("a" * missing)
+    @test ismissing('a' * missing)
     @test ismissing(missing * "a")
+    @test ismissing(missing * 'a')
 end
 
 # Emulate a unitful type such as Dates.Minute
@@ -158,7 +200,7 @@ Base.one(::Type{Unit}) = 1
                             identity, zero, one, oneunit,
                             iseven, isodd, ispow2,
                             isfinite, isinf, isnan, iszero,
-                            isinteger, isreal, transpose, adjoint, float, inv]
+                            isinteger, isreal, transpose, adjoint, float, complex, inv]
 
     # All elementary functions return missing when evaluating missing
     for f in elementary_functions
@@ -171,11 +213,15 @@ Base.one(::Type{Unit}) = 1
         @test zero(Union{T, Missing}) === T(0)
         @test one(Union{T, Missing}) === T(1)
         @test oneunit(Union{T, Missing}) === T(1)
+        @test float(Union{T, Missing}) === Union{float(T), Missing}
+        @test complex(Union{T, Missing}) === Union{complex(T), Missing}
     end
 
     @test_throws MethodError zero(Union{Symbol, Missing})
     @test_throws MethodError one(Union{Symbol, Missing})
     @test_throws MethodError oneunit(Union{Symbol, Missing})
+    @test_throws MethodError float(Union{Symbol, Missing})
+    @test_throws MethodError complex(Union{Symbol, Missing})
 
     for T in (Unit,)
         @test zero(Union{T, Missing}) === T(0)
@@ -186,10 +232,14 @@ Base.one(::Type{Unit}) = 1
     @test zero(Missing) === missing
     @test one(Missing) === missing
     @test oneunit(Missing) === missing
+    @test float(Missing) === Missing
+    @test complex(Missing) === Missing
 
     @test_throws MethodError zero(Any)
     @test_throws MethodError one(Any)
     @test_throws MethodError oneunit(Any)
+    @test_throws MethodError float(Any)
+    @test_throws MethodError complex(Any)
 
     @test_throws MethodError zero(String)
     @test_throws MethodError zero(Union{String, Missing})
@@ -225,10 +275,10 @@ end
     @test sprint(show, [1 missing]) == "$(Union{Int, Missing})[1 missing]"
     b = IOBuffer()
     display(TextDisplay(b), [missing])
-    @test String(take!(b)) == "1-element Vector{$Missing}:\n missing"
+    @test String(take!(b)) == "1-element Vector{$Missing}:\n missing\n"
     b = IOBuffer()
     display(TextDisplay(b), [1 missing])
-    @test String(take!(b)) == "1×2 Matrix{$(Union{Int, Missing})}:\n 1  missing"
+    @test String(take!(b)) == "1×2 Matrix{$(Union{Int, Missing})}:\n 1  missing\n"
 end
 
 @testset "arrays with missing values" begin
@@ -428,10 +478,10 @@ end
             @test_throws BoundsError x[3, 1]
             @test findfirst(==(2), x) === nothing
             @test isempty(findall(==(2), x))
-            @test_throws ArgumentError argmin(x)
-            @test_throws ArgumentError findmin(x)
-            @test_throws ArgumentError argmax(x)
-            @test_throws ArgumentError findmax(x)
+            @test_throws "reducing over an empty collection is not allowed" argmin(x)
+            @test_throws "reducing over an empty collection is not allowed" findmin(x)
+            @test_throws "reducing over an empty collection is not allowed" argmax(x)
+            @test_throws "reducing over an empty collection is not allowed" findmax(x)
         end
     end
 
@@ -488,14 +538,27 @@ end
         for n in 0:3
             itr = skipmissing(Vector{Union{Int,Missing}}(fill(missing, n)))
             @test sum(itr) == reduce(+, itr) == mapreduce(identity, +, itr) === 0
-            @test_throws ArgumentError reduce(x -> x/2, itr)
-            @test_throws ArgumentError mapreduce(x -> x/2, +, itr)
+            @test_throws "reducing over an empty collection is not allowed" reduce(x -> x/2, itr)
+            @test_throws "reducing over an empty collection is not allowed" mapreduce(x -> x/2, +, itr)
         end
 
         # issue #35504
         nt = NamedTuple{(:x, :y),Tuple{Union{Missing, Int},Union{Missing, Float64}}}(
             (missing, missing))
         @test sum(skipmissing(nt)) === 0
+
+        # issues #38627 and #124
+        @testset for len in [1, 2, 15, 16, 1024, 1025]
+            v = repeat(Union{Int,Missing}[1], len)
+            oa = OffsetArray(v, typemax(Int)-length(v))
+            sm = skipmissing(oa)
+            @test sum(sm) == len
+
+            v = repeat(Union{Int,Missing}[missing], len)
+            oa = OffsetArray(v, typemax(Int)-length(v))
+            sm = skipmissing(oa)
+            @test sum(sm) == 0
+        end
     end
 
     @testset "filter" begin
@@ -525,6 +588,16 @@ end
     @test coalesce(missing, nothing) === nothing
 end
 
+@testset "@coalesce" begin
+    @test @coalesce() === missing
+    @test @coalesce(1) === 1
+    @test @coalesce(nothing) === nothing
+    @test @coalesce(missing) === missing
+
+    @test @coalesce(1, error("failed")) === 1
+    @test_throws ErrorException @coalesce(missing, error("failed"))
+end
+
 mutable struct Obj; x; end
 @testset "weak references" begin
     @noinline function mk_wr(r, wr)
@@ -544,3 +617,29 @@ end
     me = try missing(1) catch e e end
     @test sprint(showerror, me) == "MethodError: objects of type Missing are not callable"
 end
+
+@testset "sort and sortperm with $(eltype(X))" for (X, P, RP) in
+    (([2, missing, -2, 5, missing], [3, 1, 4, 2, 5], [2, 5, 4, 1, 3]),
+     ([NaN, missing, 5, -0.0, NaN, missing, Inf, 0.0, -Inf],
+      [9, 4, 8, 3, 7, 1, 5, 2, 6], [2, 6, 1, 5, 7, 3, 8, 4, 9]),
+     ([missing, "a", "c", missing, "b"], [2, 5, 3, 1, 4], [1, 4, 3, 5, 2]))
+    @test sortperm(X) == P
+    @test sortperm(X, alg=QuickSort) == P
+    @test sortperm(X, alg=MergeSort) == P
+
+    XP = X[P]
+    @test isequal(sort(X), XP)
+    @test isequal(sort(X, alg=QuickSort), XP)
+    @test isequal(sort(X, alg=MergeSort), XP)
+
+    @test sortperm(X, rev=true) == RP
+    @test sortperm(X, alg=QuickSort, rev=true) == RP
+    @test sortperm(X, alg=MergeSort, rev=true) == RP
+
+    XRP = X[RP]
+    @test isequal(sort(X, rev=true), XRP)
+    @test isequal(sort(X, alg=QuickSort, rev=true), XRP)
+    @test isequal(sort(X, alg=MergeSort, rev=true), XRP)
+end
+
+sortperm(reverse([NaN, missing, NaN, missing]))
diff --git a/test/mpfr.jl b/test/mpfr.jl
index 86c7d345f49fd3..1a0a0041bf94ec 100644
--- a/test/mpfr.jl
+++ b/test/mpfr.jl
@@ -338,23 +338,6 @@ end
     @test *(a, b, c, d, f) == parse(BigFloat,"5.214588134765625e+04")
     @test *(a, b, c, d, f, g) == parse(BigFloat,"1.6295587921142578125e+03")
 end
-@testset "< / > / <= / >=" begin
-    x = BigFloat(12)
-    y = BigFloat(42)
-    z = BigFloat(30)
-    @test y > x
-    @test y >= x
-    @test y > z
-    @test y >= z
-    @test x < y
-    @test x <= y
-    @test z < y
-    @test z <= y
-    @test y - x >= z
-    @test y - x <= z
-    @test !(x >= z)
-    @test !(y <= z)
-end
 @testset "rounding modes" begin
     setprecision(4) do
         # default mode is round to nearest
@@ -371,7 +354,6 @@ end
         end
     end
 end
-
 @testset "copysign / sign" begin
     x = BigFloat(1)
     y = BigFloat(-1)
@@ -473,10 +455,11 @@ end
     @test isnan(nextfloat(BigFloat(NaN), 1))
     @test isnan(prevfloat(BigFloat(NaN), 1))
 end
+
 # sqrt DomainError
 @test_throws DomainError sqrt(BigFloat(-1))
 
-@testset "precision" begin
+@testset "setprecision" begin
     old_precision = precision(BigFloat)
     x = BigFloat(0)
     @test precision(x) == old_precision
@@ -492,7 +475,8 @@ end
     @test precision(z) == 240
     x = BigFloat(12)
     @test precision(x) == old_precision
-    @test_throws DomainError setprecision(1)
+    @test precision(setprecision(1) do; BigFloat(23); end) == 1  # minimum-precision
+    @test_throws DomainError setprecision(0)
     @test_throws DomainError BigFloat(1, precision = 0)
     @test_throws DomainError BigFloat(big(1.1), precision = 0)
     @test_throws DomainError BigFloat(2.5, precision = -900)
@@ -512,7 +496,6 @@ end
     @test !isinteger(-BigFloat(Inf))
     @test !isinteger(BigFloat(NaN))
 end
-
 @testset "comparisons" begin
     x = BigFloat(1)
     y = BigFloat(-1)
@@ -521,9 +504,11 @@ end
     imi = BigFloat(-Inf)
     @test x > y
     @test x >= y
+    @test !(y >= x)
     @test x >= x
     @test y < x
     @test y <= x
+    @test !(x <= y)
     @test y <= y
     @test x < ipl
     @test x <= ipl
@@ -622,7 +607,8 @@ end
         @test log(x) == log(42)
         @test isinf(log(BigFloat(0)))
         @test_throws DomainError log(BigFloat(-1))
-        @test log2(x) == log2(42)
+        # issue #41450
+        @test_skip log2(x) == log2(42)
         @test isinf(log2(BigFloat(0)))
         @test_throws DomainError log2(BigFloat(-1))
         @test log10(x) == log10(42)
@@ -667,6 +653,10 @@ end
     @test typeof(round(Int64, x)) == Int64 && round(Int64, x) == 42
     @test typeof(round(Int, x)) == Int && round(Int, x) == 42
     @test typeof(round(UInt, x)) == UInt && round(UInt, x) == 0x2a
+
+    # Issue #44662
+    @test_throws InexactError round(Integer, big(Inf))
+    @test_throws InexactError round(Integer, big(NaN))
 end
 @testset "string representation" begin
     str = "1.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000012"
@@ -675,14 +665,17 @@ end
     end
     setprecision(21) do
         @test string(parse(BigFloat, "0.1")) == "0.10000002"
+        @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.9000015"
     end
     setprecision(40) do
         @test string(parse(BigFloat, "0.1")) == "0.10000000000002"
+        @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.8999999999942"
     end
     setprecision(123) do
         @test string(parse(BigFloat, "0.1")) == "0.0999999999999999999999999999999999999953"
+        @test string(parse(BigFloat, "0.5")) == "0.5"
         @test string(parse(BigFloat, "-9.9")) == "-9.8999999999999999999999999999999999997"
     end
 end
@@ -927,6 +920,7 @@ end
     @test i3+1 > f
     @test i3+1 >= f
 end
+
 # issue #8318
 @test convert(Int64,big(500_000_000_000_000.)) == 500_000_000_000_000
 
@@ -935,6 +929,7 @@ end
     @test MPFR.get_emin() == MPFR.get_emin_min()
     @test MPFR.get_emax() == MPFR.get_emax_max()
 end
+
 # issue #10994: handle embedded NUL chars for string parsing
 @test_throws ArgumentError parse(BigFloat, "1\0")
 
@@ -1025,10 +1020,22 @@ end
         @test to_string(big"-1.0") == "-1.0"
     end
 end
-
 @testset "big(::Type)" begin
     for x in (2f0, pi, 7.8, big(ℯ))
         @test big(typeof(x)) == typeof(big(x))
         @test big(typeof(complex(x, x))) == typeof(big(complex(x, x)))
     end
 end
+
+@testset "precision base" begin
+    setprecision(53) do
+        @test precision(Float64, base=10) == precision(BigFloat, base=10) == 15
+    end
+    for (p, b) in ((100,10), (50,100))
+        setprecision(p, base=b) do
+            @test precision(BigFloat, base=10) == 100
+            @test precision(BigFloat, base=100) == 50
+            @test precision(BigFloat) == precision(BigFloat, base=2) == 333
+        end
+    end
+end
diff --git a/test/namedtuple.jl b/test/namedtuple.jl
index fa1d28b27d822d..3b571b3c7d612c 100644
--- a/test/namedtuple.jl
+++ b/test/namedtuple.jl
@@ -21,9 +21,20 @@
 @test (a=3,)[:a] == 3
 @test (x=4, y=5, z=6).y == 5
 @test (x=4, y=5, z=6).z == 6
+@test (x=4, y=5, z=6)[(:x, :y)] == (x=4, y=5)
+@test (x=4, y=5, z=6)[(:x,)] == (x=4,)
+@test (x=4, y=5, z=6)[[:x, :y]] == (x=4, y=5)
+@test (x=4, y=5, z=6)[[:x]] == (x=4,)
+@test (x=4, y=5, z=6)[()] == NamedTuple()
+@test NamedTuple()[()] == NamedTuple()
 @test_throws ErrorException (x=4, y=5, z=6).a
 @test_throws BoundsError (a=2,)[0]
 @test_throws BoundsError (a=2,)[2]
+@test_throws ErrorException (x=4, y=5, z=6)[(:a,)]
+@test_throws ErrorException (x=4, y=5, z=6)[(:x, :a)]
+@test_throws ErrorException (x=4, y=5, z=6)[[:a]]
+@test_throws ErrorException (x=4, y=5, z=6)[[:x, :a]]
+@test_throws ErrorException (x=4, y=5, z=6)[(:x, :x)]
 
 @test length(NamedTuple()) == 0
 @test length((a=1,)) == 1
@@ -79,11 +90,20 @@ end
 @test Tuple((a=1, b=2, c=3)) == (1, 2, 3)
 
 @test isless((a=1,b=2), (a=1,b=3))
-@test_broken isless((a=1,), (a=1,b=2))
+@test_throws MethodError isless((a=1,), (a=1,b=2))
 @test !isless((a=1,b=2), (a=1,b=2))
 @test !isless((a=2,b=1), (a=1,b=2))
 @test_throws MethodError isless((a=1,), (x=2,))
 
+@test (a=1,b=2) < (a=1,b=3)
+@test_throws MethodError (a=1,) < (a=1,b=2)
+@test !((a=1,b=2) < (a=1,b=2))
+@test !((a=2,b=1) < (a=1,b=2))
+@test_throws MethodError (a=1,) < (x=2,)
+@test !((a=-0.0,) < (a=0.0,))
+@test ismissing((a=missing,) < (a=1,))
+@test ismissing((a=missing,) < (a=missing,))
+
 @test map(-, (x=1, y=2)) == (x=-1, y=-2)
 @test map(+, (x=1, y=2), (x=10, y=20)) == (x=11, y=22)
 @test_throws ArgumentError map(+, (x=1, y=2), (y=10, x=20))
@@ -167,7 +187,7 @@ namedtuple_get_a(x) = x.a
 @test Base.return_types(namedtuple_get_a, (typeof((b=1,a="")),)) == Any[String]
 
 namedtuple_fieldtype_a(x) = fieldtype(typeof(x), :a)
-@test Base.return_types(namedtuple_fieldtype_a, (NamedTuple,)) == Any[Type]
+@test Base.return_types(namedtuple_fieldtype_a, (NamedTuple,)) == Any[Union{Type, TypeVar}]
 @test Base.return_types(namedtuple_fieldtype_a, (typeof((b=1,a="")),)) == Any[Type{String}]
 namedtuple_fieldtype__(x, y) = fieldtype(typeof(x), y)
 @test Base.return_types(namedtuple_fieldtype__, (typeof((b=1,a="")),Symbol))[1] >: Union{Type{Int}, Type{String}}
@@ -306,3 +326,13 @@ let x = 1, y = 2
     @test Meta.lower(Main, Meta.parse("(; a.y, y)")) == Expr(:error, "field name \"y\" repeated in named tuple")
     @test (; a.y, x) === (y=2, x=1)
 end
+
+# issue #37926
+@test nextind((a=1,), 1) == nextind((1,), 1) == 2
+@test prevind((a=1,), 2) == prevind((1,), 2) == 1
+
+# issue #43045
+@test merge(NamedTuple(), Iterators.reverse(pairs((a=1,b=2)))) === (b = 2, a = 1)
+
+# issue #44086
+@test NamedTuple{(:x, :y, :z), Tuple{Int8, Int16, Int32}}((z=1, x=2, y=3)) === (x = Int8(2), y = Int16(3), z = Int32(1))
diff --git a/test/numbers.jl b/test/numbers.jl
index 15ca861b1f86d4..ad521d7382713b 100644
--- a/test/numbers.jl
+++ b/test/numbers.jl
@@ -38,6 +38,24 @@ const ≣ = isequal # convenient for comparing NaNs
     @test xor(true,  false) == true
     @test xor(false, true)  == true
     @test xor(true,  true)  == false
+
+    @test false ⊼ false == true
+    @test true ⊼ false == true
+    @test false ⊼ true == true
+    @test true ⊼ true == false
+    @test nand(false, false) == true
+    @test nand(true, false) == true
+    @test nand(false, true) == true
+    @test nand(true, true) == false
+
+    @test false ⊽ false == true
+    @test true ⊽ false == false
+    @test false ⊽ true == false
+    @test true ⊽ true == false
+    @test nor(false, false) == true
+    @test nor(true, false) == false
+    @test nor(false, true) == false
+    @test nor(true, true) == false
 end
 @testset "bool operator" begin
     @test Bool(false) == false
@@ -426,6 +444,15 @@ end
     @test sprint(show, -498796.2749933266, context=:compact => true) == "-4.98796e5"
     @test sprint(show, 123456.78, context=:compact=>true) == "1.23457e5"
 
+    # issue 37941
+    @test sprint(show, MIME("text/plain"), Float16(0.0)) == "Float16(0.0)"
+    @test sprint(show, MIME("text/plain"), -Float16(0.0)) == "Float16(-0.0)"
+    @test sprint(show, MIME("text/plain"), Float16(5.0)) == "Float16(5.0)"
+    @test sprint(show, MIME("text/plain"), -Float16(5.0)) == "Float16(-5.0)"
+    @test sprint(show, MIME("text/plain"), Float16(Inf)) == "Inf16"
+    @test sprint(show, MIME("text/plain"), -Float16(Inf)) == "-Inf16"
+    @test sprint(show, MIME("text/plain"), Float16(NaN)) == "NaN16"
+
     @test repr(1.0f0) == "1.0f0"
     @test repr(-1.0f0) == "-1.0f0"
     @test repr(0.0f0) == "0.0f0"
@@ -463,6 +490,8 @@ end
     @test isa(sign(2//3), Rational{Int})
     @test isa(2//3 + 2//3im, Complex{Rational{Int}})
     @test isa(sign(2//3 + 2//3im), ComplexF64)
+    @test sign(pi) === 1.0
+    @test sign(pi) === -sign(-pi)
     @test sign(one(UInt)) == 1
     @test sign(zero(UInt)) == 0
 
@@ -1682,8 +1711,7 @@ end
     @test isa(0b0000000000000000000000000000000000000000000000000000000000000000,UInt64)
     @test isa(0b00000000000000000000000000000000000000000000000000000000000000000,UInt128)
     @test isa(0b00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,UInt128)
-    # remove BigInt unsigned integer literals #11105
-    @test_throws Meta.ParseError Meta.parse("0b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000")
+    @test isa(0b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000,BigInt)
     @test isa(0b11111111,UInt8)
     @test isa(0b111111111,UInt16)
     @test isa(0b1111111111111111,UInt16)
@@ -1693,8 +1721,7 @@ end
     @test isa(0b1111111111111111111111111111111111111111111111111111111111111111,UInt64)
     @test isa(0b11111111111111111111111111111111111111111111111111111111111111111,UInt128)
     @test isa(0b11111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111,UInt128)
-    # remove BigInt unsigned integer literals #11105
-    @test_throws Meta.ParseError Meta.parse("0b111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111")
+    @test isa(0b111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111111,BigInt)
 end
 @testset "octal literals" begin
     @test 0o10 == 0x8
@@ -1710,8 +1737,7 @@ end
     @test isa(0o000000000000000000000,UInt64)
     @test isa(0o0000000000000000000000,UInt64)
     @test isa(0o000000000000000000000000000000000000000000,UInt128)
-    # remove BigInt unsigned integer literals #11105
-    @test_throws Meta.ParseError Meta.parse("0o00000000000000000000000000000000000000000000")
+    @test isa(0o00000000000000000000000000000000000000000000,BigInt)
     @test isa(0o11,UInt8)
     @test isa(0o111,UInt8)
     @test isa(0o11111,UInt16)
@@ -1723,9 +1749,8 @@ end
     @test isa(0o111111111111111111111111111111111111111111,UInt128)
     @test isa(0o1111111111111111111111111111111111111111111,UInt128)
     @test isa(0o3777777777777777777777777777777777777777777,UInt128)
-    @test_throws Meta.ParseError Meta.parse("0o4000000000000000000000000000000000000000000")
-    # remove BigInt unsigned integer literals #11105
-    @test_throws Meta.ParseError Meta.parse("0o11111111111111111111111111111111111111111111")
+    @test isa(0o11111111111111111111111111111111111111111111,BigInt)
+    @test 0o4000000000000000000000000000000000000000000 == 340282366920938463463374607431768211456
     @test isa(0o077, UInt8)
     @test isa(0o377, UInt8)
     @test isa(0o400, UInt16)
@@ -1741,7 +1766,6 @@ end
     @test isa(0o0000000000000000000000000000000000000000000, UInt128)
     @test isa(0o1000000000000000000000000000000000000000000, UInt128)
     @test isa(0o2000000000000000000000000000000000000000000, UInt128)
-    @test_throws Meta.ParseError Meta.parse("0o4000000000000000000000000000000000000000000")
 
     @test String([0o110, 0o145, 0o154, 0o154, 0o157, 0o054, 0o040, 0o127, 0o157, 0o162, 0o154, 0o144, 0o041]) == "Hello, World!"
 
@@ -1756,8 +1780,7 @@ end
     @test isa(0x0000000000000000,UInt64)
     @test isa(0x00000000000000000,UInt128)
     @test isa(0x00000000000000000000000000000000,UInt128)
-    # remove BigInt unsigned integer literals #11105
-    @test_throws Meta.ParseError Meta.parse("0x000000000000000000000000000000000")
+    @test isa(0x000000000000000000000000000000000,BigInt)
 
     @test isa(0x11,UInt8)
     @test isa(0x111,UInt16)
@@ -1768,8 +1791,7 @@ end
     @test isa(0x1111111111111111,UInt64)
     @test isa(0x11111111111111111,UInt128)
     @test isa(0x11111111111111111111111111111111,UInt128)
-    # remove BigInt unsigned integer literals #11105
-    @test_throws Meta.ParseError Meta.parse("0x111111111111111111111111111111111")
+    @test isa(0x111111111111111111111111111111111,BigInt)
 end
 @testset "minus sign and unsigned literals" begin
     # "-" is not part of unsigned literals
@@ -1783,13 +1805,17 @@ end
     @test -0o0000000000000000000001 == -(0o0000000000000000000001)
     @test -0b00000000000000000000000000000000000000000000000000000000000000001 ==
         -(0b00000000000000000000000000000000000000000000000000000000000000001)
+    @test -0x000000000000000000000000000000001 == -(0x000000000000000000000000000000001)
+    @test -0o0000000000000000000000000000000000000000001 ==
+        -(0o0000000000000000000000000000000000000000001)
+    @test -0b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001 ==
+        -(0b000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001)
 
     @test isa(-0x00,UInt8)
     @test isa(-0x0000000000000000,UInt64)
     @test isa(-0x00000000000000000,UInt128)
     @test isa(-0x00000000000000000000000000000000,UInt128)
-    # remove BigInt unsigned integer literals #11105
-    @test_throws Meta.ParseError Meta.parse("-0x000000000000000000000000000000000")
+    @test isa(-0x000000000000000000000000000000000,BigInt)
 end
 @testset "Float32 literals" begin
     @test isa(1f0,Float32)
@@ -1983,8 +2009,11 @@ end
     end
     @test nextpow(2, 56789) == 65536
     @test_throws DomainError nextpow(2, -56789)
+    @test_throws DomainError nextpow(Int8(4), 128)
     @test prevpow(2, 56789) == 32768
     @test_throws DomainError prevpow(2, -56789)
+    @test_throws DomainError prevpow(Int8(4), 128)
+    @test_throws OverflowError nextpow(Int8(4), 65)
     for i = 1:100
         @test nextpow(2, i) == nextpow(2, big(i))
         @test prevpow(2, i) == prevpow(2, big(i))
@@ -1993,6 +2022,14 @@ end
         @test nextpow(2, T(42)) === T(64)
         @test prevpow(2, T(42)) === T(32)
     end
+    for T in (Float16, Float32, Float64)
+        @test prevpow(2, prevfloat(T(1024.0))) == T(512.0)
+        @test nextpow(2, nextfloat(T(1024.0))) == T(2048.0)
+        @test prevpow(T(2.0), prevfloat(T(1024.0))) == T(512.0)
+        @test nextpow(T(2.0), nextfloat(T(1024.0))) == T(2048.0)
+        @test prevpow(T(2.0), prevfloat(T(Inf))) < T(Inf)
+        @test nextpow(T(2.0), prevfloat(T(Inf))) == T(Inf)
+    end
 end
 @testset "ispow2" begin
     @test  ispow2(64)
@@ -2283,6 +2320,23 @@ end
         @test_throws BoundsError getindex(x, 1, 0)
     end
 end
+@testset "get(x::Number, ...)" begin
+    for x in [1.23, 7, ℯ, 4//5] #[FP, Int, Irrational, Rat]
+        @test get(x, 1, 99) == x
+        @test get(x, (), 99) == x
+        @test get(x, (1,), 99) == x
+        @test get(x, 2, 99) == 99
+        @test get(x, 0, pi) == pi
+        @test get(x, (1,2), pi) == pi
+        c = Ref(0)
+        @test get(() -> c[]+=1, x, 1) == x
+        @test get(() -> c[]+=1, x, ()) == x
+        @test get(() -> c[]+=1, x, (1,1,1)) == x
+        @test get(() -> c[]+=1, x, 2) == 1
+        @test get(() -> c[]+=1, x, -1) == 2
+        @test get(() -> c[]+=1, x, (3,2,1)) == 3
+    end
+end
 @testset "copysign and flipsign" begin
     # copysign(x::Real, y::Real) = ifelse(signbit(x)!=signbit(y), -x, x)
     # flipsign(x::Real, y::Real) = ifelse(signbit(y), -x, x)
@@ -2373,6 +2427,12 @@ zero(::Type{TestNumber{Inner}}) where {Inner} = TestNumber(zero(Inner))
 big(test_number::TestNumber) = TestNumber(big(test_number.inner))
 @test big(TestNumber{Int}) == TestNumber{BigInt}
 
+# abstract abs2
+Base.:*(x::TestNumber, y::TestNumber) = TestNumber(x.inner*y.inner)
+Base.:(==)(x::TestNumber, y::TestNumber) = x.inner == y.inner
+Base.abs(x::TestNumber) = TestNumber(abs(x.inner))
+@test abs2(TestNumber(3+4im)) == TestNumber(25)
+
 @testset "multiplicative inverses" begin
     function testmi(numrange, denrange)
         for d in denrange
@@ -2470,18 +2530,34 @@ end
     @test rem(T(1), T(2), RoundNearest) == 1
     @test rem(T(1), T(2), RoundDown)    == 1
     @test rem(T(1), T(2), RoundUp)      == -1
+    @test rem(T(1), T(2), RoundFromZero) == -1
     @test rem(T(1.5), T(2), RoundToZero)  == 1.5
     @test rem(T(1.5), T(2), RoundNearest) == -0.5
     @test rem(T(1.5), T(2), RoundDown)    == 1.5
     @test rem(T(1.5), T(2), RoundUp)      == -0.5
+    @test rem(T(1.5), T(2), RoundFromZero) == -0.5
     @test rem(T(-1), T(2), RoundToZero)  == -1
     @test rem(T(-1), T(2), RoundNearest) == -1
     @test rem(T(-1), T(2), RoundDown)    == 1
     @test rem(T(-1), T(2), RoundUp)      == -1
+    @test rem(T(-1), T(2), RoundFromZero) == 1
     @test rem(T(-1.5), T(2), RoundToZero)  == -1.5
     @test rem(T(-1.5), T(2), RoundNearest) == 0.5
     @test rem(T(-1.5), T(2), RoundDown)    == 0.5
     @test rem(T(-1.5), T(2), RoundUp)      == -1.5
+    @test rem(T(-1.5), T(2), RoundFromZero) == 0.5
+    for mode in [RoundToZero, RoundNearest, RoundDown, RoundUp, RoundFromZero]
+        @test isnan(rem(T(1), T(0), mode))
+        @test isnan(rem(T(Inf), T(2), mode))
+        @test isnan(rem(T(1), T(NaN), mode))
+        # FIXME: The broken case erroneously returns -Inf
+        @test rem(T(4), floatmin(T) * 2, mode) == 0 broken=(T == BigFloat && mode in (RoundUp,RoundFromZero))
+    end
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundToZero),  -0.0)
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundNearest), -0.0)
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundDown),     0.0)
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundUp),       0.0)
+    @test isequal(rem(nextfloat(typemin(T)), T(2), RoundFromZero), 0.0)
 end
 
 @testset "rem for $T RoundNearest" for T in (Int8, Int16, Int32, Int64, Int128)
@@ -2497,6 +2573,41 @@ end
     end
 end
 
+@testset "divrem rounded" begin
+    #rounded Floats
+    for T in (Float16, Float32, Float64, BigFloat)
+        @test divrem(T(1.5), T(2), RoundToZero)[2]  == 1.5
+        @test divrem(T(1.5), T(2), RoundNearest)[2] == -0.5
+        @test divrem(T(1.5), T(2), RoundDown)[2]    == 1.5
+        @test divrem(T(1.5), T(2), RoundUp)[2]      == -0.5
+        @test divrem(T(-1.5), T(2), RoundToZero)[2]  == -1.5
+        @test divrem(T(-1.5), T(2), RoundNearest)[2] == 0.5
+        @test divrem(T(-1.5), T(2), RoundDown)[2]    == 0.5
+        @test divrem(T(-1.5), T(2), RoundUp)[2]      == -1.5
+    end
+    #rounded Integers
+    for (a, b) in (
+            (3, 2),
+            (5, 3),
+            (-3, 2),
+            (5, 2),
+            (-5, 2),
+            (-5, 3),
+            (5, -3))
+        for sign in (+1, -1)
+            (a, b) = (a*sign, b*sign)
+            @test divrem(a, b, RoundNearest) == (div(a, b, RoundNearest),rem(a, b, RoundNearest))
+        end
+    end
+
+    a = 122322388883338838388383888823233122323
+    b = 343443
+    c = 122322388883338838388383888823233122333
+    @test divrem(a, b) == (div(a,b), rem(a,b))
+    @test divrem(a, c) == (div(a,c), rem(a,c))
+    @test divrem(a,-(a-20), RoundDown) == (div(a,-(a-20), RoundDown), rem(a,-(a-20), RoundDown))
+end
+
 @testset "rem2pi $T" for T in (Float16, Float32, Float64, BigFloat)
     @test rem2pi(T(1), RoundToZero)  == 1
     @test rem2pi(T(1), RoundNearest) == 1
@@ -2600,6 +2711,10 @@ end
     @test !isone(triu(fill(1, 5, 5)))
     @test !isone(zeros(Int, 5, 5))
     @test isone(Matrix(1I, 5, 5))
+    @test !isone(view(rand(5,5), [1,3,4], :))
+    Dv = view(Diagonal([1,1, 1]), [1,2], 1:2)
+    @test isone(Dv)
+    @test (@allocated isone(Dv)) == 0
     @test isone(Matrix(1I, 1000, 1000)) # sizeof(X) > 2M == ISONE_CUTOFF
 end
 
@@ -2710,3 +2825,55 @@ end
     @test invoke(isfinite, Tuple{Number}, NaN) == false
     @test invoke(isfinite, Tuple{Number}, Inf) == false
 end
+
+struct MyRealFld <: Real
+    x::Real
+end
+@testset "fallback error throwing for fld/cld" begin
+    a = MyRealFld(2.0)
+    b = MyRealFld(3.0)
+    @test_throws MethodError fld(a, b)
+    @test_throws MethodError cld(a, b)
+end
+
+@testset "Bool rounding (#25074)" begin
+    @testset "round Bool" begin
+        @test_throws InexactError round(Bool, -4.1)
+        @test_throws InexactError round(Bool, 1.5)
+        @test true == round(Bool, 1.0)
+        @test false == round(Bool, 0.0)
+        @test true == round(Bool, 0.6)
+        @test false == round(Bool, 0.4)
+        @test false == round(Bool, 0.5)
+        @test false == round(Bool, -0.5)
+    end
+
+    @testset "trunc Bool" begin
+        @test_throws InexactError trunc(Bool, -4.1)
+        @test_throws InexactError trunc(Bool, 2.5)
+        @test true == trunc(Bool, 1.0)
+        @test false == trunc(Bool, 0.0)
+        @test false == trunc(Bool, 0.6)
+        @test false == trunc(Bool, 0.4)
+        @test true == trunc(Bool, 1.8)
+        @test false == trunc(Bool, -0.5)
+    end
+
+    @testset "floor Bool" begin
+        @test_throws InexactError floor(Bool, -0.1)
+        @test_throws InexactError floor(Bool, 2.5)
+        @test true == floor(Bool, 1.0)
+        @test false == floor(Bool, 0.0)
+        @test false == floor(Bool, 0.6)
+        @test true == floor(Bool, 1.8)
+    end
+
+    @testset "ceil Bool" begin
+        @test_throws InexactError ceil(Bool, -1.4)
+        @test_throws InexactError ceil(Bool, 1.5)
+        @test true == ceil(Bool, 1.0)
+        @test false == ceil(Bool, 0.0)
+        @test true == ceil(Bool, 0.6)
+        @test false == ceil(Bool, -0.7)
+    end
+end
diff --git a/test/offsetarray.jl b/test/offsetarray.jl
index cd5c5bc848ace2..515e0491ee994e 100644
--- a/test/offsetarray.jl
+++ b/test/offsetarray.jl
@@ -2,12 +2,12 @@
 
 isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
 using .Main.OffsetArrays
+import .Main.OffsetArrays: IdOffsetRange
 using DelimitedFiles
 using Random
 using LinearAlgebra
 using Statistics
-
-const OAs_name = join(fullname(OffsetArrays), ".")
+using Base: IdentityUnitRange
 
 if !isdefined(@__MODULE__, :T24Linear)
     include("testhelpers/arrayindexingtypes.jl")
@@ -18,7 +18,7 @@ let
 v0 = rand(4)
 v = OffsetArray(v0, (-3,))
 h = OffsetArray([-1,1,-2,2,0], (-3,))
-@test axes(v) == (-2:1,)
+@test axes(v) === (OffsetArrays.IdOffsetRange(Base.OneTo(4), -3),)
 @test size(v) == (4,)
 @test size(v, 1) == 4
 @test_throws DimensionMismatch Array(v)
@@ -26,7 +26,7 @@ h = OffsetArray([-1,1,-2,2,0], (-3,))
 A0 = [1 3; 2 4]
 A = OffsetArray(A0, (-1,2))                   # IndexLinear
 S = OffsetArray(view(A0, 1:2, 1:2), (-1,2))   # IndexCartesian
-@test axes(A) == axes(S) == (0:1, 3:4)
+@test axes(A) === axes(S) === (OffsetArrays.IdOffsetRange(Base.OneTo(2), -1), OffsetArrays.IdOffsetRange(Base.OneTo(2), 2))
 @test size(A) == (2,2)
 @test size(A, 1) == 2
 
@@ -111,6 +111,15 @@ let a1 = [11,12,13], a2 = [1 2; 3 4]
     @test_throws BoundsError i2[1:2:5]
 end
 
+# issue #37274
+let a = 1:3
+    oa = OffsetArray(a, 0:2)
+    b = @view oa[0]
+    @test b[] == b[1] == b[1,1] == 1
+    @test_throws BoundsError b[0]
+    @test_throws BoundsError b[2]
+end
+
 # logical indexing
 @test A[A .> 2] == [3,4]
 @test_throws BoundsError h[trues(2)]
@@ -133,13 +142,13 @@ S = view(A, :, 3)
 @test S[0] == 1
 @test S[1] == 2
 @test_throws BoundsError S[2]
-@test axes(S) === (Base.IdentityUnitRange(0:1),)
+@test axes(S) === (Base.IdentityUnitRange(OffsetArrays.IdOffsetRange(Base.OneTo(2), -1)),)
 S = view(A, 0, :)
 @test S == OffsetArray([1,3], (A.offsets[2],))
 @test S[3] == 1
 @test S[4] == 3
 @test_throws BoundsError S[1]
-@test axes(S) === (Base.IdentityUnitRange(3:4),)
+@test axes(S) === (Base.IdentityUnitRange(OffsetArrays.IdOffsetRange(Base.OneTo(2), 2)),)
 S = view(A, 0:0, 4)
 @test S == [3]
 @test S[1] == 3
@@ -158,17 +167,17 @@ S = view(A, :, :)
 @test S[0,4] == S[3] == 3
 @test S[1,4] == S[4] == 4
 @test_throws BoundsError S[1,1]
-@test axes(S) === Base.IdentityUnitRange.((0:1, 3:4))
+@test axes(S) === Base.IdentityUnitRange.((OffsetArrays.IdOffsetRange(Base.OneTo(2), -1), OffsetArrays.IdOffsetRange(Base.OneTo(2), 2)))
 # https://github.com/JuliaArrays/OffsetArrays.jl/issues/27
 g = OffsetArray(Vector(-2:3), (-3,))
 gv = view(g, -1:2)
 @test axes(gv, 1) === Base.OneTo(4)
 @test collect(gv) == -1:2
 gv = view(g, OffsetArray(-1:2, (-2,)))
-@test axes(gv, 1) === Base.IdentityUnitRange(-1:2)
+@test axes(gv, 1) === OffsetArrays.IdOffsetRange(Base.OneTo(4), -2)
 @test collect(gv) == -1:2
 gv = view(g, OffsetArray(-1:2, (-1,)))
-@test axes(gv, 1) === Base.IdentityUnitRange(0:3)
+@test axes(gv, 1) === OffsetArrays.IdOffsetRange(Base.OneTo(4), -1)
 @test collect(gv) == -1:2
 
 # iteration
@@ -199,7 +208,7 @@ str = String(take!(io))
 show(io, parent(v))
 @test str == String(take!(io))
 smry = summary(v)
-@test occursin("OffsetArray{Float64, 1", smry)
+@test occursin("OffsetArray(::Vector{Float64", smry)
 @test occursin("with indices -1:1", smry)
 function cmp_showf(printfunc, io, A; options = ())
     ioc = IOContext(io, :limit => true, :compact => true, options...)
@@ -216,18 +225,18 @@ cmp_showf(Base.print_matrix, io, OffsetArray(rand(10^3,10^3), (10,-9))) # neithe
 cmp_showf(Base.print_matrix, io, OffsetArray(reshape(range(-0.212121212121, stop=2/11, length=3*29), 3, 29), (-2, -15)); options=(:displaysize=>(53,210),))
 cmp_showf(show, io, OffsetArray(collect(1:100), (100,)))   # issue #31641
 
-targets1 = ["0-dimensional $OAs_name.OffsetArray{Float64, 0, Array{Float64, 0}}:\n1.0",
-            "1-element $OAs_name.OffsetArray{Float64, 1, Vector{Float64}} with indices 2:2:\n 1.0",
-            "1×1 $OAs_name.OffsetArray{Float64, 2, Matrix{Float64}} with indices 2:2×3:3:\n 1.0",
-            "1×1×1 $OAs_name.OffsetArray{Float64, 3, Array{Float64, 3}} with indices 2:2×3:3×4:4:\n[:, :, 4] =\n 1.0",
-            "1×1×1×1 $OAs_name.OffsetArray{Float64, 4, Array{Float64, 4}} with indices 2:2×3:3×4:4×5:5:\n[:, :, 4, 5] =\n 1.0"]
+targets1 = ["0-dimensional OffsetArray(::Array{Float64, 0}) with eltype Float64:\n1.0",
+            "1-element OffsetArray(::Vector{Float64}, 2:2) with eltype Float64 with indices 2:2:\n 1.0",
+            "1×1 OffsetArray(::Matrix{Float64}, 2:2, 3:3) with eltype Float64 with indices 2:2×3:3:\n 1.0",
+            "1×1×1 OffsetArray(::Array{Float64, 3}, 2:2, 3:3, 4:4) with eltype Float64 with indices 2:2×3:3×4:4:\n[:, :, 4] =\n 1.0",
+            "1×1×1×1 OffsetArray(::Array{Float64, 4}, 2:2, 3:3, 4:4, 5:5) with eltype Float64 with indices 2:2×3:3×4:4×5:5:\n[:, :, 4, 5] =\n 1.0"]
 targets2 = ["(fill(1.0), fill(1.0))",
             "([1.0], [1.0])",
-            "([1.0], [1.0])",
-            "([1.0], [1.0])",
-            "([1.0], [1.0])"]
+            "([1.0;;], [1.0;;])",
+            "([1.0;;;], [1.0;;;])",
+            "([1.0;;;;], [1.0;;;;])"]
 @testset "printing of OffsetArray with n=$n" for n = 0:4
-    a = OffsetArray(fill(1.,ntuple(d->1,n)), ntuple(identity,n))
+    a = OffsetArray(fill(1.,ntuple(Returns(1),n)), ntuple(identity,n))
     show(IOContext(io, :limit => true), MIME("text/plain"), a)
     @test String(take!(io)) == targets1[n+1]
     show(IOContext(io, :limit => true), MIME("text/plain"), (a,a))
@@ -235,7 +244,7 @@ targets2 = ["(fill(1.0), fill(1.0))",
 end
 P = OffsetArray(rand(8,8), (1,1))
 PV = view(P, 2:3, :)
-@test endswith(summary(PV), "with indices Base.OneTo(2)×2:9")
+@test endswith(summary(PV), "with indices Base.OneTo(2)×OffsetArrays.IdOffsetRange(2:9)")
 
 # Similar
 B = similar(A, Float32)
@@ -247,26 +256,26 @@ B = similar(A, (3,4))
 @test axes(B) === (Base.OneTo(3), Base.OneTo(4))
 B = similar(A, (-3:3,1:4))
 @test isa(B, OffsetArray{Int,2})
-@test axes(B) === Base.IdentityUnitRange.((-3:3, 1:4))
+@test axes(B) === (OffsetArrays.IdOffsetRange(Base.OneTo(7), -4), OffsetArrays.IdOffsetRange(Base.OneTo(4)))
 B = similar(parent(A), (-3:3,1:4))
 @test isa(B, OffsetArray{Int,2})
-@test axes(B) === Base.IdentityUnitRange.((-3:3, 1:4))
+@test axes(B) === (OffsetArrays.IdOffsetRange(Base.OneTo(7), -4), OffsetArrays.IdOffsetRange(Base.OneTo(4)))
 
 # Indexing with OffsetArray indices
 i1 = OffsetArray([2,1], (-5,))
 i1 = OffsetArray([2,1], -5)
 b = A0[i1, 1]
-@test axes(b) === (Base.IdentityUnitRange(-4:-3),)
+@test axes(b) === (OffsetArrays.IdOffsetRange(Base.OneTo(2), -5),)
 @test b[-4] == 2
 @test b[-3] == 1
 b = A0[1,i1]
-@test axes(b) === (Base.IdentityUnitRange(-4:-3),)
+@test axes(b) === (OffsetArrays.IdOffsetRange(Base.OneTo(2), -5),)
 @test b[-4] == 3
 @test b[-3] == 1
 v = view(A0, i1, 1)
-@test axes(v) === (Base.IdentityUnitRange(-4:-3),)
+@test axes(v) === (OffsetArrays.IdOffsetRange(Base.OneTo(2), -5),)
 v = view(A0, 1:1, i1)
-@test axes(v) === (Base.OneTo(1), Base.IdentityUnitRange(-4:-3))
+@test axes(v) === (Base.OneTo(1), OffsetArrays.IdOffsetRange(Base.OneTo(2), -5))
 
 # copyto! and fill!
 a = OffsetArray{Int}(undef, (-3:-1,))
@@ -395,7 +404,7 @@ v2 = copy(v)
 v = OffsetArray(v0, (-3,))
 @test lastindex(v) == 1
 @test v ≈ v
-@test axes(v') === (Base.OneTo(1),Base.IdentityUnitRange(-2:1))
+@test axes(v') === (Base.OneTo(1), OffsetArrays.IdOffsetRange(Base.OneTo(4), -3))
 @test parent(v) == collect(v)
 rv = reverse(v)
 @test axes(rv) == axes(v)
@@ -411,7 +420,7 @@ A = OffsetArray(rand(4,4), (-3,5))
 @test lastindex(A, 1) == 1
 @test lastindex(A, 2) == 9
 @test A ≈ A
-@test axes(A') === Base.IdentityUnitRange.((6:9, -2:1))
+@test axes(A') === (OffsetArrays.IdOffsetRange(Base.OneTo(4), 5), OffsetArrays.IdOffsetRange(Base.OneTo(4), -3))
 @test parent(copy(A')) == copy(parent(A)')
 @test collect(A) == parent(A)
 @test maximum(A) == maximum(parent(A))
@@ -583,9 +592,21 @@ module SimilarUR
     end
     ur = MyURange(1,3)
     a = Vector{Int}(undef, 2)
-    @test_throws MethodError similar(a, ur)
-    @test_throws MethodError similar(a, Float64, ur)
-    @test_throws MethodError similar(a, Float64, (ur,))
+
+    function catch_exception(f, args...)
+        try
+            f(args...)
+        catch err
+            return err
+        end
+    end
+    # type-piracy https://github.com/JuliaArrays/OffsetArrays.jl/issues/87
+    @test_broken (catch_exception(similar, a, ur) isa MethodError)
+    @test_broken (catch_exception(similar, a, Float64, ur) isa MethodError)
+    @test_broken (catch_exception(similar, a, Float64, (ur,)) isa MethodError)
+    # @test_throws MethodError similar(a, ur)
+    # @test_throws MethodError similar(a, Float64, ur)
+    # @test_throws MethodError similar(a, Float64, (ur,))
     @test_throws MethodError similar(a, (2.0,3.0))
 end
 
@@ -630,3 +651,183 @@ end
     @test last(v, 100) !== v
     @test last(v, 1) == [v[end]]
 end
+
+@testset "Resizing OffsetVectors" begin
+    local a = OffsetVector(rand(5),-3)
+    axes(a,1) == -2:2
+    length(a) == 5
+    resize!(a,3)
+    length(a) == 3
+    axes(a,1) == -2:0
+    @test_throws ArgumentError resize!(a,-3)
+end
+
+@testset "issue #37199: offset range indices" begin
+    # https://github.com/JuliaArrays/OffsetArrays.jl/issues/133
+    A0 = [1 3; 2 4]
+    A = OffsetArray(A0, (-1,2))
+
+    r = OffsetArrays.IdOffsetRange(1:2, -1)
+    v1 = view(A, r, 3)
+    @test v1[0] == 1
+    @test v1[1] == 2
+    @test axes(v1, 1) == axes(r, 1)
+    v2 = view(A, UnitRange(r), 3)
+    for (indflat, indoffset) in enumerate(r)
+        @test v1[indoffset] == v2[indflat]
+    end
+
+    r = OffsetArrays.IdOffsetRange(1:2, 2)
+    v1 = view(A, 1, r)
+    @test v1[3] == 2
+    @test v1[4] == 4
+    @test axes(v1, 1) == axes(r, 1)
+    v2 = view(A, 1, UnitRange(r))
+    for (indflat, indoffset) in enumerate(r)
+        @test v1[indoffset] == v2[indflat]
+    end
+
+    a12 = zeros(3:8, 3:4)
+    r = OffsetArrays.IdOffsetRange(Base.OneTo(3), 5)
+    a12[r, 4] .= 3
+    @test all(a12[r, 4] .== 3)
+    @test all(a12[UnitRange(r), 4] .== 3)
+
+    # https://github.com/JuliaArrays/OffsetArrays.jl/issues/100
+    S = view(A, axes(A)...)
+    @test S == A
+    @test S[0,3] == S[1] == 1
+    @test S[1,3] == S[2] == 2
+    @test S[0,4] == S[3] == 3
+    @test S[1,4] == S[4] == 4
+    @test_throws BoundsError S[1,1]
+    @test axes(S) == OffsetArrays.IdOffsetRange.((0:1, 3:4))
+    S = view(A, axes(A, 1), 3)
+    @test S == A[:, 3]
+    @test S[0] == 1
+    @test S[1] == 2
+    @test_throws BoundsError S[length(S)]
+    @test axes(S) == (OffsetArrays.IdOffsetRange(0:1), )
+    S = view(A, 1, axes(A, 2))
+    @test S == A[1, :]
+    @test S[3] == 2
+    @test S[4] == 4
+    @test_throws BoundsError S[1]
+    @test axes(S) == (OffsetArrays.IdOffsetRange(3:4), )
+
+    A0 = collect(reshape(1:24, 2, 3, 4))
+    A = OffsetArray(A0, (-1,2,1))
+    S = view(A, axes(A, 1), 3:4, axes(A, 3))
+    @test S == A[:, 3:4, :]
+    @test S[0, 1, 2] == A[0, 3, 2]
+    @test S[0, 2, 2] == A[0, 4, 2]
+    @test S[1, 1, 2] == A[1, 3, 2]
+    @test axes(S) == (OffsetArrays.IdOffsetRange(0:1), Base.OneTo(2), OffsetArrays.IdOffsetRange(2:5))
+end
+
+@testset "Zero-index indexing" begin
+    @test OffsetArray([6], 2:2)[] == 6
+    @test OffsetArray(fill(6, 1, 1), 2:2, 3:3)[] == 6
+    @test OffsetArray(fill(6))[] == 6
+    @test_throws BoundsError OffsetArray([6,7], 2:3)[]
+    @test_throws BoundsError OffsetArray([6 7], 2:2, 2:3)[]
+    @test_throws BoundsError OffsetArray([], 2:1)[]
+end
+
+@testset "IdentityUnitRange indexing" begin
+    a = OffsetVector(3:4, 2:3)
+    ax = IdentityUnitRange(2:3)
+    @test a[ax[2]] == a[ax][2]
+
+    s = -2:2:4
+    r = 5:8
+    y = OffsetArray(s, r)
+    @test axes(y) == (r,)
+    @test step(y) == step(s)
+
+    a = OffsetVector(3:4, 10:11)
+    ax = OffsetArrays.IdOffsetRange(5:6, 5)
+    @test axes(a[ax]) == axes(ax)
+    for i in axes(ax,1)
+        @test a[ax[i]] == a[ax][i]
+    end
+
+    ax = IdentityUnitRange(10:11)
+    @test axes(a[ax]) == axes(ax)
+    for i in axes(ax,1)
+        @test a[ax[i]] == a[ax][i]
+    end
+end
+
+@testset "show OffsetMatrix" begin
+    Y = reshape(1:25, 5, 5)
+    X = OffsetArray(Y, -2:2, -4:0)
+
+    io = IOBuffer()
+    show(io, X)
+    strX = String(take!(io))
+    show(io, Y)
+    strY = String(take!(io))
+    @test strX == strY
+
+    io_limit = IOContext(io, :limit => true)
+    show(io_limit, X)
+    strX = String(take!(io))
+    show(io_limit, Y)
+    strY = String(take!(io))
+    @test strX == strY
+end
+
+@testset "vector indexing (issue #39896)" begin
+    a = collect(1:10)
+    r = Base.IdentityUnitRange(2:3)
+    b = a[r]
+    @test axes(b) == axes(r)
+    for i in r
+        @test b[i] == a[r[i]]
+    end
+end
+
+@testset "proper partition for non-1-indexed vector" begin
+    @test Iterators.partition(OffsetArray(1:10,10), 5) |> collect == [1:5,6:10] # OffsetVector
+    @test Iterators.partition(OffsetArray(collect(1:10),10), 5) |> collect == [1:5,6:10] # OffsetVector
+    @test Iterators.partition(OffsetArray(reshape(1:9,3,3), (3,3)), 5) |> collect == [1:5,6:9] #OffsetMatrix
+    @test Iterators.partition(OffsetArray(reshape(collect(1:9),3,3), (3,3)), 5) |> collect == [1:5,6:9] #OffsetMatrix
+    @test Iterators.partition(IdOffsetRange(2:7,10), 5) |> collect == [12:16,17:17] # IdOffsetRange
+end
+
+@testset "reshape" begin
+    a = OffsetArray(4:5, 5:6)
+    @test reshape(a, :) === a
+    @test reshape(a, (:,)) === a
+end
+
+@testset "issue #41630: replace_ref_begin_end!/@view on offset-like arrays" begin
+    x = OffsetArray([1 2; 3 4], -10:-9, 9:10)  # 2×2 OffsetArray{...} with indices -10:-9×9:10
+
+    # begin/end with offset indices
+    @test (@view x[begin, 9])[] == 1
+    @test (@view x[-10, end])[] == 2
+    @test (@view x[-9, begin])[] == 3
+    @test (@view x[end, 10])[] == 4
+    @test (@view x[begin, begin])[] == 1
+    @test (@view x[begin, end])[] == 2
+    @test (@view x[end, begin])[] == 3
+    @test (@view x[end, end])[] == 4
+
+    # nested usages of begin/end
+    y = OffsetArray([-10, -9], (5,))
+    @test (@view x[begin, -y[end]])[] == 1
+    @test (@view x[y[begin], end])[] == 2
+    @test (@view x[end, -y[end]])[] == 3
+    @test (@view x[y[end], end])[] == 4
+end
+
+@testset "CartesianIndices (issue #40035)" begin
+    A = OffsetArray(big(1):big(2), 0);
+    B = OffsetArray(1:2, 0);
+    # axes of an OffsetArray may be converted to an AbstractUnitRange,
+    # but the conversion to an OrdinalRange was not defined.
+    # this is fixed in #40038, so the evaluation of its CartesianIndices should work
+    @test CartesianIndices(A) == CartesianIndices(B)
+end
diff --git a/test/opaque_closure.jl b/test/opaque_closure.jl
new file mode 100644
index 00000000000000..7fe53812c3a928
--- /dev/null
+++ b/test/opaque_closure.jl
@@ -0,0 +1,266 @@
+using Test
+using InteractiveUtils
+using Core: OpaqueClosure
+
+const_int() = 1
+
+const lno = LineNumberNode(1, :none)
+
+let ci = @code_lowered const_int()
+    @eval function oc_trivial()
+        $(Expr(:new_opaque_closure, Tuple{}, Any, Any,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci)))
+    end
+end
+@test isa(oc_trivial(), Core.OpaqueClosure{Tuple{}, Any})
+@test oc_trivial()() == 1
+
+let ci = @code_lowered const_int()
+    @eval function oc_simple_inf()
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci)))
+    end
+end
+@test isa(oc_simple_inf(), Core.OpaqueClosure{Tuple{}, Int})
+@test oc_simple_inf()() == 1
+
+struct OcClos2Int
+    a::Int
+    b::Int
+end
+(a::OcClos2Int)() = getfield(a, 1) + getfield(a, 2)
+let ci = @code_lowered OcClos2Int(1, 2)();
+    @eval function oc_trivial_clos()
+        $(Expr(:new_opaque_closure, Tuple{}, Int, Int,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
+            1, 2))
+    end
+end
+@test oc_trivial_clos()() == 3
+
+let ci = @code_lowered OcClos2Int(1, 2)();
+    @eval function oc_self_call_clos()
+        $(Expr(:new_opaque_closure, Tuple{}, Int, Int,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
+            1, 2))()
+    end
+end
+@test @inferred(oc_self_call_clos()) == 3
+let opt = @code_typed oc_self_call_clos()
+    @test length(opt[1].code) == 1
+    @test isa(opt[1].code[1], Core.ReturnNode)
+end
+
+struct OcClos1Any
+    a
+end
+(a::OcClos1Any)() = getfield(a, 1)
+let ci = @code_lowered OcClos1Any(1)()
+    @eval function oc_pass_clos(x)
+        $(Expr(:new_opaque_closure, Tuple{}, Any, Any,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
+            :x))
+    end
+end
+@test oc_pass_clos(1)() == 1
+@test oc_pass_clos("a")() == "a"
+
+let ci = @code_lowered OcClos1Any(1)()
+    @eval function oc_infer_pass_clos(x)
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
+            :x))
+    end
+end
+@test isa(oc_infer_pass_clos(1), Core.OpaqueClosure{Tuple{}, typeof(1)})
+@test isa(oc_infer_pass_clos("a"), Core.OpaqueClosure{Tuple{}, typeof("a")})
+@test oc_infer_pass_clos(1)() == 1
+@test oc_infer_pass_clos("a")() == "a"
+
+let ci = @code_lowered identity(1)
+    @eval function oc_infer_pass_id()
+        $(Expr(:new_opaque_closure, Tuple{Any}, Any, Any,
+            Expr(:opaque_closure_method, nothing, 1, false, lno, ci)))
+    end
+end
+function complicated_identity(x)
+    oc_infer_pass_id()(x)
+end
+@test @inferred(complicated_identity(1)) == 1
+@test @inferred(complicated_identity("a")) == "a"
+let ci = (@code_typed complicated_identity(1))[1]
+    @test length(ci.code) == 1
+    @test isa(ci.code[1], Core.ReturnNode)
+end
+
+struct OcOpt
+    A
+end
+
+(A::OcOpt)() = ndims(getfield(A, 1))
+
+let ci = @code_lowered OcOpt([1 2])()
+    @eval function oc_opt_ndims(A)
+        $(Expr(:new_opaque_closure, Tuple{}, Union{}, Any,
+            Expr(:opaque_closure_method, nothing, 0, false, lno, ci),
+            :A))
+    end
+end
+oc_opt_ndims([1 2])
+
+let A = [1 2]
+    let Oc = oc_opt_ndims(A)
+        @test_broken sizeof(Oc.env) == 0
+        @test Oc() == 2
+    end
+end
+
+using Base.Experimental: @opaque
+
+@test @opaque(x->2x)(8) == 16
+let f = @opaque (x::Int, y::Float64)->(2x, 3y)
+    @test_throws TypeError f(1, 1)
+    @test f(2, 3.0) === (4, 9.0)
+end
+function uses_frontend_opaque(x)
+    @opaque y->x+y
+end
+@test uses_frontend_opaque(10)(8) == 18
+
+# World age mechanism
+function test_oc_world_age end
+mk_oc_world_age() = @opaque ()->test_oc_world_age()
+g_world_age = @opaque ()->test_oc_world_age()
+h_world_age = mk_oc_world_age()
+@test isa(h_world_age, Core.OpaqueClosure{Tuple{}, Union{}})
+test_oc_world_age() = 1
+@test_throws MethodError g_world_age()
+@test_throws MethodError h_world_age()
+@test mk_oc_world_age()() == 1
+g_world_age = @opaque ()->test_oc_world_age()
+@test g_world_age() == 1
+@test isa(mk_oc_world_age(), Core.OpaqueClosure{Tuple{}, Int})
+
+function maybe_vararg(isva::Bool)
+    T = isva ? Vararg{Int} : Int
+    @opaque Tuple{T} (x...)->x
+end
+@test maybe_vararg(false)(1) == (1,)
+@test_throws MethodError maybe_vararg(false)(1,2,3)
+@test maybe_vararg(true)(1) == (1,)
+@test maybe_vararg(true)(1,2,3) == (1,2,3)
+@test (@opaque Tuple{Int, Int} (a, b, x...)->x)(1,2) === ()
+@test (@opaque Tuple{Int, Int} (a, x...)->x)(1,2) === (2,)
+@test (@opaque Tuple{Int, Vararg{Int}} (a, x...)->x)(1,2,3,4) === (2,3,4)
+@test (@opaque (a::Int, x::Int...)->x)(1,2,3) === (2,3)
+
+@test_throws ErrorException (@opaque Tuple{Vararg{Int}} x->x)
+@test_throws ErrorException (@opaque Tuple{Int, Vararg{Int}} x->x)
+@test_throws ErrorException (@opaque Tuple{Int, Int} x->x)
+@test_throws ErrorException (@opaque Tuple{Any} (x,y)->x)
+@test_throws ErrorException (@opaque Tuple{Vararg{Int}} (x,y...)->x)
+@test_throws ErrorException (@opaque Tuple{Int} (x,y,z...)->x)
+
+# cannot specify types both on arguments and separately
+@test_throws ErrorException @eval @opaque Tuple{Any} (x::Int)->x
+
+# Vargarg in complied mode
+mk_va_opaque() = @opaque (x...)->x
+@test mk_va_opaque()(1) == (1,)
+@test mk_va_opaque()(1,2) == (1,2)
+
+# OpaqueClosure show method
+@test repr(@opaque x->1) == "(::Any)::Any->◌"
+
+# Opaque closure in CodeInfo returned from generated functions
+function mk_ocg(args...)
+    ci = @code_lowered const_int()
+    cig = Meta.lower(@__MODULE__, Expr(:new_opaque_closure, Tuple{}, Any, Any,
+        Expr(:opaque_closure_method, nothing, 0, false, lno, ci))).args[1]
+    cig.slotnames = Symbol[Symbol("#self#")]
+    cig.slottypes = Any[Any]
+    cig.slotflags = UInt8[0x00]
+    cig
+end
+
+@eval function oc_trivial_generated()
+    $(Expr(:meta, :generated_only))
+    $(Expr(:meta,
+            :generated,
+            Expr(:new,
+                Core.GeneratedFunctionStub,
+                :mk_ocg,
+                Any[:oc_trivial_generated],
+                Any[],
+                @__LINE__,
+                QuoteNode(Symbol(@__FILE__)),
+                true)))
+end
+@test isa(oc_trivial_generated(), Core.OpaqueClosure{Tuple{}, Any})
+@test oc_trivial_generated()() == 1
+
+# Constprop through varargs OpaqueClosure
+function oc_varargs_constprop()
+    oc = @opaque (args...)->args[1]+args[2]+args[3]
+    return Val{oc(1,2,3)}()
+end
+@test Base.return_types(oc_varargs_constprop, Tuple{}) == Any[Val{6}]
+
+# OpaqueClosure ABI
+f_oc_noinline(x) = @opaque function (y)
+    @noinline
+    x + y
+end
+
+let oc = Base.inferencebarrier(f_oc_noinline(1))
+    @test oc(2) == 3
+end
+
+function f_oc_noinline_call(x, y)
+    return f_oc_noinline(x)(y)
+end
+@test f_oc_noinline_call(1, 2) == 3
+
+@test_throws MethodError (@opaque x->x+1)(1, 2)
+
+# https://github.com/JuliaLang/julia/issues/40409
+const GLOBAL_OPAQUE_CLOSURE = @opaque () -> 123
+call_global_opaque_closure() = GLOBAL_OPAQUE_CLOSURE()
+@test call_global_opaque_closure() == 123
+
+let foo::Int = 42
+    Base.Experimental.@force_compile
+    oc = Base.Experimental.@opaque a::Int->sin(a) + cos(foo)
+
+    @test only(Base.return_types(oc, (Int,))) === Float64
+    code, rt = first(code_typed(oc, (Int,)))
+    @test rt === Float64
+end
+
+let oc = @opaque a->sin(a)
+    @test length(code_typed(oc, (Int,))) == 1
+end
+
+# constructing an opaque closure from IRCode
+let ci = code_typed(+, (Int, Int))[1][1]
+    ir = Core.Compiler.inflate_ir(ci)
+    @test OpaqueClosure(ir; nargs=2, isva=false)(40, 2) == 42
+    @test OpaqueClosure(ci)(40, 2) == 42
+
+    ir = Core.Compiler.inflate_ir(ci, Any[], Any[Tuple{}, Int, Int])
+    @test OpaqueClosure(ir; nargs=2, isva=false)(40, 2) == 42
+    @test isa(OpaqueClosure(ir; nargs=2, isva=false), Core.OpaqueClosure{Tuple{Int, Int}, Int})
+    @test_throws TypeError OpaqueClosure(ir; nargs=2, isva=false)(40.0, 2)
+end
+
+let ci = code_typed((x, y...)->(x, y), (Int, Int))[1][1]
+    ir = Core.Compiler.inflate_ir(ci)
+    @test OpaqueClosure(ir; nargs=2, isva=true)(40, 2) === (40, (2,))
+    @test OpaqueClosure(ci)(40, 2) === (40, (2,))
+end
+
+let ci = code_typed((x, y...)->(x, y), (Int, Int))[1][1]
+    ir = Core.Compiler.inflate_ir(ci)
+    @test_throws MethodError OpaqueClosure(ir; nargs=2, isva=true)(1, 2, 3)
+    @test_throws MethodError OpaqueClosure(ci)(1, 2, 3)
+end
diff --git a/test/operators.jl b/test/operators.jl
index fb0af5b5520474..a1e27d0e1cd7b6 100644
--- a/test/operators.jl
+++ b/test/operators.jl
@@ -81,8 +81,38 @@ import Base.<
 @test isequal(minmax(TO23094(2), TO23094(1))[1], TO23094(1))
 @test isequal(minmax(TO23094(2), TO23094(1))[2], TO23094(2))
 
+let m = Module()
+    @eval m begin
+        struct Foo end
+        foo(xs) = isequal(xs[1], Foo())
+    end
+    @test !(@inferred m.foo(Any[42]))
+end
+
 @test isless('a','b')
 
+@testset "isgreater" begin
+    # isgreater should be compatible with min.
+    min1(a, b) = Base.isgreater(a, b) ? b : a
+    # min promotes numerical arguments to the same type, but our quick min1
+    # doesn't, so use float test values instead of ints.
+    values = (1.0, 5.0, NaN, missing, Inf)
+    for a in values, b in values
+        @test min(a, b) === min1(a, b)
+        @test min((a,), (b,)) === min1((a,), (b,))
+        @test all(min([a], [b]) .=== min1([a], [b]))
+    end
+end
+
+@testset "isunordered" begin
+    @test  isunordered(NaN)
+    @test  isunordered(NaN32)
+    @test  isunordered(missing)
+    @test !isunordered(1)
+    @test !isunordered([NaN, 1])
+    @test !isunordered([1.0, missing])
+end
+
 @testset "vectorized comparisons between numbers" begin
     @test 1 .!= 2
     @test 1 .== 1
@@ -138,12 +168,23 @@ Base.promote_rule(::Type{T19714}, ::Type{Int}) = T19714
 
     @test repr(uppercase ∘ first) == "uppercase ∘ first"
     @test sprint(show, "text/plain", uppercase ∘ first) == "uppercase ∘ first"
+
+    # test keyword ags in composition
+    function kwf(a;b,c); a + b + c; end
+    @test (abs2 ∘ kwf)(1,b=2,c=3) == 36
+
 end
 
 @testset "function negation" begin
     str = randstring(20)
     @test filter(!isuppercase, str) == replace(str, r"[A-Z]" => "")
     @test filter(!islowercase, str) == replace(str, r"[a-z]" => "")
+    @test !!isnan === isnan
+    @test repr(!isnan) == "!isnan"
+    @test repr((-) ∘ sin) == "(-) ∘ sin"
+    @test repr(cos ∘ (sin ∘ tan)) == "cos ∘ (sin ∘ tan)"
+    @test repr(!(cos ∘ !sin)) == "!(cos ∘ !sin)"
+    @test repr(cos ∘ sin ∘ tan) == "cos ∘ sin ∘ tan" == repr((cos ∘ sin) ∘ tan)
 end
 
 # issue #19891
@@ -243,3 +284,28 @@ end
     @test gt5(6) && !gt5(5)
     @test lt5(4) && !lt5(5)
 end
+
+@testset "ni" begin
+    @test ∋([1,5,10,11], 5)
+    @test !∋([1,10,11], 5)
+    @test ∋(5)([5,1])
+    @test !∋(42)([0,1,100])
+    @test ∌(0)(1:10)
+    @test ∋(0)(-2:2)
+end
+
+@test [Base.afoldl(+, 1:i...) for i = 1:40] == [i * (i + 1) ÷ 2 for i = 1:40]
+
+@testset "Returns" begin
+    @test @inferred(Returns(1)()   ) === 1
+    @test @inferred(Returns(1)(23) ) === 1
+    @test @inferred(Returns("a")(2,3)) == "a"
+    @test @inferred(Returns(1)(x=1, y=2)) === 1
+    @test @inferred(Returns(Int)()) === Int
+    @test @inferred(Returns(Returns(1))()) === Returns(1)
+    f = @inferred Returns(Int)
+    @inferred f(1,2)
+    val = [1,2,3]
+    @test Returns(val)(1) === val
+    @test sprint(show, Returns(1.0)) == "Returns{Float64}(1.0)"
+end
diff --git a/test/ordering.jl b/test/ordering.jl
index 0fb663e0ef3ede..547d8d8dd0e8ba 100644
--- a/test/ordering.jl
+++ b/test/ordering.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 
 import Base.Order: Forward, Reverse
@@ -36,3 +38,5 @@ struct SomeOtherOrder <: Base.Order.Ordering end
 
 @test_throws ErrorException sort([1, 2, 3], lt=(a, b) -> a - b < 0, order=SomeOtherOrder())
 
+@test reverse(Forward) === Reverse
+@test reverse(Reverse) === Forward
diff --git a/test/osutils.jl b/test/osutils.jl
index c9e3b9d91a3774..36f28780171294 100644
--- a/test/osutils.jl
+++ b/test/osutils.jl
@@ -1,4 +1,5 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
+using Libdl
 
 @testset "Operating system predicates" begin
     @test !Sys.isunix(:Windows)
@@ -44,6 +45,7 @@ end
     @test (@static if false 1 elseif false 2 else 3 end) === 3
     @test (@static if false 1 elseif false 2 elseif true && false 3 else 4 end) === 4
     @test (@static if false 1 elseif false 2 elseif true && false 3 end) === nothing
+    @test_throws ArgumentError("invalid @static macro") @macroexpand @static 1
 end
 
 if Sys.iswindows()
@@ -55,3 +57,15 @@ if Sys.iswindows()
         end
     end
 end
+
+if Sys.islinux() && Sys.which("readelf") !== nothing
+    @testset "stack is not marked as executable" begin
+        for f in intersect(dllist(),
+                           [readdir(joinpath(Sys.BINDIR, Base.LIBDIR), join=true);
+                            readdir(joinpath(Sys.BINDIR, Base.LIBDIR, "julia"), join=true)])
+            for l in eachline(open(`readelf -l $f`))
+                @test !(contains(l, "GNU_STACK") && contains(l, 'E'))
+            end
+        end
+    end
+end
diff --git a/test/parse.jl b/test/parse.jl
index 2deeecd516f2a7..ae07936b3a18ed 100644
--- a/test/parse.jl
+++ b/test/parse.jl
@@ -236,6 +236,13 @@ end
     @test_throws ArgumentError parse(Int, "2", base = 63)
 end
 
+@testset "issue #42616" begin
+    @test tryparse(Bool, "") === nothing
+    @test tryparse(Bool, " ") === nothing
+    @test_throws ArgumentError parse(Bool, "")
+    @test_throws ArgumentError parse(Bool, " ")
+end
+
 # issue #17333: tryparse should still throw on invalid base
 for T in (Int32, BigInt), base in (0,1,100)
     @test_throws ArgumentError tryparse(T, "0", base = base)
diff --git a/test/path.jl b/test/path.jl
index e09a46ef9370e5..31de4baffd1a04 100644
--- a/test/path.jl
+++ b/test/path.jl
@@ -9,6 +9,23 @@
         @test isabspath(S(homedir()))
         @test !isabspath(S("foo"))
     end
+    if Sys.iswindows()
+        @testset "issue #38491" begin
+            pwd_drive = uppercase(splitdrive(pwd())[1])
+            drive = (pwd_drive == "X:") ? "Y:" : "X:"
+            @test abspath("$(lowercase(drive))a\\b\\c") == "$(lowercase(drive))\\a\\b\\c"
+            @test abspath("$(uppercase(drive))a\\b\\c") == "$(uppercase(drive))\\a\\b\\c"
+            @test abspath("$(lowercase(drive))a") == "$(lowercase(drive))\\a"
+            @test abspath("$(uppercase(drive))a") == "$(uppercase(drive))\\a"
+            @test abspath(lowercase(drive)) == "$(lowercase(drive))\\"
+            @test abspath(uppercase(drive)) == "$(uppercase(drive))\\"
+
+            @test lowercase(abspath("$(pwd_drive)a\\b\\c")) == lowercase(joinpath(pwd(), "a\\b\\c"))
+            @test lowercase(abspath("$(pwd_drive)a")) == lowercase(joinpath(pwd(), "a"))
+            @test lowercase(abspath(lowercase(pwd_drive))) == lowercase("$(pwd())\\")
+            @test lowercase(abspath(uppercase(pwd_drive))) == lowercase("$(pwd())\\")
+        end
+    end
     @test basename(S("foo$(sep)bar")) == "bar"
     @test dirname(S("foo$(sep)bar")) == "foo"
 
@@ -42,6 +59,11 @@
         @test joinpath(S("foo"), S(homedir())) == homedir()
         @test joinpath(S(abspath("foo")), S(homedir())) == homedir()
 
+        for str in map(S, [sep, "a$(sep)b", "a$(sep)b$(sep)c", "a$(sep)b$(sep)c$(sep)d"])
+            @test str == joinpath(splitpath(str))
+            @test joinpath(splitpath(str)) == joinpath(splitpath(str)...)
+        end
+
         if Sys.iswindows()
             @test joinpath(S("foo"),S("bar:baz")) == "bar:baz"
             @test joinpath(S("C:"),S("foo"),S("D:"),S("bar")) == "D:bar"
@@ -58,6 +80,11 @@
             @test joinpath(S("\\\\server\\share"),S("a")) == "\\\\server\\share\\a"
             @test joinpath(S("\\\\server\\share\\"), S("a")) == "\\\\server\\share\\a"
 
+            for str in map(S, ["c:\\", "c:\\a", "c:\\a\\b", "c:\\a\\b\\c", "c:\\a\\b\\c\\d"])
+                @test str == joinpath(splitpath(str))
+                @test joinpath(splitpath(str)) == joinpath(splitpath(str)...)
+            end
+
         elseif Sys.isunix()
             @test joinpath(S("foo"),S("bar:baz")) == "foo$(sep)bar:baz"
             @test joinpath(S("C:"),S("foo"),S("D:"),S("bar")) == "C:$(sep)foo$(sep)D:$(sep)bar"
@@ -262,15 +289,39 @@
                     res = relpath(filep, startp)
                     idx += 1
                     @test res == relpath_expected_results[idx]
+                    if Sys.iswindows()
+                        @test relpath("e:$filep", "e:$startp") == relpath_expected_results[idx]
+                        @test relpath("e:$filep", "E:$startp") == relpath_expected_results[idx]
+                        @test relpath("E:$filep", "e:$startp") == relpath_expected_results[idx]
+                        @test relpath("E:$filep", "E:$startp") == relpath_expected_results[idx]
+                    end
                 end
             end
             # Additional cases
             @test_throws ArgumentError relpath(S("$(sep)home$(sep)user$(sep)dir_withendsep$(sep)"), "")
             @test_throws ArgumentError relpath(S(""), S("$(sep)home$(sep)user$(sep)dir_withendsep$(sep)"))
+
+            # issue 40237
+            path = "..$(sep)a$(sep)b$(sep)c"
+            @test relpath(abspath(path)) == path
         end
         test_relpath()
     end
 
+    if Sys.iswindows()
+        @testset "issue #23646" begin
+            @test lowercase(relpath("E:\\a\\b", "C:\\c")) == "e:\\a\\b"
+            @test lowercase(relpath("E:\\a\\b", "c:\\c")) == "e:\\a\\b"
+            @test lowercase(relpath("e:\\a\\b", "C:\\c")) == "e:\\a\\b"
+            @test lowercase(relpath("e:\\a\\b", "c:\\c")) == "e:\\a\\b"
+
+            @test relpath("C:\\a\\b", "c:\\a\\b") == "."
+            @test relpath("c:\\a\\b", "C:\\a\\b") == "."
+            @test lowercase(relpath("C:\\a\\b", "c:\\c\\d")) == "..\\..\\a\\b"
+            @test lowercase(relpath("c:\\a\\b", "C:\\c\\d")) == "..\\..\\a\\b"
+        end
+    end
+
     @testset "type stability" begin
         @test isa(joinpath(S("a"), S("b")), String)
         @test isa(joinpath(S(abspath("a")), S("b")), String)
diff --git a/test/precompile.jl b/test/precompile.jl
index 90c7e8e32699de..fb38f08dad93b1 100644
--- a/test/precompile.jl
+++ b/test/precompile.jl
@@ -12,12 +12,79 @@ FooBase_module = :FooBase4b3a94a1a081a8cb
 end
 using .ConflictingBindings
 
-(f -> f())() do # wrap in function scope, so we can test world errors
-dir = mktempdir()
-dir2 = mktempdir()
-insert!(LOAD_PATH, 1, dir)
-insert!(DEPOT_PATH, 1, dir)
-try
+function precompile_test_harness(@nospecialize(f), testset::String)
+    @testset "$testset" begin
+        precompile_test_harness(f, true)
+    end
+end
+function precompile_test_harness(@nospecialize(f), separate::Bool)
+    load_path = mktempdir()
+    load_cache_path = separate ? mktempdir() : load_path
+    try
+        pushfirst!(LOAD_PATH, load_path)
+        pushfirst!(DEPOT_PATH, load_cache_path)
+        f(load_path)
+    finally
+        rm(load_path, recursive=true, force=true)
+        separate && rm(load_cache_path, recursive=true, force=true)
+        filter!((≠)(load_path), LOAD_PATH)
+        separate && filter!((≠)(load_cache_path), DEPOT_PATH)
+    end
+    nothing
+end
+
+# method root provenance
+
+rootid(m::Module) = ccall(:jl_module_build_id, UInt64, (Any,), Base.parentmodule(m))
+rootid(m::Method) = rootid(m.module)
+
+function root_provenance(m::Method, i::Int)
+    mid = rootid(m)
+    isdefined(m, :root_blocks) || return mid
+    idxs = view(m.root_blocks, 2:2:length(m.root_blocks))
+    j = searchsortedfirst(idxs, i) - 1   # RLE roots are 0-indexed
+    j == 0 && return mid
+    return m.root_blocks[2*j-1]
+end
+
+struct RLEIterator{T}   # for method roots, T = UInt64 (even on 32-bit)
+    items::Vector{Any}
+    blocks::Vector{T}
+    defaultid::T
+end
+function RLEIterator(roots, blocks, defaultid)
+    T = promote_type(eltype(blocks), typeof(defaultid))
+    return RLEIterator{T}(convert(Vector{Any}, roots), blocks, defaultid)
+end
+RLEIterator(m::Method) = RLEIterator(m.roots, m.root_blocks, rootid(m))
+Base.iterate(iter::RLEIterator) = iterate(iter, (0, 0, iter.defaultid))
+function Base.iterate(iter::RLEIterator, (i, j, cid))
+    i += 1
+    i > length(iter.items) && return nothing
+    r = iter.items[i]
+    while (j + 1 < length(iter.blocks) && i > iter.blocks[j+2])
+        cid = iter.blocks[j+1]
+        j += 2
+    end
+    return cid => r, (i, j, cid)
+end
+
+function group_roots(m::Method)
+    mid = rootid(m)
+    isdefined(m, :root_blocks) || return Dict(mid => m.roots)
+    group_roots(RLEIterator(m.roots, m.root_blocks, mid))
+end
+function group_roots(iter::RLEIterator)
+    rootsby = Dict{typeof(iter.defaultid),Vector{Any}}()
+    for (id, r) in iter
+        list = get!(valtype(rootsby), rootsby, id)
+        push!(list, r)
+    end
+    return rootsby
+end
+
+precompile_test_harness("basic precompile functionality") do dir2
+precompile_test_harness(false) do dir
     Foo_file = joinpath(dir, "$Foo_module.jl")
     Foo2_file = joinpath(dir, "$Foo2_module.jl")
     FooBase_file = joinpath(dir, "$FooBase_module.jl")
@@ -67,15 +134,17 @@ try
               include_dependency("foo.jl")
               include_dependency("foo.jl")
               module Bar
-                  @doc "bar function" bar(x) = x + 2
                   include_dependency("bar.jl")
               end
+              @doc "Bar module" Bar # this needs to define the META dictionary via eval
+              @eval Bar @doc "bar function" bar(x) = x + 2
 
               # test for creation of some reasonably complicated type
               struct MyType{T} end
               const t17809s = Any[
                     Tuple{
                         Type{Ptr{MyType{i}}},
+                        Ptr{Type{MyType{i}}},
                         Array{Ptr{MyType{MyType{:sym}()}}(0), 0},
                         Val{Complex{Int}(1, 2)},
                         Val{3},
@@ -146,9 +215,8 @@ try
 
               let some_method = which(Base.include, (Module, String,))
                     # global const some_method // FIXME: support for serializing a direct reference to an external Method not implemented
-                  global const some_linfo =
-                      ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, (Any, Any, Any, UInt),
-                          some_method, Tuple{typeof(Base.include), Module, String}, Core.svec(), typemax(UInt))
+                  global const some_linfo = Core.Compiler.specialize_method(some_method,
+                      Tuple{typeof(Base.include), Module, String}, Core.svec())
               end
 
               g() = override(1.0)
@@ -172,8 +240,8 @@ try
               const layout3 = collect(x.match for x in eachmatch(r"..", "abcdefghijk"))::Vector{SubString{String}}
 
               # create a backedge that includes Type{Union{}}, to ensure lookup can handle that
-              call_bottom() = show(stdout::IO, Union{})
-              Core.Compiler.return_type(call_bottom, ())
+              call_bottom() = show(stdout, Union{})
+              Core.Compiler.return_type(call_bottom, Tuple{})
 
               # check that @ccallable works from precompiled modules
               Base.@ccallable Cint f35014(x::Cint) = x+Cint(1)
@@ -243,9 +311,11 @@ try
     cachefile = joinpath(cachedir, "$Foo_module.ji")
     # use _require_from_serialized to ensure that the test fails if
     # the module doesn't reload from the image:
-    @test_logs (:warn, "Replacing module `$Foo_module`") begin
-        ms = Base._require_from_serialized(cachefile, Base.TOMLCache())
-        @test isa(ms, Array{Any,1})
+    @test_warn "@ccallable was already defined for this method name" begin
+        @test_logs (:warn, "Replacing module `$Foo_module`") begin
+            ms = Base._require_from_serialized(Base.PkgId(Foo), cachefile)
+            @test isa(ms, Array{Any,1})
+        end
     end
 
     @test_throws MethodError Foo.foo(17) # world shouldn't be visible yet
@@ -262,6 +332,7 @@ try
         # issue #12284:
         @test string(Base.Docs.doc(Foo.foo)) == "foo function\n"
         @test string(Base.Docs.doc(Foo.Bar.bar)) == "bar function\n"
+        @test string(Base.Docs.doc(Foo.Bar)) == "Bar module\n"
 
         modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile)
         discard_module = mod_fl_mt -> (mod_fl_mt.filename, mod_fl_mt.mtime)
@@ -288,16 +359,15 @@ try
             Dict(let m = Base.root_module(Base, s)
                      Base.PkgId(m) => Base.module_build_id(m)
                  end for s in
-                [:Artifacts, :Base64, :CRC32c, :Dates, :DelimitedFiles, :Distributed, :FileWatching, :Markdown,
-                 :Future, :Libdl, :LinearAlgebra, :Logging, :Mmap, :Printf,
-                 :Profile, :Random, :Serialization, :SharedArrays, :SparseArrays, :SuiteSparse, :Test,
-                 :Unicode, :REPL, :InteractiveUtils, :Pkg, :LibGit2, :SHA, :UUIDs, :Sockets,
-                 :Statistics, :TOML, :MozillaCACerts_jll, :LibCURL_jll, :LibCURL, :Downloads,]),
-                # Plus precompilation module generated at build time
-                let id = Base.PkgId("__PackagePrecompilationStatementModule")
-                    Dict(id => Base.module_build_id(Base.root_module(id)))
-                end
-           )
+                [:ArgTools, :Artifacts, :Base64, :CompilerSupportLibraries_jll, :CRC32c, :Dates, :DelimitedFiles,
+                 :Distributed, :Downloads, :FileWatching, :Future, :InteractiveUtils, :libblastrampoline_jll,
+                 :LazyArtifacts, :LibCURL, :LibCURL_jll, :LibGit2, :Libdl, :LinearAlgebra,
+                 :Logging, :Markdown, :Mmap, :MozillaCACerts_jll, :NetworkOptions, :OpenBLAS_jll, :Pkg, :Printf,
+                 :Profile, :p7zip_jll, :REPL, :Random, :SHA, :Serialization, :SharedArrays, :Sockets,
+                 :SparseArrays, :Statistics, :SuiteSparse, :TOML, :Tar, :Test, :UUIDs, :Unicode,
+                 :nghttp2_jll]
+            ),
+        )
         @test discard_module.(deps) == deps1
         modules, (deps, requires), required_modules = Base.parse_cache_header(cachefile; srcfiles_only=true)
         @test map(x -> x.filename, deps) == [Foo_file]
@@ -320,15 +390,14 @@ try
         @test all(i -> Foo.t17809s[i + 1] ===
             Tuple{
                 Type{Ptr{Foo.MyType{i}}},
+                Ptr{Type{Foo.MyType{i}}},
                 Array{Ptr{Foo.MyType{Foo.MyType{:sym}()}}(0), 0},
                 Val{Complex{Int}(1, 2)},
                 Val{3},
                 Val{nothing}},
             0:25)
         some_method = which(Base.include, (Module, String,))
-        some_linfo =
-                ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, (Any, Any, Any, UInt),
-                    some_method, Tuple{typeof(Base.include), Module, String}, Core.svec(), typemax(UInt))
+        some_linfo = Core.Compiler.specialize_method(some_method, Tuple{typeof(Base.include), Module, String}, Core.svec())
         @test Foo.some_linfo::Core.MethodInstance === some_linfo
 
         ft = Base.datatype_fieldtypes
@@ -420,7 +489,8 @@ try
           """)
 
     cachefile = Base.compilecache(Base.PkgId("FooBar"))
-    @test cachefile == Base.compilecache_path(Base.PkgId("FooBar"))
+    empty_prefs_hash = Base.get_preferences_hash(nothing, String[])
+    @test cachefile == Base.compilecache_path(Base.PkgId("FooBar"), empty_prefs_hash)
     @test isfile(joinpath(cachedir, "FooBar.ji"))
     @test Base.stale_cachefile(FooBar_file, joinpath(cachedir, "FooBar.ji")) isa Vector
     @test !isdefined(Main, :FooBar)
@@ -460,7 +530,7 @@ try
           error("break me")
           end
           """)
-    @test_warn "LoadError: break me\nStacktrace:\n [1] error" try
+    @test_warn r"LoadError: break me\nStacktrace:\n \[1\] [\e01m\[]*error" try
             Base.require(Main, :FooBar2)
             error("the \"break me\" test failed")
         catch exc
@@ -511,106 +581,365 @@ try
     rm(FooBarT_file)
     @test Base.stale_cachefile(FooBarT2_file, joinpath(cachedir2, "FooBarT2.ji")) === true
     @test Base.require(Main, :FooBarT2) isa Module
-finally
-    splice!(DEPOT_PATH, 1:2)
-    splice!(LOAD_PATH, 1)
-    rm(dir, recursive=true)
-    rm(dir2, recursive=true)
 end
 end
 
-# test --compiled-modules=no command line option
-let dir = mktempdir(),
-    Time_module = :Time4b3a94a1a081a8cb
-
-    try
-        write(joinpath(dir, "$Time_module.jl"),
-              """
-              module $Time_module
-                  time = Base.time()
+# method root provenance & external code caching
+precompile_test_harness("code caching") do dir
+    Bid = rootid(Base)
+    Cache_module = :Cacheb8321416e8a3e2f1
+    # Note: calling setindex!(::Dict{K,V}, ::Any, ::K) adds both compression and codegen roots
+    write(joinpath(dir, "$Cache_module.jl"),
+          """
+          module $Cache_module
+              struct X end
+              struct X2 end
+              @noinline function f(d)
+                  @noinline
+                  d[X()] = nothing
               end
-              """)
+              @noinline fpush(dest) = push!(dest, X())
+              function callboth()
+                  f(Dict{X,Any}())
+                  fpush(X[])
+                  nothing
+              end
+              function getelsize(list::Vector{T}) where T
+                  n = 0
+                  for item in list
+                      n += sizeof(T)
+                  end
+                  return n
+              end
+              precompile(callboth, ())
+              precompile(getelsize, (Vector{Int32},))
+          end
+          """)
+    Base.compilecache(Base.PkgId(string(Cache_module)))
+    @eval using $Cache_module
+    M = getfield(@__MODULE__, Cache_module)
+    # Test that this cache file "owns" all the roots
+    Mid = rootid(M)
+    for name in (:f, :fpush, :callboth)
+        func = getfield(M, name)
+        m = only(collect(methods(func)))
+        @test all(i -> root_provenance(m, i) == Mid, 1:length(m.roots))
+    end
+    # Check that we can cache external CodeInstances:
+    # size(::Vector) has an inferred specialization for Vector{X}
+    msize = which(size, (Vector{<:Any},))
+    hasspec = false
+    for i = 1:length(msize.specializations)
+        if isassigned(msize.specializations, i)
+            mi = msize.specializations[i]
+            if isa(mi, Core.MethodInstance)
+                tt = Base.unwrap_unionall(mi.specTypes)
+                if tt.parameters[2] == Vector{Cacheb8321416e8a3e2f1.X}
+                    if isdefined(mi, :cache) && isa(mi.cache, Core.CodeInstance) && mi.cache.max_world == typemax(UInt) && mi.cache.inferred !== nothing
+                        hasspec = true
+                        break
+                    end
+                end
+            end
+        end
+    end
+    @test hasspec
+    # Test that compilation adds to method roots with appropriate provenance
+    m = which(setindex!, (Dict{M.X,Any}, Any, M.X))
+    @test M.X ∈ m.roots
+    # Check that roots added outside of incremental builds get attributed to a moduleid of 0
+    Base.invokelatest() do
+        Dict{M.X2,Any}()[M.X2()] = nothing
+    end
+    @test M.X2 ∈ m.roots
+    groups = group_roots(m)
+    @test M.X ∈ groups[Mid]           # attributed to M
+    @test M.X2 ∈ groups[0]            # activate module is not known
+    @test !isempty(groups[Bid])
+    # Check that internal methods and their roots are accounted appropriately
+    minternal = which(M.getelsize, (Vector,))
+    mi = minternal.specializations[1]
+    @test Base.unwrap_unionall(mi.specTypes).parameters[2] == Vector{Int32}
+    ci = mi.cache
+    @test ci.relocatability == 1
+    @test ci.inferred !== nothing
+    # ...and that we can add "untracked" roots & non-relocatable CodeInstances to them too
+    Base.invokelatest() do
+        M.getelsize(M.X2[])
+    end
+    mi = minternal.specializations[2]
+    ci = mi.cache
+    @test ci.relocatability == 0
+    # PkgA loads PkgB, and both add roots to the same `push!` method (both before and after loading B)
+    Cache_module2 = :Cachea1544c83560f0c99
+    write(joinpath(dir, "$Cache_module2.jl"),
+          """
+          module $Cache_module2
+              struct Y end
+              @noinline f(dest) = push!(dest, Y())
+              callf() = f(Y[])
+              callf()
+              using $(Cache_module)
+              struct Z end
+              @noinline g(dest) = push!(dest, Z())
+              callg() = g(Z[])
+              callg()
+          end
+          """)
+    Base.compilecache(Base.PkgId(string(Cache_module2)))
+    @eval using $Cache_module2
+    M2 = getfield(@__MODULE__, Cache_module2)
+    M2id = rootid(M2)
+    dest = []
+    Base.invokelatest() do  # use invokelatest to see the results of loading the compile
+        M2.f(dest)
+        M.fpush(dest)
+        M2.g(dest)
+        @test dest == [M2.Y(), M.X(), M2.Z()]
+        @test M2.callf() == [M2.Y()]
+        @test M2.callg() == [M2.Z()]
+        @test M.fpush(M.X[]) == [M.X()]
+    end
+    mT = which(push!, (Vector{T} where T, Any))
+    groups = group_roots(mT)
+    @test M2.Y ∈ groups[M2id]
+    @test M2.Z ∈ groups[M2id]
+    @test M.X ∈ groups[Mid]
+    @test M.X ∉ groups[M2id]
+    # backedges of external MethodInstances
+    # Root gets used by RootA and RootB, and both consumers end up inferring the same MethodInstance from Root
+    # Do both callers get listed as backedges?
+    RootModule = :Root_0xab07d60518763a7e
+    write(joinpath(dir, "$RootModule.jl"),
+          """
+          module $RootModule
+          function f(x)
+              while x < 10
+                  x += oftype(x, 1)
+              end
+              return x
+          end
+          g1() = f(Int16(9))
+          g2() = f(Int16(9))
+          # all deliberately uncompiled
+          end
+          """)
+    RootA = :RootA_0xab07d60518763a7e
+    write(joinpath(dir, "$RootA.jl"),
+          """
+          module $RootA
+          using $RootModule
+          fA() = $RootModule.f(Int8(4))
+          fA()
+          $RootModule.g1()
+          end
+          """)
+    RootB = :RootB_0xab07d60518763a7e
+    write(joinpath(dir, "$RootB.jl"),
+          """
+          module $RootB
+          using $RootModule
+          fB() = $RootModule.f(Int8(4))
+          fB()
+          $RootModule.g2()
+          end
+          """)
+    Base.compilecache(Base.PkgId(string(RootA)))
+    Base.compilecache(Base.PkgId(string(RootB)))
+    @eval using $RootA
+    @eval using $RootB
+    MA = getfield(@__MODULE__, RootA)
+    MB = getfield(@__MODULE__, RootB)
+    M = getfield(MA, RootModule)
+    m = which(M.f, (Any,))
+    for mi in m.specializations
+        mi === nothing && continue
+        if mi.specTypes.parameters[2] === Int8
+            # external callers
+            mods = Module[]
+            for be in mi.backedges
+                push!(mods, be.def.module)
+            end
+            @test MA ∈ mods
+            @test MB ∈ mods
+            @test length(mods) == 2
+        elseif mi.specTypes.parameters[2] === Int16
+            # internal callers
+            meths = Method[]
+            for be in mi.backedges
+                push!(meths, be.def)
+            end
+            @test which(M.g1, ()) ∈ meths
+            @test which(M.g2, ()) ∈ meths
+            @test length(meths) == 2
+        end
+    end
 
-        eval(quote
-            insert!(LOAD_PATH, 1, $(dir))
-            insert!(DEPOT_PATH, 1, $(dir))
-            Base.compilecache(Base.PkgId("Time4b3a94a1a081a8cb"))
-        end)
+    # Invalidations (this test is adapted from from SnoopCompile)
+    function hasvalid(mi, world)
+        isdefined(mi, :cache) || return false
+        ci = mi.cache
+        while true
+            ci.max_world >= world && return true
+            isdefined(ci, :next) || return false
+            ci = ci.next
+        end
+    end
 
-        exename = `$(Base.julia_cmd()) --compiled-modules=yes --startup-file=no`
+    StaleA = :StaleA_0xab07d60518763a7e
+    StaleB = :StaleB_0xab07d60518763a7e
+    StaleC = :StaleC_0xab07d60518763a7e
+    write(joinpath(dir, "$StaleA.jl"),
+        """
+        module $StaleA
 
-        testcode = """
-            insert!(LOAD_PATH, 1, $(repr(dir)))
-            insert!(DEPOT_PATH, 1, $(repr(dir)))
-            using $Time_module
-            getfield($Time_module, :time)
+        stale(x) = rand(1:8)
+        stale(x::Int) = length(digits(x))
+
+        not_stale(x::String) = first(x)
+
+        use_stale(c) = stale(c[1]) + not_stale("hello")
+        build_stale(x) = use_stale(Any[x])
+
+        # force precompilation
+        build_stale(37)
+        stale('c')
+
+        end
+        """
+    )
+    write(joinpath(dir, "$StaleB.jl"),
         """
+        module $StaleB
 
-        t1_yes = readchomp(`$exename --compiled-modules=yes -E $(testcode)`)
-        t2_yes = readchomp(`$exename --compiled-modules=yes -E $(testcode)`)
-        @test t1_yes == t2_yes
+        # StaleB does not know about StaleC when it is being built.
+        # However, if StaleC is loaded first, we get `"jl_insert_method_instance"`
+        # invalidations.
+        using $StaleA
 
-        t1_no = readchomp(`$exename --compiled-modules=no -E $(testcode)`)
-        t2_no = readchomp(`$exename --compiled-modules=no -E $(testcode)`)
-        @test t1_no != t2_no
-        @test parse(Float64, t1_no) < parse(Float64, t2_no)
+        # This will be invalidated if StaleC is loaded
+        useA() = $StaleA.stale("hello")
 
-    finally
-        splice!(DEPOT_PATH, 1)
-        splice!(LOAD_PATH, 1)
-        rm(dir, recursive=true)
-    end
-end
+        # force precompilation
+        useA()
 
-# test loading a package with conflicting namespace
-let dir = mktempdir()
-    Test_module = :Test6c92f26
-    try
-        write(joinpath(dir, "Iterators.jl"),
-              """
-              module Iterators
-              end
-              """)
+        end
+        """
+    )
+    write(joinpath(dir, "$StaleC.jl"),
+        """
+        module $StaleC
 
-        write(joinpath(dir, "$Test_module.jl"),
-              """
-              module $Test_module
-                   import Iterators # FIXME: use `using`
-              end
-              """)
+        using $StaleA
 
-        testcode = """
-            insert!(LOAD_PATH, 1, $(repr(dir)))
-            insert!(DEPOT_PATH, 1, $(repr(dir)))
-            using $Test_module
-            println(stderr, $Test_module.Iterators)
+        $StaleA.stale(x::String) = length(x)
+        call_buildstale(x) = $StaleA.build_stale(x)
+
+        call_buildstale("hey")
+
+        end # module
         """
+    )
+    for pkg in (StaleA, StaleB, StaleC)
+        Base.compilecache(Base.PkgId(string(pkg)))
+    end
+    @eval using $StaleA
+    @eval using $StaleC
+    @eval using $StaleB
+    MA = getfield(@__MODULE__, StaleA)
+    MB = getfield(@__MODULE__, StaleB)
+    MC = getfield(@__MODULE__, StaleC)
+    world = Base.get_world_counter()
+    m = only(methods(MA.use_stale))
+    mi = m.specializations[1]
+    @test hasvalid(mi, world)   # it was re-inferred by StaleC
+    m = only(methods(MA.build_stale))
+    mis = filter(!isnothing, collect(m.specializations))
+    @test length(mis) == 2
+    for mi in mis
+        if mi.specTypes.parameters[2] == Int
+            @test mi.cache.max_world < world
+        else
+            # The variant for String got "healed" by recompilation in StaleC
+            @test mi.specTypes.parameters[2] == String
+            @test mi.cache.max_world == typemax(UInt)
+        end
+    end
+    m = only(methods(MB.useA))
+    mi = m.specializations[1]
+    @test !hasvalid(mi, world)      # invalidated by the stale(x::String) method in StaleC
+    m = only(methods(MC.call_buildstale))
+    mi = m.specializations[1]
+    @test hasvalid(mi, world)       # was compiled with the new method
+end
 
-        exename = `$(Base.julia_cmd()) --startup-file=no`
-        let fname = tempname()
-            try
-                for i = 1:2
-                    @test readchomp(pipeline(`$exename -E $(testcode)`, stderr=fname)) == "nothing"
-                    @test read(fname, String) == "Iterators\n"
-                end
-            finally
-                rm(fname, force=true)
+# test --compiled-modules=no command line option
+precompile_test_harness("--compiled-modules=no") do dir
+    Time_module = :Time4b3a94a1a081a8cb
+    write(joinpath(dir, "$Time_module.jl"),
+          """
+          module $Time_module
+              time = Base.time()
+          end
+          """)
+    Base.compilecache(Base.PkgId("Time4b3a94a1a081a8cb"))
+    exename = `$(Base.julia_cmd()) --compiled-modules=yes --startup-file=no`
+    testcode = """
+        insert!(LOAD_PATH, 1, $(repr(dir)))
+        insert!(DEPOT_PATH, 1, $(repr(dir)))
+        using $Time_module
+        getfield($Time_module, :time)
+    """
+
+    t1_yes = readchomp(`$exename --compiled-modules=yes -E $(testcode)`)
+    t2_yes = readchomp(`$exename --compiled-modules=yes -E $(testcode)`)
+    @test t1_yes == t2_yes
+
+    t1_no = readchomp(`$exename --compiled-modules=no -E $(testcode)`)
+    t2_no = readchomp(`$exename --compiled-modules=no -E $(testcode)`)
+    @test t1_no != t2_no
+    @test parse(Float64, t1_no) < parse(Float64, t2_no)
+end
+
+# test loading a package with conflicting namespace
+precompile_test_harness("conflicting namespaces") do dir
+    Test_module = :Test6c92f26
+    write(joinpath(dir, "Iterators.jl"),
+          """
+          module Iterators
+          end
+          """)
+    write(joinpath(dir, "$Test_module.jl"),
+          """
+          module $Test_module
+               import Iterators # FIXME: use `using`
+          end
+          """)
+    testcode = """
+        insert!(LOAD_PATH, 1, $(repr(dir)))
+        insert!(DEPOT_PATH, 1, $(repr(dir)))
+        using $Test_module
+        println(stderr, $Test_module.Iterators)
+    """
+
+    exename = `$(Base.julia_cmd()) --startup-file=no`
+    let fname = tempname()
+        try
+            for i = 1:2
+                @test readchomp(pipeline(`$exename -E $(testcode)`, stderr=fname)) == "nothing"
+                @test read(fname, String) == "Iterators\n"
             end
+        finally
+            rm(fname, force=true)
         end
-    finally
-        rm(dir, recursive=true)
     end
 end
 
-let dir = mktempdir()
+precompile_test_harness("package_callbacks") do dir
+    loaded_modules = Channel{Symbol}(32)
+    callback = (mod::Base.PkgId) -> put!(loaded_modules, Symbol(mod.name))
+    push!(Base.package_callbacks, callback)
     try
-        insert!(LOAD_PATH, 1, dir)
-        insert!(DEPOT_PATH, 1, dir)
-
-        loaded_modules = Channel{Symbol}(32)
-        callback = (mod::Base.PkgId) -> put!(loaded_modules, Symbol(mod.name))
-        push!(Base.package_callbacks, callback)
-
         Test1_module = :Teste4095a81
         Test2_module = :Teste4095a82
         Test3_module = :Teste4095a83
@@ -620,8 +949,8 @@ let dir = mktempdir()
               module $(Test1_module)
               end
               """)
-
         Base.compilecache(Base.PkgId("$(Test1_module)"))
+
         write(joinpath(dir, "$(Test2_module).jl"),
               """
               module $(Test2_module)
@@ -629,6 +958,7 @@ let dir = mktempdir()
               end
               """)
         Base.compilecache(Base.PkgId("$(Test2_module)"))
+
         @test !Base.isbindingresolved(Main, Test2_module)
         Base.require(Main, Test2_module)
         @test take!(loaded_modules) == Test1_module
@@ -643,9 +973,27 @@ let dir = mktempdir()
         @test take!(loaded_modules) == Test3_module
     finally
         pop!(Base.package_callbacks)
-        splice!(DEPOT_PATH, 1)
-        splice!(LOAD_PATH, 1)
-        rm(dir, recursive=true)
+    end
+    L = ReentrantLock()
+    E = Base.Event()
+    t = errormonitor(@async lock(L) do
+                     wait(E)
+                     Base.root_module_key(Base)
+                     end)
+    Test4_module = :Teste4095a84
+    write(joinpath(dir, "$(Test4_module).jl"),
+          """
+          module $(Test4_module)
+          end
+          """)
+    Base.compilecache(Base.PkgId("$(Test4_module)"))
+    push!(Base.package_callbacks, _->(notify(E); lock(L) do; end))
+    # should not hang here
+    try
+        @eval using $(Symbol(Test4_module))
+        wait(t)
+    finally
+        pop!(Base.package_callbacks)
     end
 end
 
@@ -711,16 +1059,12 @@ end
 end
 
 # Ensure that module-loading plays nicely with Base.delete_method
-(f -> f())() do # wrap in function scope, so we can test world errors
-dir = mktempdir()
-insert!(LOAD_PATH, 1, dir)
-insert!(DEPOT_PATH, 1, dir)
-try
+# wrapped in function scope, so we can test world errors
+precompile_test_harness("delete_method") do dir
     A_module = :Aedb164bd3a126418
     B_module = :Bedb164bd3a126418
     A_file = joinpath(dir, "$A_module.jl")
     B_file = joinpath(dir, "$B_module.jl")
-
     write(A_file,
           """
           module $A_module
@@ -756,152 +1100,193 @@ try
     B = Base.require(Main, B_module)
     @test Base.invokelatest(B.bpc, 1) == Base.invokelatest(B.bpc, 1.0) == 2
     @test Base.invokelatest(B.bnopc, 1) == Base.invokelatest(B.bnopc, 1.0) == 2
-finally
-    popfirst!(LOAD_PATH)
-    popfirst!(DEPOT_PATH)
-    rm(dir, recursive=true)
 end
 
-# issue #19030 and #25279
-let
-    load_path = mktempdir()
-    load_cache_path = mktempdir()
-    try
-        ModuleA = :Issue19030
-
-        write(joinpath(load_path, "$ModuleA.jl"),
-            """
-            module $ModuleA
-                __init__() = push!(Base.package_callbacks, sym->nothing)
-            end
-            """)
-
-        pushfirst!(LOAD_PATH, load_path)
-        pushfirst!(DEPOT_PATH, load_cache_path)
-
-        l0 = length(Base.package_callbacks)
-        @eval using $ModuleA
-        @test length(Base.package_callbacks) == l0 + 1
-    finally
-        rm(load_path, recursive=true)
-        rm(load_cache_path, recursive=true)
-    end
+precompile_test_harness("Issues #19030 and #25279") do load_path
+    ModuleA = :Issue19030
+    write(joinpath(load_path, "$ModuleA.jl"),
+        """
+        module $ModuleA
+            __init__() = push!(Base.package_callbacks, sym->nothing)
+        end
+        """)
+    l0 = length(Base.package_callbacks)
+    @eval using $ModuleA
+    @test length(Base.package_callbacks) == l0 + 1
 end
 
-let
-    load_path = mktempdir()
-    load_cache_path = mktempdir()
-    try
-        write(joinpath(load_path, "A25604.jl"),
-            """
-            module A25604
-            using B25604
-            using C25604
-            end
-            """)
-        write(joinpath(load_path, "B25604.jl"),
-            """
-            module B25604
-            end
-            """)
-        write(joinpath(load_path, "C25604.jl"),
-            """
-            module C25604
-            using B25604
-            end
-            """)
-
-        pushfirst!(LOAD_PATH, load_path)
-        pushfirst!(DEPOT_PATH, load_cache_path)
-
-        Base.compilecache(Base.PkgId("A25604"))
-        @test_nowarn @eval using A25604
-    finally
-        rm(load_path, recursive=true)
-        rm(load_cache_path, recursive=true)
-    end
+precompile_test_harness("Issue #25604") do load_path
+    write(joinpath(load_path, "A25604.jl"),
+        """
+        module A25604
+        using B25604
+        using C25604
+        end
+        """)
+    write(joinpath(load_path, "B25604.jl"),
+        """
+        module B25604
+        end
+        """)
+    write(joinpath(load_path, "C25604.jl"),
+        """
+        module C25604
+        using B25604
+        end
+        """)
+    Base.compilecache(Base.PkgId("A25604"))
+    @test_nowarn @eval using A25604
 end
 
-let
-    load_path = mktempdir()
-    load_cache_path = mktempdir()
-    try
-        write(joinpath(load_path, "Foo26028.jl"),
-            """
-            module Foo26028
+precompile_test_harness("Issue #26028") do load_path
+    write(joinpath(load_path, "Foo26028.jl"),
+        """
+        module Foo26028
+        module Bar26028
+            x = 0
+        end
+        function __init__()
+            include(joinpath(@__DIR__, "Baz26028.jl"))
+        end
+        end
+        """)
+    write(joinpath(load_path, "Baz26028.jl"),
+        """
+        module Baz26028
+        import Foo26028.Bar26028.x
+        end
+        """)
+    Base.compilecache(Base.PkgId("Foo26028"))
+    @test_nowarn @eval using Foo26028
+end
 
-            module Bar26028
-                x = 0
-            end
+precompile_test_harness("Issue #29936") do load_path
+    write(joinpath(load_path, "Foo29936.jl"),
+          """
+          module Foo29936
+          const global m = Val{nothing}()
+          const global h = Val{:hey}()
+          wab = [("a", m), ("b", h),]
+          end
+          """)
+    @eval using Foo29936
+    @test [("Plan", Foo29936.m), ("Plan", Foo29936.h),] isa Vector{Tuple{String,Val}}
+end
 
-            function __init__()
-                include(joinpath(@__DIR__, "Baz26028.jl"))
-            end
+precompile_test_harness("Issue #25971") do load_path
+    sourcefile = joinpath(load_path, "Foo25971.jl")
+    write(sourcefile, "module Foo25971 end")
+    chmod(sourcefile, 0o666)
+    cachefile = Base.compilecache(Base.PkgId("Foo25971"))
+    @test filemode(sourcefile) == filemode(cachefile)
+    chmod(sourcefile, 0o600)
+    cachefile = Base.compilecache(Base.PkgId("Foo25971"))
+    @test filemode(sourcefile) == filemode(cachefile)
+    chmod(sourcefile, 0o444)
+    cachefile = Base.compilecache(Base.PkgId("Foo25971"))
+    # Check writable
+    @test touch(cachefile) == cachefile
+end
 
-            end
-            """)
-        write(joinpath(load_path, "Baz26028.jl"),
-            """
-            module Baz26028
-            import Foo26028.Bar26028.x
-            end
-            """)
+precompile_test_harness("Issue #38312") do load_path
+    TheType = """Array{Ref{Val{1}}, 1}"""
+    write(joinpath(load_path, "Foo38312.jl"),
+        """
+        module Foo38312
+        const TheType = $TheType
+        end
+        """)
+    write(joinpath(load_path, "Bar38312.jl"),
+        """
+        module Bar38312
+        const TheType = $TheType
+        end
+        """)
+    Base.compilecache(Base.PkgId("Foo38312"))
+    Base.compilecache(Base.PkgId("Bar38312"))
+    @test pointer_from_objref((@eval (using Foo38312; Foo38312)).TheType) ===
+          pointer_from_objref(eval(Meta.parse(TheType))) ===
+          pointer_from_objref((@eval (using Bar38312; Bar38312)).TheType)
+end
 
-        pushfirst!(LOAD_PATH, load_path)
-        pushfirst!(DEPOT_PATH, load_cache_path)
+precompile_test_harness("Opaque Closure") do load_path
+    write(joinpath(load_path, "OCPrecompile.jl"),
+        """
+        module OCPrecompile
+        using Base.Experimental: @opaque
+        f(x) = @opaque y->x+y
+        end
+        """)
+    Base.compilecache(Base.PkgId("OCPrecompile"))
+    f = (@eval (using OCPrecompile; OCPrecompile)).f
+    @test Base.invokelatest(f, 1)(2) == 3
+end
 
-        Base.compilecache(Base.PkgId("Foo26028"))
-        @test_nowarn @eval using Foo26028
-    finally
-        rm(load_path, recursive=true)
-        rm(load_cache_path, recursive=true)
-    end
+# issue #39405
+precompile_test_harness("Renamed Imports") do load_path
+    write(joinpath(load_path, "RenameImports.jl"),
+          """
+          module RenameImports
+          import Base.Experimental as ex
+          test() = ex
+          end
+          """)
+    Base.compilecache(Base.PkgId("RenameImports"))
+    @test (@eval (using RenameImports; RenameImports.test())) isa Module
 end
 
-# issue #29936
-let
-    load_path = mktempdir()
-    load_cache_path = mktempdir()
-    try
-        write(joinpath(load_path, "Foo29936.jl"),
-              """
-              module Foo29936
-              const global m = Val{nothing}()
-              const global h = Val{:hey}()
-              wab = [("a", m), ("b", h),]
-              end
-              """)
-        pushfirst!(LOAD_PATH, load_path)
-        pushfirst!(DEPOT_PATH, load_cache_path)
-        @eval using Foo29936
-        @test [("Plan", Foo29936.m), ("Plan", Foo29936.h),] isa Vector{Tuple{String,Val}}
-    finally
-        rm(load_path, recursive=true)
-        rm(load_cache_path, recursive=true)
+# issue #41872 (example from #38983)
+precompile_test_harness("No external edges") do load_path
+    write(joinpath(load_path, "NoExternalEdges.jl"),
+          """
+          module NoExternalEdges
+          bar(x::Int) = hcat(rand())
+          @inline bar() = hcat(rand())
+          bar(x::Float64) = bar()
+          foo1() = bar(1)
+          foo2() = bar(1.0)
+          foo3() = bar()
+          foo4() = hcat(rand())
+          precompile(foo1, ())
+          precompile(foo2, ())
+          precompile(foo3, ())
+          precompile(foo4, ())
+          end
+          """)
+    Base.compilecache(Base.PkgId("NoExternalEdges"))
+    @eval begin
+        using NoExternalEdges
+        @test only(methods(NoExternalEdges.foo1)).specializations[1].cache.max_world != 0
+        @test only(methods(NoExternalEdges.foo2)).specializations[1].cache.max_world != 0
+        @test only(methods(NoExternalEdges.foo3)).specializations[1].cache.max_world != 0
+        @test only(methods(NoExternalEdges.foo4)).specializations[1].cache.max_world != 0
     end
 end
 
-# Issue #25971
-let
-    load_path = mktempdir()
-    load_cache_path = mktempdir()
-    try
-        pushfirst!(LOAD_PATH, load_path)
-        pushfirst!(DEPOT_PATH, load_cache_path)
-        sourcefile = joinpath(load_path, "Foo25971.jl")
-        write(sourcefile, "module Foo25971 end")
-        chmod(sourcefile, 0o666)
-        cachefile = Base.compilecache(Base.PkgId("Foo25971"))
-        @test filemode(sourcefile) == filemode(cachefile)
-        chmod(sourcefile, 0o600)
-        cachefile = Base.compilecache(Base.PkgId("Foo25971"))
-        @test filemode(sourcefile) == filemode(cachefile)
-    finally
-        rm(load_path, recursive=true)
-        rm(load_cache_path, recursive=true)
-        filter!((≠)(load_path), LOAD_PATH)
-        filter!((≠)(load_cache_path), DEPOT_PATH)
+@testset "issue 38149" begin
+    M = Module()
+    @eval M begin
+        @nospecialize
+        f(x, y) = x + y
+        f(x::Int, y) = 2x + y
     end
+    precompile(M.f, (Int, Any))
+    precompile(M.f, (AbstractFloat, Any))
+    mis = map(methods(M.f)) do m
+        m.specializations[1]
+    end
+    @test any(mi -> mi.specTypes.parameters[2] === Any, mis)
+    @test all(mi -> isa(mi.cache, Core.CodeInstance), mis)
 end
 
-end # !withenv
+# Test that the cachepath is available in pkgorigins during the
+# __init__ callback
+precompile_test_harness("__init__ cachepath") do load_path
+    write(joinpath(load_path, "InitCachePath.jl"),
+          """
+          module InitCachePath
+            __init__() = Base.pkgorigins[Base.PkgId(InitCachePath)]
+          end
+          """)
+    @test isa((@eval (using InitCachePath; InitCachePath)), Module)
+end
diff --git a/test/print_process_affinity.jl b/test/print_process_affinity.jl
new file mode 100644
index 00000000000000..3d0fd7db509b50
--- /dev/null
+++ b/test/print_process_affinity.jl
@@ -0,0 +1,31 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+const uv_thread_t = UInt # TODO: this is usually correct (or tolerated by the API), but not guaranteed
+
+function uv_thread_getaffinity()
+    masksize = ccall(:uv_cpumask_size, Cint, ())
+    self = ccall(:uv_thread_self, uv_thread_t, ())
+    ref = Ref(self)
+    cpumask = zeros(Bool, masksize)
+    err = ccall(
+        :uv_thread_getaffinity,
+        Cint,
+        (Ref{uv_thread_t}, Ptr{Bool}, Cssize_t),
+        ref,
+        cpumask,
+        masksize,
+    )
+    Base.uv_error("getaffinity", err)
+    n = something(findlast(cpumask)) # we must have at least one active core
+    resize!(cpumask, n)
+    return cpumask
+end
+
+function print_process_affinity()
+    join(stdout, findall(uv_thread_getaffinity()), ",")
+    println()
+end
+
+if Base.Filesystem.samefile(PROGRAM_FILE, @__FILE__)
+    print_process_affinity()
+end
diff --git a/test/profile_spawnmany_exec.jl b/test/profile_spawnmany_exec.jl
new file mode 100644
index 00000000000000..a061de40d51720
--- /dev/null
+++ b/test/profile_spawnmany_exec.jl
@@ -0,0 +1,14 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Profile
+
+function spawnmany(n)
+    if n > 2
+        m = n ÷ 2
+        t = Threads.@spawn spawnmany(m)
+        spawnmany(m)
+        wait(t)
+    end
+end
+
+@profile spawnmany(parse(Int, get(ENV, "NTASKS", "2000000")))
diff --git a/test/project/deps/Foo1/src/Foo.jl b/test/project/deps/Foo1/src/Foo.jl
index 579227b5a55622..770c2930fdbfb4 100644
--- a/test/project/deps/Foo1/src/Foo.jl
+++ b/test/project/deps/Foo1/src/Foo.jl
@@ -4,6 +4,14 @@ module Foo
 include("SubFoo1.jl")
 include(joinpath("subdir", "SubFoo2.jl"))
 import Bar, Baz, Qux
+
+# This tests pkgdir and pathof when executed in toplevel
+# on both the package itself and on its dependencies
+@assert isdir(pkgdir(Foo))
+@assert isdir(pkgdir(Bar))
+@assert isfile(pathof(Foo))
+@assert isfile(pathof(Bar))
+
 this = "Foo1"
 which = "path"
 end
diff --git a/test/ranges.jl b/test/ranges.jl
index f9202bd62af8bf..f84eaae46c3213 100644
--- a/test/ranges.jl
+++ b/test/ranges.jl
@@ -1,5 +1,65 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+using Base.Checked: checked_length
+
+@testset "range construction" begin
+    @test_throws ArgumentError range(start=1, step=1, stop=2, length=10)
+    @test_throws ArgumentError range(start=1, step=1, stop=10, length=11)
+
+    r = 3.0:2:11
+    @test r == range(start=first(r), step=step(r), stop=last(r)                  )
+    @test r == range(start=first(r), step=step(r),               length=length(r))
+    @test r == range(start=first(r),               stop=last(r), length=length(r))
+    @test r == range(                step=step(r), stop=last(r), length=length(r))
+
+    r = 4:9
+    @test r === range(start=first(r), stop=last(r)                  )
+    @test r === range(start=first(r),               length=length(r))
+    @test r === range(                stop=last(r), length=length(r))
+    @test r === range(first(r),       last(r)                       )
+    # the next ones use ==, because it changes the eltype
+    @test r ==  range(first(r),       last(r),      length(r)       )
+    @test r ==  range(start=first(r), stop=last(r), length=length(r))
+    @test r === range(                stop=last(r), length=length(r))
+
+    r = 1:5
+    o = Base.OneTo(5)
+    let start=first(r), step=step(r), stop=last(r), length=length(r)
+        @test o === range(;              stop        )
+        @test o === range(;                    length)
+        @test r === range(; start,       stop        )
+        @test r === range(;              stop, length)
+        # the next three lines uses ==, because it changes the eltype
+        @test r ==  range(; start,       stop, length)
+        @test r ==  range(; start, step,       length)
+        @test r ==  range(; stop=Float64(stop))
+    end
+
+    for T = (Int8, UInt32, Float64, Char)
+        @test typeof(range(start=T(5), length=3)) === typeof(range(stop=T(5), length=3))
+        @test typeof(range(start=T(5), length=Int8(3))) === typeof(range(stop=T(5), length=Int8(3)))
+    end
+    let T = Rational{Int16}
+        @test typeof(range(start=T(5), length=Int16(3))) === typeof(range(stop=T(5), length=Int16(3)))
+    end
+
+
+    @test first(10:3) === 10
+    @test last(10:3) === 9
+    @test step(10:3) === 1
+    @test isempty(10:3)
+
+    @test first(10:2:3) === 10
+    @test last(10:2:3) === 9
+    @test step(10:2:3) === 2
+    @test isempty(10:2:3)
+
+    @test first(10:0.2:3) === 10.0
+    @test last(10:0.2:3) === 9.8
+    @test step(10:0.2:3) === 0.2
+    @test isempty(10:0.2:3)
+end
+
 using Dates, Random
 isdefined(Main, :PhysQuantities) || @eval Main include("testhelpers/PhysQuantities.jl")
 using .Main.PhysQuantities
@@ -243,22 +303,28 @@ end
         end
     end
     @testset "length" begin
-        @test length(.1:.1:.3) == 3
-        @test length(1.1:1.1:3.3) == 3
-        @test length(1.1:1.3:3) == 2
-        @test length(1:1:1.8) == 1
-        @test length(1:.2:2) == 6
-        @test length(1.:.2:2.) == 6
-        @test length(2:-.2:1) == 6
-        @test length(2.:-.2:1.) == 6
-        @test length(2:.2:1) == 0
+        @test length(.1:.1:.3) == checked_length(.1:.1:.3) == 3
+        @test length(1.1:1.1:3.3) == checked_length(1.1:1.1:3.3) == 3
+        @test length(1.1:1.3:3) == checked_length(1.1:1.3:3) == 2
+        @test length(1:1:1.8) == checked_length(1:1:1.8) == 1
+        @test length(1:.2:2) == checked_length(1:.2:2) == 6
+        @test length(1.:.2:2.) == checked_length(1.:.2:2.) == 6
+        @test length(2:-.2:1) == checked_length(2:-.2:1) == 6
+        @test length(2.:-.2:1.) == checked_length(2.:-.2:1.) == 6
+        @test length(2:.2:1) == checked_length(2:.2:1) == 0
         @test length(2.:.2:1.) == 0
 
-        @test length(1:0) == 0
-        @test length(0.0:-0.5) == 0
-        @test length(1:2:0) == 0
-        @test length(Char(0):Char(0x001fffff)) == 2097152
-        @test length(typemax(UInt64)//one(UInt64):1:typemax(UInt64)//one(UInt64)) == 1
+        @test length(1:0) == checked_length(1:0) == 0
+        @test length(0.0:-0.5) == checked_length(0.0:-0.5) == 0
+        @test length(1:2:0) == checked_length(1:2:0) == 0
+        let r = Char(0):Char(0x001fffff)
+            @test length(r) == 2097152
+            @test_throws MethodError checked_length(r) == 2097152 # this would work if checked_sub is defined on Char
+        end
+        let r = typemax(UInt64)//one(UInt64):1:typemax(UInt64)//one(UInt64)
+            @test length(r) == 1
+            @test_throws MethodError checked_length(r) == 1 # this would work if checked_sub is defined on Rational
+        end
     end
     @testset "keys/values" begin
         keytype_is_correct(r) = keytype(r) == eltype(keys(r))
@@ -302,6 +368,7 @@ end
         end
     end
     @testset "findfirst" begin
+        @test findfirst(==(1), Base.IdentityUnitRange(-1:1)) == 1
         @test findfirst(isequal(3), Base.OneTo(10)) == 3
         @test findfirst(==(0), Base.OneTo(10)) == nothing
         @test findfirst(==(11), Base.OneTo(10)) == nothing
@@ -320,57 +387,71 @@ end
         @test reverse(reverse(typemin(Int):typemax(Int))) == typemin(Int):typemax(Int)
         @test reverse(reverse(typemin(Int):2:typemax(Int))) == typemin(Int):2:typemax(Int)
     end
+    @testset "reverse `[Step|Unit]Range{$T}`" for T in (Int8, UInt8, Int, UInt, Int128, UInt128)
+        @test reverse(T(1):T(10)) == T(10):-1:T(1)
+        @test reverse(typemin(T):typemax(T)) == typemax(T):-1:typemin(T)
+        @test reverse(typemin(T):2:typemax(T)) == typemax(T)-T(1):-2:typemin(T)
+        @test reverse(reverse(T(1):T(10))) == T(1):T(10)
+        @test reverse(reverse(typemin(T):typemax(T))) == typemin(T):typemax(T)
+        @test reverse(reverse(typemin(T):2:typemax(T))) == typemin(T):2:typemax(T)
+    end
     @testset "intersect" begin
-        @test intersect(1:5, 2:3) == 2:3
-        @test intersect(-3:5, 2:8) == 2:5
-        @test intersect(-8:-3, -8:-3) == -8:-3
-        @test intersect(1:5, 5:13) == 5:5
+        @test intersect(1:5, 2:3) === 2:3
+        @test intersect(-3:5, 2:8) === 2:5
+        @test intersect(-8:-3, -8:-3) === -8:-3
+        @test intersect(1:5, 5:13) === 5:5
         @test isempty(intersect(-8:-3, -2:2))
         @test isempty(intersect(-3:7, 2:1))
-        @test intersect(1:11, -2:3:15) == 1:3:10
-        @test intersect(1:11, -2:2:15) == 2:2:10
-        @test intersect(1:11, -2:1:15) == 1:11
-        @test intersect(1:11, 15:-1:-2) == 1:11
-        @test intersect(1:11, 15:-4:-2) == 3:4:11
-        @test intersect(-20:-5, -10:3:-2) == -10:3:-7
+        @test intersect(-8:-3, -2:2) === -2:-3
+        @test intersect(-3:7, 2:1) === 2:1
+        @test intersect(1:11, -2:3:15) === 1:3:10
+        @test intersect(1:11, -2:2:15) === 2:2:10
+        @test intersect(1:11, -2:1:15) === 1:1:11
+        @test intersect(1:11, 15:-1:-2) === 1:1:11
+        @test intersect(1:11, 15:-4:-2) === 3:4:11
+        @test intersect(-20:-5, -10:3:-2) === -10:3:-7
         @test isempty(intersect(-5:5, -6:13:20))
         @test isempty(intersect(1:11, 15:4:-2))
         @test isempty(intersect(11:1, 15:-4:-2))
-        #@test intersect(-5:5, 1+0*(1:3)) == 1:1
-        #@test isempty(intersect(-5:5, 6+0*(1:3)))
-        @test intersect(-15:4:7, -10:-2) == -7:4:-3
-        @test intersect(13:-2:1, -2:8) == 7:-2:1
+        @test intersect(-5:5, 1 .+ 0 .* (1:3)) == 1:1
+        @test isempty(intersect(-5:5, 6 .+ 0 .* (1:3)))
+        @test intersect(-15:4:7, -10:-2) === -7:4:-3
+        @test intersect(13:-2:1, -2:8) === 7:-2:1
         @test isempty(intersect(13:2:1, -2:8))
         @test isempty(intersect(13:-2:1, 8:-2))
-        #@test intersect(5+0*(1:4), 2:8) == 5+0*(1:4)
-        #@test isempty(intersect(5+0*(1:4), -7:3))
-        @test intersect(0:3:24, 0:4:24) == 0:12:24
-        @test intersect(0:4:24, 0:3:24) == 0:12:24
-        @test intersect(0:3:24, 24:-4:0) == 0:12:24
-        @test intersect(24:-3:0, 0:4:24) == 24:-12:0
-        @test intersect(24:-3:0, 24:-4:0) == 24:-12:0
-        @test intersect(1:3:24, 0:4:24) == 4:12:16
-        @test intersect(0:6:24, 0:4:24) == 0:12:24
+        @test intersect(5 .+ 0 .* (1:4), 2:8) == 5:5
+        @test isempty(intersect(5 .+ 0 .* (1:4), -7:3))
+        @test intersect(0:3:24, 0:4:24) === 0:12:24
+        @test intersect(0:4:24, 0:3:24) === 0:12:24
+        @test intersect(0:3:24, 24:-4:0) === 0:12:24
+        @test intersect(24:-3:0, 0:4:24) === 24:-12:0
+        @test intersect(24:-3:0, 24:-4:0) === 24:-12:0
+        @test intersect(1:3:24, 0:4:24) === 4:12:16
+        @test intersect(0:6:24, 0:4:24) === 0:12:24
         @test isempty(intersect(1:6:2400, 0:4:2400))
-        @test intersect(-51:5:100, -33:7:125) == -26:35:79
-        @test intersect(-51:5:100, -32:7:125) == -11:35:94
-        #@test intersect(0:6:24, 6+0*(0:4:24)) == 6:6:6
-        #@test intersect(12+0*(0:6:24), 0:4:24) == AbstractRange(12, 0, 5)
-        #@test isempty(intersect(6+0*(0:6:24), 0:4:24))
-        @test intersect(-10:3:24, -10:3:24) == -10:3:23
+        @test intersect(-51:5:100, -33:7:125) === -26:35:79
+        @test intersect(-51:5:100, -32:7:125) === -11:35:94
+        @test intersect(0:6:24, 6 .+ 0 .* (0:4:24)) == 6:6:6
+        @test intersect(12 .+ 0 .* (0:6:24), 0:4:24) == 12:12 # forms StepRangeLen(12, 0, 5)
+        @test isempty(intersect(6 .+ 0 .* (0:6:24), 0:4:24))
+        @test intersect(-10:3:24, -10:3:24) === -10:3:23
         @test isempty(intersect(-11:3:24, -10:3:24))
-        @test intersect(typemin(Int):2:typemax(Int),1:10) == 2:2:10
-        @test intersect(1:10,typemin(Int):2:typemax(Int)) == 2:2:10
+        @test intersect(-11:3:24, -10:3:24) === -11:3:-14
+        @test intersect(typemin(Int):2:typemax(Int),1:10) === 2:2:10
+        @test intersect(1:10, typemin(Int):2:typemax(Int)) === 2:2:10
 
         @test intersect(reverse(typemin(Int):2:typemax(Int)),typemin(Int):2:typemax(Int)) == reverse(typemin(Int):2:typemax(Int))
         @test intersect(typemin(Int):2:typemax(Int),reverse(typemin(Int):2:typemax(Int))) == typemin(Int):2:typemax(Int)
 
-        @test intersect(UnitRange(1,2),3) == UnitRange(3,2)
-        @test intersect(UnitRange(1,2), UnitRange(1,5), UnitRange(3,7), UnitRange(4,6)) == UnitRange(4,3)
+        @test intersect(UnitRange(1, 2), 3) === UnitRange(3, 2)
+        @test intersect(UnitRange(1, 2), UnitRange(1, 5), UnitRange(3, 7), UnitRange(4, 6)) === UnitRange(4, 2)
 
         @test intersect(1:3, 2) === intersect(2, 1:3) === 2:2
         @test intersect(1.0:3.0, 2) == intersect(2, 1.0:3.0) == [2.0]
 
+        @test intersect(1:typemax(Int), [1, 3]) == [1, 3]
+        @test intersect([1, 3], 1:typemax(Int)) == [1, 3]
+
         @testset "Support StepRange with a non-numeric step" begin
             start = Date(1914, 7, 28)
             stop = Date(1918, 11, 11)
@@ -380,6 +461,21 @@ end
             @test intersect(start-Day(10):Day(1):stop-Day(10), start:Day(5):stop) ==
                 start:Day(5):stop-Day(10)-mod(stop-start, Day(5))
         end
+
+        @testset "Two AbstractRanges" begin
+            struct DummyRange{T} <: AbstractRange{T}
+                r
+            end
+            Base.iterate(dr::DummyRange) = iterate(dr.r)
+            Base.iterate(dr::DummyRange, state) = iterate(dr.r, state)
+            Base.length(dr::DummyRange) = length(dr.r)
+            Base.in(x::Int, dr::DummyRange) = in(x, dr.r)
+            Base.unique(dr::DummyRange) = unique(dr.r)
+            r1 = DummyRange{Int}([1, 2, 3, 3, 4, 5])
+            r2 = DummyRange{Int}([3, 3, 4, 5, 6])
+            @test intersect(r1, r2) == [3, 4, 5]
+            @test intersect(r2, r1) == [3, 4, 5]
+        end
     end
     @testset "issubset" begin
         @test issubset(1:3, 1:typemax(Int)) #32461
@@ -435,6 +531,11 @@ end
 
         @test !(1 in 1:0)
         @test !(1.0 in 1.0:0.0)
+
+        for r = (1:10, 1//1:10//1, 1:2:5, 1//2:1//2:5//2, 1.0:5.0, LinRange(1.5, 5.5, 9)),
+            x = (NaN16, Inf32, -Inf64, 1//0, -1//0)
+            @test !(x in r)
+        end
     end
     @testset "in() works across types, including non-numeric types (#21728)" begin
         @test 1//1 in 1:3
@@ -472,22 +573,56 @@ for a=AbstractRange[3:6, 0:2:10], b=AbstractRange[0:1, 2:-1:0]
 end
 
 # avoiding intermediate overflow (#5065)
-@test length(1:4:typemax(Int)) == div(typemax(Int),4) + 1
+@test length(1:4:typemax(Int)) == div(typemax(Int), 4) + 1
+@test checked_length(1:4:typemax(Int)) == div(typemax(Int), 4) + 1 # computed exactly in modulo arithmetic
 
 @testset "overflow in length" begin
-    Tset = Int === Int64 ? (Int,UInt,Int128,UInt128) :
-                           (Int,UInt,Int64,UInt64,Int128, UInt128)
+    Tset = Int === Int64 ? (Int, UInt, Int128, UInt128) :
+                           (Int, UInt, Int64, UInt64, Int128, UInt128)
     for T in Tset
-        @test_throws OverflowError length(zero(T):typemax(T))
-        @test_throws OverflowError length(typemin(T):typemax(T))
-        @test_throws OverflowError length(zero(T):one(T):typemax(T))
-        @test_throws OverflowError length(typemin(T):one(T):typemax(T))
+        @test length(zero(T):typemax(T)) == typemin(T)
+        @test length(typemin(T):typemax(T)) == T(0)
+        @test length(zero(T):one(T):typemax(T)) == typemin(T)
+        @test length(typemin(T):one(T):typemax(T)) == T(0)
+        @test_throws OverflowError checked_length(zero(T):typemax(T))
+        @test_throws OverflowError checked_length(typemin(T):typemax(T))
+        @test_throws OverflowError checked_length(zero(T):one(T):typemax(T))
+        @test_throws OverflowError checked_length(typemin(T):one(T):typemax(T))
+        @test length(one(T):typemax(T)) == checked_length(one(T):typemax(T)) == typemax(T)
         if T <: Signed
-            @test_throws OverflowError length(-one(T):typemax(T)-one(T))
-            @test_throws OverflowError length(-one(T):one(T):typemax(T)-one(T))
+            @test length(-one(T):typemax(T)-one(T)) == typemin(T)
+            @test length(-one(T):one(T):typemax(T)-one(T)) == typemin(T)
+            @test length(-one(T):typemax(T)) == typemin(T) + T(1)
+            @test length(zero(T):typemin(T):typemin(T)) == 2
+            @test length(one(T):typemin(T):typemin(T)) == 2
+            @test length(typemax(T):typemin(T):typemin(T)) == 2
+            @test length(-one(T):typemin(T):typemin(T)) == 1
+            @test length(zero(T):typemin(T):zero(T)) == 1
+            @test length(zero(T):typemin(T):one(T)) == 0
+            @test_throws OverflowError checked_length(-one(T):typemax(T)-one(T))
+            @test_throws OverflowError checked_length(-one(T):one(T):typemax(T)-one(T))
+            @test_throws InexactError checked_length(zero(T):typemin(T):typemin(T)) == 2 # this can be improved
+            @test_throws InexactError checked_length(one(T):typemin(T):typemin(T)) == 2 # this can  be improved
+            @test_throws InexactError checked_length(typemax(T):typemin(T):typemin(T)) == 2 # this can  be improved
         end
     end
 end
+
+# A number type with the overflow behavior of `UInt8`. Conversion to `Integer` returns an
+# `Int32`, i.e., a type with different `typemin`/`typemax`. See  #41479
+struct OverflowingReal <: Real
+    val::UInt8
+end
+OverflowingReal(x::OverflowingReal) = x
+Base.:<(x::OverflowingReal, y::OverflowingReal) = x.val < y.val
+Base.:(==)(x::OverflowingReal, y::OverflowingReal) = x.val == y.val
+Base.:<=(x::OverflowingReal, y::OverflowingReal) = x.val <= y.val
+Base.:+(x::OverflowingReal, y::OverflowingReal) = OverflowingReal(x.val + y.val)
+Base.:-(x::OverflowingReal, y::OverflowingReal) = OverflowingReal(x.val - y.val)
+Base.round(x::OverflowingReal, ::RoundingMode) = x
+Base.Integer(x::OverflowingReal) = Int32(x.val)
+@test length(OverflowingReal(1):OverflowingReal(0)) == 0
+
 @testset "loops involving typemin/typemax" begin
     n = 0
     s = 0
@@ -586,22 +721,18 @@ end
 end
 @testset "broadcasted operations with scalars" for T in (Int, UInt, Int128)
     @test broadcast(-, T(1):3, 2) === T(1)-2:1
-    @test broadcast(-, T(1):3, 0.25) === T(1)-0.25:3-0.25
+    @test broadcast(-, T(1):3, 0.25) === range(T(1)-0.25, length=T(3)) == T(1)-0.25:3-0.25
     @test broadcast(+, T(1):3) === T(1):3
     @test broadcast(+, T(1):3, 2) === T(3):5
-    @test broadcast(+, T(1):3, 0.25) === T(1)+0.25:3+0.25
+    @test broadcast(+, T(1):3, 0.25) === range(T(1)+0.25, length=T(3)) == T(1)+0.25:3+0.25
     @test broadcast(+, T(1):2:6, 1) === T(2):2:6
-    @test broadcast(+, T(1):2:6, 0.3) === T(1)+0.3:2:5+0.3
+    @test broadcast(+, T(1):2:6, 0.3) === range(T(1)+0.3, step=2, length=T(3)) == T(1)+0.3:2:5+0.3
     @test broadcast(-, T(1):2:6, 1) === T(0):2:4
-    @test broadcast(-, T(1):2:6, 0.3) === T(1)-0.3:2:5-0.3
-    if T <: Unsigned
-        @test_broken broadcast(-, T(1):3) == -T(1):-1:-T(3)
-        @test_broken broadcast(-, 2, T(1):3) == T(1):-1:-T(1)
-    else
-        @test length(broadcast(-, T(1):3, 2)) === length(T(1)-2:T(3)-2)
-        @test broadcast(-, T(1):3) == -T(1):-1:-T(3)
-        @test broadcast(-, 2, T(1):3) == T(1):-1:-T(1)
-    end
+    @test broadcast(-, T(1):2:6, 0.3) === range(T(1)-0.3, step=2, length=T(3)) == T(1)-0.3:2:5-0.3
+    is_unsigned = T <: Unsigned
+    @test length(broadcast(-, T(1):3, 2)) === length(T(1)-2:T(3)-2) === (is_unsigned ? T(0) : T(3))
+    @test broadcast(-, T(1):3) == -T(1):-1:-T(3)
+    @test broadcast(-, 2, T(1):3) == T(1):-1:-T(1)
 end
 @testset "operations between ranges and arrays" for T in (Int, UInt, Int128)
     @test all(([T(1):5;] + (T(5):-1:1)) .=== T(6))
@@ -729,11 +860,11 @@ function range_fuzztests(::Type{T}, niter, nrange) where {T}
         @test m == length(r)
         @test strt == first(r)
         @test Δ == step(r)
-        @test_skip stop == last(r)
+        @test_skip stop ≈ last(r)
         l = range(strt, stop=stop, length=n)
         @test n == length(l)
         @test strt == first(l)
-        @test stop  == last(l)
+        @test stop == last(l)
     end
 end
 @testset "range fuzztests for $T" for T = (Float32, Float64,)
@@ -846,32 +977,45 @@ end
 end
 # issue #2959
 @test 1.0:1.5 == 1.0:1.0:1.5 == 1.0:1.0
-#@test 1.0:(.3-.1)/.1 == 1.0:2.0
+@test_broken 1.0:(.3-.1)/.1 == 1.0:2.0 # (this is just shy of 2.0)
 
 @testset "length with typemin/typemax" begin
-    let r = typemin(Int64):2:typemax(Int64), s = typemax(Int64):-2:typemin(Int64)
+    let r = typemin(Int64):2:typemax(Int64)
         @test first(r) == typemin(Int64)
-        @test last(r) == (typemax(Int64)-1)
-        @test_throws OverflowError length(r)
-
-        @test first(s) == typemax(Int64)
-        @test last(s) == (typemin(Int64)+1)
-        @test_throws OverflowError length(s)
+        @test last(r) == typemax(Int64) - 1
+        @test length(r) == typemin(Int64)
+        @test_throws OverflowError checked_length(r)
+    end
+    let r = typemax(Int64):-2:typemin(Int64)
+        @test first(r) == typemax(Int64)
+        @test last(r) == typemin(Int64) + 1
+        @test length(r) == typemin(Int64)
+        @test_throws OverflowError checked_length(r)
     end
 
-    @test length(typemin(Int64):3:typemax(Int64)) == 6148914691236517206
-    @test length(typemax(Int64):-3:typemin(Int64)) == 6148914691236517206
+    let r = typemin(Int64):3:typemax(Int64)
+        @test length(r) == checked_length(r) == 6148914691236517206
+    end
+    let r = typemax(Int64):-3:typemin(Int64)
+        @test length(r) == checked_length(r) == 6148914691236517206
+    end
 
     for s in 3:100
-        @test length(typemin(Int):s:typemax(Int)) == length(big(typemin(Int)):big(s):big(typemax(Int)))
-        @test length(typemax(Int):-s:typemin(Int)) == length(big(typemax(Int)):big(-s):big(typemin(Int)))
+        r = typemin(Int):s:typemax(Int)
+        br = big(typemin(Int)):big(s):big(typemax(Int))
+        @test length(r) == checked_length(r) == length(br)
+
+        r = typemax(Int):-s:typemin(Int)
+        br = big(typemax(Int)):big(-s):big(typemin(Int))
+        @test length(r) == checked_length(r) == length(br)
     end
 
-    @test length(UInt(1):UInt(1):UInt(0)) == 0
-    @test length(typemax(UInt):UInt(1):(typemax(UInt)-1)) == 0
-    @test length(typemax(UInt):UInt(2):(typemax(UInt)-1)) == 0
-    @test length((typemin(Int)+3):5:(typemin(Int)+1)) == 0
+    @test length(UInt(1):UInt(1):UInt(0)) == checked_length(UInt(1):UInt(1):UInt(0)) == 0
+    @test length(typemax(UInt):UInt(1):(typemax(UInt)-1)) == checked_length(typemax(UInt):UInt(1):(typemax(UInt)-1)) == 0
+    @test length(typemax(UInt):UInt(2):(typemax(UInt)-1)) == checked_length(typemax(UInt):UInt(2):(typemax(UInt)-1)) == 0
+    @test length((typemin(Int)+3):5:(typemin(Int)+1)) == checked_length((typemin(Int)+3):5:(typemin(Int)+1)) == 0
 end
+
 # issue #6364
 @test length((1:64)*(pi/5)) == 64
 
@@ -936,12 +1080,19 @@ end
     @test length(map(identity, UInt64(1):UInt64(5))) == 5
     @test length(map(identity, UInt128(1):UInt128(5))) == 5
 end
-@testset "issue #8531" begin
+@testset "issue #8531, issue #29801" begin
     smallint = (Int === Int64 ?
-                (Int8,UInt8,Int16,UInt16,Int32,UInt32) :
-                (Int8,UInt8,Int16,UInt16))
+                (Int8, UInt8, Int16, UInt16, Int32, UInt32) :
+                (Int8, UInt8, Int16, UInt16))
     for T in smallint
-        @test length(typemin(T):typemax(T)) == 2^(8*sizeof(T))
+        s = typemin(T):typemax(T)
+        @test length(s) === checked_length(s) === 2^(8*sizeof(T))
+        s = T(10):typemax(T):T(10)
+        @test length(s) === checked_length(s) === 1
+        s = T(10):typemax(T):T(0)
+        @test length(s) === checked_length(s) === 0
+        s = T(10):typemax(T):typemin(T)
+        @test length(s) === checked_length(s) === 0
     end
 end
 
@@ -949,7 +1100,7 @@ end
 @test (0:1//2:2)[1:2:3] == 0:1//1:1
 
 # issue #12278
-@test length(1:UInt(0)) == 0
+@test length(1:UInt(0)) == checked_length(1:UInt(0)) == 0
 
 @testset "zip" begin
     i = 0
@@ -1002,6 +1153,15 @@ end
     @test eltype(['a':'z', 1:2]) == (StepRange{T,Int} where T)
 end
 
+@testset "Ranges with <:Integer eltype but non-integer step (issue #32419)" begin
+    @test eltype(StepRange(1, 1//1, 2)) === Int
+    @test_throws ArgumentError StepRange(1, 1//2, 2)
+    @test eltype(StepRangeLen{Int}(1, 1//1, 2)) === Int
+    @test_throws ArgumentError StepRangeLen{Int}(1, 1//2, 2)
+    @test eltype(LinRange{Int}(1, 5, 3)) === Int
+    @test_throws ArgumentError LinRange{Int}(1, 5, 4)
+end
+
 @testset "LinRange ops" begin
     @test 2*LinRange(0,3,4) == LinRange(0,6,4)
     @test LinRange(0,3,4)*2 == LinRange(0,6,4)
@@ -1013,17 +1173,14 @@ end
     @test reverse(LinRange{Int}(0,3,4)) === LinRange{Int}(3,0,4)
     @test reverse(LinRange{Float64}(0.,3.,4)) === LinRange{Float64}(3.,0.,4)
 end
-@testset "Issue #11245" begin
-    io = IOBuffer()
-    show(io, range(1, stop=2, length=3))
-    str = String(take!(io))
-#    @test str == "range(1.0, stop=2.0, length=3)"
-    @test str == "1.0:0.5:2.0"
-end
+
+# issue #11245
+@test repr(range(1, stop=2, length=3)) == "1.0:0.5:2.0"
 
 @testset "issue 10950" begin
     r = 1//2:3
     @test length(r) == 3
+    @test checked_length(r) == 3
     i = 1
     for x in r
         @test x == i//2
@@ -1036,24 +1193,25 @@ end
     # repr/show should display the range nicely
     # to test print_range in range.jl
     replrepr(x) = repr("text/plain", x; context=IOContext(stdout, :limit=>true, :displaysize=>(24, 80)))
+    nb = Sys.WORD_SIZE
     @test replrepr(1:4) == "1:4"
     @test repr("text/plain", 1:4) == "1:4"
     @test repr("text/plain", range(1, stop=5, length=7)) == "1.0:0.6666666666666666:5.0"
-    @test repr("text/plain", LinRange{Float64}(1,5,7)) == "7-element LinRange{Float64}:\n 1.0,1.66667,2.33333,3.0,3.66667,4.33333,5.0"
+    @test repr("text/plain", LinRange{Float64}(1,5,7)) == "7-element LinRange{Float64, Int$nb}:\n 1.0, 1.66667, 2.33333, 3.0, 3.66667, 4.33333, 5.0"
     @test repr(range(1, stop=5, length=7)) == "1.0:0.6666666666666666:5.0"
-    @test repr(LinRange{Float64}(1,5,7)) == "range(1.0, stop=5.0, length=7)"
+    @test repr(LinRange{Float64}(1,5,7)) == "LinRange{Float64}(1.0, 5.0, 7)"
     @test replrepr(0:100.) == "0.0:1.0:100.0"
     # next is to test a very large range, which should be fast because print_range
     # only examines spacing of the left and right edges of the range, sufficient
     # to cover the designated screen size.
     @test replrepr(range(0, stop=100, length=10000)) == "0.0:0.010001000100010001:100.0"
-    @test replrepr(LinRange{Float64}(0,100, 10000)) == "10000-element LinRange{Float64}:\n 0.0,0.010001,0.020002,0.030003,0.040004,…,99.95,99.96,99.97,99.98,99.99,100.0"
+    @test replrepr(LinRange{Float64}(0,100, 10000)) == "10000-element LinRange{Float64, Int$nb}:\n 0.0, 0.010001, 0.020002, 0.030003, …, 99.96, 99.97, 99.98, 99.99, 100.0"
 
     @test sprint(show, UnitRange(1, 2)) == "1:2"
     @test sprint(show, StepRange(1, 2, 5)) == "1:2:5"
 end
 
-@testset "Issue 11049 and related" begin
+@testset "Issue 11049, and related" begin
     @test promote(range(0f0, stop=1f0, length=3), range(0., stop=5., length=2)) ===
         (range(0., stop=1., length=3), range(0., stop=5., length=2))
     @test convert(LinRange{Float64}, range(0., stop=1., length=3)) === LinRange(0., 1., 3)
@@ -1115,6 +1273,7 @@ end
     @test [reverse(range(1.0, stop=27.0, length=1275));] ==
         reverse([range(1.0, stop=27.0, length=1275);])
 end
+
 @testset "PR 12200 and related" begin
     for _r in (1:2:100, 1:100, 1f0:2f0:100f0, 1.0:2.0:100.0,
                range(1, stop=100, length=10), range(1f0, stop=100f0, length=10))
@@ -1187,6 +1346,7 @@ end
         @test size(similar(r, size(r))) == size(similar(r, length(r)))
     end
 end
+
 @testset "sign, conj, ~ (Issue #16067)" begin
     A = -1:1
     B = -1.0:1.0
@@ -1212,28 +1372,60 @@ end
     @test convert(Array{Float64,1}, r) == a
 end
 
+@testset "extrema" begin
+    @test_throws ArgumentError minimum(1:2:-1)
+    @test_throws ArgumentError argmin(Base.OneTo(-1))
+    @test_throws ArgumentError maximum(Base.OneTo(-1))
+    @test_throws ArgumentError argmax(1:-1)
+
+    for (r, imin, imax) in [
+            (Base.OneTo(5), 1, 5),
+            (1:10, 1, 10),
+            (10:-1:0, 11, 1),
+            (range(10, stop=20, length=5), 1, 5),
+            (range(10.3, step=-2, length=7), 7, 1),
+           ]
+        @test minimum(r) === r[imin]
+        @test maximum(r) === r[imax]
+        @test imin === argmin(r)
+        @test imax === argmax(r)
+        @test extrema(r) === (r[imin], r[imax])
+    end
+
+    r = 1f8-10:1f8
+    rv = collect(r)
+    @test argmin(r) == argmin(rv) == 1
+    @test r[argmax(r)] == r[argmax(rv)] == 1f8
+    @test argmax(r) == lastindex(r)
+    @test argmax(rv) != lastindex(r)
+end
+
 @testset "OneTo" begin
     let r = Base.OneTo(-5)
         @test isempty(r)
-        @test length(r) == 0
+        @test length(r) == checked_length(r) == 0
         @test size(r) == (0,)
+        @test first(r) === 1
+        @test last(r) === 0
     end
     let r = Base.OneTo(3)
         @test !isempty(r)
-        @test length(r) == 3
+        @test length(r) == checked_length(r) == 3
         @test size(r) == (3,)
         @test step(r) == 1
         @test first(r) == 1
         @test last(r) == 3
         @test minimum(r) == 1
         @test maximum(r) == 3
+        @test argmin(r) == 1
+        @test argmax(r) == 3
         @test r[2] == 2
         @test r[2:3] === 2:3
         @test_throws BoundsError r[4]
         @test_throws BoundsError r[0]
         @test broadcast(+, r, 1) === 2:4
-        @test 2*r === 2:2:6
-        @test r + r === 2:2:6
+        @test 2*r == 2:2:6
+        @test r + r == 2:2:6
         k = 0
         for i in r
             @test i == (k += 1)
@@ -1315,7 +1507,7 @@ end
 
 @testset "issue #20520" begin
     r = range(1.3173739f0, stop=1.3173739f0, length=3)
-    @test length(r) == 3
+    @test length(r) == checked_length(r) == 3
     @test first(r) === 1.3173739f0
     @test last(r)  === 1.3173739f0
     @test r[2]     === 1.3173739f0
@@ -1338,8 +1530,11 @@ isdefined(Main, :Furlongs) || @eval Main include("testhelpers/Furlongs.jl")
 using .Main.Furlongs
 
 @testset "dimensional correctness" begin
-    @test length(Vector(Furlong(2):Furlong(10))) == 9
-    @test length(range(Furlong(2), length=9)) == 9
+    @test_throws TypeError Furlong(2):Furlong(10)
+    @test_throws TypeError range(Furlong(2), length=9)
+    @test length(Vector(Furlong(2):Furlong(1):Furlong(10))) == 9
+    @test length(range(Furlong(2), step=Furlong(1), length=9)) == checked_length(range(Furlong(2), step=Furlong(1), length=9)) == 9
+    @test @inferred(length(StepRange(Furlong(2), Furlong(1), Furlong(1)))) == 0
     @test Vector(Furlong(2):Furlong(1):Furlong(10)) == Vector(range(Furlong(2), step=Furlong(1), length=9)) == Furlong.(2:10)
     @test Vector(Furlong(1.0):Furlong(0.5):Furlong(10.0)) ==
           Vector(Furlong(1):Furlong(0.5):Furlong(10)) == Furlong.(1:0.5:10)
@@ -1376,22 +1571,31 @@ end
     @test @inferred(r .+ x) === 3:7
     @test @inferred(r .- x) === -1:3
     @test @inferred(x .- r) === 1:-1:-3
-    @test @inferred(x .* r) === 2:2:10
-    @test @inferred(r .* x) === 2:2:10
+    @test @inferred(x .* r) == 2:2:10
+    @test @inferred(r .* x) == 2:2:10
     @test @inferred(r ./ x) === 0.5:0.5:2.5
     @test @inferred(x ./ r) == 2 ./ [r;] && isa(x ./ r, Vector{Float64})
     @test @inferred(r .\ x) == 2 ./ [r;] && isa(x ./ r, Vector{Float64})
     @test @inferred(x .\ r) === 0.5:0.5:2.5
 
-    @test @inferred(2 .* (r .+ 1) .+ 2) === 6:2:14
+    @test @inferred(2 .* (r .+ 1) .+ 2) == 6:2:14
+
+    # issue #42291
+    @test length((1:5) .- 1/7) == 5
+    @test length((1:5) .+ -1/7) == 5
+    @test length(-1/7 .+ (1:5)) == 5
 end
 
 @testset "Bad range calls" begin
     @test_throws ArgumentError range(1)
     @test_throws ArgumentError range(nothing)
     @test_throws ArgumentError range(1, step=4)
-    @test_throws ArgumentError range(nothing, length=2)
+    @test_throws ArgumentError range(; step=1, length=6)
+    @test_throws ArgumentError range(; step=2, stop=7.5)
     @test_throws ArgumentError range(1.0, step=0.25, stop=2.0, length=5)
+    @test_throws ArgumentError range(; stop=nothing)
+    @test_throws ArgumentError range(; length=nothing)
+    @test_throws TypeError range(; length=5.5)
 end
 
 @testset "issue #23300#issuecomment-371575548" begin
@@ -1408,11 +1612,67 @@ end
     @test x isa StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}
 end
 
+@testset "Issue #44292" begin
+    let x = @inferred range(0, step=0.2, length=5)
+        @test x isa StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}
+        @test x == [0.0, 0.2, 0.4, 0.6, 0.8]
+    end
+
+    let x = @inferred range(0.0, step=2, length=5)
+        @test x isa StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}
+        @test x == [0.0, 2.0, 4.0, 6.0, 8.0]
+        @test x === range(0.0, step=2.0, length=5)
+        @test x === range(0.0f0, step=2e0, length=5)
+        @test x === range(0e0, step=2.0f0, length=5)
+    end
+
+    # start::IEEEFloat and step::Complex
+    let x = @inferred range(2.0, step=1im, length=3)
+        @test typeof(x) === StepRangeLen{ComplexF64, Float64, Complex{Int}, Int}
+        @test x == range(2, step=1im, length=3)  # compare with integer range
+        @test x == 2.0 .+ [0im, 1im, 2im]
+    end
+
+    # start::Complex and step::IEEEFloat
+    let x = @inferred range(2im, step=1.0, length=3)
+        @test typeof(x) === StepRangeLen{ComplexF64, Complex{Int}, Float64, Int}
+        @test x == range(2im, step=1, length=3)  # compare with integer range
+    end
+
+    # stop::IEEEFloat and step::Complex
+    let x = @inferred range(stop=2.0, step=1im, length=3)
+        @test typeof(x) === StepRangeLen{ComplexF64, ComplexF64, Complex{Int}, Int}
+        @test x == range(stop=2, step=1im, length=3)  # compare with integer range
+        @test x == 2.0 .- [2im, 1im, 0im]
+    end
+
+    # stop::Complex and step::IEEEFloat
+    let x = @inferred range(stop=2im, step=1.0, length=3)
+        @test typeof(x) === StepRangeLen{ComplexF64, ComplexF64, Float64, Int}
+        @test x == range(stop=2im, step=1, length=3)  # compare with integer range
+    end
+
+    let x = @inferred range(stop=10, step=2.0, length=5)
+        @test x isa StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}
+        @test x === @inferred range(stop=10.0, step=2.0, length=5)
+        @test x === @inferred range(stop=10f0, step=2.0, length=5)
+        @test x === @inferred range(stop=10e0, step=2.0f0, length=5)
+        @test x == [2, 4, 6, 8, 10]
+    end
+
+    let x = @inferred range(stop=10.0, step=2, length=4)
+        @test x isa StepRangeLen{Float64,Base.TwicePrecision{Float64},Base.TwicePrecision{Float64}}
+        @test x == [4.0, 6.0, 8.0, 10.0]
+    end
+end
+
 @testset "Views of ranges" begin
     @test view(Base.OneTo(10), Base.OneTo(5)) === Base.OneTo(5)
     @test view(1:10, 1:5) === 1:5
     @test view(1:10, 1:2:5) === 1:2:5
     @test view(1:2:9, 1:5) === 1:2:9
+    @test view(1:10, :) === 1:10
+    @test view(1:2:9, :) === 1:2:9
 
     # Ensure we don't hit a fallback `view` if there's a better `getindex` implementation
     vmt = collect(methods(view, Tuple{AbstractRange, AbstractRange}))
@@ -1434,15 +1694,18 @@ module NonStandardIntegerRangeTest
 
 using Test
 
+using Base.Checked: checked_length
+import Base.Checked: checked_add, checked_sub
+
 struct Position <: Integer
     val::Int
 end
-Position(x::Position) = x # to resolve ambiguity with boot.jl:728
+Position(x::Position) = x # to resolve ambiguity with boot.jl:770
 
 struct Displacement <: Integer
     val::Int
 end
-Displacement(x::Displacement) = x # to resolve ambiguity with boot.jl:728
+Displacement(x::Displacement) = x # to resolve ambiguity with boot.jl:770
 
 Base.:-(x::Displacement) = Displacement(-x.val)
 Base.:-(x::Position, y::Position) = Displacement(x.val - y.val)
@@ -1459,14 +1722,68 @@ Base.Unsigned(x::Displacement) = Unsigned(x.val)
 Base.rem(x::Displacement, y::Displacement) = Displacement(rem(x.val, y.val))
 Base.div(x::Displacement, y::Displacement) = Displacement(div(x.val, y.val))
 
-# required for collect (summing lengths); alternatively, should unsafe_length return Int by default?
+# required for collect (summing lengths); alternatively, should length return Int by default?
 Base.promote_rule(::Type{Displacement}, ::Type{Int}) = Int
 Base.convert(::Type{Int}, x::Displacement) = x.val
 
+# Unsigned complement, for testing checked_length
+struct UPosition <: Unsigned
+    val::UInt
+end
+UPosition(x::UPosition) = x # to resolve ambiguity with boot.jl:770
+
+struct UDisplacement <: Unsigned
+    val::UInt
+end
+UDisplacement(x::UDisplacement) = x # to resolve ambiguity with boot.jl:770
+
+Base.show(io::IO, x::Union{Position, UPosition, Displacement, UDisplacement}) =
+    # should use show if we were to do this properly (instead of just a test-helper)
+    print(io, typeof(x).name.name, "(", x.val, ")")
+
+Base.:-(x::UPosition, y::UPosition) = UDisplacement(x.val - y.val)
+Base.:-(x::UPosition, y::UDisplacement) = UPosition(x.val - y.val)
+Base.:+(x::UPosition, y::UDisplacement) = UPosition(x.val + y.val)
+Base.:+(x::UDisplacement, y::Displacement) = UDisplacement(x.val + y.val)
+Base.:+(x::UDisplacement, y::UDisplacement) = UDisplacement(x.val + y.val)
+Base.:-(x::UPosition, y::Displacement) = UPosition(x.val - y.val)
+checked_sub(x::UPosition, y::UPosition) = UDisplacement(checked_sub(x.val, y.val))
+checked_sub(x::UPosition, y::UDisplacement) = UPosition(checked_sub(x.val, y.val))
+checked_sub(x::UDisplacement, y::UDisplacement) = UDisplacement(checked_sub(x.val, y.val))
+checked_add(x::UPosition, y::UDisplacement) = UPosition(checked_add(x.val, y.val))
+checked_add(x::UDisplacement, y::UDisplacement) = UDisplacement(checked_add(x.val, y.val))
+Base.:+(x::UPosition, y::Displacement) = UPosition(x.val + y.val)
+Base.:(<=)(x::UPosition, y::UPosition) = x.val <= y.val
+Base.:(<)(x::UPosition, y::UPosition) = x.val < y.val
+Base.:(<)(x::UDisplacement, y::UDisplacement) = x.val < y.val
+
+# for StepRange computation:
+Base.rem(x::UDisplacement, y::Displacement) = UDisplacement(rem(x.val, y.val))
+Base.div(x::UDisplacement, y::Displacement) = UDisplacement(div(x.val, y.val))
+Base.rem(x::UDisplacement, y::UDisplacement) = UDisplacement(rem(x.val, y.val))
+Base.div(x::UDisplacement, y::UDisplacement) = UDisplacement(div(x.val, y.val))
+
+#Base.promote_rule(::Type{UDisplacement}, ::Type{Int}) = Int
+#Base.convert(::Type{Int}, x::UDisplacement) = Int(x.val)
+
 @testset "Ranges with nonstandard Integers" begin
     for (start, stop) in [(2, 4), (3, 3), (3, -2)]
-        @test collect(Position(start) : Position(stop)) == Position.(start : stop)
-    end
+        r = Position(start) : Position(stop)
+        @test length(r) === Displacement(stop >= start ? stop - start + 1 : 0)
+        start >= 0 && stop >= 0 && @test UDisplacement(length(r).val) ===
+              checked_length(UPosition(start) : UPosition(stop)) ===
+              checked_length(UPosition(start) : Displacement(1) : UPosition(stop)) ===
+              checked_length(UPosition(start) : UDisplacement(1) : UPosition(stop))
+        @test collect(r) == Position.(start : stop)
+    end
+
+    @test length(UPosition(3):Displacement(7):UPosition(100)) === checked_length(UPosition(3):Displacement(7):UPosition(100)) === UDisplacement(14)
+    @test length(UPosition(100):Displacement(7):UPosition(3)) === checked_length(UPosition(100):Displacement(7):UPosition(3)) === UDisplacement(0)
+    @test length(UPosition(100):Displacement(-7):UPosition(3)) === checked_length(UPosition(100):Displacement(-7):UPosition(3)) === UDisplacement(14)
+    @test length(UPosition(3):Displacement(-7):UPosition(100)) === checked_length(UPosition(3):Displacement(-7):UPosition(100)) === UDisplacement(0)
+    @test_throws OverflowError checked_length(zero(UPosition):UPosition(typemax(UInt)))
+    @test_throws OverflowError checked_length(zero(UPosition):Displacement(1):UPosition(typemax(UInt)))
+    @test_throws OverflowError checked_length(UPosition(typemax(UInt)):Displacement(-1):zero(UPosition))
 
     for start in [3, 0, -2]
         @test collect(Base.OneTo(Position(start))) == Position.(Base.OneTo(start))
@@ -1488,7 +1805,7 @@ end
 end # module NonStandardIntegerRangeTest
 
 @testset "Issue #26619" begin
-    @test length(UInt(100) : -1 : 1) === UInt(100)
+    @test length(UInt(100) : -1 : 1) == checked_length(UInt(100) : -1 : 1) === UInt(100)
     @test collect(UInt(5) : -1 : 3) == [UInt(5), UInt(4), UInt(3)]
 
     let r = UInt(5) : -2 : 2
@@ -1508,23 +1825,35 @@ end # module NonStandardIntegerRangeTest
 end
 
 @testset "constant-valued ranges (issues #10391 and #29052)" begin
-    for r in ((1:4), (1:1:4), (1.0:4.0))
-        if eltype(r) === Int
-            @test_broken @inferred(0 * r) == [0.0, 0.0, 0.0, 0.0]
-            @test_broken @inferred(0 .* r) == [0.0, 0.0, 0.0, 0.0]
-            @test_broken @inferred(r + (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
-            @test_broken @inferred(r .+ (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
-        else
-            @test @inferred(0 * r) == [0.0, 0.0, 0.0, 0.0]
-            @test @inferred(0 .* r) == [0.0, 0.0, 0.0, 0.0]
-            @test @inferred(r + (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
-            @test @inferred(r .+ (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
-        end
+    @testset "with $(nameof(typeof(r))) of $(eltype(r))" for r in ((1:4), (1:1:4), StepRangeLen(1,1,4), (1.0:4.0))
+        @test @inferred(0 * r) == [0.0, 0.0, 0.0, 0.0]
+        @test @inferred(0 .* r) == [0.0, 0.0, 0.0, 0.0]
+        @test @inferred(r .* 0) == [0.0, 0.0, 0.0, 0.0]
+        @test @inferred(r + (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
+        @test @inferred(r .+ (4:-1:1)) == [5.0, 5.0, 5.0, 5.0]
+        @test @inferred(r - r) == [0.0, 0.0, 0.0, 0.0]
+        @test @inferred(r .- r) == [0.0, 0.0, 0.0, 0.0]
+
         @test @inferred(r .+ (4.0:-1:1)) == [5.0, 5.0, 5.0, 5.0]
         @test @inferred(0.0 * r) == [0.0, 0.0, 0.0, 0.0]
         @test @inferred(0.0 .* r) == [0.0, 0.0, 0.0, 0.0]
         @test @inferred(r / Inf) == [0.0, 0.0, 0.0, 0.0]
         @test @inferred(r ./ Inf) == [0.0, 0.0, 0.0, 0.0]
+
+        @test eval(Meta.parse(repr(0 * r))) == [0.0, 0.0, 0.0, 0.0]
+
+        # Not constant-valued, but related methods:
+        @test @inferred(-1 * r) == [-1,-2,-3,-4]
+        @test @inferred(r * -1) == [-1,-2,-3,-4]
+        @test @inferred(r / -1) == [-1,-2,-3,-4]
+
+        @test @inferred(-1.0 .* r) == [-1,-2,-3,-4]
+        @test @inferred(r .* -1.0) == [-1,-2,-3,-4]
+        @test @inferred(r ./ -1.0) == [-1,-2,-3,-4]
+
+        @test @inferred(-1 * reverse(r)) == [-4,-3,-2,-1]
+        @test @inferred(-1.0 .* reverse(r)) == [-4,-3,-2,-1]
+        @test @inferred(reverse(r) ./ -1.0) == [-4,-3,-2,-1]
     end
 
     @test_broken @inferred(range(0, step=0, length=4)) == [0, 0, 0, 0]
@@ -1537,7 +1866,7 @@ end
     @test @inferred(range(0.0, stop=0, length=4)) == [0.0, 0.0, 0.0, 0.0]
 
     z4 = 0.0 * (1:4)
-    @test @inferred(z4 .+ (1:4)) === 1.0:1.0:4.0
+    @test @inferred(z4 .+ (1:4)) == 1.0:1.0:4.0
     @test @inferred(z4 .+ z4) === z4
 end
 
@@ -1570,8 +1899,6 @@ end
             end
         end
     end
-    # require a keyword arg
-    @test_throws ArgumentError range(1, 100)
 end
 
 @testset "Reverse empty ranges" begin
@@ -1606,6 +1933,20 @@ end
     @test_throws DivideError mod(3, 1:0)
 end
 
+@testset "clamp with unitrange" begin
+    for n in -10:10
+        @test clamp(n, 0:4) == clamp(n, 0, 4)
+        @test clamp(n, Base.OneTo(5)) == clamp(n, 1, 5)
+    end
+    @test clamp(Int32(3), 1:5) === Int(3)
+    @test clamp(big(typemax(Int))+99, 0:4) == 4
+    @test_throws MethodError clamp(3.141, 1:5)
+    @test_throws MethodError clamp(3, UnitRange(1.0,5.0))
+    @test_throws MethodError clamp(3, 1:2:7)
+    @test clamp(3, 1:0) == clamp(3, 1, 0) == 0
+    @test clamp(-3, 1:0) == clamp(-3, 1, 0) == 1
+end
+
 @testset "issue #33882" begin
     r = StepRangeLen('a',2,4)
     @test step(r) === 2
@@ -1642,3 +1983,362 @@ end
     @test @inferred(intersect(big(1):big(5), 3)) == 3:3
     @test @inferred(intersect(3, big(1):big(5))) == 3:3
 end
+
+@testset "eltype of range(::Integer; step::Rational, length) (#37295)" begin
+    r = range(1, step=1//2, length=3)
+    @test r == [1//1, 3//2, 2//1]
+    @test eltype(r) === Rational{Int}
+    @test typeof(step(r)) === Rational{Int}
+
+    r = range(1//1, step=2, length=3)
+    @test r == [1, 3, 5]
+    @test eltype(r) === Rational{Int}
+    @test typeof(step(r)) === Int
+
+    r = range(Int16(1), step=Rational{Int8}(1,2), length=Int16(3))
+    @test r == [1//1, 3//2, 2//1]
+    @test eltype(r) === Rational{Int16}
+    @test typeof(step(r)) === Rational{Int8}
+
+    r = range(Rational{Int8}(1), step=Int16(2), length=Int8(3))
+    @test r == [1, 3, 5]
+    @test eltype(r) === Rational{Int16}
+    @test typeof(step(r)) === Int16
+
+    r = range('a', step=2, length=3)
+    @test r == ['a', 'c', 'e']
+    @test eltype(r) === Char
+    @test typeof(step(r)) === Int
+
+    r = range(typemax(Int)//1, step=1, length=0)
+    @test isempty(r)
+    @test eltype(r) === Rational{Int}
+    @test typeof(step(r)) === Int
+
+    r = range(typemin(Int), step=-1//1, length=0)
+    @test isempty(r)
+    @test eltype(r) === Rational{Int}
+    @test typeof(step(r)) === Rational{Int}
+
+    r = StepRangeLen(Int8(1), Int8(2), 3)
+    @test r == Int8[1, 3, 5]
+    @test eltype(r) === Int8
+    @test typeof(step(r)) === Int8
+
+    r = StepRangeLen(Int8(1), Int8(2), 3, 2)
+    @test r == Int8[-1, 1, 3]
+    @test eltype(r) === Int8
+    @test typeof(step(r)) === Int8
+end
+
+@testset "LinRange eltype for element types that wrap integers" begin
+    struct RealWrapper{T <: Real} <: Real
+        x :: T
+    end
+    Base.promote_rule(::Type{S}, ::Type{RealWrapper{T}}) where {T,S<:Real} = RealWrapper{promote_type(S, T)}
+    Base.:(-)(w::RealWrapper) = RealWrapper(-w.x)
+    for f in [:(+), :(-), :(*), :(/)]
+        @eval Base.$f(w::RealWrapper, y::RealWrapper) = RealWrapper($f(w.x, y.x))
+    end
+    for f in [:(<), :(==), :(<=)]
+        @eval Base.$f(w::RealWrapper, y::RealWrapper) = $f(w.x, y.x)
+    end
+    for T in [:Float32, :Float64]
+        @eval Base.$T(w::RealWrapper) = $T(w.x)
+    end
+    (::Type{RealWrapper{T}})(w::RealWrapper) where {T<:Real} = RealWrapper{T}(T(w.x))
+    (::Type{T})(w::RealWrapper{T}) where {T<:Real} = T(w.x)
+    Base.:(==)(w::RealWrapper, y::RealWrapper) = w.x == y.x
+    Base.isfinite(w::RealWrapper) = isfinite(w.x)
+    Base.signbit(w::RealWrapper) = signbit(w.x)
+
+    x = RealWrapper(2)
+    r1 = range(x, stop = 2x, length = 10)
+    r2 = range(Int(x), stop = Int(2x), length = 10)
+    for i in eachindex(r1, r2)
+        @test r1[i] ≈ r2[i]
+    end
+    r3 = LinRange(x, 2x, 10)
+    r4 = LinRange(x, 2x, 10)
+    for i in eachindex(r3, r4)
+        @test r3[i] ≈ r4[i]
+    end
+end
+
+@testset "Bool indexing of ranges" begin
+    @test_throws ArgumentError Base.OneTo(true)
+    @test_throws ArgumentError Base.OneTo(true:true:true)
+
+    @test_throws ArgumentError (1:2)[true]
+    @test_throws ArgumentError (big(1):big(2))[true]
+    @test_throws ArgumentError Base.OneTo(10)[true]
+    @test_throws ArgumentError (1:2:5)[true]
+    @test_throws ArgumentError LinRange(1,2,2)[true]
+    @test_throws ArgumentError (1.0:2.0:5.0)[true]
+    r = 3:2
+    r2 = r[true:false]
+    @test r2 == collect(r)[true:false]
+    @test r.start == r2.start && r.stop == r2.stop
+    @test_throws BoundsError r[true:true]
+    @test_throws BoundsError r[false:true]
+    r = 3:3
+    r2 = r[true:true]
+    @test r2 == collect(r)[true:true]
+    @test r.start == r2.start && r.stop == r2.stop
+    r2 = r[false:false]
+    @test r2.start == 3 && r2.stop == 2
+    @test_throws BoundsError r[true:false]
+    @test_throws BoundsError r[false:true]
+    r = 2:3
+    r2 = r[false:true]
+    @test r2 == collect(r)[false:true]
+    @test r2.start == r2.stop == 3
+    @test_throws BoundsError r[true:false]
+    @test_throws BoundsError r[true:true]
+
+    r = 2:1
+    r2 = r[true:true:false]
+    @test r2 == collect(r)[true:true:false]
+    @test r2 isa StepRange && r2.start == 2 && r2.step == 1 && r2.stop == 1
+    @test_throws BoundsError r[false:true:false]
+
+    r = 2:2
+    r2 = r[false:true:false]
+    @test r2 == collect(r)[false:true:false]
+    @test r2 isa StepRange && r2.start == 2 && r2.step == 1 && r2.stop == 1
+    r2 = r[true:true:true]
+    @test r2 == collect(r)[true:true:true]
+    @test r2 isa StepRange && r2.start == 2 && r2.step == 1 && r2.stop == 2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[false:true:true]
+
+    r = 1:2
+    r2 = r[false:true:true]
+    @test r2 == collect(r)[false:true:true]
+    @test r2 isa StepRange && r2.start == 2 && r2.step == 1 && r2.stop == 2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[true:true:true]
+
+    r = 2:1:1
+    r2 = r[true:true:false]
+    @test r2 == collect(r)[true:true:false]
+    @test r2 isa StepRange && r2.start == 2 && r2.step == 1 && r2.stop == 1
+    @test_throws BoundsError r[false:true:false]
+
+    r = 2:1:2
+    r2 = r[false:true:false]
+    @test r2 == collect(r)[false:true:false]
+    @test r2 isa StepRange && r2.start == 2 && r2.step == 1 && r2.stop == 1
+    r2 = r[true:true:true]
+    @test r2 == collect(r)[true:true:true]
+    @test r2 isa StepRange && r2.start == 2 && r2.step == 1 && r2.stop == 2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[false:true:true]
+
+    r = 1:1:2
+    r2 = r[false:true:true]
+    @test r2 == collect(r)[false:true:true]
+    @test r2 isa StepRange && r2.start == 2 && r2.step == 1 && r2.stop == 2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[true:true:true]
+
+    r = 2.0:1.0:1.0
+    r2 = r[true:true:false]
+    @test r2 == collect(r)[true:true:false]
+    @test r2 isa StepRangeLen && r2 == 2:1
+    @test_throws BoundsError r[false:true:false]
+
+    r = 2.0:1.0:2.0
+    r2 = r[false:true:false]
+    @test r2 == collect(r)[false:true:false]
+    @test r2 isa StepRangeLen && r2 == 2:1
+    r2 = r[true:true:true]
+    @test r2 == collect(r)[true:true:true]
+    @test r2 isa StepRangeLen && r2 == 2:2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[false:true:true]
+
+    r = 1.0:1.0:2.0
+    r2 = r[false:true:true]
+    @test r2 == collect(r)[false:true:true]
+    @test r2 isa StepRangeLen && r2 == 2:2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[true:true:true]
+
+    r = StepRangeLen(2, 1, 0)
+    r2 = r[true:true:false]
+    @test r2 == collect(r)[true:true:false]
+    @test r2 isa StepRangeLen && r2 == 2:1
+    @test_throws BoundsError r[false:true:false]
+
+    r = StepRangeLen(2, 1, 1)
+    r2 = r[false:true:false]
+    @test r2 == collect(r)[false:true:false]
+    @test r2 isa StepRangeLen && r2 == 2:1
+    r2 = r[true:true:true]
+    @test r2 == collect(r)[true:true:true]
+    @test r2 isa StepRangeLen && r2 == 2:2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[false:true:true]
+
+    r = StepRangeLen(1, 1, 2)
+    r2 = r[false:true:true]
+    @test r2 == collect(r)[false:true:true]
+    @test r2 isa StepRangeLen && r2 == 2:2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[true:true:true]
+
+    r = LinRange(2, 1, 0)
+    r2 = r[true:true:false]
+    @test r2 == collect(r)[true:true:false]
+    @test r2 isa LinRange && r2 == 2:1
+    @test_throws BoundsError r[false:true:false]
+
+    r = LinRange(2, 2, 1)
+    r2 = r[false:true:false]
+    @test r2 == collect(r)[false:true:false]
+    @test r2 isa LinRange && r2 == 2:1
+    r2 = r[true:true:true]
+    @test r2 == collect(r)[true:true:true]
+    @test r2 isa LinRange && r2 == 2:2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[false:true:true]
+
+    r = LinRange(1, 2, 2)
+    r2 = r[false:true:true]
+    @test r2 == collect(r)[false:true:true]
+    @test r2 isa LinRange && r2 == 2:2
+    @test_throws BoundsError r[true:true:false]
+    @test_throws BoundsError r[true:true:true]
+end
+@testset "Non-Int64 endpoints that are identical (#39798)" begin
+    for T in DataType[Float16,Float32,Float64,Bool,Int8,Int16,Int32,Int64,Int128,UInt8,UInt16,UInt32,UInt64,UInt128],
+        r in [ LinRange(1, 1, 10), StepRangeLen(7, 0, 5) ]
+        if first(r) > typemax(T)
+            continue
+        end
+        let start=T(first(r)), stop=T(last(r)), step=T(step(r)), length=length(r)
+            @test range(  start, stop,       length) == r
+            @test range(  start, stop;       length) == r
+            @test range(  start; stop,       length) == r
+            @test range(; start, stop,       length) == r
+        end
+    end
+end
+@testset "PR 40320 fixes" begin
+    # found by nanosoldier
+    @test 0.2 * (-2:2) == -0.4:0.2:0.4  # from tests of AbstractFFTs, needs Base.TwicePrecision
+    @test 0.2f0 * (-2:2) == Float32.(-0.4:0.2:0.4)  # likewise needs Float64
+    @test 0.2 * (-2:1:2) == -0.4:0.2:0.4
+
+    # https://github.com/JuliaLang/julia/issues/40846
+    @test 0.1 .* (3:-1:1) ≈ [0.3, 0.2, 0.1]
+    @test (10:-1:1) * 0.1 == 1:-0.1:0.1
+    @test 0.2 * (-2:2:2) == [-0.4, 0, 0.4]
+end
+
+@testset "Indexing OneTo with IdentityUnitRange" begin
+    for endpt in Any[10, big(10), UInt(10)]
+        r = Base.OneTo(endpt)
+        inds = Base.IdentityUnitRange(3:5)
+        rs = r[inds]
+        @test rs === inds
+        @test_throws BoundsError r[Base.IdentityUnitRange(-1:100)]
+    end
+end
+
+@testset "non 1-based ranges indexing" begin
+    struct ZeroBasedUnitRange{T,A<:AbstractUnitRange{T}} <: AbstractUnitRange{T}
+        a :: A
+        function ZeroBasedUnitRange(a::AbstractUnitRange{T}) where {T}
+            @assert !Base.has_offset_axes(a)
+            new{T, typeof(a)}(a)
+        end
+    end
+
+    Base.parent(A::ZeroBasedUnitRange) = A.a
+    Base.first(A::ZeroBasedUnitRange) = first(parent(A))
+    Base.length(A::ZeroBasedUnitRange) = length(parent(A))
+    Base.last(A::ZeroBasedUnitRange) = last(parent(A))
+    Base.size(A::ZeroBasedUnitRange) = size(parent(A))
+    Base.axes(A::ZeroBasedUnitRange) = map(x -> Base.IdentityUnitRange(0:x-1), size(parent(A)))
+    Base.getindex(A::ZeroBasedUnitRange, i::Int) = parent(A)[i + 1]
+    Base.getindex(A::ZeroBasedUnitRange, i::Integer) = parent(A)[i + 1]
+    Base.firstindex(A::ZeroBasedUnitRange) = 0
+    function Base.show(io::IO, A::ZeroBasedUnitRange)
+        show(io, parent(A))
+        print(io, " with indices $(axes(A,1))")
+    end
+
+    r = ZeroBasedUnitRange(5:8)
+    @test r[0:2] == r[0]:r[2]
+    @test r[0:1:2] == r[0]:1:r[2]
+end
+
+@test length(range(1, 100, length=big(100)^100)) == big(100)^100
+@test length(range(big(1), big(100)^100, length=big(100)^100)) == big(100)^100
+@test length(0 * (1:big(100)^100)) == big(100)^100
+
+@testset "issue #41784" begin
+    # tests `in` when step equals 0
+    # test for Int
+    x = 41784
+    @test (x in StepRangeLen(x, 0, 0)) == false
+    @test (x in StepRangeLen(x, 0, rand(1:100))) == true
+    @test ((x - 1) in StepRangeLen(x, 0, rand(1:100))) == false
+    @test ((x + 1) in StepRangeLen(x, 0, rand(1:100))) == false
+
+    # test for Char
+    x = 'z'
+    @test (x in StepRangeLen(x, 0, 0)) == false
+    @test (x in StepRangeLen(x, 0, rand(1:100))) == true
+    @test ((x - 1) in StepRangeLen(x, 0, rand(1:100))) == false
+    @test ((x + 1) in StepRangeLen(x, 0, rand(1:100))) == false
+end
+
+@testset "issue #42528" begin
+    struct Fix42528 <: Unsigned
+        val::UInt
+    end
+    Fix42528(a::Fix42528) = a
+    Base.:(<)(a::Fix42528, b::Fix42528) = a.val < b.val
+    Base.:(>=)(a::Fix42528, b::Fix42528) = a.val >= b.val
+    Base.:(+)(a::Fix42528, b::Fix42528) = a.val+b.val
+    Base.promote_rule(::Type{Fix42528}, ::Type{<:Unsigned}) = Fix42528
+    Base.show(io::IO, ::MIME"text/plain", a::Fix42528) = print(io, "Fix42528(", a.val, ')')
+    Base.show(io::IO, a::Fix42528) = print(io, "Fix42528(", a.val, ')')
+    function Base.:(-)(a::Fix42528, b::Fix42528)
+        a.val < b.val && throw(DomainError("Can't subtract, result outside of domain"))
+        return a.val - b.val
+    end
+    Base.one(::Type{Fix42528}) = Fix42528(0x1)
+    @test Fix42528(0x0):Fix42528(0x1) == [Fix42528(0x0), Fix42528(0x01)]
+    @test iszero(length(Fix42528(0x1):Fix42528(0x0)))
+    @test_throws DomainError Fix42528(0x0) - Fix42528(0x1)
+end
+
+let r = Ptr{Cvoid}(20):-UInt(2):Ptr{Cvoid}(10)
+    @test isempty(r)
+    @test length(r) == 0
+    @test count(i -> true, r) == 0
+    @test isempty(collect(r))
+    @test first(r) === Ptr{Cvoid}(20)
+    @test step(r) === -UInt(2)
+    @test last(r) === Ptr{Cvoid}(10)
+end
+
+# test behavior of wrap-around and promotion of empty ranges (#35711)
+@test length(range(0, length=UInt(0))) === UInt(0)
+@test isempty(range(0, length=UInt(0)))
+@test length(range(typemax(Int), length=UInt(0))) === UInt(0)
+@test isempty(range(typemax(Int), length=UInt(0)))
+@test length(range(0, length=UInt(0), step=UInt(2))) == UInt(0)
+@test isempty(range(0, length=UInt(0), step=UInt(2)))
+@test length(range(typemax(Int), length=UInt(0), step=UInt(2))) === UInt(0)
+@test isempty(range(typemax(Int), length=UInt(0), step=UInt(2)))
+@test length(range(typemax(Int), length=UInt(0), step=2)) === UInt(0)
+@test isempty(range(typemax(Int), length=UInt(0), step=2))
+@test length(range(typemax(Int), length=0, step=UInt(2))) === 0
+@test isempty(range(typemax(Int), length=0, step=UInt(2)))
+
+@test length(range(1, length=typemax(Int128))) === typemax(Int128)
diff --git a/test/rational.jl b/test/rational.jl
index 81a9ab16c5ff77..1618156212af73 100644
--- a/test/rational.jl
+++ b/test/rational.jl
@@ -116,6 +116,14 @@ using Test
     @test abs(one(Rational{UInt})) === one(Rational{UInt})
     @test abs(one(Rational{Int})) === one(Rational{Int})
     @test abs(-one(Rational{Int})) === one(Rational{Int})
+
+    # inf addition
+    @test 1//0 + 1//0 == 1//0
+    @test -1//0 - 1//0 == -1//0
+    @test_throws DivideError 1//0 - 1//0
+    @test_throws DivideError -1//0 + 1//0
+    @test Int128(1)//0 + 1//0 isa Rational{Int128}
+    @test 1//0 + Int128(1)//0 isa Rational{Int128}
 end
 
 @testset "Rational methods" begin
@@ -480,6 +488,8 @@ end
         @test gcd(b, a) === T(2)//T(105)
         @test lcm(a, b) === T(30)//T(7)
         if T <: Signed
+            @test gcd(-a) === a
+            @test lcm(-b) === b
             @test gcdx(a, b) === (T(2)//T(105), T(-11), T(4))
             @test gcd(-a, b) === T(2)//T(105)
             @test gcd(a, -b) === T(2)//T(105)
@@ -594,3 +604,37 @@ end
     @test -1//5 * 0x3//0x2 == 0x3//0x2 * -1//5 == -3//10
     @test -2//3 * 0x1 == 0x1 * -2//3 == -2//3
 end
+
+@testset "ispow2 and iseven/isodd" begin
+    @test ispow2(4//1)
+    @test ispow2(1//8)
+    @test !ispow2(3//8)
+    @test !ispow2(0//1)
+    @test iseven(4//1) && !isodd(4//1)
+    @test !iseven(3//1) && isodd(3//1)
+    @test !iseven(3//8) && !isodd(3//8)
+end
+
+@testset "checked_den with different integer types" begin
+    @test Base.checked_den(Int8(4), Int32(8)) == Base.checked_den(Int32(4), Int32(8))
+end
+
+@testset "Rational{T} with non-concrete T (issue #41222)" begin
+    @test @inferred(Rational{Integer}(2,3)) isa Rational{Integer}
+end
+
+@testset "issue #41489" begin
+    @test Core.Compiler.return_type(+, NTuple{2, Rational}) == Rational
+    @test Core.Compiler.return_type(-, NTuple{2, Rational}) == Rational
+
+    A=Rational[1 1 1; 2 2 2; 3 3 3]
+    @test @inferred(A*A) isa Matrix{Rational}
+end
+
+@testset "issue #42560" begin
+    @test rationalize(0.5 + 0.5im) == 1//2 + 1//2*im
+    @test rationalize(float(pi)im) == 0//1 + 165707065//52746197*im
+    @test rationalize(Int8, float(pi)im) == 0//1 + 22//7*im
+    @test rationalize(1.192 + 2.233im) == 149//125 + 2233//1000*im
+    @test rationalize(Int8, 1.192 + 2.233im) == 118//99 + 67//30*im
+end
diff --git a/test/read.jl b/test/read.jl
index b31216811b75c1..7a5acbcca969ed 100644
--- a/test/read.jl
+++ b/test/read.jl
@@ -98,30 +98,27 @@ s = io(text)
 close(s)
 push!(l, ("PipeEndpoint", io))
 
-#FIXME See https://github.com/JuliaLang/julia/issues/14747
-#      Reading from open(::Command) seems to deadlock on Linux/Travis
-#=
-if !Sys.iswindows()
 
-# Windows type command not working?
-# See "could not spawn `type 'C:\Users\appveyor\AppData\Local\Temp\1\jul3516.tmp\file.txt'`"
-#https://ci.appveyor.com/project/StefanKarpinski/julia/build/1.0.12733/job/hpwjs4hmf03vs5ag#L1244
-
-# Pipe
+# Pipe (#14747)
 io = (text) -> begin
     write(filename, text)
-    open(`$(Sys.iswindows() ? "type" : "cat") $filename`)[1]
-#    Was open(`echo -n $text`)[1]
-#    See https://github.com/JuliaLang/julia/issues/14747
+    # we can skip using shell_escape_wincmd, since ", ^, and % aren't legal in
+    # a filename, so unconditionally wrapping in " is sufficient (okay, that's
+    # a lie, since ^ and % actually are legal, but DOS is broken)
+    if Sys.iswindows()
+        cmd = Cmd(["cmd.exe", "/c type \"$(replace(filename, '/' => '\\'))\""])
+        cmd = Cmd(cmd; windows_verbatim=true)
+        cmd = pipeline(cmd, stderr=devnull)
+    else
+        cmd = `cat $filename`
+    end
+    open(cmd)
 end
 s = io(text)
 @test isa(s, IO)
-@test isa(s, Pipe)
+@test isa(s, Base.Process)
 close(s)
-push!(l, ("Pipe", io))
-
-end
-=#
+push!(l, ("Process", io))
 
 
 open_streams = []
@@ -140,7 +137,6 @@ end
 verbose = false
 
 for (name, f) in l
-    local f
     local function io(text=text)
         local s = f(text)
         push!(open_streams, s)
@@ -297,6 +293,14 @@ for (name, f) in l
         @test collect(eachline(io(), keep=true)) == collect(eachline(filename, keep=true))
         @test collect(eachline(io())) == collect(eachline(IOBuffer(text)))
         @test collect(@inferred(eachline(io()))) == collect(@inferred(eachline(filename))) #20351
+        if try; seekend(io()); true; catch; false; end # reverse iteration only supports seekable streams
+            for keep in (true, false)
+                lines = readlines(io(); keep)
+                @test last(lines) == last(eachline(io(); keep))
+                @test last(lines,2) == last(eachline(io(); keep),2)
+                @test reverse!(lines) == collect(Iterators.reverse(eachline(io(); keep))) == collect(Iterators.reverse(eachline(IOBuffer(text); keep)))
+            end
+        end
 
         cleanup()
 
@@ -319,9 +323,9 @@ for (name, f) in l
     text = old_text
     write(filename, text)
 
-    if !(typeof(io()) in [Base.PipeEndpoint, Pipe, TCPSocket])
+    if !isa(io(), Union{Base.PipeEndpoint, Base.AbstractPipe, TCPSocket})
         verbose && println("$name position...")
-        @test (s = io(); read!(s, Vector{UInt8}(undef, 4)); position(s))  == 4
+        @test (s = io(); read!(s, Vector{UInt8}(undef, 4)); position(s)) == 4
 
         verbose && println("$name seek...")
         for n = 0:length(text)-1
@@ -465,7 +469,7 @@ rm(f)
 io = Base.Filesystem.open(f, Base.Filesystem.JL_O_WRONLY | Base.Filesystem.JL_O_CREAT | Base.Filesystem.JL_O_EXCL, 0o000)
 @test write(io, "abc") == 3
 close(io)
-if !Sys.iswindows() && get(ENV, "USER", "") != "root" && get(ENV, "HOME", "") != "/root"
+if !Sys.iswindows() && Libc.geteuid() != 0 # root user
     # msvcrt _wchmod documentation states that all files are readable,
     # so we don't test that it correctly set the umask on windows
     @test_throws SystemError open(f)
@@ -515,7 +519,7 @@ close(f1)
 close(f2)
 @test eof(f1)
 @test_throws Base.IOError eof(f2)
-if get(ENV, "USER", "") != "root" && get(ENV, "HOME", "") != "/root"
+if Libc.geteuid() != 0 # root user
     @test_throws SystemError open(f, "r+")
     @test_throws Base.IOError Base.Filesystem.open(f, Base.Filesystem.JL_O_RDWR)
 else
@@ -600,7 +604,7 @@ end
     read!(io, @view y[4:7])
     @test y[4:7] == v
     seekstart(io)
-    @test_throws ErrorException read!(io, @view z[4:6])
+    @test_throws Base.CanonicalIndexError read!(io, @view z[4:6])
 end
 
 # Bulk read from pipe
@@ -617,3 +621,40 @@ let p = Pipe()
     wait(t)
     close(p)
 end
+
+@testset "issue #27412" for itr in [eachline(IOBuffer("a")), readeach(IOBuffer("a"), Char)]
+    @test !isempty(itr)
+    # check that the earlier isempty did not consume the iterator
+    @test !isempty(itr)
+    first(itr) # consume the iterator
+    @test  isempty(itr) # now it is empty
+end
+
+# more tests for reverse(eachline)
+@testset "reverse(eachline)" begin
+    lines = vcat(repr.(1:4), ' '^50000 .* repr.(5:10), repr.(11:10^5))
+    for lines in (lines, reverse(lines)), finalnewline in (true, false), eol in ("\n", "\r\n")
+        buf = IOBuffer(join(lines, eol) * (finalnewline ? eol : ""))
+        @test reverse!(collect(Iterators.reverse(eachline(seekstart(buf))))) == lines
+        @test last(eachline(seekstart(buf))) == last(lines)
+        @test last(eachline(seekstart(buf)),10^4) == last(lines,10^4)
+        @test last(eachline(seekstart(buf)),length(lines)*2) == lines
+        @test reverse!(collect(Iterators.reverse(eachline(seek(buf, sum(sizeof, lines[1:100]) + 100*sizeof(eol)))))) == lines[101:end]
+        @test isempty(Iterators.reverse(eachline(buf)))
+    end
+
+    let rempty = Iterators.reverse(eachline(IOBuffer()))
+        @test isempty(rempty)
+        @test isempty(collect(rempty))
+    end
+
+    let buf = IOBuffer("foo\nbar")
+        @test readline(buf) == "foo"
+        r = Iterators.reverse(eachline(buf))
+        line, state = iterate(r)
+        @test line == "bar"
+        @test Base.isdone(r, state)
+        @test Base.isdone(r)
+        @test isempty(r) && isempty(collect(r))
+    end
+end
diff --git a/test/reduce.jl b/test/reduce.jl
index 4688709b099c9a..3a8f9532b47104 100644
--- a/test/reduce.jl
+++ b/test/reduce.jl
@@ -49,8 +49,8 @@ end
 @test reduce(max, [8 6 7 5 3 0 9]) == 9
 @test reduce(+, 1:5; init=1000) == (1000 + 1 + 2 + 3 + 4 + 5)
 @test reduce(+, 1) == 1
-@test_throws ArgumentError reduce(*, ())
-@test_throws ArgumentError reduce(*, Union{}[])
+@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, ())
+@test_throws "reducing with * over an empty collection of element type Union{} is not allowed" reduce(*, Union{}[])
 
 # mapreduce
 @test mapreduce(-, +, [-10 -9 -3]) == ((10 + 9) + 3)
@@ -87,8 +87,10 @@ end
 @test mapreduce(abs2, *, Float64[]) === 1.0
 @test mapreduce(abs2, max, Float64[]) === 0.0
 @test mapreduce(abs, max, Float64[]) === 0.0
-@test_throws ArgumentError mapreduce(abs2, &, Float64[])
-@test_throws ArgumentError mapreduce(abs2, |, Float64[])
+@test_throws ["reducing over an empty collection is not allowed",
+              "consider supplying `init`"] mapreduce(abs2, &, Float64[])
+@test_throws str -> !occursin("Closest candidates are", str) mapreduce(abs2, &, Float64[])
+@test_throws "reducing over an empty collection is not allowed" mapreduce(abs2, |, Float64[])
 
 # mapreduce() type stability
 @test typeof(mapreduce(*, +, Int8[10])) ===
@@ -138,8 +140,9 @@ fz = float(z)
 @test sum(z) === 136
 @test sum(fz) === 136.0
 
-@test_throws ArgumentError sum(Union{}[])
-@test_throws ArgumentError sum(sin, Int[])
+@test_throws "reducing with add_sum over an empty collection of element type Union{} is not allowed" sum(Union{}[])
+@test_throws ["reducing over an empty collection is not allowed",
+              "consider supplying `init`"] sum(sin, Int[])
 @test sum(sin, 3) == sin(3.0)
 @test sum(sin, [3]) == sin(3.0)
 a = sum(sin, z)
@@ -170,7 +173,7 @@ for f in (sum2, sum5, sum6, sum9, sum10)
 end
 for f in (sum3, sum4, sum7, sum8)
     @test sum(z) == f(z)
-    @test_throws ArgumentError f(Int[])
+    @test_throws "reducing over an empty" f(Int[])
     @test sum(Int[7]) == f(Int[7]) == 7
 end
 @test typeof(sum(Int8[])) == typeof(sum(Int8[1])) == typeof(sum(Int8[1 7]))
@@ -239,11 +242,17 @@ prod2(itr) = invoke(prod, Tuple{Any}, itr)
 
 # maximum & minimum & extrema
 
-@test_throws ArgumentError maximum(Int[])
-@test_throws ArgumentError minimum(Int[])
+@test_throws "reducing over an empty" maximum(Int[])
+@test_throws "reducing over an empty" minimum(Int[])
+@test_throws "reducing over an empty" extrema(Int[])
 
 @test maximum(Int[]; init=-1) == -1
 @test minimum(Int[]; init=-1) == -1
+@test extrema(Int[]; init=(1, -1)) == (1, -1)
+
+@test maximum(sin, []; init=-1) == -1
+@test minimum(sin, []; init=1) == 1
+@test extrema(sin, []; init=(1, -1)) == (1, -1)
 
 @test maximum(5) == 5
 @test minimum(5) == 5
@@ -254,6 +263,7 @@ let x = [4,3,5,2]
     @test maximum(x) == 5
     @test minimum(x) == 2
     @test extrema(x) == (2, 5)
+    @test Core.Compiler.extrema(x) == (2, 5)
 
     @test maximum(abs2, x) == 25
     @test minimum(abs2, x) == 4
@@ -385,6 +395,41 @@ A = circshift(reshape(1:24,2,3,4), (0,1,1))
         @test maximum(x) === minimum(x) === missing
         @test extrema(x) === (missing, missing)
     end
+    # inputs containing both missing and NaN
+    minimum([NaN;zeros(255);missing]) === missing
+    maximum([NaN;zeros(255);missing]) === missing
+end
+
+# findmin, findmax, argmin, argmax
+
+@testset "findmin(f, domain)" begin
+    @test findmin(-, 1:10) == (-10, 10)
+    @test findmin(identity, [1, 2, 3, missing]) === (missing, 4)
+    @test findmin(identity, [1, NaN, 3, missing]) === (missing, 4)
+    @test findmin(identity, [1, missing, NaN, 3]) === (missing, 2)
+    @test findmin(identity, [1, NaN, 3]) === (NaN, 2)
+    @test findmin(identity, [1, 3, NaN]) === (NaN, 3)
+    @test findmin(cos, 0:π/2:2π) == (-1.0, 3)
+end
+
+@testset "findmax(f, domain)" begin
+    @test findmax(-, 1:10) == (-1, 1)
+    @test findmax(identity, [1, 2, 3, missing]) === (missing, 4)
+    @test findmax(identity, [1, NaN, 3, missing]) === (missing, 4)
+    @test findmax(identity, [1, missing, NaN, 3]) === (missing, 2)
+    @test findmax(identity, [1, NaN, 3]) === (NaN, 2)
+    @test findmax(identity, [1, 3, NaN]) === (NaN, 3)
+    @test findmax(cos, 0:π/2:2π) == (1.0, 1)
+end
+
+@testset "argmin(f, domain)" begin
+    @test argmin(-, 1:10) == 10
+    @test argmin(sum, Iterators.product(1:5, 1:5)) == (1, 1)
+end
+
+@testset "argmax(f, domain)" begin
+    @test argmax(-, 1:10) == 1
+    @test argmax(sum, Iterators.product(1:5, 1:5)) == (5, 5)
 end
 
 # any & all
@@ -428,8 +473,8 @@ end
 @test reduce((a, b) -> a .& b, fill(trues(5), 24))  == trues(5)
 @test reduce((a, b) -> a .& b, fill(falses(5), 24)) == falses(5)
 
-@test_throws TypeError any(x->0, [false])
-@test_throws TypeError all(x->0, [false])
+@test_throws TypeError any(Returns(0), [false])
+@test_throws TypeError all(Returns(0), [false])
 
 # short-circuiting any and all
 
@@ -520,6 +565,12 @@ struct NonFunctionIsZero end
 @test count(NonFunctionIsZero(), [0]) == 1
 @test count(NonFunctionIsZero(), [1]) == 0
 
+@test count(Iterators.repeated(true, 3), init=0x04) === 0x07
+@test count(!=(2), Iterators.take(1:7, 3), init=Int32(0)) === Int32(2)
+@test count(identity, [true, false], init=Int8(5)) === Int8(6)
+@test count(!, [true false; false true], dims=:, init=Int16(0)) === Int16(2)
+@test isequal(count(identity, [true false; false true], dims=2, init=UInt(4)), reshape(UInt[5, 5], 2, 1))
+
 ## cumsum, cummin, cummax
 
 z = rand(10^6)
@@ -556,14 +607,22 @@ end
 # issue #18695
 test18695(r) = sum( t^2 for t in r )
 @test @inferred(test18695([1.0,2.0,3.0,4.0])) == 30.0
-@test_throws ArgumentError test18695(Any[])
+@test_throws str -> ( occursin("reducing over an empty", str) &&
+                      occursin("consider supplying `init`", str) &&
+                     !occursin("or defining", str)) test18695(Any[])
+
+# For Core.IntrinsicFunction
+@test_throws str -> ( occursin("reducing over an empty", str) &&
+                      occursin("consider supplying `init`", str) &&
+                     !occursin("or defining", str)) reduce(Base.xor_int, Int[])
 
 # issue #21107
 @test foldr(-,2:2) == 2
 
 # test neutral element not picked incorrectly for &, |
 @test @inferred(foldl(&, Int[1])) === 1
-@test_throws ArgumentError foldl(&, Int[])
+@test_throws ["reducing over an empty",
+              "consider supplying `init`"] foldl(&, Int[])
 
 # prod on Chars
 @test prod(Char[]) == ""
@@ -571,14 +630,14 @@ test18695(r) = sum( t^2 for t in r )
 @test prod(Char['a','b']) == "ab"
 
 @testset "optimized reduce(vcat/hcat, A) for arrays" begin
-    for args in ([1:2], [[1, 2]], [1:2, 3:4], [[3, 4, 5], 1:2], [1:2, [3.5, 4.5]],
+    for args in ([1:2], [[1, 2]], [1:2, 3:4], AbstractVector{Int}[[3, 4, 5], 1:2], AbstractVector[1:2, [3.5, 4.5]],
                  [[1 2], [3 4; 5 6]], [reshape([1, 2], 2, 1), 3:4])
         X = reduce(vcat, args)
         Y = vcat(args...)
         @test X == Y
         @test typeof(X) === typeof(Y)
     end
-    for args in ([1:2], [[1, 2]], [1:2, 3:4], [[3, 4, 5], 1:3], [1:2, [3.5, 4.5]],
+    for args in ([1:2], [[1, 2]], [1:2, 3:4], AbstractVector{Int}[[3, 4, 5], 1:3], AbstractVector[1:2, [3.5, 4.5]],
                  [[1 2; 3 4], [5 6; 7 8]], [1:2, [5 6; 7 8]], [[5 6; 7 8], [1, 2]])
         X = reduce(hcat, args)
         Y = hcat(args...)
@@ -602,3 +661,19 @@ x = [j+7 for j in i]
         Iterators.flatten((1:2, 3:4)),
     ) == (1, 4)
 end
+
+# make sure we specialize on mapfoldl(::Type, ...)
+@test @inferred(mapfoldl(Int, +, [1, 2, 3]; init=0)) === 6
+
+# issue #39281
+@test @inferred(extrema(rand(2), dims=1)) isa Vector{Tuple{Float64,Float64}}
+
+# issue #38627
+@testset "overflow in mapreduce" begin
+    # at len = 16 and len = 1025 there is a change in codepath
+    for len in [0, 1, 15, 16, 1024, 1025, 2048, 2049]
+        oa = OffsetArray(repeat([1], len), typemax(Int)-len)
+        @test sum(oa) == reduce(+, oa) == len
+        @test mapreduce(+, +, oa, oa) == 2len
+    end
+end
diff --git a/test/reducedim.jl b/test/reducedim.jl
index ae5a253b220015..512c94d1e2f02a 100644
--- a/test/reducedim.jl
+++ b/test/reducedim.jl
@@ -77,13 +77,26 @@ safe_minabs(A::Array{T}, region) where {T} = safe_mapslices(minimum, abs.(A), re
     @test @inferred(maximum(abs, Areduc, dims=region)) ≈ safe_maxabs(Areduc, region)
     @test @inferred(minimum(abs, Areduc, dims=region)) ≈ safe_minabs(Areduc, region)
     @test @inferred(count(!, Breduc, dims=region)) ≈ safe_count(.!Breduc, region)
+
+    @test isequal(
+        @inferred(count(Breduc, dims=region, init=0x02)),
+        safe_count(Breduc, region) .% UInt8 .+ 0x02,
+    )
+    @test isequal(
+        @inferred(count(!, Breduc, dims=region, init=Int16(0))),
+        safe_count(.!Breduc, region) .% Int16,
+    )
 end
 
 # Combining dims and init
 A = Array{Int}(undef, 0, 3)
-@test_throws ArgumentError maximum(A; dims=1)
+@test_throws "reducing over an empty collection is not allowed" maximum(A; dims=1)
 @test maximum(A; dims=1, init=-1) == reshape([-1,-1,-1], 1, 3)
 
+@test maximum(zeros(0, 2); dims=1, init=-1) == fill(-1, 1, 2)
+@test minimum(zeros(0, 2); dims=1, init=1) == ones(1, 2)
+@test extrema(zeros(0, 2); dims=1, init=(1, -1)) == fill((1, -1), 1, 2)
+
 # Test reduction along first dimension; this is special-cased for
 # size(A, 1) >= 16
 Breduc = rand(64, 3)
@@ -160,8 +173,9 @@ end
     A = Matrix{Int}(undef, 0,1)
     @test sum(A) === 0
     @test prod(A) === 1
-    @test_throws ArgumentError minimum(A)
-    @test_throws ArgumentError maximum(A)
+    @test_throws ["reducing over an empty",
+                  "consider supplying `init`"] minimum(A)
+    @test_throws "consider supplying `init`" maximum(A)
 
     @test isequal(sum(A, dims=1), zeros(Int, 1, 1))
     @test isequal(sum(A, dims=2), zeros(Int, 0, 1))
@@ -173,9 +187,9 @@ end
     @test isequal(prod(A, dims=3), fill(1, 0, 1))
 
     for f in (minimum, maximum)
-        @test_throws ArgumentError f(A, dims=1)
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=1)
         @test isequal(f(A, dims=2), zeros(Int, 0, 1))
-        @test_throws ArgumentError f(A, dims=(1, 2))
+        @test_throws "reducing over an empty collection is not allowed" f(A, dims=(1, 2))
         @test isequal(f(A, dims=3), zeros(Int, 0, 1))
     end
     for f in (findmin, findmax)
@@ -186,6 +200,7 @@ end
     end
 
 end
+
 ## findmin/findmax/minimum/maximum
 
 A = [1.0 5.0 6.0;
@@ -210,6 +225,39 @@ for (tup, rval, rind) in [((1,), [5.0 5.0 6.0], [CartesianIndex(2,1) CartesianIn
     @test isequal(maximum!(copy(rval), A, init=false), rval)
 end
 
+@testset "missing in findmin/findmax" begin
+    B = [1.0 missing NaN;
+         5.0 NaN missing]
+    for (tup, rval, rind) in [(1, [5.0 missing missing], [CartesianIndex(2, 1) CartesianIndex(1, 2) CartesianIndex(2, 3)]),
+                              (2, [missing; missing],    [CartesianIndex(1, 2) CartesianIndex(2, 3)] |> permutedims)]
+        (rval′, rind′) = findmax(B, dims=tup)
+        @test all(rval′ .=== rval)
+        @test all(rind′ .== rind)
+        @test all(maximum(B, dims=tup) .=== rval)
+    end
+
+    for (tup, rval, rind) in [(1, [1.0 missing missing], [CartesianIndex(1, 1) CartesianIndex(1, 2) CartesianIndex(2, 3)]),
+                              (2, [missing; missing],    [CartesianIndex(1, 2) CartesianIndex(2, 3)] |> permutedims)]
+        (rval′, rind′) = findmin(B, dims=tup)
+        @test all(rval′ .=== rval)
+        @test all(rind′ .== rind)
+        @test all(minimum(B, dims=tup) .=== rval)
+    end
+end
+
+@testset "reducedim_init min/max unorderable handling" begin
+    x = Any[1.0, NaN]
+    y = [1, missing]
+    for (v, rval1, rval2) in [(x, [NaN], x),
+                              (y, [missing], y),
+                              (Any[1. NaN; 1. 1.], Any[1. NaN], Any[NaN, 1.])]
+        for f in (minimum, maximum)
+            @test all(f(v, dims=1) .=== rval1)
+            @test all(f(v, dims=2) .=== rval2)
+        end
+    end
+end
+
 #issue #23209
 
 A = [1.0 3.0 6.0;
@@ -401,8 +449,8 @@ end
 
 @testset "argmin/argmax" begin
     B = reshape(3^3:-1:1, (3, 3, 3))
-    @test B[argmax(B, dims=[2, 3])] == maximum(B, dims=[2, 3])
-    @test B[argmin(B, dims=[2, 3])] == minimum(B, dims=[2, 3])
+    @test B[argmax(B, dims=[2, 3])] == @inferred(maximum(B, dims=[2, 3]))
+    @test B[argmin(B, dims=[2, 3])] == @inferred(minimum(B, dims=[2, 3]))
 end
 
 @testset "in-place reductions with mismatched dimensionalities" begin
@@ -410,19 +458,52 @@ end
     for R in (fill(0, 4), fill(0, 4, 1), fill(0, 4, 1, 1))
         @test @inferred(maximum!(R, B)) == reshape(21:24, size(R))
         @test @inferred(minimum!(R, B)) == reshape(1:4, size(R))
+        @test @inferred(extrema!(fill((0,0), size(R)), B)) == reshape(tuple.(1:4, 21:24), size(R))
     end
     for R in (fill(0, 1, 3), fill(0, 1, 3, 1))
         @test @inferred(maximum!(R, B)) == reshape(16:4:24, size(R))
         @test @inferred(minimum!(R, B)) == reshape(1:4:9, size(R))
+        @test @inferred(extrema!(fill((0,0), size(R)), B)) == reshape(tuple.(1:4:9, 16:4:24), size(R))
+    end
+    for (ini, f!) in zip((0,0,(0,0)), (maximum!, minimum!, extrema!))
+        @test_throws DimensionMismatch f!(fill(ini, 4, 1, 1, 1), B)
+        @test_throws DimensionMismatch f!(fill(ini, 1, 3, 1, 1), B)
+        @test_throws DimensionMismatch f!(fill(ini, 1, 1, 2, 1), B)
     end
-    @test_throws DimensionMismatch maximum!(fill(0, 4, 1, 1, 1), B)
-    @test_throws DimensionMismatch minimum!(fill(0, 4, 1, 1, 1), B)
-    @test_throws DimensionMismatch maximum!(fill(0, 1, 3, 1, 1), B)
-    @test_throws DimensionMismatch minimum!(fill(0, 1, 3, 1, 1), B)
-    @test_throws DimensionMismatch maximum!(fill(0, 1, 1, 2, 1), B)
-    @test_throws DimensionMismatch minimum!(fill(0, 1, 1, 2, 1), B)
 end
 
+function unordered_test_for_extrema(a; dims_test = ((), 1, 2, (1,2), 3))
+    for dims in dims_test
+        vext = extrema(a; dims)
+        vmin, vmax = minimum(a; dims), maximum(a; dims)
+        @test isequal(extrema!(copy(vext), a), vext)
+        @test all(x -> isequal(x[1], x[2:3]), zip(vext,vmin,vmax))
+    end
+end
+@testset "0.0,-0.0 test for extrema with dims" begin
+    @test extrema([-0.0;0.0], dims = 1)[1] === (-0.0,0.0)
+    @test tuple(extrema([-0.0;0.0], dims = 2)...) === ((-0.0, -0.0), (0.0, 0.0))
+end
+@testset "NaN/missing test for extrema with dims #43599" begin
+    for sz = (3, 10, 100)
+        for T in (Int, Float64, BigFloat)
+            Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz))
+            Aₘ[rand(1:sz*sz, sz)] .= missing
+            unordered_test_for_extrema(Aₘ)
+            if T <: AbstractFloat
+                Aₙ = map(i -> ismissing(i) ? T(NaN) : i, Aₘ)
+                unordered_test_for_extrema(Aₙ)
+                p = rand(1:sz*sz, sz)
+                Aₘ[p] .= NaN
+                unordered_test_for_extrema(Aₘ)
+            end
+        end
+    end
+end
+@test_broken minimum([missing;BigInt(1)], dims = 1)
+@test_broken maximum([missing;BigInt(1)], dims = 1)
+@test_broken extrema([missing;BigInt(1)], dims = 1)
+
 # issue #26709
 @testset "dimensional reduce with custom non-bitstype types" begin
     struct Variable
@@ -446,3 +527,31 @@ end
     @test_throws TypeError count([1], dims=1)
     @test_throws TypeError count!([1], [1])
 end
+
+@test @inferred(count(false:true, dims=:, init=0x0004)) === 0x0005
+@test @inferred(count(isodd, reshape(1:9, 3, 3), dims=:, init=Int128(0))) === Int128(5)
+
+@testset "reduced_index for BigInt (issue #39995)" begin
+    for T in [Int8, Int16, Int32, Int64, Int128, BigInt]
+        r = T(1):T(2)
+        ax = axes(r, 1)
+        axred = Base.reduced_index(ax)
+        @test axred == Base.OneTo(1)
+        @test typeof(axred) === typeof(ax)
+        r_red = reduce(+, r, dims = 1)
+        @test eltype(r_red) == T
+        @test r_red == [3]
+    end
+end
+
+@testset "type stability (issue #43461)" begin
+    @test (@inferred maximum(Float64, reshape(1:4,2,:); dims = 2)) == reshape([3,4],2,1)
+end
+
+@testset "Min/Max initialization test" begin
+    A = Vector{Union{Missing,Int}}(1:4)
+    A[2] = missing
+    @test_broken @inferred(minimum(exp, A; dims = 1))[1] === missing
+    @test_broken @inferred(maximum(exp, A; dims = 1))[1] === missing
+    @test_broken @inferred(extrema(exp, A; dims = 1))[1] === (missing, missing)
+end
diff --git a/test/reflection.jl b/test/reflection.jl
index ea54b833aeef08..10973f46793805 100644
--- a/test/reflection.jl
+++ b/test/reflection.jl
@@ -113,6 +113,9 @@ not_const = 1
 
 @test ismutable(1) == false
 @test ismutable([]) == true
+@test ismutabletype(Int) == false
+@test ismutabletype(Vector{Any}) == true
+@test ismutabletype(Union{Int, Vector{Any}}) == false
 
 ## find bindings tests
 @test ccall(:jl_get_module_of_binding, Any, (Any, Any), Base, :sin)==Base
@@ -194,9 +197,9 @@ let
     @test TestMod7648.TestModSub9475 == which(@__MODULE__, :a9475)
 end
 
-@test_throws ArgumentError("argument is not a generic function") which(===, Tuple{Int, Int})
-@test_throws ArgumentError("argument is not a generic function") code_typed(===, Tuple{Int, Int})
-@test_throws ArgumentError("argument is not a generic function") Base.return_types(===, Tuple{Int, Int})
+@test which(===, Tuple{Int, Int}) isa Method
+@test length(code_typed(===, Tuple{Int, Int})) === 1
+@test only(Base.return_types(===, Tuple{Int, Int})) === Any
 
 module TestingExported
 using Test
@@ -221,7 +224,7 @@ let ex = :(a + b)
 end
 foo13825(::Array{T, N}, ::Array, ::Vector) where {T, N} = nothing
 @test startswith(string(first(methods(foo13825))),
-                 "foo13825(::Array{T, N}, ::Array, ::Vector{T} where T)")
+                 "foo13825(::Array{T, N}, ::Array, ::Vector) where {T, N} in")
 
 mutable struct TLayout
     x::Int8
@@ -542,7 +545,6 @@ end
 # code_typed_by_type
 @test Base.code_typed_by_type(Tuple{Type{<:Val}})[1][2] == Val
 @test Base.code_typed_by_type(Tuple{typeof(sin), Float64})[1][2] === Float64
-@test_throws ErrorException("signature does not correspond to a generic function") Base.code_typed_by_type(Tuple{Any})
 
 # New reflection methods in 0.6
 struct ReflectionExample{T<:AbstractFloat, N}
@@ -880,6 +882,7 @@ _test_at_locals2(1,1,0.5f0)
     _dump_function(f31687_parent, Tuple{},
                    #=native=#false, #=wrapper=#false, #=strip=#false,
                    #=dump_module=#true, #=syntax=#:att, #=optimize=#false, :none,
+                   #=binary=#false,
                    params)
 end
 
@@ -932,3 +935,64 @@ end
     @test f !== Core._apply
     @test occursin("f2#", String(nameof(f)))
 end
+
+
+@testset "code_typed(; world)" begin
+    mod = @eval module $(gensym()) end
+
+    @eval mod foo() = 1
+    world1 = Base.get_world_counter()
+    @test only(code_typed(mod.foo, ())).second == Int
+    @test only(code_typed(mod.foo, (); world=world1)).second == Int
+
+    @eval mod foo() = 2.
+    world2 = Base.get_world_counter()
+    @test only(code_typed(mod.foo, ())).second == Float64
+    @test only(code_typed(mod.foo, (); world=world1)).second == Int
+    @test only(code_typed(mod.foo, (); world=world2)).second == Float64
+end
+
+@testset "default_tt" begin
+    m = Module()
+    @eval m f1() = return
+    @test Base.default_tt(m.f1) == Tuple{}
+    @eval m f2(a) = return
+    @test Base.default_tt(m.f2) == Tuple{Any}
+    @eval m f3(a::Integer) = return
+    @test Base.default_tt(m.f3) == Tuple{Integer}
+    @eval m f4() = return
+    @eval m f4(a) = return
+    @test Base.default_tt(m.f4) == Tuple
+end
+
+Base.@assume_effects :terminates_locally function issue41694(x::Int)
+    res = 1
+    1 < x < 20 || throw("bad")
+    while x > 1
+        res *= x
+        x -= 1
+    end
+    return res
+end
+maybe_effectful(x::Int) = 42
+maybe_effectful(x::Any) = unknown_operation()
+function f_no_methods end
+
+@testset "infer_effects" begin
+    @test Base.infer_effects(issue41694, (Int,)) |> Core.Compiler.is_terminates
+    @test Base.infer_effects((Int,)) do x
+        issue41694(x)
+    end |> Core.Compiler.is_terminates
+    @test Base.infer_effects(issue41694) |> Core.Compiler.is_terminates # use `default_tt`
+    let effects = Base.infer_effects(maybe_effectful, (Any,)) # union split
+        @test !Core.Compiler.is_consistent(effects)
+        @test !Core.Compiler.is_effect_free(effects)
+        @test !Core.Compiler.is_nothrow(effects)
+        @test !Core.Compiler.is_terminates(effects)
+        @test !Core.Compiler.is_nonoverlayed(effects)
+    end
+    @test Base.infer_effects(f_no_methods) |> !Core.Compiler.is_nothrow
+    # builtins
+    @test Base.infer_effects(typeof, (Any,)) |> Core.Compiler.is_total
+    @test Base.infer_effects(===, (Any,Any)) |> Core.Compiler.is_total
+end
diff --git a/test/regex.jl b/test/regex.jl
index 4ee165a28bb7a2..0202dc4758e2fb 100644
--- a/test/regex.jl
+++ b/test/regex.jl
@@ -34,11 +34,6 @@
     @test map(m -> m.match, eachmatch(r"(\p{L}+)", "Tú lees.")) == ["Tú", "lees"]
     @test map(m -> m.match, eachmatch(r"(\p{L}+)", "¿Cuál es tu pregunta?")) == ["Cuál", "es", "tu", "pregunta"]
 
-    # Issue 9545 (32 bit)
-    buf = PipeBuffer()
-    show(buf, r"")
-    @test read(buf, String) == "r\"\""
-
     # see #10994, #11447: PCRE2 allows NUL chars in the pattern
     @test occursin(Regex("^a\0b\$"), "a\0b")
 
@@ -52,17 +47,39 @@
     subst = s"FROM: \g<name>\n MESSAGE: \1"
     @test replace(msg, re => subst) == "FROM: Julia\n MESSAGE: Hello"
 
+    # Issue #9545 (32 bit)
+    @test repr(r"") == "r\"\""
+    # Issue #36550
+    @test repr(s"\x") == raw"s\"\x\""
+    @test repr(s"\\x") == raw"s\"\\x\""
+    @test repr(s"\\\x") == raw"s\"\\\x\""
+    @test repr(s"x\\") == raw"s\"x\\\\\""
+    @test repr(s"a\1b") == raw"s\"a\1b\""
+    # Issue #29580
+    @test repr(r"\\\"") == raw"r\"\\\\\\\"\""
+    @test repr(s"\\\"\\") == raw"s\"\\\\\\\"\\\\\""
+
     # findall
     @test findall(r"\w+", "foo bar") == [1:3, 5:7]
     @test findall(r"\w+", "foo bar", overlap=true) == [1:3, 2:3, 3:3, 5:7, 6:7, 7:7]
     @test all(findall(r"\w*", "foo bar") .=== [1:3, 4:3, 5:7, 8:7]) # use === to compare empty ranges
     @test all(findall(r"\b", "foo bar") .=== [1:0, 4:3, 5:4, 8:7])  # use === to compare empty ranges
+    # with Char as argument
+    @test findall('a', "batman") == [2, 5]
+    @test findall('→', "OH⁻ + H₃CBr →  HOH₃CBr⁻ → HOCH₃ + Br⁻") == [17, 35]
+    @test findall('a', "") == Int[]
+    @test findall('c', "batman") == Int[]
 
     # count
     @test count(r"\w+", "foo bar") == 2
     @test count(r"\w+", "foo bar", overlap=true) == 6
     @test count(r"\w*", "foo bar") == 4
     @test count(r"\b", "foo bar") == 4
+    # count with char as argument
+    @test count('a', "batman") == 2
+    @test count('a', "aaa", overlap=true) == 3
+    @test count('a', "") == 0
+    @test count('→', "OH⁻ + H₃CBr →  (HOH₃CBr⁻)† → HOCH₃ + Br⁻") == 2
 
     # Unnamed subpatterns
     let m = match(r"(.)(.)(.)", "xyz")
@@ -81,6 +98,18 @@
         @test !haskey(m, "foo")
         @test (m[:a], m[2], m["b"]) == ("x", "y", "z")
         @test sprint(show, m) == "RegexMatch(\"xyz\", a=\"x\", 2=\"y\", b=\"z\")"
+        @test keys(m) == ["a", 2, "b"]
+    end
+
+    # Unicode named subpatterns and property mixes of scripts and classes (issues #35322/#35459 and #40231)
+    let m = match(r"(?<numéro>\d)[\pZs]*(?<文本>[\p{Han}\p{P}]+)", "1 孔生雪笠，聖裔也。為人蘊藉，工詩。")
+        @test haskey(m, :numéro)
+        @test haskey(m, "文本")
+        @test !haskey(m, "ゑ")
+        @test (m[:numéro], m[:文本]) == ("1", "孔生雪笠，聖裔也。為人蘊藉，工詩。")
+        @test (m[1], m[2]) == (m[:numéro], m[:文本])
+        @test sprint(show, m) == "RegexMatch(\"1 孔生雪笠，聖裔也。為人蘊藉，工詩。\", numéro=\"1\", 文本=\"孔生雪笠，聖裔也。為人蘊藉，工詩。\")"
+        @test keys(m) == ["numéro", "文本"]
     end
 
     # Backcapture reference in substitution string
@@ -149,6 +178,24 @@
         @test r"this|that"^2 == r"(?:this|that){2}"
     end
 
+    @testset "iterate" begin
+        m = match(r"(.) test (.+)", "a test 123")
+        @test first(m) == "a"
+        @test collect(m) == ["a", "123"]
+        for (i, capture) in enumerate(m)
+            i == 1 && @test capture == "a"
+            i == 2 && @test capture == "123"
+        end
+    end
+
+    @testset "Destructuring dispatch" begin
+        handle(::Nothing) = "not found"
+        handle((capture,)::RegexMatch) = "found $capture"
+
+        @test handle(match(r"a (\d)", "xyz")) == "not found"
+        @test handle(match(r"a (\d)", "a 1")) == "found 1"
+    end
+
     # Test that PCRE throws the correct kind of error
     # TODO: Uncomment this once the corresponding change has propagated to CI
     #@test_throws ErrorException Base.PCRE.info(C_NULL, Base.PCRE.INFO_NAMECOUNT, UInt32)
diff --git a/test/reinterpretarray.jl b/test/reinterpretarray.jl
index c6990066c14236..e623b407f70a69 100644
--- a/test/reinterpretarray.jl
+++ b/test/reinterpretarray.jl
@@ -3,36 +3,141 @@
 using Test
 isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
 using .Main.OffsetArrays
+isdefined(Main, :TSlow) || @eval Main include("testhelpers/arrayindexingtypes.jl")
+using .Main: TSlow, WrapperArray
 
 A = Int64[1, 2, 3, 4]
+As = TSlow(A)
+Ars = Int64[1 3; 2 4]
+Arss = TSlow(Ars)
 B = Complex{Int64}[5+6im, 7+8im, 9+10im]
+Bs = TSlow(B)
+Av = [Int32[1,2], Int32[3,4]]
+
+for Ar in (Ars, Arss)
+    @test @inferred(ndims(reinterpret(reshape, Complex{Int64}, Ar))) == 1
+    @test @inferred(axes(reinterpret(reshape, Complex{Int64}, Ar))) === (Base.OneTo(2),)
+    @test @inferred(size(reinterpret(reshape, Complex{Int64}, Ar))) == (2,)
+end
+for _B in (B, Bs)
+    @test @inferred(ndims(reinterpret(reshape, Int64, _B))) == 2
+    @test @inferred(axes(reinterpret(reshape, Int64, _B))) === (Base.OneTo(2), Base.OneTo(3))
+    @test @inferred(size(reinterpret(reshape, Int64, _B))) == (2, 3)
+    @test @inferred(ndims(reinterpret(reshape, Int128, _B))) == 1
+    @test @inferred(axes(reinterpret(reshape, Int128, _B))) === (Base.OneTo(3),)
+    @test @inferred(size(reinterpret(reshape, Int128, _B))) == (3,)
+end
+
+@test_throws ArgumentError("cannot reinterpret `Int64` as `Vector{Int64}`, type `Vector{Int64}` is not a bits type") reinterpret(Vector{Int64}, A)
+@test_throws ArgumentError("cannot reinterpret `Vector{Int32}` as `Int32`, type `Vector{Int32}` is not a bits type") reinterpret(Int32, Av)
+@test_throws ArgumentError("cannot reinterpret a zero-dimensional `Int64` array to `Int32` which is of a different size") reinterpret(Int32, reshape([Int64(0)]))
+@test_throws ArgumentError("cannot reinterpret a zero-dimensional `Int32` array to `Int64` which is of a different size") reinterpret(Int64, reshape([Int32(0)]))
+@test_throws ArgumentError("""cannot reinterpret an `$Int` array to `Tuple{$Int, $Int}` whose first dimension has size `5`.
+                              The resulting array would have non-integral first dimension.
+                              """) reinterpret(Tuple{Int,Int}, [1,2,3,4,5])
+
+@test_throws ArgumentError("`reinterpret(reshape, Complex{Int64}, a)` where `eltype(a)` is Int64 requires that `axes(a, 1)` (got Base.OneTo(4)) be equal to 1:2 (from the ratio of element sizes)") reinterpret(reshape, Complex{Int64}, A)
+@test_throws ArgumentError("`reinterpret(reshape, T, a)` requires that one of `sizeof(T)` (got 24) and `sizeof(eltype(a))` (got 16) be an integer multiple of the other") reinterpret(reshape, NTuple{3, Int64}, B)
+@test_throws ArgumentError("cannot reinterpret `Int64` as `Vector{Int64}`, type `Vector{Int64}` is not a bits type") reinterpret(reshape, Vector{Int64}, Ars)
+@test_throws ArgumentError("cannot reinterpret a zero-dimensional `UInt8` array to `UInt16` which is of a larger size") reinterpret(reshape, UInt16, reshape([0x01]))
+
 # getindex
-@test reinterpret(Complex{Int64}, A) == [1 + 2im, 3 + 4im]
-@test reinterpret(Float64, A) == reinterpret.(Float64, A)
+for _A in (A, As)
+    @test reinterpret(Complex{Int64}, _A) == [1 + 2im, 3 + 4im]
+    @test reinterpret(Float64, _A) == reinterpret.(Float64, A)
+    @test reinterpret(reshape, Float64, _A) == reinterpret.(Float64, A)
+end
+for Ar in (Ars, Arss)
+    @test reinterpret(reshape, Complex{Int64}, Ar) == [1 + 2im, 3 + 4im]
+    @test reinterpret(reshape, Float64, Ar) == reinterpret.(Float64, Ars)
+end
 
-@test reinterpret(NTuple{3, Int64}, B) == [(5,6,7),(8,9,10)]
+for _B in (B, Bs)
+    @test reinterpret(NTuple{3, Int64}, _B) == [(5,6,7),(8,9,10)]
+    @test reinterpret(reshape, Int64, _B) == [5 7 9; 6 8 10]
+end
 
 # setindex
-let Ac = copy(A), Bc = copy(B)
-    reinterpret(Complex{Int64}, Ac)[2] = -1 - 2im
-    @test Ac == [1, 2, -1, -2]
-    reinterpret(NTuple{3, Int64}, Bc)[2] = (4,5,6)
-    @test Bc == Complex{Int64}[5+6im, 7+4im, 5+6im]
-    reinterpret(NTuple{3, Int64}, Bc)[1] = (1,2,3)
-    @test Bc == Complex{Int64}[1+2im, 3+4im, 5+6im]
+for (_A, Ar, _B) in ((A, Ars, B), (As, Arss, Bs))
+    let Ac = copy(_A), Arsc = copy(Ar), Bc = copy(_B)
+        reinterpret(Complex{Int64}, Ac)[2] = -1 - 2im
+        @test Ac == [1, 2, -1, -2]
+        reinterpret(Complex{Int64}, Arsc)[2] = -1 - 2im
+        @test Arsc == [1 -1; 2 -2]
+        reinterpret(NTuple{3, Int64}, Bc)[2] = (4,5,6)
+        @test Bc == Complex{Int64}[5+6im, 7+4im, 5+6im]
+        B2 = reinterpret(NTuple{3, Int64}, Bc)
+        @test setindex!(B2, (1,2,3), 1) === B2
+        @test Bc == Complex{Int64}[1+2im, 3+4im, 5+6im]
+        Bc = copy(_B)
+        Brrs = reinterpret(reshape, Int64, Bc)
+        @test setindex!(Brrs, -5, 2, 3) === Brrs
+        @test Bc == Complex{Int64}[5+6im, 7+8im, 9-5im]
+        Brrs[last(eachindex(Brrs))] = 22
+        @test Bc == Complex{Int64}[5+6im, 7+8im, 9+22im]
 
-    A1 = reinterpret(Float64, A)
-    A2 = reinterpret(ComplexF64, A)
-    A1[1] = 1.0
-    @test real(A2[1]) == 1.0
+        A1 = reinterpret(Float64, _A)
+        A2 = reinterpret(ComplexF64, _A)
+        @test setindex!(A1, 1.0, 1) === A1
+        @test real(A2[1]) == 1.0
+        A1 = reinterpret(reshape, Float64, _A)
+        @test setindex!(A1, 2.5, 1) === A1
+        @test reinterpret(Float64, _A[1]) == 2.5
+        A1rs = reinterpret(Float64, Ar)
+        A2rs = reinterpret(ComplexF64, Ar)
+        @test setindex!(A1rs, 1.0, 1, 1) === A1rs
+        @test real(A2rs[1]) == 1.0
+        A1rs = reinterpret(reshape, Float64, Ar)
+        A2rs = reinterpret(reshape, ComplexF64, Ar)
+        @test setindex!(A1rs, 2.5, 1, 1) === A1rs
+        @test real(A2rs[1]) == 2.5
+    end
 end
+A3 = collect(reshape(1:18, 2, 3, 3))
+A3r = reinterpret(reshape, Complex{Int}, A3)
+@test A3r[4] === A3r[1,2] === A3r[CartesianIndex(1, 2)] === 7+8im
+A3r[2,3] = -8-15im
+@test A3[1,2,3] == -8
+@test A3[2,2,3] == -15
+A3r[4] = 100+200im
+@test A3[1,1,2] == 100
+@test A3[2,1,2] == 200
+A3r[CartesianIndex(1,2)] = 300+400im
+@test A3[1,1,2] == 300
+@test A3[2,1,2] == 400
 
 # same-size reinterpret where one of the types is non-primitive
-let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)]
-    @test reinterpret(Float32, a)[1] == reinterpret(Float32, 0x04030201)
-    reinterpret(Float32, a)[1] = 2.0
+let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(Float32, a)
+    @test ra[1] == reinterpret(Float32, 0x04030201)
+    @test setindex!(ra, 2.0) === ra
     @test reinterpret(Float32, a)[1] == 2.0
 end
+let a = NTuple{4,UInt8}[(0x01,0x02,0x03,0x04)], ra = reinterpret(reshape, Float32, a)
+    @test ra[1] == reinterpret(Float32, 0x04030201)
+    @test setindex!(ra, 2.0) === ra
+    @test reinterpret(reshape, Float32, a)[1] == 2.0
+end
+
+# Pass-through indexing
+B = Complex{Int64}[5+6im, 7+8im, 9+10im]
+Br = reinterpret(reshape, Int64, B)
+W = WrapperArray(Br)
+for (b, w) in zip(5:10, W)
+    @test b == w
+end
+for (i, j) in zip(eachindex(W), 11:16)
+    W[i] = j
+end
+@test B[1] === Complex{Int64}(11+12im)
+@test B[2] === Complex{Int64}(13+14im)
+@test B[3] === Complex{Int64}(15+16im)
+z3 = (0x00, 0x00, 0x00)
+Az = [z3 z3; z3 z3]
+Azr = reinterpret(reshape, UInt8, Az)
+W = WrapperArray(Azr)
+copyto!(W, fill(0x01, 3, 2, 2))
+@test all(isequal((0x01, 0x01, 0x01)), Az)
+@test eachindex(W, W) == eachindex(W)
 
 # ensure that reinterpret arrays aren't erroneously classified as strided
 let A = reshape(1:20, 5, 4)
@@ -52,12 +157,62 @@ let A = collect(reshape(1:20, 5, 4))
     @test reshape(R, :) isa StridedArray
 end
 
-# and ensure a reinterpret array containing a strided array can have strides computed
-let A = view(reinterpret(Int16, collect(reshape(UnitRange{Int64}(1, 20), 5, 4))), :, 1:2)
-    R = reinterpret(Int32, A)
-    @test strides(R) == (1, 10)
-    @test stride(R, 1) == 1
-    @test stride(R, 2) == 10
+function check_strides(A::AbstractArray)
+    # Make sure stride(A, i) is equivalent with strides(A)[i] (if 1 <= i <= ndims(A))
+    dims = ntuple(identity, ndims(A))
+    map(i -> stride(A, i), dims) == strides(A) || return false
+    # Test strides via value check.
+    for i in eachindex(IndexLinear(), A)
+        A[i] === Base.unsafe_load(pointer(A, i)) || return false
+    end
+    return true
+end
+
+@testset "strides for NonReshapedReinterpretArray" begin
+    A = Array{Int32}(reshape(1:88, 11, 8))
+    for viewax2 in (1:8, 1:2:6, 7:-1:1, 5:-2:1, 2:3:8, 7:-6:1, 3:5:11)
+        # dim1 is contiguous
+        for T in (Int16, Float32)
+            @test check_strides(reinterpret(T, view(A, 1:8, viewax2)))
+        end
+        if mod(step(viewax2), 2) == 0
+            @test check_strides(reinterpret(Int64, view(A, 1:8, viewax2)))
+        else
+            @test_throws "Parent's strides" strides(reinterpret(Int64, view(A, 1:8, viewax2)))
+        end
+        # non-integer-multipled classified
+        if mod(step(viewax2), 3) == 0
+            @test check_strides(reinterpret(NTuple{3,Int16}, view(A, 2:7, viewax2)))
+        else
+            @test_throws "Parent's strides" strides(reinterpret(NTuple{3,Int16}, view(A, 2:7, viewax2)))
+        end
+        if mod(step(viewax2), 5) == 0
+            @test check_strides(reinterpret(NTuple{5,Int16}, view(A, 2:11, viewax2)))
+        else
+            @test_throws "Parent's strides" strides(reinterpret(NTuple{5,Int16}, view(A, 2:11, viewax2)))
+        end
+        # dim1 is not contiguous
+        for T in (Int16, Int64)
+            @test_throws "Parent must" strides(reinterpret(T, view(A, 8:-1:1, viewax2)))
+        end
+        @test check_strides(reinterpret(Float32, view(A, 8:-1:1, viewax2)))
+    end
+end
+
+@testset "strides for ReshapedReinterpretArray" begin
+    A = Array{Int32}(reshape(1:192, 3, 8, 8))
+    for viewax1 in (1:8, 1:2:8, 8:-1:1, 8:-2:1), viewax2 in (1:2, 4:-1:1)
+        for T in (Int16, Float32)
+            @test check_strides(reinterpret(reshape, T, view(A, 1:2, viewax1, viewax2)))
+            @test check_strides(reinterpret(reshape, T, view(A, 1:2:3, viewax1, viewax2)))
+        end
+        if mod(step(viewax1), 2) == 0
+            @test check_strides(reinterpret(reshape, Int64, view(A, 1:2, viewax1, viewax2)))
+        else
+            @test_throws "Parent's strides" strides(reinterpret(reshape, Int64, view(A, 1:2, viewax1, viewax2)))
+        end
+        @test_throws "Parent must" strides(reinterpret(reshape, Int64, view(A, 1:2:3, viewax1, viewax2)))
+    end
 end
 
 @testset "strides" begin
@@ -93,7 +248,7 @@ let a = fill(1.0, 5, 3)
         @test_throws BoundsError r[badinds...] = -2
     end
     for goodinds in (1, 15, (1,1), (5,3))
-        r[goodinds...] = -2
+        @test setindex!(r, -2, goodinds...) === r
         @test r[goodinds...] == -2
     end
     r = reinterpret(Int32, a)
@@ -106,7 +261,7 @@ let a = fill(1.0, 5, 3)
         @test_throws BoundsError r[badinds...] = -3
     end
     for goodinds in (1, 30, (1,1), (10,3))
-        r[goodinds...] = -3
+        @test setindex!(r, -3, goodinds...) === r
         @test r[goodinds...] == -3
     end
     r = reinterpret(Int64, view(a, 1:2:5, :))
@@ -119,7 +274,7 @@ let a = fill(1.0, 5, 3)
         @test_throws BoundsError r[badinds...] = -4
     end
     for goodinds in (1, 9, (1,1), (3,3))
-        r[goodinds...] = -4
+        @test setindex!(r, -4, goodinds...) === r
         @test r[goodinds...] == -4
     end
     r = reinterpret(Int32, view(a, 1:2:5, :))
@@ -132,9 +287,15 @@ let a = fill(1.0, 5, 3)
         @test_throws BoundsError r[badinds...] = -5
     end
     for goodinds in (1, 18, (1,1), (6,3))
-        r[goodinds...] = -5
+        @test setindex!(r, -5, goodinds...) === r
         @test r[goodinds...] == -5
     end
+
+    ar = [(1,2), (3,4)]
+    arr = reinterpret(reshape, Int, ar)
+    @test @inferred(IndexStyle(arr)) == Base.IndexSCartesian2{2}()
+    @test @inferred(eachindex(arr)) == Base.SCartesianIndices2{2}(Base.OneTo(2))
+    @test @inferred(eachindex(arr, arr)) == Base.SCartesianIndices2{2}(Base.OneTo(2))
 end
 
 # Error on reinterprets that would expose padding
@@ -165,8 +326,10 @@ let a = [0.1 0.2; 0.3 0.4], at = reshape([(i,i+1) for i = 1:2:8], 2, 2)
     @test r[1,2] === reinterpret(Int64, v[1,2])
     @test r[0,3] === reinterpret(Int64, v[0,3])
     @test r[1,3] === reinterpret(Int64, v[1,3])
-    @test_throws ArgumentError("cannot reinterpret a `Float64` array to `UInt32` when the first axis is Base.IdentityUnitRange(0:1). Try reshaping first.") reinterpret(UInt32, v)
+    @test_throws ArgumentError("cannot reinterpret a `Float64` array to `UInt32` when the first axis is OffsetArrays.IdOffsetRange(0:1). Try reshaping first.") reinterpret(UInt32, v)
+    @test_throws ArgumentError("`reinterpret(reshape, Tuple{Float64, Float64}, a)` where `eltype(a)` is Float64 requires that `axes(a, 1)` (got OffsetArrays.IdOffsetRange(0:1)) be equal to 1:2 (from the ratio of element sizes)") reinterpret(reshape, Tuple{Float64,Float64}, v)
     v = OffsetArray(a, (0, 1))
+    @test axes(reinterpret(reshape, Tuple{Float64,Float64}, v)) === (OffsetArrays.IdOffsetRange(Base.OneTo(2), 1),)
     r = reinterpret(UInt32, v)
     axsv = axes(v)
     @test axes(r) === (oftype(axsv[1], 1:4), axsv[2])
@@ -184,7 +347,7 @@ let a = [0.1 0.2; 0.3 0.4], at = reshape([(i,i+1) for i = 1:2:8], 2, 2)
     offsetvt = (-2, 4)
     vt = OffsetArray(at, offsetvt)
     istr = string(Int)
-    @test_throws ArgumentError("cannot reinterpret a `Tuple{$istr, $istr}` array to `$istr` when the first axis is Base.IdentityUnitRange(-1:0). Try reshaping first.") reinterpret(Int, vt)
+    @test_throws ArgumentError("cannot reinterpret a `Tuple{$istr, $istr}` array to `$istr` when the first axis is OffsetArrays.IdOffsetRange(-1:0). Try reshaping first.") reinterpret(Int, vt)
     vt = reshape(vt, 1:1, axes(vt)...)
     r = reinterpret(Int, vt)
     @test r == OffsetArray(reshape(1:8, 2, 2, 2), (0, offsetvt...))
@@ -205,9 +368,143 @@ end
 
 # Test 0-dimensional Arrays
 A = zeros(UInt32)
-B = reinterpret(Int32,A)
-@test size(B) == ()
-@test axes(B) == ()
-B[] = Int32(5)
+B = reinterpret(Int32, A)
+Brs = reinterpret(reshape,Int32, A)
+C = reinterpret(Tuple{UInt32}, A) # non-primitive type
+Crs = reinterpret(reshape, Tuple{UInt32}, A)  # non-primitive type
+@test size(B) == size(Brs) == size(C) == size(Crs) == ()
+@test axes(B) == axes(Brs) == axes(C) == axes(Crs) == ()
+@test setindex!(B, Int32(5)) === B
 @test B[] === Int32(5)
+@test Brs[] === Int32(5)
+@test C[] === (UInt32(5),)
+@test Crs[] === (UInt32(5),)
 @test A[] === UInt32(5)
+@test setindex!(Brs, Int32(12)) === Brs
+@test A[] === UInt32(12)
+@test setindex!(C, (UInt32(7),)) === C
+@test A[] === UInt32(7)
+@test setindex!(Crs, (UInt32(3),)) === Crs
+@test A[] === UInt32(3)
+
+
+a = [(1.0,2.0)]
+af = @inferred(reinterpret(reshape, Float64, a))
+anew = @inferred(reinterpret(reshape, Tuple{Float64,Float64}, vec(af)))
+@test anew[1] == a[1]
+@test ndims(anew) == 0
+
+# re-reinterpret
+a0 = reshape([0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04], 4, 2)
+a = reinterpret(reshape, NTuple{4,UInt8}, a0)
+@test a == [(0x22, 0x44, 0x88, 0xf0), (0x01, 0x02, 0x03, 0x04)]
+@test reinterpret(UInt8, a) == [0x22, 0x44, 0x88, 0xf0, 0x01, 0x02, 0x03, 0x04]
+@test reinterpret(reshape, UInt8, a) === a0
+
+# reductions
+a = [(1,2,3), (4,5,6)]
+ars = reinterpret(reshape, Int, a)
+@test sum(ars) == 21
+@test sum(ars; dims=1) == [6 15]
+@test sum(ars; dims=2) == reshape([5,7,9], (3, 1))
+@test sum(ars; dims=(1,2)) == reshape([21], (1, 1))
+# also test large sizes for the pairwise algorithm
+a = [(k,k+1,k+2) for k = 1:3:4000]
+ars = reinterpret(reshape, Int, a)
+@test sum(ars) == 8010003
+
+@testset "similar(::ReinterpretArray)" begin
+    a = reinterpret(NTuple{2,Float64}, TSlow(rand(Float64, 4, 4)))
+
+    as = similar(a)
+    @test as isa TSlow{NTuple{2,Float64},2}
+    @test size(as) == (2, 4)
+
+    as = similar(a, Int, (3, 5, 1))
+    @test as isa TSlow{Int,3}
+    @test size(as) == (3, 5, 1)
+
+    a = reinterpret(reshape, NTuple{4,Float64}, TSlow(rand(Float64, 4, 4)))
+
+    as = similar(a)
+    @test as isa TSlow{NTuple{4,Float64},1}
+    @test size(as) == (4,)
+end
+
+
+@testset "aliasing" begin
+    a = reinterpret(NTuple{2,Float64}, rand(Float64, 4, 4))
+    @test typeof(Base.unaliascopy(a)) === typeof(a)
+    a = reinterpret(reshape, NTuple{4,Float64}, rand(Float64, 4, 4))
+    @test typeof(Base.unaliascopy(a)) === typeof(a)
+end
+
+
+@testset "singleton types" begin
+    mutable struct NotASingleton end # not a singleton because it is mutable
+    struct SomeSingleton
+        # A singleton type that does not have the internal constructor SomeSingleton()
+        SomeSingleton(x) = new()
+    end
+
+    @test_throws ErrorException reinterpret(Int, nothing)
+    @test_throws ErrorException reinterpret(Missing, 3)
+    @test_throws ErrorException reinterpret(Missing, NotASingleton())
+    @test_throws ErrorException reinterpret(NotASingleton, ())
+
+    @test_throws ArgumentError reinterpret(NotASingleton, fill(nothing, ()))
+    @test_throws ArgumentError reinterpret(reshape, NotASingleton, fill(missing, 3))
+    @test_throws ArgumentError reinterpret(Tuple{}, fill(NotASingleton(), 2))
+    @test_throws ArgumentError reinterpret(reshape, Nothing, fill(NotASingleton(), ()))
+
+    t = fill(nothing, 3, 5)
+    @test reinterpret(SomeSingleton, t) == reinterpret(reshape, SomeSingleton, t)
+    @test reinterpret(SomeSingleton, t) == [SomeSingleton(i*j) for i in 1:3, j in 1:5]
+    @test reinterpret(Int, t) == fill(17, 0, 5)
+    @test_throws ArgumentError reinterpret(reshape, Float64, t)
+    @test_throws ArgumentError reinterpret(Nothing, 1:6)
+    @test_throws ArgumentError reinterpret(reshape, Missing, [0.0])
+
+    # reintepret of empty array with reshape
+    @test reinterpret(reshape, Nothing, fill(missing, (0,0,0))) == fill(nothing, (0,0,0))
+    @test_throws ArgumentError reinterpret(reshape, Nothing, fill(3.2, (0,0)))
+    @test_throws ArgumentError reinterpret(reshape, Float64, fill(nothing, 0))
+
+    # reinterpret of 0-dimensional array
+    z = reinterpret(Tuple{}, fill(missing, ()))
+    @test z == fill((), ())
+    @test z == reinterpret(reshape, Tuple{}, fill(nothing, ()))
+    @test z[] == ()
+    @test setindex!(z, ()) === z
+    @test_throws BoundsError z[2]
+    @test_throws BoundsError z[3] = ()
+    @test_throws ArgumentError reinterpret(UInt8, fill(nothing, ()))
+    @test_throws ArgumentError reinterpret(Missing, fill(1f0, ()))
+    @test_throws ArgumentError reinterpret(reshape, Float64, fill(nothing, ()))
+    @test_throws ArgumentError reinterpret(reshape, Nothing, fill(17, ()))
+    @test_throws MethodError z[] = nothing
+
+    @test @inferred(ndims(reinterpret(reshape, SomeSingleton, t))) == 2
+    @test @inferred(axes(reinterpret(reshape, Tuple{}, t))) == (Base.OneTo(3),Base.OneTo(5))
+    @test @inferred(size(reinterpret(reshape, Missing, t))) == (3,5)
+
+    x = reinterpret(Tuple{}, t)
+    @test x == reinterpret(reshape, Tuple{}, t)
+    @test x[3,5] === ()
+    x1 = fill((), 3, 5)
+    @test setindex!(x, (), 1, 1) == x1
+    @test_throws BoundsError x[17]
+    @test_throws BoundsError x[4,2]
+    @test_throws BoundsError x[1,2,3]
+    @test_throws BoundsError x[18] = ()
+    @test_throws MethodError x[1,3] = missing
+    @test x == fill((), (3, 5))
+    x = reinterpret(reshape, SomeSingleton, t)
+    @test_throws BoundsError x[19]
+    @test_throws BoundsError x[2,6] = SomeSingleton(0xa)
+    @test x[2,3] === SomeSingleton(:x)
+    x2 = fill(SomeSingleton(0.7), 3, 5)
+    @test x == x2
+    @test setindex!(x, SomeSingleton(:), 3, 5) == x2
+    @test_throws MethodError x[2,4] = nothing
+end
diff --git a/test/rounding.jl b/test/rounding.jl
index e4c51212e81fa7..0fe1513c6c4507 100644
--- a/test/rounding.jl
+++ b/test/rounding.jl
@@ -128,6 +128,16 @@ end
             else
                 @test u === r
             end
+
+            r = round(u, RoundFromZero)
+            if isfinite(u)
+                @test isfinite(r)
+                @test isinteger(r)
+                @test signbit(u) ? (r == floor(u)) : (r == ceil(u))
+                @test signbit(u) == signbit(r)
+            else
+                @test u === r
+            end
         end
     end
 end
@@ -171,6 +181,7 @@ end
                 @test round.(y) ≈ t[(i+1+isodd(i>>2))>>2 for i in r]
                 @test broadcast(x -> round(x, RoundNearestTiesAway), y) ≈ t[(i+1+(i>=0))>>2 for i in r]
                 @test broadcast(x -> round(x, RoundNearestTiesUp), y) ≈ t[(i+2)>>2 for i in r]
+                @test broadcast(x -> round(x, RoundFromZero), y) ≈ t[(i+3*(i>=0))>>2 for i in r]
             end
         end
     end
@@ -190,6 +201,10 @@ end
     @test round(Int,-2.5,RoundNearestTiesUp) == -2
     @test round(Int,-1.5,RoundNearestTiesUp) == -1
     @test round(Int,-1.9) == -2
+    @test round(Int,nextfloat(1.0),RoundFromZero) == 2
+    @test round(Int,-nextfloat(1.0),RoundFromZero) == -2
+    @test round(Int,prevfloat(1.0),RoundFromZero) == 1
+    @test round(Int,-prevfloat(1.0),RoundFromZero) == -1
     @test_throws InexactError round(Int64, 9.223372036854776e18)
     @test       round(Int64, 9.223372036854775e18) == 9223372036854774784
     @test_throws InexactError round(Int64, -9.223372036854778e18)
diff --git a/test/runtests.jl b/test/runtests.jl
index 74ff907deb4bc9..4c9ac1cfd869c9 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -10,15 +10,26 @@ using Base: Experimental
 include("choosetests.jl")
 include("testenv.jl")
 
-tests, net_on, exit_on_error, use_revise, seed = choosetests(ARGS)
+(; tests, net_on, exit_on_error, use_revise, seed) = choosetests(ARGS)
 tests = unique(tests)
 
+if Sys.islinux()
+    const SYS_rrcall_check_presence = 1008
+    global running_under_rr() = 0 == ccall(:syscall, Int,
+        (Int, Int, Int, Int, Int, Int, Int),
+        SYS_rrcall_check_presence, 0, 0, 0, 0, 0, 0)
+else
+    global running_under_rr() = false
+end
+
 if use_revise
     using Revise
+    union!(Revise.stdlib_names, Symbol.(STDLIBS))
     # Remote-eval the following to initialize Revise in workers
     const revise_init_expr = quote
         using Revise
         const STDLIBS = $STDLIBS
+        union!(Revise.stdlib_names, Symbol.(STDLIBS))
         revise_trackall()
     end
 end
@@ -63,6 +74,7 @@ end
 move_to_node1("precompile")
 move_to_node1("SharedArrays")
 move_to_node1("threads")
+move_to_node1("Distributed")
 # Ensure things like consuming all kernel pipe memory doesn't interfere with other tests
 move_to_node1("stress")
 
@@ -79,8 +91,26 @@ prepend!(tests, linalg_tests)
 
 import LinearAlgebra
 cd(@__DIR__) do
+    # `net_on` implies that we have access to the loopback interface which is
+    # necessary for Distributed multi-processing. There are some test
+    # environments that do not allow access to loopback, so we must disable
+    # addprocs when `net_on` is false. Note that there exist build environments,
+    # including Nix, where `net_on` is false but we still have access to the
+    # loopback interface. It would be great to make this check more specific to
+    # identify those situations somehow. See
+    #   * https://github.com/JuliaLang/julia/issues/6722
+    #   * https://github.com/JuliaLang/julia/pull/29384
+    #   * https://github.com/JuliaLang/julia/pull/40348
     n = 1
-    if net_on
+    JULIA_TEST_USE_MULTIPLE_WORKERS = get(ENV, "JULIA_TEST_USE_MULTIPLE_WORKERS", "") |>
+                                      strip |>
+                                      lowercase |>
+                                      s -> tryparse(Bool, s) |>
+                                      x -> x === true
+    # If the `JULIA_TEST_USE_MULTIPLE_WORKERS` environment variable is set to `true`, we use
+    # multiple worker processes regardless of the value of `net_on`.
+    # Otherwise, we use multiple worker processes if and only if `net_on` is true.
+    if net_on || JULIA_TEST_USE_MULTIPLE_WORKERS
         n = min(Sys.CPU_THREADS, length(tests))
         n > 1 && addprocs_with_testenv(n)
         LinearAlgebra.BLAS.set_num_threads(1)
@@ -94,6 +124,15 @@ cd(@__DIR__) do
         Distributed.remotecall_eval(Main, workers(), revise_init_expr)
     end
 
+    println("""
+        Running parallel tests with:
+          nworkers() = $(nworkers())
+          nthreads() = $(Threads.nthreads())
+          Sys.CPU_THREADS = $(Sys.CPU_THREADS)
+          Sys.total_memory() = $(Base.format_bytes(Sys.total_memory()))
+          Sys.free_memory() = $(Base.format_bytes(Sys.free_memory()))
+        """)
+
     #pretty print the information about gc and mem usage
     testgroupheader = "Test"
     workerheader = "(Worker)"
@@ -134,25 +173,39 @@ cd(@__DIR__) do
         finally
             unlock(print_lock)
         end
+        nothing
     end
 
     global print_testworker_started = (name, wrkr)->begin
+        pid = running_under_rr() ? remotecall_fetch(getpid, wrkr) : 0
+        at = lpad("($wrkr)", name_align - textwidth(name) + 1, " ")
         lock(print_lock)
         try
-            printstyled(name, color=:white)
-            printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |",
-                " "^elapsed_align, "started at $(now())\n", color=:white)
+            printstyled(name, at, " |", " "^elapsed_align,
+                    "started at $(now())",
+                    (pid > 0 ? " on pid $pid" : ""),
+                    "\n", color=:white)
         finally
             unlock(print_lock)
         end
+        nothing
     end
 
-    function print_testworker_errored(name, wrkr)
+    function print_testworker_errored(name, wrkr, @nospecialize(e))
         lock(print_lock)
         try
             printstyled(name, color=:red)
             printstyled(lpad("($wrkr)", name_align - textwidth(name) + 1, " "), " |",
                 " "^elapsed_align, " failed at $(now())\n", color=:red)
+            if isa(e, Test.TestSetException)
+                for t in e.errors_and_fails
+                    show(t)
+                    println()
+                end
+            elseif e !== nothing
+                Base.showerror(stdout, e)
+            end
+            println()
         finally
             unlock(print_lock)
         end
@@ -163,6 +216,7 @@ cd(@__DIR__) do
 
     local stdin_monitor
     all_tasks = Task[]
+    o_ts_duration = 0.0
     try
         # Monitor stdin and kill this task on ^C
         # but don't do this on Windows, because it may deadlock in the kernel
@@ -193,25 +247,26 @@ cd(@__DIR__) do
                 end
             end
         end
-        @Experimental.sync begin
+        o_ts_duration = @elapsed @Experimental.sync begin
             for p in workers()
                 @async begin
                     push!(all_tasks, current_task())
                     while length(tests) > 0
                         test = popfirst!(tests)
                         running_tests[test] = now()
-                        local resp
                         wrkr = p
-                        try
-                            resp = remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
-                        catch e
-                            isa(e, InterruptException) && return
-                            resp = Any[e]
-                        end
+                        before = time()
+                        resp, duration = try
+                                r = remotecall_fetch(runtests, wrkr, test, test_path(test); seed=seed)
+                                r, time() - before
+                            catch e
+                                isa(e, InterruptException) && return
+                                Any[CapturedException(e, catch_backtrace())], time() - before
+                            end
                         delete!(running_tests, test)
-                        push!(results, (test, resp))
-                        if resp[1] isa Exception
-                            print_testworker_errored(test, wrkr)
+                        push!(results, (test, resp, duration))
+                        if length(resp) == 1
+                            print_testworker_errored(test, wrkr, exit_on_error ? nothing : resp[1])
                             if exit_on_error
                                 skipped = length(tests)
                                 empty!(tests)
@@ -260,14 +315,20 @@ cd(@__DIR__) do
             # to the overall aggregator
             isolate = true
             t == "SharedArrays" && (isolate = false)
-            local resp
-            try
-                resp = eval(Expr(:call, () -> runtests(t, test_path(t), isolate, seed=seed))) # runtests is defined by the include above
+            before = time()
+            resp, duration = try
+                    r = Base.invokelatest(runtests, t, test_path(t), isolate, seed=seed) # runtests is defined by the include above
+                    r, time() - before
+                catch e
+                    isa(e, InterruptException) && rethrow()
+                    Any[CapturedException(e, catch_backtrace())], time() - before
+                end
+            if length(resp) == 1
+                print_testworker_errored(t, 1, resp[1])
+            else
                 print_testworker_stats(t, 1, resp)
-            catch e
-                resp = Any[e]
             end
-            push!(results, (t, resp))
+            push!(results, (t, resp, duration))
         end
     catch e
         isa(e, InterruptException) || rethrow()
@@ -311,38 +372,28 @@ cd(@__DIR__) do
     Errored, and execution continues until the summary at the end of the test
     run, where the test file is printed out as the "failed expression".
     =#
+    Test.TESTSET_PRINT_ENABLE[] = false
     o_ts = Test.DefaultTestSet("Overall")
+    o_ts.time_end = o_ts.time_start + o_ts_duration # manually populate the timing
     Test.push_testset(o_ts)
     completed_tests = Set{String}()
-    for (testname, (resp,)) in results
+    for (testname, (resp,), duration) in results
         push!(completed_tests, testname)
         if isa(resp, Test.DefaultTestSet)
+            resp.time_end = resp.time_start + duration
             Test.push_testset(resp)
             Test.record(o_ts, resp)
             Test.pop_testset()
-        elseif isa(resp, Tuple{Int,Int})
-            fake = Test.DefaultTestSet(testname)
-            for i in 1:resp[1]
-                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing))
-            end
-            for i in 1:resp[2]
-                Test.record(fake, Test.Broken(:test, nothing))
-            end
-            Test.push_testset(fake)
-            Test.record(o_ts, fake)
-            Test.pop_testset()
-        elseif isa(resp, RemoteException) && isa(resp.captured.ex, Test.TestSetException)
-            println("Worker $(resp.pid) failed running test $(testname):")
-            Base.showerror(stdout, resp.captured)
-            println()
+        elseif isa(resp, Test.TestSetException)
             fake = Test.DefaultTestSet(testname)
-            for i in 1:resp.captured.ex.pass
-                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing))
+            fake.time_end = fake.time_start + duration
+            for i in 1:resp.pass
+                Test.record(fake, Test.Pass(:test, nothing, nothing, nothing, LineNumberNode(@__LINE__, @__FILE__)))
             end
-            for i in 1:resp.captured.ex.broken
+            for i in 1:resp.broken
                 Test.record(fake, Test.Broken(:test, nothing))
             end
-            for t in resp.captured.ex.errors_and_fails
+            for t in resp.errors_and_fails
                 Test.record(fake, t)
             end
             Test.push_testset(fake)
@@ -357,7 +408,8 @@ cd(@__DIR__) do
             # the test runner itself had some problem, so we may have hit a segfault,
             # deserialization errors or something similar.  Record this testset as Errored.
             fake = Test.DefaultTestSet(testname)
-            Test.record(fake, Test.Error(:test_error, testname, nothing, Any[(resp, [])], LineNumberNode(1)))
+            fake.time_end = fake.time_start + duration
+            Test.record(fake, Test.Error(:nontest_error, testname, nothing, Any[(resp, [])], LineNumberNode(1)))
             Test.push_testset(fake)
             Test.record(o_ts, fake)
             Test.pop_testset()
@@ -371,7 +423,9 @@ cd(@__DIR__) do
         Test.record(o_ts, fake)
         Test.pop_testset()
     end
+    Test.TESTSET_PRINT_ENABLE[] = true
     println()
+    # o_ts.verbose = true # set to true to show all timings when successful
     Test.print_test_results(o_ts, 1)
     if !o_ts.anynonpass
         println("    \033[32;1mSUCCESS\033[0m")
diff --git a/test/ryu.jl b/test/ryu.jl
index 9970942575e0a8..cf60e4867e2362 100644
--- a/test/ryu.jl
+++ b/test/ryu.jl
@@ -544,6 +544,15 @@ end # Float16
         @test Ryu.writefixed(7.018232e-82, 6) == "0.000000"
     end
 
+    @testset "Trimming of trailing zeros" begin
+        @test Ryu.writefixed(0.0, 1, false, false, false, UInt8('.'), true) == "0"
+        @test Ryu.writefixed(1.0, 1, false, false, false, UInt8('.'), true) == "1"
+        @test Ryu.writefixed(2.0, 1, false, false, false, UInt8('.'), true) == "2"
+
+        @test Ryu.writefixed(1.25e+5, 0, false, false, false, UInt8('.'), true) == "125000"
+        @test Ryu.writefixed(1.25e+5, 1, false, false, false, UInt8('.'), true) == "125000"
+        @test Ryu.writefixed(1.25e+5, 2, false, false, false, UInt8('.'), true) == "125000"
+    end
 end # fixed
 
 @testset "Ryu.writeexp" begin
@@ -736,6 +745,12 @@ end
     @test Ryu.writeexp(1e+83, 1) == "1.0e+83"
 end
 
+@testset "Consistency of trimtrailingzeros" begin
+    @test Ryu.writeexp(0.0, 1, false, false, false, UInt8('e'), UInt8('.'), true) == "0e+00"
+    @test Ryu.writeexp(1.0, 1, false, false, false, UInt8('e'), UInt8('.'), true) == "1e+00"
+    @test Ryu.writeexp(2.0, 1, false, false, false, UInt8('e'), UInt8('.'), true) == "2e+00"
+end
+
 end # exp
 
 @testset "compact" begin
diff --git a/test/secretbuffer.jl b/test/secretbuffer.jl
index aea2a662766c92..df67204dd63baa 100644
--- a/test/secretbuffer.jl
+++ b/test/secretbuffer.jl
@@ -99,6 +99,7 @@ using Test
         @test position(sb) == 0
         skip(sb, sb.size)
         @test position(sb) == sb.size
+        shred!(sb)
     end
     @testset "seekend" begin
         sb = SecretBuffer("hello")
@@ -108,7 +109,6 @@ using Test
     end
     @testset "position" begin
         sb = SecretBuffer("Julia")
-        println("testing position")
         initial_pos = (position(sb))
         seek(sb,2)
         mid_pos = position(sb)
@@ -120,5 +120,6 @@ using Test
         sb1 = SecretBuffer("hello")
         sb2 = SecretBuffer("juliaisawesome")
         @test hash(sb1, UInt(5)) === hash(sb2, UInt(5))
+        shred!(sb1); shred!(sb2)
     end
 end
diff --git a/test/sets.jl b/test/sets.jl
index b090a3d7097fae..b16ced60b8aaa2 100644
--- a/test/sets.jl
+++ b/test/sets.jl
@@ -22,6 +22,7 @@ using Dates
         @test isa(Set(sin(x) for x = 1:3), Set{Float64})
         @test isa(Set(f17741(x) for x = 1:3), Set{Int})
         @test isa(Set(f17741(x) for x = -1:1), Set{Integer})
+        @test isa(Set(f17741(x) for x = 1:0), Set{Integer})
     end
     let s1 = Set(["foo", "bar"]), s2 = Set(s1)
         @test s1 == s2
@@ -138,6 +139,10 @@ end
     @test !in(200,s)
 end
 
+@testset "copy(::KeySet) (issue #41537)" begin
+    @test union(keys(Dict(1=>2, 3=>4))) == copy(keys(Dict(1=>2, 3=>4))) == Set([1,3])
+end
+
 @testset "copy!" begin
     for S = (Set, BitSet)
         s = S([1, 2])
@@ -146,6 +151,9 @@ end
             @test s === copy!(s, BitSet(a)) == S(a)
         end
     end
+    s = Set([1, 2])
+    s2 = copy(s)
+    @test copy!(s, s) == s2
 end
 
 @testset "sizehint, empty" begin
@@ -220,6 +228,16 @@ end
     s2 = Set([nothing])
     union!(s2, [nothing])
     @test s2 == Set([nothing])
+
+    @testset "promotion" begin
+        ints = [1:5, [1, 2], Set([1, 2])]
+        floats = [2:0.1:3, [2.0, 3.5], Set([2.0, 3.5])]
+
+        for a in ints, b in floats
+            @test eltype(union(a, b)) == Float64
+            @test eltype(union(b, a)) == Float64
+        end
+    end
 end
 
 @testset "intersect" begin
@@ -227,6 +245,9 @@ end
         s = S([1,2]) ∩ S([3,4])
         @test s == S()
         s = intersect(S([5,6,7,8]), S([7,8,9]))
+        slong = S(collect(3:63))
+        # test #36339 length/order short-cut
+        @test intersect(S([5,6,7,8]), slong) == intersect(slong, S([5,6,7,8]))
         @test s == S([7,8])
         @test intersect(S([2,3,1]), S([4,2,3]), S([5,4,3,2])) == S([2,3])
         let s1 = S([1,2,3])
@@ -238,7 +259,9 @@ end
         end
     end
     @test intersect(Set([1]), BitSet()) isa Set{Int}
-    @test intersect(BitSet([1]), Set()) isa BitSet
+    @test intersect(BitSet([1]), Set()) isa Set{Any}
+    @test intersect(BitSet([1]), Set([1])) isa BitSet
+    @test intersect(BitSet([1]), Set([1]), Set([1])) isa BitSet
     @test intersect([1], BitSet()) isa Vector{Int}
     # intersect must uniquify
     @test intersect([1, 2, 1]) == intersect!([1, 2, 1]) == [1, 2]
@@ -249,7 +272,22 @@ end
     y = () ∩ (42,)
     @test isempty(x)
     @test isempty(y)
-    @test eltype(x) == eltype(y) == Union{}
+
+    # Discussed in PR#41769
+    @testset "promotion" begin
+        ints = [1:5, [1, 2], Set([1, 2])]
+        floats = [2:0.1:3, [2.0, 3.5], Set([2.0, 3.5])]
+
+        for a in ints, b in floats
+            @test eltype(intersect(a, b)) == Float64
+            @test eltype(intersect(b, a)) == Float64
+            @test eltype(intersect(a, a, b)) == Float64
+        end
+    end
+
+    # 3-argument version is correctly covered
+    @test intersect(Set([1,2]), Set([2]), Set([1,2,3])) == Set([2])
+    @test intersect(Set([1,2]), Set([2]), Set([1.,2,3])) == Set([2.])
 end
 
 @testset "setdiff" begin
@@ -388,6 +426,9 @@ end
     @test in(1, u)
     @test in(2, u)
     @test length(u) == 2
+    @test unique(iseven, []) == []
+    # type promotion
+    @test unique(x -> x^2, [1, 3.]) == [1, 3.]
     @test @inferred(unique(iseven, [5, 1, 8, 9, 3, 4, 10, 7, 2, 6])) == [5, 8]
     @test @inferred(unique(x->x^2, Integer[3, -4, 5, 4])) == Integer[3, -4, 5]
     @test @inferred(unique(iseven, Integer[3, -4, 5, 4]; seen=Set{Bool}())) == Integer[3, -4]
@@ -410,6 +451,8 @@ end
 end
 
 @testset "unique!" begin
+    u = []
+    @test unique!(u) === u
     u = [1,1,3,2,1]
     @inferred(unique!(u))
     @test u == [1,3,2]
@@ -468,6 +511,7 @@ end
     @test allunique(Date(2018, 8, 7):Day(1):Date(2018, 8, 11))  # JuliaCon 2018
     @test allunique(DateTime(2018, 8, 7):Hour(1):DateTime(2018, 8, 11))
     @test allunique(('a':1:'c')[1:2]) == true
+    @test allunique(collect(1:1001))
     for r = (Base.OneTo(-1), Base.OneTo(0), Base.OneTo(1), Base.OneTo(5),
              1:0, 1:1, 1:2, 1:10, 1:.5:.5, 1:.5:1, 1:.5:10, 3:-2:5, 3:-2:3, 3:-2:1,
              StepRangeLen(1.0, 2.0, 0), StepRangeLen(1.0, 2.0, 2), StepRangeLen(1.0, 2.0, 3),
@@ -476,6 +520,35 @@ end
         @test allunique(r) == invoke(allunique, Tuple{Any}, r)
     end
 end
+
+@testset "allequal" begin
+    @test allequal(Set())
+    @test allequal(Set(1))
+    @test !allequal(Set([1, 2]))
+    @test allequal(Dict())
+    @test allequal(Dict(:a => 1))
+    @test !allequal(Dict(:a => 1, :b => 2))
+    @test allequal([])
+    @test allequal([1])
+    @test allequal([1, 1])
+    @test !allequal([1, 1, 2])
+    @test allequal([:a, :a])
+    @test !allequal([:a, :b])
+    @test !allequal(1:2)
+    @test allequal(1:1)
+    @test !allequal(4.0:0.3:7.0)
+    @test allequal(4:-1:5)       # empty range
+    @test !allequal(7:-1:1)       # negative step
+    @test !allequal(Date(2018, 8, 7):Day(1):Date(2018, 8, 11))  # JuliaCon 2018
+    @test !allequal(DateTime(2018, 8, 7):Hour(1):DateTime(2018, 8, 11))
+    @test allequal(StepRangeLen(1.0, 0.0, 2))
+    @test !allequal(StepRangeLen(1.0, 1.0, 2))
+    @test allequal(LinRange(1, 1, 0))
+    @test allequal(LinRange(1, 1, 1))
+    @test allequal(LinRange(1, 1, 2))
+    @test !allequal(LinRange(1, 2, 2))
+end
+
 @testset "filter(f, ::$S)" for S = (Set, BitSet)
     s = S([1,2,3,4])
     @test s !== filter( isodd, s) == S([1,3])
@@ -586,11 +659,14 @@ end
 @testset "replace! & replace" begin
     a = [1, 2, 3, 1]
     @test replace(x -> iseven(x) ? 2x : x, a) == [1, 4, 3, 1]
+    @test replace(x -> iseven(x) ? 2x : x, Tuple(a)) === (1, 4, 3, 1)
     @test replace!(x -> iseven(x) ? 2x : x, a) === a
     @test a == [1, 4, 3, 1]
     @test replace(a, 1=>0) == [0, 4, 3, 0]
+    @test replace(Tuple(a), 1=>0) === (0, 4, 3, 0)
     for count = (1, 0x1, big(1))
         @test replace(a, 1=>0, count=count) == [0, 4, 3, 1]
+        @test replace(Tuple(a), 1=>0, count=count) === (0, 4, 3, 1)
     end
     @test replace!(a, 1=>2) === a
     @test a == [2, 4, 3, 2]
@@ -615,6 +691,7 @@ end
 
     for count = (0, 0x0, big(0)) # count == 0 --> no replacements
         @test replace([1, 2], 1=>0, 2=>0; count) == [1, 2]
+        @test replace((1, 2), 1=>0, 2=>0; count) === (1, 2)
         for dict = (Dict(1=>2, 2=>3), IdDict(1=>2, 2=>3))
             @test replace(dict, (1=>2) => (1=>3); count) == dict
         end
@@ -646,9 +723,31 @@ end
     x = @inferred replace([1, missing], missing=>2, 1=>missing)
     @test isequal(x, [missing, 2]) && x isa Vector{Union{Int, Missing}}
 
+    # eltype promotion for dicts
+    d = Dict(1=>2, 3=>4)
+    f = replace(d, (1=>2) => (1=>nothing))
+    @test f == Dict(3=>4, 1=>nothing)
+    @test eltype(f) == Pair{Int, Union{Nothing, Int}}
+    f = replace(d, (1=>2) => (1=>missing), (3=>4)=>(3=>missing))
+    @test valtype(f) == Union{Missing,Int}
+    f = replace(d, (1=>2) => (1=>'a'), (3=>4)=>(3=>'b'))
+    @test valtype(f) == Any
+    @test f == Dict(3=>'b', 1=>'a')
+
+    # eltype promotion for sets
+    s = Set([1, 2, 3])
+    f = replace(s, 2=>missing, 3=>nothing)
+    @test f == Set([1, missing, nothing])
+    @test eltype(f) == Union{Int,Missing,Nothing}
+    f = replace(s, 2=>'a')
+    @test eltype(f) == Any
+    @test f == Set([1, 3, 'a'])
+
     # test that isequal is used
     @test replace([NaN, 1.0], NaN=>0.0) == [0.0, 1.0]
+    @test replace((NaN, 1.0), NaN=>0.0) === (0.0, 1.0)
     @test replace([1, missing], missing=>0) == [1, 0]
+    @test replace((1, missing), missing=>0) === (1, 0)
 end
 
 @testset "⊆, ⊊, ⊈, ⊇, ⊋, ⊉, <, <=, issetequal" begin
@@ -730,3 +829,44 @@ Base.IteratorSize(::Type{<:OpenInterval}) = Base.SizeUnknown()
     @test 3 ∈ i
     @test issubset(3, i)
 end
+
+@testset "IdSet" begin
+    a = [1]
+    b = [2]
+    c = [3]
+    d = [4]
+    e = [5]
+    A = Base.IdSet{Vector{Int}}([a, b, c, d])
+    @test !isempty(A)
+    B = copy(A)
+    @test A ⊆ B
+    @test B ⊆ A
+    A = filter!(x->isodd(x[1]), A)
+    @test A ⊆ B
+    @test !(B ⊆ A)
+    @test !isempty(A)
+    a_ = pop!(A, a)
+    @test a_ === a
+    @test !isempty(A)
+    e_ = pop!(A, a, e)
+    @test e_ === e
+    @test !isempty(A)
+    A = empty!(A)
+    @test isempty(A)
+end
+
+@testset "⊊, ⊋" begin
+    @test !((1, 2) ⊊ (1, 2, 2))
+    @test !((1, 2, 2) ⊋ (1, 2))
+end
+
+@testset "AbstractSet & Fallback" begin
+    mutable struct TestSet{T} <: AbstractSet{T}
+        set::Set{T}
+        function TestSet{T}() where T
+            new{T}(Set{T}())
+        end
+    end
+    set = TestSet{Any}()
+    @test sizehint!(set, 1) === set
+end
diff --git a/test/show.jl b/test/show.jl
index f2cfc21a9578ff..48768c6e2c8be6 100644
--- a/test/show.jl
+++ b/test/show.jl
@@ -1,6 +1,6 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-using LinearAlgebra, SparseArrays
+using LinearAlgebra
 
 # For curmod_*
 include("testenv.jl")
@@ -20,6 +20,9 @@ showstr(x, kv::Pair...) = sprint((io,x) -> show(IOContext(io, :limit => true, :d
     @test ioc.io == io
     @test ioc.dict == Base.ImmutableDict(Base.ImmutableDict{Symbol, Any}(:x, 1),
                                          :y => 2)
+    @test Base.ImmutableDict((key => ioc[key] for key in keys(ioc))...) == ioc.dict
+    @test keys(IOBuffer()) isa Base.KeySet
+    @test length(keys(IOBuffer())) == 0
 end
 
 @test replstr(Array{Any}(undef, 2)) == "2-element Vector{Any}:\n #undef\n #undef"
@@ -149,6 +152,8 @@ end
 @test_repr ":(:(:(x)))"
 @test_repr "-\"\""
 @test_repr "-(<=)"
+@test_repr "\$x"
+@test_repr "\$(\"x\")"
 
 # order of operations
 @test_repr "x + y * z"
@@ -169,6 +174,7 @@ end
 @test_repr "a => b in c"
 @test_repr "*(a..., b)"
 @test_repr "+(a, b, c...)"
+@test_repr "f((x...)...)"
 
 # precedence tie resolution
 @test_repr "(a * b) * (c * d)"
@@ -205,6 +211,11 @@ end
 @test_repr "import A.B.C: a, x, y.z"
 @test_repr "import ..A: a, x, y.z"
 @test_repr "import A.B, C.D"
+@test_repr "import A as B"
+@test_repr "import A.x as y"
+@test_repr "import A: x as y"
+@test_repr "import A.B: x, y as z"
+@test_repr "import A.B: x, y as z, a.b as c, xx"
 
 # keyword args (issue #34023 and #32775)
 @test_repr "f(a, b=c)"
@@ -227,6 +238,16 @@ end
 @test repr(:(;)) == ":((;))"
 @test repr(:(-(;x))) == ":(-(; x))"
 @test repr(:(+(1, 2;x))) == ":(+(1, 2; x))"
+@test repr(:(1:2...)) == ":(1:2...)"
+
+@test repr(:(1 := 2)) == ":(1 := 2)"
+@test repr(:(1 ≔ 2)) == ":(1 ≔ 2)"
+@test repr(:(1 ⩴ 2)) == ":(1 ⩴ 2)"
+@test repr(:(1 ≕ 2)) == ":(1 ≕ 2)"
+
+@test repr(:(∓ 1)) == ":(∓1)"
+@test repr(:(± 1)) == ":(±1)"
+
 for ex in [Expr(:call, :f, Expr(:(=), :x, 1)),
            Expr(:ref, :f, Expr(:(=), :x, 1)),
            Expr(:vect, 1, 2, Expr(:kw, :x, 1)),
@@ -247,6 +268,7 @@ end
 @test repr(Expr(:import, :Foo)) == ":(\$(Expr(:import, :Foo)))"
 @test repr(Expr(:import, Expr(:(.), ))) == ":(\$(Expr(:import, :(\$(Expr(:.))))))"
 
+
 @test repr(Expr(:using, Expr(:(.), :A))) == ":(using A)"
 @test repr(Expr(:using, Expr(:(.), :A),
                         Expr(:(.), :B))) == ":(using A, B)"
@@ -472,6 +494,16 @@ end
 @test sprint(show, Symbol("'")) == "Symbol(\"'\")"
 @test_repr "var\"'\" = 5"
 
+# isidentifier
+@test Meta.isidentifier("x")
+@test Meta.isidentifier("x1")
+@test !Meta.isidentifier("x.1")
+@test !Meta.isidentifier("1x")
+@test Meta.isidentifier(Symbol("x"))
+@test Meta.isidentifier(Symbol("x1"))
+@test !Meta.isidentifier(Symbol("x.1"))
+@test !Meta.isidentifier(Symbol("1x"))
+
 # issue #32408: Printing of names which are invalid identifiers
 # Invalid identifiers which need `var` quoting:
 @test sprint(show, Expr(:call, :foo, Symbol("##")))   == ":(foo(var\"##\"))"
@@ -488,7 +520,15 @@ end
 # Hidden macro names
 @test sprint(show, Expr(:macrocall, Symbol("@#"), nothing, :a)) == ":(@var\"#\" a)"
 
-# issue #12477
+# PR #38418
+module M1 var"#foo#"() = 2 end
+@test occursin("M1.var\"#foo#\"", sprint(show, M1.var"#foo#", context = :module=>@__MODULE__))
+
+# PR #43932
+module var"#43932#" end
+@test endswith(sprint(show, var"#43932#"), ".var\"#43932#\"")
+
+# issue #12477
 @test sprint(show,  Union{Int64, Int32, Int16, Int8, Float64}) == "Union{Float64, Int16, Int32, Int64, Int8}"
 
 # Function and array reference precedence
@@ -610,7 +650,7 @@ end
 # `where` syntax
 @test_repr "A where T<:B"
 @test_repr "A where T<:(Array{T} where T<:Real)"
-@test_repr "Array{T} where T<:Array{S} where S<:Real"
+@test_repr "Array{T} where {S<:Real, T<:Array{S}}"
 @test_repr "x::Array{T} where T"
 @test_repr "(a::b) where T"
 @test_repr "a::b where T"
@@ -694,21 +734,6 @@ let filename = tempname()
     rm(filename)
 end
 
-# issue #12960
-mutable struct T12960 end
-import Base.zero
-Base.zero(::Type{T12960}) = T12960()
-Base.zero(x::T12960) = T12960()
-let
-    A = sparse(1.0I, 3, 3)
-    B = similar(A, T12960)
-    @test sprint(show, B)  == "\n #undef             ⋅            ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef"
-    @test sprint(print, B) == "\n #undef             ⋅            ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef"
-    B[1,2] = T12960()
-    @test sprint(show, B)  == "\n #undef          T12960()        ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef"
-    @test sprint(print, B) == "\n #undef          T12960()        ⋅    \n       ⋅      #undef             ⋅    \n       ⋅            ⋅      #undef"
-end
-
 # issue #13127
 function f13127()
     buf = IOBuffer()
@@ -757,7 +782,7 @@ else
 end
 
 # Method location correction (Revise integration)
-dummyloc(m::Method) = :nofile, 123456789
+dummyloc(m::Method) = :nofile, Int32(123456789)
 Base.methodloc_callback[] = dummyloc
 let repr = sprint(show, "text/plain", methods(Base.inbase))
     @test occursin("nofile:123456789", repr)
@@ -775,20 +800,27 @@ Base.methodloc_callback[] = nothing
     @test replstr(Matrix(1.0I, 10, 10)) == "10×10 Matrix{Float64}:\n 1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0\n 0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0\n 0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0\n 0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0\n 0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0\n 0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0\n 0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0  0.0\n 0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0  0.0\n 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0\n 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0"
     # an array too long vertically to fit on screen, and too long horizontally:
     @test replstr(Vector(1.:100.)) == "100-element Vector{Float64}:\n   1.0\n   2.0\n   3.0\n   4.0\n   5.0\n   6.0\n   7.0\n   8.0\n   9.0\n  10.0\n   ⋮\n  92.0\n  93.0\n  94.0\n  95.0\n  96.0\n  97.0\n  98.0\n  99.0\n 100.0"
-    @test occursin(r"1×100 (LinearAlgebra\.)?Adjoint{Float64, Vector{Float64}}:\n 1.0  2.0  3.0  4.0  5.0  6.0  7.0  …  95.0  96.0  97.0  98.0  99.0  100.0", replstr(Vector(1.:100.)'))
+    @test occursin(r"1×100 adjoint\(::Vector{Float64}\) with eltype Float64:\n 1.0  2.0  3.0  4.0  5.0  6.0  7.0  …  95.0  96.0  97.0  98.0  99.0  100.0", replstr(Vector(1.:100.)'))
     # too big in both directions to fit on screen:
     @test replstr((1.:100.)*(1:100)') == "100×100 Matrix{Float64}:\n   1.0    2.0    3.0    4.0    5.0    6.0  …    97.0    98.0    99.0    100.0\n   2.0    4.0    6.0    8.0   10.0   12.0      194.0   196.0   198.0    200.0\n   3.0    6.0    9.0   12.0   15.0   18.0      291.0   294.0   297.0    300.0\n   4.0    8.0   12.0   16.0   20.0   24.0      388.0   392.0   396.0    400.0\n   5.0   10.0   15.0   20.0   25.0   30.0      485.0   490.0   495.0    500.0\n   6.0   12.0   18.0   24.0   30.0   36.0  …   582.0   588.0   594.0    600.0\n   7.0   14.0   21.0   28.0   35.0   42.0      679.0   686.0   693.0    700.0\n   8.0   16.0   24.0   32.0   40.0   48.0      776.0   784.0   792.0    800.0\n   9.0   18.0   27.0   36.0   45.0   54.0      873.0   882.0   891.0    900.0\n  10.0   20.0   30.0   40.0   50.0   60.0      970.0   980.0   990.0   1000.0\n   ⋮                                  ⋮    ⋱                          \n  92.0  184.0  276.0  368.0  460.0  552.0     8924.0  9016.0  9108.0   9200.0\n  93.0  186.0  279.0  372.0  465.0  558.0     9021.0  9114.0  9207.0   9300.0\n  94.0  188.0  282.0  376.0  470.0  564.0     9118.0  9212.0  9306.0   9400.0\n  95.0  190.0  285.0  380.0  475.0  570.0     9215.0  9310.0  9405.0   9500.0\n  96.0  192.0  288.0  384.0  480.0  576.0  …  9312.0  9408.0  9504.0   9600.0\n  97.0  194.0  291.0  388.0  485.0  582.0     9409.0  9506.0  9603.0   9700.0\n  98.0  196.0  294.0  392.0  490.0  588.0     9506.0  9604.0  9702.0   9800.0\n  99.0  198.0  297.0  396.0  495.0  594.0     9603.0  9702.0  9801.0   9900.0\n 100.0  200.0  300.0  400.0  500.0  600.0     9700.0  9800.0  9900.0  10000.0"
 
     # test that no spurious visual lines are added when one element spans multiple lines
     v = fill!(Array{Any}(undef, 9), 0)
     v[1] = "look I'm wide! --- " ^ 9
-    @test replstr(v) == "9-element Vector{Any}:\n  \"look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- \"\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0"
-    @test replstr([fill(0, 9) v]) == "9×2 Matrix{Any}:\n 0  …   \"look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- \"\n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0"
+    r = replstr(v)
+    @test startswith(r, "9-element Vector{Any}:\n  \"look I'm wide! ---")
+    @test endswith(r, "look I'm wide! --- \"\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0")
+
     # test vertical/diagonal ellipsis
     v = fill!(Array{Any}(undef, 50), 0)
     v[1] = "look I'm wide! --- " ^ 9
-    @test replstr(v) == "50-element Vector{Any}:\n  \"look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- \"\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n ⋮\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0"
-    @test replstr([fill(0, 50) v]) == "50×2 Matrix{Any}:\n 0  …   \"look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- look I'm wide! --- \"\n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0\n 0     0\n ⋮  ⋱  \n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0\n 0     0"
+    r = replstr(v)
+    @test startswith(r, "50-element Vector{Any}:\n  \"look I'm wide! ---")
+    @test endswith(r, "look I'm wide! --- \"\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n ⋮\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0\n 0")
+
+    r = replstr([fill(0, 50) v])
+    @test startswith(r, "50×2 Matrix{Any}:\n 0  …   \"look I'm wide! ---")
+    @test endswith(r, "look I'm wide! --- \"\n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0\n 0     0\n ⋮  ⋱  \n 0     0\n 0     0\n 0     0\n 0     0\n 0  …  0\n 0     0\n 0     0\n 0     0\n 0     0")
 
     # issue #34659
     @test replstr(Int32[]) == "Int32[]"
@@ -799,6 +831,45 @@ Base.methodloc_callback[] = nothing
     @test replstr([zeros(3,0),zeros(2,0)]) == "2-element Vector{Matrix{Float64}}:\n 3×0 Matrix{Float64}\n 2×0 Matrix{Float64}"
 end
 
+# string show with elision
+@testset "string show with elision" begin
+    @testset "elision logic" begin
+        strs = ["A", "∀", "∀A", "A∀", "😃"]
+        for limit = 0:100, len = 0:100, str in strs
+            str = str^len
+            str = str[1:nextind(str, 0, len)]
+            out = sprint() do io
+                show(io, MIME"text/plain"(), str; limit)
+            end
+            lower = length("\"\" ⋯ $(ncodeunits(str)) bytes ⋯ \"\"")
+            limit = max(limit, lower)
+            if length(str) + 2 ≤ limit
+                @test eval(Meta.parse(out)) == str
+            else
+                @test limit-!isascii(str) <= length(out) <= limit
+                re = r"(\"[^\"]*\") ⋯ (\d+) bytes ⋯ (\"[^\"]*\")"
+                m = match(re, out)
+                head = eval(Meta.parse(m.captures[1]))
+                tail = eval(Meta.parse(m.captures[3]))
+                skip = parse(Int, m.captures[2])
+                @test startswith(str, head)
+                @test endswith(str, tail)
+                @test ncodeunits(str) ==
+                    ncodeunits(head) + skip + ncodeunits(tail)
+            end
+        end
+    end
+
+    @testset "default elision limit" begin
+        r = replstr("x"^1000)
+        @test length(r) == 7*80
+        @test r == repr("x"^271) * " ⋯ 459 bytes ⋯ " * repr("x"^270)
+        r = replstr(["x"^1000])
+        @test length(r) < 120
+        @test r == "1-element Vector{String}:\n " * repr("x"^31) * " ⋯ 939 bytes ⋯ " * repr("x"^30)
+    end
+end
+
 # Issue 14121
 @test_repr "(A'x)'"
 
@@ -1122,7 +1193,7 @@ let x = [], y = [], z = Base.ImmutableDict(x => y)
         """
     dz = sprint(dump, z)
     @test 10 < countlines(IOBuffer(dz)) < 40
-    @test sum(x -> 1, eachmatch(r"circular reference", dz)) == 4
+    @test sum(Returns(1), eachmatch(r"circular reference", dz)) == 4
 end
 
 # PR 16221
@@ -1267,6 +1338,22 @@ test_repr("(:).a")
 @test repr(NTuple{7,Int64}) == "NTuple{7, Int64}"
 @test repr(Tuple{Float64, Float64, Float64, Float64}) == "NTuple{4, Float64}"
 @test repr(Tuple{Float32, Float32, Float32}) == "Tuple{Float32, Float32, Float32}"
+@test repr(Tuple{String, Int64, Int64, Int64}) == "Tuple{String, Int64, Int64, Int64}"
+@test repr(Tuple{String, Int64, Int64, Int64, Int64}) == "Tuple{String, Vararg{Int64, 4}}"
+
+@testset "issue #42931" begin
+    @test repr(NTuple{4, :A}) == "NTuple{4, :A}"
+    @test repr(NTuple{3, :A}) == "Tuple{:A, :A, :A}"
+    @test repr(NTuple{2, :A}) == "Tuple{:A, :A}"
+    @test repr(NTuple{1, :A}) == "Tuple{:A}"
+    @test repr(NTuple{0, :A}) == "Tuple{}"
+
+    @test repr(Tuple{:A, :A, :A, :B}) == "Tuple{:A, :A, :A, :B}"
+    @test repr(Tuple{:A, :A, :A, :A}) == "NTuple{4, :A}"
+    @test repr(Tuple{:A, :A, :A}) == "Tuple{:A, :A, :A}"
+    @test repr(Tuple{:A}) == "Tuple{:A}"
+    @test repr(Tuple{}) == "Tuple{}"
+end
 
 # Test that REPL/mime display of invalid UTF-8 data doesn't throw an exception:
 @test isa(repr("text/plain", String(UInt8[0x00:0xff;])), String)
@@ -1295,10 +1382,14 @@ end
 (::T20332{T})(x) where T = 0
 
 let m = which(T20332{Int}(), (Int,)),
-    mi = ccall(:jl_specializations_get_linfo, Ref{Core.MethodInstance}, (Any, Any, Any, UInt),
-               m, Tuple{T20332{T}, Int} where T, Core.svec(), typemax(UInt))
+    mi = Core.Compiler.specialize_method(m, Tuple{T20332{T}, Int} where T, Core.svec())
     # test that this doesn't throw an error
     @test occursin("MethodInstance for", repr(mi))
+    # issue #41928
+    str = sprint(mi; context=:color=>true) do io, mi
+        printstyled(io, mi; color=:light_cyan)
+    end
+    @test !occursin("\U1b[0m", str)
 end
 
 @test sprint(show, Main) == "Main"
@@ -1331,6 +1422,42 @@ end
 
 @test static_shown(QuoteNode(:x)) == ":(:x)"
 
+# PR #38049
+@test static_shown(sum) == "Base.sum"
+@test static_shown(+) == "Base.:(+)"
+@test static_shown(typeof(+)) == "typeof(Base.:(+))"
+
+struct var"#X#" end
+var"#f#"() = 2
+struct var"%X%" end  # Invalid name without '#'
+
+# (Just to make this test more sustainable,) we don't necesssarily need to test the exact
+# output format, just ensure that it prints at least the parts we expect:
+@test occursin(".var\"#X#\"", static_shown(var"#X#"))  # Leading `.` tests it printed a module name.
+@test occursin(r"Set{var\"[^\"]+\"} where var\"[^\"]+\"", static_shown(Set{<:Any}))
+
+# Test that static_shown is returning valid, correct julia expressions
+@testset "static_show() prints valid julia" begin
+    @testset for v in (
+            var"#X#",
+            var"#X#"(),
+            var"%X%",
+            var"%X%"(),
+            Vector,
+            Vector{<:Any},
+            Vector{var"#X#"},
+            +,
+            typeof(+),
+            var"#f#",
+            typeof(var"#f#"),
+        )
+        @test v == eval(Meta.parse(static_shown(v)))
+    end
+end
+
+# Test that static show prints something reasonable for `<:Function` types
+@test static_shown(:) == "Base.Colon()"
+
 # Test @show
 let fname = tempname()
     try
@@ -1345,14 +1472,14 @@ let fname = tempname()
     end
 end
 
-struct f_with_params{t} <: Function
-end
-
+module ModFWithParams
+struct f_with_params{t} <: Function end
 (::f_with_params)(x) = 2x
+end
 
 let io = IOBuffer()
-    show(io, MIME"text/html"(), f_with_params.body.name.mt)
-    @test occursin("f_with_params", String(take!(io)))
+    show(io, MIME"text/html"(), ModFWithParams.f_with_params.body.name.mt)
+    @test occursin("ModFWithParams.f_with_params", String(take!(io)))
 end
 
 @testset "printing of Val's" begin
@@ -1511,23 +1638,109 @@ end
 end
 
 let x = TypeVar(:_), y = TypeVar(:_)
-    @test repr(UnionAll(x, UnionAll(y, Pair{x,y}))) == "Pair{_1, _2} where _2 where _1"
-    @test repr(UnionAll(x, UnionAll(y, Pair{UnionAll(x,Ref{x}),y}))) == "Pair{Ref{_1} where _1, _1} where _1"
+    @test repr(UnionAll(x, UnionAll(y, Pair{x,y}))) == "Pair"
+    @test repr(UnionAll(y, UnionAll(x, Pair{x,y}))) == "Pair{_2, _1} where {_1, _2}"
+    @test repr(UnionAll(x, UnionAll(y, Pair{UnionAll(x,Ref{x}),y}))) == "Pair{Ref}"
+    @test repr(UnionAll(y, UnionAll(x, Pair{UnionAll(y,Ref{x}),y}))) == "Pair{Ref{_2}, _1} where {_1, _2}"
+end
+
+let x, y, x
     x = TypeVar(:a)
     y = TypeVar(:a)
     z = TypeVar(:a)
-    @test repr(UnionAll(z, UnionAll(x, UnionAll(y, Tuple{x,y,z})))) == "Tuple{a1, a2, a} where a2 where a1 where a"
+    @test repr(UnionAll(z, UnionAll(x, UnionAll(y, Tuple{x,y,z})))) == "Tuple{a1, a2, a} where {a, a1, a2}"
+    @test repr(UnionAll(z, UnionAll(x, UnionAll(y, Tuple{z,y,x})))) == "Tuple{a, a2, a1} where {a, a1, a2}"
+end
+
+let x = TypeVar(:_, Number), y = TypeVar(:_, Number)
+    @test repr(UnionAll(x, UnionAll(y, Pair{x,y}))) == "Pair{_1, _2} where {_1<:Number, _2<:Number}"
+    @test repr(UnionAll(y, UnionAll(x, Pair{x,y}))) == "Pair{_2, _1} where {_1<:Number, _2<:Number}"
+    @test repr(UnionAll(x, UnionAll(y, Pair{UnionAll(x,Ref{x}),y}))) == "Pair{Ref{_1} where _1<:Number, _1} where _1<:Number"
+    @test repr(UnionAll(y, UnionAll(x, Pair{UnionAll(y,Ref{x}),y}))) == "Pair{Ref{_2}, _1} where {_1<:Number, _2<:Number}"
+end
+
+
+is_juliarepr(x) = eval(Meta.parse(repr(x))) == x
+@testset "unionall types" begin
+    X = TypeVar(gensym())
+    Y = TypeVar(gensym(), Ref, Ref)
+    x, y, z = TypeVar(:a), TypeVar(:a), TypeVar(:a)
+    struct TestTVUpper{A<:Integer} end
+
+    # named typevars
+    @test is_juliarepr(Ref{A} where A)
+    @test is_juliarepr(Ref{A} where A>:Ref)
+    @test is_juliarepr(Ref{A} where A<:Ref)
+    @test is_juliarepr(Ref{A} where Ref<:A<:Ref)
+    @test is_juliarepr(TestTVUpper{<:Real})
+    @test is_juliarepr(TestTVUpper{<:Integer})
+    @test is_juliarepr(TestTVUpper{<:Signed})
+
+    # typearg order
+    @test is_juliarepr(UnionAll(X, Pair{X,<:Any}))
+    @test is_juliarepr(UnionAll(X, Pair{<:Any,X}))
+
+    # duplicates
+    @test is_juliarepr(UnionAll(X, Pair{X,X}))
+
+    # nesting
+    @test is_juliarepr(UnionAll(X, Ref{Ref{X}}))
+    @test is_juliarepr(Union{T, Int} where T)
+    @test is_juliarepr(Pair{A, <:A} where A)
+
+    # renumbered typevars with same names
+    @test is_juliarepr(UnionAll(z, UnionAll(x, UnionAll(y, Tuple{x,y,z}))))
+
+    # shortened typevar printing
+    @test repr(Ref{<:Any}) == "Ref"
+    @test repr(Pair{1, <:Any}) == "Pair{1}"
+    @test repr(Ref{<:Number}) == "Ref{<:Number}"
+    @test repr(Pair{1, <:Number}) == "Pair{1, <:Number}"
+    @test repr(Ref{<:Ref}) == "Ref{<:Ref}"
+    @test repr(Ref{>:Ref}) == "Ref{>:Ref}"
+    @test repr(Pair{<:Any, 1}) == "Pair{<:Any, 1}"
+    yname = sprint(Base.show_unquoted, Y.name)
+    @test repr(UnionAll(Y, Ref{Y})) == "Ref{$yname} where Ref<:$yname<:Ref"
+    @test endswith(repr(TestTVUpper{<:Real}), "TestTVUpper{<:Real}")
+    @test endswith(repr(TestTVUpper), "TestTVUpper")
+    @test endswith(repr(TestTVUpper{<:Signed}), "TestTVUpper{<:Signed}")
+
+    # exception for tuples
+    @test is_juliarepr(Tuple)
+    @test is_juliarepr(Tuple{})
+    @test is_juliarepr(Tuple{<:Any})
 end
 
 @testset "showarg" begin
+    io = IOBuffer()
+
     A = reshape(Vector(Int16(1):Int16(2*3*5)), 2, 3, 5)
     @test summary(A) == "2×3×5 Array{Int16, 3}"
+
     v = view(A, :, 3, 2:5)
     @test summary(v) == "2×4 view(::Array{Int16, 3}, :, 3, 2:5) with eltype Int16"
+    @test Base.showarg(io, v, false) === nothing
+    @test String(take!(io)) == "view(::Array{Int16, 3}, :, 3, 2:5)"
+
     r = reshape(v, 4, 2)
     @test summary(r) == "4×2 reshape(view(::Array{Int16, 3}, :, 3, 2:5), 4, 2) with eltype Int16"
+    @test Base.showarg(io, r, false) === nothing
+    @test String(take!(io)) == "reshape(view(::Array{Int16, 3}, :, 3, 2:5), 4, 2)"
+
     p = PermutedDimsArray(r, (2, 1))
     @test summary(p) == "2×4 PermutedDimsArray(reshape(view(::Array{Int16, 3}, :, 3, 2:5), 4, 2), (2, 1)) with eltype Int16"
+    @test Base.showarg(io, p, false) === nothing
+    @test String(take!(io)) == "PermutedDimsArray(reshape(view(::Array{Int16, 3}, :, 3, 2:5), 4, 2), (2, 1))"
+
+    p = reinterpret(reshape, Tuple{Float32,Float32}, [1.0f0 3.0f0; 2.0f0 4.0f0])
+    @test summary(p) == "2-element reinterpret(reshape, Tuple{Float32, Float32}, ::Matrix{Float32}) with eltype Tuple{Float32, Float32}"
+    @test Base.showarg(io, p, false) === nothing
+    @test String(take!(io)) == "reinterpret(reshape, Tuple{Float32, Float32}, ::Matrix{Float32})"
+
+    r = Base.IdentityUnitRange(2:2)
+    B = @view ones(2)[r]
+    Base.showarg(io, B, false)
+    @test String(take!(io)) == "view(::Vector{Float64}, $(repr(r)))"
 end
 
 @testset "Methods" begin
@@ -1572,7 +1785,7 @@ end
     # spurious binding resolutions
     show(IOContext(b, :module => TestShowType), Base.Pair)
     @test !Base.isbindingresolved(TestShowType, :Pair)
-    @test String(take!(b)) == "Base.Pair"
+    @test String(take!(b)) == "Core.Pair"
     show(IOContext(b, :module => TestShowType), Base.Complex)
     @test Base.isbindingresolved(TestShowType, :Complex)
     @test String(take!(b)) == "Complex"
@@ -1585,7 +1798,7 @@ end
     @test showstr([Float16(1)]) == "Float16[1.0]"
     @test showstr([[Float16(1)]]) == "Vector{Float16}[[1.0]]"
     @test replstr(Real[Float16(1)]) == "1-element Vector{Real}:\n Float16(1.0)"
-    @test replstr(Array{Real}[Real[1]]) == "1-element Vector{Array{Real, N} where N}:\n [1]"
+    @test replstr(Array{Real}[Real[1]]) == "1-element Vector{Array{Real}}:\n [1]"
     # printing tuples (Issue #25042)
     @test replstr(fill((Int64(1), zeros(Float16, 3)), 1)) ==
                  "1-element Vector{Tuple{Int64, Vector{Float16}}}:\n (1, [0.0, 0.0, 0.0])"
@@ -1601,7 +1814,7 @@ end
 
     # issue #27680
     @test showstr(Set([(1.0,1.0), (2.0,2.0), (3.0, 3.0)])) == (sizeof(Int) == 8 ?
-              "Set([(3.0, 3.0), (2.0, 2.0), (1.0, 1.0)])" :
+              "Set([(1.0, 1.0), (3.0, 3.0), (2.0, 2.0)])" :
               "Set([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0)])")
 
     # issue #27747
@@ -1612,7 +1825,7 @@ end
     end
 
     # issue #25857
-    @test repr([(1,),(1,2),(1,2,3)]) == "Tuple{$Int, Vararg{$Int, N} where N}[(1,), (1, 2), (1, 2, 3)]"
+    @test repr([(1,),(1,2),(1,2,3)]) == "Tuple{$Int, Vararg{$Int}}[(1,), (1, 2), (1, 2, 3)]"
 
     # issues #25466 & #26256
     @test replstr([:A => [1]]) == "1-element Vector{Pair{Symbol, Vector{$Int}}}:\n :A => [1]"
@@ -1628,28 +1841,33 @@ end
     @test showstr(Dict(true=>false)) == "Dict{Bool, Bool}(1 => 0)"
     @test showstr(Dict((1 => 2) => (3 => 4))) == "Dict((1 => 2) => (3 => 4))"
 
-    # issue #27979 (dislaying arrays of pairs containing arrays as first member)
+    # issue #27979 (displaying arrays of pairs containing arrays as first member)
     @test replstr([[1.0]=>1.0]) == "1-element Vector{Pair{Vector{Float64}, Float64}}:\n [1.0] => 1.0"
 
     # issue #28159
     @test replstr([(a=1, b=2), (a=3,c=4)]) == "2-element Vector{NamedTuple{names, Tuple{$Int, $Int}} where names}:\n (a = 1, b = 2)\n (a = 3, c = 4)"
 
-    @test replstr(Vector[Any[1]]) == "1-element Vector{Vector{T} where T}:\n Any[1]"
+    @test replstr(Vector[Any[1]]) == "1-element Vector{Vector}:\n Any[1]"
     @test replstr(AbstractDict{Integer,Integer}[Dict{Integer,Integer}(1=>2)]) ==
         "1-element Vector{AbstractDict{Integer, Integer}}:\n Dict(1 => 2)"
 
     # issue #34343
     @test showstr([[1], Int[]]) == "[[1], $Int[]]"
     @test showstr([Dict(1=>1), Dict{Int,Int}()]) == "[Dict(1 => 1), Dict{$Int, $Int}()]"
+
+    # issue #42719, NamedTuple with @var_str
+    @test replstr((; var"a b"=1)) == """(var"a b" = 1,)"""
+    @test replstr((; var"#var#"=1)) == """(var"#var#" = 1,)"""
+    @test replstr((; var"a"=1, b=2)) == "(a = 1, b = 2)"
+    @test replstr((; a=1, b=2)) == "(a = 1, b = 2)"
 end
 
 @testset "#14684: `display` should print associative types in full" begin
     d = Dict(1 => 2, 3 => 45)
-    buf = IOBuffer()
-    td = TextDisplay(buf)
+    td = TextDisplay(PipeBuffer())
 
     display(td, d)
-    result = String(take!(td.io))
+    result = read(td.io, String)
     @test occursin(summary(d), result)
 
     # Is every pair in the string?
@@ -1658,31 +1876,42 @@ end
     end
 end
 
-function _methodsstr(f)
+@testset "#43766: `display` trailing newline" begin
+    td = TextDisplay(PipeBuffer())
+    display(td, 1)
+    @test read(td.io, String) == "1\n"
+    show(td.io, 1)
+    @test read(td.io, String) == "1"
+end
+
+function _methodsstr(@nospecialize f)
     buf = IOBuffer()
     show(buf, methods(f))
-    String(take!(buf))
+    return String(take!(buf))
 end
 
 @testset "show function methods" begin
-    @test occursin("methods for generic function \"sin\":", _methodsstr(sin))
+    @test occursin("methods for generic function \"sin\":\n", _methodsstr(sin))
 end
 @testset "show macro methods" begin
-    @test startswith(_methodsstr(getfield(Base,Symbol("@show"))), "# 1 method for macro \"@show\":")
+    @test startswith(_methodsstr(getfield(Base,Symbol("@show"))), "# 1 method for macro \"@show\":\n")
 end
 @testset "show constructor methods" begin
-    @test occursin("methods for type constructor:\n", _methodsstr(Vector))
+    @test occursin(" methods for type constructor:\n", _methodsstr(Vector))
 end
 @testset "show builtin methods" begin
-    @test startswith(_methodsstr(typeof), "# built-in function; no methods")
+    @test startswith(_methodsstr(typeof), "# 1 method for builtin function \"typeof\":\n")
 end
 @testset "show callable object methods" begin
-    @test occursin("methods:", _methodsstr(:))
+    @test occursin("methods for callable object:\n", _methodsstr(:))
 end
 @testset "#20111 show for function" begin
     K20111(x) = y -> x
     @test startswith(_methodsstr(K20111(1)), "# 1 method for anonymous function")
 end
+@testset "show non-callable object" begin
+    @test "# 0 methods for callable object" == _methodsstr(1.0f0)
+end
 
 @generated f22798(x::Integer, y) = :x
 @testset "#22798" begin
@@ -1765,7 +1994,7 @@ h_line() = f_line()
 @test sprint(Base.show_unquoted, Core.Compiler.Argument(-2)) == "_-2"
 
 
-eval(Meta.parse("""function my_fun28173(x)
+eval(Meta._parse_string("""function my_fun28173(x)
     y = if x == 1
             "HI"
         elseif x == 2
@@ -1782,7 +2011,7 @@ eval(Meta.parse("""function my_fun28173(x)
             "three"
         end
     return y
-end""")) # use parse to control the line numbers
+end""", "a"^80, 1, 1, :statement)[1]) # use parse to control the line numbers
 let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
     ir = Core.Compiler.inflate_ir(src)
     fill!(src.codelocs, 0) # IRCode printing is only capable of printing partial line info
@@ -1792,13 +2021,13 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
         @test repr(src) == repr_ir
     end
     lines1 = split(repr(ir), '\n')
-    @test isempty(pop!(lines1))
-    Core.Compiler.insert_node!(ir, 1, Val{1}, QuoteNode(1), false)
-    Core.Compiler.insert_node!(ir, 1, Val{2}, QuoteNode(2), true)
-    Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Val{3}, QuoteNode(3), false)
-    Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Val{4}, QuoteNode(4), true)
+    @test all(isspace, pop!(lines1))
+    Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(1), Val{1}), false)
+    Core.Compiler.insert_node!(ir, 1, Core.Compiler.NewInstruction(QuoteNode(2), Val{2}), true)
+    Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(3), Val{3}), false)
+    Core.Compiler.insert_node!(ir, length(ir.stmts.inst), Core.Compiler.NewInstruction(QuoteNode(4), Val{4}), true)
     lines2 = split(repr(ir), '\n')
-    @test isempty(pop!(lines2))
+    @test all(isspace, pop!(lines2))
     @test popfirst!(lines2) == "2  1 ──       $(QuoteNode(1))"
     @test popfirst!(lines2) == "   │          $(QuoteNode(2))" # TODO: this should print after the next statement
     let line1 = popfirst!(lines1)
@@ -1816,6 +2045,12 @@ let src = code_typed(my_fun28173, (Int,), debuginfo=:source)[1][1]
     @test pop!(lines2) == "   │          \$(QuoteNode(4))"
     @test pop!(lines2) == "17 │          \$(QuoteNode(3))" # TODO: this should print after the next statement
     @test lines1 == lines2
+
+    # verbose linetable
+    io = IOBuffer()
+    Base.IRShow.show_ir(io, ir, Base.IRShow.default_config(ir; verbose_linetable=true))
+    seekstart(io)
+    @test count(contains(r"@ a{80}:\d+ within `my_fun28173"), eachline(io)) == 10
 end
 
 # Verify that extra instructions at the end of the IR
@@ -1825,8 +2060,8 @@ let src = code_typed(gcd, (Int, Int), debuginfo=:source)[1][1]
     ir = Core.Compiler.inflate_ir(src)
     push!(ir.stmts.inst, Core.Compiler.ReturnNode())
     lines = split(sprint(show, ir), '\n')
-    @test isempty(pop!(lines))
-    @test pop!(lines) == "   ! ──       unreachable::#UNDEF"
+    @test all(isspace, pop!(lines))
+    @test pop!(lines) == "   !!! ──       unreachable::#UNDEF"
 end
 
 @testset "printing and interpolating nothing" begin
@@ -1855,7 +2090,7 @@ end
 
 @testset """printing "Any" is not skipped with nested arrays""" begin
     @test replstr(Union{X28004,Vector}[X28004(Any[X28004(1)])], :compact => true) ==
-        "1-element Vector{Union{X28004, Vector{T} where T}}:\n X(Any[X(1)])"
+        "1-element Vector{Union{X28004, Vector}}:\n X(Any[X(1)])"
 end
 
 # Issue 25589 - Underlines in cmd printing
@@ -2011,10 +2246,117 @@ end
 end
 
 module M37012
+export AValue, B2, SimpleU
 struct AnInteger{S<:Integer} end
 struct AStruct{N} end
 const AValue{S} = Union{AStruct{S}, AnInteger{S}}
+struct BStruct{T,S} end
+const B2{S,T} = BStruct{T,S}
+const SimpleU = Union{AnInteger, AStruct, BStruct}
 end
 @test Base.make_typealias(M37012.AStruct{1}) === nothing
 @test isempty(Base.make_typealiases(M37012.AStruct{1})[1])
 @test string(M37012.AStruct{1}) == "$(curmod_prefix)M37012.AStruct{1}"
+@test string(Union{Nothing, Number, Vector}) == "Union{Nothing, Number, Vector}"
+@test string(Union{Nothing, Number, Vector{<:Integer}}) == "Union{Nothing, Number, Vector{<:Integer}}"
+@test string(Union{Nothing, AbstractVecOrMat}) == "Union{Nothing, AbstractVecOrMat}"
+@test string(Union{Nothing, AbstractVecOrMat{<:Integer}}) == "Union{Nothing, AbstractVecOrMat{<:Integer}}"
+@test string(M37012.BStruct{T, T} where T) == "$(curmod_prefix)M37012.B2{T, T} where T"
+@test string(M37012.BStruct{T, S} where {T<:Unsigned, S<:Signed}) == "$(curmod_prefix)M37012.B2{S, T} where {T<:Unsigned, S<:Signed}"
+@test string(M37012.BStruct{T, S} where {T<:Signed, S<:T}) == "$(curmod_prefix)M37012.B2{S, T} where {T<:Signed, S<:T}"
+@test string(Union{M37012.SimpleU, Nothing}) == "Union{Nothing, $(curmod_prefix)M37012.SimpleU}"
+@test string(Union{M37012.SimpleU, Nothing, T} where T) == "Union{Nothing, $(curmod_prefix)M37012.SimpleU, T} where T"
+@test string(Union{AbstractVector{T}, T} where T) == "Union{AbstractVector{T}, T} where T"
+@test string(Union{AbstractVector, T} where T) == "Union{AbstractVector, T} where T"
+
+@test sprint(show, :(./)) == ":((./))"
+@test sprint(show, :((.|).(.&, b))) == ":((.|).((.&), b))"
+
+@test sprint(show, :(a'ᵀ)) == ":(a'ᵀ)"
+@test sprint(show, :((+)')) == ":((+)')"
+for s in (Symbol("'"), Symbol("'⁻¹"))
+    @test Base.isoperator(s)
+    @test !Base.isunaryoperator(s)
+    @test !Base.isbinaryoperator(s)
+    @test Base.ispostfixoperator(s)
+end
+
+@testset "method printing with non-standard identifiers ($mime)" for mime in (
+    MIME("text/plain"), MIME("text/html"),
+)
+    _show(io, x) = show(io, MIME(mime), x)
+
+    @eval var","(x) = x
+    @test occursin("var\",\"(x)", sprint(_show, methods(var",")))
+
+    @eval f1(var"a.b") = 3
+    @test occursin("f1(var\"a.b\")", sprint(_show, methods(f1)))
+
+    italic(s) = mime == MIME("text/html") ? "<i>$s</i>" : s
+
+    @eval f2(; var"123") = 5
+    @test occursin("f2(; $(italic("var\"123\"")))", sprint(_show, methods(f2)))
+
+    @eval f3(; var"%!"...) = 7
+    @test occursin("f3(; $(italic("var\"%!\"...")))", sprint(_show, methods(f3)))
+
+    @eval f4(; var"...") = 9
+    @test_broken occursin("f4(; $(italic("var\"...\"")))", sprint(_show, methods(f4)))
+end
+
+@testset "printing of syntactic operators" begin
+    @test sprint(show, :(var"::" + var"$")) == ":(var\"::\" + (\$))"
+    @test sprint(show, :(!var"...")) == ":(!var\"...\")"
+    @test sprint(show, :(var"'ᵀ" - 1)) == ":(var\"'ᵀ\" - 1)"
+    @test sprint(show, :(::)) == ":(::)"
+    @test sprint(show, :?) == ":?"
+    @test sprint(show, :(var"?" + var"::" + var"'")) == ":(var\"?\" + var\"::\" + var\"'\")"
+end
+
+@testset "printing of function types" begin
+    s = sprint(show, MIME("text/plain"), typeof(sin))
+    @test s == "typeof(sin) (singleton type of function sin, subtype of Function)"
+    s = sprint(show, MIME("text/plain"), ModFWithParams.f_with_params)
+    @test endswith(s, "ModFWithParams.f_with_params")
+    s = sprint(show, MIME("text/plain"), ModFWithParams.f_with_params{2})
+    @test endswith(s, "ModFWithParams.f_with_params{2}")
+    s = sprint(show, MIME("text/plain"), UnionAll)
+    @test s == "UnionAll"
+    s = sprint(show, MIME("text/plain"), Function)
+    @test s == "Function"
+end
+
+@testset "printing inline n-dimensional arrays and one-column matrices" begin
+    @test replstr([Int[1 2 3 ;;; 4 5 6]]) == "1-element Vector{Array{$Int, 3}}:\n [1 2 3;;; 4 5 6]"
+    @test replstr([Int[1 2 3 ;;; 4 5 6;;;;]]) == "1-element Vector{Array{$Int, 4}}:\n [1 2 3;;; 4 5 6;;;;]"
+    @test replstr([fill(1, (20,20,20))]) == "1-element Vector{Array{$Int, 3}}:\n [1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; … ;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1;;; 1 1 … 1 1; 1 1 … 1 1; … ; 1 1 … 1 1; 1 1 … 1 1]"
+    @test replstr([fill(1, 5, 1)]) == "1-element Vector{Matrix{$Int}}:\n [1; 1; … ; 1; 1;;]"
+    @test replstr([fill(1, 5, 2)]) == "1-element Vector{Matrix{$Int}}:\n [1 1; 1 1; … ; 1 1; 1 1]"
+    @test replstr([[1;]]) == "1-element Vector{Vector{$Int}}:\n [1]"
+    @test replstr([[1;;]]) == "1-element Vector{Matrix{$Int}}:\n [1;;]"
+    @test replstr([[1;;;]]) == "1-element Vector{Array{$Int, 3}}:\n [1;;;]"
+end
+
+@testset "ncat and nrow" begin
+    @test_repr "[1;;]"
+    @test_repr "[1;;;]"
+    @test_repr "[1;; 2]"
+    @test_repr "[1;;; 2]"
+    @test_repr "[1;;; 2 3;;; 4]"
+    @test_repr "[1;;; 2;;;; 3;;; 4]"
+
+    @test_repr "T[1;;]"
+    @test_repr "T[1;;;]"
+    @test_repr "T[1;; 2]"
+    @test_repr "T[1;;; 2]"
+    @test_repr "T[1;;; 2 3;;; 4]"
+    @test_repr "T[1;;; 2;;;; 3;;; 4]"
+end
+
+@testset "Cmd" begin
+    @test sprint(show, `true`) == "`true`"
+    @test sprint(show, setenv(`true`, "A" => "B")) == """setenv(`true`,["A=B"])"""
+    @test sprint(show, setcpuaffinity(`true`, [1, 2])) == "setcpuaffinity(`true`, [1, 2])"
+    @test sprint(show, setenv(setcpuaffinity(`true`, [1, 2]), "A" => "B")) ==
+          """setenv(setcpuaffinity(`true`, [1, 2]),["A=B"])"""
+end
diff --git a/test/simdloop.jl b/test/simdloop.jl
index 1920cfa6140b1d..88e41364ef2221 100644
--- a/test/simdloop.jl
+++ b/test/simdloop.jl
@@ -92,19 +92,6 @@ import Base.SimdLoop.SimdError
 
 # Test that @simd rejects inner loop body with invalid control flow statements
 # issue #8613
-macro test_throws(ty, ex)
-    return quote
-        Test.@test_throws $(esc(ty)) try
-            $(esc(ex))
-        catch err
-            @test err isa LoadError
-            @test err.file === $(string(__source__.file))
-            @test err.line === $(__source__.line + 1)
-            rethrow(err.error)
-        end
-    end
-end
-
 @test_throws SimdError("break is not allowed inside a @simd loop body") @macroexpand begin
     @simd for x = 1:10
         x == 1 && break
diff --git a/test/smallarrayshrink.jl b/test/smallarrayshrink.jl
new file mode 100644
index 00000000000000..a1a7df5aee5a5a
--- /dev/null
+++ b/test/smallarrayshrink.jl
@@ -0,0 +1,45 @@
+@testset "shrink small array" begin
+    x = [1, 2, 3, 4]
+    @test x[1] == 1
+    @test x[2] == 2
+    @test x[3] == 3
+    @test x[4] == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
+    sizehint!(x, 10000)
+    @test x[1] == 1
+    @test x[2] == 2
+    @test x[3] == 3
+    @test x[4] == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 10000
+    sizehint!(x, 4)
+    @test x[1] == 1
+    @test x[2] == 2
+    @test x[3] == 3
+    @test x[4] == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
+
+    x = [1, 2, 3, 4]
+    @test x[1] == 1
+    @test x[2] == 2
+    @test x[3] == 3
+    @test x[4] == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
+    sizehint!(x, 1000000)
+    @test x[1] == 1
+    @test x[2] == 2
+    @test x[3] == 3
+    @test x[4] == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 1000000
+    sizehint!(x, 4)
+    @test x[1] == 1
+    @test x[2] == 2
+    @test x[3] == 3
+    @test x[4] == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 0) == 4
+    @test ccall(:jl_array_size, Int, (Any, UInt), x, 1) == 4
+end
diff --git a/test/some.jl b/test/some.jl
index 224eb8600814ca..27d50ca354a494 100644
--- a/test/some.jl
+++ b/test/some.jl
@@ -79,6 +79,21 @@
     @test something(missing, nothing, missing) === missing
 end
 
+@testset "@something" begin
+    @test_throws ArgumentError @something()
+    @test_throws ArgumentError @something(nothing)
+    @test @something(1) === 1
+    @test @something(Some(nothing)) === nothing
+
+    @test @something(1, error("failed")) === 1
+    @test_throws ErrorException @something(nothing, error("failed"))
+
+    # Ensure that the internal variable doesn't conflict with a user defined variable
+    @test let val = 1
+        @something(val)
+    end == 1
+end
+
 # issue #26927
 a = [missing, nothing, Some(nothing), Some(missing)]
 @test a isa Vector{Union{Missing, Nothing, Some}}
diff --git a/test/sorting.jl b/test/sorting.jl
index ad5438514261e4..2cb4eec93b380d 100644
--- a/test/sorting.jl
+++ b/test/sorting.jl
@@ -35,6 +35,8 @@ end
     @test sort([2,3,1], rev=true) == [3,2,1] == sort([2,3,1], order=Reverse)
     @test sort(['z':-1:'a';]) == ['a':'z';]
     @test sort(['a':'z';], rev=true) == ['z':-1:'a';]
+    @test sort(OffsetVector([3,1,2], -2)) == OffsetVector([1,2,3], -2)
+    @test sort(OffsetVector([3.0,1.0,2.0], 2), rev=true) == OffsetVector([3.0,2.0,1.0], 2)
 end
 
 @testset "sortperm" begin
@@ -46,6 +48,8 @@ end
         @test r === s
     end
     @test_throws ArgumentError sortperm!(view([1,2,3,4], 1:4), [2,3,1])
+    @test sortperm(OffsetVector([8.0,-2.0,0.5], -4)) == OffsetVector([-2, -1, -3], -4)
+    @test sortperm!(Int32[1,2], [2.0, 1.0]) == Int32[2, 1]
 end
 
 @testset "misc sorting" begin
@@ -53,6 +57,8 @@ end
     @test issorted([1,2,3])
     @test reverse([2,3,1]) == [1,3,2]
     @test sum(randperm(6)) == 21
+    @test length(reverse(0x1:0x2)) == 2
+    @test issorted(sort(rand(UInt64(1):UInt64(2), 7); rev=true); rev=true) # issue #43034
 end
 
 @testset "partialsort" begin
@@ -105,7 +111,7 @@ end
         @test searchsorted(fill(R(1), 15), T(1), 6, 10, Forward) == 6:10
     end
 
-    for (rg,I) in [(49:57,47:59), (1:2:17,-1:19), (-3:0.5:2,-5:.5:4)]
+    for (rg,I) in Any[(49:57,47:59), (1:2:17,-1:19), (-3:0.5:2,-5:.5:4)]
         rg_r = reverse(rg)
         rgv, rgv_r = [rg;], [rg_r;]
         for i = I
@@ -142,9 +148,29 @@ end
         @test searchsortedlast(500:1.0:600, 1.0e20) == 101
     end
 
+    @testset "issue 10966" begin
+        for R in numTypes, T in numTypes
+            @test searchsortedfirst(R(2):R(2), T(0)) == 1
+            @test searchsortedfirst(R(2):R(2), T(2)) == 1
+            @test searchsortedfirst(R(2):R(2), T(3)) == 2
+            @test searchsortedfirst(R(1):1//2:R(5), T(0)) == 1
+            @test searchsortedfirst(R(1):1//2:R(5), T(2)) == 3
+            @test searchsortedfirst(R(1):1//2:R(5), T(6)) == 10
+            @test searchsortedlast(R(2):R(2), T(0)) == 0
+            @test searchsortedlast(R(2):R(2), T(2)) == 1
+            @test searchsortedlast(R(2):R(2), T(3)) == 1
+            @test searchsortedlast(R(1):1//2:R(5), T(0)) == 0
+            @test searchsortedlast(R(1):1//2:R(5), T(2)) == 3
+            @test searchsortedlast(R(1):1//2:R(5), T(6)) == 9
+            @test searchsorted(R(2):R(2), T(0)) === 1:0
+            @test searchsorted(R(2):R(2), T(2)) == 1:1
+            @test searchsorted(R(2):R(2), T(3)) === 2:1
+        end
+    end
+
     @testset "issue 32568" begin
         for R in numTypes, T in numTypes
-            for arr in [R[1:5;], R(1):R(5), R(1):2:R(5)]
+            for arr in Any[R[1:5;], R(1):R(5), R(1):2:R(5)]
                 @test eltype(searchsorted(arr, T(2))) == keytype(arr)
                 @test eltype(searchsorted(arr, T(2), big(1), big(4), Forward)) == keytype(arr)
                 @test searchsortedfirst(arr, T(2)) isa keytype(arr)
@@ -164,29 +190,44 @@ end
         @test searchsorted([1,2], Inf) === 3:2
         @test searchsorted(1:2,   Inf) === 3:2
 
-        for coll in [
+        for coll in Any[
                 Base.OneTo(10),
                 1:2,
+                0x01:0x02,
                 -4:6,
                 5:2:10,
                 [1,2],
                 1.0:4,
                 [10.0,20.0],
             ]
-            for huge in [Inf, 1e300]
+            for huge in Any[Inf, 1e300, typemax(Int64), typemax(UInt64)]
                 @test searchsortedfirst(coll, huge) === lastindex(coll) + 1
-                @test searchsortedfirst(coll, -huge)=== firstindex(coll)
                 @test searchsortedlast(coll, huge)  === lastindex(coll)
-                @test searchsortedlast(coll, -huge) === firstindex(coll) - 1
                 @test searchsorted(coll, huge)      === lastindex(coll)+1 : lastindex(coll)
-                @test searchsorted(coll, -huge)     === firstindex(coll) : firstindex(coll) - 1
-
-                @test searchsortedfirst(reverse(coll), huge, rev=true) === firstindex(coll)
-                @test searchsortedfirst(reverse(coll), -huge, rev=true) === lastindex(coll) + 1
-                @test searchsortedlast(reverse(coll), huge, rev=true) === firstindex(coll) - 1
-                @test searchsortedlast(reverse(coll), -huge, rev=true) === lastindex(coll)
-                @test searchsorted(reverse(coll), huge, rev=true) === firstindex(coll):firstindex(coll) - 1
-                @test searchsorted(reverse(coll), -huge, rev=true) === lastindex(coll)+1:lastindex(coll)
+                if !(eltype(coll) <: Unsigned)
+                    @test searchsortedfirst(reverse(coll), huge, rev=true) === firstindex(coll)
+                    @test searchsortedlast(reverse(coll), huge, rev=true) === firstindex(coll) - 1
+                    @test searchsorted(reverse(coll), huge, rev=true) === firstindex(coll):firstindex(coll) - 1
+                end
+
+                if !(huge isa Unsigned)
+                    @test searchsortedfirst(coll, -huge)=== firstindex(coll)
+                    @test searchsortedlast(coll, -huge) === firstindex(coll) - 1
+                    @test searchsorted(coll, -huge)     === firstindex(coll) : firstindex(coll) - 1
+                    if !(eltype(coll) <: Unsigned)
+                        @test searchsortedfirst(reverse(coll), -huge, rev=true) === lastindex(coll) + 1
+                        @test searchsortedlast(reverse(coll), -huge, rev=true) === lastindex(coll)
+                        @test searchsorted(reverse(coll), -huge, rev=true) === lastindex(coll)+1:lastindex(coll)
+                    end
+                end
+            end
+        end
+
+        @testset "issue #34408" begin
+            r = 1f8-10:1f8
+            # collect(r) = Float32[9.999999e7, 9.999999e7, 9.999999e7, 9.999999e7, 1.0e8, 1.0e8, 1.0e8, 1.0e8, 1.0e8]
+            for i in r
+                @test_broken searchsorted(collect(r), i) == searchsorted(r, i)
             end
         end
     end
@@ -224,7 +265,8 @@ Base.step(r::ConstantRange) = 0
     @test searchsortedlast(r, UInt(1), Forward) == 5
 
     a = rand(1:10000, 1000)
-    for alg in [InsertionSort, MergeSort]
+    for alg in [InsertionSort, MergeSort, Base.DEFAULT_STABLE]
+
         b = sort(a, alg=alg)
         @test issorted(b)
 
@@ -289,27 +331,97 @@ Base.step(r::ConstantRange) = 0
     end
 
     @testset "unstable algorithms" begin
-        for alg in [QuickSort, PartialQuickSort(length(a))]
+        for alg in [QuickSort, Base.DEFAULT_UNSTABLE]
             b = sort(a, alg=alg)
             @test issorted(b)
+            @test last(b) == last(sort(a, alg=PartialQuickSort(length(a))))
             b = sort(a, alg=alg, rev=true)
             @test issorted(b, rev=true)
+            @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), rev=true))
             b = sort(a, alg=alg, by=x->1/x)
             @test issorted(b, by=x->1/x)
+            @test last(b) == last(sort(a, alg=PartialQuickSort(length(a)), by=x->1/x))
+        end
+    end
+end
+@testset "insorted" begin
+    numTypes = [Int8,  Int16,  Int32,  Int64,  Int128,
+                UInt8, UInt16, UInt32, UInt64, UInt128,
+                Float16, Float32, Float64, BigInt, BigFloat]
+
+    @test insorted(1, collect(1:10), by=(>=(5)))
+    @test insorted(10, collect(1:10), by=(>=(5)))
+
+    for R in numTypes, T in numTypes
+        @test !insorted(T(0), R[1, 1, 2, 2, 3, 3])
+        @test insorted(T(1), R[1, 1, 2, 2, 3, 3])
+        @test insorted(T(2), R[1, 1, 2, 2, 3, 3])
+        @test !insorted(T(4), R[1, 1, 2, 2, 3, 3])
+        @test !insorted(2.5, R[1, 1, 2, 2, 3, 3])
+
+        @test !insorted(T(0), 1:3)
+        @test insorted(T(1), 1:3)
+        @test insorted(T(2), 1:3)
+        @test !insorted(T(4), 1:3)
+
+        @test insorted(T(1), R.(collect(1:10)), by=(>=(5)))
+        @test insorted(T(10), R.(collect(1:10)), by=(>=(5)))
+    end
+
+    for (rg,I) in Any[(49:57,47:59), (1:2:17,-1:19), (-3:0.5:2,-5:.5:4)]
+        rg_r = reverse(rg)
+        rgv, rgv_r = collect(rg), collect(rg_r)
+        for i = I
+            @test insorted(i,rg) === insorted(i,rgv)
+            @test insorted(i,rg_r) === insorted(i,rgv_r,rev=true)
         end
     end
+
+    rg = 0.0:0.01:1.0
+    for i = 2:101
+        @test insorted(rg[i], rg)
+        @test !insorted(prevfloat(rg[i]), rg)
+        @test !insorted(nextfloat(rg[i]), rg)
+    end
+
+    rg_r = reverse(rg)
+    for i = 1:100
+        @test insorted(rg_r[i], rg_r)
+        @test !insorted(prevfloat(rg_r[i]), rg_r)
+        @test !insorted(nextfloat(rg_r[i]), rg_r)
+    end
+
+    @test insorted(1, 1:10) == insorted(1, collect(1:10), by=(>=(5)))
+    @test insorted(10, 1:10) == insorted(10, collect(1:10), by=(>=(5)))
+
+    @test !insorted(0, [])
+    @test !insorted(0, [1,2,3])
+    @test !insorted(4, [1,2,3])
+    @test insorted(3, [10,8,6,9,4,7,2,5,3,1], by=(x -> iseven(x) ? x+5 : x), rev=true)
 end
 @testset "PartialQuickSort" begin
     a = rand(1:10000, 1000)
     # test PartialQuickSort only does a partial sort
+    let alg = PartialQuickSort(1:div(length(a), 10))
+        k = alg.k
+        b = sort(a, alg=alg)
+        c = sort(a, alg=alg, by=x->1/x)
+        d = sort(a, alg=alg, rev=true)
+        @test issorted(b[k])
+        @test issorted(c[k], by=x->1/x)
+        @test issorted(d[k], rev=true)
+        @test !issorted(b)
+        @test !issorted(c, by=x->1/x)
+        @test !issorted(d, rev=true)
+    end
     let alg = PartialQuickSort(div(length(a), 10))
         k = alg.k
         b = sort(a, alg=alg)
         c = sort(a, alg=alg, by=x->1/x)
         d = sort(a, alg=alg, rev=true)
-        @test issorted(b[1:k])
-        @test issorted(c[1:k], by=x->1/x)
-        @test issorted(d[1:k], rev=true)
+        @test b[k] == sort(a)[k]
+        @test c[k] == sort(a, by=x->1/x)[k]
+        @test d[k] == sort(a, rev=true)[k]
         @test !issorted(b)
         @test !issorted(c, by=x->1/x)
         @test !issorted(d, rev=true)
@@ -355,9 +467,10 @@ end
             @test c == v
 
             # stable algorithms
-            for alg in [MergeSort]
+            for alg in [MergeSort, Base.DEFAULT_STABLE]
                 p = sortperm(v, alg=alg, rev=rev)
-                @test p == sortperm(float(v), alg=alg, rev=rev)
+                p2 = sortperm(float(v), alg=alg, rev=rev)
+                @test p == p2
                 @test p == pi
                 s = copy(v)
                 permute!(s, p)
@@ -367,9 +480,10 @@ end
             end
 
             # unstable algorithms
-            for alg in [QuickSort, PartialQuickSort(n)]
+            for alg in [QuickSort, PartialQuickSort(1:n), Base.DEFAULT_UNSTABLE]
                 p = sortperm(v, alg=alg, rev=rev)
-                @test p == sortperm(float(v), alg=alg, rev=rev)
+                p2 = sortperm(float(v), alg=alg, rev=rev)
+                @test p == p2
                 @test isperm(p)
                 @test v[p] == si
                 s = copy(v)
@@ -378,12 +492,29 @@ end
                 invpermute!(s, p)
                 @test s == v
             end
+            for alg in [PartialQuickSort(n)]
+                p = sortperm(v, alg=alg, rev=rev)
+                p2 = sortperm(float(v), alg=alg, rev=rev)
+                if n == 0
+                    @test isempty(p) && isempty(p2)
+                else
+                    @test p[n] == p2[n]
+                    @test v[p][n] == si[n]
+                    @test isperm(p)
+                    s = copy(v)
+                    permute!(s, p)
+                    @test s[n] == si[n]
+                    invpermute!(s, p)
+                    @test s == v
+                end
+            end
         end
 
         v = randn_with_nans(n,0.1)
         # TODO: alg = PartialQuickSort(n) fails here
-        for alg in [InsertionSort, QuickSort, MergeSort],
+        for alg in [InsertionSort, QuickSort, MergeSort, Base.DEFAULT_UNSTABLE, Base.DEFAULT_STABLE],
             rev in [false,true]
+            alg === InsertionSort && n >= 3000 && continue
             # test float sorting with NaNs
             s = sort(v, alg=alg, rev=rev)
             @test issorted(s, rev=rev)
@@ -443,7 +574,7 @@ end
         @test all(issorted, [sp[inds.==x] for x in 1:200])
     end
 
-    for alg in [InsertionSort, MergeSort]
+    for alg in [InsertionSort, MergeSort, Base.DEFAULT_STABLE]
         sp = sortperm(inds, alg=alg)
         @test all(issorted, [sp[inds.==x] for x in 1:200])
     end
@@ -542,4 +673,145 @@ end
     @test issorted(a)
 end
 
+@testset "sort!(::OffsetMatrix; dims)" begin
+    x = OffsetMatrix(rand(5,5), 5, -5)
+    sort!(x; dims=1)
+    for i in axes(x, 2)
+        @test issorted(x[:,i])
+    end
+end
+
+@testset "searchsortedfirst/last with generalized indexing" begin
+    o = OffsetVector(1:3, -2)
+    @test searchsortedfirst(o, 4) == lastindex(o) + 1
+    @test searchsortedfirst(o, 1.5) == 0
+    @test searchsortedlast(o, 0) == firstindex(o) - 1
+    @test searchsortedlast(o, 1.5) == -1
+end
+
+function adaptive_sort_test(v; trusted=InsertionSort, kw...)
+    sm = sum(hash.(v))
+    truth = sort!(deepcopy(v); alg=trusted, kw...)
+    return (
+        v === sort!(v; kw...) &&
+        issorted(v; kw...) &&
+        sum(hash.(v)) == sm &&
+        all(v .=== truth))
+end
+@testset "AdaptiveSort" begin
+    len = 70
+
+    @testset "Bool" begin
+        @test sort([false, true, false]) == [false, false, true]
+        @test sort([false, true, false], by=x->0) == [false, true, false]
+        @test sort([false, true, false], rev=true) == [true, false, false]
+    end
+
+    @testset "fallback" begin
+        @test adaptive_sort_test(rand(1:typemax(Int32), len), by=x->x^2)# fallback
+        @test adaptive_sort_test(rand(Int, len), by=x->0, trusted=QuickSort)
+    end
+
+    @test adaptive_sort_test(rand(Int, 20)) # InsertionSort
+
+    @testset "large eltype" begin
+        for rev in [true, false]
+            @test adaptive_sort_test(rand(Int128, len), rev=rev) # direct ordered int
+            @test adaptive_sort_test(fill(rand(UInt128), len), rev=rev) # all same
+            @test adaptive_sort_test(rand(Int128.(1:len), len), rev=rev) # short int range
+        end
+    end
+
+    @test adaptive_sort_test(fill(rand(), len)) # All same
+
+    @testset "count sort" begin
+        @test adaptive_sort_test(rand(1:20, len))
+        @test adaptive_sort_test(rand(1:20, len), rev=true)
+    end
+
+    @testset "post-serialization count sort" begin
+        v = reinterpret(Float64, rand(1:20, len))
+        @test adaptive_sort_test(copy(v))
+        @test adaptive_sort_test(copy(v), rev=true)
+    end
+
+    @testset "presorted" begin
+        @test adaptive_sort_test(sort!(rand(len)))
+        @test adaptive_sort_test(sort!(rand(Float32, len), rev=true))
+        @test adaptive_sort_test(vcat(sort!(rand(Int16, len)), Int16(0)))
+        @test adaptive_sort_test(vcat(sort!(rand(UInt64, len), rev=true), 0))
+    end
+
+    @testset "lenm1 < 3bits fallback" begin
+        @test adaptive_sort_test(rand(len)) # InsertionSort
+        @test adaptive_sort_test(rand(130)) # QuickSort
+    end
+
+    @test adaptive_sort_test(rand(1000)) # RadixSort
+end
+
+@testset "uint mappings" begin
+
+    #Construct value lists
+    floats = [T[-π, -1.0, -1/π, 1/π, 1.0, π, -0.0, 0.0, Inf, -Inf, NaN, -NaN,
+                prevfloat(T(0)), nextfloat(T(0)), prevfloat(T(Inf)), nextfloat(T(-Inf))]
+        for T in [Float16, Float32, Float64]]
+
+    ints = [T[17, -T(17), 0, -one(T), 1, typemax(T), typemin(T), typemax(T)-1, typemin(T)+1]
+        for T in Base.BitInteger_types]
+
+    char = Char['\n', ' ', Char(0), Char(8), Char(17), typemax(Char)]
+
+    vals = vcat(floats, ints, [char])
+
+    #Add random values
+    UIntN(::Val{1}) = UInt8
+    UIntN(::Val{2}) = UInt16
+    UIntN(::Val{4}) = UInt32
+    UIntN(::Val{8}) = UInt64
+    UIntN(::Val{16}) = UInt128
+    map(vals) do x
+        T = eltype(x)
+        U = UIntN(Val(sizeof(T)))
+        append!(x, rand(T, 4))
+        append!(x, reinterpret.(T, rand(U, 4)))
+        if T <: AbstractFloat
+            mask = reinterpret(U, T(NaN))
+            append!(x, reinterpret.(T, mask .| rand(U, 4)))
+        end
+    end
+
+    for x in vals
+        T = eltype(x)
+        U = UIntN(Val(sizeof(T)))
+        for order in [Forward, Reverse, Base.Sort.Float.Left(), Base.Sort.Float.Right(), By(Forward, identity)]
+            if order isa Base.Order.By || T === Float16 ||
+                ((T <: AbstractFloat) == (order isa DirectOrdering))
+                @test Base.Sort.UIntMappable(T, order) === nothing
+                continue
+            end
+
+            @test Base.Sort.UIntMappable(T, order) === U
+            x2 = deepcopy(x)
+            u = Base.Sort.uint_map!(x2, 1, length(x), order)
+            @test eltype(u) === U
+            @test all(Base.Sort.uint_map.(x, (order,)) .=== u)
+            mn = rand(U)
+            u .-= mn
+            @test x2 === Base.Sort.uint_unmap!(x2, u, 1, length(x), order, mn)
+            @test all(x2 .=== x)
+
+            for a in x
+                for b in x
+                    if order === Base.Sort.Float.Left() || order === Base.Sort.Float.Right()
+                        # Left and Right orderings guarantee homogeneous sign and no NaNs
+                        (isnan(a) || isnan(b) || signbit(a) != signbit(b)) && continue
+                    end
+                    @test Base.Order.lt(order, a, b) === Base.Order.lt(Forward, Base.Sort.uint_map(a, order), Base.Sort.uint_map(b, order))
+                end
+            end
+        end
+    end
+end
+
 end
diff --git a/test/spawn.jl b/test/spawn.jl
index 189a750855ba71..a8a2af40643ff0 100644
--- a/test/spawn.jl
+++ b/test/spawn.jl
@@ -5,6 +5,7 @@
 ###################################
 
 using Random, Sockets
+using Downloads: download
 
 valgrind_off = ccall(:jl_running_on_valgrind, Cint, ()) == 0
 
@@ -54,8 +55,8 @@ out = read(`$echocmd hello` & `$echocmd world`, String)
 
 @test (run(`$printfcmd "       \033[34m[stdio passthrough ok]\033[0m\n"`); true)
 
-# Test for SIGPIPE being treated as normal termination (throws an error if broken)
-Sys.isunix() && run(pipeline(yescmd, `head`, devnull))
+# Test for SIGPIPE being a failure condition
+@test_throws ProcessFailedException run(pipeline(yescmd, `head`, devnull))
 
 let p = run(pipeline(yescmd, devnull), wait=false)
     t = @async kill(p)
@@ -214,13 +215,13 @@ let r, t, sock
 end
 
 # issue #4535
-exename = Base.julia_cmd()
+exename = `$(Base.julia_cmd()) --startup-file=no --color=no`
 if valgrind_off
     # If --trace-children=yes is passed to valgrind, we will get a
     # valgrind banner here, not "Hello World\n".
-    @test read(pipeline(`$exename --startup-file=no -e 'println(stderr,"Hello World")'`, stderr=catcmd), String) == "Hello World\n"
+    @test read(pipeline(`$exename -e 'println(stderr,"Hello World")'`, stderr=catcmd), String) == "Hello World\n"
     out = Pipe()
-    proc = run(pipeline(`$exename --startup-file=no -e 'println(stderr,"Hello World")'`, stderr = out), wait=false)
+    proc = run(pipeline(`$exename -e 'println(stderr,"Hello World")'`, stderr = out), wait=false)
     close(out.in)
     @test read(out, String) == "Hello World\n"
     @test success(proc)
@@ -228,7 +229,7 @@ end
 
 # setup_stdio for AbstractPipe
 let out = Pipe(),
-    proc = run(pipeline(`$exename --startup-file=no -e 'println(getpid())'`, stdout=IOContext(out, :foo => :bar)), wait=false)
+    proc = run(pipeline(`$exename -e 'println(getpid())'`, stdout=IOContext(out, :foo => :bar)), wait=false)
     # < don't block here before getpid call >
     pid = getpid(proc)
     close(out.in)
@@ -260,6 +261,74 @@ end
     end
 end
 
+@testset "redirect_stdio" begin
+
+    function hello_err_out()
+        println(stderr, "hello from stderr")
+        println(stdout, "hello from stdout")
+    end
+    @testset "same path for multiple streams" begin
+        @test_throws ArgumentError redirect_stdio(hello_err_out,
+                                            stdin="samepath.txt", stdout="samepath.txt")
+        @test_throws ArgumentError redirect_stdio(hello_err_out,
+                                            stdin="samepath.txt", stderr="samepath.txt")
+
+        @test_throws ArgumentError redirect_stdio(hello_err_out,
+                                            stdin=joinpath("tricky", "..", "samepath.txt"),
+                                            stderr="samepath.txt")
+        mktempdir() do dir
+            path = joinpath(dir, "stdouterr.txt")
+            redirect_stdio(hello_err_out, stdout=path, stderr=path)
+            @test read(path, String) == """
+            hello from stderr
+            hello from stdout
+            """
+        end
+    end
+
+    mktempdir() do dir
+        path_stdout = joinpath(dir, "stdout.txt")
+        path_stderr = joinpath(dir, "stderr.txt")
+        redirect_stdio(hello_err_out, stderr=devnull, stdout=path_stdout)
+        @test read(path_stdout, String) == "hello from stdout\n"
+
+        open(path_stderr, "w") do ioerr
+            redirect_stdio(hello_err_out, stderr=ioerr, stdout=devnull)
+        end
+        @test read(path_stderr, String) == "hello from stderr\n"
+    end
+
+    mktempdir() do dir
+        path_stderr = joinpath(dir, "stderr.txt")
+        path_stdin  = joinpath(dir, "stdin.txt")
+        path_stdout = joinpath(dir, "stdout.txt")
+
+        content_stderr = randstring()
+        content_stdout = randstring()
+
+        redirect_stdio(stdout=path_stdout, stderr=path_stderr) do
+            print(content_stdout)
+            print(stderr, content_stderr)
+        end
+
+        @test read(path_stderr, String) == content_stderr
+        @test read(path_stdout, String) == content_stdout
+    end
+
+    # stdin is unavailable on the workers. Run test on master.
+    ret = Core.eval(Main,
+            quote
+                remotecall_fetch(1) do
+                    mktempdir() do dir
+                        path = joinpath(dir, "stdin.txt")
+                        write(path, "hello from stdin\n")
+                        redirect_stdio(readline, stdin=path)
+                    end
+                end
+            end)
+    @test ret == "hello from stdin"
+end
+
 # issue #36136
 @testset "redirect to devnull" begin
     @test redirect_stdout(devnull) do; println("Hello") end === nothing
@@ -297,7 +366,7 @@ let fname = tempname(), p
     import Base.zzzInvalidIdentifier
     """
     try
-        io = open(pipeline(`$exename --startup-file=no`, stderr=stderr), "w")
+        io = open(pipeline(exename, stderr=stderr), "w")
         write(io, cmd)
         close(io)
         wait(io)
@@ -317,7 +386,7 @@ let bad = "bad\0name"
 end
 
 # issue #12829
-let out = Pipe(), echo = `$exename --startup-file=no -e 'print(stdout, " 1\t", read(stdin, String))'`, ready = Condition(), t, infd, outfd
+let out = Pipe(), echo = `$exename -e 'print(stdout, " 1\t", read(stdin, String))'`, ready = Condition(), t, infd, outfd
     @test_throws ArgumentError write(out, "not open error")
     inread = false
     t = @async begin # spawn writer task
@@ -398,7 +467,7 @@ let fname = tempname()
         run(cmd)
     end
     """
-    @test success(pipeline(`$catcmd $fname`, `$exename --startup-file=no -e $code`))
+    @test success(pipeline(`$catcmd $fname`, `$exename -e $code`))
     rm(fname)
 end
 
@@ -510,8 +579,8 @@ end
 @test_throws ArgumentError run(Base.AndCmds(`$truecmd`, ``))
 
 # tests for reducing over collection of Cmd
-@test_throws ArgumentError reduce(&, Base.AbstractCmd[])
-@test_throws ArgumentError reduce(&, Base.Cmd[])
+@test_throws "reducing over an empty collection is not allowed" reduce(&, Base.AbstractCmd[])
+@test_throws "reducing over an empty collection is not allowed" reduce(&, Base.Cmd[])
 @test reduce(&, [`$echocmd abc`, `$echocmd def`, `$echocmd hij`]) == `$echocmd abc` & `$echocmd def` & `$echocmd hij`
 
 # readlines(::Cmd), accidentally broken in #20203
@@ -519,7 +588,7 @@ end
 
 # issue #19864 (PR #20497)
 let c19864 = readchomp(pipeline(ignorestatus(
-        `$exename --startup-file=no -e '
+        `$exename -e '
             struct Error19864 <: Exception; end
             Base.showerror(io::IO, e::Error19864) = print(io, "correct19864")
             throw(Error19864())'`),
@@ -530,6 +599,7 @@ end
 # accessing the command elements as an array or iterator:
 let c = `ls -l "foo bar"`
     @test collect(c) == ["ls", "-l", "foo bar"]
+    @test collect(Iterators.reverse(c)) == reverse!(["ls", "-l", "foo bar"])
     @test first(c) == "ls" == c[1]
     @test last(c) == "foo bar" == c[3] == c[end]
     @test c[1:2] == ["ls", "-l"]
@@ -572,7 +642,7 @@ end
 
 # Logging macros should not output to finalized streams (#26687)
 let
-    cmd = `$exename --startup-file=no -e 'finalizer(x->@info(x), "Hello")'`
+    cmd = `$exename -e 'finalizer(x->@info(x), "Hello")'`
     output = readchomp(pipeline(cmd, stderr=catcmd))
     @test occursin("Info: Hello", output)
 end
@@ -581,8 +651,8 @@ end
 psep = if Sys.iswindows() ";" else ":" end
 withenv("PATH" => "$(Sys.BINDIR)$(psep)$(ENV["PATH"])") do
     julia_exe = joinpath(Sys.BINDIR, Base.julia_exename())
-    @test Sys.which("julia") == realpath(julia_exe)
-    @test Sys.which(julia_exe) == realpath(julia_exe)
+    @test Sys.which(Base.julia_exename()) == abspath(julia_exe)
+    @test Sys.which(julia_exe) == abspath(julia_exe)
 end
 
 # Check that which behaves correctly when passed an empty string
@@ -597,8 +667,8 @@ mktempdir() do dir
         touch(foo_path)
         chmod(foo_path, 0o777)
         if !Sys.iswindows()
-            @test Sys.which("foo") == realpath(foo_path)
-            @test Sys.which(foo_path) == realpath(foo_path)
+            @test Sys.which("foo") == abspath(foo_path)
+            @test Sys.which(foo_path) == abspath(foo_path)
 
             chmod(foo_path, 0o666)
             @test Sys.which("foo") === nothing
@@ -635,20 +705,20 @@ mktempdir() do dir
         touch(foo2_path)
         chmod(foo1_path, 0o777)
         chmod(foo2_path, 0o777)
-        @test Sys.which("foo") == realpath(foo1_path)
+        @test Sys.which("foo") == abspath(foo1_path)
 
         # chmod() doesn't change which() on Windows, so don't bother to test that
         if !Sys.iswindows()
             chmod(foo1_path, 0o666)
-            @test Sys.which("foo") == realpath(foo2_path)
+            @test Sys.which("foo") == abspath(foo2_path)
             chmod(foo1_path, 0o777)
         end
 
         if Sys.iswindows()
             # On windows, check that pwd() takes precedence, except when we provide a path
             cd(joinpath(dir, "bin2")) do
-                @test Sys.which("foo") == realpath(foo2_path)
-                @test Sys.which(foo1_path) == realpath(foo1_path)
+                @test Sys.which("foo") == abspath(foo2_path)
+                @test Sys.which(foo1_path) == abspath(foo1_path)
             end
         end
 
@@ -661,7 +731,9 @@ mktempdir() do dir
         touch(bar_path)
         chmod(bar_path, 0o777)
         cd(dir) do
-            @test Sys.which(joinpath("bin1", "bar")) == realpath(bar_path)
+            p = Sys.which(joinpath("bin1", "bar"))
+            @test p == abspath("bin1", basename(bar_path))
+            @test Base.samefile(p, bar_path)
         end
     end
 end
@@ -685,7 +757,7 @@ end
 
 let text = "input-test-text"
     b = PipeBuffer()
-    proc = open(Base.CmdRedirect(Base.CmdRedirect(```$exename --startup-file=no -E '
+    proc = open(Base.CmdRedirect(Base.CmdRedirect(```$exename -E '
                     in14 = Base.open(RawFD(14))
                     out15 = Base.open(RawFD(15))
                     write(out15, in14)'```,
@@ -694,7 +766,21 @@ let text = "input-test-text"
     @test read(proc, String) == string(length(text), '\n')
     @test success(proc)
     @test String(take!(b)) == text
+
+    out = Base.BufferStream()
+    proc = run(catcmd, IOBuffer(text), out, wait=false)
+    @test proc.out === out
+    @test read(out, String) == text
+    @test success(proc)
+
+    out = PipeBuffer()
+    proc = run(catcmd, IOBuffer(SubString(text)), out)
+    @test success(proc)
+    @test proc.out === proc.err === proc.in === devnull
+    @test String(take!(out)) == text
 end
+
+
 @test repr(Base.CmdRedirect(``, devnull, 0, false)) == "pipeline(``, stdin>Base.DevNull())"
 @test repr(Base.CmdRedirect(``, devnull, 1, true)) == "pipeline(``, stdout<Base.DevNull())"
 @test repr(Base.CmdRedirect(``, devnull, 11, true)) == "pipeline(``, 11<Base.DevNull())"
@@ -710,6 +796,61 @@ end
     @test strip(String(read(cmd))) == "bar bar"
     cmd = addenv(cmd, ["FOO=baz"])
     @test strip(String(read(cmd))) == "baz bar"
+
+    # Test that `addenv()` works properly with `inherit`
+    withenv("FOO" => "foo", "BAR" => nothing) do
+        cmd = Cmd(`$shcmd -c "echo \$FOO \$BAR"`)
+        @test strip(String(read(cmd))) == "foo"
+
+        cmd2 = addenv(cmd, "BAR" => "bar"; inherit=false)
+        @test strip(String(read(cmd2))) == "bar"
+
+        cmd2 = addenv(cmd, "BAR" => "bar"; inherit=true)
+        @test strip(String(read(cmd2))) == "foo bar"
+
+        # Changing the environment doesn't effect the command,
+        # because it was baked in at `addenv()` time
+        withenv("FOO" => "baz") do
+            @test strip(String(read(cmd2))) == "foo bar"
+        end
+
+        # Even with inheritance, `addenv()` dominates:
+        cmd2 = addenv(cmd, "FOO" => "foo2", "BAR" => "bar"; inherit=true)
+        @test strip(String(read(cmd2))) == "foo2 bar"
+    end
+    # Keys with value === nothing are deleted
+    cmd = Cmd(`$shcmd -c "echo \$FOO \$BAR"`, env=Dict("FOO" => "foo", "BAR" => "bar"))
+    cmd2 = addenv(cmd, "FOO" => nothing)
+    @test strip(String(read(cmd2))) == "bar"
+    # addenv keeps the cmd's dir (#42131)
+    dir = joinpath(pwd(), "dir")
+    cmd = addenv(setenv(`julia`; dir=dir), Dict())
+    @test cmd.dir == dir
+
+    @test addenv(``, ["a=b=c"], inherit=false).env == ["a=b=c"]
+    cmd = addenv(``, "a"=>"b=c", inherit=false)
+    @test cmd.env == ["a=b=c"]
+    cmd = addenv(cmd, "b"=>"b")
+    @test issetequal(cmd.env, ["b=b", "a=b=c"])
+end
+
+@testset "setenv with dir (with tests for #42131)" begin
+    dir1 = joinpath(pwd(), "dir1")
+    dir2 = joinpath(pwd(), "dir2")
+    cmd = Cmd(`julia`; dir=dir1)
+    @test cmd.dir == dir1
+    @test Cmd(cmd).dir == dir1
+    @test Cmd(cmd; dir=dir2).dir == dir2
+    @test Cmd(cmd; dir="").dir == ""
+    @test setenv(cmd).dir == dir1
+    @test setenv(cmd; dir=dir2).dir == dir2
+    @test setenv(cmd; dir="").dir == ""
+    @test setenv(cmd, "FOO"=>"foo").dir == dir1
+    @test setenv(cmd, "FOO"=>"foo"; dir=dir2).dir == dir2
+    @test setenv(cmd, "FOO"=>"foo"; dir="").dir == ""
+    @test setenv(cmd, Dict("FOO"=>"foo")).dir == dir1
+    @test setenv(cmd, Dict("FOO"=>"foo"); dir=dir2).dir == dir2
+    @test setenv(cmd, Dict("FOO"=>"foo"); dir="").dir == ""
 end
 
 
@@ -719,8 +860,21 @@ if Sys.iswindows()
 end
 
 
-# shell escaping on Windows
-@testset "shell_escape_winsomely" begin
+# test (t)csh escaping if tcsh is installed
+cshcmd = "/bin/tcsh"
+if isfile(cshcmd)
+    csh_echo(s) = chop(read(Cmd([cshcmd, "-c",
+                                 "echo " * Base.shell_escape_csh(s)]), String))
+    csh_test(s) = csh_echo(s) == s
+    @testset "shell_escape_csh" begin
+        for s in ["", "-a/b", "'", "'£\"", join(' ':'~') ^ 2,
+                  "\t", "\n", "'\n", "\"\n", "'\n\n\""]
+            @test csh_test(s)
+        end
+    end
+end
+
+@testset "shell escaping on Windows" begin
     # Note  argument A can be parsed both as A or "A".
     # We do not test that the parsing satisfies either of these conditions.
     # In other words, tests may fail even for valid parsing.
@@ -728,77 +882,101 @@ end
 
     # input :
     # output: ""
-    @test Base.shell_escape_winsomely("") == "\"\""
+    @test Base.escape_microsoft_c_args("") == "\"\""
 
-    @test Base.shell_escape_winsomely("A") == "A"
+    @test Base.escape_microsoft_c_args("A") == "A"
 
-    @test Base.shell_escape_winsomely(`A`) == "A"
+    @test Base.escape_microsoft_c_args(`A`) == "A"
 
     # input : hello world
     # output: "hello world"
-    @test Base.shell_escape_winsomely("hello world") == "\"hello world\""
+    @test Base.escape_microsoft_c_args("hello world") == "\"hello world\""
 
     # input : hello  world
     # output: "hello  world"
-    @test Base.shell_escape_winsomely("hello\tworld") == "\"hello\tworld\""
+    @test Base.escape_microsoft_c_args("hello\tworld") == "\"hello\tworld\""
 
     # input : hello"world
     # output: "hello\"world" (also valid) hello\"world
-    @test Base.shell_escape_winsomely("hello\"world") == "\"hello\\\"world\""
+    @test Base.escape_microsoft_c_args("hello\"world") == "\"hello\\\"world\""
 
     # input : hello""world
     # output: "hello\"\"world" (also valid) hello\"\"world
-    @test Base.shell_escape_winsomely("hello\"\"world") == "\"hello\\\"\\\"world\""
+    @test Base.escape_microsoft_c_args("hello\"\"world") == "\"hello\\\"\\\"world\""
 
     # input : hello\world
     # output: hello\world
-    @test Base.shell_escape_winsomely("hello\\world") == "hello\\world"
+    @test Base.escape_microsoft_c_args("hello\\world") == "hello\\world"
 
     # input : hello\\world
     # output: hello\\world
-    @test Base.shell_escape_winsomely("hello\\\\world") == "hello\\\\world"
+    @test Base.escape_microsoft_c_args("hello\\\\world") == "hello\\\\world"
 
     # input : hello\"world
     # output: "hello\"world" (also valid) hello\"world
-    @test Base.shell_escape_winsomely("hello\\\"world") == "\"hello\\\\\\\"world\""
+    @test Base.escape_microsoft_c_args("hello\\\"world") == "\"hello\\\\\\\"world\""
 
     # input : hello\\"world
     # output: "hello\\\\\"world" (also valid) hello\\\\\"world
-    @test Base.shell_escape_winsomely("hello\\\\\"world")  == "\"hello\\\\\\\\\\\"world\""
+    @test Base.escape_microsoft_c_args("hello\\\\\"world")  == "\"hello\\\\\\\\\\\"world\""
 
     # input : hello world\
     # output: "hello world\\"
-    @test Base.shell_escape_winsomely("hello world\\") == "\"hello world\\\\\""
+    @test Base.escape_microsoft_c_args("hello world\\") == "\"hello world\\\\\""
 
     # input : A\B
     # output: A\B"
-    @test Base.shell_escape_winsomely("A\\B") == "A\\B"
+    @test Base.escape_microsoft_c_args("A\\B") == "A\\B"
 
     # input : [A\, B]
     # output: "A\ B"
-    @test Base.shell_escape_winsomely("A\\", "B") == "A\\ B"
+    @test Base.escape_microsoft_c_args("A\\", "B") == "A\\ B"
 
     # input : A"B
     # output: "A\"B"
-    @test Base.shell_escape_winsomely("A\"B") ==  "\"A\\\"B\""
+    @test Base.escape_microsoft_c_args("A\"B") ==  "\"A\\\"B\""
 
     # input : [A B\, C]
     # output: "A B\\" C
-    @test Base.shell_escape_winsomely("A B\\", "C") == "\"A B\\\\\" C"
+    @test Base.escape_microsoft_c_args("A B\\", "C") == "\"A B\\\\\" C"
 
     # input : [A "B, C]
     # output: "A \"B" C
-    @test Base.shell_escape_winsomely("A \"B", "C") == "\"A \\\"B\" C"
+    @test Base.escape_microsoft_c_args("A \"B", "C") == "\"A \\\"B\" C"
 
     # input : [A B\, C]
     # output: "A B\\" C
-    @test Base.shell_escape_winsomely("A B\\", "C") == "\"A B\\\\\" C"
+    @test Base.escape_microsoft_c_args("A B\\", "C") == "\"A B\\\\\" C"
 
     # input :[A\ B\, C]
     # output: "A\ B\\" C
-    @test Base.shell_escape_winsomely("A\\ B\\", "C") == "\"A\\ B\\\\\" C"
+    @test Base.escape_microsoft_c_args("A\\ B\\", "C") == "\"A\\ B\\\\\" C"
 
     # input : [A\ B\, C, D K]
     # output: "A\ B\\" C "D K"
-    @test Base.shell_escape_winsomely("A\\ B\\", "C", "D K") == "\"A\\ B\\\\\" C \"D K\""
+    @test Base.escape_microsoft_c_args("A\\ B\\", "C", "D K") == "\"A\\ B\\\\\" C \"D K\""
+
+    # shell_escape_wincmd
+    @test Base.shell_escape_wincmd("") == ""
+    @test Base.shell_escape_wincmd("\"") == "^\""
+    @test Base.shell_escape_wincmd("\"\"") == "\"\""
+    @test Base.shell_escape_wincmd("\"\"\"") == "\"\"^\""
+    @test Base.shell_escape_wincmd("\"\"\"\"") == "\"\"\"\""
+    @test Base.shell_escape_wincmd("a^\"^o\"^u\"") == "a^^\"^o\"^^u^\""
+    @test Base.shell_escape_wincmd("ä^\"^ö\"^ü\"") == "ä^^\"^ö\"^^ü^\""
+    @test Base.shell_escape_wincmd("@@()!^<>&|\"") == "^@@^(^)^!^^^<^>^&^|^\""
+    @test_throws ArgumentError Base.shell_escape_wincmd("\0")
+    @test_throws ArgumentError Base.shell_escape_wincmd("\r")
+    @test_throws ArgumentError Base.shell_escape_wincmd("\n")
+
+    # combined tests of shell_escape_wincmd and escape_microsoft_c_args
+    @test Base.shell_escape_wincmd(Base.escape_microsoft_c_args(
+        "julia", "-e", "println(ARGS)", raw"He said \"a^2+b^2=c^2\"!" )) ==
+            "julia -e println^(ARGS^) \"He said \\\"a^^2+b^^2=c^^2\\\"!\""
+
+    ascii95 = String(range(' ',stop='~')); # all printable ASCII characters
+    args = ["ab ^` c", " \" ", "\"", ascii95, ascii95,
+            "\"\\\"\\", "", "|", "&&", ";"];
+    @test Base.shell_escape_wincmd(Base.escape_microsoft_c_args(args...)) == "\"ab ^` c\" \" \\\" \" \"\\\"\" \" !\\\"#\$%^&'^(^)*+,-./0123456789:;^<=^>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^^_`abcdefghijklmnopqrstuvwxyz{^|}~\" \" ^!\\\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\" \"\\\"\\\\\\\"\\\\\" \"\" ^| ^&^& ;"
+
 end
diff --git a/test/specificity.jl b/test/specificity.jl
index a34ed8a15eccbd..1a5c117ce5d9d9 100644
--- a/test/specificity.jl
+++ b/test/specificity.jl
@@ -62,7 +62,7 @@ _z_z_z_(::Int, c...) = 3
 @test args_morespecific(Tuple{Union{Int,String},Type{Pair{A,B} where B}} where A, Tuple{Integer,UnionAll})
 
 # PR #21750
-let A = Tuple{Any, Tuple{Vararg{Integer,N} where N}},
+let A = Tuple{Any, Tuple{Vararg{Integer}}},
     B = Tuple{Any, Tuple{Any}},
     C = Tuple{Any, Tuple{}}
     @test args_morespecific(A, B)
@@ -90,7 +90,12 @@ begin
     @test f((1,2,3), A) == 3
     @test f((1,2), A) == 2
     @test f((), reshape([1])) == 1
+
+    oldstderr = stderr
+    newstderr = redirect_stderr() # redirect stderr to avoid method definition overwrite warning
     f(dims::NTuple{N,Int}, A::AbstractArray{T,N}) where {T,N} = 4
+    redirect_stderr(oldstderr)
+
     @test f((1,2), A) == 4
     @test f((1,2,3), A) == 3
 end
@@ -111,16 +116,16 @@ f17016(f, t1::Tuple) = 1
 @test !args_morespecific(Tuple{Type{Any}, Any}, Tuple{Type{T}, Any} where T<:VecElement)
 @test !args_morespecific((Tuple{Type{T}, Any} where T<:VecElement), Tuple{Type{Any}, Any})
 
-@test !args_morespecific(Tuple{Type{T}, Tuple{Any, Vararg{Any, N} where N}} where T<:Tuple{Any, Vararg{Any, N} where N},
+@test !args_morespecific(Tuple{Type{T}, Tuple{Any, Vararg{Any}}} where T<:Tuple{Any, Vararg{Any}},
                          Tuple{Type{Any}, Any})
-@test !args_morespecific(Tuple{Type{T}, Tuple{Any, Vararg{Any, N} where N}} where T<:Tuple{Any, Vararg{Any, N} where N},
+@test !args_morespecific(Tuple{Type{T}, Tuple{Any, Vararg{Any}}} where T<:Tuple{Any, Vararg{Any}},
                          Tuple{Type{Tuple}, Tuple})
-@test !args_morespecific(Tuple{Type{T}, T} where T<:Tuple{Any, Vararg{Any, N} where N},
+@test !args_morespecific(Tuple{Type{T}, T} where T<:Tuple{Any, Vararg{Any}},
                          Tuple{Type{T}, Any} where T<:VecElement)
 
 @test args_morespecific(Tuple{Any, Tuple{}, Tuple{}}, Tuple{Any, Tuple{Any}})
 @test args_morespecific(Tuple{Any, Tuple{Any}, Tuple{Any}}, Tuple{Any, Tuple{Any, Any}})
-@test args_morespecific(Tuple{Any, Vararg{Tuple{}, N} where N}, Tuple{Any, Tuple{Any}})
+@test args_morespecific(Tuple{Any, Vararg{Tuple{}}}, Tuple{Any, Tuple{Any}})
 
 @test  args_morespecific(Tuple{T, T} where T<:AbstractFloat, Tuple{T, T, T} where T<:AbstractFloat)
 @test  args_morespecific(Tuple{T, Real, T} where T<:AbstractFloat, Tuple{T, T} where T<:Real)
@@ -137,10 +142,10 @@ f17016(f, t1::Tuple) = 1
                          Tuple{T, T} where T<:Union{Base.StepRangeLen, Base.LinRange})
 
 @test args_morespecific(Tuple{Type{Tuple}, Any, Any},
-                        Tuple{Type{Tuple{Vararg{E, N} where N}}, Any, Any} where E)
+                        Tuple{Type{Tuple{Vararg{E}}}, Any, Any} where E)
 
 @test args_morespecific(Tuple{Type{Tuple{}}, Tuple{}},
-                        Tuple{Type{T}, T} where T<:Tuple{Any, Vararg{Any, N} where N})
+                        Tuple{Type{T}, T} where T<:Tuple{Any, Vararg{Any}})
 
 @test args_morespecific(Tuple{Type{CartesianIndex{N}}} where N,
                         Tuple{Type{CartesianIndex{N}},Vararg{Int,N}} where N)
@@ -235,14 +240,14 @@ let N = Tuple{Type{Union{Nothing, T}}, Union{Nothing, T}} where T,
 end
 
 # issue #29528
-@test !args_morespecific(Tuple{Array,Vararg{Int64,N} where N}, Tuple{AbstractArray, Array})
+@test !args_morespecific(Tuple{Array,Vararg{Int64}}, Tuple{AbstractArray, Array})
 @test !args_morespecific(Tuple{Array,Vararg{Int64,N}} where N, Tuple{AbstractArray, Array})
 @test  args_morespecific(Tuple{Array,Int64}, Tuple{Array,Vararg{Int64,N}} where N)
-@test  args_morespecific(Tuple{Array,Int64}, Tuple{Array,Vararg{Int64,N} where N})
+@test  args_morespecific(Tuple{Array,Int64}, Tuple{Array,Vararg{Int64}})
 @test !args_morespecific(Tuple{Array,Int64}, Tuple{AbstractArray, Array})
 
 # issue #30114
-let T1 = Tuple{Type{Tuple{Vararg{AbstractUnitRange{Int64},N} where N}},CartesianIndices{N,R} where R<:Tuple{Vararg{AbstractUnitRange{Int64},N}}} where N
+let T1 = Tuple{Type{Tuple{Vararg{AbstractUnitRange{Int64}}}},CartesianIndices{N,R} where R<:Tuple{Vararg{AbstractUnitRange{Int64},N}}} where N
     T2 = Tuple{Type{T},T} where T<:AbstractArray
     T3 = Tuple{Type{AbstractArray{T,N} where N},AbstractArray} where T
     T4 = Tuple{Type{AbstractArray{T,N}},AbstractArray{s57,N} where s57} where N where T
@@ -254,7 +259,7 @@ let T1 = Tuple{Type{Tuple{Vararg{AbstractUnitRange{Int64},N} where N}},Cartesian
 end
 
 @test !args_morespecific(Tuple{Type{Tuple{Vararg{AbstractUnitRange{Int64},N}}},} where N,
-                         Tuple{Type{Tuple{Vararg{AbstractUnitRange,N} where N}},})
+                         Tuple{Type{Tuple{Vararg{AbstractUnitRange}}},})
 
 @test  args_morespecific(Tuple{Type{SubArray{T,2,P} where T}, Array{T}} where T where P,
                          Tuple{Type{AbstractArray{T,N} where N},AbstractArray} where T)
diff --git a/test/stack_overflow.jl b/test/stack_overflow.jl
index 33d667fa479abe..9f4bae6f3f5b32 100644
--- a/test/stack_overflow.jl
+++ b/test/stack_overflow.jl
@@ -15,5 +15,5 @@ end
 
 let exename = Base.julia_cmd()
     @show readchomperrors(`$exename -e "f() = f(); f()"`)
-    @show readchomperrors(`$exename -e "f() = f(); fetch(@schedule f())"`)
+    @show readchomperrors(`$exename -e "f() = f(); fetch(@async f())"`)
 end
diff --git a/test/staged.jl b/test/staged.jl
index 26e3dace1994bd..b99ef46a2bc1ef 100644
--- a/test/staged.jl
+++ b/test/staged.jl
@@ -303,4 +303,5 @@ end
 @generated function f33243()
     :(global x33243 = 2)
 end
-@test_throws ErrorException f33243()
+@test f33243() === 2
+@test x33243 === 2
diff --git a/test/stress_fd_exec.jl b/test/stress_fd_exec.jl
index ef23892b830ae2..846b98ba3571af 100644
--- a/test/stress_fd_exec.jl
+++ b/test/stress_fd_exec.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 using Test
 let ps = Pipe[]
     ulimit_n = tryparse(Int, readchomp(`sh -c 'ulimit -n'`))
diff --git a/test/strings/basic.jl b/test/strings/basic.jl
index 8dd54a40c08622..e7e5575fd681e6 100644
--- a/test/strings/basic.jl
+++ b/test/strings/basic.jl
@@ -435,6 +435,9 @@ end
         @test all(x -> x == "12", svec)
         @test svec isa Vector{AbstractString}
     end
+    # test startswith and endswith for AbstractString
+    @test endswith(GenericString("abcd"), GenericString("cd"))
+    @test startswith(GenericString("abcd"), GenericString("ab"))
 end
 
 @testset "issue #10307" begin
@@ -854,7 +857,7 @@ end
                     p = prevind(s, p)
                     @test prevind(s, x, j) == p
                 end
-                if n ≤ ncodeunits(s)
+                if n ≤ ncodeunits(s)
                     n = nextind(s, n)
                     @test nextind(s, x, j) == n
                 end
@@ -1039,8 +1042,9 @@ let s = "∀x∃y", u = codeunits(s)
     @test u[1] == 0xe2
     @test u[2] == 0x88
     @test u[8] == 0x79
-    @test_throws ErrorException (u[1] = 0x00)
+    @test_throws Base.CanonicalIndexError (u[1] = 0x00)
     @test collect(u) == b"∀x∃y"
+    @test Base.elsize(u) == Base.elsize(typeof(u)) == 1
 end
 
 # issue #24388
@@ -1093,3 +1097,34 @@ end
     @test sprint(summary, SubString("foα", 2)) == "3-codeunit SubString{String}"
     @test sprint(summary, "") == "empty String"
 end
+
+@testset "Plug holes in test coverage" begin
+    @test_throws MethodError checkbounds(Bool, "abc", [1.0, 2.0])
+
+    apple_uint8 = Vector{UInt8}("Apple")
+    @test apple_uint8 == [0x41, 0x70, 0x70, 0x6c, 0x65]
+
+    Base.String(::tstStringType) = "Test"
+    abstract_apple = tstStringType(apple_uint8)
+    @test hash(abstract_apple, UInt(1)) == hash("Test", UInt(1))
+
+    @test length("abc", 1, 3) == length("abc", UInt(1), UInt(3))
+
+    @test isascii(GenericString("abc"))
+
+    code_units = Base.CodeUnits("abc")
+    @test Base.IndexStyle(Base.CodeUnits) == IndexLinear()
+    @test Base.elsize(code_units) == sizeof(UInt8)
+    @test Base.unsafe_convert(Ptr{Int8}, code_units) == Base.unsafe_convert(Ptr{Int8}, code_units.s)
+end
+
+@testset "LazyString" begin
+    @test repr(lazy"$(1+2) is 3") == "\"3 is 3\""
+    let d = Dict(lazy"$(1+2) is 3" => 3)
+        @test d["3 is 3"] == 3
+    end
+    l = lazy"1+2"
+    @test codeunit(l) == UInt8
+    @test codeunit(l,2) == 0x2b
+    @test isvalid(l, 1)
+end
diff --git a/test/strings/io.jl b/test/strings/io.jl
index 9fd36d565408e6..91ad83b24e328c 100644
--- a/test/strings/io.jl
+++ b/test/strings/io.jl
@@ -64,6 +64,13 @@
     @test typeof(escape_string("test", "t")) == String
     @test escape_string("test", "t") == "\\tes\\t"
 
+    @test escape_string("\\cdot") == "\\\\cdot"
+    @test escape_string("\\cdot"; keep = '\\') == "\\cdot"
+    @test escape_string("\\cdot", '\\'; keep = '\\') == "\\\\cdot"
+    @test escape_string("\\cdot\n"; keep = "\\\n") == "\\cdot\n"
+    @test escape_string("\\cdot\n", '\n'; keep = "\\\n") == "\\cdot\\\n"
+    @test escape_string("\\cdot\n", "\\\n"; keep = "\\\n") == "\\\\cdot\\\n"
+
     for i = 1:size(cx,1)
         cp, ch, st = cx[i,:]
         @test cp == convert(UInt32, ch)
@@ -180,6 +187,38 @@ join(myio, "", "", 1)
     @test_throws ArgumentError unescape_string(IOBuffer(), string('\\',"N"))
     @test_throws ArgumentError unescape_string(IOBuffer(), string('\\',"m"))
 end
+
+@testset "sprint with context" begin
+    function f(io::IO)
+        println(io, "compact => ", get(io, :compact, false))
+        println(io, "limit   => ", get(io, :limit,   false))
+    end
+
+    str = sprint(f)
+    @test str == """
+        compact => false
+        limit   => false
+        """
+
+    str = sprint(f, context = :compact => true)
+    @test str == """
+        compact => true
+        limit   => false
+        """
+
+    str = sprint(f, context = (:compact => true, :limit => true))
+    @test str == """
+        compact => true
+        limit   => true
+        """
+
+    str = sprint(f, context = IOContext(stdout, :compact => true, :limit => true))
+    @test str == """
+        compact => true
+        limit   => true
+        """
+end
+
 @testset "#11659" begin
     # The indentation code was not correctly counting tab stops
     @test Base.indentation("      \t") == (8, true)
@@ -270,3 +309,11 @@ for i = 1:10
     print(buf, join(s22021, "\n"))
     @test isvalid(String, take!(buf))
 end
+
+@testset "string()" begin
+    # test the Float sizehints
+    @test string(2.f0) == "2.0"
+    @test string(2.f0, 2.0) == "2.02.0"
+    # test empty args
+    @test string() == ""
+end
diff --git a/test/strings/search.jl b/test/strings/search.jl
index 8a7abaec503097..d328168bfa4665 100644
--- a/test/strings/search.jl
+++ b/test/strings/search.jl
@@ -97,6 +97,8 @@ for str in [astr]
     @test findprev('l', str, 2) == nothing
     @test findlast(',', str) == 6
     @test findprev(',', str, 5) == nothing
+    @test findlast(str, "") == nothing
+    @test findlast(str^2, str) == nothing
     @test findlast('\n', str) == 14
 end
 
@@ -355,6 +357,9 @@ end
 # occursin with a String and Char needle
 @test occursin("o", "foo")
 @test occursin('o', "foo")
+# occursin in curried form
+@test occursin("foo")("o")
+@test occursin("foo")('o')
 
 # contains
 @test contains("foo", "o")
@@ -390,6 +395,42 @@ s_18109 = "fooα🐨βcd3"
     @test findall("aa", "aaaaaa", overlap=true) == [1:2, 2:3, 3:4, 4:5, 5:6]
 end
 
+# issue 37280
+@testset "UInt8, Int8 vector" begin
+    for T in [Int8, UInt8], VT in [Int8, UInt8]
+        A = T[0x40, 0x52, 0x00, 0x52, 0x00]
+
+        for A in (A, @view(A[1:end]), codeunits(String(copyto!(Vector{UInt8}(undef,5), A))))
+            @test findfirst(VT[0x30], A) === findfirst(==(VT(0x30)), A) == nothing
+            @test findfirst(VT[0x52], A) === 2:2
+            @test findfirst(==(VT(0x52)), A) === 2
+            @test findlast(VT[0x30], A) === findlast(==(VT(0x30)), A) === nothing
+            @test findlast(VT[0x52], A) === 4:4
+            @test findlast(==(VT(0x52)), A) === 4
+            @test findfirst(iszero, A) === 3 === findprev(iszero, A, 4)
+            @test findlast(iszero, A) === 5 === findnext(iszero, A, 4)
+
+            pattern = VT[0x52, 0x00]
+
+            @test findfirst(pattern, A) === 2:3
+            @test findnext(pattern, A, 2) === 2:3
+            @test findnext(pattern, A, 3) === 4:5
+            # 1 idx too far is allowed
+            @test findnext(pattern, A, length(A)+1) === nothing
+            @test_throws BoundsError findnext(pattern, A, -3)
+            @test_throws BoundsError findnext(pattern, A, length(A)+2)
+
+            @test findlast(pattern, A) === 4:5
+            @test findprev(pattern, A, 3) === 2:3
+            @test findprev(pattern, A, 5) === 4:5
+            @test findprev(pattern, A, 2) === nothing
+            @test findprev(pattern, A, length(A)+1) == findlast(pattern, A)
+            @test findprev(pattern, A, length(A)+2) == findlast(pattern, A)
+            @test_throws BoundsError findprev(pattern, A, -3)
+        end
+    end
+end
+
 # issue 32568
 for T = (UInt, BigInt)
     for x = (4, 5)
diff --git a/test/strings/util.jl b/test/strings/util.jl
index 7a144e3c350003..8957513e37f25b 100644
--- a/test/strings/util.jl
+++ b/test/strings/util.jl
@@ -1,5 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+SubStr(s) = SubString("abc$(s)de", firstindex(s) + 3, lastindex(s) + 3)
+
 @testset "padding (lpad and rpad)" begin
     @test lpad("foo", 2) == "foo"
     @test rpad("foo", 2) == "foo"
@@ -44,6 +46,11 @@
     # Issue #32160 (unsigned underflow in lpad/rpad)
     @test lpad("xx", UInt(1), " ") == "xx"
     @test rpad("xx", UInt(1), " ") == "xx"
+    # Issue #38256 (lpad/rpad defined in terms of textwidth)
+    @test lpad("⟨k|H₁|k̃⟩", 12) |> textwidth == 12
+    @test rpad("⟨k|H₁|k̃⟩", 12) |> textwidth == 12
+    @test lpad("⟨k|H₁|k⟩", 12) |> textwidth == 12
+    @test rpad("⟨k|H₁|k⟩", 12) |> textwidth == 12
 end
 
 # string manipulation
@@ -158,6 +165,7 @@ end
     @test split("", "") == rsplit("", "") == [""]
     @test split("abc", "") == rsplit("abc", "") == ["a","b","c"]
     @test rsplit("abc", "", limit=2) == ["ab","c"]
+    @test rsplit("", "//") == [""]
     @test split("abc", "", limit=2) == ["a","bc"]
 
     @test split("", r"") == [""]
@@ -275,6 +283,11 @@ end
     # Issue 13332
     @test replace("abc", 'b' => 2.1) == "a2.1c"
 
+    # Issue 31456
+    @test replace("The fox.", r"fox(es)?" => s"bus\1") == "The bus."
+    @test replace("The foxes.", r"fox(es)?" => s"bus\1") == "The buses."
+    @test replace("The quick fox quickly.", r"(quick)?\sfox(es)?\s(run)?" => s"\1 bus\2 \3") == "The quick bus quickly."
+
     # test replace with a count for String and GenericString
     # check that replace is a no-op if count==0
     for s in ["aaa", Test.GenericString("aaa")]
@@ -302,36 +315,264 @@ end
 
 end
 
+@testset "replace many" begin
+    # PR 35414 Francesco Alemanno <francescoalemanno710@gmail.com>
+    @test replace("foobarbaz", "oo" => "zz", "ar" => "zz", "z" => "m") == "fzzbzzbam"
+    substmp=["z" => "m", "oo" => "zz", "ar" => "zz"]
+    for perm in [[1, 2, 3], [2, 1, 3], [3, 2, 1], [2, 3, 1], [1, 3, 2], [3, 1, 2]]
+        @test replace("foobarbaz", substmp[perm]...) == "fzzbzzbam"
+        @test replace("foobarbaz", substmp[perm]..., count=2) == "fzzbzzbaz"
+        @test replace("foobarbaz", substmp[perm]..., count=1) == "fzzbarbaz"
+    end
+    @test replace("foobarbaz", "z" => "m", r"a.*a" => uppercase) == "foobARBAm"
+    @test replace("foobarbaz", 'o' => 'z', 'a' => 'q', 'z' => 'm') == "fzzbqrbqm"
+
+
+    # PR #25732 Klaus Crusius <klaus.crusius@web.de>
+    @test replace("\u2202", '*' => '\0', "" => "") == "\u2202"
+
+    @test replace("foobar", 'o' => '0', "" => "") == "f00bar"
+    @test replace("foobar", 'o' => '0', count=1, "" => "") == "foobar"
+    @test replace("foobar", 'o' => '0', count=2, "" => "") == "f0obar"
+    @test replace("foobar", 'o' => "", "" => "") == "fbar"
+    @test replace("foobar", 'o' => "", count=1, "" => "") == "foobar"
+    @test replace("foobar", 'o' => "", count=2, "" => "") == "fobar"
+    @test replace("foobar", 'f' => 'F', "" => "") == "Foobar"
+    @test replace("foobar", 'r' => 'R', "" => "") == "foobaR"
+
+    @test replace("foofoofoo", "foo" => "bar", "" => "") == "barbarbar"
+    @test replace("foobarfoo", "foo" => "baz", "" => "") == "bazbarbaz"
+    @test replace("barfoofoo", "foo" => "baz", "" => "") == "barbazbaz"
+
+    @test replace("", "" => "", "" => "") == ""
+    @test replace("", "" => "x", "" => "") == "x"
+    @test replace("", "x" => "y", "" => "") == ""
+
+    @test replace("abcd", "" => "^", "" => "") == "^a^b^c^d^"
+    @test replace("abcd", "b" => "^", "" => "") == "a^cd"
+    @test replace("abcd", r"b?" => "^", "" => "") == "^a^c^d^"
+    @test replace("abcd", r"b+" => "^", "" => "") == "a^cd"
+    @test replace("abcd", r"b?c?" => "^", "" => "") == "^a^d^"
+    @test replace("abcd", r"[bc]?" => "^", "" => "") == "^a^^d^"
+
+    @test replace("foobarfoo", r"(fo|ba)" => "xx", "" => "") == "xxoxxrxxo"
+    @test replace("foobarfoo", r"(foo|ba)" => "bar", "" => "") == "barbarrbar"
+
+    @test replace("foobar", 'o' => 'ø', "" => "") == "føøbar"
+    @test replace("foobar", 'o' => 'ø', count=2, "" => "") == "føobar"
+    @test replace("føøbar", 'ø' => 'o', "" => "") == "foobar"
+    @test replace("føøbar", 'ø' => 'o', count=2, "" => "") == "foøbar"
+    @test replace("føøbar", 'ø' => 'ö', "" => "") == "fööbar"
+    @test replace("føøbar", 'ø' => 'ö', count=2, "" => "") == "föøbar"
+    @test replace("føøbar", 'ø' => "", "" => "") == "fbar"
+    @test replace("føøbar", 'ø' => "", count=2, "" => "") == "føbar"
+    @test replace("føøbar", 'f' => 'F', "" => "") == "Føøbar"
+    @test replace("ḟøøbar", 'ḟ' => 'F', "" => "") == "Føøbar"
+    @test replace("føøbar", 'f' => 'Ḟ', "" => "") == "Ḟøøbar"
+    @test replace("ḟøøbar", 'ḟ' => 'Ḟ', "" => "") == "Ḟøøbar"
+    @test replace("føøbar", 'r' => 'R', "" => "") == "føøbaR"
+    @test replace("føøbaṙ", 'ṙ' => 'R', "" => "") == "føøbaR"
+    @test replace("føøbar", 'r' => 'Ṙ', "" => "") == "føøbaṘ"
+    @test replace("føøbaṙ", 'ṙ' => 'Ṙ', "" => "") == "føøbaṘ"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "bar", "" => "") == "barbarbar"
+    @test replace("ḟøøbarḟøø", "ḟøø" => "baz", "" => "") == "bazbarbaz"
+    @test replace("barḟøøḟøø", "ḟøø" => "baz", "" => "") == "barbazbaz"
+
+    @test replace("foofoofoo", "foo" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ"
+    @test replace("fooƀäṙfoo", "foo" => "baz", "" => "") == "bazƀäṙbaz"
+    @test replace("ƀäṙfoofoo", "foo" => "baz", "" => "") == "ƀäṙbazbaz"
+
+    @test replace("foofoofoo", "foo" => "bar", "" => "") == "barbarbar"
+    @test replace("foobarfoo", "foo" => "ƀäż", "" => "") == "ƀäżbarƀäż"
+    @test replace("barfoofoo", "foo" => "ƀäż", "" => "") == "barƀäżƀäż"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ"
+    @test replace("ḟøøƀäṙḟøø", "ḟøø" => "baz", "" => "") == "bazƀäṙbaz"
+    @test replace("ƀäṙḟøøḟøø", "ḟøø" => "baz", "" => "") == "ƀäṙbazbaz"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "bar", "" => "") == "barbarbar"
+    @test replace("ḟøøbarḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäżbarƀäż"
+    @test replace("barḟøøḟøø", "ḟøø" => "ƀäż", "" => "") == "barƀäżƀäż"
+
+    @test replace("ḟøøḟøøḟøø", "ḟøø" => "ƀäṙ", "" => "") == "ƀäṙƀäṙƀäṙ"
+    @test replace("ḟøøƀäṙḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäżƀäṙƀäż"
+    @test replace("ƀäṙḟøøḟøø", "ḟøø" => "ƀäż", "" => "") == "ƀäṙƀäżƀäż"
+
+    @test replace("", "" => "ẍ", "" => "") == "ẍ"
+    @test replace("", "ẍ" => "ÿ", "" => "") == ""
+
+    @test replace("äƀçđ", "" => "π", "" => "") == "πäπƀπçπđπ"
+    @test replace("äƀçđ", "ƀ" => "π", "" => "") == "äπçđ"
+    @test replace("äƀçđ", r"ƀ?" => "π", "" => "") == "πäπçπđπ"
+    @test replace("äƀçđ", r"ƀ+" => "π", "" => "") == "äπçđ"
+    @test replace("äƀçđ", r"ƀ?ç?" => "π", "" => "") == "πäπđπ"
+    @test replace("äƀçđ", r"[ƀç]?" => "π", "" => "") == "πäππđπ"
+
+    @test replace("foobarfoo", r"(fo|ba)" => "ẍẍ", "" => "") == "ẍẍoẍẍrẍẍo"
+
+    @test replace("ḟøøbarḟøø", r"(ḟø|ba)" => "xx", "" => "") == "xxøxxrxxø"
+    @test replace("ḟøøbarḟøø", r"(ḟøø|ba)" => "bar", "" => "") == "barbarrbar"
+
+    @test replace("fooƀäṙfoo", r"(fo|ƀä)" => "xx", "" => "") == "xxoxxṙxxo"
+    @test replace("fooƀäṙfoo", r"(foo|ƀä)" => "ƀäṙ", "" => "") == "ƀäṙƀäṙṙƀäṙ"
+
+    @test replace("ḟøøƀäṙḟøø", r"(ḟø|ƀä)" => "xx", "" => "") == "xxøxxṙxxø"
+    @test replace("ḟøøƀäṙḟøø", r"(ḟøø|ƀä)" => "ƀäṙ", "" => "") == "ƀäṙƀäṙṙƀäṙ"
+
+    @test replace("foo", "oo" => uppercase, "" => "") == "fOO"
+
+    # Issue 13332
+    @test replace("abc", 'b' => 2.1, "" => "") == "a2.1c"
+
+    # test replace with a count for String and GenericString
+    # check that replace is a no-op if count==0
+    for s in ["aaa", Test.GenericString("aaa")]
+        @test_throws DomainError replace(s, 'a' => "", count = -1, "" => "")
+        @test replace(s, 'a' => 'z', count=0, "" => "")::String == s
+        @test replace(s, 'a' => 'z', count=1, "" => "") == "zaa"
+        @test replace(s, 'a' => 'z', count=2, "" => "") == "zza"
+        @test replace(s, 'a' => 'z', count=3, "" => "") == "zzz"
+        @test replace(s, 'a' => 'z', count=4, "" => "") == "zzz"
+        @test replace(s, 'a' => 'z', count=typemax(Int), "" => "") == "zzz"
+        @test replace(s, 'a' => 'z', "" => "") == "zzz"
+    end
+
+    let s = "abc"
+        @test replace(s) === s
+        @test replace(s, 'a' => 'z', "" => "") === "zbc"
+        @test replace(s, 'a' => 'z', 'b' => 'y') == "zyc"
+        @test replace(s, 'a' => 'z', 'c' => 'x', "b" => 'y') == "zyx"
+        @test replace(s, '1' => 'z', "" => "") == s
+        @test replace(s, 'b' => "BbB", "" => "", count=2) == "aBbBc"
+    end
+
+    let s = "quick quicker quickest"
+        @test replace(s) === s
+        @test replace(s, "quickest" => 'z', "quicker" => uppercase, "quick" => 'a') == "a QUICKER z"
+        @test replace(s, "quick" => 'a', "quicker" => uppercase, "quickest" => 'z') == "a aer aest"
+        @test replace(s, "quickest" => "lame", "quicker" => "is", "quick" => "Duck", count=2) == "Duck is quickest"
+        @test "1q1u1i1c1k1 1q1u1i1c1k1e1r1 1q1u1i1c1k1e1s1t1" ==
+              replace(s, "" => '1', "" => "") ==
+              replace(s, "" => '1', "" => '2')
+        @test replace(s, "qu" => "QU", "qu" => "never happens", "ick" => "") == "QU QUer QUest"
+        @test replace(s, " " => '_', "r " => "r-") == "quick_quicker-quickest"
+        @test replace(s, r"[aeiou]" => "ä", "ui" => "ki", "i" => "I") == "qääck qääckär qääckäst"
+        @test replace(s, "i" => "I", "ui" => "ki", r"[aeiou]" => "ä") == "qkick qkickär qkickäst"
+        @test replace(s, r"[^ ]+" => "word", "quicker " => "X", count=big"99") == "word word word"
+        @test replace(s, "quicker " => "X", r"[^ ]+" => "word", count=big"99") == "word Xword"
+
+        @test replace(s, r"(quick)(e)" => s"\2-\1", "x" => "X") == "quick e-quickr e-quickst"
+
+        @test replace(s, 'q' => 'Q', 'u' => 'U') == "QUick QUicker QUickest"
+        @test replace(s, 'q' => 'Q', r"u" => 'U') == "QUick QUicker QUickest"
+        @test replace(s, 'q' => 'Q', ==('u') => uppercase) == "QUick QUicker QUickest"
+        @test replace(s, 'q' => 'Q', islowercase => '-') == "Q---- Q------ Q-------"
+        @test replace(s, ['q', 'u'] => 'K') == "KKick KKicker KKickest"
+        @test replace(s, occursin("uq") => 'K') == "KKick KKicker KKickest"
+        @test replace(s, ==('q') => "B") == "Buick Buicker Buickest"
+
+        @test replace(s, "qui" => "A", 'r' => 'R') == "Ack AckeR Ackest"
+        @test replace(s, 'r' => 'x', islowercase => uppercase) == "QUICK QUICKEx QUICKEST"
+        @test replace(s, islowercase => uppercase, 'r' => 'x') == "QUICK QUICKER QUICKEST"
+        @test replace(s, "q" => "z", islowercase => uppercase, 'r' => 'x') == "zUICK zUICKER zUICKEST"
+        @test replace(s, "qui" => "A", 'r' => 'x', islowercase => uppercase) == "ACK ACKEx ACKEST"
+        @test replace(s, "qui" => "A", 'r' => 'x', islowercase => uppercase) == "ACK ACKEx ACKEST"
+        @test replace(s, r"q" => "z", islowercase => uppercase, 'r' => 'x') == "zUICK zUICKER zUICKEST"
+
+        @test replace(s, "q" => s"a\0b") == "aqbuick aqbuicker aqbuickest"
+        @test replace(s, "q" => s"a\0b\n\\\g<0>") == "aqb\n\\quick aqb\n\\quicker aqb\n\\quickest"
+        @test_throws ErrorException("PCRE error: unknown substring") replace(s, r"q" => s"a\1b")
+        @test_throws ErrorException("Bad replacement string: pattern is not a Regex") replace(s, "q" => s"a\1b")
+    end
+end
+
 @testset "chomp/chop" begin
-    @test chomp("foo\n") == "foo"
-    @test chomp("fo∀\n") == "fo∀"
-    @test chomp("foo\r\n") == "foo"
-    @test chomp("fo∀\r\n") == "fo∀"
-    @test chomp("fo∀") == "fo∀"
-    @test chop("") == ""
-    @test chop("fooε") == "foo"
-    @test chop("foεo") == "foε"
-    @test chop("∃∃∃∃") == "∃∃∃"
-    @test chop("∀ϵ∃Δ", head=0, tail=0) == "∀ϵ∃Δ"
-    @test chop("∀ϵ∃Δ", head=0, tail=1) == "∀ϵ∃"
-    @test chop("∀ϵ∃Δ", head=0, tail=2) == "∀ϵ"
-    @test chop("∀ϵ∃Δ", head=0, tail=3) == "∀"
-    @test chop("∀ϵ∃Δ", head=0, tail=4) == ""
-    @test chop("∀ϵ∃Δ", head=0, tail=5) == ""
-    @test chop("∀ϵ∃Δ", head=1, tail=0) == "ϵ∃Δ"
-    @test chop("∀ϵ∃Δ", head=2, tail=0) == "∃Δ"
-    @test chop("∀ϵ∃Δ", head=3, tail=0) == "Δ"
-    @test chop("∀ϵ∃Δ", head=4, tail=0) == ""
-    @test chop("∀ϵ∃Δ", head=5, tail=0) == ""
-    @test chop("∀ϵ∃Δ", head=1, tail=1) == "ϵ∃"
-    @test chop("∀ϵ∃Δ", head=2, tail=2) == ""
-    @test chop("∀ϵ∃Δ", head=3, tail=3) == ""
-    @test_throws ArgumentError chop("∀ϵ∃Δ", head=-3, tail=3)
-    @test_throws ArgumentError chop("∀ϵ∃Δ", head=3, tail=-3)
-    @test_throws ArgumentError chop("∀ϵ∃Δ", head=-3, tail=-3)
-
-    @test isa(chomp("foo"), SubString)
-    @test isa(chop("foo"), SubString)
+    for S in (String, SubStr, Test.GenericString)
+        @test chomp(S("foo\n")) == "foo"
+        @test chomp(S("fo∀\n")) == "fo∀"
+        @test chomp(S("foo\r\n")) == "foo"
+        @test chomp(S("fo∀\r\n")) == "fo∀"
+        @test chomp(S("fo∀")) == "fo∀"
+        @test chop(S("")) == ""
+        @test chop(S("fooε")) == "foo"
+        @test chop(S("foεo")) == "foε"
+        @test chop(S("∃∃∃∃")) == "∃∃∃"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=0) == "∀ϵ∃Δ"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=1) == "∀ϵ∃"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=2) == "∀ϵ"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=3) == "∀"
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=4) == ""
+        @test chop(S("∀ϵ∃Δ"), head=0, tail=5) == ""
+        @test chop(S("∀ϵ∃Δ"), head=1, tail=0) == "ϵ∃Δ"
+        @test chop(S("∀ϵ∃Δ"), head=2, tail=0) == "∃Δ"
+        @test chop(S("∀ϵ∃Δ"), head=3, tail=0) == "Δ"
+        @test chop(S("∀ϵ∃Δ"), head=4, tail=0) == ""
+        @test chop(S("∀ϵ∃Δ"), head=5, tail=0) == ""
+        @test chop(S("∀ϵ∃Δ"), head=1, tail=1) == "ϵ∃"
+        @test chop(S("∀ϵ∃Δ"), head=2, tail=2) == ""
+        @test chop(S("∀ϵ∃Δ"), head=3, tail=3) == ""
+        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=3)
+        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=3, tail=-3)
+        @test_throws ArgumentError chop(S("∀ϵ∃Δ"), head=-3, tail=-3)
+
+        for T in (String, SubStr, Test.GenericString, Regex)
+            S === Test.GenericString && T === Regex && continue # not supported
+            @test chopprefix(S("fo∀\n"), T("bog")) == "fo∀\n"
+            @test chopprefix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n"
+            @test chopprefix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n"
+            @test chopprefix(S("fo∀\n"), T("f")) == "o∀\n"
+            @test chopprefix(S("fo∀\n"), T("fo")) == "∀\n"
+            @test chopprefix(S("fo∀\n"), T("fo∀")) == "\n"
+            @test chopprefix(S("fo∀\n"), T("fo∀\n")) == ""
+            @test chopprefix(S("\nfo∀"), T("bog")) == "\nfo∀"
+            @test chopprefix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀"
+            @test chopprefix(S("\nfo∀"), T("\nfo∀")) == ""
+            @test chopprefix(S("\nfo∀"), T("\n")) == "fo∀"
+            @test chopprefix(S("\nfo∀"), T("\nf")) == "o∀"
+            @test chopprefix(S("\nfo∀"), T("\nfo")) == "∀"
+            @test chopprefix(S("\nfo∀"), T("\nfo∀")) == ""
+            @test chopprefix(S(""), T("")) == ""
+            @test chopprefix(S(""), T("asdf")) == ""
+            @test chopprefix(S(""), T("∃∃∃")) == ""
+            @test chopprefix(S("εfoo"), T("ε")) == "foo"
+            @test chopprefix(S("ofoε"), T("o")) == "foε"
+            @test chopprefix(S("∃∃∃∃"), T("∃")) == "∃∃∃"
+            @test chopprefix(S("∃∃∃∃"), T("")) == "∃∃∃∃"
+
+            @test chopsuffix(S("fo∀\n"), T("bog")) == "fo∀\n"
+            @test chopsuffix(S("fo∀\n"), T("\n∀foΔ")) == "fo∀\n"
+            @test chopsuffix(S("fo∀\n"), T("∀foΔ")) == "fo∀\n"
+            @test chopsuffix(S("fo∀\n"), T("\n")) == "fo∀"
+            @test chopsuffix(S("fo∀\n"), T("∀\n")) == "fo"
+            @test chopsuffix(S("fo∀\n"), T("o∀\n")) == "f"
+            @test chopsuffix(S("fo∀\n"), T("fo∀\n")) == ""
+            @test chopsuffix(S("\nfo∀"), T("bog")) == "\nfo∀"
+            @test chopsuffix(S("\nfo∀"), T("\n∀foΔ")) == "\nfo∀"
+            @test chopsuffix(S("\nfo∀"), T("\nfo∀")) == ""
+            @test chopsuffix(S("\nfo∀"), T("∀")) == "\nfo"
+            @test chopsuffix(S("\nfo∀"), T("o∀")) == "\nf"
+            @test chopsuffix(S("\nfo∀"), T("fo∀")) == "\n"
+            @test chopsuffix(S("\nfo∀"), T("\nfo∀")) == ""
+            @test chopsuffix(S(""), T("")) == ""
+            @test chopsuffix(S(""), T("asdf")) == ""
+            @test chopsuffix(S(""), T("∃∃∃")) == ""
+            @test chopsuffix(S("fooε"), T("ε")) == "foo"
+            @test chopsuffix(S("εofo"), T("o")) == "εof"
+            @test chopsuffix(S("∃∃∃∃"), T("∃")) == "∃∃∃"
+            @test chopsuffix(S("∃∃∃∃"), T("")) == "∃∃∃∃"
+        end
+        @test isa(chomp(S("foo")), SubString)
+        @test isa(chop(S("foo")), SubString)
+
+        if S !== Test.GenericString
+            @test chopprefix(S("∃∃∃b∃"), r"∃+") == "b∃"
+            @test chopsuffix(S("∃b∃∃∃"), r"∃+") == "∃b"
+        end
+
+        @test isa(chopprefix(S("foo"), "fo"), SubString)
+        @test isa(chopsuffix(S("foo"), "oo"), SubString)
+    end
 end
 
 @testset "bytes2hex and hex2bytes" begin
@@ -376,6 +617,11 @@ end
         #non-hex characters
         @test_throws ArgumentError hex2bytes(b"0123456789abcdefABCDEFGH")
     end
+
+    @testset "Issue 39284" begin
+        @test "efcdabefcdab8967452301" == bytes2hex(Iterators.reverse(hex2bytes("0123456789abcdefABCDEF")))
+        @test hex2bytes(Iterators.reverse(b"CE1A85EECc")) == UInt8[0xcc, 0xee, 0x58, 0xa1, 0xec]
+    end
 end
 
 # b"" should be immutable
@@ -383,6 +629,30 @@ let testb() = b"0123"
     b = testb()
     @test eltype(b) === UInt8
     @test b isa AbstractVector
-    @test_throws ErrorException b[4] = '4'
+    @test_throws Base.CanonicalIndexError b[4] = '4'
     @test testb() == UInt8['0','1','2','3']
 end
+
+@testset "Base.rest" begin
+    s = "aβcd"
+    @test Base.rest(s) === SubString(s)
+    a, b, c... = s
+    @test c === SubString(s, 4)
+
+    s = SubString("aβcd", 2)
+    @test Base.rest(s) === SubString(s)
+    b, c... = s
+    @test c === SubString(s, 3)
+
+    s = GenericString("aβcd")
+    @test Base.rest(s) === "aβcd"
+    a, b, c... = s
+    @test c === "cd"
+end
+
+@testset "endswith" begin
+    A = "Fun times with Julialang"
+    B = "A language called Julialang"
+    @test endswith(A, split(B, ' ')[end])
+    @test endswith(A, 'g')
+end
diff --git a/test/subarray.jl b/test/subarray.jl
index 4fd189e2ae441a..cc8aab94e4c424 100644
--- a/test/subarray.jl
+++ b/test/subarray.jl
@@ -134,8 +134,8 @@ end
 function test_bounds(@nospecialize(A))
     @test_throws BoundsError A[0]
     @test_throws BoundsError A[end+1]
-    trailing2 = ntuple(x->1, max(ndims(A)-2, 0))
-    trailing3 = ntuple(x->1, max(ndims(A)-3, 0))
+    trailing2 = ntuple(Returns(1), max(ndims(A)-2, 0))
+    trailing3 = ntuple(Returns(1), max(ndims(A)-3, 0))
     @test_throws BoundsError A[1, 0, trailing2...]
     @test_throws BoundsError A[1, end+1, trailing2...]
     @test_throws BoundsError A[1, 1, 0, trailing3...]
@@ -214,10 +214,10 @@ end
 function runviews(SB::AbstractArray, indexN, indexNN, indexNNN)
     @assert ndims(SB) > 2
     for i3 in indexN, i2 in indexN, i1 in indexN
-        runsubarraytests(SB, i1, i2, i3, ntuple(x->1, max(ndims(SB)-3, 0))...)
+        runsubarraytests(SB, i1, i2, i3, ntuple(Returns(1), max(ndims(SB)-3, 0))...)
     end
     for i2 in indexN, i1 in indexN
-        runsubarraytests(SB, i1, i2, ntuple(x->1, max(ndims(SB)-2, 0))...)
+        runsubarraytests(SB, i1, i2, ntuple(Returns(1), max(ndims(SB)-2, 0))...)
     end
     for i1 in indexNNN
         runsubarraytests(SB, i1)
@@ -698,3 +698,42 @@ import InteractiveUtils
     @test M*v == copy(M)*v
     @test (InteractiveUtils.@which M*v) == (InteractiveUtils.@which copy(M)*v)
 end
+
+
+isdefined(Main, :InfiniteArrays) || @eval Main include("testhelpers/InfiniteArrays.jl")
+using .Main.InfiniteArrays, Base64
+
+@testset "PR #37741: non-Int sizes" begin
+    r = BigInt(1):BigInt(100_000_000)^100
+    v = SubArray(r, (r,))
+    @test size(v) == (last(r),)
+
+    v = SubArray(OneToInf(), (OneToInf(),))
+    @test size(v) == (Infinity(),)
+    @test stringmime("text/plain", v; context=(:limit => true)) == "$(Infinity())-element view(::$(OneToInf{Int}), 1:1:$(Infinity())) with eltype $Int with indices 1:1:$(Infinity()):\n  1\n  2\n  3\n  4\n  5\n  6\n  7\n  8\n  9\n 10\n  ⋮"
+end
+
+@testset "PR #39809: copy on 0-dimensional SubArray" begin
+    v = [[1]]
+    s = @view v[1]
+    @test copy(s) == fill([1])
+end
+
+@testset "issue 40314: views of CartesianIndices" begin
+    c = CartesianIndices((1:2, 1:4))
+    @test (@view c[c]) === c
+    for inds in Any[(1:1, 1:2), (1:1:1, 1:2)]
+        c2 = @view c[inds...]
+        @test c2 isa CartesianIndices{2}
+        for i2 in inds[2], i1 in inds[1]
+            @test c2[i1, i2] == c[i1, i2]
+        end
+    end
+    for inds in Any[(Colon(), 1:2), (Colon(), 1:1:2)]
+        c2 = @view c[inds...]
+        @test c2 isa CartesianIndices{2}
+        for i2 in inds[2], i1 in axes(c, 1)
+            @test c2[i1, i2] == c[i1, i2]
+        end
+    end
+end
diff --git a/test/subtype.jl b/test/subtype.jl
index be324df04e2f57..eff2c021b481f7 100644
--- a/test/subtype.jl
+++ b/test/subtype.jl
@@ -73,15 +73,15 @@ function test_2()
     @test !issub(Tuple{Tuple{Int,Int},Tuple{Int,}}, Tuple{NTuple{N,Int},NTuple{N,Int}} where N)
     @test NTuple{0} === Tuple{}
 
-    @test !issub(Tuple{Val{3}, Vararg{Val{3}}}, Tuple{Vararg{Val{N}, N} where N})
+    @test !issub(Tuple{Val{3}, Vararg{Val{3}}}, Tuple{Vararg{Val{N}, N}} where N)
 
     @test issub_strict(Tuple{Int,Int}, Tuple{Int,Int,Vararg{Int,N}} where N)
     @test issub_strict(Tuple{Int,Int}, Tuple{E,E,Vararg{E,N}} where E where N)
 
     @test issub(Type{Tuple{VecElement{Bool}}}, (Type{Tuple{Vararg{VecElement{T},N}}} where T where N))
 
-    @test isequal_type(Type{Tuple{Vararg{Int,N}} where N}, Type{Tuple{Vararg{Int,N} where N}})
-    @test Type{Tuple{Vararg{Int,N}} where N} !== Type{Tuple{Vararg{Int,N} where N}}
+    @test isequal_type(Type{Tuple{Vararg{Int,N}} where N}, Type{Tuple{Vararg{Int}}})
+    @test Type{Tuple{Vararg{Int,N}} where N} !== Type{Tuple{Vararg{Int}}}
 end
 
 function test_diagonal()
@@ -140,7 +140,7 @@ function test_diagonal()
     @test !issub(Type{Tuple{T,Any} where T},   Type{Tuple{T,T}} where T)
     @test !issub(Type{Tuple{T,Any,T} where T}, Type{Tuple{T,T,T}} where T)
     @test_broken issub(Type{Tuple{T} where T},       Type{Tuple{T}} where T)
-    @test_broken issub(Ref{Tuple{T} where T},        Ref{Tuple{T}} where T)
+    @test  issub(Ref{Tuple{T} where T},        Ref{Tuple{T}} where T)
     @test !issub(Type{Tuple{T,T} where T},     Type{Tuple{T,T}} where T)
     @test !issub(Type{Tuple{T,T,T} where T},   Type{Tuple{T,T,T}} where T)
     @test  isequal_type(Ref{Tuple{T, T} where Int<:T<:Int},
@@ -165,6 +165,10 @@ function test_diagonal()
                        Tuple{Ref{Tuple{N1,N1}}, Ref{N2}} where {N1, N2})
     @test !issub(Tuple{Type{Tuple{Vararg{T}} where T <: Integer}, Tuple{Float64, Int}},
                  Tuple{Type{Tuple{Vararg{T}}}, Tuple{Vararg{T}}} where T)
+
+    # non-types
+    @test issub_strict(Tuple{3,3}, NTuple)
+    @test !issub(Tuple{3,4}, NTuple)
 end
 
 # level 3: UnionAll
@@ -583,7 +587,7 @@ function test_old()
     @test !(Type{Tuple{Nothing}} <: Tuple{Type{Nothing}})
 end
 
-const menagerie =
+const easy_menagerie =
     Any[Bottom, Any, Int, Int8, Integer, Real,
         Array{Int,1}, AbstractArray{Int,1},
         Tuple{Int,Vararg{Integer}}, Tuple{Integer,Vararg{Int}}, Tuple{},
@@ -603,12 +607,14 @@ const menagerie =
         Array{(@UnionAll T<:Int T), 1},
         (@UnionAll T<:Real @UnionAll S<:AbstractArray{T,1} Tuple{T,S}),
         Union{Int,Ref{Union{Int,Int8}}},
-        (@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}}),
         ]
 
-let new = Any[]
-    # add variants of each type
-    for T in menagerie
+const hard_menagerie =
+    Any[(@UnionAll T Union{Tuple{T,Array{T,1}}, Tuple{T,Array{Int,1}}})]
+
+function add_variants!(types)
+    new = Any[]
+    for T in types
         push!(new, Ref{T})
         push!(new, Tuple{T})
         push!(new, Tuple{T,T})
@@ -616,9 +622,14 @@ let new = Any[]
         push!(new, @UnionAll S<:T S)
         push!(new, @UnionAll S<:T Ref{S})
     end
-    append!(menagerie, new)
+    append!(types, new)
 end
 
+add_variants!(easy_menagerie)
+add_variants!(hard_menagerie)
+
+const menagerie = [easy_menagerie; hard_menagerie]
+
 function test_properties()
     x→y = !x || y
     ¬T = @UnionAll X>:T Ref{X}
@@ -841,7 +852,7 @@ function test_intersection()
     @testintersect((@UnionAll N Tuple{Array{Int,N},Vararg{Int,N}}),
                    Tuple{Matrix{Int},Int,Vararg{Float64}}, Bottom)
 
-    @testintersect(Tuple{Array{Any,1}, Tuple{Int64, Int64, Vararg{Int64, N} where N}},
+    @testintersect(Tuple{Array{Any,1}, Tuple{Int64, Int64, Vararg{Int64}}},
                    Tuple{Array{T,N}, Tuple{Vararg{Int64,N}}} where N where T,
                    Bottom)
 
@@ -915,7 +926,7 @@ function test_intersection()
 
     @testintersect(Tuple{Type{S}, Tuple{Any, Vararg{Any}}} where S<:Tuple{Any, Vararg{Any}},
                    Tuple{Type{T}, T} where T,
-                   Tuple{Type{S},S} where S<:Tuple{Any,Vararg{Any,N} where N})
+                   Tuple{Type{S},S} where S<:Tuple{Any,Vararg{Any}})
 
     # part of issue #20450
     @testintersect(Tuple{Array{Ref{T}, 1}, Array{Pair{M, V}, 1}} where V where T where M,
@@ -936,7 +947,7 @@ function test_intersection()
                                     Tuple{Vector{T},Vector{T}} where T>:Vector})
 
     # part of issue #20344
-    @testintersect(Tuple{Type{Tuple{Vararg{T, N} where N}}, Tuple} where T,
+    @testintersect(Tuple{Type{Tuple{Vararg{T}}}, Tuple} where T,
                    Tuple{Type{Tuple{Vararg{T, N}}} where N where T, Any},
                    Bottom)
     @testintersect(Type{NTuple{N,UnitRange}} where N,
@@ -1039,10 +1050,10 @@ function test_intersection()
 
     @testintersect(Tuple{Type{Tuple{Vararg{Integer}}}, Tuple},
                    Tuple{Type{Tuple{Vararg{V}}}, Tuple{Vararg{V}}} where {V},
-                   Tuple{Type{Tuple{Vararg{Integer,N} where N}},Tuple{Vararg{Integer,N} where N}})
+                   Tuple{Type{Tuple{Vararg{Integer}}},Tuple{Vararg{Integer}}})
     @testintersect(Tuple{Type{Tuple{Vararg{Union{Int,Symbol}}}}, Tuple},
                    Tuple{Type{Tuple{Vararg{V}}}, Tuple{Vararg{V}}} where {V},
-                   Tuple{Type{Tuple{Vararg{Union{Int,Symbol},N} where N}},Tuple{Vararg{Union{Int,Symbol},N} where N}})
+                   Tuple{Type{Tuple{Vararg{Union{Int,Symbol}}}},Tuple{Vararg{Union{Int,Symbol}}}})
 
     # non types
     @testintersect(Tuple{1}, Tuple{Any}, Tuple{1})
@@ -1053,12 +1064,19 @@ function test_intersection()
 end
 
 function test_intersection_properties()
-    for T in menagerie
-        for S in menagerie
+    for i in eachindex(menagerie)
+        T = menagerie[i]
+        for j in eachindex(menagerie)
+            S = menagerie[j]
             I = _type_intersect(T,S)
             I2 = _type_intersect(S,T)
             @test isequal_type(I, I2)
-            @test issub(I, T) && issub(I, S)
+            if i > length(easy_menagerie) || j > length(easy_menagerie)
+                # TODO: these cases give a conservative answer
+                @test issub(I, T) || issub(I, S)
+            else
+                @test issub(I, T) && issub(I, S)
+            end
             if issub(T, S)
                 @test isequal_type(I, T)
             end
@@ -1373,10 +1391,10 @@ g25430(t::Vector{Tuple{>:Int}}) = true
 g24521(::T, ::T) where {T} = T
 @test_throws MethodError g24521(Tuple{Any}, Tuple{T} where T)
 @test g24521(Vector, Matrix) == UnionAll
-@test [Tuple{Vararg{Int64,N} where N}, Tuple{Vararg{Int64,N}} where N] isa Vector{Type}
+@test [Tuple{Vararg{Int64}}, Tuple{Vararg{Int64,N}} where N] isa Vector{Type}
 f24521(::Type{T}, ::Type{T}) where {T} = T
 @test f24521(Tuple{Any}, Tuple{T} where T) == Tuple{Any}
-@test f24521(Tuple{Vararg{Int64,N} where N}, Tuple{Vararg{Int64,N}} where N) == Tuple{Vararg{Int64,N}} where N
+@test f24521(Tuple{Vararg{Int64}}, Tuple{Vararg{Int64,N}} where N) == Tuple{Vararg{Int64,N}} where N
 
 # issue #26654
 @test !(Ref{Union{Int64, Ref{Number}}} <: Ref{Union{Ref{T}, T}} where T)
@@ -1559,7 +1577,7 @@ f31082(::Pair{B, C}, ::C, ::C) where {B, C} = 1
                Tuple{Type{Val{T}},Int,T} where T)
 @testintersect(Tuple{Type{Val{T}},Integer,T} where T,
                Tuple{Type,Int,Integer},
-               Tuple{Type{Val{T}},Int,T} where T<:Integer)
+               Tuple{Type{Val{T}},Int,Integer} where T)
 @testintersect(Tuple{Type{Val{T}},Integer,T} where T>:Integer,
                Tuple{Type,Int,Integer},
                Tuple{Type{Val{T}},Int,Integer} where T>:Integer)
@@ -1650,7 +1668,7 @@ end
 # Various nasty varargs
 let T1 = Tuple{Int, Tuple{T}, Vararg{T, 3}} where T <: Int,
     T2 = Tuple{Int, Any, Any, Any, Integer},
-    T3 = Tuple{Int, Any, Any, Any, Integer, Vararg{Integer, N} where N}
+    T3 = Tuple{Int, Any, Any, Any, Integer, Vararg{Integer}}
 
     @test issub_strict(T1, T2)
     @test issub_strict(T2, T3)
@@ -1659,23 +1677,20 @@ end
 let A = Tuple{Float64, Vararg{Int64, 2}},
     B1 = Tuple{Float64, Vararg{T, 2}} where T <: Int64,
     B2 = Tuple{Float64, T, T} where T <: Int64,
-    C = Tuple{Float64, Any, Vararg{Integer, N} where N}
+    C = Tuple{Float64, Any, Vararg{Integer}}
 
     @test A == B1 == B2
     @test issub_strict(A, C)
     @test issub_strict(B1, C)
     @test issub_strict(B2, C)
 end
-let A = Tuple{Vararg{Val{N}, N} where N},
-    B = Tuple{Vararg{Val{N}, N}} where N,
+let B = Tuple{Vararg{Val{N}, N}} where N,
     C = Tuple{Val{2}, Val{2}}
 
-    @test isequal_type(A, B)
     @test issub(C, B)
-    @test issub(C, A)
 end
 @test isequal_type(Tuple{T, Vararg{T, 2}} where T<:Real, Tuple{Vararg{T, 3}} where T<: Real)
-@test !issub(Tuple{Vararg{T, 3}} where T<:Real, Tuple{Any, Any, Any, Any, Vararg{Any, N} where N})
+@test !issub(Tuple{Vararg{T, 3}} where T<:Real, Tuple{Any, Any, Any, Any, Vararg{Any}})
 @test !issub(Tuple{Vararg{T, 3}} where T<:Real, Tuple{Any, Any, Any, Any, Vararg{Any, N}} where N)
 @test issub_strict(Ref{Tuple{Int, Vararg{Int, N}}} where N, Ref{Tuple{Vararg{Int, N}}} where N)
 let T31805 = Tuple{Type{Tuple{}}, Tuple{Vararg{Int8, A}}} where A,
@@ -1690,6 +1705,13 @@ end
 
 @test !isequal_type(Tuple{Int, Vararg{T, 3}} where T<:Real, Tuple{Int, Real, Vararg{T, 2}} where T<:Integer)
 
+@test !isequal_type(Tuple{Tuple{Vararg{Int}},Tuple{Vararg{Int}}},
+                    Tuple{Tuple{Vararg{Int, N}}, Tuple{Vararg{Int, N}}} where N)
+
+let (_, E) = intersection_env(Tuple{Tuple{Vararg{Int}}}, Tuple{Tuple{Vararg{Int,N}}} where N)
+    @test !isa(E[1], Type)
+end
+
 # this is is a timing test, so it would fail on debug builds
 #let T = Type{Tuple{(Union{Int, Nothing} for i = 1:23)..., Union{String, Nothing}}},
 #    S = Type{T} where T<:Tuple{E, Vararg{E}} where E
@@ -1733,9 +1755,9 @@ c32703(::Type{<:Str{C}}, str::Str{C}) where {C<:CSE} = str
 
 # issue #33337
 @test !issub(Tuple{Type{T}, T} where T<:NTuple{30, Union{Nothing, Ref}},
-             Tuple{Type{Tuple{Vararg{V, N} where N}}, Tuple{Vararg{V, N} where N}} where V)
+             Tuple{Type{Tuple{Vararg{V}}}, Tuple{Vararg{V}}} where V)
 @test  issub(Tuple{Type{Any}, NTuple{4,Union{Int,Nothing}}},
-             Tuple{Type{V}, Tuple{Vararg{V, N} where N}} where V)
+             Tuple{Type{V}, Tuple{Vararg{V}}} where V)
 
 # issue #26065
 t26065 = Ref{Tuple{T,Ref{Union{Ref{Tuple{Ref{Union{Ref{Ref{Tuple{Ref{Tuple{Union{Tuple{Ref{Ref{T}},T}, T},T}},T}}}, T}},T}}, Ref{T}, T}}}} where T
@@ -1754,7 +1776,7 @@ s26065 = Ref{Tuple{T,Ref{Union{Ref{Tuple{Ref{Union{Ref{Ref{Tuple{Ref{Tuple{Union
                Union{})
 
 @test !issub(Tuple{Type{T}, T} where T<:Tuple{String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}, String, Union{Base.Regex, AbstractChar, AbstractString}},
-             Tuple{Type{Tuple{Vararg{V, N} where N}}, Tuple{Vararg{V, N} where N}} where V)
+             Tuple{Type{Tuple{Vararg{V}}}, Tuple{Vararg{V}}} where V)
 
 # issue 36100
 @test NamedTuple{(:a, :b), Tuple{Missing, Union{}}} == NamedTuple{(:a, :b), Tuple{Missing, Union{}}}
@@ -1772,3 +1794,191 @@ end
 
 # issue #37255
 @test Type{Union{}} == Type{T} where {Union{}<:T<:Union{}}
+
+# issue #38081
+struct AlmostLU{T, S<:AbstractMatrix{T}}
+end
+let X1 = Tuple{AlmostLU, Vector{T}} where T,
+    X2 = Tuple{AlmostLU{S, X} where X<:Matrix, Vector{S}} where S<:Union{Float32, Float64},
+    I = typeintersect(X1, X2)
+    # TODO: the quality of this intersection is not great; for now just test that it
+    # doesn't stack overflow
+    @test I<:X1 || I<:X2
+    actual = Tuple{Union{AlmostLU{S, X} where X<:Matrix{S}, AlmostLU{S, <:Matrix}}, Vector{S}} where S<:Union{Float32, Float64}
+    @test I == actual
+end
+
+let
+    # issue #22787
+    # for now check that these don't stack overflow
+    t = typeintersect(Tuple{Type{Q}, Q, Ref{Q}} where Q<:Ref,
+                      Tuple{Type{S}, Union{Ref{S}, Ref{R}}, R} where R where S)
+    @test_broken t != Union{}
+    t = typeintersect(Tuple{Type{T}, T, Ref{T}} where T,
+                      Tuple{Type{S}, Ref{S}, S} where S)
+    @test_broken t != Union{}
+
+    # issue #38279
+    t = typeintersect(Tuple{<:Array{T, N}, Val{T}} where {T<:Real, N},
+                      Tuple{<:Array{T, N}, Val{<:AbstractString}}  where {T<:Real, N})
+    @test t == Tuple{<:Array{Union{}, N}, Val{Union{}}} where N
+end
+
+# issue #36951
+@testintersect(Type{T} where T>:Missing,
+               Type{Some{T}} where T,
+               Union{})
+
+# issue #24333
+@test_broken (Type{Union{Ref,Cvoid}} <: Type{Union{T,Cvoid}} where T)
+
+# issue #38423
+let
+    Either{L, R} = Union{Ref{L}, Val{R}}
+    A = Tuple{Type{Ref{L}}, Type{Either{L, <:Any}}} where L
+    B = Tuple{Type{Ref{L2}}, Type{Either{L1, R}}} where {L1, R, L2 <: L1}
+    I = typeintersect(A, B)
+    @test I != Union{}
+    @test_broken I <: A
+    @test_broken I <: B
+end
+
+# issue #36804
+let
+    Either{L, R} = Union{Some{L}, Ref{R}}
+    f(::Type{Either{L2, R}}, ::Type{Either{L1, R}}) where {L1, R, L2 <: L1} = Either{L1, R}
+    f(::Type{Either{L, R1}}, ::Type{Either{L, R2}}) where {L, R1, R2 <: R1} = Either{L, R1}
+    @test f(Either{Int,Real}, Either{Int,Float32}) == Either{Int,Real}
+end
+
+# issue #36544
+let A = Tuple{T, Ref{T}, T} where {T},
+    B = Tuple{T, T, Ref{T}} where {T}
+    I = typeintersect(A, B)
+    @test I != Union{}
+    @test_broken I <: A
+    @test_broken I <: B
+end
+
+# issue #34170
+let A = Tuple{Type{T} where T<:Ref, Ref, Union{T, Union{Ref{T}, T}} where T<:Ref},
+    B = Tuple{Type{T}, Ref{T}, Union{Int, Ref{T}, T}} where T
+    I = typeintersect(A,B)
+    # this was a case where <: disagreed with === (due to a badly-normalized type)
+    @test I == typeintersect(A,B)
+    @test I == Tuple{Type{T}, Ref{T}, Ref} where T<:Ref
+end
+
+# issue #39218
+let A = Int, B = String, U = Union{A, B}
+    @test issub_strict(Union{Tuple{A, A}, Tuple{B, B}}, Tuple{U, U})
+    @test issub_strict(Union{Tuple{A, A}, Tuple{B, B}}, Tuple{Union{A, B}, Union{A, B}})
+end
+
+struct A39218 end
+struct B39218 end
+const AB39218 = Union{A39218,B39218}
+f39218(::T, ::T) where {T<:AB39218} = false
+g39218(a, b) = (@nospecialize; if a isa AB39218 && b isa AB39218; f39218(a, b); end;)
+@test g39218(A39218(), A39218()) === false
+@test_throws MethodError g39218(A39218(), B39218())
+
+# issue #39521
+@test Tuple{Type{Tuple{A}} where A, DataType, DataType} <: Tuple{Vararg{B}} where B
+@test Tuple{DataType, Type{Tuple{A}} where A, DataType} <: Tuple{Vararg{B}} where B
+
+let A = Tuple{Type{<:Union{Number, T}}, Ref{T}} where T,
+    B = Tuple{Type{<:Union{Number, T}}, Ref{T}} where T
+    # TODO: these are caught by the egal check, but the core algorithm gets them wrong
+    @test A == B
+    @test A <: B
+end
+
+# issue #39698
+let T = Type{T} where T<:(AbstractArray{I}) where I<:(Base.IteratorsMD.CartesianIndex),
+    S = Type{S} where S<:(Base.IteratorsMD.CartesianIndices{A, B} where B<:Tuple{Vararg{Any, A}} where A)
+    I = typeintersect(T, S)
+    @test_broken I <: T
+    @test I <: S
+    @test_broken I == typeintersect(S, T)
+end
+
+# issue #39948
+let A = Tuple{Array{Pair{T, JT} where JT<:Ref{T}, 1} where T, Vector},
+    I = typeintersect(A, Tuple{Vararg{Vector{T}}} where T)
+    @test I <: A
+    @test !Base.has_free_typevars(I)
+end
+
+# issue #8915
+struct D8915{T<:Union{Float32,Float64}}
+    D8915{T}(a) where {T} = 1
+    D8915{T}(a::Int) where {T} = 2
+end
+@test D8915{Float64}(1) == 2
+@test D8915{Float64}(1.0) == 1
+
+# issue #18985
+f18985(x::T, y...) where {T<:Union{Int32,Int64}} = (length(y), f18985(y[1], y[2:end]...)...)
+f18985(x::T) where {T<:Union{Int32,Int64}} = 100
+@test f18985(1, 2, 3) == (2, 1, 100)
+
+# issue #40048
+let A = Tuple{Ref{T}, Vararg{T}} where T,
+    B = Tuple{Ref{U}, Union{Ref{S}, Ref{U}, Int}, Union{Ref{S}, S}} where S where U,
+    C = Tuple{Ref{U}, Union{Ref{S}, Ref{U}, Ref{W}}, Union{Ref{S}, W, V}} where V<:AbstractArray where W where S where U
+    I = typeintersect(A, B)
+    @test I != Union{}
+    @test I <: A
+    @test I <: B
+    # avoid stack overflow
+    J = typeintersect(A, C)
+    @test_broken J != Union{}
+end
+
+let A = Tuple{Dict{I,T}, I, T} where T where I,
+    B = Tuple{AbstractDict{I,T}, T, I} where T where I
+    # TODO: we should probably have I == T here
+    @test typeintersect(A, B) == Tuple{Dict{I,T}, I, T} where {I, T}
+end
+
+let A = Tuple{UnionAll, Vector{Any}},
+    B = Tuple{Type{T}, T} where T<:AbstractArray,
+    I = typeintersect(A, B)
+    @test !isconcretetype(I)
+    @test I == Tuple{Type{T}, Vector{Any}} where T<:AbstractArray
+end
+
+@testintersect(Tuple{Type{Vector{<:T}}, T} where {T<:Integer},
+               Tuple{Type{T}, AbstractArray} where T<:Array,
+               Bottom)
+
+struct S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}
+end
+
+@testintersect(Tuple{Type{S40{_A, _B, _C, _D, _E, _F, _G, _H, _I, _J, _K, _L, _M, _N, _O, _P, _Q, _R, _S, _T, _U, _V, _W, _X, _Y, _Z, _Z1, _Z2, _Z3, _Z4, _Z5, _Z6, _Z7, _Z8, _Z9, _Z10, _Z11, _Z12, _Z13, _Z14}} where _Z14 where _Z13 where _Z12 where _Z11 where _Z10 where _Z9 where _Z8 where _Z7 where _Z6 where _Z5 where _Z4 where _Z3 where _Z2 where _Z1 where _Z where _Y where _X where _W where _V where _U where _T where _S where _R where _Q where _P where _O where _N where _M where _L where _K where _J where _I where _H where _G where _F where _E where _D where _C where _B where _A, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any, Any},
+               Tuple{Type{S40{A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1}, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, A20, A21, A22, A23, A24, A25, A26, A27, A28, A29, A30, A31, A32, A33, A34, A35, A36, A37, A38, A39, A40} where A40 where A39 where A38 where A37 where A36 where A35 where A34 where A33 where A32 where A31 where A30 where A29 where A28 where A27 where A26 where A25 where A24 where A23 where A22 where A21 where A20 where A19 where A18 where A17 where A16 where A15 where A14 where A13 where A12 where A11 where A10 where A9 where A8 where A7 where A6 where A5 where A4 where A3 where A2 where A1,
+               Bottom)
+
+let A = Tuple{Any, Type{Ref{_A}} where _A},
+    B = Tuple{Type{T}, Type{<:Union{Ref{T}, T}}} where T,
+    I = typeintersect(A, B)
+    @test I != Union{}
+    # TODO: this intersection result is still too narrow
+    @test_broken Tuple{Type{Ref{Integer}}, Type{Ref{Integer}}} <: I
+end
+
+@testintersect(Tuple{Type{T}, T} where T<:(Tuple{Vararg{_A, _B}} where _B where _A),
+               Tuple{Type{Tuple{Vararg{_A, N}} where _A<:F}, Pair{N, F}} where F where N,
+               Bottom)
+
+# issue #42409
+@testintersect(Tuple{Type{Pair{_A, S} where S<:AbstractArray{<:_A, 2}}, Dict} where _A,
+               Tuple{Type{Pair{_A, S} where S<:AbstractArray{<:_A, 2}} where _A, Union{Array, Pair}},
+               Bottom)
+
+# https://github.com/JuliaLang/julia/issues/44735
+@test_throws TypeError(:typeassert, Type, Vararg{Int}) typeintersect(Vararg{Int}, Int)
+@test_throws TypeError(:typeassert, Type, Vararg{Int}) typeintersect(Int, Vararg{Int})
+@test_throws TypeError(:typeassert, Type, 1) typeintersect(1, Int)
+@test_throws TypeError(:typeassert, Type, 1) typeintersect(Int, 1)
diff --git a/test/syntax.jl b/test/syntax.jl
index 7ceef25ce36ee5..59e84bb343f215 100644
--- a/test/syntax.jl
+++ b/test/syntax.jl
@@ -60,6 +60,9 @@ macro test999_str(args...); args; end
     a
     b""" == ("a\nb",)
 
+# make sure a trailing integer, not just a symbol, is allowed also
+@test test999"foo"123 == ("foo", 123)
+
 # issue #5997
 @test_throws ParseError Meta.parse(": x")
 @test_throws ParseError Meta.parse("""begin
@@ -615,15 +618,12 @@ end
 @test A15838.@f() === nothing
 @test A15838.@f(1) === :b
 let ex = :(A15838.@f(1, 2)), __source__ = LineNumberNode(@__LINE__, Symbol(@__FILE__))
-    nometh = try
+    e = try
         macroexpand(@__MODULE__, ex)
         false
     catch ex
         ex
-    end::LoadError
-    @test nometh.file === string(__source__.file)
-    @test nometh.line === __source__.line
-    e = nometh.error::MethodError
+    end::MethodError
     @test e.f === getfield(A15838, Symbol("@f"))
     @test e.args === (__source__, @__MODULE__, 1, 2)
 end
@@ -819,16 +819,30 @@ let f = function (x; kw...)
 end
 
 # normalization of Unicode symbols (#19464)
-let ε=1, μ=2, x=3, î=4
+let ε=1, μ=2, x=3, î=4, ⋅=5, (-)=6
     # issue #5434 (mu vs micro):
     @test Meta.parse("\u00b5") === Meta.parse("\u03bc")
     @test µ == μ == 2
     # NFC normalization of identifiers:
     @test Meta.parse("\u0069\u0302") === Meta.parse("\u00ee")
-    @test î == 4
+    @test î == 4
     # latin vs greek ε (#14751)
     @test Meta.parse("\u025B") === Meta.parse("\u03B5")
     @test ɛ == ε == 1
+    # middot char · or · vs math dot operator ⋅ (#25098)
+    @test Meta.parse("\u00b7") === Meta.parse("\u0387") === Meta.parse("\u22c5")
+    @test (·) == (·) == (⋅) == 5
+    # minus − vs hyphen-minus - (#26193)
+    @test Meta.parse("\u2212") === Meta.parse("-")
+    @test Meta.parse("\u221242") === Meta.parse("-42")
+    @test Meta.parse("\u2212 42") == Meta.parse("- 42")
+    @test Meta.parse("\u2212x") == Meta.parse("-x")
+    @test Meta.parse("x \u2212 42") == Meta.parse("x - 42")
+    @test Meta.parse("x \u2212= 42") == Meta.parse("x -= 42")
+    @test Meta.parse("100.0e\u22122") === Meta.parse("100.0E\u22122") === Meta.parse("100.0e-2")
+    @test Meta.parse("100.0f\u22122") === Meta.parse("100.0f-2")
+    @test Meta.parse("0x100p\u22128") === Meta.parse("0x100P\u22128") === Meta.parse("0x100p-8")
+    @test (−) == (-) == 6
 end
 
 # issue #8925
@@ -861,7 +875,7 @@ let f = function (x::T, y::S) where T<:S where S
 end
 
 # issue #20541
-@test Meta.parse("[a .!b]") == Expr(:hcat, :a, Expr(:call, :(.!), :b))
+@test Meta.parse("[a .!b]") == Expr(:hcat, :a, Expr(:call, :.!, :b))
 
 @test Meta.lower(Main, :(a{1} = b)) == Expr(:error, "invalid type parameter name \"1\"")
 @test Meta.lower(Main, :(a{2<:Any} = b)) == Expr(:error, "invalid type parameter name \"2\"")
@@ -1190,6 +1204,25 @@ end
 @test [(0,0)... 1] == [0 0 1]
 @test Float32[(0,0)... 1] == Float32[0 0 1]
 
+# issue #43960, evaluation order of splatting in `ref`
+let a = [], b = [4,3,2,1]
+    f() = (push!(a, 1); 2)
+    g() = (push!(a, 2); ())
+    @test b[f(), g()...] == 3
+    @test a == [1,2]
+end
+
+# issue #44239
+struct KWGetindex end
+Base.getindex(::KWGetindex, args...; kws...) = (args, NamedTuple(kws))
+let A = KWGetindex(), a = [], b = [4,3,2,1]
+    f() = (push!(a, 1); 2)
+    g() = (push!(a, 2); ())
+    @test A[f(), g()..., k = f()] === ((2,), (k = 2,))
+    @test a == [1, 2, 1]
+    @test A[var"end"=1] === ((), (var"end" = 1,))
+end
+
 @testset "raw_str macro" begin
     @test raw"$" == "\$"
     @test raw"\n" == "\\n"
@@ -1354,7 +1387,6 @@ end
 @test Meta.parse("√3x^2") == Expr(:call, :*, Expr(:call, :√, 3), Expr(:call, :^, :x, 2))
 @test Meta.parse("-3x^2") == Expr(:call, :*, -3, Expr(:call, :^, :x, 2))
 @test_throws ParseError Meta.parse("2!3")
-@test_throws ParseError Meta.parse("2√3")
 
 # issue #27914
 @test Meta.parse("2f(x)")        == Expr(:call, :*, 2, Expr(:call, :f, :x))
@@ -1420,6 +1452,14 @@ invalid assignment location "function (s, o...)
 end\""""
 end
 
+let ex = Meta.lower(@__MODULE__, :(function g end = 1))
+    @test isa(ex, Expr) && ex.head === :error
+    @test ex.args[1] == """
+invalid assignment location "function g
+end\""""
+end
+
+
 # issue #15229
 @test Meta.lower(@__MODULE__, :(function f(x); local x; 0; end)) ==
     Expr(:error, "local variable name \"x\" conflicts with an argument")
@@ -1496,8 +1536,18 @@ let ex = Meta.parse("@test27521(2) do y; y; end")
     @test macroexpand(@__MODULE__, ex) == Expr(:tuple, fex, 2)
 end
 
+# issue #43018
+module M43018
+    macro test43018(fn)
+        quote $(fn)() end
+    end
+end
+@test :(@M43018.test43018() do; end) == :(M43018.@test43018() do; end)
+@test @macroexpand(@M43018.test43018() do; end) == @macroexpand(M43018.@test43018() do; end)
+@test @M43018.test43018() do; 43018 end == 43018
+
 # issue #27129
-f27129(x = 1) = (@Base._inline_meta; x)
+f27129(x = 1) = (@inline; x)
 for meth in methods(f27129)
     @test ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), meth, C_NULL, meth.source).inlineable
 end
@@ -1861,13 +1911,18 @@ end
 @test_throws UndefVarError eval(:(1+$(Symbol(""))))
 
 # issue #31404
-f31404(a, b; kws...) = (a, b, kws.data)
+f31404(a, b; kws...) = (a, b, values(kws))
 @test f31404(+, (Type{T} where T,); optimize=false) === (+, (Type,), (optimize=false,))
 
 # issue #28992
 macro id28992(x) x end
 @test @id28992(1 .+ 2) == 3
-@test Meta.isexpr(Meta.lower(@__MODULE__, :(@id28992((.+)(a,b) = 0))), :error)
+@test Meta.@lower(.+(a,b) = 0) == Expr(:error, "invalid function name \".+\"")
+@test Meta.@lower((.+)(a,b) = 0) == Expr(:error, "invalid function name \"(.+)\"")
+let m = @__MODULE__
+    @test Meta.lower(m, :($m.@id28992(.+(a,b) = 0))) == Expr(:error, "invalid function name \"$(nameof(m)).:.+\"")
+    @test Meta.lower(m, :($m.@id28992((.+)(a,b) = 0))) == Expr(:error, "invalid function name \"(.$(nameof(m)).+)\"")
+end
 @test @id28992([1] .< [2] .< [3]) == [true]
 @test @id28992(2 ^ -2) == 0.25
 @test @id28992(2 .^ -2) == 0.25
@@ -1968,8 +2023,12 @@ let a(; b) = b
 end
 
 # issue #33987
-f33987(args::(Vararg{Any, N} where N); kwargs...) = args
-@test f33987(1,2,3) === (1,2,3)
+@test_deprecated eval(quote
+    # This syntax is deprecated. This test should be removed when the
+    # deprecation is.
+    f33987(args::(Vararg{Any, N} where N); kwargs...) = args
+    @test f33987(1,2,3) === (1,2,3)
+end)
 
 macro id_for_kwarg(x); x; end
 Xo65KdlD = @id_for_kwarg let x = 1
@@ -2093,6 +2152,16 @@ end
 end
 @test z28789 == 42
 
+# issue #38650, `struct` should always be a hard scope
+f38650() = 0
+@eval begin
+    $(Expr(:softscope, true))
+    struct S38650
+        f38650() = 1
+    end
+end
+@test f38650() == 0
+
 # issue #37126
 @test isempty(Test.collect_test_logs() do
     include_string(@__MODULE__, """
@@ -2141,6 +2210,12 @@ end
 @test Meta.parse("a ⟂ b ⟂ c") == Expr(:comparison, :a, :⟂, :b, :⟂, :c)
 @test Meta.parse("a ⟂ b ∥ c") == Expr(:comparison, :a, :⟂, :b, :∥, :c)
 
+# issue 39350
+@testset "binary ⫪ and ⫫" begin
+    @test Meta.parse("a ⫪ b") == Expr(:call, :⫪, :a, :b)
+    @test Meta.parse("a ⫫ b") == Expr(:call, :⫫, :a, :b)
+end
+
 # only allow certain characters after interpolated vars (#25231)
 @test Meta.parse("\"\$x෴  \"",raise=false) == Expr(:error, "interpolated variable \$x ends with invalid character \"෴\"; use \"\$(x)\" instead.")
 @test Base.incomplete_tag(Meta.parse("\"\$foo", raise=false)) == :string
@@ -2235,6 +2310,9 @@ h35201(x; k=1) = (x, k)
 f35201(c) = h35201((;c...), k=true)
 @test f35201(Dict(:a=>1,:b=>3)) === ((a=1,b=3), true)
 
+# issue #44343
+f44343(;kw...) = NamedTuple(kw)
+@test f44343(u = (; :a => 1)) === (u = (; :a => 1),)
 
 @testset "issue #34544/35367" begin
     # Test these evals shouldnt segfault
@@ -2342,3 +2420,948 @@ else
 end
 
 @test :(a +ꜝ b) == Expr(:call, :+ꜝ, :a, :b)
+
+function ncalls_in_lowered(ex, fname)
+    lowered_exprs = Meta.lower(Main, ex).args[1].code
+    return count(lowered_exprs) do ex
+        Meta.isexpr(ex, :call) && ex.args[1] == fname
+    end
+end
+
+@testset "standalone .op" begin
+    @test :(.+) == Expr(:., :+)
+    @test :(map(.-, a)) == Expr(:call, :map, Expr(:., :-), :a)
+
+    @test ncalls_in_lowered(:(.*), GlobalRef(Base, :BroadcastFunction)) == 1
+    @test ncalls_in_lowered(:((.^).(a, b)), GlobalRef(Base, :broadcasted)) == 1
+    @test ncalls_in_lowered(:((.^).(a, b)), GlobalRef(Base, :BroadcastFunction)) == 1
+    @test ncalls_in_lowered(:((.+)(a, b .- (.^)(c, 2))), GlobalRef(Base, :broadcasted)) == 3
+    @test ncalls_in_lowered(:((.+)(a, b .- (.^)(c, 2))), GlobalRef(Base, :materialize)) == 1
+    @test ncalls_in_lowered(:((.+)(a, b .- (.^)(c, 2))), GlobalRef(Base, :BroadcastFunction)) == 0
+end
+
+# issue #37656
+@test :(if true 'a' else 1 end) == Expr(:if, true, quote 'a' end, quote 1 end)
+
+# issue #37664
+@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a b")
+@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a#==#b")
+@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a #==#b")
+@test_throws ParseError("extra token \"b\" after end of expression") Meta.parse("a#==# b")
+
+@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1 2")
+@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1#==#2")
+@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1 #==#2")
+@test_throws ParseError("extra token \"2\" after end of expression") Meta.parse("1#==# 2")
+
+@test size([1#==#2#==#3]) == size([1 2 3])
+@test size([1#==#2#==#3]) == size([1	2	3]) # tabs
+@test size([1#==#2#==#3]) == size([1	2 3]) # tabs and spaces
+@test size([1#==#2#==#3]) == size([1 2	3]) # tabs and spaces
+@test [zeros(Int,2,2)#==#[1;2]
+       [3#==#4]#==#5]          == [zeros(Int,2,2) [1; 2]
+                                   [3 4]          5     ] == [0 0 1
+                                                              0 0 2
+                                                              3 4 5]
+
+@test Meta.parse("for x in 1:10 g(x) end") ==
+  Meta.parse("for#==#x#==#in#==#1:10#==#g(x)#==#end")
+@test Meta.parse("(f->f(1))() do x x+1 end") ==
+  Meta.parse("(f->f(1))()#==#do#==#x#==#x+1#==#end")
+@test Meta.parse("while i < 10 i += 1 end") ==
+  Meta.parse("while#==#i#==#<#==#10#==#i#==#+=#==#1#==#end")
+@test Meta.parse("begin x=1 end") == Meta.parse("begin#==#x=1#==#end")
+@test Meta.parse("if x<y x+1 elseif y>0 y+1 else z end") ==
+  Meta.parse("if#==#x<y#==#x+1#==#elseif#==#y>0#==#y+1#==#else#==#z#==#end")
+@test Meta.parse("function(x) x end") == Meta.parse("function(x)#==#x#==#end")
+@test Meta.parse("a ? b : c") == Meta.parse("a#==#?#==#b#==#:#==#c")
+@test_throws ParseError("space before \"(\" not allowed in \"f (\" at none:1") begin
+  Meta.parse("f#==#(x)=x")
+end
+@test Meta.parse("try f() catch e g() finally h() end") ==
+  Meta.parse("try#==#f()#==#catch#==#e#==#g()#==#finally#==#h()#==#end")
+@test Meta.parse("@m a b") == Meta.parse("@m#==#a#==#b")
+
+# issue #37540
+macro m37540()
+    quote
+        x = 1
+        :($x)
+    end
+end
+@test @m37540() == 1
+
+# issue #37890
+struct A37890{A, B}
+    a
+    b
+    A37890(args::Tuple) = return new{typeof.(args)...}(args...)
+end
+@test A37890((1, "")) isa A37890{Int, String}
+@test_throws ErrorException A37890((1,1,1))
+@test_throws TypeError A37890((1,))
+
+struct B37890{A, B}
+    a
+    b
+    B37890(a, b) = new{Int, ()..., Int8}(a, b)
+end
+@test B37890(1.0, 2.0f0) isa B37890{Int, Int8}
+
+# import ... as
+@test_throws ParseError("invalid syntax \"using A as ...\"") Meta.parse("using A as B")
+@test_throws ParseError("invalid syntax \"using A.b as ...\"") Meta.parse("using A.b as B")
+@test_throws ParseError("invalid syntax \"using A.b as ...\"") Meta.parse("using X, A.b as B")
+@test_throws ParseError("invalid syntax \"import A as B:\"") Meta.parse("import A as B: c")
+@test_throws ParseError("invalid syntax \"import A.b as B:\"") Meta.parse("import A.b as B: c")
+
+module TestImportAs
+using Test
+
+module Mod
+const x = 1
+global maybe_undef
+def() = (global maybe_undef = 0)
+func(x) = 2x + 1
+
+macro mac(x)
+    :($(esc(x)) + 1)
+end
+end
+
+module Mod2
+import ..Mod.x as x_from_mod
+import ..Mod.x as x_from_mod2
+const y = 2
+
+export x_from_mod2
+end
+
+import .Mod: x as x2
+
+@test x2 == 1
+@test !@isdefined(x)
+
+module_names = names(@__MODULE__; all=true, imported=true)
+@test :x2 ∈ module_names
+@test :x ∉ module_names
+
+import .Mod2.y as y2
+
+@test y2 == 2
+@test !@isdefined(y)
+
+@test_throws ErrorException eval(:(import .Mod.x as (a.b)))
+
+import .Mod.maybe_undef as mu
+@test_throws UndefVarError mu
+Mod.def()
+@test mu === 0
+
+using .Mod: func as f
+@test f(10) == 21
+@test !@isdefined(func)
+@test_throws ErrorException("error in method definition: function Mod.func must be explicitly imported to be extended") eval(:(f(x::Int) = x))
+
+z = 42
+import .z as also_z
+@test also_z == 42
+
+import .Mod.@mac as @m
+@test @m(3) == 4
+
+@test_throws ErrorException eval(:(import .Mod.@mac as notmacro))
+@test_throws ErrorException eval(:(import .Mod.func as @notmacro))
+@test_throws ErrorException eval(:(using .Mod: @mac as notmacro))
+@test_throws ErrorException eval(:(using .Mod: func as @notmacro))
+
+import .Mod2.x_from_mod
+
+@test @isdefined(x_from_mod)
+@test x_from_mod == Mod.x
+
+using .Mod2
+
+@test_nowarn @eval x_from_mod2
+@test @isdefined(x_from_mod2)
+@test x_from_mod2 == x_from_mod == Mod.x
+end
+
+import .TestImportAs.Mod2 as M2
+@test !@isdefined(Mod2)
+@test M2 === TestImportAs.Mod2
+
+@testset "unicode modifiers after '" begin
+    @test Meta.parse("a'ᵀ") == Expr(:call, Symbol("'ᵀ"), :a)
+    @test Meta.parse("a'⁻¹") == Expr(:call, Symbol("'⁻¹"), :a)
+    @test Meta.parse("a'ᵀb") == Expr(:call, :*, Expr(:call, Symbol("'ᵀ"), :a), :b)
+    @test Meta.parse("a'⁻¹b") == Expr(:call, :*, Expr(:call, Symbol("'⁻¹"), :a), :b)
+end
+
+@testset "issue #37393" begin
+    @test :(for outer i = 1:3; end) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;))
+    i = :i
+    @test :(for outer $i = 1:3; end) == Expr(:for, Expr(:(=), Expr(:outer, :i), :(1:3)), :(;;))
+    @test :(for outer = 1:3; end) == Expr(:for, Expr(:(=), :outer, :(1:3)), :(;;))
+    # TIL that this is possible
+    for outer $ i = 1:3
+        @test 1 $ 2 in 1:3
+    end
+
+    # 😭
+    @test Meta.isexpr(Meta.parse("""
+        [i for i
+        in 1:3]"""), :comprehension)
+    @test Meta.isexpr(Meta.parse("""
+        [i for outer
+        in 1:3]"""), :comprehension)
+    @test Meta.isexpr(Meta.parse("""
+        [i for outer
+        i in 1:3]"""), :comprehension)
+    @test Meta.isexpr(Meta.parse("""
+        f(i for i
+        in 1:3)""").args[2], :generator)
+    @test_throws Meta.ParseError Meta.parse("""
+        for i
+            in 1:3
+        end""")
+end
+
+# PR #37973
+@test Meta.parse("1¦2⌿3") == Expr(:call, :¦, 1, Expr(:call, :⌿, 2, 3))
+
+@testset "slurp in assignments" begin
+    res = begin x, y, z... = 1:7 end
+    @test res == 1:7
+    @test x == 1 && y == 2
+    @test z == Vector(3:7)
+
+    res = begin x, y, z... = [1, 2] end
+    @test res == [1, 2]
+    @test x == 1 && y == 2
+    @test z == Int[]
+
+    x = 1
+    res = begin x..., = x end
+    @test res == 1
+    @test x == 1
+
+    x, y, z... = 1:7
+    res = begin y, z, x... = z..., x, y end
+    @test res == ((3:7)..., 1, 2)
+    @test y == 3
+    @test z == 4
+    @test x == ((5:7)..., 1, 2)
+
+    res = begin x, _, y... = 1, 2 end
+    @test res == (1, 2)
+    @test x == 1
+    @test y == ()
+
+    res = begin x, y... = 1 end
+    @test res == 1
+    @test x == 1
+    @test y == Iterators.rest(1, nothing)
+
+    res = begin x, y, z... = 1, 2, 3:5 end
+    @test res == (1, 2, 3:5)
+    @test x == 1 && y == 2
+    @test z == (3:5,)
+
+    @test Meta.isexpr(Meta.@lower(begin a, b..., c... = 1, 2, 3 end), :error)
+
+    @test_throws BoundsError begin x, y, z... = 1:1 end
+    @test_throws BoundsError begin x, y, _, z... = 1, 2 end
+
+    car((a, d...)) = a
+    cdr((a, d...)) = d
+    @test car(1:3) == 1
+    @test cdr(1:3) == [2, 3]
+
+    @test begin a, b = (;c = 3, d = 4) end === (c = 3, d = 4)
+    @test begin a, b, c = (x = "", y = 2.0, z = 1) end === (x = "", y = 2.0, z = 1)
+    a, b, c = (x = "", y = 2.0, z = 1)
+    @test a === ""
+    @test b === 2.0
+    @test c === 1
+    @test begin a, b... = (x = "", y = 2.0, z = 1) end === (x = "", y = 2.0, z = 1)
+    a, b... = (x = "", y = 2.0, z = 1)
+    @test b === (y = 2.0, z = 1)
+    let t = (x = "", y = 1, z = 3.0)
+        _, a, b = t
+        @test a === 1
+        @test b === 3.0
+        a, b... = t
+        @test a === ""
+        @test b === (y = 1, z = 3.0)
+    end
+end
+
+@testset "issue #33460" begin
+    err = Expr(:error, "more than one semicolon in argument list")
+    @test Meta.lower(Main, :(f(a; b=1; c=2) = 2))  == err
+    @test Meta.lower(Main, :(f( ; b=1; c=2)))      == err
+    @test Meta.lower(Main, :(f(a; b=1; c=2)))      == err
+    @test Meta.lower(Main, :(f(a; b=1, c=2; d=3))) == err
+    @test Meta.lower(Main, :(f(a; b=1; c=2, d=3))) == err
+    @test Meta.lower(Main, :(f(a; b=1; c=2; d=3))) == err
+end
+
+@test eval(Expr(:if, Expr(:block, Expr(:&&, true, Expr(:call, :(===), 1, 1))), 1, 2)) == 1
+
+# issue #38386
+macro m38386()
+    fname = :f38386
+    :(function $(esc(fname)) end)
+end
+@m38386
+@test isempty(methods(f38386))
+
+@testset "all-underscore varargs on the rhs" begin
+    @test ncalls_in_lowered(quote _..., = a end, GlobalRef(Base, :rest)) == 0
+    @test ncalls_in_lowered(quote ___..., = a end, GlobalRef(Base, :rest)) == 0
+    @test ncalls_in_lowered(quote a, _... = b end, GlobalRef(Base, :rest)) == 0
+    @test ncalls_in_lowered(quote a, _... = b, c end, GlobalRef(Base, :rest)) == 0
+    @test ncalls_in_lowered(quote a, _... = (b...,) end, GlobalRef(Base, :rest)) == 0
+end
+
+# issue #38501
+@test :"a $b $("str") c" == Expr(:string, "a ", :b, " ", Expr(:string, "str"), " c")
+
+@testset "property destructuring" begin
+    res = begin (; num, den) = 1 // 2 end
+    @test res == 1 // 2
+    @test num == 1
+    @test den == 2
+
+    res = begin (; b, a) = (a=1, b=2, c=3) end
+    @test res == (a=1, b=2, c=3)
+    @test b == 2
+    @test a == 1
+
+    # could make this an error instead, but I think this is reasonable
+    res = begin (; a, b, a) = (a=5, b=6) end
+    @test res == (a=5, b=6)
+    @test a == 5
+    @test b == 6
+
+    @test_throws ErrorException (; a, b) = (x=1,)
+
+    @test Meta.isexpr(Meta.@lower(begin (a, b; c) = x end), :error)
+    @test Meta.isexpr(Meta.@lower(begin (a, b; c) = x, y end), :error)
+    @test Meta.isexpr(Meta.@lower(begin (; c, a.b) = x end), :error)
+
+    f((; a, b)) = a, b
+    @test f((b=3, a=4)) == (4, 3)
+    @test f((b=3, c=2, a=4)) == (4, 3)
+    @test_throws ErrorException f((;))
+
+    # with type annotation
+    let num, den, a, b
+        res = begin (; num::UInt8, den::Float64) = 1 // 2 end
+        @test res === 1 // 2
+        @test num === 0x01
+        @test den === 2.0
+
+        res = begin (; b, a::Bool) = (a=1.0, b=2, c=0x03) end
+        @test res === (a=1.0, b=2, c=0x03)
+        @test b === 2
+        @test a === true
+    end
+
+    @test Meta.isexpr(Meta.@lower(f((; a, b::Int)) = a + b), :error)
+end
+
+# #33697
+@testset "N-dimensional concatenation" begin
+    @test :([1 2 5; 3 4 6;;; 0 9 3; 4 5 4]) ==
+        Expr(:ncat, 3, Expr(:nrow, 1, Expr(:row, 1, 2, 5), Expr(:row, 3, 4, 6)),
+                        Expr(:nrow, 1, Expr(:row, 0, 9, 3), Expr(:row, 4, 5, 4)))
+    @test :([1 ; 2 ;; 3 ; 4]) == Expr(:ncat, 2, Expr(:nrow, 1, 1, 2), Expr(:nrow, 1, 3, 4))
+
+    @test_throws ParseError Meta.parse("[1 2 ;; 3 4]") # cannot mix spaces and ;; except as line break
+    @test :([1 2 ;;
+            3 4]) == :([1 2 3 4])
+    @test :([1 2 ;;
+            3 4 ; 2 3 4 5]) == :([1 2 3 4 ; 2 3 4 5])
+
+    @test Meta.parse("[1;\n]") == :([1;]) # ensure line breaks following semicolons are treated correctly
+    @test Meta.parse("[1;\n\n]") == :([1;])
+    @test Meta.parse("[1\n;]") == :([1;]) # semicolons following a linebreak are fine
+    @test Meta.parse("[1\n;;; 2]") == :([1;;; 2])
+    @test_throws ParseError Meta.parse("[1;\n;2]") # semicolons cannot straddle a line break
+    @test_throws ParseError Meta.parse("[1; ;2]") # semicolons cannot be separated by a space
+end
+
+# issue #25652
+x25652 = 1
+x25652_2 = let (x25652, _) = (x25652, nothing)
+    x25652 = x25652 + 1
+    x25652
+end
+@test x25652_2 == 2
+@test x25652 == 1
+
+@test let x = x25652
+    x25652 = x+3
+    x25652
+end == 4
+@test let (x,) = (x25652,)
+    x25652 = x+3
+    x25652
+end == 4
+
+@testset "issue #39600" begin
+    A = 1:.5:2
+    @test (!).(1 .< A .< 2) == [true, false, true]
+    @test .!(1 .< A .< 2) == [true, false, true]
+    @test (.!)(1 .< A .< 2) == [true, false, true]
+
+    @test ncalls_in_lowered(:((!).(1 .< A .< 2)), GlobalRef(Base, :materialize)) == 1
+    @test ncalls_in_lowered(:(.!(1 .< A .< 2)), GlobalRef(Base, :materialize)) == 1
+    @test ncalls_in_lowered(:((.!)(1 .< A .< 2)), GlobalRef(Base, :materialize)) == 1
+end
+
+# issue #39705
+@eval f39705(x) = $(Expr(:||)) && x
+@test f39705(1) === false
+
+
+struct A x end
+Base.dotgetproperty(::A, ::Symbol) = [0, 0, 0]
+
+@testset "dotgetproperty" begin
+    a = (x = [1, 2, 3],)
+    @test @inferred((a -> a.x .+= 1)(a)) == [2, 3, 4]
+
+    b = [1, 2, 3]
+    @test A(b).x === b
+    @test begin A(b).x .= 1 end == [1, 1, 1]
+    @test begin A(b).x .+= 1 end == [2, 3, 4]
+    @test b == [1, 2, 3]
+end
+
+@test Meta.@lower((::T) = x) == Expr(:error, "invalid assignment location \"::T\"")
+@test Meta.@lower((::T,) = x) == Expr(:error, "invalid assignment location \"::T\"")
+@test Meta.@lower((; ::T) = x) == Expr(:error, "invalid assignment location \"::T\"")
+
+# flisp conversion for quoted SSAValues
+@test eval(:(x = $(QuoteNode(Core.SSAValue(1))))) == Core.SSAValue(1)
+@test eval(:(x = $(QuoteNode(Core.SlotNumber(1))))) == Core.SlotNumber(1)
+@test_throws ErrorException("syntax: SSAValue objects should not occur in an AST") eval(:(x = $(Core.SSAValue(1))))
+@test_throws ErrorException("syntax: Slot objects should not occur in an AST") eval(:(x = $(Core.SlotNumber(1))))
+
+# juxtaposition of radical symbols (#40094)
+@test Meta.parse("2√3") == Expr(:call, :*, 2, Expr(:call, :√, 3))
+@test Meta.parse("2∛3") == Expr(:call, :*, 2, Expr(:call, :∛, 3))
+@test Meta.parse("2∜3") == Expr(:call, :*, 2, Expr(:call, :∜, 3))
+
+macro m_underscore_hygiene()
+    return :(_ = 1)
+end
+
+@test @macroexpand(@m_underscore_hygiene()) == :(_ = 1)
+
+macro m_begin_hygiene(a)
+    return :($(esc(a))[begin])
+end
+
+@test @m_begin_hygiene([1, 2, 3]) == 1
+
+# issue 40258
+@test "a $("b $("c")")" == "a b c"
+
+@test "$(([[:a, :b], [:c, :d]]...)...)" == "abcd"
+
+@test eval(Expr(:string, "a", Expr(:string, "b", "c"))) == "abc"
+@test eval(Expr(:string, "a", Expr(:string, "b", Expr(:string, "c")))) == "abc"
+
+macro m_nospecialize_unnamed_hygiene()
+    return :(f(@nospecialize(::Any)) = Any)
+end
+
+@test @m_nospecialize_unnamed_hygiene()(1) === Any
+
+# https://github.com/JuliaLang/julia/issues/40574
+@testset "no mutation while destructuring" begin
+    x = [1, 2]
+    x[2], x[1] = x
+    @test x == [2, 1]
+
+    x = [1, 2, 3]
+    x[3], x[1:2]... = x
+    @test x == [2, 3, 1]
+end
+
+@testset "escaping newlines inside strings" begin
+    c = "c"
+
+    @test "a\
+b" == "ab"
+    @test "a\
+    b" == "ab"
+    @test raw"a\
+b" == "a\\\nb"
+    @test "a$c\
+b" == "acb"
+    @test "\\
+" == "\\\n"
+
+
+    @test """
+          a\
+          b""" == "ab"
+    @test """
+          a\
+            b""" == "ab"
+    @test """
+            a\
+          b""" == "ab"
+    @test raw"""
+          a\
+          b""" == "a\\\nb"
+    @test """
+          a$c\
+          b""" == "acb"
+
+    @test """
+          \
+          """ == ""
+    @test """
+          \\
+          """ == "\\\n"
+    @test """
+          \\\
+          """ == "\\"
+    @test """
+          \\\\
+          """ == "\\\\\n"
+    @test """
+          \\\\\
+          """ == "\\\\"
+    @test """
+          \
+          \
+          """ == ""
+    @test """
+          \\
+          \
+          """ == "\\\n"
+    @test """
+          \\\
+          \
+          """ == "\\"
+
+
+    @test `a\
+b` == `ab`
+    @test `a\
+    b` == `ab`
+    @test `a$c\
+b` == `acb`
+    @test `"a\
+b"` == `ab`
+    @test `'a\
+b'` == `$("a\\\nb")`
+    @test `\\
+` == `'\'`
+
+
+    @test ```
+          a\
+          b``` == `ab`
+    @test ```
+          a\
+            b``` == `ab`
+    @test ```
+            a\
+          b``` == `  ab`
+    @test ```
+          a$c\
+          b``` == `acb`
+    @test ```
+          "a\
+          b"``` == `ab`
+    @test ```
+          'a\
+          b'``` == `$("a\\\nb")`
+    @test ```
+          \\
+          ``` == `'\'`
+end
+
+# issue #41253
+@test (function (::Dict{}); end)(Dict()) === nothing
+
+@testset "issue #41330" begin
+    @test Meta.parse("\"a\\\r\nb\"") == "ab"
+    @test Meta.parse("\"a\\\rb\"") == "ab"
+    @test eval(Meta.parse("`a\\\r\nb`")) == `ab`
+    @test eval(Meta.parse("`a\\\rb`")) == `ab`
+end
+
+@testset "slurping into function def" begin
+    x, f1()... = [1, 2, 3]
+    @test x == 1
+    @test f1() == [2, 3]
+    # test that call to `Base.rest` is outside the definition of `f`
+    @test f1() === f1()
+
+    x, f2()... = 1, 2, 3
+    @test x == 1
+    @test f2() == (2, 3)
+end
+
+@testset "long function bodies" begin
+    ex = Expr(:block)
+    ex.args = fill!(Vector{Any}(undef, 700000), 1)
+    f = eval(Expr(:function, :(), ex))
+    @test f() == 1
+    ex = Expr(:vcat)
+    ex.args = fill!(Vector{Any}(undef, 600000), 1)
+    @test_throws ErrorException("syntax: expression too large") eval(ex)
+end
+
+# issue 25678
+@generated f25678(x::T) where {T} = code_lowered(sin, Tuple{x})[]
+@test f25678(pi/6) === sin(pi/6)
+
+@generated g25678(x) = return :x
+@test g25678(7) === 7
+
+# issue 25678: module of name `Core`
+# https://github.com/JuliaLang/julia/pull/40778/files#r784416018
+@test @eval Module() begin
+    Core = 1
+    @generated f() = 1
+    f() == 1
+end
+
+# issue 25678: argument of name `tmp`
+# https://github.com/JuliaLang/julia/pull/43823#discussion_r785365312
+@test @eval Module() begin
+    @generated f(tmp) = tmp
+    f(1) === Int
+end
+
+# issue #19012
+@test Meta.parse("\U2200", raise=false) == Symbol("∀")
+@test Meta.parse("\U2203", raise=false) == Symbol("∃")
+@test Meta.parse("a\U2203", raise=false) == Symbol("a∃")
+@test Meta.parse("\U2204", raise=false) == Symbol("∄")
+
+# issue 42220
+macro m42220()
+    return quote
+        function foo(::Type{T}=Float64) where {T}
+            return Vector{T}(undef, 10)
+        end
+    end
+end
+@test @m42220()() isa Vector{Float64}
+@test @m42220()(Bool) isa Vector{Bool}
+
+@testset "try else" begin
+    fails(f) = try f() catch; true else false end
+    @test fails(error)
+    @test !fails(() -> 1 + 2)
+
+    @test_throws ParseError Meta.parse("try foo() else bar() end")
+    @test_throws ParseError Meta.parse("try foo() else bar() catch; baz() end")
+    @test_throws ParseError Meta.parse("try foo() catch; baz() finally foobar() else bar() end")
+    @test_throws ParseError Meta.parse("try foo() finally foobar() else bar() catch; baz() end")
+
+    err = try
+        try
+            1 + 2
+        catch
+        else
+            error("foo")
+        end
+    catch e
+        e
+    end
+    @test err == ErrorException("foo")
+
+    x = 0
+    err = try
+        try
+            1 + 2
+        catch
+        else
+            error("foo")
+        finally
+            x += 1
+        end
+    catch e
+        e
+    end
+    @test err == ErrorException("foo")
+    @test x == 1
+
+    x = 0
+    err = try
+        try
+            1 + 2
+        catch
+            5 + 6
+        else
+            3 + 4
+        finally
+            x += 1
+        end
+    catch e
+        e
+    end
+    @test err == 3 + 4
+    @test x == 1
+
+    x = 0
+    err = try
+        try
+            error()
+        catch
+            5 + 6
+        else
+            3 + 4
+        finally
+            x += 1
+        end
+    catch e
+        e
+    end
+    @test err == 5 + 6
+    @test x == 1
+end
+
+@test_throws ParseError Meta.parse("""
+function checkUserAccess(u::User)
+	if u.accessLevel != "user\u202e \u2066# users are not allowed\u2069\u2066"
+		return true
+	end
+	return false
+end
+""")
+
+@test_throws ParseError Meta.parse("""
+function checkUserAccess(u::User)
+	#=\u202e \u2066if (u.isAdmin)\u2069 \u2066 begin admins only =#
+		return true
+	#= end admin only \u202e \u2066end\u2069 \u2066=#
+	return false
+end
+""")
+
+@testset "empty nd arrays" begin
+    @test :([])    == Expr(:vect)
+    @test :([;])   == Expr(:ncat, 1)
+    @test :([;;])  == Expr(:ncat, 2)
+    @test :([;;;]) == Expr(:ncat, 3)
+
+    @test []    == Array{Any}(undef, 0)
+    @test [;]   == Array{Any}(undef, 0)
+    @test [;;]  == Array{Any}(undef, 0, 0)
+    @test [;;;] == Array{Any}(undef, 0, 0, 0)
+
+    @test :(T[])    == Expr(:ref, :T)
+    @test :(T[;])   == Expr(:typed_ncat, :T, 1)
+    @test :(T[;;])  == Expr(:typed_ncat, :T, 2)
+    @test :(T[;;;]) == Expr(:typed_ncat, :T, 3)
+
+    @test Int[]    == Array{Int}(undef, 0)
+    @test Int[;]   == Array{Int}(undef, 0)
+    @test Int[;;]  == Array{Int}(undef, 0, 0)
+    @test Int[;;;] == Array{Int}(undef, 0, 0, 0)
+
+    @test :([  ]) == Expr(:vect)
+    @test :([
+            ]) == Expr(:vect)
+    @test :([ ;; ]) == Expr(:ncat, 2)
+    @test :([
+             ;;
+            ]) == Expr(:ncat, 2)
+
+    @test_throws ParseError Meta.parse("[; ;]")
+    @test_throws ParseError Meta.parse("[;; ;]")
+    @test_throws ParseError Meta.parse("[;\n;]")
+end
+
+@test Meta.parseatom("@foo", 1; filename="foo", lineno=7) == (Expr(:macrocall, :var"@foo", LineNumberNode(7, :foo)), 5)
+@test Meta.parseall("@foo"; filename="foo", lineno=3) == Expr(:toplevel, LineNumberNode(3, :foo), Expr(:macrocall, :var"@foo", LineNumberNode(3, :foo)))
+
+let ex = :(const $(esc(:x)) = 1; (::typeof(2))() = $(esc(:x)))
+    @test macroexpand(Main, Expr(:var"hygienic-scope", ex, Main)).args[3].args[1] == :((::$(GlobalRef(Main, :typeof))(2))())
+end
+
+struct Foo44013
+    x
+    f
+end
+
+@testset "issue #44013" begin
+    f = Foo44013(1, 2)
+    res = begin (; x, f) = f end
+    @test res == Foo44013(1, 2)
+    @test x == 1
+    @test f == 2
+
+    f = Foo44013(1, 2)
+    res = begin (; f, x) = f end
+    @test res == Foo44013(1, 2)
+    @test x == 1
+    @test f == 2
+end
+
+@testset "typed globals" begin
+    m = Module()
+    @eval m begin
+        x::Int = 1
+        f(y) = x + y
+    end
+    @test Base.return_types(m.f, (Int,)) == [Int]
+
+    m = Module()
+    @eval m begin
+        global x::Int
+        f(y) = x + y
+    end
+    @test Base.return_types(m.f, (Int,)) == [Int]
+
+    m = Module()
+    @test_throws ErrorException @eval m begin
+        function f()
+            global x
+            x::Int = 1
+            x = 2.
+        end
+        g() = x
+    end
+
+    m = Module()
+    @test_throws ErrorException @eval m function f()
+        global x
+        x::Int = 1
+        x::Float64 = 2.
+    end
+
+    m = Module()
+    @test_throws ErrorException @eval m begin
+        x::Int = 1
+        x::Float64 = 2
+    end
+
+    m = Module()
+    @test_throws ErrorException @eval m begin
+        x::Int = 1
+        const x = 2
+    end
+
+    m = Module()
+    @test_throws ErrorException @eval m begin
+        const x = 1
+        x::Int = 2
+    end
+
+    m = Module()
+    @test_throws ErrorException @eval m begin
+        x = 1
+        global x::Float64
+    end
+
+    m = Module()
+    @test_throws ErrorException @eval m begin
+        x = 1
+        global x::Int
+    end
+
+    m = Module()
+    @eval m module Foo
+        export bar
+        bar = 1
+    end
+    @eval m begin
+        using .Foo
+        bar::Float64 = 2
+    end
+    @test m.bar === 2.0
+    @test Core.get_binding_type(m, :bar) == Float64
+    @test m.Foo.bar === 1
+    @test Core.get_binding_type(m.Foo, :bar) == Any
+end
+
+# issue 44723
+demo44723()::Any = Base.Experimental.@opaque () -> true ? 1 : 2
+@test demo44723()() == 1
+
+@testset "slurping in non-final position" begin
+    res = begin x, y..., z = 1:7 end
+    @test res == 1:7
+    @test x == 1
+    @test y == Vector(2:6)
+    @test z == 7
+
+    res = begin x, y..., z = [1, 2] end
+    @test res == [1, 2]
+    @test x == 1
+    @test y == Int[]
+    @test z == 2
+
+    x, y, z... = 1:7
+    res = begin y, z..., x = z..., x, y end
+    @test res == ((3:7)..., 1, 2)
+    @test y == 3
+    @test z == ((4:7)..., 1)
+    @test x == 2
+
+    res = begin x, _..., y = 1, 2 end
+    @test res == (1, 2)
+    @test x == 1
+    @test y == 2
+
+    res = begin x, y..., z = 1, 2:4, 5 end
+    @test res == (1, 2:4, 5)
+    @test x == 1
+    @test y == (2:4,)
+    @test z == 5
+
+    @test_throws ArgumentError begin x, y..., z = 1:1 end
+    @test_throws BoundsError begin x, y, _..., z = 1, 2 end
+
+    last((a..., b)) = b
+    front((a..., b)) = a
+    @test last(1:3) == 3
+    @test front(1:3) == [1, 2]
+
+    res = begin x, y..., z = "abcde" end
+    @test res == "abcde"
+    @test x == 'a'
+    @test y == "bcd"
+    @test z == 'e'
+
+    res = begin x, y..., z = (a=1, b=2, c=3, d=4) end
+    @test res == (a=1, b=2, c=3, d=4)
+    @test x == 1
+    @test y == (b=2, c=3)
+    @test z == 4
+
+    v = rand(Bool, 7)
+    res = begin x, y..., z = v end
+    @test res === v
+    @test x == v[1]
+    @test y == v[2:6]
+    @test z == v[end]
+
+    res = begin x, y..., z = Core.svec(1, 2, 3, 4) end
+    @test res == Core.svec(1, 2, 3, 4)
+    @test x == 1
+    @test y == Core.svec(2, 3)
+    @test z == 4
+end
+
+# rewriting inner constructors with return type decls
+struct InnerCtorRT{T}
+    InnerCtorRT()::Int = new{Int}()
+    InnerCtorRT{T}() where {T} = ()->new()
+end
+@test_throws MethodError InnerCtorRT()
+@test InnerCtorRT{Int}()() isa InnerCtorRT{Int}
diff --git a/test/sysinfo.jl b/test/sysinfo.jl
index a34adf572abe43..e423f6071c9e09 100644
--- a/test/sysinfo.jl
+++ b/test/sysinfo.jl
@@ -6,3 +6,6 @@
 sprint(Base.Sys.cpu_summary)
 @test Base.Sys.uptime() > 0
 Base.Sys.loadavg()
+
+@test Base.libllvm_path() isa Symbol
+@test contains(String(Base.libllvm_path()), "LLVM")
diff --git a/test/testdefs.jl b/test/testdefs.jl
index 894a0cc110a7c0..1d36d8893e1998 100644
--- a/test/testdefs.jl
+++ b/test/testdefs.jl
@@ -18,28 +18,27 @@ function runtests(name, path, isolate=true; seed=nothing)
         let id = myid()
             wait(@spawnat 1 print_testworker_started(name, id))
         end
-        ex = quote
-            @timed @testset $"$name" begin
-                # Random.seed!(nothing) will fail
-                $seed != nothing && Random.seed!($seed)
-                include($"$path.jl")
-            end
+        res_and_time_data = @timed @testset "$name" begin
+            # Random.seed!(nothing) will fail
+            seed != nothing && Random.seed!(seed)
+            Base.include(m, "$path.jl")
         end
-        res_and_time_data = Core.eval(m, ex)
         rss = Sys.maxrss()
         #res_and_time_data[1] is the testset
-        passes,fails,error,broken,c_passes,c_fails,c_errors,c_broken = Test.get_test_counts(res_and_time_data[1])
-        if res_and_time_data[1].anynonpass == false
-            res_and_time_data = (
-                                 (passes+c_passes,broken+c_broken),
-                                 res_and_time_data[2],
-                                 res_and_time_data[3],
-                                 res_and_time_data[4],
-                                 res_and_time_data[5])
-        end
-        vcat(collect(res_and_time_data), rss)
-    finally
+        ts = res_and_time_data[1]
+        passes, fails, errors, broken, c_passes, c_fails, c_errors, c_broken = Test.get_test_counts(ts)
+        # simplify our stored data to just contain the counts
+        res_and_time_data = (TestSetException(passes+c_passes, fails+c_fails, errors+c_errors, broken+c_broken, Test.filter_errors(ts)),
+                             res_and_time_data[2],
+                             res_and_time_data[3],
+                             res_and_time_data[4],
+                             res_and_time_data[5],
+                             rss)
+        return res_and_time_data
+    catch ex
         Test.TESTSET_PRINT_ENABLE[] = old_print_setting
+        ex isa TestSetException || rethrow()
+        return Any[ex]
     end
 end
 
diff --git a/test/testenv.jl b/test/testenv.jl
index e07355301b55ae..41706dd24e75ed 100644
--- a/test/testenv.jl
+++ b/test/testenv.jl
@@ -18,6 +18,9 @@ if !@isdefined(testenv_defined)
         push!(test_exeflags.exec, "--startup-file=no")
         push!(test_exeflags.exec, "--depwarn=error")
     end
+    if haskey(ENV, "JULIA_TEST_EXTRA_EXEFLAGS")
+        append!(test_exeflags.exec, Base.shell_split(ENV["JULIA_TEST_EXTRA_EXEFLAGS"]))
+    end
 
     if haskey(ENV, "JULIA_TEST_EXENAME")
         popfirst!(test_exeflags.exec)
@@ -26,14 +29,31 @@ if !@isdefined(testenv_defined)
         const test_exename = popfirst!(test_exeflags.exec)
     end
 
-    addprocs_with_testenv(X; kwargs...) = addprocs(X; exename=test_exename, exeflags=test_exeflags, kwargs...)
+    if haskey(ENV, "JULIA_RR")
+        const rr_exename = `$(Base.shell_split(ENV["JULIA_RR"]))`
+    else
+        const rr_exename = ``
+    end
+
+    function addprocs_with_testenv(X; rr_allowed=true, kwargs...)
+        exename = rr_allowed ? `$rr_exename $test_exename` : test_exename
+        addprocs(X; exename=exename, exeflags=test_exeflags, kwargs...)
+    end
 
     const curmod = @__MODULE__
     const curmod_name = fullname(curmod)
     const curmod_str = curmod === Main ? "Main" : join(curmod_name, ".")
-    const curmod_prefix = "$(["$m." for m in curmod_name]...)"
+    const curmod_prefix = curmod === Main ? "" : "$(["$m." for m in curmod_name]...)"
 
     # platforms that support cfunction with closures
     # (requires LLVM back-end support for trampoline intrinsics)
     const cfunction_closure = Sys.ARCH === :x86_64 || Sys.ARCH === :i686
+
+    macro async_logerr(expr)
+        :(@async try
+            $(esc(expr))
+        catch err
+            @error("An async task failed", exception=(err, catch_backtrace()))
+        end)
+    end
 end
diff --git a/test/testhelpers/FakePTYs.jl b/test/testhelpers/FakePTYs.jl
index 03610665142e2d..17dd270cd24248 100644
--- a/test/testhelpers/FakePTYs.jl
+++ b/test/testhelpers/FakePTYs.jl
@@ -41,8 +41,8 @@ function open_fake_pty()
 
         fds = ccall(:open, Cint, (Ptr{UInt8}, Cint),
             ccall(:ptsname, Ptr{UInt8}, (Cint,), fdm), O_RDWR | O_NOCTTY)
+        pts = RawFD(fds)
 
-            pts = RawFD(fds)
         # pts = fdio(fds, true)
         # pts = Base.Filesystem.File(RawFD(fds))
         # pts = Base.TTY(RawFD(fds); readable = false)
diff --git a/test/testhelpers/Furlongs.jl b/test/testhelpers/Furlongs.jl
index 2f43834674e8e0..8ac22c6244cd33 100644
--- a/test/testhelpers/Furlongs.jl
+++ b/test/testhelpers/Furlongs.jl
@@ -14,13 +14,26 @@ struct Furlong{p,T<:Number} <: Number
 end
 Furlong(x::T) where {T<:Number} = Furlong{1,T}(x)
 Furlong(x::Furlong) = x
-(::Type{T})(x::Furlong) where {T<:Number} = T(x.val)::T
+(::Type{T})(x::Furlong{0}) where {T<:Number} = T(x.val)::T
+(::Type{T})(x::Furlong{0}) where {T<:Furlong{0}} = T(x.val)::T
+(::Type{T})(x::Furlong{0}) where {T<:Furlong} = typeassert(x, T)
 Furlong{p}(v::Number) where {p} = Furlong{p,typeof(v)}(v)
-Furlong{p}(x::Furlong{q}) where {p,q} = (@assert(p==q); Furlong{p,typeof(x.val)}(x.val))
-Furlong{p,T}(x::Furlong{q}) where {T,p,q} = (@assert(p==q); Furlong{p,T}(T(x.val)))
+Furlong{p}(x::Furlong{q}) where {p,q} = (typeassert(x, Furlong{p}); Furlong{p,typeof(x.val)}(x.val))
+Furlong{p,T}(x::Furlong{q}) where {T,p,q} = (typeassert(x, Furlong{p}); Furlong{p,T}(T(x.val)))
 
 Base.promote_type(::Type{Furlong{p,T}}, ::Type{Furlong{p,S}}) where {p,T,S} =
-    (Base.@_pure_meta; Furlong{p,promote_type(T,S)})
+    Furlong{p,promote_type(T,S)}
+
+# only Furlong{0} forms a ring and isa Number
+Base.convert(::Type{T}, y::Number) where {T<:Furlong{0}} = T(y)
+Base.convert(::Type{Furlong}, y::Number) = Furlong{0}(y)
+Base.convert(::Type{Furlong{<:Any,T}}, y::Number) where {T<:Number} = Furlong{0,T}(y)
+Base.convert(::Type{T}, y::Number) where {T<:Furlong} = typeassert(y, T) # throws, since cannot convert a Furlong{0} to a Furlong{p}
+# other Furlong{p} form a group
+Base.convert(::Type{T}, y::Furlong) where {T<:Furlong{0}} = T(y)
+Base.convert(::Type{Furlong}, y::Furlong) = y
+Base.convert(::Type{Furlong{<:Any,T}}, y::Furlong{p}) where {p,T<:Number} = Furlong{p,T}(y)
+Base.convert(::Type{T}, y::Furlong) where {T<:Furlong} = T(y)
 
 Base.one(x::Furlong{p,T}) where {p,T} = one(T)
 Base.one(::Type{Furlong{p,T}}) where {p,T} = one(T)
@@ -29,15 +42,21 @@ Base.oneunit(x::Type{Furlong{p,T}}) where {p,T} = Furlong{p,T}(one(T))
 Base.zero(x::Furlong{p,T}) where {p,T} = Furlong{p,T}(zero(T))
 Base.zero(::Type{Furlong{p,T}}) where {p,T} = Furlong{p,T}(zero(T))
 Base.iszero(x::Furlong) = iszero(x.val)
+Base.float(x::Furlong{p}) where {p} = Furlong{p}(float(x.val))
+Base.eps(::Type{Furlong{p,T}}) where {p,T<:AbstractFloat} = Furlong{p}(eps(T))
+Base.eps(::Furlong{p,T}) where {p,T<:AbstractFloat} = eps(Furlong{p,T})
+Base.floatmin(::Type{Furlong{p,T}}) where {p,T<:AbstractFloat} = Furlong{p}(floatmin(T))
+Base.floatmin(::Furlong{p,T}) where {p,T<:AbstractFloat} = floatmin(Furlong{p,T})
+Base.floatmax(::Type{Furlong{p,T}}) where {p,T<:AbstractFloat} = Furlong{p}(floatmax(T))
+Base.floatmax(::Furlong{p,T}) where {p,T<:AbstractFloat} = floatmax(Furlong{p,T})
+Base.conj(x::Furlong{p,T}) where {p,T} = Furlong{p,T}(conj(x.val))
 
-# convert Furlong exponent p to a canonical form.  This
-# is not type stable, but it doesn't matter since it is used
-# at compile time (in generated functions), not runtime
+# convert Furlong exponent p to a canonical form
 canonical_p(p) = isinteger(p) ? Int(p) : Rational{Int}(p)
 
 Base.abs(x::Furlong{p}) where {p} = Furlong{p}(abs(x.val))
-@generated Base.abs2(x::Furlong{p}) where {p} = :(Furlong{$(canonical_p(2p))}(abs2(x.val)))
-@generated Base.inv(x::Furlong{p}) where {p} = :(Furlong{$(canonical_p(-p))}(inv(x.val)))
+Base.abs2(x::Furlong{p}) where {p} = Furlong{canonical_p(2p)}(abs2(x.val))
+Base.inv(x::Furlong{p}) where {p} = Furlong{canonical_p(-p)}(inv(x.val))
 
 for f in (:isfinite, :isnan, :isreal, :isinf)
     @eval Base.$f(x::Furlong) = $f(x.val)
@@ -56,11 +75,10 @@ end
 for op in (:(==), :(!=), :<, :<=, :isless, :isequal)
     @eval $op(x::Furlong{p}, y::Furlong{p}) where {p} = $op(x.val, y.val)
 end
-# generated functions to allow type inference of the value of the exponent:
 for (f,op) in ((:_plus,:+),(:_minus,:-),(:_times,:*),(:_div,://))
-    @eval @generated function $f(v::T, ::Furlong{p}, ::Union{Furlong{q},Val{q}}) where {T,p,q}
+    @eval function $f(v::T, ::Furlong{p}, ::Union{Furlong{q},Val{q}}) where {T,p,q}
         s = $op(p, q)
-        :(Furlong{$(canonical_p(s)),$T}(v))
+        Furlong{canonical_p(s),T}(v)
     end
 end
 for (op,eop) in ((:*, :_plus), (:/, :_minus), (://, :_minus), (:div, :_minus))
diff --git a/test/testhelpers/ImmutableArrays.jl b/test/testhelpers/ImmutableArrays.jl
new file mode 100644
index 00000000000000..df2a78387e07bf
--- /dev/null
+++ b/test/testhelpers/ImmutableArrays.jl
@@ -0,0 +1,28 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# ImmutableArrays (arrays that implement getindex but not setindex!)
+
+# This test file defines an array wrapper that is immutable. It can be used to
+# test the action of methods on immutable arrays.
+
+module ImmutableArrays
+
+export ImmutableArray
+
+"An immutable wrapper type for arrays."
+struct ImmutableArray{T,N,A<:AbstractArray} <: AbstractArray{T,N}
+    data::A
+end
+
+ImmutableArray(data::AbstractArray{T,N}) where {T,N} = ImmutableArray{T,N,typeof(data)}(data)
+
+# Minimal AbstractArray interface
+Base.size(A::ImmutableArray) = size(A.data)
+Base.size(A::ImmutableArray, d) = size(A.data, d)
+Base.getindex(A::ImmutableArray, i...) = getindex(A.data, i...)
+
+# The immutable array remains immutable after conversion to AbstractArray
+AbstractArray{T}(A::ImmutableArray) where {T} = ImmutableArray(AbstractArray{T}(A.data))
+AbstractArray{T,N}(A::ImmutableArray{S,N}) where {S,T,N} = ImmutableArray(AbstractArray{T,N}(A.data))
+
+end
diff --git a/test/testhelpers/InfiniteArrays.jl b/test/testhelpers/InfiniteArrays.jl
new file mode 100644
index 00000000000000..d69130f4d726aa
--- /dev/null
+++ b/test/testhelpers/InfiniteArrays.jl
@@ -0,0 +1,49 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# InfiniteArrays (arrays with infinite size)
+
+# This test file is designed to exercise support for generic sizing,
+# even though infinite arrays aren't implemented in Base.
+
+module InfiniteArrays
+
+export OneToInf, Infinity
+
+"""
+   Infinity()
+
+represents infinite cardinality. Note that `Infinity <: Integer` to support
+being treated as an index.
+"""
+struct Infinity <: Integer end
+
+Base.:(==)(::Infinity, ::Int) = false
+Base.:(==)(::Int, ::Infinity) = false
+Base.:(<)(::Int, ::Infinity) = true
+Base.:(≤)(::Int, ::Infinity) = true
+Base.:(≤)(::Infinity, ::Int) = false
+Base.:(≤)(::Infinity, ::Infinity) = true
+Base.:(-)(::Infinity, ::Int) = Infinity()
+Base.:(+)(::Infinity, ::Int) = Infinity()
+Base.:(:)(::Infinity, ::Infinity) = 1:0
+
+"""
+    OneToInf(n)
+
+Define an `AbstractInfUnitRange` that behaves like `1:∞`, with the added
+distinction that the limits are guaranteed (by the type system) to
+be 1 and ∞.
+"""
+struct OneToInf{T<:Integer} <: AbstractUnitRange{T} end
+
+OneToInf() = OneToInf{Int}()
+
+Base.axes(r::OneToInf) = (r,)
+Base.size(r::OneToInf) = (Infinity(),)
+Base.first(r::OneToInf{T}) where {T} = oneunit(T)
+Base.length(r::OneToInf) = Infinity()
+Base.last(r::OneToInf) = Infinity()
+Base.unitrange(r::OneToInf) = r
+Base.oneto(::Infinity) = OneToInf()
+
+end
diff --git a/test/testhelpers/OffsetArrays.jl b/test/testhelpers/OffsetArrays.jl
index efebb74ded2d8d..e71ec28ad46e58 100644
--- a/test/testhelpers/OffsetArrays.jl
+++ b/test/testhelpers/OffsetArrays.jl
@@ -5,141 +5,441 @@
 # This test file is designed to exercise support for generic indexing,
 # even though offset arrays aren't implemented in Base.
 
+# OffsetArrays v1.3.0
+# No compat patch and docstrings
 module OffsetArrays
 
-using Base: Indices, IndexCartesian, IndexLinear, tail
+using Base: tail, @propagate_inbounds
+using Base: IdentityUnitRange
 
-export OffsetArray
+export OffsetArray, OffsetMatrix, OffsetVector
 
+struct IdOffsetRange{T<:Integer,I<:AbstractUnitRange{T}} <: AbstractUnitRange{T}
+    parent::I
+    offset::T
+
+    IdOffsetRange{T,I}(r::I, offset::T) where {T<:Integer,I<:AbstractUnitRange{T}} = new{T,I}(r, offset)
+end
+
+# Construction/coercion from arbitrary AbstractUnitRanges
+function IdOffsetRange{T,I}(r::AbstractUnitRange, offset::Integer = 0) where {T<:Integer,I<:AbstractUnitRange{T}}
+    rc, o = offset_coerce(I, r)
+    return IdOffsetRange{T,I}(rc, convert(T, o+offset))
+end
+function IdOffsetRange{T}(r::AbstractUnitRange, offset::Integer = 0) where T<:Integer
+    rc = convert(AbstractUnitRange{T}, r)::AbstractUnitRange{T}
+    return IdOffsetRange{T,typeof(rc)}(rc, convert(T, offset))
+end
+IdOffsetRange(r::AbstractUnitRange{T}, offset::Integer = 0) where T<:Integer =
+    IdOffsetRange{T,typeof(r)}(r, convert(T, offset))
+
+# Coercion from other IdOffsetRanges
+IdOffsetRange{T,I}(r::IdOffsetRange{T,I}) where {T<:Integer,I<:AbstractUnitRange{T}} = r
+function IdOffsetRange{T,I}(r::IdOffsetRange) where {T<:Integer,I<:AbstractUnitRange{T}}
+    rc, offset = offset_coerce(I, r.parent)
+    return IdOffsetRange{T,I}(rc, r.offset+offset)
+end
+function IdOffsetRange{T}(r::IdOffsetRange) where T<:Integer
+    return IdOffsetRange(convert(AbstractUnitRange{T}, r.parent), r.offset)
+end
+IdOffsetRange(r::IdOffsetRange) = r
+
+AbstractUnitRange{T}(r::IdOffsetRange{T}) where {T} = r
+AbstractUnitRange{T}(r::IdOffsetRange) where {T} = IdOffsetRange{T}(r)
+
+# TODO: uncomment these when Julia is ready
+# # Conversion preserves both the values and the indexes, throwing an InexactError if this
+# # is not possible.
+# Base.convert(::Type{IdOffsetRange{T,I}}, r::IdOffsetRange{T,I}) where {T<:Integer,I<:AbstractUnitRange{T}} = r
+# Base.convert(::Type{IdOffsetRange{T,I}}, r::IdOffsetRange) where {T<:Integer,I<:AbstractUnitRange{T}} =
+#     IdOffsetRange{T,I}(convert(I, r.parent), r.offset)
+# Base.convert(::Type{IdOffsetRange{T,I}}, r::AbstractUnitRange) where {T<:Integer,I<:AbstractUnitRange{T}} =
+#     IdOffsetRange{T,I}(convert(I, r), 0)
+
+offset_coerce(::Type{Base.OneTo{T}}, r::Base.OneTo) where T<:Integer = convert(Base.OneTo{T}, r), 0
+function offset_coerce(::Type{Base.OneTo{T}}, r::AbstractUnitRange) where T<:Integer
+    o = first(r) - 1
+    return Base.OneTo{T}(last(r) - o), o
+end
+# function offset_coerce(::Type{Base.OneTo{T}}, r::IdOffsetRange) where T<:Integer
+#     rc, o = offset_coerce(Base.OneTo{T}, r.parent)
+
+# Fallback, specialze this method if `convert(I, r)` doesn't do what you need
+offset_coerce(::Type{I}, r::AbstractUnitRange) where I<:AbstractUnitRange{T} where T =
+    convert(I, r), 0
+
+@inline Base.parent(r::IdOffsetRange) = r.parent
+@inline Base.axes(r::IdOffsetRange) = (Base.axes1(r),)
+@inline Base.axes1(r::IdOffsetRange) = IdOffsetRange(Base.axes1(r.parent), r.offset)
+@inline Base.length(r::IdOffsetRange) = length(r.parent)
+Base.reduced_index(i::IdOffsetRange) = typeof(i)(first(i):first(i))
+# Workaround for #92 on Julia < 1.4
+Base.reduced_index(i::IdentityUnitRange{<:IdOffsetRange}) = typeof(i)(first(i):first(i))
+for f in [:firstindex, :lastindex]
+    @eval Base.$f(r::IdOffsetRange) = $f(r.parent) .+ r.offset
+end
+
+@inline function Base.iterate(r::IdOffsetRange)
+    ret = iterate(r.parent)
+    ret === nothing && return nothing
+    return (ret[1] + r.offset, ret[2])
+end
+@inline function Base.iterate(r::IdOffsetRange, i)
+    ret = iterate(r.parent, i)
+    ret === nothing && return nothing
+    return (ret[1] + r.offset, ret[2])
+end
+
+@inline Base.first(r::IdOffsetRange) = first(r.parent) + r.offset
+@inline Base.last(r::IdOffsetRange) = last(r.parent) + r.offset
+
+@propagate_inbounds Base.getindex(r::IdOffsetRange, i::Integer) = r.parent[i - r.offset] + r.offset
+@propagate_inbounds function Base.getindex(r::IdOffsetRange, s::AbstractUnitRange{<:Integer})
+    return r.parent[s .- r.offset] .+ r.offset
+end
+@propagate_inbounds function Base.getindex(r::IdOffsetRange, s::IdentityUnitRange)
+    return IdOffsetRange(r.parent[s .- r.offset], r.offset)
+end
+@propagate_inbounds function Base.getindex(r::IdOffsetRange, s::IdOffsetRange)
+    return IdOffsetRange(r.parent[s.parent .+ (s.offset - r.offset)] .+ (r.offset - s.offset), s.offset)
+end
+
+# offset-preserve broadcasting
+Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(-), r::IdOffsetRange{T}, x::Integer) where T =
+    IdOffsetRange{T}(r.parent .- x, r.offset)
+Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), r::IdOffsetRange{T}, x::Integer) where T =
+    IdOffsetRange{T}(r.parent .+ x, r.offset)
+Broadcast.broadcasted(::Base.Broadcast.DefaultArrayStyle{1}, ::typeof(+), x::Integer, r::IdOffsetRange{T}) where T =
+    IdOffsetRange{T}(x .+ r.parent, r.offset)
+
+Base.show(io::IO, r::IdOffsetRange) = print(io, "OffsetArrays.IdOffsetRange(", first(r), ':', last(r), ")")
+
+# Optimizations
+@inline Base.checkindex(::Type{Bool}, inds::IdOffsetRange, i::Real) = Base.checkindex(Bool, inds.parent, i - inds.offset)
+
+struct Origin{T <: Union{Tuple,Int}}
+    index::T
+end
+Origin(I::NTuple{N,Int}) where N = Origin{typeof(I)}(I)
+Origin(I::CartesianIndex) = Origin(I.I)
+Origin(I1::Int, In::Int...) = Origin((I1, In...))
+# Origin(0) != Origin((0, )) but they work the same with broadcasting
+Origin(n::Int) = Origin{Int}(n)
+
+(o::Origin)(A::AbstractArray) = o.index .- first.(axes(A))
+
+### Low-level utilities ###
+
+_indexoffset(r::AbstractRange) = first(r) - 1
+_indexoffset(i::Integer) = 0
+_indexoffset(i::Colon) = 0
+_indexlength(r::AbstractRange) = length(r)
+_indexlength(i::Integer) = i
+_indexlength(i::Colon) = Colon()
+
+_offset(axparent::AbstractUnitRange, ax::AbstractUnitRange) = first(ax) - first(axparent)
+_offset(axparent::AbstractUnitRange, ax::Integer) = 1 - first(axparent)
+
+abstract type AxisConversionStyle end
+struct SingleRange <: AxisConversionStyle end
+struct TupleOfRanges <: AxisConversionStyle end
+
+AxisConversionStyle(::Type) = SingleRange()
+AxisConversionStyle(::Type{<:CartesianIndices}) = TupleOfRanges()
+
+_convertTupleAbstractUnitRange(x) = _convertTupleAbstractUnitRange(AxisConversionStyle(typeof(x)), x)
+_convertTupleAbstractUnitRange(::SingleRange, x) = (convert(AbstractUnitRange{Int}, x),)
+_convertTupleAbstractUnitRange(::TupleOfRanges, x) = convert(Tuple{Vararg{AbstractUnitRange{Int}}}, x)
+
+_toAbstractUnitRanges(t::Tuple) = (_convertTupleAbstractUnitRange(first(t))..., _toAbstractUnitRanges(tail(t))...)
+_toAbstractUnitRanges(::Tuple{}) = ()
+
+# ensure that the indices are consistent in the constructor
+_checkindices(A::AbstractArray, indices, label) = _checkindices(ndims(A), indices, label)
+function _checkindices(N::Integer, indices, label)
+    throw_argumenterror(N, indices, label) = throw(ArgumentError(label * " $indices are not compatible with a $(N)D array"))
+    N == length(indices) || throw_argumenterror(N, indices, label)
+end
+
+
+# Technically we know the length of CartesianIndices but we need to convert it first, so here we
+# don't put it in OffsetAxisKnownLength.
+const OffsetAxisKnownLength = Union{Integer,AbstractUnitRange}
+const OffsetAxis = Union{OffsetAxisKnownLength,Colon}
+const ArrayInitializer = Union{UndefInitializer,Missing,Nothing}
+
+## OffsetArray
 struct OffsetArray{T,N,AA<:AbstractArray} <: AbstractArray{T,N}
     parent::AA
     offsets::NTuple{N,Int}
+    function OffsetArray{T,N,AA}(parent::AA, offsets::NTuple{N,Int}) where {T,N,AA <: AbstractArray}
+        @boundscheck overflow_check.(axes(parent), offsets)
+        new{T,N,AA}(parent, offsets)
+    end
+end
+
+const OffsetVector{T,AA <: AbstractArray} = OffsetArray{T,1,AA}
+
+const OffsetMatrix{T,AA <: AbstractArray} = OffsetArray{T,2,AA}
+
+function overflow_check(r, offset::T) where T
+    # This gives some performance boost https://github.com/JuliaLang/julia/issues/33273
+    throw_upper_overflow_error() = throw(ArgumentError("Boundary overflow detected: offset $offset should be equal or less than $(typemax(T) - last(r))"))
+    throw_lower_overflow_error() = throw(ArgumentError("Boundary overflow detected: offset $offset should be equal or greater than $(typemin(T) - first(r))"))
+
+    if offset > 0 && last(r) > typemax(T) - offset
+        throw_upper_overflow_error()
+    elseif offset < 0 && first(r) < typemin(T) - offset
+        throw_lower_overflow_error()
+    end
+end
+
+# Tuples of integers are treated as offsets
+# Empty Tuples are handled here
+function OffsetArray(A::AbstractArray, offsets::Tuple{Vararg{Integer}})
+    _checkindices(A, offsets, "offsets")
+    OffsetArray{eltype(A),ndims(A),typeof(A)}(A, offsets)
+end
+
+# These methods are necessary to disallow incompatible dimensions for
+# the OffsetVector and the OffsetMatrix constructors
+for (FT, ND) in ((:OffsetVector, :1), (:OffsetMatrix, :2))
+    @eval function $FT(A::AbstractArray{<:Any,$ND}, offsets::Tuple{Vararg{Integer}})
+        _checkindices(A, offsets, "offsets")
+        OffsetArray{eltype(A),$ND,typeof(A)}(A, offsets)
+    end
+    FTstr = string(FT)
+    @eval function $FT(A::AbstractArray, offsets::Tuple{Vararg{Integer}})
+        throw(ArgumentError($FTstr * " requires a " * string($ND) * "D array"))
+    end
+end
+
+## OffsetArray constructors
+for FT in (:OffsetArray, :OffsetVector, :OffsetMatrix)
+    # Nested OffsetArrays may strip off the wrapper and collate the offsets
+    @eval function $FT(A::OffsetArray, offsets::Tuple{Vararg{Integer}})
+        _checkindices(A, offsets, "offsets")
+        $FT(parent(A), map(+, A.offsets, offsets))
+    end
+
+    # In general, indices get converted to AbstractUnitRanges.
+    # CartesianIndices{N} get converted to N ranges
+    @eval function $FT(A::AbstractArray, inds::Tuple{Any,Vararg{Any}})
+        $FT(A, _toAbstractUnitRanges(to_indices(A, axes(A), inds)))
+    end
+
+    # convert ranges to offsets
+    @eval function $FT(A::AbstractArray, inds::Tuple{AbstractUnitRange,Vararg{AbstractUnitRange}})
+        _checkindices(A, inds, "indices")
+        # Performance gain by wrapping the error in a function: see https://github.com/JuliaLang/julia/issues/37558
+        throw_dimerr(lA, lI) = throw(DimensionMismatch("supplied axes do not agree with the size of the array (got size $lA for the array and $lI for the indices"))
+        lA = size(A)
+        lI = map(length, inds)
+        lA == lI || throw_dimerr(lA, lI)
+        $FT(A, map(_offset, axes(A), inds))
+    end
+
+    @eval $FT(A::AbstractArray, inds::Vararg) = $FT(A, inds)
+
+    @eval $FT(A::AbstractArray, origin::Origin) = $FT(A, origin(A))
 end
-OffsetVector{T,AA<:AbstractArray} = OffsetArray{T,1,AA}
 
-OffsetArray(A::AbstractArray{T,N}, offsets::NTuple{N,Int}) where {T,N} = OffsetArray{T,N,typeof(A)}(A, offsets)
-OffsetArray(A::AbstractArray{T,N}, offsets::Vararg{Int,N}) where {T,N} = OffsetArray(A, offsets)
+# array initialization
+function OffsetArray{T,N}(init::ArrayInitializer, inds::Tuple{Vararg{OffsetAxisKnownLength}}) where {T,N}
+    _checkindices(N, inds, "indices")
+    AA = Array{T,N}(init, map(_indexlength, inds))
+    OffsetArray{T,N,typeof(AA)}(AA, map(_indexoffset, inds))
+end
+function OffsetArray{T,N}(init::ArrayInitializer, inds::Tuple) where {T,N}
+    OffsetArray{T,N}(init, _toAbstractUnitRanges(inds))
+end
+OffsetArray{T,N}(init::ArrayInitializer, inds::Vararg) where {T,N} = OffsetArray{T,N}(init, inds)
 
-OffsetArray{T,N}(::UndefInitializer, inds::Indices{N}) where {T,N} =
-    OffsetArray{T,N,Array{T,N}}(Array{T,N}(undef, map(length, inds)), map(indsoffset, inds))
-OffsetArray{T}(::UndefInitializer, inds::Indices{N}) where {T,N} =
-    OffsetArray{T,N}(undef, inds)
+OffsetArray{T}(init::ArrayInitializer, inds::NTuple{N,OffsetAxisKnownLength}) where {T,N} = OffsetArray{T,N}(init, inds)
+function OffsetArray{T}(init::ArrayInitializer, inds::Tuple) where {T}
+    OffsetArray{T}(init, _toAbstractUnitRanges(inds))
+end
+OffsetArray{T}(init::ArrayInitializer, inds::Vararg) where {T} = OffsetArray{T}(init, inds)
 
-Base.IndexStyle(::Type{T}) where {T<:OffsetArray} = Base.IndexStyle(parenttype(T))
+Base.IndexStyle(::Type{OA}) where {OA <: OffsetArray} = IndexStyle(parenttype(OA))
 parenttype(::Type{OffsetArray{T,N,AA}}) where {T,N,AA} = AA
 parenttype(A::OffsetArray) = parenttype(typeof(A))
 
 Base.parent(A::OffsetArray) = A.parent
 
-Base.size(A::OffsetArray) = size(A.parent)
-Base.size(A::OffsetArray, d) = size(A.parent, d)
 Base.eachindex(::IndexCartesian, A::OffsetArray) = CartesianIndices(axes(A))
-Base.eachindex(::IndexLinear, A::OffsetVector) = axes(A, 1)
+Base.eachindex(::IndexLinear, A::OffsetVector)   = axes(A, 1)
+
+@inline Base.size(A::OffsetArray) = size(parent(A))
+@inline Base.size(A::OffsetArray, d) = size(parent(A), d)
 
-# Implementations of indices and axes1. Since bounds-checking is
-# performance-critical and relies on indices, these are usually worth
-# optimizing thoroughly.
-@inline Base.axes(A::OffsetArray, d) = 1 <= d <= length(A.offsets) ? Base.IdentityUnitRange(axes(parent(A))[d] .+ A.offsets[d]) : Base.IdentityUnitRange(1:1)
-@inline Base.axes(A::OffsetArray) = _indices(axes(parent(A)), A.offsets)  # would rather use ntuple, but see #15276
-@inline _indices(inds, offsets) = (Base.IdentityUnitRange(inds[1] .+ offsets[1]), _indices(tail(inds), tail(offsets))...)
-_indices(::Tuple{}, ::Tuple{}) = ()
-Base.axes1(A::OffsetArray{T,0}) where {T} = Base.IdentityUnitRange(1:1)  # we only need to specialize this one
+@inline Base.axes(A::OffsetArray) = map(IdOffsetRange, axes(parent(A)), A.offsets)
+@inline Base.axes(A::OffsetArray, d) = d <= ndims(A) ? IdOffsetRange(axes(parent(A), d), A.offsets[d]) : IdOffsetRange(axes(parent(A), d))
+@inline Base.axes1(A::OffsetArray{T,0}) where {T} = IdOffsetRange(axes(parent(A), 1))  # we only need to specialize this one
 
-const OffsetAxis = Union{Integer, UnitRange, Base.IdentityUnitRange{<:UnitRange}, Base.OneTo}
-function Base.similar(A::OffsetArray, T::Type, dims::Dims)
-    B = similar(parent(A), T, dims)
+Base.similar(A::OffsetArray, ::Type{T}, dims::Dims) where T =
+    similar(parent(A), T, dims)
+function Base.similar(A::AbstractArray, ::Type{T}, inds::Tuple{OffsetAxisKnownLength,Vararg{OffsetAxisKnownLength}}) where T
+    B = similar(A, T, map(_indexlength, inds))
+    return OffsetArray(B, map(_offset, axes(B), inds))
 end
-function Base.similar(A::AbstractArray, T::Type, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}})
-    B = similar(A, T, map(indslength, inds))
-    OffsetArray(B, map(indsoffset, inds))
+
+# reshape accepts a single colon
+Base.reshape(A::AbstractArray, inds::OffsetAxis...) = reshape(A, inds)
+function Base.reshape(A::AbstractArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}})
+    AR = reshape(A, map(_indexlength, inds))
+    return OffsetArray(AR, map(_offset, axes(AR), inds))
 end
 
-Base.similar(::Type{T}, shape::Tuple{OffsetAxis,Vararg{OffsetAxis}}) where {T<:AbstractArray} =
-    OffsetArray(T(undef, map(indslength, shape)), map(indsoffset, shape))
+# Reshaping OffsetArrays can "pop" the original OffsetArray wrapper and return
+# an OffsetArray(reshape(...)) instead of an OffsetArray(reshape(OffsetArray(...)))
+Base.reshape(A::OffsetArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) =
+    OffsetArray(reshape(parent(A), map(_indexlength, inds)), map(_indexoffset, inds))
+# And for non-offset axes, we can just return a reshape of the parent directly
+Base.reshape(A::OffsetArray, inds::Tuple{Union{Integer,Base.OneTo},Vararg{Union{Integer,Base.OneTo}}}) = reshape(parent(A), inds)
+Base.reshape(A::OffsetArray, inds::Dims) = reshape(parent(A), inds)
+Base.reshape(A::OffsetArray, ::Colon) = reshape(parent(A), Colon())
+Base.reshape(A::OffsetVector, ::Colon) = A
+Base.reshape(A::OffsetVector, ::Tuple{Colon}) = A
+Base.reshape(A::OffsetArray, inds::Union{Int,Colon}...) = reshape(parent(A), inds)
+Base.reshape(A::OffsetArray, inds::Tuple{Vararg{Union{Int,Colon}}}) = reshape(parent(A), inds)
 
-Base.reshape(A::AbstractArray, inds::Tuple{OffsetAxis,Vararg{OffsetAxis}}) = OffsetArray(reshape(A, map(indslength, inds)), map(indsoffset, inds))
+function Base.similar(::Type{T}, shape::Tuple{OffsetAxis,Vararg{OffsetAxis}}) where {T <: AbstractArray}
+    P = T(undef, map(_indexlength, shape))
+    OffsetArray(P, map(_offset, axes(P), shape))
+end
 
 Base.fill(v, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} =
-    fill!(OffsetArray(Array{typeof(v), N}(undef, map(indslength, inds)), map(indsoffset, inds)), v)
+    fill!(similar(Array{typeof(v)}, inds), v)
 Base.zeros(::Type{T}, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {T, N} =
-    fill!(OffsetArray(Array{T, N}(undef, map(indslength, inds)), map(indsoffset, inds)), zero(T))
+    fill!(similar(Array{T}, inds), zero(T))
 Base.ones(::Type{T}, inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {T, N} =
-    fill!(OffsetArray(Array{T, N}(undef, map(indslength, inds)), map(indsoffset, inds)), one(T))
+    fill!(similar(Array{T}, inds), one(T))
 Base.trues(inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} =
-    fill!(OffsetArray(BitArray{N}(undef, map(indslength, inds)), map(indsoffset, inds)), true)
+    fill!(similar(BitArray, inds), true)
 Base.falses(inds::NTuple{N, Union{Integer, AbstractUnitRange}}) where {N} =
-    fill!(OffsetArray(BitArray{N}(undef, map(indslength, inds)), map(indsoffset, inds)), false)
+    fill!(similar(BitArray, inds), false)
+
+## Indexing
+
+# Note this gets the index of the parent *array*, not the index of the parent *range*
+# Here's how one can think about this:
+#   Δi = i - first(r)
+#   i′ = first(r.parent) + Δi
+# and one obtains the result below.
+parentindex(r::IdOffsetRange, i) = i - r.offset
 
 @inline function Base.getindex(A::OffsetArray{T,N}, I::Vararg{Int,N}) where {T,N}
-    checkbounds(A, I...)
-    @inbounds ret = parent(A)[offset(A.offsets, I)...]
-    ret
+    @boundscheck checkbounds(A, I...)
+    J = map(parentindex, axes(A), I)
+    @inbounds parent(A)[J...]
 end
-# Vectors don't support one-based linear indexing; they always use the offsets
+
 @inline function Base.getindex(A::OffsetVector, i::Int)
-    checkbounds(A, i)
-    @inbounds ret = parent(A)[offset(A.offsets, (i,))[1]]
-    ret
-end
-# But multidimensional arrays allow one-based linear indexing
-@inline function Base.getindex(A::OffsetArray, i::Int)
-    checkbounds(A, i)
-    @inbounds ret = parent(A)[i]
-    ret
+    @boundscheck checkbounds(A, i)
+    @inbounds parent(A)[parentindex(Base.axes1(A), i)]
 end
+@propagate_inbounds Base.getindex(A::OffsetArray, i::Int)  = parent(A)[i]
+
 @inline function Base.setindex!(A::OffsetArray{T,N}, val, I::Vararg{Int,N}) where {T,N}
-    checkbounds(A, I...)
-    @inbounds parent(A)[offset(A.offsets, I)...] = val
-    val
+    @boundscheck checkbounds(A, I...)
+    J = @inbounds map(parentindex, axes(A), I)
+    @inbounds parent(A)[J...] = val
+    A
 end
+
 @inline function Base.setindex!(A::OffsetVector, val, i::Int)
-    checkbounds(A, i)
-    @inbounds parent(A)[offset(A.offsets, (i,))[1]] = val
-    val
+    @boundscheck checkbounds(A, i)
+    @inbounds parent(A)[parentindex(Base.axes1(A), i)] = val
+    A
 end
-@inline function Base.setindex!(A::OffsetArray, val, i::Int)
-    checkbounds(A, i)
-    @inbounds parent(A)[i] = val
-    val
+@propagate_inbounds function Base.setindex!(A::OffsetArray, val, i::Int)
+    parent(A)[i] = val
+    A
 end
 
-@inline function Base.deleteat!(A::OffsetArray, i::Int)
-    checkbounds(A, i)
-    @inbounds deleteat!(parent(A), offset(A.offsets, (i,))[1])
-end
+# For fast broadcasting: ref https://discourse.julialang.org/t/why-is-there-a-performance-hit-on-broadcasting-with-offsetarrays/32194
+Base.dataids(A::OffsetArray) = Base.dataids(parent(A))
+Broadcast.broadcast_unalias(dest::OffsetArray, src::OffsetArray) = parent(dest) === parent(src) ? src : Broadcast.unalias(dest, src)
 
-@inline function Base.deleteat!(A::OffsetArray{T,N}, I::Vararg{Int, N}) where {T,N}
-    checkbounds(A, I...)
-    @inbounds deleteat!(parent(A), offset(A.offsets, I)...)
-end
+### Special handling for AbstractRange
 
-@inline function Base.deleteat!(A::OffsetArray, i::UnitRange{Int})
-    checkbounds(A, first(i))
-    checkbounds(A, last(i))
-    first_idx = offset(A.offsets, (first(i),))[1]
-    last_idx = offset(A.offsets, (last(i),))[1]
-    @inbounds deleteat!(parent(A), first_idx:last_idx)
-end
+const OffsetRange{T} = OffsetArray{T,1,<:AbstractRange{T}}
+const IIUR = IdentityUnitRange{S} where S<:AbstractUnitRange{T} where T<:Integer
 
-function Base.push!(a::OffsetArray{T,1}, item) where T
-    # convert first so we don't grow the array if the assignment won't work
-    itemT = convert(T, item)
-    resize!(a, length(a)+1)
-    a[end] = itemT
-    return a
+Base.step(a::OffsetRange) = step(parent(a))
+
+@propagate_inbounds Base.getindex(a::OffsetRange, r::OffsetRange) = OffsetArray(a[parent(r)], r.offsets)
+@propagate_inbounds function Base.getindex(a::OffsetRange, r::IdOffsetRange)
+    OffsetArray(a.parent[r.parent .+ (r.offset - a.offsets[1])], r.offset)
 end
+@propagate_inbounds Base.getindex(r::OffsetRange, s::IIUR) =
+    OffsetArray(r[s.indices], s)
+@propagate_inbounds Base.getindex(a::OffsetRange, r::AbstractRange) = a.parent[r .- a.offsets[1]]
+@propagate_inbounds Base.getindex(a::AbstractRange, r::OffsetRange) = OffsetArray(a[parent(r)], r.offsets)
+
+@propagate_inbounds Base.getindex(r::UnitRange, s::IIUR) =
+    OffsetArray(r[s.indices], s)
+
+@propagate_inbounds Base.getindex(r::StepRange, s::IIUR) =
+    OffsetArray(r[s.indices], s)
 
-# Computing a shifted index (subtracting the offset)
-offset(offsets::NTuple{N,Int}, inds::NTuple{N,Int}) where {N} = _offset((), offsets, inds)
-_offset(out, ::Tuple{}, ::Tuple{}) = out
-@inline _offset(out, offsets, inds) = _offset((out..., inds[1]-offsets[1]), Base.tail(offsets), Base.tail(inds))
+# this method is needed for ambiguity resolution
+@propagate_inbounds Base.getindex(r::StepRangeLen{T,<:Base.TwicePrecision,<:Base.TwicePrecision}, s::IIUR) where T =
+    OffsetArray(r[s.indices], s)
 
-indsoffset(r::AbstractRange) = first(r) - 1
-indsoffset(i::Integer) = 0
-indslength(r::AbstractRange) = length(r)
-indslength(i::Integer) = i
+@propagate_inbounds Base.getindex(r::StepRangeLen{T}, s::IIUR) where {T} =
+    OffsetArray(r[s.indices], s)
 
+@propagate_inbounds Base.getindex(r::LinRange, s::IIUR) =
+    OffsetArray(r[s.indices], s)
+
+function Base.show(io::IO, r::OffsetRange)
+    show(io, r.parent)
+    o = r.offsets[1]
+    print(io, " with indices ", o+1:o+length(r))
+end
+Base.show(io::IO, ::MIME"text/plain", r::OffsetRange) = show(io, r)
+
+### Some mutating functions defined only for OffsetVector ###
 
 Base.resize!(A::OffsetVector, nl::Integer) = (resize!(A.parent, nl); A)
+Base.push!(A::OffsetVector, x...) = (push!(A.parent, x...); A)
+Base.pop!(A::OffsetVector) = pop!(A.parent)
+Base.append!(A::OffsetVector, items) = (append!(A.parent, items); A)
+Base.empty!(A::OffsetVector) = (empty!(A.parent); A)
+
+# These functions keep the summary compact
+function Base.inds2string(inds::Tuple{Vararg{Union{IdOffsetRange,IdentityUnitRange{<:IdOffsetRange}}}})
+    Base.inds2string(map(UnitRange, inds))
+end
+Base.showindices(io::IO, ind1::IdOffsetRange, inds::IdOffsetRange...) = Base.showindices(io, map(UnitRange, (ind1, inds...))...)
+
+function Base.showarg(io::IO, a::OffsetArray, toplevel)
+    print(io, "OffsetArray(")
+    Base.showarg(io, parent(a), false)
+    Base.showindices(io, axes(a)...)
+    print(io, ')')
+    toplevel && print(io, " with eltype ", eltype(a))
+end
 
+function Base.replace_in_print_matrix(A::OffsetArray{<:Any,2}, i::Integer, j::Integer, s::AbstractString)
+    J = map(parentindex, axes(A), (i,j))
+    Base.replace_in_print_matrix(parent(A), J..., s)
 end
+function Base.replace_in_print_matrix(A::OffsetArray{<:Any,1}, i::Integer, j::Integer, s::AbstractString)
+    ip = parentindex(axes(A,1), i)
+    Base.replace_in_print_matrix(parent(A), ip, j, s)
+end
+
+function no_offset_view(A::AbstractArray)
+    if Base.has_offset_axes(A)
+        OffsetArray(A, map(r->1-first(r), axes(A)))
+    else
+        A
+    end
+end
+
+no_offset_view(A::OffsetArray) = no_offset_view(parent(A))
+
+end # module
diff --git a/test/testhelpers/Quaternions.jl b/test/testhelpers/Quaternions.jl
index a3967c1aacc43c..1eddad322ec408 100644
--- a/test/testhelpers/Quaternions.jl
+++ b/test/testhelpers/Quaternions.jl
@@ -1,6 +1,7 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
 module Quaternions
+using Random
 
 export Quaternion
 
@@ -36,4 +37,17 @@ Base.:(*)(q::Quaternion, b::Bool) = b * q # remove method ambiguity
 Base.:(/)(q::Quaternion, w::Quaternion) = q * conj(w) * (1.0 / abs2(w))
 Base.:(\)(q::Quaternion, w::Quaternion) = conj(q) * w * (1.0 / abs2(q))
 
+# adapted from https://github.com/JuliaGeometry/Quaternions.jl/pull/42
+function Base.rand(rng::AbstractRNG, ::Random.SamplerType{Quaternion{T}}) where {T<:Real}
+    return Quaternion{T}(rand(rng, T), rand(rng, T), rand(rng, T), rand(rng, T))
+end
+function Base.randn(rng::AbstractRNG, ::Type{Quaternion{T}}) where {T<:AbstractFloat}
+    return Quaternion{T}(
+        randn(rng, T) / 2,
+        randn(rng, T) / 2,
+        randn(rng, T) / 2,
+        randn(rng, T) / 2,
+    )
+end
+
 end
diff --git a/test/testhelpers/SizedArrays.jl b/test/testhelpers/SizedArrays.jl
new file mode 100644
index 00000000000000..dfcc5b79f13875
--- /dev/null
+++ b/test/testhelpers/SizedArrays.jl
@@ -0,0 +1,40 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+# SizedArrays
+
+# This test file defines an array wrapper with statical size. It can be used to
+# test the action of LinearAlgebra with non-number eltype.
+
+module SizedArrays
+
+import Base: +, *, ==
+
+export SizedArray
+
+struct SizedArray{SZ,T,N,A<:AbstractArray} <: AbstractArray{T,N}
+    data::A
+    function SizedArray{SZ}(data::AbstractArray{T,N}) where {SZ,T,N}
+        SZ == size(data) || throw(ArgumentError("size mismatch!"))
+        new{SZ,T,N,typeof(data)}(data)
+    end
+    function SizedArray{SZ,T,N,A}(data::AbstractArray{T,N}) where {SZ,T,N,A}
+        SZ == size(data) || throw(ArgumentError("size mismatch!"))
+        new{SZ,T,N,A}(A(data))
+    end
+end
+Base.convert(::Type{SizedArray{SZ,T,N,A}}, data::AbstractArray) where {SZ,T,N,A} = SizedArray{SZ,T,N,A}(data)
+
+# Minimal AbstractArray interface
+Base.size(a::SizedArray) = size(typeof(a))
+Base.size(::Type{<:SizedArray{SZ}}) where {SZ} = SZ
+Base.getindex(A::SizedArray, i...) = getindex(A.data, i...)
+Base.zero(::Type{T}) where T <: SizedArray = SizedArray{size(T)}(zeros(eltype(T), size(T)))
++(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = SizedArray{SZ}(S1.data + S2.data)
+==(S1::SizedArray{SZ}, S2::SizedArray{SZ}) where {SZ} = S1.data == S2.data
+function *(S1::SizedArray, S2::SizedArray)
+    0 < ndims(S1) < 3 && 0 < ndims(S2) < 3 && size(S1, 2) == size(S2, 1) || throw(ArgumentError("size mismatch!"))
+    data = S1.data * S2.data
+    SZ = ndims(data) == 1 ? (size(S1, 1), ) : (size(S1, 1), size(S2, 2))
+    SizedArray{SZ}(data)
+end
+end
diff --git a/test/testhelpers/arrayindexingtypes.jl b/test/testhelpers/arrayindexingtypes.jl
index b26ffc9ec5f24b..0e956b5216c946 100644
--- a/test/testhelpers/arrayindexingtypes.jl
+++ b/test/testhelpers/arrayindexingtypes.jl
@@ -1,3 +1,5 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 ## Tests for the abstract array interfaces and operations with arrays
 ## with different indexing rules
 
@@ -52,3 +54,15 @@ Base.similar(A::TSlow, ::Type{T}, dims::Dims) where {T} = TSlow(T, dims)
 Base.IndexStyle(::Type{A}) where {A<:TSlow} = IndexCartesian()
 Base.getindex(A::TSlow{T,N}, i::Vararg{Int,N}) where {T,N} = get(A.data, i, zero(T))
 Base.setindex!(A::TSlow{T,N}, v, i::Vararg{Int,N}) where {T,N} = (A.data[i] = v)
+
+# An array type that just passes through to the parent
+struct WrapperArray{T,N,A<:AbstractArray{T,N}} <: AbstractArray{T,N}
+    parent::A
+end
+Base.IndexStyle(::Type{WrapperArray{T,N,A}}) where {T,N,A<:AbstractArray{T,N}} = IndexStyle(A)
+Base.parent(A::WrapperArray) = A.parent
+Base.size(A::WrapperArray) = size(A.parent)
+Base.axes(A::WrapperArray) = axes(A.parent)
+Base.getindex(A::WrapperArray, i::Int...) = A.parent[i...]
+Base.setindex!(A::WrapperArray, v, i::Int...) = A.parent[i...] = v
+Base.similar(A::WrapperArray, ::Type{T}, dims::Dims) where T = similar(A.parent, T, dims)
diff --git a/test/testhelpers/coverage_file.info b/test/testhelpers/coverage_file.info
index 9b4b1c1f2f96e7..c83e75dee80603 100644
--- a/test/testhelpers/coverage_file.info
+++ b/test/testhelpers/coverage_file.info
@@ -4,14 +4,15 @@ DA:4,1
 DA:5,0
 DA:7,1
 DA:8,1
-DA:9,5
+DA:9,3
+DA:10,5
 DA:11,1
 DA:12,1
 DA:14,0
 DA:17,1
-DA:19,1
+DA:19,2
 DA:20,1
 DA:22,1
-LH:10
-LF:13
+LH:12
+LF:14
 end_of_record
diff --git a/test/testhelpers/coverage_file.info.bad b/test/testhelpers/coverage_file.info.bad
index 44e33a9df68c79..311f6379381eea 100644
--- a/test/testhelpers/coverage_file.info.bad
+++ b/test/testhelpers/coverage_file.info.bad
@@ -4,16 +4,17 @@ DA:4,1
 DA:5,0
 DA:7,1
 DA:8,1
-DA:9,5
+DA:9,3
+DA:10,5
 DA:11,1
 DA:12,1
 DA:14,0
 DA:17,1
 DA:18,0
-DA:19,1
+DA:19,2
 DA:20,1
 DA:22,1
 DA:1234,0
-LH:11
-LF:15
+LH:12
+LF:16
 end_of_record
diff --git a/test/testhelpers/coverage_file.info.bad2 b/test/testhelpers/coverage_file.info.bad2
new file mode 100644
index 00000000000000..a766597be4c175
--- /dev/null
+++ b/test/testhelpers/coverage_file.info.bad2
@@ -0,0 +1,20 @@
+SF:<FILENAME>
+DA:3,1
+DA:4,1
+DA:5,0
+DA:7,1
+DA:8,1
+DA:9,3
+DA:10,5
+DA:11,0
+DA:12,1
+DA:14,0
+DA:17,1
+DA:18,0
+DA:19,0
+DA:20,0
+DA:22,1
+DA:1234,0
+LH:9
+LF:16
+end_of_record
diff --git a/test/testhelpers/include_error.jl b/test/testhelpers/include_error.jl
index 1f5757597187aa..75a384338b66e5 100644
--- a/test/testhelpers/include_error.jl
+++ b/test/testhelpers/include_error.jl
@@ -1 +1,3 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
 error("Expected exception while running include")
diff --git a/test/testhelpers/llvmpasses.jl b/test/testhelpers/llvmpasses.jl
index 0b443c3eb1535e..9900dd15b5d401 100644
--- a/test/testhelpers/llvmpasses.jl
+++ b/test/testhelpers/llvmpasses.jl
@@ -24,4 +24,3 @@ function emit(f, tt...)
     end
     counter+=1
 end
-
diff --git a/test/testhelpers/withlocales.jl b/test/testhelpers/withlocales.jl
new file mode 100644
index 00000000000000..a3be17cce44642
--- /dev/null
+++ b/test/testhelpers/withlocales.jl
@@ -0,0 +1,28 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+function withlocales(f, newlocales)
+    # save current locales
+    locales = Dict{Int,String}()
+    for cat in 0:9999
+        cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, C_NULL)
+        if cstr != C_NULL
+            locales[cat] = unsafe_string(cstr)
+        end
+    end
+    timestrs = String[]
+    try
+        # change to each of given locales
+        for lc in newlocales
+            set = true
+            for (cat, _) in locales
+                set &= ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc) != C_NULL
+            end
+            set && f()
+        end
+    finally
+        # recover locales
+        for (cat, lc) in locales
+            cstr = ccall(:setlocale, Cstring, (Cint, Cstring), cat, lc)
+        end
+    end
+end
diff --git a/test/threadpool_latency.jl b/test/threadpool_latency.jl
new file mode 100644
index 00000000000000..bdf02b81da03fc
--- /dev/null
+++ b/test/threadpool_latency.jl
@@ -0,0 +1,50 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+using Base.Threads
+
+# This test has not been added to CI as there can be unexpected delays
+# which cause timing-dependent actions to fail.
+
+#=
+Test to ensure that the interactive threadpool works as designed.
+
+Task A is a standard task that does a lot of work (~2 seconds) without
+yielding. This would prevent ordinarily prevent other tasks from running.
+
+Task B is an interactive task that does a little work (~0.02 seconds) and
+yields.
+
+With an interactive threadpool, multiple task Bs should not see notable
+delays in execution even when multiple task As are occupying Julia's
+default threads.
+
+This test should fail in the absence of an interactive thread.
+=#
+const N = 263000000 # busywork(N) takes ~1 sec on an i7-9750H @ 2.6GHz
+function busywork(n::Int)
+    acc = 0
+    for i = 1:n
+        x = rand(2:10)
+        acc += i * x
+    end
+    return acc
+end
+
+function itask()
+    h = N ÷ 50
+    for i = 1:100
+        t1 = time()
+        busywork(h)
+        yield()
+        t2 = time()
+        @test t2 - t1 < 0.15
+    end
+end
+
+it1 = @spawn :interactive itask()
+ti1 = @spawn busywork(N * 2);
+it2 = @spawn :interactive itask()
+ti2 = @spawn busywork(N * 2);
+wait(it1)
+wait(it2)
diff --git a/test/threadpool_use.jl b/test/threadpool_use.jl
new file mode 100644
index 00000000000000..92a4458ee80760
--- /dev/null
+++ b/test/threadpool_use.jl
@@ -0,0 +1,16 @@
+# This file is a part of Julia. License is MIT: https://julialang.org/license
+
+using Test
+using Base.Threads
+
+@test nthreadpools() == 2
+@test threadpool() == :default
+@test threadpool(2) == :interactive
+dtask() = @test threadpool(current_task()) == :default
+itask() = @test threadpool(current_task()) == :interactive
+dt1 = @spawn dtask()
+dt2 = @spawn :default dtask()
+it = @spawn :interactive itask()
+wait(dt1)
+wait(dt2)
+wait(it)
diff --git a/test/threads.jl b/test/threads.jl
index 0a23e2ee6f950e..09e802757062b4 100644
--- a/test/threads.jl
+++ b/test/threads.jl
@@ -1,16 +1,155 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
-let cmd = `$(Base.julia_cmd()) --depwarn=error --startup-file=no threads_exec.jl`
+using Test
+
+using Base.Threads
+
+include("print_process_affinity.jl") # import `uv_thread_getaffinity`
+
+# simple sanity tests for locks under cooperative concurrent access
+let lk = ReentrantLock()
+    c1 = Event()
+    c2 = Event()
+    @test trylock(lk)
+    @test trylock(lk)
+    t1 = @async (notify(c1); lock(lk); unlock(lk); trylock(lk))
+    t2 = @async (notify(c2); trylock(lk))
+    wait(c1)
+    wait(c2)
+    @test t1.queue === lk.cond_wait.waitq
+    @test t2.queue !== lk.cond_wait.waitq
+    @test istaskdone(t2)
+    @test !fetch(t2)
+    unlock(lk)
+    @test t1.queue === lk.cond_wait.waitq
+    unlock(lk)
+    @test t1.queue !== lk.cond_wait.waitq
+    @test fetch(t1)
+end
+
+let e = Event(), started1 = Event(false), started2 = Event(false)
+    for i = 1:3
+        done1 = false
+        done2 = false
+        t1 = @async (notify(started1); wait(e); done1 = true)
+        t2 = @async (notify(started2); wait(e); done2 = true)
+        wait(started1)
+        wait(started2)
+        sleep(0.1)
+        @test !done1 && !done2
+        notify(e)
+        wait(t1)
+        @test done1
+        wait(t2)
+        @test done2
+        wait(e)
+        notify(e)
+        reset(e)
+    end
+end
+
+let e = Event(true), started1 = Event(true), started2 = Event(true), done = Event(true)
+    for i = 1:3
+        done1 = false
+        done2 = false
+        t1 = @async (notify(started1); wait(e); done1 = true; notify(done))
+        t2 = @async (notify(started2); wait(e); done2 = true; notify(done))
+        wait(started1)
+        wait(started2)
+        sleep(0.1)
+        @test !done1 && !done2
+        notify(e)
+        wait(done)
+        @test done1 ⊻ done2
+        done1 ? wait(t1) : wait(t2)
+        notify(e)
+        wait(t1)
+        @test done1
+        wait(t2)
+        @test done2
+        wait(done)
+    end
+end
+
+let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no threads_exec.jl`
     for test_nthreads in (1, 2, 4, 4) # run once to try single-threaded mode, then try a couple times to trigger bad races
-        run(pipeline(setenv(cmd, "JULIA_NUM_THREADS" => test_nthreads), stdout = stdout, stderr = stderr))
+        new_env = copy(ENV)
+        new_env["JULIA_NUM_THREADS"] = string(test_nthreads)
+        run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))
+    end
+end
+
+# Timing-sensitive tests can fail on CI due to occasional unexpected delays,
+# so this test is disabled.
+#=
+let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no threadpool_latency.jl`
+    for test_nthreads in (1, 2)
+        new_env = copy(ENV)
+        new_env["JULIA_NUM_THREADS"] = string(test_nthreads, ",1")
+        run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))
     end
 end
+=#
+let cmd = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no threadpool_use.jl`
+    new_env = copy(ENV)
+    new_env["JULIA_NUM_THREADS"] = "1,1"
+    run(pipeline(setenv(cmd, new_env), stdout = stdout, stderr = stderr))
+end
+
+function run_with_affinity(cpus)
+    script = joinpath(@__DIR__, "print_process_affinity.jl")
+    return readchomp(setcpuaffinity(`$(Base.julia_cmd()) $script`, cpus))
+end
 
 # issue #34415 - make sure external affinity settings work
-if Sys.islinux() && Sys.CPU_THREADS > 1 && Sys.which("taskset") !== nothing
-    run_with_affinity(spec) = readchomp(`taskset -c $spec $(Base.julia_cmd()) -e "run(\`taskset -p \$(getpid())\`)"`)
-    @test endswith(run_with_affinity("1"), "2")
-    @test endswith(run_with_affinity("0,1"), "3")
+if Sys.islinux()
+    const SYS_rrcall_check_presence = 1008
+    global running_under_rr() = 0 == ccall(:syscall, Int,
+        (Int, Int, Int, Int, Int, Int, Int),
+        SYS_rrcall_check_presence, 0, 0, 0, 0, 0, 0)
+else
+    global running_under_rr() = false
+end
+# Note also that libuv does not support affinity in macOS and it is known to
+# hang in FreeBSD. So, it's tested only in Linux and Windows:
+const AFFINITY_SUPPORTED = (Sys.islinux() || Sys.iswindows()) && !running_under_rr()
+
+if AFFINITY_SUPPORTED
+    allowed_cpus = findall(uv_thread_getaffinity())
+    if length(allowed_cpus) ≥ 2
+        @test run_with_affinity(allowed_cpus[1:1]) == "$(allowed_cpus[1])"
+        @test run_with_affinity(allowed_cpus[1:2]) == "$(allowed_cpus[1]),$(allowed_cpus[2])"
+    end
+end
+
+function get_nthreads(options = ``; cpus = nothing)
+    cmd = `$(Base.julia_cmd()) --startup-file=no $(options)`
+    cmd = `$cmd -e "print(Threads.nthreads())"`
+    cmd = addenv(cmd, "JULIA_EXCLUSIVE" => "0", "JULIA_NUM_THREADS" => "auto")
+    if cpus !== nothing
+        cmd = setcpuaffinity(cmd, cpus)
+    end
+    return parse(Int, read(cmd, String))
+end
+
+@testset "nthreads determined based on CPU affinity" begin
+    if AFFINITY_SUPPORTED
+        allowed_cpus = findall(uv_thread_getaffinity())
+        if length(allowed_cpus) ≥ 2
+            @test get_nthreads() ≥ 2
+            @test get_nthreads(cpus = allowed_cpus[1:1]) == 1
+            @test get_nthreads(cpus = allowed_cpus[2:2]) == 1
+            @test get_nthreads(cpus = allowed_cpus[1:2]) == 2
+            @test get_nthreads(`-t1`, cpus = allowed_cpus[1:1]) == 1
+            @test get_nthreads(`-t1`, cpus = allowed_cpus[2:2]) == 1
+            @test get_nthreads(`-t1`, cpus = allowed_cpus[1:2]) == 1
+
+            if length(allowed_cpus) ≥ 3
+                @test get_nthreads(cpus = allowed_cpus[1:2:3]) == 2
+                @test get_nthreads(cpus = allowed_cpus[2:3])   == 2
+            end
+        end
+    end
 end
 
 # issue #34769
@@ -89,6 +228,10 @@ function Base.uvfinalize(t::UvTestIdle)
     nothing
 end
 
+function Base.close(idle::UvTestIdle)
+    Base.uvfinalize(idle)
+end
+
 function Base.wait(idle::UvTestIdle)
     Base.iolock_begin()
     Base.preserve_handle(idle)
@@ -118,12 +261,13 @@ proc = open(pipeline(`$(Base.julia_cmd()) -e $cmd`; stderr=stderr); write=true)
 
 let idle=UvTestIdle()
     wait(idle)
+    close(idle)
 end
 
-using Base.Threads
 @threads for i = 1:1
     let idle=UvTestIdle()
         wait(idle)
+        close(idle)
     end
 end
 
@@ -131,3 +275,55 @@ end
 
 # We don't need the watchdog anymore
 close(proc.in)
+
+# https://github.com/JuliaLang/julia/pull/42973
+@testset "spawn and wait *a lot* of tasks in @profile" begin
+    # Not using threads_exec.jl for better isolation, reproducibility, and a
+    # tighter timeout.
+    script = "profile_spawnmany_exec.jl"
+    cmd_base = `$(Base.julia_cmd()) --depwarn=error --rr-detach --startup-file=no $script`
+    @testset for n in [20000, 200000, 2000000]
+        cmd = ignorestatus(setenv(cmd_base, "NTASKS" => n; dir = @__DIR__))
+        cmd = pipeline(cmd; stdout = stderr, stderr)
+        proc = run(cmd; wait = false)
+        done = Threads.Atomic{Bool}(false)
+        timeout = false
+        timer = Timer(100) do _
+            timeout = true
+            for sig in [Base.SIGTERM, Base.SIGHUP, Base.SIGKILL]
+                for _ in 1:1000
+                    kill(proc, sig)
+                    if done[]
+                        if sig != Base.SIGTERM
+                            @warn "Terminating `$script` required signal $sig"
+                        end
+                        return
+                    end
+                    sleep(0.001)
+                end
+            end
+        end
+        try
+            wait(proc)
+        finally
+            done[] = true
+            close(timer)
+        end
+        if ( !success(proc) ) || ( timeout )
+            @error "A \"spawn and wait lots of tasks\" test failed" n proc.exitcode proc.termsignal success(proc) timeout
+        end
+        if Sys.iswindows()
+            # Known failure: https://github.com/JuliaLang/julia/issues/43124
+            @test_skip success(proc)
+        else
+            @test success(proc)
+            @test !timeout
+        end
+    end
+end
+
+@testset "bad arguments to @threads" begin
+    @test_throws ArgumentError @macroexpand(@threads 1 2) # wrong number of args
+    @test_throws ArgumentError @macroexpand(@threads 1) # arg isn't an Expr
+    @test_throws ArgumentError @macroexpand(@threads if true 1 end) # arg doesn't start with for
+end
diff --git a/test/threads_exec.jl b/test/threads_exec.jl
index 691fca2fb2afa3..9cd5992d90a74a 100644
--- a/test/threads_exec.jl
+++ b/test/threads_exec.jl
@@ -16,7 +16,7 @@ function killjob(d)
     end
     if @isdefined(SIGINFO)
         ccall(:uv_kill, Cint, (Cint, Cint), getpid(), SIGINFO)
-        sleep(1)
+        sleep(5) # Allow time for profile to collect and print before killing
     end
     ccall(:uv_kill, Cint, (Cint, Cint), getpid(), Base.SIGTERM)
     nothing
@@ -27,6 +27,27 @@ end
 # (expected test duration is about 18-180 seconds)
 Timer(t -> killjob("KILLING BY THREAD TEST WATCHDOG\n"), 1200)
 
+# basic lock check
+if nthreads() > 1
+    let lk = Base.Threads.SpinLock()
+        c1 = Base.Event()
+        c2 = Base.Event()
+        @test trylock(lk)
+        @test !trylock(lk)
+        t1 = Threads.@spawn (notify(c1); lock(lk); unlock(lk); trylock(lk))
+        t2 = Threads.@spawn (notify(c2); trylock(lk))
+        Libc.systemsleep(0.1) # block our thread from scheduling for a bit
+        wait(c1)
+        wait(c2)
+        @test !fetch(t2)
+        @test istaskdone(t2)
+        @test !istaskdone(t1)
+        unlock(lk)
+        @test fetch(t1)
+        @test istaskdone(t1)
+    end
+end
+
 # threading constructs
 
 let a = zeros(Int, 2 * nthreads())
@@ -49,7 +70,23 @@ end
 
 # parallel loop with parallel atomic addition
 function threaded_loop(a, r, x)
+    counter = Threads.Atomic{Int}(min(Threads.nthreads(), length(r)))
     @threads for i in r
+        # synchronize the start given that each partition is started sequentially,
+        # meaning that without the wait, if the loop is too fast the iteration can happen in order
+        if counter[] != 0
+            Threads.atomic_sub!(counter, 1)
+            spins = 0
+            while counter[] != 0
+                GC.safepoint()
+                ccall(:jl_cpu_pause, Cvoid, ())
+                spins += 1
+                if spins > 500_000_000  # about 10 seconds
+                    @warn "Failed wait for all workers. Unfinished rogue tasks occupying worker threads?"
+                    break
+                end
+            end
+        end
         j = i - firstindex(r) + 1
         a[j] = 1 + atomic_add!(x, 1)
     end
@@ -62,18 +99,13 @@ function test_threaded_loop_and_atomic_add()
         a = zeros(Int, n)
         threaded_loop(a,r,x)
         found = zeros(Bool,n)
-        was_inorder = true
         for i=1:length(a)
-            was_inorder &= a[i]==i
             found[a[i]] = true
         end
         @test x[] == n
         # Next test checks that all loop iterations ran,
         # and were unique (via pigeon-hole principle).
         @test !(false in found)
-        if was_inorder && nthreads() > 1
-            println(stderr, "Warning: threaded loop executed in order")
-        end
     end
 end
 
@@ -471,7 +503,6 @@ end
 
 function test_thread_cfunction()
     # ensure a runtime call to `get_trampoline` will be created
-    # TODO: get_trampoline is not thread-safe (as this test shows)
     fs = [ Core.Box() for i in 1:1000 ]
     @noinline cf(f) = @cfunction $f Float64 ()
     cfs = Vector{Base.CFunction}(undef, length(fs))
@@ -483,7 +514,7 @@ function test_thread_cfunction()
     @test cfs[2] == cf(fs[2])
     @test length(unique(cfs)) == 1000
     ok = zeros(Int, nthreads())
-    @threads for i in 1:10000
+    @threads :static for i in 1:10000
         i = mod1(i, 1000)
         fi = fs[i]
         cfi = cf(fi)
@@ -494,20 +525,7 @@ function test_thread_cfunction()
     @test sum(ok) == 10000
 end
 if cfunction_closure
-    if nthreads() == 1
-        test_thread_cfunction()
-    else
-        @test_broken "cfunction trampoline code not thread-safe"
-    end
-end
-
-# Compare the two ways of checking if threading is enabled.
-# `jl_tls_states` should only be defined on non-threading build.
-if ccall(:jl_threading_enabled, Cint, ()) == 0
-    @test nthreads() == 1
-    cglobal(:jl_tls_states) != C_NULL
-else
-    @test_throws ErrorException cglobal(:jl_tls_states)
+    test_thread_cfunction()
 end
 
 function test_thread_range()
@@ -570,27 +588,12 @@ function test_thread_too_few_iters()
 end
 test_thread_too_few_iters()
 
-let e = Event(), started = Event()
-    done = false
-    t = @async (notify(started); wait(e); done = true)
-    wait(started)
-    sleep(0.1)
-    @test done == false
-    notify(e)
-    wait(t)
-    @test done == true
-    blocked = true
-    wait(@async (wait(e); blocked = false))
-    @test !blocked
-end
-
-
-@testset "InvasiveLinkedList" begin
-    @test eltype(Base.InvasiveLinkedList{Integer}) == Integer
+@testset "IntrusiveLinkedList" begin
+    @test eltype(Base.IntrusiveLinkedList{Integer}) == Integer
     @test eltype(Base.LinkedList{Integer}) == Integer
-    @test eltype(Base.InvasiveLinkedList{<:Integer}) == Any
+    @test eltype(Base.IntrusiveLinkedList{<:Integer}) == Any
     @test eltype(Base.LinkedList{<:Integer}) == Any
-    @test eltype(Base.InvasiveLinkedList{<:Base.LinkedListItem{Integer}}) == Any
+    @test eltype(Base.IntrusiveLinkedList{<:Base.LinkedListItem{Integer}}) == Any
 
     t = Base.LinkedList{Integer}()
     @test eltype(t) == Integer
@@ -713,9 +716,9 @@ let ch = Channel{Char}(0), t
     @test String(collect(ch)) == "hello"
 end
 
-# errors inside @threads
+# errors inside @threads :static
 function _atthreads_with_error(a, err)
-    Threads.@threads for i in eachindex(a)
+    Threads.@threads :static for i in eachindex(a)
         if err
             error("failed")
         end
@@ -730,21 +733,73 @@ let a = zeros(nthreads())
 end
 
 # static schedule
-function _atthreads_static_schedule()
+function _atthreads_static_schedule(n)
+    ids = zeros(Int, n)
+    Threads.@threads :static for i = 1:n
+        ids[i] = Threads.threadid()
+    end
+    return ids
+end
+@test _atthreads_static_schedule(nthreads()) == 1:nthreads()
+@test _atthreads_static_schedule(1) == [1;]
+@test_throws(
+    "`@threads :static` cannot be used concurrently or nested",
+    @threads(for i = 1:1; _atthreads_static_schedule(nthreads()); end),
+)
+
+# dynamic schedule
+function _atthreads_dynamic_schedule(n)
+    inc = Threads.Atomic{Int}(0)
+    flags = zeros(Int, n)
+    Threads.@threads :dynamic for i = 1:n
+        Threads.atomic_add!(inc, 1)
+        flags[i] = 1
+    end
+    return inc[], flags
+end
+@test _atthreads_dynamic_schedule(nthreads()) == (nthreads(), ones(nthreads()))
+@test _atthreads_dynamic_schedule(1) == (1, ones(1))
+@test _atthreads_dynamic_schedule(10) == (10, ones(10))
+@test _atthreads_dynamic_schedule(nthreads() * 2) == (nthreads() * 2, ones(nthreads() * 2))
+
+# nested dynamic schedule
+function _atthreads_dynamic_dynamic_schedule()
+    inc = Threads.Atomic{Int}(0)
+    Threads.@threads :dynamic for _ = 1:nthreads()
+        Threads.@threads :dynamic for _ = 1:nthreads()
+            Threads.atomic_add!(inc, 1)
+        end
+    end
+    return inc[]
+end
+@test _atthreads_dynamic_dynamic_schedule() == nthreads() * nthreads()
+
+function _atthreads_static_dynamic_schedule()
     ids = zeros(Int, nthreads())
+    inc = Threads.Atomic{Int}(0)
     Threads.@threads :static for i = 1:nthreads()
         ids[i] = Threads.threadid()
+        Threads.@threads :dynamic for _ = 1:nthreads()
+            Threads.atomic_add!(inc, 1)
+        end
     end
-    return ids
+    return ids, inc[]
 end
-@test _atthreads_static_schedule() == [1:nthreads();]
-@test_throws TaskFailedException @threads for i = 1:1; _atthreads_static_schedule(); end
+@test _atthreads_static_dynamic_schedule() == (1:nthreads(), nthreads() * nthreads())
+
+# errors inside @threads :dynamic
+function _atthreads_dynamic_with_error(a)
+    Threads.@threads :dynamic for i in eachindex(a)
+        error("user error in the loop body")
+    end
+    a
+end
+@test_throws "user error in the loop body" _atthreads_dynamic_with_error(zeros(nthreads()))
 
 try
     @macroexpand @threads(for i = 1:10, j = 1:10; end)
 catch ex
-    @test ex isa LoadError
-    @test ex.error isa ArgumentError
+    @test ex isa ArgumentError
 end
 
 @testset "@spawn interpolation" begin
@@ -846,6 +901,19 @@ fib34666(x) =
     end
 @test fib34666(25) == 75025
 
+# issue #41324
+@testset "Co-schedule" begin
+    parent = Threads.@spawn begin
+        @test current_task().sticky == false
+        child = @async begin end
+        @test current_task().sticky == true
+        @test Threads.threadid() == Threads.threadid(child)
+        wait(child)
+    end
+    wait(parent)
+    @test parent.sticky == true
+end
+
 function jitter_channel(f, k, delay, ntasks, schedule)
     x = Channel(ch -> foreach(i -> put!(ch, i), 1:k), 1)
     y = Channel(k) do ch
@@ -880,3 +948,117 @@ end
     end
     @test sort!(collect(ys)) == 1:3
 end
+
+# reproducible multi-threaded rand()
+
+using Random
+
+function reproducible_rand(r, i)
+    if i == 0
+        return UInt64(0)
+    end
+    r1 = rand(r, UInt64)*hash(i)
+    t1 = Threads.@spawn reproducible_rand(r, i-1)
+    t2 = Threads.@spawn reproducible_rand(r, i-1)
+    r2 = rand(r, UInt64)
+    return r1 + r2 + fetch(t1) + fetch(t2)
+end
+
+@testset "Task-local random" begin
+    r = Random.TaskLocalRNG()
+    Random.seed!(r, 23)
+    val = reproducible_rand(r, 10)
+    for i = 1:4
+        Random.seed!(r, 23)
+        @test reproducible_rand(r, 10) == val
+    end
+end
+
+# @spawn racying with sync_end
+
+hidden_spawn(f) = Threads.@spawn f()
+
+function sync_end_race()
+    y = Ref(:notset)
+    local t
+    @sync begin
+        for _ in 1:6  # tweaked to maximize `nerror` below
+            Threads.@spawn nothing
+        end
+        t = hidden_spawn() do
+            Threads.@spawn y[] = :completed
+        end
+    end
+    try
+        wait(t)
+    catch
+        return :notscheduled
+    end
+    return y[]
+end
+
+function check_sync_end_race()
+    @sync begin
+        done = Threads.Atomic{Bool}(false)
+        try
+            # `Threads.@spawn` must fail to be scheduled or complete its execution:
+            ncompleted = 0
+            nnotscheduled = 0
+            nerror = 0
+            for i in 1:1000
+                y = try
+                    yield()
+                    sync_end_race()
+                catch err
+                    if err isa CompositeException
+                        if err.exceptions[1] isa Base.ScheduledAfterSyncException
+                            nerror += 1
+                            continue
+                        end
+                    end
+                    rethrow()
+                end
+                y in (:completed, :notscheduled) || return (; i, y)
+                ncompleted += y === :completed
+                nnotscheduled += y === :notscheduled
+            end
+            # Useful for tuning the test:
+            @debug "`check_sync_end_race` done" nthreads() ncompleted nnotscheduled nerror
+        finally
+            done[] = true
+        end
+    end
+    return nothing
+end
+
+@testset "Racy `@spawn`" begin
+    @test check_sync_end_race() === nothing
+end
+
+# issue #41546, thread-safe package loading
+@testset "package loading" begin
+    ch = Channel{Bool}(nthreads())
+    barrier = Base.Event()
+    old_act_proj = Base.ACTIVE_PROJECT[]
+    try
+        pushfirst!(LOAD_PATH, "@")
+        Base.ACTIVE_PROJECT[] = joinpath(@__DIR__, "TestPkg")
+        @sync begin
+            for _ in 1:nthreads()
+                Threads.@spawn begin
+                    put!(ch, true)
+                    wait(barrier)
+                    @eval using TestPkg
+                end
+            end
+            for _ in 1:nthreads()
+                take!(ch)
+            end
+            notify(barrier)
+        end
+        @test Base.root_module(@__MODULE__, :TestPkg) isa Module
+    finally
+        Base.ACTIVE_PROJECT[] = old_act_proj
+        popfirst!(LOAD_PATH)
+    end
+end
diff --git a/test/tuple.jl b/test/tuple.jl
index 21d4932f1ec9b5..ef42f8fdcecbe3 100644
--- a/test/tuple.jl
+++ b/test/tuple.jl
@@ -1,5 +1,8 @@
 # This file is a part of Julia. License is MIT: https://julialang.org/license
 
+isdefined(Main, :OffsetArrays) || @eval Main include("testhelpers/OffsetArrays.jl")
+using .Main.OffsetArrays
+
 struct BitPerm_19352
     p::NTuple{8,UInt8}
     function BitPerm(p::NTuple{8,UInt8})
@@ -96,11 +99,13 @@ end
         @test BitPerm_19352(0,2,4,6,1,3,5,7).p[2] == 0x02
     end
 
-    @testset "ninitialized" begin
-        @test Tuple{Int,Any}.ninitialized == 2
-        @test Tuple.ninitialized == 0
-        @test Tuple{Int,Vararg{Any}}.ninitialized == 1
-        @test Tuple{Any,Any,Vararg{Any}}.ninitialized == 2
+    @testset "n_uninitialized" begin
+        @test Tuple.name.n_uninitialized == 0
+        @test Core.Compiler.datatype_min_ninitialized(Tuple{Int,Any}) == 2
+        @test Core.Compiler.datatype_min_ninitialized(Tuple) == 0
+        @test Core.Compiler.datatype_min_ninitialized(Tuple{Int,Vararg{Any}}) == 1
+        @test Core.Compiler.datatype_min_ninitialized(Tuple{Any,Any,Vararg{Any}}) == 2
+        @test Core.Compiler.datatype_min_ninitialized(Tuple{Any,Any,Vararg{Any,3}}) == 5
     end
 
     @test empty((1, 2.0, "c")) === ()
@@ -145,12 +150,6 @@ end
     @test_throws BoundsError getindex((1,2), 0)
     @test_throws BoundsError getindex((1,2), -1)
 
-    @test getindex((1,), 1.0) === 1
-    @test getindex((1,2), 2.0) === 2
-    @test_throws BoundsError getindex((), 1.0)
-    @test_throws BoundsError getindex((1,2), 0.0)
-    @test_throws BoundsError getindex((1,2), -1.0)
-
     @test getindex((5,6,7,8), [1,2,3]) === (5,6,7)
     @test_throws BoundsError getindex((1,2), [3,4])
 
@@ -176,6 +175,15 @@ end
         @test_throws MethodError (1,)[]
         @test_throws MethodError (1,1,1)[1,1]
     end
+
+    @testset "get() method for Tuple (Issue #40809)" begin
+        @test get((5, 6, 7), 1, 0) == 5
+        @test get((), 5, 0) == 0
+        @test get((1,), 3, 0) == 0
+        @test get(()->0, (5, 6, 7), 1) == 5
+        @test get(()->0, (), 4) == 0
+        @test get(()->0, (1,), 3) == 0
+    end
 end
 
 @testset "fill to length" begin
@@ -233,6 +241,7 @@ end
     foo(x, y) = x + y
     foo(x, y, z) = x + y + z
     longtuple = ntuple(identity, 20)
+    vlongtuple = ntuple(identity, 33)
 
     @testset "1 argument" begin
         @test map(foo, ()) === ()
@@ -240,6 +249,7 @@ end
         @test map(foo, (1,2)) === (2,4)
         @test map(foo, (1,2,3,4)) === (2,4,6,8)
         @test map(foo, longtuple) === ntuple(i->2i,20)
+        @test map(foo, vlongtuple) === ntuple(i->2i,33)
     end
 
     @testset "2 arguments" begin
@@ -248,6 +258,7 @@ end
         @test map(foo, (1,2), (1,2)) === (2,4)
         @test map(foo, (1,2,3,4), (1,2,3,4)) === (2,4,6,8)
         @test map(foo, longtuple, longtuple) === ntuple(i->2i,20)
+        @test map(foo, vlongtuple, vlongtuple) === ntuple(i->2i,33)
         @test_throws BoundsError map(foo, (), (1,))
         @test_throws BoundsError map(foo, (1,), ())
     end
@@ -258,24 +269,61 @@ end
         @test map(foo, (1,2), (1,2), (1,2)) === (3,6)
         @test map(foo, (1,2,3,4), (1,2,3,4), (1,2,3,4)) === (3,6,9,12)
         @test map(foo, longtuple, longtuple, longtuple) === ntuple(i->3i,20)
+        @test map(foo, vlongtuple, vlongtuple, vlongtuple) === ntuple(i->3i,33)
         @test_throws BoundsError map(foo, (), (1,), (1,))
         @test_throws BoundsError map(foo, (1,), (1,), ())
     end
 end
 
+@testset "foreach" begin
+    longtuple = ntuple(identity, 33)
+
+    @testset "1 argument" begin
+        foo(x) = push!(a, x)
+
+        a = []
+        foreach(foo, ())
+        @test a == []
+
+        a = []
+        foreach(foo, (1,))
+        @test a == [1]
+
+        a = []
+        foreach(foo, longtuple)
+        @test a == [longtuple...]
+    end
+
+    @testset "n arguments" begin
+        foo(x, y) = push!(a, (x, y))
+
+        a = []
+        foreach(foo, (), ())
+        @test a == []
+
+        a = []
+        foreach(foo, (1,), (2,))
+        @test a == [(1, 2)]
+
+        a = []
+        foreach(foo, longtuple, longtuple)
+        @test a == [(x, x) for x in longtuple]
+    end
+end
+
 @testset "mapfoldl" begin
     @test (((1=>2)=>3)=>4) == foldl(=>, (1,2,3,4)) ==
           mapfoldl(identity, =>, (1,2,3,4)) == mapfoldl(abs, =>, (-1,-2,-3,-4))
     @test mapfoldl(abs, =>, (-1,-2,-3,-4), init=-10) == ((((-10=>1)=>2)=>3)=>4)
     @test mapfoldl(abs, =>, (), init=-10) == -10
     @test mapfoldl(abs, Pair{Any,Any}, (-30:-1...,)) == mapfoldl(abs, Pair{Any,Any}, [-30:-1...,])
-    @test_throws ArgumentError mapfoldl(abs, =>, ())
+    @test_throws "reducing over an empty collection" mapfoldl(abs, =>, ())
 end
 
 @testset "filter" begin
     @test filter(isodd, (1,2,3)) == (1, 3)
     @test filter(isequal(2), (true, 2.0, 3)) === (2.0,)
-    @test filter(i -> true, ()) == ()
+    @test filter(Returns(true), ()) == ()
     @test filter(identity, (true,)) === (true,)
     longtuple = ntuple(identity, 20)
     @test filter(iseven, longtuple) == ntuple(i->2i, 10)
@@ -307,8 +355,8 @@ end
     @test hash((1,)) === hash(1, Base.tuplehash_seed)
     @test hash((1,2)) === hash(1, hash(2, Base.tuplehash_seed))
 
-    # Test Any16 methods
-    t = ntuple(identity, 16)
+    # Test Any32 methods
+    t = ntuple(identity, 32)
     @test isequal((t...,1,2,3), (t...,1,2,3))
     @test !isequal((t...,1,2,3), (t...,1,2,4))
     @test !isequal((t...,1,2,3), (t...,1,2))
@@ -327,7 +375,7 @@ end
     @test !isless((t...,1,2), (t...,1,2))
     @test !isless((t...,2,1), (t...,1,2))
 
-    @test hash(t) === foldr(hash, [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,(),UInt(0)])
+    @test hash(t) === foldr(hash, [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,(),UInt(0)])
 end
 
 @testset "functions" begin
@@ -347,6 +395,24 @@ end
     @test prod(()) === 1
     @test prod((1,2,3)) === 6
 
+    # issue 39182
+    @test sum((0xe1, 0x1f)) === sum([0xe1, 0x1f])
+    @test sum((Int8(3),)) === Int(3)
+    @test sum((UInt8(3),)) === UInt(3)
+    @test sum((3,)) === Int(3)
+    @test sum((3.0,)) === 3.0
+    @test sum(("a",)) == sum(["a"])
+    @test sum((0xe1, 0x1f), init=0x0) == sum([0xe1, 0x1f], init=0x0)
+
+    # issue 39183
+    @test prod((Int8(100), Int8(100))) === 10000
+    @test prod((Int8(3),)) === Int(3)
+    @test prod((UInt8(3),)) === UInt(3)
+    @test prod((3,)) === Int(3)
+    @test prod((3.0,)) === 3.0
+    @test prod(("a",)) == prod(["a"])
+    @test prod((0xe1, 0x1f), init=0x1) == prod([0xe1, 0x1f], init=0x1)
+
     @testset "all" begin
         @test all(()) === true
         @test all((false,)) === false
@@ -354,6 +420,14 @@ end
         @test all((true, true)) === true
         @test all((true, false)) === false
         @test all((false, false)) === false
+        @test all((missing, true)) === missing
+        @test all((true, missing)) === missing
+        @test all((missing, false)) === false
+        @test all((false, missing)) === false
+        @test all((missing, true, false)) === false
+        @test_throws TypeError all((missing, 3.2, true))
+        ts = (missing, true, false)
+        @test @allocated(all(ts)) == 0  # PR #44063
     end
 
     @testset "any" begin
@@ -371,6 +445,15 @@ end
         @test any((true,false,true)) === true
         @test any((true,true,false)) === true
         @test any((true,true,true)) === true
+        @test any((missing, true)) === true
+        @test any((true, missing)) === true
+        @test any((missing, false)) === missing
+        @test any((false, missing)) === missing
+        @test any((missing, true, false)) === true
+        @test any((missing, false, false)) === missing
+        @test_throws TypeError any((missing, 3.2, true))
+        ts = (missing, true, false)
+        @test @allocated(any(ts)) == 0  # PR #44063
     end
 end
 
@@ -492,6 +575,20 @@ end
         @test findnext(isequal(1), (1, 1), UInt(2)) isa Int
         @test findprev(isequal(1), (1, 1), UInt(1)) isa Int
     end
+
+    # recursive implementation should allow constant-folding for small tuples
+    @test Base.return_types() do
+        findfirst(==(2), (1.0,2,3f0))
+    end == Any[Int]
+    @test Base.return_types() do
+        findfirst(==(0), (1.0,2,3f0))
+    end == Any[Nothing]
+    @test Base.return_types() do
+        findlast(==(2), (1.0,2,3f0))
+    end == Any[Int]
+    @test Base.return_types() do
+        findlast(==(0), (1.0,2,3f0))
+    end == Any[Nothing]
 end
 
 @testset "properties" begin
@@ -504,6 +601,7 @@ end
 
 # tuple_type_tail on non-normalized vararg tuple
 @test Base.tuple_type_tail(Tuple{Vararg{T, 3}} where T<:Real) == Tuple{Vararg{T, 2}} where T<:Real
+@test Base.tuple_type_tail(Tuple{Vararg{Int}}) == Tuple{Vararg{Int}}
 
 @testset "setindex" begin
     @test Base.setindex((1, ), 2, 1) === (2, )
@@ -518,6 +616,9 @@ end
 
     @test Base.setindex((1, 2, 4), 4, true) === (4, 2, 4)
     @test_throws BoundsError Base.setindex((1, 2), 2, false)
+
+    f() = Base.setindex((1:1, 2:2, 3:3), 9, 1)
+    @test @inferred(f()) == (9, 2:2, 3:3)
 end
 
 @testset "inferrable range indexing with constant values" begin
@@ -575,3 +676,58 @@ end
     @test_throws BoundsError (1,2.0)[0:1]
     @test_throws BoundsError (1,2.0)[0:0]
 end
+
+@testset "Base.rest" begin
+    t = (1, 2.0, 0x03, 4f0)
+    @test Base.rest(t) === t
+    @test Base.rest(t, 2) === (2.0, 0x03, 4f0)
+
+    a = [1 2; 3 4]
+    @test Base.rest(a) == a[:]
+    @test pointer(Base.rest(a)) != pointer(a)
+    @test Base.rest(a, 3) == [2, 4]
+
+    itr = (-i for i in a)
+    @test Base.rest(itr) == itr
+    _, st = iterate(itr)
+    r = Base.rest(itr, st)
+    @test r isa Iterators.Rest
+    @test collect(r) == -[3, 2, 4]
+end
+
+# issue #38837
+f38837(xs) = map((F,x)->F(x), (Float32, Float64), xs)
+@test @inferred(f38837((1,2))) === (1.0f0, 2.0)
+
+@testset "indexing with UnitRanges" begin
+    f(t) = t[3:end-2]
+    @test @inferred(f(Tuple(1:10))) === Tuple(3:8)
+    @test @inferred(f((true, 2., 3, 4f0, 0x05, 6, 7.))) === (3, 4f0, 0x05)
+
+    f2(t) = t[Base.OneTo(5)]
+    @test @inferred(f2(Tuple(1:10))) === Tuple(1:5)
+    @test @inferred(f2((true, 2., 3, 4f0, 0x05, 6, 7.))) === (true, 2., 3, 4f0, 0x05)
+
+    @test @inferred((t -> t[1:end])(Tuple(1:15))) === Tuple(1:15)
+    @test @inferred((t -> t[2:end])(Tuple(1:15))) === Tuple(2:15)
+    @test @inferred((t -> t[3:end])(Tuple(1:15))) === Tuple(3:15)
+    @test @inferred((t -> t[1:end-1])(Tuple(1:15))) === Tuple(1:14)
+    @test @inferred((t -> t[1:end-2])(Tuple(1:15))) === Tuple(1:13)
+    @test @inferred((t -> t[3:2])(Tuple(1:15))) === ()
+
+    @test_throws BoundsError (1, 2)[1:4]
+    @test_throws BoundsError (1, 2)[0:2]
+    @test_throws ArgumentError (1, 2)[OffsetArrays.IdOffsetRange(1:2, -1)]
+end
+
+# https://github.com/JuliaLang/julia/issues/40814
+@test Base.return_types(NTuple{3,Int}, (Vector{Int},)) == Any[NTuple{3,Int}]
+
+# issue #42457
+f42457(a::NTuple{3,Int}, b::Tuple)::Bool = Base.isequal(a, Base.inferencebarrier(b)::Tuple)
+@test f42457((1, 1, 1), (1, 1, 1))
+@test !isempty(methods(Base._isequal, (NTuple{3, Int}, Tuple)))
+g42457(a, b) = Base.isequal(a, b) ? 1 : 2.0
+@test only(Base.return_types(g42457, (NTuple{3, Int}, Tuple))) === Union{Float64, Int}
+@test only(Base.return_types(g42457, (NTuple{3, Int}, NTuple))) === Union{Float64, Int}
+@test only(Base.return_types(g42457, (NTuple{3, Int}, NTuple{4}))) === Float64
diff --git a/test/vecelement.jl b/test/vecelement.jl
index 5652ea10d3aa6a..6638f06f4f3582 100644
--- a/test/vecelement.jl
+++ b/test/vecelement.jl
@@ -96,7 +96,7 @@ const _llvmtypes = Dict{DataType, String}(
     ret <$(N) x $(llvmT)> %3
     """
     return quote
-        Base.@_inline_meta
+        Base.@inline
         Core.getfield(Base, :llvmcall)($exp, Vec{$N, $T}, Tuple{Vec{$N, $T}, Vec{$N, $T}}, x, y)
     end
 end
diff --git a/test/version.jl b/test/version.jl
index d9083b9c49cf18..3723bb0f788e21 100644
--- a/test/version.jl
+++ b/test/version.jl
@@ -100,6 +100,12 @@ show(io,v"4.3.2+1.a")
 # construction from AbstractString
 @test VersionNumber("4.3.2+1.a") == v"4.3.2+1.a"
 
+# construct from VersionNumber
+let
+    v = VersionNumber("1.2.3")
+    @test VersionNumber(v) == v
+end
+
 # typemin and typemax
 @test typemin(VersionNumber) == v"0-"
 @test typemax(VersionNumber) == v"∞"
@@ -233,4 +239,3 @@ io = IOBuffer()
 @test VersionNumber(true, 0x2, Int128(3), (GenericString("rc"), 0x1)) == v"1.2.3-rc.1"
 @test VersionNumber(true, 0x2, Int128(3), (GenericString("rc"), 0x1)) == v"1.2.3-rc.1"
 @test VersionNumber(true, 0x2, Int128(3), (), (GenericString("sp"), 0x2)) == v"1.2.3+sp.2"
-
diff --git a/test/worlds.jl b/test/worlds.jl
index a6aae68da2ca78..015ff470a56dd9 100644
--- a/test/worlds.jl
+++ b/test/worlds.jl
@@ -12,13 +12,13 @@ begin
     f265a(x::Any) = 1
     @test g265a() == 1
     @test Base.return_types(g265a, ()) == Any[Int]
-    @test Core.Compiler.return_type(g265a, ()) == Int
+    @test Core.Compiler.return_type(g265a, Tuple{}) == Int
 
     f265a(x::Any) = 2.0
     @test g265a() == 2.0
 
     @test Base.return_types(g265a, ()) == Any[Float64]
-    @test Core.Compiler.return_type(g265a, ()) == Float64
+    @test Core.Compiler.return_type(g265a, Tuple{}) == Float64
 end
 
 # test signature widening
@@ -29,13 +29,13 @@ begin
     end
     @test g265b(1) == 1
     @test Base.return_types(g265b, (Int,)) == Any[Int]
-    @test Core.Compiler.return_type(g265b, (Int,)) == Int
+    @test Core.Compiler.return_type(g265b, Tuple{Int,}) == Int
 
     f265b(x::Any) = 2.0
     @test g265b(1) == 1
     @test g265b(2) == 2.0
     @test Base.return_types(g265b, (Int,)) == Any[Union{Int, Float64}]
-    @test Core.Compiler.return_type(g265b, (Int,)) == Union{Int, Float64}
+    @test Core.Compiler.return_type(g265b, Tuple{Int,}) == Union{Int, Float64}
 end
 
 # test signature narrowing
@@ -44,13 +44,13 @@ begin
     f265c(x::Any) = 1
     @test g265c() == 1
     @test Base.return_types(g265c, ()) == Any[Int]
-    @test Core.Compiler.return_type(g265c, ()) == Int
+    @test Core.Compiler.return_type(g265c, Tuple{}) == Int
 
     f265c(x::Int) = 2.0
     @test g265c() == 2.0
 
     @test Base.return_types(g265c, ()) == Any[Float64]
-    @test Core.Compiler.return_type(g265c, ()) == Float64
+    @test Core.Compiler.return_type(g265c, Tuple{}) == Float64
 end
 
 # test constructor narrowing
@@ -78,7 +78,7 @@ end
 @test_throws MethodError B265_(2)
 @test_throws MethodError B265_(3)
 @test Base.return_types(B265_, (Int,)) == Any[B265{Int}]
-@test Core.Compiler.return_type(B265_, (Int,)) == B265{Int}
+@test Core.Compiler.return_type(B265_, Tuple{Int,}) == B265{Int}
 
   # add new constructors
 B265(x::Float64, dummy::Nothing) = B265{Float64}(x, dummy)
@@ -89,8 +89,8 @@ B265(x::Any, dummy::Nothing) = B265{UInt8}(x, dummy)
 @test (B265_(2)::B265{Float64}).field1 === 2.0e0
 @test (B265_(3)::B265{UInt8}).field1 === 0x03
 
-@test Base.return_types(B265_, (Int,)) == Any[B265]
-@test Core.Compiler.return_type(B265_, (Int,)) == B265
+@test B265{UInt8} <: only(Base.return_types(B265_, (Int,))) <: B265
+@test B265{UInt8} <: Core.Compiler.return_type(B265_, Tuple{Int,}) <: B265
 
 
 # test oldworld call / inference
@@ -107,8 +107,24 @@ end
 
 g265() = [f265(x) for x in 1:3.]
 wc265 = get_world_counter()
-f265(::Any) = 1.0
-@test wc265 + 1 == get_world_counter()
+wc265_41332a = Task(tls_world_age)
+@test tls_world_age() == wc265
+(function ()
+    global wc265_41332b = Task(tls_world_age)
+    @eval f265(::Any) = 1.0
+    global wc265_41332c = Base.invokelatest(Task, tls_world_age)
+    global wc265_41332d = Task(tls_world_age)
+    nothing
+end)()
+@test wc265 + 2 == get_world_counter() == tls_world_age()
+schedule(wc265_41332a)
+schedule(wc265_41332b)
+schedule(wc265_41332c)
+schedule(wc265_41332d)
+@test wc265 == fetch(wc265_41332a)
+@test wc265 + 1 == fetch(wc265_41332b)
+@test wc265 + 2 == fetch(wc265_41332c)
+@test wc265 + 1 == fetch(wc265_41332d)
 chnls, tasks = Base.channeled_tasks(2, wfunc)
 t265 = tasks[1]
 
@@ -120,15 +136,15 @@ f265(::Int) = 1
 @test put_n_take!(tls_world_age, ()) == wc265
 
 @test g265() == Int[1, 1, 1]
-@test Core.Compiler.return_type(f265, (Any,)) == Union{Float64, Int}
-@test Core.Compiler.return_type(f265, (Int,)) == Int
-@test Core.Compiler.return_type(f265, (Float64,)) == Float64
+@test Core.Compiler.return_type(f265, Tuple{Any,}) == Union{Float64, Int}
+@test Core.Compiler.return_type(f265, Tuple{Int,}) == Int
+@test Core.Compiler.return_type(f265, Tuple{Float64,}) == Float64
 
 @test put_n_take!(g265, ()) == Float64[1.0, 1.0, 1.0]
-@test put_n_take!(Core.Compiler.return_type, (f265, (Any,))) == Float64
-@test put_n_take!(Core.Compiler.return_type, (f265, (Int,))) == Float64
-@test put_n_take!(Core.Compiler.return_type, (f265, (Float64,))) == Float64
-@test put_n_take!(Core.Compiler.return_type, (f265, (Float64,))) == Float64
+@test put_n_take!(Core.Compiler.return_type, (f265, Tuple{Any,})) == Float64
+@test put_n_take!(Core.Compiler.return_type, (f265, Tuple{Int,})) == Float64
+@test put_n_take!(Core.Compiler.return_type, (f265, Tuple{Float64,})) == Float64
+@test put_n_take!(Core.Compiler.return_type, (f265, Tuple{Float64,})) == Float64
 
 # test that reflection ignores worlds
 @test Base.return_types(f265, (Any,)) == Any[Int, Float64]
@@ -136,7 +152,9 @@ f265(::Int) = 1
 
 # test for method errors
 h265() = true
-loc_h265 = "$(@__FILE__):$(@__LINE__() - 1)"
+file = @__FILE__
+Base.stacktrace_contract_userdir() && (file = Base.contractuser(file))
+loc_h265 = "$file:$(@__LINE__() - 3)"
 @test h265()
 @test_throws TaskFailedException(t265) put_n_take!(h265, ())
 @test_throws TaskFailedException(t265) fetch(t265)
@@ -173,7 +191,7 @@ f_gen265(x::Type{Int}) = 3
 # intermediate worlds by later additions to the method table that
 # would have capped those specializations if they were still valid
 f26506(@nospecialize(x)) = 1
-g26506(x) = f26506(x[1])
+g26506(x) = Base.inferencebarrier(f26506)(x[1])
 z = Any["ABC"]
 f26506(x::Int) = 2
 g26506(z) # Places an entry for f26506(::String) in mt.name.cache
@@ -200,6 +218,14 @@ notify(c26506_1)
 wait(c26506_2)
 @test result26506[1] == 3
 
+# issue #38435
+f38435(::Int, ::Any) = 1
+f38435(::Any, ::Int) = 2
+g38435(x) = f38435(x, x)
+@test_throws MethodError(f38435, (1, 1), Base.get_world_counter()) g38435(1)
+f38435(::Int, ::Int) = 3.0
+@test g38435(1) === 3.0
+
 
 ## Invalidation tests
 
@@ -211,8 +237,8 @@ function instance(f, types)
     if isa(specs, Nothing)
     elseif isa(specs, Core.SimpleVector)
         for i = 1:length(specs)
-            if isassigned(specs, i)
-                mi = specs[i]::Core.MethodInstance
+            mi = specs[i]
+            if mi isa Core.MethodInstance
                 if mi.specTypes <: tt && tt <: mi.specTypes
                     inst = mi
                     break
@@ -305,7 +331,8 @@ src4 = code_typed(applyf35855_2, (Vector{Any},))[1]
 @test !(wany4 == wany3) || equal(src4, src3) # code doesn't change unless you invalidate
 
 ## ambiguities do not trigger invalidation
-mi = instance(+, (AbstractChar, UInt8))
+m = which(+, (Char, UInt8))
+mi = Core.Compiler.specialize_method(m, Tuple{typeof(+), AbstractChar, UInt8}, Core.svec())
 w = worlds(mi)
 
 abstract type FixedPoint35855{T <: Integer} <: Real end
diff --git a/ui/Makefile b/ui/Makefile
deleted file mode 100644
index 6ccb8c1fafb036..00000000000000
--- a/ui/Makefile
+++ /dev/null
@@ -1,94 +0,0 @@
-SRCDIR := $(abspath $(dir $(lastword $(MAKEFILE_LIST))))
-JULIAHOME := $(abspath $(SRCDIR)/..)
-BUILDDIR ?= .
-include $(JULIAHOME)/deps/Versions.make
-include $(JULIAHOME)/Make.inc
-include $(JULIAHOME)/deps/llvm-ver.make
-
-override CFLAGS += $(JCFLAGS)
-override CXXFLAGS += $(JCXXFLAGS)
-override CPPFLAGS += $(JCPPFLAGS)
-
-SRCS := repl
-
-HEADERS := $(addprefix $(JULIAHOME)/src/,julia.h julia_assert.h julia_threads.h julia_internal.h options.h) \
-	$(BUILDDIR)/../src/julia_version.h $(wildcard $(JULIAHOME)/src/support/*.h) $(LIBUV_INC)/uv.h
-
-FLAGS := -I$(BUILDROOT)/src -I$(JULIAHOME)/src -I$(JULIAHOME)/src/support -I$(build_includedir)
-ifneq ($(USEMSVC), 1)
-FLAGS += -Wall -Wno-strict-aliasing -fno-omit-frame-pointer -Wc++-compat
-endif
-
-OBJS := $(SRCS:%=$(BUILDDIR)/%.o)
-DOBJS := $(SRCS:%=$(BUILDDIR)/%.dbg.obj)
-DEBUGFLAGS += $(FLAGS)
-SHIPFLAGS += $(FLAGS)
-JLDFLAGS += $(LDFLAGS) $(NO_WHOLE_ARCHIVE) $(OSLIBS) $(RPATH)
-
-ifeq ($(USE_SYSTEM_LIBM),0)
-ifneq ($(UNTRUSTED_SYSTEM_LIBM),0)
-JLDFLAGS += $(WHOLE_ARCHIVE) $(build_libdir)/libopenlibm.a $(NO_WHOLE_ARCHIVE)
-endif
-endif
-
-ifeq ($(OS),WINNT)
-JLDFLAGS += -municode
-endif
-
-default: release
-all: release debug
-release debug :  % : julia-%
-
-$(BUILDDIR)/%.o: $(SRCDIR)/%.c $(HEADERS)
-	@$(call PRINT_CC, $(CC) $(CPPFLAGS) $(CFLAGS) $(SHIPFLAGS) -c $< -o $@)
-$(BUILDDIR)/%.dbg.obj: $(SRCDIR)/%.c $(HEADERS)
-	@$(call PRINT_CC, $(CC) $(CPPFLAGS) $(CFLAGS) $(DEBUGFLAGS) -c $< -o $@)
-
-ifeq ($(OS),WINNT)
-ifneq ($(USEMSVC), 1)
-$(BUILDDIR)/julia_res.o: $(JULIAHOME)/contrib/windows/julia.rc $(JULIAHOME)/VERSION
-	JLVER=`cat $(JULIAHOME)/VERSION` && \
-	JLVERi=`echo $$JLVER | perl -nle \
-		'/^(\d+)\.?(\d*)\.?(\d*)/ && \
-		print int $$1,",",int $$2,",0,",int $$3'` && \
-	$(CROSS_COMPILE)windres $< -O coff -o $@ -DJLVER=$$JLVERi -DJLVER_STR=\\\"$$JLVER\\\"
-OBJS += julia_res.o
-DOBJS += julia_res.o
-endif
-endif
-
-julia-release: $(build_bindir)/julia$(EXE)
-julia-debug: $(build_bindir)/julia-debug$(EXE)
-
-# Embed an Info.plist in the julia executable
-# Create an intermediate target Info.plist for Darwin code signing.
-ifeq ($(DARWIN_FRAMEWORK),1)
-$(BUILDDIR)/Info.plist: $(JULIAHOME)/VERSION
-	/usr/libexec/PlistBuddy -x -c "Clear dict" $@
-	/usr/libexec/PlistBuddy -x -c "Add :CFBundleName string julia" $@
-	/usr/libexec/PlistBuddy -x -c "Add :CFBundleIdentifier string $(darwin_codesign_id_julia_ui)" $@
-	/usr/libexec/PlistBuddy -x -c "Add :CFBundleInfoDictionaryVersion string 6.0" $@
-	/usr/libexec/PlistBuddy -x -c "Add :CFBundleVersion string $(JULIA_COMMIT)" $@
-	/usr/libexec/PlistBuddy -x -c "Add :CFBundleShortVersionString string $(JULIA_MAJOR_VERSION).$(JULIA_MINOR_VERSION).$(JULIA_PATCH_VERSION)" $@
-.INTERMEDIATE: $(BUILDDIR)/Info.plist # cleanup this file after we are done using it
-JLDFLAGS += -Wl,-sectcreate,__TEXT,__info_plist,Info.plist
-$(build_bindir)/julia$(EXE): $(BUILDDIR)/Info.plist
-$(build_bindir)/julia-debug$(EXE): $(BUILDDIR)/Info.plist
-endif
-
-ifneq ($(USEMSVC), 1)
-CXXLD := $(CXX)
-else
-CXXLD := $(LD)
-endif
-
-$(build_bindir)/julia$(EXE): $(OBJS)
-	@$(call PRINT_LINK, $(CXXLD) $(CXXFLAGS) $(CXXLDFLAGS) $(LINK_FLAGS) $(SHIPFLAGS) $(OBJS) -o $@ -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -ljulia $(JLDFLAGS) $(CXXLDFLAGS))
-$(build_bindir)/julia-debug$(EXE): $(DOBJS)
-	@$(call PRINT_LINK, $(CXXLD) $(CXXFLAGS) $(CXXLDFLAGS) $(LINK_FLAGS) $(DEBUGFLAGS) $(DOBJS) -o $@ -L$(build_private_libdir) -L$(build_libdir) -L$(build_shlibdir) -ljulia-debug $(JLDFLAGS) $(CXXLDFLAGS))
-
-clean: | $(CLEAN_TARGETS)
-	rm -f *.o *.dbg.obj
-	rm -f $(build_bindir)/julia*
-
-.PHONY: clean release debug julia-release julia-debug
diff --git a/ui/repl.c b/ui/repl.c
deleted file mode 100644
index 07e1949f93e1ea..00000000000000
--- a/ui/repl.c
+++ /dev/null
@@ -1,234 +0,0 @@
-// This file is a part of Julia. License is MIT: https://julialang.org/license
-
-/*
-  repl.c
-  system startup, main(), and console interaction
-*/
-
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <setjmp.h>
-#include <signal.h>
-#include <time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <limits.h>
-#include <errno.h>
-#include <math.h>
-#include <ctype.h>
-#include <inttypes.h>
-
-#include "uv.h"
-#include "../src/julia.h"
-#include "../src/options.h"
-#include "../src/julia_assert.h"
-
-JULIA_DEFINE_FAST_TLS()
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifdef JL_ASAN_ENABLED
-JL_DLLEXPORT const char* __asan_default_options() {
-    return "allow_user_segv_handler=1:detect_leaks=0";
-    // FIXME: enable LSAN after fixing leaks & defining __lsan_default_suppressions(),
-    //        or defining __lsan_default_options = exitcode=0 once publicly available
-    //        (here and in flisp/flmain.c)
-}
-#endif
-
-static int exec_program(char *program)
-{
-    JL_TRY {
-        jl_load(jl_main_module, program);
-    }
-    JL_CATCH {
-        jl_value_t *errs = jl_stderr_obj();
-        volatile int shown_err = 0;
-        jl_printf(JL_STDERR, "error during bootstrap:\n");
-        JL_TRY {
-            if (errs) {
-                jl_value_t *showf = jl_get_function(jl_base_module, "show");
-                if (showf != NULL) {
-                    jl_call2(showf, errs, jl_current_exception());
-                    jl_printf(JL_STDERR, "\n");
-                    shown_err = 1;
-                }
-            }
-        }
-        JL_CATCH {
-        }
-        if (!shown_err) {
-            jl_static_show(JL_STDERR, jl_current_exception());
-            jl_printf(JL_STDERR, "\n");
-        }
-        jlbacktrace();
-        jl_printf(JL_STDERR, "\n");
-        return 1;
-    }
-    return 0;
-}
-
-void jl_lisp_prompt();
-
-#ifdef JL_GF_PROFILE
-static void print_profile(void)
-{
-    size_t i;
-    void **table = jl_base_module->bindings.table;
-    for(i=1; i < jl_base_module->bindings.size; i+=2) {
-        if (table[i] != HT_NOTFOUND) {
-            jl_binding_t *b = (jl_binding_t*)table[i];
-            if (b->value != NULL && jl_is_function(b->value) &&
-                jl_is_gf(b->value)) {
-                jl_printf(JL_STDERR, "%d\t%s\n",
-                           jl_gf_mtable(b->value)->ncalls,
-                           jl_gf_name(b->value)->name);
-            }
-        }
-    }
-}
-#endif
-
-static NOINLINE int true_main(int argc, char *argv[])
-{
-    jl_set_ARGS(argc, argv);
-
-    jl_function_t *start_client = jl_base_module ?
-        (jl_function_t*)jl_get_global(jl_base_module, jl_symbol("_start")) : NULL;
-
-    if (start_client) {
-        JL_TRY {
-            size_t last_age = jl_get_ptls_states()->world_age;
-            jl_get_ptls_states()->world_age = jl_get_world_counter();
-            jl_apply(&start_client, 1);
-            jl_get_ptls_states()->world_age = last_age;
-        }
-        JL_CATCH {
-            jl_no_exc_handler(jl_current_exception());
-        }
-        return 0;
-    }
-
-    // run program if specified, otherwise enter REPL
-    if (argc > 0) {
-        if (strcmp(argv[0], "-")) {
-            return exec_program(argv[0]);
-        }
-    }
-
-    ios_puts("WARNING: Base._start not defined, falling back to economy mode repl.\n", ios_stdout);
-    if (!jl_errorexception_type)
-        ios_puts("WARNING: jl_errorexception_type not defined; any errors will be fatal.\n", ios_stdout);
-
-    while (!ios_eof(ios_stdin)) {
-        char *volatile line = NULL;
-        JL_TRY {
-            ios_puts("\njulia> ", ios_stdout);
-            ios_flush(ios_stdout);
-            line = ios_readline(ios_stdin);
-            jl_value_t *val = (jl_value_t*)jl_eval_string(line);
-            if (jl_exception_occurred()) {
-                jl_printf(JL_STDERR, "error during run:\n");
-                jl_static_show(JL_STDERR, jl_exception_occurred());
-                jl_exception_clear();
-            }
-            else if (val) {
-                jl_static_show(JL_STDOUT, val);
-            }
-            jl_printf(JL_STDOUT, "\n");
-            free(line);
-            line = NULL;
-            uv_run(jl_global_event_loop(),UV_RUN_NOWAIT);
-        }
-        JL_CATCH {
-            if (line) {
-                free(line);
-                line = NULL;
-            }
-            jl_printf(JL_STDERR, "\nparser error:\n");
-            jl_static_show(JL_STDERR, jl_current_exception());
-            jl_printf(JL_STDERR, "\n");
-            jlbacktrace();
-        }
-    }
-    return 0;
-}
-
-#ifndef _OS_WINDOWS_
-int main(int argc, char *argv[])
-{
-    uv_setup_args(argc, argv); // no-op on Windows
-#else
-
-static void lock_low32() {
-#if defined(_P64) && defined(JL_DEBUG_BUILD)
-    // Wine currently has a that causes it to answer VirtualQuery incorrectly.
-    // block usage of the 32-bit address space on win64, to catch pointer cast errors
-    char *const max32addr = (char*)0xffffffffL;
-    SYSTEM_INFO info;
-    MEMORY_BASIC_INFORMATION meminfo;
-    GetNativeSystemInfo(&info);
-    memset(&meminfo, 0, sizeof(meminfo));
-    meminfo.BaseAddress = info.lpMinimumApplicationAddress;
-    while ((char*)meminfo.BaseAddress < max32addr) {
-        size_t nbytes = VirtualQuery(meminfo.BaseAddress, &meminfo, sizeof(meminfo));
-        assert(nbytes == sizeof(meminfo));
-        if (meminfo.State == MEM_FREE) { // reserve all free pages in the first 4GB of memory
-            char *first = (char*)meminfo.BaseAddress;
-            char *last = first + meminfo.RegionSize;
-            if (last > max32addr)
-                last = max32addr;
-            // adjust first up to the first allocation granularity boundary
-            // adjust last down to the last allocation granularity boundary
-            first = (char*)(((long long)first + info.dwAllocationGranularity - 1) & ~(info.dwAllocationGranularity - 1));
-            last = (char*)((long long)last & ~(info.dwAllocationGranularity - 1));
-            if (last != first) {
-                void *p = VirtualAlloc(first, last - first, MEM_RESERVE, PAGE_NOACCESS); // reserve all memory in between
-                if ((char*)p != first)
-                    // Wine and Windows10 seem to have issues with reporting memory access information correctly
-                    // so we sometimes end up with unexpected results - this is just ignore those and continue
-                    // this is just a debugging aid to help find accidental pointer truncation anyways, so it's not critical
-                    VirtualFree(p, 0, MEM_RELEASE);
-            }
-        }
-        meminfo.BaseAddress += meminfo.RegionSize;
-    }
-#endif
-}
-int wmain(int argc, wchar_t *argv[], wchar_t *envp[])
-{
-    int i;
-    lock_low32();
-    for (i=0; i<argc; i++) { // write the command line to UTF8
-        wchar_t *warg = argv[i];
-        size_t len = WideCharToMultiByte(CP_UTF8, 0, warg, -1, NULL, 0, NULL, NULL);
-        if (!len) return 1;
-        char *arg = (char*)alloca(len);
-        if (!WideCharToMultiByte(CP_UTF8, 0, warg, -1, arg, len, NULL, NULL)) return 1;
-        argv[i] = (wchar_t*)arg;
-    }
-#endif
-    libsupport_init();
-    int lisp_prompt = (argc >= 2 && strcmp((char*)argv[1],"--lisp") == 0);
-    if (lisp_prompt) {
-        memmove(&argv[1], &argv[2], (argc-2)*sizeof(void*));
-        argc--;
-    }
-    jl_parse_opts(&argc, (char***)&argv);
-    julia_init(jl_options.image_file_specified ? JL_IMAGE_CWD : JL_IMAGE_JULIA_HOME);
-    if (lisp_prompt) {
-        jl_get_ptls_states()->world_age = jl_get_world_counter();
-        jl_lisp_prompt();
-        return 0;
-    }
-    int ret = true_main(argc, (char**)argv);
-    jl_atexit_hook(ret);
-    return ret;
-}
-
-#ifdef __cplusplus
-}
-#endif